[
  {
    "path": ".cargo/config.toml",
    "content": "[build]\n# This is only present for local builds, as it will be overridden\n# by the RUSTDOCFLAGS env var in CI.\nrustdocflags = [\"-Arustdoc::private_intra_doc_links\"]\n\n# Enable frame pointers. This may have a minor performance overhead, but makes it easier and more\n# efficient to obtain stack traces (and thus CPU/heap profiles). It may also avoid seg faults that\n# we've seen with libunwind-based profiling. See also:\n#\n# * <https://www.brendangregg.com/blog/2024-03-17/the-return-of-the-frame-pointers.html>\n# * <https://github.com/rust-lang/rust/pull/122646>\n#\n# NB: the RUSTFLAGS envvar will replace this. Make sure to update e.g. Dockerfile as well.\nrustflags = [\"-Cforce-frame-pointers=yes\"]\n\n[alias]\nbuild_testing = [\"build\", \"--features\", \"testing\"]\nneon = [\"run\", \"--bin\", \"neon_local\"]\n"
  },
  {
    "path": ".config/hakari.toml",
    "content": "# This file contains settings for `cargo hakari`.\n# See https://docs.rs/cargo-hakari/latest/cargo_hakari/config for a full list of options.\n\nhakari-package = \"workspace_hack\"\n\n# Format for `workspace-hack = ...` lines in other Cargo.tomls. Requires cargo-hakari 0.9.8 or above.\ndep-format-version = \"4\"\n\n# Setting workspace.resolver = \"2\" in the root Cargo.toml is HIGHLY recommended.\n# Hakari works much better with the new feature resolver.\n# For more about the new feature resolver, see:\n# https://blog.rust-lang.org/2021/03/25/Rust-1.51.0.html#cargos-new-feature-resolver\n# Have to keep the resolver still here since hakari requires this field,\n# even though it's now the default for the 2021 edition & cargo.\nresolver = \"2\"\n\n# Add triples corresponding to platforms commonly used by developers here.\n# https://doc.rust-lang.org/rustc/platform-support.html\nplatforms = [\n    # \"x86_64-unknown-linux-gnu\",\n    # \"x86_64-apple-darwin\",\n    # \"x86_64-pc-windows-msvc\",\n]\n[final-excludes]\nworkspace-members = [\n    # vm_monitor benefits from the same Cargo.lock as the rest of our artifacts, but\n    # it is built primarily in the separate repo neondatabase/autoscaling and thus is excluded\n    # from depending on workspace-hack because most of the dependencies are not used.\n    \"vm_monitor\",\n    # subzero-core is a stub crate that should be excluded from workspace-hack\n    \"subzero-core\",\n    # All of these exist in libs and are not usually built independently.\n    # Putting workspace hack there adds a bottleneck for cargo builds.\n    \"compute_api\",\n    \"consumption_metrics\",\n    \"desim\",\n    \"json\",\n    \"metrics\",\n    \"pageserver_api\",\n    \"postgres_backend\",\n    \"postgres_connection\",\n    \"postgres_ffi\",\n    \"pq_proto\",\n    \"remote_storage\",\n    \"safekeeper_api\",\n    \"tenant_size_model\",\n    \"tracing-utils\",\n    \"utils\",\n    \"wal_craft\",\n    \"walproposer\",\n    
\"postgres-protocol2\",\n    \"postgres-types2\",\n    \"tokio-postgres2\",\n]\n\n# Write out exact versions rather than a semver range. (Defaults to false.)\n# exact-versions = true\n"
  },
  {
    "path": ".config/nextest.toml",
    "content": "[profile.default]\nslow-timeout = { period = \"60s\", terminate-after = 3 }\n"
  },
  {
    "path": ".dockerignore",
    "content": "*\n\n# Files\n!Cargo.lock\n!Cargo.toml\n!Makefile\n!postgres.mk\n!rust-toolchain.toml\n!scripts/ninstall.sh\n!docker-compose/run-tests.sh\n\n# Directories\n!.cargo/\n!.config/\n!compute/\n!compute_tools/\n!control_plane/\n!docker-compose/ext-src\n!libs/\n!pageserver/\n!pgxn/\n!proxy/\n!endpoint_storage/\n!storage_scrubber/\n!safekeeper/\n!storage_broker/\n!storage_controller/\n!vendor/postgres-*/\n!workspace_hack/\n!build-tools/patches\n"
  },
  {
    "path": ".git-blame-ignore-revs",
    "content": "4c2bb43775947775401cbb9d774823c5723a91f8\n"
  },
  {
    "path": ".gitattributes",
    "content": "# allows for nicer hunk headers with git show\n*.rs diff=rust\n"
  },
  {
    "path": ".github/ISSUE_TEMPLATE/bug-template.md",
    "content": "---\nname: Bug Template\nabout: Used for describing bugs\ntitle: ''\nlabels: t/bug\ntype: Bug\nassignees: ''\n\n---\n\n## Steps to reproduce\n\n\n## Expected result\n\n\n## Actual result\n\n\n## Environment\n\n\n## Logs, links\n- \n"
  },
  {
    "path": ".github/ISSUE_TEMPLATE/config.yml",
    "content": "\nblank_issues_enabled: true\ncontact_links:\n  - name: Feature request\n    url: https://console.neon.tech/app/projects?modal=feedback\n    about: For feature requests in the Neon product, please submit via the feedback form on `https://console.neon.tech`\n"
  },
  {
    "path": ".github/ISSUE_TEMPLATE/epic-template.md",
    "content": "---\nname: Epic Template\nabout: A set of related tasks contributing towards a specific outcome, comprising\n  more than 1 week of work.\ntitle: 'Epic: '\nlabels: t/Epic\ntype: Epic\nassignees: ''\n\n---\n\n## Motivation\n\n\n## DoD\n\n\n## Implementation ideas\n\n## Tasks\n```[tasklist]\n- [ ] Example Task\n```\n\n\n## Other related tasks and Epics\n- \n"
  },
  {
    "path": ".github/actionlint.yml",
    "content": "self-hosted-runner:\n  labels:\n    - arm64\n    - large\n    - large-arm64\n    - small\n    - small-metal\n    - small-arm64\n    - unit-perf\n    - unit-perf-aws-arm\n    - us-east-2\nconfig-variables:\n  - AWS_ECR_REGION\n  - AZURE_DEV_CLIENT_ID\n  - AZURE_DEV_REGISTRY_NAME\n  - AZURE_DEV_SUBSCRIPTION_ID\n  - AZURE_PROD_CLIENT_ID\n  - AZURE_PROD_REGISTRY_NAME\n  - AZURE_PROD_SUBSCRIPTION_ID\n  - AZURE_TENANT_ID\n  - BENCHMARK_INGEST_TARGET_PROJECTID\n  - BENCHMARK_LARGE_OLTP_PROJECTID\n  - BENCHMARK_PROJECT_ID_PUB\n  - BENCHMARK_PROJECT_ID_SUB\n  - DEV_AWS_OIDC_ROLE_ARN\n  - DEV_AWS_OIDC_ROLE_MANAGE_BENCHMARK_EC2_VMS_ARN\n  - HETZNER_CACHE_BUCKET\n  - HETZNER_CACHE_ENDPOINT\n  - HETZNER_CACHE_REGION\n  - NEON_DEV_AWS_ACCOUNT_ID\n  - NEON_PROD_AWS_ACCOUNT_ID\n  - PGREGRESS_PG16_PROJECT_ID\n  - PGREGRESS_PG17_PROJECT_ID\n  - PREWARM_PROJECT_ID\n  - REMOTE_STORAGE_AZURE_CONTAINER\n  - REMOTE_STORAGE_AZURE_REGION\n  - SLACK_CICD_CHANNEL_ID\n  - SLACK_COMPUTE_CHANNEL_ID\n  - SLACK_ON_CALL_DEVPROD_STREAM\n  - SLACK_ON_CALL_QA_STAGING_STREAM\n  - SLACK_ON_CALL_STORAGE_STAGING_STREAM\n  - SLACK_ONCALL_COMPUTE_GROUP\n  - SLACK_ONCALL_PROXY_GROUP\n  - SLACK_ONCALL_STORAGE_GROUP\n  - SLACK_PROXY_CHANNEL_ID\n  - SLACK_RUST_CHANNEL_ID\n  - SLACK_STORAGE_CHANNEL_ID\n  - SLACK_UPCOMING_RELEASE_CHANNEL_ID\n"
  },
  {
    "path": ".github/actions/allure-report-generate/action.yml",
    "content": "name: 'Create Allure report'\ndescription: 'Generate Allure report from test results uploaded by actions/allure-report-store'\n\ninputs:\n  store-test-results-into-db:\n    description: 'Whether to store test results into the database. TEST_RESULT_CONNSTR/TEST_RESULT_CONNSTR_NEW should be set'\n    type: boolean\n    required: false\n    default: false\n  aws-oidc-role-arn:\n    description: 'OIDC role arn to interact with S3'\n    required: true\n\noutputs:\n  base-url:\n    description: 'Base URL for Allure report'\n    value: ${{ steps.generate-report.outputs.base-url }}\n  base-s3-url:\n    description: 'Base S3 URL for Allure report'\n    value: ${{ steps.generate-report.outputs.base-s3-url }}\n  report-url:\n    description: 'Allure report URL'\n    value: ${{ steps.generate-report.outputs.report-url }}\n  report-json-url:\n    description: 'Allure report JSON URL'\n    value: ${{ steps.generate-report.outputs.report-json-url }}\n\nruns:\n  using: \"composite\"\n\n  steps:\n    # We're using some env variables quite often, so let's set them once.\n    #\n    # It would be nice to have them set in common runs.env[0] section, but it doesn't work[1]\n    #\n    # - [0] https://docs.github.com/en/actions/creating-actions/metadata-syntax-for-github-actions#runsenv\n    # - [1] https://github.com/neondatabase/neon/pull/3907#discussion_r1154703456\n    #\n    - name: Set variables\n      shell: bash -euxo pipefail {0}\n      env:\n        PR_NUMBER: ${{ github.event.pull_request.number }}\n        BUCKET: neon-github-public-dev\n      run: |\n        if [ -n \"${PR_NUMBER}\" ]; then\n          BRANCH_OR_PR=pr-${PR_NUMBER}\n        elif [ \"${GITHUB_REF_NAME}\" = \"main\" ] || [ \"${GITHUB_REF_NAME}\" = \"release\" ] || \\\n             [ \"${GITHUB_REF_NAME}\" = \"release-proxy\" ] || [ \"${GITHUB_REF_NAME}\" = \"release-compute\" ]; then\n          # Shortcut for special branches\n          BRANCH_OR_PR=${GITHUB_REF_NAME}\n        else\n        
  BRANCH_OR_PR=branch-$(printf \"${GITHUB_REF_NAME}\" | tr -c \"[:alnum:]._-\" \"-\")\n        fi\n\n        LOCK_FILE=reports/${BRANCH_OR_PR}/lock.txt\n\n        WORKDIR=/tmp/${BRANCH_OR_PR}-$(date +%s)\n        mkdir -p ${WORKDIR}\n\n        echo \"BRANCH_OR_PR=${BRANCH_OR_PR}\" >> $GITHUB_ENV\n        echo \"LOCK_FILE=${LOCK_FILE}\"       >> $GITHUB_ENV\n        echo \"WORKDIR=${WORKDIR}\"           >> $GITHUB_ENV\n        echo \"BUCKET=${BUCKET}\"             >> $GITHUB_ENV\n\n    # TODO: We can replace with a special docker image with Java and Allure pre-installed\n    - uses: actions/setup-java@v4\n      with:\n        distribution: 'temurin'\n        java-version: '17'\n\n    - name: Install Allure\n      shell: bash -euxo pipefail {0}\n      working-directory: /tmp\n      run: |\n        if ! which allure; then\n          ALLURE_ZIP=allure-${ALLURE_VERSION}.zip\n          wget -q https://github.com/allure-framework/allure2/releases/download/${ALLURE_VERSION}/${ALLURE_ZIP}\n          echo \"${ALLURE_ZIP_SHA256} ${ALLURE_ZIP}\" | sha256sum --check\n          unzip -q ${ALLURE_ZIP}\n          echo \"$(pwd)/allure-${ALLURE_VERSION}/bin\" >> $GITHUB_PATH\n          rm -f ${ALLURE_ZIP}\n        fi\n      env:\n        ALLURE_VERSION: 2.32.2\n        ALLURE_ZIP_SHA256: 3f28885e2118f6317c92f667eaddcc6491400af1fb9773c1f3797a5fa5174953\n\n    - uses: aws-actions/configure-aws-credentials@v4\n      if: ${{ !cancelled() }}\n      with:\n        aws-region: eu-central-1\n        role-to-assume: ${{ inputs.aws-oidc-role-arn }}\n        role-duration-seconds: 3600 # 1 hour should be more than enough to upload report\n\n    # Potentially we could have several running builds for the same key (for example, for the main branch), so we use an improvised lock for this\n    - name: Acquire lock\n      shell: bash -euxo pipefail {0}\n      run: |\n        LOCK_TIMEOUT=300 # seconds\n\n        LOCK_CONTENT=\"${GITHUB_RUN_ID}-${GITHUB_RUN_ATTEMPT}\"\n        echo ${LOCK_CONTENT} > 
${WORKDIR}/lock.txt\n\n        # Do it up to 5 times to avoid race condition\n        for _ in $(seq 1 5); do\n          for i in $(seq 1 ${LOCK_TIMEOUT}); do\n            LOCK_ACQUIRED=$(aws s3api head-object --bucket neon-github-public-dev --key ${LOCK_FILE} | jq --raw-output '.LastModified' || true)\n            # `date --date=\"...\"` is supported only by gnu date (i.e. it doesn't work on BSD/macOS)\n            if [ -z \"${LOCK_ACQUIRED}\" ] || [ \"$(( $(date +%s) - $(date --date=\"${LOCK_ACQUIRED}\" +%s) ))\" -gt \"${LOCK_TIMEOUT}\" ]; then\n              break\n            fi\n            sleep 1\n          done\n\n          aws s3 mv --only-show-errors ${WORKDIR}/lock.txt \"s3://${BUCKET}/${LOCK_FILE}\"\n\n          # Double-check that exactly THIS run has acquired the lock\n          aws s3 cp --only-show-errors \"s3://${BUCKET}/${LOCK_FILE}\" ./lock.txt\n          if [ \"$(cat lock.txt)\" = \"${LOCK_CONTENT}\" ]; then\n            break\n          fi\n        done\n\n    - name: Generate and publish final Allure report\n      id: generate-report\n      shell: bash -euxo pipefail {0}\n      run: |\n        REPORT_PREFIX=reports/${BRANCH_OR_PR}\n        RAW_PREFIX=reports-raw/${BRANCH_OR_PR}/${GITHUB_RUN_ID}\n\n        BASE_URL=https://${BUCKET}.s3.amazonaws.com/${REPORT_PREFIX}/${GITHUB_RUN_ID}\n        BASE_S3_URL=s3://${BUCKET}/${REPORT_PREFIX}/${GITHUB_RUN_ID}\n        REPORT_URL=${BASE_URL}/index.html\n        REPORT_JSON_URL=${BASE_URL}/data/suites.json\n\n        # Get previously uploaded data for this run\n        ZSTD_NBTHREADS=0\n\n        S3_FILEPATHS=$(aws s3api list-objects-v2 --bucket ${BUCKET} --prefix ${RAW_PREFIX}/ | jq --raw-output '.Contents[]?.Key')\n        if [ -z \"$S3_FILEPATHS\" ]; then\n          # There's no previously uploaded data for this $GITHUB_RUN_ID\n          exit 0\n        fi\n\n        time aws s3 cp --recursive --only-show-errors \"s3://${BUCKET}/${RAW_PREFIX}/\" \"${WORKDIR}/\"\n        for archive in $(find 
${WORKDIR} -name \"*.tar.zst\"); do\n          mkdir -p ${archive%.tar.zst}\n          time tar -xf ${archive} -C ${archive%.tar.zst}\n          rm -f ${archive}\n        done\n\n        # Get history trend\n        time aws s3 cp --recursive --only-show-errors \"s3://${BUCKET}/${REPORT_PREFIX}/latest/history\" \"${WORKDIR}/latest/history\" || true\n\n        # Generate report\n        time allure generate --clean --output ${WORKDIR}/report ${WORKDIR}/*\n\n        # Replace a logo link with a redirect to the latest version of the report\n        sed -i 's|<a href=\".\" class=|<a href=\"https://'${BUCKET}'.s3.amazonaws.com/'${REPORT_PREFIX}'/latest/index.html?nocache='\"'+Date.now()+'\"'\" class=|g' ${WORKDIR}/report/app.js\n\n        # Upload a history and the final report (in this particular order to not to have duplicated history in 2 places)\n        time aws s3 mv --recursive --only-show-errors \"${WORKDIR}/report/history\" \"s3://${BUCKET}/${REPORT_PREFIX}/latest/history\"\n\n        # Use aws s3 cp (instead of aws s3 sync) to keep files from previous runs to make old URLs work,\n        # and to keep files on the host to upload them to the database\n        time s5cmd --log error cp \"${WORKDIR}/report/*\" \"s3://${BUCKET}/${REPORT_PREFIX}/${GITHUB_RUN_ID}/\"\n\n        # Generate redirect\n        cat <<EOF > ${WORKDIR}/index.html\n          <!DOCTYPE html>\n\n          <meta charset=\"utf-8\">\n          <title>Redirecting to ${REPORT_URL}</title>\n          <meta http-equiv=\"refresh\" content=\"0; URL=${REPORT_URL}\">\n        EOF\n        time aws s3 cp --only-show-errors ${WORKDIR}/index.html \"s3://${BUCKET}/${REPORT_PREFIX}/latest/index.html\"\n\n        echo \"base-url=${BASE_URL}\"               >> $GITHUB_OUTPUT\n        echo \"base-s3-url=${BASE_S3_URL}\"         >> $GITHUB_OUTPUT\n        echo \"report-url=${REPORT_URL}\"           >> $GITHUB_OUTPUT\n        echo \"report-json-url=${REPORT_JSON_URL}\" >> $GITHUB_OUTPUT\n\n        echo \"[Allure 
Report](${REPORT_URL})\" >> ${GITHUB_STEP_SUMMARY}\n\n    - name: Release lock\n      if: always()\n      shell: bash -euxo pipefail {0}\n      run: |\n        aws s3 cp --only-show-errors \"s3://${BUCKET}/${LOCK_FILE}\" ./lock.txt || exit 0\n\n        if [ \"$(cat lock.txt)\" = \"${GITHUB_RUN_ID}-${GITHUB_RUN_ATTEMPT}\" ]; then\n          aws s3 rm \"s3://${BUCKET}/${LOCK_FILE}\"\n        fi\n\n    - name: Cache poetry deps\n      uses: actions/cache@v4\n      with:\n        path: ~/.cache/pypoetry/virtualenvs\n        key: v2-${{ runner.os }}-${{ runner.arch }}-python-deps-bookworm-${{ hashFiles('poetry.lock') }}\n\n    - name: Store Allure test stat in the DB (new)\n      if: ${{ !cancelled() && inputs.store-test-results-into-db == 'true' }}\n      shell: bash -euxo pipefail {0}\n      env:\n        COMMIT_SHA: ${{ github.event.pull_request.head.sha || github.sha }}\n        BASE_S3_URL: ${{ steps.generate-report.outputs.base-s3-url }}\n      run: |\n        if [ ! -d \"${WORKDIR}/report/data/test-cases\" ]; then\n          exit 0\n        fi\n\n        export DATABASE_URL=${REGRESS_TEST_RESULT_CONNSTR_NEW}\n\n        ./scripts/pysync\n\n        poetry run python3 scripts/ingest_regress_test_result-new-format.py \\\n          --reference ${GITHUB_REF} \\\n          --revision ${COMMIT_SHA} \\\n          --run-id ${GITHUB_RUN_ID} \\\n          --run-attempt ${GITHUB_RUN_ATTEMPT} \\\n          --test-cases-dir ${WORKDIR}/report/data/test-cases\n\n    - name: Cleanup\n      if: always()\n      shell: bash -euxo pipefail {0}\n      run: |\n        if [ -d \"${WORKDIR}\" ]; then\n          rm -rf ${WORKDIR}\n        fi\n\n    - uses: actions/github-script@v7\n      if: always()\n      env:\n        REPORT_URL: ${{ steps.generate-report.outputs.report-url }}\n        COMMIT_SHA: ${{ github.event.pull_request.head.sha || github.sha }}\n      with:\n        # Retry script for 5XX server errors: https://github.com/actions/github-script#retries\n        retries: 5\n       
 script: |\n          const { REPORT_URL, COMMIT_SHA } = process.env\n\n          await github.rest.repos.createCommitStatus({\n            owner: context.repo.owner,\n            repo: context.repo.repo,\n            sha: `${COMMIT_SHA}`,\n            state: 'success',\n            target_url: `${REPORT_URL}`,\n            context: 'Allure report',\n          })\n"
  },
  {
    "path": ".github/actions/allure-report-store/action.yml",
    "content": "name: 'Store Allure results'\ndescription: 'Upload test results to be used by actions/allure-report-generate'\n\ninputs:\n  report-dir:\n    description: 'directory with test results generated by tests'\n    required: true\n  unique-key:\n    description: 'string to distinguish different results in the same run'\n    required: true\n  aws-oidc-role-arn:\n    description: 'OIDC role arn to interact with S3'\n    required: true\n\nruns:\n  using: \"composite\"\n\n  steps:\n    - name: Set variables\n      shell: bash -euxo pipefail {0}\n      env:\n        PR_NUMBER: ${{ github.event.pull_request.number }}\n        REPORT_DIR: ${{ inputs.report-dir }}\n      run: |\n        if [ -n \"${PR_NUMBER}\" ]; then\n          BRANCH_OR_PR=pr-${PR_NUMBER}\n        elif [ \"${GITHUB_REF_NAME}\" = \"main\" ] || [ \"${GITHUB_REF_NAME}\" = \"release\" ] || \\\n             [ \"${GITHUB_REF_NAME}\" = \"release-proxy\" ] || [ \"${GITHUB_REF_NAME}\" = \"release-compute\" ]; then\n          # Shortcut for special branches\n          BRANCH_OR_PR=${GITHUB_REF_NAME}\n        else\n          BRANCH_OR_PR=branch-$(printf \"${GITHUB_REF_NAME}\" | tr -c \"[:alnum:]._-\" \"-\")\n        fi\n\n        echo \"BRANCH_OR_PR=${BRANCH_OR_PR}\" >> $GITHUB_ENV\n        echo \"REPORT_DIR=${REPORT_DIR}\"     >> $GITHUB_ENV\n\n    - uses: aws-actions/configure-aws-credentials@v4\n      if: ${{ !cancelled() }}\n      with:\n        aws-region: eu-central-1\n        role-to-assume: ${{ inputs.aws-oidc-role-arn }}\n        role-duration-seconds: 3600 # 1 hour should be more than enough to upload report\n\n    - name: Upload test results\n      shell: bash -euxo pipefail {0}\n      run: |\n        REPORT_PREFIX=reports/${BRANCH_OR_PR}\n        RAW_PREFIX=reports-raw/${BRANCH_OR_PR}/${GITHUB_RUN_ID}\n\n        # Add metadata\n        cat <<EOF > ${REPORT_DIR}/executor.json\n          {\n            \"name\": \"GitHub Actions\",\n            \"type\": \"github\",\n            \"url\": 
\"https://${BUCKET}.s3.amazonaws.com/${REPORT_PREFIX}/latest/index.html\",\n            \"buildOrder\": ${GITHUB_RUN_ID},\n            \"buildName\": \"GitHub Actions Run #${GITHUB_RUN_NUMBER}/${GITHUB_RUN_ATTEMPT}\",\n            \"buildUrl\": \"${GITHUB_SERVER_URL}/${GITHUB_REPOSITORY}/actions/runs/${GITHUB_RUN_ID}/attempts/${GITHUB_RUN_ATTEMPT}\",\n            \"reportUrl\": \"https://${BUCKET}.s3.amazonaws.com/${REPORT_PREFIX}/${GITHUB_RUN_ID}/index.html\",\n            \"reportName\": \"Allure Report\"\n          }\n        EOF\n\n        cat <<EOF > ${REPORT_DIR}/environment.properties\n          COMMIT_SHA=${COMMIT_SHA}\n        EOF\n\n        ARCHIVE=\"${UNIQUE_KEY}-${GITHUB_RUN_ATTEMPT}-$(date +%s).tar.zst\"\n        ZSTD_NBTHREADS=0\n\n        time tar -C ${REPORT_DIR} -cf ${ARCHIVE} --zstd .\n        time aws s3 mv --only-show-errors ${ARCHIVE} \"s3://${BUCKET}/${RAW_PREFIX}/${ARCHIVE}\"\n      env:\n        UNIQUE_KEY: ${{ inputs.unique-key }}\n        COMMIT_SHA: ${{ github.event.pull_request.head.sha || github.sha }}\n        BUCKET: neon-github-public-dev\n\n    - name: Cleanup\n      if: always()\n      shell: bash -euxo pipefail {0}\n      run: |\n        rm -rf ${REPORT_DIR}\n"
  },
  {
    "path": ".github/actions/download/action.yml",
    "content": "name: \"Download an artifact\"\ndescription: \"Custom download action\"\ninputs:\n  name:\n    description: \"Artifact name\"\n    required: true\n  path:\n    description: \"A directory to put artifact into\"\n    default: \".\"\n    required: false\n  skip-if-does-not-exist:\n    description: \"Allow to skip if file doesn't exist, fail otherwise\"\n    default: false\n    required: false\n  prefix:\n    description: \"S3 prefix. Default is '<commit sha>/${GITHUB_RUN_ID}/${GITHUB_RUN_ATTEMPT}'\"\n    required: false\n  aws-oidc-role-arn:\n    description: 'OIDC role arn to interact with S3'\n    required: true\n\nruns:\n  using: \"composite\"\n  steps:\n    - uses: aws-actions/configure-aws-credentials@v4\n      with:\n        aws-region: eu-central-1\n        role-to-assume: ${{ inputs.aws-oidc-role-arn }}\n        role-duration-seconds: 3600\n\n    - name: Download artifact\n      id: download-artifact\n      shell: bash -euxo pipefail {0}\n      env:\n        TARGET: ${{ inputs.path }}\n        ARCHIVE: /tmp/downloads/${{ inputs.name }}.tar.zst\n        SKIP_IF_DOES_NOT_EXIST: ${{ inputs.skip-if-does-not-exist }}\n        PREFIX: artifacts/${{ inputs.prefix || format('{0}/{1}/{2}', github.event.pull_request.head.sha || github.sha, github.run_id, github.run_attempt) }}\n      run: |\n        BUCKET=neon-github-public-dev\n        FILENAME=$(basename $ARCHIVE)\n\n        S3_KEY=$(aws s3api list-objects-v2 --bucket ${BUCKET} --prefix ${PREFIX%$GITHUB_RUN_ATTEMPT} | jq -r '.Contents[]?.Key' | grep ${FILENAME} | sort --version-sort | tail -1 || true)\n        if [ -z \"${S3_KEY}\" ]; then\n          if [ \"${SKIP_IF_DOES_NOT_EXIST}\" = \"true\" ]; then\n            echo 'SKIPPED=true' >> $GITHUB_OUTPUT\n            exit 0\n          else\n            echo >&2 \"Neither s3://${BUCKET}/${PREFIX}/${FILENAME} nor its version from previous attempts exist\"\n            exit 1\n          fi\n        fi\n\n        echo 'SKIPPED=false' >> $GITHUB_OUTPUT\n\n        mkdir 
-p $(dirname $ARCHIVE)\n        time aws s3 cp --only-show-errors s3://${BUCKET}/${S3_KEY} ${ARCHIVE}\n\n    - name: Extract artifact\n      if: ${{ steps.download-artifact.outputs.SKIPPED == 'false' }}\n      shell: bash -euxo pipefail {0}\n      env:\n        TARGET: ${{ inputs.path }}\n        ARCHIVE: /tmp/downloads/${{ inputs.name }}.tar.zst\n      run: |\n        mkdir -p ${TARGET}\n        time tar -xf ${ARCHIVE} -C ${TARGET}\n        rm -f ${ARCHIVE}\n"
  },
  {
    "path": ".github/actions/neon-branch-create/action.yml",
    "content": "name: 'Create Branch'\ndescription: 'Create Branch using API'\n\ninputs:\n  api_key:\n    description: 'Neon API key'\n    required: true\n  project_id:\n    description: 'ID of the Project to create Branch in'\n    required: true\n  api_host:\n    description: 'Neon API host'\n    default: console-stage.neon.build\noutputs:\n  dsn:\n    description: 'Created Branch DSN (for main database)'\n    value: ${{ steps.change-password.outputs.dsn }}\n  branch_id:\n    description: 'Created Branch ID'\n    value: ${{ steps.create-branch.outputs.branch_id }}\n\nruns:\n  using: \"composite\"\n  steps:\n    - name: Create New Branch\n      id: create-branch\n      shell: bash -euxo pipefail {0}\n      run: |\n        for i in $(seq 1 10); do\n         branch=$(curl \\\n            \"https://${API_HOST}/api/v2/projects/${PROJECT_ID}/branches\" \\\n            --header \"Accept: application/json\" \\\n            --header \"Content-Type: application/json\" \\\n            --header \"Authorization: Bearer ${API_KEY}\" \\\n            --data \"{\n              \\\"branch\\\": {\n                \\\"name\\\": \\\"Created by actions/neon-branch-create; GITHUB_RUN_ID=${GITHUB_RUN_ID} at $(date +%s)\\\"\n              },\n              \\\"endpoints\\\": [\n                {\n                  \\\"type\\\": \\\"read_write\\\"\n                }\n              ]\n            }\")\n\n          if [ -z \"${branch}\" ]; then\n            sleep 1\n            continue\n          fi\n\n          branch_id=$(echo $branch | jq --raw-output '.branch.id')\n          if [ \"${branch_id}\" == \"null\" ]; then\n            sleep 1\n            continue\n          fi\n\n          break\n        done\n\n        if [ -z \"${branch_id}\" ] || [ \"${branch_id}\" == \"null\" ]; then\n          echo >&2 \"Failed to create branch after 10 attempts, the latest response was: ${branch}\"\n          exit 1\n        fi\n\n        branch_id=$(echo $branch | jq --raw-output '.branch.id')\n       
 echo \"branch_id=${branch_id}\" >> $GITHUB_OUTPUT\n\n        host=$(echo $branch | jq --raw-output '.endpoints[0].host')\n        echo \"host=${host}\" >> $GITHUB_OUTPUT\n      env:\n        API_HOST: ${{ inputs.api_host }}\n        API_KEY: ${{ inputs.api_key }}\n        PROJECT_ID: ${{ inputs.project_id }}\n\n    - name: Get Role name\n      id: role-name\n      shell: bash -euxo pipefail {0}\n      run: |\n        roles=$(curl \\\n          \"https://${API_HOST}/api/v2/projects/${PROJECT_ID}/branches/${BRANCH_ID}/roles\" \\\n          --fail \\\n          --header \"Accept: application/json\" \\\n          --header \"Content-Type: application/json\" \\\n          --header \"Authorization: Bearer ${API_KEY}\"\n          )\n\n        role_name=$(echo \"$roles\" | jq --raw-output '\n          (.roles | map(select(.protected == false))) as $roles |\n          if any($roles[]; .name == \"neondb_owner\")\n          then \"neondb_owner\"\n          else $roles[0].name\n          end\n        ')\n        echo \"role_name=${role_name}\" >> $GITHUB_OUTPUT\n      env:\n        API_HOST: ${{ inputs.api_host }}\n        API_KEY: ${{ inputs.api_key }}\n        PROJECT_ID: ${{ inputs.project_id }}\n        BRANCH_ID: ${{ steps.create-branch.outputs.branch_id }}\n\n    - name: Change Password\n      id: change-password\n      # A shell without `set -x` to not to expose password/dsn in logs\n      shell: bash -euo pipefail {0}\n      run: |\n        for i in $(seq 1 10); do\n          reset_password=$(curl \\\n            \"https://${API_HOST}/api/v2/projects/${PROJECT_ID}/branches/${BRANCH_ID}/roles/${ROLE_NAME}/reset_password\" \\\n            --request POST \\\n            --header \"Accept: application/json\" \\\n            --header \"Content-Type: application/json\" \\\n            --header \"Authorization: Bearer ${API_KEY}\"\n            )\n\n          if [ -z \"${reset_password}\" ]; then\n            sleep $i\n            continue\n          fi\n\n          
password=$(echo $reset_password | jq --raw-output '.role.password')\n          if [ \"${password}\" == \"null\" ]; then\n            sleep $i # increasing backoff\n            continue\n          fi\n\n          echo \"::add-mask::${password}\"\n          break\n        done\n\n        if [ -z \"${password}\" ] || [ \"${password}\" == \"null\" ]; then\n          echo >&2 \"Failed to reset password after 10 attempts, the latest response was: ${reset_password}\"\n          exit 1\n        fi\n\n        dsn=\"postgres://${ROLE_NAME}:${password}@${HOST}/neondb\"\n        echo \"::add-mask::${dsn}\"\n        echo \"dsn=${dsn}\" >> $GITHUB_OUTPUT\n      env:\n        API_HOST: ${{ inputs.api_host }}\n        API_KEY: ${{ inputs.api_key }}\n        PROJECT_ID: ${{ inputs.project_id }}\n        BRANCH_ID: ${{ steps.create-branch.outputs.branch_id }}\n        ROLE_NAME: ${{ steps.role-name.outputs.role_name }}\n        HOST: ${{ steps.create-branch.outputs.host }}\n"
  },
  {
    "path": ".github/actions/neon-branch-delete/action.yml",
    "content": "name: 'Delete Branch'\ndescription: 'Delete Branch using API'\n\ninputs:\n  api_key:\n    description: 'Neon API key'\n    required: true\n  project_id:\n    description: 'ID of the Project that contains the Branch to delete'\n    required: true\n  branch_id:\n    description: 'ID of the branch to delete'\n    required: true\n  api_host:\n    description: 'Neon API host'\n    default: console-stage.neon.build\n\nruns:\n  using: \"composite\"\n  steps:\n    - name: Delete Branch\n      # Do not try to delete a branch if .github/actions/neon-project-create\n      # or .github/actions/neon-branch-create failed before\n      if: ${{ inputs.project_id != '' && inputs.branch_id != '' }}\n      shell: bash -euxo pipefail {0}\n      run: |\n        for i in $(seq 1 10); do\n          deleted_branch=$(curl \\\n            \"https://${API_HOST}/api/v2/projects/${PROJECT_ID}/branches/${BRANCH_ID}\" \\\n            --request DELETE \\\n            --header \"Accept: application/json\" \\\n            --header \"Content-Type: application/json\" \\\n            --header \"Authorization: Bearer ${API_KEY}\"\n            )\n\n          if [ -z \"${deleted_branch}\" ]; then\n            sleep 1\n            continue\n          fi\n\n          branch_id=$(echo $deleted_branch | jq --raw-output '.branch.id')\n          if [ \"${branch_id}\" == \"null\" ]; then\n            sleep 1\n            continue\n          fi\n\n          break\n        done\n\n        if [ -z \"${branch_id}\" ] || [ \"${branch_id}\" == \"null\" ]; then\n          echo >&2 \"Failed to delete branch after 10 attempts, the latest response was: ${deleted_branch}\"\n          exit 1\n        fi\n      env:\n        API_HOST: ${{ inputs.api_host }}\n        API_KEY: ${{ inputs.api_key }}\n        PROJECT_ID: ${{ inputs.project_id }}\n        BRANCH_ID: ${{ inputs.branch_id }}\n"
  },
  {
    "path": ".github/actions/neon-project-create/action.yml",
    "content": "name: 'Create Neon Project'\ndescription: 'Create Neon Project using API'\n\ninputs:\n  api_key:\n    description: 'Neon API key'\n    required: true\n  region_id:\n    description: 'Region ID, if not set the project will be created in the default region'\n    default: aws-us-east-2\n  postgres_version:\n    description: 'Postgres version; default is 16'\n    default: '16'\n  api_host:\n    description: 'Neon API host'\n    default: console-stage.neon.build\n  compute_units:\n    description: '[Min, Max] compute units'\n    default: '[1, 1]'\n  # settings below only needed if you want the project to be sharded from the beginning\n  shard_split_project:\n    description: 'by default new projects are not shard-split initially, but only when shard-split threshold is reached, specify true to explicitly shard-split initially'\n    required: false\n    default: 'false'\n  disable_sharding:\n    description: 'by default new projects use storage controller default policy to shard-split when shard-split threshold is reached, specify true to explicitly disable sharding'\n    required: false\n    default: 'false'\n  admin_api_key:\n    description: 'Admin API Key needed for shard-splitting and for disabling sharding. Must be specified if shard_split_project or disable_sharding is true'\n    required: false\n  shard_count:\n    description: 'Number of shards to split the project into, only applies if shard_split_project is true'\n    required: false\n    default: '8'\n  stripe_size:\n    description: 'Stripe size, optional, in 8kiB pages.  e.g. set 2048 for 16MB stripes. 
Default is 256 MiB (32768 pages), only applies if shard_split_project is true'\n    required: false\n    default: '32768'\n  psql_path:\n    description: 'Path to psql binary - it is caller responsibility to provision the psql binary'\n    required: false\n    default: '/tmp/neon/pg_install/v16/bin/psql'\n  libpq_lib_path:\n    description: 'Path to directory containing libpq library - it is caller responsibility to provision the libpq library'\n    required: false\n    default: '/tmp/neon/pg_install/v16/lib'\n  project_settings:\n    description: 'A JSON object with project settings'\n    required: false\n    default: '{}'\n\noutputs:\n  dsn:\n    description: 'Created Project DSN (for main database)'\n    value: ${{ steps.create-neon-project.outputs.dsn }}\n  project_id:\n    description: 'Created Project ID'\n    value: ${{ steps.create-neon-project.outputs.project_id }}\n\nruns:\n  using: \"composite\"\n  steps:\n    - name: Create Neon Project\n      id: create-neon-project\n      # A shell without `set -x` to not to expose password/dsn in logs\n      shell: bash -euo pipefail {0}\n      run: |\n        res=$(curl \\\n          \"https://${API_HOST}/api/v2/projects\" \\\n          -w \"%{http_code}\" \\\n          --header \"Accept: application/json\" \\\n          --header \"Content-Type: application/json\" \\\n          --header \"Authorization: Bearer ${API_KEY}\" \\\n          --data \"{\n            \\\"project\\\": {\n              \\\"name\\\": \\\"Created by actions/neon-project-create; GITHUB_RUN_ID=${GITHUB_RUN_ID}\\\",\n              \\\"pg_version\\\": ${POSTGRES_VERSION},\n              \\\"region_id\\\": \\\"${REGION_ID}\\\",\n              \\\"provisioner\\\": \\\"k8s-neonvm\\\",\n              \\\"autoscaling_limit_min_cu\\\": ${MIN_CU},\n              \\\"autoscaling_limit_max_cu\\\": ${MAX_CU},\n              \\\"settings\\\": ${PROJECT_SETTINGS}\n            }\n          }\")\n        \n        code=${res: -3}\n        if [[ ${code} -ge 400 ]]; then\n     
     echo Request failed with error code ${code}\n          echo ${res::-3}\n          exit 1\n        else\n          project=${res::-3}\n        fi\n\n        # Mask password\n        echo \"::add-mask::$(echo $project | jq --raw-output '.roles[] | select(.name != \"web_access\") | .password')\"\n\n        dsn=$(echo $project | jq --raw-output '.connection_uris[0].connection_uri')\n        echo \"::add-mask::${dsn}\"\n        echo \"dsn=${dsn}\" >> $GITHUB_OUTPUT\n\n        project_id=$(echo $project | jq --raw-output '.project.id')\n        echo \"project_id=${project_id}\" >> $GITHUB_OUTPUT\n\n        echo \"Project ${project_id} has been created\"\n\n        if [ \"${SHARD_SPLIT_PROJECT}\" = \"true\" ]; then\n          # determine tenant ID\n          TENANT_ID=`${PSQL} ${dsn} -t -A -c \"SHOW neon.tenant_id\"`\n\n          echo \"Splitting project ${project_id} with tenant_id ${TENANT_ID} into $((SHARD_COUNT)) shards with stripe size $((STRIPE_SIZE))\"\n\n          echo \"Sending PUT request to https://${API_HOST}/regions/${REGION_ID}/api/v1/admin/storage/proxy/control/v1/tenant/${TENANT_ID}/shard_split\"\n          echo \"with body {\\\"new_shard_count\\\": $((SHARD_COUNT)), \\\"new_stripe_size\\\": $((STRIPE_SIZE))}\"\n\n          # we need an ADMIN API KEY to invoke storage controller API for shard splitting (bash -u above checks that the variable is set)\n          curl -X PUT \\\n            \"https://${API_HOST}/regions/${REGION_ID}/api/v1/admin/storage/proxy/control/v1/tenant/${TENANT_ID}/shard_split\" \\\n            -H \"Accept: application/json\" -H \"Content-Type: application/json\" -H \"Authorization: Bearer ${ADMIN_API_KEY}\" \\\n            -d \"{\\\"new_shard_count\\\": $SHARD_COUNT, \\\"new_stripe_size\\\": $STRIPE_SIZE}\"\n        fi\n        if [ \"${DISABLE_SHARDING}\" = \"true\" ]; then\n          # determine tenant ID\n          TENANT_ID=`${PSQL} ${dsn} -t -A -c \"SHOW neon.tenant_id\"`\n\n          echo \"Explicitly disabling 
shard-splitting for project ${project_id} with tenant_id ${TENANT_ID}\"\n\n          echo \"Sending PUT request to https://${API_HOST}/regions/${REGION_ID}/api/v1/admin/storage/proxy/control/v1/tenant/${TENANT_ID}/policy\"\n          echo \"with body {\\\"scheduling\\\": \\\"Essential\\\"}\"\n\n          # we need an ADMIN API KEY to invoke storage controller API for shard splitting (bash -u above checks that the variable is set)\n          curl -X PUT \\\n            \"https://${API_HOST}/regions/${REGION_ID}/api/v1/admin/storage/proxy/control/v1/tenant/${TENANT_ID}/policy\" \\\n            -H \"Accept: application/json\" -H \"Content-Type: application/json\" -H \"Authorization: Bearer ${ADMIN_API_KEY}\" \\\n            -d \"{\\\"scheduling\\\": \\\"Essential\\\"}\"\n        fi\n        \n\n      env:\n        API_HOST: ${{ inputs.api_host }}\n        API_KEY: ${{ inputs.api_key }}\n        REGION_ID: ${{ inputs.region_id }}\n        POSTGRES_VERSION: ${{ inputs.postgres_version }}\n        MIN_CU: ${{ fromJSON(inputs.compute_units)[0] }}\n        MAX_CU: ${{ fromJSON(inputs.compute_units)[1] }}\n        SHARD_SPLIT_PROJECT: ${{ inputs.shard_split_project }}\n        DISABLE_SHARDING: ${{ inputs.disable_sharding }}\n        ADMIN_API_KEY: ${{ inputs.admin_api_key }}\n        SHARD_COUNT: ${{ inputs.shard_count }}\n        STRIPE_SIZE: ${{ inputs.stripe_size }}\n        PSQL: ${{ inputs.psql_path }}\n        LD_LIBRARY_PATH: ${{ inputs.libpq_lib_path }}\n        PROJECT_SETTINGS: ${{ inputs.project_settings }}\n"
  },
  {
    "path": ".github/actions/neon-project-delete/action.yml",
    "content": "name: 'Delete Neon Project'\ndescription: 'Delete Neon Project using API'\n\ninputs:\n  api_key:\n    description: 'Neon API key'\n    required: true\n  project_id:\n    description: 'ID of the Project to delete'\n    required: true\n  api_host:\n    description: 'Neon API host'\n    default: console-stage.neon.build\n\nruns:\n  using: \"composite\"\n  steps:\n    - name: Delete Neon Project\n      # Do not try to delete a project if .github/actions/neon-project-create failed before\n      if: ${{ inputs.project_id != '' }}\n      shell: bash -euxo pipefail {0}\n      run: |\n        curl \\\n          \"https://${API_HOST}/api/v2/projects/${PROJECT_ID}\" \\\n          --fail \\\n          --request DELETE \\\n          --header \"Accept: application/json\" \\\n          --header \"Content-Type: application/json\" \\\n          --header \"Authorization: Bearer ${API_KEY}\"\n\n        echo \"Project ${PROJECT_ID} has been deleted\"\n      env:\n        API_HOST: ${{ inputs.api_host }}\n        API_KEY: ${{ inputs.api_key }}\n        PROJECT_ID: ${{ inputs.project_id }}\n"
  },
  {
    "path": ".github/actions/prepare-for-subzero/action.yml",
    "content": "name: 'Prepare current job for subzero'\ndescription: >\n  Set git token to access `neondatabase/subzero` from cargo build,\n  and set `CARGO_NET_GIT_FETCH_WITH_CLI=true` env variable to use git CLI\n\ninputs:\n  token:\n    description: 'GitHub token with access to neondatabase/subzero'\n    required: true\n\nruns:\n  using: \"composite\"\n\n  steps:\n    - name: Set git token for neondatabase/subzero\n      uses: pyTooling/Actions/with-post-step@2307b526df64d55e95884e072e49aac2a00a9afa # v5.1.0\n      env:\n        SUBZERO_ACCESS_TOKEN: ${{ inputs.token }}\n      with:\n        main: |\n          git config --global url.\"https://x-access-token:${SUBZERO_ACCESS_TOKEN}@github.com/neondatabase/subzero\".insteadOf \"https://github.com/neondatabase/subzero\"\n          cargo add -p proxy subzero-core --git https://github.com/neondatabase/subzero --rev 396264617e78e8be428682f87469bb25429af88a\n        post: |\n          git config --global --unset url.\"https://x-access-token:${SUBZERO_ACCESS_TOKEN}@github.com/neondatabase/subzero\".insteadOf \"https://github.com/neondatabase/subzero\"\n\n    - name: Set `CARGO_NET_GIT_FETCH_WITH_CLI=true` env variable\n      shell: bash -euxo pipefail {0}\n      run: echo \"CARGO_NET_GIT_FETCH_WITH_CLI=true\" >> ${GITHUB_ENV}\n"
  },
  {
    "path": ".github/actions/run-python-test-set/action.yml",
    "content": "name: 'Run python test'\ndescription: 'Runs a Neon python test set, performing all the required preparations before'\n\ninputs:\n  build_type:\n    description: 'Type of Rust (neon) and C (postgres) builds. Must be \"release\" or \"debug\", or \"remote\" for the remote cluster'\n    required: true\n  test_selection:\n    description: 'A python test suite to run'\n    required: true\n  extra_params:\n    description: 'Arbitrary parameters to pytest. For example \"-s\" to prevent capturing stdout/stderr'\n    required: false\n    default: ''\n  needs_postgres_source:\n    description: 'Set to true if the test suite requires postgres source checked out'\n    required: false\n    default: 'false'\n  run_in_parallel:\n    description: 'Whether to run tests in parallel'\n    required: false\n    default: 'true'\n  save_perf_report:\n    description: 'Whether to upload the performance report, if true PERF_TEST_RESULT_CONNSTR env variable should be set'\n    required: false\n    default: 'false'\n  run_with_real_s3:\n    description: 'Whether to pass real s3 credentials to the test suite'\n    required: false\n    default: 'false'\n  real_s3_bucket:\n    description: 'Bucket name for real s3 tests'\n    required: false\n    default: ''\n  real_s3_region:\n    description: 'Region name for real s3 tests'\n    required: false\n    default: ''\n  rerun_failed:\n    description: 'Whether to rerun failed tests'\n    required: false\n    default: 'false'\n  pg_version:\n    description: 'Postgres version to use for tests'\n    required: false\n    default: 'v16'\n  sanitizers:\n    description: 'enabled or disabled'\n    required: false\n    default: 'disabled'\n    type: string\n  benchmark_durations:\n    description: 'benchmark durations JSON'\n    required: false\n    default: '{}'\n  aws-oidc-role-arn:\n    description: 'OIDC role arn to interract with S3'\n    required: true\n\nruns:\n  using: \"composite\"\n  steps:\n    - name: Get Neon artifact\n      
if: inputs.build_type != 'remote'\n      uses: ./.github/actions/download\n      with:\n        name: neon-${{ runner.os }}-${{ runner.arch }}-${{ inputs.build_type }}${{ inputs.sanitizers == 'enabled' && '-sanitized' || '' }}-artifact\n        path: /tmp/neon\n        aws-oidc-role-arn: ${{ inputs.aws-oidc-role-arn }}\n\n    - name: Download Neon binaries for the previous release\n      if: inputs.build_type != 'remote'\n      uses: ./.github/actions/download\n      with:\n        name: neon-${{ runner.os }}-${{ runner.arch }}-${{ inputs.build_type }}-artifact\n        path: /tmp/neon-previous\n        prefix: latest\n        aws-oidc-role-arn: ${{ inputs.aws-oidc-role-arn }}\n\n    - name: Download compatibility snapshot\n      if: inputs.build_type != 'remote'\n      uses: ./.github/actions/download\n      with:\n        name: compatibility-snapshot-${{ runner.arch }}-${{ inputs.build_type }}-pg${{ inputs.pg_version }}\n        path: /tmp/compatibility_snapshot_pg${{ inputs.pg_version }}\n        prefix: latest\n        # The lack of compatibility snapshot (for example, for the new Postgres version)\n        # shouldn't fail the whole job. 
Only relevant test should fail.\n        skip-if-does-not-exist: true\n        aws-oidc-role-arn: ${{ inputs.aws-oidc-role-arn }}\n\n    - name: Checkout\n      if: inputs.needs_postgres_source == 'true'\n      uses: actions/checkout@v4\n      with:\n        submodules: true\n\n    - name: Cache poetry deps\n      uses: actions/cache@v4\n      with:\n        path: ~/.cache/pypoetry/virtualenvs\n        key: v2-${{ runner.os }}-${{ runner.arch }}-python-deps-bookworm-${{ hashFiles('poetry.lock') }}\n\n    - name: Install Python deps\n      shell: bash -euxo pipefail {0}\n      run: ./scripts/pysync\n\n    - name: Run pytest\n      env:\n        NEON_BIN: /tmp/neon/bin\n        COMPATIBILITY_NEON_BIN: /tmp/neon-previous/bin\n        COMPATIBILITY_POSTGRES_DISTRIB_DIR: /tmp/neon-previous/pg_install\n        TEST_OUTPUT: /tmp/test_output\n        BUILD_TYPE: ${{ inputs.build_type }}\n        COMPATIBILITY_SNAPSHOT_DIR: /tmp/compatibility_snapshot_pg${{ inputs.pg_version }}\n        RERUN_FAILED: ${{ inputs.rerun_failed }}\n        PG_VERSION: ${{ inputs.pg_version }}\n        SANITIZERS: ${{ inputs.sanitizers }}\n      shell: bash -euxo pipefail {0}\n      run: |\n        # PLATFORM will be embedded in the perf test report\n        # and it is needed to distinguish different environments\n        export PLATFORM=${PLATFORM:-github-actions-selfhosted}\n        export POSTGRES_DISTRIB_DIR=${POSTGRES_DISTRIB_DIR:-/tmp/neon/pg_install}\n        export DEFAULT_PG_VERSION=${PG_VERSION#v}\n        export LD_LIBRARY_PATH=${POSTGRES_DISTRIB_DIR}/v${DEFAULT_PG_VERSION}/lib\n        export BENCHMARK_CONNSTR=${BENCHMARK_CONNSTR:-}\n        export ASAN_OPTIONS=detect_leaks=0:detect_stack_use_after_return=0:abort_on_error=1:strict_string_checks=1:check_initialization_order=1:strict_init_order=1\n        export UBSAN_OPTIONS=abort_on_error=1:print_stacktrace=1\n\n        if [ \"${BUILD_TYPE}\" = \"remote\" ]; then\n          export REMOTE_ENV=1\n        fi\n\n        
PERF_REPORT_DIR=\"$(realpath test_runner/perf-report-local)\"\n        echo \"PERF_REPORT_DIR=${PERF_REPORT_DIR}\" >> ${GITHUB_ENV}\n        rm -rf $PERF_REPORT_DIR\n\n        TEST_SELECTION=\"test_runner/${{ inputs.test_selection }}\"\n        EXTRA_PARAMS=\"${{ inputs.extra_params }}\"\n        if [ -z \"$TEST_SELECTION\" ]; then\n          echo \"test_selection must be set\"\n          exit 1\n        fi\n        if [[ \"${{ inputs.run_in_parallel }}\" == \"true\" ]]; then\n          # -n sets the number of parallel processes that pytest-xdist will run\n          EXTRA_PARAMS=\"-n12 $EXTRA_PARAMS\"\n\n          # --dist=loadgroup points tests marked with @pytest.mark.xdist_group\n          # to the same worker to make @pytest.mark.order work with xdist\n          EXTRA_PARAMS=\"--dist=loadgroup $EXTRA_PARAMS\"\n        fi\n\n        if [[ \"${{ inputs.run_with_real_s3 }}\" == \"true\" ]]; then\n          echo \"REAL S3 ENABLED\"\n          export ENABLE_REAL_S3_REMOTE_STORAGE=nonempty\n          export REMOTE_STORAGE_S3_BUCKET=${{ inputs.real_s3_bucket }}\n          export REMOTE_STORAGE_S3_REGION=${{ inputs.real_s3_region }}\n        fi\n\n        if [[ \"${{ inputs.save_perf_report }}\" == \"true\" ]]; then\n          mkdir -p \"$PERF_REPORT_DIR\"\n          EXTRA_PARAMS=\"--out-dir $PERF_REPORT_DIR $EXTRA_PARAMS\"\n        fi\n\n        if [ \"${RERUN_FAILED}\" == \"true\" ]; then\n          EXTRA_PARAMS=\"--reruns 2 $EXTRA_PARAMS\"\n        fi\n\n        # We use pytest-split plugin to run benchmarks in parallel on different CI runners\n        if [ \"${TEST_SELECTION}\" = \"test_runner/performance\" ] && [ \"${{ inputs.build_type }}\" != \"remote\" ]; then\n          mkdir -p $TEST_OUTPUT\n          echo '${{ inputs.benchmark_durations || '{}' }}' > $TEST_OUTPUT/benchmark_durations.json\n\n          EXTRA_PARAMS=\"--durations-path $TEST_OUTPUT/benchmark_durations.json $EXTRA_PARAMS\"\n        fi\n\n        if [[ $BUILD_TYPE == \"debug\" && $RUNNER_ARCH == 
'X64' ]]; then\n          # We don't use code coverage for regression tests (the step is disabled),\n          # so there's no need to collect it.\n          # Ref https://github.com/neondatabase/neon/issues/4540\n          # cov_prefix=(scripts/coverage \"--profraw-prefix=$GITHUB_JOB\" --dir=/tmp/coverage run)\n          cov_prefix=()\n          # Explicitly set LLVM_PROFILE_FILE to /dev/null to avoid writing *.profraw files\n          export LLVM_PROFILE_FILE=/dev/null\n        else\n          cov_prefix=()\n        fi\n\n        # Wake up the cluster if we use remote neon instance\n        if [ \"${{ inputs.build_type }}\" = \"remote\" ] && [ -n \"${BENCHMARK_CONNSTR}\" ]; then\n          QUERIES=(\"SELECT version()\")\n          if [[ \"${PLATFORM}\" = \"neon\"* ]]; then\n            QUERIES+=(\"SHOW neon.tenant_id\")\n            QUERIES+=(\"SHOW neon.timeline_id\")\n          fi\n\n          for q in \"${QUERIES[@]}\"; do\n            ${POSTGRES_DISTRIB_DIR}/v${DEFAULT_PG_VERSION}/bin/psql ${BENCHMARK_CONNSTR} -c \"${q}\"\n          done\n        fi\n\n        # Run the tests.\n        #\n        # --alluredir saves test results in Allure format (in a specified directory)\n        # --verbose prints name of each test (helpful when there are\n        # multiple tests in one file)\n        # -rA prints summary in the end\n        # -s is not used to prevent pytest from capturing output, because tests are running\n        # in parallel and logs are mixed between different tests\n        #\n        mkdir -p $TEST_OUTPUT/allure/results\n        \"${cov_prefix[@]}\" ./scripts/pytest \\\n          --alluredir=$TEST_OUTPUT/allure/results \\\n          --tb=short \\\n          --verbose \\\n          -rA $TEST_SELECTION $EXTRA_PARAMS\n\n    - name: Upload performance report\n      if: ${{ !cancelled() && inputs.save_perf_report == 'true' }}\n      shell: bash -euxo pipefail {0}\n      run: |\n        export REPORT_FROM=\"${PERF_REPORT_DIR}\"\n        
scripts/generate_and_push_perf_report.sh\n\n    - name: Upload compatibility snapshot\n      # Note that we use `github.base_ref`, which is the target branch of a PR\n      if: github.event_name == 'pull_request' && github.base_ref == 'release'\n      uses: ./.github/actions/upload\n      with:\n        name: compatibility-snapshot-${{ runner.arch }}-${{ inputs.build_type }}-pg${{ inputs.pg_version }}\n        # Directory is created by test_compatibility.py::test_create_snapshot, keep the path in sync with the test\n        path: /tmp/test_output/compatibility_snapshot_pg${{ inputs.pg_version }}/\n        # The lack of compatibility snapshot shouldn't fail the job\n        # (for example if we didn't run the test for non build-and-test workflow)\n        skip-if-does-not-exist: true\n        aws-oidc-role-arn: ${{ inputs.aws-oidc-role-arn }}\n\n    - uses: aws-actions/configure-aws-credentials@v4\n      if: ${{ !cancelled() }}\n      with:\n        aws-region: eu-central-1\n        role-to-assume: ${{ inputs.aws-oidc-role-arn }}\n        role-duration-seconds: 3600 # 1 hour should be more than enough to upload report\n\n    - name: Upload test results\n      if: ${{ !cancelled() }}\n      uses: ./.github/actions/allure-report-store\n      with:\n        report-dir: /tmp/test_output/allure/results\n        unique-key: ${{ inputs.build_type }}-${{ inputs.pg_version }}-${{ runner.arch }}\n        aws-oidc-role-arn: ${{ inputs.aws-oidc-role-arn }}\n"
  },
  {
    "path": ".github/actions/save-coverage-data/action.yml",
    "content": "name: 'Merge and upload coverage data'\ndescription: 'Compresses and uploads the coverage data as an artifact'\n\nruns:\n  using: \"composite\"\n  steps:\n    - name: Merge coverage data\n      shell: bash -euxo pipefail {0}\n      run: scripts/coverage \"--profraw-prefix=$GITHUB_JOB\" --dir=/tmp/coverage merge\n\n    - name: Download previous coverage data into the same directory\n      uses: ./.github/actions/download\n      with:\n        name: coverage-data-artifact\n        path: /tmp/coverage\n        skip-if-does-not-exist: true # skip if there's no previous coverage to download\n        aws-oidc-role-arn: ${{ inputs.aws-oidc-role-arn }}\n\n    - name: Upload coverage data\n      uses: ./.github/actions/upload\n      with:\n        name: coverage-data-artifact\n        path: /tmp/coverage\n        aws-oidc-role-arn: ${{ inputs.aws-oidc-role-arn }}\n"
  },
  {
    "path": ".github/actions/upload/action.yml",
    "content": "name: \"Upload an artifact\"\ndescription: \"Custom upload action\"\ninputs:\n  name:\n    description: \"Artifact name\"\n    required: true\n  path:\n    description: \"A directory or file to upload\"\n    required: true\n  skip-if-does-not-exist:\n    description: \"Allow to skip if path doesn't exist, fail otherwise\"\n    default: false\n    required: false\n  prefix:\n    description: \"S3 prefix. Default is '${GITHUB_SHA}/${GITHUB_RUN_ID}/${GITHUB_RUN_ATTEMPT}'\"\n    required: false\n  aws-oidc-role-arn:\n    description: \"the OIDC role arn for aws auth\"\n    required: false\n    default: \"\"\n\nruns:\n  using: \"composite\"\n  steps:\n    - name: Prepare artifact\n      id: prepare-artifact\n      shell: bash -euxo pipefail {0}\n      env:\n        SOURCE: ${{ inputs.path }}\n        ARCHIVE: /tmp/uploads/${{ inputs.name }}.tar.zst\n        SKIP_IF_DOES_NOT_EXIST: ${{ inputs.skip-if-does-not-exist }}\n      run: |\n        mkdir -p $(dirname $ARCHIVE)\n\n        if [ -f ${ARCHIVE} ]; then\n          echo >&2 \"File ${ARCHIVE} already exist. Something went wrong before\"\n          exit 1\n        fi\n\n        ZSTD_NBTHREADS=0\n        if [ -d  ${SOURCE} ]; then\n          time tar -C ${SOURCE} -cf ${ARCHIVE} --zstd .\n        elif [ -f ${SOURCE} ]; then\n          time tar -cf ${ARCHIVE} --zstd ${SOURCE}\n        elif ! 
ls ${SOURCE} > /dev/null 2>&1; then\n          if [ \"${SKIP_IF_DOES_NOT_EXIST}\" = \"true\" ]; then\n            echo 'SKIPPED=true' >> $GITHUB_OUTPUT\n            exit 0\n          else\n            echo >&2 \"${SOURCE} does not exist\"\n            exit 2\n          fi\n        else\n          echo >&2 \"${SOURCE} is neither a directory nor a file, do not know how to handle it\"\n          exit 3\n        fi\n\n        echo 'SKIPPED=false' >> $GITHUB_OUTPUT\n\n    - name: Configure AWS credentials\n      uses: aws-actions/configure-aws-credentials@v4\n      with:\n        aws-region: eu-central-1\n        role-to-assume: ${{ inputs.aws-oidc-role-arn }}\n        role-duration-seconds: 3600\n\n    - name: Upload artifact\n      if: ${{ steps.prepare-artifact.outputs.SKIPPED == 'false' }}\n      shell: bash -euxo pipefail {0}\n      env:\n        SOURCE: ${{ inputs.path }}\n        ARCHIVE: /tmp/uploads/${{ inputs.name }}.tar.zst\n        PREFIX: artifacts/${{ inputs.prefix || format('{0}/{1}/{2}', github.event.pull_request.head.sha || github.sha, github.run_id , github.run_attempt) }}\n      run: |\n        BUCKET=neon-github-public-dev\n        FILENAME=$(basename $ARCHIVE)\n\n        FILESIZE=$(du -sh ${ARCHIVE} | cut -f1)\n\n        time aws s3 mv --only-show-errors ${ARCHIVE} s3://${BUCKET}/${PREFIX}/${FILENAME}\n\n        # Ref https://docs.github.com/en/actions/using-workflows/workflow-commands-for-github-actions#adding-a-job-summary\n        echo \"[${FILENAME}](https://${BUCKET}.s3.amazonaws.com/${PREFIX}/${FILENAME}) ${FILESIZE}\" >> ${GITHUB_STEP_SUMMARY}\n"
  },
  {
    "path": ".github/file-filters.yaml",
    "content": "rust_code: ['**/*.rs', '**/Cargo.toml', '**/Cargo.lock']\nrust_dependencies: ['**/Cargo.lock']\n\nv14: ['vendor/postgres-v14/**', 'Makefile', 'pgxn/**']\nv15: ['vendor/postgres-v15/**', 'Makefile', 'pgxn/**']\nv16: ['vendor/postgres-v16/**', 'Makefile', 'pgxn/**']\nv17: ['vendor/postgres-v17/**', 'Makefile', 'pgxn/**']\n\nrebuild_neon_extra:\n    - .github/workflows/neon_extra_builds.yml\n\nrebuild_macos:\n    - .github/workflows/build-macos.yml\n"
  },
  {
    "path": ".github/pull_request_template.md",
    "content": "## Problem\n\n## Summary of changes\n"
  },
  {
    "path": ".github/scripts/generate_image_maps.py",
    "content": "import itertools\nimport json\nimport os\nimport sys\n\nsource_tag = os.getenv(\"SOURCE_TAG\")\ntarget_tag = os.getenv(\"TARGET_TAG\")\nbranch = os.getenv(\"BRANCH\")\ndev_acr = os.getenv(\"DEV_ACR\")\nprod_acr = os.getenv(\"PROD_ACR\")\ndev_aws = os.getenv(\"DEV_AWS\")\nprod_aws = os.getenv(\"PROD_AWS\")\naws_region = os.getenv(\"AWS_REGION\")\n\ncomponents = {\n    \"neon\": [\"neon\"],\n    \"compute\": [\n        \"compute-node-v14\",\n        \"compute-node-v15\",\n        \"compute-node-v16\",\n        \"compute-node-v17\",\n        \"vm-compute-node-v14\",\n        \"vm-compute-node-v15\",\n        \"vm-compute-node-v16\",\n        \"vm-compute-node-v17\",\n    ],\n}\n\nregistries = {\n    \"dev\": [\n        \"docker.io/neondatabase\",\n        \"ghcr.io/neondatabase\",\n        f\"{dev_aws}.dkr.ecr.{aws_region}.amazonaws.com\",\n        f\"{dev_acr}.azurecr.io/neondatabase\",\n    ],\n    \"prod\": [\n        f\"{prod_aws}.dkr.ecr.{aws_region}.amazonaws.com\",\n        f\"{prod_acr}.azurecr.io/neondatabase\",\n    ],\n}\n\nrelease_branches = [\"release\", \"release-proxy\", \"release-compute\"]\n\noutputs: dict[str, dict[str, list[str]]] = {}\n\ntarget_tags = (\n    [target_tag, \"latest\"]\n    if branch == \"main\"\n    else [target_tag, \"released\"]\n    if branch in release_branches\n    else [target_tag]\n)\ntarget_stages = [\"dev\", \"prod\"] if branch in release_branches else [\"dev\"]\n\nfor component_name, component_images in components.items():\n    for stage in target_stages:\n        outputs[f\"{component_name}-{stage}\"] = {\n            f\"ghcr.io/neondatabase/{component_image}:{source_tag}\": [\n                f\"{registry}/{component_image}:{tag}\"\n                for registry, tag in itertools.product(registries[stage], target_tags)\n                if not (registry == \"ghcr.io/neondatabase\" and tag == source_tag)\n            ]\n            for component_image in component_images\n        }\n\nwith 
open(os.getenv(\"GITHUB_OUTPUT\", \"/dev/null\"), \"a\") as f:\n    for key, value in outputs.items():\n        f.write(f\"{key}={json.dumps(value)}\\n\")\n        print(f\"Image map for {key}:\\n{json.dumps(value, indent=2)}\\n\\n\", file=sys.stderr)\n"
  },
  {
    "path": ".github/scripts/lint-release-pr.sh",
    "content": "#!/usr/bin/env bash\n\nset -euo pipefail\n\nDOCS_URL=\"https://docs.neon.build/overview/repositories/neon.html\"\n\nmessage() {\n  if [[ -n \"${GITHUB_PR_NUMBER:-}\" ]]; then\n    gh pr comment --repo \"${GITHUB_REPOSITORY}\" \"${GITHUB_PR_NUMBER}\" --edit-last --body \"$1\" \\\n      || gh pr comment --repo \"${GITHUB_REPOSITORY}\" \"${GITHUB_PR_NUMBER}\" --body \"$1\"\n  fi\n  echo \"$1\"\n}\n\nreport_error() {\n  message \"❌ $1\n  For more details, see the documentation: ${DOCS_URL}\"\n\n  exit 1\n}\n\ncase \"$RELEASE_BRANCH\" in\n  \"release\") COMPONENT=\"Storage\" ;;\n  \"release-proxy\") COMPONENT=\"Proxy\" ;;\n  \"release-compute\") COMPONENT=\"Compute\" ;;\n  *)\n    report_error \"Unknown release branch: ${RELEASE_BRANCH}\"\n    ;;\nesac\n\n\n# Identify main and release branches\nMAIN_BRANCH=\"origin/main\"\nREMOTE_RELEASE_BRANCH=\"origin/${RELEASE_BRANCH}\"\n\n# Find merge base\nMERGE_BASE=$(git merge-base \"${MAIN_BRANCH}\" \"${REMOTE_RELEASE_BRANCH}\")\necho \"Merge base of ${MAIN_BRANCH} and ${RELEASE_BRANCH}: ${MERGE_BASE}\"\n\n# Get the HEAD commit (last commit in PR, expected to be the merge commit)\nLAST_COMMIT=$(git rev-parse HEAD)\n\nMERGE_COMMIT_MESSAGE=$(git log -1 --format=%s \"${LAST_COMMIT}\")\nEXPECTED_MESSAGE_REGEX=\"^$COMPONENT release [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2} UTC$\"\n\nif ! 
[[ \"${MERGE_COMMIT_MESSAGE}\" =~ ${EXPECTED_MESSAGE_REGEX} ]]; then\n  report_error \"Merge commit message does not match expected pattern: '<component> release YYYY-MM-DD HH:MM UTC'\n  Expected component: ${COMPONENT}\n  Found: '${MERGE_COMMIT_MESSAGE}'\"\nfi\necho \"✅ Merge commit message is correctly formatted: '${MERGE_COMMIT_MESSAGE}'\"\n\nLAST_COMMIT_PARENTS=$(git cat-file -p \"${LAST_COMMIT}\" | jq -sR '[capture(\"parent (?<parent>[0-9a-f]{40})\"; \"g\") | .parent]')\n\nif [[ \"$(echo \"${LAST_COMMIT_PARENTS}\" | jq 'length')\" -ne 2 ]]; then\n  report_error \"Last commit must be a merge commit with exactly two parents\"\nfi\n\nEXPECTED_RELEASE_HEAD=$(git rev-parse \"${REMOTE_RELEASE_BRANCH}\")\nif echo \"${LAST_COMMIT_PARENTS}\" | jq -e --arg rel \"${EXPECTED_RELEASE_HEAD}\" 'index($rel) != null' > /dev/null; then\n  LINEAR_HEAD=$(echo \"${LAST_COMMIT_PARENTS}\" | jq -r '[.[] | select(. != $rel)][0]' --arg rel \"${EXPECTED_RELEASE_HEAD}\")\nelse\n  report_error \"Last commit must merge the release branch (${RELEASE_BRANCH})\"\nfi\necho \"✅ Last commit correctly merges the previous commit and the release branch\"\necho \"Top commit of linear history: ${LINEAR_HEAD}\"\n\nMERGE_COMMIT_TREE=$(git rev-parse \"${LAST_COMMIT}^{tree}\")\nLINEAR_HEAD_TREE=$(git rev-parse \"${LINEAR_HEAD}^{tree}\")\n\nif [[ \"${MERGE_COMMIT_TREE}\" != \"${LINEAR_HEAD_TREE}\" ]]; then\n  report_error \"Tree of merge commit (${MERGE_COMMIT_TREE}) does not match tree of linear history head (${LINEAR_HEAD_TREE})\n  This indicates that the merge of ${RELEASE_BRANCH} into this branch was not performed using the merge strategy 'ours'\"\nfi\necho \"✅ Merge commit tree matches the linear history head\"\n\nEXPECTED_PREVIOUS_COMMIT=\"${LINEAR_HEAD}\"\n\n# Now traverse down the history, ensuring each commit has exactly one parent\nCURRENT_COMMIT=\"${EXPECTED_PREVIOUS_COMMIT}\"\nwhile [[ \"${CURRENT_COMMIT}\" != \"${MERGE_BASE}\" && \"${CURRENT_COMMIT}\" != \"${EXPECTED_RELEASE_HEAD}\" ]]; do\n  
CURRENT_COMMIT_PARENTS=$(git cat-file -p \"${CURRENT_COMMIT}\" | jq -sR '[capture(\"parent (?<parent>[0-9a-f]{40})\"; \"g\") | .parent]')\n\n  if [[ \"$(echo \"${CURRENT_COMMIT_PARENTS}\" | jq 'length')\" -ne 1 ]]; then\n    report_error \"Commit ${CURRENT_COMMIT} must have exactly one parent\"\n  fi\n\n  NEXT_COMMIT=$(echo \"${CURRENT_COMMIT_PARENTS}\" | jq -r '.[0]')\n\n  if [[ \"${NEXT_COMMIT}\" == \"${MERGE_BASE}\" ]]; then\n    echo \"✅ Reached merge base (${MERGE_BASE})\"\n    PR_BASE=\"${MERGE_BASE}\"\n  elif [[ \"${NEXT_COMMIT}\" == \"${EXPECTED_RELEASE_HEAD}\" ]]; then\n    echo \"✅ Reached release branch (${EXPECTED_RELEASE_HEAD})\"\n    PR_BASE=\"${EXPECTED_RELEASE_HEAD}\"\n  elif [[ -z \"${NEXT_COMMIT}\" ]]; then\n    report_error \"Unexpected end of commit history before reaching merge base\"\n  fi\n\n  # Move to the next commit in the chain\n  CURRENT_COMMIT=\"${NEXT_COMMIT}\"\ndone\n\necho \"✅ All commits are properly ordered and linear\"\necho \"✅ Release PR structure is valid\"\n\necho\n\nmessage \"Commits that are part of this release:\n$(git log --oneline \"${PR_BASE}..${LINEAR_HEAD}\")\"\n"
  },
  {
    "path": ".github/scripts/previous-releases.jq",
    "content": "# Expects response from https://docs.github.com/en/rest/releases/releases?apiVersion=2022-11-28#list-releases as input,\n# with tag names `release` for storage, `release-compute` for compute and `release-proxy` for proxy releases.\n# Extract only the `tag_name` field from each release object\n[ .[].tag_name ]\n\n# Transform each tag name into a structured object using regex capture\n| reduce map(\n    capture(\"^(?<full>release(-(?<component>proxy|compute))?-(?<version>\\\\d+))$\")\n    | {\n        component: (.component // \"storage\"),  # Default to \"storage\" if no component is specified\n        version: (.version | tonumber),        # Convert the version number to an integer\n        full: .full                            # Store the full tag name for final output\n      }\n  )[] as $entry  # Loop over the transformed list\n\n# Accumulate the latest (highest-numbered) version for each component\n({};\n .[$entry.component] |= (if . == null or $entry.version > .version then $entry else . end))\n\n# Ensure that each component exists, or fail\n| ([\"storage\", \"compute\", \"proxy\"] - (keys)) as $missing\n| if ($missing | length) > 0 then\n    \"Error: Found no release for \\($missing | join(\", \"))!\\n\" | halt_error(1)\n  else . end\n\n# Convert the resulting object into an array of formatted strings\n| to_entries\n| map(\"\\(.key)=\\(.value.full)\")\n\n# Output each string separately\n| .[]\n"
  },
  {
    "path": ".github/scripts/push_with_image_map.py",
    "content": "import json\nimport os\nimport subprocess\n\nRED = \"\\033[91m\"\nRESET = \"\\033[0m\"\n\nimage_map = os.getenv(\"IMAGE_MAP\")\nif not image_map:\n    raise ValueError(\"IMAGE_MAP environment variable is not set\")\n\ntry:\n    parsed_image_map: dict[str, list[str]] = json.loads(image_map)\nexcept json.JSONDecodeError as e:\n    raise ValueError(\"Failed to parse IMAGE_MAP as JSON\") from e\n\nfailures = []\n\npending = [(source, target) for source, targets in parsed_image_map.items() for target in targets]\n\nwhile len(pending) > 0:\n    if len(failures) > 10:\n        print(\"Error: more than 10 failures!\")\n        for failure in failures:\n            print(f'\"{failure[0]}\" failed with the following output:')\n            print(failure[1])\n        raise RuntimeError(\"Retry limit reached.\")\n\n    source, target = pending.pop(0)\n    cmd = [\"docker\", \"buildx\", \"imagetools\", \"create\", \"-t\", target, source]\n    print(f\"Running: {' '.join(cmd)}\")\n    result = subprocess.run(cmd, text=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)\n\n    if result.returncode != 0:\n        failures.append((\" \".join(cmd), result.stdout, target))\n        pending.append((source, target))\n        print(\n            f\"{RED}[RETRY]{RESET} Push failed for {target}. Retrying... (failure count: {len(failures)})\"\n        )\n        print(result.stdout)\n\nif len(failures) > 0 and (github_output := os.getenv(\"GITHUB_OUTPUT\")):\n    failed_targets = [target for _, _, target in failures]\n    with open(github_output, \"a\") as f:\n        f.write(f\"push_failures={json.dumps(failed_targets)}\\n\")\n"
  },
  {
    "path": ".github/workflows/_benchmarking_preparation.yml",
    "content": "name: Prepare benchmarking databases by restoring dumps\n\non:\n  workflow_call:\n    # no inputs needed\n\ndefaults:\n  run:\n    shell: bash -euxo pipefail {0}\n\npermissions:\n  contents: read\n\njobs:\n  setup-databases:\n    permissions:\n      contents: write\n      statuses: write\n      id-token: write # aws-actions/configure-aws-credentials\n    strategy:\n      fail-fast: false\n      matrix:\n        platform: [ aws-rds-postgres, aws-aurora-serverless-v2-postgres, neon, neon_pg17 ]\n        database: [ clickbench, tpch, userexample ]\n\n    env:\n      LD_LIBRARY_PATH: /tmp/neon/pg_install/v16/lib\n      PLATFORM: ${{ matrix.platform }}\n      PG_BINARIES: /tmp/neon/pg_install/v16/bin\n\n    runs-on: [ self-hosted, us-east-2, x64 ]\n    container:\n      image: ghcr.io/neondatabase/build-tools:pinned-bookworm\n      credentials:\n        username: ${{ github.actor }}\n        password: ${{ secrets.GITHUB_TOKEN }}\n      options: --init\n\n    steps:\n    - name: Harden the runner (Audit all outbound calls)\n      uses: step-security/harden-runner@4d991eb9b905ef189e4c376166672c3f2f230481 # v2.11.0\n      with:\n        egress-policy: audit\n\n    - name: Set up Connection String\n      id: set-up-prep-connstr\n      run: |\n        case \"${PLATFORM}\" in\n          neon)\n            CONNSTR=${{ secrets.BENCHMARK_CAPTEST_CONNSTR }}\n            ;;\n          neon_pg17)\n            CONNSTR=${{ secrets.BENCHMARK_CAPTEST_CONNSTR_PG17 }}\n            ;;\n          aws-rds-postgres)\n            CONNSTR=${{ secrets.BENCHMARK_RDS_POSTGRES_CONNSTR }}\n            ;;\n          aws-aurora-serverless-v2-postgres)\n            CONNSTR=${{ secrets.BENCHMARK_RDS_AURORA_CONNSTR }}\n            ;;\n          *)\n            echo >&2 \"Unknown PLATFORM=${PLATFORM}\"\n            exit 1\n            ;;\n        esac\n\n        echo \"connstr=${CONNSTR}\" >> $GITHUB_OUTPUT\n\n    - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # 
v4.2.2\n\n    - name: Configure AWS credentials\n      uses: aws-actions/configure-aws-credentials@e3dd6a429d7300a6a4c196c26e071d42e0343502 # v4.0.2\n      with:\n        aws-region: eu-central-1\n        role-to-assume: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}\n        role-duration-seconds: 18000 # 5 hours\n\n    - name: Download Neon artifact\n      uses: ./.github/actions/download\n      with:\n        name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact\n        path: /tmp/neon/\n        prefix: latest\n        aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}\n\n    # we create a table that has one row for each database that we want to restore with the status whether the restore is done\n    - name: Create benchmark_restore_status table if it does not exist\n      env:\n        BENCHMARK_CONNSTR: ${{ steps.set-up-prep-connstr.outputs.connstr }}\n        DATABASE_NAME: ${{ matrix.database }}\n      # to avoid a race condition of multiple jobs trying to create the table at the same time,\n      # we use an advisory lock\n      run: |\n        ${PG_BINARIES}/psql \"${{ env.BENCHMARK_CONNSTR }}\" -c \"\n        SELECT pg_advisory_lock(4711);\n        CREATE TABLE IF NOT EXISTS benchmark_restore_status (\n        databasename text primary key,\n        restore_done boolean\n        );\n        SELECT pg_advisory_unlock(4711);\n        \"\n\n    - name: Check if restore is already done\n      id: check-restore-done\n      env:\n        BENCHMARK_CONNSTR: ${{ steps.set-up-prep-connstr.outputs.connstr }}\n        DATABASE_NAME: ${{ matrix.database }}\n      run: |\n        skip=false\n        if ${PG_BINARIES}/psql \"${{ env.BENCHMARK_CONNSTR }}\" -tAc \"SELECT 1 FROM benchmark_restore_status WHERE databasename='${{ env.DATABASE_NAME }}' AND restore_done=true;\" | grep -q 1; then\n          echo \"Restore already done for database ${{ env.DATABASE_NAME }} on platform ${{ env.PLATFORM }}. 
Skipping this database.\"\n          skip=true\n        fi\n        echo \"skip=${skip}\" | tee -a $GITHUB_OUTPUT\n\n    - name: Check and create database if it does not exist\n      if: steps.check-restore-done.outputs.skip != 'true'\n      env:\n        BENCHMARK_CONNSTR: ${{ steps.set-up-prep-connstr.outputs.connstr }}\n        DATABASE_NAME: ${{ matrix.database }}\n      run: |\n        DB_EXISTS=$(${PG_BINARIES}/psql \"${{ env.BENCHMARK_CONNSTR }}\" -tAc \"SELECT 1 FROM pg_database WHERE datname='${{ env.DATABASE_NAME }}'\")\n        if [ \"$DB_EXISTS\" != \"1\" ]; then\n          echo \"Database ${{ env.DATABASE_NAME }} does not exist. Creating it...\"\n          ${PG_BINARIES}/psql \"${{ env.BENCHMARK_CONNSTR }}\" -c \"CREATE DATABASE \\\"${{ env.DATABASE_NAME }}\\\";\"\n        else\n          echo \"Database ${{ env.DATABASE_NAME }} already exists.\"\n        fi\n\n    - name: Download dump from S3 to /tmp/dumps\n      if: steps.check-restore-done.outputs.skip != 'true'\n      env:\n        DATABASE_NAME: ${{ matrix.database }}\n      run: |\n        mkdir -p /tmp/dumps\n        aws s3 cp s3://neon-github-dev/performance/pgdumps/$DATABASE_NAME/$DATABASE_NAME.pg_dump /tmp/dumps/\n\n    - name: Replace database name in connection string\n      if: steps.check-restore-done.outputs.skip != 'true'\n      id: replace-dbname\n      env:\n        DATABASE_NAME: ${{ matrix.database }}\n        BENCHMARK_CONNSTR: ${{ steps.set-up-prep-connstr.outputs.connstr }}\n      run: |\n        # Extract the part before the database name\n        base_connstr=\"${BENCHMARK_CONNSTR%/*}\"\n        # Extract the query parameters (if any) after the database name\n        query_params=\"${BENCHMARK_CONNSTR#*\\?}\"\n        # Reconstruct the new connection string\n        if [ \"$query_params\" != \"$BENCHMARK_CONNSTR\" ]; then\n          new_connstr=\"${base_connstr}/${DATABASE_NAME}?${query_params}\"\n        else\n          new_connstr=\"${base_connstr}/${DATABASE_NAME}\"\n       
 fi\n        echo \"database_connstr=${new_connstr}\" >> $GITHUB_OUTPUT\n\n    - name: Restore dump\n      if: steps.check-restore-done.outputs.skip != 'true'\n      env:\n        DATABASE_NAME: ${{ matrix.database }}\n        DATABASE_CONNSTR: ${{ steps.replace-dbname.outputs.database_connstr }}\n        # the following works only with larger computes:\n        # PGOPTIONS: \"-c maintenance_work_mem=8388608 -c max_parallel_maintenance_workers=7\"\n        # we add the || true because:\n        # the dumps were created with Neon and contain neon extensions that are not\n        # available in RDS, so we will always report an error, but we can ignore it\n      run: |\n        ${PG_BINARIES}/pg_restore --clean --if-exists --no-owner --jobs=4 \\\n        -d \"${DATABASE_CONNSTR}\" /tmp/dumps/${DATABASE_NAME}.pg_dump || true\n\n    - name: Update benchmark_restore_status table\n      if: steps.check-restore-done.outputs.skip != 'true'\n      env:\n        BENCHMARK_CONNSTR: ${{ steps.set-up-prep-connstr.outputs.connstr }}\n        DATABASE_NAME: ${{ matrix.database }}\n      run: |\n        ${PG_BINARIES}/psql \"${{ env.BENCHMARK_CONNSTR }}\" -c \"\n        INSERT INTO benchmark_restore_status (databasename, restore_done) VALUES ('${{ env.DATABASE_NAME }}', true)\n        ON CONFLICT (databasename) DO UPDATE SET restore_done = true;\n        \"\n"
  },
  {
    "path": ".github/workflows/_build-and-test-locally.yml",
    "content": "name: Build and Test Locally\n\non:\n  workflow_call:\n    inputs:\n      arch:\n        description: 'x64 or arm64'\n        required: true\n        type: string\n      build-tag:\n        description: 'build tag'\n        required: true\n        type: string\n      build-tools-image:\n        description: 'build-tools image'\n        required: true\n        type: string\n      build-type:\n        description: 'debug or release'\n        required: true\n        type: string\n      test-cfg:\n        description: 'a json object of postgres versions and lfc states to run regression tests on'\n        required: true\n        type: string\n      sanitizers:\n        description: 'enabled or disabled'\n        required: false\n        default: 'disabled'\n        type: string\n      test-selection:\n        description: 'specification of selected test(s) to run'\n        required: false\n        default: ''\n        type: string\n      test-run-count:\n        description: 'number of runs to perform for selected tests'\n        required: false\n        default: 1\n        type: number\n      rerun-failed:\n        description: 'rerun failed tests to ignore flaky tests'\n        required: false\n        default: true\n        type: boolean\n\ndefaults:\n  run:\n    shell: bash -euxo pipefail {0}\n\nenv:\n  RUST_BACKTRACE: 1\n  COPT: '-Werror'\n\npermissions:\n  contents: read\n\njobs:\n  build-neon:\n    runs-on: ${{ fromJSON(format('[\"self-hosted\", \"{0}\"]', inputs.arch == 'arm64' && 'large-arm64' || 'large')) }}\n    permissions:\n      id-token: write # aws-actions/configure-aws-credentials\n      contents: read\n    container:\n      image: ${{ inputs.build-tools-image }}\n      credentials:\n        username: ${{ github.actor }}\n        password: ${{ secrets.GITHUB_TOKEN }}\n      # Raise locked memory limit for tokio-epoll-uring.\n      # On 5.10 LTS kernels < 5.10.162 (and generally mainline kernels < 5.12),\n      # io_uring will account the 
memory of the CQ and SQ as locked.\n      # More details: https://github.com/neondatabase/neon/issues/6373#issuecomment-1905814391\n      options: --init --shm-size=512mb --ulimit memlock=67108864:67108864\n    env:\n      BUILD_TYPE: ${{ inputs.build-type }}\n      GIT_VERSION: ${{ github.event.pull_request.head.sha || github.sha }}\n      BUILD_TAG: ${{ inputs.build-tag }}\n\n    steps:\n      - name: Harden the runner (Audit all outbound calls)\n        uses: step-security/harden-runner@4d991eb9b905ef189e4c376166672c3f2f230481 # v2.11.0\n        with:\n          egress-policy: audit\n\n      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2\n        with:\n          submodules: true\n\n      - uses: ./.github/actions/prepare-for-subzero\n        with:\n          token: ${{ secrets.CI_ACCESS_TOKEN }}\n\n      - name: Set pg 14 revision for caching\n        id: pg_v14_rev\n        run: echo pg_rev=$(git rev-parse HEAD:vendor/postgres-v14) >> $GITHUB_OUTPUT\n\n      - name: Set pg 15 revision for caching\n        id: pg_v15_rev\n        run: echo pg_rev=$(git rev-parse HEAD:vendor/postgres-v15) >> $GITHUB_OUTPUT\n\n      - name: Set pg 16 revision for caching\n        id: pg_v16_rev\n        run: echo pg_rev=$(git rev-parse HEAD:vendor/postgres-v16) >> $GITHUB_OUTPUT\n\n      - name: Set pg 17 revision for caching\n        id: pg_v17_rev\n        run: echo pg_rev=$(git rev-parse HEAD:vendor/postgres-v17) >> $GITHUB_OUTPUT\n\n      # Set some environment variables used by all the steps.\n      #\n      # CARGO_FLAGS is extra options to pass to all \"cargo\" subcommands.\n      #\n      # CARGO_PROFILE is passed to \"cargo build\", \"cargo test\" etc, but not to\n      #   \"cargo metadata\", because it doesn't accept --release or --debug options.\n      #\n      # We run tests with additional features, that are turned off by default (e.g. 
in release builds), see\n      # corresponding Cargo.toml files for their descriptions.\n      - name: Set env variables\n        env:\n          ARCH: ${{ inputs.arch }}\n          SANITIZERS: ${{ inputs.sanitizers }}\n        run: |\n          CARGO_FLAGS=\"--locked --features testing,rest_broker\"\n          if [[ $BUILD_TYPE == \"debug\" && $ARCH == 'x64' ]]; then\n            cov_prefix=\"scripts/coverage --profraw-prefix=$GITHUB_JOB --dir=/tmp/coverage run\"\n            CARGO_PROFILE=\"\"\n          elif [[ $BUILD_TYPE == \"debug\" ]]; then\n            cov_prefix=\"\"\n            CARGO_PROFILE=\"\"\n          elif [[ $BUILD_TYPE == \"release\" ]]; then\n            cov_prefix=\"\"\n            CARGO_PROFILE=\"--release\"\n          fi\n          if [[ $SANITIZERS == 'enabled' ]]; then\n            make_vars=\"WITH_SANITIZERS=yes\"\n          else\n            make_vars=\"\"\n          fi\n          {\n            echo \"cov_prefix=${cov_prefix}\"\n            echo \"make_vars=${make_vars}\"\n            echo \"CARGO_FLAGS=${CARGO_FLAGS}\"\n            echo \"CARGO_PROFILE=${CARGO_PROFILE}\"\n            echo \"CARGO_HOME=${GITHUB_WORKSPACE}/.cargo\"\n          } >> $GITHUB_ENV\n\n      - name: Cache postgres v14 build\n        id: cache_pg_14\n        uses: tespkg/actions-cache@b7bf5fcc2f98a52ac6080eb0fd282c2f752074b1  # v1.8.0\n        with:\n          endpoint: ${{ vars.HETZNER_CACHE_REGION }}.${{ vars.HETZNER_CACHE_ENDPOINT }}\n          bucket: ${{ vars.HETZNER_CACHE_BUCKET }}\n          accessKey: ${{ secrets.HETZNER_CACHE_ACCESS_KEY }}\n          secretKey: ${{ secrets.HETZNER_CACHE_SECRET_KEY }}\n          use-fallback: false\n          path: pg_install/v14\n          key: v1-${{ runner.os }}-${{ runner.arch }}-${{ inputs.build-type }}-pg-${{ steps.pg_v14_rev.outputs.pg_rev }}-bookworm-${{ hashFiles('Makefile', 'build-tools/Dockerfile') }}\n\n      - name: Cache postgres v15 build\n        id: cache_pg_15\n        uses: 
tespkg/actions-cache@b7bf5fcc2f98a52ac6080eb0fd282c2f752074b1  # v1.8.0\n        with:\n          endpoint: ${{ vars.HETZNER_CACHE_REGION }}.${{ vars.HETZNER_CACHE_ENDPOINT }}\n          bucket: ${{ vars.HETZNER_CACHE_BUCKET }}\n          accessKey: ${{ secrets.HETZNER_CACHE_ACCESS_KEY }}\n          secretKey: ${{ secrets.HETZNER_CACHE_SECRET_KEY }}\n          use-fallback: false\n          path: pg_install/v15\n          key: v1-${{ runner.os }}-${{ runner.arch }}-${{ inputs.build-type }}-pg-${{ steps.pg_v15_rev.outputs.pg_rev }}-bookworm-${{ hashFiles('Makefile', 'build-tools/Dockerfile') }}\n\n      - name: Cache postgres v16 build\n        id: cache_pg_16\n        uses: tespkg/actions-cache@b7bf5fcc2f98a52ac6080eb0fd282c2f752074b1  # v1.8.0\n        with:\n          endpoint: ${{ vars.HETZNER_CACHE_REGION }}.${{ vars.HETZNER_CACHE_ENDPOINT }}\n          bucket: ${{ vars.HETZNER_CACHE_BUCKET }}\n          accessKey: ${{ secrets.HETZNER_CACHE_ACCESS_KEY }}\n          secretKey: ${{ secrets.HETZNER_CACHE_SECRET_KEY }}\n          use-fallback: false\n          path: pg_install/v16\n          key: v1-${{ runner.os }}-${{ runner.arch }}-${{ inputs.build-type }}-pg-${{ steps.pg_v16_rev.outputs.pg_rev }}-bookworm-${{ hashFiles('Makefile', 'build-tools/Dockerfile') }}\n\n      - name: Cache postgres v17 build\n        id: cache_pg_17\n        uses: tespkg/actions-cache@b7bf5fcc2f98a52ac6080eb0fd282c2f752074b1  # v1.8.0\n        with:\n          endpoint: ${{ vars.HETZNER_CACHE_REGION }}.${{ vars.HETZNER_CACHE_ENDPOINT }}\n          bucket: ${{ vars.HETZNER_CACHE_BUCKET }}\n          accessKey: ${{ secrets.HETZNER_CACHE_ACCESS_KEY }}\n          secretKey: ${{ secrets.HETZNER_CACHE_SECRET_KEY }}\n          use-fallback: false\n          path: pg_install/v17\n          key: v1-${{ runner.os }}-${{ runner.arch }}-${{ inputs.build-type }}-pg-${{ steps.pg_v17_rev.outputs.pg_rev }}-bookworm-${{ hashFiles('Makefile', 'build-tools/Dockerfile') }}\n\n      - name: Build all\n     
   # Note: the Makefile picks up BUILD_TYPE and CARGO_PROFILE from the env variables\n        run: mold -run make ${make_vars} all -j$(nproc) CARGO_BUILD_FLAGS=\"$CARGO_FLAGS\"\n\n      - name: Build walproposer-lib\n        run: mold -run make ${make_vars} walproposer-lib -j$(nproc)\n\n      - name: Build unit tests\n        if: inputs.sanitizers != 'enabled'\n        run: |\n          export ASAN_OPTIONS=detect_leaks=0\n          ${cov_prefix} mold -run cargo build $CARGO_FLAGS $CARGO_PROFILE --tests\n\n      # Do install *before* running rust tests because they might recompile the\n      # binaries with different features/flags.\n      - name: Install rust binaries\n        env:\n          ARCH: ${{ inputs.arch }}\n          SANITIZERS: ${{ inputs.sanitizers }}\n        run: |\n          # Install target binaries\n          mkdir -p /tmp/neon/bin/\n          binaries=$(\n            ${cov_prefix} cargo metadata $CARGO_FLAGS --format-version=1 --no-deps |\n            jq -r '.packages[].targets[] | select(.kind | index(\"bin\")) | .name'\n          )\n          for bin in $binaries; do\n            SRC=target/$BUILD_TYPE/$bin\n            DST=/tmp/neon/bin/$bin\n            cp \"$SRC\" \"$DST\"\n          done\n\n          # Install test executables and write list of all binaries (for code coverage)\n          if [[ $BUILD_TYPE == \"debug\" && $ARCH == 'x64' && $SANITIZERS != 'enabled' ]]; then\n            # Keep bloated coverage data files away from the rest of the artifact\n            mkdir -p /tmp/coverage/\n\n            mkdir -p /tmp/neon/test_bin/\n\n            test_exe_paths=$(\n              ${cov_prefix} cargo test $CARGO_FLAGS $CARGO_PROFILE --message-format=json --no-run |\n              jq -r '.executable | select(. 
!= null)'\n            )\n            for bin in $test_exe_paths; do\n              SRC=$bin\n              DST=/tmp/neon/test_bin/$(basename $bin)\n\n              # We don't need debug symbols for code coverage, so strip them out to make\n              # the artifact smaller.\n              strip \"$SRC\" -o \"$DST\"\n              echo \"$DST\" >> /tmp/coverage/binaries.list\n            done\n\n            for bin in $binaries; do\n              echo \"/tmp/neon/bin/$bin\" >> /tmp/coverage/binaries.list\n            done\n          fi\n\n      - name: Configure AWS credentials\n        uses: aws-actions/configure-aws-credentials@e3dd6a429d7300a6a4c196c26e071d42e0343502 # v4.0.2\n        with:\n          aws-region: eu-central-1\n          role-to-assume: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}\n          role-duration-seconds: 18000 # 5 hours\n\n      - name: Run rust tests\n        if: ${{ inputs.sanitizers != 'enabled' }}\n        env:\n          NEXTEST_RETRIES: 3\n        run: |\n          LD_LIBRARY_PATH=$(pwd)/pg_install/v17/lib\n          export LD_LIBRARY_PATH\n\n          #nextest does not yet support running doctests\n          ${cov_prefix} cargo test --doc $CARGO_FLAGS $CARGO_PROFILE\n\n          # run all non-pageserver tests\n          ${cov_prefix} cargo nextest run $CARGO_FLAGS $CARGO_PROFILE -E '!package(pageserver)'\n\n          # run pageserver tests\n          # (When developing new pageserver features gated by config fields, we commonly make the rust\n          # unit tests sensitive to an environment variable NEON_PAGESERVER_UNIT_TEST_FEATURENAME.\n          # Then run the nextest invocation below for all relevant combinations. 
Singling out the\n          # pageserver tests from non-pageserver tests cuts down the time it takes for this CI step.)\n          NEON_PAGESERVER_UNIT_TEST_VIRTUAL_FILE_IOENGINE=tokio-epoll-uring  \\\n          ${cov_prefix} \\\n          cargo nextest run $CARGO_FLAGS $CARGO_PROFILE  -E 'package(pageserver)'\n\n          # Run separate tests for real S3\n          export ENABLE_REAL_S3_REMOTE_STORAGE=nonempty\n          export REMOTE_STORAGE_S3_BUCKET=neon-github-ci-tests\n          export REMOTE_STORAGE_S3_REGION=eu-central-1\n          ${cov_prefix} cargo nextest run $CARGO_FLAGS $CARGO_PROFILE -E 'package(remote_storage)' -E 'test(test_real_s3)'\n\n          # Run separate tests for real Azure Blob Storage\n          # XXX: replace region with `eu-central-1`-like region\n          export ENABLE_REAL_AZURE_REMOTE_STORAGE=y\n          export AZURE_STORAGE_ACCOUNT=\"${{ secrets.AZURE_STORAGE_ACCOUNT_DEV }}\"\n          export AZURE_STORAGE_ACCESS_KEY=\"${{ secrets.AZURE_STORAGE_ACCESS_KEY_DEV }}\"\n          export REMOTE_STORAGE_AZURE_CONTAINER=\"${{ vars.REMOTE_STORAGE_AZURE_CONTAINER }}\"\n          export REMOTE_STORAGE_AZURE_REGION=\"${{ vars.REMOTE_STORAGE_AZURE_REGION }}\"\n          ${cov_prefix} cargo nextest run $CARGO_FLAGS $CARGO_PROFILE -E 'package(remote_storage)' -E 'test(test_real_azure)'\n\n      - name: Install postgres binaries\n        run: |\n          # Use tar to copy files matching the pattern, preserving the paths in the destination\n          tar c \\\n            pg_install/v* \\\n            build/*/src/test/regress/*.so \\\n            build/*/src/test/regress/pg_regress \\\n            build/*/src/test/isolation/isolationtester \\\n            build/*/src/test/isolation/pg_isolation_regress \\\n            | tar  x -C /tmp/neon\n\n      - name: Upload Neon artifact\n        uses: ./.github/actions/upload\n        with:\n          name: neon-${{ runner.os }}-${{ runner.arch }}-${{ inputs.build-type }}${{ inputs.sanitizers == 
'enabled' && '-sanitized' || '' }}-artifact\n          path: /tmp/neon\n          aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}\n\n      - name: Check diesel schema\n        if: inputs.build-type == 'release' && inputs.arch == 'x64'\n        env:\n          DATABASE_URL: postgresql://localhost:1235/storage_controller\n          POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install\n        run: |\n          export ASAN_OPTIONS=detect_leaks=0\n          /tmp/neon/bin/neon_local init\n          /tmp/neon/bin/neon_local storage_controller start\n\n          diesel print-schema > storage_controller/src/schema.rs\n\n          if [ -n \"$(git diff storage_controller/src/schema.rs)\" ]; then\n            echo >&2 \"Uncommitted changes in diesel schema\"\n\n            git diff .\n            exit 1\n          fi\n\n          /tmp/neon/bin/neon_local storage_controller stop\n\n      # XXX: keep this after the binaries.list is formed, so the coverage can properly work later\n      - name: Merge and upload coverage data\n        if: inputs.build-type == 'debug'\n        uses: ./.github/actions/save-coverage-data\n\n  regress-tests:\n    # Don't run regression tests on debug arm64 builds\n    if: inputs.build-type != 'debug' || inputs.arch != 'arm64'\n    permissions:\n      id-token: write # aws-actions/configure-aws-credentials\n      contents: read\n      statuses: write\n    needs: [ build-neon ]\n    runs-on: ${{ fromJSON(format('[\"self-hosted\", \"{0}\"]', inputs.arch == 'arm64' && 'large-arm64' || 'large-metal')) }}\n    container:\n      image: ${{ inputs.build-tools-image }}\n      credentials:\n        username: ${{ github.actor }}\n        password: ${{ secrets.GITHUB_TOKEN }}\n      # for changed limits, see comments on `options:` earlier in this file\n      options: --init --shm-size=512mb --ulimit memlock=67108864:67108864\n    strategy:\n      fail-fast: false\n      matrix: ${{ fromJSON(format('{{\"include\":{0}}}', inputs.test-cfg)) }}\n    steps:\n      - name: 
Harden the runner (Audit all outbound calls)\n        uses: step-security/harden-runner@4d991eb9b905ef189e4c376166672c3f2f230481 # v2.11.0\n        with:\n          egress-policy: audit\n\n      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2\n        with:\n          submodules: true\n\n      - name: Pytest regression tests\n        continue-on-error: ${{ matrix.lfc_state == 'with-lfc' && inputs.build-type == 'debug' }}\n        uses: ./.github/actions/run-python-test-set\n        timeout-minutes: ${{ (inputs.build-type == 'release' && inputs.sanitizers != 'enabled') && 75 || 180 }}\n        with:\n          build_type: ${{ inputs.build-type }}\n          test_selection: regress\n          needs_postgres_source: true\n          run_with_real_s3: true\n          real_s3_bucket: neon-github-ci-tests\n          real_s3_region: eu-central-1\n          rerun_failed: ${{ inputs.rerun-failed }}\n          pg_version: ${{ matrix.pg_version }}\n          sanitizers: ${{ inputs.sanitizers }}\n          aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}\n          # `--session-timeout` is nominally (timeout-minutes - 10 minutes) * 60 seconds: 10200 for the\n          # 180-minute limit. The release value (3000) leaves a wider margin below its 75-minute limit.\n          # Attempt to stop tests gracefully to generate test reports\n          # before they are forcibly stopped by the stricter `timeout-minutes` limit.\n          extra_params: --session-timeout=${{ (inputs.build-type == 'release' && inputs.sanitizers != 'enabled') && 3000 || 10200 }} --count=${{ inputs.test-run-count }}\n                        ${{ inputs.test-selection != '' && format('-k \"{0}\"', inputs.test-selection) || '' }}\n        env:\n          TEST_RESULT_CONNSTR: ${{ secrets.REGRESS_TEST_RESULT_CONNSTR_NEW }}\n          CHECK_ONDISK_DATA_COMPATIBILITY: nonempty\n          BUILD_TAG: ${{ inputs.build-tag }}\n          PAGESERVER_VIRTUAL_FILE_IO_ENGINE: tokio-epoll-uring\n          USE_LFC: ${{ matrix.lfc_state == 'with-lfc' && 'true' || 'false' }}\n\n      # Temporarily disable this step until 
we figure out why it's so flaky\n      # Ref https://github.com/neondatabase/neon/issues/4540\n      - name: Merge and upload coverage data\n        if: |\n          false &&\n          inputs.build-type == 'debug' && matrix.pg_version == 'v16'\n        uses: ./.github/actions/save-coverage-data\n"
  },
  {
    "path": ".github/workflows/_check-codestyle-python.yml",
    "content": "name: Check Codestyle Python\n\non:\n  workflow_call:\n    inputs:\n      build-tools-image:\n        description: 'build-tools image'\n        required: true\n        type: string\n\ndefaults:\n  run:\n    shell: bash -euxo pipefail {0}\n\npermissions:\n  contents: read\n\njobs:\n  check-codestyle-python:\n    runs-on: [ self-hosted, small ]\n\n    permissions:\n      packages: read\n\n    container:\n      image: ${{ inputs.build-tools-image }}\n      credentials:\n        username: ${{ github.actor }}\n        password: ${{ secrets.GITHUB_TOKEN }}\n      options: --init\n\n    steps:\n      - name: Harden the runner (Audit all outbound calls)\n        uses: step-security/harden-runner@4d991eb9b905ef189e4c376166672c3f2f230481 # v2.11.0\n        with:\n          egress-policy: audit\n\n      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2\n\n      - name: Cache poetry deps\n        uses: tespkg/actions-cache@b7bf5fcc2f98a52ac6080eb0fd282c2f752074b1  # v1.8.0\n        with:\n          endpoint: ${{ vars.HETZNER_CACHE_REGION }}.${{ vars.HETZNER_CACHE_ENDPOINT }}\n          bucket: ${{ vars.HETZNER_CACHE_BUCKET }}\n          accessKey: ${{ secrets.HETZNER_CACHE_ACCESS_KEY }}\n          secretKey: ${{ secrets.HETZNER_CACHE_SECRET_KEY }}\n          use-fallback: false\n          path: ~/.cache/pypoetry/virtualenvs\n          key: v2-${{ runner.os }}-${{ runner.arch }}-python-deps-bookworm-${{ hashFiles('poetry.lock') }}\n\n      - run: ./scripts/pysync\n\n      - run: poetry run ruff check .\n      - run: poetry run ruff format --check .\n      - run: poetry run mypy .\n"
  },
  {
    "path": ".github/workflows/_check-codestyle-rust.yml",
    "content": "name: Check Codestyle Rust\n\non:\n  workflow_call:\n    inputs:\n      build-tools-image:\n        description: \"build-tools image\"\n        required: true\n        type: string\n      archs:\n        description: \"Json array of architectures to run on\"\n        type: string\n\n\ndefaults:\n  run:\n    shell: bash -euxo pipefail {0}\n\n# No permission for GITHUB_TOKEN by default; the **minimal required** set of permissions should be granted in each job.\npermissions: {}\n\njobs:\n  check-codestyle-rust:\n    strategy:\n      matrix:\n        arch: ${{ fromJSON(inputs.archs) }}\n    runs-on: ${{ fromJSON(format('[\"self-hosted\", \"{0}\"]', matrix.arch == 'arm64' && 'small-arm64' || 'small')) }}\n\n    permissions:\n      packages: read\n\n    container:\n      image: ${{ inputs.build-tools-image }}\n      credentials:\n        username: ${{ github.actor }}\n        password: ${{ secrets.GITHUB_TOKEN }}\n      options: --init\n\n    steps:\n      - name: Harden the runner (Audit all outbound calls)\n        uses: step-security/harden-runner@4d991eb9b905ef189e4c376166672c3f2f230481 # v2.11.0\n        with:\n          egress-policy: audit\n\n      - name: Checkout\n        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2\n        with:\n          submodules: true\n      \n      - uses: ./.github/actions/prepare-for-subzero\n        with:\n          token: ${{ secrets.CI_ACCESS_TOKEN }}\n\n      - name: Cache cargo deps\n        uses: tespkg/actions-cache@b7bf5fcc2f98a52ac6080eb0fd282c2f752074b1  # v1.8.0\n        with:\n          endpoint: ${{ vars.HETZNER_CACHE_REGION }}.${{ vars.HETZNER_CACHE_ENDPOINT }}\n          bucket: ${{ vars.HETZNER_CACHE_BUCKET }}\n          accessKey: ${{ secrets.HETZNER_CACHE_ACCESS_KEY }}\n          secretKey: ${{ secrets.HETZNER_CACHE_SECRET_KEY }}\n          use-fallback: false\n          path: |\n            ~/.cargo/registry\n            !~/.cargo/registry/src\n            ~/.cargo/git\n    
        target\n          key: v1-${{ runner.os }}-${{ runner.arch }}-cargo-${{ hashFiles('./Cargo.lock') }}-${{ hashFiles('./rust-toolchain.toml') }}-rust\n\n      # Some of our rust modules use FFI and need those to be checked\n      - name: Get postgres headers\n        run: make postgres-headers -j$(nproc)\n\n      # cargo hack runs the given cargo subcommand (clippy in this case) for all feature combinations.\n      # This will catch compiler & clippy warnings in all feature combinations.\n      # TODO: use cargo hack for build and test as well, but, that's quite expensive.\n      # NB: keep clippy args in sync with ./run_clippy.sh\n      #\n      # The only difference between \"clippy --debug\" and \"clippy --release\" is that in --release mode,\n      # #[cfg(debug_assertions)] blocks are not built. It's not worth building everything a second\n      # time just for that, so skip \"clippy --release\".\n      - run: |\n          CLIPPY_COMMON_ARGS=\"$( source .neon_clippy_args; echo \"$CLIPPY_COMMON_ARGS\")\"\n          if [ \"$CLIPPY_COMMON_ARGS\" = \"\" ]; then\n            echo \"No clippy args found in .neon_clippy_args\"\n            exit 1\n          fi\n          echo \"CLIPPY_COMMON_ARGS=${CLIPPY_COMMON_ARGS}\" >> $GITHUB_ENV\n      - name: Run cargo clippy (debug)\n        run: cargo hack --features default --ignore-unknown-features --feature-powerset clippy $CLIPPY_COMMON_ARGS\n\n      - name: Check documentation generation\n        run: cargo doc --workspace --no-deps --document-private-items\n        env:\n          RUSTDOCFLAGS: \"-Dwarnings -Arustdoc::private_intra_doc_links\"\n\n      # Use `${{ !cancelled() }}` to run quick tests after the longer clippy run\n      - name: Check formatting\n        if: ${{ !cancelled() }}\n        run: cargo fmt --all -- --check\n\n      # https://github.com/facebookincubator/cargo-guppy/tree/bec4e0eb29dcd1faac70b1b5360267fc02bf830e/tools/cargo-hakari#2-keep-the-workspace-hack-up-to-date-in-ci\n      - name: 
Check rust dependencies\n        if: ${{ !cancelled() }}\n        run: |\n          cargo hakari generate --diff  # workspace-hack Cargo.toml is up-to-date\n          cargo hakari manage-deps --dry-run  # all workspace crates depend on workspace-hack\n"
  },
  {
    "path": ".github/workflows/_meta.yml",
    "content": "name: Generate run metadata\non:\n  workflow_call:\n    inputs:\n      github-event-name:\n        type: string\n        required: true\n      github-event-json:\n        type: string\n        required: true\n    outputs:\n      build-tag:\n        description: \"Tag for the current workflow run\"\n        value: ${{ jobs.tags.outputs.build-tag }}\n      release-tag:\n        description: \"Tag for the release if this is an RC PR run\"\n        value: ${{ jobs.tags.outputs.release-tag }}\n      previous-storage-release:\n        description: \"Tag of the last storage release\"\n        value: ${{ jobs.tags.outputs.storage }}\n      previous-proxy-release:\n        description: \"Tag of the last proxy release\"\n        value: ${{ jobs.tags.outputs.proxy }}\n      previous-compute-release:\n        description: \"Tag of the last compute release\"\n        value: ${{ jobs.tags.outputs.compute }}\n      run-kind:\n        description: \"The kind of run we're currently in. Will be one of `push-main`, `storage-release`, `compute-release`, `proxy-release`, `storage-rc-pr`, `compute-rc-pr`,  `proxy-rc-pr`, `pr`, or `workflow-dispatch`\"\n        value: ${{ jobs.tags.outputs.run-kind }}\n      release-pr-run-id:\n        description: \"Only available if `run-kind in [storage-release, proxy-release, compute-release]`. 
Contains the run ID of the `Build and Test` workflow, assuming one with the current commit can be found.\"\n        value: ${{ jobs.tags.outputs.release-pr-run-id }}\n      sha:\n        description: \"github.event.pull_request.head.sha on release PRs, github.sha otherwise\"\n        value: ${{ jobs.tags.outputs.sha }}\n\npermissions: {}\n\ndefaults:\n  run:\n    shell: bash -euo pipefail {0}\n\njobs:\n  tags:\n    runs-on: ubuntu-22.04\n    outputs:\n      build-tag: ${{ steps.build-tag.outputs.build-tag }}\n      release-tag: ${{ steps.build-tag.outputs.release-tag }}\n      compute: ${{ steps.previous-releases.outputs.compute }}\n      proxy: ${{ steps.previous-releases.outputs.proxy }}\n      storage: ${{ steps.previous-releases.outputs.storage }}\n      run-kind: ${{ steps.run-kind.outputs.run-kind }}\n      release-pr-run-id: ${{ steps.release-pr-run-id.outputs.release-pr-run-id }}\n      sha: ${{ steps.sha.outputs.sha }}\n    permissions:\n      contents: read\n    steps:\n      # Need `fetch-depth: 0` to count the number of commits in the branch\n      - name: Harden the runner (Audit all outbound calls)\n        uses: step-security/harden-runner@4d991eb9b905ef189e4c376166672c3f2f230481 # v2.11.0\n        with:\n          egress-policy: audit\n\n      - name: Get run kind\n        id: run-kind\n        env:\n          RUN_KIND: >-\n            ${{\n              false\n              || (inputs.github-event-name == 'push'         && github.ref_name == 'main')            && 'push-main'\n              || (inputs.github-event-name == 'push'         && github.ref_name == 'release')         && 'storage-release'\n              || (inputs.github-event-name == 'push'         && github.ref_name == 'release-compute') && 'compute-release'\n              || (inputs.github-event-name == 'push'         && github.ref_name == 'release-proxy')   && 'proxy-release'\n              || (inputs.github-event-name == 'pull_request' && github.base_ref == 'release')         && 
'storage-rc-pr'\n              || (inputs.github-event-name == 'pull_request' && github.base_ref == 'release-compute') && 'compute-rc-pr'\n              || (inputs.github-event-name == 'pull_request' && github.base_ref == 'release-proxy')   && 'proxy-rc-pr'\n              || (inputs.github-event-name == 'pull_request')                                         && 'pr'\n              || (inputs.github-event-name == 'workflow_dispatch')                                    && 'workflow-dispatch'\n              || 'unknown'\n            }}\n        run: |\n          echo \"run-kind=$RUN_KIND\" | tee -a $GITHUB_OUTPUT\n\n      - name: Get the right SHA\n        id: sha\n        env:\n          SHA: >\n            ${{\n              contains(fromJSON('[\"storage-rc-pr\", \"proxy-rc-pr\", \"compute-rc-pr\"]'), steps.run-kind.outputs.run-kind)\n              && fromJSON(inputs.github-event-json).pull_request.head.sha\n              || github.sha\n            }}\n        run: |\n          echo \"sha=$SHA\" | tee -a $GITHUB_OUTPUT\n\n      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2\n        with:\n          fetch-depth: 0\n          ref: ${{ steps.sha.outputs.sha }}\n\n      - name: Get build tag\n        id: build-tag\n        env:\n          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}\n          CURRENT_BRANCH: ${{ github.head_ref || github.ref_name }}\n          CURRENT_SHA: ${{ github.event.pull_request.head.sha || github.sha }}\n          RUN_KIND: ${{ steps.run-kind.outputs.run-kind }}\n        run: |\n          case $RUN_KIND in\n          push-main)\n            echo \"build-tag=$(git rev-list --count HEAD)\" | tee -a $GITHUB_OUTPUT\n            ;;\n          storage-release)\n            echo \"build-tag=release-$(git rev-list --count HEAD)\" | tee -a $GITHUB_OUTPUT\n            ;;\n          proxy-release)\n            echo \"build-tag=release-proxy-$(git rev-list --count HEAD)\" | tee -a $GITHUB_OUTPUT\n            ;;\n          
compute-release)\n            echo \"build-tag=release-compute-$(git rev-list --count HEAD)\" | tee -a $GITHUB_OUTPUT\n            ;;\n          pr|storage-rc-pr|compute-rc-pr|proxy-rc-pr)\n            BUILD_AND_TEST_RUN_ID=$(gh api --paginate \\\n              -H \"Accept: application/vnd.github+json\" \\\n              -H \"X-GitHub-Api-Version: 2022-11-28\" \\\n              \"/repos/${GITHUB_REPOSITORY}/actions/runs?head_sha=${CURRENT_SHA}&branch=${CURRENT_BRANCH}\" \\\n              | jq '[.workflow_runs[] | select(.name == \"Build and Test\")][0].id // (\"Error: No matching workflow run found.\" | halt_error(1))')\n            echo \"build-tag=$BUILD_AND_TEST_RUN_ID\" | tee -a $GITHUB_OUTPUT\n            case $RUN_KIND in\n            storage-rc-pr)\n              echo \"release-tag=release-$(git rev-list --count HEAD)\" | tee -a $GITHUB_OUTPUT\n              ;;\n            proxy-rc-pr)\n              echo \"release-tag=release-proxy-$(git rev-list --count HEAD)\" | tee -a $GITHUB_OUTPUT\n              ;;\n            compute-rc-pr)\n              echo \"release-tag=release-compute-$(git rev-list --count HEAD)\" | tee -a $GITHUB_OUTPUT\n              ;;\n            esac\n            ;;\n          workflow-dispatch)\n            echo \"build-tag=$GITHUB_RUN_ID\" | tee -a $GITHUB_OUTPUT\n            ;;\n          *)\n            echo \"Unexpected RUN_KIND ('${RUN_KIND}'), failing to assign build-tag!\"\n            exit 1\n          esac\n\n      - name: Get the previous release-tags\n        id: previous-releases\n        env:\n          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}\n        run: |\n          gh api --paginate \\\n            -H \"Accept: application/vnd.github+json\" \\\n            -H \"X-GitHub-Api-Version: 2022-11-28\" \\\n            \"/repos/${GITHUB_REPOSITORY}/releases\" \\\n          | jq -f .github/scripts/previous-releases.jq -r \\\n          | tee -a \"${GITHUB_OUTPUT}\"\n\n      - name: Get the release PR run ID\n        id: 
release-pr-run-id\n        if: ${{ contains(fromJSON('[\"storage-release\", \"compute-release\", \"proxy-release\"]'), steps.run-kind.outputs.run-kind) }}\n        env:\n          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}\n          CURRENT_SHA: ${{ github.sha }}\n        run: |\n          RELEASE_PR_RUN_ID=$(gh api \"/repos/${GITHUB_REPOSITORY}/actions/runs?head_sha=$CURRENT_SHA\" | jq '[.workflow_runs[] | select(.name == \"Build and Test\") | select(.head_branch | test(\"^rc/release.*$\"; \"s\"))] | first | .id // (\"Failed to find Build and Test run from  RC PR!\" | halt_error(1))')\n          echo \"release-pr-run-id=$RELEASE_PR_RUN_ID\" | tee -a $GITHUB_OUTPUT\n"
  },
  {
    "path": ".github/workflows/_push-to-container-registry.yml",
    "content": "name: Push images to Container Registry\non:\n  workflow_call:\n    inputs:\n      # Example: {\"docker.io/neondatabase/neon:13196061314\":[\"${{ vars.NEON_DEV_AWS_ACCOUNT_ID }}.dkr.ecr.${{ vars.AWS_ECR_REGION }}.amazonaws.com/neon:13196061314\",\"neoneastus2.azurecr.io/neondatabase/neon:13196061314\"]}\n      image-map:\n        description: JSON map of images, mapping from a source image to an array of target images that should be pushed.\n        required: true\n        type: string\n      aws-region:\n        description: AWS region to log in to. Required when pushing to ECR.\n        required: false\n        type: string\n      aws-account-id:\n        description: AWS account ID to log in to for pushing to ECR. Required when pushing to ECR.\n        required: false\n        type: string\n      aws-role-to-assume:\n        description: AWS role to assume to for pushing to ECR. Required when pushing to ECR.\n        required: false\n        type: string\n      azure-client-id:\n        description: Client ID of Azure managed identity or Entra app. Required when pushing to ACR.\n        required: false\n        type: string\n      azure-subscription-id:\n        description: Azure subscription ID. Required when pushing to ACR.\n        required: false\n        type: string\n      azure-tenant-id:\n        description: Azure tenant ID. Required when pushing to ACR.\n        required: false\n        type: string\n      acr-registry-name:\n        description: ACR registry name. 
Required when pushing to ACR.\n        required: false\n        type: string\n\npermissions: {}\n\ndefaults:\n  run:\n    shell: bash -euo pipefail {0}\n\njobs:\n  push-to-container-registry:\n    runs-on: ubuntu-22.04\n    permissions:\n      id-token: write  # Required for aws/azure login\n      packages: write  # required for pushing to GHCR\n    steps:\n      - name: Harden the runner (Audit all outbound calls)\n        uses: step-security/harden-runner@4d991eb9b905ef189e4c376166672c3f2f230481 # v2.11.0\n        with:\n          egress-policy: audit\n\n      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2\n        with:\n          sparse-checkout: .github/scripts/push_with_image_map.py\n          sparse-checkout-cone-mode: false\n\n      - name: Print image-map\n        run: echo '${{ inputs.image-map }}' | jq\n\n      - name: Configure AWS credentials\n        if: contains(inputs.image-map, 'amazonaws.com/')\n        uses: aws-actions/configure-aws-credentials@e3dd6a429d7300a6a4c196c26e071d42e0343502 # v4.0.2\n        with:\n          aws-region: \"${{ inputs.aws-region }}\"\n          role-to-assume: \"arn:aws:iam::${{ inputs.aws-account-id }}:role/${{ inputs.aws-role-to-assume }}\"\n          role-duration-seconds: 3600\n\n      - name: Login to ECR\n        if: contains(inputs.image-map, 'amazonaws.com/')\n        uses: aws-actions/amazon-ecr-login@062b18b96a7aff071d4dc91bc00c4c1a7945b076 # v2.0.1\n        with:\n          registries: \"${{ inputs.aws-account-id }}\"\n\n      - name: Configure Azure credentials\n        if: contains(inputs.image-map, 'azurecr.io/')\n        uses: azure/login@6c251865b4e6290e7b78be643ea2d005bc51f69a  # @v2.1.1\n        with:\n          client-id: ${{ inputs.azure-client-id }}\n          subscription-id: ${{ inputs.azure-subscription-id }}\n          tenant-id: ${{ inputs.azure-tenant-id }}\n\n      - name: Login to ACR\n        if: contains(inputs.image-map, 'azurecr.io/')\n        run: |\n         
 az acr login --name=${{ inputs.acr-registry-name }}\n\n      - name: Login to GHCR\n        if: contains(inputs.image-map, 'ghcr.io/')\n        uses: docker/login-action@74a5d142397b4f367a81961eba4e8cd7edddf772 # v3.4.0\n        with:\n          registry: ghcr.io\n          username: ${{ github.actor }}\n          password: ${{ secrets.GITHUB_TOKEN }}\n\n      - name: Log in to Docker Hub\n        uses: docker/login-action@74a5d142397b4f367a81961eba4e8cd7edddf772 # v3.4.0\n        with:\n          username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}\n          password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}\n\n      - name: Copy docker images to target registries\n        id: push\n        run: python3 .github/scripts/push_with_image_map.py\n        env:\n          IMAGE_MAP: ${{ inputs.image-map }}\n\n      - name: Notify Slack if container image pushing fails\n        if: steps.push.outputs.push_failures || failure()\n        uses: slackapi/slack-github-action@485a9d42d3a73031f12ec201c457e2162c45d02d # v2.0.0\n        with:\n          method: chat.postMessage\n          token: ${{ secrets.SLACK_BOT_TOKEN }}\n          payload: |\n            channel: ${{ vars.SLACK_ON_CALL_DEVPROD_STREAM }}\n            text: >\n              *Container image pushing ${{\n                steps.push.outcome == 'failure' && 'failed completely' || 'succeeded with some retries'\n              }}* in\n              <${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|GitHub Run>\n\n              ${{ steps.push.outputs.push_failures && format(\n                '*Failed targets:*\\n• {0}', join(fromJson(steps.push.outputs.push_failures), '\\n• ')\n              ) || '' }}\n"
  },
  {
    "path": ".github/workflows/actionlint.yml",
    "content": "name: Lint GitHub Workflows\n\non:\n  push:\n    branches:\n      - main\n      - release\n    paths:\n      - '.github/workflows/*.ya?ml'\n  pull_request:\n    paths:\n      - '.github/workflows/*.ya?ml'\n\nconcurrency:\n  group: ${{ github.workflow }}-${{ github.ref }}\n  cancel-in-progress: ${{ github.event_name == 'pull_request' }}\n\njobs:\n  check-permissions:\n    if: ${{ !contains(github.event.pull_request.labels.*.name, 'run-no-ci') }}\n    uses: ./.github/workflows/check-permissions.yml\n    with:\n      github-event-name: ${{ github.event_name}}\n\n  actionlint:\n    needs: [ check-permissions ]\n    runs-on: ubuntu-22.04\n    steps:\n      - name: Harden the runner (Audit all outbound calls)\n        uses: step-security/harden-runner@4d991eb9b905ef189e4c376166672c3f2f230481 # v2.11.0\n        with:\n          egress-policy: audit\n\n      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2\n      - uses: reviewdog/action-actionlint@a5524e1c19e62881d79c1f1b9b6f09f16356e281 # v1.65.2\n        env:\n          # SC2046 - Quote this to prevent word splitting. - https://www.shellcheck.net/wiki/SC2046\n          # SC2086 - Double quote to prevent globbing and word splitting. - https://www.shellcheck.net/wiki/SC2086\n          SHELLCHECK_OPTS: --exclude=SC2046,SC2086\n        with:\n          fail_level: error\n          filter_mode: nofilter\n          level: error\n\n      - name: Disallow 'ubuntu-latest' runners\n        run: |\n          PAT='^\\s*runs-on:.*-latest'\n          if grep -ERq $PAT .github/workflows; then\n            grep -ERl $PAT .github/workflows |\\\n            while read -r f\n            do\n              l=$(grep -nE $PAT $f | awk -F: '{print $1}' | head -1)\n              echo \"::error file=$f,line=$l::Please use 'ubuntu-22.04' instead of 'ubuntu-latest'\"\n            done\n            exit 1\n          fi\n"
  },
  {
    "path": ".github/workflows/approved-for-ci-run.yml",
    "content": "name: Handle `approved-for-ci-run` label\n# This workflow helps to run CI pipeline for PRs made by external contributors (from forks).\n\non:\n  pull_request_target:\n    branches:\n      - main\n    types:\n      # Default types that triggers a workflow ([1]):\n      # - [1] https://docs.github.com/en/actions/using-workflows/events-that-trigger-workflows#pull_request\n      - opened\n      - synchronize\n      - reopened\n      # Types that we wand to handle in addition to keep labels tidy:\n      - closed\n      # Actual magic happens here:\n      - labeled\n\nconcurrency:\n  group: ${{ github.workflow }}-${{ github.event.pull_request.number }}\n  cancel-in-progress: false\n\nenv:\n  GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}\n  PR_NUMBER: ${{ github.event.pull_request.number }}\n  BRANCH: \"ci-run/pr-${{ github.event.pull_request.number }}\"\n\n# No permission for GITHUB_TOKEN by default; the **minimal required** set of permissions should be granted in each job.\npermissions: {}\n\ndefaults:\n  run:\n    shell: bash -euo pipefail {0}\n\njobs:\n  remove-label:\n    # Remove `approved-for-ci-run` label if the workflow is triggered by changes in a PR.\n    # The PR should be reviewed and labelled manually again.\n\n    permissions:\n      pull-requests: write # For `gh pr edit`\n\n    if: |\n      contains(fromJSON('[\"opened\", \"synchronize\", \"reopened\", \"closed\"]'), github.event.action) &&\n      contains(github.event.pull_request.labels.*.name, 'approved-for-ci-run')\n\n    runs-on: ubuntu-22.04\n\n    steps:\n      - name: Harden the runner (Audit all outbound calls)\n        uses: step-security/harden-runner@4d991eb9b905ef189e4c376166672c3f2f230481 # v2.11.0\n        with:\n          egress-policy: audit\n\n      - run: gh pr --repo \"${GITHUB_REPOSITORY}\" edit \"${PR_NUMBER}\" --remove-label \"approved-for-ci-run\"\n\n  create-or-update-pr-for-ci-run:\n    # Create local PR for an `approved-for-ci-run` labelled PR to run CI pipeline in 
it.\n\n    permissions:\n      pull-requests: write # for `gh pr edit`\n      # For `git push` and `gh pr create` we use CI_ACCESS_TOKEN\n\n    if: |\n      github.event.action == 'labeled' &&\n      contains(github.event.pull_request.labels.*.name, 'approved-for-ci-run')\n\n    runs-on: ubuntu-22.04\n\n    steps:\n      - name: Harden the runner (Audit all outbound calls)\n        uses: step-security/harden-runner@4d991eb9b905ef189e4c376166672c3f2f230481 # v2.11.0\n        with:\n          egress-policy: audit\n\n      - run: gh pr --repo \"${GITHUB_REPOSITORY}\" edit \"${PR_NUMBER}\" --remove-label \"approved-for-ci-run\"\n\n      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2\n        with:\n          ref: ${{ github.event.pull_request.head.sha }}\n          token: ${{ secrets.CI_ACCESS_TOKEN }}\n\n      - name: Look for existing PR\n        id: get-pr\n        env:\n          GH_TOKEN: ${{ secrets.CI_ACCESS_TOKEN }}\n        run: |\n          ALREADY_CREATED=\"$(gh pr --repo ${GITHUB_REPOSITORY} list --head ${BRANCH} --base main --json number --jq '.[].number')\"\n          echo \"ALREADY_CREATED=${ALREADY_CREATED}\" >> ${GITHUB_OUTPUT}\n\n      - name: Get changed labels\n        id: get-labels\n        if: steps.get-pr.outputs.ALREADY_CREATED != ''\n        env:\n          ALREADY_CREATED: ${{ steps.get-pr.outputs.ALREADY_CREATED }}\n          GH_TOKEN: ${{ secrets.CI_ACCESS_TOKEN }}\n        run: |\n          LABELS_TO_REMOVE=$(comm -23 <(gh pr --repo ${GITHUB_REPOSITORY} view ${ALREADY_CREATED} --json labels --jq '.labels.[].name'| ( grep -E '^run' || true ) | sort) \\\n          <(gh pr --repo ${GITHUB_REPOSITORY} view ${PR_NUMBER} --json labels --jq '.labels.[].name' | ( grep -E '^run' || true ) | sort ) |\\\n          ( grep -v run-e2e-tests-in-draft || true ) | paste -sd , -)\n          LABELS_TO_ADD=$(comm -13 <(gh pr --repo ${GITHUB_REPOSITORY} view ${ALREADY_CREATED} --json labels --jq '.labels.[].name'| ( grep -E '^run' 
|| true ) |sort) \\\n          <(gh pr --repo ${GITHUB_REPOSITORY} view ${PR_NUMBER} --json labels --jq '.labels.[].name' |  ( grep -E '^run' || true ) | sort ) |\\\n          paste -sd , -)\n          echo \"LABELS_TO_ADD=${LABELS_TO_ADD}\" >> ${GITHUB_OUTPUT}\n          echo \"LABELS_TO_REMOVE=${LABELS_TO_REMOVE}\" >> ${GITHUB_OUTPUT}\n\n      - run: git checkout -b \"${BRANCH}\"\n\n      - run: git push --force origin \"${BRANCH}\"\n        if: steps.get-pr.outputs.ALREADY_CREATED == ''\n\n      - name: Create a Pull Request for CI run (if required)\n        if: steps.get-pr.outputs.ALREADY_CREATED == ''\n        env:\n          GH_TOKEN: ${{ secrets.CI_ACCESS_TOKEN }}\n        run: |\n          cat << EOF > body.md\n            This Pull Request is created automatically to run the CI pipeline for #${PR_NUMBER}\n\n            Please do not alter or merge/close it.\n\n            Feel free to review/comment/discuss the original PR #${PR_NUMBER}.\n          EOF\n\n          LABELS=$( (gh pr --repo \"${GITHUB_REPOSITORY}\" view ${PR_NUMBER}  --json labels --jq '.labels.[].name'; echo run-e2e-tests-in-draft  )| \\\n          grep -E '^run' | paste -sd , -)\n          gh pr --repo \"${GITHUB_REPOSITORY}\" create --title \"CI run for PR #${PR_NUMBER}\" \\\n                                                       --body-file \"body.md\" \\\n                                                       --head \"${BRANCH}\" \\\n                                                       --base \"main\" \\\n                                                       --label ${LABELS} \\\n                                                       --draft\n      - name: Modify the existing pull request (if required)\n        if: steps.get-pr.outputs.ALREADY_CREATED != ''\n        env:\n          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}\n          LABELS_TO_ADD: ${{ steps.get-labels.outputs.LABELS_TO_ADD }}\n          LABELS_TO_REMOVE: ${{ steps.get-labels.outputs.LABELS_TO_REMOVE }}\n          
ALREADY_CREATED: ${{ steps.get-pr.outputs.ALREADY_CREATED }}\n        run: |\n          ADD_CMD=\n          REMOVE_CMD=\n          [ -z \"${LABELS_TO_ADD}\" ] || ADD_CMD=\"--add-label ${LABELS_TO_ADD}\"\n          [ -z \"${LABELS_TO_REMOVE}\" ] || REMOVE_CMD=\"--remove-label ${LABELS_TO_REMOVE}\"\n          if [ -n \"${ADD_CMD}\" ] || [ -n \"${REMOVE_CMD}\" ]; then\n            gh pr --repo \"${GITHUB_REPOSITORY}\" edit ${ALREADY_CREATED} ${ADD_CMD} ${REMOVE_CMD}\n          fi\n\n      - run: git push --force origin \"${BRANCH}\"\n        if: steps.get-pr.outputs.ALREADY_CREATED != ''\n\n  cleanup:\n    # Close PRs and delete branches if the original PR is closed.\n\n    permissions:\n      contents: write # for `--delete-branch` flag in `gh pr close`\n      pull-requests: write # for `gh pr close`\n\n    if: |\n      github.event.action == 'closed' &&\n      github.event.pull_request.head.repo.full_name != github.repository\n\n    runs-on: ubuntu-22.04\n\n    steps:\n      - name: Harden the runner (Audit all outbound calls)\n        uses: step-security/harden-runner@4d991eb9b905ef189e4c376166672c3f2f230481 # v2.11.0\n        with:\n          egress-policy: audit\n\n      - name: Close PR and delete `ci-run/pr-${{ env.PR_NUMBER }}` branch\n        run: |\n          CLOSED=\"$(gh pr --repo ${GITHUB_REPOSITORY} list --head ${BRANCH} --json 'closed' --jq '.[].closed')\"\n          if [ \"${CLOSED}\" == \"false\" ]; then\n            gh pr --repo \"${GITHUB_REPOSITORY}\" close \"${BRANCH}\" --delete-branch\n          fi\n"
  },
  {
    "path": ".github/workflows/benchbase_tpcc.yml",
    "content": "name: TPC-C like benchmark using benchbase\n\non:\n  schedule:\n    # * is a special character in YAML so you have to quote this string\n    #          ┌───────────── minute (0 - 59)\n    #          │ ┌───────────── hour (0 - 23)\n    #          │ │ ┌───────────── day of the month (1 - 31)\n    #          │ │ │ ┌───────────── month (1 - 12 or JAN-DEC)\n    #          │ │ │ │ ┌───────────── day of the week (0 - 6 or SUN-SAT)\n    - cron:   '0 6 * * *' # run once a day at 6 AM UTC\n  workflow_dispatch: # adds ability to run this manually\n\ndefaults:\n  run:\n    shell: bash -euxo pipefail {0}\n\nconcurrency:\n  # Allow only one workflow globally because we do not want to be too noisy in production environment\n  group: benchbase-tpcc-workflow\n  cancel-in-progress: false\n\npermissions:\n  contents: read\n\njobs:\n  benchbase-tpcc:\n    strategy:\n      fail-fast: false # allow other variants to continue even if one fails\n      matrix:\n        include:\n          - warehouses: 50 # defines number of warehouses and is used to compute number of terminals\n            max_rate: 800  # measured max TPS at scale factor based on experiments. 
Adjust if performance is better/worse\n            min_cu: 0.25   # simulate free tier plan (0.25 -2 CU)\n            max_cu: 2\n          - warehouses: 500 # serverless plan (2-8 CU)\n            max_rate: 2000\n            min_cu: 2\n            max_cu: 8\n          - warehouses: 1000 # business plan (2-16 CU)\n            max_rate: 2900\n            min_cu: 2\n            max_cu: 16\n      max-parallel: 1 # we want to run each workload size sequentially to avoid noisy neighbors\n    permissions:\n      contents: write\n      statuses: write\n      id-token: write # aws-actions/configure-aws-credentials\n    env:\n      PG_CONFIG: /tmp/neon/pg_install/v17/bin/pg_config\n      PSQL: /tmp/neon/pg_install/v17/bin/psql\n      PG_17_LIB_PATH: /tmp/neon/pg_install/v17/lib\n      POSTGRES_VERSION: 17\n    runs-on: [ self-hosted, us-east-2, x64 ]\n    timeout-minutes: 1440\n\n    steps:\n    - name: Harden the runner (Audit all outbound calls)\n      uses: step-security/harden-runner@4d991eb9b905ef189e4c376166672c3f2f230481 # v2.11.0\n      with:\n        egress-policy: audit\n\n    - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2\n\n    - name: Configure AWS credentials # necessary to download artefacts\n      uses: aws-actions/configure-aws-credentials@e3dd6a429d7300a6a4c196c26e071d42e0343502 # v4.0.2\n      with:\n        aws-region: eu-central-1\n        role-to-assume: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}\n        role-duration-seconds: 18000 # 5 hours is currently max associated with IAM role\n\n    - name: Download Neon artifact\n      uses: ./.github/actions/download\n      with:\n        name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact\n        path: /tmp/neon/\n        prefix: latest\n        aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}\n\n    - name: Create Neon Project\n      id: create-neon-project-tpcc\n      uses: ./.github/actions/neon-project-create\n      with:\n        region_id: aws-us-east-2\n        
postgres_version: ${{ env.POSTGRES_VERSION }}\n        compute_units: '[${{ matrix.min_cu }}, ${{ matrix.max_cu }}]'\n        api_key: ${{ secrets.NEON_PRODUCTION_API_KEY_4_BENCHMARKS }}\n        api_host: console.neon.tech  # production (!)\n\n    - name: Initialize Neon project\n      env:\n          BENCHMARK_TPCC_CONNSTR: ${{ steps.create-neon-project-tpcc.outputs.dsn }}\n          PROJECT_ID: ${{ steps.create-neon-project-tpcc.outputs.project_id }}\n      run: |\n        echo \"Initializing Neon project with project_id: ${PROJECT_ID}\"\n        export LD_LIBRARY_PATH=${PG_17_LIB_PATH}\n        \n        # Retry logic for psql connection with 1 minute sleep between attempts\n        for attempt in {1..3}; do\n          echo \"Attempt ${attempt}/3: Creating extensions in Neon project\"\n          if ${PSQL} \"${BENCHMARK_TPCC_CONNSTR}\" -c \"CREATE EXTENSION IF NOT EXISTS neon; CREATE EXTENSION IF NOT EXISTS neon_utils;\"; then\n            echo \"Successfully created extensions\"\n            break\n          else\n            echo \"Failed to create extensions on attempt ${attempt}\"\n            if [ ${attempt} -lt 3 ]; then\n              echo \"Waiting 60 seconds before retry...\"\n              sleep 60\n            else\n              echo \"All attempts failed, exiting\"\n              exit 1\n            fi\n          fi\n        done\n        \n        echo \"BENCHMARK_TPCC_CONNSTR=${BENCHMARK_TPCC_CONNSTR}\" >> $GITHUB_ENV\n\n    - name: Generate BenchBase workload configuration\n      env:\n        WAREHOUSES: ${{ matrix.warehouses }}\n        MAX_RATE: ${{ matrix.max_rate }}\n      run: |\n        echo \"Generating BenchBase configs for warehouses: ${WAREHOUSES}, max_rate: ${MAX_RATE}\"\n        \n        # Extract hostname and password from connection string\n        # Format: postgresql://username:password@hostname/database?params (no port for Neon)\n        HOSTNAME=$(echo \"${BENCHMARK_TPCC_CONNSTR}\" | sed -n 
's|.*://[^:]*:[^@]*@\\([^/]*\\)/.*|\\1|p')\n        PASSWORD=$(echo \"${BENCHMARK_TPCC_CONNSTR}\" | sed -n 's|.*://[^:]*:\\([^@]*\\)@.*|\\1|p')\n        \n        echo \"Extracted hostname: ${HOSTNAME}\"\n        \n        # Use runner temp (NVMe) as working directory\n        cd \"${RUNNER_TEMP}\"\n        \n        # Copy the generator script\n        cp \"${GITHUB_WORKSPACE}/test_runner/performance/benchbase_tpc_c_helpers/generate_workload_size.py\" .\n        \n        # Generate configs and scripts\n        python3 generate_workload_size.py \\\n          --warehouses ${WAREHOUSES} \\\n          --max-rate ${MAX_RATE} \\\n          --hostname ${HOSTNAME} \\\n          --password ${PASSWORD} \\\n          --runner-arch ${{ runner.arch }}\n        \n        # Fix path mismatch: move generated configs and scripts to expected locations\n        mv ../configs ./configs\n        mv ../scripts ./scripts\n\n    - name: Prepare database (load data)\n      env:\n        WAREHOUSES: ${{ matrix.warehouses }}\n      run: |\n        cd \"${RUNNER_TEMP}\"\n        \n        echo \"Loading ${WAREHOUSES} warehouses into database...\"\n        \n        # Run the loader script and capture output to log file while preserving stdout/stderr\n        ./scripts/load_${WAREHOUSES}_warehouses.sh 2>&1 | tee \"load_${WAREHOUSES}_warehouses.log\"\n        \n        echo \"Database loading completed\"\n\n    - name: Run TPC-C benchmark (warmup phase, then benchmark at 70% of configured max TPS)\n      env:\n        WAREHOUSES: ${{ matrix.warehouses }}\n      run: |\n        cd \"${RUNNER_TEMP}\"\n        \n        echo \"Running TPC-C benchmark with ${WAREHOUSES} warehouses...\"\n        \n        # Run the optimal rate benchmark\n        ./scripts/execute_${WAREHOUSES}_warehouses_opt_rate.sh\n        \n        echo \"Benchmark execution completed\"\n\n    - name: Run TPC-C benchmark (warmup phase, then ramp down TPS and up again in 5 minute intervals)\n\n      env:\n          WAREHOUSES: 
${{ matrix.warehouses }}\n      run: |\n        cd \"${RUNNER_TEMP}\"\n        \n        echo \"Running TPC-C ramp-down-up with ${WAREHOUSES} warehouses...\"\n        \n        # Run the ramp-down/up benchmark\n        ./scripts/execute_${WAREHOUSES}_warehouses_ramp_up.sh\n        \n        echo \"Benchmark execution completed\"\n\n    - name: Process results (upload to test results database and generate diagrams)\n      env:\n        WAREHOUSES: ${{ matrix.warehouses }}\n        MIN_CU: ${{ matrix.min_cu }}\n        MAX_CU: ${{ matrix.max_cu }}\n        PROJECT_ID: ${{ steps.create-neon-project-tpcc.outputs.project_id }}\n        REVISION: ${{ github.sha }}\n        PERF_DB_CONNSTR: ${{ secrets.PERF_TEST_RESULT_CONNSTR }}\n      run: |\n        cd \"${RUNNER_TEMP}\"\n        \n        echo \"Creating temporary Python environment for results processing...\"\n        \n        # Create temporary virtual environment\n        python3 -m venv temp_results_env\n        source temp_results_env/bin/activate\n        \n        # Install required packages in virtual environment\n        pip install matplotlib pandas psycopg2-binary\n        \n        echo \"Copying results processing scripts...\"\n        \n        # Copy both processing scripts\n        cp \"${GITHUB_WORKSPACE}/test_runner/performance/benchbase_tpc_c_helpers/generate_diagrams.py\" .\n        cp \"${GITHUB_WORKSPACE}/test_runner/performance/benchbase_tpc_c_helpers/upload_results_to_perf_test_results.py\" .\n        \n        echo \"Processing load phase metrics...\"\n        \n        # Find and process load log\n        LOAD_LOG=$(find . 
-name \"load_${WAREHOUSES}_warehouses.log\" -type f | head -1)\n        if [ -n \"$LOAD_LOG\" ]; then\n          echo \"Processing load metrics from: $LOAD_LOG\"\n          python upload_results_to_perf_test_results.py \\\n            --load-log \"$LOAD_LOG\" \\\n            --run-type \"load\" \\\n            --warehouses \"${WAREHOUSES}\" \\\n            --min-cu \"${MIN_CU}\" \\\n            --max-cu \"${MAX_CU}\" \\\n            --project-id \"${PROJECT_ID}\" \\\n            --revision \"${REVISION}\" \\\n            --connection-string \"${PERF_DB_CONNSTR}\"\n        else\n          echo \"Warning: Load log file not found: load_${WAREHOUSES}_warehouses.log\"\n        fi\n        \n        echo \"Processing warmup results for optimal rate...\"\n        \n        # Find and process warmup results\n        WARMUP_CSV=$(find results_warmup -name \"*.results.csv\" -type f | head -1)\n        WARMUP_JSON=$(find results_warmup -name \"*.summary.json\" -type f | head -1)\n        \n        if [ -n \"$WARMUP_CSV\" ] && [ -n \"$WARMUP_JSON\" ]; then\n          echo \"Generating warmup diagram from: $WARMUP_CSV\"\n          python generate_diagrams.py \\\n            --input-csv \"$WARMUP_CSV\" \\\n            --output-svg \"warmup_${WAREHOUSES}_warehouses_performance.svg\" \\\n            --title-suffix \"Warmup at max TPS\"\n            \n          echo \"Uploading warmup metrics from: $WARMUP_JSON\"\n          python upload_results_to_perf_test_results.py \\\n            --summary-json \"$WARMUP_JSON\" \\\n            --results-csv \"$WARMUP_CSV\" \\\n            --run-type \"warmup\" \\\n            --min-cu \"${MIN_CU}\" \\\n            --max-cu \"${MAX_CU}\" \\\n            --project-id \"${PROJECT_ID}\" \\\n            --revision \"${REVISION}\" \\\n            --connection-string \"${PERF_DB_CONNSTR}\"\n        else\n          echo \"Warning: Missing warmup results files (CSV: $WARMUP_CSV, JSON: $WARMUP_JSON)\"\n        fi\n        \n        echo \"Processing 
optimal rate results...\"\n        \n        # Find and process optimal rate results  \n        OPTRATE_CSV=$(find results_opt_rate -name \"*.results.csv\" -type f | head -1)\n        OPTRATE_JSON=$(find results_opt_rate -name \"*.summary.json\" -type f | head -1)\n        \n        if [ -n \"$OPTRATE_CSV\" ] && [ -n \"$OPTRATE_JSON\" ]; then\n          echo \"Generating optimal rate diagram from: $OPTRATE_CSV\"\n          python generate_diagrams.py \\\n            --input-csv \"$OPTRATE_CSV\" \\\n            --output-svg \"benchmark_${WAREHOUSES}_warehouses_performance.svg\" \\\n            --title-suffix \"70% of max TPS\"\n            \n          echo \"Uploading optimal rate metrics from: $OPTRATE_JSON\"\n          python upload_results_to_perf_test_results.py \\\n            --summary-json \"$OPTRATE_JSON\" \\\n            --results-csv \"$OPTRATE_CSV\" \\\n            --run-type \"opt-rate\" \\\n            --min-cu \"${MIN_CU}\" \\\n            --max-cu \"${MAX_CU}\" \\\n            --project-id \"${PROJECT_ID}\" \\\n            --revision \"${REVISION}\" \\\n            --connection-string \"${PERF_DB_CONNSTR}\"\n        else\n          echo \"Warning: Missing optimal rate results files (CSV: $OPTRATE_CSV, JSON: $OPTRATE_JSON)\"\n        fi\n\n        echo \"Processing warmup 2 results for ramp down/up phase...\"\n        \n        # Find and process warmup results\n        WARMUP_CSV=$(find results_warmup -name \"*.results.csv\" -type f | tail -1)\n        WARMUP_JSON=$(find results_warmup -name \"*.summary.json\" -type f | tail -1)\n        \n        if [ -n \"$WARMUP_CSV\" ] && [ -n \"$WARMUP_JSON\" ]; then\n          echo \"Generating warmup diagram from: $WARMUP_CSV\"\n          python generate_diagrams.py \\\n            --input-csv \"$WARMUP_CSV\" \\\n            --output-svg \"warmup_2_${WAREHOUSES}_warehouses_performance.svg\" \\\n            --title-suffix \"Warmup at max TPS\"\n            \n          echo \"Uploading warmup metrics from: 
$WARMUP_JSON\"\n          python upload_results_to_perf_test_results.py \\\n            --summary-json \"$WARMUP_JSON\" \\\n            --results-csv \"$WARMUP_CSV\" \\\n            --run-type \"warmup\" \\\n            --min-cu \"${MIN_CU}\" \\\n            --max-cu \"${MAX_CU}\" \\\n            --project-id \"${PROJECT_ID}\" \\\n            --revision \"${REVISION}\" \\\n            --connection-string \"${PERF_DB_CONNSTR}\"\n        else\n          echo \"Warning: Missing warmup results files (CSV: $WARMUP_CSV, JSON: $WARMUP_JSON)\"\n        fi\n        \n        echo \"Processing ramp results...\"\n        \n        # Find and process ramp results  \n        RAMPUP_CSV=$(find results_ramp_up -name \"*.results.csv\" -type f | head -1)\n        RAMPUP_JSON=$(find results_ramp_up -name \"*.summary.json\" -type f | head -1)\n        \n        if [ -n \"$RAMPUP_CSV\" ] && [ -n \"$RAMPUP_JSON\" ]; then\n          echo \"Generating ramp diagram from: $RAMPUP_CSV\"\n          python generate_diagrams.py \\\n            --input-csv \"$RAMPUP_CSV\" \\\n            --output-svg \"ramp_${WAREHOUSES}_warehouses_performance.svg\" \\\n            --title-suffix \"ramp TPS down and up in 5 minute intervals\"\n            \n          echo \"Uploading ramp metrics from: $RAMPUP_JSON\"\n          python upload_results_to_perf_test_results.py \\\n            --summary-json \"$RAMPUP_JSON\" \\\n            --results-csv \"$RAMPUP_CSV\" \\\n            --run-type \"ramp-up\" \\\n            --min-cu \"${MIN_CU}\" \\\n            --max-cu \"${MAX_CU}\" \\\n            --project-id \"${PROJECT_ID}\" \\\n            --revision \"${REVISION}\" \\\n            --connection-string \"${PERF_DB_CONNSTR}\"\n        else\n          echo \"Warning: Missing ramp results files (CSV: $RAMPUP_CSV, JSON: $RAMPUP_JSON)\"\n        fi\n        \n        # Deactivate and clean up virtual environment\n        deactivate\n        rm -rf temp_results_env\n        rm 
upload_results_to_perf_test_results.py\n        \n        echo \"Results processing completed and environment cleaned up\"\n\n    - name: Set date for upload\n      id: set-date\n      run: echo \"date=$(date +%Y-%m-%d)\" >> $GITHUB_OUTPUT\n\n    - name: Configure AWS credentials # necessary to upload results\n      uses: aws-actions/configure-aws-credentials@e3dd6a429d7300a6a4c196c26e071d42e0343502 # v4.0.2\n      with:\n        aws-region: us-east-2\n        role-to-assume: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}\n        role-duration-seconds: 900 # 900 is minimum value \n        \n    - name: Upload benchmark results to S3\n      env:\n        S3_BUCKET: neon-public-benchmark-results\n        S3_PREFIX: benchbase-tpc-c/${{ steps.set-date.outputs.date }}/${{ github.run_id }}/${{ matrix.warehouses }}-warehouses\n      run: |\n        echo \"Redacting passwords from configuration files before upload...\"\n        \n        # Mask all passwords in XML config files\n        find \"${RUNNER_TEMP}/configs\" -name \"*.xml\" -type f -exec sed -i 's|<password>[^<]*</password>|<password>redacted</password>|g' {} \\;\n        \n        echo \"Uploading benchmark results to s3://${S3_BUCKET}/${S3_PREFIX}/\"\n        \n        # Upload the entire benchmark directory recursively\n        aws s3 cp --only-show-errors --recursive \"${RUNNER_TEMP}\" s3://${S3_BUCKET}/${S3_PREFIX}/\n        \n        echo \"Upload completed\"\n        \n    - name: Delete Neon Project\n      if: ${{ always() }}\n      uses: ./.github/actions/neon-project-delete\n      with:\n        project_id: ${{ steps.create-neon-project-tpcc.outputs.project_id }}\n        api_key: ${{ secrets.NEON_PRODUCTION_API_KEY_4_BENCHMARKS }} \n        api_host: console.neon.tech  # production (!)"
  },
  {
    "path": ".github/workflows/benchmarking.yml",
    "content": "name: Benchmarking\n\non:\n  # uncomment to run on push for debugging your PR\n  # push:\n  #   branches: [ your branch ]\n  schedule:\n    # * is a special character in YAML so you have to quote this string\n    #          ┌───────────── minute (0 - 59)\n    #          │ ┌───────────── hour (0 - 23)\n    #          │ │ ┌───────────── day of the month (1 - 31)\n    #          │ │ │ ┌───────────── month (1 - 12 or JAN-DEC)\n    #          │ │ │ │ ┌───────────── day of the week (0 - 6 or SUN-SAT)\n    - cron:   '0 3 * * *' # run once a day, timezone is utc\n  workflow_dispatch: # adds ability to run this manually\n    inputs:\n      region_id:\n        description: 'Project region id. If not set, the default region will be used'\n        required: false\n        default: 'aws-us-east-2'\n      save_perf_report:\n        type: boolean\n        description: 'Publish perf report. If not set, the report will be published only for the main branch'\n        required: false\n      collect_olap_explain:\n        type: boolean\n        description: 'Collect EXPLAIN ANALYZE for OLAP queries. If not set, EXPLAIN ANALYZE will not be collected'\n        required: false\n        default: false\n      collect_pg_stat_statements:\n        type: boolean\n        description: 'Collect pg_stat_statements for OLAP queries. If not set, pg_stat_statements will not be collected'\n        required: false\n        default: false\n      run_AWS_RDS_AND_AURORA:\n        type: boolean\n        description: 'AWS-RDS and AWS-AURORA normally only run on Saturday. Set this to true to run them on every workflow_dispatch'\n        required: false\n        default: false\n      run_only_pgvector_tests:\n        type: boolean\n        description: 'Run pgvector tests but no other tests. 
If not set, all tests including pgvector tests will be run'\n        required: false\n        default: false\n\ndefaults:\n  run:\n    shell: bash -euxo pipefail {0}\n\nconcurrency:\n  # Allow only one workflow per any non-`main` branch.\n  group: ${{ github.workflow }}-${{ github.ref_name }}-${{ github.ref_name == 'main' && github.sha || 'anysha' }}\n  cancel-in-progress: true\n\njobs:\n  cleanup:\n    runs-on: [ self-hosted, us-east-2, x64 ]\n    container:\n      image: ghcr.io/neondatabase/build-tools:pinned-bookworm\n      credentials:\n        username: ${{ github.actor }}\n        password: ${{ secrets.GITHUB_TOKEN }}\n      options: --init\n    env:\n      ORG_ID: org-solitary-dew-09443886\n      LIMIT: 100\n      SEARCH: \"GITHUB_RUN_ID=\"\n      BASE_URL: https://console-stage.neon.build/api/v2\n      DRY_RUN: \"false\"  # Set to \"true\" to just test out the workflow\n\n    steps:\n    - name: Harden the runner (Audit all outbound calls)\n      uses: step-security/harden-runner@4d991eb9b905ef189e4c376166672c3f2f230481 # v2.11.0\n      with:\n        egress-policy: audit\n\n    - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2\n\n    - name: Cleanup inactive Neon projects left over from prior runs\n      env:\n        API_KEY: ${{ secrets.NEON_STAGING_API_KEY }}\n      run: |\n        set -euo pipefail\n\n        NOW=$(date -u +%s)\n        DAYS_AGO=$((NOW - 5 * 86400))\n\n        REQUEST_URL=\"$BASE_URL/projects?limit=$LIMIT&search=$(printf '%s' \"$SEARCH\" | jq -sRr @uri)&org_id=$ORG_ID\"\n\n        echo \"Requesting project list from:\"\n        echo \"$REQUEST_URL\"\n\n        response=$(curl -s -X GET \"$REQUEST_URL\" \\\n          --header \"Accept: application/json\" \\\n          --header \"Content-Type: application/json\" \\\n          --header \"Authorization: Bearer ${API_KEY}\" )\n\n        echo \"Response:\"\n        echo \"$response\" | jq .\n\n        projects_to_delete=$(echo \"$response\" | jq --argjson cutoff 
\"$DAYS_AGO\" '\n          .projects[]\n          | select(.compute_last_active_at != null)\n          | select((.compute_last_active_at | fromdateiso8601) < $cutoff)\n          | {id, name, compute_last_active_at}\n        ')\n\n        if [ -z \"$projects_to_delete\" ]; then\n          echo \"No projects eligible for deletion.\"\n          exit 0\n        fi\n\n        echo \"Projects that will be deleted:\"\n        echo \"$projects_to_delete\" | jq -r '.id'\n\n        if [ \"$DRY_RUN\" = \"false\" ]; then\n          echo \"$projects_to_delete\" | jq -r '.id' | while read -r project_id; do\n            echo \"Deleting project: $project_id\"\n            curl -s -X DELETE \"$BASE_URL/projects/$project_id\" \\\n              --header \"Accept: application/json\" \\\n              --header \"Content-Type: application/json\" \\\n              --header \"Authorization: Bearer ${API_KEY}\" \n          done\n        else\n          echo \"Dry run enabled — no projects were deleted.\"\n        fi\n  bench:\n    if: ${{ github.event.inputs.run_only_pgvector_tests == 'false' || github.event.inputs.run_only_pgvector_tests == null }}\n    permissions:\n      contents: write\n      statuses: write\n      id-token: write # aws-actions/configure-aws-credentials\n    strategy:\n      fail-fast: false\n      matrix:\n        include:\n          - PG_VERSION: 16\n            PLATFORM: \"neon-staging\"\n            region_id: ${{ github.event.inputs.region_id || 'aws-us-east-2' }}\n            RUNNER: [ self-hosted, us-east-2, x64 ]\n          - PG_VERSION: 17\n            PLATFORM: \"neon-staging\"\n            region_id: ${{ github.event.inputs.region_id || 'aws-us-east-2' }}\n            RUNNER: [ self-hosted, us-east-2, x64 ]\n          - PG_VERSION: 16\n            PLATFORM: \"azure-staging\"\n            region_id: 'azure-eastus2'\n            RUNNER: [ self-hosted, eastus2, x64 ]\n    env:\n      TEST_PG_BENCH_DURATIONS_MATRIX: \"300\"\n      TEST_PG_BENCH_SCALES_MATRIX: 
\"10,100\"\n      POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install\n      PG_VERSION: ${{ matrix.PG_VERSION }}\n      TEST_OUTPUT: /tmp/test_output\n      BUILD_TYPE: remote\n      SAVE_PERF_REPORT: ${{ github.event.inputs.save_perf_report || ( github.ref_name == 'main' ) }}\n      PLATFORM: ${{ matrix.PLATFORM }}\n\n    runs-on: ${{ matrix.RUNNER }}\n    container:\n      image: ghcr.io/neondatabase/build-tools:pinned-bookworm\n      credentials:\n        username: ${{ github.actor }}\n        password: ${{ secrets.GITHUB_TOKEN }}\n      options: --init\n\n    steps:\n    - name: Harden the runner (Audit all outbound calls)\n      uses: step-security/harden-runner@4d991eb9b905ef189e4c376166672c3f2f230481 # v2.11.0\n      with:\n        egress-policy: audit\n\n    - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2\n\n    - name: Configure AWS credentials # necessary on Azure runners\n      uses: aws-actions/configure-aws-credentials@e3dd6a429d7300a6a4c196c26e071d42e0343502 # v4.0.2\n      with:\n        aws-region: eu-central-1\n        role-to-assume: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}\n        role-duration-seconds: 18000 # 5 hours\n\n    - name: Download Neon artifact\n      uses: ./.github/actions/download\n      with:\n        name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact\n        path: /tmp/neon/\n        prefix: latest\n        aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}\n\n    - name: Create Neon Project\n      id: create-neon-project\n      uses: ./.github/actions/neon-project-create\n      with:\n        region_id: ${{ matrix.region_id }}\n        postgres_version: ${{ env.PG_VERSION }}\n        api_key: ${{ secrets.NEON_STAGING_API_KEY }}\n\n    - name: Run benchmark\n      uses: ./.github/actions/run-python-test-set\n      with:\n        build_type: ${{ env.BUILD_TYPE }}\n        test_selection: performance\n        run_in_parallel: false\n        save_perf_report: ${{ env.SAVE_PERF_REPORT }}\n        
pg_version: ${{ env.PG_VERSION }}\n        aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}\n        # Set --sparse-ordering option of pytest-order plugin\n        # to ensure tests are running in order of appears in the file.\n        # It's important for test_perf_pgbench.py::test_pgbench_remote_* tests\n        extra_params:\n          -m remote_cluster\n          --sparse-ordering\n          --timeout 14400\n          --ignore test_runner/performance/test_perf_olap.py\n          --ignore test_runner/performance/test_perf_pgvector_queries.py\n          --ignore test_runner/performance/test_logical_replication.py\n          --ignore test_runner/performance/test_physical_replication.py\n          --ignore test_runner/performance/test_perf_ingest_using_pgcopydb.py\n          --ignore test_runner/performance/test_cumulative_statistics_persistence.py\n          --ignore test_runner/performance/test_perf_many_relations.py\n          --ignore test_runner/performance/test_perf_oltp_large_tenant.py\n          --ignore test_runner/performance/test_lfc_prewarm.py\n      env:\n        BENCHMARK_CONNSTR: ${{ steps.create-neon-project.outputs.dsn }}\n        VIP_VAP_ACCESS_TOKEN: \"${{ secrets.VIP_VAP_ACCESS_TOKEN }}\"\n        PERF_TEST_RESULT_CONNSTR: \"${{ secrets.PERF_TEST_RESULT_CONNSTR }}\"\n\n    - name: Delete Neon Project\n      if: ${{ always() }}\n      uses: ./.github/actions/neon-project-delete\n      with:\n        project_id: ${{ steps.create-neon-project.outputs.project_id }}\n        api_key: ${{ secrets.NEON_STAGING_API_KEY }}\n\n    - name: Create Allure report\n      id: create-allure-report\n      if: ${{ !cancelled() }}\n      uses: ./.github/actions/allure-report-generate\n      with:\n        aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}\n\n    - name: Post to a Slack channel\n      if: ${{ github.event.schedule && failure() }}\n      uses: slackapi/slack-github-action@fcfb566f8b0aab22203f066d80ca1d7e4b5d05b3 # v1.27.1\n      with:\n        
channel-id: \"C06KHQVQ7U3\" # on-call-qa-staging-stream\n        slack-message: |\n          Periodic perf testing: ${{ job.status }}\n          <${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|GitHub Run>\n          <${{ steps.create-allure-report.outputs.report-url }}|Allure report>\n      env:\n        SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}\n\n  cumstats-test:\n    if: ${{ github.event.inputs.run_only_pgvector_tests == 'false' || github.event.inputs.run_only_pgvector_tests == null }}\n    permissions:\n      contents: write\n      statuses: write\n      id-token: write # aws-actions/configure-aws-credentials\n    env:\n      POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install\n      DEFAULT_PG_VERSION: 17\n      TEST_OUTPUT: /tmp/test_output\n      BUILD_TYPE: remote\n      SAVE_PERF_REPORT: ${{ github.event.inputs.save_perf_report || ( github.ref_name == 'main' ) }}\n      PLATFORM: \"neon-staging\"\n\n    runs-on: [ self-hosted, us-east-2, x64 ]\n    container:\n      image: ghcr.io/neondatabase/build-tools:pinned-bookworm\n      credentials:\n        username: ${{ github.actor }}\n        password: ${{ secrets.GITHUB_TOKEN }}\n      options: --init\n\n    steps:\n    - name: Harden the runner (Audit all outbound calls)\n      uses: step-security/harden-runner@4d991eb9b905ef189e4c376166672c3f2f230481 # v2.11.0\n      with:\n        egress-policy: audit\n\n    - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2\n\n    - name: Configure AWS credentials\n      uses: aws-actions/configure-aws-credentials@e3dd6a429d7300a6a4c196c26e071d42e0343502 # v4.0.2\n      with:\n        aws-region: eu-central-1\n        role-to-assume: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}\n        role-duration-seconds: 18000 # 5 hours\n\n    - name: Download Neon artifact\n      uses: ./.github/actions/download\n      with:\n        name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact\n        path: /tmp/neon/\n        
prefix: latest\n        aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}\n\n    - name: Verify that cumulative statistics are preserved\n      uses: ./.github/actions/run-python-test-set\n      with:\n        build_type: ${{ env.BUILD_TYPE }}\n        test_selection: performance/test_cumulative_statistics_persistence.py\n        run_in_parallel: false\n        save_perf_report: ${{ env.SAVE_PERF_REPORT }}\n        extra_params: -m remote_cluster --timeout 3600\n        pg_version: ${{ env.DEFAULT_PG_VERSION }}\n        aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}\n      env:\n        VIP_VAP_ACCESS_TOKEN: \"${{ secrets.VIP_VAP_ACCESS_TOKEN }}\"\n        PERF_TEST_RESULT_CONNSTR: \"${{ secrets.PERF_TEST_RESULT_CONNSTR }}\"\n        NEON_API_KEY: ${{ secrets.NEON_STAGING_API_KEY }}\n\n  replication-tests:\n    if: ${{ github.event.inputs.run_only_pgvector_tests == 'false' || github.event.inputs.run_only_pgvector_tests == null }}\n    permissions:\n      contents: write\n      statuses: write\n      id-token: write # aws-actions/configure-aws-credentials\n    env:\n      POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install\n      DEFAULT_PG_VERSION: 16\n      TEST_OUTPUT: /tmp/test_output\n      BUILD_TYPE: remote\n      SAVE_PERF_REPORT: ${{ github.event.inputs.save_perf_report || ( github.ref_name == 'main' ) }}\n      PLATFORM: \"neon-staging\"\n\n    runs-on: [ self-hosted, us-east-2, x64 ]\n    container:\n      image: ghcr.io/neondatabase/build-tools:pinned-bookworm\n      credentials:\n        username: ${{ github.actor }}\n        password: ${{ secrets.GITHUB_TOKEN }}\n      options: --init\n\n    steps:\n    - name: Harden the runner (Audit all outbound calls)\n      uses: step-security/harden-runner@4d991eb9b905ef189e4c376166672c3f2f230481 # v2.11.0\n      with:\n        egress-policy: audit\n\n    - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2\n\n    - name: Configure AWS credentials\n      uses: 
aws-actions/configure-aws-credentials@e3dd6a429d7300a6a4c196c26e071d42e0343502 # v4.0.2\n      with:\n        aws-region: eu-central-1\n        role-to-assume: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}\n        role-duration-seconds: 18000 # 5 hours\n\n    - name: Download Neon artifact\n      uses: ./.github/actions/download\n      with:\n        name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact\n        path: /tmp/neon/\n        prefix: latest\n        aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}\n\n    - name: Run Logical Replication benchmarks\n      uses: ./.github/actions/run-python-test-set\n      with:\n        build_type: ${{ env.BUILD_TYPE }}\n        test_selection: performance/test_logical_replication.py\n        run_in_parallel: false\n        save_perf_report: ${{ env.SAVE_PERF_REPORT }}\n        extra_params: -m remote_cluster --timeout 5400\n        pg_version: ${{ env.DEFAULT_PG_VERSION }}\n        aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}\n      env:\n        VIP_VAP_ACCESS_TOKEN: \"${{ secrets.VIP_VAP_ACCESS_TOKEN }}\"\n        PERF_TEST_RESULT_CONNSTR: \"${{ secrets.PERF_TEST_RESULT_CONNSTR }}\"\n        NEON_API_KEY: ${{ secrets.NEON_STAGING_API_KEY }}\n        BENCHMARK_PROJECT_ID_PUB: ${{ vars.BENCHMARK_PROJECT_ID_PUB }}\n        BENCHMARK_PROJECT_ID_SUB: ${{ vars.BENCHMARK_PROJECT_ID_SUB }}\n\n    - name: Run Physical Replication benchmarks\n      uses: ./.github/actions/run-python-test-set\n      with:\n        build_type: ${{ env.BUILD_TYPE }}\n        test_selection: performance/test_physical_replication.py\n        run_in_parallel: false\n        save_perf_report: ${{ env.SAVE_PERF_REPORT }}\n        extra_params: -m remote_cluster --timeout 5400\n        pg_version: ${{ env.DEFAULT_PG_VERSION }}\n        aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}\n      env:\n        VIP_VAP_ACCESS_TOKEN: \"${{ secrets.VIP_VAP_ACCESS_TOKEN }}\"\n        PERF_TEST_RESULT_CONNSTR: \"${{ secrets.PERF_TEST_RESULT_CONNSTR 
}}\"\n        NEON_API_KEY: ${{ secrets.NEON_STAGING_API_KEY }}\n\n    - name: Create Allure report\n      id: create-allure-report\n      if: ${{ !cancelled() }}\n      uses: ./.github/actions/allure-report-generate\n      with:\n        store-test-results-into-db: true\n        aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}\n      env:\n        REGRESS_TEST_RESULT_CONNSTR_NEW: ${{ secrets.REGRESS_TEST_RESULT_CONNSTR_NEW }}\n\n    # Post both success and failure to the Slack channel\n    - name: Post to a Slack channel\n      if: ${{ github.event.schedule && !cancelled() }}\n      uses: slackapi/slack-github-action@fcfb566f8b0aab22203f066d80ca1d7e4b5d05b3 # v1.27.1\n      with:\n        channel-id: \"C06T9AMNDQQ\" # on-call-compute-staging-stream\n        slack-message: |\n          Periodic replication testing: ${{ job.status }}\n          <${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|GitHub Run>\n          <${{ steps.create-allure-report.outputs.report-url }}|Allure report>\n      env:\n        SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}\n\n  prewarm-test:\n    if: ${{ github.event.inputs.run_only_pgvector_tests == 'false' || github.event.inputs.run_only_pgvector_tests == null }}\n    permissions:\n      contents: write\n      statuses: write\n      id-token: write # aws-actions/configure-aws-credentials\n    env:\n      PROJECT_ID: ${{ vars.PREWARM_PROJECT_ID }}\n      POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install\n      DEFAULT_PG_VERSION: 17\n      TEST_OUTPUT: /tmp/test_output\n      BUILD_TYPE: remote\n      SAVE_PERF_REPORT: ${{ github.event.inputs.save_perf_report || ( github.ref_name == 'main' ) }}\n      PLATFORM: \"neon-staging\"\n\n    runs-on: [ self-hosted, us-east-2, x64 ]\n    container:\n      image: ghcr.io/neondatabase/build-tools:pinned-bookworm\n      credentials:\n        username: ${{ github.actor }}\n        password: ${{ secrets.GITHUB_TOKEN }}\n      options: --init\n\n    steps:\n    - name: 
Harden the runner (Audit all outbound calls)\n      uses: step-security/harden-runner@4d991eb9b905ef189e4c376166672c3f2f230481 # v2.11.0\n      with:\n        egress-policy: audit\n\n    - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2\n\n    - name: Configure AWS credentials\n      uses: aws-actions/configure-aws-credentials@e3dd6a429d7300a6a4c196c26e071d42e0343502 # v4.0.2\n      with:\n        aws-region: eu-central-1\n        role-to-assume: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}\n        role-duration-seconds: 18000 # 5 hours\n\n    - name: Download Neon artifact\n      uses: ./.github/actions/download\n      with:\n        name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact\n        path: /tmp/neon/\n        prefix: latest\n        aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}\n\n    - name: Run prewarm benchmark\n      uses: ./.github/actions/run-python-test-set\n      with:\n        build_type: ${{ env.BUILD_TYPE }}\n        test_selection: performance/test_lfc_prewarm.py\n        run_in_parallel: false\n        save_perf_report: ${{ env.SAVE_PERF_REPORT }}\n        extra_params: -m remote_cluster --timeout 5400\n        pg_version: ${{ env.DEFAULT_PG_VERSION }}\n        aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}\n      env:\n        VIP_VAP_ACCESS_TOKEN: \"${{ secrets.VIP_VAP_ACCESS_TOKEN }}\"\n        PERF_TEST_RESULT_CONNSTR: \"${{ secrets.PERF_TEST_RESULT_CONNSTR }}\"\n        NEON_API_KEY: ${{ secrets.NEON_STAGING_API_KEY }}\n\n    - name: Create Allure report\n      id: create-allure-report\n      if: ${{ !cancelled() }}\n      uses: ./.github/actions/allure-report-generate\n      with:\n        store-test-results-into-db: true\n        aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}\n      env:\n        REGRESS_TEST_RESULT_CONNSTR_NEW: ${{ secrets.REGRESS_TEST_RESULT_CONNSTR_NEW }}\n\n  generate-matrices:\n    if: ${{ github.event.inputs.run_only_pgvector_tests == 'false' || 
github.event.inputs.run_only_pgvector_tests == null }}\n    # Create matrices for the benchmarking jobs, so we run benchmarks on rds only once a week (on Saturday)\n    #\n    # Available platforms:\n    # - neonvm-captest-new: Freshly created project (1 CU)\n    # - neonvm-captest-freetier: Use freetier-sized compute (0.25 CU)\n    # - neonvm-captest-azure-new: Freshly created project (1 CU) in azure region\n    # - neonvm-captest-azure-freetier: Use freetier-sized compute (0.25 CU) in azure region\n    # - neonvm-captest-reuse: Reusing existing project\n    # - rds-aurora: Aurora Postgres Serverless v2 with autoscaling from 0.5 to 2 ACUs\n    # - rds-postgres: RDS Postgres db.m5.large instance (2 vCPU, 8 GiB) with gp3 EBS storage\n    env:\n      RUN_AWS_RDS_AND_AURORA: ${{ github.event.inputs.run_AWS_RDS_AND_AURORA || 'false' }}\n      DEFAULT_REGION_ID: ${{ github.event.inputs.region_id || 'aws-us-east-2' }}\n    runs-on: ubuntu-22.04\n    outputs:\n      pgbench-compare-matrix: ${{ steps.pgbench-compare-matrix.outputs.matrix }}\n      olap-compare-matrix: ${{ steps.olap-compare-matrix.outputs.matrix }}\n      tpch-compare-matrix: ${{ steps.tpch-compare-matrix.outputs.matrix }}\n\n    steps:\n    - name: Harden the runner (Audit all outbound calls)\n      uses: step-security/harden-runner@4d991eb9b905ef189e4c376166672c3f2f230481 # v2.11.0\n      with:\n        egress-policy: audit\n\n    - name: Generate matrix for pgbench benchmark\n      id: pgbench-compare-matrix\n      run: |\n        region_id_default=${{ env.DEFAULT_REGION_ID }}\n        runner_default='[\"self-hosted\", \"us-east-2\", \"x64\"]'\n        runner_azure='[\"self-hosted\", \"eastus2\", \"x64\"]'\n        image_default=\"ghcr.io/neondatabase/build-tools:pinned-bookworm\"\n        matrix='{\n          \"pg_version\" : [\n            16\n          ],\n          \"region_id\" : [\n            \"'\"$region_id_default\"'\"\n            ],\n          \"platform\": [\n            
\"neonvm-captest-new\",\n            \"neonvm-captest-reuse\",\n            \"neonvm-captest-new\"\n          ],\n          \"db_size\": [ \"10gb\" ],\n          \"runner\": ['\"$runner_default\"'],\n          \"image\": [ \"'\"$image_default\"'\" ],\n          \"include\": [{ \"pg_version\": 16, \"region_id\": \"'\"$region_id_default\"'\", \"platform\": \"neonvm-captest-freetier\",       \"db_size\": \"3gb\" ,\"runner\": '\"$runner_default\"', \"image\": \"'\"$image_default\"'\"                               },\n                      { \"pg_version\": 16, \"region_id\": \"'\"$region_id_default\"'\", \"platform\": \"neonvm-captest-new\",            \"db_size\": \"10gb\",\"runner\": '\"$runner_default\"', \"image\": \"'\"$image_default\"'\"                               },\n                      { \"pg_version\": 16, \"region_id\": \"'\"$region_id_default\"'\", \"platform\": \"neonvm-captest-new-many-tables\",\"db_size\": \"10gb\",\"runner\": '\"$runner_default\"', \"image\": \"'\"$image_default\"'\"                               },\n                      { \"pg_version\": 16, \"region_id\": \"'\"$region_id_default\"'\", \"platform\": \"neonvm-captest-new\",            \"db_size\": \"50gb\",\"runner\": '\"$runner_default\"', \"image\": \"'\"$image_default\"'\"                               },\n                      { \"pg_version\": 16, \"region_id\": \"azure-eastus2\",          \"platform\": \"neonvm-azure-captest-freetier\", \"db_size\": \"3gb\" ,\"runner\": '\"$runner_azure\"',   \"image\": \"ghcr.io/neondatabase/build-tools:pinned-bookworm\" },\n                      { \"pg_version\": 16, \"region_id\": \"azure-eastus2\",          \"platform\": \"neonvm-azure-captest-new\",      \"db_size\": \"10gb\",\"runner\": '\"$runner_azure\"',   \"image\": \"ghcr.io/neondatabase/build-tools:pinned-bookworm\" },\n                      { \"pg_version\": 16, \"region_id\": \"azure-eastus2\",          \"platform\": \"neonvm-azure-captest-new\",      \"db_size\": 
\"50gb\",\"runner\": '\"$runner_azure\"',   \"image\": \"ghcr.io/neondatabase/build-tools:pinned-bookworm\" },\n                      { \"pg_version\": 16, \"region_id\": \"'\"$region_id_default\"'\", \"platform\": \"neonvm-captest-sharding-reuse\", \"db_size\": \"50gb\",\"runner\": '\"$runner_default\"', \"image\": \"'\"$image_default\"'\"                               },\n                      { \"pg_version\": 17, \"region_id\": \"'\"$region_id_default\"'\", \"platform\": \"neonvm-captest-freetier\",       \"db_size\": \"3gb\" ,\"runner\": '\"$runner_default\"', \"image\": \"'\"$image_default\"'\"                               },\n                      { \"pg_version\": 17, \"region_id\": \"'\"$region_id_default\"'\", \"platform\": \"neonvm-captest-new\",            \"db_size\": \"10gb\",\"runner\": '\"$runner_default\"', \"image\": \"'\"$image_default\"'\"                               },\n                      { \"pg_version\": 17, \"region_id\": \"'\"$region_id_default\"'\", \"platform\": \"neonvm-captest-new-many-tables\",\"db_size\": \"10gb\",\"runner\": '\"$runner_default\"', \"image\": \"'\"$image_default\"'\"                               },\n                      { \"pg_version\": 17, \"region_id\": \"'\"$region_id_default\"'\", \"platform\": \"neonvm-captest-new\",            \"db_size\": \"50gb\",\"runner\": '\"$runner_default\"', \"image\": \"'\"$image_default\"'\"                               }]\n        }'\n\n        if [ \"$(date +%A)\" = \"Saturday\" ] || [ ${RUN_AWS_RDS_AND_AURORA} = \"true\" ]; then\n          matrix=$(echo \"$matrix\" | jq '.include += [{ \"pg_version\": 16, \"region_id\": \"'\"$region_id_default\"'\", \"platform\": \"rds-postgres\", \"db_size\": \"10gb\",\"runner\": '\"$runner_default\"', \"image\": \"'\"$image_default\"'\" },\n                                                     { \"pg_version\": 16, \"region_id\": \"'\"$region_id_default\"'\", \"platform\": \"rds-aurora\", \"db_size\": \"10gb\",\"runner\": 
'\"$runner_default\"', \"image\": \"'\"$image_default\"'\" }]')\n        fi\n\n        echo \"matrix=$(echo \"$matrix\" | jq --compact-output '.')\" >> $GITHUB_OUTPUT\n\n    - name: Generate matrix for OLAP benchmarks\n      id: olap-compare-matrix\n      run: |\n        matrix='{\n          \"platform\": [\n            \"neonvm-captest-reuse\"\n          ],\n          \"pg_version\" : [\n            16,17\n          ]\n        }'\n\n        if [ \"$(date +%A)\" = \"Saturday\" ] || [ ${RUN_AWS_RDS_AND_AURORA} = \"true\" ]; then\n          matrix=$(echo \"$matrix\" | jq '.include += [{ \"pg_version\": 16, \"platform\": \"rds-postgres\" },\n                                                     { \"pg_version\": 16, \"platform\": \"rds-aurora\"   }]')\n        fi\n\n        echo \"matrix=$(echo \"$matrix\" | jq --compact-output '.')\" >> $GITHUB_OUTPUT\n\n    - name: Generate matrix for TPC-H benchmarks\n      id: tpch-compare-matrix\n      run: |\n        matrix='{\n          \"platform\": [\n            \"neonvm-captest-reuse\"\n          ],\n          \"pg_version\" : [\n            16,17\n          ]\n        }'\n\n        if [ \"$(date +%A)\" = \"Saturday\" ] || [ ${RUN_AWS_RDS_AND_AURORA} = \"true\" ]; then\n          matrix=$(echo \"$matrix\" | jq '.include += [{ \"pg_version\": 16, \"platform\": \"rds-postgres\" },\n                                                     { \"pg_version\": 16, \"platform\": \"rds-aurora\"   }]')\n        fi\n\n        echo \"matrix=$(echo \"$matrix\" | jq --compact-output '.')\" >> $GITHUB_OUTPUT\n\n  prepare_AWS_RDS_databases:\n    uses: ./.github/workflows/_benchmarking_preparation.yml\n    secrets: inherit\n\n  pgbench-compare:\n    if: ${{ github.event.inputs.run_only_pgvector_tests == 'false' || github.event.inputs.run_only_pgvector_tests == null }}\n    needs: [ generate-matrices, prepare_AWS_RDS_databases ]\n    permissions:\n      contents: write\n      statuses: write\n      id-token: write # 
aws-actions/configure-aws-credentials\n\n    strategy:\n      fail-fast: false\n      matrix: ${{fromJSON(needs.generate-matrices.outputs.pgbench-compare-matrix)}}\n\n    env:\n      TEST_PG_BENCH_DURATIONS_MATRIX: \"60m\"\n      TEST_PG_BENCH_SCALES_MATRIX: ${{ matrix.db_size }}\n      POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install\n      PG_VERSION: ${{ matrix.pg_version }}\n      TEST_OUTPUT: /tmp/test_output\n      BUILD_TYPE: remote\n      SAVE_PERF_REPORT: ${{ github.event.inputs.save_perf_report || ( github.ref_name == 'main' ) }}\n      PLATFORM: ${{ matrix.platform }}\n\n    runs-on: ${{ matrix.runner }}\n    container:\n      image: ${{ matrix.image }}\n      credentials:\n        username: ${{ github.actor }}\n        password: ${{ secrets.GITHUB_TOKEN }}\n      options: --init\n\n    # Increase timeout to 8h, default timeout is 6h\n    timeout-minutes: 480\n\n    steps:\n    - name: Harden the runner (Audit all outbound calls)\n      uses: step-security/harden-runner@4d991eb9b905ef189e4c376166672c3f2f230481 # v2.11.0\n      with:\n        egress-policy: audit\n\n    - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2\n\n    - name: Configure AWS credentials\n      uses: aws-actions/configure-aws-credentials@e3dd6a429d7300a6a4c196c26e071d42e0343502 # v4.0.2\n      with:\n        aws-region: eu-central-1\n        role-to-assume: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}\n        role-duration-seconds: 18000 # 5 hours\n\n    - name: Download Neon artifact\n      uses: ./.github/actions/download\n      with:\n        name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact\n        path: /tmp/neon/\n        prefix: latest\n        aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}\n\n    - name: Create Neon Project\n      if: contains(fromJSON('[\"neonvm-captest-new\", \"neonvm-captest-new-many-tables\", \"neonvm-captest-freetier\", \"neonvm-azure-captest-freetier\", \"neonvm-azure-captest-new\"]'), matrix.platform)\n      id: 
create-neon-project\n      uses: ./.github/actions/neon-project-create\n      with:\n        region_id: ${{ matrix.region_id }}\n        postgres_version: ${{ env.PG_VERSION }}\n        api_key: ${{ secrets.NEON_STAGING_API_KEY }}\n        compute_units: ${{ (contains(matrix.platform, 'captest-freetier') && '[0.25, 0.25]') || '[1, 1]' }}\n\n    - name: Set up Connection String\n      id: set-up-connstr\n      run: |\n        case \"${PLATFORM}\" in\n          neonvm-captest-reuse)\n            CONNSTR=${{ secrets.BENCHMARK_CAPTEST_CONNSTR }}\n            ;;\n          neonvm-captest-sharding-reuse)\n            CONNSTR=${{ secrets.BENCHMARK_CAPTEST_SHARDING_CONNSTR }}\n            ;;\n          neonvm-captest-new | neonvm-captest-new-many-tables | neonvm-captest-freetier | neonvm-azure-captest-new | neonvm-azure-captest-freetier)\n            CONNSTR=${{ steps.create-neon-project.outputs.dsn }}\n            ;;\n          rds-aurora)\n            CONNSTR=${{ secrets.BENCHMARK_RDS_AURORA_CONNSTR }}\n            ;;\n          rds-postgres)\n            CONNSTR=${{ secrets.BENCHMARK_RDS_POSTGRES_CONNSTR }}\n            ;;\n          *)\n            echo >&2 \"Unknown PLATFORM=${PLATFORM}\"\n            exit 1\n            ;;\n        esac\n\n        echo \"connstr=${CONNSTR}\" >> $GITHUB_OUTPUT\n\n    # we want to compare Neon project OLTP throughput and latency at scale factor 10 GB\n    # without (neonvm-captest-new)\n    # and with (neonvm-captest-new-many-tables) many relations in the database\n    - name: Create many relations before the run\n      if: contains(fromJSON('[\"neonvm-captest-new-many-tables\"]'), matrix.platform)\n      uses: ./.github/actions/run-python-test-set\n      with:\n        build_type: ${{ env.BUILD_TYPE }}\n        test_selection: performance\n        run_in_parallel: false\n        save_perf_report: ${{ env.SAVE_PERF_REPORT }}\n        extra_params: -m remote_cluster --timeout 21600 -k test_perf_many_relations\n        pg_version: ${{ 
env.PG_VERSION }}\n        aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}\n      env:\n        BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }}\n        VIP_VAP_ACCESS_TOKEN: \"${{ secrets.VIP_VAP_ACCESS_TOKEN }}\"\n        PERF_TEST_RESULT_CONNSTR: \"${{ secrets.PERF_TEST_RESULT_CONNSTR }}\"\n        TEST_NUM_RELATIONS: 10000\n\n    - name: Benchmark init\n      uses: ./.github/actions/run-python-test-set\n      with:\n        build_type: ${{ env.BUILD_TYPE }}\n        test_selection: performance\n        run_in_parallel: false\n        save_perf_report: ${{ env.SAVE_PERF_REPORT }}\n        extra_params: -m remote_cluster --timeout 21600 -k test_pgbench_remote_init\n        pg_version: ${{ env.PG_VERSION }}\n        aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}\n      env:\n        BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }}\n        VIP_VAP_ACCESS_TOKEN: \"${{ secrets.VIP_VAP_ACCESS_TOKEN }}\"\n        PERF_TEST_RESULT_CONNSTR: \"${{ secrets.PERF_TEST_RESULT_CONNSTR }}\"\n\n    - name: Benchmark simple-update\n      uses: ./.github/actions/run-python-test-set\n      with:\n        build_type: ${{ env.BUILD_TYPE }}\n        test_selection: performance\n        run_in_parallel: false\n        save_perf_report: ${{ env.SAVE_PERF_REPORT }}\n        extra_params: -m remote_cluster --timeout 21600 -k test_pgbench_remote_simple_update\n        pg_version: ${{ env.PG_VERSION }}\n        aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}\n      env:\n        BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }}\n        VIP_VAP_ACCESS_TOKEN: \"${{ secrets.VIP_VAP_ACCESS_TOKEN }}\"\n        PERF_TEST_RESULT_CONNSTR: \"${{ secrets.PERF_TEST_RESULT_CONNSTR }}\"\n\n    - name: Benchmark select-only\n      uses: ./.github/actions/run-python-test-set\n      with:\n        build_type: ${{ env.BUILD_TYPE }}\n        test_selection: performance\n        run_in_parallel: false\n        save_perf_report: ${{ 
env.SAVE_PERF_REPORT }}\n        extra_params: -m remote_cluster --timeout 21600 -k test_pgbench_remote_select_only\n        pg_version: ${{ env.PG_VERSION }}\n        aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}\n      env:\n        BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }}\n        VIP_VAP_ACCESS_TOKEN: \"${{ secrets.VIP_VAP_ACCESS_TOKEN }}\"\n        PERF_TEST_RESULT_CONNSTR: \"${{ secrets.PERF_TEST_RESULT_CONNSTR }}\"\n\n    - name: Delete Neon Project\n      if: ${{ steps.create-neon-project.outputs.project_id && always() }}\n      uses: ./.github/actions/neon-project-delete\n      with:\n        project_id: ${{ steps.create-neon-project.outputs.project_id }}\n        api_key: ${{ secrets.NEON_STAGING_API_KEY }}\n\n    - name: Create Allure report\n      id: create-allure-report\n      if: ${{ !cancelled() }}\n      uses: ./.github/actions/allure-report-generate\n      with:\n        aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}\n\n    - name: Post to a Slack channel\n      if: ${{ github.event.schedule && failure() }}\n      uses: slackapi/slack-github-action@fcfb566f8b0aab22203f066d80ca1d7e4b5d05b3 # v1.27.1\n      with:\n        channel-id: \"C06KHQVQ7U3\" # on-call-qa-staging-stream\n        slack-message: |\n          Periodic perf testing on ${{ matrix.platform }}: ${{ job.status }}\n          <${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|GitHub Run>\n          <${{ steps.create-allure-report.outputs.report-url }}|Allure report>\n      env:\n        SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}\n\n  pgbench-pgvector:\n    permissions:\n      contents: write\n      statuses: write\n      id-token: write # aws-actions/configure-aws-credentials\n    strategy:\n      fail-fast: false\n      matrix:\n        include:\n          - PLATFORM: \"neonvm-captest-pgvector\"\n            RUNNER: [ self-hosted, us-east-2, x64 ]\n            postgres_version: 16\n          - PLATFORM: 
\"neonvm-captest-pgvector-pg17\"\n            RUNNER: [ self-hosted, us-east-2, x64 ]\n            postgres_version: 17\n          - PLATFORM: \"azure-captest-pgvector\"\n            RUNNER: [ self-hosted, eastus2, x64 ]\n            postgres_version: 16\n\n    env:\n      TEST_PG_BENCH_DURATIONS_MATRIX: \"15m\"\n      TEST_PG_BENCH_SCALES_MATRIX: \"1\"\n      POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install\n      PG_VERSION: ${{ matrix.postgres_version }}\n      TEST_OUTPUT: /tmp/test_output\n      BUILD_TYPE: remote\n\n      SAVE_PERF_REPORT: ${{ github.event.inputs.save_perf_report || ( github.ref_name == 'main' ) }}\n      PLATFORM: ${{ matrix.PLATFORM }}\n\n    runs-on: ${{ matrix.RUNNER }}\n    container:\n      image: ghcr.io/neondatabase/build-tools:pinned-bookworm\n      credentials:\n        username: ${{ github.actor }}\n        password: ${{ secrets.GITHUB_TOKEN }}\n      options: --init\n\n    steps:\n    - name: Harden the runner (Audit all outbound calls)\n      uses: step-security/harden-runner@4d991eb9b905ef189e4c376166672c3f2f230481 # v2.11.0\n      with:\n        egress-policy: audit\n\n    - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2\n\n    - name: Configure AWS credentials\n      uses: aws-actions/configure-aws-credentials@e3dd6a429d7300a6a4c196c26e071d42e0343502 # v4.0.2\n      with:\n        aws-region: eu-central-1\n        role-to-assume: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}\n        role-duration-seconds: 18000 # 5 hours\n\n    - name: Download Neon artifact\n      uses: ./.github/actions/download\n      with:\n        name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact\n        path: /tmp/neon/\n        prefix: latest\n        aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}\n\n    - name: Set up Connection String\n      id: set-up-connstr\n      run: |\n        case \"${PLATFORM}\" in\n          neonvm-captest-pgvector)\n            CONNSTR=${{ secrets.BENCHMARK_PGVECTOR_CONNSTR }}\n            ;;\n  
        neonvm-captest-pgvector-pg17)\n            CONNSTR=${{ secrets.BENCHMARK_PGVECTOR_CONNSTR_PG17 }}\n            ;;\n          azure-captest-pgvector)\n            CONNSTR=${{ secrets.BENCHMARK_PGVECTOR_CONNSTR_AZURE }}\n            ;;\n          *)\n            echo >&2 \"Unknown PLATFORM=${PLATFORM}\"\n            exit 1\n            ;;\n        esac\n\n        echo \"connstr=${CONNSTR}\" >> $GITHUB_OUTPUT\n\n    - name: Benchmark pgvector hnsw indexing\n      uses: ./.github/actions/run-python-test-set\n      with:\n        build_type: ${{ env.BUILD_TYPE }}\n        test_selection: performance/test_perf_olap.py\n        run_in_parallel: false\n        save_perf_report: ${{ env.SAVE_PERF_REPORT }}\n        extra_params: -m remote_cluster --timeout 21600 -k test_pgvector_indexing\n        pg_version: ${{ env.PG_VERSION }}\n        aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}\n      env:\n        VIP_VAP_ACCESS_TOKEN: \"${{ secrets.VIP_VAP_ACCESS_TOKEN }}\"\n        PERF_TEST_RESULT_CONNSTR: \"${{ secrets.PERF_TEST_RESULT_CONNSTR }}\"\n        BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }}\n\n    - name: Benchmark pgvector queries\n      uses: ./.github/actions/run-python-test-set\n      with:\n        build_type: ${{ env.BUILD_TYPE }}\n        test_selection: performance/test_perf_pgvector_queries.py\n        run_in_parallel: false\n        save_perf_report: ${{ env.SAVE_PERF_REPORT }}\n        extra_params: -m remote_cluster --timeout 21600\n        pg_version: ${{ env.PG_VERSION }}\n        aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}\n      env:\n        BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }}\n        VIP_VAP_ACCESS_TOKEN: \"${{ secrets.VIP_VAP_ACCESS_TOKEN }}\"\n        PERF_TEST_RESULT_CONNSTR: \"${{ secrets.PERF_TEST_RESULT_CONNSTR }}\"\n\n    - name: Create Allure report\n      id: create-allure-report\n      if: ${{ !cancelled() }}\n      uses: ./.github/actions/allure-report-generate\n      
with:\n        aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}\n\n    - name: Post to a Slack channel\n      if: ${{ github.event.schedule && failure() }}\n      uses: slackapi/slack-github-action@fcfb566f8b0aab22203f066d80ca1d7e4b5d05b3 # v1.27.1\n      with:\n        channel-id: \"C06KHQVQ7U3\" # on-call-qa-staging-stream\n        slack-message: |\n          Periodic perf testing on ${{ env.PLATFORM }}: ${{ job.status }}\n          <${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|GitHub Run>\n          <${{ steps.create-allure-report.outputs.report-url }}|Allure report>\n      env:\n        SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}\n\n  clickbench-compare:\n    # ClickBench DB for rds-aurora and rds-Postgres deployed to the same clusters\n    # we use for performance testing in pgbench-compare.\n    # Run this job only when pgbench-compare is finished to avoid the intersection.\n    # We might change it after https://github.com/neondatabase/neon/issues/2900.\n    #\n    # *_CLICKBENCH_CONNSTR: Genuine ClickBench DB with ~100M rows\n    # *_CLICKBENCH_10M_CONNSTR: DB with the first 10M rows of ClickBench DB\n    if: ${{ !cancelled() && (github.event.inputs.run_only_pgvector_tests == 'false' || github.event.inputs.run_only_pgvector_tests == null) }}\n    permissions:\n      contents: write\n      statuses: write\n      id-token: write # aws-actions/configure-aws-credentials\n    needs: [ generate-matrices, pgbench-compare, prepare_AWS_RDS_databases ]\n\n    strategy:\n      fail-fast: false\n      matrix: ${{ fromJSON(needs.generate-matrices.outputs.olap-compare-matrix) }}\n\n    env:\n      POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install\n      PG_VERSION: ${{ matrix.pg_version }}\n      TEST_OUTPUT: /tmp/test_output\n      TEST_OLAP_COLLECT_EXPLAIN: ${{ github.event.inputs.collect_olap_explain }}\n      TEST_OLAP_COLLECT_PG_STAT_STATEMENTS: ${{ github.event.inputs.collect_pg_stat_statements }}\n      BUILD_TYPE: remote\n  
    SAVE_PERF_REPORT: ${{ github.event.inputs.save_perf_report || ( github.ref_name == 'main' ) }}\n      PLATFORM: ${{ matrix.platform }}\n\n    runs-on: [ self-hosted, us-east-2, x64 ]\n    container:\n      image: ghcr.io/neondatabase/build-tools:pinned-bookworm\n      credentials:\n        username: ${{ github.actor }}\n        password: ${{ secrets.GITHUB_TOKEN }}\n      options: --init\n\n    # Increase timeout to 12h, default timeout is 6h\n    # we have regression in clickbench causing it to run 2-3x longer\n    timeout-minutes: 720\n\n    steps:\n    - name: Harden the runner (Audit all outbound calls)\n      uses: step-security/harden-runner@4d991eb9b905ef189e4c376166672c3f2f230481 # v2.11.0\n      with:\n        egress-policy: audit\n\n    - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2\n\n    - name: Configure AWS credentials\n      uses: aws-actions/configure-aws-credentials@e3dd6a429d7300a6a4c196c26e071d42e0343502 # v4.0.2\n      with:\n        aws-region: eu-central-1\n        role-to-assume: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}\n        role-duration-seconds: 18000 # 5 hours\n\n    - name: Download Neon artifact\n      uses: ./.github/actions/download\n      with:\n        name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact\n        path: /tmp/neon/\n        prefix: latest\n        aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}\n\n    - name: Set up Connection String\n      id: set-up-connstr\n      run: |\n        case \"${PLATFORM}\" in\n          neonvm-captest-reuse)\n            case \"${PG_VERSION}\" in\n              16)\n                CONNSTR=${{ secrets.BENCHMARK_CAPTEST_CLICKBENCH_10M_CONNSTR }}\n                ;;\n              17)\n                CONNSTR=${{ secrets.BENCHMARK_CAPTEST_CLICKBENCH_CONNSTR_PG17 }}\n                ;;\n              *)\n                echo >&2 \"Unsupported PG_VERSION=${PG_VERSION} for PLATFORM=${PLATFORM}\"\n                exit 1\n                ;;\n           
 esac\n            ;;\n          rds-aurora)\n            CONNSTR=${{ secrets.BENCHMARK_RDS_AURORA_CLICKBENCH_10M_CONNSTR }}\n            ;;\n          rds-postgres)\n            CONNSTR=${{ secrets.BENCHMARK_RDS_POSTGRES_CLICKBENCH_10M_CONNSTR }}\n            ;;\n          *)\n            echo >&2 \"Unknown PLATFORM=${PLATFORM}. Allowed only 'neonvm-captest-reuse', 'rds-aurora', or 'rds-postgres'\"\n            exit 1\n            ;;\n        esac\n\n        echo \"connstr=${CONNSTR}\" >> $GITHUB_OUTPUT\n\n    - name: ClickBench benchmark\n      uses: ./.github/actions/run-python-test-set\n      with:\n        build_type: ${{ env.BUILD_TYPE }}\n        test_selection: performance/test_perf_olap.py\n        run_in_parallel: false\n        save_perf_report: ${{ env.SAVE_PERF_REPORT }}\n        extra_params: -m remote_cluster --timeout 43200 -k test_clickbench\n        pg_version: ${{ env.PG_VERSION }}\n        aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}\n      env:\n        VIP_VAP_ACCESS_TOKEN: \"${{ secrets.VIP_VAP_ACCESS_TOKEN }}\"\n        PERF_TEST_RESULT_CONNSTR: \"${{ secrets.PERF_TEST_RESULT_CONNSTR }}\"\n        TEST_OLAP_COLLECT_EXPLAIN: ${{ github.event.inputs.collect_olap_explain || 'false' }}\n        TEST_OLAP_COLLECT_PG_STAT_STATEMENTS: ${{ github.event.inputs.collect_pg_stat_statements || 'false' }}\n        BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }}\n        TEST_OLAP_SCALE: 10\n\n    - name: Create Allure report\n      id: create-allure-report\n      if: ${{ !cancelled() }}\n      uses: ./.github/actions/allure-report-generate\n      with:\n        aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}\n\n    - name: Post to a Slack channel\n      if: ${{ github.event.schedule && failure() }}\n      uses: slackapi/slack-github-action@fcfb566f8b0aab22203f066d80ca1d7e4b5d05b3 # v1.27.1\n      with:\n        channel-id: \"C06KHQVQ7U3\" # on-call-qa-staging-stream\n        slack-message: |\n          Periodic OLAP perf 
testing on ${{ matrix.platform }}: ${{ job.status }}\n          <${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|GitHub Run>\n          <${{ steps.create-allure-report.outputs.report-url }}|Allure report>\n      env:\n        SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}\n\n  tpch-compare:\n    # TPC-H DB for rds-aurora and rds-Postgres deployed to the same clusters\n    # we use for performance testing in pgbench-compare & clickbench-compare.\n    # Run this job only when clickbench-compare is finished to avoid the intersection.\n    # We might change it after https://github.com/neondatabase/neon/issues/2900.\n    #\n    # *_TPCH_S10_CONNSTR: DB generated with scale factor 10 (~10 GB)\n    # if: ${{ !cancelled() && (github.event.inputs.run_only_pgvector_tests == 'false' || github.event.inputs.run_only_pgvector_tests == null) }}\n    permissions:\n      contents: write\n      statuses: write\n      id-token: write # aws-actions/configure-aws-credentials\n    needs: [ generate-matrices, clickbench-compare, prepare_AWS_RDS_databases ]\n\n    strategy:\n      fail-fast: false\n      matrix: ${{ fromJSON(needs.generate-matrices.outputs.tpch-compare-matrix) }}\n\n    env:\n      POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install\n      PG_VERSION: ${{ matrix.pg_version }}\n      TEST_OUTPUT: /tmp/test_output\n      BUILD_TYPE: remote\n      SAVE_PERF_REPORT: ${{ github.event.inputs.save_perf_report || ( github.ref_name == 'main' ) }}\n      PLATFORM: ${{ matrix.platform }}\n\n    runs-on: [ self-hosted, us-east-2, x64 ]\n    container:\n      image: ghcr.io/neondatabase/build-tools:pinned-bookworm\n      credentials:\n        username: ${{ github.actor }}\n        password: ${{ secrets.GITHUB_TOKEN }}\n      options: --init\n\n    steps:\n    - name: Harden the runner (Audit all outbound calls)\n      uses: step-security/harden-runner@4d991eb9b905ef189e4c376166672c3f2f230481 # v2.11.0\n      with:\n        egress-policy: audit\n\n    - 
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2\n\n    - name: Configure AWS credentials\n      uses: aws-actions/configure-aws-credentials@e3dd6a429d7300a6a4c196c26e071d42e0343502 # v4.0.2\n      with:\n        aws-region: eu-central-1\n        role-to-assume: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}\n        role-duration-seconds: 18000 # 5 hours\n\n    - name: Download Neon artifact\n      uses: ./.github/actions/download\n      with:\n        name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact\n        path: /tmp/neon/\n        prefix: latest\n        aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}\n\n    - name: Get Connstring Secret Name\n      run: |\n        case \"${PLATFORM}\" in\n          neonvm-captest-reuse)\n            case \"${PG_VERSION}\" in\n              16)\n                CONNSTR_SECRET_NAME=\"BENCHMARK_CAPTEST_TPCH_S10_CONNSTR\"\n                ;;\n              17)\n                CONNSTR_SECRET_NAME=\"BENCHMARK_CAPTEST_TPCH_CONNSTR_PG17\"\n                ;;\n              *)\n                echo >&2 \"Unsupported PG_VERSION=${PG_VERSION} for PLATFORM=${PLATFORM}\"\n                exit 1\n                ;;\n            esac\n            ;;\n          rds-aurora)\n            CONNSTR_SECRET_NAME=\"BENCHMARK_RDS_AURORA_TPCH_S10_CONNSTR\"\n            ;;\n          rds-postgres)\n            CONNSTR_SECRET_NAME=\"BENCHMARK_RDS_POSTGRES_TPCH_S10_CONNSTR\"\n            ;;\n          *)\n            echo >&2 \"Unknown PLATFORM=${PLATFORM}. 
Allowed only 'neonvm-captest-reuse', 'rds-aurora', or 'rds-postgres'\"\n            exit 1\n            ;;\n        esac\n\n        echo \"CONNSTR_SECRET_NAME=${CONNSTR_SECRET_NAME}\" >> $GITHUB_ENV\n\n    - name: Set up Connection String\n      id: set-up-connstr\n      run: |\n        CONNSTR=${{ secrets[env.CONNSTR_SECRET_NAME] }}\n\n        echo \"connstr=${CONNSTR}\" >> $GITHUB_OUTPUT\n\n    - name: Run TPC-H benchmark\n      uses: ./.github/actions/run-python-test-set\n      with:\n        build_type: ${{ env.BUILD_TYPE }}\n        test_selection: performance/test_perf_olap.py\n        run_in_parallel: false\n        save_perf_report: ${{ env.SAVE_PERF_REPORT }}\n        extra_params: -m remote_cluster --timeout 21600 -k test_tpch\n        pg_version: ${{ env.PG_VERSION }}\n        aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}\n      env:\n        VIP_VAP_ACCESS_TOKEN: \"${{ secrets.VIP_VAP_ACCESS_TOKEN }}\"\n        PERF_TEST_RESULT_CONNSTR: \"${{ secrets.PERF_TEST_RESULT_CONNSTR }}\"\n        BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }}\n        TEST_OLAP_SCALE: 10\n\n    - name: Create Allure report\n      id: create-allure-report\n      if: ${{ !cancelled() }}\n      uses: ./.github/actions/allure-report-generate\n      with:\n        aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}\n\n    - name: Post to a Slack channel\n      if: ${{ github.event.schedule && failure() }}\n      uses: slackapi/slack-github-action@fcfb566f8b0aab22203f066d80ca1d7e4b5d05b3 # v1.27.1\n      with:\n        channel-id: \"C06KHQVQ7U3\" # on-call-qa-staging-stream\n        slack-message: |\n          Periodic TPC-H perf testing on ${{ matrix.platform }}: ${{ job.status }}\n          <${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|GitHub Run>\n          <${{ steps.create-allure-report.outputs.report-url }}|Allure report>\n      env:\n        SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}\n\n  
user-examples-compare:\n    # if: ${{ !cancelled() && (github.event.inputs.run_only_pgvector_tests == 'false' || github.event.inputs.run_only_pgvector_tests == null) }}\n    permissions:\n      contents: write\n      statuses: write\n      id-token: write # aws-actions/configure-aws-credentials\n    needs: [ generate-matrices, tpch-compare, prepare_AWS_RDS_databases ]\n\n    strategy:\n      fail-fast: false\n      matrix: ${{ fromJSON(needs.generate-matrices.outputs.olap-compare-matrix) }}\n\n    env:\n      POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install\n      PG_VERSION: ${{ matrix.pg_version }}\n      TEST_OUTPUT: /tmp/test_output\n      BUILD_TYPE: remote\n      SAVE_PERF_REPORT: ${{ github.event.inputs.save_perf_report || ( github.ref_name == 'main' ) }}\n      PLATFORM: ${{ matrix.platform }}\n\n    runs-on: [ self-hosted, us-east-2, x64 ]\n    container:\n      image: ghcr.io/neondatabase/build-tools:pinned-bookworm\n      credentials:\n        username: ${{ github.actor }}\n        password: ${{ secrets.GITHUB_TOKEN }}\n      options: --init\n\n    steps:\n    - name: Harden the runner (Audit all outbound calls)\n      uses: step-security/harden-runner@4d991eb9b905ef189e4c376166672c3f2f230481 # v2.11.0\n      with:\n        egress-policy: audit\n\n    - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2\n\n    - name: Configure AWS credentials\n      uses: aws-actions/configure-aws-credentials@e3dd6a429d7300a6a4c196c26e071d42e0343502 # v4.0.2\n      with:\n        aws-region: eu-central-1\n        role-to-assume: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}\n        role-duration-seconds: 18000 # 5 hours\n\n    - name: Download Neon artifact\n      uses: ./.github/actions/download\n      with:\n        name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact\n        path: /tmp/neon/\n        prefix: latest\n        aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}\n\n    - name: Set up Connection String\n      id: set-up-connstr\n      
run: |\n        case \"${PLATFORM}\" in\n          neonvm-captest-reuse)\n            case \"${PG_VERSION}\" in\n              16)\n                CONNSTR=${{ secrets.BENCHMARK_USER_EXAMPLE_CAPTEST_CONNSTR }}\n                ;;\n              17)\n                CONNSTR=${{ secrets.BENCHMARK_CAPTEST_USER_EXAMPLE_CONNSTR_PG17 }}\n                ;;\n              *)\n                echo >&2 \"Unsupported PG_VERSION=${PG_VERSION} for PLATFORM=${PLATFORM}\"\n                exit 1\n                ;;\n            esac\n            ;;\n          rds-aurora)\n            CONNSTR=${{ secrets.BENCHMARK_USER_EXAMPLE_RDS_AURORA_CONNSTR }}\n            ;;\n          rds-postgres)\n            CONNSTR=${{ secrets.BENCHMARK_USER_EXAMPLE_RDS_POSTGRES_CONNSTR }}\n            ;;\n          *)\n            echo >&2 \"Unknown PLATFORM=${PLATFORM}. Allowed only 'neonvm-captest-reuse', 'rds-aurora', or 'rds-postgres'\"\n            exit 1\n            ;;\n        esac\n\n        echo \"connstr=${CONNSTR}\" >> $GITHUB_OUTPUT\n\n    - name: Run user examples\n      uses: ./.github/actions/run-python-test-set\n      with:\n        build_type: ${{ env.BUILD_TYPE }}\n        test_selection: performance/test_perf_olap.py\n        run_in_parallel: false\n        save_perf_report: ${{ env.SAVE_PERF_REPORT }}\n        extra_params: -m remote_cluster --timeout 21600 -k test_user_examples\n        pg_version: ${{ env.PG_VERSION }}\n        aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}\n      env:\n        VIP_VAP_ACCESS_TOKEN: \"${{ secrets.VIP_VAP_ACCESS_TOKEN }}\"\n        PERF_TEST_RESULT_CONNSTR: \"${{ secrets.PERF_TEST_RESULT_CONNSTR }}\"\n        BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }}\n\n    - name: Create Allure report\n      id: create-allure-report\n      if: ${{ !cancelled() }}\n      uses: ./.github/actions/allure-report-generate\n      with:\n        aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}\n\n    - name: Post to a Slack channel\n      
if: ${{ github.event.schedule && failure() }}\n      uses: slackapi/slack-github-action@fcfb566f8b0aab22203f066d80ca1d7e4b5d05b3 # v1.27.1\n      with:\n        channel-id: \"C06KHQVQ7U3\" # on-call-qa-staging-stream\n        slack-message: |\n          Periodic User example perf testing on ${{ matrix.platform }}: ${{ job.status }}\n          <${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|GitHub Run>\n          <${{ steps.create-allure-report.outputs.report-url }}|Allure report>\n\n      env:\n        SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}\n"
  },
  {
    "path": ".github/workflows/build-build-tools-image.yml",
    "content": "name: Build build-tools image\n\non:\n  workflow_call:\n    inputs:\n      archs:\n        description: \"Json array of architectures to build\"\n        # Default values are set in `check-image` job, `set-variables` step\n        type: string\n        required: false\n      debians:\n        description: \"Json array of Debian versions to build\"\n        # Default values are set in `check-image` job, `set-variables` step\n        type: string\n        required: false\n    outputs:\n      image-tag:\n        description: \"build-tools tag\"\n        value: ${{ jobs.check-image.outputs.tag }}\n      image:\n        description: \"build-tools image\"\n        value: ghcr.io/neondatabase/build-tools:${{ jobs.check-image.outputs.tag }}\n\ndefaults:\n  run:\n    shell: bash -euo pipefail {0}\n\n# The initial idea was to prevent the waste of resources by not re-building the `build-tools` image\n# for the same tag in parallel workflow runs, and queue them to be skipped once we have\n# the first image pushed to Docker registry, but GitHub's concurrency mechanism is not working as expected.\n# GitHub can't have more than 1 job in a queue and removes the previous one, it causes failures if the dependent jobs.\n#\n# Ref https://github.com/orgs/community/discussions/41518\n#\n# concurrency:\n#   group: build-build-tools-image-${{ inputs.image-tag }}\n#   cancel-in-progress: false\n\n# No permission for GITHUB_TOKEN by default; the **minimal required** set of permissions should be granted in each job.\npermissions: {}\n\njobs:\n  check-image:\n    runs-on: ubuntu-22.04\n    outputs:\n      archs: ${{ steps.set-variables.outputs.archs }}\n      debians: ${{ steps.set-variables.outputs.debians }}\n      tag: ${{ steps.set-variables.outputs.image-tag }}\n      everything: ${{ steps.set-more-variables.outputs.everything }}\n      found: ${{ steps.set-more-variables.outputs.found }}\n\n    permissions:\n      packages: read\n\n    steps:\n      - name: Harden the 
runner (Audit all outbound calls)\n        uses: step-security/harden-runner@4d991eb9b905ef189e4c376166672c3f2f230481 # v2.11.0\n        with:\n          egress-policy: audit\n\n      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2\n\n      - uses: docker/login-action@74a5d142397b4f367a81961eba4e8cd7edddf772 # v3.4.0\n        with:\n          registry: ghcr.io\n          username: ${{ github.actor }}\n          password: ${{ secrets.GITHUB_TOKEN }}\n\n      - name: Set variables\n        id: set-variables\n        env:\n          ARCHS: ${{ inputs.archs || '[\"x64\",\"arm64\"]' }}\n          DEBIANS: ${{ inputs.debians || '[\"bullseye\",\"bookworm\"]' }}\n          IMAGE_TAG: |\n            ${{ hashFiles('build-tools/Dockerfile',\n                          '.github/workflows/build-build-tools-image.yml') }}\n        run: |\n          echo \"archs=${ARCHS}\"           | tee -a ${GITHUB_OUTPUT}\n          echo \"debians=${DEBIANS}\"       | tee -a ${GITHUB_OUTPUT}\n          echo \"image-tag=${IMAGE_TAG}\"   | tee -a ${GITHUB_OUTPUT}\n\n      - name: Set more variables\n        id: set-more-variables\n        env:\n          IMAGE_TAG: ${{ steps.set-variables.outputs.image-tag }}\n          EVERYTHING: |\n            ${{ contains(fromJSON(steps.set-variables.outputs.archs), 'x64') &&\n                contains(fromJSON(steps.set-variables.outputs.archs), 'arm64') &&\n                contains(fromJSON(steps.set-variables.outputs.debians), 'bullseye') &&\n                contains(fromJSON(steps.set-variables.outputs.debians), 'bookworm') }}\n        run: |\n          if docker manifest inspect ghcr.io/neondatabase/build-tools:${IMAGE_TAG}; then\n            found=true\n          else\n            found=false\n          fi\n\n          echo \"everything=${EVERYTHING}\" | tee -a ${GITHUB_OUTPUT}\n          echo \"found=${found}\"           | tee -a ${GITHUB_OUTPUT}\n\n  build-image:\n    needs: [ check-image ]\n    if: 
needs.check-image.outputs.found == 'false'\n\n    strategy:\n      matrix:\n        arch: ${{ fromJSON(needs.check-image.outputs.archs) }}\n        debian: ${{ fromJSON(needs.check-image.outputs.debians) }}\n\n    permissions:\n      packages: write\n\n    runs-on: ${{ fromJSON(format('[\"self-hosted\", \"{0}\"]', matrix.arch == 'arm64' && 'large-arm64' || 'large')) }}\n\n    steps:\n      - name: Harden the runner (Audit all outbound calls)\n        uses: step-security/harden-runner@4d991eb9b905ef189e4c376166672c3f2f230481 # v2.11.0\n        with:\n          egress-policy: audit\n\n      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2\n\n      - uses: neondatabase/dev-actions/set-docker-config-dir@6094485bf440001c94a94a3f9e221e81ff6b6193\n      - uses: docker/setup-buildx-action@b5ca514318bd6ebac0fb2aedd5d36ec1b5c232a2 # v3.10.0\n        with:\n          cache-binary: false\n\n      - uses: docker/login-action@74a5d142397b4f367a81961eba4e8cd7edddf772 # v3.4.0\n        with:\n          username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}\n          password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}\n\n      - uses: docker/login-action@74a5d142397b4f367a81961eba4e8cd7edddf772 # v3.4.0\n        with:\n          registry: ghcr.io\n          username: ${{ github.actor }}\n          password: ${{ secrets.GITHUB_TOKEN }}\n\n      - uses: docker/login-action@74a5d142397b4f367a81961eba4e8cd7edddf772 # v3.4.0\n        with:\n          registry: cache.neon.build\n          username: ${{ secrets.NEON_CI_DOCKERCACHE_USERNAME }}\n          password: ${{ secrets.NEON_CI_DOCKERCACHE_PASSWORD }}\n\n      - uses: docker/build-push-action@471d1dc4e07e5cdedd4c2171150001c434f0b7a4 # v6.15.0\n        with:\n          file: build-tools/Dockerfile\n          context: .\n          attests: |\n            type=provenance,mode=max\n            type=sbom,generator=docker.io/docker/buildkit-syft-scanner:1\n          push: true\n          pull: true\n          build-args: 
|\n            DEBIAN_VERSION=${{ matrix.debian }}\n          cache-from: type=registry,ref=cache.neon.build/build-tools:cache-${{ matrix.debian }}-${{ matrix.arch }}\n          cache-to: ${{ github.ref_name == 'main' && format('type=registry,ref=cache.neon.build/build-tools:cache-{0}-{1},mode=max', matrix.debian, matrix.arch) || '' }}\n          tags: |\n            ghcr.io/neondatabase/build-tools:${{ needs.check-image.outputs.tag }}-${{ matrix.debian }}-${{ matrix.arch }}\n\n  merge-images:\n    needs: [ check-image, build-image ]\n    runs-on: ubuntu-22.04\n\n    permissions:\n      packages: write\n\n    steps:\n      - name: Harden the runner (Audit all outbound calls)\n        uses: step-security/harden-runner@4d991eb9b905ef189e4c376166672c3f2f230481 # v2.11.0\n        with:\n          egress-policy: audit\n\n      - uses: docker/login-action@74a5d142397b4f367a81961eba4e8cd7edddf772 # v3.4.0\n        with:\n          username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}\n          password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}\n\n      - uses: docker/login-action@74a5d142397b4f367a81961eba4e8cd7edddf772 # v3.4.0\n        with:\n          registry: ghcr.io\n          username: ${{ github.actor }}\n          password: ${{ secrets.GITHUB_TOKEN }}\n\n      - name: Create multi-arch image\n        env:\n          DEFAULT_DEBIAN_VERSION: bookworm\n          ARCHS: ${{ join(fromJSON(needs.check-image.outputs.archs), ' ') }}\n          DEBIANS: ${{ join(fromJSON(needs.check-image.outputs.debians), ' ') }}\n          EVERYTHING: ${{ needs.check-image.outputs.everything }}\n          IMAGE_TAG: ${{ needs.check-image.outputs.tag }}\n        run: |\n          for debian in ${DEBIANS}; do\n            tags=(\"-t\" \"ghcr.io/neondatabase/build-tools:${IMAGE_TAG}-${debian}\")\n\n            if [ \"${EVERYTHING}\" == \"true\" ] && [ \"${debian}\" == \"${DEFAULT_DEBIAN_VERSION}\" ]; then\n              tags+=(\"-t\" \"ghcr.io/neondatabase/build-tools:${IMAGE_TAG}\")\n           
 fi\n\n            for arch in ${ARCHS}; do\n              tags+=(\"ghcr.io/neondatabase/build-tools:${IMAGE_TAG}-${debian}-${arch}\")\n            done\n\n            docker buildx imagetools create \"${tags[@]}\"\n          done\n"
  },
  {
    "path": ".github/workflows/build-macos.yml",
    "content": "name: Check neon with MacOS builds\n\non:\n  workflow_call:\n    inputs:\n      pg_versions:\n        description: \"Array of the pg versions to build for, for example: ['v14', 'v17']\"\n        type: string\n        default: '[]'\n        required: false\n      rebuild_rust_code:\n        description: \"Rebuild Rust code\"\n        type: boolean\n        default: false\n        required: false\n      rebuild_everything:\n        description: \"If true, rebuild for all versions\"\n        type: boolean\n        default: false\n        required: false\n\nenv:\n  RUST_BACKTRACE: 1\n  COPT: '-Werror'\n\n# TODO: move `check-*` and `files-changed` jobs to the \"Caller\" Workflow\n# We should care about that as Github has limitations:\n# - You can connect up to four levels of workflows\n# - You can call a maximum of 20 unique reusable workflows from a single workflow file.\n# https://docs.github.com/en/actions/sharing-automations/reusing-workflows#limitations\npermissions:\n  contents: read\n\njobs:\n  make-all:\n    if: |\n      inputs.pg_versions != '[]' || inputs.rebuild_rust_code || inputs.rebuild_everything ||\n      contains(github.event.pull_request.labels.*.name, 'run-extra-build-macos') ||\n      contains(github.event.pull_request.labels.*.name, 'run-extra-build-*') ||\n      github.ref_name == 'main'\n    timeout-minutes: 60\n    runs-on: macos-15\n    env:\n      # Use release build only, to have less debug info around\n      # Hence keeping target/ (and general cache size) smaller\n      BUILD_TYPE: release\n    steps:\n      - name: Harden the runner (Audit all outbound calls)\n        uses: step-security/harden-runner@4d991eb9b905ef189e4c376166672c3f2f230481 # v2.11.0\n        with:\n          egress-policy: audit\n\n      - name: Checkout main repo\n        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2\n        with:\n          submodules: true\n      \n      - uses: ./.github/actions/prepare-for-subzero\n        
with:\n          token: ${{ secrets.CI_ACCESS_TOKEN }}\n\n      - name: Install build dependencies\n        run: |\n          brew install flex bison openssl protobuf icu4c\n\n      - name: Set extra env for macOS\n        run: |\n          echo 'LDFLAGS=-L/usr/local/opt/openssl@3/lib' >> $GITHUB_ENV\n          echo 'CPPFLAGS=-I/usr/local/opt/openssl@3/include' >> $GITHUB_ENV\n\n      - name: Restore \"pg_install/\" cache\n        id: cache_pg\n        uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3\n        with:\n          path: pg_install\n          key: v1-${{ runner.os }}-${{ runner.arch }}-${{ env.BUILD_TYPE }}-pg-install-v14-${{ hashFiles('Makefile', 'postgres.mk', 'vendor/revisions.json') }}\n\n      - name: Checkout vendor/postgres submodules\n        if: steps.cache_pg.outputs.cache-hit != 'true'\n        run: |\n          git submodule init\n          git submodule update --depth 1 --recursive\n\n      - name: Build Postgres\n        if: steps.cache_pg.outputs.cache-hit != 'true'\n        run: |\n          make postgres -j$(sysctl -n hw.ncpu)\n\n      # This isn't strictly necessary, but it makes the cached and non-cached builds more similar,\n      # When pg_install is restored from cache, there is no 'build/' directory. By removing it\n      # in a non-cached build too, we enforce that the rest of the steps don't depend on it,\n      # so that we notice any build caching bugs earlier.\n      - name: Remove build artifacts\n        if: steps.cache_pg.outputs.cache-hit != 'true'\n        run: |\n          rm -rf build\n\n      # Explicitly update the rust toolchain before running 'make'. The parallel make build can\n      # invoke 'cargo build' more than once in parallel, for different crates.  That's OK, 'cargo'\n      # does its own locking to prevent concurrent builds from stepping on each other's\n      # toes. However, it will first try to update the toolchain, and that step is not locked the\n      # same way. 
To avoid two toolchain updates running in parallel and stepping on each other's\n      # toes, ensure that the toolchain is up-to-date beforehand.\n      - name: Update rust toolchain\n        run: |\n          rustup --version &&\n          rustup update &&\n          rustup show\n\n      - name: Cache cargo deps\n        uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3\n        with:\n          path: |\n            ~/.cargo/registry\n            !~/.cargo/registry/src\n            ~/.cargo/git\n            target\n          key: v1-${{ runner.os }}-${{ runner.arch }}-cargo-${{ hashFiles('./Cargo.lock') }}-${{ hashFiles('./rust-toolchain.toml') }}-rust\n\n      # Build the neon-specific postgres extensions, and all the Rust bits.\n      #\n      # Pass PG_INSTALL_CACHED=1 because PostgreSQL was already built and cached\n      # separately.\n      - name: Build all\n        run: PG_INSTALL_CACHED=1 BUILD_TYPE=release make -j$(sysctl -n hw.ncpu) all\n\n      - name: Check that no warnings are produced\n        run: ./run_clippy.sh\n"
  },
  {
    "path": ".github/workflows/build_and_run_selected_test.yml",
    "content": "name: Build and Run Selected Test\n\non:\n  workflow_dispatch:\n    inputs:\n      test-selection:\n        description: 'Specification of selected test(s), as accepted by pytest -k'\n        required: true\n        type: string\n      run-count:\n        description: 'Number of test runs to perform'\n        required: true\n        type: number\n      archs:\n        description: 'Archs to run tests on, e. g.: [\"x64\", \"arm64\"]'\n        default: '[\"x64\"]'\n        required: true\n        type: string\n      build-types:\n        description: 'Build types to run tests on, e. g.: [\"debug\", \"release\"]'\n        default: '[\"release\"]'\n        required: true\n        type: string\n      pg-versions:\n        description: 'Postgres versions to use for testing,  e.g,: [{\"pg_version\":\"v16\"}, {\"pg_version\":\"v17\"}])'\n        default: '[{\"pg_version\":\"v17\"}]'\n        required: true\n        type: string\n\ndefaults:\n  run:\n    shell: bash -euxo pipefail {0}\n\nenv:\n  RUST_BACKTRACE: 1\n  COPT: '-Werror'\n\njobs:\n  meta:\n    uses: ./.github/workflows/_meta.yml\n    with:\n      github-event-name: ${{ github.event_name }}\n      github-event-json: ${{ toJSON(github.event) }}\n\n  build-and-test-locally:\n    needs: [ meta ]\n    strategy:\n      fail-fast: false\n      matrix:\n        arch: ${{ fromJson(inputs.archs) }}\n        build-type: ${{ fromJson(inputs.build-types) }}\n    uses: ./.github/workflows/_build-and-test-locally.yml\n    with:\n      arch: ${{ matrix.arch }}\n      build-tools-image: ghcr.io/neondatabase/build-tools:pinned-bookworm\n      build-tag: ${{ needs.meta.outputs.build-tag }}\n      build-type: ${{ matrix.build-type }}\n      test-cfg: ${{ inputs.pg-versions }}\n      test-selection: ${{ inputs.test-selection }}\n      test-run-count: ${{ fromJson(inputs.run-count) }}\n      rerun-failed: false\n    secrets: inherit\n\n  create-test-report:\n    needs: [ build-and-test-locally ]\n    if: ${{ 
!cancelled() }}\n    permissions:\n      id-token: write # aws-actions/configure-aws-credentials\n      statuses: write\n      contents: write\n      pull-requests: write\n    outputs:\n      report-url: ${{ steps.create-allure-report.outputs.report-url }}\n\n    runs-on: [ self-hosted, small ]\n    container:\n      image: ghcr.io/neondatabase/build-tools:pinned-bookworm\n      credentials:\n        username: ${{ github.actor }}\n        password: ${{ secrets.GITHUB_TOKEN }}\n      options: --init\n\n    steps:\n      - name: Harden the runner (Audit all outbound calls)\n        uses: step-security/harden-runner@4d991eb9b905ef189e4c376166672c3f2f230481 # v2.11.0\n        with:\n          egress-policy: audit\n\n      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2\n\n      - name: Create Allure report\n        if: ${{ !cancelled() }}\n        id: create-allure-report\n        uses: ./.github/actions/allure-report-generate\n        with:\n          store-test-results-into-db: true\n          aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}\n        env:\n          REGRESS_TEST_RESULT_CONNSTR_NEW: ${{ secrets.REGRESS_TEST_RESULT_CONNSTR_DEV }}\n\n      - uses: actions/github-script@v7\n        if: ${{ !cancelled() }}\n        with:\n          # Retry script for 5XX server errors: https://github.com/actions/github-script#retries\n          retries: 5\n          script: |\n            const report = {\n              reportUrl:     \"${{ steps.create-allure-report.outputs.report-url }}\",\n              reportJsonUrl: \"${{ steps.create-allure-report.outputs.report-json-url }}\",\n            }\n\n            const coverage = {}\n\n            const script = require(\"./scripts/comment-test-report.js\")\n            await script({\n              github,\n              context,\n              fetch,\n              report,\n              coverage,\n            })\n"
  },
  {
    "path": ".github/workflows/build_and_test.yml",
    "content": "name: Build and Test\n\non:\n  push:\n    branches:\n      - main\n      - release\n      - release-proxy\n      - release-compute\n  pull_request:\n\ndefaults:\n  run:\n    shell: bash -euxo pipefail {0}\n\nconcurrency:\n  # Allow only one workflow per any non-`main` branch.\n  group: ${{ github.workflow }}-${{ github.ref_name }}-${{ github.ref_name == 'main' && github.sha || 'anysha' }}\n  cancel-in-progress: true\n\nenv:\n  RUST_BACKTRACE: 1\n  COPT: '-Werror'\n  # A concurrency group that we use for e2e-tests runs, matches `concurrency.group` above with `github.repository` as a prefix\n  E2E_CONCURRENCY_GROUP: ${{ github.repository }}-e2e-tests-${{ github.ref_name }}-${{ github.ref_name == 'main' && github.sha || 'anysha' }}\n\njobs:\n  check-permissions:\n    if: ${{ !contains(github.event.pull_request.labels.*.name, 'run-no-ci') }}\n    uses: ./.github/workflows/check-permissions.yml\n    with:\n      github-event-name: ${{ github.event_name }}\n\n  cancel-previous-e2e-tests:\n    needs: [ check-permissions ]\n    if: github.event_name == 'pull_request'\n    runs-on: ubuntu-22.04\n\n    steps:\n      - name: Harden the runner (Audit all outbound calls)\n        uses: step-security/harden-runner@4d991eb9b905ef189e4c376166672c3f2f230481 # v2.11.0\n        with:\n          egress-policy: audit\n\n      - name: Cancel previous e2e-tests runs for this PR\n        env:\n          GH_TOKEN: ${{ secrets.CI_ACCESS_TOKEN }}\n        run: |\n          gh workflow --repo neondatabase/cloud \\\n            run cancel-previous-in-concurrency-group.yml \\\n              --field concurrency_group=\"${{ env.E2E_CONCURRENCY_GROUP }}\"\n\n  files-changed:\n    needs: [ check-permissions ]\n    runs-on: [ self-hosted, small ]\n    timeout-minutes: 3\n    outputs:\n      check-rust-dependencies: ${{ steps.files-changed.outputs.rust_dependencies }}\n\n    steps:\n      - name: Harden the runner (Audit all outbound calls)\n        uses: 
step-security/harden-runner@4d991eb9b905ef189e4c376166672c3f2f230481 # v2.11.0\n        with:\n          egress-policy: audit\n\n      - name: Checkout\n        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2\n        with:\n          submodules: true\n\n      - name: Check for file changes\n        uses: dorny/paths-filter@de90cc6fb38fc0963ad72b210f1f284cd68cea36  # v3.0.2\n        id: files-changed\n        with:\n          token: ${{ secrets.GITHUB_TOKEN }}\n          filters: .github/file-filters.yaml\n\n  meta:\n    needs: [ check-permissions ]\n    uses: ./.github/workflows/_meta.yml\n    with:\n      github-event-name: ${{ github.event_name }}\n      github-event-json: ${{ toJSON(github.event) }}\n\n  build-build-tools-image:\n    needs: [ check-permissions ]\n    uses: ./.github/workflows/build-build-tools-image.yml\n    secrets: inherit\n\n  lint-yamls:\n    needs: [ meta, check-permissions, build-build-tools-image ]\n    # We do need to run this in `.*-rc-pr` because of hotfixes.\n    if: ${{ contains(fromJSON('[\"pr\", \"push-main\", \"storage-rc-pr\", \"proxy-rc-pr\", \"compute-rc-pr\"]'), needs.meta.outputs.run-kind) }}\n    runs-on: [ self-hosted, small ]\n    container:\n      image: ${{ needs.build-build-tools-image.outputs.image }}\n      credentials:\n        username: ${{ github.actor }}\n        password: ${{ secrets.GITHUB_TOKEN }}\n      options: --init\n\n    steps:\n      - name: Harden the runner (Audit all outbound calls)\n        uses: step-security/harden-runner@4d991eb9b905ef189e4c376166672c3f2f230481 # v2.11.0\n        with:\n          egress-policy: audit\n\n      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2\n\n      - run: make -C compute manifest-schema-validation\n      - run: make lint-openapi-spec\n\n  check-codestyle-python:\n    needs: [ meta, check-permissions, build-build-tools-image ]\n    # No need to run on `main` because we run this in the merge queue. 
We do need to run this in `.*-rc-pr` because of hotfixes.\n    if: ${{ contains(fromJSON('[\"pr\", \"storage-rc-pr\", \"proxy-rc-pr\", \"compute-rc-pr\"]'), needs.meta.outputs.run-kind) }}\n    uses: ./.github/workflows/_check-codestyle-python.yml\n    with:\n      build-tools-image: ${{ needs.build-build-tools-image.outputs.image }}-bookworm\n    secrets: inherit\n\n  check-codestyle-jsonnet:\n    needs: [ meta, check-permissions, build-build-tools-image ]\n    # We do need to run this in `.*-rc-pr` because of hotfixes.\n    if: ${{ contains(fromJSON('[\"pr\", \"push-main\", \"storage-rc-pr\", \"proxy-rc-pr\", \"compute-rc-pr\"]'), needs.meta.outputs.run-kind) }}\n    runs-on: [ self-hosted, small ]\n    container:\n      image: ${{ needs.build-build-tools-image.outputs.image }}\n      credentials:\n        username: ${{ github.actor }}\n        password: ${{ secrets.GITHUB_TOKEN }}\n      options: --init\n\n    steps:\n      - name: Harden the runner (Audit all outbound calls)\n        uses: step-security/harden-runner@4d991eb9b905ef189e4c376166672c3f2f230481 # v2.11.0\n        with:\n          egress-policy: audit\n\n      - name: Checkout\n        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2\n\n      - name: Check Jsonnet code formatting\n        run: |\n          make -C compute jsonnetfmt-test\n\n  # Check that the vendor/postgres-* submodules point to the\n  # corresponding REL_*_STABLE_neon branches.\n  check-submodules:\n    needs: [ check-permissions ]\n    runs-on: ubuntu-22.04\n    steps:\n      - name: Harden the runner (Audit all outbound calls)\n        uses: step-security/harden-runner@4d991eb9b905ef189e4c376166672c3f2f230481 # v2.11.0\n        with:\n          egress-policy: audit\n\n      - name: Checkout\n        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2\n        with:\n          submodules: true\n\n      - uses: dorny/paths-filter@de90cc6fb38fc0963ad72b210f1f284cd68cea36 # v3.0.2\n       
 id: check-if-submodules-changed\n        with:\n          filters: |\n            vendor:\n              - 'vendor/**'\n\n      - name: Check vendor/postgres-v14 submodule reference\n        if: steps.check-if-submodules-changed.outputs.vendor == 'true'\n        uses: jtmullen/submodule-branch-check-action@ab0d3a69278e3fa0a2d4f3be3199d2514b676e13 # v1.3.0\n        with:\n          path: \"vendor/postgres-v14\"\n          fetch_depth: \"50\"\n          sub_fetch_depth: \"50\"\n          pass_if_unchanged: true\n\n      - name: Check vendor/postgres-v15 submodule reference\n        if: steps.check-if-submodules-changed.outputs.vendor == 'true'\n        uses: jtmullen/submodule-branch-check-action@ab0d3a69278e3fa0a2d4f3be3199d2514b676e13 # v1.3.0\n        with:\n          path: \"vendor/postgres-v15\"\n          fetch_depth: \"50\"\n          sub_fetch_depth: \"50\"\n          pass_if_unchanged: true\n\n      - name: Check vendor/postgres-v16 submodule reference\n        if: steps.check-if-submodules-changed.outputs.vendor == 'true'\n        uses: jtmullen/submodule-branch-check-action@ab0d3a69278e3fa0a2d4f3be3199d2514b676e13 # v1.3.0\n        with:\n          path: \"vendor/postgres-v16\"\n          fetch_depth: \"50\"\n          sub_fetch_depth: \"50\"\n          pass_if_unchanged: true\n\n      - name: Check vendor/postgres-v17 submodule reference\n        if: steps.check-if-submodules-changed.outputs.vendor == 'true'\n        uses: jtmullen/submodule-branch-check-action@ab0d3a69278e3fa0a2d4f3be3199d2514b676e13 # v1.3.0\n        with:\n          path: \"vendor/postgres-v17\"\n          fetch_depth: \"50\"\n          sub_fetch_depth: \"50\"\n          pass_if_unchanged: true\n\n  check-codestyle-rust:\n    needs: [ meta, check-permissions, build-build-tools-image ]\n    # No need to run on `main` because we run this in the merge queue. 
We do need to run this in `.*-rc-pr` because of hotfixes.\n    if: ${{ contains(fromJSON('[\"pr\", \"storage-rc-pr\", \"proxy-rc-pr\", \"compute-rc-pr\"]'), needs.meta.outputs.run-kind) }}\n    uses: ./.github/workflows/_check-codestyle-rust.yml\n    with:\n      build-tools-image: ${{ needs.build-build-tools-image.outputs.image }}-bookworm\n      archs: '[\"x64\", \"arm64\"]'\n    secrets: inherit\n\n  check-dependencies-rust:\n    needs: [ meta, files-changed, build-build-tools-image ]\n    # No need to run on `main` because we run this in the merge queue. We do need to run this in `.*-rc-pr` because of hotfixes.\n    if: ${{ needs.files-changed.outputs.check-rust-dependencies == 'true' && contains(fromJSON('[\"pr\", \"storage-rc-pr\", \"proxy-rc-pr\", \"compute-rc-pr\"]'), needs.meta.outputs.run-kind) }}\n    uses: ./.github/workflows/cargo-deny.yml\n    with:\n      build-tools-image: ${{ needs.build-build-tools-image.outputs.image }}-bookworm\n    secrets: inherit\n\n  build-and-test-locally:\n    needs: [ meta, build-build-tools-image ]\n    # We do need to run this in `.*-rc-pr` because of hotfixes.\n    if: ${{ contains(fromJSON('[\"pr\", \"push-main\", \"storage-rc-pr\", \"proxy-rc-pr\", \"compute-rc-pr\"]'), needs.meta.outputs.run-kind) }}\n    strategy:\n      fail-fast: false\n      matrix:\n        arch: [ x64, arm64 ]\n        # Do not build or run tests in debug for release branches\n        build-type: ${{ fromJSON((startsWith(github.ref_name, 'release') && github.event_name == 'push') && '[\"release\"]' || '[\"debug\", \"release\"]') }}\n        include:\n          - build-type: release\n            arch: arm64\n    uses: ./.github/workflows/_build-and-test-locally.yml\n    with:\n      arch: ${{ matrix.arch }}\n      build-tools-image: ${{ needs.build-build-tools-image.outputs.image }}-bookworm\n      build-tag: ${{ needs.meta.outputs.build-tag }}\n      build-type: ${{ matrix.build-type }}\n      # Run tests on all Postgres versions in release builds 
and only on the latest version in debug builds.\n      # Run without LFC on v17 release and debug builds only. For all the other cases LFC is enabled.\n      test-cfg: |\n        ${{ matrix.build-type == 'release' && '[{\"pg_version\":\"v14\", \"lfc_state\": \"with-lfc\"},\n                                                {\"pg_version\":\"v15\", \"lfc_state\": \"with-lfc\"},\n                                                {\"pg_version\":\"v16\", \"lfc_state\": \"with-lfc\"},\n                                                {\"pg_version\":\"v17\", \"lfc_state\": \"with-lfc\"},\n                                                {\"pg_version\":\"v17\", \"lfc_state\": \"without-lfc\"}]'\n                                           || '[{\"pg_version\":\"v17\", \"lfc_state\": \"without-lfc\" }]' }}\n    secrets: inherit\n\n  # Keep `benchmarks` job outside of `build-and-test-locally` workflow to make job failures non-blocking\n  get-benchmarks-durations:\n    if: github.ref_name == 'main' || contains(github.event.pull_request.labels.*.name, 'run-benchmarks')\n    outputs:\n      json: ${{ steps.get-benchmark-durations.outputs.json }}\n    needs: [ check-permissions, build-build-tools-image ]\n    runs-on: [ self-hosted, small ]\n    container:\n      image: ${{ needs.build-build-tools-image.outputs.image }}-bookworm\n      credentials:\n        username: ${{ github.actor }}\n        password: ${{ secrets.GITHUB_TOKEN }}\n      options: --init\n    steps:\n      - name: Harden the runner (Audit all outbound calls)\n        uses: step-security/harden-runner@4d991eb9b905ef189e4c376166672c3f2f230481 # v2.11.0\n        with:\n          egress-policy: audit\n\n      - name: Checkout\n        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2\n\n      - name: Cache poetry deps\n        uses: tespkg/actions-cache@b7bf5fcc2f98a52ac6080eb0fd282c2f752074b1  # v1.8.0\n        with:\n          endpoint: ${{ vars.HETZNER_CACHE_REGION }}.${{ 
vars.HETZNER_CACHE_ENDPOINT }}\n          bucket: ${{ vars.HETZNER_CACHE_BUCKET }}\n          accessKey: ${{ secrets.HETZNER_CACHE_ACCESS_KEY }}\n          secretKey: ${{ secrets.HETZNER_CACHE_SECRET_KEY }}\n          use-fallback: false\n          path: ~/.cache/pypoetry/virtualenvs\n          key: v2-${{ runner.os }}-${{ runner.arch }}-python-deps-bookworm-${{ hashFiles('poetry.lock') }}\n\n      - name: Install Python deps\n        run: ./scripts/pysync\n\n      - name: get benchmark durations\n        id: get-benchmark-durations\n        env:\n          TEST_RESULT_CONNSTR: ${{ secrets.REGRESS_TEST_RESULT_CONNSTR_NEW }}\n        run: |\n          poetry run ./scripts/benchmark_durations.py \"${TEST_RESULT_CONNSTR}\" \\\n                                                      --days 10 \\\n                                                      --output /tmp/benchmark_durations.json\n          echo \"json=$(jq --compact-output '.' /tmp/benchmark_durations.json)\" >> $GITHUB_OUTPUT\n\n  benchmarks:\n    # `!failure() && !cancelled()` is required because the workflow depends on the job that can be skipped: `deploy` in PRs\n    if: github.ref_name == 'main' || (contains(github.event.pull_request.labels.*.name, 'run-benchmarks') && !failure() && !cancelled())\n    needs: [ check-permissions, build-build-tools-image, get-benchmarks-durations, deploy ]\n    permissions:\n      id-token: write # aws-actions/configure-aws-credentials\n      statuses: write\n      contents: write\n      pull-requests: write\n    runs-on: [ self-hosted, unit-perf-aws-arm ]\n    container:\n      image: ${{ needs.build-build-tools-image.outputs.image }}-bookworm\n      credentials:\n        username: ${{ github.actor }}\n        password: ${{ secrets.GITHUB_TOKEN }}\n      # for changed limits, see comments on `options:` earlier in this file\n      options: --init --shm-size=512mb --ulimit memlock=67108864:67108864 --ulimit nofile=65536:65536 --security-opt seccomp=unconfined\n    strategy:\n  
    fail-fast: false\n      matrix:\n        # the amount of groups (N) should be reflected in `extra_params: --splits N ...`\n        pytest_split_group: [ 1, 2, 3, 4, 5 ]\n        build_type: [ release ]\n    steps:\n      - name: Harden the runner (Audit all outbound calls)\n        uses: step-security/harden-runner@4d991eb9b905ef189e4c376166672c3f2f230481 # v2.11.0\n        with:\n          egress-policy: audit\n\n      - name: Checkout\n        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2\n\n      - name: Pytest benchmarks\n        uses: ./.github/actions/run-python-test-set\n        with:\n          build_type: ${{ matrix.build_type }}\n          test_selection: performance\n          run_in_parallel: false\n          save_perf_report: ${{ github.ref_name == 'main' }}\n          # test_pageserver_max_throughput_getpage_at_latest_lsn is run in separate workflow periodic_pagebench.yml because it needs snapshots\n          extra_params: --splits 5 --group ${{ matrix.pytest_split_group }} --ignore=test_runner/performance/pageserver/pagebench/test_pageserver_max_throughput_getpage_at_latest_lsn.py\n          benchmark_durations: ${{ needs.get-benchmarks-durations.outputs.json }}\n          pg_version: v16\n          aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}\n        env:\n          VIP_VAP_ACCESS_TOKEN: \"${{ secrets.VIP_VAP_ACCESS_TOKEN }}\"\n          PERF_TEST_RESULT_CONNSTR: \"${{ secrets.PERF_TEST_RESULT_CONNSTR }}\"\n          TEST_RESULT_CONNSTR: \"${{ secrets.REGRESS_TEST_RESULT_CONNSTR_NEW }}\"\n          PAGESERVER_VIRTUAL_FILE_IO_ENGINE: tokio-epoll-uring\n          SYNC_BETWEEN_TESTS: true\n      # XXX: no coverage data handling here, since benchmarks are run on release builds,\n      # while coverage is currently collected for the debug ones\n\n  report-benchmarks-results-to-slack:\n    needs: [ benchmarks, create-test-report ]\n    if: github.ref_name == 'main' && !cancelled() && contains(fromJSON('[\"success\", 
\"failure\"]'), needs.benchmarks.result)\n    runs-on: ubuntu-22.04\n\n    steps:\n    - name: Harden the runner (Audit all outbound calls)\n      uses: step-security/harden-runner@4d991eb9b905ef189e4c376166672c3f2f230481 # v2.11.0\n      with:\n        egress-policy: audit\n\n    - uses: slackapi/slack-github-action@485a9d42d3a73031f12ec201c457e2162c45d02d # v2.0.0\n      with:\n        method: chat.postMessage\n        token: ${{ secrets.SLACK_BOT_TOKEN }}\n        payload: |\n          channel: \"${{ vars.SLACK_ON_CALL_STORAGE_STAGING_STREAM }}\"\n          text: |\n            Benchmarks on main: *${{ needs.benchmarks.result }}*\n            - <${{ needs.create-test-report.outputs.report-url }}|Allure report>\n            - <${{ github.event.head_commit.url }}|${{ github.sha }}>\n\n  create-test-report:\n    needs: [ check-permissions, build-and-test-locally, coverage-report, build-build-tools-image, benchmarks ]\n    if: ${{ !cancelled() && contains(fromJSON('[\"skipped\", \"success\"]'), needs.check-permissions.result) }}\n    permissions:\n      id-token: write # aws-actions/configure-aws-credentials\n      statuses: write\n      contents: write\n      pull-requests: write\n    outputs:\n      report-url: ${{ steps.create-allure-report.outputs.report-url }}\n\n    runs-on: [ self-hosted, small ]\n    container:\n      image: ${{ needs.build-build-tools-image.outputs.image }}-bookworm\n      credentials:\n        username: ${{ github.actor }}\n        password: ${{ secrets.GITHUB_TOKEN }}\n      options: --init\n\n    steps:\n      - name: Harden the runner (Audit all outbound calls)\n        uses: step-security/harden-runner@4d991eb9b905ef189e4c376166672c3f2f230481 # v2.11.0\n        with:\n          egress-policy: audit\n\n      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2\n\n      - name: Create Allure report\n        if: ${{ !cancelled() }}\n        id: create-allure-report\n        uses: 
./.github/actions/allure-report-generate\n        with:\n          store-test-results-into-db: true\n          aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}\n        env:\n          REGRESS_TEST_RESULT_CONNSTR_NEW: ${{ secrets.REGRESS_TEST_RESULT_CONNSTR_NEW }}\n\n      - uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1\n        if: ${{ !cancelled() }}\n        with:\n          # Retry script for 5XX server errors: https://github.com/actions/github-script#retries\n          retries: 5\n          script: |\n            const report = {\n              reportUrl:     \"${{ steps.create-allure-report.outputs.report-url }}\",\n              reportJsonUrl: \"${{ steps.create-allure-report.outputs.report-json-url }}\",\n            }\n\n            const coverage = {\n              coverageUrl: \"${{ needs.coverage-report.outputs.coverage-html }}\",\n              summaryJsonUrl: \"${{ needs.coverage-report.outputs.coverage-json }}\",\n            }\n\n            const script = require(\"./scripts/comment-test-report.js\")\n            await script({\n              github,\n              context,\n              fetch,\n              report,\n              coverage,\n            })\n\n  coverage-report:\n    if: ${{ !startsWith(github.ref_name, 'release') }}\n    needs: [ check-permissions, build-build-tools-image, build-and-test-locally ]\n    permissions:\n      id-token: write # aws-actions/configure-aws-credentials\n      statuses: write\n      contents: write\n    runs-on: [ self-hosted, small ]\n    container:\n      image: ${{ needs.build-build-tools-image.outputs.image }}-bookworm\n      credentials:\n        username: ${{ github.actor }}\n        password: ${{ secrets.GITHUB_TOKEN }}\n      options: --init\n    strategy:\n      fail-fast: false\n      matrix:\n        build_type: [ debug ]\n    outputs:\n        coverage-html: ${{ steps.upload-coverage-report-new.outputs.report-url }}\n        coverage-json: ${{ 
steps.upload-coverage-report-new.outputs.summary-json }}\n    steps:\n      # Need `fetch-depth: 0` for differential coverage (to get diff between two commits)\n      - name: Harden the runner (Audit all outbound calls)\n        uses: step-security/harden-runner@4d991eb9b905ef189e4c376166672c3f2f230481 # v2.11.0\n        with:\n          egress-policy: audit\n\n      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2\n        with:\n          submodules: true\n          fetch-depth: 0\n\n      - name: Get Neon artifact\n        uses: ./.github/actions/download\n        with:\n          name: neon-${{ runner.os }}-${{ runner.arch }}-${{ matrix.build_type }}-artifact\n          path: /tmp/neon\n          aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}\n\n      - name: Get coverage artifact\n        uses: ./.github/actions/download\n        with:\n          name: coverage-data-artifact\n          path: /tmp/coverage\n          aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}\n\n      - name: Merge coverage data\n        run: scripts/coverage \"--profraw-prefix=$GITHUB_JOB\" --dir=/tmp/coverage merge\n\n      - name: Build coverage report\n        env:\n          COMMIT_URL: ${{ github.server_url }}/${{ github.repository }}/commit/${{ github.event.pull_request.head.sha || github.sha }}\n        run: |\n          scripts/coverage --dir=/tmp/coverage \\\n            report \\\n            --input-objects=/tmp/coverage/binaries.list \\\n            --commit-url=${COMMIT_URL} \\\n            --format=github\n\n          scripts/coverage --dir=/tmp/coverage \\\n            report \\\n            --input-objects=/tmp/coverage/binaries.list \\\n            --format=lcov\n\n      - name: Build coverage report NEW\n        id: upload-coverage-report-new\n        env:\n          BUCKET: neon-github-public-dev\n          # A differential coverage report is available only for PRs.\n          # (i.e. 
for pushes into main/release branches we have a regular coverage report)\n          COMMIT_SHA: ${{ github.event.pull_request.head.sha || github.sha }}\n          BASE_SHA: ${{ github.event.pull_request.base.sha || github.sha }}\n        run: |\n          CURRENT=\"${COMMIT_SHA}\"\n          BASELINE=\"$(git merge-base $BASE_SHA $CURRENT)\"\n\n          cp /tmp/coverage/report/lcov.info ./${CURRENT}.info\n\n          GENHTML_ARGS=\"--ignore-errors path,unmapped,empty --synthesize-missing --demangle-cpp rustfilt --output-directory lcov-html ${CURRENT}.info\"\n\n          # Use differential coverage if the baseline coverage exists.\n          # It can be missing if the coverage report wasn't uploaded yet or tests have failed on BASELINE commit.\n          if aws s3 cp --only-show-errors s3://${BUCKET}/code-coverage/${BASELINE}/lcov.info ./${BASELINE}.info; then\n            git diff ${BASELINE} ${CURRENT} -- '*.rs' > baseline-current.diff\n\n            GENHTML_ARGS=\"--baseline-file ${BASELINE}.info --diff-file baseline-current.diff ${GENHTML_ARGS}\"\n          fi\n\n          genhtml ${GENHTML_ARGS}\n\n          aws s3 cp --only-show-errors --recursive ./lcov-html/ s3://${BUCKET}/code-coverage/${COMMIT_SHA}/lcov\n\n          REPORT_URL=https://${BUCKET}.s3.amazonaws.com/code-coverage/${COMMIT_SHA}/lcov/index.html\n          echo \"report-url=${REPORT_URL}\" >> $GITHUB_OUTPUT\n\n          REPORT_URL=https://${BUCKET}.s3.amazonaws.com/code-coverage/${COMMIT_SHA}/lcov/summary.json\n          echo \"summary-json=${REPORT_URL}\" >> $GITHUB_OUTPUT\n\n      - uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1\n        env:\n          REPORT_URL_NEW: ${{ steps.upload-coverage-report-new.outputs.report-url }}\n          COMMIT_SHA: ${{ github.event.pull_request.head.sha || github.sha }}\n        with:\n          # Retry script for 5XX server errors: https://github.com/actions/github-script#retries\n          retries: 5\n          script: |\n        
    const { REPORT_URL_NEW, COMMIT_SHA } = process.env\n\n            await github.rest.repos.createCommitStatus({\n              owner: context.repo.owner,\n              repo: context.repo.repo,\n              sha: `${COMMIT_SHA}`,\n              state: 'success',\n              target_url: `${REPORT_URL_NEW}`,\n              context: 'Code coverage report NEW',\n            })\n\n  trigger-e2e-tests:\n    # !failure() && !cancelled() because it depends on jobs that can get skipped\n    if: >-\n      ${{\n        (\n          (\n            needs.meta.outputs.run-kind == 'pr'\n            && (\n              !github.event.pull_request.draft\n              || contains(github.event.pull_request.labels.*.name, 'run-e2e-tests-in-draft')\n            )\n          )\n          || contains(fromJSON('[\"push-main\", \"storage-rc-pr\", \"proxy-rc-pr\", \"compute-rc-pr\"]'), needs.meta.outputs.run-kind)\n        )\n        && !failure() && !cancelled()\n      }}\n    needs: [ check-permissions, push-neon-image-dev, push-compute-image-dev, meta ]\n    uses: ./.github/workflows/trigger-e2e-tests.yml\n    with:\n      github-event-name: ${{ github.event_name }}\n      github-event-json: ${{ toJSON(github.event) }}\n    secrets: inherit\n\n  neon-image-arch:\n    needs: [ check-permissions, build-build-tools-image, meta ]\n    if: ${{ contains(fromJSON('[\"push-main\", \"pr\", \"storage-rc-pr\", \"proxy-rc-pr\"]'), needs.meta.outputs.run-kind) }}\n    strategy:\n      matrix:\n        arch: [ x64, arm64 ]\n\n    runs-on: ${{ fromJSON(format('[\"self-hosted\", \"{0}\"]', matrix.arch == 'arm64' && 'large-arm64' || 'large')) }}\n\n    permissions:\n      packages: write\n\n    steps:\n      - name: Harden the runner (Audit all outbound calls)\n        uses: step-security/harden-runner@4d991eb9b905ef189e4c376166672c3f2f230481 # v2.11.0\n        with:\n          egress-policy: audit\n\n      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2\n        with:\n 
         submodules: true\n          ref: ${{ needs.meta.outputs.sha }}\n\n      - uses: neondatabase/dev-actions/set-docker-config-dir@6094485bf440001c94a94a3f9e221e81ff6b6193\n      - uses: docker/setup-buildx-action@b5ca514318bd6ebac0fb2aedd5d36ec1b5c232a2 # v3.10.0\n        with:\n          cache-binary: false\n\n      - uses: docker/login-action@74a5d142397b4f367a81961eba4e8cd7edddf772 # v3.4.0\n        with:\n          username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}\n          password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}\n\n      - uses: docker/login-action@74a5d142397b4f367a81961eba4e8cd7edddf772 # v3.4.0\n        with:\n          registry: ghcr.io\n          username: ${{ github.actor }}\n          password: ${{ secrets.GITHUB_TOKEN }}\n\n      - uses: docker/login-action@74a5d142397b4f367a81961eba4e8cd7edddf772 # v3.4.0\n        with:\n          registry: cache.neon.build\n          username: ${{ secrets.NEON_CI_DOCKERCACHE_USERNAME }}\n          password: ${{ secrets.NEON_CI_DOCKERCACHE_PASSWORD }}\n\n      - uses: docker/build-push-action@471d1dc4e07e5cdedd4c2171150001c434f0b7a4 # v6.15.0\n        with:\n          context: .\n          # ARM-specific flags are recommended for Graviton ≥ 2, these flags are also supported by Ampere Altra (Azure)\n          # https://github.com/aws/aws-graviton-getting-started/blob/57dc813626d0266f1cc12ef83474745bb1f31fb4/rust.md\n          build-args: |\n            ADDITIONAL_RUSTFLAGS=${{ matrix.arch == 'arm64' && '-Ctarget-feature=+lse -Ctarget-cpu=neoverse-n1' || '' }}\n            GIT_VERSION=${{ github.event.pull_request.head.sha || github.sha }}\n            BUILD_TAG=${{ needs.meta.outputs.release-tag || needs.meta.outputs.build-tag }}\n            TAG=${{ needs.build-build-tools-image.outputs.image-tag }}-bookworm\n            DEBIAN_VERSION=bookworm\n          secrets: |\n            SUBZERO_ACCESS_TOKEN=${{ secrets.CI_ACCESS_TOKEN }}\n          attests: |\n            type=provenance,mode=max\n            
type=sbom,generator=docker.io/docker/buildkit-syft-scanner:1\n          push: true\n          pull: true\n          file: Dockerfile\n          cache-from: type=registry,ref=cache.neon.build/neon:cache-bookworm-${{ matrix.arch }}\n          cache-to: ${{ github.ref_name == 'main' && format('type=registry,ref=cache.neon.build/neon:cache-{0}-{1},mode=max', 'bookworm', matrix.arch) || '' }}\n          tags: |\n            ghcr.io/neondatabase/neon:${{ needs.meta.outputs.build-tag }}-bookworm-${{ matrix.arch }}\n\n  neon-image:\n    needs: [ neon-image-arch, meta ]\n    if: ${{ contains(fromJSON('[\"push-main\", \"pr\", \"storage-rc-pr\", \"proxy-rc-pr\"]'), needs.meta.outputs.run-kind) }}\n    runs-on: ubuntu-22.04\n    permissions:\n      id-token: write # aws-actions/configure-aws-credentials\n      statuses: write\n      contents: read\n      packages: write\n\n    steps:\n      - name: Harden the runner (Audit all outbound calls)\n        uses: step-security/harden-runner@4d991eb9b905ef189e4c376166672c3f2f230481 # v2.11.0\n        with:\n          egress-policy: audit\n\n      - uses: docker/login-action@74a5d142397b4f367a81961eba4e8cd7edddf772 # v3.4.0\n        with:\n          registry: ghcr.io\n          username: ${{ github.actor }}\n          password: ${{ secrets.GITHUB_TOKEN }}\n\n      - name: Create multi-arch image\n        run: |\n          docker buildx imagetools create -t ghcr.io/neondatabase/neon:${{ needs.meta.outputs.build-tag }} \\\n                                          -t ghcr.io/neondatabase/neon:${{ needs.meta.outputs.build-tag }}-bookworm \\\n                                             ghcr.io/neondatabase/neon:${{ needs.meta.outputs.build-tag }}-bookworm-x64 \\\n                                             ghcr.io/neondatabase/neon:${{ needs.meta.outputs.build-tag }}-bookworm-arm64\n\n  compute-node-image-arch:\n    needs: [ check-permissions, meta ]\n    if: ${{ contains(fromJSON('[\"push-main\", \"pr\", \"compute-rc-pr\"]'), 
needs.meta.outputs.run-kind) }}\n    permissions:\n      id-token: write # aws-actions/configure-aws-credentials\n      statuses: write\n      contents: read\n      packages: write\n    strategy:\n      fail-fast: false\n      matrix:\n        version:\n          # Much data was already generated on old PG versions with bullseye's\n          # libraries, the locales of which can cause data incompatibilities.\n          # However, new PG versions should be built on newer images,\n          # as that reduces the support burden of old and ancient distros.\n          - pg: v14\n            debian: bullseye\n          - pg: v15\n            debian: bullseye\n          - pg: v16\n            debian: bullseye\n          - pg: v17\n            debian: bookworm\n        arch: [ x64, arm64 ]\n\n    runs-on: ${{ fromJSON(format('[\"self-hosted\", \"{0}\"]', matrix.arch == 'arm64' && 'large-arm64' || 'large')) }}\n\n    steps:\n      - name: Harden the runner (Audit all outbound calls)\n        uses: step-security/harden-runner@4d991eb9b905ef189e4c376166672c3f2f230481 # v2.11.0\n        with:\n          egress-policy: audit\n\n      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2\n        with:\n          submodules: true\n          ref: ${{ needs.meta.outputs.sha }}\n\n      - uses: neondatabase/dev-actions/set-docker-config-dir@6094485bf440001c94a94a3f9e221e81ff6b6193\n      - uses: docker/setup-buildx-action@b5ca514318bd6ebac0fb2aedd5d36ec1b5c232a2 # v3.10.0\n        with:\n          cache-binary: false\n          # Disable parallelism for docker buildkit.\n          # As we already build everything with `make -j$(nproc)`, running it in an additional level of parallelism blows up the Runner.\n          buildkitd-config-inline: |\n            [worker.oci]\n              max-parallelism = 1\n\n      - uses: docker/login-action@74a5d142397b4f367a81961eba4e8cd7edddf772 # v3.4.0\n        with:\n          username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}\n 
         password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}\n\n      - uses: docker/login-action@74a5d142397b4f367a81961eba4e8cd7edddf772 # v3.4.0\n        with:\n          registry: ghcr.io\n          username: ${{ github.actor }}\n          password: ${{ secrets.GITHUB_TOKEN }}\n\n      - uses: docker/login-action@74a5d142397b4f367a81961eba4e8cd7edddf772 # v3.4.0\n        with:\n          registry: cache.neon.build\n          username: ${{ secrets.NEON_CI_DOCKERCACHE_USERNAME }}\n          password: ${{ secrets.NEON_CI_DOCKERCACHE_PASSWORD }}\n\n      - name: Build compute-node image\n        uses: docker/build-push-action@471d1dc4e07e5cdedd4c2171150001c434f0b7a4 # v6.15.0\n        with:\n          context: .\n          build-args: |\n            GIT_VERSION=${{ github.event.pull_request.head.sha || github.sha }}\n            PG_VERSION=${{ matrix.version.pg }}\n            BUILD_TAG=${{ needs.meta.outputs.release-tag || needs.meta.outputs.build-tag }}\n            DEBIAN_VERSION=${{ matrix.version.debian }}\n          attests: |\n            type=provenance,mode=max\n            type=sbom,generator=docker.io/docker/buildkit-syft-scanner:1\n          push: true\n          pull: true\n          file: compute/compute-node.Dockerfile\n          cache-from: type=registry,ref=cache.neon.build/compute-node-${{ matrix.version.pg }}:cache-${{ matrix.version.debian }}-${{ matrix.arch }}\n          cache-to: ${{ github.ref_name == 'main' && format('type=registry,ref=cache.neon.build/compute-node-{0}:cache-{1}-{2},mode=max', matrix.version.pg, matrix.version.debian, matrix.arch) || '' }}\n          tags: |\n            ghcr.io/neondatabase/compute-node-${{ matrix.version.pg }}:${{ needs.meta.outputs.build-tag }}-${{ matrix.version.debian }}-${{ matrix.arch }}\n\n      - name: Build neon extensions test image\n        if: matrix.version.pg >= 'v16'\n        uses: docker/build-push-action@471d1dc4e07e5cdedd4c2171150001c434f0b7a4 # v6.15.0\n        with:\n          context: .\n  
        build-args: |\n            GIT_VERSION=${{ github.event.pull_request.head.sha || github.sha }}\n            PG_VERSION=${{ matrix.version.pg }}\n            BUILD_TAG=${{ needs.meta.outputs.release-tag || needs.meta.outputs.build-tag }}\n            DEBIAN_VERSION=${{ matrix.version.debian }}\n          attests: |\n            type=provenance,mode=max\n            type=sbom,generator=docker.io/docker/buildkit-syft-scanner:1\n          push: true\n          pull: true\n          file: compute/compute-node.Dockerfile\n          target: extension-tests\n          cache-from: type=registry,ref=cache.neon.build/compute-node-${{ matrix.version.pg }}:cache-${{ matrix.version.debian }}-${{ matrix.arch }}\n          tags: |\n            ghcr.io/neondatabase/neon-test-extensions-${{ matrix.version.pg }}:${{needs.meta.outputs.build-tag}}-${{ matrix.version.debian }}-${{ matrix.arch }}\n\n  compute-node-image:\n    needs: [ compute-node-image-arch, meta ]\n    if: ${{ contains(fromJSON('[\"push-main\", \"pr\", \"compute-rc-pr\"]'), needs.meta.outputs.run-kind) }}\n    permissions:\n      id-token: write # aws-actions/configure-aws-credentials\n      statuses: write\n      contents: read\n      packages: write\n    runs-on: ubuntu-22.04\n\n    strategy:\n      matrix:\n        version:\n          # see the comment for `compute-node-image-arch` job\n          - pg: v14\n            debian: bullseye\n          - pg: v15\n            debian: bullseye\n          - pg: v16\n            debian: bullseye\n          - pg: v17\n            debian: bookworm\n\n    steps:\n      - name: Harden the runner (Audit all outbound calls)\n        uses: step-security/harden-runner@4d991eb9b905ef189e4c376166672c3f2f230481 # v2.11.0\n        with:\n          egress-policy: audit\n\n      - uses: docker/login-action@74a5d142397b4f367a81961eba4e8cd7edddf772 # v3.4.0\n        with:\n          registry: ghcr.io\n          username: ${{ github.actor }}\n          password: ${{ 
secrets.GITHUB_TOKEN }}\n\n      - name: Create multi-arch compute-node image\n        run: |\n          docker buildx imagetools create -t ghcr.io/neondatabase/compute-node-${{ matrix.version.pg }}:${{ needs.meta.outputs.build-tag }} \\\n                                          -t ghcr.io/neondatabase/compute-node-${{ matrix.version.pg }}:${{ needs.meta.outputs.build-tag }}-${{ matrix.version.debian }} \\\n                                             ghcr.io/neondatabase/compute-node-${{ matrix.version.pg }}:${{ needs.meta.outputs.build-tag }}-${{ matrix.version.debian }}-x64 \\\n                                             ghcr.io/neondatabase/compute-node-${{ matrix.version.pg }}:${{ needs.meta.outputs.build-tag }}-${{ matrix.version.debian }}-arm64\n\n      - name: Create multi-arch neon-test-extensions image\n        if: matrix.version.pg >= 'v16'\n        run: |\n          docker buildx imagetools create -t ghcr.io/neondatabase/neon-test-extensions-${{ matrix.version.pg }}:${{ needs.meta.outputs.build-tag }} \\\n                                          -t ghcr.io/neondatabase/neon-test-extensions-${{ matrix.version.pg }}:${{ needs.meta.outputs.build-tag }}-${{ matrix.version.debian }} \\\n                                             ghcr.io/neondatabase/neon-test-extensions-${{ matrix.version.pg }}:${{ needs.meta.outputs.build-tag }}-${{ matrix.version.debian }}-x64 \\\n                                             ghcr.io/neondatabase/neon-test-extensions-${{ matrix.version.pg }}:${{ needs.meta.outputs.build-tag }}-${{ matrix.version.debian }}-arm64\n\n  vm-compute-node-image-arch:\n    needs: [ check-permissions, meta, compute-node-image ]\n    if: ${{ contains(fromJSON('[\"push-main\", \"pr\", \"compute-rc-pr\"]'), needs.meta.outputs.run-kind) }}\n    runs-on: ${{ fromJSON(format('[\"self-hosted\", \"{0}\"]', matrix.arch == 'arm64' && 'large-arm64' || 'large')) }}\n    permissions:\n      contents: read\n      packages: write\n    strategy:\n      
fail-fast: false\n      matrix:\n        arch: [ amd64, arm64 ]\n        version:\n          - pg: v14\n            debian: bullseye\n          - pg: v15\n            debian: bullseye\n          - pg: v16\n            debian: bullseye\n          - pg: v17\n            debian: bookworm\n    env:\n      VM_BUILDER_VERSION: v0.46.0\n\n    steps:\n      - name: Harden the runner (Audit all outbound calls)\n        uses: step-security/harden-runner@4d991eb9b905ef189e4c376166672c3f2f230481 # v2.11.0\n        with:\n          egress-policy: audit\n\n      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2\n\n      - name: Downloading vm-builder\n        run: |\n          curl -fL https://github.com/neondatabase/autoscaling/releases/download/$VM_BUILDER_VERSION/vm-builder-${{ matrix.arch }} -o vm-builder\n          chmod +x vm-builder\n\n      - uses: neondatabase/dev-actions/set-docker-config-dir@6094485bf440001c94a94a3f9e221e81ff6b6193\n      - uses: docker/login-action@74a5d142397b4f367a81961eba4e8cd7edddf772 # v3.4.0\n        with:\n          registry: ghcr.io\n          username: ${{ github.actor }}\n          password: ${{ secrets.GITHUB_TOKEN }}\n\n      # Note: we need a separate pull step here because otherwise vm-builder will try to pull, and\n      # it won't have the proper authentication (written at v0.6.0)\n      - name: Pulling compute-node image\n        run: |\n          docker pull ghcr.io/neondatabase/compute-node-${{ matrix.version.pg }}:${{ needs.meta.outputs.build-tag }}\n\n      - name: Build vm image\n        run: |\n          ./vm-builder \\\n            -size=2G \\\n            -spec=compute/vm-image-spec-${{ matrix.version.debian }}.yaml \\\n            -src=ghcr.io/neondatabase/compute-node-${{ matrix.version.pg }}:${{ needs.meta.outputs.build-tag }} \\\n            -dst=ghcr.io/neondatabase/vm-compute-node-${{ matrix.version.pg }}:${{ needs.meta.outputs.build-tag }}-${{ matrix.arch }} \\\n            
-target-arch=linux/${{ matrix.arch }}\n\n      - name: Pushing vm-compute-node image\n        run: |\n          docker push ghcr.io/neondatabase/vm-compute-node-${{ matrix.version.pg }}:${{ needs.meta.outputs.build-tag }}-${{ matrix.arch }}\n\n  vm-compute-node-image:\n    needs: [ vm-compute-node-image-arch, meta ]\n    if: ${{ contains(fromJSON('[\"push-main\", \"pr\", \"compute-rc-pr\"]'), needs.meta.outputs.run-kind) }}\n    permissions:\n      packages: write\n    runs-on: ubuntu-22.04\n    strategy:\n      matrix:\n        version:\n          # see the comment for `compute-node-image-arch` job\n          - pg: v14\n          - pg: v15\n          - pg: v16\n          - pg: v17\n    steps:\n      - name: Harden the runner (Audit all outbound calls)\n        uses: step-security/harden-runner@4d991eb9b905ef189e4c376166672c3f2f230481 # v2.11.0\n        with:\n          egress-policy: audit\n\n      - uses: docker/login-action@74a5d142397b4f367a81961eba4e8cd7edddf772 # v3.4.0\n        with:\n          registry: ghcr.io\n          username: ${{ github.actor }}\n          password: ${{ secrets.GITHUB_TOKEN }}\n\n      - name: Create multi-arch compute-node image\n        run: |\n          docker buildx imagetools create -t ghcr.io/neondatabase/vm-compute-node-${{ matrix.version.pg }}:${{ needs.meta.outputs.build-tag }} \\\n                                             ghcr.io/neondatabase/vm-compute-node-${{ matrix.version.pg }}:${{ needs.meta.outputs.build-tag }}-amd64 \\\n                                             ghcr.io/neondatabase/vm-compute-node-${{ matrix.version.pg }}:${{ needs.meta.outputs.build-tag }}-arm64\n\n\n  test-images:\n    needs: [ check-permissions, meta, neon-image, compute-node-image ]\n    # Depends on jobs that can get skipped\n    if: >-\n      ${{\n        !failure()\n        && !cancelled()\n        && contains(fromJSON('[\"push-main\", \"pr\", \"storage-rc-pr\", \"proxy-rc-pr\", \"compute-rc-pr\"]'), needs.meta.outputs.run-kind)\n      
}}\n    strategy:\n      fail-fast: false\n      matrix:\n        arch: [ x64, arm64 ]\n        pg_version: [v16, v17]\n\n    permissions:\n      packages: read\n\n    runs-on: ${{ fromJSON(format('[\"self-hosted\", \"{0}\"]', matrix.arch == 'arm64' && 'small-arm64' || 'small')) }}\n\n    steps:\n      - name: Harden the runner (Audit all outbound calls)\n        uses: step-security/harden-runner@4d991eb9b905ef189e4c376166672c3f2f230481 # v2.11.0\n        with:\n          egress-policy: audit\n\n      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2\n\n      - uses: neondatabase/dev-actions/set-docker-config-dir@6094485bf440001c94a94a3f9e221e81ff6b6193\n\n      - uses: docker/login-action@74a5d142397b4f367a81961eba4e8cd7edddf772 # v3.4.0\n        with:\n          username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}\n          password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}\n\n      - uses: docker/login-action@74a5d142397b4f367a81961eba4e8cd7edddf772 # v3.4.0\n        with:\n          registry: ghcr.io\n          username: ${{ github.actor }}\n          password: ${{ secrets.GITHUB_TOKEN }}\n\n      # `ghcr.io/neondatabase/neon` contains multiple binaries, all of them use the same input for the version into the same version formatting library.\n      # Pick pageserver as currently the only binary with extra \"version\" features printed in the string to verify.\n      # Regular pageserver version string looks like\n      #   Neon page server git-env:32d14403bd6ab4f4520a94cbfd81a6acef7a526c failpoints: true, features: []\n      # Bad versions might look like:\n      #   Neon page server git-env:local failpoints: true, features: [\"testing\"]\n      # Ensure that we don't have bad versions.\n      - name: Verify image versions\n        shell: bash # ensure no set -e for better error messages\n        if: ${{ contains(fromJSON('[\"push-main\", \"pr\", \"storage-rc-pr\", \"proxy-rc-pr\"]'), needs.meta.outputs.run-kind) }}\n        run: |\n          
pageserver_version=$(docker run --rm ghcr.io/neondatabase/neon:${{ needs.meta.outputs.build-tag }} \"/bin/sh\" \"-c\" \"/usr/local/bin/pageserver --version\")\n\n          echo \"Pageserver version string: $pageserver_version\"\n\n          if ! echo \"$pageserver_version\" | grep -qv 'git-env:local' ; then\n            echo \"Pageserver version should not be the default Dockerfile one\"\n            exit 1\n          fi\n\n          if ! echo \"$pageserver_version\" | grep -qv '\"testing\"' ; then\n            echo \"Pageserver version should have no testing feature enabled\"\n            exit 1\n          fi\n\n      - name: Verify docker-compose example and test extensions\n        timeout-minutes: 60\n        env:\n          PARALLEL_COMPUTES: 3\n          TAG: >-\n            ${{\n              needs.meta.outputs.run-kind == 'compute-rc-pr'\n              && needs.meta.outputs.previous-storage-release\n              || needs.meta.outputs.build-tag\n            }}\n          COMPUTE_TAG: >-\n            ${{\n              contains(fromJSON('[\"storage-rc-pr\", \"proxy-rc-pr\"]'), needs.meta.outputs.run-kind)\n              && needs.meta.outputs.previous-compute-release\n              || needs.meta.outputs.build-tag\n            }}\n          TEST_EXTENSIONS_TAG: >-\n            ${{\n              contains(fromJSON('[\"storage-rc-pr\", \"proxy-rc-pr\"]'), needs.meta.outputs.run-kind)\n              && needs.meta.outputs.previous-compute-release\n              || needs.meta.outputs.build-tag\n            }}\n          TEST_VERSION_ONLY: ${{ matrix.pg_version }}\n        run: ./docker-compose/docker_compose_test.sh\n\n      - name: Print logs and clean up docker-compose test\n        if: always()\n        run: |\n          docker compose --profile test-extensions -f ./docker-compose/docker-compose.yml logs || true\n          docker compose --profile test-extensions -f ./docker-compose/docker-compose.yml down\n\n      - name: Test extension upgrade\n        
timeout-minutes: 20\n        if: ${{ contains(fromJSON('[\"pr\", \"compute-rc-pr\"]'), needs.meta.outputs.run-kind) }}\n        env:\n          TAG: >-\n            ${{\n              false\n              || needs.meta.outputs.run-kind == 'pr' && needs.meta.outputs.build-tag\n              || needs.meta.outputs.run-kind == 'compute-rc-pr' && needs.meta.outputs.previous-storage-release\n            }}\n          TEST_EXTENSIONS_TAG: ${{ needs.meta.outputs.previous-compute-release }}\n          NEW_COMPUTE_TAG: ${{ needs.meta.outputs.build-tag }}\n          OLD_COMPUTE_TAG: ${{ needs.meta.outputs.previous-compute-release }}\n        run: ./docker-compose/test_extensions_upgrade.sh\n\n      - name: Print logs and clean up\n        if: always()\n        run: |\n          docker compose --profile test-extensions -f ./docker-compose/docker-compose.yml logs || true\n          docker compose --profile test-extensions -f ./docker-compose/docker-compose.yml down\n\n  generate-image-maps:\n    needs: [ meta ]\n    runs-on: ubuntu-22.04\n    outputs:\n      neon-dev: ${{ steps.generate.outputs.neon-dev }}\n      neon-prod: ${{ steps.generate.outputs.neon-prod }}\n      compute-dev: ${{ steps.generate.outputs.compute-dev }}\n      compute-prod: ${{ steps.generate.outputs.compute-prod }}\n    steps:\n      - name: Harden the runner (Audit all outbound calls)\n        uses: step-security/harden-runner@4d991eb9b905ef189e4c376166672c3f2f230481 # v2.11.0\n        with:\n          egress-policy: audit\n\n      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2\n        with:\n          sparse-checkout: .github/scripts/generate_image_maps.py\n          sparse-checkout-cone-mode: false\n\n      - name: Generate Image Maps\n        id: generate\n        run: python3 .github/scripts/generate_image_maps.py\n        env:\n          SOURCE_TAG: >-\n            ${{\n              contains(fromJSON('[\"storage-release\", \"compute-release\", \"proxy-release\"]'), 
needs.meta.outputs.run-kind)\n              && needs.meta.outputs.release-pr-run-id\n              || needs.meta.outputs.build-tag\n            }}\n          TARGET_TAG: ${{ needs.meta.outputs.build-tag }}\n          BRANCH: \"${{ github.ref_name }}\"\n          DEV_ACR: \"${{ vars.AZURE_DEV_REGISTRY_NAME }}\"\n          PROD_ACR: \"${{ vars.AZURE_PROD_REGISTRY_NAME }}\"\n          DEV_AWS: \"${{ vars.NEON_DEV_AWS_ACCOUNT_ID }}\"\n          PROD_AWS: \"${{ vars.NEON_PROD_AWS_ACCOUNT_ID }}\"\n          AWS_REGION: \"${{ vars.AWS_ECR_REGION }}\"\n\n  push-neon-image-dev:\n    needs: [ meta, generate-image-maps, neon-image ]\n    if: ${{ !failure() && !cancelled() && contains(fromJSON('[\"push-main\", \"pr\", \"storage-release\", \"storage-rc-pr\", \"proxy-release\", \"proxy-rc-pr\"]'), needs.meta.outputs.run-kind) }}\n    uses: ./.github/workflows/_push-to-container-registry.yml\n    permissions:\n      id-token: write  # Required for aws/azure login\n      packages: write  # required for pushing to GHCR\n    with:\n      image-map: '${{ needs.generate-image-maps.outputs.neon-dev }}'\n      aws-region: ${{ vars.AWS_ECR_REGION }}\n      aws-account-id: \"${{ vars.NEON_DEV_AWS_ACCOUNT_ID }}\"\n      aws-role-to-assume: \"gha-oidc-neon-admin\"\n      azure-client-id: ${{ vars.AZURE_DEV_CLIENT_ID }}\n      azure-subscription-id: ${{ vars.AZURE_DEV_SUBSCRIPTION_ID }}\n      azure-tenant-id: ${{ vars.AZURE_TENANT_ID }}\n      acr-registry-name: ${{ vars.AZURE_DEV_REGISTRY_NAME }}\n    secrets: inherit\n\n  push-compute-image-dev:\n    needs: [ meta, generate-image-maps, vm-compute-node-image ]\n    if: ${{ !failure() && !cancelled() && contains(fromJSON('[\"push-main\", \"pr\", \"compute-release\", \"compute-rc-pr\"]'), needs.meta.outputs.run-kind) }}\n    uses: ./.github/workflows/_push-to-container-registry.yml\n    permissions:\n      id-token: write  # Required for aws/azure login\n      packages: write  # required for pushing to GHCR\n    with:\n      image-map: '${{ 
needs.generate-image-maps.outputs.compute-dev }}'\n      aws-region: ${{ vars.AWS_ECR_REGION }}\n      aws-account-id: \"${{ vars.NEON_DEV_AWS_ACCOUNT_ID }}\"\n      aws-role-to-assume: \"gha-oidc-neon-admin\"\n      azure-client-id: ${{ vars.AZURE_DEV_CLIENT_ID }}\n      azure-subscription-id: ${{ vars.AZURE_DEV_SUBSCRIPTION_ID }}\n      azure-tenant-id: ${{ vars.AZURE_TENANT_ID }}\n      acr-registry-name: ${{ vars.AZURE_DEV_REGISTRY_NAME }}\n    secrets: inherit\n\n  push-neon-image-prod:\n    needs: [ meta, generate-image-maps, neon-image, test-images ]\n    # Depends on jobs that can get skipped\n    if: ${{ !failure() && !cancelled() && contains(fromJSON('[\"storage-release\", \"proxy-release\"]'), needs.meta.outputs.run-kind) }}\n    uses: ./.github/workflows/_push-to-container-registry.yml\n    permissions:\n      id-token: write  # Required for aws/azure login\n      packages: write  # required for pushing to GHCR\n    with:\n      image-map: '${{ needs.generate-image-maps.outputs.neon-prod }}'\n      aws-region: ${{ vars.AWS_ECR_REGION }}\n      aws-account-id: \"${{ vars.NEON_PROD_AWS_ACCOUNT_ID }}\"\n      aws-role-to-assume: \"gha-oidc-neon-admin\"\n      azure-client-id: ${{ vars.AZURE_PROD_CLIENT_ID }}\n      azure-subscription-id: ${{ vars.AZURE_PROD_SUBSCRIPTION_ID }}\n      azure-tenant-id: ${{ vars.AZURE_TENANT_ID }}\n      acr-registry-name: ${{ vars.AZURE_PROD_REGISTRY_NAME }}\n    secrets: inherit\n\n  push-compute-image-prod:\n    needs: [ meta, generate-image-maps, vm-compute-node-image, test-images ]\n    # Depends on jobs that can get skipped\n    if: ${{ !failure() && !cancelled() && needs.meta.outputs.run-kind == 'compute-release' }}\n    uses: ./.github/workflows/_push-to-container-registry.yml\n    permissions:\n      id-token: write  # Required for aws/azure login\n      packages: write  # required for pushing to GHCR\n    with:\n      image-map: '${{ needs.generate-image-maps.outputs.compute-prod }}'\n      aws-region: ${{ 
vars.AWS_ECR_REGION }}\n      aws-account-id: \"${{ vars.NEON_PROD_AWS_ACCOUNT_ID }}\"\n      aws-role-to-assume: \"gha-oidc-neon-admin\"\n      azure-client-id: ${{ vars.AZURE_PROD_CLIENT_ID }}\n      azure-subscription-id: ${{ vars.AZURE_PROD_SUBSCRIPTION_ID }}\n      azure-tenant-id: ${{ vars.AZURE_TENANT_ID }}\n      acr-registry-name: ${{ vars.AZURE_PROD_REGISTRY_NAME }}\n    secrets: inherit\n\n  push-neon-test-extensions-image-dockerhub:\n    if: ${{ contains(fromJSON('[\"push-main\", \"pr\", \"compute-rc-pr\"]'), needs.meta.outputs.run-kind) }}\n    needs: [ meta, compute-node-image ]\n    uses: ./.github/workflows/_push-to-container-registry.yml\n    permissions:\n      packages: write\n      id-token: write\n    with:\n      image-map: |\n        {\n          \"ghcr.io/neondatabase/neon-test-extensions-v16:${{ needs.meta.outputs.build-tag }}\": [\n            \"docker.io/neondatabase/neon-test-extensions-v16:${{ needs.meta.outputs.build-tag }}\"\n          ],\n          \"ghcr.io/neondatabase/neon-test-extensions-v17:${{ needs.meta.outputs.build-tag }}\": [\n            \"docker.io/neondatabase/neon-test-extensions-v17:${{ needs.meta.outputs.build-tag }}\"\n          ]\n        }\n    secrets: inherit\n\n  add-latest-tag-to-neon-test-extensions-image:\n    if: ${{ needs.meta.outputs.run-kind == 'push-main' }}\n    needs: [ meta, compute-node-image ]\n    uses: ./.github/workflows/_push-to-container-registry.yml\n    permissions:\n      packages: write\n      id-token: write\n    with:\n      image-map: |\n        {\n          \"ghcr.io/neondatabase/neon-test-extensions-v16:${{ needs.meta.outputs.build-tag }}\": [\n            \"docker.io/neondatabase/neon-test-extensions-v16:latest\",\n            \"ghcr.io/neondatabase/neon-test-extensions-v16:latest\"\n          ],\n          \"ghcr.io/neondatabase/neon-test-extensions-v17:${{ needs.meta.outputs.build-tag }}\": [\n            \"docker.io/neondatabase/neon-test-extensions-v17:latest\",\n            
\"ghcr.io/neondatabase/neon-test-extensions-v17:latest\"\n          ]\n        }\n    secrets: inherit\n\n  add-release-tag-to-neon-test-extensions-image:\n    if: ${{ needs.meta.outputs.run-kind == 'compute-release' }}\n    needs: [ meta ]\n    uses: ./.github/workflows/_push-to-container-registry.yml\n    permissions:\n      packages: write\n      id-token: write\n    with:\n      image-map: |\n        {\n          \"ghcr.io/neondatabase/neon-test-extensions-v16:${{ needs.meta.outputs.release-pr-run-id }}\": [\n            \"docker.io/neondatabase/neon-test-extensions-v16:${{ needs.meta.outputs.build-tag }}\",\n            \"ghcr.io/neondatabase/neon-test-extensions-v16:${{ needs.meta.outputs.build-tag }}\"\n          ],\n          \"ghcr.io/neondatabase/neon-test-extensions-v17:${{ needs.meta.outputs.release-pr-run-id }}\": [\n            \"docker.io/neondatabase/neon-test-extensions-v17:${{ needs.meta.outputs.build-tag }}\",\n            \"ghcr.io/neondatabase/neon-test-extensions-v17:${{ needs.meta.outputs.build-tag }}\"\n          ]\n        }\n    secrets: inherit\n\n  trigger-custom-extensions-build-and-wait:\n    needs: [ check-permissions, meta ]\n    if: ${{ contains(fromJSON('[\"push-main\", \"pr\", \"compute-release\", \"compute-rc-pr\"]'), needs.meta.outputs.run-kind) }}\n    runs-on: ubuntu-22.04\n    permissions:\n      id-token: write # aws-actions/configure-aws-credentials\n      statuses: write\n      contents: write\n      pull-requests: write\n    steps:\n      - name: Harden the runner (Audit all outbound calls)\n        uses: step-security/harden-runner@4d991eb9b905ef189e4c376166672c3f2f230481 # v2.11.0\n        with:\n          egress-policy: audit\n\n      - name: Set PR's status to pending and request a remote CI test\n        run: |\n          COMMIT_SHA=${{ github.event.pull_request.head.sha || github.sha }}\n          REMOTE_REPO=\"${{ github.repository_owner }}/build-custom-extensions\"\n\n          curl -f -X POST \\\n          
https://api.github.com/repos/${{ github.repository }}/statuses/$COMMIT_SHA \\\n          -H \"Accept: application/vnd.github.v3+json\" \\\n          --user \"${{ secrets.CI_ACCESS_TOKEN }}\" \\\n          --data \\\n            \"{\n              \\\"state\\\": \\\"pending\\\",\n              \\\"context\\\": \\\"build-and-upload-extensions\\\",\n              \\\"description\\\": \\\"[$REMOTE_REPO] Remote CI job is about to start\\\"\n            }\"\n\n          curl -f -X POST \\\n          https://api.github.com/repos/$REMOTE_REPO/actions/workflows/build_and_upload_extensions.yml/dispatches \\\n          -H \"Accept: application/vnd.github.v3+json\" \\\n          --user \"${{ secrets.CI_ACCESS_TOKEN }}\" \\\n          --data \\\n            \"{\n              \\\"ref\\\": \\\"main\\\",\n              \\\"inputs\\\": {\n                \\\"ci_job_name\\\": \\\"build-and-upload-extensions\\\",\n                \\\"commit_hash\\\": \\\"$COMMIT_SHA\\\",\n                \\\"remote_repo\\\": \\\"${{ github.repository }}\\\",\n                \\\"compute_image_tag\\\": \\\"${{ needs.meta.outputs.build-tag }}\\\",\n                \\\"remote_branch_name\\\": \\\"${{ github.ref_name }}\\\"\n              }\n            }\"\n\n      - name: Wait for extension build to finish\n        env:\n          GH_TOKEN: ${{ secrets.CI_ACCESS_TOKEN }}\n        run: |\n          TIMEOUT=5400 # 90 minutes, usually it takes ~2-3 minutes, but if runners are busy, it might take longer\n          INTERVAL=15 # try each N seconds\n\n          last_status=\"\" # a variable to carry the last status of the \"build-and-upload-extensions\" context\n\n          for ((i=0; i <= TIMEOUT; i+=INTERVAL)); do\n            sleep $INTERVAL\n\n            # Get statuses for the latest commit in the PR / branch\n            gh api \\\n              -H \"Accept: application/vnd.github+json\" \\\n              -H \"X-GitHub-Api-Version: 2022-11-28\" \\\n              \"/repos/${{ github.repository 
}}/statuses/${{ github.event.pull_request.head.sha || github.sha }}\" > statuses.json\n\n            # Get the latest status for the \"build-and-upload-extensions\" context\n            last_status=$(jq --raw-output '[.[] | select(.context == \"build-and-upload-extensions\")] | sort_by(.created_at)[-1].state' statuses.json)\n            if [ \"${last_status}\" = \"pending\" ]; then\n              # Extension build is still in progress.\n              continue\n            elif [ \"${last_status}\" = \"success\" ]; then\n              # Extension build is successful.\n              exit 0\n            else\n              # Status is neither \"pending\" nor \"success\", exit the loop and fail the job.\n              break\n            fi\n          done\n\n          # Extension build failed, print `statuses.json` for debugging and fail the job.\n          jq '.' statuses.json\n\n          echo >&2 \"Status of extension build is '${last_status}' != 'success'\"\n          exit 1\n\n  deploy:\n    needs: [ check-permissions, push-neon-image-dev, push-compute-image-dev, push-neon-image-prod, push-compute-image-prod, meta, trigger-custom-extensions-build-and-wait ]\n    # `!failure() && !cancelled()` is required because the workflow depends on the job that can be skipped: `push-neon-image-prod` and `push-compute-image-prod`\n    if: ${{ contains(fromJSON('[\"push-main\", \"storage-release\", \"proxy-release\", \"compute-release\"]'), needs.meta.outputs.run-kind) && !failure() && !cancelled() }}\n    permissions:\n      id-token: write # aws-actions/configure-aws-credentials\n      statuses: write\n      contents: write\n    runs-on: [ self-hosted, small ]\n    container: ${{ vars.NEON_DEV_AWS_ACCOUNT_ID }}.dkr.ecr.${{ vars.AWS_ECR_REGION }}.amazonaws.com/ansible:latest\n    steps:\n      - name: Harden the runner (Audit all outbound calls)\n        uses: step-security/harden-runner@4d991eb9b905ef189e4c376166672c3f2f230481 # v2.11.0\n        with:\n          egress-policy: 
audit\n\n      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2\n\n      - name: Create git tag and GitHub release\n        if: ${{ contains(fromJSON('[\"storage-release\", \"proxy-release\", \"compute-release\"]'), needs.meta.outputs.run-kind) }}\n        uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1\n        env:\n          TAG: \"${{ needs.meta.outputs.build-tag }}\"\n          BRANCH: \"${{ github.ref_name }}\"\n          PREVIOUS_RELEASE: >-\n            ${{\n              false\n              || needs.meta.outputs.run-kind == 'storage-release' && needs.meta.outputs.previous-storage-release\n              || needs.meta.outputs.run-kind == 'proxy-release' && needs.meta.outputs.previous-proxy-release\n              || needs.meta.outputs.run-kind == 'compute-release' && needs.meta.outputs.previous-compute-release\n              || 'unknown'\n            }}\n        with:\n          retries: 5\n          script: |\n            const { TAG, BRANCH, PREVIOUS_RELEASE } = process.env\n\n            try {\n              const existingRef = await github.rest.git.getRef({\n                owner: context.repo.owner,\n                repo: context.repo.repo,\n                ref: `tags/${TAG}`,\n              });\n\n              if (existingRef.data.object.sha !== context.sha) {\n                throw new Error(`Tag ${TAG} already exists but points to a different commit (expected: ${context.sha}, actual: ${existingRef.data.object.sha}).`);\n              }\n\n              console.log(`Tag ${TAG} already exists and points to ${context.sha} as expected.`);\n            } catch (error) {\n              if (error.status !== 404) {\n                throw error;\n              }\n\n              console.log(`Tag ${TAG} does not exist. 
Creating it...`);\n              await github.rest.git.createRef({\n                owner: context.repo.owner,\n                repo: context.repo.repo,\n                ref: `refs/tags/${TAG}`,\n                sha: context.sha,\n              });\n              console.log(`Tag ${TAG} created successfully.`);\n            }\n\n            try {\n              const existingRelease = await github.rest.repos.getReleaseByTag({\n                owner: context.repo.owner,\n                repo: context.repo.repo,\n                tag: TAG,\n              });\n\n              console.log(`Release for tag ${TAG} already exists (ID: ${existingRelease.data.id}).`);\n            } catch (error) {\n              if (error.status !== 404) {\n                throw error;\n              }\n\n              console.log(`Release for tag ${TAG} does not exist. Creating it...`);\n\n              // Find the PR number using the commit SHA\n              const pullRequests = await github.rest.pulls.list({\n                owner: context.repo.owner,\n                repo: context.repo.repo,\n                state: 'closed',\n                base: BRANCH,\n              });\n\n              const pr = pullRequests.data.find(pr => pr.merge_commit_sha === context.sha);\n              const prNumber = pr ? pr.number : null;\n\n              const releaseNotes = [\n                prNumber\n                  ? 
`Release PR https://github.com/${context.repo.owner}/${context.repo.repo}/pull/${prNumber}.`\n                  : 'Release PR not found.',\n                `Diff with the previous release https://github.com/${context.repo.owner}/${context.repo.repo}/compare/${PREVIOUS_RELEASE}...${TAG}.`\n              ].join('\\n\\n');\n\n              await github.rest.repos.createRelease({\n                owner: context.repo.owner,\n                repo: context.repo.repo,\n                tag_name: TAG,\n                body: releaseNotes,\n              });\n              console.log(`Release for tag ${TAG} created successfully.`);\n            }\n\n      - name: Trigger deploy workflow\n        env:\n          GH_TOKEN: ${{ secrets.CI_ACCESS_TOKEN }}\n          RUN_KIND: ${{ needs.meta.outputs.run-kind }}\n        run: |\n          case ${RUN_KIND} in\n          push-main)\n            gh workflow --repo neondatabase/infra run deploy-dev.yml --ref main -f branch=main -f dockerTag=${{needs.meta.outputs.build-tag}} -f deployPreprodRegion=false\n            ;;\n          storage-release)\n            gh workflow --repo neondatabase/infra run deploy-dev.yml --ref main \\\n              -f deployPgSniRouter=false \\\n              -f deployProxy=false \\\n              -f deployStorage=true \\\n              -f deployStorageBroker=false \\\n              -f deployStorageController=true \\\n              -f branch=main \\\n              -f dockerTag=${{needs.meta.outputs.build-tag}} \\\n              -f deployPreprodRegion=true\n\n            gh workflow --repo neondatabase/infra run deploy-prod.yml --ref main \\\n              -f deployStorage=true \\\n              -f deployStorageBroker=false \\\n              -f deployStorageController=true \\\n              -f branch=main \\\n              -f dockerTag=${{needs.meta.outputs.build-tag}}\n            ;;\n          proxy-release)\n            gh workflow --repo neondatabase/infra run deploy-dev.yml --ref main \\\n              
-f deployPgSniRouter=true \\\n              -f deployProxy=true \\\n              -f deployStorage=false \\\n              -f deployStorageBroker=false \\\n              -f deployStorageController=false \\\n              -f branch=main \\\n              -f dockerTag=${{needs.meta.outputs.build-tag}} \\\n              -f deployPreprodRegion=true\n\n            gh workflow --repo neondatabase/infra run deploy-proxy-prod.yml --ref main \\\n              -f deployPgSniRouter=true \\\n              -f deployProxyLink=true \\\n              -f deployPrivatelinkProxy=true \\\n              -f deployProxyScram=true \\\n              -f deployProxyAuthBroker=true \\\n              -f branch=main \\\n              -f dockerTag=${{needs.meta.outputs.build-tag}}\n            ;;\n          compute-release)\n            gh workflow --repo neondatabase/infra run deploy-compute-dev.yml --ref main -f dockerTag=${{needs.meta.outputs.build-tag}}\n            ;;\n          *)\n            echo \"RUN_KIND (value '${RUN_KIND}') is not set to either 'push-main', 'storage-release', 'proxy-release' or 'compute-release'\"\n            exit 1\n            ;;\n          esac\n\n  notify-release-deploy-failure:\n    needs: [ meta, deploy ]\n    # We want this to run even if (transitive) dependencies are skipped, because deploy should really be successful on release branch workflow runs.\n    if: contains(fromJSON('[\"storage-release\", \"compute-release\", \"proxy-release\"]'), needs.meta.outputs.run-kind) && needs.deploy.result != 'success' && always()\n    runs-on: ubuntu-22.04\n    steps:\n      - name: Harden the runner (Audit all outbound calls)\n        uses: step-security/harden-runner@4d991eb9b905ef189e4c376166672c3f2f230481 # v2.11.0\n        with:\n          egress-policy: audit\n\n      - name: Post release-deploy failure to team slack channel\n        uses: slackapi/slack-github-action@485a9d42d3a73031f12ec201c457e2162c45d02d # v2.0.0\n        env:\n          TEAM_ONCALL: >-\n      
      ${{\n              fromJSON(format('{\n                \"storage-release\": \"<!subteam^{0}|@oncall-storage>\",\n                \"compute-release\": \"<!subteam^{1}|@oncall-compute>\",\n                \"proxy-release\":   \"<!subteam^{2}|@oncall-proxy>\"\n              }',\n                vars.SLACK_ONCALL_STORAGE_GROUP,\n                vars.SLACK_ONCALL_COMPUTE_GROUP,\n                vars.SLACK_ONCALL_PROXY_GROUP\n              ))[needs.meta.outputs.run-kind]\n            }}\n          CHANNEL: >-\n            ${{\n              fromJSON(format('{\n                \"storage-release\": \"{0}\",\n                \"compute-release\": \"{1}\",\n                \"proxy-release\":   \"{2}\"\n              }',\n                vars.SLACK_STORAGE_CHANNEL_ID,\n                vars.SLACK_COMPUTE_CHANNEL_ID,\n                vars.SLACK_PROXY_CHANNEL_ID\n              ))[needs.meta.outputs.run-kind]\n            }}\n        with:\n          method: chat.postMessage\n          token: ${{ secrets.SLACK_BOT_TOKEN }}\n          payload: |\n            channel: ${{ env.CHANNEL }}\n            text: |\n              🔴 ${{ env.TEAM_ONCALL }}: deploy job on release branch had unexpected status \"${{ needs.deploy.result }}\" <${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|GitHub Run>.\n\n  # The job runs on `release` branch and copies compatibility data and Neon artifact from the last *release PR* to the latest directory\n  promote-compatibility-data:\n    needs: [ meta, deploy ]\n    permissions:\n      id-token: write # aws-actions/configure-aws-credentials\n      statuses: write\n      contents: read\n    # `!failure() && !cancelled()` is required because the workflow transitively depends on the job that can be skipped: `push-neon-image-prod` and `push-compute-image-prod`\n    if: github.ref_name == 'release' && !failure() && !cancelled()\n\n    runs-on: ubuntu-22.04\n    steps:\n      - name: Harden the runner (Audit all outbound 
calls)\n        uses: step-security/harden-runner@4d991eb9b905ef189e4c376166672c3f2f230481 # v2.11.0\n        with:\n          egress-policy: audit\n\n      - uses: aws-actions/configure-aws-credentials@e3dd6a429d7300a6a4c196c26e071d42e0343502 # v4.0.2\n        with:\n          aws-region: eu-central-1\n          role-to-assume: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}\n          role-duration-seconds: 3600\n\n      - name: Promote compatibility snapshot and Neon artifact\n        env:\n          BUCKET: neon-github-public-dev\n          AWS_REGION: eu-central-1\n          COMMIT_SHA: ${{ github.sha }}\n          RUN_ID: ${{ needs.meta.outputs.release-pr-run-id }}\n        run: |\n          old_prefix=\"artifacts/${COMMIT_SHA}/${RUN_ID}\"\n          new_prefix=\"artifacts/latest\"\n\n          files_to_promote=()\n          files_on_s3=$(aws s3api list-objects-v2 --bucket ${BUCKET} --prefix ${old_prefix} | jq -r '.Contents[]?.Key' || true)\n\n          for arch in X64 ARM64; do\n            for build_type in debug release; do\n              neon_artifact_filename=\"neon-Linux-${arch}-${build_type}-artifact.tar.zst\"\n              s3_key=$(echo \"${files_on_s3}\" | grep ${neon_artifact_filename} | sort --version-sort | tail -1 || true)\n              if [ -z \"${s3_key}\" ]; then\n                echo >&2 \"Neither s3://${BUCKET}/${old_prefix}/${neon_artifact_filename} nor its version from previous attempts exist\"\n                exit 1\n              fi\n\n              files_to_promote+=(\"s3://${BUCKET}/${s3_key}\")\n\n              for pg_version in v14 v15 v16 v17; do\n                # We run less tests for debug builds, so we don't need to promote them\n                if [ \"${build_type}\" == \"debug\" ] && { [ \"${arch}\" == \"ARM64\" ] || [ \"${pg_version}\" != \"v17\" ] ; }; then\n                  continue\n                fi\n\n                compatibility_data_filename=\"compatibility-snapshot-${arch}-${build_type}-pg${pg_version}.tar.zst\"\n             
   s3_key=$(echo \"${files_on_s3}\" | grep ${compatibility_data_filename} | sort --version-sort | tail -1 || true)\n                if [ -z \"${s3_key}\" ]; then\n                  echo >&2 \"Neither s3://${BUCKET}/${old_prefix}/${compatibility_data_filename} nor its version from previous attempts exist\"\n                  exit 1\n                fi\n\n                files_to_promote+=(\"s3://${BUCKET}/${s3_key}\")\n              done\n            done\n          done\n\n          for f in \"${files_to_promote[@]}\"; do\n            time aws s3 cp --only-show-errors ${f} s3://${BUCKET}/${new_prefix}/\n          done\n\n  pin-build-tools-image:\n    needs: [ build-build-tools-image, test-images, build-and-test-locally ]\n    # `!failure() && !cancelled()` is required because the job (transitively) depends on jobs that can be skipped\n    if: github.ref_name == 'main' && !failure() && !cancelled()\n    uses: ./.github/workflows/pin-build-tools-image.yml\n    with:\n      from-tag: ${{ needs.build-build-tools-image.outputs.image-tag }}\n    secrets: inherit\n\n  # This job simplifies setting branch protection rules (in GitHub UI)\n  # by allowing to set only this job instead of listing many others.\n  # It also makes it easier to rename or parametrise jobs (using matrix)\n  # which requires changes in branch protection rules\n  #\n  # Note, that we can't add external check (like `neon-cloud-e2e`) we still need to use GitHub UI for that.\n  #\n  # https://github.com/neondatabase/neon/settings/branch_protection_rules\n  conclusion:\n    if: always()\n    # Format `needs` differently to make the list more readable.\n    # Usually we do `needs: [...]`\n    needs:\n      - meta\n      - build-and-test-locally\n      - check-codestyle-python\n      - check-codestyle-rust\n      - check-dependencies-rust\n      - files-changed\n      - push-compute-image-dev\n      - push-neon-image-dev\n      - test-images\n      - trigger-custom-extensions-build-and-wait\n    runs-on: 
ubuntu-22.04\n    steps:\n      # The list of possible results:\n      # https://docs.github.com/en/actions/learn-github-actions/contexts#needs-context\n      - name: Harden the runner (Audit all outbound calls)\n        uses: step-security/harden-runner@4d991eb9b905ef189e4c376166672c3f2f230481 # v2.11.0\n        with:\n          egress-policy: audit\n\n      - name: Fail the job if any of the dependencies do not succeed\n        run: exit 1\n        if: |\n          contains(needs.*.result, 'failure')\n          || contains(needs.*.result, 'cancelled')\n          || (needs.check-dependencies-rust.result == 'skipped' && needs.files-changed.outputs.check-rust-dependencies == 'true' && contains(fromJSON('[\"pr\", \"storage-rc-pr\", \"proxy-rc-pr\", \"compute-rc-pr\"]'), needs.meta.outputs.run-kind))\n          || (needs.build-and-test-locally.result == 'skipped' && contains(fromJSON('[\"pr\", \"push-main\", \"storage-rc-pr\", \"proxy-rc-pr\", \"compute-rc-pr\"]'), needs.meta.outputs.run-kind))\n          || (needs.check-codestyle-python.result == 'skipped' && contains(fromJSON('[\"pr\", \"storage-rc-pr\", \"proxy-rc-pr\", \"compute-rc-pr\"]'), needs.meta.outputs.run-kind))\n          || (needs.check-codestyle-rust.result == 'skipped' && contains(fromJSON('[\"pr\", \"storage-rc-pr\", \"proxy-rc-pr\", \"compute-rc-pr\"]'), needs.meta.outputs.run-kind))\n          || needs.files-changed.result == 'skipped'\n          || (needs.push-compute-image-dev.result == 'skipped' && contains(fromJSON('[\"push-main\", \"pr\", \"compute-release\", \"compute-rc-pr\"]'), needs.meta.outputs.run-kind))\n          || (needs.push-neon-image-dev.result == 'skipped' && contains(fromJSON('[\"push-main\", \"pr\", \"storage-release\", \"storage-rc-pr\", \"proxy-release\", \"proxy-rc-pr\"]'), needs.meta.outputs.run-kind))\n          || (needs.test-images.result == 'skipped' && contains(fromJSON('[\"push-main\", \"pr\", \"storage-rc-pr\", \"proxy-rc-pr\", \"compute-rc-pr\"]'), 
needs.meta.outputs.run-kind))\n          || (needs.trigger-custom-extensions-build-and-wait.result == 'skipped' && contains(fromJSON('[\"push-main\", \"pr\", \"compute-release\", \"compute-rc-pr\"]'), needs.meta.outputs.run-kind))\n"
  },
  {
    "path": ".github/workflows/build_and_test_fully.yml",
    "content": "name: Build and Test Fully\n\non:\n  schedule:\n    # * is a special character in YAML so you have to quote this string\n    #          ┌───────────── minute (0 - 59)\n    #          │ ┌───────────── hour (0 - 23)\n    #          │ │ ┌───────────── day of the month (1 - 31)\n    #          │ │ │ ┌───────────── month (1 - 12 or JAN-DEC)\n    #          │ │ │ │ ┌───────────── day of the week (0 - 6 or SUN-SAT)\n    - cron:   '0 3 * * *' # run once a day, timezone is utc\n  workflow_dispatch:\n\ndefaults:\n  run:\n    shell: bash -euxo pipefail {0}\n\nconcurrency:\n  # Allow only one workflow per any non-`main` branch.\n  group: ${{ github.workflow }}-${{ github.ref_name }}-${{ github.ref_name == 'main' && github.sha || 'anysha' }}\n  cancel-in-progress: true\n\nenv:\n  RUST_BACKTRACE: 1\n  COPT: '-Werror'\n\njobs:\n  tag:\n    runs-on: [ self-hosted, small ]\n    container: ${{ vars.NEON_DEV_AWS_ACCOUNT_ID }}.dkr.ecr.${{ vars.AWS_ECR_REGION }}.amazonaws.com/base:pinned\n    outputs:\n      build-tag: ${{steps.build-tag.outputs.tag}}\n\n    steps:\n      # Need `fetch-depth: 0` to count the number of commits in the branch\n      - name: Harden the runner (Audit all outbound calls)\n        uses: step-security/harden-runner@4d991eb9b905ef189e4c376166672c3f2f230481 # v2.11.0\n        with:\n          egress-policy: audit\n\n      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2\n        with:\n          fetch-depth: 0\n\n      - name: Get build tag\n        run: |\n          echo run:$GITHUB_RUN_ID\n          echo ref:$GITHUB_REF_NAME\n          echo rev:$(git rev-list --count HEAD)\n          if [[ \"$GITHUB_REF_NAME\" == \"main\" ]]; then\n            echo \"tag=$(git rev-list --count HEAD)\" >> $GITHUB_OUTPUT\n          elif [[ \"$GITHUB_REF_NAME\" == \"release\" ]]; then\n            echo \"tag=release-$(git rev-list --count HEAD)\" >> $GITHUB_OUTPUT\n          elif [[ \"$GITHUB_REF_NAME\" == \"release-proxy\" ]]; then\n     
       echo \"tag=release-proxy-$(git rev-list --count HEAD)\" >> $GITHUB_OUTPUT\n          elif [[ \"$GITHUB_REF_NAME\" == \"release-compute\" ]]; then\n            echo \"tag=release-compute-$(git rev-list --count HEAD)\" >> $GITHUB_OUTPUT\n          else\n            echo \"GITHUB_REF_NAME (value '$GITHUB_REF_NAME') is not set to either 'main' or 'release', 'release-proxy', 'release-compute'\"\n            echo \"tag=$GITHUB_RUN_ID\" >> $GITHUB_OUTPUT\n          fi\n        shell: bash\n        id: build-tag\n\n  build-build-tools-image:\n    uses: ./.github/workflows/build-build-tools-image.yml\n    secrets: inherit\n\n  build-and-test-locally:\n    needs: [ tag, build-build-tools-image ]\n    strategy:\n      fail-fast: false\n      matrix:\n        arch: [ x64, arm64 ]\n        build-type: [ debug, release ]\n    uses: ./.github/workflows/_build-and-test-locally.yml\n    with:\n      arch: ${{ matrix.arch }}\n      build-tools-image: ${{ needs.build-build-tools-image.outputs.image }}-bookworm\n      build-tag: ${{ needs.tag.outputs.build-tag }}\n      build-type: ${{ matrix.build-type }}\n      rerun-failed: false\n      test-cfg: '[{\"pg_version\":\"v14\", \"lfc_state\": \"with-lfc\"},\n                  {\"pg_version\":\"v15\", \"lfc_state\": \"with-lfc\"},\n                  {\"pg_version\":\"v16\", \"lfc_state\": \"with-lfc\"},\n                  {\"pg_version\":\"v17\", \"lfc_state\": \"with-lfc\"},\n                  {\"pg_version\":\"v14\", \"lfc_state\": \"without-lfc\"},\n                  {\"pg_version\":\"v15\", \"lfc_state\": \"without-lfc\"},\n                  {\"pg_version\":\"v16\", \"lfc_state\": \"without-lfc\"},\n                  {\"pg_version\":\"v17\", \"lfc_state\": \"without-lfc\"}]'\n    secrets: inherit\n\n\n  create-test-report:\n    needs: [ build-and-test-locally, build-build-tools-image ]\n    if: ${{ !cancelled() }}\n    permissions:\n      id-token: write # aws-actions/configure-aws-credentials\n      statuses: write\n      
contents: write\n      pull-requests: write\n    outputs:\n      report-url: ${{ steps.create-allure-report.outputs.report-url }}\n\n    runs-on: [ self-hosted, small ]\n    container:\n      image: ${{ needs.build-build-tools-image.outputs.image }}-bookworm\n      credentials:\n        username: ${{ github.actor }}\n        password: ${{ secrets.GITHUB_TOKEN }}\n      options: --init\n\n    steps:\n      - name: Harden the runner (Audit all outbound calls)\n        uses: step-security/harden-runner@4d991eb9b905ef189e4c376166672c3f2f230481 # v2.11.0\n        with:\n          egress-policy: audit\n\n      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2\n\n      - name: Create Allure report\n        if: ${{ !cancelled() }}\n        id: create-allure-report\n        uses: ./.github/actions/allure-report-generate\n        with:\n          store-test-results-into-db: true\n          aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}\n        env:\n          REGRESS_TEST_RESULT_CONNSTR_NEW: ${{ secrets.REGRESS_TEST_RESULT_CONNSTR_NEW }}\n\n      - uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1\n        if: ${{ !cancelled() }}\n        with:\n          # Retry script for 5XX server errors: https://github.com/actions/github-script#retries\n          retries: 5\n          script: |\n            const report = {\n              reportUrl:     \"${{ steps.create-allure-report.outputs.report-url }}\",\n              reportJsonUrl: \"${{ steps.create-allure-report.outputs.report-json-url }}\",\n            }\n\n            const coverage = {}\n\n            const script = require(\"./scripts/comment-test-report.js\")\n            await script({\n              github,\n              context,\n              fetch,\n              report,\n              coverage,\n            })\n"
  },
  {
    "path": ".github/workflows/build_and_test_with_sanitizers.yml",
    "content": "name: Build and Test with Sanitizers\n\non:\n  schedule:\n    # * is a special character in YAML so you have to quote this string\n    #          ┌───────────── minute (0 - 59)\n    #          │ ┌───────────── hour (0 - 23)\n    #          │ │ ┌───────────── day of the month (1 - 31)\n    #          │ │ │ ┌───────────── month (1 - 12 or JAN-DEC)\n    #          │ │ │ │ ┌───────────── day of the week (0 - 6 or SUN-SAT)\n    - cron:   '0 1 * * *' # run once a day, timezone is utc\n  workflow_dispatch:\n\ndefaults:\n  run:\n    shell: bash -euxo pipefail {0}\n\nconcurrency:\n  # Allow only one workflow per any non-`main` branch.\n  group: ${{ github.workflow }}-${{ github.ref_name }}-${{ github.ref_name == 'main' && github.sha || 'anysha' }}\n  cancel-in-progress: true\n\nenv:\n  RUST_BACKTRACE: 1\n  COPT: '-Werror'\n\njobs:\n  tag:\n    runs-on: [ self-hosted, small ]\n    container: ${{ vars.NEON_DEV_AWS_ACCOUNT_ID }}.dkr.ecr.${{ vars.AWS_ECR_REGION }}.amazonaws.com/base:pinned\n    outputs:\n      build-tag: ${{steps.build-tag.outputs.tag}}\n\n    steps:\n      # Need `fetch-depth: 0` to count the number of commits in the branch\n      - name: Harden the runner (Audit all outbound calls)\n        uses: step-security/harden-runner@4d991eb9b905ef189e4c376166672c3f2f230481 # v2.11.0\n        with:\n          egress-policy: audit\n\n      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2\n        with:\n          fetch-depth: 0\n\n      - name: Get build tag\n        run: |\n          echo run:$GITHUB_RUN_ID\n          echo ref:$GITHUB_REF_NAME\n          echo rev:$(git rev-list --count HEAD)\n          if [[ \"$GITHUB_REF_NAME\" == \"main\" ]]; then\n            echo \"tag=$(git rev-list --count HEAD)\" >> $GITHUB_OUTPUT\n          elif [[ \"$GITHUB_REF_NAME\" == \"release\" ]]; then\n            echo \"tag=release-$(git rev-list --count HEAD)\" >> $GITHUB_OUTPUT\n          elif [[ \"$GITHUB_REF_NAME\" == \"release-proxy\" ]]; 
then\n            echo \"tag=release-proxy-$(git rev-list --count HEAD)\" >> $GITHUB_OUTPUT\n          elif [[ \"$GITHUB_REF_NAME\" == \"release-compute\" ]]; then\n            echo \"tag=release-compute-$(git rev-list --count HEAD)\" >> $GITHUB_OUTPUT\n          else\n            echo \"GITHUB_REF_NAME (value '$GITHUB_REF_NAME') is not set to either 'main' or 'release', 'release-proxy', 'release-compute'\"\n            echo \"tag=$GITHUB_RUN_ID\" >> $GITHUB_OUTPUT\n          fi\n        shell: bash\n        id: build-tag\n\n  build-build-tools-image:\n    uses: ./.github/workflows/build-build-tools-image.yml\n    secrets: inherit\n\n  build-and-test-locally:\n    needs: [ tag, build-build-tools-image ]\n    strategy:\n      fail-fast: false\n      matrix:\n        arch: [ x64, arm64 ]\n        build-type: [ release ]\n    uses: ./.github/workflows/_build-and-test-locally.yml\n    with:\n      arch: ${{ matrix.arch }}\n      build-tools-image: ${{ needs.build-build-tools-image.outputs.image }}-bookworm\n      build-tag: ${{ needs.tag.outputs.build-tag }}\n      build-type: ${{ matrix.build-type }}\n      rerun-failed: false\n      test-cfg: '[{\"pg_version\":\"v17\"}]'\n      sanitizers: enabled\n    secrets: inherit\n\n\n  create-test-report:\n    needs: [ build-and-test-locally, build-build-tools-image ]\n    if: ${{ !cancelled() }}\n    permissions:\n      id-token: write # aws-actions/configure-aws-credentials\n      statuses: write\n      contents: write\n      pull-requests: write\n    outputs:\n      report-url: ${{ steps.create-allure-report.outputs.report-url }}\n\n    runs-on: [ self-hosted, small ]\n    container:\n      image: ${{ needs.build-build-tools-image.outputs.image }}-bookworm\n      credentials:\n        username: ${{ github.actor }}\n        password: ${{ secrets.GITHUB_TOKEN }}\n      options: --init\n\n    steps:\n      - name: Harden the runner (Audit all outbound calls)\n        uses: 
step-security/harden-runner@4d991eb9b905ef189e4c376166672c3f2f230481 # v2.11.0\n        with:\n          egress-policy: audit\n\n      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2\n\n      - name: Create Allure report\n        if: ${{ !cancelled() }}\n        id: create-allure-report\n        uses: ./.github/actions/allure-report-generate\n        with:\n          store-test-results-into-db: true\n          aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}\n        env:\n          REGRESS_TEST_RESULT_CONNSTR_NEW: ${{ secrets.REGRESS_TEST_RESULT_CONNSTR_NEW }}\n\n      - uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1\n        if: ${{ !cancelled() }}\n        with:\n          # Retry script for 5XX server errors: https://github.com/actions/github-script#retries\n          retries: 5\n          script: |\n            const report = {\n              reportUrl:     \"${{ steps.create-allure-report.outputs.report-url }}\",\n              reportJsonUrl: \"${{ steps.create-allure-report.outputs.report-json-url }}\",\n            }\n\n            const coverage = {}\n\n            const script = require(\"./scripts/comment-test-report.js\")\n            await script({\n              github,\n              context,\n              fetch,\n              report,\n              coverage,\n            })\n"
  },
  {
    "path": ".github/workflows/cargo-deny.yml",
    "content": "name: cargo deny checks\n\non:\n  workflow_call:\n    inputs:\n      build-tools-image:\n        required: false\n        type: string\n  schedule:\n    - cron: '0 10 * * *'\n\npermissions:\n  contents: read\n\njobs:\n  cargo-deny:\n    strategy:\n      matrix:\n        ref: >-\n          ${{\n            fromJSON(\n              github.event_name == 'schedule'\n                && '[\"main\",\"release\",\"release-proxy\",\"release-compute\"]'\n                || format('[\"{0}\"]', github.sha)\n            )\n          }}\n\n    runs-on: [self-hosted, small]\n\n    permissions:\n      packages: read\n\n    container:\n      image: ${{ inputs.build-tools-image || 'ghcr.io/neondatabase/build-tools:pinned' }}\n      credentials:\n        username: ${{ github.actor }}\n        password: ${{ secrets.GITHUB_TOKEN }}\n      options: --init\n\n    steps:\n      - name: Harden the runner (Audit all outbound calls)\n        uses: step-security/harden-runner@4d991eb9b905ef189e4c376166672c3f2f230481 # v2.11.0\n        with:\n          egress-policy: audit\n\n      - name: Checkout\n        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2\n        with:\n          ref: ${{ matrix.ref }}\n\n      - name: Check rust licenses/bans/advisories/sources\n        env:\n          CARGO_DENY_TARGET: >-\n            ${{ github.event_name == 'schedule' && 'advisories' || 'all' }}\n        run: cargo deny check --hide-inclusion-graph $CARGO_DENY_TARGET\n\n      - name: Post to a Slack channel\n        if: ${{ github.event_name == 'schedule' && failure() }}\n        uses: slackapi/slack-github-action@485a9d42d3a73031f12ec201c457e2162c45d02d # v2.0.0\n        with:\n          method: chat.postMessage\n          token: ${{ secrets.SLACK_BOT_TOKEN }}\n          payload: |\n            channel: ${{ vars.SLACK_ON_CALL_DEVPROD_STREAM }}\n            text: |\n              Periodic cargo-deny on ${{ matrix.ref }}: ${{ job.status }}\n              <${{ 
github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|GitHub Run>\n              Fixing the problem should be fairly straightforward from the logs. If not, <#${{ vars.SLACK_RUST_CHANNEL_ID }}> is there to help.\n              Pinging <!subteam^S0838JPSH32|@oncall-devprod>.\n"
  },
  {
    "path": ".github/workflows/check-permissions.yml",
    "content": "name: Check Permissions\n\non:\n  workflow_call:\n    inputs:\n      github-event-name:\n        required: true\n        type: string\n\ndefaults:\n  run:\n    shell: bash -euo pipefail {0}\n\n# No permission for GITHUB_TOKEN by default; the **minimal required** set of permissions should be granted in each job.\npermissions: {}\n\njobs:\n  check-permissions:\n    runs-on: ubuntu-22.04\n    steps:\n    - name: Harden the runner (Audit all outbound calls)\n      uses: step-security/harden-runner@0634a2670c59f64b4a01f0f96f84700a4088b9f0 # v2.12.0\n      with:\n        egress-policy: audit\n\n    - name: Disallow CI runs on PRs from forks\n      if: |\n        inputs.github-event-name  == 'pull_request' &&\n        github.event.pull_request.head.repo.full_name != github.repository\n      run: |\n        if [ \"${{ contains(fromJSON('[\"OWNER\", \"MEMBER\", \"COLLABORATOR\"]'), github.event.pull_request.author_association) }}\" = \"true\" ]; then\n          MESSAGE=\"Please create a PR from a branch of ${GITHUB_REPOSITORY} instead of a fork\"\n        else\n          MESSAGE=\"The PR should be reviewed and labelled with 'approved-for-ci-run' to trigger a CI run\"\n        fi\n\n        # TODO: use actions/github-script to post this message as a PR comment\n        echo >&2 \"We don't run CI for PRs from forks\"\n        echo >&2 \"${MESSAGE}\"\n\n        exit 1\n"
  },
  {
    "path": ".github/workflows/cleanup-caches-by-a-branch.yml",
    "content": "# A workflow from\n# https://docs.github.com/en/actions/using-workflows/caching-dependencies-to-speed-up-workflows#force-deleting-cache-entries\n\nname: cleanup caches by a branch\non:\n  pull_request:\n    types:\n      - closed\n\njobs:\n  cleanup:\n    runs-on: ubuntu-22.04\n    steps:\n      - name: Harden the runner (Audit all outbound calls)\n        uses: step-security/harden-runner@0634a2670c59f64b4a01f0f96f84700a4088b9f0 # v2.12.0\n        with:\n          egress-policy: audit\n\n      - name: Cleanup\n        run: |\n          gh extension install actions/gh-actions-cache\n\n          echo \"Fetching list of cache keys\"\n          cacheKeysForPR=$(gh actions-cache list -R $REPO -B $BRANCH -L 100 | cut -f 1 )\n\n          ## Setting this to not fail the workflow while deleting cache keys.\n          set +e\n          echo \"Deleting caches...\"\n          for cacheKey in $cacheKeysForPR\n          do\n              gh actions-cache delete $cacheKey -R $REPO -B $BRANCH --confirm\n          done\n          echo \"Done\"\n        env:\n          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}\n          REPO: ${{ github.repository }}\n          BRANCH: refs/pull/${{ github.event.pull_request.number }}/merge\n"
  },
  {
    "path": ".github/workflows/cloud-extensions.yml",
    "content": "name: Cloud Extensions Test\non:\n  schedule:\n    # * is a special character in YAML so you have to quote this string\n    #          ┌───────────── minute (0 - 59)\n    #          │ ┌───────────── hour (0 - 23)\n    #          │ │ ┌───────────── day of the month (1 - 31)\n    #          │ │ │ ┌───────────── month (1 - 12 or JAN-DEC)\n    #          │ │ │ │ ┌───────────── day of the week (0 - 6 or SUN-SAT)\n    - cron:  '45 1 * * *' # run once a day, timezone is utc\n  workflow_dispatch: # adds ability to run this manually\n    inputs:\n      region_id:\n        description: 'Project region id. If not set, the default region will be used'\n        required: false\n        default: 'aws-us-east-2'\n\ndefaults:\n  run:\n    shell: bash -euxo pipefail {0}\n\npermissions:\n  id-token: write # aws-actions/configure-aws-credentials\n  statuses: write\n  contents: write\n\njobs:\n  regress:\n    env:\n      POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install\n      TEST_OUTPUT: /tmp/test_output\n      BUILD_TYPE: remote\n    strategy:\n      fail-fast: false\n      matrix:\n        pg-version: [16, 17]\n\n    runs-on: us-east-2\n    container:\n      # We use the neon-test-extensions image here as it contains the source code for the extensions.\n      image: ghcr.io/neondatabase/neon-test-extensions-v${{ matrix.pg-version }}:latest\n      credentials:\n        username: ${{ github.actor }}\n        password: ${{ secrets.GITHUB_TOKEN }}\n      options: --init\n\n    steps:\n      - name: Harden the runner (Audit all outbound calls)\n        uses: step-security/harden-runner@4d991eb9b905ef189e4c376166672c3f2f230481 # v2.11.0\n        with:\n          egress-policy: audit\n\n      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2\n\n      - name: Evaluate the settings\n        id: project-settings\n        run: |\n          if [[ $((${{ matrix.pg-version }})) -lt 17 ]]; then\n            ULID=ulid\n          else\n            ULID=pgx_ulid\n  
        fi\n          LIBS=timescaledb:rag_bge_small_en_v15,rag_jina_reranker_v1_tiny_en:$ULID\n          settings=$(jq -c -n --arg libs $LIBS '{preload_libraries:{use_defaults:false,enabled_libraries:($libs| split(\":\"))}}')\n          echo settings=$settings >> $GITHUB_OUTPUT\n          \n      - name: Create Neon Project\n        id: create-neon-project\n        uses: ./.github/actions/neon-project-create\n        with:\n          region_id: ${{ inputs.region_id || 'aws-us-east-2' }}\n          postgres_version: ${{ matrix.pg-version }}\n          project_settings: ${{ steps.project-settings.outputs.settings }}\n          api_key: ${{ secrets.NEON_STAGING_API_KEY }}\n\n      - name: Run the regression tests\n        run: /run-tests.sh -r /ext-src\n        env:\n          BENCHMARK_CONNSTR: ${{ steps.create-neon-project.outputs.dsn }}\n          SKIP: \"pg_hint_plan-src,pg_repack-src,pg_cron-src,plpgsql_check-src\"\n\n      - name: Delete Neon Project\n        if: ${{ always() }}\n        uses: ./.github/actions/neon-project-delete\n        with:\n          project_id: ${{ steps.create-neon-project.outputs.project_id }}\n          api_key: ${{ secrets.NEON_STAGING_API_KEY }}\n\n      - name: Post to a Slack channel\n        if: ${{ github.event.schedule && failure() }}\n        uses: slackapi/slack-github-action@fcfb566f8b0aab22203f066d80ca1d7e4b5d05b3 # v1.27.1\n        with:\n          channel-id: ${{ vars.SLACK_ON_CALL_QA_STAGING_STREAM }}\n          slack-message: |\n            Periodic extensions test on staging: ${{ job.status }}\n            <${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|GitHub Run>\n        env:\n          SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}\n\n"
  },
  {
    "path": ".github/workflows/cloud-regress.yml",
    "content": "name: Cloud Regression Test\non:\n  schedule:\n    # * is a special character in YAML so you have to quote this string\n    #          ┌───────────── minute (0 - 59)\n    #          │ ┌───────────── hour (0 - 23)\n    #          │ │ ┌───────────── day of the month (1 - 31)\n    #          │ │ │ ┌───────────── month (1 - 12 or JAN-DEC)\n    #          │ │ │ │ ┌───────────── day of the week (0 - 6 or SUN-SAT)\n    - cron:  '45 1 * * *' # run once a day, timezone is utc\n  workflow_dispatch: # adds ability to run this manually\n\ndefaults:\n  run:\n    shell: bash -euxo pipefail {0}\n\nconcurrency:\n  # Allow only one workflow\n  group: ${{ github.workflow }}\n  cancel-in-progress: true\n\npermissions:\n  id-token: write # aws-actions/configure-aws-credentials\n  statuses: write\n  contents: write\n\njobs:\n  regress:\n    env:\n      POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install\n      TEST_OUTPUT: /tmp/test_output\n      BUILD_TYPE: remote\n    strategy:\n      fail-fast: false\n      matrix:\n        pg-version: [16, 17]\n\n    runs-on: us-east-2\n    container:\n      image: ghcr.io/neondatabase/build-tools:pinned-bookworm\n      credentials:\n        username: ${{ github.actor }}\n        password: ${{ secrets.GITHUB_TOKEN }}\n      options: --init\n\n    steps:\n      - name: Harden the runner (Audit all outbound calls)\n        uses: step-security/harden-runner@4d991eb9b905ef189e4c376166672c3f2f230481 # v2.11.0\n        with:\n          egress-policy: audit\n\n      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2\n        with:\n          submodules: true\n\n      - name: Patch the test\n        env:\n          PG_VERSION: ${{matrix.pg-version}}\n        run: |\n          cd \"vendor/postgres-v${PG_VERSION}\"\n          patch -p1 < \"../../compute/patches/cloud_regress_pg${PG_VERSION}.patch\"\n\n      - name: Generate a random password\n        id: pwgen\n        run: |\n          set +x\n          DBPASS=$(dd 
if=/dev/random bs=48 count=1 2>/dev/null | base64)\n          echo \"::add-mask::${DBPASS//\\//}\"\n          echo DBPASS=\"${DBPASS//\\//}\" >> \"${GITHUB_OUTPUT}\"\n\n      - name: Change tests according to the generated password\n        env:\n          DBPASS: ${{ steps.pwgen.outputs.DBPASS }}\n          PG_VERSION: ${{matrix.pg-version}}\n        run: |\n          cd vendor/postgres-v\"${PG_VERSION}\"/src/test/regress\n          for fname in sql/*.sql expected/*.out; do\n            sed -i.bak s/NEON_PASSWORD_PLACEHOLDER/\"'${DBPASS}'\"/ \"${fname}\"\n          done\n          for ph in $(grep NEON_MD5_PLACEHOLDER expected/password.out | awk '{print $3;}' | sort | uniq); do\n            USER=$(echo \"${ph}\" | cut -c 22-)\n            MD5=md5$(echo -n \"${DBPASS}${USER}\" | md5sum | awk '{print $1;}')\n            sed -i.bak \"s/${ph}/${MD5}/\" expected/password.out\n          done\n\n      - name: Download Neon artifact\n        uses: ./.github/actions/download\n        with:\n          name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact\n          path: /tmp/neon/\n          prefix: latest\n          aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}\n\n      - name: Create a new branch\n        id: create-branch\n        uses: ./.github/actions/neon-branch-create\n        with:\n          api_key: ${{ secrets.NEON_STAGING_API_KEY }}\n          project_id: ${{ vars[format('PGREGRESS_PG{0}_PROJECT_ID', matrix.pg-version)] }}\n\n      - name: Run the regression tests\n        uses: ./.github/actions/run-python-test-set\n        with:\n          build_type: ${{ env.BUILD_TYPE }}\n          test_selection: cloud_regress\n          pg_version: ${{matrix.pg-version}}\n          extra_params: -m remote_cluster\n          aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}\n        env:\n          BENCHMARK_CONNSTR: ${{steps.create-branch.outputs.dsn}}\n\n      - name: Delete branch\n        if: always()\n        uses: 
./.github/actions/neon-branch-delete\n        with:\n          api_key: ${{ secrets.NEON_STAGING_API_KEY }}\n          project_id: ${{ vars[format('PGREGRESS_PG{0}_PROJECT_ID', matrix.pg-version)] }}\n          branch_id: ${{steps.create-branch.outputs.branch_id}}\n\n      - name: Create Allure report\n        id: create-allure-report\n        if: ${{ !cancelled() }}\n        uses: ./.github/actions/allure-report-generate\n        with:\n          aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}\n\n      - name: Post to a Slack channel\n        if: ${{ github.event.schedule && failure() }}\n        uses: slackapi/slack-github-action@fcfb566f8b0aab22203f066d80ca1d7e4b5d05b3 # v1.27.1\n        with:\n          channel-id: ${{ vars.SLACK_ON_CALL_QA_STAGING_STREAM }}\n          slack-message: |\n            Periodic pg_regress on staging: ${{ job.status }}\n            <${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|GitHub Run>\n            <${{ steps.create-allure-report.outputs.report-url }}|Allure report>\n        env:\n          SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}\n\n"
  },
  {
    "path": ".github/workflows/fast-forward.yml",
    "content": "name: Fast forward merge\non:\n  pull_request:\n    types: [labeled]\n    branches:\n      - release\n      - release-proxy\n      - release-compute\n\njobs:\n  fast-forward:\n    if: ${{ github.event.label.name == 'fast-forward' }}\n    runs-on: ubuntu-22.04\n\n    steps:\n      - name: Harden the runner (Audit all outbound calls)\n        uses: step-security/harden-runner@0634a2670c59f64b4a01f0f96f84700a4088b9f0 # v2.12.0\n        with:\n          egress-policy: audit\n\n      - name: Remove fast-forward label from PR\n        env:\n          GH_TOKEN: ${{ secrets.CI_ACCESS_TOKEN }}\n        run: |\n          gh pr edit ${{ github.event.pull_request.number }} --repo \"${GITHUB_REPOSITORY}\" --remove-label \"fast-forward\"\n\n      - name: Fast forwarding\n        uses: sequoia-pgp/fast-forward@ea7628bedcb0b0b96e94383ada458d812fca4979\n        # See https://docs.github.com/en/graphql/reference/enums#mergestatestatus\n        if: ${{ contains(fromJSON('[\"clean\", \"unstable\"]'), github.event.pull_request.mergeable_state) }}\n        with:\n          merge: true\n          comment: on-error\n          github_token: ${{ secrets.CI_ACCESS_TOKEN }}\n\n      - name: Comment if mergeable_state is not clean\n        if: ${{ !contains(fromJSON('[\"clean\", \"unstable\"]'), github.event.pull_request.mergeable_state) }}\n        env:\n          GH_TOKEN: ${{ secrets.CI_ACCESS_TOKEN }}\n        run: |\n          gh pr comment ${{ github.event.pull_request.number }} \\\n            --repo \"${GITHUB_REPOSITORY}\" \\\n            --body \"Not trying to forward pull-request, because \\`mergeable_state\\` is \\`${{ github.event.pull_request.mergeable_state }}\\`, not \\`clean\\` or \\`unstable\\`.\"\n"
  },
  {
    "path": ".github/workflows/force-test-extensions-upgrade.yml",
    "content": "name: Force Test Upgrading of Extension\non:\n  schedule:\n    # * is a special character in YAML so you have to quote this string\n    #          ┌───────────── minute (0 - 59)\n    #          │ ┌───────────── hour (0 - 23)\n    #          │ │ ┌───────────── day of the month (1 - 31)\n    #          │ │ │ ┌───────────── month (1 - 12 or JAN-DEC)\n    #          │ │ │ │ ┌───────────── day of the week (0 - 6 or SUN-SAT)\n    - cron:  '45 2 * * *' # run once a day, timezone is utc\n  workflow_dispatch: # adds ability to run this manually\n\ndefaults:\n  run:\n    shell: bash -euxo pipefail {0}\n\nconcurrency:\n  # Allow only one workflow\n  group: ${{ github.workflow }}\n  cancel-in-progress: true\n\npermissions:\n  id-token: write # aws-actions/configure-aws-credentials\n  statuses: write\n  contents: read\n\njobs:\n  regress:\n    strategy:\n      fail-fast: false\n      matrix:\n        pg-version: [16, 17]\n\n    runs-on: small\n\n    steps:\n      - name: Harden the runner (Audit all outbound calls)\n        uses: step-security/harden-runner@4d991eb9b905ef189e4c376166672c3f2f230481 # v2.11.0\n        with:\n          egress-policy: audit\n\n      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2\n        with:\n          submodules: false\n\n      - name: Get the last compute release tag\n        id: get-last-compute-release-tag\n        env:\n          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}\n        run: |\n          tag=$(gh api -q '[.[].tag_name | select(startswith(\"release-compute\"))][0]'\\\n            -H \"Accept: application/vnd.github+json\" \\\n            -H \"X-GitHub-Api-Version: 2022-11-28\" \\\n            \"/repos/${GITHUB_REPOSITORY}/releases\")\n          echo tag=${tag} >> ${GITHUB_OUTPUT}\n\n      - name: Test extension upgrade\n        timeout-minutes: 60\n        env:\n          NEW_COMPUTE_TAG: latest\n          OLD_COMPUTE_TAG: ${{ steps.get-last-compute-release-tag.outputs.tag }}\n          
TEST_EXTENSIONS_TAG: ${{ steps.get-last-compute-release-tag.outputs.tag }}\n          PG_VERSION: ${{ matrix.pg-version }}\n          FORCE_ALL_UPGRADE_TESTS: true\n        run: ./docker-compose/test_extensions_upgrade.sh\n\n      - name: Print logs and clean up\n        if: always()\n        run: |\n          docker compose --profile test-extensions -f ./docker-compose/docker-compose.yml logs || true\n          docker compose --profile test-extensions -f ./docker-compose/docker-compose.yml down\n\n      - name: Post to the Slack channel\n        if: ${{ github.event.schedule && failure() }}\n        uses: slackapi/slack-github-action@fcfb566f8b0aab22203f066d80ca1d7e4b5d05b3 # v1.27.1\n        with:\n          channel-id: ${{ vars.SLACK_ON_CALL_QA_STAGING_STREAM }}\n          slack-message: |\n            Test upgrading of extensions: ${{ job.status }}\n            <${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|GitHub Run>\n        env:\n          SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}\n"
  },
  {
    "path": ".github/workflows/ingest_benchmark.yml",
    "content": "name: benchmarking ingest\n\non:\n  # uncomment to run on push for debugging your PR\n  # push:\n  #   branches: [ your branch ]\n  schedule:\n    # * is a special character in YAML so you have to quote this string\n    #          ┌───────────── minute (0 - 59)\n    #          │ ┌───────────── hour (0 - 23)\n    #          │ │ ┌───────────── day of the month (1 - 31)\n    #          │ │ │ ┌───────────── month (1 - 12 or JAN-DEC)\n    #          │ │ │ │ ┌───────────── day of the week (0 - 6 or SUN-SAT)\n    - cron:   '0 9 * * *' # run once a day, timezone is utc\n  workflow_dispatch: # adds ability to run this manually\n\ndefaults:\n  run:\n    shell: bash -euxo pipefail {0}\n\nconcurrency:\n  # Allow only one workflow globally because we need dedicated resources which only exist once\n  group: ingest-bench-workflow\n  cancel-in-progress: true\n\npermissions:\n  contents: read\n\njobs:\n  ingest:\n    strategy:\n      fail-fast: false # allow other variants to continue even if one fails\n      matrix:\n        include:\n          - target_project: new_empty_project_stripe_size_2048\n            stripe_size: 2048 # 16 MiB\n            postgres_version: 16\n            disable_sharding: false\n          - target_project: new_empty_project_stripe_size_32768\n            stripe_size: 32768 # 256 MiB # note that this is different from null because using null will shard_split the project only if it reaches the threshold\n                               # while here it is sharded from the beginning with a shard size of 256 MiB\n            disable_sharding: false\n            postgres_version: 16\n          - target_project: new_empty_project\n            stripe_size: null # run with neon defaults which will shard split only when reaching the threshold\n            disable_sharding: false\n            postgres_version: 16\n          - target_project: new_empty_project\n            stripe_size: null # run with neon defaults which will shard split only when 
reaching the threshold\n            disable_sharding: false\n            postgres_version: 17\n          - target_project: large_existing_project\n            stripe_size: null # cannot re-shared or choose different stripe size for existing, already sharded project\n            disable_sharding: false\n            postgres_version: 16\n          - target_project: new_empty_project_unsharded\n            stripe_size: null # run with neon defaults which will shard split only when reaching the threshold\n            disable_sharding: true\n            postgres_version: 16\n      max-parallel: 1 # we want to run each stripe size sequentially to be able to compare the results\n    permissions:\n      contents: write\n      statuses: write\n      id-token: write # aws-actions/configure-aws-credentials\n    env:\n      PG_CONFIG: /tmp/neon/pg_install/v16/bin/pg_config\n      PSQL: /tmp/neon/pg_install/v16/bin/psql\n      PG_16_LIB_PATH: /tmp/neon/pg_install/v16/lib\n      PGCOPYDB: /pgcopydb/bin/pgcopydb\n      PGCOPYDB_LIB_PATH: /pgcopydb/lib\n    runs-on: [ self-hosted, us-east-2, x64 ]\n    container:\n      image: ghcr.io/neondatabase/build-tools:pinned-bookworm\n      credentials:\n        username: ${{ github.actor }}\n        password: ${{ secrets.GITHUB_TOKEN }}\n      options: --init\n    timeout-minutes: 1440\n\n    steps:\n    - name: Harden the runner (Audit all outbound calls)\n      uses: step-security/harden-runner@4d991eb9b905ef189e4c376166672c3f2f230481 # v2.11.0\n      with:\n        egress-policy: audit\n\n    - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2\n\n    - name: Configure AWS credentials # necessary to download artefacts\n      uses: aws-actions/configure-aws-credentials@e3dd6a429d7300a6a4c196c26e071d42e0343502 # v4.0.2\n      with:\n        aws-region: eu-central-1\n        role-to-assume: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}\n        role-duration-seconds: 18000 # 5 hours is currently max associated with IAM role\n\n  
  - name: Download Neon artifact\n      uses: ./.github/actions/download\n      with:\n        name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact\n        path: /tmp/neon/\n        prefix: latest\n        aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}\n\n    - name: Create Neon Project\n      if: ${{ startsWith(matrix.target_project, 'new_empty_project') }}\n      id: create-neon-project-ingest-target\n      uses: ./.github/actions/neon-project-create\n      with:\n        region_id: aws-us-east-2\n        postgres_version: ${{ matrix.postgres_version }}\n        compute_units: '[7, 7]' # we want to test large compute here to avoid compute-side bottleneck\n        api_key: ${{ secrets.NEON_STAGING_API_KEY }}\n        shard_split_project: ${{ matrix.stripe_size != null && 'true' || 'false' }}\n        admin_api_key: ${{ secrets.NEON_STAGING_ADMIN_API_KEY }}\n        shard_count: 8\n        stripe_size: ${{ matrix.stripe_size }}\n        disable_sharding: ${{ matrix.disable_sharding }}\n\n    - name: Initialize Neon project\n      if: ${{ startsWith(matrix.target_project, 'new_empty_project') }}\n      env:\n          BENCHMARK_INGEST_TARGET_CONNSTR: ${{ steps.create-neon-project-ingest-target.outputs.dsn }}\n          NEW_PROJECT_ID: ${{ steps.create-neon-project-ingest-target.outputs.project_id }}\n      run: |\n        echo \"Initializing Neon project with project_id: ${NEW_PROJECT_ID}\"\n        export LD_LIBRARY_PATH=${PG_16_LIB_PATH}\n        ${PSQL} \"${BENCHMARK_INGEST_TARGET_CONNSTR}\" -c \"CREATE EXTENSION IF NOT EXISTS neon; CREATE EXTENSION IF NOT EXISTS neon_utils;\"\n        echo \"BENCHMARK_INGEST_TARGET_CONNSTR=${BENCHMARK_INGEST_TARGET_CONNSTR}\" >> $GITHUB_ENV\n\n    - name: Create Neon Branch for large tenant\n      if: ${{ matrix.target_project == 'large_existing_project' }}\n      id: create-neon-branch-ingest-target\n      uses: ./.github/actions/neon-branch-create\n      with:\n        project_id: ${{ 
vars.BENCHMARK_INGEST_TARGET_PROJECTID }}\n        api_key: ${{ secrets.NEON_STAGING_API_KEY }}\n\n    - name: Initialize Neon project\n      if: ${{ matrix.target_project == 'large_existing_project' }}\n      env:\n          BENCHMARK_INGEST_TARGET_CONNSTR: ${{ steps.create-neon-branch-ingest-target.outputs.dsn }}\n          NEW_BRANCH_ID: ${{ steps.create-neon-branch-ingest-target.outputs.branch_id }}\n      run: |\n        echo \"Initializing Neon branch with branch_id: ${NEW_BRANCH_ID}\"\n        export LD_LIBRARY_PATH=${PG_16_LIB_PATH}\n        # Extract the part before the database name\n        base_connstr=\"${BENCHMARK_INGEST_TARGET_CONNSTR%/*}\"\n        # Extract the query parameters (if any) after the database name\n        query_params=\"${BENCHMARK_INGEST_TARGET_CONNSTR#*\\?}\"\n        # Reconstruct the new connection string\n        if [ \"$query_params\" != \"$BENCHMARK_INGEST_TARGET_CONNSTR\" ]; then\n          new_connstr=\"${base_connstr}/neondb?${query_params}\"\n        else\n          new_connstr=\"${base_connstr}/neondb\"\n        fi\n        ${PSQL} \"${new_connstr}\" -c \"drop database ludicrous;\"\n        ${PSQL} \"${new_connstr}\" -c \"CREATE DATABASE ludicrous;\"\n        if [ \"$query_params\" != \"$BENCHMARK_INGEST_TARGET_CONNSTR\" ]; then\n          BENCHMARK_INGEST_TARGET_CONNSTR=\"${base_connstr}/ludicrous?${query_params}\"\n        else\n          BENCHMARK_INGEST_TARGET_CONNSTR=\"${base_connstr}/ludicrous\"\n        fi\n        ${PSQL} \"${BENCHMARK_INGEST_TARGET_CONNSTR}\" -c \"CREATE EXTENSION IF NOT EXISTS neon; CREATE EXTENSION IF NOT EXISTS neon_utils;\"\n        echo \"BENCHMARK_INGEST_TARGET_CONNSTR=${BENCHMARK_INGEST_TARGET_CONNSTR}\" >> $GITHUB_ENV\n\n    - name: Invoke pgcopydb\n      uses: ./.github/actions/run-python-test-set\n      with:\n        build_type: remote\n        test_selection: performance/test_perf_ingest_using_pgcopydb.py\n        run_in_parallel: false\n        extra_params: -s -m remote_cluster 
--timeout 86400 -k test_ingest_performance_using_pgcopydb\n        pg_version: v${{ matrix.postgres_version }}\n        save_perf_report: true\n        aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}\n      env:\n        BENCHMARK_INGEST_SOURCE_CONNSTR: ${{ secrets.BENCHMARK_INGEST_SOURCE_CONNSTR }}\n        TARGET_PROJECT_TYPE: ${{ matrix.target_project }}\n        # we report PLATFORM in zenbenchmark NeonBenchmarker perf database and want to distinguish between new project and large tenant\n        PLATFORM: \"${{ matrix.target_project }}-us-east-2-staging\"\n        PERF_TEST_RESULT_CONNSTR: \"${{ secrets.PERF_TEST_RESULT_CONNSTR }}\"\n\n    - name: show tables sizes after ingest\n      run: |\n        export LD_LIBRARY_PATH=${PG_16_LIB_PATH}\n        ${PSQL} \"${BENCHMARK_INGEST_TARGET_CONNSTR}\" -c \"\\dt+\"\n\n    - name: Delete Neon Project\n      if: ${{ always() && startsWith(matrix.target_project, 'new_empty_project') }}\n      uses: ./.github/actions/neon-project-delete\n      with:\n        project_id: ${{ steps.create-neon-project-ingest-target.outputs.project_id }}\n        api_key: ${{ secrets.NEON_STAGING_API_KEY }}\n\n    - name: Delete Neon Branch for large tenant\n      if: ${{ always() && matrix.target_project == 'large_existing_project' }}\n      uses: ./.github/actions/neon-branch-delete\n      with:\n        project_id: ${{ vars.BENCHMARK_INGEST_TARGET_PROJECTID }}\n        branch_id: ${{ steps.create-neon-branch-ingest-target.outputs.branch_id }}\n        api_key: ${{ secrets.NEON_STAGING_API_KEY }}\n"
  },
  {
    "path": ".github/workflows/label-for-external-users.yml",
    "content": "name: Add `external` label to issues and PRs created by external users\n\non:\n  issues:\n    types:\n      - opened\n  pull_request_target:\n    types:\n      - opened\n  workflow_dispatch:\n    inputs:\n      github-actor:\n        description: 'GitHub username. If empty, the username of the current user will be used'\n        required: false\n\n# No permission for GITHUB_TOKEN by default; the **minimal required** set of permissions should be granted in each job.\npermissions: {}\n\nenv:\n  LABEL: external\n\njobs:\n  check-user:\n    runs-on: ubuntu-22.04\n\n    outputs:\n      is-member: ${{ steps.check-user.outputs.is-member }}\n\n    steps:\n    - name: Harden the runner (Audit all outbound calls)\n      uses: step-security/harden-runner@0634a2670c59f64b4a01f0f96f84700a4088b9f0 # v2.12.0\n      with:\n        egress-policy: audit\n\n    - name: Check whether `${{ github.actor }}` is a member of `${{ github.repository_owner }}`\n      id: check-user\n      env:\n        GH_TOKEN: ${{ secrets.CI_ACCESS_TOKEN }}\n        ACTOR: ${{ inputs.github-actor || github.actor }}\n      run: |\n        expected_error=\"User does not exist or is not a member of the organization\"\n        output_file=output.txt\n\n        for i in $(seq 1 10); do\n          if gh api \"/orgs/${GITHUB_REPOSITORY_OWNER}/members/${ACTOR}\" \\\n              -H \"Accept: application/vnd.github+json\" \\\n              -H \"X-GitHub-Api-Version: 2022-11-28\" > ${output_file}; then\n\n            is_member=true\n            break\n          elif grep -q \"${expected_error}\" ${output_file}; then\n            is_member=false\n            break\n          elif [ $i -eq 10 ]; then\n            title=\"Failed to get memmbership status for ${ACTOR}\"\n            message=\"The latest GitHub API error message: '$(cat ${output_file})'\"\n            echo \"::error file=.github/workflows/label-for-external-users.yml,title=${title}::${message}\"\n\n            exit 1\n          fi\n\n     
     sleep 1\n        done\n\n        echo \"is-member=${is_member}\" | tee -a ${GITHUB_OUTPUT}\n\n  add-label:\n    if: needs.check-user.outputs.is-member == 'false'\n    needs: [ check-user ]\n\n    runs-on: ubuntu-22.04\n    permissions:\n      pull-requests: write # for `gh pr edit`\n      issues: write        # for `gh issue edit`\n\n    steps:\n    - name: Harden the runner (Audit all outbound calls)\n      uses: step-security/harden-runner@0634a2670c59f64b4a01f0f96f84700a4088b9f0 # v2.12.0\n      with:\n        egress-policy: audit\n\n    - name: Add `${{ env.LABEL }}` label\n      env:\n        GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}\n        ITEM_NUMBER: ${{ github.event[github.event_name == 'pull_request_target' && 'pull_request' || 'issue'].number }}\n        GH_CLI_COMMAND: ${{ github.event_name == 'pull_request_target' && 'pr' || 'issue' }}\n      run: |\n        gh ${GH_CLI_COMMAND} --repo ${GITHUB_REPOSITORY} edit --add-label=${LABEL} ${ITEM_NUMBER}\n"
  },
  {
    "path": ".github/workflows/large_oltp_benchmark.yml",
    "content": "name: large oltp benchmark\n\non:\n  # uncomment to run on push for debugging your PR\n  #push:\n  #  branches: [ bodobolero/synthetic_oltp_workload ]\n\n  schedule:\n    # * is a special character in YAML so you have to quote this string\n    #          ┌───────────── minute (0 - 59)\n    #          │ ┌───────────── hour (0 - 23)\n    #          │ │  ┌───────────── day of the month (1 - 31)\n    #          │ │  │ ┌───────────── month (1 - 12 or JAN-DEC)\n    #          │ │  │ │ ┌───────────── day of the week (0 - 6 or SUN-SAT)\n    - cron:   '0 15 * * 0,2,4' # run on Sunday, Tuesday, Thursday at 3 PM UTC\n  workflow_dispatch: # adds ability to run this manually\n\ndefaults:\n  run:\n    shell: bash -euxo pipefail {0}\n\nconcurrency:\n  # Allow only one workflow globally because we need dedicated resources which only exist once\n  group: large-oltp-bench-workflow\n  cancel-in-progress: false\n\npermissions:\n  contents: read\n\njobs:\n  oltp:\n    strategy:\n      fail-fast: false # allow other variants to continue even if one fails\n      matrix:\n        include:\n          # test only read-only custom scripts in new branch without database maintenance\n          - target: new_branch\n            custom_scripts: select_any_webhook_with_skew.sql@300 select_recent_webhook.sql@397 select_prefetch_webhook.sql@3\n            test_maintenance: false\n          # test all custom scripts in new branch with database maintenance\n          - target: new_branch\n            custom_scripts: insert_webhooks.sql@200 select_any_webhook_with_skew.sql@300 select_recent_webhook.sql@397 select_prefetch_webhook.sql@3 IUD_one_transaction.sql@100\n            test_maintenance: true\n          # test all custom scripts in reuse branch with database maintenance\n          - target: reuse_branch\n            custom_scripts: insert_webhooks.sql@200 select_any_webhook_with_skew.sql@300 select_recent_webhook.sql@397 select_prefetch_webhook.sql@3 IUD_one_transaction.sql@100\n 
           test_maintenance: true\n      max-parallel: 1 # we want to run each benchmark sequentially to not have noisy neighbors on shared storage (PS, SK)\n    permissions:\n      contents: write\n      statuses: write\n      id-token: write # aws-actions/configure-aws-credentials\n    env:\n      TEST_PG_BENCH_DURATIONS_MATRIX: \"1h\" # todo update to > 1 h\n      TEST_PGBENCH_CUSTOM_SCRIPTS: ${{ matrix.custom_scripts }}\n      POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install\n      PG_VERSION: 16 # pre-determined by pre-determined project\n      TEST_OUTPUT: /tmp/test_output\n      BUILD_TYPE: remote\n      PLATFORM: ${{ matrix.target }}\n\n    runs-on: [ self-hosted, us-east-2, x64 ]\n    container:\n      image: ghcr.io/neondatabase/build-tools:pinned-bookworm\n      credentials:\n        username: ${{ github.actor }}\n        password: ${{ secrets.GITHUB_TOKEN }}\n      options: --init\n\n    # Increase timeout to 2 days, default timeout is 6h - database maintenance can take a long time\n    # (normally 1h pgbench, 3h vacuum analyze 3.5h re-index) x 2 = 15h, leave some buffer for regressions\n    # in one run vacuum didn't finish within 12 hours\n    timeout-minutes: 2880\n\n    steps:\n    - name: Harden the runner (Audit all outbound calls)\n      uses: step-security/harden-runner@4d991eb9b905ef189e4c376166672c3f2f230481 # v2.11.0\n      with:\n        egress-policy: audit\n\n    - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2\n\n    - name: Configure AWS credentials # necessary to download artefacts\n      uses: aws-actions/configure-aws-credentials@e3dd6a429d7300a6a4c196c26e071d42e0343502 # v4.0.2\n      with:\n        aws-region: eu-central-1\n        role-to-assume: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}\n        role-duration-seconds: 18000 # 5 hours is currently max associated with IAM role\n\n    - name: Download Neon artifact\n      uses: ./.github/actions/download\n      with:\n        name: neon-${{ runner.os }}-${{ runner.arch 
}}-release-artifact\n        path: /tmp/neon/\n        prefix: latest\n        aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}\n\n    - name: Create Neon Branch for large tenant\n      if: ${{ matrix.target == 'new_branch' }}\n      id: create-neon-branch-oltp-target\n      uses: ./.github/actions/neon-branch-create\n      with:\n          project_id: ${{ vars.BENCHMARK_LARGE_OLTP_PROJECTID }}\n          api_key: ${{ secrets.NEON_STAGING_API_KEY }}\n\n    - name: Set up Connection String\n      id: set-up-connstr\n      run: |\n        case \"${{ matrix.target }}\" in\n          new_branch)\n          CONNSTR=${{ steps.create-neon-branch-oltp-target.outputs.dsn }}\n          ;;\n          reuse_branch)\n          CONNSTR=${{ secrets.BENCHMARK_LARGE_OLTP_REUSE_CONNSTR }}\n          ;;\n          *)\n          echo >&2 \"Unknown target=${{ matrix.target }}\"\n          exit 1\n          ;;\n        esac\n\n        CONNSTR_WITHOUT_POOLER=\"${CONNSTR//-pooler/}\"\n\n        echo \"connstr=${CONNSTR}\" >> $GITHUB_OUTPUT\n        echo \"connstr_without_pooler=${CONNSTR_WITHOUT_POOLER}\" >> $GITHUB_OUTPUT\n\n    - name: Delete rows from prior runs in reuse branch\n      if: ${{ matrix.target == 'reuse_branch' }}\n      env:\n          BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr_without_pooler }}\n          PG_CONFIG: /tmp/neon/pg_install/v16/bin/pg_config\n          PSQL: /tmp/neon/pg_install/v16/bin/psql\n          PG_16_LIB_PATH: /tmp/neon/pg_install/v16/lib\n      run: |\n        echo \"$(date '+%Y-%m-%d %H:%M:%S') - Deleting rows in table webhook.incoming_webhooks from prior runs\"\n        export LD_LIBRARY_PATH=${PG_16_LIB_PATH}\n        ${PSQL} \"${BENCHMARK_CONNSTR}\" -c \"SET statement_timeout = 0; DELETE FROM webhook.incoming_webhooks WHERE created_at > '2025-02-27 23:59:59+00';\"\n        echo \"$(date '+%Y-%m-%d %H:%M:%S') - Finished deleting rows in table webhook.incoming_webhooks from prior runs\"\n\n    - name: Benchmark pgbench with 
custom-scripts\n      uses: ./.github/actions/run-python-test-set\n      with:\n        build_type: ${{ env.BUILD_TYPE }}\n        test_selection: performance\n        run_in_parallel: false\n        save_perf_report: true\n        extra_params: -m remote_cluster --timeout 7200 -k test_perf_oltp_large_tenant_pgbench\n        pg_version: ${{ env.PG_VERSION }}\n        aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}\n      env:\n        BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }}\n        VIP_VAP_ACCESS_TOKEN: \"${{ secrets.VIP_VAP_ACCESS_TOKEN }}\"\n        PERF_TEST_RESULT_CONNSTR: \"${{ secrets.PERF_TEST_RESULT_CONNSTR }}\"\n\n    - name: Benchmark database maintenance\n      if: ${{ matrix.test_maintenance }}\n      uses: ./.github/actions/run-python-test-set\n      with:\n        build_type: ${{ env.BUILD_TYPE }}\n        test_selection: performance\n        run_in_parallel: false\n        save_perf_report: true\n        extra_params: -m remote_cluster --timeout 172800 -k test_perf_oltp_large_tenant_maintenance\n        pg_version: ${{ env.PG_VERSION }}\n        aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}\n      env:\n        BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr_without_pooler }}\n        VIP_VAP_ACCESS_TOKEN: \"${{ secrets.VIP_VAP_ACCESS_TOKEN }}\"\n        PERF_TEST_RESULT_CONNSTR: \"${{ secrets.PERF_TEST_RESULT_CONNSTR }}\"\n\n    - name: Delete Neon Branch for large tenant\n      if: ${{ always() && matrix.target == 'new_branch' }}\n      uses: ./.github/actions/neon-branch-delete\n      with:\n        project_id: ${{ vars.BENCHMARK_LARGE_OLTP_PROJECTID }}\n        branch_id: ${{ steps.create-neon-branch-oltp-target.outputs.branch_id }}\n        api_key: ${{ secrets.NEON_STAGING_API_KEY }}\n\n    - name: Configure AWS credentials # again because prior steps could have exceeded 5 hours\n      uses: aws-actions/configure-aws-credentials@e3dd6a429d7300a6a4c196c26e071d42e0343502 # v4.0.2\n      with:\n      
  aws-region: eu-central-1\n        role-to-assume: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}\n        role-duration-seconds: 18000 # 5 hours\n\n    - name: Create Allure report\n      id: create-allure-report\n      if: ${{ !cancelled() }}\n      uses: ./.github/actions/allure-report-generate\n      with:\n        aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}\n\n    - name: Post to a Slack channel\n      if: ${{ github.event.schedule && failure() }}\n      uses: slackapi/slack-github-action@fcfb566f8b0aab22203f066d80ca1d7e4b5d05b3 # v1.27.1\n      with:\n        channel-id: \"C06KHQVQ7U3\" # on-call-qa-staging-stream\n        slack-message: |\n          Periodic large oltp perf testing: ${{ job.status }}\n          <${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|GitHub Run>\n          <${{ steps.create-allure-report.outputs.report-url }}|Allure report>\n      env:\n        SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}\n"
  },
  {
    "path": ".github/workflows/large_oltp_growth.yml",
    "content": "name: large oltp growth\n# workflow to grow the reuse branch of large oltp benchmark continuously (about 16 GB per run)\n\non:\n\n  schedule:\n    # * is a special character in YAML so you have to quote this string\n    #        ┌───────────── minute (0 - 59)\n    #        │ ┌───────────── hour (0 - 23)\n    #        │ │  ┌───────────── day of the month (1 - 31)\n    #        │ │  │ ┌───────────── month (1 - 12 or JAN-DEC)\n    #        │ │  │ │ ┌───────────── day of the week (0 - 6 or SUN-SAT)\n    - cron: '0 6 * * *'   # 06:00 UTC\n    - cron: '0 8 * * *'   # 08:00 UTC\n    - cron: '0 10 * * *'  # 10:00 UTC\n    - cron: '0 12 * * *'  # 12:00 UTC\n    - cron: '0 14 * * *'  # 14:00 UTC\n    - cron: '0 16 * * *'  # 16:00 UTC\n  workflow_dispatch: # adds ability to run this manually\n\ndefaults:\n  run:\n    shell: bash -euxo pipefail {0}\n\nconcurrency:\n  # Allow only one workflow globally because we need dedicated resources which only exist once\n  group: large-oltp-growth\n  cancel-in-progress: true\n\npermissions:\n  contents: read\n\njobs:\n  oltp:\n    strategy:\n      fail-fast: false # allow other variants to continue even if one fails\n      matrix:\n        include:\n          # for now only grow the reuse branch, not the other branches.\n          - target: reuse_branch\n            custom_scripts:\n            - grow_action_blocks.sql\n            - grow_action_kwargs.sql\n            - grow_device_fingerprint_event.sql\n            - grow_edges.sql\n            - grow_hotel_rate_mapping.sql\n            - grow_ocr_pipeline_results_version.sql\n            - grow_priceline_raw_response.sql\n            - grow_relabled_transactions.sql\n            - grow_state_values.sql\n            - grow_values.sql\n            - grow_vertices.sql\n            - update_accounting_coding_body_tracking_category_selection.sql\n            - update_action_blocks.sql\n            - update_action_kwargs.sql\n            - 
update_denormalized_approval_workflow.sql\n            - update_device_fingerprint_event.sql\n            - update_edges.sql\n            - update_heron_transaction_enriched_log.sql\n            - update_heron_transaction_enrichment_requests.sql\n            - update_hotel_rate_mapping.sql\n            - update_incoming_webhooks.sql\n            - update_manual_transaction.sql\n            - update_ml_receipt_matching_log.sql\n            - update_ocr_pipeine_results_version.sql\n            - update_orc_pipeline_step_results.sql\n            - update_orc_pipeline_step_results_version.sql\n            - update_priceline_raw_response.sql\n            - update_quickbooks_transactions.sql\n            - update_raw_finicity_transaction.sql\n            - update_relabeled_transactions.sql\n            - update_state_values.sql\n            - update_stripe_authorization_event_log.sql\n            - update_transaction.sql\n            - update_values.sql\n            - update_vertices.sql\n      max-parallel: 1 # we want to run each growth workload sequentially (for now there is just one)\n    permissions:\n      contents: write\n      statuses: write\n      id-token: write # aws-actions/configure-aws-credentials\n    env:\n      TEST_PG_BENCH_DURATIONS_MATRIX: \"1h\"\n      TEST_PGBENCH_CUSTOM_SCRIPTS: ${{ join(matrix.custom_scripts, ' ') }}\n      POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install\n      PG_VERSION: 16 # pre-determined by pre-determined project\n      TEST_OUTPUT: /tmp/test_output\n      BUILD_TYPE: remote\n      PLATFORM: ${{ matrix.target }}\n\n    runs-on: [ self-hosted, us-east-2, x64 ]\n    container:\n      image: ghcr.io/neondatabase/build-tools:pinned-bookworm\n      credentials:\n        username: ${{ github.actor }}\n        password: ${{ secrets.GITHUB_TOKEN }}\n      options: --init\n\n    steps:\n    - name: Harden the runner (Audit all outbound calls)\n      uses: step-security/harden-runner@4d991eb9b905ef189e4c376166672c3f2f230481 # v2.11.0\n     
 with:\n        egress-policy: audit\n\n    - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2\n\n    - name: Configure AWS credentials # necessary to download artefacts\n      uses: aws-actions/configure-aws-credentials@e3dd6a429d7300a6a4c196c26e071d42e0343502 # v4.0.2\n      with:\n        aws-region: eu-central-1\n        role-to-assume: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}\n        role-duration-seconds: 18000 # 5 hours is currently max associated with IAM role\n\n    - name: Download Neon artifact\n      uses: ./.github/actions/download\n      with:\n        name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact\n        path: /tmp/neon/\n        prefix: latest\n        aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}\n\n    - name: Set up Connection String\n      id: set-up-connstr\n      run: |\n        case \"${{ matrix.target }}\" in\n          reuse_branch)\n          CONNSTR=${{ secrets.BENCHMARK_LARGE_OLTP_REUSE_CONNSTR }}\n          ;;\n          *)\n          echo >&2 \"Unknown target=${{ matrix.target }}\"\n          exit 1\n          ;;\n        esac\n\n        CONNSTR_WITHOUT_POOLER=\"${CONNSTR//-pooler/}\"\n\n        echo \"connstr=${CONNSTR}\" >> $GITHUB_OUTPUT\n        echo \"connstr_without_pooler=${CONNSTR_WITHOUT_POOLER}\" >> $GITHUB_OUTPUT\n\n    - name: pgbench with custom-scripts\n      uses: ./.github/actions/run-python-test-set\n      with:\n        build_type: ${{ env.BUILD_TYPE }}\n        test_selection: performance\n        run_in_parallel: false\n        save_perf_report: true\n        extra_params: -m remote_cluster --timeout 7200 -k test_perf_oltp_large_tenant_growth\n        pg_version: ${{ env.PG_VERSION }}\n        aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}\n      env:\n        BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }}\n        VIP_VAP_ACCESS_TOKEN: \"${{ secrets.VIP_VAP_ACCESS_TOKEN }}\"\n        PERF_TEST_RESULT_CONNSTR: \"${{ secrets.PERF_TEST_RESULT_CONNSTR 
}}\"\n\n    - name: Create Allure report\n      id: create-allure-report\n      if: ${{ !cancelled() }}\n      uses: ./.github/actions/allure-report-generate\n      with:\n        aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}\n\n    - name: Post to a Slack channel\n      if: ${{ github.event.schedule && failure() }}\n      uses: slackapi/slack-github-action@fcfb566f8b0aab22203f066d80ca1d7e4b5d05b3 # v1.27.1\n      with:\n        channel-id: \"C06KHQVQ7U3\" # on-call-qa-staging-stream\n        slack-message: |\n          Periodic large oltp tenant growth increase: ${{ job.status }}\n          <${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|GitHub Run>\n          <${{ steps.create-allure-report.outputs.report-url }}|Allure report>\n      env:\n        SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}\n"
  },
  {
    "path": ".github/workflows/lint-release-pr.yml",
    "content": "name: Lint Release PR\n\non:\n  pull_request:\n    branches:\n      - release\n      - release-proxy\n      - release-compute\n\npermissions:\n  contents: read\n\njobs:\n  lint-release-pr:\n    runs-on: ubuntu-22.04\n    steps:\n      - name: Harden the runner (Audit all outbound calls)\n        uses: step-security/harden-runner@4d991eb9b905ef189e4c376166672c3f2f230481 # v2.11.0\n        with:\n          egress-policy: audit\n\n      - name: Checkout PR branch\n        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2\n        with:\n          fetch-depth: 0  # Fetch full history for git operations\n          ref: ${{ github.event.pull_request.head.ref }}\n\n      - name: Run lint script\n        env:\n          RELEASE_BRANCH: ${{ github.base_ref }}\n        run: |\n          ./.github/scripts/lint-release-pr.sh\n"
  },
  {
    "path": ".github/workflows/neon_extra_builds.yml",
    "content": "name: Check neon with extra platform builds\n\non:\n  push:\n    branches:\n      - main\n  pull_request:\n\ndefaults:\n  run:\n    shell: bash -euxo pipefail {0}\n\nconcurrency:\n  # Allow only one workflow per any non-`main` branch.\n  group: ${{ github.workflow }}-${{ github.ref_name }}-${{ github.ref_name == 'main' && github.sha || 'anysha' }}\n  cancel-in-progress: true\n\nenv:\n  RUST_BACKTRACE: 1\n  COPT: '-Werror'\n\njobs:\n  check-permissions:\n    if: ${{ !contains(github.event.pull_request.labels.*.name, 'run-no-ci') }}\n    uses: ./.github/workflows/check-permissions.yml\n    with:\n      github-event-name: ${{ github.event_name}}\n\n  build-build-tools-image:\n    needs: [ check-permissions ]\n    uses: ./.github/workflows/build-build-tools-image.yml\n    secrets: inherit\n\n  files-changed:\n    name: Detect what files changed\n    runs-on: ubuntu-22.04\n    timeout-minutes: 3\n    outputs:\n      v17: ${{ steps.files_changed.outputs.v17 }}\n      postgres_changes: ${{ steps.postgres_changes.outputs.changes }}\n      rebuild_rust_code: ${{ steps.files_changed.outputs.rust_code }}\n      rebuild_everything: ${{ steps.files_changed.outputs.rebuild_neon_extra || steps.files_changed.outputs.rebuild_macos }}\n\n    steps:\n      - name: Harden the runner (Audit all outbound calls)\n        uses: step-security/harden-runner@4d991eb9b905ef189e4c376166672c3f2f230481 # v2.11.0\n        with:\n          egress-policy: audit\n\n      - name: Checkout\n        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2\n        with:\n          submodules: true\n\n      - name: Check for Postgres changes\n        uses: dorny/paths-filter@1441771bbfdd59dcd748680ee64ebd8faab1a242  #v3\n        id: files_changed\n        with:\n          token: ${{ github.token }}\n          filters: .github/file-filters.yaml\n          base: ${{ github.event_name != 'pull_request' && (github.event.merge_group.base_ref || github.ref_name) || '' }}\n      
    ref: ${{ github.event_name != 'pull_request' && (github.event.merge_group.head_ref || github.ref) || '' }}\n\n      - name: Filter out only v-string for build matrix\n        id: postgres_changes\n        env:\n          CHANGES: ${{ steps.files_changed.outputs.changes }}\n        run: |\n          v_strings_only_as_json_array=$(echo ${CHANGES} | jq '.[]|select(test(\"v\\\\d+\"))' | jq --slurp -c)\n          echo \"changes=${v_strings_only_as_json_array}\" | tee -a \"${GITHUB_OUTPUT}\"\n\n  check-macos-build:\n    needs: [ check-permissions, files-changed ]\n    uses: ./.github/workflows/build-macos.yml\n    secrets: inherit\n    with:\n      pg_versions: ${{ needs.files-changed.outputs.postgres_changes }}\n      rebuild_rust_code: ${{ fromJSON(needs.files-changed.outputs.rebuild_rust_code) }}\n      rebuild_everything: ${{ fromJSON(needs.files-changed.outputs.rebuild_everything) }}\n\n  gather-rust-build-stats:\n    needs: [ check-permissions, build-build-tools-image, files-changed ]\n    permissions:\n      id-token: write # aws-actions/configure-aws-credentials\n      statuses: write\n      contents: write\n    if: |\n      (needs.files-changed.outputs.v17 == 'true' || needs.files-changed.outputs.rebuild_everything == 'true') && (\n        contains(github.event.pull_request.labels.*.name, 'run-extra-build-stats') ||\n        contains(github.event.pull_request.labels.*.name, 'run-extra-build-*') ||\n        github.ref_name == 'main'\n      )\n    runs-on: [ self-hosted, large ]\n    container:\n      image: ${{ needs.build-build-tools-image.outputs.image }}-bookworm\n      credentials:\n        username: ${{ github.actor }}\n        password: ${{ secrets.GITHUB_TOKEN }}\n      options: --init\n\n    env:\n      BUILD_TYPE: release\n      # build with incremental compilation produce partial results\n      # so do not attempt to cache this build, also disable the incremental compilation\n      CARGO_INCREMENTAL: 0\n\n    steps:\n      - name: Harden the runner 
(Audit all outbound calls)\n        uses: step-security/harden-runner@4d991eb9b905ef189e4c376166672c3f2f230481 # v2.11.0\n        with:\n          egress-policy: audit\n\n      - name: Checkout\n        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2\n        with:\n          submodules: true\n\n      # Some of our rust modules use FFI and need those to be checked\n      - name: Get postgres headers\n        run: make postgres-headers -j$(nproc)\n\n      - name: Build walproposer-lib\n        run: make walproposer-lib -j$(nproc)\n\n      - name: Produce the build stats\n        run: cargo build --all --release --timings -j$(nproc)\n\n      - name: Configure AWS credentials\n        uses: aws-actions/configure-aws-credentials@e3dd6a429d7300a6a4c196c26e071d42e0343502 # v4.0.2\n        with:\n          aws-region: eu-central-1\n          role-to-assume: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}\n          role-duration-seconds: 3600\n\n      - name: Upload the build stats\n        id: upload-stats\n        env:\n          BUCKET: neon-github-public-dev\n          SHA: ${{ github.event.pull_request.head.sha || github.sha }}\n        run: |\n          REPORT_URL=https://${BUCKET}.s3.amazonaws.com/build-stats/${SHA}/${GITHUB_RUN_ID}/cargo-timing.html\n          aws s3 cp --only-show-errors ./target/cargo-timings/cargo-timing.html \"s3://${BUCKET}/build-stats/${SHA}/${GITHUB_RUN_ID}/\"\n          echo \"report-url=${REPORT_URL}\" >> $GITHUB_OUTPUT\n\n      - name: Publish build stats report\n        uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1\n        env:\n          REPORT_URL: ${{ steps.upload-stats.outputs.report-url }}\n          SHA: ${{ github.event.pull_request.head.sha || github.sha }}\n        with:\n          # Retry script for 5XX server errors: https://github.com/actions/github-script#retries\n          retries: 5\n          script: |\n            const { REPORT_URL, SHA } = process.env\n\n            await 
github.rest.repos.createCommitStatus({\n              owner: context.repo.owner,\n              repo: context.repo.repo,\n              sha: `${SHA}`,\n              state: 'success',\n              target_url: `${REPORT_URL}`,\n              context: `Build stats (release)`,\n            })\n"
  },
  {
    "path": ".github/workflows/periodic_pagebench.yml",
    "content": "name: Periodic pagebench performance test on unit-perf-aws-arm runners\n\non:\n  schedule:\n    # * is a special character in YAML so you have to quote this string\n    #        ┌───────────── minute (0 - 59)\n    #        │   ┌───────────── hour (0 - 23)\n    #        │   │ ┌───────────── day of the month (1 - 31)\n    #        │   │ │ ┌───────────── month (1 - 12 or JAN-DEC)\n    #        │   │ │ │ ┌───────────── day of the week (0 - 6 or SUN-SAT)\n    - cron: '0 */4 * * *' # Runs every 4 hours\n  workflow_dispatch: # Allows manual triggering of the workflow\n    inputs:\n      commit_hash:\n        type: string\n        description: 'The long neon repo commit hash for the system under test (pageserver) to be tested.'\n        required: false\n        default: ''\n      recreate_snapshots:\n        type: boolean\n        description: 'Recreate snapshots - !!!WARNING!!! We should only recreate snapshots if the previous ones are no longer compatible. Otherwise benchmarking results are not comparable across runs.'\n        required: false\n        default: false\n\ndefaults:\n  run:\n    shell: bash -euo pipefail {0}\n\nconcurrency:\n  group: ${{ github.workflow }}\n  cancel-in-progress: false\n\npermissions:\n  contents: read\n\njobs:\n  run_periodic_pagebench_test:\n    permissions:\n      id-token: write # aws-actions/configure-aws-credentials\n      statuses: write\n      contents: write\n      pull-requests: write\n    runs-on: [ self-hosted, unit-perf-aws-arm ]\n    container:\n      image: ghcr.io/neondatabase/build-tools:pinned-bookworm\n      credentials:\n        username: ${{ github.actor }}\n        password: ${{ secrets.GITHUB_TOKEN }}\n      options: --init\n    timeout-minutes: 360  # Set the timeout to 6 hours\n    env:\n      RUN_ID: ${{ github.run_id }}\n      DEFAULT_PG_VERSION: 16\n      BUILD_TYPE: release\n      RUST_BACKTRACE: 1\n      # NEON_ENV_BUILDER_USE_OVERLAYFS_FOR_SNAPSHOTS: 1 - doesn't work without root in container\n  
    S3_BUCKET: neon-github-public-dev\n      PERF_TEST_RESULT_CONNSTR: \"${{ secrets.PERF_TEST_RESULT_CONNSTR }}\"\n    steps:\n    # we don't need the neon source code because we run everything remotely\n    # however we still need the local github actions to run the allure step below\n    - name: Harden the runner (Audit all outbound calls)\n      uses: step-security/harden-runner@4d991eb9b905ef189e4c376166672c3f2f230481 # v2.11.0\n      with:\n        egress-policy: audit\n\n    - name: Set up the environment which depends on $RUNNER_TEMP on nvme drive\n      id: set-env\n      shell: bash -euxo pipefail {0}\n      run: |\n        {\n          echo \"NEON_DIR=${RUNNER_TEMP}/neon\"\n          echo \"NEON_BIN=${RUNNER_TEMP}/neon/bin\"\n          echo \"POSTGRES_DISTRIB_DIR=${RUNNER_TEMP}/neon/pg_install\"\n          echo \"LD_LIBRARY_PATH=${RUNNER_TEMP}/neon/pg_install/v${DEFAULT_PG_VERSION}/lib\"\n          echo \"BACKUP_DIR=${RUNNER_TEMP}/instance_store/saved_snapshots\"\n          echo \"TEST_OUTPUT=${RUNNER_TEMP}/neon/test_output\"\n          echo \"PERF_REPORT_DIR=${RUNNER_TEMP}/neon/test_output/perf-report-local\"\n          echo \"ALLURE_DIR=${RUNNER_TEMP}/neon/test_output/allure-results\"\n          echo \"ALLURE_RESULTS_DIR=${RUNNER_TEMP}/neon/test_output/allure-results/results\"\n        } >> \"$GITHUB_ENV\"\n\n        echo \"allure_results_dir=${RUNNER_TEMP}/neon/test_output/allure-results/results\" >> \"$GITHUB_OUTPUT\"\n\n    - uses: aws-actions/configure-aws-credentials@e3dd6a429d7300a6a4c196c26e071d42e0343502 # v4.0.2\n      with:\n        aws-region: eu-central-1\n        role-to-assume: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}\n        role-duration-seconds: 18000 # max 5 hours (needed in case commit hash is still being built)\n    - name: Determine commit hash\n      id: commit_hash\n      shell: bash -euxo pipefail {0}\n      env:\n        INPUT_COMMIT_HASH: ${{ github.event.inputs.commit_hash }}\n      run: |\n        if [[ -z \"${INPUT_COMMIT_HASH}\" 
]]; then\n          COMMIT_HASH=$(curl -s https://api.github.com/repos/neondatabase/neon/commits/main | jq -r '.sha')\n          echo \"COMMIT_HASH=$COMMIT_HASH\" >> $GITHUB_ENV\n          echo \"commit_hash=$COMMIT_HASH\" >> \"$GITHUB_OUTPUT\"\n          echo \"COMMIT_HASH_TYPE=latest\" >> $GITHUB_ENV\n        else\n          COMMIT_HASH=\"${INPUT_COMMIT_HASH}\"\n          echo \"COMMIT_HASH=$COMMIT_HASH\" >> $GITHUB_ENV\n          echo \"commit_hash=$COMMIT_HASH\" >> \"$GITHUB_OUTPUT\"\n          echo \"COMMIT_HASH_TYPE=manual\" >> $GITHUB_ENV\n        fi\n    - name: Checkout the neon repository at given commit hash\n      uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2\n      with:\n        ref: ${{ steps.commit_hash.outputs.commit_hash }}\n\n    # does not reuse ./.github/actions/download because we need to download the artifact for the given commit hash\n    # example artifact\n    # s3://neon-github-public-dev/artifacts/48b870bc078bd2c450eb7b468e743b9c118549bf/15036827400/1/neon-Linux-X64-release-artifact.tar.zst /instance_store/artifacts/neon-Linux-release-artifact.tar.zst\n    - name: Determine artifact S3_KEY for given commit hash and download and extract artifact\n      id: artifact_prefix\n      shell: bash -euxo pipefail {0}\n      env:\n        ARCHIVE: ${{ runner.temp }}/downloads/neon-${{ runner.os }}-${{ runner.arch }}-release-artifact.tar.zst\n        COMMIT_HASH: ${{ env.COMMIT_HASH }}\n        COMMIT_HASH_TYPE: ${{ env.COMMIT_HASH_TYPE }}\n      run: |\n        attempt=0\n        max_attempts=24 # 5 minutes * 24 = 2 hours\n\n        while [[ $attempt -lt $max_attempts ]]; do\n          # the following command will fail until the artifacts are available ...\n          S3_KEY=$(aws s3api list-objects-v2 --bucket \"$S3_BUCKET\" --prefix \"artifacts/$COMMIT_HASH/\" \\\n            | jq -r '.Contents[]?.Key' \\\n            | grep \"neon-${{ runner.os }}-${{ runner.arch }}-release-artifact.tar.zst\" \\\n            | sort 
--version-sort \\\n            | tail -1) || true # ... thus ignore errors from the command\n          if [[ -n \"${S3_KEY}\" ]]; then\n            echo \"Artifact found: $S3_KEY\"\n            echo \"S3_KEY=$S3_KEY\" >> $GITHUB_ENV\n            break\n          fi\n          \n          # Increment attempt counter and sleep for 5 minutes\n          attempt=$((attempt + 1))\n          echo \"Attempt $attempt of $max_attempts to find artifacts in S3 bucket s3://$S3_BUCKET/artifacts/$COMMIT_HASH failed. Retrying in 5 minutes...\"\n          sleep 300 # Sleep for 5 minutes\n        done\n\n        if [[ -z \"${S3_KEY}\" ]]; then\n          echo \"Error: artifact not found in S3 bucket s3://$S3_BUCKET/artifacts/$COMMIT_HASH\" after 2 hours\n        else\n          mkdir -p $(dirname $ARCHIVE)\n          time aws s3 cp --only-show-errors s3://$S3_BUCKET/${S3_KEY} ${ARCHIVE}\n          mkdir -p ${NEON_DIR}\n          time tar -xf ${ARCHIVE} -C ${NEON_DIR}\n          rm -f ${ARCHIVE}\n        fi\n\n    - name: Download snapshots from S3\n      if: ${{ github.event_name != 'workflow_dispatch' || github.event.inputs.recreate_snapshots == 'false' || github.event.inputs.recreate_snapshots == '' }}\n      id: download_snapshots\n      shell: bash -euxo pipefail {0}\n      run: |\n        # Download the snapshots from S3\n        mkdir -p ${TEST_OUTPUT}\n        mkdir -p $BACKUP_DIR\n        cd $BACKUP_DIR\n        mkdir parts\n        cd parts\n        PART=$(aws s3api list-objects-v2 --bucket $S3_BUCKET --prefix performance/pagebench/ \\\n          | jq -r '.Contents[]?.Key' \\\n          | grep -E 'shared-snapshots-[0-9]{4}-[0-9]{2}-[0-9]{2}' \\\n          | sort \\\n          | tail -1)\n        echo \"Latest PART: $PART\"\n        if [[ -z \"$PART\" ]]; then\n          echo \"ERROR: No matching S3 key found\" >&2\n          exit 1\n        fi\n        S3_KEY=$(dirname $PART)\n        time aws s3 cp --only-show-errors --recursive s3://${S3_BUCKET}/$S3_KEY/ .\n        cd 
$TEST_OUTPUT\n        time cat $BACKUP_DIR/parts/* | zstdcat | tar --extract --preserve-permissions\n        rm -rf ${BACKUP_DIR}\n\n    - name: Cache poetry deps\n      uses: actions/cache@v4\n      with:\n        path: ~/.cache/pypoetry/virtualenvs\n        key: v2-${{ runner.os }}-${{ runner.arch }}-python-deps-bookworm-${{ hashFiles('poetry.lock') }}\n\n    - name: Install Python deps\n      shell: bash -euxo pipefail {0}\n      run: ./scripts/pysync\n\n    # we need high number of open files for pagebench\n    - name: show ulimits\n      shell: bash -euxo pipefail {0}\n      run: |\n        ulimit -a\n\n    - name: Run pagebench testcase\n      shell: bash -euxo pipefail {0}\n      env:\n        CI: false  # need to override this env variable set by github to enforce using snapshots\n      run: |\n        export PLATFORM=hetzner-unit-perf-${COMMIT_HASH_TYPE}\n        # report the commit hash of the neon repository in the revision of the test results\n        export GITHUB_SHA=${COMMIT_HASH}\n        rm -rf ${PERF_REPORT_DIR}\n        rm -rf ${ALLURE_RESULTS_DIR}\n        mkdir -p ${PERF_REPORT_DIR}\n        mkdir -p ${ALLURE_RESULTS_DIR}\n        PARAMS=\"--alluredir=${ALLURE_RESULTS_DIR} --tb=short --verbose -rA\"\n        EXTRA_PARAMS=\"--out-dir ${PERF_REPORT_DIR} --durations-path $TEST_OUTPUT/benchmark_durations.json\"\n        # run only two selected tests\n        # environment set by parent:\n        # RUST_BACKTRACE=1 DEFAULT_PG_VERSION=16 BUILD_TYPE=release\n        ./scripts/pytest ${PARAMS} test_runner/performance/pageserver/pagebench/test_pageserver_max_throughput_getpage_at_latest_lsn.py::test_pageserver_characterize_throughput_with_n_tenants ${EXTRA_PARAMS}\n        ./scripts/pytest ${PARAMS} test_runner/performance/pageserver/pagebench/test_pageserver_max_throughput_getpage_at_latest_lsn.py::test_pageserver_characterize_latencies_with_1_client_and_throughput_with_many_clients_one_tenant ${EXTRA_PARAMS}\n\n    - name: upload the performance 
metrics to the Neon performance database which is used by grafana dashboards to display the results\n      shell: bash -euxo pipefail {0}\n      run: |\n        export REPORT_FROM=\"$PERF_REPORT_DIR\"\n        export GITHUB_SHA=${COMMIT_HASH}\n        time ./scripts/generate_and_push_perf_report.sh\n\n    - name: Upload test results\n      if: ${{ !cancelled() }}\n      uses: ./.github/actions/allure-report-store\n      with:\n        report-dir:  ${{ steps.set-env.outputs.allure_results_dir }}\n        unique-key: ${{ env.BUILD_TYPE }}-${{ env.DEFAULT_PG_VERSION }}-${{ runner.arch }}\n        aws-oidc-role-arn:  ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}\n\n    - name: Create Allure report\n      id: create-allure-report\n      if: ${{ !cancelled() }}\n      uses: ./.github/actions/allure-report-generate\n      with:\n        aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}\n\n    - name: Upload snapshots\n      if: ${{ github.event_name == 'workflow_dispatch' && github.event.inputs.recreate_snapshots != 'false' && github.event.inputs.recreate_snapshots != '' }}\n      id: upload_snapshots\n      shell: bash -euxo pipefail {0}\n      run: |\n        mkdir -p $BACKUP_DIR\n        cd $TEST_OUTPUT\n        tar --create --preserve-permissions --file - shared-snapshots | zstd -o $BACKUP_DIR/shared_snapshots.tar.zst\n        cd $BACKUP_DIR\n        mkdir parts\n        split -b 1G shared_snapshots.tar.zst ./parts/shared_snapshots.tar.zst.part.\n        SNAPSHOT_DATE=$(date +%F)  # YYYY-MM-DD\n        cd parts\n        time aws s3 cp --recursive . 
s3://${S3_BUCKET}/performance/pagebench/shared-snapshots-${SNAPSHOT_DATE}/\n\n    - name: Post to a Slack channel\n      if: ${{ github.event.schedule && failure() }}\n      uses: slackapi/slack-github-action@fcfb566f8b0aab22203f066d80ca1d7e4b5d05b3 # v1.27.1\n      with:\n        channel-id: \"C06KHQVQ7U3\" # on-call-qa-staging-stream\n        slack-message: \"Periodic pagebench testing on dedicated hardware: ${{ job.status }}\\n${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}\"\n      env:\n        SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}\n        \n    - name: Cleanup Test Resources\n      if: always()\n      shell: bash -euxo pipefail {0}\n      env:\n        ARCHIVE: ${{ runner.temp }}/downloads/neon-${{ runner.os }}-${{ runner.arch }}-release-artifact.tar.zst\n      run: |\n        # Cleanup the test resources\n        if [[ -d \"${BACKUP_DIR}\" ]]; then\n          rm -rf ${BACKUP_DIR}\n        fi\n        if [[ -d \"${TEST_OUTPUT}\" ]]; then\n          rm -rf ${TEST_OUTPUT}\n        fi\n        if [[ -d \"${NEON_DIR}\" ]]; then\n          rm -rf ${NEON_DIR}\n        fi\n        rm -rf $(dirname $ARCHIVE)\n\n"
  },
  {
    "path": ".github/workflows/pg-clients.yml",
    "content": "name: Test Postgres client libraries\n\non:\n  schedule:\n    # * is a special character in YAML so you have to quote this string\n    #          ┌───────────── minute (0 - 59)\n    #          │ ┌───────────── hour (0 - 23)\n    #          │ │ ┌───────────── day of the month (1 - 31)\n    #          │ │ │ ┌───────────── month (1 - 12 or JAN-DEC)\n    #          │ │ │ │ ┌───────────── day of the week (0 - 6 or SUN-SAT)\n    - cron:  '23 02 * * *' # run once a day, timezone is utc\n  pull_request:\n    paths:\n      - '.github/workflows/pg-clients.yml'\n      - 'test_runner/pg_clients/**/*.py'\n      - 'test_runner/logical_repl/**/*.py'\n      - 'poetry.lock'\n  workflow_dispatch:\n\nconcurrency:\n  group: ${{ github.workflow }}-${{ github.ref_name }}\n  cancel-in-progress: ${{ github.event_name == 'pull_request' }}\n\ndefaults:\n  run:\n    shell: bash -euxo pipefail {0}\n\npermissions:\n  id-token: write # aws-actions/configure-aws-credentials\n  statuses: write # require for posting a status update\n\nenv:\n  DEFAULT_PG_VERSION: 17\n  PLATFORM: neon-captest-new\n  AWS_DEFAULT_REGION: eu-central-1\n\njobs:\n  check-permissions:\n    if: ${{ !contains(github.event.pull_request.labels.*.name, 'run-no-ci') }}\n    uses: ./.github/workflows/check-permissions.yml\n    with:\n      github-event-name: ${{ github.event_name }}\n\n  build-build-tools-image:\n    permissions:\n      packages: write\n    needs: [ check-permissions ]\n    uses: ./.github/workflows/build-build-tools-image.yml\n    secrets: inherit\n\n  generate-ch-tmppw:\n    runs-on: ubuntu-22.04\n    outputs:\n      tmp_val: ${{ steps.pwgen.outputs.tmp_val }}\n    steps:\n      - name: Generate a random password\n        id: pwgen\n        run: |\n          set +x\n          p=$(dd if=/dev/random bs=14 count=1 2>/dev/null | base64)\n          echo tmp_val=\"${p//\\//}\" >> \"${GITHUB_OUTPUT}\"\n\n  test-logical-replication:\n    needs: [ build-build-tools-image, generate-ch-tmppw ]\n    
runs-on: ubuntu-22.04\n\n    container:\n      image: ${{ needs.build-build-tools-image.outputs.image }}-bookworm\n      credentials:\n        username: ${{ github.actor }}\n        password: ${{ secrets.GITHUB_TOKEN }}\n      options: --init --user root\n    services:\n      clickhouse:\n        image: clickhouse/clickhouse-server:25.6\n        env:\n          CLICKHOUSE_PASSWORD: ${{ needs.generate-ch-tmppw.outputs.tmp_val }}\n          PGSSLCERT: /tmp/postgresql.crt\n        ports:\n          - 9000:9000\n          - 8123:8123\n      zookeeper:\n        image: quay.io/debezium/zookeeper:3.1.3.Final\n        ports:\n          - 2181:2181\n          - 2888:2888\n          - 3888:3888\n      kafka:\n        image: quay.io/debezium/kafka:3.1.3.Final\n        env:\n          ZOOKEEPER_CONNECT: \"zookeeper:2181\"\n          KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://kafka:9092\n          KAFKA_BROKER_ID: 1\n          KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR: 1\n          KAFKA_JMX_PORT: 9991\n        ports:\n          - 9092:9092\n      debezium:\n        image: quay.io/debezium/connect:3.1.3.Final\n        env:\n          BOOTSTRAP_SERVERS: kafka:9092\n          GROUP_ID: 1\n          CONFIG_STORAGE_TOPIC: debezium-config\n          OFFSET_STORAGE_TOPIC: debezium-offset\n          STATUS_STORAGE_TOPIC: debezium-status\n          DEBEZIUM_CONFIG_CONNECTOR_CLASS: io.debezium.connector.postgresql.PostgresConnector\n        ports:\n          - 8083:8083\n    steps:\n      - name: Harden the runner (Audit all outbound calls)\n        uses: step-security/harden-runner@4d991eb9b905ef189e4c376166672c3f2f230481 # v2.11.0\n        with:\n          egress-policy: audit\n\n      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2\n\n      - name: Download Neon artifact\n        uses: ./.github/actions/download\n        with:\n          name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact\n          path: /tmp/neon/\n          prefix: latest\n         
 aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}\n\n      - name: Create Neon Project\n        id: create-neon-project\n        uses: ./.github/actions/neon-project-create\n        with:\n          api_key: ${{ secrets.NEON_STAGING_API_KEY }}\n          postgres_version: ${{ env.DEFAULT_PG_VERSION }}\n          project_settings: >-\n            {\"enable_logical_replication\": true}\n\n      - name: Run tests\n        uses: ./.github/actions/run-python-test-set\n        with:\n          build_type: remote\n          test_selection: logical_repl\n          run_in_parallel: false\n          extra_params: -m remote_cluster\n          pg_version: ${{ env.DEFAULT_PG_VERSION }}\n          aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}\n        env:\n          BENCHMARK_CONNSTR: ${{ steps.create-neon-project.outputs.dsn }}\n          CLICKHOUSE_PASSWORD: ${{ needs.generate-ch-tmppw.outputs.tmp_val }}\n\n      - name: Delete Neon Project\n        if: always()\n        uses: ./.github/actions/neon-project-delete\n        with:\n          project_id: ${{ steps.create-neon-project.outputs.project_id }}\n          api_key: ${{ secrets.NEON_STAGING_API_KEY }}\n\n      - name: Create Allure report\n        if: ${{ !cancelled() }}\n        id: create-allure-report\n        uses: ./.github/actions/allure-report-generate\n        with:\n          store-test-results-into-db: true\n          aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}\n        env:\n          REGRESS_TEST_RESULT_CONNSTR_NEW: ${{ secrets.REGRESS_TEST_RESULT_CONNSTR_NEW }}\n\n      - name: Post to a Slack channel\n        if: github.event.schedule && failure()\n        uses: slackapi/slack-github-action@fcfb566f8b0aab22203f066d80ca1d7e4b5d05b3 # v1.27.1\n        with:\n          channel-id: \"C06KHQVQ7U3\" # on-call-qa-staging-stream\n          slack-message: |\n            Testing the logical replication: <${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|${{ 
job.status }}> (<${{ steps.create-allure-report.outputs.report-url }}|test report>)\n        env:\n          SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}\n\n  test-postgres-client-libs:\n    needs: [ build-build-tools-image ]\n    runs-on: ubuntu-22.04\n\n    container:\n      image: ${{ needs.build-build-tools-image.outputs.image }}-bookworm\n      credentials:\n        username: ${{ github.actor }}\n        password: ${{ secrets.GITHUB_TOKEN }}\n      options: --init --user root\n\n    steps:\n    - name: Harden the runner (Audit all outbound calls)\n      uses: step-security/harden-runner@4d991eb9b905ef189e4c376166672c3f2f230481 # v2.11.0\n      with:\n        egress-policy: audit\n\n    - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2\n\n    - name: Download Neon artifact\n      uses: ./.github/actions/download\n      with:\n        name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact\n        path: /tmp/neon/\n        prefix: latest\n        aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}\n\n    - name: Create Neon Project\n      id: create-neon-project\n      uses: ./.github/actions/neon-project-create\n      with:\n        api_key: ${{ secrets.NEON_STAGING_API_KEY }}\n        postgres_version: ${{ env.DEFAULT_PG_VERSION }}\n\n    - name: Run tests\n      uses: ./.github/actions/run-python-test-set\n      with:\n        build_type: remote\n        test_selection: pg_clients\n        run_in_parallel: false\n        extra_params: -m remote_cluster\n        pg_version: ${{ env.DEFAULT_PG_VERSION }}\n        aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}\n      env:\n        BENCHMARK_CONNSTR: ${{ steps.create-neon-project.outputs.dsn }}\n\n    - name: Delete Neon Project\n      if: always()\n      uses: ./.github/actions/neon-project-delete\n      with:\n        project_id: ${{ steps.create-neon-project.outputs.project_id }}\n        api_key: ${{ secrets.NEON_STAGING_API_KEY }}\n\n    - name: Create Allure 
report\n      if: ${{ !cancelled() }}\n      id: create-allure-report\n      uses: ./.github/actions/allure-report-generate\n      with:\n        store-test-results-into-db: true\n        aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}\n      env:\n        REGRESS_TEST_RESULT_CONNSTR_NEW: ${{ secrets.REGRESS_TEST_RESULT_CONNSTR_NEW }}\n\n    - name: Post to a Slack channel\n      if: github.event.schedule && failure()\n      uses: slackapi/slack-github-action@fcfb566f8b0aab22203f066d80ca1d7e4b5d05b3 # v1.27.1\n      with:\n        channel-id: \"C06KHQVQ7U3\" # on-call-qa-staging-stream\n        slack-message: |\n          Testing Postgres clients: <${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|${{ job.status }}> (<${{ steps.create-allure-report.outputs.report-url }}|test report>)\n      env:\n        SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}\n"
  },
  {
    "path": ".github/workflows/pin-build-tools-image.yml",
    "content": "name: 'Pin build-tools image'\n\non:\n  workflow_dispatch:\n    inputs:\n      from-tag:\n        description: 'Source tag'\n        required: true\n        type: string\n      force:\n        description: 'Force the image to be pinned'\n        default: false\n        type: boolean\n  workflow_call:\n    inputs:\n      from-tag:\n        description: 'Source tag'\n        required: true\n        type: string\n      force:\n        description: 'Force the image to be pinned'\n        default: false\n        type: boolean\n\ndefaults:\n  run:\n    shell: bash -euo pipefail {0}\n\nconcurrency:\n  group: pin-build-tools-image-${{ inputs.from-tag }}\n  cancel-in-progress: false\n\n# No permission for GITHUB_TOKEN by default; the **minimal required** set of permissions should be granted in each job.\npermissions: {}\n\njobs:\n  check-manifests:\n    runs-on: ubuntu-22.04\n    outputs:\n      skip: ${{ steps.check-manifests.outputs.skip }}\n\n    steps:\n      - name: Harden the runner (Audit all outbound calls)\n        uses: step-security/harden-runner@0634a2670c59f64b4a01f0f96f84700a4088b9f0 # v2.12.0\n        with:\n          egress-policy: audit\n\n      - name: Check if we really need to pin the image\n        id: check-manifests\n        env:\n          FROM_TAG: ${{ inputs.from-tag }}\n          TO_TAG: pinned\n        run: |\n          docker manifest inspect \"ghcr.io/neondatabase/build-tools:${FROM_TAG}\" > \"${FROM_TAG}.json\"\n          docker manifest inspect \"ghcr.io/neondatabase/build-tools:${TO_TAG}\"   > \"${TO_TAG}.json\"\n\n          if diff \"${FROM_TAG}.json\" \"${TO_TAG}.json\"; then\n            skip=true\n          else\n            skip=false\n          fi\n\n          echo \"skip=${skip}\" | tee -a $GITHUB_OUTPUT\n\n  tag-image:\n    needs: check-manifests\n\n    # use format(..) 
to catch both inputs.force = true AND inputs.force = 'true'\n    if: needs.check-manifests.outputs.skip == 'false' || format('{0}', inputs.force) == 'true'\n\n    permissions:\n      id-token: write  # Required for aws/azure login\n      packages: write  # required for pushing to GHCR\n\n    uses: ./.github/workflows/_push-to-container-registry.yml\n    with:\n      image-map: |\n        {\n          \"ghcr.io/neondatabase/build-tools:${{ inputs.from-tag }}-bullseye\": [\n            \"docker.io/neondatabase/build-tools:pinned-bullseye\",\n            \"ghcr.io/neondatabase/build-tools:pinned-bullseye\",\n            \"${{ vars.NEON_DEV_AWS_ACCOUNT_ID }}.dkr.ecr.${{ vars.AWS_ECR_REGION }}.amazonaws.com/build-tools:pinned-bullseye\",\n            \"${{ vars.AZURE_DEV_REGISTRY_NAME }}.azurecr.io/neondatabase/build-tools:pinned-bullseye\"\n          ],\n          \"ghcr.io/neondatabase/build-tools:${{ inputs.from-tag }}-bookworm\": [\n            \"docker.io/neondatabase/build-tools:pinned-bookworm\",\n            \"docker.io/neondatabase/build-tools:pinned\",\n            \"ghcr.io/neondatabase/build-tools:pinned-bookworm\",\n            \"ghcr.io/neondatabase/build-tools:pinned\",\n            \"${{ vars.NEON_DEV_AWS_ACCOUNT_ID }}.dkr.ecr.${{ vars.AWS_ECR_REGION }}.amazonaws.com/build-tools:pinned-bookworm\",\n            \"${{ vars.NEON_DEV_AWS_ACCOUNT_ID }}.dkr.ecr.${{ vars.AWS_ECR_REGION }}.amazonaws.com/build-tools:pinned\",\n            \"${{ vars.AZURE_DEV_REGISTRY_NAME }}.azurecr.io/neondatabase/build-tools:pinned-bookworm\",\n            \"${{ vars.AZURE_DEV_REGISTRY_NAME }}.azurecr.io/neondatabase/build-tools:pinned\"\n          ]\n        }\n      aws-region: ${{ vars.AWS_ECR_REGION }}\n      aws-account-id: \"${{ vars.NEON_DEV_AWS_ACCOUNT_ID }}\"\n      aws-role-to-assume: \"gha-oidc-neon-admin\"\n      azure-client-id: ${{ vars.AZURE_DEV_CLIENT_ID }}\n      azure-subscription-id: ${{ vars.AZURE_DEV_SUBSCRIPTION_ID }}\n      azure-tenant-id: ${{ 
vars.AZURE_TENANT_ID }}\n      acr-registry-name: ${{ vars.AZURE_DEV_REGISTRY_NAME }}\n    secrets: inherit\n"
  },
  {
    "path": ".github/workflows/pre-merge-checks.yml",
    "content": "name: Pre-merge checks\n\non:\n  pull_request:\n    paths:\n      - .github/workflows/_check-codestyle-python.yml\n      - .github/workflows/_check-codestyle-rust.yml\n      - .github/workflows/build-build-tools-image.yml\n      - .github/workflows/pre-merge-checks.yml\n  merge_group:\n\ndefaults:\n  run:\n    shell: bash -euxo pipefail {0}\n\n# No permission for GITHUB_TOKEN by default; the **minimal required** set of permissions should be granted in each job.\npermissions: {}\n\njobs:\n  meta:\n    runs-on: ubuntu-22.04\n    permissions:\n      contents: read\n    outputs:\n      python-changed: ${{ steps.python-src.outputs.any_changed }}\n      rust-changed: ${{ steps.rust-src.outputs.any_changed }}\n      branch: ${{ steps.group-metadata.outputs.branch }}\n      pr-number: ${{ steps.group-metadata.outputs.pr-number }}\n    steps:\n      - name: Harden the runner (Audit all outbound calls)\n        uses: step-security/harden-runner@4d991eb9b905ef189e4c376166672c3f2f230481 # v2.11.0\n        with:\n          egress-policy: audit\n\n      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2\n\n      - uses: tj-actions/changed-files@ed68ef82c095e0d48ec87eccea555d944a631a4c # v46.0.5\n        id: python-src\n        with:\n          files: |\n            .github/workflows/_check-codestyle-python.yml\n            .github/workflows/build-build-tools-image.yml\n            .github/workflows/pre-merge-checks.yml\n            **/**.py\n            poetry.lock\n            pyproject.toml\n\n      - uses: tj-actions/changed-files@ed68ef82c095e0d48ec87eccea555d944a631a4c # v46.0.5\n        id: rust-src\n        with:\n          files: |\n            .github/workflows/_check-codestyle-rust.yml\n            .github/workflows/build-build-tools-image.yml\n            .github/workflows/pre-merge-checks.yml\n            **/**.rs\n            **/Cargo.toml\n            Cargo.toml\n            Cargo.lock\n\n      - name: PRINT ALL CHANGED FILES 
FOR DEBUG PURPOSES\n        env:\n          PYTHON_CHANGED_FILES: ${{ steps.python-src.outputs.all_changed_files }}\n          RUST_CHANGED_FILES: ${{ steps.rust-src.outputs.all_changed_files }}\n        run: |\n          echo \"${PYTHON_CHANGED_FILES}\"\n          echo \"${RUST_CHANGED_FILES}\"\n\n      - name: Merge group metadata\n        if: ${{ github.event_name == 'merge_group' }}\n        id: group-metadata\n        env:\n          MERGE_QUEUE_REF: ${{ github.event.merge_group.head_ref }}\n        run: |\n          echo $MERGE_QUEUE_REF | jq -Rr 'capture(\"refs/heads/gh-readonly-queue/(?<branch>.*)/pr-(?<pr_number>[0-9]+)-[0-9a-f]{40}\") | [\"branch=\" + .branch, \"pr-number=\" + .pr_number] | .[]' | tee -a \"${GITHUB_OUTPUT}\"\n\n  build-build-tools-image:\n    if: |\n      false\n      || needs.meta.outputs.python-changed == 'true'\n      || needs.meta.outputs.rust-changed == 'true'\n    needs: [ meta ]\n    permissions:\n      contents: read\n      packages: write\n    uses: ./.github/workflows/build-build-tools-image.yml\n    with:\n      # Build only one combination to save time\n      archs: '[\"x64\"]'\n      debians: '[\"bookworm\"]'\n    secrets: inherit\n\n  check-codestyle-python:\n    if: needs.meta.outputs.python-changed == 'true'\n    needs: [ meta, build-build-tools-image ]\n    permissions:\n      contents: read\n      packages: read\n    uses: ./.github/workflows/_check-codestyle-python.yml\n    with:\n      # `-bookworm-x64` suffix should match the combination in `build-build-tools-image`\n      build-tools-image: ${{ needs.build-build-tools-image.outputs.image }}-bookworm-x64\n    secrets: inherit\n\n  check-codestyle-rust:\n    if: needs.meta.outputs.rust-changed == 'true'\n    needs: [ meta, build-build-tools-image ]\n    permissions:\n      contents: read\n      packages: read\n    uses: ./.github/workflows/_check-codestyle-rust.yml\n    with:\n      # `-bookworm-x64` suffix should match the combination in `build-build-tools-image`\n    
  build-tools-image: ${{ needs.build-build-tools-image.outputs.image }}-bookworm-x64\n      archs: '[\"x64\"]'\n    secrets: inherit\n\n  # To get items from the merge queue merged into main we need to satisfy \"Status checks that are required\".\n  # Currently we require 2 jobs (checks with exact name):\n  # - conclusion\n  # - neon-cloud-e2e\n  conclusion:\n    # Do not run job on Pull Requests as it interferes with the `conclusion` job from the `build_and_test` workflow\n    if: always() && github.event_name == 'merge_group'\n    permissions:\n      statuses: write # for `github.repos.createCommitStatus(...)`\n      contents: write\n    needs:\n      - meta\n      - check-codestyle-python\n      - check-codestyle-rust\n    runs-on: ubuntu-22.04\n    steps:\n      - name: Harden the runner (Audit all outbound calls)\n        uses: step-security/harden-runner@4d991eb9b905ef189e4c376166672c3f2f230481 # v2.11.0\n        with:\n          egress-policy: audit\n\n      - name: Create fake `neon-cloud-e2e` check\n        uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1\n        with:\n          # Retry script for 5XX server errors: https://github.com/actions/github-script#retries\n          retries: 5\n          script: |\n            const { repo, owner } = context.repo;\n            const targetUrl = `${context.serverUrl}/${owner}/${repo}/actions/runs/${context.runId}`;\n\n            await github.rest.repos.createCommitStatus({\n              owner: owner,\n              repo: repo,\n              sha: context.sha,\n              context: `neon-cloud-e2e`,\n              state: `success`,\n              target_url: targetUrl,\n              description: `fake check for merge queue`,\n            });\n\n      - name: Fail the job if any of the dependencies do not succeed or skipped\n        run: exit 1\n        if: |\n          false\n          || (github.event_name == 'merge_group' && needs.meta.outputs.branch != 'main')\n          || 
(needs.check-codestyle-python.result == 'skipped' && needs.meta.outputs.python-changed == 'true')\n          || (needs.check-codestyle-rust.result   == 'skipped' && needs.meta.outputs.rust-changed   == 'true')\n          || contains(needs.*.result, 'failure')\n          || contains(needs.*.result, 'cancelled')\n\n      - name: Add fast-forward label to PR to trigger fast-forward merge\n        if: >-\n          ${{\n            always()\n            && github.event_name == 'merge_group'\n            && contains(fromJSON('[\"release\", \"release-proxy\", \"release-compute\"]'), needs.meta.outputs.branch)\n          }}\n        env:\n          GH_TOKEN: ${{ secrets.CI_ACCESS_TOKEN }}\n        run: >-\n          gh pr edit ${{ needs.meta.outputs.pr-number }} --repo \"${GITHUB_REPOSITORY}\" --add-label \"fast-forward\"\n"
  },
  {
    "path": ".github/workflows/proxy-benchmark.yml",
    "content": "name: Periodic proxy performance test on unit-perf-aws-arm runners\n\non:\n  push: # TODO: remove after testing\n    branches:\n      - test-proxy-bench # Runs on pushes to test-proxy-bench branch\n  # schedule:\n    # * is a special character in YAML so you have to quote this string\n    #        ┌───────────── minute (0 - 59)\n    #        │ ┌───────────── hour (0 - 23)\n    #        │ │ ┌───────────── day of the month (1 - 31)\n    #        │ │ │ ┌───────────── month (1 - 12 or JAN-DEC)\n    #        │ │ │ │ ┌───────────── day of the week (0 - 6 or SUN-SAT)\n    # - cron: '0 5 * * *' # Runs at 5 UTC once a day\n  workflow_dispatch: # adds an ability to run this manually\n\ndefaults:\n  run:\n    shell: bash -euo pipefail {0}\n\nconcurrency:\n  group: ${{ github.workflow }}\n  cancel-in-progress: false\n\npermissions:\n  contents: read\n\njobs:\n  run_periodic_proxybench_test:\n    permissions:\n      id-token: write # aws-actions/configure-aws-credentials\n      statuses: write\n      contents: write\n      pull-requests: write\n    runs-on: [ self-hosted, unit-perf-aws-arm ]\n    timeout-minutes: 60  # 1h timeout\n    container:\n      image: ghcr.io/neondatabase/build-tools:pinned-bookworm\n      credentials:\n        username: ${{ github.actor }}\n        password: ${{ secrets.GITHUB_TOKEN }}\n      options: --init\n    steps:\n    - name: Checkout proxy-bench Repo\n      uses: actions/checkout@v4\n      with:\n        repository: neondatabase/proxy-bench\n        path: proxy-bench\n\n    - name: Set up the environment which depends on $RUNNER_TEMP on nvme drive\n      id: set-env\n      shell: bash -euxo pipefail {0}\n      run: |\n        PROXY_BENCH_PATH=$(realpath ./proxy-bench)\n        {\n          echo \"PROXY_BENCH_PATH=$PROXY_BENCH_PATH\"\n          echo \"NEON_DIR=${RUNNER_TEMP}/neon\"\n          echo \"NEON_PROXY_PATH=${RUNNER_TEMP}/neon/bin/proxy\"\n          echo \"TEST_OUTPUT=${PROXY_BENCH_PATH}/test_output\"\n          echo \"\"\n        } >> \"$GITHUB_ENV\"\n\n    - name: Cache poetry deps\n      uses: actions/cache@v4\n      with:\n        path: ~/.cache/pypoetry/virtualenvs\n        key: v2-${{ runner.os }}-${{ runner.arch }}-python-deps-bookworm-${{ hashFiles('poetry.lock') }}\n\n    - name: Install Python deps\n      shell: bash -euxo pipefail {0}\n      run: ./scripts/pysync\n\n    - name: show ulimits\n      shell: bash -euxo pipefail {0}\n      run: |\n        ulimit -a\n\n    - name: Run proxy-bench\n      working-directory: ${{ env.PROXY_BENCH_PATH }}\n      run: ./run.sh --with-grafana --bare-metal\n\n    - name: Ingest Bench Results\n      if: always()\n      working-directory: ${{ env.NEON_DIR }}\n      run: |\n        mkdir -p $TEST_OUTPUT\n        python $NEON_DIR/scripts/proxy_bench_results_ingest.py --out $TEST_OUTPUT\n\n    - name: Push Metrics to Proxy perf database\n      shell: bash -euxo pipefail {0}\n      if: always()\n      env:\n        PERF_TEST_RESULT_CONNSTR: \"${{ secrets.PROXY_TEST_RESULT_CONNSTR }}\"\n        # Values in an `env:` map are not shell-expanded, so TEST_OUTPUT (written to GITHUB_ENV\n        # by the set-env step) is read through the `env` context instead of as `$TEST_OUTPUT`.\n        REPORT_FROM: ${{ env.TEST_OUTPUT }}\n      working-directory: ${{ env.NEON_DIR }}\n      run: $NEON_DIR/scripts/generate_and_push_perf_report.sh\n\n    - name: Notify Failure\n      if: failure()\n      run: echo \"Proxy bench job failed\" && exit 1\n\n    - name: Cleanup Test Resources\n      if: always()\n      shell: bash -euxo pipefail {0}\n      run: |\n        # Cleanup the test resources (paths are quoted so `rm -rf` never sees a word-split argument)\n        if [[ -d \"${TEST_OUTPUT}\" ]]; then\n          rm -rf \"${TEST_OUTPUT}\"\n        fi\n        if [[ -d \"${PROXY_BENCH_PATH}/test_output\" ]]; then\n          rm -rf \"${PROXY_BENCH_PATH}/test_output\"\n        fi"
  },
  {
    "path": ".github/workflows/random-ops-test.yml",
    "content": "name: Random Operations Test\n\non:\n  schedule:\n    # * is a special character in YAML so you have to quote this string\n    #          ┌───────────── minute (0 - 59)\n    #          │  ┌───────────── hour (0 - 23)\n    #          │  │  ┌───────────── day of the month (1 - 31)\n    #          │  │  │ ┌───────────── month (1 - 12 or JAN-DEC)\n    #          │  │  │ │ ┌───────────── day of the week (0 - 6 or SUN-SAT)\n    - cron:  '23 */2 * * *' # runs every 2 hours\n  workflow_dispatch:\n    inputs:\n      random_seed:\n        type: number\n        description: 'The random seed'\n        required: false\n        default: 0\n      num_operations:\n        type: number\n        description: \"The number of operations to test\"\n        default: 250\n\ndefaults:\n  run:\n    shell: bash -euxo pipefail {0}\n\npermissions: {}\n\nenv:\n  DEFAULT_PG_VERSION: 16\n  PLATFORM: neon-captest-new\n  AWS_DEFAULT_REGION: eu-central-1\n\njobs:\n  run-random-rests:\n    env:\n      POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install\n    runs-on: small\n    permissions:\n      id-token: write\n      statuses: write\n\n    strategy:\n      fail-fast: false\n      matrix:\n        pg-version: [16, 17]\n\n    container:\n      image: ghcr.io/neondatabase/build-tools:pinned-bookworm\n      credentials:\n        username: ${{ github.actor }}\n        password: ${{ secrets.GITHUB_TOKEN }}\n      options: --init\n    steps:\n      - name: Harden the runner (Audit all outbound calls)\n        uses: step-security/harden-runner@4d991eb9b905ef189e4c376166672c3f2f230481 # v2.11.0\n        with:\n          egress-policy: audit\n\n      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2\n\n      - name: Download Neon artifact\n        uses: ./.github/actions/download\n        with:\n          name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact\n          path: /tmp/neon/\n          prefix: latest\n          aws-oidc-role-arn: ${{ 
vars.DEV_AWS_OIDC_ROLE_ARN }}\n\n      - name: Run tests\n        uses: ./.github/actions/run-python-test-set\n        with:\n          build_type: remote\n          test_selection: random_ops\n          run_in_parallel: false\n          extra_params: -m remote_cluster\n          pg_version: ${{ matrix.pg-version }}\n          aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}\n        env:\n          NEON_API_KEY: ${{ secrets.NEON_STAGING_API_KEY }}\n          RANDOM_SEED: ${{ inputs.random_seed }}\n          NUM_OPERATIONS: ${{ inputs.num_operations }}\n\n      - name: Create Allure report\n        if: ${{ !cancelled() }}\n        id: create-allure-report\n        uses: ./.github/actions/allure-report-generate\n        with:\n          store-test-results-into-db: true\n          aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}\n        env:\n          REGRESS_TEST_RESULT_CONNSTR_NEW: ${{ secrets.REGRESS_TEST_RESULT_CONNSTR_NEW }}\n"
  },
  {
    "path": ".github/workflows/regenerate-pg-setting.yml",
    "content": "name: Regenerate Postgres Settings\n\non:\n  pull_request:\n    types:\n      - opened\n      - synchronize\n      - reopened\n    paths:\n      - pgxn/neon/**.c\n      - vendor/postgres-v*\n      - vendor/revisions.json\n\nconcurrency:\n  group: ${{ github.workflow }}-${{ github.head_ref }}\n  cancel-in-progress: true\n\npermissions:\n  pull-requests: write\n\njobs:\n  regenerate-pg-settings:\n    runs-on: ubuntu-22.04\n\n    steps:\n      - name: Harden the runner (Audit all outbound calls)\n        uses: step-security/harden-runner@4d991eb9b905ef189e4c376166672c3f2f230481 # v2.11.0\n        with:\n          egress-policy: audit\n\n      - name: Add comment\n        uses: thollander/actions-comment-pull-request@65f9e5c9a1f2cd378bd74b2e057c9736982a8e74 # v3\n        with:\n          comment-tag: ${{ github.job }}\n          pr-number: ${{ github.event.number }}\n          message: |\n            If this PR added a GUC in the Postgres fork or `neon` extension,\n            please regenerate the Postgres settings in the `cloud` repo:\n\n            ```\n            make NEON_WORKDIR=path/to/neon/checkout \\\n              -C goapp/internal/shareddomain/postgres generate\n            ```\n\n            If you're an external contributor, a Neon employee will assist in\n            making sure this step is done.\n"
  },
  {
    "path": ".github/workflows/release-compute.yml",
    "content": "name: Create compute release PR\n\non:\n  schedule:\n    - cron: '0 7 * * FRI'\n\njobs:\n  create-release-pr:\n    uses: ./.github/workflows/release.yml\n    with:\n      component: compute\n    secrets: inherit\n"
  },
  {
    "path": ".github/workflows/release-notify.yml",
    "content": "name: Notify Slack channel about upcoming release\n\nconcurrency:\n  group: ${{ github.workflow }}-${{ github.event.number }}\n  cancel-in-progress: true\n\non:\n  pull_request:\n    branches:\n      - release\n    types:\n      # Default types that trigger a workflow:\n      # - https://docs.github.com/en/actions/using-workflows/events-that-trigger-workflows#pull_request\n      - opened\n      - synchronize\n      - reopened\n      # Additional types that we want to handle:\n      - closed\n\njobs:\n  notify:\n    runs-on: ubuntu-22.04\n\n    steps:\n      - name: Harden the runner (Audit all outbound calls)\n        uses: step-security/harden-runner@4d991eb9b905ef189e4c376166672c3f2f230481 # v2.11.0\n        with:\n          egress-policy: audit\n\n      - uses: neondatabase/dev-actions/release-pr-notify@483a843f2a8bcfbdc4c69d27630528a3ddc4e14b # main\n        with:\n          slack-token: ${{ secrets.SLACK_BOT_TOKEN }}\n          slack-channel-id: ${{ vars.SLACK_UPCOMING_RELEASE_CHANNEL_ID || 'C05QQ9J1BRC' }} # if not set, then `#test-release-notifications`\n          github-token: ${{ secrets.GITHUB_TOKEN }}\n"
  },
  {
    "path": ".github/workflows/release-proxy.yml",
    "content": "name: Create proxy release PR\n\non:\n  schedule:\n    - cron: '0 6 * * TUE'\n\njobs:\n  create-release-pr:\n    uses: ./.github/workflows/release.yml\n    with:\n      component: proxy\n    secrets: inherit\n"
  },
  {
    "path": ".github/workflows/release-storage.yml",
    "content": "name: Create storage release PR\n\non:\n  schedule:\n    - cron: '0 6 * * FRI'\n\njobs:\n  create-release-pr:\n    uses: ./.github/workflows/release.yml\n    with:\n      component: storage\n    secrets: inherit\n"
  },
  {
    "path": ".github/workflows/release.yml",
    "content": "name: Create release PR\n\non:\n  workflow_dispatch:\n    inputs:\n      component:\n        description: \"Component to release\"\n        required: true\n        type: choice\n        options:\n          - compute\n          - proxy\n          - storage\n      cherry-pick:\n        description: \"Commits to cherry-pick (space separated, makes this a hotfix based on previous release)\"\n        required: false\n        type: string\n        default: ''\n\n  workflow_call:\n    inputs:\n      component:\n        description: \"Component to release\"\n        required: true\n        type: string\n      cherry-pick:\n        description: \"Commits to cherry-pick (space separated, makes this a hotfix based on previous release)\"\n        required: false\n        type: string\n        default: ''\n\n\n# No permission for GITHUB_TOKEN by default; the **minimal required** set of permissions should be granted in each job.\npermissions: {}\n\ndefaults:\n  run:\n    shell: bash -euo pipefail {0}\n\njobs:\n  create-release-pr:\n    runs-on: ubuntu-22.04\n\n    permissions:\n      contents: write\n\n    steps:\n      - name: Harden the runner (Audit all outbound calls)\n        uses: step-security/harden-runner@4d991eb9b905ef189e4c376166672c3f2f230481 # v2.11.0\n        with:\n          egress-policy: audit\n\n      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2\n        with:\n          fetch-depth: 0\n\n      - name: Configure git\n        run: |\n          git config user.name \"github-actions[bot]\"\n          git config user.email \"41898282+github-actions[bot]@users.noreply.github.com\"\n\n      - name: Create release PR\n        uses: neondatabase/dev-actions/release-pr@290dec821d86fa8a93f019e8c69720f5865b5677\n        with:\n          component: ${{ inputs.component }}\n          cherry-pick: ${{ inputs.cherry-pick }}\n        env:\n          GH_TOKEN: ${{ secrets.CI_ACCESS_TOKEN }}\n"
  },
  {
    "path": ".github/workflows/report-workflow-stats-batch.yml",
    "content": "name: Report Workflow Stats Batch\n\non:\n  schedule:\n    - cron: '*/15 * * * *'\n    - cron: '25 0 * * *'\n    - cron: '25 1 * * 6'\n\npermissions:\n  contents: read\n\njobs:\n  gh-workflow-stats-batch-2h:\n    name: GitHub Workflow Stats Batch 2 hours\n    if: github.event.schedule == '*/15 * * * *'\n    runs-on: ubuntu-22.04\n    permissions:\n      actions: read\n    steps:\n    - name: Harden the runner (Audit all outbound calls)\n      uses: step-security/harden-runner@4d991eb9b905ef189e4c376166672c3f2f230481 # v2.11.0\n      with:\n        egress-policy: audit\n\n    - name: Export Workflow Run for the past 2 hours\n      uses: neondatabase/gh-workflow-stats-action@701b1f202666d0b82e67b4d387e909af2b920127 # v0.2.2\n      with:\n        db_uri: ${{ secrets.GH_REPORT_STATS_DB_RW_CONNSTR }}\n        db_table: \"gh_workflow_stats_neon\"\n        gh_token: ${{ secrets.GITHUB_TOKEN }}\n        duration: '2h'\n\n  gh-workflow-stats-batch-48h:\n    name: GitHub Workflow Stats Batch 48 hours\n    if: github.event.schedule == '25 0 * * *'\n    runs-on: ubuntu-22.04\n    permissions:\n      actions: read\n    steps:\n    - name: Harden the runner (Audit all outbound calls)\n      uses: step-security/harden-runner@4d991eb9b905ef189e4c376166672c3f2f230481 # v2.11.0\n      with:\n        egress-policy: audit\n\n    - name: Export Workflow Run for the past 48 hours\n      uses: neondatabase/gh-workflow-stats-action@701b1f202666d0b82e67b4d387e909af2b920127 # v0.2.2\n      with:\n        db_uri: ${{ secrets.GH_REPORT_STATS_DB_RW_CONNSTR }}\n        db_table: \"gh_workflow_stats_neon\"\n        gh_token: ${{ secrets.GITHUB_TOKEN }}\n        duration: '48h'\n\n  gh-workflow-stats-batch-30d:\n    name: GitHub Workflow Stats Batch 30 days\n    if: github.event.schedule == '25 1 * * 6'\n    runs-on: ubuntu-22.04\n    permissions:\n      actions: read\n    steps:\n    - name: Harden the runner (Audit all outbound calls)\n      uses: 
step-security/harden-runner@4d991eb9b905ef189e4c376166672c3f2f230481 # v2.11.0\n      with:\n        egress-policy: audit\n\n    - name: Export Workflow Run for the past 30 days\n      uses: neondatabase/gh-workflow-stats-action@701b1f202666d0b82e67b4d387e909af2b920127 # v0.2.2\n      with:\n        db_uri: ${{ secrets.GH_REPORT_STATS_DB_RW_CONNSTR }}\n        db_table: \"gh_workflow_stats_neon\"\n        gh_token: ${{ secrets.GITHUB_TOKEN }}\n        duration: '720h'\n"
  },
  {
    "path": ".github/workflows/trigger-e2e-tests.yml",
    "content": "name: Trigger E2E Tests\n\non:\n  pull_request:\n    types:\n      - ready_for_review\n  workflow_call:\n    inputs:\n      github-event-name:\n        type: string\n        required: true\n      github-event-json:\n        type: string\n        required: true\n\ndefaults:\n  run:\n    shell: bash -euxo pipefail {0}\n\nenv:\n  # A concurrency group that we use for e2e-tests runs, matches `concurrency.group` above with `github.repository` as a prefix\n  E2E_CONCURRENCY_GROUP: ${{ github.repository }}-e2e-tests-${{ github.ref_name }}-${{ github.ref_name == 'main' && github.sha || 'anysha' }}\n\njobs:\n  check-permissions:\n    if: ${{ !contains(github.event.pull_request.labels.*.name, 'run-no-ci') }}\n    uses: ./.github/workflows/check-permissions.yml\n    with:\n      github-event-name: ${{ inputs.github-event-name || github.event_name }}\n\n  cancel-previous-e2e-tests:\n    needs: [ check-permissions ]\n    if: github.event_name == 'pull_request'\n    runs-on: ubuntu-22.04\n\n    steps:\n      - name: Harden the runner (Audit all outbound calls)\n        uses: step-security/harden-runner@0634a2670c59f64b4a01f0f96f84700a4088b9f0 # v2.12.0\n        with:\n          egress-policy: audit\n\n      - name: Cancel previous e2e-tests runs for this PR\n        env:\n          GH_TOKEN: ${{ secrets.CI_ACCESS_TOKEN }}\n        run: |\n          gh workflow --repo neondatabase/cloud \\\n            run cancel-previous-in-concurrency-group.yml \\\n              --field concurrency_group=\"${{ env.E2E_CONCURRENCY_GROUP }}\"\n\n  meta:\n    uses: ./.github/workflows/_meta.yml\n    with:\n      github-event-name: ${{ inputs.github-event-name || github.event_name }}\n      github-event-json: ${{ inputs.github-event-json || toJSON(github.event) }}\n\n  trigger-e2e-tests:\n    needs: [ meta ]\n    runs-on: ubuntu-22.04\n    env:\n      EVENT_ACTION: ${{ github.event.action }}\n      GH_TOKEN: ${{ secrets.CI_ACCESS_TOKEN }}\n      TAG: >-\n        ${{\n          
contains(fromJSON('[\"compute-release\", \"compute-rc-pr\"]'), needs.meta.outputs.run-kind)\n          && needs.meta.outputs.previous-storage-release\n          || needs.meta.outputs.build-tag\n        }}\n      COMPUTE_TAG: >-\n        ${{\n          contains(fromJSON('[\"storage-release\", \"storage-rc-pr\", \"proxy-release\", \"proxy-rc-pr\"]'), needs.meta.outputs.run-kind)\n          && needs.meta.outputs.previous-compute-release\n          || needs.meta.outputs.build-tag\n        }}\n    steps:\n      - name: Harden the runner (Audit all outbound calls)\n        uses: step-security/harden-runner@0634a2670c59f64b4a01f0f96f84700a4088b9f0 # v2.12.0\n        with:\n          egress-policy: audit\n\n      - name: Wait for `push-{neon,compute}-image-dev` job to finish\n        # It's important to have a timeout here, the script in the step can run infinitely\n        timeout-minutes: 60\n        run: |\n          if [ \"${GITHUB_EVENT_NAME}\" != \"pull_request\" ] || [ \"${EVENT_ACTION}\" != \"ready_for_review\" ]; then\n            exit 0\n          fi\n\n          # For PRs we use the run id as the tag\n          BUILD_AND_TEST_RUN_ID=${TAG}\n          while true; do\n            gh run --repo ${GITHUB_REPOSITORY} view ${BUILD_AND_TEST_RUN_ID} --json jobs --jq '[.jobs[] | select((.name | startswith(\"push-neon-image-dev\")) or (.name | startswith(\"push-compute-image-dev\"))) | {\"name\": .name, \"conclusion\": .conclusion, \"url\": .url}]' > jobs.json\n            if [ $(jq '[.[] | select(.conclusion == \"success\")] | length' jobs.json) -eq 2 ]; then\n              break\n            fi\n            jq -c '.[]' jobs.json | while read -r job; do\n              case $(echo $job | jq .conclusion) in\n                failure | cancelled | skipped)\n                  echo \"The '$(echo $job | jq .name)' job didn't succeed: '$(echo $job | jq .conclusion)'. 
See log in '$(echo $job | jq .url)' Exiting...\"\n                  exit 1\n                  ;;\n              esac\n            done\n            echo \"The 'push-{neon,compute}-image-dev' jobs haven't succeeded yet. Waiting...\"\n            sleep 60\n          done\n\n      - name: Set e2e-platforms\n        id: e2e-platforms\n        env:\n          PR_NUMBER: ${{ github.event.pull_request.number }}\n          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}\n        run: |\n          # Default set of platforms to run e2e tests on\n          platforms='[\"docker\", \"k8s\"]'\n\n          # If a PR changes anything that affects computes, add k8s-neonvm to the list of platforms.\n          # If the workflow run is not a pull request, add k8s-neonvm to the list.\n          if [ \"$GITHUB_EVENT_NAME\" == \"pull_request\" ]; then\n            for f in $(gh api \"/repos/${GITHUB_REPOSITORY}/pulls/${PR_NUMBER}/files\" --paginate --jq '.[].filename'); do\n              case \"$f\" in\n                # List of directories that contain code which affect compute images.\n                #\n                # This isn't exhaustive, just the paths that are most directly compute-related.\n                # For example, compute_ctl also depends on libs/utils, but we don't trigger\n                # an e2e run on that.\n                vendor/*|pgxn/*|compute_tools/*|libs/vm_monitor/*|compute/compute-node.Dockerfile)\n                  platforms=$(echo \"${platforms}\" | jq --compact-output '. += [\"k8s-neonvm\"] | unique')\n                  ;;\n                *)\n                  # no-op\n                  ;;\n              esac\n            done\n          else\n            platforms=$(echo \"${platforms}\" | jq --compact-output '. 
+= [\"k8s-neonvm\"] | unique')\n          fi\n\n          echo \"e2e-platforms=${platforms}\" | tee -a $GITHUB_OUTPUT\n\n      - name: Set PR's status to pending and request a remote CI test\n        env:\n          E2E_PLATFORMS: ${{ steps.e2e-platforms.outputs.e2e-platforms }}\n          COMMIT_SHA: ${{ github.event.pull_request.head.sha || github.sha }}\n          GH_TOKEN: ${{ secrets.CI_ACCESS_TOKEN }}\n        run: |\n          REMOTE_REPO=\"${GITHUB_REPOSITORY_OWNER}/cloud\"\n\n          gh api \"/repos/${GITHUB_REPOSITORY}/statuses/${COMMIT_SHA}\" \\\n            --method POST \\\n            --raw-field \"state=pending\" \\\n            --raw-field \"description=[$REMOTE_REPO] Remote CI job is about to start\" \\\n            --raw-field \"context=neon-cloud-e2e\"\n\n          gh workflow --repo ${REMOTE_REPO} \\\n            run testing.yml \\\n              --ref \"main\" \\\n              --raw-field \"ci_job_name=neon-cloud-e2e\" \\\n              --raw-field \"commit_hash=$COMMIT_SHA\" \\\n              --raw-field \"remote_repo=${GITHUB_REPOSITORY}\" \\\n              --raw-field \"storage_image_tag=${TAG}\" \\\n              --raw-field \"compute_image_tag=${COMPUTE_TAG}\" \\\n              --raw-field \"concurrency_group=${E2E_CONCURRENCY_GROUP}\" \\\n              --raw-field \"e2e-platforms=${E2E_PLATFORMS}\"\n"
  },
  {
    "path": ".gitignore",
    "content": "/artifact_cache\n/build\n/pg_install\n/target\n/tmp_check\n/tmp_check_cli\n__pycache__/\ntest_output/\nneon_previous/\n.vscode\n.idea\n*.swp\ntags\nneon.iml\n/.neon\n/integration_tests/.neon\ncompaction-suite-results.*\ndocker-compose/docker-compose-parallel.yml\n\n# Coverage\n*.profraw\n*.profdata\n\n*.key\n*.crt\n*.o\n*.so\n*.Po\n*.pid\n\n# pgindent typedef lists\n*.list\n\n# Node\n**/node_modules/\n\n# various files for local testing\n/proxy/.subzero\nlocal_proxy.json\n"
  },
  {
    "path": ".gitmodules",
    "content": "[submodule \"vendor/postgres-v14\"]\n\tpath = vendor/postgres-v14\n\turl = ../postgres.git\n\tbranch = REL_14_STABLE_neon\n[submodule \"vendor/postgres-v15\"]\n\tpath = vendor/postgres-v15\n\turl = ../postgres.git\n\tbranch = REL_15_STABLE_neon\n[submodule \"vendor/postgres-v16\"]\n\tpath = vendor/postgres-v16\n\turl = ../postgres.git\n\tbranch = REL_16_STABLE_neon\n[submodule \"vendor/postgres-v17\"]\n\tpath = vendor/postgres-v17\n\turl = ../postgres.git\n\tbranch = REL_17_STABLE_neon\n"
  },
  {
    "path": ".neon_clippy_args",
    "content": "# * `-A unknown_lints` – do not warn about unknown lint suppressions\n#                        that people with newer toolchains might use\n# * `-D warnings`      - fail on any warnings (`cargo` returns non-zero exit status)\n# * `-D clippy::todo`  - don't let `todo!()` slip into `main`\nexport CLIPPY_COMMON_ARGS=\"--locked --workspace --all-targets -- -A unknown_lints -D warnings -D clippy::todo\"\n"
  },
  {
    "path": "CODEOWNERS",
    "content": "# Autoscaling\n/libs/vm_monitor/ @neondatabase/autoscaling\n\n# DevProd & PerfCorr\n/.github/ @neondatabase/developer-productivity @neondatabase/performance-correctness\n\n# Compute\n/pgxn/ @neondatabase/compute\n/vendor/ @neondatabase/compute\n/compute/ @neondatabase/compute\n/compute_tools/ @neondatabase/compute\n\n# Proxy\n/libs/proxy/ @neondatabase/proxy\n/proxy/ @neondatabase/proxy\n\n# Storage\n/pageserver/ @neondatabase/storage\n/safekeeper/ @neondatabase/storage\n/storage_controller @neondatabase/storage\n/storage_scrubber @neondatabase/storage\n/libs/pageserver_api/ @neondatabase/storage\n/libs/remote_storage/ @neondatabase/storage\n/libs/safekeeper_api/ @neondatabase/storage\n\n# Shared\n/pgxn/neon/ @neondatabase/compute @neondatabase/storage\n/libs/compute_api/ @neondatabase/compute @neondatabase/control-plane\n/libs/postgres_ffi/ @neondatabase/compute @neondatabase/storage\n"
  },
  {
    "path": "CONTRIBUTING.md",
    "content": "# How to contribute\n\nHowdy! Usual good software engineering practices apply. Write\ntests. Write comments. Follow standard Rust coding practices where\npossible. Use `cargo fmt` and `cargo clippy` to tidy up formatting.\n\nThere are soft spots in the code, which could use cleanup,\nrefactoring, additional comments, and so forth. Let's try to raise the\nbar, and clean things up as we go. Try to leave code in a better shape\nthan it was before.\n\n## Pre-commit hook\n\nWe have a sample pre-commit hook in `pre-commit.py`.\nTo set it up, run:\n\n```bash\nln -s ../../pre-commit.py .git/hooks/pre-commit\n```\n\nThis will run following checks on staged files before each commit:\n- `rustfmt`\n- checks for Python files, see [obligatory checks](/docs/sourcetree.md#obligatory-checks).\n\nThere is also a separate script `./run_clippy.sh` that runs `cargo clippy` on the whole project\nand `./scripts/reformat` that runs all formatting tools to ensure the project is up to date.\n\nIf you want to skip the hook, run `git commit` with `--no-verify` option.\n\n## Submitting changes\n\n1. Get at least one +1 on your PR before you push.\n\n   For simple patches, it will only take a minute for someone to review\nit.\n\n2. Don't force push small changes after making the PR ready for review.\nDoing so will force readers to re-read your entire PR, which will delay\nthe review process.\n\n3. Always keep the CI green.\n\n   Do not push, if the CI failed on your PR. Even if you think it's not\nyour patch's fault. Help to fix the root cause if something else has\nbroken the CI, before pushing.\n\n*Happy Hacking!*\n\n# How to run a CI pipeline on Pull Requests from external contributors\n_An instruction for maintainers_\n\n## TL;DR:\n- Review the PR\n- If and only if it looks **safe** (i.e. 
it doesn't contain any malicious code which could expose secrets or harm the CI), then:\n    - Press the \"Approve and run\" button in the GitHub UI\n    - Add the `approved-for-ci-run` label to the PR\n    - Currently, draft PRs skip e2e tests (internal contributors only). After the PR is marked 'Ready for review', CI will trigger the e2e tests\n      - Add the `run-e2e-tests-in-draft` label to run e2e tests in a draft PR (overrides the behaviour above)\n      - The `approved-for-ci-run` workflow will add `run-e2e-tests-in-draft` automatically to run e2e tests for external contributors\n\nRepeat all steps after any change to the PR.\n- When the changes are ready to get merged — merge the original PR (not the internal one)\n\n## Longer version:\n\nGitHub Actions triggered by the `pull_request` event don't share repository secrets with the forks (for security reasons).\nSo, passing the CI pipeline on Pull Requests from external contributors is impossible.\n\nWe're using the following approach to make it work:\n- After the review, assign the `approved-for-ci-run` label to the PR if changes look safe\n- A GitHub Action will create an internal branch and a new PR with the same changes (for example, for a PR `#1234`, it'll be a branch `ci-run/pr-1234`)\n- Because the PR is created from the internal branch, it is able to access repository secrets (that's why it's crucial to make sure that the PR doesn't contain any malicious code that could expose our secrets or intentionally harm the CI)\n- The label gets removed automatically, so to run CI again with new changes, the label should be added again (after the review)\n\nFor details see [`approved-for-ci-run.yml`](.github/workflows/approved-for-ci-run.yml)\n\n## How do I make build-tools image \"pinned\"\n\nIt's possible to update the `pinned` tag of the `build-tools` image using the `pin-build-tools-image.yml` workflow.\n\n```bash\ngh workflow -R neondatabase/neon run pin-build-tools-image.yml \\\n            -f 
from-tag=cc98d9b00d670f182c507ae3783342bd7e64c31e\n```\n"
  },
  {
    "path": "Cargo.toml",
    "content": "[workspace]\nresolver = \"2\"\nmembers = [\n    \"compute_tools\",\n    \"control_plane\",\n    \"control_plane/storcon_cli\",\n    \"pageserver\",\n    \"pageserver/compaction\",\n    \"pageserver/ctl\",\n    \"pageserver/client\",\n    \"pageserver/client_grpc\",\n    \"pageserver/pagebench\",\n    \"pageserver/page_api\",\n    \"proxy\",\n    \"safekeeper\",\n    \"safekeeper/client\",\n    \"storage_broker\",\n    \"storage_controller\",\n    \"storage_controller/client\",\n    \"storage_scrubber\",\n    \"workspace_hack\",\n    \"libs/compute_api\",\n    \"libs/http-utils\",\n    \"libs/pageserver_api\",\n    \"libs/postgres_ffi\",\n    \"libs/postgres_ffi_types\",\n    \"libs/postgres_versioninfo\",\n    \"libs/safekeeper_api\",\n    \"libs/desim\",\n    \"libs/neon-shmem\",\n    \"libs/utils\",\n    \"libs/consumption_metrics\",\n    \"libs/postgres_backend\",\n    \"libs/posthog_client_lite\",\n    \"libs/pq_proto\",\n    \"libs/tenant_size_model\",\n    \"libs/metrics\",\n    \"libs/postgres_connection\",\n    \"libs/remote_storage\",\n    \"libs/tracing-utils\",\n    \"libs/postgres_ffi/wal_craft\",\n    \"libs/vm_monitor\",\n    \"libs/walproposer\",\n    \"libs/wal_decoder\",\n    \"libs/postgres_initdb\",\n    \"libs/proxy/json\",\n    \"libs/proxy/postgres-protocol2\",\n    \"libs/proxy/postgres-types2\",\n    \"libs/proxy/subzero_core\",\n    \"libs/proxy/tokio-postgres2\",\n    \"endpoint_storage\",\n    \"pgxn/neon/communicator\",\n]\n\n[workspace.package]\nedition = \"2024\"\nlicense = \"Apache-2.0\"\n\n## All dependency versions, used in the project\n[workspace.dependencies]\nahash = \"0.8\"\nanyhow = { version = \"1.0\", features = [\"backtrace\"] }\narc-swap = \"1.7\"\nasync-compression = { version = \"0.4.0\", features = [\"tokio\", \"gzip\", \"zstd\"] }\natomic-take = \"1.1.0\"\nflate2 = \"1.0.26\"\nassert-json-diff = \"2\"\nasync-stream = \"0.3\"\nasync-trait = \"0.1\"\naws-config = { version = \"1.5\", default-features = 
false, features=[\"rustls\", \"sso\"] }\naws-sdk-s3 = \"1.52\"\naws-sdk-iam = \"1.46.0\"\naws-sdk-kms = \"1.47.0\"\naws-smithy-async = { version = \"1.2.1\", default-features = false, features=[\"rt-tokio\"] }\naws-smithy-types = \"1.2\"\naws-credential-types = \"1.2.0\"\naws-sigv4 = { version = \"1.2\", features = [\"sign-http\"] }\naws-types = \"1.3\"\naxum = { version = \"0.8.1\", features = [\"ws\"] }\naxum-extra = { version = \"0.10.0\", features = [\"typed-header\", \"query\"] }\nbase64 = \"0.22\"\nbincode = \"1.3\"\nbindgen = \"0.71\"\nbit_field = \"0.10.2\"\nbstr = \"1.0\"\nbyteorder = \"1.4\"\nbytes = \"1.9\"\ncamino = \"1.1.6\"\ncfg-if = \"1.0.0\"\ncron = \"0.15\"\nchrono = { version = \"0.4\", default-features = false, features = [\"clock\"] }\nclap = { version = \"4.0\", features = [\"derive\", \"env\"] }\nclashmap = { version = \"1.0\", features = [\"raw-api\"] }\ncomfy-table = \"7.1\"\nconst_format = \"0.2\"\ncrc32c = \"0.6\"\ndiatomic-waker = { version = \"0.2.3\" }\neither = \"1.8\"\nenum-map = \"2.4.2\"\nenumset = \"1.0.12\"\nfail = \"0.5.0\"\nfallible-iterator = \"0.2\"\nframed-websockets = { version = \"0.1.0\", git = \"https://github.com/neondatabase/framed-websockets\" }\nfutures = \"0.3\"\nfutures-core = \"0.3\"\nfutures-util = \"0.3\"\ngit-version = \"0.3\"\ngovernor = \"0.8\"\nhashbrown = \"0.14\"\nhashlink = \"0.9.1\"\nhdrhistogram = \"7.5.2\"\nhex = \"0.4\"\nhex-literal = \"0.4\"\nhmac = \"0.12.1\"\nhostname = \"0.4\"\nhttp = {version = \"1.1.0\", features = [\"std\"]}\nhttp-types = { version = \"2\", default-features = false }\nhttp-body-util = \"0.1.2\"\nhumantime = \"2.2\"\nhumantime-serde = \"1.1.1\"\nhyper0 = { package = \"hyper\", version = \"0.14\" }\nhyper = \"1.4\"\nhyper-util = \"0.1\"\ntokio-tungstenite = \"0.21.0\"\nindexmap = { version = \"2\", features = [\"serde\"] }\nindoc = \"2\"\nipnet = \"2.10.0\"\nitertools = \"0.10\"\nitoa = \"1.0.11\"\njemalloc_pprof = { version = \"0.7\", features = [\"symbolize\", \"flamegraph\"] 
}\njsonwebtoken = \"9\"\nlasso = \"0.7\"\nlibc = \"0.2\"\nlock_api = \"0.4.13\"\nmd5 = \"0.7.0\"\nmeasured = { version = \"0.0.22\", features=[\"lasso\"] }\nmeasured-process = { version = \"0.0.22\" }\nmoka = { version = \"0.12\", features = [\"sync\"] }\nnix = { version = \"0.30.1\", features = [\"dir\", \"fs\", \"mman\", \"process\", \"socket\", \"signal\", \"poll\"] }\n# Do not update to >= 7.0.0, at least. The update will have a significant impact\n# on compute startup metrics (start_postgres_ms), >= 25% degradation.\nnotify = \"6.0.0\"\nnum_cpus = \"1.15\"\nnum-traits = \"0.2.19\"\nonce_cell = \"1.13\"\nopentelemetry = \"0.30\"\nopentelemetry_sdk = \"0.30\"\nopentelemetry-otlp = { version = \"0.30\", default-features = false, features = [\"http-proto\", \"trace\", \"http\", \"reqwest-blocking-client\"] }\nopentelemetry-semantic-conventions = \"0.30\"\nparking_lot = \"0.12\"\nparquet = { version = \"53\", default-features = false, features = [\"zstd\"] }\nparquet_derive = \"53\"\npbkdf2 = { version = \"0.12.1\", features = [\"simple\", \"std\"] }\npem = \"3.0.3\"\npin-project-lite = \"0.2\"\npprof = { version = \"0.14\", features = [\"criterion\", \"flamegraph\", \"frame-pointer\", \"prost-codec\"] }\nprocfs = \"0.16\"\nprometheus = {version = \"0.13\", default-features=false, features = [\"process\"]} # removes protobuf dependency\nprost = \"0.13.5\"\nprost-types = \"0.13.5\"\nrand = \"0.9\"\n# Remove after p256 is updated to 0.14.\nrand_core = \"=0.6\"\nredis = { version = \"0.29.2\", features = [\"tokio-rustls-comp\", \"keep-alive\"] }\nregex = \"1.10.2\"\nreqwest = { version = \"0.12\", default-features = false, features = [\"rustls-tls\"] }\nreqwest-tracing = { version = \"0.5\", features = [\"opentelemetry_0_30\"] }\nreqwest-middleware = \"0.4\"\nreqwest-retry = \"0.7\"\nrouterify = \"3\"\nrpds = \"0.13\"\nrustc-hash = \"2.1.1\"\nrustls = { version = \"0.23.16\", default-features = false }\nrustls-pemfile = \"2\"\nrustls-pki-types = \"1.11\"\nscopeguard = 
\"1.1\"\nsysinfo = \"0.29.2\"\nsd-notify = \"0.4.1\"\nsend-future = \"0.1.0\"\nsentry = { version = \"0.37\", default-features = false, features = [\"backtrace\", \"contexts\", \"panic\", \"rustls\", \"reqwest\" ] }\nserde = { version = \"1.0\", features = [\"derive\"] }\nserde_json = \"1\"\nserde_path_to_error = \"0.1\"\nserde_with = { version = \"3\", features = [ \"base64\" ] }\nserde_assert = \"0.5.0\"\nserde_repr = \"0.1.20\"\nsha2 = \"0.10.2\"\nsignal-hook = \"0.3\"\nsmallvec = \"1.11\"\nsmol_str = { version = \"0.2.0\", features = [\"serde\"] }\nsocket2 = \"0.5\"\nspki = \"0.7.3\"\nstrum = \"0.26\"\nstrum_macros = \"0.26\"\n\"subtle\"  = \"2.5.0\"\nsvg_fmt = \"0.4.3\"\nsync_wrapper = \"0.1.2\"\ntar = \"0.4\"\ntest-context = \"0.3\"\nthiserror = \"1.0\"\ntikv-jemallocator = { version = \"0.6\", features = [\"profiling\", \"stats\", \"unprefixed_malloc_on_supported_platforms\"] }\ntikv-jemalloc-ctl = { version = \"0.6\", features = [\"stats\"] }\ntokio = { version = \"1.43.1\", features = [\"macros\"] }\ntokio-epoll-uring = { git = \"https://github.com/neondatabase/tokio-epoll-uring.git\" , branch = \"main\" }\ntokio-io-timeout = \"1.2.0\"\ntokio-postgres-rustls = \"0.12.0\"\ntokio-rustls = { version = \"0.26.0\", default-features = false, features = [\"tls12\", \"ring\"]}\ntokio-stream = { version = \"0.1\", features = [\"sync\"] }\ntokio-tar = \"0.3\"\ntokio-util = { version = \"0.7.10\", features = [\"io\", \"io-util\", \"rt\"] }\ntoml = \"0.8\"\ntoml_edit = \"0.22\"\ntonic = { version = \"0.13.1\", default-features = false, features = [\"channel\", \"codegen\", \"gzip\", \"prost\", \"router\", \"server\", \"tls-ring\", \"tls-native-roots\", \"zstd\"] }\ntonic-reflection = { version = \"0.13.1\", features = [\"server\"] }\ntower = { version = \"0.5.2\", default-features = false }\ntower-http = { version = \"0.6.2\", features = [\"auth\", \"request-id\", \"trace\"] }\ntower-otel = { version = \"0.6\", features = [\"axum\"] }\ntower-service = 
\"0.3.3\"\ntracing = \"0.1\"\ntracing-error = \"0.2\"\ntracing-log = \"0.2\"\ntracing-opentelemetry = \"0.31\"\ntracing-serde = \"0.2.0\"\ntracing-subscriber = { version = \"0.3\", default-features = false, features = [\"smallvec\", \"fmt\", \"tracing-log\", \"std\", \"env-filter\", \"json\"] }\ntracing-appender = \"0.2.3\"\ntry-lock = \"0.2.5\"\ntest-log = { version = \"0.2.17\", default-features = false, features = [\"log\"] }\ntwox-hash = { version = \"1.6.3\", default-features = false }\ntyped-json = \"0.1\"\nurl = \"2.2\"\nurlencoding = \"2.1\"\nuuid = { version = \"1.6.1\", features = [\"v4\", \"v7\", \"serde\"] }\nwalkdir = \"2.3.2\"\nrustls-native-certs = \"0.8\"\nwhoami = \"1.5.1\"\njson-structural-diff = { version = \"0.2.0\" }\nx509-cert = { version = \"0.2.5\" }\nzerocopy = { version = \"0.8\", features = [\"derive\", \"simd\"] }\nzeroize = \"1.8\"\n\n## TODO replace this with tracing\nenv_logger = \"0.11\"\nlog = \"0.4\"\n\n## Libraries from neondatabase/ git forks, ideally with changes to be upstreamed\npostgres = { git = \"https://github.com/neondatabase/rust-postgres.git\", branch = \"neon\" }\npostgres-protocol = { git = \"https://github.com/neondatabase/rust-postgres.git\", branch = \"neon\" }\npostgres-types = { git = \"https://github.com/neondatabase/rust-postgres.git\", branch = \"neon\" }\ntokio-postgres = { git = \"https://github.com/neondatabase/rust-postgres.git\", branch = \"neon\" }\n\n## Azure SDK crates\nazure_core = { git = \"https://github.com/neondatabase/azure-sdk-for-rust.git\", branch = \"neon\", default-features = false, features = [\"enable_reqwest_rustls\", \"hmac_rust\"] }\nazure_identity = { git = \"https://github.com/neondatabase/azure-sdk-for-rust.git\", branch = \"neon\", default-features = false, features = [\"enable_reqwest_rustls\"] }\nazure_storage = { git = \"https://github.com/neondatabase/azure-sdk-for-rust.git\", branch = \"neon\", default-features = false, features = [\"enable_reqwest_rustls\"] 
}\nazure_storage_blobs = { git = \"https://github.com/neondatabase/azure-sdk-for-rust.git\", branch = \"neon\", default-features = false, features = [\"enable_reqwest_rustls\"] }\n\n## Local libraries\ncompute_api = { version = \"0.1\", path = \"./libs/compute_api/\" }\nconsumption_metrics = { version = \"0.1\", path = \"./libs/consumption_metrics/\" }\ndesim = { version = \"0.1\", path = \"./libs/desim\" }\nendpoint_storage = { version = \"0.0.1\", path = \"./endpoint_storage/\" }\nhttp-utils = { version = \"0.1\", path = \"./libs/http-utils/\" }\nmetrics = { version = \"0.1\", path = \"./libs/metrics/\" }\nneon-shmem = { version = \"0.1\", path = \"./libs/neon-shmem/\" }\npageserver = { path = \"./pageserver\" }\npageserver_api = { version = \"0.1\", path = \"./libs/pageserver_api/\" }\npageserver_client = { path = \"./pageserver/client\" }\npageserver_client_grpc = { path = \"./pageserver/client_grpc\" }\npageserver_compaction = { version = \"0.1\", path = \"./pageserver/compaction/\" }\npageserver_page_api = { path = \"./pageserver/page_api\" }\npostgres_backend = { version = \"0.1\", path = \"./libs/postgres_backend/\" }\npostgres_connection = { version = \"0.1\", path = \"./libs/postgres_connection/\" }\npostgres_ffi = { version = \"0.1\", path = \"./libs/postgres_ffi/\" }\npostgres_ffi_types = { version = \"0.1\", path = \"./libs/postgres_ffi_types/\" }\npostgres_versioninfo = { version = \"0.1\", path = \"./libs/postgres_versioninfo/\" }\npostgres_initdb = { path = \"./libs/postgres_initdb\" }\nposthog_client_lite = { version = \"0.1\", path = \"./libs/posthog_client_lite\" }\npq_proto = { version = \"0.1\", path = \"./libs/pq_proto/\" }\nremote_storage = { version = \"0.1\", path = \"./libs/remote_storage/\" }\nsafekeeper_api = { version = \"0.1\", path = \"./libs/safekeeper_api\" }\nsafekeeper_client = { path = \"./safekeeper/client\" }\nstorage_broker = { version = \"0.1\", path = \"./storage_broker/\" } # Note: main broker code is inside the binary 
crate, so linking with the library shouldn't be heavy.\nstorage_controller_client = { path = \"./storage_controller/client\" }\ntenant_size_model = { version = \"0.1\", path = \"./libs/tenant_size_model/\" }\ntracing-utils = { version = \"0.1\", path = \"./libs/tracing-utils/\" }\nutils = { version = \"0.1\", path = \"./libs/utils/\" }\nvm_monitor = { version = \"0.1\", path = \"./libs/vm_monitor/\" }\nwal_decoder = { version = \"0.1\", path = \"./libs/wal_decoder\" }\nwalproposer = { version = \"0.1\", path = \"./libs/walproposer/\" }\n\n## Common library dependency\nworkspace_hack = { version = \"0.1\", path = \"./workspace_hack/\" }\n\n## Build dependencies\ncbindgen = \"0.29.0\"\ncriterion = \"0.5.1\"\nrcgen = \"0.13\"\nrstest = \"0.18\"\ncamino-tempfile = \"1.0.2\"\ntonic-build = \"0.13.1\"\n\n[patch.crates-io]\n\n# Needed to get `tokio-postgres-rustls` to depend on our fork.\ntokio-postgres = { git = \"https://github.com/neondatabase/rust-postgres.git\", branch = \"neon\" }\n\n################# Binary contents sections\n\n[profile.release]\n# This is useful for profiling and, to some extent, debug.\n# Besides, debug info should not affect the performance.\n#\n# NB: we also enable frame pointers for improved profiling, see .cargo/config.toml.\ndebug = true\n\n# disable debug symbols for all packages except this one to decrease binaries size\n[profile.release.package.\"*\"]\ndebug = false\n\n[profile.release-line-debug]\ninherits = \"release\"\ndebug = 1 # true = 2 = all symbols, 1 = line only\n[profile.release-line-debug-lto]\ninherits = \"release\"\ndebug = 1 # true = 2 = all symbols, 1 = line only\nlto = true\n\n[profile.release-line-debug-size]\ninherits = \"release\"\ndebug = 1 # true = 2 = all symbols, 1 = line only\nopt-level = \"s\"\n[profile.release-line-debug-zize]\ninherits = \"release\"\ndebug = 1 # true = 2 = all symbols, 1 = line only\nopt-level = \"z\"\n[profile.release-line-debug-size-lto]\ninherits = \"release\"\ndebug = 1 # true = 2 = all 
symbols, 1 = line only\nopt-level = \"s\"\nlto = true\n[profile.release-line-debug-zize-lto]\ninherits = \"release\"\ndebug = 1 # true = 2 = all symbols, 1 = line only\nopt-level = \"z\"\nlto = true\n\n[profile.release-no-debug]\ninherits = \"release\"\ndebug = false # true = 2 = all symbols, 1 = line only\n\n[profile.release-no-debug-size]\ninherits = \"release\"\ndebug = false # true = 2 = all symbols, 1 = line only\nopt-level = \"s\"\n[profile.release-no-debug-zize]\ninherits = \"release\"\ndebug = false # true = 2 = all symbols, 1 = line only\nopt-level = \"z\"\n\n[profile.release-no-debug-size-lto]\ninherits = \"release\"\ndebug = false # true = 2 = all symbols, 1 = line only\nopt-level = \"s\"\nlto = true\n\n[profile.release-no-debug-zize-lto]\ninherits = \"release\"\ndebug = false # true = 2 = all symbols, 1 = line only\nopt-level = \"z\"\nlto = true\n"
  },
  {
    "path": "Dockerfile",
    "content": "### Creates a storage Docker image with postgres, pageserver, safekeeper and proxy binaries.\n### The image itself is mainly used as a container for the binaries and for starting e2e tests with custom parameters.\n### By default, the binaries inside the image have some mock parameters and can start, but are not intended to be used\n### inside this image in the real deployments.\nARG REPOSITORY=ghcr.io/neondatabase\nARG IMAGE=build-tools\nARG TAG=pinned\nARG DEBIAN_VERSION=bookworm\nARG DEBIAN_FLAVOR=${DEBIAN_VERSION}-slim\n\n# Here are the INDEX DIGESTS for the images we use.\n# You can get them following next steps for now:\n# 1. Get an authentication token from DockerHub:\n#    TOKEN=$(curl -s \"https://auth.docker.io/token?service=registry.docker.io&scope=repository:library/debian:pull\" | jq -r .token)\n# 2. Using that token, query index for the given tag:\n#    curl -s -H \"Authorization: Bearer $TOKEN\" \\\n#       -H \"Accept: application/vnd.docker.distribution.manifest.list.v2+json\" \\\n#       \"https://registry.hub.docker.com/v2/library/debian/manifests/bullseye-slim\" \\\n#       -I | grep -i docker-content-digest\n# 3. 
As a next step, TODO(fedordikarev): create a script and schedule a workflow to run these checks\n#    and updates on a regular basis and in an automated way.\nARG BOOKWORM_SLIM_SHA=sha256:40b107342c492725bc7aacbe93a49945445191ae364184a6d24fedb28172f6f7\nARG BULLSEYE_SLIM_SHA=sha256:e831d9a884d63734fe3dd9c491ed9a5a3d4c6a6d32c5b14f2067357c49b0b7e1\n\n# Here we use the ${var/search/replace} syntax to check\n# whether the base image is one of the images we pin an image index for.\n# If var matches one of the known images, we replace it with the known sha.\n# If there is no match, the value is unaffected and the build proceeds with an unpinned image.\nARG BASE_IMAGE_SHA=debian:${DEBIAN_FLAVOR}\nARG BASE_IMAGE_SHA=${BASE_IMAGE_SHA/debian:bookworm-slim/debian@$BOOKWORM_SLIM_SHA}\nARG BASE_IMAGE_SHA=${BASE_IMAGE_SHA/debian:bullseye-slim/debian@$BULLSEYE_SLIM_SHA}\n\n# Naive way:\n#\n# 1. COPY . .\n# 2. make neon-pg-ext\n# 3. cargo build <storage binaries>\n#\n# But to enable docker to cache intermediate layers, we perform a few preparatory steps:\n#\n# - Build all postgres versions, depending on just the contents of vendor/\n# - Use cargo chef to build all rust dependencies\n\n# 1. Build all postgres versions\nFROM $REPOSITORY/$IMAGE:$TAG AS pg-build\nWORKDIR /home/nonroot\n\nCOPY --chown=nonroot vendor/postgres-v14 vendor/postgres-v14\nCOPY --chown=nonroot vendor/postgres-v15 vendor/postgres-v15\nCOPY --chown=nonroot vendor/postgres-v16 vendor/postgres-v16\nCOPY --chown=nonroot vendor/postgres-v17 vendor/postgres-v17\nCOPY --chown=nonroot Makefile Makefile\nCOPY --chown=nonroot postgres.mk postgres.mk\nCOPY --chown=nonroot scripts/ninstall.sh scripts/ninstall.sh\n\nENV BUILD_TYPE=release\nRUN set -e \\\n    && mold -run make -j $(nproc) -s postgres\n\n# 2. Prepare cargo-chef recipe\nFROM $REPOSITORY/$IMAGE:$TAG AS plan\nWORKDIR /home/nonroot\n\nCOPY --chown=nonroot . 
.\n\nRUN --mount=type=secret,uid=1000,id=SUBZERO_ACCESS_TOKEN \\\n    set -e \\\n    && if [ -s /run/secrets/SUBZERO_ACCESS_TOKEN ]; then \\\n        export CARGO_NET_GIT_FETCH_WITH_CLI=true && \\\n        git config --global url.\"https://$(cat /run/secrets/SUBZERO_ACCESS_TOKEN)@github.com/neondatabase/subzero\".insteadOf \"https://github.com/neondatabase/subzero\" && \\\n        cargo add -p proxy subzero-core --git https://github.com/neondatabase/subzero --rev 396264617e78e8be428682f87469bb25429af88a; \\\n    fi \\\n    && cargo chef prepare --recipe-path recipe.json\n\n# Main build image\nFROM $REPOSITORY/$IMAGE:$TAG AS build\nWORKDIR /home/nonroot\nARG GIT_VERSION=local\nARG BUILD_TAG\nARG ADDITIONAL_RUSTFLAGS=\"\"\nARG IO_ALIGNMENT=512\nENV CARGO_FEATURES=\"default\"\n\n# 3. Build cargo dependencies. Note that this step doesn't depend on anything else than\n# `recipe.json`, so the layer can be reused as long as none of the dependencies change.\nCOPY --from=plan     /home/nonroot/recipe.json                              recipe.json\nRUN --mount=type=secret,uid=1000,id=SUBZERO_ACCESS_TOKEN \\\n    set -e \\\n    && if [ -s /run/secrets/SUBZERO_ACCESS_TOKEN ]; then \\\n        export CARGO_NET_GIT_FETCH_WITH_CLI=true && \\\n        git config --global url.\"https://$(cat /run/secrets/SUBZERO_ACCESS_TOKEN)@github.com/neondatabase/subzero\".insteadOf \"https://github.com/neondatabase/subzero\"; \\\n    fi \\\n    && RUSTFLAGS=\"-Clinker=clang -Clink-arg=-fuse-ld=mold -Clink-arg=-Wl,--no-rosegment -Cforce-frame-pointers=yes ${ADDITIONAL_RUSTFLAGS}\" cargo chef cook --locked --release --recipe-path recipe.json\n\n# Perform the main build. We reuse the Postgres build artifacts from the intermediate 'pg-build'\n# layer, and the cargo dependencies built in the previous step.\nCOPY --chown=nonroot --from=pg-build /home/nonroot/pg_install/ pg_install\nCOPY --chown=nonroot . 
.\nCOPY --chown=nonroot --from=plan     /home/nonroot/proxy/Cargo.toml         proxy/Cargo.toml\nCOPY --chown=nonroot --from=plan     /home/nonroot/Cargo.lock               Cargo.lock\n\nRUN  --mount=type=secret,uid=1000,id=SUBZERO_ACCESS_TOKEN \\\n    set -e \\\n    && if [ -s /run/secrets/SUBZERO_ACCESS_TOKEN ]; then \\\n        export CARGO_FEATURES=\"${CARGO_FEATURES},rest_broker\"; \\\n    fi \\\n    && if [ \"$IO_ALIGNMENT\" = \"4k\" ]; then \\\n        export CARGO_FEATURES=\"${CARGO_FEATURES},io-align-4k\"; \\\n    elif [ \"$IO_ALIGNMENT\" = \"512\" ]; then \\\n        export CARGO_FEATURES=\"${CARGO_FEATURES},io-align-512\"; \\\n    fi \\\n    && RUSTFLAGS=\"-Clinker=clang -Clink-arg=-fuse-ld=mold -Clink-arg=-Wl,--no-rosegment -Cforce-frame-pointers=yes ${ADDITIONAL_RUSTFLAGS}\" cargo auditable build \\\n      --features $CARGO_FEATURES \\\n      --bin pg_sni_router  \\\n      --bin pageserver  \\\n      --bin pagectl  \\\n      --bin safekeeper  \\\n      --bin storage_broker  \\\n      --bin storage_controller  \\\n      --bin proxy  \\\n      --bin endpoint_storage \\\n      --bin neon_local \\\n      --bin storage_scrubber \\\n      --locked --release \\\n    && mold -run make -j $(nproc) -s neon-pg-ext\n\n# Assemble the final image\nFROM $BASE_IMAGE_SHA\nWORKDIR /data\n\nRUN set -e \\\n    && echo 'Acquire::Retries \"5\";' > /etc/apt/apt.conf.d/80-retries \\\n    && apt update \\\n    && apt install -y \\\n        libreadline-dev \\\n        libseccomp-dev \\\n        ca-certificates \\\n        openssl \\\n        unzip \\\n        curl \\\n    && ARCH=$(uname -m) \\\n    && if [ \"$ARCH\" = \"x86_64\" ]; then \\\n        curl \"https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip\" -o \"awscliv2.zip\"; \\\n    elif [ \"$ARCH\" = \"aarch64\" ]; then \\\n        curl \"https://awscli.amazonaws.com/awscli-exe-linux-aarch64.zip\" -o \"awscliv2.zip\"; \\\n    else \\\n        echo \"Unsupported architecture: $ARCH\" && exit 1; \\\n    fi \\\n    && 
unzip awscliv2.zip \\\n    && ./aws/install \\\n    && rm -rf aws awscliv2.zip \\\n    && rm -f /etc/apt/apt.conf.d/80-retries \\\n    && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* \\\n    && useradd -d /data neon \\\n    && chown -R neon:neon /data\n\nCOPY --from=build --chown=neon:neon /home/nonroot/target/release/pg_sni_router       /usr/local/bin\nCOPY --from=build --chown=neon:neon /home/nonroot/target/release/pageserver          /usr/local/bin\nCOPY --from=build --chown=neon:neon /home/nonroot/target/release/pagectl             /usr/local/bin\nCOPY --from=build --chown=neon:neon /home/nonroot/target/release/safekeeper          /usr/local/bin\nCOPY --from=build --chown=neon:neon /home/nonroot/target/release/storage_broker      /usr/local/bin\nCOPY --from=build --chown=neon:neon /home/nonroot/target/release/storage_controller  /usr/local/bin\nCOPY --from=build --chown=neon:neon /home/nonroot/target/release/proxy               /usr/local/bin\nCOPY --from=build --chown=neon:neon /home/nonroot/target/release/endpoint_storage    /usr/local/bin\nCOPY --from=build --chown=neon:neon /home/nonroot/target/release/neon_local          /usr/local/bin\nCOPY --from=build --chown=neon:neon /home/nonroot/target/release/storage_scrubber    /usr/local/bin\nCOPY --from=build /home/nonroot/pg_install/v14 /usr/local/v14/\nCOPY --from=build /home/nonroot/pg_install/v15 /usr/local/v15/\nCOPY --from=build /home/nonroot/pg_install/v16 /usr/local/v16/\nCOPY --from=build /home/nonroot/pg_install/v17 /usr/local/v17/\n\n# Deprecated: Old deployment scripts use this tarball which contains all the Postgres binaries.\n# That's obsolete, since all the same files are also present under /usr/local/v*. But to keep the\n# old scripts working for now, create the tarball.\nRUN tar -C /usr/local -cvzf /data/postgres_install.tar.gz v14 v15 v16 v17\n\n# By default, pageserver uses `.neon/` working directory in WORKDIR, so create one and fill it with the dummy config.\n# Now, when `docker run ... 
pageserver` is run, it can start without errors, yet will have some default dummy values.\nRUN mkdir -p /data/.neon/ && \\\n  echo \"id=1234\" > \"/data/.neon/identity.toml\" && \\\n  echo \"broker_endpoint='http://storage_broker:50051'\\n\" \\\n       \"pg_distrib_dir='/usr/local/'\\n\" \\\n       \"listen_pg_addr='0.0.0.0:6400'\\n\" \\\n       \"listen_http_addr='0.0.0.0:9898'\\n\" \\\n       \"availability_zone='local'\\n\" \\\n  > /data/.neon/pageserver.toml && \\\n  chown -R neon:neon /data/.neon\n\nVOLUME [\"/data\"]\nUSER neon\nEXPOSE 6400\nEXPOSE 9898\n\nCMD [\"/usr/local/bin/pageserver\", \"-D\", \"/data/.neon\"]\n"
  },
  {
    "path": "LICENSE",
    "content": "\n                                 Apache License\n                           Version 2.0, January 2004\n                        http://www.apache.org/licenses/\n\n   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION\n\n   1. Definitions.\n\n      \"License\" shall mean the terms and conditions for use, reproduction,\n      and distribution as defined by Sections 1 through 9 of this document.\n\n      \"Licensor\" shall mean the copyright owner or entity authorized by\n      the copyright owner that is granting the License.\n\n      \"Legal Entity\" shall mean the union of the acting entity and all\n      other entities that control, are controlled by, or are under common\n      control with that entity. For the purposes of this definition,\n      \"control\" means (i) the power, direct or indirect, to cause the\n      direction or management of such entity, whether by contract or\n      otherwise, or (ii) ownership of fifty percent (50%) or more of the\n      outstanding shares, or (iii) beneficial ownership of such entity.\n\n      \"You\" (or \"Your\") shall mean an individual or Legal Entity\n      exercising permissions granted by this License.\n\n      \"Source\" form shall mean the preferred form for making modifications,\n      including but not limited to software source code, documentation\n      source, and configuration files.\n\n      \"Object\" form shall mean any form resulting from mechanical\n      transformation or translation of a Source form, including but\n      not limited to compiled object code, generated documentation,\n      and conversions to other media types.\n\n      \"Work\" shall mean the work of authorship, whether in Source or\n      Object form, made available under the License, as indicated by a\n      copyright notice that is included in or attached to the work\n      (an example is provided in the Appendix below).\n\n      \"Derivative Works\" shall mean any work, whether in Source or Object\n      
form, that is based on (or derived from) the Work and for which the\n      editorial revisions, annotations, elaborations, or other modifications\n      represent, as a whole, an original work of authorship. For the purposes\n      of this License, Derivative Works shall not include works that remain\n      separable from, or merely link (or bind by name) to the interfaces of,\n      the Work and Derivative Works thereof.\n\n      \"Contribution\" shall mean any work of authorship, including\n      the original version of the Work and any modifications or additions\n      to that Work or Derivative Works thereof, that is intentionally\n      submitted to Licensor for inclusion in the Work by the copyright owner\n      or by an individual or Legal Entity authorized to submit on behalf of\n      the copyright owner. For the purposes of this definition, \"submitted\"\n      means any form of electronic, verbal, or written communication sent\n      to the Licensor or its representatives, including but not limited to\n      communication on electronic mailing lists, source code control systems,\n      and issue tracking systems that are managed by, or on behalf of, the\n      Licensor for the purpose of discussing and improving the Work, but\n      excluding communication that is conspicuously marked or otherwise\n      designated in writing by the copyright owner as \"Not a Contribution.\"\n\n      \"Contributor\" shall mean Licensor and any individual or Legal Entity\n      on behalf of whom a Contribution has been received by Licensor and\n      subsequently incorporated within the Work.\n\n   2. Grant of Copyright License. 
Subject to the terms and conditions of\n      this License, each Contributor hereby grants to You a perpetual,\n      worldwide, non-exclusive, no-charge, royalty-free, irrevocable\n      copyright license to reproduce, prepare Derivative Works of,\n      publicly display, publicly perform, sublicense, and distribute the\n      Work and such Derivative Works in Source or Object form.\n\n   3. Grant of Patent License. Subject to the terms and conditions of\n      this License, each Contributor hereby grants to You a perpetual,\n      worldwide, non-exclusive, no-charge, royalty-free, irrevocable\n      (except as stated in this section) patent license to make, have made,\n      use, offer to sell, sell, import, and otherwise transfer the Work,\n      where such license applies only to those patent claims licensable\n      by such Contributor that are necessarily infringed by their\n      Contribution(s) alone or by combination of their Contribution(s)\n      with the Work to which such Contribution(s) was submitted. If You\n      institute patent litigation against any entity (including a\n      cross-claim or counterclaim in a lawsuit) alleging that the Work\n      or a Contribution incorporated within the Work constitutes direct\n      or contributory patent infringement, then any patent licenses\n      granted to You under this License for that Work shall terminate\n      as of the date such litigation is filed.\n\n   4. Redistribution. 
You may reproduce and distribute copies of the\n      Work or Derivative Works thereof in any medium, with or without\n      modifications, and in Source or Object form, provided that You\n      meet the following conditions:\n\n      (a) You must give any other recipients of the Work or\n          Derivative Works a copy of this License; and\n\n      (b) You must cause any modified files to carry prominent notices\n          stating that You changed the files; and\n\n      (c) You must retain, in the Source form of any Derivative Works\n          that You distribute, all copyright, patent, trademark, and\n          attribution notices from the Source form of the Work,\n          excluding those notices that do not pertain to any part of\n          the Derivative Works; and\n\n      (d) If the Work includes a \"NOTICE\" text file as part of its\n          distribution, then any Derivative Works that You distribute must\n          include a readable copy of the attribution notices contained\n          within such NOTICE file, excluding those notices that do not\n          pertain to any part of the Derivative Works, in at least one\n          of the following places: within a NOTICE text file distributed\n          as part of the Derivative Works; within the Source form or\n          documentation, if provided along with the Derivative Works; or,\n          within a display generated by the Derivative Works, if and\n          wherever such third-party notices normally appear. The contents\n          of the NOTICE file are for informational purposes only and\n          do not modify the License. 
You may add Your own attribution\n          notices within Derivative Works that You distribute, alongside\n          or as an addendum to the NOTICE text from the Work, provided\n          that such additional attribution notices cannot be construed\n          as modifying the License.\n\n      You may add Your own copyright statement to Your modifications and\n      may provide additional or different license terms and conditions\n      for use, reproduction, or distribution of Your modifications, or\n      for any such Derivative Works as a whole, provided Your use,\n      reproduction, and distribution of the Work otherwise complies with\n      the conditions stated in this License.\n\n   5. Submission of Contributions. Unless You explicitly state otherwise,\n      any Contribution intentionally submitted for inclusion in the Work\n      by You to the Licensor shall be under the terms and conditions of\n      this License, without any additional terms or conditions.\n      Notwithstanding the above, nothing herein shall supersede or modify\n      the terms of any separate license agreement you may have executed\n      with Licensor regarding such Contributions.\n\n   6. Trademarks. This License does not grant permission to use the trade\n      names, trademarks, service marks, or product names of the Licensor,\n      except as required for reasonable and customary use in describing the\n      origin of the Work and reproducing the content of the NOTICE file.\n\n   7. Disclaimer of Warranty. Unless required by applicable law or\n      agreed to in writing, Licensor provides the Work (and each\n      Contributor provides its Contributions) on an \"AS IS\" BASIS,\n      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or\n      implied, including, without limitation, any warranties or conditions\n      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A\n      PARTICULAR PURPOSE. 
You are solely responsible for determining the\n      appropriateness of using or redistributing the Work and assume any\n      risks associated with Your exercise of permissions under this License.\n\n   8. Limitation of Liability. In no event and under no legal theory,\n      whether in tort (including negligence), contract, or otherwise,\n      unless required by applicable law (such as deliberate and grossly\n      negligent acts) or agreed to in writing, shall any Contributor be\n      liable to You for damages, including any direct, indirect, special,\n      incidental, or consequential damages of any character arising as a\n      result of this License or out of the use or inability to use the\n      Work (including but not limited to damages for loss of goodwill,\n      work stoppage, computer failure or malfunction, or any and all\n      other commercial damages or losses), even if such Contributor\n      has been advised of the possibility of such damages.\n\n   9. Accepting Warranty or Additional Liability. While redistributing\n      the Work or Derivative Works thereof, You may choose to offer,\n      and charge a fee for, acceptance of support, warranty, indemnity,\n      or other liability obligations and/or rights consistent with this\n      License. However, in accepting such obligations, You may act only\n      on Your own behalf and on Your sole responsibility, not on behalf\n      of any other Contributor, and only if You agree to indemnify,\n      defend, and hold each Contributor harmless for any liability\n      incurred by, or claims asserted against, such Contributor by reason\n      of your accepting any such warranty or additional liability.\n\n   END OF TERMS AND CONDITIONS\n\n   APPENDIX: How to apply the Apache License to your work.\n\n      To apply the Apache License to your work, attach the following\n      boilerplate notice, with the fields enclosed by brackets \"[]\"\n      replaced with your own identifying information. 
(Don't include\n      the brackets!)  The text should be enclosed in the appropriate\n      comment syntax for the file format. We also recommend that a\n      file or class name and description of purpose be included on the\n      same \"printed page\" as the copyright notice for easier\n      identification within third-party archives.\n\n   Copyright [yyyy] [name of copyright owner]\n\n   Licensed under the Apache License, Version 2.0 (the \"License\");\n   you may not use this file except in compliance with the License.\n   You may obtain a copy of the License at\n\n       http://www.apache.org/licenses/LICENSE-2.0\n\n   Unless required by applicable law or agreed to in writing, software\n   distributed under the License is distributed on an \"AS IS\" BASIS,\n   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n   See the License for the specific language governing permissions and\n   limitations under the License.\n"
  },
  {
    "path": "Makefile",
    "content": "ROOT_PROJECT_DIR := $(dir $(abspath $(lastword $(MAKEFILE_LIST))))\n\n# Where to install Postgres, default is ./pg_install, maybe useful for package\n# managers.\nPOSTGRES_INSTALL_DIR ?= $(ROOT_PROJECT_DIR)/pg_install\n\n# Supported PostgreSQL versions\nPOSTGRES_VERSIONS = v17 v16 v15 v14\n\n# CARGO_BUILD_FLAGS: Extra flags to pass to `cargo build`. `--locked`\n# and `--features testing` are popular examples.\n#\n# CARGO_PROFILE: Set to override the cargo profile to use. By default,\n# it is derived from BUILD_TYPE.\n\n# All intermediate build artifacts are stored here.\nBUILD_DIR := $(ROOT_PROJECT_DIR)/build\n\nICU_PREFIX_DIR := /usr/local/icu\n\n#\n# We differentiate between release / debug build types using the BUILD_TYPE\n# environment variable.\n#\nBUILD_TYPE ?= debug\nWITH_SANITIZERS ?= no\nPG_CFLAGS = -fsigned-char\nifeq ($(BUILD_TYPE),release)\n\tPG_CONFIGURE_OPTS = --enable-debug --with-openssl\n\tPG_CFLAGS += -O2 -g3 $(CFLAGS)\n\tPG_LDFLAGS = $(LDFLAGS)\n\tCARGO_PROFILE ?= --profile=release\n\t# NEON_CARGO_ARTIFACT_TARGET_DIR is the directory where `cargo build` places\n\t# the final build artifacts. There is unfortunately no easy way of changing\n\t# it to a fully predictable path, nor to extract the path with a simple\n\t# command. 
See https://github.com/rust-lang/cargo/issues/9661 and\n\t# https://github.com/rust-lang/cargo/issues/6790.\n\tNEON_CARGO_ARTIFACT_TARGET_DIR = $(ROOT_PROJECT_DIR)/target/release\nelse ifeq ($(BUILD_TYPE),debug)\n\tPG_CONFIGURE_OPTS = --enable-debug --with-openssl --enable-cassert --enable-depend\n\tPG_CFLAGS += -O0 -g3 $(CFLAGS)\n\tPG_LDFLAGS = $(LDFLAGS)\n\tCARGO_PROFILE ?= --profile=dev\n\tNEON_CARGO_ARTIFACT_TARGET_DIR = $(ROOT_PROJECT_DIR)/target/debug\nelse\n\t$(error Bad build type '$(BUILD_TYPE)', see Makefile for options)\nendif\n\nifeq ($(WITH_SANITIZERS),yes)\n\tPG_CFLAGS += -fsanitize=address -fsanitize=undefined -fno-sanitize-recover\n\tCOPT += -Wno-error # to avoid failing on warnings induced by sanitizers\n\tPG_LDFLAGS = -fsanitize=address -fsanitize=undefined -static-libasan -static-libubsan $(LDFLAGS)\n\texport CC := gcc\n\texport ASAN_OPTIONS := detect_leaks=0\nendif\n\nifeq ($(shell test -e /home/nonroot/.docker_build && echo -n yes),yes)\n\t# Exclude static build openssl, icu for local build (MacOS, Linux)\n\t# Only keep for build type release and debug\n\tPG_CONFIGURE_OPTS += --with-icu\n\tPG_CONFIGURE_OPTS += ICU_CFLAGS='-I/$(ICU_PREFIX_DIR)/include -DU_STATIC_IMPLEMENTATION'\n\tPG_CONFIGURE_OPTS += ICU_LIBS='-L$(ICU_PREFIX_DIR)/lib -L$(ICU_PREFIX_DIR)/lib64 -licui18n -licuuc -licudata -lstdc++ -Wl,-Bdynamic -lm'\nendif\n\nUNAME_S := $(shell uname -s)\nifeq ($(UNAME_S),Linux)\n\t# Seccomp BPF is only available for Linux\n\tifneq ($(WITH_SANITIZERS),yes)\n\t\tPG_CONFIGURE_OPTS += --with-libseccomp\n\tendif\nelse ifeq ($(UNAME_S),Darwin)\n\tPG_CFLAGS += -DUSE_PREFETCH\n\tifndef DISABLE_HOMEBREW\n\t\t# macOS with brew-installed openssl requires explicit paths\n\t\t# It can be configured with OPENSSL_PREFIX variable\n\t\tOPENSSL_PREFIX := $(shell brew --prefix openssl@3)\n\t\tPG_CONFIGURE_OPTS += --with-includes=$(OPENSSL_PREFIX)/include --with-libraries=$(OPENSSL_PREFIX)/lib\n\t\tPG_CONFIGURE_OPTS += PKG_CONFIG_PATH=$(shell brew --prefix 
icu4c)/lib/pkgconfig\n\t\t# macOS already has bison and flex in the system, but they are old and result in postgres-v14 target failure\n\t\t# brew formulae are keg-only and not symlinked into HOMEBREW_PREFIX, force their usage\n\t\tEXTRA_PATH_OVERRIDES += $(shell brew --prefix bison)/bin/:$(shell brew --prefix flex)/bin/:\n\tendif\nendif\n\n# Use -C option so that when PostgreSQL \"make install\" installs the\n# headers, the mtime of the headers are not changed when there have\n# been no changes to the files. Changing the mtime triggers an\n# unnecessary rebuild of 'postgres_ffi'.\nPG_CONFIGURE_OPTS += INSTALL='$(ROOT_PROJECT_DIR)/scripts/ninstall.sh -C'\n\n# Choose whether we should be silent or verbose\nCARGO_BUILD_FLAGS += --$(if $(filter s,$(MAKEFLAGS)),quiet,verbose)\n# Fix for a corner case when make doesn't pass a jobserver\nCARGO_BUILD_FLAGS += $(filter -j1,$(MAKEFLAGS))\n\n# This option has a side effect of passing make jobserver to cargo.\n# However, we shouldn't do this if `make -n` (--dry-run) has been asked.\nCARGO_CMD_PREFIX += $(if $(filter n,$(MAKEFLAGS)),,+)\n# Force cargo not to print progress bar\nCARGO_CMD_PREFIX += CARGO_TERM_PROGRESS_WHEN=never CI=1\n\nCACHEDIR_TAG_CONTENTS := \"Signature: 8a477f597d28d172789f06886806bc55\"\n\n#\n# Top level Makefile to build Neon and PostgreSQL\n#\n.PHONY: all\nall: neon postgres-install neon-pg-ext\n\n### Neon Rust bits\n#\n# The 'postgres_ffi' crate depends on the Postgres headers.\n.PHONY: neon\nneon: postgres-headers-install walproposer-lib cargo-target-dir\n\t+@echo \"Compiling Neon\"\n\t$(CARGO_CMD_PREFIX) cargo build $(CARGO_BUILD_FLAGS) $(CARGO_PROFILE)\n\n.PHONY: cargo-target-dir\ncargo-target-dir:\n\t# https://github.com/rust-lang/cargo/issues/14281\n\tmkdir -p target\n\ttest -e target/CACHEDIR.TAG || echo \"$(CACHEDIR_TAG_CONTENTS)\" > target/CACHEDIR.TAG\n\n.PHONY: neon-pg-ext-%\nneon-pg-ext-%: postgres-install-% cargo-target-dir\n\t+@echo \"Compiling neon-specific Postgres extensions for 
$*\"\n\tmkdir -p $(BUILD_DIR)/pgxn-$*\n\t$(MAKE) PG_CONFIG=\"$(POSTGRES_INSTALL_DIR)/$*/bin/pg_config\" COPT='$(COPT)' \\\n\t\tNEON_CARGO_ARTIFACT_TARGET_DIR=\"$(NEON_CARGO_ARTIFACT_TARGET_DIR)\" \\\n\t\tCARGO_BUILD_FLAGS=\"$(CARGO_BUILD_FLAGS)\" \\\n\t\tCARGO_PROFILE=\"$(CARGO_PROFILE)\" \\\n\t\t-C $(BUILD_DIR)/pgxn-$*\\\n\t\t-f $(ROOT_PROJECT_DIR)/pgxn/Makefile  install\n\n# Build walproposer as a static library. walproposer source code is located\n# in the pgxn/neon directory.\n#\n# We also need to include libpgport.a and libpgcommon.a, because walproposer\n# uses some functions from those libraries.\n#\n# Some object files are removed from libpgport.a and libpgcommon.a because\n# they depend on openssl and other libraries that are not included in our\n# Rust build.\n.PHONY: walproposer-lib\nwalproposer-lib: neon-pg-ext-v17\n\t+@echo \"Compiling walproposer-lib\"\n\tmkdir -p $(BUILD_DIR)/walproposer-lib\n\t$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/v17/bin/pg_config COPT='$(COPT)' \\\n\t\t-C $(BUILD_DIR)/walproposer-lib \\\n\t\t-f $(ROOT_PROJECT_DIR)/pgxn/neon/Makefile walproposer-lib\n\tcp $(POSTGRES_INSTALL_DIR)/v17/lib/libpgport.a $(BUILD_DIR)/walproposer-lib\n\tcp $(POSTGRES_INSTALL_DIR)/v17/lib/libpgcommon.a $(BUILD_DIR)/walproposer-lib\n\t$(AR) d $(BUILD_DIR)/walproposer-lib/libpgport.a \\\n\t\tpg_strong_random.o\n\t$(AR) d $(BUILD_DIR)/walproposer-lib/libpgcommon.a \\\n\t\tchecksum_helper.o \\\n\t\tcryptohash_openssl.o \\\n\t\thmac_openssl.o \\\n\t\tmd5_common.o \\\n\t\tparse_manifest.o \\\n\t\tscram-common.o\nifeq ($(UNAME_S),Linux)\n\t$(AR) d $(BUILD_DIR)/walproposer-lib/libpgcommon.a \\\n\t\tpg_crc32c.o\nendif\n\n# Shorthand to call neon-pg-ext-% target for all Postgres versions\n.PHONY: neon-pg-ext\nneon-pg-ext: $(foreach pg_version,$(POSTGRES_VERSIONS),neon-pg-ext-$(pg_version))\n\n# This removes everything\n.PHONY: distclean\ndistclean:\n\t$(RM) -r $(POSTGRES_INSTALL_DIR) $(BUILD_DIR)\n\t$(CARGO_CMD_PREFIX) cargo clean\n\n.PHONY: 
fmt\nfmt:\n\t./pre-commit.py --fix-inplace\n\npostgres-%-pg-bsd-indent: postgres-%\n\t+@echo \"Compiling pg_bsd_indent\"\n\t$(MAKE) -C $(BUILD_DIR)/$*/src/tools/pg_bsd_indent/\n\n# Create typedef list for the core. Note that generally it should be combined with\n# buildfarm one to cover platform specific stuff.\n# https://wiki.postgresql.org/wiki/Running_pgindent_on_non-core_code_or_development_code\npostgres-%-typedefs.list: postgres-%\n\t$(ROOT_PROJECT_DIR)/vendor/postgres-$*/src/tools/find_typedef $(POSTGRES_INSTALL_DIR)/$*/bin > $@\n\n# Indent postgres. See src/tools/pgindent/README for details.\n.PHONY: postgres-%-pgindent\npostgres-%-pgindent: postgres-%-pg-bsd-indent postgres-%-typedefs.list\n\t+@echo merge with buildfarm typedef to cover all platforms\n\t+@echo note: I first tried to download from pgbuildfarm.org, but for unclear reason e.g. \\\n\t\tREL_16_STABLE list misses PGSemaphoreData\n\t# wget -q -O - \"http://www.pgbuildfarm.org/cgi-bin/typedefs.pl?branch=REL_16_STABLE\" |\\\n\t# cat - postgres-$*-typedefs.list | sort | uniq > postgres-$*-typedefs-full.list\n\tcat $(ROOT_PROJECT_DIR)/vendor/postgres-$*/src/tools/pgindent/typedefs.list |\\\n\t\tcat - postgres-$*-typedefs.list | sort | uniq > postgres-$*-typedefs-full.list\n\t+@echo note: you might want to run it on selected files/dirs instead.\n\tINDENT=$(BUILD_DIR)/$*/src/tools/pg_bsd_indent/pg_bsd_indent \\\n\t\t$(ROOT_PROJECT_DIR)/vendor/postgres-$*/src/tools/pgindent/pgindent --typedefs postgres-$*-typedefs-full.list \\\n\t\t$(ROOT_PROJECT_DIR)/vendor/postgres-$*/src/ \\\n\t\t--excludes $(ROOT_PROJECT_DIR)/vendor/postgres-$*/src/tools/pgindent/exclude_file_patterns\n\t$(RM) pg*.BAK\n\n# Indent pgxn/neon.\n.PHONY: neon-pgindent\nneon-pgindent: postgres-v17-pg-bsd-indent neon-pg-ext-v17\n\t$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/v17/bin/pg_config COPT='$(COPT)' \\\n\t\tFIND_TYPEDEF=$(ROOT_PROJECT_DIR)/vendor/postgres-v17/src/tools/find_typedef 
\\\n\t\tINDENT=$(BUILD_DIR)/v17/src/tools/pg_bsd_indent/pg_bsd_indent \\\n\t\tPGINDENT_SCRIPT=$(ROOT_PROJECT_DIR)/vendor/postgres-v17/src/tools/pgindent/pgindent \\\n\t\t-C $(BUILD_DIR)/pgxn-v17/neon \\\n\t\t-f $(ROOT_PROJECT_DIR)/pgxn/neon/Makefile pgindent\n\n\n.PHONY: setup-pre-commit-hook\nsetup-pre-commit-hook:\n\tln -s -f $(ROOT_PROJECT_DIR)/pre-commit.py .git/hooks/pre-commit\n\nbuild-tools/node_modules: build-tools/package.json\n\tcd build-tools && $(if $(CI),npm ci,npm install)\n\ttouch build-tools/node_modules\n\n.PHONY: lint-openapi-spec\nlint-openapi-spec: build-tools/node_modules\n\t# operation-2xx-response: pageserver timeline delete returns 404 on success\n\tfind . -iname \"openapi_spec.y*ml\" -exec\\\n\t\tnpx --prefix=build-tools/ redocly\\\n\t\t\t--skip-rule=operation-operationId --skip-rule=operation-summary --extends=minimal\\\n\t\t\t--skip-rule=no-server-example.com --skip-rule=operation-2xx-response\\\n\t\t\tlint {} \\+\n\n# Targets for building PostgreSQL are defined in postgres.mk.\n#\n# But if the caller has indicated that PostgreSQL is already\n# installed, by setting the PG_INSTALL_CACHED variable, skip it.\nifdef PG_INSTALL_CACHED\npostgres-install: skip-install\n$(foreach pg_version,$(POSTGRES_VERSIONS),postgres-install-$(pg_version)): skip-install\npostgres-headers-install:\n\t+@echo \"Skipping installation of PostgreSQL headers because PG_INSTALL_CACHED is set\"\nskip-install:\n\t+@echo \"Skipping PostgreSQL installation because PG_INSTALL_CACHED is set\"\n\nelse\ninclude postgres.mk\nendif\n"
  },
  {
    "path": "NOTICE",
    "content": "Neon\nCopyright 2022 - 2024 Neon Inc.\n\nThe PostgreSQL submodules in vendor/ are licensed under the PostgreSQL license.\nSee vendor/postgres-vX/COPYRIGHT for details.\n"
  },
  {
    "path": "README.md",
    "content": "[![Neon](https://github.com/user-attachments/assets/fd91da5f-44a9-41c7-9075-36a5b5608083)](https://neon.com)\n\n\n\n# Neon\n\nNeon is an open-source serverless Postgres database platform. It separates storage and compute and substitutes the PostgreSQL storage layer by redistributing data across a cluster of nodes.\n\n## Quick start\nTry the [Neon Free Tier](https://neon.com/signup) to create a serverless Postgres instance. Then connect to it with your preferred Postgres client (psql, dbeaver, etc) or use the online [SQL Editor](https://neon.com/docs/get-started-with-neon/query-with-neon-sql-editor/). See [Connect from any application](https://neon.com/docs/connect/connect-from-any-app/) for connection instructions.\n\nAlternatively, compile and run the project [locally](#running-local-installation).\n\n## Architecture overview\n\nA Neon installation consists of compute nodes and the Neon storage engine. Compute nodes are stateless PostgreSQL nodes backed by the Neon storage engine.\n\nThe Neon storage engine consists of two major components:\n- Pageserver: Scalable storage backend for the compute nodes.\n- Safekeepers: The safekeepers form a redundant WAL service that received WAL from the compute node, and stores it durably until it has been processed by the pageserver and uploaded to cloud storage.\n\nSee developer documentation in [SUMMARY.md](/docs/SUMMARY.md) for more information.\n\n## Running a local development environment\n\nNeon can be run on a workstation for small experiments and to test code changes, by\nfollowing these instructions.\n\n#### Installing dependencies on Linux\n1. 
Install build dependencies and other applicable packages\n\n* On Ubuntu or Debian, this set of packages should be sufficient to build the code:\n```bash\napt install build-essential libtool libreadline-dev zlib1g-dev flex bison libseccomp-dev \\\nlibssl-dev clang pkg-config libpq-dev cmake postgresql-client protobuf-compiler \\\nlibprotobuf-dev libcurl4-openssl-dev openssl python3-poetry lsof libicu-dev\n```\n* On Fedora, these packages are needed:\n```bash\ndnf install flex bison readline-devel zlib-devel openssl-devel \\\n  libseccomp-devel perl clang cmake postgresql postgresql-contrib protobuf-compiler \\\n  protobuf-devel libcurl-devel openssl poetry lsof libicu-devel libpq-devel python3-devel \\\n  libffi-devel\n```\n* On Arch based systems, these packages are needed:\n```bash\npacman -S base-devel readline zlib libseccomp openssl clang \\\npostgresql-libs cmake postgresql protobuf curl lsof\n```\n\nBuilding Neon requires 3.15+ version of `protoc` (protobuf-compiler). If your distribution provides an older version, you can install a newer version from [here](https://github.com/protocolbuffers/protobuf/releases).\n\n2. [Install Rust](https://www.rust-lang.org/tools/install)\n```\n# recommended approach from https://www.rust-lang.org/tools/install\ncurl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh\n```\n\n#### Installing dependencies on macOS (12.3.1)\n1. Install XCode and dependencies\n```\nxcode-select --install\nbrew install protobuf openssl flex bison icu4c pkg-config m4\n\n# add openssl to PATH, required for ed25519 keys generation in neon_local\necho 'export PATH=\"$(brew --prefix openssl)/bin:$PATH\"' >> ~/.zshrc\n```\n\nIf you get errors about missing `m4` you may have to install it manually:\n```\nbrew install m4\nbrew link --force m4\n```\n\n2. 
[Install Rust](https://www.rust-lang.org/tools/install)\n```\n# recommended approach from https://www.rust-lang.org/tools/install\ncurl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh\n```\n\n3. Install PostgreSQL Client\n```\n# from https://stackoverflow.com/questions/44654216/correct-way-to-install-psql-without-full-postgres-on-macos\nbrew install libpq\nbrew link --force libpq\n```\n\n#### Rustc version\n\nThe project uses [rust toolchain file](./rust-toolchain.toml) to define the version it's built with in CI for testing and local builds.\n\nThis file is automatically picked up by [`rustup`](https://rust-lang.github.io/rustup/overrides.html#the-toolchain-file) that installs (if absent) and uses the toolchain version pinned in the file.\n\nrustup users who want to build with another toolchain can use the [`rustup override`](https://rust-lang.github.io/rustup/overrides.html#directory-overrides) command to set a specific toolchain for the project's directory.\n\nnon-rustup users most probably are not getting the same toolchain automatically from the file, so are responsible to manually verify that their toolchain matches the version in the file.\nNewer rustc versions most probably will work fine, yet older ones might not be supported due to some new features used by the project or the crates.\n\n#### Building on Linux\n\n1. Build neon and patched postgres\n```\n# Note: The path to the neon sources can not contain a space.\n\ngit clone --recursive https://github.com/neondatabase/neon.git\ncd neon\n\n# The preferred and default is to make a debug build. This will create a\n# demonstrably slower build than a release build. For a release build,\n# use \"BUILD_TYPE=release make -j`nproc` -s\"\n# Remove -s for the verbose build log\n\nmake -j`nproc` -s\n```\n\n#### Building on OSX\n\n1. 
Build neon and patched postgres\n```\n# Note: The path to the neon sources can not contain a space.\n\ngit clone --recursive https://github.com/neondatabase/neon.git\ncd neon\n\n# The preferred and default is to make a debug build. This will create a\n# demonstrably slower build than a release build. For a release build,\n# use \"BUILD_TYPE=release make -j`sysctl -n hw.logicalcpu` -s\"\n# Remove -s for the verbose build log\n\nmake -j`sysctl -n hw.logicalcpu` -s\n```\n\n#### Dependency installation notes\nTo run the `psql` client, install the `postgresql-client` package or modify `PATH` and `LD_LIBRARY_PATH` to include `pg_install/bin` and `pg_install/lib`, respectively.\n\nTo run the integration tests or Python scripts (not required to use the code), install\nPython (3.11 or higher), and install the python3 packages using `./scripts/pysync` (requires [poetry>=1.8](https://python-poetry.org/)) in the project directory.\n\n\n#### Running neon database\n1. Start pageserver and postgres on top of it (should be called from repo root):\n```sh\n# Create repository in .neon with proper paths to binaries and data\n# Later that would be responsibility of a package install script\n> cargo neon init\nInitializing pageserver node 1 at '127.0.0.1:64000' in \".neon\"\n\n# start pageserver, safekeeper, and broker for their intercommunication\n> cargo neon start\nStarting neon broker at 127.0.0.1:50051.\nstorage_broker started, pid: 2918372\nStarting pageserver node 1 at '127.0.0.1:64000' in \".neon\".\npageserver started, pid: 2918386\nStarting safekeeper at '127.0.0.1:5454' in '.neon/safekeepers/sk1'.\nsafekeeper 1 started, pid: 2918437\n\n# create initial tenant and use it as a default for every future neon_local invocation\n> cargo neon tenant create --set-default\ntenant 9ef87a5bf0d92544f6fafeeb3239695c successfully created on the pageserver\nCreated an initial timeline 'de200bd42b49cc1814412c7e592dd6e9' at Lsn 0/16B5A50 for tenant: 9ef87a5bf0d92544f6fafeeb3239695c\nSetting 
tenant 9ef87a5bf0d92544f6fafeeb3239695c as a default one\n\n# create postgres compute node\n> cargo neon endpoint create main\n\n# start postgres compute node\n> cargo neon endpoint start main\nStarting new endpoint main (PostgreSQL v14) on timeline de200bd42b49cc1814412c7e592dd6e9 ...\nStarting postgres at 'postgresql://cloud_admin@127.0.0.1:55432/postgres'\n\n# check list of running postgres instances\n> cargo neon endpoint list\n ENDPOINT  ADDRESS          TIMELINE                          BRANCH NAME  LSN        STATUS\n main      127.0.0.1:55432  de200bd42b49cc1814412c7e592dd6e9  main         0/16B5BA8  running\n```\n\n2. Now, it is possible to connect to postgres and run some queries:\n```text\n> psql -p 55432 -h 127.0.0.1 -U cloud_admin postgres\npostgres=# CREATE TABLE t(key int primary key, value text);\nCREATE TABLE\npostgres=# insert into t values(1,1);\nINSERT 0 1\npostgres=# select * from t;\n key | value\n-----+-------\n   1 | 1\n(1 row)\n```\n\n3. And create branches and run postgres on them:\n```sh\n# create branch named migration_check\n> cargo neon timeline branch --branch-name migration_check\nCreated timeline 'b3b863fa45fa9e57e615f9f2d944e601' at Lsn 0/16F9A00 for tenant: 9ef87a5bf0d92544f6fafeeb3239695c. 
Ancestor timeline: 'main'\n\n# check branches tree\n> cargo neon timeline list\n(L) main [de200bd42b49cc1814412c7e592dd6e9]\n(L) ┗━ @0/16F9A00: migration_check [b3b863fa45fa9e57e615f9f2d944e601]\n\n# create postgres on that branch\n> cargo neon endpoint create migration_check --branch-name migration_check\n\n# start postgres on that branch\n> cargo neon endpoint start migration_check\nStarting new endpoint migration_check (PostgreSQL v14) on timeline b3b863fa45fa9e57e615f9f2d944e601 ...\nStarting postgres at 'postgresql://cloud_admin@127.0.0.1:55434/postgres'\n\n# check the new list of running postgres instances\n> cargo neon endpoint list\n ENDPOINT         ADDRESS          TIMELINE                          BRANCH NAME      LSN        STATUS\n main             127.0.0.1:55432  de200bd42b49cc1814412c7e592dd6e9  main             0/16F9A38  running\n migration_check  127.0.0.1:55434  b3b863fa45fa9e57e615f9f2d944e601  migration_check  0/16F9A70  running\n\n# this new postgres instance will have all the data from 'main' postgres,\n# but all modifications would not affect data in original postgres\n> psql -p 55434 -h 127.0.0.1 -U cloud_admin postgres\npostgres=# select * from t;\n key | value\n-----+-------\n   1 | 1\n(1 row)\n\npostgres=# insert into t values(2,2);\nINSERT 0 1\n\n# check that the new change doesn't affect the 'main' postgres\n> psql -p 55432 -h 127.0.0.1 -U cloud_admin postgres\npostgres=# select * from t;\n key | value\n-----+-------\n   1 | 1\n(1 row)\n```\n\n4. If you want to run tests afterwards (see below), you must stop all the running pageserver, safekeeper, and postgres instances\n   you have just started. 
You can terminate them all with one command:\n```sh\n> cargo neon stop\n```\n\nMore advanced usages can be found at [Local Development Control Plane (`neon_local`)](./control_plane/README.md).\n\n#### Handling build failures\n\nIf you encounter errors while setting up the initial tenant, it's best to stop everything (`cargo neon stop`) and remove the `.neon` directory. Then fix the problems, and start the setup again.\n\n## Running tests\n\n### Rust unit tests\n\nWe are using [`cargo-nextest`](https://nexte.st/) to run the tests in GitHub Workflows.\nSome crates do not support running plain `cargo test` anymore; prefer `cargo nextest run` instead.\nYou can install `cargo-nextest` with `cargo install cargo-nextest`.\n\n### Integration tests\n\nEnsure your dependencies are installed as described [here](https://github.com/neondatabase/neon#dependency-installation-notes).\n\n```sh\ngit clone --recursive https://github.com/neondatabase/neon.git\n\nCARGO_BUILD_FLAGS=\"--features=testing\" make\n\n./scripts/pytest\n```\n\nBy default, this runs both debug and release modes, and all supported postgres versions. When\ntesting locally, it is convenient to run just one set of permutations, like this:\n\n```sh\nDEFAULT_PG_VERSION=17 BUILD_TYPE=release ./scripts/pytest\n```\n\n## Flamegraphs\n\nYou may find yourself in need of flamegraphs for software in this repository.\nYou can use [`flamegraph-rs`](https://github.com/flamegraph-rs/flamegraph) or the original [`flamegraph.pl`](https://github.com/brendangregg/FlameGraph). 
Your choice!\n\n>[!IMPORTANT]\n> If you're using `lld` or `mold`, you need the `--no-rosegment` linker argument.\n> It's a [general thing with Rust / lld / mold](https://crbug.com/919499#c16), not specific to this repository.\n> See [this PR for further instructions](https://github.com/neondatabase/neon/pull/6764).\n\n## Cleanup\n\nFor cleaning up the source tree from build artifacts, run `make clean` in the source directory.\n\nFor removing every artifact from build and configure steps, run `make distclean`, and also consider removing the cargo binaries in the `target` directory, as well as the database in the `.neon` directory. Note that removing the `.neon` directory will remove your database, with all data in it. You have been warned!\n\n## Documentation\n\n[docs](/docs) Contains a top-level overview of all available markdown documentation.\n\n- [sourcetree.md](/docs/sourcetree.md) contains overview of source tree layout.\n\nTo view your `rustdoc` documentation in a browser, try running `cargo doc --no-deps --open`\n\nSee also README files in some source directories, and `rustdoc` style documentation comments.\n\nOther resources:\n\n- [SELECT 'Hello, World'](https://neon.com/blog/hello-world/): Blog post by Nikita Shamgunov on the high level architecture\n- [Architecture decisions in Neon](https://neon.com/blog/architecture-decisions-in-neon/): Blog post by Heikki Linnakangas\n- [Neon: Serverless PostgreSQL!](https://www.youtube.com/watch?v=rES0yzeERns): Presentation on storage system by Heikki Linnakangas in the CMU Database Group seminar series\n\n### Postgres-specific terms\n\nDue to Neon's very close relation with PostgreSQL internals, numerous specific terms are used.\nThe same applies to certain spelling: i.e. 
we use MB to denote 1024 * 1024 bytes, while MiB would be technically more correct, it's inconsistent with what PostgreSQL code and its documentation use.\n\nTo get more familiar with this aspect, refer to:\n\n- [Neon glossary](/docs/glossary.md)\n- [PostgreSQL glossary](https://www.postgresql.org/docs/14/glossary.html)\n- Other PostgreSQL documentation and sources (Neon fork sources can be found [here](https://github.com/neondatabase/postgres))\n\n## Join the development\n\n- Read [CONTRIBUTING.md](/CONTRIBUTING.md) to learn about project code style and practices.\n- To get familiar with a source tree layout, use [sourcetree.md](/docs/sourcetree.md).\n- To learn more about PostgreSQL internals, check http://www.interdb.jp/pg/index.html\n"
  },
  {
    "path": "build-tools/Dockerfile",
    "content": "ARG DEBIAN_VERSION=bookworm\nARG DEBIAN_FLAVOR=${DEBIAN_VERSION}-slim\n\n# Here are the INDEX DIGESTS for the images we use.\n# You can get them following next steps for now:\n# 1. Get an authentication token from DockerHub:\n#    TOKEN=$(curl -s \"https://auth.docker.io/token?service=registry.docker.io&scope=repository:library/debian:pull\" | jq -r .token)\n# 2. Using that token, query index for the given tag:\n#    curl -s -H \"Authorization: Bearer $TOKEN\" \\\n#       -H \"Accept: application/vnd.docker.distribution.manifest.list.v2+json\" \\\n#       \"https://registry.hub.docker.com/v2/library/debian/manifests/bullseye-slim\" \\\n#       -I | grep -i docker-content-digest\n# 3. As a next step, TODO(fedordikarev): create script and schedule workflow to run these checks\n#    and updates on regular bases and in automated way.\nARG BOOKWORM_SLIM_SHA=sha256:40b107342c492725bc7aacbe93a49945445191ae364184a6d24fedb28172f6f7\nARG BULLSEYE_SLIM_SHA=sha256:e831d9a884d63734fe3dd9c491ed9a5a3d4c6a6d32c5b14f2067357c49b0b7e1\n\n# Here we use ${var/search/replace} syntax, to check\n# if base image is one of the images, we pin image index for.\n# If var will match one the known images, we will replace it with the known sha.\n# If no match, than value will be unaffected, and will process with no-pinned image.\nARG BASE_IMAGE_SHA=debian:${DEBIAN_FLAVOR}\nARG BASE_IMAGE_SHA=${BASE_IMAGE_SHA/debian:bookworm-slim/debian@$BOOKWORM_SLIM_SHA}\nARG BASE_IMAGE_SHA=${BASE_IMAGE_SHA/debian:bullseye-slim/debian@$BULLSEYE_SLIM_SHA}\n\nFROM $BASE_IMAGE_SHA AS pgcopydb_builder\nARG DEBIAN_VERSION\n\n# Use strict mode for bash to catch errors early\nSHELL [\"/bin/bash\", \"-euo\", \"pipefail\", \"-c\"]\n\n# By default, /bin/sh used in debian images will treat '\\n' as eol,\n# but as we use bash as SHELL, and built-in echo in bash requires '-e' flag for that.\nRUN echo 'Acquire::Retries \"5\";' > /etc/apt/apt.conf.d/80-retries && \\\n    echo -e 
\"retry_connrefused=on\\ntimeout=15\\ntries=5\\nretry-on-host-error=on\\n\" > /root/.wgetrc && \\\n    echo -e \"--retry-connrefused\\n--connect-timeout 15\\n--retry 5\\n--max-time 300\\n\" > /root/.curlrc\n\nCOPY build-tools/patches/pgcopydbv017.patch /pgcopydbv017.patch\n\nRUN if [ \"${DEBIAN_VERSION}\" = \"bookworm\" ]; then \\\n        set -e && \\\n        apt-get update && \\\n        apt-get install -y --no-install-recommends \\\n        ca-certificates wget gpg && \\\n        wget -qO - https://www.postgresql.org/media/keys/ACCC4CF8.asc | gpg --dearmor -o /usr/share/keyrings/postgresql-keyring.gpg && \\\n        echo \"deb [signed-by=/usr/share/keyrings/postgresql-keyring.gpg] http://apt.postgresql.org/pub/repos/apt bookworm-pgdg main\" > /etc/apt/sources.list.d/pgdg.list && \\\n        apt-get update && \\\n        apt-get install -y --no-install-recommends \\\n        build-essential \\\n        autotools-dev \\\n        libedit-dev \\\n        libgc-dev \\\n        libpam0g-dev \\\n        libreadline-dev \\\n        libselinux1-dev \\\n        libxslt1-dev \\\n        libssl-dev \\\n        libkrb5-dev \\\n        zlib1g-dev \\\n        liblz4-dev \\\n        libpq5 \\\n        libpq-dev \\\n        libzstd-dev \\\n        postgresql-16 \\\n        postgresql-server-dev-16 \\\n        postgresql-common  \\\n        python3-sphinx && \\\n        wget -O /tmp/pgcopydb.tar.gz https://github.com/dimitri/pgcopydb/archive/refs/tags/v0.17.tar.gz && \\\n        mkdir /tmp/pgcopydb && \\\n        tar -xzf /tmp/pgcopydb.tar.gz -C /tmp/pgcopydb --strip-components=1 && \\\n        cd /tmp/pgcopydb && \\\n        patch -p1 < /pgcopydbv017.patch && \\\n        make -s clean && \\\n        make -s -j12 install && \\\n        libpq_path=$(find /lib /usr/lib -name \"libpq.so.5\" | head -n 1) && \\\n        mkdir -p /pgcopydb/lib && \\\n        cp \"$libpq_path\" /pgcopydb/lib/; \\\n    else \\\n        # copy command below will fail if we don't have dummy files, so we 
create them for other debian versions\n        mkdir -p /usr/lib/postgresql/16/bin && touch /usr/lib/postgresql/16/bin/pgcopydb && \\\n        mkdir -p mkdir -p /pgcopydb/lib && touch /pgcopydb/lib/libpq.so.5; \\\n    fi\n\nFROM $BASE_IMAGE_SHA AS build_tools\nARG DEBIAN_VERSION\n\n# Add nonroot user\nRUN useradd -ms /bin/bash nonroot -b /home\n# Use strict mode for bash to catch errors early\nSHELL [\"/bin/bash\", \"-euo\", \"pipefail\", \"-c\"]\n\nRUN mkdir -p /pgcopydb/{bin,lib} && \\    \n    chmod -R 755 /pgcopydb && \\\n    chown -R nonroot:nonroot /pgcopydb\n\nCOPY --from=pgcopydb_builder /usr/lib/postgresql/16/bin/pgcopydb /pgcopydb/bin/pgcopydb\nCOPY --from=pgcopydb_builder /pgcopydb/lib/libpq.so.5 /pgcopydb/lib/libpq.so.5\n\nRUN echo 'Acquire::Retries \"5\";' > /etc/apt/apt.conf.d/80-retries && \\\n    echo -e \"retry_connrefused=on\\ntimeout=15\\ntries=5\\nretry-on-host-error=on\\n\" > /root/.wgetrc && \\\n    echo -e \"--retry-connrefused\\n--connect-timeout 15\\n--retry 5\\n--max-time 300\\n\" > /root/.curlrc\n\n# System deps\n#\n# 'gdb' is included so that we get backtraces of core dumps produced in\n# regression tests\nRUN set -e \\\n    && apt-get update \\\n    && apt-get install -y --no-install-recommends \\\n        autoconf \\\n        automake \\\n        bison \\\n        build-essential \\\n        ca-certificates \\\n        cmake \\\n        curl \\\n        flex \\\n        gdb \\\n        git \\\n        gnupg \\\n        gzip \\\n        jq \\\n        jsonnet \\\n        libcurl4-openssl-dev \\\n        libbz2-dev \\\n        libffi-dev \\\n        liblzma-dev \\\n        libncurses5-dev \\\n        libncursesw5-dev \\\n        libreadline-dev \\\n        libseccomp-dev \\\n        libsqlite3-dev \\\n        libssl-dev \\\n        $([[ \"${DEBIAN_VERSION}\" = \"bullseye\" ]] && echo libstdc++-10-dev || echo libstdc++-11-dev) \\\n        libtool \\\n        libxml2-dev \\\n        libxmlsec1-dev \\\n        libxxhash-dev \\\n        lsof 
\\\n        make \\\n        netcat-openbsd \\\n        net-tools \\\n        openssh-client \\\n        parallel \\\n        pkg-config \\\n        unzip \\\n        wget \\\n        xz-utils \\\n        zlib1g-dev \\\n        zstd \\\n    && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*\n\n# sql_exporter\n\n# Keep the version the same as in compute/compute-node.Dockerfile and\n# test_runner/regress/test_compute_metrics.py.\nENV SQL_EXPORTER_VERSION=0.17.3\nRUN curl -fsSL \\\n    \"https://github.com/burningalchemist/sql_exporter/releases/download/${SQL_EXPORTER_VERSION}/sql_exporter-${SQL_EXPORTER_VERSION}.linux-$(case \"$(uname -m)\" in x86_64) echo amd64;; aarch64) echo arm64;; esac).tar.gz\" \\\n    --output sql_exporter.tar.gz \\\n    && mkdir /tmp/sql_exporter \\\n    && tar xzvf sql_exporter.tar.gz -C /tmp/sql_exporter --strip-components=1 \\\n    && mv /tmp/sql_exporter/sql_exporter /usr/local/bin/sql_exporter \\\n    && rm sql_exporter.tar.gz\n\n# protobuf-compiler (protoc)\n# Keep the version the same as in compute/compute-node.Dockerfile\nENV PROTOC_VERSION=25.1\nRUN curl -fsSL \"https://github.com/protocolbuffers/protobuf/releases/download/v${PROTOC_VERSION}/protoc-${PROTOC_VERSION}-linux-$(uname -m | sed 's/aarch64/aarch_64/g').zip\" -o \"protoc.zip\" \\\n    && unzip -q protoc.zip -d protoc \\\n    && mv protoc/bin/protoc /usr/local/bin/protoc \\\n    && mv protoc/include/google /usr/local/include/google \\\n    && rm -rf protoc.zip protoc\n\n# s5cmd\nENV S5CMD_VERSION=2.3.0\nRUN curl -sL \"https://github.com/peak/s5cmd/releases/download/v${S5CMD_VERSION}/s5cmd_${S5CMD_VERSION}_Linux-$(uname -m | sed 's/x86_64/64bit/g' | sed 's/aarch64/arm64/g').tar.gz\" | tar zxvf - s5cmd \\\n    && chmod +x s5cmd \\\n    && mv s5cmd /usr/local/bin/s5cmd\n\n# LLVM\nENV LLVM_VERSION=20\nRUN curl -fsSL 'https://apt.llvm.org/llvm-snapshot.gpg.key' | apt-key add - \\\n    && echo \"deb http://apt.llvm.org/${DEBIAN_VERSION}/ 
llvm-toolchain-${DEBIAN_VERSION}-${LLVM_VERSION} main\" > /etc/apt/sources.list.d/llvm.stable.list \\\n    && apt-get update \\\n    && apt-get install -y --no-install-recommends clang-${LLVM_VERSION} llvm-${LLVM_VERSION} \\\n    && bash -c 'for f in /usr/bin/clang*-${LLVM_VERSION} /usr/bin/llvm*-${LLVM_VERSION}; do ln -s \"${f}\" \"${f%-${LLVM_VERSION}}\"; done' \\\n    && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*\n\n# Install node\nENV NODE_VERSION=24\nRUN curl -fsSL https://deb.nodesource.com/setup_${NODE_VERSION}.x | bash - \\\n    && apt-get install -y --no-install-recommends nodejs \\\n    && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*\n\n# Install docker\nRUN curl -fsSL https://download.docker.com/linux/ubuntu/gpg | gpg --dearmor -o /usr/share/keyrings/docker-archive-keyring.gpg \\\n    && echo \"deb [arch=$(dpkg --print-architecture) signed-by=/usr/share/keyrings/docker-archive-keyring.gpg] https://download.docker.com/linux/debian ${DEBIAN_VERSION} stable\" > /etc/apt/sources.list.d/docker.list \\\n    && apt-get update \\\n    && apt-get install -y --no-install-recommends docker-ce docker-ce-cli \\\n    && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*\n\n# Configure sudo & docker\nRUN usermod -aG sudo nonroot && \\\n    echo '%sudo ALL=(ALL) NOPASSWD:ALL' >> /etc/sudoers && \\\n    usermod -aG docker nonroot\n\n# AWS CLI\nRUN curl \"https://awscli.amazonaws.com/awscli-exe-linux-$(uname -m).zip\" -o \"awscliv2.zip\" \\\n    && unzip -q awscliv2.zip \\\n    && ./aws/install \\\n    && rm awscliv2.zip\n\n# Mold: A Modern Linker\nENV MOLD_VERSION=v2.37.1\nRUN set -e \\\n    && git clone -b \"${MOLD_VERSION}\" --depth 1 https://github.com/rui314/mold.git \\\n    && mkdir mold/build \\\n    && cd mold/build \\    \n    && cmake -DCMAKE_BUILD_TYPE=Release -DCMAKE_CXX_COMPILER=clang++ .. \\\n    && cmake --build . -j \"$(nproc)\" \\\n    && cmake --install . \\\n    && cd .. 
\\\n    && rm -rf mold\n\n# LCOV\n# Build lcov from a fork:\n# It includes several bug fixes on top of the v2.0 release (https://github.com/linux-test-project/lcov/compare/v2.0...master)\n# And patches from us:\n# - Generates json file with code coverage summary (https://github.com/neondatabase/lcov/commit/426e7e7a22f669da54278e9b55e6d8caabd00af0.tar.gz)\nRUN set +o pipefail && \\\n\t for package in Capture::Tiny DateTime Devel::Cover Digest::MD5 File::Spec JSON::XS Memory::Process Time::HiRes JSON; do \\\n\t\tyes | perl -MCPAN -e \"CPAN::Shell->notest('install', '$package')\";\\\n\t done && \\\n\tset -o pipefail\n# Split into separate step to debug flaky failures here\nRUN wget https://github.com/neondatabase/lcov/archive/426e7e7a22f669da54278e9b55e6d8caabd00af0.tar.gz -O lcov.tar.gz \\\n    && ls -laht lcov.tar.gz && sha256sum lcov.tar.gz \\\n    && echo \"61a22a62e20908b8b9e27d890bd0ea31f567a7b9668065589266371dcbca0992  lcov.tar.gz\" | sha256sum --check \\\n    && mkdir -p lcov && tar -xzf lcov.tar.gz -C lcov --strip-components=1 \\\n    && cd lcov \\\n    && make install \\\n    && rm -rf ../lcov.tar.gz\n\n# Use the same version of libicu as the compute nodes so that\n# clusters created using initdb on pageserver can be used by computes.\n#\n# TODO: at this time, compute-node.Dockerfile uses the debian bullseye libicu\n# package, which is 67.1. 
We're duplicating that knowledge here, and also, technically,\n# Debian has a few patches on top of 67.1 that we're not adding here.\nENV ICU_VERSION=67.1\nENV ICU_PREFIX=/usr/local/icu\n\n# Download and build static ICU\nRUN wget -O \"/tmp/libicu-${ICU_VERSION}.tgz\" https://github.com/unicode-org/icu/releases/download/release-${ICU_VERSION//./-}/icu4c-${ICU_VERSION//./_}-src.tgz && \\\n    echo \"94a80cd6f251a53bd2a997f6f1b5ac6653fe791dfab66e1eb0227740fb86d5dc /tmp/libicu-${ICU_VERSION}.tgz\" | sha256sum --check && \\\n    mkdir /tmp/icu && \\\n    pushd /tmp/icu && \\\n    tar -xzf /tmp/libicu-${ICU_VERSION}.tgz && \\\n    pushd icu/source && \\\n    ./configure --prefix=${ICU_PREFIX}  --enable-static --enable-shared=no CXXFLAGS=\"-fPIC\" CFLAGS=\"-fPIC\" && \\\n    make -j \"$(nproc)\" && \\\n    make install && \\\n    popd && \\\n    rm -rf icu && \\\n    rm -f /tmp/libicu-${ICU_VERSION}.tgz\n\n# Switch to nonroot user\nUSER nonroot:nonroot\nWORKDIR /home/nonroot\n\nRUN echo -e \"--retry-connrefused\\n--connect-timeout 15\\n--retry 5\\n--max-time 300\\n\" > /home/nonroot/.curlrc\n\n# Python\nENV PYTHON_VERSION=3.11.12 \\\n    PYENV_ROOT=/home/nonroot/.pyenv \\\n    PATH=/home/nonroot/.pyenv/shims:/home/nonroot/.pyenv/bin:/home/nonroot/.poetry/bin:$PATH\nRUN set -e \\\n    && cd \"$HOME\" \\\n    && curl -sSO https://raw.githubusercontent.com/pyenv/pyenv-installer/master/bin/pyenv-installer \\\n    && chmod +x pyenv-installer \\\n    && ./pyenv-installer \\\n    && export PYENV_ROOT=/home/nonroot/.pyenv \\\n    && export PATH=\"$PYENV_ROOT/bin:$PATH\" \\\n    && export PATH=\"$PYENV_ROOT/shims:$PATH\" \\\n    && pyenv install \"${PYTHON_VERSION}\" \\\n    && pyenv global \"${PYTHON_VERSION}\" \\\n    && python --version \\\n    && pip install --no-cache-dir --upgrade pip \\\n    && pip --version \\\n    && pip install --no-cache-dir pipenv wheel poetry\n\n# Switch to nonroot user (again)\nUSER nonroot:nonroot\nWORKDIR /home/nonroot\n\n# Rust\n# Please keep the 
version of llvm (installed above) in sync with rust llvm (`rustc --version --verbose | grep LLVM`)\nENV RUSTC_VERSION=1.88.0\nENV RUSTUP_HOME=\"/home/nonroot/.rustup\"\nENV PATH=\"/home/nonroot/.cargo/bin:${PATH}\"\nARG CARGO_AUDITABLE_VERSION=0.7.0\nARG RUSTFILT_VERSION=0.2.1\nARG CARGO_HAKARI_VERSION=0.9.36\nARG CARGO_DENY_VERSION=0.18.2\nARG CARGO_HACK_VERSION=0.6.36\nARG CARGO_NEXTEST_VERSION=0.9.94\nARG CARGO_CHEF_VERSION=0.1.71\nARG CARGO_DIESEL_CLI_VERSION=2.2.9\nRUN curl -sSO https://static.rust-lang.org/rustup/dist/$(uname -m)-unknown-linux-gnu/rustup-init && whoami && \\\n\tchmod +x rustup-init && \\\n\t./rustup-init -y --default-toolchain ${RUSTC_VERSION} && \\\n\trm rustup-init && \\\n    export PATH=\"$HOME/.cargo/bin:$PATH\" && \\\n    . \"$HOME/.cargo/env\" && \\\n    cargo --version && rustup --version && \\\n    rustup component add llvm-tools rustfmt clippy && \\\n    cargo install cargo-auditable           --locked --version \"${CARGO_AUDITABLE_VERSION}\" && \\\n    cargo auditable install cargo-auditable --locked --version \"${CARGO_AUDITABLE_VERSION}\" --force && \\\n    cargo auditable install rustfilt                 --version \"${RUSTFILT_VERSION}\" && \\\n    cargo auditable install cargo-hakari    --locked --version \"${CARGO_HAKARI_VERSION}\" && \\\n    cargo auditable install cargo-deny      --locked --version \"${CARGO_DENY_VERSION}\" && \\\n    cargo auditable install cargo-hack      --locked --version \"${CARGO_HACK_VERSION}\" && \\\n    cargo auditable install cargo-nextest   --locked --version \"${CARGO_NEXTEST_VERSION}\" && \\\n    cargo auditable install cargo-chef      --locked --version \"${CARGO_CHEF_VERSION}\" && \\\n    cargo auditable install diesel_cli      --locked --version \"${CARGO_DIESEL_CLI_VERSION}\" \\\n                                            --features postgres-bundled --no-default-features && \\\n    rm -rf /home/nonroot/.cargo/registry && \\\n    rm -rf /home/nonroot/.cargo/git\n\n# Show versions\nRUN whoami 
\\\n    && python --version \\\n    && pip --version \\\n    && cargo --version --verbose \\\n    && rustup --version --verbose \\\n    && rustc --version --verbose \\\n    && clang --version\n\nRUN if [ \"${DEBIAN_VERSION}\" = \"bookworm\" ]; then \\\n    LD_LIBRARY_PATH=/pgcopydb/lib /pgcopydb/bin/pgcopydb --version; \\\nelse \\\n    echo \"pgcopydb is not available for ${DEBIAN_VERSION}\"; \\\nfi\n\n# Set the following flag so the Makefile can check whether it's running in Docker\nRUN touch /home/nonroot/.docker_build\n"
  },
  {
    "path": "build-tools/package.json",
    "content": "{\n  \"name\": \"build-tools\",\n  \"private\": true,\n  \"devDependencies\": {\n    \"@redocly/cli\": \"1.34.5\",\n    \"@sourcemeta/jsonschema\": \"10.0.0\"\n  }\n}\n"
  },
  {
    "path": "build-tools/patches/pgcopydbv017.patch",
    "content": "diff --git a/src/bin/pgcopydb/copydb.c b/src/bin/pgcopydb/copydb.c\nindex d730b03..69a9be9 100644\n--- a/src/bin/pgcopydb/copydb.c\n+++ b/src/bin/pgcopydb/copydb.c\n@@ -44,6 +44,7 @@ GUC dstSettings[] = {\n \t{ \"synchronous_commit\", \"'off'\" },\n \t{ \"statement_timeout\", \"0\" },\n \t{ \"lock_timeout\", \"0\" },\n+\t{ \"idle_in_transaction_session_timeout\", \"0\" },\n \t{ NULL, NULL },\n };\n \ndiff --git a/src/bin/pgcopydb/pgsql.c b/src/bin/pgcopydb/pgsql.c\nindex 94f2f46..e051ba8 100644\n--- a/src/bin/pgcopydb/pgsql.c\n+++ b/src/bin/pgcopydb/pgsql.c\n@@ -2319,6 +2319,11 @@ pgsql_execute_log_error(PGSQL *pgsql,\n \n \tLinesBuffer lbuf = { 0 };\n \n+\tif (message != NULL){\n+\t\t// make sure message is writable by splitLines\n+\t\tmessage = strdup(message);\n+\t}\n+\n \tif (!splitLines(&lbuf, message))\n \t{\n \t\t/* errors have already been logged */\n@@ -2332,6 +2337,7 @@ pgsql_execute_log_error(PGSQL *pgsql,\n \t\t\t\t  PQbackendPID(pgsql->connection),\n \t\t\t\t  lbuf.lines[lineNumber]);\n \t}\n+        free(message); // free copy of message we created above\n \n \tif (pgsql->logSQL)\n \t{\n@@ -3174,11 +3180,18 @@ pgcopy_log_error(PGSQL *pgsql, PGresult *res, const char *context)\n \t\t/* errors have already been logged */\n \t\treturn;\n \t}\n-\n \tif (res != NULL)\n \t{\n \t\tchar *sqlstate = PQresultErrorField(res, PG_DIAG_SQLSTATE);\n-\t\tstrlcpy(pgsql->sqlstate, sqlstate, sizeof(pgsql->sqlstate));\n+\t\tif (sqlstate == NULL)\n+\t\t{\n+\t\t\t// PQresultErrorField returned NULL!\n+\t\t\tpgsql->sqlstate[0] = '\\0';  // Set to an empty string to avoid segfault\n+\t\t}\n+\t\telse\n+\t\t{\n+\t\t\tstrlcpy(pgsql->sqlstate, sqlstate, sizeof(pgsql->sqlstate));\n+\t\t}\n \t}\n \n \tchar *endpoint =\n"
  },
  {
    "path": "clippy.toml",
    "content": "disallowed-methods = [\n    \"tokio::task::block_in_place\",\n\n    # Allow this for now, to deny it later once we stop using Handle::block_on completely\n    # \"tokio::runtime::Handle::block_on\",\n\n    # tokio-epoll-uring:\n    # - allow-invalid because the method doesn't exist on macOS\n    { path = \"tokio_epoll_uring::thread_local_system\", replacement = \"tokio_epoll_uring_ext module inside pageserver crate\", allow-invalid = true }\n]\n\ndisallowed-macros = [\n    # use std::pin::pin\n    \"futures::pin_mut\",\n    # cannot disallow this, because clippy finds uses coming from tokio macros\n    #\"tokio::pin\",\n]\n\nallow-unwrap-in-tests = true\n"
  },
  {
    "path": "compute/.gitignore",
    "content": "# sql_exporter config files generated from Jsonnet\netc/neon_collector.yml\netc/neon_collector_autoscaling.yml\netc/sql_exporter.yml\netc/sql_exporter_autoscaling.yml\n\n# Node.js dependencies\nnode_modules/\n"
  },
  {
    "path": "compute/Makefile",
    "content": "jsonnet_files = $(wildcard \\\n\tetc/*.jsonnet \\\n\tetc/sql_exporter/*.libsonnet)\n\n.PHONY: all\nall: neon_collector.yml neon_collector_autoscaling.yml sql_exporter.yml sql_exporter_autoscaling.yml\n\nneon_collector.yml: $(jsonnet_files)\n\tJSONNET_PATH=jsonnet:etc jsonnet \\\n\t\t--output-file etc/$@ \\\n\t\t--ext-str pg_version=$(PG_VERSION) \\\n\t\tetc/neon_collector.jsonnet\n\nneon_collector_autoscaling.yml: $(jsonnet_files)\n\tJSONNET_PATH=jsonnet:etc jsonnet \\\n\t\t--output-file etc/$@ \\\n\t\t--ext-str pg_version=$(PG_VERSION) \\\n\t\tetc/neon_collector_autoscaling.jsonnet\n\nsql_exporter.yml: $(jsonnet_files)\n\tJSONNET_PATH=etc jsonnet \\\n\t\t--output-file etc/$@ \\\n\t\t--tla-str collector_name=neon_collector \\\n\t\t--tla-str collector_file=neon_collector.yml \\\n\t\t--tla-str 'connection_string=postgresql://cloud_admin@127.0.0.1:5432/postgres?sslmode=disable&application_name=sql_exporter&pgaudit.log=none' \\\n\t\tetc/sql_exporter.jsonnet\n\nsql_exporter_autoscaling.yml: $(jsonnet_files)\n\tJSONNET_PATH=etc jsonnet \\\n\t\t--output-file etc/$@ \\\n\t\t--tla-str collector_name=neon_collector_autoscaling \\\n\t\t--tla-str collector_file=neon_collector_autoscaling.yml \\\n\t\t--tla-str 'connection_string=postgresql://cloud_admin@127.0.0.1:5432/postgres?sslmode=disable&application_name=sql_exporter_autoscaling&pgaudit.log=none' \\\n\t\tetc/sql_exporter.jsonnet\n\n.PHONY: clean\nclean:\n\t$(RM) \\\n\t\tetc/neon_collector.yml \\\n\t\tetc/neon_collector_autoscaling.yml \\\n\t\tetc/sql_exporter.yml \\\n\t\tetc/sql_exporter_autoscaling.yml\n\n.PHONY: jsonnetfmt-test\njsonnetfmt-test:\n\tjsonnetfmt --test $(jsonnet_files)\n\n.PHONY: jsonnetfmt-format\njsonnetfmt-format:\n\tjsonnetfmt --in-place $(jsonnet_files)\n\n.PHONY: manifest-schema-validation\nmanifest-schema-validation: ../build-tools/node_modules\n\tnpx --prefix=../build-tools/ jsonschema validate -d https://json-schema.org/draft/2020-12/schema manifest.schema.json 
manifest.yaml\n\n../build-tools/node_modules: ../build-tools/package.json\n\tcd ../build-tools && $(if $(CI),npm ci,npm install)\n\ttouch ../build-tools/node_modules\n"
  },
  {
    "path": "compute/README.md",
    "content": "This directory contains files that are needed to build the compute\nimages, or included in the compute images.\n\ncompute-node.Dockerfile\n\tTo build the compute image\n\nvm-image-spec.yaml\n\tInstructions for vm-builder, to turn the compute-node image into\n\tcorresponding vm-compute-node image.\n\netc/\n\tConfiguration files included in /etc in the compute image\n\npatches/\n\tSome extensions need to be patched to work with Neon. This\n\tdirectory contains such patches. They are applied to the extension\n\tsources in compute-node.Dockerfile\n\nIn addition to these, postgres itself, the neon postgres extension,\nand compute_ctl are built and copied into the compute image by\ncompute-node.Dockerfile.\n"
  },
  {
    "path": "compute/compute-node.Dockerfile",
    "content": "#\n# This Dockerfile builds the compute image. It is built multiple times to produce\n# different images for each PostgreSQL major version.\n#\n# We use Debian as the base for all the steps. The production images use Debian bookworm\n# for v17, and Debian bullseye for older PostgreSQL versions.\n#\n# ## Intermediary layers\n#\n# build-tools:   This contains Rust compiler toolchain and other tools needed at compile\n#                time. This is also used for the storage builds. This image is defined in\n#                build-tools/Dockerfile.\n#\n# build-deps:    Contains C compiler, other build tools, and compile-time dependencies\n#                needed to compile PostgreSQL and most extensions. (Some extensions need\n#                extra tools and libraries that are not included in this image. They are\n#                installed in the extension-specific build stages.)\n#\n# pg-build:      Result of compiling PostgreSQL. The PostgreSQL binaries are copied from\n#                this to the final image. This is also used as the base for compiling all\n#                the extensions.\n#\n# compute-tools: This contains compute_ctl, the launcher program that starts Postgres\n#                in Neon. It also contains a few other tools that are built from the\n#                sources from this repository and used in compute VMs: 'fast_import' and\n#                'local_proxy'\n#\n# ## Extensions\n#\n# By convention, the build of each extension consists of two layers:\n#\n# {extension}-src:   Contains the source tarball, possible neon-specific patches, and\n#                    the extracted tarball with the patches applied. All of these are\n#                    under the /ext-src/ directory.\n#\n# {extension}-build: Contains the installed extension files, under /usr/local/pgsql\n#                    (in addition to the PostgreSQL binaries inherited from the pg-build\n#                    image). 
A few extensions need extra libraries or other files\n#                    installed elsewhere in the filesystem. They are installed by ONBUILD\n#                    directives.\n#\n# These are merged together into two layers:\n#\n# all-extensions:    All the extension -build layers merged together\n#\n# extension-tests:   All the extension -src layers merged together. This is used by the\n#                    extension tests. The tests are executed against the compiled image,\n#                    but the tests need test scripts, expected result files etc. from the\n#                    original sources, which are not included in the binary image.\n#\n# ## Extra components\n#\n# These are extra included in the compute image, but are not directly used by PostgreSQL\n# itself.\n#\n# pgbouncer:         pgbouncer and its configuration\n#\n# sql_exporter:      Metrics exporter daemon.\n#\n# postgres_exporter: Another metrics exporter daemon, for different sets of metrics.\n#\n# The configuration files for the metrics exporters are under etc/ directory. We use\n# a templating system to handle variations between different PostgreSQL versions,\n# building slightly different config files for each PostgreSQL version.\n#\n#\n# ## Final image\n#\n# The final image puts together the PostgreSQL binaries (pg-build), the compute tools\n# (compute-tools), all the extensions (all-extensions) and the extra components into\n# one image.\n#\n# VM image: The final image built by this dockerfile isn't actually the final image that\n# we use in compute VMs. There's an extra step that adds some files and makes other\n# small adjustments, and builds the QCOW2 filesystem image suitable for using in a VM.\n# That step is done by the 'vm-builder' tool. 
See the vm-compute-node-image job in the\n# build_and_test.yml github workflow for how that's done.\n\nARG PG_VERSION\nARG BUILD_TAG\nARG DEBIAN_VERSION=bookworm\nARG DEBIAN_FLAVOR=${DEBIAN_VERSION}-slim\n\n# Here are the INDEX DIGESTS for the images we use.\n# For now, you can get them by following these steps:\n# 1. Get an authentication token from DockerHub:\n#    TOKEN=$(curl -s \"https://auth.docker.io/token?service=registry.docker.io&scope=repository:library/debian:pull\" | jq -r .token)\n# 2. Using that token, query the index for the given tag:\n#    curl -s -H \"Authorization: Bearer $TOKEN\" \\\n#       -H \"Accept: application/vnd.docker.distribution.manifest.list.v2+json\" \\\n#       \"https://registry.hub.docker.com/v2/library/debian/manifests/bullseye-slim\" \\\n#       -I | grep -i docker-content-digest\n# 3. As a next step, TODO(fedordikarev): create a script and schedule a workflow to run these checks\n#    and updates on a regular basis and in an automated way.\nARG BOOKWORM_SLIM_SHA=sha256:40b107342c492725bc7aacbe93a49945445191ae364184a6d24fedb28172f6f7\nARG BULLSEYE_SLIM_SHA=sha256:e831d9a884d63734fe3dd9c491ed9a5a3d4c6a6d32c5b14f2067357c49b0b7e1\n\n# Here we use the ${var/search/replace} syntax to check\n# whether the base image is one of the images we pin the image index for.\n# If var matches one of the known images, we replace it with the known sha.\n# If there is no match, the value is left unchanged and the build proceeds with an unpinned image.\nARG BASE_IMAGE_SHA=debian:${DEBIAN_FLAVOR}\nARG BASE_IMAGE_SHA=${BASE_IMAGE_SHA/debian:bookworm-slim/debian@$BOOKWORM_SLIM_SHA}\nARG BASE_IMAGE_SHA=${BASE_IMAGE_SHA/debian:bullseye-slim/debian@$BULLSEYE_SLIM_SHA}\n\n# By default, build all PostgreSQL extensions. 
For quick local testing when you don't\n# care about the extensions, pass EXTENSIONS=none or EXTENSIONS=minimal\nARG EXTENSIONS=all\n\n#########################################################################################\n#\n# Layer \"build-deps\"\n#\n#########################################################################################\nFROM $BASE_IMAGE_SHA AS build-deps\nARG DEBIAN_VERSION\n\n# Keep in sync with build-tools/Dockerfile\nENV PROTOC_VERSION=25.1\n\n# Use strict mode for bash to catch errors early\nSHELL [\"/bin/bash\", \"-euo\", \"pipefail\", \"-c\"]\n\n# By default, /bin/sh used in debian images will treat '\\n' as eol,\n# but as we use bash as SHELL, and built-in echo in bash requires '-e' flag for that.\nRUN echo 'Acquire::Retries \"5\";' > /etc/apt/apt.conf.d/80-retries && \\\n    echo -e \"retry_connrefused = on\\ntimeout=15\\ntries=5\\nretry-on-host-error=on\\n\" > /root/.wgetrc && \\\n    echo -e \"--retry-connrefused\\n--connect-timeout 15\\n--retry 5\\n--max-time 300\\n\" > /root/.curlrc\n\nRUN case $DEBIAN_VERSION in \\\n      # Version-specific installs for Bullseye (PG14-PG16):\n      # The h3_pg extension needs a cmake 3.20+, but Debian bullseye has 3.18.\n      # Install newer version (3.25) from backports.\n      # libstdc++-10-dev is required for plv8\n      bullseye) \\\n        echo \"deb http://archive.debian.org/debian bullseye-backports main\" > /etc/apt/sources.list.d/bullseye-backports.list; \\\n        VERSION_INSTALLS=\"cmake/bullseye-backports cmake-data/bullseye-backports libstdc++-10-dev\"; \\\n      ;; \\\n      # Version-specific installs for Bookworm (PG17):\n      bookworm) \\\n        VERSION_INSTALLS=\"cmake libstdc++-12-dev\"; \\\n      ;; \\\n      *) \\\n        echo \"Unknown Debian version ${DEBIAN_VERSION}\" && exit 1 \\\n      ;; \\\n    esac && \\\n    apt update &&  \\\n    apt install --no-install-recommends --no-install-suggests -y \\\n    ninja-build git autoconf automake libtool build-essential 
bison flex libreadline-dev \\\n    zlib1g-dev libxml2-dev libcurl4-openssl-dev libossp-uuid-dev wget ca-certificates pkg-config libssl-dev \\\n    libicu-dev libxslt1-dev liblz4-dev libzstd-dev zstd curl unzip g++ \\\n    libclang-dev \\\n    jsonnet \\\n    $VERSION_INSTALLS \\\n    && apt clean && rm -rf /var/lib/apt/lists/* \\\n    && useradd -ms /bin/bash nonroot -b /home \\\n    # Install protoc from binary release, since Debian's versions are too old.\n    && curl -fsSL \"https://github.com/protocolbuffers/protobuf/releases/download/v${PROTOC_VERSION}/protoc-${PROTOC_VERSION}-linux-$(uname -m | sed 's/aarch64/aarch_64/g').zip\" -o \"protoc.zip\" \\\n    && unzip -q protoc.zip -d protoc \\\n    && mv protoc/bin/protoc /usr/local/bin/protoc \\\n    && mv protoc/include/google /usr/local/include/google \\\n    && rm -rf protoc.zip protoc\n\n#########################################################################################\n#\n# Layer \"pg-build\"\n# Build Postgres from the neon postgres repository.\n#\n#########################################################################################\nFROM build-deps AS pg-build\nARG PG_VERSION\nCOPY vendor/postgres-${PG_VERSION:?} postgres\nCOPY compute/patches/postgres_fdw.patch .\nCOPY compute/patches/pg_stat_statements_pg14-16.patch .\nCOPY compute/patches/pg_stat_statements_pg17.patch .\nRUN cd postgres && \\\n    # Apply patches to some contrib extensions\n    # For example, we need to grant EXECUTE on pg_stat_statements_reset() to {privileged_role_name}.\n    # In vanilla Postgres this function is limited to Postgres role superuser.\n    # In Neon we have {privileged_role_name} role that is not a superuser but replaces superuser in some cases.\n    # We could add the additional grant statements to the Postgres repository but it would be hard to maintain,\n    # whenever we need to pick up a new Postgres version and we want to limit the changes in our Postgres fork,\n    # so we do it here.\n    case 
\"${PG_VERSION}\" in \\\n    \"v14\" | \"v15\" | \"v16\") \\\n    patch -p1 < /pg_stat_statements_pg14-16.patch; \\\n    ;; \\\n    \"v17\") \\\n    patch -p1 < /pg_stat_statements_pg17.patch; \\\n    ;; \\\n    *) \\\n    # To do not forget to migrate patches to the next major version\n    echo \"No contrib patches for this PostgreSQL version\" && exit 1;; \\\n    esac && \\\n    patch -p1 < /postgres_fdw.patch && \\\n    export CONFIGURE_CMD=\"./configure CFLAGS='-O2 -g3 -fsigned-char' --enable-debug --with-openssl --with-uuid=ossp \\\n    --with-icu --with-libxml --with-libxslt --with-lz4\" && \\\n    if [ \"${PG_VERSION:?}\" != \"v14\" ]; then \\\n        # zstd is available only from PG15\n        export CONFIGURE_CMD=\"${CONFIGURE_CMD} --with-zstd\"; \\\n    fi && \\\n    eval $CONFIGURE_CMD && \\\n    make MAKELEVEL=0 -j $(getconf _NPROCESSORS_ONLN) -s install && \\\n    make MAKELEVEL=0 -j $(getconf _NPROCESSORS_ONLN) -s -C contrib/ install && \\\n    # Enable some of contrib extensions\n    echo 'trusted = true' >> /usr/local/pgsql/share/extension/autoinc.control && \\\n    echo 'trusted = true' >> /usr/local/pgsql/share/extension/dblink.control && \\\n    echo 'trusted = true' >> /usr/local/pgsql/share/extension/postgres_fdw.control && \\\n    echo 'trusted = true' >> /usr/local/pgsql/share/extension/bloom.control && \\\n    echo 'trusted = true' >> /usr/local/pgsql/share/extension/earthdistance.control && \\\n    echo 'trusted = true' >> /usr/local/pgsql/share/extension/insert_username.control && \\\n    echo 'trusted = true' >> /usr/local/pgsql/share/extension/intagg.control && \\\n    echo 'trusted = true' >> /usr/local/pgsql/share/extension/moddatetime.control && \\\n    echo 'trusted = true' >> /usr/local/pgsql/share/extension/pg_stat_statements.control && \\\n    echo 'trusted = true' >> /usr/local/pgsql/share/extension/pgrowlocks.control && \\\n    echo 'trusted = true' >> /usr/local/pgsql/share/extension/pgstattuple.control && \\\n    echo 
'trusted = true' >> /usr/local/pgsql/share/extension/refint.control && \\\n    echo 'trusted = true' >> /usr/local/pgsql/share/extension/xml2.control\n\n# Set PATH for all the subsequent build steps\nENV PATH=\"/usr/local/pgsql/bin:$PATH\"\n\n#########################################################################################\n#\n# Layer \"postgis-build\"\n# Build PostGIS from the upstream PostGIS mirror.\n#\n#########################################################################################\nFROM build-deps AS postgis-src\nARG DEBIAN_VERSION\nARG PG_VERSION\n\n# Postgis 3.5.0 requires SFCGAL 1.4+\n#\n# It would be nice to update all versions together, but we must solve the SFCGAL dependency first.\n# SFCGAL > 1.3 requires CGAL > 5.2, Bullseye's libcgal-dev is 5.2\n# and also we must check backward compatibility with older versions of PostGIS.\n#\n# Use new version only for v17\nWORKDIR /ext-src\nRUN case \"${DEBIAN_VERSION}\" in \\\n    \"bookworm\") \\\n        export SFCGAL_VERSION=1.4.1 \\\n        export SFCGAL_CHECKSUM=1800c8a26241588f11cddcf433049e9b9aea902e923414d2ecef33a3295626c3 \\\n    ;; \\\n    \"bullseye\") \\\n        export SFCGAL_VERSION=1.3.10 \\\n        export SFCGAL_CHECKSUM=4e39b3b2adada6254a7bdba6d297bb28e1a9835a9f879b74f37e2dab70203232 \\\n    ;; \\\n    *) \\\n        echo \"unexpected PostgreSQL version\" && exit 1 \\\n    ;; \\\n    esac && \\\n    wget https://gitlab.com/sfcgal/SFCGAL/-/archive/v${SFCGAL_VERSION}/SFCGAL-v${SFCGAL_VERSION}.tar.gz -O SFCGAL.tar.gz && \\\n    echo \"${SFCGAL_CHECKSUM} SFCGAL.tar.gz\" | sha256sum --check && \\\n    mkdir sfcgal-src && cd sfcgal-src && tar xzf ../SFCGAL.tar.gz --strip-components=1 -C .\n\n# Postgis 3.5.0 supports v17\nWORKDIR /ext-src\nRUN case \"${PG_VERSION:?}\" in \\\n    \"v17\") \\\n        export POSTGIS_VERSION=3.5.0 \\\n        export POSTGIS_CHECKSUM=ca698a22cc2b2b3467ac4e063b43a28413f3004ddd505bdccdd74c56a647f510 \\\n    ;; \\\n    \"v14\" | \"v15\" | \"v16\") \\\n        
export POSTGIS_VERSION=3.3.3 \\\n        export POSTGIS_CHECKSUM=74eb356e3f85f14233791013360881b6748f78081cc688ff9d6f0f673a762d13 \\\n    ;; \\\n    *) \\\n        echo \"unexpected PostgreSQL version\" && exit 1 \\\n    ;; \\\n    esac && \\\n    wget https://download.osgeo.org/postgis/source/postgis-${POSTGIS_VERSION}.tar.gz -O postgis.tar.gz && \\\n    echo \"${POSTGIS_CHECKSUM} postgis.tar.gz\" | sha256sum --check && \\\n    mkdir postgis-src && cd postgis-src && tar xzf ../postgis.tar.gz --strip-components=1 -C .\n\n# This is reused for pgrouting\nFROM pg-build AS postgis-build-deps\nRUN apt update && \\\n    apt install --no-install-recommends --no-install-suggests -y \\\n    gdal-bin libboost-dev libboost-thread-dev libboost-filesystem-dev \\\n    libboost-system-dev libboost-iostreams-dev libboost-program-options-dev libboost-timer-dev \\\n    libcgal-dev libgdal-dev libgmp-dev libmpfr-dev libopenscenegraph-dev libprotobuf-c-dev \\\n    protobuf-c-compiler xsltproc \\\n    && apt clean && rm -rf /var/lib/apt/lists/*\n\nFROM postgis-build-deps AS postgis-build\nCOPY --from=postgis-src /ext-src/ /ext-src/\nWORKDIR /ext-src/sfcgal-src\nRUN cmake -DCMAKE_BUILD_TYPE=Release -GNinja . 
&& ninja -j $(getconf _NPROCESSORS_ONLN) && \\\n    DESTDIR=/sfcgal ninja install -j $(getconf _NPROCESSORS_ONLN) && \\\n    ninja clean && cp -R /sfcgal/* /\n\nWORKDIR /ext-src/postgis-src\nRUN ./autogen.sh && \\\n    ./configure --with-sfcgal=/usr/local/bin/sfcgal-config && \\\n    make -j $(getconf _NPROCESSORS_ONLN) && \\\n    make -j $(getconf _NPROCESSORS_ONLN) install && \\\n    make staged-install && \\\n    cd extensions/postgis && \\\n    make clean && \\\n    make -j $(getconf _NPROCESSORS_ONLN) install && \\\n    echo 'trusted = true' >> /usr/local/pgsql/share/extension/postgis.control && \\\n    echo 'trusted = true' >> /usr/local/pgsql/share/extension/postgis_raster.control && \\\n    echo 'trusted = true' >> /usr/local/pgsql/share/extension/postgis_sfcgal.control && \\\n    echo 'trusted = true' >> /usr/local/pgsql/share/extension/postgis_tiger_geocoder.control && \\\n    echo 'trusted = true' >> /usr/local/pgsql/share/extension/postgis_topology.control && \\\n    echo 'trusted = true' >> /usr/local/pgsql/share/extension/address_standardizer.control && \\\n    echo 'trusted = true' >> /usr/local/pgsql/share/extension/address_standardizer_data_us.control && \\\n    mkdir -p /extensions/postgis && \\\n    cp /usr/local/pgsql/share/extension/postgis.control /extensions/postgis && \\\n    cp /usr/local/pgsql/share/extension/postgis_raster.control /extensions/postgis && \\\n    cp /usr/local/pgsql/share/extension/postgis_sfcgal.control /extensions/postgis && \\\n    cp /usr/local/pgsql/share/extension/postgis_tiger_geocoder.control /extensions/postgis && \\\n    cp /usr/local/pgsql/share/extension/postgis_topology.control /extensions/postgis && \\\n    cp /usr/local/pgsql/share/extension/address_standardizer.control /extensions/postgis && \\\n    cp /usr/local/pgsql/share/extension/address_standardizer_data_us.control /extensions/postgis\n\n#########################################################################################\n#\n# Layer 
\"pgrouting-build\"\n# Build pgrouting. Note: This depends on the postgis-build-deps layer built above\n#\n#########################################################################################\n\n# Uses versioned libraries, i.e. libpgrouting-3.4\n# and may introduce function signature changes between releases\n# i.e. release 3.5.0 has new signature for pg_dijkstra function\n#\n# Use new version only for v17\n# last release v3.6.2 - Mar 30, 2024\nFROM build-deps AS pgrouting-src\nARG DEBIAN_VERSION\nARG PG_VERSION\nWORKDIR /ext-src\nRUN case \"${PG_VERSION:?}\" in \\\n    \"v17\") \\\n        export PGROUTING_VERSION=3.6.2 \\\n        export PGROUTING_CHECKSUM=f4a1ed79d6f714e52548eca3bb8e5593c6745f1bde92eb5fb858efd8984dffa2 \\\n    ;; \\\n    \"v14\" | \"v15\" | \"v16\") \\\n        export PGROUTING_VERSION=3.4.2 \\\n        export PGROUTING_CHECKSUM=cac297c07d34460887c4f3b522b35c470138760fe358e351ad1db4edb6ee306e \\\n    ;; \\\n    *) \\\n        echo \"unexpected PostgreSQL version\" && exit 1 \\\n    ;; \\\n    esac && \\\n    wget https://github.com/pgRouting/pgrouting/archive/v${PGROUTING_VERSION}.tar.gz -O pgrouting.tar.gz && \\\n    echo \"${PGROUTING_CHECKSUM} pgrouting.tar.gz\" | sha256sum --check && \\\n    mkdir pgrouting-src && cd pgrouting-src && tar xzf ../pgrouting.tar.gz --strip-components=1 -C .\n\nFROM postgis-build-deps AS pgrouting-build\nCOPY --from=pgrouting-src /ext-src/ /ext-src/\nWORKDIR /ext-src/pgrouting-src\nRUN mkdir build && cd build && \\\n    cmake -GNinja -DCMAKE_BUILD_TYPE=Release .. 
&& \\\n    ninja -j $(getconf _NPROCESSORS_ONLN) && \\\n    ninja -j $(getconf _NPROCESSORS_ONLN) install && \\\n    echo 'trusted = true' >> /usr/local/pgsql/share/extension/pgrouting.control\n\n#########################################################################################\n#\n# Layer \"plv8-build\"\n# Build plv8\n#\n#########################################################################################\nFROM build-deps AS plv8-src\nARG PG_VERSION\nWORKDIR /ext-src\n\nCOPY compute/patches/plv8* .\n\n# plv8 3.2.3 supports v17\n# last release v3.2.3 - Sep 7, 2024\n#\n# clone the repo instead of downloading the release tarball because plv8 has submodule dependencies\n# and the release tarball doesn't include them\n#\n# Use new version only for v17\n# because since v3.2, plv8 doesn't include plcoffee and plls extensions\nRUN case \"${PG_VERSION:?}\" in \\\n    \"v17\") \\\n        export PLV8_TAG=v3.2.3 \\\n    ;; \\\n    \"v14\" | \"v15\" | \"v16\") \\\n        export PLV8_TAG=v3.1.10 \\\n    ;; \\\n    *) \\\n        echo \"unexpected PostgreSQL version\" && exit 1 \\\n    ;; \\\n    esac && \\\n    git clone --recurse-submodules --depth 1 --branch ${PLV8_TAG} https://github.com/plv8/plv8.git plv8-src && \\\n    tar -czf plv8.tar.gz --exclude .git plv8-src && \\\n    cd plv8-src && \\\n    if [[ \"${PG_VERSION:?}\" < \"v17\" ]]; then patch -p1 < /ext-src/plv8_v3.1.10.patch; else patch -p1 < /ext-src/plv8_v3.2.3.patch; fi\n\n# Step 1: Build the vendored V8 engine. It doesn't depend on PostgreSQL, so use\n# 'build-deps' as the base. 
This enables caching and avoids unnecessary rebuilds.\n# (The V8 engine takes a very long time to build)\nFROM build-deps AS plv8-build\nARG PG_VERSION\nWORKDIR /ext-src/plv8-src\nRUN apt update && \\\n    apt install --no-install-recommends --no-install-suggests -y \\\n    ninja-build python3-dev libncurses5 binutils clang \\\n    && apt clean && rm -rf /var/lib/apt/lists/*\nCOPY --from=plv8-src /ext-src/ /ext-src/\nRUN make DOCKER=1 -j $(getconf _NPROCESSORS_ONLN) v8\n\n# Step 2: Build the PostgreSQL-dependent parts\nCOPY --from=pg-build /usr/local/pgsql /usr/local/pgsql\nENV PATH=\"/usr/local/pgsql/bin:$PATH\"\nRUN \\\n    # generate and copy upgrade scripts\n    make generate_upgrades && \\\n    cp upgrade/* /usr/local/pgsql/share/extension/ && \\\n    make DOCKER=1 -j $(getconf _NPROCESSORS_ONLN) install && \\\n    rm -rf /plv8-* && \\\n    find /usr/local/pgsql/ -name \"plv8-*.so\" | xargs strip && \\\n    # don't break computes with installed old version of plv8\n    cd /usr/local/pgsql/lib/ && \\\n    case \"${PG_VERSION:?}\" in \\\n    \"v17\") \\\n        ln -s plv8-3.2.3.so plv8-3.1.8.so && \\\n        ln -s plv8-3.2.3.so plv8-3.1.5.so && \\\n        ln -s plv8-3.2.3.so plv8-3.1.10.so \\\n    ;; \\\n    \"v14\" | \"v15\" | \"v16\") \\\n        ln -s plv8-3.1.10.so plv8-3.1.5.so && \\\n        ln -s plv8-3.1.10.so plv8-3.1.8.so \\\n    ;; \\\n    esac && \\\n    echo 'trusted = true' >> /usr/local/pgsql/share/extension/plv8.control && \\\n    echo 'trusted = true' >> /usr/local/pgsql/share/extension/plcoffee.control && \\\n    echo 'trusted = true' >> /usr/local/pgsql/share/extension/plls.control\n\n#########################################################################################\n#\n# Layer \"h3-pg-build\"\n# Build h3_pg\n#\n#########################################################################################\nFROM build-deps AS h3-pg-src\nARG PG_VERSION\nWORKDIR /ext-src\n\n# not version-specific\n# last release v4.1.0 - Jan 18, 2023\nRUN 
mkdir -p /h3/usr/ && \\\n    wget https://github.com/uber/h3/archive/refs/tags/v4.1.0.tar.gz -O h3.tar.gz && \\\n    echo \"ec99f1f5974846bde64f4513cf8d2ea1b8d172d2218ab41803bf6a63532272bc h3.tar.gz\" | sha256sum --check && \\\n    mkdir h3-src && cd h3-src && tar xzf ../h3.tar.gz --strip-components=1 -C .\n\n# not version-specific\n# last release v4.1.3 - Jul 26, 2023\nWORKDIR /ext-src\nRUN wget https://github.com/zachasme/h3-pg/archive/refs/tags/v4.1.3.tar.gz -O h3-pg.tar.gz && \\\n    echo \"5c17f09a820859ffe949f847bebf1be98511fb8f1bd86f94932512c00479e324 h3-pg.tar.gz\" | sha256sum --check && \\\n    mkdir h3-pg-src && cd h3-pg-src && tar xzf ../h3-pg.tar.gz --strip-components=1 -C .\n\nFROM pg-build AS h3-pg-build\nCOPY --from=h3-pg-src /ext-src/ /ext-src/\nWORKDIR /ext-src/h3-src\nRUN mkdir build && cd build && \\\n    cmake .. -GNinja -DBUILD_BENCHMARKS=0 -DCMAKE_BUILD_TYPE=Release \\\n        -DBUILD_FUZZERS=0 -DBUILD_FILTERS=0 -DBUILD_GENERATORS=0 -DBUILD_TESTING=0 \\\n    && ninja -j $(getconf _NPROCESSORS_ONLN) && \\\n    DESTDIR=/h3 ninja install && \\\n    cp -R /h3/usr / && \\\n    rm -rf build\n\nWORKDIR /ext-src/h3-pg-src\nRUN ls -l && \\\n    make -j $(getconf _NPROCESSORS_ONLN) && \\\n    make -j $(getconf _NPROCESSORS_ONLN) install && \\\n    echo 'trusted = true' >> /usr/local/pgsql/share/extension/h3.control && \\\n    echo 'trusted = true' >> /usr/local/pgsql/share/extension/h3_postgis.control\n\n#########################################################################################\n#\n# Layer \"postgresql-unit-build\"\n# compile unit extension\n#\n#########################################################################################\nFROM build-deps AS postgresql-unit-src\nARG PG_VERSION\n\n# not version-specific\n# last release 7.9 - Sep 15, 2024\nWORKDIR /ext-src\nRUN wget https://github.com/df7cb/postgresql-unit/archive/refs/tags/7.9.tar.gz -O postgresql-unit.tar.gz && \\\n    echo 
\"e46de6245dcc8b2c2ecf29873dbd43b2b346773f31dd5ce4b8315895a052b456 postgresql-unit.tar.gz\" | sha256sum --check && \\\n    mkdir postgresql-unit-src && cd postgresql-unit-src && tar xzf ../postgresql-unit.tar.gz --strip-components=1 -C .\n\nFROM pg-build AS postgresql-unit-build\nCOPY --from=postgresql-unit-src /ext-src/ /ext-src/\nWORKDIR /ext-src/postgresql-unit-src\nRUN make -j $(getconf _NPROCESSORS_ONLN) && \\\n    make -j $(getconf _NPROCESSORS_ONLN) install && \\\n    # unit extension's \"create extension\" script relies on absolute install path to fill some reference tables.\n    # We move the extension from '/usr/local/pgsql/' to '/usr/local/'  after it is build. So we need to adjust the path.\n    # This one-liner removes pgsql/ part of the path.\n    # NOTE: Other extensions that rely on MODULEDIR variable after building phase will need the same fix.\n    find /usr/local/pgsql/share/extension/ -name \"unit*.sql\" -print0 | xargs -0 sed -i \"s|pgsql/||g\" && \\\n    echo 'trusted = true' >> /usr/local/pgsql/share/extension/unit.control\n\n#########################################################################################\n#\n# Layer \"pgvector-build\"\n# compile pgvector extension\n#\n#########################################################################################\nFROM build-deps AS pgvector-src\nARG PG_VERSION\n\nWORKDIR /ext-src\nCOPY compute/patches/pgvector.patch .\n\n# By default, pgvector Makefile uses `-march=native`. 
We don't want that,\n# because we build the images on different machines than where we run them.\n# Pass OPTFLAGS=\"\" to remove it.\n#\n# vector >0.7.4 supports v17\n# last release v0.8.0 - Oct 30, 2024\nRUN wget https://github.com/pgvector/pgvector/archive/refs/tags/v0.8.0.tar.gz -O pgvector.tar.gz && \\\n    echo \"867a2c328d4928a5a9d6f052cd3bc78c7d60228a9b914ad32aa3db88e9de27b0 pgvector.tar.gz\" | sha256sum --check && \\\n    mkdir pgvector-src && cd pgvector-src && tar xzf ../pgvector.tar.gz --strip-components=1 -C . && \\\n    wget https://github.com/pgvector/pgvector/raw/refs/tags/v0.7.4/sql/vector.sql -O ./sql/vector--0.7.4.sql && \\\n    echo \"10218d05dc02299562252a9484775178b14a1d8edb92a2d1672ef488530f7778 ./sql/vector--0.7.4.sql\" | sha256sum --check && \\\n    patch -p1 < /ext-src/pgvector.patch\n\nFROM pg-build AS pgvector-build\nCOPY --from=pgvector-src /ext-src/ /ext-src/\nWORKDIR /ext-src/pgvector-src\nRUN make -j $(getconf _NPROCESSORS_ONLN) OPTFLAGS=\"\" && \\\n    make -j $(getconf _NPROCESSORS_ONLN) OPTFLAGS=\"\" install && \\\n    echo 'trusted = true' >> /usr/local/pgsql/share/extension/vector.control\n\n#########################################################################################\n#\n# Layer \"pgjwt-build\"\n# compile pgjwt extension\n#\n#########################################################################################\nFROM build-deps AS pgjwt-src\nARG PG_VERSION\n\n# not version-specific\n# doesn't use releases, last commit f3d82fd - Mar 2, 2023\nWORKDIR /ext-src\nRUN wget https://github.com/michelp/pgjwt/archive/f3d82fd30151e754e19ce5d6a06c71c20689ce3d.tar.gz -O pgjwt.tar.gz && \\\n    echo \"dae8ed99eebb7593b43013f6532d772b12dfecd55548d2673f2dfd0163f6d2b9 pgjwt.tar.gz\" | sha256sum --check && \\\n    mkdir pgjwt-src && cd pgjwt-src && tar xzf ../pgjwt.tar.gz --strip-components=1 -C .\n\nFROM pg-build AS pgjwt-build\nCOPY --from=pgjwt-src /ext-src/ /ext-src/\nWORKDIR /ext-src/pgjwt-src\nRUN make -j $(getconf 
_NPROCESSORS_ONLN) install && \\\n    echo 'trusted = true' >> /usr/local/pgsql/share/extension/pgjwt.control\n\n#########################################################################################\n#\n# Layer \"hypopg-build\"\n# compile hypopg extension\n#\n#########################################################################################\nFROM build-deps AS hypopg-src\nARG PG_VERSION\n\n# HypoPG 1.4.1 supports v17\n# last release 1.4.1 - Apr 28, 2024\nWORKDIR /ext-src\nRUN wget https://github.com/HypoPG/hypopg/archive/refs/tags/1.4.1.tar.gz -O hypopg.tar.gz && \\\n    echo \"9afe6357fd389d8d33fad81703038ce520b09275ec00153c6c89282bcdedd6bc hypopg.tar.gz\" | sha256sum --check && \\\n    mkdir hypopg-src && cd hypopg-src && tar xzf ../hypopg.tar.gz --strip-components=1 -C .\n\nFROM pg-build AS hypopg-build\nCOPY --from=hypopg-src /ext-src/ /ext-src/\nWORKDIR /ext-src/hypopg-src\nRUN make -j $(getconf _NPROCESSORS_ONLN) && \\\n    make -j $(getconf _NPROCESSORS_ONLN) install && \\\n    echo 'trusted = true' >> /usr/local/pgsql/share/extension/hypopg.control\n\n#########################################################################################\n#\n# Layer \"online_advisor-build\"\n# compile online_advisor extension\n#\n#########################################################################################\nFROM build-deps AS online_advisor-src\nARG PG_VERSION\n\n# online_advisor supports all Postgres versions starting from PG14, but prior to PG17 it has to be included in shared_preload_libraries\n# last release 1.0 - May 15, 2025\nWORKDIR /ext-src\nRUN case \"${PG_VERSION:?}\" in \\\n    \"v17\") \\\n        ;; \\\n    *) \\\n        echo \"skipping the version of online_advistor for $PG_VERSION\" && exit 0 \\\n        ;; \\\n    esac && \\\n\twget https://github.com/knizhnik/online_advisor/archive/refs/tags/1.0.tar.gz -O online_advisor.tar.gz && \\\n    echo \"37dcadf8f7cc8d6cc1f8831276ee245b44f1b0274f09e511e47a67738ba9ed0f online_advisor.tar.gz\" | 
sha256sum --check && \\\n    mkdir online_advisor-src && cd online_advisor-src && tar xzf ../online_advisor.tar.gz --strip-components=1 -C .\n\nFROM pg-build AS online_advisor-build\nCOPY --from=online_advisor-src /ext-src/ /ext-src/\nWORKDIR /ext-src/\nRUN if [ -d online_advisor-src ]; then \\\n\t    cd online_advisor-src && \\\n        make -j install && \\\n        echo 'trusted = true' >> /usr/local/pgsql/share/extension/online_advisor.control; \\\n    fi\n\n#########################################################################################\n#\n# Layer \"pg_hashids-build\"\n# compile pg_hashids extension\n#\n#########################################################################################\nFROM build-deps AS pg_hashids-src\nARG PG_VERSION\n\n# not version-specific\n# last release v1.2.1 -Jan 12, 2018\nWORKDIR /ext-src\nRUN wget https://github.com/iCyberon/pg_hashids/archive/refs/tags/v1.2.1.tar.gz -O pg_hashids.tar.gz && \\\n    echo \"74576b992d9277c92196dd8d816baa2cc2d8046fe102f3dcd7f3c3febed6822a pg_hashids.tar.gz\" | sha256sum --check && \\\n    mkdir pg_hashids-src && cd pg_hashids-src && tar xzf ../pg_hashids.tar.gz --strip-components=1 -C .\n\nFROM pg-build AS pg_hashids-build\nCOPY --from=pg_hashids-src /ext-src/ /ext-src/\nWORKDIR /ext-src/pg_hashids-src\nRUN make -j $(getconf _NPROCESSORS_ONLN) USE_PGXS=1 && \\\n    make -j $(getconf _NPROCESSORS_ONLN) install USE_PGXS=1 && \\\n    echo 'trusted = true' >> /usr/local/pgsql/share/extension/pg_hashids.control\n\n#########################################################################################\n#\n# Layer \"rum-build\"\n# compile rum extension\n#\n#########################################################################################\nFROM build-deps AS rum-src\nARG PG_VERSION\n\nWORKDIR /ext-src\nCOPY compute/patches/rum.patch .\n\n# supports v17 since https://github.com/postgrespro/rum/commit/cb1edffc57736cd2a4455f8d0feab0d69928da25\n# doesn't use releases since 1.3.13 - Sep 19, 
2022\n# use latest commit from the master branch\nRUN wget https://github.com/postgrespro/rum/archive/cb1edffc57736cd2a4455f8d0feab0d69928da25.tar.gz -O rum.tar.gz && \\\n    echo \"65e0a752e99f4c3226400c9b899f997049e93503db8bf5c8072efa136d32fd83 rum.tar.gz\" | sha256sum --check && \\\n    mkdir rum-src && cd rum-src && tar xzf ../rum.tar.gz --strip-components=1 -C . && \\\n    patch -p1 < /ext-src/rum.patch\n\nFROM pg-build AS rum-build\nCOPY --from=rum-src /ext-src/ /ext-src/\nWORKDIR /ext-src/rum-src\nRUN make -j $(getconf _NPROCESSORS_ONLN) USE_PGXS=1 && \\\n    make -j $(getconf _NPROCESSORS_ONLN) install USE_PGXS=1 && \\\n    echo 'trusted = true' >> /usr/local/pgsql/share/extension/rum.control\n\n#########################################################################################\n#\n# Layer \"pgtap-build\"\n# compile pgTAP extension\n#\n#########################################################################################\nFROM build-deps AS pgtap-src\nARG PG_VERSION\n\n# pgtap 1.3.3 supports v17\n# last release v1.3.3 - Apr 8, 2024\nWORKDIR /ext-src\nRUN wget https://github.com/theory/pgtap/archive/refs/tags/v1.3.3.tar.gz -O pgtap.tar.gz && \\\n    echo \"325ea79d0d2515bce96bce43f6823dcd3effbd6c54cb2a4d6c2384fffa3a14c7 pgtap.tar.gz\" | sha256sum --check && \\\n    mkdir pgtap-src && cd pgtap-src && tar xzf ../pgtap.tar.gz --strip-components=1 -C .\n\nFROM pg-build AS pgtap-build\nCOPY --from=pgtap-src /ext-src/ /ext-src/\nWORKDIR /ext-src/pgtap-src\nRUN make -j $(getconf _NPROCESSORS_ONLN) && \\\n    make -j $(getconf _NPROCESSORS_ONLN) install && \\\n    echo 'trusted = true' >> /usr/local/pgsql/share/extension/pgtap.control\n\n#########################################################################################\n#\n# Layer \"ip4r-build\"\n# compile ip4r extension\n#\n#########################################################################################\nFROM build-deps AS ip4r-src\nARG PG_VERSION\n\n# not version-specific\n# last release 
v2.4.2 - Jul 29, 2023\nWORKDIR /ext-src\nRUN wget https://github.com/RhodiumToad/ip4r/archive/refs/tags/2.4.2.tar.gz -O ip4r.tar.gz && \\\n    echo \"0f7b1f159974f49a47842a8ab6751aecca1ed1142b6d5e38d81b064b2ead1b4b ip4r.tar.gz\" | sha256sum --check && \\\n    mkdir ip4r-src && cd ip4r-src && tar xzf ../ip4r.tar.gz --strip-components=1 -C .\n\nFROM pg-build AS ip4r-build\nCOPY --from=ip4r-src /ext-src/ /ext-src/\nWORKDIR /ext-src/ip4r-src\nRUN make -j $(getconf _NPROCESSORS_ONLN) && \\\n    make -j $(getconf _NPROCESSORS_ONLN) install && \\\n    echo 'trusted = true' >> /usr/local/pgsql/share/extension/ip4r.control\n\n#########################################################################################\n#\n# Layer \"prefix-build\"\n# compile Prefix extension\n#\n#########################################################################################\nFROM build-deps AS prefix-src\nARG PG_VERSION\n\n# not version-specific\n# last release v1.2.10  - Jul 5, 2023\nWORKDIR /ext-src\nRUN wget https://github.com/dimitri/prefix/archive/refs/tags/v1.2.10.tar.gz -O prefix.tar.gz && \\\n    echo \"4342f251432a5f6fb05b8597139d3ccde8dcf87e8ca1498e7ee931ca057a8575 prefix.tar.gz\" | sha256sum --check && \\\n    mkdir prefix-src && cd prefix-src && tar xzf ../prefix.tar.gz --strip-components=1 -C .\n\nFROM pg-build AS prefix-build\nCOPY --from=prefix-src /ext-src/ /ext-src/\nWORKDIR /ext-src/prefix-src\nRUN make -j $(getconf _NPROCESSORS_ONLN) && \\\n    make -j $(getconf _NPROCESSORS_ONLN) install && \\\n    echo 'trusted = true' >> /usr/local/pgsql/share/extension/prefix.control\n\n#########################################################################################\n#\n# Layer \"hll-build\"\n# compile hll extension\n#\n#########################################################################################\nFROM build-deps AS hll-src\nARG PG_VERSION\n\n# not version-specific\n# last release v2.18 - Aug 29, 2023\nWORKDIR /ext-src\nRUN wget 
https://github.com/citusdata/postgresql-hll/archive/refs/tags/v2.18.tar.gz -O hll.tar.gz && \\\n    echo \"e2f55a6f4c4ab95ee4f1b4a2b73280258c5136b161fe9d059559556079694f0e hll.tar.gz\" | sha256sum --check && \\\n    mkdir hll-src && cd hll-src && tar xzf ../hll.tar.gz --strip-components=1 -C .\n\nFROM pg-build AS hll-build\nCOPY --from=hll-src /ext-src/ /ext-src/\nWORKDIR /ext-src/hll-src\nRUN make -j $(getconf _NPROCESSORS_ONLN) && \\\n    make -j $(getconf _NPROCESSORS_ONLN) install && \\\n    echo 'trusted = true' >> /usr/local/pgsql/share/extension/hll.control\n\n#########################################################################################\n#\n# Layer \"plpgsql_check-build\"\n# compile plpgsql_check extension\n#\n#########################################################################################\nFROM build-deps AS plpgsql_check-src\nARG PG_VERSION\n\n# plpgsql_check v2.7.11 supports v17\n# last release v2.7.11 - Sep 16, 2024\nWORKDIR /ext-src\nRUN wget https://github.com/okbob/plpgsql_check/archive/refs/tags/v2.7.11.tar.gz -O plpgsql_check.tar.gz && \\\n    echo \"208933f8dbe8e0d2628eb3851e9f52e6892b8e280c63700c0f1ce7883625d172 plpgsql_check.tar.gz\" | sha256sum --check && \\\n    mkdir plpgsql_check-src && cd plpgsql_check-src && tar xzf ../plpgsql_check.tar.gz --strip-components=1 -C .\n\nFROM pg-build AS plpgsql_check-build\nCOPY --from=plpgsql_check-src /ext-src/ /ext-src/\nWORKDIR /ext-src/plpgsql_check-src\nRUN make -j $(getconf _NPROCESSORS_ONLN) USE_PGXS=1 && \\\n    make -j $(getconf _NPROCESSORS_ONLN) install USE_PGXS=1 && \\\n    echo 'trusted = true' >> /usr/local/pgsql/share/extension/plpgsql_check.control\n\n#########################################################################################\n#\n# Layer \"timescaledb-build\"\n# compile timescaledb extension\n#\n#########################################################################################\nFROM build-deps AS timescaledb-src\nARG PG_VERSION\n\nWORKDIR 
/ext-src\nRUN case \"${PG_VERSION:?}\" in \\\n      \"v14\" | \"v15\") \\\n        export TIMESCALEDB_VERSION=2.10.1 \\\n        export TIMESCALEDB_CHECKSUM=6fca72a6ed0f6d32d2b3523951ede73dc5f9b0077b38450a029a5f411fdb8c73 \\\n        ;; \\\n      \"v16\") \\\n        export TIMESCALEDB_VERSION=2.13.0 \\\n        export TIMESCALEDB_CHECKSUM=584a351c7775f0e067eaa0e7277ea88cab9077cc4c455cbbf09a5d9723dce95d \\\n        ;; \\\n      \"v17\") \\\n        export TIMESCALEDB_VERSION=2.17.1 \\\n        export TIMESCALEDB_CHECKSUM=6277cf43f5695e23dae1c5cfeba00474d730b66ed53665a84b787a6bb1a57e28 \\\n        ;; \\\n    esac && \\\n    wget https://github.com/timescale/timescaledb/archive/refs/tags/${TIMESCALEDB_VERSION}.tar.gz -O timescaledb.tar.gz && \\\n    echo \"${TIMESCALEDB_CHECKSUM} timescaledb.tar.gz\" | sha256sum --check && \\\n    mkdir timescaledb-src && cd timescaledb-src && tar xzf ../timescaledb.tar.gz --strip-components=1 -C .\n\nFROM pg-build AS timescaledb-build\nCOPY --from=timescaledb-src /ext-src/ /ext-src/\nWORKDIR /ext-src/timescaledb-src\nRUN ./bootstrap -DSEND_TELEMETRY_DEFAULT:BOOL=OFF -DUSE_TELEMETRY:BOOL=OFF -DAPACHE_ONLY:BOOL=ON -DCMAKE_BUILD_TYPE=Release && \\\n    cd build && \\\n    make -j $(getconf _NPROCESSORS_ONLN) && \\\n    make install -j $(getconf _NPROCESSORS_ONLN) && \\\n    echo \"trusted = true\" >> /usr/local/pgsql/share/extension/timescaledb.control\n\n#########################################################################################\n#\n# Layer \"pg_hint_plan-build\"\n# compile pg_hint_plan extension\n#\n#########################################################################################\nFROM build-deps AS pg_hint_plan-src\nARG PG_VERSION\n\n# version-specific, has separate releases for each version\nWORKDIR /ext-src\nRUN case \"${PG_VERSION:?}\" in \\\n      \"v14\") \\\n        export PG_HINT_PLAN_VERSION=14_1_4_1 \\\n        export 
PG_HINT_PLAN_CHECKSUM=c3501becf70ead27f70626bce80ea401ceac6a77e2083ee5f3ff1f1444ec1ad1 \\\n        ;; \\\n      \"v15\") \\\n        export PG_HINT_PLAN_VERSION=15_1_5_0 \\\n        export PG_HINT_PLAN_CHECKSUM=564cbbf4820973ffece63fbf76e3c0af62c4ab23543142c7caaa682bc48918be \\\n        ;; \\\n      \"v16\") \\\n        export PG_HINT_PLAN_VERSION=16_1_6_0 \\\n        export PG_HINT_PLAN_CHECKSUM=fc85a9212e7d2819d4ae4ac75817481101833c3cfa9f0fe1f980984e12347d00 \\\n        ;; \\\n      \"v17\") \\\n        export PG_HINT_PLAN_VERSION=17_1_7_0 \\\n        export PG_HINT_PLAN_CHECKSUM=06dd306328c67a4248f48403c50444f30959fb61ebe963248dbc2afb396fe600 \\\n        ;; \\\n      *) \\\n        echo \"Export the valid PG_HINT_PLAN_VERSION variable\" && exit 1 \\\n        ;; \\\n    esac && \\\n    wget https://github.com/ossc-db/pg_hint_plan/archive/refs/tags/REL${PG_HINT_PLAN_VERSION}.tar.gz -O pg_hint_plan.tar.gz && \\\n    echo \"${PG_HINT_PLAN_CHECKSUM} pg_hint_plan.tar.gz\" | sha256sum --check && \\\n    mkdir pg_hint_plan-src && cd pg_hint_plan-src && tar xzf ../pg_hint_plan.tar.gz --strip-components=1 -C .\n\nFROM pg-build AS pg_hint_plan-build\nCOPY --from=pg_hint_plan-src /ext-src/ /ext-src/\nWORKDIR /ext-src/pg_hint_plan-src\nRUN make -j $(getconf _NPROCESSORS_ONLN) && \\\n    make install -j $(getconf _NPROCESSORS_ONLN) && \\\n    echo \"trusted = true\" >> /usr/local/pgsql/share/extension/pg_hint_plan.control\n\n\n#########################################################################################\n#\n# Layer \"pg_cron-build\"\n# compile pg_cron extension\n#\n#########################################################################################\nFROM build-deps AS pg_cron-src\nARG PG_VERSION\n\n# This is an experimental extension that we do not support on prod yet.\n# !Do not remove!\n# We set it in shared_preload_libraries and computes will fail to start if library is not found.\nWORKDIR /ext-src\nCOPY compute/patches/pg_cron.patch .\nRUN wget 
https://github.com/citusdata/pg_cron/archive/refs/tags/v1.6.4.tar.gz -O pg_cron.tar.gz && \\\n    echo \"52d1850ee7beb85a4cb7185731ef4e5a90d1de216709d8988324b0d02e76af61 pg_cron.tar.gz\" | sha256sum --check && \\\n    mkdir pg_cron-src && cd pg_cron-src && tar xzf ../pg_cron.tar.gz --strip-components=1 -C . && \\\n    patch < /ext-src/pg_cron.patch\n\nFROM pg-build AS pg_cron-build\nCOPY --from=pg_cron-src /ext-src/ /ext-src/\nWORKDIR /ext-src/pg_cron-src\nRUN make -j $(getconf _NPROCESSORS_ONLN) && \\\n    make -j $(getconf _NPROCESSORS_ONLN) install && \\\n    echo 'trusted = true' >> /usr/local/pgsql/share/extension/pg_cron.control\n\n#########################################################################################\n#\n# Layer \"rdkit-build\"\n# compile rdkit extension\n#\n#########################################################################################\nFROM build-deps AS rdkit-src\nARG PG_VERSION\n\n# rdkit Release_2024_09_1 supports v17\n# last release Release_2024_09_1 - Sep 27, 2024\n#\n# Use new version only for v17\n# because Release_2024_09_1 has some backward incompatible changes\n# https://github.com/rdkit/rdkit/releases/tag/Release_2024_09_1\n\nWORKDIR /ext-src\nRUN case \"${PG_VERSION:?}\" in \\\n    \"v17\") \\\n        export RDKIT_VERSION=Release_2024_09_1 \\\n        export RDKIT_CHECKSUM=034c00d6e9de323506834da03400761ed8c3721095114369d06805409747a60f \\\n    ;; \\\n    \"v14\" | \"v15\" | \"v16\") \\\n        export RDKIT_VERSION=Release_2023_03_3 \\\n        export RDKIT_CHECKSUM=bdbf9a2e6988526bfeb8c56ce3cdfe2998d60ac289078e2215374288185e8c8d \\\n    ;; \\\n    *) \\\n        echo \"unexpected PostgreSQL version\" && exit 1 \\\n    ;; \\\n    esac && \\\n    wget https://github.com/rdkit/rdkit/archive/refs/tags/${RDKIT_VERSION}.tar.gz -O rdkit.tar.gz && \\\n    echo \"${RDKIT_CHECKSUM} rdkit.tar.gz\" | sha256sum --check && \\\n    mkdir rdkit-src && cd rdkit-src && tar xzf ../rdkit.tar.gz --strip-components=1 -C .\n\nFROM 
pg-build AS rdkit-build\nRUN apt update && \\\n    apt install --no-install-recommends --no-install-suggests -y \\\n        libboost-iostreams1.74-dev \\\n        libboost-regex1.74-dev \\\n        libboost-serialization1.74-dev \\\n        libboost-system1.74-dev \\\n        libeigen3-dev \\\n        libboost-all-dev \\\n    && apt clean && rm -rf /var/lib/apt/lists/*\n\nCOPY --from=rdkit-src /ext-src/ /ext-src/\nWORKDIR /ext-src/rdkit-src\n\n# XXX: /usr/local/pgsql/bin is already in PATH, and that should be enough to find\n# pg_config. For some reason the rdkit cmake script doesn't work with just that,\n# however. By also adding /usr/local/pgsql, it works, which is weird because there\n# are no executables in that directory.\nENV PATH=\"/usr/local/pgsql:$PATH\"\nRUN cmake \\\n        -D RDK_BUILD_CAIRO_SUPPORT=OFF \\\n        -D RDK_BUILD_INCHI_SUPPORT=ON \\\n        -D RDK_BUILD_AVALON_SUPPORT=ON \\\n        -D RDK_BUILD_PYTHON_WRAPPERS=OFF \\\n        -D RDK_BUILD_DESCRIPTORS3D=OFF \\\n        -D RDK_BUILD_FREESASA_SUPPORT=OFF \\\n        -D RDK_BUILD_COORDGEN_SUPPORT=ON \\\n        -D RDK_BUILD_MOLINTERCHANGE_SUPPORT=OFF \\\n        -D RDK_BUILD_YAEHMOP_SUPPORT=OFF \\\n        -D RDK_BUILD_STRUCTCHECKER_SUPPORT=OFF \\\n        -D RDK_TEST_MULTITHREADED=OFF \\\n        -D RDK_BUILD_CPP_TESTS=OFF \\\n        -D RDK_USE_URF=OFF \\\n        -D RDK_BUILD_PGSQL=ON \\\n        -D RDK_PGSQL_STATIC=ON \\\n        -D PostgreSQL_CONFIG=pg_config \\\n        -D PostgreSQL_INCLUDE_DIR=`pg_config --includedir` \\\n        -D PostgreSQL_TYPE_INCLUDE_DIR=`pg_config --includedir-server` \\\n        -D PostgreSQL_LIBRARY_DIR=`pg_config --libdir` \\\n        -D RDK_INSTALL_INTREE=OFF \\\n        -D RDK_INSTALL_COMIC_FONTS=OFF \\\n        -D RDK_BUILD_FREETYPE_SUPPORT=OFF \\\n        -D CMAKE_BUILD_TYPE=Release \\\n        -GNinja \\\n        . 
&& \\\n    ninja -j $(getconf _NPROCESSORS_ONLN) && \\\n    ninja -j $(getconf _NPROCESSORS_ONLN) install && \\\n    echo 'trusted = true' >> /usr/local/pgsql/share/extension/rdkit.control\n\n#########################################################################################\n#\n# Layer \"pg_uuidv7-build\"\n# compile pg_uuidv7 extension\n#\n#########################################################################################\nFROM build-deps AS pg_uuidv7-src\nARG PG_VERSION\n\n# not version-specific\n# last release v1.6.0 - Oct 9, 2024\nWORKDIR /ext-src\nRUN wget https://github.com/fboulnois/pg_uuidv7/archive/refs/tags/v1.6.0.tar.gz -O pg_uuidv7.tar.gz && \\\n    echo \"0fa6c710929d003f6ce276a7de7a864e9d1667b2d78be3dc2c07f2409eb55867 pg_uuidv7.tar.gz\" | sha256sum --check && \\\n    mkdir pg_uuidv7-src && cd pg_uuidv7-src && tar xzf ../pg_uuidv7.tar.gz --strip-components=1 -C .\n\nFROM pg-build AS pg_uuidv7-build\nCOPY --from=pg_uuidv7-src /ext-src/ /ext-src/\nWORKDIR /ext-src/pg_uuidv7-src\nRUN make -j $(getconf _NPROCESSORS_ONLN) && \\\n    make -j $(getconf _NPROCESSORS_ONLN) install && \\\n    echo 'trusted = true' >> /usr/local/pgsql/share/extension/pg_uuidv7.control\n\n#########################################################################################\n#\n# Layer \"pg_roaringbitmap-build\"\n# compile pg_roaringbitmap extension\n#\n#########################################################################################\nFROM build-deps AS pg_roaringbitmap-src\nARG PG_VERSION\n\n# not version-specific\n# last release v0.5.4 - Jun 28, 2022\nWORKDIR /ext-src\nRUN wget https://github.com/ChenHuajun/pg_roaringbitmap/archive/refs/tags/v0.5.4.tar.gz -O pg_roaringbitmap.tar.gz && \\\n    echo \"b75201efcb1c2d1b014ec4ae6a22769cc7a224e6e406a587f5784a37b6b5a2aa pg_roaringbitmap.tar.gz\" | sha256sum --check && \\\n    mkdir pg_roaringbitmap-src && cd pg_roaringbitmap-src && tar xzf ../pg_roaringbitmap.tar.gz --strip-components=1 -C .\n\nFROM pg-build AS 
pg_roaringbitmap-build\nCOPY --from=pg_roaringbitmap-src /ext-src/ /ext-src/\nWORKDIR /ext-src/pg_roaringbitmap-src\nRUN make -j $(getconf _NPROCESSORS_ONLN) && \\\n    make -j $(getconf _NPROCESSORS_ONLN) install && \\\n    echo 'trusted = true' >> /usr/local/pgsql/share/extension/roaringbitmap.control\n\n#########################################################################################\n#\n# Layer \"pg_semver-build\"\n# compile pg_semver extension\n#\n#########################################################################################\nFROM build-deps AS pg_semver-src\nARG PG_VERSION\n\n# Release 0.40.0 breaks backward compatibility with previous versions\n# see release note https://github.com/theory/pg-semver/releases/tag/v0.40.0\n# Use new version only for v17\n#\n# last release v0.40.0 - Jul 22, 2024\nWORKDIR /ext-src\nRUN case \"${PG_VERSION:?}\" in \\\n    \"v17\") \\\n        export SEMVER_VERSION=0.40.0 \\\n        export SEMVER_CHECKSUM=3e50bcc29a0e2e481e7b6d2bc937cadc5f5869f55d983b5a1aafeb49f5425cfc \\\n    ;; \\\n    \"v14\" | \"v15\" | \"v16\") \\\n        export SEMVER_VERSION=0.32.1 \\\n        export SEMVER_CHECKSUM=fbdaf7512026d62eec03fad8687c15ed509b6ba395bff140acd63d2e4fbe25d7 \\\n    ;; \\\n    *) \\\n        echo \"unexpected PostgreSQL version\" && exit 1 \\\n    ;; \\\n    esac && \\\n    wget https://github.com/theory/pg-semver/archive/refs/tags/v${SEMVER_VERSION}.tar.gz -O pg_semver.tar.gz && \\\n    echo \"${SEMVER_CHECKSUM} pg_semver.tar.gz\" | sha256sum --check && \\\n    mkdir pg_semver-src && cd pg_semver-src && tar xzf ../pg_semver.tar.gz --strip-components=1 -C .\n\nFROM pg-build AS pg_semver-build\nCOPY --from=pg_semver-src /ext-src/ /ext-src/\nWORKDIR /ext-src/pg_semver-src\nRUN make -j $(getconf _NPROCESSORS_ONLN) && \\\n    make -j $(getconf _NPROCESSORS_ONLN) install && \\\n    echo 'trusted = true' >> 
/usr/local/pgsql/share/extension/semver.control\n\n#########################################################################################\n#\n# Layer \"build-deps with Rust toolchain installed\"\n#\n#########################################################################################\nFROM build-deps AS build-deps-with-cargo\n\nENV HOME=/home/nonroot\nENV PATH=\"/home/nonroot/.cargo/bin:$PATH\"\nUSER nonroot\nWORKDIR /home/nonroot\n\n# See comment on the top of the file regarding `echo` and `\\n`\nRUN echo -e \"--retry-connrefused\\n--connect-timeout 15\\n--retry 5\\n--max-time 300\\n\" > /home/nonroot/.curlrc\n\nRUN curl -sSO https://static.rust-lang.org/rustup/dist/$(uname -m)-unknown-linux-gnu/rustup-init && \\\n    chmod +x rustup-init && \\\n    ./rustup-init -y --no-modify-path --profile minimal --default-toolchain stable && \\\n    rm rustup-init\n\n#########################################################################################\n#\n# Layer \"pg-build with Rust toolchain installed\"\n# This layer is base and common for layers with `pgrx`\n#\n#########################################################################################\nFROM pg-build AS pg-build-with-cargo\nARG PG_VERSION\n\nENV HOME=/home/nonroot\nENV PATH=\"/home/nonroot/.cargo/bin:$PATH\"\nUSER nonroot\nWORKDIR /home/nonroot\n\nCOPY --from=build-deps-with-cargo /home/nonroot /home/nonroot\n\n#########################################################################################\n#\n# Layer \"rust extensions\"\n# This layer is used to build `pgrx` deps\n#\n#########################################################################################\nFROM pg-build-with-cargo AS rust-extensions-build\nARG PG_VERSION\n\nRUN case \"${PG_VERSION:?}\" in \\\n        'v17') \\\n            echo 'v17 is not supported yet by pgrx.
Quit' && exit 0;; \\\n    esac && \\\n    cargo install --locked --version 0.11.3 cargo-pgrx && \\\n    /bin/bash -c 'cargo pgrx init --pg${PG_VERSION:1}=/usr/local/pgsql/bin/pg_config'\n\nUSER root\n\n#########################################################################################\n#\n# Layer \"rust extensions pgrx12\"\n#\n# pgrx started to support Postgres 17 since version 12,\n# but some older extensions aren't compatible with it.\n# This layer should be used as a base for new pgrx extensions,\n# and eventually get merged with `rust-extensions-build`\n#\n#########################################################################################\nFROM pg-build-with-cargo AS rust-extensions-build-pgrx12\nARG PG_VERSION\n\nRUN cargo install --locked --version 0.12.9 cargo-pgrx && \\\n    /bin/bash -c 'cargo pgrx init --pg${PG_VERSION:1}=/usr/local/pgsql/bin/pg_config'\n\nUSER root\n\n#########################################################################################\n#\n# Layer \"rust extensions pgrx14\"\n#\n# Version 14 is now required by a few extensions.\n# This layer should be used as a base for new pgrx extensions,\n# and eventually get merged with `rust-extensions-build`\n#\n#########################################################################################\nFROM pg-build-with-cargo AS rust-extensions-build-pgrx14\nARG PG_VERSION\n\nRUN cargo install --locked --version 0.14.1 cargo-pgrx && \\\n    /bin/bash -c 'cargo pgrx init --pg${PG_VERSION:1}=/usr/local/pgsql/bin/pg_config'\n\nUSER root\n\n#########################################################################################\n#\n# Layers \"pg-onnx-build\" and \"pgrag-build\"\n# Compile \"pgrag\" extensions\n#\n#########################################################################################\n\nFROM build-deps AS pgrag-src\nARG PG_VERSION\nWORKDIR /ext-src\nCOPY compute/patches/onnxruntime.patch .\n\nRUN wget https://github.com/microsoft/onnxruntime/archive/refs/tags/v1.18.1.tar.gz -O
onnxruntime.tar.gz && \\\n    mkdir onnxruntime-src && cd onnxruntime-src && tar xzf ../onnxruntime.tar.gz --strip-components=1 -C . && \\\n    patch -p1 < /ext-src/onnxruntime.patch && \\\n    echo \"#nothing to test here\" > neon-test.sh\n\nRUN wget https://github.com/neondatabase-labs/pgrag/archive/refs/tags/v0.1.2.tar.gz -O pgrag.tar.gz &&  \\\n    echo \"7361654ea24f08cbb9db13c2ee1c0fe008f6114076401bb871619690dafc5225 pgrag.tar.gz\" | sha256sum --check && \\\n    mkdir pgrag-src && cd pgrag-src && tar xzf ../pgrag.tar.gz --strip-components=1 -C .\n\nFROM rust-extensions-build-pgrx14 AS pgrag-build\nCOPY --from=pgrag-src /ext-src/ /ext-src/\n\n# Install build-time dependencies\n# cmake 3.26 or higher is required, so installing it using pip (bullseye-backports has cmake 3.25).\n# Install it using virtual environment, because Python 3.11 (the default version on Debian 12 (Bookworm)) complains otherwise\nWORKDIR /ext-src/onnxruntime-src\nRUN apt update && apt install --no-install-recommends --no-install-suggests -y \\\n    python3 python3-pip python3-venv && \\\n    apt clean && rm -rf /var/lib/apt/lists/* && \\\n    python3 -m venv venv && \\\n    . venv/bin/activate && \\\n    python3 -m pip install cmake==3.30.5\n\nRUN . 
venv/bin/activate && \\\n    ./build.sh --config Release --parallel --cmake_generator Ninja \\\n    --skip_submodule_sync --skip_tests --allow_running_as_root\n\nWORKDIR /ext-src/pgrag-src\nRUN cd exts/rag && \\\n    sed -i 's/pgrx = \"0.14.1\"/pgrx = { version = \"0.14.1\", features = [ \"unsafe-postgres\" ] }/g' Cargo.toml && \\\n    cargo pgrx install --release && \\\n    echo \"trusted = true\" >> /usr/local/pgsql/share/extension/rag.control\n\nRUN cd exts/rag_bge_small_en_v15 && \\\n    sed -i 's/pgrx = \"0.14.1\"/pgrx = { version = \"0.14.1\", features = [ \"unsafe-postgres\" ] }/g' Cargo.toml && \\\n    ORT_LIB_LOCATION=/ext-src/onnxruntime-src/build/Linux \\\n        REMOTE_ONNX_URL=http://pg-ext-s3-gateway.pg-ext-s3-gateway.svc.cluster.local/pgrag-data/bge_small_en_v15.onnx \\\n        cargo pgrx install --release --features remote_onnx && \\\n    echo \"trusted = true\" >> /usr/local/pgsql/share/extension/rag_bge_small_en_v15.control\n\nRUN cd exts/rag_jina_reranker_v1_tiny_en && \\\n    sed -i 's/pgrx = \"0.14.1\"/pgrx = { version = \"0.14.1\", features = [ \"unsafe-postgres\" ] }/g' Cargo.toml && \\\n    ORT_LIB_LOCATION=/ext-src/onnxruntime-src/build/Linux \\\n        REMOTE_ONNX_URL=http://pg-ext-s3-gateway.pg-ext-s3-gateway.svc.cluster.local/pgrag-data/jina_reranker_v1_tiny_en.onnx \\\n        cargo pgrx install --release --features remote_onnx && \\\n    echo \"trusted = true\" >> /usr/local/pgsql/share/extension/rag_jina_reranker_v1_tiny_en.control\n\n\n#########################################################################################\n#\n# Layer \"pg_jsonschema-build\"\n# Compile \"pg_jsonschema\" extension\n#\n#########################################################################################\n\nFROM build-deps AS pg_jsonschema-src\nARG PG_VERSION\n# last release v0.3.3 - Oct 16, 2024\nWORKDIR /ext-src\nRUN wget https://github.com/supabase/pg_jsonschema/archive/refs/tags/v0.3.3.tar.gz -O pg_jsonschema.tar.gz && \\\n    echo 
\"40c2cffab4187e0233cb8c3bde013be92218c282f95f4469c5282f6b30d64eac pg_jsonschema.tar.gz\" | sha256sum --check && \\\n    mkdir pg_jsonschema-src && cd pg_jsonschema-src && tar xzf ../pg_jsonschema.tar.gz --strip-components=1 -C .\n\nFROM rust-extensions-build-pgrx12 AS pg_jsonschema-build\nCOPY --from=pg_jsonschema-src /ext-src/ /ext-src/\nWORKDIR /ext-src/pg_jsonschema-src\nRUN \\\n    # see commit 252b3685a27a0f4c31a0f91e983c6314838e89e8\n    # `unsafe-postgres` feature allows to build pgx extensions\n    # against postgres forks that decided to change their ABI name (like us).\n    # With that we can build extensions without forking them and using stock\n    # pgx. As this feature is new few manual version bumps were required.\n    sed -i 's/pgrx = \"0.12.6\"/pgrx = { version = \"0.12.9\", features = [ \"unsafe-postgres\" ] }/g' Cargo.toml && \\\n    sed -i 's/pgrx-tests = \"0.12.6\"/pgrx-tests = \"0.12.9\"/g' Cargo.toml && \\\n    cargo pgrx install --release && \\\n    echo \"trusted = true\" >> /usr/local/pgsql/share/extension/pg_jsonschema.control\n\n#########################################################################################\n#\n# Layer \"pg_graphql-build\"\n# Compile \"pg_graphql\" extension\n#\n#########################################################################################\n\nFROM build-deps AS pg_graphql-src\nARG PG_VERSION\n\n# last release v1.5.9 - Oct 16, 2024\nWORKDIR /ext-src\nCOPY compute/patches/pg_graphql.patch .\nRUN wget https://github.com/supabase/pg_graphql/archive/refs/tags/v1.5.9.tar.gz -O pg_graphql.tar.gz && \\\n    echo \"cf768385a41278be1333472204fc0328118644ae443182cf52f7b9b23277e497 pg_graphql.tar.gz\" | sha256sum --check && \\\n    mkdir pg_graphql-src && cd pg_graphql-src && tar xzf ../pg_graphql.tar.gz --strip-components=1 -C . 
&& \\\n    sed -i 's/pgrx = \"=0.12.6\"/pgrx = { version = \"0.12.9\", features = [ \"unsafe-postgres\" ] }/g' Cargo.toml && \\\n    sed -i 's/pgrx-tests = \"=0.12.6\"/pgrx-tests = \"=0.12.9\"/g' Cargo.toml && \\\n    patch -p1 < /ext-src/pg_graphql.patch\n\n\nFROM rust-extensions-build-pgrx12 AS pg_graphql-build\nCOPY --from=pg_graphql-src /ext-src/ /ext-src/\nWORKDIR /ext-src/pg_graphql-src\nRUN cargo pgrx install --release && \\\n    # it's needed to enable extension because it uses untrusted C language\n    sed -i 's/superuser = false/superuser = true/g' /usr/local/pgsql/share/extension/pg_graphql.control && \\\n    echo \"trusted = true\" >> /usr/local/pgsql/share/extension/pg_graphql.control\n\n#########################################################################################\n#\n# Layer \"pg_tiktoken-build\"\n# Compile \"pg_tiktoken\" extension\n#\n#########################################################################################\n\nFROM build-deps AS pg_tiktoken-src\nARG PG_VERSION\n\n# doesn't use releases\n# 9118dd4549b7d8c0bbc98e04322499f7bf2fa6f7 - on Oct 29, 2024\nWORKDIR /ext-src\nRUN wget https://github.com/kelvich/pg_tiktoken/archive/9118dd4549b7d8c0bbc98e04322499f7bf2fa6f7.tar.gz -O pg_tiktoken.tar.gz && \\\n    echo \"a5bc447e7920ee149d3c064b8b9f0086c0e83939499753178f7d35788416f628 pg_tiktoken.tar.gz\" | sha256sum --check && \\\n    mkdir pg_tiktoken-src && cd pg_tiktoken-src && tar xzf ../pg_tiktoken.tar.gz --strip-components=1 -C . 
&& \\\n    sed -i 's/pgrx = { version = \"=0.12.6\",/pgrx = { version = \"0.12.9\",/g' Cargo.toml && \\\n    sed -i 's/pgrx-tests = \"=0.12.6\"/pgrx-tests = \"0.12.9\"/g' Cargo.toml\n\nFROM rust-extensions-build-pgrx12 AS pg_tiktoken-build\nCOPY --from=pg_tiktoken-src /ext-src/ /ext-src/\nWORKDIR /ext-src/pg_tiktoken-src\nRUN cargo pgrx install --release && \\\n    echo \"trusted = true\" >> /usr/local/pgsql/share/extension/pg_tiktoken.control\n\n#########################################################################################\n#\n# Layer \"pgx_ulid-build\"\n# Compile \"pgx_ulid\" extension for v16 and below\n#\n#########################################################################################\n\nFROM build-deps AS pgx_ulid-src\nARG PG_VERSION\n\nWORKDIR /ext-src\nRUN case \"${PG_VERSION:?}\" in \\\n    \"v14\" | \"v15\" | \"v16\") \\\n        ;; \\\n    *) \\\n        echo \"skipping the version of pgx_ulid for $PG_VERSION\" && exit 0 \\\n        ;; \\\n    esac && \\\n    wget https://github.com/pksunkara/pgx_ulid/archive/refs/tags/v0.1.5.tar.gz -O pgx_ulid.tar.gz && \\\n    echo \"9d1659a2da65af0133d5451c454de31b37364e3502087dadf579f790bc8bef17  pgx_ulid.tar.gz\" | sha256sum --check && \\\n    mkdir pgx_ulid-src && cd pgx_ulid-src && tar xzf ../pgx_ulid.tar.gz --strip-components=1 -C . 
&& \\\n    sed -i 's/pgrx       = \"^0.11.2\"/pgrx = { version = \"=0.11.3\", features = [ \"unsafe-postgres\" ] }/g' Cargo.toml\n\nFROM rust-extensions-build AS pgx_ulid-build\nCOPY --from=pgx_ulid-src /ext-src/ /ext-src/\nWORKDIR /ext-src/\nRUN if [ -d pgx_ulid-src ]; then \\\n        cd pgx_ulid-src && \\\n        cargo pgrx install --release && \\\n        echo 'trusted = true' >> /usr/local/pgsql/share/extension/ulid.control; \\\n    fi\n\n#########################################################################################\n#\n# Layer \"pgx_ulid-pgrx12-build\"\n# Compile \"pgx_ulid\" extension for v17 and up\n#\n#########################################################################################\n\nFROM build-deps AS pgx_ulid-pgrx12-src\nARG PG_VERSION\n\nWORKDIR /ext-src\nRUN case \"${PG_VERSION:?}\" in \\\n    \"v17\") \\\n        ;; \\\n    *) \\\n        echo \"skipping the version of pgx_ulid for $PG_VERSION\" && exit 0 \\\n        ;; \\\n    esac && \\\n    wget https://github.com/pksunkara/pgx_ulid/archive/refs/tags/v0.2.0.tar.gz -O pgx_ulid.tar.gz && \\\n    echo \"cef6a9a2e5e7bd1a10a18989286586ee9e6c1c06005a4055cff190de41bf3e9f pgx_ulid.tar.gz\" | sha256sum --check && \\\n    mkdir pgx_ulid-src && cd pgx_ulid-src && tar xzf ../pgx_ulid.tar.gz --strip-components=1 -C . 
&& \\\n    sed -i 's/pgrx       = \"^0.12.7\"/pgrx       = { version = \"0.12.9\", features = [ \"unsafe-postgres\" ] }/g' Cargo.toml\n\nFROM rust-extensions-build-pgrx12 AS pgx_ulid-pgrx12-build\nARG PG_VERSION\nWORKDIR /ext-src\nCOPY --from=pgx_ulid-pgrx12-src /ext-src/ /ext-src/\nRUN if [ -d pgx_ulid-src ]; then \\\n        cd pgx_ulid-src && \\\n        cargo pgrx install --release && \\\n        echo 'trusted = true' >> /usr/local/pgsql/share/extension/pgx_ulid.control; \\\n    fi\n\n#########################################################################################\n#\n# Layer \"pg_session_jwt-build\"\n# Compile \"pg_session_jwt\" extension\n#\n#########################################################################################\n\nFROM build-deps AS pg_session_jwt-src\nARG PG_VERSION\n\n# NOTE: local_proxy depends on the version of pg_session_jwt\n# Do not update without approval from the proxy team\n# Make sure the version is reflected in proxy/src/serverless/local_conn_pool.rs\nWORKDIR /ext-src\nRUN wget https://github.com/neondatabase/pg_session_jwt/archive/refs/tags/v0.3.1.tar.gz -O pg_session_jwt.tar.gz && \\\n    echo \"62fec9e472cb805c53ba24a0765afdb8ea2720cfc03ae7813e61687b36d1b0ad pg_session_jwt.tar.gz\" | sha256sum --check && \\\n    mkdir pg_session_jwt-src && cd pg_session_jwt-src && tar xzf ../pg_session_jwt.tar.gz --strip-components=1 -C .
&& \\\n    sed -i 's/pgrx = \"0.12.6\"/pgrx = { version = \"0.12.9\", features = [ \"unsafe-postgres\" ] }/g' Cargo.toml && \\\n    sed -i 's/version = \"0.12.6\"/version = \"0.12.9\"/g' pgrx-tests/Cargo.toml && \\\n    sed -i 's/pgrx = \"=0.12.6\"/pgrx = { version = \"=0.12.9\", features = [ \"unsafe-postgres\" ] }/g' pgrx-tests/Cargo.toml && \\\n    sed -i 's/pgrx-macros = \"=0.12.6\"/pgrx-macros = \"=0.12.9\"/g' pgrx-tests/Cargo.toml && \\\n    sed -i 's/pgrx-pg-config = \"=0.12.6\"/pgrx-pg-config = \"=0.12.9\"/g' pgrx-tests/Cargo.toml\n\nFROM rust-extensions-build-pgrx12 AS pg_session_jwt-build\nCOPY --from=pg_session_jwt-src /ext-src/ /ext-src/\nWORKDIR /ext-src/pg_session_jwt-src\nRUN cargo pgrx install --release\n\n#########################################################################################\n#\n# Layer \"pg-anon-pg-build\"\n# compile anon extension\n#\n#########################################################################################\nFROM pg-build AS pg_anon-src\nARG PG_VERSION\nCOPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/\nWORKDIR /ext-src\nCOPY compute/patches/anon_v2.patch .\n\n# This is an experimental extension, never got to real production.\n# !Do not remove! It can be present in shared_preload_libraries and compute will fail to start if library is not found.\nENV PATH=\"/usr/local/pgsql/bin/:$PATH\"\nRUN wget https://gitlab.com/dalibo/postgresql_anonymizer/-/archive/2.1.0/postgresql_anonymizer-latest.tar.gz -O pg_anon.tar.gz && \\\n    echo \"48e7f5ae2f1ca516df3da86c5c739d48dd780a4e885705704ccaad0faa89d6c0  pg_anon.tar.gz\" | sha256sum --check && \\\n    mkdir pg_anon-src && cd pg_anon-src && tar xzf ../pg_anon.tar.gz --strip-components=1 -C . 
&& \\\n    find /usr/local/pgsql -type f | sed 's|^/usr/local/pgsql/||' > /before.txt && \\\n    sed -i 's/pgrx = \"0.14.1\"/pgrx = { version = \"=0.14.1\", features = [ \"unsafe-postgres\" ] }/g' Cargo.toml && \\\n    patch -p1 < /ext-src/anon_v2.patch\n\nFROM rust-extensions-build-pgrx14 AS pg-anon-pg-build\nARG PG_VERSION\nCOPY --from=pg_anon-src /ext-src/ /ext-src/\nWORKDIR /ext-src\nRUN cd pg_anon-src && \\\n    make -j $(getconf _NPROCESSORS_ONLN) extension PG_CONFIG=/usr/local/pgsql/bin/pg_config PGVER=pg$(echo \"$PG_VERSION\" | sed 's/^v//') && \\\n    make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config PGVER=pg$(echo \"$PG_VERSION\" | sed 's/^v//') && \\\n    chmod -R a+r ../pg_anon-src && \\\n    echo 'trusted = true' >> /usr/local/pgsql/share/extension/anon.control;\n\n########################################################################################\n\n#########################################################################################\n#\n# Layer \"wal2json-build\"\n# Compile \"wal2json\" extension\n#\n#########################################################################################\n\nFROM build-deps AS wal2json-src\nARG PG_VERSION\n\n# wal2json wal2json_2_6 supports v17\n# last release wal2json_2_6 - Apr 25, 2024\nWORKDIR /ext-src\nRUN wget https://github.com/eulerto/wal2json/archive/refs/tags/wal2json_2_6.tar.gz -O wal2json.tar.gz && \\\n    echo \"18b4bdec28c74a8fc98a11c72de38378a760327ef8e5e42e975b0029eb96ba0d wal2json.tar.gz\" | sha256sum --check && \\\n    mkdir wal2json-src && cd wal2json-src && tar xzf ../wal2json.tar.gz --strip-components=1 -C .\n\nFROM pg-build AS wal2json-build\nCOPY --from=wal2json-src /ext-src/ /ext-src/\nWORKDIR /ext-src/wal2json-src\nRUN make -j $(getconf _NPROCESSORS_ONLN) && \\\n    make -j $(getconf _NPROCESSORS_ONLN) install\n\n#########################################################################################\n#\n# Layer \"pg_ivm\"\n# compile pg_ivm 
extension\n#\n#########################################################################################\nFROM build-deps AS pg_ivm-src\nARG PG_VERSION\n\n# pg_ivm v1.9 supports v17\n# last release v1.9 - Jul 31\nWORKDIR /ext-src\nRUN wget https://github.com/sraoss/pg_ivm/archive/refs/tags/v1.9.tar.gz -O pg_ivm.tar.gz && \\\n    echo \"59e15722939f274650abf637f315dd723c87073496ca77236b044cb205270d8b pg_ivm.tar.gz\" | sha256sum --check && \\\n    mkdir pg_ivm-src && cd pg_ivm-src && tar xzf ../pg_ivm.tar.gz --strip-components=1 -C .\n\nFROM pg-build AS pg_ivm-build\nCOPY --from=pg_ivm-src /ext-src/ /ext-src/\nWORKDIR /ext-src/pg_ivm-src\nRUN make -j $(getconf _NPROCESSORS_ONLN) && \\\n    make -j $(getconf _NPROCESSORS_ONLN) install && \\\n    echo 'trusted = true' >> /usr/local/pgsql/share/extension/pg_ivm.control\n\n#########################################################################################\n#\n# Layer \"pg_partman\"\n# compile pg_partman extension\n#\n#########################################################################################\nFROM build-deps AS pg_partman-src\nARG PG_VERSION\n\n# should support v17 https://github.com/pgpartman/pg_partman/discussions/693\n# last release 5.1.0  Apr 2, 2024\nWORKDIR /ext-src\nRUN wget https://github.com/pgpartman/pg_partman/archive/refs/tags/v5.1.0.tar.gz -O pg_partman.tar.gz && \\\n    echo \"3e3a27d7ff827295d5c55ef72f07a49062d6204b3cb0b9a048645d6db9f3cb9f pg_partman.tar.gz\" | sha256sum --check && \\\n    mkdir pg_partman-src && cd pg_partman-src && tar xzf ../pg_partman.tar.gz --strip-components=1 -C .\n\nFROM pg-build AS pg_partman-build\nCOPY --from=pg_partman-src /ext-src/ /ext-src/\nWORKDIR /ext-src/pg_partman-src\nRUN make -j $(getconf _NPROCESSORS_ONLN) && \\\n    make -j $(getconf _NPROCESSORS_ONLN) install && \\\n    echo 'trusted = true' >> /usr/local/pgsql/share/extension/pg_partman.control\n\n#########################################################################################\n#\n# 
Layer \"pg_mooncake\"\n# compile pg_mooncake extension\n#\n#########################################################################################\nFROM build-deps AS pg_mooncake-src\nARG PG_VERSION\nWORKDIR /ext-src\nCOPY compute/patches/duckdb_v113.patch .\nRUN wget https://github.com/Mooncake-Labs/pg_mooncake/releases/download/v0.1.2/pg_mooncake-0.1.2.tar.gz -O pg_mooncake.tar.gz && \\\n    echo \"4550473784fcdd2e1e18062bc01eb9c286abd27cdf5e11a4399be6c0a426ba90 pg_mooncake.tar.gz\" | sha256sum --check && \\\n    mkdir pg_mooncake-src && cd pg_mooncake-src && tar xzf ../pg_mooncake.tar.gz --strip-components=1 -C . && \\\n    cd third_party/duckdb && patch -p1 < /ext-src/duckdb_v113.patch && cd ../.. && \\\n    echo \"make -f pg_mooncake-src/Makefile.build installcheck TEST_DIR=./test SQL_DIR=./sql SRC_DIR=./src\" > neon-test.sh && \\\n    chmod a+x neon-test.sh\n\nFROM rust-extensions-build AS pg_mooncake-build\nCOPY --from=pg_mooncake-src /ext-src/ /ext-src/\nWORKDIR /ext-src/pg_mooncake-src\nRUN make release -j $(getconf _NPROCESSORS_ONLN) && \\\n    make install -j $(getconf _NPROCESSORS_ONLN) && \\\n    echo 'trusted = true' >> /usr/local/pgsql/share/extension/pg_mooncake.control\n\n#########################################################################################\n#\n# Layer \"pg-duckdb-pg-build\"\n# compile pg_duckdb extension\n#\n#########################################################################################\nFROM build-deps AS pg_duckdb-src\nWORKDIR /ext-src\nCOPY compute/patches/pg_duckdb_v031.patch .\nCOPY compute/patches/duckdb_v120.patch .\n# pg_duckdb build requires source dir to be a git repo to get submodules\n# allow {privileged_role_name} to execute some functions that in pg_duckdb are available to superuser only:\n# - extension management function duckdb.install_extension()\n# - access to duckdb.extensions table and its sequence\nRUN git clone --depth 1 --branch v0.3.1 https://github.com/duckdb/pg_duckdb.git pg_duckdb-src && 
\\\n    cd pg_duckdb-src && \\\n    git submodule update --init --recursive && \\\n    patch -p1 < /ext-src/pg_duckdb_v031.patch && \\\n    cd third_party/duckdb && \\\n    patch -p1 < /ext-src/duckdb_v120.patch\n\nFROM pg-build AS pg_duckdb-build\nARG PG_VERSION\nCOPY --from=pg_duckdb-src /ext-src/ /ext-src/\nWORKDIR /ext-src/pg_duckdb-src\nRUN make install -j $(getconf _NPROCESSORS_ONLN) && \\\n    echo 'trusted = true' >> /usr/local/pgsql/share/extension/pg_duckdb.control\n\n#########################################################################################\n#\n# Layer \"pg_repack\"\n# compile pg_repack extension\n#\n#########################################################################################\n\nFROM build-deps AS pg_repack-src\nARG PG_VERSION\nWORKDIR /ext-src\nRUN wget https://github.com/reorg/pg_repack/archive/refs/tags/ver_1.5.2.tar.gz -O pg_repack.tar.gz && \\\n    echo '4516cad42251ed3ad53ff619733004db47d5755acac83f75924cd94d1c4fb681 pg_repack.tar.gz' | sha256sum --check && \\\n    mkdir pg_repack-src && cd pg_repack-src && tar xzf ../pg_repack.tar.gz --strip-components=1 -C .\n\nFROM rust-extensions-build AS pg_repack-build\nCOPY --from=pg_repack-src /ext-src/ /ext-src/\nWORKDIR /ext-src/pg_repack-src\nRUN make -j $(getconf _NPROCESSORS_ONLN) && \\\n    make -j $(getconf _NPROCESSORS_ONLN) install\n\n\n#########################################################################################\n#\n# Layer \"pgaudit\"\n# compile pgaudit extension\n#\n#########################################################################################\n\nFROM build-deps AS pgaudit-src\nARG PG_VERSION\nWORKDIR /ext-src\nCOPY \"compute/patches/pgaudit-parallel_workers-${PG_VERSION}.patch\" .\nRUN case \"${PG_VERSION}\" in \\\n    \"v14\") \\\n    export PGAUDIT_VERSION=1.6.3 \\\n    export PGAUDIT_CHECKSUM=37a8f5a7cc8d9188e536d15cf0fdc457fcdab2547caedb54442c37f124110919 \\\n    ;; \\\n    \"v15\") \\\n    export PGAUDIT_VERSION=1.7.1 \\\n    export 
PGAUDIT_CHECKSUM=e9c8e6e092d82b2f901d72555ce0fe7780552f35f8985573796cd7e64b09d4ec \\\n    ;; \\\n    \"v16\") \\\n    export PGAUDIT_VERSION=16.1 \\\n    export PGAUDIT_CHECKSUM=3bae908ab70ba0c6f51224009dbcfff1a97bd6104c6273297a64292e1b921fee \\\n    ;; \\\n    \"v17\") \\\n    export PGAUDIT_VERSION=17.1 \\\n    export PGAUDIT_CHECKSUM=9c5f37504d393486cc75d2ced83f75f5899be64fa85f689d6babb833b4361e6c \\\n    ;; \\\n    *) \\\n    echo \"pgaudit is not supported on this PostgreSQL version\" && exit 1;; \\\n    esac && \\\n    wget https://github.com/pgaudit/pgaudit/archive/refs/tags/${PGAUDIT_VERSION}.tar.gz -O pgaudit.tar.gz && \\\n    echo \"${PGAUDIT_CHECKSUM} pgaudit.tar.gz\" | sha256sum --check && \\\n    mkdir pgaudit-src && cd pgaudit-src && tar xzf ../pgaudit.tar.gz --strip-components=1 -C . && \\\n    patch -p1 < \"/ext-src/pgaudit-parallel_workers-${PG_VERSION}.patch\"\n\nFROM pg-build AS pgaudit-build\nCOPY --from=pgaudit-src /ext-src/ /ext-src/\nWORKDIR /ext-src/pgaudit-src\nRUN make install USE_PGXS=1 -j $(getconf _NPROCESSORS_ONLN)\n\n#########################################################################################\n#\n# Layer \"pgauditlogtofile\"\n# compile pgauditlogtofile extension\n#\n#########################################################################################\n\nFROM build-deps AS pgauditlogtofile-src\nARG PG_VERSION\nWORKDIR /ext-src\nRUN case \"${PG_VERSION}\" in \\\n    \"v14\" | \"v15\" | \"v16\" | \"v17\") \\\n    export PGAUDITLOGTOFILE_VERSION=v1.6.4 \\\n    export PGAUDITLOGTOFILE_CHECKSUM=ef801eb09c26aaa935c0dabd92c81eb9ebe338930daa9674d420a280c6bc2d70 \\\n    ;; \\\n    *) \\\n    echo \"pgauditlogtofile is not supported on this PostgreSQL version\" && exit 1;; \\\n    esac && \\\n    wget https://github.com/fmbiete/pgauditlogtofile/archive/refs/tags/${PGAUDITLOGTOFILE_VERSION}.tar.gz -O pgauditlogtofile.tar.gz && \\\n    echo \"${PGAUDITLOGTOFILE_CHECKSUM} pgauditlogtofile.tar.gz\" | sha256sum --check && \\\n    
mkdir pgauditlogtofile-src && cd pgauditlogtofile-src && tar xzf ../pgauditlogtofile.tar.gz --strip-components=1 -C .\n\nFROM pg-build AS pgauditlogtofile-build\nCOPY --from=pgauditlogtofile-src /ext-src/ /ext-src/\nWORKDIR /ext-src/pgauditlogtofile-src\nRUN make install USE_PGXS=1 -j $(getconf _NPROCESSORS_ONLN)\n\n#########################################################################################\n#\n# Layer \"neon-ext-build\"\n# compile neon extensions\n#\n#########################################################################################\nFROM pg-build-with-cargo AS neon-ext-build\nARG PG_VERSION\n\nUSER root\nCOPY . .\n\nRUN make -j $(getconf _NPROCESSORS_ONLN) -C pgxn -s install-compute \\\n      BUILD_TYPE=release CARGO_BUILD_FLAGS=\"--locked --release\" NEON_CARGO_ARTIFACT_TARGET_DIR=\"$(pwd)/target/release\"\n\n#########################################################################################\n#\n# Layer \"extensions-none\"\n#\n#########################################################################################\nFROM build-deps AS extensions-none\n\nRUN mkdir /usr/local/pgsql\n\n#########################################################################################\n#\n# Layer \"extensions-minimal\"\n#\n# This subset of extensions includes the extensions that we have in\n# shared_preload_libraries by default.\n#\n#########################################################################################\nFROM build-deps AS extensions-minimal\n\nCOPY --from=pgrag-build /usr/local/pgsql/ /usr/local/pgsql/\nCOPY --from=timescaledb-build /usr/local/pgsql/ /usr/local/pgsql/\nCOPY --from=pg_cron-build /usr/local/pgsql/ /usr/local/pgsql/\nCOPY --from=pg_partman-build /usr/local/pgsql/ /usr/local/pgsql/\n\n#########################################################################################\n#\n# Layer \"extensions-all\"\n# Bundle together all the 
extensions\n#\n#########################################################################################\nFROM build-deps AS extensions-all\n\n# Public extensions\nCOPY --from=postgis-build /usr/local/pgsql/ /usr/local/pgsql/\nCOPY --from=postgis-build /sfcgal/* /\nCOPY --from=pgrouting-build /usr/local/pgsql/ /usr/local/pgsql/\nCOPY --from=plv8-build /usr/local/pgsql/ /usr/local/pgsql/\nCOPY --from=h3-pg-build /usr/local/pgsql/ /usr/local/pgsql/\nCOPY --from=h3-pg-build /h3/usr /\nCOPY --from=postgresql-unit-build /usr/local/pgsql/ /usr/local/pgsql/\nCOPY --from=pgvector-build /usr/local/pgsql/ /usr/local/pgsql/\nCOPY --from=pgjwt-build /usr/local/pgsql/ /usr/local/pgsql/\nCOPY --from=pgrag-build /usr/local/pgsql/ /usr/local/pgsql/\nCOPY --from=pg_jsonschema-build /usr/local/pgsql/ /usr/local/pgsql/\nCOPY --from=pg_graphql-build /usr/local/pgsql/ /usr/local/pgsql/\nCOPY --from=pg_tiktoken-build /usr/local/pgsql/ /usr/local/pgsql/\nCOPY --from=hypopg-build /usr/local/pgsql/ /usr/local/pgsql/\nCOPY --from=online_advisor-build /usr/local/pgsql/ /usr/local/pgsql/\nCOPY --from=pg_hashids-build /usr/local/pgsql/ /usr/local/pgsql/\nCOPY --from=rum-build /usr/local/pgsql/ /usr/local/pgsql/\nCOPY --from=pgtap-build /usr/local/pgsql/ /usr/local/pgsql/\nCOPY --from=ip4r-build /usr/local/pgsql/ /usr/local/pgsql/\nCOPY --from=prefix-build /usr/local/pgsql/ /usr/local/pgsql/\nCOPY --from=hll-build /usr/local/pgsql/ /usr/local/pgsql/\nCOPY --from=plpgsql_check-build /usr/local/pgsql/ /usr/local/pgsql/\nCOPY --from=timescaledb-build /usr/local/pgsql/ /usr/local/pgsql/\nCOPY --from=pg_hint_plan-build /usr/local/pgsql/ /usr/local/pgsql/\nCOPY --from=pg_cron-build /usr/local/pgsql/ /usr/local/pgsql/\nCOPY --from=pgx_ulid-build /usr/local/pgsql/ /usr/local/pgsql/\nCOPY --from=pgx_ulid-pgrx12-build /usr/local/pgsql/ /usr/local/pgsql/\nCOPY --from=pg_session_jwt-build /usr/local/pgsql/ /usr/local/pgsql/\nCOPY --from=rdkit-build /usr/local/pgsql/ /usr/local/pgsql/\nCOPY 
--from=pg_uuidv7-build /usr/local/pgsql/ /usr/local/pgsql/\nCOPY --from=pg_roaringbitmap-build /usr/local/pgsql/ /usr/local/pgsql/\nCOPY --from=pg_semver-build /usr/local/pgsql/ /usr/local/pgsql/\nCOPY --from=wal2json-build /usr/local/pgsql /usr/local/pgsql\nCOPY --from=pg-anon-pg-build /usr/local/pgsql/ /usr/local/pgsql/\nCOPY --from=pg_ivm-build /usr/local/pgsql/ /usr/local/pgsql/\nCOPY --from=pg_partman-build /usr/local/pgsql/ /usr/local/pgsql/\nCOPY --from=pg_mooncake-build /usr/local/pgsql/ /usr/local/pgsql/\nCOPY --from=pg_duckdb-build /usr/local/pgsql/ /usr/local/pgsql/\nCOPY --from=pg_repack-build /usr/local/pgsql/ /usr/local/pgsql/\nCOPY --from=pgaudit-build /usr/local/pgsql/ /usr/local/pgsql/\nCOPY --from=pgauditlogtofile-build /usr/local/pgsql/ /usr/local/pgsql/\n\n#########################################################################################\n#\n# Layer \"neon-pg-ext-build\"\n# Includes Postgres and all the extensions chosen by EXTENSIONS arg.\n#\n#########################################################################################\nFROM extensions-${EXTENSIONS} AS neon-pg-ext-build\n\n#########################################################################################\n#\n# Compile the Neon-specific `compute_ctl`, `fast_import`, and `local_proxy` binaries\n#\n#########################################################################################\nFROM build-deps-with-cargo AS compute-tools\nARG BUILD_TAG\nENV BUILD_TAG=$BUILD_TAG\n\nUSER nonroot\n# Copy entire project to get Cargo.* files with proper dependencies for the whole project\nCOPY --chown=nonroot . 
.\nRUN --mount=type=cache,uid=1000,target=/home/nonroot/.cargo/registry \\\n    --mount=type=cache,uid=1000,target=/home/nonroot/.cargo/git \\\n    --mount=type=cache,uid=1000,target=/home/nonroot/target \\\n    cargo build --locked --profile release-line-debug-size-lto --bin compute_ctl --bin fast_import --bin local_proxy && \\\n    mkdir target-bin && \\\n    cp target/release-line-debug-size-lto/compute_ctl \\\n       target/release-line-debug-size-lto/fast_import \\\n       target/release-line-debug-size-lto/local_proxy \\\n       target-bin\n\n#########################################################################################\n#\n# Layer \"pgbouncer\"\n#\n#########################################################################################\n\nFROM $BASE_IMAGE_SHA AS pgbouncer\nRUN set -e \\\n    && echo 'Acquire::Retries \"5\";' > /etc/apt/apt.conf.d/80-retries \\\n    && apt update \\\n    && apt install --no-install-suggests --no-install-recommends -y \\\n        build-essential \\\n        git \\\n        ca-certificates \\\n        autoconf \\\n        automake \\\n        libevent-dev \\\n        libtool \\\n        pkg-config \\\n        libcurl4-openssl-dev \\\n        libssl-dev \\\n    && apt clean && rm -rf /var/lib/apt/lists/*\n\n# Use `dist_man_MANS=` to skip manpage generation (which requires python3/pandoc)\nENV PGBOUNCER_TAG=pgbouncer_1_24_1\nRUN set -e \\\n    && git clone --recurse-submodules --depth 1 --branch ${PGBOUNCER_TAG} https://github.com/pgbouncer/pgbouncer.git pgbouncer \\\n    && cd pgbouncer \\\n    && ./autogen.sh \\\n    && ./configure --prefix=/usr/local/pgbouncer \\\n    && make -j $(nproc) dist_man_MANS= \\\n    && make install dist_man_MANS=\n\n#########################################################################################\n#\n# Layer \"exporters\"\n#\n#########################################################################################\nFROM build-deps AS exporters\nARG TARGETARCH\n# Keep sql_exporter 
version same as in build-tools/Dockerfile and\n# test_runner/regress/test_compute_metrics.py\n# See comment on the top of the file regading `echo`, `-e` and `\\n`\nRUN if [ \"$TARGETARCH\" = \"amd64\" ]; then\\\n        postgres_exporter_sha256='59aa4a7bb0f7d361f5e05732f5ed8c03cc08f78449cef5856eadec33a627694b';\\\n        pgbouncer_exporter_sha256='c9f7cf8dcff44f0472057e9bf52613d93f3ffbc381ad7547a959daa63c5e84ac';\\\n        sql_exporter_sha256='9a41127a493e8bfebfe692bf78c7ed2872a58a3f961ee534d1b0da9ae584aaab';\\\n    else\\\n        postgres_exporter_sha256='d1dedea97f56c6d965837bfd1fbb3e35a3b4a4556f8cccee8bd513d8ee086124';\\\n        pgbouncer_exporter_sha256='217c4afd7e6492ae904055bc14fe603552cf9bac458c063407e991d68c519da3';\\\n        sql_exporter_sha256='530e6afc77c043497ed965532c4c9dfa873bc2a4f0b3047fad367715c0081d6a';\\\n    fi\\\n    && curl -sL https://github.com/prometheus-community/postgres_exporter/releases/download/v0.17.1/postgres_exporter-0.17.1.linux-${TARGETARCH}.tar.gz\\\n     | tar xzf - --strip-components=1 -C.\\\n    && curl -sL https://github.com/prometheus-community/pgbouncer_exporter/releases/download/v0.10.2/pgbouncer_exporter-0.10.2.linux-${TARGETARCH}.tar.gz\\\n     | tar xzf - --strip-components=1 -C.\\\n    && curl -sL https://github.com/burningalchemist/sql_exporter/releases/download/0.17.3/sql_exporter-0.17.3.linux-${TARGETARCH}.tar.gz\\\n     | tar xzf - --strip-components=1 -C.\\\n    && echo \"${postgres_exporter_sha256} postgres_exporter\" | sha256sum -c -\\\n    && echo \"${pgbouncer_exporter_sha256} pgbouncer_exporter\" | sha256sum -c -\\\n    && echo \"${sql_exporter_sha256} sql_exporter\" | sha256sum -c -\n\n#########################################################################################\n#\n# Clean up postgres folder before inclusion\n#\n#########################################################################################\nFROM neon-ext-build AS postgres-cleanup-layer\n\nCOPY --from=neon-pg-ext-build 
/usr/local/pgsql /usr/local/pgsql\n\n# Remove binaries from /bin/ that we won't use (or would manually copy & install otherwise)\nRUN cd /usr/local/pgsql/bin && rm -f ecpg raster2pgsql shp2pgsql pgtopo_export pgtopo_import pgsql2shp\n\n# Remove headers that we won't need anymore - we've completed installation of all extensions\nRUN rm -r /usr/local/pgsql/include\n\n# Remove static postgresql libraries - all compilation is finished, so we\n# can now remove these files - they must be included in other binaries by now\n# if they were to be used by other libraries.\nRUN rm /usr/local/pgsql/lib/lib*.a\n\n#########################################################################################\n#\n# Preprocess the sql_exporter configuration files\n#\n#########################################################################################\nFROM build-deps AS sql_exporter_preprocessor\nARG PG_VERSION\n\nUSER nonroot\nWORKDIR /home/nonroot\n\nCOPY --chown=nonroot compute compute\n\nRUN make PG_VERSION=\"${PG_VERSION:?}\" -C compute\n\n#########################################################################################\n#\n# Layer extension-tests\n#\n#########################################################################################\n\nFROM pg-build AS extension-tests\nARG PG_VERSION\n# This is required for the PostGIS test\nRUN apt-get update && case $DEBIAN_VERSION in \\\n      bullseye) \\\n        apt-get install -y libproj19 libgdal28 time; \\\n      ;; \\\n      bookworm) \\\n        apt-get install -y libgdal32 libproj25 time; \\\n      ;; \\\n      *) \\\n        echo \"Unknown Debian version ${DEBIAN_VERSION}\" && exit 1 \\\n      ;; \\\n    esac\n\nCOPY docker-compose/ext-src/ /ext-src/\n\nCOPY --from=pg-build /postgres /postgres\nCOPY --from=postgis-build /usr/local/pgsql/ /usr/local/pgsql/\nCOPY --from=postgis-build /ext-src/postgis-src /ext-src/postgis-src\nCOPY --from=postgis-build /sfcgal/* /usr\nCOPY --from=plv8-src /ext-src/ /ext-src/\nCOPY 
--from=h3-pg-src /ext-src/h3-pg-src /ext-src/h3-pg-src\nCOPY --from=postgresql-unit-src /ext-src/ /ext-src/\nCOPY --from=pgvector-src /ext-src/ /ext-src/\nCOPY --from=pgjwt-src /ext-src/ /ext-src/\n#COPY --from=pgrag-src /ext-src/ /ext-src/\n#COPY --from=pg_jsonschema-src /ext-src/ /ext-src/\nCOPY --from=pg_graphql-src /ext-src/ /ext-src/\n#COPY --from=pg_tiktoken-src /ext-src/ /ext-src/\nCOPY --from=hypopg-src /ext-src/ /ext-src/\nCOPY --from=online_advisor-src /ext-src/ /ext-src/\nCOPY --from=pg_hashids-src /ext-src/ /ext-src/\nCOPY --from=rum-src /ext-src/ /ext-src/\nCOPY --from=pgtap-src /ext-src/ /ext-src/\nCOPY --from=ip4r-src /ext-src/ /ext-src/\nCOPY --from=prefix-src /ext-src/ /ext-src/\nCOPY --from=hll-src /ext-src/ /ext-src/\nCOPY --from=plpgsql_check-src /ext-src/ /ext-src/\n#COPY --from=timescaledb-src /ext-src/ /ext-src/\nCOPY --from=pg_hint_plan-src /ext-src/ /ext-src/\nCOPY compute/patches/pg_hint_plan_${PG_VERSION:?}.patch /ext-src\nRUN cd /ext-src/pg_hint_plan-src && patch -p1 < /ext-src/pg_hint_plan_${PG_VERSION:?}.patch\nCOPY --from=pg_cron-src /ext-src/ /ext-src/\n#COPY --from=pgx_ulid-src /ext-src/ /ext-src/\n#COPY --from=pgx_ulid-pgrx12-src /ext-src/ /ext-src/\n#COPY --from=pg_session_jwt-src /ext-src/ /ext-src/\n#COPY --from=rdkit-src /ext-src/ /ext-src/\nCOPY --from=pg_uuidv7-src /ext-src/ /ext-src/\nCOPY --from=pg_roaringbitmap-src /ext-src/ /ext-src/\nCOPY --from=pg_semver-src /ext-src/ /ext-src/\n#COPY --from=wal2json-src /ext-src/ /ext-src/\nCOPY --from=pg_ivm-src /ext-src/ /ext-src/\nCOPY --from=pg_partman-src /ext-src/ /ext-src/\n#COPY --from=pg_mooncake-src /ext-src/ /ext-src/\nCOPY --from=pg_repack-src /ext-src/ /ext-src/\nCOPY --from=pg_repack-build /usr/local/pgsql/ /usr/local/pgsql/\nCOPY compute/patches/pg_repack.patch /ext-src\nRUN cd /ext-src/pg_repack-src && patch -p1 </ext-src/pg_repack.patch && rm -f /ext-src/pg_repack.patch\n\nCOPY --chmod=755 docker-compose/run-tests.sh /run-tests.sh\nRUN echo /usr/local/pgsql/lib > 
/etc/ld.so.conf.d/00-neon.conf && /sbin/ldconfig\nRUN apt-get update && apt-get install -y libtap-parser-sourcehandler-pgtap-perl jq parallel \\\n   && apt clean && rm -rf /ext-src/*.tar.gz /ext-src/*.patch /var/lib/apt/lists/*\nENV PATH=/usr/local/pgsql/bin:$PATH\nENV PGHOST=compute1\nENV PGPORT=55433\nENV PGUSER=cloud_admin\nENV PGDATABASE=postgres\nENV PG_VERSION=${PG_VERSION:?}\n\n#########################################################################################\n#\n# Final layer\n# Put it all together into the final image\n#\n#########################################################################################\nFROM $BASE_IMAGE_SHA\nARG DEBIAN_VERSION\n\n# Use strict mode for bash to catch errors early\nSHELL [\"/bin/bash\", \"-euo\", \"pipefail\", \"-c\"]\n\n# Install:\n# libreadline8 for psql\n# liblz4-1 for lz4\n# libossp-uuid16 for extension ossp-uuid\n# libgeos, libsfcgal1, and libprotobuf-c1 for PostGIS\n# libxml2, libxslt1.1 for xml2\n# libzstd1 for zstd\n# libboost* for rdkit\n# ca-certificates for communicating with s3 by compute_ctl\n# libevent for pgbouncer\nRUN echo 'Acquire::Retries \"5\";' > /etc/apt/apt.conf.d/80-retries && \\\n    echo -e \"retry_connrefused = on\\ntimeout=15\\ntries=5\\n\" > /root/.wgetrc\nRUN apt update && \\\n    case $DEBIAN_VERSION in \\\n      # Version-specific installs for Bullseye (PG14-PG16):\n      # libicu67, locales for collations (including ICU and plpgsql_check)\n      # libgdal28, libproj19 for PostGIS\n      bullseye) \\\n        VERSION_INSTALLS=\"libicu67 libgdal28 libproj19\"; \\\n      ;; \\\n      # Version-specific installs for Bookworm (PG17):\n      # libicu72, locales for collations (including ICU and plpgsql_check)\n      # libgdal32, libproj25 for PostGIS\n      bookworm) \\\n        VERSION_INSTALLS=\"libicu72 libgdal32 libproj25\"; \\\n      ;; \\\n      *) \\\n        echo \"Unknown Debian version ${DEBIAN_VERSION}\" && exit 1 \\\n      ;; \\\n    esac && \\\n    apt install 
--no-install-recommends -y \\\n        ca-certificates \\\n        gdb \\\n        iproute2 \\\n        libboost-iostreams1.74.0 \\\n        libboost-regex1.74.0 \\\n        libboost-serialization1.74.0 \\\n        libboost-system1.74.0 \\\n        libcurl4 \\\n        libevent-2.1-7 \\\n        libgeos-c1v5 \\\n        liblz4-1 \\\n        libossp-uuid16 \\\n        libprotobuf-c1 \\\n        libreadline8 \\\n        libsfcgal1 \\\n        libxml2 \\\n        libxslt1.1 \\\n        libzstd1 \\\n        locales \\\n        lsof \\\n        procps \\\n        rsyslog-gnutls \\\n        screen \\\n        tcpdump \\\n        $VERSION_INSTALLS && \\\n    apt clean && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* && \\\n    localedef -i en_US -c -f UTF-8 -A /usr/share/locale/locale.alias en_US.UTF-8\n\n# Add user postgres\nRUN mkdir /var/db && useradd -m -d /var/db/postgres postgres && \\\n    echo \"postgres:test_console_pass\" | chpasswd && \\\n    mkdir /var/db/postgres/compute && mkdir /var/db/postgres/specs && \\\n    mkdir /var/db/postgres/pgbouncer && \\\n    chown -R postgres:postgres /var/db/postgres && \\\n    chmod 0750 /var/db/postgres/compute && \\\n    chmod 0750 /var/db/postgres/pgbouncer && \\\n    # create folder for file cache\n    mkdir -p -m 777 /neon/cache && \\\n    # Create remote extension download directory\n    mkdir /usr/local/download_extensions && \\\n    chown -R postgres:postgres /usr/local/download_extensions\n\n# pgbouncer and its config\nCOPY --from=pgbouncer         /usr/local/pgbouncer/bin/pgbouncer /usr/local/bin/pgbouncer\nCOPY --chmod=0666 --chown=postgres compute/etc/pgbouncer.ini /etc/pgbouncer.ini\n\nCOPY --from=postgres-cleanup-layer --chown=postgres /usr/local/pgsql /usr/local\nCOPY --from=compute-tools --chown=postgres /home/nonroot/target-bin/compute_ctl /usr/local/bin/compute_ctl\nCOPY --from=compute-tools --chown=postgres /home/nonroot/target-bin/fast_import /usr/local/bin/fast_import\n\n# local_proxy and its config\nCOPY 
--from=compute-tools --chown=postgres /home/nonroot/target-bin/local_proxy /usr/local/bin/local_proxy\nRUN mkdir -p /etc/local_proxy && chown postgres:postgres /etc/local_proxy\n\n# Metrics exporter binaries and configuration files\nCOPY --from=exporters ./postgres_exporter /bin/postgres_exporter\nCOPY --from=exporters ./pgbouncer_exporter /bin/pgbouncer_exporter\nCOPY --from=exporters ./sql_exporter /bin/sql_exporter\n\nCOPY --chown=postgres compute/etc/postgres_exporter.yml /etc/postgres_exporter.yml\n\nCOPY --from=sql_exporter_preprocessor --chmod=0644 /home/nonroot/compute/etc/sql_exporter.yml               /etc/sql_exporter.yml\nCOPY --from=sql_exporter_preprocessor --chmod=0644 /home/nonroot/compute/etc/neon_collector.yml             /etc/neon_collector.yml\nCOPY --from=sql_exporter_preprocessor --chmod=0644 /home/nonroot/compute/etc/sql_exporter_autoscaling.yml   /etc/sql_exporter_autoscaling.yml\nCOPY --from=sql_exporter_preprocessor --chmod=0644 /home/nonroot/compute/etc/neon_collector_autoscaling.yml /etc/neon_collector_autoscaling.yml\n\n# Make the libraries we built available\nCOPY --chmod=0666 compute/etc/ld.so.conf.d/00-neon.conf /etc/ld.so.conf.d/00-neon.conf\nRUN /sbin/ldconfig\n\n# rsyslog config permissions\n# directory for rsyslogd pid file\nRUN mkdir /var/run/rsyslogd && \\\n    chown -R postgres:postgres /var/run/rsyslogd && \\\n    chown -R postgres:postgres /etc/rsyslog.d/\n\n\nENV LANG=en_US.utf8\nUSER postgres\nENTRYPOINT [\"/usr/local/bin/compute_ctl\"]\n"
  },
  {
    "path": "compute/etc/README.md",
    "content": "# Compute Configuration\n\nThese files are the configuration files for various other pieces of software\nthat will be running in the compute alongside Postgres.\n\n## `sql_exporter`\n\n### Adding a `sql_exporter` Metric\n\nWe use `sql_exporter` to export various metrics from Postgres. In order to add\na metric, you will need to create two files: a `libsonnet` and a `sql` file. You\nwill then import the `libsonnet` file in one of the collector files\n(`neon_collector.jsonnet` or `neon_collector_autoscaling.jsonnet`), and the\n`sql` file will be imported in the `libsonnet` file.\n\nIf your statistic is an LSN, you may want to cast it to a `float8` because\nPrometheus only supports floats. This is safe in practice because `float8` can\nstore integers from `-2^53` to `+2^53` exactly; precision is lost only for LSNs\nbeyond `2^53`, i.e. after 8 PiB of WAL.\n"
  },
  {
    "path": "compute/etc/ld.so.conf.d/00-neon.conf",
    "content": "/usr/local/lib\n"
  },
  {
    "path": "compute/etc/neon_collector.jsonnet",
    "content": "{\n  collector_name: 'neon_collector',\n  metrics: [\n    import 'sql_exporter/checkpoints_req.libsonnet',\n    import 'sql_exporter/checkpoints_timed.libsonnet',\n    import 'sql_exporter/compute_backpressure_throttling_seconds_total.libsonnet',\n    import 'sql_exporter/compute_current_lsn.libsonnet',\n    import 'sql_exporter/compute_logical_snapshot_files.libsonnet',\n    import 'sql_exporter/compute_logical_snapshots_bytes.libsonnet',\n    import 'sql_exporter/compute_max_connections.libsonnet',\n    import 'sql_exporter/compute_pg_oldest_frozen_xid_age.libsonnet',\n    import 'sql_exporter/compute_pg_oldest_mxid_age.libsonnet',\n    import 'sql_exporter/compute_receive_lsn.libsonnet',\n    import 'sql_exporter/compute_subscriptions_count.libsonnet',\n    import 'sql_exporter/connection_counts.libsonnet',\n    import 'sql_exporter/db_total_size.libsonnet',\n    import 'sql_exporter/file_cache_read_wait_seconds_bucket.libsonnet',\n    import 'sql_exporter/file_cache_read_wait_seconds_count.libsonnet',\n    import 'sql_exporter/file_cache_read_wait_seconds_sum.libsonnet',\n    import 'sql_exporter/file_cache_write_wait_seconds_bucket.libsonnet',\n    import 'sql_exporter/file_cache_write_wait_seconds_count.libsonnet',\n    import 'sql_exporter/file_cache_write_wait_seconds_sum.libsonnet',\n    import 'sql_exporter/getpage_prefetch_discards_total.libsonnet',\n    import 'sql_exporter/getpage_prefetch_misses_total.libsonnet',\n    import 'sql_exporter/getpage_prefetch_requests_total.libsonnet',\n    import 'sql_exporter/getpage_prefetches_buffered.libsonnet',\n    import 'sql_exporter/getpage_sync_requests_total.libsonnet',\n    import 'sql_exporter/compute_getpage_stuck_requests_total.libsonnet',\n    import 'sql_exporter/compute_getpage_max_inflight_stuck_time_ms.libsonnet',\n    import 'sql_exporter/getpage_wait_seconds_bucket.libsonnet',\n    import 'sql_exporter/getpage_wait_seconds_count.libsonnet',\n    import 
'sql_exporter/getpage_wait_seconds_sum.libsonnet',\n    import 'sql_exporter/lfc_approximate_working_set_size.libsonnet',\n    import 'sql_exporter/lfc_approximate_working_set_size_windows.libsonnet',\n    import 'sql_exporter/lfc_cache_size_limit.libsonnet',\n    import 'sql_exporter/lfc_chunk_size.libsonnet',\n    import 'sql_exporter/lfc_hits.libsonnet',\n    import 'sql_exporter/lfc_misses.libsonnet',\n    import 'sql_exporter/lfc_used.libsonnet',\n    import 'sql_exporter/lfc_used_pages.libsonnet',\n    import 'sql_exporter/lfc_writes.libsonnet',\n    import 'sql_exporter/logical_slot_restart_lsn.libsonnet',\n    import 'sql_exporter/max_cluster_size.libsonnet',\n    import 'sql_exporter/pageserver_disconnects_total.libsonnet',\n    import 'sql_exporter/pageserver_requests_sent_total.libsonnet',\n    import 'sql_exporter/pageserver_send_flushes_total.libsonnet',\n    import 'sql_exporter/pageserver_open_requests.libsonnet',\n    import 'sql_exporter/pg_stats_userdb.libsonnet',\n    import 'sql_exporter/replication_delay_bytes.libsonnet',\n    import 'sql_exporter/replication_delay_seconds.libsonnet',\n    import 'sql_exporter/retained_wal.libsonnet',\n    import 'sql_exporter/wal_is_lost.libsonnet',\n  ],\n  queries: [\n    {\n      query_name: 'neon_perf_counters',\n      query: importstr 'sql_exporter/neon_perf_counters.sql',\n    },\n  ],\n}\n"
  },
  {
    "path": "compute/etc/neon_collector_autoscaling.jsonnet",
    "content": "{\n  collector_name: 'neon_collector_autoscaling',\n  metrics: [\n    import 'sql_exporter/lfc_approximate_working_set_size_windows.autoscaling.libsonnet',\n    import 'sql_exporter/lfc_cache_size_limit.libsonnet',\n    import 'sql_exporter/lfc_hits.libsonnet',\n    import 'sql_exporter/lfc_misses.libsonnet',\n    import 'sql_exporter/lfc_used.libsonnet',\n    import 'sql_exporter/lfc_writes.libsonnet',\n  ],\n}\n"
  },
  {
    "path": "compute/etc/pgbouncer.ini",
    "content": "[databases]\n;; pgbouncer propagates application_name (if it's specified) to the server, but some\n;; clients don't set it. We set default application_name=pgbouncer to make it\n;; easier to identify pgbouncer connections in Postgres. If client sets\n;; application_name, it will be used instead.\n*=host=localhost port=5432 auth_user=cloud_admin application_name=pgbouncer\n[pgbouncer]\nlisten_port=6432\nlisten_addr=0.0.0.0\nauth_type=scram-sha-256\nauth_user=cloud_admin\nauth_dbname=postgres\nclient_tls_sslmode=disable\nserver_tls_sslmode=disable\npool_mode=transaction\nmax_client_conn=10000\ndefault_pool_size=64\nmax_prepared_statements=0\nadmin_users=postgres\nunix_socket_dir=/tmp/\nunix_socket_mode=0777\n; required for pgbouncer_exporter\nignore_startup_parameters=extra_float_digits\n; pidfile for graceful termination\npidfile=/tmp/pgbouncer.pid\n\n;; Disable connection logging. It produces a lot of logs that no one looks at,\n;; and we can get similar log entries from the proxy too. We had incidents in\n;; the past where the logging significantly stressed the log device or pgbouncer\n;; itself.\nlog_connections=0\nlog_disconnections=0\n"
  },
  {
    "path": "compute/etc/postgres_exporter.yml",
    "content": ""
  },
  {
    "path": "compute/etc/sql_exporter/checkpoints_req.17.sql",
    "content": "SELECT num_requested AS checkpoints_req FROM pg_catalog.pg_stat_checkpointer;\n"
  },
  {
    "path": "compute/etc/sql_exporter/checkpoints_req.libsonnet",
    "content": "local neon = import 'neon.libsonnet';\n\nlocal pg_stat_bgwriter = importstr 'sql_exporter/checkpoints_req.sql';\nlocal pg_stat_checkpointer = importstr 'sql_exporter/checkpoints_req.17.sql';\n\n{\n  metric_name: 'checkpoints_req',\n  type: 'gauge',\n  help: 'Number of requested checkpoints',\n  key_labels: null,\n  values: [\n    'checkpoints_req',\n  ],\n  query: if neon.PG_MAJORVERSION_NUM < 17 then pg_stat_bgwriter else pg_stat_checkpointer,\n}\n"
  },
  {
    "path": "compute/etc/sql_exporter/checkpoints_req.sql",
    "content": "SELECT checkpoints_req FROM pg_catalog.pg_stat_bgwriter;\n"
  },
  {
    "path": "compute/etc/sql_exporter/checkpoints_timed.17.sql",
    "content": "SELECT num_timed AS checkpoints_timed FROM pg_catalog.pg_stat_checkpointer;\n"
  },
  {
    "path": "compute/etc/sql_exporter/checkpoints_timed.libsonnet",
    "content": "local neon = import 'neon.libsonnet';\n\nlocal pg_stat_bgwriter = importstr 'sql_exporter/checkpoints_timed.sql';\nlocal pg_stat_checkpointer = importstr 'sql_exporter/checkpoints_timed.17.sql';\n\n{\n  metric_name: 'checkpoints_timed',\n  type: 'gauge',\n  help: 'Number of scheduled checkpoints',\n  key_labels: null,\n  values: [\n    'checkpoints_timed',\n  ],\n  query: if neon.PG_MAJORVERSION_NUM < 17 then pg_stat_bgwriter else pg_stat_checkpointer,\n}\n"
  },
  {
    "path": "compute/etc/sql_exporter/checkpoints_timed.sql",
    "content": "SELECT checkpoints_timed FROM pg_catalog.pg_stat_bgwriter;\n"
  },
  {
    "path": "compute/etc/sql_exporter/compute_backpressure_throttling_seconds_total.libsonnet",
    "content": "{\n  metric_name: 'compute_backpressure_throttling_seconds_total',\n  type: 'counter',\n  help: 'Time compute has spent throttled',\n  key_labels: null,\n  values: [\n    'throttled',\n  ],\n  query: importstr 'sql_exporter/compute_backpressure_throttling_seconds_total.sql',\n}\n"
  },
  {
    "path": "compute/etc/sql_exporter/compute_backpressure_throttling_seconds_total.sql",
    "content": "SELECT (neon.backpressure_throttling_time()::pg_catalog.float8 / 1000000) AS throttled;\n"
  },
  {
    "path": "compute/etc/sql_exporter/compute_current_lsn.libsonnet",
    "content": "{\n  metric_name: 'compute_current_lsn',\n  type: 'gauge',\n  help: 'Current LSN of the database',\n  key_labels: null,\n  values: [\n    'lsn',\n  ],\n  query: importstr 'sql_exporter/compute_current_lsn.sql',\n}\n"
  },
  {
    "path": "compute/etc/sql_exporter/compute_current_lsn.sql",
    "content": "SELECT CASE\n  WHEN pg_catalog.pg_is_in_recovery() THEN (pg_catalog.pg_last_wal_replay_lsn() - '0/0')::pg_catalog.FLOAT8\n  ELSE (pg_catalog.pg_current_wal_lsn() - '0/0')::pg_catalog.FLOAT8\nEND AS lsn;\n"
  },
  {
    "path": "compute/etc/sql_exporter/compute_getpage_max_inflight_stuck_time_ms.libsonnet",
    "content": "{\n  metric_name: 'compute_getpage_max_inflight_stuck_time_ms',\n  type: 'gauge',\n  help: 'Max wait time for stuck requests among all backends. Includes only active stuck requests, terminated or disconnected ones are not accounted for',\n  values: [\n    'compute_getpage_max_inflight_stuck_time_ms',\n  ],\n  query_ref: 'neon_perf_counters',\n}\n"
  },
  {
    "path": "compute/etc/sql_exporter/compute_getpage_stuck_requests_total.libsonnet",
    "content": "{\n  metric_name: 'compute_getpage_stuck_requests_total',\n  type: 'counter',\n  help: 'Total number of Getpage requests left without an answer for more than pageserver_response_log_timeout but less than pageserver_response_disconnect_timeout',\n  values: [\n    'compute_getpage_stuck_requests_total',\n  ],\n  query_ref: 'neon_perf_counters',\n}\n"
  },
  {
    "path": "compute/etc/sql_exporter/compute_logical_snapshot_files.libsonnet",
    "content": "{\n  metric_name: 'compute_logical_snapshot_files',\n  type: 'gauge',\n  help: 'Number of snapshot files in pg_logical/snapshot',\n  key_labels: [\n    'timeline_id',\n  ],\n  values: [\n    'num_logical_snapshot_files',\n  ],\n  query: importstr 'sql_exporter/compute_logical_snapshot_files.sql',\n}\n"
  },
  {
    "path": "compute/etc/sql_exporter/compute_logical_snapshot_files.sql",
    "content": "SELECT\n  (SELECT setting FROM pg_catalog.pg_settings WHERE name = 'neon.timeline_id') AS timeline_id,\n  -- Postgres creates temporary snapshot files of the form %X-%X.snap.%d.tmp.\n  -- These temporary snapshot files are renamed to the actual snapshot files\n  -- after they are completely built. We only WAL-log the completely built\n  -- snapshot files\n  (SELECT COUNT(*) FROM pg_catalog.pg_ls_dir('pg_logical/snapshots') AS name WHERE name LIKE '%.snap') AS num_logical_snapshot_files;\n"
  },
  {
    "path": "compute/etc/sql_exporter/compute_logical_snapshots_bytes.15.sql",
    "content": "SELECT\n  (SELECT pg_catalog.current_setting('neon.timeline_id')) AS timeline_id,\n  -- Postgres creates temporary snapshot files of the form %X-%X.snap.%d.tmp.\n  -- These temporary snapshot files are renamed to the actual snapshot files\n  -- after they are completely built. We only WAL-log the completely built\n  -- snapshot files\n  (SELECT COALESCE(pg_catalog.sum(size), 0) FROM pg_catalog.pg_ls_logicalsnapdir() WHERE name LIKE '%.snap') AS logical_snapshots_bytes;\n"
  },
  {
    "path": "compute/etc/sql_exporter/compute_logical_snapshots_bytes.libsonnet",
    "content": "local neon = import 'neon.libsonnet';\n\nlocal pg_ls_logicalsnapdir = importstr 'sql_exporter/compute_logical_snapshots_bytes.15.sql';\nlocal pg_ls_dir = importstr 'sql_exporter/compute_logical_snapshots_bytes.sql';\n\n{\n  metric_name: 'compute_logical_snapshots_bytes',\n  type: 'gauge',\n  help: 'Size of the pg_logical/snapshots directory, not including temporary files',\n  key_labels: [\n    'timeline_id',\n  ],\n  values: [\n    'logical_snapshots_bytes',\n  ],\n  query: if neon.PG_MAJORVERSION_NUM < 15 then pg_ls_dir else pg_ls_logicalsnapdir,\n}\n"
  },
  {
    "path": "compute/etc/sql_exporter/compute_logical_snapshots_bytes.sql",
    "content": "SELECT\n  (SELECT setting FROM pg_catalog.pg_settings WHERE name = 'neon.timeline_id') AS timeline_id,\n  -- Postgres creates temporary snapshot files of the form %X-%X.snap.%d.tmp.\n  -- These temporary snapshot files are renamed to the actual snapshot files\n  -- after they are completely built. We only WAL-log the completely built\n  -- snapshot files\n  (SELECT COALESCE(pg_catalog.sum((pg_catalog.pg_stat_file('pg_logical/snapshots/' || name, missing_ok => true)).size), 0)\n   FROM (SELECT * FROM pg_catalog.pg_ls_dir('pg_logical/snapshots') WHERE pg_ls_dir LIKE '%.snap') AS name\n  ) AS logical_snapshots_bytes;\n"
  },
  {
    "path": "compute/etc/sql_exporter/compute_max_connections.libsonnet",
    "content": "{\n  metric_name: 'compute_max_connections',\n  type: 'gauge',\n  help: 'Max connections allowed for Postgres',\n  key_labels: null,\n  values: [\n    'max_connections',\n  ],\n  query: importstr 'sql_exporter/compute_max_connections.sql',\n}\n"
  },
  {
    "path": "compute/etc/sql_exporter/compute_max_connections.sql",
    "content": "SELECT pg_catalog.current_setting('max_connections') AS max_connections;\n"
  },
  {
    "path": "compute/etc/sql_exporter/compute_pg_oldest_frozen_xid_age.libsonnet",
    "content": "{\n  metric_name: 'compute_pg_oldest_frozen_xid_age',\n  type: 'gauge',\n  help: 'Age of oldest XIDs that have not been frozen by VACUUM. An indicator of how long it has been since VACUUM last ran.',\n  key_labels: [\n    'database_name',\n  ],\n  value_label: 'metric',\n  values: [\n    'frozen_xid_age',\n  ],\n  query: importstr 'sql_exporter/compute_pg_oldest_frozen_xid_age.sql',\n}\n"
  },
  {
    "path": "compute/etc/sql_exporter/compute_pg_oldest_frozen_xid_age.sql",
    "content": "SELECT datname database_name,\n   pg_catalog.age(datfrozenxid) frozen_xid_age\nFROM pg_catalog.pg_database\nORDER BY frozen_xid_age DESC LIMIT 10;\n"
  },
  {
    "path": "compute/etc/sql_exporter/compute_pg_oldest_mxid_age.libsonnet",
    "content": "{\n  metric_name: 'compute_pg_oldest_mxid_age',\n  type: 'gauge',\n  help: 'Age of oldest MXIDs that have not been replaced by VACUUM. An indicator of how long it has been since VACUUM last ran.',\n  key_labels: [\n    'database_name',\n  ],\n  value_label: 'metric',\n  values: [\n    'min_mxid_age',\n  ],\n  query: importstr 'sql_exporter/compute_pg_oldest_mxid_age.sql',\n}\n"
  },
  {
    "path": "compute/etc/sql_exporter/compute_pg_oldest_mxid_age.sql",
    "content": "SELECT datname database_name,\n  pg_catalog.mxid_age(datminmxid) min_mxid_age\nFROM pg_catalog.pg_database\nORDER BY min_mxid_age DESC LIMIT 10;\n"
  },
  {
    "path": "compute/etc/sql_exporter/compute_receive_lsn.libsonnet",
    "content": "{\n  metric_name: 'compute_receive_lsn',\n  type: 'gauge',\n  help: 'Returns the last write-ahead log location that has been received and synced to disk by streaming replication',\n  key_labels: null,\n  values: [\n    'lsn',\n  ],\n  query: importstr 'sql_exporter/compute_receive_lsn.sql',\n}\n"
  },
  {
    "path": "compute/etc/sql_exporter/compute_receive_lsn.sql",
    "content": "SELECT CASE\n  WHEN pg_catalog.pg_is_in_recovery() THEN (pg_catalog.pg_last_wal_receive_lsn() - '0/0')::pg_catalog.FLOAT8\n  ELSE 0\nEND AS lsn;\n"
  },
  {
    "path": "compute/etc/sql_exporter/compute_subscriptions_count.libsonnet",
    "content": "{\n  metric_name: 'compute_subscriptions_count',\n  type: 'gauge',\n  help: 'Number of logical replication subscriptions grouped by enabled/disabled',\n  key_labels: [\n    'enabled',\n  ],\n  values: [\n    'subscriptions_count',\n  ],\n  query: importstr 'sql_exporter/compute_subscriptions_count.sql',\n}\n"
  },
  {
    "path": "compute/etc/sql_exporter/compute_subscriptions_count.sql",
    "content": "SELECT subenabled::pg_catalog.text AS enabled, pg_catalog.count(*) AS subscriptions_count FROM pg_catalog.pg_subscription GROUP BY subenabled;\n"
  },
  {
    "path": "compute/etc/sql_exporter/connection_counts.libsonnet",
    "content": "{\n  metric_name: 'connection_counts',\n  type: 'gauge',\n  help: 'Connection counts',\n  key_labels: [\n    'datname',\n    'state',\n  ],\n  values: [\n    'count',\n  ],\n  query: importstr 'sql_exporter/connection_counts.sql',\n}\n"
  },
  {
    "path": "compute/etc/sql_exporter/connection_counts.sql",
    "content": "SELECT datname, state, pg_catalog.count(*) AS count FROM pg_catalog.pg_stat_activity WHERE state <> '' GROUP BY datname, state;\n"
  },
  {
    "path": "compute/etc/sql_exporter/db_total_size.libsonnet",
    "content": "{\n  metric_name: 'db_total_size',\n  type: 'gauge',\n  help: 'Size of all databases',\n  key_labels: null,\n  values: [\n    'total',\n  ],\n  query: importstr 'sql_exporter/db_total_size.sql',\n}\n"
  },
  {
    "path": "compute/etc/sql_exporter/db_total_size.sql",
    "content": "SELECT pg_catalog.sum(pg_catalog.pg_database_size(datname)) AS total\nFROM pg_catalog.pg_database\n-- Ignore invalid databases, as we will likely have problems with\n-- getting their size from the Pageserver.\nWHERE datconnlimit != -2;\n"
  },
  {
    "path": "compute/etc/sql_exporter/file_cache_read_wait_seconds_bucket.libsonnet",
    "content": "{\n  metric_name: 'file_cache_read_wait_seconds_bucket',\n  type: 'counter',\n  help: 'Histogram buckets of LFC read operation latencies',\n  key_labels: [\n    'bucket_le',\n  ],\n  values: [\n    'value',\n  ],\n  query: importstr 'sql_exporter/file_cache_read_wait_seconds_bucket.sql',\n}\n"
  },
  {
    "path": "compute/etc/sql_exporter/file_cache_read_wait_seconds_bucket.sql",
    "content": "SELECT bucket_le, value FROM neon.neon_perf_counters WHERE metric = 'file_cache_read_wait_seconds_bucket';\n"
  },
  {
    "path": "compute/etc/sql_exporter/file_cache_read_wait_seconds_count.libsonnet",
    "content": "{\n  metric_name: 'file_cache_read_wait_seconds_count',\n  type: 'counter',\n  help: 'Number of read operations in LFC',\n  values: [\n    'file_cache_read_wait_seconds_count',\n  ],\n  query_ref: 'neon_perf_counters',\n}\n"
  },
  {
    "path": "compute/etc/sql_exporter/file_cache_read_wait_seconds_sum.libsonnet",
    "content": "{\n  metric_name: 'file_cache_read_wait_seconds_sum',\n  type: 'counter',\n  help: 'Time spent in LFC read operations',\n  values: [\n    'file_cache_read_wait_seconds_sum',\n  ],\n  query_ref: 'neon_perf_counters',\n}\n"
  },
  {
    "path": "compute/etc/sql_exporter/file_cache_write_wait_seconds_bucket.libsonnet",
    "content": "{\n  metric_name: 'file_cache_write_wait_seconds_bucket',\n  type: 'counter',\n  help: 'Histogram buckets of LFC write operation latencies',\n  key_labels: [\n    'bucket_le',\n  ],\n  values: [\n    'value',\n  ],\n  query: importstr 'sql_exporter/file_cache_write_wait_seconds_bucket.sql',\n}\n"
  },
  {
    "path": "compute/etc/sql_exporter/file_cache_write_wait_seconds_bucket.sql",
    "content": "SELECT bucket_le, value FROM neon.neon_perf_counters WHERE metric = 'file_cache_write_wait_seconds_bucket';\n"
  },
  {
    "path": "compute/etc/sql_exporter/file_cache_write_wait_seconds_count.libsonnet",
    "content": "{\n  metric_name: 'file_cache_write_wait_seconds_count',\n  type: 'counter',\n  help: 'Number of write operations in LFC',\n  values: [\n    'file_cache_write_wait_seconds_count',\n  ],\n  query_ref: 'neon_perf_counters',\n}\n"
  },
  {
    "path": "compute/etc/sql_exporter/file_cache_write_wait_seconds_sum.libsonnet",
    "content": "{\n  metric_name: 'file_cache_write_wait_seconds_sum',\n  type: 'counter',\n  help: 'Time spent in LFC write operations',\n  values: [\n    'file_cache_write_wait_seconds_sum',\n  ],\n  query_ref: 'neon_perf_counters',\n}\n"
  },
  {
    "path": "compute/etc/sql_exporter/getpage_prefetch_discards_total.libsonnet",
    "content": "{\n  metric_name: 'getpage_prefetch_discards_total',\n  type: 'counter',\n  help: 'Number of prefetch responses issued but not used',\n  values: [\n    'getpage_prefetch_discards_total',\n  ],\n  query_ref: 'neon_perf_counters',\n}\n"
  },
  {
    "path": "compute/etc/sql_exporter/getpage_prefetch_misses_total.libsonnet",
    "content": "{\n  metric_name: 'getpage_prefetch_misses_total',\n  type: 'counter',\n  help: \"Total number of readahead misses; consisting of either prefetches that don't satisfy the LSN bounds once the prefetch got read by the backend, or cases where somehow no readahead was issued for the read\",\n  values: [\n    'getpage_prefetch_misses_total',\n  ],\n  query_ref: 'neon_perf_counters',\n}\n"
  },
  {
    "path": "compute/etc/sql_exporter/getpage_prefetch_requests_total.libsonnet",
    "content": "{\n  metric_name: 'getpage_prefetch_requests_total',\n  type: 'counter',\n  help: 'Number of getpage issued for prefetching',\n  values: [\n    'getpage_prefetch_requests_total',\n  ],\n  query_ref: 'neon_perf_counters',\n}\n"
  },
  {
    "path": "compute/etc/sql_exporter/getpage_prefetches_buffered.libsonnet",
    "content": "{\n  metric_name: 'getpage_prefetches_buffered',\n  type: 'gauge',\n  help: 'Number of prefetched pages buffered in neon',\n  values: [\n    'getpage_prefetches_buffered',\n  ],\n  query_ref: 'neon_perf_counters',\n}\n"
  },
  {
    "path": "compute/etc/sql_exporter/getpage_sync_requests_total.libsonnet",
    "content": "{\n  metric_name: 'getpage_sync_requests_total',\n  type: 'counter',\n  help: 'Number of synchronous getpage issued',\n  values: [\n    'getpage_sync_requests_total',\n  ],\n  query_ref: 'neon_perf_counters',\n}\n"
  },
  {
    "path": "compute/etc/sql_exporter/getpage_wait_seconds_bucket.libsonnet",
    "content": "{\n  metric_name: 'getpage_wait_seconds_bucket',\n  type: 'counter',\n  help: 'Histogram buckets of getpage request latency',\n  key_labels: [\n    'bucket_le',\n  ],\n  values: [\n    'value',\n  ],\n  query: importstr 'sql_exporter/getpage_wait_seconds_bucket.sql',\n}\n"
  },
  {
    "path": "compute/etc/sql_exporter/getpage_wait_seconds_bucket.sql",
    "content": "SELECT bucket_le, value FROM neon.neon_perf_counters WHERE metric = 'getpage_wait_seconds_bucket';\n"
  },
  {
    "path": "compute/etc/sql_exporter/getpage_wait_seconds_count.libsonnet",
    "content": "{\n  metric_name: 'getpage_wait_seconds_count',\n  type: 'counter',\n  help: 'Number of getpage requests',\n  values: [\n    'getpage_wait_seconds_count',\n  ],\n  query_ref: 'neon_perf_counters',\n}\n"
  },
  {
    "path": "compute/etc/sql_exporter/getpage_wait_seconds_sum.libsonnet",
    "content": "{\n  metric_name: 'getpage_wait_seconds_sum',\n  type: 'counter',\n  help: 'Time spent in getpage requests',\n  values: [\n    'getpage_wait_seconds_sum',\n  ],\n  query_ref: 'neon_perf_counters',\n}\n"
  },
  {
    "path": "compute/etc/sql_exporter/lfc_approximate_working_set_size.libsonnet",
    "content": "// DEPRECATED\n\n{\n  metric_name: 'lfc_approximate_working_set_size',\n  type: 'gauge',\n  help: 'Approximate working set size in pages of 8192 bytes',\n  key_labels: null,\n  values: [\n    'approximate_working_set_size',\n  ],\n  query: importstr 'sql_exporter/lfc_approximate_working_set_size.sql',\n}\n"
  },
  {
    "path": "compute/etc/sql_exporter/lfc_approximate_working_set_size.sql",
    "content": "SELECT neon.approximate_working_set_size(false) AS approximate_working_set_size;\n"
  },
  {
    "path": "compute/etc/sql_exporter/lfc_approximate_working_set_size_windows.autoscaling.libsonnet",
    "content": "{\n  metric_name: 'lfc_approximate_working_set_size_windows',\n  type: 'gauge',\n  help: 'Approximate working set size in pages of 8192 bytes',\n  key_labels: [\n    'duration_seconds',\n  ],\n  values: [\n    'size',\n  ],\n  query: importstr 'sql_exporter/lfc_approximate_working_set_size_windows.autoscaling.sql',\n}\n"
  },
  {
    "path": "compute/etc/sql_exporter/lfc_approximate_working_set_size_windows.autoscaling.sql",
    "content": "-- NOTE: This is the \"internal\" / \"machine-readable\" version. This outputs the\n-- working set size looking back 1..60 minutes, labeled with the number of\n-- minutes.\n\nSELECT\n  x::pg_catalog.text AS duration_seconds,\n  neon.approximate_working_set_size_seconds(x) AS size\nFROM (SELECT generate_series * 60 AS x FROM generate_series(1, 60)) AS t (x);\n"
  },
  {
    "path": "compute/etc/sql_exporter/lfc_approximate_working_set_size_windows.libsonnet",
    "content": "{\n  metric_name: 'lfc_approximate_working_set_size_windows',\n  type: 'gauge',\n  help: 'Approximate working set size in pages of 8192 bytes',\n  key_labels: [\n    'duration',\n  ],\n  values: [\n    'size',\n  ],\n  query: importstr 'sql_exporter/lfc_approximate_working_set_size_windows.sql',\n}\n"
  },
  {
    "path": "compute/etc/sql_exporter/lfc_approximate_working_set_size_windows.sql",
    "content": "-- NOTE: This is the \"public\" / \"human-readable\" version. Here, we supply a\n-- small selection of durations in a pretty-printed form.\n\nSELECT\n  x AS duration,\n  neon.approximate_working_set_size_seconds(extract('epoch' FROM x::pg_catalog.interval)::pg_catalog.int4) AS size FROM (\n    VALUES ('5m'), ('15m'), ('1h')\n  ) AS t (x);\n"
  },
  {
    "path": "compute/etc/sql_exporter/lfc_cache_size_limit.libsonnet",
    "content": "{\n  metric_name: 'lfc_cache_size_limit',\n  type: 'gauge',\n  help: 'LFC cache size limit in bytes',\n  key_labels: null,\n  values: [\n    'lfc_cache_size_limit',\n  ],\n  query: importstr 'sql_exporter/lfc_cache_size_limit.sql',\n}\n"
  },
  {
    "path": "compute/etc/sql_exporter/lfc_cache_size_limit.sql",
    "content": "SELECT pg_catalog.pg_size_bytes(pg_catalog.current_setting('neon.file_cache_size_limit')) AS lfc_cache_size_limit;\n"
  },
  {
    "path": "compute/etc/sql_exporter/lfc_chunk_size.libsonnet",
    "content": "{\n  metric_name: 'lfc_chunk_size',\n  type: 'gauge',\n  help: 'LFC chunk size, measured in 8KiB pages',\n  key_labels: null,\n  values: [\n    'lfc_chunk_size_pages',\n  ],\n  query: importstr 'sql_exporter/lfc_chunk_size.sql',\n}\n"
  },
  {
    "path": "compute/etc/sql_exporter/lfc_chunk_size.sql",
    "content": "SELECT lfc_value AS lfc_chunk_size_pages FROM neon.neon_lfc_stats WHERE lfc_key = 'file_cache_chunk_size_pages';\n"
  },
  {
    "path": "compute/etc/sql_exporter/lfc_hits.libsonnet",
    "content": "{\n  metric_name: 'lfc_hits',\n  type: 'gauge',\n  help: 'lfc_hits',\n  key_labels: null,\n  values: [\n    'lfc_hits',\n  ],\n  query: importstr 'sql_exporter/lfc_hits.sql',\n}\n"
  },
  {
    "path": "compute/etc/sql_exporter/lfc_hits.sql",
    "content": "SELECT lfc_value AS lfc_hits FROM neon.neon_lfc_stats WHERE lfc_key = 'file_cache_hits';\n"
  },
  {
    "path": "compute/etc/sql_exporter/lfc_misses.libsonnet",
    "content": "{\n  metric_name: 'lfc_misses',\n  type: 'gauge',\n  help: 'lfc_misses',\n  key_labels: null,\n  values: [\n    'lfc_misses',\n  ],\n  query: importstr 'sql_exporter/lfc_misses.sql',\n}\n"
  },
  {
    "path": "compute/etc/sql_exporter/lfc_misses.sql",
    "content": "SELECT lfc_value AS lfc_misses FROM neon.neon_lfc_stats WHERE lfc_key = 'file_cache_misses';\n"
  },
  {
    "path": "compute/etc/sql_exporter/lfc_used.libsonnet",
    "content": "{\n  metric_name: 'lfc_used',\n  type: 'gauge',\n  help: 'LFC chunks used (chunk = 1MB)',\n  key_labels: null,\n  values: [\n    'lfc_used',\n  ],\n  query: importstr 'sql_exporter/lfc_used.sql',\n}\n"
  },
  {
    "path": "compute/etc/sql_exporter/lfc_used.sql",
    "content": "SELECT lfc_value AS lfc_used FROM neon.neon_lfc_stats WHERE lfc_key = 'file_cache_used';\n"
  },
  {
    "path": "compute/etc/sql_exporter/lfc_used_pages.libsonnet",
    "content": "{\n  metric_name: 'lfc_used_pages',\n  type: 'gauge',\n  help: 'LFC pages used',\n  key_labels: null,\n  values: [\n    'lfc_used_pages',\n  ],\n  query: importstr 'sql_exporter/lfc_used_pages.sql',\n}\n"
  },
  {
    "path": "compute/etc/sql_exporter/lfc_used_pages.sql",
    "content": "SELECT lfc_value AS lfc_used_pages FROM neon.neon_lfc_stats WHERE lfc_key = 'file_cache_used_pages';\n"
  },
  {
    "path": "compute/etc/sql_exporter/lfc_writes.libsonnet",
    "content": "{\n  metric_name: 'lfc_writes',\n  type: 'gauge',\n  help: 'lfc_writes',\n  key_labels: null,\n  values: [\n    'lfc_writes',\n  ],\n  query: importstr 'sql_exporter/lfc_writes.sql',\n}\n"
  },
  {
    "path": "compute/etc/sql_exporter/lfc_writes.sql",
    "content": "SELECT lfc_value AS lfc_writes FROM neon.neon_lfc_stats WHERE lfc_key = 'file_cache_writes';\n"
  },
  {
    "path": "compute/etc/sql_exporter/logical_slot_restart_lsn.libsonnet",
    "content": "// Number of slots is limited by max_replication_slots, so collecting position\n// for all of them shouldn't be bad.\n\n{\n  metric_name: 'logical_slot_restart_lsn',\n  type: 'gauge',\n  help: 'restart_lsn of logical slots',\n  key_labels: [\n    'slot_name',\n  ],\n  values: [\n    'restart_lsn',\n  ],\n  query: importstr 'sql_exporter/logical_slot_restart_lsn.sql',\n}\n"
  },
  {
    "path": "compute/etc/sql_exporter/logical_slot_restart_lsn.sql",
    "content": "SELECT slot_name, (restart_lsn - '0/0')::pg_catalog.FLOAT8 AS restart_lsn\nFROM pg_catalog.pg_replication_slots\nWHERE slot_type = 'logical';\n"
  },
  {
    "path": "compute/etc/sql_exporter/max_cluster_size.libsonnet",
    "content": "{\n  metric_name: 'max_cluster_size',\n  type: 'gauge',\n  help: 'neon.max_cluster_size setting',\n  key_labels: null,\n  values: [\n    'max_cluster_size',\n  ],\n  query: importstr 'sql_exporter/max_cluster_size.sql',\n}\n"
  },
  {
    "path": "compute/etc/sql_exporter/max_cluster_size.sql",
    "content": "SELECT setting::pg_catalog.int4 AS max_cluster_size FROM pg_catalog.pg_settings WHERE name = 'neon.max_cluster_size';\n"
  },
  {
    "path": "compute/etc/sql_exporter/neon_perf_counters.sql",
    "content": "WITH c AS (SELECT pg_catalog.jsonb_object_agg(metric, value) jb FROM neon.neon_perf_counters)\n\nSELECT d.* FROM pg_catalog.jsonb_to_record((SELECT jb FROM c)) AS d(\n  file_cache_read_wait_seconds_count numeric,\n  file_cache_read_wait_seconds_sum numeric,\n  file_cache_write_wait_seconds_count numeric,\n  file_cache_write_wait_seconds_sum numeric,\n  getpage_wait_seconds_count numeric,\n  getpage_wait_seconds_sum numeric,\n  getpage_prefetch_requests_total numeric,\n  getpage_sync_requests_total numeric,\n  compute_getpage_stuck_requests_total numeric,\n  compute_getpage_max_inflight_stuck_time_ms numeric,\n  getpage_prefetch_misses_total numeric,\n  getpage_prefetch_discards_total numeric,\n  getpage_prefetches_buffered numeric,\n  pageserver_requests_sent_total numeric,\n  pageserver_disconnects_total numeric,\n  pageserver_send_flushes_total numeric,\n  pageserver_open_requests numeric\n);\n"
  },
  {
    "path": "compute/etc/sql_exporter/pageserver_disconnects_total.libsonnet",
    "content": "{\n  metric_name: 'pageserver_disconnects_total',\n  type: 'counter',\n  help: 'Number of times that the connection to the pageserver was lost',\n  values: [\n    'pageserver_disconnects_total',\n  ],\n  query_ref: 'neon_perf_counters',\n}\n"
  },
  {
    "path": "compute/etc/sql_exporter/pageserver_open_requests.libsonnet",
    "content": "{\n  metric_name: 'pageserver_open_requests',\n  type: 'gauge',\n  help: 'Number of open requests to PageServer',\n  values: [\n    'pageserver_open_requests',\n  ],\n  query_ref: 'neon_perf_counters',\n}\n"
  },
  {
    "path": "compute/etc/sql_exporter/pageserver_requests_sent_total.libsonnet",
    "content": "{\n  metric_name: 'pageserver_requests_sent_total',\n  type: 'counter',\n  help: 'Number of all requests sent to the pageserver (not just GetPage requests)',\n  values: [\n    'pageserver_requests_sent_total',\n  ],\n  query_ref: 'neon_perf_counters',\n}\n"
  },
  {
    "path": "compute/etc/sql_exporter/pageserver_send_flushes_total.libsonnet",
    "content": "{\n  metric_name: 'pageserver_send_flushes_total',\n  type: 'counter',\n  help: 'Number of flushes to the pageserver connection',\n  values: [\n    'pageserver_send_flushes_total',\n  ],\n  query_ref: 'neon_perf_counters',\n}\n"
  },
  {
    "path": "compute/etc/sql_exporter/pg_stats_userdb.libsonnet",
    "content": "{\n  metric_name: 'pg_stats_userdb',\n  type: 'gauge',\n  help: 'Stats for several oldest non-system dbs',\n  key_labels: [\n    'datname',\n  ],\n  value_label: 'kind',\n  values: [\n    'db_size',\n    'deadlocks',\n    // Rows\n    'inserted',\n    'updated',\n    'deleted',\n  ],\n  query: importstr 'sql_exporter/pg_stats_userdb.sql',\n}\n"
  },
  {
    "path": "compute/etc/sql_exporter/pg_stats_userdb.sql",
    "content": "-- We export stats for 10 non-system databases. Without this limit it is too\n-- easy to abuse the system by creating lots of databases.\n\nSELECT pg_catalog.pg_database_size(datname) AS db_size,\n  deadlocks,\n  tup_inserted AS inserted,\n  tup_updated AS updated,\n  tup_deleted AS deleted,\n  datname\nFROM pg_catalog.pg_stat_database\nWHERE datname IN (\n  SELECT datname FROM pg_database\n  -- Ignore invalid databases, as we will likely have problems with\n  -- getting their size from the Pageserver.\n  WHERE datconnlimit != -2\n    AND datname <> 'postgres'\n    AND NOT datistemplate\n  ORDER BY oid\n  LIMIT 10\n);\n"
  },
  {
    "path": "compute/etc/sql_exporter/replication_delay_bytes.libsonnet",
    "content": "{\n  metric_name: 'replication_delay_bytes',\n  type: 'gauge',\n  help: 'Bytes between received and replayed LSN',\n  key_labels: null,\n  values: [\n    'replication_delay_bytes',\n  ],\n  query: importstr 'sql_exporter/replication_delay_bytes.sql',\n}\n"
  },
  {
    "path": "compute/etc/sql_exporter/replication_delay_bytes.sql",
    "content": "-- We use a GREATEST call here because this calculation can be negative. The\n-- calculation is not atomic, meaning after we've gotten the receive LSN, the\n-- replay LSN may have advanced past the receive LSN we are using for the\n-- calculation.\n\nSELECT GREATEST(0, pg_catalog.pg_wal_lsn_diff(pg_catalog.pg_last_wal_receive_lsn(), pg_catalog.pg_last_wal_replay_lsn())) AS replication_delay_bytes;\n"
  },
  {
    "path": "compute/etc/sql_exporter/replication_delay_seconds.libsonnet",
    "content": "{\n  metric_name: 'replication_delay_seconds',\n  type: 'gauge',\n  help: 'Time since last LSN was replayed',\n  key_labels: null,\n  values: [\n    'replication_delay_seconds',\n  ],\n  query: importstr 'sql_exporter/replication_delay_seconds.sql',\n}\n"
  },
  {
    "path": "compute/etc/sql_exporter/replication_delay_seconds.sql",
    "content": "SELECT\n  CASE\n    WHEN pg_catalog.pg_last_wal_receive_lsn() = pg_catalog.pg_last_wal_replay_lsn() THEN 0\n    ELSE GREATEST(0, EXTRACT (EPOCH FROM pg_catalog.now() - pg_catalog.pg_last_xact_replay_timestamp()))\n  END AS replication_delay_seconds;\n"
  },
  {
    "path": "compute/etc/sql_exporter/retained_wal.libsonnet",
    "content": "{\n  metric_name: 'retained_wal',\n  type: 'gauge',\n  help: 'Retained WAL in inactive replication slots',\n  key_labels: [\n    'slot_name',\n  ],\n  values: [\n    'retained_wal',\n  ],\n  query: importstr 'sql_exporter/retained_wal.sql',\n}\n"
  },
  {
    "path": "compute/etc/sql_exporter/retained_wal.sql",
    "content": "SELECT\n  slot_name,\n  pg_catalog.pg_wal_lsn_diff(\n    CASE\n      WHEN pg_catalog.pg_is_in_recovery() THEN pg_catalog.pg_last_wal_replay_lsn()\n      ELSE pg_catalog.pg_current_wal_lsn()\n    END,\n    restart_lsn)::pg_catalog.FLOAT8 AS retained_wal\nFROM pg_catalog.pg_replication_slots\nWHERE active = false;\n"
  },
  {
    "path": "compute/etc/sql_exporter/wal_is_lost.libsonnet",
    "content": "{\n  metric_name: 'wal_is_lost',\n  type: 'gauge',\n  help: 'Whether or not the replication slot wal_status is lost',\n  key_labels: [\n    'slot_name',\n  ],\n  values: [\n    'wal_is_lost',\n  ],\n  query: importstr 'sql_exporter/wal_is_lost.sql',\n}\n"
  },
  {
    "path": "compute/etc/sql_exporter/wal_is_lost.sql",
    "content": "SELECT\n  slot_name,\n  CASE\n    WHEN wal_status = 'lost' THEN 1\n    ELSE 0\n  END AS wal_is_lost\nFROM pg_catalog.pg_replication_slots;\n"
  },
  {
    "path": "compute/etc/sql_exporter.jsonnet",
    "content": "function(collector_name, collector_file, connection_string) {\n  // Configuration for sql_exporter for autoscaling-agent\n  // Global defaults.\n  global: {\n    // If scrape_timeout <= 0, no timeout is set unless Prometheus provides one. The default is 10s.\n    scrape_timeout: '10s',\n    // Subtracted from Prometheus' scrape_timeout to give us some headroom and prevent Prometheus from timing out first.\n    scrape_timeout_offset: '500ms',\n    // Minimum interval between collector runs: by default (0s) collectors are executed on every scrape.\n    min_interval: '0s',\n    // Maximum number of open connections to any one target. Metric queries will run concurrently on multiple connections,\n    // as will concurrent scrapes.\n    max_connections: 1,\n    // Maximum number of idle connections to any one target. Unless you use very long collection intervals, this should\n    // always be the same as max_connections.\n    max_idle_connections: 1,\n    // Maximum number of maximum amount of time a connection may be reused. Expired connections may be closed lazily before reuse.\n    // If 0, connections are not closed due to a connection's age.\n    max_connection_lifetime: '5m',\n  },\n\n  // The target to monitor and the collectors to execute on it.\n  target: {\n    // Data source name always has a URI schema that matches the driver name. In some cases (e.g. MySQL)\n    // the schema gets dropped or replaced to match the driver expected DSN format.\n    data_source_name: connection_string,\n\n    // Collectors (referenced by name) to execute on the target.\n    // Glob patterns are supported (see <https://pkg.go.dev/path/filepath#Match> for syntax).\n    collectors: [\n      collector_name,\n    ],\n  },\n\n  // Collector files specifies a list of globs. 
One collector definition is read from each matching file.\n  // Glob patterns are supported (see <https://pkg.go.dev/path/filepath#Match> for syntax).\n  collector_files: [\n    collector_file,\n  ],\n}\n"
  },
  {
    "path": "compute/jsonnet/neon.libsonnet",
    "content": "local MIN_SUPPORTED_VERSION = 14;\nlocal MAX_SUPPORTED_VERSION = 17;\nlocal SUPPORTED_VERSIONS = std.range(MIN_SUPPORTED_VERSION, MAX_SUPPORTED_VERSION);\n\n# If we receive the pg_version with a leading \"v\", ditch it.\nlocal pg_version = std.strReplace(std.extVar('pg_version'), 'v', '');\nlocal pg_version_num = std.parseInt(pg_version);\n\nassert std.setMember(pg_version_num, SUPPORTED_VERSIONS) :\n       std.format('%s is an unsupported Postgres version: %s',\n                  [pg_version, std.toString(SUPPORTED_VERSIONS)]);\n\n{\n  PG_MAJORVERSION: pg_version,\n  PG_MAJORVERSION_NUM: pg_version_num,\n}\n"
  },
  {
    "path": "compute/manifest.schema.json",
    "content": "{\n  \"$schema\": \"https://json-schema.org/draft/2020-12/schema\",\n  \"title\": \"Neon Compute Manifest Schema\",\n  \"description\": \"Schema for Neon compute node configuration manifest\",\n  \"type\": \"object\",\n  \"properties\": {\n    \"pg_settings\": {\n      \"type\": \"object\",\n      \"properties\": {\n        \"common\": {\n          \"type\": \"object\",\n          \"properties\": {\n            \"client_connection_check_interval\": {\n              \"type\": \"string\",\n              \"description\": \"Check for client disconnection interval in milliseconds\"\n            },\n            \"effective_io_concurrency\": {\n              \"type\": \"string\",\n              \"description\": \"Effective IO concurrency setting\"\n            },\n            \"fsync\": {\n              \"type\": \"string\",\n              \"enum\": [\"on\", \"off\"],\n              \"description\": \"Whether to force fsync to disk\"\n            },\n            \"hot_standby\": {\n              \"type\": \"string\",\n              \"enum\": [\"on\", \"off\"],\n              \"description\": \"Whether hot standby is enabled\"\n            },\n            \"idle_in_transaction_session_timeout\": {\n              \"type\": \"string\",\n              \"description\": \"Timeout for idle transactions in milliseconds\"\n            },\n            \"listen_addresses\": {\n              \"type\": \"string\",\n              \"description\": \"Addresses to listen on\"\n            },\n            \"log_connections\": {\n              \"type\": \"string\",\n              \"enum\": [\"on\", \"off\"],\n              \"description\": \"Whether to log connections\"\n            },\n            \"log_disconnections\": {\n              \"type\": \"string\",\n              \"enum\": [\"on\", \"off\"],\n              \"description\": \"Whether to log disconnections\"\n            },\n            \"log_temp_files\": {\n              \"type\": \"string\",\n              
\"description\": \"Size threshold for logging temporary files in KB\"\n            },\n            \"log_error_verbosity\": {\n              \"type\": \"string\",\n              \"enum\": [\"terse\", \"verbose\", \"default\"],\n              \"description\": \"Error logging verbosity level\"\n            },\n            \"log_min_error_statement\": {\n              \"type\": \"string\",\n              \"description\": \"Minimum error level for statement logging\"\n            },\n            \"maintenance_io_concurrency\": {\n              \"type\": \"string\",\n              \"description\": \"Maintenance IO concurrency setting\"\n            },\n            \"max_connections\": {\n              \"type\": \"string\",\n              \"description\": \"Maximum number of connections\"\n            },\n            \"max_replication_flush_lag\": {\n              \"type\": \"string\",\n              \"description\": \"Maximum replication flush lag\"\n            },\n            \"max_replication_slots\": {\n              \"type\": \"string\",\n              \"description\": \"Maximum number of replication slots\"\n            },\n            \"max_replication_write_lag\": {\n              \"type\": \"string\",\n              \"description\": \"Maximum replication write lag\"\n            },\n            \"max_wal_senders\": {\n              \"type\": \"string\",\n              \"description\": \"Maximum number of WAL senders\"\n            },\n            \"max_wal_size\": {\n              \"type\": \"string\",\n              \"description\": \"Maximum WAL size\"\n            },\n            \"neon.unstable_extensions\": {\n              \"type\": \"string\",\n              \"description\": \"List of unstable extensions\"\n            },\n            \"neon.protocol_version\": {\n              \"type\": \"string\",\n              \"description\": \"Neon protocol version\"\n            },\n            \"password_encryption\": {\n              \"type\": \"string\",\n      
        \"description\": \"Password encryption method\"\n            },\n            \"restart_after_crash\": {\n              \"type\": \"string\",\n              \"enum\": [\"on\", \"off\"],\n              \"description\": \"Whether to restart after crash\"\n            },\n            \"superuser_reserved_connections\": {\n              \"type\": \"string\",\n              \"description\": \"Number of reserved connections for superuser\"\n            },\n            \"synchronous_standby_names\": {\n              \"type\": \"string\",\n              \"description\": \"Names of synchronous standby servers\"\n            },\n            \"wal_keep_size\": {\n              \"type\": \"string\",\n              \"description\": \"WAL keep size\"\n            },\n            \"wal_level\": {\n              \"type\": \"string\",\n              \"description\": \"WAL level\"\n            },\n            \"wal_log_hints\": {\n              \"type\": \"string\",\n              \"enum\": [\"on\", \"off\"],\n              \"description\": \"Whether to log hints in WAL\"\n            },\n            \"wal_sender_timeout\": {\n              \"type\": \"string\",\n              \"description\": \"WAL sender timeout in milliseconds\"\n            }\n          },\n          \"required\": [\n            \"client_connection_check_interval\",\n            \"effective_io_concurrency\",\n            \"fsync\",\n            \"hot_standby\",\n            \"idle_in_transaction_session_timeout\",\n            \"listen_addresses\",\n            \"log_connections\",\n            \"log_disconnections\",\n            \"log_temp_files\",\n            \"log_error_verbosity\",\n            \"log_min_error_statement\",\n            \"maintenance_io_concurrency\",\n            \"max_connections\",\n            \"max_replication_flush_lag\",\n            \"max_replication_slots\",\n            \"max_replication_write_lag\",\n            \"max_wal_senders\",\n            \"max_wal_size\",\n         
   \"neon.unstable_extensions\",\n            \"neon.protocol_version\",\n            \"password_encryption\",\n            \"restart_after_crash\",\n            \"superuser_reserved_connections\",\n            \"synchronous_standby_names\",\n            \"wal_keep_size\",\n            \"wal_level\",\n            \"wal_log_hints\",\n            \"wal_sender_timeout\"\n          ]\n        },\n        \"replica\": {\n          \"type\": \"object\",\n          \"properties\": {\n            \"hot_standby\": {\n              \"type\": \"string\",\n              \"enum\": [\"on\", \"off\"],\n              \"description\": \"Whether hot standby is enabled for replicas\"\n            }\n          },\n          \"required\": [\"hot_standby\"]\n        },\n        \"per_version\": {\n          \"type\": \"object\",\n          \"patternProperties\": {\n            \"^1[4-7]$\": {\n              \"type\": \"object\",\n              \"properties\": {\n                \"common\": {\n                  \"type\": \"object\",\n                  \"properties\": {\n                    \"io_combine_limit\": {\n                      \"type\": \"string\",\n                      \"description\": \"IO combine limit\"\n                    }\n                  }\n                },\n                \"replica\": {\n                  \"type\": \"object\",\n                  \"properties\": {\n                    \"recovery_prefetch\": {\n                      \"type\": \"string\",\n                      \"enum\": [\"on\", \"off\"],\n                      \"description\": \"Whether to enable recovery prefetch for PostgreSQL replicas\"\n                    }\n                  }\n                }\n              }\n            }\n          }\n        }\n      },\n      \"required\": [\"common\", \"replica\", \"per_version\"]\n    }\n  },\n  \"required\": [\"pg_settings\"]\n} \n"
  },
  {
    "path": "compute/manifest.yaml",
    "content": "pg_settings:\n  # Common settings for primaries and replicas of all versions.\n  common:\n    # Check for client disconnection every 1 minute. By default, Postgres will detect the\n    # loss of the connection only at the next interaction with the socket, when it waits\n    # for, receives or sends data, so it will likely waste resources till the end of the\n    # query execution. There should be no drawbacks in setting this for everyone, so enable\n    # it by default. If anyone will complain, we can allow editing it.\n    # https://www.postgresql.org/docs/16/runtime-config-connection.html#GUC-CLIENT-CONNECTION-CHECK-INTERVAL\n    client_connection_check_interval: \"60000\" # 1 minute\n    # ---- IO ---- \n    effective_io_concurrency: \"20\"\n    maintenance_io_concurrency: \"100\"\n    fsync: \"off\"\n    hot_standby: \"off\"\n    # We allow users to change this if needed, but by default we\n    # just don't want to see long-lasting idle transactions, as they\n    # prevent activity monitor from suspending projects.\n    idle_in_transaction_session_timeout: \"300000\" # 5 minutes\n    listen_addresses: \"*\"\n    # --- LOGGING ---- helps investigations\n    log_connections: \"on\"\n    log_disconnections: \"on\"\n    # 1GB, unit is KB\n    log_temp_files: \"1048576\"\n    # Disable dumping customer data to logs, both to increase data privacy\n    # and to reduce the amount the logs.\n    log_error_verbosity: \"terse\"\n    log_min_error_statement: \"panic\"\n    max_connections: \"100\"\n    # --- WAL ---\n    # - flush lag is the max amount of WAL that has been generated but not yet stored\n    # to disk in the page server. A smaller value means less delay after a pageserver\n    # restart, but if you set it too small you might again need to slow down writes if the\n    # pageserver cannot flush incoming WAL to disk fast enough. This must be larger\n    # than the pageserver's checkpoint interval, currently 1 GB! 
Otherwise you get a\n    # deadlock where the compute node refuses to generate more WAL before the\n    # old WAL has been uploaded to S3, but the pageserver is waiting for more WAL\n    # to be generated before it is uploaded to S3.\n    max_replication_flush_lag: \"10GB\"\n    max_replication_slots: \"10\"\n    # Backpressure configuration:\n    # - write lag is the max amount of WAL that has been generated by Postgres but not yet\n    # processed by the page server. Making this smaller reduces the worst case latency\n    # of a GetPage request, if you request a page that was recently modified. On the other\n    # hand, if this is too small, the compute node might need to wait on a write if there is a\n    # hiccup in the network or page server so that the page server has temporarily fallen\n    # behind.\n    #\n    # Previously it was set to 500 MB, but it caused compute to become unresponsive under load\n    # https://github.com/neondatabase/neon/issues/2028\n    max_replication_write_lag: \"500MB\"\n    max_wal_senders: \"10\"\n    # A Postgres checkpoint is cheap in storage, as it doesn't involve any significant amount\n    # of real I/O. Only the SLRU buffers and some other small files are flushed to disk.\n    # However, as long as we have full_page_writes=on, page updates after a checkpoint\n    # include full-page images which bloats the WAL. 
So we may want to bump max_wal_size to\n    # reduce the WAL bloating, but at the same time it will increase pg_wal directory size on\n    # compute and can lead to out of disk error on k8s nodes.\n    max_wal_size: \"1024\"\n    wal_keep_size: \"0\"\n    wal_level: \"replica\"\n    # Reduce amount of WAL generated by default.\n    wal_log_hints: \"off\"\n    # - without wal_sender_timeout set we don't get feedback messages,\n    # required for backpressure.\n    wal_sender_timeout: \"10000\"\n    # We have some experimental extensions, which we don't want users to install unconsciously.\n    # To install them, users would need to set the `neon.allow_unstable_extensions` setting.\n    # There are two of them currently:\n    # - `pgrag` - https://github.com/neondatabase-labs/pgrag - extension is actually called just `rag`,\n    #                                                          and two dependencies:\n    #                                                          - `rag_bge_small_en_v15`\n    #                                                          - `rag_jina_reranker_v1_tiny_en`\n    # - `pg_mooncake` - https://github.com/Mooncake-Labs/pg_mooncake/  \n    neon.unstable_extensions: \"rag,rag_bge_small_en_v15,rag_jina_reranker_v1_tiny_en,pg_mooncake,anon\"\n    neon.protocol_version: \"3\"\n    password_encryption: \"scram-sha-256\"\n    # This is important to prevent Postgres from trying to perform\n    # a local WAL redo after backend crash. It should exit and let\n    # systemd or k8s do a fresh startup with compute_ctl.\n    restart_after_crash: \"off\"\n    # By default 3. We have the following persistent connections in the VM:\n    # * compute_activity_monitor (from compute_ctl)\n    # * postgres-exporter (metrics collector; it has 2 connections)\n    # * sql_exporter (metrics collector; we have 2 instances [1 for us & users; 1 for autoscaling])\n    # * vm-monitor (to query & change file cache size)\n    # i.e. total of 6. 
Let's reserve 7, so there's still at least one left over.\n    superuser_reserved_connections: \"7\"\n    synchronous_standby_names: \"walproposer\"\n\n  replica:\n    hot_standby: \"on\"\n\n  per_version:\n    17:\n      common:\n        # PostgreSQL 17 has a new IO system called \"read stream\", which can combine IOs up to some\n        # size. It still has some issues with readahead, though, so we default to disabled/\n        # \"no combining of IOs\" to make sure we get the maximum prefetch depth.\n        # See also: https://github.com/neondatabase/neon/pull/9860\n        io_combine_limit: \"1\"\n      replica:\n        # prefetching of blocks referenced in WAL doesn't make sense for us\n        # Neon hot standby ignores pages that are not in the shared_buffers\n        recovery_prefetch: \"off\"\n    16:\n      common: {}\n      replica:\n        # prefetching of blocks referenced in WAL doesn't make sense for us\n        # Neon hot standby ignores pages that are not in the shared_buffers\n        recovery_prefetch: \"off\"\n    15:\n      common: {}\n      replica:\n        # prefetching of blocks referenced in WAL doesn't make sense for us\n        # Neon hot standby ignores pages that are not in the shared_buffers\n        recovery_prefetch: \"off\"\n    14:\n      common: {}\n      replica: {}\n"
  },
  {
    "path": "compute/patches/anon_v2.patch",
    "content": "diff --git a/sql/anon.sql b/sql/anon.sql\nindex 0cdc769..5eab1d6 100644\n--- a/sql/anon.sql\n+++ b/sql/anon.sql\n@@ -1141,3 +1141,19 @@ $$\n -- TODO : https://en.wikipedia.org/wiki/L-diversity\n \n -- TODO : https://en.wikipedia.org/wiki/T-closeness\n+\n+-- NEON Patches\n+\n+DO $$\n+DECLARE\n+  privileged_role_name text;\n+BEGIN\n+  privileged_role_name := current_setting('neon.privileged_role_name');\n+\n+  EXECUTE format('GRANT ALL ON SCHEMA anon to %I', privileged_role_name);\n+  EXECUTE format('GRANT ALL ON ALL TABLES IN SCHEMA anon TO %I', privileged_role_name);\n+\n+  IF current_setting('server_version_num')::int >= 150000 THEN\n+    EXECUTE format('GRANT SET ON PARAMETER anon.transparent_dynamic_masking TO %I', privileged_role_name);\n+  END IF;\n+END $$;\ndiff --git a/sql/init.sql b/sql/init.sql\nindex 7da6553..9b6164b 100644\n--- a/sql/init.sql\n+++ b/sql/init.sql\n@@ -74,50 +74,49 @@ $$\n \n SECURITY LABEL FOR anon ON FUNCTION anon.load_csv IS 'UNTRUSTED';\n \n--- load fake data from a given path\n-CREATE OR REPLACE FUNCTION anon.init(\n-  datapath TEXT\n-)\n+CREATE OR REPLACE FUNCTION anon.load_fake_data()\n RETURNS BOOLEAN\n AS $$\n DECLARE\n-  datapath_check TEXT;\n   success BOOLEAN;\n+  sharedir TEXT;\n+  datapath TEXT;\n BEGIN\n \n-  IF anon.is_initialized() THEN\n-    RAISE NOTICE 'The anon extension is already initialized.';\n-    RETURN TRUE;\n-  END IF;\n+  datapath := '/extension/anon/';\n+  -- find the local extension directory\n+  SELECT setting INTO sharedir\n+  FROM pg_catalog.pg_config\n+  WHERE name = 'SHAREDIR';\n \n   SELECT bool_or(results) INTO success\n   FROM unnest(array[\n-    anon.load_csv('anon.identifiers_category',datapath||'/identifiers_category.csv'),\n-    anon.load_csv('anon.identifier',datapath ||'/identifier.csv'),\n-    anon.load_csv('anon.address',datapath ||'/address.csv'),\n-    anon.load_csv('anon.city',datapath ||'/city.csv'),\n-    anon.load_csv('anon.company',datapath ||'/company.csv'),\n-    
anon.load_csv('anon.country',datapath ||'/country.csv'),\n-    anon.load_csv('anon.email', datapath ||'/email.csv'),\n-    anon.load_csv('anon.first_name',datapath ||'/first_name.csv'),\n-    anon.load_csv('anon.iban',datapath ||'/iban.csv'),\n-    anon.load_csv('anon.last_name',datapath ||'/last_name.csv'),\n-    anon.load_csv('anon.postcode',datapath ||'/postcode.csv'),\n-    anon.load_csv('anon.siret',datapath ||'/siret.csv'),\n-    anon.load_csv('anon.lorem_ipsum',datapath ||'/lorem_ipsum.csv')\n+    anon.load_csv('anon.identifiers_category',sharedir || datapath || '/identifiers_category.csv'),\n+    anon.load_csv('anon.identifier',sharedir || datapath || '/identifier.csv'),\n+    anon.load_csv('anon.address',sharedir || datapath || '/address.csv'),\n+    anon.load_csv('anon.city',sharedir || datapath || '/city.csv'),\n+    anon.load_csv('anon.company',sharedir || datapath || '/company.csv'),\n+    anon.load_csv('anon.country',sharedir || datapath || '/country.csv'),\n+    anon.load_csv('anon.email', sharedir || datapath || '/email.csv'),\n+    anon.load_csv('anon.first_name',sharedir || datapath || '/first_name.csv'),\n+    anon.load_csv('anon.iban',sharedir || datapath || '/iban.csv'),\n+    anon.load_csv('anon.last_name',sharedir || datapath || '/last_name.csv'),\n+    anon.load_csv('anon.postcode',sharedir || datapath || '/postcode.csv'),\n+    anon.load_csv('anon.siret',sharedir || datapath || '/siret.csv'),\n+    anon.load_csv('anon.lorem_ipsum',sharedir || datapath || '/lorem_ipsum.csv')\n   ]) results;\n   RETURN success;\n-\n END;\n $$\n-  LANGUAGE PLPGSQL\n+  LANGUAGE plpgsql\n   VOLATILE\n   RETURNS NULL ON NULL INPUT\n-  PARALLEL UNSAFE -- because load_csv is unsafe\n-  SECURITY INVOKER\n+  PARALLEL UNSAFE -- because of the EXCEPTION\n+  SECURITY DEFINER\n   SET search_path=''\n ;\n-SECURITY LABEL FOR anon ON FUNCTION anon.init(TEXT) IS 'UNTRUSTED';\n+\n+SECURITY LABEL FOR anon ON FUNCTION anon.load_fake_data IS 'UNTRUSTED';\n \n -- People tend to 
forget the anon.init() step\n -- This is a friendly notice for them\n@@ -144,7 +143,7 @@ SECURITY LABEL FOR anon ON FUNCTION anon.notice_if_not_init IS 'UNTRUSTED';\n CREATE OR REPLACE FUNCTION anon.load(TEXT)\n RETURNS BOOLEAN AS\n $$\n-  SELECT anon.init($1);\n+  SELECT anon.init();\n $$\n   LANGUAGE SQL\n   VOLATILE\n@@ -159,16 +158,16 @@ SECURITY LABEL FOR anon ON FUNCTION anon.load(TEXT) IS 'UNTRUSTED';\n CREATE OR REPLACE FUNCTION anon.init()\n RETURNS BOOLEAN\n AS $$\n-  WITH conf AS (\n-        -- find the local extension directory\n-        SELECT setting AS sharedir\n-        FROM pg_catalog.pg_config\n-        WHERE name = 'SHAREDIR'\n-    )\n-  SELECT anon.init(conf.sharedir || '/extension/anon/')\n-  FROM conf;\n+BEGIN\n+  IF anon.is_initialized() THEN\n+    RAISE NOTICE 'The anon extension is already initialized.';\n+    RETURN TRUE;\n+  END IF;\n+\n+  RETURN anon.load_fake_data();\n+END;\n $$\n-  LANGUAGE SQL\n+  LANGUAGE plpgsql\n   VOLATILE\n   PARALLEL UNSAFE -- because init is unsafe\n   SECURITY INVOKER\n"
  },
  {
    "path": "compute/patches/cloud_regress_pg16.patch",
    "content": "diff --git a/src/test/regress/expected/aggregates.out b/src/test/regress/expected/aggregates.out\nindex 0c24f6afe4..dd808ac2b4 100644\n--- a/src/test/regress/expected/aggregates.out\n+++ b/src/test/regress/expected/aggregates.out\n@@ -11,7 +11,8 @@ CREATE TABLE aggtest (\n \tb\t\t\tfloat4\n );\n \\set filename :abs_srcdir '/data/agg.data'\n-COPY aggtest FROM :'filename';\n+\\set command '\\\\copy aggtest FROM ' :'filename';\n+:command\n ANALYZE aggtest;\n SELECT avg(four) AS avg_1 FROM onek;\n        avg_1        \ndiff --git a/src/test/regress/expected/alter_generic.out b/src/test/regress/expected/alter_generic.out\nindex ae54cb254f..888e2ee8bc 100644\n--- a/src/test/regress/expected/alter_generic.out\n+++ b/src/test/regress/expected/alter_generic.out\n@@ -15,9 +15,9 @@ DROP ROLE IF EXISTS regress_alter_generic_user1;\n DROP ROLE IF EXISTS regress_alter_generic_user2;\n DROP ROLE IF EXISTS regress_alter_generic_user3;\n RESET client_min_messages;\n-CREATE USER regress_alter_generic_user3;\n-CREATE USER regress_alter_generic_user2;\n-CREATE USER regress_alter_generic_user1 IN ROLE regress_alter_generic_user3;\n+CREATE USER regress_alter_generic_user3 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE USER regress_alter_generic_user2 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE USER regress_alter_generic_user1 PASSWORD NEON_PASSWORD_PLACEHOLDER IN ROLE regress_alter_generic_user3;\n CREATE SCHEMA alt_nsp1;\n CREATE SCHEMA alt_nsp2;\n GRANT ALL ON SCHEMA alt_nsp1, alt_nsp2 TO public;\n@@ -370,7 +370,7 @@ ERROR:  STORAGE cannot be specified in ALTER OPERATOR FAMILY\n DROP OPERATOR FAMILY alt_opf4 USING btree;\n -- Should fail. Need to be SUPERUSER to do ALTER OPERATOR FAMILY .. 
ADD / DROP\n BEGIN TRANSACTION;\n-CREATE ROLE regress_alter_generic_user5 NOSUPERUSER;\n+CREATE ROLE regress_alter_generic_user5 PASSWORD NEON_PASSWORD_PLACEHOLDER NOSUPERUSER;\n CREATE OPERATOR FAMILY alt_opf5 USING btree;\n SET ROLE regress_alter_generic_user5;\n ALTER OPERATOR FAMILY alt_opf5 USING btree ADD OPERATOR 1 < (int4, int2), FUNCTION 1 btint42cmp(int4, int2);\n@@ -382,7 +382,7 @@ ERROR:  current transaction is aborted, commands ignored until end of transactio\n ROLLBACK;\n -- Should fail. Need rights to namespace for ALTER OPERATOR FAMILY .. ADD / DROP\n BEGIN TRANSACTION;\n-CREATE ROLE regress_alter_generic_user6;\n+CREATE ROLE regress_alter_generic_user6 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n CREATE SCHEMA alt_nsp6;\n REVOKE ALL ON SCHEMA alt_nsp6 FROM regress_alter_generic_user6;\n CREATE OPERATOR FAMILY alt_nsp6.alt_opf6 USING btree;\ndiff --git a/src/test/regress/expected/alter_operator.out b/src/test/regress/expected/alter_operator.out\nindex 71bd484282..066ea4ec0d 100644\n--- a/src/test/regress/expected/alter_operator.out\n+++ b/src/test/regress/expected/alter_operator.out\n@@ -127,7 +127,7 @@ ERROR:  operator attribute \"Restrict\" not recognized\n --\n -- Test permission check. 
Must be owner to ALTER OPERATOR.\n --\n-CREATE USER regress_alter_op_user;\n+CREATE USER regress_alter_op_user PASSWORD NEON_PASSWORD_PLACEHOLDER;\n SET SESSION AUTHORIZATION regress_alter_op_user;\n ALTER OPERATOR === (boolean, boolean) SET (RESTRICT = NONE);\n ERROR:  must be owner of operator ===\ndiff --git a/src/test/regress/expected/alter_table.out b/src/test/regress/expected/alter_table.out\nindex 0e439a6488..393f316c3e 100644\n--- a/src/test/regress/expected/alter_table.out\n+++ b/src/test/regress/expected/alter_table.out\n@@ -5,7 +5,7 @@\n SET client_min_messages TO 'warning';\n DROP ROLE IF EXISTS regress_alter_table_user1;\n RESET client_min_messages;\n-CREATE USER regress_alter_table_user1;\n+CREATE USER regress_alter_table_user1 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n --\n -- add attribute\n --\n@@ -3896,8 +3896,8 @@ DROP TABLE fail_part;\n ALTER TABLE list_parted ATTACH PARTITION nonexistent FOR VALUES IN (1);\n ERROR:  relation \"nonexistent\" does not exist\n -- check ownership of the source table\n-CREATE ROLE regress_test_me;\n-CREATE ROLE regress_test_not_me;\n+CREATE ROLE regress_test_me PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE ROLE regress_test_not_me PASSWORD NEON_PASSWORD_PLACEHOLDER;\n CREATE TABLE not_owned_by_me (LIKE list_parted);\n ALTER TABLE not_owned_by_me OWNER TO regress_test_not_me;\n SET SESSION AUTHORIZATION regress_test_me;\ndiff --git a/src/test/regress/expected/arrays.out b/src/test/regress/expected/arrays.out\nindex 57a283dc59..9672d526b4 100644\n--- a/src/test/regress/expected/arrays.out\n+++ b/src/test/regress/expected/arrays.out\n@@ -18,7 +18,8 @@ CREATE TABLE array_op_test (\n \tt\t\t\ttext[]\n );\n \\set filename :abs_srcdir '/data/array.data'\n-COPY array_op_test FROM :'filename';\n+\\set command '\\\\copy array_op_test FROM ' :'filename';\n+:command\n ANALYZE array_op_test;\n --\n -- only the 'e' array is 0-based, the others are 1-based.\ndiff --git a/src/test/regress/expected/btree_index.out 
b/src/test/regress/expected/btree_index.out\nindex 93ed5e8cc0..54bd7d535c 100644\n--- a/src/test/regress/expected/btree_index.out\n+++ b/src/test/regress/expected/btree_index.out\n@@ -20,13 +20,17 @@ CREATE TABLE bt_f8_heap (\n \trandom \t\tint4\n );\n \\set filename :abs_srcdir '/data/desc.data'\n-COPY bt_i4_heap FROM :'filename';\n+\\set command '\\\\copy bt_i4_heap FROM ' :'filename';\n+:command\n \\set filename :abs_srcdir '/data/hash.data'\n-COPY bt_name_heap FROM :'filename';\n+\\set command '\\\\copy bt_name_heap FROM ' :'filename';\n+:command\n \\set filename :abs_srcdir '/data/desc.data'\n-COPY bt_txt_heap FROM :'filename';\n+\\set command '\\\\copy bt_txt_heap FROM ' :'filename';\n+:command\n \\set filename :abs_srcdir '/data/hash.data'\n-COPY bt_f8_heap FROM :'filename';\n+\\set command '\\\\copy bt_f8_heap FROM ' :'filename';\n+:command\n ANALYZE bt_i4_heap;\n ANALYZE bt_name_heap;\n ANALYZE bt_txt_heap;\ndiff --git a/src/test/regress/expected/cluster.out b/src/test/regress/expected/cluster.out\nindex 542c2e098c..0062d3024f 100644\n--- a/src/test/regress/expected/cluster.out\n+++ b/src/test/regress/expected/cluster.out\n@@ -308,7 +308,7 @@ WHERE pg_class.oid=indexrelid\n -- Verify that toast tables are clusterable\n CLUSTER pg_toast.pg_toast_826 USING pg_toast_826_index;\n -- Verify that clustering all tables does in fact cluster the right ones\n-CREATE USER regress_clstr_user;\n+CREATE USER regress_clstr_user PASSWORD NEON_PASSWORD_PLACEHOLDER;\n CREATE TABLE clstr_1 (a INT PRIMARY KEY);\n CREATE TABLE clstr_2 (a INT PRIMARY KEY);\n CREATE TABLE clstr_3 (a INT PRIMARY KEY);\n@@ -497,7 +497,7 @@ DROP TABLE clstrpart;\n CREATE TABLE ptnowner(i int unique) PARTITION BY LIST (i);\n CREATE INDEX ptnowner_i_idx ON ptnowner(i);\n CREATE TABLE ptnowner1 PARTITION OF ptnowner FOR VALUES IN (1);\n-CREATE ROLE regress_ptnowner;\n+CREATE ROLE regress_ptnowner PASSWORD NEON_PASSWORD_PLACEHOLDER;\n CREATE TABLE ptnowner2 PARTITION OF ptnowner FOR VALUES IN (2);\n 
ALTER TABLE ptnowner1 OWNER TO regress_ptnowner;\n ALTER TABLE ptnowner OWNER TO regress_ptnowner;\ndiff --git a/src/test/regress/expected/collate.icu.utf8.out b/src/test/regress/expected/collate.icu.utf8.out\nindex 3f9a8f539c..0a51b52940 100644\n--- a/src/test/regress/expected/collate.icu.utf8.out\n+++ b/src/test/regress/expected/collate.icu.utf8.out\n@@ -1016,7 +1016,7 @@ select * from collate_test1 where b ilike 'ABC';\n \n reset enable_seqscan;\n -- schema manipulation commands\n-CREATE ROLE regress_test_role;\n+CREATE ROLE regress_test_role PASSWORD NEON_PASSWORD_PLACEHOLDER;\n CREATE SCHEMA test_schema;\n -- We need to do this this way to cope with varying names for encodings:\n SET client_min_messages TO WARNING;\ndiff --git a/src/test/regress/expected/constraints.out b/src/test/regress/expected/constraints.out\nindex cf0b80d616..e8e2a14a4a 100644\n--- a/src/test/regress/expected/constraints.out\n+++ b/src/test/regress/expected/constraints.out\n@@ -349,7 +349,8 @@ CREATE TABLE COPY_TBL (x INT, y TEXT, z INT,\n \tCONSTRAINT COPY_CON\n \tCHECK (x > 3 AND y <> 'check failed' AND x < 7 ));\n \\set filename :abs_srcdir '/data/constro.data'\n-COPY COPY_TBL FROM :'filename';\n+\\set command '\\\\copy COPY_TBL FROM ' :'filename';\n+:command\n SELECT * FROM COPY_TBL;\n  x |       y       | z \n ---+---------------+---\n@@ -358,7 +359,8 @@ SELECT * FROM COPY_TBL;\n (2 rows)\n \n \\set filename :abs_srcdir '/data/constrf.data'\n-COPY COPY_TBL FROM :'filename';\n+\\set command '\\\\copy COPY_TBL FROM ' :'filename';\n+:command\n ERROR:  new row for relation \"copy_tbl\" violates check constraint \"copy_con\"\n DETAIL:  Failing row contains (7, check failed, 6).\n CONTEXT:  COPY copy_tbl, line 2: \"7\tcheck failed\t6\"\n@@ -799,7 +801,7 @@ DETAIL:  Key (f1)=(3) conflicts with key (f1)=(3).\n DROP TABLE deferred_excl;\n -- Comments\n -- Setup a low-level role to enforce non-superuser checks.\n-CREATE ROLE regress_constraint_comments;\n+CREATE ROLE 
regress_constraint_comments PASSWORD NEON_PASSWORD_PLACEHOLDER;\n SET SESSION AUTHORIZATION regress_constraint_comments;\n CREATE TABLE constraint_comments_tbl (a int CONSTRAINT the_constraint CHECK (a > 0));\n CREATE DOMAIN constraint_comments_dom AS int CONSTRAINT the_constraint CHECK (value > 0);\n@@ -819,7 +821,7 @@ COMMENT ON CONSTRAINT the_constraint ON constraint_comments_tbl IS NULL;\n COMMENT ON CONSTRAINT the_constraint ON DOMAIN constraint_comments_dom IS NULL;\n -- unauthorized user\n RESET SESSION AUTHORIZATION;\n-CREATE ROLE regress_constraint_comments_noaccess;\n+CREATE ROLE regress_constraint_comments_noaccess PASSWORD NEON_PASSWORD_PLACEHOLDER;\n SET SESSION AUTHORIZATION regress_constraint_comments_noaccess;\n COMMENT ON CONSTRAINT the_constraint ON constraint_comments_tbl IS 'no, the comment';\n ERROR:  must be owner of relation constraint_comments_tbl\ndiff --git a/src/test/regress/expected/conversion.out b/src/test/regress/expected/conversion.out\nindex d785f92561..16377e5ac9 100644\n--- a/src/test/regress/expected/conversion.out\n+++ b/src/test/regress/expected/conversion.out\n@@ -15,7 +15,7 @@ SELECT FROM test_enc_setup();\n CREATE FUNCTION test_enc_conversion(bytea, name, name, bool, validlen OUT int, result OUT bytea)\n     AS :'regresslib', 'test_enc_conversion'\n     LANGUAGE C STRICT;\n-CREATE USER regress_conversion_user WITH NOCREATEDB NOCREATEROLE;\n+CREATE USER regress_conversion_user WITH NOCREATEDB NOCREATEROLE PASSWORD NEON_PASSWORD_PLACEHOLDER;\n SET SESSION AUTHORIZATION regress_conversion_user;\n CREATE CONVERSION myconv FOR 'LATIN1' TO 'UTF8' FROM iso8859_1_to_utf8;\n --\ndiff --git a/src/test/regress/expected/copy.out b/src/test/regress/expected/copy.out\nindex b48365ec98..a6ef910055 100644\n--- a/src/test/regress/expected/copy.out\n+++ b/src/test/regress/expected/copy.out\n@@ -15,9 +15,11 @@ insert into copytest values('Unix',E'abc\\ndef',2);\n insert into copytest values('Mac',E'abc\\rdef',3);\n insert into copytest 
values(E'esc\\\\ape',E'a\\\\r\\\\\\r\\\\\\n\\\\nb',4);\n \\set filename :abs_builddir '/results/copytest.csv'\n-copy copytest to :'filename' csv;\n+\\set command '\\\\copy copytest to ' :'filename' csv;\n+:command\n create temp table copytest2 (like copytest);\n-copy copytest2 from :'filename' csv;\n+\\set command '\\\\copy copytest2 from ' :'filename' csv;\n+:command\n select * from copytest except select * from copytest2;\n  style | test | filler \n -------+------+--------\n@@ -25,8 +27,10 @@ select * from copytest except select * from copytest2;\n \n truncate copytest2;\n --- same test but with an escape char different from quote char\n-copy copytest to :'filename' csv quote '''' escape E'\\\\';\n-copy copytest2 from :'filename' csv quote '''' escape E'\\\\';\n+\\set command '\\\\copy copytest to ' :'filename' ' csv quote ' '\\'\\'\\'\\'' ' escape ' 'E\\'' '\\\\\\\\\\'';\n+:command\n+\\set command '\\\\copy copytest2 from ' :'filename' ' csv quote ' '\\'\\'\\'\\'' ' escape ' 'E\\'' '\\\\\\\\\\'';\n+:command\n select * from copytest except select * from copytest2;\n  style | test | filler \n -------+------+--------\n@@ -66,13 +70,16 @@ insert into parted_copytest select x,1,'One' from generate_series(1,1000) x;\n insert into parted_copytest select x,2,'Two' from generate_series(1001,1010) x;\n insert into parted_copytest select x,1,'One' from generate_series(1011,1020) x;\n \\set filename :abs_builddir '/results/parted_copytest.csv'\n-copy (select * from parted_copytest order by a) to :'filename';\n+\\set command '\\\\copy (select * from parted_copytest order by a) to ' :'filename';\n+:command\n truncate parted_copytest;\n-copy parted_copytest from :'filename';\n+\\set command '\\\\copy parted_copytest from ' :'filename';\n+:command\n -- Ensure COPY FREEZE errors for partitioned tables.\n begin;\n truncate parted_copytest;\n-copy parted_copytest from :'filename' (freeze);\n+\\set command '\\\\copy parted_copytest from ' :'filename' (freeze);\n+:command\n ERROR:  
cannot perform COPY FREEZE on a partitioned table\n rollback;\n select tableoid::regclass,count(*),sum(a) from parted_copytest\n@@ -94,7 +101,8 @@ create trigger part_ins_trig\n \tbefore insert on parted_copytest_a2\n \tfor each row\n \texecute procedure part_ins_func();\n-copy parted_copytest from :'filename';\n+\\set command '\\\\copy parted_copytest from ' :'filename';\n+:command\n select tableoid::regclass,count(*),sum(a) from parted_copytest\n group by tableoid order by tableoid::regclass::name;\n       tableoid      | count |  sum   \n@@ -106,7 +114,8 @@ group by tableoid order by tableoid::regclass::name;\n truncate table parted_copytest;\n create index on parted_copytest (b);\n drop trigger part_ins_trig on parted_copytest_a2;\n-copy parted_copytest from stdin;\n+\\set command '\\\\copy parted_copytest from ' stdin;\n+:command\n -- Ensure index entries were properly added during the copy.\n select * from parted_copytest where b = 1;\n  a | b |  c   \n@@ -170,9 +179,9 @@ INFO:  progress: {\"type\": \"PIPE\", \"command\": \"COPY FROM\", \"relname\": \"tab_progre\n -- Generate COPY FROM report with FILE, with some excluded tuples.\n truncate tab_progress_reporting;\n \\set filename :abs_srcdir '/data/emp.data'\n-copy tab_progress_reporting from :'filename'\n-\twhere (salary < 2000);\n-INFO:  progress: {\"type\": \"FILE\", \"command\": \"COPY FROM\", \"relname\": \"tab_progress_reporting\", \"has_bytes_total\": true, \"tuples_excluded\": 1, \"tuples_processed\": 2, \"has_bytes_processed\": true}\n+\\set command '\\\\copy tab_progress_reporting from ' :'filename' 'where (salary < 2000)';\n+:command\n+INFO:  progress: {\"type\": \"PIPE\", \"command\": \"COPY FROM\", \"relname\": \"tab_progress_reporting\", \"has_bytes_total\": false, \"tuples_excluded\": 1, \"tuples_processed\": 2, \"has_bytes_processed\": true}\n drop trigger check_after_tab_progress_reporting on tab_progress_reporting;\n drop function notice_after_tab_progress_reporting();\n drop table 
tab_progress_reporting;\n@@ -281,7 +290,8 @@ CREATE TABLE parted_si_p_odd PARTITION OF parted_si FOR VALUES IN (1);\n -- https://postgr.es/m/18130-7a86a7356a75209d%40postgresql.org\n -- https://postgr.es/m/257696.1695670946%40sss.pgh.pa.us\n \\set filename :abs_srcdir '/data/desc.data'\n-COPY parted_si(id, data) FROM :'filename';\n+\\set command '\\\\COPY parted_si(id, data) FROM ' :'filename';\n+:command\n -- An earlier bug (see commit b1ecb9b3fcf) could end up using a buffer from\n -- the wrong partition. This test is *not* guaranteed to trigger that bug, but\n -- does so when shared_buffers is small enough.  To test if we encountered the\ndiff --git a/src/test/regress/expected/copy2.out b/src/test/regress/expected/copy2.out\nindex 9a74820ee8..22400a5551 100644\n--- a/src/test/regress/expected/copy2.out\n+++ b/src/test/regress/expected/copy2.out\n@@ -553,8 +553,8 @@ select * from check_con_tbl;\n (2 rows)\n \n -- test with RLS enabled.\n-CREATE ROLE regress_rls_copy_user;\n-CREATE ROLE regress_rls_copy_user_colperms;\n+CREATE ROLE regress_rls_copy_user PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE ROLE regress_rls_copy_user_colperms PASSWORD NEON_PASSWORD_PLACEHOLDER;\n CREATE TABLE rls_t1 (a int, b int, c int);\n COPY rls_t1 (a, b, c) from stdin;\n CREATE POLICY p1 ON rls_t1 FOR SELECT USING (a % 2 = 0);\ndiff --git a/src/test/regress/expected/create_function_sql.out b/src/test/regress/expected/create_function_sql.out\nindex 50aca5940f..42527142f6 100644\n--- a/src/test/regress/expected/create_function_sql.out\n+++ b/src/test/regress/expected/create_function_sql.out\n@@ -4,7 +4,7 @@\n -- Assorted tests using SQL-language functions\n --\n -- All objects made in this test are in temp_func_test schema\n-CREATE USER regress_unpriv_user;\n+CREATE USER regress_unpriv_user PASSWORD NEON_PASSWORD_PLACEHOLDER;\n CREATE SCHEMA temp_func_test;\n GRANT ALL ON SCHEMA temp_func_test TO public;\n SET search_path TO temp_func_test, public;\ndiff --git 
a/src/test/regress/expected/create_index.out b/src/test/regress/expected/create_index.out\nindex acfd9d1f4f..0eeb64e47a 100644\n--- a/src/test/regress/expected/create_index.out\n+++ b/src/test/regress/expected/create_index.out\n@@ -51,7 +51,8 @@ CREATE TABLE fast_emp4000 (\n \thome_base\t box\n );\n \\set filename :abs_srcdir '/data/rect.data'\n-COPY slow_emp4000 FROM :'filename';\n+\\set command '\\\\copy slow_emp4000 FROM ' :'filename';\n+:command\n INSERT INTO fast_emp4000 SELECT * FROM slow_emp4000;\n ANALYZE slow_emp4000;\n ANALYZE fast_emp4000;\n@@ -655,7 +656,8 @@ CREATE TABLE array_index_op_test (\n \tt\t\t\ttext[]\n );\n \\set filename :abs_srcdir '/data/array.data'\n-COPY array_index_op_test FROM :'filename';\n+\\set command '\\\\copy array_index_op_test FROM ' :'filename';\n+:command\n ANALYZE array_index_op_test;\n SELECT * FROM array_index_op_test WHERE i = '{NULL}' ORDER BY seqno;\n  seqno |   i    |   t    \n@@ -2822,7 +2824,7 @@ END;\n -- concurrently\n REINDEX SCHEMA CONCURRENTLY schema_to_reindex;\n -- Failure for unauthorized user\n-CREATE ROLE regress_reindexuser NOLOGIN;\n+CREATE ROLE regress_reindexuser NOLOGIN PASSWORD NEON_PASSWORD_PLACEHOLDER;\n SET SESSION ROLE regress_reindexuser;\n REINDEX SCHEMA schema_to_reindex;\n ERROR:  must be owner of schema schema_to_reindex\ndiff --git a/src/test/regress/expected/create_procedure.out b/src/test/regress/expected/create_procedure.out\nindex 2177ba3509..ae3ca94d00 100644\n--- a/src/test/regress/expected/create_procedure.out\n+++ b/src/test/regress/expected/create_procedure.out\n@@ -421,7 +421,7 @@ ERROR:  cp_testfunc1(integer) is not a procedure\n DROP PROCEDURE nonexistent();\n ERROR:  procedure nonexistent() does not exist\n -- privileges\n-CREATE USER regress_cp_user1;\n+CREATE USER regress_cp_user1 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n GRANT INSERT ON cp_test TO regress_cp_user1;\n REVOKE EXECUTE ON PROCEDURE ptest1(text) FROM PUBLIC;\n SET ROLE regress_cp_user1;\ndiff --git 
a/src/test/regress/expected/create_role.out b/src/test/regress/expected/create_role.out\nindex 46d4f9efe9..fc2a28a2f6 100644\n--- a/src/test/regress/expected/create_role.out\n+++ b/src/test/regress/expected/create_role.out\n@@ -1,28 +1,28 @@\n -- ok, superuser can create users with any set of privileges\n-CREATE ROLE regress_role_super SUPERUSER;\n-CREATE ROLE regress_role_admin CREATEDB CREATEROLE REPLICATION BYPASSRLS;\n+CREATE ROLE regress_role_super SUPERUSER PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE ROLE regress_role_admin CREATEDB CREATEROLE REPLICATION BYPASSRLS PASSWORD NEON_PASSWORD_PLACEHOLDER;\n GRANT CREATE ON DATABASE regression TO regress_role_admin WITH GRANT OPTION;\n-CREATE ROLE regress_role_limited_admin CREATEROLE;\n-CREATE ROLE regress_role_normal;\n+CREATE ROLE regress_role_limited_admin CREATEROLE PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE ROLE regress_role_normal PASSWORD NEON_PASSWORD_PLACEHOLDER;\n -- fail, CREATEROLE user can't give away role attributes without having them\n SET SESSION AUTHORIZATION regress_role_limited_admin;\n-CREATE ROLE regress_nosuch_superuser SUPERUSER;\n+CREATE ROLE regress_nosuch_superuser SUPERUSER PASSWORD NEON_PASSWORD_PLACEHOLDER;\n ERROR:  permission denied to create role\n DETAIL:  Only roles with the SUPERUSER attribute may create roles with the SUPERUSER attribute.\n-CREATE ROLE regress_nosuch_replication_bypassrls REPLICATION BYPASSRLS;\n+CREATE ROLE regress_nosuch_replication_bypassrls REPLICATION BYPASSRLS PASSWORD NEON_PASSWORD_PLACEHOLDER;\n ERROR:  permission denied to create role\n DETAIL:  Only roles with the REPLICATION attribute may create roles with the REPLICATION attribute.\n-CREATE ROLE regress_nosuch_replication REPLICATION;\n+CREATE ROLE regress_nosuch_replication REPLICATION PASSWORD NEON_PASSWORD_PLACEHOLDER;\n ERROR:  permission denied to create role\n DETAIL:  Only roles with the REPLICATION attribute may create roles with the REPLICATION attribute.\n-CREATE ROLE 
regress_nosuch_bypassrls BYPASSRLS;\n+CREATE ROLE regress_nosuch_bypassrls BYPASSRLS PASSWORD NEON_PASSWORD_PLACEHOLDER;\n ERROR:  permission denied to create role\n DETAIL:  Only roles with the BYPASSRLS attribute may create roles with the BYPASSRLS attribute.\n-CREATE ROLE regress_nosuch_createdb CREATEDB;\n+CREATE ROLE regress_nosuch_createdb CREATEDB PASSWORD NEON_PASSWORD_PLACEHOLDER;\n ERROR:  permission denied to create role\n DETAIL:  Only roles with the CREATEDB attribute may create roles with the CREATEDB attribute.\n -- ok, can create a role without any special attributes\n-CREATE ROLE regress_role_limited;\n+CREATE ROLE regress_role_limited PASSWORD NEON_PASSWORD_PLACEHOLDER;\n -- fail, can't give it in any of the restricted attributes\n ALTER ROLE regress_role_limited SUPERUSER;\n ERROR:  permission denied to alter role\n@@ -39,10 +39,10 @@ DETAIL:  Only roles with the BYPASSRLS attribute may change the BYPASSRLS attrib\n DROP ROLE regress_role_limited;\n -- ok, can give away these role attributes if you have them\n SET SESSION AUTHORIZATION regress_role_admin;\n-CREATE ROLE regress_replication_bypassrls REPLICATION BYPASSRLS;\n-CREATE ROLE regress_replication REPLICATION;\n-CREATE ROLE regress_bypassrls BYPASSRLS;\n-CREATE ROLE regress_createdb CREATEDB;\n+CREATE ROLE regress_replication_bypassrls REPLICATION BYPASSRLS PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE ROLE regress_replication REPLICATION PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE ROLE regress_bypassrls BYPASSRLS PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE ROLE regress_createdb CREATEDB PASSWORD NEON_PASSWORD_PLACEHOLDER;\n -- ok, can toggle these role attributes off and on if you have them\n ALTER ROLE regress_replication NOREPLICATION;\n ALTER ROLE regress_replication REPLICATION;\n@@ -58,48 +58,48 @@ ALTER ROLE regress_createdb NOSUPERUSER;\n ERROR:  permission denied to alter role\n DETAIL:  Only roles with the SUPERUSER attribute may change the SUPERUSER attribute.\n -- ok, having 
CREATEROLE is enough to create users with these privileges\n-CREATE ROLE regress_createrole CREATEROLE NOINHERIT;\n+CREATE ROLE regress_createrole CREATEROLE NOINHERIT PASSWORD NEON_PASSWORD_PLACEHOLDER;\n GRANT CREATE ON DATABASE regression TO regress_createrole WITH GRANT OPTION;\n-CREATE ROLE regress_login LOGIN;\n-CREATE ROLE regress_inherit INHERIT;\n-CREATE ROLE regress_connection_limit CONNECTION LIMIT 5;\n-CREATE ROLE regress_encrypted_password ENCRYPTED PASSWORD 'foo';\n-CREATE ROLE regress_password_null PASSWORD NULL;\n+CREATE ROLE regress_login LOGIN PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE ROLE regress_inherit INHERIT PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE ROLE regress_connection_limit CONNECTION LIMIT 5 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE ROLE regress_encrypted_password ENCRYPTED PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE ROLE regress_password_null PASSWORD NEON_PASSWORD_PLACEHOLDER;\n -- ok, backwards compatible noise words should be ignored\n-CREATE ROLE regress_noiseword SYSID 12345;\n+CREATE ROLE regress_noiseword SYSID 12345 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n NOTICE:  SYSID can no longer be specified\n -- fail, cannot grant membership in superuser role\n-CREATE ROLE regress_nosuch_super IN ROLE regress_role_super;\n+CREATE ROLE regress_nosuch_super IN ROLE regress_role_super PASSWORD NEON_PASSWORD_PLACEHOLDER;\n ERROR:  permission denied to grant role \"regress_role_super\"\n DETAIL:  Only roles with the SUPERUSER attribute may grant roles with the SUPERUSER attribute.\n -- fail, database owner cannot have members\n-CREATE ROLE regress_nosuch_dbowner IN ROLE pg_database_owner;\n+CREATE ROLE regress_nosuch_dbowner IN ROLE pg_database_owner PASSWORD NEON_PASSWORD_PLACEHOLDER;\n ERROR:  role \"pg_database_owner\" cannot have explicit members\n -- ok, can grant other users into a role\n CREATE ROLE regress_inroles ROLE\n \tregress_role_super, regress_createdb, regress_createrole, regress_login,\n-\tregress_inherit, 
regress_connection_limit, regress_encrypted_password, regress_password_null;\n+\tregress_inherit, regress_connection_limit, regress_encrypted_password, regress_password_null PASSWORD NEON_PASSWORD_PLACEHOLDER;\n -- fail, cannot grant a role into itself\n-CREATE ROLE regress_nosuch_recursive ROLE regress_nosuch_recursive;\n+CREATE ROLE regress_nosuch_recursive ROLE regress_nosuch_recursive PASSWORD NEON_PASSWORD_PLACEHOLDER;\n ERROR:  role \"regress_nosuch_recursive\" is a member of role \"regress_nosuch_recursive\"\n -- ok, can grant other users into a role with admin option\n CREATE ROLE regress_adminroles ADMIN\n \tregress_role_super, regress_createdb, regress_createrole, regress_login,\n-\tregress_inherit, regress_connection_limit, regress_encrypted_password, regress_password_null;\n+\tregress_inherit, regress_connection_limit, regress_encrypted_password, regress_password_null PASSWORD NEON_PASSWORD_PLACEHOLDER;\n -- fail, cannot grant a role into itself with admin option\n-CREATE ROLE regress_nosuch_admin_recursive ADMIN regress_nosuch_admin_recursive;\n+CREATE ROLE regress_nosuch_admin_recursive ADMIN regress_nosuch_admin_recursive PASSWORD NEON_PASSWORD_PLACEHOLDER;\n ERROR:  role \"regress_nosuch_admin_recursive\" is a member of role \"regress_nosuch_admin_recursive\"\n -- fail, regress_createrole does not have CREATEDB privilege\n SET SESSION AUTHORIZATION regress_createrole;\n CREATE DATABASE regress_nosuch_db;\n ERROR:  permission denied to create database\n -- ok, regress_createrole can create new roles\n-CREATE ROLE regress_plainrole;\n+CREATE ROLE regress_plainrole PASSWORD NEON_PASSWORD_PLACEHOLDER;\n -- ok, roles with CREATEROLE can create new roles with it\n-CREATE ROLE regress_rolecreator CREATEROLE;\n+CREATE ROLE regress_rolecreator CREATEROLE PASSWORD NEON_PASSWORD_PLACEHOLDER;\n -- ok, roles with CREATEROLE can create new roles with different role\n -- attributes, including CREATEROLE\n-CREATE ROLE regress_hasprivs CREATEROLE LOGIN INHERIT 
CONNECTION LIMIT 5;\n+CREATE ROLE regress_hasprivs CREATEROLE LOGIN INHERIT CONNECTION LIMIT 5 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n -- ok, we should be able to modify a role we created\n COMMENT ON ROLE regress_hasprivs IS 'some comment';\n ALTER ROLE regress_hasprivs RENAME TO regress_tenant;\n@@ -141,7 +141,7 @@ ERROR:  permission denied to reassign objects\n DETAIL:  Only roles with privileges of role \"regress_tenant\" may reassign objects owned by it.\n -- ok, create a role with a value for createrole_self_grant\n SET createrole_self_grant = 'set, inherit';\n-CREATE ROLE regress_tenant2;\n+CREATE ROLE regress_tenant2 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n GRANT CREATE ON DATABASE regression TO regress_tenant2;\n -- ok, regress_tenant2 can create objects within the database\n SET SESSION AUTHORIZATION regress_tenant2;\n@@ -165,34 +165,34 @@ ALTER TABLE tenant2_table OWNER TO regress_tenant2;\n ERROR:  must be able to SET ROLE \"regress_tenant2\"\n DROP TABLE tenant2_table;\n -- fail, CREATEROLE is not enough to create roles in privileged roles\n-CREATE ROLE regress_read_all_data IN ROLE pg_read_all_data;\n+CREATE ROLE regress_read_all_data PASSWORD NEON_PASSWORD_PLACEHOLDER IN ROLE pg_read_all_data;\n ERROR:  permission denied to grant role \"pg_read_all_data\"\n DETAIL:  Only roles with the ADMIN option on role \"pg_read_all_data\" may grant this role.\n-CREATE ROLE regress_write_all_data IN ROLE pg_write_all_data;\n+CREATE ROLE regress_write_all_data PASSWORD NEON_PASSWORD_PLACEHOLDER IN ROLE pg_write_all_data;\n ERROR:  permission denied to grant role \"pg_write_all_data\"\n DETAIL:  Only roles with the ADMIN option on role \"pg_write_all_data\" may grant this role.\n-CREATE ROLE regress_monitor IN ROLE pg_monitor;\n+CREATE ROLE regress_monitor PASSWORD NEON_PASSWORD_PLACEHOLDER IN ROLE pg_monitor;\n ERROR:  permission denied to grant role \"pg_monitor\"\n DETAIL:  Only roles with the ADMIN option on role \"pg_monitor\" may grant this role.\n-CREATE ROLE 
regress_read_all_settings IN ROLE pg_read_all_settings;\n+CREATE ROLE regress_read_all_settings PASSWORD NEON_PASSWORD_PLACEHOLDER IN ROLE pg_read_all_settings;\n ERROR:  permission denied to grant role \"pg_read_all_settings\"\n DETAIL:  Only roles with the ADMIN option on role \"pg_read_all_settings\" may grant this role.\n-CREATE ROLE regress_read_all_stats IN ROLE pg_read_all_stats;\n+CREATE ROLE regress_read_all_stats PASSWORD NEON_PASSWORD_PLACEHOLDER IN ROLE pg_read_all_stats;\n ERROR:  permission denied to grant role \"pg_read_all_stats\"\n DETAIL:  Only roles with the ADMIN option on role \"pg_read_all_stats\" may grant this role.\n-CREATE ROLE regress_stat_scan_tables IN ROLE pg_stat_scan_tables;\n+CREATE ROLE regress_stat_scan_tables PASSWORD NEON_PASSWORD_PLACEHOLDER IN ROLE pg_stat_scan_tables;\n ERROR:  permission denied to grant role \"pg_stat_scan_tables\"\n DETAIL:  Only roles with the ADMIN option on role \"pg_stat_scan_tables\" may grant this role.\n-CREATE ROLE regress_read_server_files IN ROLE pg_read_server_files;\n+CREATE ROLE regress_read_server_files PASSWORD NEON_PASSWORD_PLACEHOLDER IN ROLE pg_read_server_files;\n ERROR:  permission denied to grant role \"pg_read_server_files\"\n DETAIL:  Only roles with the ADMIN option on role \"pg_read_server_files\" may grant this role.\n-CREATE ROLE regress_write_server_files IN ROLE pg_write_server_files;\n+CREATE ROLE regress_write_server_files PASSWORD NEON_PASSWORD_PLACEHOLDER IN ROLE pg_write_server_files;\n ERROR:  permission denied to grant role \"pg_write_server_files\"\n DETAIL:  Only roles with the ADMIN option on role \"pg_write_server_files\" may grant this role.\n-CREATE ROLE regress_execute_server_program IN ROLE pg_execute_server_program;\n+CREATE ROLE regress_execute_server_program PASSWORD NEON_PASSWORD_PLACEHOLDER IN ROLE pg_execute_server_program;\n ERROR:  permission denied to grant role \"pg_execute_server_program\"\n DETAIL:  Only roles with the ADMIN option on role 
\"pg_execute_server_program\" may grant this role.\n-CREATE ROLE regress_signal_backend IN ROLE pg_signal_backend;\n+CREATE ROLE regress_signal_backend PASSWORD NEON_PASSWORD_PLACEHOLDER IN ROLE pg_signal_backend;\n ERROR:  permission denied to grant role \"pg_signal_backend\"\n DETAIL:  Only roles with the ADMIN option on role \"pg_signal_backend\" may grant this role.\n -- fail, role still owns database objects\ndiff --git a/src/test/regress/expected/create_schema.out b/src/test/regress/expected/create_schema.out\nindex 93302a07ef..1a73f083ac 100644\n--- a/src/test/regress/expected/create_schema.out\n+++ b/src/test/regress/expected/create_schema.out\n@@ -2,7 +2,7 @@\n -- CREATE_SCHEMA\n --\n -- Schema creation with elements.\n-CREATE ROLE regress_create_schema_role SUPERUSER;\n+CREATE ROLE regress_create_schema_role SUPERUSER PASSWORD NEON_PASSWORD_PLACEHOLDER;\n -- Cases where schema creation fails as objects are qualified with a schema\n -- that does not match with what's expected.\n -- This checks all the object types that include schema qualifications.\ndiff --git a/src/test/regress/expected/create_view.out b/src/test/regress/expected/create_view.out\nindex f551624afb..57f1e432d4 100644\n--- a/src/test/regress/expected/create_view.out\n+++ b/src/test/regress/expected/create_view.out\n@@ -18,7 +18,8 @@ CREATE TABLE real_city (\n \toutline \tpath\n );\n \\set filename :abs_srcdir '/data/real_city.data'\n-COPY real_city FROM :'filename';\n+\\set command '\\\\copy real_city FROM ' :'filename';\n+:command\n ANALYZE real_city;\n SELECT *\n    INTO TABLE ramp\ndiff --git a/src/test/regress/expected/database.out b/src/test/regress/expected/database.out\nindex 4cbdbdf84d..573362850e 100644\n--- a/src/test/regress/expected/database.out\n+++ b/src/test/regress/expected/database.out\n@@ -1,8 +1,6 @@\n CREATE DATABASE regression_tbd\n \tENCODING utf8 LC_COLLATE \"C\" LC_CTYPE \"C\" TEMPLATE template0;\n ALTER DATABASE regression_tbd RENAME TO regression_utf8;\n-ALTER 
DATABASE regression_utf8 SET TABLESPACE regress_tblspace;\n-ALTER DATABASE regression_utf8 RESET TABLESPACE;\n ALTER DATABASE regression_utf8 CONNECTION_LIMIT 123;\n -- Test PgDatabaseToastTable.  Doing this with GRANT would be slow.\n BEGIN;\ndiff --git a/src/test/regress/expected/dependency.out b/src/test/regress/expected/dependency.out\nindex 6d9498cdd1..692cf979d0 100644\n--- a/src/test/regress/expected/dependency.out\n+++ b/src/test/regress/expected/dependency.out\n@@ -1,10 +1,10 @@\n --\n -- DEPENDENCIES\n --\n-CREATE USER regress_dep_user;\n-CREATE USER regress_dep_user2;\n-CREATE USER regress_dep_user3;\n-CREATE GROUP regress_dep_group;\n+CREATE USER regress_dep_user PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE USER regress_dep_user2 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE USER regress_dep_user3 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE GROUP regress_dep_group PASSWORD NEON_PASSWORD_PLACEHOLDER;\n CREATE TABLE deptest (f1 serial primary key, f2 text);\n GRANT SELECT ON TABLE deptest TO GROUP regress_dep_group;\n GRANT ALL ON TABLE deptest TO regress_dep_user, regress_dep_user2;\n@@ -41,9 +41,9 @@ ERROR:  role \"regress_dep_user3\" cannot be dropped because some objects depend o\n DROP TABLE deptest;\n DROP USER regress_dep_user3;\n -- Test DROP OWNED\n-CREATE USER regress_dep_user0;\n-CREATE USER regress_dep_user1;\n-CREATE USER regress_dep_user2;\n+CREATE USER regress_dep_user0 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE USER regress_dep_user1 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE USER regress_dep_user2 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n SET SESSION AUTHORIZATION regress_dep_user0;\n -- permission denied\n DROP OWNED BY regress_dep_user1;\ndiff --git a/src/test/regress/expected/drop_if_exists.out b/src/test/regress/expected/drop_if_exists.out\nindex 5e44c2c3ce..eb3bb329fb 100644\n--- a/src/test/regress/expected/drop_if_exists.out\n+++ b/src/test/regress/expected/drop_if_exists.out\n@@ -64,9 +64,9 @@ ERROR:  type \"test_domain_exists\" 
does not exist\n ---\n --- role/user/group\n ---\n-CREATE USER regress_test_u1;\n-CREATE ROLE regress_test_r1;\n-CREATE GROUP regress_test_g1;\n+CREATE USER regress_test_u1 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE ROLE regress_test_r1 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE GROUP regress_test_g1 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n DROP USER regress_test_u2;\n ERROR:  role \"regress_test_u2\" does not exist\n DROP USER IF EXISTS regress_test_u1, regress_test_u2;\ndiff --git a/src/test/regress/expected/equivclass.out b/src/test/regress/expected/equivclass.out\nindex 126f7047fe..0e2cc73426 100644\n--- a/src/test/regress/expected/equivclass.out\n+++ b/src/test/regress/expected/equivclass.out\n@@ -384,7 +384,7 @@ set enable_nestloop = on;\n set enable_mergejoin = off;\n alter table ec1 enable row level security;\n create policy p1 on ec1 using (f1 < '5'::int8alias1);\n-create user regress_user_ectest;\n+create user regress_user_ectest PASSWORD NEON_PASSWORD_PLACEHOLDER;\n grant select on ec0 to regress_user_ectest;\n grant select on ec1 to regress_user_ectest;\n -- without any RLS, we'll treat {a.ff, b.ff, 43} as an EquivalenceClass\ndiff --git a/src/test/regress/expected/event_trigger.out b/src/test/regress/expected/event_trigger.out\nindex 5a10958df5..a578c06ebd 100644\n--- a/src/test/regress/expected/event_trigger.out\n+++ b/src/test/regress/expected/event_trigger.out\n@@ -85,7 +85,7 @@ create event trigger regress_event_trigger2 on ddl_command_start\n -- OK\n comment on event trigger regress_event_trigger is 'test comment';\n -- drop as non-superuser should fail\n-create role regress_evt_user;\n+create role regress_evt_user PASSWORD NEON_PASSWORD_PLACEHOLDER;\n set role regress_evt_user;\n create event trigger regress_event_trigger_noperms on ddl_command_start\n    execute procedure test_event_trigger();\ndiff --git a/src/test/regress/expected/foreign_data.out b/src/test/regress/expected/foreign_data.out\nindex 6ed50fdcfa..caa00a345d 100644\n--- 
a/src/test/regress/expected/foreign_data.out\n+++ b/src/test/regress/expected/foreign_data.out\n@@ -14,13 +14,13 @@ CREATE FUNCTION test_fdw_handler()\n SET client_min_messages TO 'warning';\n DROP ROLE IF EXISTS regress_foreign_data_user, regress_test_role, regress_test_role2, regress_test_role_super, regress_test_indirect, regress_unprivileged_role;\n RESET client_min_messages;\n-CREATE ROLE regress_foreign_data_user LOGIN SUPERUSER;\n+CREATE ROLE regress_foreign_data_user LOGIN SUPERUSER PASSWORD NEON_PASSWORD_PLACEHOLDER;\n SET SESSION AUTHORIZATION 'regress_foreign_data_user';\n-CREATE ROLE regress_test_role;\n-CREATE ROLE regress_test_role2;\n-CREATE ROLE regress_test_role_super SUPERUSER;\n-CREATE ROLE regress_test_indirect;\n-CREATE ROLE regress_unprivileged_role;\n+CREATE ROLE regress_test_role PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE ROLE regress_test_role2 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE ROLE regress_test_role_super SUPERUSER PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE ROLE regress_test_indirect PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE ROLE regress_unprivileged_role PASSWORD NEON_PASSWORD_PLACEHOLDER;\n CREATE FOREIGN DATA WRAPPER dummy;\n COMMENT ON FOREIGN DATA WRAPPER dummy IS 'useless';\n CREATE FOREIGN DATA WRAPPER postgresql VALIDATOR postgresql_fdw_validator;\ndiff --git a/src/test/regress/expected/foreign_key.out b/src/test/regress/expected/foreign_key.out\nindex 84745b9f60..4883c12351 100644\n--- a/src/test/regress/expected/foreign_key.out\n+++ b/src/test/regress/expected/foreign_key.out\n@@ -1985,7 +1985,7 @@ ALTER TABLE fk_partitioned_fk_6 ATTACH PARTITION fk_partitioned_pk_6 FOR VALUES\n ERROR:  cannot ALTER TABLE \"fk_partitioned_pk_61\" because it is being used by active queries in this session\n DROP TABLE fk_partitioned_pk_6, fk_partitioned_fk_6;\n -- test the case when the referenced table is owned by a different user\n-create role regress_other_partitioned_fk_owner;\n+create role regress_other_partitioned_fk_owner 
PASSWORD NEON_PASSWORD_PLACEHOLDER;\n grant references on fk_notpartitioned_pk to regress_other_partitioned_fk_owner;\n set role regress_other_partitioned_fk_owner;\n create table other_partitioned_fk(a int, b int) partition by list (a);\ndiff --git a/src/test/regress/expected/generated.out b/src/test/regress/expected/generated.out\nindex 5881420388..4ae21aa43c 100644\n--- a/src/test/regress/expected/generated.out\n+++ b/src/test/regress/expected/generated.out\n@@ -534,7 +534,7 @@ CREATE TABLE gtest10a (a int PRIMARY KEY, b int GENERATED ALWAYS AS (a * 2) STOR\n ALTER TABLE gtest10a DROP COLUMN b;\n INSERT INTO gtest10a (a) VALUES (1);\n -- privileges\n-CREATE USER regress_user11;\n+CREATE USER regress_user11 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n CREATE TABLE gtest11s (a int PRIMARY KEY, b int, c int GENERATED ALWAYS AS (b * 2) STORED);\n INSERT INTO gtest11s VALUES (1, 10), (2, 20);\n GRANT SELECT (a, c) ON gtest11s TO regress_user11;\ndiff --git a/src/test/regress/expected/guc.out b/src/test/regress/expected/guc.out\nindex 127c953297..e6f8272f99 100644\n--- a/src/test/regress/expected/guc.out\n+++ b/src/test/regress/expected/guc.out\n@@ -584,7 +584,7 @@ PREPARE foo AS SELECT 1;\n LISTEN foo_event;\n SET vacuum_cost_delay = 13;\n CREATE TEMP TABLE tmp_foo (data text) ON COMMIT DELETE ROWS;\n-CREATE ROLE regress_guc_user;\n+CREATE ROLE regress_guc_user PASSWORD NEON_PASSWORD_PLACEHOLDER;\n SET SESSION AUTHORIZATION regress_guc_user;\n -- look changes\n SELECT pg_listening_channels();\ndiff --git a/src/test/regress/expected/hash_index.out b/src/test/regress/expected/hash_index.out\nindex a2036a1597..805d73b9d2 100644\n--- a/src/test/regress/expected/hash_index.out\n+++ b/src/test/regress/expected/hash_index.out\n@@ -20,10 +20,14 @@ CREATE TABLE hash_f8_heap (\n \trandom \t\tfloat8\n );\n \\set filename :abs_srcdir '/data/hash.data'\n-COPY hash_i4_heap FROM :'filename';\n-COPY hash_name_heap FROM :'filename';\n-COPY hash_txt_heap FROM :'filename';\n-COPY hash_f8_heap 
FROM :'filename';\n+\\set command '\\\\copy hash_i4_heap FROM ' :'filename';\n+:command\n+\\set command '\\\\copy hash_name_heap FROM ' :'filename';\n+:command\n+\\set command '\\\\copy hash_txt_heap FROM ' :'filename';\n+:command\n+\\set command '\\\\copy hash_f8_heap FROM ' :'filename';\n+:command\n -- the data in this file has a lot of duplicates in the index key\n -- fields, leading to long bucket chains and lots of table expansion.\n -- this is therefore a stress test of the bucket overflow code (unlike\ndiff --git a/src/test/regress/expected/identity.out b/src/test/regress/expected/identity.out\nindex 1b74958de9..078187b542 100644\n--- a/src/test/regress/expected/identity.out\n+++ b/src/test/regress/expected/identity.out\n@@ -520,7 +520,7 @@ ALTER TABLE itest7 ALTER COLUMN a SET GENERATED BY DEFAULT;\n ALTER TABLE itest7 ALTER COLUMN a RESTART;\n ALTER TABLE itest7 ALTER COLUMN a DROP IDENTITY;\n -- privileges\n-CREATE USER regress_identity_user1;\n+CREATE USER regress_identity_user1 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n CREATE TABLE itest8 (a int GENERATED ALWAYS AS IDENTITY, b text);\n GRANT SELECT, INSERT ON itest8 TO regress_identity_user1;\n SET ROLE regress_identity_user1;\ndiff --git a/src/test/regress/expected/inherit.out b/src/test/regress/expected/inherit.out\nindex 8f831c95c3..ec681b52af 100644\n--- a/src/test/regress/expected/inherit.out\n+++ b/src/test/regress/expected/inherit.out\n@@ -2636,7 +2636,7 @@ create index on permtest_parent (left(c, 3));\n insert into permtest_parent\n   select 1, 'a', left(fipshash(i::text), 5) from generate_series(0, 100) i;\n analyze permtest_parent;\n-create role regress_no_child_access;\n+create role regress_no_child_access PASSWORD NEON_PASSWORD_PLACEHOLDER;\n revoke all on permtest_grandchild from regress_no_child_access;\n grant select on permtest_parent to regress_no_child_access;\n set session authorization regress_no_child_access;\ndiff --git a/src/test/regress/expected/insert.out 
b/src/test/regress/expected/insert.out\nindex cf4b5221a8..fa6ccb639c 100644\n--- a/src/test/regress/expected/insert.out\n+++ b/src/test/regress/expected/insert.out\n@@ -802,7 +802,7 @@ drop table mlparted5;\n -- appropriate key description (or none) in various situations\n create table key_desc (a int, b int) partition by list ((a+0));\n create table key_desc_1 partition of key_desc for values in (1) partition by range (b);\n-create user regress_insert_other_user;\n+create user regress_insert_other_user PASSWORD NEON_PASSWORD_PLACEHOLDER;\n grant select (a) on key_desc_1 to regress_insert_other_user;\n grant insert on key_desc to regress_insert_other_user;\n set role regress_insert_other_user;\n@@ -914,7 +914,7 @@ DETAIL:  Failing row contains (2, hi there).\n -- check that the message shows the appropriate column description in a\n -- situation where the partitioned table is not the primary ModifyTable node\n create table inserttest3 (f1 text default 'foo', f2 text default 'bar', f3 int);\n-create role regress_coldesc_role;\n+create role regress_coldesc_role PASSWORD NEON_PASSWORD_PLACEHOLDER;\n grant insert on inserttest3 to regress_coldesc_role;\n grant insert on brtrigpartcon to regress_coldesc_role;\n revoke select on brtrigpartcon from regress_coldesc_role;\ndiff --git a/src/test/regress/expected/jsonb.out b/src/test/regress/expected/jsonb.out\nindex f8a7dac960..64dcaf171c 100644\n--- a/src/test/regress/expected/jsonb.out\n+++ b/src/test/regress/expected/jsonb.out\n@@ -4,7 +4,8 @@ CREATE TABLE testjsonb (\n        j jsonb\n );\n \\set filename :abs_srcdir '/data/jsonb.data'\n-COPY testjsonb FROM :'filename';\n+\\set command '\\\\copy testjsonb FROM ' :'filename';\n+:command\n -- Strings.\n SELECT '\"\"'::jsonb;\t\t\t\t-- OK.\n  jsonb \ndiff --git a/src/test/regress/expected/largeobject.out b/src/test/regress/expected/largeobject.out\nindex 4921dd79ae..d18a3cdd66 100644\n--- a/src/test/regress/expected/largeobject.out\n+++ 
b/src/test/regress/expected/largeobject.out\n@@ -7,7 +7,7 @@\n -- ensure consistent test output regardless of the default bytea format\n SET bytea_output TO escape;\n -- Test ALTER LARGE OBJECT OWNER\n-CREATE ROLE regress_lo_user;\n+CREATE ROLE regress_lo_user PASSWORD NEON_PASSWORD_PLACEHOLDER;\n SELECT lo_create(42);\n  lo_create \n -----------\n@@ -346,7 +346,8 @@ SELECT lo_unlink(loid) from lotest_stash_values;\n \n TRUNCATE lotest_stash_values;\n \\set filename :abs_srcdir '/data/tenk.data'\n-INSERT INTO lotest_stash_values (loid) SELECT lo_import(:'filename');\n+\\lo_import :filename\n+INSERT INTO lotest_stash_values (loid) VALUES (:LASTOID);\n BEGIN;\n UPDATE lotest_stash_values SET fd=lo_open(loid, CAST(x'20000' | x'40000' AS integer));\n -- verify length of large object\n@@ -410,12 +411,8 @@ SELECT lo_close(fd) FROM lotest_stash_values;\n \n END;\n \\set filename :abs_builddir '/results/lotest.txt'\n-SELECT lo_export(loid, :'filename') FROM lotest_stash_values;\n- lo_export \n------------\n-         1\n-(1 row)\n-\n+SELECT loid FROM lotest_stash_values \\gset\n+\\lo_export :loid, :filename\n \\lo_import :filename\n \\set newloid :LASTOID\n -- just make sure \\lo_export does not barf\ndiff --git a/src/test/regress/expected/lock.out b/src/test/regress/expected/lock.out\nindex ad137d3645..8dac447436 100644\n--- a/src/test/regress/expected/lock.out\n+++ b/src/test/regress/expected/lock.out\n@@ -16,7 +16,7 @@ CREATE VIEW lock_view3 AS SELECT * from lock_view2;\n CREATE VIEW lock_view4 AS SELECT (select a from lock_tbl1a limit 1) from lock_tbl1;\n CREATE VIEW lock_view5 AS SELECT * from lock_tbl1 where a in (select * from lock_tbl1a);\n CREATE VIEW lock_view6 AS SELECT * from (select * from lock_tbl1) sub;\n-CREATE ROLE regress_rol_lock1;\n+CREATE ROLE regress_rol_lock1 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n ALTER ROLE regress_rol_lock1 SET search_path = lock_schema1;\n GRANT USAGE ON SCHEMA lock_schema1 TO regress_rol_lock1;\n -- Try all valid lock options; also 
try omitting the optional TABLE keyword.\ndiff --git a/src/test/regress/expected/matview.out b/src/test/regress/expected/matview.out\nindex 67a50bde3d..7eeafd2603 100644\n--- a/src/test/regress/expected/matview.out\n+++ b/src/test/regress/expected/matview.out\n@@ -549,7 +549,7 @@ SELECT * FROM mvtest_mv_v;\n DROP TABLE mvtest_v CASCADE;\n NOTICE:  drop cascades to materialized view mvtest_mv_v\n -- make sure running as superuser works when MV owned by another role (bug #11208)\n-CREATE ROLE regress_user_mvtest;\n+CREATE ROLE regress_user_mvtest PASSWORD NEON_PASSWORD_PLACEHOLDER;\n SET ROLE regress_user_mvtest;\n -- this test case also checks for ambiguity in the queries issued by\n -- refresh_by_match_merge(), by choosing column names that intentionally\n@@ -615,7 +615,7 @@ HINT:  Use the REFRESH MATERIALIZED VIEW command.\n ROLLBACK;\n -- INSERT privileges if relation owner is not allowed to insert.\n CREATE SCHEMA matview_schema;\n-CREATE USER regress_matview_user;\n+CREATE USER regress_matview_user PASSWORD NEON_PASSWORD_PLACEHOLDER;\n ALTER DEFAULT PRIVILEGES FOR ROLE regress_matview_user\n   REVOKE INSERT ON TABLES FROM regress_matview_user;\n GRANT ALL ON SCHEMA matview_schema TO public;\ndiff --git a/src/test/regress/expected/merge.out b/src/test/regress/expected/merge.out\nindex bc9a59803f..5b9ddf0626 100644\n--- a/src/test/regress/expected/merge.out\n+++ b/src/test/regress/expected/merge.out\n@@ -1,9 +1,9 @@\n --\n -- MERGE\n --\n-CREATE USER regress_merge_privs;\n-CREATE USER regress_merge_no_privs;\n-CREATE USER regress_merge_none;\n+CREATE USER regress_merge_privs PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE USER regress_merge_no_privs PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE USER regress_merge_none PASSWORD NEON_PASSWORD_PLACEHOLDER;\n DROP TABLE IF EXISTS target;\n NOTICE:  table \"target\" does not exist, skipping\n DROP TABLE IF EXISTS source;\ndiff --git a/src/test/regress/expected/misc.out b/src/test/regress/expected/misc.out\nindex 
6e816c57f1..6ef45b468e 100644\n--- a/src/test/regress/expected/misc.out\n+++ b/src/test/regress/expected/misc.out\n@@ -59,9 +59,11 @@ DROP TABLE tmp;\n -- copy\n --\n \\set filename :abs_builddir '/results/onek.data'\n-COPY onek TO :'filename';\n+\\set command '\\\\copy onek TO ' :'filename';\n+:command\n CREATE TEMP TABLE onek_copy (LIKE onek);\n-COPY onek_copy FROM :'filename';\n+\\set command '\\\\copy onek_copy FROM ' :'filename';\n+:command\n SELECT * FROM onek EXCEPT ALL SELECT * FROM onek_copy;\n  unique1 | unique2 | two | four | ten | twenty | hundred | thousand | twothousand | fivethous | tenthous | odd | even | stringu1 | stringu2 | string4 \n ---------+---------+-----+------+-----+--------+---------+----------+-------------+-----------+----------+-----+------+----------+----------+---------\n@@ -73,9 +75,11 @@ SELECT * FROM onek_copy EXCEPT ALL SELECT * FROM onek;\n (0 rows)\n \n \\set filename :abs_builddir '/results/stud_emp.data'\n-COPY BINARY stud_emp TO :'filename';\n+\\set command '\\\\COPY BINARY stud_emp TO ' :'filename';\n+:command\n CREATE TEMP TABLE stud_emp_copy (LIKE stud_emp);\n-COPY BINARY stud_emp_copy FROM :'filename';\n+\\set command '\\\\COPY BINARY stud_emp_copy FROM ' :'filename';\n+:command\n SELECT * FROM stud_emp_copy;\n  name  | age |  location  | salary | manager | gpa | percent \n -------+-----+------------+--------+---------+-----+---------\ndiff --git a/src/test/regress/expected/misc_functions.out b/src/test/regress/expected/misc_functions.out\nindex c669948370..47111b1d24 100644\n--- a/src/test/regress/expected/misc_functions.out\n+++ b/src/test/regress/expected/misc_functions.out\n@@ -297,7 +297,7 @@ SELECT pg_log_backend_memory_contexts(pid) FROM pg_stat_activity\n  t\n (1 row)\n \n-CREATE ROLE regress_log_memory;\n+CREATE ROLE regress_log_memory PASSWORD NEON_PASSWORD_PLACEHOLDER;\n SELECT has_function_privilege('regress_log_memory',\n   'pg_log_backend_memory_contexts(integer)', 'EXECUTE'); -- no\n  
has_function_privilege \n@@ -483,7 +483,7 @@ select count(*) > 0 from\n --\n -- Test replication slot directory functions\n --\n-CREATE ROLE regress_slot_dir_funcs;\n+CREATE ROLE regress_slot_dir_funcs PASSWORD NEON_PASSWORD_PLACEHOLDER;\n -- Not available by default.\n SELECT has_function_privilege('regress_slot_dir_funcs',\n   'pg_ls_logicalsnapdir()', 'EXECUTE');\ndiff --git a/src/test/regress/expected/object_address.out b/src/test/regress/expected/object_address.out\nindex fc42d418bf..e38f517574 100644\n--- a/src/test/regress/expected/object_address.out\n+++ b/src/test/regress/expected/object_address.out\n@@ -5,7 +5,7 @@\n SET client_min_messages TO 'warning';\n DROP ROLE IF EXISTS regress_addr_user;\n RESET client_min_messages;\n-CREATE USER regress_addr_user;\n+CREATE USER regress_addr_user PASSWORD NEON_PASSWORD_PLACEHOLDER;\n -- Test generic object addressing/identification functions\n CREATE SCHEMA addr_nsp;\n SET search_path TO 'addr_nsp';\ndiff --git a/src/test/regress/expected/password.out b/src/test/regress/expected/password.out\nindex 8475231735..0653946337 100644\n--- a/src/test/regress/expected/password.out\n+++ b/src/test/regress/expected/password.out\n@@ -12,11 +12,11 @@ SET password_encryption = 'md5'; -- ok\n SET password_encryption = 'scram-sha-256'; -- ok\n -- consistency of password entries\n SET password_encryption = 'md5';\n-CREATE ROLE regress_passwd1 PASSWORD 'role_pwd1';\n-CREATE ROLE regress_passwd2 PASSWORD 'role_pwd2';\n+CREATE ROLE regress_passwd1 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE ROLE regress_passwd2 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n SET password_encryption = 'scram-sha-256';\n-CREATE ROLE regress_passwd3 PASSWORD 'role_pwd3';\n-CREATE ROLE regress_passwd4 PASSWORD NULL;\n+CREATE ROLE regress_passwd3 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE ROLE regress_passwd4 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n -- check list of created entries\n --\n -- The scram secret will look something like:\n@@ -30,10 +30,10 @@ SELECT 
rolname, regexp_replace(rolpassword, '(SCRAM-SHA-256)\\$(\\d+):([a-zA-Z0-9+\n     ORDER BY rolname, rolpassword;\n      rolname     |                rolpassword_masked                 \n -----------------+---------------------------------------------------\n- regress_passwd1 | md5783277baca28003b33453252be4dbb34\n- regress_passwd2 | md54044304ba511dd062133eb5b4b84a2a3\n+ regress_passwd1 | NEON_MD5_PLACEHOLDER:regress_passwd1\n+ regress_passwd2 | NEON_MD5_PLACEHOLDER:regress_passwd2\n  regress_passwd3 | SCRAM-SHA-256$4096:<salt>$<storedkey>:<serverkey>\n- regress_passwd4 | \n+ regress_passwd4 | SCRAM-SHA-256$4096:<salt>$<storedkey>:<serverkey>\n (4 rows)\n \n -- Rename a role\n@@ -54,24 +54,16 @@ ALTER ROLE regress_passwd2_new RENAME TO regress_passwd2;\n -- passwords.\n SET password_encryption = 'md5';\n -- encrypt with MD5\n-ALTER ROLE regress_passwd2 PASSWORD 'foo';\n--- already encrypted, use as they are\n-ALTER ROLE regress_passwd1 PASSWORD 'md5cd3578025fe2c3d7ed1b9a9b26238b70';\n-ALTER ROLE regress_passwd3 PASSWORD 'SCRAM-SHA-256$4096:VLK4RMaQLCvNtQ==$6YtlR4t69SguDiwFvbVgVZtuz6gpJQQqUMZ7IQJK5yI=:ps75jrHeYU4lXCcXI4O8oIdJ3eO8o2jirjruw9phBTo=';\n+ALTER ROLE regress_passwd2 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n SET password_encryption = 'scram-sha-256';\n -- create SCRAM secret\n-ALTER ROLE  regress_passwd4 PASSWORD 'foo';\n--- already encrypted with MD5, use as it is\n-CREATE ROLE regress_passwd5 PASSWORD 'md5e73a4b11df52a6068f8b39f90be36023';\n--- This looks like a valid SCRAM-SHA-256 secret, but it is not\n--- so it should be hashed with SCRAM-SHA-256.\n-CREATE ROLE regress_passwd6 PASSWORD 'SCRAM-SHA-256$1234';\n--- These may look like valid MD5 secrets, but they are not, so they\n--- should be hashed with SCRAM-SHA-256.\n--- trailing garbage at the end\n-CREATE ROLE regress_passwd7 PASSWORD 'md5012345678901234567890123456789zz';\n--- invalid length\n-CREATE ROLE regress_passwd8 PASSWORD 'md501234567890123456789012345678901zz';\n+ALTER ROLE  regress_passwd4 
PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+-- Neon does not support encrypted passwords, use unencrypted instead\n+CREATE ROLE regress_passwd5 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+-- Neon does not support encrypted passwords, use unencrypted instead\n+CREATE ROLE regress_passwd6 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE ROLE regress_passwd7 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE ROLE regress_passwd8 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n -- Changing the SCRAM iteration count\n SET scram_iterations = 1024;\n CREATE ROLE regress_passwd9 PASSWORD 'alterediterationcount';\n@@ -81,11 +73,11 @@ SELECT rolname, regexp_replace(rolpassword, '(SCRAM-SHA-256)\\$(\\d+):([a-zA-Z0-9+\n     ORDER BY rolname, rolpassword;\n      rolname     |                rolpassword_masked                 \n -----------------+---------------------------------------------------\n- regress_passwd1 | md5cd3578025fe2c3d7ed1b9a9b26238b70\n- regress_passwd2 | md5dfa155cadd5f4ad57860162f3fab9cdb\n+ regress_passwd1 | NEON_MD5_PLACEHOLDER:regress_passwd1\n+ regress_passwd2 | NEON_MD5_PLACEHOLDER:regress_passwd2\n  regress_passwd3 | SCRAM-SHA-256$4096:<salt>$<storedkey>:<serverkey>\n  regress_passwd4 | SCRAM-SHA-256$4096:<salt>$<storedkey>:<serverkey>\n- regress_passwd5 | md5e73a4b11df52a6068f8b39f90be36023\n+ regress_passwd5 | SCRAM-SHA-256$4096:<salt>$<storedkey>:<serverkey>\n  regress_passwd6 | SCRAM-SHA-256$4096:<salt>$<storedkey>:<serverkey>\n  regress_passwd7 | SCRAM-SHA-256$4096:<salt>$<storedkey>:<serverkey>\n  regress_passwd8 | SCRAM-SHA-256$4096:<salt>$<storedkey>:<serverkey>\n@@ -95,23 +87,20 @@ SELECT rolname, regexp_replace(rolpassword, '(SCRAM-SHA-256)\\$(\\d+):([a-zA-Z0-9+\n -- An empty password is not allowed, in any form\n CREATE ROLE regress_passwd_empty PASSWORD '';\n NOTICE:  empty string is not a valid password, clearing password\n+ERROR:  Failed to get encrypted password: User \"regress_passwd_empty\" has no password assigned.\n ALTER ROLE regress_passwd_empty PASSWORD 
'md585939a5ce845f1a1b620742e3c659e0a';\n-NOTICE:  empty string is not a valid password, clearing password\n+ERROR:  role \"regress_passwd_empty\" does not exist\n ALTER ROLE regress_passwd_empty PASSWORD 'SCRAM-SHA-256$4096:hpFyHTUsSWcR7O9P$LgZFIt6Oqdo27ZFKbZ2nV+vtnYM995pDh9ca6WSi120=:qVV5NeluNfUPkwm7Vqat25RjSPLkGeoZBQs6wVv+um4=';\n-NOTICE:  empty string is not a valid password, clearing password\n+ERROR:  role \"regress_passwd_empty\" does not exist\n SELECT rolpassword FROM pg_authid WHERE rolname='regress_passwd_empty';\n  rolpassword \n -------------\n- \n-(1 row)\n+(0 rows)\n \n--- Test with invalid stored and server keys.\n---\n--- The first is valid, to act as a control. The others have too long\n--- stored/server keys. They will be re-hashed.\n-CREATE ROLE regress_passwd_sha_len0 PASSWORD 'SCRAM-SHA-256$4096:A6xHKoH/494E941doaPOYg==$Ky+A30sewHIH3VHQLRN9vYsuzlgNyGNKCh37dy96Rqw=:COPdlNiIkrsacU5QoxydEuOH6e/KfiipeETb/bPw8ZI=';\n-CREATE ROLE regress_passwd_sha_len1 PASSWORD 'SCRAM-SHA-256$4096:A6xHKoH/494E941doaPOYg==$Ky+A30sewHIH3VHQLRN9vYsuzlgNyGNKCh37dy96RqwAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=:COPdlNiIkrsacU5QoxydEuOH6e/KfiipeETb/bPw8ZI=';\n-CREATE ROLE regress_passwd_sha_len2 PASSWORD 'SCRAM-SHA-256$4096:A6xHKoH/494E941doaPOYg==$Ky+A30sewHIH3VHQLRN9vYsuzlgNyGNKCh37dy96Rqw=:COPdlNiIkrsacU5QoxydEuOH6e/KfiipeETb/bPw8ZIAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=';\n+-- Neon does not support encrypted passwords, use unencrypted instead\n+CREATE ROLE regress_passwd_sha_len0 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE ROLE regress_passwd_sha_len1 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE ROLE regress_passwd_sha_len2 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n -- Check that the invalid secrets were re-hashed. 
A re-hashed secret\n -- should not contain the original salt.\n SELECT rolname, rolpassword not like '%A6xHKoH/494E941doaPOYg==%' as is_rolpassword_rehashed\n@@ -120,7 +109,7 @@ SELECT rolname, rolpassword not like '%A6xHKoH/494E941doaPOYg==%' as is_rolpassw\n     ORDER BY rolname;\n          rolname         | is_rolpassword_rehashed \n -------------------------+-------------------------\n- regress_passwd_sha_len0 | f\n+ regress_passwd_sha_len0 | t\n  regress_passwd_sha_len1 | t\n  regress_passwd_sha_len2 | t\n (3 rows)\n@@ -135,6 +124,7 @@ DROP ROLE regress_passwd7;\n DROP ROLE regress_passwd8;\n DROP ROLE regress_passwd9;\n DROP ROLE regress_passwd_empty;\n+ERROR:  role \"regress_passwd_empty\" does not exist\n DROP ROLE regress_passwd_sha_len0;\n DROP ROLE regress_passwd_sha_len1;\n DROP ROLE regress_passwd_sha_len2;\ndiff --git a/src/test/regress/expected/privileges.out b/src/test/regress/expected/privileges.out\nindex 620fbe8c52..0570102357 100644\n--- a/src/test/regress/expected/privileges.out\n+++ b/src/test/regress/expected/privileges.out\n@@ -20,19 +20,19 @@ SELECT lo_unlink(oid) FROM pg_largeobject_metadata WHERE oid >= 1000 AND oid < 3\n \n RESET client_min_messages;\n -- test proper begins here\n-CREATE USER regress_priv_user1;\n-CREATE USER regress_priv_user2;\n-CREATE USER regress_priv_user3;\n-CREATE USER regress_priv_user4;\n-CREATE USER regress_priv_user5;\n-CREATE USER regress_priv_user5;\t-- duplicate\n+CREATE USER regress_priv_user1 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE USER regress_priv_user2 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE USER regress_priv_user3 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE USER regress_priv_user4 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE USER regress_priv_user5 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE USER regress_priv_user5 PASSWORD NEON_PASSWORD_PLACEHOLDER;\t-- duplicate\n ERROR:  role \"regress_priv_user5\" already exists\n-CREATE USER regress_priv_user6;\n-CREATE USER 
regress_priv_user7;\n-CREATE USER regress_priv_user8;\n-CREATE USER regress_priv_user9;\n-CREATE USER regress_priv_user10;\n-CREATE ROLE regress_priv_role;\n+CREATE USER regress_priv_user6 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE USER regress_priv_user7 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE USER regress_priv_user8 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE USER regress_priv_user9 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE USER regress_priv_user10 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE ROLE regress_priv_role PASSWORD NEON_PASSWORD_PLACEHOLDER;\n -- circular ADMIN OPTION grants should be disallowed\n GRANT regress_priv_user1 TO regress_priv_user2 WITH ADMIN OPTION;\n GRANT regress_priv_user1 TO regress_priv_user3 WITH ADMIN OPTION GRANTED BY regress_priv_user2;\n@@ -108,11 +108,11 @@ ERROR:  role \"regress_priv_user5\" cannot be dropped because some objects depend\n DETAIL:  privileges for membership of role regress_priv_user6 in role regress_priv_user1\n DROP ROLE regress_priv_user1, regress_priv_user5; -- ok, despite order\n -- recreate the roles we just dropped\n-CREATE USER regress_priv_user1;\n-CREATE USER regress_priv_user2;\n-CREATE USER regress_priv_user3;\n-CREATE USER regress_priv_user4;\n-CREATE USER regress_priv_user5;\n+CREATE USER regress_priv_user1 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE USER regress_priv_user2 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE USER regress_priv_user3 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE USER regress_priv_user4 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE USER regress_priv_user5 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n GRANT pg_read_all_data TO regress_priv_user6;\n GRANT pg_write_all_data TO regress_priv_user7;\n GRANT pg_read_all_settings TO regress_priv_user8 WITH ADMIN OPTION;\n@@ -212,8 +212,8 @@ REVOKE pg_read_all_settings FROM regress_priv_user8;\n DROP USER regress_priv_user10;\n DROP USER regress_priv_user9;\n DROP USER regress_priv_user8;\n-CREATE GROUP regress_priv_group1;\n-CREATE 
GROUP regress_priv_group2 WITH ADMIN regress_priv_user1 USER regress_priv_user2;\n+CREATE GROUP regress_priv_group1 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE GROUP regress_priv_group2 WITH ADMIN regress_priv_user1 PASSWORD NEON_PASSWORD_PLACEHOLDER USER regress_priv_user2;\n ALTER GROUP regress_priv_group1 ADD USER regress_priv_user4;\n GRANT regress_priv_group2 TO regress_priv_user2 GRANTED BY regress_priv_user1;\n SET SESSION AUTHORIZATION regress_priv_user3;\n@@ -246,12 +246,16 @@ GRANT regress_priv_role TO regress_priv_user1 WITH ADMIN OPTION GRANTED BY regre\n ERROR:  permission denied to grant privileges as role \"regress_priv_role\"\n DETAIL:  The grantor must have the ADMIN option on role \"regress_priv_role\".\n GRANT regress_priv_role TO regress_priv_user1 WITH ADMIN OPTION GRANTED BY CURRENT_ROLE;\n+ERROR:  permission denied to grant privileges as role \"neondb_owner\"\n+DETAIL:  The grantor must have the ADMIN option on role \"regress_priv_role\".\n REVOKE ADMIN OPTION FOR regress_priv_role FROM regress_priv_user1 GRANTED BY foo; -- error\n ERROR:  role \"foo\" does not exist\n REVOKE ADMIN OPTION FOR regress_priv_role FROM regress_priv_user1 GRANTED BY regress_priv_user2; -- warning, noop\n WARNING:  role \"regress_priv_user1\" has not been granted membership in role \"regress_priv_role\" by role \"regress_priv_user2\"\n REVOKE ADMIN OPTION FOR regress_priv_role FROM regress_priv_user1 GRANTED BY CURRENT_USER;\n+WARNING:  role \"regress_priv_user1\" has not been granted membership in role \"regress_priv_role\" by role \"neondb_owner\"\n REVOKE regress_priv_role FROM regress_priv_user1 GRANTED BY CURRENT_ROLE;\n+WARNING:  role \"regress_priv_user1\" has not been granted membership in role \"regress_priv_role\" by role \"neondb_owner\"\n DROP ROLE regress_priv_role;\n SET SESSION AUTHORIZATION regress_priv_user1;\n SELECT session_user, current_user;\n@@ -1783,7 +1787,7 @@ SELECT has_table_privilege('regress_priv_user1', 'atest4', 'SELECT WITH GRANT 
OP\n \n -- security-restricted operations\n \\c -\n-CREATE ROLE regress_sro_user;\n+CREATE ROLE regress_sro_user PASSWORD NEON_PASSWORD_PLACEHOLDER;\n -- Check that index expressions and predicates are run as the table's owner\n -- A dummy index function checking current_user\n CREATE FUNCTION sro_ifun(int) RETURNS int AS $$\n@@ -2675,8 +2679,8 @@ drop cascades to function testns.priv_testagg(integer)\n drop cascades to function testns.priv_testproc(integer)\n -- Change owner of the schema & and rename of new schema owner\n \\c -\n-CREATE ROLE regress_schemauser1 superuser login;\n-CREATE ROLE regress_schemauser2 superuser login;\n+CREATE ROLE regress_schemauser1 superuser login PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE ROLE regress_schemauser2 superuser login PASSWORD NEON_PASSWORD_PLACEHOLDER;\n SET SESSION ROLE regress_schemauser1;\n CREATE SCHEMA testns;\n SELECT nspname, rolname FROM pg_namespace, pg_roles WHERE pg_namespace.nspname = 'testns' AND pg_namespace.nspowner = pg_roles.oid;\n@@ -2799,7 +2803,7 @@ DROP USER regress_priv_user7;\n DROP USER regress_priv_user8; -- does not exist\n ERROR:  role \"regress_priv_user8\" does not exist\n -- permissions with LOCK TABLE\n-CREATE USER regress_locktable_user;\n+CREATE USER regress_locktable_user PASSWORD NEON_PASSWORD_PLACEHOLDER;\n CREATE TABLE lock_table (a int);\n -- LOCK TABLE and SELECT permission\n GRANT SELECT ON lock_table TO regress_locktable_user;\n@@ -2881,7 +2885,7 @@ DROP USER regress_locktable_user;\n -- pg_backend_memory_contexts.\n -- switch to superuser\n \\c -\n-CREATE ROLE regress_readallstats;\n+CREATE ROLE regress_readallstats PASSWORD NEON_PASSWORD_PLACEHOLDER;\n SELECT has_table_privilege('regress_readallstats','pg_backend_memory_contexts','SELECT'); -- no\n  has_table_privilege \n ---------------------\n@@ -2925,10 +2929,10 @@ RESET ROLE;\n -- clean up\n DROP ROLE regress_readallstats;\n -- test role grantor machinery\n-CREATE ROLE regress_group;\n-CREATE ROLE 
regress_group_direct_manager;\n-CREATE ROLE regress_group_indirect_manager;\n-CREATE ROLE regress_group_member;\n+CREATE ROLE regress_group PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE ROLE regress_group_direct_manager PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE ROLE regress_group_indirect_manager PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE ROLE regress_group_member PASSWORD NEON_PASSWORD_PLACEHOLDER;\n GRANT regress_group TO regress_group_direct_manager WITH INHERIT FALSE, ADMIN TRUE;\n GRANT regress_group_direct_manager TO regress_group_indirect_manager;\n SET SESSION AUTHORIZATION regress_group_direct_manager;\n@@ -2957,9 +2961,9 @@ DROP ROLE regress_group_direct_manager;\n DROP ROLE regress_group_indirect_manager;\n DROP ROLE regress_group_member;\n -- test SET and INHERIT options with object ownership changes\n-CREATE ROLE regress_roleoption_protagonist;\n-CREATE ROLE regress_roleoption_donor;\n-CREATE ROLE regress_roleoption_recipient;\n+CREATE ROLE regress_roleoption_protagonist PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE ROLE regress_roleoption_donor PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE ROLE regress_roleoption_recipient PASSWORD NEON_PASSWORD_PLACEHOLDER;\n CREATE SCHEMA regress_roleoption;\n GRANT CREATE, USAGE ON SCHEMA regress_roleoption TO PUBLIC;\n GRANT regress_roleoption_donor TO regress_roleoption_protagonist WITH INHERIT TRUE, SET FALSE;\ndiff --git a/src/test/regress/expected/psql.out b/src/test/regress/expected/psql.out\nindex 7cd0c27cca..d7a124ed68 100644\n--- a/src/test/regress/expected/psql.out\n+++ b/src/test/regress/expected/psql.out\n@@ -2857,7 +2857,7 @@ Type                | func\n -- check conditional am display\n \\pset expanded off\n CREATE SCHEMA tableam_display;\n-CREATE ROLE regress_display_role;\n+CREATE ROLE regress_display_role PASSWORD NEON_PASSWORD_PLACEHOLDER;\n ALTER SCHEMA tableam_display OWNER TO regress_display_role;\n SET search_path TO tableam_display;\n CREATE ACCESS METHOD heap_psql TYPE TABLE HANDLER 
heap_tableam_handler;\n@@ -4808,7 +4808,7 @@ last error message: division by zero\n last error code: 22012\n \\unset FETCH_COUNT\n create schema testpart;\n-create role regress_partitioning_role;\n+create role regress_partitioning_role PASSWORD NEON_PASSWORD_PLACEHOLDER;\n alter schema testpart owner to regress_partitioning_role;\n set role to regress_partitioning_role;\n -- run test inside own schema and hide other partitions\n@@ -5260,7 +5260,7 @@ reset work_mem;\n \n -- check \\df+\n -- we have to use functions with a predictable owner name, so make a role\n-create role regress_psql_user superuser;\n+create role regress_psql_user superuser PASSWORD NEON_PASSWORD_PLACEHOLDER;\n begin;\n set session authorization regress_psql_user;\n create function psql_df_internal (float8)\n@@ -5544,11 +5544,14 @@ CREATE TEMPORARY TABLE reload_output(\n   line text\n );\n SELECT 1 AS a \\g :g_out_file\n-COPY reload_output(line) FROM :'g_out_file';\n+\\set command '\\\\COPY reload_output(line) FROM ' :'g_out_file';\n+:command\n SELECT 2 AS b\\; SELECT 3 AS c\\; SELECT 4 AS d \\g :g_out_file\n-COPY reload_output(line) FROM :'g_out_file';\n+\\set command '\\\\COPY reload_output(line) FROM ' :'g_out_file';\n+:command\n COPY (SELECT 'foo') TO STDOUT \\; COPY (SELECT 'bar') TO STDOUT \\g :g_out_file\n-COPY reload_output(line) FROM :'g_out_file';\n+\\set command '\\\\COPY reload_output(line) FROM ' :'g_out_file';\n+:command\n SELECT line FROM reload_output ORDER BY lineno;\n   line   \n ---------\n@@ -5587,13 +5590,15 @@ SELECT 1 AS a\\; SELECT 2 AS b\\; SELECT 3 AS c;\n -- COPY TO file\n -- The data goes to :g_out_file and the status to :o_out_file\n \\set QUIET false\n-COPY (SELECT unique1 FROM onek ORDER BY unique1 LIMIT 10) TO :'g_out_file';\n+\\set command '\\\\COPY (SELECT unique1 FROM onek ORDER BY unique1 LIMIT 10) TO ' :'g_out_file';\n+:command\n -- DML command status\n UPDATE onek SET unique1 = unique1 WHERE false;\n \\set QUIET true\n \\o\n -- Check the contents of the files 
generated.\n-COPY reload_output(line) FROM :'g_out_file';\n+\\set command '\\\\COPY reload_output(line) FROM ' :'g_out_file';\n+:command\n SELECT line FROM reload_output ORDER BY lineno;\n  line \n ------\n@@ -5610,7 +5615,8 @@ SELECT line FROM reload_output ORDER BY lineno;\n (10 rows)\n \n TRUNCATE TABLE reload_output;\n-COPY reload_output(line) FROM :'o_out_file';\n+\\set command '\\\\COPY reload_output(line) FROM ' :'o_out_file';\n+:command\n SELECT line FROM reload_output ORDER BY lineno;\n    line   \n ----------\n@@ -5647,7 +5653,8 @@ COPY (SELECT 'foo1') TO STDOUT \\; COPY (SELECT 'bar1') TO STDOUT;\n COPY (SELECT 'foo2') TO STDOUT \\; COPY (SELECT 'bar2') TO STDOUT \\g :g_out_file\n \\o\n -- Check the contents of the files generated.\n-COPY reload_output(line) FROM :'g_out_file';\n+\\set command '\\\\COPY reload_output(line) FROM ' :'g_out_file';\n+:command\n SELECT line FROM reload_output ORDER BY lineno;\n  line \n ------\n@@ -5656,7 +5663,8 @@ SELECT line FROM reload_output ORDER BY lineno;\n (2 rows)\n \n TRUNCATE TABLE reload_output;\n-COPY reload_output(line) FROM :'o_out_file';\n+\\set command '\\\\COPY reload_output(line) FROM ' :'o_out_file';\n+:command\n SELECT line FROM reload_output ORDER BY lineno;\n  line \n ------\n@@ -6619,10 +6627,10 @@ cross-database references are not implemented: \"no.such.database\".\"no.such.schem\n \\dX \"no.such.database\".\"no.such.schema\".\"no.such.extended.statistics\"\n cross-database references are not implemented: \"no.such.database\".\"no.such.schema\".\"no.such.extended.statistics\"\n -- check \\drg and \\du\n-CREATE ROLE regress_du_role0;\n-CREATE ROLE regress_du_role1;\n-CREATE ROLE regress_du_role2;\n-CREATE ROLE regress_du_admin;\n+CREATE ROLE regress_du_role0 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE ROLE regress_du_role1 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE ROLE regress_du_role2 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE ROLE regress_du_admin PASSWORD NEON_PASSWORD_PLACEHOLDER;\n GRANT 
regress_du_role0 TO regress_du_admin WITH ADMIN TRUE;\n GRANT regress_du_role1 TO regress_du_admin WITH ADMIN TRUE;\n GRANT regress_du_role2 TO regress_du_admin WITH ADMIN TRUE;\ndiff --git a/src/test/regress/expected/publication.out b/src/test/regress/expected/publication.out\nindex 69dc6cfd85..68390cc18a 100644\n--- a/src/test/regress/expected/publication.out\n+++ b/src/test/regress/expected/publication.out\n@@ -1,9 +1,9 @@\n --\n -- PUBLICATION\n --\n-CREATE ROLE regress_publication_user LOGIN SUPERUSER;\n-CREATE ROLE regress_publication_user2;\n-CREATE ROLE regress_publication_user_dummy LOGIN NOSUPERUSER;\n+CREATE ROLE regress_publication_user LOGIN SUPERUSER PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE ROLE regress_publication_user2 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE ROLE regress_publication_user_dummy LOGIN NOSUPERUSER PASSWORD NEON_PASSWORD_PLACEHOLDER;\n SET SESSION AUTHORIZATION 'regress_publication_user';\n -- suppress warning that depends on wal_level\n SET client_min_messages = 'ERROR';\n@@ -1211,7 +1211,7 @@ ALTER PUBLICATION testpub2 ADD TABLE testpub_tbl1;  -- ok\n DROP PUBLICATION testpub2;\n DROP PUBLICATION testpub3;\n SET ROLE regress_publication_user;\n-CREATE ROLE regress_publication_user3;\n+CREATE ROLE regress_publication_user3 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n GRANT regress_publication_user2 TO regress_publication_user3;\n SET client_min_messages = 'ERROR';\n CREATE PUBLICATION testpub4 FOR TABLES IN SCHEMA pub_test;\ndiff --git a/src/test/regress/expected/regproc.out b/src/test/regress/expected/regproc.out\nindex a9420850b8..bd3b5f312d 100644\n--- a/src/test/regress/expected/regproc.out\n+++ b/src/test/regress/expected/regproc.out\n@@ -2,7 +2,7 @@\n -- regproc\n --\n /* If objects exist, return oids */\n-CREATE ROLE regress_regrole_test;\n+CREATE ROLE regress_regrole_test PASSWORD NEON_PASSWORD_PLACEHOLDER;\n -- without schemaname\n SELECT regoper('||/');\n  regoper \ndiff --git a/src/test/regress/expected/roleattributes.out 
b/src/test/regress/expected/roleattributes.out\nindex 5e6969b173..2c4d52237f 100644\n--- a/src/test/regress/expected/roleattributes.out\n+++ b/src/test/regress/expected/roleattributes.out\n@@ -1,233 +1,233 @@\n -- default for superuser is false\n-CREATE ROLE regress_test_def_superuser;\n-SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, rolpassword, rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_def_superuser';\n-          rolname           | rolsuper | rolinherit | rolcreaterole | rolcreatedb | rolcanlogin | rolreplication | rolbypassrls | rolconnlimit | rolpassword | rolvaliduntil \n-----------------------------+----------+------------+---------------+-------------+-------------+----------------+--------------+--------------+-------------+---------------\n- regress_test_def_superuser | f        | t          | f             | f           | f           | f              | f            |           -1 |             | \n+CREATE ROLE regress_test_def_superuser PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, regexp_replace(rolpassword, '(SCRAM-SHA-256)\\$(\\d+):([a-zA-Z0-9+/=]+)\\$([a-zA-Z0-9+=/]+):([a-zA-Z0-9+/=]+)', '\\1$\\2:<salt>$<storedkey>:<serverkey>'), rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_def_superuser';\n+          rolname           | rolsuper | rolinherit | rolcreaterole | rolcreatedb | rolcanlogin | rolreplication | rolbypassrls | rolconnlimit |                  regexp_replace                   | rolvaliduntil \n+----------------------------+----------+------------+---------------+-------------+-------------+----------------+--------------+--------------+---------------------------------------------------+---------------\n+ regress_test_def_superuser | f        | t          | f             | f           | f           | f              | f        
    |           -1 | SCRAM-SHA-256$4096:<salt>$<storedkey>:<serverkey> | \n (1 row)\n \n-CREATE ROLE regress_test_superuser WITH SUPERUSER;\n-SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, rolpassword, rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_superuser';\n-        rolname         | rolsuper | rolinherit | rolcreaterole | rolcreatedb | rolcanlogin | rolreplication | rolbypassrls | rolconnlimit | rolpassword | rolvaliduntil \n-------------------------+----------+------------+---------------+-------------+-------------+----------------+--------------+--------------+-------------+---------------\n- regress_test_superuser | t        | t          | f             | f           | f           | f              | f            |           -1 |             | \n+CREATE ROLE regress_test_superuser WITH SUPERUSER PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, regexp_replace(rolpassword, '(SCRAM-SHA-256)\\$(\\d+):([a-zA-Z0-9+/=]+)\\$([a-zA-Z0-9+=/]+):([a-zA-Z0-9+/=]+)', '\\1$\\2:<salt>$<storedkey>:<serverkey>'), rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_superuser';\n+        rolname         | rolsuper | rolinherit | rolcreaterole | rolcreatedb | rolcanlogin | rolreplication | rolbypassrls | rolconnlimit |                  regexp_replace                   | rolvaliduntil \n+------------------------+----------+------------+---------------+-------------+-------------+----------------+--------------+--------------+---------------------------------------------------+---------------\n+ regress_test_superuser | t        | t          | f             | f           | f           | f              | f            |           -1 | SCRAM-SHA-256$4096:<salt>$<storedkey>:<serverkey> | \n (1 row)\n \n ALTER ROLE regress_test_superuser WITH NOSUPERUSER;\n-SELECT rolname, 
rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, rolpassword, rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_superuser';\n-        rolname         | rolsuper | rolinherit | rolcreaterole | rolcreatedb | rolcanlogin | rolreplication | rolbypassrls | rolconnlimit | rolpassword | rolvaliduntil \n-------------------------+----------+------------+---------------+-------------+-------------+----------------+--------------+--------------+-------------+---------------\n- regress_test_superuser | f        | t          | f             | f           | f           | f              | f            |           -1 |             | \n+SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, regexp_replace(rolpassword, '(SCRAM-SHA-256)\\$(\\d+):([a-zA-Z0-9+/=]+)\\$([a-zA-Z0-9+=/]+):([a-zA-Z0-9+/=]+)', '\\1$\\2:<salt>$<storedkey>:<serverkey>'), rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_superuser';\n+        rolname         | rolsuper | rolinherit | rolcreaterole | rolcreatedb | rolcanlogin | rolreplication | rolbypassrls | rolconnlimit |                  regexp_replace                   | rolvaliduntil \n+------------------------+----------+------------+---------------+-------------+-------------+----------------+--------------+--------------+---------------------------------------------------+---------------\n+ regress_test_superuser | f        | t          | f             | f           | f           | f              | f            |           -1 | SCRAM-SHA-256$4096:<salt>$<storedkey>:<serverkey> | \n (1 row)\n \n ALTER ROLE regress_test_superuser WITH SUPERUSER;\n-SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, rolpassword, rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_superuser';\n-        rolname         | rolsuper | rolinherit | rolcreaterole 
| rolcreatedb | rolcanlogin | rolreplication | rolbypassrls | rolconnlimit | rolpassword | rolvaliduntil \n-------------------------+----------+------------+---------------+-------------+-------------+----------------+--------------+--------------+-------------+---------------\n- regress_test_superuser | t        | t          | f             | f           | f           | f              | f            |           -1 |             | \n+SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, regexp_replace(rolpassword, '(SCRAM-SHA-256)\\$(\\d+):([a-zA-Z0-9+/=]+)\\$([a-zA-Z0-9+=/]+):([a-zA-Z0-9+/=]+)', '\\1$\\2:<salt>$<storedkey>:<serverkey>'), rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_superuser';\n+        rolname         | rolsuper | rolinherit | rolcreaterole | rolcreatedb | rolcanlogin | rolreplication | rolbypassrls | rolconnlimit |                  regexp_replace                   | rolvaliduntil \n+------------------------+----------+------------+---------------+-------------+-------------+----------------+--------------+--------------+---------------------------------------------------+---------------\n+ regress_test_superuser | t        | t          | f             | f           | f           | f              | f            |           -1 | SCRAM-SHA-256$4096:<salt>$<storedkey>:<serverkey> | \n (1 row)\n \n -- default for inherit is true\n-CREATE ROLE regress_test_def_inherit;\n-SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, rolpassword, rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_def_inherit';\n-         rolname          | rolsuper | rolinherit | rolcreaterole | rolcreatedb | rolcanlogin | rolreplication | rolbypassrls | rolconnlimit | rolpassword | rolvaliduntil 
\n---------------------------+----------+------------+---------------+-------------+-------------+----------------+--------------+--------------+-------------+---------------\n- regress_test_def_inherit | f        | t          | f             | f           | f           | f              | f            |           -1 |             | \n+CREATE ROLE regress_test_def_inherit PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, regexp_replace(rolpassword, '(SCRAM-SHA-256)\\$(\\d+):([a-zA-Z0-9+/=]+)\\$([a-zA-Z0-9+=/]+):([a-zA-Z0-9+/=]+)', '\\1$\\2:<salt>$<storedkey>:<serverkey>'), rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_def_inherit';\n+         rolname          | rolsuper | rolinherit | rolcreaterole | rolcreatedb | rolcanlogin | rolreplication | rolbypassrls | rolconnlimit |                  regexp_replace                   | rolvaliduntil \n+--------------------------+----------+------------+---------------+-------------+-------------+----------------+--------------+--------------+---------------------------------------------------+---------------\n+ regress_test_def_inherit | f        | t          | f             | f           | f           | f              | f            |           -1 | SCRAM-SHA-256$4096:<salt>$<storedkey>:<serverkey> | \n (1 row)\n \n-CREATE ROLE regress_test_inherit WITH NOINHERIT;\n-SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, rolpassword, rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_inherit';\n-       rolname        | rolsuper | rolinherit | rolcreaterole | rolcreatedb | rolcanlogin | rolreplication | rolbypassrls | rolconnlimit | rolpassword | rolvaliduntil \n-----------------------+----------+------------+---------------+-------------+-------------+----------------+--------------+--------------+-------------+---------------\n- 
regress_test_inherit | f        | f          | f             | f           | f           | f              | f            |           -1 |             | \n+CREATE ROLE regress_test_inherit WITH NOINHERIT PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, regexp_replace(rolpassword, '(SCRAM-SHA-256)\\$(\\d+):([a-zA-Z0-9+/=]+)\\$([a-zA-Z0-9+=/]+):([a-zA-Z0-9+/=]+)', '\\1$\\2:<salt>$<storedkey>:<serverkey>'), rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_inherit';\n+       rolname        | rolsuper | rolinherit | rolcreaterole | rolcreatedb | rolcanlogin | rolreplication | rolbypassrls | rolconnlimit |                  regexp_replace                   | rolvaliduntil \n+----------------------+----------+------------+---------------+-------------+-------------+----------------+--------------+--------------+---------------------------------------------------+---------------\n+ regress_test_inherit | f        | f          | f             | f           | f           | f              | f            |           -1 | SCRAM-SHA-256$4096:<salt>$<storedkey>:<serverkey> | \n (1 row)\n \n ALTER ROLE regress_test_inherit WITH INHERIT;\n-SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, rolpassword, rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_inherit';\n-       rolname        | rolsuper | rolinherit | rolcreaterole | rolcreatedb | rolcanlogin | rolreplication | rolbypassrls | rolconnlimit | rolpassword | rolvaliduntil \n-----------------------+----------+------------+---------------+-------------+-------------+----------------+--------------+--------------+-------------+---------------\n- regress_test_inherit | f        | t          | f             | f           | f           | f              | f            |           -1 |             | \n+SELECT rolname, rolsuper, 
rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, regexp_replace(rolpassword, '(SCRAM-SHA-256)\\$(\\d+):([a-zA-Z0-9+/=]+)\\$([a-zA-Z0-9+=/]+):([a-zA-Z0-9+/=]+)', '\\1$\\2:<salt>$<storedkey>:<serverkey>'), rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_inherit';\n+       rolname        | rolsuper | rolinherit | rolcreaterole | rolcreatedb | rolcanlogin | rolreplication | rolbypassrls | rolconnlimit |                  regexp_replace                   | rolvaliduntil \n+----------------------+----------+------------+---------------+-------------+-------------+----------------+--------------+--------------+---------------------------------------------------+---------------\n+ regress_test_inherit | f        | t          | f             | f           | f           | f              | f            |           -1 | SCRAM-SHA-256$4096:<salt>$<storedkey>:<serverkey> | \n (1 row)\n \n ALTER ROLE regress_test_inherit WITH NOINHERIT;\n-SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, rolpassword, rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_inherit';\n-       rolname        | rolsuper | rolinherit | rolcreaterole | rolcreatedb | rolcanlogin | rolreplication | rolbypassrls | rolconnlimit | rolpassword | rolvaliduntil \n-----------------------+----------+------------+---------------+-------------+-------------+----------------+--------------+--------------+-------------+---------------\n- regress_test_inherit | f        | f          | f             | f           | f           | f              | f            |           -1 |             | \n+SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, regexp_replace(rolpassword, '(SCRAM-SHA-256)\\$(\\d+):([a-zA-Z0-9+/=]+)\\$([a-zA-Z0-9+=/]+):([a-zA-Z0-9+/=]+)', '\\1$\\2:<salt>$<storedkey>:<serverkey>'), rolvaliduntil FROM 
pg_authid WHERE rolname = 'regress_test_inherit';\n+       rolname        | rolsuper | rolinherit | rolcreaterole | rolcreatedb | rolcanlogin | rolreplication | rolbypassrls | rolconnlimit |                  regexp_replace                   | rolvaliduntil \n+----------------------+----------+------------+---------------+-------------+-------------+----------------+--------------+--------------+---------------------------------------------------+---------------\n+ regress_test_inherit | f        | f          | f             | f           | f           | f              | f            |           -1 | SCRAM-SHA-256$4096:<salt>$<storedkey>:<serverkey> | \n (1 row)\n \n -- default for create role is false\n-CREATE ROLE regress_test_def_createrole;\n-SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, rolpassword, rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_def_createrole';\n-           rolname           | rolsuper | rolinherit | rolcreaterole | rolcreatedb | rolcanlogin | rolreplication | rolbypassrls | rolconnlimit | rolpassword | rolvaliduntil \n------------------------------+----------+------------+---------------+-------------+-------------+----------------+--------------+--------------+-------------+---------------\n- regress_test_def_createrole | f        | t          | f             | f           | f           | f              | f            |           -1 |             | \n+CREATE ROLE regress_test_def_createrole PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, regexp_replace(rolpassword, '(SCRAM-SHA-256)\\$(\\d+):([a-zA-Z0-9+/=]+)\\$([a-zA-Z0-9+=/]+):([a-zA-Z0-9+/=]+)', '\\1$\\2:<salt>$<storedkey>:<serverkey>'), rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_def_createrole';\n+           rolname           | rolsuper | rolinherit | rolcreaterole | 
rolcreatedb | rolcanlogin | rolreplication | rolbypassrls | rolconnlimit |                  regexp_replace                   | rolvaliduntil \n+-----------------------------+----------+------------+---------------+-------------+-------------+----------------+--------------+--------------+---------------------------------------------------+---------------\n+ regress_test_def_createrole | f        | t          | f             | f           | f           | f              | f            |           -1 | SCRAM-SHA-256$4096:<salt>$<storedkey>:<serverkey> | \n (1 row)\n \n-CREATE ROLE regress_test_createrole WITH CREATEROLE;\n-SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, rolpassword, rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_createrole';\n-         rolname         | rolsuper | rolinherit | rolcreaterole | rolcreatedb | rolcanlogin | rolreplication | rolbypassrls | rolconnlimit | rolpassword | rolvaliduntil \n--------------------------+----------+------------+---------------+-------------+-------------+----------------+--------------+--------------+-------------+---------------\n- regress_test_createrole | f        | t          | t             | f           | f           | f              | f            |           -1 |             | \n+CREATE ROLE regress_test_createrole WITH CREATEROLE PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, regexp_replace(rolpassword, '(SCRAM-SHA-256)\\$(\\d+):([a-zA-Z0-9+/=]+)\\$([a-zA-Z0-9+=/]+):([a-zA-Z0-9+/=]+)', '\\1$\\2:<salt>$<storedkey>:<serverkey>'), rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_createrole';\n+         rolname         | rolsuper | rolinherit | rolcreaterole | rolcreatedb | rolcanlogin | rolreplication | rolbypassrls | rolconnlimit |                  regexp_replace                   | rolvaliduntil 
\n+-------------------------+----------+------------+---------------+-------------+-------------+----------------+--------------+--------------+---------------------------------------------------+---------------\n+ regress_test_createrole | f        | t          | t             | f           | f           | f              | f            |           -1 | SCRAM-SHA-256$4096:<salt>$<storedkey>:<serverkey> | \n (1 row)\n \n ALTER ROLE regress_test_createrole WITH NOCREATEROLE;\n-SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, rolpassword, rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_createrole';\n-         rolname         | rolsuper | rolinherit | rolcreaterole | rolcreatedb | rolcanlogin | rolreplication | rolbypassrls | rolconnlimit | rolpassword | rolvaliduntil \n--------------------------+----------+------------+---------------+-------------+-------------+----------------+--------------+--------------+-------------+---------------\n- regress_test_createrole | f        | t          | f             | f           | f           | f              | f            |           -1 |             | \n+SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, regexp_replace(rolpassword, '(SCRAM-SHA-256)\\$(\\d+):([a-zA-Z0-9+/=]+)\\$([a-zA-Z0-9+=/]+):([a-zA-Z0-9+/=]+)', '\\1$\\2:<salt>$<storedkey>:<serverkey>'), rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_createrole';\n+         rolname         | rolsuper | rolinherit | rolcreaterole | rolcreatedb | rolcanlogin | rolreplication | rolbypassrls | rolconnlimit |                  regexp_replace                   | rolvaliduntil \n+-------------------------+----------+------------+---------------+-------------+-------------+----------------+--------------+--------------+---------------------------------------------------+---------------\n+ regress_test_createrole | f   
     | t          | f             | f           | f           | f              | f            |           -1 | SCRAM-SHA-256$4096:<salt>$<storedkey>:<serverkey> | \n (1 row)\n \n ALTER ROLE regress_test_createrole WITH CREATEROLE;\n-SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, rolpassword, rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_createrole';\n-         rolname         | rolsuper | rolinherit | rolcreaterole | rolcreatedb | rolcanlogin | rolreplication | rolbypassrls | rolconnlimit | rolpassword | rolvaliduntil \n--------------------------+----------+------------+---------------+-------------+-------------+----------------+--------------+--------------+-------------+---------------\n- regress_test_createrole | f        | t          | t             | f           | f           | f              | f            |           -1 |             | \n+SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, regexp_replace(rolpassword, '(SCRAM-SHA-256)\\$(\\d+):([a-zA-Z0-9+/=]+)\\$([a-zA-Z0-9+=/]+):([a-zA-Z0-9+/=]+)', '\\1$\\2:<salt>$<storedkey>:<serverkey>'), rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_createrole';\n+         rolname         | rolsuper | rolinherit | rolcreaterole | rolcreatedb | rolcanlogin | rolreplication | rolbypassrls | rolconnlimit |                  regexp_replace                   | rolvaliduntil \n+-------------------------+----------+------------+---------------+-------------+-------------+----------------+--------------+--------------+---------------------------------------------------+---------------\n+ regress_test_createrole | f        | t          | t             | f           | f           | f              | f            |           -1 | SCRAM-SHA-256$4096:<salt>$<storedkey>:<serverkey> | \n (1 row)\n \n -- default for create database is false\n-CREATE ROLE 
regress_test_def_createdb;\n-SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, rolpassword, rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_def_createdb';\n-          rolname          | rolsuper | rolinherit | rolcreaterole | rolcreatedb | rolcanlogin | rolreplication | rolbypassrls | rolconnlimit | rolpassword | rolvaliduntil \n----------------------------+----------+------------+---------------+-------------+-------------+----------------+--------------+--------------+-------------+---------------\n- regress_test_def_createdb | f        | t          | f             | f           | f           | f              | f            |           -1 |             | \n+CREATE ROLE regress_test_def_createdb PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, regexp_replace(rolpassword, '(SCRAM-SHA-256)\\$(\\d+):([a-zA-Z0-9+/=]+)\\$([a-zA-Z0-9+=/]+):([a-zA-Z0-9+/=]+)', '\\1$\\2:<salt>$<storedkey>:<serverkey>'), rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_def_createdb';\n+          rolname          | rolsuper | rolinherit | rolcreaterole | rolcreatedb | rolcanlogin | rolreplication | rolbypassrls | rolconnlimit |                  regexp_replace                   | rolvaliduntil \n+---------------------------+----------+------------+---------------+-------------+-------------+----------------+--------------+--------------+---------------------------------------------------+---------------\n+ regress_test_def_createdb | f        | t          | f             | f           | f           | f              | f            |           -1 | SCRAM-SHA-256$4096:<salt>$<storedkey>:<serverkey> | \n (1 row)\n \n-CREATE ROLE regress_test_createdb WITH CREATEDB;\n-SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, 
rolpassword, rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_createdb';\n-        rolname        | rolsuper | rolinherit | rolcreaterole | rolcreatedb | rolcanlogin | rolreplication | rolbypassrls | rolconnlimit | rolpassword | rolvaliduntil \n------------------------+----------+------------+---------------+-------------+-------------+----------------+--------------+--------------+-------------+---------------\n- regress_test_createdb | f        | t          | f             | t           | f           | f              | f            |           -1 |             | \n+CREATE ROLE regress_test_createdb WITH CREATEDB PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, regexp_replace(rolpassword, '(SCRAM-SHA-256)\\$(\\d+):([a-zA-Z0-9+/=]+)\\$([a-zA-Z0-9+=/]+):([a-zA-Z0-9+/=]+)', '\\1$\\2:<salt>$<storedkey>:<serverkey>'), rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_createdb';\n+        rolname        | rolsuper | rolinherit | rolcreaterole | rolcreatedb | rolcanlogin | rolreplication | rolbypassrls | rolconnlimit |                  regexp_replace                   | rolvaliduntil \n+-----------------------+----------+------------+---------------+-------------+-------------+----------------+--------------+--------------+---------------------------------------------------+---------------\n+ regress_test_createdb | f        | t          | f             | t           | f           | f              | f            |           -1 | SCRAM-SHA-256$4096:<salt>$<storedkey>:<serverkey> | \n (1 row)\n \n ALTER ROLE regress_test_createdb WITH NOCREATEDB;\n-SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, rolpassword, rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_createdb';\n-        rolname        | rolsuper | rolinherit | rolcreaterole | rolcreatedb | rolcanlogin | 
rolreplication | rolbypassrls | rolconnlimit | rolpassword | rolvaliduntil \n------------------------+----------+------------+---------------+-------------+-------------+----------------+--------------+--------------+-------------+---------------\n- regress_test_createdb | f        | t          | f             | f           | f           | f              | f            |           -1 |             | \n+SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, regexp_replace(rolpassword, '(SCRAM-SHA-256)\\$(\\d+):([a-zA-Z0-9+/=]+)\\$([a-zA-Z0-9+=/]+):([a-zA-Z0-9+/=]+)', '\\1$\\2:<salt>$<storedkey>:<serverkey>'), rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_createdb';\n+        rolname        | rolsuper | rolinherit | rolcreaterole | rolcreatedb | rolcanlogin | rolreplication | rolbypassrls | rolconnlimit |                  regexp_replace                   | rolvaliduntil \n+-----------------------+----------+------------+---------------+-------------+-------------+----------------+--------------+--------------+---------------------------------------------------+---------------\n+ regress_test_createdb | f        | t          | f             | f           | f           | f              | f            |           -1 | SCRAM-SHA-256$4096:<salt>$<storedkey>:<serverkey> | \n (1 row)\n \n ALTER ROLE regress_test_createdb WITH CREATEDB;\n-SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, rolpassword, rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_createdb';\n-        rolname        | rolsuper | rolinherit | rolcreaterole | rolcreatedb | rolcanlogin | rolreplication | rolbypassrls | rolconnlimit | rolpassword | rolvaliduntil \n------------------------+----------+------------+---------------+-------------+-------------+----------------+--------------+--------------+-------------+---------------\n- 
regress_test_createdb | f        | t          | f             | t           | f           | f              | f            |           -1 |             | \n+SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, regexp_replace(rolpassword, '(SCRAM-SHA-256)\\$(\\d+):([a-zA-Z0-9+/=]+)\\$([a-zA-Z0-9+=/]+):([a-zA-Z0-9+/=]+)', '\\1$\\2:<salt>$<storedkey>:<serverkey>'), rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_createdb';\n+        rolname        | rolsuper | rolinherit | rolcreaterole | rolcreatedb | rolcanlogin | rolreplication | rolbypassrls | rolconnlimit |                  regexp_replace                   | rolvaliduntil \n+-----------------------+----------+------------+---------------+-------------+-------------+----------------+--------------+--------------+---------------------------------------------------+---------------\n+ regress_test_createdb | f        | t          | f             | t           | f           | f              | f            |           -1 | SCRAM-SHA-256$4096:<salt>$<storedkey>:<serverkey> | \n (1 row)\n \n -- default for can login is false for role\n-CREATE ROLE regress_test_def_role_canlogin;\n-SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, rolpassword, rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_def_role_canlogin';\n-            rolname             | rolsuper | rolinherit | rolcreaterole | rolcreatedb | rolcanlogin | rolreplication | rolbypassrls | rolconnlimit | rolpassword | rolvaliduntil \n---------------------------------+----------+------------+---------------+-------------+-------------+----------------+--------------+--------------+-------------+---------------\n- regress_test_def_role_canlogin | f        | t          | f             | f           | f           | f              | f            |           -1 |             | \n+CREATE ROLE 
regress_test_def_role_canlogin PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, regexp_replace(rolpassword, '(SCRAM-SHA-256)\\$(\\d+):([a-zA-Z0-9+/=]+)\\$([a-zA-Z0-9+=/]+):([a-zA-Z0-9+/=]+)', '\\1$\\2:<salt>$<storedkey>:<serverkey>'), rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_def_role_canlogin';\n+            rolname             | rolsuper | rolinherit | rolcreaterole | rolcreatedb | rolcanlogin | rolreplication | rolbypassrls | rolconnlimit |                  regexp_replace                   | rolvaliduntil \n+--------------------------------+----------+------------+---------------+-------------+-------------+----------------+--------------+--------------+---------------------------------------------------+---------------\n+ regress_test_def_role_canlogin | f        | t          | f             | f           | f           | f              | f            |           -1 | SCRAM-SHA-256$4096:<salt>$<storedkey>:<serverkey> | \n (1 row)\n \n-CREATE ROLE regress_test_role_canlogin WITH LOGIN;\n-SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, rolpassword, rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_role_canlogin';\n-          rolname           | rolsuper | rolinherit | rolcreaterole | rolcreatedb | rolcanlogin | rolreplication | rolbypassrls | rolconnlimit | rolpassword | rolvaliduntil \n-----------------------------+----------+------------+---------------+-------------+-------------+----------------+--------------+--------------+-------------+---------------\n- regress_test_role_canlogin | f        | t          | f             | f           | t           | f              | f            |           -1 |             | \n+CREATE ROLE regress_test_role_canlogin WITH LOGIN PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+SELECT rolname, rolsuper, rolinherit, 
rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, regexp_replace(rolpassword, '(SCRAM-SHA-256)\\$(\\d+):([a-zA-Z0-9+/=]+)\\$([a-zA-Z0-9+=/]+):([a-zA-Z0-9+/=]+)', '\\1$\\2:<salt>$<storedkey>:<serverkey>'), rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_role_canlogin';\n+          rolname           | rolsuper | rolinherit | rolcreaterole | rolcreatedb | rolcanlogin | rolreplication | rolbypassrls | rolconnlimit |                  regexp_replace                   | rolvaliduntil \n+----------------------------+----------+------------+---------------+-------------+-------------+----------------+--------------+--------------+---------------------------------------------------+---------------\n+ regress_test_role_canlogin | f        | t          | f             | f           | t           | f              | f            |           -1 | SCRAM-SHA-256$4096:<salt>$<storedkey>:<serverkey> | \n (1 row)\n \n ALTER ROLE regress_test_role_canlogin WITH NOLOGIN;\n-SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, rolpassword, rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_role_canlogin';\n-          rolname           | rolsuper | rolinherit | rolcreaterole | rolcreatedb | rolcanlogin | rolreplication | rolbypassrls | rolconnlimit | rolpassword | rolvaliduntil \n-----------------------------+----------+------------+---------------+-------------+-------------+----------------+--------------+--------------+-------------+---------------\n- regress_test_role_canlogin | f        | t          | f             | f           | f           | f              | f            |           -1 |             | \n+SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, regexp_replace(rolpassword, '(SCRAM-SHA-256)\\$(\\d+):([a-zA-Z0-9+/=]+)\\$([a-zA-Z0-9+=/]+):([a-zA-Z0-9+/=]+)', 
'\\1$\\2:<salt>$<storedkey>:<serverkey>'), rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_role_canlogin';\n+          rolname           | rolsuper | rolinherit | rolcreaterole | rolcreatedb | rolcanlogin | rolreplication | rolbypassrls | rolconnlimit |                  regexp_replace                   | rolvaliduntil \n+----------------------------+----------+------------+---------------+-------------+-------------+----------------+--------------+--------------+---------------------------------------------------+---------------\n+ regress_test_role_canlogin | f        | t          | f             | f           | f           | f              | f            |           -1 | SCRAM-SHA-256$4096:<salt>$<storedkey>:<serverkey> | \n (1 row)\n \n ALTER ROLE regress_test_role_canlogin WITH LOGIN;\n-SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, rolpassword, rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_role_canlogin';\n-          rolname           | rolsuper | rolinherit | rolcreaterole | rolcreatedb | rolcanlogin | rolreplication | rolbypassrls | rolconnlimit | rolpassword | rolvaliduntil \n-----------------------------+----------+------------+---------------+-------------+-------------+----------------+--------------+--------------+-------------+---------------\n- regress_test_role_canlogin | f        | t          | f             | f           | t           | f              | f            |           -1 |             | \n+SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, regexp_replace(rolpassword, '(SCRAM-SHA-256)\\$(\\d+):([a-zA-Z0-9+/=]+)\\$([a-zA-Z0-9+=/]+):([a-zA-Z0-9+/=]+)', '\\1$\\2:<salt>$<storedkey>:<serverkey>'), rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_role_canlogin';\n+          rolname           | rolsuper | rolinherit | rolcreaterole | rolcreatedb | rolcanlogin | 
rolreplication | rolbypassrls | rolconnlimit |                  regexp_replace                   | rolvaliduntil \n+----------------------------+----------+------------+---------------+-------------+-------------+----------------+--------------+--------------+---------------------------------------------------+---------------\n+ regress_test_role_canlogin | f        | t          | f             | f           | t           | f              | f            |           -1 | SCRAM-SHA-256$4096:<salt>$<storedkey>:<serverkey> | \n (1 row)\n \n -- default for can login is true for user\n-CREATE USER regress_test_def_user_canlogin;\n-SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, rolpassword, rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_def_user_canlogin';\n-            rolname             | rolsuper | rolinherit | rolcreaterole | rolcreatedb | rolcanlogin | rolreplication | rolbypassrls | rolconnlimit | rolpassword | rolvaliduntil \n---------------------------------+----------+------------+---------------+-------------+-------------+----------------+--------------+--------------+-------------+---------------\n- regress_test_def_user_canlogin | f        | t          | f             | f           | t           | f              | f            |           -1 |             | \n+CREATE USER regress_test_def_user_canlogin PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, regexp_replace(rolpassword, '(SCRAM-SHA-256)\\$(\\d+):([a-zA-Z0-9+/=]+)\\$([a-zA-Z0-9+=/]+):([a-zA-Z0-9+/=]+)', '\\1$\\2:<salt>$<storedkey>:<serverkey>'), rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_def_user_canlogin';\n+            rolname             | rolsuper | rolinherit | rolcreaterole | rolcreatedb | rolcanlogin | rolreplication | rolbypassrls | rolconnlimit |                  regexp_replace    
               | rolvaliduntil \n+--------------------------------+----------+------------+---------------+-------------+-------------+----------------+--------------+--------------+---------------------------------------------------+---------------\n+ regress_test_def_user_canlogin | f        | t          | f             | f           | t           | f              | f            |           -1 | SCRAM-SHA-256$4096:<salt>$<storedkey>:<serverkey> | \n (1 row)\n \n-CREATE USER regress_test_user_canlogin WITH NOLOGIN;\n-SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, rolpassword, rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_user_canlogin';\n-          rolname           | rolsuper | rolinherit | rolcreaterole | rolcreatedb | rolcanlogin | rolreplication | rolbypassrls | rolconnlimit | rolpassword | rolvaliduntil \n-----------------------------+----------+------------+---------------+-------------+-------------+----------------+--------------+--------------+-------------+---------------\n- regress_test_user_canlogin | f        | t          | f             | f           | f           | f              | f            |           -1 |             | \n+CREATE USER regress_test_user_canlogin WITH NOLOGIN PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, regexp_replace(rolpassword, '(SCRAM-SHA-256)\\$(\\d+):([a-zA-Z0-9+/=]+)\\$([a-zA-Z0-9+=/]+):([a-zA-Z0-9+/=]+)', '\\1$\\2:<salt>$<storedkey>:<serverkey>'), rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_user_canlogin';\n+          rolname           | rolsuper | rolinherit | rolcreaterole | rolcreatedb | rolcanlogin | rolreplication | rolbypassrls | rolconnlimit |                  regexp_replace                   | rolvaliduntil 
\n+----------------------------+----------+------------+---------------+-------------+-------------+----------------+--------------+--------------+---------------------------------------------------+---------------\n+ regress_test_user_canlogin | f        | t          | f             | f           | f           | f              | f            |           -1 | SCRAM-SHA-256$4096:<salt>$<storedkey>:<serverkey> | \n (1 row)\n \n ALTER USER regress_test_user_canlogin WITH LOGIN;\n-SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, rolpassword, rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_user_canlogin';\n-          rolname           | rolsuper | rolinherit | rolcreaterole | rolcreatedb | rolcanlogin | rolreplication | rolbypassrls | rolconnlimit | rolpassword | rolvaliduntil \n-----------------------------+----------+------------+---------------+-------------+-------------+----------------+--------------+--------------+-------------+---------------\n- regress_test_user_canlogin | f        | t          | f             | f           | t           | f              | f            |           -1 |             | \n+SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, regexp_replace(rolpassword, '(SCRAM-SHA-256)\\$(\\d+):([a-zA-Z0-9+/=]+)\\$([a-zA-Z0-9+=/]+):([a-zA-Z0-9+/=]+)', '\\1$\\2:<salt>$<storedkey>:<serverkey>'), rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_user_canlogin';\n+          rolname           | rolsuper | rolinherit | rolcreaterole | rolcreatedb | rolcanlogin | rolreplication | rolbypassrls | rolconnlimit |                  regexp_replace                   | rolvaliduntil \n+----------------------------+----------+------------+---------------+-------------+-------------+----------------+--------------+--------------+---------------------------------------------------+---------------\n+ 
regress_test_user_canlogin | f        | t          | f             | f           | t           | f              | f            |           -1 | SCRAM-SHA-256$4096:<salt>$<storedkey>:<serverkey> | \n (1 row)\n \n ALTER USER regress_test_user_canlogin WITH NOLOGIN;\n-SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, rolpassword, rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_user_canlogin';\n-          rolname           | rolsuper | rolinherit | rolcreaterole | rolcreatedb | rolcanlogin | rolreplication | rolbypassrls | rolconnlimit | rolpassword | rolvaliduntil \n-----------------------------+----------+------------+---------------+-------------+-------------+----------------+--------------+--------------+-------------+---------------\n- regress_test_user_canlogin | f        | t          | f             | f           | f           | f              | f            |           -1 |             | \n+SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, regexp_replace(rolpassword, '(SCRAM-SHA-256)\\$(\\d+):([a-zA-Z0-9+/=]+)\\$([a-zA-Z0-9+=/]+):([a-zA-Z0-9+/=]+)', '\\1$\\2:<salt>$<storedkey>:<serverkey>'), rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_user_canlogin';\n+          rolname           | rolsuper | rolinherit | rolcreaterole | rolcreatedb | rolcanlogin | rolreplication | rolbypassrls | rolconnlimit |                  regexp_replace                   | rolvaliduntil \n+----------------------------+----------+------------+---------------+-------------+-------------+----------------+--------------+--------------+---------------------------------------------------+---------------\n+ regress_test_user_canlogin | f        | t          | f             | f           | f           | f              | f            |           -1 | SCRAM-SHA-256$4096:<salt>$<storedkey>:<serverkey> | \n (1 row)\n \n -- default 
for replication is false\n-CREATE ROLE regress_test_def_replication;\n-SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, rolpassword, rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_def_replication';\n-           rolname            | rolsuper | rolinherit | rolcreaterole | rolcreatedb | rolcanlogin | rolreplication | rolbypassrls | rolconnlimit | rolpassword | rolvaliduntil \n-------------------------------+----------+------------+---------------+-------------+-------------+----------------+--------------+--------------+-------------+---------------\n- regress_test_def_replication | f        | t          | f             | f           | f           | f              | f            |           -1 |             | \n+CREATE ROLE regress_test_def_replication PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, regexp_replace(rolpassword, '(SCRAM-SHA-256)\\$(\\d+):([a-zA-Z0-9+/=]+)\\$([a-zA-Z0-9+=/]+):([a-zA-Z0-9+/=]+)', '\\1$\\2:<salt>$<storedkey>:<serverkey>'), rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_def_replication';\n+           rolname            | rolsuper | rolinherit | rolcreaterole | rolcreatedb | rolcanlogin | rolreplication | rolbypassrls | rolconnlimit |                  regexp_replace                   | rolvaliduntil \n+------------------------------+----------+------------+---------------+-------------+-------------+----------------+--------------+--------------+---------------------------------------------------+---------------\n+ regress_test_def_replication | f        | t          | f             | f           | f           | f              | f            |           -1 | SCRAM-SHA-256$4096:<salt>$<storedkey>:<serverkey> | \n (1 row)\n \n-CREATE ROLE regress_test_replication WITH REPLICATION;\n-SELECT rolname, rolsuper, rolinherit, rolcreaterole, 
rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, rolpassword, rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_replication';\n-         rolname          | rolsuper | rolinherit | rolcreaterole | rolcreatedb | rolcanlogin | rolreplication | rolbypassrls | rolconnlimit | rolpassword | rolvaliduntil \n---------------------------+----------+------------+---------------+-------------+-------------+----------------+--------------+--------------+-------------+---------------\n- regress_test_replication | f        | t          | f             | f           | f           | t              | f            |           -1 |             | \n+CREATE ROLE regress_test_replication WITH REPLICATION PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, regexp_replace(rolpassword, '(SCRAM-SHA-256)\\$(\\d+):([a-zA-Z0-9+/=]+)\\$([a-zA-Z0-9+=/]+):([a-zA-Z0-9+/=]+)', '\\1$\\2:<salt>$<storedkey>:<serverkey>'), rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_replication';\n+         rolname          | rolsuper | rolinherit | rolcreaterole | rolcreatedb | rolcanlogin | rolreplication | rolbypassrls | rolconnlimit |                  regexp_replace                   | rolvaliduntil \n+--------------------------+----------+------------+---------------+-------------+-------------+----------------+--------------+--------------+---------------------------------------------------+---------------\n+ regress_test_replication | f        | t          | f             | f           | f           | t              | f            |           -1 | SCRAM-SHA-256$4096:<salt>$<storedkey>:<serverkey> | \n (1 row)\n \n ALTER ROLE regress_test_replication WITH NOREPLICATION;\n-SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, rolpassword, rolvaliduntil FROM pg_authid WHERE rolname = 
'regress_test_replication';\n-         rolname          | rolsuper | rolinherit | rolcreaterole | rolcreatedb | rolcanlogin | rolreplication | rolbypassrls | rolconnlimit | rolpassword | rolvaliduntil \n---------------------------+----------+------------+---------------+-------------+-------------+----------------+--------------+--------------+-------------+---------------\n- regress_test_replication | f        | t          | f             | f           | f           | f              | f            |           -1 |             | \n+SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, regexp_replace(rolpassword, '(SCRAM-SHA-256)\\$(\\d+):([a-zA-Z0-9+/=]+)\\$([a-zA-Z0-9+=/]+):([a-zA-Z0-9+/=]+)', '\\1$\\2:<salt>$<storedkey>:<serverkey>'), rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_replication';\n+         rolname          | rolsuper | rolinherit | rolcreaterole | rolcreatedb | rolcanlogin | rolreplication | rolbypassrls | rolconnlimit |                  regexp_replace                   | rolvaliduntil \n+--------------------------+----------+------------+---------------+-------------+-------------+----------------+--------------+--------------+---------------------------------------------------+---------------\n+ regress_test_replication | f        | t          | f             | f           | f           | f              | f            |           -1 | SCRAM-SHA-256$4096:<salt>$<storedkey>:<serverkey> | \n (1 row)\n \n ALTER ROLE regress_test_replication WITH REPLICATION;\n-SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, rolpassword, rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_replication';\n-         rolname          | rolsuper | rolinherit | rolcreaterole | rolcreatedb | rolcanlogin | rolreplication | rolbypassrls | rolconnlimit | rolpassword | rolvaliduntil 
\n---------------------------+----------+------------+---------------+-------------+-------------+----------------+--------------+--------------+-------------+---------------\n- regress_test_replication | f        | t          | f             | f           | f           | t              | f            |           -1 |             | \n+SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, regexp_replace(rolpassword, '(SCRAM-SHA-256)\\$(\\d+):([a-zA-Z0-9+/=]+)\\$([a-zA-Z0-9+=/]+):([a-zA-Z0-9+/=]+)', '\\1$\\2:<salt>$<storedkey>:<serverkey>'), rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_replication';\n+         rolname          | rolsuper | rolinherit | rolcreaterole | rolcreatedb | rolcanlogin | rolreplication | rolbypassrls | rolconnlimit |                  regexp_replace                   | rolvaliduntil \n+--------------------------+----------+------------+---------------+-------------+-------------+----------------+--------------+--------------+---------------------------------------------------+---------------\n+ regress_test_replication | f        | t          | f             | f           | f           | t              | f            |           -1 | SCRAM-SHA-256$4096:<salt>$<storedkey>:<serverkey> | \n (1 row)\n \n -- default for bypassrls is false\n-CREATE ROLE regress_test_def_bypassrls;\n-SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, rolpassword, rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_def_bypassrls';\n-          rolname           | rolsuper | rolinherit | rolcreaterole | rolcreatedb | rolcanlogin | rolreplication | rolbypassrls | rolconnlimit | rolpassword | rolvaliduntil \n-----------------------------+----------+------------+---------------+-------------+-------------+----------------+--------------+--------------+-------------+---------------\n- regress_test_def_bypassrls | f 
       | t          | f             | f           | f           | f              | f            |           -1 |             | \n+CREATE ROLE regress_test_def_bypassrls PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, regexp_replace(rolpassword, '(SCRAM-SHA-256)\\$(\\d+):([a-zA-Z0-9+/=]+)\\$([a-zA-Z0-9+=/]+):([a-zA-Z0-9+/=]+)', '\\1$\\2:<salt>$<storedkey>:<serverkey>'), rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_def_bypassrls';\n+          rolname           | rolsuper | rolinherit | rolcreaterole | rolcreatedb | rolcanlogin | rolreplication | rolbypassrls | rolconnlimit |                  regexp_replace                   | rolvaliduntil \n+----------------------------+----------+------------+---------------+-------------+-------------+----------------+--------------+--------------+---------------------------------------------------+---------------\n+ regress_test_def_bypassrls | f        | t          | f             | f           | f           | f              | f            |           -1 | SCRAM-SHA-256$4096:<salt>$<storedkey>:<serverkey> | \n (1 row)\n \n-CREATE ROLE regress_test_bypassrls WITH BYPASSRLS;\n-SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, rolpassword, rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_bypassrls';\n-        rolname         | rolsuper | rolinherit | rolcreaterole | rolcreatedb | rolcanlogin | rolreplication | rolbypassrls | rolconnlimit | rolpassword | rolvaliduntil \n-------------------------+----------+------------+---------------+-------------+-------------+----------------+--------------+--------------+-------------+---------------\n- regress_test_bypassrls | f        | t          | f             | f           | f           | f              | t            |           -1 |             | \n+CREATE ROLE 
regress_test_bypassrls WITH BYPASSRLS PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, regexp_replace(rolpassword, '(SCRAM-SHA-256)\\$(\\d+):([a-zA-Z0-9+/=]+)\\$([a-zA-Z0-9+=/]+):([a-zA-Z0-9+/=]+)', '\\1$\\2:<salt>$<storedkey>:<serverkey>'), rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_bypassrls';\n+        rolname         | rolsuper | rolinherit | rolcreaterole | rolcreatedb | rolcanlogin | rolreplication | rolbypassrls | rolconnlimit |                  regexp_replace                   | rolvaliduntil \n+------------------------+----------+------------+---------------+-------------+-------------+----------------+--------------+--------------+---------------------------------------------------+---------------\n+ regress_test_bypassrls | f        | t          | f             | f           | f           | f              | t            |           -1 | SCRAM-SHA-256$4096:<salt>$<storedkey>:<serverkey> | \n (1 row)\n \n ALTER ROLE regress_test_bypassrls WITH NOBYPASSRLS;\n-SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, rolpassword, rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_bypassrls';\n-        rolname         | rolsuper | rolinherit | rolcreaterole | rolcreatedb | rolcanlogin | rolreplication | rolbypassrls | rolconnlimit | rolpassword | rolvaliduntil \n-------------------------+----------+------------+---------------+-------------+-------------+----------------+--------------+--------------+-------------+---------------\n- regress_test_bypassrls | f        | t          | f             | f           | f           | f              | f            |           -1 |             | \n+SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, regexp_replace(rolpassword, 
'(SCRAM-SHA-256)\\$(\\d+):([a-zA-Z0-9+/=]+)\\$([a-zA-Z0-9+=/]+):([a-zA-Z0-9+/=]+)', '\\1$\\2:<salt>$<storedkey>:<serverkey>'), rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_bypassrls';\n+        rolname         | rolsuper | rolinherit | rolcreaterole | rolcreatedb | rolcanlogin | rolreplication | rolbypassrls | rolconnlimit |                  regexp_replace                   | rolvaliduntil \n+------------------------+----------+------------+---------------+-------------+-------------+----------------+--------------+--------------+---------------------------------------------------+---------------\n+ regress_test_bypassrls | f        | t          | f             | f           | f           | f              | f            |           -1 | SCRAM-SHA-256$4096:<salt>$<storedkey>:<serverkey> | \n (1 row)\n \n ALTER ROLE regress_test_bypassrls WITH BYPASSRLS;\n-SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, rolpassword, rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_bypassrls';\n-        rolname         | rolsuper | rolinherit | rolcreaterole | rolcreatedb | rolcanlogin | rolreplication | rolbypassrls | rolconnlimit | rolpassword | rolvaliduntil \n-------------------------+----------+------------+---------------+-------------+-------------+----------------+--------------+--------------+-------------+---------------\n- regress_test_bypassrls | f        | t          | f             | f           | f           | f              | t            |           -1 |             | \n+SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, regexp_replace(rolpassword, '(SCRAM-SHA-256)\\$(\\d+):([a-zA-Z0-9+/=]+)\\$([a-zA-Z0-9+=/]+):([a-zA-Z0-9+/=]+)', '\\1$\\2:<salt>$<storedkey>:<serverkey>'), rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_bypassrls';\n+        rolname         | rolsuper | rolinherit | 
rolcreaterole | rolcreatedb | rolcanlogin | rolreplication | rolbypassrls | rolconnlimit |                  regexp_replace                   | rolvaliduntil \n+------------------------+----------+------------+---------------+-------------+-------------+----------------+--------------+--------------+---------------------------------------------------+---------------\n+ regress_test_bypassrls | f        | t          | f             | f           | f           | f              | t            |           -1 | SCRAM-SHA-256$4096:<salt>$<storedkey>:<serverkey> | \n (1 row)\n \n -- clean up roles\ndiff --git a/src/test/regress/expected/rowsecurity.out b/src/test/regress/expected/rowsecurity.out\nindex 218c0c2863..f7af0cfb12 100644\n--- a/src/test/regress/expected/rowsecurity.out\n+++ b/src/test/regress/expected/rowsecurity.out\n@@ -14,13 +14,13 @@ DROP ROLE IF EXISTS regress_rls_group2;\n DROP SCHEMA IF EXISTS regress_rls_schema CASCADE;\n RESET client_min_messages;\n -- initial setup\n-CREATE USER regress_rls_alice NOLOGIN;\n-CREATE USER regress_rls_bob NOLOGIN;\n-CREATE USER regress_rls_carol NOLOGIN;\n-CREATE USER regress_rls_dave NOLOGIN;\n-CREATE USER regress_rls_exempt_user BYPASSRLS NOLOGIN;\n-CREATE ROLE regress_rls_group1 NOLOGIN;\n-CREATE ROLE regress_rls_group2 NOLOGIN;\n+CREATE USER regress_rls_alice NOLOGIN PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE USER regress_rls_bob NOLOGIN PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE USER regress_rls_carol NOLOGIN PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE USER regress_rls_dave NOLOGIN PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE USER regress_rls_exempt_user BYPASSRLS NOLOGIN PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE ROLE regress_rls_group1 NOLOGIN PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE ROLE regress_rls_group2 NOLOGIN PASSWORD NEON_PASSWORD_PLACEHOLDER;\n GRANT regress_rls_group1 TO regress_rls_bob;\n GRANT regress_rls_group2 TO regress_rls_carol;\n CREATE SCHEMA regress_rls_schema;\n@@ -4352,8 +4352,8 @@ 
SELECT count(*) = 0 FROM pg_depend\n \n -- DROP OWNED BY testing\n RESET SESSION AUTHORIZATION;\n-CREATE ROLE regress_rls_dob_role1;\n-CREATE ROLE regress_rls_dob_role2;\n+CREATE ROLE regress_rls_dob_role1 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE ROLE regress_rls_dob_role2 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n CREATE TABLE dob_t1 (c1 int);\n CREATE TABLE dob_t2 (c1 int) PARTITION BY RANGE (c1);\n CREATE POLICY p1 ON dob_t1 TO regress_rls_dob_role1 USING (true);\ndiff --git a/src/test/regress/expected/rules.out b/src/test/regress/expected/rules.out\nindex 09a255649b..15895f0c53 100644\n--- a/src/test/regress/expected/rules.out\n+++ b/src/test/regress/expected/rules.out\n@@ -3708,7 +3708,7 @@ DROP TABLE ruletest2;\n -- Test non-SELECT rule on security invoker view.\n -- Should use view owner's permissions.\n --\n-CREATE USER regress_rule_user1;\n+CREATE USER regress_rule_user1 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n CREATE TABLE ruletest_t1 (x int);\n CREATE TABLE ruletest_t2 (x int);\n CREATE VIEW ruletest_v1 WITH (security_invoker=true) AS\ndiff --git a/src/test/regress/expected/security_label.out b/src/test/regress/expected/security_label.out\nindex a8e01a6220..83543b250a 100644\n--- a/src/test/regress/expected/security_label.out\n+++ b/src/test/regress/expected/security_label.out\n@@ -6,8 +6,8 @@ SET client_min_messages TO 'warning';\n DROP ROLE IF EXISTS regress_seclabel_user1;\n DROP ROLE IF EXISTS regress_seclabel_user2;\n RESET client_min_messages;\n-CREATE USER regress_seclabel_user1 WITH CREATEROLE;\n-CREATE USER regress_seclabel_user2;\n+CREATE USER regress_seclabel_user1 WITH CREATEROLE PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE USER regress_seclabel_user2 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n CREATE TABLE seclabel_tbl1 (a int, b text);\n CREATE TABLE seclabel_tbl2 (x int, y text);\n CREATE VIEW seclabel_view1 AS SELECT * FROM seclabel_tbl2;\ndiff --git a/src/test/regress/expected/select_into.out b/src/test/regress/expected/select_into.out\nindex 
b79fe9a1c0..e29fab88ab 100644\n--- a/src/test/regress/expected/select_into.out\n+++ b/src/test/regress/expected/select_into.out\n@@ -15,7 +15,7 @@ DROP TABLE sitmp1;\n -- SELECT INTO and INSERT permission, if owner is not allowed to insert.\n --\n CREATE SCHEMA selinto_schema;\n-CREATE USER regress_selinto_user;\n+CREATE USER regress_selinto_user PASSWORD NEON_PASSWORD_PLACEHOLDER;\n ALTER DEFAULT PRIVILEGES FOR ROLE regress_selinto_user\n \t  REVOKE INSERT ON TABLES FROM regress_selinto_user;\n GRANT ALL ON SCHEMA selinto_schema TO public;\ndiff --git a/src/test/regress/expected/select_parallel.out b/src/test/regress/expected/select_parallel.out\nindex afc6ab08c2..dfcd891af3 100644\n--- a/src/test/regress/expected/select_parallel.out\n+++ b/src/test/regress/expected/select_parallel.out\n@@ -1220,7 +1220,7 @@ SELECT 1 FROM tenk1_vw_sec\n \n rollback;\n -- test that function option SET ROLE works in parallel workers.\n-create role regress_parallel_worker;\n+create role regress_parallel_worker PASSWORD NEON_PASSWORD_PLACEHOLDER;\n create function set_and_report_role() returns text as\n   $$ select current_setting('role') $$ language sql parallel safe\n   set role = regress_parallel_worker;\ndiff --git a/src/test/regress/expected/select_views.out b/src/test/regress/expected/select_views.out\nindex 1aeed8452b..7d9427d070 100644\n--- a/src/test/regress/expected/select_views.out\n+++ b/src/test/regress/expected/select_views.out\n@@ -1250,7 +1250,7 @@ SELECT * FROM toyemp WHERE name = 'sharon';\n --\n -- Test for Leaky view scenario\n --\n-CREATE ROLE regress_alice;\n+CREATE ROLE regress_alice PASSWORD NEON_PASSWORD_PLACEHOLDER;\n CREATE FUNCTION f_leak (text)\n        RETURNS bool LANGUAGE 'plpgsql' COST 0.0000001\n        AS 'BEGIN RAISE NOTICE ''f_leak => %'', $1; RETURN true; END';\ndiff --git a/src/test/regress/expected/sequence.out b/src/test/regress/expected/sequence.out\nindex f02f020542..c9e0fda350 100644\n--- a/src/test/regress/expected/sequence.out\n+++ 
b/src/test/regress/expected/sequence.out\n@@ -22,7 +22,7 @@ CREATE SEQUENCE sequence_testx OWNED BY pg_class_oid_index.oid;  -- not a table\n ERROR:  sequence cannot be owned by relation \"pg_class_oid_index\"\n DETAIL:  This operation is not supported for indexes.\n CREATE SEQUENCE sequence_testx OWNED BY pg_class.relname;  -- not same schema\n-ERROR:  sequence must be in same schema as table it is linked to\n+ERROR:  sequence must have same owner as table it is linked to\n CREATE TABLE sequence_test_table (a int);\n CREATE SEQUENCE sequence_testx OWNED BY sequence_test_table.b;  -- wrong column\n ERROR:  column \"b\" of relation \"sequence_test_table\" does not exist\n@@ -639,7 +639,7 @@ SELECT setval('sequence_test2', 1);  -- error\n ERROR:  cannot execute setval() in a read-only transaction\n ROLLBACK;\n -- privileges tests\n-CREATE USER regress_seq_user;\n+CREATE USER regress_seq_user PASSWORD NEON_PASSWORD_PLACEHOLDER;\n -- nextval\n BEGIN;\n SET LOCAL SESSION AUTHORIZATION regress_seq_user;\ndiff --git a/src/test/regress/expected/stats.out b/src/test/regress/expected/stats.out\nindex 94187e59cf..72346e2c71 100644\n--- a/src/test/regress/expected/stats.out\n+++ b/src/test/regress/expected/stats.out\n@@ -1283,37 +1283,6 @@ SELECT current_setting('fsync') = 'off'\n  t\n (1 row)\n \n--- Change the tablespace so that the table is rewritten directly, then SELECT\n--- from it to cause it to be read back into shared buffers.\n-SELECT sum(reads) AS io_sum_shared_before_reads\n-  FROM pg_stat_io WHERE context = 'normal' AND object = 'relation' \\gset\n--- Do this in a transaction to prevent spurious failures due to concurrent accesses to our newly\n--- rewritten table, e.g. 
by autovacuum.\n-BEGIN;\n-ALTER TABLE test_io_shared SET TABLESPACE regress_tblspace;\n--- SELECT from the table so that the data is read into shared buffers and\n--- context 'normal', object 'relation' reads are counted.\n-SELECT COUNT(*) FROM test_io_shared;\n- count \n--------\n-   100\n-(1 row)\n-\n-COMMIT;\n-SELECT pg_stat_force_next_flush();\n- pg_stat_force_next_flush \n---------------------------\n- \n-(1 row)\n-\n-SELECT sum(reads) AS io_sum_shared_after_reads\n-  FROM pg_stat_io WHERE context = 'normal' AND object = 'relation'  \\gset\n-SELECT :io_sum_shared_after_reads > :io_sum_shared_before_reads;\n- ?column? \n-----------\n- t\n-(1 row)\n-\n SELECT sum(hits) AS io_sum_shared_before_hits\n   FROM pg_stat_io WHERE context = 'normal' AND object = 'relation' \\gset\n -- Select from the table again to count hits.\n@@ -1415,6 +1384,7 @@ SELECT :io_sum_local_after_evictions > :io_sum_local_before_evictions,\n -- local buffers, exercising a different codepath than standard local buffer\n -- writes.\n ALTER TABLE test_io_local SET TABLESPACE regress_tblspace;\n+ERROR:  tablespace \"regress_tblspace\" does not exist\n SELECT pg_stat_force_next_flush();\n  pg_stat_force_next_flush \n --------------------------\n@@ -1426,7 +1396,7 @@ SELECT sum(writes) AS io_sum_local_new_tblspc_writes\n SELECT :io_sum_local_new_tblspc_writes > :io_sum_local_after_writes;\n  ?column? 
\n ----------\n- t\n+ f\n (1 row)\n \n RESET temp_buffers;\ndiff --git a/src/test/regress/expected/stats_ext.out b/src/test/regress/expected/stats_ext.out\nindex b4c85613de..d32a9a69ad 100644\n--- a/src/test/regress/expected/stats_ext.out\n+++ b/src/test/regress/expected/stats_ext.out\n@@ -70,7 +70,7 @@ DROP TABLE ext_stats_test;\n CREATE TABLE ab1 (a INTEGER, b INTEGER, c INTEGER);\n CREATE STATISTICS IF NOT EXISTS ab1_a_b_stats ON a, b FROM ab1;\n COMMENT ON STATISTICS ab1_a_b_stats IS 'new comment';\n-CREATE ROLE regress_stats_ext;\n+CREATE ROLE regress_stats_ext PASSWORD NEON_PASSWORD_PLACEHOLDER;\n SET SESSION AUTHORIZATION regress_stats_ext;\n COMMENT ON STATISTICS ab1_a_b_stats IS 'changed comment';\n ERROR:  must be owner of statistics object ab1_a_b_stats\n@@ -3214,7 +3214,7 @@ set search_path to public, stts_s1;\n  stts_s1 | stts_foo               | col1, col2 FROM stts_t3                                          | defined   | defined      | defined\n (10 rows)\n \n-create role regress_stats_ext nosuperuser;\n+create role regress_stats_ext nosuperuser PASSWORD NEON_PASSWORD_PLACEHOLDER;\n set role regress_stats_ext;\n \\dX\n                                                        List of extended statistics\n@@ -3237,7 +3237,7 @@ drop schema stts_s1, stts_s2 cascade;\n drop user regress_stats_ext;\n reset search_path;\n -- User with no access\n-CREATE USER regress_stats_user1;\n+CREATE USER regress_stats_user1 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n GRANT USAGE ON SCHEMA tststats TO regress_stats_user1;\n SET SESSION AUTHORIZATION regress_stats_user1;\n SELECT * FROM tststats.priv_test_tbl; -- Permission denied\ndiff --git a/src/test/regress/expected/subscription.out b/src/test/regress/expected/subscription.out\nindex b15eddbff3..e9ba4568eb 100644\n--- a/src/test/regress/expected/subscription.out\n+++ b/src/test/regress/expected/subscription.out\n@@ -1,10 +1,10 @@\n --\n -- SUBSCRIPTION\n --\n-CREATE ROLE regress_subscription_user LOGIN SUPERUSER;\n-CREATE 
ROLE regress_subscription_user2;\n-CREATE ROLE regress_subscription_user3 IN ROLE pg_create_subscription;\n-CREATE ROLE regress_subscription_user_dummy LOGIN NOSUPERUSER;\n+CREATE ROLE regress_subscription_user LOGIN SUPERUSER PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE ROLE regress_subscription_user2 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE ROLE regress_subscription_user3 PASSWORD NEON_PASSWORD_PLACEHOLDER IN ROLE pg_create_subscription;\n+CREATE ROLE regress_subscription_user_dummy LOGIN NOSUPERUSER PASSWORD NEON_PASSWORD_PLACEHOLDER;\n SET SESSION AUTHORIZATION 'regress_subscription_user';\n -- fail - no publications\n CREATE SUBSCRIPTION regress_testsub CONNECTION 'foo';\ndiff --git a/src/test/regress/expected/test_setup.out b/src/test/regress/expected/test_setup.out\nindex 5d9e6bf12b..c5fddfdca6 100644\n--- a/src/test/regress/expected/test_setup.out\n+++ b/src/test/regress/expected/test_setup.out\n@@ -21,6 +21,7 @@ GRANT ALL ON SCHEMA public TO public;\n -- Create a tablespace we can use in tests.\n SET allow_in_place_tablespaces = true;\n CREATE TABLESPACE regress_tblspace LOCATION '';\n+ERROR:  CREATE TABLESPACE is not supported on Neon\n --\n -- These tables have traditionally been referenced by many tests,\n -- so create and populate them.  
Insert only non-error values here.\n@@ -111,7 +112,8 @@ CREATE TABLE onek (\n \tstring4\t\tname\n );\n \\set filename :abs_srcdir '/data/onek.data'\n-COPY onek FROM :'filename';\n+\\set command '\\\\copy onek FROM ' :'filename';\n+:command\n VACUUM ANALYZE onek;\n CREATE TABLE onek2 AS SELECT * FROM onek;\n VACUUM ANALYZE onek2;\n@@ -134,7 +136,8 @@ CREATE TABLE tenk1 (\n \tstring4\t\tname\n );\n \\set filename :abs_srcdir '/data/tenk.data'\n-COPY tenk1 FROM :'filename';\n+\\set command '\\\\copy tenk1 FROM ' :'filename';\n+:command\n VACUUM ANALYZE tenk1;\n CREATE TABLE tenk2 AS SELECT * FROM tenk1;\n VACUUM ANALYZE tenk2;\n@@ -144,20 +147,23 @@ CREATE TABLE person (\n \tlocation \tpoint\n );\n \\set filename :abs_srcdir '/data/person.data'\n-COPY person FROM :'filename';\n+\\set command '\\\\copy person FROM ' :'filename';\n+:command\n VACUUM ANALYZE person;\n CREATE TABLE emp (\n \tsalary \t\tint4,\n \tmanager \tname\n ) INHERITS (person);\n \\set filename :abs_srcdir '/data/emp.data'\n-COPY emp FROM :'filename';\n+\\set command '\\\\copy emp FROM ' :'filename';\n+:command\n VACUUM ANALYZE emp;\n CREATE TABLE student (\n \tgpa \t\tfloat8\n ) INHERITS (person);\n \\set filename :abs_srcdir '/data/student.data'\n-COPY student FROM :'filename';\n+\\set command '\\\\copy student FROM ' :'filename';\n+:command\n VACUUM ANALYZE student;\n CREATE TABLE stud_emp (\n \tpercent \tint4\n@@ -166,14 +172,16 @@ NOTICE:  merging multiple inherited definitions of column \"name\"\n NOTICE:  merging multiple inherited definitions of column \"age\"\n NOTICE:  merging multiple inherited definitions of column \"location\"\n \\set filename :abs_srcdir '/data/stud_emp.data'\n-COPY stud_emp FROM :'filename';\n+\\set command '\\\\copy stud_emp FROM ' :'filename';\n+:command\n VACUUM ANALYZE stud_emp;\n CREATE TABLE road (\n \tname\t\ttext,\n \tthepath \tpath\n );\n \\set filename :abs_srcdir '/data/streets.data'\n-COPY road FROM :'filename';\n+\\set command '\\\\copy road FROM ' 
:'filename';\n+:command\n VACUUM ANALYZE road;\n CREATE TABLE ihighway () INHERITS (road);\n INSERT INTO ihighway\ndiff --git a/src/test/regress/expected/tsearch.out b/src/test/regress/expected/tsearch.out\nindex 9fad6c8b04..a1b8e82389 100644\n--- a/src/test/regress/expected/tsearch.out\n+++ b/src/test/regress/expected/tsearch.out\n@@ -63,7 +63,8 @@ CREATE TABLE test_tsvector(\n \ta tsvector\n );\n \\set filename :abs_srcdir '/data/tsearch.data'\n-COPY test_tsvector FROM :'filename';\n+\\set command '\\\\copy test_tsvector FROM ' :'filename';\n+:command\n ANALYZE test_tsvector;\n -- test basic text search behavior without indexes, then with\n SELECT count(*) FROM test_tsvector WHERE a @@ 'wr|qh';\ndiff --git a/src/test/regress/expected/updatable_views.out b/src/test/regress/expected/updatable_views.out\nindex ba46c32029..eac3017bac 100644\n--- a/src/test/regress/expected/updatable_views.out\n+++ b/src/test/regress/expected/updatable_views.out\n@@ -999,9 +999,9 @@ NOTICE:  drop cascades to 2 other objects\n DETAIL:  drop cascades to view rw_view1\n drop cascades to function rw_view1_aa(rw_view1)\n -- permissions checks\n-CREATE USER regress_view_user1;\n-CREATE USER regress_view_user2;\n-CREATE USER regress_view_user3;\n+CREATE USER regress_view_user1 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE USER regress_view_user2 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE USER regress_view_user3 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n SET SESSION AUTHORIZATION regress_view_user1;\n CREATE TABLE base_tbl(a int, b text, c float);\n INSERT INTO base_tbl VALUES (1, 'Row 1', 1.0);\n@@ -3094,8 +3094,8 @@ DETAIL:  View columns that are not columns of their base relation are not updata\n drop view uv_iocu_view;\n drop table uv_iocu_tab;\n -- ON CONFLICT DO UPDATE permissions checks\n-create user regress_view_user1;\n-create user regress_view_user2;\n+create user regress_view_user1 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+create user regress_view_user2 PASSWORD 
NEON_PASSWORD_PLACEHOLDER;\n set session authorization regress_view_user1;\n create table base_tbl(a int unique, b text, c float);\n insert into base_tbl values (1,'xxx',1.0);\ndiff --git a/src/test/regress/expected/update.out b/src/test/regress/expected/update.out\nindex c809f88f54..d1d57852d4 100644\n--- a/src/test/regress/expected/update.out\n+++ b/src/test/regress/expected/update.out\n@@ -602,7 +602,7 @@ DROP FUNCTION func_parted_mod_b();\n -- RLS policies with update-row-movement\n -----------------------------------------\n ALTER TABLE range_parted ENABLE ROW LEVEL SECURITY;\n-CREATE USER regress_range_parted_user;\n+CREATE USER regress_range_parted_user PASSWORD NEON_PASSWORD_PLACEHOLDER;\n GRANT ALL ON range_parted, mintab TO regress_range_parted_user;\n CREATE POLICY seeall ON range_parted AS PERMISSIVE FOR SELECT USING (true);\n CREATE POLICY policy_range_parted ON range_parted for UPDATE USING (true) WITH CHECK (c % 2 = 0);\ndiff --git a/src/test/regress/expected/vacuum.out b/src/test/regress/expected/vacuum.out\nindex 4aaf4f025d..40a339758a 100644\n--- a/src/test/regress/expected/vacuum.out\n+++ b/src/test/regress/expected/vacuum.out\n@@ -433,7 +433,7 @@ CREATE TABLE vacowned (a int);\n CREATE TABLE vacowned_parted (a int) PARTITION BY LIST (a);\n CREATE TABLE vacowned_part1 PARTITION OF vacowned_parted FOR VALUES IN (1);\n CREATE TABLE vacowned_part2 PARTITION OF vacowned_parted FOR VALUES IN (2);\n-CREATE ROLE regress_vacuum;\n+CREATE ROLE regress_vacuum PASSWORD NEON_PASSWORD_PLACEHOLDER;\n SET ROLE regress_vacuum;\n -- Simple table\n VACUUM vacowned;\ndiff --git a/src/test/regress/parallel_schedule b/src/test/regress/parallel_schedule\nindex 3d14bf4e4f..87f351b1d1 100644\n--- a/src/test/regress/parallel_schedule\n+++ b/src/test/regress/parallel_schedule\n@@ -130,4 +130,4 @@ test: fast_default\n \n # run tablespace test at the end because it drops the tablespace created during\n # setup that other tests may use.\n-test: tablespace\n+#test: 
tablespace\ndiff --git a/src/test/regress/sql/aggregates.sql b/src/test/regress/sql/aggregates.sql\nindex f51726e8ed..8854104eff 100644\n--- a/src/test/regress/sql/aggregates.sql\n+++ b/src/test/regress/sql/aggregates.sql\n@@ -15,7 +15,8 @@ CREATE TABLE aggtest (\n );\n \n \\set filename :abs_srcdir '/data/agg.data'\n-COPY aggtest FROM :'filename';\n+\\set command '\\\\copy aggtest FROM ' :'filename';\n+:command\n \n ANALYZE aggtest;\n \ndiff --git a/src/test/regress/sql/alter_generic.sql b/src/test/regress/sql/alter_generic.sql\nindex de58d268d3..9d38df7f42 100644\n--- a/src/test/regress/sql/alter_generic.sql\n+++ b/src/test/regress/sql/alter_generic.sql\n@@ -22,9 +22,9 @@ DROP ROLE IF EXISTS regress_alter_generic_user3;\n \n RESET client_min_messages;\n \n-CREATE USER regress_alter_generic_user3;\n-CREATE USER regress_alter_generic_user2;\n-CREATE USER regress_alter_generic_user1 IN ROLE regress_alter_generic_user3;\n+CREATE USER regress_alter_generic_user3 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE USER regress_alter_generic_user2 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE USER regress_alter_generic_user1 PASSWORD NEON_PASSWORD_PLACEHOLDER IN ROLE regress_alter_generic_user3;\n \n CREATE SCHEMA alt_nsp1;\n CREATE SCHEMA alt_nsp2;\n@@ -316,7 +316,7 @@ DROP OPERATOR FAMILY alt_opf4 USING btree;\n \n -- Should fail. Need to be SUPERUSER to do ALTER OPERATOR FAMILY .. ADD / DROP\n BEGIN TRANSACTION;\n-CREATE ROLE regress_alter_generic_user5 NOSUPERUSER;\n+CREATE ROLE regress_alter_generic_user5 PASSWORD NEON_PASSWORD_PLACEHOLDER NOSUPERUSER;\n CREATE OPERATOR FAMILY alt_opf5 USING btree;\n SET ROLE regress_alter_generic_user5;\n ALTER OPERATOR FAMILY alt_opf5 USING btree ADD OPERATOR 1 < (int4, int2), FUNCTION 1 btint42cmp(int4, int2);\n@@ -326,7 +326,7 @@ ROLLBACK;\n \n -- Should fail. Need rights to namespace for ALTER OPERATOR FAMILY .. 
ADD / DROP\n BEGIN TRANSACTION;\n-CREATE ROLE regress_alter_generic_user6;\n+CREATE ROLE regress_alter_generic_user6 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n CREATE SCHEMA alt_nsp6;\n REVOKE ALL ON SCHEMA alt_nsp6 FROM regress_alter_generic_user6;\n CREATE OPERATOR FAMILY alt_nsp6.alt_opf6 USING btree;\ndiff --git a/src/test/regress/sql/alter_operator.sql b/src/test/regress/sql/alter_operator.sql\nindex fd40370165..ca8055e06d 100644\n--- a/src/test/regress/sql/alter_operator.sql\n+++ b/src/test/regress/sql/alter_operator.sql\n@@ -87,7 +87,7 @@ ALTER OPERATOR & (bit, bit) SET (\"Restrict\" = _int_contsel, \"Join\" = _int_contjo\n --\n -- Test permission check. Must be owner to ALTER OPERATOR.\n --\n-CREATE USER regress_alter_op_user;\n+CREATE USER regress_alter_op_user PASSWORD NEON_PASSWORD_PLACEHOLDER;\n SET SESSION AUTHORIZATION regress_alter_op_user;\n \n ALTER OPERATOR === (boolean, boolean) SET (RESTRICT = NONE);\ndiff --git a/src/test/regress/sql/alter_table.sql b/src/test/regress/sql/alter_table.sql\nindex d2845abc97..a0719b8d0e 100644\n--- a/src/test/regress/sql/alter_table.sql\n+++ b/src/test/regress/sql/alter_table.sql\n@@ -7,7 +7,7 @@ SET client_min_messages TO 'warning';\n DROP ROLE IF EXISTS regress_alter_table_user1;\n RESET client_min_messages;\n \n-CREATE USER regress_alter_table_user1;\n+CREATE USER regress_alter_table_user1 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n \n --\n -- add attribute\n@@ -2397,8 +2397,8 @@ DROP TABLE fail_part;\n ALTER TABLE list_parted ATTACH PARTITION nonexistent FOR VALUES IN (1);\n \n -- check ownership of the source table\n-CREATE ROLE regress_test_me;\n-CREATE ROLE regress_test_not_me;\n+CREATE ROLE regress_test_me PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE ROLE regress_test_not_me PASSWORD NEON_PASSWORD_PLACEHOLDER;\n CREATE TABLE not_owned_by_me (LIKE list_parted);\n ALTER TABLE not_owned_by_me OWNER TO regress_test_not_me;\n SET SESSION AUTHORIZATION regress_test_me;\ndiff --git a/src/test/regress/sql/arrays.sql 
b/src/test/regress/sql/arrays.sql\nindex e414fa560d..79a75a0e57 100644\n--- a/src/test/regress/sql/arrays.sql\n+++ b/src/test/regress/sql/arrays.sql\n@@ -22,7 +22,8 @@ CREATE TABLE array_op_test (\n );\n \n \\set filename :abs_srcdir '/data/array.data'\n-COPY array_op_test FROM :'filename';\n+\\set command '\\\\copy array_op_test FROM ' :'filename';\n+:command\n ANALYZE array_op_test;\n \n --\ndiff --git a/src/test/regress/sql/btree_index.sql b/src/test/regress/sql/btree_index.sql\nindex 239f4a4755..f29d87bdff 100644\n--- a/src/test/regress/sql/btree_index.sql\n+++ b/src/test/regress/sql/btree_index.sql\n@@ -26,16 +26,20 @@ CREATE TABLE bt_f8_heap (\n );\n \n \\set filename :abs_srcdir '/data/desc.data'\n-COPY bt_i4_heap FROM :'filename';\n+\\set command '\\\\copy bt_i4_heap FROM ' :'filename';\n+:command\n \n \\set filename :abs_srcdir '/data/hash.data'\n-COPY bt_name_heap FROM :'filename';\n+\\set command '\\\\copy bt_name_heap FROM ' :'filename';\n+:command\n \n \\set filename :abs_srcdir '/data/desc.data'\n-COPY bt_txt_heap FROM :'filename';\n+\\set command '\\\\copy bt_txt_heap FROM ' :'filename';\n+:command\n \n \\set filename :abs_srcdir '/data/hash.data'\n-COPY bt_f8_heap FROM :'filename';\n+\\set command '\\\\copy bt_f8_heap FROM ' :'filename';\n+:command\n \n ANALYZE bt_i4_heap;\n ANALYZE bt_name_heap;\ndiff --git a/src/test/regress/sql/cluster.sql b/src/test/regress/sql/cluster.sql\nindex 6cb9c926c0..5e689e4062 100644\n--- a/src/test/regress/sql/cluster.sql\n+++ b/src/test/regress/sql/cluster.sql\n@@ -108,7 +108,7 @@ WHERE pg_class.oid=indexrelid\n CLUSTER pg_toast.pg_toast_826 USING pg_toast_826_index;\n \n -- Verify that clustering all tables does in fact cluster the right ones\n-CREATE USER regress_clstr_user;\n+CREATE USER regress_clstr_user PASSWORD NEON_PASSWORD_PLACEHOLDER;\n CREATE TABLE clstr_1 (a INT PRIMARY KEY);\n CREATE TABLE clstr_2 (a INT PRIMARY KEY);\n CREATE TABLE clstr_3 (a INT PRIMARY KEY);\n@@ -233,7 +233,7 @@ DROP TABLE clstrpart;\n 
CREATE TABLE ptnowner(i int unique) PARTITION BY LIST (i);\n CREATE INDEX ptnowner_i_idx ON ptnowner(i);\n CREATE TABLE ptnowner1 PARTITION OF ptnowner FOR VALUES IN (1);\n-CREATE ROLE regress_ptnowner;\n+CREATE ROLE regress_ptnowner PASSWORD NEON_PASSWORD_PLACEHOLDER;\n CREATE TABLE ptnowner2 PARTITION OF ptnowner FOR VALUES IN (2);\n ALTER TABLE ptnowner1 OWNER TO regress_ptnowner;\n ALTER TABLE ptnowner OWNER TO regress_ptnowner;\ndiff --git a/src/test/regress/sql/collate.icu.utf8.sql b/src/test/regress/sql/collate.icu.utf8.sql\nindex 8aa902d5ab..24bb823b86 100644\n--- a/src/test/regress/sql/collate.icu.utf8.sql\n+++ b/src/test/regress/sql/collate.icu.utf8.sql\n@@ -353,7 +353,7 @@ reset enable_seqscan;\n \n -- schema manipulation commands\n \n-CREATE ROLE regress_test_role;\n+CREATE ROLE regress_test_role PASSWORD NEON_PASSWORD_PLACEHOLDER;\n CREATE SCHEMA test_schema;\n \n -- We need to do this this way to cope with varying names for encodings:\ndiff --git a/src/test/regress/sql/constraints.sql b/src/test/regress/sql/constraints.sql\nindex e3e3bea709..fa86ddc326 100644\n--- a/src/test/regress/sql/constraints.sql\n+++ b/src/test/regress/sql/constraints.sql\n@@ -243,12 +243,14 @@ CREATE TABLE COPY_TBL (x INT, y TEXT, z INT,\n \tCHECK (x > 3 AND y <> 'check failed' AND x < 7 ));\n \n \\set filename :abs_srcdir '/data/constro.data'\n-COPY COPY_TBL FROM :'filename';\n+\\set command '\\\\copy COPY_TBL FROM ' :'filename';\n+:command\n \n SELECT * FROM COPY_TBL;\n \n \\set filename :abs_srcdir '/data/constrf.data'\n-COPY COPY_TBL FROM :'filename';\n+\\set command '\\\\copy COPY_TBL FROM ' :'filename';\n+:command\n \n SELECT * FROM COPY_TBL;\n \n@@ -599,7 +601,7 @@ DROP TABLE deferred_excl;\n \n -- Comments\n -- Setup a low-level role to enforce non-superuser checks.\n-CREATE ROLE regress_constraint_comments;\n+CREATE ROLE regress_constraint_comments PASSWORD NEON_PASSWORD_PLACEHOLDER;\n SET SESSION AUTHORIZATION regress_constraint_comments;\n \n CREATE TABLE 
constraint_comments_tbl (a int CONSTRAINT the_constraint CHECK (a > 0));\n@@ -621,7 +623,7 @@ COMMENT ON CONSTRAINT the_constraint ON DOMAIN constraint_comments_dom IS NULL;\n \n -- unauthorized user\n RESET SESSION AUTHORIZATION;\n-CREATE ROLE regress_constraint_comments_noaccess;\n+CREATE ROLE regress_constraint_comments_noaccess PASSWORD NEON_PASSWORD_PLACEHOLDER;\n SET SESSION AUTHORIZATION regress_constraint_comments_noaccess;\n COMMENT ON CONSTRAINT the_constraint ON constraint_comments_tbl IS 'no, the comment';\n COMMENT ON CONSTRAINT the_constraint ON DOMAIN constraint_comments_dom IS 'no, another comment';\ndiff --git a/src/test/regress/sql/conversion.sql b/src/test/regress/sql/conversion.sql\nindex b567a1a572..4d1ac2e631 100644\n--- a/src/test/regress/sql/conversion.sql\n+++ b/src/test/regress/sql/conversion.sql\n@@ -17,7 +17,7 @@ CREATE FUNCTION test_enc_conversion(bytea, name, name, bool, validlen OUT int, r\n     AS :'regresslib', 'test_enc_conversion'\n     LANGUAGE C STRICT;\n \n-CREATE USER regress_conversion_user WITH NOCREATEDB NOCREATEROLE;\n+CREATE USER regress_conversion_user WITH NOCREATEDB NOCREATEROLE PASSWORD NEON_PASSWORD_PLACEHOLDER;\n SET SESSION AUTHORIZATION regress_conversion_user;\n CREATE CONVERSION myconv FOR 'LATIN1' TO 'UTF8' FROM iso8859_1_to_utf8;\n --\ndiff --git a/src/test/regress/sql/copy.sql b/src/test/regress/sql/copy.sql\nindex 43d2e906dd..6c993d70f0 100644\n--- a/src/test/regress/sql/copy.sql\n+++ b/src/test/regress/sql/copy.sql\n@@ -20,11 +20,13 @@ insert into copytest values('Mac',E'abc\\rdef',3);\n insert into copytest values(E'esc\\\\ape',E'a\\\\r\\\\\\r\\\\\\n\\\\nb',4);\n \n \\set filename :abs_builddir '/results/copytest.csv'\n-copy copytest to :'filename' csv;\n+\\set command '\\\\copy copytest to ' :'filename' csv;\n+:command\n \n create temp table copytest2 (like copytest);\n \n-copy copytest2 from :'filename' csv;\n+\\set command '\\\\copy copytest2 from ' :'filename' csv;\n+:command\n \n select * from 
copytest except select * from copytest2;\n \n@@ -32,9 +34,11 @@ truncate copytest2;\n \n --- same test but with an escape char different from quote char\n \n-copy copytest to :'filename' csv quote '''' escape E'\\\\';\n+\\set command '\\\\copy copytest to ' :'filename' ' csv quote ' '\\'\\'\\'\\'' ' escape ' 'E\\'' '\\\\\\\\\\'';\n+:command\n \n-copy copytest2 from :'filename' csv quote '''' escape E'\\\\';\n+\\set command '\\\\copy copytest2 from ' :'filename' ' csv quote ' '\\'\\'\\'\\'' ' escape ' 'E\\'' '\\\\\\\\\\'';\n+:command\n \n select * from copytest except select * from copytest2;\n \n@@ -86,16 +90,19 @@ insert into parted_copytest select x,2,'Two' from generate_series(1001,1010) x;\n insert into parted_copytest select x,1,'One' from generate_series(1011,1020) x;\n \n \\set filename :abs_builddir '/results/parted_copytest.csv'\n-copy (select * from parted_copytest order by a) to :'filename';\n+\\set command '\\\\copy (select * from parted_copytest order by a) to ' :'filename';\n+:command\n \n truncate parted_copytest;\n \n-copy parted_copytest from :'filename';\n+\\set command '\\\\copy parted_copytest from ' :'filename';\n+:command\n \n -- Ensure COPY FREEZE errors for partitioned tables.\n begin;\n truncate parted_copytest;\n-copy parted_copytest from :'filename' (freeze);\n+\\set command '\\\\copy parted_copytest from ' :'filename' (freeze);\n+:command\n rollback;\n \n select tableoid::regclass,count(*),sum(a) from parted_copytest\n@@ -115,7 +122,8 @@ create trigger part_ins_trig\n \tfor each row\n \texecute procedure part_ins_func();\n \n-copy parted_copytest from :'filename';\n+\\set command '\\\\copy parted_copytest from ' :'filename';\n+:command\n \n select tableoid::regclass,count(*),sum(a) from parted_copytest\n group by tableoid order by tableoid::regclass::name;\n@@ -124,7 +132,8 @@ truncate table parted_copytest;\n create index on parted_copytest (b);\n drop trigger part_ins_trig on parted_copytest_a2;\n \n-copy parted_copytest from 
stdin;\n+\\set command '\\\\copy parted_copytest from ' stdin;\n+:command\n 1\t1\tstr1\n 2\t2\tstr2\n \\.\n@@ -191,8 +200,8 @@ bill\t20\t(11,10)\t1000\tsharon\n -- Generate COPY FROM report with FILE, with some excluded tuples.\n truncate tab_progress_reporting;\n \\set filename :abs_srcdir '/data/emp.data'\n-copy tab_progress_reporting from :'filename'\n-\twhere (salary < 2000);\n+\\set command '\\\\copy tab_progress_reporting from ' :'filename' 'where (salary < 2000)';\n+:command\n \n drop trigger check_after_tab_progress_reporting on tab_progress_reporting;\n drop function notice_after_tab_progress_reporting();\n@@ -311,7 +320,8 @@ CREATE TABLE parted_si_p_odd PARTITION OF parted_si FOR VALUES IN (1);\n -- https://postgr.es/m/18130-7a86a7356a75209d%40postgresql.org\n -- https://postgr.es/m/257696.1695670946%40sss.pgh.pa.us\n \\set filename :abs_srcdir '/data/desc.data'\n-COPY parted_si(id, data) FROM :'filename';\n+\\set command '\\\\COPY parted_si(id, data) FROM ' :'filename';\n+:command\n \n -- An earlier bug (see commit b1ecb9b3fcf) could end up using a buffer from\n -- the wrong partition. 
This test is *not* guaranteed to trigger that bug, but\ndiff --git a/src/test/regress/sql/copy2.sql b/src/test/regress/sql/copy2.sql\nindex cf3828c16e..cf3ca38175 100644\n--- a/src/test/regress/sql/copy2.sql\n+++ b/src/test/regress/sql/copy2.sql\n@@ -365,8 +365,8 @@ copy check_con_tbl from stdin;\n select * from check_con_tbl;\n \n -- test with RLS enabled.\n-CREATE ROLE regress_rls_copy_user;\n-CREATE ROLE regress_rls_copy_user_colperms;\n+CREATE ROLE regress_rls_copy_user PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE ROLE regress_rls_copy_user_colperms PASSWORD NEON_PASSWORD_PLACEHOLDER;\n CREATE TABLE rls_t1 (a int, b int, c int);\n \n COPY rls_t1 (a, b, c) from stdin;\ndiff --git a/src/test/regress/sql/create_function_sql.sql b/src/test/regress/sql/create_function_sql.sql\nindex 89e9af3a49..2b86fe2285 100644\n--- a/src/test/regress/sql/create_function_sql.sql\n+++ b/src/test/regress/sql/create_function_sql.sql\n@@ -6,7 +6,7 @@\n \n -- All objects made in this test are in temp_func_test schema\n \n-CREATE USER regress_unpriv_user;\n+CREATE USER regress_unpriv_user PASSWORD NEON_PASSWORD_PLACEHOLDER;\n \n CREATE SCHEMA temp_func_test;\n GRANT ALL ON SCHEMA temp_func_test TO public;\ndiff --git a/src/test/regress/sql/create_index.sql b/src/test/regress/sql/create_index.sql\nindex d49ce9f300..47fa813bc8 100644\n--- a/src/test/regress/sql/create_index.sql\n+++ b/src/test/regress/sql/create_index.sql\n@@ -71,7 +71,8 @@ CREATE TABLE fast_emp4000 (\n );\n \n \\set filename :abs_srcdir '/data/rect.data'\n-COPY slow_emp4000 FROM :'filename';\n+\\set command '\\\\copy slow_emp4000 FROM ' :'filename';\n+:command\n \n INSERT INTO fast_emp4000 SELECT * FROM slow_emp4000;\n \n@@ -269,7 +270,8 @@ CREATE TABLE array_index_op_test (\n );\n \n \\set filename :abs_srcdir '/data/array.data'\n-COPY array_index_op_test FROM :'filename';\n+\\set command '\\\\copy array_index_op_test FROM ' :'filename';\n+:command\n ANALYZE array_index_op_test;\n \n SELECT * FROM array_index_op_test 
WHERE i = '{NULL}' ORDER BY seqno;\n@@ -1246,7 +1248,7 @@ END;\n REINDEX SCHEMA CONCURRENTLY schema_to_reindex;\n \n -- Failure for unauthorized user\n-CREATE ROLE regress_reindexuser NOLOGIN;\n+CREATE ROLE regress_reindexuser NOLOGIN PASSWORD NEON_PASSWORD_PLACEHOLDER;\n SET SESSION ROLE regress_reindexuser;\n REINDEX SCHEMA schema_to_reindex;\n -- Permission failures with toast tables and indexes (pg_authid here)\ndiff --git a/src/test/regress/sql/create_procedure.sql b/src/test/regress/sql/create_procedure.sql\nindex 069a3727ce..faeeb3f744 100644\n--- a/src/test/regress/sql/create_procedure.sql\n+++ b/src/test/regress/sql/create_procedure.sql\n@@ -255,7 +255,7 @@ DROP PROCEDURE nonexistent();\n \n -- privileges\n \n-CREATE USER regress_cp_user1;\n+CREATE USER regress_cp_user1 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n GRANT INSERT ON cp_test TO regress_cp_user1;\n REVOKE EXECUTE ON PROCEDURE ptest1(text) FROM PUBLIC;\n SET ROLE regress_cp_user1;\ndiff --git a/src/test/regress/sql/create_role.sql b/src/test/regress/sql/create_role.sql\nindex 4491a28a8a..3045434865 100644\n--- a/src/test/regress/sql/create_role.sql\n+++ b/src/test/regress/sql/create_role.sql\n@@ -1,20 +1,20 @@\n -- ok, superuser can create users with any set of privileges\n-CREATE ROLE regress_role_super SUPERUSER;\n-CREATE ROLE regress_role_admin CREATEDB CREATEROLE REPLICATION BYPASSRLS;\n+CREATE ROLE regress_role_super SUPERUSER PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE ROLE regress_role_admin CREATEDB CREATEROLE REPLICATION BYPASSRLS PASSWORD NEON_PASSWORD_PLACEHOLDER;\n GRANT CREATE ON DATABASE regression TO regress_role_admin WITH GRANT OPTION;\n-CREATE ROLE regress_role_limited_admin CREATEROLE;\n-CREATE ROLE regress_role_normal;\n+CREATE ROLE regress_role_limited_admin CREATEROLE PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE ROLE regress_role_normal PASSWORD NEON_PASSWORD_PLACEHOLDER;\n \n -- fail, CREATEROLE user can't give away role attributes without having them\n SET SESSION AUTHORIZATION 
regress_role_limited_admin;\n-CREATE ROLE regress_nosuch_superuser SUPERUSER;\n-CREATE ROLE regress_nosuch_replication_bypassrls REPLICATION BYPASSRLS;\n-CREATE ROLE regress_nosuch_replication REPLICATION;\n-CREATE ROLE regress_nosuch_bypassrls BYPASSRLS;\n-CREATE ROLE regress_nosuch_createdb CREATEDB;\n+CREATE ROLE regress_nosuch_superuser SUPERUSER PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE ROLE regress_nosuch_replication_bypassrls REPLICATION BYPASSRLS PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE ROLE regress_nosuch_replication REPLICATION PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE ROLE regress_nosuch_bypassrls BYPASSRLS PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE ROLE regress_nosuch_createdb CREATEDB PASSWORD NEON_PASSWORD_PLACEHOLDER;\n \n -- ok, can create a role without any special attributes\n-CREATE ROLE regress_role_limited;\n+CREATE ROLE regress_role_limited PASSWORD NEON_PASSWORD_PLACEHOLDER;\n \n -- fail, can't give it in any of the restricted attributes\n ALTER ROLE regress_role_limited SUPERUSER;\n@@ -25,10 +25,10 @@ DROP ROLE regress_role_limited;\n \n -- ok, can give away these role attributes if you have them\n SET SESSION AUTHORIZATION regress_role_admin;\n-CREATE ROLE regress_replication_bypassrls REPLICATION BYPASSRLS;\n-CREATE ROLE regress_replication REPLICATION;\n-CREATE ROLE regress_bypassrls BYPASSRLS;\n-CREATE ROLE regress_createdb CREATEDB;\n+CREATE ROLE regress_replication_bypassrls REPLICATION BYPASSRLS PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE ROLE regress_replication REPLICATION PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE ROLE regress_bypassrls BYPASSRLS PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE ROLE regress_createdb CREATEDB PASSWORD NEON_PASSWORD_PLACEHOLDER;\n \n -- ok, can toggle these role attributes off and on if you have them\n ALTER ROLE regress_replication NOREPLICATION;\n@@ -43,52 +43,52 @@ ALTER ROLE regress_createdb SUPERUSER;\n ALTER ROLE regress_createdb NOSUPERUSER;\n \n -- ok, having CREATEROLE is enough 
to create users with these privileges\n-CREATE ROLE regress_createrole CREATEROLE NOINHERIT;\n+CREATE ROLE regress_createrole CREATEROLE NOINHERIT PASSWORD NEON_PASSWORD_PLACEHOLDER;\n GRANT CREATE ON DATABASE regression TO regress_createrole WITH GRANT OPTION;\n-CREATE ROLE regress_login LOGIN;\n-CREATE ROLE regress_inherit INHERIT;\n-CREATE ROLE regress_connection_limit CONNECTION LIMIT 5;\n-CREATE ROLE regress_encrypted_password ENCRYPTED PASSWORD 'foo';\n-CREATE ROLE regress_password_null PASSWORD NULL;\n+CREATE ROLE regress_login LOGIN PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE ROLE regress_inherit INHERIT PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE ROLE regress_connection_limit CONNECTION LIMIT 5 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE ROLE regress_encrypted_password ENCRYPTED PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE ROLE regress_password_null PASSWORD NEON_PASSWORD_PLACEHOLDER;\n \n -- ok, backwards compatible noise words should be ignored\n-CREATE ROLE regress_noiseword SYSID 12345;\n+CREATE ROLE regress_noiseword SYSID 12345 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n \n -- fail, cannot grant membership in superuser role\n-CREATE ROLE regress_nosuch_super IN ROLE regress_role_super;\n+CREATE ROLE regress_nosuch_super IN ROLE regress_role_super PASSWORD NEON_PASSWORD_PLACEHOLDER;\n \n -- fail, database owner cannot have members\n-CREATE ROLE regress_nosuch_dbowner IN ROLE pg_database_owner;\n+CREATE ROLE regress_nosuch_dbowner IN ROLE pg_database_owner PASSWORD NEON_PASSWORD_PLACEHOLDER;\n \n -- ok, can grant other users into a role\n CREATE ROLE regress_inroles ROLE\n \tregress_role_super, regress_createdb, regress_createrole, regress_login,\n-\tregress_inherit, regress_connection_limit, regress_encrypted_password, regress_password_null;\n+\tregress_inherit, regress_connection_limit, regress_encrypted_password, regress_password_null PASSWORD NEON_PASSWORD_PLACEHOLDER;\n \n -- fail, cannot grant a role into itself\n-CREATE ROLE 
regress_nosuch_recursive ROLE regress_nosuch_recursive;\n+CREATE ROLE regress_nosuch_recursive ROLE regress_nosuch_recursive PASSWORD NEON_PASSWORD_PLACEHOLDER;\n \n -- ok, can grant other users into a role with admin option\n CREATE ROLE regress_adminroles ADMIN\n \tregress_role_super, regress_createdb, regress_createrole, regress_login,\n-\tregress_inherit, regress_connection_limit, regress_encrypted_password, regress_password_null;\n+\tregress_inherit, regress_connection_limit, regress_encrypted_password, regress_password_null PASSWORD NEON_PASSWORD_PLACEHOLDER;\n \n -- fail, cannot grant a role into itself with admin option\n-CREATE ROLE regress_nosuch_admin_recursive ADMIN regress_nosuch_admin_recursive;\n+CREATE ROLE regress_nosuch_admin_recursive ADMIN regress_nosuch_admin_recursive PASSWORD NEON_PASSWORD_PLACEHOLDER;\n \n -- fail, regress_createrole does not have CREATEDB privilege\n SET SESSION AUTHORIZATION regress_createrole;\n CREATE DATABASE regress_nosuch_db;\n \n -- ok, regress_createrole can create new roles\n-CREATE ROLE regress_plainrole;\n+CREATE ROLE regress_plainrole PASSWORD NEON_PASSWORD_PLACEHOLDER;\n \n -- ok, roles with CREATEROLE can create new roles with it\n-CREATE ROLE regress_rolecreator CREATEROLE;\n+CREATE ROLE regress_rolecreator CREATEROLE PASSWORD NEON_PASSWORD_PLACEHOLDER;\n \n -- ok, roles with CREATEROLE can create new roles with different role\n -- attributes, including CREATEROLE\n-CREATE ROLE regress_hasprivs CREATEROLE LOGIN INHERIT CONNECTION LIMIT 5;\n+CREATE ROLE regress_hasprivs CREATEROLE LOGIN INHERIT CONNECTION LIMIT 5 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n \n -- ok, we should be able to modify a role we created\n COMMENT ON ROLE regress_hasprivs IS 'some comment';\n@@ -123,7 +123,7 @@ REASSIGN OWNED BY regress_tenant TO regress_createrole;\n \n -- ok, create a role with a value for createrole_self_grant\n SET createrole_self_grant = 'set, inherit';\n-CREATE ROLE regress_tenant2;\n+CREATE ROLE regress_tenant2 
PASSWORD NEON_PASSWORD_PLACEHOLDER;\n GRANT CREATE ON DATABASE regression TO regress_tenant2;\n \n -- ok, regress_tenant2 can create objects within the database\n@@ -150,16 +150,16 @@ ALTER TABLE tenant2_table OWNER TO regress_tenant2;\n DROP TABLE tenant2_table;\n \n -- fail, CREATEROLE is not enough to create roles in privileged roles\n-CREATE ROLE regress_read_all_data IN ROLE pg_read_all_data;\n-CREATE ROLE regress_write_all_data IN ROLE pg_write_all_data;\n-CREATE ROLE regress_monitor IN ROLE pg_monitor;\n-CREATE ROLE regress_read_all_settings IN ROLE pg_read_all_settings;\n-CREATE ROLE regress_read_all_stats IN ROLE pg_read_all_stats;\n-CREATE ROLE regress_stat_scan_tables IN ROLE pg_stat_scan_tables;\n-CREATE ROLE regress_read_server_files IN ROLE pg_read_server_files;\n-CREATE ROLE regress_write_server_files IN ROLE pg_write_server_files;\n-CREATE ROLE regress_execute_server_program IN ROLE pg_execute_server_program;\n-CREATE ROLE regress_signal_backend IN ROLE pg_signal_backend;\n+CREATE ROLE regress_read_all_data PASSWORD NEON_PASSWORD_PLACEHOLDER IN ROLE pg_read_all_data;\n+CREATE ROLE regress_write_all_data PASSWORD NEON_PASSWORD_PLACEHOLDER IN ROLE pg_write_all_data;\n+CREATE ROLE regress_monitor PASSWORD NEON_PASSWORD_PLACEHOLDER IN ROLE pg_monitor;\n+CREATE ROLE regress_read_all_settings PASSWORD NEON_PASSWORD_PLACEHOLDER IN ROLE pg_read_all_settings;\n+CREATE ROLE regress_read_all_stats PASSWORD NEON_PASSWORD_PLACEHOLDER IN ROLE pg_read_all_stats;\n+CREATE ROLE regress_stat_scan_tables PASSWORD NEON_PASSWORD_PLACEHOLDER IN ROLE pg_stat_scan_tables;\n+CREATE ROLE regress_read_server_files PASSWORD NEON_PASSWORD_PLACEHOLDER IN ROLE pg_read_server_files;\n+CREATE ROLE regress_write_server_files PASSWORD NEON_PASSWORD_PLACEHOLDER IN ROLE pg_write_server_files;\n+CREATE ROLE regress_execute_server_program PASSWORD NEON_PASSWORD_PLACEHOLDER IN ROLE pg_execute_server_program;\n+CREATE ROLE regress_signal_backend PASSWORD NEON_PASSWORD_PLACEHOLDER IN ROLE 
pg_signal_backend;\n \n -- fail, role still owns database objects\n DROP ROLE regress_tenant;\ndiff --git a/src/test/regress/sql/create_schema.sql b/src/test/regress/sql/create_schema.sql\nindex 1b7064247a..be5b662ce1 100644\n--- a/src/test/regress/sql/create_schema.sql\n+++ b/src/test/regress/sql/create_schema.sql\n@@ -4,7 +4,7 @@\n \n -- Schema creation with elements.\n \n-CREATE ROLE regress_create_schema_role SUPERUSER;\n+CREATE ROLE regress_create_schema_role SUPERUSER PASSWORD NEON_PASSWORD_PLACEHOLDER;\n \n -- Cases where schema creation fails as objects are qualified with a schema\n -- that does not match with what's expected.\ndiff --git a/src/test/regress/sql/create_view.sql b/src/test/regress/sql/create_view.sql\nindex ae6841308b..47bc792e30 100644\n--- a/src/test/regress/sql/create_view.sql\n+++ b/src/test/regress/sql/create_view.sql\n@@ -23,7 +23,8 @@ CREATE TABLE real_city (\n );\n \n \\set filename :abs_srcdir '/data/real_city.data'\n-COPY real_city FROM :'filename';\n+\\set command '\\\\copy real_city FROM ' :'filename';\n+:command\n ANALYZE real_city;\n \n SELECT *\ndiff --git a/src/test/regress/sql/database.sql b/src/test/regress/sql/database.sql\nindex 46ad263478..eb05584ed5 100644\n--- a/src/test/regress/sql/database.sql\n+++ b/src/test/regress/sql/database.sql\n@@ -1,8 +1,6 @@\n CREATE DATABASE regression_tbd\n \tENCODING utf8 LC_COLLATE \"C\" LC_CTYPE \"C\" TEMPLATE template0;\n ALTER DATABASE regression_tbd RENAME TO regression_utf8;\n-ALTER DATABASE regression_utf8 SET TABLESPACE regress_tblspace;\n-ALTER DATABASE regression_utf8 RESET TABLESPACE;\n ALTER DATABASE regression_utf8 CONNECTION_LIMIT 123;\n \n -- Test PgDatabaseToastTable.  
Doing this with GRANT would be slow.\ndiff --git a/src/test/regress/sql/dependency.sql b/src/test/regress/sql/dependency.sql\nindex 2559c62d0b..06c3aa1a36 100644\n--- a/src/test/regress/sql/dependency.sql\n+++ b/src/test/regress/sql/dependency.sql\n@@ -2,10 +2,10 @@\n -- DEPENDENCIES\n --\n \n-CREATE USER regress_dep_user;\n-CREATE USER regress_dep_user2;\n-CREATE USER regress_dep_user3;\n-CREATE GROUP regress_dep_group;\n+CREATE USER regress_dep_user PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE USER regress_dep_user2 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE USER regress_dep_user3 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE GROUP regress_dep_group PASSWORD NEON_PASSWORD_PLACEHOLDER;\n \n CREATE TABLE deptest (f1 serial primary key, f2 text);\n \n@@ -45,9 +45,9 @@ DROP TABLE deptest;\n DROP USER regress_dep_user3;\n \n -- Test DROP OWNED\n-CREATE USER regress_dep_user0;\n-CREATE USER regress_dep_user1;\n-CREATE USER regress_dep_user2;\n+CREATE USER regress_dep_user0 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE USER regress_dep_user1 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE USER regress_dep_user2 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n SET SESSION AUTHORIZATION regress_dep_user0;\n -- permission denied\n DROP OWNED BY regress_dep_user1;\ndiff --git a/src/test/regress/sql/drop_if_exists.sql b/src/test/regress/sql/drop_if_exists.sql\nindex ac6168b91f..4270062ec7 100644\n--- a/src/test/regress/sql/drop_if_exists.sql\n+++ b/src/test/regress/sql/drop_if_exists.sql\n@@ -86,9 +86,9 @@ DROP DOMAIN test_domain_exists;\n --- role/user/group\n ---\n \n-CREATE USER regress_test_u1;\n-CREATE ROLE regress_test_r1;\n-CREATE GROUP regress_test_g1;\n+CREATE USER regress_test_u1 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE ROLE regress_test_r1 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE GROUP regress_test_g1 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n \n DROP USER regress_test_u2;\n \ndiff --git a/src/test/regress/sql/equivclass.sql b/src/test/regress/sql/equivclass.sql\nindex 
247b0a3105..bf018fd3a1 100644\n--- a/src/test/regress/sql/equivclass.sql\n+++ b/src/test/regress/sql/equivclass.sql\n@@ -230,7 +230,7 @@ set enable_mergejoin = off;\n alter table ec1 enable row level security;\n create policy p1 on ec1 using (f1 < '5'::int8alias1);\n \n-create user regress_user_ectest;\n+create user regress_user_ectest PASSWORD NEON_PASSWORD_PLACEHOLDER;\n grant select on ec0 to regress_user_ectest;\n grant select on ec1 to regress_user_ectest;\n \ndiff --git a/src/test/regress/sql/event_trigger.sql b/src/test/regress/sql/event_trigger.sql\nindex 1aeaddbe71..89a410ec4a 100644\n--- a/src/test/regress/sql/event_trigger.sql\n+++ b/src/test/regress/sql/event_trigger.sql\n@@ -86,7 +86,7 @@ create event trigger regress_event_trigger2 on ddl_command_start\n comment on event trigger regress_event_trigger is 'test comment';\n \n -- drop as non-superuser should fail\n-create role regress_evt_user;\n+create role regress_evt_user PASSWORD NEON_PASSWORD_PLACEHOLDER;\n set role regress_evt_user;\n create event trigger regress_event_trigger_noperms on ddl_command_start\n    execute procedure test_event_trigger();\ndiff --git a/src/test/regress/sql/foreign_data.sql b/src/test/regress/sql/foreign_data.sql\nindex aa147b14a9..370e0dd570 100644\n--- a/src/test/regress/sql/foreign_data.sql\n+++ b/src/test/regress/sql/foreign_data.sql\n@@ -22,14 +22,14 @@ DROP ROLE IF EXISTS regress_foreign_data_user, regress_test_role, regress_test_r\n \n RESET client_min_messages;\n \n-CREATE ROLE regress_foreign_data_user LOGIN SUPERUSER;\n+CREATE ROLE regress_foreign_data_user LOGIN SUPERUSER PASSWORD NEON_PASSWORD_PLACEHOLDER;\n SET SESSION AUTHORIZATION 'regress_foreign_data_user';\n \n-CREATE ROLE regress_test_role;\n-CREATE ROLE regress_test_role2;\n-CREATE ROLE regress_test_role_super SUPERUSER;\n-CREATE ROLE regress_test_indirect;\n-CREATE ROLE regress_unprivileged_role;\n+CREATE ROLE regress_test_role PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE ROLE regress_test_role2 
PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE ROLE regress_test_role_super SUPERUSER PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE ROLE regress_test_indirect PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE ROLE regress_unprivileged_role PASSWORD NEON_PASSWORD_PLACEHOLDER;\n \n CREATE FOREIGN DATA WRAPPER dummy;\n COMMENT ON FOREIGN DATA WRAPPER dummy IS 'useless';\ndiff --git a/src/test/regress/sql/foreign_key.sql b/src/test/regress/sql/foreign_key.sql\nindex 9f4210b26e..620d3fc87e 100644\n--- a/src/test/regress/sql/foreign_key.sql\n+++ b/src/test/regress/sql/foreign_key.sql\n@@ -1435,7 +1435,7 @@ ALTER TABLE fk_partitioned_fk_6 ATTACH PARTITION fk_partitioned_pk_6 FOR VALUES\n DROP TABLE fk_partitioned_pk_6, fk_partitioned_fk_6;\n \n -- test the case when the referenced table is owned by a different user\n-create role regress_other_partitioned_fk_owner;\n+create role regress_other_partitioned_fk_owner PASSWORD NEON_PASSWORD_PLACEHOLDER;\n grant references on fk_notpartitioned_pk to regress_other_partitioned_fk_owner;\n set role regress_other_partitioned_fk_owner;\n create table other_partitioned_fk(a int, b int) partition by list (a);\ndiff --git a/src/test/regress/sql/generated.sql b/src/test/regress/sql/generated.sql\nindex 298f6b3aa8..f058913ae0 100644\n--- a/src/test/regress/sql/generated.sql\n+++ b/src/test/regress/sql/generated.sql\n@@ -263,7 +263,7 @@ ALTER TABLE gtest10a DROP COLUMN b;\n INSERT INTO gtest10a (a) VALUES (1);\n \n -- privileges\n-CREATE USER regress_user11;\n+CREATE USER regress_user11 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n \n CREATE TABLE gtest11s (a int PRIMARY KEY, b int, c int GENERATED ALWAYS AS (b * 2) STORED);\n INSERT INTO gtest11s VALUES (1, 10), (2, 20);\ndiff --git a/src/test/regress/sql/guc.sql b/src/test/regress/sql/guc.sql\nindex dc79761955..a9ead75349 100644\n--- a/src/test/regress/sql/guc.sql\n+++ b/src/test/regress/sql/guc.sql\n@@ -188,7 +188,7 @@ PREPARE foo AS SELECT 1;\n LISTEN foo_event;\n SET vacuum_cost_delay = 13;\n CREATE 
TEMP TABLE tmp_foo (data text) ON COMMIT DELETE ROWS;\n-CREATE ROLE regress_guc_user;\n+CREATE ROLE regress_guc_user PASSWORD NEON_PASSWORD_PLACEHOLDER;\n SET SESSION AUTHORIZATION regress_guc_user;\n -- look changes\n SELECT pg_listening_channels();\ndiff --git a/src/test/regress/sql/hash_index.sql b/src/test/regress/sql/hash_index.sql\nindex 527024f710..de49c0b85f 100644\n--- a/src/test/regress/sql/hash_index.sql\n+++ b/src/test/regress/sql/hash_index.sql\n@@ -26,10 +26,14 @@ CREATE TABLE hash_f8_heap (\n );\n \n \\set filename :abs_srcdir '/data/hash.data'\n-COPY hash_i4_heap FROM :'filename';\n-COPY hash_name_heap FROM :'filename';\n-COPY hash_txt_heap FROM :'filename';\n-COPY hash_f8_heap FROM :'filename';\n+\\set command '\\\\copy hash_i4_heap FROM ' :'filename';\n+:command\n+\\set command '\\\\copy hash_name_heap FROM ' :'filename';\n+:command\n+\\set command '\\\\copy hash_txt_heap FROM ' :'filename';\n+:command\n+\\set command '\\\\copy hash_f8_heap FROM ' :'filename';\n+:command\n \n -- the data in this file has a lot of duplicates in the index key\n -- fields, leading to long bucket chains and lots of table expansion.\ndiff --git a/src/test/regress/sql/identity.sql b/src/test/regress/sql/identity.sql\nindex 7537258a75..9041e35e34 100644\n--- a/src/test/regress/sql/identity.sql\n+++ b/src/test/regress/sql/identity.sql\n@@ -287,7 +287,7 @@ ALTER TABLE itest7 ALTER COLUMN a RESTART;\n ALTER TABLE itest7 ALTER COLUMN a DROP IDENTITY;\n \n -- privileges\n-CREATE USER regress_identity_user1;\n+CREATE USER regress_identity_user1 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n CREATE TABLE itest8 (a int GENERATED ALWAYS AS IDENTITY, b text);\n GRANT SELECT, INSERT ON itest8 TO regress_identity_user1;\n SET ROLE regress_identity_user1;\ndiff --git a/src/test/regress/sql/inherit.sql b/src/test/regress/sql/inherit.sql\nindex b5b554a125..109889ad24 100644\n--- a/src/test/regress/sql/inherit.sql\n+++ b/src/test/regress/sql/inherit.sql\n@@ -958,7 +958,7 @@ create index on 
permtest_parent (left(c, 3));\n insert into permtest_parent\n   select 1, 'a', left(fipshash(i::text), 5) from generate_series(0, 100) i;\n analyze permtest_parent;\n-create role regress_no_child_access;\n+create role regress_no_child_access PASSWORD NEON_PASSWORD_PLACEHOLDER;\n revoke all on permtest_grandchild from regress_no_child_access;\n grant select on permtest_parent to regress_no_child_access;\n set session authorization regress_no_child_access;\ndiff --git a/src/test/regress/sql/insert.sql b/src/test/regress/sql/insert.sql\nindex 2b086eeb6d..913d8a0aed 100644\n--- a/src/test/regress/sql/insert.sql\n+++ b/src/test/regress/sql/insert.sql\n@@ -513,7 +513,7 @@ drop table mlparted5;\n create table key_desc (a int, b int) partition by list ((a+0));\n create table key_desc_1 partition of key_desc for values in (1) partition by range (b);\n \n-create user regress_insert_other_user;\n+create user regress_insert_other_user PASSWORD NEON_PASSWORD_PLACEHOLDER;\n grant select (a) on key_desc_1 to regress_insert_other_user;\n grant insert on key_desc to regress_insert_other_user;\n \n@@ -597,7 +597,7 @@ insert into brtrigpartcon1 values (1, 'hi there');\n -- check that the message shows the appropriate column description in a\n -- situation where the partitioned table is not the primary ModifyTable node\n create table inserttest3 (f1 text default 'foo', f2 text default 'bar', f3 int);\n-create role regress_coldesc_role;\n+create role regress_coldesc_role PASSWORD NEON_PASSWORD_PLACEHOLDER;\n grant insert on inserttest3 to regress_coldesc_role;\n grant insert on brtrigpartcon to regress_coldesc_role;\n revoke select on brtrigpartcon from regress_coldesc_role;\ndiff --git a/src/test/regress/sql/jsonb.sql b/src/test/regress/sql/jsonb.sql\nindex 6dae715afd..aa320ba7be 100644\n--- a/src/test/regress/sql/jsonb.sql\n+++ b/src/test/regress/sql/jsonb.sql\n@@ -6,7 +6,8 @@ CREATE TABLE testjsonb (\n );\n \n \\set filename :abs_srcdir '/data/jsonb.data'\n-COPY testjsonb FROM 
:'filename';\n+\\set command '\\\\copy testjsonb FROM ' :'filename';\n+:command\n \n -- Strings.\n SELECT '\"\"'::jsonb;\t\t\t\t-- OK.\ndiff --git a/src/test/regress/sql/largeobject.sql b/src/test/regress/sql/largeobject.sql\nindex a4aee02e3a..8839c9496a 100644\n--- a/src/test/regress/sql/largeobject.sql\n+++ b/src/test/regress/sql/largeobject.sql\n@@ -10,7 +10,7 @@\n SET bytea_output TO escape;\n \n -- Test ALTER LARGE OBJECT OWNER\n-CREATE ROLE regress_lo_user;\n+CREATE ROLE regress_lo_user PASSWORD NEON_PASSWORD_PLACEHOLDER;\n SELECT lo_create(42);\n ALTER LARGE OBJECT 42 OWNER TO regress_lo_user;\n \n@@ -189,7 +189,8 @@ SELECT lo_unlink(loid) from lotest_stash_values;\n TRUNCATE lotest_stash_values;\n \n \\set filename :abs_srcdir '/data/tenk.data'\n-INSERT INTO lotest_stash_values (loid) SELECT lo_import(:'filename');\n+\\lo_import :filename\n+INSERT INTO lotest_stash_values (loid) VALUES (:LASTOID);\n \n BEGIN;\n UPDATE lotest_stash_values SET fd=lo_open(loid, CAST(x'20000' | x'40000' AS integer));\n@@ -219,8 +220,8 @@ SELECT lo_close(fd) FROM lotest_stash_values;\n END;\n \n \\set filename :abs_builddir '/results/lotest.txt'\n-SELECT lo_export(loid, :'filename') FROM lotest_stash_values;\n-\n+SELECT loid FROM lotest_stash_values \\gset\n+\\lo_export :loid, :filename\n \\lo_import :filename\n \n \\set newloid :LASTOID\ndiff --git a/src/test/regress/sql/lock.sql b/src/test/regress/sql/lock.sql\nindex b88488c6d0..78b31e6dd3 100644\n--- a/src/test/regress/sql/lock.sql\n+++ b/src/test/regress/sql/lock.sql\n@@ -19,7 +19,7 @@ CREATE VIEW lock_view3 AS SELECT * from lock_view2;\n CREATE VIEW lock_view4 AS SELECT (select a from lock_tbl1a limit 1) from lock_tbl1;\n CREATE VIEW lock_view5 AS SELECT * from lock_tbl1 where a in (select * from lock_tbl1a);\n CREATE VIEW lock_view6 AS SELECT * from (select * from lock_tbl1) sub;\n-CREATE ROLE regress_rol_lock1;\n+CREATE ROLE regress_rol_lock1 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n ALTER ROLE regress_rol_lock1 SET 
search_path = lock_schema1;\n GRANT USAGE ON SCHEMA lock_schema1 TO regress_rol_lock1;\n \ndiff --git a/src/test/regress/sql/matview.sql b/src/test/regress/sql/matview.sql\nindex 235123de1e..58e73cec5d 100644\n--- a/src/test/regress/sql/matview.sql\n+++ b/src/test/regress/sql/matview.sql\n@@ -209,7 +209,7 @@ SELECT * FROM mvtest_mv_v;\n DROP TABLE mvtest_v CASCADE;\n \n -- make sure running as superuser works when MV owned by another role (bug #11208)\n-CREATE ROLE regress_user_mvtest;\n+CREATE ROLE regress_user_mvtest PASSWORD NEON_PASSWORD_PLACEHOLDER;\n SET ROLE regress_user_mvtest;\n -- this test case also checks for ambiguity in the queries issued by\n -- refresh_by_match_merge(), by choosing column names that intentionally\n@@ -264,7 +264,7 @@ ROLLBACK;\n \n -- INSERT privileges if relation owner is not allowed to insert.\n CREATE SCHEMA matview_schema;\n-CREATE USER regress_matview_user;\n+CREATE USER regress_matview_user PASSWORD NEON_PASSWORD_PLACEHOLDER;\n ALTER DEFAULT PRIVILEGES FOR ROLE regress_matview_user\n   REVOKE INSERT ON TABLES FROM regress_matview_user;\n GRANT ALL ON SCHEMA matview_schema TO public;\ndiff --git a/src/test/regress/sql/merge.sql b/src/test/regress/sql/merge.sql\nindex 2a220a248f..91a404d51e 100644\n--- a/src/test/regress/sql/merge.sql\n+++ b/src/test/regress/sql/merge.sql\n@@ -2,9 +2,9 @@\n -- MERGE\n --\n \n-CREATE USER regress_merge_privs;\n-CREATE USER regress_merge_no_privs;\n-CREATE USER regress_merge_none;\n+CREATE USER regress_merge_privs PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE USER regress_merge_no_privs PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE USER regress_merge_none PASSWORD NEON_PASSWORD_PLACEHOLDER;\n \n DROP TABLE IF EXISTS target;\n DROP TABLE IF EXISTS source;\ndiff --git a/src/test/regress/sql/misc.sql b/src/test/regress/sql/misc.sql\nindex 165a2e175f..08d7096e2c 100644\n--- a/src/test/regress/sql/misc.sql\n+++ b/src/test/regress/sql/misc.sql\n@@ -74,22 +74,26 @@ DROP TABLE tmp;\n -- copy\n --\n \\set 
filename :abs_builddir '/results/onek.data'\n-COPY onek TO :'filename';\n+\\set command '\\\\copy onek TO ' :'filename';\n+:command\n \n CREATE TEMP TABLE onek_copy (LIKE onek);\n \n-COPY onek_copy FROM :'filename';\n+\\set command '\\\\copy onek_copy FROM ' :'filename';\n+:command\n \n SELECT * FROM onek EXCEPT ALL SELECT * FROM onek_copy;\n \n SELECT * FROM onek_copy EXCEPT ALL SELECT * FROM onek;\n \n \\set filename :abs_builddir '/results/stud_emp.data'\n-COPY BINARY stud_emp TO :'filename';\n+\\set command '\\\\COPY BINARY stud_emp TO ' :'filename';\n+:command\n \n CREATE TEMP TABLE stud_emp_copy (LIKE stud_emp);\n \n-COPY BINARY stud_emp_copy FROM :'filename';\n+\\set command '\\\\COPY BINARY stud_emp_copy FROM ' :'filename';\n+:command\n \n SELECT * FROM stud_emp_copy;\n \ndiff --git a/src/test/regress/sql/misc_functions.sql b/src/test/regress/sql/misc_functions.sql\nindex b57f01f3e9..3e05aa6400 100644\n--- a/src/test/regress/sql/misc_functions.sql\n+++ b/src/test/regress/sql/misc_functions.sql\n@@ -82,7 +82,7 @@ SELECT pg_log_backend_memory_contexts(pg_backend_pid());\n SELECT pg_log_backend_memory_contexts(pid) FROM pg_stat_activity\n   WHERE backend_type = 'checkpointer';\n \n-CREATE ROLE regress_log_memory;\n+CREATE ROLE regress_log_memory PASSWORD NEON_PASSWORD_PLACEHOLDER;\n \n SELECT has_function_privilege('regress_log_memory',\n   'pg_log_backend_memory_contexts(integer)', 'EXECUTE'); -- no\n@@ -169,7 +169,7 @@ select count(*) > 0 from\n --\n -- Test replication slot directory functions\n --\n-CREATE ROLE regress_slot_dir_funcs;\n+CREATE ROLE regress_slot_dir_funcs PASSWORD NEON_PASSWORD_PLACEHOLDER;\n -- Not available by default.\n SELECT has_function_privilege('regress_slot_dir_funcs',\n   'pg_ls_logicalsnapdir()', 'EXECUTE');\ndiff --git a/src/test/regress/sql/object_address.sql b/src/test/regress/sql/object_address.sql\nindex 1a6c61f49d..1c31ac6a53 100644\n--- a/src/test/regress/sql/object_address.sql\n+++ 
b/src/test/regress/sql/object_address.sql\n@@ -7,7 +7,7 @@ SET client_min_messages TO 'warning';\n DROP ROLE IF EXISTS regress_addr_user;\n RESET client_min_messages;\n \n-CREATE USER regress_addr_user;\n+CREATE USER regress_addr_user PASSWORD NEON_PASSWORD_PLACEHOLDER;\n \n -- Test generic object addressing/identification functions\n CREATE SCHEMA addr_nsp;\ndiff --git a/src/test/regress/sql/password.sql b/src/test/regress/sql/password.sql\nindex 53e86b0b6c..0303fdfe96 100644\n--- a/src/test/regress/sql/password.sql\n+++ b/src/test/regress/sql/password.sql\n@@ -10,11 +10,11 @@ SET password_encryption = 'scram-sha-256'; -- ok\n \n -- consistency of password entries\n SET password_encryption = 'md5';\n-CREATE ROLE regress_passwd1 PASSWORD 'role_pwd1';\n-CREATE ROLE regress_passwd2 PASSWORD 'role_pwd2';\n+CREATE ROLE regress_passwd1 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE ROLE regress_passwd2 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n SET password_encryption = 'scram-sha-256';\n-CREATE ROLE regress_passwd3 PASSWORD 'role_pwd3';\n-CREATE ROLE regress_passwd4 PASSWORD NULL;\n+CREATE ROLE regress_passwd3 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE ROLE regress_passwd4 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n \n -- check list of created entries\n --\n@@ -42,26 +42,18 @@ ALTER ROLE regress_passwd2_new RENAME TO regress_passwd2;\n SET password_encryption = 'md5';\n \n -- encrypt with MD5\n-ALTER ROLE regress_passwd2 PASSWORD 'foo';\n--- already encrypted, use as they are\n-ALTER ROLE regress_passwd1 PASSWORD 'md5cd3578025fe2c3d7ed1b9a9b26238b70';\n-ALTER ROLE regress_passwd3 PASSWORD 'SCRAM-SHA-256$4096:VLK4RMaQLCvNtQ==$6YtlR4t69SguDiwFvbVgVZtuz6gpJQQqUMZ7IQJK5yI=:ps75jrHeYU4lXCcXI4O8oIdJ3eO8o2jirjruw9phBTo=';\n+ALTER ROLE regress_passwd2 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n \n SET password_encryption = 'scram-sha-256';\n -- create SCRAM secret\n-ALTER ROLE  regress_passwd4 PASSWORD 'foo';\n--- already encrypted with MD5, use as it is\n-CREATE ROLE regress_passwd5 PASSWORD 
'md5e73a4b11df52a6068f8b39f90be36023';\n+ALTER ROLE  regress_passwd4 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+-- Neon does not support encrypted passwords, use unencrypted instead\n+CREATE ROLE regress_passwd5 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n \n--- This looks like a valid SCRAM-SHA-256 secret, but it is not\n--- so it should be hashed with SCRAM-SHA-256.\n-CREATE ROLE regress_passwd6 PASSWORD 'SCRAM-SHA-256$1234';\n--- These may look like valid MD5 secrets, but they are not, so they\n--- should be hashed with SCRAM-SHA-256.\n--- trailing garbage at the end\n-CREATE ROLE regress_passwd7 PASSWORD 'md5012345678901234567890123456789zz';\n--- invalid length\n-CREATE ROLE regress_passwd8 PASSWORD 'md501234567890123456789012345678901zz';\n+-- Neon does not support encrypted passwords, use unencrypted instead\n+CREATE ROLE regress_passwd6 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE ROLE regress_passwd7 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE ROLE regress_passwd8 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n \n -- Changing the SCRAM iteration count\n SET scram_iterations = 1024;\n@@ -78,13 +70,10 @@ ALTER ROLE regress_passwd_empty PASSWORD 'md585939a5ce845f1a1b620742e3c659e0a';\n ALTER ROLE regress_passwd_empty PASSWORD 'SCRAM-SHA-256$4096:hpFyHTUsSWcR7O9P$LgZFIt6Oqdo27ZFKbZ2nV+vtnYM995pDh9ca6WSi120=:qVV5NeluNfUPkwm7Vqat25RjSPLkGeoZBQs6wVv+um4=';\n SELECT rolpassword FROM pg_authid WHERE rolname='regress_passwd_empty';\n \n--- Test with invalid stored and server keys.\n---\n--- The first is valid, to act as a control. The others have too long\n--- stored/server keys. 
They will be re-hashed.\n-CREATE ROLE regress_passwd_sha_len0 PASSWORD 'SCRAM-SHA-256$4096:A6xHKoH/494E941doaPOYg==$Ky+A30sewHIH3VHQLRN9vYsuzlgNyGNKCh37dy96Rqw=:COPdlNiIkrsacU5QoxydEuOH6e/KfiipeETb/bPw8ZI=';\n-CREATE ROLE regress_passwd_sha_len1 PASSWORD 'SCRAM-SHA-256$4096:A6xHKoH/494E941doaPOYg==$Ky+A30sewHIH3VHQLRN9vYsuzlgNyGNKCh37dy96RqwAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=:COPdlNiIkrsacU5QoxydEuOH6e/KfiipeETb/bPw8ZI=';\n-CREATE ROLE regress_passwd_sha_len2 PASSWORD 'SCRAM-SHA-256$4096:A6xHKoH/494E941doaPOYg==$Ky+A30sewHIH3VHQLRN9vYsuzlgNyGNKCh37dy96Rqw=:COPdlNiIkrsacU5QoxydEuOH6e/KfiipeETb/bPw8ZIAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=';\n+-- Neon does not support encrypted passwords, use unencrypted instead\n+CREATE ROLE regress_passwd_sha_len0 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE ROLE regress_passwd_sha_len1 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE ROLE regress_passwd_sha_len2 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n \n -- Check that the invalid secrets were re-hashed. 
A re-hashed secret\n -- should not contain the original salt.\ndiff --git a/src/test/regress/sql/privileges.sql b/src/test/regress/sql/privileges.sql\nindex 259f1aedd1..6e1a3d17b7 100644\n--- a/src/test/regress/sql/privileges.sql\n+++ b/src/test/regress/sql/privileges.sql\n@@ -24,18 +24,18 @@ RESET client_min_messages;\n \n -- test proper begins here\n \n-CREATE USER regress_priv_user1;\n-CREATE USER regress_priv_user2;\n-CREATE USER regress_priv_user3;\n-CREATE USER regress_priv_user4;\n-CREATE USER regress_priv_user5;\n-CREATE USER regress_priv_user5;\t-- duplicate\n-CREATE USER regress_priv_user6;\n-CREATE USER regress_priv_user7;\n-CREATE USER regress_priv_user8;\n-CREATE USER regress_priv_user9;\n-CREATE USER regress_priv_user10;\n-CREATE ROLE regress_priv_role;\n+CREATE USER regress_priv_user1 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE USER regress_priv_user2 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE USER regress_priv_user3 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE USER regress_priv_user4 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE USER regress_priv_user5 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE USER regress_priv_user5 PASSWORD NEON_PASSWORD_PLACEHOLDER;\t-- duplicate\n+CREATE USER regress_priv_user6 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE USER regress_priv_user7 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE USER regress_priv_user8 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE USER regress_priv_user9 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE USER regress_priv_user10 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE ROLE regress_priv_role PASSWORD NEON_PASSWORD_PLACEHOLDER;\n \n -- circular ADMIN OPTION grants should be disallowed\n GRANT regress_priv_user1 TO regress_priv_user2 WITH ADMIN OPTION;\n@@ -84,11 +84,11 @@ DROP ROLE regress_priv_user5; -- should fail, dependency\n DROP ROLE regress_priv_user1, regress_priv_user5; -- ok, despite order\n \n -- recreate the roles we just dropped\n-CREATE USER regress_priv_user1;\n-CREATE USER 
regress_priv_user2;\n-CREATE USER regress_priv_user3;\n-CREATE USER regress_priv_user4;\n-CREATE USER regress_priv_user5;\n+CREATE USER regress_priv_user1 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE USER regress_priv_user2 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE USER regress_priv_user3 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE USER regress_priv_user4 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE USER regress_priv_user5 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n \n GRANT pg_read_all_data TO regress_priv_user6;\n GRANT pg_write_all_data TO regress_priv_user7;\n@@ -163,8 +163,8 @@ DROP USER regress_priv_user10;\n DROP USER regress_priv_user9;\n DROP USER regress_priv_user8;\n \n-CREATE GROUP regress_priv_group1;\n-CREATE GROUP regress_priv_group2 WITH ADMIN regress_priv_user1 USER regress_priv_user2;\n+CREATE GROUP regress_priv_group1 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE GROUP regress_priv_group2 WITH ADMIN regress_priv_user1 PASSWORD NEON_PASSWORD_PLACEHOLDER USER regress_priv_user2;\n \n ALTER GROUP regress_priv_group1 ADD USER regress_priv_user4;\n \n@@ -1160,7 +1160,7 @@ SELECT has_table_privilege('regress_priv_user1', 'atest4', 'SELECT WITH GRANT OP\n \n -- security-restricted operations\n \\c -\n-CREATE ROLE regress_sro_user;\n+CREATE ROLE regress_sro_user PASSWORD NEON_PASSWORD_PLACEHOLDER;\n \n -- Check that index expressions and predicates are run as the table's owner\n \n@@ -1656,8 +1656,8 @@ DROP SCHEMA testns CASCADE;\n -- Change owner of the schema & and rename of new schema owner\n \\c -\n \n-CREATE ROLE regress_schemauser1 superuser login;\n-CREATE ROLE regress_schemauser2 superuser login;\n+CREATE ROLE regress_schemauser1 superuser login PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE ROLE regress_schemauser2 superuser login PASSWORD NEON_PASSWORD_PLACEHOLDER;\n \n SET SESSION ROLE regress_schemauser1;\n CREATE SCHEMA testns;\n@@ -1751,7 +1751,7 @@ DROP USER regress_priv_user8; -- does not exist\n \n \n -- permissions with LOCK TABLE\n-CREATE 
USER regress_locktable_user;\n+CREATE USER regress_locktable_user PASSWORD NEON_PASSWORD_PLACEHOLDER;\n CREATE TABLE lock_table (a int);\n \n -- LOCK TABLE and SELECT permission\n@@ -1839,7 +1839,7 @@ DROP USER regress_locktable_user;\n -- switch to superuser\n \\c -\n \n-CREATE ROLE regress_readallstats;\n+CREATE ROLE regress_readallstats PASSWORD NEON_PASSWORD_PLACEHOLDER;\n \n SELECT has_table_privilege('regress_readallstats','pg_backend_memory_contexts','SELECT'); -- no\n SELECT has_table_privilege('regress_readallstats','pg_shmem_allocations','SELECT'); -- no\n@@ -1859,10 +1859,10 @@ RESET ROLE;\n DROP ROLE regress_readallstats;\n \n -- test role grantor machinery\n-CREATE ROLE regress_group;\n-CREATE ROLE regress_group_direct_manager;\n-CREATE ROLE regress_group_indirect_manager;\n-CREATE ROLE regress_group_member;\n+CREATE ROLE regress_group PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE ROLE regress_group_direct_manager PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE ROLE regress_group_indirect_manager PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE ROLE regress_group_member PASSWORD NEON_PASSWORD_PLACEHOLDER;\n \n GRANT regress_group TO regress_group_direct_manager WITH INHERIT FALSE, ADMIN TRUE;\n GRANT regress_group_direct_manager TO regress_group_indirect_manager;\n@@ -1884,9 +1884,9 @@ DROP ROLE regress_group_indirect_manager;\n DROP ROLE regress_group_member;\n \n -- test SET and INHERIT options with object ownership changes\n-CREATE ROLE regress_roleoption_protagonist;\n-CREATE ROLE regress_roleoption_donor;\n-CREATE ROLE regress_roleoption_recipient;\n+CREATE ROLE regress_roleoption_protagonist PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE ROLE regress_roleoption_donor PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE ROLE regress_roleoption_recipient PASSWORD NEON_PASSWORD_PLACEHOLDER;\n CREATE SCHEMA regress_roleoption;\n GRANT CREATE, USAGE ON SCHEMA regress_roleoption TO PUBLIC;\n GRANT regress_roleoption_donor TO regress_roleoption_protagonist WITH INHERIT 
TRUE, SET FALSE;\ndiff --git a/src/test/regress/sql/psql.sql b/src/test/regress/sql/psql.sql\nindex f3bc6cd07e..f1a2f58069 100644\n--- a/src/test/regress/sql/psql.sql\n+++ b/src/test/regress/sql/psql.sql\n@@ -496,7 +496,7 @@ select 1 where false;\n \\pset expanded off\n \n CREATE SCHEMA tableam_display;\n-CREATE ROLE regress_display_role;\n+CREATE ROLE regress_display_role PASSWORD NEON_PASSWORD_PLACEHOLDER;\n ALTER SCHEMA tableam_display OWNER TO regress_display_role;\n SET search_path TO tableam_display;\n CREATE ACCESS METHOD heap_psql TYPE TABLE HANDLER heap_tableam_handler;\n@@ -1174,7 +1174,7 @@ select 1/(15-unique2) from tenk1 order by unique2 limit 19;\n \\unset FETCH_COUNT\n \n create schema testpart;\n-create role regress_partitioning_role;\n+create role regress_partitioning_role PASSWORD NEON_PASSWORD_PLACEHOLDER;\n \n alter schema testpart owner to regress_partitioning_role;\n \n@@ -1285,7 +1285,7 @@ reset work_mem;\n \n -- check \\df+\n -- we have to use functions with a predictable owner name, so make a role\n-create role regress_psql_user superuser;\n+create role regress_psql_user superuser PASSWORD NEON_PASSWORD_PLACEHOLDER;\n begin;\n set session authorization regress_psql_user;\n \n@@ -1431,11 +1431,14 @@ CREATE TEMPORARY TABLE reload_output(\n );\n \n SELECT 1 AS a \\g :g_out_file\n-COPY reload_output(line) FROM :'g_out_file';\n+\\set command '\\\\COPY reload_output(line) FROM ' :'g_out_file';\n+:command\n SELECT 2 AS b\\; SELECT 3 AS c\\; SELECT 4 AS d \\g :g_out_file\n-COPY reload_output(line) FROM :'g_out_file';\n+\\set command '\\\\COPY reload_output(line) FROM ' :'g_out_file';\n+:command\n COPY (SELECT 'foo') TO STDOUT \\; COPY (SELECT 'bar') TO STDOUT \\g :g_out_file\n-COPY reload_output(line) FROM :'g_out_file';\n+\\set command '\\\\COPY reload_output(line) FROM ' :'g_out_file';\n+:command\n \n SELECT line FROM reload_output ORDER BY lineno;\n TRUNCATE TABLE reload_output;\n@@ -1452,17 +1455,20 @@ SELECT 1 AS a\\; SELECT 2 AS b\\; SELECT 3 
AS c;\n -- COPY TO file\n -- The data goes to :g_out_file and the status to :o_out_file\n \\set QUIET false\n-COPY (SELECT unique1 FROM onek ORDER BY unique1 LIMIT 10) TO :'g_out_file';\n+\\set command '\\\\COPY (SELECT unique1 FROM onek ORDER BY unique1 LIMIT 10) TO ' :'g_out_file';\n+:command\n -- DML command status\n UPDATE onek SET unique1 = unique1 WHERE false;\n \\set QUIET true\n \\o\n \n -- Check the contents of the files generated.\n-COPY reload_output(line) FROM :'g_out_file';\n+\\set command '\\\\COPY reload_output(line) FROM ' :'g_out_file';\n+:command\n SELECT line FROM reload_output ORDER BY lineno;\n TRUNCATE TABLE reload_output;\n-COPY reload_output(line) FROM :'o_out_file';\n+\\set command '\\\\COPY reload_output(line) FROM ' :'o_out_file';\n+:command\n SELECT line FROM reload_output ORDER BY lineno;\n TRUNCATE TABLE reload_output;\n \n@@ -1475,10 +1481,12 @@ COPY (SELECT 'foo2') TO STDOUT \\; COPY (SELECT 'bar2') TO STDOUT \\g :g_out_file\n \\o\n \n -- Check the contents of the files generated.\n-COPY reload_output(line) FROM :'g_out_file';\n+\\set command '\\\\COPY reload_output(line) FROM ' :'g_out_file';\n+:command\n SELECT line FROM reload_output ORDER BY lineno;\n TRUNCATE TABLE reload_output;\n-COPY reload_output(line) FROM :'o_out_file';\n+\\set command '\\\\COPY reload_output(line) FROM ' :'o_out_file';\n+:command\n SELECT line FROM reload_output ORDER BY lineno;\n \n DROP TABLE reload_output;\n@@ -1825,10 +1833,10 @@ DROP FUNCTION psql_error;\n \\dX \"no.such.database\".\"no.such.schema\".\"no.such.extended.statistics\"\n \n -- check \\drg and \\du\n-CREATE ROLE regress_du_role0;\n-CREATE ROLE regress_du_role1;\n-CREATE ROLE regress_du_role2;\n-CREATE ROLE regress_du_admin;\n+CREATE ROLE regress_du_role0 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE ROLE regress_du_role1 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE ROLE regress_du_role2 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE ROLE regress_du_admin PASSWORD NEON_PASSWORD_PLACEHOLDER;\n 
\n GRANT regress_du_role0 TO regress_du_admin WITH ADMIN TRUE;\n GRANT regress_du_role1 TO regress_du_admin WITH ADMIN TRUE;\ndiff --git a/src/test/regress/sql/publication.sql b/src/test/regress/sql/publication.sql\nindex d5051a5e74..b32d729271 100644\n--- a/src/test/regress/sql/publication.sql\n+++ b/src/test/regress/sql/publication.sql\n@@ -1,9 +1,9 @@\n --\n -- PUBLICATION\n --\n-CREATE ROLE regress_publication_user LOGIN SUPERUSER;\n-CREATE ROLE regress_publication_user2;\n-CREATE ROLE regress_publication_user_dummy LOGIN NOSUPERUSER;\n+CREATE ROLE regress_publication_user LOGIN SUPERUSER PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE ROLE regress_publication_user2 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE ROLE regress_publication_user_dummy LOGIN NOSUPERUSER PASSWORD NEON_PASSWORD_PLACEHOLDER;\n SET SESSION AUTHORIZATION 'regress_publication_user';\n \n -- suppress warning that depends on wal_level\n@@ -801,7 +801,7 @@ DROP PUBLICATION testpub2;\n DROP PUBLICATION testpub3;\n \n SET ROLE regress_publication_user;\n-CREATE ROLE regress_publication_user3;\n+CREATE ROLE regress_publication_user3 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n GRANT regress_publication_user2 TO regress_publication_user3;\n SET client_min_messages = 'ERROR';\n CREATE PUBLICATION testpub4 FOR TABLES IN SCHEMA pub_test;\ndiff --git a/src/test/regress/sql/regproc.sql b/src/test/regress/sql/regproc.sql\nindex de2aa881a8..41a675fd35 100644\n--- a/src/test/regress/sql/regproc.sql\n+++ b/src/test/regress/sql/regproc.sql\n@@ -4,7 +4,7 @@\n \n /* If objects exist, return oids */\n \n-CREATE ROLE regress_regrole_test;\n+CREATE ROLE regress_regrole_test PASSWORD NEON_PASSWORD_PLACEHOLDER;\n \n -- without schemaname\n \ndiff --git a/src/test/regress/sql/roleattributes.sql b/src/test/regress/sql/roleattributes.sql\nindex c961b2d730..0859b89c4f 100644\n--- a/src/test/regress/sql/roleattributes.sql\n+++ b/src/test/regress/sql/roleattributes.sql\n@@ -1,83 +1,83 @@\n -- default for superuser is 
false\n-CREATE ROLE regress_test_def_superuser;\n+CREATE ROLE regress_test_def_superuser PASSWORD NEON_PASSWORD_PLACEHOLDER;\n \n-SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, rolpassword, rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_def_superuser';\n-CREATE ROLE regress_test_superuser WITH SUPERUSER;\n-SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, rolpassword, rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_superuser';\n+SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, regexp_replace(rolpassword, '(SCRAM-SHA-256)\\$(\\d+):([a-zA-Z0-9+/=]+)\\$([a-zA-Z0-9+=/]+):([a-zA-Z0-9+/=]+)', '\\1$\\2:<salt>$<storedkey>:<serverkey>'), rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_def_superuser';\n+CREATE ROLE regress_test_superuser WITH SUPERUSER PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, regexp_replace(rolpassword, '(SCRAM-SHA-256)\\$(\\d+):([a-zA-Z0-9+/=]+)\\$([a-zA-Z0-9+=/]+):([a-zA-Z0-9+/=]+)', '\\1$\\2:<salt>$<storedkey>:<serverkey>'), rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_superuser';\n ALTER ROLE regress_test_superuser WITH NOSUPERUSER;\n-SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, rolpassword, rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_superuser';\n+SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, regexp_replace(rolpassword, '(SCRAM-SHA-256)\\$(\\d+):([a-zA-Z0-9+/=]+)\\$([a-zA-Z0-9+=/]+):([a-zA-Z0-9+/=]+)', '\\1$\\2:<salt>$<storedkey>:<serverkey>'), rolvaliduntil FROM pg_authid WHERE rolname = 
'regress_test_superuser';\n ALTER ROLE regress_test_superuser WITH SUPERUSER;\n-SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, rolpassword, rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_superuser';\n+SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, regexp_replace(rolpassword, '(SCRAM-SHA-256)\\$(\\d+):([a-zA-Z0-9+/=]+)\\$([a-zA-Z0-9+=/]+):([a-zA-Z0-9+/=]+)', '\\1$\\2:<salt>$<storedkey>:<serverkey>'), rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_superuser';\n \n -- default for inherit is true\n-CREATE ROLE regress_test_def_inherit;\n-SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, rolpassword, rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_def_inherit';\n-CREATE ROLE regress_test_inherit WITH NOINHERIT;\n-SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, rolpassword, rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_inherit';\n+CREATE ROLE regress_test_def_inherit PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, regexp_replace(rolpassword, '(SCRAM-SHA-256)\\$(\\d+):([a-zA-Z0-9+/=]+)\\$([a-zA-Z0-9+=/]+):([a-zA-Z0-9+/=]+)', '\\1$\\2:<salt>$<storedkey>:<serverkey>'), rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_def_inherit';\n+CREATE ROLE regress_test_inherit WITH NOINHERIT PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, regexp_replace(rolpassword, '(SCRAM-SHA-256)\\$(\\d+):([a-zA-Z0-9+/=]+)\\$([a-zA-Z0-9+=/]+):([a-zA-Z0-9+/=]+)', '\\1$\\2:<salt>$<storedkey>:<serverkey>'), rolvaliduntil FROM 
pg_authid WHERE rolname = 'regress_test_inherit';\n ALTER ROLE regress_test_inherit WITH INHERIT;\n-SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, rolpassword, rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_inherit';\n+SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, regexp_replace(rolpassword, '(SCRAM-SHA-256)\\$(\\d+):([a-zA-Z0-9+/=]+)\\$([a-zA-Z0-9+=/]+):([a-zA-Z0-9+/=]+)', '\\1$\\2:<salt>$<storedkey>:<serverkey>'), rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_inherit';\n ALTER ROLE regress_test_inherit WITH NOINHERIT;\n-SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, rolpassword, rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_inherit';\n+SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, regexp_replace(rolpassword, '(SCRAM-SHA-256)\\$(\\d+):([a-zA-Z0-9+/=]+)\\$([a-zA-Z0-9+=/]+):([a-zA-Z0-9+/=]+)', '\\1$\\2:<salt>$<storedkey>:<serverkey>'), rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_inherit';\n \n -- default for create role is false\n-CREATE ROLE regress_test_def_createrole;\n-SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, rolpassword, rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_def_createrole';\n-CREATE ROLE regress_test_createrole WITH CREATEROLE;\n-SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, rolpassword, rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_createrole';\n+CREATE ROLE regress_test_def_createrole PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, 
rolbypassrls, rolconnlimit, regexp_replace(rolpassword, '(SCRAM-SHA-256)\\$(\\d+):([a-zA-Z0-9+/=]+)\\$([a-zA-Z0-9+=/]+):([a-zA-Z0-9+/=]+)', '\\1$\\2:<salt>$<storedkey>:<serverkey>'), rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_def_createrole';\n+CREATE ROLE regress_test_createrole WITH CREATEROLE PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, regexp_replace(rolpassword, '(SCRAM-SHA-256)\\$(\\d+):([a-zA-Z0-9+/=]+)\\$([a-zA-Z0-9+=/]+):([a-zA-Z0-9+/=]+)', '\\1$\\2:<salt>$<storedkey>:<serverkey>'), rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_createrole';\n ALTER ROLE regress_test_createrole WITH NOCREATEROLE;\n-SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, rolpassword, rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_createrole';\n+SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, regexp_replace(rolpassword, '(SCRAM-SHA-256)\\$(\\d+):([a-zA-Z0-9+/=]+)\\$([a-zA-Z0-9+=/]+):([a-zA-Z0-9+/=]+)', '\\1$\\2:<salt>$<storedkey>:<serverkey>'), rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_createrole';\n ALTER ROLE regress_test_createrole WITH CREATEROLE;\n-SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, rolpassword, rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_createrole';\n+SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, regexp_replace(rolpassword, '(SCRAM-SHA-256)\\$(\\d+):([a-zA-Z0-9+/=]+)\\$([a-zA-Z0-9+=/]+):([a-zA-Z0-9+/=]+)', '\\1$\\2:<salt>$<storedkey>:<serverkey>'), rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_createrole';\n \n -- default for create database is false\n-CREATE ROLE 
regress_test_def_createdb;\n-SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, rolpassword, rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_def_createdb';\n-CREATE ROLE regress_test_createdb WITH CREATEDB;\n-SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, rolpassword, rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_createdb';\n+CREATE ROLE regress_test_def_createdb PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, regexp_replace(rolpassword, '(SCRAM-SHA-256)\\$(\\d+):([a-zA-Z0-9+/=]+)\\$([a-zA-Z0-9+=/]+):([a-zA-Z0-9+/=]+)', '\\1$\\2:<salt>$<storedkey>:<serverkey>'), rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_def_createdb';\n+CREATE ROLE regress_test_createdb WITH CREATEDB PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, regexp_replace(rolpassword, '(SCRAM-SHA-256)\\$(\\d+):([a-zA-Z0-9+/=]+)\\$([a-zA-Z0-9+=/]+):([a-zA-Z0-9+/=]+)', '\\1$\\2:<salt>$<storedkey>:<serverkey>'), rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_createdb';\n ALTER ROLE regress_test_createdb WITH NOCREATEDB;\n-SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, rolpassword, rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_createdb';\n+SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, regexp_replace(rolpassword, '(SCRAM-SHA-256)\\$(\\d+):([a-zA-Z0-9+/=]+)\\$([a-zA-Z0-9+=/]+):([a-zA-Z0-9+/=]+)', '\\1$\\2:<salt>$<storedkey>:<serverkey>'), rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_createdb';\n ALTER ROLE 
regress_test_createdb WITH CREATEDB;\n-SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, rolpassword, rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_createdb';\n+SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, regexp_replace(rolpassword, '(SCRAM-SHA-256)\\$(\\d+):([a-zA-Z0-9+/=]+)\\$([a-zA-Z0-9+=/]+):([a-zA-Z0-9+/=]+)', '\\1$\\2:<salt>$<storedkey>:<serverkey>'), rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_createdb';\n \n -- default for can login is false for role\n-CREATE ROLE regress_test_def_role_canlogin;\n-SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, rolpassword, rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_def_role_canlogin';\n-CREATE ROLE regress_test_role_canlogin WITH LOGIN;\n-SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, rolpassword, rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_role_canlogin';\n+CREATE ROLE regress_test_def_role_canlogin PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, regexp_replace(rolpassword, '(SCRAM-SHA-256)\\$(\\d+):([a-zA-Z0-9+/=]+)\\$([a-zA-Z0-9+=/]+):([a-zA-Z0-9+/=]+)', '\\1$\\2:<salt>$<storedkey>:<serverkey>'), rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_def_role_canlogin';\n+CREATE ROLE regress_test_role_canlogin WITH LOGIN PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, regexp_replace(rolpassword, '(SCRAM-SHA-256)\\$(\\d+):([a-zA-Z0-9+/=]+)\\$([a-zA-Z0-9+=/]+):([a-zA-Z0-9+/=]+)', '\\1$\\2:<salt>$<storedkey>:<serverkey>'), rolvaliduntil FROM 
pg_authid WHERE rolname = 'regress_test_role_canlogin';\n ALTER ROLE regress_test_role_canlogin WITH NOLOGIN;\n-SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, rolpassword, rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_role_canlogin';\n+SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, regexp_replace(rolpassword, '(SCRAM-SHA-256)\\$(\\d+):([a-zA-Z0-9+/=]+)\\$([a-zA-Z0-9+=/]+):([a-zA-Z0-9+/=]+)', '\\1$\\2:<salt>$<storedkey>:<serverkey>'), rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_role_canlogin';\n ALTER ROLE regress_test_role_canlogin WITH LOGIN;\n-SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, rolpassword, rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_role_canlogin';\n+SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, regexp_replace(rolpassword, '(SCRAM-SHA-256)\\$(\\d+):([a-zA-Z0-9+/=]+)\\$([a-zA-Z0-9+=/]+):([a-zA-Z0-9+/=]+)', '\\1$\\2:<salt>$<storedkey>:<serverkey>'), rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_role_canlogin';\n \n -- default for can login is true for user\n-CREATE USER regress_test_def_user_canlogin;\n-SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, rolpassword, rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_def_user_canlogin';\n-CREATE USER regress_test_user_canlogin WITH NOLOGIN;\n-SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, rolpassword, rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_user_canlogin';\n+CREATE USER regress_test_def_user_canlogin PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+SELECT rolname, rolsuper, rolinherit, 
rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, regexp_replace(rolpassword, '(SCRAM-SHA-256)\\$(\\d+):([a-zA-Z0-9+/=]+)\\$([a-zA-Z0-9+=/]+):([a-zA-Z0-9+/=]+)', '\\1$\\2:<salt>$<storedkey>:<serverkey>'), rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_def_user_canlogin';\n+CREATE USER regress_test_user_canlogin WITH NOLOGIN PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, regexp_replace(rolpassword, '(SCRAM-SHA-256)\\$(\\d+):([a-zA-Z0-9+/=]+)\\$([a-zA-Z0-9+=/]+):([a-zA-Z0-9+/=]+)', '\\1$\\2:<salt>$<storedkey>:<serverkey>'), rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_user_canlogin';\n ALTER USER regress_test_user_canlogin WITH LOGIN;\n-SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, rolpassword, rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_user_canlogin';\n+SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, regexp_replace(rolpassword, '(SCRAM-SHA-256)\\$(\\d+):([a-zA-Z0-9+/=]+)\\$([a-zA-Z0-9+=/]+):([a-zA-Z0-9+/=]+)', '\\1$\\2:<salt>$<storedkey>:<serverkey>'), rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_user_canlogin';\n ALTER USER regress_test_user_canlogin WITH NOLOGIN;\n-SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, rolpassword, rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_user_canlogin';\n+SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, regexp_replace(rolpassword, '(SCRAM-SHA-256)\\$(\\d+):([a-zA-Z0-9+/=]+)\\$([a-zA-Z0-9+=/]+):([a-zA-Z0-9+/=]+)', '\\1$\\2:<salt>$<storedkey>:<serverkey>'), rolvaliduntil FROM pg_authid WHERE rolname = 
'regress_test_user_canlogin';\n \n -- default for replication is false\n-CREATE ROLE regress_test_def_replication;\n-SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, rolpassword, rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_def_replication';\n-CREATE ROLE regress_test_replication WITH REPLICATION;\n-SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, rolpassword, rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_replication';\n+CREATE ROLE regress_test_def_replication PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, regexp_replace(rolpassword, '(SCRAM-SHA-256)\\$(\\d+):([a-zA-Z0-9+/=]+)\\$([a-zA-Z0-9+=/]+):([a-zA-Z0-9+/=]+)', '\\1$\\2:<salt>$<storedkey>:<serverkey>'), rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_def_replication';\n+CREATE ROLE regress_test_replication WITH REPLICATION PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, regexp_replace(rolpassword, '(SCRAM-SHA-256)\\$(\\d+):([a-zA-Z0-9+/=]+)\\$([a-zA-Z0-9+=/]+):([a-zA-Z0-9+/=]+)', '\\1$\\2:<salt>$<storedkey>:<serverkey>'), rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_replication';\n ALTER ROLE regress_test_replication WITH NOREPLICATION;\n-SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, rolpassword, rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_replication';\n+SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, regexp_replace(rolpassword, '(SCRAM-SHA-256)\\$(\\d+):([a-zA-Z0-9+/=]+)\\$([a-zA-Z0-9+=/]+):([a-zA-Z0-9+/=]+)', 
'\\1$\\2:<salt>$<storedkey>:<serverkey>'), rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_replication';\n ALTER ROLE regress_test_replication WITH REPLICATION;\n-SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, rolpassword, rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_replication';\n+SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, regexp_replace(rolpassword, '(SCRAM-SHA-256)\\$(\\d+):([a-zA-Z0-9+/=]+)\\$([a-zA-Z0-9+=/]+):([a-zA-Z0-9+/=]+)', '\\1$\\2:<salt>$<storedkey>:<serverkey>'), rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_replication';\n \n -- default for bypassrls is false\n-CREATE ROLE regress_test_def_bypassrls;\n-SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, rolpassword, rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_def_bypassrls';\n-CREATE ROLE regress_test_bypassrls WITH BYPASSRLS;\n-SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, rolpassword, rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_bypassrls';\n+CREATE ROLE regress_test_def_bypassrls PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, regexp_replace(rolpassword, '(SCRAM-SHA-256)\\$(\\d+):([a-zA-Z0-9+/=]+)\\$([a-zA-Z0-9+=/]+):([a-zA-Z0-9+/=]+)', '\\1$\\2:<salt>$<storedkey>:<serverkey>'), rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_def_bypassrls';\n+CREATE ROLE regress_test_bypassrls WITH BYPASSRLS PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, regexp_replace(rolpassword, 
'(SCRAM-SHA-256)\\$(\\d+):([a-zA-Z0-9+/=]+)\\$([a-zA-Z0-9+=/]+):([a-zA-Z0-9+/=]+)', '\\1$\\2:<salt>$<storedkey>:<serverkey>'), rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_bypassrls';\n ALTER ROLE regress_test_bypassrls WITH NOBYPASSRLS;\n-SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, rolpassword, rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_bypassrls';\n+SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, regexp_replace(rolpassword, '(SCRAM-SHA-256)\\$(\\d+):([a-zA-Z0-9+/=]+)\\$([a-zA-Z0-9+=/]+):([a-zA-Z0-9+/=]+)', '\\1$\\2:<salt>$<storedkey>:<serverkey>'), rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_bypassrls';\n ALTER ROLE regress_test_bypassrls WITH BYPASSRLS;\n-SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, rolpassword, rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_bypassrls';\n+SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, regexp_replace(rolpassword, '(SCRAM-SHA-256)\\$(\\d+):([a-zA-Z0-9+/=]+)\\$([a-zA-Z0-9+=/]+):([a-zA-Z0-9+/=]+)', '\\1$\\2:<salt>$<storedkey>:<serverkey>'), rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_bypassrls';\n \n -- clean up roles\n DROP ROLE regress_test_def_superuser;\ndiff --git a/src/test/regress/sql/rowsecurity.sql b/src/test/regress/sql/rowsecurity.sql\nindex d3bfd53e23..919ce1d0c6 100644\n--- a/src/test/regress/sql/rowsecurity.sql\n+++ b/src/test/regress/sql/rowsecurity.sql\n@@ -20,13 +20,13 @@ DROP SCHEMA IF EXISTS regress_rls_schema CASCADE;\n RESET client_min_messages;\n \n -- initial setup\n-CREATE USER regress_rls_alice NOLOGIN;\n-CREATE USER regress_rls_bob NOLOGIN;\n-CREATE USER regress_rls_carol NOLOGIN;\n-CREATE USER regress_rls_dave NOLOGIN;\n-CREATE USER 
regress_rls_exempt_user BYPASSRLS NOLOGIN;\n-CREATE ROLE regress_rls_group1 NOLOGIN;\n-CREATE ROLE regress_rls_group2 NOLOGIN;\n+CREATE USER regress_rls_alice NOLOGIN PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE USER regress_rls_bob NOLOGIN PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE USER regress_rls_carol NOLOGIN PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE USER regress_rls_dave NOLOGIN PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE USER regress_rls_exempt_user BYPASSRLS NOLOGIN PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE ROLE regress_rls_group1 NOLOGIN PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE ROLE regress_rls_group2 NOLOGIN PASSWORD NEON_PASSWORD_PLACEHOLDER;\n \n GRANT regress_rls_group1 TO regress_rls_bob;\n GRANT regress_rls_group2 TO regress_rls_carol;\n@@ -2065,8 +2065,8 @@ SELECT count(*) = 0 FROM pg_depend\n -- DROP OWNED BY testing\n RESET SESSION AUTHORIZATION;\n \n-CREATE ROLE regress_rls_dob_role1;\n-CREATE ROLE regress_rls_dob_role2;\n+CREATE ROLE regress_rls_dob_role1 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE ROLE regress_rls_dob_role2 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n \n CREATE TABLE dob_t1 (c1 int);\n CREATE TABLE dob_t2 (c1 int) PARTITION BY RANGE (c1);\ndiff --git a/src/test/regress/sql/rules.sql b/src/test/regress/sql/rules.sql\nindex 8b7e255dcd..c58d095c05 100644\n--- a/src/test/regress/sql/rules.sql\n+++ b/src/test/regress/sql/rules.sql\n@@ -1356,7 +1356,7 @@ DROP TABLE ruletest2;\n -- Test non-SELECT rule on security invoker view.\n -- Should use view owner's permissions.\n --\n-CREATE USER regress_rule_user1;\n+CREATE USER regress_rule_user1 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n \n CREATE TABLE ruletest_t1 (x int);\n CREATE TABLE ruletest_t2 (x int);\ndiff --git a/src/test/regress/sql/security_label.sql b/src/test/regress/sql/security_label.sql\nindex 98e6a5f211..68c868fef2 100644\n--- a/src/test/regress/sql/security_label.sql\n+++ b/src/test/regress/sql/security_label.sql\n@@ -10,8 +10,8 @@ DROP ROLE IF EXISTS 
regress_seclabel_user2;\n \n RESET client_min_messages;\n \n-CREATE USER regress_seclabel_user1 WITH CREATEROLE;\n-CREATE USER regress_seclabel_user2;\n+CREATE USER regress_seclabel_user1 WITH CREATEROLE PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE USER regress_seclabel_user2 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n \n CREATE TABLE seclabel_tbl1 (a int, b text);\n CREATE TABLE seclabel_tbl2 (x int, y text);\ndiff --git a/src/test/regress/sql/select_into.sql b/src/test/regress/sql/select_into.sql\nindex 689c448cc2..223ceb1d75 100644\n--- a/src/test/regress/sql/select_into.sql\n+++ b/src/test/regress/sql/select_into.sql\n@@ -20,7 +20,7 @@ DROP TABLE sitmp1;\n -- SELECT INTO and INSERT permission, if owner is not allowed to insert.\n --\n CREATE SCHEMA selinto_schema;\n-CREATE USER regress_selinto_user;\n+CREATE USER regress_selinto_user PASSWORD NEON_PASSWORD_PLACEHOLDER;\n ALTER DEFAULT PRIVILEGES FOR ROLE regress_selinto_user\n \t  REVOKE INSERT ON TABLES FROM regress_selinto_user;\n GRANT ALL ON SCHEMA selinto_schema TO public;\ndiff --git a/src/test/regress/sql/select_parallel.sql b/src/test/regress/sql/select_parallel.sql\nindex 33d78e16dc..cb193c9b27 100644\n--- a/src/test/regress/sql/select_parallel.sql\n+++ b/src/test/regress/sql/select_parallel.sql\n@@ -464,7 +464,7 @@ SELECT 1 FROM tenk1_vw_sec\n rollback;\n \n -- test that function option SET ROLE works in parallel workers.\n-create role regress_parallel_worker;\n+create role regress_parallel_worker PASSWORD NEON_PASSWORD_PLACEHOLDER;\n \n create function set_and_report_role() returns text as\n   $$ select current_setting('role') $$ language sql parallel safe\ndiff --git a/src/test/regress/sql/select_views.sql b/src/test/regress/sql/select_views.sql\nindex e742f13699..7bd0255df8 100644\n--- a/src/test/regress/sql/select_views.sql\n+++ b/src/test/regress/sql/select_views.sql\n@@ -12,7 +12,7 @@ SELECT * FROM toyemp WHERE name = 'sharon';\n --\n -- Test for Leaky view scenario\n --\n-CREATE ROLE 
regress_alice;\n+CREATE ROLE regress_alice PASSWORD NEON_PASSWORD_PLACEHOLDER;\n \n CREATE FUNCTION f_leak (text)\n        RETURNS bool LANGUAGE 'plpgsql' COST 0.0000001\ndiff --git a/src/test/regress/sql/sequence.sql b/src/test/regress/sql/sequence.sql\nindex 793f1415f6..ec07c1f193 100644\n--- a/src/test/regress/sql/sequence.sql\n+++ b/src/test/regress/sql/sequence.sql\n@@ -293,7 +293,7 @@ ROLLBACK;\n \n -- privileges tests\n \n-CREATE USER regress_seq_user;\n+CREATE USER regress_seq_user PASSWORD NEON_PASSWORD_PLACEHOLDER;\n \n -- nextval\n BEGIN;\ndiff --git a/src/test/regress/sql/stats.sql b/src/test/regress/sql/stats.sql\nindex 1e21e55c6d..2251f50c5e 100644\n--- a/src/test/regress/sql/stats.sql\n+++ b/src/test/regress/sql/stats.sql\n@@ -622,23 +622,6 @@ SELECT :io_sum_shared_after_writes > :io_sum_shared_before_writes;\n SELECT current_setting('fsync') = 'off'\n   OR :io_sum_shared_after_fsyncs > :io_sum_shared_before_fsyncs;\n \n--- Change the tablespace so that the table is rewritten directly, then SELECT\n--- from it to cause it to be read back into shared buffers.\n-SELECT sum(reads) AS io_sum_shared_before_reads\n-  FROM pg_stat_io WHERE context = 'normal' AND object = 'relation' \\gset\n--- Do this in a transaction to prevent spurious failures due to concurrent accesses to our newly\n--- rewritten table, e.g. 
by autovacuum.\n-BEGIN;\n-ALTER TABLE test_io_shared SET TABLESPACE regress_tblspace;\n--- SELECT from the table so that the data is read into shared buffers and\n--- context 'normal', object 'relation' reads are counted.\n-SELECT COUNT(*) FROM test_io_shared;\n-COMMIT;\n-SELECT pg_stat_force_next_flush();\n-SELECT sum(reads) AS io_sum_shared_after_reads\n-  FROM pg_stat_io WHERE context = 'normal' AND object = 'relation'  \\gset\n-SELECT :io_sum_shared_after_reads > :io_sum_shared_before_reads;\n-\n SELECT sum(hits) AS io_sum_shared_before_hits\n   FROM pg_stat_io WHERE context = 'normal' AND object = 'relation' \\gset\n -- Select from the table again to count hits.\ndiff --git a/src/test/regress/sql/stats_ext.sql b/src/test/regress/sql/stats_ext.sql\nindex 1b80d3687b..4d8798b0b1 100644\n--- a/src/test/regress/sql/stats_ext.sql\n+++ b/src/test/regress/sql/stats_ext.sql\n@@ -50,7 +50,7 @@ DROP TABLE ext_stats_test;\n CREATE TABLE ab1 (a INTEGER, b INTEGER, c INTEGER);\n CREATE STATISTICS IF NOT EXISTS ab1_a_b_stats ON a, b FROM ab1;\n COMMENT ON STATISTICS ab1_a_b_stats IS 'new comment';\n-CREATE ROLE regress_stats_ext;\n+CREATE ROLE regress_stats_ext PASSWORD NEON_PASSWORD_PLACEHOLDER;\n SET SESSION AUTHORIZATION regress_stats_ext;\n COMMENT ON STATISTICS ab1_a_b_stats IS 'changed comment';\n DROP STATISTICS ab1_a_b_stats;\n@@ -1607,7 +1607,7 @@ drop statistics stts_t1_expr_expr_stat;\n set search_path to public, stts_s1;\n \\dX\n \n-create role regress_stats_ext nosuperuser;\n+create role regress_stats_ext nosuperuser PASSWORD NEON_PASSWORD_PLACEHOLDER;\n set role regress_stats_ext;\n \\dX\n reset role;\n@@ -1618,7 +1618,7 @@ drop user regress_stats_ext;\n reset search_path;\n \n -- User with no access\n-CREATE USER regress_stats_user1;\n+CREATE USER regress_stats_user1 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n GRANT USAGE ON SCHEMA tststats TO regress_stats_user1;\n SET SESSION AUTHORIZATION regress_stats_user1;\n SELECT * FROM tststats.priv_test_tbl; -- Permission 
denied\ndiff --git a/src/test/regress/sql/subscription.sql b/src/test/regress/sql/subscription.sql\nindex 444e563ff3..1a538a98a0 100644\n--- a/src/test/regress/sql/subscription.sql\n+++ b/src/test/regress/sql/subscription.sql\n@@ -2,10 +2,10 @@\n -- SUBSCRIPTION\n --\n \n-CREATE ROLE regress_subscription_user LOGIN SUPERUSER;\n-CREATE ROLE regress_subscription_user2;\n-CREATE ROLE regress_subscription_user3 IN ROLE pg_create_subscription;\n-CREATE ROLE regress_subscription_user_dummy LOGIN NOSUPERUSER;\n+CREATE ROLE regress_subscription_user LOGIN SUPERUSER PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE ROLE regress_subscription_user2 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE ROLE regress_subscription_user3 PASSWORD NEON_PASSWORD_PLACEHOLDER IN ROLE pg_create_subscription;\n+CREATE ROLE regress_subscription_user_dummy LOGIN NOSUPERUSER PASSWORD NEON_PASSWORD_PLACEHOLDER;\n SET SESSION AUTHORIZATION 'regress_subscription_user';\n \n -- fail - no publications\ndiff --git a/src/test/regress/sql/test_setup.sql b/src/test/regress/sql/test_setup.sql\nindex 1b2d434683..b765c748b8 100644\n--- a/src/test/regress/sql/test_setup.sql\n+++ b/src/test/regress/sql/test_setup.sql\n@@ -135,7 +135,8 @@ CREATE TABLE onek (\n );\n \n \\set filename :abs_srcdir '/data/onek.data'\n-COPY onek FROM :'filename';\n+\\set command '\\\\copy onek FROM ' :'filename';\n+:command\n VACUUM ANALYZE onek;\n \n CREATE TABLE onek2 AS SELECT * FROM onek;\n@@ -161,7 +162,8 @@ CREATE TABLE tenk1 (\n );\n \n \\set filename :abs_srcdir '/data/tenk.data'\n-COPY tenk1 FROM :'filename';\n+\\set command '\\\\copy tenk1 FROM ' :'filename';\n+:command\n VACUUM ANALYZE tenk1;\n \n CREATE TABLE tenk2 AS SELECT * FROM tenk1;\n@@ -174,7 +176,8 @@ CREATE TABLE person (\n );\n \n \\set filename :abs_srcdir '/data/person.data'\n-COPY person FROM :'filename';\n+\\set command '\\\\copy person FROM ' :'filename';\n+:command\n VACUUM ANALYZE person;\n \n CREATE TABLE emp (\n@@ -183,7 +186,8 @@ CREATE TABLE emp (\n ) 
INHERITS (person);\n \n \\set filename :abs_srcdir '/data/emp.data'\n-COPY emp FROM :'filename';\n+\\set command '\\\\copy emp FROM ' :'filename';\n+:command\n VACUUM ANALYZE emp;\n \n CREATE TABLE student (\n@@ -191,7 +195,8 @@ CREATE TABLE student (\n ) INHERITS (person);\n \n \\set filename :abs_srcdir '/data/student.data'\n-COPY student FROM :'filename';\n+\\set command '\\\\copy student FROM ' :'filename';\n+:command\n VACUUM ANALYZE student;\n \n CREATE TABLE stud_emp (\n@@ -199,7 +204,8 @@ CREATE TABLE stud_emp (\n ) INHERITS (emp, student);\n \n \\set filename :abs_srcdir '/data/stud_emp.data'\n-COPY stud_emp FROM :'filename';\n+\\set command '\\\\copy stud_emp FROM ' :'filename';\n+:command\n VACUUM ANALYZE stud_emp;\n \n CREATE TABLE road (\n@@ -208,7 +214,8 @@ CREATE TABLE road (\n );\n \n \\set filename :abs_srcdir '/data/streets.data'\n-COPY road FROM :'filename';\n+\\set command '\\\\copy road FROM ' :'filename';\n+:command\n VACUUM ANALYZE road;\n \n CREATE TABLE ihighway () INHERITS (road);\ndiff --git a/src/test/regress/sql/tsearch.sql b/src/test/regress/sql/tsearch.sql\nindex fbd26cdba4..7ec2d78eee 100644\n--- a/src/test/regress/sql/tsearch.sql\n+++ b/src/test/regress/sql/tsearch.sql\n@@ -49,7 +49,8 @@ CREATE TABLE test_tsvector(\n );\n \n \\set filename :abs_srcdir '/data/tsearch.data'\n-COPY test_tsvector FROM :'filename';\n+\\set command '\\\\copy test_tsvector FROM ' :'filename';\n+:command\n \n ANALYZE test_tsvector;\n \ndiff --git a/src/test/regress/sql/updatable_views.sql b/src/test/regress/sql/updatable_views.sql\nindex 0a3176e25d..7744ef68f5 100644\n--- a/src/test/regress/sql/updatable_views.sql\n+++ b/src/test/regress/sql/updatable_views.sql\n@@ -425,9 +425,9 @@ DROP TABLE base_tbl CASCADE;\n \n -- permissions checks\n \n-CREATE USER regress_view_user1;\n-CREATE USER regress_view_user2;\n-CREATE USER regress_view_user3;\n+CREATE USER regress_view_user1 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE USER regress_view_user2 PASSWORD 
NEON_PASSWORD_PLACEHOLDER;\n+CREATE USER regress_view_user3 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n \n SET SESSION AUTHORIZATION regress_view_user1;\n CREATE TABLE base_tbl(a int, b text, c float);\n@@ -1586,8 +1586,8 @@ drop view uv_iocu_view;\n drop table uv_iocu_tab;\n \n -- ON CONFLICT DO UPDATE permissions checks\n-create user regress_view_user1;\n-create user regress_view_user2;\n+create user regress_view_user1 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+create user regress_view_user2 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n \n set session authorization regress_view_user1;\n create table base_tbl(a int unique, b text, c float);\ndiff --git a/src/test/regress/sql/update.sql b/src/test/regress/sql/update.sql\nindex 7a7bee77b9..07b480cd59 100644\n--- a/src/test/regress/sql/update.sql\n+++ b/src/test/regress/sql/update.sql\n@@ -339,7 +339,7 @@ DROP FUNCTION func_parted_mod_b();\n -----------------------------------------\n \n ALTER TABLE range_parted ENABLE ROW LEVEL SECURITY;\n-CREATE USER regress_range_parted_user;\n+CREATE USER regress_range_parted_user PASSWORD NEON_PASSWORD_PLACEHOLDER;\n GRANT ALL ON range_parted, mintab TO regress_range_parted_user;\n CREATE POLICY seeall ON range_parted AS PERMISSIVE FOR SELECT USING (true);\n CREATE POLICY policy_range_parted ON range_parted for UPDATE USING (true) WITH CHECK (c % 2 = 0);\ndiff --git a/src/test/regress/sql/vacuum.sql b/src/test/regress/sql/vacuum.sql\nindex ae36b54641..5612b8e162 100644\n--- a/src/test/regress/sql/vacuum.sql\n+++ b/src/test/regress/sql/vacuum.sql\n@@ -335,7 +335,7 @@ CREATE TABLE vacowned (a int);\n CREATE TABLE vacowned_parted (a int) PARTITION BY LIST (a);\n CREATE TABLE vacowned_part1 PARTITION OF vacowned_parted FOR VALUES IN (1);\n CREATE TABLE vacowned_part2 PARTITION OF vacowned_parted FOR VALUES IN (2);\n-CREATE ROLE regress_vacuum;\n+CREATE ROLE regress_vacuum PASSWORD NEON_PASSWORD_PLACEHOLDER;\n SET ROLE regress_vacuum;\n -- Simple table\n VACUUM vacowned;\n"
  },
  {
    "path": "compute/patches/cloud_regress_pg17.patch",
    "content": "diff --git a/src/test/regress/expected/aggregates.out b/src/test/regress/expected/aggregates.out\nindex 1c1ca7573a..6dfe537647 100644\n--- a/src/test/regress/expected/aggregates.out\n+++ b/src/test/regress/expected/aggregates.out\n@@ -11,7 +11,8 @@ CREATE TABLE aggtest (\n \tb\t\t\tfloat4\n );\n \\set filename :abs_srcdir '/data/agg.data'\n-COPY aggtest FROM :'filename';\n+\\set command '\\\\copy aggtest FROM ' :'filename';\n+:command\n ANALYZE aggtest;\n SELECT avg(four) AS avg_1 FROM onek;\n        avg_1        \ndiff --git a/src/test/regress/expected/alter_generic.out b/src/test/regress/expected/alter_generic.out\nindex ae54cb254f..888e2ee8bc 100644\n--- a/src/test/regress/expected/alter_generic.out\n+++ b/src/test/regress/expected/alter_generic.out\n@@ -15,9 +15,9 @@ DROP ROLE IF EXISTS regress_alter_generic_user1;\n DROP ROLE IF EXISTS regress_alter_generic_user2;\n DROP ROLE IF EXISTS regress_alter_generic_user3;\n RESET client_min_messages;\n-CREATE USER regress_alter_generic_user3;\n-CREATE USER regress_alter_generic_user2;\n-CREATE USER regress_alter_generic_user1 IN ROLE regress_alter_generic_user3;\n+CREATE USER regress_alter_generic_user3 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE USER regress_alter_generic_user2 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE USER regress_alter_generic_user1 PASSWORD NEON_PASSWORD_PLACEHOLDER IN ROLE regress_alter_generic_user3;\n CREATE SCHEMA alt_nsp1;\n CREATE SCHEMA alt_nsp2;\n GRANT ALL ON SCHEMA alt_nsp1, alt_nsp2 TO public;\n@@ -370,7 +370,7 @@ ERROR:  STORAGE cannot be specified in ALTER OPERATOR FAMILY\n DROP OPERATOR FAMILY alt_opf4 USING btree;\n -- Should fail. Need to be SUPERUSER to do ALTER OPERATOR FAMILY .. 
ADD / DROP\n BEGIN TRANSACTION;\n-CREATE ROLE regress_alter_generic_user5 NOSUPERUSER;\n+CREATE ROLE regress_alter_generic_user5 PASSWORD NEON_PASSWORD_PLACEHOLDER NOSUPERUSER;\n CREATE OPERATOR FAMILY alt_opf5 USING btree;\n SET ROLE regress_alter_generic_user5;\n ALTER OPERATOR FAMILY alt_opf5 USING btree ADD OPERATOR 1 < (int4, int2), FUNCTION 1 btint42cmp(int4, int2);\n@@ -382,7 +382,7 @@ ERROR:  current transaction is aborted, commands ignored until end of transactio\n ROLLBACK;\n -- Should fail. Need rights to namespace for ALTER OPERATOR FAMILY .. ADD / DROP\n BEGIN TRANSACTION;\n-CREATE ROLE regress_alter_generic_user6;\n+CREATE ROLE regress_alter_generic_user6 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n CREATE SCHEMA alt_nsp6;\n REVOKE ALL ON SCHEMA alt_nsp6 FROM regress_alter_generic_user6;\n CREATE OPERATOR FAMILY alt_nsp6.alt_opf6 USING btree;\ndiff --git a/src/test/regress/expected/alter_operator.out b/src/test/regress/expected/alter_operator.out\nindex 4217ba15de..d28e3ff86e 100644\n--- a/src/test/regress/expected/alter_operator.out\n+++ b/src/test/regress/expected/alter_operator.out\n@@ -119,7 +119,7 @@ ERROR:  operator attribute \"Restrict\" not recognized\n --\n -- Test permission check. 
Must be owner to ALTER OPERATOR.\n --\n-CREATE USER regress_alter_op_user;\n+CREATE USER regress_alter_op_user PASSWORD NEON_PASSWORD_PLACEHOLDER;\n SET SESSION AUTHORIZATION regress_alter_op_user;\n ALTER OPERATOR === (boolean, boolean) SET (RESTRICT = NONE);\n ERROR:  must be owner of operator ===\ndiff --git a/src/test/regress/expected/alter_table.out b/src/test/regress/expected/alter_table.out\nindex 6de74a26a9..cd59809194 100644\n--- a/src/test/regress/expected/alter_table.out\n+++ b/src/test/regress/expected/alter_table.out\n@@ -5,7 +5,7 @@\n SET client_min_messages TO 'warning';\n DROP ROLE IF EXISTS regress_alter_table_user1;\n RESET client_min_messages;\n-CREATE USER regress_alter_table_user1;\n+CREATE USER regress_alter_table_user1 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n --\n -- add attribute\n --\n@@ -3928,8 +3928,8 @@ DROP TABLE fail_part;\n ALTER TABLE list_parted ATTACH PARTITION nonexistent FOR VALUES IN (1);\n ERROR:  relation \"nonexistent\" does not exist\n -- check ownership of the source table\n-CREATE ROLE regress_test_me;\n-CREATE ROLE regress_test_not_me;\n+CREATE ROLE regress_test_me PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE ROLE regress_test_not_me PASSWORD NEON_PASSWORD_PLACEHOLDER;\n CREATE TABLE not_owned_by_me (LIKE list_parted);\n ALTER TABLE not_owned_by_me OWNER TO regress_test_not_me;\n SET SESSION AUTHORIZATION regress_test_me;\ndiff --git a/src/test/regress/expected/arrays.out b/src/test/regress/expected/arrays.out\nindex a6d81fd5f9..afefd761cb 100644\n--- a/src/test/regress/expected/arrays.out\n+++ b/src/test/regress/expected/arrays.out\n@@ -18,7 +18,8 @@ CREATE TABLE array_op_test (\n \tt\t\t\ttext[]\n );\n \\set filename :abs_srcdir '/data/array.data'\n-COPY array_op_test FROM :'filename';\n+\\set command '\\\\copy array_op_test FROM ' :'filename';\n+:command\n ANALYZE array_op_test;\n --\n -- only the 'e' array is 0-based, the others are 1-based.\ndiff --git a/src/test/regress/expected/btree_index.out 
b/src/test/regress/expected/btree_index.out\nindex 510646cbce..0b3ca1f720 100644\n--- a/src/test/regress/expected/btree_index.out\n+++ b/src/test/regress/expected/btree_index.out\n@@ -20,13 +20,17 @@ CREATE TABLE bt_f8_heap (\n \trandom \t\tint4\n );\n \\set filename :abs_srcdir '/data/desc.data'\n-COPY bt_i4_heap FROM :'filename';\n+\\set command '\\\\copy bt_i4_heap FROM ' :'filename';\n+:command\n \\set filename :abs_srcdir '/data/hash.data'\n-COPY bt_name_heap FROM :'filename';\n+\\set command '\\\\copy bt_name_heap FROM ' :'filename';\n+:command\n \\set filename :abs_srcdir '/data/desc.data'\n-COPY bt_txt_heap FROM :'filename';\n+\\set command '\\\\copy bt_txt_heap FROM ' :'filename';\n+:command\n \\set filename :abs_srcdir '/data/hash.data'\n-COPY bt_f8_heap FROM :'filename';\n+\\set command '\\\\copy bt_f8_heap FROM ' :'filename';\n+:command\n ANALYZE bt_i4_heap;\n ANALYZE bt_name_heap;\n ANALYZE bt_txt_heap;\ndiff --git a/src/test/regress/expected/cluster.out b/src/test/regress/expected/cluster.out\nindex a13aafff0b..f0289b5c06 100644\n--- a/src/test/regress/expected/cluster.out\n+++ b/src/test/regress/expected/cluster.out\n@@ -308,7 +308,7 @@ WHERE pg_class.oid=indexrelid\n -- Verify that toast tables are clusterable\n CLUSTER pg_toast.pg_toast_826 USING pg_toast_826_index;\n -- Verify that clustering all tables does in fact cluster the right ones\n-CREATE USER regress_clstr_user;\n+CREATE USER regress_clstr_user PASSWORD NEON_PASSWORD_PLACEHOLDER;\n CREATE TABLE clstr_1 (a INT PRIMARY KEY);\n CREATE TABLE clstr_2 (a INT PRIMARY KEY);\n CREATE TABLE clstr_3 (a INT PRIMARY KEY);\n@@ -499,7 +499,7 @@ DROP TABLE clstrpart;\n CREATE TABLE ptnowner(i int unique) PARTITION BY LIST (i);\n CREATE INDEX ptnowner_i_idx ON ptnowner(i);\n CREATE TABLE ptnowner1 PARTITION OF ptnowner FOR VALUES IN (1);\n-CREATE ROLE regress_ptnowner;\n+CREATE ROLE regress_ptnowner PASSWORD NEON_PASSWORD_PLACEHOLDER;\n CREATE TABLE ptnowner2 PARTITION OF ptnowner FOR VALUES IN (2);\n 
ALTER TABLE ptnowner1 OWNER TO regress_ptnowner;\n SET SESSION AUTHORIZATION regress_ptnowner;\ndiff --git a/src/test/regress/expected/collate.icu.utf8.out b/src/test/regress/expected/collate.icu.utf8.out\nindex 7a425afe1f..2756fb2d55 100644\n--- a/src/test/regress/expected/collate.icu.utf8.out\n+++ b/src/test/regress/expected/collate.icu.utf8.out\n@@ -1016,7 +1016,7 @@ select * from collate_test1 where b ilike 'ABC';\n \n reset enable_seqscan;\n -- schema manipulation commands\n-CREATE ROLE regress_test_role;\n+CREATE ROLE regress_test_role PASSWORD NEON_PASSWORD_PLACEHOLDER;\n CREATE SCHEMA test_schema;\n -- We need to do this this way to cope with varying names for encodings:\n SET client_min_messages TO WARNING;\ndiff --git a/src/test/regress/expected/constraints.out b/src/test/regress/expected/constraints.out\nindex cf0b80d616..e8e2a14a4a 100644\n--- a/src/test/regress/expected/constraints.out\n+++ b/src/test/regress/expected/constraints.out\n@@ -349,7 +349,8 @@ CREATE TABLE COPY_TBL (x INT, y TEXT, z INT,\n \tCONSTRAINT COPY_CON\n \tCHECK (x > 3 AND y <> 'check failed' AND x < 7 ));\n \\set filename :abs_srcdir '/data/constro.data'\n-COPY COPY_TBL FROM :'filename';\n+\\set command '\\\\copy COPY_TBL FROM ' :'filename';\n+:command\n SELECT * FROM COPY_TBL;\n  x |       y       | z \n ---+---------------+---\n@@ -358,7 +359,8 @@ SELECT * FROM COPY_TBL;\n (2 rows)\n \n \\set filename :abs_srcdir '/data/constrf.data'\n-COPY COPY_TBL FROM :'filename';\n+\\set command '\\\\copy COPY_TBL FROM ' :'filename';\n+:command\n ERROR:  new row for relation \"copy_tbl\" violates check constraint \"copy_con\"\n DETAIL:  Failing row contains (7, check failed, 6).\n CONTEXT:  COPY copy_tbl, line 2: \"7\tcheck failed\t6\"\n@@ -799,7 +801,7 @@ DETAIL:  Key (f1)=(3) conflicts with key (f1)=(3).\n DROP TABLE deferred_excl;\n -- Comments\n -- Setup a low-level role to enforce non-superuser checks.\n-CREATE ROLE regress_constraint_comments;\n+CREATE ROLE regress_constraint_comments 
PASSWORD NEON_PASSWORD_PLACEHOLDER;\n SET SESSION AUTHORIZATION regress_constraint_comments;\n CREATE TABLE constraint_comments_tbl (a int CONSTRAINT the_constraint CHECK (a > 0));\n CREATE DOMAIN constraint_comments_dom AS int CONSTRAINT the_constraint CHECK (value > 0);\n@@ -819,7 +821,7 @@ COMMENT ON CONSTRAINT the_constraint ON constraint_comments_tbl IS NULL;\n COMMENT ON CONSTRAINT the_constraint ON DOMAIN constraint_comments_dom IS NULL;\n -- unauthorized user\n RESET SESSION AUTHORIZATION;\n-CREATE ROLE regress_constraint_comments_noaccess;\n+CREATE ROLE regress_constraint_comments_noaccess PASSWORD NEON_PASSWORD_PLACEHOLDER;\n SET SESSION AUTHORIZATION regress_constraint_comments_noaccess;\n COMMENT ON CONSTRAINT the_constraint ON constraint_comments_tbl IS 'no, the comment';\n ERROR:  must be owner of relation constraint_comments_tbl\ndiff --git a/src/test/regress/expected/conversion.out b/src/test/regress/expected/conversion.out\nindex d785f92561..16377e5ac9 100644\n--- a/src/test/regress/expected/conversion.out\n+++ b/src/test/regress/expected/conversion.out\n@@ -15,7 +15,7 @@ SELECT FROM test_enc_setup();\n CREATE FUNCTION test_enc_conversion(bytea, name, name, bool, validlen OUT int, result OUT bytea)\n     AS :'regresslib', 'test_enc_conversion'\n     LANGUAGE C STRICT;\n-CREATE USER regress_conversion_user WITH NOCREATEDB NOCREATEROLE;\n+CREATE USER regress_conversion_user WITH NOCREATEDB NOCREATEROLE PASSWORD NEON_PASSWORD_PLACEHOLDER;\n SET SESSION AUTHORIZATION regress_conversion_user;\n CREATE CONVERSION myconv FOR 'LATIN1' TO 'UTF8' FROM iso8859_1_to_utf8;\n --\ndiff --git a/src/test/regress/expected/copy.out b/src/test/regress/expected/copy.out\nindex 44114089a6..fc1894a0f2 100644\n--- a/src/test/regress/expected/copy.out\n+++ b/src/test/regress/expected/copy.out\n@@ -15,9 +15,11 @@ insert into copytest values('Unix',E'abc\\ndef',2);\n insert into copytest values('Mac',E'abc\\rdef',3);\n insert into copytest 
values(E'esc\\\\ape',E'a\\\\r\\\\\\r\\\\\\n\\\\nb',4);\n \\set filename :abs_builddir '/results/copytest.csv'\n-copy copytest to :'filename' csv;\n+\\set command '\\\\copy copytest to ' :'filename' csv;\n+:command\n create temp table copytest2 (like copytest);\n-copy copytest2 from :'filename' csv;\n+\\set command '\\\\copy copytest2 from ' :'filename' csv;\n+:command\n select * from copytest except select * from copytest2;\n  style | test | filler \n -------+------+--------\n@@ -25,8 +27,10 @@ select * from copytest except select * from copytest2;\n \n truncate copytest2;\n --- same test but with an escape char different from quote char\n-copy copytest to :'filename' csv quote '''' escape E'\\\\';\n-copy copytest2 from :'filename' csv quote '''' escape E'\\\\';\n+\\set command '\\\\copy copytest to ' :'filename' ' csv quote ' '\\'\\'\\'\\'' ' escape ' 'E\\'' '\\\\\\\\\\'';\n+:command\n+\\set command '\\\\copy copytest2 from ' :'filename' ' csv quote ' '\\'\\'\\'\\'' ' escape ' 'E\\'' '\\\\\\\\\\'';\n+:command\n select * from copytest except select * from copytest2;\n  style | test | filler \n -------+------+--------\n@@ -66,13 +70,16 @@ insert into parted_copytest select x,1,'One' from generate_series(1,1000) x;\n insert into parted_copytest select x,2,'Two' from generate_series(1001,1010) x;\n insert into parted_copytest select x,1,'One' from generate_series(1011,1020) x;\n \\set filename :abs_builddir '/results/parted_copytest.csv'\n-copy (select * from parted_copytest order by a) to :'filename';\n+\\set command '\\\\copy (select * from parted_copytest order by a) to ' :'filename';\n+:command\n truncate parted_copytest;\n-copy parted_copytest from :'filename';\n+\\set command '\\\\copy parted_copytest from ' :'filename';\n+:command\n -- Ensure COPY FREEZE errors for partitioned tables.\n begin;\n truncate parted_copytest;\n-copy parted_copytest from :'filename' (freeze);\n+\\set command '\\\\copy parted_copytest from ' :'filename' (freeze);\n+:command\n ERROR:  
cannot perform COPY FREEZE on a partitioned table\n rollback;\n select tableoid::regclass,count(*),sum(a) from parted_copytest\n@@ -94,7 +101,8 @@ create trigger part_ins_trig\n \tbefore insert on parted_copytest_a2\n \tfor each row\n \texecute procedure part_ins_func();\n-copy parted_copytest from :'filename';\n+\\set command '\\\\copy parted_copytest from ' :'filename';\n+:command\n select tableoid::regclass,count(*),sum(a) from parted_copytest\n group by tableoid order by tableoid::regclass::name;\n       tableoid      | count |  sum   \n@@ -106,7 +114,8 @@ group by tableoid order by tableoid::regclass::name;\n truncate table parted_copytest;\n create index on parted_copytest (b);\n drop trigger part_ins_trig on parted_copytest_a2;\n-copy parted_copytest from stdin;\n+\\set command '\\\\copy parted_copytest from ' stdin;\n+:command\n -- Ensure index entries were properly added during the copy.\n select * from parted_copytest where b = 1;\n  a | b |  c   \n@@ -170,9 +179,9 @@ INFO:  progress: {\"type\": \"PIPE\", \"command\": \"COPY FROM\", \"relname\": \"tab_progre\n -- Generate COPY FROM report with FILE, with some excluded tuples.\n truncate tab_progress_reporting;\n \\set filename :abs_srcdir '/data/emp.data'\n-copy tab_progress_reporting from :'filename'\n-\twhere (salary < 2000);\n-INFO:  progress: {\"type\": \"FILE\", \"command\": \"COPY FROM\", \"relname\": \"tab_progress_reporting\", \"has_bytes_total\": true, \"tuples_excluded\": 1, \"tuples_processed\": 2, \"has_bytes_processed\": true}\n+\\set command '\\\\copy tab_progress_reporting from ' :'filename' 'where (salary < 2000)';\n+:command\n+INFO:  progress: {\"type\": \"PIPE\", \"command\": \"COPY FROM\", \"relname\": \"tab_progress_reporting\", \"has_bytes_total\": false, \"tuples_excluded\": 1, \"tuples_processed\": 2, \"has_bytes_processed\": true}\n drop trigger check_after_tab_progress_reporting on tab_progress_reporting;\n drop function notice_after_tab_progress_reporting();\n drop table 
tab_progress_reporting;\n@@ -281,7 +290,8 @@ CREATE TABLE parted_si_p_odd PARTITION OF parted_si FOR VALUES IN (1);\n -- https://postgr.es/m/18130-7a86a7356a75209d%40postgresql.org\n -- https://postgr.es/m/257696.1695670946%40sss.pgh.pa.us\n \\set filename :abs_srcdir '/data/desc.data'\n-COPY parted_si(id, data) FROM :'filename';\n+\\set command '\\\\COPY parted_si(id, data) FROM ' :'filename';\n+:command\n -- An earlier bug (see commit b1ecb9b3fcf) could end up using a buffer from\n -- the wrong partition. This test is *not* guaranteed to trigger that bug, but\n -- does so when shared_buffers is small enough.  To test if we encountered the\ndiff --git a/src/test/regress/expected/copy2.out b/src/test/regress/expected/copy2.out\nindex 695b1b2d63..9c9addead6 100644\n--- a/src/test/regress/expected/copy2.out\n+++ b/src/test/regress/expected/copy2.out\n@@ -631,8 +631,8 @@ select * from check_con_tbl;\n (2 rows)\n \n -- test with RLS enabled.\n-CREATE ROLE regress_rls_copy_user;\n-CREATE ROLE regress_rls_copy_user_colperms;\n+CREATE ROLE regress_rls_copy_user PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE ROLE regress_rls_copy_user_colperms PASSWORD NEON_PASSWORD_PLACEHOLDER;\n CREATE TABLE rls_t1 (a int, b int, c int);\n COPY rls_t1 (a, b, c) from stdin;\n CREATE POLICY p1 ON rls_t1 FOR SELECT USING (a % 2 = 0);\ndiff --git a/src/test/regress/expected/create_function_sql.out b/src/test/regress/expected/create_function_sql.out\nindex 50aca5940f..42527142f6 100644\n--- a/src/test/regress/expected/create_function_sql.out\n+++ b/src/test/regress/expected/create_function_sql.out\n@@ -4,7 +4,7 @@\n -- Assorted tests using SQL-language functions\n --\n -- All objects made in this test are in temp_func_test schema\n-CREATE USER regress_unpriv_user;\n+CREATE USER regress_unpriv_user PASSWORD NEON_PASSWORD_PLACEHOLDER;\n CREATE SCHEMA temp_func_test;\n GRANT ALL ON SCHEMA temp_func_test TO public;\n SET search_path TO temp_func_test, public;\ndiff --git 
a/src/test/regress/expected/create_index.out b/src/test/regress/expected/create_index.out\nindex cf6eac5734..3e56ea09d7 100644\n--- a/src/test/regress/expected/create_index.out\n+++ b/src/test/regress/expected/create_index.out\n@@ -51,7 +51,8 @@ CREATE TABLE fast_emp4000 (\n \thome_base\t box\n );\n \\set filename :abs_srcdir '/data/rect.data'\n-COPY slow_emp4000 FROM :'filename';\n+\\set command '\\\\copy slow_emp4000 FROM ' :'filename';\n+:command\n INSERT INTO fast_emp4000 SELECT * FROM slow_emp4000;\n ANALYZE slow_emp4000;\n ANALYZE fast_emp4000;\n@@ -655,7 +656,8 @@ CREATE TABLE array_index_op_test (\n \tt\t\t\ttext[]\n );\n \\set filename :abs_srcdir '/data/array.data'\n-COPY array_index_op_test FROM :'filename';\n+\\set command '\\\\copy array_index_op_test FROM ' :'filename';\n+:command\n ANALYZE array_index_op_test;\n SELECT * FROM array_index_op_test WHERE i = '{NULL}' ORDER BY seqno;\n  seqno |   i    |   t    \n@@ -2966,7 +2968,7 @@ END;\n -- concurrently\n REINDEX SCHEMA CONCURRENTLY schema_to_reindex;\n -- Failure for unauthorized user\n-CREATE ROLE regress_reindexuser NOLOGIN;\n+CREATE ROLE regress_reindexuser NOLOGIN PASSWORD NEON_PASSWORD_PLACEHOLDER;\n SET SESSION ROLE regress_reindexuser;\n REINDEX SCHEMA schema_to_reindex;\n ERROR:  must be owner of schema schema_to_reindex\ndiff --git a/src/test/regress/expected/create_procedure.out b/src/test/regress/expected/create_procedure.out\nindex 2177ba3509..ae3ca94d00 100644\n--- a/src/test/regress/expected/create_procedure.out\n+++ b/src/test/regress/expected/create_procedure.out\n@@ -421,7 +421,7 @@ ERROR:  cp_testfunc1(integer) is not a procedure\n DROP PROCEDURE nonexistent();\n ERROR:  procedure nonexistent() does not exist\n -- privileges\n-CREATE USER regress_cp_user1;\n+CREATE USER regress_cp_user1 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n GRANT INSERT ON cp_test TO regress_cp_user1;\n REVOKE EXECUTE ON PROCEDURE ptest1(text) FROM PUBLIC;\n SET ROLE regress_cp_user1;\ndiff --git 
a/src/test/regress/expected/create_role.out b/src/test/regress/expected/create_role.out\nindex 46d4f9efe9..fc2a28a2f6 100644\n--- a/src/test/regress/expected/create_role.out\n+++ b/src/test/regress/expected/create_role.out\n@@ -1,28 +1,28 @@\n -- ok, superuser can create users with any set of privileges\n-CREATE ROLE regress_role_super SUPERUSER;\n-CREATE ROLE regress_role_admin CREATEDB CREATEROLE REPLICATION BYPASSRLS;\n+CREATE ROLE regress_role_super SUPERUSER PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE ROLE regress_role_admin CREATEDB CREATEROLE REPLICATION BYPASSRLS PASSWORD NEON_PASSWORD_PLACEHOLDER;\n GRANT CREATE ON DATABASE regression TO regress_role_admin WITH GRANT OPTION;\n-CREATE ROLE regress_role_limited_admin CREATEROLE;\n-CREATE ROLE regress_role_normal;\n+CREATE ROLE regress_role_limited_admin CREATEROLE PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE ROLE regress_role_normal PASSWORD NEON_PASSWORD_PLACEHOLDER;\n -- fail, CREATEROLE user can't give away role attributes without having them\n SET SESSION AUTHORIZATION regress_role_limited_admin;\n-CREATE ROLE regress_nosuch_superuser SUPERUSER;\n+CREATE ROLE regress_nosuch_superuser SUPERUSER PASSWORD NEON_PASSWORD_PLACEHOLDER;\n ERROR:  permission denied to create role\n DETAIL:  Only roles with the SUPERUSER attribute may create roles with the SUPERUSER attribute.\n-CREATE ROLE regress_nosuch_replication_bypassrls REPLICATION BYPASSRLS;\n+CREATE ROLE regress_nosuch_replication_bypassrls REPLICATION BYPASSRLS PASSWORD NEON_PASSWORD_PLACEHOLDER;\n ERROR:  permission denied to create role\n DETAIL:  Only roles with the REPLICATION attribute may create roles with the REPLICATION attribute.\n-CREATE ROLE regress_nosuch_replication REPLICATION;\n+CREATE ROLE regress_nosuch_replication REPLICATION PASSWORD NEON_PASSWORD_PLACEHOLDER;\n ERROR:  permission denied to create role\n DETAIL:  Only roles with the REPLICATION attribute may create roles with the REPLICATION attribute.\n-CREATE ROLE 
regress_nosuch_bypassrls BYPASSRLS;\n+CREATE ROLE regress_nosuch_bypassrls BYPASSRLS PASSWORD NEON_PASSWORD_PLACEHOLDER;\n ERROR:  permission denied to create role\n DETAIL:  Only roles with the BYPASSRLS attribute may create roles with the BYPASSRLS attribute.\n-CREATE ROLE regress_nosuch_createdb CREATEDB;\n+CREATE ROLE regress_nosuch_createdb CREATEDB PASSWORD NEON_PASSWORD_PLACEHOLDER;\n ERROR:  permission denied to create role\n DETAIL:  Only roles with the CREATEDB attribute may create roles with the CREATEDB attribute.\n -- ok, can create a role without any special attributes\n-CREATE ROLE regress_role_limited;\n+CREATE ROLE regress_role_limited PASSWORD NEON_PASSWORD_PLACEHOLDER;\n -- fail, can't give it in any of the restricted attributes\n ALTER ROLE regress_role_limited SUPERUSER;\n ERROR:  permission denied to alter role\n@@ -39,10 +39,10 @@ DETAIL:  Only roles with the BYPASSRLS attribute may change the BYPASSRLS attrib\n DROP ROLE regress_role_limited;\n -- ok, can give away these role attributes if you have them\n SET SESSION AUTHORIZATION regress_role_admin;\n-CREATE ROLE regress_replication_bypassrls REPLICATION BYPASSRLS;\n-CREATE ROLE regress_replication REPLICATION;\n-CREATE ROLE regress_bypassrls BYPASSRLS;\n-CREATE ROLE regress_createdb CREATEDB;\n+CREATE ROLE regress_replication_bypassrls REPLICATION BYPASSRLS PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE ROLE regress_replication REPLICATION PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE ROLE regress_bypassrls BYPASSRLS PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE ROLE regress_createdb CREATEDB PASSWORD NEON_PASSWORD_PLACEHOLDER;\n -- ok, can toggle these role attributes off and on if you have them\n ALTER ROLE regress_replication NOREPLICATION;\n ALTER ROLE regress_replication REPLICATION;\n@@ -58,48 +58,48 @@ ALTER ROLE regress_createdb NOSUPERUSER;\n ERROR:  permission denied to alter role\n DETAIL:  Only roles with the SUPERUSER attribute may change the SUPERUSER attribute.\n -- ok, having 
CREATEROLE is enough to create users with these privileges\n-CREATE ROLE regress_createrole CREATEROLE NOINHERIT;\n+CREATE ROLE regress_createrole CREATEROLE NOINHERIT PASSWORD NEON_PASSWORD_PLACEHOLDER;\n GRANT CREATE ON DATABASE regression TO regress_createrole WITH GRANT OPTION;\n-CREATE ROLE regress_login LOGIN;\n-CREATE ROLE regress_inherit INHERIT;\n-CREATE ROLE regress_connection_limit CONNECTION LIMIT 5;\n-CREATE ROLE regress_encrypted_password ENCRYPTED PASSWORD 'foo';\n-CREATE ROLE regress_password_null PASSWORD NULL;\n+CREATE ROLE regress_login LOGIN PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE ROLE regress_inherit INHERIT PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE ROLE regress_connection_limit CONNECTION LIMIT 5 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE ROLE regress_encrypted_password ENCRYPTED PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE ROLE regress_password_null PASSWORD NEON_PASSWORD_PLACEHOLDER;\n -- ok, backwards compatible noise words should be ignored\n-CREATE ROLE regress_noiseword SYSID 12345;\n+CREATE ROLE regress_noiseword SYSID 12345 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n NOTICE:  SYSID can no longer be specified\n -- fail, cannot grant membership in superuser role\n-CREATE ROLE regress_nosuch_super IN ROLE regress_role_super;\n+CREATE ROLE regress_nosuch_super IN ROLE regress_role_super PASSWORD NEON_PASSWORD_PLACEHOLDER;\n ERROR:  permission denied to grant role \"regress_role_super\"\n DETAIL:  Only roles with the SUPERUSER attribute may grant roles with the SUPERUSER attribute.\n -- fail, database owner cannot have members\n-CREATE ROLE regress_nosuch_dbowner IN ROLE pg_database_owner;\n+CREATE ROLE regress_nosuch_dbowner IN ROLE pg_database_owner PASSWORD NEON_PASSWORD_PLACEHOLDER;\n ERROR:  role \"pg_database_owner\" cannot have explicit members\n -- ok, can grant other users into a role\n CREATE ROLE regress_inroles ROLE\n \tregress_role_super, regress_createdb, regress_createrole, regress_login,\n-\tregress_inherit, 
regress_connection_limit, regress_encrypted_password, regress_password_null;\n+\tregress_inherit, regress_connection_limit, regress_encrypted_password, regress_password_null PASSWORD NEON_PASSWORD_PLACEHOLDER;\n -- fail, cannot grant a role into itself\n-CREATE ROLE regress_nosuch_recursive ROLE regress_nosuch_recursive;\n+CREATE ROLE regress_nosuch_recursive ROLE regress_nosuch_recursive PASSWORD NEON_PASSWORD_PLACEHOLDER;\n ERROR:  role \"regress_nosuch_recursive\" is a member of role \"regress_nosuch_recursive\"\n -- ok, can grant other users into a role with admin option\n CREATE ROLE regress_adminroles ADMIN\n \tregress_role_super, regress_createdb, regress_createrole, regress_login,\n-\tregress_inherit, regress_connection_limit, regress_encrypted_password, regress_password_null;\n+\tregress_inherit, regress_connection_limit, regress_encrypted_password, regress_password_null PASSWORD NEON_PASSWORD_PLACEHOLDER;\n -- fail, cannot grant a role into itself with admin option\n-CREATE ROLE regress_nosuch_admin_recursive ADMIN regress_nosuch_admin_recursive;\n+CREATE ROLE regress_nosuch_admin_recursive ADMIN regress_nosuch_admin_recursive PASSWORD NEON_PASSWORD_PLACEHOLDER;\n ERROR:  role \"regress_nosuch_admin_recursive\" is a member of role \"regress_nosuch_admin_recursive\"\n -- fail, regress_createrole does not have CREATEDB privilege\n SET SESSION AUTHORIZATION regress_createrole;\n CREATE DATABASE regress_nosuch_db;\n ERROR:  permission denied to create database\n -- ok, regress_createrole can create new roles\n-CREATE ROLE regress_plainrole;\n+CREATE ROLE regress_plainrole PASSWORD NEON_PASSWORD_PLACEHOLDER;\n -- ok, roles with CREATEROLE can create new roles with it\n-CREATE ROLE regress_rolecreator CREATEROLE;\n+CREATE ROLE regress_rolecreator CREATEROLE PASSWORD NEON_PASSWORD_PLACEHOLDER;\n -- ok, roles with CREATEROLE can create new roles with different role\n -- attributes, including CREATEROLE\n-CREATE ROLE regress_hasprivs CREATEROLE LOGIN INHERIT 
CONNECTION LIMIT 5;\n+CREATE ROLE regress_hasprivs CREATEROLE LOGIN INHERIT CONNECTION LIMIT 5 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n -- ok, we should be able to modify a role we created\n COMMENT ON ROLE regress_hasprivs IS 'some comment';\n ALTER ROLE regress_hasprivs RENAME TO regress_tenant;\n@@ -141,7 +141,7 @@ ERROR:  permission denied to reassign objects\n DETAIL:  Only roles with privileges of role \"regress_tenant\" may reassign objects owned by it.\n -- ok, create a role with a value for createrole_self_grant\n SET createrole_self_grant = 'set, inherit';\n-CREATE ROLE regress_tenant2;\n+CREATE ROLE regress_tenant2 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n GRANT CREATE ON DATABASE regression TO regress_tenant2;\n -- ok, regress_tenant2 can create objects within the database\n SET SESSION AUTHORIZATION regress_tenant2;\n@@ -165,34 +165,34 @@ ALTER TABLE tenant2_table OWNER TO regress_tenant2;\n ERROR:  must be able to SET ROLE \"regress_tenant2\"\n DROP TABLE tenant2_table;\n -- fail, CREATEROLE is not enough to create roles in privileged roles\n-CREATE ROLE regress_read_all_data IN ROLE pg_read_all_data;\n+CREATE ROLE regress_read_all_data PASSWORD NEON_PASSWORD_PLACEHOLDER IN ROLE pg_read_all_data;\n ERROR:  permission denied to grant role \"pg_read_all_data\"\n DETAIL:  Only roles with the ADMIN option on role \"pg_read_all_data\" may grant this role.\n-CREATE ROLE regress_write_all_data IN ROLE pg_write_all_data;\n+CREATE ROLE regress_write_all_data PASSWORD NEON_PASSWORD_PLACEHOLDER IN ROLE pg_write_all_data;\n ERROR:  permission denied to grant role \"pg_write_all_data\"\n DETAIL:  Only roles with the ADMIN option on role \"pg_write_all_data\" may grant this role.\n-CREATE ROLE regress_monitor IN ROLE pg_monitor;\n+CREATE ROLE regress_monitor PASSWORD NEON_PASSWORD_PLACEHOLDER IN ROLE pg_monitor;\n ERROR:  permission denied to grant role \"pg_monitor\"\n DETAIL:  Only roles with the ADMIN option on role \"pg_monitor\" may grant this role.\n-CREATE ROLE 
regress_read_all_settings IN ROLE pg_read_all_settings;\n+CREATE ROLE regress_read_all_settings PASSWORD NEON_PASSWORD_PLACEHOLDER IN ROLE pg_read_all_settings;\n ERROR:  permission denied to grant role \"pg_read_all_settings\"\n DETAIL:  Only roles with the ADMIN option on role \"pg_read_all_settings\" may grant this role.\n-CREATE ROLE regress_read_all_stats IN ROLE pg_read_all_stats;\n+CREATE ROLE regress_read_all_stats PASSWORD NEON_PASSWORD_PLACEHOLDER IN ROLE pg_read_all_stats;\n ERROR:  permission denied to grant role \"pg_read_all_stats\"\n DETAIL:  Only roles with the ADMIN option on role \"pg_read_all_stats\" may grant this role.\n-CREATE ROLE regress_stat_scan_tables IN ROLE pg_stat_scan_tables;\n+CREATE ROLE regress_stat_scan_tables PASSWORD NEON_PASSWORD_PLACEHOLDER IN ROLE pg_stat_scan_tables;\n ERROR:  permission denied to grant role \"pg_stat_scan_tables\"\n DETAIL:  Only roles with the ADMIN option on role \"pg_stat_scan_tables\" may grant this role.\n-CREATE ROLE regress_read_server_files IN ROLE pg_read_server_files;\n+CREATE ROLE regress_read_server_files PASSWORD NEON_PASSWORD_PLACEHOLDER IN ROLE pg_read_server_files;\n ERROR:  permission denied to grant role \"pg_read_server_files\"\n DETAIL:  Only roles with the ADMIN option on role \"pg_read_server_files\" may grant this role.\n-CREATE ROLE regress_write_server_files IN ROLE pg_write_server_files;\n+CREATE ROLE regress_write_server_files PASSWORD NEON_PASSWORD_PLACEHOLDER IN ROLE pg_write_server_files;\n ERROR:  permission denied to grant role \"pg_write_server_files\"\n DETAIL:  Only roles with the ADMIN option on role \"pg_write_server_files\" may grant this role.\n-CREATE ROLE regress_execute_server_program IN ROLE pg_execute_server_program;\n+CREATE ROLE regress_execute_server_program PASSWORD NEON_PASSWORD_PLACEHOLDER IN ROLE pg_execute_server_program;\n ERROR:  permission denied to grant role \"pg_execute_server_program\"\n DETAIL:  Only roles with the ADMIN option on role 
\"pg_execute_server_program\" may grant this role.\n-CREATE ROLE regress_signal_backend IN ROLE pg_signal_backend;\n+CREATE ROLE regress_signal_backend PASSWORD NEON_PASSWORD_PLACEHOLDER IN ROLE pg_signal_backend;\n ERROR:  permission denied to grant role \"pg_signal_backend\"\n DETAIL:  Only roles with the ADMIN option on role \"pg_signal_backend\" may grant this role.\n -- fail, role still owns database objects\ndiff --git a/src/test/regress/expected/create_schema.out b/src/test/regress/expected/create_schema.out\nindex 93302a07ef..1a73f083ac 100644\n--- a/src/test/regress/expected/create_schema.out\n+++ b/src/test/regress/expected/create_schema.out\n@@ -2,7 +2,7 @@\n -- CREATE_SCHEMA\n --\n -- Schema creation with elements.\n-CREATE ROLE regress_create_schema_role SUPERUSER;\n+CREATE ROLE regress_create_schema_role SUPERUSER PASSWORD NEON_PASSWORD_PLACEHOLDER;\n -- Cases where schema creation fails as objects are qualified with a schema\n -- that does not match with what's expected.\n -- This checks all the object types that include schema qualifications.\ndiff --git a/src/test/regress/expected/create_view.out b/src/test/regress/expected/create_view.out\nindex f551624afb..57f1e432d4 100644\n--- a/src/test/regress/expected/create_view.out\n+++ b/src/test/regress/expected/create_view.out\n@@ -18,7 +18,8 @@ CREATE TABLE real_city (\n \toutline \tpath\n );\n \\set filename :abs_srcdir '/data/real_city.data'\n-COPY real_city FROM :'filename';\n+\\set command '\\\\copy real_city FROM ' :'filename';\n+:command\n ANALYZE real_city;\n SELECT *\n    INTO TABLE ramp\ndiff --git a/src/test/regress/expected/database.out b/src/test/regress/expected/database.out\nindex 4cbdbdf84d..573362850e 100644\n--- a/src/test/regress/expected/database.out\n+++ b/src/test/regress/expected/database.out\n@@ -1,8 +1,6 @@\n CREATE DATABASE regression_tbd\n \tENCODING utf8 LC_COLLATE \"C\" LC_CTYPE \"C\" TEMPLATE template0;\n ALTER DATABASE regression_tbd RENAME TO regression_utf8;\n-ALTER 
DATABASE regression_utf8 SET TABLESPACE regress_tblspace;\n-ALTER DATABASE regression_utf8 RESET TABLESPACE;\n ALTER DATABASE regression_utf8 CONNECTION_LIMIT 123;\n -- Test PgDatabaseToastTable.  Doing this with GRANT would be slow.\n BEGIN;\ndiff --git a/src/test/regress/expected/dependency.out b/src/test/regress/expected/dependency.out\nindex 74d9ff2998..fad0151614 100644\n--- a/src/test/regress/expected/dependency.out\n+++ b/src/test/regress/expected/dependency.out\n@@ -1,10 +1,10 @@\n --\n -- DEPENDENCIES\n --\n-CREATE USER regress_dep_user;\n-CREATE USER regress_dep_user2;\n-CREATE USER regress_dep_user3;\n-CREATE GROUP regress_dep_group;\n+CREATE USER regress_dep_user PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE USER regress_dep_user2 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE USER regress_dep_user3 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE GROUP regress_dep_group PASSWORD NEON_PASSWORD_PLACEHOLDER;\n CREATE TABLE deptest (f1 serial primary key, f2 text);\n GRANT SELECT ON TABLE deptest TO GROUP regress_dep_group;\n GRANT ALL ON TABLE deptest TO regress_dep_user, regress_dep_user2;\n@@ -41,9 +41,9 @@ ERROR:  role \"regress_dep_user3\" cannot be dropped because some objects depend o\n DROP TABLE deptest;\n DROP USER regress_dep_user3;\n -- Test DROP OWNED\n-CREATE USER regress_dep_user0;\n-CREATE USER regress_dep_user1;\n-CREATE USER regress_dep_user2;\n+CREATE USER regress_dep_user0 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE USER regress_dep_user1 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE USER regress_dep_user2 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n SET SESSION AUTHORIZATION regress_dep_user0;\n -- permission denied\n DROP OWNED BY regress_dep_user1;\ndiff --git a/src/test/regress/expected/drop_if_exists.out b/src/test/regress/expected/drop_if_exists.out\nindex 5e44c2c3ce..eb3bb329fb 100644\n--- a/src/test/regress/expected/drop_if_exists.out\n+++ b/src/test/regress/expected/drop_if_exists.out\n@@ -64,9 +64,9 @@ ERROR:  type \"test_domain_exists\" 
does not exist\n ---\n --- role/user/group\n ---\n-CREATE USER regress_test_u1;\n-CREATE ROLE regress_test_r1;\n-CREATE GROUP regress_test_g1;\n+CREATE USER regress_test_u1 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE ROLE regress_test_r1 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE GROUP regress_test_g1 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n DROP USER regress_test_u2;\n ERROR:  role \"regress_test_u2\" does not exist\n DROP USER IF EXISTS regress_test_u1, regress_test_u2;\ndiff --git a/src/test/regress/expected/equivclass.out b/src/test/regress/expected/equivclass.out\nindex 126f7047fe..0e2cc73426 100644\n--- a/src/test/regress/expected/equivclass.out\n+++ b/src/test/regress/expected/equivclass.out\n@@ -384,7 +384,7 @@ set enable_nestloop = on;\n set enable_mergejoin = off;\n alter table ec1 enable row level security;\n create policy p1 on ec1 using (f1 < '5'::int8alias1);\n-create user regress_user_ectest;\n+create user regress_user_ectest PASSWORD NEON_PASSWORD_PLACEHOLDER;\n grant select on ec0 to regress_user_ectest;\n grant select on ec1 to regress_user_ectest;\n -- without any RLS, we'll treat {a.ff, b.ff, 43} as an EquivalenceClass\ndiff --git a/src/test/regress/expected/event_trigger.out b/src/test/regress/expected/event_trigger.out\nindex 7b2198eac6..39919697ad 100644\n--- a/src/test/regress/expected/event_trigger.out\n+++ b/src/test/regress/expected/event_trigger.out\n@@ -85,7 +85,7 @@ create event trigger regress_event_trigger2 on ddl_command_start\n -- OK\n comment on event trigger regress_event_trigger is 'test comment';\n -- drop as non-superuser should fail\n-create role regress_evt_user;\n+create role regress_evt_user PASSWORD NEON_PASSWORD_PLACEHOLDER;\n set role regress_evt_user;\n create event trigger regress_event_trigger_noperms on ddl_command_start\n    execute procedure test_event_trigger();\ndiff --git a/src/test/regress/expected/foreign_data.out b/src/test/regress/expected/foreign_data.out\nindex 6ed50fdcfa..caa00a345d 100644\n--- 
a/src/test/regress/expected/foreign_data.out\n+++ b/src/test/regress/expected/foreign_data.out\n@@ -14,13 +14,13 @@ CREATE FUNCTION test_fdw_handler()\n SET client_min_messages TO 'warning';\n DROP ROLE IF EXISTS regress_foreign_data_user, regress_test_role, regress_test_role2, regress_test_role_super, regress_test_indirect, regress_unprivileged_role;\n RESET client_min_messages;\n-CREATE ROLE regress_foreign_data_user LOGIN SUPERUSER;\n+CREATE ROLE regress_foreign_data_user LOGIN SUPERUSER PASSWORD NEON_PASSWORD_PLACEHOLDER;\n SET SESSION AUTHORIZATION 'regress_foreign_data_user';\n-CREATE ROLE regress_test_role;\n-CREATE ROLE regress_test_role2;\n-CREATE ROLE regress_test_role_super SUPERUSER;\n-CREATE ROLE regress_test_indirect;\n-CREATE ROLE regress_unprivileged_role;\n+CREATE ROLE regress_test_role PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE ROLE regress_test_role2 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE ROLE regress_test_role_super SUPERUSER PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE ROLE regress_test_indirect PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE ROLE regress_unprivileged_role PASSWORD NEON_PASSWORD_PLACEHOLDER;\n CREATE FOREIGN DATA WRAPPER dummy;\n COMMENT ON FOREIGN DATA WRAPPER dummy IS 'useless';\n CREATE FOREIGN DATA WRAPPER postgresql VALIDATOR postgresql_fdw_validator;\ndiff --git a/src/test/regress/expected/foreign_key.out b/src/test/regress/expected/foreign_key.out\nindex fe6a1015f2..614b387b7d 100644\n--- a/src/test/regress/expected/foreign_key.out\n+++ b/src/test/regress/expected/foreign_key.out\n@@ -1985,7 +1985,7 @@ ALTER TABLE fk_partitioned_fk_6 ATTACH PARTITION fk_partitioned_pk_6 FOR VALUES\n ERROR:  cannot ALTER TABLE \"fk_partitioned_pk_61\" because it is being used by active queries in this session\n DROP TABLE fk_partitioned_pk_6, fk_partitioned_fk_6;\n -- test the case when the referenced table is owned by a different user\n-create role regress_other_partitioned_fk_owner;\n+create role regress_other_partitioned_fk_owner 
PASSWORD NEON_PASSWORD_PLACEHOLDER;\n grant references on fk_notpartitioned_pk to regress_other_partitioned_fk_owner;\n set role regress_other_partitioned_fk_owner;\n create table other_partitioned_fk(a int, b int) partition by list (a);\ndiff --git a/src/test/regress/expected/generated.out b/src/test/regress/expected/generated.out\nindex 499072e14c..bd7a8b3f18 100644\n--- a/src/test/regress/expected/generated.out\n+++ b/src/test/regress/expected/generated.out\n@@ -534,7 +534,7 @@ CREATE TABLE gtest10a (a int PRIMARY KEY, b int GENERATED ALWAYS AS (a * 2) STOR\n ALTER TABLE gtest10a DROP COLUMN b;\n INSERT INTO gtest10a (a) VALUES (1);\n -- privileges\n-CREATE USER regress_user11;\n+CREATE USER regress_user11 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n CREATE TABLE gtest11s (a int PRIMARY KEY, b int, c int GENERATED ALWAYS AS (b * 2) STORED);\n INSERT INTO gtest11s VALUES (1, 10), (2, 20);\n GRANT SELECT (a, c) ON gtest11s TO regress_user11;\ndiff --git a/src/test/regress/expected/guc.out b/src/test/regress/expected/guc.out\nindex 455b6d6c0c..12fa350c6d 100644\n--- a/src/test/regress/expected/guc.out\n+++ b/src/test/regress/expected/guc.out\n@@ -584,7 +584,7 @@ PREPARE foo AS SELECT 1;\n LISTEN foo_event;\n SET vacuum_cost_delay = 13;\n CREATE TEMP TABLE tmp_foo (data text) ON COMMIT DELETE ROWS;\n-CREATE ROLE regress_guc_user;\n+CREATE ROLE regress_guc_user PASSWORD NEON_PASSWORD_PLACEHOLDER;\n SET SESSION AUTHORIZATION regress_guc_user;\n -- look changes\n SELECT pg_listening_channels();\ndiff --git a/src/test/regress/expected/hash_index.out b/src/test/regress/expected/hash_index.out\nindex 0d4bdb2ade..9a5a9b5407 100644\n--- a/src/test/regress/expected/hash_index.out\n+++ b/src/test/regress/expected/hash_index.out\n@@ -20,10 +20,14 @@ CREATE TABLE hash_f8_heap (\n \trandom \t\tfloat8\n );\n \\set filename :abs_srcdir '/data/hash.data'\n-COPY hash_i4_heap FROM :'filename';\n-COPY hash_name_heap FROM :'filename';\n-COPY hash_txt_heap FROM :'filename';\n-COPY hash_f8_heap 
FROM :'filename';\n+\\set command '\\\\copy hash_i4_heap FROM ' :'filename';\n+:command\n+\\set command '\\\\copy hash_name_heap FROM ' :'filename';\n+:command\n+\\set command '\\\\copy hash_txt_heap FROM ' :'filename';\n+:command\n+\\set command '\\\\copy hash_f8_heap FROM ' :'filename';\n+:command\n -- the data in this file has a lot of duplicates in the index key\n -- fields, leading to long bucket chains and lots of table expansion.\n -- this is therefore a stress test of the bucket overflow code (unlike\ndiff --git a/src/test/regress/expected/identity.out b/src/test/regress/expected/identity.out\nindex f14bfccfb1..bbb2092df9 100644\n--- a/src/test/regress/expected/identity.out\n+++ b/src/test/regress/expected/identity.out\n@@ -520,7 +520,7 @@ ALTER TABLE itest7 ALTER COLUMN a SET GENERATED BY DEFAULT;\n ALTER TABLE itest7 ALTER COLUMN a RESTART;\n ALTER TABLE itest7 ALTER COLUMN a DROP IDENTITY;\n -- privileges\n-CREATE USER regress_identity_user1;\n+CREATE USER regress_identity_user1 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n CREATE TABLE itest8 (a int GENERATED ALWAYS AS IDENTITY, b text);\n GRANT SELECT, INSERT ON itest8 TO regress_identity_user1;\n SET ROLE regress_identity_user1;\ndiff --git a/src/test/regress/expected/inherit.out b/src/test/regress/expected/inherit.out\nindex 85240a9b0b..5294f7557d 100644\n--- a/src/test/regress/expected/inherit.out\n+++ b/src/test/regress/expected/inherit.out\n@@ -2055,8 +2055,8 @@ NOTICE:  drop cascades to table cnullchild\n --\n -- Mixed ownership inheritance tree\n --\n-create role regress_alice;\n-create role regress_bob;\n+create role regress_alice password NEON_PASSWORD_PLACEHOLDER;\n+create role regress_bob password NEON_PASSWORD_PLACEHOLDER;\n grant all on schema public to regress_alice, regress_bob;\n grant regress_alice to regress_bob;\n set session authorization regress_alice;\n@@ -2789,7 +2789,7 @@ create index on permtest_parent (left(c, 3));\n insert into permtest_parent\n   select 1, 'a', 
left(fipshash(i::text), 5) from generate_series(0, 100) i;\n analyze permtest_parent;\n-create role regress_no_child_access;\n+create role regress_no_child_access PASSWORD NEON_PASSWORD_PLACEHOLDER;\n revoke all on permtest_grandchild from regress_no_child_access;\n grant select on permtest_parent to regress_no_child_access;\n set session authorization regress_no_child_access;\ndiff --git a/src/test/regress/expected/insert.out b/src/test/regress/expected/insert.out\nindex cf4b5221a8..fa6ccb639c 100644\n--- a/src/test/regress/expected/insert.out\n+++ b/src/test/regress/expected/insert.out\n@@ -802,7 +802,7 @@ drop table mlparted5;\n -- appropriate key description (or none) in various situations\n create table key_desc (a int, b int) partition by list ((a+0));\n create table key_desc_1 partition of key_desc for values in (1) partition by range (b);\n-create user regress_insert_other_user;\n+create user regress_insert_other_user PASSWORD NEON_PASSWORD_PLACEHOLDER;\n grant select (a) on key_desc_1 to regress_insert_other_user;\n grant insert on key_desc to regress_insert_other_user;\n set role regress_insert_other_user;\n@@ -914,7 +914,7 @@ DETAIL:  Failing row contains (2, hi there).\n -- check that the message shows the appropriate column description in a\n -- situation where the partitioned table is not the primary ModifyTable node\n create table inserttest3 (f1 text default 'foo', f2 text default 'bar', f3 int);\n-create role regress_coldesc_role;\n+create role regress_coldesc_role PASSWORD NEON_PASSWORD_PLACEHOLDER;\n grant insert on inserttest3 to regress_coldesc_role;\n grant insert on brtrigpartcon to regress_coldesc_role;\n revoke select on brtrigpartcon from regress_coldesc_role;\ndiff --git a/src/test/regress/expected/jsonb.out b/src/test/regress/expected/jsonb.out\nindex e66d760189..86348fd416 100644\n--- a/src/test/regress/expected/jsonb.out\n+++ b/src/test/regress/expected/jsonb.out\n@@ -4,7 +4,8 @@ CREATE TABLE testjsonb (\n        j jsonb\n );\n \\set 
filename :abs_srcdir '/data/jsonb.data'\n-COPY testjsonb FROM :'filename';\n+\\set command '\\\\copy testjsonb FROM ' :'filename';\n+:command\n -- Strings.\n SELECT '\"\"'::jsonb;\t\t\t\t-- OK.\n  jsonb \ndiff --git a/src/test/regress/expected/largeobject.out b/src/test/regress/expected/largeobject.out\nindex 4921dd79ae..d18a3cdd66 100644\n--- a/src/test/regress/expected/largeobject.out\n+++ b/src/test/regress/expected/largeobject.out\n@@ -7,7 +7,7 @@\n -- ensure consistent test output regardless of the default bytea format\n SET bytea_output TO escape;\n -- Test ALTER LARGE OBJECT OWNER\n-CREATE ROLE regress_lo_user;\n+CREATE ROLE regress_lo_user PASSWORD NEON_PASSWORD_PLACEHOLDER;\n SELECT lo_create(42);\n  lo_create \n -----------\n@@ -346,7 +346,8 @@ SELECT lo_unlink(loid) from lotest_stash_values;\n \n TRUNCATE lotest_stash_values;\n \\set filename :abs_srcdir '/data/tenk.data'\n-INSERT INTO lotest_stash_values (loid) SELECT lo_import(:'filename');\n+\\lo_import :filename\n+INSERT INTO lotest_stash_values (loid) VALUES (:LASTOID);\n BEGIN;\n UPDATE lotest_stash_values SET fd=lo_open(loid, CAST(x'20000' | x'40000' AS integer));\n -- verify length of large object\n@@ -410,12 +411,8 @@ SELECT lo_close(fd) FROM lotest_stash_values;\n \n END;\n \\set filename :abs_builddir '/results/lotest.txt'\n-SELECT lo_export(loid, :'filename') FROM lotest_stash_values;\n- lo_export \n------------\n-         1\n-(1 row)\n-\n+SELECT loid FROM lotest_stash_values \\gset\n+\\lo_export :loid, :filename\n \\lo_import :filename\n \\set newloid :LASTOID\n -- just make sure \\lo_export does not barf\ndiff --git a/src/test/regress/expected/lock.out b/src/test/regress/expected/lock.out\nindex ad137d3645..8dac447436 100644\n--- a/src/test/regress/expected/lock.out\n+++ b/src/test/regress/expected/lock.out\n@@ -16,7 +16,7 @@ CREATE VIEW lock_view3 AS SELECT * from lock_view2;\n CREATE VIEW lock_view4 AS SELECT (select a from lock_tbl1a limit 1) from lock_tbl1;\n CREATE VIEW lock_view5 AS 
SELECT * from lock_tbl1 where a in (select * from lock_tbl1a);\n CREATE VIEW lock_view6 AS SELECT * from (select * from lock_tbl1) sub;\n-CREATE ROLE regress_rol_lock1;\n+CREATE ROLE regress_rol_lock1 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n ALTER ROLE regress_rol_lock1 SET search_path = lock_schema1;\n GRANT USAGE ON SCHEMA lock_schema1 TO regress_rol_lock1;\n -- Try all valid lock options; also try omitting the optional TABLE keyword.\ndiff --git a/src/test/regress/expected/matview.out b/src/test/regress/expected/matview.out\nindex 038ab73517..bd471f9fac 100644\n--- a/src/test/regress/expected/matview.out\n+++ b/src/test/regress/expected/matview.out\n@@ -549,7 +549,7 @@ SELECT * FROM mvtest_mv_v;\n DROP TABLE mvtest_v CASCADE;\n NOTICE:  drop cascades to materialized view mvtest_mv_v\n -- make sure running as superuser works when MV owned by another role (bug #11208)\n-CREATE ROLE regress_user_mvtest;\n+CREATE ROLE regress_user_mvtest PASSWORD NEON_PASSWORD_PLACEHOLDER;\n SET ROLE regress_user_mvtest;\n -- this test case also checks for ambiguity in the queries issued by\n -- refresh_by_match_merge(), by choosing column names that intentionally\n@@ -617,7 +617,7 @@ HINT:  Use the REFRESH MATERIALIZED VIEW command.\n ROLLBACK;\n -- INSERT privileges if relation owner is not allowed to insert.\n CREATE SCHEMA matview_schema;\n-CREATE USER regress_matview_user;\n+CREATE USER regress_matview_user PASSWORD NEON_PASSWORD_PLACEHOLDER;\n ALTER DEFAULT PRIVILEGES FOR ROLE regress_matview_user\n   REVOKE INSERT ON TABLES FROM regress_matview_user;\n GRANT ALL ON SCHEMA matview_schema TO public;\ndiff --git a/src/test/regress/expected/merge.out b/src/test/regress/expected/merge.out\nindex 521d70a891..7fd218f3d8 100644\n--- a/src/test/regress/expected/merge.out\n+++ b/src/test/regress/expected/merge.out\n@@ -1,9 +1,9 @@\n --\n -- MERGE\n --\n-CREATE USER regress_merge_privs;\n-CREATE USER regress_merge_no_privs;\n-CREATE USER regress_merge_none;\n+CREATE USER 
regress_merge_privs PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE USER regress_merge_no_privs PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE USER regress_merge_none PASSWORD NEON_PASSWORD_PLACEHOLDER;\n DROP TABLE IF EXISTS target;\n NOTICE:  table \"target\" does not exist, skipping\n DROP TABLE IF EXISTS source;\ndiff --git a/src/test/regress/expected/misc.out b/src/test/regress/expected/misc.out\nindex 6e816c57f1..6ef45b468e 100644\n--- a/src/test/regress/expected/misc.out\n+++ b/src/test/regress/expected/misc.out\n@@ -59,9 +59,11 @@ DROP TABLE tmp;\n -- copy\n --\n \\set filename :abs_builddir '/results/onek.data'\n-COPY onek TO :'filename';\n+\\set command '\\\\copy onek TO ' :'filename';\n+:command\n CREATE TEMP TABLE onek_copy (LIKE onek);\n-COPY onek_copy FROM :'filename';\n+\\set command '\\\\copy onek_copy FROM ' :'filename';\n+:command\n SELECT * FROM onek EXCEPT ALL SELECT * FROM onek_copy;\n  unique1 | unique2 | two | four | ten | twenty | hundred | thousand | twothousand | fivethous | tenthous | odd | even | stringu1 | stringu2 | string4 \n ---------+---------+-----+------+-----+--------+---------+----------+-------------+-----------+----------+-----+------+----------+----------+---------\n@@ -73,9 +75,11 @@ SELECT * FROM onek_copy EXCEPT ALL SELECT * FROM onek;\n (0 rows)\n \n \\set filename :abs_builddir '/results/stud_emp.data'\n-COPY BINARY stud_emp TO :'filename';\n+\\set command '\\\\COPY BINARY stud_emp TO ' :'filename';\n+:command\n CREATE TEMP TABLE stud_emp_copy (LIKE stud_emp);\n-COPY BINARY stud_emp_copy FROM :'filename';\n+\\set command '\\\\COPY BINARY stud_emp_copy FROM ' :'filename';\n+:command\n SELECT * FROM stud_emp_copy;\n  name  | age |  location  | salary | manager | gpa | percent \n -------+-----+------------+--------+---------+-----+---------\ndiff --git a/src/test/regress/expected/misc_functions.out b/src/test/regress/expected/misc_functions.out\nindex d94056862a..f8270d8343 100644\n--- 
a/src/test/regress/expected/misc_functions.out\n+++ b/src/test/regress/expected/misc_functions.out\n@@ -297,7 +297,7 @@ SELECT pg_log_backend_memory_contexts(pid) FROM pg_stat_activity\n  t\n (1 row)\n \n-CREATE ROLE regress_log_memory;\n+CREATE ROLE regress_log_memory PASSWORD NEON_PASSWORD_PLACEHOLDER;\n SELECT has_function_privilege('regress_log_memory',\n   'pg_log_backend_memory_contexts(integer)', 'EXECUTE'); -- no\n  has_function_privilege \n@@ -483,7 +483,7 @@ select count(*) > 0 from\n --\n -- Test replication slot directory functions\n --\n-CREATE ROLE regress_slot_dir_funcs;\n+CREATE ROLE regress_slot_dir_funcs PASSWORD NEON_PASSWORD_PLACEHOLDER;\n -- Not available by default.\n SELECT has_function_privilege('regress_slot_dir_funcs',\n   'pg_ls_logicalsnapdir()', 'EXECUTE');\n@@ -671,7 +671,7 @@ FROM pg_walfile_name_offset('0/0'::pg_lsn + :segment_size - 1),\n (1 row)\n \n -- pg_current_logfile\n-CREATE ROLE regress_current_logfile;\n+CREATE ROLE regress_current_logfile PASSWORD NEON_PASSWORD_PLACEHOLDER;\n -- not available by default\n SELECT has_function_privilege('regress_current_logfile',\n   'pg_current_logfile()', 'EXECUTE');\ndiff --git a/src/test/regress/expected/multirangetypes.out b/src/test/regress/expected/multirangetypes.out\nindex c6363ebeb2..8f43732404 100644\n--- a/src/test/regress/expected/multirangetypes.out\n+++ b/src/test/regress/expected/multirangetypes.out\n@@ -3118,7 +3118,7 @@ drop type textrange2;\n -- Multiranges don't have their own ownership or permissions.\n --\n create type textrange1 as range(subtype=text, multirange_type_name=multitextrange1, collation=\"C\");\n-create role regress_multirange_owner;\n+create role regress_multirange_owner password NEON_PASSWORD_PLACEHOLDER;\n alter type multitextrange1 owner to regress_multirange_owner;  -- fail\n ERROR:  cannot alter multirange type multitextrange1\n HINT:  You can alter type textrange1, which will alter the multirange type as well.\ndiff --git 
a/src/test/regress/expected/object_address.out b/src/test/regress/expected/object_address.out\nindex fc42d418bf..e38f517574 100644\n--- a/src/test/regress/expected/object_address.out\n+++ b/src/test/regress/expected/object_address.out\n@@ -5,7 +5,7 @@\n SET client_min_messages TO 'warning';\n DROP ROLE IF EXISTS regress_addr_user;\n RESET client_min_messages;\n-CREATE USER regress_addr_user;\n+CREATE USER regress_addr_user PASSWORD NEON_PASSWORD_PLACEHOLDER;\n -- Test generic object addressing/identification functions\n CREATE SCHEMA addr_nsp;\n SET search_path TO 'addr_nsp';\ndiff --git a/src/test/regress/expected/password.out b/src/test/regress/expected/password.out\nindex 924d6e001d..7fdda73439 100644\n--- a/src/test/regress/expected/password.out\n+++ b/src/test/regress/expected/password.out\n@@ -12,13 +12,11 @@ SET password_encryption = 'md5'; -- ok\n SET password_encryption = 'scram-sha-256'; -- ok\n -- consistency of password entries\n SET password_encryption = 'md5';\n-CREATE ROLE regress_passwd1;\n-ALTER ROLE regress_passwd1 PASSWORD 'role_pwd1';\n-CREATE ROLE regress_passwd2;\n-ALTER ROLE regress_passwd2 PASSWORD 'role_pwd2';\n+CREATE ROLE regress_passwd1 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE ROLE regress_passwd2 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n SET password_encryption = 'scram-sha-256';\n-CREATE ROLE regress_passwd3 PASSWORD 'role_pwd3';\n-CREATE ROLE regress_passwd4 PASSWORD NULL;\n+CREATE ROLE regress_passwd3 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE ROLE regress_passwd4 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n -- check list of created entries\n --\n -- The scram secret will look something like:\n@@ -32,10 +30,10 @@ SELECT rolname, regexp_replace(rolpassword, '(SCRAM-SHA-256)\\$(\\d+):([a-zA-Z0-9+\n     ORDER BY rolname, rolpassword;\n      rolname     |                rolpassword_masked                 \n -----------------+---------------------------------------------------\n- regress_passwd1 | md5783277baca28003b33453252be4dbb34\n- 
regress_passwd2 | md54044304ba511dd062133eb5b4b84a2a3\n+ regress_passwd1 | NEON_MD5_PLACEHOLDER:regress_passwd1\n+ regress_passwd2 | NEON_MD5_PLACEHOLDER:regress_passwd2\n  regress_passwd3 | SCRAM-SHA-256$4096:<salt>$<storedkey>:<serverkey>\n- regress_passwd4 | \n+ regress_passwd4 | SCRAM-SHA-256$4096:<salt>$<storedkey>:<serverkey>\n (4 rows)\n \n -- Rename a role\n@@ -56,24 +54,17 @@ ALTER ROLE regress_passwd2_new RENAME TO regress_passwd2;\n -- passwords.\n SET password_encryption = 'md5';\n -- encrypt with MD5\n-ALTER ROLE regress_passwd2 PASSWORD 'foo';\n--- already encrypted, use as they are\n-ALTER ROLE regress_passwd1 PASSWORD 'md5cd3578025fe2c3d7ed1b9a9b26238b70';\n-ALTER ROLE regress_passwd3 PASSWORD 'SCRAM-SHA-256$4096:VLK4RMaQLCvNtQ==$6YtlR4t69SguDiwFvbVgVZtuz6gpJQQqUMZ7IQJK5yI=:ps75jrHeYU4lXCcXI4O8oIdJ3eO8o2jirjruw9phBTo=';\n+ALTER ROLE regress_passwd2 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n SET password_encryption = 'scram-sha-256';\n -- create SCRAM secret\n-ALTER ROLE  regress_passwd4 PASSWORD 'foo';\n+ALTER ROLE  regress_passwd4 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n -- already encrypted with MD5, use as it is\n-CREATE ROLE regress_passwd5 PASSWORD 'md5e73a4b11df52a6068f8b39f90be36023';\n--- This looks like a valid SCRAM-SHA-256 secret, but it is not\n--- so it should be hashed with SCRAM-SHA-256.\n-CREATE ROLE regress_passwd6 PASSWORD 'SCRAM-SHA-256$1234';\n--- These may look like valid MD5 secrets, but they are not, so they\n--- should be hashed with SCRAM-SHA-256.\n--- trailing garbage at the end\n-CREATE ROLE regress_passwd7 PASSWORD 'md5012345678901234567890123456789zz';\n--- invalid length\n-CREATE ROLE regress_passwd8 PASSWORD 'md501234567890123456789012345678901zz';\n+-- Neon does not support encrypted passwords, use unencrypted instead\n+CREATE ROLE regress_passwd5 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+-- Neon does not support encrypted passwords, use unencrypted instead\n+CREATE ROLE regress_passwd6 PASSWORD 
NEON_PASSWORD_PLACEHOLDER;\n+CREATE ROLE regress_passwd7 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE ROLE regress_passwd8 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n -- Changing the SCRAM iteration count\n SET scram_iterations = 1024;\n CREATE ROLE regress_passwd9 PASSWORD 'alterediterationcount';\n@@ -83,11 +74,11 @@ SELECT rolname, regexp_replace(rolpassword, '(SCRAM-SHA-256)\\$(\\d+):([a-zA-Z0-9+\n     ORDER BY rolname, rolpassword;\n      rolname     |                rolpassword_masked                 \n -----------------+---------------------------------------------------\n- regress_passwd1 | md5cd3578025fe2c3d7ed1b9a9b26238b70\n- regress_passwd2 | md5dfa155cadd5f4ad57860162f3fab9cdb\n+ regress_passwd1 | NEON_MD5_PLACEHOLDER:regress_passwd1\n+ regress_passwd2 | NEON_MD5_PLACEHOLDER:regress_passwd2\n  regress_passwd3 | SCRAM-SHA-256$4096:<salt>$<storedkey>:<serverkey>\n  regress_passwd4 | SCRAM-SHA-256$4096:<salt>$<storedkey>:<serverkey>\n- regress_passwd5 | md5e73a4b11df52a6068f8b39f90be36023\n+ regress_passwd5 | SCRAM-SHA-256$4096:<salt>$<storedkey>:<serverkey>\n  regress_passwd6 | SCRAM-SHA-256$4096:<salt>$<storedkey>:<serverkey>\n  regress_passwd7 | SCRAM-SHA-256$4096:<salt>$<storedkey>:<serverkey>\n  regress_passwd8 | SCRAM-SHA-256$4096:<salt>$<storedkey>:<serverkey>\n@@ -97,23 +88,20 @@ SELECT rolname, regexp_replace(rolpassword, '(SCRAM-SHA-256)\\$(\\d+):([a-zA-Z0-9+\n -- An empty password is not allowed, in any form\n CREATE ROLE regress_passwd_empty PASSWORD '';\n NOTICE:  empty string is not a valid password, clearing password\n+ERROR:  Failed to get encrypted password: User \"regress_passwd_empty\" has no password assigned.\n ALTER ROLE regress_passwd_empty PASSWORD 'md585939a5ce845f1a1b620742e3c659e0a';\n-NOTICE:  empty string is not a valid password, clearing password\n+ERROR:  role \"regress_passwd_empty\" does not exist\n ALTER ROLE regress_passwd_empty PASSWORD 
'SCRAM-SHA-256$4096:hpFyHTUsSWcR7O9P$LgZFIt6Oqdo27ZFKbZ2nV+vtnYM995pDh9ca6WSi120=:qVV5NeluNfUPkwm7Vqat25RjSPLkGeoZBQs6wVv+um4=';\n-NOTICE:  empty string is not a valid password, clearing password\n+ERROR:  role \"regress_passwd_empty\" does not exist\n SELECT rolpassword FROM pg_authid WHERE rolname='regress_passwd_empty';\n  rolpassword \n -------------\n- \n-(1 row)\n+(0 rows)\n \n--- Test with invalid stored and server keys.\n---\n--- The first is valid, to act as a control. The others have too long\n--- stored/server keys. They will be re-hashed.\n-CREATE ROLE regress_passwd_sha_len0 PASSWORD 'SCRAM-SHA-256$4096:A6xHKoH/494E941doaPOYg==$Ky+A30sewHIH3VHQLRN9vYsuzlgNyGNKCh37dy96Rqw=:COPdlNiIkrsacU5QoxydEuOH6e/KfiipeETb/bPw8ZI=';\n-CREATE ROLE regress_passwd_sha_len1 PASSWORD 'SCRAM-SHA-256$4096:A6xHKoH/494E941doaPOYg==$Ky+A30sewHIH3VHQLRN9vYsuzlgNyGNKCh37dy96RqwAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=:COPdlNiIkrsacU5QoxydEuOH6e/KfiipeETb/bPw8ZI=';\n-CREATE ROLE regress_passwd_sha_len2 PASSWORD 'SCRAM-SHA-256$4096:A6xHKoH/494E941doaPOYg==$Ky+A30sewHIH3VHQLRN9vYsuzlgNyGNKCh37dy96Rqw=:COPdlNiIkrsacU5QoxydEuOH6e/KfiipeETb/bPw8ZIAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=';\n+-- Neon does not support encrypted passwords, use unencrypted instead\n+CREATE ROLE regress_passwd_sha_len0 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE ROLE regress_passwd_sha_len1 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE ROLE regress_passwd_sha_len2 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n -- Check that the invalid secrets were re-hashed. 
A re-hashed secret\n -- should not contain the original salt.\n SELECT rolname, rolpassword not like '%A6xHKoH/494E941doaPOYg==%' as is_rolpassword_rehashed\n@@ -122,7 +110,7 @@ SELECT rolname, rolpassword not like '%A6xHKoH/494E941doaPOYg==%' as is_rolpassw\n     ORDER BY rolname;\n          rolname         | is_rolpassword_rehashed \n -------------------------+-------------------------\n- regress_passwd_sha_len0 | f\n+ regress_passwd_sha_len0 | t\n  regress_passwd_sha_len1 | t\n  regress_passwd_sha_len2 | t\n (3 rows)\n@@ -137,6 +125,7 @@ DROP ROLE regress_passwd7;\n DROP ROLE regress_passwd8;\n DROP ROLE regress_passwd9;\n DROP ROLE regress_passwd_empty;\n+ERROR:  role \"regress_passwd_empty\" does not exist\n DROP ROLE regress_passwd_sha_len0;\n DROP ROLE regress_passwd_sha_len1;\n DROP ROLE regress_passwd_sha_len2;\ndiff --git a/src/test/regress/expected/privileges.out b/src/test/regress/expected/privileges.out\nindex e8c668e0a1..03be5c2120 100644\n--- a/src/test/regress/expected/privileges.out\n+++ b/src/test/regress/expected/privileges.out\n@@ -20,19 +20,19 @@ SELECT lo_unlink(oid) FROM pg_largeobject_metadata WHERE oid >= 1000 AND oid < 3\n \n RESET client_min_messages;\n -- test proper begins here\n-CREATE USER regress_priv_user1;\n-CREATE USER regress_priv_user2;\n-CREATE USER regress_priv_user3;\n-CREATE USER regress_priv_user4;\n-CREATE USER regress_priv_user5;\n-CREATE USER regress_priv_user5;\t-- duplicate\n+CREATE USER regress_priv_user1 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE USER regress_priv_user2 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE USER regress_priv_user3 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE USER regress_priv_user4 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE USER regress_priv_user5 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE USER regress_priv_user5 PASSWORD NEON_PASSWORD_PLACEHOLDER;\t-- duplicate\n ERROR:  role \"regress_priv_user5\" already exists\n-CREATE USER regress_priv_user6;\n-CREATE USER 
regress_priv_user7;\n-CREATE USER regress_priv_user8;\n-CREATE USER regress_priv_user9;\n-CREATE USER regress_priv_user10;\n-CREATE ROLE regress_priv_role;\n+CREATE USER regress_priv_user6 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE USER regress_priv_user7 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE USER regress_priv_user8 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE USER regress_priv_user9 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE USER regress_priv_user10 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE ROLE regress_priv_role PASSWORD NEON_PASSWORD_PLACEHOLDER;\n -- circular ADMIN OPTION grants should be disallowed\n GRANT regress_priv_user1 TO regress_priv_user2 WITH ADMIN OPTION;\n GRANT regress_priv_user1 TO regress_priv_user3 WITH ADMIN OPTION GRANTED BY regress_priv_user2;\n@@ -108,11 +108,11 @@ ERROR:  role \"regress_priv_user5\" cannot be dropped because some objects depend\n DETAIL:  privileges for membership of role regress_priv_user6 in role regress_priv_user1\n DROP ROLE regress_priv_user1, regress_priv_user5; -- ok, despite order\n -- recreate the roles we just dropped\n-CREATE USER regress_priv_user1;\n-CREATE USER regress_priv_user2;\n-CREATE USER regress_priv_user3;\n-CREATE USER regress_priv_user4;\n-CREATE USER regress_priv_user5;\n+CREATE USER regress_priv_user1 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE USER regress_priv_user2 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE USER regress_priv_user3 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE USER regress_priv_user4 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE USER regress_priv_user5 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n GRANT pg_read_all_data TO regress_priv_user6;\n GRANT pg_write_all_data TO regress_priv_user7;\n GRANT pg_read_all_settings TO regress_priv_user8 WITH ADMIN OPTION;\n@@ -212,8 +212,8 @@ REVOKE pg_read_all_settings FROM regress_priv_user8;\n DROP USER regress_priv_user10;\n DROP USER regress_priv_user9;\n DROP USER regress_priv_user8;\n-CREATE GROUP regress_priv_group1;\n-CREATE 
GROUP regress_priv_group2 WITH ADMIN regress_priv_user1 USER regress_priv_user2;\n+CREATE GROUP regress_priv_group1 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE GROUP regress_priv_group2 WITH ADMIN regress_priv_user1 PASSWORD NEON_PASSWORD_PLACEHOLDER USER regress_priv_user2;\n ALTER GROUP regress_priv_group1 ADD USER regress_priv_user4;\n GRANT regress_priv_group2 TO regress_priv_user2 GRANTED BY regress_priv_user1;\n SET SESSION AUTHORIZATION regress_priv_user3;\n@@ -246,12 +246,16 @@ GRANT regress_priv_role TO regress_priv_user1 WITH ADMIN OPTION GRANTED BY regre\n ERROR:  permission denied to grant privileges as role \"regress_priv_role\"\n DETAIL:  The grantor must have the ADMIN option on role \"regress_priv_role\".\n GRANT regress_priv_role TO regress_priv_user1 WITH ADMIN OPTION GRANTED BY CURRENT_ROLE;\n+ERROR:  permission denied to grant privileges as role \"neondb_owner\"\n+DETAIL:  The grantor must have the ADMIN option on role \"regress_priv_role\".\n REVOKE ADMIN OPTION FOR regress_priv_role FROM regress_priv_user1 GRANTED BY foo; -- error\n ERROR:  role \"foo\" does not exist\n REVOKE ADMIN OPTION FOR regress_priv_role FROM regress_priv_user1 GRANTED BY regress_priv_user2; -- warning, noop\n WARNING:  role \"regress_priv_user1\" has not been granted membership in role \"regress_priv_role\" by role \"regress_priv_user2\"\n REVOKE ADMIN OPTION FOR regress_priv_role FROM regress_priv_user1 GRANTED BY CURRENT_USER;\n+WARNING:  role \"regress_priv_user1\" has not been granted membership in role \"regress_priv_role\" by role \"neondb_owner\"\n REVOKE regress_priv_role FROM regress_priv_user1 GRANTED BY CURRENT_ROLE;\n+WARNING:  role \"regress_priv_user1\" has not been granted membership in role \"regress_priv_role\" by role \"neondb_owner\"\n DROP ROLE regress_priv_role;\n SET SESSION AUTHORIZATION regress_priv_user1;\n SELECT session_user, current_user;\n@@ -1783,7 +1787,7 @@ SELECT has_table_privilege('regress_priv_user1', 'atest4', 'SELECT WITH GRANT 
OP\n \n -- security-restricted operations\n \\c -\n-CREATE ROLE regress_sro_user;\n+CREATE ROLE regress_sro_user PASSWORD NEON_PASSWORD_PLACEHOLDER;\n -- Check that index expressions and predicates are run as the table's owner\n -- A dummy index function checking current_user\n CREATE FUNCTION sro_ifun(int) RETURNS int AS $$\n@@ -2675,8 +2679,8 @@ drop cascades to function testns.priv_testagg(integer)\n drop cascades to function testns.priv_testproc(integer)\n -- Change owner of the schema & and rename of new schema owner\n \\c -\n-CREATE ROLE regress_schemauser1 superuser login;\n-CREATE ROLE regress_schemauser2 superuser login;\n+CREATE ROLE regress_schemauser1 superuser login PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE ROLE regress_schemauser2 superuser login PASSWORD NEON_PASSWORD_PLACEHOLDER;\n SET SESSION ROLE regress_schemauser1;\n CREATE SCHEMA testns;\n SELECT nspname, rolname FROM pg_namespace, pg_roles WHERE pg_namespace.nspname = 'testns' AND pg_namespace.nspowner = pg_roles.oid;\n@@ -2799,7 +2803,7 @@ DROP USER regress_priv_user7;\n DROP USER regress_priv_user8; -- does not exist\n ERROR:  role \"regress_priv_user8\" does not exist\n -- permissions with LOCK TABLE\n-CREATE USER regress_locktable_user;\n+CREATE USER regress_locktable_user PASSWORD NEON_PASSWORD_PLACEHOLDER;\n CREATE TABLE lock_table (a int);\n -- LOCK TABLE and SELECT permission\n GRANT SELECT ON lock_table TO regress_locktable_user;\n@@ -2895,7 +2899,7 @@ DROP USER regress_locktable_user;\n -- pg_backend_memory_contexts.\n -- switch to superuser\n \\c -\n-CREATE ROLE regress_readallstats;\n+CREATE ROLE regress_readallstats PASSWORD NEON_PASSWORD_PLACEHOLDER;\n SELECT has_table_privilege('regress_readallstats','pg_backend_memory_contexts','SELECT'); -- no\n  has_table_privilege \n ---------------------\n@@ -2939,10 +2943,10 @@ RESET ROLE;\n -- clean up\n DROP ROLE regress_readallstats;\n -- test role grantor machinery\n-CREATE ROLE regress_group;\n-CREATE ROLE 
regress_group_direct_manager;\n-CREATE ROLE regress_group_indirect_manager;\n-CREATE ROLE regress_group_member;\n+CREATE ROLE regress_group PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE ROLE regress_group_direct_manager PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE ROLE regress_group_indirect_manager PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE ROLE regress_group_member PASSWORD NEON_PASSWORD_PLACEHOLDER;\n GRANT regress_group TO regress_group_direct_manager WITH INHERIT FALSE, ADMIN TRUE;\n GRANT regress_group_direct_manager TO regress_group_indirect_manager;\n SET SESSION AUTHORIZATION regress_group_direct_manager;\n@@ -2971,9 +2975,9 @@ DROP ROLE regress_group_direct_manager;\n DROP ROLE regress_group_indirect_manager;\n DROP ROLE regress_group_member;\n -- test SET and INHERIT options with object ownership changes\n-CREATE ROLE regress_roleoption_protagonist;\n-CREATE ROLE regress_roleoption_donor;\n-CREATE ROLE regress_roleoption_recipient;\n+CREATE ROLE regress_roleoption_protagonist PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE ROLE regress_roleoption_donor PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE ROLE regress_roleoption_recipient PASSWORD NEON_PASSWORD_PLACEHOLDER;\n CREATE SCHEMA regress_roleoption;\n GRANT CREATE, USAGE ON SCHEMA regress_roleoption TO PUBLIC;\n GRANT regress_roleoption_donor TO regress_roleoption_protagonist WITH INHERIT TRUE, SET FALSE;\n@@ -3002,9 +3006,9 @@ DROP ROLE regress_roleoption_protagonist;\n DROP ROLE regress_roleoption_donor;\n DROP ROLE regress_roleoption_recipient;\n -- MAINTAIN\n-CREATE ROLE regress_no_maintain;\n-CREATE ROLE regress_maintain;\n-CREATE ROLE regress_maintain_all IN ROLE pg_maintain;\n+CREATE ROLE regress_no_maintain PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE ROLE regress_maintain PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE ROLE regress_maintain_all IN ROLE pg_maintain PASSWORD NEON_PASSWORD_PLACEHOLDER;\n CREATE TABLE maintain_test (a INT);\n CREATE INDEX ON maintain_test (a);\n GRANT MAINTAIN ON 
maintain_test TO regress_maintain;\ndiff --git a/src/test/regress/expected/psql.out b/src/test/regress/expected/psql.out\nindex 3bbe4c5f97..e742a46a63 100644\n--- a/src/test/regress/expected/psql.out\n+++ b/src/test/regress/expected/psql.out\n@@ -2862,7 +2862,7 @@ Type                | func\n -- check conditional am display\n \\pset expanded off\n CREATE SCHEMA tableam_display;\n-CREATE ROLE regress_display_role;\n+CREATE ROLE regress_display_role PASSWORD NEON_PASSWORD_PLACEHOLDER;\n ALTER SCHEMA tableam_display OWNER TO regress_display_role;\n SET search_path TO tableam_display;\n CREATE ACCESS METHOD heap_psql TYPE TABLE HANDLER heap_tableam_handler;\n@@ -4817,7 +4817,7 @@ last error code: 22012\n reset debug_parallel_query;\n \\unset FETCH_COUNT\n create schema testpart;\n-create role regress_partitioning_role;\n+create role regress_partitioning_role PASSWORD NEON_PASSWORD_PLACEHOLDER;\n alter schema testpart owner to regress_partitioning_role;\n set role to regress_partitioning_role;\n -- run test inside own schema and hide other partitions\n@@ -5269,7 +5269,7 @@ reset work_mem;\n \n -- check \\df+\n -- we have to use functions with a predictable owner name, so make a role\n-create role regress_psql_user superuser;\n+create role regress_psql_user superuser PASSWORD NEON_PASSWORD_PLACEHOLDER;\n begin;\n set session authorization regress_psql_user;\n create function psql_df_internal (float8)\n@@ -5557,11 +5557,14 @@ CREATE TEMPORARY TABLE reload_output(\n   line text\n );\n SELECT 1 AS a \\g :g_out_file\n-COPY reload_output(line) FROM :'g_out_file';\n+\\set command '\\\\COPY reload_output(line) FROM ' :'g_out_file';\n+:command\n SELECT 2 AS b\\; SELECT 3 AS c\\; SELECT 4 AS d \\g :g_out_file\n-COPY reload_output(line) FROM :'g_out_file';\n+\\set command '\\\\COPY reload_output(line) FROM ' :'g_out_file';\n+:command\n COPY (SELECT 'foo') TO STDOUT \\; COPY (SELECT 'bar') TO STDOUT \\g :g_out_file\n-COPY reload_output(line) FROM :'g_out_file';\n+\\set command 
'\\\\COPY reload_output(line) FROM ' :'g_out_file';\n+:command\n SELECT line FROM reload_output ORDER BY lineno;\n   line   \n ---------\n@@ -5600,13 +5603,15 @@ SELECT 1 AS a\\; SELECT 2 AS b\\; SELECT 3 AS c;\n -- COPY TO file\n -- The data goes to :g_out_file and the status to :o_out_file\n \\set QUIET false\n-COPY (SELECT unique1 FROM onek ORDER BY unique1 LIMIT 10) TO :'g_out_file';\n+\\set command '\\\\COPY (SELECT unique1 FROM onek ORDER BY unique1 LIMIT 10) TO ' :'g_out_file';\n+:command\n -- DML command status\n UPDATE onek SET unique1 = unique1 WHERE false;\n \\set QUIET true\n \\o\n -- Check the contents of the files generated.\n-COPY reload_output(line) FROM :'g_out_file';\n+\\set command '\\\\COPY reload_output(line) FROM ' :'g_out_file';\n+:command\n SELECT line FROM reload_output ORDER BY lineno;\n  line \n ------\n@@ -5623,7 +5628,8 @@ SELECT line FROM reload_output ORDER BY lineno;\n (10 rows)\n \n TRUNCATE TABLE reload_output;\n-COPY reload_output(line) FROM :'o_out_file';\n+\\set command '\\\\COPY reload_output(line) FROM ' :'o_out_file';\n+:command\n SELECT line FROM reload_output ORDER BY lineno;\n    line   \n ----------\n@@ -5660,7 +5666,8 @@ COPY (SELECT 'foo1') TO STDOUT \\; COPY (SELECT 'bar1') TO STDOUT;\n COPY (SELECT 'foo2') TO STDOUT \\; COPY (SELECT 'bar2') TO STDOUT \\g :g_out_file\n \\o\n -- Check the contents of the files generated.\n-COPY reload_output(line) FROM :'g_out_file';\n+\\set command '\\\\COPY reload_output(line) FROM ' :'g_out_file';\n+:command\n SELECT line FROM reload_output ORDER BY lineno;\n  line \n ------\n@@ -5669,7 +5676,8 @@ SELECT line FROM reload_output ORDER BY lineno;\n (2 rows)\n \n TRUNCATE TABLE reload_output;\n-COPY reload_output(line) FROM :'o_out_file';\n+\\set command '\\\\COPY reload_output(line) FROM ' :'o_out_file';\n+:command\n SELECT line FROM reload_output ORDER BY lineno;\n  line \n ------\n@@ -6633,10 +6641,10 @@ cross-database references are not implemented: 
\"no.such.database\".\"no.such.schem\n \\dX \"no.such.database\".\"no.such.schema\".\"no.such.extended.statistics\"\n cross-database references are not implemented: \"no.such.database\".\"no.such.schema\".\"no.such.extended.statistics\"\n -- check \\drg and \\du\n-CREATE ROLE regress_du_role0;\n-CREATE ROLE regress_du_role1;\n-CREATE ROLE regress_du_role2;\n-CREATE ROLE regress_du_admin;\n+CREATE ROLE regress_du_role0 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE ROLE regress_du_role1 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE ROLE regress_du_role2 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE ROLE regress_du_admin PASSWORD NEON_PASSWORD_PLACEHOLDER;\n GRANT regress_du_role0 TO regress_du_admin WITH ADMIN TRUE;\n GRANT regress_du_role1 TO regress_du_admin WITH ADMIN TRUE;\n GRANT regress_du_role2 TO regress_du_admin WITH ADMIN TRUE;\ndiff --git a/src/test/regress/expected/publication.out b/src/test/regress/expected/publication.out\nindex 30b6371134..cc01076c22 100644\n--- a/src/test/regress/expected/publication.out\n+++ b/src/test/regress/expected/publication.out\n@@ -1,9 +1,9 @@\n --\n -- PUBLICATION\n --\n-CREATE ROLE regress_publication_user LOGIN SUPERUSER;\n-CREATE ROLE regress_publication_user2;\n-CREATE ROLE regress_publication_user_dummy LOGIN NOSUPERUSER;\n+CREATE ROLE regress_publication_user LOGIN SUPERUSER PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE ROLE regress_publication_user2 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE ROLE regress_publication_user_dummy LOGIN NOSUPERUSER PASSWORD NEON_PASSWORD_PLACEHOLDER;\n SET SESSION AUTHORIZATION 'regress_publication_user';\n -- suppress warning that depends on wal_level\n SET client_min_messages = 'ERROR';\n@@ -1221,7 +1221,7 @@ ALTER PUBLICATION testpub2 ADD TABLE testpub_tbl1;  -- ok\n DROP PUBLICATION testpub2;\n DROP PUBLICATION testpub3;\n SET ROLE regress_publication_user;\n-CREATE ROLE regress_publication_user3;\n+CREATE ROLE regress_publication_user3 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n GRANT 
regress_publication_user2 TO regress_publication_user3;\n SET client_min_messages = 'ERROR';\n CREATE PUBLICATION testpub4 FOR TABLES IN SCHEMA pub_test;\ndiff --git a/src/test/regress/expected/regproc.out b/src/test/regress/expected/regproc.out\nindex 97b917502c..e9428535cb 100644\n--- a/src/test/regress/expected/regproc.out\n+++ b/src/test/regress/expected/regproc.out\n@@ -2,7 +2,7 @@\n -- regproc\n --\n /* If objects exist, return oids */\n-CREATE ROLE regress_regrole_test;\n+CREATE ROLE regress_regrole_test PASSWORD NEON_PASSWORD_PLACEHOLDER;\n -- without schemaname\n SELECT regoper('||/');\n  regoper \ndiff --git a/src/test/regress/expected/roleattributes.out b/src/test/regress/expected/roleattributes.out\nindex 5e6969b173..2c4d52237f 100644\n--- a/src/test/regress/expected/roleattributes.out\n+++ b/src/test/regress/expected/roleattributes.out\n@@ -1,233 +1,233 @@\n -- default for superuser is false\n-CREATE ROLE regress_test_def_superuser;\n-SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, rolpassword, rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_def_superuser';\n-          rolname           | rolsuper | rolinherit | rolcreaterole | rolcreatedb | rolcanlogin | rolreplication | rolbypassrls | rolconnlimit | rolpassword | rolvaliduntil \n-----------------------------+----------+------------+---------------+-------------+-------------+----------------+--------------+--------------+-------------+---------------\n- regress_test_def_superuser | f        | t          | f             | f           | f           | f              | f            |           -1 |             | \n+CREATE ROLE regress_test_def_superuser PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, regexp_replace(rolpassword, 
'(SCRAM-SHA-256)\\$(\\d+):([a-zA-Z0-9+/=]+)\\$([a-zA-Z0-9+=/]+):([a-zA-Z0-9+/=]+)', '\\1$\\2:<salt>$<storedkey>:<serverkey>'), rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_def_superuser';\n+          rolname           | rolsuper | rolinherit | rolcreaterole | rolcreatedb | rolcanlogin | rolreplication | rolbypassrls | rolconnlimit |                  regexp_replace                   | rolvaliduntil \n+----------------------------+----------+------------+---------------+-------------+-------------+----------------+--------------+--------------+---------------------------------------------------+---------------\n+ regress_test_def_superuser | f        | t          | f             | f           | f           | f              | f            |           -1 | SCRAM-SHA-256$4096:<salt>$<storedkey>:<serverkey> | \n (1 row)\n \n-CREATE ROLE regress_test_superuser WITH SUPERUSER;\n-SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, rolpassword, rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_superuser';\n-        rolname         | rolsuper | rolinherit | rolcreaterole | rolcreatedb | rolcanlogin | rolreplication | rolbypassrls | rolconnlimit | rolpassword | rolvaliduntil \n-------------------------+----------+------------+---------------+-------------+-------------+----------------+--------------+--------------+-------------+---------------\n- regress_test_superuser | t        | t          | f             | f           | f           | f              | f            |           -1 |             | \n+CREATE ROLE regress_test_superuser WITH SUPERUSER PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, regexp_replace(rolpassword, '(SCRAM-SHA-256)\\$(\\d+):([a-zA-Z0-9+/=]+)\\$([a-zA-Z0-9+=/]+):([a-zA-Z0-9+/=]+)', '\\1$\\2:<salt>$<storedkey>:<serverkey>'), rolvaliduntil FROM 
pg_authid WHERE rolname = 'regress_test_superuser';\n+        rolname         | rolsuper | rolinherit | rolcreaterole | rolcreatedb | rolcanlogin | rolreplication | rolbypassrls | rolconnlimit |                  regexp_replace                   | rolvaliduntil \n+------------------------+----------+------------+---------------+-------------+-------------+----------------+--------------+--------------+---------------------------------------------------+---------------\n+ regress_test_superuser | t        | t          | f             | f           | f           | f              | f            |           -1 | SCRAM-SHA-256$4096:<salt>$<storedkey>:<serverkey> | \n (1 row)\n \n ALTER ROLE regress_test_superuser WITH NOSUPERUSER;\n-SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, rolpassword, rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_superuser';\n-        rolname         | rolsuper | rolinherit | rolcreaterole | rolcreatedb | rolcanlogin | rolreplication | rolbypassrls | rolconnlimit | rolpassword | rolvaliduntil \n-------------------------+----------+------------+---------------+-------------+-------------+----------------+--------------+--------------+-------------+---------------\n- regress_test_superuser | f        | t          | f             | f           | f           | f              | f            |           -1 |             | \n+SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, regexp_replace(rolpassword, '(SCRAM-SHA-256)\\$(\\d+):([a-zA-Z0-9+/=]+)\\$([a-zA-Z0-9+=/]+):([a-zA-Z0-9+/=]+)', '\\1$\\2:<salt>$<storedkey>:<serverkey>'), rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_superuser';\n+        rolname         | rolsuper | rolinherit | rolcreaterole | rolcreatedb | rolcanlogin | rolreplication | rolbypassrls | rolconnlimit |                  regexp_replace                   | 
rolvaliduntil \n+------------------------+----------+------------+---------------+-------------+-------------+----------------+--------------+--------------+---------------------------------------------------+---------------\n+ regress_test_superuser | f        | t          | f             | f           | f           | f              | f            |           -1 | SCRAM-SHA-256$4096:<salt>$<storedkey>:<serverkey> | \n (1 row)\n \n ALTER ROLE regress_test_superuser WITH SUPERUSER;\n-SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, rolpassword, rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_superuser';\n-        rolname         | rolsuper | rolinherit | rolcreaterole | rolcreatedb | rolcanlogin | rolreplication | rolbypassrls | rolconnlimit | rolpassword | rolvaliduntil \n-------------------------+----------+------------+---------------+-------------+-------------+----------------+--------------+--------------+-------------+---------------\n- regress_test_superuser | t        | t          | f             | f           | f           | f              | f            |           -1 |             | \n+SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, regexp_replace(rolpassword, '(SCRAM-SHA-256)\\$(\\d+):([a-zA-Z0-9+/=]+)\\$([a-zA-Z0-9+=/]+):([a-zA-Z0-9+/=]+)', '\\1$\\2:<salt>$<storedkey>:<serverkey>'), rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_superuser';\n+        rolname         | rolsuper | rolinherit | rolcreaterole | rolcreatedb | rolcanlogin | rolreplication | rolbypassrls | rolconnlimit |                  regexp_replace                   | rolvaliduntil \n+------------------------+----------+------------+---------------+-------------+-------------+----------------+--------------+--------------+---------------------------------------------------+---------------\n+ regress_test_superuser | t   
     | t          | f             | f           | f           | f              | f            |           -1 | SCRAM-SHA-256$4096:<salt>$<storedkey>:<serverkey> | \n (1 row)\n \n -- default for inherit is true\n-CREATE ROLE regress_test_def_inherit;\n-SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, rolpassword, rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_def_inherit';\n-         rolname          | rolsuper | rolinherit | rolcreaterole | rolcreatedb | rolcanlogin | rolreplication | rolbypassrls | rolconnlimit | rolpassword | rolvaliduntil \n---------------------------+----------+------------+---------------+-------------+-------------+----------------+--------------+--------------+-------------+---------------\n- regress_test_def_inherit | f        | t          | f             | f           | f           | f              | f            |           -1 |             | \n+CREATE ROLE regress_test_def_inherit PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, regexp_replace(rolpassword, '(SCRAM-SHA-256)\\$(\\d+):([a-zA-Z0-9+/=]+)\\$([a-zA-Z0-9+=/]+):([a-zA-Z0-9+/=]+)', '\\1$\\2:<salt>$<storedkey>:<serverkey>'), rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_def_inherit';\n+         rolname          | rolsuper | rolinherit | rolcreaterole | rolcreatedb | rolcanlogin | rolreplication | rolbypassrls | rolconnlimit |                  regexp_replace                   | rolvaliduntil \n+--------------------------+----------+------------+---------------+-------------+-------------+----------------+--------------+--------------+---------------------------------------------------+---------------\n+ regress_test_def_inherit | f        | t          | f             | f           | f           | f              | f            |           -1 | 
SCRAM-SHA-256$4096:<salt>$<storedkey>:<serverkey> | \n (1 row)\n \n-CREATE ROLE regress_test_inherit WITH NOINHERIT;\n-SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, rolpassword, rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_inherit';\n-       rolname        | rolsuper | rolinherit | rolcreaterole | rolcreatedb | rolcanlogin | rolreplication | rolbypassrls | rolconnlimit | rolpassword | rolvaliduntil \n-----------------------+----------+------------+---------------+-------------+-------------+----------------+--------------+--------------+-------------+---------------\n- regress_test_inherit | f        | f          | f             | f           | f           | f              | f            |           -1 |             | \n+CREATE ROLE regress_test_inherit WITH NOINHERIT PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, regexp_replace(rolpassword, '(SCRAM-SHA-256)\\$(\\d+):([a-zA-Z0-9+/=]+)\\$([a-zA-Z0-9+=/]+):([a-zA-Z0-9+/=]+)', '\\1$\\2:<salt>$<storedkey>:<serverkey>'), rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_inherit';\n+       rolname        | rolsuper | rolinherit | rolcreaterole | rolcreatedb | rolcanlogin | rolreplication | rolbypassrls | rolconnlimit |                  regexp_replace                   | rolvaliduntil \n+----------------------+----------+------------+---------------+-------------+-------------+----------------+--------------+--------------+---------------------------------------------------+---------------\n+ regress_test_inherit | f        | f          | f             | f           | f           | f              | f            |           -1 | SCRAM-SHA-256$4096:<salt>$<storedkey>:<serverkey> | \n (1 row)\n \n ALTER ROLE regress_test_inherit WITH INHERIT;\n-SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, 
rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, rolpassword, rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_inherit';\n-       rolname        | rolsuper | rolinherit | rolcreaterole | rolcreatedb | rolcanlogin | rolreplication | rolbypassrls | rolconnlimit | rolpassword | rolvaliduntil \n-----------------------+----------+------------+---------------+-------------+-------------+----------------+--------------+--------------+-------------+---------------\n- regress_test_inherit | f        | t          | f             | f           | f           | f              | f            |           -1 |             | \n+SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, regexp_replace(rolpassword, '(SCRAM-SHA-256)\\$(\\d+):([a-zA-Z0-9+/=]+)\\$([a-zA-Z0-9+=/]+):([a-zA-Z0-9+/=]+)', '\\1$\\2:<salt>$<storedkey>:<serverkey>'), rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_inherit';\n+       rolname        | rolsuper | rolinherit | rolcreaterole | rolcreatedb | rolcanlogin | rolreplication | rolbypassrls | rolconnlimit |                  regexp_replace                   | rolvaliduntil \n+----------------------+----------+------------+---------------+-------------+-------------+----------------+--------------+--------------+---------------------------------------------------+---------------\n+ regress_test_inherit | f        | t          | f             | f           | f           | f              | f            |           -1 | SCRAM-SHA-256$4096:<salt>$<storedkey>:<serverkey> | \n (1 row)\n \n ALTER ROLE regress_test_inherit WITH NOINHERIT;\n-SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, rolpassword, rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_inherit';\n-       rolname        | rolsuper | rolinherit | rolcreaterole | rolcreatedb | rolcanlogin | rolreplication | rolbypassrls | 
rolconnlimit | rolpassword | rolvaliduntil \n-----------------------+----------+------------+---------------+-------------+-------------+----------------+--------------+--------------+-------------+---------------\n- regress_test_inherit | f        | f          | f             | f           | f           | f              | f            |           -1 |             | \n+SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, regexp_replace(rolpassword, '(SCRAM-SHA-256)\\$(\\d+):([a-zA-Z0-9+/=]+)\\$([a-zA-Z0-9+=/]+):([a-zA-Z0-9+/=]+)', '\\1$\\2:<salt>$<storedkey>:<serverkey>'), rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_inherit';\n+       rolname        | rolsuper | rolinherit | rolcreaterole | rolcreatedb | rolcanlogin | rolreplication | rolbypassrls | rolconnlimit |                  regexp_replace                   | rolvaliduntil \n+----------------------+----------+------------+---------------+-------------+-------------+----------------+--------------+--------------+---------------------------------------------------+---------------\n+ regress_test_inherit | f        | f          | f             | f           | f           | f              | f            |           -1 | SCRAM-SHA-256$4096:<salt>$<storedkey>:<serverkey> | \n (1 row)\n \n -- default for create role is false\n-CREATE ROLE regress_test_def_createrole;\n-SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, rolpassword, rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_def_createrole';\n-           rolname           | rolsuper | rolinherit | rolcreaterole | rolcreatedb | rolcanlogin | rolreplication | rolbypassrls | rolconnlimit | rolpassword | rolvaliduntil \n------------------------------+----------+------------+---------------+-------------+-------------+----------------+--------------+--------------+-------------+---------------\n- 
regress_test_def_createrole | f        | t          | f             | f           | f           | f              | f            |           -1 |             | \n+CREATE ROLE regress_test_def_createrole PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, regexp_replace(rolpassword, '(SCRAM-SHA-256)\\$(\\d+):([a-zA-Z0-9+/=]+)\\$([a-zA-Z0-9+=/]+):([a-zA-Z0-9+/=]+)', '\\1$\\2:<salt>$<storedkey>:<serverkey>'), rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_def_createrole';\n+           rolname           | rolsuper | rolinherit | rolcreaterole | rolcreatedb | rolcanlogin | rolreplication | rolbypassrls | rolconnlimit |                  regexp_replace                   | rolvaliduntil \n+-----------------------------+----------+------------+---------------+-------------+-------------+----------------+--------------+--------------+---------------------------------------------------+---------------\n+ regress_test_def_createrole | f        | t          | f             | f           | f           | f              | f            |           -1 | SCRAM-SHA-256$4096:<salt>$<storedkey>:<serverkey> | \n (1 row)\n \n-CREATE ROLE regress_test_createrole WITH CREATEROLE;\n-SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, rolpassword, rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_createrole';\n-         rolname         | rolsuper | rolinherit | rolcreaterole | rolcreatedb | rolcanlogin | rolreplication | rolbypassrls | rolconnlimit | rolpassword | rolvaliduntil \n--------------------------+----------+------------+---------------+-------------+-------------+----------------+--------------+--------------+-------------+---------------\n- regress_test_createrole | f        | t          | t             | f           | f           | f              | f            |           -1 |        
     | \n+CREATE ROLE regress_test_createrole WITH CREATEROLE PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, regexp_replace(rolpassword, '(SCRAM-SHA-256)\\$(\\d+):([a-zA-Z0-9+/=]+)\\$([a-zA-Z0-9+=/]+):([a-zA-Z0-9+/=]+)', '\\1$\\2:<salt>$<storedkey>:<serverkey>'), rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_createrole';\n+         rolname         | rolsuper | rolinherit | rolcreaterole | rolcreatedb | rolcanlogin | rolreplication | rolbypassrls | rolconnlimit |                  regexp_replace                   | rolvaliduntil \n+-------------------------+----------+------------+---------------+-------------+-------------+----------------+--------------+--------------+---------------------------------------------------+---------------\n+ regress_test_createrole | f        | t          | t             | f           | f           | f              | f            |           -1 | SCRAM-SHA-256$4096:<salt>$<storedkey>:<serverkey> | \n (1 row)\n \n ALTER ROLE regress_test_createrole WITH NOCREATEROLE;\n-SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, rolpassword, rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_createrole';\n-         rolname         | rolsuper | rolinherit | rolcreaterole | rolcreatedb | rolcanlogin | rolreplication | rolbypassrls | rolconnlimit | rolpassword | rolvaliduntil \n--------------------------+----------+------------+---------------+-------------+-------------+----------------+--------------+--------------+-------------+---------------\n- regress_test_createrole | f        | t          | f             | f           | f           | f              | f            |           -1 |             | \n+SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, 
regexp_replace(rolpassword, '(SCRAM-SHA-256)\\$(\\d+):([a-zA-Z0-9+/=]+)\\$([a-zA-Z0-9+=/]+):([a-zA-Z0-9+/=]+)', '\\1$\\2:<salt>$<storedkey>:<serverkey>'), rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_createrole';\n+         rolname         | rolsuper | rolinherit | rolcreaterole | rolcreatedb | rolcanlogin | rolreplication | rolbypassrls | rolconnlimit |                  regexp_replace                   | rolvaliduntil \n+-------------------------+----------+------------+---------------+-------------+-------------+----------------+--------------+--------------+---------------------------------------------------+---------------\n+ regress_test_createrole | f        | t          | f             | f           | f           | f              | f            |           -1 | SCRAM-SHA-256$4096:<salt>$<storedkey>:<serverkey> | \n (1 row)\n \n ALTER ROLE regress_test_createrole WITH CREATEROLE;\n-SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, rolpassword, rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_createrole';\n-         rolname         | rolsuper | rolinherit | rolcreaterole | rolcreatedb | rolcanlogin | rolreplication | rolbypassrls | rolconnlimit | rolpassword | rolvaliduntil \n--------------------------+----------+------------+---------------+-------------+-------------+----------------+--------------+--------------+-------------+---------------\n- regress_test_createrole | f        | t          | t             | f           | f           | f              | f            |           -1 |             | \n+SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, regexp_replace(rolpassword, '(SCRAM-SHA-256)\\$(\\d+):([a-zA-Z0-9+/=]+)\\$([a-zA-Z0-9+=/]+):([a-zA-Z0-9+/=]+)', '\\1$\\2:<salt>$<storedkey>:<serverkey>'), rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_createrole';\n+         rolname  
       | rolsuper | rolinherit | rolcreaterole | rolcreatedb | rolcanlogin | rolreplication | rolbypassrls | rolconnlimit |                  regexp_replace                   | rolvaliduntil \n+-------------------------+----------+------------+---------------+-------------+-------------+----------------+--------------+--------------+---------------------------------------------------+---------------\n+ regress_test_createrole | f        | t          | t             | f           | f           | f              | f            |           -1 | SCRAM-SHA-256$4096:<salt>$<storedkey>:<serverkey> | \n (1 row)\n \n -- default for create database is false\n-CREATE ROLE regress_test_def_createdb;\n-SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, rolpassword, rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_def_createdb';\n-          rolname          | rolsuper | rolinherit | rolcreaterole | rolcreatedb | rolcanlogin | rolreplication | rolbypassrls | rolconnlimit | rolpassword | rolvaliduntil \n----------------------------+----------+------------+---------------+-------------+-------------+----------------+--------------+--------------+-------------+---------------\n- regress_test_def_createdb | f        | t          | f             | f           | f           | f              | f            |           -1 |             | \n+CREATE ROLE regress_test_def_createdb PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, regexp_replace(rolpassword, '(SCRAM-SHA-256)\\$(\\d+):([a-zA-Z0-9+/=]+)\\$([a-zA-Z0-9+=/]+):([a-zA-Z0-9+/=]+)', '\\1$\\2:<salt>$<storedkey>:<serverkey>'), rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_def_createdb';\n+          rolname          | rolsuper | rolinherit | rolcreaterole | rolcreatedb | rolcanlogin | rolreplication | rolbypassrls | rolconnlimit |       
           regexp_replace                   | rolvaliduntil \n+---------------------------+----------+------------+---------------+-------------+-------------+----------------+--------------+--------------+---------------------------------------------------+---------------\n+ regress_test_def_createdb | f        | t          | f             | f           | f           | f              | f            |           -1 | SCRAM-SHA-256$4096:<salt>$<storedkey>:<serverkey> | \n (1 row)\n \n-CREATE ROLE regress_test_createdb WITH CREATEDB;\n-SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, rolpassword, rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_createdb';\n-        rolname        | rolsuper | rolinherit | rolcreaterole | rolcreatedb | rolcanlogin | rolreplication | rolbypassrls | rolconnlimit | rolpassword | rolvaliduntil \n------------------------+----------+------------+---------------+-------------+-------------+----------------+--------------+--------------+-------------+---------------\n- regress_test_createdb | f        | t          | f             | t           | f           | f              | f            |           -1 |             | \n+CREATE ROLE regress_test_createdb WITH CREATEDB PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, regexp_replace(rolpassword, '(SCRAM-SHA-256)\\$(\\d+):([a-zA-Z0-9+/=]+)\\$([a-zA-Z0-9+=/]+):([a-zA-Z0-9+/=]+)', '\\1$\\2:<salt>$<storedkey>:<serverkey>'), rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_createdb';\n+        rolname        | rolsuper | rolinherit | rolcreaterole | rolcreatedb | rolcanlogin | rolreplication | rolbypassrls | rolconnlimit |                  regexp_replace                   | rolvaliduntil 
\n+-----------------------+----------+------------+---------------+-------------+-------------+----------------+--------------+--------------+---------------------------------------------------+---------------\n+ regress_test_createdb | f        | t          | f             | t           | f           | f              | f            |           -1 | SCRAM-SHA-256$4096:<salt>$<storedkey>:<serverkey> | \n (1 row)\n \n ALTER ROLE regress_test_createdb WITH NOCREATEDB;\n-SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, rolpassword, rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_createdb';\n-        rolname        | rolsuper | rolinherit | rolcreaterole | rolcreatedb | rolcanlogin | rolreplication | rolbypassrls | rolconnlimit | rolpassword | rolvaliduntil \n------------------------+----------+------------+---------------+-------------+-------------+----------------+--------------+--------------+-------------+---------------\n- regress_test_createdb | f        | t          | f             | f           | f           | f              | f            |           -1 |             | \n+SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, regexp_replace(rolpassword, '(SCRAM-SHA-256)\\$(\\d+):([a-zA-Z0-9+/=]+)\\$([a-zA-Z0-9+=/]+):([a-zA-Z0-9+/=]+)', '\\1$\\2:<salt>$<storedkey>:<serverkey>'), rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_createdb';\n+        rolname        | rolsuper | rolinherit | rolcreaterole | rolcreatedb | rolcanlogin | rolreplication | rolbypassrls | rolconnlimit |                  regexp_replace                   | rolvaliduntil \n+-----------------------+----------+------------+---------------+-------------+-------------+----------------+--------------+--------------+---------------------------------------------------+---------------\n+ regress_test_createdb | f        | t          | f   
          | f           | f           | f              | f            |           -1 | SCRAM-SHA-256$4096:<salt>$<storedkey>:<serverkey> | \n (1 row)\n \n ALTER ROLE regress_test_createdb WITH CREATEDB;\n-SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, rolpassword, rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_createdb';\n-        rolname        | rolsuper | rolinherit | rolcreaterole | rolcreatedb | rolcanlogin | rolreplication | rolbypassrls | rolconnlimit | rolpassword | rolvaliduntil \n------------------------+----------+------------+---------------+-------------+-------------+----------------+--------------+--------------+-------------+---------------\n- regress_test_createdb | f        | t          | f             | t           | f           | f              | f            |           -1 |             | \n+SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, regexp_replace(rolpassword, '(SCRAM-SHA-256)\\$(\\d+):([a-zA-Z0-9+/=]+)\\$([a-zA-Z0-9+=/]+):([a-zA-Z0-9+/=]+)', '\\1$\\2:<salt>$<storedkey>:<serverkey>'), rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_createdb';\n+        rolname        | rolsuper | rolinherit | rolcreaterole | rolcreatedb | rolcanlogin | rolreplication | rolbypassrls | rolconnlimit |                  regexp_replace                   | rolvaliduntil \n+-----------------------+----------+------------+---------------+-------------+-------------+----------------+--------------+--------------+---------------------------------------------------+---------------\n+ regress_test_createdb | f        | t          | f             | t           | f           | f              | f            |           -1 | SCRAM-SHA-256$4096:<salt>$<storedkey>:<serverkey> | \n (1 row)\n \n -- default for can login is false for role\n-CREATE ROLE regress_test_def_role_canlogin;\n-SELECT rolname, 
rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, rolpassword, rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_def_role_canlogin';\n-            rolname             | rolsuper | rolinherit | rolcreaterole | rolcreatedb | rolcanlogin | rolreplication | rolbypassrls | rolconnlimit | rolpassword | rolvaliduntil \n---------------------------------+----------+------------+---------------+-------------+-------------+----------------+--------------+--------------+-------------+---------------\n- regress_test_def_role_canlogin | f        | t          | f             | f           | f           | f              | f            |           -1 |             | \n+CREATE ROLE regress_test_def_role_canlogin PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, regexp_replace(rolpassword, '(SCRAM-SHA-256)\\$(\\d+):([a-zA-Z0-9+/=]+)\\$([a-zA-Z0-9+=/]+):([a-zA-Z0-9+/=]+)', '\\1$\\2:<salt>$<storedkey>:<serverkey>'), rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_def_role_canlogin';\n+            rolname             | rolsuper | rolinherit | rolcreaterole | rolcreatedb | rolcanlogin | rolreplication | rolbypassrls | rolconnlimit |                  regexp_replace                   | rolvaliduntil \n+--------------------------------+----------+------------+---------------+-------------+-------------+----------------+--------------+--------------+---------------------------------------------------+---------------\n+ regress_test_def_role_canlogin | f        | t          | f             | f           | f           | f              | f            |           -1 | SCRAM-SHA-256$4096:<salt>$<storedkey>:<serverkey> | \n (1 row)\n \n-CREATE ROLE regress_test_role_canlogin WITH LOGIN;\n-SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, 
rolpassword, rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_role_canlogin';\n-          rolname           | rolsuper | rolinherit | rolcreaterole | rolcreatedb | rolcanlogin | rolreplication | rolbypassrls | rolconnlimit | rolpassword | rolvaliduntil \n-----------------------------+----------+------------+---------------+-------------+-------------+----------------+--------------+--------------+-------------+---------------\n- regress_test_role_canlogin | f        | t          | f             | f           | t           | f              | f            |           -1 |             | \n+CREATE ROLE regress_test_role_canlogin WITH LOGIN PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, regexp_replace(rolpassword, '(SCRAM-SHA-256)\\$(\\d+):([a-zA-Z0-9+/=]+)\\$([a-zA-Z0-9+=/]+):([a-zA-Z0-9+/=]+)', '\\1$\\2:<salt>$<storedkey>:<serverkey>'), rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_role_canlogin';\n+          rolname           | rolsuper | rolinherit | rolcreaterole | rolcreatedb | rolcanlogin | rolreplication | rolbypassrls | rolconnlimit |                  regexp_replace                   | rolvaliduntil \n+----------------------------+----------+------------+---------------+-------------+-------------+----------------+--------------+--------------+---------------------------------------------------+---------------\n+ regress_test_role_canlogin | f        | t          | f             | f           | t           | f              | f            |           -1 | SCRAM-SHA-256$4096:<salt>$<storedkey>:<serverkey> | \n (1 row)\n \n ALTER ROLE regress_test_role_canlogin WITH NOLOGIN;\n-SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, rolpassword, rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_role_canlogin';\n-          rolname           | rolsuper | 
rolinherit | rolcreaterole | rolcreatedb | rolcanlogin | rolreplication | rolbypassrls | rolconnlimit | rolpassword | rolvaliduntil \n-----------------------------+----------+------------+---------------+-------------+-------------+----------------+--------------+--------------+-------------+---------------\n- regress_test_role_canlogin | f        | t          | f             | f           | f           | f              | f            |           -1 |             | \n+SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, regexp_replace(rolpassword, '(SCRAM-SHA-256)\\$(\\d+):([a-zA-Z0-9+/=]+)\\$([a-zA-Z0-9+=/]+):([a-zA-Z0-9+/=]+)', '\\1$\\2:<salt>$<storedkey>:<serverkey>'), rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_role_canlogin';\n+          rolname           | rolsuper | rolinherit | rolcreaterole | rolcreatedb | rolcanlogin | rolreplication | rolbypassrls | rolconnlimit |                  regexp_replace                   | rolvaliduntil \n+----------------------------+----------+------------+---------------+-------------+-------------+----------------+--------------+--------------+---------------------------------------------------+---------------\n+ regress_test_role_canlogin | f        | t          | f             | f           | f           | f              | f            |           -1 | SCRAM-SHA-256$4096:<salt>$<storedkey>:<serverkey> | \n (1 row)\n \n ALTER ROLE regress_test_role_canlogin WITH LOGIN;\n-SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, rolpassword, rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_role_canlogin';\n-          rolname           | rolsuper | rolinherit | rolcreaterole | rolcreatedb | rolcanlogin | rolreplication | rolbypassrls | rolconnlimit | rolpassword | rolvaliduntil 
\n-----------------------------+----------+------------+---------------+-------------+-------------+----------------+--------------+--------------+-------------+---------------\n- regress_test_role_canlogin | f        | t          | f             | f           | t           | f              | f            |           -1 |             | \n+SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, regexp_replace(rolpassword, '(SCRAM-SHA-256)\\$(\\d+):([a-zA-Z0-9+/=]+)\\$([a-zA-Z0-9+=/]+):([a-zA-Z0-9+/=]+)', '\\1$\\2:<salt>$<storedkey>:<serverkey>'), rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_role_canlogin';\n+          rolname           | rolsuper | rolinherit | rolcreaterole | rolcreatedb | rolcanlogin | rolreplication | rolbypassrls | rolconnlimit |                  regexp_replace                   | rolvaliduntil \n+----------------------------+----------+------------+---------------+-------------+-------------+----------------+--------------+--------------+---------------------------------------------------+---------------\n+ regress_test_role_canlogin | f        | t          | f             | f           | t           | f              | f            |           -1 | SCRAM-SHA-256$4096:<salt>$<storedkey>:<serverkey> | \n (1 row)\n \n -- default for can login is true for user\n-CREATE USER regress_test_def_user_canlogin;\n-SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, rolpassword, rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_def_user_canlogin';\n-            rolname             | rolsuper | rolinherit | rolcreaterole | rolcreatedb | rolcanlogin | rolreplication | rolbypassrls | rolconnlimit | rolpassword | rolvaliduntil 
\n---------------------------------+----------+------------+---------------+-------------+-------------+----------------+--------------+--------------+-------------+---------------\n- regress_test_def_user_canlogin | f        | t          | f             | f           | t           | f              | f            |           -1 |             | \n+CREATE USER regress_test_def_user_canlogin PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, regexp_replace(rolpassword, '(SCRAM-SHA-256)\\$(\\d+):([a-zA-Z0-9+/=]+)\\$([a-zA-Z0-9+=/]+):([a-zA-Z0-9+/=]+)', '\\1$\\2:<salt>$<storedkey>:<serverkey>'), rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_def_user_canlogin';\n+            rolname             | rolsuper | rolinherit | rolcreaterole | rolcreatedb | rolcanlogin | rolreplication | rolbypassrls | rolconnlimit |                  regexp_replace                   | rolvaliduntil \n+--------------------------------+----------+------------+---------------+-------------+-------------+----------------+--------------+--------------+---------------------------------------------------+---------------\n+ regress_test_def_user_canlogin | f        | t          | f             | f           | t           | f              | f            |           -1 | SCRAM-SHA-256$4096:<salt>$<storedkey>:<serverkey> | \n (1 row)\n \n-CREATE USER regress_test_user_canlogin WITH NOLOGIN;\n-SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, rolpassword, rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_user_canlogin';\n-          rolname           | rolsuper | rolinherit | rolcreaterole | rolcreatedb | rolcanlogin | rolreplication | rolbypassrls | rolconnlimit | rolpassword | rolvaliduntil 
\n-----------------------------+----------+------------+---------------+-------------+-------------+----------------+--------------+--------------+-------------+---------------\n- regress_test_user_canlogin | f        | t          | f             | f           | f           | f              | f            |           -1 |             | \n+CREATE USER regress_test_user_canlogin WITH NOLOGIN PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, regexp_replace(rolpassword, '(SCRAM-SHA-256)\\$(\\d+):([a-zA-Z0-9+/=]+)\\$([a-zA-Z0-9+=/]+):([a-zA-Z0-9+/=]+)', '\\1$\\2:<salt>$<storedkey>:<serverkey>'), rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_user_canlogin';\n+          rolname           | rolsuper | rolinherit | rolcreaterole | rolcreatedb | rolcanlogin | rolreplication | rolbypassrls | rolconnlimit |                  regexp_replace                   | rolvaliduntil \n+----------------------------+----------+------------+---------------+-------------+-------------+----------------+--------------+--------------+---------------------------------------------------+---------------\n+ regress_test_user_canlogin | f        | t          | f             | f           | f           | f              | f            |           -1 | SCRAM-SHA-256$4096:<salt>$<storedkey>:<serverkey> | \n (1 row)\n \n ALTER USER regress_test_user_canlogin WITH LOGIN;\n-SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, rolpassword, rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_user_canlogin';\n-          rolname           | rolsuper | rolinherit | rolcreaterole | rolcreatedb | rolcanlogin | rolreplication | rolbypassrls | rolconnlimit | rolpassword | rolvaliduntil 
\n-----------------------------+----------+------------+---------------+-------------+-------------+----------------+--------------+--------------+-------------+---------------\n- regress_test_user_canlogin | f        | t          | f             | f           | t           | f              | f            |           -1 |             | \n+SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, regexp_replace(rolpassword, '(SCRAM-SHA-256)\\$(\\d+):([a-zA-Z0-9+/=]+)\\$([a-zA-Z0-9+=/]+):([a-zA-Z0-9+/=]+)', '\\1$\\2:<salt>$<storedkey>:<serverkey>'), rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_user_canlogin';\n+          rolname           | rolsuper | rolinherit | rolcreaterole | rolcreatedb | rolcanlogin | rolreplication | rolbypassrls | rolconnlimit |                  regexp_replace                   | rolvaliduntil \n+----------------------------+----------+------------+---------------+-------------+-------------+----------------+--------------+--------------+---------------------------------------------------+---------------\n+ regress_test_user_canlogin | f        | t          | f             | f           | t           | f              | f            |           -1 | SCRAM-SHA-256$4096:<salt>$<storedkey>:<serverkey> | \n (1 row)\n \n ALTER USER regress_test_user_canlogin WITH NOLOGIN;\n-SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, rolpassword, rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_user_canlogin';\n-          rolname           | rolsuper | rolinherit | rolcreaterole | rolcreatedb | rolcanlogin | rolreplication | rolbypassrls | rolconnlimit | rolpassword | rolvaliduntil \n-----------------------------+----------+------------+---------------+-------------+-------------+----------------+--------------+--------------+-------------+---------------\n- regress_test_user_canlogin | f        | t  
        | f             | f           | f           | f              | f            |           -1 |             | \n+SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, regexp_replace(rolpassword, '(SCRAM-SHA-256)\\$(\\d+):([a-zA-Z0-9+/=]+)\\$([a-zA-Z0-9+=/]+):([a-zA-Z0-9+/=]+)', '\\1$\\2:<salt>$<storedkey>:<serverkey>'), rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_user_canlogin';\n+          rolname           | rolsuper | rolinherit | rolcreaterole | rolcreatedb | rolcanlogin | rolreplication | rolbypassrls | rolconnlimit |                  regexp_replace                   | rolvaliduntil \n+----------------------------+----------+------------+---------------+-------------+-------------+----------------+--------------+--------------+---------------------------------------------------+---------------\n+ regress_test_user_canlogin | f        | t          | f             | f           | f           | f              | f            |           -1 | SCRAM-SHA-256$4096:<salt>$<storedkey>:<serverkey> | \n (1 row)\n \n -- default for replication is false\n-CREATE ROLE regress_test_def_replication;\n-SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, rolpassword, rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_def_replication';\n-           rolname            | rolsuper | rolinherit | rolcreaterole | rolcreatedb | rolcanlogin | rolreplication | rolbypassrls | rolconnlimit | rolpassword | rolvaliduntil \n-------------------------------+----------+------------+---------------+-------------+-------------+----------------+--------------+--------------+-------------+---------------\n- regress_test_def_replication | f        | t          | f             | f           | f           | f              | f            |           -1 |             | \n+CREATE ROLE regress_test_def_replication PASSWORD 
NEON_PASSWORD_PLACEHOLDER;\n+SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, regexp_replace(rolpassword, '(SCRAM-SHA-256)\\$(\\d+):([a-zA-Z0-9+/=]+)\\$([a-zA-Z0-9+=/]+):([a-zA-Z0-9+/=]+)', '\\1$\\2:<salt>$<storedkey>:<serverkey>'), rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_def_replication';\n+           rolname            | rolsuper | rolinherit | rolcreaterole | rolcreatedb | rolcanlogin | rolreplication | rolbypassrls | rolconnlimit |                  regexp_replace                   | rolvaliduntil \n+------------------------------+----------+------------+---------------+-------------+-------------+----------------+--------------+--------------+---------------------------------------------------+---------------\n+ regress_test_def_replication | f        | t          | f             | f           | f           | f              | f            |           -1 | SCRAM-SHA-256$4096:<salt>$<storedkey>:<serverkey> | \n (1 row)\n \n-CREATE ROLE regress_test_replication WITH REPLICATION;\n-SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, rolpassword, rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_replication';\n-         rolname          | rolsuper | rolinherit | rolcreaterole | rolcreatedb | rolcanlogin | rolreplication | rolbypassrls | rolconnlimit | rolpassword | rolvaliduntil \n---------------------------+----------+------------+---------------+-------------+-------------+----------------+--------------+--------------+-------------+---------------\n- regress_test_replication | f        | t          | f             | f           | f           | t              | f            |           -1 |             | \n+CREATE ROLE regress_test_replication WITH REPLICATION PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, 
rolbypassrls, rolconnlimit, regexp_replace(rolpassword, '(SCRAM-SHA-256)\\$(\\d+):([a-zA-Z0-9+/=]+)\\$([a-zA-Z0-9+=/]+):([a-zA-Z0-9+/=]+)', '\\1$\\2:<salt>$<storedkey>:<serverkey>'), rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_replication';\n+         rolname          | rolsuper | rolinherit | rolcreaterole | rolcreatedb | rolcanlogin | rolreplication | rolbypassrls | rolconnlimit |                  regexp_replace                   | rolvaliduntil \n+--------------------------+----------+------------+---------------+-------------+-------------+----------------+--------------+--------------+---------------------------------------------------+---------------\n+ regress_test_replication | f        | t          | f             | f           | f           | t              | f            |           -1 | SCRAM-SHA-256$4096:<salt>$<storedkey>:<serverkey> | \n (1 row)\n \n ALTER ROLE regress_test_replication WITH NOREPLICATION;\n-SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, rolpassword, rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_replication';\n-         rolname          | rolsuper | rolinherit | rolcreaterole | rolcreatedb | rolcanlogin | rolreplication | rolbypassrls | rolconnlimit | rolpassword | rolvaliduntil \n---------------------------+----------+------------+---------------+-------------+-------------+----------------+--------------+--------------+-------------+---------------\n- regress_test_replication | f        | t          | f             | f           | f           | f              | f            |           -1 |             | \n+SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, regexp_replace(rolpassword, '(SCRAM-SHA-256)\\$(\\d+):([a-zA-Z0-9+/=]+)\\$([a-zA-Z0-9+=/]+):([a-zA-Z0-9+/=]+)', '\\1$\\2:<salt>$<storedkey>:<serverkey>'), rolvaliduntil FROM pg_authid WHERE rolname = 
'regress_test_replication';\n+         rolname          | rolsuper | rolinherit | rolcreaterole | rolcreatedb | rolcanlogin | rolreplication | rolbypassrls | rolconnlimit |                  regexp_replace                   | rolvaliduntil \n+--------------------------+----------+------------+---------------+-------------+-------------+----------------+--------------+--------------+---------------------------------------------------+---------------\n+ regress_test_replication | f        | t          | f             | f           | f           | f              | f            |           -1 | SCRAM-SHA-256$4096:<salt>$<storedkey>:<serverkey> | \n (1 row)\n \n ALTER ROLE regress_test_replication WITH REPLICATION;\n-SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, rolpassword, rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_replication';\n-         rolname          | rolsuper | rolinherit | rolcreaterole | rolcreatedb | rolcanlogin | rolreplication | rolbypassrls | rolconnlimit | rolpassword | rolvaliduntil \n---------------------------+----------+------------+---------------+-------------+-------------+----------------+--------------+--------------+-------------+---------------\n- regress_test_replication | f        | t          | f             | f           | f           | t              | f            |           -1 |             | \n+SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, regexp_replace(rolpassword, '(SCRAM-SHA-256)\\$(\\d+):([a-zA-Z0-9+/=]+)\\$([a-zA-Z0-9+=/]+):([a-zA-Z0-9+/=]+)', '\\1$\\2:<salt>$<storedkey>:<serverkey>'), rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_replication';\n+         rolname          | rolsuper | rolinherit | rolcreaterole | rolcreatedb | rolcanlogin | rolreplication | rolbypassrls | rolconnlimit |                  regexp_replace                   | 
rolvaliduntil \n+--------------------------+----------+------------+---------------+-------------+-------------+----------------+--------------+--------------+---------------------------------------------------+---------------\n+ regress_test_replication | f        | t          | f             | f           | f           | t              | f            |           -1 | SCRAM-SHA-256$4096:<salt>$<storedkey>:<serverkey> | \n (1 row)\n \n -- default for bypassrls is false\n-CREATE ROLE regress_test_def_bypassrls;\n-SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, rolpassword, rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_def_bypassrls';\n-          rolname           | rolsuper | rolinherit | rolcreaterole | rolcreatedb | rolcanlogin | rolreplication | rolbypassrls | rolconnlimit | rolpassword | rolvaliduntil \n-----------------------------+----------+------------+---------------+-------------+-------------+----------------+--------------+--------------+-------------+---------------\n- regress_test_def_bypassrls | f        | t          | f             | f           | f           | f              | f            |           -1 |             | \n+CREATE ROLE regress_test_def_bypassrls PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, regexp_replace(rolpassword, '(SCRAM-SHA-256)\\$(\\d+):([a-zA-Z0-9+/=]+)\\$([a-zA-Z0-9+=/]+):([a-zA-Z0-9+/=]+)', '\\1$\\2:<salt>$<storedkey>:<serverkey>'), rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_def_bypassrls';\n+          rolname           | rolsuper | rolinherit | rolcreaterole | rolcreatedb | rolcanlogin | rolreplication | rolbypassrls | rolconnlimit |                  regexp_replace                   | rolvaliduntil 
\n+----------------------------+----------+------------+---------------+-------------+-------------+----------------+--------------+--------------+---------------------------------------------------+---------------\n+ regress_test_def_bypassrls | f        | t          | f             | f           | f           | f              | f            |           -1 | SCRAM-SHA-256$4096:<salt>$<storedkey>:<serverkey> | \n (1 row)\n \n-CREATE ROLE regress_test_bypassrls WITH BYPASSRLS;\n-SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, rolpassword, rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_bypassrls';\n-        rolname         | rolsuper | rolinherit | rolcreaterole | rolcreatedb | rolcanlogin | rolreplication | rolbypassrls | rolconnlimit | rolpassword | rolvaliduntil \n-------------------------+----------+------------+---------------+-------------+-------------+----------------+--------------+--------------+-------------+---------------\n- regress_test_bypassrls | f        | t          | f             | f           | f           | f              | t            |           -1 |             | \n+CREATE ROLE regress_test_bypassrls WITH BYPASSRLS PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, regexp_replace(rolpassword, '(SCRAM-SHA-256)\\$(\\d+):([a-zA-Z0-9+/=]+)\\$([a-zA-Z0-9+=/]+):([a-zA-Z0-9+/=]+)', '\\1$\\2:<salt>$<storedkey>:<serverkey>'), rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_bypassrls';\n+        rolname         | rolsuper | rolinherit | rolcreaterole | rolcreatedb | rolcanlogin | rolreplication | rolbypassrls | rolconnlimit |                  regexp_replace                   | rolvaliduntil 
\n+------------------------+----------+------------+---------------+-------------+-------------+----------------+--------------+--------------+---------------------------------------------------+---------------\n+ regress_test_bypassrls | f        | t          | f             | f           | f           | f              | t            |           -1 | SCRAM-SHA-256$4096:<salt>$<storedkey>:<serverkey> | \n (1 row)\n \n ALTER ROLE regress_test_bypassrls WITH NOBYPASSRLS;\n-SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, rolpassword, rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_bypassrls';\n-        rolname         | rolsuper | rolinherit | rolcreaterole | rolcreatedb | rolcanlogin | rolreplication | rolbypassrls | rolconnlimit | rolpassword | rolvaliduntil \n-------------------------+----------+------------+---------------+-------------+-------------+----------------+--------------+--------------+-------------+---------------\n- regress_test_bypassrls | f        | t          | f             | f           | f           | f              | f            |           -1 |             | \n+SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, regexp_replace(rolpassword, '(SCRAM-SHA-256)\\$(\\d+):([a-zA-Z0-9+/=]+)\\$([a-zA-Z0-9+=/]+):([a-zA-Z0-9+/=]+)', '\\1$\\2:<salt>$<storedkey>:<serverkey>'), rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_bypassrls';\n+        rolname         | rolsuper | rolinherit | rolcreaterole | rolcreatedb | rolcanlogin | rolreplication | rolbypassrls | rolconnlimit |                  regexp_replace                   | rolvaliduntil \n+------------------------+----------+------------+---------------+-------------+-------------+----------------+--------------+--------------+---------------------------------------------------+---------------\n+ regress_test_bypassrls | f        | t    
      | f             | f           | f           | f              | f            |           -1 | SCRAM-SHA-256$4096:<salt>$<storedkey>:<serverkey> | \n (1 row)\n \n ALTER ROLE regress_test_bypassrls WITH BYPASSRLS;\n-SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, rolpassword, rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_bypassrls';\n-        rolname         | rolsuper | rolinherit | rolcreaterole | rolcreatedb | rolcanlogin | rolreplication | rolbypassrls | rolconnlimit | rolpassword | rolvaliduntil \n-------------------------+----------+------------+---------------+-------------+-------------+----------------+--------------+--------------+-------------+---------------\n- regress_test_bypassrls | f        | t          | f             | f           | f           | f              | t            |           -1 |             | \n+SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, regexp_replace(rolpassword, '(SCRAM-SHA-256)\\$(\\d+):([a-zA-Z0-9+/=]+)\\$([a-zA-Z0-9+=/]+):([a-zA-Z0-9+/=]+)', '\\1$\\2:<salt>$<storedkey>:<serverkey>'), rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_bypassrls';\n+        rolname         | rolsuper | rolinherit | rolcreaterole | rolcreatedb | rolcanlogin | rolreplication | rolbypassrls | rolconnlimit |                  regexp_replace                   | rolvaliduntil \n+------------------------+----------+------------+---------------+-------------+-------------+----------------+--------------+--------------+---------------------------------------------------+---------------\n+ regress_test_bypassrls | f        | t          | f             | f           | f           | f              | t            |           -1 | SCRAM-SHA-256$4096:<salt>$<storedkey>:<serverkey> | \n (1 row)\n \n -- clean up roles\ndiff --git a/src/test/regress/expected/rowsecurity.out 
b/src/test/regress/expected/rowsecurity.out\nindex 51bba175ec..45355a9c66 100644\n--- a/src/test/regress/expected/rowsecurity.out\n+++ b/src/test/regress/expected/rowsecurity.out\n@@ -14,13 +14,13 @@ DROP ROLE IF EXISTS regress_rls_group2;\n DROP SCHEMA IF EXISTS regress_rls_schema CASCADE;\n RESET client_min_messages;\n -- initial setup\n-CREATE USER regress_rls_alice NOLOGIN;\n-CREATE USER regress_rls_bob NOLOGIN;\n-CREATE USER regress_rls_carol NOLOGIN;\n-CREATE USER regress_rls_dave NOLOGIN;\n-CREATE USER regress_rls_exempt_user BYPASSRLS NOLOGIN;\n-CREATE ROLE regress_rls_group1 NOLOGIN;\n-CREATE ROLE regress_rls_group2 NOLOGIN;\n+CREATE USER regress_rls_alice NOLOGIN PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE USER regress_rls_bob NOLOGIN PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE USER regress_rls_carol NOLOGIN PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE USER regress_rls_dave NOLOGIN PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE USER regress_rls_exempt_user BYPASSRLS NOLOGIN PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE ROLE regress_rls_group1 NOLOGIN PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE ROLE regress_rls_group2 NOLOGIN PASSWORD NEON_PASSWORD_PLACEHOLDER;\n GRANT regress_rls_group1 TO regress_rls_bob;\n GRANT regress_rls_group2 TO regress_rls_carol;\n CREATE SCHEMA regress_rls_schema;\n@@ -4423,8 +4423,8 @@ SELECT count(*) = 0 FROM pg_depend\n \n -- DROP OWNED BY testing\n RESET SESSION AUTHORIZATION;\n-CREATE ROLE regress_rls_dob_role1;\n-CREATE ROLE regress_rls_dob_role2;\n+CREATE ROLE regress_rls_dob_role1 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE ROLE regress_rls_dob_role2 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n CREATE TABLE dob_t1 (c1 int);\n CREATE TABLE dob_t2 (c1 int) PARTITION BY RANGE (c1);\n CREATE POLICY p1 ON dob_t1 TO regress_rls_dob_role1 USING (true);\ndiff --git a/src/test/regress/expected/rules.out b/src/test/regress/expected/rules.out\nindex 13178e2b3d..9a3ebfea3c 100644\n--- a/src/test/regress/expected/rules.out\n+++ 
b/src/test/regress/expected/rules.out\n@@ -3799,7 +3799,7 @@ DROP TABLE ruletest2;\n -- Test non-SELECT rule on security invoker view.\n -- Should use view owner's permissions.\n --\n-CREATE USER regress_rule_user1;\n+CREATE USER regress_rule_user1 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n CREATE TABLE ruletest_t1 (x int);\n CREATE TABLE ruletest_t2 (x int);\n CREATE VIEW ruletest_v1 WITH (security_invoker=true) AS\ndiff --git a/src/test/regress/expected/security_label.out b/src/test/regress/expected/security_label.out\nindex a8e01a6220..83543b250a 100644\n--- a/src/test/regress/expected/security_label.out\n+++ b/src/test/regress/expected/security_label.out\n@@ -6,8 +6,8 @@ SET client_min_messages TO 'warning';\n DROP ROLE IF EXISTS regress_seclabel_user1;\n DROP ROLE IF EXISTS regress_seclabel_user2;\n RESET client_min_messages;\n-CREATE USER regress_seclabel_user1 WITH CREATEROLE;\n-CREATE USER regress_seclabel_user2;\n+CREATE USER regress_seclabel_user1 WITH CREATEROLE PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE USER regress_seclabel_user2 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n CREATE TABLE seclabel_tbl1 (a int, b text);\n CREATE TABLE seclabel_tbl2 (x int, y text);\n CREATE VIEW seclabel_view1 AS SELECT * FROM seclabel_tbl2;\ndiff --git a/src/test/regress/expected/select_into.out b/src/test/regress/expected/select_into.out\nindex b79fe9a1c0..e29fab88ab 100644\n--- a/src/test/regress/expected/select_into.out\n+++ b/src/test/regress/expected/select_into.out\n@@ -15,7 +15,7 @@ DROP TABLE sitmp1;\n -- SELECT INTO and INSERT permission, if owner is not allowed to insert.\n --\n CREATE SCHEMA selinto_schema;\n-CREATE USER regress_selinto_user;\n+CREATE USER regress_selinto_user PASSWORD NEON_PASSWORD_PLACEHOLDER;\n ALTER DEFAULT PRIVILEGES FOR ROLE regress_selinto_user\n \t  REVOKE INSERT ON TABLES FROM regress_selinto_user;\n GRANT ALL ON SCHEMA selinto_schema TO public;\ndiff --git a/src/test/regress/expected/select_parallel.out 
b/src/test/regress/expected/select_parallel.out\nindex 496ddb1289..a4fea8e367 100644\n--- a/src/test/regress/expected/select_parallel.out\n+++ b/src/test/regress/expected/select_parallel.out\n@@ -1295,7 +1295,7 @@ SELECT 1 FROM tenk1_vw_sec\n \n rollback;\n -- test that function option SET ROLE works in parallel workers.\n-create role regress_parallel_worker;\n+create role regress_parallel_worker PASSWORD NEON_PASSWORD_PLACEHOLDER;\n create function set_and_report_role() returns text as\n   $$ select current_setting('role') $$ language sql parallel safe\n   set role = regress_parallel_worker;\ndiff --git a/src/test/regress/expected/select_views.out b/src/test/regress/expected/select_views.out\nindex 1aeed8452b..7d9427d070 100644\n--- a/src/test/regress/expected/select_views.out\n+++ b/src/test/regress/expected/select_views.out\n@@ -1250,7 +1250,7 @@ SELECT * FROM toyemp WHERE name = 'sharon';\n --\n -- Test for Leaky view scenario\n --\n-CREATE ROLE regress_alice;\n+CREATE ROLE regress_alice PASSWORD NEON_PASSWORD_PLACEHOLDER;\n CREATE FUNCTION f_leak (text)\n        RETURNS bool LANGUAGE 'plpgsql' COST 0.0000001\n        AS 'BEGIN RAISE NOTICE ''f_leak => %'', $1; RETURN true; END';\ndiff --git a/src/test/regress/expected/sequence.out b/src/test/regress/expected/sequence.out\nindex fa8059dbcd..190d41afc7 100644\n--- a/src/test/regress/expected/sequence.out\n+++ b/src/test/regress/expected/sequence.out\n@@ -22,7 +22,7 @@ CREATE SEQUENCE sequence_testx OWNED BY pg_class_oid_index.oid;  -- not a table\n ERROR:  sequence cannot be owned by relation \"pg_class_oid_index\"\n DETAIL:  This operation is not supported for indexes.\n CREATE SEQUENCE sequence_testx OWNED BY pg_class.relname;  -- not same schema\n-ERROR:  sequence must be in same schema as table it is linked to\n+ERROR:  sequence must have same owner as table it is linked to\n CREATE TABLE sequence_test_table (a int);\n CREATE SEQUENCE sequence_testx OWNED BY sequence_test_table.b;  -- wrong column\n ERROR:  
column \"b\" of relation \"sequence_test_table\" does not exist\n@@ -640,7 +640,7 @@ SELECT setval('sequence_test2', 1);  -- error\n ERROR:  cannot execute setval() in a read-only transaction\n ROLLBACK;\n -- privileges tests\n-CREATE USER regress_seq_user;\n+CREATE USER regress_seq_user PASSWORD NEON_PASSWORD_PLACEHOLDER;\n -- nextval\n BEGIN;\n SET LOCAL SESSION AUTHORIZATION regress_seq_user;\ndiff --git a/src/test/regress/expected/stats.out b/src/test/regress/expected/stats.out\nindex 6e08898b18..7eb5385b7a 100644\n--- a/src/test/regress/expected/stats.out\n+++ b/src/test/regress/expected/stats.out\n@@ -1301,37 +1301,6 @@ SELECT current_setting('fsync') = 'off'\n  t\n (1 row)\n \n--- Change the tablespace so that the table is rewritten directly, then SELECT\n--- from it to cause it to be read back into shared buffers.\n-SELECT sum(reads) AS io_sum_shared_before_reads\n-  FROM pg_stat_io WHERE context = 'normal' AND object = 'relation' \\gset\n--- Do this in a transaction to prevent spurious failures due to concurrent accesses to our newly\n--- rewritten table, e.g. by autovacuum.\n-BEGIN;\n-ALTER TABLE test_io_shared SET TABLESPACE regress_tblspace;\n--- SELECT from the table so that the data is read into shared buffers and\n--- context 'normal', object 'relation' reads are counted.\n-SELECT COUNT(*) FROM test_io_shared;\n- count \n--------\n-   100\n-(1 row)\n-\n-COMMIT;\n-SELECT pg_stat_force_next_flush();\n- pg_stat_force_next_flush \n---------------------------\n- \n-(1 row)\n-\n-SELECT sum(reads) AS io_sum_shared_after_reads\n-  FROM pg_stat_io WHERE context = 'normal' AND object = 'relation'  \\gset\n-SELECT :io_sum_shared_after_reads > :io_sum_shared_before_reads;\n- ?column? 
\n-----------\n- t\n-(1 row)\n-\n SELECT sum(hits) AS io_sum_shared_before_hits\n   FROM pg_stat_io WHERE context = 'normal' AND object = 'relation' \\gset\n -- Select from the table again to count hits.\n@@ -1433,6 +1402,7 @@ SELECT :io_sum_local_after_evictions > :io_sum_local_before_evictions,\n -- local buffers, exercising a different codepath than standard local buffer\n -- writes.\n ALTER TABLE test_io_local SET TABLESPACE regress_tblspace;\n+ERROR:  tablespace \"regress_tblspace\" does not exist\n SELECT pg_stat_force_next_flush();\n  pg_stat_force_next_flush \n --------------------------\n@@ -1444,7 +1414,7 @@ SELECT sum(writes) AS io_sum_local_new_tblspc_writes\n SELECT :io_sum_local_new_tblspc_writes > :io_sum_local_after_writes;\n  ?column? \n ----------\n- t\n+ f\n (1 row)\n \n RESET temp_buffers;\ndiff --git a/src/test/regress/expected/stats_ext.out b/src/test/regress/expected/stats_ext.out\nindex 8c4da95508..346961f92a 100644\n--- a/src/test/regress/expected/stats_ext.out\n+++ b/src/test/regress/expected/stats_ext.out\n@@ -70,7 +70,7 @@ DROP TABLE ext_stats_test;\n CREATE TABLE ab1 (a INTEGER, b INTEGER, c INTEGER);\n CREATE STATISTICS IF NOT EXISTS ab1_a_b_stats ON a, b FROM ab1;\n COMMENT ON STATISTICS ab1_a_b_stats IS 'new comment';\n-CREATE ROLE regress_stats_ext;\n+CREATE ROLE regress_stats_ext PASSWORD NEON_PASSWORD_PLACEHOLDER;\n SET SESSION AUTHORIZATION regress_stats_ext;\n COMMENT ON STATISTICS ab1_a_b_stats IS 'changed comment';\n ERROR:  must be owner of statistics object ab1_a_b_stats\n@@ -3214,7 +3214,7 @@ set search_path to public, stts_s1;\n  stts_s1 | stts_foo               | col1, col2 FROM stts_t3                                          | defined   | defined      | defined\n (10 rows)\n \n-create role regress_stats_ext nosuperuser;\n+create role regress_stats_ext nosuperuser PASSWORD NEON_PASSWORD_PLACEHOLDER;\n set role regress_stats_ext;\n \\dX\n                                                        List of extended 
statistics\n@@ -3237,7 +3237,7 @@ drop schema stts_s1, stts_s2 cascade;\n drop user regress_stats_ext;\n reset search_path;\n -- User with no access\n-CREATE USER regress_stats_user1;\n+CREATE USER regress_stats_user1 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n GRANT USAGE ON SCHEMA tststats TO regress_stats_user1;\n SET SESSION AUTHORIZATION regress_stats_user1;\n SELECT * FROM tststats.priv_test_tbl; -- Permission denied\ndiff --git a/src/test/regress/expected/subscription.out b/src/test/regress/expected/subscription.out\nindex 0f2a25cdc1..de168e39d9 100644\n--- a/src/test/regress/expected/subscription.out\n+++ b/src/test/regress/expected/subscription.out\n@@ -1,10 +1,10 @@\n --\n -- SUBSCRIPTION\n --\n-CREATE ROLE regress_subscription_user LOGIN SUPERUSER;\n-CREATE ROLE regress_subscription_user2;\n-CREATE ROLE regress_subscription_user3 IN ROLE pg_create_subscription;\n-CREATE ROLE regress_subscription_user_dummy LOGIN NOSUPERUSER;\n+CREATE ROLE regress_subscription_user LOGIN SUPERUSER PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE ROLE regress_subscription_user2 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE ROLE regress_subscription_user3 PASSWORD NEON_PASSWORD_PLACEHOLDER IN ROLE pg_create_subscription;\n+CREATE ROLE regress_subscription_user_dummy LOGIN NOSUPERUSER PASSWORD NEON_PASSWORD_PLACEHOLDER;\n SET SESSION AUTHORIZATION 'regress_subscription_user';\n -- fail - no publications\n CREATE SUBSCRIPTION regress_testsub CONNECTION 'foo';\ndiff --git a/src/test/regress/expected/test_setup.out b/src/test/regress/expected/test_setup.out\nindex 3d0eeec996..2c3932139d 100644\n--- a/src/test/regress/expected/test_setup.out\n+++ b/src/test/regress/expected/test_setup.out\n@@ -21,6 +21,7 @@ GRANT ALL ON SCHEMA public TO public;\n -- Create a tablespace we can use in tests.\n SET allow_in_place_tablespaces = true;\n CREATE TABLESPACE regress_tblspace LOCATION '';\n+ERROR:  CREATE TABLESPACE is not supported on Neon\n --\n -- These tables have traditionally been referenced 
by many tests,\n -- so create and populate them.  Insert only non-error values here.\n@@ -111,7 +112,8 @@ CREATE TABLE onek (\n \tstring4\t\tname\n );\n \\set filename :abs_srcdir '/data/onek.data'\n-COPY onek FROM :'filename';\n+\\set command '\\\\copy onek FROM ' :'filename';\n+:command\n VACUUM ANALYZE onek;\n CREATE TABLE onek2 AS SELECT * FROM onek;\n VACUUM ANALYZE onek2;\n@@ -134,7 +136,8 @@ CREATE TABLE tenk1 (\n \tstring4\t\tname\n );\n \\set filename :abs_srcdir '/data/tenk.data'\n-COPY tenk1 FROM :'filename';\n+\\set command '\\\\copy tenk1 FROM ' :'filename';\n+:command\n VACUUM ANALYZE tenk1;\n CREATE TABLE tenk2 AS SELECT * FROM tenk1;\n VACUUM ANALYZE tenk2;\n@@ -144,20 +147,23 @@ CREATE TABLE person (\n \tlocation \tpoint\n );\n \\set filename :abs_srcdir '/data/person.data'\n-COPY person FROM :'filename';\n+\\set command '\\\\copy person FROM ' :'filename';\n+:command\n VACUUM ANALYZE person;\n CREATE TABLE emp (\n \tsalary \t\tint4,\n \tmanager \tname\n ) INHERITS (person);\n \\set filename :abs_srcdir '/data/emp.data'\n-COPY emp FROM :'filename';\n+\\set command '\\\\copy emp FROM ' :'filename';\n+:command\n VACUUM ANALYZE emp;\n CREATE TABLE student (\n \tgpa \t\tfloat8\n ) INHERITS (person);\n \\set filename :abs_srcdir '/data/student.data'\n-COPY student FROM :'filename';\n+\\set command '\\\\copy student FROM ' :'filename';\n+:command\n VACUUM ANALYZE student;\n CREATE TABLE stud_emp (\n \tpercent \tint4\n@@ -166,14 +172,16 @@ NOTICE:  merging multiple inherited definitions of column \"name\"\n NOTICE:  merging multiple inherited definitions of column \"age\"\n NOTICE:  merging multiple inherited definitions of column \"location\"\n \\set filename :abs_srcdir '/data/stud_emp.data'\n-COPY stud_emp FROM :'filename';\n+\\set command '\\\\copy stud_emp FROM ' :'filename';\n+:command\n VACUUM ANALYZE stud_emp;\n CREATE TABLE road (\n \tname\t\ttext,\n \tthepath \tpath\n );\n \\set filename :abs_srcdir '/data/streets.data'\n-COPY road FROM 
:'filename';\n+\\set command '\\\\copy road FROM ' :'filename';\n+:command\n VACUUM ANALYZE road;\n CREATE TABLE ihighway () INHERITS (road);\n INSERT INTO ihighway\ndiff --git a/src/test/regress/expected/tsearch.out b/src/test/regress/expected/tsearch.out\nindex 9fad6c8b04..a1b8e82389 100644\n--- a/src/test/regress/expected/tsearch.out\n+++ b/src/test/regress/expected/tsearch.out\n@@ -63,7 +63,8 @@ CREATE TABLE test_tsvector(\n \ta tsvector\n );\n \\set filename :abs_srcdir '/data/tsearch.data'\n-COPY test_tsvector FROM :'filename';\n+\\set command '\\\\copy test_tsvector FROM ' :'filename';\n+:command\n ANALYZE test_tsvector;\n -- test basic text search behavior without indexes, then with\n SELECT count(*) FROM test_tsvector WHERE a @@ 'wr|qh';\ndiff --git a/src/test/regress/expected/updatable_views.out b/src/test/regress/expected/updatable_views.out\nindex 442b55120c..7224709d6f 100644\n--- a/src/test/regress/expected/updatable_views.out\n+++ b/src/test/regress/expected/updatable_views.out\n@@ -1338,9 +1338,9 @@ NOTICE:  drop cascades to 2 other objects\n DETAIL:  drop cascades to view rw_view1\n drop cascades to function rw_view1_aa(rw_view1)\n -- permissions checks\n-CREATE USER regress_view_user1;\n-CREATE USER regress_view_user2;\n-CREATE USER regress_view_user3;\n+CREATE USER regress_view_user1 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE USER regress_view_user2 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE USER regress_view_user3 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n SET SESSION AUTHORIZATION regress_view_user1;\n CREATE TABLE base_tbl(a int, b text, c float);\n INSERT INTO base_tbl VALUES (1, 'Row 1', 1.0);\n@@ -3734,8 +3734,8 @@ DETAIL:  View columns that are not columns of their base relation are not updata\n drop view uv_iocu_view;\n drop table uv_iocu_tab;\n -- ON CONFLICT DO UPDATE permissions checks\n-create user regress_view_user1;\n-create user regress_view_user2;\n+create user regress_view_user1 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+create user 
regress_view_user2 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n set session authorization regress_view_user1;\n create table base_tbl(a int unique, b text, c float);\n insert into base_tbl values (1,'xxx',1.0);\ndiff --git a/src/test/regress/expected/update.out b/src/test/regress/expected/update.out\nindex 1b27d132d7..25b109d609 100644\n--- a/src/test/regress/expected/update.out\n+++ b/src/test/regress/expected/update.out\n@@ -608,7 +608,7 @@ DROP FUNCTION func_parted_mod_b();\n -- RLS policies with update-row-movement\n -----------------------------------------\n ALTER TABLE range_parted ENABLE ROW LEVEL SECURITY;\n-CREATE USER regress_range_parted_user;\n+CREATE USER regress_range_parted_user PASSWORD NEON_PASSWORD_PLACEHOLDER;\n GRANT ALL ON range_parted, mintab TO regress_range_parted_user;\n CREATE POLICY seeall ON range_parted AS PERMISSIVE FOR SELECT USING (true);\n CREATE POLICY policy_range_parted ON range_parted for UPDATE USING (true) WITH CHECK (c % 2 = 0);\ndiff --git a/src/test/regress/expected/vacuum.out b/src/test/regress/expected/vacuum.out\nindex 2eba712887..d46877aca9 100644\n--- a/src/test/regress/expected/vacuum.out\n+++ b/src/test/regress/expected/vacuum.out\n@@ -433,7 +433,7 @@ CREATE TABLE vacowned (a int);\n CREATE TABLE vacowned_parted (a int) PARTITION BY LIST (a);\n CREATE TABLE vacowned_part1 PARTITION OF vacowned_parted FOR VALUES IN (1);\n CREATE TABLE vacowned_part2 PARTITION OF vacowned_parted FOR VALUES IN (2);\n-CREATE ROLE regress_vacuum;\n+CREATE ROLE regress_vacuum PASSWORD NEON_PASSWORD_PLACEHOLDER;\n SET ROLE regress_vacuum;\n -- Simple table\n VACUUM vacowned;\ndiff --git a/src/test/regress/parallel_schedule b/src/test/regress/parallel_schedule\nindex f53a526f7c..c07b093476 100644\n--- a/src/test/regress/parallel_schedule\n+++ b/src/test/regress/parallel_schedule\n@@ -135,4 +135,4 @@ test: fast_default\n \n # run tablespace test at the end because it drops the tablespace created during\n # setup that other tests may use.\n-test: 
tablespace\n+#test: tablespace\ndiff --git a/src/test/regress/sql/aggregates.sql b/src/test/regress/sql/aggregates.sql\nindex 1a18ca3d8f..b2009628d0 100644\n--- a/src/test/regress/sql/aggregates.sql\n+++ b/src/test/regress/sql/aggregates.sql\n@@ -15,7 +15,8 @@ CREATE TABLE aggtest (\n );\n \n \\set filename :abs_srcdir '/data/agg.data'\n-COPY aggtest FROM :'filename';\n+\\set command '\\\\copy aggtest FROM ' :'filename';\n+:command\n \n ANALYZE aggtest;\n \ndiff --git a/src/test/regress/sql/alter_generic.sql b/src/test/regress/sql/alter_generic.sql\nindex de58d268d3..9d38df7f42 100644\n--- a/src/test/regress/sql/alter_generic.sql\n+++ b/src/test/regress/sql/alter_generic.sql\n@@ -22,9 +22,9 @@ DROP ROLE IF EXISTS regress_alter_generic_user3;\n \n RESET client_min_messages;\n \n-CREATE USER regress_alter_generic_user3;\n-CREATE USER regress_alter_generic_user2;\n-CREATE USER regress_alter_generic_user1 IN ROLE regress_alter_generic_user3;\n+CREATE USER regress_alter_generic_user3 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE USER regress_alter_generic_user2 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE USER regress_alter_generic_user1 PASSWORD NEON_PASSWORD_PLACEHOLDER IN ROLE regress_alter_generic_user3;\n \n CREATE SCHEMA alt_nsp1;\n CREATE SCHEMA alt_nsp2;\n@@ -316,7 +316,7 @@ DROP OPERATOR FAMILY alt_opf4 USING btree;\n \n -- Should fail. Need to be SUPERUSER to do ALTER OPERATOR FAMILY .. ADD / DROP\n BEGIN TRANSACTION;\n-CREATE ROLE regress_alter_generic_user5 NOSUPERUSER;\n+CREATE ROLE regress_alter_generic_user5 PASSWORD NEON_PASSWORD_PLACEHOLDER NOSUPERUSER;\n CREATE OPERATOR FAMILY alt_opf5 USING btree;\n SET ROLE regress_alter_generic_user5;\n ALTER OPERATOR FAMILY alt_opf5 USING btree ADD OPERATOR 1 < (int4, int2), FUNCTION 1 btint42cmp(int4, int2);\n@@ -326,7 +326,7 @@ ROLLBACK;\n \n -- Should fail. Need rights to namespace for ALTER OPERATOR FAMILY .. 
ADD / DROP\n BEGIN TRANSACTION;\n-CREATE ROLE regress_alter_generic_user6;\n+CREATE ROLE regress_alter_generic_user6 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n CREATE SCHEMA alt_nsp6;\n REVOKE ALL ON SCHEMA alt_nsp6 FROM regress_alter_generic_user6;\n CREATE OPERATOR FAMILY alt_nsp6.alt_opf6 USING btree;\ndiff --git a/src/test/regress/sql/alter_operator.sql b/src/test/regress/sql/alter_operator.sql\nindex 8faecf7830..bb8b8e14ea 100644\n--- a/src/test/regress/sql/alter_operator.sql\n+++ b/src/test/regress/sql/alter_operator.sql\n@@ -83,7 +83,7 @@ ALTER OPERATOR & (bit, bit) SET (\"Restrict\" = _int_contsel, \"Join\" = _int_contjo\n --\n -- Test permission check. Must be owner to ALTER OPERATOR.\n --\n-CREATE USER regress_alter_op_user;\n+CREATE USER regress_alter_op_user PASSWORD NEON_PASSWORD_PLACEHOLDER;\n SET SESSION AUTHORIZATION regress_alter_op_user;\n \n ALTER OPERATOR === (boolean, boolean) SET (RESTRICT = NONE);\ndiff --git a/src/test/regress/sql/alter_table.sql b/src/test/regress/sql/alter_table.sql\nindex da12724473..86f5ae5444 100644\n--- a/src/test/regress/sql/alter_table.sql\n+++ b/src/test/regress/sql/alter_table.sql\n@@ -7,7 +7,7 @@ SET client_min_messages TO 'warning';\n DROP ROLE IF EXISTS regress_alter_table_user1;\n RESET client_min_messages;\n \n-CREATE USER regress_alter_table_user1;\n+CREATE USER regress_alter_table_user1 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n \n --\n -- add attribute\n@@ -2404,8 +2404,8 @@ DROP TABLE fail_part;\n ALTER TABLE list_parted ATTACH PARTITION nonexistent FOR VALUES IN (1);\n \n -- check ownership of the source table\n-CREATE ROLE regress_test_me;\n-CREATE ROLE regress_test_not_me;\n+CREATE ROLE regress_test_me PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE ROLE regress_test_not_me PASSWORD NEON_PASSWORD_PLACEHOLDER;\n CREATE TABLE not_owned_by_me (LIKE list_parted);\n ALTER TABLE not_owned_by_me OWNER TO regress_test_not_me;\n SET SESSION AUTHORIZATION regress_test_me;\ndiff --git a/src/test/regress/sql/arrays.sql 
b/src/test/regress/sql/arrays.sql\nindex 47058dfde5..f8962592e4 100644\n--- a/src/test/regress/sql/arrays.sql\n+++ b/src/test/regress/sql/arrays.sql\n@@ -22,7 +22,8 @@ CREATE TABLE array_op_test (\n );\n \n \\set filename :abs_srcdir '/data/array.data'\n-COPY array_op_test FROM :'filename';\n+\\set command '\\\\copy array_op_test FROM ' :'filename';\n+:command\n ANALYZE array_op_test;\n \n --\ndiff --git a/src/test/regress/sql/btree_index.sql b/src/test/regress/sql/btree_index.sql\nindex 0d2a33f370..df86e6b050 100644\n--- a/src/test/regress/sql/btree_index.sql\n+++ b/src/test/regress/sql/btree_index.sql\n@@ -26,16 +26,20 @@ CREATE TABLE bt_f8_heap (\n );\n \n \\set filename :abs_srcdir '/data/desc.data'\n-COPY bt_i4_heap FROM :'filename';\n+\\set command '\\\\copy bt_i4_heap FROM ' :'filename';\n+:command\n \n \\set filename :abs_srcdir '/data/hash.data'\n-COPY bt_name_heap FROM :'filename';\n+\\set command '\\\\copy bt_name_heap FROM ' :'filename';\n+:command\n \n \\set filename :abs_srcdir '/data/desc.data'\n-COPY bt_txt_heap FROM :'filename';\n+\\set command '\\\\copy bt_txt_heap FROM ' :'filename';\n+:command\n \n \\set filename :abs_srcdir '/data/hash.data'\n-COPY bt_f8_heap FROM :'filename';\n+\\set command '\\\\copy bt_f8_heap FROM ' :'filename';\n+:command\n \n ANALYZE bt_i4_heap;\n ANALYZE bt_name_heap;\ndiff --git a/src/test/regress/sql/cluster.sql b/src/test/regress/sql/cluster.sql\nindex b7115f8610..a753f2c794 100644\n--- a/src/test/regress/sql/cluster.sql\n+++ b/src/test/regress/sql/cluster.sql\n@@ -108,7 +108,7 @@ WHERE pg_class.oid=indexrelid\n CLUSTER pg_toast.pg_toast_826 USING pg_toast_826_index;\n \n -- Verify that clustering all tables does in fact cluster the right ones\n-CREATE USER regress_clstr_user;\n+CREATE USER regress_clstr_user PASSWORD NEON_PASSWORD_PLACEHOLDER;\n CREATE TABLE clstr_1 (a INT PRIMARY KEY);\n CREATE TABLE clstr_2 (a INT PRIMARY KEY);\n CREATE TABLE clstr_3 (a INT PRIMARY KEY);\n@@ -235,7 +235,7 @@ DROP TABLE clstrpart;\n 
CREATE TABLE ptnowner(i int unique) PARTITION BY LIST (i);\n CREATE INDEX ptnowner_i_idx ON ptnowner(i);\n CREATE TABLE ptnowner1 PARTITION OF ptnowner FOR VALUES IN (1);\n-CREATE ROLE regress_ptnowner;\n+CREATE ROLE regress_ptnowner PASSWORD NEON_PASSWORD_PLACEHOLDER;\n CREATE TABLE ptnowner2 PARTITION OF ptnowner FOR VALUES IN (2);\n ALTER TABLE ptnowner1 OWNER TO regress_ptnowner;\n SET SESSION AUTHORIZATION regress_ptnowner;\ndiff --git a/src/test/regress/sql/collate.icu.utf8.sql b/src/test/regress/sql/collate.icu.utf8.sql\nindex 4eb1adf028..28636ec711 100644\n--- a/src/test/regress/sql/collate.icu.utf8.sql\n+++ b/src/test/regress/sql/collate.icu.utf8.sql\n@@ -353,7 +353,7 @@ reset enable_seqscan;\n \n -- schema manipulation commands\n \n-CREATE ROLE regress_test_role;\n+CREATE ROLE regress_test_role PASSWORD NEON_PASSWORD_PLACEHOLDER;\n CREATE SCHEMA test_schema;\n \n -- We need to do this this way to cope with varying names for encodings:\ndiff --git a/src/test/regress/sql/constraints.sql b/src/test/regress/sql/constraints.sql\nindex e3e3bea709..fa86ddc326 100644\n--- a/src/test/regress/sql/constraints.sql\n+++ b/src/test/regress/sql/constraints.sql\n@@ -243,12 +243,14 @@ CREATE TABLE COPY_TBL (x INT, y TEXT, z INT,\n \tCHECK (x > 3 AND y <> 'check failed' AND x < 7 ));\n \n \\set filename :abs_srcdir '/data/constro.data'\n-COPY COPY_TBL FROM :'filename';\n+\\set command '\\\\copy COPY_TBL FROM ' :'filename';\n+:command\n \n SELECT * FROM COPY_TBL;\n \n \\set filename :abs_srcdir '/data/constrf.data'\n-COPY COPY_TBL FROM :'filename';\n+\\set command '\\\\copy COPY_TBL FROM ' :'filename';\n+:command\n \n SELECT * FROM COPY_TBL;\n \n@@ -599,7 +601,7 @@ DROP TABLE deferred_excl;\n \n -- Comments\n -- Setup a low-level role to enforce non-superuser checks.\n-CREATE ROLE regress_constraint_comments;\n+CREATE ROLE regress_constraint_comments PASSWORD NEON_PASSWORD_PLACEHOLDER;\n SET SESSION AUTHORIZATION regress_constraint_comments;\n \n CREATE TABLE 
constraint_comments_tbl (a int CONSTRAINT the_constraint CHECK (a > 0));\n@@ -621,7 +623,7 @@ COMMENT ON CONSTRAINT the_constraint ON DOMAIN constraint_comments_dom IS NULL;\n \n -- unauthorized user\n RESET SESSION AUTHORIZATION;\n-CREATE ROLE regress_constraint_comments_noaccess;\n+CREATE ROLE regress_constraint_comments_noaccess PASSWORD NEON_PASSWORD_PLACEHOLDER;\n SET SESSION AUTHORIZATION regress_constraint_comments_noaccess;\n COMMENT ON CONSTRAINT the_constraint ON constraint_comments_tbl IS 'no, the comment';\n COMMENT ON CONSTRAINT the_constraint ON DOMAIN constraint_comments_dom IS 'no, another comment';\ndiff --git a/src/test/regress/sql/conversion.sql b/src/test/regress/sql/conversion.sql\nindex b567a1a572..4d1ac2e631 100644\n--- a/src/test/regress/sql/conversion.sql\n+++ b/src/test/regress/sql/conversion.sql\n@@ -17,7 +17,7 @@ CREATE FUNCTION test_enc_conversion(bytea, name, name, bool, validlen OUT int, r\n     AS :'regresslib', 'test_enc_conversion'\n     LANGUAGE C STRICT;\n \n-CREATE USER regress_conversion_user WITH NOCREATEDB NOCREATEROLE;\n+CREATE USER regress_conversion_user WITH NOCREATEDB NOCREATEROLE PASSWORD NEON_PASSWORD_PLACEHOLDER;\n SET SESSION AUTHORIZATION regress_conversion_user;\n CREATE CONVERSION myconv FOR 'LATIN1' TO 'UTF8' FROM iso8859_1_to_utf8;\n --\ndiff --git a/src/test/regress/sql/copy.sql b/src/test/regress/sql/copy.sql\nindex e2dd24cb35..4a186750f8 100644\n--- a/src/test/regress/sql/copy.sql\n+++ b/src/test/regress/sql/copy.sql\n@@ -20,11 +20,13 @@ insert into copytest values('Mac',E'abc\\rdef',3);\n insert into copytest values(E'esc\\\\ape',E'a\\\\r\\\\\\r\\\\\\n\\\\nb',4);\n \n \\set filename :abs_builddir '/results/copytest.csv'\n-copy copytest to :'filename' csv;\n+\\set command '\\\\copy copytest to ' :'filename' csv;\n+:command\n \n create temp table copytest2 (like copytest);\n \n-copy copytest2 from :'filename' csv;\n+\\set command '\\\\copy copytest2 from ' :'filename' csv;\n+:command\n \n select * from 
copytest except select * from copytest2;\n \n@@ -32,9 +34,11 @@ truncate copytest2;\n \n --- same test but with an escape char different from quote char\n \n-copy copytest to :'filename' csv quote '''' escape E'\\\\';\n+\\set command '\\\\copy copytest to ' :'filename' ' csv quote ' '\\'\\'\\'\\'' ' escape ' 'E\\'' '\\\\\\\\\\'';\n+:command\n \n-copy copytest2 from :'filename' csv quote '''' escape E'\\\\';\n+\\set command '\\\\copy copytest2 from ' :'filename' ' csv quote ' '\\'\\'\\'\\'' ' escape ' 'E\\'' '\\\\\\\\\\'';\n+:command\n \n select * from copytest except select * from copytest2;\n \n@@ -86,16 +90,19 @@ insert into parted_copytest select x,2,'Two' from generate_series(1001,1010) x;\n insert into parted_copytest select x,1,'One' from generate_series(1011,1020) x;\n \n \\set filename :abs_builddir '/results/parted_copytest.csv'\n-copy (select * from parted_copytest order by a) to :'filename';\n+\\set command '\\\\copy (select * from parted_copytest order by a) to ' :'filename';\n+:command\n \n truncate parted_copytest;\n \n-copy parted_copytest from :'filename';\n+\\set command '\\\\copy parted_copytest from ' :'filename';\n+:command\n \n -- Ensure COPY FREEZE errors for partitioned tables.\n begin;\n truncate parted_copytest;\n-copy parted_copytest from :'filename' (freeze);\n+\\set command '\\\\copy parted_copytest from ' :'filename' (freeze);\n+:command\n rollback;\n \n select tableoid::regclass,count(*),sum(a) from parted_copytest\n@@ -115,7 +122,8 @@ create trigger part_ins_trig\n \tfor each row\n \texecute procedure part_ins_func();\n \n-copy parted_copytest from :'filename';\n+\\set command '\\\\copy parted_copytest from ' :'filename';\n+:command\n \n select tableoid::regclass,count(*),sum(a) from parted_copytest\n group by tableoid order by tableoid::regclass::name;\n@@ -124,7 +132,8 @@ truncate table parted_copytest;\n create index on parted_copytest (b);\n drop trigger part_ins_trig on parted_copytest_a2;\n \n-copy parted_copytest from 
stdin;\n+\\set command '\\\\copy parted_copytest from ' stdin;\n+:command\n 1\t1\tstr1\n 2\t2\tstr2\n \\.\n@@ -191,8 +200,8 @@ bill\t20\t(11,10)\t1000\tsharon\n -- Generate COPY FROM report with FILE, with some excluded tuples.\n truncate tab_progress_reporting;\n \\set filename :abs_srcdir '/data/emp.data'\n-copy tab_progress_reporting from :'filename'\n-\twhere (salary < 2000);\n+\\set command '\\\\copy tab_progress_reporting from ' :'filename' 'where (salary < 2000)';\n+:command\n \n drop trigger check_after_tab_progress_reporting on tab_progress_reporting;\n drop function notice_after_tab_progress_reporting();\n@@ -311,7 +320,8 @@ CREATE TABLE parted_si_p_odd PARTITION OF parted_si FOR VALUES IN (1);\n -- https://postgr.es/m/18130-7a86a7356a75209d%40postgresql.org\n -- https://postgr.es/m/257696.1695670946%40sss.pgh.pa.us\n \\set filename :abs_srcdir '/data/desc.data'\n-COPY parted_si(id, data) FROM :'filename';\n+\\set command '\\\\COPY parted_si(id, data) FROM ' :'filename';\n+:command\n \n -- An earlier bug (see commit b1ecb9b3fcf) could end up using a buffer from\n -- the wrong partition. 
This test is *not* guaranteed to trigger that bug, but\ndiff --git a/src/test/regress/sql/copy2.sql b/src/test/regress/sql/copy2.sql\nindex 6b75b6c7ea..f3655b413c 100644\n--- a/src/test/regress/sql/copy2.sql\n+++ b/src/test/regress/sql/copy2.sql\n@@ -407,8 +407,8 @@ copy check_con_tbl from stdin;\n select * from check_con_tbl;\n \n -- test with RLS enabled.\n-CREATE ROLE regress_rls_copy_user;\n-CREATE ROLE regress_rls_copy_user_colperms;\n+CREATE ROLE regress_rls_copy_user PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE ROLE regress_rls_copy_user_colperms PASSWORD NEON_PASSWORD_PLACEHOLDER;\n CREATE TABLE rls_t1 (a int, b int, c int);\n \n COPY rls_t1 (a, b, c) from stdin;\ndiff --git a/src/test/regress/sql/create_function_sql.sql b/src/test/regress/sql/create_function_sql.sql\nindex 89e9af3a49..2b86fe2285 100644\n--- a/src/test/regress/sql/create_function_sql.sql\n+++ b/src/test/regress/sql/create_function_sql.sql\n@@ -6,7 +6,7 @@\n \n -- All objects made in this test are in temp_func_test schema\n \n-CREATE USER regress_unpriv_user;\n+CREATE USER regress_unpriv_user PASSWORD NEON_PASSWORD_PLACEHOLDER;\n \n CREATE SCHEMA temp_func_test;\n GRANT ALL ON SCHEMA temp_func_test TO public;\ndiff --git a/src/test/regress/sql/create_index.sql b/src/test/regress/sql/create_index.sql\nindex e296891cab..70cea565e4 100644\n--- a/src/test/regress/sql/create_index.sql\n+++ b/src/test/regress/sql/create_index.sql\n@@ -71,7 +71,8 @@ CREATE TABLE fast_emp4000 (\n );\n \n \\set filename :abs_srcdir '/data/rect.data'\n-COPY slow_emp4000 FROM :'filename';\n+\\set command '\\\\copy slow_emp4000 FROM ' :'filename';\n+:command\n \n INSERT INTO fast_emp4000 SELECT * FROM slow_emp4000;\n \n@@ -269,7 +270,8 @@ CREATE TABLE array_index_op_test (\n );\n \n \\set filename :abs_srcdir '/data/array.data'\n-COPY array_index_op_test FROM :'filename';\n+\\set command '\\\\copy array_index_op_test FROM ' :'filename';\n+:command\n ANALYZE array_index_op_test;\n \n SELECT * FROM array_index_op_test 
WHERE i = '{NULL}' ORDER BY seqno;\n@@ -1298,7 +1300,7 @@ END;\n REINDEX SCHEMA CONCURRENTLY schema_to_reindex;\n \n -- Failure for unauthorized user\n-CREATE ROLE regress_reindexuser NOLOGIN;\n+CREATE ROLE regress_reindexuser NOLOGIN PASSWORD NEON_PASSWORD_PLACEHOLDER;\n SET SESSION ROLE regress_reindexuser;\n REINDEX SCHEMA schema_to_reindex;\n -- Permission failures with toast tables and indexes (pg_authid here)\ndiff --git a/src/test/regress/sql/create_procedure.sql b/src/test/regress/sql/create_procedure.sql\nindex 069a3727ce..faeeb3f744 100644\n--- a/src/test/regress/sql/create_procedure.sql\n+++ b/src/test/regress/sql/create_procedure.sql\n@@ -255,7 +255,7 @@ DROP PROCEDURE nonexistent();\n \n -- privileges\n \n-CREATE USER regress_cp_user1;\n+CREATE USER regress_cp_user1 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n GRANT INSERT ON cp_test TO regress_cp_user1;\n REVOKE EXECUTE ON PROCEDURE ptest1(text) FROM PUBLIC;\n SET ROLE regress_cp_user1;\ndiff --git a/src/test/regress/sql/create_role.sql b/src/test/regress/sql/create_role.sql\nindex 4491a28a8a..3045434865 100644\n--- a/src/test/regress/sql/create_role.sql\n+++ b/src/test/regress/sql/create_role.sql\n@@ -1,20 +1,20 @@\n -- ok, superuser can create users with any set of privileges\n-CREATE ROLE regress_role_super SUPERUSER;\n-CREATE ROLE regress_role_admin CREATEDB CREATEROLE REPLICATION BYPASSRLS;\n+CREATE ROLE regress_role_super SUPERUSER PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE ROLE regress_role_admin CREATEDB CREATEROLE REPLICATION BYPASSRLS PASSWORD NEON_PASSWORD_PLACEHOLDER;\n GRANT CREATE ON DATABASE regression TO regress_role_admin WITH GRANT OPTION;\n-CREATE ROLE regress_role_limited_admin CREATEROLE;\n-CREATE ROLE regress_role_normal;\n+CREATE ROLE regress_role_limited_admin CREATEROLE PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE ROLE regress_role_normal PASSWORD NEON_PASSWORD_PLACEHOLDER;\n \n -- fail, CREATEROLE user can't give away role attributes without having them\n SET SESSION AUTHORIZATION 
regress_role_limited_admin;\n-CREATE ROLE regress_nosuch_superuser SUPERUSER;\n-CREATE ROLE regress_nosuch_replication_bypassrls REPLICATION BYPASSRLS;\n-CREATE ROLE regress_nosuch_replication REPLICATION;\n-CREATE ROLE regress_nosuch_bypassrls BYPASSRLS;\n-CREATE ROLE regress_nosuch_createdb CREATEDB;\n+CREATE ROLE regress_nosuch_superuser SUPERUSER PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE ROLE regress_nosuch_replication_bypassrls REPLICATION BYPASSRLS PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE ROLE regress_nosuch_replication REPLICATION PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE ROLE regress_nosuch_bypassrls BYPASSRLS PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE ROLE regress_nosuch_createdb CREATEDB PASSWORD NEON_PASSWORD_PLACEHOLDER;\n \n -- ok, can create a role without any special attributes\n-CREATE ROLE regress_role_limited;\n+CREATE ROLE regress_role_limited PASSWORD NEON_PASSWORD_PLACEHOLDER;\n \n -- fail, can't give it in any of the restricted attributes\n ALTER ROLE regress_role_limited SUPERUSER;\n@@ -25,10 +25,10 @@ DROP ROLE regress_role_limited;\n \n -- ok, can give away these role attributes if you have them\n SET SESSION AUTHORIZATION regress_role_admin;\n-CREATE ROLE regress_replication_bypassrls REPLICATION BYPASSRLS;\n-CREATE ROLE regress_replication REPLICATION;\n-CREATE ROLE regress_bypassrls BYPASSRLS;\n-CREATE ROLE regress_createdb CREATEDB;\n+CREATE ROLE regress_replication_bypassrls REPLICATION BYPASSRLS PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE ROLE regress_replication REPLICATION PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE ROLE regress_bypassrls BYPASSRLS PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE ROLE regress_createdb CREATEDB PASSWORD NEON_PASSWORD_PLACEHOLDER;\n \n -- ok, can toggle these role attributes off and on if you have them\n ALTER ROLE regress_replication NOREPLICATION;\n@@ -43,52 +43,52 @@ ALTER ROLE regress_createdb SUPERUSER;\n ALTER ROLE regress_createdb NOSUPERUSER;\n \n -- ok, having CREATEROLE is enough 
to create users with these privileges\n-CREATE ROLE regress_createrole CREATEROLE NOINHERIT;\n+CREATE ROLE regress_createrole CREATEROLE NOINHERIT PASSWORD NEON_PASSWORD_PLACEHOLDER;\n GRANT CREATE ON DATABASE regression TO regress_createrole WITH GRANT OPTION;\n-CREATE ROLE regress_login LOGIN;\n-CREATE ROLE regress_inherit INHERIT;\n-CREATE ROLE regress_connection_limit CONNECTION LIMIT 5;\n-CREATE ROLE regress_encrypted_password ENCRYPTED PASSWORD 'foo';\n-CREATE ROLE regress_password_null PASSWORD NULL;\n+CREATE ROLE regress_login LOGIN PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE ROLE regress_inherit INHERIT PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE ROLE regress_connection_limit CONNECTION LIMIT 5 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE ROLE regress_encrypted_password ENCRYPTED PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE ROLE regress_password_null PASSWORD NEON_PASSWORD_PLACEHOLDER;\n \n -- ok, backwards compatible noise words should be ignored\n-CREATE ROLE regress_noiseword SYSID 12345;\n+CREATE ROLE regress_noiseword SYSID 12345 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n \n -- fail, cannot grant membership in superuser role\n-CREATE ROLE regress_nosuch_super IN ROLE regress_role_super;\n+CREATE ROLE regress_nosuch_super IN ROLE regress_role_super PASSWORD NEON_PASSWORD_PLACEHOLDER;\n \n -- fail, database owner cannot have members\n-CREATE ROLE regress_nosuch_dbowner IN ROLE pg_database_owner;\n+CREATE ROLE regress_nosuch_dbowner IN ROLE pg_database_owner PASSWORD NEON_PASSWORD_PLACEHOLDER;\n \n -- ok, can grant other users into a role\n CREATE ROLE regress_inroles ROLE\n \tregress_role_super, regress_createdb, regress_createrole, regress_login,\n-\tregress_inherit, regress_connection_limit, regress_encrypted_password, regress_password_null;\n+\tregress_inherit, regress_connection_limit, regress_encrypted_password, regress_password_null PASSWORD NEON_PASSWORD_PLACEHOLDER;\n \n -- fail, cannot grant a role into itself\n-CREATE ROLE 
regress_nosuch_recursive ROLE regress_nosuch_recursive;\n+CREATE ROLE regress_nosuch_recursive ROLE regress_nosuch_recursive PASSWORD NEON_PASSWORD_PLACEHOLDER;\n \n -- ok, can grant other users into a role with admin option\n CREATE ROLE regress_adminroles ADMIN\n \tregress_role_super, regress_createdb, regress_createrole, regress_login,\n-\tregress_inherit, regress_connection_limit, regress_encrypted_password, regress_password_null;\n+\tregress_inherit, regress_connection_limit, regress_encrypted_password, regress_password_null PASSWORD NEON_PASSWORD_PLACEHOLDER;\n \n -- fail, cannot grant a role into itself with admin option\n-CREATE ROLE regress_nosuch_admin_recursive ADMIN regress_nosuch_admin_recursive;\n+CREATE ROLE regress_nosuch_admin_recursive ADMIN regress_nosuch_admin_recursive PASSWORD NEON_PASSWORD_PLACEHOLDER;\n \n -- fail, regress_createrole does not have CREATEDB privilege\n SET SESSION AUTHORIZATION regress_createrole;\n CREATE DATABASE regress_nosuch_db;\n \n -- ok, regress_createrole can create new roles\n-CREATE ROLE regress_plainrole;\n+CREATE ROLE regress_plainrole PASSWORD NEON_PASSWORD_PLACEHOLDER;\n \n -- ok, roles with CREATEROLE can create new roles with it\n-CREATE ROLE regress_rolecreator CREATEROLE;\n+CREATE ROLE regress_rolecreator CREATEROLE PASSWORD NEON_PASSWORD_PLACEHOLDER;\n \n -- ok, roles with CREATEROLE can create new roles with different role\n -- attributes, including CREATEROLE\n-CREATE ROLE regress_hasprivs CREATEROLE LOGIN INHERIT CONNECTION LIMIT 5;\n+CREATE ROLE regress_hasprivs CREATEROLE LOGIN INHERIT CONNECTION LIMIT 5 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n \n -- ok, we should be able to modify a role we created\n COMMENT ON ROLE regress_hasprivs IS 'some comment';\n@@ -123,7 +123,7 @@ REASSIGN OWNED BY regress_tenant TO regress_createrole;\n \n -- ok, create a role with a value for createrole_self_grant\n SET createrole_self_grant = 'set, inherit';\n-CREATE ROLE regress_tenant2;\n+CREATE ROLE regress_tenant2 
PASSWORD NEON_PASSWORD_PLACEHOLDER;\n GRANT CREATE ON DATABASE regression TO regress_tenant2;\n \n -- ok, regress_tenant2 can create objects within the database\n@@ -150,16 +150,16 @@ ALTER TABLE tenant2_table OWNER TO regress_tenant2;\n DROP TABLE tenant2_table;\n \n -- fail, CREATEROLE is not enough to create roles in privileged roles\n-CREATE ROLE regress_read_all_data IN ROLE pg_read_all_data;\n-CREATE ROLE regress_write_all_data IN ROLE pg_write_all_data;\n-CREATE ROLE regress_monitor IN ROLE pg_monitor;\n-CREATE ROLE regress_read_all_settings IN ROLE pg_read_all_settings;\n-CREATE ROLE regress_read_all_stats IN ROLE pg_read_all_stats;\n-CREATE ROLE regress_stat_scan_tables IN ROLE pg_stat_scan_tables;\n-CREATE ROLE regress_read_server_files IN ROLE pg_read_server_files;\n-CREATE ROLE regress_write_server_files IN ROLE pg_write_server_files;\n-CREATE ROLE regress_execute_server_program IN ROLE pg_execute_server_program;\n-CREATE ROLE regress_signal_backend IN ROLE pg_signal_backend;\n+CREATE ROLE regress_read_all_data PASSWORD NEON_PASSWORD_PLACEHOLDER IN ROLE pg_read_all_data;\n+CREATE ROLE regress_write_all_data PASSWORD NEON_PASSWORD_PLACEHOLDER IN ROLE pg_write_all_data;\n+CREATE ROLE regress_monitor PASSWORD NEON_PASSWORD_PLACEHOLDER IN ROLE pg_monitor;\n+CREATE ROLE regress_read_all_settings PASSWORD NEON_PASSWORD_PLACEHOLDER IN ROLE pg_read_all_settings;\n+CREATE ROLE regress_read_all_stats PASSWORD NEON_PASSWORD_PLACEHOLDER IN ROLE pg_read_all_stats;\n+CREATE ROLE regress_stat_scan_tables PASSWORD NEON_PASSWORD_PLACEHOLDER IN ROLE pg_stat_scan_tables;\n+CREATE ROLE regress_read_server_files PASSWORD NEON_PASSWORD_PLACEHOLDER IN ROLE pg_read_server_files;\n+CREATE ROLE regress_write_server_files PASSWORD NEON_PASSWORD_PLACEHOLDER IN ROLE pg_write_server_files;\n+CREATE ROLE regress_execute_server_program PASSWORD NEON_PASSWORD_PLACEHOLDER IN ROLE pg_execute_server_program;\n+CREATE ROLE regress_signal_backend PASSWORD NEON_PASSWORD_PLACEHOLDER IN ROLE 
pg_signal_backend;\n \n -- fail, role still owns database objects\n DROP ROLE regress_tenant;\ndiff --git a/src/test/regress/sql/create_schema.sql b/src/test/regress/sql/create_schema.sql\nindex 1b7064247a..be5b662ce1 100644\n--- a/src/test/regress/sql/create_schema.sql\n+++ b/src/test/regress/sql/create_schema.sql\n@@ -4,7 +4,7 @@\n \n -- Schema creation with elements.\n \n-CREATE ROLE regress_create_schema_role SUPERUSER;\n+CREATE ROLE regress_create_schema_role SUPERUSER PASSWORD NEON_PASSWORD_PLACEHOLDER;\n \n -- Cases where schema creation fails as objects are qualified with a schema\n -- that does not match with what's expected.\ndiff --git a/src/test/regress/sql/create_view.sql b/src/test/regress/sql/create_view.sql\nindex ae6841308b..47bc792e30 100644\n--- a/src/test/regress/sql/create_view.sql\n+++ b/src/test/regress/sql/create_view.sql\n@@ -23,7 +23,8 @@ CREATE TABLE real_city (\n );\n \n \\set filename :abs_srcdir '/data/real_city.data'\n-COPY real_city FROM :'filename';\n+\\set command '\\\\copy real_city FROM ' :'filename';\n+:command\n ANALYZE real_city;\n \n SELECT *\ndiff --git a/src/test/regress/sql/database.sql b/src/test/regress/sql/database.sql\nindex 46ad263478..eb05584ed5 100644\n--- a/src/test/regress/sql/database.sql\n+++ b/src/test/regress/sql/database.sql\n@@ -1,8 +1,6 @@\n CREATE DATABASE regression_tbd\n \tENCODING utf8 LC_COLLATE \"C\" LC_CTYPE \"C\" TEMPLATE template0;\n ALTER DATABASE regression_tbd RENAME TO regression_utf8;\n-ALTER DATABASE regression_utf8 SET TABLESPACE regress_tblspace;\n-ALTER DATABASE regression_utf8 RESET TABLESPACE;\n ALTER DATABASE regression_utf8 CONNECTION_LIMIT 123;\n \n -- Test PgDatabaseToastTable.  
Doing this with GRANT would be slow.\ndiff --git a/src/test/regress/sql/dependency.sql b/src/test/regress/sql/dependency.sql\nindex 8d74ed7122..293194615e 100644\n--- a/src/test/regress/sql/dependency.sql\n+++ b/src/test/regress/sql/dependency.sql\n@@ -2,10 +2,10 @@\n -- DEPENDENCIES\n --\n \n-CREATE USER regress_dep_user;\n-CREATE USER regress_dep_user2;\n-CREATE USER regress_dep_user3;\n-CREATE GROUP regress_dep_group;\n+CREATE USER regress_dep_user PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE USER regress_dep_user2 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE USER regress_dep_user3 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE GROUP regress_dep_group PASSWORD NEON_PASSWORD_PLACEHOLDER;\n \n CREATE TABLE deptest (f1 serial primary key, f2 text);\n \n@@ -45,9 +45,9 @@ DROP TABLE deptest;\n DROP USER regress_dep_user3;\n \n -- Test DROP OWNED\n-CREATE USER regress_dep_user0;\n-CREATE USER regress_dep_user1;\n-CREATE USER regress_dep_user2;\n+CREATE USER regress_dep_user0 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE USER regress_dep_user1 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE USER regress_dep_user2 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n SET SESSION AUTHORIZATION regress_dep_user0;\n -- permission denied\n DROP OWNED BY regress_dep_user1;\ndiff --git a/src/test/regress/sql/drop_if_exists.sql b/src/test/regress/sql/drop_if_exists.sql\nindex ac6168b91f..4270062ec7 100644\n--- a/src/test/regress/sql/drop_if_exists.sql\n+++ b/src/test/regress/sql/drop_if_exists.sql\n@@ -86,9 +86,9 @@ DROP DOMAIN test_domain_exists;\n --- role/user/group\n ---\n \n-CREATE USER regress_test_u1;\n-CREATE ROLE regress_test_r1;\n-CREATE GROUP regress_test_g1;\n+CREATE USER regress_test_u1 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE ROLE regress_test_r1 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE GROUP regress_test_g1 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n \n DROP USER regress_test_u2;\n \ndiff --git a/src/test/regress/sql/equivclass.sql b/src/test/regress/sql/equivclass.sql\nindex 
247b0a3105..bf018fd3a1 100644\n--- a/src/test/regress/sql/equivclass.sql\n+++ b/src/test/regress/sql/equivclass.sql\n@@ -230,7 +230,7 @@ set enable_mergejoin = off;\n alter table ec1 enable row level security;\n create policy p1 on ec1 using (f1 < '5'::int8alias1);\n \n-create user regress_user_ectest;\n+create user regress_user_ectest PASSWORD NEON_PASSWORD_PLACEHOLDER;\n grant select on ec0 to regress_user_ectest;\n grant select on ec1 to regress_user_ectest;\n \ndiff --git a/src/test/regress/sql/event_trigger.sql b/src/test/regress/sql/event_trigger.sql\nindex 013546b830..616a46da1d 100644\n--- a/src/test/regress/sql/event_trigger.sql\n+++ b/src/test/regress/sql/event_trigger.sql\n@@ -86,7 +86,7 @@ create event trigger regress_event_trigger2 on ddl_command_start\n comment on event trigger regress_event_trigger is 'test comment';\n \n -- drop as non-superuser should fail\n-create role regress_evt_user;\n+create role regress_evt_user PASSWORD NEON_PASSWORD_PLACEHOLDER;\n set role regress_evt_user;\n create event trigger regress_event_trigger_noperms on ddl_command_start\n    execute procedure test_event_trigger();\ndiff --git a/src/test/regress/sql/foreign_data.sql b/src/test/regress/sql/foreign_data.sql\nindex aa147b14a9..370e0dd570 100644\n--- a/src/test/regress/sql/foreign_data.sql\n+++ b/src/test/regress/sql/foreign_data.sql\n@@ -22,14 +22,14 @@ DROP ROLE IF EXISTS regress_foreign_data_user, regress_test_role, regress_test_r\n \n RESET client_min_messages;\n \n-CREATE ROLE regress_foreign_data_user LOGIN SUPERUSER;\n+CREATE ROLE regress_foreign_data_user LOGIN SUPERUSER PASSWORD NEON_PASSWORD_PLACEHOLDER;\n SET SESSION AUTHORIZATION 'regress_foreign_data_user';\n \n-CREATE ROLE regress_test_role;\n-CREATE ROLE regress_test_role2;\n-CREATE ROLE regress_test_role_super SUPERUSER;\n-CREATE ROLE regress_test_indirect;\n-CREATE ROLE regress_unprivileged_role;\n+CREATE ROLE regress_test_role PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE ROLE regress_test_role2 
PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE ROLE regress_test_role_super SUPERUSER PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE ROLE regress_test_indirect PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE ROLE regress_unprivileged_role PASSWORD NEON_PASSWORD_PLACEHOLDER;\n \n CREATE FOREIGN DATA WRAPPER dummy;\n COMMENT ON FOREIGN DATA WRAPPER dummy IS 'useless';\ndiff --git a/src/test/regress/sql/foreign_key.sql b/src/test/regress/sql/foreign_key.sql\nindex 8c4e4c7c83..e946cd2119 100644\n--- a/src/test/regress/sql/foreign_key.sql\n+++ b/src/test/regress/sql/foreign_key.sql\n@@ -1435,7 +1435,7 @@ ALTER TABLE fk_partitioned_fk_6 ATTACH PARTITION fk_partitioned_pk_6 FOR VALUES\n DROP TABLE fk_partitioned_pk_6, fk_partitioned_fk_6;\n \n -- test the case when the referenced table is owned by a different user\n-create role regress_other_partitioned_fk_owner;\n+create role regress_other_partitioned_fk_owner PASSWORD NEON_PASSWORD_PLACEHOLDER;\n grant references on fk_notpartitioned_pk to regress_other_partitioned_fk_owner;\n set role regress_other_partitioned_fk_owner;\n create table other_partitioned_fk(a int, b int) partition by list (a);\ndiff --git a/src/test/regress/sql/generated.sql b/src/test/regress/sql/generated.sql\nindex cb55d77821..9c15ae954c 100644\n--- a/src/test/regress/sql/generated.sql\n+++ b/src/test/regress/sql/generated.sql\n@@ -263,7 +263,7 @@ ALTER TABLE gtest10a DROP COLUMN b;\n INSERT INTO gtest10a (a) VALUES (1);\n \n -- privileges\n-CREATE USER regress_user11;\n+CREATE USER regress_user11 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n \n CREATE TABLE gtest11s (a int PRIMARY KEY, b int, c int GENERATED ALWAYS AS (b * 2) STORED);\n INSERT INTO gtest11s VALUES (1, 10), (2, 20);\ndiff --git a/src/test/regress/sql/guc.sql b/src/test/regress/sql/guc.sql\nindex dc79761955..a9ead75349 100644\n--- a/src/test/regress/sql/guc.sql\n+++ b/src/test/regress/sql/guc.sql\n@@ -188,7 +188,7 @@ PREPARE foo AS SELECT 1;\n LISTEN foo_event;\n SET vacuum_cost_delay = 13;\n CREATE 
TEMP TABLE tmp_foo (data text) ON COMMIT DELETE ROWS;\n-CREATE ROLE regress_guc_user;\n+CREATE ROLE regress_guc_user PASSWORD NEON_PASSWORD_PLACEHOLDER;\n SET SESSION AUTHORIZATION regress_guc_user;\n -- look changes\n SELECT pg_listening_channels();\ndiff --git a/src/test/regress/sql/hash_index.sql b/src/test/regress/sql/hash_index.sql\nindex 219da82981..bf99d2ec4c 100644\n--- a/src/test/regress/sql/hash_index.sql\n+++ b/src/test/regress/sql/hash_index.sql\n@@ -26,10 +26,14 @@ CREATE TABLE hash_f8_heap (\n );\n \n \\set filename :abs_srcdir '/data/hash.data'\n-COPY hash_i4_heap FROM :'filename';\n-COPY hash_name_heap FROM :'filename';\n-COPY hash_txt_heap FROM :'filename';\n-COPY hash_f8_heap FROM :'filename';\n+\\set command '\\\\copy hash_i4_heap FROM ' :'filename';\n+:command\n+\\set command '\\\\copy hash_name_heap FROM ' :'filename';\n+:command\n+\\set command '\\\\copy hash_txt_heap FROM ' :'filename';\n+:command\n+\\set command '\\\\copy hash_f8_heap FROM ' :'filename';\n+:command\n \n -- the data in this file has a lot of duplicates in the index key\n -- fields, leading to long bucket chains and lots of table expansion.\ndiff --git a/src/test/regress/sql/identity.sql b/src/test/regress/sql/identity.sql\nindex cb0e05a2f1..b11492bd31 100644\n--- a/src/test/regress/sql/identity.sql\n+++ b/src/test/regress/sql/identity.sql\n@@ -287,7 +287,7 @@ ALTER TABLE itest7 ALTER COLUMN a RESTART;\n ALTER TABLE itest7 ALTER COLUMN a DROP IDENTITY;\n \n -- privileges\n-CREATE USER regress_identity_user1;\n+CREATE USER regress_identity_user1 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n CREATE TABLE itest8 (a int GENERATED ALWAYS AS IDENTITY, b text);\n GRANT SELECT, INSERT ON itest8 TO regress_identity_user1;\n SET ROLE regress_identity_user1;\ndiff --git a/src/test/regress/sql/inherit.sql b/src/test/regress/sql/inherit.sql\nindex 51251b0e51..3492f1cfef 100644\n--- a/src/test/regress/sql/inherit.sql\n+++ b/src/test/regress/sql/inherit.sql\n@@ -770,8 +770,8 @@ drop table 
cnullparent cascade;\n --\n -- Mixed ownership inheritance tree\n --\n-create role regress_alice;\n-create role regress_bob;\n+create role regress_alice password NEON_PASSWORD_PLACEHOLDER;\n+create role regress_bob password NEON_PASSWORD_PLACEHOLDER;\n grant all on schema public to regress_alice, regress_bob;\n grant regress_alice to regress_bob;\n set session authorization regress_alice;\n@@ -1031,7 +1031,7 @@ create index on permtest_parent (left(c, 3));\n insert into permtest_parent\n   select 1, 'a', left(fipshash(i::text), 5) from generate_series(0, 100) i;\n analyze permtest_parent;\n-create role regress_no_child_access;\n+create role regress_no_child_access PASSWORD NEON_PASSWORD_PLACEHOLDER;\n revoke all on permtest_grandchild from regress_no_child_access;\n grant select on permtest_parent to regress_no_child_access;\n set session authorization regress_no_child_access;\ndiff --git a/src/test/regress/sql/insert.sql b/src/test/regress/sql/insert.sql\nindex 2b086eeb6d..913d8a0aed 100644\n--- a/src/test/regress/sql/insert.sql\n+++ b/src/test/regress/sql/insert.sql\n@@ -513,7 +513,7 @@ drop table mlparted5;\n create table key_desc (a int, b int) partition by list ((a+0));\n create table key_desc_1 partition of key_desc for values in (1) partition by range (b);\n \n-create user regress_insert_other_user;\n+create user regress_insert_other_user PASSWORD NEON_PASSWORD_PLACEHOLDER;\n grant select (a) on key_desc_1 to regress_insert_other_user;\n grant insert on key_desc to regress_insert_other_user;\n \n@@ -597,7 +597,7 @@ insert into brtrigpartcon1 values (1, 'hi there');\n -- check that the message shows the appropriate column description in a\n -- situation where the partitioned table is not the primary ModifyTable node\n create table inserttest3 (f1 text default 'foo', f2 text default 'bar', f3 int);\n-create role regress_coldesc_role;\n+create role regress_coldesc_role PASSWORD NEON_PASSWORD_PLACEHOLDER;\n grant insert on inserttest3 to regress_coldesc_role;\n 
grant insert on brtrigpartcon to regress_coldesc_role;\n revoke select on brtrigpartcon from regress_coldesc_role;\ndiff --git a/src/test/regress/sql/jsonb.sql b/src/test/regress/sql/jsonb.sql\nindex 97bc2242a1..88c8b1dcdb 100644\n--- a/src/test/regress/sql/jsonb.sql\n+++ b/src/test/regress/sql/jsonb.sql\n@@ -6,7 +6,8 @@ CREATE TABLE testjsonb (\n );\n \n \\set filename :abs_srcdir '/data/jsonb.data'\n-COPY testjsonb FROM :'filename';\n+\\set command '\\\\copy testjsonb FROM ' :'filename';\n+:command\n \n -- Strings.\n SELECT '\"\"'::jsonb;\t\t\t\t-- OK.\ndiff --git a/src/test/regress/sql/largeobject.sql b/src/test/regress/sql/largeobject.sql\nindex a4aee02e3a..8839c9496a 100644\n--- a/src/test/regress/sql/largeobject.sql\n+++ b/src/test/regress/sql/largeobject.sql\n@@ -10,7 +10,7 @@\n SET bytea_output TO escape;\n \n -- Test ALTER LARGE OBJECT OWNER\n-CREATE ROLE regress_lo_user;\n+CREATE ROLE regress_lo_user PASSWORD NEON_PASSWORD_PLACEHOLDER;\n SELECT lo_create(42);\n ALTER LARGE OBJECT 42 OWNER TO regress_lo_user;\n \n@@ -189,7 +189,8 @@ SELECT lo_unlink(loid) from lotest_stash_values;\n TRUNCATE lotest_stash_values;\n \n \\set filename :abs_srcdir '/data/tenk.data'\n-INSERT INTO lotest_stash_values (loid) SELECT lo_import(:'filename');\n+\\lo_import :filename\n+INSERT INTO lotest_stash_values (loid) VALUES (:LASTOID);\n \n BEGIN;\n UPDATE lotest_stash_values SET fd=lo_open(loid, CAST(x'20000' | x'40000' AS integer));\n@@ -219,8 +220,8 @@ SELECT lo_close(fd) FROM lotest_stash_values;\n END;\n \n \\set filename :abs_builddir '/results/lotest.txt'\n-SELECT lo_export(loid, :'filename') FROM lotest_stash_values;\n-\n+SELECT loid FROM lotest_stash_values \\gset\n+\\lo_export :loid, :filename\n \\lo_import :filename\n \n \\set newloid :LASTOID\ndiff --git a/src/test/regress/sql/lock.sql b/src/test/regress/sql/lock.sql\nindex b88488c6d0..78b31e6dd3 100644\n--- a/src/test/regress/sql/lock.sql\n+++ b/src/test/regress/sql/lock.sql\n@@ -19,7 +19,7 @@ CREATE VIEW 
lock_view3 AS SELECT * from lock_view2;\n CREATE VIEW lock_view4 AS SELECT (select a from lock_tbl1a limit 1) from lock_tbl1;\n CREATE VIEW lock_view5 AS SELECT * from lock_tbl1 where a in (select * from lock_tbl1a);\n CREATE VIEW lock_view6 AS SELECT * from (select * from lock_tbl1) sub;\n-CREATE ROLE regress_rol_lock1;\n+CREATE ROLE regress_rol_lock1 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n ALTER ROLE regress_rol_lock1 SET search_path = lock_schema1;\n GRANT USAGE ON SCHEMA lock_schema1 TO regress_rol_lock1;\n \ndiff --git a/src/test/regress/sql/matview.sql b/src/test/regress/sql/matview.sql\nindex b74ee305e0..33b8b690fc 100644\n--- a/src/test/regress/sql/matview.sql\n+++ b/src/test/regress/sql/matview.sql\n@@ -209,7 +209,7 @@ SELECT * FROM mvtest_mv_v;\n DROP TABLE mvtest_v CASCADE;\n \n -- make sure running as superuser works when MV owned by another role (bug #11208)\n-CREATE ROLE regress_user_mvtest;\n+CREATE ROLE regress_user_mvtest PASSWORD NEON_PASSWORD_PLACEHOLDER;\n SET ROLE regress_user_mvtest;\n -- this test case also checks for ambiguity in the queries issued by\n -- refresh_by_match_merge(), by choosing column names that intentionally\n@@ -266,7 +266,7 @@ ROLLBACK;\n \n -- INSERT privileges if relation owner is not allowed to insert.\n CREATE SCHEMA matview_schema;\n-CREATE USER regress_matview_user;\n+CREATE USER regress_matview_user PASSWORD NEON_PASSWORD_PLACEHOLDER;\n ALTER DEFAULT PRIVILEGES FOR ROLE regress_matview_user\n   REVOKE INSERT ON TABLES FROM regress_matview_user;\n GRANT ALL ON SCHEMA matview_schema TO public;\ndiff --git a/src/test/regress/sql/merge.sql b/src/test/regress/sql/merge.sql\nindex 5ddcca84f8..99f4cef9ef 100644\n--- a/src/test/regress/sql/merge.sql\n+++ b/src/test/regress/sql/merge.sql\n@@ -2,9 +2,9 @@\n -- MERGE\n --\n \n-CREATE USER regress_merge_privs;\n-CREATE USER regress_merge_no_privs;\n-CREATE USER regress_merge_none;\n+CREATE USER regress_merge_privs PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE USER 
regress_merge_no_privs PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE USER regress_merge_none PASSWORD NEON_PASSWORD_PLACEHOLDER;\n \n DROP TABLE IF EXISTS target;\n DROP TABLE IF EXISTS source;\ndiff --git a/src/test/regress/sql/misc.sql b/src/test/regress/sql/misc.sql\nindex 165a2e175f..08d7096e2c 100644\n--- a/src/test/regress/sql/misc.sql\n+++ b/src/test/regress/sql/misc.sql\n@@ -74,22 +74,26 @@ DROP TABLE tmp;\n -- copy\n --\n \\set filename :abs_builddir '/results/onek.data'\n-COPY onek TO :'filename';\n+\\set command '\\\\copy onek TO ' :'filename';\n+:command\n \n CREATE TEMP TABLE onek_copy (LIKE onek);\n \n-COPY onek_copy FROM :'filename';\n+\\set command '\\\\copy onek_copy FROM ' :'filename';\n+:command\n \n SELECT * FROM onek EXCEPT ALL SELECT * FROM onek_copy;\n \n SELECT * FROM onek_copy EXCEPT ALL SELECT * FROM onek;\n \n \\set filename :abs_builddir '/results/stud_emp.data'\n-COPY BINARY stud_emp TO :'filename';\n+\\set command '\\\\COPY BINARY stud_emp TO ' :'filename';\n+:command\n \n CREATE TEMP TABLE stud_emp_copy (LIKE stud_emp);\n \n-COPY BINARY stud_emp_copy FROM :'filename';\n+\\set command '\\\\COPY BINARY stud_emp_copy FROM ' :'filename';\n+:command\n \n SELECT * FROM stud_emp_copy;\n \ndiff --git a/src/test/regress/sql/misc_functions.sql b/src/test/regress/sql/misc_functions.sql\nindex 76470fcb3f..09746de223 100644\n--- a/src/test/regress/sql/misc_functions.sql\n+++ b/src/test/regress/sql/misc_functions.sql\n@@ -82,7 +82,7 @@ SELECT pg_log_backend_memory_contexts(pg_backend_pid());\n SELECT pg_log_backend_memory_contexts(pid) FROM pg_stat_activity\n   WHERE backend_type = 'checkpointer';\n \n-CREATE ROLE regress_log_memory;\n+CREATE ROLE regress_log_memory PASSWORD NEON_PASSWORD_PLACEHOLDER;\n \n SELECT has_function_privilege('regress_log_memory',\n   'pg_log_backend_memory_contexts(integer)', 'EXECUTE'); -- no\n@@ -169,7 +169,7 @@ select count(*) > 0 from\n --\n -- Test replication slot directory functions\n --\n-CREATE ROLE 
regress_slot_dir_funcs;\n+CREATE ROLE regress_slot_dir_funcs PASSWORD NEON_PASSWORD_PLACEHOLDER;\n -- Not available by default.\n SELECT has_function_privilege('regress_slot_dir_funcs',\n   'pg_ls_logicalsnapdir()', 'EXECUTE');\n@@ -252,7 +252,7 @@ FROM pg_walfile_name_offset('0/0'::pg_lsn + :segment_size - 1),\n      pg_split_walfile_name(file_name);\n \n -- pg_current_logfile\n-CREATE ROLE regress_current_logfile;\n+CREATE ROLE regress_current_logfile PASSWORD NEON_PASSWORD_PLACEHOLDER;\n -- not available by default\n SELECT has_function_privilege('regress_current_logfile',\n   'pg_current_logfile()', 'EXECUTE');\ndiff --git a/src/test/regress/sql/multirangetypes.sql b/src/test/regress/sql/multirangetypes.sql\nindex 41d5524285..373be031a2 100644\n--- a/src/test/regress/sql/multirangetypes.sql\n+++ b/src/test/regress/sql/multirangetypes.sql\n@@ -704,7 +704,7 @@ drop type textrange2;\n -- Multiranges don't have their own ownership or permissions.\n --\n create type textrange1 as range(subtype=text, multirange_type_name=multitextrange1, collation=\"C\");\n-create role regress_multirange_owner;\n+create role regress_multirange_owner password NEON_PASSWORD_PLACEHOLDER;\n \n alter type multitextrange1 owner to regress_multirange_owner;  -- fail\n alter type textrange1 owner to regress_multirange_owner;\ndiff --git a/src/test/regress/sql/object_address.sql b/src/test/regress/sql/object_address.sql\nindex 1a6c61f49d..1c31ac6a53 100644\n--- a/src/test/regress/sql/object_address.sql\n+++ b/src/test/regress/sql/object_address.sql\n@@ -7,7 +7,7 @@ SET client_min_messages TO 'warning';\n DROP ROLE IF EXISTS regress_addr_user;\n RESET client_min_messages;\n \n-CREATE USER regress_addr_user;\n+CREATE USER regress_addr_user PASSWORD NEON_PASSWORD_PLACEHOLDER;\n \n -- Test generic object addressing/identification functions\n CREATE SCHEMA addr_nsp;\ndiff --git a/src/test/regress/sql/password.sql b/src/test/regress/sql/password.sql\nindex bb82aa4aa2..dd8a05e24d 100644\n--- 
a/src/test/regress/sql/password.sql\n+++ b/src/test/regress/sql/password.sql\n@@ -10,13 +10,11 @@ SET password_encryption = 'scram-sha-256'; -- ok\n \n -- consistency of password entries\n SET password_encryption = 'md5';\n-CREATE ROLE regress_passwd1;\n-ALTER ROLE regress_passwd1 PASSWORD 'role_pwd1';\n-CREATE ROLE regress_passwd2;\n-ALTER ROLE regress_passwd2 PASSWORD 'role_pwd2';\n+CREATE ROLE regress_passwd1 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE ROLE regress_passwd2 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n SET password_encryption = 'scram-sha-256';\n-CREATE ROLE regress_passwd3 PASSWORD 'role_pwd3';\n-CREATE ROLE regress_passwd4 PASSWORD NULL;\n+CREATE ROLE regress_passwd3 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE ROLE regress_passwd4 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n \n -- check list of created entries\n --\n@@ -44,26 +42,19 @@ ALTER ROLE regress_passwd2_new RENAME TO regress_passwd2;\n SET password_encryption = 'md5';\n \n -- encrypt with MD5\n-ALTER ROLE regress_passwd2 PASSWORD 'foo';\n--- already encrypted, use as they are\n-ALTER ROLE regress_passwd1 PASSWORD 'md5cd3578025fe2c3d7ed1b9a9b26238b70';\n-ALTER ROLE regress_passwd3 PASSWORD 'SCRAM-SHA-256$4096:VLK4RMaQLCvNtQ==$6YtlR4t69SguDiwFvbVgVZtuz6gpJQQqUMZ7IQJK5yI=:ps75jrHeYU4lXCcXI4O8oIdJ3eO8o2jirjruw9phBTo=';\n+ALTER ROLE regress_passwd2 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n \n SET password_encryption = 'scram-sha-256';\n -- create SCRAM secret\n-ALTER ROLE  regress_passwd4 PASSWORD 'foo';\n+ALTER ROLE  regress_passwd4 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n -- already encrypted with MD5, use as it is\n-CREATE ROLE regress_passwd5 PASSWORD 'md5e73a4b11df52a6068f8b39f90be36023';\n+-- Neon does not support encrypted passwords, use unencrypted instead\n+CREATE ROLE regress_passwd5 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n \n--- This looks like a valid SCRAM-SHA-256 secret, but it is not\n--- so it should be hashed with SCRAM-SHA-256.\n-CREATE ROLE regress_passwd6 PASSWORD 'SCRAM-SHA-256$1234';\n--- 
These may look like valid MD5 secrets, but they are not, so they\n--- should be hashed with SCRAM-SHA-256.\n--- trailing garbage at the end\n-CREATE ROLE regress_passwd7 PASSWORD 'md5012345678901234567890123456789zz';\n--- invalid length\n-CREATE ROLE regress_passwd8 PASSWORD 'md501234567890123456789012345678901zz';\n+-- Neon does not support encrypted passwords, use unencrypted instead\n+CREATE ROLE regress_passwd6 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE ROLE regress_passwd7 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE ROLE regress_passwd8 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n \n -- Changing the SCRAM iteration count\n SET scram_iterations = 1024;\n@@ -80,13 +71,10 @@ ALTER ROLE regress_passwd_empty PASSWORD 'md585939a5ce845f1a1b620742e3c659e0a';\n ALTER ROLE regress_passwd_empty PASSWORD 'SCRAM-SHA-256$4096:hpFyHTUsSWcR7O9P$LgZFIt6Oqdo27ZFKbZ2nV+vtnYM995pDh9ca6WSi120=:qVV5NeluNfUPkwm7Vqat25RjSPLkGeoZBQs6wVv+um4=';\n SELECT rolpassword FROM pg_authid WHERE rolname='regress_passwd_empty';\n \n--- Test with invalid stored and server keys.\n---\n--- The first is valid, to act as a control. The others have too long\n--- stored/server keys. 
They will be re-hashed.\n-CREATE ROLE regress_passwd_sha_len0 PASSWORD 'SCRAM-SHA-256$4096:A6xHKoH/494E941doaPOYg==$Ky+A30sewHIH3VHQLRN9vYsuzlgNyGNKCh37dy96Rqw=:COPdlNiIkrsacU5QoxydEuOH6e/KfiipeETb/bPw8ZI=';\n-CREATE ROLE regress_passwd_sha_len1 PASSWORD 'SCRAM-SHA-256$4096:A6xHKoH/494E941doaPOYg==$Ky+A30sewHIH3VHQLRN9vYsuzlgNyGNKCh37dy96RqwAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=:COPdlNiIkrsacU5QoxydEuOH6e/KfiipeETb/bPw8ZI=';\n-CREATE ROLE regress_passwd_sha_len2 PASSWORD 'SCRAM-SHA-256$4096:A6xHKoH/494E941doaPOYg==$Ky+A30sewHIH3VHQLRN9vYsuzlgNyGNKCh37dy96Rqw=:COPdlNiIkrsacU5QoxydEuOH6e/KfiipeETb/bPw8ZIAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=';\n+-- Neon does not support encrypted passwords, use unencrypted instead\n+CREATE ROLE regress_passwd_sha_len0 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE ROLE regress_passwd_sha_len1 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE ROLE regress_passwd_sha_len2 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n \n -- Check that the invalid secrets were re-hashed. 
A re-hashed secret\n -- should not contain the original salt.\ndiff --git a/src/test/regress/sql/privileges.sql b/src/test/regress/sql/privileges.sql\nindex b7e1cb6cdd..6e5a2217f1 100644\n--- a/src/test/regress/sql/privileges.sql\n+++ b/src/test/regress/sql/privileges.sql\n@@ -24,18 +24,18 @@ RESET client_min_messages;\n \n -- test proper begins here\n \n-CREATE USER regress_priv_user1;\n-CREATE USER regress_priv_user2;\n-CREATE USER regress_priv_user3;\n-CREATE USER regress_priv_user4;\n-CREATE USER regress_priv_user5;\n-CREATE USER regress_priv_user5;\t-- duplicate\n-CREATE USER regress_priv_user6;\n-CREATE USER regress_priv_user7;\n-CREATE USER regress_priv_user8;\n-CREATE USER regress_priv_user9;\n-CREATE USER regress_priv_user10;\n-CREATE ROLE regress_priv_role;\n+CREATE USER regress_priv_user1 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE USER regress_priv_user2 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE USER regress_priv_user3 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE USER regress_priv_user4 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE USER regress_priv_user5 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE USER regress_priv_user5 PASSWORD NEON_PASSWORD_PLACEHOLDER;\t-- duplicate\n+CREATE USER regress_priv_user6 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE USER regress_priv_user7 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE USER regress_priv_user8 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE USER regress_priv_user9 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE USER regress_priv_user10 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE ROLE regress_priv_role PASSWORD NEON_PASSWORD_PLACEHOLDER;\n \n -- circular ADMIN OPTION grants should be disallowed\n GRANT regress_priv_user1 TO regress_priv_user2 WITH ADMIN OPTION;\n@@ -84,11 +84,11 @@ DROP ROLE regress_priv_user5; -- should fail, dependency\n DROP ROLE regress_priv_user1, regress_priv_user5; -- ok, despite order\n \n -- recreate the roles we just dropped\n-CREATE USER regress_priv_user1;\n-CREATE USER 
regress_priv_user2;\n-CREATE USER regress_priv_user3;\n-CREATE USER regress_priv_user4;\n-CREATE USER regress_priv_user5;\n+CREATE USER regress_priv_user1 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE USER regress_priv_user2 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE USER regress_priv_user3 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE USER regress_priv_user4 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE USER regress_priv_user5 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n \n GRANT pg_read_all_data TO regress_priv_user6;\n GRANT pg_write_all_data TO regress_priv_user7;\n@@ -163,8 +163,8 @@ DROP USER regress_priv_user10;\n DROP USER regress_priv_user9;\n DROP USER regress_priv_user8;\n \n-CREATE GROUP regress_priv_group1;\n-CREATE GROUP regress_priv_group2 WITH ADMIN regress_priv_user1 USER regress_priv_user2;\n+CREATE GROUP regress_priv_group1 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE GROUP regress_priv_group2 WITH ADMIN regress_priv_user1 PASSWORD NEON_PASSWORD_PLACEHOLDER USER regress_priv_user2;\n \n ALTER GROUP regress_priv_group1 ADD USER regress_priv_user4;\n \n@@ -1160,7 +1160,7 @@ SELECT has_table_privilege('regress_priv_user1', 'atest4', 'SELECT WITH GRANT OP\n \n -- security-restricted operations\n \\c -\n-CREATE ROLE regress_sro_user;\n+CREATE ROLE regress_sro_user PASSWORD NEON_PASSWORD_PLACEHOLDER;\n \n -- Check that index expressions and predicates are run as the table's owner\n \n@@ -1656,8 +1656,8 @@ DROP SCHEMA testns CASCADE;\n -- Change owner of the schema & and rename of new schema owner\n \\c -\n \n-CREATE ROLE regress_schemauser1 superuser login;\n-CREATE ROLE regress_schemauser2 superuser login;\n+CREATE ROLE regress_schemauser1 superuser login PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE ROLE regress_schemauser2 superuser login PASSWORD NEON_PASSWORD_PLACEHOLDER;\n \n SET SESSION ROLE regress_schemauser1;\n CREATE SCHEMA testns;\n@@ -1751,7 +1751,7 @@ DROP USER regress_priv_user8; -- does not exist\n \n \n -- permissions with LOCK TABLE\n-CREATE 
USER regress_locktable_user;\n+CREATE USER regress_locktable_user PASSWORD NEON_PASSWORD_PLACEHOLDER;\n CREATE TABLE lock_table (a int);\n \n -- LOCK TABLE and SELECT permission\n@@ -1854,7 +1854,7 @@ DROP USER regress_locktable_user;\n -- switch to superuser\n \\c -\n \n-CREATE ROLE regress_readallstats;\n+CREATE ROLE regress_readallstats PASSWORD NEON_PASSWORD_PLACEHOLDER;\n \n SELECT has_table_privilege('regress_readallstats','pg_backend_memory_contexts','SELECT'); -- no\n SELECT has_table_privilege('regress_readallstats','pg_shmem_allocations','SELECT'); -- no\n@@ -1874,10 +1874,10 @@ RESET ROLE;\n DROP ROLE regress_readallstats;\n \n -- test role grantor machinery\n-CREATE ROLE regress_group;\n-CREATE ROLE regress_group_direct_manager;\n-CREATE ROLE regress_group_indirect_manager;\n-CREATE ROLE regress_group_member;\n+CREATE ROLE regress_group PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE ROLE regress_group_direct_manager PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE ROLE regress_group_indirect_manager PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE ROLE regress_group_member PASSWORD NEON_PASSWORD_PLACEHOLDER;\n \n GRANT regress_group TO regress_group_direct_manager WITH INHERIT FALSE, ADMIN TRUE;\n GRANT regress_group_direct_manager TO regress_group_indirect_manager;\n@@ -1899,9 +1899,9 @@ DROP ROLE regress_group_indirect_manager;\n DROP ROLE regress_group_member;\n \n -- test SET and INHERIT options with object ownership changes\n-CREATE ROLE regress_roleoption_protagonist;\n-CREATE ROLE regress_roleoption_donor;\n-CREATE ROLE regress_roleoption_recipient;\n+CREATE ROLE regress_roleoption_protagonist PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE ROLE regress_roleoption_donor PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE ROLE regress_roleoption_recipient PASSWORD NEON_PASSWORD_PLACEHOLDER;\n CREATE SCHEMA regress_roleoption;\n GRANT CREATE, USAGE ON SCHEMA regress_roleoption TO PUBLIC;\n GRANT regress_roleoption_donor TO regress_roleoption_protagonist WITH INHERIT 
TRUE, SET FALSE;\n@@ -1929,9 +1929,9 @@ DROP ROLE regress_roleoption_donor;\n DROP ROLE regress_roleoption_recipient;\n \n -- MAINTAIN\n-CREATE ROLE regress_no_maintain;\n-CREATE ROLE regress_maintain;\n-CREATE ROLE regress_maintain_all IN ROLE pg_maintain;\n+CREATE ROLE regress_no_maintain PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE ROLE regress_maintain PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE ROLE regress_maintain_all IN ROLE pg_maintain PASSWORD NEON_PASSWORD_PLACEHOLDER;\n CREATE TABLE maintain_test (a INT);\n CREATE INDEX ON maintain_test (a);\n GRANT MAINTAIN ON maintain_test TO regress_maintain;\ndiff --git a/src/test/regress/sql/psql.sql b/src/test/regress/sql/psql.sql\nindex 3b3c6f6e29..b09d6231f8 100644\n--- a/src/test/regress/sql/psql.sql\n+++ b/src/test/regress/sql/psql.sql\n@@ -500,7 +500,7 @@ select 1 where false;\n \\pset expanded off\n \n CREATE SCHEMA tableam_display;\n-CREATE ROLE regress_display_role;\n+CREATE ROLE regress_display_role PASSWORD NEON_PASSWORD_PLACEHOLDER;\n ALTER SCHEMA tableam_display OWNER TO regress_display_role;\n SET search_path TO tableam_display;\n CREATE ACCESS METHOD heap_psql TYPE TABLE HANDLER heap_tableam_handler;\n@@ -1182,7 +1182,7 @@ reset debug_parallel_query;\n \\unset FETCH_COUNT\n \n create schema testpart;\n-create role regress_partitioning_role;\n+create role regress_partitioning_role PASSWORD NEON_PASSWORD_PLACEHOLDER;\n \n alter schema testpart owner to regress_partitioning_role;\n \n@@ -1293,7 +1293,7 @@ reset work_mem;\n \n -- check \\df+\n -- we have to use functions with a predictable owner name, so make a role\n-create role regress_psql_user superuser;\n+create role regress_psql_user superuser PASSWORD NEON_PASSWORD_PLACEHOLDER;\n begin;\n set session authorization regress_psql_user;\n \n@@ -1439,11 +1439,14 @@ CREATE TEMPORARY TABLE reload_output(\n );\n \n SELECT 1 AS a \\g :g_out_file\n-COPY reload_output(line) FROM :'g_out_file';\n+\\set command '\\\\COPY reload_output(line) FROM ' 
:'g_out_file';\n+:command\n SELECT 2 AS b\\; SELECT 3 AS c\\; SELECT 4 AS d \\g :g_out_file\n-COPY reload_output(line) FROM :'g_out_file';\n+\\set command '\\\\COPY reload_output(line) FROM ' :'g_out_file';\n+:command\n COPY (SELECT 'foo') TO STDOUT \\; COPY (SELECT 'bar') TO STDOUT \\g :g_out_file\n-COPY reload_output(line) FROM :'g_out_file';\n+\\set command '\\\\COPY reload_output(line) FROM ' :'g_out_file';\n+:command\n \n SELECT line FROM reload_output ORDER BY lineno;\n TRUNCATE TABLE reload_output;\n@@ -1460,17 +1463,20 @@ SELECT 1 AS a\\; SELECT 2 AS b\\; SELECT 3 AS c;\n -- COPY TO file\n -- The data goes to :g_out_file and the status to :o_out_file\n \\set QUIET false\n-COPY (SELECT unique1 FROM onek ORDER BY unique1 LIMIT 10) TO :'g_out_file';\n+\\set command '\\\\COPY (SELECT unique1 FROM onek ORDER BY unique1 LIMIT 10) TO ' :'g_out_file';\n+:command\n -- DML command status\n UPDATE onek SET unique1 = unique1 WHERE false;\n \\set QUIET true\n \\o\n \n -- Check the contents of the files generated.\n-COPY reload_output(line) FROM :'g_out_file';\n+\\set command '\\\\COPY reload_output(line) FROM ' :'g_out_file';\n+:command\n SELECT line FROM reload_output ORDER BY lineno;\n TRUNCATE TABLE reload_output;\n-COPY reload_output(line) FROM :'o_out_file';\n+\\set command '\\\\COPY reload_output(line) FROM ' :'o_out_file';\n+:command\n SELECT line FROM reload_output ORDER BY lineno;\n TRUNCATE TABLE reload_output;\n \n@@ -1483,10 +1489,12 @@ COPY (SELECT 'foo2') TO STDOUT \\; COPY (SELECT 'bar2') TO STDOUT \\g :g_out_file\n \\o\n \n -- Check the contents of the files generated.\n-COPY reload_output(line) FROM :'g_out_file';\n+\\set command '\\\\COPY reload_output(line) FROM ' :'g_out_file';\n+:command\n SELECT line FROM reload_output ORDER BY lineno;\n TRUNCATE TABLE reload_output;\n-COPY reload_output(line) FROM :'o_out_file';\n+\\set command '\\\\COPY reload_output(line) FROM ' :'o_out_file';\n+:command\n SELECT line FROM reload_output ORDER BY lineno;\n \n 
DROP TABLE reload_output;\n@@ -1834,10 +1842,10 @@ DROP FUNCTION psql_error;\n \\dX \"no.such.database\".\"no.such.schema\".\"no.such.extended.statistics\"\n \n -- check \\drg and \\du\n-CREATE ROLE regress_du_role0;\n-CREATE ROLE regress_du_role1;\n-CREATE ROLE regress_du_role2;\n-CREATE ROLE regress_du_admin;\n+CREATE ROLE regress_du_role0 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE ROLE regress_du_role1 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE ROLE regress_du_role2 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE ROLE regress_du_admin PASSWORD NEON_PASSWORD_PLACEHOLDER;\n \n GRANT regress_du_role0 TO regress_du_admin WITH ADMIN TRUE;\n GRANT regress_du_role1 TO regress_du_admin WITH ADMIN TRUE;\ndiff --git a/src/test/regress/sql/publication.sql b/src/test/regress/sql/publication.sql\nindex 479d4f3264..6d348a93e7 100644\n--- a/src/test/regress/sql/publication.sql\n+++ b/src/test/regress/sql/publication.sql\n@@ -1,9 +1,9 @@\n --\n -- PUBLICATION\n --\n-CREATE ROLE regress_publication_user LOGIN SUPERUSER;\n-CREATE ROLE regress_publication_user2;\n-CREATE ROLE regress_publication_user_dummy LOGIN NOSUPERUSER;\n+CREATE ROLE regress_publication_user LOGIN SUPERUSER PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE ROLE regress_publication_user2 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE ROLE regress_publication_user_dummy LOGIN NOSUPERUSER PASSWORD NEON_PASSWORD_PLACEHOLDER;\n SET SESSION AUTHORIZATION 'regress_publication_user';\n \n -- suppress warning that depends on wal_level\n@@ -810,7 +810,7 @@ DROP PUBLICATION testpub2;\n DROP PUBLICATION testpub3;\n \n SET ROLE regress_publication_user;\n-CREATE ROLE regress_publication_user3;\n+CREATE ROLE regress_publication_user3 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n GRANT regress_publication_user2 TO regress_publication_user3;\n SET client_min_messages = 'ERROR';\n CREATE PUBLICATION testpub4 FOR TABLES IN SCHEMA pub_test;\ndiff --git a/src/test/regress/sql/regproc.sql b/src/test/regress/sql/regproc.sql\nindex 
232289ac39..d967ef0cd3 100644\n--- a/src/test/regress/sql/regproc.sql\n+++ b/src/test/regress/sql/regproc.sql\n@@ -4,7 +4,7 @@\n \n /* If objects exist, return oids */\n \n-CREATE ROLE regress_regrole_test;\n+CREATE ROLE regress_regrole_test PASSWORD NEON_PASSWORD_PLACEHOLDER;\n \n -- without schemaname\n \ndiff --git a/src/test/regress/sql/roleattributes.sql b/src/test/regress/sql/roleattributes.sql\nindex c961b2d730..0859b89c4f 100644\n--- a/src/test/regress/sql/roleattributes.sql\n+++ b/src/test/regress/sql/roleattributes.sql\n@@ -1,83 +1,83 @@\n -- default for superuser is false\n-CREATE ROLE regress_test_def_superuser;\n+CREATE ROLE regress_test_def_superuser PASSWORD NEON_PASSWORD_PLACEHOLDER;\n \n-SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, rolpassword, rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_def_superuser';\n-CREATE ROLE regress_test_superuser WITH SUPERUSER;\n-SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, rolpassword, rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_superuser';\n+SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, regexp_replace(rolpassword, '(SCRAM-SHA-256)\\$(\\d+):([a-zA-Z0-9+/=]+)\\$([a-zA-Z0-9+=/]+):([a-zA-Z0-9+/=]+)', '\\1$\\2:<salt>$<storedkey>:<serverkey>'), rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_def_superuser';\n+CREATE ROLE regress_test_superuser WITH SUPERUSER PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, regexp_replace(rolpassword, '(SCRAM-SHA-256)\\$(\\d+):([a-zA-Z0-9+/=]+)\\$([a-zA-Z0-9+=/]+):([a-zA-Z0-9+/=]+)', '\\1$\\2:<salt>$<storedkey>:<serverkey>'), rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_superuser';\n ALTER ROLE 
regress_test_superuser WITH NOSUPERUSER;\n-SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, rolpassword, rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_superuser';\n+SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, regexp_replace(rolpassword, '(SCRAM-SHA-256)\\$(\\d+):([a-zA-Z0-9+/=]+)\\$([a-zA-Z0-9+=/]+):([a-zA-Z0-9+/=]+)', '\\1$\\2:<salt>$<storedkey>:<serverkey>'), rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_superuser';\n ALTER ROLE regress_test_superuser WITH SUPERUSER;\n-SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, rolpassword, rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_superuser';\n+SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, regexp_replace(rolpassword, '(SCRAM-SHA-256)\\$(\\d+):([a-zA-Z0-9+/=]+)\\$([a-zA-Z0-9+=/]+):([a-zA-Z0-9+/=]+)', '\\1$\\2:<salt>$<storedkey>:<serverkey>'), rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_superuser';\n \n -- default for inherit is true\n-CREATE ROLE regress_test_def_inherit;\n-SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, rolpassword, rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_def_inherit';\n-CREATE ROLE regress_test_inherit WITH NOINHERIT;\n-SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, rolpassword, rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_inherit';\n+CREATE ROLE regress_test_def_inherit PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, regexp_replace(rolpassword, 
'(SCRAM-SHA-256)\\$(\\d+):([a-zA-Z0-9+/=]+)\\$([a-zA-Z0-9+=/]+):([a-zA-Z0-9+/=]+)', '\\1$\\2:<salt>$<storedkey>:<serverkey>'), rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_def_inherit';\n+CREATE ROLE regress_test_inherit WITH NOINHERIT PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, regexp_replace(rolpassword, '(SCRAM-SHA-256)\\$(\\d+):([a-zA-Z0-9+/=]+)\\$([a-zA-Z0-9+=/]+):([a-zA-Z0-9+/=]+)', '\\1$\\2:<salt>$<storedkey>:<serverkey>'), rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_inherit';\n ALTER ROLE regress_test_inherit WITH INHERIT;\n-SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, rolpassword, rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_inherit';\n+SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, regexp_replace(rolpassword, '(SCRAM-SHA-256)\\$(\\d+):([a-zA-Z0-9+/=]+)\\$([a-zA-Z0-9+=/]+):([a-zA-Z0-9+/=]+)', '\\1$\\2:<salt>$<storedkey>:<serverkey>'), rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_inherit';\n ALTER ROLE regress_test_inherit WITH NOINHERIT;\n-SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, rolpassword, rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_inherit';\n+SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, regexp_replace(rolpassword, '(SCRAM-SHA-256)\\$(\\d+):([a-zA-Z0-9+/=]+)\\$([a-zA-Z0-9+=/]+):([a-zA-Z0-9+/=]+)', '\\1$\\2:<salt>$<storedkey>:<serverkey>'), rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_inherit';\n \n -- default for create role is false\n-CREATE ROLE regress_test_def_createrole;\n-SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, 
rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, rolpassword, rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_def_createrole';\n-CREATE ROLE regress_test_createrole WITH CREATEROLE;\n-SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, rolpassword, rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_createrole';\n+CREATE ROLE regress_test_def_createrole PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, regexp_replace(rolpassword, '(SCRAM-SHA-256)\\$(\\d+):([a-zA-Z0-9+/=]+)\\$([a-zA-Z0-9+=/]+):([a-zA-Z0-9+/=]+)', '\\1$\\2:<salt>$<storedkey>:<serverkey>'), rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_def_createrole';\n+CREATE ROLE regress_test_createrole WITH CREATEROLE PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, regexp_replace(rolpassword, '(SCRAM-SHA-256)\\$(\\d+):([a-zA-Z0-9+/=]+)\\$([a-zA-Z0-9+=/]+):([a-zA-Z0-9+/=]+)', '\\1$\\2:<salt>$<storedkey>:<serverkey>'), rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_createrole';\n ALTER ROLE regress_test_createrole WITH NOCREATEROLE;\n-SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, rolpassword, rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_createrole';\n+SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, regexp_replace(rolpassword, '(SCRAM-SHA-256)\\$(\\d+):([a-zA-Z0-9+/=]+)\\$([a-zA-Z0-9+=/]+):([a-zA-Z0-9+/=]+)', '\\1$\\2:<salt>$<storedkey>:<serverkey>'), rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_createrole';\n ALTER ROLE regress_test_createrole WITH CREATEROLE;\n-SELECT rolname, rolsuper, rolinherit, 
rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, rolpassword, rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_createrole';\n+SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, regexp_replace(rolpassword, '(SCRAM-SHA-256)\\$(\\d+):([a-zA-Z0-9+/=]+)\\$([a-zA-Z0-9+=/]+):([a-zA-Z0-9+/=]+)', '\\1$\\2:<salt>$<storedkey>:<serverkey>'), rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_createrole';\n \n -- default for create database is false\n-CREATE ROLE regress_test_def_createdb;\n-SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, rolpassword, rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_def_createdb';\n-CREATE ROLE regress_test_createdb WITH CREATEDB;\n-SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, rolpassword, rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_createdb';\n+CREATE ROLE regress_test_def_createdb PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, regexp_replace(rolpassword, '(SCRAM-SHA-256)\\$(\\d+):([a-zA-Z0-9+/=]+)\\$([a-zA-Z0-9+=/]+):([a-zA-Z0-9+/=]+)', '\\1$\\2:<salt>$<storedkey>:<serverkey>'), rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_def_createdb';\n+CREATE ROLE regress_test_createdb WITH CREATEDB PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, regexp_replace(rolpassword, '(SCRAM-SHA-256)\\$(\\d+):([a-zA-Z0-9+/=]+)\\$([a-zA-Z0-9+=/]+):([a-zA-Z0-9+/=]+)', '\\1$\\2:<salt>$<storedkey>:<serverkey>'), rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_createdb';\n ALTER ROLE regress_test_createdb WITH 
NOCREATEDB;\n-SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, rolpassword, rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_createdb';\n+SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, regexp_replace(rolpassword, '(SCRAM-SHA-256)\\$(\\d+):([a-zA-Z0-9+/=]+)\\$([a-zA-Z0-9+=/]+):([a-zA-Z0-9+/=]+)', '\\1$\\2:<salt>$<storedkey>:<serverkey>'), rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_createdb';\n ALTER ROLE regress_test_createdb WITH CREATEDB;\n-SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, rolpassword, rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_createdb';\n+SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, regexp_replace(rolpassword, '(SCRAM-SHA-256)\\$(\\d+):([a-zA-Z0-9+/=]+)\\$([a-zA-Z0-9+=/]+):([a-zA-Z0-9+/=]+)', '\\1$\\2:<salt>$<storedkey>:<serverkey>'), rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_createdb';\n \n -- default for can login is false for role\n-CREATE ROLE regress_test_def_role_canlogin;\n-SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, rolpassword, rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_def_role_canlogin';\n-CREATE ROLE regress_test_role_canlogin WITH LOGIN;\n-SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, rolpassword, rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_role_canlogin';\n+CREATE ROLE regress_test_def_role_canlogin PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, regexp_replace(rolpassword, 
'(SCRAM-SHA-256)\\$(\\d+):([a-zA-Z0-9+/=]+)\\$([a-zA-Z0-9+=/]+):([a-zA-Z0-9+/=]+)', '\\1$\\2:<salt>$<storedkey>:<serverkey>'), rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_def_role_canlogin';\n+CREATE ROLE regress_test_role_canlogin WITH LOGIN PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, regexp_replace(rolpassword, '(SCRAM-SHA-256)\\$(\\d+):([a-zA-Z0-9+/=]+)\\$([a-zA-Z0-9+=/]+):([a-zA-Z0-9+/=]+)', '\\1$\\2:<salt>$<storedkey>:<serverkey>'), rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_role_canlogin';\n ALTER ROLE regress_test_role_canlogin WITH NOLOGIN;\n-SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, rolpassword, rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_role_canlogin';\n+SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, regexp_replace(rolpassword, '(SCRAM-SHA-256)\\$(\\d+):([a-zA-Z0-9+/=]+)\\$([a-zA-Z0-9+=/]+):([a-zA-Z0-9+/=]+)', '\\1$\\2:<salt>$<storedkey>:<serverkey>'), rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_role_canlogin';\n ALTER ROLE regress_test_role_canlogin WITH LOGIN;\n-SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, rolpassword, rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_role_canlogin';\n+SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, regexp_replace(rolpassword, '(SCRAM-SHA-256)\\$(\\d+):([a-zA-Z0-9+/=]+)\\$([a-zA-Z0-9+=/]+):([a-zA-Z0-9+/=]+)', '\\1$\\2:<salt>$<storedkey>:<serverkey>'), rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_role_canlogin';\n \n -- default for can login is true for user\n-CREATE USER regress_test_def_user_canlogin;\n-SELECT 
rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, rolpassword, rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_def_user_canlogin';\n-CREATE USER regress_test_user_canlogin WITH NOLOGIN;\n-SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, rolpassword, rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_user_canlogin';\n+CREATE USER regress_test_def_user_canlogin PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, regexp_replace(rolpassword, '(SCRAM-SHA-256)\\$(\\d+):([a-zA-Z0-9+/=]+)\\$([a-zA-Z0-9+=/]+):([a-zA-Z0-9+/=]+)', '\\1$\\2:<salt>$<storedkey>:<serverkey>'), rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_def_user_canlogin';\n+CREATE USER regress_test_user_canlogin WITH NOLOGIN PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, regexp_replace(rolpassword, '(SCRAM-SHA-256)\\$(\\d+):([a-zA-Z0-9+/=]+)\\$([a-zA-Z0-9+=/]+):([a-zA-Z0-9+/=]+)', '\\1$\\2:<salt>$<storedkey>:<serverkey>'), rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_user_canlogin';\n ALTER USER regress_test_user_canlogin WITH LOGIN;\n-SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, rolpassword, rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_user_canlogin';\n+SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, regexp_replace(rolpassword, '(SCRAM-SHA-256)\\$(\\d+):([a-zA-Z0-9+/=]+)\\$([a-zA-Z0-9+=/]+):([a-zA-Z0-9+/=]+)', '\\1$\\2:<salt>$<storedkey>:<serverkey>'), rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_user_canlogin';\n ALTER USER 
regress_test_user_canlogin WITH NOLOGIN;\n-SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, rolpassword, rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_user_canlogin';\n+SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, regexp_replace(rolpassword, '(SCRAM-SHA-256)\\$(\\d+):([a-zA-Z0-9+/=]+)\\$([a-zA-Z0-9+=/]+):([a-zA-Z0-9+/=]+)', '\\1$\\2:<salt>$<storedkey>:<serverkey>'), rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_user_canlogin';\n \n -- default for replication is false\n-CREATE ROLE regress_test_def_replication;\n-SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, rolpassword, rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_def_replication';\n-CREATE ROLE regress_test_replication WITH REPLICATION;\n-SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, rolpassword, rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_replication';\n+CREATE ROLE regress_test_def_replication PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, regexp_replace(rolpassword, '(SCRAM-SHA-256)\\$(\\d+):([a-zA-Z0-9+/=]+)\\$([a-zA-Z0-9+=/]+):([a-zA-Z0-9+/=]+)', '\\1$\\2:<salt>$<storedkey>:<serverkey>'), rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_def_replication';\n+CREATE ROLE regress_test_replication WITH REPLICATION PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, regexp_replace(rolpassword, '(SCRAM-SHA-256)\\$(\\d+):([a-zA-Z0-9+/=]+)\\$([a-zA-Z0-9+=/]+):([a-zA-Z0-9+/=]+)', '\\1$\\2:<salt>$<storedkey>:<serverkey>'), rolvaliduntil 
FROM pg_authid WHERE rolname = 'regress_test_replication';\n ALTER ROLE regress_test_replication WITH NOREPLICATION;\n-SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, rolpassword, rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_replication';\n+SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, regexp_replace(rolpassword, '(SCRAM-SHA-256)\\$(\\d+):([a-zA-Z0-9+/=]+)\\$([a-zA-Z0-9+=/]+):([a-zA-Z0-9+/=]+)', '\\1$\\2:<salt>$<storedkey>:<serverkey>'), rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_replication';\n ALTER ROLE regress_test_replication WITH REPLICATION;\n-SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, rolpassword, rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_replication';\n+SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, regexp_replace(rolpassword, '(SCRAM-SHA-256)\\$(\\d+):([a-zA-Z0-9+/=]+)\\$([a-zA-Z0-9+=/]+):([a-zA-Z0-9+/=]+)', '\\1$\\2:<salt>$<storedkey>:<serverkey>'), rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_replication';\n \n -- default for bypassrls is false\n-CREATE ROLE regress_test_def_bypassrls;\n-SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, rolpassword, rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_def_bypassrls';\n-CREATE ROLE regress_test_bypassrls WITH BYPASSRLS;\n-SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, rolpassword, rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_bypassrls';\n+CREATE ROLE regress_test_def_bypassrls PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+SELECT rolname, rolsuper, rolinherit, rolcreaterole, 
rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, regexp_replace(rolpassword, '(SCRAM-SHA-256)\\$(\\d+):([a-zA-Z0-9+/=]+)\\$([a-zA-Z0-9+=/]+):([a-zA-Z0-9+/=]+)', '\\1$\\2:<salt>$<storedkey>:<serverkey>'), rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_def_bypassrls';\n+CREATE ROLE regress_test_bypassrls WITH BYPASSRLS PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, regexp_replace(rolpassword, '(SCRAM-SHA-256)\\$(\\d+):([a-zA-Z0-9+/=]+)\\$([a-zA-Z0-9+=/]+):([a-zA-Z0-9+/=]+)', '\\1$\\2:<salt>$<storedkey>:<serverkey>'), rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_bypassrls';\n ALTER ROLE regress_test_bypassrls WITH NOBYPASSRLS;\n-SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, rolpassword, rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_bypassrls';\n+SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, regexp_replace(rolpassword, '(SCRAM-SHA-256)\\$(\\d+):([a-zA-Z0-9+/=]+)\\$([a-zA-Z0-9+=/]+):([a-zA-Z0-9+/=]+)', '\\1$\\2:<salt>$<storedkey>:<serverkey>'), rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_bypassrls';\n ALTER ROLE regress_test_bypassrls WITH BYPASSRLS;\n-SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, rolpassword, rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_bypassrls';\n+SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, regexp_replace(rolpassword, '(SCRAM-SHA-256)\\$(\\d+):([a-zA-Z0-9+/=]+)\\$([a-zA-Z0-9+=/]+):([a-zA-Z0-9+/=]+)', '\\1$\\2:<salt>$<storedkey>:<serverkey>'), rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_bypassrls';\n \n -- clean up roles\n DROP ROLE 
regress_test_def_superuser;\ndiff --git a/src/test/regress/sql/rowsecurity.sql b/src/test/regress/sql/rowsecurity.sql\nindex eab7d99003..0cf1139e01 100644\n--- a/src/test/regress/sql/rowsecurity.sql\n+++ b/src/test/regress/sql/rowsecurity.sql\n@@ -20,13 +20,13 @@ DROP SCHEMA IF EXISTS regress_rls_schema CASCADE;\n RESET client_min_messages;\n \n -- initial setup\n-CREATE USER regress_rls_alice NOLOGIN;\n-CREATE USER regress_rls_bob NOLOGIN;\n-CREATE USER regress_rls_carol NOLOGIN;\n-CREATE USER regress_rls_dave NOLOGIN;\n-CREATE USER regress_rls_exempt_user BYPASSRLS NOLOGIN;\n-CREATE ROLE regress_rls_group1 NOLOGIN;\n-CREATE ROLE regress_rls_group2 NOLOGIN;\n+CREATE USER regress_rls_alice NOLOGIN PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE USER regress_rls_bob NOLOGIN PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE USER regress_rls_carol NOLOGIN PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE USER regress_rls_dave NOLOGIN PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE USER regress_rls_exempt_user BYPASSRLS NOLOGIN PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE ROLE regress_rls_group1 NOLOGIN PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE ROLE regress_rls_group2 NOLOGIN PASSWORD NEON_PASSWORD_PLACEHOLDER;\n \n GRANT regress_rls_group1 TO regress_rls_bob;\n GRANT regress_rls_group2 TO regress_rls_carol;\n@@ -2105,8 +2105,8 @@ SELECT count(*) = 0 FROM pg_depend\n -- DROP OWNED BY testing\n RESET SESSION AUTHORIZATION;\n \n-CREATE ROLE regress_rls_dob_role1;\n-CREATE ROLE regress_rls_dob_role2;\n+CREATE ROLE regress_rls_dob_role1 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE ROLE regress_rls_dob_role2 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n \n CREATE TABLE dob_t1 (c1 int);\n CREATE TABLE dob_t2 (c1 int) PARTITION BY RANGE (c1);\ndiff --git a/src/test/regress/sql/rules.sql b/src/test/regress/sql/rules.sql\nindex 4a5fa50585..a9e9eab77d 100644\n--- a/src/test/regress/sql/rules.sql\n+++ b/src/test/regress/sql/rules.sql\n@@ -1390,7 +1390,7 @@ DROP TABLE ruletest2;\n -- Test non-SELECT 
rule on security invoker view.\n -- Should use view owner's permissions.\n --\n-CREATE USER regress_rule_user1;\n+CREATE USER regress_rule_user1 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n \n CREATE TABLE ruletest_t1 (x int);\n CREATE TABLE ruletest_t2 (x int);\ndiff --git a/src/test/regress/sql/security_label.sql b/src/test/regress/sql/security_label.sql\nindex 98e6a5f211..68c868fef2 100644\n--- a/src/test/regress/sql/security_label.sql\n+++ b/src/test/regress/sql/security_label.sql\n@@ -10,8 +10,8 @@ DROP ROLE IF EXISTS regress_seclabel_user2;\n \n RESET client_min_messages;\n \n-CREATE USER regress_seclabel_user1 WITH CREATEROLE;\n-CREATE USER regress_seclabel_user2;\n+CREATE USER regress_seclabel_user1 WITH CREATEROLE PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE USER regress_seclabel_user2 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n \n CREATE TABLE seclabel_tbl1 (a int, b text);\n CREATE TABLE seclabel_tbl2 (x int, y text);\ndiff --git a/src/test/regress/sql/select_into.sql b/src/test/regress/sql/select_into.sql\nindex 689c448cc2..223ceb1d75 100644\n--- a/src/test/regress/sql/select_into.sql\n+++ b/src/test/regress/sql/select_into.sql\n@@ -20,7 +20,7 @@ DROP TABLE sitmp1;\n -- SELECT INTO and INSERT permission, if owner is not allowed to insert.\n --\n CREATE SCHEMA selinto_schema;\n-CREATE USER regress_selinto_user;\n+CREATE USER regress_selinto_user PASSWORD NEON_PASSWORD_PLACEHOLDER;\n ALTER DEFAULT PRIVILEGES FOR ROLE regress_selinto_user\n \t  REVOKE INSERT ON TABLES FROM regress_selinto_user;\n GRANT ALL ON SCHEMA selinto_schema TO public;\ndiff --git a/src/test/regress/sql/select_parallel.sql b/src/test/regress/sql/select_parallel.sql\nindex 3e4bfcb71f..99757eff3c 100644\n--- a/src/test/regress/sql/select_parallel.sql\n+++ b/src/test/regress/sql/select_parallel.sql\n@@ -498,7 +498,7 @@ SELECT 1 FROM tenk1_vw_sec\n rollback;\n \n -- test that function option SET ROLE works in parallel workers.\n-create role regress_parallel_worker;\n+create role 
regress_parallel_worker PASSWORD NEON_PASSWORD_PLACEHOLDER;\n \n create function set_and_report_role() returns text as\n   $$ select current_setting('role') $$ language sql parallel safe\ndiff --git a/src/test/regress/sql/select_views.sql b/src/test/regress/sql/select_views.sql\nindex e742f13699..7bd0255df8 100644\n--- a/src/test/regress/sql/select_views.sql\n+++ b/src/test/regress/sql/select_views.sql\n@@ -12,7 +12,7 @@ SELECT * FROM toyemp WHERE name = 'sharon';\n --\n -- Test for Leaky view scenario\n --\n-CREATE ROLE regress_alice;\n+CREATE ROLE regress_alice PASSWORD NEON_PASSWORD_PLACEHOLDER;\n \n CREATE FUNCTION f_leak (text)\n        RETURNS bool LANGUAGE 'plpgsql' COST 0.0000001\ndiff --git a/src/test/regress/sql/sequence.sql b/src/test/regress/sql/sequence.sql\nindex 793f1415f6..ec07c1f193 100644\n--- a/src/test/regress/sql/sequence.sql\n+++ b/src/test/regress/sql/sequence.sql\n@@ -293,7 +293,7 @@ ROLLBACK;\n \n -- privileges tests\n \n-CREATE USER regress_seq_user;\n+CREATE USER regress_seq_user PASSWORD NEON_PASSWORD_PLACEHOLDER;\n \n -- nextval\n BEGIN;\ndiff --git a/src/test/regress/sql/stats.sql b/src/test/regress/sql/stats.sql\nindex d8ac0d06f4..c9cfcea208 100644\n--- a/src/test/regress/sql/stats.sql\n+++ b/src/test/regress/sql/stats.sql\n@@ -631,23 +631,6 @@ SELECT :io_sum_shared_after_writes > :io_sum_shared_before_writes;\n SELECT current_setting('fsync') = 'off'\n   OR :io_sum_shared_after_fsyncs > :io_sum_shared_before_fsyncs;\n \n--- Change the tablespace so that the table is rewritten directly, then SELECT\n--- from it to cause it to be read back into shared buffers.\n-SELECT sum(reads) AS io_sum_shared_before_reads\n-  FROM pg_stat_io WHERE context = 'normal' AND object = 'relation' \\gset\n--- Do this in a transaction to prevent spurious failures due to concurrent accesses to our newly\n--- rewritten table, e.g. 
by autovacuum.\n-BEGIN;\n-ALTER TABLE test_io_shared SET TABLESPACE regress_tblspace;\n--- SELECT from the table so that the data is read into shared buffers and\n--- context 'normal', object 'relation' reads are counted.\n-SELECT COUNT(*) FROM test_io_shared;\n-COMMIT;\n-SELECT pg_stat_force_next_flush();\n-SELECT sum(reads) AS io_sum_shared_after_reads\n-  FROM pg_stat_io WHERE context = 'normal' AND object = 'relation'  \\gset\n-SELECT :io_sum_shared_after_reads > :io_sum_shared_before_reads;\n-\n SELECT sum(hits) AS io_sum_shared_before_hits\n   FROM pg_stat_io WHERE context = 'normal' AND object = 'relation' \\gset\n -- Select from the table again to count hits.\ndiff --git a/src/test/regress/sql/stats_ext.sql b/src/test/regress/sql/stats_ext.sql\nindex 0c08a6cc42..7a5b1036d8 100644\n--- a/src/test/regress/sql/stats_ext.sql\n+++ b/src/test/regress/sql/stats_ext.sql\n@@ -50,7 +50,7 @@ DROP TABLE ext_stats_test;\n CREATE TABLE ab1 (a INTEGER, b INTEGER, c INTEGER);\n CREATE STATISTICS IF NOT EXISTS ab1_a_b_stats ON a, b FROM ab1;\n COMMENT ON STATISTICS ab1_a_b_stats IS 'new comment';\n-CREATE ROLE regress_stats_ext;\n+CREATE ROLE regress_stats_ext PASSWORD NEON_PASSWORD_PLACEHOLDER;\n SET SESSION AUTHORIZATION regress_stats_ext;\n COMMENT ON STATISTICS ab1_a_b_stats IS 'changed comment';\n DROP STATISTICS ab1_a_b_stats;\n@@ -1607,7 +1607,7 @@ drop statistics stts_t1_expr_expr_stat;\n set search_path to public, stts_s1;\n \\dX\n \n-create role regress_stats_ext nosuperuser;\n+create role regress_stats_ext nosuperuser PASSWORD NEON_PASSWORD_PLACEHOLDER;\n set role regress_stats_ext;\n \\dX\n reset role;\n@@ -1618,7 +1618,7 @@ drop user regress_stats_ext;\n reset search_path;\n \n -- User with no access\n-CREATE USER regress_stats_user1;\n+CREATE USER regress_stats_user1 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n GRANT USAGE ON SCHEMA tststats TO regress_stats_user1;\n SET SESSION AUTHORIZATION regress_stats_user1;\n SELECT * FROM tststats.priv_test_tbl; -- Permission 
denied\ndiff --git a/src/test/regress/sql/subscription.sql b/src/test/regress/sql/subscription.sql\nindex 3e5ba4cb8c..a35f030908 100644\n--- a/src/test/regress/sql/subscription.sql\n+++ b/src/test/regress/sql/subscription.sql\n@@ -2,10 +2,10 @@\n -- SUBSCRIPTION\n --\n \n-CREATE ROLE regress_subscription_user LOGIN SUPERUSER;\n-CREATE ROLE regress_subscription_user2;\n-CREATE ROLE regress_subscription_user3 IN ROLE pg_create_subscription;\n-CREATE ROLE regress_subscription_user_dummy LOGIN NOSUPERUSER;\n+CREATE ROLE regress_subscription_user LOGIN SUPERUSER PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE ROLE regress_subscription_user2 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE ROLE regress_subscription_user3 PASSWORD NEON_PASSWORD_PLACEHOLDER IN ROLE pg_create_subscription;\n+CREATE ROLE regress_subscription_user_dummy LOGIN NOSUPERUSER PASSWORD NEON_PASSWORD_PLACEHOLDER;\n SET SESSION AUTHORIZATION 'regress_subscription_user';\n \n -- fail - no publications\ndiff --git a/src/test/regress/sql/test_setup.sql b/src/test/regress/sql/test_setup.sql\nindex 06b0e2121f..01444f9426 100644\n--- a/src/test/regress/sql/test_setup.sql\n+++ b/src/test/regress/sql/test_setup.sql\n@@ -135,7 +135,8 @@ CREATE TABLE onek (\n );\n \n \\set filename :abs_srcdir '/data/onek.data'\n-COPY onek FROM :'filename';\n+\\set command '\\\\copy onek FROM ' :'filename';\n+:command\n VACUUM ANALYZE onek;\n \n CREATE TABLE onek2 AS SELECT * FROM onek;\n@@ -161,7 +162,8 @@ CREATE TABLE tenk1 (\n );\n \n \\set filename :abs_srcdir '/data/tenk.data'\n-COPY tenk1 FROM :'filename';\n+\\set command '\\\\copy tenk1 FROM ' :'filename';\n+:command\n VACUUM ANALYZE tenk1;\n \n CREATE TABLE tenk2 AS SELECT * FROM tenk1;\n@@ -174,7 +176,8 @@ CREATE TABLE person (\n );\n \n \\set filename :abs_srcdir '/data/person.data'\n-COPY person FROM :'filename';\n+\\set command '\\\\copy person FROM ' :'filename';\n+:command\n VACUUM ANALYZE person;\n \n CREATE TABLE emp (\n@@ -183,7 +186,8 @@ CREATE TABLE emp (\n ) 
INHERITS (person);\n \n \\set filename :abs_srcdir '/data/emp.data'\n-COPY emp FROM :'filename';\n+\\set command '\\\\copy emp FROM ' :'filename';\n+:command\n VACUUM ANALYZE emp;\n \n CREATE TABLE student (\n@@ -191,7 +195,8 @@ CREATE TABLE student (\n ) INHERITS (person);\n \n \\set filename :abs_srcdir '/data/student.data'\n-COPY student FROM :'filename';\n+\\set command '\\\\copy student FROM ' :'filename';\n+:command\n VACUUM ANALYZE student;\n \n CREATE TABLE stud_emp (\n@@ -199,7 +204,8 @@ CREATE TABLE stud_emp (\n ) INHERITS (emp, student);\n \n \\set filename :abs_srcdir '/data/stud_emp.data'\n-COPY stud_emp FROM :'filename';\n+\\set command '\\\\copy stud_emp FROM ' :'filename';\n+:command\n VACUUM ANALYZE stud_emp;\n \n CREATE TABLE road (\n@@ -208,7 +214,8 @@ CREATE TABLE road (\n );\n \n \\set filename :abs_srcdir '/data/streets.data'\n-COPY road FROM :'filename';\n+\\set command '\\\\copy road FROM ' :'filename';\n+:command\n VACUUM ANALYZE road;\n \n CREATE TABLE ihighway () INHERITS (road);\ndiff --git a/src/test/regress/sql/tsearch.sql b/src/test/regress/sql/tsearch.sql\nindex fbd26cdba4..7ec2d78eee 100644\n--- a/src/test/regress/sql/tsearch.sql\n+++ b/src/test/regress/sql/tsearch.sql\n@@ -49,7 +49,8 @@ CREATE TABLE test_tsvector(\n );\n \n \\set filename :abs_srcdir '/data/tsearch.data'\n-COPY test_tsvector FROM :'filename';\n+\\set command '\\\\copy test_tsvector FROM ' :'filename';\n+:command\n \n ANALYZE test_tsvector;\n \ndiff --git a/src/test/regress/sql/updatable_views.sql b/src/test/regress/sql/updatable_views.sql\nindex 93b693ae83..2983475265 100644\n--- a/src/test/regress/sql/updatable_views.sql\n+++ b/src/test/regress/sql/updatable_views.sql\n@@ -569,9 +569,9 @@ DROP TABLE base_tbl CASCADE;\n \n -- permissions checks\n \n-CREATE USER regress_view_user1;\n-CREATE USER regress_view_user2;\n-CREATE USER regress_view_user3;\n+CREATE USER regress_view_user1 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+CREATE USER regress_view_user2 PASSWORD 
NEON_PASSWORD_PLACEHOLDER;\n+CREATE USER regress_view_user3 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n \n SET SESSION AUTHORIZATION regress_view_user1;\n CREATE TABLE base_tbl(a int, b text, c float);\n@@ -1909,8 +1909,8 @@ drop view uv_iocu_view;\n drop table uv_iocu_tab;\n \n -- ON CONFLICT DO UPDATE permissions checks\n-create user regress_view_user1;\n-create user regress_view_user2;\n+create user regress_view_user1 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n+create user regress_view_user2 PASSWORD NEON_PASSWORD_PLACEHOLDER;\n \n set session authorization regress_view_user1;\n create table base_tbl(a int unique, b text, c float);\ndiff --git a/src/test/regress/sql/update.sql b/src/test/regress/sql/update.sql\nindex 8b4707eb9c..b9041f8134 100644\n--- a/src/test/regress/sql/update.sql\n+++ b/src/test/regress/sql/update.sql\n@@ -342,7 +342,7 @@ DROP FUNCTION func_parted_mod_b();\n -----------------------------------------\n \n ALTER TABLE range_parted ENABLE ROW LEVEL SECURITY;\n-CREATE USER regress_range_parted_user;\n+CREATE USER regress_range_parted_user PASSWORD NEON_PASSWORD_PLACEHOLDER;\n GRANT ALL ON range_parted, mintab TO regress_range_parted_user;\n CREATE POLICY seeall ON range_parted AS PERMISSIVE FOR SELECT USING (true);\n CREATE POLICY policy_range_parted ON range_parted for UPDATE USING (true) WITH CHECK (c % 2 = 0);\ndiff --git a/src/test/regress/sql/vacuum.sql b/src/test/regress/sql/vacuum.sql\nindex 548cd7acca..5b15d4dab0 100644\n--- a/src/test/regress/sql/vacuum.sql\n+++ b/src/test/regress/sql/vacuum.sql\n@@ -335,7 +335,7 @@ CREATE TABLE vacowned (a int);\n CREATE TABLE vacowned_parted (a int) PARTITION BY LIST (a);\n CREATE TABLE vacowned_part1 PARTITION OF vacowned_parted FOR VALUES IN (1);\n CREATE TABLE vacowned_part2 PARTITION OF vacowned_parted FOR VALUES IN (2);\n-CREATE ROLE regress_vacuum;\n+CREATE ROLE regress_vacuum PASSWORD NEON_PASSWORD_PLACEHOLDER;\n SET ROLE regress_vacuum;\n -- Simple table\n VACUUM vacowned;\n"
  },
  {
    "path": "compute/patches/contrib_pg16.patch",
    "content": "diff --git a/contrib/amcheck/expected/check_heap.out b/contrib/amcheck/expected/check_heap.out\nindex 979e5e8..2375b45 100644\n--- a/contrib/amcheck/expected/check_heap.out\n+++ b/contrib/amcheck/expected/check_heap.out\n@@ -80,12 +80,9 @@ INSERT INTO heaptest (a, b)\n -- same transaction.  The heaptest table is smaller than the default\n -- wal_skip_threshold, so a wal_level=minimal commit reads the table into\n -- shared_buffers.  A transaction delays that and excludes any autovacuum.\n-SET allow_in_place_tablespaces = true;\n-CREATE TABLESPACE regress_test_stats_tblspc LOCATION '';\n SELECT sum(reads) AS stats_bulkreads_before\n   FROM pg_stat_io WHERE context = 'bulkread' \\gset\n BEGIN;\n-ALTER TABLE heaptest SET TABLESPACE regress_test_stats_tblspc;\n -- Check that valid options are not rejected nor corruption reported\n -- for a non-empty table\n SELECT * FROM verify_heapam(relation := 'heaptest', skip := 'none');\n@@ -118,14 +115,6 @@ SELECT pg_stat_force_next_flush();\n  \n (1 row)\n \n-SELECT sum(reads) AS stats_bulkreads_after\n-  FROM pg_stat_io WHERE context = 'bulkread' \\gset\n-SELECT :stats_bulkreads_after > :stats_bulkreads_before;\n- ?column? \n-----------\n- t\n-(1 row)\n-\n CREATE ROLE regress_heaptest_role;\n -- verify permissions are checked (error due to function not callable)\n SET ROLE regress_heaptest_role;\n@@ -233,7 +222,6 @@ ERROR:  cannot check relation \"test_foreign_table\"\n DETAIL:  This operation is not supported for foreign tables.\n -- cleanup\n DROP TABLE heaptest;\n-DROP TABLESPACE regress_test_stats_tblspc;\n DROP TABLE test_partition;\n DROP TABLE test_partitioned;\n DROP OWNED BY regress_heaptest_role; -- permissions\ndiff --git a/contrib/amcheck/sql/check_heap.sql b/contrib/amcheck/sql/check_heap.sql\nindex 1745bae..3b429c3 100644\n--- a/contrib/amcheck/sql/check_heap.sql\n+++ b/contrib/amcheck/sql/check_heap.sql\n@@ -40,12 +40,9 @@ INSERT INTO heaptest (a, b)\n -- same transaction.  
The heaptest table is smaller than the default\n -- wal_skip_threshold, so a wal_level=minimal commit reads the table into\n -- shared_buffers.  A transaction delays that and excludes any autovacuum.\n-SET allow_in_place_tablespaces = true;\n-CREATE TABLESPACE regress_test_stats_tblspc LOCATION '';\n SELECT sum(reads) AS stats_bulkreads_before\n   FROM pg_stat_io WHERE context = 'bulkread' \\gset\n BEGIN;\n-ALTER TABLE heaptest SET TABLESPACE regress_test_stats_tblspc;\n -- Check that valid options are not rejected nor corruption reported\n -- for a non-empty table\n SELECT * FROM verify_heapam(relation := 'heaptest', skip := 'none');\n@@ -58,9 +55,6 @@ COMMIT;\n --   ALTER TABLE ... SET TABLESPACE ...\n -- causing an additional bulkread, which should be reflected in pg_stat_io.\n SELECT pg_stat_force_next_flush();\n-SELECT sum(reads) AS stats_bulkreads_after\n-  FROM pg_stat_io WHERE context = 'bulkread' \\gset\n-SELECT :stats_bulkreads_after > :stats_bulkreads_before;\n \n CREATE ROLE regress_heaptest_role;\n \n@@ -140,7 +134,6 @@ SELECT * FROM verify_heapam('test_foreign_table',\n \n -- cleanup\n DROP TABLE heaptest;\n-DROP TABLESPACE regress_test_stats_tblspc;\n DROP TABLE test_partition;\n DROP TABLE test_partitioned;\n DROP OWNED BY regress_heaptest_role; -- permissions\ndiff --git a/contrib/citext/expected/create_index_acl.out b/contrib/citext/expected/create_index_acl.out\nindex 33be13a..70a406c 100644\n--- a/contrib/citext/expected/create_index_acl.out\n+++ b/contrib/citext/expected/create_index_acl.out\n@@ -5,9 +5,6 @@\n -- owner having as few applicable privileges as possible.  
(The privileges.sql\n -- regress_sro_user tests look for the opposite defect; they confirm that\n -- DefineIndex() uses the table owner userid where necessary.)\n-SET allow_in_place_tablespaces = true;\n-CREATE TABLESPACE regress_create_idx_tblspace LOCATION '';\n-RESET allow_in_place_tablespaces;\n BEGIN;\n CREATE ROLE regress_minimal;\n CREATE SCHEMA s;\n@@ -49,11 +46,9 @@ ALTER TABLE s.x OWNER TO regress_minimal;\n -- Empty-table DefineIndex()\n CREATE UNIQUE INDEX u0rows ON s.x USING btree\n   ((s.index_this_expr(y, s.const())) COLLATE s.coll s.citext_pattern_ops)\n-  TABLESPACE regress_create_idx_tblspace\n   WHERE s.index_row_if(y);\n ALTER TABLE s.x ADD CONSTRAINT e0rows EXCLUDE USING btree\n   ((s.index_this_expr(y, s.const())) COLLATE s.coll WITH s.=)\n-  USING INDEX TABLESPACE regress_create_idx_tblspace\n   WHERE (s.index_row_if(y));\n -- Make the table nonempty.\n INSERT INTO s.x VALUES ('foo'), ('bar');\n@@ -66,11 +61,9 @@ RESET search_path;\n GRANT EXECUTE ON FUNCTION s.index_this_expr TO regress_minimal;\n CREATE UNIQUE INDEX u2rows ON s.x USING btree\n   ((s.index_this_expr(y, s.const())) COLLATE s.coll s.citext_pattern_ops)\n-  TABLESPACE regress_create_idx_tblspace\n   WHERE s.index_row_if(y);\n ALTER TABLE s.x ADD CONSTRAINT e2rows EXCLUDE USING btree\n   ((s.index_this_expr(y, s.const())) COLLATE s.coll WITH s.=)\n-  USING INDEX TABLESPACE regress_create_idx_tblspace\n   WHERE (s.index_row_if(y));\n -- Shall not find s.coll via search_path, despite the s.const->public.setter\n -- call having set search_path=s during expression planning.  
Suppress the\n@@ -78,9 +71,7 @@ ALTER TABLE s.x ADD CONSTRAINT e2rows EXCLUDE USING btree\n \\set VERBOSITY sqlstate\n ALTER TABLE s.x ADD CONSTRAINT underqualified EXCLUDE USING btree\n   ((s.index_this_expr(y, s.const())) COLLATE coll WITH s.=)\n-  USING INDEX TABLESPACE regress_create_idx_tblspace\n   WHERE (s.index_row_if(y));\n ERROR:  42704\n \\set VERBOSITY default\n ROLLBACK;\n-DROP TABLESPACE regress_create_idx_tblspace;\ndiff --git a/contrib/citext/sql/create_index_acl.sql b/contrib/citext/sql/create_index_acl.sql\nindex 10b5225..ae442e1 100644\n--- a/contrib/citext/sql/create_index_acl.sql\n+++ b/contrib/citext/sql/create_index_acl.sql\n@@ -6,10 +6,6 @@\n -- regress_sro_user tests look for the opposite defect; they confirm that\n -- DefineIndex() uses the table owner userid where necessary.)\n \n-SET allow_in_place_tablespaces = true;\n-CREATE TABLESPACE regress_create_idx_tblspace LOCATION '';\n-RESET allow_in_place_tablespaces;\n-\n BEGIN;\n CREATE ROLE regress_minimal;\n CREATE SCHEMA s;\n@@ -51,11 +47,9 @@ ALTER TABLE s.x OWNER TO regress_minimal;\n -- Empty-table DefineIndex()\n CREATE UNIQUE INDEX u0rows ON s.x USING btree\n   ((s.index_this_expr(y, s.const())) COLLATE s.coll s.citext_pattern_ops)\n-  TABLESPACE regress_create_idx_tblspace\n   WHERE s.index_row_if(y);\n ALTER TABLE s.x ADD CONSTRAINT e0rows EXCLUDE USING btree\n   ((s.index_this_expr(y, s.const())) COLLATE s.coll WITH s.=)\n-  USING INDEX TABLESPACE regress_create_idx_tblspace\n   WHERE (s.index_row_if(y));\n -- Make the table nonempty.\n INSERT INTO s.x VALUES ('foo'), ('bar');\n@@ -68,11 +62,9 @@ RESET search_path;\n GRANT EXECUTE ON FUNCTION s.index_this_expr TO regress_minimal;\n CREATE UNIQUE INDEX u2rows ON s.x USING btree\n   ((s.index_this_expr(y, s.const())) COLLATE s.coll s.citext_pattern_ops)\n-  TABLESPACE regress_create_idx_tblspace\n   WHERE s.index_row_if(y);\n ALTER TABLE s.x ADD CONSTRAINT e2rows EXCLUDE USING btree\n   ((s.index_this_expr(y, s.const())) COLLATE 
s.coll WITH s.=)\n-  USING INDEX TABLESPACE regress_create_idx_tblspace\n   WHERE (s.index_row_if(y));\n -- Shall not find s.coll via search_path, despite the s.const->public.setter\n -- call having set search_path=s during expression planning.  Suppress the\n@@ -80,9 +72,7 @@ ALTER TABLE s.x ADD CONSTRAINT e2rows EXCLUDE USING btree\n \\set VERBOSITY sqlstate\n ALTER TABLE s.x ADD CONSTRAINT underqualified EXCLUDE USING btree\n   ((s.index_this_expr(y, s.const())) COLLATE coll WITH s.=)\n-  USING INDEX TABLESPACE regress_create_idx_tblspace\n   WHERE (s.index_row_if(y));\n \\set VERBOSITY default\n ROLLBACK;\n \n-DROP TABLESPACE regress_create_idx_tblspace;\ndiff --git a/contrib/file_fdw/expected/file_fdw.out b/contrib/file_fdw/expected/file_fdw.out\nindex 72304e0..ebe131b 100644\n--- a/contrib/file_fdw/expected/file_fdw.out\n+++ b/contrib/file_fdw/expected/file_fdw.out\n@@ -4,6 +4,7 @@\n -- directory paths are passed to us in environment variables\n \\getenv abs_srcdir PG_ABS_SRCDIR\n -- Clean up in case a prior regression run failed\n+SET compute_query_id TO 'off';\n SET client_min_messages TO 'warning';\n DROP ROLE IF EXISTS regress_file_fdw_superuser, regress_file_fdw_user, regress_no_priv_user;\n RESET client_min_messages;\ndiff --git a/contrib/file_fdw/sql/file_fdw.sql b/contrib/file_fdw/sql/file_fdw.sql\nindex f0548e1..848a08c 100644\n--- a/contrib/file_fdw/sql/file_fdw.sql\n+++ b/contrib/file_fdw/sql/file_fdw.sql\n@@ -6,6 +6,7 @@\n \\getenv abs_srcdir PG_ABS_SRCDIR\n \n -- Clean up in case a prior regression run failed\n+SET compute_query_id TO 'off';\n SET client_min_messages TO 'warning';\n DROP ROLE IF EXISTS regress_file_fdw_superuser, regress_file_fdw_user, regress_no_priv_user;\n RESET client_min_messages;\ndiff --git a/contrib/pageinspect/expected/gist.out b/contrib/pageinspect/expected/gist.out\nindex d1adbab..38b52ac 100644\n--- a/contrib/pageinspect/expected/gist.out\n+++ b/contrib/pageinspect/expected/gist.out\n@@ -10,25 +10,6 @@ BEGIN;\n CREATE 
TABLE test_gist AS SELECT point(i,i) p, i::text t FROM\n     generate_series(1,1000) i;\n CREATE INDEX test_gist_idx ON test_gist USING gist (p);\n--- Page 0 is the root, the rest are leaf pages\n-SELECT * FROM gist_page_opaque_info(get_raw_page('test_gist_idx', 0));\n- lsn | nsn | rightlink  | flags \n------+-----+------------+-------\n- 0/1 | 0/0 | 4294967295 | {}\n-(1 row)\n-\n-SELECT * FROM gist_page_opaque_info(get_raw_page('test_gist_idx', 1));\n- lsn | nsn | rightlink  | flags  \n------+-----+------------+--------\n- 0/1 | 0/0 | 4294967295 | {leaf}\n-(1 row)\n-\n-SELECT * FROM gist_page_opaque_info(get_raw_page('test_gist_idx', 2));\n- lsn | nsn | rightlink | flags  \n------+-----+-----------+--------\n- 0/1 | 0/0 |         1 | {leaf}\n-(1 row)\n-\n COMMIT;\n SELECT * FROM gist_page_items(get_raw_page('test_gist_idx', 0), 'test_gist_idx');\n  itemoffset |   ctid    | itemlen | dead |             keys              \ndiff --git a/contrib/pageinspect/sql/gist.sql b/contrib/pageinspect/sql/gist.sql\nindex d263542..607992f 100644\n--- a/contrib/pageinspect/sql/gist.sql\n+++ b/contrib/pageinspect/sql/gist.sql\n@@ -12,11 +12,6 @@ CREATE TABLE test_gist AS SELECT point(i,i) p, i::text t FROM\n     generate_series(1,1000) i;\n CREATE INDEX test_gist_idx ON test_gist USING gist (p);\n \n--- Page 0 is the root, the rest are leaf pages\n-SELECT * FROM gist_page_opaque_info(get_raw_page('test_gist_idx', 0));\n-SELECT * FROM gist_page_opaque_info(get_raw_page('test_gist_idx', 1));\n-SELECT * FROM gist_page_opaque_info(get_raw_page('test_gist_idx', 2));\n-\n COMMIT;\n \n SELECT * FROM gist_page_items(get_raw_page('test_gist_idx', 0), 'test_gist_idx');\n"
  },
  {
    "path": "compute/patches/contrib_pg17.patch",
    "content": "diff --git a/contrib/amcheck/expected/check_heap.out b/contrib/amcheck/expected/check_heap.out\nindex 979e5e8..2375b45 100644\n--- a/contrib/amcheck/expected/check_heap.out\n+++ b/contrib/amcheck/expected/check_heap.out\n@@ -80,12 +80,9 @@ INSERT INTO heaptest (a, b)\n -- same transaction.  The heaptest table is smaller than the default\n -- wal_skip_threshold, so a wal_level=minimal commit reads the table into\n -- shared_buffers.  A transaction delays that and excludes any autovacuum.\n-SET allow_in_place_tablespaces = true;\n-CREATE TABLESPACE regress_test_stats_tblspc LOCATION '';\n SELECT sum(reads) AS stats_bulkreads_before\n   FROM pg_stat_io WHERE context = 'bulkread' \\gset\n BEGIN;\n-ALTER TABLE heaptest SET TABLESPACE regress_test_stats_tblspc;\n -- Check that valid options are not rejected nor corruption reported\n -- for a non-empty table\n SELECT * FROM verify_heapam(relation := 'heaptest', skip := 'none');\n@@ -118,14 +115,6 @@ SELECT pg_stat_force_next_flush();\n  \n (1 row)\n \n-SELECT sum(reads) AS stats_bulkreads_after\n-  FROM pg_stat_io WHERE context = 'bulkread' \\gset\n-SELECT :stats_bulkreads_after > :stats_bulkreads_before;\n- ?column? \n-----------\n- t\n-(1 row)\n-\n CREATE ROLE regress_heaptest_role;\n -- verify permissions are checked (error due to function not callable)\n SET ROLE regress_heaptest_role;\n@@ -233,7 +222,6 @@ ERROR:  cannot check relation \"test_foreign_table\"\n DETAIL:  This operation is not supported for foreign tables.\n -- cleanup\n DROP TABLE heaptest;\n-DROP TABLESPACE regress_test_stats_tblspc;\n DROP TABLE test_partition;\n DROP TABLE test_partitioned;\n DROP OWNED BY regress_heaptest_role; -- permissions\ndiff --git a/contrib/amcheck/sql/check_heap.sql b/contrib/amcheck/sql/check_heap.sql\nindex 1745bae..3b429c3 100644\n--- a/contrib/amcheck/sql/check_heap.sql\n+++ b/contrib/amcheck/sql/check_heap.sql\n@@ -40,12 +40,9 @@ INSERT INTO heaptest (a, b)\n -- same transaction.  
The heaptest table is smaller than the default\n -- wal_skip_threshold, so a wal_level=minimal commit reads the table into\n -- shared_buffers.  A transaction delays that and excludes any autovacuum.\n-SET allow_in_place_tablespaces = true;\n-CREATE TABLESPACE regress_test_stats_tblspc LOCATION '';\n SELECT sum(reads) AS stats_bulkreads_before\n   FROM pg_stat_io WHERE context = 'bulkread' \\gset\n BEGIN;\n-ALTER TABLE heaptest SET TABLESPACE regress_test_stats_tblspc;\n -- Check that valid options are not rejected nor corruption reported\n -- for a non-empty table\n SELECT * FROM verify_heapam(relation := 'heaptest', skip := 'none');\n@@ -58,9 +55,6 @@ COMMIT;\n --   ALTER TABLE ... SET TABLESPACE ...\n -- causing an additional bulkread, which should be reflected in pg_stat_io.\n SELECT pg_stat_force_next_flush();\n-SELECT sum(reads) AS stats_bulkreads_after\n-  FROM pg_stat_io WHERE context = 'bulkread' \\gset\n-SELECT :stats_bulkreads_after > :stats_bulkreads_before;\n \n CREATE ROLE regress_heaptest_role;\n \n@@ -140,7 +134,6 @@ SELECT * FROM verify_heapam('test_foreign_table',\n \n -- cleanup\n DROP TABLE heaptest;\n-DROP TABLESPACE regress_test_stats_tblspc;\n DROP TABLE test_partition;\n DROP TABLE test_partitioned;\n DROP OWNED BY regress_heaptest_role; -- permissions\ndiff --git a/contrib/citext/expected/create_index_acl.out b/contrib/citext/expected/create_index_acl.out\nindex 33be13a..70a406c 100644\n--- a/contrib/citext/expected/create_index_acl.out\n+++ b/contrib/citext/expected/create_index_acl.out\n@@ -5,9 +5,6 @@\n -- owner having as few applicable privileges as possible.  
(The privileges.sql\n -- regress_sro_user tests look for the opposite defect; they confirm that\n -- DefineIndex() uses the table owner userid where necessary.)\n-SET allow_in_place_tablespaces = true;\n-CREATE TABLESPACE regress_create_idx_tblspace LOCATION '';\n-RESET allow_in_place_tablespaces;\n BEGIN;\n CREATE ROLE regress_minimal;\n CREATE SCHEMA s;\n@@ -49,11 +46,9 @@ ALTER TABLE s.x OWNER TO regress_minimal;\n -- Empty-table DefineIndex()\n CREATE UNIQUE INDEX u0rows ON s.x USING btree\n   ((s.index_this_expr(y, s.const())) COLLATE s.coll s.citext_pattern_ops)\n-  TABLESPACE regress_create_idx_tblspace\n   WHERE s.index_row_if(y);\n ALTER TABLE s.x ADD CONSTRAINT e0rows EXCLUDE USING btree\n   ((s.index_this_expr(y, s.const())) COLLATE s.coll WITH s.=)\n-  USING INDEX TABLESPACE regress_create_idx_tblspace\n   WHERE (s.index_row_if(y));\n -- Make the table nonempty.\n INSERT INTO s.x VALUES ('foo'), ('bar');\n@@ -66,11 +61,9 @@ RESET search_path;\n GRANT EXECUTE ON FUNCTION s.index_this_expr TO regress_minimal;\n CREATE UNIQUE INDEX u2rows ON s.x USING btree\n   ((s.index_this_expr(y, s.const())) COLLATE s.coll s.citext_pattern_ops)\n-  TABLESPACE regress_create_idx_tblspace\n   WHERE s.index_row_if(y);\n ALTER TABLE s.x ADD CONSTRAINT e2rows EXCLUDE USING btree\n   ((s.index_this_expr(y, s.const())) COLLATE s.coll WITH s.=)\n-  USING INDEX TABLESPACE regress_create_idx_tblspace\n   WHERE (s.index_row_if(y));\n -- Shall not find s.coll via search_path, despite the s.const->public.setter\n -- call having set search_path=s during expression planning.  
Suppress the\n@@ -78,9 +71,7 @@ ALTER TABLE s.x ADD CONSTRAINT e2rows EXCLUDE USING btree\n \\set VERBOSITY sqlstate\n ALTER TABLE s.x ADD CONSTRAINT underqualified EXCLUDE USING btree\n   ((s.index_this_expr(y, s.const())) COLLATE coll WITH s.=)\n-  USING INDEX TABLESPACE regress_create_idx_tblspace\n   WHERE (s.index_row_if(y));\n ERROR:  42704\n \\set VERBOSITY default\n ROLLBACK;\n-DROP TABLESPACE regress_create_idx_tblspace;\ndiff --git a/contrib/citext/sql/create_index_acl.sql b/contrib/citext/sql/create_index_acl.sql\nindex 10b5225..ae442e1 100644\n--- a/contrib/citext/sql/create_index_acl.sql\n+++ b/contrib/citext/sql/create_index_acl.sql\n@@ -6,10 +6,6 @@\n -- regress_sro_user tests look for the opposite defect; they confirm that\n -- DefineIndex() uses the table owner userid where necessary.)\n \n-SET allow_in_place_tablespaces = true;\n-CREATE TABLESPACE regress_create_idx_tblspace LOCATION '';\n-RESET allow_in_place_tablespaces;\n-\n BEGIN;\n CREATE ROLE regress_minimal;\n CREATE SCHEMA s;\n@@ -51,11 +47,9 @@ ALTER TABLE s.x OWNER TO regress_minimal;\n -- Empty-table DefineIndex()\n CREATE UNIQUE INDEX u0rows ON s.x USING btree\n   ((s.index_this_expr(y, s.const())) COLLATE s.coll s.citext_pattern_ops)\n-  TABLESPACE regress_create_idx_tblspace\n   WHERE s.index_row_if(y);\n ALTER TABLE s.x ADD CONSTRAINT e0rows EXCLUDE USING btree\n   ((s.index_this_expr(y, s.const())) COLLATE s.coll WITH s.=)\n-  USING INDEX TABLESPACE regress_create_idx_tblspace\n   WHERE (s.index_row_if(y));\n -- Make the table nonempty.\n INSERT INTO s.x VALUES ('foo'), ('bar');\n@@ -68,11 +62,9 @@ RESET search_path;\n GRANT EXECUTE ON FUNCTION s.index_this_expr TO regress_minimal;\n CREATE UNIQUE INDEX u2rows ON s.x USING btree\n   ((s.index_this_expr(y, s.const())) COLLATE s.coll s.citext_pattern_ops)\n-  TABLESPACE regress_create_idx_tblspace\n   WHERE s.index_row_if(y);\n ALTER TABLE s.x ADD CONSTRAINT e2rows EXCLUDE USING btree\n   ((s.index_this_expr(y, s.const())) COLLATE 
s.coll WITH s.=)\n-  USING INDEX TABLESPACE regress_create_idx_tblspace\n   WHERE (s.index_row_if(y));\n -- Shall not find s.coll via search_path, despite the s.const->public.setter\n -- call having set search_path=s during expression planning.  Suppress the\n@@ -80,9 +72,7 @@ ALTER TABLE s.x ADD CONSTRAINT e2rows EXCLUDE USING btree\n \\set VERBOSITY sqlstate\n ALTER TABLE s.x ADD CONSTRAINT underqualified EXCLUDE USING btree\n   ((s.index_this_expr(y, s.const())) COLLATE coll WITH s.=)\n-  USING INDEX TABLESPACE regress_create_idx_tblspace\n   WHERE (s.index_row_if(y));\n \\set VERBOSITY default\n ROLLBACK;\n \n-DROP TABLESPACE regress_create_idx_tblspace;\ndiff --git a/contrib/file_fdw/expected/file_fdw.out b/contrib/file_fdw/expected/file_fdw.out\nindex 86c148a..81bdb2c 100644\n--- a/contrib/file_fdw/expected/file_fdw.out\n+++ b/contrib/file_fdw/expected/file_fdw.out\n@@ -4,6 +4,7 @@\n -- directory paths are passed to us in environment variables\n \\getenv abs_srcdir PG_ABS_SRCDIR\n -- Clean up in case a prior regression run failed\n+SET compute_query_id TO 'off';\n SET client_min_messages TO 'warning';\n DROP ROLE IF EXISTS regress_file_fdw_superuser, regress_file_fdw_user, regress_no_priv_user;\n RESET client_min_messages;\ndiff --git a/contrib/file_fdw/sql/file_fdw.sql b/contrib/file_fdw/sql/file_fdw.sql\nindex f0548e1..848a08c 100644\n--- a/contrib/file_fdw/sql/file_fdw.sql\n+++ b/contrib/file_fdw/sql/file_fdw.sql\n@@ -6,6 +6,7 @@\n \\getenv abs_srcdir PG_ABS_SRCDIR\n \n -- Clean up in case a prior regression run failed\n+SET compute_query_id TO 'off';\n SET client_min_messages TO 'warning';\n DROP ROLE IF EXISTS regress_file_fdw_superuser, regress_file_fdw_user, regress_no_priv_user;\n RESET client_min_messages;\n"
  },
  {
    "path": "compute/patches/duckdb_v113.patch",
    "content": "diff --git a/libduckdb.map b/libduckdb.map\nnew file mode 100644\nindex 0000000000..3b56f00cd7\n--- /dev/null\n+++ b/libduckdb.map\n@@ -0,0 +1,6 @@\n+DUCKDB_1.1.3 {\n+    global:\n+        *duckdb*;\n+    local:\n+        *;\n+};\ndiff --git a/src/CMakeLists.txt b/src/CMakeLists.txt\nindex 3e757a4bcc..88ab4005b9 100644\n--- a/src/CMakeLists.txt\n+++ b/src/CMakeLists.txt\n@@ -135,6 +135,8 @@ else()\n   target_link_libraries(duckdb ${DUCKDB_LINK_LIBS})\n   link_threads(duckdb)\n   link_extension_libraries(duckdb)\n+  target_link_options(duckdb PRIVATE\n+    -Wl,--version-script=${CMAKE_SOURCE_DIR}/libduckdb.map)\n \n   add_library(duckdb_static STATIC ${ALL_OBJECT_FILES})\n   target_link_libraries(duckdb_static ${DUCKDB_LINK_LIBS})\n"
  },
  {
    "path": "compute/patches/duckdb_v120.patch",
    "content": "diff --git a/libduckdb_pg_duckdb.map b/libduckdb_pg_duckdb.map\nnew file mode 100644\nindex 0000000000..0872978b48\n--- /dev/null\n+++ b/libduckdb_pg_duckdb.map\n@@ -0,0 +1,6 @@\n+DUCKDB_1.2.0 {\n+    global:\n+        *duckdb*;\n+    local:\n+        *;\n+};\ndiff --git a/src/CMakeLists.txt b/src/CMakeLists.txt\nindex 58adef3fc0..2c522f91be 100644\n--- a/src/CMakeLists.txt\n+++ b/src/CMakeLists.txt\n@@ -59,7 +59,7 @@ endfunction()\n \n if(AMALGAMATION_BUILD)\n \n-  add_library(duckdb SHARED \"${PROJECT_SOURCE_DIR}/src/amalgamation/duckdb.cpp\")\n+  add_library(duckdb_pg_duckdb SHARED \"${PROJECT_SOURCE_DIR}/src/amalgamation/duckdb.cpp\")\n   target_link_libraries(duckdb ${DUCKDB_SYSTEM_LIBS})\n   link_threads(duckdb)\n   link_extension_libraries(duckdb)\n@@ -109,7 +109,7 @@ else()\n       duckdb_yyjson\n       duckdb_zstd)\n \n-  add_library(duckdb SHARED ${ALL_OBJECT_FILES})\n+  add_library(duckdb_pg_duckdb SHARED ${ALL_OBJECT_FILES})\n \n   if(WIN32 AND NOT MINGW)\n     ensure_variable_is_number(DUCKDB_MAJOR_VERSION RC_MAJOR_VERSION)\n@@ -131,9 +131,11 @@ else()\n     target_sources(duckdb PRIVATE version.rc)\n   endif()\n \n-  target_link_libraries(duckdb ${DUCKDB_LINK_LIBS})\n-  link_threads(duckdb)\n-  link_extension_libraries(duckdb)\n+  target_link_libraries(duckdb_pg_duckdb ${DUCKDB_LINK_LIBS})\n+  link_threads(duckdb_pg_duckdb)\n+  link_extension_libraries(duckdb_pg_duckdb)\n+  target_link_options(duckdb_pg_duckdb PRIVATE\n+    -Wl,--version-script=${CMAKE_SOURCE_DIR}/libduckdb_pg_duckdb.map)\n \n   add_library(duckdb_static STATIC ${ALL_OBJECT_FILES})\n   target_link_libraries(duckdb_static ${DUCKDB_LINK_LIBS})\n@@ -141,7 +143,7 @@ else()\n   link_extension_libraries(duckdb_static)\n \n   target_include_directories(\n-    duckdb PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>\n+    duckdb_pg_duckdb PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>\n                   
$<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>)\n \n   target_include_directories(\n@@ -161,7 +163,7 @@ else()\n endif()\n \n install(\n-  TARGETS duckdb duckdb_static\n+  TARGETS duckdb_pg_duckdb duckdb_static\n   EXPORT \"${DUCKDB_EXPORT_SET}\"\n   LIBRARY DESTINATION \"${INSTALL_LIB_DIR}\"\n   ARCHIVE DESTINATION \"${INSTALL_LIB_DIR}\"\n"
  },
  {
    "path": "compute/patches/onnxruntime.patch",
    "content": "diff --git a/cmake/deps.txt b/cmake/deps.txt\nindex d213b09034..229de2ebf0 100644\n--- a/cmake/deps.txt\n+++ b/cmake/deps.txt\n@@ -22,7 +22,9 @@ dlpack;https://github.com/dmlc/dlpack/archive/refs/tags/v0.6.zip;4d565dd2e5b3132\n # it contains changes on top of 3.4.0 which are required to fix build issues.\n # Until the 3.4.1 release this is the best option we have.\n # Issue link: https://gitlab.com/libeigen/eigen/-/issues/2744\n-eigen;https://gitlab.com/libeigen/eigen/-/archive/e7248b26a1ed53fa030c5c459f7ea095dfd276ac/eigen-e7248b26a1ed53fa030c5c459f7ea095dfd276ac.zip;be8be39fdbc6e60e94fa7870b280707069b5b81a\n+# Moved to github mirror to avoid gitlab issues.Add commentMore actions\n+# Issue link: https://github.com/bazelbuild/bazel-central-registry/issues/4355\n+eigen;https://github.com/eigen-mirror/eigen/archive/e7248b26a1ed53fa030c5c459f7ea095dfd276ac/eigen-e7248b26a1ed53fa030c5c459f7ea095dfd276ac.zip;61418a349000ba7744a3ad03cf5071f22ebf860a\n flatbuffers;https://github.com/google/flatbuffers/archive/refs/tags/v23.5.26.zip;59422c3b5e573dd192fead2834d25951f1c1670c\n fp16;https://github.com/Maratyszcza/FP16/archive/0a92994d729ff76a58f692d3028ca1b64b145d91.zip;b985f6985a05a1c03ff1bb71190f66d8f98a1494\n fxdiv;https://github.com/Maratyszcza/FXdiv/archive/63058eff77e11aa15bf531df5dd34395ec3017c8.zip;a5658f4036402dbca7cebee32be57fb8149811e1\n"
  },
  {
    "path": "compute/patches/pg_cron.patch",
    "content": "commit b3ea51ee158f113f2f82d0b97c12c54343c9a695 (HEAD -> master)\nAuthor: Alexey Masterov <alexeymasterov@neon.tech>\nDate:   Fri Jun 7 19:23:42 2024 +0000\n\n    Disable REGRESS_OPTIONS causing initdb\n\ndiff --git a/ext-src/pg_cron-src/Makefile b/ext-src/pg_cron-src/Makefile\nindex 053314c..fbd5fb5 100644\n--- a/ext-src/pg_cron-src/Makefile\n+++ b/ext-src/pg_cron-src/Makefile\n@@ -5,7 +5,7 @@ EXTENSION = pg_cron\n DATA_built = $(EXTENSION)--1.0.sql\n DATA = $(wildcard $(EXTENSION)--*--*.sql)\n \n-REGRESS_OPTS =--temp-config=./pg_cron.conf --temp-instance=./tmp_check\n+#REGRESS_OPTS =--temp-config=./pg_cron.conf --temp-instance=./tmp_check\n REGRESS = pg_cron-test \n \n # compilation configuration\n"
  },
  {
    "path": "compute/patches/pg_duckdb_v031.patch",
    "content": "diff --git a/Makefile b/Makefile\nindex 3235cc8..6b892bc 100644\n--- a/Makefile\n+++ b/Makefile\n@@ -32,7 +32,7 @@ else\n \tDUCKDB_BUILD_TYPE = release\n endif\n \n-DUCKDB_LIB = libduckdb$(DLSUFFIX)\n+DUCKDB_LIB = libduckdb_pg_duckdb$(DLSUFFIX)\n FULL_DUCKDB_LIB = third_party/duckdb/build/$(DUCKDB_BUILD_TYPE)/src/$(DUCKDB_LIB)\n \n ERROR_ON_WARNING ?=\n@@ -54,7 +54,7 @@ override PG_CXXFLAGS += -std=c++17 ${DUCKDB_BUILD_CXX_FLAGS} ${COMPILER_FLAGS} -\n # changes to the vendored code in one place.\n override PG_CFLAGS += -Wno-declaration-after-statement\n \n-SHLIB_LINK += -Wl,-rpath,$(PG_LIB)/ -lpq -Lthird_party/duckdb/build/$(DUCKDB_BUILD_TYPE)/src -L$(PG_LIB) -lduckdb -lstdc++ -llz4\n+SHLIB_LINK += -Wl,-rpath,$(PG_LIB)/ -lpq -Lthird_party/duckdb/build/$(DUCKDB_BUILD_TYPE)/src -L$(PG_LIB) -lduckdb_pg_duckdb -lstdc++ -llz4\n \n include Makefile.global\n \ndiff --git a/sql/pg_duckdb--0.2.0--0.3.0.sql b/sql/pg_duckdb--0.2.0--0.3.0.sql\nindex d777d76..3b54396 100644\n--- a/sql/pg_duckdb--0.2.0--0.3.0.sql\n+++ b/sql/pg_duckdb--0.2.0--0.3.0.sql\n@@ -1056,3 +1056,14 @@ GRANT ALL ON FUNCTION duckdb.cache(TEXT, TEXT) TO PUBLIC;\n GRANT ALL ON FUNCTION duckdb.cache_info() TO PUBLIC;\n GRANT ALL ON FUNCTION duckdb.cache_delete(TEXT) TO PUBLIC;\n GRANT ALL ON PROCEDURE duckdb.recycle_ddb() TO PUBLIC;\n+\n+DO $$\n+DECLARE\n+  privileged_role_name text;\n+BEGIN\n+  privileged_role_name := current_setting('neon.privileged_role_name');\n+\n+  EXECUTE format('GRANT ALL ON FUNCTION duckdb.install_extension(TEXT) TO %I', privileged_role_name);\n+  EXECUTE format('GRANT ALL ON TABLE duckdb.extensions TO %I', privileged_role_name);\n+  EXECUTE format('GRANT ALL ON SEQUENCE duckdb.extensions_table_seq TO %I', privileged_role_name);\n+END $$;\n"
  },
  {
    "path": "compute/patches/pg_graphql.patch",
    "content": "commit ec6a491d126882966a696f9ad5d3698935361d55\nAuthor: Alexey Masterov <alexeymasterov@neon.tech>\nDate:   Tue Dec 17 10:25:00 2024 +0100\n\n    Changes required to run tests on Neon\n\ndiff --git a/test/expected/permissions_functions.out b/test/expected/permissions_functions.out\nindex 1e9fbc2..94cbe25 100644\n--- a/test/expected/permissions_functions.out\n+++ b/test/expected/permissions_functions.out\n@@ -64,7 +64,7 @@ begin;\n     select current_user;\n  current_user \n --------------\n- postgres\n+ cloud_admin\n (1 row)\n \n     -- revoke default access from the public role for new functions\n"
  },
  {
    "path": "compute/patches/pg_hint_plan_v16.patch",
    "content": "diff --git a/expected/ut-A.out b/expected/ut-A.out\nindex da723b8..5328114 100644\n--- a/expected/ut-A.out\n+++ b/expected/ut-A.out\n@@ -3175,6 +3178,7 @@ SELECT s.query, s.calls\n   FROM public.pg_stat_statements s\n   JOIN pg_catalog.pg_database d\n     ON (s.dbid = d.oid)\n+  WHERE s.query LIKE 'SELECT * FROM s1.t1%' OR s.query LIKE '%pg_stat_statements_reset%'\n  ORDER BY 1;\n                 query                 | calls \n --------------------------------------+-------\ndiff --git a/sql/ut-A.sql b/sql/ut-A.sql\nindex 7c7d58a..4fd1a07 100644\n--- a/sql/ut-A.sql\n+++ b/sql/ut-A.sql\n@@ -963,6 +963,7 @@ SELECT s.query, s.calls\n   FROM public.pg_stat_statements s\n   JOIN pg_catalog.pg_database d\n     ON (s.dbid = d.oid)\n+  WHERE s.query LIKE 'SELECT * FROM s1.t1%' OR s.query LIKE '%pg_stat_statements_reset%'\n  ORDER BY 1;\n \n ----\n"
  },
  {
    "path": "compute/patches/pg_hint_plan_v17.patch",
    "content": "diff --git a/expected/ut-J.out b/expected/ut-J.out\nindex 2fa3c70..314e929 100644\n--- a/expected/ut-J.out\n+++ b/expected/ut-J.out\n@@ -789,38 +789,6 @@ NestLoop(st1 st2)\n MergeJoin(t1 t2)\n not used hint:\n duplication hint:\n-error hint:\n-\n-LOG:  pg_hint_plan:\n-used hint:\n-not used hint:\n-NestLoop(st1 st2)\n-MergeJoin(t1 t2)\n-duplication hint:\n-error hint:\n-\n-LOG:  pg_hint_plan:\n-used hint:\n-not used hint:\n-NestLoop(st1 st2)\n-MergeJoin(t1 t2)\n-duplication hint:\n-error hint:\n-\n-LOG:  pg_hint_plan:\n-used hint:\n-not used hint:\n-NestLoop(st1 st2)\n-MergeJoin(t1 t2)\n-duplication hint:\n-error hint:\n-\n-LOG:  pg_hint_plan:\n-used hint:\n-not used hint:\n-NestLoop(st1 st2)\n-MergeJoin(t1 t2)\n-duplication hint:\n error hint:\n \n                                        explain_filter                                        \ndiff --git a/expected/ut-S.out b/expected/ut-S.out\nindex 0bfcfb8..e75f581 100644\n--- a/expected/ut-S.out\n+++ b/expected/ut-S.out\n@@ -4415,34 +4415,6 @@ used hint:\n IndexScan(ti1 ti1_pred)\n not used hint:\n duplication hint:\n-error hint:\n-\n-LOG:  pg_hint_plan:\n-used hint:\n-not used hint:\n-IndexScan(ti1 ti1_pred)\n-duplication hint:\n-error hint:\n-\n-LOG:  pg_hint_plan:\n-used hint:\n-not used hint:\n-IndexScan(ti1 ti1_pred)\n-duplication hint:\n-error hint:\n-\n-LOG:  pg_hint_plan:\n-used hint:\n-not used hint:\n-IndexScan(ti1 ti1_pred)\n-duplication hint:\n-error hint:\n-\n-LOG:  pg_hint_plan:\n-used hint:\n-not used hint:\n-IndexScan(ti1 ti1_pred)\n-duplication hint:\n error hint:\n \n                     explain_filter                     \ndiff --git a/expected/ut-W.out b/expected/ut-W.out\nindex a09bd34..0ad227c 100644\n--- a/expected/ut-W.out\n+++ b/expected/ut-W.out\n@@ -1341,54 +1341,6 @@ IndexScan(ft1)\n IndexScan(t)\n Parallel(s1 3 hard)\n duplication hint:\n-error hint:\n-\n-LOG:  pg_hint_plan:\n-used hint:\n-not used 
hint:\n-IndexScan(*VALUES*)\n-SeqScan(cte1)\n-IndexScan(ft1)\n-IndexScan(t)\n-Parallel(p1 5 hard)\n-Parallel(s1 3 hard)\n-duplication hint:\n-error hint:\n-\n-LOG:  pg_hint_plan:\n-used hint:\n-not used hint:\n-IndexScan(*VALUES*)\n-SeqScan(cte1)\n-IndexScan(ft1)\n-IndexScan(t)\n-Parallel(p1 5 hard)\n-Parallel(s1 3 hard)\n-duplication hint:\n-error hint:\n-\n-LOG:  pg_hint_plan:\n-used hint:\n-not used hint:\n-IndexScan(*VALUES*)\n-SeqScan(cte1)\n-IndexScan(ft1)\n-IndexScan(t)\n-Parallel(p1 5 hard)\n-Parallel(s1 3 hard)\n-duplication hint:\n-error hint:\n-\n-LOG:  pg_hint_plan:\n-used hint:\n-not used hint:\n-IndexScan(*VALUES*)\n-SeqScan(cte1)\n-IndexScan(ft1)\n-IndexScan(t)\n-Parallel(p1 5 hard)\n-Parallel(s1 3 hard)\n-duplication hint:\n error hint:\n \n                     explain_filter                    \n"
  },
  {
    "path": "compute/patches/pg_repack.patch",
    "content": "commit 5eb393810cf7c7bafa4e394dad2e349e2a8cb2cb\nAuthor: Alexey Masterov <alexey.masterov@databricks.com>\nDate:   Mon Jul 28 18:11:02 2025 +0200\n\n    Patch for pg_repack\n\ndiff --git a/regress/Makefile b/regress/Makefile\nindex bf6edcb..110e734 100644\n--- a/regress/Makefile\n+++ b/regress/Makefile\n@@ -17,7 +17,7 @@ INTVERSION := $(shell echo $$(($$(echo $(VERSION).0 | sed 's/\\([[:digit:]]\\{1,\\}\n # Test suite\n #\n \n-REGRESS := init-extension repack-setup repack-run error-on-invalid-idx no-error-on-invalid-idx after-schema repack-check nosuper tablespace get_order_by trigger\n+REGRESS := init-extension noautovacuum repack-setup repack-run error-on-invalid-idx no-error-on-invalid-idx after-schema repack-check nosuper get_order_by trigger autovacuum\n \n USE_PGXS = 1\t# use pgxs if not in contrib directory\n PGXS := $(shell $(PG_CONFIG) --pgxs)\ndiff --git a/regress/expected/autovacuum.out b/regress/expected/autovacuum.out\nnew file mode 100644\nindex 0000000..e7f2363\n--- /dev/null\n+++ b/regress/expected/autovacuum.out\n@@ -0,0 +1,7 @@\n+ALTER SYSTEM SET autovacuum='on';\n+SELECT pg_reload_conf();\n+ pg_reload_conf \n+----------------\n+ t\n+(1 row)\n+\ndiff --git a/regress/expected/noautovacuum.out b/regress/expected/noautovacuum.out\nnew file mode 100644\nindex 0000000..fc7978e\n--- /dev/null\n+++ b/regress/expected/noautovacuum.out\n@@ -0,0 +1,7 @@\n+ALTER SYSTEM SET autovacuum='off';\n+SELECT pg_reload_conf();\n+ pg_reload_conf \n+----------------\n+ t\n+(1 row)\n+\ndiff --git a/regress/expected/nosuper.out b/regress/expected/nosuper.out\nindex 8d0a94e..63b68bf 100644\n--- a/regress/expected/nosuper.out\n+++ b/regress/expected/nosuper.out\n@@ -4,22 +4,22 @@\n SET client_min_messages = error;\n DROP ROLE IF EXISTS nosuper;\n SET client_min_messages = warning;\n-CREATE ROLE nosuper WITH LOGIN;\n+CREATE ROLE nosuper WITH LOGIN PASSWORD 'NoSuPeRpAsSwOrD';\n -- => OK\n \\! 
pg_repack --dbname=contrib_regression --table=tbl_cluster --no-superuser-check\n INFO: repacking table \"public.tbl_cluster\"\n -- => ERROR\n-\\! pg_repack --dbname=contrib_regression --table=tbl_cluster --username=nosuper\n+\\! PGPASSWORD=NoSuPeRpAsSwOrD pg_repack --dbname=contrib_regression --table=tbl_cluster --username=nosuper\n ERROR: pg_repack failed with error: You must be a superuser to use pg_repack\n -- => ERROR\n-\\! pg_repack --dbname=contrib_regression --table=tbl_cluster --username=nosuper --no-superuser-check\n+\\! PGPASSWORD=NoSuPeRpAsSwOrD pg_repack --dbname=contrib_regression --table=tbl_cluster --username=nosuper --no-superuser-check\n ERROR: pg_repack failed with error: ERROR:  permission denied for schema repack\n LINE 1: select repack.version(), repack.version_sql()\n                ^\n GRANT ALL ON ALL TABLES IN SCHEMA repack TO nosuper;\n GRANT USAGE ON SCHEMA repack TO nosuper;\n -- => ERROR\n-\\! pg_repack --dbname=contrib_regression --table=tbl_cluster --username=nosuper --no-superuser-check\n+\\! 
PGPASSWORD=NoSuPeRpAsSwOrD pg_repack --dbname=contrib_regression --table=tbl_cluster --username=nosuper --no-superuser-check\n INFO: repacking table \"public.tbl_cluster\"\n ERROR: query failed: ERROR:  current transaction is aborted, commands ignored until end of transaction block\n DETAIL: query was: RESET lock_timeout\ndiff --git a/regress/sql/autovacuum.sql b/regress/sql/autovacuum.sql\nnew file mode 100644\nindex 0000000..a8eda63\n--- /dev/null\n+++ b/regress/sql/autovacuum.sql\n@@ -0,0 +1,2 @@\n+ALTER SYSTEM SET autovacuum='on';\n+SELECT pg_reload_conf();\ndiff --git a/regress/sql/noautovacuum.sql b/regress/sql/noautovacuum.sql\nnew file mode 100644\nindex 0000000..13d4836\n--- /dev/null\n+++ b/regress/sql/noautovacuum.sql\n@@ -0,0 +1,2 @@\n+ALTER SYSTEM SET autovacuum='off';\n+SELECT pg_reload_conf();\ndiff --git a/regress/sql/nosuper.sql b/regress/sql/nosuper.sql\nindex 072f0fa..dbe60f8 100644\n--- a/regress/sql/nosuper.sql\n+++ b/regress/sql/nosuper.sql\n@@ -4,19 +4,19 @@\n SET client_min_messages = error;\n DROP ROLE IF EXISTS nosuper;\n SET client_min_messages = warning;\n-CREATE ROLE nosuper WITH LOGIN;\n+CREATE ROLE nosuper WITH LOGIN PASSWORD 'NoSuPeRpAsSwOrD';\n -- => OK\n \\! pg_repack --dbname=contrib_regression --table=tbl_cluster --no-superuser-check\n -- => ERROR\n-\\! pg_repack --dbname=contrib_regression --table=tbl_cluster --username=nosuper\n+\\! PGPASSWORD=NoSuPeRpAsSwOrD pg_repack --dbname=contrib_regression --table=tbl_cluster --username=nosuper\n -- => ERROR\n-\\! pg_repack --dbname=contrib_regression --table=tbl_cluster --username=nosuper --no-superuser-check\n+\\! PGPASSWORD=NoSuPeRpAsSwOrD pg_repack --dbname=contrib_regression --table=tbl_cluster --username=nosuper --no-superuser-check\n \n GRANT ALL ON ALL TABLES IN SCHEMA repack TO nosuper;\n GRANT USAGE ON SCHEMA repack TO nosuper;\n \n -- => ERROR\n-\\! pg_repack --dbname=contrib_regression --table=tbl_cluster --username=nosuper --no-superuser-check\n+\\! 
PGPASSWORD=NoSuPeRpAsSwOrD pg_repack --dbname=contrib_regression --table=tbl_cluster --username=nosuper --no-superuser-check\n \n REVOKE ALL ON ALL TABLES IN SCHEMA repack FROM nosuper;\n REVOKE USAGE ON SCHEMA repack FROM nosuper;\n"
  },
  {
    "path": "compute/patches/pg_stat_statements_pg14-16.patch",
    "content": "diff --git a/contrib/pg_stat_statements/pg_stat_statements--1.4.sql b/contrib/pg_stat_statements/pg_stat_statements--1.4.sql\nindex 58cdf600fce..8be57a996f6 100644\n--- a/contrib/pg_stat_statements/pg_stat_statements--1.4.sql\n+++ b/contrib/pg_stat_statements/pg_stat_statements--1.4.sql\n@@ -46,3 +46,12 @@ GRANT SELECT ON pg_stat_statements TO PUBLIC;\n \n -- Don't want this to be available to non-superusers.\n REVOKE ALL ON FUNCTION pg_stat_statements_reset() FROM PUBLIC;\n+\n+DO $$\n+DECLARE\n+  privileged_role_name text;\n+BEGIN\n+  privileged_role_name := current_setting('neon.privileged_role_name');\n+\n+  EXECUTE format('GRANT EXECUTE ON FUNCTION pg_stat_statements_reset() TO %I', privileged_role_name);\n+END $$;\ndiff --git a/contrib/pg_stat_statements/pg_stat_statements--1.6--1.7.sql b/contrib/pg_stat_statements/pg_stat_statements--1.6--1.7.sql\nindex 6fc3fed4c93..256345a8f79 100644\n--- a/contrib/pg_stat_statements/pg_stat_statements--1.6--1.7.sql\n+++ b/contrib/pg_stat_statements/pg_stat_statements--1.6--1.7.sql\n@@ -20,3 +20,12 @@ LANGUAGE C STRICT PARALLEL SAFE;\n \n -- Don't want this to be available to non-superusers.\n REVOKE ALL ON FUNCTION pg_stat_statements_reset(Oid, Oid, bigint) FROM PUBLIC;\n+\n+DO $$\n+DECLARE\n+  privileged_role_name text;\n+BEGIN\n+  privileged_role_name := current_setting('neon.privileged_role_name');\n+\n+  EXECUTE format('GRANT EXECUTE ON FUNCTION pg_stat_statements_reset(Oid, Oid, bigint) TO %I', privileged_role_name);\n+END $$;\n"
  },
  {
    "path": "compute/patches/pg_stat_statements_pg17.patch",
    "content": "diff --git a/contrib/pg_stat_statements/pg_stat_statements--1.10--1.11.sql b/contrib/pg_stat_statements/pg_stat_statements--1.10--1.11.sql\nindex 0bb2c397711..32764db1d8b 100644\n--- a/contrib/pg_stat_statements/pg_stat_statements--1.10--1.11.sql\n+++ b/contrib/pg_stat_statements/pg_stat_statements--1.10--1.11.sql\n@@ -80,3 +80,12 @@ LANGUAGE C STRICT PARALLEL SAFE;\n \n -- Don't want this to be available to non-superusers.\n REVOKE ALL ON FUNCTION pg_stat_statements_reset(Oid, Oid, bigint, boolean) FROM PUBLIC;\n+\n+DO $$\n+DECLARE\n+  privileged_role_name text;\n+BEGIN\n+  privileged_role_name := current_setting('neon.privileged_role_name');\n+\n+  EXECUTE format('GRANT EXECUTE ON FUNCTION pg_stat_statements_reset(Oid, Oid, bigint, boolean) TO %I', privileged_role_name);\n+END $$;\n\\ No newline at end of file\ndiff --git a/contrib/pg_stat_statements/pg_stat_statements--1.4.sql b/contrib/pg_stat_statements/pg_stat_statements--1.4.sql\nindex 58cdf600fce..8be57a996f6 100644\n--- a/contrib/pg_stat_statements/pg_stat_statements--1.4.sql\n+++ b/contrib/pg_stat_statements/pg_stat_statements--1.4.sql\n@@ -46,3 +46,12 @@ GRANT SELECT ON pg_stat_statements TO PUBLIC;\n \n -- Don't want this to be available to non-superusers.\n REVOKE ALL ON FUNCTION pg_stat_statements_reset() FROM PUBLIC;\n+\n+DO $$\n+DECLARE\n+  privileged_role_name text;\n+BEGIN\n+  privileged_role_name := current_setting('neon.privileged_role_name');\n+\n+  EXECUTE format('GRANT EXECUTE ON FUNCTION pg_stat_statements_reset() TO %I', privileged_role_name);\n+END $$;\ndiff --git a/contrib/pg_stat_statements/pg_stat_statements--1.6--1.7.sql b/contrib/pg_stat_statements/pg_stat_statements--1.6--1.7.sql\nindex 6fc3fed4c93..256345a8f79 100644\n--- a/contrib/pg_stat_statements/pg_stat_statements--1.6--1.7.sql\n+++ b/contrib/pg_stat_statements/pg_stat_statements--1.6--1.7.sql\n@@ -20,3 +20,12 @@ LANGUAGE C STRICT PARALLEL SAFE;\n \n -- Don't want this to be available to non-superusers.\n REVOKE 
ALL ON FUNCTION pg_stat_statements_reset(Oid, Oid, bigint) FROM PUBLIC;\n+\n+DO $$\n+DECLARE\n+  privileged_role_name text;\n+BEGIN\n+  privileged_role_name := current_setting('neon.privileged_role_name');\n+\n+  EXECUTE format('GRANT EXECUTE ON FUNCTION pg_stat_statements_reset(Oid, Oid, bigint) TO %I', privileged_role_name);\n+END $$;\n"
  },
  {
    "path": "compute/patches/pgaudit-parallel_workers-v14.patch",
    "content": "commit 7220bb3a3f23fa27207d77562dcc286f9a123313\nAuthor: Tristan Partin <tristan.partin@databricks.com>\nDate:   2025-06-23 02:09:31 +0000\n\n    Disable logging in parallel workers\n    \n    When a query uses parallel workers, pgaudit will log the same query for\n    every parallel worker. This is undesireable since it can result in log\n    amplification for queries that use parallel workers.\n    \n    Signed-off-by: Tristan Partin <tristan.partin@databricks.com>\n\ndiff --git a/expected/pgaudit.out b/expected/pgaudit.out\nindex baa8011..a601375 100644\n--- a/expected/pgaudit.out\n+++ b/expected/pgaudit.out\n@@ -2563,6 +2563,37 @@ COMMIT;\n NOTICE:  AUDIT: SESSION,12,4,MISC,COMMIT,,,COMMIT;,<not logged>\n DROP TABLE part_test;\n NOTICE:  AUDIT: SESSION,13,1,DDL,DROP TABLE,,,DROP TABLE part_test;,<not logged>\n+--\n+-- Test logging in parallel workers\n+SET pgaudit.log = 'read';\n+SET pgaudit.log_client = on;\n+SET pgaudit.log_level = 'notice';\n+-- Force parallel execution for testing\n+SET max_parallel_workers_per_gather = 2;\n+SET parallel_tuple_cost = 0;\n+SET parallel_setup_cost = 0;\n+SET min_parallel_table_scan_size = 0;\n+SET min_parallel_index_scan_size = 0;\n+-- Create table with enough data to trigger parallel execution\n+CREATE TABLE parallel_test (id int, data text);\n+INSERT INTO parallel_test SELECT generate_series(1, 1000), 'test data';\n+SELECT count(*) FROM parallel_test;\n+NOTICE:  AUDIT: SESSION,14,1,READ,SELECT,,,SELECT count(*) FROM parallel_test;,<not logged>\n+ count \n+-------\n+  1000\n+(1 row)\n+\n+-- Cleanup parallel test\n+DROP TABLE parallel_test;\n+RESET max_parallel_workers_per_gather;\n+RESET parallel_tuple_cost;\n+RESET parallel_setup_cost;\n+RESET min_parallel_table_scan_size;\n+RESET min_parallel_index_scan_size;\n+RESET pgaudit.log;\n+RESET pgaudit.log_client;\n+RESET pgaudit.log_level;\n -- Cleanup\n -- Set client_min_messages up to warning to avoid noise\n SET client_min_messages = 'warning';\ndiff --git 
a/pgaudit.c b/pgaudit.c\nindex 5e6fd38..ac9ded2 100644\n--- a/pgaudit.c\n+++ b/pgaudit.c\n@@ -11,6 +11,7 @@\n #include \"postgres.h\"\n \n #include \"access/htup_details.h\"\n+#include \"access/parallel.h\"\n #include \"access/sysattr.h\"\n #include \"access/xact.h\"\n #include \"access/relation.h\"\n@@ -1303,7 +1304,7 @@ pgaudit_ExecutorStart_hook(QueryDesc *queryDesc, int eflags)\n {\n     AuditEventStackItem *stackItem = NULL;\n \n-    if (!internalStatement)\n+    if (!internalStatement && !IsParallelWorker())\n     {\n         /* Push the audit even onto the stack */\n         stackItem = stack_push();\n@@ -1384,7 +1385,7 @@ pgaudit_ExecutorCheckPerms_hook(List *rangeTabls, bool abort)\n \n     /* Log DML if the audit role is valid or session logging is enabled */\n     if ((auditOid != InvalidOid || auditLogBitmap != 0) &&\n-        !IsAbortedTransactionBlockState())\n+        !IsAbortedTransactionBlockState() && !IsParallelWorker())\n     {\n         /* If auditLogRows is on, wait for rows processed to be set */\n         if (auditLogRows && auditEventStack != NULL)\n@@ -1438,7 +1439,7 @@ pgaudit_ExecutorRun_hook(QueryDesc *queryDesc, ScanDirection direction, uint64 c\n     else\n         standard_ExecutorRun(queryDesc, direction, count, execute_once);\n \n-    if (auditLogRows && !internalStatement)\n+    if (auditLogRows && !internalStatement && !IsParallelWorker())\n     {\n         /* Find an item from the stack by the query memory context */\n         stackItem = stack_find_context(queryDesc->estate->es_query_cxt);\n@@ -1458,7 +1459,7 @@ pgaudit_ExecutorEnd_hook(QueryDesc *queryDesc)\n     AuditEventStackItem *stackItem = NULL;\n     AuditEventStackItem *auditEventStackFull = NULL;\n \n-    if (auditLogRows && !internalStatement)\n+    if (auditLogRows && !internalStatement && !IsParallelWorker())\n     {\n         /* Find an item from the stack by the query memory context */\n         stackItem = 
stack_find_context(queryDesc->estate->es_query_cxt);\ndiff --git a/sql/pgaudit.sql b/sql/pgaudit.sql\nindex cc1374a..1870a60 100644\n--- a/sql/pgaudit.sql\n+++ b/sql/pgaudit.sql\n@@ -1612,6 +1612,36 @@ COMMIT;\n \n DROP TABLE part_test;\n \n+--\n+-- Test logging in parallel workers\n+SET pgaudit.log = 'read';\n+SET pgaudit.log_client = on;\n+SET pgaudit.log_level = 'notice';\n+\n+-- Force parallel execution for testing\n+SET max_parallel_workers_per_gather = 2;\n+SET parallel_tuple_cost = 0;\n+SET parallel_setup_cost = 0;\n+SET min_parallel_table_scan_size = 0;\n+SET min_parallel_index_scan_size = 0;\n+\n+-- Create table with enough data to trigger parallel execution\n+CREATE TABLE parallel_test (id int, data text);\n+INSERT INTO parallel_test SELECT generate_series(1, 1000), 'test data';\n+\n+SELECT count(*) FROM parallel_test;\n+\n+-- Cleanup parallel test\n+DROP TABLE parallel_test;\n+RESET max_parallel_workers_per_gather;\n+RESET parallel_tuple_cost;\n+RESET parallel_setup_cost;\n+RESET min_parallel_table_scan_size;\n+RESET min_parallel_index_scan_size;\n+RESET pgaudit.log;\n+RESET pgaudit.log_client;\n+RESET pgaudit.log_level;\n+\n -- Cleanup\n -- Set client_min_messages up to warning to avoid noise\n SET client_min_messages = 'warning';\n"
  },
  {
    "path": "compute/patches/pgaudit-parallel_workers-v15.patch",
    "content": "commit 29dc2847f6255541992f18faf8a815dfab79631a\nAuthor: Tristan Partin <tristan.partin@databricks.com>\nDate:   2025-06-23 02:09:31 +0000\n\n    Disable logging in parallel workers\n    \n    When a query uses parallel workers, pgaudit will log the same query for\n    every parallel worker. This is undesireable since it can result in log\n    amplification for queries that use parallel workers.\n    \n    Signed-off-by: Tristan Partin <tristan.partin@databricks.com>\n\ndiff --git a/expected/pgaudit.out b/expected/pgaudit.out\nindex b22560b..73f0327 100644\n--- a/expected/pgaudit.out\n+++ b/expected/pgaudit.out\n@@ -2563,6 +2563,37 @@ COMMIT;\n NOTICE:  AUDIT: SESSION,12,4,MISC,COMMIT,,,COMMIT;,<not logged>\n DROP TABLE part_test;\n NOTICE:  AUDIT: SESSION,13,1,DDL,DROP TABLE,,,DROP TABLE part_test;,<not logged>\n+--\n+-- Test logging in parallel workers\n+SET pgaudit.log = 'read';\n+SET pgaudit.log_client = on;\n+SET pgaudit.log_level = 'notice';\n+-- Force parallel execution for testing\n+SET max_parallel_workers_per_gather = 2;\n+SET parallel_tuple_cost = 0;\n+SET parallel_setup_cost = 0;\n+SET min_parallel_table_scan_size = 0;\n+SET min_parallel_index_scan_size = 0;\n+-- Create table with enough data to trigger parallel execution\n+CREATE TABLE parallel_test (id int, data text);\n+INSERT INTO parallel_test SELECT generate_series(1, 1000), 'test data';\n+SELECT count(*) FROM parallel_test;\n+NOTICE:  AUDIT: SESSION,14,1,READ,SELECT,,,SELECT count(*) FROM parallel_test;,<not logged>\n+ count \n+-------\n+  1000\n+(1 row)\n+\n+-- Cleanup parallel test\n+DROP TABLE parallel_test;\n+RESET max_parallel_workers_per_gather;\n+RESET parallel_tuple_cost;\n+RESET parallel_setup_cost;\n+RESET min_parallel_table_scan_size;\n+RESET min_parallel_index_scan_size;\n+RESET pgaudit.log;\n+RESET pgaudit.log_client;\n+RESET pgaudit.log_level;\n -- Cleanup\n -- Set client_min_messages up to warning to avoid noise\n SET client_min_messages = 'warning';\ndiff --git 
a/pgaudit.c b/pgaudit.c\nindex 5e6fd38..ac9ded2 100644\n--- a/pgaudit.c\n+++ b/pgaudit.c\n@@ -11,6 +11,7 @@\n #include \"postgres.h\"\n \n #include \"access/htup_details.h\"\n+#include \"access/parallel.h\"\n #include \"access/sysattr.h\"\n #include \"access/xact.h\"\n #include \"access/relation.h\"\n@@ -1303,7 +1304,7 @@ pgaudit_ExecutorStart_hook(QueryDesc *queryDesc, int eflags)\n {\n     AuditEventStackItem *stackItem = NULL;\n \n-    if (!internalStatement)\n+    if (!internalStatement && !IsParallelWorker())\n     {\n         /* Push the audit even onto the stack */\n         stackItem = stack_push();\n@@ -1384,7 +1385,7 @@ pgaudit_ExecutorCheckPerms_hook(List *rangeTabls, bool abort)\n \n     /* Log DML if the audit role is valid or session logging is enabled */\n     if ((auditOid != InvalidOid || auditLogBitmap != 0) &&\n-        !IsAbortedTransactionBlockState())\n+        !IsAbortedTransactionBlockState() && !IsParallelWorker())\n     {\n         /* If auditLogRows is on, wait for rows processed to be set */\n         if (auditLogRows && auditEventStack != NULL)\n@@ -1438,7 +1439,7 @@ pgaudit_ExecutorRun_hook(QueryDesc *queryDesc, ScanDirection direction, uint64 c\n     else\n         standard_ExecutorRun(queryDesc, direction, count, execute_once);\n \n-    if (auditLogRows && !internalStatement)\n+    if (auditLogRows && !internalStatement && !IsParallelWorker())\n     {\n         /* Find an item from the stack by the query memory context */\n         stackItem = stack_find_context(queryDesc->estate->es_query_cxt);\n@@ -1458,7 +1459,7 @@ pgaudit_ExecutorEnd_hook(QueryDesc *queryDesc)\n     AuditEventStackItem *stackItem = NULL;\n     AuditEventStackItem *auditEventStackFull = NULL;\n \n-    if (auditLogRows && !internalStatement)\n+    if (auditLogRows && !internalStatement && !IsParallelWorker())\n     {\n         /* Find an item from the stack by the query memory context */\n         stackItem = 
stack_find_context(queryDesc->estate->es_query_cxt);\ndiff --git a/sql/pgaudit.sql b/sql/pgaudit.sql\nindex 8052426..7f0667b 100644\n--- a/sql/pgaudit.sql\n+++ b/sql/pgaudit.sql\n@@ -1612,6 +1612,36 @@ COMMIT;\n \n DROP TABLE part_test;\n \n+--\n+-- Test logging in parallel workers\n+SET pgaudit.log = 'read';\n+SET pgaudit.log_client = on;\n+SET pgaudit.log_level = 'notice';\n+\n+-- Force parallel execution for testing\n+SET max_parallel_workers_per_gather = 2;\n+SET parallel_tuple_cost = 0;\n+SET parallel_setup_cost = 0;\n+SET min_parallel_table_scan_size = 0;\n+SET min_parallel_index_scan_size = 0;\n+\n+-- Create table with enough data to trigger parallel execution\n+CREATE TABLE parallel_test (id int, data text);\n+INSERT INTO parallel_test SELECT generate_series(1, 1000), 'test data';\n+\n+SELECT count(*) FROM parallel_test;\n+\n+-- Cleanup parallel test\n+DROP TABLE parallel_test;\n+RESET max_parallel_workers_per_gather;\n+RESET parallel_tuple_cost;\n+RESET parallel_setup_cost;\n+RESET min_parallel_table_scan_size;\n+RESET min_parallel_index_scan_size;\n+RESET pgaudit.log;\n+RESET pgaudit.log_client;\n+RESET pgaudit.log_level;\n+\n -- Cleanup\n -- Set client_min_messages up to warning to avoid noise\n SET client_min_messages = 'warning';\n"
  },
  {
    "path": "compute/patches/pgaudit-parallel_workers-v16.patch",
    "content": "commit cc708dde7ef2af2a8120d757102d2e34c0463a0f\nAuthor: Tristan Partin <tristan.partin@databricks.com>\nDate:   2025-06-23 02:09:31 +0000\n\n    Disable logging in parallel workers\n    \n    When a query uses parallel workers, pgaudit will log the same query for\n    every parallel worker. This is undesireable since it can result in log\n    amplification for queries that use parallel workers.\n    \n    Signed-off-by: Tristan Partin <tristan.partin@databricks.com>\n\ndiff --git a/expected/pgaudit.out b/expected/pgaudit.out\nindex 8772054..9b66ac6 100644\n--- a/expected/pgaudit.out\n+++ b/expected/pgaudit.out\n@@ -2556,6 +2556,37 @@ DROP SERVER fdw_server;\n NOTICE:  AUDIT: SESSION,11,1,DDL,DROP SERVER,,,DROP SERVER fdw_server;,<not logged>\n DROP EXTENSION postgres_fdw;\n NOTICE:  AUDIT: SESSION,12,1,DDL,DROP EXTENSION,,,DROP EXTENSION postgres_fdw;,<not logged>\n+--\n+-- Test logging in parallel workers\n+SET pgaudit.log = 'read';\n+SET pgaudit.log_client = on;\n+SET pgaudit.log_level = 'notice';\n+-- Force parallel execution for testing\n+SET max_parallel_workers_per_gather = 2;\n+SET parallel_tuple_cost = 0;\n+SET parallel_setup_cost = 0;\n+SET min_parallel_table_scan_size = 0;\n+SET min_parallel_index_scan_size = 0;\n+-- Create table with enough data to trigger parallel execution\n+CREATE TABLE parallel_test (id int, data text);\n+INSERT INTO parallel_test SELECT generate_series(1, 1000), 'test data';\n+SELECT count(*) FROM parallel_test;\n+NOTICE:  AUDIT: SESSION,13,1,READ,SELECT,,,SELECT count(*) FROM parallel_test;,<not logged>\n+ count \n+-------\n+  1000\n+(1 row)\n+\n+-- Cleanup parallel test\n+DROP TABLE parallel_test;\n+RESET max_parallel_workers_per_gather;\n+RESET parallel_tuple_cost;\n+RESET parallel_setup_cost;\n+RESET min_parallel_table_scan_size;\n+RESET min_parallel_index_scan_size;\n+RESET pgaudit.log;\n+RESET pgaudit.log_client;\n+RESET pgaudit.log_level;\n -- Cleanup\n -- Set client_min_messages up to warning to avoid 
noise\n SET client_min_messages = 'warning';\ndiff --git a/pgaudit.c b/pgaudit.c\nindex 004d1f9..f061164 100644\n--- a/pgaudit.c\n+++ b/pgaudit.c\n@@ -11,6 +11,7 @@\n #include \"postgres.h\"\n \n #include \"access/htup_details.h\"\n+#include \"access/parallel.h\"\n #include \"access/sysattr.h\"\n #include \"access/xact.h\"\n #include \"access/relation.h\"\n@@ -1339,7 +1340,7 @@ pgaudit_ExecutorStart_hook(QueryDesc *queryDesc, int eflags)\n {\n     AuditEventStackItem *stackItem = NULL;\n \n-    if (!internalStatement)\n+    if (!internalStatement && !IsParallelWorker())\n     {\n         /* Push the audit even onto the stack */\n         stackItem = stack_push();\n@@ -1420,7 +1421,7 @@ pgaudit_ExecutorCheckPerms_hook(List *rangeTabls, List *permInfos, bool abort)\n \n     /* Log DML if the audit role is valid or session logging is enabled */\n     if ((auditOid != InvalidOid || auditLogBitmap != 0) &&\n-        !IsAbortedTransactionBlockState())\n+        !IsAbortedTransactionBlockState() && !IsParallelWorker())\n     {\n         /* If auditLogRows is on, wait for rows processed to be set */\n         if (auditLogRows && auditEventStack != NULL)\n@@ -1475,7 +1476,7 @@ pgaudit_ExecutorRun_hook(QueryDesc *queryDesc, ScanDirection direction, uint64 c\n     else\n         standard_ExecutorRun(queryDesc, direction, count, execute_once);\n \n-    if (auditLogRows && !internalStatement)\n+    if (auditLogRows && !internalStatement && !IsParallelWorker())\n     {\n         /* Find an item from the stack by the query memory context */\n         stackItem = stack_find_context(queryDesc->estate->es_query_cxt);\n@@ -1495,7 +1496,7 @@ pgaudit_ExecutorEnd_hook(QueryDesc *queryDesc)\n     AuditEventStackItem *stackItem = NULL;\n     AuditEventStackItem *auditEventStackFull = NULL;\n \n-    if (auditLogRows && !internalStatement)\n+    if (auditLogRows && !internalStatement && !IsParallelWorker())\n     {\n         /* Find an item from the stack by the query memory context */\n    
     stackItem = stack_find_context(queryDesc->estate->es_query_cxt);\ndiff --git a/sql/pgaudit.sql b/sql/pgaudit.sql\nindex 6aae88b..de6d7fd 100644\n--- a/sql/pgaudit.sql\n+++ b/sql/pgaudit.sql\n@@ -1631,6 +1631,36 @@ DROP USER MAPPING FOR regress_user1 SERVER fdw_server;\n DROP SERVER fdw_server;\n DROP EXTENSION postgres_fdw;\n \n+--\n+-- Test logging in parallel workers\n+SET pgaudit.log = 'read';\n+SET pgaudit.log_client = on;\n+SET pgaudit.log_level = 'notice';\n+\n+-- Force parallel execution for testing\n+SET max_parallel_workers_per_gather = 2;\n+SET parallel_tuple_cost = 0;\n+SET parallel_setup_cost = 0;\n+SET min_parallel_table_scan_size = 0;\n+SET min_parallel_index_scan_size = 0;\n+\n+-- Create table with enough data to trigger parallel execution\n+CREATE TABLE parallel_test (id int, data text);\n+INSERT INTO parallel_test SELECT generate_series(1, 1000), 'test data';\n+\n+SELECT count(*) FROM parallel_test;\n+\n+-- Cleanup parallel test\n+DROP TABLE parallel_test;\n+RESET max_parallel_workers_per_gather;\n+RESET parallel_tuple_cost;\n+RESET parallel_setup_cost;\n+RESET min_parallel_table_scan_size;\n+RESET min_parallel_index_scan_size;\n+RESET pgaudit.log;\n+RESET pgaudit.log_client;\n+RESET pgaudit.log_level;\n+\n -- Cleanup\n -- Set client_min_messages up to warning to avoid noise\n SET client_min_messages = 'warning';\n"
  },
  {
    "path": "compute/patches/pgaudit-parallel_workers-v17.patch",
    "content": "commit 8d02e4c6c5e1e8676251b0717a46054267091cb4\nAuthor: Tristan Partin <tristan.partin@databricks.com>\nDate:   2025-06-23 02:09:31 +0000\n\n    Disable logging in parallel workers\n    \n    When a query uses parallel workers, pgaudit will log the same query for\n    every parallel worker. This is undesireable since it can result in log\n    amplification for queries that use parallel workers.\n    \n    Signed-off-by: Tristan Partin <tristan.partin@databricks.com>\n\ndiff --git a/expected/pgaudit.out b/expected/pgaudit.out\nindex d696287..4b1059a 100644\n--- a/expected/pgaudit.out\n+++ b/expected/pgaudit.out\n@@ -2568,6 +2568,37 @@ DROP SERVER fdw_server;\n NOTICE:  AUDIT: SESSION,11,1,DDL,DROP SERVER,,,DROP SERVER fdw_server,<not logged>\n DROP EXTENSION postgres_fdw;\n NOTICE:  AUDIT: SESSION,12,1,DDL,DROP EXTENSION,,,DROP EXTENSION postgres_fdw,<not logged>\n+--\n+-- Test logging in parallel workers\n+SET pgaudit.log = 'read';\n+SET pgaudit.log_client = on;\n+SET pgaudit.log_level = 'notice';\n+-- Force parallel execution for testing\n+SET max_parallel_workers_per_gather = 2;\n+SET parallel_tuple_cost = 0;\n+SET parallel_setup_cost = 0;\n+SET min_parallel_table_scan_size = 0;\n+SET min_parallel_index_scan_size = 0;\n+-- Create table with enough data to trigger parallel execution\n+CREATE TABLE parallel_test (id int, data text);\n+INSERT INTO parallel_test SELECT generate_series(1, 1000), 'test data';\n+SELECT count(*) FROM parallel_test;\n+NOTICE:  AUDIT: SESSION,13,1,READ,SELECT,,,SELECT count(*) FROM parallel_test,<not logged>\n+ count \n+-------\n+  1000\n+(1 row)\n+\n+-- Cleanup parallel test\n+DROP TABLE parallel_test;\n+RESET max_parallel_workers_per_gather;\n+RESET parallel_tuple_cost;\n+RESET parallel_setup_cost;\n+RESET min_parallel_table_scan_size;\n+RESET min_parallel_index_scan_size;\n+RESET pgaudit.log;\n+RESET pgaudit.log_client;\n+RESET pgaudit.log_level;\n -- Cleanup\n -- Set client_min_messages up to warning to avoid noise\n 
SET client_min_messages = 'warning';\ndiff --git a/pgaudit.c b/pgaudit.c\nindex 1764af1..0e48875 100644\n--- a/pgaudit.c\n+++ b/pgaudit.c\n@@ -11,6 +11,7 @@\n #include \"postgres.h\"\n \n #include \"access/htup_details.h\"\n+#include \"access/parallel.h\"\n #include \"access/sysattr.h\"\n #include \"access/xact.h\"\n #include \"access/relation.h\"\n@@ -1406,7 +1407,7 @@ pgaudit_ExecutorStart_hook(QueryDesc *queryDesc, int eflags)\n {\n     AuditEventStackItem *stackItem = NULL;\n \n-    if (!internalStatement)\n+    if (!internalStatement && !IsParallelWorker())\n     {\n         /* Push the audit event onto the stack */\n         stackItem = stack_push();\n@@ -1489,7 +1490,7 @@ pgaudit_ExecutorCheckPerms_hook(List *rangeTabls, List *permInfos, bool abort)\n \n     /* Log DML if the audit role is valid or session logging is enabled */\n     if ((auditOid != InvalidOid || auditLogBitmap != 0) &&\n-        !IsAbortedTransactionBlockState())\n+        !IsAbortedTransactionBlockState() && !IsParallelWorker())\n     {\n         /* If auditLogRows is on, wait for rows processed to be set */\n         if (auditLogRows && auditEventStack != NULL)\n@@ -1544,7 +1545,7 @@ pgaudit_ExecutorRun_hook(QueryDesc *queryDesc, ScanDirection direction, uint64 c\n     else\n         standard_ExecutorRun(queryDesc, direction, count, execute_once);\n \n-    if (auditLogRows && !internalStatement)\n+    if (auditLogRows && !internalStatement && !IsParallelWorker())\n     {\n         /* Find an item from the stack by the query memory context */\n         stackItem = stack_find_context(queryDesc->estate->es_query_cxt);\n@@ -1564,7 +1565,7 @@ pgaudit_ExecutorEnd_hook(QueryDesc *queryDesc)\n     AuditEventStackItem *stackItem = NULL;\n     AuditEventStackItem *auditEventStackFull = NULL;\n \n-    if (auditLogRows && !internalStatement)\n+    if (auditLogRows && !internalStatement && !IsParallelWorker())\n     {\n         /* Find an item from the stack by the query memory context */\n         
stackItem = stack_find_context(queryDesc->estate->es_query_cxt);\ndiff --git a/sql/pgaudit.sql b/sql/pgaudit.sql\nindex e161f01..c873098 100644\n--- a/sql/pgaudit.sql\n+++ b/sql/pgaudit.sql\n@@ -1637,6 +1637,36 @@ DROP USER MAPPING FOR regress_user1 SERVER fdw_server;\n DROP SERVER fdw_server;\n DROP EXTENSION postgres_fdw;\n \n+--\n+-- Test logging in parallel workers\n+SET pgaudit.log = 'read';\n+SET pgaudit.log_client = on;\n+SET pgaudit.log_level = 'notice';\n+\n+-- Force parallel execution for testing\n+SET max_parallel_workers_per_gather = 2;\n+SET parallel_tuple_cost = 0;\n+SET parallel_setup_cost = 0;\n+SET min_parallel_table_scan_size = 0;\n+SET min_parallel_index_scan_size = 0;\n+\n+-- Create table with enough data to trigger parallel execution\n+CREATE TABLE parallel_test (id int, data text);\n+INSERT INTO parallel_test SELECT generate_series(1, 1000), 'test data';\n+\n+SELECT count(*) FROM parallel_test;\n+\n+-- Cleanup parallel test\n+DROP TABLE parallel_test;\n+RESET max_parallel_workers_per_gather;\n+RESET parallel_tuple_cost;\n+RESET parallel_setup_cost;\n+RESET min_parallel_table_scan_size;\n+RESET min_parallel_index_scan_size;\n+RESET pgaudit.log;\n+RESET pgaudit.log_client;\n+RESET pgaudit.log_level;\n+\n -- Cleanup\n -- Set client_min_messages up to warning to avoid noise\n SET client_min_messages = 'warning';\n"
  },
  {
    "path": "compute/patches/pgvector.patch",
    "content": "diff --git a/Makefile b/Makefile\nindex 7a4b88c..56678af 100644\n--- a/Makefile\n+++ b/Makefile\n@@ -3,7 +3,10 @@ EXTVERSION = 0.8.0\n \n MODULE_big = vector\n DATA = $(wildcard sql/*--*--*.sql)\n-DATA_built = sql/$(EXTENSION)--$(EXTVERSION).sql\n+# This change is needed to install different per-version SQL files\n+# like pgvector--0.8.0.sql and pgvector--0.7.4.sql\n+# The corresponding file is downloaded during the Docker image build process\n+DATA_built = sql/$(EXTENSION)--$(EXTVERSION).sql sql/vector--0.7.4.sql\n OBJS = src/bitutils.o src/bitvec.o src/halfutils.o src/halfvec.o src/hnsw.o src/hnswbuild.o src/hnswinsert.o src/hnswscan.o src/hnswutils.o src/hnswvacuum.o src/ivfbuild.o src/ivfflat.o src/ivfinsert.o src/ivfkmeans.o src/ivfscan.o src/ivfutils.o src/ivfvacuum.o src/sparsevec.o src/vector.o\n HEADERS = src/halfvec.h src/sparsevec.h src/vector.h\n \ndiff --git a/src/hnswbuild.c b/src/hnswbuild.c\nindex b667478..1298aa1 100644\n--- a/src/hnswbuild.c\n+++ b/src/hnswbuild.c\n@@ -843,9 +843,17 @@ HnswParallelBuildMain(dsm_segment *seg, shm_toc *toc)\n \n \thnswarea = shm_toc_lookup(toc, PARALLEL_KEY_HNSW_AREA, false);\n \n+#ifdef NEON_SMGR\n+\tsmgr_start_unlogged_build(RelationGetSmgr(indexRel));\n+#endif\n+\n \t/* Perform inserts */\n \tHnswParallelScanAndInsert(heapRel, indexRel, hnswshared, hnswarea, false);\n \n+#ifdef NEON_SMGR\n+\tsmgr_finish_unlogged_build_phase_1(RelationGetSmgr(indexRel));\n+#endif\n+\n \t/* Close relations within worker */\n \tindex_close(indexRel, indexLockmode);\n \ttable_close(heapRel, heapLockmode);\n@@ -1100,13 +1108,25 @@ BuildIndex(Relation heap, Relation index, IndexInfo *indexInfo,\n \tSeedRandom(42);\n #endif\n \n+#ifdef NEON_SMGR\n+\tsmgr_start_unlogged_build(RelationGetSmgr(index));\n+#endif\n+\n \tInitBuildState(buildstate, heap, index, indexInfo, forkNum);\n \n \tBuildGraph(buildstate, forkNum);\n \n+#ifdef NEON_SMGR\n+\tsmgr_finish_unlogged_build_phase_1(RelationGetSmgr(index));\n+#endif\n+\n \tif 
(RelationNeedsWAL(index) || forkNum == INIT_FORKNUM)\n \t\tlog_newpage_range(index, forkNum, 0, RelationGetNumberOfBlocksInFork(index, forkNum), true);\n \n+#ifdef NEON_SMGR\n+\tsmgr_end_unlogged_build(RelationGetSmgr(index));\n+#endif\n+\n \tFreeBuildState(buildstate);\n }\n \n"
  },
  {
    "path": "compute/patches/plv8_v3.1.10.patch",
    "content": "diff --git a/patches/code/84cf3230a9680aac3b73c410c2b758760b6d3066.patch b/patches/code/84cf3230a9680aac3b73c410c2b758760b6d3066.patch\nnew file mode 100644\nindex 0000000..fae1cb3\n--- /dev/null\n+++ b/patches/code/84cf3230a9680aac3b73c410c2b758760b6d3066.patch\n@@ -0,0 +1,30 @@\n+From 84cf3230a9680aac3b73c410c2b758760b6d3066 Mon Sep 17 00:00:00 2001\n+From: Michael Lippautz <mlippautz@chromium.org>\n+Date: Thu, 27 Jan 2022 14:14:11 +0100\n+Subject: [PATCH] cppgc: Fix include\n+\n+Add <utility> to cover for std::exchange.\n+\n+Bug: v8:12585\n+Change-Id: Ida65144e93e466be8914527d0e646f348c136bcb\n+Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3420309\n+Auto-Submit: Michael Lippautz <mlippautz@chromium.org>\n+Reviewed-by: Omer Katz <omerkatz@chromium.org>\n+Commit-Queue: Michael Lippautz <mlippautz@chromium.org>\n+Cr-Commit-Position: refs/heads/main@{#78820}\n+---\n+ src/heap/cppgc/prefinalizer-handler.h | 1 +\n+ 1 file changed, 1 insertion(+)\n+\n+diff --git a/src/heap/cppgc/prefinalizer-handler.h b/src/heap/cppgc/prefinalizer-handler.h\n+index bc17c99b1838..c82c91ff5a45 100644\n+--- a/src/heap/cppgc/prefinalizer-handler.h\n++++ b/src/heap/cppgc/prefinalizer-handler.h\n+@@ -5,6 +5,7 @@\n+ #ifndef V8_HEAP_CPPGC_PREFINALIZER_HANDLER_H_\n+ #define V8_HEAP_CPPGC_PREFINALIZER_HANDLER_H_\n+\n++#include <utility>\n+ #include <vector>\n+\n+ #include \"include/cppgc/prefinalizer.h\"\ndiff --git a/plv8.cc b/plv8.cc\nindex c1ce883..6e47e94 100644\n--- a/plv8.cc\n+++ b/plv8.cc\n@@ -379,7 +379,7 @@ _PG_init(void)\n \t\t\t\t\t\t\t   NULL,\n \t\t\t\t\t\t\t   &plv8_v8_flags,\n \t\t\t\t\t\t\t   NULL,\n-\t\t\t\t\t\t\t   PGC_USERSET, 0,\n+\t\t\t\t\t\t\t   PGC_SUSET, 0,\n #if PG_VERSION_NUM >= 90100\n \t\t\t\t\t\t\t   NULL,\n #endif\n"
  },
  {
    "path": "compute/patches/plv8_v3.2.3.patch",
    "content": "diff --git a/plv8.cc b/plv8.cc\nindex edfa2aa..623e7f2 100644\n--- a/plv8.cc\n+++ b/plv8.cc\n@@ -385,7 +385,7 @@ _PG_init(void)\n                                    NULL,\n                                    &plv8_v8_flags,\n                                    NULL,\n-                                   PGC_USERSET, 0,\n+                                   PGC_SUSET, 0,\n #if PG_VERSION_NUM >= 90100\n                                    NULL,\n #endif\n"
  },
  {
    "path": "compute/patches/postgres_fdw.patch",
    "content": "diff --git a/contrib/postgres_fdw/postgres_fdw--1.0.sql b/contrib/postgres_fdw/postgres_fdw--1.0.sql\nindex a0f0fc1bf45..ee077f2eea6 100644\n--- a/contrib/postgres_fdw/postgres_fdw--1.0.sql\n+++ b/contrib/postgres_fdw/postgres_fdw--1.0.sql\n@@ -16,3 +16,12 @@ LANGUAGE C STRICT;\n CREATE FOREIGN DATA WRAPPER postgres_fdw\n   HANDLER postgres_fdw_handler\n   VALIDATOR postgres_fdw_validator;\n+\n+DO $$\n+DECLARE\n+  privileged_role_name text;\n+BEGIN\n+  privileged_role_name := current_setting('neon.privileged_role_name');\n+\n+  EXECUTE format('GRANT USAGE ON FOREIGN DATA WRAPPER postgres_fdw TO %I', privileged_role_name);\n+END $$;\n"
  },
  {
    "path": "compute/patches/rum.patch",
    "content": "diff --git a/src/ruminsert.c b/src/ruminsert.c\nindex 255e616..1c6edb7 100644\n--- a/src/ruminsert.c\n+++ b/src/ruminsert.c\n@@ -628,6 +628,10 @@ rumbuild(Relation heap, Relation index, struct IndexInfo *indexInfo)\n \t\telog(ERROR, \"index \\\"%s\\\" already contains data\",\n \t\t\t RelationGetRelationName(index));\n \n+#ifdef NEON_SMGR\n+\tsmgr_start_unlogged_build(RelationGetSmgr(index));\n+#endif\n+\n \tinitRumState(&buildstate.rumstate, index);\n \tbuildstate.rumstate.isBuild = true;\n \tbuildstate.indtuples = 0;\n@@ -693,6 +697,10 @@ rumbuild(Relation heap, Relation index, struct IndexInfo *indexInfo)\n \tbuildstate.buildStats.nTotalPages = RelationGetNumberOfBlocks(index);\n \trumUpdateStats(index, &buildstate.buildStats, buildstate.rumstate.isBuild);\n \n+#ifdef NEON_SMGR\n+\tsmgr_finish_unlogged_build_phase_1(RelationGetSmgr(index));\n+#endif\n+\n \t/*\n \t * Write index to xlog\n \t */\n@@ -713,6 +721,10 @@ rumbuild(Relation heap, Relation index, struct IndexInfo *indexInfo)\n \t\tUnlockReleaseBuffer(buffer);\n \t}\n \n+#ifdef NEON_SMGR\n+\tsmgr_end_unlogged_build(RelationGetSmgr(index));\n+#endif\n+\n \t/*\n \t * Return statistics\n \t */\n"
  },
  {
    "path": "compute/vm-image-spec-bookworm.yaml",
    "content": "# Supplemental file for neondatabase/autoscaling's vm-builder, for producing the VM compute image.\n---\ncommands:\n  - name: cgconfigparser\n    user: root\n    sysvInitAction: sysinit\n    shell: 'cgconfigparser -l /etc/cgconfig.conf -s 1664'\n  # restrict permissions on /neonvm/bin/resize-swap, because we grant access to compute_ctl for\n  # running it as root.\n  - name: chmod-resize-swap\n    user: root\n    sysvInitAction: sysinit\n    shell: 'chmod 711 /neonvm/bin/resize-swap'\n  - name: chmod-set-disk-quota\n    user: root\n    sysvInitAction: sysinit\n    shell: 'chmod 711 /neonvm/bin/set-disk-quota'\n  - name: pgbouncer\n    user: postgres\n    sysvInitAction: respawn\n    shell: '/usr/local/bin/pgbouncer /etc/pgbouncer.ini 2>&1 > /dev/virtio-ports/tech.neon.log.0'\n  - name: local_proxy\n    user: postgres\n    sysvInitAction: respawn\n    shell: 'RUST_LOG=\"error\" /usr/local/bin/local_proxy --config-path /etc/local_proxy/config.json --pid-path /etc/local_proxy/pid --http 0.0.0.0:10432'\n  - name: postgres-exporter\n    user: nobody\n    sysvInitAction: respawn\n    # Turn off database collector (`--no-collector.database`), we don't use `pg_database_size_bytes` metric anyway, see\n    # https://github.com/neondatabase/flux-fleet/blob/5e19b3fd897667b70d9a7ad4aa06df0ca22b49ff/apps/base/compute-metrics/scrape-compute-pg-exporter-neon.yaml#L29\n    # but it's enabled by default and it doesn't filter out invalid databases, see\n    # https://github.com/prometheus-community/postgres_exporter/blob/06a553c8166512c9d9c5ccf257b0f9bba8751dbc/collector/pg_database.go#L67\n    # so if it hits one, it starts spamming logs\n    #   ERROR:  [NEON_SMGR] [reqid d9700000018] could not read db size of db 705302 from page server at lsn 5/A2457EB0\n    shell: 'DATA_SOURCE_NAME=\"user=cloud_admin sslmode=disable dbname=postgres application_name=postgres-exporter pgaudit.log=none\" /bin/postgres_exporter --no-collector.database 
--config.file=/etc/postgres_exporter.yml'\n  - name: pgbouncer-exporter\n    user: postgres\n    sysvInitAction: respawn\n    shell: '/bin/pgbouncer_exporter --pgBouncer.connectionString=\"postgres:///pgbouncer?host=/tmp&port=6432&dbname=pgbouncer&user=pgbouncer\"'\n  - name: sql-exporter\n    user: nobody\n    sysvInitAction: respawn\n    shell: '/bin/sql_exporter -config.file=/etc/sql_exporter.yml -web.listen-address=:9399'\n  - name: sql-exporter-autoscaling\n    user: nobody\n    sysvInitAction: respawn\n    shell: '/bin/sql_exporter -config.file=/etc/sql_exporter_autoscaling.yml -web.listen-address=:9499'\n  # Rsyslog by default creates a unix socket under /dev/log . That's where Postgres sends logs also.\n  # We run syslog with postgres user so it can't create /dev/log. Instead we configure rsyslog to\n  # use a different path for the socket. The symlink actually points to our custom path.\n  - name: rsyslogd-socket-symlink\n    user: root\n    sysvInitAction: sysinit\n    shell: \"ln -s /var/db/postgres/rsyslogpipe /dev/log\"\n  - name: rsyslogd\n    user: postgres\n    sysvInitAction: respawn\n    shell: '/usr/sbin/rsyslogd -n -i /var/run/rsyslogd/rsyslogd.pid -f /etc/compute_rsyslog.conf'\nshutdownHook: |\n  su -p postgres --session-command '/usr/local/bin/pg_ctl stop -D /var/db/postgres/compute/pgdata -m fast --wait -t 10'\nfiles:\n  - filename: compute_ctl-sudoers\n    content: |\n      # Reverse hostname lookup doesn't currently work, and isn't needed anyway when all\n      # the rules use ALL as the hostname. Avoid the pointless lookups and the \"unable to\n      # resolve host\" log messages that they generate.\n      Defaults !fqdn\n\n      # Allow postgres user (which is what compute_ctl runs as) to run /neonvm/bin/resize-swap\n      # and /neonvm/bin/set-disk-quota as root without requiring entering a password (NOPASSWD),\n      # regardless of hostname (ALL)\n      #\n      # Also allow it to shut down the VM. 
The fast_import job does that when it's finished.\n      postgres ALL=(root) NOPASSWD: /neonvm/bin/resize-swap, /neonvm/bin/set-disk-quota, /neonvm/bin/poweroff, /usr/sbin/rsyslogd\n  - filename: cgconfig.conf\n    content: |\n      # Configuration for cgroups in VM compute nodes\n      group neon-postgres {\n          perm {\n              admin {\n                  uid = postgres;\n              }\n              task {\n                  gid = users;\n              }\n          }\n          memory {}\n      }\n# Create dummy rsyslog config, because it refuses to start without at least one action configured.\n# compute_ctl will rewrite this file with the actual configuration, if needed.\n  - filename: compute_rsyslog.conf\n    content: |\n      # SysSock.Name specifies a non-default pipe location that is writeable for the postgres user.\n      module(load=\"imuxsock\" SysSock.Name=\"/var/db/postgres/rsyslogpipe\") # provides support for local system logging\n\n      *.*    /dev/null\n      $IncludeConfig /etc/rsyslog.d/*.conf\nbuild: |\n  # Build cgroup-tools\n  #\n  # At time of writing (2023-03-14), debian bullseye has a version of cgroup-tools (technically\n  # libcgroup) that doesn't support cgroup v2 (version 0.41-11). Unfortunately, the vm-monitor\n  # requires cgroup v2, so we'll build cgroup-tools ourselves.\n  #\n  # At time of migration to bookworm (2024-10-09), debian has a version of libcgroup/cgroup-tools 2.0.2,\n  # and it _probably_ can be used as-is. 
However, we'll build it ourselves to minimise the changeset\n  # for debian version migration.\n  ARG BOOKWORM_SLIM_SHA=sha256:40b107342c492725bc7aacbe93a49945445191ae364184a6d24fedb28172f6f7\n  FROM debian@$BOOKWORM_SLIM_SHA as libcgroup-builder\n  ENV LIBCGROUP_VERSION=v2.0.3\n\n  RUN set -exu \\\n      && apt update \\\n      && apt install --no-install-recommends -y \\\n          git \\\n          ca-certificates \\\n          automake \\\n          cmake \\\n          make \\\n          gcc \\\n          byacc \\\n          flex \\\n          libtool \\\n          libpam0g-dev \\\n      && git clone --depth 1 -b $LIBCGROUP_VERSION https://github.com/libcgroup/libcgroup \\\n      && INSTALL_DIR=\"/libcgroup-install\" \\\n      && mkdir -p \"$INSTALL_DIR/bin\" \"$INSTALL_DIR/include\" \\\n      && cd libcgroup \\\n      # extracted from bootstrap.sh, with modified flags:\n      && (test -d m4 || mkdir m4) \\\n      && autoreconf -fi \\\n      && rm -rf autom4te.cache \\\n      && CFLAGS=\"-O3\" ./configure --prefix=\"$INSTALL_DIR\" --sysconfdir=/etc --localstatedir=/var --enable-opaque-hierarchy=\"name=systemd\" \\\n      # actually build the thing...\n      && make install\nmerge: |\n  # tweak nofile limits\n  RUN set -e \\\n      && echo 'fs.file-max = 1048576' >>/etc/sysctl.conf \\\n      && test ! 
-e /etc/security || ( \\\n         echo '*    - nofile 1048576' >>/etc/security/limits.conf \\\n      && echo 'root - nofile 1048576' >>/etc/security/limits.conf \\\n         )\n\n  # Allow postgres user (compute_ctl) to run swap resizer.\n  # Need to install sudo in order to allow this.\n  #\n  # Also, remove the 'read' permission from group/other on /neonvm/bin/resize-swap, just to be safe.\n  RUN set -e \\\n      && apt update \\\n      && apt install --no-install-recommends -y \\\n             sudo \\\n      && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*\n  COPY compute_ctl-sudoers /etc/sudoers.d/compute_ctl-sudoers\n\n  COPY cgconfig.conf /etc/cgconfig.conf\n\n  RUN set -e \\\n      && chmod 0644 /etc/cgconfig.conf\n\n\n  COPY compute_rsyslog.conf /etc/compute_rsyslog.conf\n  RUN chmod 0666 /etc/compute_rsyslog.conf\n  RUN mkdir /var/log/rsyslog && chown -R postgres /var/log/rsyslog\n\n\n  COPY --from=libcgroup-builder /libcgroup-install/bin/*  /usr/bin/\n  COPY --from=libcgroup-builder /libcgroup-install/lib/*  /usr/lib/\n  COPY --from=libcgroup-builder /libcgroup-install/sbin/* /usr/sbin/\n"
  },
  {
    "path": "compute/vm-image-spec-bullseye.yaml",
    "content": "# Supplemental file for neondatabase/autoscaling's vm-builder, for producing the VM compute image.\n---\ncommands:\n  - name: cgconfigparser\n    user: root\n    sysvInitAction: sysinit\n    shell: 'cgconfigparser -l /etc/cgconfig.conf -s 1664'\n  # restrict permissions on /neonvm/bin/resize-swap, because we grant access to compute_ctl for\n  # running it as root.\n  - name: chmod-resize-swap\n    user: root\n    sysvInitAction: sysinit\n    shell: 'chmod 711 /neonvm/bin/resize-swap'\n  - name: chmod-set-disk-quota\n    user: root\n    sysvInitAction: sysinit\n    shell: 'chmod 711 /neonvm/bin/set-disk-quota'\n  - name: pgbouncer\n    user: postgres\n    sysvInitAction: respawn\n    shell: '/usr/local/bin/pgbouncer /etc/pgbouncer.ini 2>&1 > /dev/virtio-ports/tech.neon.log.0'\n  - name: local_proxy\n    user: postgres\n    sysvInitAction: respawn\n    shell: 'RUST_LOG=\"error\" /usr/local/bin/local_proxy --config-path /etc/local_proxy/config.json --pid-path /etc/local_proxy/pid --http 0.0.0.0:10432'\n  - name: postgres-exporter\n    user: nobody\n    sysvInitAction: respawn\n    # Turn off database collector (`--no-collector.database`), we don't use `pg_database_size_bytes` metric anyway, see\n    # https://github.com/neondatabase/flux-fleet/blob/5e19b3fd897667b70d9a7ad4aa06df0ca22b49ff/apps/base/compute-metrics/scrape-compute-pg-exporter-neon.yaml#L29\n    # but it's enabled by default and it doesn't filter out invalid databases, see\n    # https://github.com/prometheus-community/postgres_exporter/blob/06a553c8166512c9d9c5ccf257b0f9bba8751dbc/collector/pg_database.go#L67\n    # so if it hits one, it starts spamming logs\n    #   ERROR:  [NEON_SMGR] [reqid d9700000018] could not read db size of db 705302 from page server at lsn 5/A2457EB0\n    shell: 'DATA_SOURCE_NAME=\"user=cloud_admin sslmode=disable dbname=postgres application_name=postgres-exporter pgaudit.log=none\" /bin/postgres_exporter --no-collector.database 
--config.file=/etc/postgres_exporter.yml'\n  - name: pgbouncer-exporter\n    user: postgres\n    sysvInitAction: respawn\n    shell: '/bin/pgbouncer_exporter --pgBouncer.connectionString=\"postgres:///pgbouncer?host=/tmp&port=6432&dbname=pgbouncer&user=pgbouncer\"'\n  - name: sql-exporter\n    user: nobody\n    sysvInitAction: respawn\n    shell: '/bin/sql_exporter -config.file=/etc/sql_exporter.yml -web.listen-address=:9399'\n  - name: sql-exporter-autoscaling\n    user: nobody\n    sysvInitAction: respawn\n    shell: '/bin/sql_exporter -config.file=/etc/sql_exporter_autoscaling.yml -web.listen-address=:9499'\n  # Rsyslog by default creates a unix socket under /dev/log . That's where Postgres sends logs also.\n  # We run syslog with postgres user so it can't create /dev/log. Instead we configure rsyslog to\n  # use a different path for the socket. The symlink actually points to our custom path.\n  - name: rsyslogd-socket-symlink\n    user: root\n    sysvInitAction: sysinit\n    shell: \"ln -s /var/db/postgres/rsyslogpipe /dev/log\"\n  - name: rsyslogd\n    user: postgres\n    sysvInitAction: respawn\n    shell: '/usr/sbin/rsyslogd -n -i /var/run/rsyslogd/rsyslogd.pid -f /etc/compute_rsyslog.conf'\nshutdownHook: |\n  su -p postgres --session-command '/usr/local/bin/pg_ctl stop -D /var/db/postgres/compute/pgdata -m fast --wait -t 10'\nfiles:\n  - filename: compute_ctl-sudoers\n    content: |\n      # Reverse hostname lookup doesn't currently work, and isn't needed anyway when all\n      # the rules use ALL as the hostname. Avoid the pointless lookups and the \"unable to\n      # resolve host\" log messages that they generate.\n      Defaults !fqdn\n\n      # Allow postgres user (which is what compute_ctl runs as) to run /neonvm/bin/resize-swap\n      # and /neonvm/bin/set-disk-quota as root without requiring entering a password (NOPASSWD),\n      # regardless of hostname (ALL)\n      #\n      # Also allow it to shut down the VM. 
The fast_import job does that when it's finished.\n      postgres ALL=(root) NOPASSWD: /neonvm/bin/resize-swap, /neonvm/bin/set-disk-quota, /neonvm/bin/poweroff, /usr/sbin/rsyslogd\n  - filename: cgconfig.conf\n    content: |\n      # Configuration for cgroups in VM compute nodes\n      group neon-postgres {\n          perm {\n              admin {\n                  uid = postgres;\n              }\n              task {\n                  gid = users;\n              }\n          }\n          memory {}\n      }\n# Create dummy rsyslog config, because it refuses to start without at least one action configured.\n# compute_ctl will rewrite this file with the actual configuration, if needed.\n  - filename: compute_rsyslog.conf\n    content: |\n      # SysSock.Name specifies a non-default pipe location that is writeable for the postgres user.\n      module(load=\"imuxsock\" SysSock.Name=\"/var/db/postgres/rsyslogpipe\") # provides support for local system logging\n\n      *.*    /dev/null\n      $IncludeConfig /etc/rsyslog.d/*.conf\nbuild: |\n  # Build cgroup-tools\n  #\n  # At time of writing (2023-03-14), debian bullseye has a version of cgroup-tools (technically\n  # libcgroup) that doesn't support cgroup v2 (version 0.41-11). 
Unfortunately, the vm-monitor\n  # requires cgroup v2, so we'll build cgroup-tools ourselves.\n  ARG BULLSEYE_SLIM_SHA=sha256:e831d9a884d63734fe3dd9c491ed9a5a3d4c6a6d32c5b14f2067357c49b0b7e1\n  FROM debian@$BULLSEYE_SLIM_SHA as libcgroup-builder\n  ENV LIBCGROUP_VERSION=v2.0.3\n\n  RUN set -exu \\\n      && apt update \\\n      && apt install --no-install-recommends -y \\\n          git \\\n          ca-certificates \\\n          automake \\\n          cmake \\\n          make \\\n          gcc \\\n          byacc \\\n          flex \\\n          libtool \\\n          libpam0g-dev \\\n      && git clone --depth 1 -b $LIBCGROUP_VERSION https://github.com/libcgroup/libcgroup \\\n      && INSTALL_DIR=\"/libcgroup-install\" \\\n      && mkdir -p \"$INSTALL_DIR/bin\" \"$INSTALL_DIR/include\" \\\n      && cd libcgroup \\\n      # extracted from bootstrap.sh, with modified flags:\n      && (test -d m4 || mkdir m4) \\\n      && autoreconf -fi \\\n      && rm -rf autom4te.cache \\\n      && CFLAGS=\"-O3\" ./configure --prefix=\"$INSTALL_DIR\" --sysconfdir=/etc --localstatedir=/var --enable-opaque-hierarchy=\"name=systemd\" \\\n      # actually build the thing...\n      && make install\nmerge: |\n  # tweak nofile limits\n  RUN set -e \\\n      && echo 'fs.file-max = 1048576' >>/etc/sysctl.conf \\\n      && test ! 
-e /etc/security || ( \\\n         echo '*    - nofile 1048576' >>/etc/security/limits.conf \\\n      && echo 'root - nofile 1048576' >>/etc/security/limits.conf \\\n         )\n\n  # Allow postgres user (compute_ctl) to run swap resizer.\n  # Need to install sudo in order to allow this.\n  #\n  # Also, remove the 'read' permission from group/other on /neonvm/bin/resize-swap, just to be safe.\n  RUN set -e \\\n      && apt update \\\n      && apt install --no-install-recommends -y \\\n             sudo \\\n      && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*\n  COPY compute_ctl-sudoers /etc/sudoers.d/compute_ctl-sudoers\n\n  COPY cgconfig.conf /etc/cgconfig.conf\n\n  RUN set -e \\\n      && chmod 0644 /etc/cgconfig.conf\n\n  COPY compute_rsyslog.conf /etc/compute_rsyslog.conf\n  RUN chmod 0666 /etc/compute_rsyslog.conf\n  RUN mkdir /var/log/rsyslog && chown -R postgres /var/log/rsyslog\n\n\n  COPY --from=libcgroup-builder /libcgroup-install/bin/*  /usr/bin/\n  COPY --from=libcgroup-builder /libcgroup-install/lib/*  /usr/lib/\n  COPY --from=libcgroup-builder /libcgroup-install/sbin/* /usr/sbin/\n"
  },
  {
    "path": "compute_tools/.dockerignore",
    "content": "target\n"
  },
  {
    "path": "compute_tools/.gitignore",
    "content": "target\n"
  },
  {
    "path": "compute_tools/Cargo.toml",
    "content": "[package]\nname = \"compute_tools\"\nversion = \"0.1.0\"\nedition = \"2024\"\nlicense.workspace = true\n\n[features]\ndefault = []\n# Enables test specific features.\ntesting = [\"fail/failpoints\"]\n\n[dependencies]\nasync-compression.workspace = true\nbase64.workspace = true\naws-config.workspace = true\naws-sdk-s3.workspace = true\naws-sdk-kms.workspace = true\naws-smithy-types.workspace = true\nanyhow.workspace = true\naxum = { workspace = true, features = [] }\naxum-extra.workspace = true\ncamino.workspace = true\nchrono.workspace = true\ncfg-if.workspace = true\nclap.workspace = true\nfail.workspace = true\nflate2.workspace = true\nfutures.workspace = true\nhttp.workspace = true\nhttp-body-util.workspace = true\nhostname-validator = \"1.1\"\nhyper.workspace = true\nhyper-util.workspace = true\nindexmap.workspace = true\nitertools.workspace = true\njsonwebtoken.workspace = true\nmetrics.workspace = true\nnix.workspace = true\nnotify.workspace = true\nnum_cpus.workspace = true\nonce_cell.workspace = true\nopentelemetry.workspace = true\nopentelemetry_sdk.workspace = true\np256 = { version = \"0.13\", features = [\"pem\"] }\npageserver_page_api.workspace = true\npostgres.workspace = true\nregex.workspace = true\nreqwest = { workspace = true, features = [\"json\"] }\nring = \"0.17\"\nscopeguard.workspace = true\nserde.workspace = true\nserde_with.workspace = true\nserde_json.workspace = true\nsignal-hook.workspace = true\ntar.workspace = true\ntower.workspace = true\ntower-http.workspace = true\ntokio = { workspace = true, features = [\"rt\", \"rt-multi-thread\"] }\ntokio-postgres.workspace = true\ntokio-util.workspace = true\ntokio-stream.workspace = true\ntonic.workspace = true\ntower-otel.workspace = true\ntracing.workspace = true\ntracing-appender.workspace = true\ntracing-opentelemetry.workspace = true\ntracing-subscriber.workspace = true\ntracing-utils.workspace = true\nthiserror.workspace = true\nurl.workspace = true\nuuid.workspace = 
true\nwalkdir.workspace = true\nx509-cert.workspace = true\npostgres-types.workspace = true\npostgres_versioninfo.workspace = true\npostgres_initdb.workspace = true\ncompute_api.workspace = true\nutils.workspace = true\nworkspace_hack.workspace = true\nremote_storage = { version = \"0.1\", path = \"../libs/remote_storage/\" }\nvm_monitor = { version = \"0.1\", path = \"../libs/vm_monitor/\" }\nzstd = \"0.13\"\nbytes = \"1.0\"\nrust-ini = \"0.20.0\"\nrlimit = \"0.10.1\"\n"
  },
  {
    "path": "compute_tools/README.md",
    "content": "# Compute node tools\n\nPostgres wrapper (`compute_ctl`) is intended to be run as a Docker entrypoint or as a `systemd`\n`ExecStart` option. It will handle all the `Neon` specifics during compute node\ninitialization:\n- `compute_ctl` accepts cluster (compute node) specification as a JSON file.\n- Every start is a fresh start, so the data directory is removed and\n  initialized again on each run.\n- Next it will put configuration files into the `PGDATA` directory.\n- Sync safekeepers and get commit LSN.\n- Get `basebackup` from pageserver using the returned on the previous step LSN.\n- Try to start `postgres` and wait until it is ready to accept connections.\n- Check and alter/drop/create roles and databases.\n- Hang waiting on the `postmaster` process to exit.\n\nAlso `compute_ctl` spawns two separate service threads:\n- `compute-monitor` checks the last Postgres activity timestamp and saves it\n  into the shared `ComputeNode`;\n- `http-endpoint` runs a Hyper HTTP API server, which serves readiness and the\n  last activity requests.\n\nIf `AUTOSCALING` environment variable is set, `compute_ctl` will start the\n`vm-monitor` located in [`neon/libs/vm_monitor`]. For VM compute nodes,\n`vm-monitor` communicates with the VM autoscaling system. It coordinates\ndownscaling and requests immediate upscaling under resource pressure.\n\nUsage example:\n```sh\ncompute_ctl -D /var/db/postgres/compute \\\n            -C 'postgresql://cloud_admin@localhost/postgres' \\\n            -S /var/db/postgres/specs/current.json \\\n            -b /usr/local/bin/postgres\n```\n\n## State Diagram\n\nComputes can be in various states. 
Below is a diagram that details how a\ncompute moves between states.\n\n```mermaid\n%% https://mermaid.js.org/syntax/stateDiagram.html\nstateDiagram-v2\n  [*] --> Empty : Compute spawned\n  Empty --> ConfigurationPending : Waiting for compute spec\n  ConfigurationPending --> Configuration : Received compute spec\n  Configuration --> Failed : Failed to configure the compute\n  Configuration --> Running : Compute has been configured\n  Empty --> Init : Compute spec is immediately available\n  Empty --> TerminationPendingFast : Requested termination\n  Empty --> TerminationPendingImmediate : Requested termination\n  Init --> Failed : Failed to start Postgres\n  Init --> Running : Started Postgres\n  Running --> TerminationPendingFast : Requested termination\n  Running --> TerminationPendingImmediate : Requested termination\n  Running --> ConfigurationPending : Received a /configure request with spec\n  Running --> RefreshConfigurationPending : Received a /refresh_configuration request, compute node will pull a new spec and reconfigure\n  RefreshConfigurationPending --> RefreshConfiguration: Received compute spec and started configuration\n  RefreshConfiguration --> Running : Compute has been re-configured\n  RefreshConfiguration --> RefreshConfigurationPending : Configuration failed and to be retried\n  TerminationPendingFast --> Terminated : Terminated compute with 30s delay for cplane to inspect status\n  TerminationPendingImmediate --> Terminated : Terminated compute immediately\n  Failed --> RefreshConfigurationPending : Received a /refresh_configuration request\n  Failed --> [*] : Compute exited\n  Terminated --> [*] : Compute exited\n```\n\n## Tests\n\nCargo formatter:\n```sh\ncargo fmt\n```\n\nRun tests:\n```sh\ncargo test\n```\n\nClippy linter:\n```sh\ncargo clippy --all --all-targets -- -Dwarnings -Drust-2018-idioms\n```\n\n## Cross-platform compilation\n\nImagine that you are on macOS (x86) and you want a Linux GNU (`x86_64-unknown-linux-gnu` platform in `rust` 
terminology) executable.\n\n### Using docker\n\nYou can use a throw-away Docker container ([rustlang/rust](https://hub.docker.com/r/rustlang/rust/) image) for doing that:\n```sh\ndocker run --rm \\\n    -v $(pwd):/compute_tools \\\n    -w /compute_tools \\\n    -t rustlang/rust:nightly cargo build --release --target=x86_64-unknown-linux-gnu\n```\nor one-line:\n```sh\ndocker run --rm -v $(pwd):/compute_tools -w /compute_tools -t rust:latest cargo build --release --target=x86_64-unknown-linux-gnu\n```\n\n### Using rust native cross-compilation\n\nAnother way is to add `x86_64-unknown-linux-gnu` target on your host system:\n```sh\nrustup target add x86_64-unknown-linux-gnu\n```\n\nInstall macOS cross-compiler toolchain:\n```sh\nbrew tap SergioBenitez/osxct\nbrew install x86_64-unknown-linux-gnu\n```\n\nAnd finally run `cargo build`:\n```sh\nCARGO_TARGET_X86_64_UNKNOWN_LINUX_GNU_LINKER=x86_64-unknown-linux-gnu-gcc cargo build --target=x86_64-unknown-linux-gnu --release\n```\n"
  },
  {
    "path": "compute_tools/rustfmt.toml",
    "content": "max_width = 100\n"
  },
  {
    "path": "compute_tools/src/bin/compute_ctl.rs",
    "content": "//!\n//! Postgres wrapper (`compute_ctl`) is intended to be run as a Docker entrypoint or as a `systemd`\n//! `ExecStart` option. It will handle all the `Neon` specifics during compute node\n//! initialization:\n//! - `compute_ctl` accepts cluster (compute node) specification as a JSON file.\n//! - Every start is a fresh start, so the data directory is removed and\n//!   initialized again on each run.\n//! - If remote_extension_config is provided, it will be used to fetch extensions list\n//!   and download `shared_preload_libraries` from the remote storage.\n//! - Next it will put configuration files into the `PGDATA` directory.\n//! - Sync safekeepers and get commit LSN.\n//! - Get `basebackup` from pageserver using the returned on the previous step LSN.\n//! - Try to start `postgres` and wait until it is ready to accept connections.\n//! - Check and alter/drop/create roles and databases.\n//! - Hang waiting on the `postmaster` process to exit.\n//!\n//! Also `compute_ctl` spawns two separate service threads:\n//! - `compute-monitor` checks the last Postgres activity timestamp and saves it\n//!   into the shared `ComputeNode`;\n//! - `http-endpoint` runs a Hyper HTTP API server, which serves readiness and the\n//!   last activity requests.\n//!\n//! If `AUTOSCALING` environment variable is set, `compute_ctl` will start the\n//! `vm-monitor` located in [`neon/libs/vm_monitor`]. For VM compute nodes,\n//! `vm-monitor` communicates with the VM autoscaling system. It coordinates\n//! downscaling and requests immediate upscaling under resource pressure.\n//!\n//! Usage example:\n//! ```sh\n//! compute_ctl -D /var/db/postgres/compute \\\n//!             -C 'postgresql://cloud_admin@localhost/postgres' \\\n//!             -c /var/db/postgres/configs/config.json \\\n//!             -b /usr/local/bin/postgres \\\n//!             -r http://pg-ext-s3-gateway \\\n//! 
```\nuse std::ffi::OsString;\nuse std::fs::File;\nuse std::process::exit;\nuse std::sync::Arc;\nuse std::sync::atomic::AtomicU64;\nuse std::sync::mpsc;\nuse std::thread;\nuse std::time::Duration;\n\nuse anyhow::{Context, Result, bail};\nuse clap::Parser;\nuse compute_api::responses::ComputeConfig;\nuse compute_tools::compute::{\n    BUILD_TAG, ComputeNode, ComputeNodeParams, forward_termination_signal,\n};\nuse compute_tools::extension_server::get_pg_version_string;\nuse compute_tools::params::*;\nuse compute_tools::pg_isready::get_pg_isready_bin;\nuse compute_tools::spec::*;\nuse compute_tools::{hadron_metrics, installed_extensions, logger::*};\nuse rlimit::{Resource, setrlimit};\nuse signal_hook::consts::{SIGINT, SIGQUIT, SIGTERM};\nuse signal_hook::iterator::Signals;\nuse tracing::{error, info};\nuse url::Url;\nuse utils::failpoint_support;\n\n#[derive(Debug, Parser)]\n#[command(rename_all = \"kebab-case\")]\nstruct Cli {\n    #[arg(short = 'b', long, default_value = \"postgres\", env = \"POSTGRES_PATH\")]\n    pub pgbin: String,\n\n    /// The base URL for the remote extension storage proxy gateway.\n    #[arg(short = 'r', long, value_parser = Self::parse_remote_ext_base_url)]\n    pub remote_ext_base_url: Option<Url>,\n\n    /// The port to bind the external listening HTTP server to. Clients running\n    /// outside the compute will talk to the compute through this port. Keep\n    /// the previous name for this argument around for a smoother release\n    /// with the control plane.\n    #[arg(long, default_value_t = 3080)]\n    pub external_http_port: u16,\n\n    /// The port to bind the internal listening HTTP server to. Clients include\n    /// the neon extension (for installing remote extensions) and local_proxy.\n    #[arg(long, default_value_t = 3081)]\n    pub internal_http_port: u16,\n\n    /// Backwards-compatible --http-port for Hadron deployments. 
Functionally the\n    /// same as --external-http-port.\n    #[arg(\n        long,\n        conflicts_with = \"external_http_port\",\n        conflicts_with = \"internal_http_port\"\n    )]\n    pub http_port: Option<u16>,\n\n    #[arg(short = 'D', long, value_name = \"DATADIR\")]\n    pub pgdata: String,\n\n    #[arg(short = 'C', long, value_name = \"DATABASE_URL\")]\n    pub connstr: String,\n\n    #[arg(\n        long,\n        default_value = \"neon_superuser\",\n        value_name = \"PRIVILEGED_ROLE_NAME\",\n        value_parser = Self::parse_privileged_role_name\n    )]\n    pub privileged_role_name: String,\n\n    #[cfg(target_os = \"linux\")]\n    #[arg(long, default_value = \"neon-postgres\")]\n    pub cgroup: String,\n\n    #[cfg(target_os = \"linux\")]\n    #[arg(\n        long,\n        default_value = \"host=localhost port=5432 dbname=postgres user=cloud_admin sslmode=disable application_name=vm-monitor\"\n    )]\n    pub filecache_connstr: String,\n\n    #[cfg(target_os = \"linux\")]\n    #[arg(long, default_value = \"0.0.0.0:10301\")]\n    pub vm_monitor_addr: String,\n\n    #[arg(long, action = clap::ArgAction::SetTrue)]\n    pub resize_swap_on_bind: bool,\n\n    #[arg(long)]\n    pub set_disk_quota_for_fs: Option<String>,\n\n    #[arg(short = 'c', long)]\n    pub config: Option<OsString>,\n\n    #[arg(short = 'i', long, group = \"compute-id\")]\n    pub compute_id: String,\n\n    #[arg(\n        short = 'p',\n        long,\n        conflicts_with = \"config\",\n        value_name = \"CONTROL_PLANE_API_BASE_URL\",\n        requires = \"compute-id\"\n    )]\n    pub control_plane_uri: Option<String>,\n\n    /// Interval in seconds for collecting installed extensions statistics\n    #[arg(long, default_value = \"3600\")]\n    pub installed_extensions_collection_interval: u64,\n\n    /// Run in development mode, skipping VM-specific operations like process termination\n    #[arg(long, action = clap::ArgAction::SetTrue)]\n    pub dev: bool,\n\n    
#[arg(long)]\n    pub pg_init_timeout: Option<u64>,\n\n    #[arg(long, default_value_t = false, action = clap::ArgAction::Set)]\n    pub lakebase_mode: bool,\n}\n\nimpl Cli {\n    /// Parse a URL from an argument. By default, this isn't necessary, but we\n    /// want to do some sanity checking.\n    fn parse_remote_ext_base_url(value: &str) -> Result<Url> {\n        // Remove extra trailing slashes, and add one. We use Url::join() later\n        // when downloading remote extensions. If the base URL is something like\n        // http://example.com/pg-ext-s3-gateway, and join() is called with\n        // something like \"xyz\", the resulting URL is http://example.com/xyz.\n        let value = value.trim_end_matches('/').to_owned() + \"/\";\n        let url = Url::parse(&value)?;\n\n        if url.query_pairs().count() != 0 {\n            bail!(\"parameters detected in remote extensions base URL\")\n        }\n\n        Ok(url)\n    }\n\n    /// For simplicity, we do not escape `privileged_role_name` anywhere in the code.\n    /// Since it's a system role, which we fully control, that's fine. Still, let's\n    /// validate it to avoid any surprises.\n    fn parse_privileged_role_name(value: &str) -> Result<String> {\n        use regex::Regex;\n\n        let pattern = Regex::new(r\"^[a-z_]+$\").unwrap();\n\n        if !pattern.is_match(value) {\n            bail!(\"--privileged-role-name can only contain lowercase letters and underscores\")\n        }\n\n        Ok(value.to_string())\n    }\n}\n\n// Hadron helpers to get compatible compute_ctl http ports from Cli. The old `--http-port`\n// arg is used and acts the same as `--external-http-port`. The internal http port is defined\n// to be http_port + 1. 
Hadron runs in the dblet environment which uses the host network, so\n// we need to be careful with the ports to choose.\nfn get_external_http_port(cli: &Cli) -> u16 {\n    if cli.lakebase_mode {\n        return cli.http_port.unwrap_or(cli.external_http_port);\n    }\n    cli.external_http_port\n}\nfn get_internal_http_port(cli: &Cli) -> u16 {\n    if cli.lakebase_mode {\n        return cli\n            .http_port\n            .map(|p| p + 1)\n            .unwrap_or(cli.internal_http_port);\n    }\n    cli.internal_http_port\n}\n\nfn main() -> Result<()> {\n    let cli = Cli::parse();\n\n    let scenario = failpoint_support::init();\n\n    // For historical reasons, the main thread that processes the config and launches postgres\n    // is synchronous, but we always have this tokio runtime available and we \"enter\" it so\n    // that you can use tokio::spawn() and tokio::runtime::Handle::current().block_on(...)\n    // from all parts of compute_ctl.\n    let runtime = tokio::runtime::Builder::new_multi_thread()\n        .enable_all()\n        .build()?;\n    let _rt_guard = runtime.enter();\n\n    let mut log_dir = None;\n    if cli.lakebase_mode {\n        log_dir = std::env::var(\"COMPUTE_CTL_LOG_DIRECTORY\").ok();\n    }\n\n    let (tracing_provider, _file_logs_guard) = init(cli.dev, log_dir)?;\n\n    // enable core dumping for all child processes\n    setrlimit(Resource::CORE, rlimit::INFINITY, rlimit::INFINITY)?;\n\n    if cli.lakebase_mode {\n        installed_extensions::initialize_metrics();\n        hadron_metrics::initialize_metrics();\n    }\n\n    let connstr = Url::parse(&cli.connstr).context(\"cannot parse connstr as a URL\")?;\n\n    let config = get_config(&cli)?;\n\n    let external_http_port = get_external_http_port(&cli);\n    let internal_http_port = get_internal_http_port(&cli);\n\n    let compute_node = ComputeNode::new(\n        ComputeNodeParams {\n            compute_id: cli.compute_id,\n            connstr,\n            
privileged_role_name: cli.privileged_role_name.clone(),\n            pgdata: cli.pgdata.clone(),\n            pgbin: cli.pgbin.clone(),\n            pgversion: get_pg_version_string(&cli.pgbin),\n            external_http_port,\n            internal_http_port,\n            remote_ext_base_url: cli.remote_ext_base_url.clone(),\n            resize_swap_on_bind: cli.resize_swap_on_bind,\n            set_disk_quota_for_fs: cli.set_disk_quota_for_fs,\n            #[cfg(target_os = \"linux\")]\n            filecache_connstr: cli.filecache_connstr,\n            #[cfg(target_os = \"linux\")]\n            cgroup: cli.cgroup,\n            #[cfg(target_os = \"linux\")]\n            vm_monitor_addr: cli.vm_monitor_addr,\n            installed_extensions_collection_interval: Arc::new(AtomicU64::new(\n                cli.installed_extensions_collection_interval,\n            )),\n            pg_init_timeout: cli.pg_init_timeout.map(Duration::from_secs),\n            pg_isready_bin: get_pg_isready_bin(&cli.pgbin),\n            instance_id: std::env::var(\"INSTANCE_ID\").ok(),\n            lakebase_mode: cli.lakebase_mode,\n            build_tag: BUILD_TAG.to_string(),\n            control_plane_uri: cli.control_plane_uri,\n            config_path_test_only: cli.config,\n        },\n        config,\n    )?;\n\n    let exit_code = compute_node.run().context(\"running compute node\")?;\n\n    scenario.teardown();\n\n    deinit_and_exit(tracing_provider, exit_code);\n}\n\nfn init(\n    dev_mode: bool,\n    log_dir: Option<String>,\n) -> Result<(\n    Option<tracing_utils::Provider>,\n    Option<tracing_appender::non_blocking::WorkerGuard>,\n)> {\n    let (provider, file_logs_guard) = init_tracing_and_logging(DEFAULT_LOG_LEVEL, &log_dir)?;\n\n    let mut signals = Signals::new([SIGINT, SIGTERM, SIGQUIT])?;\n    thread::spawn(move || {\n        for sig in signals.forever() {\n            handle_exit_signal(sig, dev_mode);\n        }\n    });\n\n    info!(\"compute build_tag: {}\", 
&BUILD_TAG.to_string());\n\n    Ok((provider, file_logs_guard))\n}\n\nfn get_config(cli: &Cli) -> Result<ComputeConfig> {\n    // First, read the config from the path if provided\n    if let Some(ref config) = cli.config {\n        let file = File::open(config)?;\n        return Ok(serde_json::from_reader(&file)?);\n    }\n\n    // If the config wasn't provided in the CLI arguments, then retrieve it from\n    // the control plane\n    match get_config_from_control_plane(cli.control_plane_uri.as_ref().unwrap(), &cli.compute_id) {\n        Ok(config) => Ok(config),\n        Err(e) => {\n            error!(\n                \"cannot get response from control plane: {}\\n\\\n                neither spec nor confirmation that compute is in the Empty state was received\",\n                e\n            );\n            Err(e)\n        }\n    }\n}\n\nfn deinit_and_exit(tracing_provider: Option<tracing_utils::Provider>, exit_code: Option<i32>) -> ! {\n    if let Some(p) = tracing_provider {\n        // Shutdown trace pipeline gracefully, so that it has a chance to send any\n        // pending traces before we exit. Shutting down OTEL tracing provider may\n        // hang for quite some time, see, for example:\n        // - https://github.com/open-telemetry/opentelemetry-rust/issues/868\n        // - and our problems with staging https://github.com/neondatabase/cloud/issues/3707#issuecomment-1493983636\n        //\n        // Yet, we want computes to shut down fast enough, as we may need a new one\n        // for the same timeline ASAP. 
So wait no longer than 2s for the shutdown to\n        // complete, then just error out and exit the main thread.\n        info!(\"shutting down tracing\");\n        let (sender, receiver) = mpsc::channel();\n        let _ = thread::spawn(move || {\n            _ = p.shutdown();\n            sender.send(()).ok()\n        });\n        let shutdown_res = receiver.recv_timeout(Duration::from_millis(2000));\n        if shutdown_res.is_err() {\n            error!(\"timed out while shutting down tracing, exiting anyway\");\n        }\n    }\n\n    info!(\"shutting down\");\n    exit(exit_code.unwrap_or(1))\n}\n\n/// When compute_ctl is killed, send also termination signal to sync-safekeepers\n/// to prevent leakage. TODO: it is better to convert compute_ctl to async and\n/// wait for termination which would be easy then.\nfn handle_exit_signal(sig: i32, dev_mode: bool) {\n    info!(\"received {sig} termination signal\");\n    forward_termination_signal(dev_mode);\n    exit(1);\n}\n\n#[cfg(test)]\nmod test {\n    use clap::{CommandFactory, Parser};\n    use url::Url;\n\n    use super::Cli;\n\n    #[test]\n    fn verify_cli() {\n        Cli::command().debug_assert()\n    }\n\n    #[test]\n    fn verify_remote_ext_base_url() {\n        let cli = Cli::parse_from([\n            \"compute_ctl\",\n            \"--pgdata=test\",\n            \"--connstr=test\",\n            \"--compute-id=test\",\n            \"--remote-ext-base-url\",\n            \"https://example.com/subpath\",\n        ]);\n        assert_eq!(\n            cli.remote_ext_base_url.unwrap(),\n            Url::parse(\"https://example.com/subpath/\").unwrap()\n        );\n\n        let cli = Cli::parse_from([\n            \"compute_ctl\",\n            \"--pgdata=test\",\n            \"--connstr=test\",\n            \"--compute-id=test\",\n            \"--remote-ext-base-url\",\n            \"https://example.com//\",\n        ]);\n        assert_eq!(\n            cli.remote_ext_base_url.unwrap(),\n            
Url::parse(\"https://example.com\").unwrap()\n        );\n\n        Cli::try_parse_from([\n            \"compute_ctl\",\n            \"--pgdata=test\",\n            \"--connstr=test\",\n            \"--compute-id=test\",\n            \"--remote-ext-base-url\",\n            \"https://example.com?hello=world\",\n        ])\n        .expect_err(\"URL parameters are not allowed\");\n    }\n\n    #[test]\n    fn verify_privileged_role_name() {\n        // Valid name\n        let cli = Cli::parse_from([\n            \"compute_ctl\",\n            \"--pgdata=test\",\n            \"--connstr=test\",\n            \"--compute-id=test\",\n            \"--privileged-role-name\",\n            \"my_superuser\",\n        ]);\n        assert_eq!(cli.privileged_role_name, \"my_superuser\");\n\n        // Invalid names\n        Cli::try_parse_from([\n            \"compute_ctl\",\n            \"--pgdata=test\",\n            \"--connstr=test\",\n            \"--compute-id=test\",\n            \"--privileged-role-name\",\n            \"NeonSuperuser\",\n        ])\n        .expect_err(\"uppercase letters are not allowed\");\n\n        Cli::try_parse_from([\n            \"compute_ctl\",\n            \"--pgdata=test\",\n            \"--connstr=test\",\n            \"--compute-id=test\",\n            \"--privileged-role-name\",\n            \"$'neon_superuser\",\n        ])\n        .expect_err(\"special characters are not allowed\");\n\n        Cli::try_parse_from([\n            \"compute_ctl\",\n            \"--pgdata=test\",\n            \"--connstr=test\",\n            \"--compute-id=test\",\n            \"--privileged-role-name\",\n            \"\",\n        ])\n        .expect_err(\"empty name is not allowed\");\n    }\n}\n"
  },
  {
    "path": "compute_tools/src/bin/fast_import/aws_s3_sync.rs",
    "content": "use camino::{Utf8Path, Utf8PathBuf};\nuse tokio::task::JoinSet;\nuse tracing::{info, warn};\nuse walkdir::WalkDir;\n\nuse super::s3_uri::S3Uri;\n\nconst MAX_PARALLEL_UPLOADS: usize = 10;\n\n/// Upload all files from 'local' to 'remote'\npub(crate) async fn upload_dir_recursive(\n    s3_client: &aws_sdk_s3::Client,\n    local: &Utf8Path,\n    remote: &S3Uri,\n) -> anyhow::Result<()> {\n    // Recursively scan directory\n    let mut dirwalker = WalkDir::new(local)\n        .into_iter()\n        .map(|entry| {\n            let entry = entry?;\n            let file_type = entry.file_type();\n            let path = <&Utf8Path>::try_from(entry.path())?.to_path_buf();\n            Ok((file_type, path))\n        })\n        .filter_map(|e: anyhow::Result<(std::fs::FileType, Utf8PathBuf)>| {\n            match e {\n                Ok((file_type, path)) if file_type.is_file() => Some(Ok(path)),\n                Ok((file_type, _path)) if file_type.is_dir() => {\n                    // The WalkDir iterator will recurse into directories, but we don't want\n                    // to do anything with directories as such. There's no concept of uploading\n                    // an empty directory to S3.\n                    None\n                }\n                Ok((file_type, path)) if file_type.is_symlink() => {\n                    // huh, didn't expect a symlink. Can't upload that to S3. 
Warn and skip.\n                    warn!(\"cannot upload symlink ({})\", path);\n                    None\n                }\n                Ok((_file_type, path)) => {\n                    // should not happen\n                    warn!(\"directory entry has unexpected type ({})\", path);\n                    None\n                }\n                Err(e) => Some(Err(e)),\n            }\n        });\n\n    // Spawn upload tasks for each file, keeping MAX_PARALLEL_UPLOADS active in\n    // parallel.\n    let mut joinset = JoinSet::new();\n    loop {\n        // Could we upload more?\n        while joinset.len() < MAX_PARALLEL_UPLOADS {\n            if let Some(full_local_path) = dirwalker.next() {\n                let full_local_path = full_local_path?;\n                let relative_local_path = full_local_path\n                    .strip_prefix(local)\n                    .expect(\"all paths start from the walkdir root\");\n                let remote_path = remote.append(relative_local_path.as_str());\n                info!(\n                    \"starting upload of {} to {}\",\n                    &full_local_path, &remote_path\n                );\n                let upload_task = upload_file(s3_client.clone(), full_local_path, remote_path);\n                joinset.spawn(upload_task);\n            } else {\n                info!(\"draining upload tasks\");\n                break;\n            }\n        }\n\n        // Wait for an upload to complete\n        if let Some(res) = joinset.join_next().await {\n            let _ = res?;\n        } else {\n            // all done!\n            break;\n        }\n    }\n    Ok(())\n}\n\npub(crate) async fn upload_file(\n    s3_client: aws_sdk_s3::Client,\n    local_path: Utf8PathBuf,\n    remote: S3Uri,\n) -> anyhow::Result<()> {\n    use aws_smithy_types::byte_stream::ByteStream;\n    let stream = ByteStream::from_path(&local_path).await?;\n\n    let _result = s3_client\n        .put_object()\n        
.bucket(remote.bucket)\n        .key(&remote.key)\n        .body(stream)\n        .send()\n        .await?;\n    info!(\"upload of {} to {} finished\", &local_path, &remote.key);\n\n    Ok(())\n}\n"
  },
  {
    "path": "compute_tools/src/bin/fast_import/child_stdio_to_log.rs",
    "content": "use tokio::io::{AsyncBufReadExt, BufReader};\nuse tokio::process::{ChildStderr, ChildStdout};\nuse tracing::info;\n\n/// Asynchronously relays the output from a child process's `stdout` and `stderr` to the tracing log.\n/// Each line is read and logged individually, with lossy UTF-8 conversion.\n///\n/// # Arguments\n///\n/// * `stdout`: An `Option<ChildStdout>` from the child process.\n/// * `stderr`: An `Option<ChildStderr>` from the child process.\n///\npub(crate) async fn relay_process_output(stdout: Option<ChildStdout>, stderr: Option<ChildStderr>) {\n    let stdout_fut = async {\n        if let Some(stdout) = stdout {\n            let reader = BufReader::new(stdout);\n            let mut lines = reader.lines();\n            while let Ok(Some(line)) = lines.next_line().await {\n                info!(fd = \"stdout\", \"{}\", line);\n            }\n        }\n    };\n\n    let stderr_fut = async {\n        if let Some(stderr) = stderr {\n            let reader = BufReader::new(stderr);\n            let mut lines = reader.lines();\n            while let Ok(Some(line)) = lines.next_line().await {\n                info!(fd = \"stderr\", \"{}\", line);\n            }\n        }\n    };\n\n    tokio::join!(stdout_fut, stderr_fut);\n}\n"
  },
  {
    "path": "compute_tools/src/bin/fast_import/s3_uri.rs",
    "content": "use std::str::FromStr;\n\nuse anyhow::Result;\n\n/// Struct to hold parsed S3 components\n#[derive(Debug, Clone, PartialEq, Eq)]\npub struct S3Uri {\n    pub bucket: String,\n    pub key: String,\n}\n\nimpl FromStr for S3Uri {\n    type Err = anyhow::Error;\n\n    /// Parse an S3 URI into a bucket and key\n    fn from_str(uri: &str) -> Result<Self> {\n        // Ensure the URI starts with \"s3://\"\n        if !uri.starts_with(\"s3://\") {\n            return Err(anyhow::anyhow!(\"Invalid S3 URI scheme\"));\n        }\n\n        // Remove the \"s3://\" prefix\n        let stripped_uri = &uri[5..];\n\n        // Split the remaining string into bucket and key parts\n        if let Some((bucket, key)) = stripped_uri.split_once('/') {\n            Ok(S3Uri {\n                bucket: bucket.to_string(),\n                key: key.to_string(),\n            })\n        } else {\n            Err(anyhow::anyhow!(\n                \"Invalid S3 URI format, missing bucket or key\"\n            ))\n        }\n    }\n}\n\nimpl S3Uri {\n    pub fn append(&self, suffix: &str) -> Self {\n        Self {\n            bucket: self.bucket.clone(),\n            key: format!(\"{}{}\", self.key, suffix),\n        }\n    }\n}\n\nimpl std::fmt::Display for S3Uri {\n    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {\n        write!(f, \"s3://{}/{}\", self.bucket, self.key)\n    }\n}\n\nimpl clap::builder::TypedValueParser for S3Uri {\n    type Value = Self;\n\n    fn parse_ref(\n        &self,\n        _cmd: &clap::Command,\n        _arg: Option<&clap::Arg>,\n        value: &std::ffi::OsStr,\n    ) -> Result<Self::Value, clap::Error> {\n        let value_str = value.to_str().ok_or_else(|| {\n            clap::Error::raw(\n                clap::error::ErrorKind::InvalidUtf8,\n                \"Invalid UTF-8 sequence\",\n            )\n        })?;\n        S3Uri::from_str(value_str).map_err(|e| {\n            clap::Error::raw(\n                
clap::error::ErrorKind::InvalidValue,\n                format!(\"Failed to parse S3 URI: {e}\"),\n            )\n        })\n    }\n}\n"
  },
  {
    "path": "compute_tools/src/bin/fast_import.rs",
    "content": "//! This program dumps a remote Postgres database into a local Postgres database\n//! and uploads the resulting PGDATA into object storage for import into a Timeline.\n//!\n//! # Context, Architecture, Design\n//!\n//! See cloud.git Fast Imports RFC (<https://github.com/neondatabase/cloud/pull/19799>)\n//! for the full picture.\n//! The RFC describing the storage pieces of importing the PGDATA dump into a Timeline\n//! is publicly accessible at <https://github.com/neondatabase/neon/pull/9538>.\n//!\n//! # This is a Prototype!\n//!\n//! This program is part of a prototype feature and not yet used in production.\n//!\n//! The cloud.git RFC contains lots of suggestions for improving e2e throughput\n//! of this step of the timeline import process.\n//!\n//! # Local Testing\n//!\n//! - Comment out most of the pgxns in compute-node.Dockerfile to speed up the build.\n//! - Build the image with the following command:\n//!\n//! ```bash\n//! docker buildx build --platform linux/amd64 --build-arg DEBIAN_VERSION=bullseye --build-arg GIT_VERSION=local --build-arg PG_VERSION=v14 --build-arg BUILD_TAG=\"$(date --iso-8601=s -u)\" -t localhost:3030/localregistry/compute-node-v14:latest -f compute/compute-node.Dockerfile .\n//! docker push localhost:3030/localregistry/compute-node-v14:latest\n//! 
```\n\nuse anyhow::{Context, bail};\nuse aws_config::BehaviorVersion;\nuse camino::{Utf8Path, Utf8PathBuf};\nuse clap::{Parser, Subcommand};\nuse compute_tools::extension_server::get_pg_version;\nuse nix::unistd::Pid;\nuse std::ops::Not;\nuse tracing::{Instrument, error, info, info_span, warn};\nuse utils::fs_ext::is_directory_empty;\n\n#[path = \"fast_import/aws_s3_sync.rs\"]\nmod aws_s3_sync;\n#[path = \"fast_import/child_stdio_to_log.rs\"]\nmod child_stdio_to_log;\n#[path = \"fast_import/s3_uri.rs\"]\nmod s3_uri;\n\nconst PG_WAIT_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(600);\nconst PG_WAIT_RETRY_INTERVAL: std::time::Duration = std::time::Duration::from_millis(300);\n\n#[derive(Subcommand, Debug, Clone, serde::Serialize)]\nenum Command {\n    /// Runs local postgres (neon binary), restores into it,\n    /// uploads pgdata to s3 to be consumed by pageservers\n    Pgdata {\n        /// Raw connection string to the source database. Used only in tests,\n        /// real scenario uses encrypted connection string in spec.json from s3.\n        #[clap(long)]\n        source_connection_string: Option<String>,\n        /// If specified, will not shut down the local postgres after the import. Used in local testing\n        #[clap(short, long)]\n        interactive: bool,\n        /// Port to run postgres on. Default is 5432.\n        #[clap(long, default_value_t = 5432)]\n        pg_port: u16, // port to run postgres on, 5432 is default\n\n        /// Number of CPUs in the system. This is used to configure # of\n        /// parallel worker processes, for index creation.\n        #[clap(long, env = \"NEON_IMPORTER_NUM_CPUS\")]\n        num_cpus: Option<usize>,\n\n        /// Amount of RAM in the system. 
This is used to configure shared_buffers\n        /// and maintenance_work_mem.\n        #[clap(long, env = \"NEON_IMPORTER_MEMORY_MB\")]\n        memory_mb: Option<usize>,\n    },\n\n    /// Runs pg_dump-pg_restore from source to destination without running local postgres.\n    DumpRestore {\n        /// Raw connection string to the source database. Used only in tests,\n        /// real scenario uses encrypted connection string in spec.json from s3.\n        #[clap(long)]\n        source_connection_string: Option<String>,\n        /// Raw connection string to the destination database. Used only in tests,\n        /// real scenario uses encrypted connection string in spec.json from s3.\n        #[clap(long)]\n        destination_connection_string: Option<String>,\n    },\n}\n\nimpl Command {\n    fn as_str(&self) -> &'static str {\n        match self {\n            Command::Pgdata { .. } => \"pgdata\",\n            Command::DumpRestore { .. } => \"dump-restore\",\n        }\n    }\n}\n\n#[derive(clap::Parser)]\nstruct Args {\n    #[clap(long, env = \"NEON_IMPORTER_WORKDIR\")]\n    working_directory: Utf8PathBuf,\n    #[clap(long, env = \"NEON_IMPORTER_S3_PREFIX\")]\n    s3_prefix: Option<s3_uri::S3Uri>,\n    #[clap(long, env = \"NEON_IMPORTER_PG_BIN_DIR\")]\n    pg_bin_dir: Utf8PathBuf,\n    #[clap(long, env = \"NEON_IMPORTER_PG_LIB_DIR\")]\n    pg_lib_dir: Utf8PathBuf,\n\n    #[clap(subcommand)]\n    command: Command,\n}\n\n#[serde_with::serde_as]\n#[derive(serde::Deserialize)]\nstruct Spec {\n    encryption_secret: EncryptionSecret,\n    #[serde_as(as = \"serde_with::base64::Base64\")]\n    source_connstring_ciphertext_base64: Vec<u8>,\n    #[serde_as(as = \"Option<serde_with::base64::Base64>\")]\n    destination_connstring_ciphertext_base64: Option<Vec<u8>>,\n}\n\n#[derive(serde::Deserialize)]\nenum EncryptionSecret {\n    #[allow(clippy::upper_case_acronyms)]\n    KMS { key_id: String },\n}\n\n// copied from pageserver_api::config::defaults::DEFAULT_LOCALE to 
avoid dependency just for a constant\nconst DEFAULT_LOCALE: &str = if cfg!(target_os = \"macos\") {\n    \"C\"\n} else {\n    \"C.UTF-8\"\n};\n\nasync fn decode_connstring(\n    kms_client: &aws_sdk_kms::Client,\n    key_id: &String,\n    connstring_ciphertext_base64: Vec<u8>,\n) -> Result<String, anyhow::Error> {\n    let mut output = kms_client\n        .decrypt()\n        .key_id(key_id)\n        .ciphertext_blob(aws_sdk_s3::primitives::Blob::new(\n            connstring_ciphertext_base64,\n        ))\n        .send()\n        .await\n        .context(\"decrypt connection string\")?;\n\n    let plaintext = output\n        .plaintext\n        .take()\n        .context(\"get plaintext connection string\")?;\n\n    String::from_utf8(plaintext.into_inner()).context(\"parse connection string as utf8\")\n}\n\nstruct PostgresProcess {\n    pgdata_dir: Utf8PathBuf,\n    pg_bin_dir: Utf8PathBuf,\n    pgbin: Utf8PathBuf,\n    pg_lib_dir: Utf8PathBuf,\n    postgres_proc: Option<tokio::process::Child>,\n}\n\nimpl PostgresProcess {\n    fn new(pgdata_dir: Utf8PathBuf, pg_bin_dir: Utf8PathBuf, pg_lib_dir: Utf8PathBuf) -> Self {\n        Self {\n            pgdata_dir,\n            pgbin: pg_bin_dir.join(\"postgres\"),\n            pg_bin_dir,\n            pg_lib_dir,\n            postgres_proc: None,\n        }\n    }\n\n    async fn prepare(&self, initdb_user: &str) -> Result<(), anyhow::Error> {\n        tokio::fs::create_dir(&self.pgdata_dir)\n            .await\n            .context(\"create pgdata directory\")?;\n\n        let pg_version = get_pg_version(self.pgbin.as_ref());\n\n        postgres_initdb::do_run_initdb(postgres_initdb::RunInitdbArgs {\n            superuser: initdb_user,\n            locale: DEFAULT_LOCALE, // XXX: this shouldn't be hard-coded,\n            pg_version,\n            initdb_bin: self.pg_bin_dir.join(\"initdb\").as_ref(),\n            library_search_path: &self.pg_lib_dir, // TODO: is this right? 
Prob works in compute image, not sure about neon_local.\n            pgdata: &self.pgdata_dir,\n        })\n        .await\n        .context(\"initdb\")\n    }\n\n    async fn start(\n        &mut self,\n        initdb_user: &str,\n        port: u16,\n        nproc: usize,\n        memory_mb: usize,\n    ) -> Result<&tokio::process::Child, anyhow::Error> {\n        self.prepare(initdb_user).await?;\n\n        // Somewhat arbitrarily, use 10 % of memory for shared buffer cache, 70% for\n        // maintenance_work_mem (i.e. for sorting during index creation), and leave the rest\n        // available for misc other stuff that PostgreSQL uses memory for.\n        let shared_buffers_mb = ((memory_mb as f32) * 0.10) as usize;\n        let maintenance_work_mem_mb = ((memory_mb as f32) * 0.70) as usize;\n\n        //\n        // Launch postgres process\n        //\n        let mut proc = tokio::process::Command::new(&self.pgbin)\n            .arg(\"-D\")\n            .arg(&self.pgdata_dir)\n            .args([\"-p\", &format!(\"{port}\")])\n            .args([\"-c\", \"wal_level=minimal\"])\n            .args([\"-c\", &format!(\"shared_buffers={shared_buffers_mb}MB\")])\n            .args([\"-c\", \"max_wal_senders=0\"])\n            .args([\"-c\", \"fsync=off\"])\n            .args([\"-c\", \"full_page_writes=off\"])\n            .args([\"-c\", \"synchronous_commit=off\"])\n            .args([\n                \"-c\",\n                &format!(\"maintenance_work_mem={maintenance_work_mem_mb}MB\"),\n            ])\n            .args([\"-c\", &format!(\"max_parallel_maintenance_workers={nproc}\")])\n            .args([\"-c\", &format!(\"max_parallel_workers={nproc}\")])\n            .args([\"-c\", &format!(\"max_parallel_workers_per_gather={nproc}\")])\n            .args([\"-c\", &format!(\"max_worker_processes={nproc}\")])\n            .args([\"-c\", \"effective_io_concurrency=100\"])\n            .env_clear()\n            .env(\"LD_LIBRARY_PATH\", &self.pg_lib_dir)\n     
       .env(\n                \"ASAN_OPTIONS\",\n                std::env::var(\"ASAN_OPTIONS\").unwrap_or_default(),\n            )\n            .env(\n                \"UBSAN_OPTIONS\",\n                std::env::var(\"UBSAN_OPTIONS\").unwrap_or_default(),\n            )\n            .stdout(std::process::Stdio::piped())\n            .stderr(std::process::Stdio::piped())\n            .spawn()\n            .context(\"spawn postgres\")?;\n\n        info!(\"spawned postgres, waiting for it to become ready\");\n        tokio::spawn(\n            child_stdio_to_log::relay_process_output(proc.stdout.take(), proc.stderr.take())\n                .instrument(info_span!(\"postgres\")),\n        );\n\n        self.postgres_proc = Some(proc);\n        Ok(self.postgres_proc.as_ref().unwrap())\n    }\n\n    async fn shutdown(&mut self) -> Result<(), anyhow::Error> {\n        let proc: &mut tokio::process::Child = self.postgres_proc.as_mut().unwrap();\n        info!(\"shutdown postgres\");\n        nix::sys::signal::kill(\n            Pid::from_raw(i32::try_from(proc.id().unwrap()).expect(\"convert child pid to i32\")),\n            nix::sys::signal::SIGTERM,\n        )\n        .context(\"signal postgres to shut down\")?;\n        proc.wait()\n            .await\n            .context(\"wait for postgres to shut down\")\n            .map(|_| ())\n    }\n}\n\nasync fn wait_until_ready(connstring: String, create_dbname: String) {\n    // Create neondb database in the running postgres\n    let start_time = std::time::Instant::now();\n\n    loop {\n        if start_time.elapsed() > PG_WAIT_TIMEOUT {\n            error!(\n                \"timeout exceeded: failed to poll postgres and create database within 10 minutes\"\n            );\n            std::process::exit(1);\n        }\n\n        match tokio_postgres::connect(\n            &connstring.replace(\"dbname=neondb\", \"dbname=postgres\"),\n            tokio_postgres::NoTls,\n        )\n        .await\n        {\n            
Ok((client, connection)) => {\n                // Spawn the connection handling task to maintain the connection\n                tokio::spawn(async move {\n                    if let Err(e) = connection.await {\n                        warn!(\"connection error: {}\", e);\n                    }\n                });\n\n                match client\n                    .simple_query(format!(\"CREATE DATABASE {create_dbname};\").as_str())\n                    .await\n                {\n                    Ok(_) => {\n                        info!(\"created {} database\", create_dbname);\n                        break;\n                    }\n                    Err(e) => {\n                        warn!(\n                            \"failed to create database: {}, retrying in {}s\",\n                            e,\n                            PG_WAIT_RETRY_INTERVAL.as_secs_f32()\n                        );\n                        tokio::time::sleep(PG_WAIT_RETRY_INTERVAL).await;\n                        continue;\n                    }\n                }\n            }\n            Err(_) => {\n                info!(\n                    \"postgres not ready yet, retrying in {}s\",\n                    PG_WAIT_RETRY_INTERVAL.as_secs_f32()\n                );\n                tokio::time::sleep(PG_WAIT_RETRY_INTERVAL).await;\n                continue;\n            }\n        }\n    }\n}\n\nasync fn run_dump_restore(\n    workdir: Utf8PathBuf,\n    pg_bin_dir: Utf8PathBuf,\n    pg_lib_dir: Utf8PathBuf,\n    source_connstring: String,\n    destination_connstring: String,\n) -> Result<(), anyhow::Error> {\n    let dumpdir = workdir.join(\"dumpdir\");\n    let num_jobs = num_cpus::get().to_string();\n    info!(\"using {num_jobs} jobs for dump/restore\");\n\n    let common_args = [\n        // schema mapping (prob suffices to specify them on one side)\n        \"--no-owner\".to_string(),\n        \"--no-privileges\".to_string(),\n        \"--no-publications\".to_string(),\n 
       \"--no-security-labels\".to_string(),\n        \"--no-subscriptions\".to_string(),\n        \"--no-tablespaces\".to_string(),\n        \"--no-event-triggers\".to_string(),\n        // format\n        \"--format\".to_string(),\n        \"directory\".to_string(),\n        // concurrency\n        \"--jobs\".to_string(),\n        num_jobs,\n        // progress updates\n        \"--verbose\".to_string(),\n    ];\n\n    info!(\"dump into the working directory\");\n    {\n        let mut pg_dump = tokio::process::Command::new(pg_bin_dir.join(\"pg_dump\"))\n            .args(&common_args)\n            .arg(\"-f\")\n            .arg(&dumpdir)\n            .arg(\"--no-sync\")\n            // POSITIONAL args\n            // source db (db name included in connection string)\n            .arg(&source_connstring)\n            // how we run it\n            .env_clear()\n            .env(\"LD_LIBRARY_PATH\", &pg_lib_dir)\n            .env(\n                \"ASAN_OPTIONS\",\n                std::env::var(\"ASAN_OPTIONS\").unwrap_or_default(),\n            )\n            .env(\n                \"UBSAN_OPTIONS\",\n                std::env::var(\"UBSAN_OPTIONS\").unwrap_or_default(),\n            )\n            .kill_on_drop(true)\n            .stdout(std::process::Stdio::piped())\n            .stderr(std::process::Stdio::piped())\n            .spawn()\n            .context(\"spawn pg_dump\")?;\n\n        info!(pid=%pg_dump.id().unwrap(), \"spawned pg_dump\");\n\n        tokio::spawn(\n            child_stdio_to_log::relay_process_output(pg_dump.stdout.take(), pg_dump.stderr.take())\n                .instrument(info_span!(\"pg_dump\")),\n        );\n\n        let st = pg_dump.wait().await.context(\"wait for pg_dump\")?;\n        info!(status=?st, \"pg_dump exited\");\n        if !st.success() {\n            error!(status=%st, \"pg_dump failed, restore will likely fail as well\");\n            bail!(\"pg_dump failed\");\n        }\n    }\n\n    // TODO: maybe do it in a 
streaming way, plenty of internal research done on this already\n    // TODO: do the unlogged table trick\n    {\n        let mut pg_restore = tokio::process::Command::new(pg_bin_dir.join(\"pg_restore\"))\n            .args(&common_args)\n            .arg(\"-d\")\n            .arg(&destination_connstring)\n            // POSITIONAL args\n            .arg(&dumpdir)\n            // how we run it\n            .env_clear()\n            .env(\"LD_LIBRARY_PATH\", &pg_lib_dir)\n            .env(\n                \"ASAN_OPTIONS\",\n                std::env::var(\"ASAN_OPTIONS\").unwrap_or_default(),\n            )\n            .env(\n                \"UBSAN_OPTIONS\",\n                std::env::var(\"UBSAN_OPTIONS\").unwrap_or_default(),\n            )\n            .kill_on_drop(true)\n            .stdout(std::process::Stdio::piped())\n            .stderr(std::process::Stdio::piped())\n            .spawn()\n            .context(\"spawn pg_restore\")?;\n\n        info!(pid=%pg_restore.id().unwrap(), \"spawned pg_restore\");\n        tokio::spawn(\n            child_stdio_to_log::relay_process_output(\n                pg_restore.stdout.take(),\n                pg_restore.stderr.take(),\n            )\n            .instrument(info_span!(\"pg_restore\")),\n        );\n        let st = pg_restore.wait().await.context(\"wait for pg_restore\")?;\n        info!(status=?st, \"pg_restore exited\");\n        if !st.success() {\n            error!(status=%st, \"pg_restore failed, restore will likely fail as well\");\n            bail!(\"pg_restore failed\");\n        }\n    }\n\n    Ok(())\n}\n\n#[allow(clippy::too_many_arguments)]\nasync fn cmd_pgdata(\n    s3_client: Option<&aws_sdk_s3::Client>,\n    kms_client: Option<aws_sdk_kms::Client>,\n    maybe_s3_prefix: Option<s3_uri::S3Uri>,\n    maybe_spec: Option<Spec>,\n    source_connection_string: Option<String>,\n    interactive: bool,\n    pg_port: u16,\n    workdir: Utf8PathBuf,\n    pg_bin_dir: Utf8PathBuf,\n    pg_lib_dir: 
Utf8PathBuf,\n    num_cpus: Option<usize>,\n    memory_mb: Option<usize>,\n) -> Result<(), anyhow::Error> {\n    if maybe_spec.is_none() && source_connection_string.is_none() {\n        bail!(\"spec must be provided for pgdata command\");\n    }\n    if maybe_spec.is_some() && source_connection_string.is_some() {\n        bail!(\"only one of spec or source_connection_string can be provided\");\n    }\n\n    let source_connection_string = if let Some(spec) = maybe_spec {\n        match spec.encryption_secret {\n            EncryptionSecret::KMS { key_id } => {\n                decode_connstring(\n                    kms_client.as_ref().unwrap(),\n                    &key_id,\n                    spec.source_connstring_ciphertext_base64,\n                )\n                .await?\n            }\n        }\n    } else {\n        source_connection_string.unwrap()\n    };\n\n    let superuser = \"cloud_admin\";\n    let destination_connstring =\n        format!(\"host=localhost port={pg_port} user={superuser} dbname=neondb\");\n\n    let pgdata_dir = workdir.join(\"pgdata\");\n    let mut proc = PostgresProcess::new(pgdata_dir.clone(), pg_bin_dir.clone(), pg_lib_dir.clone());\n    let nproc = num_cpus.unwrap_or_else(num_cpus::get);\n    let memory_mb = memory_mb.unwrap_or(256);\n    proc.start(superuser, pg_port, nproc, memory_mb).await?;\n    wait_until_ready(destination_connstring.clone(), \"neondb\".to_string()).await;\n\n    run_dump_restore(\n        workdir.clone(),\n        pg_bin_dir,\n        pg_lib_dir,\n        source_connection_string,\n        destination_connstring,\n    )\n    .await?;\n\n    // If interactive mode, wait for Ctrl+C\n    if interactive {\n        info!(\"Running in interactive mode. 
Press Ctrl+C to shut down.\");\n        tokio::signal::ctrl_c().await.context(\"wait for ctrl-c\")?;\n    }\n\n    proc.shutdown().await?;\n\n    // Only sync if s3_prefix was specified\n    if let Some(s3_prefix) = maybe_s3_prefix {\n        info!(\"upload pgdata\");\n        aws_s3_sync::upload_dir_recursive(\n            s3_client.unwrap(),\n            Utf8Path::new(&pgdata_dir),\n            &s3_prefix.append(\"/pgdata/\"),\n        )\n        .await\n        .context(\"sync dump directory to destination\")?;\n\n        info!(\"write pgdata status to s3\");\n        {\n            let status_dir = workdir.join(\"status\");\n            std::fs::create_dir(&status_dir).context(\"create status directory\")?;\n            let status_file = status_dir.join(\"pgdata\");\n            std::fs::write(&status_file, serde_json::json!({\"done\": true}).to_string())\n                .context(\"write status file\")?;\n            aws_s3_sync::upload_dir_recursive(\n                s3_client.as_ref().unwrap(),\n                &status_dir,\n                &s3_prefix.append(\"/status/\"),\n            )\n            .await\n            .context(\"sync status directory to destination\")?;\n        }\n    }\n\n    Ok(())\n}\n\nasync fn cmd_dumprestore(\n    kms_client: Option<aws_sdk_kms::Client>,\n    maybe_spec: Option<Spec>,\n    source_connection_string: Option<String>,\n    destination_connection_string: Option<String>,\n    workdir: Utf8PathBuf,\n    pg_bin_dir: Utf8PathBuf,\n    pg_lib_dir: Utf8PathBuf,\n) -> Result<(), anyhow::Error> {\n    let (source_connstring, destination_connstring) = if let Some(spec) = maybe_spec {\n        match spec.encryption_secret {\n            EncryptionSecret::KMS { key_id } => {\n                let source = decode_connstring(\n                    kms_client.as_ref().unwrap(),\n                    &key_id,\n                    spec.source_connstring_ciphertext_base64,\n                )\n                .await\n                
.context(\"decrypt source connection string\")?;\n\n                let dest = if let Some(dest_ciphertext) =\n                    spec.destination_connstring_ciphertext_base64\n                {\n                    decode_connstring(kms_client.as_ref().unwrap(), &key_id, dest_ciphertext)\n                        .await\n                        .context(\"decrypt destination connection string\")?\n                } else {\n                    bail!(\n                        \"destination connection string must be provided in spec for dump_restore command\"\n                    );\n                };\n\n                (source, dest)\n            }\n        }\n    } else {\n        (\n            source_connection_string.unwrap(),\n            if let Some(val) = destination_connection_string {\n                val\n            } else {\n                bail!(\"destination connection string must be provided for dump_restore command\");\n            },\n        )\n    };\n\n    run_dump_restore(\n        workdir,\n        pg_bin_dir,\n        pg_lib_dir,\n        source_connstring,\n        destination_connstring,\n    )\n    .await\n}\n\n#[tokio::main]\npub(crate) async fn main() -> anyhow::Result<()> {\n    utils::logging::init(\n        utils::logging::LogFormat::Json,\n        utils::logging::TracingErrorLayerEnablement::EnableWithRustLogFilter,\n        utils::logging::Output::Stdout,\n    )?;\n\n    info!(\"starting\");\n\n    let args = Args::parse();\n\n    // Initialize AWS clients only if s3_prefix is specified\n    let (s3_client, kms_client) = if args.s3_prefix.is_some() {\n        // Create AWS config with enhanced retry settings\n        let config = aws_config::defaults(BehaviorVersion::v2024_03_28())\n            .retry_config(\n                aws_config::retry::RetryConfig::standard()\n                    .with_max_attempts(5) // Retry up to 5 times\n                    .with_initial_backoff(std::time::Duration::from_millis(200)) // Start with 200ms 
delay\n                    .with_max_backoff(std::time::Duration::from_secs(5)), // Cap at 5 seconds\n            )\n            .load()\n            .await;\n\n        // Create clients from the config with enhanced retry settings\n        let s3_client = aws_sdk_s3::Client::new(&config);\n        let kms = aws_sdk_kms::Client::new(&config);\n        (Some(s3_client), Some(kms))\n    } else {\n        (None, None)\n    };\n\n    // Capture everything from spec assignment onwards to handle errors\n    let res = async {\n        let spec: Option<Spec> = if let Some(s3_prefix) = &args.s3_prefix {\n            let spec_key = s3_prefix.append(\"/spec.json\");\n            let object = s3_client\n                .as_ref()\n                .unwrap()\n                .get_object()\n                .bucket(&spec_key.bucket)\n                .key(spec_key.key)\n                .send()\n                .await\n                .context(\"get spec from s3\")?\n                .body\n                .collect()\n                .await\n                .context(\"download spec body\")?;\n            serde_json::from_slice(&object.into_bytes()).context(\"parse spec as json\")?\n        } else {\n            None\n        };\n\n        match tokio::fs::create_dir(&args.working_directory).await {\n            Ok(()) => {}\n            Err(e) if e.kind() == std::io::ErrorKind::AlreadyExists => {\n                if !is_directory_empty(&args.working_directory)\n                    .await\n                    .context(\"check if working directory is empty\")?\n                {\n                    bail!(\"working directory is not empty\");\n                } else {\n                    // ok\n                }\n            }\n            Err(e) => return Err(anyhow::Error::new(e).context(\"create working directory\")),\n        }\n\n        match args.command.clone() {\n            Command::Pgdata {\n                source_connection_string,\n                interactive,\n             
   pg_port,\n                num_cpus,\n                memory_mb,\n            } => {\n                cmd_pgdata(\n                    s3_client.as_ref(),\n                    kms_client,\n                    args.s3_prefix.clone(),\n                    spec,\n                    source_connection_string,\n                    interactive,\n                    pg_port,\n                    args.working_directory.clone(),\n                    args.pg_bin_dir,\n                    args.pg_lib_dir,\n                    num_cpus,\n                    memory_mb,\n                )\n                .await\n            }\n            Command::DumpRestore {\n                source_connection_string,\n                destination_connection_string,\n            } => {\n                cmd_dumprestore(\n                    kms_client,\n                    spec,\n                    source_connection_string,\n                    destination_connection_string,\n                    args.working_directory.clone(),\n                    args.pg_bin_dir,\n                    args.pg_lib_dir,\n                )\n                .await\n            }\n        }\n    }\n    .await;\n\n    if let Some(s3_prefix) = args.s3_prefix {\n        info!(\"write job status to s3\");\n        {\n            let status_dir = args.working_directory.join(\"status\");\n            if std::fs::exists(&status_dir)?.not() {\n                std::fs::create_dir(&status_dir).context(\"create status directory\")?;\n            }\n            let status_file = status_dir.join(\"fast_import\");\n            let res_obj = match res {\n                Ok(_) => serde_json::json!({\"command\": args.command.as_str(), \"done\": true}),\n                Err(err) => {\n                    serde_json::json!({\"command\": args.command.as_str(), \"done\": false, \"error\": err.to_string()})\n                }\n            };\n            std::fs::write(&status_file, res_obj.to_string()).context(\"write status 
file\")?;\n            aws_s3_sync::upload_dir_recursive(\n                s3_client.as_ref().unwrap(),\n                &status_dir,\n                &s3_prefix.append(\"/status/\"),\n            )\n            .await\n            .context(\"sync status directory to destination\")?;\n        }\n    }\n\n    Ok(())\n}\n"
  },
  {
    "path": "compute_tools/src/catalog.rs",
    "content": "use std::path::Path;\nuse std::process::Stdio;\nuse std::result::Result;\nuse std::sync::Arc;\n\nuse compute_api::responses::CatalogObjects;\nuse futures::Stream;\nuse postgres::NoTls;\nuse tokio::io::{AsyncBufReadExt, BufReader};\nuse tokio::process::Command;\nuse tokio::spawn;\nuse tokio_stream::{self as stream, StreamExt};\nuse tokio_util::codec::{BytesCodec, FramedRead};\nuse tracing::warn;\n\nuse crate::compute::ComputeNode;\nuse crate::pg_helpers::{get_existing_dbs_async, get_existing_roles_async, postgres_conf_for_db};\n\npub async fn get_dbs_and_roles(compute: &Arc<ComputeNode>) -> anyhow::Result<CatalogObjects> {\n    let conf = compute.get_tokio_conn_conf(Some(\"compute_ctl:get_dbs_and_roles\"));\n    let (client, connection): (tokio_postgres::Client, _) = conf.connect(NoTls).await?;\n\n    spawn(async move {\n        if let Err(e) = connection.await {\n            eprintln!(\"connection error: {e}\");\n        }\n    });\n\n    let roles = get_existing_roles_async(&client).await?;\n\n    let databases = get_existing_dbs_async(&client)\n        .await?\n        .into_values()\n        .collect();\n\n    Ok(CatalogObjects { roles, databases })\n}\n\n#[derive(Debug, thiserror::Error)]\npub enum SchemaDumpError {\n    #[error(\"database does not exist\")]\n    DatabaseDoesNotExist,\n    #[error(\"failed to execute pg_dump\")]\n    IO(#[from] std::io::Error),\n    #[error(\"unexpected I/O error\")]\n    Unexpected,\n}\n\n// It uses the pg_dump utility to dump the schema of the specified database.\n// The output is streamed back to the caller and supposed to be streamed via HTTP.\n//\n// Before return the result with the output, it checks that pg_dump produced any output.\n// If not, it tries to parse the stderr output to determine if the database does not exist\n// and special error is returned.\n//\n// To make sure that the process is killed when the caller drops the stream, we use tokio kill_on_drop feature.\npub async fn 
get_database_schema(\n    compute: &Arc<ComputeNode>,\n    dbname: &str,\n) -> Result<impl Stream<Item = Result<bytes::Bytes, std::io::Error>> + use<>, SchemaDumpError> {\n    let pgbin = &compute.params.pgbin;\n    let basepath = Path::new(pgbin).parent().unwrap();\n    let pgdump = basepath.join(\"pg_dump\");\n\n    // Replace the DB in the connection string and disassemble it into parts.\n    // This is the only option to handle DBs with special characters.\n    let conf = postgres_conf_for_db(&compute.params.connstr, dbname)\n        .map_err(|_| SchemaDumpError::Unexpected)?;\n    let host = conf\n        .get_hosts()\n        .first()\n        .ok_or(SchemaDumpError::Unexpected)?;\n    let host = match host {\n        tokio_postgres::config::Host::Tcp(ip) => ip.to_string(),\n        #[cfg(unix)]\n        tokio_postgres::config::Host::Unix(path) => path.to_string_lossy().to_string(),\n    };\n    let port = conf\n        .get_ports()\n        .first()\n        .ok_or(SchemaDumpError::Unexpected)?;\n    let user = conf.get_user().ok_or(SchemaDumpError::Unexpected)?;\n    let dbname = conf.get_dbname().ok_or(SchemaDumpError::Unexpected)?;\n\n    let mut cmd = Command::new(pgdump)\n        // XXX: this seems to be the only option to deal with DBs with `=` in the name\n        // See <https://www.postgresql.org/message-id/flat/20151023003445.931.91267%40wrigleys.postgresql.org>\n        .env(\"PGDATABASE\", dbname)\n        .arg(\"--host\")\n        .arg(host)\n        .arg(\"--port\")\n        .arg(port.to_string())\n        .arg(\"--username\")\n        .arg(user)\n        .arg(\"--schema-only\")\n        .stdout(Stdio::piped())\n        .stderr(Stdio::piped())\n        .kill_on_drop(true)\n        .spawn()?;\n\n    let stdout = cmd\n        .stdout\n        .take()\n        .ok_or_else(|| std::io::Error::other(\"Failed to capture stdout.\"))?;\n\n    let stderr = cmd\n        .stderr\n        .take()\n        .ok_or_else(|| std::io::Error::other(\"Failed to capture 
stderr.\"))?;\n\n    let mut stdout_reader = FramedRead::new(stdout, BytesCodec::new());\n    let stderr_reader = BufReader::new(stderr);\n\n    let first_chunk = match stdout_reader.next().await {\n        Some(Ok(bytes)) if !bytes.is_empty() => bytes,\n        Some(Err(e)) => {\n            return Err(SchemaDumpError::IO(e));\n        }\n        _ => {\n            let mut lines = stderr_reader.lines();\n            if let Some(line) = lines.next_line().await? {\n                if line.contains(&format!(\"FATAL:  database \\\"{dbname}\\\" does not exist\")) {\n                    return Err(SchemaDumpError::DatabaseDoesNotExist);\n                }\n                warn!(\"pg_dump stderr: {}\", line)\n            }\n            tokio::spawn(async move {\n                while let Ok(Some(line)) = lines.next_line().await {\n                    warn!(\"pg_dump stderr: {}\", line)\n                }\n            });\n\n            return Err(SchemaDumpError::IO(std::io::Error::other(\n                \"failed to start pg_dump\",\n            )));\n        }\n    };\n    let initial_stream = stream::once(Ok(first_chunk.freeze()));\n    // Consume stderr and log warnings\n    tokio::spawn(async move {\n        let mut lines = stderr_reader.lines();\n        while let Ok(Some(line)) = lines.next_line().await {\n            warn!(\"pg_dump stderr: {}\", line)\n        }\n    });\n\n    #[allow(dead_code)]\n    struct SchemaStream<S> {\n        // We keep a reference to the child process to ensure it stays alive\n        // while the stream is being consumed. 
When SchemaStream is dropped,\n        // cmd will be dropped, which triggers kill_on_drop and terminates pg_dump\n        cmd: tokio::process::Child,\n        stream: S,\n    }\n\n    impl<S> Stream for SchemaStream<S>\n    where\n        S: Stream<Item = Result<bytes::Bytes, std::io::Error>> + Unpin,\n    {\n        type Item = Result<bytes::Bytes, std::io::Error>;\n\n        fn poll_next(\n            mut self: std::pin::Pin<&mut Self>,\n            cx: &mut std::task::Context<'_>,\n        ) -> std::task::Poll<Option<Self::Item>> {\n            Stream::poll_next(std::pin::Pin::new(&mut self.stream), cx)\n        }\n    }\n\n    let schema_stream = SchemaStream {\n        cmd,\n        stream: initial_stream.chain(stdout_reader.map(|res| res.map(|b| b.freeze()))),\n    };\n\n    Ok(schema_stream)\n}\n"
  },
  {
    "path": "compute_tools/src/checker.rs",
    "content": "use anyhow::{Ok, Result, anyhow};\nuse tokio_postgres::NoTls;\nuse tracing::{error, instrument, warn};\n\nuse crate::compute::ComputeNode;\n\n/// Update timestamp in a row in a special service table to check\n/// that we can actually write some data in this particular timeline.\n#[instrument(skip_all)]\npub async fn check_writability(compute: &ComputeNode) -> Result<()> {\n    // Connect to the database.\n    let conf = compute.get_tokio_conn_conf(Some(\"compute_ctl:availability_checker\"));\n    let (client, connection) = conf.connect(NoTls).await?;\n    if client.is_closed() {\n        return Err(anyhow!(\"connection to postgres closed\"));\n    }\n\n    // The connection object performs the actual communication with the database,\n    // so spawn it off to run on its own.\n    tokio::spawn(async move {\n        if let Err(e) = connection.await {\n            error!(\"connection error: {}\", e);\n        }\n    });\n\n    let query = \"\n    INSERT INTO public.health_check VALUES (1, pg_catalog.now())\n        ON CONFLICT (id) DO UPDATE\n         SET updated_at = pg_catalog.now();\";\n\n    match client.simple_query(query).await {\n        Result::Ok(result) => {\n            if result.len() != 1 {\n                return Err(anyhow::anyhow!(\n                    \"expected 1 query results, but got {}\",\n                    result.len()\n                ));\n            }\n        }\n        Err(err) => {\n            if let Some(state) = err.code() {\n                if state == &tokio_postgres::error::SqlState::DISK_FULL {\n                    warn!(\"Tenant disk is full\");\n                    return Ok(());\n                }\n            }\n            return Err(err.into());\n        }\n    }\n\n    Ok(())\n}\n"
  },
  {
    "path": "compute_tools/src/communicator_socket_client.rs",
    "content": "//! Client for making request to a running Postgres server's communicator control socket.\n//!\n//! The storage communicator process that runs inside Postgres exposes an HTTP endpoint in\n//! a Unix Domain Socket in the Postgres data directory. This provides access to it.\n\nuse std::path::Path;\n\nuse anyhow::Context;\nuse hyper::client::conn::http1::SendRequest;\nuse hyper_util::rt::TokioIo;\n\n/// Name of the socket within the Postgres data directory. This better match that in\n/// `pgxn/neon/communicator/src/lib.rs`.\nconst NEON_COMMUNICATOR_SOCKET_NAME: &str = \"neon-communicator.socket\";\n\n/// Open a connection to the communicator's control socket, prepare to send requests to it\n/// with hyper.\npub async fn connect_communicator_socket<B>(pgdata: &Path) -> anyhow::Result<SendRequest<B>>\nwhere\n    B: hyper::body::Body + 'static + Send,\n    B::Data: Send,\n    B::Error: Into<Box<dyn std::error::Error + Send + Sync>>,\n{\n    let socket_path = pgdata.join(NEON_COMMUNICATOR_SOCKET_NAME);\n    let socket_path_len = socket_path.display().to_string().len();\n\n    // There is a limit of around 100 bytes (108 on Linux?) on the length of the path to a\n    // Unix Domain socket. The limit is on the connect(2) function used to open the\n    // socket, not on the absolute path itself. Postgres changes the current directory to\n    // the data directory and uses a relative path to bind to the socket, and the relative\n    // path \"./neon-communicator.socket\" is always short, but when compute_ctl needs to\n    // open the socket, we need to use a full path, which can be arbitrarily long.\n    //\n    // There are a few ways we could work around this:\n    //\n    // 1. Change the current directory to the Postgres data directory and use a relative\n    //    path in the connect(2) call. That's problematic because the current directory\n    //    applies to the whole process. 
We could change the current directory early in\n    //    compute_ctl startup, and that might be a good idea anyway for other reasons too:\n    //    it would be more robust if the data directory is moved around or unlinked for\n    //    some reason, and you would be less likely to accidentally litter other parts of\n    //    the filesystem with e.g. temporary files. However, that's a pretty invasive\n    //    change.\n    //\n    // 2. On Linux, you could open() the data directory, and refer to the socket\n    //    inside it as \"/proc/self/fd/<fd>/neon-communicator.socket\". But that's\n    //    Linux-only.\n    //\n    // 3. Create a symbolic link to the socket with a shorter path, and use that.\n    //\n    // We use the symbolic link approach here. Hopefully the paths we use in production\n    // are shorter, so that we can open the socket directly, so that this hack is needed\n    // only in development.\n    let connect_result = if socket_path_len < 100 {\n        // We can open the path directly with no hacks.\n        tokio::net::UnixStream::connect(socket_path).await\n    } else {\n        // The path to the socket is too long. Create a symlink to it with a shorter path.\n        let short_path = std::env::temp_dir().join(format!(\n            \"compute_ctl.short-socket.{}.{}\",\n            std::process::id(),\n            tokio::task::id()\n        ));\n        std::os::unix::fs::symlink(&socket_path, &short_path)?;\n\n        // Delete the symlink as soon as we have connected to it. There's a small chance\n        // of leaking if the process dies before we remove it, so try to keep that window\n        // as small as possible.\n        scopeguard::defer! 
{\n            if let Err(err) = std::fs::remove_file(&short_path) {\n                tracing::warn!(\"could not remove symlink \\\"{}\\\" created for socket: {}\",\n                               short_path.display(), err);\n            }\n        }\n\n        tracing::info!(\n            \"created symlink \\\"{}\\\" for socket \\\"{}\\\", opening it now\",\n            short_path.display(),\n            socket_path.display()\n        );\n\n        tokio::net::UnixStream::connect(&short_path).await\n    };\n\n    let stream = connect_result.context(\"connecting to communicator control socket\")?;\n\n    let io = TokioIo::new(stream);\n    let (request_sender, connection) = hyper::client::conn::http1::handshake(io).await?;\n\n    // spawn a task to poll the connection and drive the HTTP state\n    tokio::spawn(async move {\n        if let Err(err) = connection.await {\n            eprintln!(\"Error in connection: {err}\");\n        }\n    });\n\n    Ok(request_sender)\n}\n"
  },
  {
    "path": "compute_tools/src/compute.rs",
    "content": "use anyhow::{Context, Result};\nuse chrono::{DateTime, Utc};\nuse compute_api::privilege::Privilege;\nuse compute_api::responses::{\n    ComputeConfig, ComputeCtlConfig, ComputeMetrics, ComputeStatus, LfcOffloadState,\n    LfcPrewarmState, PromoteState, TlsConfig,\n};\nuse compute_api::spec::{\n    ComputeAudit, ComputeFeature, ComputeMode, ComputeSpec, ExtVersion, GenericOption,\n    PageserverConnectionInfo, PageserverProtocol, PgIdent, Role,\n};\nuse futures::StreamExt;\nuse futures::future::join_all;\nuse futures::stream::FuturesUnordered;\nuse itertools::Itertools;\nuse nix::sys::signal::{Signal, kill};\nuse nix::unistd::Pid;\nuse once_cell::sync::Lazy;\nuse pageserver_page_api::{self as page_api, BaseBackupCompression};\nuse postgres;\nuse postgres::NoTls;\nuse postgres::error::SqlState;\nuse remote_storage::{DownloadError, RemotePath};\nuse std::collections::{HashMap, HashSet};\nuse std::ffi::OsString;\nuse std::os::unix::fs::{PermissionsExt, symlink};\nuse std::path::Path;\nuse std::process::{Command, Stdio};\nuse std::str::FromStr;\nuse std::sync::atomic::{AtomicU32, AtomicU64, Ordering};\nuse std::sync::{Arc, Condvar, Mutex, RwLock};\nuse std::time::{Duration, Instant};\nuse std::{env, fs};\nuse tokio::{spawn, sync::watch, task::JoinHandle, time};\nuse tokio_util::sync::CancellationToken;\nuse tracing::{Instrument, debug, error, info, instrument, warn};\nuse url::Url;\nuse utils::backoff::{\n    DEFAULT_BASE_BACKOFF_SECONDS, DEFAULT_MAX_BACKOFF_SECONDS, exponential_backoff_duration,\n};\nuse utils::id::{TenantId, TimelineId};\nuse utils::lsn::Lsn;\nuse utils::measured_stream::MeasuredReader;\nuse utils::pid_file;\nuse utils::shard::{ShardIndex, ShardNumber, ShardStripeSize};\n\nuse crate::configurator::launch_configurator;\nuse crate::disk_quota::set_disk_quota;\nuse crate::hadron_metrics::COMPUTE_ATTACHED;\nuse crate::installed_extensions::get_installed_extensions;\nuse crate::logger::{self, startup_context_from_env};\nuse 
crate::lsn_lease::launch_lsn_lease_bg_task_for_static;\nuse crate::metrics::COMPUTE_CTL_UP;\nuse crate::monitor::launch_monitor;\nuse crate::pg_helpers::*;\nuse crate::pgbouncer::*;\nuse crate::rsyslog::{\n    PostgresLogsRsyslogConfig, configure_audit_rsyslog, configure_postgres_logs_export,\n    launch_pgaudit_gc,\n};\nuse crate::spec::*;\nuse crate::swap::resize_swap;\nuse crate::sync_sk::{check_if_synced, ping_safekeeper};\nuse crate::tls::watch_cert_for_changes;\nuse crate::{config, extension_server, local_proxy};\n\npub static SYNC_SAFEKEEPERS_PID: AtomicU32 = AtomicU32::new(0);\npub static PG_PID: AtomicU32 = AtomicU32::new(0);\n// This is an arbitrary build tag. Fine as a default / for testing purposes\n// in-case of not-set environment var\nconst BUILD_TAG_DEFAULT: &str = \"latest\";\n/// Build tag/version of the compute node binaries/image. It's tricky and ugly\n/// to pass it everywhere as a part of `ComputeNodeParams`, so we use a\n/// global static variable.\npub static BUILD_TAG: Lazy<String> = Lazy::new(|| {\n    option_env!(\"BUILD_TAG\")\n        .unwrap_or(BUILD_TAG_DEFAULT)\n        .to_string()\n});\nconst DEFAULT_INSTALLED_EXTENSIONS_COLLECTION_INTERVAL: u64 = 3600;\n\n/// Static configuration params that don't change after startup. 
These mostly\n/// come from the CLI args, or are derived from them.\n#[derive(Clone, Debug)]\npub struct ComputeNodeParams {\n    /// The ID of the compute\n    pub compute_id: String,\n\n    /// Url type maintains proper escaping\n    pub connstr: url::Url,\n\n    /// The name of the 'weak' superuser role, which we give to the users.\n    /// It follows the allow list approach, i.e., we take a standard role\n    /// and grant it extra permissions with explicit GRANTs here and there,\n    /// and core patches.\n    pub privileged_role_name: String,\n\n    pub resize_swap_on_bind: bool,\n    pub set_disk_quota_for_fs: Option<String>,\n\n    // VM monitor parameters\n    #[cfg(target_os = \"linux\")]\n    pub filecache_connstr: String,\n    #[cfg(target_os = \"linux\")]\n    pub cgroup: String,\n    #[cfg(target_os = \"linux\")]\n    pub vm_monitor_addr: String,\n\n    pub pgdata: String,\n    pub pgbin: String,\n    pub pgversion: String,\n\n    /// The port that the compute's external HTTP server listens on\n    pub external_http_port: u16,\n    /// The port that the compute's internal HTTP server listens on\n    pub internal_http_port: u16,\n\n    /// the address of extension storage proxy gateway\n    pub remote_ext_base_url: Option<Url>,\n\n    /// Interval for installed extensions collection\n    pub installed_extensions_collection_interval: Arc<AtomicU64>,\n    /// Hadron instance ID of the compute node.\n    pub instance_id: Option<String>,\n    /// Timeout of PG compute startup in the Init state.\n    pub pg_init_timeout: Option<Duration>,\n    // Path to the `pg_isready` binary.\n    pub pg_isready_bin: String,\n    pub lakebase_mode: bool,\n\n    pub build_tag: String,\n    pub control_plane_uri: Option<String>,\n    pub config_path_test_only: Option<OsString>,\n}\n\ntype TaskHandle = Mutex<Option<JoinHandle<()>>>;\n\n/// Compute node info shared across several `compute_ctl` threads.\npub struct ComputeNode {\n    pub params: ComputeNodeParams,\n\n    // 
We connect to Postgres from many different places, so build configs once\n    // and reuse them where needed. These are derived from 'params.connstr'\n    pub conn_conf: postgres::config::Config,\n    pub tokio_conn_conf: tokio_postgres::config::Config,\n\n    /// Volatile part of the `ComputeNode`, which should be used under `Mutex`.\n    /// To allow the HTTP API server to serve status requests while configuration\n    /// is in progress, lock should be held only for short periods of time to do\n    /// read/write, not the whole configuration process.\n    pub state: Mutex<ComputeState>,\n    /// `Condvar` to allow notifying waiters about state changes.\n    pub state_changed: Condvar,\n\n    // key: ext_archive_name, value: started download time, download_completed?\n    pub ext_download_progress: RwLock<HashMap<String, (DateTime<Utc>, bool)>>,\n    pub compute_ctl_config: ComputeCtlConfig,\n\n    /// Handle to the extension stats collection task\n    extension_stats_task: TaskHandle,\n    lfc_offload_task: TaskHandle,\n}\n\n// store some metrics about download size that might impact startup time\n#[derive(Clone, Debug)]\npub struct RemoteExtensionMetrics {\n    num_ext_downloaded: u64,\n    largest_ext_size: u64,\n    total_ext_download_size: u64,\n}\n\n#[derive(Clone, Debug)]\npub struct ComputeState {\n    pub start_time: DateTime<Utc>,\n    pub pg_start_time: Option<DateTime<Utc>>,\n    pub status: ComputeStatus,\n    /// Timestamp of the last Postgres activity. It could be `None` if\n    /// compute wasn't used since start.\n    pub last_active: Option<DateTime<Utc>>,\n    pub error: Option<String>,\n\n    /// Compute spec. This can be received from the CLI or - more likely -\n    /// passed by the control plane with a /configure HTTP request.\n    pub pspec: Option<ParsedSpec>,\n\n    /// If the spec is passed by a /configure request, 'startup_span' is the\n    /// /configure request's tracing span. 
The main thread enters it when it\n    /// processes the compute startup, so that the compute startup is considered\n    /// to be part of the /configure request for tracing purposes.\n    ///\n    /// If the request handling thread/task called startup_compute() directly,\n    /// it would automatically be a child of the request handling span, and we\n    /// wouldn't need this. But because we use the main thread to perform the\n    /// startup, and the /configure task just waits for it to finish, we need to\n    /// set up the span relationship ourselves.\n    pub startup_span: Option<tracing::span::Span>,\n\n    pub lfc_prewarm_state: LfcPrewarmState,\n    pub lfc_prewarm_token: CancellationToken,\n    pub lfc_offload_state: LfcOffloadState,\n\n    /// WAL flush LSN that is set after terminating Postgres and syncing safekeepers if\n    /// mode == ComputeMode::Primary. None otherwise\n    pub terminate_flush_lsn: Option<Lsn>,\n    pub promote_state: Option<watch::Receiver<PromoteState>>,\n\n    pub metrics: ComputeMetrics,\n}\n\nimpl ComputeState {\n    pub fn new() -> Self {\n        Self {\n            start_time: Utc::now(),\n            pg_start_time: None,\n            status: ComputeStatus::Empty,\n            last_active: None,\n            error: None,\n            pspec: None,\n            startup_span: None,\n            metrics: ComputeMetrics::default(),\n            lfc_prewarm_state: LfcPrewarmState::default(),\n            lfc_offload_state: LfcOffloadState::default(),\n            terminate_flush_lsn: None,\n            promote_state: None,\n            lfc_prewarm_token: CancellationToken::new(),\n        }\n    }\n\n    pub fn set_status(&mut self, status: ComputeStatus, state_changed: &Condvar) {\n        let prev = self.status;\n        info!(\"Changing compute status from {} to {}\", prev, status);\n        self.status = status;\n        state_changed.notify_all();\n\n        COMPUTE_CTL_UP.reset();\n        COMPUTE_CTL_UP\n            
.with_label_values(&[&BUILD_TAG, status.to_string().as_str()])\n            .set(1);\n    }\n\n    pub fn set_failed_status(&mut self, err: anyhow::Error, state_changed: &Condvar) {\n        self.error = Some(format!(\"{err:?}\"));\n        self.set_status(ComputeStatus::Failed, state_changed);\n    }\n}\n\nimpl Default for ComputeState {\n    fn default() -> Self {\n        Self::new()\n    }\n}\n\n#[derive(Clone, Debug)]\npub struct ParsedSpec {\n    pub spec: ComputeSpec,\n    pub tenant_id: TenantId,\n    pub timeline_id: TimelineId,\n    pub pageserver_conninfo: PageserverConnectionInfo,\n    pub safekeeper_connstrings: Vec<String>,\n    pub storage_auth_token: Option<String>,\n    /// k8s dns name and port\n    pub endpoint_storage_addr: Option<String>,\n    pub endpoint_storage_token: Option<String>,\n}\n\nimpl ParsedSpec {\n    pub fn validate(&self) -> Result<(), String> {\n        // Only Primary nodes are using safekeeper_connstrings, and at the moment\n        // this method only validates that part of the specs.\n        if self.spec.mode != ComputeMode::Primary {\n            return Ok(());\n        }\n\n        // While it seems like a good idea to check for an odd number of entries in\n        // the safekeepers connection string, changes to the list of safekeepers might\n        // incur appending a new server to a list of 3, in which case a list of 4\n        // entries is okay in production.\n        //\n        // Still we want unique entries, and at least one entry in the vector\n        if self.safekeeper_connstrings.is_empty() {\n            return Err(String::from(\"safekeeper_connstrings is empty\"));\n        }\n\n        // check for uniqueness of the connection strings in the set\n        let mut connstrings = self.safekeeper_connstrings.clone();\n\n        connstrings.sort();\n        let mut previous = &connstrings[0];\n\n        for current in connstrings.iter().skip(1) {\n            // duplicate entry?\n            if current == 
previous {\n                return Err(format!(\n                    \"duplicate entry in safekeeper_connstrings: {current}!\",\n                ));\n            }\n\n            previous = current;\n        }\n\n        Ok(())\n    }\n}\n\nimpl TryFrom<ComputeSpec> for ParsedSpec {\n    type Error = anyhow::Error;\n    fn try_from(spec: ComputeSpec) -> Result<Self, anyhow::Error> {\n        // Extract the options from the spec file that are needed to connect to\n        // the storage system.\n        //\n        // In compute specs generated by old control plane versions, the spec file might\n        // be missing the `pageserver_connection_info` field. In that case, we need to dig\n        // the pageserver connection info from the `pageserver_connstr` field instead, or\n        // if that's missing too, from the GUC in the cluster.settings field.\n        let mut pageserver_conninfo = spec.pageserver_connection_info.clone();\n        if pageserver_conninfo.is_none() {\n            if let Some(pageserver_connstr_field) = &spec.pageserver_connstring {\n                pageserver_conninfo = Some(PageserverConnectionInfo::from_connstr(\n                    pageserver_connstr_field,\n                    spec.shard_stripe_size,\n                )?);\n            }\n        }\n        if pageserver_conninfo.is_none() {\n            if let Some(guc) = spec.cluster.settings.find(\"neon.pageserver_connstring\") {\n                let stripe_size = if let Some(guc) = spec.cluster.settings.find(\"neon.stripe_size\")\n                {\n                    Some(ShardStripeSize(u32::from_str(&guc)?))\n                } else {\n                    None\n                };\n                pageserver_conninfo =\n                    Some(PageserverConnectionInfo::from_connstr(&guc, stripe_size)?);\n            }\n        }\n        let pageserver_conninfo = pageserver_conninfo.ok_or(anyhow::anyhow!(\n            \"pageserver connection information should be provided\"\n        
))?;\n\n        // Similarly for safekeeper connection strings\n        let safekeeper_connstrings = if spec.safekeeper_connstrings.is_empty() {\n            if matches!(spec.mode, ComputeMode::Primary) {\n                spec.cluster\n                    .settings\n                    .find(\"neon.safekeepers\")\n                    .ok_or(anyhow::anyhow!(\"safekeeper connstrings should be provided\"))?\n                    .split(',')\n                    .map(|str| str.to_string())\n                    .collect()\n            } else {\n                vec![]\n            }\n        } else {\n            spec.safekeeper_connstrings.clone()\n        };\n\n        let storage_auth_token = spec.storage_auth_token.clone();\n        let tenant_id: TenantId = if let Some(tenant_id) = spec.tenant_id {\n            tenant_id\n        } else {\n            let guc = spec\n                .cluster\n                .settings\n                .find(\"neon.tenant_id\")\n                .ok_or(anyhow::anyhow!(\"tenant id should be provided\"))?;\n            TenantId::from_str(&guc).context(\"invalid tenant id\")?\n        };\n        let timeline_id: TimelineId = if let Some(timeline_id) = spec.timeline_id {\n            timeline_id\n        } else {\n            let guc = spec\n                .cluster\n                .settings\n                .find(\"neon.timeline_id\")\n                .ok_or(anyhow::anyhow!(\"timeline id should be provided\"))?;\n            TimelineId::from_str(&guc).context(anyhow::anyhow!(\"invalid timeline id\"))?\n        };\n\n        let endpoint_storage_addr: Option<String> = spec\n            .endpoint_storage_addr\n            .clone()\n            .or_else(|| spec.cluster.settings.find(\"neon.endpoint_storage_addr\"));\n        let endpoint_storage_token = spec\n            .endpoint_storage_token\n            .clone()\n            .or_else(|| spec.cluster.settings.find(\"neon.endpoint_storage_token\"));\n\n        let res = ParsedSpec {\n    
        spec,\n            pageserver_conninfo,\n            safekeeper_connstrings,\n            storage_auth_token,\n            tenant_id,\n            timeline_id,\n            endpoint_storage_addr,\n            endpoint_storage_token,\n        };\n\n        // Now check validity of the parsed specification\n        res.validate().map_err(anyhow::Error::msg)?;\n        Ok(res)\n    }\n}\n\n/// If we are a VM, returns a [`Command`] that will run in the `neon-postgres`\n/// cgroup. Otherwise returns the default `Command::new(cmd)`\n///\n/// This function should be used to start postgres, as it will start it in the\n/// neon-postgres cgroup if we are a VM. This allows autoscaling to control\n/// postgres' resource usage. The cgroup will exist in VMs because vm-builder\n/// creates it during the sysinit phase of its inittab.\nfn maybe_cgexec(cmd: &str) -> Command {\n    // The cplane sets this env var for autoscaling computes.\n    // use `var_os` so we don't have to worry about the variable being valid\n    // unicode. Should never be a concern . . . 
but just in case\n    if env::var_os(\"AUTOSCALING\").is_some() {\n        let mut command = Command::new(\"cgexec\");\n        command.args([\"-g\", \"memory:neon-postgres\"]);\n        command.arg(cmd);\n        command\n    } else {\n        Command::new(cmd)\n    }\n}\n\nstruct PostgresHandle {\n    postgres: std::process::Child,\n    log_collector: JoinHandle<Result<()>>,\n}\n\nimpl PostgresHandle {\n    /// Return PID of the postgres (postmaster) process\n    fn pid(&self) -> Pid {\n        Pid::from_raw(self.postgres.id() as i32)\n    }\n}\n\nstruct StartVmMonitorResult {\n    #[cfg(target_os = \"linux\")]\n    token: tokio_util::sync::CancellationToken,\n    #[cfg(target_os = \"linux\")]\n    vm_monitor: Option<JoinHandle<Result<()>>>,\n}\n\n// BEGIN_HADRON\n/// This function creates roles that are used by Databricks.\n/// These roles do not need to be bootstrapped at PG Compute provisioning time.\n/// The auth method for these roles is configured in databricks_pg_hba.conf in universe repository.\npub(crate) fn create_databricks_roles() -> Vec<String> {\n    let roles = vec![\n        // Role for prometheus_stats_exporter\n        Role {\n            name: \"databricks_monitor\".to_string(),\n            // This uses \"local\" connection and auth method for that is \"trust\", so no password is needed.\n            encrypted_password: None,\n            options: Some(vec![GenericOption {\n                name: \"IN ROLE pg_monitor\".to_string(),\n                value: None,\n                vartype: \"string\".to_string(),\n            }]),\n        },\n        // Role for brickstore control plane\n        Role {\n            name: \"databricks_control_plane\".to_string(),\n            // Certificate user does not need password.\n            encrypted_password: None,\n            options: Some(vec![GenericOption {\n                name: \"SUPERUSER\".to_string(),\n                value: None,\n                vartype: \"string\".to_string(),\n            
}]),\n        },\n        // Role for brickstore httpgateway.\n        Role {\n            name: \"databricks_gateway\".to_string(),\n            // Certificate user does not need password.\n            encrypted_password: None,\n            options: None,\n        },\n    ];\n\n    roles\n        .into_iter()\n        .map(|role| {\n            let query = format!(\n                r#\"\n                DO $$\n                    BEGIN\n                        IF NOT EXISTS (\n                            SELECT FROM pg_catalog.pg_roles WHERE rolname = '{}')\n                        THEN\n                            CREATE ROLE {} {};\n                        END IF;\n                    END\n                $$;\"#,\n                role.name,\n                role.name.pg_quote(),\n                role.to_pg_options(),\n            );\n            query\n        })\n        .collect()\n}\n\n/// Databricks-specific environment variables to be passed to the `postgres` sub-process.\npub struct DatabricksEnvVars {\n    /// The Databricks \"endpoint ID\" of the compute instance. Used by `postgres` to check\n    /// the token scopes of internal auth tokens.\n    pub endpoint_id: String,\n    /// Hostname of the Databricks workspace URL this compute instance belongs to.\n    /// Used by postgres to verify Databricks PAT tokens.\n    pub workspace_host: String,\n\n    pub lakebase_mode: bool,\n}\n\nimpl DatabricksEnvVars {\n    pub fn new(\n        compute_spec: &ComputeSpec,\n        compute_id: Option<&String>,\n        instance_id: Option<String>,\n        lakebase_mode: bool,\n    ) -> Self {\n        let endpoint_id = if let Some(instance_id) = instance_id {\n            // Use instance_id as endpoint_id if it is set. This code path is for PuPr model.\n            instance_id\n        } else {\n            // Use compute_id as endpoint_id if instance_id is not set. 
The code path is for PrPr model.\n            // compute_id is a string format of \"{endpoint_id}/{compute_idx}\"\n            // endpoint_id is a uuid. We only need to pass down endpoint_id to postgres.\n            // Panics if compute_id is not set or not in the expected format.\n            compute_id.unwrap().split('/').next().unwrap().to_string()\n        };\n        let workspace_host = compute_spec\n            .databricks_settings\n            .as_ref()\n            .map(|s| s.databricks_workspace_host.clone())\n            .unwrap_or(\"\".to_string());\n        Self {\n            endpoint_id,\n            workspace_host,\n            lakebase_mode,\n        }\n    }\n\n    /// Constants for the names of Databricks-specific postgres environment variables.\n    const DATABRICKS_ENDPOINT_ID_ENVVAR: &'static str = \"DATABRICKS_ENDPOINT_ID\";\n    const DATABRICKS_WORKSPACE_HOST_ENVVAR: &'static str = \"DATABRICKS_WORKSPACE_HOST\";\n\n    /// Convert DatabricksEnvVars to a list of string pairs that can be passed as env vars. 
Consumes `self`.\n    pub fn to_env_var_list(self) -> Vec<(String, String)> {\n        if !self.lakebase_mode {\n            // In neon env, we don't need to pass down the env vars to postgres.\n            return vec![];\n        }\n        vec![\n            (\n                Self::DATABRICKS_ENDPOINT_ID_ENVVAR.to_string(),\n                self.endpoint_id.clone(),\n            ),\n            (\n                Self::DATABRICKS_WORKSPACE_HOST_ENVVAR.to_string(),\n                self.workspace_host.clone(),\n            ),\n        ]\n    }\n}\n\nimpl ComputeNode {\n    pub fn new(params: ComputeNodeParams, config: ComputeConfig) -> Result<Self> {\n        let connstr = params.connstr.as_str();\n        let mut conn_conf = postgres::config::Config::from_str(connstr)\n            .context(\"cannot build postgres config from connstr\")?;\n        let mut tokio_conn_conf = tokio_postgres::config::Config::from_str(connstr)\n            .context(\"cannot build tokio postgres config from connstr\")?;\n\n        // Users can set some configuration parameters per database with\n        //   ALTER DATABASE ... SET ...\n        //\n        // There are at least these parameters:\n        //\n        //   - role=some_other_role\n        //   - default_transaction_read_only=on\n        //   - statement_timeout=1, i.e., 1ms, which will cause most of the queries to fail\n        //   - search_path=non_public_schema, this should be actually safe because\n        //     we don't call any functions in user databases, but better to always reset\n        //     it to public.\n        //\n        // that can affect `compute_ctl` and prevent it from properly configuring the database schema.\n        // Unset them via connection string options before connecting to the database.\n        // N.B. 
keep it in sync with `ZENITH_OPTIONS` in `get_maintenance_client()`.\n        const EXTRA_OPTIONS: &str = \"-c role=cloud_admin -c default_transaction_read_only=off -c search_path='' -c statement_timeout=0 -c pgaudit.log=none\";\n        let options = match conn_conf.get_options() {\n            // Allow the control plane to override any options set by the\n            // compute\n            Some(options) => format!(\"{EXTRA_OPTIONS} {options}\"),\n            None => EXTRA_OPTIONS.to_string(),\n        };\n        conn_conf.options(&options);\n        tokio_conn_conf.options(&options);\n\n        let mut new_state = ComputeState::new();\n        if let Some(spec) = config.spec {\n            let pspec = ParsedSpec::try_from(spec).map_err(|msg| anyhow::anyhow!(msg))?;\n            if params.lakebase_mode {\n                ComputeNode::set_spec(&params, &mut new_state, pspec);\n            } else {\n                new_state.pspec = Some(pspec);\n            }\n        }\n\n        Ok(ComputeNode {\n            params,\n            conn_conf,\n            tokio_conn_conf,\n            state: Mutex::new(new_state),\n            state_changed: Condvar::new(),\n            ext_download_progress: RwLock::new(HashMap::new()),\n            compute_ctl_config: config.compute_ctl_config,\n            extension_stats_task: Mutex::new(None),\n            lfc_offload_task: Mutex::new(None),\n        })\n    }\n\n    /// Top-level control flow of compute_ctl. Returns a process exit code we should\n    /// exit with.\n    pub fn run(self) -> Result<Option<i32>> {\n        let this = Arc::new(self);\n\n        let cli_spec = this.state.lock().unwrap().pspec.clone();\n\n        // If this is a pooled VM, prewarm before starting HTTP server and becoming\n        // available for binding. 
Prewarming helps Postgres start quicker later,\n        // because QEMU will already have its memory allocated from the host, and\n        // the necessary binaries will already be cached.\n        if cli_spec.is_none() {\n            this.prewarm_postgres_vm_memory()?;\n        }\n\n        // Set the up metric with Empty status before starting the HTTP server.\n        // That way on the first metric scrape, an external observer will see us\n        // as 'up' and 'empty' (unless the compute was started with a spec or\n        // already configured by control plane).\n        COMPUTE_CTL_UP\n            .with_label_values(&[&BUILD_TAG, ComputeStatus::Empty.to_string().as_str()])\n            .set(1);\n\n        // Launch the external HTTP server first, so that we can serve control plane\n        // requests while configuration is still in progress.\n        crate::http::server::Server::External {\n            port: this.params.external_http_port,\n            config: this.compute_ctl_config.clone(),\n            compute_id: this.params.compute_id.clone(),\n            instance_id: this.params.instance_id.clone(),\n        }\n        .launch(&this);\n\n        // The internal HTTP server could be launched later, but there isn't much\n        // sense in waiting.\n        crate::http::server::Server::Internal {\n            port: this.params.internal_http_port,\n        }\n        .launch(&this);\n\n        // If we got a spec from the CLI already, use that. 
Otherwise wait for the\n        // control plane to pass it to us with a /configure HTTP request\n        let pspec = if let Some(cli_spec) = cli_spec {\n            cli_spec\n        } else {\n            this.wait_spec()?\n        };\n\n        launch_lsn_lease_bg_task_for_static(&this);\n\n        // We have a spec, start the compute\n        let mut delay_exit = false;\n        let mut vm_monitor = None;\n        let mut pg_process: Option<PostgresHandle> = None;\n\n        match this.start_compute(&mut pg_process) {\n            Ok(()) => {\n                // Success! Launch remaining services (just vm-monitor currently)\n                vm_monitor =\n                    Some(this.start_vm_monitor(pspec.spec.disable_lfc_resizing.unwrap_or(false)));\n            }\n            Err(err) => {\n                // Something went wrong with the startup. Log it and expose the error to\n                // HTTP status requests.\n                error!(\"could not start the compute node: {:#}\", err);\n                this.set_failed_status(err);\n                delay_exit = true;\n\n                // If the error happened after starting PostgreSQL, kill it\n                if let Some(ref pg_process) = pg_process {\n                    kill(pg_process.pid(), Signal::SIGQUIT).ok();\n                }\n            }\n        }\n\n        // If startup was successful, or it failed in the late stages,\n        // PostgreSQL is now running. 
Wait until it exits.\n        let exit_code = if let Some(pg_handle) = pg_process {\n            let exit_status = this.wait_postgres(pg_handle);\n            info!(\"Postgres exited with code {}, shutting down\", exit_status);\n            exit_status.code()\n        } else {\n            None\n        };\n\n        this.terminate_extension_stats_task();\n        this.terminate_lfc_offload_task();\n\n        // Terminate the vm_monitor so it releases the file watcher on\n        // /sys/fs/cgroup/neon-postgres.\n        // Note: the vm-monitor only runs on linux because it requires cgroups.\n        if let Some(vm_monitor) = vm_monitor {\n            cfg_if::cfg_if! {\n                if #[cfg(target_os = \"linux\")] {\n                    // Kills all threads spawned by the monitor\n                    vm_monitor.token.cancel();\n                    if let Some(handle) = vm_monitor.vm_monitor {\n                        // Kills the actual task running the monitor\n                        handle.abort();\n                    }\n                } else {\n                    _ = vm_monitor; // appease unused lint on macOS\n                }\n            }\n        }\n\n        // Reap the postgres process\n        delay_exit |= this.cleanup_after_postgres_exit()?;\n\n        // /terminate returns LSN. If we don't sleep at all, connection will break and we\n        // won't get result. 
If we sleep too much, tests will take significantly longer\n        // and Github Action run will error out\n        let sleep_duration = if delay_exit {\n            Duration::from_secs(30)\n        } else {\n            Duration::from_millis(300)\n        };\n\n        // If launch failed, keep serving HTTP requests for a while, so the cloud\n        // control plane can get the actual error.\n        if delay_exit {\n            info!(\"giving control plane 30s to collect the error before shutdown\");\n        }\n        std::thread::sleep(sleep_duration);\n        Ok(exit_code)\n    }\n\n    pub fn wait_spec(&self) -> Result<ParsedSpec> {\n        info!(\"no compute spec provided, waiting\");\n        let mut state = self.state.lock().unwrap();\n        while state.status != ComputeStatus::ConfigurationPending {\n            state = self.state_changed.wait(state).unwrap();\n        }\n\n        info!(\"got spec, continue configuration\");\n        let spec = state.pspec.as_ref().unwrap().clone();\n\n        // Record for how long we slept waiting for the spec.\n        let now = Utc::now();\n        state.metrics.wait_for_spec_ms = now\n            .signed_duration_since(state.start_time)\n            .to_std()\n            .unwrap()\n            .as_millis() as u64;\n\n        // Reset start time, so that the total startup time that is calculated later will\n        // not include the time that we waited for the spec.\n        state.start_time = now;\n\n        Ok(spec)\n    }\n\n    /// Start compute.\n    ///\n    /// Prerequisites:\n    /// - the compute spec has been placed in self.state.pspec\n    ///\n    /// On success:\n    /// - status is set to ComputeStatus::Running\n    /// - self.running_postgres is set\n    ///\n    /// On error:\n    /// - status is left in ComputeStatus::Init. The caller is responsible for setting it to Failed\n    /// - if Postgres was started before the fatal error happened, self.running_postgres is\n    ///   set. 
The caller is responsible for killing it.\n    ///\n    /// Note that this is in the critical path of a compute cold start. Keep this fast.\n    /// Try to do things concurrently, to hide the latencies.\n    fn start_compute(self: &Arc<Self>, pg_handle: &mut Option<PostgresHandle>) -> Result<()> {\n        let compute_state: ComputeState;\n\n        let start_compute_span;\n        let _this_entered;\n        {\n            let mut state_guard = self.state.lock().unwrap();\n\n            // Create a tracing span for the startup operation.\n            //\n            // We could otherwise just annotate the function with #[instrument], but if\n            // we're being configured from a /configure HTTP request, we want the\n            // startup to be considered part of the /configure request.\n            //\n            // Similarly, if a trace ID was passed in env variables, attach it to the span.\n            start_compute_span = {\n                // Temporarily enter the parent span, so that the new span becomes its child.\n                if let Some(p) = state_guard.startup_span.take() {\n                    let _parent_entered = p.entered();\n                    tracing::info_span!(\"start_compute\")\n                } else if let Some(otel_context) = startup_context_from_env() {\n                    use tracing_opentelemetry::OpenTelemetrySpanExt;\n                    let span = tracing::info_span!(\"start_compute\");\n                    span.set_parent(otel_context);\n                    span\n                } else {\n                    tracing::info_span!(\"start_compute\")\n                }\n            };\n            _this_entered = start_compute_span.enter();\n\n            // Hadron: Record postgres start time (used to enforce pg_init_timeout).\n            state_guard.pg_start_time.replace(Utc::now());\n\n            state_guard.set_status(ComputeStatus::Init, &self.state_changed);\n            compute_state = state_guard.clone()\n        
}\n\n        let pspec = compute_state.pspec.as_ref().expect(\"spec must be set\");\n        info!(\n            \"starting compute for project {}, operation {}, tenant {}, timeline {}, project {}, branch {}, endpoint {}, features {:?}, spec.remote_extensions {:?}\",\n            pspec.spec.cluster.cluster_id.as_deref().unwrap_or(\"None\"),\n            pspec.spec.operation_uuid.as_deref().unwrap_or(\"None\"),\n            pspec.tenant_id,\n            pspec.timeline_id,\n            pspec.spec.project_id.as_deref().unwrap_or(\"None\"),\n            pspec.spec.branch_id.as_deref().unwrap_or(\"None\"),\n            pspec.spec.endpoint_id.as_deref().unwrap_or(\"None\"),\n            pspec.spec.features,\n            pspec.spec.remote_extensions,\n        );\n\n        ////// PRE-STARTUP PHASE: things that need to be finished before we start the Postgres process\n\n        // Collect all the tasks that must finish here\n        let mut pre_tasks = tokio::task::JoinSet::new();\n\n        // Make sure TLS certificates are properly loaded and in the right place.\n        if self.compute_ctl_config.tls.is_some() {\n            let this = self.clone();\n            pre_tasks.spawn(async move {\n                this.watch_cert_for_changes().await;\n\n                Ok::<(), anyhow::Error>(())\n            });\n        }\n\n        let tls_config = self.tls_config(&pspec.spec);\n\n        // If there are any remote extensions in shared_preload_libraries, start downloading them\n        if pspec.spec.remote_extensions.is_some() {\n            let (this, spec) = (self.clone(), pspec.spec.clone());\n            pre_tasks.spawn(async move {\n                this.download_preload_extensions(&spec)\n                    .in_current_span()\n                    .await\n            });\n        }\n\n        // Prepare pgdata directory. 
This downloads the basebackup, among other things.\n        {\n            let (this, cs) = (self.clone(), compute_state.clone());\n            pre_tasks.spawn_blocking_child(move || this.prepare_pgdata(&cs));\n        }\n\n        // Resize swap to the desired size if the compute spec says so\n        if let (Some(size_bytes), true) =\n            (pspec.spec.swap_size_bytes, self.params.resize_swap_on_bind)\n        {\n            pre_tasks.spawn_blocking_child(move || {\n                // To avoid 'swapoff' hitting postgres startup, we need to run resize-swap to completion\n                // *before* starting postgres.\n                //\n                // In theory, we could do this asynchronously if SkipSwapon was enabled for VMs, but this\n                // carries a risk of introducing hard-to-debug issues - e.g. if postgres sometimes gets\n                // OOM-killed during startup because swap wasn't available yet.\n                resize_swap(size_bytes).context(\"failed to resize swap\")?;\n                let size_mib = size_bytes as f32 / (1 << 20) as f32; // just for more coherent display.\n                info!(%size_bytes, %size_mib, \"resized swap\");\n\n                Ok::<(), anyhow::Error>(())\n            });\n        }\n\n        // Set disk quota if the compute spec says so\n        if let (Some(disk_quota_bytes), Some(disk_quota_fs_mountpoint)) = (\n            pspec.spec.disk_quota_bytes,\n            self.params.set_disk_quota_for_fs.as_ref(),\n        ) {\n            let disk_quota_fs_mountpoint = disk_quota_fs_mountpoint.clone();\n            pre_tasks.spawn_blocking_child(move || {\n                set_disk_quota(disk_quota_bytes, &disk_quota_fs_mountpoint)\n                    .context(\"failed to set disk quota\")?;\n                let size_mib = disk_quota_bytes as f32 / (1 << 20) as f32; // just for more coherent display.\n                info!(%disk_quota_bytes, %size_mib, \"set disk quota\");\n\n                Ok::<(), 
anyhow::Error>(())\n            });\n        }\n\n        // tune pgbouncer\n        if let Some(pgbouncer_settings) = &pspec.spec.pgbouncer_settings {\n            info!(\"tuning pgbouncer\");\n\n            let pgbouncer_settings = pgbouncer_settings.clone();\n            let tls_config = tls_config.clone();\n\n            // Spawn a background task to do the tuning,\n            // so that we don't block the main thread that starts Postgres.\n            let _handle = tokio::spawn(async move {\n                let res = tune_pgbouncer(pgbouncer_settings, tls_config).await;\n                if let Err(err) = res {\n                    error!(\"error while tuning pgbouncer: {err:?}\");\n                    // Continue with the startup anyway\n                }\n            });\n        }\n\n        // configure local_proxy\n        if let Some(local_proxy) = &pspec.spec.local_proxy_config {\n            info!(\"configuring local_proxy\");\n\n            // Spawn a background task to do the configuration,\n            // so that we don't block the main thread that starts Postgres.\n\n            let mut local_proxy = local_proxy.clone();\n            local_proxy.tls = tls_config.clone();\n\n            let _handle = tokio::spawn(async move {\n                if let Err(err) = local_proxy::configure(&local_proxy) {\n                    error!(\"error while configuring local_proxy: {err:?}\");\n                    // Continue with the startup anyway\n                }\n            });\n        }\n\n        // Configure and start rsyslog for compliance audit logging\n        match pspec.spec.audit_log_level {\n            ComputeAudit::Hipaa | ComputeAudit::Extended | ComputeAudit::Full => {\n                let remote_tls_endpoint =\n                    std::env::var(\"AUDIT_LOGGING_TLS_ENDPOINT\").unwrap_or(\"\".to_string());\n                let remote_plain_endpoint =\n                    std::env::var(\"AUDIT_LOGGING_ENDPOINT\").unwrap_or(\"\".to_string());\n\n   
             if remote_plain_endpoint.is_empty() && remote_tls_endpoint.is_empty() {\n                    anyhow::bail!(\n                        \"AUDIT_LOGGING_ENDPOINT and AUDIT_LOGGING_TLS_ENDPOINT are both empty\"\n                    );\n                }\n\n                let log_directory_path = Path::new(&self.params.pgdata).join(\"log\");\n                let log_directory_path = log_directory_path.to_string_lossy().to_string();\n\n                // Add project_id,endpoint_id to identify the logs.\n                //\n                // These ids are passed from cplane,\n                let endpoint_id = pspec.spec.endpoint_id.as_deref().unwrap_or(\"\");\n                let project_id = pspec.spec.project_id.as_deref().unwrap_or(\"\");\n\n                configure_audit_rsyslog(\n                    log_directory_path.clone(),\n                    endpoint_id,\n                    project_id,\n                    &remote_plain_endpoint,\n                    &remote_tls_endpoint,\n                )?;\n\n                // Launch a background task to clean up the audit logs\n                launch_pgaudit_gc(log_directory_path);\n            }\n            _ => {}\n        }\n\n        // Configure and start rsyslog for Postgres logs export\n        let conf = PostgresLogsRsyslogConfig::new(pspec.spec.logs_export_host.as_deref());\n        configure_postgres_logs_export(conf)?;\n\n        // Launch remaining service threads\n        let _monitor_handle = launch_monitor(self);\n        let _configurator_handle = launch_configurator(self);\n\n        // Wait for all the pre-tasks to finish before starting postgres\n        let rt = tokio::runtime::Handle::current();\n        while let Some(res) = rt.block_on(pre_tasks.join_next()) {\n            res??;\n        }\n\n        ////// START POSTGRES\n        let start_time = Utc::now();\n        let pg_process = self.start_postgres(pspec.storage_auth_token.clone())?;\n        let postmaster_pid = 
pg_process.pid();\n        *pg_handle = Some(pg_process);\n\n        // If this is a primary endpoint, perform some post-startup configuration before\n        // opening it up for the world.\n        let config_time = Utc::now();\n        if pspec.spec.mode == ComputeMode::Primary {\n            self.configure_as_primary(&compute_state)?;\n\n            let conf = self.get_tokio_conn_conf(None);\n            tokio::task::spawn(async {\n                let _ = installed_extensions(conf).await;\n            });\n        }\n\n        // All done!\n        let startup_end_time = Utc::now();\n        let metrics = {\n            let mut state = self.state.lock().unwrap();\n            state.metrics.start_postgres_ms = config_time\n                .signed_duration_since(start_time)\n                .to_std()\n                .unwrap()\n                .as_millis() as u64;\n            state.metrics.config_ms = startup_end_time\n                .signed_duration_since(config_time)\n                .to_std()\n                .unwrap()\n                .as_millis() as u64;\n            state.metrics.total_startup_ms = startup_end_time\n                .signed_duration_since(compute_state.start_time)\n                .to_std()\n                .unwrap()\n                .as_millis() as u64;\n            state.metrics.clone()\n        };\n        self.set_status(ComputeStatus::Running);\n\n        // Log metrics so that we can search for slow operations in logs\n        info!(?metrics, postmaster_pid = %postmaster_pid, \"compute start finished\");\n\n        self.spawn_extension_stats_task();\n\n        if pspec.spec.autoprewarm {\n            info!(\"autoprewarming on startup as requested\");\n            self.prewarm_lfc(None);\n        }\n        if let Some(seconds) = pspec.spec.offload_lfc_interval_seconds {\n            self.spawn_lfc_offload_task(Duration::from_secs(seconds.into()));\n        };\n        Ok(())\n    }\n\n    #[instrument(skip_all)]\n    async fn 
download_preload_extensions(&self, spec: &ComputeSpec) -> Result<()> {\n        let remote_extensions = if let Some(remote_extensions) = &spec.remote_extensions {\n            remote_extensions\n        } else {\n            return Ok(());\n        };\n\n        // First, create control files for all available extensions\n        extension_server::create_control_files(remote_extensions, &self.params.pgbin);\n\n        let library_load_start_time = Utc::now();\n        let remote_ext_metrics = self.prepare_preload_libraries(spec).await?;\n\n        let library_load_time = Utc::now()\n            .signed_duration_since(library_load_start_time)\n            .to_std()\n            .unwrap()\n            .as_millis() as u64;\n        let mut state = self.state.lock().unwrap();\n        state.metrics.load_ext_ms = library_load_time;\n        state.metrics.num_ext_downloaded = remote_ext_metrics.num_ext_downloaded;\n        state.metrics.largest_ext_size = remote_ext_metrics.largest_ext_size;\n        state.metrics.total_ext_download_size = remote_ext_metrics.total_ext_download_size;\n        info!(\n            \"Loading shared_preload_libraries took {:?}ms\",\n            library_load_time\n        );\n        info!(\"{:?}\", remote_ext_metrics);\n\n        Ok(())\n    }\n\n    /// Start the vm-monitor if directed to. The vm-monitor only runs on linux\n    /// because it requires cgroups.\n    fn start_vm_monitor(&self, disable_lfc_resizing: bool) -> StartVmMonitorResult {\n        cfg_if::cfg_if! 
{\n            if #[cfg(target_os = \"linux\")] {\n                use std::env;\n                use tokio_util::sync::CancellationToken;\n\n                // This token is used internally by the monitor to clean up all threads\n                let token = CancellationToken::new();\n\n                // don't pass postgres connection string to vm-monitor if we don't want it to resize LFC\n                let pgconnstr = if disable_lfc_resizing {\n                    None\n                } else {\n                    Some(self.params.filecache_connstr.clone())\n                };\n\n                let vm_monitor = if env::var_os(\"AUTOSCALING\").is_some() {\n                    let vm_monitor = tokio::spawn(vm_monitor::start(\n                        Box::leak(Box::new(vm_monitor::Args {\n                            cgroup: Some(self.params.cgroup.clone()),\n                            pgconnstr,\n                            addr: self.params.vm_monitor_addr.clone(),\n                        })),\n                        token.clone(),\n                    ));\n                    Some(vm_monitor)\n                } else {\n                    None\n                };\n                StartVmMonitorResult { token, vm_monitor }\n            } else {\n                _ = disable_lfc_resizing; // appease unused lint on macOS\n                StartVmMonitorResult { }\n            }\n        }\n    }\n\n    fn cleanup_after_postgres_exit(&self) -> Result<bool> {\n        // Maybe sync safekeepers again, to speed up next startup\n        let compute_state = self.state.lock().unwrap().clone();\n        let pspec = compute_state.pspec.as_ref().expect(\"spec must be set\");\n        let lsn = if matches!(pspec.spec.mode, compute_api::spec::ComputeMode::Primary) {\n            info!(\"syncing safekeepers on shutdown\");\n            let storage_auth_token = pspec.storage_auth_token.clone();\n            let lsn = self.sync_safekeepers(storage_auth_token)?;\n            
info!(%lsn, \"synced safekeepers\");\n            Some(lsn)\n        } else {\n            info!(\"not primary, not syncing safekeepers\");\n            None\n        };\n\n        let mut state = self.state.lock().unwrap();\n        state.terminate_flush_lsn = lsn;\n\n        let delay_exit = state.status == ComputeStatus::TerminationPendingFast;\n        if state.status == ComputeStatus::TerminationPendingFast\n            || state.status == ComputeStatus::TerminationPendingImmediate\n        {\n            info!(\n                \"Changing compute status from {} to {}\",\n                state.status,\n                ComputeStatus::Terminated\n            );\n            state.status = ComputeStatus::Terminated;\n            self.state_changed.notify_all();\n        }\n        drop(state);\n\n        if let Err(err) = self.check_for_core_dumps() {\n            error!(\"error while checking for core dumps: {err:?}\");\n        }\n\n        Ok(delay_exit)\n    }\n\n    /// Check that compute node has corresponding feature enabled.\n    pub fn has_feature(&self, feature: ComputeFeature) -> bool {\n        let state = self.state.lock().unwrap();\n\n        if let Some(s) = state.pspec.as_ref() {\n            s.spec.features.contains(&feature)\n        } else {\n            false\n        }\n    }\n\n    pub fn set_status(&self, status: ComputeStatus) {\n        let mut state = self.state.lock().unwrap();\n        state.set_status(status, &self.state_changed);\n    }\n\n    pub fn set_failed_status(&self, err: anyhow::Error) {\n        let mut state = self.state.lock().unwrap();\n        state.set_failed_status(err, &self.state_changed);\n    }\n\n    pub fn get_status(&self) -> ComputeStatus {\n        self.state.lock().unwrap().status\n    }\n\n    pub fn get_timeline_id(&self) -> Option<TimelineId> {\n        self.state\n            .lock()\n            .unwrap()\n            .pspec\n            .as_ref()\n            .map(|s| s.timeline_id)\n    }\n\n    // 
Remove `pgdata` directory and create it again with right permissions.\n    fn create_pgdata(&self) -> Result<()> {\n        // Ignore removal error, likely it is a 'No such file or directory (os error 2)'.\n        // If it is something different then create_dir() will error out anyway.\n        let pgdata = &self.params.pgdata;\n        let _ok = fs::remove_dir_all(pgdata);\n        if self.params.lakebase_mode {\n            // Ignore creation errors if the directory already exists (e.g. mounting it ahead of time).\n            // If it is something different then PG startup will error out anyway.\n            let _ok = fs::create_dir(pgdata);\n        } else {\n            fs::create_dir(pgdata)?;\n        }\n\n        fs::set_permissions(pgdata, fs::Permissions::from_mode(0o700))?;\n\n        Ok(())\n    }\n\n    /// Fetches a basebackup from the Pageserver using the compute state's Pageserver connstring and\n    /// unarchives it to `pgdata` directory, replacing any existing contents.\n    #[instrument(skip_all, fields(%lsn))]\n    fn try_get_basebackup(&self, compute_state: &ComputeState, lsn: Lsn) -> Result<()> {\n        let spec = compute_state.pspec.as_ref().expect(\"spec must be set\");\n\n        let started = Instant::now();\n        let (connected, size) = match spec.pageserver_conninfo.prefer_protocol {\n            PageserverProtocol::Grpc => self.try_get_basebackup_grpc(spec, lsn)?,\n            PageserverProtocol::Libpq => self.try_get_basebackup_libpq(spec, lsn)?,\n        };\n\n        self.fix_zenith_signal_neon_signal()?;\n\n        let mut state = self.state.lock().unwrap();\n        state.metrics.pageserver_connect_micros =\n            connected.duration_since(started).as_micros() as u64;\n        state.metrics.basebackup_bytes = size as u64;\n        state.metrics.basebackup_ms = started.elapsed().as_millis() as u64;\n\n        Ok(())\n    }\n\n    /// Move the Zenith signal file to Neon signal file location.\n    /// This makes Compute 
compatible with older PageServers that don't yet\n    /// know about the Zenith->Neon rename.\n    fn fix_zenith_signal_neon_signal(&self) -> Result<()> {\n        let datadir = Path::new(&self.params.pgdata);\n\n        let neonsig = datadir.join(\"neon.signal\");\n\n        if neonsig.is_file() {\n            return Ok(());\n        }\n\n        let zenithsig = datadir.join(\"zenith.signal\");\n\n        if zenithsig.is_file() {\n            fs::copy(zenithsig, neonsig)?;\n        }\n\n        Ok(())\n    }\n\n    /// Fetches a basebackup via gRPC. The connstring must use grpc://. Returns the timestamp when\n    /// the connection was established, and the (compressed) size of the basebackup.\n    fn try_get_basebackup_grpc(&self, spec: &ParsedSpec, lsn: Lsn) -> Result<(Instant, usize)> {\n        let shard0_index = ShardIndex {\n            shard_number: ShardNumber(0),\n            shard_count: spec.pageserver_conninfo.shard_count,\n        };\n        let shard0_url = spec\n            .pageserver_conninfo\n            .shard_url(ShardNumber(0), PageserverProtocol::Grpc)?\n            .to_owned();\n        let (reader, connected) = tokio::runtime::Handle::current().block_on(async move {\n            let mut client = page_api::Client::connect(\n                shard0_url,\n                spec.tenant_id,\n                spec.timeline_id,\n                shard0_index,\n                spec.storage_auth_token.clone(),\n                None, // NB: base backups use payload compression\n            )\n            .await?;\n            let connected = Instant::now();\n            let reader = client\n                .get_base_backup(page_api::GetBaseBackupRequest {\n                    lsn: (lsn != Lsn(0)).then_some(lsn),\n                    compression: BaseBackupCompression::Gzip,\n                    replica: spec.spec.mode != ComputeMode::Primary,\n                    full: false,\n                })\n                .await?;\n            anyhow::Ok((reader, 
connected))\n        })?;\n\n        let mut reader = MeasuredReader::new(tokio_util::io::SyncIoBridge::new(reader));\n\n        // Set `ignore_zeros` so that unpack() reads the entire stream and doesn't just stop at the\n        // end-of-archive marker. If the server errors, the tar::Builder drop handler will write an\n        // end-of-archive marker before the error is emitted, and we would not see the error.\n        let mut ar = tar::Archive::new(flate2::read::GzDecoder::new(&mut reader));\n        ar.set_ignore_zeros(true);\n        ar.unpack(&self.params.pgdata)?;\n\n        Ok((connected, reader.get_byte_count()))\n    }\n\n    /// Fetches a basebackup via libpq. The connstring must use postgresql://. Returns the timestamp\n    /// when the connection was established, and the (compressed) size of the basebackup.\n    fn try_get_basebackup_libpq(&self, spec: &ParsedSpec, lsn: Lsn) -> Result<(Instant, usize)> {\n        let shard0_connstr = spec\n            .pageserver_conninfo\n            .shard_url(ShardNumber(0), PageserverProtocol::Libpq)?;\n        let mut config = postgres::Config::from_str(shard0_connstr)?;\n\n        // Use the storage auth token from the config file, if given.\n        // Note: this overrides any password set in the connection string.\n        if let Some(storage_auth_token) = &spec.storage_auth_token {\n            info!(\"Got storage auth token from spec file\");\n            config.password(storage_auth_token);\n        } else {\n            info!(\"Storage auth token not set\");\n        }\n\n        config.application_name(\"compute_ctl\");\n        config.options(&format!(\n            \"-c neon.compute_mode={}\",\n            spec.spec.mode.to_type_str()\n        ));\n\n        // Connect to pageserver\n        let mut client = config.connect(NoTls)?;\n        let connected = Instant::now();\n\n        let basebackup_cmd = match lsn {\n            Lsn(0) => {\n                if spec.spec.mode != ComputeMode::Primary {\n    
                format!(\n                        \"basebackup {} {} --gzip --replica\",\n                        spec.tenant_id, spec.timeline_id\n                    )\n                } else {\n                    format!(\"basebackup {} {} --gzip\", spec.tenant_id, spec.timeline_id)\n                }\n            }\n            _ => {\n                if spec.spec.mode != ComputeMode::Primary {\n                    format!(\n                        \"basebackup {} {} {} --gzip --replica\",\n                        spec.tenant_id, spec.timeline_id, lsn\n                    )\n                } else {\n                    format!(\n                        \"basebackup {} {} {} --gzip\",\n                        spec.tenant_id, spec.timeline_id, lsn\n                    )\n                }\n            }\n        };\n\n        let copyreader = client.copy_out(basebackup_cmd.as_str())?;\n        let mut measured_reader = MeasuredReader::new(copyreader);\n        let mut bufreader = std::io::BufReader::new(&mut measured_reader);\n\n        // Read the archive directly from the `CopyOutReader`\n        //\n        // Set `ignore_zeros` so that unpack() reads all the Copy data and\n        // doesn't stop at the end-of-archive marker. 
Otherwise, if the server\n        // sends an Error after finishing the tarball, we will not notice it.\n        // The tar::Builder drop handler will write an end-of-archive marker\n        // before emitting the error, and we would not see it otherwise.\n        let mut ar = tar::Archive::new(flate2::read::GzDecoder::new(&mut bufreader));\n        ar.set_ignore_zeros(true);\n        ar.unpack(&self.params.pgdata)?;\n\n        Ok((connected, measured_reader.get_byte_count()))\n    }\n\n    // Gets the basebackup in a retry loop\n    #[instrument(skip_all, fields(%lsn))]\n    pub fn get_basebackup(&self, compute_state: &ComputeState, lsn: Lsn) -> Result<()> {\n        let mut retry_period_ms = 500.0;\n        let mut attempts = 0;\n        const DEFAULT_ATTEMPTS: u16 = 10;\n        #[cfg(feature = \"testing\")]\n        let max_attempts = if let Ok(v) = env::var(\"NEON_COMPUTE_TESTING_BASEBACKUP_RETRIES\") {\n            u16::from_str(&v).unwrap()\n        } else {\n            DEFAULT_ATTEMPTS\n        };\n        #[cfg(not(feature = \"testing\"))]\n        let max_attempts = DEFAULT_ATTEMPTS;\n        loop {\n            let result = self.try_get_basebackup(compute_state, lsn);\n            match result {\n                Ok(_) => {\n                    return result;\n                }\n                Err(ref e) if attempts < max_attempts => {\n                    warn!(\"Failed to get basebackup: {e:?} (attempt {attempts}/{max_attempts})\");\n                    std::thread::sleep(std::time::Duration::from_millis(retry_period_ms as u64));\n                    retry_period_ms *= 1.5;\n                }\n                Err(_) => {\n                    return result;\n                }\n            }\n            attempts += 1;\n        }\n    }\n\n    pub async fn check_safekeepers_synced_async(\n        &self,\n        compute_state: &ComputeState,\n    ) -> Result<Option<Lsn>> {\n        // Construct a connection config for each safekeeper\n        let pspec: 
ParsedSpec = compute_state\n            .pspec\n            .as_ref()\n            .expect(\"spec must be set\")\n            .clone();\n        let sk_connstrs: Vec<String> = pspec.safekeeper_connstrings.clone();\n        let sk_configs = sk_connstrs.into_iter().map(|connstr| {\n            // Format connstr\n            let id = connstr.clone();\n            let connstr = format!(\"postgresql://no_user@{connstr}\");\n            let options = format!(\n                \"-c timeline_id={} tenant_id={}\",\n                pspec.timeline_id, pspec.tenant_id\n            );\n\n            // Construct client\n            let mut config = tokio_postgres::Config::from_str(&connstr).unwrap();\n            config.options(&options);\n            if let Some(storage_auth_token) = pspec.storage_auth_token.clone() {\n                config.password(storage_auth_token);\n            }\n\n            (id, config)\n        });\n\n        // Create task set to query all safekeepers\n        let mut tasks = FuturesUnordered::new();\n        let quorum = sk_configs.len() / 2 + 1;\n        for (id, config) in sk_configs {\n            let timeout = tokio::time::Duration::from_millis(100);\n            let task = tokio::time::timeout(timeout, ping_safekeeper(id, config));\n            tasks.push(tokio::spawn(task));\n        }\n\n        // Get a quorum of responses or errors\n        let mut responses = Vec::new();\n        let mut join_errors = Vec::new();\n        let mut task_errors = Vec::new();\n        let mut timeout_errors = Vec::new();\n        while let Some(response) = tasks.next().await {\n            match response {\n                Ok(Ok(Ok(r))) => responses.push(r),\n                Ok(Ok(Err(e))) => task_errors.push(e),\n                Ok(Err(e)) => timeout_errors.push(e),\n                Err(e) => join_errors.push(e),\n            };\n            if responses.len() >= quorum {\n                break;\n            }\n            if join_errors.len() + 
task_errors.len() + timeout_errors.len() >= quorum {\n                break;\n            }\n        }\n\n        // In case of error, log and fail the check, but don't crash.\n        // We're playing it safe because these errors could be transient\n        // and we don't yet retry.\n        if responses.len() < quorum {\n            error!(\n                \"failed sync safekeepers check {:?} {:?} {:?}\",\n                join_errors, task_errors, timeout_errors\n            );\n            return Ok(None);\n        }\n\n        Ok(check_if_synced(responses))\n    }\n\n    // Fast path for sync_safekeepers. If they're already synced we get the lsn\n    // in one roundtrip. If not, we should do a full sync_safekeepers.\n    #[instrument(skip_all)]\n    pub fn check_safekeepers_synced(&self, compute_state: &ComputeState) -> Result<Option<Lsn>> {\n        let start_time = Utc::now();\n\n        let rt = tokio::runtime::Handle::current();\n        let result = rt.block_on(self.check_safekeepers_synced_async(compute_state));\n\n        // Record runtime\n        self.state.lock().unwrap().metrics.sync_sk_check_ms = Utc::now()\n            .signed_duration_since(start_time)\n            .to_std()\n            .unwrap()\n            .as_millis() as u64;\n        result\n    }\n\n    // Run `postgres` in a special mode with `--sync-safekeepers` argument\n    // and return the reported LSN back to the caller.\n    #[instrument(skip_all)]\n    pub fn sync_safekeepers(&self, storage_auth_token: Option<String>) -> Result<Lsn> {\n        let start_time = Utc::now();\n\n        let mut sync_handle = maybe_cgexec(&self.params.pgbin)\n            .args([\"--sync-safekeepers\"])\n            .env(\"PGDATA\", &self.params.pgdata) // we cannot use -D in this mode\n            .envs(if let Some(storage_auth_token) = &storage_auth_token {\n                vec![(\"NEON_AUTH_TOKEN\", storage_auth_token)]\n            } else {\n                vec![]\n            })\n            
.stdout(Stdio::piped())\n            .stderr(Stdio::piped())\n            .spawn()\n            .expect(\"postgres --sync-safekeepers failed to start\");\n        SYNC_SAFEKEEPERS_PID.store(sync_handle.id(), Ordering::SeqCst);\n\n        // `postgres --sync-safekeepers` will print all log output to stderr and\n        // final LSN to stdout. So we leave stdout to collect LSN, while stderr logs\n        // will be collected in a child thread.\n        let stderr = sync_handle\n            .stderr\n            .take()\n            .expect(\"stderr should be captured\");\n        let logs_handle = handle_postgres_logs(stderr);\n\n        let sync_output = sync_handle\n            .wait_with_output()\n            .expect(\"postgres --sync-safekeepers failed\");\n        SYNC_SAFEKEEPERS_PID.store(0, Ordering::SeqCst);\n\n        // Process has exited, so we can join the logs thread.\n        let _ = tokio::runtime::Handle::current()\n            .block_on(logs_handle)\n            .map_err(|e| tracing::error!(\"log task panicked: {:?}\", e));\n\n        if !sync_output.status.success() {\n            anyhow::bail!(\n                \"postgres --sync-safekeepers exited with non-zero status: {}. 
stdout: {}\",\n                sync_output.status,\n                String::from_utf8(sync_output.stdout)\n                    .expect(\"postgres --sync-safekeepers exited, and stdout is not utf-8\"),\n            );\n        }\n\n        self.state.lock().unwrap().metrics.sync_safekeepers_ms = Utc::now()\n            .signed_duration_since(start_time)\n            .to_std()\n            .unwrap()\n            .as_millis() as u64;\n\n        let lsn = Lsn::from_str(String::from_utf8(sync_output.stdout)?.trim())?;\n\n        Ok(lsn)\n    }\n\n    fn sync_safekeepers_with_retries(&self, storage_auth_token: Option<String>) -> Result<Lsn> {\n        let max_retries = 5;\n        let mut attempts = 0;\n        loop {\n            let result = self.sync_safekeepers(storage_auth_token.clone());\n            match &result {\n                Ok(_) => {\n                    if attempts > 0 {\n                        tracing::info!(\"sync_safekeepers succeeded after {attempts} retries\");\n                    }\n                    return result;\n                }\n                Err(e) if attempts < max_retries => {\n                    tracing::info!(\n                        \"sync_safekeepers failed, will retry (attempt {attempts}): {e:#}\"\n                    );\n                }\n                Err(err) => {\n                    tracing::warn!(\n                        \"sync_safekeepers still failed after {attempts} retries, giving up: {err:?}\"\n                    );\n                    return result;\n                }\n            }\n            // sleep and retry\n            let backoff = exponential_backoff_duration(\n                attempts,\n                DEFAULT_BASE_BACKOFF_SECONDS,\n                DEFAULT_MAX_BACKOFF_SECONDS,\n            );\n            std::thread::sleep(backoff);\n            attempts += 1;\n        }\n    }\n\n    /// Do all the preparations like PGDATA directory creation, configuration,\n    /// safekeepers sync, basebackup, 
etc.\n    #[instrument(skip_all)]\n    pub fn prepare_pgdata(&self, compute_state: &ComputeState) -> Result<()> {\n        let pspec = compute_state.pspec.as_ref().expect(\"spec must be set\");\n        let spec = &pspec.spec;\n        let pgdata_path = Path::new(&self.params.pgdata);\n\n        let tls_config = self.tls_config(&pspec.spec);\n        let databricks_settings = spec.databricks_settings.as_ref();\n        let postgres_port = self.params.connstr.port();\n\n        // Remove/create an empty pgdata directory and put configuration there.\n        self.create_pgdata()?;\n        config::write_postgres_conf(\n            pgdata_path,\n            &self.params,\n            &pspec.spec,\n            postgres_port,\n            self.params.internal_http_port,\n            tls_config,\n            databricks_settings,\n            self.params.lakebase_mode,\n        )?;\n\n        // Syncing safekeepers is only safe with primary nodes: if a primary\n        // is already connected it will be kicked out, so a secondary (standby)\n        // cannot sync safekeepers.\n        let lsn = match spec.mode {\n            ComputeMode::Primary => {\n                info!(\"checking if safekeepers are synced\");\n                let lsn = if let Ok(Some(lsn)) = self.check_safekeepers_synced(compute_state) {\n                    lsn\n                } else {\n                    info!(\"starting safekeepers syncing\");\n                    self.sync_safekeepers_with_retries(pspec.storage_auth_token.clone())\n                        .with_context(|| \"failed to sync safekeepers\")?\n                };\n                info!(\"safekeepers synced at LSN {}\", lsn);\n                lsn\n            }\n            ComputeMode::Static(lsn) => {\n                info!(\"Starting read-only node at static LSN {}\", lsn);\n                lsn\n            }\n            ComputeMode::Replica => {\n                info!(\"Initializing standby from latest Pageserver LSN\");\n         
       Lsn(0)\n            }\n        };\n\n        self.get_basebackup(compute_state, lsn)\n            .with_context(|| format!(\"failed to get basebackup@{lsn}\"))?;\n\n        if let Some(settings) = databricks_settings {\n            copy_tls_certificates(\n                &settings.pg_compute_tls_settings.key_file,\n                &settings.pg_compute_tls_settings.cert_file,\n                pgdata_path,\n            )?;\n\n            // Update pg_hba.conf received with basebackup including additional databricks settings.\n            update_pg_hba(pgdata_path, Some(&settings.databricks_pg_hba))?;\n            update_pg_ident(pgdata_path, Some(&settings.databricks_pg_ident))?;\n        } else {\n            // Update pg_hba.conf received with basebackup.\n            update_pg_hba(pgdata_path, None)?;\n        }\n\n        if let Some(databricks_settings) = spec.databricks_settings.as_ref() {\n            copy_tls_certificates(\n                &databricks_settings.pg_compute_tls_settings.key_file,\n                &databricks_settings.pg_compute_tls_settings.cert_file,\n                pgdata_path,\n            )?;\n        }\n\n        // Place pg_dynshmem under /dev/shm. This allows us to use\n        // 'dynamic_shared_memory_type = mmap' so that the files are placed in\n        // /dev/shm, similar to how 'dynamic_shared_memory_type = posix' works.\n        //\n        // Why on earth don't we just stick to the 'posix' default, you might\n        // ask.  It turns out that making large allocations with 'posix' doesn't\n        // work very well with autoscaling. The behavior we want is that:\n        //\n        // 1. You can make large DSM allocations, larger than the current RAM\n        //    size of the VM, without errors\n        //\n        // 2. If the allocated memory is really used, the VM is scaled up\n        //    automatically to accommodate that\n        //\n        // We try to make that possible by having swap in the VM. 
But with the\n        // default 'posix' DSM implementation, we fail step 1, even when there's\n        // plenty of swap available. PostgreSQL uses posix_fallocate() to create\n        // the shmem segment, which is really just a file in /dev/shm in Linux,\n        // but posix_fallocate() on tmpfs returns ENOMEM if the size is larger\n        // than available RAM.\n        //\n        // Using 'dynamic_shared_memory_type = mmap' works around that, because\n        // the Postgres 'mmap' DSM implementation doesn't use\n        // posix_fallocate(). Instead, it uses repeated calls to write(2) to\n        // fill the file with zeros. It's weird that that differs between\n        // 'posix' and 'mmap', but we take advantage of it. When the file is\n        // filled slowly with write(2), the kernel allows it to grow larger, as\n        // long as there's swap available.\n        //\n        // In short, using 'dynamic_shared_memory_type = mmap' allows one DSM\n        // segment to be larger than the currently available RAM. But we\n        // don't want to store it in a real file, which the kernel would try to\n        // flush to disk, so we symlink pg_dynshmem to /dev/shm.\n        //\n        // We don't set 'dynamic_shared_memory_type = mmap' here, we let the\n        // control plane control that option. If 'mmap' is not used, this\n        // symlink doesn't affect anything.\n        //\n        // See https://github.com/neondatabase/autoscaling/issues/800\n        std::fs::remove_dir_all(pgdata_path.join(\"pg_dynshmem\"))?;\n        symlink(\"/dev/shm/\", pgdata_path.join(\"pg_dynshmem\"))?;\n\n        match spec.mode {\n            ComputeMode::Primary => {}\n            ComputeMode::Replica | ComputeMode::Static(..) 
=> {\n                add_standby_signal(pgdata_path)?;\n            }\n        }\n\n        Ok(())\n    }\n\n    /// Start and stop a postgres process to warm up the VM for startup.\n    pub fn prewarm_postgres_vm_memory(&self) -> Result<()> {\n        if self.params.lakebase_mode {\n            // We are running in Hadron mode. Disabling this prewarming step for now as it could run\n            // into dblet port conflicts and also doesn't add much value with our current infra.\n            info!(\"Skipping postgres prewarming in Hadron mode\");\n            return Ok(());\n        }\n        info!(\"prewarming VM memory\");\n\n        // Create pgdata\n        let pgdata = &format!(\"{}.warmup\", self.params.pgdata);\n        create_pgdata(pgdata)?;\n\n        // Run initdb to completion\n        info!(\"running initdb\");\n        let initdb_bin = Path::new(&self.params.pgbin)\n            .parent()\n            .unwrap()\n            .join(\"initdb\");\n        Command::new(initdb_bin)\n            .args([\"--pgdata\", pgdata])\n            .output()\n            .expect(\"cannot start initdb process\");\n\n        // Write conf\n        use std::io::Write;\n        let conf_path = Path::new(pgdata).join(\"postgresql.conf\");\n        let mut file = std::fs::File::create(conf_path)?;\n        writeln!(file, \"shared_buffers=65536\")?;\n        writeln!(file, \"port=51055\")?; // Nobody should be connecting\n        writeln!(file, \"shared_preload_libraries = 'neon'\")?;\n\n        // Start postgres\n        info!(\"starting postgres\");\n        let mut pg = maybe_cgexec(&self.params.pgbin)\n            .args([\"-D\", pgdata])\n            .spawn()\n            .expect(\"cannot start postgres process\");\n\n        // Stop it when it's ready\n        info!(\"waiting for postgres\");\n        wait_for_postgres(&mut pg, Path::new(pgdata))?;\n        // SIGQUIT orders postgres to exit immediately. 
We don't want to SIGKILL\n        // it to avoid orphaned processes prowling around while datadir is\n        // wiped.\n        let pm_pid = Pid::from_raw(pg.id() as i32);\n        kill(pm_pid, Signal::SIGQUIT)?;\n        info!(\"sent SIGQUIT signal\");\n        pg.wait()?;\n        info!(\"done prewarming vm memory\");\n\n        // clean up\n        let _ok = fs::remove_dir_all(pgdata);\n        Ok(())\n    }\n\n    /// Start Postgres as a child process and wait for it to start accepting\n    /// connections.\n    ///\n    /// Returns a handle to the child process and a handle to the logs thread.\n    #[instrument(skip_all)]\n    pub fn start_postgres(&self, storage_auth_token: Option<String>) -> Result<PostgresHandle> {\n        let pgdata_path = Path::new(&self.params.pgdata);\n\n        let env_vars: Vec<(String, String)> = if self.params.lakebase_mode {\n            let databricks_env_vars = {\n                let state = self.state.lock().unwrap();\n                let spec = &state.pspec.as_ref().unwrap().spec;\n                DatabricksEnvVars::new(\n                    spec,\n                    Some(&self.params.compute_id),\n                    self.params.instance_id.clone(),\n                    self.params.lakebase_mode,\n                )\n            };\n\n            info!(\n                \"Starting Postgres for databricks endpoint id: {}\",\n                &databricks_env_vars.endpoint_id\n            );\n\n            let mut env_vars = databricks_env_vars.to_env_var_list();\n            env_vars.extend(storage_auth_token.map(|t| (\"NEON_AUTH_TOKEN\".to_string(), t)));\n            env_vars\n        } else if let Some(storage_auth_token) = &storage_auth_token {\n            vec![(\"NEON_AUTH_TOKEN\".to_owned(), storage_auth_token.to_owned())]\n        } else {\n            vec![]\n        };\n\n        // Run postgres as a child process.\n        let mut pg = maybe_cgexec(&self.params.pgbin)\n            .args([\"-D\", 
&self.params.pgdata])\n            .envs(env_vars)\n            .stderr(Stdio::piped())\n            .spawn()\n            .expect(\"cannot start postgres process\");\n        PG_PID.store(pg.id(), Ordering::SeqCst);\n\n        // Start a task to collect logs from stderr.\n        let stderr = pg.stderr.take().expect(\"stderr should be captured\");\n        let logs_handle = handle_postgres_logs(stderr);\n\n        wait_for_postgres(&mut pg, pgdata_path)?;\n\n        Ok(PostgresHandle {\n            postgres: pg,\n            log_collector: logs_handle,\n        })\n    }\n\n    /// Wait for the child Postgres process forever. In this state Ctrl+C will\n    /// propagate to Postgres and it will be shut down as well.\n    fn wait_postgres(&self, mut pg_handle: PostgresHandle) -> std::process::ExitStatus {\n        info!(postmaster_pid = %pg_handle.postgres.id(), \"Waiting for Postgres to exit\");\n\n        let ecode = pg_handle\n            .postgres\n            .wait()\n            .expect(\"failed to start waiting on Postgres process\");\n        PG_PID.store(0, Ordering::SeqCst);\n\n        // Process has exited. Wait for the log collecting task to finish.\n        let _ = tokio::runtime::Handle::current()\n            .block_on(pg_handle.log_collector)\n            .map_err(|e| tracing::error!(\"log task panicked: {:?}\", e));\n\n        ecode\n    }\n\n    /// Do post configuration of the already started Postgres. This function spawns a background task to\n    /// configure the database after applying the compute spec. Currently, it upgrades the neon extension\n    /// version. 
In the future, it may upgrade all 3rd-party extensions.\n    #[instrument(skip_all)]\n    pub fn post_apply_config(&self) -> Result<()> {\n        let conf = self.get_tokio_conn_conf(Some(\"compute_ctl:post_apply_config\"));\n        tokio::spawn(async move {\n            let res = async {\n                let (mut client, connection) = conf.connect(NoTls).await?;\n                tokio::spawn(async move {\n                    if let Err(e) = connection.await {\n                        eprintln!(\"connection error: {e}\");\n                    }\n                });\n\n                handle_neon_extension_upgrade(&mut client)\n                    .await\n                    .context(\"handle_neon_extension_upgrade\")?;\n                Ok::<_, anyhow::Error>(())\n            }\n            .await;\n            if let Err(err) = res {\n                error!(\"error while post_apply_config: {err:#}\");\n            }\n        });\n        Ok(())\n    }\n\n    pub fn get_conn_conf(&self, application_name: Option<&str>) -> postgres::Config {\n        let mut conf = self.conn_conf.clone();\n        if let Some(application_name) = application_name {\n            conf.application_name(application_name);\n        }\n        conf\n    }\n\n    pub fn get_tokio_conn_conf(&self, application_name: Option<&str>) -> tokio_postgres::Config {\n        let mut conf = self.tokio_conn_conf.clone();\n        if let Some(application_name) = application_name {\n            conf.application_name(application_name);\n        }\n        conf\n    }\n\n    pub async fn get_maintenance_client(\n        conf: &tokio_postgres::Config,\n    ) -> Result<tokio_postgres::Client> {\n        let mut conf = conf.clone();\n        conf.application_name(\"compute_ctl:apply_config\");\n\n        let (client, conn) = match conf.connect(NoTls).await {\n            // If connection fails, it may be the old node with `zenith_admin` superuser.\n            //\n            // In this case we need to connect 
with old `zenith_admin` name\n            // and create new user. We cannot simply rename connected user,\n            // but we can create a new one and grant it all privileges.\n            Err(e) => match e.code() {\n                Some(&SqlState::INVALID_PASSWORD)\n                | Some(&SqlState::INVALID_AUTHORIZATION_SPECIFICATION) => {\n                    // Connect with `zenith_admin` if `cloud_admin` could not authenticate\n                    info!(\n                        \"cannot connect to Postgres: {}, retrying with 'zenith_admin' username\",\n                        e\n                    );\n                    let mut zenith_admin_conf = postgres::config::Config::from(conf.clone());\n                    zenith_admin_conf.application_name(\"compute_ctl:apply_config\");\n                    zenith_admin_conf.user(\"zenith_admin\");\n\n                    // It doesn't matter what were the options before, here we just want\n                    // to connect and create a new superuser role.\n                    const ZENITH_OPTIONS: &str = \"-c role=zenith_admin -c default_transaction_read_only=off -c search_path='' -c statement_timeout=0\";\n                    zenith_admin_conf.options(ZENITH_OPTIONS);\n\n                    let mut client =\n                        zenith_admin_conf.connect(NoTls)\n                            .context(\"broken cloud_admin credential: tried connecting with cloud_admin but could not authenticate, and zenith_admin does not work either\")?;\n\n                    // Disable forwarding so that users don't get a cloud_admin role\n                    let mut func = || {\n                        client.simple_query(\"SET neon.forward_ddl = false\")?;\n                        client.simple_query(\"CREATE USER cloud_admin WITH SUPERUSER\")?;\n                        client.simple_query(\"GRANT zenith_admin TO cloud_admin\")?;\n                        Ok::<_, anyhow::Error>(())\n                    };\n                    
func().context(\"apply_config setup cloud_admin\")?;\n\n                    drop(client);\n\n                    // Reconnect with connstring with expected name\n                    conf.connect(NoTls).await?\n                }\n                _ => return Err(e.into()),\n            },\n            Ok((client, conn)) => (client, conn),\n        };\n\n        spawn(async move {\n            if let Err(e) = conn.await {\n                error!(\"maintenance client connection error: {}\", e);\n            }\n        });\n\n        // Disable DDL forwarding because control plane already knows about the roles/databases\n        // we're about to modify.\n        client\n            .simple_query(\"SET neon.forward_ddl = false\")\n            .await\n            .context(\"apply_config SET neon.forward_ddl = false\")?;\n\n        Ok(client)\n    }\n\n    /// Do initial configuration of the already started Postgres.\n    #[instrument(skip_all)]\n    pub fn apply_config(&self, compute_state: &ComputeState) -> Result<()> {\n        let mut conf = self.get_tokio_conn_conf(Some(\"compute_ctl:apply_config\"));\n\n        if self.params.lakebase_mode {\n            // Set a 2-minute statement_timeout for the session applying config. The individual SQL statements\n            // used in apply_spec_sql() should not take long (they are just creating users and installing\n            // extensions). 
If any of them are stuck for an extended period of time it usually indicates a\n            // pageserver connectivity problem and we should bail out.\n            conf.options(\"-c statement_timeout=2min\");\n        }\n\n        let conf = Arc::new(conf);\n        let spec = Arc::new(\n            compute_state\n                .pspec\n                .as_ref()\n                .expect(\"spec must be set\")\n                .spec\n                .clone(),\n        );\n\n        let mut tls_config = None::<TlsConfig>;\n        if spec.features.contains(&ComputeFeature::TlsExperimental) {\n            tls_config = self.compute_ctl_config.tls.clone();\n        }\n\n        self.update_installed_extensions_collection_interval(&spec);\n\n        let max_concurrent_connections = self.max_service_connections(compute_state, &spec);\n\n        // Merge-apply spec & changes to PostgreSQL state.\n        self.apply_spec_sql(spec.clone(), conf.clone(), max_concurrent_connections)?;\n\n        if let Some(local_proxy) = &spec.clone().local_proxy_config {\n            let mut local_proxy = local_proxy.clone();\n            local_proxy.tls = tls_config.clone();\n\n            info!(\"configuring local_proxy\");\n            local_proxy::configure(&local_proxy).context(\"apply_config local_proxy\")?;\n        }\n\n        // Run migrations separately to not hold up cold starts\n        let lakebase_mode = self.params.lakebase_mode;\n        let params = self.params.clone();\n        tokio::spawn(async move {\n            let mut conf = conf.as_ref().clone();\n            conf.application_name(\"compute_ctl:migrations\");\n\n            match conf.connect(NoTls).await {\n                Ok((mut client, connection)) => {\n                    tokio::spawn(async move {\n                        if let Err(e) = connection.await {\n                            eprintln!(\"connection error: {e}\");\n                        }\n                    });\n                    if let Err(e) = 
handle_migrations(params, &mut client, lakebase_mode).await {\n                        error!(\"Failed to run migrations: {}\", e);\n                    }\n                }\n                Err(e) => {\n                    error!(\n                        \"Failed to connect to the compute for running migrations: {}\",\n                        e\n                    );\n                }\n            };\n        });\n\n        Ok::<(), anyhow::Error>(())\n    }\n\n    // Signal to the configurator to refresh the configuration by pulling a new spec from the HCC.\n    // Note that this merely triggers a notification on a condition variable the configurator thread\n    // waits on. The configurator thread (in configurator.rs) pulls the new spec from the HCC and\n    // applies it.\n    pub async fn signal_refresh_configuration(&self) -> Result<()> {\n        let states_allowing_configuration_refresh = [\n            ComputeStatus::Running,\n            ComputeStatus::Failed,\n            ComputeStatus::RefreshConfigurationPending,\n        ];\n\n        let mut state = self.state.lock().expect(\"state lock poisoned\");\n        if states_allowing_configuration_refresh.contains(&state.status) {\n            state.status = ComputeStatus::RefreshConfigurationPending;\n            self.state_changed.notify_all();\n            Ok(())\n        } else if state.status == ComputeStatus::Init {\n            // If the compute is in Init state, we can't refresh the configuration immediately,\n            // but we should be able to do that soon.\n            Ok(())\n        } else {\n            Err(anyhow::anyhow!(\n                \"Cannot refresh compute configuration in state {:?}\",\n                state.status\n            ))\n        }\n    }\n\n    // Wrapped this around `pg_ctl reload`, but right now we don't use\n    // `pg_ctl` for start / stop.\n    #[instrument(skip_all)]\n    fn pg_reload_conf(&self) -> Result<()> {\n        let pgctl_bin = 
Path::new(&self.params.pgbin)\n            .parent()\n            .unwrap()\n            .join(\"pg_ctl\");\n        Command::new(pgctl_bin)\n            .args([\"reload\", \"-D\", &self.params.pgdata])\n            .output()\n            .expect(\"cannot run pg_ctl process\");\n        Ok(())\n    }\n\n    /// Similar to `apply_config()`, but does a bit different sequence of operations,\n    /// as it's used to reconfigure a previously started and configured Postgres node.\n    #[instrument(skip_all)]\n    pub fn reconfigure(&self) -> Result<()> {\n        let spec = self.state.lock().unwrap().pspec.clone().unwrap().spec;\n\n        let tls_config = self.tls_config(&spec);\n\n        self.update_installed_extensions_collection_interval(&spec);\n\n        if let Some(ref pgbouncer_settings) = spec.pgbouncer_settings {\n            info!(\"tuning pgbouncer\");\n\n            let pgbouncer_settings = pgbouncer_settings.clone();\n            let tls_config = tls_config.clone();\n\n            // Spawn a background task to do the tuning,\n            // so that we don't block the main thread that starts Postgres.\n            tokio::spawn(async move {\n                let res = tune_pgbouncer(pgbouncer_settings, tls_config).await;\n                if let Err(err) = res {\n                    error!(\"error while tuning pgbouncer: {err:?}\");\n                }\n            });\n        }\n\n        if let Some(ref local_proxy) = spec.local_proxy_config {\n            info!(\"configuring local_proxy\");\n\n            // Spawn a background task to do the configuration,\n            // so that we don't block the main thread that starts Postgres.\n            let mut local_proxy = local_proxy.clone();\n            local_proxy.tls = tls_config.clone();\n            tokio::spawn(async move {\n                if let Err(err) = local_proxy::configure(&local_proxy) {\n                    error!(\"error while configuring local_proxy: {err:?}\");\n                }\n            
});\n        }\n\n        // Reconfigure rsyslog for Postgres logs export\n        let conf = PostgresLogsRsyslogConfig::new(spec.logs_export_host.as_deref());\n        configure_postgres_logs_export(conf)?;\n\n        // Write new config\n        let pgdata_path = Path::new(&self.params.pgdata);\n        let postgres_port = self.params.connstr.port();\n        config::write_postgres_conf(\n            pgdata_path,\n            &self.params,\n            &spec,\n            postgres_port,\n            self.params.internal_http_port,\n            tls_config,\n            spec.databricks_settings.as_ref(),\n            self.params.lakebase_mode,\n        )?;\n\n        self.pg_reload_conf()?;\n\n        if !spec.skip_pg_catalog_updates {\n            let max_concurrent_connections = spec.reconfigure_concurrency;\n            // Temporarily reset max_cluster_size in config\n            // to avoid the possibility of hitting the limit, while we are reconfiguring:\n            // creating new extensions, roles, etc.\n            config::with_compute_ctl_tmp_override(pgdata_path, \"neon.max_cluster_size=-1\", || {\n                self.pg_reload_conf()?;\n\n                if spec.mode == ComputeMode::Primary {\n                    let conf = self.get_tokio_conn_conf(Some(\"compute_ctl:reconfigure\"));\n                    let conf = Arc::new(conf);\n\n                    let spec = Arc::new(spec.clone());\n\n                    self.apply_spec_sql(spec, conf, max_concurrent_connections)?;\n                }\n\n                Ok(())\n            })?;\n            self.pg_reload_conf()?;\n        }\n\n        let unknown_op = \"unknown\".to_string();\n        let op_id = spec.operation_uuid.as_ref().unwrap_or(&unknown_op);\n        info!(\n            \"finished reconfiguration of compute node for operation {}\",\n            op_id\n        );\n\n        Ok(())\n    }\n\n    #[instrument(skip_all)]\n    pub fn configure_as_primary(&self, compute_state: &ComputeState) -> 
Result<()> {\n        let pspec = compute_state.pspec.as_ref().expect(\"spec must be set\");\n\n        assert!(pspec.spec.mode == ComputeMode::Primary);\n        if !pspec.spec.skip_pg_catalog_updates {\n            let pgdata_path = Path::new(&self.params.pgdata);\n            // temporarily reset max_cluster_size in config\n            // to avoid the possibility of hitting the limit, while we are applying config:\n            // creating new extensions, roles, etc...\n            config::with_compute_ctl_tmp_override(pgdata_path, \"neon.max_cluster_size=-1\", || {\n                self.pg_reload_conf()?;\n\n                self.apply_config(compute_state)?;\n\n                Ok(())\n            })?;\n\n            let postgresql_conf_path = pgdata_path.join(\"postgresql.conf\");\n            if config::line_in_file(\n                &postgresql_conf_path,\n                \"neon.disable_logical_replication_subscribers=false\",\n            )? {\n                info!(\n                    \"updated postgresql.conf to set neon.disable_logical_replication_subscribers=false\"\n                );\n            }\n            self.pg_reload_conf()?;\n        }\n        self.post_apply_config()?;\n\n        Ok(())\n    }\n\n    pub async fn watch_cert_for_changes(self: Arc<Self>) {\n        // update status on cert renewal\n        if let Some(tls_config) = &self.compute_ctl_config.tls {\n            let tls_config = tls_config.clone();\n\n            // wait until the cert exists.\n            let mut cert_watch = watch_cert_for_changes(tls_config.cert_path.clone()).await;\n\n            tokio::task::spawn_blocking(move || {\n                let handle = tokio::runtime::Handle::current();\n                'cert_update: loop {\n                    // let postgres/pgbouncer/local_proxy know the new cert/key exists.\n                    // we need to wait until it's configurable first.\n\n                    let mut state = self.state.lock().unwrap();\n                 
   'status_update: loop {\n                        match state.status {\n                            // let's update the state to config pending\n                            ComputeStatus::ConfigurationPending | ComputeStatus::Running => {\n                                state.set_status(\n                                    ComputeStatus::ConfigurationPending,\n                                    &self.state_changed,\n                                );\n                                break 'status_update;\n                            }\n\n                            // exit loop\n                            ComputeStatus::Failed\n                            | ComputeStatus::TerminationPendingFast\n                            | ComputeStatus::TerminationPendingImmediate\n                            | ComputeStatus::Terminated => break 'cert_update,\n\n                            // wait\n                            ComputeStatus::Init\n                            | ComputeStatus::Configuration\n                            | ComputeStatus::RefreshConfiguration\n                            | ComputeStatus::RefreshConfigurationPending\n                            | ComputeStatus::Empty => {\n                                state = self.state_changed.wait(state).unwrap();\n                            }\n                        }\n                    }\n                    drop(state);\n\n                    // wait for a new certificate update\n                    if handle.block_on(cert_watch.changed()).is_err() {\n                        break;\n                    }\n                }\n            });\n        }\n    }\n\n    pub fn tls_config(&self, spec: &ComputeSpec) -> &Option<TlsConfig> {\n        if spec.features.contains(&ComputeFeature::TlsExperimental) {\n            &self.compute_ctl_config.tls\n        } else {\n            &None::<TlsConfig>\n        }\n    }\n\n    /// Update the `last_active` in the shared state, but ensure that it's a more recent 
one.\n    pub fn update_last_active(&self, last_active: Option<DateTime<Utc>>) {\n        let mut state = self.state.lock().unwrap();\n        // NB: `Some(<DateTime>)` is always greater than `None`.\n        if last_active > state.last_active {\n            state.last_active = last_active;\n            debug!(\"set the last compute activity time to: {:?}\", last_active);\n        }\n    }\n\n    // Look for core dumps and collect backtraces.\n    //\n    // EKS worker nodes have following core dump settings:\n    //   /proc/sys/kernel/core_pattern -> core\n    //   /proc/sys/kernel/core_uses_pid -> 1\n    //   ulimit -c -> unlimited\n    // which results in core dumps being written to postgres data directory as core.<pid>.\n    //\n    // Use that as a default location and pattern, except macos where core dumps are written\n    // to /cores/ directory by default.\n    //\n    // With default Linux settings, the core dump file is called just \"core\", so check for\n    // that too.\n    pub fn check_for_core_dumps(&self) -> Result<()> {\n        let core_dump_dir = match std::env::consts::OS {\n            \"macos\" => Path::new(\"/cores/\"),\n            // BEGIN HADRON\n            // NB: Read core dump files from a fixed location outside of\n            // the data directory since `compute_ctl` wipes the data directory\n            // across container restarts.\n            _ => {\n                if self.params.lakebase_mode {\n                    Path::new(\"/databricks/logs/brickstore\")\n                } else {\n                    Path::new(&self.params.pgdata)\n                }\n            } // END HADRON\n        };\n\n        // Collect core dump paths if any\n        info!(\"checking for core dumps in {}\", core_dump_dir.display());\n        let files = fs::read_dir(core_dump_dir)?;\n        let cores = files.filter_map(|entry| {\n            let entry = entry.ok()?;\n\n            let is_core_dump = match entry.file_name().to_str()? 
{\n                n if n.starts_with(\"core.\") => true,\n                \"core\" => true,\n                _ => false,\n            };\n            if is_core_dump {\n                Some(entry.path())\n            } else {\n                None\n            }\n        });\n\n        // Print backtrace for each core dump\n        for core_path in cores {\n            warn!(\n                \"core dump found: {}, collecting backtrace\",\n                core_path.display()\n            );\n\n            // Try first with gdb\n            let backtrace = Command::new(\"gdb\")\n                .args([\"--batch\", \"-q\", \"-ex\", \"bt\", &self.params.pgbin])\n                .arg(&core_path)\n                .output();\n\n            // Try lldb if no gdb is found -- that is handy for local testing on macOS\n            let backtrace = match backtrace {\n                Err(ref e) if e.kind() == std::io::ErrorKind::NotFound => {\n                    warn!(\"cannot find gdb, trying lldb\");\n                    Command::new(\"lldb\")\n                        .arg(\"-c\")\n                        .arg(&core_path)\n                        .args([\"--batch\", \"-o\", \"bt all\", \"-o\", \"quit\"])\n                        .output()\n                }\n                _ => backtrace,\n            }?;\n\n            warn!(\n                \"core dump backtrace: {}\",\n                String::from_utf8_lossy(&backtrace.stdout)\n            );\n            warn!(\n                \"debugger stderr: {}\",\n                String::from_utf8_lossy(&backtrace.stderr)\n            );\n        }\n\n        Ok(())\n    }\n\n    /// Select `pg_stat_statements` data and return it as a stringified JSON\n    pub async fn collect_insights(&self) -> String {\n        let mut result_rows: Vec<String> = Vec::new();\n        let conf = self.get_tokio_conn_conf(Some(\"compute_ctl:collect_insights\"));\n        let connect_result = conf.connect(NoTls).await;\n        let (client, 
connection) = connect_result.unwrap();\n        tokio::spawn(async move {\n            if let Err(e) = connection.await {\n                eprintln!(\"connection error: {e}\");\n            }\n        });\n        let result = client\n            .simple_query(\n                \"SELECT\n    pg_catalog.row_to_json(pss)\nFROM\n    public.pg_stat_statements pss\nWHERE\n    pss.userid != 'cloud_admin'::pg_catalog.regrole::pg_catalog.oid\nORDER BY\n    (pss.mean_exec_time + pss.mean_plan_time) DESC\nLIMIT 100\",\n            )\n            .await;\n\n        if let Ok(raw_rows) = result {\n            for message in raw_rows.iter() {\n                if let postgres::SimpleQueryMessage::Row(row) = message {\n                    if let Some(json) = row.get(0) {\n                        result_rows.push(json.to_string());\n                    }\n                }\n            }\n\n            format!(\"{{\\\"pg_stat_statements\\\": [{}]}}\", result_rows.join(\",\"))\n        } else {\n            \"{{\\\"pg_stat_statements\\\": []}}\".to_string()\n        }\n    }\n\n    // download an archive, unzip and place files in correct locations\n    pub async fn download_extension(\n        &self,\n        real_ext_name: String,\n        ext_path: RemotePath,\n    ) -> Result<u64, DownloadError> {\n        let remote_ext_base_url =\n            self.params\n                .remote_ext_base_url\n                .as_ref()\n                .ok_or(DownloadError::BadInput(anyhow::anyhow!(\n                    \"Remote extensions storage is not configured\",\n                )))?;\n\n        let ext_archive_name = ext_path.object_name().expect(\"bad path\");\n\n        let mut first_try = false;\n        if !self\n            .ext_download_progress\n            .read()\n            .expect(\"lock err\")\n            .contains_key(ext_archive_name)\n        {\n            self.ext_download_progress\n                .write()\n                .expect(\"lock err\")\n                
.insert(ext_archive_name.to_string(), (Utc::now(), false));\n            first_try = true;\n        }\n        let (download_start, download_completed) =\n            self.ext_download_progress.read().expect(\"lock err\")[ext_archive_name];\n        let start_time_delta = Utc::now()\n            .signed_duration_since(download_start)\n            .to_std()\n            .unwrap()\n            .as_millis() as u64;\n\n        // how long to wait for extension download if it was started by another process\n        const HANG_TIMEOUT: u64 = 3000; // milliseconds\n\n        if download_completed {\n            info!(\"extension already downloaded, skipping re-download\");\n            return Ok(0);\n        } else if start_time_delta < HANG_TIMEOUT && !first_try {\n            info!(\n                \"download {ext_archive_name} already started by another process, hanging untill completion or timeout\"\n            );\n            let mut interval = tokio::time::interval(tokio::time::Duration::from_millis(500));\n            loop {\n                info!(\"waiting for download\");\n                interval.tick().await;\n                let (_, download_completed_now) =\n                    self.ext_download_progress.read().expect(\"lock\")[ext_archive_name];\n                if download_completed_now {\n                    info!(\"download finished by whoever else downloaded it\");\n                    return Ok(0);\n                }\n            }\n            // NOTE: the above loop will get terminated\n            // based on the timeout of the download function\n        }\n\n        // if extension hasn't been downloaded before or the previous\n        // attempt to download was at least HANG_TIMEOUT ms ago\n        // then we try to download it here\n        info!(\"downloading new extension {ext_archive_name}\");\n\n        let download_size = extension_server::download_extension(\n            &real_ext_name,\n            &ext_path,\n            
remote_ext_base_url,\n            &self.params.pgbin,\n        )\n        .await\n        .map_err(DownloadError::Other);\n\n        if download_size.is_ok() {\n            self.ext_download_progress\n                .write()\n                .expect(\"bad lock\")\n                .insert(ext_archive_name.to_string(), (download_start, true));\n        }\n\n        download_size\n    }\n\n    pub async fn set_role_grants(\n        &self,\n        db_name: &PgIdent,\n        schema_name: &PgIdent,\n        privileges: &[Privilege],\n        role_name: &PgIdent,\n    ) -> Result<()> {\n        use tokio_postgres::NoTls;\n\n        let mut conf = self.get_tokio_conn_conf(Some(\"compute_ctl:set_role_grants\"));\n        conf.dbname(db_name);\n\n        let (db_client, conn) = conf\n            .connect(NoTls)\n            .await\n            .context(\"Failed to connect to the database\")?;\n        tokio::spawn(conn);\n\n        // TODO: support other types of grants apart from schemas?\n\n        // check the role grants first - to gracefully handle read-replicas.\n        let select = \"SELECT privilege_type\n            FROM pg_catalog.pg_namespace\n                JOIN LATERAL (SELECT * FROM aclexplode(nspacl) AS x) AS acl ON true\n                JOIN pg_catalog.pg_user users ON acl.grantee = users.usesysid\n            WHERE users.usename OPERATOR(pg_catalog.=) $1::pg_catalog.name\n                AND nspname OPERATOR(pg_catalog.=) $2::pg_catalog.name\";\n        let rows = db_client\n            .query(select, &[role_name, schema_name])\n            .await\n            .with_context(|| format!(\"Failed to execute query: {select}\"))?;\n\n        let already_granted: HashSet<String> = rows.into_iter().map(|row| row.get(0)).collect();\n\n        let grants = privileges\n            .iter()\n            .filter(|p| !already_granted.contains(p.as_str()))\n            // should not be quoted as it's part of the command.\n            // is already sanitized so it's 
ok\n            .map(|p| p.as_str())\n            .join(\", \");\n\n        if !grants.is_empty() {\n            // quote the schema and role name as identifiers to sanitize them.\n            let schema_name = schema_name.pg_quote();\n            let role_name = role_name.pg_quote();\n\n            let query = format!(\"GRANT {grants} ON SCHEMA {schema_name} TO {role_name}\",);\n            db_client\n                .simple_query(&query)\n                .await\n                .with_context(|| format!(\"Failed to execute query: {query}\"))?;\n        }\n\n        Ok(())\n    }\n\n    pub async fn install_extension(\n        &self,\n        ext_name: &PgIdent,\n        db_name: &PgIdent,\n        ext_version: ExtVersion,\n    ) -> Result<ExtVersion> {\n        use tokio_postgres::NoTls;\n\n        let mut conf = self.get_tokio_conn_conf(Some(\"compute_ctl:install_extension\"));\n        conf.dbname(db_name);\n\n        let (db_client, conn) = conf\n            .connect(NoTls)\n            .await\n            .context(\"Failed to connect to the database\")?;\n        tokio::spawn(conn);\n\n        let version_query = \"SELECT extversion FROM pg_extension WHERE extname = $1\";\n        let version: Option<ExtVersion> = db_client\n            .query_opt(version_query, &[&ext_name])\n            .await\n            .with_context(|| format!(\"Failed to execute query: {version_query}\"))?\n            .map(|row| row.get(0));\n\n        // sanitize the inputs as postgres idents.\n        let ext_name: String = ext_name.pg_quote();\n        let quoted_version: String = ext_version.pg_quote();\n\n        if let Some(installed_version) = version {\n            if installed_version == ext_version {\n                return Ok(installed_version);\n            }\n            let query = format!(\"ALTER EXTENSION {ext_name} UPDATE TO {quoted_version}\");\n            db_client\n                .simple_query(&query)\n                .await\n                .with_context(|| 
format!(\"Failed to execute query: {query}\"))?;\n        } else {\n            let query = format!(\n                \"CREATE EXTENSION IF NOT EXISTS {ext_name} WITH SCHEMA public VERSION {quoted_version}\"\n            );\n            db_client\n                .simple_query(&query)\n                .await\n                .with_context(|| format!(\"Failed to execute query: {query}\"))?;\n        }\n\n        Ok(ext_version)\n    }\n\n    pub async fn prepare_preload_libraries(\n        &self,\n        spec: &ComputeSpec,\n    ) -> Result<RemoteExtensionMetrics> {\n        if self.params.remote_ext_base_url.is_none() {\n            return Ok(RemoteExtensionMetrics {\n                num_ext_downloaded: 0,\n                largest_ext_size: 0,\n                total_ext_download_size: 0,\n            });\n        }\n        let remote_extensions = spec\n            .remote_extensions\n            .as_ref()\n            .ok_or(anyhow::anyhow!(\"Remote extensions are not configured\"))?;\n\n        info!(\"parse shared_preload_libraries from spec.cluster.settings\");\n        let mut libs_vec = Vec::new();\n        if let Some(libs) = spec.cluster.settings.find(\"shared_preload_libraries\") {\n            libs_vec = libs\n                .split(&[',', '\\'', ' '])\n                .filter(|s| *s != \"neon\" && *s != \"databricks_auth\" && !s.is_empty())\n                .map(str::to_string)\n                .collect();\n        }\n        info!(\"parse shared_preload_libraries from provided postgresql.conf\");\n\n        // that is used in neon_local and python tests\n        if let Some(conf) = &spec.cluster.postgresql_conf {\n            let conf_lines = conf.split('\\n').collect::<Vec<&str>>();\n            let mut shared_preload_libraries_line = \"\";\n            for line in conf_lines {\n                if line.starts_with(\"shared_preload_libraries\") {\n                    shared_preload_libraries_line = line;\n                }\n            }\n            
let mut preload_libs_vec = Vec::new();\n            if let Some(libs) = shared_preload_libraries_line.split(\"='\").nth(1) {\n                preload_libs_vec = libs\n                    .split(&[',', '\\'', ' '])\n                    .filter(|s| *s != \"neon\" && *s != \"databricks_auth\" && !s.is_empty())\n                    .map(str::to_string)\n                    .collect();\n            }\n            libs_vec.extend(preload_libs_vec);\n        }\n\n        // Don't try to download libraries that are not in the index.\n        // Assume that they are already present locally.\n        libs_vec.retain(|lib| remote_extensions.library_index.contains_key(lib));\n\n        info!(\"Downloading to shared preload libraries: {:?}\", &libs_vec);\n\n        let mut download_tasks = Vec::new();\n        for library in &libs_vec {\n            let (ext_name, ext_path) =\n                remote_extensions.get_ext(library, true, &BUILD_TAG, &self.params.pgversion)?;\n            download_tasks.push(self.download_extension(ext_name, ext_path));\n        }\n        let results = join_all(download_tasks).await;\n\n        let mut remote_ext_metrics = RemoteExtensionMetrics {\n            num_ext_downloaded: 0,\n            largest_ext_size: 0,\n            total_ext_download_size: 0,\n        };\n        for result in results {\n            let download_size = match result {\n                Ok(res) => {\n                    remote_ext_metrics.num_ext_downloaded += 1;\n                    res\n                }\n                Err(err) => {\n                    // if we failed to download an extension, we don't want to fail the whole\n                    // process, but we do want to log the error\n                    error!(\"Failed to download extension: {}\", err);\n                    0\n                }\n            };\n\n            remote_ext_metrics.largest_ext_size =\n                std::cmp::max(remote_ext_metrics.largest_ext_size, download_size);\n            
remote_ext_metrics.total_ext_download_size += download_size;\n        }\n        Ok(remote_ext_metrics)\n    }\n\n    /// Waits until current thread receives a state changed notification and\n    /// the pageserver connection strings has changed.\n    ///\n    /// The operation will time out after a specified duration.\n    pub fn wait_timeout_while_pageserver_connstr_unchanged(&self, duration: Duration) {\n        let state = self.state.lock().unwrap();\n        let old_pageserver_conninfo = state\n            .pspec\n            .as_ref()\n            .expect(\"spec must be set\")\n            .pageserver_conninfo\n            .clone();\n        let mut unchanged = true;\n        let _ = self\n            .state_changed\n            .wait_timeout_while(state, duration, |s| {\n                let pageserver_conninfo = &s\n                    .pspec\n                    .as_ref()\n                    .expect(\"spec must be set\")\n                    .pageserver_conninfo;\n                unchanged = pageserver_conninfo == &old_pageserver_conninfo;\n                unchanged\n            })\n            .unwrap();\n        if !unchanged {\n            info!(\"Pageserver config changed\");\n        }\n    }\n\n    pub fn spawn_extension_stats_task(&self) {\n        self.terminate_extension_stats_task();\n\n        let conf = self.tokio_conn_conf.clone();\n        let atomic_interval = self.params.installed_extensions_collection_interval.clone();\n        let mut installed_extensions_collection_interval =\n            2 * atomic_interval.load(std::sync::atomic::Ordering::SeqCst);\n        info!(\n            \"[NEON_EXT_SPAWN] Spawning background installed extensions worker with Timeout: {}\",\n            installed_extensions_collection_interval\n        );\n        let handle = tokio::spawn(async move {\n            loop {\n                info!(\n                    \"[NEON_EXT_INT_SLEEP]: Interval: {}\",\n                    
installed_extensions_collection_interval\n                );\n                // Sleep at the start of the loop to ensure that two collections don't happen at the same time.\n                // The first collection happens during compute startup.\n                tokio::time::sleep(tokio::time::Duration::from_secs(\n                    installed_extensions_collection_interval,\n                ))\n                .await;\n                let _ = installed_extensions(conf.clone()).await;\n                // Acquire a read lock on the compute spec and then update the interval if necessary\n                installed_extensions_collection_interval = std::cmp::max(\n                    installed_extensions_collection_interval,\n                    2 * atomic_interval.load(std::sync::atomic::Ordering::SeqCst),\n                );\n            }\n        });\n\n        // Store the new task handle\n        *self.extension_stats_task.lock().unwrap() = Some(handle);\n    }\n\n    fn terminate_extension_stats_task(&self) {\n        if let Some(h) = self.extension_stats_task.lock().unwrap().take() {\n            h.abort()\n        }\n    }\n\n    pub fn spawn_lfc_offload_task(self: &Arc<Self>, interval: Duration) {\n        self.terminate_lfc_offload_task();\n        let secs = interval.as_secs();\n        let this = self.clone();\n\n        info!(\"spawning LFC offload worker with {secs}s interval\");\n        let handle = spawn(async move {\n            let mut interval = time::interval(interval);\n            interval.tick().await; // returns immediately\n            loop {\n                interval.tick().await;\n\n                let prewarm_state = this.state.lock().unwrap().lfc_prewarm_state.clone();\n                // Do not offload LFC state if we are currently prewarming or any issue occurred.\n                // If we'd do that, we might override the LFC state in endpoint storage with some\n                // incomplete state. 
Imagine a situation:\n                // 1. Endpoint started with `autoprewarm: true`\n                // 2. While prewarming is not completed, we upload the new incomplete state\n                // 3. Compute gets interrupted and restarts\n                // 4. We start again and try to prewarm with the state from 2. instead of the previous complete state\n                if matches!(\n                    prewarm_state,\n                    LfcPrewarmState::Completed { .. }\n                        | LfcPrewarmState::NotPrewarmed\n                        | LfcPrewarmState::Skipped\n                ) {\n                    this.offload_lfc_async().await;\n                }\n            }\n        });\n        *self.lfc_offload_task.lock().unwrap() = Some(handle);\n    }\n\n    fn terminate_lfc_offload_task(&self) {\n        if let Some(h) = self.lfc_offload_task.lock().unwrap().take() {\n            h.abort()\n        }\n    }\n\n    fn update_installed_extensions_collection_interval(&self, spec: &ComputeSpec) {\n        // Update the interval for collecting installed extensions statistics\n        // If the value is -1, we never suspend so set the value to default collection.\n        // If the value is 0, it means default, we will just continue to use the default.\n        if spec.suspend_timeout_seconds == -1 || spec.suspend_timeout_seconds == 0 {\n            self.params.installed_extensions_collection_interval.store(\n                DEFAULT_INSTALLED_EXTENSIONS_COLLECTION_INTERVAL,\n                std::sync::atomic::Ordering::SeqCst,\n            );\n        } else {\n            self.params.installed_extensions_collection_interval.store(\n                spec.suspend_timeout_seconds as u64,\n                std::sync::atomic::Ordering::SeqCst,\n            );\n        }\n    }\n\n    /// Set the compute spec and update related metrics.\n    /// This is the central place where pspec is updated.\n    pub fn set_spec(params: &ComputeNodeParams, state: &mut 
ComputeState, pspec: ParsedSpec) {\n        state.pspec = Some(pspec);\n        ComputeNode::update_attached_metric(params, state);\n        let _ = logger::update_ids(&params.instance_id, &Some(params.compute_id.clone()));\n    }\n\n    pub fn update_attached_metric(params: &ComputeNodeParams, state: &mut ComputeState) {\n        // Update the pg_cctl_attached gauge when all identifiers are available.\n        if let Some(instance_id) = &params.instance_id {\n            if let Some(pspec) = &state.pspec {\n                // Clear all values in the metric\n                COMPUTE_ATTACHED.reset();\n\n                // Set new metric value\n                COMPUTE_ATTACHED\n                    .with_label_values(&[\n                        &params.compute_id,\n                        instance_id,\n                        &pspec.tenant_id.to_string(),\n                        &pspec.timeline_id.to_string(),\n                    ])\n                    .set(1);\n            }\n        }\n    }\n}\n\npub async fn installed_extensions(conf: tokio_postgres::Config) -> Result<()> {\n    let res = get_installed_extensions(conf).await;\n    match res {\n        Ok(extensions) => {\n            info!(\n                \"[NEON_EXT_STAT] {}\",\n                serde_json::to_string(&extensions).expect(\"failed to serialize extensions list\")\n            );\n        }\n        Err(err) => error!(\"could not get installed extensions: {err}\"),\n    }\n    Ok(())\n}\n\npub fn forward_termination_signal(dev_mode: bool) {\n    let ss_pid = SYNC_SAFEKEEPERS_PID.load(Ordering::SeqCst);\n    if ss_pid != 0 {\n        let ss_pid = nix::unistd::Pid::from_raw(ss_pid as i32);\n        kill(ss_pid, Signal::SIGTERM).ok();\n    }\n\n    if !dev_mode {\n        //  Terminate pgbouncer with SIGKILL\n        match pid_file::read(PGBOUNCER_PIDFILE.into()) {\n            Ok(pid_file::PidFileRead::LockedByOtherProcess(pid)) => {\n                info!(\"sending SIGKILL to pgbouncer process 
pid: {}\", pid);\n                if let Err(e) = kill(pid, Signal::SIGKILL) {\n                    error!(\"failed to terminate pgbouncer: {}\", e);\n                }\n            }\n            // pgbouncer does not lock the pid file, so we read and kill the process directly\n            Ok(pid_file::PidFileRead::NotHeldByAnyProcess(_)) => {\n                if let Ok(pid_str) = std::fs::read_to_string(PGBOUNCER_PIDFILE) {\n                    if let Ok(pid) = pid_str.trim().parse::<i32>() {\n                        info!(\n                            \"sending SIGKILL to pgbouncer process pid: {} (from unlocked pid file)\",\n                            pid\n                        );\n                        if let Err(e) = kill(Pid::from_raw(pid), Signal::SIGKILL) {\n                            error!(\"failed to terminate pgbouncer: {}\", e);\n                        }\n                    }\n                } else {\n                    info!(\"pgbouncer pid file exists but process not running\");\n                }\n            }\n            Ok(pid_file::PidFileRead::NotExist) => {\n                info!(\"pgbouncer pid file not found, process may not be running\");\n            }\n            Err(e) => {\n                error!(\"error reading pgbouncer pid file: {}\", e);\n            }\n        }\n\n        // Terminate local_proxy\n        match pid_file::read(\"/etc/local_proxy/pid\".into()) {\n            Ok(pid_file::PidFileRead::LockedByOtherProcess(pid)) => {\n                info!(\"sending SIGTERM to local_proxy process pid: {}\", pid);\n                if let Err(e) = kill(pid, Signal::SIGTERM) {\n                    error!(\"failed to terminate local_proxy: {}\", e);\n                }\n            }\n            Ok(pid_file::PidFileRead::NotHeldByAnyProcess(_)) => {\n                info!(\"local_proxy PID file exists but process not running\");\n            }\n            Ok(pid_file::PidFileRead::NotExist) => {\n                
info!(\"local_proxy PID file not found, process may not be running\");\n            }\n            Err(e) => {\n                error!(\"error reading local_proxy PID file: {}\", e);\n            }\n        }\n    } else {\n        info!(\"Skipping pgbouncer and local_proxy termination because in dev mode\");\n    }\n\n    let pg_pid = PG_PID.load(Ordering::SeqCst);\n    if pg_pid != 0 {\n        let pg_pid = nix::unistd::Pid::from_raw(pg_pid as i32);\n        // Use 'fast' shutdown (SIGINT) because it also creates a shutdown checkpoint, which is important for\n        // ROs to get a list of running xacts faster instead of going through the CLOG.\n        // See https://www.postgresql.org/docs/current/server-shutdown.html for the list of modes and signals.\n        kill(pg_pid, Signal::SIGINT).ok();\n    }\n}\n\n// helper trait to call JoinSet::spawn_blocking(f), but propagates the current\n// tracing span to the thread.\ntrait JoinSetExt<T> {\n    fn spawn_blocking_child<F>(&mut self, f: F) -> tokio::task::AbortHandle\n    where\n        F: FnOnce() -> T + Send + 'static,\n        T: Send;\n}\n\nimpl<T: 'static> JoinSetExt<T> for tokio::task::JoinSet<T> {\n    fn spawn_blocking_child<F>(&mut self, f: F) -> tokio::task::AbortHandle\n    where\n        F: FnOnce() -> T + Send + 'static,\n        T: Send,\n    {\n        let sp = tracing::Span::current();\n        self.spawn_blocking(move || {\n            let _e = sp.enter();\n            f()\n        })\n    }\n}\n\n#[cfg(test)]\nmod tests {\n    use std::fs::File;\n\n    use super::*;\n\n    #[test]\n    fn duplicate_safekeeper_connstring() {\n        let file = File::open(\"tests/cluster_spec.json\").unwrap();\n        let spec: ComputeSpec = serde_json::from_reader(file).unwrap();\n\n        match ParsedSpec::try_from(spec.clone()) {\n            Ok(_p) => panic!(\"Failed to detect duplicate entry\"),\n            Err(e) => assert!(\n                e.to_string()\n                    .starts_with(\"duplicate 
entry in safekeeper_connstrings:\")\n            ),\n        };\n    }\n}\n"
  },
  {
    "path": "compute_tools/src/compute_prewarm.rs",
    "content": "use crate::compute::ComputeNode;\nuse anyhow::{Context, Result, bail};\nuse async_compression::tokio::bufread::{ZstdDecoder, ZstdEncoder};\nuse compute_api::responses::LfcOffloadState;\nuse compute_api::responses::LfcPrewarmState;\nuse http::StatusCode;\nuse reqwest::Client;\nuse std::mem::replace;\nuse std::sync::Arc;\nuse std::time::Instant;\nuse tokio::{io::AsyncReadExt, select, spawn};\nuse tokio_util::sync::CancellationToken;\nuse tracing::{error, info};\n\n/// A pair of url and a token to query endpoint storage for LFC prewarm-related tasks\nstruct EndpointStoragePair {\n    url: String,\n    token: String,\n}\n\nconst KEY: &str = \"lfc_state\";\nimpl EndpointStoragePair {\n    /// endpoint_id is set to None while prewarming from other endpoint, see compute_promote.rs\n    /// If not None, takes precedence over pspec.spec.endpoint_id\n    fn from_spec_and_endpoint(\n        pspec: &crate::compute::ParsedSpec,\n        endpoint_id: Option<String>,\n    ) -> Result<Self> {\n        let endpoint_id = endpoint_id.as_ref().or(pspec.spec.endpoint_id.as_ref());\n        let Some(ref endpoint_id) = endpoint_id else {\n            bail!(\"pspec.endpoint_id missing, other endpoint_id not provided\")\n        };\n        let Some(ref base_uri) = pspec.endpoint_storage_addr else {\n            bail!(\"pspec.endpoint_storage_addr missing\")\n        };\n        let tenant_id = pspec.tenant_id;\n        let timeline_id = pspec.timeline_id;\n\n        let url = format!(\"http://{base_uri}/{tenant_id}/{timeline_id}/{endpoint_id}/{KEY}\");\n        let Some(ref token) = pspec.endpoint_storage_token else {\n            bail!(\"pspec.endpoint_storage_token missing\")\n        };\n        let token = token.clone();\n        Ok(EndpointStoragePair { url, token })\n    }\n}\n\nimpl ComputeNode {\n    pub async fn lfc_prewarm_state(&self) -> LfcPrewarmState {\n        self.state.lock().unwrap().lfc_prewarm_state.clone()\n    }\n\n    pub fn lfc_offload_state(&self) 
-> LfcOffloadState {\n        self.state.lock().unwrap().lfc_offload_state.clone()\n    }\n\n    /// If there is a prewarm request ongoing, return `false`, `true` otherwise.\n    /// Has a failpoint \"compute-prewarm\"\n    pub fn prewarm_lfc(self: &Arc<Self>, from_endpoint: Option<String>) -> bool {\n        let token: CancellationToken;\n        {\n            let state = &mut self.state.lock().unwrap();\n            token = state.lfc_prewarm_token.clone();\n            if let LfcPrewarmState::Prewarming =\n                replace(&mut state.lfc_prewarm_state, LfcPrewarmState::Prewarming)\n            {\n                return false;\n            }\n        }\n        crate::metrics::LFC_PREWARMS.inc();\n\n        let this = self.clone();\n        spawn(async move {\n            let prewarm_state = match this.prewarm_impl(from_endpoint, token).await {\n                Ok(state) => state,\n                Err(err) => {\n                    crate::metrics::LFC_PREWARM_ERRORS.inc();\n                    error!(%err, \"could not prewarm LFC\");\n                    let error = format!(\"{err:#}\");\n                    LfcPrewarmState::Failed { error }\n                }\n            };\n\n            let state = &mut this.state.lock().unwrap();\n            if let LfcPrewarmState::Cancelled = prewarm_state {\n                state.lfc_prewarm_token = CancellationToken::new();\n            }\n            state.lfc_prewarm_state = prewarm_state;\n        });\n        true\n    }\n\n    /// from_endpoint: None for endpoint managed by this compute_ctl\n    fn endpoint_storage_pair(&self, from_endpoint: Option<String>) -> Result<EndpointStoragePair> {\n        let state = self.state.lock().unwrap();\n        EndpointStoragePair::from_spec_and_endpoint(state.pspec.as_ref().unwrap(), from_endpoint)\n    }\n\n    /// Request LFC state from endpoint storage and load corresponding pages into Postgres.\n    async fn prewarm_impl(\n        &self,\n        from_endpoint: 
Option<String>,\n        token: CancellationToken,\n    ) -> Result<LfcPrewarmState> {\n        let EndpointStoragePair {\n            url,\n            token: storage_token,\n        } = self.endpoint_storage_pair(from_endpoint)?;\n\n        #[cfg(feature = \"testing\")]\n        fail::fail_point!(\"compute-prewarm\", |_| bail!(\"compute-prewarm failpoint\"));\n\n        info!(%url, \"requesting LFC state from endpoint storage\");\n        let mut now = Instant::now();\n        let request = Client::new().get(&url).bearer_auth(storage_token);\n        let response = select! {\n            _ = token.cancelled() => return Ok(LfcPrewarmState::Cancelled),\n            response = request.send() => response\n        }\n        .context(\"querying endpoint storage\")?;\n\n        match response.status() {\n            StatusCode::OK => (),\n            StatusCode::NOT_FOUND => return Ok(LfcPrewarmState::Skipped),\n            status => bail!(\"{status} querying endpoint storage\"),\n        }\n        let state_download_time_ms = now.elapsed().as_millis() as u32;\n        now = Instant::now();\n\n        let mut uncompressed = Vec::new();\n        let lfc_state = select! {\n            _ = token.cancelled() => return Ok(LfcPrewarmState::Cancelled),\n            lfc_state = response.bytes() => lfc_state\n        }\n        .context(\"getting request body from endpoint storage\")?;\n\n        let mut decoder = ZstdDecoder::new(lfc_state.iter().as_slice());\n        select! 
{\n            _ = token.cancelled() => return Ok(LfcPrewarmState::Cancelled),\n            read = decoder.read_to_end(&mut uncompressed) => read\n        }\n        .context(\"decoding LFC state\")?;\n        let uncompress_time_ms = now.elapsed().as_millis() as u32;\n        now = Instant::now();\n\n        let uncompressed_len = uncompressed.len();\n        info!(%url, \"downloaded LFC state, uncompressed size {uncompressed_len}\");\n\n        // Client connection and prewarm info querying are fast and therefore don't need\n        // cancellation\n        let client = ComputeNode::get_maintenance_client(&self.tokio_conn_conf)\n            .await\n            .context(\"connecting to postgres\")?;\n        let pg_token = client.cancel_token();\n\n        let params: Vec<&(dyn postgres_types::ToSql + Sync)> = vec![&uncompressed];\n        select! {\n            res = client.query_one(\"select neon.prewarm_local_cache($1)\", &params) => res,\n            _ = token.cancelled() => {\n                pg_token.cancel_query(postgres::NoTls).await\n                    .context(\"cancelling neon.prewarm_local_cache()\")?;\n                return Ok(LfcPrewarmState::Cancelled)\n            }\n        }\n        .context(\"loading LFC state into postgres\")\n        .map(|_| ())?;\n        let prewarm_time_ms = now.elapsed().as_millis() as u32;\n\n        let row = client\n            .query_one(\"select * from neon.get_prewarm_info()\", &[])\n            .await\n            .context(\"querying prewarm info\")?;\n        let total = row.try_get(0).unwrap_or_default();\n        let prewarmed = row.try_get(1).unwrap_or_default();\n        let skipped = row.try_get(2).unwrap_or_default();\n\n        Ok(LfcPrewarmState::Completed {\n            total,\n            prewarmed,\n            skipped,\n            state_download_time_ms,\n            uncompress_time_ms,\n            prewarm_time_ms,\n        })\n    }\n\n    /// If offload request is ongoing, return false, true 
otherwise\n    pub fn offload_lfc(self: &Arc<Self>) -> bool {\n        {\n            let state = &mut self.state.lock().unwrap().lfc_offload_state;\n            if matches!(\n                replace(state, LfcOffloadState::Offloading),\n                LfcOffloadState::Offloading\n            ) {\n                return false;\n            }\n        }\n        let cloned = self.clone();\n        spawn(async move { cloned.offload_lfc_with_state_update().await });\n        true\n    }\n\n    pub async fn offload_lfc_async(self: &Arc<Self>) {\n        {\n            let state = &mut self.state.lock().unwrap().lfc_offload_state;\n            if matches!(\n                replace(state, LfcOffloadState::Offloading),\n                LfcOffloadState::Offloading\n            ) {\n                return;\n            }\n        }\n        self.offload_lfc_with_state_update().await\n    }\n\n    async fn offload_lfc_with_state_update(&self) {\n        crate::metrics::LFC_OFFLOADS.inc();\n        let state = match self.offload_lfc_impl().await {\n            Ok(state) => state,\n            Err(err) => {\n                crate::metrics::LFC_OFFLOAD_ERRORS.inc();\n                error!(%err, \"could not offload LFC\");\n                let error = format!(\"{err:#}\");\n                LfcOffloadState::Failed { error }\n            }\n        };\n        self.state.lock().unwrap().lfc_offload_state = state;\n    }\n\n    async fn offload_lfc_impl(&self) -> Result<LfcOffloadState> {\n        let EndpointStoragePair { url, token } = self.endpoint_storage_pair(None)?;\n        info!(%url, \"requesting LFC state from Postgres\");\n\n        let mut now = Instant::now();\n        let row = ComputeNode::get_maintenance_client(&self.tokio_conn_conf)\n            .await\n            .context(\"connecting to postgres\")?\n            .query_one(\"select neon.get_local_cache_state()\", &[])\n            .await\n            .context(\"querying LFC state\")?;\n        let state = 
row\n            .try_get::<usize, Option<&[u8]>>(0)\n            .context(\"deserializing LFC state\")?;\n        let Some(state) = state else {\n            info!(%url, \"empty LFC state, not exporting\");\n            return Ok(LfcOffloadState::Skipped);\n        };\n        let state_query_time_ms = now.elapsed().as_millis() as u32;\n        now = Instant::now();\n\n        let mut compressed = Vec::new();\n        ZstdEncoder::new(state)\n            .read_to_end(&mut compressed)\n            .await\n            .context(\"compressing LFC state\")?;\n        let compress_time_ms = now.elapsed().as_millis() as u32;\n        now = Instant::now();\n\n        let compressed_len = compressed.len();\n        info!(%url, \"downloaded LFC state, compressed size {compressed_len}\");\n\n        let request = Client::new().put(url).bearer_auth(token).body(compressed);\n        let response = request\n            .send()\n            .await\n            .context(\"writing to endpoint storage\")?;\n        let state_upload_time_ms = now.elapsed().as_millis() as u32;\n        let status = response.status();\n        if status != StatusCode::OK {\n            bail!(\"request to endpoint storage failed: {status}\");\n        }\n\n        Ok(LfcOffloadState::Completed {\n            compress_time_ms,\n            state_query_time_ms,\n            state_upload_time_ms,\n        })\n    }\n\n    pub fn cancel_prewarm(self: &Arc<Self>) {\n        self.state.lock().unwrap().lfc_prewarm_token.cancel();\n    }\n}\n"
  },
  {
    "path": "compute_tools/src/compute_promote.rs",
    "content": "use crate::compute::ComputeNode;\nuse anyhow::{Context, bail};\nuse compute_api::responses::{LfcPrewarmState, PromoteConfig, PromoteState};\nuse std::time::Instant;\nuse tracing::info;\n\nimpl ComputeNode {\n    /// Returns only when promote fails or succeeds. If http client calling this function\n    /// disconnects, this does not stop promotion, and subsequent calls block until promote finishes.\n    /// Called by control plane on secondary after primary endpoint is terminated\n    /// Has a failpoint \"compute-promotion\"\n    pub async fn promote(self: &std::sync::Arc<Self>, cfg: PromoteConfig) -> PromoteState {\n        let this = self.clone();\n        let promote_fn = async move || match this.promote_impl(cfg).await {\n            Ok(state) => state,\n            Err(err) => {\n                tracing::error!(%err, \"promoting replica\");\n                let error = format!(\"{err:#}\");\n                PromoteState::Failed { error }\n            }\n        };\n        let start_promotion = || {\n            let (tx, rx) = tokio::sync::watch::channel(PromoteState::NotPromoted);\n            tokio::spawn(async move { tx.send(promote_fn().await) });\n            rx\n        };\n\n        let mut task;\n        // promote_impl locks self.state so we need to unlock it before calling task.changed()\n        {\n            let promote_state = &mut self.state.lock().unwrap().promote_state;\n            task = promote_state.get_or_insert_with(start_promotion).clone()\n        }\n        if task.changed().await.is_err() {\n            let error = \"promote sender dropped\".to_string();\n            return PromoteState::Failed { error };\n        }\n        task.borrow().clone()\n    }\n\n    async fn promote_impl(&self, cfg: PromoteConfig) -> anyhow::Result<PromoteState> {\n        {\n            let state = self.state.lock().unwrap();\n            let mode = &state.pspec.as_ref().unwrap().spec.mode;\n            if *mode != 
compute_api::spec::ComputeMode::Replica {\n                bail!(\"compute mode \\\"{}\\\" is not replica\", mode.to_type_str());\n            }\n            match &state.lfc_prewarm_state {\n                status @ (LfcPrewarmState::NotPrewarmed | LfcPrewarmState::Prewarming) => {\n                    bail!(\"compute {status}\")\n                }\n                LfcPrewarmState::Failed { error } => {\n                    tracing::warn!(%error, \"compute prewarm failed\")\n                }\n                _ => {}\n            }\n        }\n\n        let client = ComputeNode::get_maintenance_client(&self.tokio_conn_conf)\n            .await\n            .context(\"connecting to postgres\")?;\n        let mut now = Instant::now();\n\n        let primary_lsn = cfg.wal_flush_lsn;\n        let mut standby_lsn = utils::lsn::Lsn::INVALID;\n        const RETRIES: i32 = 20;\n        for i in 0..=RETRIES {\n            let row = client\n                .query_one(\"SELECT pg_catalog.pg_last_wal_replay_lsn()\", &[])\n                .await\n                .context(\"getting last replay lsn\")?;\n            let lsn: u64 = row.get::<usize, postgres_types::PgLsn>(0).into();\n            standby_lsn = lsn.into();\n            if standby_lsn >= primary_lsn {\n                break;\n            }\n            info!(%standby_lsn, %primary_lsn, \"catching up, try {i}\");\n            tokio::time::sleep(std::time::Duration::from_secs(1)).await;\n        }\n        if standby_lsn < primary_lsn {\n            bail!(\"didn't catch up with primary in {RETRIES} retries\");\n        }\n        let lsn_wait_time_ms = now.elapsed().as_millis() as u32;\n        now = Instant::now();\n\n        // using $1 doesn't work with ALTER SYSTEM SET\n        let safekeepers_sql = format!(\n            \"ALTER SYSTEM SET neon.safekeepers='{}'\",\n            cfg.spec.safekeeper_connstrings.join(\",\")\n        );\n        client\n            .query(&safekeepers_sql, &[])\n            .await\n     
       .context(\"setting safekeepers\")?;\n        client\n            .query(\n                \"ALTER SYSTEM SET synchronous_standby_names=walproposer\",\n                &[],\n            )\n            .await\n            .context(\"setting synchronous_standby_names\")?;\n        client\n            .query(\"SELECT pg_catalog.pg_reload_conf()\", &[])\n            .await\n            .context(\"reloading postgres config\")?;\n\n        #[cfg(feature = \"testing\")]\n        fail::fail_point!(\"compute-promotion\", |_| bail!(\n            \"compute-promotion failpoint\"\n        ));\n\n        let row = client\n            .query_one(\"SELECT * FROM pg_catalog.pg_promote()\", &[])\n            .await\n            .context(\"pg_promote\")?;\n        if !row.get::<usize, bool>(0) {\n            bail!(\"pg_promote() failed\");\n        }\n        let pg_promote_time_ms = now.elapsed().as_millis() as u32;\n        let now = Instant::now();\n\n        let row = client\n            .query_one(\"SHOW transaction_read_only\", &[])\n            .await\n            .context(\"getting transaction_read_only\")?;\n        if row.get::<usize, &str>(0) == \"on\" {\n            bail!(\"replica in read only mode after promotion\");\n        }\n\n        // Already checked validity in http handler\n        #[allow(unused_mut)]\n        let mut new_pspec = crate::compute::ParsedSpec::try_from(cfg.spec).expect(\"invalid spec\");\n        {\n            let mut state = self.state.lock().unwrap();\n\n            // Local setup has different ports for pg process (port=) for primary and secondary.\n            // Primary is stopped so we need secondary's \"port\" value\n            #[cfg(feature = \"testing\")]\n            {\n                let old_spec = &state.pspec.as_ref().unwrap().spec;\n                let Some(old_conf) = old_spec.cluster.postgresql_conf.as_ref() else {\n                    bail!(\"pspec.spec.cluster.postgresql_conf missing for endpoint\");\n                
};\n                let set: std::collections::HashMap<&str, &str> = old_conf\n                    .split_terminator('\\n')\n                    .map(|e| e.split_once(\"=\").expect(\"invalid item\"))\n                    .collect();\n\n                let Some(new_conf) = new_pspec.spec.cluster.postgresql_conf.as_mut() else {\n                    bail!(\"pspec.spec.cluster.postgresql_conf missing for supplied config\");\n                };\n                new_conf.push_str(&format!(\"port={}\\n\", set[\"port\"]));\n            }\n\n            tracing::debug!(\"applied spec: {:#?}\", new_pspec.spec);\n            if self.params.lakebase_mode {\n                ComputeNode::set_spec(&self.params, &mut state, new_pspec);\n            } else {\n                state.pspec = Some(new_pspec);\n            }\n        }\n\n        info!(\"applied new spec, reconfiguring as primary\");\n        self.reconfigure()?;\n        let reconfigure_time_ms = now.elapsed().as_millis() as u32;\n\n        Ok(PromoteState::Completed {\n            lsn_wait_time_ms,\n            pg_promote_time_ms,\n            reconfigure_time_ms,\n        })\n    }\n}\n"
  },
  {
    "path": "compute_tools/src/config.rs",
    "content": "use anyhow::Result;\nuse std::fmt::Write as FmtWrite;\nuse std::fs::{File, OpenOptions};\nuse std::io;\nuse std::io::Write;\nuse std::io::prelude::*;\nuse std::path::Path;\n\nuse compute_api::responses::TlsConfig;\nuse compute_api::spec::{\n    ComputeAudit, ComputeMode, ComputeSpec, DatabricksSettings, GenericOption,\n};\n\nuse crate::compute::ComputeNodeParams;\nuse crate::pg_helpers::{\n    DatabricksSettingsExt as _, GenericOptionExt, GenericOptionsSearch, PgOptionsSerialize,\n    escape_conf_value,\n};\nuse crate::tls::{self, SERVER_CRT, SERVER_KEY};\n\nuse utils::shard::{ShardIndex, ShardNumber};\n\n/// Check that `line` is inside a text file and put it there if it is not.\n/// Create file if it doesn't exist.\npub fn line_in_file(path: &Path, line: &str) -> Result<bool> {\n    let mut file = OpenOptions::new()\n        .read(true)\n        .write(true)\n        .create(true)\n        .append(false)\n        .truncate(false)\n        .open(path)?;\n    let buf = io::BufReader::new(&file);\n    let mut count: usize = 0;\n\n    for l in buf.lines() {\n        if l? 
== line {\n            return Ok(false);\n        }\n        count = 1;\n    }\n\n    write!(file, \"{}{}\", \"\\n\".repeat(count), line)?;\n    Ok(true)\n}\n\n/// Create or completely rewrite configuration file specified by `path`\n#[allow(clippy::too_many_arguments)]\npub fn write_postgres_conf(\n    pgdata_path: &Path,\n    params: &ComputeNodeParams,\n    spec: &ComputeSpec,\n    postgres_port: Option<u16>,\n    extension_server_port: u16,\n    tls_config: &Option<TlsConfig>,\n    databricks_settings: Option<&DatabricksSettings>,\n    lakebase_mode: bool,\n) -> Result<()> {\n    let path = pgdata_path.join(\"postgresql.conf\");\n    // File::create() destroys the file content if it exists.\n    let mut file = File::create(path)?;\n\n    // Write the postgresql.conf content from the spec file as is.\n    if let Some(conf) = &spec.cluster.postgresql_conf {\n        writeln!(file, \"{conf}\")?;\n    }\n\n    // Add options for connecting to storage\n    writeln!(file, \"# Neon storage settings\")?;\n    writeln!(file)?;\n    if let Some(conninfo) = &spec.pageserver_connection_info {\n        // Stripe size GUC should be defined prior to connection string\n        if let Some(stripe_size) = conninfo.stripe_size {\n            writeln!(\n                file,\n                \"# from compute spec's pageserver_connection_info.stripe_size field\"\n            )?;\n            writeln!(file, \"neon.stripe_size={stripe_size}\")?;\n        }\n\n        let mut libpq_urls: Option<Vec<String>> = Some(Vec::new());\n        let num_shards = if conninfo.shard_count.0 == 0 {\n            1 // unsharded, treat it as a single shard\n        } else {\n            conninfo.shard_count.0\n        };\n\n        for shard_number in 0..num_shards {\n            let shard_index = ShardIndex {\n                shard_number: ShardNumber(shard_number),\n                shard_count: conninfo.shard_count,\n            };\n            let info = 
conninfo.shards.get(&shard_index).ok_or_else(|| {\n                anyhow::anyhow!(\n                    \"shard {shard_index} missing from pageserver_connection_info shard map\"\n                )\n            })?;\n\n            let first_pageserver = info\n                .pageservers\n                .first()\n                .expect(\"must have at least one pageserver\");\n\n            // Add the libpq URL to the array, or if the URL is missing, reset the array\n            // forgetting any previous entries. All servers must have a libpq URL, or none\n            // at all.\n            if let Some(url) = &first_pageserver.libpq_url {\n                if let Some(ref mut urls) = libpq_urls {\n                    urls.push(url.clone());\n                }\n            } else {\n                libpq_urls = None\n            }\n        }\n        if let Some(libpq_urls) = libpq_urls {\n            writeln!(\n                file,\n                \"# derived from compute spec's pageserver_connection_info field\"\n            )?;\n            writeln!(\n                file,\n                \"neon.pageserver_connstring={}\",\n                escape_conf_value(&libpq_urls.join(\",\"))\n            )?;\n        } else {\n            writeln!(file, \"# no neon.pageserver_connstring\")?;\n        }\n    } else {\n        // Stripe size GUC should be defined prior to connection string\n        if let Some(stripe_size) = spec.shard_stripe_size {\n            writeln!(file, \"# from compute spec's shard_stripe_size field\")?;\n            writeln!(file, \"neon.stripe_size={stripe_size}\")?;\n        }\n        if let Some(s) = &spec.pageserver_connstring {\n            writeln!(file, \"# from compute spec's pageserver_connstring field\")?;\n            writeln!(file, \"neon.pageserver_connstring={}\", escape_conf_value(s))?;\n        }\n    }\n\n    if !spec.safekeeper_connstrings.is_empty() {\n        let mut neon_safekeepers_value = String::new();\n        
tracing::info!(\n            \"safekeepers_connstrings is not zero, gen: {:?}\",\n            spec.safekeepers_generation\n        );\n        // If generation is given, prepend sk list with g#number:\n        if let Some(generation) = spec.safekeepers_generation {\n            write!(neon_safekeepers_value, \"g#{generation}:\")?;\n        }\n        neon_safekeepers_value.push_str(&spec.safekeeper_connstrings.join(\",\"));\n        writeln!(\n            file,\n            \"neon.safekeepers={}\",\n            escape_conf_value(&neon_safekeepers_value)\n        )?;\n    }\n    if let Some(s) = &spec.tenant_id {\n        writeln!(file, \"neon.tenant_id={}\", escape_conf_value(&s.to_string()))?;\n    }\n    if let Some(s) = &spec.timeline_id {\n        writeln!(\n            file,\n            \"neon.timeline_id={}\",\n            escape_conf_value(&s.to_string())\n        )?;\n    }\n    if let Some(s) = &spec.project_id {\n        writeln!(file, \"neon.project_id={}\", escape_conf_value(s))?;\n    }\n    if let Some(s) = &spec.branch_id {\n        writeln!(file, \"neon.branch_id={}\", escape_conf_value(s))?;\n    }\n    if let Some(s) = &spec.endpoint_id {\n        writeln!(file, \"neon.endpoint_id={}\", escape_conf_value(s))?;\n    }\n\n    // tls\n    if let Some(tls_config) = tls_config {\n        writeln!(file, \"ssl = on\")?;\n\n        // postgres requires the keyfile to be in a secure file,\n        // currently too complicated to ensure that at the VM level,\n        // so we just copy them to another file instead. 
:shrug:\n        tls::update_key_path_blocking(pgdata_path, tls_config);\n\n        // these are the default, but good to be explicit.\n        writeln!(file, \"ssl_cert_file = '{SERVER_CRT}'\")?;\n        writeln!(file, \"ssl_key_file = '{SERVER_KEY}'\")?;\n    }\n\n    // Locales\n    if cfg!(target_os = \"macos\") {\n        writeln!(file, \"lc_messages='C'\")?;\n        writeln!(file, \"lc_monetary='C'\")?;\n        writeln!(file, \"lc_time='C'\")?;\n        writeln!(file, \"lc_numeric='C'\")?;\n    } else {\n        writeln!(file, \"lc_messages='C.UTF-8'\")?;\n        writeln!(file, \"lc_monetary='C.UTF-8'\")?;\n        writeln!(file, \"lc_time='C.UTF-8'\")?;\n        writeln!(file, \"lc_numeric='C.UTF-8'\")?;\n    }\n\n    writeln!(file, \"neon.compute_mode={}\", spec.mode.to_type_str())?;\n    match spec.mode {\n        ComputeMode::Primary => {}\n        ComputeMode::Static(lsn) => {\n            // hot_standby is 'on' by default, but let's be explicit\n            writeln!(file, \"hot_standby=on\")?;\n            writeln!(file, \"recovery_target_lsn='{lsn}'\")?;\n        }\n        ComputeMode::Replica => {\n            // hot_standby is 'on' by default, but let's be explicit\n            writeln!(file, \"hot_standby=on\")?;\n        }\n    }\n\n    if cfg!(target_os = \"linux\") {\n        // Check /proc/sys/vm/overcommit_memory -- if it equals 2 (i.e. linux memory overcommit is\n        // disabled), then the control plane has enabled swap and we should set\n        // dynamic_shared_memory_type = 'mmap'.\n        //\n        // This is (maybe?) temporary - for more, see https://github.com/neondatabase/cloud/issues/12047.\n        let overcommit_memory_contents = std::fs::read_to_string(\"/proc/sys/vm/overcommit_memory\")\n            // ignore any errors - they may be expected to occur under certain situations (e.g. 
when\n            // not running in Linux).\n            .unwrap_or_else(|_| String::new());\n        if overcommit_memory_contents.trim() == \"2\" {\n            let opt = GenericOption {\n                name: \"dynamic_shared_memory_type\".to_owned(),\n                value: Some(\"mmap\".to_owned()),\n                vartype: \"enum\".to_owned(),\n            };\n\n            writeln!(file, \"{}\", opt.to_pg_setting())?;\n        }\n    }\n\n    writeln!(\n        file,\n        \"neon.privileged_role_name={}\",\n        escape_conf_value(params.privileged_role_name.as_str())\n    )?;\n\n    // If there are any extra options in the 'settings' field, append those\n    if spec.cluster.settings.is_some() {\n        writeln!(file, \"# Managed by compute_ctl: begin\")?;\n        write!(file, \"{}\", spec.cluster.settings.as_pg_settings())?;\n        writeln!(file, \"# Managed by compute_ctl: end\")?;\n    }\n\n    // If base audit logging is enabled, configure it.\n    // In this setup, the audit log will be written to the standard postgresql log.\n    //\n    // If compliance audit logging is enabled, configure pgaudit.\n    //\n    // Note, that this is called after the settings from spec are written.\n    // This way we always override the settings from the spec\n    // and don't allow the user or the control plane admin to change them.\n    match spec.audit_log_level {\n        ComputeAudit::Disabled => {}\n        ComputeAudit::Log | ComputeAudit::Base => {\n            writeln!(file, \"# Managed by compute_ctl base audit settings: start\")?;\n            writeln!(file, \"pgaudit.log='ddl,role'\")?;\n            // Disable logging of catalog queries to reduce the noise\n            writeln!(file, \"pgaudit.log_catalog=off\")?;\n\n            if let Some(libs) = spec.cluster.settings.find(\"shared_preload_libraries\") {\n                let mut extra_shared_preload_libraries = String::new();\n                if !libs.contains(\"pgaudit\") {\n                    
extra_shared_preload_libraries.push_str(\",pgaudit\");\n                }\n                writeln!(\n                    file,\n                    \"shared_preload_libraries='{libs}{extra_shared_preload_libraries}'\"\n                )?;\n            } else {\n                // Typically, this should be unreachable,\n                // because we always set at least some shared_preload_libraries in the spec\n                // but let's handle it explicitly anyway.\n                writeln!(file, \"shared_preload_libraries='neon,pgaudit'\")?;\n            }\n            writeln!(file, \"# Managed by compute_ctl base audit settings: end\")?;\n        }\n        ComputeAudit::Hipaa | ComputeAudit::Extended | ComputeAudit::Full => {\n            writeln!(\n                file,\n                \"# Managed by compute_ctl compliance audit settings: begin\"\n            )?;\n            // Enable logging of parameters.\n            // This is very verbose and may contain sensitive data.\n            if spec.audit_log_level == ComputeAudit::Full {\n                writeln!(file, \"pgaudit.log_parameter=on\")?;\n                writeln!(file, \"pgaudit.log='all'\")?;\n            } else {\n                writeln!(file, \"pgaudit.log_parameter=off\")?;\n                writeln!(file, \"pgaudit.log='all, -misc'\")?;\n            }\n            // Disable logging of catalog queries\n            // The catalog doesn't contain sensitive data, so we don't need to audit it.\n            writeln!(file, \"pgaudit.log_catalog=off\")?;\n            // Set log rotation to 5 minutes\n            // TODO: tune this after performance testing\n            writeln!(file, \"pgaudit.log_rotation_age=5\")?;\n\n            // Enable audit logs for pg_session_jwt extension\n            // TODO: Consider a good approach for shipping pg_session_jwt logs to the same sink as\n            // pgAudit - additional context in https://github.com/neondatabase/cloud/issues/28863\n            //\n    
        // writeln!(file, \"pg_session_jwt.audit_log=on\")?;\n\n            // Add audit shared_preload_libraries, if they are not present.\n            //\n            // The caller who sets the flag is responsible for ensuring that the necessary\n            // shared_preload_libraries are present in the compute image,\n            // otherwise the compute start will fail.\n            if let Some(libs) = spec.cluster.settings.find(\"shared_preload_libraries\") {\n                let mut extra_shared_preload_libraries = String::new();\n                if !libs.contains(\"pgaudit\") {\n                    extra_shared_preload_libraries.push_str(\",pgaudit\");\n                }\n                if !libs.contains(\"pgauditlogtofile\") {\n                    extra_shared_preload_libraries.push_str(\",pgauditlogtofile\");\n                }\n                writeln!(\n                    file,\n                    \"shared_preload_libraries='{libs}{extra_shared_preload_libraries}'\"\n                )?;\n            } else {\n                // Typically, this should be unreachable,\n                // because we always set at least some shared_preload_libraries in the spec\n                // but let's handle it explicitly anyway.\n                writeln!(\n                    file,\n                    \"shared_preload_libraries='neon,pgaudit,pgauditlogtofile'\"\n                )?;\n            }\n            writeln!(\n                file,\n                \"# Managed by compute_ctl compliance audit settings: end\"\n            )?;\n        }\n    }\n\n    writeln!(file, \"neon.extension_server_port={extension_server_port}\")?;\n\n    if spec.drop_subscriptions_before_start {\n        writeln!(file, \"neon.disable_logical_replication_subscribers=true\")?;\n    } else {\n        // be explicit about the default value\n        writeln!(file, \"neon.disable_logical_replication_subscribers=false\")?;\n    }\n\n    // We need Postgres to send logs to rsyslog so 
that we can forward them\n    // further to customers' log aggregation systems.\n    if spec.logs_export_host.is_some() {\n        writeln!(file, \"log_destination='stderr,syslog'\")?;\n    }\n\n    if lakebase_mode {\n        // Explicitly set the port based on the connstr, overriding any previous port setting.\n        // Note: It is important that we don't specify a different port again after this.\n        let port = postgres_port.expect(\"port must be present in connstr\");\n        writeln!(file, \"port = {port}\")?;\n\n        // These are Databricks-specific settings.\n        // This should be at the end of the file but before `compute_ctl_temp_override.conf` below\n        // so that it can override any settings above.\n        // `compute_ctl_temp_override.conf` is intended to override any settings above during specific operations.\n        // To prevent potential breakage in the future, we keep it above `compute_ctl_temp_override.conf`.\n        writeln!(file, \"# Databricks settings start\")?;\n        if let Some(settings) = databricks_settings {\n            writeln!(file, \"{}\", settings.as_pg_settings())?;\n        }\n        writeln!(file, \"# Databricks settings end\")?;\n    }\n\n    // It is essential to keep this line at the end of the file,\n    // because it is intended to override any settings above.\n    writeln!(file, \"include_if_exists = 'compute_ctl_temp_override.conf'\")?;\n\n    Ok(())\n}\n\npub fn with_compute_ctl_tmp_override<F>(pgdata_path: &Path, options: &str, exec: F) -> Result<()>\nwhere\n    F: FnOnce() -> Result<()>,\n{\n    let path = pgdata_path.join(\"compute_ctl_temp_override.conf\");\n    let mut file = File::create(path)?;\n    write!(file, \"{options}\")?;\n\n    let res = exec();\n\n    file.set_len(0)?;\n\n    res\n}\n"
  },
  {
    "path": "compute_tools/src/config_template/compute_audit_rsyslog_template.conf",
    "content": "# Load imfile module to read log files\nmodule(load=\"imfile\")\n\n# Input configuration for log files in the specified directory\n# The messages can be multiline. The start of the message is a timestamp\n# in \"%Y-%m-%d %H:%M:%S.%3N GMT\" (so timezone hardcoded).\n# Replace log_directory with the directory containing the log files\ninput(type=\"imfile\" File=\"{log_directory}/*.log\"\n  Tag=\"pgaudit_log\" Severity=\"info\" Facility=\"local5\"\n  startmsg.regex=\"^[[:digit:]]{{4}}-[[:digit:]]{{2}}-[[:digit:]]{{2}} [[:digit:]]{{2}}:[[:digit:]]{{2}}:[[:digit:]]{{2}}.[[:digit:]]{{3}} GMT,\")\n\n# the directory to store rsyslog state files\nglobal(\n  workDirectory=\"/var/log/rsyslog\"\n  DefaultNetstreamDriverCAFile=\"/etc/ssl/certs/ca-certificates.crt\"\n)\n\n# Whether the remote syslog receiver uses tls\nset $.remote_syslog_tls = \"{remote_syslog_tls}\";\n\n# Construct json, endpoint_id and project_id as additional metadata\nset $.json_log!endpoint_id = \"{endpoint_id}\";\nset $.json_log!project_id = \"{project_id}\";\nset $.json_log!msg = $msg;\n\n# Template suitable for rfc5424 syslog format\ntemplate(name=\"PgAuditLog\" type=\"string\"\n    string=\"<%PRI%>1 %TIMESTAMP:::date-rfc3339% %HOSTNAME% - - - - %$.json_log%\")\n\n# Forward to remote syslog receiver (over TLS)\nif ( $syslogtag == 'pgaudit_log' ) then {{\n  if ( $.remote_syslog_tls == 'true' ) then {{\n    action(type=\"omfwd\" target=\"{remote_syslog_host}\" port=\"{remote_syslog_port}\" protocol=\"tcp\"\n      template=\"PgAuditLog\"\n      queue.type=\"linkedList\"\n      queue.size=\"1000\"\n      action.ResumeRetryCount=\"10\"\n      StreamDriver=\"gtls\"\n      StreamDriverMode=\"1\"\n      StreamDriverAuthMode=\"x509/name\"\n      StreamDriverPermittedPeers=\"{remote_syslog_host}\"\n      StreamDriver.CheckExtendedKeyPurpose=\"on\"\n      StreamDriver.PermitExpiredCerts=\"off\"\n    )\n    stop\n  }} else {{\n    action(type=\"omfwd\" target=\"{remote_syslog_host}\" 
port=\"{remote_syslog_port}\" protocol=\"tcp\"\n      template=\"PgAuditLog\"\n      queue.type=\"linkedList\"\n      queue.size=\"1000\"\n      action.ResumeRetryCount=\"10\"\n    )\n    stop\n  }}\n}}\n"
  },
  {
    "path": "compute_tools/src/config_template/compute_rsyslog_postgres_export_template.conf",
    "content": "# Program name comes from postgres' syslog_facility configuration: https://www.postgresql.org/docs/current/runtime-config-logging.html#GUC-SYSLOG-IDENT\n# Default value is 'postgres'.\nif $programname == 'postgres' then {{\n    # Forward Postgres logs to telemetry otel collector\n    action(type=\"omfwd\" target=\"{logs_export_target}\" port=\"{logs_export_port}\" protocol=\"tcp\"\n           template=\"RSYSLOG_SyslogProtocol23Format\"\n           action.resumeRetryCount=\"3\"\n           queue.type=\"linkedList\" queue.size=\"1000\")\n    stop\n}}\n"
  },
  {
    "path": "compute_tools/src/configurator.rs",
    "content": "use std::fs::File;\nuse std::thread;\nuse std::{path::Path, sync::Arc};\n\nuse anyhow::Result;\nuse compute_api::responses::{ComputeConfig, ComputeStatus};\nuse tracing::{error, info, instrument};\n\nuse crate::compute::{ComputeNode, ParsedSpec};\nuse crate::spec::get_config_from_control_plane;\n\n#[instrument(skip_all)]\nfn configurator_main_loop(compute: &Arc<ComputeNode>) {\n    info!(\"waiting for reconfiguration requests\");\n    loop {\n        let mut state = compute.state.lock().unwrap();\n        /* BEGIN_HADRON */\n        // RefreshConfiguration should only be used inside the loop\n        assert_ne!(state.status, ComputeStatus::RefreshConfiguration);\n        /* END_HADRON */\n\n        if compute.params.lakebase_mode {\n            while state.status != ComputeStatus::ConfigurationPending\n                && state.status != ComputeStatus::RefreshConfigurationPending\n                && state.status != ComputeStatus::Failed\n            {\n                info!(\"configurator: compute status: {:?}, sleeping\", state.status);\n                state = compute.state_changed.wait(state).unwrap();\n            }\n        } else {\n            // We have to re-check the status after re-acquiring the lock because it could be that\n            // the status has changed while we were waiting for the lock, and we might not need to\n            // wait on the condition variable. 
Otherwise, we might end up in some soft-/deadlock, i.e.\n            // we are waiting for a condition variable that will never be signaled.\n            if state.status != ComputeStatus::ConfigurationPending {\n                state = compute.state_changed.wait(state).unwrap();\n            }\n        }\n\n        // Re-check the status after waking up\n        if state.status == ComputeStatus::ConfigurationPending {\n            info!(\"got configuration request\");\n            state.set_status(ComputeStatus::Configuration, &compute.state_changed);\n            drop(state);\n\n            let mut new_status = ComputeStatus::Failed;\n            if let Err(e) = compute.reconfigure() {\n                error!(\"could not configure compute node: {}\", e);\n            } else {\n                new_status = ComputeStatus::Running;\n                info!(\"compute node configured\");\n            }\n\n            // XXX: used to test that API is blocking\n            // std::thread::sleep(std::time::Duration::from_millis(10000));\n\n            compute.set_status(new_status);\n        } else if state.status == ComputeStatus::RefreshConfigurationPending {\n            info!(\n                \"compute node suspects its configuration is out of date, now refreshing configuration\"\n            );\n            state.set_status(ComputeStatus::RefreshConfiguration, &compute.state_changed);\n            // Drop the lock guard here to avoid holding the lock while downloading config from the control plane / HCC.\n            // This is the only thread that can move compute_ctl out of the `RefreshConfiguration` state, so it\n            // is safe to drop the lock like this.\n            drop(state);\n\n            let get_config_result: anyhow::Result<ComputeConfig> =\n                if let Some(config_path) = &compute.params.config_path_test_only {\n                    // This path is only to make testing easier. 
In production we always get the config from the HCC.\n                    info!(\n                        \"reloading config.json from path: {}\",\n                        config_path.to_string_lossy()\n                    );\n                    let path = Path::new(config_path);\n                    if let Ok(file) = File::open(path) {\n                        match serde_json::from_reader::<File, ComputeConfig>(file) {\n                            Ok(config) => Ok(config),\n                            Err(e) => {\n                                error!(\"could not parse config file: {}\", e);\n                                Err(anyhow::anyhow!(\"could not parse config file: {}\", e))\n                            }\n                        }\n                    } else {\n                        error!(\n                            \"could not open config file at path: {:?}\",\n                            config_path.to_string_lossy()\n                        );\n                        Err(anyhow::anyhow!(\n                            \"could not open config file at path: {}\",\n                            config_path.to_string_lossy()\n                        ))\n                    }\n                } else if let Some(control_plane_uri) = &compute.params.control_plane_uri {\n                    get_config_from_control_plane(control_plane_uri, &compute.params.compute_id)\n                } else {\n                    Err(anyhow::anyhow!(\"config_path_test_only is not set\"))\n                };\n\n            // Parse any received ComputeSpec and transpose the result into a Result<Option<ParsedSpec>>.\n            let parsed_spec_result: Result<Option<ParsedSpec>> =\n                get_config_result.and_then(|config| {\n                    if let Some(spec) = config.spec {\n                        if let Ok(pspec) = ParsedSpec::try_from(spec) {\n                            Ok(Some(pspec))\n                        } else {\n                            
Err(anyhow::anyhow!(\"could not parse spec\"))\n                        }\n                    } else {\n                        Ok(None)\n                    }\n                });\n\n            let new_status: ComputeStatus;\n            match parsed_spec_result {\n                // Control plane (HCM) returned a spec and we were able to parse it.\n                Ok(Some(pspec)) => {\n                    {\n                        let mut state = compute.state.lock().unwrap();\n                        // Defensive programming to make sure this thread is indeed the only one that can move the compute\n                        // node out of the `RefreshConfiguration` state. Would be nice if we can encode this invariant\n                        // into the type system.\n                        assert_eq!(state.status, ComputeStatus::RefreshConfiguration);\n\n                        if state\n                            .pspec\n                            .as_ref()\n                            .map(|ps| ps.pageserver_conninfo.clone())\n                            == Some(pspec.pageserver_conninfo.clone())\n                        {\n                            info!(\n                                \"Refresh configuration: Retrieved spec is the same as the current spec. Waiting for control plane to update the spec before attempting reconfiguration.\"\n                            );\n                            state.status = ComputeStatus::Running;\n                            compute.state_changed.notify_all();\n                            drop(state);\n                            std::thread::sleep(std::time::Duration::from_secs(5));\n                            continue;\n                        }\n                        // state.pspec is consumed by compute.reconfigure() below. Note that compute.reconfigure() will acquire\n                        // the compute.state lock again so we need to have the lock guard go out of scope here. 
We could add a\n                        // \"locked\" variant of compute.reconfigure() that takes the lock guard as an argument to make this cleaner,\n                        // but it's not worth forking the codebase too much for this minor point alone right now.\n                        state.pspec = Some(pspec);\n                    }\n                    match compute.reconfigure() {\n                        Ok(_) => {\n                            info!(\"Refresh configuration: compute node configured\");\n                            new_status = ComputeStatus::Running;\n                        }\n                        Err(e) => {\n                            error!(\n                                \"Refresh configuration: could not configure compute node: {}\",\n                                e\n                            );\n                            // Set the compute node back to the `RefreshConfigurationPending` state if the configuration\n                            // was not successful. It should be okay to treat this situation the same as if the loop\n                            // hasn't executed yet as long as the detection side keeps notifying.\n                            new_status = ComputeStatus::RefreshConfigurationPending;\n                        }\n                    }\n                }\n                // Control plane (HCM)'s response does not contain a spec. This is the \"Empty\" attachment case.\n                Ok(None) => {\n                    info!(\n                        \"Compute Manager signaled that this compute is no longer attached to any storage. Exiting.\"\n                    );\n                    // We just immediately terminate the whole compute_ctl in this case. 
It's not necessary to attempt a\n                    // clean shutdown as Postgres is probably not responding anyway (which is why we are in this refresh\n                    // configuration state).\n                    std::process::exit(1);\n                }\n                // Various error cases:\n                // - The request to the control plane (HCM) either failed or returned a malformed spec.\n                // - compute_ctl itself is configured incorrectly (e.g., compute_id is not set).\n                Err(e) => {\n                    error!(\n                        \"Refresh configuration: error getting a parsed spec: {:?}\",\n                        e\n                    );\n                    new_status = ComputeStatus::RefreshConfigurationPending;\n                    // We may be dealing with an overloaded HCM if we end up in this path. Backoff 5 seconds before\n                    // retrying to avoid hammering the HCM.\n                    std::thread::sleep(std::time::Duration::from_secs(5));\n                }\n            }\n            compute.set_status(new_status);\n        } else if state.status == ComputeStatus::Failed {\n            info!(\"compute node is now in Failed state, exiting\");\n            break;\n        } else {\n            info!(\"woken up for compute status: {:?}, sleeping\", state.status);\n        }\n    }\n}\n\npub fn launch_configurator(compute: &Arc<ComputeNode>) -> thread::JoinHandle<()> {\n    let compute = Arc::clone(compute);\n\n    let runtime = tokio::runtime::Handle::current();\n\n    thread::Builder::new()\n        .name(\"compute-configurator\".into())\n        .spawn(move || {\n            let _rt_guard = runtime.enter();\n            configurator_main_loop(&compute);\n            info!(\"configurator thread is exited\");\n        })\n        .expect(\"cannot launch configurator thread\")\n}\n"
  },
  {
    "path": "compute_tools/src/disk_quota.rs",
    "content": "use anyhow::Context;\nuse tracing::instrument;\n\npub const DISK_QUOTA_BIN: &str = \"/neonvm/bin/set-disk-quota\";\n\n/// If size_bytes is 0, it disables the quota. Otherwise, it sets filesystem quota to size_bytes.\n/// `fs_mountpoint` should point to the mountpoint of the filesystem where the quota should be set.\n#[instrument]\npub fn set_disk_quota(size_bytes: u64, fs_mountpoint: &str) -> anyhow::Result<()> {\n    let size_kb = size_bytes / 1024;\n    // run `/neonvm/bin/set-disk-quota {size_kb} {mountpoint}`\n    let child_result = std::process::Command::new(\"/usr/bin/sudo\")\n        .arg(DISK_QUOTA_BIN)\n        .arg(size_kb.to_string())\n        .arg(fs_mountpoint)\n        .spawn();\n\n    child_result\n        .context(\"spawn() failed\")\n        .and_then(|mut child| child.wait().context(\"wait() failed\"))\n        .and_then(|status| match status.success() {\n            true => Ok(()),\n            false => Err(anyhow::anyhow!(\"process exited with {status}\")),\n        })\n        // wrap any prior error with the overall context that we couldn't run the command\n        .with_context(|| format!(\"could not run `/usr/bin/sudo {DISK_QUOTA_BIN}`\"))\n}\n"
  },
  {
    "path": "compute_tools/src/extension_server.rs",
    "content": "// Download extension files from the extension store\n// and put them in the right place in the postgres directory (share / lib)\n/*\nThe layout of the S3 bucket is as follows:\n5615610098 // this is an extension build number\n├── v14\n│   ├── extensions\n│   │   ├── anon.tar.zst\n│   │   └── embedding.tar.zst\n│   └── ext_index.json\n└── v15\n    ├── extensions\n    │   ├── anon.tar.zst\n    │   └── embedding.tar.zst\n    └── ext_index.json\n5615261079\n├── v14\n│   ├── extensions\n│   │   └── anon.tar.zst\n│   └── ext_index.json\n└── v15\n    ├── extensions\n    │   └── anon.tar.zst\n    └── ext_index.json\n5623261088\n├── v14\n│   ├── extensions\n│   │   └── embedding.tar.zst\n│   └── ext_index.json\n└── v15\n    ├── extensions\n    │   └── embedding.tar.zst\n    └── ext_index.json\n\nNote that build number cannot be part of prefix because we might need extensions\nfrom other build numbers.\n\next_index.json stores the control files and location of extension archives\nIt also stores a list of public extensions and a library_index\n\nWe don't need to duplicate extension.tar.zst files.\nWe only need to upload a new one if it is updated.\n(Although currently we just upload every time anyways, hopefully will change\nthis sometime)\n\n*access* is controlled by spec\n\nMore specifically, here is an example ext_index.json\n{\n    \"public_extensions\": [\n        \"anon\",\n        \"pg_buffercache\"\n    ],\n    \"library_index\": {\n        \"anon\": \"anon\",\n        \"pg_buffercache\": \"pg_buffercache\"\n    },\n    \"extension_data\": {\n        \"pg_buffercache\": {\n            \"control_data\": {\n                \"pg_buffercache.control\": \"# pg_buffercache extension \\ncomment = 'examine the shared buffer cache' \\ndefault_version = '1.3' \\nmodule_pathname = '$libdir/pg_buffercache' \\nrelocatable = true \\ntrusted=true\"\n            },\n            \"archive_path\": \"5670669815/v14/extensions/pg_buffercache.tar.zst\"\n        },\n       
 \"anon\": {\n            \"control_data\": {\n                \"anon.control\": \"# PostgreSQL Anonymizer (anon) extension \\ncomment = 'Data anonymization tools' \\ndefault_version = '1.1.0' \\ndirectory='extension/anon' \\nrelocatable = false \\nrequires = 'pgcrypto' \\nsuperuser = false \\nmodule_pathname = '$libdir/anon' \\ntrusted = true \\n\"\n            },\n            \"archive_path\": \"5670669815/v14/extensions/anon.tar.zst\"\n        }\n    }\n}\n*/\nuse std::path::Path;\nuse std::str;\n\nuse crate::metrics::{REMOTE_EXT_REQUESTS_TOTAL, UNKNOWN_HTTP_STATUS};\nuse anyhow::{Context, Result, bail};\nuse bytes::Bytes;\nuse compute_api::spec::RemoteExtSpec;\nuse postgres_versioninfo::PgMajorVersion;\nuse regex::Regex;\nuse remote_storage::*;\nuse reqwest::StatusCode;\nuse tar::Archive;\nuse tracing::info;\nuse tracing::log::warn;\nuse url::Url;\nuse zstd::stream::read::Decoder;\n\nfn get_pg_config(argument: &str, pgbin: &str) -> String {\n    // gives the result of `pg_config [argument]`\n    // where argument is a flag like `--version` or `--sharedir`\n    let pgconfig = pgbin\n        .strip_suffix(\"postgres\")\n        .expect(\"bad pgbin\")\n        .to_owned()\n        + \"/pg_config\";\n    let config_output = std::process::Command::new(pgconfig)\n        .arg(argument)\n        .output()\n        .expect(\"pg_config error\");\n    std::str::from_utf8(&config_output.stdout)\n        .expect(\"pg_config error\")\n        .trim()\n        .to_string()\n}\n\npub fn get_pg_version(pgbin: &str) -> PgMajorVersion {\n    // pg_config --version returns a (platform specific) human readable string\n    // such as \"PostgreSQL 15.4\". 
We parse this to v14/v15/v16 etc.\n    let human_version = get_pg_config(\"--version\", pgbin);\n    parse_pg_version(&human_version)\n}\n\npub fn get_pg_version_string(pgbin: &str) -> String {\n    get_pg_version(pgbin).v_str()\n}\n\nfn parse_pg_version(human_version: &str) -> PgMajorVersion {\n    use PgMajorVersion::*;\n    // Normal releases have version strings like \"PostgreSQL 15.4\". But there\n    // are also pre-release versions like \"PostgreSQL 17devel\" or \"PostgreSQL\n    // 16beta2\" or \"PostgreSQL 17rc1\". And with the --with-extra-version\n    // configure option, you can tack any string to the version number,\n    // e.g. \"PostgreSQL 15.4foobar\".\n    match Regex::new(r\"^PostgreSQL (?<major>\\d+).+\")\n        .unwrap()\n        .captures(human_version)\n    {\n        Some(captures) if captures.len() == 2 => match &captures[\"major\"] {\n            \"14\" => return PG14,\n            \"15\" => return PG15,\n            \"16\" => return PG16,\n            \"17\" => return PG17,\n            _ => {}\n        },\n        _ => {}\n    }\n    panic!(\"Unsuported postgres version {human_version}\");\n}\n\n// download the archive for a given extension,\n// unzip it, and place files in the appropriate locations (share/lib)\npub async fn download_extension(\n    ext_name: &str,\n    ext_path: &RemotePath,\n    remote_ext_base_url: &Url,\n    pgbin: &str,\n) -> Result<u64> {\n    info!(\"Download extension {:?} from {:?}\", ext_name, ext_path);\n\n    // TODO add retry logic\n    let download_buffer =\n        match download_extension_tar(remote_ext_base_url, &ext_path.to_string()).await {\n            Ok(buffer) => buffer,\n            Err(error_message) => {\n                return Err(anyhow::anyhow!(\n                    \"error downloading extension {:?}: {:?}\",\n                    ext_name,\n                    error_message\n                ));\n            }\n        };\n\n    let download_size = download_buffer.len() as u64;\n    
info!(\"Download size {:?}\", download_size);\n    // it's unclear whether it is more performant to decompress into memory or not\n    // TODO: decompressing into memory can be avoided\n    let decoder = Decoder::new(download_buffer.as_ref())?;\n    let mut archive = Archive::new(decoder);\n\n    let unzip_dest = pgbin\n        .strip_suffix(\"/bin/postgres\")\n        .expect(\"bad pgbin\")\n        .to_string()\n        + \"/download_extensions\";\n    archive.unpack(&unzip_dest)?;\n    info!(\"Download + unzip {:?} completed successfully\", &ext_path);\n\n    let sharedir_paths = (\n        unzip_dest.to_string() + \"/share/extension\",\n        Path::new(&get_pg_config(\"--sharedir\", pgbin)).join(\"extension\"),\n    );\n    let libdir_paths = (\n        unzip_dest.to_string() + \"/lib\",\n        Path::new(&get_pg_config(\"--pkglibdir\", pgbin)).to_path_buf(),\n    );\n    // move contents of the libdir / sharedir in unzipped archive to the correct local paths\n    for paths in [sharedir_paths, libdir_paths] {\n        let (zip_dir, real_dir) = paths;\n\n        let dir = match std::fs::read_dir(&zip_dir) {\n            Ok(dir) => dir,\n            Err(e) => match e.kind() {\n                // In the event of a SQL-only extension, there would be nothing\n                // to move from the lib/ directory, so note that in the log and\n                // move on.\n                std::io::ErrorKind::NotFound => {\n                    info!(\"nothing to move from {}\", zip_dir);\n                    continue;\n                }\n                _ => return Err(anyhow::anyhow!(e)),\n            },\n        };\n\n        info!(\"mv {zip_dir:?}/*  {real_dir:?}\");\n\n        for file in dir {\n            let old_file = file?.path();\n            let new_file =\n                Path::new(&real_dir).join(old_file.file_name().context(\"error parsing file\")?);\n            info!(\"moving {old_file:?} to {new_file:?}\");\n\n            // extension download failed: 
Directory not empty (os error 39)\n            match std::fs::rename(old_file, new_file) {\n                Ok(()) => info!(\"move succeeded\"),\n                Err(e) => {\n                    warn!(\"move failed, probably because the extension already exists: {e}\")\n                }\n            }\n        }\n    }\n    info!(\"done moving extension {ext_name}\");\n    Ok(download_size)\n}\n\n// Create extension control files from spec\npub fn create_control_files(remote_extensions: &RemoteExtSpec, pgbin: &str) {\n    let local_sharedir = Path::new(&get_pg_config(\"--sharedir\", pgbin)).join(\"extension\");\n    for (ext_name, ext_data) in remote_extensions.extension_data.iter() {\n        // Check if extension is present in public or custom.\n        // If not, then it is not allowed to be used by this compute.\n        if let Some(public_extensions) = &remote_extensions.public_extensions {\n            if !public_extensions.contains(ext_name) {\n                if let Some(custom_extensions) = &remote_extensions.custom_extensions {\n                    if !custom_extensions.contains(ext_name) {\n                        continue; // skip this extension, it is not allowed\n                    }\n                }\n            }\n        }\n\n        for (control_name, control_content) in &ext_data.control_data {\n            let control_path = local_sharedir.join(control_name);\n            if !control_path.exists() {\n                info!(\"writing file {:?}{:?}\", control_path, control_content);\n                std::fs::write(control_path, control_content).unwrap();\n            } else {\n                warn!(\n                    \"control file {:?} exists both locally and remotely. 
ignoring the remote version.\",\n                    control_path\n                );\n            }\n        }\n    }\n}\n\n// Do request to extension storage proxy, e.g.,\n// curl http://pg-ext-s3-gateway.pg-ext-s3-gateway.svc.cluster.local/latest/v15/extensions/anon.tar.zst\n// using HTTP GET and return the response body as bytes.\nasync fn download_extension_tar(remote_ext_base_url: &Url, ext_path: &str) -> Result<Bytes> {\n    let uri = remote_ext_base_url.join(ext_path).with_context(|| {\n        format!(\n            \"failed to create the remote extension URI for {ext_path} using {remote_ext_base_url}\"\n        )\n    })?;\n    let filename = Path::new(ext_path)\n        .file_name()\n        .unwrap_or_else(|| std::ffi::OsStr::new(\"unknown\"))\n        .to_str()\n        .unwrap_or(\"unknown\")\n        .to_string();\n\n    info!(\"Downloading extension file '{}' from uri {}\", filename, uri);\n\n    match do_extension_server_request(uri).await {\n        Ok(resp) => {\n            info!(\"Successfully downloaded remote extension data {}\", ext_path);\n            REMOTE_EXT_REQUESTS_TOTAL\n                .with_label_values(&[&StatusCode::OK.to_string(), &filename])\n                .inc();\n            Ok(resp)\n        }\n        Err((msg, status)) => {\n            REMOTE_EXT_REQUESTS_TOTAL\n                .with_label_values(&[&status, &filename])\n                .inc();\n            bail!(msg);\n        }\n    }\n}\n\n// Do a single remote extensions server request.\n// Return result or (error message + stringified status code) in case of any failures.\nasync fn do_extension_server_request(uri: Url) -> Result<Bytes, (String, String)> {\n    let resp = reqwest::get(uri).await.map_err(|e| {\n        (\n            format!(\"could not perform remote extensions server request: {e:?}\"),\n            UNKNOWN_HTTP_STATUS.to_string(),\n        )\n    })?;\n    let status = resp.status();\n\n    match status {\n        StatusCode::OK => match 
resp.bytes().await {\n            Ok(resp) => Ok(resp),\n            Err(e) => Err((\n                format!(\"could not read remote extensions server response: {e:?}\"),\n                // It's fine to return and report error with status as 200 OK,\n                // because we still failed to read the response.\n                status.to_string(),\n            )),\n        },\n        StatusCode::SERVICE_UNAVAILABLE => Err((\n            \"remote extensions server is temporarily unavailable\".to_string(),\n            status.to_string(),\n        )),\n        _ => Err((\n            format!(\"unexpected remote extensions server response status code: {status}\"),\n            status.to_string(),\n        )),\n    }\n}\n\n#[cfg(test)]\nmod tests {\n    use super::parse_pg_version;\n\n    #[test]\n    fn test_parse_pg_version() {\n        use postgres_versioninfo::PgMajorVersion::*;\n        assert_eq!(parse_pg_version(\"PostgreSQL 15.4\"), PG15);\n        assert_eq!(parse_pg_version(\"PostgreSQL 15.14\"), PG15);\n        assert_eq!(\n            parse_pg_version(\"PostgreSQL 15.4 (Ubuntu 15.4-0ubuntu0.23.04.1)\"),\n            PG15\n        );\n\n        assert_eq!(parse_pg_version(\"PostgreSQL 14.15\"), PG14);\n        assert_eq!(parse_pg_version(\"PostgreSQL 14.0\"), PG14);\n        assert_eq!(\n            parse_pg_version(\"PostgreSQL 14.9 (Debian 14.9-1.pgdg120+1\"),\n            PG14\n        );\n\n        assert_eq!(parse_pg_version(\"PostgreSQL 16devel\"), PG16);\n        assert_eq!(parse_pg_version(\"PostgreSQL 16beta1\"), PG16);\n        assert_eq!(parse_pg_version(\"PostgreSQL 16rc2\"), PG16);\n        assert_eq!(parse_pg_version(\"PostgreSQL 16extra\"), PG16);\n    }\n\n    #[test]\n    #[should_panic]\n    fn test_parse_pg_unsupported_version() {\n        parse_pg_version(\"PostgreSQL 13.14\");\n    }\n\n    #[test]\n    #[should_panic]\n    fn test_parse_pg_incorrect_version_format() {\n        parse_pg_version(\"PostgreSQL 14\");\n    }\n}\n"
  },
  {
    "path": "compute_tools/src/hadron_metrics.rs",
    "content": "use metrics::{\n    IntCounter, IntGaugeVec, core::Collector, proto::MetricFamily, register_int_counter,\n    register_int_gauge_vec,\n};\nuse once_cell::sync::Lazy;\n\n// Counter keeping track of the number of PageStream request errors reported by Postgres.\n// An error is registered every time Postgres calls compute_ctl's /refresh_configuration API.\n// Postgres will invoke this API if it detected trouble with PageStream requests (get_page@lsn,\n// get_base_backup, etc.) it sends to any pageserver. An increase in this counter value typically\n// indicates Postgres downtime, as PageStream requests are critical for Postgres to function.\npub static POSTGRES_PAGESTREAM_REQUEST_ERRORS: Lazy<IntCounter> = Lazy::new(|| {\n    register_int_counter!(\n        \"pg_cctl_pagestream_request_errors_total\",\n        \"Number of PageStream request errors reported by the postgres process\"\n    )\n    .expect(\"failed to define a metric\")\n});\n\n// Counter keeping track of the number of compute configuration errors due to Postgres statement\n// timeouts. An error is registered every time `ComputeNode::reconfigure()` fails due to Postgres\n// error code 57014 (query cancelled). This statement timeout typically occurs when postgres is\n// stuck in a problematic retry loop when the PS is reject its connection requests (usually due\n// to PG pointing at the wrong PS). 
We should investigate the root cause when this counter value\n// increases by checking PG and PS logs.\npub static COMPUTE_CONFIGURE_STATEMENT_TIMEOUT_ERRORS: Lazy<IntCounter> = Lazy::new(|| {\n    register_int_counter!(\n        \"pg_cctl_configure_statement_timeout_errors_total\",\n        \"Number of compute configuration errors due to Postgres statement timeouts.\"\n    )\n    .expect(\"failed to define a metric\")\n});\n\npub static COMPUTE_ATTACHED: Lazy<IntGaugeVec> = Lazy::new(|| {\n    register_int_gauge_vec!(\n        \"pg_cctl_attached\",\n        \"Compute node attached status (1 if attached)\",\n        &[\n            \"pg_compute_id\",\n            \"pg_instance_id\",\n            \"tenant_id\",\n            \"timeline_id\"\n        ]\n    )\n    .expect(\"failed to define a metric\")\n});\n\npub fn collect() -> Vec<MetricFamily> {\n    let mut metrics = Vec::new();\n    metrics.extend(POSTGRES_PAGESTREAM_REQUEST_ERRORS.collect());\n    metrics.extend(COMPUTE_CONFIGURE_STATEMENT_TIMEOUT_ERRORS.collect());\n    metrics.extend(COMPUTE_ATTACHED.collect());\n    metrics\n}\n\npub fn initialize_metrics() {\n    Lazy::force(&POSTGRES_PAGESTREAM_REQUEST_ERRORS);\n    Lazy::force(&COMPUTE_CONFIGURE_STATEMENT_TIMEOUT_ERRORS);\n    Lazy::force(&COMPUTE_ATTACHED);\n}\n"
  },
  {
    "path": "compute_tools/src/http/extract/json.rs",
    "content": "use std::ops::{Deref, DerefMut};\n\nuse axum::extract::rejection::JsonRejection;\nuse axum::extract::{FromRequest, Request};\nuse compute_api::responses::GenericAPIError;\nuse http::StatusCode;\n\n/// Custom `Json` extractor, so that we can format errors into\n/// `JsonResponse<GenericAPIError>`.\n#[derive(Debug, Clone, Copy, Default)]\npub(crate) struct Json<T>(pub T);\n\nimpl<S, T> FromRequest<S> for Json<T>\nwhere\n    axum::Json<T>: FromRequest<S, Rejection = JsonRejection>,\n    S: Send + Sync,\n{\n    type Rejection = (StatusCode, axum::Json<GenericAPIError>);\n\n    async fn from_request(req: Request, state: &S) -> Result<Self, Self::Rejection> {\n        match axum::Json::<T>::from_request(req, state).await {\n            Ok(value) => Ok(Self(value.0)),\n            Err(rejection) => Err((\n                rejection.status(),\n                axum::Json(GenericAPIError {\n                    error: rejection.body_text().to_lowercase(),\n                }),\n            )),\n        }\n    }\n}\n\nimpl<T> Deref for Json<T> {\n    type Target = T;\n\n    fn deref(&self) -> &Self::Target {\n        &self.0\n    }\n}\n\nimpl<T> DerefMut for Json<T> {\n    fn deref_mut(&mut self) -> &mut Self::Target {\n        &mut self.0\n    }\n}\n"
  },
  {
    "path": "compute_tools/src/http/extract/mod.rs",
    "content": "pub(crate) mod json;\npub(crate) mod path;\npub(crate) mod query;\npub(crate) mod request_id;\n\npub(crate) use json::Json;\npub(crate) use path::Path;\npub(crate) use query::Query;\n#[allow(unused)]\npub(crate) use request_id::RequestId;\n"
  },
  {
    "path": "compute_tools/src/http/extract/path.rs",
    "content": "use std::ops::{Deref, DerefMut};\n\nuse axum::extract::FromRequestParts;\nuse axum::extract::rejection::PathRejection;\nuse compute_api::responses::GenericAPIError;\nuse http::StatusCode;\nuse http::request::Parts;\n\n/// Custom `Path` extractor, so that we can format errors into\n/// `JsonResponse<GenericAPIError>`.\n#[derive(Debug, Clone, Copy, Default)]\npub(crate) struct Path<T>(pub T);\n\nimpl<S, T> FromRequestParts<S> for Path<T>\nwhere\n    axum::extract::Path<T>: FromRequestParts<S, Rejection = PathRejection>,\n    S: Send + Sync,\n{\n    type Rejection = (StatusCode, axum::Json<GenericAPIError>);\n\n    async fn from_request_parts(parts: &mut Parts, state: &S) -> Result<Self, Self::Rejection> {\n        match axum::extract::Path::<T>::from_request_parts(parts, state).await {\n            Ok(value) => Ok(Self(value.0)),\n            Err(rejection) => Err((\n                rejection.status(),\n                axum::Json(GenericAPIError {\n                    error: rejection.body_text().to_ascii_lowercase(),\n                }),\n            )),\n        }\n    }\n}\n\nimpl<T> Deref for Path<T> {\n    type Target = T;\n\n    fn deref(&self) -> &Self::Target {\n        &self.0\n    }\n}\n\nimpl<T> DerefMut for Path<T> {\n    fn deref_mut(&mut self) -> &mut Self::Target {\n        &mut self.0\n    }\n}\n"
  },
  {
    "path": "compute_tools/src/http/extract/query.rs",
    "content": "use std::ops::{Deref, DerefMut};\n\nuse axum::extract::FromRequestParts;\nuse axum::extract::rejection::QueryRejection;\nuse compute_api::responses::GenericAPIError;\nuse http::StatusCode;\nuse http::request::Parts;\n\n/// Custom `Query` extractor, so that we can format errors into\n/// `JsonResponse<GenericAPIError>`.\n#[derive(Debug, Clone, Copy, Default)]\npub(crate) struct Query<T>(pub T);\n\nimpl<S, T> FromRequestParts<S> for Query<T>\nwhere\n    axum::extract::Query<T>: FromRequestParts<S, Rejection = QueryRejection>,\n    S: Send + Sync,\n{\n    type Rejection = (StatusCode, axum::Json<GenericAPIError>);\n\n    async fn from_request_parts(parts: &mut Parts, state: &S) -> Result<Self, Self::Rejection> {\n        match axum::extract::Query::<T>::from_request_parts(parts, state).await {\n            Ok(value) => Ok(Self(value.0)),\n            Err(rejection) => Err((\n                rejection.status(),\n                axum::Json(GenericAPIError {\n                    error: rejection.body_text().to_ascii_lowercase(),\n                }),\n            )),\n        }\n    }\n}\n\nimpl<T> Deref for Query<T> {\n    type Target = T;\n\n    fn deref(&self) -> &Self::Target {\n        &self.0\n    }\n}\n\nimpl<T> DerefMut for Query<T> {\n    fn deref_mut(&mut self) -> &mut Self::Target {\n        &mut self.0\n    }\n}\n"
  },
  {
    "path": "compute_tools/src/http/extract/request_id.rs",
    "content": "use std::{\n    fmt::Display,\n    ops::{Deref, DerefMut},\n};\n\nuse axum::{extract::FromRequestParts, response::IntoResponse};\nuse http::{StatusCode, request::Parts};\n\nuse crate::http::{JsonResponse, headers::X_REQUEST_ID};\n\n/// Extract the request ID from the `X-Request-Id` header.\n#[derive(Debug, Clone, Default)]\npub(crate) struct RequestId(pub String);\n\n#[derive(Debug)]\n/// Rejection used for [`RequestId`].\n///\n/// Contains one variant for each way the [`RequestId`] extractor can\n/// fail.\npub(crate) enum RequestIdRejection {\n    /// The request is missing the header.\n    MissingRequestId,\n\n    /// The value of the header is invalid UTF-8.\n    InvalidUtf8,\n}\n\nimpl RequestIdRejection {\n    pub fn status(&self) -> StatusCode {\n        match self {\n            RequestIdRejection::MissingRequestId => StatusCode::INTERNAL_SERVER_ERROR,\n            RequestIdRejection::InvalidUtf8 => StatusCode::BAD_REQUEST,\n        }\n    }\n\n    pub fn message(&self) -> String {\n        match self {\n            RequestIdRejection::MissingRequestId => \"request ID is missing\",\n            RequestIdRejection::InvalidUtf8 => \"request ID is invalid UTF-8\",\n        }\n        .to_string()\n    }\n}\n\nimpl IntoResponse for RequestIdRejection {\n    fn into_response(self) -> axum::response::Response {\n        JsonResponse::error(self.status(), self.message())\n    }\n}\n\nimpl<S> FromRequestParts<S> for RequestId\nwhere\n    S: Send + Sync,\n{\n    type Rejection = RequestIdRejection;\n\n    async fn from_request_parts(parts: &mut Parts, _state: &S) -> Result<Self, Self::Rejection> {\n        match parts.headers.get(X_REQUEST_ID) {\n            Some(value) => match value.to_str() {\n                Ok(request_id) => Ok(Self(request_id.to_string())),\n                Err(_) => Err(RequestIdRejection::InvalidUtf8),\n            },\n            None => Err(RequestIdRejection::MissingRequestId),\n        }\n    }\n}\n\nimpl Deref for 
RequestId {\n    type Target = String;\n\n    fn deref(&self) -> &Self::Target {\n        &self.0\n    }\n}\n\nimpl DerefMut for RequestId {\n    fn deref_mut(&mut self) -> &mut Self::Target {\n        &mut self.0\n    }\n}\n\nimpl Display for RequestId {\n    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {\n        f.write_str(&self.0)\n    }\n}\n"
  },
  {
    "path": "compute_tools/src/http/headers.rs",
    "content": "/// Constant for `X-Request-Id` header.\npub const X_REQUEST_ID: &str = \"x-request-id\";\n"
  },
  {
    "path": "compute_tools/src/http/middleware/authorize.rs",
    "content": "use anyhow::{Result, anyhow};\nuse axum::{RequestExt, body::Body};\nuse axum_extra::{\n    TypedHeader,\n    headers::{Authorization, authorization::Bearer},\n};\nuse compute_api::requests::{COMPUTE_AUDIENCE, ComputeClaims, ComputeClaimsScope};\nuse futures::future::BoxFuture;\nuse http::{Request, Response, StatusCode};\nuse jsonwebtoken::{Algorithm, DecodingKey, TokenData, Validation, jwk::JwkSet};\nuse tower_http::auth::AsyncAuthorizeRequest;\nuse tracing::{debug, warn};\n\nuse crate::http::JsonResponse;\n\n#[derive(Clone, Debug)]\npub(in crate::http) struct Authorize {\n    compute_id: String,\n    // BEGIN HADRON\n    // Hadron instance ID. Only set if it's a Lakebase V1 a.k.a. Hadron instance.\n    instance_id: Option<String>,\n    // END HADRON\n    jwks: JwkSet,\n    validation: Validation,\n}\n\nimpl Authorize {\n    pub fn new(compute_id: String, instance_id: Option<String>, jwks: JwkSet) -> Self {\n        let mut validation = Validation::new(Algorithm::EdDSA);\n\n        // BEGIN HADRON\n        let use_rsa = jwks.keys.iter().any(|jwk| {\n            jwk.common\n                .key_algorithm\n                .is_some_and(|alg| alg == jsonwebtoken::jwk::KeyAlgorithm::RS256)\n        });\n        if use_rsa {\n            validation = Validation::new(Algorithm::RS256);\n        }\n        // END HADRON\n\n        validation.validate_exp = true;\n        // Unused by the control plane\n        validation.validate_nbf = false;\n        // Unused by the control plane\n        validation.validate_aud = false;\n        validation.set_audience(&[COMPUTE_AUDIENCE]);\n        // Nothing is currently required\n        validation.set_required_spec_claims(&[] as &[&str; 0]);\n\n        Self {\n            compute_id,\n            instance_id,\n            jwks,\n            validation,\n        }\n    }\n}\n\nimpl AsyncAuthorizeRequest<Body> for Authorize {\n    type RequestBody = Body;\n    type ResponseBody = Body;\n    type Future = 
BoxFuture<'static, Result<Request<Body>, Response<Self::ResponseBody>>>;\n\n    fn authorize(&mut self, mut request: Request<Body>) -> Self::Future {\n        let compute_id = self.compute_id.clone();\n        let is_hadron_instance = self.instance_id.is_some();\n        let jwks = self.jwks.clone();\n        let validation = self.validation.clone();\n\n        Box::pin(async move {\n            // BEGIN HADRON\n            // In Hadron deployments the \"external\" HTTP endpoint on compute_ctl can only be\n            // accessed by trusted components (enforced by dblet network policy), so we can bypass\n            // all auth here.\n            if is_hadron_instance {\n                return Ok(request);\n            }\n            // END HADRON\n\n            let TypedHeader(Authorization(bearer)) = request\n                .extract_parts::<TypedHeader<Authorization<Bearer>>>()\n                .await\n                .map_err(|_| {\n                    JsonResponse::error(StatusCode::BAD_REQUEST, \"invalid authorization token\")\n                })?;\n\n            let data = match Self::verify(&jwks, bearer.token(), &validation) {\n                Ok(claims) => claims,\n                Err(e) => return Err(JsonResponse::error(StatusCode::UNAUTHORIZED, e)),\n            };\n\n            match data.claims.scope {\n                // TODO: We should validate audience for every token, but\n                // instead of this ad-hoc validation, we should turn\n                // [`Validation::validate_aud`] on. This is merely a stopgap\n                // while we roll out `aud` deployment. 
We return a 401\n                // Unauthorized because when we eventually do use\n                // [`Validation`], we will hit the above `Err` match arm which\n                // returns 401 Unauthorized.\n                Some(ComputeClaimsScope::Admin) => {\n                    let Some(ref audience) = data.claims.audience else {\n                        return Err(JsonResponse::error(\n                            StatusCode::UNAUTHORIZED,\n                            \"missing audience in authorization token claims\",\n                        ));\n                    };\n\n                    if !audience.iter().any(|a| a == COMPUTE_AUDIENCE) {\n                        return Err(JsonResponse::error(\n                            StatusCode::UNAUTHORIZED,\n                            \"invalid audience in authorization token claims\",\n                        ));\n                    }\n                }\n\n                // If the scope is not [`ComputeClaimsScope::Admin`], then we\n                // must validate the compute_id\n                _ => {\n                    let Some(ref claimed_compute_id) = data.claims.compute_id else {\n                        return Err(JsonResponse::error(\n                            StatusCode::FORBIDDEN,\n                            \"missing compute_id in authorization token claims\",\n                        ));\n                    };\n\n                    if *claimed_compute_id != compute_id {\n                        return Err(JsonResponse::error(\n                            StatusCode::FORBIDDEN,\n                            \"invalid compute ID in authorization token claims\",\n                        ));\n                    }\n                }\n            }\n\n            // Make claims available to any subsequent middleware or request\n            // handlers\n            request.extensions_mut().insert(data.claims);\n\n            Ok(request)\n        })\n    }\n}\n\nimpl Authorize {\n    /// Verify 
the token using the JSON Web Key set and return the token data.\n    fn verify(\n        jwks: &JwkSet,\n        token: &str,\n        validation: &Validation,\n    ) -> Result<TokenData<ComputeClaims>> {\n        debug_assert!(!jwks.keys.is_empty());\n\n        debug!(\"verifying token {}\", token);\n\n        for jwk in jwks.keys.iter() {\n            let decoding_key = match DecodingKey::from_jwk(jwk) {\n                Ok(key) => key,\n                Err(e) => {\n                    warn!(\n                        \"failed to construct decoding key from {}: {}\",\n                        jwk.common.key_id.as_ref().unwrap(),\n                        e\n                    );\n\n                    continue;\n                }\n            };\n\n            match jsonwebtoken::decode::<ComputeClaims>(token, &decoding_key, validation) {\n                Ok(data) => return Ok(data),\n                Err(e) => {\n                    warn!(\n                        \"failed to decode authorization token using {}: {}\",\n                        jwk.common.key_id.as_ref().unwrap(),\n                        e\n                    );\n\n                    continue;\n                }\n            }\n        }\n\n        Err(anyhow!(\"failed to verify authorization token\"))\n    }\n}\n"
  },
  {
    "path": "compute_tools/src/http/middleware/mod.rs",
    "content": "pub(in crate::http) mod authorize;\npub(in crate::http) mod request_id;\n"
  },
  {
    "path": "compute_tools/src/http/middleware/request_id.rs",
    "content": "use axum::{extract::Request, middleware::Next, response::Response};\nuse uuid::Uuid;\n\nuse crate::http::headers::X_REQUEST_ID;\n\n/// This middleware function allows compute_ctl to generate its own request ID\n/// if one isn't supplied. The control plane will always send one as a UUID. The\n/// neon Postgres extension on the other hand does not send one.\npub async fn maybe_add_request_id_header(mut request: Request, next: Next) -> Response {\n    let headers = request.headers_mut();\n    if !headers.contains_key(X_REQUEST_ID) {\n        headers.append(X_REQUEST_ID, Uuid::new_v4().to_string().parse().unwrap());\n    }\n\n    next.run(request).await\n}\n"
  },
  {
    "path": "compute_tools/src/http/mod.rs",
    "content": "use axum::body::Body;\nuse axum::response::Response;\nuse compute_api::responses::{ComputeStatus, GenericAPIError};\nuse http::StatusCode;\nuse http::header::CONTENT_TYPE;\nuse serde::Serialize;\nuse tracing::error;\n\nmod extract;\nmod headers;\nmod middleware;\nmod routes;\npub mod server;\n\n/// Convenience response builder for JSON responses\nstruct JsonResponse;\n\nimpl JsonResponse {\n    /// Helper for actually creating a response\n    fn create_response(code: StatusCode, body: impl Serialize) -> Response {\n        Response::builder()\n            .status(code)\n            .header(CONTENT_TYPE.as_str(), \"application/json\")\n            .body(Body::from(serde_json::to_string(&body).unwrap()))\n            .unwrap()\n    }\n\n    /// Create a successful error response\n    pub(self) fn success(code: StatusCode, body: impl Serialize) -> Response {\n        assert!({\n            let code = code.as_u16();\n\n            (200..300).contains(&code)\n        });\n\n        Self::create_response(code, body)\n    }\n\n    /// Create an error response\n    pub(self) fn error(code: StatusCode, error: impl ToString) -> Response {\n        assert!(code.as_u16() >= 400);\n\n        let message = error.to_string();\n        error!(message);\n\n        Self::create_response(code, &GenericAPIError { error: message })\n    }\n\n    /// Create an error response related to the compute being in an invalid state\n    pub(self) fn invalid_status(status: ComputeStatus) -> Response {\n        Self::error(\n            StatusCode::PRECONDITION_FAILED,\n            format!(\"invalid compute status: {status}\"),\n        )\n    }\n}\n"
  },
  {
    "path": "compute_tools/src/http/openapi_spec.yaml",
    "content": "openapi: \"3.0.2\"\ninfo:\n  title: Compute node control API\n  version: \"1.0\"\n\nservers:\n  - url: \"http://localhost:3080\"\n\npaths:\n  /status:\n    get:\n      tags:\n      - Info\n      summary: Get compute node internal status.\n      description: \"\"\n      operationId: getComputeStatus\n      responses:\n        200:\n          description: ComputeState\n          content:\n            application/json:\n              schema:\n                $ref: \"#/components/schemas/ComputeState\"\n\n  /metrics.json:\n    get:\n      tags:\n      - Info\n      summary: Get compute node startup metrics in JSON format.\n      description: \"\"\n      operationId: getComputeMetricsJSON\n      responses:\n        200:\n          description: ComputeMetrics\n          content:\n            application/json:\n              schema:\n                $ref: \"#/components/schemas/ComputeMetrics\"\n\n  /metrics:\n    get:\n      tags:\n      - Info\n      summary: Get compute node metrics in text format.\n      description: \"\"\n      operationId: getComputeMetrics\n      responses:\n        200:\n          description: ComputeMetrics\n          content:\n            text/plain:\n              schema:\n                type: string\n                description: Metrics in text format.\n  /insights:\n    get:\n      tags:\n      - Info\n      summary: Get current compute insights in JSON format.\n      description: |\n        Note, that this doesn't include any historical data.\n      operationId: getComputeInsights\n      responses:\n        200:\n          description: Compute insights\n          content:\n            application/json:\n              schema:\n                $ref: \"#/components/schemas/ComputeInsights\"\n\n  /dbs_and_roles:\n    get:\n      tags:\n        - Info\n      summary: Get databases and roles in the catalog.\n      description: \"\"\n      operationId: getDbsAndRoles\n      responses:\n        200:\n          description: Compute 
schema objects\n          content:\n            application/json:\n              schema:\n                $ref: \"#/components/schemas/DbsAndRoles\"\n\n  /promote:\n    post:\n      tags:\n        - Promotion\n      summary: Promote secondary replica to primary\n      description: \"\"\n      operationId: promoteReplica\n      requestBody:\n        description: Promote requests data\n        required: true\n        content:\n          application/json:\n            schema:\n              $ref: \"#/components/schemas/ComputeSchemaWithLsn\"\n      responses:\n        200:\n          description: Promote succeeded or wasn't started\n          content:\n            application/json:\n              schema:\n                $ref: \"#/components/schemas/PromoteState\"\n        500:\n          description: Promote failed\n          content:\n            application/json:\n              schema:\n                $ref: \"#/components/schemas/PromoteState\"\n\n  /lfc/prewarm:\n    post:\n      summary: Request LFC Prewarm\n      parameters:\n        - name: from_endpoint\n          in: query\n          schema:\n            type: string\n      description: \"\"\n      operationId: lfcPrewarm\n      responses:\n        202:\n          description: LFC prewarm started\n        429:\n          description: LFC prewarm ongoing\n    get:\n      tags:\n        - Prewarm\n      summary: Get LFC prewarm state\n      description: \"\"\n      operationId: getLfcPrewarmState\n      responses:\n        200:\n          description: Prewarm state\n          content:\n            application/json:\n              schema:\n                $ref: \"#/components/schemas/LfcPrewarmState\"\n    delete:\n      tags:\n        - Prewarm\n      summary: Cancel ongoing LFC prewarm\n      description: \"\"\n      operationId: cancelLfcPrewarm\n      responses:\n        202:\n          description: Prewarm cancelled\n\n  /lfc/offload:\n    post:\n      summary: Request LFC offload\n      description: 
\"\"\n      operationId: lfcOffload\n      responses:\n        202:\n          description: LFC offload started\n        429:\n          description: LFC offload ongoing\n    get:\n      tags:\n        - Prewarm\n      summary: Get LFC offloading state\n      description: \"\"\n      operationId: getLfcOffloadState\n      responses:\n        200:\n          description: Offload state\n          content:\n            application/json:\n              schema:\n                $ref: \"#/components/schemas/LfcOffloadState\"\n\n  /database_schema:\n    get:\n      tags:\n        - Info\n      summary: Get schema dump\n      parameters:\n        - name: database\n          in: query\n          description: Database name to dump.\n          required: true\n          schema:\n            type: string\n          example: \"postgres\"\n      description: Get schema dump in SQL format.\n      operationId: getDatabaseSchema\n      responses:\n        200:\n          description: Schema dump\n          content:\n            text/plain:\n              schema:\n                type: string\n                description: Schema dump in SQL format.\n        404:\n          description: Non existing database.\n          content:\n            application/json:\n              schema:\n                $ref: \"#/components/schemas/GenericError\"\n\n  /grants:\n    post:\n      tags:\n        - Grants\n      summary: Apply grants to the database.\n      description: \"\"\n      operationId: setRoleGrants\n      requestBody:\n        description: Grants request.\n        required: true\n        content:\n          application/json:\n            schema:\n                $ref: \"#/components/schemas/SetRoleGrantsRequest\"\n      responses:\n        200:\n          description: Grants applied.\n          content:\n            application/json:\n              schema:\n                $ref: \"#/components/schemas/SetRoleGrantsResponse\"\n        412:\n          description: |\n            
Compute is not in the right state for processing the request.\n          content:\n            application/json:\n              schema:\n                $ref: \"#/components/schemas/GenericError\"\n        500:\n          description: Error occurred during grants application.\n          content:\n            application/json:\n              schema:\n                $ref: \"#/components/schemas/GenericError\"\n\n  /check_writability:\n    post:\n      tags:\n      - Check\n      summary: Check that we can write new data on this compute.\n      description: \"\"\n      operationId: checkComputeWritability\n      responses:\n        200:\n          description: Check result\n          content:\n            text/plain:\n              schema:\n                type: string\n                description: Error text or 'true' if check passed.\n                example: \"true\"\n\n  /extensions:\n    post:\n      tags:\n        - Extensions\n      summary: Install extension if possible.\n      description: \"\"\n      operationId: installExtension\n      requestBody:\n        description: Extension name and database to install it to.\n        required: true\n        content:\n          application/json:\n            schema:\n              $ref: \"#/components/schemas/ExtensionInstallRequest\"\n      responses:\n        200:\n          description: Result from extension installation\n          content:\n            application/json:\n              schema:\n                $ref: \"#/components/schemas/ExtensionInstallResult\"\n        412:\n          description: |\n            Compute is in the wrong state for processing the request.\n          content:\n            application/json:\n              schema:\n                $ref: \"#/components/schemas/GenericError\"\n        500:\n          description: Error during extension installation.\n          content:\n            application/json:\n              schema:\n                $ref: \"#/components/schemas/GenericError\"\n\n 
 /configure:\n    post:\n      tags:\n      - Configure\n      summary: Perform compute node configuration.\n      description: |\n        This is a blocking API endpoint, i.e. it blocks waiting until\n        compute is finished configuration and is in `Running` state.\n        Optional non-blocking mode could be added later.\n      operationId: configureCompute\n      requestBody:\n        description: Configuration request.\n        required: true\n        content:\n          application/json:\n            schema:\n              $ref: \"#/components/schemas/ComputeSchema\"\n      responses:\n        200:\n          description: Compute configuration finished.\n          content:\n            application/json:\n              schema:\n                $ref: \"#/components/schemas/ComputeState\"\n        400:\n          description: Provided spec is invalid.\n          content:\n            application/json:\n              schema:\n                $ref: \"#/components/schemas/GenericError\"\n        412:\n          description: |\n            It's not possible to do live-configuration of the compute.\n            It's either in the wrong state, or compute doesn't use pull\n            mode of configuration.\n          content:\n            application/json:\n              schema:\n                $ref: \"#/components/schemas/GenericError\"\n        500:\n          description: |\n            Compute configuration request was processed, but error\n            occurred. 
Compute will likely shutdown soon.\n          content:\n            application/json:\n              schema:\n                $ref: \"#/components/schemas/GenericError\"\n  /extension_server:\n    post:\n      tags:\n      - Extension\n      summary: Download extension from S3 to local folder.\n      description: \"\"\n      operationId: downloadExtension\n      responses:\n        200:\n          description: Extension downloaded\n          content:\n            text/plain:\n              schema:\n                type: string\n                description: Error text or 'OK' if download succeeded.\n                example: \"OK\"\n        400:\n          description: Request is invalid.\n          content:\n            application/json:\n              schema:\n                $ref: \"#/components/schemas/GenericError\"\n        500:\n          description: Extension download request failed.\n          content:\n            application/json:\n              schema:\n                $ref: \"#/components/schemas/GenericError\"\n\n  /terminate:\n    post:\n      tags:\n      - Terminate\n      summary: Terminate Postgres and wait for it to exit\n      description: \"\"\n      operationId: terminate\n      parameters:\n        - name: mode\n          in: query\n          description: \"Terminate mode: fast (wait 30s before returning) and immediate\"\n          required: false\n          schema:\n            type: string\n            enum: [\"fast\", \"immediate\"]\n            default: fast\n      responses:\n        200:\n          description: Result\n          content:\n            application/json:\n              schema:\n                $ref: \"#/components/schemas/TerminateResponse\"\n        201:\n          description: Result if compute is already terminated\n          content:\n            application/json:\n              schema:\n                $ref: \"#/components/schemas/TerminateResponse\"\n        412:\n          description: \"wrong state\"\n          
content:\n            application/json:\n              schema:\n                $ref: \"#/components/schemas/GenericError\"\n        500:\n          description: \"Unexpected error\"\n          content:\n            application/json:\n              schema:\n                $ref: \"#/components/schemas/GenericError\"\n\ncomponents:\n  securitySchemes:\n    JWT:\n      type: http\n      scheme: bearer\n      bearerFormat: JWT\n\n  schemas:\n    ComputeMetrics:\n      type: object\n      description: Compute startup metrics.\n      required:\n        - wait_for_spec_ms\n        - sync_safekeepers_ms\n        - basebackup_ms\n        - config_ms\n        - total_startup_ms\n      properties:\n        wait_for_spec_ms:\n          type: integer\n        sync_safekeepers_ms:\n          type: integer\n        basebackup_ms:\n          type: integer\n        config_ms:\n          type: integer\n        total_startup_ms:\n          type: integer\n\n    DbsAndRoles:\n      type: object\n      description: Databases and Roles\n      required:\n        - roles\n        - databases\n      properties:\n        roles:\n          type: array\n          items:\n            $ref: \"#/components/schemas/Role\"\n        databases:\n          type: array\n          items:\n            $ref: \"#/components/schemas/Database\"\n\n    Database:\n      type: object\n      description: Database\n      required:\n        - name\n        - owner\n        - restrict_conn\n        - invalid\n      properties:\n        name:\n          type: string\n        owner:\n          type: string\n        options:\n          type: array\n          items:\n            $ref: \"#/components/schemas/GenericOption\"\n        restrict_conn:\n          type: boolean\n        invalid:\n          type: boolean\n\n    Role:\n      type: object\n      description: Role\n      required:\n        - name\n      properties:\n        name:\n          type: string\n        encrypted_password:\n          type: string\n      
  options:\n          type: array\n          items:\n            $ref: \"#/components/schemas/GenericOption\"\n\n    GenericOption:\n      type: object\n      description: Schema Generic option\n      required:\n        - name\n        - vartype\n      properties:\n        name:\n          type: string\n        value:\n          type: string\n        vartype:\n          type: string\n\n    ComputeState:\n      type: object\n      required:\n        - start_time\n        - status\n      properties:\n        start_time:\n          type: string\n          description: |\n            Time when compute was started. If initially compute was started in the `empty`\n            state and then provided with valid spec, `start_time` will be reset to the\n            moment, when spec was received.\n          example: \"2022-10-12T07:20:50.52Z\"\n        status:\n          $ref: \"#/components/schemas/ComputeStatus\"\n        last_active:\n          type: string\n          description: |\n            The last detected compute activity timestamp in UTC and RFC3339 format.\n            It could be empty if compute was never used by user since start.\n          example: \"2022-10-12T07:20:50.52Z\"\n        error:\n          type: string\n          description: Text of the error during compute startup or reconfiguration, if any.\n          example: \"\"\n        tenant:\n          type: string\n          description: Identifier of the current tenant served by compute node, if any.\n          example: c9269c359e9a199fad1ea0981246a78f\n        timeline:\n          type: string\n          description: Identifier of the current timeline served by compute node, if any.\n          example: ece7de74d4b8cbe5433a68ce4d1b97b4\n\n    ComputeInsights:\n      type: object\n      properties:\n        pg_stat_statements:\n          description: Contains raw output from pg_stat_statements in JSON format.\n          type: array\n          items:\n            type: object\n\n    ComputeStatus:\n   
   type: string\n      enum:\n        - empty\n        - configuration_pending\n        - init\n        - running\n        - configuration\n        - failed\n        - termination_pending_fast\n        - termination_pending_immediate\n        - terminated\n      example: running\n\n    ExtensionInstallRequest:\n      type: object\n      required:\n        - extension\n        - database\n        - version\n      properties:\n        extension:\n          type: string\n          description: Extension name.\n          example: \"pg_session_jwt\"\n        version:\n          type: string\n          description: Version of the extension.\n          example: \"1.0.0\"\n        database:\n          type: string\n          description: Database name.\n          example: \"neondb\"\n\n    ExtensionInstallResult:\n      type: object\n      properties:\n        extension:\n          description: Name of the extension.\n          type: string\n          example: \"pg_session_jwt\"\n        version:\n          description: Version of the extension.\n          type: string\n          example: \"1.0.0\"\n\n    ComputeSchema:\n      type: object\n      required:\n        - spec\n      properties:\n        spec:\n          type: object\n    ComputeSchemaWithLsn:\n      type: object\n      required:\n        - spec\n        - wal_flush_lsn\n      properties:\n        spec:\n          $ref: \"#/components/schemas/ComputeState\"\n        wal_flush_lsn:\n          type: string\n          description: \"last WAL flush LSN\"\n          example: \"0/028F10D8\"\n\n    LfcPrewarmState:\n      type: object\n      required:\n        - status\n      properties:\n        status:\n          description: LFC prewarm status\n          enum: [not_prewarmed, prewarming, completed, failed, skipped]\n          type: string\n        error:\n          description: LFC prewarm error, if any\n          type: string\n        total:\n          description: Total pages processed\n          type: integer\n  
      prewarmed:\n          description: Total pages prewarmed\n          type: integer\n        skipped:\n          description: Pages processed but not prewarmed\n          type: integer\n        state_download_time_ms:\n          description: Time it takes to download LFC state to compute\n          type: integer\n        uncompress_time_ms:\n          description: Time it takes to uncompress LFC state\n          type: integer\n        prewarm_time_ms:\n          description: Time it takes to prewarm LFC state in Postgres\n          type: integer\n\n    LfcOffloadState:\n      type: object\n      required:\n        - status\n      properties:\n        status:\n          description: LFC offload status\n          enum: [not_offloaded, offloading, completed, skipped, failed]\n          type: string\n        error:\n          description: LFC offload error, if any\n          type: string\n        state_query_time_ms:\n          description: Time it takes to get LFC state from Postgres\n          type: integer\n        compress_time_ms:\n          description: Time it takes to compress LFC state\n          type: integer\n        state_upload_time_ms:\n          description: Time it takes to upload LFC state to endpoint storage\n          type: integer\n\n\n    PromoteState:\n      type: object\n      required:\n        - status\n      properties:\n        status:\n          description: Promote result\n          enum: [not_promoted, completed, failed]\n          type: string\n        error:\n          description: Promote error, if any\n          type: string\n        lsn_wait_time_ms:\n          description: Time it takes for secondary to catch up with primary WAL flush LSN\n          type: integer\n        pg_promote_time_ms:\n          description: Time it takes to call pg_promote on secondary\n          type: integer\n        reconfigure_time_ms:\n          description: Time it takes to reconfigure promoted secondary\n          type: integer\n\n    
SetRoleGrantsRequest:\n      type: object\n      required:\n        - database\n        - schema\n        - privileges\n        - role\n      properties:\n        database:\n          type: string\n          description: Database name.\n          example: \"neondb\"\n        schema:\n          type: string\n          description: Schema name.\n          example: \"public\"\n        privileges:\n          type: array\n          items:\n            type: string\n          description: List of privileges to set.\n          example: [\"SELECT\", \"INSERT\"]\n        role:\n          type: string\n          description: Role name.\n          example: \"neon\"\n\n    TerminateResponse:\n      type: object\n      required:\n        - lsn\n      properties:\n        lsn:\n          type: string\n          nullable: true\n          description: \"last WAL flush LSN\"\n          example: \"0/028F10D8\"\n\n    SetRoleGrantsResponse:\n      type: object\n      required:\n        - database\n        - schema\n        - privileges\n        - role\n      properties:\n        database:\n          type: string\n          description: Database name.\n          example: \"neondb\"\n        schema:\n          type: string\n          description: Schema name.\n          example: \"public\"\n        privileges:\n          type: array\n          items:\n            type: string\n          description: List of privileges set.\n          example: [\"SELECT\", \"INSERT\"]\n        role:\n          type: string\n          description: Role name.\n          example: \"neon\"\n\n    #\n    # Errors\n    #\n\n    GenericError:\n      type: object\n      required:\n        - error\n      properties:\n        error:\n          type: string\n\nsecurity:\n  - JWT: []\n"
  },
  {
    "path": "compute_tools/src/http/routes/check_writability.rs",
    "content": "use std::sync::Arc;\n\nuse axum::extract::State;\nuse axum::response::Response;\nuse compute_api::responses::ComputeStatus;\nuse http::StatusCode;\n\nuse crate::checker::check_writability;\nuse crate::compute::ComputeNode;\nuse crate::http::JsonResponse;\n\n/// Check that the compute is currently running.\npub(in crate::http) async fn is_writable(State(compute): State<Arc<ComputeNode>>) -> Response {\n    let status = compute.get_status();\n    if status != ComputeStatus::Running {\n        return JsonResponse::invalid_status(status);\n    }\n\n    match check_writability(&compute).await {\n        Ok(_) => JsonResponse::success(StatusCode::OK, true),\n        Err(e) => JsonResponse::error(StatusCode::INTERNAL_SERVER_ERROR, e),\n    }\n}\n"
  },
  {
    "path": "compute_tools/src/http/routes/configure.rs",
    "content": "use std::sync::Arc;\n\nuse axum::extract::State;\nuse axum::response::Response;\nuse compute_api::requests::ConfigurationRequest;\nuse compute_api::responses::{ComputeStatus, ComputeStatusResponse};\nuse http::StatusCode;\nuse tokio::task;\nuse tracing::info;\n\nuse crate::compute::{ComputeNode, ParsedSpec};\nuse crate::http::JsonResponse;\nuse crate::http::extract::Json;\n\n// Accept spec in JSON format and request compute configuration. If anything\n// goes wrong after we set the compute status to `ConfigurationPending` and\n// update compute state with new spec, we basically leave compute in the\n// potentially wrong state. That said, it's control-plane's responsibility to\n// watch compute state after reconfiguration request and to clean restart in\n// case of errors.\npub(in crate::http) async fn configure(\n    State(compute): State<Arc<ComputeNode>>,\n    request: Json<ConfigurationRequest>,\n) -> Response {\n    let pspec = match ParsedSpec::try_from(request.0.spec) {\n        Ok(p) => p,\n        Err(e) => return JsonResponse::error(StatusCode::BAD_REQUEST, e),\n    };\n\n    // XXX: wrap state update under lock in a code block. 
Otherwise, we will try\n    // to `Send` `mut state` into the spawned thread bellow, which will cause\n    // the following rustc error:\n    //\n    // error: future cannot be sent between threads safely\n    {\n        let mut state = compute.state.lock().unwrap();\n        if !matches!(state.status, ComputeStatus::Empty | ComputeStatus::Running) {\n            return JsonResponse::invalid_status(state.status);\n        }\n\n        // Pass the tracing span to the main thread that performs the startup,\n        // so that the start_compute operation is considered a child of this\n        // configure request for tracing purposes.\n        state.startup_span = Some(tracing::Span::current());\n\n        if compute.params.lakebase_mode {\n            ComputeNode::set_spec(&compute.params, &mut state, pspec);\n        } else {\n            state.pspec = Some(pspec);\n        }\n\n        state.set_status(ComputeStatus::ConfigurationPending, &compute.state_changed);\n        drop(state);\n    }\n\n    // Spawn a blocking thread to wait for compute to become Running. 
This is\n    // needed to not block the main pool of workers and to be able to serve\n    // other requests while some particular request is waiting for compute to\n    // finish configuration.\n    let c = compute.clone();\n    let completed = task::spawn_blocking(move || {\n        let mut state = c.state.lock().unwrap();\n        while state.status != ComputeStatus::Running {\n            state = c.state_changed.wait(state).unwrap();\n            info!(\n                \"waiting for compute to become {}, current status: {}\",\n                ComputeStatus::Running,\n                state.status\n            );\n\n            if state.status == ComputeStatus::Failed {\n                let err = state.error.as_ref().map_or(\"unknown error\", |x| x);\n                let msg = format!(\"compute configuration failed: {err:?}\");\n                return Err(msg);\n            }\n        }\n\n        Ok(())\n    })\n    .await\n    .unwrap();\n\n    if let Err(e) = completed {\n        return JsonResponse::error(StatusCode::INTERNAL_SERVER_ERROR, e);\n    }\n\n    // Return current compute state if everything went well.\n    let state = compute.state.lock().unwrap().clone();\n    let body = ComputeStatusResponse::from(&state);\n\n    JsonResponse::success(StatusCode::OK, body)\n}\n"
  },
  {
    "path": "compute_tools/src/http/routes/database_schema.rs",
    "content": "use std::sync::Arc;\n\nuse axum::body::Body;\nuse axum::extract::State;\nuse axum::response::Response;\nuse http::StatusCode;\nuse http::header::CONTENT_TYPE;\nuse serde::Deserialize;\n\nuse crate::catalog::{SchemaDumpError, get_database_schema};\nuse crate::compute::ComputeNode;\nuse crate::http::JsonResponse;\nuse crate::http::extract::Query;\n\n#[derive(Debug, Clone, Deserialize)]\npub(in crate::http) struct DatabaseSchemaParams {\n    database: String,\n}\n\n/// Get a schema dump of the requested database.\npub(in crate::http) async fn get_schema_dump(\n    params: Query<DatabaseSchemaParams>,\n    State(compute): State<Arc<ComputeNode>>,\n) -> Response {\n    match get_database_schema(&compute, &params.database).await {\n        Ok(schema) => Response::builder()\n            .status(StatusCode::OK)\n            .header(CONTENT_TYPE.as_str(), \"application/json\")\n            .body(Body::from_stream(schema))\n            .unwrap(),\n        Err(SchemaDumpError::DatabaseDoesNotExist) => {\n            JsonResponse::error(StatusCode::NOT_FOUND, SchemaDumpError::DatabaseDoesNotExist)\n        }\n        Err(e) => JsonResponse::error(StatusCode::INTERNAL_SERVER_ERROR, e),\n    }\n}\n"
  },
  {
    "path": "compute_tools/src/http/routes/dbs_and_roles.rs",
    "content": "use std::sync::Arc;\n\nuse axum::extract::State;\nuse axum::response::Response;\nuse http::StatusCode;\n\nuse crate::catalog::get_dbs_and_roles;\nuse crate::compute::ComputeNode;\nuse crate::http::JsonResponse;\n\n/// Get the databases and roles from the compute.\npub(in crate::http) async fn get_catalog_objects(\n    State(compute): State<Arc<ComputeNode>>,\n) -> Response {\n    match get_dbs_and_roles(&compute).await {\n        Ok(catalog_objects) => JsonResponse::success(StatusCode::OK, catalog_objects),\n        Err(e) => JsonResponse::error(StatusCode::INTERNAL_SERVER_ERROR, e),\n    }\n}\n"
  },
  {
    "path": "compute_tools/src/http/routes/extension_server.rs",
    "content": "use std::sync::Arc;\n\nuse axum::extract::State;\nuse axum::response::{IntoResponse, Response};\nuse http::StatusCode;\nuse serde::Deserialize;\n\nuse crate::compute::{BUILD_TAG, ComputeNode};\nuse crate::http::JsonResponse;\nuse crate::http::extract::{Path, Query};\n\n#[derive(Debug, Clone, Deserialize)]\npub(in crate::http) struct ExtensionServerParams {\n    #[serde(default)]\n    is_library: bool,\n}\n\n/// Download a remote extension.\npub(in crate::http) async fn download_extension(\n    Path(filename): Path<String>,\n    ext_server_params: Query<ExtensionServerParams>,\n    State(compute): State<Arc<ComputeNode>>,\n) -> Response {\n    // Don't even try to download extensions if no remote storage is configured\n    if compute.params.remote_ext_base_url.is_none() {\n        return JsonResponse::error(\n            StatusCode::PRECONDITION_FAILED,\n            \"remote storage is not configured\",\n        );\n    }\n\n    let ext = {\n        let state = compute.state.lock().unwrap();\n        let pspec = state.pspec.as_ref().unwrap();\n        let spec = &pspec.spec;\n\n        let remote_extensions = match spec.remote_extensions.as_ref() {\n            Some(r) => r,\n            None => {\n                return JsonResponse::error(\n                    StatusCode::CONFLICT,\n                    \"information about remote extensions is unavailable\",\n                );\n            }\n        };\n\n        remote_extensions.get_ext(\n            &filename,\n            ext_server_params.is_library,\n            &BUILD_TAG,\n            &compute.params.pgversion,\n        )\n    };\n\n    match ext {\n        Ok((ext_name, ext_path)) => match compute.download_extension(ext_name, ext_path).await {\n            Ok(_) => StatusCode::OK.into_response(),\n            Err(e) => JsonResponse::error(StatusCode::INTERNAL_SERVER_ERROR, e),\n        },\n        Err(e) => JsonResponse::error(StatusCode::NOT_FOUND, e),\n    }\n}\n"
  },
  {
    "path": "compute_tools/src/http/routes/extensions.rs",
    "content": "use std::sync::Arc;\n\nuse axum::extract::State;\nuse axum::response::Response;\nuse compute_api::requests::ExtensionInstallRequest;\nuse compute_api::responses::{ComputeStatus, ExtensionInstallResponse};\nuse http::StatusCode;\n\nuse crate::compute::ComputeNode;\nuse crate::http::JsonResponse;\nuse crate::http::extract::Json;\n\n/// Install a extension.\npub(in crate::http) async fn install_extension(\n    State(compute): State<Arc<ComputeNode>>,\n    request: Json<ExtensionInstallRequest>,\n) -> Response {\n    let status = compute.get_status();\n    if status != ComputeStatus::Running {\n        return JsonResponse::invalid_status(status);\n    }\n\n    match compute\n        .install_extension(\n            &request.extension,\n            &request.database,\n            request.version.to_string(),\n        )\n        .await\n    {\n        Ok(version) => JsonResponse::success(\n            StatusCode::CREATED,\n            Some(ExtensionInstallResponse {\n                extension: request.extension.clone(),\n                version,\n            }),\n        ),\n        Err(e) => JsonResponse::error(\n            StatusCode::INTERNAL_SERVER_ERROR,\n            format!(\"failed to install extension: {e}\"),\n        ),\n    }\n}\n"
  },
  {
    "path": "compute_tools/src/http/routes/failpoints.rs",
    "content": "use axum::response::{IntoResponse, Response};\nuse http::StatusCode;\nuse serde::{Deserialize, Serialize};\nuse tracing::info;\nuse utils::failpoint_support::apply_failpoint;\n\npub type ConfigureFailpointsRequest = Vec<FailpointConfig>;\n\n/// Information for configuring a single fail point\n#[derive(Debug, Serialize, Deserialize)]\npub struct FailpointConfig {\n    /// Name of the fail point\n    pub name: String,\n    /// List of actions to take, using the format described in `fail::cfg`\n    ///\n    /// We also support `actions = \"exit\"` to cause the fail point to immediately exit.\n    pub actions: String,\n}\n\nuse crate::http::JsonResponse;\nuse crate::http::extract::Json;\n\n/// Configure failpoints for testing purposes.\npub(in crate::http) async fn configure_failpoints(\n    failpoints: Json<ConfigureFailpointsRequest>,\n) -> Response {\n    if !fail::has_failpoints() {\n        return JsonResponse::error(\n            StatusCode::PRECONDITION_FAILED,\n            \"Cannot manage failpoints because neon was compiled without failpoints support\",\n        );\n    }\n\n    for fp in &*failpoints {\n        info!(\"cfg failpoint: {} {}\", fp.name, fp.actions);\n\n        // We recognize one extra \"action\" that's not natively recognized\n        // by the failpoints crate: exit, to immediately kill the process\n        let cfg_result = apply_failpoint(&fp.name, &fp.actions);\n\n        if let Err(e) = cfg_result {\n            return JsonResponse::error(\n                StatusCode::BAD_REQUEST,\n                format!(\"failed to configure failpoints: {e}\"),\n            );\n        }\n    }\n\n    StatusCode::OK.into_response()\n}\n"
  },
  {
    "path": "compute_tools/src/http/routes/grants.rs",
    "content": "use std::sync::Arc;\n\nuse axum::extract::State;\nuse axum::response::Response;\nuse compute_api::requests::SetRoleGrantsRequest;\nuse compute_api::responses::{ComputeStatus, SetRoleGrantsResponse};\nuse http::StatusCode;\n\nuse crate::compute::ComputeNode;\nuse crate::http::JsonResponse;\nuse crate::http::extract::Json;\n\n/// Add grants for a role.\npub(in crate::http) async fn add_grant(\n    State(compute): State<Arc<ComputeNode>>,\n    request: Json<SetRoleGrantsRequest>,\n) -> Response {\n    let status = compute.get_status();\n    if status != ComputeStatus::Running {\n        return JsonResponse::invalid_status(status);\n    }\n\n    match compute\n        .set_role_grants(\n            &request.database,\n            &request.schema,\n            &request.privileges,\n            &request.role,\n        )\n        .await\n    {\n        Ok(()) => JsonResponse::success(\n            StatusCode::CREATED,\n            Some(SetRoleGrantsResponse {\n                database: request.database.clone(),\n                schema: request.schema.clone(),\n                role: request.role.clone(),\n                privileges: request.privileges.clone(),\n            }),\n        ),\n        Err(e) => JsonResponse::error(\n            StatusCode::INTERNAL_SERVER_ERROR,\n            format!(\"failed to grant role privileges to the schema: {e}\"),\n        ),\n    }\n}\n"
  },
  {
    "path": "compute_tools/src/http/routes/hadron_liveness_probe.rs",
    "content": "use crate::pg_isready::pg_isready;\nuse crate::{compute::ComputeNode, http::JsonResponse};\nuse axum::{extract::State, http::StatusCode, response::Response};\nuse std::sync::Arc;\n\n/// NOTE: NOT ENABLED YET\n/// Detect if the compute is alive.\n/// Called by the liveness probe of the compute container.\npub(in crate::http) async fn hadron_liveness_probe(\n    State(compute): State<Arc<ComputeNode>>,\n) -> Response {\n    let port = match compute.params.connstr.port() {\n        Some(port) => port,\n        None => {\n            return JsonResponse::error(\n                StatusCode::INTERNAL_SERVER_ERROR,\n                \"Failed to get the port from the connection string\",\n            );\n        }\n    };\n    match pg_isready(&compute.params.pg_isready_bin, port) {\n        Ok(_) => {\n            // The connection is successful, so the compute is alive.\n            // Return a 200 OK response.\n            JsonResponse::success(StatusCode::OK, \"ok\")\n        }\n        Err(e) => {\n            tracing::error!(\"Hadron liveness probe failed: {}\", e);\n            // The connection failed, so the compute is not alive.\n            // Return a 500 Internal Server Error response.\n            JsonResponse::error(StatusCode::INTERNAL_SERVER_ERROR, e)\n        }\n    }\n}\n"
  },
  {
    "path": "compute_tools/src/http/routes/insights.rs",
    "content": "use std::sync::Arc;\n\nuse axum::extract::State;\nuse axum::response::Response;\nuse compute_api::responses::ComputeStatus;\nuse http::StatusCode;\n\nuse crate::compute::ComputeNode;\nuse crate::http::JsonResponse;\n\n/// Collect current Postgres usage insights.\npub(in crate::http) async fn get_insights(State(compute): State<Arc<ComputeNode>>) -> Response {\n    let status = compute.get_status();\n    if status != ComputeStatus::Running {\n        return JsonResponse::invalid_status(status);\n    }\n\n    let insights = compute.collect_insights().await;\n    JsonResponse::success(StatusCode::OK, insights)\n}\n"
  },
  {
    "path": "compute_tools/src/http/routes/lfc.rs",
    "content": "use crate::http::JsonResponse;\nuse axum::response::{IntoResponse, Response};\nuse axum::{Json, http::StatusCode};\nuse axum_extra::extract::OptionalQuery;\nuse compute_api::responses::{LfcOffloadState, LfcPrewarmState};\ntype Compute = axum::extract::State<std::sync::Arc<crate::compute::ComputeNode>>;\n\npub(in crate::http) async fn prewarm_state(compute: Compute) -> Json<LfcPrewarmState> {\n    Json(compute.lfc_prewarm_state().await)\n}\n\n// Following functions are marked async for axum, as it's more convenient than wrapping these\n// in async lambdas at call site\n\npub(in crate::http) async fn offload_state(compute: Compute) -> Json<LfcOffloadState> {\n    Json(compute.lfc_offload_state())\n}\n\n#[derive(serde::Deserialize)]\npub struct PrewarmQuery {\n    pub from_endpoint: String,\n}\n\npub(in crate::http) async fn prewarm(\n    compute: Compute,\n    OptionalQuery(query): OptionalQuery<PrewarmQuery>,\n) -> Response {\n    if compute.prewarm_lfc(query.map(|q| q.from_endpoint)) {\n        StatusCode::ACCEPTED.into_response()\n    } else {\n        JsonResponse::error(\n            StatusCode::TOO_MANY_REQUESTS,\n            \"Multiple requests for prewarm are not allowed\",\n        )\n    }\n}\n\npub(in crate::http) async fn offload(compute: Compute) -> Response {\n    if compute.offload_lfc() {\n        StatusCode::ACCEPTED.into_response()\n    } else {\n        JsonResponse::error(\n            StatusCode::TOO_MANY_REQUESTS,\n            \"Multiple requests for prewarm offload are not allowed\",\n        )\n    }\n}\n\npub(in crate::http) async fn cancel_prewarm(compute: Compute) -> StatusCode {\n    compute.cancel_prewarm();\n    StatusCode::ACCEPTED\n}\n"
  },
  {
    "path": "compute_tools/src/http/routes/metrics.rs",
    "content": "use std::path::Path;\nuse std::sync::Arc;\n\nuse anyhow::Context;\nuse axum::body::Body;\nuse axum::extract::State;\nuse axum::response::Response;\nuse http::header::CONTENT_TYPE;\nuse http_body_util::BodyExt;\nuse hyper::{Request, StatusCode};\nuse metrics::proto::MetricFamily;\nuse metrics::{Encoder, TextEncoder};\n\nuse crate::communicator_socket_client::connect_communicator_socket;\nuse crate::compute::ComputeNode;\nuse crate::hadron_metrics;\nuse crate::http::JsonResponse;\nuse crate::metrics::collect;\n\n/// Expose Prometheus metrics.\npub(in crate::http) async fn get_metrics() -> Response {\n    // When we call TextEncoder::encode() below, it will immediately return an\n    // error if a metric family has no metrics, so we need to preemptively\n    // filter out metric families with no metrics.\n    let mut metrics = collect()\n        .into_iter()\n        .filter(|m| !m.get_metric().is_empty())\n        .collect::<Vec<MetricFamily>>();\n\n    // Add Hadron metrics.\n    let hadron_metrics: Vec<MetricFamily> = hadron_metrics::collect()\n        .into_iter()\n        .filter(|m| !m.get_metric().is_empty())\n        .collect();\n    metrics.extend(hadron_metrics);\n\n    let encoder = TextEncoder::new();\n    let mut buffer = vec![];\n\n    if let Err(e) = encoder.encode(&metrics, &mut buffer) {\n        return JsonResponse::error(StatusCode::INTERNAL_SERVER_ERROR, e);\n    }\n\n    Response::builder()\n        .status(StatusCode::OK)\n        .header(CONTENT_TYPE, encoder.format_type())\n        .body(Body::from(buffer))\n        .unwrap()\n}\n\n/// Fetch and forward metrics from the Postgres neon extension's metrics\n/// exporter that are used by autoscaling-agent.\n///\n/// The neon extension exposes these metrics over a Unix domain socket\n/// in the data directory. 
That's not accessible directly from the outside\n/// world, so we have this endpoint in compute_ctl to expose it\npub(in crate::http) async fn get_autoscaling_metrics(\n    State(compute): State<Arc<ComputeNode>>,\n) -> Result<Response, Response> {\n    let pgdata = Path::new(&compute.params.pgdata);\n\n    // Connect to the communicator process's metrics socket\n    let mut metrics_client = connect_communicator_socket(pgdata)\n        .await\n        .map_err(|e| JsonResponse::error(StatusCode::INTERNAL_SERVER_ERROR, format!(\"{e:#}\")))?;\n\n    // Make a request for /autoscaling_metrics\n    let request = Request::builder()\n        .method(\"GET\")\n        .uri(\"/autoscaling_metrics\")\n        .header(\"Host\", \"localhost\") // hyper requires Host, even though the server won't care\n        .body(Body::from(\"\"))\n        .unwrap();\n    let resp = metrics_client\n        .send_request(request)\n        .await\n        .context(\"fetching metrics from Postgres metrics service\")\n        .map_err(|e| JsonResponse::error(StatusCode::INTERNAL_SERVER_ERROR, format!(\"{e:#}\")))?;\n\n    // Build a response that just forwards the response we got.\n    let mut response = Response::builder();\n    response = response.status(resp.status());\n    if let Some(content_type) = resp.headers().get(CONTENT_TYPE) {\n        response = response.header(CONTENT_TYPE, content_type);\n    }\n    let body = tonic::service::AxumBody::from_stream(resp.into_body().into_data_stream());\n    Ok(response.body(body).unwrap())\n}\n"
  },
  {
    "path": "compute_tools/src/http/routes/metrics_json.rs",
    "content": "use std::sync::Arc;\n\nuse axum::extract::State;\nuse axum::response::Response;\nuse http::StatusCode;\n\nuse crate::compute::ComputeNode;\nuse crate::http::JsonResponse;\n\n/// Get startup metrics.\npub(in crate::http) async fn get_metrics(State(compute): State<Arc<ComputeNode>>) -> Response {\n    let metrics = compute.state.lock().unwrap().metrics.clone();\n    JsonResponse::success(StatusCode::OK, metrics)\n}\n"
  },
  {
    "path": "compute_tools/src/http/routes/mod.rs",
    "content": "use compute_api::responses::ComputeStatusResponse;\n\nuse crate::compute::ComputeState;\n\npub(in crate::http) mod check_writability;\npub(in crate::http) mod configure;\npub(in crate::http) mod database_schema;\npub(in crate::http) mod dbs_and_roles;\npub(in crate::http) mod extension_server;\npub(in crate::http) mod extensions;\npub(in crate::http) mod failpoints;\npub(in crate::http) mod grants;\npub(in crate::http) mod hadron_liveness_probe;\npub(in crate::http) mod insights;\npub(in crate::http) mod lfc;\npub(in crate::http) mod metrics;\npub(in crate::http) mod metrics_json;\npub(in crate::http) mod promote;\npub(in crate::http) mod refresh_configuration;\npub(in crate::http) mod status;\npub(in crate::http) mod terminate;\n\nimpl From<&ComputeState> for ComputeStatusResponse {\n    fn from(state: &ComputeState) -> Self {\n        ComputeStatusResponse {\n            start_time: state.start_time,\n            tenant: state\n                .pspec\n                .as_ref()\n                .map(|pspec| pspec.tenant_id.to_string()),\n            timeline: state\n                .pspec\n                .as_ref()\n                .map(|pspec| pspec.timeline_id.to_string()),\n            status: state.status,\n            last_active: state.last_active,\n            error: state.error.clone(),\n        }\n    }\n}\n"
  },
  {
    "path": "compute_tools/src/http/routes/promote.rs",
    "content": "use crate::http::JsonResponse;\nuse axum::extract::Json;\nuse compute_api::responses::PromoteConfig;\nuse http::StatusCode;\n\npub(in crate::http) async fn promote(\n    compute: axum::extract::State<std::sync::Arc<crate::compute::ComputeNode>>,\n    Json(cfg): Json<PromoteConfig>,\n) -> axum::response::Response {\n    // Return early at the cost of extra parsing spec\n    let pspec = match crate::compute::ParsedSpec::try_from(cfg.spec) {\n        Ok(p) => p,\n        Err(e) => return JsonResponse::error(StatusCode::BAD_REQUEST, e),\n    };\n\n    let cfg = PromoteConfig {\n        spec: pspec.spec,\n        wal_flush_lsn: cfg.wal_flush_lsn,\n    };\n    let state = compute.promote(cfg).await;\n    if let compute_api::responses::PromoteState::Failed { error: _ } = state {\n        return JsonResponse::create_response(StatusCode::INTERNAL_SERVER_ERROR, state);\n    }\n    JsonResponse::success(StatusCode::OK, state)\n}\n"
  },
  {
    "path": "compute_tools/src/http/routes/refresh_configuration.rs",
    "content": "// This file is added by Hadron\n\nuse std::sync::Arc;\n\nuse axum::{\n    extract::State,\n    response::{IntoResponse, Response},\n};\nuse http::StatusCode;\n\nuse crate::compute::ComputeNode;\nuse crate::hadron_metrics::POSTGRES_PAGESTREAM_REQUEST_ERRORS;\nuse crate::http::JsonResponse;\n\n/// The /refresh_configuration POST method is used to nudge compute_ctl to pull a new spec\n/// from the HCC and attempt to reconfigure Postgres with the new spec. The method does not wait\n/// for the reconfiguration to complete. Rather, it simply delivers a signal that will cause\n/// configuration to be reloaded in a best effort manner. Invocation of this method does not\n/// guarantee that a reconfiguration will occur. The caller should consider keep sending this\n/// request while it believes that the compute configuration is out of date.\npub(in crate::http) async fn refresh_configuration(\n    State(compute): State<Arc<ComputeNode>>,\n) -> Response {\n    POSTGRES_PAGESTREAM_REQUEST_ERRORS.inc();\n    match compute.signal_refresh_configuration().await {\n        Ok(_) => StatusCode::OK.into_response(),\n        Err(e) => JsonResponse::error(StatusCode::INTERNAL_SERVER_ERROR, e),\n    }\n}\n"
  },
  {
    "path": "compute_tools/src/http/routes/status.rs",
    "content": "use std::ops::Deref;\nuse std::sync::Arc;\n\nuse axum::extract::State;\nuse axum::http::StatusCode;\nuse axum::response::Response;\nuse compute_api::responses::ComputeStatusResponse;\n\nuse crate::compute::ComputeNode;\nuse crate::http::JsonResponse;\n\n/// Retrieve the state of the comute.\npub(in crate::http) async fn get_status(State(compute): State<Arc<ComputeNode>>) -> Response {\n    let state = compute.state.lock().unwrap();\n    let body = ComputeStatusResponse::from(state.deref());\n\n    JsonResponse::success(StatusCode::OK, body)\n}\n"
  },
  {
    "path": "compute_tools/src/http/routes/terminate.rs",
    "content": "use crate::compute::{ComputeNode, forward_termination_signal};\nuse crate::http::JsonResponse;\nuse axum::extract::State;\nuse axum::response::{IntoResponse, Response};\nuse axum_extra::extract::OptionalQuery;\nuse compute_api::responses::{ComputeStatus, TerminateMode, TerminateResponse};\nuse http::StatusCode;\nuse serde::Deserialize;\nuse std::sync::Arc;\nuse tokio::task;\nuse tracing::info;\n\n#[derive(Deserialize, Default)]\npub struct TerminateQuery {\n    mode: TerminateMode,\n}\n\n/// Terminate the compute.\npub(in crate::http) async fn terminate(\n    State(compute): State<Arc<ComputeNode>>,\n    OptionalQuery(terminate): OptionalQuery<TerminateQuery>,\n) -> Response {\n    let mode = terminate.unwrap_or_default().mode;\n    {\n        let mut state = compute.state.lock().unwrap();\n        if state.status == ComputeStatus::Terminated {\n            let response = TerminateResponse {\n                lsn: state.terminate_flush_lsn,\n            };\n            return JsonResponse::success(StatusCode::CREATED, response);\n        }\n\n        if !matches!(state.status, ComputeStatus::Empty | ComputeStatus::Running) {\n            return JsonResponse::invalid_status(state.status);\n        }\n\n        // If compute is Empty, there's no Postgres to terminate. The regular compute_ctl termination path\n        // assumes Postgres to be configured and running, so we just special-handle this case by exiting\n        // the process directly.\n        if compute.params.lakebase_mode && state.status == ComputeStatus::Empty {\n            drop(state);\n            info!(\"terminating empty compute - will exit process\");\n\n            // Queue a task to exit the process after 5 seconds. 
The 5-second delay aims to\n            // give enough time for the HTTP response to be sent so that HCM doesn't get an abrupt\n            // connection termination.\n            tokio::spawn(async {\n                tokio::time::sleep(tokio::time::Duration::from_secs(5)).await;\n                info!(\"exiting process after terminating empty compute\");\n                std::process::exit(0);\n            });\n\n            return StatusCode::OK.into_response();\n        }\n\n        // For Running status, proceed with normal termination\n        state.set_status(mode.into(), &compute.state_changed);\n        drop(state);\n    }\n\n    forward_termination_signal(false);\n    info!(\"sent signal and notified waiters\");\n\n    // Spawn a blocking thread to wait for compute to become Terminated.\n    // This is needed to do not block the main pool of workers and\n    // be able to serve other requests while some particular request\n    // is waiting for compute to finish configuration.\n    let c = compute.clone();\n    let lsn = task::spawn_blocking(move || {\n        let mut state = c.state.lock().unwrap();\n        while state.status != ComputeStatus::Terminated {\n            state = c.state_changed.wait(state).unwrap();\n            info!(\n                \"waiting for compute to become {}, current status: {:?}\",\n                ComputeStatus::Terminated,\n                state.status\n            );\n        }\n        state.terminate_flush_lsn\n    })\n    .await\n    .unwrap();\n    info!(\"terminated Postgres\");\n    JsonResponse::success(StatusCode::OK, TerminateResponse { lsn })\n}\n"
  },
  {
    "path": "compute_tools/src/http/server.rs",
    "content": "use std::fmt::Display;\nuse std::net::{IpAddr, Ipv6Addr, SocketAddr};\nuse std::sync::Arc;\nuse std::time::Duration;\n\nuse anyhow::Result;\nuse axum::Router;\nuse axum::middleware::{self};\nuse axum::response::IntoResponse;\nuse axum::routing::{get, post};\nuse compute_api::responses::ComputeCtlConfig;\nuse http::StatusCode;\nuse tokio::net::TcpListener;\nuse tower::ServiceBuilder;\nuse tower_http::{\n    auth::AsyncRequireAuthorizationLayer, request_id::PropagateRequestIdLayer, trace::TraceLayer,\n};\nuse tracing::{Span, error, info};\n\nuse super::middleware::request_id::maybe_add_request_id_header;\nuse super::{\n    headers::X_REQUEST_ID,\n    middleware::authorize::Authorize,\n    routes::{\n        check_writability, configure, database_schema, dbs_and_roles, extension_server, extensions,\n        grants, hadron_liveness_probe, insights, lfc, metrics, metrics_json, promote,\n        refresh_configuration, status, terminate,\n    },\n};\nuse crate::compute::ComputeNode;\n\n/// `compute_ctl` has two servers: internal and external. The internal server\n/// binds to the loopback interface and handles communication from clients on\n/// the compute. The external server is what receives communication from the\n/// control plane, the metrics scraper, etc. We make the distinction because\n/// certain routes in `compute_ctl` only need to be exposed to local processes\n/// like Postgres via the neon extension and local_proxy.\n#[derive(Clone, Debug)]\npub enum Server {\n    Internal {\n        port: u16,\n    },\n    External {\n        port: u16,\n        config: ComputeCtlConfig,\n        compute_id: String,\n        instance_id: Option<String>,\n    },\n}\n\nimpl Display for Server {\n    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {\n        match self {\n            Server::Internal { .. } => f.write_str(\"internal\"),\n            Server::External { .. 
} => f.write_str(\"external\"),\n        }\n    }\n}\n\nimpl From<&Server> for Router<Arc<ComputeNode>> {\n    fn from(server: &Server) -> Self {\n        let mut router = Router::<Arc<ComputeNode>>::new();\n\n        router = match server {\n            Server::Internal { .. } => {\n                router = router\n                    .route(\n                        \"/extension_server/{*filename}\",\n                        post(extension_server::download_extension),\n                    )\n                    .route(\"/extensions\", post(extensions::install_extension))\n                    .route(\"/grants\", post(grants::add_grant))\n                    // Hadron: Compute-initiated configuration refresh\n                    .route(\n                        \"/refresh_configuration\",\n                        post(refresh_configuration::refresh_configuration),\n                    );\n\n                // Add in any testing support\n                if cfg!(feature = \"testing\") {\n                    use super::routes::failpoints;\n\n                    router = router.route(\"/failpoints\", post(failpoints::configure_failpoints));\n                }\n\n                router\n            }\n            Server::External {\n                config,\n                compute_id,\n                instance_id,\n                ..\n            } => {\n                let unauthenticated_router = Router::<Arc<ComputeNode>>::new()\n                    .route(\"/metrics\", get(metrics::get_metrics))\n                    .route(\n                        \"/autoscaling_metrics\",\n                        get(metrics::get_autoscaling_metrics),\n                    );\n\n                let authenticated_router = Router::<Arc<ComputeNode>>::new()\n                    .route(\n                        \"/lfc/prewarm\",\n                        get(lfc::prewarm_state)\n                            .post(lfc::prewarm)\n                            .delete(lfc::cancel_prewarm),\n  
                  )\n                    .route(\"/lfc/offload\", get(lfc::offload_state).post(lfc::offload))\n                    .route(\"/promote\", post(promote::promote))\n                    .route(\"/check_writability\", post(check_writability::is_writable))\n                    .route(\"/configure\", post(configure::configure))\n                    .route(\"/database_schema\", get(database_schema::get_schema_dump))\n                    .route(\"/dbs_and_roles\", get(dbs_and_roles::get_catalog_objects))\n                    .route(\"/insights\", get(insights::get_insights))\n                    .route(\"/metrics.json\", get(metrics_json::get_metrics))\n                    .route(\"/status\", get(status::get_status))\n                    .route(\"/terminate\", post(terminate::terminate))\n                    .route(\n                        \"/hadron_liveness_probe\",\n                        get(hadron_liveness_probe::hadron_liveness_probe),\n                    )\n                    .layer(AsyncRequireAuthorizationLayer::new(Authorize::new(\n                        compute_id.clone(),\n                        instance_id.clone(),\n                        config.jwks.clone(),\n                    )));\n\n                router\n                    .merge(unauthenticated_router)\n                    .merge(authenticated_router)\n            }\n        };\n\n        router\n            .fallback(Server::handle_404)\n            .method_not_allowed_fallback(Server::handle_405)\n            .layer(\n                ServiceBuilder::new()\n                    .layer(tower_otel::trace::HttpLayer::server(tracing::Level::INFO))\n                    // Add this middleware since we assume the request ID exists\n                    .layer(middleware::from_fn(maybe_add_request_id_header))\n                    .layer(\n                        TraceLayer::new_for_http()\n                            .on_request(|request: &http::Request<_>, _span: &Span| {\n                 
               let request_id = request\n                                    .headers()\n                                    .get(X_REQUEST_ID)\n                                    .unwrap()\n                                    .to_str()\n                                    .unwrap();\n\n                                info!(%request_id, \"{} {}\", request.method(), request.uri());\n                            })\n                            .on_response(\n                                |response: &http::Response<_>, latency: Duration, _span: &Span| {\n                                    let request_id = response\n                                        .headers()\n                                        .get(X_REQUEST_ID)\n                                        .unwrap()\n                                        .to_str()\n                                        .unwrap();\n\n                                    info!(\n                                        %request_id,\n                                        code = response.status().as_u16(),\n                                        latency = latency.as_millis()\n                                    );\n                                },\n                            ),\n                    )\n                    .layer(PropagateRequestIdLayer::x_request_id()),\n            )\n    }\n}\n\nimpl Server {\n    async fn handle_404() -> impl IntoResponse {\n        StatusCode::NOT_FOUND\n    }\n\n    async fn handle_405() -> impl IntoResponse {\n        StatusCode::METHOD_NOT_ALLOWED\n    }\n\n    async fn listener(&self) -> Result<TcpListener> {\n        let addr = SocketAddr::new(self.ip(), self.port());\n        let listener = TcpListener::bind(&addr).await?;\n\n        Ok(listener)\n    }\n\n    fn ip(&self) -> IpAddr {\n        match self {\n            // TODO: Change this to Ipv6Addr::LOCALHOST when the GitHub runners\n            // allow binding to localhost\n            Server::Internal { .. 
} => IpAddr::from(Ipv6Addr::UNSPECIFIED),\n            Server::External { .. } => IpAddr::from(Ipv6Addr::UNSPECIFIED),\n        }\n    }\n\n    fn port(&self) -> u16 {\n        match self {\n            Server::Internal { port, .. } => *port,\n            Server::External { port, .. } => *port,\n        }\n    }\n\n    async fn serve(self, compute: Arc<ComputeNode>) {\n        let listener = self.listener().await.unwrap_or_else(|e| {\n            // If we can't bind, the compute cannot operate correctly\n            panic!(\n                \"failed to bind the compute_ctl {} HTTP server to {}: {}\",\n                self,\n                SocketAddr::new(self.ip(), self.port()),\n                e\n            );\n        });\n\n        if tracing::enabled!(tracing::Level::INFO) {\n            let local_addr = match listener.local_addr() {\n                Ok(local_addr) => local_addr,\n                Err(_) => SocketAddr::new(self.ip(), self.port()),\n            };\n\n            info!(\n                \"compute_ctl {} HTTP server listening at {}\",\n                self, local_addr\n            );\n        }\n\n        let router = Router::from(&self)\n            .with_state(compute)\n            .into_make_service_with_connect_info::<SocketAddr>();\n\n        if let Err(e) = axum::serve(listener, router).await {\n            error!(\"compute_ctl {} HTTP server error: {}\", self, e);\n        }\n    }\n\n    pub fn launch(self, compute: &Arc<ComputeNode>) {\n        let state = Arc::clone(compute);\n\n        info!(\"Launching the {} server\", self);\n\n        tokio::spawn(self.serve(state));\n    }\n}\n"
  },
  {
    "path": "compute_tools/src/installed_extensions.rs",
    "content": "use std::collections::HashMap;\n\nuse anyhow::Result;\nuse compute_api::responses::{InstalledExtension, InstalledExtensions};\nuse once_cell::sync::Lazy;\nuse tokio_postgres::error::Error as PostgresError;\nuse tokio_postgres::{Client, Config, NoTls};\n\nuse crate::metrics::INSTALLED_EXTENSIONS;\n\n/// We don't reuse get_existing_dbs() just for code clarity\n/// and to make database listing query here more explicit.\n///\n/// Limit the number of databases to 500 to avoid excessive load.\nasync fn list_dbs(client: &mut Client) -> Result<Vec<String>, PostgresError> {\n    // `pg_database.datconnlimit = -2` means that the database is in the\n    // invalid state\n    let databases = client\n        .query(\n            \"SELECT datname FROM pg_catalog.pg_database\n                WHERE datallowconn\n                AND datconnlimit OPERATOR(pg_catalog.<>) (OPERATOR(pg_catalog.-) 2::pg_catalog.int4)\n                LIMIT 500\",\n            &[],\n        )\n        .await?\n        .iter()\n        .map(|row| {\n            let db: String = row.get(\"datname\");\n            db\n        })\n        .collect();\n\n    Ok(databases)\n}\n\n/// Connect to every database (see list_dbs above) and get the list of installed extensions.\n///\n/// Same extension can be installed in multiple databases with different versions,\n/// so we report a separate metric (number of databases where it is installed)\n/// for each extension version.\npub async fn get_installed_extensions(\n    mut conf: Config,\n) -> Result<InstalledExtensions, PostgresError> {\n    conf.application_name(\"compute_ctl:get_installed_extensions\");\n    let databases: Vec<String> = {\n        let (mut client, connection) = conf.connect(NoTls).await?;\n        tokio::spawn(async move {\n            if let Err(e) = connection.await {\n                eprintln!(\"connection error: {e}\");\n            }\n        });\n\n        list_dbs(&mut client).await?\n    };\n\n    let mut extensions_map: 
HashMap<(String, String, String), InstalledExtension> = HashMap::new();\n    for db in databases.iter() {\n        conf.dbname(db);\n\n        let (client, connection) = conf.connect(NoTls).await?;\n        tokio::spawn(async move {\n            if let Err(e) = connection.await {\n                eprintln!(\"connection error: {e}\");\n            }\n        });\n\n        let extensions: Vec<(String, String, i32)> = client\n            .query(\n                \"SELECT extname, extversion, extowner::pg_catalog.int4 FROM pg_catalog.pg_extension\",\n                &[],\n            )\n            .await?\n            .iter()\n            .map(|row| {\n                (\n                    row.get(\"extname\"),\n                    row.get(\"extversion\"),\n                    row.get(\"extowner\"),\n                )\n            })\n            .collect();\n\n        for (extname, v, extowner) in extensions.iter() {\n            let version = v.to_string();\n\n            // check if the extension is owned by superuser\n            // 10 is the oid of superuser\n            let owned_by_superuser = if *extowner == 10 { \"1\" } else { \"0\" };\n\n            extensions_map\n                .entry((\n                    extname.to_string(),\n                    version.clone(),\n                    owned_by_superuser.to_string(),\n                ))\n                .and_modify(|e| {\n                    // count the number of databases where the extension is installed\n                    e.n_databases += 1;\n                })\n                .or_insert(InstalledExtension {\n                    extname: extname.to_string(),\n                    version: version.clone(),\n                    n_databases: 1,\n                    owned_by_superuser: owned_by_superuser.to_string(),\n                });\n        }\n    }\n\n    for (key, ext) in extensions_map.iter() {\n        let (extname, version, owned_by_superuser) = key;\n        let n_databases = 
ext.n_databases as u64;\n\n        INSTALLED_EXTENSIONS\n            .with_label_values(&[extname, version, owned_by_superuser])\n            .set(n_databases);\n    }\n\n    Ok(InstalledExtensions {\n        extensions: extensions_map.into_values().collect(),\n    })\n}\n\npub fn initialize_metrics() {\n    Lazy::force(&INSTALLED_EXTENSIONS);\n}\n"
  },
  {
    "path": "compute_tools/src/lib.rs",
    "content": "//! Various tools and helpers to handle cluster / compute node (Postgres)\n//! configuration.\n#![deny(unsafe_code)]\n#![deny(clippy::undocumented_unsafe_blocks)]\n\npub mod checker;\npub mod communicator_socket_client;\npub mod config;\npub mod configurator;\npub mod http;\n#[macro_use]\npub mod logger;\npub mod catalog;\npub mod compute;\npub mod compute_prewarm;\npub mod compute_promote;\npub mod disk_quota;\npub mod extension_server;\npub mod hadron_metrics;\npub mod installed_extensions;\npub mod local_proxy;\npub mod lsn_lease;\npub mod metrics;\nmod migration;\npub mod monitor;\npub mod params;\npub mod pg_helpers;\npub mod pg_isready;\npub mod pgbouncer;\npub mod rsyslog;\npub mod spec;\nmod spec_apply;\npub mod swap;\npub mod sync_sk;\npub mod tls;\n"
  },
  {
    "path": "compute_tools/src/local_proxy.rs",
    "content": "//! Local Proxy is a feature of our BaaS Neon Authorize project.\n//!\n//! Local Proxy validates JWTs and manages the pg_session_jwt extension.\n//! It also maintains a connection pool to postgres.\n\nuse anyhow::{Context, Result};\nuse camino::Utf8Path;\nuse compute_api::spec::LocalProxySpec;\nuse nix::sys::signal::Signal;\nuse utils::pid_file::{self, PidFileRead};\n\npub fn configure(local_proxy: &LocalProxySpec) -> Result<()> {\n    write_local_proxy_conf(\"/etc/local_proxy/config.json\".as_ref(), local_proxy)?;\n    notify_local_proxy(\"/etc/local_proxy/pid\".as_ref())?;\n\n    Ok(())\n}\n\n/// Create or completely rewrite configuration file specified by `path`\nfn write_local_proxy_conf(path: &Utf8Path, local_proxy: &LocalProxySpec) -> Result<()> {\n    let config =\n        serde_json::to_string_pretty(local_proxy).context(\"serializing LocalProxySpec to json\")?;\n    std::fs::write(path, config).with_context(|| format!(\"writing {path}\"))?;\n\n    Ok(())\n}\n\n/// Notify local proxy about a new config file.\nfn notify_local_proxy(path: &Utf8Path) -> Result<()> {\n    match pid_file::read(path)? {\n        // if the file doesn't exist, or isn't locked, local_proxy isn't running\n        // and will naturally pick up our config later\n        PidFileRead::NotExist | PidFileRead::NotHeldByAnyProcess(_) => {}\n        PidFileRead::LockedByOtherProcess(pid) => {\n            // From the pid_file docs:\n            //\n            // > 1. The other process might exit at any time, turning the given PID stale.\n            // > 2. There is a small window in which `claim_for_current_process` has already\n            // >    locked the file but not yet updates its contents. [`read`] will return\n            // >    this variant here, but with the old file contents, i.e., a stale PID.\n            // >\n            // > The kernel is free to recycle PID once it has been `wait(2)`ed upon by\n            // > its creator. 
Thus, acting upon a stale PID, e.g., by issuing a `kill`\n            // > system call on it, bears the risk of killing an unrelated process.\n            // > This is an inherent limitation of using pidfiles.\n            // > The only race-free solution is to have a supervisor-process with a lifetime\n            // > that exceeds that of all of its child-processes (e.g., `runit`, `supervisord`).\n            //\n            // This is an ok risk as we only send a SIGHUP which likely won't actually\n            // kill the process, only reload config.\n            nix::sys::signal::kill(pid, Signal::SIGHUP).context(\"sending signal to local_proxy\")?;\n        }\n    }\n\n    Ok(())\n}\n"
  },
  {
    "path": "compute_tools/src/logger.rs",
    "content": "use std::collections::HashMap;\nuse std::sync::{LazyLock, RwLock};\nuse tracing::Subscriber;\nuse tracing::info;\nuse tracing_appender;\nuse tracing_subscriber::prelude::*;\nuse tracing_subscriber::{fmt, layer::SubscriberExt, registry::LookupSpan};\n\n/// Initialize logging to stderr, and OpenTelemetry tracing and exporter.\n///\n/// Logging is configured using either `default_log_level` or\n/// `RUST_LOG` environment variable as default log level.\n///\n/// OpenTelemetry is configured with OTLP/HTTP exporter. It picks up\n/// configuration from environment variables. For example, to change the destination,\n/// set `OTEL_EXPORTER_OTLP_ENDPOINT=http://jaeger:4318`. See\n/// `tracing-utils` package description.\n///\npub fn init_tracing_and_logging(\n    default_log_level: &str,\n    log_dir_opt: &Option<String>,\n) -> anyhow::Result<(\n    Option<tracing_utils::Provider>,\n    Option<tracing_appender::non_blocking::WorkerGuard>,\n)> {\n    // Initialize Logging\n    let env_filter = tracing_subscriber::EnvFilter::try_from_default_env()\n        .unwrap_or_else(|_| tracing_subscriber::EnvFilter::new(default_log_level));\n\n    // Standard output streams\n    let fmt_layer = tracing_subscriber::fmt::layer()\n        .with_ansi(false)\n        .with_target(false)\n        .with_writer(std::io::stderr);\n\n    // Logs with file rotation. 
Files in `$log_dir/pgcctl.yyyy-MM-dd`\n    let (json_to_file_layer, _file_logs_guard) = if let Some(log_dir) = log_dir_opt {\n        std::fs::create_dir_all(log_dir)?;\n        let file_logs_appender = tracing_appender::rolling::RollingFileAppender::builder()\n            .rotation(tracing_appender::rolling::Rotation::DAILY)\n            .filename_prefix(\"pgcctl\")\n            // Lib appends to existing files, so we will keep files for up to 2 days even on restart loops.\n            // At minimum, log-daemon will have 1 day to detect and upload a file (if created right before midnight).\n            .max_log_files(2)\n            .build(log_dir)\n            .expect(\"Initializing rolling file appender should succeed\");\n        let (file_logs_writer, _file_logs_guard) =\n            tracing_appender::non_blocking(file_logs_appender);\n        let json_to_file_layer = tracing_subscriber::fmt::layer()\n            .with_ansi(false)\n            .with_target(false)\n            .event_format(PgJsonLogShapeFormatter)\n            .with_writer(file_logs_writer);\n        (Some(json_to_file_layer), Some(_file_logs_guard))\n    } else {\n        (None, None)\n    };\n\n    // Initialize OpenTelemetry\n    let provider =\n        tracing_utils::init_tracing(\"compute_ctl\", tracing_utils::ExportConfig::default());\n    let otlp_layer = provider.as_ref().map(tracing_utils::layer);\n\n    // Put it all together\n    tracing_subscriber::registry()\n        .with(env_filter)\n        .with(otlp_layer)\n        .with(fmt_layer)\n        .with(json_to_file_layer)\n        .init();\n    tracing::info!(\"logging and tracing started\");\n\n    utils::logging::replace_panic_hook_with_tracing_panic_hook().forget();\n\n    Ok((provider, _file_logs_guard))\n}\n\n/// Replace all newline characters with a special character to make it\n/// easier to grep for log messages.\npub fn inlinify(s: &str) -> String {\n    s.replace('\\n', \"\\u{200B}\")\n}\n\npub fn 
startup_context_from_env() -> Option<opentelemetry::Context> {\n    // Extract OpenTelemetry context for the startup actions from the\n    // TRACEPARENT and TRACESTATE env variables, and attach it to the current\n    // tracing context.\n    //\n    // This is used to propagate the context for the 'start_compute' operation\n    // from the neon control plane. This allows linking together the wider\n    // 'start_compute' operation that creates the compute container, with the\n    // startup actions here within the container.\n    //\n    // There is no standard for passing context in env variables, but a lot of\n    // tools use TRACEPARENT/TRACESTATE, so we use that convention too. See\n    // https://github.com/open-telemetry/opentelemetry-specification/issues/740\n    //\n    // Switch to the startup context here, and exit it once the startup has\n    // completed and Postgres is up and running.\n    //\n    // If this pod is pre-created without binding it to any particular endpoint\n    // yet, this isn't the right place to enter the startup context. In that\n    // case, the control plane should pass the tracing context as part of the\n    // /configure API call.\n    //\n    // NOTE: This is supposed to only cover the *startup* actions. Once\n    // postgres is configured and up-and-running, we exit this span. 
Any other\n    // actions that are performed on incoming HTTP requests, for example, are\n    // performed in separate spans.\n    //\n    // XXX: If the pod is restarted, we perform the startup actions in the same\n    // context as the original startup actions, which probably doesn't make\n    // sense.\n    let mut startup_tracing_carrier: HashMap<String, String> = HashMap::new();\n    if let Ok(val) = std::env::var(\"TRACEPARENT\") {\n        startup_tracing_carrier.insert(\"traceparent\".to_string(), val);\n    }\n    if let Ok(val) = std::env::var(\"TRACESTATE\") {\n        startup_tracing_carrier.insert(\"tracestate\".to_string(), val);\n    }\n    if !startup_tracing_carrier.is_empty() {\n        use opentelemetry::propagation::TextMapPropagator;\n        use opentelemetry_sdk::propagation::TraceContextPropagator;\n        info!(\"got startup tracing context from env variables\");\n        Some(TraceContextPropagator::new().extract(&startup_tracing_carrier))\n    } else {\n        None\n    }\n}\n\n/// Track relevant id's\nconst UNKNOWN_IDS: &str = r#\"\"pg_instance_id\": \"\", \"pg_compute_id\": \"\"\"#;\nstatic IDS: LazyLock<RwLock<String>> = LazyLock::new(|| RwLock::new(UNKNOWN_IDS.to_string()));\n\npub fn update_ids(instance_id: &Option<String>, compute_id: &Option<String>) -> anyhow::Result<()> {\n    let ids = format!(\n        r#\"\"pg_instance_id\": \"{}\", \"pg_compute_id\": \"{}\"\"#,\n        instance_id.as_ref().map(|s| s.as_str()).unwrap_or_default(),\n        compute_id.as_ref().map(|s| s.as_str()).unwrap_or_default()\n    );\n    let mut guard = IDS\n        .write()\n        .map_err(|e| anyhow::anyhow!(\"Log set id's rwlock poisoned: {}\", e))?;\n    *guard = ids;\n    Ok(())\n}\n\n/// Massage compute_ctl logs into PG json log shape so we can use the same Lumberjack setup.\nstruct PgJsonLogShapeFormatter;\nimpl<S, N> fmt::format::FormatEvent<S, N> for PgJsonLogShapeFormatter\nwhere\n    S: Subscriber + for<'a> LookupSpan<'a>,\n    N: 
for<'a> fmt::format::FormatFields<'a> + 'static,\n{\n    fn format_event(\n        &self,\n        ctx: &fmt::FmtContext<'_, S, N>,\n        mut writer: fmt::format::Writer<'_>,\n        event: &tracing::Event<'_>,\n    ) -> std::fmt::Result {\n        // Format values from the event's metadata, and open message string\n        let metadata = event.metadata();\n        {\n            let ids_guard = IDS.read();\n            let ids = ids_guard\n                .as_ref()\n                .map(|guard| guard.as_str())\n                // Suppress so that we don't lose all uploaded/ file logs if something goes super wrong. We would notice the missing id's.\n                .unwrap_or(UNKNOWN_IDS);\n            write!(\n                &mut writer,\n                r#\"{{\"timestamp\": \"{}\", \"error_severity\": \"{}\", \"file_name\": \"{}\", \"backend_type\": \"compute_ctl_self\", {}, \"message\": \"#,\n                chrono::Utc::now().format(\"%Y-%m-%d %H:%M:%S%.3f GMT\"),\n                metadata.level(),\n                metadata.target(),\n                ids\n            )?;\n        }\n\n        let mut message = String::new();\n        let message_writer = fmt::format::Writer::new(&mut message);\n\n        // Gather the message\n        ctx.field_format().format_fields(message_writer, event)?;\n\n        // TODO: any better options than to copy-paste this OSS span formatter?\n        // impl<S, N, T> FormatEvent<S, N> for Format<Full, T>\n        // https://docs.rs/tracing-subscriber/latest/tracing_subscriber/fmt/trait.FormatEvent.html#impl-FormatEvent%3CS,+N%3E-for-Format%3CFull,+T%3E\n\n        // write message, close bracket, and new line\n        writeln!(writer, \"{}}}\", serde_json::to_string(&message).unwrap())\n    }\n}\n\n#[cfg(feature = \"testing\")]\n#[cfg(test)]\nmod test {\n    use super::*;\n    use std::{cell::RefCell, io};\n\n    // Use thread_local! instead of Mutex for test isolation\n    thread_local! 
{\n        static WRITER_OUTPUT: RefCell<String> = const { RefCell::new(String::new()) };\n    }\n\n    #[derive(Clone, Default)]\n    struct StaticStringWriter;\n\n    impl io::Write for StaticStringWriter {\n        fn write(&mut self, buf: &[u8]) -> io::Result<usize> {\n            let output = String::from_utf8(buf.to_vec()).expect(\"Invalid UTF-8 in test output\");\n            WRITER_OUTPUT.with(|s| s.borrow_mut().push_str(&output));\n            Ok(buf.len())\n        }\n\n        fn flush(&mut self) -> io::Result<()> {\n            Ok(())\n        }\n    }\n\n    impl fmt::MakeWriter<'_> for StaticStringWriter {\n        type Writer = Self;\n\n        fn make_writer(&self) -> Self::Writer {\n            Self\n        }\n    }\n\n    #[test]\n    fn test_log_pg_json_shape_formatter() {\n        // Use a scoped subscriber to prevent global state pollution\n        let subscriber = tracing_subscriber::registry().with(\n            tracing_subscriber::fmt::layer()\n                .with_ansi(false)\n                .with_target(false)\n                .event_format(PgJsonLogShapeFormatter)\n                .with_writer(StaticStringWriter),\n        );\n\n        let _ = update_ids(&Some(\"000\".to_string()), &Some(\"111\".to_string()));\n\n        // Clear any previous test state\n        WRITER_OUTPUT.with(|s| s.borrow_mut().clear());\n\n        let messages = [\n            \"test message\",\n            r#\"json escape check:  name=\"BatchSpanProcessor.Flush.ExportError\" reason=\"Other(reqwest::Error { kind: Request, url: \\\"http://localhost:4318/v1/traces\\\", source: hyper_\n            util::client::legacy::Error(Connect, ConnectError(\\\"tcp connect error\\\", Os { code: 111, kind: ConnectionRefused, message: \\\"Connection refused\\\" })) })\" Failed during the export process\"#,\n        ];\n\n        tracing::subscriber::with_default(subscriber, || {\n            for message in messages {\n                tracing::info!(message);\n            }\n    
    });\n        tracing::info!(\"not test message\");\n\n        // Get captured output\n        let output = WRITER_OUTPUT.with(|s| s.borrow().clone());\n\n        let json_strings: Vec<&str> = output.lines().collect();\n        assert_eq!(\n            json_strings.len(),\n            messages.len(),\n            \"Log didn't have the expected number of json strings.\"\n        );\n\n        let json_string_shape_regex = regex::Regex::new(\n            r#\"\\{\"timestamp\": \"\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2}\\.\\d{3} GMT\", \"error_severity\": \"INFO\", \"file_name\": \".+\", \"backend_type\": \"compute_ctl_self\", \"pg_instance_id\": \"000\", \"pg_compute_id\": \"111\", \"message\": \".+\"\\}\"#\n        ).unwrap();\n\n        for (i, expected_message) in messages.iter().enumerate() {\n            let json_string = json_strings[i];\n            assert!(\n                json_string_shape_regex.is_match(json_string),\n                \"Json log didn't match expected pattern:\\n{json_string}\",\n            );\n            let parsed_json: serde_json::Value = serde_json::from_str(json_string).unwrap();\n            let actual_message = parsed_json[\"message\"].as_str().unwrap();\n            assert_eq!(*expected_message, actual_message);\n        }\n    }\n}\n"
  },
  {
    "path": "compute_tools/src/lsn_lease.rs",
    "content": "use std::str::FromStr;\nuse std::sync::Arc;\nuse std::thread;\nuse std::time::{Duration, SystemTime};\n\nuse anyhow::{Result, bail};\nuse compute_api::spec::{ComputeMode, PageserverConnectionInfo, PageserverProtocol};\nuse pageserver_page_api as page_api;\nuse postgres::{NoTls, SimpleQueryMessage};\nuse tracing::{info, warn};\nuse utils::id::{TenantId, TimelineId};\nuse utils::lsn::Lsn;\nuse utils::shard::TenantShardId;\n\nuse crate::compute::ComputeNode;\n\n/// Spawns a background thread to periodically renew LSN leases for static compute.\n/// Do nothing if the compute is not in static mode.\npub fn launch_lsn_lease_bg_task_for_static(compute: &Arc<ComputeNode>) {\n    let (tenant_id, timeline_id, lsn) = {\n        let state = compute.state.lock().unwrap();\n        let spec = state.pspec.as_ref().expect(\"Spec must be set\");\n        match spec.spec.mode {\n            ComputeMode::Static(lsn) => (spec.tenant_id, spec.timeline_id, lsn),\n            _ => return,\n        }\n    };\n    let compute = compute.clone();\n\n    let span = tracing::info_span!(\"lsn_lease_bg_task\", %tenant_id, %timeline_id, %lsn);\n    thread::spawn(move || {\n        let _entered = span.entered();\n        if let Err(e) = lsn_lease_bg_task(compute, tenant_id, timeline_id, lsn) {\n            // TODO: might need stronger error feedback than logging a warning.\n            warn!(\"Exited with error: {e}\");\n        }\n    });\n}\n\n/// Renews lsn lease periodically so static compute are not affected by GC.\nfn lsn_lease_bg_task(\n    compute: Arc<ComputeNode>,\n    tenant_id: TenantId,\n    timeline_id: TimelineId,\n    lsn: Lsn,\n) -> Result<()> {\n    loop {\n        let valid_until = acquire_lsn_lease_with_retry(&compute, tenant_id, timeline_id, lsn)?;\n        let valid_duration = valid_until\n            .duration_since(SystemTime::now())\n            .unwrap_or(Duration::ZERO);\n\n        // Sleep for 60 seconds less than the valid duration but no less than 
half of the valid duration.\n        let sleep_duration = valid_duration\n            .saturating_sub(Duration::from_secs(60))\n            .max(valid_duration / 2);\n\n        info!(\n            \"Request succeeded, sleeping for {} seconds\",\n            sleep_duration.as_secs()\n        );\n        compute.wait_timeout_while_pageserver_connstr_unchanged(sleep_duration);\n    }\n}\n\n/// Acquires lsn lease in a retry loop. Returns the expiration time if a lease is granted.\n/// Returns an error if a lease is explicitly not granted. Otherwise, we keep sending requests.\nfn acquire_lsn_lease_with_retry(\n    compute: &Arc<ComputeNode>,\n    tenant_id: TenantId,\n    timeline_id: TimelineId,\n    lsn: Lsn,\n) -> Result<SystemTime> {\n    let mut attempts = 0usize;\n    let mut retry_period_ms: f64 = 500.0;\n    const MAX_RETRY_PERIOD_MS: f64 = 60.0 * 1000.0;\n\n    loop {\n        // Note: List of pageservers is dynamic, need to re-read configs before each attempt.\n        let (conninfo, auth) = {\n            let state = compute.state.lock().unwrap();\n            let spec = state.pspec.as_ref().expect(\"spec must be set\");\n            (\n                spec.pageserver_conninfo.clone(),\n                spec.storage_auth_token.clone(),\n            )\n        };\n\n        let result = try_acquire_lsn_lease(conninfo, auth.as_deref(), tenant_id, timeline_id, lsn);\n        match result {\n            Ok(Some(res)) => {\n                return Ok(res);\n            }\n            Ok(None) => {\n                bail!(\"Permanent error: lease could not be obtained, LSN is behind the GC cutoff\");\n            }\n            Err(e) => {\n                warn!(\"Failed to acquire lsn lease: {e} (attempt {attempts})\");\n\n                compute.wait_timeout_while_pageserver_connstr_unchanged(Duration::from_millis(\n                    retry_period_ms as u64,\n                ));\n                retry_period_ms *= 1.5;\n                retry_period_ms = 
retry_period_ms.min(MAX_RETRY_PERIOD_MS);\n            }\n        }\n        attempts += 1;\n    }\n}\n\n/// Tries to acquire LSN leases on all Pageserver shards.\nfn try_acquire_lsn_lease(\n    conninfo: PageserverConnectionInfo,\n    auth: Option<&str>,\n    tenant_id: TenantId,\n    timeline_id: TimelineId,\n    lsn: Lsn,\n) -> Result<Option<SystemTime>> {\n    let mut leases = Vec::new();\n\n    for (shard_index, shard) in conninfo.shards.into_iter() {\n        let tenant_shard_id = TenantShardId {\n            tenant_id,\n            shard_number: shard_index.shard_number,\n            shard_count: shard_index.shard_count,\n        };\n\n        // XXX: If there are more than one pageserver for the one shard, do we need to get a\n        // lease on all of them? Currently, that's what we assume, but this is hypothetical\n        // as of this writing, as we never pass the info for more than one pageserver per\n        // shard.\n        for pageserver in shard.pageservers {\n            let lease = match conninfo.prefer_protocol {\n                PageserverProtocol::Grpc => acquire_lsn_lease_grpc(\n                    &pageserver.grpc_url.unwrap(),\n                    auth,\n                    tenant_shard_id,\n                    timeline_id,\n                    lsn,\n                )?,\n                PageserverProtocol::Libpq => acquire_lsn_lease_libpq(\n                    &pageserver.libpq_url.unwrap(),\n                    auth,\n                    tenant_shard_id,\n                    timeline_id,\n                    lsn,\n                )?,\n            };\n            leases.push(lease);\n        }\n    }\n\n    Ok(leases.into_iter().min().flatten())\n}\n\n/// Acquires an LSN lease on a single shard, using the libpq API. 
The connstring must use a\n/// postgresql:// scheme.\nfn acquire_lsn_lease_libpq(\n    connstring: &str,\n    auth: Option<&str>,\n    tenant_shard_id: TenantShardId,\n    timeline_id: TimelineId,\n    lsn: Lsn,\n) -> Result<Option<SystemTime>> {\n    let mut config = postgres::Config::from_str(connstring)?;\n    if let Some(auth) = auth {\n        config.password(auth);\n    }\n    let mut client = config.connect(NoTls)?;\n    let cmd = format!(\"lease lsn {tenant_shard_id} {timeline_id} {lsn} \");\n    let res = client.simple_query(&cmd)?;\n    let msg = match res.first() {\n        Some(msg) => msg,\n        None => bail!(\"empty response\"),\n    };\n    let row = match msg {\n        SimpleQueryMessage::Row(row) => row,\n        _ => bail!(\"error parsing lsn lease response\"),\n    };\n\n    // Note: this will be None if a lease is explicitly not granted.\n    let valid_until_str = row.get(\"valid_until\");\n\n    let valid_until = valid_until_str.map(|s| {\n        SystemTime::UNIX_EPOCH\n            .checked_add(Duration::from_millis(u128::from_str(s).unwrap() as u64))\n            .expect(\"Time larger than max SystemTime could handle\")\n    });\n    Ok(valid_until)\n}\n\n/// Acquires an LSN lease on a single shard, using the gRPC API. 
The connstring must use a\n/// grpc:// scheme.\nfn acquire_lsn_lease_grpc(\n    connstring: &str,\n    auth: Option<&str>,\n    tenant_shard_id: TenantShardId,\n    timeline_id: TimelineId,\n    lsn: Lsn,\n) -> Result<Option<SystemTime>> {\n    tokio::runtime::Handle::current().block_on(async move {\n        let mut client = page_api::Client::connect(\n            connstring.to_string(),\n            tenant_shard_id.tenant_id,\n            timeline_id,\n            tenant_shard_id.to_index(),\n            auth.map(String::from),\n            None,\n        )\n        .await?;\n\n        let req = page_api::LeaseLsnRequest { lsn };\n        match client.lease_lsn(req).await {\n            Ok(expires) => Ok(Some(expires)),\n            // Lease couldn't be acquired because the LSN has been garbage collected.\n            Err(err) if err.code() == tonic::Code::FailedPrecondition => Ok(None),\n            Err(err) => Err(err.into()),\n        }\n    })\n}\n"
  },
  {
    "path": "compute_tools/src/metrics.rs",
    "content": "use metrics::core::{AtomicF64, AtomicU64, Collector, GenericCounter, GenericGauge};\nuse metrics::proto::MetricFamily;\nuse metrics::{\n    IntCounter, IntCounterVec, IntGaugeVec, UIntGaugeVec, register_gauge, register_int_counter,\n    register_int_counter_vec, register_int_gauge_vec, register_uint_gauge_vec,\n};\nuse once_cell::sync::Lazy;\n\npub(crate) static INSTALLED_EXTENSIONS: Lazy<UIntGaugeVec> = Lazy::new(|| {\n    register_uint_gauge_vec!(\n        \"compute_installed_extensions\",\n        \"Number of databases where the version of extension is installed\",\n        &[\"extension_name\", \"version\", \"owned_by_superuser\"]\n    )\n    .expect(\"failed to define a metric\")\n});\n\n// Normally, any HTTP API request is described by METHOD (e.g. GET, POST, etc.) + PATH,\n// but for all our APIs we defined a 'slug'/method/operationId in the OpenAPI spec.\n// And it's fair to call it a 'RPC' (Remote Procedure Call).\npub enum CPlaneRequestRPC {\n    GetConfig,\n}\n\nimpl CPlaneRequestRPC {\n    pub fn as_str(&self) -> &str {\n        match self {\n            CPlaneRequestRPC::GetConfig => \"GetConfig\",\n        }\n    }\n}\n\npub const UNKNOWN_HTTP_STATUS: &str = \"unknown\";\n\npub(crate) static CPLANE_REQUESTS_TOTAL: Lazy<IntCounterVec> = Lazy::new(|| {\n    register_int_counter_vec!(\n        \"compute_ctl_cplane_requests_total\",\n        \"Total number of control plane requests made by compute_ctl by status\",\n        &[\"rpc\", \"http_status\"]\n    )\n    .expect(\"failed to define a metric\")\n});\n\n/// Total number of failed database migrations. 
Per-compute, this is actually a boolean metric,\n/// either empty or with a single value (1, migration_id) because we stop at the first failure.\n/// Yet, the sum over the fleet will provide the total number of failures.\npub(crate) static DB_MIGRATION_FAILED: Lazy<IntCounterVec> = Lazy::new(|| {\n    register_int_counter_vec!(\n        \"compute_ctl_db_migration_failed_total\",\n        \"Total number of failed database migrations\",\n        &[\"migration_id\"]\n    )\n    .expect(\"failed to define a metric\")\n});\n\npub(crate) static REMOTE_EXT_REQUESTS_TOTAL: Lazy<IntCounterVec> = Lazy::new(|| {\n    register_int_counter_vec!(\n        \"compute_ctl_remote_ext_requests_total\",\n        \"Total number of requests made by compute_ctl to download extensions from S3 proxy by status\",\n        &[\"http_status\", \"filename\"]\n    )\n    .expect(\"failed to define a metric\")\n});\n\n// Size of audit log directory in bytes\npub(crate) static AUDIT_LOG_DIR_SIZE: Lazy<GenericGauge<AtomicF64>> = Lazy::new(|| {\n    register_gauge!(\n        \"compute_audit_log_dir_size\",\n        \"Size of audit log directory in bytes\",\n    )\n    .expect(\"failed to define a metric\")\n});\n\n// Report that `compute_ctl` is up and what's the current compute status.\npub(crate) static COMPUTE_CTL_UP: Lazy<IntGaugeVec> = Lazy::new(|| {\n    register_int_gauge_vec!(\n        \"compute_ctl_up\",\n        \"Whether compute_ctl is running\",\n        &[\"build_tag\", \"status\"]\n    )\n    .expect(\"failed to define a metric\")\n});\n\npub(crate) static PG_CURR_DOWNTIME_MS: Lazy<GenericGauge<AtomicF64>> = Lazy::new(|| {\n    register_gauge!(\n        \"compute_pg_current_downtime_ms\",\n        \"Non-cumulative duration of Postgres downtime in ms; resets after successful check\",\n    )\n    .expect(\"failed to define a metric\")\n});\n\npub(crate) static PG_TOTAL_DOWNTIME_MS: Lazy<GenericCounter<AtomicU64>> = Lazy::new(|| {\n    register_int_counter!(\n        
\"compute_pg_downtime_ms_total\",\n        \"Cumulative duration of Postgres downtime in ms\",\n    )\n    .expect(\"failed to define a metric\")\n});\n\npub(crate) static LFC_PREWARMS: Lazy<IntCounter> = Lazy::new(|| {\n    register_int_counter!(\n        \"compute_ctl_lfc_prewarms_total\",\n        \"Total number of LFC prewarms requested by compute_ctl or autoprewarm option\",\n    )\n    .expect(\"failed to define a metric\")\n});\n\npub(crate) static LFC_PREWARM_ERRORS: Lazy<IntCounter> = Lazy::new(|| {\n    register_int_counter!(\n        \"compute_ctl_lfc_prewarm_errors_total\",\n        \"Total number of LFC prewarm errors\",\n    )\n    .expect(\"failed to define a metric\")\n});\n\npub(crate) static LFC_OFFLOADS: Lazy<IntCounter> = Lazy::new(|| {\n    register_int_counter!(\n        \"compute_ctl_lfc_offloads_total\",\n        \"Total number of LFC offloads requested by compute_ctl or lfc_offload_period_seconds option\",\n    )\n    .expect(\"failed to define a metric\")\n});\n\npub(crate) static LFC_OFFLOAD_ERRORS: Lazy<IntCounter> = Lazy::new(|| {\n    register_int_counter!(\n        \"compute_ctl_lfc_offload_errors_total\",\n        \"Total number of LFC offload errors\",\n    )\n    .expect(\"failed to define a metric\")\n});\n\npub fn collect() -> Vec<MetricFamily> {\n    let mut metrics = COMPUTE_CTL_UP.collect();\n    metrics.extend(INSTALLED_EXTENSIONS.collect());\n    metrics.extend(CPLANE_REQUESTS_TOTAL.collect());\n    metrics.extend(REMOTE_EXT_REQUESTS_TOTAL.collect());\n    metrics.extend(DB_MIGRATION_FAILED.collect());\n    metrics.extend(AUDIT_LOG_DIR_SIZE.collect());\n    metrics.extend(PG_CURR_DOWNTIME_MS.collect());\n    metrics.extend(PG_TOTAL_DOWNTIME_MS.collect());\n    metrics.extend(LFC_PREWARMS.collect());\n    metrics.extend(LFC_PREWARM_ERRORS.collect());\n    metrics.extend(LFC_OFFLOADS.collect());\n    metrics.extend(LFC_OFFLOAD_ERRORS.collect());\n    metrics\n}\n"
  },
  {
    "path": "compute_tools/src/migration.rs",
    "content": "use anyhow::{Context, Result};\nuse fail::fail_point;\nuse tokio_postgres::{Client, Transaction};\nuse tracing::{error, info};\n\nuse crate::metrics::DB_MIGRATION_FAILED;\n\n/// Runs a series of migrations on a target database\npub(crate) struct MigrationRunner<'m> {\n    client: &'m mut Client,\n    migrations: &'m [&'m str],\n    lakebase_mode: bool,\n}\n\nimpl<'m> MigrationRunner<'m> {\n    /// Create a new migration runner\n    pub fn new(client: &'m mut Client, migrations: &'m [&'m str], lakebase_mode: bool) -> Self {\n        // The neon_migration.migration_id::id column is a bigint, which is equivalent to an i64\n        assert!(migrations.len() + 1 < i64::MAX as usize);\n\n        Self {\n            client,\n            migrations,\n            lakebase_mode,\n        }\n    }\n\n    /// Get the current value neon_migration.migration_id\n    async fn get_migration_id(&mut self) -> Result<i64> {\n        let row = self\n            .client\n            .query_one(\"SELECT id FROM neon_migration.migration_id\", &[])\n            .await?;\n\n        Ok(row.get::<&str, i64>(\"id\"))\n    }\n\n    /// Update the neon_migration.migration_id value\n    ///\n    /// This function has a fail point called compute-migration, which can be\n    /// used if you would like to fail the application of a series of migrations\n    /// at some point.\n    async fn update_migration_id(txn: &mut Transaction<'_>, migration_id: i64) -> Result<()> {\n        // We use this fail point in order to check that failing in the\n        // middle of applying a series of migrations fails in an expected\n        // manner\n        if cfg!(feature = \"testing\") {\n            let fail = (|| {\n                fail_point!(\"compute-migration\", |fail_migration_id| {\n                    migration_id == fail_migration_id.unwrap().parse::<i64>().unwrap()\n                });\n\n                false\n            })();\n\n            if fail {\n                return 
Err(anyhow::anyhow!(format!(\n                    \"migration {} was configured to fail because of a failpoint\",\n                    migration_id\n                )));\n            }\n        }\n\n        txn.query(\n            \"UPDATE neon_migration.migration_id SET id = $1\",\n            &[&migration_id],\n        )\n        .await\n        .with_context(|| format!(\"update neon_migration.migration_id to {migration_id}\"))?;\n\n        Ok(())\n    }\n\n    /// Prepare the migrations the target database for handling migrations\n    async fn prepare_database(&mut self) -> Result<()> {\n        self.client\n            .simple_query(\"CREATE SCHEMA IF NOT EXISTS neon_migration\")\n            .await?;\n        self.client.simple_query(\"CREATE TABLE IF NOT EXISTS neon_migration.migration_id (key pg_catalog.int4 NOT NULL PRIMARY KEY, id pg_catalog.int8 NOT NULL DEFAULT 0)\").await?;\n        self.client\n            .simple_query(\n                \"INSERT INTO neon_migration.migration_id VALUES (0, 0) ON CONFLICT DO NOTHING\",\n            )\n            .await?;\n        self.client\n            .simple_query(\"ALTER SCHEMA neon_migration OWNER TO cloud_admin\")\n            .await?;\n        self.client\n            .simple_query(\"REVOKE ALL ON SCHEMA neon_migration FROM PUBLIC\")\n            .await?;\n\n        Ok(())\n    }\n\n    /// Run an individual migration in a separate transaction block.\n    async fn run_migration(client: &mut Client, migration_id: i64, migration: &str) -> Result<()> {\n        let mut txn = client\n            .transaction()\n            .await\n            .with_context(|| format!(\"begin transaction for migration {migration_id}\"))?;\n\n        if migration.starts_with(\"-- SKIP\") {\n            info!(\"Skipping migration id={}\", migration_id);\n\n            // Even though we are skipping the migration, updating the\n            // migration ID should help keep logic easy to understand when\n            // trying to 
understand the state of a cluster.\n            Self::update_migration_id(&mut txn, migration_id).await?;\n        } else {\n            info!(\"Running migration id={}:\\n{}\\n\", migration_id, migration);\n\n            txn.simple_query(migration)\n                .await\n                .with_context(|| format!(\"apply migration {migration_id}\"))?;\n\n            Self::update_migration_id(&mut txn, migration_id).await?;\n        }\n\n        txn.commit()\n            .await\n            .with_context(|| format!(\"commit transaction for migration {migration_id}\"))?;\n\n        Ok(())\n    }\n\n    /// Run the configured set of migrations\n    pub async fn run_migrations(mut self) -> Result<()> {\n        self.prepare_database()\n            .await\n            .context(\"prepare database to handle migrations\")?;\n\n        let mut current_migration = self.get_migration_id().await? as usize;\n        while current_migration < self.migrations.len() {\n            // The index lags the migration ID by 1, so the current migration\n            // ID is also the next index\n            let migration_id = (current_migration + 1) as i64;\n            let migration = self.migrations[current_migration];\n            let migration = if self.lakebase_mode {\n                migration.replace(\"neon_superuser\", \"databricks_superuser\")\n            } else {\n                migration.to_string()\n            };\n\n            match Self::run_migration(self.client, migration_id, &migration).await {\n                Ok(_) => {\n                    info!(\"Finished migration id={}\", migration_id);\n                }\n                Err(e) => {\n                    error!(\"Failed to run migration id={}: {:?}\", migration_id, e);\n                    DB_MIGRATION_FAILED\n                        .with_label_values(&[migration_id.to_string().as_str()])\n                        .inc();\n                    return Err(e);\n                }\n            }\n\n            
current_migration += 1;\n        }\n\n        Ok(())\n    }\n}\n"
  },
  {
    "path": "compute_tools/src/migrations/0001-add_bypass_rls_to_privileged_role.sql",
    "content": "ALTER ROLE {privileged_role_name} BYPASSRLS;\n"
  },
  {
    "path": "compute_tools/src/migrations/0002-alter_roles.sql",
    "content": "-- On December 8th, 2023, an engineering escalation (INC-110) was opened after\n-- it was found that BYPASSRLS was being applied to all roles.\n--\n-- PR that introduced the issue: https://github.com/neondatabase/neon/pull/5657\n-- Subsequent commit on main: https://github.com/neondatabase/neon/commit/ad99fa5f0393e2679e5323df653c508ffa0ac072\n--\n-- NOBYPASSRLS and INHERIT are the defaults for a Postgres role, but because it\n-- isn't easy to know if a Postgres cluster is affected by the issue, we need to\n-- keep the migration around for a long time, if not indefinitely, so any\n-- cluster can be fixed.\n--\n-- Branching is the gift that keeps on giving...\n\nDO $$\nDECLARE\n    role_name text;\nBEGIN\n    FOR role_name IN SELECT rolname FROM pg_catalog.pg_roles WHERE pg_catalog.pg_has_role(rolname, '{privileged_role_name}', 'member')\n    LOOP\n        RAISE NOTICE 'EXECUTING ALTER ROLE % INHERIT', pg_catalog.quote_ident(role_name);\n        EXECUTE pg_catalog.format('ALTER ROLE %I INHERIT;', role_name);\n    END LOOP;\n\n    FOR role_name IN SELECT rolname FROM pg_catalog.pg_roles\n        WHERE\n            NOT pg_catalog.pg_has_role(rolname, '{privileged_role_name}', 'member') AND NOT pg_catalog.starts_with(rolname, 'pg_')\n    LOOP\n        RAISE NOTICE 'EXECUTING ALTER ROLE % NOBYPASSRLS', pg_catalog.quote_ident(role_name);\n        EXECUTE pg_catalog.format('ALTER ROLE %I NOBYPASSRLS;', role_name);\n    END LOOP;\nEND $$;\n"
  },
  {
    "path": "compute_tools/src/migrations/0003-grant_pg_create_subscription_to_privileged_role.sql",
    "content": "DO $$\nBEGIN\n    IF (SELECT setting::pg_catalog.numeric >= 160000 FROM pg_catalog.pg_settings WHERE name = 'server_version_num') THEN\n        EXECUTE 'GRANT pg_create_subscription TO {privileged_role_name}';\n    END IF;\nEND $$;\n"
  },
  {
    "path": "compute_tools/src/migrations/0004-grant_pg_monitor_to_privileged_role.sql",
    "content": "GRANT pg_monitor TO {privileged_role_name} WITH ADMIN OPTION;\n"
  },
  {
    "path": "compute_tools/src/migrations/0005-grant_all_on_tables_to_privileged_role.sql",
    "content": "-- SKIP: Deemed insufficient for allowing relations created by extensions to be\n--       interacted with by {privileged_role_name} without permission issues.\n\nALTER DEFAULT PRIVILEGES IN SCHEMA public GRANT ALL ON TABLES TO {privileged_role_name};\n"
  },
  {
    "path": "compute_tools/src/migrations/0006-grant_all_on_sequences_to_privileged_role.sql",
    "content": "-- SKIP: Deemed insufficient for allowing relations created by extensions to be\n--       interacted with by {privileged_role_name} without permission issues.\n\nALTER DEFAULT PRIVILEGES IN SCHEMA public GRANT ALL ON SEQUENCES TO {privileged_role_name};\n"
  },
  {
    "path": "compute_tools/src/migrations/0007-grant_all_on_tables_with_grant_option_to_privileged_role.sql",
    "content": "-- SKIP: Moved inline to the handle_grants() functions.\n\nALTER DEFAULT PRIVILEGES IN SCHEMA public GRANT ALL ON TABLES TO {privileged_role_name} WITH GRANT OPTION;\n"
  },
  {
    "path": "compute_tools/src/migrations/0008-grant_all_on_sequences_with_grant_option_to_privileged_role.sql",
    "content": "-- SKIP: Moved inline to the handle_grants() functions.\n\nALTER DEFAULT PRIVILEGES IN SCHEMA public GRANT ALL ON SEQUENCES TO {privileged_role_name} WITH GRANT OPTION;\n"
  },
  {
    "path": "compute_tools/src/migrations/0009-revoke_replication_for_previously_allowed_roles.sql",
    "content": "-- SKIP: The original goal of this migration was to prevent creating\n--       subscriptions, but this migration was insufficient.\n\nDO $$\nDECLARE\n    role_name TEXT;\nBEGIN\n    FOR role_name IN SELECT rolname FROM pg_catalog.pg_roles WHERE rolreplication IS TRUE\n    LOOP\n        RAISE NOTICE 'EXECUTING ALTER ROLE % NOREPLICATION', pg_catalog.quote_ident(role_name);\n        EXECUTE pg_catalog.format('ALTER ROLE %I NOREPLICATION;', role_name);\n    END LOOP;\nEND $$;\n"
  },
  {
    "path": "compute_tools/src/migrations/0010-grant_snapshot_synchronization_funcs_to_privileged_role.sql",
    "content": "DO $$\nBEGIN\n    IF (SELECT setting::pg_catalog.numeric >= 160000 FROM pg_catalog.pg_settings WHERE name OPERATOR(pg_catalog.=) 'server_version_num'::pg_catalog.text) THEN\n       EXECUTE 'GRANT EXECUTE ON FUNCTION pg_export_snapshot TO {privileged_role_name}';\n       EXECUTE 'GRANT EXECUTE ON FUNCTION pg_log_standby_snapshot TO {privileged_role_name}';\n    END IF;\nEND $$;\n"
  },
  {
    "path": "compute_tools/src/migrations/0011-grant_pg_show_replication_origin_status_to_privileged_role.sql",
    "content": "GRANT EXECUTE ON FUNCTION pg_show_replication_origin_status TO {privileged_role_name};\n"
  },
  {
    "path": "compute_tools/src/migrations/0012-grant_pg_signal_backend_to_privileged_role.sql",
    "content": "GRANT pg_signal_backend TO {privileged_role_name} WITH ADMIN OPTION;\n"
  },
  {
    "path": "compute_tools/src/migrations/tests/0001-add_bypass_rls_to_privileged_role.sql",
    "content": "DO $$\nDECLARE\n    bypassrls boolean;\nBEGIN\n    SELECT rolbypassrls INTO bypassrls FROM pg_catalog.pg_roles WHERE rolname = 'neon_superuser';\n    IF NOT bypassrls THEN\n        RAISE EXCEPTION 'neon_superuser cannot bypass RLS';\n    END IF;\nEND $$;\n"
  },
  {
    "path": "compute_tools/src/migrations/tests/0002-alter_roles.sql",
    "content": "DO $$\nDECLARE\n    role record;\nBEGIN\n    FOR role IN\n        SELECT rolname AS name, rolinherit AS inherit\n        FROM pg_catalog.pg_roles\n        WHERE pg_catalog.pg_has_role(rolname, 'neon_superuser', 'member')\n    LOOP\n        IF NOT role.inherit THEN\n            RAISE EXCEPTION '% cannot inherit', quote_ident(role.name);\n        END IF;\n    END LOOP;\n\n    FOR role IN\n        SELECT rolname AS name, rolbypassrls AS bypassrls\n        FROM pg_catalog.pg_roles\n        WHERE NOT pg_catalog.pg_has_role(rolname, 'neon_superuser', 'member')\n            AND NOT pg_catalog.starts_with(rolname, 'pg_')\n    LOOP\n        IF role.bypassrls THEN\n            RAISE EXCEPTION  '% can bypass RLS', pg_catalog.quote_ident(role.name);\n        END IF;\n    END LOOP;\nEND $$;\n"
  },
  {
    "path": "compute_tools/src/migrations/tests/0003-grant_pg_create_subscription_to_privileged_role.sql",
    "content": "DO $$\nBEGIN\n    IF (SELECT pg_catalog.current_setting('server_version_num')::pg_catalog.numeric < 160000) THEN\n        RETURN;\n    END IF;\n\n    IF NOT (SELECT pg_catalog.pg_has_role('neon_superuser', 'pg_create_subscription', 'member')) THEN\n        RAISE EXCEPTION 'neon_superuser cannot execute pg_create_subscription';\n    END IF;\nEND $$;\n"
  },
  {
    "path": "compute_tools/src/migrations/tests/0004-grant_pg_monitor_to_privileged_role.sql",
    "content": "DO $$\nDECLARE\n    monitor record;\nBEGIN\n    SELECT pg_catalog.pg_has_role('neon_superuser', 'pg_monitor', 'member') AS member,\n            admin_option AS admin\n        INTO monitor\n        FROM pg_catalog.pg_auth_members\n        WHERE roleid = 'pg_monitor'::pg_catalog.regrole\n            AND member = 'neon_superuser'::pg_catalog.regrole;\n\n    IF monitor IS NULL THEN\n        RAISE EXCEPTION 'no entry in pg_auth_members for neon_superuser and pg_monitor';\n    END IF;\n\n    IF monitor.admin IS NULL OR NOT monitor.member THEN\n        RAISE EXCEPTION 'neon_superuser is not a member of pg_monitor';\n    END IF;\n\n    IF monitor.admin IS NULL OR NOT monitor.admin THEN\n        RAISE EXCEPTION 'neon_superuser cannot grant pg_monitor';\n    END IF;\nEND $$;\n"
  },
  {
    "path": "compute_tools/src/migrations/tests/0005-grant_all_on_tables_to_privileged_role.sql",
    "content": "-- This test was never written because at the time migration tests were added\n-- the accompanying migration was already skipped.\n"
  },
  {
    "path": "compute_tools/src/migrations/tests/0006-grant_all_on_sequences_to_privileged_role.sql",
    "content": "-- This test was never written because at the time migration tests were added\n-- the accompanying migration was already skipped.\n"
  },
  {
    "path": "compute_tools/src/migrations/tests/0007-grant_all_on_tables_with_grant_option_to_privileged_role.sql",
    "content": "-- This test was never written because at the time migration tests were added\n-- the accompanying migration was already skipped.\n"
  },
  {
    "path": "compute_tools/src/migrations/tests/0008-grant_all_on_sequences_with_grant_option_to_privileged_role.sql",
    "content": "-- This test was never written because at the time migration tests were added\n-- the accompanying migration was already skipped.\n"
  },
  {
    "path": "compute_tools/src/migrations/tests/0009-revoke_replication_for_previously_allowed_roles.sql",
    "content": "-- This test was never written because at the time migration tests were added\n-- the accompanying migration was already skipped.\n"
  },
  {
    "path": "compute_tools/src/migrations/tests/0010-grant_snapshot_synchronization_funcs_to_privileged_role.sql",
    "content": "DO $$\nDECLARE\n    can_execute boolean;\nBEGIN\n    SELECT pg_catalog.bool_and(pg_catalog.has_function_privilege('neon_superuser', oid, 'execute'))\n       INTO can_execute\n       FROM pg_catalog.pg_proc\n       WHERE proname IN ('pg_export_snapshot', 'pg_log_standby_snapshot')\n           AND pronamespace = 'pg_catalog'::pg_catalog.regnamespace;\n    IF NOT can_execute THEN\n        RAISE EXCEPTION 'neon_superuser cannot execute both pg_export_snapshot and pg_log_standby_snapshot';\n    END IF;\nEND $$;\n"
  },
  {
    "path": "compute_tools/src/migrations/tests/0011-grant_pg_show_replication_origin_status_to_privileged_role.sql",
    "content": "DO $$\nDECLARE\n    can_execute boolean;\nBEGIN\n    SELECT pg_catalog.has_function_privilege('neon_superuser', oid, 'execute')\n       INTO can_execute\n       FROM pg_catalog.pg_proc\n       WHERE proname = 'pg_show_replication_origin_status'\n           AND pronamespace = 'pg_catalog'::regnamespace;\n    IF NOT can_execute THEN\n        RAISE EXCEPTION 'neon_superuser cannot execute pg_show_replication_origin_status';\n    END IF;\nEND $$;\n"
  },
  {
    "path": "compute_tools/src/migrations/tests/0012-grant_pg_signal_backend_to_privileged_role.sql",
    "content": "DO $$\nDECLARE\n    signal_backend record;\nBEGIN\n    SELECT pg_catalog.pg_has_role('neon_superuser', 'pg_signal_backend', 'member') AS member,\n            admin_option AS admin\n        INTO signal_backend\n        FROM pg_catalog.pg_auth_members\n        WHERE roleid = 'pg_signal_backend'::regrole\n            AND member = 'neon_superuser'::regrole;\n\n    IF signal_backend IS NULL THEN\n        RAISE EXCEPTION 'no entry in pg_auth_members for neon_superuser and pg_signal_backend';\n    END IF;\n\n    IF signal_backend.member IS NULL OR NOT signal_backend.member THEN\n        RAISE EXCEPTION 'neon_superuser is not a member of pg_signal_backend';\n    END IF;\n\n    IF signal_backend.admin IS NULL OR NOT signal_backend.admin THEN\n        RAISE EXCEPTION 'neon_superuser cannot grant pg_signal_backend';\n    END IF;\nEND $$;\n"
  },
  {
    "path": "compute_tools/src/monitor.rs",
    "content": "use std::sync::Arc;\nuse std::thread;\nuse std::time::Duration;\n\nuse chrono::{DateTime, Utc};\nuse compute_api::responses::ComputeStatus;\nuse compute_api::spec::ComputeFeature;\nuse postgres::{Client, NoTls};\nuse tracing::{Level, error, info, instrument, span};\n\nuse crate::compute::ComputeNode;\nuse crate::metrics::{PG_CURR_DOWNTIME_MS, PG_TOTAL_DOWNTIME_MS};\n\nconst PG_DEFAULT_INIT_TIMEOUIT: Duration = Duration::from_secs(60);\nconst MONITOR_CHECK_INTERVAL: Duration = Duration::from_millis(500);\n\n/// Struct to store runtime state of the compute monitor thread.\n/// In theory, this could be a part of `Compute`, but i)\n/// this state is expected to be accessed only by single thread,\n/// so we don't need to care about locking; ii) `Compute` is\n/// already quite big. Thus, it seems to be a good idea to keep\n/// all the activity/health monitoring parts here.\nstruct ComputeMonitor {\n    compute: Arc<ComputeNode>,\n\n    /// The moment when Postgres had some activity,\n    /// that should prevent compute from being suspended.\n    last_active: Option<DateTime<Utc>>,\n\n    /// The moment when we last tried to check Postgres.\n    last_checked: DateTime<Utc>,\n    /// The last moment we did a successful Postgres check.\n    last_up: DateTime<Utc>,\n\n    /// Only used for internal statistics change tracking\n    /// between monitor runs and can be outdated.\n    active_time: Option<f64>,\n    /// Only used for internal statistics change tracking\n    /// between monitor runs and can be outdated.\n    sessions: Option<i64>,\n\n    /// Use experimental statistics-based activity monitor. 
It's no longer\n    /// 'experimental' per se, as it's enabled for everyone, but we still\n    /// keep the flag as an option to turn it off in some cases if it will\n    /// misbehave.\n    experimental: bool,\n}\n\nimpl ComputeMonitor {\n    fn report_down(&self) {\n        let now = Utc::now();\n\n        // Calculate and report current downtime\n        // (since the last time Postgres was up)\n        let downtime = now.signed_duration_since(self.last_up);\n        PG_CURR_DOWNTIME_MS.set(downtime.num_milliseconds() as f64);\n\n        // Calculate and update total downtime\n        // (cumulative duration of Postgres downtime in ms)\n        let inc = now\n            .signed_duration_since(self.last_checked)\n            .num_milliseconds();\n        PG_TOTAL_DOWNTIME_MS.inc_by(inc as u64);\n    }\n\n    fn report_up(&mut self) {\n        self.last_up = Utc::now();\n        PG_CURR_DOWNTIME_MS.set(0.0);\n    }\n\n    fn downtime_info(&self) -> String {\n        format!(\n            \"total_ms: {}, current_ms: {}, last_up: {}\",\n            PG_TOTAL_DOWNTIME_MS.get(),\n            PG_CURR_DOWNTIME_MS.get(),\n            self.last_up\n        )\n    }\n\n    /// Check if compute is in some terminal or soon-to-be-terminal\n    /// state, then return `true`, signalling the caller that it\n    /// should exit gracefully. 
Otherwise, return `false`.\n    fn check_interrupts(&mut self) -> bool {\n        let compute_status = self.compute.get_status();\n        if matches!(\n            compute_status,\n            ComputeStatus::Terminated\n                | ComputeStatus::TerminationPendingFast\n                | ComputeStatus::TerminationPendingImmediate\n                | ComputeStatus::Failed\n        ) {\n            info!(\n                \"compute is in {} status, stopping compute monitor\",\n                compute_status\n            );\n            return true;\n        }\n\n        false\n    }\n\n    /// Spin in a loop and figure out the last activity time in the Postgres.\n    /// Then update it in the shared state. This function currently never\n    /// errors out explicitly, but there is a graceful termination path.\n    /// Every time we receive an error trying to check Postgres, we use\n    /// [`ComputeMonitor::check_interrupts()`] because it could be that\n    /// compute is being terminated already, then we can exit gracefully\n    /// to not produce errors' noise in the log.\n    /// NB: the only expected panic is at `Mutex` unwrap(), all other errors\n    /// should be handled gracefully.\n    #[instrument(skip_all)]\n    pub fn run(&mut self) -> anyhow::Result<()> {\n        // Suppose that `connstr` doesn't change\n        let connstr = self.compute.params.connstr.clone();\n        let conf = self\n            .compute\n            .get_conn_conf(Some(\"compute_ctl:compute_monitor\"));\n\n        // During startup and configuration we connect to every Postgres database,\n        // but we don't want to count this as some user activity. 
So wait until\n        // the compute fully started before monitoring activity.\n        wait_for_postgres_start(&self.compute);\n\n        // Define `client` outside of the loop to reuse existing connection if it's active.\n        let mut client = conf.connect(NoTls);\n\n        info!(\"starting compute monitor for {}\", connstr);\n\n        loop {\n            if self.check_interrupts() {\n                break;\n            }\n\n            match &mut client {\n                Ok(cli) => {\n                    if cli.is_closed() {\n                        info!(\n                            downtime_info = self.downtime_info(),\n                            \"connection to Postgres is closed, trying to reconnect\"\n                        );\n                        if self.check_interrupts() {\n                            break;\n                        }\n\n                        self.report_down();\n\n                        // Connection is closed, reconnect and try again.\n                        client = conf.connect(NoTls);\n                    } else {\n                        match self.check(cli) {\n                            Ok(_) => {\n                                self.report_up();\n                                self.compute.update_last_active(self.last_active);\n                            }\n                            Err(e) => {\n                                error!(\n                                    downtime_info = self.downtime_info(),\n                                    \"could not check Postgres: {}\", e\n                                );\n                                if self.check_interrupts() {\n                                    break;\n                                }\n\n                                // Although we have many places where we can return errors in `check()`,\n                                // normally it shouldn't happen. 
I.e., we will likely return error if\n                                // connection got broken, query timed out, Postgres returned invalid data, etc.\n                                // In all such cases it's suspicious, so let's report this as downtime.\n                                self.report_down();\n\n                                // Reconnect to Postgres just in case. During tests, I noticed\n                                // that queries in `check()` can fail with `connection closed`,\n                                // but `cli.is_closed()` above doesn't detect it. Even if old\n                                // connection is still alive, it will be dropped when we reassign\n                                // `client` to a new connection.\n                                client = conf.connect(NoTls);\n                            }\n                        }\n                    }\n                }\n                Err(e) => {\n                    info!(\n                        downtime_info = self.downtime_info(),\n                        \"could not connect to Postgres: {}, retrying\", e\n                    );\n                    if self.check_interrupts() {\n                        break;\n                    }\n\n                    self.report_down();\n\n                    // Establish a new connection and try again.\n                    client = conf.connect(NoTls);\n                }\n            }\n\n            // Reset the `last_checked` timestamp and sleep before the next iteration.\n            self.last_checked = Utc::now();\n            thread::sleep(MONITOR_CHECK_INTERVAL);\n        }\n\n        // Graceful termination path\n        Ok(())\n    }\n\n    #[instrument(skip_all)]\n    fn check(&mut self, cli: &mut Client) -> anyhow::Result<()> {\n        // This is new logic, only enable if the feature flag is set.\n        // TODO: remove this once we are sure that it works OR drop it altogether.\n        if self.experimental {\n      
      // Check if the total active time or sessions across all databases has changed.\n            // If it did, it means that user executed some queries. In theory, it can even go down if\n            // some databases were dropped, but it's still user activity.\n            match get_database_stats(cli) {\n                Ok((active_time, sessions)) => {\n                    let mut detected_activity = false;\n\n                    if let Some(prev_active_time) = self.active_time {\n                        if active_time != prev_active_time {\n                            detected_activity = true;\n                        }\n                    }\n                    self.active_time = Some(active_time);\n\n                    if let Some(prev_sessions) = self.sessions {\n                        if sessions != prev_sessions {\n                            detected_activity = true;\n                        }\n                    }\n                    self.sessions = Some(sessions);\n\n                    if detected_activity {\n                        // Update the last active time and continue, we don't need to\n                        // check backends state change.\n                        self.last_active = Some(Utc::now());\n                        return Ok(());\n                    }\n                }\n                Err(e) => {\n                    return Err(anyhow::anyhow!(\"could not get database statistics: {}\", e));\n                }\n            }\n        }\n\n        // If database statistics are the same, check all backends for state changes.\n        // Maybe there are some with more recent activity. 
`get_backends_state_change()`\n        // can return None or stale timestamp, so it's `compute.update_last_active()`\n        // responsibility to check if the new timestamp is more recent than the current one.\n        // This helps us to discover new sessions that have not done anything yet.\n        match get_backends_state_change(cli) {\n            Ok(last_active) => match (last_active, self.last_active) {\n                (Some(last_active), Some(prev_last_active)) => {\n                    if last_active > prev_last_active {\n                        self.last_active = Some(last_active);\n                        return Ok(());\n                    }\n                }\n                (Some(last_active), None) => {\n                    self.last_active = Some(last_active);\n                    return Ok(());\n                }\n                _ => {}\n            },\n            Err(e) => {\n                return Err(anyhow::anyhow!(\n                    \"could not get backends state change: {}\",\n                    e\n                ));\n            }\n        }\n\n        // If there are existing (logical) walsenders, do not suspend.\n        //\n        // N.B. 
walproposer doesn't currently show up in pg_stat_replication,\n        // but protect if it will.\n        const WS_COUNT_QUERY: &str =\n            \"select count(*) from pg_stat_replication where application_name != 'walproposer';\";\n        match cli.query_one(WS_COUNT_QUERY, &[]) {\n            Ok(r) => match r.try_get::<&str, i64>(\"count\") {\n                Ok(num_ws) => {\n                    if num_ws > 0 {\n                        self.last_active = Some(Utc::now());\n                        return Ok(());\n                    }\n                }\n                Err(e) => {\n                    let err: anyhow::Error = e.into();\n                    return Err(err.context(\"failed to parse walsenders count\"));\n                }\n            },\n            Err(e) => {\n                return Err(anyhow::anyhow!(\"failed to get list of walsenders: {}\", e));\n            }\n        }\n\n        // Don't suspend compute if there is an active logical replication subscription\n        //\n        // `where pid is not null` – to filter out read only computes and subscription on branches\n        const LOGICAL_SUBSCRIPTIONS_QUERY: &str =\n            \"select count(*) from pg_stat_subscription where pid is not null;\";\n        match cli.query_one(LOGICAL_SUBSCRIPTIONS_QUERY, &[]) {\n            Ok(row) => match row.try_get::<&str, i64>(\"count\") {\n                Ok(num_subscribers) => {\n                    if num_subscribers > 0 {\n                        self.last_active = Some(Utc::now());\n                        return Ok(());\n                    }\n                }\n                Err(e) => {\n                    return Err(anyhow::anyhow!(\n                        \"failed to parse 'pg_stat_subscription' count: {}\",\n                        e\n                    ));\n                }\n            },\n            Err(e) => {\n                return Err(anyhow::anyhow!(\n                    \"failed to get list of active logical replication 
subscriptions: {}\",\n                    e\n                ));\n            }\n        }\n\n        // Do not suspend compute if autovacuum is running\n        const AUTOVACUUM_COUNT_QUERY: &str =\n            \"select count(*) from pg_stat_activity where backend_type = 'autovacuum worker'\";\n        match cli.query_one(AUTOVACUUM_COUNT_QUERY, &[]) {\n            Ok(r) => match r.try_get::<&str, i64>(\"count\") {\n                Ok(num_workers) => {\n                    if num_workers > 0 {\n                        self.last_active = Some(Utc::now());\n                        return Ok(());\n                    };\n                }\n                Err(e) => {\n                    return Err(anyhow::anyhow!(\n                        \"failed to parse autovacuum workers count: {}\",\n                        e\n                    ));\n                }\n            },\n            Err(e) => {\n                return Err(anyhow::anyhow!(\n                    \"failed to get list of autovacuum workers: {}\",\n                    e\n                ));\n            }\n        }\n\n        Ok(())\n    }\n}\n\n// Hang on condition variable waiting until the compute status is `Running`.\nfn wait_for_postgres_start(compute: &ComputeNode) {\n    let mut state = compute.state.lock().unwrap();\n    let pg_init_timeout = compute\n        .params\n        .pg_init_timeout\n        .unwrap_or(PG_DEFAULT_INIT_TIMEOUIT);\n\n    while state.status != ComputeStatus::Running {\n        info!(\"compute is not running, waiting before monitoring activity\");\n        if !compute.params.lakebase_mode {\n            state = compute.state_changed.wait(state).unwrap();\n\n            if state.status == ComputeStatus::Running {\n                break;\n            }\n            continue;\n        }\n\n        if state.pg_start_time.is_some()\n            && Utc::now()\n                .signed_duration_since(state.pg_start_time.unwrap())\n                .to_std()\n                
.unwrap_or_default()\n                > pg_init_timeout\n        {\n            // If Postgres isn't up and running with working PS/SK connections within `pg_init_timeout`, it is\n            // possible that we started Postgres with a wrong spec (so it is talking to the wrong PS/SK nodes). To prevent\n            // dead ends we simply exit (panic) the compute node so it can restart with the latest spec.\n            //\n            // NB: We skip this check if we have not attempted to start PG yet (indicated by state.pg_start_time == None).\n            // This is to make sure the more appropriate errors are surfaced if we encounter issues before we even attempt\n            // to start PG (e.g., if we can't pull the spec, can't sync safekeepers, or can't get the basebackup).\n            error!(\n                \"compute did not enter Running state in {} seconds, exiting\",\n                pg_init_timeout.as_secs()\n            );\n            std::process::exit(1);\n        }\n        state = compute\n            .state_changed\n            .wait_timeout(state, Duration::from_secs(5))\n            .unwrap()\n            .0;\n    }\n}\n\n// Figure out the total active time and sessions across all non-system databases.\n// Returned tuple is `(active_time, sessions)`.\n// It can return `0.0` active time or `0` sessions, which means no user databases exist OR\n// it was a start with skipped `pg_catalog` updates and user didn't do any queries\n// (or open any sessions) yet.\nfn get_database_stats(cli: &mut Client) -> anyhow::Result<(f64, i64)> {\n    // Filter out `postgres` database as `compute_ctl` and other monitoring tools\n    // like `postgres_exporter` use it to query Postgres statistics.\n    // Use explicit 8 bytes type casts to match Rust types.\n    let stats = cli.query_one(\n        \"SELECT pg_catalog.coalesce(pg_catalog.sum(active_time), 0.0)::pg_catalog.float8 AS total_active_time,\n            pg_catalog.coalesce(pg_catalog.sum(sessions), 
0)::pg_catalog.bigint AS total_sessions\n        FROM pg_catalog.pg_stat_database\n        WHERE datname NOT IN (\n                'postgres',\n                'template0',\n                'template1'\n            );\",\n        &[],\n    );\n    let stats = match stats {\n        Ok(stats) => stats,\n        Err(e) => {\n            return Err(anyhow::anyhow!(\"could not query active_time: {}\", e));\n        }\n    };\n\n    let active_time: f64 = match stats.try_get(\"total_active_time\") {\n        Ok(active_time) => active_time,\n        Err(e) => return Err(anyhow::anyhow!(\"could not get total_active_time: {}\", e)),\n    };\n\n    let sessions: i64 = match stats.try_get(\"total_sessions\") {\n        Ok(sessions) => sessions,\n        Err(e) => return Err(anyhow::anyhow!(\"could not get total_sessions: {}\", e)),\n    };\n\n    Ok((active_time, sessions))\n}\n\n// Figure out the most recent state change time across all client backends.\n// If there is currently active backend, timestamp will be `Utc::now()`.\n// It can return `None`, which means no client backends exist or we were\n// unable to parse the timestamp.\nfn get_backends_state_change(cli: &mut Client) -> anyhow::Result<Option<DateTime<Utc>>> {\n    let mut last_active: Option<DateTime<Utc>> = None;\n    // Get all running client backends except ourself, use RFC3339 DateTime format.\n    let backends = cli.query(\n        \"SELECT state, pg_catalog.to_char(state_change, 'YYYY-MM-DD\\\"T\\\"HH24:MI:SS.US\\\"Z\\\"'::pg_catalog.text) AS state_change\n                FROM pg_stat_activity\n                    WHERE backend_type OPERATOR(pg_catalog.=) 'client backend'::pg_catalog.text\n                    AND pid OPERATOR(pg_catalog.!=) pg_catalog.pg_backend_pid()\n                    AND usename OPERATOR(pg_catalog.!=) 'cloud_admin'::pg_catalog.name;\", // XXX: find a better way to filter other monitors?\n        &[],\n    );\n\n    match backends {\n        Ok(backs) => {\n            let mut 
idle_backs: Vec<DateTime<Utc>> = vec![];\n\n            for b in backs.into_iter() {\n                let state: String = match b.try_get(\"state\") {\n                    Ok(state) => state,\n                    Err(_) => continue,\n                };\n\n                if state == \"idle\" {\n                    let change: String = match b.try_get(\"state_change\") {\n                        Ok(state_change) => state_change,\n                        Err(_) => continue,\n                    };\n                    let change = DateTime::parse_from_rfc3339(&change);\n                    match change {\n                        Ok(t) => idle_backs.push(t.with_timezone(&Utc)),\n                        Err(e) => {\n                            info!(\"cannot parse backend state_change DateTime: {}\", e);\n                            continue;\n                        }\n                    }\n                } else {\n                    // Found non-idle backend, so the last activity is NOW.\n                    // Return immediately, no need to check other backends.\n                    return Ok(Some(Utc::now()));\n                }\n            }\n\n            // Get idle backend `state_change` with the max timestamp.\n            if let Some(last) = idle_backs.iter().max() {\n                last_active = Some(*last);\n            }\n        }\n        Err(e) => {\n            return Err(anyhow::anyhow!(\"could not query backends: {}\", e));\n        }\n    }\n\n    Ok(last_active)\n}\n\n/// Launch a separate compute monitor thread and return its `JoinHandle`.\npub fn launch_monitor(compute: &Arc<ComputeNode>) -> thread::JoinHandle<()> {\n    let compute = Arc::clone(compute);\n    let experimental = compute.has_feature(ComputeFeature::ActivityMonitorExperimental);\n    let now = Utc::now();\n    let mut monitor = ComputeMonitor {\n        compute,\n        last_active: None,\n        last_checked: now,\n        last_up: now,\n        active_time: None,\n        
sessions: None,\n        experimental,\n    };\n\n    thread::Builder::new()\n        .name(\"compute-monitor\".into())\n        .spawn(move || {\n            let span = span!(Level::INFO, \"compute_monitor\");\n            let _enter = span.enter();\n            match monitor.run() {\n                Ok(_) => info!(\"compute monitor thread terminated gracefully\"),\n                Err(err) => error!(\"compute monitor thread terminated abnormally {:?}\", err),\n            }\n        })\n        .expect(\"cannot launch compute monitor thread\")\n}\n"
  },
  {
    "path": "compute_tools/src/params.rs",
    "content": "pub const DEFAULT_LOG_LEVEL: &str = \"info\";\n// From Postgres docs:\n//   To ease transition from the md5 method to the newer SCRAM method, if md5 is specified\n//   as a method in pg_hba.conf but the user's password on the server is encrypted for SCRAM\n//   (see below), then SCRAM-based authentication will automatically be chosen instead.\n//   https://www.postgresql.org/docs/15/auth-password.html\n//\n// So it's safe to set md5 here, as `control-plane` anyway uses SCRAM for all roles.\npub const PG_HBA_ALL_MD5: &str = \"host\\tall\\t\\tall\\t\\tall\\t\\tmd5\";\n"
  },
  {
    "path": "compute_tools/src/pg_helpers.rs",
    "content": "use std::collections::HashMap;\nuse std::fmt::Write;\nuse std::fs;\nuse std::fs::File;\nuse std::io::{BufRead, BufReader};\nuse std::os::unix::fs::PermissionsExt;\nuse std::path::Path;\nuse std::process::Child;\nuse std::str::FromStr;\nuse std::time::{Duration, Instant};\n\nuse anyhow::{Result, bail};\nuse compute_api::responses::TlsConfig;\nuse compute_api::spec::{\n    Database, DatabricksSettings, GenericOption, GenericOptions, PgIdent, Role,\n};\nuse futures::StreamExt;\nuse indexmap::IndexMap;\nuse ini::Ini;\nuse notify::{RecursiveMode, Watcher};\nuse postgres::config::Config;\nuse tokio::io::AsyncBufReadExt;\nuse tokio::task::JoinHandle;\nuse tokio::time::timeout;\nuse tokio_postgres;\nuse tokio_postgres::NoTls;\nuse tracing::{debug, error, info, instrument};\n\nconst POSTGRES_WAIT_TIMEOUT: Duration = Duration::from_millis(60 * 1000); // milliseconds\n\n/// Escape a string for including it in a SQL literal.\n///\n/// Wrapping the result with `E'{}'` or `'{}'` is not required,\n/// as it returns a ready-to-use SQL string literal, e.g. `'db'''` or `E'db\\\\'`.\n/// See <https://github.com/postgres/postgres/blob/da98d005cdbcd45af563d0c4ac86d0e9772cd15f/src/backend/utils/adt/quote.c#L47>\n/// for the original implementation.\npub fn escape_literal(s: &str) -> String {\n    let res = s.replace('\\'', \"''\").replace('\\\\', \"\\\\\\\\\");\n\n    if res.contains('\\\\') {\n        format!(\"E'{res}'\")\n    } else {\n        format!(\"'{res}'\")\n    }\n}\n\n/// Escape a string so that it can be used in postgresql.conf. 
Wrapping the result\n/// with `'{}'` is not required, as it returns a ready-to-use config string.\npub fn escape_conf_value(s: &str) -> String {\n    let res = s.replace('\\'', \"''\").replace('\\\\', \"\\\\\\\\\");\n    format!(\"'{res}'\")\n}\n\npub trait GenericOptionExt {\n    fn to_pg_option(&self) -> String;\n    fn to_pg_setting(&self) -> String;\n}\n\nimpl GenericOptionExt for GenericOption {\n    /// Represent `GenericOption` as SQL statement parameter.\n    fn to_pg_option(&self) -> String {\n        if let Some(val) = &self.value {\n            match self.vartype.as_ref() {\n                \"string\" => format!(\"{} {}\", self.name, escape_literal(val)),\n                _ => format!(\"{} {}\", self.name, val),\n            }\n        } else {\n            self.name.to_owned()\n        }\n    }\n\n    /// Represent `GenericOption` as configuration option.\n    fn to_pg_setting(&self) -> String {\n        if let Some(val) = &self.value {\n            match self.vartype.as_ref() {\n                \"string\" => format!(\"{} = {}\", self.name, escape_conf_value(val)),\n                _ => format!(\"{} = {}\", self.name, val),\n            }\n        } else {\n            self.name.to_owned()\n        }\n    }\n}\n\npub trait PgOptionsSerialize {\n    fn as_pg_options(&self) -> String;\n    fn as_pg_settings(&self) -> String;\n}\n\nimpl PgOptionsSerialize for GenericOptions {\n    /// Serialize an optional collection of `GenericOption`'s to\n    /// Postgres SQL statement arguments.\n    fn as_pg_options(&self) -> String {\n        if let Some(ops) = &self {\n            ops.iter()\n                .map(|op| op.to_pg_option())\n                .collect::<Vec<String>>()\n                .join(\" \")\n        } else {\n            \"\".to_string()\n        }\n    }\n\n    /// Serialize an optional collection of `GenericOption`'s to\n    /// `postgresql.conf` compatible format.\n    fn as_pg_settings(&self) -> String {\n        if let Some(ops) = &self {\n    
        ops.iter()\n                .map(|op| op.to_pg_setting())\n                .collect::<Vec<String>>()\n                .join(\"\\n\")\n                + \"\\n\" // newline after last setting\n        } else {\n            \"\".to_string()\n        }\n    }\n}\n\npub trait GenericOptionsSearch {\n    fn find(&self, name: &str) -> Option<String>;\n    fn find_ref(&self, name: &str) -> Option<&GenericOption>;\n}\n\nimpl GenericOptionsSearch for GenericOptions {\n    /// Lookup option by name\n    fn find(&self, name: &str) -> Option<String> {\n        let ops = self.as_ref()?;\n        let op = ops.iter().find(|s| s.name == name)?;\n        op.value.clone()\n    }\n\n    /// Lookup option by name, returning ref\n    fn find_ref(&self, name: &str) -> Option<&GenericOption> {\n        let ops = self.as_ref()?;\n        ops.iter().find(|s| s.name == name)\n    }\n}\n\npub trait RoleExt {\n    fn to_pg_options(&self) -> String;\n}\n\nimpl RoleExt for Role {\n    /// Serialize a list of role parameters into a Postgres-acceptable\n    /// string of arguments.\n    fn to_pg_options(&self) -> String {\n        // XXX: consider putting LOGIN as a default option somewhere higher, e.g. 
in control-plane.\n        let mut params: String = self.options.as_pg_options();\n        params.push_str(\" LOGIN\");\n\n        if let Some(pass) = &self.encrypted_password {\n            // Some time ago we supported only md5 and treated all encrypted_password as md5.\n            // Now we also support SCRAM-SHA-256 and to preserve compatibility\n            // we treat all encrypted_password as md5 unless it starts with SCRAM-SHA-256.\n            if pass.starts_with(\"SCRAM-SHA-256\") {\n                write!(params, \" PASSWORD '{pass}'\")\n                    .expect(\"String is documented to not to error during write operations\");\n            } else {\n                write!(params, \" PASSWORD 'md5{pass}'\")\n                    .expect(\"String is documented to not to error during write operations\");\n            }\n        } else {\n            params.push_str(\" PASSWORD NULL\");\n        }\n\n        params\n    }\n}\n\npub trait DatabaseExt {\n    fn to_pg_options(&self) -> String;\n}\n\nimpl DatabaseExt for Database {\n    /// Serialize a list of database parameters into a Postgres-acceptable\n    /// string of arguments.\n    /// NB: `TEMPLATE` is actually also an identifier, but so far we only need\n    /// to use `template0` and `template1`, so it is not a problem. 
Yet in the future\n    /// it may require a proper quoting too.\n    fn to_pg_options(&self) -> String {\n        let mut params: String = self.options.as_pg_options();\n        write!(params, \" OWNER {}\", &self.owner.pg_quote())\n            .expect(\"String is documented to not to error during write operations\");\n\n        params\n    }\n}\n\npub trait DatabricksSettingsExt {\n    fn as_pg_settings(&self) -> String;\n}\n\nimpl DatabricksSettingsExt for DatabricksSettings {\n    fn as_pg_settings(&self) -> String {\n        // Postgres GUCs rendered from DatabricksSettings\n        vec![\n            // ssl_ca_file\n            Some(format!(\n                \"ssl_ca_file = '{}'\",\n                self.pg_compute_tls_settings.ca_file\n            )),\n            // [Optional] databricks.workspace_url\n            Some(format!(\n                \"databricks.workspace_url = '{}'\",\n                &self.databricks_workspace_host\n            )),\n            // todo(vikas.jain): these are not required anymore as they are moved to static\n            // conf but keeping these to avoid image mismatch between hcc and pg.\n            // Once hcc and pg are in sync, we can remove these.\n            //\n            // databricks.enable_databricks_identity_login\n            Some(\"databricks.enable_databricks_identity_login = true\".to_string()),\n            // databricks.enable_sql_restrictions\n            Some(\"databricks.enable_sql_restrictions = true\".to_string()),\n        ]\n        .into_iter()\n        // Removes `None`s\n        .flatten()\n        .collect::<Vec<String>>()\n        .join(\"\\n\")\n            + \"\\n\"\n    }\n}\n\n/// Generic trait used to provide quoting / encoding for strings used in the\n/// Postgres SQL queries and DATABASE_URL.\npub trait Escaping {\n    fn pg_quote(&self) -> String;\n    fn pg_quote_dollar(&self) -> (String, String);\n}\n\nimpl Escaping for PgIdent {\n    /// This is intended to mimic Postgres quote_ident(), 
but for simplicity it\n    /// always quotes provided string with `\"\"` and escapes every `\"`.\n    /// **Not idempotent**, i.e. if string is already escaped it will be escaped again.\n    /// N.B. it's not useful for escaping identifiers that are used inside WHERE\n    /// clause, use `escape_literal()` instead.\n    fn pg_quote(&self) -> String {\n        format!(\"\\\"{}\\\"\", self.replace('\"', \"\\\"\\\"\"))\n    }\n\n    /// This helper is intended to be used for dollar-escaping strings for usage\n    /// inside PL/pgSQL procedures. In addition to dollar-escaping the string,\n    /// it also returns a tag that is intended to be used inside the outer\n    /// PL/pgSQL procedure. If you do not need an outer tag, just discard it.\n    /// Here we somewhat mimic the logic of Postgres' `pg_get_functiondef()`,\n    /// <https://github.com/postgres/postgres/blob/8b49392b270b4ac0b9f5c210e2a503546841e832/src/backend/utils/adt/ruleutils.c#L2924>\n    fn pg_quote_dollar(&self) -> (String, String) {\n        let mut tag: String = \"x\".to_string();\n        let mut outer_tag = \"xx\".to_string();\n\n        // Find the first suitable tag that is not present in the string.\n        // Postgres' max role/DB name length is 63 bytes, so even in the\n        // worst case it won't take long. 
Outer tag is always `tag + \"x\"`,\n        // so if `tag` is not present in the string, `outer_tag` is not\n        // present in the string either.\n        while self.contains(&tag.to_string()) {\n            tag += \"x\";\n            outer_tag = tag.clone() + \"x\";\n        }\n\n        let escaped = format!(\"${tag}${self}${tag}$\");\n\n        (escaped, outer_tag)\n    }\n}\n\n/// Build a list of existing Postgres roles\npub async fn get_existing_roles_async(client: &tokio_postgres::Client) -> Result<Vec<Role>> {\n    let postgres_roles = client\n        .query_raw::<str, &String, &[String; 0]>(\n            \"SELECT rolname, rolpassword FROM pg_catalog.pg_authid\",\n            &[],\n        )\n        .await?\n        .filter_map(|row| async { row.ok() })\n        .map(|row| Role {\n            name: row.get(\"rolname\"),\n            encrypted_password: row.get(\"rolpassword\"),\n            options: None,\n        })\n        .collect()\n        .await;\n\n    Ok(postgres_roles)\n}\n\n/// Build a list of existing Postgres databases\npub async fn get_existing_dbs_async(\n    client: &tokio_postgres::Client,\n) -> Result<HashMap<String, Database>> {\n    // `pg_database.datconnlimit = -2` means that the database is in the\n    // invalid state. 
See:\n    //   https://github.com/postgres/postgres/commit/a4b4cc1d60f7e8ccfcc8ff8cb80c28ee411ad9a9\n    let rowstream = client\n        // We use a subquery instead of a fancy `datdba::regrole::text AS owner`,\n        // because the latter automatically wraps the result in double quotes,\n        // if the role name contains special characters.\n        .query_raw::<str, &String, &[String; 0]>(\n            \"SELECT\n                datname AS name,\n                (SELECT rolname FROM pg_catalog.pg_roles WHERE oid OPERATOR(pg_catalog.=) datdba) AS owner,\n                NOT datallowconn AS restrict_conn,\n                datconnlimit OPERATOR(pg_catalog.=) (OPERATOR(pg_catalog.-) 2) AS invalid\n            FROM\n                pg_catalog.pg_database;\",\n            &[],\n        )\n        .await?;\n\n    let dbs_map = rowstream\n        .filter_map(|r| async { r.ok() })\n        .map(|row| Database {\n            name: row.get(\"name\"),\n            owner: row.get(\"owner\"),\n            restrict_conn: row.get(\"restrict_conn\"),\n            invalid: row.get(\"invalid\"),\n            options: None,\n        })\n        .map(|db| (db.name.clone(), db.clone()))\n        .collect::<HashMap<_, _>>()\n        .await;\n\n    Ok(dbs_map)\n}\n\n/// Wait for Postgres to become ready to accept connections. It's ready to\n/// accept connections when the state-field in `pgdata/postmaster.pid` says\n/// 'ready'.\n#[instrument(skip_all, fields(pgdata = %pgdata.display()))]\npub fn wait_for_postgres(pg: &mut Child, pgdata: &Path) -> Result<()> {\n    let pid_path = pgdata.join(\"postmaster.pid\");\n\n    // PostgreSQL writes line \"ready\" to the postmaster.pid file, when it has\n    // completed initialization and is ready to accept connections. We want to\n    // react quickly and perform the rest of our initialization as soon as\n    // PostgreSQL starts accepting connections. 
Use 'notify' to be notified\n    // whenever the PID file is changed, and whenever it changes, read it to\n    // check if it's now \"ready\".\n    //\n    // You cannot actually watch a file before it exists, so we first watch the\n    // data directory, and once the postmaster.pid file appears, we switch to\n    // watch the file instead. We also wake up every 100 ms to poll, just in\n    // case we miss some events for some reason. Not strictly necessary, but\n    // better safe than sorry.\n    let (tx, rx) = std::sync::mpsc::channel();\n    let watcher_res = notify::recommended_watcher(move |res| {\n        let _ = tx.send(res);\n    });\n    let (mut watcher, rx): (Box<dyn Watcher>, _) = match watcher_res {\n        Ok(watcher) => (Box::new(watcher), rx),\n        Err(e) => {\n            match e.kind {\n                notify::ErrorKind::Io(os) if os.raw_os_error() == Some(38) => {\n                    // docker on m1 macs does not support recommended_watcher\n                    // but return \"Function not implemented (os error 38)\"\n                    // see https://github.com/notify-rs/notify/issues/423\n                    let (tx, rx) = std::sync::mpsc::channel();\n\n                    // let's poll it faster than what we check the results for (100ms)\n                    let config =\n                        notify::Config::default().with_poll_interval(Duration::from_millis(50));\n\n                    let watcher = notify::PollWatcher::new(\n                        move |res| {\n                            let _ = tx.send(res);\n                        },\n                        config,\n                    )?;\n\n                    (Box::new(watcher), rx)\n                }\n                _ => return Err(e.into()),\n            }\n        }\n    };\n\n    watcher.watch(pgdata, RecursiveMode::NonRecursive)?;\n\n    let started_at = Instant::now();\n    let mut postmaster_pid_seen = false;\n    loop {\n        if let Ok(Some(status)) = 
pg.try_wait() {\n            // Postgres exited, that is not what we expected, bail out early.\n            let code = status.code().unwrap_or(-1);\n            bail!(\"Postgres exited unexpectedly with code {}\", code);\n        }\n\n        let res = rx.recv_timeout(Duration::from_millis(100));\n        debug!(\"woken up by notify: {res:?}\");\n        // If there are multiple events in the channel already, we only need to\n        // check once. Swallow the extra events before we go ahead to check the\n        // pid file.\n        while let Ok(res) = rx.try_recv() {\n            debug!(\"swallowing extra event: {res:?}\");\n        }\n\n        // Check that we can open pid file first.\n        if let Ok(file) = File::open(&pid_path) {\n            if !postmaster_pid_seen {\n                debug!(\"postmaster.pid appeared\");\n                watcher\n                    .unwatch(pgdata)\n                    .expect(\"Failed to remove pgdata dir watch\");\n                watcher\n                    .watch(&pid_path, RecursiveMode::NonRecursive)\n                    .expect(\"Failed to add postmaster.pid file watch\");\n                postmaster_pid_seen = true;\n            }\n\n            let file = BufReader::new(file);\n            let last_line = file.lines().last();\n\n            // Pid file could be there and we could read it, but it could be empty, for example.\n            if let Some(Ok(line)) = last_line {\n                let status = line.trim();\n                debug!(\"last line of postmaster.pid: {status:?}\");\n\n                // Now Postgres is ready to accept connections\n                if status == \"ready\" {\n                    break;\n                }\n            }\n        }\n\n        // Give up after POSTGRES_WAIT_TIMEOUT.\n        let duration = started_at.elapsed();\n        if duration >= POSTGRES_WAIT_TIMEOUT {\n            bail!(\"timed out while waiting for Postgres to start\");\n        }\n    }\n\n    
tracing::info!(\"PostgreSQL is now running, continuing to configure it\");\n\n    Ok(())\n}\n\n/// Remove `pgdata` directory and create it again with right permissions.\npub fn create_pgdata(pgdata: &str) -> Result<()> {\n    // Ignore removal error, likely it is a 'No such file or directory (os error 2)'.\n    // If it is something different then create_dir() will error out anyway.\n    let _ok = fs::remove_dir_all(pgdata);\n    fs::create_dir(pgdata)?;\n    fs::set_permissions(pgdata, fs::Permissions::from_mode(0o700))?;\n\n    Ok(())\n}\n\n/// Update pgbouncer.ini with provided options\nfn update_pgbouncer_ini(\n    pgbouncer_config: IndexMap<String, String>,\n    pgbouncer_ini_path: &str,\n) -> Result<()> {\n    let mut conf = Ini::load_from_file(pgbouncer_ini_path)?;\n    let section = conf.section_mut(Some(\"pgbouncer\")).unwrap();\n\n    for (option_name, value) in pgbouncer_config.iter() {\n        section.insert(option_name, value);\n        debug!(\n            \"Updating pgbouncer.ini with new values {}={}\",\n            option_name, value\n        );\n    }\n\n    conf.write_to_file(pgbouncer_ini_path)?;\n    Ok(())\n}\n\n/// Tune pgbouncer.\n/// 1. Apply new config using pgbouncer admin console\n/// 2. 
Add new values to pgbouncer.ini to preserve them after restart\npub async fn tune_pgbouncer(\n    mut pgbouncer_config: IndexMap<String, String>,\n    tls_config: Option<TlsConfig>,\n) -> Result<()> {\n    let pgbouncer_connstr = if std::env::var_os(\"AUTOSCALING\").is_some() {\n        // for VMs use pgbouncer specific way to connect to\n        // pgbouncer admin console without password\n        // when pgbouncer is running under the same user.\n        \"host=/tmp port=6432 dbname=pgbouncer user=pgbouncer\".to_string()\n    } else {\n        // for k8s use normal connection string with password\n        // to connect to pgbouncer admin console\n        let mut pgbouncer_connstr =\n            \"host=localhost port=6432 dbname=pgbouncer user=postgres sslmode=disable\".to_string();\n        if let Ok(pass) = std::env::var(\"PGBOUNCER_PASSWORD\") {\n            pgbouncer_connstr.push_str(format!(\" password={pass}\").as_str());\n        }\n        pgbouncer_connstr\n    };\n\n    info!(\n        \"Connecting to pgbouncer with connection string: {}\",\n        pgbouncer_connstr\n    );\n\n    // connect to pgbouncer, retrying several times\n    // because pgbouncer may not be ready yet\n    let mut retries = 3;\n    let client = loop {\n        match tokio_postgres::connect(&pgbouncer_connstr, NoTls).await {\n            Ok((client, connection)) => {\n                tokio::spawn(async move {\n                    if let Err(e) = connection.await {\n                        eprintln!(\"connection error: {e}\");\n                    }\n                });\n                break client;\n            }\n            Err(e) => {\n                if retries == 0 {\n                    return Err(e.into());\n                }\n                error!(\"Failed to connect to pgbouncer: pgbouncer_connstr {}\", e);\n                retries -= 1;\n                tokio::time::sleep(Duration::from_secs(1)).await;\n            }\n        }\n    };\n\n    if let Some(tls_config) = 
tls_config {\n        // pgbouncer starts in a half-ok state if it cannot find these files.\n        // It will default to client_tls_sslmode=deny, which causes proxy to error.\n        // There is a small window at startup where these files don't yet exist in the VM.\n        // Best to wait until it exists.\n        loop {\n            if let Ok(true) = tokio::fs::try_exists(&tls_config.key_path).await {\n                break;\n            }\n            tokio::time::sleep(Duration::from_millis(500)).await\n        }\n\n        pgbouncer_config.insert(\"client_tls_cert_file\".to_string(), tls_config.cert_path);\n        pgbouncer_config.insert(\"client_tls_key_file\".to_string(), tls_config.key_path);\n        pgbouncer_config.insert(\"client_tls_sslmode\".to_string(), \"allow\".to_string());\n    }\n\n    // save values to pgbouncer.ini\n    // so that they are preserved after pgbouncer restart\n    let pgbouncer_ini_path = if std::env::var_os(\"AUTOSCALING\").is_some() {\n        // in VMs we use /etc/pgbouncer.ini\n        \"/etc/pgbouncer.ini\".to_string()\n    } else {\n        // in pods we use /var/db/postgres/pgbouncer/pgbouncer.ini\n        // this is a shared volume between pgbouncer and postgres containers\n        // FIXME: fix permissions for this file\n        \"/var/db/postgres/pgbouncer/pgbouncer.ini\".to_string()\n    };\n    update_pgbouncer_ini(pgbouncer_config, &pgbouncer_ini_path)?;\n\n    info!(\"Applying pgbouncer setting change\");\n\n    if let Err(err) = client.simple_query(\"RELOAD\").await {\n        // Don't fail on error, just print it into log\n        error!(\"Failed to apply pgbouncer setting change,  {err}\",);\n    };\n\n    Ok(())\n}\n\n/// Spawn a task that will read Postgres logs from `stderr`, join multiline logs\n/// and send them to the logger. 
In the future we may also want to add context to\n/// these logs.\npub fn handle_postgres_logs(stderr: std::process::ChildStderr) -> JoinHandle<Result<()>> {\n    tokio::spawn(async move {\n        let stderr = tokio::process::ChildStderr::from_std(stderr)?;\n        handle_postgres_logs_async(stderr).await\n    })\n}\n\n/// Read Postgres logs from `stderr` until EOF. Buffer is flushed on one of the following conditions:\n/// - next line starts with timestamp\n/// - EOF\n/// - no new lines were written for the last 100 milliseconds\nasync fn handle_postgres_logs_async(stderr: tokio::process::ChildStderr) -> Result<()> {\n    let mut lines = tokio::io::BufReader::new(stderr).lines();\n    let timeout_duration = Duration::from_millis(100);\n    let ts_regex =\n        regex::Regex::new(r\"^\\d+-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2}\").expect(\"regex is valid\");\n\n    let mut buf = vec![];\n    loop {\n        let next_line = timeout(timeout_duration, lines.next_line()).await;\n\n        // we should flush lines from the buffer if we cannot continue reading multiline message\n        let should_flush_buf = match next_line {\n            // Flushing if new line starts with timestamp\n            Ok(Ok(Some(ref line))) => ts_regex.is_match(line),\n            // Flushing on EOF, timeout or error\n            _ => true,\n        };\n\n        if !buf.is_empty() && should_flush_buf {\n            // join multiline message into a single line, separated by unicode Zero Width Space.\n            // \"PG:\" prefix is used to distinguish postgres logs from other logs.\n            let combined = format!(\"PG:{}\\n\", buf.join(\"\\u{200B}\"));\n            buf.clear();\n\n            // sync write to stderr to avoid interleaving with other logs\n            use std::io::Write;\n            let res = std::io::stderr().lock().write_all(combined.as_bytes());\n            if let Err(e) = res {\n                tracing::error!(\"error while writing to stderr: {}\", e);\n            
}\n        }\n\n        // if not timeout, append line to the buffer\n        if next_line.is_ok() {\n            match next_line?? {\n                Some(line) => buf.push(line),\n                // EOF\n                None => break,\n            };\n        }\n    }\n\n    Ok(())\n}\n\n/// `Postgres::config::Config` handles database names with whitespaces\n/// and special characters properly.\npub fn postgres_conf_for_db(connstr: &url::Url, dbname: &str) -> Result<Config> {\n    let mut conf = Config::from_str(connstr.as_str())?;\n    conf.dbname(dbname);\n    Ok(conf)\n}\n"
  },
  {
    "path": "compute_tools/src/pg_isready.rs",
    "content": "use anyhow::{Context, anyhow};\n\n// Run `/usr/local/bin/pg_isready -p {port}`\n// Check the connectivity of PG\n// Success means PG is listening on the port and accepting connections\n// Note that PG does not need to authenticate the connection, nor reserve a connection quota for it.\n// See https://www.postgresql.org/docs/current/app-pg-isready.html\npub fn pg_isready(bin: &str, port: u16) -> anyhow::Result<()> {\n    let child_result = std::process::Command::new(bin)\n        .arg(\"-p\")\n        .arg(port.to_string())\n        .spawn();\n\n    child_result\n        .context(\"spawn() failed\")\n        .and_then(|mut child| child.wait().context(\"wait() failed\"))\n        .and_then(|status| match status.success() {\n            true => Ok(()),\n            false => Err(anyhow!(\"process exited with {status}\")),\n        })\n        // wrap any prior error with the overall context that we couldn't run the command\n        .with_context(|| format!(\"could not run `{bin} --port {port}`\"))\n}\n\n// It's safe to assume pg_isready is under the same directory with postgres,\n// because it is a PG util bin installed along with postgres\npub fn get_pg_isready_bin(pgbin: &str) -> String {\n    let split = pgbin.split(\"/\").collect::<Vec<&str>>();\n    split[0..split.len() - 1].join(\"/\") + \"/pg_isready\"\n}\n"
  },
  {
    "path": "compute_tools/src/pgbouncer.rs",
    "content": "pub const PGBOUNCER_PIDFILE: &str = \"/tmp/pgbouncer.pid\";\n"
  },
  {
    "path": "compute_tools/src/rsyslog.rs",
    "content": "use std::fs;\nuse std::io::ErrorKind;\nuse std::path::Path;\nuse std::process::Command;\nuse std::time::Duration;\nuse std::{fs::OpenOptions, io::Write};\nuse url::{Host, Url};\n\nuse anyhow::{Context, Result, anyhow};\nuse hostname_validator;\nuse tracing::{error, info, instrument, warn};\n\nconst POSTGRES_LOGS_CONF_PATH: &str = \"/etc/rsyslog.d/postgres_logs.conf\";\n\nfn get_rsyslog_pid() -> Option<String> {\n    let output = Command::new(\"pgrep\")\n        .arg(\"rsyslogd\")\n        .output()\n        .expect(\"Failed to execute pgrep\");\n\n    if !output.stdout.is_empty() {\n        let pid = std::str::from_utf8(&output.stdout)\n            .expect(\"Invalid UTF-8 in process output\")\n            .trim()\n            .to_string();\n        Some(pid)\n    } else {\n        None\n    }\n}\n\nfn wait_for_rsyslog_pid() -> Result<String, anyhow::Error> {\n    const MAX_WAIT: Duration = Duration::from_secs(5);\n    const INITIAL_SLEEP: Duration = Duration::from_millis(2);\n\n    let mut sleep_duration = INITIAL_SLEEP;\n    let start = std::time::Instant::now();\n    let mut attempts = 1;\n\n    for attempt in 1.. {\n        attempts = attempt;\n        match get_rsyslog_pid() {\n            Some(pid) => return Ok(pid),\n            None => {\n                if start.elapsed() >= MAX_WAIT {\n                    break;\n                }\n                info!(\n                    \"rsyslogd is not running, attempt {}. 
Sleeping for {} ms\",\n                    attempt,\n                    sleep_duration.as_millis()\n                );\n                std::thread::sleep(sleep_duration);\n                sleep_duration *= 2;\n            }\n        }\n    }\n\n    Err(anyhow::anyhow!(\n        \"rsyslogd is not running after waiting for {} seconds and {} attempts\",\n        attempts,\n        start.elapsed().as_secs()\n    ))\n}\n\n// Restart rsyslogd to apply the new configuration.\n// This is necessary, because there is no other way to reload the rsyslog configuration.\n//\n// Rsyslogd shouldn't lose any messages, because of the restart,\n// because it tracks the last read position in the log files\n// and will continue reading from that position.\n// TODO: test it properly\n//\nfn restart_rsyslog() -> Result<()> {\n    // kill it to restart\n    let _ = Command::new(\"pkill\")\n        .arg(\"rsyslogd\")\n        .output()\n        .context(\"Failed to restart rsyslogd\")?;\n\n    // ensure rsyslogd is running\n    wait_for_rsyslog_pid()?;\n\n    Ok(())\n}\n\nfn parse_audit_syslog_address(\n    remote_plain_endpoint: &str,\n    remote_tls_endpoint: &str,\n) -> Result<(String, u16, String)> {\n    let tls;\n    let remote_endpoint = if !remote_tls_endpoint.is_empty() {\n        tls = \"true\".to_string();\n        remote_tls_endpoint\n    } else {\n        tls = \"false\".to_string();\n        remote_plain_endpoint\n    };\n    // Urlify the remote_endpoint, so parsing can be done with url::Url.\n    let url_str = format!(\"http://{remote_endpoint}\");\n    let url = Url::parse(&url_str).map_err(|err| {\n        anyhow!(\"Error parsing {remote_endpoint}, expected host:port, got {err:?}\")\n    })?;\n\n    let is_valid = url.scheme() == \"http\"\n        && url.path() == \"/\"\n        && url.query().is_none()\n        && url.fragment().is_none()\n        && url.username() == \"\"\n        && url.password().is_none();\n\n    if !is_valid {\n        return Err(anyhow!(\n        
    \"Invalid address format {remote_endpoint}, expected host:port\"\n        ));\n    }\n    let host = match url.host() {\n        Some(Host::Domain(h)) if hostname_validator::is_valid(h) => h.to_string(),\n        Some(Host::Ipv4(ip4)) => ip4.to_string(),\n        Some(Host::Ipv6(ip6)) => ip6.to_string(),\n        _ => return Err(anyhow!(\"Invalid host\")),\n    };\n    let port = url\n        .port()\n        .ok_or_else(|| anyhow!(\"Invalid port in {remote_endpoint}\"))?;\n\n    Ok((host, port, tls))\n}\n\nfn generate_audit_rsyslog_config(\n    log_directory: String,\n    endpoint_id: &str,\n    project_id: &str,\n    remote_syslog_host: &str,\n    remote_syslog_port: u16,\n    remote_syslog_tls: &str,\n) -> String {\n    format!(\n        include_str!(\"config_template/compute_audit_rsyslog_template.conf\"),\n        log_directory = log_directory,\n        endpoint_id = endpoint_id,\n        project_id = project_id,\n        remote_syslog_host = remote_syslog_host,\n        remote_syslog_port = remote_syslog_port,\n        remote_syslog_tls = remote_syslog_tls\n    )\n}\n\npub fn configure_audit_rsyslog(\n    log_directory: String,\n    endpoint_id: &str,\n    project_id: &str,\n    remote_endpoint: &str,\n    remote_tls_endpoint: &str,\n) -> Result<()> {\n    let (remote_syslog_host, remote_syslog_port, remote_syslog_tls) =\n        parse_audit_syslog_address(remote_endpoint, remote_tls_endpoint).unwrap();\n    let config_content = generate_audit_rsyslog_config(\n        log_directory,\n        endpoint_id,\n        project_id,\n        &remote_syslog_host,\n        remote_syslog_port,\n        &remote_syslog_tls,\n    );\n\n    info!(\"rsyslog config_content: {}\", config_content);\n\n    let rsyslog_conf_path = \"/etc/rsyslog.d/compute_audit_rsyslog.conf\";\n    let mut file = OpenOptions::new()\n        .create(true)\n        .write(true)\n        .truncate(true)\n        .open(rsyslog_conf_path)?;\n\n    file.write_all(config_content.as_bytes())?;\n\n    
info!(\n        \"rsyslog configuration file {} added successfully. Starting rsyslogd\",\n        rsyslog_conf_path\n    );\n\n    // start the service, using the configuration\n    restart_rsyslog()?;\n\n    Ok(())\n}\n\n/// Configuration for enabling Postgres logs forwarding from rsyslogd\npub struct PostgresLogsRsyslogConfig<'a> {\n    pub host: Option<&'a str>,\n}\n\nimpl<'a> PostgresLogsRsyslogConfig<'a> {\n    pub fn new(host: Option<&'a str>) -> Self {\n        Self { host }\n    }\n\n    pub fn build(&self) -> Result<String> {\n        match self.host {\n            Some(host) => {\n                if let Some((target, port)) = host.split_once(\":\") {\n                    Ok(format!(\n                        include_str!(\n                            \"config_template/compute_rsyslog_postgres_export_template.conf\"\n                        ),\n                        logs_export_target = target,\n                        logs_export_port = port,\n                    ))\n                } else {\n                    Err(anyhow!(\"Invalid host format for Postgres logs export\"))\n                }\n            }\n            None => Ok(\"\".to_string()),\n        }\n    }\n\n    fn current_config() -> Result<String> {\n        let config_content = match std::fs::read_to_string(POSTGRES_LOGS_CONF_PATH) {\n            Ok(c) => c,\n            Err(err) if err.kind() == ErrorKind::NotFound => String::new(),\n            Err(err) => return Err(err.into()),\n        };\n        Ok(config_content)\n    }\n}\n\n/// Writes rsyslogd configuration for Postgres logs export and restarts rsyslog.\npub fn configure_postgres_logs_export(conf: PostgresLogsRsyslogConfig) -> Result<()> {\n    let new_config = conf.build()?;\n    let current_config = PostgresLogsRsyslogConfig::current_config()?;\n\n    if new_config == current_config {\n        info!(\"postgres logs rsyslog configuration is up-to-date\");\n        return Ok(());\n    }\n\n    // Nothing to configure\n    if 
new_config.is_empty() {\n        // When the configuration is removed, PostgreSQL will stop sending data\n        // to the files watched by rsyslog, so restarting rsyslog is more effort\n        // than just ignoring this change.\n        return Ok(());\n    }\n\n    info!(\n        \"configuring rsyslog for postgres logs export to: {:?}\",\n        conf.host\n    );\n\n    let mut file = OpenOptions::new()\n        .create(true)\n        .write(true)\n        .truncate(true)\n        .open(POSTGRES_LOGS_CONF_PATH)?;\n    file.write_all(new_config.as_bytes())?;\n\n    info!(\n        \"rsyslog configuration file {} added successfully. Starting rsyslogd\",\n        POSTGRES_LOGS_CONF_PATH\n    );\n\n    restart_rsyslog()?;\n    Ok(())\n}\n\n#[instrument(skip_all)]\nasync fn pgaudit_gc_main_loop(log_directory: String) -> Result<()> {\n    info!(\"running pgaudit GC main loop\");\n    loop {\n        // Check log_directory for old pgaudit logs and delete them.\n        // New log files are checked every 5 minutes, as set in pgaudit.log_rotation_age\n        // Find files that were not modified in the last 15 minutes and delete them.\n        // This should be enough time for rsyslog to process the logs and for us to catch the alerts.\n        //\n        // In case of a very high load, we might need to adjust this value and pgaudit.log_rotation_age.\n        //\n        // TODO: add some smarter logic to delete the files that are fully streamed according to rsyslog\n        // imfile-state files, but for now just do a simple GC to avoid filling up the disk.\n        let _ = Command::new(\"find\")\n            .arg(&log_directory)\n            .arg(\"-name\")\n            .arg(\"audit*.log\")\n            .arg(\"-mmin\")\n            .arg(\"+15\")\n            .arg(\"-delete\")\n            .output()?;\n\n        // also collect the metric for the size of the log directory\n        async fn get_log_files_size(path: &Path) -> Result<u64> {\n            let mut 
total_size = 0;\n\n            for entry in fs::read_dir(path)? {\n                let entry = entry?;\n                let entry_path = entry.path();\n\n                if entry_path.is_file() && entry_path.to_string_lossy().ends_with(\"log\") {\n                    total_size += entry.metadata()?.len();\n                }\n            }\n\n            Ok(total_size)\n        }\n\n        let log_directory_size = get_log_files_size(Path::new(&log_directory))\n            .await\n            .unwrap_or_else(|e| {\n                warn!(\"Failed to get log directory size: {}\", e);\n                0\n            });\n        crate::metrics::AUDIT_LOG_DIR_SIZE.set(log_directory_size as f64);\n        tokio::time::sleep(Duration::from_secs(60)).await;\n    }\n}\n\n// launch pgaudit GC thread to clean up the old pgaudit logs stored in the log_directory\npub fn launch_pgaudit_gc(log_directory: String) {\n    tokio::spawn(async move {\n        if let Err(e) = pgaudit_gc_main_loop(log_directory).await {\n            error!(\"pgaudit GC main loop failed: {}\", e);\n        }\n    });\n}\n\n#[cfg(test)]\nmod tests {\n    use crate::rsyslog::PostgresLogsRsyslogConfig;\n\n    use super::{generate_audit_rsyslog_config, parse_audit_syslog_address};\n\n    #[test]\n    fn test_postgres_logs_config() {\n        {\n            // Verify empty config\n            let conf = PostgresLogsRsyslogConfig::new(None);\n            let res = conf.build();\n            assert!(res.is_ok());\n            let conf_str = res.unwrap();\n            assert_eq!(&conf_str, \"\");\n        }\n\n        {\n            // Verify config\n            let conf = PostgresLogsRsyslogConfig::new(Some(\"collector.cvc.local:514\"));\n            let res = conf.build();\n            assert!(res.is_ok());\n            let conf_str = res.unwrap();\n            assert!(conf_str.contains(\"omfwd\"));\n            assert!(conf_str.contains(r#\"target=\"collector.cvc.local\"\"#));\n            
assert!(conf_str.contains(r#\"port=\"514\"\"#));\n        }\n\n        {\n            // Verify invalid config\n            let conf = PostgresLogsRsyslogConfig::new(Some(\"invalid\"));\n            let res = conf.build();\n            assert!(res.is_err());\n        }\n    }\n\n    #[test]\n    fn test_parse_audit_syslog_address() {\n        {\n            // host:port format (plaintext)\n            let parsed = parse_audit_syslog_address(\"collector.host.tld:5555\", \"\");\n            assert!(parsed.is_ok());\n            assert_eq!(\n                parsed.unwrap(),\n                (\n                    String::from(\"collector.host.tld\"),\n                    5555,\n                    String::from(\"false\")\n                )\n            );\n        }\n\n        {\n            // host:port format with ipv4 ip address (plaintext)\n            let parsed = parse_audit_syslog_address(\"10.0.0.1:5555\", \"\");\n            assert!(parsed.is_ok());\n            assert_eq!(\n                parsed.unwrap(),\n                (String::from(\"10.0.0.1\"), 5555, String::from(\"false\"))\n            );\n        }\n\n        {\n            // host:port format with ipv6 ip address (plaintext)\n            let parsed =\n                parse_audit_syslog_address(\"[7e60:82ed:cb2e:d617:f904:f395:aaca:e252]:5555\", \"\");\n            assert_eq!(\n                parsed.unwrap(),\n                (\n                    String::from(\"7e60:82ed:cb2e:d617:f904:f395:aaca:e252\"),\n                    5555,\n                    String::from(\"false\")\n                )\n            );\n        }\n\n        {\n            // Only TLS host:port defined\n            let parsed = parse_audit_syslog_address(\"\", \"tls.host.tld:5556\");\n            assert_eq!(\n                parsed.unwrap(),\n                (String::from(\"tls.host.tld\"), 5556, String::from(\"true\"))\n            );\n        }\n\n        {\n            // tls host should take precedence, when both 
defined\n            let parsed = parse_audit_syslog_address(\"plaintext.host.tld:5555\", \"tls.host.tld:5556\");\n            assert_eq!(\n                parsed.unwrap(),\n                (String::from(\"tls.host.tld\"), 5556, String::from(\"true\"))\n            );\n        }\n\n        {\n            // host without port (plaintext)\n            let parsed = parse_audit_syslog_address(\"collector.host.tld\", \"\");\n            assert!(parsed.is_err());\n        }\n\n        {\n            // port without host\n            let parsed = parse_audit_syslog_address(\":5555\", \"\");\n            assert!(parsed.is_err());\n        }\n\n        {\n            // valid host with invalid port\n            let parsed = parse_audit_syslog_address(\"collector.host.tld:90001\", \"\");\n            assert!(parsed.is_err());\n        }\n\n        {\n            // invalid hostname with valid port\n            let parsed = parse_audit_syslog_address(\"-collector.host.tld:5555\", \"\");\n            assert!(parsed.is_err());\n        }\n\n        {\n            // parse error\n            let parsed = parse_audit_syslog_address(\"collector.host.tld:::5555\", \"\");\n            assert!(parsed.is_err());\n        }\n    }\n\n    #[test]\n    fn test_generate_audit_rsyslog_config() {\n        {\n            // plaintext version\n            let log_directory = \"/tmp/log\".to_string();\n            let endpoint_id = \"ep-test-endpoint-id\";\n            let project_id = \"test-project-id\";\n            let remote_syslog_host = \"collector.host.tld\";\n            let remote_syslog_port = 5555;\n            let remote_syslog_tls = \"false\";\n\n            let conf_str = generate_audit_rsyslog_config(\n                log_directory,\n                endpoint_id,\n                project_id,\n                remote_syslog_host,\n                remote_syslog_port,\n                remote_syslog_tls,\n            );\n\n            assert!(conf_str.contains(r#\"set 
$.remote_syslog_tls = \"false\";\"#));\n            assert!(conf_str.contains(r#\"type=\"omfwd\"\"#));\n            assert!(conf_str.contains(r#\"target=\"collector.host.tld\"\"#));\n            assert!(conf_str.contains(r#\"port=\"5555\"\"#));\n            assert!(conf_str.contains(r#\"StreamDriverPermittedPeers=\"collector.host.tld\"\"#));\n        }\n\n        {\n            // TLS version\n            let log_directory = \"/tmp/log\".to_string();\n            let endpoint_id = \"ep-test-endpoint-id\";\n            let project_id = \"test-project-id\";\n            let remote_syslog_host = \"collector.host.tld\";\n            let remote_syslog_port = 5556;\n            let remote_syslog_tls = \"true\";\n\n            let conf_str = generate_audit_rsyslog_config(\n                log_directory,\n                endpoint_id,\n                project_id,\n                remote_syslog_host,\n                remote_syslog_port,\n                remote_syslog_tls,\n            );\n\n            assert!(conf_str.contains(r#\"set $.remote_syslog_tls = \"true\";\"#));\n            assert!(conf_str.contains(r#\"type=\"omfwd\"\"#));\n            assert!(conf_str.contains(r#\"target=\"collector.host.tld\"\"#));\n            assert!(conf_str.contains(r#\"port=\"5556\"\"#));\n            assert!(conf_str.contains(r#\"StreamDriverPermittedPeers=\"collector.host.tld\"\"#));\n        }\n    }\n}\n"
  },
  {
    "path": "compute_tools/src/spec.rs",
    "content": "use std::fs::File;\nuse std::fs::{self, Permissions};\nuse std::os::unix::fs::PermissionsExt;\nuse std::path::Path;\n\nuse anyhow::{Result, anyhow, bail};\nuse compute_api::responses::{\n    ComputeConfig, ControlPlaneComputeStatus, ControlPlaneConfigResponse,\n};\nuse reqwest::StatusCode;\nuse tokio_postgres::Client;\nuse tracing::{error, info, instrument};\n\nuse crate::compute::ComputeNodeParams;\nuse crate::config;\nuse crate::metrics::{CPLANE_REQUESTS_TOTAL, CPlaneRequestRPC, UNKNOWN_HTTP_STATUS};\nuse crate::migration::MigrationRunner;\nuse crate::params::PG_HBA_ALL_MD5;\n\n// Do control plane request and return response if any. In case of error it\n// returns a bool flag indicating whether it makes sense to retry the request\n// and a string with error message.\nfn do_control_plane_request(\n    uri: &str,\n    jwt: &str,\n) -> Result<ControlPlaneConfigResponse, (bool, String, String)> {\n    let resp = reqwest::blocking::Client::new()\n        .get(uri)\n        .header(\"Authorization\", format!(\"Bearer {jwt}\"))\n        .send()\n        .map_err(|e| {\n            (\n                true,\n                format!(\"could not perform request to control plane: {e:?}\"),\n                UNKNOWN_HTTP_STATUS.to_string(),\n            )\n        })?;\n\n    let status = resp.status();\n    match status {\n        StatusCode::OK => match resp.json::<ControlPlaneConfigResponse>() {\n            Ok(spec_resp) => Ok(spec_resp),\n            Err(e) => Err((\n                true,\n                format!(\"could not deserialize control plane response: {e:?}\"),\n                status.to_string(),\n            )),\n        },\n        StatusCode::SERVICE_UNAVAILABLE => Err((\n            true,\n            \"control plane is temporarily unavailable\".to_string(),\n            status.to_string(),\n        )),\n        StatusCode::BAD_GATEWAY => {\n            // We have a problem with intermittent 502 errors now\n            // 
https://github.com/neondatabase/cloud/issues/2353\n            // It's fine to retry GET request in this case.\n            Err((\n                true,\n                \"control plane request failed with 502\".to_string(),\n                status.to_string(),\n            ))\n        }\n        // Another code, likely 500 or 404, means that compute is unknown to the control plane\n        // or some internal failure happened. Doesn't make much sense to retry in this case.\n        _ => Err((\n            false,\n            format!(\"unexpected control plane response status code: {status}\"),\n            status.to_string(),\n        )),\n    }\n}\n\n/// Request config from the control-plane by compute_id. If\n/// `NEON_CONTROL_PLANE_TOKEN` env variable is set, it will be used for\n/// authorization.\npub fn get_config_from_control_plane(base_uri: &str, compute_id: &str) -> Result<ComputeConfig> {\n    let cp_uri = format!(\"{base_uri}/compute/api/v2/computes/{compute_id}/spec\");\n    let jwt: String = std::env::var(\"NEON_CONTROL_PLANE_TOKEN\").unwrap_or_default();\n    let mut attempt = 1;\n\n    info!(\"getting config from control plane: {}\", cp_uri);\n\n    // Do 3 attempts to get spec from the control plane using the following logic:\n    // - network error -> then retry\n    // - compute id is unknown or any other error -> bail out\n    // - no spec for compute yet (Empty state) -> return Ok(None)\n    // - got config -> return Ok(Some(config))\n    while attempt < 4 {\n        let result = match do_control_plane_request(&cp_uri, &jwt) {\n            Ok(config_resp) => {\n                CPLANE_REQUESTS_TOTAL\n                    .with_label_values(&[\n                        CPlaneRequestRPC::GetConfig.as_str(),\n                        &StatusCode::OK.to_string(),\n                    ])\n                    .inc();\n                match config_resp.status {\n                    ControlPlaneComputeStatus::Empty => Ok(config_resp.into()),\n              
      ControlPlaneComputeStatus::Attached => {\n                        if config_resp.spec.is_some() {\n                            Ok(config_resp.into())\n                        } else {\n                            bail!(\"compute is attached, but spec is empty\")\n                        }\n                    }\n                }\n            }\n            Err((retry, msg, status)) => {\n                CPLANE_REQUESTS_TOTAL\n                    .with_label_values(&[CPlaneRequestRPC::GetConfig.as_str(), &status])\n                    .inc();\n                if retry {\n                    Err(anyhow!(msg))\n                } else {\n                    bail!(msg);\n                }\n            }\n        };\n\n        if let Err(e) = &result {\n            error!(\"attempt {} to get config failed with: {}\", attempt, e);\n        } else {\n            return result;\n        }\n\n        attempt += 1;\n        std::thread::sleep(std::time::Duration::from_millis(100));\n    }\n\n    // All attempts failed, return error.\n    Err(anyhow::anyhow!(\n        \"Exhausted all attempts to retrieve the config from the control plane\"\n    ))\n}\n\n/// Check `pg_hba.conf` and update if needed to allow external connections.\npub fn update_pg_hba(pgdata_path: &Path, databricks_pg_hba: Option<&String>) -> Result<()> {\n    // XXX: consider making it a part of config.json\n    let pghba_path = pgdata_path.join(\"pg_hba.conf\");\n\n    // Update pg_hba to contain databricks specific settings before adding neon settings\n    // PG uses the first record that matches to perform authentication, so we need to have\n    // our rules before the default ones from neon.\n    // See https://www.postgresql.org/docs/current/auth-pg-hba-conf.html\n    if let Some(databricks_pg_hba) = databricks_pg_hba {\n        if config::line_in_file(\n            &pghba_path,\n            &format!(\"include_if_exists {}\\n\", *databricks_pg_hba),\n        )? 
{\n            info!(\"updated pg_hba.conf to include databricks_pg_hba.conf\");\n        } else {\n            info!(\"pg_hba.conf already included databricks_pg_hba.conf\");\n        }\n    }\n\n    if config::line_in_file(&pghba_path, PG_HBA_ALL_MD5)? {\n        info!(\"updated pg_hba.conf to allow external connections\");\n    } else {\n        info!(\"pg_hba.conf is up-to-date\");\n    }\n\n    Ok(())\n}\n\n/// Check `pg_ident.conf` and update if needed to allow databricks config.\npub fn update_pg_ident(pgdata_path: &Path, databricks_pg_ident: Option<&String>) -> Result<()> {\n    info!(\"checking pg_ident.conf\");\n    let pghba_path = pgdata_path.join(\"pg_ident.conf\");\n\n    // Update pg_ident to contain databricks specific settings\n    if let Some(databricks_pg_ident) = databricks_pg_ident {\n        if config::line_in_file(\n            &pghba_path,\n            &format!(\"include_if_exists {}\\n\", *databricks_pg_ident),\n        )? {\n            info!(\"updated pg_ident.conf to include databricks_pg_ident.conf\");\n        } else {\n            info!(\"pg_ident.conf already included databricks_pg_ident.conf\");\n        }\n    }\n\n    Ok(())\n}\n\n/// Copy tls key_file and cert_file from k8s secret mount directory\n/// to pgdata and set private key file permissions as expected by Postgres.\n/// See this doc for expected permission <https://www.postgresql.org/docs/current/ssl-tcp.html>\n/// K8s secrets mount on dblet does not honor permission and ownership\n/// specified in the Volume or VolumeMount. 
So we need to explicitly copy the file and set the permissions.\npub fn copy_tls_certificates(\n    key_file: &String,\n    cert_file: &String,\n    pgdata_path: &Path,\n) -> Result<()> {\n    let files = [cert_file, key_file];\n    for file in files.iter() {\n        let source = Path::new(file);\n        let dest = pgdata_path.join(source.file_name().unwrap());\n        if !dest.exists() {\n            std::fs::copy(source, &dest)?;\n            info!(\n                \"Copying tls file: {} to {}\",\n                &source.display(),\n                &dest.display()\n            );\n        }\n        if *file == key_file {\n            // Postgres requires private key to be readable only by the owner by having\n            // chmod 600 permissions.\n            let permissions = Permissions::from_mode(0o600);\n            fs::set_permissions(&dest, permissions)?;\n            info!(\"Setting permission on {}.\", &dest.display());\n        }\n    }\n    Ok(())\n}\n\n/// Create a standby.signal file\npub fn add_standby_signal(pgdata_path: &Path) -> Result<()> {\n    // XXX: consider making it a part of config.json\n    let signalfile = pgdata_path.join(\"standby.signal\");\n\n    if !signalfile.exists() {\n        File::create(signalfile)?;\n        info!(\"created standby.signal\");\n    } else {\n        info!(\"reused pre-existing standby.signal\");\n    }\n    Ok(())\n}\n\n#[instrument(skip_all)]\npub async fn handle_neon_extension_upgrade(client: &mut Client) -> Result<()> {\n    let query = \"ALTER EXTENSION neon UPDATE\";\n    info!(\"update neon extension version with query: {}\", query);\n    client.simple_query(query).await?;\n\n    Ok(())\n}\n\n#[instrument(skip_all)]\npub async fn handle_migrations(\n    params: ComputeNodeParams,\n    client: &mut Client,\n    lakebase_mode: bool,\n) -> Result<()> {\n    info!(\"handle migrations\");\n\n    // !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n 
   // !BE SURE TO ONLY ADD MIGRATIONS TO THE END OF THIS ARRAY. IF YOU DO NOT, VERY VERY BAD THINGS MAY HAPPEN!\n    // !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n\n    // Add new migrations in numerical order.\n    let migrations = [\n        &format!(\n            include_str!(\"./migrations/0001-add_bypass_rls_to_privileged_role.sql\"),\n            privileged_role_name = params.privileged_role_name\n        ),\n        &format!(\n            include_str!(\"./migrations/0002-alter_roles.sql\"),\n            privileged_role_name = params.privileged_role_name\n        ),\n        &format!(\n            include_str!(\"./migrations/0003-grant_pg_create_subscription_to_privileged_role.sql\"),\n            privileged_role_name = params.privileged_role_name\n        ),\n        &format!(\n            include_str!(\"./migrations/0004-grant_pg_monitor_to_privileged_role.sql\"),\n            privileged_role_name = params.privileged_role_name\n        ),\n        &format!(\n            include_str!(\"./migrations/0005-grant_all_on_tables_to_privileged_role.sql\"),\n            privileged_role_name = params.privileged_role_name\n        ),\n        &format!(\n            include_str!(\"./migrations/0006-grant_all_on_sequences_to_privileged_role.sql\"),\n            privileged_role_name = params.privileged_role_name\n        ),\n        &format!(\n            include_str!(\n                \"./migrations/0007-grant_all_on_tables_with_grant_option_to_privileged_role.sql\"\n            ),\n            privileged_role_name = params.privileged_role_name\n        ),\n        &format!(\n            include_str!(\n                \"./migrations/0008-grant_all_on_sequences_with_grant_option_to_privileged_role.sql\"\n            ),\n            privileged_role_name = params.privileged_role_name\n        ),\n        include_str!(\"./migrations/0009-revoke_replication_for_previously_allowed_roles.sql\"),\n        
&format!(\n            include_str!(\n                \"./migrations/0010-grant_snapshot_synchronization_funcs_to_privileged_role.sql\"\n            ),\n            privileged_role_name = params.privileged_role_name\n        ),\n        &format!(\n            include_str!(\n                \"./migrations/0011-grant_pg_show_replication_origin_status_to_privileged_role.sql\"\n            ),\n            privileged_role_name = params.privileged_role_name\n        ),\n        &format!(\n            include_str!(\"./migrations/0012-grant_pg_signal_backend_to_privileged_role.sql\"),\n            privileged_role_name = params.privileged_role_name\n        ),\n    ];\n\n    MigrationRunner::new(client, &migrations, lakebase_mode)\n        .run_migrations()\n        .await?;\n\n    Ok(())\n}\n"
  },
  {
    "path": "compute_tools/src/spec_apply.rs",
    "content": "use std::collections::{HashMap, HashSet};\nuse std::fmt::{Debug, Formatter};\nuse std::future::Future;\nuse std::iter::{empty, once};\nuse std::sync::Arc;\n\nuse anyhow::{Context, Result};\nuse compute_api::responses::ComputeStatus;\nuse compute_api::spec::{ComputeAudit, ComputeSpec, Database, PgIdent, Role};\nuse futures::future::join_all;\nuse tokio::sync::RwLock;\nuse tokio_postgres::Client;\nuse tokio_postgres::error::SqlState;\nuse tracing::{Instrument, debug, error, info, info_span, instrument, warn};\n\nuse crate::compute::{ComputeNode, ComputeNodeParams, ComputeState, create_databricks_roles};\nuse crate::hadron_metrics::COMPUTE_CONFIGURE_STATEMENT_TIMEOUT_ERRORS;\nuse crate::pg_helpers::{\n    DatabaseExt, Escaping, GenericOptionsSearch, RoleExt, get_existing_dbs_async,\n    get_existing_roles_async,\n};\nuse crate::spec_apply::ApplySpecPhase::{\n    AddDatabricksGrants, AlterDatabricksRoles, CreateAndAlterDatabases, CreateAndAlterRoles,\n    CreateAvailabilityCheck, CreateDatabricksMisc, CreateDatabricksRoles, CreatePgauditExtension,\n    CreatePgauditlogtofileExtension, CreatePrivilegedRole, CreateSchemaNeon,\n    DisablePostgresDBPgAudit, DropInvalidDatabases, DropRoles, FinalizeDropLogicalSubscriptions,\n    HandleDatabricksAuthExtension, HandleNeonExtension, HandleOtherExtensions,\n    RenameAndDeleteDatabases, RenameRoles, RunInEachDatabase,\n};\nuse crate::spec_apply::PerDatabasePhase::{\n    ChangeSchemaPerms, DeleteDBRoleReferences, DropLogicalSubscriptions,\n};\n\nimpl ComputeNode {\n    /// Apply the spec to the running PostgreSQL instance.\n    /// The caller can decide to run with multiple clients in parallel, or\n    /// single mode.  
Either way, the commands executed will be the same, and\n    /// only commands run in different databases are parallelized.\n    #[instrument(skip_all)]\n    pub fn apply_spec_sql(\n        &self,\n        spec: Arc<ComputeSpec>,\n        conf: Arc<tokio_postgres::Config>,\n        concurrency: usize,\n    ) -> Result<()> {\n        info!(\"Applying config with max {} concurrency\", concurrency);\n        debug!(\"Config: {:?}\", spec);\n\n        let rt = tokio::runtime::Handle::current();\n        rt.block_on(async {\n            // Proceed with post-startup configuration. Note, that order of operations is important.\n            let client = Self::get_maintenance_client(&conf).await?;\n            let spec = spec.clone();\n            let params = Arc::new(self.params.clone());\n\n            let databases = get_existing_dbs_async(&client).await?;\n            let roles = get_existing_roles_async(&client)\n                .await?\n                .into_iter()\n                .map(|role| (role.name.clone(), role))\n                .collect::<HashMap<String, Role>>();\n\n            // Check if we need to drop subscriptions before starting the endpoint.\n            //\n            // It is important to do this operation exactly once when endpoint starts on a new branch.\n            // Otherwise, we may drop not inherited, but newly created subscriptions.\n            //\n            // We cannot rely only on spec.drop_subscriptions_before_start flag,\n            // because if for some reason compute restarts inside VM,\n            // it will start again with the same spec and flag value.\n            //\n            // To handle this, we save the fact of the operation in the database\n            // in the neon.drop_subscriptions_done table.\n            // If the table does not exist, we assume that the operation was never performed, so we must do it.\n            // If the table exists, we check if the operation was performed on the current timeline.\n         
   //\n            let mut drop_subscriptions_done = false;\n\n            if spec.drop_subscriptions_before_start {\n                let timeline_id = self.get_timeline_id().context(\"timeline_id must be set\")?;\n\n                info!(\"Checking if drop subscription operation was already performed for timeline_id: {}\", timeline_id);\n\n                drop_subscriptions_done = match\n                    client.query(\"select 1 from neon.drop_subscriptions_done where timeline_id OPERATOR(pg_catalog.=) $1\", &[&timeline_id.to_string()]).await {\n                    Ok(result) => !result.is_empty(),\n                    Err(e) =>\n                    {\n                        match e.code() {\n                            Some(&SqlState::UNDEFINED_TABLE) => false,\n                            _ => {\n                                // We don't expect any other error here, except for the schema/table not existing\n                                error!(\"Error checking if drop subscription operation was already performed: {}\", e);\n                                return Err(e.into());\n                            }\n                        }\n                    }\n                }\n            };\n\n\n            let jwks_roles = Arc::new(\n                spec.as_ref()\n                    .local_proxy_config\n                    .iter()\n                    .flat_map(|it| &it.jwks)\n                    .flatten()\n                    .flat_map(|setting| &setting.role_names)\n                    .cloned()\n                    .collect::<HashSet<_>>(),\n            );\n\n            let ctx = Arc::new(tokio::sync::RwLock::new(MutableApplyContext {\n                roles,\n                dbs: databases,\n            }));\n\n            // Apply special pre drop database phase.\n            // NOTE: we use the code of RunInEachDatabase phase for parallelism\n            // and connection management, but we don't really run it in *each* database,\n            // 
only in the databases we're about to drop.\n            info!(\"Applying PerDatabase (pre-dropdb) phase\");\n            let concurrency_token = Arc::new(tokio::sync::Semaphore::new(concurrency));\n\n            // Run the phase for each database that we're about to drop.\n            let db_processes = spec\n                .delta_operations\n                .iter()\n                .flatten()\n                .filter_map(move |op| {\n                    if op.action.as_str() == \"delete_db\" {\n                        Some(op.name.clone())\n                    } else {\n                        None\n                    }\n                })\n                .map(|dbname| {\n                    let spec = spec.clone();\n                    let ctx = ctx.clone();\n                    let jwks_roles = jwks_roles.clone();\n                    let mut conf = conf.as_ref().clone();\n                    let concurrency_token = concurrency_token.clone();\n                    // We only need dbname field for this phase, so set other fields to dummy values\n                    let db = DB::UserDB(Database {\n                        name: dbname.clone(),\n                        owner: \"cloud_admin\".to_string(),\n                        options: None,\n                        restrict_conn: false,\n                        invalid: false,\n                    });\n\n                    debug!(\"Applying per-database phases for Database {:?}\", &db);\n\n                    match &db {\n                        DB::SystemDB => {}\n                        DB::UserDB(db) => {\n                            conf.dbname(db.name.as_str());\n                        }\n                    }\n\n                    let conf = Arc::new(conf);\n                    let fut = Self::apply_spec_sql_db(\n                        params.clone(),\n                        spec.clone(),\n                        conf,\n                        ctx.clone(),\n                        jwks_roles.clone(),\n  
                      concurrency_token.clone(),\n                        db,\n                        [DropLogicalSubscriptions].to_vec(),\n                        self.params.lakebase_mode,\n                    );\n\n                    Ok(tokio::spawn(fut))\n                })\n                .collect::<Vec<Result<_, anyhow::Error>>>();\n\n            for process in db_processes.into_iter() {\n                let handle = process?;\n                if let Err(e) = handle.await? {\n                    // Handle the error case where the database does not exist\n                    // We do not check whether the DB exists or not in the deletion phase,\n                    // so we shouldn't be strict about it in pre-deletion cleanup as well.\n                    if e.to_string().contains(\"does not exist\") {\n                        warn!(\"Error dropping subscription: {}\", e);\n                    } else {\n                        return Err(e);\n                    }\n                };\n            }\n\n            let phases = if self.params.lakebase_mode {\n                vec![\n                    CreatePrivilegedRole,\n                // BEGIN_HADRON\n                CreateDatabricksRoles,\n                AlterDatabricksRoles,\n                // END_HADRON\n                DropInvalidDatabases,\n                RenameRoles,\n                CreateAndAlterRoles,\n                RenameAndDeleteDatabases,\n                CreateAndAlterDatabases,\n                CreateSchemaNeon,\n            ]\n            } else {\n                vec![\n                    CreatePrivilegedRole,\n                DropInvalidDatabases,\n                RenameRoles,\n                CreateAndAlterRoles,\n                RenameAndDeleteDatabases,\n                CreateAndAlterDatabases,\n                CreateSchemaNeon,\n            ]\n            };\n\n            for phase in phases {\n                info!(\"Applying phase {:?}\", &phase);\n                
apply_operations(\n                    params.clone(),\n                    spec.clone(),\n                    ctx.clone(),\n                    jwks_roles.clone(),\n                    phase,\n                    || async { Ok(&client) },\n                    self.params.lakebase_mode,\n                )\n                .await?;\n            }\n\n            info!(\"Applying RunInEachDatabase2 phase\");\n            let concurrency_token = Arc::new(tokio::sync::Semaphore::new(concurrency));\n\n            let db_processes = spec\n                .cluster\n                .databases\n                .iter()\n                .map(|db| DB::new(db.clone()))\n                // include\n                .chain(once(DB::SystemDB))\n                .map(|db| {\n                    let spec = spec.clone();\n                    let ctx = ctx.clone();\n                    let jwks_roles = jwks_roles.clone();\n                    let mut conf = conf.as_ref().clone();\n                    let concurrency_token = concurrency_token.clone();\n                    let db = db.clone();\n\n                    debug!(\"Applying per-database phases for Database {:?}\", &db);\n\n                    match &db {\n                        DB::SystemDB => {}\n                        DB::UserDB(db) => {\n                            conf.dbname(db.name.as_str());\n                        }\n                    }\n\n                    let conf = Arc::new(conf);\n                    let mut phases = vec![\n                        DeleteDBRoleReferences,\n                        ChangeSchemaPerms,\n                    ];\n\n                    if spec.drop_subscriptions_before_start && !drop_subscriptions_done {\n                        info!(\"Adding DropLogicalSubscriptions phase because drop_subscriptions_before_start is set\");\n                        phases.push(DropLogicalSubscriptions);\n                    }\n\n                    let fut = Self::apply_spec_sql_db(\n                    
    params.clone(),\n                        spec.clone(),\n                        conf,\n                        ctx.clone(),\n                        jwks_roles.clone(),\n                        concurrency_token.clone(),\n                        db,\n                        phases,\n                        self.params.lakebase_mode,\n                    );\n\n                    Ok(tokio::spawn(fut))\n                })\n                .collect::<Vec<Result<_, anyhow::Error>>>();\n\n            for process in db_processes.into_iter() {\n                let handle = process?;\n                handle.await??;\n            }\n\n            let mut phases = if self.params.lakebase_mode {\n                vec![\n                HandleOtherExtensions,\n                HandleNeonExtension, // This step depends on CreateSchemaNeon\n                // BEGIN_HADRON\n                HandleDatabricksAuthExtension,\n                // END_HADRON\n                CreateAvailabilityCheck,\n                DropRoles,\n                // BEGIN_HADRON\n                AddDatabricksGrants,\n                CreateDatabricksMisc,\n                // END_HADRON\n            ]\n            } else {\n                vec![\n                HandleOtherExtensions,\n                HandleNeonExtension, // This step depends on CreateSchemaNeon\n                CreateAvailabilityCheck,\n                DropRoles,\n            ]\n            };\n\n            // This step depends on CreateSchemaNeon\n            if spec.drop_subscriptions_before_start && !drop_subscriptions_done {\n                info!(\"Adding FinalizeDropLogicalSubscriptions phase because drop_subscriptions_before_start is set\");\n                phases.push(FinalizeDropLogicalSubscriptions);\n            }\n\n            // Keep DisablePostgresDBPgAudit phase at the end,\n            // so that all config operations are audit logged.\n            match spec.audit_log_level\n            {\n                
ComputeAudit::Hipaa | ComputeAudit::Extended | ComputeAudit::Full => {\n                    phases.push(CreatePgauditExtension);\n                    phases.push(CreatePgauditlogtofileExtension);\n                    phases.push(DisablePostgresDBPgAudit);\n                }\n                ComputeAudit::Log | ComputeAudit::Base => {\n                    phases.push(CreatePgauditExtension);\n                    phases.push(DisablePostgresDBPgAudit);\n                }\n                ComputeAudit::Disabled => {}\n            }\n\n            for phase in phases {\n                debug!(\"Applying phase {:?}\", &phase);\n                apply_operations(\n                    params.clone(),\n                    spec.clone(),\n                    ctx.clone(),\n                    jwks_roles.clone(),\n                    phase,\n                    || async { Ok(&client) },\n                    self.params.lakebase_mode,\n                )\n                .await?;\n            }\n\n            Ok::<(), anyhow::Error>(())\n        })?;\n\n        Ok(())\n    }\n\n    /// Apply SQL migrations of the RunInEachDatabase phase.\n    ///\n    /// May opt to not connect to databases that don't have any scheduled\n    /// operations.  The function is concurrency-controlled with the provided\n    /// semaphore.  
The caller has to make sure the semaphore isn't exhausted.\n    #[allow(clippy::too_many_arguments)] // TODO: needs bigger refactoring\n    async fn apply_spec_sql_db(\n        params: Arc<ComputeNodeParams>,\n        spec: Arc<ComputeSpec>,\n        conf: Arc<tokio_postgres::Config>,\n        ctx: Arc<tokio::sync::RwLock<MutableApplyContext>>,\n        jwks_roles: Arc<HashSet<String>>,\n        concurrency_token: Arc<tokio::sync::Semaphore>,\n        db: DB,\n        subphases: Vec<PerDatabasePhase>,\n        lakebase_mode: bool,\n    ) -> Result<()> {\n        let _permit = concurrency_token.acquire().await?;\n\n        let mut client_conn = None;\n\n        for subphase in subphases {\n            apply_operations(\n                params.clone(),\n                spec.clone(),\n                ctx.clone(),\n                jwks_roles.clone(),\n                RunInEachDatabase {\n                    db: db.clone(),\n                    subphase,\n                },\n                // Only connect if apply_operation actually wants a connection.\n                // It's quite possible this database doesn't need any queries,\n                // so by not connecting we save time and effort connecting to\n                // that database.\n                || async {\n                    if client_conn.is_none() {\n                        let db_client = Self::get_maintenance_client(&conf).await?;\n                        client_conn.replace(db_client);\n                    }\n                    let client = client_conn.as_ref().unwrap();\n                    Ok(client)\n                },\n                lakebase_mode,\n            )\n            .await?;\n        }\n\n        drop(client_conn);\n\n        Ok::<(), anyhow::Error>(())\n    }\n\n    /// Choose how many concurrent connections to use for applying the spec changes.\n    pub fn max_service_connections(\n        &self,\n        compute_state: &ComputeState,\n        spec: &ComputeSpec,\n    ) -> usize 
{\n        // If the cluster is in Init state we don't have to deal with user connections,\n        // and can thus use all `max_connections` connection slots. However, that's generally not\n        // very efficient, so we generally still limit it to a smaller number.\n        if compute_state.status == ComputeStatus::Init {\n            // If the settings contain 'max_connections', use that as template\n            if let Some(config) = spec.cluster.settings.find(\"max_connections\") {\n                config.parse::<usize>().ok()\n            } else {\n                // Otherwise, try to find the setting in the postgresql_conf string\n                spec.cluster\n                    .postgresql_conf\n                    .iter()\n                    .flat_map(|conf| conf.split(\"\\n\"))\n                    .filter_map(|line| {\n                        if !line.contains(\"max_connections\") {\n                            return None;\n                        }\n\n                        let (key, value) = line.split_once(\"=\")?;\n                        let key = key\n                            .trim_start_matches(char::is_whitespace)\n                            .trim_end_matches(char::is_whitespace);\n\n                        let value = value\n                            .trim_start_matches(char::is_whitespace)\n                            .trim_end_matches(char::is_whitespace);\n\n                        if key != \"max_connections\" {\n                            return None;\n                        }\n\n                        value.parse::<usize>().ok()\n                    })\n                    .next()\n            }\n            // If max_connections is present, use at most 1/3rd of that.\n            // When max_connections is lower than 30, try to use at least 10 connections, but\n            // never more than max_connections.\n            .map(|limit| match limit {\n                0..10 => limit,\n                10..30 => 10,\n              
  30..300 => limit / 3,\n                300.. => 100,\n            })\n            // If we didn't find max_connections, default to 10 concurrent connections.\n            .unwrap_or(10)\n        } else {\n            // state == Running\n            // Because the cluster is already in the Running state, we should assume users are\n            // already connected to the cluster, and high concurrency could negatively\n            // impact user connectivity. Therefore, we can limit concurrency to the number of\n            // reserved superuser connections, which users wouldn't be able to use anyway.\n            spec.cluster\n                .settings\n                .find(\"superuser_reserved_connections\")\n                .iter()\n                .filter_map(|val| val.parse::<usize>().ok())\n                .map(|val| if val > 1 { val - 1 } else { 1 })\n                .next_back()\n                .unwrap_or(3)\n        }\n    }\n}\n\n#[derive(Clone)]\npub enum DB {\n    SystemDB,\n    UserDB(Database),\n}\n\nimpl DB {\n    pub fn new(db: Database) -> DB {\n        Self::UserDB(db)\n    }\n\n    pub fn is_owned_by(&self, role: &PgIdent) -> bool {\n        match self {\n            DB::SystemDB => false,\n            DB::UserDB(db) => &db.owner == role,\n        }\n    }\n}\n\nimpl Debug for DB {\n    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {\n        match self {\n            DB::SystemDB => f.debug_tuple(\"SystemDB\").finish(),\n            DB::UserDB(db) => f.debug_tuple(\"UserDB\").field(&db.name).finish(),\n        }\n    }\n}\n\n#[derive(Copy, Clone, Debug)]\npub enum PerDatabasePhase {\n    DeleteDBRoleReferences,\n    ChangeSchemaPerms,\n    /// This is a shared phase, used for both i) dropping dangling LR subscriptions\n    /// before dropping the DB, and ii) dropping all subscriptions after creating\n    /// a fresh branch.\n    /// N.B. 
we will skip all DBs that are not present in Postgres, invalid, or\n    /// have `datallowconn = false` (`restrict_conn`).\n    DropLogicalSubscriptions,\n}\n\n#[derive(Clone, Debug)]\npub enum ApplySpecPhase {\n    CreatePrivilegedRole,\n    // BEGIN_HADRON\n    CreateDatabricksRoles,\n    AlterDatabricksRoles,\n    // END_HADRON\n    DropInvalidDatabases,\n    RenameRoles,\n    CreateAndAlterRoles,\n    RenameAndDeleteDatabases,\n    CreateAndAlterDatabases,\n    CreateSchemaNeon,\n    RunInEachDatabase { db: DB, subphase: PerDatabasePhase },\n    CreatePgauditExtension,\n    CreatePgauditlogtofileExtension,\n    DisablePostgresDBPgAudit,\n    HandleOtherExtensions,\n    HandleNeonExtension,\n    // BEGIN_HADRON\n    HandleDatabricksAuthExtension,\n    // END_HADRON\n    CreateAvailabilityCheck,\n    // BEGIN_HADRON\n    AddDatabricksGrants,\n    CreateDatabricksMisc,\n    // END_HADRON\n    DropRoles,\n    FinalizeDropLogicalSubscriptions,\n}\n\npub struct Operation {\n    pub query: String,\n    pub comment: Option<String>,\n}\n\npub struct MutableApplyContext {\n    pub roles: HashMap<String, Role>,\n    pub dbs: HashMap<String, Database>,\n}\n\n/// Apply the operations that belong to the given spec apply phase.\n///\n/// Commands within a single phase are executed in order of Iterator yield.\n/// Commands of ApplySpecPhase::RunInEachDatabase will execute in the database\n/// indicated by its `db` field, and can share a single client for all changes\n/// to that database.\n///\n/// Notes:\n/// - Commands are pipelined, and thus may cause incomplete apply if one\n///   command of many fails.\n/// - Failing commands will fail the phase's apply step once the return value\n///   is processed.\n/// - No timeouts have (yet) been implemented.\n/// - The caller is responsible for limiting and/or applying concurrency.\npub async fn apply_operations<'a, Fut, F>(\n    params: Arc<ComputeNodeParams>,\n    spec: Arc<ComputeSpec>,\n    ctx: 
Arc<RwLock<MutableApplyContext>>,\n    jwks_roles: Arc<HashSet<String>>,\n    apply_spec_phase: ApplySpecPhase,\n    client: F,\n    lakebase_mode: bool,\n) -> Result<()>\nwhere\n    F: FnOnce() -> Fut,\n    Fut: Future<Output = Result<&'a Client>>,\n{\n    debug!(\"Starting phase {:?}\", &apply_spec_phase);\n    let span = info_span!(\"db_apply_changes\", phase=?apply_spec_phase);\n    let span2 = span.clone();\n    async move {\n        debug!(\"Processing phase {:?}\", &apply_spec_phase);\n        let ctx = ctx;\n\n        let mut ops = get_operations(&params, &spec, &ctx, &jwks_roles, &apply_spec_phase)\n            .await?\n            .peekable();\n\n        // Return (and by doing so, skip requesting the PostgreSQL client) if\n        // we don't have any operations scheduled.\n        if ops.peek().is_none() {\n            return Ok(());\n        }\n\n        let client = client().await?;\n\n        debug!(\"Applying phase {:?}\", &apply_spec_phase);\n\n        let active_queries = ops\n            .map(|op| {\n                let Operation { comment, query } = op;\n                let inspan = match comment {\n                    None => span.clone(),\n                    Some(comment) => info_span!(\"phase {}: {}\", comment),\n                };\n\n                async {\n                    let query = query;\n                    let res = client.simple_query(&query).await;\n                    debug!(\n                        \"{} {}\",\n                        if res.is_ok() {\n                            \"successfully executed\"\n                        } else {\n                            \"failed to execute\"\n                        },\n                        query\n                    );\n                    if !lakebase_mode {\n                        return res;\n                    }\n                    // BEGIN HADRON\n                    if let Err(e) = res.as_ref() {\n                        if let Some(sql_state) = e.code() {\n         
                   if sql_state.code() == \"57014\" {\n                                // SQL State 57014 (ERRCODE_QUERY_CANCELED) is used for statement timeouts.\n                                // Increment the counter whenever a statement timeout occurs. Timeouts on\n                                // this configuration path can only occur due to PS connectivity problems that\n                                // Postgres failed to recover from.\n                                COMPUTE_CONFIGURE_STATEMENT_TIMEOUT_ERRORS.inc();\n                            }\n                        }\n                    }\n                    // END HADRON\n\n                    res\n                }\n                .instrument(inspan)\n            })\n            .collect::<Vec<_>>();\n\n        drop(ctx);\n\n        for it in join_all(active_queries).await {\n            drop(it?);\n        }\n\n        debug!(\"Completed phase {:?}\", &apply_spec_phase);\n\n        Ok(())\n    }\n    .instrument(span2)\n    .await\n}\n\n/// Create a stream of operations to be executed for that phase of applying\n/// changes.\n///\n/// In the future we may generate a single stream of changes and then\n/// sort/merge/batch execution, but for now this is a nice way to improve\n/// batching behavior of the commands.\nasync fn get_operations<'a>(\n    params: &'a ComputeNodeParams,\n    spec: &'a ComputeSpec,\n    ctx: &'a RwLock<MutableApplyContext>,\n    jwks_roles: &'a HashSet<String>,\n    apply_spec_phase: &'a ApplySpecPhase,\n) -> Result<Box<dyn Iterator<Item = Operation> + 'a + Send>> {\n    match apply_spec_phase {\n        ApplySpecPhase::CreatePrivilegedRole => Ok(Box::new(once(Operation {\n            query: format!(\n                include_str!(\"sql/create_privileged_role.sql\"),\n                privileged_role_name = params.privileged_role_name,\n                privileges = if params.lakebase_mode {\n                    \"CREATEDB CREATEROLE NOLOGIN BYPASSRLS\"\n                } 
else {\n                    \"CREATEDB CREATEROLE NOLOGIN REPLICATION BYPASSRLS\"\n                }\n            ),\n            comment: None,\n        }))),\n        // BEGIN_HADRON\n        // New Hadron phase\n        ApplySpecPhase::CreateDatabricksRoles => {\n            let queries = create_databricks_roles();\n            let operations = queries.into_iter().map(|query| Operation {\n                query,\n                comment: None,\n            });\n            Ok(Box::new(operations))\n        }\n\n        // Backfill existing databricks_reader_* roles with statement timeout from GUC\n        ApplySpecPhase::AlterDatabricksRoles => {\n            let query = String::from(include_str!(\n                \"sql/alter_databricks_reader_roles_timeout.sql\"\n            ));\n\n            let operations = once(Operation {\n                query,\n                comment: Some(\n                    \"Backfill existing databricks_reader_* roles with statement timeout\"\n                        .to_string(),\n                ),\n            });\n\n            Ok(Box::new(operations))\n        }\n        // End of new Hadron Phase\n        // END_HADRON\n        ApplySpecPhase::DropInvalidDatabases => {\n            let mut ctx = ctx.write().await;\n            let databases = &mut ctx.dbs;\n\n            let keys: Vec<_> = databases\n                .iter()\n                .filter(|(_, db)| db.invalid)\n                .map(|(dbname, _)| dbname.clone())\n                .collect();\n\n            // After recent commit in Postgres, interrupted DROP DATABASE\n            // leaves the database in the invalid state. 
According to the\n            // commit message, the only option for the user is to drop it again.\n            // See:\n            //   https://github.com/postgres/postgres/commit/a4b4cc1d60f7e8ccfcc8ff8cb80c28ee411ad9a9\n            //\n            // The Postgres Neon extension is designed so that the db is de-registered\n            // in the control plane metadata only after it is dropped. So there is\n            // a chance that it still thinks that the db should exist. This means\n            // that it will be re-created by the `CreateAndAlterDatabases` phase. This\n            // is fine, as the user can just drop the database again (in vanilla\n            // Postgres they would need to do the same).\n            let operations = keys\n                .into_iter()\n                .filter_map(move |dbname| ctx.dbs.remove(&dbname))\n                .map(|db| Operation {\n                    query: format!(\"DROP DATABASE IF EXISTS {}\", db.name.pg_quote()),\n                    comment: Some(format!(\"Dropping invalid database {}\", db.name)),\n                });\n\n            Ok(Box::new(operations))\n        }\n        ApplySpecPhase::RenameRoles => {\n            let mut ctx = ctx.write().await;\n\n            let operations = spec\n                .delta_operations\n                .iter()\n                .flatten()\n                .filter(|op| op.action == \"rename_role\")\n                .filter_map(move |op| {\n                    let roles = &mut ctx.roles;\n\n                    if roles.contains_key(op.name.as_str()) {\n                        None\n                    } else {\n                        let new_name = op.new_name.as_ref().unwrap();\n                        let mut role = roles.remove(op.name.as_str()).unwrap();\n\n                        role.name = new_name.clone();\n                        role.encrypted_password = None;\n                        roles.insert(role.name.clone(), role);\n\n                        Some(Operation {\n                   
         query: format!(\n                                \"ALTER ROLE {} RENAME TO {}\",\n                                op.name.pg_quote(),\n                                new_name.pg_quote()\n                            ),\n                            comment: Some(format!(\"renaming role '{}' to '{}'\", op.name, new_name)),\n                        })\n                    }\n                });\n\n            Ok(Box::new(operations))\n        }\n        ApplySpecPhase::CreateAndAlterRoles => {\n            let mut ctx = ctx.write().await;\n\n            let operations = spec.cluster.roles\n                .iter()\n                .filter_map(move |role| {\n                    let roles = &mut ctx.roles;\n                    let db_role = roles.get(&role.name);\n\n                    match db_role {\n                        Some(db_role) => {\n                            if db_role.encrypted_password != role.encrypted_password {\n                                // This can be run on /every/ role! Not just ones created through the console.\n                                // This means that if you add some funny ALTER here that adds a permission,\n                                // this will get run even on user-created roles! This will result in different\n                                // behavior before and after a spec gets reapplied. The below ALTER as it stands\n                                // now only grants LOGIN and changes the password. 
Please do not allow this branch\n                                // to do anything silly.\n                                Some(Operation {\n                                    query: format!(\n                                        \"ALTER ROLE {} {}\",\n                                        role.name.pg_quote(),\n                                        role.to_pg_options(),\n                                    ),\n                                    comment: None,\n                                })\n                            } else {\n                                None\n                            }\n                        }\n                        None => {\n                            let query = if !jwks_roles.contains(role.name.as_str()) {\n                                format!(\n                                    \"CREATE ROLE {} INHERIT CREATEROLE CREATEDB BYPASSRLS REPLICATION IN ROLE {} {}\",\n                                    role.name.pg_quote(),\n                                    params.privileged_role_name,\n                                    role.to_pg_options(),\n                                )\n                            } else {\n                                format!(\n                                    \"CREATE ROLE {} {}\",\n                                    role.name.pg_quote(),\n                                    role.to_pg_options(),\n                                )\n                            };\n                            Some(Operation {\n                                query,\n                                comment: Some(format!(\"creating role {}\", role.name)),\n                            })\n                        }\n                    }\n                });\n\n            Ok(Box::new(operations))\n        }\n        ApplySpecPhase::RenameAndDeleteDatabases => {\n            let mut ctx = ctx.write().await;\n\n            let operations = spec\n                .delta_operations\n                
.iter()\n                .flatten()\n                .filter_map(move |op| {\n                    let databases = &mut ctx.dbs;\n                    match op.action.as_str() {\n                        // We do not check whether the DB exists or not,\n                        // Postgres will take care of it for us\n                        \"delete_db\" => {\n                            let (db_name, outer_tag) = op.name.pg_quote_dollar();\n                            // In Postgres we can't drop a database if it is a template.\n                            // So we need to unset the template flag first, but it could\n                            // be a retry, so we could've already dropped the database.\n                            // Check that database exists first to make it idempotent.\n                            let unset_template_query: String = format!(\n                                include_str!(\"sql/unset_template_for_drop_dbs.sql\"),\n                                datname = db_name,\n                                outer_tag = outer_tag,\n                            );\n\n                            // Use FORCE to drop database even if there are active connections.\n                            // We run this from `cloud_admin`, so it should have enough privileges.\n                            //\n                            // NB: there could be other db states, which prevent us from dropping\n                            // the database. For example, if db is used by any active subscription\n                            // or replication slot.\n                            // Such cases are handled in the DropLogicalSubscriptions\n                            // phase. 
We do all the cleanup before actually dropping the database.\n                            let drop_db_query: String = format!(\n                                \"DROP DATABASE IF EXISTS {} WITH (FORCE)\",\n                                &op.name.pg_quote()\n                            );\n\n                            databases.remove(&op.name);\n\n                            Some(vec![\n                                Operation {\n                                    query: unset_template_query,\n                                    comment: Some(format!(\n                                        \"optionally clearing template flags for DB {}\",\n                                        op.name,\n                                    )),\n                                },\n                                Operation {\n                                    query: drop_db_query,\n                                    comment: Some(format!(\"deleting database {}\", op.name,)),\n                                },\n                            ])\n                        }\n                        \"rename_db\" => {\n                            if let Some(mut db) = databases.remove(&op.name) {\n                                // update state of known databases\n                                let new_name = op.new_name.as_ref().unwrap();\n                                db.name = new_name.clone();\n                                databases.insert(db.name.clone(), db);\n\n                                Some(vec![Operation {\n                                    query: format!(\n                                        \"ALTER DATABASE {} RENAME TO {}\",\n                                        op.name.pg_quote(),\n                                        new_name.pg_quote(),\n                                    ),\n                                    comment: Some(format!(\n                                        \"renaming database '{}' to '{}'\",\n                                
        op.name, new_name\n                                    )),\n                                }])\n                            } else {\n                                None\n                            }\n                        }\n                        _ => None,\n                    }\n                })\n                .flatten();\n\n            Ok(Box::new(operations))\n        }\n        ApplySpecPhase::CreateAndAlterDatabases => {\n            let mut ctx = ctx.write().await;\n\n            let operations = spec\n                .cluster\n                .databases\n                .iter()\n                .filter_map(move |db| {\n                    let databases = &mut ctx.dbs;\n                    if let Some(edb) = databases.get_mut(&db.name) {\n                        let change_owner = if edb.owner.starts_with('\"') {\n                            db.owner.pg_quote() != edb.owner\n                        } else {\n                            db.owner != edb.owner\n                        };\n\n                        edb.owner = db.owner.clone();\n\n                        if change_owner {\n                            Some(vec![Operation {\n                                query: format!(\n                                    \"ALTER DATABASE {} OWNER TO {}\",\n                                    db.name.pg_quote(),\n                                    db.owner.pg_quote()\n                                ),\n                                comment: Some(format!(\n                                    \"changing database owner of database {} to {}\",\n                                    db.name, db.owner\n                                )),\n                            }])\n                        } else {\n                            None\n                        }\n                    } else {\n                        databases.insert(db.name.clone(), db.clone());\n\n                        Some(vec![\n                            Operation {\n    
                            query: format!(\n                                    \"CREATE DATABASE {} {}\",\n                                    db.name.pg_quote(),\n                                    db.to_pg_options(),\n                                ),\n                                comment: None,\n                            },\n                            Operation {\n                                // ALL PRIVILEGES grants CREATE, CONNECT, and TEMPORARY on the database\n                                // (see https://www.postgresql.org/docs/current/ddl-priv.html)\n                                query: format!(\n                                    \"GRANT ALL PRIVILEGES ON DATABASE {} TO {}\",\n                                    db.name.pg_quote(),\n                                    params.privileged_role_name\n                                ),\n                                comment: None,\n                            },\n                        ])\n                    }\n                })\n                .flatten();\n\n            Ok(Box::new(operations))\n        }\n        ApplySpecPhase::CreateSchemaNeon => Ok(Box::new(once(Operation {\n            query: String::from(\"CREATE SCHEMA IF NOT EXISTS neon\"),\n            comment: Some(String::from(\n                \"create schema for neon extension and utils tables\",\n            )),\n        }))),\n        ApplySpecPhase::RunInEachDatabase { db, subphase } => {\n            // Do some checks that user DB exists and we can access it.\n            //\n            // During the phases like DropLogicalSubscriptions, DeleteDBRoleReferences,\n            // which happen before dropping the DB, the current run could be a retry,\n            // so it's a valid case when DB is absent already. 
The case of\n            // `pg_database.datallowconn = false`/`restrict_conn` is a bit tricky, as\n            // in theory the user can have some dangling objects there, so we will fail at\n            // the actual drop later. Yet, to fix that in the current code we would need\n            // to ALTER DATABASE, and then check back, but that is even more invasive, so\n            // that's not what we really want to do here.\n            //\n            // For ChangeSchemaPerms, skipping DBs we cannot access is totally fine.\n            if let DB::UserDB(db) = db {\n                let databases = &ctx.read().await.dbs;\n\n                let edb = match databases.get(&db.name) {\n                    Some(edb) => edb,\n                    None => {\n                        warn!(\n                            \"skipping RunInEachDatabase phase {:?}, database {} doesn't exist in PostgreSQL\",\n                            subphase, db.name\n                        );\n                        return Ok(Box::new(empty()));\n                    }\n                };\n\n                if edb.restrict_conn || edb.invalid {\n                    warn!(\n                        \"skipping RunInEachDatabase phase {:?}, database {} is (restrict_conn={}, invalid={})\",\n                        subphase, db.name, edb.restrict_conn, edb.invalid\n                    );\n                    return Ok(Box::new(empty()));\n                }\n            }\n\n            match subphase {\n                PerDatabasePhase::DropLogicalSubscriptions => {\n                    match &db {\n                        DB::UserDB(db) => {\n                            let (db_name, outer_tag) = db.name.pg_quote_dollar();\n                            let drop_subscription_query: String = format!(\n                                include_str!(\"sql/drop_subscriptions.sql\"),\n                                datname_str = db_name,\n                                outer_tag = outer_tag,\n                 
           );\n\n                            let operations = vec![Operation {\n                                query: drop_subscription_query,\n                                comment: Some(format!(\n                                    \"optionally dropping subscriptions for DB {}\",\n                                    db.name,\n                                )),\n                            }]\n                            .into_iter();\n\n                            Ok(Box::new(operations))\n                        }\n                        // skip this cleanup for the system databases\n                        // because users can't drop them\n                        DB::SystemDB => Ok(Box::new(empty())),\n                    }\n                }\n                PerDatabasePhase::DeleteDBRoleReferences => {\n                    let ctx = ctx.read().await;\n\n                    let operations = spec\n                        .delta_operations\n                        .iter()\n                        .flatten()\n                        .filter(|op| op.action == \"delete_role\")\n                        .filter_map(move |op| {\n                            if db.is_owned_by(&op.name) {\n                                return None;\n                            }\n                            if !ctx.roles.contains_key(&op.name) {\n                                return None;\n                            }\n                            let quoted = op.name.pg_quote();\n                            let new_owner = match &db {\n                                DB::SystemDB => PgIdent::from(\"cloud_admin\").pg_quote(),\n                                DB::UserDB(db) => db.owner.pg_quote(),\n                            };\n                            let (escaped_role, outer_tag) = op.name.pg_quote_dollar();\n\n                            Some(vec![\n                                // This will reassign all dependent objects to the db owner\n                                
Operation {\n                                    query: format!(\"REASSIGN OWNED BY {quoted} TO {new_owner}\",),\n                                    comment: None,\n                                },\n                                // Revoke some potentially blocking privileges (Neon-specific currently)\n                                Operation {\n                                    query: format!(\n                                        include_str!(\"sql/pre_drop_role_revoke_privileges.sql\"),\n                                        // N.B. this has to be properly dollar-escaped with `pg_quote_dollar()`\n                                        role_name = escaped_role,\n                                        outer_tag = outer_tag,\n                                    )\n                                    // HADRON change:\n                                    .replace(\"neon_superuser\", &params.privileged_role_name),\n                                    // HADRON change end                                    ,\n                                    comment: None,\n                                },\n                                // This now will only drop privileges of the role\n                                // TODO: this is obviously not 100% true because of the above case,\n                                // there could be still some privileges that are not revoked. 
Maybe this\n                                // only drops privileges that were granted *by this* role, not *to this* role,\n                                // but this has to be checked.\n                                Operation {\n                                    query: format!(\"DROP OWNED BY {quoted}\"),\n                                    comment: None,\n                                },\n                            ])\n                        })\n                        .flatten();\n\n                    Ok(Box::new(operations))\n                }\n                PerDatabasePhase::ChangeSchemaPerms => {\n                    let db = match &db {\n                        // ignore schema permissions on the system database\n                        DB::SystemDB => return Ok(Box::new(empty())),\n                        DB::UserDB(db) => db,\n                    };\n                    let (db_owner, outer_tag) = db.owner.pg_quote_dollar();\n\n                    let operations = vec![\n                        Operation {\n                            query: format!(\n                                include_str!(\"sql/set_public_schema_owner.sql\"),\n                                db_owner = db_owner,\n                                outer_tag = outer_tag,\n                            ),\n                            comment: None,\n                        },\n                        Operation {\n                            query: String::from(include_str!(\"sql/default_grants.sql\"))\n                                .replace(\"neon_superuser\", &params.privileged_role_name),\n                            comment: None,\n                        },\n                    ]\n                    .into_iter();\n\n                    Ok(Box::new(operations))\n                }\n            }\n        }\n        // Interestingly, we only install p_s_s in the main database, even when\n        // it's preloaded.\n        ApplySpecPhase::HandleOtherExtensions => {\n       
     if let Some(libs) = spec.cluster.settings.find(\"shared_preload_libraries\") {\n                if libs.contains(\"pg_stat_statements\") {\n                    return Ok(Box::new(once(Operation {\n                        query: String::from(\n                            \"CREATE EXTENSION IF NOT EXISTS pg_stat_statements WITH SCHEMA public\",\n                        ),\n                        comment: Some(String::from(\"create system extensions\")),\n                    })));\n                }\n            }\n            Ok(Box::new(empty()))\n        }\n        ApplySpecPhase::CreatePgauditExtension => Ok(Box::new(once(Operation {\n            query: String::from(\"CREATE EXTENSION IF NOT EXISTS pgaudit WITH SCHEMA public\"),\n            comment: Some(String::from(\"create pgaudit extensions\")),\n        }))),\n        ApplySpecPhase::CreatePgauditlogtofileExtension => Ok(Box::new(once(Operation {\n            query: String::from(\n                \"CREATE EXTENSION IF NOT EXISTS pgauditlogtofile WITH SCHEMA public\",\n            ),\n            comment: Some(String::from(\"create pgauditlogtofile extensions\")),\n        }))),\n        // Disable pgaudit logging for the `postgres` database.\n        // `postgres` is the Neon system database used by monitors\n        // and compute_ctl tuning functions and thus generates a lot of noise.\n        // We do not consider data stored in this database as sensitive.\n        ApplySpecPhase::DisablePostgresDBPgAudit => {\n            let query = \"ALTER DATABASE postgres SET pgaudit.log to 'none'\";\n            Ok(Box::new(once(Operation {\n                query: query.to_string(),\n                comment: Some(query.to_string()),\n            })))\n        }\n        ApplySpecPhase::HandleNeonExtension => {\n            let operations = vec![\n                Operation {\n                    query: String::from(\"CREATE EXTENSION IF NOT EXISTS neon WITH SCHEMA neon\"),\n                    comment: Some(String::from(\n 
                       \"init: install the extension if not already installed\",\n                    )),\n                },\n                Operation {\n                    query: String::from(\n                        \"UPDATE pg_catalog.pg_extension SET extrelocatable = true WHERE extname OPERATOR(pg_catalog.=) 'neon'::pg_catalog.name AND extrelocatable OPERATOR(pg_catalog.=) false\",\n                    ),\n                    comment: Some(String::from(\"compat/fix: make neon relocatable\")),\n                },\n                Operation {\n                    query: String::from(\"ALTER EXTENSION neon SET SCHEMA neon\"),\n                    comment: Some(String::from(\"compat/fix: alter neon extension schema\")),\n                },\n                Operation {\n                    query: String::from(\"ALTER EXTENSION neon UPDATE\"),\n                    comment: Some(String::from(\"compat/update: update neon extension version\")),\n                },\n            ]\n            .into_iter();\n\n            Ok(Box::new(operations))\n        }\n        // BEGIN_HADRON\n        // Note: we may want to version the extension someday, but for now we just drop it and recreate it.\n        ApplySpecPhase::HandleDatabricksAuthExtension => {\n            let operations = vec![\n                Operation {\n                    query: String::from(\"DROP EXTENSION IF EXISTS databricks_auth\"),\n                    comment: Some(String::from(\"dropping existing databricks_auth extension\")),\n                },\n                Operation {\n                    query: String::from(\"CREATE EXTENSION databricks_auth\"),\n                    comment: Some(String::from(\"creating databricks_auth extension\")),\n                },\n                Operation {\n                    query: String::from(\"GRANT SELECT ON databricks_auth_metrics TO pg_monitor\"),\n                    comment: Some(String::from(\"grant select on databricks auth counters\")),\n                
},\n            ]\n            .into_iter();\n\n            Ok(Box::new(operations))\n        }\n        // END_HADRON\n        ApplySpecPhase::CreateAvailabilityCheck => Ok(Box::new(once(Operation {\n            query: String::from(include_str!(\"sql/add_availabilitycheck_tables.sql\")),\n            comment: None,\n        }))),\n        ApplySpecPhase::DropRoles => {\n            let operations = spec\n                .delta_operations\n                .iter()\n                .flatten()\n                .filter(|op| op.action == \"delete_role\")\n                .map(|op| Operation {\n                    query: format!(\"DROP ROLE IF EXISTS {}\", op.name.pg_quote()),\n                    comment: None,\n                });\n\n            Ok(Box::new(operations))\n        }\n\n        // BEGIN_HADRON\n        // New Hadron phases\n        //\n        // Grants permissions to roles that are used by Databricks.\n        ApplySpecPhase::AddDatabricksGrants => {\n            let operations = vec![\n                Operation {\n                    query: String::from(\"GRANT USAGE ON SCHEMA neon TO databricks_monitor\"),\n                    comment: Some(String::from(\n                        \"Permissions needed to execute neon.* functions (in the postgres database)\",\n                    )),\n                },\n                Operation {\n                    query: String::from(\n                        \"GRANT SELECT, INSERT, UPDATE ON health_check TO databricks_monitor\",\n                    ),\n                    comment: Some(String::from(\"Permissions needed for read and write probes\")),\n                },\n                Operation {\n                    query: String::from(\n                        \"GRANT EXECUTE ON FUNCTION pg_ls_dir(text) TO databricks_monitor\",\n                    ),\n                    comment: Some(String::from(\n                        \"Permissions needed to monitor .snap file counts\",\n                    )),\n           
     },\n                Operation {\n                    query: String::from(\n                        \"GRANT SELECT ON neon.neon_perf_counters TO databricks_monitor\",\n                    ),\n                    comment: Some(String::from(\n                        \"Permissions needed to access neon performance counters view\",\n                    )),\n                },\n                Operation {\n                    query: String::from(\n                        \"GRANT EXECUTE ON FUNCTION neon.get_perf_counters() TO databricks_monitor\",\n                    ),\n                    comment: Some(String::from(\n                        \"Permissions needed to execute the underlying performance counters function\",\n                    )),\n                },\n            ]\n            .into_iter();\n\n            Ok(Box::new(operations))\n        }\n        // Creates minor objects that are used by Databricks.\n        ApplySpecPhase::CreateDatabricksMisc => Ok(Box::new(once(Operation {\n            query: String::from(include_str!(\"sql/create_databricks_misc.sql\")),\n            comment: Some(String::from(\n                \"The function databricks_monitor uses to convert exception to 0 or 1\",\n            )),\n        }))),\n        // End of new Hadron phases\n        // END_HADRON\n        ApplySpecPhase::FinalizeDropLogicalSubscriptions => Ok(Box::new(once(Operation {\n            query: String::from(include_str!(\"sql/finalize_drop_subscriptions.sql\")),\n            comment: None,\n        }))),\n    }\n}\n"
  },
  {
    "path": "compute_tools/src/sql/add_availabilitycheck_tables.sql",
    "content": "DO $$\nBEGIN\n    IF NOT EXISTS(\n        SELECT 1\n        FROM pg_catalog.pg_tables\n        WHERE tablename::pg_catalog.name OPERATOR(pg_catalog.=) 'health_check'::pg_catalog.name\n        AND schemaname::pg_catalog.name OPERATOR(pg_catalog.=) 'public'::pg_catalog.name\n    )\n    THEN\n    CREATE TABLE public.health_check (\n        id pg_catalog.int4 primary key generated by default as identity,\n        updated_at pg_catalog.timestamptz default pg_catalog.now()\n    );\n    INSERT INTO public.health_check VALUES (1, pg_catalog.now())\n        ON CONFLICT (id) DO UPDATE\n         SET updated_at = pg_catalog.now();\n    END IF;\nEND\n$$"
  },
  {
    "path": "compute_tools/src/sql/alter_databricks_reader_roles_timeout.sql",
    "content": "DO $$\nDECLARE\n    reader_role RECORD;\n    timeout_value TEXT;\nBEGIN\n    -- Get the current GUC setting for reader statement timeout\n    SELECT current_setting('databricks.reader_statement_timeout', true) INTO timeout_value;\n    \n    -- Only proceed if timeout_value is not null/empty and not '0' (disabled)\n    IF timeout_value IS NOT NULL AND timeout_value != '' AND timeout_value != '0' THEN\n        -- Find all databricks_reader_* roles and update their statement_timeout\n        FOR reader_role IN \n            SELECT r.rolname\n            FROM pg_roles r\n            WHERE r.rolname ~ '^databricks_reader_\\d+$'\n        LOOP\n            -- Apply the timeout setting to the role (will overwrite existing setting)\n            EXECUTE format('ALTER ROLE %I SET statement_timeout = %L', \n                         reader_role.rolname, timeout_value);\n            \n            RAISE LOG 'Updated statement_timeout = % for role %', timeout_value, reader_role.rolname;\n        END LOOP;\n    END IF;\nEND\n$$;\n"
  },
  {
    "path": "compute_tools/src/sql/create_databricks_misc.sql",
    "content": "ALTER ROLE databricks_monitor SET statement_timeout = '60s';\n\nCREATE OR REPLACE FUNCTION health_check_write_succeeds()\nRETURNS INTEGER AS $$\nBEGIN\nINSERT INTO health_check VALUES (1, now())\nON CONFLICT (id) DO UPDATE\n    SET updated_at = now();\n\nRETURN 1;\nEXCEPTION WHEN OTHERS THEN\nRAISE EXCEPTION '[DATABRICKS_SMGR] health_check failed: [%] %', SQLSTATE, SQLERRM;\nRETURN 0;\nEND;\n$$ LANGUAGE plpgsql;\n"
  },
  {
    "path": "compute_tools/src/sql/create_privileged_role.sql",
    "content": "DO $$\n    BEGIN\n        IF NOT EXISTS (SELECT FROM pg_catalog.pg_roles WHERE rolname OPERATOR(pg_catalog.=) '{privileged_role_name}'::pg_catalog.name)\n        THEN\n            CREATE ROLE {privileged_role_name} {privileges} IN ROLE pg_read_all_data, pg_write_all_data;\n        END IF;\n    END\n$$;\n"
  },
  {
    "path": "compute_tools/src/sql/default_grants.sql",
    "content": "DO\n$$\n    BEGIN\n        IF EXISTS(\n            SELECT nspname\n            FROM pg_catalog.pg_namespace\n            WHERE nspname OPERATOR(pg_catalog.=) 'public'\n        ) AND\n           pg_catalog.current_setting('server_version_num')::int OPERATOR(pg_catalog./) 10000 OPERATOR(pg_catalog.>=) 15\n        THEN\n            IF EXISTS(\n                SELECT rolname\n                FROM pg_catalog.pg_roles\n                WHERE rolname OPERATOR(pg_catalog.=) 'web_access'\n            )\n            THEN\n                GRANT CREATE ON SCHEMA public TO web_access;\n            END IF;\n        END IF;\n        IF EXISTS(\n            SELECT nspname\n            FROM pg_catalog.pg_namespace\n            WHERE nspname OPERATOR(pg_catalog.=) 'public'\n        )\n        THEN\n            ALTER DEFAULT PRIVILEGES IN SCHEMA public GRANT ALL ON TABLES TO neon_superuser WITH GRANT OPTION;\n            ALTER DEFAULT PRIVILEGES IN SCHEMA public GRANT ALL ON SEQUENCES TO neon_superuser WITH GRANT OPTION;\n        END IF;\n    END\n$$;"
  },
  {
    "path": "compute_tools/src/sql/drop_subscriptions.sql",
    "content": "DO ${outer_tag}$\nDECLARE\n    subname TEXT;\nBEGIN\n    LOCK TABLE pg_catalog.pg_subscription IN ACCESS EXCLUSIVE MODE;\n    FOR subname IN\n        SELECT pg_subscription.subname\n        FROM pg_catalog.pg_subscription\n        WHERE subdbid OPERATOR(pg_catalog.=) (\n            SELECT oid FROM pg_database WHERE datname OPERATOR(pg_catalog.=) {datname_str}::pg_catalog.name\n        )\n    LOOP\n        EXECUTE pg_catalog.format('ALTER SUBSCRIPTION %I DISABLE;', subname);\n        EXECUTE pg_catalog.format('ALTER SUBSCRIPTION %I SET (slot_name = NONE);', subname);\n        EXECUTE pg_catalog.format('DROP SUBSCRIPTION %I;', subname);\n    END LOOP;\nEND;\n${outer_tag}$;\n"
  },
  {
    "path": "compute_tools/src/sql/finalize_drop_subscriptions.sql",
    "content": "DO $$\nBEGIN\n    IF NOT EXISTS(\n        SELECT 1\n        FROM pg_catalog.pg_tables\n        WHERE tablename OPERATOR(pg_catalog.=) 'drop_subscriptions_done'::pg_catalog.name\n        AND schemaname OPERATOR(pg_catalog.=) 'neon'::pg_catalog.name\n    )\n    THEN\n        CREATE TABLE neon.drop_subscriptions_done\n        (id pg_catalog.int4 primary key generated by default as identity, timeline_id pg_catalog.text);\n    END IF;\n\n    -- preserve the timeline_id of the last drop_subscriptions run\n    -- to ensure that the cleanup of a timeline is executed only once.\n    -- use upsert to avoid the table bloat in case of cascade branching (branch of a branch)\n    INSERT INTO neon.drop_subscriptions_done VALUES (1, pg_catalog.current_setting('neon.timeline_id'))\n    ON CONFLICT (id) DO UPDATE\n    SET timeline_id = pg_catalog.current_setting('neon.timeline_id')::pg_catalog.text;\nEND\n$$\n"
  },
  {
    "path": "compute_tools/src/sql/pre_drop_role_revoke_privileges.sql",
    "content": "DO ${outer_tag}$\nDECLARE\n    schema TEXT;\n    grantor TEXT;\n    revoke_query TEXT;\nBEGIN\n    FOR schema IN\n        SELECT schema_name\n        FROM information_schema.schemata\n        -- So far, we only had issues with 'public' schema. Probably, because we do some additional grants,\n        -- e.g., make DB owner the owner of 'public' schema automatically (when created via API).\n        -- See https://github.com/neondatabase/cloud/issues/13582 for the context.\n        -- Still, keep the loop because i) it efficiently handles the case when there is no 'public' schema,\n        -- ii) it's easy to add more schemas to the list if needed.\n        WHERE schema_name IN ('public')\n    LOOP\n        FOR grantor IN EXECUTE\n            pg_catalog.format(\n                'SELECT DISTINCT rtg.grantor FROM information_schema.role_table_grants AS rtg WHERE grantee OPERATOR(pg_catalog.=) %s',\n                -- N.B. this has to be properly dollar-escaped with `pg_quote_dollar()`\n                quote_literal({role_name})\n            )\n        LOOP\n            EXECUTE pg_catalog.format('SET LOCAL ROLE %I', grantor);\n\n            revoke_query := pg_catalog.format(\n                'REVOKE ALL PRIVILEGES ON ALL TABLES IN SCHEMA %I FROM %I GRANTED BY %I',\n                schema,\n                -- N.B. this has to be properly dollar-escaped with `pg_quote_dollar()`\n                {role_name},\n                grantor\n            );\n\n            EXECUTE revoke_query;\n        END LOOP;\n    END LOOP;\nEND;\n${outer_tag}$;\n"
  },
  {
    "path": "compute_tools/src/sql/set_public_schema_owner.sql",
    "content": "DO ${outer_tag}$\n    DECLARE\n        schema_owner TEXT;\n    BEGIN\n        IF EXISTS(\n            SELECT nspname\n            FROM pg_catalog.pg_namespace\n            WHERE nspname OPERATOR(pg_catalog.=) 'public'::pg_catalog.name\n        )\n        THEN\n            SELECT nspowner::regrole::text\n            FROM pg_catalog.pg_namespace\n            WHERE nspname OPERATOR(pg_catalog.=) 'public'::pg_catalog.text\n            INTO schema_owner;\n\n            IF schema_owner OPERATOR(pg_catalog.=) 'cloud_admin'::pg_catalog.text OR schema_owner OPERATOR(pg_catalog.=) 'zenith_admin'::pg_catalog.text\n            THEN\n                EXECUTE pg_catalog.format('ALTER SCHEMA public OWNER TO %I', {db_owner});\n            END IF;\n        END IF;\n    END\n${outer_tag}$;"
  },
  {
    "path": "compute_tools/src/sql/unset_template_for_drop_dbs.sql",
    "content": "DO ${outer_tag}$\n    BEGIN\n        IF EXISTS(\n            SELECT 1\n            FROM pg_catalog.pg_database\n            WHERE datname OPERATOR(pg_catalog.=) {datname}::pg_catalog.name\n        )\n        THEN\n            EXECUTE pg_catalog.format('ALTER DATABASE %I is_template false', {datname});\n        END IF;\n    END\n${outer_tag}$;\n"
  },
  {
    "path": "compute_tools/src/swap.rs",
    "content": "use std::path::Path;\n\nuse anyhow::{Context, anyhow};\nuse tracing::{instrument, warn};\n\npub const RESIZE_SWAP_BIN: &str = \"/neonvm/bin/resize-swap\";\n\n#[instrument]\npub fn resize_swap(size_bytes: u64) -> anyhow::Result<()> {\n    // run `/neonvm/bin/resize-swap --once {size_bytes}`\n    //\n    // Passing '--once' causes resize-swap to delete itself after successful completion, which\n    // means that if compute_ctl restarts later, we won't end up calling 'swapoff' while\n    // postgres is running.\n    //\n    // NOTE: resize-swap is not very clever. If present, --once MUST be the first arg.\n    let child_result = std::process::Command::new(\"/usr/bin/sudo\")\n        .arg(RESIZE_SWAP_BIN)\n        .arg(\"--once\")\n        .arg(size_bytes.to_string())\n        .spawn();\n\n    child_result\n        .context(\"spawn() failed\")\n        .and_then(|mut child| child.wait().context(\"wait() failed\"))\n        .and_then(|status| match status.success() {\n            true => Ok(()),\n            false => {\n                // The command failed. 
Maybe it was because the resize-swap file doesn't exist?\n                // The --once flag causes it to delete itself on success so we don't disable swap\n                // while postgres is running; maybe this is fine.\n                match Path::new(RESIZE_SWAP_BIN).try_exists() {\n                    Err(_) | Ok(true) => Err(anyhow!(\"process exited with {status}\")),\n                    // The path doesn't exist; we're actually ok \n                    Ok(false) => {\n                        warn!(\"ignoring \\\"not found\\\" error from resize-swap to avoid swapoff while compute is running\");\n                        Ok(())\n                    },\n                }\n            }\n        })\n        // wrap any prior error with the overall context that we couldn't run the command\n        .with_context(|| {\n            format!(\"could not run `/usr/bin/sudo {RESIZE_SWAP_BIN} --once {size_bytes}`\")\n        })\n}\n"
  },
  {
    "path": "compute_tools/src/sync_sk.rs",
    "content": "// Utils for running sync_safekeepers\nuse anyhow::Result;\nuse tracing::info;\nuse utils::lsn::Lsn;\n\n#[derive(Copy, Clone, Debug)]\npub enum TimelineStatusResponse {\n    NotFound,\n    Ok(TimelineStatusOkResponse),\n}\n\n#[derive(Copy, Clone, Debug)]\npub struct TimelineStatusOkResponse {\n    flush_lsn: Lsn,\n    commit_lsn: Lsn,\n}\n\n/// Get a safekeeper's metadata for our timeline. The id is only used for logging\npub async fn ping_safekeeper(\n    id: String,\n    config: tokio_postgres::Config,\n) -> Result<TimelineStatusResponse> {\n    // TODO add retries\n\n    // Connect\n    info!(\"connecting to {}\", id);\n    let (client, conn) = config.connect(tokio_postgres::NoTls).await?;\n    tokio::spawn(async move {\n        if let Err(e) = conn.await {\n            eprintln!(\"connection error: {e}\");\n        }\n    });\n\n    // Query\n    info!(\"querying {}\", id);\n    let result = client.simple_query(\"TIMELINE_STATUS\").await?;\n\n    // Parse result\n    info!(\"done with {}\", id);\n    if let postgres::SimpleQueryMessage::Row(row) = &result[0] {\n        use std::str::FromStr;\n        let response = TimelineStatusResponse::Ok(TimelineStatusOkResponse {\n            flush_lsn: Lsn::from_str(row.get(\"flush_lsn\").unwrap())?,\n            commit_lsn: Lsn::from_str(row.get(\"commit_lsn\").unwrap())?,\n        });\n        Ok(response)\n    } else {\n        // Timeline doesn't exist\n        Ok(TimelineStatusResponse::NotFound)\n    }\n}\n\n/// Given a quorum of responses, check if safekeepers are synced at some Lsn\npub fn check_if_synced(responses: Vec<TimelineStatusResponse>) -> Option<Lsn> {\n    // Check if all responses are ok\n    let ok_responses: Vec<TimelineStatusOkResponse> = responses\n        .iter()\n        .filter_map(|r| match r {\n            TimelineStatusResponse::Ok(ok_response) => Some(ok_response),\n            _ => None,\n        })\n        .cloned()\n        .collect();\n    if ok_responses.len() < 
responses.len() {\n        info!(\n            \"not synced. Only {} out of {} know about this timeline\",\n            ok_responses.len(),\n            responses.len()\n        );\n        return None;\n    }\n\n    // Get the min and the max of everything\n    let commit: Vec<Lsn> = ok_responses.iter().map(|r| r.commit_lsn).collect();\n    let flush: Vec<Lsn> = ok_responses.iter().map(|r| r.flush_lsn).collect();\n    let commit_max = commit.iter().max().unwrap();\n    let commit_min = commit.iter().min().unwrap();\n    let flush_max = flush.iter().max().unwrap();\n    let flush_min = flush.iter().min().unwrap();\n\n    // Check that all values are equal\n    if commit_min != commit_max {\n        info!(\"not synced. {:?} {:?}\", commit_min, commit_max);\n        return None;\n    }\n    if flush_min != flush_max {\n        info!(\"not synced. {:?} {:?}\", flush_min, flush_max);\n        return None;\n    }\n\n    // Check that commit == flush\n    if commit_max != flush_max {\n        info!(\"not synced. {:?} {:?}\", commit_max, flush_max);\n        return None;\n    }\n\n    Some(*commit_max)\n}\n"
  },
  {
    "path": "compute_tools/src/tls.rs",
    "content": "use std::{io::Write, os::unix::fs::OpenOptionsExt, path::Path, time::Duration};\n\nuse anyhow::{Context, Result, bail};\nuse compute_api::responses::TlsConfig;\nuse ring::digest;\nuse x509_cert::Certificate;\n\n#[derive(Clone, Copy)]\npub struct CertDigest(digest::Digest);\n\npub async fn watch_cert_for_changes(cert_path: String) -> tokio::sync::watch::Receiver<CertDigest> {\n    let mut digest = compute_digest(&cert_path).await;\n    let (tx, rx) = tokio::sync::watch::channel(digest);\n    tokio::spawn(async move {\n        while !tx.is_closed() {\n            let new_digest = compute_digest(&cert_path).await;\n            if digest.0.as_ref() != new_digest.0.as_ref() {\n                digest = new_digest;\n                _ = tx.send(digest);\n            }\n\n            tokio::time::sleep(Duration::from_secs(60)).await\n        }\n    });\n    rx\n}\n\nasync fn compute_digest(cert_path: &str) -> CertDigest {\n    loop {\n        match try_compute_digest(cert_path).await {\n            Ok(d) => break d,\n            Err(e) => {\n                tracing::error!(\"could not read cert file {e:?}\");\n                tokio::time::sleep(Duration::from_secs(1)).await\n            }\n        }\n    }\n}\n\nasync fn try_compute_digest(cert_path: &str) -> Result<CertDigest> {\n    let data = tokio::fs::read(cert_path).await?;\n    // sha256 is extremely collision resistent. 
can safely assume the digest to be unique\n    Ok(CertDigest(digest::digest(&digest::SHA256, &data)))\n}\n\npub const SERVER_CRT: &str = \"server.crt\";\npub const SERVER_KEY: &str = \"server.key\";\n\npub fn update_key_path_blocking(pg_data: &Path, tls_config: &TlsConfig) {\n    loop {\n        match try_update_key_path_blocking(pg_data, tls_config) {\n            Ok(()) => break,\n            Err(e) => {\n                tracing::error!(error = ?e, \"could not create key file\");\n                std::thread::sleep(Duration::from_secs(1))\n            }\n        }\n    }\n}\n\n// Postgres requires the keypath be \"secure\". This means\n// 1. Owned by the postgres user.\n// 2. Have permission 600.\nfn try_update_key_path_blocking(pg_data: &Path, tls_config: &TlsConfig) -> Result<()> {\n    let key = std::fs::read_to_string(&tls_config.key_path)?;\n    let crt = std::fs::read_to_string(&tls_config.cert_path)?;\n\n    // to mitigate a race condition during renewal.\n    verify_key_cert(&key, &crt)?;\n\n    let mut key_file = std::fs::OpenOptions::new()\n        .write(true)\n        .create(true)\n        .truncate(true)\n        .mode(0o600)\n        .open(pg_data.join(SERVER_KEY))?;\n\n    let mut crt_file = std::fs::OpenOptions::new()\n        .write(true)\n        .create(true)\n        .truncate(true)\n        .mode(0o600)\n        .open(pg_data.join(SERVER_CRT))?;\n\n    key_file.write_all(key.as_bytes())?;\n    crt_file.write_all(crt.as_bytes())?;\n\n    Ok(())\n}\n\nfn verify_key_cert(key: &str, cert: &str) -> Result<()> {\n    use x509_cert::der::oid::db::rfc5912::ECDSA_WITH_SHA_256;\n\n    let certs = Certificate::load_pem_chain(cert.as_bytes())\n        .context(\"decoding PEM encoded certificates\")?;\n\n    // First certificate is our server-cert,\n    // all the rest of the certs are the CA cert chain.\n    let Some(cert) = certs.first() else {\n        bail!(\"no certificates found\");\n    };\n\n    match cert.signature_algorithm.oid {\n        
ECDSA_WITH_SHA_256 => {\n            let key = p256::SecretKey::from_sec1_pem(key).context(\"parse key\")?;\n\n            let a = key.public_key().to_sec1_bytes();\n            let b = cert\n                .tbs_certificate\n                .subject_public_key_info\n                .subject_public_key\n                .raw_bytes();\n\n            if *a != *b {\n                bail!(\"private key file does not match certificate\")\n            }\n        }\n        _ => bail!(\"unknown TLS key type\"),\n    }\n\n    Ok(())\n}\n\n#[cfg(test)]\nmod tests {\n    use super::verify_key_cert;\n\n    /// Real certificate chain file, generated by cert-manager in dev.\n    /// The server auth certificate has expired since 2025-04-24T15:41:35Z.\n    const CERT: &str = \"\n-----BEGIN CERTIFICATE-----\nMIICCDCCAa+gAwIBAgIQKhLomFcNULbZA/bPdGzaSzAKBggqhkjOPQQDAjBEMQsw\nCQYDVQQGEwJVUzESMBAGA1UEChMJTmVvbiBJbmMuMSEwHwYDVQQDExhOZW9uIEs4\ncyBJbnRlcm1lZGlhdGUgQ0EwHhcNMjUwNDIzMTU0MTM1WhcNMjUwNDI0MTU0MTM1\nWjBBMT8wPQYDVQQDEzZjb21wdXRlLXdpc3B5LWdyYXNzLXcwY21laWp3LmRlZmF1\nbHQuc3ZjLmNsdXN0ZXIubG9jYWwwWTATBgcqhkjOPQIBBggqhkjOPQMBBwNCAATF\nQCcG2m/EVHAiZtSsYgVnHgoTjUL/Jtwfdrpvz2t0bVRZmBmSKhlo53uPV9Y5eKFG\nAmR54p9/gT2eO3xU7vAgo4GFMIGCMA4GA1UdDwEB/wQEAwIFoDAMBgNVHRMBAf8E\nAjAAMB8GA1UdIwQYMBaAFFR2JAhXkeiNQNEixTvAYIwxUu3QMEEGA1UdEQQ6MDiC\nNmNvbXB1dGUtd2lzcHktZ3Jhc3MtdzBjbWVpancuZGVmYXVsdC5zdmMuY2x1c3Rl\nci5sb2NhbDAKBggqhkjOPQQDAgNHADBEAiBLG22wKG8XS9e9RxBT+kmUx/kIThcP\nDIpp7jx0PrFcdQIgEMTdnXpx5Cv/Z0NIEDxtMHUD7G0vuRPfztki36JuakM=\n-----END CERTIFICATE-----\n-----BEGIN 
CERTIFICATE-----\nMIICFzCCAb6gAwIBAgIUbbX98N2Ip6lWAONRk8dU9hSz+YIwCgYIKoZIzj0EAwIw\nRDELMAkGA1UEBhMCVVMxEjAQBgNVBAoTCU5lb24gSW5jLjEhMB8GA1UEAxMYTmVv\nbiBBV1MgSW50ZXJtZWRpYXRlIENBMB4XDTI1MDQyMjE1MTAxMFoXDTI1MDcyMTE1\nMTAxMFowRDELMAkGA1UEBhMCVVMxEjAQBgNVBAoTCU5lb24gSW5jLjEhMB8GA1UE\nAxMYTmVvbiBLOHMgSW50ZXJtZWRpYXRlIENBMFkwEwYHKoZIzj0CAQYIKoZIzj0D\nAQcDQgAE5++m5owqNI4BPMTVNIUQH0qvU7pYhdpHGVGhdj/Lgars6ROvE6uSNQV4\nSAmJN5HBzj5/6kLQaTPWpXW7EHXjK6OBjTCBijAOBgNVHQ8BAf8EBAMCAQYwEgYD\nVR0TAQH/BAgwBgEB/wIBADAdBgNVHQ4EFgQUVHYkCFeR6I1A0SLFO8BgjDFS7dAw\nHwYDVR0jBBgwFoAUgHfNXfyKtHO0V9qoLOWCjkNiaI8wJAYDVR0eAQH/BBowGKAW\nMBSCEi5zdmMuY2x1c3Rlci5sb2NhbDAKBggqhkjOPQQDAgNHADBEAiBObVFFdXaL\nQpOXmN60dYUNnQRwjKreFduEkQgOdOlssgIgVAdJJQFgvlrvEOBhY8j5WyeKRwUN\nk/ALs6KpgaFBCGY=\n-----END CERTIFICATE-----\n-----BEGIN CERTIFICATE-----\nMIIB4jCCAYegAwIBAgIUFlxWFn/11yoGdmD+6gf+yQMToS0wCgYIKoZIzj0EAwIw\nODELMAkGA1UEBhMCVVMxEjAQBgNVBAoTCU5lb24gSW5jLjEVMBMGA1UEAxMMTmVv\nbiBSb290IENBMB4XDTI1MDQwMzA3MTUyMloXDTI2MDQwMzA3MTUyMlowRDELMAkG\nA1UEBhMCVVMxEjAQBgNVBAoTCU5lb24gSW5jLjEhMB8GA1UEAxMYTmVvbiBBV1Mg\nSW50ZXJtZWRpYXRlIENBMFkwEwYHKoZIzj0CAQYIKoZIzj0DAQcDQgAEqonG/IQ6\nZxtEtOUTkkoNopPieXDO5CBKUkNFTGeJEB7OxRlSpYJgsBpaYIaD6Vc4sVk3thIF\np+pLw52idQOIN6NjMGEwDgYDVR0PAQH/BAQDAgEGMA8GA1UdEwEB/wQFMAMBAf8w\nHQYDVR0OBBYEFIB3zV38irRztFfaqCzlgo5DYmiPMB8GA1UdIwQYMBaAFKh7M4/G\nFHvr/ORDQZt4bMLlJvHCMAoGCCqGSM49BAMCA0kAMEYCIQCbS4x7QPslONzBYbjC\nUQaQ0QLDW4CJHvQ4u4gbWFG87wIhAJMsHQHjP9qTT27Q65zQCR7O8QeLAfha1jrH\nAg/LsxSr\n-----END CERTIFICATE-----\n\";\n\n    /// The key corresponding to [`CERT`]\n    const KEY: &str = \"\n-----BEGIN EC PRIVATE KEY-----\nMHcCAQEEIDnAnrqmIJjndCLWP1iIO5X3X63Aia48TGpGuMXwvm6IoAoGCCqGSM49\nAwEHoUQDQgAExUAnBtpvxFRwImbUrGIFZx4KE41C/ybcH3a6b89rdG1UWZgZkioZ\naOd7j1fWOXihRgJkeeKff4E9njt8VO7wIA==\n-----END EC PRIVATE KEY-----\n\";\n\n    /// An incorrect key.\n    const INCORRECT_KEY: &str = \"\n-----BEGIN EC PRIVATE 
KEY-----\nMHcCAQEEIL6WqqBDyvM0HWz7Ir5M5+jhFWB7IzOClGn26OPrzHCXoAoGCCqGSM49\nAwEHoUQDQgAE7XVvdOy5lfwtNKb+gJEUtnG+DrnnXLY5LsHDeGQKV9PTRcEMeCrG\nYZzHyML4P6Sr4yi2ts+4B9i47uvAG8+XwQ==\n-----END EC PRIVATE KEY-----\n\";\n\n    #[test]\n    fn certificate_verification() {\n        verify_key_cert(KEY, CERT).unwrap();\n    }\n\n    #[test]\n    #[should_panic(expected = \"private key file does not match certificate\")]\n    fn certificate_verification_fail() {\n        verify_key_cert(INCORRECT_KEY, CERT).unwrap();\n    }\n}\n"
  },
  {
    "path": "compute_tools/tests/README.md",
    "content": "### Test files\n\nThe file `cluster_spec.json` has been copied over from libs/compute_api\ntests, with some edits:\n\n  - the neon.safekeepers setting contains a duplicate value\n"
  },
  {
    "path": "compute_tools/tests/cluster_spec.json",
    "content": "{\n  \"format_version\": 1.0,\n\n  \"timestamp\": \"2021-05-23T18:25:43.511Z\",\n  \"operation_uuid\": \"0f657b36-4b0f-4a2d-9c2e-1dcd615e7d8b\",\n  \"suspend_timeout_seconds\": 3600,\n  \n  \"cluster\": {\n    \"cluster_id\": \"test-cluster-42\",\n    \"name\": \"Zenith Test\",\n    \"state\": \"restarted\",\n    \"roles\": [\n      {\n        \"name\": \"postgres\",\n        \"encrypted_password\": \"6b1d16b78004bbd51fa06af9eda75972\",\n        \"options\": null\n      },\n      {\n        \"name\": \"alexk\",\n        \"encrypted_password\": null,\n        \"options\": null\n      },\n      {\n        \"name\": \"zenith \\\"new\\\"\",\n        \"encrypted_password\": \"5b1d16b78004bbd51fa06af9eda75972\",\n        \"options\": null\n      },\n      {\n        \"name\": \"zen\",\n        \"encrypted_password\": \"9b1d16b78004bbd51fa06af9eda75972\"\n      },\n      {\n        \"name\": \"\\\"name\\\";\\\\n select 1;\",\n        \"encrypted_password\": \"5b1d16b78004bbd51fa06af9eda75972\"\n      },\n      {\n        \"name\": \"MyRole\",\n        \"encrypted_password\": \"5b1d16b78004bbd51fa06af9eda75972\"\n      }\n    ],\n    \"databases\": [\n      {\n        \"name\": \"DB2\",\n        \"owner\": \"alexk\",\n        \"options\": [\n          {\n            \"name\": \"LC_COLLATE\",\n            \"value\": \"C\",\n            \"vartype\": \"string\"\n          },\n          {\n            \"name\": \"LC_CTYPE\",\n            \"value\": \"C\",\n            \"vartype\": \"string\"\n          },\n          {\n            \"name\": \"TEMPLATE\",\n            \"value\": \"template0\",\n            \"vartype\": \"enum\"\n          }\n        ]\n      },\n      {\n        \"name\": \"zenith\",\n        \"owner\": \"MyRole\"\n      },\n      {\n        \"name\": \"zen\",\n        \"owner\": \"zen\"\n      }\n    ],\n    \"settings\": [\n      {\n        \"name\": \"fsync\",\n        \"value\": \"off\",\n        \"vartype\": \"bool\"\n      },\n      {\n    
    \"name\": \"wal_level\",\n        \"value\": \"logical\",\n        \"vartype\": \"enum\"\n      },\n      {\n        \"name\": \"hot_standby\",\n        \"value\": \"on\",\n        \"vartype\": \"bool\"\n      },\n      {\n        \"name\": \"prewarm_lfc_on_startup\",\n        \"value\": \"off\",\n        \"vartype\": \"bool\"\n      },\n      {\n        \"name\": \"neon.safekeepers\",\n        \"value\": \"127.0.0.1:6502,127.0.0.1:6503,127.0.0.1:6501,127.0.0.1:6502\",\n        \"vartype\": \"string\"\n      },\n      {\n        \"name\": \"wal_log_hints\",\n        \"value\": \"on\",\n        \"vartype\": \"bool\"\n      },\n      {\n        \"name\": \"log_connections\",\n        \"value\": \"on\",\n        \"vartype\": \"bool\"\n      },\n      {\n        \"name\": \"shared_buffers\",\n        \"value\": \"32768\",\n        \"vartype\": \"integer\"\n      },\n      {\n        \"name\": \"port\",\n        \"value\": \"55432\",\n        \"vartype\": \"integer\"\n      },\n      {\n        \"name\": \"max_connections\",\n        \"value\": \"100\",\n        \"vartype\": \"integer\"\n      },\n      {\n        \"name\": \"max_wal_senders\",\n        \"value\": \"10\",\n        \"vartype\": \"integer\"\n      },\n      {\n        \"name\": \"listen_addresses\",\n        \"value\": \"0.0.0.0\",\n        \"vartype\": \"string\"\n      },\n      {\n        \"name\": \"wal_sender_timeout\",\n        \"value\": \"0\",\n        \"vartype\": \"integer\"\n      },\n      {\n        \"name\": \"password_encryption\",\n        \"value\": \"md5\",\n        \"vartype\": \"enum\"\n      },\n      {\n        \"name\": \"maintenance_work_mem\",\n        \"value\": \"65536\",\n        \"vartype\": \"integer\"\n      },\n      {\n        \"name\": \"max_parallel_workers\",\n        \"value\": \"8\",\n        \"vartype\": \"integer\"\n      },\n      {\n        \"name\": \"max_worker_processes\",\n        \"value\": \"8\",\n        \"vartype\": \"integer\"\n      },\n      {\n     
   \"name\": \"neon.tenant_id\",\n        \"value\": \"b0554b632bd4d547a63b86c3630317e8\",\n        \"vartype\": \"string\"\n      },\n      {\n        \"name\": \"max_replication_slots\",\n        \"value\": \"10\",\n        \"vartype\": \"integer\"\n      },\n      {\n        \"name\": \"neon.timeline_id\",\n        \"value\": \"2414a61ffc94e428f14b5758fe308e13\",\n        \"vartype\": \"string\"\n      },\n      {\n        \"name\": \"shared_preload_libraries\",\n        \"value\": \"neon\",\n        \"vartype\": \"string\"\n      },\n      {\n        \"name\": \"synchronous_standby_names\",\n        \"value\": \"walproposer\",\n        \"vartype\": \"string\"\n      },\n      {\n        \"name\": \"neon.pageserver_connstring\",\n        \"value\": \"host=127.0.0.1 port=6400\",\n        \"vartype\": \"string\"\n      },\n      {\n        \"name\": \"test.escaping\",\n        \"value\": \"here's a backslash \\\\ and a quote ' and a double-quote \\\" hooray\",\n        \"vartype\": \"string\"\n      }\n    ]\n  },\n  \"delta_operations\": [\n    {\n      \"action\": \"delete_db\",\n      \"name\": \"zenith_test\"\n    },\n    {\n      \"action\": \"rename_db\",\n      \"name\": \"DB\",\n      \"new_name\": \"DB2\"\n    },\n    {\n      \"action\": \"delete_role\",\n      \"name\": \"zenith2\"\n    },\n    {\n      \"action\": \"rename_role\",\n      \"name\": \"zenith new\",\n      \"new_name\": \"zenith \\\"new\\\"\"\n    }\n  ],\n  \"remote_extensions\": {\n    \"library_index\": {\n      \"postgis-3\": \"postgis\",\n      \"libpgrouting-3.4\": \"postgis\",\n      \"postgis_raster-3\": \"postgis\",\n      \"postgis_sfcgal-3\": \"postgis\",\n      \"postgis_topology-3\": \"postgis\",\n      \"address_standardizer-3\": \"postgis\"\n    },\n    \"extension_data\": {\n      \"postgis\": {\n        \"archive_path\": \"5834329303/v15/extensions/postgis.tar.zst\",\n        \"control_data\": {\n          \"postgis.control\": \"# postgis extension\\ncomment = ''PostGIS 
geometry and geography spatial types and functions''\\ndefault_version = ''3.3.2''\\nmodule_pathname = ''$libdir/postgis-3''\\nrelocatable = false\\ntrusted = true\\n\",\n          \"pgrouting.control\": \"# pgRouting Extension\\ncomment = ''pgRouting Extension''\\ndefault_version = ''3.4.2''\\nmodule_pathname = ''$libdir/libpgrouting-3.4''\\nrelocatable = true\\nrequires = ''plpgsql''\\nrequires = ''postgis''\\ntrusted = true\\n\",\n          \"postgis_raster.control\": \"# postgis_raster extension\\ncomment = ''PostGIS raster types and functions''\\ndefault_version = ''3.3.2''\\nmodule_pathname = ''$libdir/postgis_raster-3''\\nrelocatable = false\\nrequires = postgis\\ntrusted = true\\n\",\n          \"postgis_sfcgal.control\": \"# postgis topology extension\\ncomment = ''PostGIS SFCGAL functions''\\ndefault_version = ''3.3.2''\\nrelocatable = true\\nrequires = postgis\\ntrusted = true\\n\",\n          \"postgis_topology.control\": \"# postgis topology extension\\ncomment = ''PostGIS topology spatial types and functions''\\ndefault_version = ''3.3.2''\\nrelocatable = false\\nschema = topology\\nrequires = postgis\\ntrusted = true\\n\",\n          \"address_standardizer.control\": \"# address_standardizer extension\\ncomment = ''Used to parse an address into constituent elements. 
Generally used to support geocoding address normalization step.''\\ndefault_version = ''3.3.2''\\nrelocatable = true\\ntrusted = true\\n\",\n          \"postgis_tiger_geocoder.control\": \"# postgis tiger geocoder extension\\ncomment = ''PostGIS tiger geocoder and reverse geocoder''\\ndefault_version = ''3.3.2''\\nrelocatable = false\\nschema = tiger\\nrequires = ''postgis,fuzzystrmatch''\\nsuperuser= false\\ntrusted = true\\n\",\n          \"address_standardizer_data_us.control\": \"# address standardizer us dataset\\ncomment = ''Address Standardizer US dataset example''\\ndefault_version = ''3.3.2''\\nrelocatable = true\\ntrusted = true\\n\"\n        }\n      }\n    },\n    \"custom_extensions\": [],\n    \"public_extensions\": [\"postgis\"]\n  },\n  \"pgbouncer_settings\": {\n    \"default_pool_size\": \"42\",\n    \"pool_mode\": \"session\"\n  }\n}\n"
  },
  {
    "path": "compute_tools/tests/config_test.rs",
    "content": "#[cfg(test)]\nmod config_tests {\n\n    use std::fs::{File, remove_file};\n    use std::io::{Read, Write};\n    use std::path::Path;\n\n    use compute_tools::config::*;\n\n    fn write_test_file(path: &Path, content: &str) {\n        let mut file = File::create(path).unwrap();\n        file.write_all(content.as_bytes()).unwrap();\n    }\n\n    fn check_file_content(path: &Path, expected_content: &str) {\n        let mut file = File::open(path).unwrap();\n        let mut content = String::new();\n\n        file.read_to_string(&mut content).unwrap();\n        assert_eq!(content, expected_content);\n    }\n\n    #[test]\n    fn test_line_in_file() {\n        let path = Path::new(\"./tests/tmp/config_test.txt\");\n        write_test_file(path, \"line1\\nline2.1\\t line2.2\\nline3\");\n\n        let line = \"line2.1\\t line2.2\";\n        let result = line_in_file(path, line).unwrap();\n        assert!(!result);\n        check_file_content(path, \"line1\\nline2.1\\t line2.2\\nline3\");\n\n        let line = \"line4\";\n        let result = line_in_file(path, line).unwrap();\n        assert!(result);\n        check_file_content(path, \"line1\\nline2.1\\t line2.2\\nline3\\nline4\");\n\n        remove_file(path).unwrap();\n\n        let path = Path::new(\"./tests/tmp/new_config_test.txt\");\n        let line = \"line4\";\n        let result = line_in_file(path, line).unwrap();\n        assert!(result);\n        check_file_content(path, \"line4\");\n\n        remove_file(path).unwrap();\n    }\n}\n"
  },
  {
    "path": "compute_tools/tests/pg_helpers_tests.rs",
    "content": "#[cfg(test)]\nmod pg_helpers_tests {\n    use std::fs::File;\n\n    use compute_api::spec::{ComputeSpec, GenericOption, GenericOptions, PgIdent};\n    use compute_tools::pg_helpers::*;\n\n    #[test]\n    fn params_serialize() {\n        let file = File::open(\"../libs/compute_api/tests/cluster_spec.json\").unwrap();\n        let spec: ComputeSpec = serde_json::from_reader(file).unwrap();\n\n        assert_eq!(\n            spec.cluster.databases.first().unwrap().to_pg_options(),\n            \"LC_COLLATE 'C' LC_CTYPE 'C' TEMPLATE template0 OWNER \\\"alexk\\\"\"\n        );\n        assert_eq!(\n            spec.cluster.roles.first().unwrap().to_pg_options(),\n            \" LOGIN PASSWORD 'md56b1d16b78004bbd51fa06af9eda75972'\"\n        );\n    }\n\n    #[test]\n    fn settings_serialize() {\n        let file = File::open(\"../libs/compute_api/tests/cluster_spec.json\").unwrap();\n        let spec: ComputeSpec = serde_json::from_reader(file).unwrap();\n\n        assert_eq!(\n            spec.cluster.settings.as_pg_settings(),\n            r#\"fsync = off\nwal_level = logical\nhot_standby = on\nautoprewarm = off\noffload_lfc_interval_seconds = 20\nneon.safekeepers = '127.0.0.1:6502,127.0.0.1:6503,127.0.0.1:6501'\nwal_log_hints = on\nlog_connections = on\nshared_buffers = 32768\nport = 55432\nmax_connections = 100\nmax_wal_senders = 10\nlisten_addresses = '0.0.0.0'\nwal_sender_timeout = 0\npassword_encryption = md5\nmaintenance_work_mem = 65536\nmax_parallel_workers = 8\nmax_worker_processes = 8\nneon.tenant_id = 'b0554b632bd4d547a63b86c3630317e8'\nmax_replication_slots = 10\nneon.timeline_id = '2414a61ffc94e428f14b5758fe308e13'\nshared_preload_libraries = 'neon'\nsynchronous_standby_names = 'walproposer'\nneon.pageserver_connstring = 'host=127.0.0.1 port=6400'\ntest.escaping = 'here''s a backslash \\\\ and a quote '' and a double-quote \" hooray'\n\"#\n        );\n    }\n\n    #[test]\n    fn ident_pg_quote() {\n        let ident: PgIdent = 
PgIdent::from(\"\\\"name\\\";\\\\n select 1;\");\n\n        assert_eq!(ident.pg_quote(), \"\\\"\\\"\\\"name\\\"\\\";\\\\n select 1;\\\"\");\n    }\n\n    #[test]\n    fn ident_pg_quote_dollar() {\n        let test_cases = vec![\n            (\"name\", (\"$x$name$x$\", \"xx\")),\n            (\"name$\", (\"$x$name$$x$\", \"xx\")),\n            (\"name$$\", (\"$x$name$$$x$\", \"xx\")),\n            (\"name$$$\", (\"$x$name$$$$x$\", \"xx\")),\n            (\"name$$$$\", (\"$x$name$$$$$x$\", \"xx\")),\n            (\"name$x$\", (\"$xx$name$x$$xx$\", \"xxx\")),\n            (\"x\", (\"$xx$x$xx$\", \"xxx\")),\n            (\"xx\", (\"$xxx$xx$xxx$\", \"xxxx\")),\n            (\"$x\", (\"$xx$$x$xx$\", \"xxx\")),\n            (\"x$\", (\"$xx$x$$xx$\", \"xxx\")),\n            (\"$x$\", (\"$xx$$x$$xx$\", \"xxx\")),\n            (\"xx$\", (\"$xxx$xx$$xxx$\", \"xxxx\")),\n            (\"$xx\", (\"$xxx$$xx$xxx$\", \"xxxx\")),\n            (\"$xx$\", (\"$xxx$$xx$$xxx$\", \"xxxx\")),\n        ];\n\n        for (input, expected) in test_cases {\n            let (escaped, tag) = PgIdent::from(input).pg_quote_dollar();\n            assert_eq!(escaped, expected.0);\n            assert_eq!(tag, expected.1);\n        }\n    }\n\n    #[test]\n    fn generic_options_search() {\n        let generic_options: GenericOptions = Some(vec![\n            GenericOption {\n                name: \"present_value\".into(),\n                value: Some(\"value\".into()),\n                vartype: \"string\".into(),\n            },\n            GenericOption {\n                name: \"missed_value\".into(),\n                value: None,\n                vartype: \"int\".into(),\n            },\n        ]);\n        assert_eq!(generic_options.find(\"present_value\"), Some(\"value\".into()));\n        assert_eq!(generic_options.find(\"missed_value\"), None);\n        assert_eq!(generic_options.find(\"invalid_value\"), None);\n\n        let empty_generic_options: GenericOptions = Some(vec![]);\n        
assert_eq!(empty_generic_options.find(\"present_value\"), None);\n        assert_eq!(empty_generic_options.find(\"missed_value\"), None);\n        assert_eq!(empty_generic_options.find(\"invalid_value\"), None);\n\n        let none_generic_options: GenericOptions = None;\n        assert_eq!(none_generic_options.find(\"present_value\"), None);\n        assert_eq!(none_generic_options.find(\"missed_value\"), None);\n        assert_eq!(none_generic_options.find(\"invalid_value\"), None);\n    }\n\n    #[test]\n    fn test_escape_literal() {\n        assert_eq!(escape_literal(\"test\"), \"'test'\");\n        assert_eq!(escape_literal(\"test'\"), \"'test'''\");\n        assert_eq!(escape_literal(\"test\\\\'\"), \"E'test\\\\\\\\'''\");\n        assert_eq!(escape_literal(\"test\\\\'\\\\'\"), \"E'test\\\\\\\\''\\\\\\\\'''\");\n    }\n}\n"
  },
  {
    "path": "control_plane/.gitignore",
    "content": "tmp_check/\n"
  },
  {
    "path": "control_plane/Cargo.toml",
    "content": "[package]\nname = \"control_plane\"\nversion = \"0.1.0\"\nedition.workspace = true\nlicense.workspace = true\n\n[dependencies]\nanyhow.workspace = true\nbase64.workspace = true\ncamino.workspace = true\nclap.workspace = true\ncomfy-table.workspace = true\nfutures.workspace = true\nhumantime.workspace = true\njsonwebtoken.workspace = true\nnix.workspace = true\nonce_cell.workspace = true\npem.workspace = true\nhumantime-serde.workspace = true\nhyper0.workspace = true\nregex.workspace = true\nreqwest = { workspace = true, features = [\"blocking\", \"json\"] }\nscopeguard.workspace = true\nserde.workspace = true\nserde_json.workspace = true\nsha2.workspace = true\nspki.workspace = true\nthiserror.workspace = true\ntoml.workspace = true\ntoml_edit.workspace = true\ntokio.workspace = true\ntokio-postgres.workspace = true\ntokio-util.workspace = true\nurl.workspace = true\npageserver_api.workspace = true\npageserver_client.workspace = true\npostgres_backend.workspace = true\nsafekeeper_api.workspace = true\nsafekeeper_client.workspace = true\npostgres_connection.workspace = true\nstorage_broker.workspace = true\nhttp-utils.workspace = true\nutils.workspace = true\nwhoami.workspace = true\nendpoint_storage.workspace = true\ncompute_api.workspace = true\nworkspace_hack.workspace = true\ntracing.workspace = true\n"
  },
  {
    "path": "control_plane/README.md",
    "content": "# Local Development Control Plane (`neon_local`)\n\nThis crate contains tools to start a Neon development environment locally. They can be run with the `cargo neon` command, a convenience alias that invokes\nthe `neon_local` binary.\n\n**Note**: this is a dev/test tool -- a minimal control plane suitable for testing\ncode changes locally, but not suitable for running production systems.\n\n## Example: Start with Postgres 16\n\nTo create and start a local development environment with Postgres 16, you will need to pass the `--pg-version` flag to two of the start-up commands.\n\n```shell\ncargo neon init\ncargo neon start\ncargo neon tenant create --set-default --pg-version 16\ncargo neon endpoint create main --pg-version 16\ncargo neon endpoint start main\n```\n\n## Example: Create Test User and Database\n\nBy default, `cargo neon` starts an endpoint with the `cloud_admin` role and the `postgres` database. If you want a role and a database similar to what we have on the cloud service, you can create them with the following commands when starting an endpoint.\n\n```shell\ncargo neon endpoint create main --pg-version 16 --update-catalog true\ncargo neon endpoint start main --create-test-user true\n```\n\nThe first command creates `neon_superuser` and the necessary roles. The second command creates the `test` user and the `neondb` database. After running the second command, you will see a connection string for connecting as the `test` user.\n"
  },
  {
    "path": "control_plane/safekeepers.conf",
    "content": "# Page server and three safekeepers.\n[pageserver]\nlisten_pg_addr = '127.0.0.1:64000'\nlisten_http_addr = '127.0.0.1:9898'\nlisten_grpc_addr = '127.0.0.1:51051'\npg_auth_type = 'Trust'\nhttp_auth_type = 'Trust'\ngrpc_auth_type = 'Trust'\n\n[[safekeepers]]\nid = 1\npg_port = 5454\nhttp_port = 7676\n\n[[safekeepers]]\nid = 2\npg_port = 5455\nhttp_port = 7677\n\n[[safekeepers]]\nid = 3\npg_port = 5456\nhttp_port = 7678\n"
  },
  {
    "path": "control_plane/simple.conf",
    "content": "# Minimal neon environment with one safekeeper. This is equivalent to the built-in\n# defaults that you get with no --config\n[[pageservers]]\nid=1\nlisten_pg_addr = '127.0.0.1:64000'\nlisten_http_addr = '127.0.0.1:9898'\nlisten_grpc_addr = '127.0.0.1:51051'\npg_auth_type = 'Trust'\nhttp_auth_type = 'Trust'\ngrpc_auth_type = 'Trust'\n\n[[safekeepers]]\nid = 1\npg_port = 5454\nhttp_port = 7676\n\n[broker]\nlisten_addr = '127.0.0.1:50051'\n"
  },
  {
    "path": "control_plane/src/background_process.rs",
    "content": "//! Spawns and kills background processes that are needed by Neon CLI.\n//! Applies common set-up such as log and pid files (if needed) to every process.\n//!\n//! Neon CLI does not run in background, so it needs to store the information about\n//! spawned processes, which it does in this module.\n//! We do that by storing the pid of the process in the \"${process_name}.pid\" file.\n//! The pid file can be created by the process itself\n//! (Neon storage binaries do that and also ensure that a lock is taken onto that file)\n//! or we create such file after starting the process\n//! (non-Neon binaries don't necessarily follow our pidfile conventions).\n//! The pid stored in the file is later used to stop the service.\n//!\n//! See the [`lock_file`](utils::lock_file) module for more info.\n\nuse std::ffi::OsStr;\nuse std::io::Write;\nuse std::os::fd::AsFd;\nuse std::os::unix::process::CommandExt;\nuse std::path::Path;\nuse std::process::Command;\nuse std::time::Duration;\nuse std::{fs, io, thread};\n\nuse anyhow::Context;\nuse camino::{Utf8Path, Utf8PathBuf};\nuse nix::errno::Errno;\nuse nix::fcntl::{FcntlArg, FdFlag};\nuse nix::sys::signal::{Signal, kill};\nuse nix::unistd::Pid;\nuse utils::pid_file::{self, PidFileRead};\n\n// These constants control the loop used to poll for process start / stop.\n//\n// The loop waits for at most 10 seconds, polling every 100 ms.\n// Once a second, it prints a dot (\".\"), to give the user an indication that\n// it's waiting. 
If the process hasn't started/stopped after 5 seconds,\n// it prints a notice that it's taking long, but keeps waiting.\n//\nconst STOP_RETRY_TIMEOUT: Duration = Duration::from_secs(10);\nconst STOP_RETRIES: u128 = STOP_RETRY_TIMEOUT.as_millis() / RETRY_INTERVAL.as_millis();\nconst RETRY_INTERVAL: Duration = Duration::from_millis(100);\nconst DOT_EVERY_RETRIES: u128 = 10;\nconst NOTICE_AFTER_RETRIES: u128 = 50;\n\n/// Argument to `start_process`, to indicate whether it should create pidfile or if the process creates\n/// it itself.\npub enum InitialPidFile {\n    /// Create a pidfile, to allow future CLI invocations to manipulate the process.\n    Create(Utf8PathBuf),\n    /// The process will create the pidfile itself, need to wait for that event.\n    Expect(Utf8PathBuf),\n}\n\n/// Start a background child process using the parameters given.\n#[allow(clippy::too_many_arguments)]\npub async fn start_process<F, Fut, AI, A, EI>(\n    process_name: &str,\n    datadir: &Path,\n    command: &Path,\n    args: AI,\n    envs: EI,\n    initial_pid_file: InitialPidFile,\n    retry_timeout: &Duration,\n    process_status_check: F,\n) -> anyhow::Result<()>\nwhere\n    F: Fn() -> Fut,\n    Fut: std::future::Future<Output = anyhow::Result<bool>>,\n    AI: IntoIterator<Item = A>,\n    A: AsRef<OsStr>,\n    // Not generic AsRef<OsStr>, otherwise empty `envs` prevents type inference\n    EI: IntoIterator<Item = (String, String)>,\n{\n    let retries: u128 = retry_timeout.as_millis() / RETRY_INTERVAL.as_millis();\n    if !datadir.metadata().context(\"stat datadir\")?.is_dir() {\n        anyhow::bail!(\"`datadir` must be a directory when calling this function: {datadir:?}\");\n    }\n    let log_path = datadir.join(format!(\"{process_name}.log\"));\n    let process_log_file = fs::OpenOptions::new()\n        .create(true)\n        .append(true)\n        .open(&log_path)\n        .with_context(|| {\n            format!(\"Could not open {process_name} log file {log_path:?} for 
writing\")\n        })?;\n    let same_file_for_stderr = process_log_file.try_clone().with_context(|| {\n        format!(\"Could not reuse {process_name} log file {log_path:?} for writing stderr\")\n    })?;\n\n    let mut command = Command::new(command);\n    let background_command = command\n        .stdout(process_log_file)\n        .stderr(same_file_for_stderr)\n        .args(args)\n        // spawn all child processes in their datadir, useful for all kinds of things,\n        // not least cleaning up child processes e.g. after an unclean exit from the test suite:\n        // ```\n        // lsof  -d cwd -a +D  Users/cs/src/neon/test_output\n        // ```\n        .current_dir(datadir);\n\n    let filled_cmd = fill_env_vars_prefixed_neon(fill_remote_storage_secrets_vars(\n        fill_rust_env_vars(background_command),\n    ));\n    filled_cmd.envs(envs);\n\n    let pid_file_to_check = match &initial_pid_file {\n        InitialPidFile::Create(path) => {\n            pre_exec_create_pidfile(filled_cmd, path);\n            path\n        }\n        InitialPidFile::Expect(path) => path,\n    };\n\n    let spawned_process = filled_cmd.spawn().with_context(|| {\n        format!(\"Could not spawn {process_name}, see console output and log files for details.\")\n    })?;\n    let pid = spawned_process.id();\n    let pid = Pid::from_raw(\n        i32::try_from(pid)\n            .with_context(|| format!(\"Subprocess {process_name} has invalid pid {pid}\"))?,\n    );\n    // set up a scopeguard to kill & wait for the child in case we panic or bail below\n    let spawned_process = scopeguard::guard(spawned_process, |mut spawned_process| {\n        println!(\"SIGKILL & wait the started process\");\n        (|| {\n            // TODO: use another signal that can be caught by the child so it can clean up any children it spawned (e..g, walredo).\n            spawned_process.kill().context(\"SIGKILL child\")?;\n            spawned_process.wait().context(\"wait() for child 
process\")?;\n            anyhow::Ok(())\n        })()\n        .with_context(|| format!(\"scopeguard kill&wait child {process_name:?}\"))\n        .unwrap();\n    });\n\n    for retries in 0..retries {\n        match process_started(pid, pid_file_to_check, &process_status_check).await {\n            Ok(true) => {\n                println!(\"\\n{process_name} started and passed status check, pid: {pid}\");\n                // leak the child process, it'll outlive this neon_local invocation\n                drop(scopeguard::ScopeGuard::into_inner(spawned_process));\n                return Ok(());\n            }\n            Ok(false) => {\n                if retries == NOTICE_AFTER_RETRIES {\n                    // The process is taking a long time to start up. Keep waiting, but\n                    // print a message\n                    print!(\"\\n{process_name} has not started yet, continuing to wait\");\n                }\n                if retries % DOT_EVERY_RETRIES == 0 {\n                    print!(\".\");\n                    io::stdout().flush().unwrap();\n                }\n                tokio::time::sleep(RETRY_INTERVAL).await;\n            }\n            Err(e) => {\n                println!(\"error starting process {process_name:?}: {e:#}\");\n                return Err(e);\n            }\n        }\n    }\n    println!();\n    anyhow::bail!(format!(\n        \"{} did not start+pass status checks within {:?} seconds\",\n        process_name, retry_timeout\n    ));\n}\n\n/// Stops the process, using the pid file given. 
Returns Ok also if the process is already not running.\npub fn stop_process(\n    immediate: bool,\n    process_name: &str,\n    pid_file: &Utf8Path,\n) -> anyhow::Result<()> {\n    let pid = match pid_file::read(pid_file)\n        .with_context(|| format!(\"read pid_file {pid_file:?}\"))?\n    {\n        PidFileRead::NotExist => {\n            println!(\"{process_name} is already stopped: no pid file present at {pid_file:?}\");\n            return Ok(());\n        }\n        PidFileRead::NotHeldByAnyProcess(_) => {\n            // Don't try to kill according to file contents beacuse the pid might have been re-used by another process.\n            // Don't delete the file either, it can race with new pid file creation.\n            // Read `pid_file` module comment for details.\n            println!(\n                \"No process is holding the pidfile. The process must have already exited. Leave in place to avoid race conditions: {pid_file:?}\"\n            );\n            return Ok(());\n        }\n        PidFileRead::LockedByOtherProcess(pid) => pid,\n    };\n    // XXX the pid could become invalid (and recycled) at any time before the kill() below.\n\n    // send signal\n    let sig = if immediate {\n        print!(\"Stopping {process_name} with pid {pid} immediately..\");\n        Signal::SIGQUIT\n    } else {\n        print!(\"Stopping {process_name} with pid {pid} gracefully..\");\n        Signal::SIGTERM\n    };\n    io::stdout().flush().unwrap();\n    match kill(pid, sig) {\n        Ok(()) => (),\n        Err(Errno::ESRCH) => {\n            // Again, don't delete the pid file. The unlink can race with a new pid file being created.\n            println!(\n                \"{process_name} with pid {pid} does not exist, but a pid file {pid_file:?} was found. Likely the pid got recycled. 
Lucky we didn't harm anyone.\"\n            );\n            return Ok(());\n        }\n        Err(e) => anyhow::bail!(\"Failed to send signal to {process_name} with pid {pid}: {e}\"),\n    }\n\n    // Wait until process is gone\n    wait_until_stopped(process_name, pid)?;\n    Ok(())\n}\n\npub fn wait_until_stopped(process_name: &str, pid: Pid) -> anyhow::Result<()> {\n    for retries in 0..STOP_RETRIES {\n        match process_has_stopped(pid) {\n            Ok(true) => {\n                println!(\"\\n{process_name} stopped\");\n                return Ok(());\n            }\n            Ok(false) => {\n                if retries == NOTICE_AFTER_RETRIES {\n                    // The process is taking a long time to stop. Keep waiting, but\n                    // print a message\n                    print!(\"\\n{process_name} has not stopped yet, continuing to wait\");\n                }\n                if retries % DOT_EVERY_RETRIES == 0 {\n                    print!(\".\");\n                    io::stdout().flush().unwrap();\n                }\n                thread::sleep(RETRY_INTERVAL);\n            }\n            Err(e) => {\n                println!(\"{process_name} with pid {pid} failed to stop: {e:#}\");\n                return Err(e);\n            }\n        }\n    }\n    println!();\n    anyhow::bail!(format!(\n        \"{} with pid {} did not stop in {:?} seconds\",\n        process_name, pid, STOP_RETRY_TIMEOUT\n    ));\n}\n\nfn fill_rust_env_vars(cmd: &mut Command) -> &mut Command {\n    // If RUST_BACKTRACE is set, pass it through. 
But if it's not set, default\n    // to RUST_BACKTRACE=1.\n    let backtrace_setting = std::env::var_os(\"RUST_BACKTRACE\");\n    let backtrace_setting = backtrace_setting\n        .as_deref()\n        .unwrap_or_else(|| OsStr::new(\"1\"));\n\n    let mut filled_cmd = cmd.env_clear().env(\"RUST_BACKTRACE\", backtrace_setting);\n\n    // Pass through these environment variables to the command\n    for var in [\n        \"LLVM_PROFILE_FILE\",\n        \"FAILPOINTS\",\n        \"RUST_LOG\",\n        \"ASAN_OPTIONS\",\n        \"UBSAN_OPTIONS\",\n    ] {\n        if let Some(val) = std::env::var_os(var) {\n            filled_cmd = filled_cmd.env(var, val);\n        }\n    }\n\n    filled_cmd\n}\n\nfn fill_remote_storage_secrets_vars(mut cmd: &mut Command) -> &mut Command {\n    for env_key in [\n        \"AWS_ACCESS_KEY_ID\",\n        \"AWS_SECRET_ACCESS_KEY\",\n        \"AWS_SESSION_TOKEN\",\n        \"AWS_PROFILE\",\n        // HOME is needed in combination with `AWS_PROFILE` to pick up the SSO sessions.\n        \"HOME\",\n        \"AZURE_STORAGE_ACCOUNT\",\n        \"AZURE_STORAGE_ACCESS_KEY\",\n    ] {\n        if let Ok(value) = std::env::var(env_key) {\n            cmd = cmd.env(env_key, value);\n        }\n    }\n    cmd\n}\n\nfn fill_env_vars_prefixed_neon(mut cmd: &mut Command) -> &mut Command {\n    for (var, val) in std::env::vars() {\n        if var.starts_with(\"NEON_\") {\n            cmd = cmd.env(var, val);\n        }\n    }\n    cmd\n}\n\n/// Add a `pre_exec` to the cmd that, inbetween fork() and exec(),\n/// 1. Claims a pidfile with a fcntl lock on it and\n/// 2. 
Sets up the pidfile's file descriptor so that it (and the lock)\n///    will remain held until the cmd exits.\nfn pre_exec_create_pidfile<P>(cmd: &mut Command, path: P) -> &mut Command\nwhere\n    P: Into<Utf8PathBuf>,\n{\n    let path: Utf8PathBuf = path.into();\n    // SAFETY:\n    // pre_exec is marked unsafe because it runs between fork and exec.\n    // Why is that dangerous in various ways?\n    // Long answer:  https://github.com/rust-lang/rust/issues/39575\n    // Short answer: in a multi-threaded program, other threads may have\n    // been inside of critical sections at the time of fork. In the\n    // original process, that was all right, assuming they protected\n    // the critical sections appropriately, e.g., through locks.\n    // Fork adds another process to the mix that\n    //   1. Has a single thread T\n    //   2. In an exact copy of the address space at the time of fork.\n    // A variety of problems can occur now:\n    //   1. T tries to grab a lock that was locked at the time of fork.\n    //      It will wait forever since in its address space, the lock\n    //      is in state 'taken' but the thread that would unlock it is\n    //      not there.\n    //   2. A rust object that represented some external resource in the\n    //      parent now got implicitly copied by the fork, even though\n    //      the object's type is not `Copy`. The parent program may use\n    //      non-copyability as a way to enforce unique ownership of an\n    //      external resource in the typesystem. The fork breaks that\n    //      assumption, as now both parent and child process have an\n    //      owned instance of the object that represents the same\n    //      underlying resource.\n    // While these seem like niche problems, (1) in particular is\n    // highly relevant. 
For example, `malloc()` may grab a mutex internally,\n    // and so, if we forked while another thread was mallocing and our\n    // pre_exec closure allocates as well, it will block on the malloc\n    // mutex forever.\n    //\n    // The proper solution is to only use C library functions that are marked\n    // \"async-signal-safe\": https://man7.org/linux/man-pages/man7/signal-safety.7.html\n    //\n    // With this specific pre_exec() closure, the non-error path doesn't allocate.\n    // The error path uses `anyhow`, and hence does allocate.\n    // We take our chances there, hoping that any potential disaster is constrained\n    // to the child process (e.g., malloc has no state outside of the child process).\n    // Last, `expect` prints to stderr, and stdio is not async-signal-safe.\n    // Again, we take our chances, making the same assumptions as for malloc.\n    unsafe {\n        cmd.pre_exec(move || {\n            let file = pid_file::claim_for_current_process(&path).expect(\"claim pid file\");\n            // Remove the FD_CLOEXEC flag on the pidfile descriptor so that the pidfile\n            // remains locked after exec.\n            nix::fcntl::fcntl(file.as_fd(), FcntlArg::F_SETFD(FdFlag::empty()))\n                .expect(\"remove FD_CLOEXEC\");\n            // Don't run drop(file), it would close the file before we actually exec.\n            std::mem::forget(file);\n            Ok(())\n        });\n    }\n    cmd\n}\n\nasync fn process_started<F, Fut>(\n    pid: Pid,\n    pid_file_to_check: &Utf8Path,\n    status_check: &F,\n) -> anyhow::Result<bool>\nwhere\n    F: Fn() -> Fut,\n    Fut: std::future::Future<Output = anyhow::Result<bool>>,\n{\n    match status_check().await {\n        Ok(true) => match pid_file::read(pid_file_to_check)? 
{\n            PidFileRead::NotExist => Ok(false),\n            PidFileRead::LockedByOtherProcess(pid_in_file) => Ok(pid_in_file == pid),\n            PidFileRead::NotHeldByAnyProcess(_) => Ok(false),\n        },\n        Ok(false) => Ok(false),\n        Err(e) => anyhow::bail!(\"process failed to start: {e}\"),\n    }\n}\n\npub(crate) fn process_has_stopped(pid: Pid) -> anyhow::Result<bool> {\n    match kill(pid, None) {\n        // Process exists, keep waiting\n        Ok(_) => Ok(false),\n        // Process not found, we're done\n        Err(Errno::ESRCH) => Ok(true),\n        Err(err) => anyhow::bail!(\"Failed to send signal to process with pid {pid}: {err}\"),\n    }\n}\n"
  },
  {
    "path": "control_plane/src/bin/neon_local.rs",
    "content": "//!\n//! `neon_local` is an executable that can be used to create a local\n//! Neon environment, for testing purposes. The local environment is\n//! quite different from the cloud environment with Kubernetes, but it\n//! easier to work with locally. The python tests in `test_runner`\n//! rely on `neon_local` to set up the environment for each test.\n//!\nuse std::borrow::Cow;\nuse std::collections::{BTreeSet, HashMap};\nuse std::fs::File;\nuse std::path::PathBuf;\nuse std::process::exit;\nuse std::str::FromStr;\nuse std::time::Duration;\n\nuse anyhow::{Context, Result, anyhow, bail};\nuse clap::Parser;\nuse compute_api::requests::ComputeClaimsScope;\nuse compute_api::spec::{ComputeMode, PageserverProtocol};\nuse control_plane::broker::StorageBroker;\nuse control_plane::endpoint::{ComputeControlPlane, EndpointTerminateMode};\nuse control_plane::endpoint::{\n    local_pageserver_conf_to_conn_info, tenant_locate_response_to_conn_info,\n};\nuse control_plane::endpoint_storage::{ENDPOINT_STORAGE_DEFAULT_ADDR, EndpointStorage};\nuse control_plane::local_env;\nuse control_plane::local_env::{\n    EndpointStorageConf, InitForceMode, LocalEnv, NeonBroker, NeonLocalInitConf,\n    NeonLocalInitPageserverConf, SafekeeperConf,\n};\nuse control_plane::pageserver::PageServerNode;\nuse control_plane::safekeeper::SafekeeperNode;\nuse control_plane::storage_controller::{\n    NeonStorageControllerStartArgs, NeonStorageControllerStopArgs, StorageController,\n};\nuse nix::fcntl::{Flock, FlockArg};\nuse pageserver_api::config::{\n    DEFAULT_GRPC_LISTEN_PORT as DEFAULT_PAGESERVER_GRPC_PORT,\n    DEFAULT_HTTP_LISTEN_PORT as DEFAULT_PAGESERVER_HTTP_PORT,\n    DEFAULT_PG_LISTEN_PORT as DEFAULT_PAGESERVER_PG_PORT,\n};\nuse pageserver_api::controller_api::{\n    NodeAvailabilityWrapper, PlacementPolicy, TenantCreateRequest,\n};\nuse pageserver_api::models::{\n    ShardParameters, TenantConfigRequest, TimelineCreateRequest, TimelineInfo,\n};\nuse 
pageserver_api::shard::{DEFAULT_STRIPE_SIZE, ShardCount, ShardStripeSize, TenantShardId};\nuse postgres_backend::AuthType;\nuse safekeeper_api::membership::{SafekeeperGeneration, SafekeeperId};\nuse safekeeper_api::{\n    DEFAULT_HTTP_LISTEN_PORT as DEFAULT_SAFEKEEPER_HTTP_PORT,\n    DEFAULT_PG_LISTEN_PORT as DEFAULT_SAFEKEEPER_PG_PORT, PgMajorVersion, PgVersionId,\n};\nuse storage_broker::DEFAULT_LISTEN_ADDR as DEFAULT_BROKER_ADDR;\nuse tokio::task::JoinSet;\nuse utils::auth::{Claims, Scope};\nuse utils::id::{NodeId, TenantId, TenantTimelineId, TimelineId};\nuse utils::lsn::Lsn;\nuse utils::project_git_version;\n\n// Default id of a safekeeper node, if not specified on the command line.\nconst DEFAULT_SAFEKEEPER_ID: NodeId = NodeId(1);\nconst DEFAULT_PAGESERVER_ID: NodeId = NodeId(1);\nconst DEFAULT_BRANCH_NAME: &str = \"main\";\nproject_git_version!(GIT_VERSION);\n\n#[allow(dead_code)]\nconst DEFAULT_PG_VERSION: PgMajorVersion = PgMajorVersion::PG17;\nconst DEFAULT_PG_VERSION_NUM: &str = \"17\";\n\nconst DEFAULT_PAGESERVER_CONTROL_PLANE_API: &str = \"http://127.0.0.1:1234/upcall/v1/\";\n\n/// Neon CLI.\n#[derive(clap::Parser)]\n#[command(version = GIT_VERSION, name = \"Neon CLI\")]\nstruct Cli {\n    #[command(subcommand)]\n    command: NeonLocalCmd,\n}\n\n#[derive(clap::Subcommand)]\nenum NeonLocalCmd {\n    Init(InitCmdArgs),\n\n    #[command(subcommand)]\n    Tenant(TenantCmd),\n    #[command(subcommand)]\n    Timeline(TimelineCmd),\n    #[command(subcommand)]\n    Pageserver(PageserverCmd),\n    #[command(subcommand)]\n    #[clap(alias = \"storage_controller\")]\n    StorageController(StorageControllerCmd),\n    #[command(subcommand)]\n    #[clap(alias = \"storage_broker\")]\n    StorageBroker(StorageBrokerCmd),\n    #[command(subcommand)]\n    Safekeeper(SafekeeperCmd),\n    #[command(subcommand)]\n    EndpointStorage(EndpointStorageCmd),\n    #[command(subcommand)]\n    Endpoint(EndpointCmd),\n    #[command(subcommand)]\n    Mappings(MappingsCmd),\n\n    
Start(StartCmdArgs),\n    Stop(StopCmdArgs),\n}\n\n/// Initialize a new Neon repository, preparing configs for services to start with.\n#[derive(clap::Args)]\nstruct InitCmdArgs {\n    /// How many pageservers to create (default 1).\n    #[clap(long)]\n    num_pageservers: Option<u16>,\n\n    #[clap(long)]\n    config: Option<PathBuf>,\n\n    /// Force initialization even if the repository is not empty.\n    #[clap(long, default_value = \"must-not-exist\")]\n    #[arg(value_parser)]\n    force: InitForceMode,\n}\n\n/// Start pageserver and safekeepers.\n#[derive(clap::Args)]\nstruct StartCmdArgs {\n    #[clap(long = \"start-timeout\", default_value = \"10s\")]\n    timeout: humantime::Duration,\n}\n\n/// Stop pageserver and safekeepers.\n#[derive(clap::Args)]\nstruct StopCmdArgs {\n    #[arg(value_enum)]\n    #[clap(long, default_value_t = StopMode::Fast)]\n    mode: StopMode,\n}\n\n#[derive(Clone, Copy, clap::ValueEnum)]\nenum StopMode {\n    Fast,\n    Immediate,\n}\n\n/// Manage tenants.\n#[derive(clap::Subcommand)]\nenum TenantCmd {\n    List,\n    Create(TenantCreateCmdArgs),\n    SetDefault(TenantSetDefaultCmdArgs),\n    Config(TenantConfigCmdArgs),\n    Import(TenantImportCmdArgs),\n}\n\n#[derive(clap::Args)]\nstruct TenantCreateCmdArgs {\n    /// Tenant ID, as a 32-byte hexadecimal string.\n    #[clap(long = \"tenant-id\")]\n    tenant_id: Option<TenantId>,\n\n    /// Use a specific timeline id when creating a tenant and its initial timeline.\n    #[clap(long)]\n    timeline_id: Option<TimelineId>,\n\n    #[clap(short = 'c')]\n    config: Vec<String>,\n\n    /// Postgres version to use for the initial timeline.\n    #[arg(default_value = DEFAULT_PG_VERSION_NUM)]\n    #[clap(long)]\n    pg_version: PgMajorVersion,\n\n    /// Use this tenant in future CLI commands where tenant_id is needed, but not specified.\n    #[clap(long)]\n    set_default: bool,\n\n    /// Number of shards in the new tenant.\n    #[clap(long)]\n    #[arg(default_value_t = 0)]\n    
shard_count: u8,\n    /// Sharding stripe size in pages.\n    #[clap(long)]\n    shard_stripe_size: Option<u32>,\n\n    /// Placement policy shards in this tenant.\n    #[clap(long)]\n    #[arg(value_parser = parse_placement_policy)]\n    placement_policy: Option<PlacementPolicy>,\n}\n\nfn parse_placement_policy(s: &str) -> anyhow::Result<PlacementPolicy> {\n    Ok(serde_json::from_str::<PlacementPolicy>(s)?)\n}\n\n/// Set a particular tenant as default in future CLI commands where tenant_id is needed, but not\n/// specified.\n#[derive(clap::Args)]\nstruct TenantSetDefaultCmdArgs {\n    /// Tenant ID, as a 32-byte hexadecimal string.\n    #[clap(long = \"tenant-id\")]\n    tenant_id: TenantId,\n}\n\n#[derive(clap::Args)]\nstruct TenantConfigCmdArgs {\n    /// Tenant ID, as a 32-byte hexadecimal string.\n    #[clap(long = \"tenant-id\")]\n    tenant_id: Option<TenantId>,\n\n    #[clap(short = 'c')]\n    config: Vec<String>,\n}\n\n/// Import a tenant that is present in remote storage, and create branches for its timelines.\n#[derive(clap::Args)]\nstruct TenantImportCmdArgs {\n    /// Tenant ID, as a 32-byte hexadecimal string.\n    #[clap(long = \"tenant-id\")]\n    tenant_id: TenantId,\n}\n\n/// Manage timelines.\n#[derive(clap::Subcommand)]\nenum TimelineCmd {\n    List(TimelineListCmdArgs),\n    Branch(TimelineBranchCmdArgs),\n    Create(TimelineCreateCmdArgs),\n    Import(TimelineImportCmdArgs),\n}\n\n/// List all timelines available to this pageserver.\n#[derive(clap::Args)]\nstruct TimelineListCmdArgs {\n    /// Tenant ID, as a 32-byte hexadecimal string.\n    #[clap(long = \"tenant-id\")]\n    tenant_shard_id: Option<TenantShardId>,\n}\n\n/// Create a new timeline, branching off from another timeline.\n#[derive(clap::Args)]\nstruct TimelineBranchCmdArgs {\n    /// Tenant ID, as a 32-byte hexadecimal string.\n    #[clap(long = \"tenant-id\")]\n    tenant_id: Option<TenantId>,\n    /// New timeline's ID, as a 32-byte hexadecimal string.\n    #[clap(long)]\n    
timeline_id: Option<TimelineId>,\n    /// Human-readable alias for the new timeline.\n    #[clap(long)]\n    branch_name: String,\n    /// Use last Lsn of another timeline (and its data) as base when creating the new timeline. The\n    /// timeline gets resolved by its branch name.\n    #[clap(long)]\n    ancestor_branch_name: Option<String>,\n    /// When using another timeline as base, use a specific Lsn in it instead of the latest one.\n    #[clap(long)]\n    ancestor_start_lsn: Option<Lsn>,\n}\n\n/// Create a new blank timeline.\n#[derive(clap::Args)]\nstruct TimelineCreateCmdArgs {\n    /// Tenant ID, as a 32-byte hexadecimal string.\n    #[clap(long = \"tenant-id\")]\n    tenant_id: Option<TenantId>,\n    /// New timeline's ID, as a 32-byte hexadecimal string.\n    #[clap(long)]\n    timeline_id: Option<TimelineId>,\n    /// Human-readable alias for the new timeline.\n    #[clap(long)]\n    branch_name: String,\n\n    /// Postgres version.\n    #[arg(default_value = DEFAULT_PG_VERSION_NUM)]\n    #[clap(long)]\n    pg_version: PgMajorVersion,\n}\n\n/// Import a timeline from a basebackup directory.\n#[derive(clap::Args)]\nstruct TimelineImportCmdArgs {\n    /// Tenant ID, as a 32-byte hexadecimal string.\n    #[clap(long = \"tenant-id\")]\n    tenant_id: Option<TenantId>,\n    /// New timeline's ID, as a 32-byte hexadecimal string.\n    #[clap(long)]\n    timeline_id: TimelineId,\n    /// Human-readable alias for the new timeline.\n    #[clap(long)]\n    branch_name: String,\n    /// Basebackup tarfile to import.\n    #[clap(long)]\n    base_tarfile: PathBuf,\n    /// LSN the basebackup starts at.\n    #[clap(long)]\n    base_lsn: Lsn,\n    /// WAL to add after base.\n    #[clap(long)]\n    wal_tarfile: Option<PathBuf>,\n    /// LSN the basebackup ends at.\n    #[clap(long)]\n    end_lsn: Option<Lsn>,\n\n    /// Postgres version of the basebackup being imported.\n    #[arg(default_value = DEFAULT_PG_VERSION_NUM)]\n    #[clap(long)]\n    pg_version: 
PgMajorVersion,\n}\n\n/// Manage pageservers.\n#[derive(clap::Subcommand)]\nenum PageserverCmd {\n    Status(PageserverStatusCmdArgs),\n    Start(PageserverStartCmdArgs),\n    Stop(PageserverStopCmdArgs),\n    Restart(PageserverRestartCmdArgs),\n}\n\n/// Show status of a local pageserver.\n#[derive(clap::Args)]\nstruct PageserverStatusCmdArgs {\n    /// Pageserver ID.\n    #[clap(long = \"id\")]\n    pageserver_id: Option<NodeId>,\n}\n\n/// Start local pageserver.\n#[derive(clap::Args)]\nstruct PageserverStartCmdArgs {\n    /// Pageserver ID.\n    #[clap(long = \"id\")]\n    pageserver_id: Option<NodeId>,\n    /// Timeout until we fail the command.\n    #[clap(short = 't', long)]\n    #[arg(default_value = \"10s\")]\n    start_timeout: humantime::Duration,\n}\n\n/// Stop local pageserver.\n#[derive(clap::Args)]\nstruct PageserverStopCmdArgs {\n    /// Pageserver ID.\n    #[clap(long = \"id\")]\n    pageserver_id: Option<NodeId>,\n    /// If 'immediate', don't flush repository data at shutdown\n    #[clap(short = 'm')]\n    #[arg(value_enum, default_value = \"fast\")]\n    stop_mode: StopMode,\n}\n\n/// Restart local pageserver.\n#[derive(clap::Args)]\nstruct PageserverRestartCmdArgs {\n    /// Pageserver ID.\n    #[clap(long = \"id\")]\n    pageserver_id: Option<NodeId>,\n    /// Timeout until we fail the command.\n    #[clap(short = 't', long)]\n    #[arg(default_value = \"10s\")]\n    start_timeout: humantime::Duration,\n}\n\n/// Manage storage controller.\n#[derive(clap::Subcommand)]\nenum StorageControllerCmd {\n    Start(StorageControllerStartCmdArgs),\n    Stop(StorageControllerStopCmdArgs),\n}\n\n/// Start storage controller.\n#[derive(clap::Args)]\nstruct StorageControllerStartCmdArgs {\n    /// Timeout until we fail the command.\n    #[clap(short = 't', long)]\n    #[arg(default_value = \"10s\")]\n    start_timeout: humantime::Duration,\n    /// Identifier used to distinguish storage controller instances.\n    #[clap(long)]\n    #[arg(default_value_t = 
1)]\n    instance_id: u8,\n    /// Base port for the storage controller instance identified by instance-id (defaults to\n    /// pageserver cplane api).\n    #[clap(long)]\n    base_port: Option<u16>,\n\n    /// Whether the storage controller should handle pageserver-reported local disk loss events.\n    #[clap(long)]\n    handle_ps_local_disk_loss: Option<bool>,\n}\n\n/// Stop storage controller.\n#[derive(clap::Args)]\nstruct StorageControllerStopCmdArgs {\n    /// If 'immediate', don't flush repository data at shutdown\n    #[clap(short = 'm')]\n    #[arg(value_enum, default_value = \"fast\")]\n    stop_mode: StopMode,\n    /// Identifier used to distinguish storage controller instances.\n    #[clap(long)]\n    #[arg(default_value_t = 1)]\n    instance_id: u8,\n}\n\n/// Manage storage broker.\n#[derive(clap::Subcommand)]\nenum StorageBrokerCmd {\n    Start(StorageBrokerStartCmdArgs),\n    Stop(StorageBrokerStopCmdArgs),\n}\n\n/// Start broker.\n#[derive(clap::Args)]\nstruct StorageBrokerStartCmdArgs {\n    /// Timeout until we fail the command.\n    #[clap(short = 't', long, default_value = \"10s\")]\n    start_timeout: humantime::Duration,\n}\n\n/// Stop broker.\n#[derive(clap::Args)]\nstruct StorageBrokerStopCmdArgs {\n    /// If 'immediate', don't flush repository data on shutdown.\n    #[clap(short = 'm')]\n    #[arg(value_enum, default_value = \"fast\")]\n    stop_mode: StopMode,\n}\n\n/// Manage safekeepers.\n#[derive(clap::Subcommand)]\nenum SafekeeperCmd {\n    Start(SafekeeperStartCmdArgs),\n    Stop(SafekeeperStopCmdArgs),\n    Restart(SafekeeperRestartCmdArgs),\n}\n\n/// Manage object storage.\n#[derive(clap::Subcommand)]\nenum EndpointStorageCmd {\n    Start(EndpointStorageStartCmd),\n    Stop(EndpointStorageStopCmd),\n}\n\n/// Start object storage.\n#[derive(clap::Args)]\nstruct EndpointStorageStartCmd {\n    /// Timeout until we fail the command.\n    #[clap(short = 't', long)]\n    #[arg(default_value = \"10s\")]\n    start_timeout: 
humantime::Duration,\n}\n\n/// Stop object storage.\n#[derive(clap::Args)]\nstruct EndpointStorageStopCmd {\n    /// If 'immediate', don't flush repository data on shutdown.\n    #[clap(short = 'm')]\n    #[arg(value_enum, default_value = \"fast\")]\n    stop_mode: StopMode,\n}\n\n/// Start local safekeeper.\n#[derive(clap::Args)]\nstruct SafekeeperStartCmdArgs {\n    /// Safekeeper ID.\n    #[arg(default_value_t = NodeId(1))]\n    id: NodeId,\n\n    /// Additional safekeeper invocation options, e.g. -e=--http-auth-public-key-path=foo.\n    #[clap(short = 'e', long = \"safekeeper-extra-opt\")]\n    extra_opt: Vec<String>,\n\n    /// Timeout until we fail the command.\n    #[clap(short = 't', long)]\n    #[arg(default_value = \"10s\")]\n    start_timeout: humantime::Duration,\n}\n\n/// Stop local safekeeper.\n#[derive(clap::Args)]\nstruct SafekeeperStopCmdArgs {\n    /// Safekeeper ID.\n    #[arg(default_value_t = NodeId(1))]\n    id: NodeId,\n\n    /// If 'immediate', don't flush repository data on shutdown.\n    #[arg(value_enum, default_value = \"fast\")]\n    #[clap(short = 'm')]\n    stop_mode: StopMode,\n}\n\n/// Restart local safekeeper.\n#[derive(clap::Args)]\nstruct SafekeeperRestartCmdArgs {\n    /// Safekeeper ID.\n    #[arg(default_value_t = NodeId(1))]\n    id: NodeId,\n\n    /// If 'immediate', don't flush repository data on shutdown.\n    #[arg(value_enum, default_value = \"fast\")]\n    #[clap(short = 'm')]\n    stop_mode: StopMode,\n\n    /// Additional safekeeper invocation options, e.g. 
-e=--http-auth-public-key-path=foo.\n    #[clap(short = 'e', long = \"safekeeper-extra-opt\")]\n    extra_opt: Vec<String>,\n\n    /// Timeout until we fail the command.\n    #[clap(short = 't', long)]\n    #[arg(default_value = \"10s\")]\n    start_timeout: humantime::Duration,\n}\n\n/// Manage Postgres instances.\n#[derive(clap::Subcommand)]\nenum EndpointCmd {\n    List(EndpointListCmdArgs),\n    Create(EndpointCreateCmdArgs),\n    Start(EndpointStartCmdArgs),\n    Reconfigure(EndpointReconfigureCmdArgs),\n    RefreshConfiguration(EndpointRefreshConfigurationArgs),\n    Stop(EndpointStopCmdArgs),\n    UpdatePageservers(EndpointUpdatePageserversCmdArgs),\n    GenerateJwt(EndpointGenerateJwtCmdArgs),\n}\n\n/// List endpoints.\n#[derive(clap::Args)]\nstruct EndpointListCmdArgs {\n    /// Tenant ID, as a 32-byte hexadecimal string.\n    #[clap(long = \"tenant-id\")]\n    tenant_shard_id: Option<TenantShardId>,\n}\n\n/// Create a compute endpoint.\n#[derive(clap::Args)]\nstruct EndpointCreateCmdArgs {\n    /// Tenant ID, as a 32-byte hexadecimal string.\n    #[clap(long = \"tenant-id\")]\n    tenant_id: Option<TenantId>,\n    /// Postgres endpoint ID.\n    endpoint_id: Option<String>,\n    /// Name of the branch the endpoint will run on.\n    #[clap(long)]\n    branch_name: Option<String>,\n    /// Specify LSN on the timeline to start from. 
By default, end of the timeline would be used.\n    #[clap(long)]\n    lsn: Option<Lsn>,\n    #[clap(long)]\n    pg_port: Option<u16>,\n    #[clap(long, alias = \"http-port\")]\n    external_http_port: Option<u16>,\n    #[clap(long)]\n    internal_http_port: Option<u16>,\n    #[clap(long = \"pageserver-id\")]\n    endpoint_pageserver_id: Option<NodeId>,\n\n    /// Don't do basebackup, create endpoint directory with only config files.\n    #[clap(long, action = clap::ArgAction::Set, default_value_t = false)]\n    config_only: bool,\n\n    /// Postgres version.\n    #[arg(default_value = DEFAULT_PG_VERSION_NUM)]\n    #[clap(long)]\n    pg_version: PgMajorVersion,\n\n    /// Use gRPC to communicate with Pageservers, by generating grpc:// connstrings.\n    ///\n    /// Specified on creation such that it's retained across reconfiguration and restarts.\n    ///\n    /// NB: not yet supported by computes.\n    #[clap(long)]\n    grpc: bool,\n\n    /// If set, the node will be a hot replica on the specified timeline.\n    #[clap(long, action = clap::ArgAction::Set, default_value_t = false)]\n    hot_standby: bool,\n    /// If set, will set up the catalog for neon_superuser.\n    #[clap(long)]\n    update_catalog: bool,\n    /// Allow multiple primary endpoints running on the same branch. Shouldn't be used normally, but\n    /// useful for tests.\n    #[clap(long)]\n    allow_multiple: bool,\n\n    /// Name of the privileged role for the endpoint.\n    // Only allow changing it on creation.\n    #[clap(long)]\n    privileged_role_name: Option<String>,\n}\n\n/// Start Postgres. 
If the endpoint doesn't exist yet, it is created.\n#[derive(clap::Args)]\nstruct EndpointStartCmdArgs {\n    /// Postgres endpoint ID.\n    endpoint_id: String,\n    /// Pageserver ID.\n    #[clap(long = \"pageserver-id\")]\n    endpoint_pageserver_id: Option<NodeId>,\n    /// Safekeepers membership generation to prefix neon.safekeepers with.\n    #[clap(long)]\n    safekeepers_generation: Option<u32>,\n    /// List of safekeepers endpoint will talk to.\n    #[clap(long)]\n    safekeepers: Option<String>,\n    /// Configure the remote extensions storage proxy gateway URL to request for extensions.\n    #[clap(long, alias = \"remote-ext-config\")]\n    remote_ext_base_url: Option<String>,\n    /// If set, will create test user `user` and `neondb` database. Requires `update-catalog = true`\n    #[clap(long)]\n    create_test_user: bool,\n    /// Allow multiple primary endpoints running on the same branch. Shouldn't be used normally, but\n    /// useful for tests.\n    #[clap(long)]\n    allow_multiple: bool,\n    /// Timeout until we fail the command.\n    #[clap(short = 't', long, value_parser= humantime::parse_duration)]\n    #[arg(default_value = \"90s\")]\n    start_timeout: Duration,\n\n    /// Download LFC cache from endpoint storage on endpoint startup\n    #[clap(long, default_value = \"false\")]\n    autoprewarm: bool,\n\n    /// Upload LFC cache to endpoint storage periodically\n    #[clap(long)]\n    offload_lfc_interval_seconds: Option<std::num::NonZeroU64>,\n\n    /// Run in development mode, skipping VM-specific operations like process termination\n    #[clap(long, action = clap::ArgAction::SetTrue)]\n    dev: bool,\n}\n\n/// Reconfigure an endpoint.\n#[derive(clap::Args)]\nstruct EndpointReconfigureCmdArgs {\n    /// Tenant id. 
Represented as a hexadecimal string 32 symbols length\n    #[clap(long = \"tenant-id\")]\n    tenant_id: Option<TenantId>,\n    /// Postgres endpoint ID.\n    endpoint_id: String,\n    /// Pageserver ID.\n    #[clap(long = \"pageserver-id\")]\n    endpoint_pageserver_id: Option<NodeId>,\n    #[clap(long)]\n    safekeepers: Option<String>,\n}\n\n/// Refresh the endpoint's configuration by forcing it reload it's spec\n#[derive(clap::Args)]\nstruct EndpointRefreshConfigurationArgs {\n    /// Postgres endpoint id\n    endpoint_id: String,\n}\n\n/// Stop an endpoint.\n#[derive(clap::Args)]\nstruct EndpointStopCmdArgs {\n    /// Postgres endpoint ID.\n    endpoint_id: String,\n    /// Also delete data directory (now optional, should be default in future).\n    #[clap(long)]\n    destroy: bool,\n\n    /// Postgres shutdown mode, passed to `pg_ctl -m <mode>`.\n    #[clap(long)]\n    #[clap(default_value = \"fast\")]\n    mode: EndpointTerminateMode,\n}\n\n/// Update the pageservers in the spec file of the compute endpoint\n#[derive(clap::Args)]\nstruct EndpointUpdatePageserversCmdArgs {\n    /// Postgres endpoint id\n    endpoint_id: String,\n\n    /// Specified pageserver id\n    #[clap(short = 'p', long)]\n    pageserver_id: Option<NodeId>,\n}\n\n/// Generate a JWT for an endpoint.\n#[derive(clap::Args)]\nstruct EndpointGenerateJwtCmdArgs {\n    /// Postgres endpoint ID.\n    endpoint_id: String,\n    /// Scope to generate the JWT with.\n    #[clap(short = 's', long, value_parser = ComputeClaimsScope::from_str)]\n    scope: Option<ComputeClaimsScope>,\n}\n\n/// Manage neon_local branch name mappings.\n#[derive(clap::Subcommand)]\nenum MappingsCmd {\n    Map(MappingsMapCmdArgs),\n}\n\n/// Create new mapping which cannot exist already.\n#[derive(clap::Args)]\nstruct MappingsMapCmdArgs {\n    /// Tenant ID, as a 32-byte hexadecimal string.\n    #[clap(long)]\n    tenant_id: TenantId,\n    /// Timeline ID, as a 32-byte hexadecimal string.\n    #[clap(long)]\n    timeline_id: 
TimelineId,\n    /// Branch name to give to the timeline.\n    #[clap(long)]\n    branch_name: String,\n}\n\n///\n/// Timelines tree element used as a value in the HashMap.\n///\nstruct TimelineTreeEl {\n    /// `TimelineInfo` received from the `pageserver` via the `timeline_list` http API call.\n    pub info: TimelineInfo,\n    /// Name, recovered from neon config mappings\n    pub name: Option<String>,\n    /// Holds all direct children of this timeline referenced using `timeline_id`.\n    pub children: BTreeSet<TimelineId>,\n}\n\n/// A flock-based guard over the neon_local repository directory\nstruct RepoLock {\n    _file: Flock<File>,\n}\n\nimpl RepoLock {\n    fn new() -> Result<Self> {\n        let repo_dir = File::open(local_env::base_path())?;\n        match Flock::lock(repo_dir, FlockArg::LockExclusive) {\n            Ok(f) => Ok(Self { _file: f }),\n            Err((_, e)) => Err(e).context(\"flock error\"),\n        }\n    }\n}\n\n// Main entry point for the 'neon_local' CLI utility\n//\n// This utility helps to manage neon installation. That includes following:\n//   * Management of local postgres installations running on top of the\n//     pageserver.\n//   * Providing CLI api to the pageserver\n//   * TODO: export/import to/from usual postgres\nfn main() -> Result<()> {\n    let cli = Cli::parse();\n\n    // Check for 'neon init' command first.\n    let (subcommand_result, _lock) = if let NeonLocalCmd::Init(args) = cli.command {\n        (handle_init(&args).map(|env| Some(Cow::Owned(env))), None)\n    } else {\n        // This tool uses a collection of simple files to store its state, and consequently\n        // it is not generally safe to run multiple commands concurrently.  
Rather than expect\n        // all callers to know this, use a lock file to protect against concurrent execution.\n        let _repo_lock = RepoLock::new().unwrap();\n\n        // all other commands need an existing config\n        let env = LocalEnv::load_config(&local_env::base_path()).context(\"Error loading config\")?;\n        let original_env = env.clone();\n        let env = Box::leak(Box::new(env));\n        let rt = tokio::runtime::Builder::new_current_thread()\n            .enable_all()\n            .build()\n            .unwrap();\n\n        let subcommand_result = match cli.command {\n            NeonLocalCmd::Init(_) => unreachable!(\"init was handled earlier already\"),\n            NeonLocalCmd::Start(args) => rt.block_on(handle_start_all(&args, env)),\n            NeonLocalCmd::Stop(args) => rt.block_on(handle_stop_all(&args, env)),\n            NeonLocalCmd::Tenant(subcmd) => rt.block_on(handle_tenant(&subcmd, env)),\n            NeonLocalCmd::Timeline(subcmd) => rt.block_on(handle_timeline(&subcmd, env)),\n            NeonLocalCmd::Pageserver(subcmd) => rt.block_on(handle_pageserver(&subcmd, env)),\n            NeonLocalCmd::StorageController(subcmd) => {\n                rt.block_on(handle_storage_controller(&subcmd, env))\n            }\n            NeonLocalCmd::StorageBroker(subcmd) => rt.block_on(handle_storage_broker(&subcmd, env)),\n            NeonLocalCmd::Safekeeper(subcmd) => rt.block_on(handle_safekeeper(&subcmd, env)),\n            NeonLocalCmd::EndpointStorage(subcmd) => {\n                rt.block_on(handle_endpoint_storage(&subcmd, env))\n            }\n            NeonLocalCmd::Endpoint(subcmd) => rt.block_on(handle_endpoint(&subcmd, env)),\n            NeonLocalCmd::Mappings(subcmd) => handle_mappings(&subcmd, env),\n        };\n\n        let subcommand_result = if &original_env != env {\n            subcommand_result.map(|()| Some(Cow::Borrowed(env)))\n        } else {\n            subcommand_result.map(|()| None)\n        };\n  
      (subcommand_result, Some(_repo_lock))\n    };\n\n    match subcommand_result {\n        Ok(Some(updated_env)) => updated_env.persist_config()?,\n        Ok(None) => (),\n        Err(e) => {\n            eprintln!(\"command failed: {e:?}\");\n            exit(1);\n        }\n    }\n    Ok(())\n}\n\n///\n/// Prints timelines list as a tree-like structure.\n///\nfn print_timelines_tree(\n    timelines: Vec<TimelineInfo>,\n    mut timeline_name_mappings: HashMap<TenantTimelineId, String>,\n) -> Result<()> {\n    let mut timelines_hash = timelines\n        .iter()\n        .map(|t| {\n            (\n                t.timeline_id,\n                TimelineTreeEl {\n                    info: t.clone(),\n                    children: BTreeSet::new(),\n                    name: timeline_name_mappings\n                        .remove(&TenantTimelineId::new(t.tenant_id.tenant_id, t.timeline_id)),\n                },\n            )\n        })\n        .collect::<HashMap<_, _>>();\n\n    // Memorize all direct children of each timeline.\n    for timeline in timelines.iter() {\n        if let Some(ancestor_timeline_id) = timeline.ancestor_timeline_id {\n            timelines_hash\n                .get_mut(&ancestor_timeline_id)\n                .context(\"missing timeline info in the HashMap\")?\n                .children\n                .insert(timeline.timeline_id);\n        }\n    }\n\n    for timeline in timelines_hash.values() {\n        // Start with root local timelines (no ancestors) first.\n        if timeline.info.ancestor_timeline_id.is_none() {\n            print_timeline(0, &Vec::from([true]), timeline, &timelines_hash)?;\n        }\n    }\n\n    Ok(())\n}\n\n///\n/// Recursively prints timeline info with all its children.\n///\nfn print_timeline(\n    nesting_level: usize,\n    is_last: &[bool],\n    timeline: &TimelineTreeEl,\n    timelines: &HashMap<TimelineId, TimelineTreeEl>,\n) -> Result<()> {\n    if nesting_level > 0 {\n        let ancestor_lsn = 
match timeline.info.ancestor_lsn {\n            Some(lsn) => lsn.to_string(),\n            None => \"Unknown Lsn\".to_string(),\n        };\n\n        let mut br_sym = \"┣━\";\n\n        // Draw each nesting padding with proper style\n        // depending on whether its timeline ended or not.\n        if nesting_level > 1 {\n            for l in &is_last[1..is_last.len() - 1] {\n                if *l {\n                    print!(\"   \");\n                } else {\n                    print!(\"┃  \");\n                }\n            }\n        }\n\n        // We are the last in this sub-timeline\n        if *is_last.last().unwrap() {\n            br_sym = \"┗━\";\n        }\n\n        print!(\"{br_sym} @{ancestor_lsn}: \");\n    }\n\n    // Finally print a timeline id and name with new line\n    println!(\n        \"{} [{}]\",\n        timeline.name.as_deref().unwrap_or(\"_no_name_\"),\n        timeline.info.timeline_id\n    );\n\n    let len = timeline.children.len();\n    let mut i: usize = 0;\n    let mut is_last_new = Vec::from(is_last);\n    is_last_new.push(false);\n\n    for child in &timeline.children {\n        i += 1;\n\n        // Mark that the last padding is the end of the timeline\n        if i == len {\n            if let Some(last) = is_last_new.last_mut() {\n                *last = true;\n            }\n        }\n\n        print_timeline(\n            nesting_level + 1,\n            &is_last_new,\n            timelines\n                .get(child)\n                .context(\"missing timeline info in the HashMap\")?,\n            timelines,\n        )?;\n    }\n\n    Ok(())\n}\n\n/// Helper function to get tenant id from an optional --tenant_id option or from the config file\nfn get_tenant_id(\n    tenant_id_arg: Option<TenantId>,\n    env: &local_env::LocalEnv,\n) -> anyhow::Result<TenantId> {\n    if let Some(tenant_id_from_arguments) = tenant_id_arg {\n        Ok(tenant_id_from_arguments)\n    } else if let Some(default_id) = 
env.default_tenant_id {\n        Ok(default_id)\n    } else {\n        anyhow::bail!(\"No tenant id. Use --tenant-id, or set a default tenant\");\n    }\n}\n\n/// Helper function to get tenant-shard ID from an optional --tenant_id option or from the config file,\n/// for commands that accept a shard suffix\nfn get_tenant_shard_id(\n    tenant_shard_id_arg: Option<TenantShardId>,\n    env: &local_env::LocalEnv,\n) -> anyhow::Result<TenantShardId> {\n    if let Some(tenant_id_from_arguments) = tenant_shard_id_arg {\n        Ok(tenant_id_from_arguments)\n    } else if let Some(default_id) = env.default_tenant_id {\n        Ok(TenantShardId::unsharded(default_id))\n    } else {\n        anyhow::bail!(\"No tenant shard id. Use --tenant-id, or set a default tenant\");\n    }\n}\n\nfn handle_init(args: &InitCmdArgs) -> anyhow::Result<LocalEnv> {\n    // Create the in-memory `LocalEnv` that we'd normally load from disk in `load_config`.\n    let init_conf: NeonLocalInitConf = if let Some(config_path) = &args.config {\n        // User (likely the Python test suite) provided a description of the environment.\n        if args.num_pageservers.is_some() {\n            bail!(\n                \"Cannot specify both --num-pageservers and --config, use key `pageservers` in the --config file instead\"\n            );\n        }\n        // load and parse the file\n        let contents = std::fs::read_to_string(config_path).with_context(|| {\n            format!(\n                \"Could not read configuration file '{}'\",\n                config_path.display()\n            )\n        })?;\n        toml_edit::de::from_str(&contents)?\n    } else {\n        // User (likely interactive) did not provide a description of the environment, give them the default\n        NeonLocalInitConf {\n            control_plane_api: Some(DEFAULT_PAGESERVER_CONTROL_PLANE_API.parse().unwrap()),\n            broker: NeonBroker {\n                listen_addr: Some(DEFAULT_BROKER_ADDR.parse().unwrap()),\n  
              listen_https_addr: None,\n            },\n            safekeepers: vec![SafekeeperConf {\n                id: DEFAULT_SAFEKEEPER_ID,\n                pg_port: DEFAULT_SAFEKEEPER_PG_PORT,\n                http_port: DEFAULT_SAFEKEEPER_HTTP_PORT,\n                ..Default::default()\n            }],\n            pageservers: (0..args.num_pageservers.unwrap_or(1))\n                .map(|i| {\n                    let pageserver_id = NodeId(DEFAULT_PAGESERVER_ID.0 + i as u64);\n                    let pg_port = DEFAULT_PAGESERVER_PG_PORT + i;\n                    let http_port = DEFAULT_PAGESERVER_HTTP_PORT + i;\n                    let grpc_port = DEFAULT_PAGESERVER_GRPC_PORT + i;\n                    NeonLocalInitPageserverConf {\n                        id: pageserver_id,\n                        listen_pg_addr: format!(\"127.0.0.1:{pg_port}\"),\n                        listen_http_addr: format!(\"127.0.0.1:{http_port}\"),\n                        listen_https_addr: None,\n                        listen_grpc_addr: Some(format!(\"127.0.0.1:{grpc_port}\")),\n                        pg_auth_type: AuthType::Trust,\n                        http_auth_type: AuthType::Trust,\n                        grpc_auth_type: AuthType::Trust,\n                        other: Default::default(),\n                        // Typical developer machines use disks with slow fsync, and we don't care\n                        // about data integrity: disable disk syncs.\n                        no_sync: true,\n                    }\n                })\n                .collect(),\n            endpoint_storage: EndpointStorageConf {\n                listen_addr: ENDPOINT_STORAGE_DEFAULT_ADDR,\n            },\n            pg_distrib_dir: None,\n            neon_distrib_dir: None,\n            default_tenant_id: TenantId::from_array(std::array::from_fn(|_| 0)),\n            storage_controller: None,\n            control_plane_hooks_api: None,\n            generate_local_ssl_certs: 
false,\n        }\n    };\n\n    LocalEnv::init(init_conf, &args.force)\n        .context(\"materialize initial neon_local environment on disk\")?;\n    Ok(LocalEnv::load_config(&local_env::base_path())\n        .expect(\"freshly written config should be loadable\"))\n}\n\n/// The default pageserver is the one where CLI tenant/timeline operations are sent by default.\n/// For typical interactive use, one would just run with a single pageserver.  Scenarios with\n/// tenant/timeline placement across multiple pageservers are managed by python test code rather\n/// than this CLI.\nfn get_default_pageserver(env: &local_env::LocalEnv) -> PageServerNode {\n    let ps_conf = env\n        .pageservers\n        .first()\n        .expect(\"Config is validated to contain at least one pageserver\");\n    PageServerNode::from_env(env, ps_conf)\n}\n\nasync fn handle_tenant(subcmd: &TenantCmd, env: &mut local_env::LocalEnv) -> anyhow::Result<()> {\n    let pageserver = get_default_pageserver(env);\n    match subcmd {\n        TenantCmd::List => {\n            for t in pageserver.tenant_list().await? 
{\n                println!(\"{} {:?}\", t.id, t.state);\n            }\n        }\n        TenantCmd::Import(args) => {\n            let tenant_id = args.tenant_id;\n\n            let storage_controller = StorageController::from_env(env);\n            let create_response = storage_controller.tenant_import(tenant_id).await?;\n\n            let shard_zero = create_response\n                .shards\n                .first()\n                .expect(\"Import response omitted shards\");\n\n            let attached_pageserver_id = shard_zero.node_id;\n            let pageserver =\n                PageServerNode::from_env(env, env.get_pageserver_conf(attached_pageserver_id)?);\n\n            println!(\n                \"Imported tenant {tenant_id}, attached to pageserver {attached_pageserver_id}\"\n            );\n\n            let timelines = pageserver\n                .http_client\n                .list_timelines(shard_zero.shard_id)\n                .await?;\n\n            // Pick a 'main' timeline that has no ancestors, the rest will get arbitrary names\n            let main_timeline = timelines\n                .iter()\n                .find(|t| t.ancestor_timeline_id.is_none())\n                .expect(\"No timelines found\")\n                .timeline_id;\n\n            let mut branch_i = 0;\n            for timeline in timelines.iter() {\n                let branch_name = if timeline.timeline_id == main_timeline {\n                    \"main\".to_string()\n                } else {\n                    branch_i += 1;\n                    format!(\"branch_{branch_i}\")\n                };\n\n                println!(\n                    \"Importing timeline {tenant_id}/{} as branch {branch_name}\",\n                    timeline.timeline_id\n                );\n\n                env.register_branch_mapping(branch_name, tenant_id, timeline.timeline_id)?;\n            }\n        }\n        TenantCmd::Create(args) => {\n            let tenant_conf: HashMap<_, _> =\n  
              args.config.iter().flat_map(|c| c.split_once(':')).collect();\n\n            let tenant_conf = PageServerNode::parse_config(tenant_conf)?;\n\n            // If tenant ID was not specified, generate one\n            let tenant_id = args.tenant_id.unwrap_or_else(TenantId::generate);\n\n            // We must register the tenant with the storage controller, so\n            // that when the pageserver restarts, it will be re-attached.\n            let storage_controller = StorageController::from_env(env);\n            storage_controller\n                .tenant_create(TenantCreateRequest {\n                    // Note that ::unsharded here isn't actually because the tenant is unsharded, its because the\n                    // storage controller expects a shard-naive tenant_id in this attribute, and the TenantCreateRequest\n                    // type is used both in the storage controller (for creating tenants) and in the pageserver (for\n                    // creating shards)\n                    new_tenant_id: TenantShardId::unsharded(tenant_id),\n                    generation: None,\n                    shard_parameters: ShardParameters {\n                        count: ShardCount::new(args.shard_count),\n                        stripe_size: args\n                            .shard_stripe_size\n                            .map(ShardStripeSize)\n                            .unwrap_or(DEFAULT_STRIPE_SIZE),\n                    },\n                    placement_policy: args.placement_policy.clone(),\n                    config: tenant_conf,\n                })\n                .await?;\n            println!(\"tenant {tenant_id} successfully created on the pageserver\");\n\n            // Create an initial timeline for the new tenant\n            let new_timeline_id = args.timeline_id.unwrap_or(TimelineId::generate());\n\n            // FIXME: passing None for ancestor_start_lsn is not kosher in a sharded world: we can't have\n            // different 
shards picking different start lsns.  Maybe we have to teach storage controller\n            // to let shard 0 branch first and then propagate the chosen LSN to other shards.\n            storage_controller\n                .tenant_timeline_create(\n                    tenant_id,\n                    TimelineCreateRequest {\n                        new_timeline_id,\n                        mode: pageserver_api::models::TimelineCreateRequestMode::Bootstrap {\n                            existing_initdb_timeline_id: None,\n                            pg_version: Some(args.pg_version),\n                        },\n                    },\n                )\n                .await?;\n\n            env.register_branch_mapping(\n                DEFAULT_BRANCH_NAME.to_string(),\n                tenant_id,\n                new_timeline_id,\n            )?;\n\n            println!(\"Created an initial timeline '{new_timeline_id}' for tenant: {tenant_id}\",);\n\n            if args.set_default {\n                println!(\"Setting tenant {tenant_id} as a default one\");\n                env.default_tenant_id = Some(tenant_id);\n            }\n        }\n        TenantCmd::SetDefault(args) => {\n            println!(\"Setting tenant {} as a default one\", args.tenant_id);\n            env.default_tenant_id = Some(args.tenant_id);\n        }\n        TenantCmd::Config(args) => {\n            let tenant_id = get_tenant_id(args.tenant_id, env)?;\n            let tenant_conf: HashMap<_, _> =\n                args.config.iter().flat_map(|c| c.split_once(':')).collect();\n            let config = PageServerNode::parse_config(tenant_conf)?;\n\n            let req = TenantConfigRequest { tenant_id, config };\n\n            let storage_controller = StorageController::from_env(env);\n            storage_controller\n                .set_tenant_config(&req)\n                .await\n                .with_context(|| format!(\"Tenant config failed for tenant with id {tenant_id}\"))?;\n       
     println!(\"tenant {tenant_id} successfully configured via storcon\");\n        }\n    }\n    Ok(())\n}\n\nasync fn handle_timeline(cmd: &TimelineCmd, env: &mut local_env::LocalEnv) -> Result<()> {\n    let pageserver = get_default_pageserver(env);\n\n    match cmd {\n        TimelineCmd::List(args) => {\n            // TODO(sharding): this command shouldn't have to specify a shard ID: we should ask the storage controller\n            // where shard 0 is attached, and query there.\n            let tenant_shard_id = get_tenant_shard_id(args.tenant_shard_id, env)?;\n            let timelines = pageserver.timeline_list(&tenant_shard_id).await?;\n            print_timelines_tree(timelines, env.timeline_name_mappings())?;\n        }\n        TimelineCmd::Create(args) => {\n            let tenant_id = get_tenant_id(args.tenant_id, env)?;\n            let new_branch_name = &args.branch_name;\n            let new_timeline_id_opt = args.timeline_id;\n            let new_timeline_id = new_timeline_id_opt.unwrap_or(TimelineId::generate());\n\n            let storage_controller = StorageController::from_env(env);\n            let create_req = TimelineCreateRequest {\n                new_timeline_id,\n                mode: pageserver_api::models::TimelineCreateRequestMode::Bootstrap {\n                    existing_initdb_timeline_id: None,\n                    pg_version: Some(args.pg_version),\n                },\n            };\n            let timeline_info = storage_controller\n                .tenant_timeline_create(tenant_id, create_req)\n                .await?;\n\n            let last_record_lsn = timeline_info.last_record_lsn;\n            env.register_branch_mapping(new_branch_name.to_string(), tenant_id, new_timeline_id)?;\n\n            println!(\n                \"Created timeline '{}' at Lsn {last_record_lsn} for tenant: {tenant_id}\",\n                timeline_info.timeline_id\n            );\n        }\n        // TODO: rename to import-basebackup-plus-wal\n 
       TimelineCmd::Import(args) => {\n            let tenant_id = get_tenant_id(args.tenant_id, env)?;\n            let timeline_id = args.timeline_id;\n            let branch_name = &args.branch_name;\n\n            // Parse base inputs\n            let base = (args.base_lsn, args.base_tarfile.clone());\n\n            // Parse pg_wal inputs\n            let wal_tarfile = args.wal_tarfile.clone();\n            let end_lsn = args.end_lsn;\n            // TODO validate both or none are provided\n            let pg_wal = end_lsn.zip(wal_tarfile);\n\n            println!(\"Importing timeline into pageserver ...\");\n            pageserver\n                .timeline_import(tenant_id, timeline_id, base, pg_wal, args.pg_version)\n                .await?;\n            if env.storage_controller.timelines_onto_safekeepers {\n                println!(\"Creating timeline on safekeeper ...\");\n                let timeline_info = pageserver\n                    .timeline_info(\n                        TenantShardId::unsharded(tenant_id),\n                        timeline_id,\n                        pageserver_client::mgmt_api::ForceAwaitLogicalSize::No,\n                    )\n                    .await?;\n                let default_sk = SafekeeperNode::from_env(env, env.safekeepers.first().unwrap());\n                let default_host = default_sk\n                    .conf\n                    .listen_addr\n                    .clone()\n                    .unwrap_or_else(|| \"localhost\".to_string());\n                let mconf = safekeeper_api::membership::Configuration {\n                    generation: SafekeeperGeneration::new(1),\n                    members: safekeeper_api::membership::MemberSet {\n                        m: vec![SafekeeperId {\n                            host: default_host,\n                            id: default_sk.conf.id,\n                            pg_port: default_sk.conf.pg_port,\n                        }],\n                    },\n        
            new_members: None,\n                };\n                let pg_version = PgVersionId::from(args.pg_version);\n                let req = safekeeper_api::models::TimelineCreateRequest {\n                    tenant_id,\n                    timeline_id,\n                    mconf,\n                    pg_version,\n                    system_id: None,\n                    wal_seg_size: None,\n                    start_lsn: timeline_info.last_record_lsn,\n                    commit_lsn: None,\n                };\n                default_sk.create_timeline(&req).await?;\n            }\n            env.register_branch_mapping(branch_name.to_string(), tenant_id, timeline_id)?;\n            println!(\"Done\");\n        }\n        TimelineCmd::Branch(args) => {\n            let tenant_id = get_tenant_id(args.tenant_id, env)?;\n            let new_timeline_id = args.timeline_id.unwrap_or(TimelineId::generate());\n            let new_branch_name = &args.branch_name;\n            let ancestor_branch_name = args\n                .ancestor_branch_name\n                .clone()\n                .unwrap_or(DEFAULT_BRANCH_NAME.to_owned());\n            let ancestor_timeline_id = env\n                .get_branch_timeline_id(&ancestor_branch_name, tenant_id)\n                .ok_or_else(|| {\n                    anyhow!(\"Found no timeline id for branch name '{ancestor_branch_name}'\")\n                })?;\n\n            let start_lsn = args.ancestor_start_lsn;\n            let storage_controller = StorageController::from_env(env);\n            let create_req = TimelineCreateRequest {\n                new_timeline_id,\n                mode: pageserver_api::models::TimelineCreateRequestMode::Branch {\n                    ancestor_timeline_id,\n                    ancestor_start_lsn: start_lsn,\n                    read_only: false,\n                    pg_version: None,\n                },\n            };\n            let timeline_info = storage_controller\n                
.tenant_timeline_create(tenant_id, create_req)\n                .await?;\n\n            let last_record_lsn = timeline_info.last_record_lsn;\n\n            env.register_branch_mapping(new_branch_name.to_string(), tenant_id, new_timeline_id)?;\n\n            println!(\n                \"Created timeline '{}' at Lsn {last_record_lsn} for tenant: {tenant_id}. Ancestor timeline: '{ancestor_branch_name}'\",\n                timeline_info.timeline_id\n            );\n        }\n    }\n\n    Ok(())\n}\n\nasync fn handle_endpoint(subcmd: &EndpointCmd, env: &local_env::LocalEnv) -> Result<()> {\n    let mut cplane = ComputeControlPlane::load(env.clone())?;\n\n    match subcmd {\n        EndpointCmd::List(args) => {\n            // TODO(sharding): this command shouldn't have to specify a shard ID: we should ask the storage controller\n            // where shard 0 is attached, and query there.\n            let tenant_shard_id = get_tenant_shard_id(args.tenant_shard_id, env)?;\n\n            let timeline_name_mappings = env.timeline_name_mappings();\n\n            let mut table = comfy_table::Table::new();\n\n            table.load_preset(comfy_table::presets::NOTHING);\n\n            table.set_header([\n                \"ENDPOINT\",\n                \"ADDRESS\",\n                \"TIMELINE\",\n                \"BRANCH NAME\",\n                \"LSN\",\n                \"STATUS\",\n            ]);\n\n            for (endpoint_id, endpoint) in cplane\n                .endpoints\n                .iter()\n                .filter(|(_, endpoint)| endpoint.tenant_id == tenant_shard_id.tenant_id)\n            {\n                let lsn_str = match endpoint.mode {\n                    ComputeMode::Static(lsn) => {\n                        // -> read-only endpoint\n                        // Use the node's LSN.\n                        lsn.to_string()\n                    }\n                    _ => {\n                        // As the LSN here refers to the one that the compute is 
started with,\n                        // we display nothing as it is a primary/hot standby compute.\n                        \"---\".to_string()\n                    }\n                };\n\n                let branch_name = timeline_name_mappings\n                    .get(&TenantTimelineId::new(\n                        tenant_shard_id.tenant_id,\n                        endpoint.timeline_id,\n                    ))\n                    .map(|name| name.as_str())\n                    .unwrap_or(\"?\");\n\n                table.add_row([\n                    endpoint_id.as_str(),\n                    &endpoint.pg_address.to_string(),\n                    &endpoint.timeline_id.to_string(),\n                    branch_name,\n                    lsn_str.as_str(),\n                    &format!(\"{}\", endpoint.status()),\n                ]);\n            }\n\n            println!(\"{table}\");\n        }\n        EndpointCmd::Create(args) => {\n            let tenant_id = get_tenant_id(args.tenant_id, env)?;\n            let branch_name = args\n                .branch_name\n                .clone()\n                .unwrap_or(DEFAULT_BRANCH_NAME.to_owned());\n            let endpoint_id = args\n                .endpoint_id\n                .clone()\n                .unwrap_or_else(|| format!(\"ep-{branch_name}\"));\n\n            let timeline_id = env\n                .get_branch_timeline_id(&branch_name, tenant_id)\n                .ok_or_else(|| anyhow!(\"Found no timeline id for branch name '{branch_name}'\"))?;\n\n            let mode = match (args.lsn, args.hot_standby) {\n                (Some(lsn), false) => ComputeMode::Static(lsn),\n                (None, true) => ComputeMode::Replica,\n                (None, false) => ComputeMode::Primary,\n                (Some(_), true) => anyhow::bail!(\"cannot specify both lsn and hot-standby\"),\n            };\n\n            match (mode, args.hot_standby) {\n                (ComputeMode::Static(_), true) => {\n         
           bail!(\n                        \"Cannot start a node in hot standby mode when it is already configured as a static replica\"\n                    )\n                }\n                (ComputeMode::Primary, true) => {\n                    bail!(\n                        \"Cannot start a node as a hot standby replica, it is already configured as primary node\"\n                    )\n                }\n                _ => {}\n            }\n\n            if !args.allow_multiple {\n                cplane.check_conflicting_endpoints(mode, tenant_id, timeline_id)?;\n            }\n\n            cplane.new_endpoint(\n                &endpoint_id,\n                tenant_id,\n                timeline_id,\n                args.pg_port,\n                args.external_http_port,\n                args.internal_http_port,\n                args.pg_version,\n                mode,\n                args.grpc,\n                !args.update_catalog,\n                false,\n                args.privileged_role_name.clone(),\n            )?;\n        }\n        EndpointCmd::Start(args) => {\n            let endpoint_id = &args.endpoint_id;\n            let pageserver_id = args.endpoint_pageserver_id;\n            let remote_ext_base_url = &args.remote_ext_base_url;\n\n            let default_generation = env\n                .storage_controller\n                .timelines_onto_safekeepers\n                .then_some(1);\n            let safekeepers_generation = args\n                .safekeepers_generation\n                .or(default_generation)\n                .map(SafekeeperGeneration::new);\n            // If --safekeepers argument is given, use only the listed\n            // safekeeper nodes; otherwise all from the env.\n            let safekeepers = if let Some(safekeepers) = parse_safekeepers(&args.safekeepers)? 
{\n                safekeepers\n            } else {\n                env.safekeepers.iter().map(|sk| sk.id).collect()\n            };\n\n            let endpoint = cplane\n                .endpoints\n                .get(endpoint_id.as_str())\n                .ok_or_else(|| anyhow!(\"endpoint {endpoint_id} not found\"))?;\n\n            if !args.allow_multiple {\n                cplane.check_conflicting_endpoints(\n                    endpoint.mode,\n                    endpoint.tenant_id,\n                    endpoint.timeline_id,\n                )?;\n            }\n\n            let prefer_protocol = if endpoint.grpc {\n                PageserverProtocol::Grpc\n            } else {\n                PageserverProtocol::Libpq\n            };\n\n            let mut pageserver_conninfo = if let Some(ps_id) = pageserver_id {\n                let conf = env.get_pageserver_conf(ps_id).unwrap();\n                local_pageserver_conf_to_conn_info(conf)?\n            } else {\n                // Look up the currently attached location of the tenant, and its striping metadata,\n                // to pass these on to postgres.\n                let storage_controller = StorageController::from_env(env);\n                let locate_result = storage_controller.tenant_locate(endpoint.tenant_id).await?;\n                assert!(!locate_result.shards.is_empty());\n\n                // Initialize LSN leases for static computes.\n                if let ComputeMode::Static(lsn) = endpoint.mode {\n                    futures::future::try_join_all(locate_result.shards.iter().map(\n                        |shard| async move {\n                            let conf = env.get_pageserver_conf(shard.node_id).unwrap();\n                            let pageserver = PageServerNode::from_env(env, conf);\n\n                            pageserver\n                                .http_client\n                                .timeline_init_lsn_lease(shard.shard_id, endpoint.timeline_id, lsn)\n    
                            .await\n                        },\n                    ))\n                    .await?;\n                }\n\n                tenant_locate_response_to_conn_info(&locate_result)?\n            };\n            pageserver_conninfo.prefer_protocol = prefer_protocol;\n\n            let ps_conf = env.get_pageserver_conf(DEFAULT_PAGESERVER_ID)?;\n            let auth_token = if matches!(ps_conf.pg_auth_type, AuthType::NeonJWT) {\n                let claims = Claims::new(Some(endpoint.tenant_id), Scope::Tenant);\n\n                Some(env.generate_auth_token(&claims)?)\n            } else {\n                None\n            };\n\n            let exp = (std::time::SystemTime::now().duration_since(std::time::UNIX_EPOCH)?\n                + Duration::from_secs(86400))\n            .as_secs();\n            let claims = endpoint_storage::claims::EndpointStorageClaims {\n                tenant_id: endpoint.tenant_id,\n                timeline_id: endpoint.timeline_id,\n                endpoint_id: endpoint_id.to_string(),\n                exp,\n            };\n\n            let endpoint_storage_token = env.generate_auth_token(&claims)?;\n            let endpoint_storage_addr = env.endpoint_storage.listen_addr.to_string();\n\n            let args = control_plane::endpoint::EndpointStartArgs {\n                auth_token,\n                endpoint_storage_token,\n                endpoint_storage_addr,\n                safekeepers_generation,\n                safekeepers,\n                pageserver_conninfo,\n                remote_ext_base_url: remote_ext_base_url.clone(),\n                create_test_user: args.create_test_user,\n                start_timeout: args.start_timeout,\n                autoprewarm: args.autoprewarm,\n                offload_lfc_interval_seconds: args.offload_lfc_interval_seconds,\n                dev: args.dev,\n            };\n\n            println!(\"Starting existing endpoint {endpoint_id}...\");\n            
endpoint.start(args).await?;\n        }\n        EndpointCmd::UpdatePageservers(args) => {\n            let endpoint_id = &args.endpoint_id;\n            let endpoint = cplane\n                .endpoints\n                .get(endpoint_id.as_str())\n                .with_context(|| format!(\"postgres endpoint {endpoint_id} is not found\"))?;\n            let prefer_protocol = if endpoint.grpc {\n                PageserverProtocol::Grpc\n            } else {\n                PageserverProtocol::Libpq\n            };\n            let mut pageserver_conninfo = match args.pageserver_id {\n                Some(pageserver_id) => {\n                    let conf = env.get_pageserver_conf(pageserver_id)?;\n                    local_pageserver_conf_to_conn_info(conf)?\n                }\n                None => {\n                    let storage_controller = StorageController::from_env(env);\n                    let locate_result =\n                        storage_controller.tenant_locate(endpoint.tenant_id).await?;\n\n                    tenant_locate_response_to_conn_info(&locate_result)?\n                }\n            };\n            pageserver_conninfo.prefer_protocol = prefer_protocol;\n\n            endpoint\n                .update_pageservers_in_config(&pageserver_conninfo)\n                .await?;\n        }\n        EndpointCmd::Reconfigure(args) => {\n            let endpoint_id = &args.endpoint_id;\n            let endpoint = cplane\n                .endpoints\n                .get(endpoint_id.as_str())\n                .with_context(|| format!(\"postgres endpoint {endpoint_id} is not found\"))?;\n\n            let prefer_protocol = if endpoint.grpc {\n                PageserverProtocol::Grpc\n            } else {\n                PageserverProtocol::Libpq\n            };\n            let mut pageserver_conninfo = if let Some(ps_id) = args.endpoint_pageserver_id {\n                let conf = env.get_pageserver_conf(ps_id)?;\n                
local_pageserver_conf_to_conn_info(conf)?\n            } else {\n                // Look up the currently attached location of the tenant, and its striping metadata,\n                // to pass these on to postgres.\n                let storage_controller = StorageController::from_env(env);\n                let locate_result = storage_controller.tenant_locate(endpoint.tenant_id).await?;\n\n                tenant_locate_response_to_conn_info(&locate_result)?\n            };\n            pageserver_conninfo.prefer_protocol = prefer_protocol;\n\n            // If --safekeepers argument is given, use only the listed\n            // safekeeper nodes; otherwise all from the env.\n            let safekeepers = parse_safekeepers(&args.safekeepers)?;\n            endpoint\n                .reconfigure(Some(&pageserver_conninfo), safekeepers, None)\n                .await?;\n        }\n        EndpointCmd::RefreshConfiguration(args) => {\n            let endpoint_id = &args.endpoint_id;\n            let endpoint = cplane\n                .endpoints\n                .get(endpoint_id.as_str())\n                .with_context(|| format!(\"postgres endpoint {endpoint_id} is not found\"))?;\n            endpoint.refresh_configuration().await?;\n        }\n        EndpointCmd::Stop(args) => {\n            let endpoint_id = &args.endpoint_id;\n            let endpoint = cplane\n                .endpoints\n                .get(endpoint_id)\n                .with_context(|| format!(\"postgres endpoint {endpoint_id} is not found\"))?;\n            match endpoint.stop(args.mode, args.destroy).await?.lsn {\n                Some(lsn) => println!(\"{lsn}\"),\n                None => println!(\"null\"),\n            }\n        }\n        EndpointCmd::GenerateJwt(args) => {\n            let endpoint = {\n                let endpoint_id = &args.endpoint_id;\n\n                cplane\n                    .endpoints\n                    .get(endpoint_id)\n                    .with_context(|| 
format!(\"postgres endpoint {endpoint_id} is not found\"))?\n            };\n\n            let jwt = endpoint.generate_jwt(args.scope)?;\n\n            print!(\"{jwt}\");\n        }\n    }\n\n    Ok(())\n}\n\n/// Parse --safekeepers as list of safekeeper ids.\nfn parse_safekeepers(safekeepers_str: &Option<String>) -> Result<Option<Vec<NodeId>>> {\n    if let Some(safekeepers_str) = safekeepers_str {\n        let mut safekeepers: Vec<NodeId> = Vec::new();\n        for sk_id in safekeepers_str.split(',').map(str::trim) {\n            let sk_id = NodeId(\n                u64::from_str(sk_id)\n                    .map_err(|_| anyhow!(\"invalid node ID \\\"{sk_id}\\\" in --safekeepers list\"))?,\n            );\n            safekeepers.push(sk_id);\n        }\n        Ok(Some(safekeepers))\n    } else {\n        Ok(None)\n    }\n}\n\nfn handle_mappings(subcmd: &MappingsCmd, env: &mut local_env::LocalEnv) -> Result<()> {\n    match subcmd {\n        MappingsCmd::Map(args) => {\n            env.register_branch_mapping(\n                args.branch_name.to_owned(),\n                args.tenant_id,\n                args.timeline_id,\n            )?;\n\n            Ok(())\n        }\n    }\n}\n\nfn get_pageserver(\n    env: &local_env::LocalEnv,\n    pageserver_id_arg: Option<NodeId>,\n) -> Result<PageServerNode> {\n    let node_id = pageserver_id_arg.unwrap_or(DEFAULT_PAGESERVER_ID);\n\n    Ok(PageServerNode::from_env(\n        env,\n        env.get_pageserver_conf(node_id)?,\n    ))\n}\n\nasync fn handle_pageserver(subcmd: &PageserverCmd, env: &local_env::LocalEnv) -> Result<()> {\n    match subcmd {\n        PageserverCmd::Start(args) => {\n            if let Err(e) = get_pageserver(env, args.pageserver_id)?\n                .start(&args.start_timeout)\n                .await\n            {\n                eprintln!(\"pageserver start failed: {e}\");\n                exit(1);\n            }\n        }\n\n        PageserverCmd::Stop(args) => {\n            let immediate = 
match args.stop_mode {\n                StopMode::Fast => false,\n                StopMode::Immediate => true,\n            };\n            if let Err(e) = get_pageserver(env, args.pageserver_id)?.stop(immediate) {\n                eprintln!(\"pageserver stop failed: {e}\");\n                exit(1);\n            }\n        }\n\n        PageserverCmd::Restart(args) => {\n            let pageserver = get_pageserver(env, args.pageserver_id)?;\n            //TODO what shutdown strategy should we use here?\n            if let Err(e) = pageserver.stop(false) {\n                eprintln!(\"pageserver stop failed: {e}\");\n                exit(1);\n            }\n\n            if let Err(e) = pageserver.start(&args.start_timeout).await {\n                eprintln!(\"pageserver start failed: {e}\");\n                exit(1);\n            }\n        }\n\n        PageserverCmd::Status(args) => {\n            match get_pageserver(env, args.pageserver_id)?\n                .check_status()\n                .await\n            {\n                Ok(_) => println!(\"Page server is up and running\"),\n                Err(err) => {\n                    eprintln!(\"Page server is not available: {err}\");\n                    exit(1);\n                }\n            }\n        }\n    }\n    Ok(())\n}\n\nasync fn handle_storage_controller(\n    subcmd: &StorageControllerCmd,\n    env: &local_env::LocalEnv,\n) -> Result<()> {\n    let svc = StorageController::from_env(env);\n    match subcmd {\n        StorageControllerCmd::Start(args) => {\n            let start_args = NeonStorageControllerStartArgs {\n                instance_id: args.instance_id,\n                base_port: args.base_port,\n                start_timeout: args.start_timeout,\n                handle_ps_local_disk_loss: args.handle_ps_local_disk_loss,\n            };\n\n            if let Err(e) = svc.start(start_args).await {\n                eprintln!(\"start failed: {e}\");\n                exit(1);\n            }\n 
       }\n\n        StorageControllerCmd::Stop(args) => {\n            let stop_args = NeonStorageControllerStopArgs {\n                instance_id: args.instance_id,\n                immediate: match args.stop_mode {\n                    StopMode::Fast => false,\n                    StopMode::Immediate => true,\n                },\n            };\n            if let Err(e) = svc.stop(stop_args).await {\n                eprintln!(\"stop failed: {e}\");\n                exit(1);\n            }\n        }\n    }\n    Ok(())\n}\n\nfn get_safekeeper(env: &local_env::LocalEnv, id: NodeId) -> Result<SafekeeperNode> {\n    if let Some(node) = env.safekeepers.iter().find(|node| node.id == id) {\n        Ok(SafekeeperNode::from_env(env, node))\n    } else {\n        bail!(\"could not find safekeeper {id}\")\n    }\n}\n\nasync fn handle_safekeeper(subcmd: &SafekeeperCmd, env: &local_env::LocalEnv) -> Result<()> {\n    match subcmd {\n        SafekeeperCmd::Start(args) => {\n            let safekeeper = get_safekeeper(env, args.id)?;\n\n            if let Err(e) = safekeeper.start(&args.extra_opt, &args.start_timeout).await {\n                eprintln!(\"safekeeper start failed: {e}\");\n                exit(1);\n            }\n        }\n\n        SafekeeperCmd::Stop(args) => {\n            let safekeeper = get_safekeeper(env, args.id)?;\n            let immediate = match args.stop_mode {\n                StopMode::Fast => false,\n                StopMode::Immediate => true,\n            };\n            if let Err(e) = safekeeper.stop(immediate) {\n                eprintln!(\"safekeeper stop failed: {e}\");\n                exit(1);\n            }\n        }\n\n        SafekeeperCmd::Restart(args) => {\n            let safekeeper = get_safekeeper(env, args.id)?;\n            let immediate = match args.stop_mode {\n                StopMode::Fast => false,\n                StopMode::Immediate => true,\n            };\n\n            if let Err(e) = safekeeper.stop(immediate) 
{\n                eprintln!(\"safekeeper stop failed: {e}\");\n                exit(1);\n            }\n\n            if let Err(e) = safekeeper.start(&args.extra_opt, &args.start_timeout).await {\n                eprintln!(\"safekeeper start failed: {e}\");\n                exit(1);\n            }\n        }\n    }\n    Ok(())\n}\n\nasync fn handle_endpoint_storage(\n    subcmd: &EndpointStorageCmd,\n    env: &local_env::LocalEnv,\n) -> Result<()> {\n    use EndpointStorageCmd::*;\n    let storage = EndpointStorage::from_env(env);\n\n    // In tests like test_forward_compatibility or test_graceful_cluster_restart\n    // old neon binaries (without endpoint_storage) are present\n    if !storage.bin.exists() {\n        eprintln!(\n            \"{} binary not found. Ignore if this is a compatibility test\",\n            storage.bin\n        );\n        return Ok(());\n    }\n\n    match subcmd {\n        Start(EndpointStorageStartCmd { start_timeout }) => {\n            if let Err(e) = storage.start(start_timeout).await {\n                eprintln!(\"endpoint_storage start failed: {e}\");\n                exit(1);\n            }\n        }\n        Stop(EndpointStorageStopCmd { stop_mode }) => {\n            let immediate = match stop_mode {\n                StopMode::Fast => false,\n                StopMode::Immediate => true,\n            };\n            if let Err(e) = storage.stop(immediate) {\n                eprintln!(\"proxy stop failed: {e}\");\n                exit(1);\n            }\n        }\n    };\n    Ok(())\n}\n\nasync fn handle_storage_broker(subcmd: &StorageBrokerCmd, env: &local_env::LocalEnv) -> Result<()> {\n    match subcmd {\n        StorageBrokerCmd::Start(args) => {\n            let storage_broker = StorageBroker::from_env(env);\n            if let Err(e) = storage_broker.start(&args.start_timeout).await {\n                eprintln!(\"broker start failed: {e}\");\n                exit(1);\n            }\n        }\n\n        
StorageBrokerCmd::Stop(_args) => {\n            // FIXME: stop_mode unused\n            let storage_broker = StorageBroker::from_env(env);\n            if let Err(e) = storage_broker.stop() {\n                eprintln!(\"broker stop failed: {e}\");\n                exit(1);\n            }\n        }\n    }\n    Ok(())\n}\n\nasync fn handle_start_all(\n    args: &StartCmdArgs,\n    env: &'static local_env::LocalEnv,\n) -> anyhow::Result<()> {\n    // FIXME: this was called \"retry_timeout\", is it right?\n    let Err(errors) = handle_start_all_impl(env, args.timeout).await else {\n        neon_start_status_check(env, args.timeout.as_ref())\n            .await\n            .context(\"status check after successful startup of all services\")?;\n        return Ok(());\n    };\n\n    eprintln!(\"startup failed because one or more services could not be started\");\n\n    for e in errors {\n        eprintln!(\"{e}\");\n        let debug_repr = format!(\"{e:?}\");\n        for line in debug_repr.lines() {\n            eprintln!(\"  {line}\");\n        }\n    }\n\n    try_stop_all(env, true).await;\n\n    exit(2);\n}\n\n/// Returns Ok() if and only if all services could be started successfully.\n/// Otherwise, returns the list of errors that occurred during startup.\nasync fn handle_start_all_impl(\n    env: &'static local_env::LocalEnv,\n    retry_timeout: humantime::Duration,\n) -> Result<(), Vec<anyhow::Error>> {\n    // Endpoints are not started automatically\n\n    let mut js = JoinSet::new();\n\n    // force infallibility through closure\n    #[allow(clippy::redundant_closure_call)]\n    (|| {\n        js.spawn(async move {\n            let storage_broker = StorageBroker::from_env(env);\n            storage_broker\n                .start(&retry_timeout)\n                .await\n                .map_err(|e| e.context(\"start storage_broker\"))\n        });\n\n        js.spawn(async move {\n            let storage_controller = StorageController::from_env(env);\n           
 storage_controller\n                .start(NeonStorageControllerStartArgs::with_default_instance_id(\n                    retry_timeout,\n                ))\n                .await\n                .map_err(|e| e.context(\"start storage_controller\"))\n        });\n\n        for ps_conf in &env.pageservers {\n            js.spawn(async move {\n                let pageserver = PageServerNode::from_env(env, ps_conf);\n                pageserver\n                    .start(&retry_timeout)\n                    .await\n                    .map_err(|e| e.context(format!(\"start pageserver {}\", ps_conf.id)))\n            });\n        }\n\n        for node in env.safekeepers.iter() {\n            js.spawn(async move {\n                let safekeeper = SafekeeperNode::from_env(env, node);\n                safekeeper\n                    .start(&[], &retry_timeout)\n                    .await\n                    .map_err(|e| e.context(format!(\"start safekeeper {}\", safekeeper.id)))\n            });\n        }\n\n        js.spawn(async move {\n            EndpointStorage::from_env(env)\n                .start(&retry_timeout)\n                .await\n                .map_err(|e| e.context(\"start endpoint_storage\"))\n        });\n    })();\n\n    let mut errors = Vec::new();\n    while let Some(result) = js.join_next().await {\n        let result = result.expect(\"we don't panic or cancel the tasks\");\n        if let Err(e) = result {\n            errors.push(e);\n        }\n    }\n\n    if !errors.is_empty() {\n        return Err(errors);\n    }\n\n    Ok(())\n}\n\nasync fn neon_start_status_check(\n    env: &local_env::LocalEnv,\n    retry_timeout: &Duration,\n) -> anyhow::Result<()> {\n    const RETRY_INTERVAL: Duration = Duration::from_millis(100);\n    const NOTICE_AFTER_RETRIES: Duration = Duration::from_secs(5);\n\n    let storcon = StorageController::from_env(env);\n\n    let retries = retry_timeout.as_millis() / RETRY_INTERVAL.as_millis();\n    let 
notice_after_retries = retry_timeout.as_millis() / NOTICE_AFTER_RETRIES.as_millis();\n\n    println!(\"\\nRunning neon status check\");\n\n    for retry in 0..retries {\n        if retry == notice_after_retries {\n            println!(\"\\nNeon status check has not passed yet, continuing to wait\")\n        }\n\n        let mut passed = true;\n        let mut nodes = storcon.node_list().await?;\n        let mut pageservers = env.pageservers.clone();\n\n        if nodes.len() != pageservers.len() {\n            continue;\n        }\n\n        nodes.sort_by_key(|ps| ps.id);\n        pageservers.sort_by_key(|ps| ps.id);\n\n        for (idx, pageserver) in pageservers.iter().enumerate() {\n            let node = &nodes[idx];\n            if node.id != pageserver.id {\n                passed = false;\n                break;\n            }\n\n            if !matches!(node.availability, NodeAvailabilityWrapper::Active) {\n                passed = false;\n                break;\n            }\n        }\n\n        if passed {\n            println!(\"\\nNeon started and passed status check\");\n            return Ok(());\n        }\n\n        tokio::time::sleep(RETRY_INTERVAL).await;\n    }\n\n    anyhow::bail!(\"\\nNeon passed status check\")\n}\n\nasync fn handle_stop_all(args: &StopCmdArgs, env: &local_env::LocalEnv) -> Result<()> {\n    let immediate = match args.mode {\n        StopMode::Fast => false,\n        StopMode::Immediate => true,\n    };\n\n    try_stop_all(env, immediate).await;\n\n    Ok(())\n}\n\nasync fn try_stop_all(env: &local_env::LocalEnv, immediate: bool) {\n    let mode = if immediate {\n        EndpointTerminateMode::Immediate\n    } else {\n        EndpointTerminateMode::Fast\n    };\n    // Stop all endpoints\n    match ComputeControlPlane::load(env.clone()) {\n        Ok(cplane) => {\n            for (_k, node) in cplane.endpoints {\n                if let Err(e) = node.stop(mode, false).await {\n                    eprintln!(\"postgres stop 
failed: {e:#}\");\n                }\n            }\n        }\n        Err(e) => {\n            eprintln!(\"postgres stop failed, could not restore control plane data from env: {e:#}\")\n        }\n    }\n\n    let storage = EndpointStorage::from_env(env);\n    if let Err(e) = storage.stop(immediate) {\n        eprintln!(\"endpoint_storage stop failed: {e:#}\");\n    }\n\n    for ps_conf in &env.pageservers {\n        let pageserver = PageServerNode::from_env(env, ps_conf);\n        if let Err(e) = pageserver.stop(immediate) {\n            eprintln!(\"pageserver {} stop failed: {:#}\", ps_conf.id, e);\n        }\n    }\n\n    for node in env.safekeepers.iter() {\n        let safekeeper = SafekeeperNode::from_env(env, node);\n        if let Err(e) = safekeeper.stop(immediate) {\n            eprintln!(\"safekeeper {} stop failed: {:#}\", safekeeper.id, e);\n        }\n    }\n\n    let storage_broker = StorageBroker::from_env(env);\n    if let Err(e) = storage_broker.stop() {\n        eprintln!(\"neon broker stop failed: {e:#}\");\n    }\n\n    // Stop all storage controller instances. In the most common case there's only one,\n    // but iterate through the base data directory in order to discover the instances.\n    let storcon_instances = env\n        .storage_controller_instances()\n        .await\n        .expect(\"Must inspect data dir\");\n    for (instance_id, _instance_dir_path) in storcon_instances {\n        let storage_controller = StorageController::from_env(env);\n        let stop_args = NeonStorageControllerStopArgs {\n            instance_id,\n            immediate,\n        };\n\n        if let Err(e) = storage_controller.stop(stop_args).await {\n            eprintln!(\"Storage controller instance {instance_id} stop failed: {e:#}\");\n        }\n    }\n}\n"
  },
  {
    "path": "control_plane/src/branch_mappings.rs",
    "content": "//! Branch mappings for convenience\n\nuse std::collections::HashMap;\nuse std::fs;\nuse std::path::Path;\n\nuse anyhow::{bail, Context};\nuse serde::{Deserialize, Serialize};\n\nuse utils::id::{TenantId, TenantTimelineId, TimelineId};\n\n/// Keep human-readable aliases in memory (and persist them to config XXX), to hide tenant/timeline hex strings from the user.\n#[derive(PartialEq, Eq, Clone, Debug, Default, Serialize, Deserialize)]\n#[serde(default, deny_unknown_fields)]\npub struct BranchMappings {\n    /// Default tenant ID to use with the 'neon_local' command line utility, when\n    /// --tenant_id is not explicitly specified. This comes from the branches.\n    pub default_tenant_id: Option<TenantId>,\n\n    // A `HashMap<String, HashMap<TenantId, TimelineId>>` would be more appropriate here,\n    // but deserialization into a generic toml object as `toml::Value::try_from` fails with an error.\n    // https://toml.io/en/v1.0.0 does not contain a concept of \"a table inside another table\".\n    pub mappings: HashMap<String, Vec<(TenantId, TimelineId)>>,\n}\n\nimpl BranchMappings {\n    pub fn register_branch_mapping(\n        &mut self,\n        branch_name: String,\n        tenant_id: TenantId,\n        timeline_id: TimelineId,\n    ) -> anyhow::Result<()> {\n        let existing_values = self.mappings.entry(branch_name.clone()).or_default();\n\n        let existing_ids = existing_values\n            .iter()\n            .find(|(existing_tenant_id, _)| existing_tenant_id == &tenant_id);\n\n        if let Some((_, old_timeline_id)) = existing_ids {\n            if old_timeline_id == &timeline_id {\n                Ok(())\n            } else {\n                bail!(\"branch '{branch_name}' is already mapped to timeline {old_timeline_id}, cannot map to another timeline {timeline_id}\");\n            }\n        } else {\n            existing_values.push((tenant_id, timeline_id));\n            Ok(())\n        }\n    }\n\n    pub fn 
get_branch_timeline_id(\n        &self,\n        branch_name: &str,\n        tenant_id: TenantId,\n    ) -> Option<TimelineId> {\n        // If it looks like a timeline ID, return it as it is\n        if let Ok(timeline_id) = branch_name.parse::<TimelineId>() {\n            return Some(timeline_id);\n        }\n\n        self.mappings\n            .get(branch_name)?\n            .iter()\n            .find(|(mapped_tenant_id, _)| mapped_tenant_id == &tenant_id)\n            .map(|&(_, timeline_id)| timeline_id)\n            .map(TimelineId::from)\n    }\n\n    pub fn timeline_name_mappings(&self) -> HashMap<TenantTimelineId, String> {\n        self.mappings\n            .iter()\n            .flat_map(|(name, tenant_timelines)| {\n                tenant_timelines.iter().map(|&(tenant_id, timeline_id)| {\n                    (TenantTimelineId::new(tenant_id, timeline_id), name.clone())\n                })\n            })\n            .collect()\n    }\n\n    pub fn persist(&self, path: &Path) -> anyhow::Result<()> {\n        let content = &toml::to_string_pretty(self)?;\n        fs::write(path, content).with_context(|| {\n            format!(\n                \"Failed to write branch information into path '{}'\",\n                path.display()\n            )\n        })\n    }\n\n    pub fn load(path: &Path) -> anyhow::Result<BranchMappings> {\n        let branches_file_contents = fs::read_to_string(path)?;\n        Ok(toml::from_str(branches_file_contents.as_str())?)\n    }\n}\n"
  },
  {
    "path": "control_plane/src/broker.rs",
    "content": "//! Code to manage the storage broker\n//!\n//! In the local test environment, the storage broker stores its data directly in\n//!\n//! ```text\n//!   .neon/storage_broker\n//! ```\nuse std::time::Duration;\n\nuse anyhow::Context;\nuse camino::Utf8PathBuf;\n\nuse crate::{background_process, local_env::LocalEnv};\n\npub struct StorageBroker {\n    env: LocalEnv,\n}\n\nimpl StorageBroker {\n    /// Create a new `StorageBroker` instance from the environment.\n    pub fn from_env(env: &LocalEnv) -> Self {\n        Self { env: env.clone() }\n    }\n\n    pub fn initialize(&self) -> anyhow::Result<()> {\n        if self.env.generate_local_ssl_certs {\n            self.env.generate_ssl_cert(\n                &self.env.storage_broker_data_dir().join(\"server.crt\"),\n                &self.env.storage_broker_data_dir().join(\"server.key\"),\n            )?;\n        }\n        Ok(())\n    }\n\n    /// Start the storage broker process.\n    pub async fn start(&self, retry_timeout: &Duration) -> anyhow::Result<()> {\n        let broker = &self.env.broker;\n\n        println!(\"Starting neon broker at {}\", broker.client_url());\n\n        let mut args = Vec::new();\n\n        if let Some(addr) = &broker.listen_addr {\n            args.push(format!(\"--listen-addr={addr}\"));\n        }\n        if let Some(addr) = &broker.listen_https_addr {\n            args.push(format!(\"--listen-https-addr={addr}\"));\n        }\n\n        let client = self.env.create_http_client();\n        background_process::start_process(\n            \"storage_broker\",\n            &self.env.storage_broker_data_dir(),\n            &self.env.storage_broker_bin(),\n            args,\n            [],\n            background_process::InitialPidFile::Create(self.pid_file_path()),\n            retry_timeout,\n            || async {\n                let url = broker.client_url();\n                let status_url = url.join(\"status\").with_context(|| {\n                    format!(\"Failed 
to append /status path to broker endpoint {url}\")\n                })?;\n                let request = client.get(status_url).build().with_context(|| {\n                    format!(\"Failed to construct request to broker endpoint {url}\")\n                })?;\n                match client.execute(request).await {\n                    Ok(resp) => Ok(resp.status().is_success()),\n                    Err(_) => Ok(false),\n                }\n            },\n        )\n        .await\n        .context(\"Failed to spawn storage_broker subprocess\")?;\n        Ok(())\n    }\n\n    /// Stop the storage broker process.\n    pub fn stop(&self) -> anyhow::Result<()> {\n        background_process::stop_process(true, \"storage_broker\", &self.pid_file_path())\n    }\n\n    /// Get the path to the PID file for the storage broker.\n    fn pid_file_path(&self) -> Utf8PathBuf {\n        Utf8PathBuf::from_path_buf(self.env.base_data_dir.join(\"storage_broker.pid\"))\n            .expect(\"non-Unicode path\")\n    }\n}\n"
  },
  {
    "path": "control_plane/src/endpoint.rs",
    "content": "//! Code to manage compute endpoints\n//!\n//! In the local test environment, the data for each endpoint is stored in\n//!\n//! ```text\n//!   .neon/endpoints/<endpoint id>\n//! ```\n//!\n//! Some basic information about the endpoint, like the tenant and timeline IDs,\n//! are stored in the `endpoint.json` file. The `endpoint.json` file is created\n//! when the endpoint is created, and doesn't change afterwards.\n//!\n//! The endpoint is managed by the `compute_ctl` binary. When an endpoint is\n//! started, we launch `compute_ctl` It synchronizes the safekeepers, downloads\n//! the basebackup from the pageserver to initialize the data directory, and\n//! finally launches the PostgreSQL process. It watches the PostgreSQL process\n//! until it exits.\n//!\n//! When an endpoint is created, a `postgresql.conf` file is also created in\n//! the endpoint's directory. The file can be modified before starting PostgreSQL.\n//! However, the `postgresql.conf` file in the endpoint directory is not used directly\n//! by PostgreSQL. It is passed to `compute_ctl`, and `compute_ctl` writes another\n//! copy of it in the data directory.\n//!\n//! Directory contents:\n//!\n//! ```text\n//! .neon/endpoints/main/\n//!     compute.log               - log output of `compute_ctl` and `postgres`\n//!     endpoint.json             - serialized `EndpointConf` struct\n//!     postgresql.conf           - postgresql settings\n//!     config.json                 - passed to `compute_ctl`\n//!     pgdata/\n//!         postgresql.conf       - copy of postgresql.conf created by `compute_ctl`\n//!         neon.signal\n//!         zenith.signal         - copy of neon.signal, for backward compatibility\n//!         <other PostgreSQL files>\n//! 
```\n//!\nuse std::collections::{BTreeMap, HashMap};\nuse std::fmt::Display;\nuse std::net::{IpAddr, Ipv4Addr, SocketAddr, TcpStream};\nuse std::path::PathBuf;\nuse std::process::Command;\nuse std::str::FromStr;\nuse std::sync::Arc;\nuse std::time::{Duration, Instant};\n\nuse anyhow::{Context, Result, anyhow, bail};\nuse base64::Engine;\nuse base64::prelude::BASE64_URL_SAFE_NO_PAD;\nuse compute_api::requests::{\n    COMPUTE_AUDIENCE, ComputeClaims, ComputeClaimsScope, ConfigurationRequest,\n};\nuse compute_api::responses::{\n    ComputeConfig, ComputeCtlConfig, ComputeStatus, ComputeStatusResponse, TerminateResponse,\n    TlsConfig,\n};\nuse compute_api::spec::{\n    Cluster, ComputeAudit, ComputeFeature, ComputeMode, ComputeSpec, Database, PageserverProtocol,\n    PageserverShardInfo, PgIdent, RemoteExtSpec, Role,\n};\n\n// re-export these, because they're used in the reconfigure() function\npub use compute_api::spec::{PageserverConnectionInfo, PageserverShardConnectionInfo};\n\nuse jsonwebtoken::jwk::{\n    AlgorithmParameters, CommonParameters, EllipticCurve, Jwk, JwkSet, KeyAlgorithm, KeyOperations,\n    OctetKeyPairParameters, OctetKeyPairType, PublicKeyUse,\n};\nuse nix::sys::signal::{Signal, kill};\nuse pem::Pem;\nuse reqwest::header::CONTENT_TYPE;\nuse safekeeper_api::PgMajorVersion;\nuse safekeeper_api::membership::SafekeeperGeneration;\nuse serde::{Deserialize, Serialize};\nuse sha2::{Digest, Sha256};\nuse spki::der::Decode;\nuse spki::{SubjectPublicKeyInfo, SubjectPublicKeyInfoRef};\nuse tracing::debug;\nuse utils::id::{NodeId, TenantId, TimelineId};\nuse utils::shard::{ShardCount, ShardIndex, ShardNumber};\n\nuse pageserver_api::config::DEFAULT_GRPC_LISTEN_PORT as DEFAULT_PAGESERVER_GRPC_PORT;\nuse postgres_connection::parse_host_port;\n\nuse crate::local_env::LocalEnv;\nuse crate::postgresql_conf::PostgresConf;\n\n// contents of a endpoint.json file\n#[derive(Serialize, Deserialize, PartialEq, Eq, Clone, Debug)]\npub struct EndpointConf {\n    
endpoint_id: String,\n    tenant_id: TenantId,\n    timeline_id: TimelineId,\n    mode: ComputeMode,\n    pg_port: u16,\n    external_http_port: u16,\n    internal_http_port: u16,\n    pg_version: PgMajorVersion,\n    grpc: bool,\n    skip_pg_catalog_updates: bool,\n    reconfigure_concurrency: usize,\n    drop_subscriptions_before_start: bool,\n    features: Vec<ComputeFeature>,\n    cluster: Option<Cluster>,\n    compute_ctl_config: ComputeCtlConfig,\n    privileged_role_name: Option<String>,\n}\n\n//\n// ComputeControlPlane\n//\npub struct ComputeControlPlane {\n    base_port: u16,\n\n    // endpoint ID is the key\n    pub endpoints: BTreeMap<String, Arc<Endpoint>>,\n\n    env: LocalEnv,\n}\n\nimpl ComputeControlPlane {\n    // Load current endpoints from the endpoints/ subdirectories\n    pub fn load(env: LocalEnv) -> Result<ComputeControlPlane> {\n        let mut endpoints = BTreeMap::default();\n        for endpoint_dir in std::fs::read_dir(env.endpoints_path())\n            .with_context(|| format!(\"failed to list {}\", env.endpoints_path().display()))?\n        {\n            let ep_res = Endpoint::from_dir_entry(endpoint_dir?, &env);\n            let ep = match ep_res {\n                Ok(ep) => ep,\n                Err(e) => match e.downcast::<std::io::Error>() {\n                    Ok(e) => {\n                        // A parallel task could delete an endpoint while we have just scanned the directory\n                        if e.kind() == std::io::ErrorKind::NotFound {\n                            continue;\n                        } else {\n                            Err(e)?\n                        }\n                    }\n                    Err(e) => Err(e)?,\n                },\n            };\n            endpoints.insert(ep.endpoint_id.clone(), Arc::new(ep));\n        }\n\n        Ok(ComputeControlPlane {\n            base_port: 55431,\n            endpoints,\n            env,\n        })\n    }\n\n    fn get_port(&mut self) -> u16 {\n       
 1 + self\n            .endpoints\n            .values()\n            .map(|ep| std::cmp::max(ep.pg_address.port(), ep.external_http_address.port()))\n            .max()\n            .unwrap_or(self.base_port)\n    }\n\n    /// Create a JSON Web Key Set. This ideally matches the way we create a JWKS\n    /// from the production control plane.\n    fn create_jwks_from_pem(pem: &Pem) -> Result<JwkSet> {\n        let spki: SubjectPublicKeyInfoRef = SubjectPublicKeyInfo::from_der(pem.contents())?;\n        let public_key = spki.subject_public_key.raw_bytes();\n\n        let mut hasher = Sha256::new();\n        hasher.update(public_key);\n        let key_hash = hasher.finalize();\n\n        Ok(JwkSet {\n            keys: vec![Jwk {\n                common: CommonParameters {\n                    public_key_use: Some(PublicKeyUse::Signature),\n                    key_operations: Some(vec![KeyOperations::Verify]),\n                    key_algorithm: Some(KeyAlgorithm::EdDSA),\n                    key_id: Some(BASE64_URL_SAFE_NO_PAD.encode(key_hash)),\n                    x509_url: None::<String>,\n                    x509_chain: None::<Vec<String>>,\n                    x509_sha1_fingerprint: None::<String>,\n                    x509_sha256_fingerprint: None::<String>,\n                },\n                algorithm: AlgorithmParameters::OctetKeyPair(OctetKeyPairParameters {\n                    key_type: OctetKeyPairType::OctetKeyPair,\n                    curve: EllipticCurve::Ed25519,\n                    x: BASE64_URL_SAFE_NO_PAD.encode(public_key),\n                }),\n            }],\n        })\n    }\n\n    #[allow(clippy::too_many_arguments)]\n    pub fn new_endpoint(\n        &mut self,\n        endpoint_id: &str,\n        tenant_id: TenantId,\n        timeline_id: TimelineId,\n        pg_port: Option<u16>,\n        external_http_port: Option<u16>,\n        internal_http_port: Option<u16>,\n        pg_version: PgMajorVersion,\n        mode: ComputeMode,\n        
grpc: bool,\n        skip_pg_catalog_updates: bool,\n        drop_subscriptions_before_start: bool,\n        privileged_role_name: Option<String>,\n    ) -> Result<Arc<Endpoint>> {\n        let pg_port = pg_port.unwrap_or_else(|| self.get_port());\n        let external_http_port = external_http_port.unwrap_or_else(|| self.get_port() + 1);\n        let internal_http_port = internal_http_port.unwrap_or_else(|| external_http_port + 1);\n        let compute_ctl_config = ComputeCtlConfig {\n            jwks: Self::create_jwks_from_pem(&self.env.read_public_key()?)?,\n            tls: None::<TlsConfig>,\n        };\n        let ep = Arc::new(Endpoint {\n            endpoint_id: endpoint_id.to_owned(),\n            pg_address: SocketAddr::new(IpAddr::from(Ipv4Addr::LOCALHOST), pg_port),\n            external_http_address: SocketAddr::new(\n                IpAddr::from(Ipv4Addr::UNSPECIFIED),\n                external_http_port,\n            ),\n            internal_http_address: SocketAddr::new(\n                IpAddr::from(Ipv4Addr::LOCALHOST),\n                internal_http_port,\n            ),\n            env: self.env.clone(),\n            timeline_id,\n            mode,\n            tenant_id,\n            pg_version,\n            // We don't setup roles and databases in the spec locally, so we don't need to\n            // do catalog updates. Catalog updates also include check availability\n            // data creation. Yet, we have tests that check that size and db dump\n            // before and after start are the same. 
So, skip catalog updates,\n            // with this we basically test a case of waking up an idle compute, where\n            // we also skip catalog updates in the cloud.\n            skip_pg_catalog_updates,\n            drop_subscriptions_before_start,\n            grpc,\n            reconfigure_concurrency: 1,\n            features: vec![],\n            cluster: None,\n            compute_ctl_config: compute_ctl_config.clone(),\n            privileged_role_name: privileged_role_name.clone(),\n        });\n\n        ep.create_endpoint_dir()?;\n        std::fs::write(\n            ep.endpoint_path().join(\"endpoint.json\"),\n            serde_json::to_string_pretty(&EndpointConf {\n                endpoint_id: endpoint_id.to_string(),\n                tenant_id,\n                timeline_id,\n                mode,\n                external_http_port,\n                internal_http_port,\n                pg_port,\n                pg_version,\n                grpc,\n                skip_pg_catalog_updates,\n                drop_subscriptions_before_start,\n                reconfigure_concurrency: 1,\n                features: vec![],\n                cluster: None,\n                compute_ctl_config,\n                privileged_role_name,\n            })?,\n        )?;\n        std::fs::write(\n            ep.endpoint_path().join(\"postgresql.conf\"),\n            ep.setup_pg_conf()?.to_string(),\n        )?;\n\n        self.endpoints\n            .insert(ep.endpoint_id.clone(), Arc::clone(&ep));\n\n        Ok(ep)\n    }\n\n    pub fn check_conflicting_endpoints(\n        &self,\n        mode: ComputeMode,\n        tenant_id: TenantId,\n        timeline_id: TimelineId,\n    ) -> Result<()> {\n        if matches!(mode, ComputeMode::Primary) {\n            // this check is not complete, as you could have a concurrent attempt at\n            // creating another primary, both reading the state before checking it here,\n            // but it's better than nothing.\n    
        let mut duplicates = self.endpoints.iter().filter(|(_k, v)| {\n                v.tenant_id == tenant_id\n                    && v.timeline_id == timeline_id\n                    && v.mode == mode\n                    && v.status() != EndpointStatus::Stopped\n            });\n\n            if let Some((key, _)) = duplicates.next() {\n                bail!(\n                    \"attempting to create a duplicate primary endpoint on tenant {tenant_id}, timeline {timeline_id}: endpoint {key:?} exists already. please don't do this, it is not supported.\"\n                );\n            }\n        }\n        Ok(())\n    }\n}\n\n///////////////////////////////////////////////////////////////////////////////\n\npub struct Endpoint {\n    /// used as the directory name\n    endpoint_id: String,\n    pub tenant_id: TenantId,\n    pub timeline_id: TimelineId,\n    pub mode: ComputeMode,\n    /// If true, the endpoint should use gRPC to communicate with Pageservers.\n    pub grpc: bool,\n\n    // port and address of the Postgres server and `compute_ctl`'s HTTP APIs\n    pub pg_address: SocketAddr,\n    pub external_http_address: SocketAddr,\n    pub internal_http_address: SocketAddr,\n\n    // postgres major version in the format: 14, 15, etc.\n    pg_version: PgMajorVersion,\n\n    // These are not part of the endpoint as such, but the environment\n    // the endpoint runs in.\n    pub env: LocalEnv,\n\n    // Optimizations\n    skip_pg_catalog_updates: bool,\n\n    drop_subscriptions_before_start: bool,\n    reconfigure_concurrency: usize,\n    // Feature flags\n    features: Vec<ComputeFeature>,\n    // Cluster settings\n    cluster: Option<Cluster>,\n\n    /// The compute_ctl config for the endpoint's compute.\n    compute_ctl_config: ComputeCtlConfig,\n\n    /// The name of the privileged role for the endpoint.\n    privileged_role_name: Option<String>,\n}\n\n#[derive(PartialEq, Eq)]\npub enum EndpointStatus {\n    Running,\n    Stopped,\n    Crashed,\n    
RunningNoPidfile,\n}\n\nimpl Display for EndpointStatus {\n    fn fmt(&self, writer: &mut std::fmt::Formatter) -> std::fmt::Result {\n        writer.write_str(match self {\n            Self::Running => \"running\",\n            Self::Stopped => \"stopped\",\n            Self::Crashed => \"crashed\",\n            Self::RunningNoPidfile => \"running, no pidfile\",\n        })\n    }\n}\n\n#[derive(Default, Clone, Copy, clap::ValueEnum)]\npub enum EndpointTerminateMode {\n    #[default]\n    /// Use pg_ctl stop -m fast\n    Fast,\n    /// Use pg_ctl stop -m immediate\n    Immediate,\n    /// Use /terminate?mode=immediate\n    ImmediateTerminate,\n}\n\nimpl std::fmt::Display for EndpointTerminateMode {\n    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {\n        f.write_str(match &self {\n            EndpointTerminateMode::Fast => \"fast\",\n            EndpointTerminateMode::Immediate => \"immediate\",\n            EndpointTerminateMode::ImmediateTerminate => \"immediate-terminate\",\n        })\n    }\n}\n\npub struct EndpointStartArgs {\n    pub auth_token: Option<String>,\n    pub endpoint_storage_token: String,\n    pub endpoint_storage_addr: String,\n    pub safekeepers_generation: Option<SafekeeperGeneration>,\n    pub safekeepers: Vec<NodeId>,\n    pub pageserver_conninfo: PageserverConnectionInfo,\n    pub remote_ext_base_url: Option<String>,\n    pub create_test_user: bool,\n    pub start_timeout: Duration,\n    pub autoprewarm: bool,\n    pub offload_lfc_interval_seconds: Option<std::num::NonZeroU64>,\n    pub dev: bool,\n}\n\nimpl Endpoint {\n    fn from_dir_entry(entry: std::fs::DirEntry, env: &LocalEnv) -> Result<Endpoint> {\n        if !entry.file_type()?.is_dir() {\n            anyhow::bail!(\n                \"Endpoint::from_dir_entry failed: '{}' is not a directory\",\n                entry.path().display()\n            );\n        }\n\n        // parse data directory name\n        let fname = entry.file_name();\n        let 
endpoint_id = fname.to_str().unwrap().to_string();\n\n        // Read the endpoint.json file\n        let conf: EndpointConf =\n            serde_json::from_slice(&std::fs::read(entry.path().join(\"endpoint.json\"))?)?;\n\n        debug!(\"serialized endpoint conf: {:?}\", conf);\n\n        Ok(Endpoint {\n            pg_address: SocketAddr::new(IpAddr::from(Ipv4Addr::LOCALHOST), conf.pg_port),\n            external_http_address: SocketAddr::new(\n                IpAddr::from(Ipv4Addr::UNSPECIFIED),\n                conf.external_http_port,\n            ),\n            internal_http_address: SocketAddr::new(\n                IpAddr::from(Ipv4Addr::LOCALHOST),\n                conf.internal_http_port,\n            ),\n            endpoint_id,\n            env: env.clone(),\n            timeline_id: conf.timeline_id,\n            mode: conf.mode,\n            tenant_id: conf.tenant_id,\n            pg_version: conf.pg_version,\n            grpc: conf.grpc,\n            skip_pg_catalog_updates: conf.skip_pg_catalog_updates,\n            reconfigure_concurrency: conf.reconfigure_concurrency,\n            drop_subscriptions_before_start: conf.drop_subscriptions_before_start,\n            features: conf.features,\n            cluster: conf.cluster,\n            compute_ctl_config: conf.compute_ctl_config,\n            privileged_role_name: conf.privileged_role_name,\n        })\n    }\n\n    fn create_endpoint_dir(&self) -> Result<()> {\n        std::fs::create_dir_all(self.endpoint_path()).with_context(|| {\n            format!(\n                \"could not create endpoint directory {}\",\n                self.endpoint_path().display()\n            )\n        })\n    }\n\n    // Generate postgresql.conf with default configuration\n    fn setup_pg_conf(&self) -> Result<PostgresConf> {\n        let mut conf = PostgresConf::new();\n        conf.append(\"max_wal_senders\", \"10\");\n        conf.append(\"wal_log_hints\", \"off\");\n        
conf.append(\"max_replication_slots\", \"10\");\n        conf.append(\"hot_standby\", \"on\");\n        // Set to 1MB to both exercise getPage requests/LFC, and still have enough room for\n        // Postgres to operate. Everything smaller might be not enough for Postgres under load,\n        // and can cause errors like 'no unpinned buffers available', see\n        // <https://github.com/neondatabase/neon/issues/9956>\n        conf.append(\"shared_buffers\", \"1MB\");\n        // Postgres defaults to effective_io_concurrency=1, which does not exercise the pageserver's\n        // batching logic.  Set this to 2 so that we exercise the code a bit without letting\n        // individual tests do a lot of concurrent work on underpowered test machines\n        conf.append(\"effective_io_concurrency\", \"2\");\n        conf.append(\"fsync\", \"off\");\n        conf.append(\"max_connections\", \"100\");\n        conf.append(\"wal_level\", \"logical\");\n        // wal_sender_timeout is the maximum time to wait for WAL replication.\n        // It also defines how often the walreceiver will send a feedback message to the wal sender.\n        conf.append(\"wal_sender_timeout\", \"5s\");\n        conf.append(\"listen_addresses\", &self.pg_address.ip().to_string());\n        conf.append(\"port\", &self.pg_address.port().to_string());\n        conf.append(\"wal_keep_size\", \"0\");\n        // walproposer panics when basebackup is invalid, it is pointless to restart in this case.\n        conf.append(\"restart_after_crash\", \"off\");\n\n        // Load the 'neon' extension\n        conf.append(\"shared_preload_libraries\", \"neon\");\n\n        conf.append_line(\"\");\n        // Replication-related configurations, such as WAL sending\n        match &self.mode {\n            ComputeMode::Primary => {\n                // Configure backpressure\n                // - Replication write lag depends on how fast the walreceiver can process incoming WAL.\n                //   This lag 
determines latency of get_page_at_lsn. Speed of applying WAL is about 10MB/sec,\n                //   so to avoid expiration of 1 minute timeout, this lag should not be larger than 600MB.\n                //   Actually latency should be much smaller (better if < 1sec). But we assume that recently\n                //   updated pages are not requested from pageserver.\n                // - Replication flush lag depends on speed of persisting data by checkpointer (creation of\n                //   delta/image layers) and advancing disk_consistent_lsn. Safekeepers are able to\n                //   remove/archive WAL only beyond disk_consistent_lsn. Too large a lag can cause long\n                //   recovery time (in case of pageserver crash) and disk space overflow at safekeepers.\n                // - Replication apply lag depends on speed of uploading changes to S3 by uploader thread.\n                //   To be able to restore database in case of pageserver node crash, safekeeper should not\n                //   remove WAL beyond this point. 
Too large lag can cause space exhaustion in safekeepers\n                //   (if they are not able to upload WAL to S3).\n                conf.append(\"max_replication_write_lag\", \"15MB\");\n                conf.append(\"max_replication_flush_lag\", \"10GB\");\n\n                if !self.env.safekeepers.is_empty() {\n                    // Configure Postgres to connect to the safekeepers\n                    conf.append(\"synchronous_standby_names\", \"walproposer\");\n\n                    let safekeepers = self\n                        .env\n                        .safekeepers\n                        .iter()\n                        .map(|sk| format!(\"localhost:{}\", sk.get_compute_port()))\n                        .collect::<Vec<String>>()\n                        .join(\",\");\n                    conf.append(\"neon.safekeepers\", &safekeepers);\n                } else {\n                    // We only use setup without safekeepers for tests,\n                    // and don't care about data durability on pageserver,\n                    // so set more relaxed synchronous_commit.\n                    conf.append(\"synchronous_commit\", \"remote_write\");\n\n                    // Configure the node to stream WAL directly to the pageserver\n                    // This isn't really a supported configuration, but can be useful for\n                    // testing.\n                    conf.append(\"synchronous_standby_names\", \"pageserver\");\n                }\n            }\n            ComputeMode::Static(lsn) => {\n                conf.append(\"recovery_target_lsn\", &lsn.to_string());\n            }\n            ComputeMode::Replica => {\n                assert!(!self.env.safekeepers.is_empty());\n\n                // TODO: use future host field from safekeeper spec\n                // Pass the list of safekeepers to the replica so that it can connect to any of them,\n                // whichever is available.\n                let sk_ports = self\n      
              .env\n                    .safekeepers\n                    .iter()\n                    .map(|x| x.get_compute_port().to_string())\n                    .collect::<Vec<_>>()\n                    .join(\",\");\n                let sk_hosts = vec![\"localhost\"; self.env.safekeepers.len()].join(\",\");\n\n                let connstr = format!(\n                    \"host={} port={} options='-c timeline_id={} tenant_id={}' application_name=replica replication=true\",\n                    sk_hosts,\n                    sk_ports,\n                    &self.timeline_id.to_string(),\n                    &self.tenant_id.to_string(),\n                );\n\n                let slot_name = format!(\"repl_{}_\", self.timeline_id);\n                conf.append(\"primary_conninfo\", connstr.as_str());\n                conf.append(\"primary_slot_name\", slot_name.as_str());\n                conf.append(\"hot_standby\", \"on\");\n                // prefetching of blocks referenced in WAL doesn't make sense for us\n                // Neon hot standby ignores pages that are not in the shared_buffers\n                if self.pg_version >= PgMajorVersion::PG15 {\n                    conf.append(\"recovery_prefetch\", \"off\");\n                }\n            }\n        }\n\n        Ok(conf)\n    }\n\n    pub fn endpoint_path(&self) -> PathBuf {\n        self.env.endpoints_path().join(&self.endpoint_id)\n    }\n\n    pub fn pgdata(&self) -> PathBuf {\n        self.endpoint_path().join(\"pgdata\")\n    }\n\n    pub fn status(&self) -> EndpointStatus {\n        let timeout = Duration::from_millis(300);\n        let has_pidfile = self.pgdata().join(\"postmaster.pid\").exists();\n        let can_connect = TcpStream::connect_timeout(&self.pg_address, timeout).is_ok();\n\n        match (has_pidfile, can_connect) {\n            (true, true) => EndpointStatus::Running,\n            (false, false) => EndpointStatus::Stopped,\n            (true, false) => EndpointStatus::Crashed,\n 
           (false, true) => EndpointStatus::RunningNoPidfile,\n        }\n    }\n\n    fn pg_ctl(&self, args: &[&str], auth_token: &Option<String>) -> Result<()> {\n        let pg_ctl_path = self.env.pg_bin_dir(self.pg_version)?.join(\"pg_ctl\");\n        let mut cmd = Command::new(&pg_ctl_path);\n        cmd.args(\n            [\n                &[\n                    \"-D\",\n                    self.pgdata().to_str().unwrap(),\n                    \"-w\", //wait till pg_ctl actually does what was asked\n                ],\n                args,\n            ]\n            .concat(),\n        )\n        .env_clear()\n        .env(\n            \"LD_LIBRARY_PATH\",\n            self.env.pg_lib_dir(self.pg_version)?.to_str().unwrap(),\n        )\n        .env(\n            \"DYLD_LIBRARY_PATH\",\n            self.env.pg_lib_dir(self.pg_version)?.to_str().unwrap(),\n        );\n\n        // Pass authentication token used for the connections to pageserver and safekeepers\n        if let Some(token) = auth_token {\n            cmd.env(\"NEON_AUTH_TOKEN\", token);\n        }\n\n        let pg_ctl = cmd\n            .output()\n            .context(format!(\"{} failed\", pg_ctl_path.display()))?;\n        if !pg_ctl.status.success() {\n            anyhow::bail!(\n                \"pg_ctl failed, exit code: {}, stdout: {}, stderr: {}\",\n                pg_ctl.status,\n                String::from_utf8_lossy(&pg_ctl.stdout),\n                String::from_utf8_lossy(&pg_ctl.stderr),\n            );\n        }\n\n        Ok(())\n    }\n\n    fn wait_for_compute_ctl_to_exit(&self, send_sigterm: bool) -> Result<()> {\n        // TODO use background_process::stop_process instead: https://github.com/neondatabase/neon/pull/6482\n        let pidfile_path = self.endpoint_path().join(\"compute_ctl.pid\");\n        let pid: u32 = std::fs::read_to_string(pidfile_path)?.parse()?;\n        let pid = nix::unistd::Pid::from_raw(pid as i32);\n        if send_sigterm {\n            
kill(pid, Signal::SIGTERM).ok();\n        }\n        crate::background_process::wait_until_stopped(\"compute_ctl\", pid)?;\n        Ok(())\n    }\n\n    fn read_postgresql_conf(&self) -> Result<String> {\n        // Slurp the endpoints/<endpoint id>/postgresql.conf file into\n        // memory. We will include it in the spec file that we pass to\n        // `compute_ctl`, and `compute_ctl` will write it to the postgresql.conf\n        // in the data directory.\n        let postgresql_conf_path = self.endpoint_path().join(\"postgresql.conf\");\n        match std::fs::read(&postgresql_conf_path) {\n            Ok(content) => Ok(String::from_utf8(content)?),\n            Err(e) if e.kind() == std::io::ErrorKind::NotFound => Ok(\"\".to_string()),\n            Err(e) => Err(anyhow::Error::new(e).context(format!(\n                \"failed to read config file in {}\",\n                postgresql_conf_path.to_str().unwrap()\n            ))),\n        }\n    }\n\n    /// Map safekeepers ids to the actual connection strings.\n    fn build_safekeepers_connstrs(&self, sk_ids: Vec<NodeId>) -> Result<Vec<String>> {\n        let mut safekeeper_connstrings = Vec::new();\n        if self.mode == ComputeMode::Primary {\n            for sk_id in sk_ids {\n                let sk = self\n                    .env\n                    .safekeepers\n                    .iter()\n                    .find(|node| node.id == sk_id)\n                    .ok_or_else(|| anyhow!(\"safekeeper {sk_id} does not exist\"))?;\n                safekeeper_connstrings.push(format!(\"127.0.0.1:{}\", sk.get_compute_port()));\n            }\n        }\n        Ok(safekeeper_connstrings)\n    }\n\n    /// Generate a JWT with the correct claims.\n    pub fn generate_jwt(&self, scope: Option<ComputeClaimsScope>) -> Result<String> {\n        self.env.generate_auth_token(&ComputeClaims {\n            audience: match scope {\n                Some(ComputeClaimsScope::Admin) => 
Some(vec![COMPUTE_AUDIENCE.to_owned()]),\n                _ => None,\n            },\n            compute_id: match scope {\n                Some(ComputeClaimsScope::Admin) => None,\n                _ => Some(self.endpoint_id.clone()),\n            },\n            scope,\n        })\n    }\n\n    pub async fn start(&self, args: EndpointStartArgs) -> Result<()> {\n        if self.status() == EndpointStatus::Running {\n            anyhow::bail!(\"The endpoint is already running\");\n        }\n\n        let postgresql_conf = self.read_postgresql_conf()?;\n\n        // We always start the compute node from scratch, so if the Postgres\n        // data dir exists from a previous launch, remove it first.\n        if self.pgdata().exists() {\n            std::fs::remove_dir_all(self.pgdata())?;\n        }\n\n        let safekeeper_connstrings = self.build_safekeepers_connstrs(args.safekeepers)?;\n\n        // check for file remote_extensions_spec.json\n        // if it is present, read it and pass to compute_ctl\n        let remote_extensions_spec_path = self.endpoint_path().join(\"remote_extensions_spec.json\");\n        let remote_extensions_spec = std::fs::File::open(remote_extensions_spec_path);\n        let remote_extensions: Option<RemoteExtSpec>;\n\n        if let Ok(spec_file) = remote_extensions_spec {\n            remote_extensions = serde_json::from_reader(spec_file).ok();\n        } else {\n            remote_extensions = None;\n        };\n\n        // For the sake of backwards-compatibility, also fill in 'pageserver_connstring'\n        //\n        // XXX: I believe this is not really needed, except to make\n        // test_forward_compatibility happy.\n        //\n        // Use a closure so that we can conveniently return None in the middle of the\n        // loop.\n        let pageserver_connstring: Option<String> = (|| {\n            let num_shards = args.pageserver_conninfo.shard_count.count();\n            let mut connstrings = Vec::new();\n            
for shard_no in 0..num_shards {\n                let shard_index = ShardIndex {\n                    shard_count: args.pageserver_conninfo.shard_count,\n                    shard_number: ShardNumber(shard_no),\n                };\n                let shard = args\n                    .pageserver_conninfo\n                    .shards\n                    .get(&shard_index)\n                    .ok_or_else(|| {\n                        anyhow!(\n                            \"shard {} not found in pageserver_connection_info\",\n                            shard_index\n                        )\n                    })?;\n                let pageserver = shard\n                    .pageservers\n                    .first()\n                    .ok_or(anyhow!(\"must have at least one pageserver\"))?;\n                if let Some(libpq_url) = &pageserver.libpq_url {\n                    connstrings.push(libpq_url.clone());\n                } else {\n                    return Ok::<_, anyhow::Error>(None);\n                }\n            }\n            Ok(Some(connstrings.join(\",\")))\n        })()?;\n\n        // Create config file\n        let config = {\n            let mut spec = ComputeSpec {\n                skip_pg_catalog_updates: self.skip_pg_catalog_updates,\n                format_version: 1.0,\n                operation_uuid: None,\n                features: self.features.clone(),\n                swap_size_bytes: None,\n                disk_quota_bytes: None,\n                disable_lfc_resizing: None,\n                cluster: Cluster {\n                    cluster_id: None, // project ID: not used\n                    name: None,       // project name: not used\n                    state: None,\n                    roles: if args.create_test_user {\n                        vec![Role {\n                            name: PgIdent::from_str(\"test\").unwrap(),\n                            encrypted_password: None,\n                            options: None,\n  
                      }]\n                    } else {\n                        Vec::new()\n                    },\n                    databases: if args.create_test_user {\n                        vec![Database {\n                            name: PgIdent::from_str(\"neondb\").unwrap(),\n                            owner: PgIdent::from_str(\"test\").unwrap(),\n                            options: None,\n                            restrict_conn: false,\n                            invalid: false,\n                        }]\n                    } else {\n                        Vec::new()\n                    },\n                    settings: None,\n                    postgresql_conf: Some(postgresql_conf.clone()),\n                },\n                delta_operations: None,\n                tenant_id: Some(self.tenant_id),\n                timeline_id: Some(self.timeline_id),\n                project_id: None,\n                branch_id: None,\n                endpoint_id: Some(self.endpoint_id.clone()),\n                mode: self.mode,\n                pageserver_connection_info: Some(args.pageserver_conninfo.clone()),\n                pageserver_connstring,\n                safekeepers_generation: args.safekeepers_generation.map(|g| g.into_inner()),\n                safekeeper_connstrings,\n                storage_auth_token: args.auth_token.clone(),\n                remote_extensions,\n                pgbouncer_settings: None,\n                shard_stripe_size: args.pageserver_conninfo.stripe_size, // redundant with pageserver_connection_info.stripe_size\n                local_proxy_config: None,\n                reconfigure_concurrency: self.reconfigure_concurrency,\n                drop_subscriptions_before_start: self.drop_subscriptions_before_start,\n                audit_log_level: ComputeAudit::Disabled,\n                logs_export_host: None::<String>,\n                endpoint_storage_addr: Some(args.endpoint_storage_addr),\n                
endpoint_storage_token: Some(args.endpoint_storage_token),\n                autoprewarm: args.autoprewarm,\n                offload_lfc_interval_seconds: args.offload_lfc_interval_seconds,\n                suspend_timeout_seconds: -1, // Only used in neon_local.\n                databricks_settings: None,\n            };\n\n            // this strange code is needed to support respec() in tests\n            if self.cluster.is_some() {\n                debug!(\"Cluster is already set in the endpoint spec, using it\");\n                spec.cluster = self.cluster.clone().unwrap();\n\n                debug!(\"spec.cluster {:?}\", spec.cluster);\n\n                // fill missing fields again\n                if args.create_test_user {\n                    spec.cluster.roles.push(Role {\n                        name: PgIdent::from_str(\"test\").unwrap(),\n                        encrypted_password: None,\n                        options: None,\n                    });\n                    spec.cluster.databases.push(Database {\n                        name: PgIdent::from_str(\"neondb\").unwrap(),\n                        owner: PgIdent::from_str(\"test\").unwrap(),\n                        options: None,\n                        restrict_conn: false,\n                        invalid: false,\n                    });\n                }\n                spec.cluster.postgresql_conf = Some(postgresql_conf);\n            }\n\n            ComputeConfig {\n                spec: Some(spec),\n                compute_ctl_config: self.compute_ctl_config.clone(),\n            }\n        };\n\n        let config_path = self.endpoint_path().join(\"config.json\");\n        std::fs::write(config_path, serde_json::to_string_pretty(&config)?)?;\n\n        // Open log file. 
We'll redirect the stdout and stderr of `compute_ctl` to it.\n        let logfile = std::fs::OpenOptions::new()\n            .create(true)\n            .append(true)\n            .open(self.endpoint_path().join(\"compute.log\"))?;\n\n        // Launch compute_ctl\n        let conn_str = self.connstr(\"cloud_admin\", \"postgres\");\n        println!(\"Starting postgres node at '{conn_str}'\");\n        if args.create_test_user {\n            let conn_str = self.connstr(\"test\", \"neondb\");\n            println!(\"Also at '{conn_str}'\");\n        }\n        let mut cmd = Command::new(self.env.neon_distrib_dir.join(\"compute_ctl\"));\n        cmd.args([\n            \"--external-http-port\",\n            &self.external_http_address.port().to_string(),\n        ])\n        .args([\n            \"--internal-http-port\",\n            &self.internal_http_address.port().to_string(),\n        ])\n        .args([\"--pgdata\", self.pgdata().to_str().unwrap()])\n        .args([\"--connstr\", &conn_str])\n        .arg(\"--config\")\n        .arg(self.endpoint_path().join(\"config.json\").as_os_str())\n        .args([\n            \"--pgbin\",\n            self.env\n                .pg_bin_dir(self.pg_version)?\n                .join(\"postgres\")\n                .to_str()\n                .unwrap(),\n        ])\n        // TODO: It would be nice if we generated compute IDs with the same\n        // algorithm as the real control plane.\n        .args([\"--compute-id\", &self.endpoint_id])\n        .stdin(std::process::Stdio::null())\n        .stderr(logfile.try_clone()?)\n        .stdout(logfile);\n\n        if let Some(remote_ext_base_url) = args.remote_ext_base_url {\n            cmd.args([\"--remote-ext-base-url\", &remote_ext_base_url]);\n        }\n\n        if args.dev {\n            cmd.arg(\"--dev\");\n        }\n\n        if let Some(privileged_role_name) = self.privileged_role_name.clone() {\n            cmd.args([\"--privileged-role-name\", 
&privileged_role_name]);\n        }\n\n        let child = cmd.spawn()?;\n        // set up a scopeguard to kill & wait for the child in case we panic or bail below\n        let child = scopeguard::guard(child, |mut child| {\n            println!(\"SIGKILL & wait the started process\");\n            (|| {\n                // TODO: use another signal that can be caught by the child so it can clean up any children it spawned\n                child.kill().context(\"SIGKILL child\")?;\n                child.wait().context(\"wait() for child process\")?;\n                anyhow::Ok(())\n            })()\n            .with_context(|| format!(\"scopeguard kill&wait child {child:?}\"))\n            .unwrap();\n        });\n\n        // Write down the pid so we can wait for it when we want to stop\n        // TODO use background_process::start_process instead: https://github.com/neondatabase/neon/pull/6482\n        let pid = child.id();\n        let pidfile_path = self.endpoint_path().join(\"compute_ctl.pid\");\n        std::fs::write(pidfile_path, pid.to_string())?;\n\n        // Wait for it to start\n        const ATTEMPT_INTERVAL: Duration = Duration::from_millis(100);\n        let start_at = Instant::now();\n        loop {\n            match self.get_status().await {\n                Ok(state) => {\n                    match state.status {\n                        ComputeStatus::Init => {\n                            let timeout = args.start_timeout;\n                            if Instant::now().duration_since(start_at) > timeout {\n                                bail!(\n                                    \"compute startup timed out {:?}; still in Init state\",\n                                    timeout\n                                );\n                            }\n                            // keep retrying\n                        }\n                        ComputeStatus::Running => {\n                            // All good!\n                            
break;\n                        }\n                        ComputeStatus::Failed => {\n                            bail!(\n                                \"compute startup failed: {}\",\n                                state\n                                    .error\n                                    .as_deref()\n                                    .unwrap_or(\"<no error from compute_ctl>\")\n                            );\n                        }\n                        ComputeStatus::Empty\n                        | ComputeStatus::ConfigurationPending\n                        | ComputeStatus::Configuration\n                        | ComputeStatus::TerminationPendingFast\n                        | ComputeStatus::TerminationPendingImmediate\n                        | ComputeStatus::Terminated\n                        | ComputeStatus::RefreshConfigurationPending\n                        | ComputeStatus::RefreshConfiguration => {\n                            bail!(\"unexpected compute status: {:?}\", state.status)\n                        }\n                    }\n                }\n                Err(e) => {\n                    if Instant::now().duration_since(start_at) > args.start_timeout {\n                        return Err(e).context(format!(\n                            \"timed out {:?} waiting to connect to compute_ctl HTTP\",\n                            args.start_timeout\n                        ));\n                    }\n                }\n            }\n            tokio::time::sleep(ATTEMPT_INTERVAL).await;\n        }\n\n        // disarm the scopeguard, let the child outlive this function (and neon_local invocation)\n        drop(scopeguard::ScopeGuard::into_inner(child));\n\n        Ok(())\n    }\n\n    // Update the pageservers in the spec file of the endpoint. 
This is useful to test the spec refresh scenario.\n    pub async fn update_pageservers_in_config(\n        &self,\n        pageserver_conninfo: &PageserverConnectionInfo,\n    ) -> Result<()> {\n        let config_path = self.endpoint_path().join(\"config.json\");\n        let mut config: ComputeConfig = {\n            let file = std::fs::File::open(&config_path)?;\n            serde_json::from_reader(file)?\n        };\n\n        let mut spec = config.spec.unwrap();\n        spec.pageserver_connection_info = Some(pageserver_conninfo.clone());\n        config.spec = Some(spec);\n\n        let file = std::fs::File::create(&config_path)?;\n        serde_json::to_writer_pretty(file, &config)?;\n\n        Ok(())\n    }\n\n    // Call the /status HTTP API\n    pub async fn get_status(&self) -> Result<ComputeStatusResponse> {\n        let client = reqwest::Client::new();\n\n        let response = client\n            .request(\n                reqwest::Method::GET,\n                format!(\n                    \"http://{}:{}/status\",\n                    self.external_http_address.ip(),\n                    self.external_http_address.port()\n                ),\n            )\n            .bearer_auth(self.generate_jwt(None::<ComputeClaimsScope>)?)\n            .send()\n            .await?;\n\n        // Interpret the response\n        let status = response.status();\n        if !(status.is_client_error() || status.is_server_error()) {\n            Ok(response.json().await?)\n        } else {\n            // reqwest does not export its error construction utility functions, so let's craft the message ourselves\n            let url = response.url().to_owned();\n            let msg = match response.text().await {\n                Ok(err_body) => format!(\"Error: {err_body}\"),\n                Err(_) => format!(\"Http error ({}) at {}.\", status.as_u16(), url),\n            };\n            Err(anyhow::anyhow!(msg))\n        }\n    }\n\n    pub async fn reconfigure(\n        
&self,\n        pageserver_conninfo: Option<&PageserverConnectionInfo>,\n        safekeepers: Option<Vec<NodeId>>,\n        safekeeper_generation: Option<SafekeeperGeneration>,\n    ) -> Result<()> {\n        let (mut spec, compute_ctl_config) = {\n            let config_path = self.endpoint_path().join(\"config.json\");\n            let file = std::fs::File::open(config_path)?;\n            let config: ComputeConfig = serde_json::from_reader(file)?;\n\n            (config.spec.unwrap(), config.compute_ctl_config)\n        };\n\n        let postgresql_conf = self.read_postgresql_conf()?;\n        spec.cluster.postgresql_conf = Some(postgresql_conf);\n\n        if let Some(pageserver_conninfo) = pageserver_conninfo {\n            // If pageservers are provided, we need to ensure that they are not empty.\n            // This is a requirement for the compute_ctl configuration.\n            anyhow::ensure!(\n                !pageserver_conninfo.shards.is_empty(),\n                \"no pageservers provided\"\n            );\n            spec.pageserver_connection_info = Some(pageserver_conninfo.clone());\n            spec.shard_stripe_size = pageserver_conninfo.stripe_size;\n        }\n\n        // If safekeepers are not specified, don't change them.\n        if let Some(safekeepers) = safekeepers {\n            let safekeeper_connstrings = self.build_safekeepers_connstrs(safekeepers)?;\n            spec.safekeeper_connstrings = safekeeper_connstrings;\n            if let Some(g) = safekeeper_generation {\n                spec.safekeepers_generation = Some(g.into_inner());\n            }\n        }\n\n        let client = reqwest::Client::builder()\n            .timeout(Duration::from_secs(120))\n            .build()\n            .unwrap();\n        let response = client\n            .post(format!(\n                \"http://{}:{}/configure\",\n                self.external_http_address.ip(),\n                self.external_http_address.port()\n            ))\n            
.header(CONTENT_TYPE.as_str(), \"application/json\")\n            .bearer_auth(self.generate_jwt(None::<ComputeClaimsScope>)?)\n            .body(\n                serde_json::to_string(&ConfigurationRequest {\n                    spec,\n                    compute_ctl_config,\n                })\n                .unwrap(),\n            )\n            .send()\n            .await?;\n\n        let status = response.status();\n        if !(status.is_client_error() || status.is_server_error()) {\n            Ok(())\n        } else {\n            let url = response.url().to_owned();\n            let msg = match response.text().await {\n                Ok(err_body) => format!(\"Error: {err_body}\"),\n                Err(_) => format!(\"Http error ({}) at {}.\", status.as_u16(), url),\n            };\n            Err(anyhow::anyhow!(msg))\n        }\n    }\n\n    pub async fn reconfigure_pageservers(\n        &self,\n        pageservers: &PageserverConnectionInfo,\n    ) -> Result<()> {\n        self.reconfigure(Some(pageservers), None, None).await\n    }\n\n    pub async fn reconfigure_safekeepers(\n        &self,\n        safekeepers: Vec<NodeId>,\n        generation: SafekeeperGeneration,\n    ) -> Result<()> {\n        self.reconfigure(None, Some(safekeepers), Some(generation))\n            .await\n    }\n\n    pub async fn stop(\n        &self,\n        mode: EndpointTerminateMode,\n        destroy: bool,\n    ) -> Result<TerminateResponse> {\n        // pg_ctl stop is fast but doesn't allow us to collect LSN. /terminate is\n        // slow, and test runs time out. 
Solution: special mode \"immediate-terminate\"\n        // which uses /terminate\n        let response = if let EndpointTerminateMode::ImmediateTerminate = mode {\n            let ip = self.external_http_address.ip();\n            let port = self.external_http_address.port();\n            let url = format!(\"http://{ip}:{port}/terminate?mode=immediate\");\n            let token = self.generate_jwt(Some(ComputeClaimsScope::Admin))?;\n            let request = reqwest::Client::new().post(url).bearer_auth(token);\n            let response = request.send().await.context(\"/terminate\")?;\n            let text = response.text().await.context(\"/terminate result\")?;\n            serde_json::from_str(&text).with_context(|| format!(\"deserializing {text}\"))?\n        } else {\n            self.pg_ctl(&[\"-m\", &mode.to_string(), \"stop\"], &None)?;\n            TerminateResponse { lsn: None }\n        };\n\n        // Also wait for the compute_ctl process to die. It might have some\n        // cleanup work to do after postgres stops, like syncing safekeepers,\n        // etc.\n        //\n        // If destroying or stop mode is immediate, send it SIGTERM before\n        // waiting. Sometimes we do *not* want this cleanup: tests intentionally\n        // do stop when majority of safekeepers is down, so sync-safekeepers\n        // would hang otherwise. 
This could be a separate flag though.\n        let send_sigterm = destroy || !matches!(mode, EndpointTerminateMode::Fast);\n        self.wait_for_compute_ctl_to_exit(send_sigterm)?;\n        if destroy {\n            println!(\n                \"Destroying postgres data directory '{}'\",\n                self.pgdata().to_str().unwrap()\n            );\n            std::fs::remove_dir_all(self.endpoint_path())?;\n        }\n        Ok(response)\n    }\n\n    pub async fn refresh_configuration(&self) -> Result<()> {\n        let client = reqwest::Client::builder()\n            .timeout(Duration::from_secs(30))\n            .build()\n            .unwrap();\n        let response = client\n            .post(format!(\n                \"http://{}:{}/refresh_configuration\",\n                self.internal_http_address.ip(),\n                self.internal_http_address.port()\n            ))\n            .send()\n            .await?;\n\n        let status = response.status();\n        if !(status.is_client_error() || status.is_server_error()) {\n            Ok(())\n        } else {\n            let url = response.url().to_owned();\n            let msg = match response.text().await {\n                Ok(err_body) => format!(\"Error: {err_body}\"),\n                Err(_) => format!(\"Http error ({}) at {}.\", status.as_u16(), url),\n            };\n            Err(anyhow::anyhow!(msg))\n        }\n    }\n\n    pub fn connstr(&self, user: &str, db_name: &str) -> String {\n        format!(\n            \"postgresql://{}@{}:{}/{}\",\n            user,\n            self.pg_address.ip(),\n            self.pg_address.port(),\n            db_name\n        )\n    }\n}\n\n/// If caller is telling us what pageserver to use, this is not a tenant which is\n/// fully managed by storage controller, therefore not sharded.\npub fn local_pageserver_conf_to_conn_info(\n    conf: &crate::local_env::PageServerConf,\n) -> Result<PageserverConnectionInfo> {\n    let libpq_url = {\n        let 
(host, port) = parse_host_port(&conf.listen_pg_addr)?;\n        let port = port.unwrap_or(5432);\n        Some(format!(\"postgres://no_user@{host}:{port}\"))\n    };\n    let grpc_url = if let Some(grpc_addr) = &conf.listen_grpc_addr {\n        let (host, port) = parse_host_port(grpc_addr)?;\n        let port = port.unwrap_or(DEFAULT_PAGESERVER_GRPC_PORT);\n        Some(format!(\"grpc://no_user@{host}:{port}\"))\n    } else {\n        None\n    };\n    let ps_conninfo = PageserverShardConnectionInfo {\n        id: Some(conf.id),\n        libpq_url,\n        grpc_url,\n    };\n\n    let shard_info = PageserverShardInfo {\n        pageservers: vec![ps_conninfo],\n    };\n\n    let shards: HashMap<_, _> = vec![(ShardIndex::unsharded(), shard_info)]\n        .into_iter()\n        .collect();\n    Ok(PageserverConnectionInfo {\n        shard_count: ShardCount::unsharded(),\n        stripe_size: None,\n        shards,\n        prefer_protocol: PageserverProtocol::default(),\n    })\n}\n\npub fn tenant_locate_response_to_conn_info(\n    response: &pageserver_api::controller_api::TenantLocateResponse,\n) -> Result<PageserverConnectionInfo> {\n    let mut shards = HashMap::new();\n    for shard in response.shards.iter() {\n        tracing::info!(\"parsing {}\", shard.listen_pg_addr);\n        let libpq_url = {\n            let host = &shard.listen_pg_addr;\n            let port = shard.listen_pg_port;\n            Some(format!(\"postgres://no_user@{host}:{port}\"))\n        };\n        let grpc_url = if let Some(grpc_addr) = &shard.listen_grpc_addr {\n            let host = grpc_addr;\n            let port = shard.listen_grpc_port.expect(\"no gRPC port\");\n            Some(format!(\"grpc://no_user@{host}:{port}\"))\n        } else {\n            None\n        };\n\n        let shard_info = PageserverShardInfo {\n            pageservers: vec![PageserverShardConnectionInfo {\n                id: Some(shard.node_id),\n                libpq_url,\n                grpc_url,\n    
        }],\n        };\n\n        shards.insert(shard.shard_id.to_index(), shard_info);\n    }\n\n    let stripe_size = if response.shard_params.count.is_unsharded() {\n        None\n    } else {\n        Some(response.shard_params.stripe_size)\n    };\n    Ok(PageserverConnectionInfo {\n        shard_count: response.shard_params.count,\n        stripe_size,\n        shards,\n        prefer_protocol: PageserverProtocol::default(),\n    })\n}\n"
  },
  {
    "path": "control_plane/src/endpoint_storage.rs",
    "content": "use crate::background_process::{self, start_process, stop_process};\nuse crate::local_env::LocalEnv;\nuse anyhow::{Context, Result};\nuse camino::Utf8PathBuf;\nuse std::io::Write;\nuse std::net::SocketAddr;\nuse std::time::Duration;\n\n/// Directory within .neon which will be used by default for LocalFs remote storage.\npub const ENDPOINT_STORAGE_REMOTE_STORAGE_DIR: &str = \"local_fs_remote_storage/endpoint_storage\";\npub const ENDPOINT_STORAGE_DEFAULT_ADDR: SocketAddr =\n    SocketAddr::new(std::net::IpAddr::V4(std::net::Ipv4Addr::LOCALHOST), 9993);\n\npub struct EndpointStorage {\n    pub bin: Utf8PathBuf,\n    pub data_dir: Utf8PathBuf,\n    pub pemfile: Utf8PathBuf,\n    pub addr: SocketAddr,\n}\n\nimpl EndpointStorage {\n    pub fn from_env(env: &LocalEnv) -> EndpointStorage {\n        EndpointStorage {\n            bin: Utf8PathBuf::from_path_buf(env.endpoint_storage_bin()).unwrap(),\n            data_dir: Utf8PathBuf::from_path_buf(env.endpoint_storage_data_dir()).unwrap(),\n            pemfile: Utf8PathBuf::from_path_buf(env.public_key_path.clone()).unwrap(),\n            addr: env.endpoint_storage.listen_addr,\n        }\n    }\n\n    fn config_path(&self) -> Utf8PathBuf {\n        self.data_dir.join(\"endpoint_storage.json\")\n    }\n\n    fn listen_addr(&self) -> Utf8PathBuf {\n        format!(\"{}:{}\", self.addr.ip(), self.addr.port()).into()\n    }\n\n    pub fn init(&self) -> Result<()> {\n        println!(\"Initializing object storage in {:?}\", self.data_dir);\n        let parent = self.data_dir.parent().unwrap();\n\n        #[derive(serde::Serialize)]\n        struct Cfg {\n            listen: Utf8PathBuf,\n            pemfile: Utf8PathBuf,\n            local_path: Utf8PathBuf,\n            r#type: String,\n        }\n        let cfg = Cfg {\n            listen: self.listen_addr(),\n            pemfile: parent.join(self.pemfile.clone()),\n            local_path: parent.join(ENDPOINT_STORAGE_REMOTE_STORAGE_DIR),\n            
r#type: \"LocalFs\".to_string(),\n        };\n        std::fs::create_dir_all(self.config_path().parent().unwrap())?;\n        std::fs::write(self.config_path(), serde_json::to_string(&cfg)?)\n            .context(\"write object storage config\")?;\n        Ok(())\n    }\n\n    pub async fn start(&self, retry_timeout: &Duration) -> Result<()> {\n        println!(\"Starting endpoint_storage at {}\", self.listen_addr());\n        std::io::stdout().flush().context(\"flush stdout\")?;\n\n        let process_status_check = || async {\n            let res = reqwest::Client::new().get(format!(\"http://{}/metrics\", self.listen_addr()));\n            match res.send().await {\n                Ok(res) => Ok(res.status().is_success()),\n                Err(_) => Ok(false),\n            }\n        };\n\n        let res = start_process(\n            \"endpoint_storage\",\n            &self.data_dir.clone().into_std_path_buf(),\n            &self.bin.clone().into_std_path_buf(),\n            vec![self.config_path().to_string()],\n            vec![(\"RUST_LOG\".into(), \"debug\".into())],\n            background_process::InitialPidFile::Create(self.pid_file()),\n            retry_timeout,\n            process_status_check,\n        )\n        .await;\n        if res.is_err() {\n            eprintln!(\"Logs:\\n{}\", std::fs::read_to_string(self.log_file())?);\n        }\n\n        res\n    }\n\n    pub fn stop(&self, immediate: bool) -> anyhow::Result<()> {\n        stop_process(immediate, \"endpoint_storage\", &self.pid_file())\n    }\n\n    fn log_file(&self) -> Utf8PathBuf {\n        self.data_dir.join(\"endpoint_storage.log\")\n    }\n\n    fn pid_file(&self) -> Utf8PathBuf {\n        self.data_dir.join(\"endpoint_storage.pid\")\n    }\n}\n"
  },
  {
    "path": "control_plane/src/lib.rs",
    "content": "//! Local control plane.\n//!\n//! Can start, configure and stop postgres instances running as local processes.\n//!\n//! Intended to be used in integration tests and in CLI tools for\n//! local installations.\n#![deny(clippy::undocumented_unsafe_blocks)]\n\nmod background_process;\npub mod broker;\npub mod endpoint;\npub mod endpoint_storage;\npub mod local_env;\npub mod pageserver;\npub mod postgresql_conf;\npub mod safekeeper;\npub mod storage_controller;\n"
  },
  {
    "path": "control_plane/src/local_env.rs",
    "content": "//! This module is responsible for locating and loading paths in a local setup.\n//!\n//! Now it also provides init method which acts like a stub for proper installation\n//! script which will use local paths.\n\nuse std::collections::HashMap;\nuse std::net::SocketAddr;\nuse std::path::{Path, PathBuf};\nuse std::process::{Command, Stdio};\nuse std::time::Duration;\nuse std::{env, fs};\n\nuse anyhow::{Context, bail};\nuse clap::ValueEnum;\nuse pageserver_api::config::PostHogConfig;\nuse pem::Pem;\nuse postgres_backend::AuthType;\nuse reqwest::{Certificate, Url};\nuse safekeeper_api::PgMajorVersion;\nuse serde::{Deserialize, Serialize};\nuse utils::auth::encode_from_key_file;\nuse utils::id::{NodeId, TenantId, TenantTimelineId, TimelineId};\n\nuse crate::broker::StorageBroker;\nuse crate::endpoint_storage::{\n    ENDPOINT_STORAGE_DEFAULT_ADDR, ENDPOINT_STORAGE_REMOTE_STORAGE_DIR, EndpointStorage,\n};\nuse crate::pageserver::{PAGESERVER_REMOTE_STORAGE_DIR, PageServerNode};\nuse crate::safekeeper::SafekeeperNode;\n\npub const DEFAULT_PG_VERSION: u32 = 17;\n\n//\n// This data structures represents neon_local CLI config\n//\n// It is deserialized from the .neon/config file, or the config file passed\n// to 'neon_local init --config=<path>' option. See control_plane/simple.conf for\n// an example.\n//\n#[derive(PartialEq, Eq, Clone, Debug)]\npub struct LocalEnv {\n    // Base directory for all the nodes (the pageserver, safekeepers and\n    // compute endpoints).\n    //\n    // This is not stored in the config file. Rather, this is the path where the\n    // config file itself is. It is read from the NEON_REPO_DIR env variable which\n    // must be an absolute path. If the env var is not set, $PWD/.neon is used.\n    pub base_data_dir: PathBuf,\n\n    // Path to postgres distribution. It's expected that \"bin\", \"include\",\n    // \"lib\", \"share\" from postgres distribution are there. 
If at some point\n    // in time we will be able to run against vanilla postgres we may split that\n    // to four separate paths and match OS-specific installation layout.\n    pub pg_distrib_dir: PathBuf,\n\n    // Path to pageserver binary.\n    pub neon_distrib_dir: PathBuf,\n\n    // Default tenant ID to use with the 'neon_local' command line utility, when\n    // --tenant_id is not explicitly specified.\n    pub default_tenant_id: Option<TenantId>,\n\n    // used to issue tokens during e.g pg start\n    pub private_key_path: PathBuf,\n    /// Path to environment's public key\n    pub public_key_path: PathBuf,\n\n    pub broker: NeonBroker,\n\n    // Configuration for the storage controller (1 per neon_local environment)\n    pub storage_controller: NeonStorageControllerConf,\n\n    /// This Vec must always contain at least one pageserver\n    /// Populdated by [`Self::load_config`] from the individual `pageserver.toml`s.\n    /// NB: not used anymore except for informing users that they need to change their `.neon/config`.\n    pub pageservers: Vec<PageServerConf>,\n\n    pub safekeepers: Vec<SafekeeperConf>,\n\n    pub endpoint_storage: EndpointStorageConf,\n\n    // Control plane upcall API for pageserver: if None, we will not run storage_controller  If set, this will\n    // be propagated into each pageserver's configuration.\n    pub control_plane_api: Url,\n\n    // Control plane upcall APIs for storage controller.  
If set, this will be propagated into the\n    // storage controller's configuration.\n    pub control_plane_hooks_api: Option<Url>,\n\n    /// Keep human-readable aliases in memory (and persist them to config), to hide ZId hex strings from the user.\n    // A `HashMap<String, HashMap<TenantId, TimelineId>>` would be more appropriate here,\n    // but deserialization into a generic toml object as `toml::Value::try_from` fails with an error.\n    // https://toml.io/en/v1.0.0 does not contain a concept of \"a table inside another table\".\n    pub branch_name_mappings: HashMap<String, Vec<(TenantId, TimelineId)>>,\n\n    /// Flag to generate SSL certificates for components that need it.\n    /// Also generates root CA certificate that is used to sign all other certificates.\n    pub generate_local_ssl_certs: bool,\n}\n\n/// On-disk state stored in `.neon/config`.\n#[derive(PartialEq, Eq, Clone, Debug, Default, Serialize, Deserialize)]\n#[serde(default, deny_unknown_fields)]\npub struct OnDiskConfig {\n    pub pg_distrib_dir: PathBuf,\n    pub neon_distrib_dir: PathBuf,\n    pub default_tenant_id: Option<TenantId>,\n    pub private_key_path: PathBuf,\n    pub public_key_path: PathBuf,\n    pub broker: NeonBroker,\n    pub storage_controller: NeonStorageControllerConf,\n    #[serde(\n        skip_serializing,\n        deserialize_with = \"fail_if_pageservers_field_specified\"\n    )]\n    pub pageservers: Vec<PageServerConf>,\n    pub safekeepers: Vec<SafekeeperConf>,\n    pub endpoint_storage: EndpointStorageConf,\n    pub control_plane_api: Option<Url>,\n    pub control_plane_hooks_api: Option<Url>,\n    pub control_plane_compute_hook_api: Option<Url>,\n    branch_name_mappings: HashMap<String, Vec<(TenantId, TimelineId)>>,\n    // Note: skip serializing because in compat tests old storage controller fails\n    // to load new config file. 
May be removed after this field is in release branch.\n    #[serde(skip_serializing_if = \"std::ops::Not::not\")]\n    pub generate_local_ssl_certs: bool,\n}\n\nfn fail_if_pageservers_field_specified<'de, D>(_: D) -> Result<Vec<PageServerConf>, D::Error>\nwhere\n    D: serde::Deserializer<'de>,\n{\n    Err(serde::de::Error::custom(\n        \"The 'pageservers' field is no longer used; pageserver.toml is now authoritative; \\\n         Please remove the `pageservers` from your .neon/config.\",\n    ))\n}\n\n/// The description of the neon_local env to be initialized by `neon_local init --config`.\n#[derive(Clone, Debug, Deserialize)]\n#[serde(deny_unknown_fields)]\npub struct NeonLocalInitConf {\n    // TODO: do we need this? Seems unused\n    pub pg_distrib_dir: Option<PathBuf>,\n    // TODO: do we need this? Seems unused\n    pub neon_distrib_dir: Option<PathBuf>,\n    pub default_tenant_id: TenantId,\n    pub broker: NeonBroker,\n    pub storage_controller: Option<NeonStorageControllerConf>,\n    pub pageservers: Vec<NeonLocalInitPageserverConf>,\n    pub safekeepers: Vec<SafekeeperConf>,\n    pub endpoint_storage: EndpointStorageConf,\n    pub control_plane_api: Option<Url>,\n    pub control_plane_hooks_api: Option<Url>,\n    pub generate_local_ssl_certs: bool,\n}\n\n#[derive(Serialize, Deserialize, PartialEq, Eq, Clone, Debug)]\n#[serde(default)]\npub struct EndpointStorageConf {\n    pub listen_addr: SocketAddr,\n}\n\n/// Broker config for cluster internal communication.\n#[derive(Serialize, Deserialize, PartialEq, Eq, Clone, Debug, Default)]\n#[serde(default)]\npub struct NeonBroker {\n    /// Broker listen HTTP address for storage nodes coordination, e.g. '127.0.0.1:50051'.\n    /// At least one of listen_addr or listen_https_addr must be set.\n    pub listen_addr: Option<SocketAddr>,\n    /// Broker listen HTTPS address for storage nodes coordination, e.g. 
'127.0.0.1:50051'.\n    /// At least one of listen_addr or listen_https_addr must be set.\n    /// listen_https_addr is preferred over listen_addr in neon_local.\n    pub listen_https_addr: Option<SocketAddr>,\n}\n\n/// A part of storage controller's config the neon_local knows about.\n#[derive(Serialize, Deserialize, PartialEq, Eq, Clone, Debug)]\n#[serde(default)]\npub struct NeonStorageControllerConf {\n    /// Heartbeat timeout before marking a node offline\n    #[serde(with = \"humantime_serde\")]\n    pub max_offline: Duration,\n\n    #[serde(with = \"humantime_serde\")]\n    pub max_warming_up: Duration,\n\n    pub start_as_candidate: bool,\n\n    /// Database url used when running multiple storage controller instances\n    pub database_url: Option<SocketAddr>,\n\n    /// Thresholds for auto-splitting a tenant into shards.\n    pub split_threshold: Option<u64>,\n    pub max_split_shards: Option<u8>,\n    pub initial_split_threshold: Option<u64>,\n    pub initial_split_shards: Option<u8>,\n\n    pub max_secondary_lag_bytes: Option<u64>,\n\n    #[serde(with = \"humantime_serde\")]\n    pub heartbeat_interval: Duration,\n\n    #[serde(with = \"humantime_serde\")]\n    pub long_reconcile_threshold: Option<Duration>,\n\n    pub use_https_pageserver_api: bool,\n\n    pub timelines_onto_safekeepers: bool,\n\n    pub use_https_safekeeper_api: bool,\n\n    pub use_local_compute_notifications: bool,\n\n    pub timeline_safekeeper_count: Option<usize>,\n\n    pub posthog_config: Option<PostHogConfig>,\n\n    pub kick_secondary_downloads: Option<bool>,\n\n    #[serde(with = \"humantime_serde\")]\n    pub shard_split_request_timeout: Option<Duration>,\n}\n\nimpl NeonStorageControllerConf {\n    // Use a shorter pageserver unavailability interval than the default to speed up tests.\n    const DEFAULT_MAX_OFFLINE_INTERVAL: std::time::Duration = std::time::Duration::from_secs(10);\n\n    const DEFAULT_MAX_WARMING_UP_INTERVAL: std::time::Duration = 
std::time::Duration::from_secs(30);\n\n    // Very tight heartbeat interval to speed up tests\n    const DEFAULT_HEARTBEAT_INTERVAL: std::time::Duration = std::time::Duration::from_millis(1000);\n}\n\nimpl Default for NeonStorageControllerConf {\n    fn default() -> Self {\n        Self {\n            max_offline: Self::DEFAULT_MAX_OFFLINE_INTERVAL,\n            max_warming_up: Self::DEFAULT_MAX_WARMING_UP_INTERVAL,\n            start_as_candidate: false,\n            database_url: None,\n            split_threshold: None,\n            max_split_shards: None,\n            initial_split_threshold: None,\n            initial_split_shards: None,\n            max_secondary_lag_bytes: None,\n            heartbeat_interval: Self::DEFAULT_HEARTBEAT_INTERVAL,\n            long_reconcile_threshold: None,\n            use_https_pageserver_api: false,\n            timelines_onto_safekeepers: true,\n            use_https_safekeeper_api: false,\n            use_local_compute_notifications: true,\n            timeline_safekeeper_count: None,\n            posthog_config: None,\n            kick_secondary_downloads: None,\n            shard_split_request_timeout: None,\n        }\n    }\n}\n\nimpl Default for EndpointStorageConf {\n    fn default() -> Self {\n        Self {\n            listen_addr: ENDPOINT_STORAGE_DEFAULT_ADDR,\n        }\n    }\n}\n\nimpl NeonBroker {\n    pub fn client_url(&self) -> Url {\n        let url = if let Some(addr) = self.listen_https_addr {\n            format!(\"https://{addr}\")\n        } else {\n            format!(\n                \"http://{}\",\n                self.listen_addr\n                    .expect(\"at least one address should be set\")\n            )\n        };\n\n        Url::parse(&url).expect(\"failed to construct url\")\n    }\n}\n\n// neon_local needs to know this subset of pageserver configuration.\n// For legacy reasons, this information is duplicated from `pageserver.toml` into `.neon/config`.\n// It can get stale if 
`pageserver.toml` is changed.\n// TODO(christian): don't store this at all in `.neon/config`, always load it from `pageserver.toml`\n#[derive(Serialize, Deserialize, PartialEq, Eq, Clone, Debug)]\n#[serde(default, deny_unknown_fields)]\npub struct PageServerConf {\n    pub id: NodeId,\n    pub listen_pg_addr: String,\n    pub listen_http_addr: String,\n    pub listen_https_addr: Option<String>,\n    pub listen_grpc_addr: Option<String>,\n    pub pg_auth_type: AuthType,\n    pub http_auth_type: AuthType,\n    pub grpc_auth_type: AuthType,\n    pub no_sync: bool,\n}\n\nimpl Default for PageServerConf {\n    fn default() -> Self {\n        Self {\n            id: NodeId(0),\n            listen_pg_addr: String::new(),\n            listen_http_addr: String::new(),\n            listen_https_addr: None,\n            listen_grpc_addr: None,\n            pg_auth_type: AuthType::Trust,\n            http_auth_type: AuthType::Trust,\n            grpc_auth_type: AuthType::Trust,\n            no_sync: false,\n        }\n    }\n}\n\n/// The toml that can be passed to `neon_local init --config`.\n/// This is a subset of the `pageserver.toml` configuration.\n// TODO(christian): use pageserver_api::config::ConfigToml (PR #7656)\n#[derive(Clone, Debug, serde::Deserialize, serde::Serialize)]\npub struct NeonLocalInitPageserverConf {\n    pub id: NodeId,\n    pub listen_pg_addr: String,\n    pub listen_http_addr: String,\n    pub listen_https_addr: Option<String>,\n    pub listen_grpc_addr: Option<String>,\n    pub pg_auth_type: AuthType,\n    pub http_auth_type: AuthType,\n    pub grpc_auth_type: AuthType,\n    #[serde(default, skip_serializing_if = \"std::ops::Not::not\")]\n    pub no_sync: bool,\n    #[serde(flatten)]\n    pub other: HashMap<String, toml::Value>,\n}\n\nimpl From<&NeonLocalInitPageserverConf> for PageServerConf {\n    fn from(conf: &NeonLocalInitPageserverConf) -> Self {\n        let NeonLocalInitPageserverConf {\n            id,\n            listen_pg_addr,\n        
    listen_http_addr,\n            listen_https_addr,\n            listen_grpc_addr,\n            pg_auth_type,\n            http_auth_type,\n            grpc_auth_type,\n            no_sync,\n            other: _,\n        } = conf;\n        Self {\n            id: *id,\n            listen_pg_addr: listen_pg_addr.clone(),\n            listen_http_addr: listen_http_addr.clone(),\n            listen_https_addr: listen_https_addr.clone(),\n            listen_grpc_addr: listen_grpc_addr.clone(),\n            pg_auth_type: *pg_auth_type,\n            grpc_auth_type: *grpc_auth_type,\n            http_auth_type: *http_auth_type,\n            no_sync: *no_sync,\n        }\n    }\n}\n\n#[derive(Serialize, Deserialize, PartialEq, Eq, Clone, Debug)]\n#[serde(default)]\npub struct SafekeeperConf {\n    pub id: NodeId,\n    pub pg_port: u16,\n    pub pg_tenant_only_port: Option<u16>,\n    pub http_port: u16,\n    pub https_port: Option<u16>,\n    pub sync: bool,\n    pub remote_storage: Option<String>,\n    pub backup_threads: Option<u32>,\n    pub auth_enabled: bool,\n    pub listen_addr: Option<String>,\n}\n\nimpl Default for SafekeeperConf {\n    fn default() -> Self {\n        Self {\n            id: NodeId(0),\n            pg_port: 0,\n            pg_tenant_only_port: None,\n            http_port: 0,\n            https_port: None,\n            sync: true,\n            remote_storage: None,\n            backup_threads: None,\n            auth_enabled: false,\n            listen_addr: None,\n        }\n    }\n}\n\n#[derive(Clone, Copy)]\npub enum InitForceMode {\n    MustNotExist,\n    EmptyDirOk,\n    RemoveAllContents,\n}\n\nimpl ValueEnum for InitForceMode {\n    fn value_variants<'a>() -> &'a [Self] {\n        &[\n            Self::MustNotExist,\n            Self::EmptyDirOk,\n            Self::RemoveAllContents,\n        ]\n    }\n\n    fn to_possible_value(&self) -> Option<clap::builder::PossibleValue> {\n        Some(clap::builder::PossibleValue::new(match self {\n  
          InitForceMode::MustNotExist => \"must-not-exist\",\n            InitForceMode::EmptyDirOk => \"empty-dir-ok\",\n            InitForceMode::RemoveAllContents => \"remove-all-contents\",\n        }))\n    }\n}\n\nimpl SafekeeperConf {\n    /// Compute is served by port on which only tenant scoped tokens allowed, if\n    /// it is configured.\n    pub fn get_compute_port(&self) -> u16 {\n        self.pg_tenant_only_port.unwrap_or(self.pg_port)\n    }\n}\n\nimpl LocalEnv {\n    pub fn pg_distrib_dir_raw(&self) -> PathBuf {\n        self.pg_distrib_dir.clone()\n    }\n\n    pub fn pg_distrib_dir(&self, pg_version: PgMajorVersion) -> anyhow::Result<PathBuf> {\n        let path = self.pg_distrib_dir.clone();\n\n        Ok(path.join(pg_version.v_str()))\n    }\n\n    pub fn pg_dir(&self, pg_version: PgMajorVersion, dir_name: &str) -> anyhow::Result<PathBuf> {\n        Ok(self.pg_distrib_dir(pg_version)?.join(dir_name))\n    }\n\n    pub fn pg_bin_dir(&self, pg_version: PgMajorVersion) -> anyhow::Result<PathBuf> {\n        self.pg_dir(pg_version, \"bin\")\n    }\n\n    pub fn pg_lib_dir(&self, pg_version: PgMajorVersion) -> anyhow::Result<PathBuf> {\n        self.pg_dir(pg_version, \"lib\")\n    }\n\n    pub fn endpoint_storage_bin(&self) -> PathBuf {\n        self.neon_distrib_dir.join(\"endpoint_storage\")\n    }\n\n    pub fn pageserver_bin(&self) -> PathBuf {\n        self.neon_distrib_dir.join(\"pageserver\")\n    }\n\n    pub fn storage_controller_bin(&self) -> PathBuf {\n        // Irrespective of configuration, storage controller binary is always\n        // run from the same location as neon_local.  
This means that for compatibility\n        // tests that run old pageserver/safekeeper, they still run latest storage controller.\n        let neon_local_bin_dir = env::current_exe().unwrap().parent().unwrap().to_owned();\n        neon_local_bin_dir.join(\"storage_controller\")\n    }\n\n    pub fn safekeeper_bin(&self) -> PathBuf {\n        self.neon_distrib_dir.join(\"safekeeper\")\n    }\n\n    pub fn storage_broker_bin(&self) -> PathBuf {\n        self.neon_distrib_dir.join(\"storage_broker\")\n    }\n\n    pub fn endpoints_path(&self) -> PathBuf {\n        self.base_data_dir.join(\"endpoints\")\n    }\n\n    pub fn storage_broker_data_dir(&self) -> PathBuf {\n        self.base_data_dir.join(\"storage_broker\")\n    }\n\n    pub fn pageserver_data_dir(&self, pageserver_id: NodeId) -> PathBuf {\n        self.base_data_dir\n            .join(format!(\"pageserver_{pageserver_id}\"))\n    }\n\n    pub fn safekeeper_data_dir(&self, data_dir_name: &str) -> PathBuf {\n        self.base_data_dir.join(\"safekeepers\").join(data_dir_name)\n    }\n\n    pub fn endpoint_storage_data_dir(&self) -> PathBuf {\n        self.base_data_dir.join(\"endpoint_storage\")\n    }\n\n    pub fn get_pageserver_conf(&self, id: NodeId) -> anyhow::Result<&PageServerConf> {\n        if let Some(conf) = self.pageservers.iter().find(|node| node.id == id) {\n            Ok(conf)\n        } else {\n            let have_ids = self\n                .pageservers\n                .iter()\n                .map(|node| format!(\"{}:{}\", node.id, node.listen_http_addr))\n                .collect::<Vec<_>>();\n            let joined = have_ids.join(\",\");\n            bail!(\"could not find pageserver {id}, have ids {joined}\")\n        }\n    }\n\n    pub fn ssl_ca_cert_path(&self) -> Option<PathBuf> {\n        if self.generate_local_ssl_certs {\n            Some(self.base_data_dir.join(\"rootCA.crt\"))\n        } else {\n            None\n        }\n    }\n\n    pub fn ssl_ca_key_path(&self) -> 
Option<PathBuf> {\n        if self.generate_local_ssl_certs {\n            Some(self.base_data_dir.join(\"rootCA.key\"))\n        } else {\n            None\n        }\n    }\n\n    pub fn generate_ssl_ca_cert(&self) -> anyhow::Result<()> {\n        let cert_path = self.ssl_ca_cert_path().unwrap();\n        let key_path = self.ssl_ca_key_path().unwrap();\n        if !fs::exists(cert_path.as_path())? {\n            generate_ssl_ca_cert(cert_path.as_path(), key_path.as_path())?;\n        }\n        Ok(())\n    }\n\n    pub fn generate_ssl_cert(&self, cert_path: &Path, key_path: &Path) -> anyhow::Result<()> {\n        self.generate_ssl_ca_cert()?;\n        generate_ssl_cert(\n            cert_path,\n            key_path,\n            self.ssl_ca_cert_path().unwrap().as_path(),\n            self.ssl_ca_key_path().unwrap().as_path(),\n        )\n    }\n\n    /// Creates HTTP client with local SSL CA certificates.\n    pub fn create_http_client(&self) -> reqwest::Client {\n        let ssl_ca_certs = self.ssl_ca_cert_path().map(|ssl_ca_file| {\n            let buf = std::fs::read(ssl_ca_file).expect(\"SSL CA file should exist\");\n            Certificate::from_pem_bundle(&buf).expect(\"SSL CA file should be valid\")\n        });\n\n        let mut http_client = reqwest::Client::builder();\n        for ssl_ca_cert in ssl_ca_certs.unwrap_or_default() {\n            http_client = http_client.add_root_certificate(ssl_ca_cert);\n        }\n\n        http_client\n            .build()\n            .expect(\"HTTP client should construct with no error\")\n    }\n\n    /// Inspect the base data directory and extract the instance id and instance directory path\n    /// for all storage controller instances\n    pub async fn storage_controller_instances(&self) -> std::io::Result<Vec<(u8, PathBuf)>> {\n        let mut instances = Vec::default();\n\n        let dir = std::fs::read_dir(self.base_data_dir.clone())?;\n        for dentry in dir {\n            let dentry = dentry?;\n         
   let is_dir = dentry.metadata()?.is_dir();\n            let filename = dentry.file_name().into_string().unwrap();\n            let parsed_instance_id = match filename.strip_prefix(\"storage_controller_\") {\n                Some(suffix) => suffix.parse::<u8>().ok(),\n                None => None,\n            };\n\n            let is_instance_dir = is_dir && parsed_instance_id.is_some();\n\n            if !is_instance_dir {\n                continue;\n            }\n\n            instances.push((\n                parsed_instance_id.expect(\"Checked previously\"),\n                dentry.path(),\n            ));\n        }\n\n        Ok(instances)\n    }\n\n    pub fn register_branch_mapping(\n        &mut self,\n        branch_name: String,\n        tenant_id: TenantId,\n        timeline_id: TimelineId,\n    ) -> anyhow::Result<()> {\n        let existing_values = self\n            .branch_name_mappings\n            .entry(branch_name.clone())\n            .or_default();\n\n        let existing_ids = existing_values\n            .iter()\n            .find(|(existing_tenant_id, _)| existing_tenant_id == &tenant_id);\n\n        if let Some((_, old_timeline_id)) = existing_ids {\n            if old_timeline_id == &timeline_id {\n                Ok(())\n            } else {\n                bail!(\n                    \"branch '{branch_name}' is already mapped to timeline {old_timeline_id}, cannot map to another timeline {timeline_id}\"\n                );\n            }\n        } else {\n            existing_values.push((tenant_id, timeline_id));\n            Ok(())\n        }\n    }\n\n    pub fn get_branch_timeline_id(\n        &self,\n        branch_name: &str,\n        tenant_id: TenantId,\n    ) -> Option<TimelineId> {\n        self.branch_name_mappings\n            .get(branch_name)?\n            .iter()\n            .find(|(mapped_tenant_id, _)| mapped_tenant_id == &tenant_id)\n            .map(|&(_, timeline_id)| timeline_id)\n    }\n\n    pub fn 
timeline_name_mappings(&self) -> HashMap<TenantTimelineId, String> {\n        self.branch_name_mappings\n            .iter()\n            .flat_map(|(name, tenant_timelines)| {\n                tenant_timelines.iter().map(|&(tenant_id, timeline_id)| {\n                    (TenantTimelineId::new(tenant_id, timeline_id), name.clone())\n                })\n            })\n            .collect()\n    }\n\n    ///  Construct `Self` from on-disk state.\n    pub fn load_config(repopath: &Path) -> anyhow::Result<Self> {\n        if !repopath.exists() {\n            bail!(\n                \"Neon config is not found in {}. You need to run 'neon_local init' first\",\n                repopath.to_str().unwrap()\n            );\n        }\n\n        // TODO: check that it looks like a neon repository\n\n        // load and parse file\n        let config_file_contents = fs::read_to_string(repopath.join(\"config\"))?;\n        let on_disk_config: OnDiskConfig = toml::from_str(config_file_contents.as_str())?;\n        let mut env = {\n            let OnDiskConfig {\n                pg_distrib_dir,\n                neon_distrib_dir,\n                default_tenant_id,\n                private_key_path,\n                public_key_path,\n                broker,\n                storage_controller,\n                pageservers,\n                safekeepers,\n                control_plane_api,\n                control_plane_hooks_api,\n                control_plane_compute_hook_api: _,\n                branch_name_mappings,\n                generate_local_ssl_certs,\n                endpoint_storage,\n            } = on_disk_config;\n            LocalEnv {\n                base_data_dir: repopath.to_owned(),\n                pg_distrib_dir,\n                neon_distrib_dir,\n                default_tenant_id,\n                private_key_path,\n                public_key_path,\n                broker,\n                storage_controller,\n                pageservers,\n                
safekeepers,\n                control_plane_api: control_plane_api.unwrap(),\n                control_plane_hooks_api,\n                branch_name_mappings,\n                generate_local_ssl_certs,\n                endpoint_storage,\n            }\n        };\n\n        // The source of truth for pageserver configuration is the pageserver.toml.\n        assert!(\n            env.pageservers.is_empty(),\n            \"we ensure this during deserialization\"\n        );\n        env.pageservers = {\n            let iter = std::fs::read_dir(repopath).context(\"open dir\")?;\n            let mut pageservers = Vec::new();\n            for res in iter {\n                let dentry = res?;\n                const PREFIX: &str = \"pageserver_\";\n                let dentry_name = dentry\n                    .file_name()\n                    .into_string()\n                    .ok()\n                    .with_context(|| format!(\"non-utf8 dentry: {:?}\", dentry.path()))\n                    .unwrap();\n                if !dentry_name.starts_with(PREFIX) {\n                    continue;\n                }\n                if !dentry.file_type().context(\"determine file type\")?.is_dir() {\n                    anyhow::bail!(\"expected a directory, got {:?}\", dentry.path());\n                }\n                let id = dentry_name[PREFIX.len()..]\n                    .parse::<NodeId>()\n                    .with_context(|| format!(\"parse id from {:?}\", dentry.path()))?;\n                // TODO(christian): use pageserver_api::config::ConfigToml (PR #7656)\n                #[derive(serde::Serialize, serde::Deserialize)]\n                // (allow unknown fields, unlike PageServerConf)\n                struct PageserverConfigTomlSubset {\n                    listen_pg_addr: String,\n                    listen_http_addr: String,\n                    listen_https_addr: Option<String>,\n                    listen_grpc_addr: Option<String>,\n                    pg_auth_type: 
AuthType,\n                    http_auth_type: AuthType,\n                    grpc_auth_type: AuthType,\n                    #[serde(default)]\n                    no_sync: bool,\n                }\n                let config_toml_path = dentry.path().join(\"pageserver.toml\");\n                let config_toml: PageserverConfigTomlSubset = toml_edit::de::from_str(\n                    &std::fs::read_to_string(&config_toml_path)\n                        .with_context(|| format!(\"read {config_toml_path:?}\"))?,\n                )\n                .context(\"parse pageserver.toml\")?;\n                let identity_toml_path = dentry.path().join(\"identity.toml\");\n                #[derive(serde::Serialize, serde::Deserialize)]\n                struct IdentityTomlSubset {\n                    id: NodeId,\n                }\n                let identity_toml: IdentityTomlSubset = toml_edit::de::from_str(\n                    &std::fs::read_to_string(&identity_toml_path)\n                        .with_context(|| format!(\"read {identity_toml_path:?}\"))?,\n                )\n                .context(\"parse identity.toml\")?;\n                let PageserverConfigTomlSubset {\n                    listen_pg_addr,\n                    listen_http_addr,\n                    listen_https_addr,\n                    listen_grpc_addr,\n                    pg_auth_type,\n                    http_auth_type,\n                    grpc_auth_type,\n                    no_sync,\n                } = config_toml;\n                let IdentityTomlSubset {\n                    id: identity_toml_id,\n                } = identity_toml;\n                let conf = PageServerConf {\n                    id: {\n                        anyhow::ensure!(\n                            identity_toml_id == id,\n                            \"id mismatch: identity.toml:id={identity_toml_id} pageserver_(.*) id={id}\",\n                        );\n                        id\n                    },\n      
              listen_pg_addr,\n                    listen_http_addr,\n                    listen_https_addr,\n                    listen_grpc_addr,\n                    pg_auth_type,\n                    http_auth_type,\n                    grpc_auth_type,\n                    no_sync,\n                };\n                pageservers.push(conf);\n            }\n            pageservers\n        };\n\n        Ok(env)\n    }\n\n    pub fn persist_config(&self) -> anyhow::Result<()> {\n        Self::persist_config_impl(\n            &self.base_data_dir,\n            &OnDiskConfig {\n                pg_distrib_dir: self.pg_distrib_dir.clone(),\n                neon_distrib_dir: self.neon_distrib_dir.clone(),\n                default_tenant_id: self.default_tenant_id,\n                private_key_path: self.private_key_path.clone(),\n                public_key_path: self.public_key_path.clone(),\n                broker: self.broker.clone(),\n                storage_controller: self.storage_controller.clone(),\n                pageservers: vec![], // it's skip_serializing anyway\n                safekeepers: self.safekeepers.clone(),\n                control_plane_api: Some(self.control_plane_api.clone()),\n                control_plane_hooks_api: self.control_plane_hooks_api.clone(),\n                control_plane_compute_hook_api: None,\n                branch_name_mappings: self.branch_name_mappings.clone(),\n                generate_local_ssl_certs: self.generate_local_ssl_certs,\n                endpoint_storage: self.endpoint_storage.clone(),\n            },\n        )\n    }\n\n    pub fn persist_config_impl(base_path: &Path, config: &OnDiskConfig) -> anyhow::Result<()> {\n        let conf_content = &toml::to_string_pretty(config)?;\n        let target_config_path = base_path.join(\"config\");\n        fs::write(&target_config_path, conf_content).with_context(|| {\n            format!(\n                \"Failed to write config file into path '{}'\",\n               
 target_config_path.display()\n            )\n        })\n    }\n\n    // this function is used only for testing purposes in CLI e g generate tokens during init\n    pub fn generate_auth_token<S: Serialize>(&self, claims: &S) -> anyhow::Result<String> {\n        let key = self.read_private_key()?;\n        encode_from_key_file(claims, &key)\n    }\n\n    /// Get the path to the private key.\n    pub fn get_private_key_path(&self) -> PathBuf {\n        if self.private_key_path.is_absolute() {\n            self.private_key_path.to_path_buf()\n        } else {\n            self.base_data_dir.join(&self.private_key_path)\n        }\n    }\n\n    /// Get the path to the public key.\n    pub fn get_public_key_path(&self) -> PathBuf {\n        if self.public_key_path.is_absolute() {\n            self.public_key_path.to_path_buf()\n        } else {\n            self.base_data_dir.join(&self.public_key_path)\n        }\n    }\n\n    /// Read the contents of the private key file.\n    pub fn read_private_key(&self) -> anyhow::Result<Pem> {\n        let private_key_path = self.get_private_key_path();\n        let pem = pem::parse(fs::read(private_key_path)?)?;\n        Ok(pem)\n    }\n\n    /// Read the contents of the public key file.\n    pub fn read_public_key(&self) -> anyhow::Result<Pem> {\n        let public_key_path = self.get_public_key_path();\n        let pem = pem::parse(fs::read(public_key_path)?)?;\n        Ok(pem)\n    }\n\n    /// Materialize the [`NeonLocalInitConf`] to disk. Called during [`neon_local init`].\n    pub fn init(conf: NeonLocalInitConf, force: &InitForceMode) -> anyhow::Result<()> {\n        let base_path = base_path();\n        assert_ne!(base_path, Path::new(\"\"));\n        let base_path = &base_path;\n\n        // create base_path dir\n        if base_path.exists() {\n            match force {\n                InitForceMode::MustNotExist => {\n                    bail!(\n                        \"directory '{}' already exists. 
Perhaps already initialized?\",\n                        base_path.display()\n                    );\n                }\n                InitForceMode::EmptyDirOk => {\n                    if let Some(res) = std::fs::read_dir(base_path)?.next() {\n                        res.context(\"check if directory is empty\")?;\n                        anyhow::bail!(\"directory not empty: {base_path:?}\");\n                    }\n                }\n                InitForceMode::RemoveAllContents => {\n                    println!(\"removing all contents of '{}'\", base_path.display());\n                    // instead of directly calling `remove_dir_all`, we keep the original dir but removing\n                    // all contents inside. This helps if the developer symbol links another directory (i.e.,\n                    // S3 local SSD) to the `.neon` base directory.\n                    for entry in std::fs::read_dir(base_path)? {\n                        let entry = entry?;\n                        let path = entry.path();\n                        if path.is_dir() {\n                            fs::remove_dir_all(&path)?;\n                        } else {\n                            fs::remove_file(&path)?;\n                        }\n                    }\n                }\n            }\n        }\n        if !base_path.exists() {\n            fs::create_dir(base_path)?;\n        }\n\n        let NeonLocalInitConf {\n            pg_distrib_dir,\n            neon_distrib_dir,\n            default_tenant_id,\n            broker,\n            storage_controller,\n            pageservers,\n            safekeepers,\n            control_plane_api,\n            generate_local_ssl_certs,\n            control_plane_hooks_api,\n            endpoint_storage,\n        } = conf;\n\n        // Find postgres binaries.\n        // Follow POSTGRES_DISTRIB_DIR if set, otherwise look in \"pg_install\".\n        // Note that later in the code we assume, that distrib dirs follow the same 
pattern\n        // for all postgres versions.\n        let pg_distrib_dir = pg_distrib_dir.unwrap_or_else(|| {\n            if let Some(postgres_bin) = env::var_os(\"POSTGRES_DISTRIB_DIR\") {\n                postgres_bin.into()\n            } else {\n                let cwd = env::current_dir().unwrap();\n                cwd.join(\"pg_install\")\n            }\n        });\n\n        // Find neon binaries.\n        let neon_distrib_dir = neon_distrib_dir\n            .unwrap_or_else(|| env::current_exe().unwrap().parent().unwrap().to_owned());\n\n        // Generate keypair for JWT.\n        //\n        // The keypair is only needed if authentication is enabled in any of the\n        // components. For convenience, we generate the keypair even if authentication\n        // is not enabled, so that you can easily enable it after the initialization\n        // step.\n        generate_auth_keys(\n            base_path.join(\"auth_private_key.pem\").as_path(),\n            base_path.join(\"auth_public_key.pem\").as_path(),\n        )\n        .context(\"generate auth keys\")?;\n        let private_key_path = PathBuf::from(\"auth_private_key.pem\");\n        let public_key_path = PathBuf::from(\"auth_public_key.pem\");\n\n        // create the runtime type because the remaining initialization code below needs\n        // a LocalEnv instance op operation\n        // TODO: refactor to avoid this, LocalEnv should only be constructed from on-disk state\n        let env = LocalEnv {\n            base_data_dir: base_path.clone(),\n            pg_distrib_dir,\n            neon_distrib_dir,\n            default_tenant_id: Some(default_tenant_id),\n            private_key_path,\n            public_key_path,\n            broker,\n            storage_controller: storage_controller.unwrap_or_default(),\n            pageservers: pageservers.iter().map(Into::into).collect(),\n            safekeepers,\n            control_plane_api: control_plane_api.unwrap(),\n            
control_plane_hooks_api,\n            branch_name_mappings: Default::default(),\n            generate_local_ssl_certs,\n            endpoint_storage,\n        };\n\n        if generate_local_ssl_certs {\n            env.generate_ssl_ca_cert()?;\n        }\n\n        // create endpoints dir\n        fs::create_dir_all(env.endpoints_path())?;\n\n        // create storage broker dir\n        fs::create_dir_all(env.storage_broker_data_dir())?;\n        StorageBroker::from_env(&env)\n            .initialize()\n            .context(\"storage broker init failed\")?;\n\n        // create safekeeper dirs\n        for safekeeper in &env.safekeepers {\n            fs::create_dir_all(SafekeeperNode::datadir_path_by_id(&env, safekeeper.id))?;\n            SafekeeperNode::from_env(&env, safekeeper)\n                .initialize()\n                .context(\"safekeeper init failed\")?;\n        }\n\n        // initialize pageserver state\n        for (i, ps) in pageservers.into_iter().enumerate() {\n            let runtime_ps = &env.pageservers[i];\n            assert_eq!(&PageServerConf::from(&ps), runtime_ps);\n            fs::create_dir(env.pageserver_data_dir(ps.id))?;\n            PageServerNode::from_env(&env, runtime_ps)\n                .initialize(ps)\n                .context(\"pageserver init failed\")?;\n        }\n\n        EndpointStorage::from_env(&env)\n            .init()\n            .context(\"object storage init failed\")?;\n\n        // setup remote remote location for default LocalFs remote storage\n        std::fs::create_dir_all(env.base_data_dir.join(PAGESERVER_REMOTE_STORAGE_DIR))?;\n        std::fs::create_dir_all(env.base_data_dir.join(ENDPOINT_STORAGE_REMOTE_STORAGE_DIR))?;\n\n        env.persist_config()\n    }\n}\n\npub fn base_path() -> PathBuf {\n    let path = match std::env::var_os(\"NEON_REPO_DIR\") {\n        Some(val) => {\n            let path = PathBuf::from(val);\n            if !path.is_absolute() {\n                // repeat the env var 
in the error because our default is always absolute\n                panic!(\"NEON_REPO_DIR must be an absolute path, got {path:?}\");\n            }\n            path\n        }\n        None => {\n            let pwd = std::env::current_dir()\n                // technically this can fail but it's quite unlikeley\n                .expect(\"determine current directory\");\n            let pwd_abs = pwd.canonicalize().expect(\"canonicalize current directory\");\n            pwd_abs.join(\".neon\")\n        }\n    };\n    assert!(path.is_absolute());\n    path\n}\n\n/// Generate a public/private key pair for JWT authentication\nfn generate_auth_keys(private_key_path: &Path, public_key_path: &Path) -> anyhow::Result<()> {\n    // Generate the key pair\n    //\n    // openssl genpkey -algorithm ed25519 -out auth_private_key.pem\n    let keygen_output = Command::new(\"openssl\")\n        .arg(\"genpkey\")\n        .args([\"-algorithm\", \"ed25519\"])\n        .args([\"-out\", private_key_path.to_str().unwrap()])\n        .stdout(Stdio::null())\n        .output()\n        .context(\"failed to generate auth private key\")?;\n    if !keygen_output.status.success() {\n        bail!(\n            \"openssl failed: '{}'\",\n            String::from_utf8_lossy(&keygen_output.stderr)\n        );\n    }\n\n    // Extract the public key from the private key file\n    //\n    // openssl pkey -in auth_private_key.pem -pubout -out auth_public_key.pem\n    let keygen_output = Command::new(\"openssl\")\n        .arg(\"pkey\")\n        .args([\"-in\", private_key_path.to_str().unwrap()])\n        .arg(\"-pubout\")\n        .args([\"-out\", public_key_path.to_str().unwrap()])\n        .output()\n        .context(\"failed to extract public key from private key\")?;\n    if !keygen_output.status.success() {\n        bail!(\n            \"openssl failed: '{}'\",\n            String::from_utf8_lossy(&keygen_output.stderr)\n        );\n    }\n\n    Ok(())\n}\n\nfn 
generate_ssl_ca_cert(cert_path: &Path, key_path: &Path) -> anyhow::Result<()> {\n    // openssl req -x509 -newkey ed25519 -nodes -subj \"/CN=Neon Local CA\" -days 36500 \\\n    // -out rootCA.crt -keyout rootCA.key\n    let keygen_output = Command::new(\"openssl\")\n        .args([\n            \"req\", \"-x509\", \"-newkey\", \"ed25519\", \"-nodes\", \"-days\", \"36500\",\n        ])\n        .args([\"-subj\", \"/CN=Neon Local CA\"])\n        .args([\"-out\", cert_path.to_str().unwrap()])\n        .args([\"-keyout\", key_path.to_str().unwrap()])\n        .output()\n        .context(\"failed to generate CA certificate\")?;\n    if !keygen_output.status.success() {\n        bail!(\n            \"openssl failed: '{}'\",\n            String::from_utf8_lossy(&keygen_output.stderr)\n        );\n    }\n    Ok(())\n}\n\nfn generate_ssl_cert(\n    cert_path: &Path,\n    key_path: &Path,\n    ca_cert_path: &Path,\n    ca_key_path: &Path,\n) -> anyhow::Result<()> {\n    // Generate Certificate Signing Request (CSR).\n    let mut csr_path = cert_path.to_path_buf();\n    csr_path.set_extension(\".csr\");\n\n    // openssl req -new -nodes -newkey ed25519 -keyout server.key -out server.csr \\\n    // -subj \"/CN=localhost\" -addext \"subjectAltName=DNS:localhost,IP:127.0.0.1\"\n    let keygen_output = Command::new(\"openssl\")\n        .args([\"req\", \"-new\", \"-nodes\"])\n        .args([\"-newkey\", \"ed25519\"])\n        .args([\"-subj\", \"/CN=localhost\"])\n        .args([\"-addext\", \"subjectAltName=DNS:localhost,IP:127.0.0.1\"])\n        .args([\"-keyout\", key_path.to_str().unwrap()])\n        .args([\"-out\", csr_path.to_str().unwrap()])\n        .output()\n        .context(\"failed to generate CSR\")?;\n    if !keygen_output.status.success() {\n        bail!(\n            \"openssl failed: '{}'\",\n            String::from_utf8_lossy(&keygen_output.stderr)\n        );\n    }\n\n    // Sign CSR with CA key.\n    //\n    // openssl x509 -req -in server.csr -CA 
rootCA.crt -CAkey rootCA.key -CAcreateserial \\\n    // -out server.crt -days 36500 -copy_extensions copyall\n    let keygen_output = Command::new(\"openssl\")\n        .args([\"x509\", \"-req\"])\n        .args([\"-in\", csr_path.to_str().unwrap()])\n        .args([\"-CA\", ca_cert_path.to_str().unwrap()])\n        .args([\"-CAkey\", ca_key_path.to_str().unwrap()])\n        .arg(\"-CAcreateserial\")\n        .args([\"-out\", cert_path.to_str().unwrap()])\n        .args([\"-days\", \"36500\"])\n        .args([\"-copy_extensions\", \"copyall\"])\n        .output()\n        .context(\"failed to sign CSR\")?;\n    if !keygen_output.status.success() {\n        bail!(\n            \"openssl failed: '{}'\",\n            String::from_utf8_lossy(&keygen_output.stderr)\n        );\n    }\n\n    // Remove CSR file as it's not needed anymore.\n    fs::remove_file(csr_path)?;\n\n    Ok(())\n}\n"
  },
  {
    "path": "control_plane/src/pageserver.rs",
    "content": "//! Code to manage pageservers\n//!\n//! In the local test environment, the data for each pageserver is stored in\n//!\n//! ```text\n//!   .neon/pageserver_<pageserver_id>\n//! ```\n//!\nuse std::collections::HashMap;\nuse std::io;\nuse std::io::Write;\nuse std::num::NonZeroU64;\nuse std::path::PathBuf;\nuse std::str::FromStr;\nuse std::time::Duration;\n\nuse anyhow::{Context, bail};\nuse camino::Utf8PathBuf;\nuse pageserver_api::config::{DEFAULT_GRPC_LISTEN_PORT, DEFAULT_HTTP_LISTEN_PORT};\nuse pageserver_api::models::{self, TenantInfo, TimelineInfo};\nuse pageserver_api::shard::TenantShardId;\nuse pageserver_client::mgmt_api;\nuse postgres_backend::AuthType;\nuse postgres_connection::{PgConnectionConfig, parse_host_port};\nuse safekeeper_api::PgMajorVersion;\nuse utils::auth::{Claims, Scope};\nuse utils::id::{NodeId, TenantId, TimelineId};\nuse utils::lsn::Lsn;\n\nuse crate::background_process;\nuse crate::local_env::{LocalEnv, NeonLocalInitPageserverConf, PageServerConf};\n\n/// Directory within .neon which will be used by default for LocalFs remote storage.\npub const PAGESERVER_REMOTE_STORAGE_DIR: &str = \"local_fs_remote_storage/pageserver\";\n\n//\n// Control routines for pageserver.\n//\n// Used in CLI and tests.\n//\n#[derive(Debug)]\npub struct PageServerNode {\n    pub pg_connection_config: PgConnectionConfig,\n    pub conf: PageServerConf,\n    pub env: LocalEnv,\n    pub http_client: mgmt_api::Client,\n}\n\nimpl PageServerNode {\n    pub fn from_env(env: &LocalEnv, conf: &PageServerConf) -> PageServerNode {\n        let (host, port) =\n            parse_host_port(&conf.listen_pg_addr).expect(\"Unable to parse listen_pg_addr\");\n        let port = port.unwrap_or(5432);\n\n        let endpoint = if env.storage_controller.use_https_pageserver_api {\n            format!(\n                \"https://{}\",\n                conf.listen_https_addr.as_ref().expect(\n                    \"listen https address should be specified if 
use_https_pageserver_api is on\"\n                )\n            )\n        } else {\n            format!(\"http://{}\", conf.listen_http_addr)\n        };\n\n        Self {\n            pg_connection_config: PgConnectionConfig::new_host_port(host, port),\n            conf: conf.clone(),\n            env: env.clone(),\n            http_client: mgmt_api::Client::new(\n                env.create_http_client(),\n                endpoint,\n                {\n                    match conf.http_auth_type {\n                        AuthType::Trust => None,\n                        AuthType::NeonJWT => Some(\n                            env.generate_auth_token(&Claims::new(None, Scope::PageServerApi))\n                                .unwrap(),\n                        ),\n                    }\n                }\n                .as_deref(),\n            ),\n        }\n    }\n\n    fn pageserver_make_identity_toml(&self, node_id: NodeId) -> toml_edit::DocumentMut {\n        toml_edit::DocumentMut::from_str(&format!(\"id={node_id}\")).unwrap()\n    }\n\n    fn pageserver_init_make_toml(\n        &self,\n        conf: NeonLocalInitPageserverConf,\n    ) -> anyhow::Result<toml_edit::DocumentMut> {\n        assert_eq!(\n            &PageServerConf::from(&conf),\n            &self.conf,\n            \"during neon_local init, we derive the runtime state of ps conf (self.conf) from the --config flag fully\"\n        );\n\n        // TODO(christian): instead of what we do here, create a pageserver_api::config::ConfigToml (PR #7656)\n\n        // FIXME: the paths should be shell-escaped to handle paths with spaces, quotes etc.\n        let pg_distrib_dir_param = format!(\n            \"pg_distrib_dir='{}'\",\n            self.env.pg_distrib_dir_raw().display()\n        );\n\n        let broker_endpoint_param = format!(\"broker_endpoint='{}'\", self.env.broker.client_url());\n\n        let mut overrides = vec![pg_distrib_dir_param, broker_endpoint_param];\n\n        
overrides.push(format!(\n            \"control_plane_api='{}'\",\n            self.env.control_plane_api.as_str()\n        ));\n\n        // Storage controller uses the same auth as pageserver: if JWT is enabled\n        // for us, we will also need it to talk to them.\n        if matches!(conf.http_auth_type, AuthType::NeonJWT) {\n            let jwt_token = self\n                .env\n                .generate_auth_token(&Claims::new(None, Scope::GenerationsApi))\n                .unwrap();\n            overrides.push(format!(\"control_plane_api_token='{jwt_token}'\"));\n        }\n\n        if !conf.other.contains_key(\"remote_storage\") {\n            overrides.push(format!(\n                \"remote_storage={{local_path='../{PAGESERVER_REMOTE_STORAGE_DIR}'}}\"\n            ));\n        }\n\n        if [conf.http_auth_type, conf.pg_auth_type, conf.grpc_auth_type]\n            .contains(&AuthType::NeonJWT)\n        {\n            // Keys are generated in the toplevel repo dir, pageservers' workdirs\n            // are one level below that, so refer to keys with ../\n            overrides.push(\"auth_validation_public_key_path='../auth_public_key.pem'\".to_owned());\n        }\n\n        if let Some(ssl_ca_file) = self.env.ssl_ca_cert_path() {\n            overrides.push(format!(\"ssl_ca_file='{}'\", ssl_ca_file.to_str().unwrap()));\n        }\n\n        // Apply the user-provided overrides\n        overrides.push({\n            let mut doc =\n                toml_edit::ser::to_document(&conf).expect(\"we deserialized this from toml earlier\");\n            // `id` is written out to `identity.toml` instead of `pageserver.toml`\n            doc.remove(\"id\").expect(\"it's part of the struct\");\n            doc.to_string()\n        });\n\n        // Turn `overrides` into a toml document.\n        // TODO: above code is legacy code, it should be refactored to use toml_edit directly.\n        let mut config_toml = toml_edit::DocumentMut::new();\n        for 
fragment_str in overrides {\n            let fragment = toml_edit::DocumentMut::from_str(&fragment_str)\n                .expect(\"all fragments in `overrides` are valid toml documents, this function controls that\");\n            for (key, item) in fragment.iter() {\n                config_toml.insert(key, item.clone());\n            }\n        }\n        Ok(config_toml)\n    }\n\n    /// Initializes a pageserver node by creating its config with the overrides provided.\n    pub fn initialize(&self, conf: NeonLocalInitPageserverConf) -> anyhow::Result<()> {\n        self.pageserver_init(conf)\n            .with_context(|| format!(\"Failed to run init for pageserver node {}\", self.conf.id))\n    }\n\n    pub fn repo_path(&self) -> PathBuf {\n        self.env.pageserver_data_dir(self.conf.id)\n    }\n\n    /// The pid file is created by the pageserver process, with its pid stored inside.\n    /// Other pageservers cannot lock the same file and overwrite it for as long as the current\n    /// pageserver runs. 
(Unless someone removes the file manually; never do that!)\n    fn pid_file(&self) -> Utf8PathBuf {\n        Utf8PathBuf::from_path_buf(self.repo_path().join(\"pageserver.pid\"))\n            .expect(\"non-Unicode path\")\n    }\n\n    pub async fn start(&self, retry_timeout: &Duration) -> anyhow::Result<()> {\n        self.start_node(retry_timeout).await\n    }\n\n    fn pageserver_init(&self, conf: NeonLocalInitPageserverConf) -> anyhow::Result<()> {\n        let datadir = self.repo_path();\n        let node_id = self.conf.id;\n        println!(\n            \"Initializing pageserver node {} at '{}' in {:?}\",\n            node_id,\n            self.pg_connection_config.raw_address(),\n            datadir\n        );\n        io::stdout().flush()?;\n\n        // If the config file we got as a CLI argument includes the `availability_zone`\n        // config, then use that to populate the `metadata.json` file for the pageserver.\n        // In production the deployment orchestrator does this for us.\n        let az_id = conf\n            .other\n            .get(\"availability_zone\")\n            .map(|toml| {\n                let az_str = toml.to_string();\n                // Trim the (\") chars from the toml representation\n                if az_str.starts_with('\"') && az_str.ends_with('\"') {\n                    az_str[1..az_str.len() - 1].to_string()\n                } else {\n                    az_str\n                }\n            })\n            .unwrap_or(\"local\".to_string());\n\n        let config = self\n            .pageserver_init_make_toml(conf)\n            .context(\"make pageserver toml\")?;\n        let config_file_path = datadir.join(\"pageserver.toml\");\n        let mut config_file = std::fs::OpenOptions::new()\n            .create_new(true)\n            .write(true)\n            .open(&config_file_path)\n            .with_context(|| format!(\"open pageserver toml for write: {config_file_path:?}\"))?;\n        config_file\n            
.write_all(config.to_string().as_bytes())\n            .context(\"write pageserver toml\")?;\n        drop(config_file);\n\n        let identity_file_path = datadir.join(\"identity.toml\");\n        let mut identity_file = std::fs::OpenOptions::new()\n            .create_new(true)\n            .write(true)\n            .open(&identity_file_path)\n            .with_context(|| format!(\"open identity toml for write: {identity_file_path:?}\"))?;\n        let identity_toml = self.pageserver_make_identity_toml(node_id);\n        identity_file\n            .write_all(identity_toml.to_string().as_bytes())\n            .context(\"write identity toml\")?;\n        drop(identity_toml);\n\n        if self.env.generate_local_ssl_certs {\n            self.env.generate_ssl_cert(\n                datadir.join(\"server.crt\").as_path(),\n                datadir.join(\"server.key\").as_path(),\n            )?;\n        }\n\n        // TODO: invoke a TBD config-check command to validate that pageserver will start with the written config\n\n        // Write metadata file, used by pageserver on startup to register itself with\n        // the storage controller\n        let metadata_path = datadir.join(\"metadata.json\");\n\n        let http_host = \"localhost\".to_string();\n        let (_, http_port) =\n            parse_host_port(&self.conf.listen_http_addr).expect(\"Unable to parse listen_http_addr\");\n        let http_port = http_port.unwrap_or(DEFAULT_HTTP_LISTEN_PORT);\n\n        let https_port = match self.conf.listen_https_addr.as_ref() {\n            Some(https_addr) => {\n                let (_https_host, https_port) =\n                    parse_host_port(https_addr).expect(\"Unable to parse listen_https_addr\");\n                Some(https_port.unwrap_or(9899))\n            }\n            None => None,\n        };\n\n        let (mut grpc_host, mut grpc_port) = (None, None);\n        if let Some(grpc_addr) = &self.conf.listen_grpc_addr {\n            let (_, port) = 
parse_host_port(grpc_addr).expect(\"Unable to parse listen_grpc_addr\");\n            grpc_host = Some(\"localhost\".to_string());\n            grpc_port = Some(port.unwrap_or(DEFAULT_GRPC_LISTEN_PORT));\n        }\n\n        // Intentionally hand-craft JSON: this acts as an implicit format compat test\n        // in case the pageserver-side structure is edited, and reflects the real life\n        // situation: the metadata is written by some other script.\n        std::fs::write(\n            metadata_path,\n            serde_json::to_vec(&pageserver_api::config::NodeMetadata {\n                postgres_host: \"localhost\".to_string(),\n                postgres_port: self.pg_connection_config.port(),\n                grpc_host,\n                grpc_port,\n                http_host,\n                http_port,\n                https_port,\n                other: HashMap::from([(\n                    \"availability_zone_id\".to_string(),\n                    serde_json::json!(az_id),\n                )]),\n            })\n            .unwrap(),\n        )\n        .expect(\"Failed to write metadata file\");\n\n        Ok(())\n    }\n\n    async fn start_node(&self, retry_timeout: &Duration) -> anyhow::Result<()> {\n        // TODO: using a thread here because start_process() is not async but we need to call check_status()\n        let datadir = self.repo_path();\n        println!(\n            \"Starting pageserver node {} at '{}' in {:?}, retrying for {:?}\",\n            self.conf.id,\n            self.pg_connection_config.raw_address(),\n            datadir,\n            retry_timeout\n        );\n        io::stdout().flush().context(\"flush stdout\")?;\n\n        let datadir_path_str = datadir.to_str().with_context(|| {\n            format!(\n                \"Cannot start pageserver node {} in path that has no string representation: {:?}\",\n                self.conf.id, datadir,\n            )\n        })?;\n        let args = vec![\"-D\", 
datadir_path_str];\n\n        background_process::start_process(\n            \"pageserver\",\n            &datadir,\n            &self.env.pageserver_bin(),\n            args,\n            self.pageserver_env_variables()?,\n            background_process::InitialPidFile::Expect(self.pid_file()),\n            retry_timeout,\n            || async {\n                let st = self.check_status().await;\n                match st {\n                    Ok(()) => Ok(true),\n                    Err(mgmt_api::Error::ReceiveBody(_)) => Ok(false),\n                    Err(e) => Err(anyhow::anyhow!(\"Failed to check node status: {e}\")),\n                }\n            },\n        )\n        .await?;\n\n        Ok(())\n    }\n\n    fn pageserver_env_variables(&self) -> anyhow::Result<Vec<(String, String)>> {\n        // FIXME: why is this tied to pageserver's auth type? Whether or not the safekeeper\n        // needs a token, and how to generate that token, seems independent to whether\n        // the pageserver requires a token in incoming requests.\n        Ok(if self.conf.http_auth_type != AuthType::Trust {\n            // Generate a token to connect from the pageserver to a safekeeper\n            let token = self\n                .env\n                .generate_auth_token(&Claims::new(None, Scope::SafekeeperData))?;\n            vec![(\"NEON_AUTH_TOKEN\".to_owned(), token)]\n        } else {\n            Vec::new()\n        })\n    }\n\n    ///\n    /// Stop the server.\n    ///\n    /// If 'immediate' is true, we use SIGQUIT, killing the process immediately.\n    /// Otherwise we use SIGTERM, triggering a clean shutdown\n    ///\n    /// If the server is not running, returns success\n    ///\n    pub fn stop(&self, immediate: bool) -> anyhow::Result<()> {\n        background_process::stop_process(immediate, \"pageserver\", &self.pid_file())\n    }\n\n    pub async fn check_status(&self) -> mgmt_api::Result<()> {\n        self.http_client.status().await\n    }\n\n    pub 
async fn tenant_list(&self) -> mgmt_api::Result<Vec<TenantInfo>> {\n        self.http_client.list_tenants().await\n    }\n    pub fn parse_config(mut settings: HashMap<&str, &str>) -> anyhow::Result<models::TenantConfig> {\n        let result = models::TenantConfig {\n            checkpoint_distance: settings\n                .remove(\"checkpoint_distance\")\n                .map(|x| x.parse::<u64>())\n                .transpose()\n                .context(\"Failed to parse 'checkpoint_distance' as an integer\")?,\n            checkpoint_timeout: settings\n                .remove(\"checkpoint_timeout\")\n                .map(humantime::parse_duration)\n                .transpose()\n                .context(\"Failed to parse 'checkpoint_timeout' as duration\")?,\n            compaction_target_size: settings\n                .remove(\"compaction_target_size\")\n                .map(|x| x.parse::<u64>())\n                .transpose()\n                .context(\"Failed to parse 'compaction_target_size' as an integer\")?,\n            compaction_period: settings\n                .remove(\"compaction_period\")\n                .map(humantime::parse_duration)\n                .transpose()\n                .context(\"Failed to parse 'compaction_period' as duration\")?,\n            compaction_threshold: settings\n                .remove(\"compaction_threshold\")\n                .map(|x| x.parse::<usize>())\n                .transpose()\n                .context(\"Failed to parse 'compaction_threshold' as an integer\")?,\n            compaction_upper_limit: settings\n                .remove(\"compaction_upper_limit\")\n                .map(|x| x.parse::<usize>())\n                .transpose()\n                .context(\"Failed to parse 'compaction_upper_limit' as an integer\")?,\n            compaction_algorithm: settings\n                .remove(\"compaction_algorithm\")\n                .map(serde_json::from_str)\n                .transpose()\n                
.context(\"Failed to parse 'compaction_algorithm' json\")?,\n            compaction_shard_ancestor: settings\n                .remove(\"compaction_shard_ancestor\")\n                .map(|x| x.parse::<bool>())\n                .transpose()\n                .context(\"Failed to parse 'compaction_shard_ancestor' as a bool\")?,\n            compaction_l0_first: settings\n                .remove(\"compaction_l0_first\")\n                .map(|x| x.parse::<bool>())\n                .transpose()\n                .context(\"Failed to parse 'compaction_l0_first' as a bool\")?,\n            compaction_l0_semaphore: settings\n                .remove(\"compaction_l0_semaphore\")\n                .map(|x| x.parse::<bool>())\n                .transpose()\n                .context(\"Failed to parse 'compaction_l0_semaphore' as a bool\")?,\n            l0_flush_delay_threshold: settings\n                .remove(\"l0_flush_delay_threshold\")\n                .map(|x| x.parse::<usize>())\n                .transpose()\n                .context(\"Failed to parse 'l0_flush_delay_threshold' as an integer\")?,\n            l0_flush_stall_threshold: settings\n                .remove(\"l0_flush_stall_threshold\")\n                .map(|x| x.parse::<usize>())\n                .transpose()\n                .context(\"Failed to parse 'l0_flush_stall_threshold' as an integer\")?,\n            gc_horizon: settings\n                .remove(\"gc_horizon\")\n                .map(|x| x.parse::<u64>())\n                .transpose()\n                .context(\"Failed to parse 'gc_horizon' as an integer\")?,\n            gc_period: settings.remove(\"gc_period\")\n                .map(humantime::parse_duration)\n                .transpose()\n                .context(\"Failed to parse 'gc_period' as duration\")?,\n            image_creation_threshold: settings\n                .remove(\"image_creation_threshold\")\n                .map(|x| x.parse::<usize>())\n                .transpose()\n             
   .context(\"Failed to parse 'image_creation_threshold' as non zero integer\")?,\n            // HADRON\n            image_layer_force_creation_period: settings\n                .remove(\"image_layer_force_creation_period\")\n                .map(humantime::parse_duration)\n                .transpose()\n                .context(\"Failed to parse 'image_layer_force_creation_period' as duration\")?,\n            image_layer_creation_check_threshold: settings\n                .remove(\"image_layer_creation_check_threshold\")\n                .map(|x| x.parse::<u8>())\n                .transpose()\n                .context(\"Failed to parse 'image_layer_creation_check_threshold' as integer\")?,\n            image_creation_preempt_threshold: settings\n                .remove(\"image_creation_preempt_threshold\")\n                .map(|x| x.parse::<usize>())\n                .transpose()\n                .context(\"Failed to parse 'image_creation_preempt_threshold' as integer\")?,\n            pitr_interval: settings.remove(\"pitr_interval\")\n                .map(humantime::parse_duration)\n                .transpose()\n                .context(\"Failed to parse 'pitr_interval' as duration\")?,\n            walreceiver_connect_timeout: settings\n                .remove(\"walreceiver_connect_timeout\")\n                .map(humantime::parse_duration)\n                .transpose()\n                .context(\"Failed to parse 'walreceiver_connect_timeout' as duration\")?,\n            lagging_wal_timeout: settings\n                .remove(\"lagging_wal_timeout\")\n                .map(humantime::parse_duration)\n                .transpose()\n                .context(\"Failed to parse 'lagging_wal_timeout' as duration\")?,\n            max_lsn_wal_lag: settings\n                .remove(\"max_lsn_wal_lag\")\n                .map(|x| x.parse::<NonZeroU64>())\n                .transpose()\n                .context(\"Failed to parse 'max_lsn_wal_lag' as non zero integer\")?,\n        
    eviction_policy: settings\n                .remove(\"eviction_policy\")\n                .map(serde_json::from_str)\n                .transpose()\n                .context(\"Failed to parse 'eviction_policy' json\")?,\n            min_resident_size_override: settings\n                .remove(\"min_resident_size_override\")\n                .map(|x| x.parse::<u64>())\n                .transpose()\n                .context(\"Failed to parse 'min_resident_size_override' as integer\")?,\n            evictions_low_residence_duration_metric_threshold: settings\n                .remove(\"evictions_low_residence_duration_metric_threshold\")\n                .map(humantime::parse_duration)\n                .transpose()\n                .context(\"Failed to parse 'evictions_low_residence_duration_metric_threshold' as duration\")?,\n            heatmap_period: settings\n                .remove(\"heatmap_period\")\n                .map(humantime::parse_duration)\n                .transpose()\n                .context(\"Failed to parse 'heatmap_period' as duration\")?,\n            lazy_slru_download: settings\n                .remove(\"lazy_slru_download\")\n                .map(|x| x.parse::<bool>())\n                .transpose()\n                .context(\"Failed to parse 'lazy_slru_download' as bool\")?,\n            timeline_get_throttle: settings\n                .remove(\"timeline_get_throttle\")\n                .map(serde_json::from_str)\n                .transpose()\n                .context(\"parse `timeline_get_throttle` from json\")?,\n            lsn_lease_length: settings.remove(\"lsn_lease_length\")\n                .map(humantime::parse_duration)\n                .transpose()\n                .context(\"Failed to parse 'lsn_lease_length' as duration\")?,\n            lsn_lease_length_for_ts: settings\n                .remove(\"lsn_lease_length_for_ts\")\n                .map(humantime::parse_duration)\n                .transpose()\n                
.context(\"Failed to parse 'lsn_lease_length_for_ts' as duration\")?,\n            timeline_offloading: settings\n                .remove(\"timeline_offloading\")\n                .map(|x| x.parse::<bool>())\n                .transpose()\n                .context(\"Failed to parse 'timeline_offloading' as bool\")?,\n            rel_size_v2_enabled: settings\n                .remove(\"rel_size_v2_enabled\")\n                .map(|x| x.parse::<bool>())\n                .transpose()\n                .context(\"Failed to parse 'rel_size_v2_enabled' as bool\")?,\n            gc_compaction_enabled: settings\n                .remove(\"gc_compaction_enabled\")\n                .map(|x| x.parse::<bool>())\n                .transpose()\n                .context(\"Failed to parse 'gc_compaction_enabled' as bool\")?,\n            gc_compaction_verification: settings\n                .remove(\"gc_compaction_verification\")\n                .map(|x| x.parse::<bool>())\n                .transpose()\n                .context(\"Failed to parse 'gc_compaction_verification' as bool\")?,\n            gc_compaction_initial_threshold_kb: settings\n                .remove(\"gc_compaction_initial_threshold_kb\")\n                .map(|x| x.parse::<u64>())\n                .transpose()\n                .context(\"Failed to parse 'gc_compaction_initial_threshold_kb' as integer\")?,\n            gc_compaction_ratio_percent: settings\n                .remove(\"gc_compaction_ratio_percent\")\n                .map(|x| x.parse::<u64>())\n                .transpose()\n                .context(\"Failed to parse 'gc_compaction_ratio_percent' as integer\")?,\n            sampling_ratio: settings\n                .remove(\"sampling_ratio\")\n                .map(serde_json::from_str)\n                .transpose()\n                .context(\"Failed to parse 'sampling_ratio'\")?,\n            relsize_snapshot_cache_capacity: settings\n                .remove(\"relsize_snapshot_cache_capacity\")\n       
         .map(|x| x.parse::<usize>())\n                .transpose()\n                .context(\"Failed to parse 'relsize_snapshot_cache_capacity' as integer\")?,\n            basebackup_cache_enabled: settings\n                .remove(\"basebackup_cache_enabled\")\n                .map(|x| x.parse::<bool>())\n                .transpose()\n                .context(\"Failed to parse 'basebackup_cache_enabled' as bool\")?,\n        };\n        if !settings.is_empty() {\n            bail!(\"Unrecognized tenant settings: {settings:?}\")\n        } else {\n            Ok(result)\n        }\n    }\n\n    pub async fn tenant_config(\n        &self,\n        tenant_id: TenantId,\n        settings: HashMap<&str, &str>,\n    ) -> anyhow::Result<()> {\n        let config = Self::parse_config(settings)?;\n        self.http_client\n            .set_tenant_config(&models::TenantConfigRequest { tenant_id, config })\n            .await?;\n\n        Ok(())\n    }\n\n    pub async fn timeline_list(\n        &self,\n        tenant_shard_id: &TenantShardId,\n    ) -> anyhow::Result<Vec<TimelineInfo>> {\n        Ok(self.http_client.list_timelines(*tenant_shard_id).await?)\n    }\n\n    /// Import a basebackup prepared using either:\n    /// a) `pg_basebackup -F tar`, or\n    /// b) The `fullbackup` pageserver endpoint\n    ///\n    /// # Arguments\n    /// * `tenant_id` - tenant to import into. 
Created if not exists\n    /// * `timeline_id` - id to assign to imported timeline\n    /// * `base` - (start lsn of basebackup, path to `base.tar` file)\n    /// * `pg_wal` - if there's any wal to import: (end lsn, path to `pg_wal.tar`)\n    pub async fn timeline_import(\n        &self,\n        tenant_id: TenantId,\n        timeline_id: TimelineId,\n        base: (Lsn, PathBuf),\n        pg_wal: Option<(Lsn, PathBuf)>,\n        pg_version: PgMajorVersion,\n    ) -> anyhow::Result<()> {\n        // Init base reader\n        let (start_lsn, base_tarfile_path) = base;\n        let base_tarfile = tokio::fs::File::open(base_tarfile_path).await?;\n        let base_tarfile =\n            mgmt_api::ReqwestBody::wrap_stream(tokio_util::io::ReaderStream::new(base_tarfile));\n\n        // Init wal reader if necessary\n        let (end_lsn, wal_reader) = if let Some((end_lsn, wal_tarfile_path)) = pg_wal {\n            let wal_tarfile = tokio::fs::File::open(wal_tarfile_path).await?;\n            let wal_reader =\n                mgmt_api::ReqwestBody::wrap_stream(tokio_util::io::ReaderStream::new(wal_tarfile));\n            (end_lsn, Some(wal_reader))\n        } else {\n            (start_lsn, None)\n        };\n\n        // Import base\n        self.http_client\n            .import_basebackup(\n                tenant_id,\n                timeline_id,\n                start_lsn,\n                end_lsn,\n                pg_version,\n                base_tarfile,\n            )\n            .await?;\n\n        // Import wal if necessary\n        if let Some(wal_reader) = wal_reader {\n            self.http_client\n                .import_wal(tenant_id, timeline_id, start_lsn, end_lsn, wal_reader)\n                .await?;\n        }\n\n        Ok(())\n    }\n    pub async fn timeline_info(\n        &self,\n        tenant_shard_id: TenantShardId,\n        timeline_id: TimelineId,\n        force_await_logical_size: mgmt_api::ForceAwaitLogicalSize,\n    ) -> 
anyhow::Result<TimelineInfo> {\n        let timeline_info = self\n            .http_client\n            .timeline_info(tenant_shard_id, timeline_id, force_await_logical_size)\n            .await?;\n        Ok(timeline_info)\n    }\n}\n"
  },
  {
    "path": "control_plane/src/postgresql_conf.rs",
    "content": "use std::collections::HashMap;\nuse std::fmt;\n\n///\n/// Module for parsing postgresql.conf file.\n///\n/// NOTE: This doesn't implement the full, correct postgresql.conf syntax. Just\n/// enough to extract a few settings we need in Neon, assuming you don't do\n/// funny stuff like include-directives or funny escaping.\nuse once_cell::sync::Lazy;\nuse regex::Regex;\n\n/// In-memory representation of a postgresql.conf file\n#[derive(Default, Debug)]\npub struct PostgresConf {\n    lines: Vec<String>,\n    hash: HashMap<String, String>,\n}\n\nimpl PostgresConf {\n    pub fn new() -> PostgresConf {\n        PostgresConf::default()\n    }\n\n    /// Return the current value of 'option'\n    pub fn get(&self, option: &str) -> Option<&str> {\n        self.hash.get(option).map(|x| x.as_ref())\n    }\n\n    ///\n    /// Note: if you call this multiple times for the same option, the config\n    /// file will have a line for each call. It would be nice to have a function\n    /// to change an existing line, but that's a TODO.\n    ///\n    pub fn append(&mut self, option: &str, value: &str) {\n        self.lines\n            .push(format!(\"{}={}\\n\", option, escape_str(value)));\n        self.hash.insert(option.to_string(), value.to_string());\n    }\n\n    /// Append an arbitrary non-setting line to the config file\n    pub fn append_line(&mut self, line: &str) {\n        self.lines.push(line.to_string());\n    }\n}\n\nimpl fmt::Display for PostgresConf {\n    /// Return the whole configuration file as a string\n    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {\n        for line in self.lines.iter() {\n            f.write_str(line)?;\n        }\n        Ok(())\n    }\n}\n\n/// Escape a value for putting in postgresql.conf.\nfn escape_str(s: &str) -> String {\n    // If the string doesn't contain anything that needs quoting or escaping, return it\n    // as it is.\n    //\n    // The first part of the regex, before the '|', matches the INTEGER rule in 
the\n    // PostgreSQL flex grammar (guc-file.l). It matches plain integers like \"123\" and\n    // \"-123\", and also accepts units like \"10MB\". The second part of the regex matches\n    // the UNQUOTED_STRING rule, and accepts strings that contain a single word, beginning\n    // with a letter. That covers words like \"off\" or \"posix\". Everything else is quoted.\n    //\n    // This regex is a bit more conservative than the rules in guc-file.l, so we quote some\n    // strings that PostgreSQL would accept without quoting, but that's OK.\n\n    static UNQUOTED_RE: Lazy<Regex> =\n        Lazy::new(|| Regex::new(r\"(^[-+]?[0-9]+[a-zA-Z]*$)|(^[a-zA-Z][a-zA-Z0-9]*$)\").unwrap());\n\n    if UNQUOTED_RE.is_match(s) {\n        s.to_string()\n    } else {\n        // Otherwise escape and quote it\n        let s = s\n            .replace('\\\\', \"\\\\\\\\\")\n            .replace('\\n', \"\\\\n\")\n            .replace('\\'', \"''\");\n\n        \"\\'\".to_owned() + &s + \"\\'\"\n    }\n}\n\n#[test]\nfn test_postgresql_conf_escapes() -> anyhow::Result<()> {\n    assert_eq!(escape_str(\"foo bar\"), \"'foo bar'\");\n    // these don't need to be quoted\n    assert_eq!(escape_str(\"foo\"), \"foo\");\n    assert_eq!(escape_str(\"123\"), \"123\");\n    assert_eq!(escape_str(\"+123\"), \"+123\");\n    assert_eq!(escape_str(\"-10\"), \"-10\");\n    assert_eq!(escape_str(\"1foo\"), \"1foo\");\n    assert_eq!(escape_str(\"foo1\"), \"foo1\");\n    assert_eq!(escape_str(\"10MB\"), \"10MB\");\n    assert_eq!(escape_str(\"-10kB\"), \"-10kB\");\n\n    // these need quoting and/or escaping\n    assert_eq!(escape_str(\"foo bar\"), \"'foo bar'\");\n    assert_eq!(escape_str(\"fo'o\"), \"'fo''o'\");\n    assert_eq!(escape_str(\"fo\\no\"), \"'fo\\\\no'\");\n    assert_eq!(escape_str(\"fo\\\\o\"), \"'fo\\\\\\\\o'\");\n    assert_eq!(escape_str(\"10 cats\"), \"'10 cats'\");\n\n    Ok(())\n}\n"
  },
  {
    "path": "control_plane/src/safekeeper.rs",
    "content": "//! Code to manage safekeepers\n//!\n//! In the local test environment, the data for each safekeeper is stored in\n//!\n//! ```text\n//!   .neon/safekeepers/<safekeeper id>\n//! ```\nuse std::error::Error as _;\nuse std::io::Write;\nuse std::path::PathBuf;\nuse std::time::Duration;\nuse std::{io, result};\n\nuse anyhow::Context;\nuse camino::Utf8PathBuf;\nuse postgres_connection::PgConnectionConfig;\nuse safekeeper_api::models::TimelineCreateRequest;\nuse safekeeper_client::mgmt_api;\nuse thiserror::Error;\nuse utils::auth::{Claims, Scope};\nuse utils::id::NodeId;\n\nuse crate::background_process;\nuse crate::local_env::{LocalEnv, SafekeeperConf};\n\n#[derive(Error, Debug)]\npub enum SafekeeperHttpError {\n    #[error(\"request error: {0}{}\", .0.source().map(|e| format!(\": {e}\")).unwrap_or_default())]\n    Transport(#[from] reqwest::Error),\n\n    #[error(\"Error: {0}\")]\n    Response(String),\n}\n\ntype Result<T> = result::Result<T, SafekeeperHttpError>;\n\nfn err_from_client_err(err: mgmt_api::Error) -> SafekeeperHttpError {\n    use mgmt_api::Error::*;\n    match err {\n        ApiError(_, str) => SafekeeperHttpError::Response(str),\n        Cancelled => SafekeeperHttpError::Response(\"Cancelled\".to_owned()),\n        ReceiveBody(err) => SafekeeperHttpError::Transport(err),\n        ReceiveErrorBody(err) => SafekeeperHttpError::Response(err),\n        Timeout(str) => SafekeeperHttpError::Response(format!(\"timeout: {str}\")),\n    }\n}\n\n//\n// Control routines for safekeeper.\n//\n// Used in CLI and tests.\n//\n#[derive(Debug)]\npub struct SafekeeperNode {\n    pub id: NodeId,\n\n    pub conf: SafekeeperConf,\n\n    pub pg_connection_config: PgConnectionConfig,\n    pub env: LocalEnv,\n    pub http_client: mgmt_api::Client,\n    pub listen_addr: String,\n}\n\nimpl SafekeeperNode {\n    pub fn from_env(env: &LocalEnv, conf: &SafekeeperConf) -> SafekeeperNode {\n        let listen_addr = if let Some(ref listen_addr) = conf.listen_addr {\n    
        listen_addr.clone()\n        } else {\n            \"127.0.0.1\".to_string()\n        };\n        let jwt = None;\n        let http_base_url = format!(\"http://{}:{}\", listen_addr, conf.http_port);\n        SafekeeperNode {\n            id: conf.id,\n            conf: conf.clone(),\n            pg_connection_config: Self::safekeeper_connection_config(&listen_addr, conf.pg_port),\n            env: env.clone(),\n            http_client: mgmt_api::Client::new(env.create_http_client(), http_base_url, jwt),\n            listen_addr,\n        }\n    }\n\n    /// Construct libpq connection string for connecting to this safekeeper.\n    fn safekeeper_connection_config(addr: &str, port: u16) -> PgConnectionConfig {\n        PgConnectionConfig::new_host_port(url::Host::parse(addr).unwrap(), port)\n    }\n\n    pub fn datadir_path_by_id(env: &LocalEnv, sk_id: NodeId) -> PathBuf {\n        env.safekeeper_data_dir(&format!(\"sk{sk_id}\"))\n    }\n\n    pub fn datadir_path(&self) -> PathBuf {\n        SafekeeperNode::datadir_path_by_id(&self.env, self.id)\n    }\n\n    pub fn pid_file(&self) -> Utf8PathBuf {\n        Utf8PathBuf::from_path_buf(self.datadir_path().join(\"safekeeper.pid\"))\n            .expect(\"non-Unicode path\")\n    }\n\n    /// Initializes a safekeeper node by creating all necessary files,\n    /// e.g. 
SSL certificates and JWT token file.\n    pub fn initialize(&self) -> anyhow::Result<()> {\n        if self.env.generate_local_ssl_certs {\n            self.env.generate_ssl_cert(\n                &self.datadir_path().join(\"server.crt\"),\n                &self.datadir_path().join(\"server.key\"),\n            )?;\n        }\n\n        // Generate a token file for authentication with other safekeepers\n        if self.conf.auth_enabled {\n            let token = self\n                .env\n                .generate_auth_token(&Claims::new(None, Scope::SafekeeperData))?;\n\n            let token_path = self.datadir_path().join(\"peer_jwt_token\");\n            std::fs::write(token_path, token)?;\n        }\n\n        Ok(())\n    }\n\n    pub async fn start(\n        &self,\n        extra_opts: &[String],\n        retry_timeout: &Duration,\n    ) -> anyhow::Result<()> {\n        println!(\n            \"Starting safekeeper at '{}' in '{}', retrying for {:?}\",\n            self.pg_connection_config.raw_address(),\n            self.datadir_path().display(),\n            retry_timeout,\n        );\n        io::stdout().flush().unwrap();\n\n        let listen_pg = format!(\"{}:{}\", self.listen_addr, self.conf.pg_port);\n        let listen_http = format!(\"{}:{}\", self.listen_addr, self.conf.http_port);\n        let id = self.id;\n        let datadir = self.datadir_path();\n\n        let id_string = id.to_string();\n        // TODO: add availability_zone to the config.\n        // Right now we just specify any value here and use it to check metrics in tests.\n        let availability_zone = format!(\"sk-{id_string}\");\n\n        let mut args = vec![\n            \"-D\".to_owned(),\n            datadir\n                .to_str()\n                .with_context(|| {\n                    format!(\"Datadir path {datadir:?} cannot be represented as a unicode string\")\n                })?\n                .to_owned(),\n            \"--id\".to_owned(),\n            
id_string,\n            \"--listen-pg\".to_owned(),\n            listen_pg,\n            \"--listen-http\".to_owned(),\n            listen_http,\n            \"--availability-zone\".to_owned(),\n            availability_zone,\n        ];\n        if let Some(pg_tenant_only_port) = self.conf.pg_tenant_only_port {\n            let listen_pg_tenant_only = format!(\"{}:{}\", self.listen_addr, pg_tenant_only_port);\n            args.extend([\"--listen-pg-tenant-only\".to_owned(), listen_pg_tenant_only]);\n        }\n        if !self.conf.sync {\n            args.push(\"--no-sync\".to_owned());\n        }\n\n        let broker_endpoint = format!(\"{}\", self.env.broker.client_url());\n        args.extend([\"--broker-endpoint\".to_owned(), broker_endpoint]);\n\n        let mut backup_threads = String::new();\n        if let Some(threads) = self.conf.backup_threads {\n            backup_threads = threads.to_string();\n            args.extend([\"--backup-threads\".to_owned(), backup_threads]);\n        } else {\n            drop(backup_threads);\n        }\n\n        if let Some(ref remote_storage) = self.conf.remote_storage {\n            args.extend([\"--remote-storage\".to_owned(), remote_storage.clone()]);\n        }\n\n        let key_path = self.env.base_data_dir.join(\"auth_public_key.pem\");\n        if self.conf.auth_enabled {\n            let key_path_string = key_path\n                .to_str()\n                .with_context(|| {\n                    format!(\"Key path {key_path:?} cannot be represented as a unicode string\")\n                })?\n                .to_owned();\n            args.extend([\n                \"--pg-auth-public-key-path\".to_owned(),\n                key_path_string.clone(),\n            ]);\n            args.extend([\n                \"--pg-tenant-only-auth-public-key-path\".to_owned(),\n                key_path_string.clone(),\n            ]);\n            args.extend([\n                \"--http-auth-public-key-path\".to_owned(),\n    
            key_path_string.clone(),\n            ]);\n        }\n\n        if let Some(https_port) = self.conf.https_port {\n            args.extend([\n                \"--listen-https\".to_owned(),\n                format!(\"{}:{}\", self.listen_addr, https_port),\n            ]);\n        }\n        if let Some(ssl_ca_file) = self.env.ssl_ca_cert_path() {\n            args.push(format!(\"--ssl-ca-file={}\", ssl_ca_file.to_str().unwrap()));\n        }\n\n        if self.conf.auth_enabled {\n            let token_path = self.datadir_path().join(\"peer_jwt_token\");\n            let token_path_str = token_path\n                .to_str()\n                .with_context(|| {\n                    format!(\"Token path {token_path:?} cannot be represented as a unicode string\")\n                })?\n                .to_owned();\n            args.extend([\"--auth-token-path\".to_owned(), token_path_str]);\n        }\n\n        args.extend_from_slice(extra_opts);\n\n        let env_variables = Vec::new();\n        background_process::start_process(\n            &format!(\"safekeeper-{id}\"),\n            &datadir,\n            &self.env.safekeeper_bin(),\n            &args,\n            env_variables,\n            background_process::InitialPidFile::Expect(self.pid_file()),\n            retry_timeout,\n            || async {\n                match self.check_status().await {\n                    Ok(()) => Ok(true),\n                    Err(SafekeeperHttpError::Transport(_)) => Ok(false),\n                    Err(e) => Err(anyhow::anyhow!(\"Failed to check node status: {e}\")),\n                }\n            },\n        )\n        .await\n    }\n\n    ///\n    /// Stop the server.\n    ///\n    /// If 'immediate' is true, we use SIGQUIT, killing the process immediately.\n    /// Otherwise we use SIGTERM, triggering a clean shutdown\n    ///\n    /// If the server is not running, returns success\n    ///\n    pub fn stop(&self, immediate: bool) -> anyhow::Result<()> {\n     
   background_process::stop_process(\n            immediate,\n            &format!(\"safekeeper {}\", self.id),\n            &self.pid_file(),\n        )\n    }\n\n    pub async fn check_status(&self) -> Result<()> {\n        self.http_client\n            .status()\n            .await\n            .map_err(err_from_client_err)?;\n        Ok(())\n    }\n\n    pub async fn create_timeline(&self, req: &TimelineCreateRequest) -> Result<()> {\n        self.http_client\n            .create_timeline(req)\n            .await\n            .map_err(err_from_client_err)?;\n        Ok(())\n    }\n}\n"
  },
  {
    "path": "control_plane/src/storage_controller.rs",
    "content": "use std::ffi::OsStr;\nuse std::fs;\nuse std::path::PathBuf;\nuse std::process::ExitStatus;\nuse std::str::FromStr;\nuse std::sync::OnceLock;\nuse std::time::{Duration, Instant};\n\nuse crate::background_process;\nuse crate::local_env::{LocalEnv, NeonStorageControllerConf};\nuse camino::{Utf8Path, Utf8PathBuf};\nuse hyper0::Uri;\nuse nix::unistd::Pid;\nuse pageserver_api::controller_api::{\n    NodeConfigureRequest, NodeDescribeResponse, NodeRegisterRequest,\n    SafekeeperSchedulingPolicyRequest, SkSchedulingPolicy, TenantCreateRequest,\n    TenantCreateResponse, TenantLocateResponse,\n};\nuse pageserver_api::models::{\n    TenantConfig, TenantConfigRequest, TimelineCreateRequest, TimelineInfo,\n};\nuse pageserver_api::shard::TenantShardId;\nuse pageserver_client::mgmt_api::ResponseErrorMessageExt;\nuse pem::Pem;\nuse postgres_backend::AuthType;\nuse reqwest::{Method, Response};\nuse safekeeper_api::PgMajorVersion;\nuse serde::de::DeserializeOwned;\nuse serde::{Deserialize, Serialize};\nuse tokio::process::Command;\nuse tracing::instrument;\nuse url::Url;\nuse utils::auth::{Claims, Scope, encode_from_key_file};\nuse utils::id::{NodeId, TenantId};\nuse whoami::username;\n\npub struct StorageController {\n    env: LocalEnv,\n    private_key: Option<Pem>,\n    public_key: Option<Pem>,\n    client: reqwest::Client,\n    config: NeonStorageControllerConf,\n\n    // The listen port is learned when starting the storage controller,\n    // hence the use of OnceLock to init it at the right time.\n    listen_port: OnceLock<u16>,\n}\n\nconst COMMAND: &str = \"storage_controller\";\n\nconst STORAGE_CONTROLLER_POSTGRES_VERSION: PgMajorVersion = PgMajorVersion::PG16;\n\nconst DB_NAME: &str = \"storage_controller\";\n\npub struct NeonStorageControllerStartArgs {\n    pub instance_id: u8,\n    pub base_port: Option<u16>,\n    pub start_timeout: humantime::Duration,\n    pub handle_ps_local_disk_loss: Option<bool>,\n}\n\nimpl NeonStorageControllerStartArgs {\n    
pub fn with_default_instance_id(start_timeout: humantime::Duration) -> Self {\n        Self {\n            instance_id: 1,\n            base_port: None,\n            start_timeout,\n            handle_ps_local_disk_loss: None,\n        }\n    }\n}\n\npub struct NeonStorageControllerStopArgs {\n    pub instance_id: u8,\n    pub immediate: bool,\n}\n\nimpl NeonStorageControllerStopArgs {\n    pub fn with_default_instance_id(immediate: bool) -> Self {\n        Self {\n            instance_id: 1,\n            immediate,\n        }\n    }\n}\n\n#[derive(Serialize, Deserialize)]\npub struct AttachHookRequest {\n    pub tenant_shard_id: TenantShardId,\n    pub node_id: Option<NodeId>,\n    pub generation_override: Option<i32>, // only new tenants\n    pub config: Option<TenantConfig>,     // only new tenants\n}\n\n#[derive(Serialize, Deserialize)]\npub struct AttachHookResponse {\n    #[serde(rename = \"gen\")]\n    pub generation: Option<u32>,\n}\n\n#[derive(Serialize, Deserialize)]\npub struct InspectRequest {\n    pub tenant_shard_id: TenantShardId,\n}\n\n#[derive(Serialize, Deserialize)]\npub struct InspectResponse {\n    pub attachment: Option<(u32, NodeId)>,\n}\n\nimpl StorageController {\n    pub fn from_env(env: &LocalEnv) -> Self {\n        // Assume all pageservers have symmetric auth configuration: this service\n        // expects to use one JWT token to talk to all of them.\n        let ps_conf = env\n            .pageservers\n            .first()\n            .expect(\"Config is validated to contain at least one pageserver\");\n        let (private_key, public_key) = match ps_conf.http_auth_type {\n            AuthType::Trust => (None, None),\n            AuthType::NeonJWT => {\n                let private_key_path = env.get_private_key_path();\n                let private_key =\n                    pem::parse(fs::read(private_key_path).expect(\"failed to read private key\"))\n                        .expect(\"failed to parse PEM file\");\n\n                
// If pageserver auth is enabled, this implicitly enables auth for this service,\n                // using the same credentials.\n                let public_key_path =\n                    camino::Utf8PathBuf::try_from(env.base_data_dir.join(\"auth_public_key.pem\"))\n                        .unwrap();\n\n                // This service takes keys as a string rather than as a path to a file/dir: read the key into memory.\n                let public_key = if std::fs::metadata(&public_key_path)\n                    .expect(\"Can't stat public key\")\n                    .is_dir()\n                {\n                    // Our config may specify a directory: this is for the pageserver's ability to handle multiple\n                    // keys.  We only use one key at a time, so, arbitrarily load the first one in the directory.\n                    let mut dir =\n                        std::fs::read_dir(&public_key_path).expect(\"Can't readdir public key path\");\n                    let dent = dir\n                        .next()\n                        .expect(\"Empty key dir\")\n                        .expect(\"Error reading key dir\");\n\n                    pem::parse(std::fs::read_to_string(dent.path()).expect(\"Can't read public key\"))\n                        .expect(\"Failed to parse PEM file\")\n                } else {\n                    pem::parse(\n                        std::fs::read_to_string(&public_key_path).expect(\"Can't read public key\"),\n                    )\n                    .expect(\"Failed to parse PEM file\")\n                };\n                (Some(private_key), Some(public_key))\n            }\n        };\n\n        Self {\n            env: env.clone(),\n            private_key,\n            public_key,\n            client: env.create_http_client(),\n            config: env.storage_controller.clone(),\n            listen_port: OnceLock::default(),\n        }\n    }\n\n    fn storage_controller_instance_dir(&self, instance_id: 
u8) -> PathBuf {\n        self.env\n            .base_data_dir\n            .join(format!(\"storage_controller_{instance_id}\"))\n    }\n\n    fn pid_file(&self, instance_id: u8) -> Utf8PathBuf {\n        Utf8PathBuf::from_path_buf(\n            self.storage_controller_instance_dir(instance_id)\n                .join(\"storage_controller.pid\"),\n        )\n        .expect(\"non-Unicode path\")\n    }\n\n    /// Find the directory containing postgres subdirectories, such `bin` and `lib`\n    ///\n    /// This usually uses STORAGE_CONTROLLER_POSTGRES_VERSION of postgres, but will fall back\n    /// to other versions if that one isn't found.  Some automated tests create circumstances\n    /// where only one version is available in pg_distrib_dir, such as `test_remote_extensions`.\n    async fn get_pg_dir(&self, dir_name: &str) -> anyhow::Result<Utf8PathBuf> {\n        const PREFER_VERSIONS: [PgMajorVersion; 5] = [\n            STORAGE_CONTROLLER_POSTGRES_VERSION,\n            PgMajorVersion::PG16,\n            PgMajorVersion::PG15,\n            PgMajorVersion::PG14,\n            PgMajorVersion::PG17,\n        ];\n\n        for v in PREFER_VERSIONS {\n            let path = Utf8PathBuf::from_path_buf(self.env.pg_dir(v, dir_name)?).unwrap();\n            if tokio::fs::try_exists(&path).await? 
{\n                return Ok(path);\n            }\n        }\n\n        // Fall through\n        anyhow::bail!(\n            \"Postgres directory '{}' not found in {}\",\n            dir_name,\n            self.env.pg_distrib_dir.display(),\n        );\n    }\n\n    pub async fn get_pg_bin_dir(&self) -> anyhow::Result<Utf8PathBuf> {\n        self.get_pg_dir(\"bin\").await\n    }\n\n    pub async fn get_pg_lib_dir(&self) -> anyhow::Result<Utf8PathBuf> {\n        self.get_pg_dir(\"lib\").await\n    }\n\n    /// Readiness check for our postgres process\n    async fn pg_isready(&self, pg_bin_dir: &Utf8Path, postgres_port: u16) -> anyhow::Result<bool> {\n        let bin_path = pg_bin_dir.join(\"pg_isready\");\n        let args = [\n            \"-h\",\n            \"localhost\",\n            \"-U\",\n            &username(),\n            \"-d\",\n            DB_NAME,\n            \"-p\",\n            &format!(\"{postgres_port}\"),\n        ];\n        let pg_lib_dir = self.get_pg_lib_dir().await.unwrap();\n        let envs = [\n            (\"LD_LIBRARY_PATH\".to_owned(), pg_lib_dir.to_string()),\n            (\"DYLD_LIBRARY_PATH\".to_owned(), pg_lib_dir.to_string()),\n        ];\n        let exitcode = Command::new(bin_path)\n            .args(args)\n            .envs(envs)\n            .spawn()?\n            .wait()\n            .await?;\n\n        Ok(exitcode.success())\n    }\n\n    /// Create our database if it doesn't exist\n    ///\n    /// This function is equivalent to the `diesel setup` command in the diesel CLI.  
We implement\n    /// the same steps by hand to avoid imposing a dependency on installing diesel-cli for developers\n    /// who just want to run `cargo neon_local` without knowing about diesel.\n    ///\n    /// Returns the database url\n    pub async fn setup_database(&self, postgres_port: u16) -> anyhow::Result<String> {\n        let database_url = format!(\n            \"postgresql://{}@localhost:{}/{DB_NAME}\",\n            &username(),\n            postgres_port\n        );\n\n        let pg_bin_dir = self.get_pg_bin_dir().await?;\n        let createdb_path = pg_bin_dir.join(\"createdb\");\n        let pg_lib_dir = self.get_pg_lib_dir().await.unwrap();\n        let envs = [\n            (\"LD_LIBRARY_PATH\".to_owned(), pg_lib_dir.to_string()),\n            (\"DYLD_LIBRARY_PATH\".to_owned(), pg_lib_dir.to_string()),\n        ];\n        let output = Command::new(&createdb_path)\n            .args([\n                \"-h\",\n                \"localhost\",\n                \"-p\",\n                &format!(\"{postgres_port}\"),\n                \"-U\",\n                &username(),\n                \"-O\",\n                &username(),\n                DB_NAME,\n            ])\n            .envs(envs)\n            .output()\n            .await\n            .expect(\"Failed to spawn createdb\");\n\n        if !output.status.success() {\n            let stderr = String::from_utf8(output.stderr).expect(\"Non-UTF8 output from createdb\");\n            if stderr.contains(\"already exists\") {\n                tracing::info!(\"Database {DB_NAME} already exists\");\n            } else {\n                anyhow::bail!(\"createdb failed with status {}: {stderr}\", output.status);\n            }\n        }\n\n        Ok(database_url)\n    }\n\n    pub async fn connect_to_database(\n        &self,\n        postgres_port: u16,\n    ) -> anyhow::Result<(\n        tokio_postgres::Client,\n        tokio_postgres::Connection<tokio_postgres::Socket, 
tokio_postgres::tls::NoTlsStream>,\n    )> {\n        tokio_postgres::Config::new()\n            .host(\"localhost\")\n            .port(postgres_port)\n            // The user is the ambient operating system user name.\n            // That is an impurity which we want to fix in => TODO https://github.com/neondatabase/neon/issues/8400\n            //\n            // Until we get there, use the ambient operating system user name.\n            // Recent tokio-postgres versions default to this if the user isn't specified.\n            // But tokio-postgres fork doesn't have this upstream commit:\n            // https://github.com/sfackler/rust-postgres/commit/cb609be758f3fb5af537f04b584a2ee0cebd5e79\n            // => we should rebase our fork => TODO https://github.com/neondatabase/neon/issues/8399\n            .user(&username())\n            .dbname(DB_NAME)\n            .connect(tokio_postgres::NoTls)\n            .await\n            .map_err(anyhow::Error::new)\n    }\n\n    /// Wrapper for the pg_ctl binary, which we spawn as a short-lived subprocess when starting and stopping postgres\n    async fn pg_ctl<I, S>(&self, args: I) -> ExitStatus\n    where\n        I: IntoIterator<Item = S>,\n        S: AsRef<OsStr>,\n    {\n        let pg_bin_dir = self.get_pg_bin_dir().await.unwrap();\n        let bin_path = pg_bin_dir.join(\"pg_ctl\");\n\n        let pg_lib_dir = self.get_pg_lib_dir().await.unwrap();\n        let envs = [\n            (\"LD_LIBRARY_PATH\".to_owned(), pg_lib_dir.to_string()),\n            (\"DYLD_LIBRARY_PATH\".to_owned(), pg_lib_dir.to_string()),\n        ];\n\n        Command::new(bin_path)\n            .args(args)\n            .envs(envs)\n            .spawn()\n            .expect(\"Failed to spawn pg_ctl, binary_missing?\")\n            .wait()\n            .await\n            .expect(\"Failed to wait for pg_ctl termination\")\n    }\n\n    pub async fn start(&self, start_args: NeonStorageControllerStartArgs) -> anyhow::Result<()> {\n        
let instance_dir = self.storage_controller_instance_dir(start_args.instance_id);\n        if let Err(err) = tokio::fs::create_dir(&instance_dir).await {\n            if err.kind() != std::io::ErrorKind::AlreadyExists {\n                panic!(\"Failed to create instance dir {instance_dir:?}\");\n            }\n        }\n\n        if self.env.generate_local_ssl_certs {\n            self.env.generate_ssl_cert(\n                &instance_dir.join(\"server.crt\"),\n                &instance_dir.join(\"server.key\"),\n            )?;\n        }\n\n        let listen_url = &self.env.control_plane_api;\n\n        let scheme = listen_url.scheme();\n        let host = listen_url.host_str().unwrap();\n\n        let (listen_port, postgres_port) = if let Some(base_port) = start_args.base_port {\n            (\n                base_port,\n                self.config\n                    .database_url\n                    .expect(\"--base-port requires NeonStorageControllerConf::database_url\")\n                    .port(),\n            )\n        } else {\n            let port = listen_url.port().unwrap();\n            (port, port + 1)\n        };\n\n        self.listen_port\n            .set(listen_port)\n            .expect(\"StorageController::listen_port is only set here\");\n\n        // Do we remove the pid file on stop?\n        let pg_started = self.is_postgres_running().await?;\n        let pg_lib_dir = self.get_pg_lib_dir().await?;\n\n        if !pg_started {\n            // Start a vanilla Postgres process used by the storage controller for persistence.\n            let pg_data_path = Utf8PathBuf::from_path_buf(self.env.base_data_dir.clone())\n                .unwrap()\n                .join(\"storage_controller_db\");\n            let pg_bin_dir = self.get_pg_bin_dir().await?;\n            let pg_log_path = pg_data_path.join(\"postgres.log\");\n\n            if !tokio::fs::try_exists(&pg_data_path).await? 
{\n                let initdb_args = [\n                    \"--pgdata\",\n                    pg_data_path.as_ref(),\n                    \"--username\",\n                    &username(),\n                    \"--no-sync\",\n                    \"--no-instructions\",\n                ];\n                tracing::info!(\n                    \"Initializing storage controller database with args: {:?}\",\n                    initdb_args\n                );\n\n                // Initialize empty database\n                let initdb_path = pg_bin_dir.join(\"initdb\");\n                let mut child = Command::new(&initdb_path)\n                    .envs(vec![\n                        (\"LD_LIBRARY_PATH\".to_owned(), pg_lib_dir.to_string()),\n                        (\"DYLD_LIBRARY_PATH\".to_owned(), pg_lib_dir.to_string()),\n                    ])\n                    .args(initdb_args)\n                    .spawn()\n                    .expect(\"Failed to spawn initdb\");\n                let status = child.wait().await?;\n                if !status.success() {\n                    anyhow::bail!(\"initdb failed with status {status}\");\n                }\n            };\n\n            // Write a minimal config file:\n            // - Specify the port, since this is chosen dynamically\n            // - Switch off fsync, since we're running on lightweight test environments and when e.g. 
scale testing\n            //   the storage controller we don't want a slow local disk to interfere with that.\n            //\n            // NB: it's important that we rewrite this file on each start command so we propagate changes\n            // from `LocalEnv`'s config file (`.neon/config`).\n            tokio::fs::write(\n                &pg_data_path.join(\"postgresql.conf\"),\n                format!(\"port = {postgres_port}\\nfsync=off\\n\"),\n            )\n            .await?;\n\n            println!(\"Starting storage controller database...\");\n            let db_start_args = [\n                \"-w\",\n                \"-D\",\n                pg_data_path.as_ref(),\n                \"-l\",\n                pg_log_path.as_ref(),\n                \"-U\",\n                &username(),\n                \"start\",\n            ];\n            tracing::info!(\n                \"Starting storage controller database with args: {:?}\",\n                db_start_args\n            );\n\n            let db_start_status = self.pg_ctl(db_start_args).await;\n            let start_timeout: Duration = start_args.start_timeout.into();\n            let db_start_deadline = Instant::now() + start_timeout;\n            if !db_start_status.success() {\n                return Err(anyhow::anyhow!(\n                    \"Failed to start postgres {}\",\n                    db_start_status.code().unwrap()\n                ));\n            }\n\n            loop {\n                if Instant::now() > db_start_deadline {\n                    return Err(anyhow::anyhow!(\"Timed out waiting for postgres to start\"));\n                }\n\n                match self.pg_isready(&pg_bin_dir, postgres_port).await {\n                    Ok(true) => {\n                        tracing::info!(\"storage controller postgres is now ready\");\n                        break;\n                    }\n                    Ok(false) => {\n                        
tokio::time::sleep(Duration::from_millis(100)).await;\n                    }\n                    Err(e) => {\n                        tracing::warn!(\"Failed to check postgres status: {e}\")\n                    }\n                }\n            }\n\n            self.setup_database(postgres_port).await?;\n        }\n\n        let database_url = format!(\"postgresql://localhost:{postgres_port}/{DB_NAME}\");\n\n        // We support running a startup SQL script to fiddle with the database before we launch storcon.\n        // This is used by the test suite.\n        let startup_script_path = self\n            .env\n            .base_data_dir\n            .join(\"storage_controller_db.startup.sql\");\n        let startup_script = match tokio::fs::read_to_string(&startup_script_path).await {\n            Ok(script) => {\n                tokio::fs::remove_file(startup_script_path).await?;\n                script\n            }\n            Err(e) => {\n                if e.kind() == std::io::ErrorKind::NotFound {\n                    // always run some startup script so that this code path doesn't bit rot\n                    \"BEGIN; COMMIT;\".to_string()\n                } else {\n                    anyhow::bail!(\"Failed to read startup script: {e}\")\n                }\n            }\n        };\n        let (mut client, conn) = self.connect_to_database(postgres_port).await?;\n        let conn = tokio::spawn(conn);\n        let tx = client.build_transaction();\n        let tx = tx.start().await?;\n        tx.batch_execute(&startup_script).await?;\n        tx.commit().await?;\n        drop(client);\n        conn.await??;\n\n        let addr = format!(\"{host}:{listen_port}\");\n        let address_for_peers = Uri::builder()\n            .scheme(scheme)\n            .authority(addr.clone())\n            .path_and_query(\"\")\n            .build()\n            .unwrap();\n\n        let mut args = vec![\n            \"--dev\",\n            \"--database-url\",\n        
    &database_url,\n            \"--max-offline-interval\",\n            &humantime::Duration::from(self.config.max_offline).to_string(),\n            \"--max-warming-up-interval\",\n            &humantime::Duration::from(self.config.max_warming_up).to_string(),\n            \"--heartbeat-interval\",\n            &humantime::Duration::from(self.config.heartbeat_interval).to_string(),\n            \"--address-for-peers\",\n            &address_for_peers.to_string(),\n        ]\n        .into_iter()\n        .map(|s| s.to_string())\n        .collect::<Vec<_>>();\n\n        match scheme {\n            \"http\" => args.extend([\"--listen\".to_string(), addr]),\n            \"https\" => args.extend([\"--listen-https\".to_string(), addr]),\n            _ => {\n                panic!(\"Unexpected url scheme in control_plane_api: {scheme}\");\n            }\n        }\n\n        if self.config.start_as_candidate {\n            args.push(\"--start-as-candidate\".to_string());\n        }\n\n        if self.config.use_https_pageserver_api {\n            args.push(\"--use-https-pageserver-api\".to_string());\n        }\n\n        if self.config.use_https_safekeeper_api {\n            args.push(\"--use-https-safekeeper-api\".to_string());\n        }\n\n        if self.config.use_local_compute_notifications {\n            args.push(\"--use-local-compute-notifications\".to_string());\n        }\n\n        if let Some(value) = self.config.kick_secondary_downloads {\n            args.push(format!(\"--kick-secondary-downloads={value}\"));\n        }\n\n        if let Some(ssl_ca_file) = self.env.ssl_ca_cert_path() {\n            args.push(format!(\"--ssl-ca-file={}\", ssl_ca_file.to_str().unwrap()));\n        }\n\n        if let Some(private_key) = &self.private_key {\n            let claims = Claims::new(None, Scope::PageServerApi);\n            let jwt_token =\n                encode_from_key_file(&claims, private_key).expect(\"failed to generate jwt token\");\n            
args.push(format!(\"--jwt-token={jwt_token}\"));\n\n            let peer_claims = Claims::new(None, Scope::Admin);\n            let peer_jwt_token = encode_from_key_file(&peer_claims, private_key)\n                .expect(\"failed to generate jwt token\");\n            args.push(format!(\"--peer-jwt-token={peer_jwt_token}\"));\n\n            let claims = Claims::new(None, Scope::SafekeeperData);\n            let jwt_token =\n                encode_from_key_file(&claims, private_key).expect(\"failed to generate jwt token\");\n            args.push(format!(\"--safekeeper-jwt-token={jwt_token}\"));\n        }\n\n        if let Some(public_key) = &self.public_key {\n            args.push(format!(\"--public-key=\\\"{public_key}\\\"\"));\n        }\n\n        if let Some(control_plane_hooks_api) = &self.env.control_plane_hooks_api {\n            args.push(format!(\"--control-plane-url={control_plane_hooks_api}\"));\n        }\n\n        if let Some(split_threshold) = self.config.split_threshold.as_ref() {\n            args.push(format!(\"--split-threshold={split_threshold}\"))\n        }\n\n        if let Some(max_split_shards) = self.config.max_split_shards.as_ref() {\n            args.push(format!(\"--max-split-shards={max_split_shards}\"))\n        }\n\n        if let Some(initial_split_threshold) = self.config.initial_split_threshold.as_ref() {\n            args.push(format!(\n                \"--initial-split-threshold={initial_split_threshold}\"\n            ))\n        }\n\n        if let Some(initial_split_shards) = self.config.initial_split_shards.as_ref() {\n            args.push(format!(\"--initial-split-shards={initial_split_shards}\"))\n        }\n\n        if let Some(lag) = self.config.max_secondary_lag_bytes.as_ref() {\n            args.push(format!(\"--max-secondary-lag-bytes={lag}\"))\n        }\n\n        if let Some(threshold) = self.config.long_reconcile_threshold {\n            args.push(format!(\n                \"--long-reconcile-threshold={}\",\n 
               humantime::Duration::from(threshold)\n            ))\n        }\n\n        args.push(format!(\n            \"--neon-local-repo-dir={}\",\n            self.env.base_data_dir.display()\n        ));\n\n        if self.env.safekeepers.iter().any(|sk| sk.auth_enabled) && self.private_key.is_none() {\n            anyhow::bail!(\"Safekeeper set up for auth but no private key specified\");\n        }\n\n        if self.config.timelines_onto_safekeepers {\n            args.push(\"--timelines-onto-safekeepers\".to_string());\n        }\n\n        // neon_local is used in test environments where we often have less than 3 safekeepers.\n        if self.config.timeline_safekeeper_count.is_some() || self.env.safekeepers.len() < 3 {\n            let sk_cnt = self\n                .config\n                .timeline_safekeeper_count\n                .unwrap_or(self.env.safekeepers.len());\n\n            args.push(format!(\"--timeline-safekeeper-count={sk_cnt}\"));\n        }\n\n        if let Some(duration) = self.config.shard_split_request_timeout {\n            args.push(format!(\n                \"--shard-split-request-timeout={}\",\n                humantime::Duration::from(duration)\n            ));\n        }\n\n        let mut envs = vec![\n            (\"LD_LIBRARY_PATH\".to_owned(), pg_lib_dir.to_string()),\n            (\"DYLD_LIBRARY_PATH\".to_owned(), pg_lib_dir.to_string()),\n        ];\n\n        if let Some(posthog_config) = &self.config.posthog_config {\n            envs.push((\n                \"POSTHOG_CONFIG\".to_string(),\n                serde_json::to_string(posthog_config)?,\n            ));\n        }\n\n        println!(\"Starting storage controller at {scheme}://{host}:{listen_port}\");\n\n        if start_args.handle_ps_local_disk_loss.unwrap_or_default() {\n            args.push(\"--handle-ps-local-disk-loss\".to_string());\n        }\n\n        background_process::start_process(\n            COMMAND,\n            &instance_dir,\n           
 &self.env.storage_controller_bin(),\n            args,\n            envs,\n            background_process::InitialPidFile::Create(self.pid_file(start_args.instance_id)),\n            &start_args.start_timeout,\n            || async {\n                match self.ready().await {\n                    Ok(_) => Ok(true),\n                    Err(_) => Ok(false),\n                }\n            },\n        )\n        .await?;\n\n        if self.config.timelines_onto_safekeepers {\n            self.register_safekeepers().await?;\n        }\n\n        Ok(())\n    }\n\n    pub async fn stop(&self, stop_args: NeonStorageControllerStopArgs) -> anyhow::Result<()> {\n        background_process::stop_process(\n            stop_args.immediate,\n            COMMAND,\n            &self.pid_file(stop_args.instance_id),\n        )?;\n\n        let storcon_instances = self.env.storage_controller_instances().await?;\n        for (instance_id, instanced_dir_path) in storcon_instances {\n            if instance_id == stop_args.instance_id {\n                continue;\n            }\n\n            let pid_file = instanced_dir_path.join(\"storage_controller.pid\");\n            let pid = tokio::fs::read_to_string(&pid_file)\n                .await\n                .map_err(|err| {\n                    anyhow::anyhow!(\"Failed to read storcon pid file at {pid_file:?}: {err}\")\n                })?\n                .parse::<i32>()\n                .expect(\"pid is valid i32\");\n\n            let other_proc_alive = !background_process::process_has_stopped(Pid::from_raw(pid))?;\n            if other_proc_alive {\n                // There is another storage controller instance running, so we return\n                // and leave the database running.\n                return Ok(());\n            }\n        }\n\n        let pg_data_path = self.env.base_data_dir.join(\"storage_controller_db\");\n\n        println!(\"Stopping storage controller database...\");\n        let pg_stop_args = [\"-D\", 
&pg_data_path.to_string_lossy(), \"stop\"];\n        let stop_status = self.pg_ctl(pg_stop_args).await;\n        if !stop_status.success() {\n            match self.is_postgres_running().await {\n                Ok(false) => {\n                    println!(\"Storage controller database is already stopped\");\n                    return Ok(());\n                }\n                Ok(true) => {\n                    anyhow::bail!(\"Failed to stop storage controller database\");\n                }\n                Err(err) => {\n                    anyhow::bail!(\"Failed to stop storage controller database: {err}\");\n                }\n            }\n        }\n\n        Ok(())\n    }\n\n    async fn is_postgres_running(&self) -> anyhow::Result<bool> {\n        let pg_data_path = self.env.base_data_dir.join(\"storage_controller_db\");\n\n        let pg_status_args = [\"-D\", &pg_data_path.to_string_lossy(), \"status\"];\n        let status_exitcode = self.pg_ctl(pg_status_args).await;\n\n        // pg_ctl status returns this exit code if postgres is not running: in this case it is\n        // fine that stop failed.  
Otherwise it is an error that stop failed.\n        const PG_STATUS_NOT_RUNNING: i32 = 3;\n        const PG_NO_DATA_DIR: i32 = 4;\n        const PG_STATUS_RUNNING: i32 = 0;\n        match status_exitcode.code() {\n            Some(PG_STATUS_NOT_RUNNING) => Ok(false),\n            Some(PG_NO_DATA_DIR) => Ok(false),\n            Some(PG_STATUS_RUNNING) => Ok(true),\n            Some(code) => Err(anyhow::anyhow!(\n                \"pg_ctl status returned unexpected status code: {:?}\",\n                code\n            )),\n            None => Err(anyhow::anyhow!(\"pg_ctl status returned no status code\")),\n        }\n    }\n\n    fn get_claims_for_path(path: &str) -> anyhow::Result<Option<Claims>> {\n        let category = match path.find('/') {\n            Some(idx) => &path[..idx],\n            None => path,\n        };\n\n        match category {\n            \"status\" | \"ready\" => Ok(None),\n            \"control\" | \"debug\" => Ok(Some(Claims::new(None, Scope::Admin))),\n            \"v1\" => Ok(Some(Claims::new(None, Scope::PageServerApi))),\n            _ => Err(anyhow::anyhow!(\"Failed to determine claims for {}\", path)),\n        }\n    }\n\n    /// Simple HTTP request wrapper for calling into storage controller\n    async fn dispatch<RQ, RS>(\n        &self,\n        method: reqwest::Method,\n        path: String,\n        body: Option<RQ>,\n    ) -> anyhow::Result<RS>\n    where\n        RQ: Serialize + Sized,\n        RS: DeserializeOwned + Sized,\n    {\n        let response = self.dispatch_inner(method, path, body).await?;\n        Ok(response\n            .json()\n            .await\n            .map_err(pageserver_client::mgmt_api::Error::ReceiveBody)?)\n    }\n\n    /// Simple HTTP request wrapper for calling into storage controller\n    async fn dispatch_inner<RQ>(\n        &self,\n        method: reqwest::Method,\n        path: String,\n        body: Option<RQ>,\n    ) -> anyhow::Result<Response>\n    where\n        RQ: Serialize + Sized,\n 
   {\n        // In the special case of the `storage_controller start` subcommand, we wish\n        // to use the API endpoint of the newly started storage controller in order\n        // to pass the readiness check. In this scenario [`Self::listen_port`] will\n        // be set (see [`Self::start`]).\n        //\n        // Otherwise, we infer the storage controller api endpoint from the configured\n        // control plane API.\n        let port = if let Some(port) = self.listen_port.get() {\n            *port\n        } else {\n            self.env.control_plane_api.port().unwrap()\n        };\n\n        // The configured URL has the /upcall path prefix for pageservers to use: we will strip that out\n        // for general purpose API access.\n        let url = Url::from_str(&format!(\n            \"{}://{}:{port}/{path}\",\n            self.env.control_plane_api.scheme(),\n            self.env.control_plane_api.host_str().unwrap(),\n        ))\n        .unwrap();\n\n        let mut builder = self.client.request(method, url);\n        if let Some(body) = body {\n            builder = builder.json(&body)\n        }\n        if let Some(private_key) = &self.private_key {\n            println!(\"Getting claims for path {path}\");\n            if let Some(required_claims) = Self::get_claims_for_path(&path)? 
{\n                println!(\"Got claims {required_claims:?} for path {path}\");\n                let jwt_token = encode_from_key_file(&required_claims, private_key)?;\n                builder = builder.header(\n                    reqwest::header::AUTHORIZATION,\n                    format!(\"Bearer {jwt_token}\"),\n                );\n            }\n        }\n\n        let response = builder.send().await?;\n        let response = response.error_from_body().await?;\n\n        Ok(response)\n    }\n\n    /// Register the safekeepers in the storage controller\n    #[instrument(skip(self))]\n    async fn register_safekeepers(&self) -> anyhow::Result<()> {\n        for sk in self.env.safekeepers.iter() {\n            let sk_id = sk.id;\n            let body = serde_json::json!({\n                \"id\": sk_id,\n                \"created_at\": \"2023-10-25T09:11:25Z\",\n                \"updated_at\": \"2024-08-28T11:32:43Z\",\n                \"region_id\": \"aws-us-east-2\",\n                \"host\": \"127.0.0.1\",\n                \"port\": sk.pg_port,\n                \"http_port\": sk.http_port,\n                \"https_port\": sk.https_port,\n                \"version\": 5957,\n                \"availability_zone_id\": format!(\"us-east-2b-{sk_id}\"),\n            });\n            self.upsert_safekeeper(sk_id, body).await?;\n            self.safekeeper_scheduling_policy(sk_id, SkSchedulingPolicy::Active)\n                .await?;\n        }\n        Ok(())\n    }\n\n    /// Call into the attach_hook API, for use before handing out attachments to pageservers\n    #[instrument(skip(self))]\n    pub async fn attach_hook(\n        &self,\n        tenant_shard_id: TenantShardId,\n        pageserver_id: NodeId,\n    ) -> anyhow::Result<Option<u32>> {\n        let request = AttachHookRequest {\n            tenant_shard_id,\n            node_id: Some(pageserver_id),\n            generation_override: None,\n            config: None,\n        };\n\n        let response = 
self\n            .dispatch::<_, AttachHookResponse>(\n                Method::POST,\n                \"debug/v1/attach-hook\".to_string(),\n                Some(request),\n            )\n            .await?;\n\n        Ok(response.generation)\n    }\n\n    #[instrument(skip(self))]\n    pub async fn upsert_safekeeper(\n        &self,\n        node_id: NodeId,\n        request: serde_json::Value,\n    ) -> anyhow::Result<()> {\n        let resp = self\n            .dispatch_inner::<serde_json::Value>(\n                Method::POST,\n                format!(\"control/v1/safekeeper/{node_id}\"),\n                Some(request),\n            )\n            .await?;\n        if !resp.status().is_success() {\n            anyhow::bail!(\n                \"setting scheduling policy unsuccessful for safekeeper {node_id}: {}\",\n                resp.status()\n            );\n        }\n        Ok(())\n    }\n\n    #[instrument(skip(self))]\n    pub async fn safekeeper_scheduling_policy(\n        &self,\n        node_id: NodeId,\n        scheduling_policy: SkSchedulingPolicy,\n    ) -> anyhow::Result<()> {\n        self.dispatch::<SafekeeperSchedulingPolicyRequest, ()>(\n            Method::POST,\n            format!(\"control/v1/safekeeper/{node_id}/scheduling_policy\"),\n            Some(SafekeeperSchedulingPolicyRequest { scheduling_policy }),\n        )\n        .await\n    }\n\n    #[instrument(skip(self))]\n    pub async fn inspect(\n        &self,\n        tenant_shard_id: TenantShardId,\n    ) -> anyhow::Result<Option<(u32, NodeId)>> {\n        let request = InspectRequest { tenant_shard_id };\n\n        let response = self\n            .dispatch::<_, InspectResponse>(\n                Method::POST,\n                \"debug/v1/inspect\".to_string(),\n                Some(request),\n            )\n            .await?;\n\n        Ok(response.attachment)\n    }\n\n    #[instrument(skip(self))]\n    pub async fn tenant_create(\n        &self,\n        req: 
TenantCreateRequest,\n    ) -> anyhow::Result<TenantCreateResponse> {\n        self.dispatch(Method::POST, \"v1/tenant\".to_string(), Some(req))\n            .await\n    }\n\n    #[instrument(skip(self))]\n    pub async fn tenant_import(&self, tenant_id: TenantId) -> anyhow::Result<TenantCreateResponse> {\n        self.dispatch::<(), TenantCreateResponse>(\n            Method::POST,\n            format!(\"debug/v1/tenant/{tenant_id}/import\"),\n            None,\n        )\n        .await\n    }\n\n    #[instrument(skip(self))]\n    pub async fn tenant_locate(&self, tenant_id: TenantId) -> anyhow::Result<TenantLocateResponse> {\n        self.dispatch::<(), _>(\n            Method::GET,\n            format!(\"debug/v1/tenant/{tenant_id}/locate\"),\n            None,\n        )\n        .await\n    }\n\n    #[instrument(skip_all, fields(node_id=%req.node_id))]\n    pub async fn node_register(&self, req: NodeRegisterRequest) -> anyhow::Result<()> {\n        self.dispatch::<_, ()>(Method::POST, \"control/v1/node\".to_string(), Some(req))\n            .await\n    }\n\n    #[instrument(skip_all, fields(node_id=%req.node_id))]\n    pub async fn node_configure(&self, req: NodeConfigureRequest) -> anyhow::Result<()> {\n        self.dispatch::<_, ()>(\n            Method::PUT,\n            format!(\"control/v1/node/{}/config\", req.node_id),\n            Some(req),\n        )\n        .await\n    }\n\n    pub async fn node_list(&self) -> anyhow::Result<Vec<NodeDescribeResponse>> {\n        self.dispatch::<(), Vec<NodeDescribeResponse>>(\n            Method::GET,\n            \"control/v1/node\".to_string(),\n            None,\n        )\n        .await\n    }\n\n    #[instrument(skip(self))]\n    pub async fn ready(&self) -> anyhow::Result<()> {\n        self.dispatch::<(), ()>(Method::GET, \"ready\".to_string(), None)\n            .await\n    }\n\n    #[instrument(skip_all, fields(%tenant_id, timeline_id=%req.new_timeline_id))]\n    pub async fn tenant_timeline_create(\n    
    &self,\n        tenant_id: TenantId,\n        req: TimelineCreateRequest,\n    ) -> anyhow::Result<TimelineInfo> {\n        self.dispatch(\n            Method::POST,\n            format!(\"v1/tenant/{tenant_id}/timeline\"),\n            Some(req),\n        )\n        .await\n    }\n\n    pub async fn set_tenant_config(&self, req: &TenantConfigRequest) -> anyhow::Result<()> {\n        self.dispatch(Method::PUT, \"v1/tenant/config\".to_string(), Some(req))\n            .await\n    }\n}\n"
  },
  {
    "path": "control_plane/storcon_cli/Cargo.toml",
    "content": "[package]\nname = \"storcon_cli\"\nversion = \"0.1.0\"\nedition.workspace = true\nlicense.workspace = true\n\n\n[dependencies]\nanyhow.workspace = true\nclap.workspace = true\ncomfy-table.workspace = true\nfutures.workspace = true\nhumantime.workspace = true\npageserver_api.workspace = true\npageserver_client.workspace = true\nreqwest.workspace = true\nsafekeeper_api.workspace=true\nserde_json = { workspace = true, features = [\"raw_value\"] }\nstorage_controller_client.workspace = true\ntokio.workspace = true\ntracing.workspace = true\nutils.workspace = true\nworkspace_hack.workspace = true\n\n"
  },
  {
    "path": "control_plane/storcon_cli/src/main.rs",
    "content": "use std::collections::{HashMap, HashSet};\nuse std::path::PathBuf;\nuse std::str::FromStr;\nuse std::time::Duration;\n\nuse clap::{Parser, Subcommand};\nuse futures::StreamExt;\nuse pageserver_api::controller_api::{\n    AvailabilityZone, MigrationConfig, NodeAvailabilityWrapper, NodeConfigureRequest,\n    NodeDescribeResponse, NodeRegisterRequest, NodeSchedulingPolicy, NodeShardResponse,\n    PlacementPolicy, SafekeeperDescribeResponse, SafekeeperSchedulingPolicyRequest,\n    ShardSchedulingPolicy, ShardsPreferredAzsRequest, ShardsPreferredAzsResponse,\n    SkSchedulingPolicy, TenantCreateRequest, TenantDescribeResponse, TenantPolicyRequest,\n    TenantShardMigrateRequest, TenantShardMigrateResponse, TimelineSafekeeperMigrateRequest,\n};\nuse pageserver_api::models::{\n    EvictionPolicy, EvictionPolicyLayerAccessThreshold, ShardParameters, TenantConfig,\n    TenantConfigPatchRequest, TenantConfigRequest, TenantShardSplitRequest,\n    TenantShardSplitResponse,\n};\nuse pageserver_api::shard::{ShardStripeSize, TenantShardId};\nuse pageserver_client::mgmt_api::{self};\nuse reqwest::{Certificate, Method, StatusCode, Url};\nuse safekeeper_api::models::TimelineLocateResponse;\nuse storage_controller_client::control_api::Client;\nuse utils::id::{NodeId, TenantId, TimelineId};\n\n#[derive(Subcommand, Debug)]\nenum Command {\n    /// Register a pageserver with the storage controller.  
This shouldn't usually be necessary,\n    /// since pageservers auto-register when they start up\n    NodeRegister {\n        #[arg(long)]\n        node_id: NodeId,\n\n        #[arg(long)]\n        listen_pg_addr: String,\n        #[arg(long)]\n        listen_pg_port: u16,\n        #[arg(long)]\n        listen_grpc_addr: Option<String>,\n        #[arg(long)]\n        listen_grpc_port: Option<u16>,\n\n        #[arg(long)]\n        listen_http_addr: String,\n        #[arg(long)]\n        listen_http_port: u16,\n        #[arg(long)]\n        listen_https_port: Option<u16>,\n\n        #[arg(long)]\n        availability_zone_id: String,\n    },\n\n    /// Modify a node's configuration in the storage controller\n    NodeConfigure {\n        #[arg(long)]\n        node_id: NodeId,\n\n        /// Availability is usually auto-detected based on heartbeats.  Set 'offline' here to\n        /// manually mark a node offline\n        #[arg(long)]\n        availability: Option<NodeAvailabilityArg>,\n        /// Scheduling policy controls whether tenant shards may be scheduled onto this node.\n        #[arg(long)]\n        scheduling: Option<NodeSchedulingPolicy>,\n    },\n    /// Exists for backup usage and will be removed in future.\n    /// Use [`Command::NodeStartDelete`] instead, if possible.\n    NodeDelete {\n        #[arg(long)]\n        node_id: NodeId,\n    },\n    /// Start deletion of the specified pageserver.\n    NodeStartDelete {\n        #[arg(long)]\n        node_id: NodeId,\n        /// When `force` is true, skip waiting for shards to prewarm during migration.\n        /// This can significantly speed up node deletion since prewarming all shards\n        /// can take considerable time, but may result in slower initial access to\n        /// migrated shards until they warm up naturally.\n        #[arg(long)]\n        force: bool,\n    },\n    /// Cancel deletion of the specified pageserver and wait for `timeout`\n    /// for the operation to be canceled. 
May be retried.\n    NodeCancelDelete {\n        #[arg(long)]\n        node_id: NodeId,\n        #[arg(long)]\n        timeout: humantime::Duration,\n    },\n    /// Delete a tombstone of node from the storage controller.\n    /// This is used when we want to allow the node to be re-registered.\n    NodeDeleteTombstone {\n        #[arg(long)]\n        node_id: NodeId,\n    },\n    /// Modify a tenant's policies in the storage controller\n    TenantPolicy {\n        #[arg(long)]\n        tenant_id: TenantId,\n        /// Placement policy controls whether a tenant is `detached`, has only a secondary location (`secondary`),\n        /// or is in the normal attached state with N secondary locations (`attached:N`)\n        #[arg(long)]\n        placement: Option<PlacementPolicyArg>,\n        /// Scheduling policy enables pausing the controller's scheduling activity involving this tenant.  `active` is normal,\n        /// `essential` disables optimization scheduling changes, `pause` disables all scheduling changes, and `stop` prevents\n        /// all reconciliation activity including for scheduling changes already made.  
`pause` and `stop` can make a tenant\n        /// unavailable, and are only for use in emergencies.\n        #[arg(long)]\n        scheduling: Option<ShardSchedulingPolicyArg>,\n    },\n    /// List nodes known to the storage controller\n    Nodes {},\n    /// List soft deleted nodes known to the storage controller\n    NodeTombstones {},\n    /// List tenants known to the storage controller\n    Tenants {\n        /// If this field is set, it will list the tenants on a specific node\n        node_id: Option<NodeId>,\n    },\n    /// Create a new tenant in the storage controller, and by extension on pageservers.\n    TenantCreate {\n        #[arg(long)]\n        tenant_id: TenantId,\n    },\n    /// Delete a tenant in the storage controller, and by extension on pageservers.\n    TenantDelete {\n        #[arg(long)]\n        tenant_id: TenantId,\n    },\n    /// Split an existing tenant into a higher number of shards than its current shard count.\n    TenantShardSplit {\n        #[arg(long)]\n        tenant_id: TenantId,\n        #[arg(long)]\n        shard_count: u8,\n        /// Optional, in 8kiB pages.  e.g. set 2048 for 16MB stripes.\n        #[arg(long)]\n        stripe_size: Option<u32>,\n    },\n    /// Migrate the attached location for a tenant shard to a specific pageserver.\n    TenantShardMigrate {\n        #[arg(long)]\n        tenant_shard_id: TenantShardId,\n        #[arg(long)]\n        node: NodeId,\n        #[arg(long, default_value_t = true, action = clap::ArgAction::Set)]\n        prewarm: bool,\n        #[arg(long, default_value_t = false, action = clap::ArgAction::Set)]\n        override_scheduler: bool,\n    },\n    /// Watch the location of a tenant shard evolve, e.g. 
while expecting it to migrate\n    TenantShardWatch {\n        #[arg(long)]\n        tenant_shard_id: TenantShardId,\n    },\n    /// Migrate the secondary location for a tenant shard to a specific pageserver.\n    TenantShardMigrateSecondary {\n        #[arg(long)]\n        tenant_shard_id: TenantShardId,\n        #[arg(long)]\n        node: NodeId,\n    },\n    /// Cancel any ongoing reconciliation for this shard\n    TenantShardCancelReconcile {\n        #[arg(long)]\n        tenant_shard_id: TenantShardId,\n    },\n    /// Set the pageserver tenant configuration of a tenant: this is the configuration structure\n    /// that is passed through to pageservers, and does not affect storage controller behavior.\n    /// Any previous tenant configs are overwritten.\n    SetTenantConfig {\n        #[arg(long)]\n        tenant_id: TenantId,\n        #[arg(long)]\n        config: String,\n    },\n    /// Patch the pageserver tenant configuration of a tenant. Any fields with null values in the\n    /// provided JSON are unset from the tenant config and all fields with non-null values are set.\n    /// Unspecified fields are not changed.\n    PatchTenantConfig {\n        #[arg(long)]\n        tenant_id: TenantId,\n        #[arg(long)]\n        config: String,\n    },\n    /// Print details about a particular tenant, including all its shards' states.\n    TenantDescribe {\n        #[arg(long)]\n        tenant_id: TenantId,\n    },\n    TenantSetPreferredAz {\n        #[arg(long)]\n        tenant_id: TenantId,\n        #[arg(long)]\n        preferred_az: Option<String>,\n    },\n    /// Uncleanly drop a tenant from the storage controller: this doesn't delete anything from pageservers. Appropriate\n    /// if you e.g. 
used `tenant-warmup` by mistake on a tenant ID that doesn't really exist, or is in some other region.\n    TenantDrop {\n        #[arg(long)]\n        tenant_id: TenantId,\n        #[arg(long)]\n        unclean: bool,\n    },\n    NodeDrop {\n        #[arg(long)]\n        node_id: NodeId,\n        #[arg(long)]\n        unclean: bool,\n    },\n    TenantSetTimeBasedEviction {\n        #[arg(long)]\n        tenant_id: TenantId,\n        #[arg(long)]\n        period: humantime::Duration,\n        #[arg(long)]\n        threshold: humantime::Duration,\n    },\n    // Migrate away from a set of specified pageservers by moving the primary attachments to pageservers\n    // outside of the specified set.\n    BulkMigrate {\n        // Set of pageserver node ids to drain.\n        #[arg(long)]\n        nodes: Vec<NodeId>,\n        // Optional: migration concurrency (default is 8)\n        #[arg(long)]\n        concurrency: Option<usize>,\n        // Optional: maximum number of shards to migrate\n        #[arg(long)]\n        max_shards: Option<usize>,\n        // Optional: when set to true, nothing is migrated, but the plan is printed to stdout\n        #[arg(long)]\n        dry_run: Option<bool>,\n    },\n    /// Start draining the specified pageserver.\n    /// The drain is complete when the scheduling policy returns to active.\n    StartDrain {\n        #[arg(long)]\n        node_id: NodeId,\n    },\n    /// Cancel draining the specified pageserver and wait for `timeout`\n    /// for the operation to be canceled. May be retried.\n    CancelDrain {\n        #[arg(long)]\n        node_id: NodeId,\n        #[arg(long)]\n        timeout: humantime::Duration,\n    },\n    /// Start filling the specified pageserver.\n    /// The fill is complete when the scheduling policy returns to active.\n    StartFill {\n        #[arg(long)]\n        node_id: NodeId,\n    },\n    /// Cancel filling the specified pageserver and wait for `timeout`\n    /// for the operation to be canceled. 
May be retried.\n    CancelFill {\n        #[arg(long)]\n        node_id: NodeId,\n        #[arg(long)]\n        timeout: humantime::Duration,\n    },\n    /// List safekeepers known to the storage controller\n    Safekeepers {},\n    /// Set the scheduling policy of the specified safekeeper\n    SafekeeperScheduling {\n        #[arg(long)]\n        node_id: NodeId,\n        #[arg(long)]\n        scheduling_policy: SkSchedulingPolicyArg,\n    },\n    /// Downloads any missing heatmap layers for all shard for a given timeline\n    DownloadHeatmapLayers {\n        /// Tenant ID or tenant shard ID. When an unsharded tenant ID is specified,\n        /// the operation is performed on all shards. When a sharded tenant ID is\n        /// specified, the operation is only performed on the specified shard.\n        #[arg(long)]\n        tenant_shard_id: TenantShardId,\n        #[arg(long)]\n        timeline_id: TimelineId,\n        /// Optional: Maximum download concurrency (default is 16)\n        #[arg(long)]\n        concurrency: Option<usize>,\n    },\n    /// Locate safekeepers for a timeline from the storcon DB.\n    TimelineLocate {\n        #[arg(long)]\n        tenant_id: TenantId,\n        #[arg(long)]\n        timeline_id: TimelineId,\n    },\n    /// Migrate a timeline to a new set of safekeepers\n    TimelineSafekeeperMigrate {\n        #[arg(long)]\n        tenant_id: TenantId,\n        #[arg(long)]\n        timeline_id: TimelineId,\n        /// Example: --new-sk-set 1,2,3\n        #[arg(long, required = true, value_delimiter = ',')]\n        new_sk_set: Vec<NodeId>,\n    },\n    /// Abort ongoing safekeeper migration.\n    TimelineSafekeeperMigrateAbort {\n        #[arg(long)]\n        tenant_id: TenantId,\n        #[arg(long)]\n        timeline_id: TimelineId,\n    },\n}\n\n#[derive(Parser)]\n#[command(\n    author,\n    version,\n    about,\n    long_about = \"CLI for Storage Controller Support/Debug\"\n)]\n#[command(arg_required_else_help(true))]\nstruct 
Cli {\n    #[arg(long)]\n    /// URL to storage controller.  e.g. http://127.0.0.1:1234 when using `neon_local`\n    api: Url,\n\n    #[arg(long)]\n    /// JWT token for authenticating with storage controller.  Depending on the API used, this\n    /// should have either `pageserverapi` or `admin` scopes: for convenience, you should mint\n    /// a token with both scopes to use with this tool.\n    jwt: Option<String>,\n\n    #[arg(long)]\n    /// Trusted root CA certificates to use in https APIs.\n    ssl_ca_file: Option<PathBuf>,\n\n    #[command(subcommand)]\n    command: Command,\n}\n\n#[derive(Debug, Clone)]\nstruct PlacementPolicyArg(PlacementPolicy);\n\nimpl FromStr for PlacementPolicyArg {\n    type Err = anyhow::Error;\n\n    fn from_str(s: &str) -> Result<Self, Self::Err> {\n        match s {\n            \"detached\" => Ok(Self(PlacementPolicy::Detached)),\n            \"secondary\" => Ok(Self(PlacementPolicy::Secondary)),\n            _ if s.starts_with(\"attached:\") => {\n                let mut splitter = s.split(':');\n                let _prefix = splitter.next().unwrap();\n                match splitter.next().and_then(|s| s.parse::<usize>().ok()) {\n                    Some(n) => Ok(Self(PlacementPolicy::Attached(n))),\n                    None => Err(anyhow::anyhow!(\n                        \"Invalid format '{s}', a valid example is 'attached:1'\"\n                    )),\n                }\n            }\n            _ => Err(anyhow::anyhow!(\n                \"Unknown placement policy '{s}', try detached,secondary,attached:<n>\"\n            )),\n        }\n    }\n}\n\n#[derive(Debug, Clone)]\nstruct SkSchedulingPolicyArg(SkSchedulingPolicy);\n\nimpl FromStr for SkSchedulingPolicyArg {\n    type Err = anyhow::Error;\n\n    fn from_str(s: &str) -> Result<Self, Self::Err> {\n        SkSchedulingPolicy::from_str(s).map(Self)\n    }\n}\n\n#[derive(Debug, Clone)]\nstruct ShardSchedulingPolicyArg(ShardSchedulingPolicy);\n\nimpl FromStr for 
ShardSchedulingPolicyArg {\n    type Err = anyhow::Error;\n\n    fn from_str(s: &str) -> Result<Self, Self::Err> {\n        match s {\n            \"active\" => Ok(Self(ShardSchedulingPolicy::Active)),\n            \"essential\" => Ok(Self(ShardSchedulingPolicy::Essential)),\n            \"pause\" => Ok(Self(ShardSchedulingPolicy::Pause)),\n            \"stop\" => Ok(Self(ShardSchedulingPolicy::Stop)),\n            _ => Err(anyhow::anyhow!(\n                \"Unknown scheduling policy '{s}', try active,essential,pause,stop\"\n            )),\n        }\n    }\n}\n\n#[derive(Debug, Clone)]\nstruct NodeAvailabilityArg(NodeAvailabilityWrapper);\n\nimpl FromStr for NodeAvailabilityArg {\n    type Err = anyhow::Error;\n\n    fn from_str(s: &str) -> Result<Self, Self::Err> {\n        match s {\n            \"active\" => Ok(Self(NodeAvailabilityWrapper::Active)),\n            \"offline\" => Ok(Self(NodeAvailabilityWrapper::Offline)),\n            _ => Err(anyhow::anyhow!(\"Unknown availability state '{s}'\")),\n        }\n    }\n}\n\nasync fn wait_for_scheduling_policy<F>(\n    client: Client,\n    node_id: NodeId,\n    timeout: Duration,\n    f: F,\n) -> anyhow::Result<NodeSchedulingPolicy>\nwhere\n    F: Fn(NodeSchedulingPolicy) -> bool,\n{\n    let waiter = tokio::time::timeout(timeout, async move {\n        loop {\n            let node = client\n                .dispatch::<(), NodeDescribeResponse>(\n                    Method::GET,\n                    format!(\"control/v1/node/{node_id}\"),\n                    None,\n                )\n                .await?;\n\n            if f(node.scheduling) {\n                return Ok::<NodeSchedulingPolicy, mgmt_api::Error>(node.scheduling);\n            }\n        }\n    });\n\n    Ok(waiter.await??)\n}\n\n#[tokio::main]\nasync fn main() -> anyhow::Result<()> {\n    let cli = Cli::parse();\n\n    let ssl_ca_certs = match &cli.ssl_ca_file {\n        Some(ssl_ca_file) => {\n            let buf = 
tokio::fs::read(ssl_ca_file).await?;\n            Certificate::from_pem_bundle(&buf)?\n        }\n        None => Vec::new(),\n    };\n\n    let mut http_client = reqwest::Client::builder();\n    for ssl_ca_cert in ssl_ca_certs {\n        http_client = http_client.add_root_certificate(ssl_ca_cert);\n    }\n    let http_client = http_client.build()?;\n\n    let storcon_client = Client::new(http_client.clone(), cli.api.clone(), cli.jwt.clone());\n\n    let mut trimmed = cli.api.to_string();\n    trimmed.pop();\n    let vps_client = mgmt_api::Client::new(http_client.clone(), trimmed, cli.jwt.as_deref());\n\n    match cli.command {\n        Command::NodeRegister {\n            node_id,\n            listen_pg_addr,\n            listen_pg_port,\n            listen_grpc_addr,\n            listen_grpc_port,\n            listen_http_addr,\n            listen_http_port,\n            listen_https_port,\n            availability_zone_id,\n        } => {\n            storcon_client\n                .dispatch::<_, ()>(\n                    Method::POST,\n                    \"control/v1/node\".to_string(),\n                    Some(NodeRegisterRequest {\n                        node_id,\n                        listen_pg_addr,\n                        listen_pg_port,\n                        listen_grpc_addr,\n                        listen_grpc_port,\n                        listen_http_addr,\n                        listen_http_port,\n                        listen_https_port,\n                        availability_zone_id: AvailabilityZone(availability_zone_id),\n                        node_ip_addr: None,\n                    }),\n                )\n                .await?;\n        }\n        Command::TenantCreate { tenant_id } => {\n            storcon_client\n                .dispatch::<_, ()>(\n                    Method::POST,\n                    \"v1/tenant\".to_string(),\n                    Some(TenantCreateRequest {\n                        new_tenant_id: 
TenantShardId::unsharded(tenant_id),\n                        generation: None,\n                        shard_parameters: ShardParameters::default(),\n                        placement_policy: Some(PlacementPolicy::Attached(1)),\n                        config: TenantConfig::default(),\n                    }),\n                )\n                .await?;\n        }\n        Command::TenantDelete { tenant_id } => {\n            let status = vps_client\n                .tenant_delete(TenantShardId::unsharded(tenant_id))\n                .await?;\n            tracing::info!(\"Delete status: {}\", status);\n        }\n        Command::Nodes {} => {\n            let mut resp = storcon_client\n                .dispatch::<(), Vec<NodeDescribeResponse>>(\n                    Method::GET,\n                    \"control/v1/node\".to_string(),\n                    None,\n                )\n                .await?;\n\n            resp.sort_by(|a, b| a.listen_http_addr.cmp(&b.listen_http_addr));\n\n            let mut table = comfy_table::Table::new();\n            table.set_header([\"Id\", \"Hostname\", \"AZ\", \"Scheduling\", \"Availability\"]);\n            for node in resp {\n                table.add_row([\n                    format!(\"{}\", node.id),\n                    node.listen_http_addr,\n                    node.availability_zone_id,\n                    format!(\"{:?}\", node.scheduling),\n                    format!(\"{:?}\", node.availability),\n                ]);\n            }\n            println!(\"{table}\");\n        }\n        Command::NodeConfigure {\n            node_id,\n            availability,\n            scheduling,\n        } => {\n            let req = NodeConfigureRequest {\n                node_id,\n                availability: availability.map(|a| a.0),\n                scheduling,\n            };\n            storcon_client\n                .dispatch::<_, ()>(\n                    Method::PUT,\n                    
format!(\"control/v1/node/{node_id}/config\"),\n                    Some(req),\n                )\n                .await?;\n        }\n        Command::Tenants {\n            node_id: Some(node_id),\n        } => {\n            let describe_response = storcon_client\n                .dispatch::<(), NodeShardResponse>(\n                    Method::GET,\n                    format!(\"control/v1/node/{node_id}/shards\"),\n                    None,\n                )\n                .await?;\n            let shards = describe_response.shards;\n            let mut table = comfy_table::Table::new();\n            table.set_header([\n                \"Shard\",\n                \"Intended Primary/Secondary\",\n                \"Observed Primary/Secondary\",\n            ]);\n            for shard in shards {\n                table.add_row([\n                    format!(\"{}\", shard.tenant_shard_id),\n                    match shard.is_intended_secondary {\n                        None => \"\".to_string(),\n                        Some(true) => \"Secondary\".to_string(),\n                        Some(false) => \"Primary\".to_string(),\n                    },\n                    match shard.is_observed_secondary {\n                        None => \"\".to_string(),\n                        Some(true) => \"Secondary\".to_string(),\n                        Some(false) => \"Primary\".to_string(),\n                    },\n                ]);\n            }\n            println!(\"{table}\");\n        }\n        Command::Tenants { node_id: None } => {\n            // Set up output formatting\n            let mut table = comfy_table::Table::new();\n            table.set_header([\n                \"TenantId\",\n                \"Preferred AZ\",\n                \"ShardCount\",\n                \"StripeSize\",\n                \"Placement\",\n                \"Scheduling\",\n            ]);\n\n            // Pagination loop over listing API\n            let mut start_after = 
None;\n            const LIMIT: usize = 1000;\n            loop {\n                let path = match start_after {\n                    None => format!(\"control/v1/tenant?limit={LIMIT}\"),\n                    Some(start_after) => {\n                        format!(\"control/v1/tenant?limit={LIMIT}&start_after={start_after}\")\n                    }\n                };\n\n                let resp = storcon_client\n                    .dispatch::<(), Vec<TenantDescribeResponse>>(Method::GET, path, None)\n                    .await?;\n\n                if resp.is_empty() {\n                    // End of data reached\n                    break;\n                }\n\n                // Give some visual feedback while we're building up the table (comfy_table doesn't have\n                // streaming output)\n                if resp.len() >= LIMIT {\n                    eprint!(\".\");\n                }\n\n                start_after = Some(resp.last().unwrap().tenant_id);\n\n                for tenant in resp {\n                    let shard_zero = tenant.shards.into_iter().next().unwrap();\n                    table.add_row([\n                        format!(\"{}\", tenant.tenant_id),\n                        shard_zero\n                            .preferred_az_id\n                            .as_ref()\n                            .cloned()\n                            .unwrap_or(\"\".to_string()),\n                        format!(\"{}\", shard_zero.tenant_shard_id.shard_count.literal()),\n                        format!(\"{:?}\", tenant.stripe_size),\n                        format!(\"{:?}\", tenant.policy),\n                        format!(\"{:?}\", shard_zero.scheduling_policy),\n                    ]);\n                }\n            }\n\n            // Terminate progress dots\n            if table.row_count() > LIMIT {\n                eprint!(\"\");\n            }\n\n            println!(\"{table}\");\n        }\n        Command::TenantPolicy {\n            
tenant_id,\n            placement,\n            scheduling,\n        } => {\n            let req = TenantPolicyRequest {\n                scheduling: scheduling.map(|s| s.0),\n                placement: placement.map(|p| p.0),\n            };\n            storcon_client\n                .dispatch::<_, ()>(\n                    Method::PUT,\n                    format!(\"control/v1/tenant/{tenant_id}/policy\"),\n                    Some(req),\n                )\n                .await?;\n        }\n        Command::TenantShardSplit {\n            tenant_id,\n            shard_count,\n            stripe_size,\n        } => {\n            let req = TenantShardSplitRequest {\n                new_shard_count: shard_count,\n                new_stripe_size: stripe_size.map(ShardStripeSize),\n            };\n\n            let response = storcon_client\n                .dispatch::<TenantShardSplitRequest, TenantShardSplitResponse>(\n                    Method::PUT,\n                    format!(\"control/v1/tenant/{tenant_id}/shard_split\"),\n                    Some(req),\n                )\n                .await?;\n            println!(\n                \"Split tenant {} into {} shards: {}\",\n                tenant_id,\n                shard_count,\n                response\n                    .new_shards\n                    .iter()\n                    .map(|s| format!(\"{s:?}\"))\n                    .collect::<Vec<_>>()\n                    .join(\",\")\n            );\n        }\n        Command::TenantShardMigrate {\n            tenant_shard_id,\n            node,\n            prewarm,\n            override_scheduler,\n        } => {\n            let migration_config = MigrationConfig {\n                prewarm,\n                override_scheduler,\n                ..Default::default()\n            };\n\n            let req = TenantShardMigrateRequest {\n                node_id: node,\n                origin_node_id: None,\n                migration_config,\n      
      };\n\n            match storcon_client\n                .dispatch::<TenantShardMigrateRequest, TenantShardMigrateResponse>(\n                    Method::PUT,\n                    format!(\"control/v1/tenant/{tenant_shard_id}/migrate\"),\n                    Some(req),\n                )\n                .await\n            {\n                Err(mgmt_api::Error::ApiError(StatusCode::PRECONDITION_FAILED, msg)) => {\n                    anyhow::bail!(\n                        \"Migration to {node} rejected, may require `--force` ({}) \",\n                        msg\n                    );\n                }\n                Err(e) => return Err(e.into()),\n                Ok(_) => {}\n            }\n\n            watch_tenant_shard(storcon_client, tenant_shard_id, Some(node)).await?;\n        }\n        Command::TenantShardWatch { tenant_shard_id } => {\n            watch_tenant_shard(storcon_client, tenant_shard_id, None).await?;\n        }\n        Command::TenantShardMigrateSecondary {\n            tenant_shard_id,\n            node,\n        } => {\n            let req = TenantShardMigrateRequest {\n                node_id: node,\n                origin_node_id: None,\n                migration_config: MigrationConfig::default(),\n            };\n\n            storcon_client\n                .dispatch::<TenantShardMigrateRequest, TenantShardMigrateResponse>(\n                    Method::PUT,\n                    format!(\"control/v1/tenant/{tenant_shard_id}/migrate_secondary\"),\n                    Some(req),\n                )\n                .await?;\n        }\n        Command::TenantShardCancelReconcile { tenant_shard_id } => {\n            storcon_client\n                .dispatch::<(), ()>(\n                    Method::PUT,\n                    format!(\"control/v1/tenant/{tenant_shard_id}/cancel_reconcile\"),\n                    None,\n                )\n                .await?;\n        }\n        Command::SetTenantConfig { tenant_id, config } 
=> {\n            let tenant_conf = serde_json::from_str(&config)?;\n\n            vps_client\n                .set_tenant_config(&TenantConfigRequest {\n                    tenant_id,\n                    config: tenant_conf,\n                })\n                .await?;\n        }\n        Command::PatchTenantConfig { tenant_id, config } => {\n            let tenant_conf = serde_json::from_str(&config)?;\n\n            vps_client\n                .patch_tenant_config(&TenantConfigPatchRequest {\n                    tenant_id,\n                    config: tenant_conf,\n                })\n                .await?;\n        }\n        Command::TenantDescribe { tenant_id } => {\n            let TenantDescribeResponse {\n                tenant_id,\n                shards,\n                stripe_size,\n                policy,\n                config,\n            } = storcon_client\n                .dispatch::<(), TenantDescribeResponse>(\n                    Method::GET,\n                    format!(\"control/v1/tenant/{tenant_id}\"),\n                    None,\n                )\n                .await?;\n\n            let nodes = storcon_client\n                .dispatch::<(), Vec<NodeDescribeResponse>>(\n                    Method::GET,\n                    \"control/v1/node\".to_string(),\n                    None,\n                )\n                .await?;\n            let nodes = nodes\n                .into_iter()\n                .map(|n| (n.id, n))\n                .collect::<HashMap<_, _>>();\n\n            println!(\"Tenant {tenant_id}\");\n            let mut table = comfy_table::Table::new();\n            table.add_row([\"Policy\", &format!(\"{policy:?}\")]);\n            table.add_row([\"Stripe size\", &format!(\"{stripe_size:?}\")]);\n            table.add_row([\"Config\", &serde_json::to_string_pretty(&config).unwrap()]);\n            println!(\"{table}\");\n            println!(\"Shards:\");\n            let mut table = comfy_table::Table::new();\n 
           table.set_header([\n                \"Shard\",\n                \"Attached\",\n                \"Attached AZ\",\n                \"Secondary\",\n                \"Last error\",\n                \"status\",\n            ]);\n            for shard in shards {\n                let secondary = shard\n                    .node_secondary\n                    .iter()\n                    .map(|n| format!(\"{n}\"))\n                    .collect::<Vec<_>>()\n                    .join(\",\");\n\n                let mut status_parts = Vec::new();\n                if shard.is_reconciling {\n                    status_parts.push(\"reconciling\");\n                }\n\n                if shard.is_pending_compute_notification {\n                    status_parts.push(\"pending_compute\");\n                }\n\n                if shard.is_splitting {\n                    status_parts.push(\"splitting\");\n                }\n                let status = status_parts.join(\",\");\n\n                let attached_node = shard\n                    .node_attached\n                    .as_ref()\n                    .map(|id| nodes.get(id).expect(\"Shard references nonexistent node\"));\n\n                table.add_row([\n                    format!(\"{}\", shard.tenant_shard_id),\n                    attached_node\n                        .map(|n| format!(\"{} ({})\", n.listen_http_addr, n.id))\n                        .unwrap_or(String::new()),\n                    attached_node\n                        .map(|n| n.availability_zone_id.clone())\n                        .unwrap_or(String::new()),\n                    secondary,\n                    shard.last_error,\n                    status,\n                ]);\n            }\n            println!(\"{table}\");\n        }\n        Command::TenantSetPreferredAz {\n            tenant_id,\n            preferred_az,\n        } => {\n            // First learn about the tenant's shards\n            let describe_response = 
storcon_client\n                .dispatch::<(), TenantDescribeResponse>(\n                    Method::GET,\n                    format!(\"control/v1/tenant/{tenant_id}\"),\n                    None,\n                )\n                .await?;\n\n            // Learn about nodes to validate the AZ ID\n            let nodes = storcon_client\n                .dispatch::<(), Vec<NodeDescribeResponse>>(\n                    Method::GET,\n                    \"control/v1/node\".to_string(),\n                    None,\n                )\n                .await?;\n\n            if let Some(preferred_az) = &preferred_az {\n                let azs = nodes\n                    .into_iter()\n                    .map(|n| (n.availability_zone_id))\n                    .collect::<HashSet<_>>();\n                if !azs.contains(preferred_az) {\n                    anyhow::bail!(\n                        \"AZ {} not found on any node: known AZs are: {:?}\",\n                        preferred_az,\n                        azs\n                    );\n                }\n            } else {\n                // Make it obvious to the user that since they've omitted an AZ, we're clearing it\n                eprintln!(\"Clearing preferred AZ for tenant {tenant_id}\");\n            }\n\n            // Construct a request that modifies all the tenant's shards\n            let req = ShardsPreferredAzsRequest {\n                preferred_az_ids: describe_response\n                    .shards\n                    .into_iter()\n                    .map(|s| {\n                        (\n                            s.tenant_shard_id,\n                            preferred_az.clone().map(AvailabilityZone),\n                        )\n                    })\n                    .collect(),\n            };\n            storcon_client\n                .dispatch::<ShardsPreferredAzsRequest, ShardsPreferredAzsResponse>(\n                    Method::PUT,\n                    
\"control/v1/preferred_azs\".to_string(),\n                    Some(req),\n                )\n                .await?;\n        }\n        Command::TenantDrop { tenant_id, unclean } => {\n            if !unclean {\n                anyhow::bail!(\n                    \"This command is not a tenant deletion, and uncleanly drops all controller state for the tenant.  If you know what you're doing, add `--unclean` to proceed.\"\n                )\n            }\n            storcon_client\n                .dispatch::<(), ()>(\n                    Method::POST,\n                    format!(\"debug/v1/tenant/{tenant_id}/drop\"),\n                    None,\n                )\n                .await?;\n        }\n        Command::NodeDrop { node_id, unclean } => {\n            if !unclean {\n                anyhow::bail!(\n                    \"This command is not a clean node decommission, and uncleanly drops all controller state for the node, without checking if any tenants still refer to it.  If you know what you're doing, add `--unclean` to proceed.\"\n                )\n            }\n            storcon_client\n                .dispatch::<(), ()>(Method::POST, format!(\"debug/v1/node/{node_id}/drop\"), None)\n                .await?;\n        }\n        Command::NodeDelete { node_id } => {\n            eprintln!(\"Warning: This command is obsolete and will be removed in a future version\");\n            eprintln!(\"Use `NodeStartDelete` instead, if possible\");\n            storcon_client\n                .dispatch::<(), ()>(Method::DELETE, format!(\"control/v1/node/{node_id}\"), None)\n                .await?;\n        }\n        Command::NodeStartDelete { node_id, force } => {\n            let query = if force {\n                format!(\"control/v1/node/{node_id}/delete?force=true\")\n            } else {\n                format!(\"control/v1/node/{node_id}/delete\")\n            };\n            storcon_client\n                .dispatch::<(), ()>(Method::PUT, 
query, None)\n                .await?;\n            println!(\"Delete started for {node_id}\");\n        }\n        Command::NodeCancelDelete { node_id, timeout } => {\n            storcon_client\n                .dispatch::<(), ()>(\n                    Method::DELETE,\n                    format!(\"control/v1/node/{node_id}/delete\"),\n                    None,\n                )\n                .await?;\n\n            println!(\"Waiting for node {node_id} to quiesce on scheduling policy ...\");\n\n            let final_policy =\n                wait_for_scheduling_policy(storcon_client, node_id, *timeout, |sched| {\n                    !matches!(sched, NodeSchedulingPolicy::Deleting)\n                })\n                .await?;\n\n            println!(\n                \"Delete was cancelled for node {node_id}. Schedulling policy is now {final_policy:?}\"\n            );\n        }\n        Command::NodeDeleteTombstone { node_id } => {\n            storcon_client\n                .dispatch::<(), ()>(\n                    Method::DELETE,\n                    format!(\"debug/v1/tombstone/{node_id}\"),\n                    None,\n                )\n                .await?;\n        }\n        Command::NodeTombstones {} => {\n            let mut resp = storcon_client\n                .dispatch::<(), Vec<NodeDescribeResponse>>(\n                    Method::GET,\n                    \"debug/v1/tombstone\".to_string(),\n                    None,\n                )\n                .await?;\n\n            resp.sort_by(|a, b| a.listen_http_addr.cmp(&b.listen_http_addr));\n\n            let mut table = comfy_table::Table::new();\n            table.set_header([\"Id\", \"Hostname\", \"AZ\", \"Scheduling\", \"Availability\"]);\n            for node in resp {\n                table.add_row([\n                    format!(\"{}\", node.id),\n                    node.listen_http_addr,\n                    node.availability_zone_id,\n                    format!(\"{:?}\", 
node.scheduling),\n                    format!(\"{:?}\", node.availability),\n                ]);\n            }\n            println!(\"{table}\");\n        }\n        Command::TenantSetTimeBasedEviction {\n            tenant_id,\n            period,\n            threshold,\n        } => {\n            vps_client\n                .set_tenant_config(&TenantConfigRequest {\n                    tenant_id,\n                    config: TenantConfig {\n                        eviction_policy: Some(EvictionPolicy::LayerAccessThreshold(\n                            EvictionPolicyLayerAccessThreshold {\n                                period: period.into(),\n                                threshold: threshold.into(),\n                            },\n                        )),\n                        heatmap_period: Some(Duration::from_secs(300)),\n                        ..Default::default()\n                    },\n                })\n                .await?;\n        }\n        Command::BulkMigrate {\n            nodes,\n            concurrency,\n            max_shards,\n            dry_run,\n        } => {\n            // Load the list of nodes, split them up into the drained and filled sets,\n            // and validate that draining is possible.\n            let node_descs = storcon_client\n                .dispatch::<(), Vec<NodeDescribeResponse>>(\n                    Method::GET,\n                    \"control/v1/node\".to_string(),\n                    None,\n                )\n                .await?;\n\n            let mut node_to_drain_descs = Vec::new();\n            let mut node_to_fill_descs = Vec::new();\n\n            for desc in node_descs {\n                let to_drain = nodes.contains(&desc.id);\n                if to_drain {\n                    node_to_drain_descs.push(desc);\n                } else {\n                    node_to_fill_descs.push(desc);\n                }\n            }\n\n            if nodes.len() != node_to_drain_descs.len() {\n 
               anyhow::bail!(\"Bulk migration requested away from node which doesn't exist.\")\n            }\n\n            node_to_fill_descs.retain(|desc| {\n                matches!(desc.availability, NodeAvailabilityWrapper::Active)\n                    && matches!(\n                        desc.scheduling,\n                        NodeSchedulingPolicy::Active | NodeSchedulingPolicy::Filling\n                    )\n            });\n\n            if node_to_fill_descs.is_empty() {\n                anyhow::bail!(\"There are no nodes to migrate to\")\n            }\n\n            // Set the node scheduling policy to draining for the nodes which\n            // we plan to drain.\n            for node_desc in node_to_drain_descs.iter() {\n                let req = NodeConfigureRequest {\n                    node_id: node_desc.id,\n                    availability: None,\n                    scheduling: Some(NodeSchedulingPolicy::Draining),\n                };\n\n                storcon_client\n                    .dispatch::<_, ()>(\n                        Method::PUT,\n                        format!(\"control/v1/node/{}/config\", node_desc.id),\n                        Some(req),\n                    )\n                    .await?;\n            }\n\n            // Perform the migration: move each tenant shard scheduled on a node to\n            // be drained to a node which is being filled. 
A simple round robin\n            // strategy is used to pick the new node.\n            let tenants = storcon_client\n                .dispatch::<(), Vec<TenantDescribeResponse>>(\n                    Method::GET,\n                    \"control/v1/tenant\".to_string(),\n                    None,\n                )\n                .await?;\n\n            let mut selected_node_idx = 0;\n\n            struct MigrationMove {\n                tenant_shard_id: TenantShardId,\n                from: NodeId,\n                to: NodeId,\n            }\n\n            let mut moves: Vec<MigrationMove> = Vec::new();\n\n            let shards = tenants\n                .into_iter()\n                .flat_map(|tenant| tenant.shards.into_iter());\n            for shard in shards {\n                if let Some(max_shards) = max_shards {\n                    if moves.len() >= max_shards {\n                        println!(\n                            \"Stop planning shard moves since the requested maximum was reached\"\n                        );\n                        break;\n                    }\n                }\n\n                let should_migrate = {\n                    if let Some(attached_to) = shard.node_attached {\n                        node_to_drain_descs\n                            .iter()\n                            .map(|desc| desc.id)\n                            .any(|id| id == attached_to)\n                    } else {\n                        false\n                    }\n                };\n\n                if !should_migrate {\n                    continue;\n                }\n\n                moves.push(MigrationMove {\n                    tenant_shard_id: shard.tenant_shard_id,\n                    from: shard\n                        .node_attached\n                        .expect(\"We only migrate attached tenant shards\"),\n                    to: node_to_fill_descs[selected_node_idx].id,\n                });\n                selected_node_idx 
= (selected_node_idx + 1) % node_to_fill_descs.len();\n            }\n\n            let total_moves = moves.len();\n\n            if dry_run == Some(true) {\n                println!(\"Dryrun requested. Planned {total_moves} moves:\");\n                for mv in &moves {\n                    println!(\"{}: {} -> {}\", mv.tenant_shard_id, mv.from, mv.to)\n                }\n\n                return Ok(());\n            }\n\n            const DEFAULT_MIGRATE_CONCURRENCY: usize = 8;\n            let mut stream = futures::stream::iter(moves)\n                .map(|mv| {\n                    let client = Client::new(http_client.clone(), cli.api.clone(), cli.jwt.clone());\n                    async move {\n                        client\n                            .dispatch::<TenantShardMigrateRequest, TenantShardMigrateResponse>(\n                                Method::PUT,\n                                format!(\"control/v1/tenant/{}/migrate\", mv.tenant_shard_id),\n                                Some(TenantShardMigrateRequest {\n                                    node_id: mv.to,\n                                    origin_node_id: Some(mv.from),\n                                    migration_config: MigrationConfig::default(),\n                                }),\n                            )\n                            .await\n                            .map_err(|e| (mv.tenant_shard_id, mv.from, mv.to, e))\n                    }\n                })\n                .buffered(concurrency.unwrap_or(DEFAULT_MIGRATE_CONCURRENCY));\n\n            let mut success = 0;\n            let mut failure = 0;\n\n            while let Some(res) = stream.next().await {\n                match res {\n                    Ok(_) => {\n                        success += 1;\n                    }\n                    Err((tenant_shard_id, from, to, error)) => {\n                        failure += 1;\n                        println!(\n                            \"Failed to 
migrate {tenant_shard_id} from node {from} to node {to}: {error}\"\n                        );\n                    }\n                }\n\n                if (success + failure) % 20 == 0 {\n                    println!(\n                        \"Processed {}/{} shards: {} succeeded, {} failed\",\n                        success + failure,\n                        total_moves,\n                        success,\n                        failure\n                    );\n                }\n            }\n\n            println!(\n                \"Processed {}/{} shards: {} succeeded, {} failed\",\n                success + failure,\n                total_moves,\n                success,\n                failure\n            );\n        }\n        Command::StartDrain { node_id } => {\n            storcon_client\n                .dispatch::<(), ()>(\n                    Method::PUT,\n                    format!(\"control/v1/node/{node_id}/drain\"),\n                    None,\n                )\n                .await?;\n            println!(\"Drain started for {node_id}\");\n        }\n        Command::CancelDrain { node_id, timeout } => {\n            storcon_client\n                .dispatch::<(), ()>(\n                    Method::DELETE,\n                    format!(\"control/v1/node/{node_id}/drain\"),\n                    None,\n                )\n                .await?;\n\n            println!(\"Waiting for node {node_id} to quiesce on scheduling policy ...\");\n\n            let final_policy =\n                wait_for_scheduling_policy(storcon_client, node_id, *timeout, |sched| {\n                    use NodeSchedulingPolicy::*;\n                    matches!(sched, Active | PauseForRestart)\n                })\n                .await?;\n\n            println!(\n                \"Drain was cancelled for node {node_id}. 
Scheduling policy is now {final_policy:?}\"\n            );\n        }\n        Command::StartFill { node_id } => {\n            storcon_client\n                .dispatch::<(), ()>(Method::PUT, format!(\"control/v1/node/{node_id}/fill\"), None)\n                .await?;\n\n            println!(\"Fill started for {node_id}\");\n        }\n        Command::CancelFill { node_id, timeout } => {\n            storcon_client\n                .dispatch::<(), ()>(\n                    Method::DELETE,\n                    format!(\"control/v1/node/{node_id}/fill\"),\n                    None,\n                )\n                .await?;\n\n            println!(\"Waiting for node {node_id} to quiesce on scheduling policy ...\");\n\n            let final_policy =\n                wait_for_scheduling_policy(storcon_client, node_id, *timeout, |sched| {\n                    use NodeSchedulingPolicy::*;\n                    matches!(sched, Active)\n                })\n                .await?;\n\n            println!(\n                \"Fill was cancelled for node {node_id}. 
Scheduling policy is now {final_policy:?}\"\n            );\n        }\n        Command::Safekeepers {} => {\n            let mut resp = storcon_client\n                .dispatch::<(), Vec<SafekeeperDescribeResponse>>(\n                    Method::GET,\n                    \"control/v1/safekeeper\".to_string(),\n                    None,\n                )\n                .await?;\n\n            resp.sort_by(|a, b| a.id.cmp(&b.id));\n\n            let mut table = comfy_table::Table::new();\n            table.set_header([\n                \"Id\",\n                \"Version\",\n                \"Host\",\n                \"Port\",\n                \"Http Port\",\n                \"AZ Id\",\n                \"Scheduling\",\n            ]);\n            for sk in resp {\n                table.add_row([\n                    format!(\"{}\", sk.id),\n                    format!(\"{}\", sk.version),\n                    sk.host,\n                    format!(\"{}\", sk.port),\n                    format!(\"{}\", sk.http_port),\n                    sk.availability_zone_id.clone(),\n                    String::from(sk.scheduling_policy),\n                ]);\n            }\n            println!(\"{table}\");\n        }\n        Command::SafekeeperScheduling {\n            node_id,\n            scheduling_policy,\n        } => {\n            let scheduling_policy = scheduling_policy.0;\n            storcon_client\n                .dispatch::<SafekeeperSchedulingPolicyRequest, ()>(\n                    Method::POST,\n                    format!(\"control/v1/safekeeper/{node_id}/scheduling_policy\"),\n                    Some(SafekeeperSchedulingPolicyRequest { scheduling_policy }),\n                )\n                .await?;\n            println!(\n                \"Scheduling policy of {node_id} set to {}\",\n                String::from(scheduling_policy)\n            );\n        }\n        Command::DownloadHeatmapLayers {\n            tenant_shard_id,\n            
timeline_id,\n            concurrency,\n        } => {\n            let mut path = format!(\n                \"v1/tenant/{tenant_shard_id}/timeline/{timeline_id}/download_heatmap_layers\",\n            );\n\n            if let Some(c) = concurrency {\n                path = format!(\"{path}?concurrency={c}\");\n            }\n\n            storcon_client\n                .dispatch::<(), ()>(Method::POST, path, None)\n                .await?;\n        }\n        Command::TimelineLocate {\n            tenant_id,\n            timeline_id,\n        } => {\n            let path = format!(\"debug/v1/tenant/{tenant_id}/timeline/{timeline_id}/locate\");\n\n            let resp = storcon_client\n                .dispatch::<(), TimelineLocateResponse>(Method::GET, path, None)\n                .await?;\n\n            let sk_set = resp.sk_set.iter().map(|id| id.0 as i64).collect::<Vec<_>>();\n            let new_sk_set = resp\n                .new_sk_set\n                .as_ref()\n                .map(|ids| ids.iter().map(|id| id.0 as i64).collect::<Vec<_>>());\n\n            println!(\"generation = {}\", resp.generation);\n            println!(\"sk_set = {sk_set:?}\");\n            println!(\"new_sk_set = {new_sk_set:?}\");\n        }\n        Command::TimelineSafekeeperMigrate {\n            tenant_id,\n            timeline_id,\n            new_sk_set,\n        } => {\n            let path = format!(\"v1/tenant/{tenant_id}/timeline/{timeline_id}/safekeeper_migrate\");\n\n            storcon_client\n                .dispatch::<_, ()>(\n                    Method::POST,\n                    path,\n                    Some(TimelineSafekeeperMigrateRequest { new_sk_set }),\n                )\n                .await?;\n        }\n        Command::TimelineSafekeeperMigrateAbort {\n            tenant_id,\n            timeline_id,\n        } => {\n            let path =\n                format!(\"v1/tenant/{tenant_id}/timeline/{timeline_id}/safekeeper_migrate_abort\");\n\n          
  storcon_client\n                .dispatch::<(), ()>(Method::POST, path, None)\n                .await?;\n        }\n    }\n\n    Ok(())\n}\n\nstatic WATCH_INTERVAL: Duration = Duration::from_secs(5);\n\nasync fn watch_tenant_shard(\n    storcon_client: Client,\n    tenant_shard_id: TenantShardId,\n    until_migrated_to: Option<NodeId>,\n) -> anyhow::Result<()> {\n    if let Some(until_migrated_to) = until_migrated_to {\n        println!(\n            \"Waiting for tenant shard {tenant_shard_id} to be migrated to node {until_migrated_to}\"\n        );\n    }\n\n    loop {\n        let desc = storcon_client\n            .dispatch::<(), TenantDescribeResponse>(\n                Method::GET,\n                format!(\"control/v1/tenant/{}\", tenant_shard_id.tenant_id),\n                None,\n            )\n            .await?;\n\n        // Output the current state of the tenant shard\n        let shard = desc\n            .shards\n            .iter()\n            .find(|s| s.tenant_shard_id == tenant_shard_id)\n            .ok_or(anyhow::anyhow!(\"Tenant shard not found\"))?;\n        let summary = format!(\n            \"attached: {} secondary: {} {}\",\n            shard\n                .node_attached\n                .map(|n| format!(\"{n}\"))\n                .unwrap_or(\"none\".to_string()),\n            shard\n                .node_secondary\n                .iter()\n                .map(|n| n.to_string())\n                .collect::<Vec<_>>()\n                .join(\",\"),\n            if shard.is_reconciling {\n                \"(reconciler active)\"\n            } else {\n                \"(reconciler idle)\"\n            }\n        );\n        println!(\"{summary}\");\n\n        // Maybe drop out if we finished migration\n        if let Some(until_migrated_to) = until_migrated_to {\n            if shard.node_attached == Some(until_migrated_to) && !shard.is_reconciling {\n                println!(\"Tenant shard {tenant_shard_id} is now on node 
{until_migrated_to}\");\n                break;\n            }\n        }\n\n        tokio::time::sleep(WATCH_INTERVAL).await;\n    }\n    Ok(())\n}\n"
  },
  {
    "path": "deny.toml",
    "content": "# This file was auto-generated using `cargo deny init`.\n# cargo-deny is a cargo plugin that lets you lint your project's\n# dependency graph to ensure all your dependencies conform\n# to your expectations and requirements.\n\n# Root options\n[graph]\ntargets = [\n    { triple = \"x86_64-unknown-linux-gnu\" },\n    { triple = \"aarch64-unknown-linux-gnu\" },\n    { triple = \"aarch64-apple-darwin\" },\n    { triple = \"x86_64-apple-darwin\" },\n]\nall-features = false\nno-default-features = false\n[output]\nfeature-depth = 1\n\n# This section is considered when running `cargo deny check advisories`\n# More documentation for the advisories section can be found here:\n# https://embarkstudios.github.io/cargo-deny/checks/advisories/cfg.html\n[advisories]\ndb-urls = [\"https://github.com/rustsec/advisory-db\"]\nyanked = \"warn\"\n\n[[advisories.ignore]]\nid = \"RUSTSEC-2023-0071\"\nreason = \"the marvin attack only affects private key decryption, not public key signature verification\"\n\n[[advisories.ignore]]\nid = \"RUSTSEC-2024-0436\"\nreason = \"The paste crate is a build-only dependency with no runtime components. 
It is unlikely to have any security impact.\"\n\n# This section is considered when running `cargo deny check licenses`\n# More documentation for the licenses section can be found here:\n# https://embarkstudios.github.io/cargo-deny/checks/licenses/cfg.html\n[licenses]\nversion = 2\nallow = [\n    \"0BSD\",\n    \"Apache-2.0\",\n    \"BSD-2-Clause\",\n    \"BSD-3-Clause\",\n    \"CC0-1.0\",\n    \"CDDL-1.0\",\n    \"ISC\",\n    \"MIT\",\n    \"MPL-2.0\",\n    \"Unicode-3.0\",\n]\nconfidence-threshold = 0.8\nexceptions = [\n    # Zlib license has some restrictions if we decide to change sth\n    { allow = [\"Zlib\"], name = \"const_format_proc_macros\", version = \"*\" },\n    { allow = [\"Zlib\"], name = \"const_format\", version = \"*\" },\n]\n\n[licenses.private]\nignore = true\nregistries = []\n\n# This section is considered when running `cargo deny check bans`.\n# More documentation about the 'bans' section can be found here:\n# https://embarkstudios.github.io/cargo-deny/checks/bans/cfg.html\n[bans]\nmultiple-versions = \"allow\"\nwildcards = \"allow\"\nhighlight = \"all\"\nworkspace-default-features = \"allow\"\nexternal-default-features = \"allow\"\nallow = []\n\nskip = []\nskip-tree = []\n\n[[bans.deny]]\n# we use tokio, the same rationale applies for async-{io,waker,global-executor,executor,channel,lock}, smol\n# if you find yourself here while adding a dependency, try \"default-features = false\", ask around on #rust\nname = \"async-std\"\n\n[[bans.deny]]\nname = \"async-io\"\n\n[[bans.deny]]\nname = \"async-waker\"\n\n[[bans.deny]]\nname = \"async-global-executor\"\n\n[[bans.deny]]\nname = \"async-executor\"\n\n[[bans.deny]]\nname = \"smol\"\n\n[[bans.deny]]\n# We want to use rustls instead of the platform's native tls implementation.\nname = \"native-tls\"\n\n[[bans.deny]]\nname = \"openssl\"\n\n# This section is considered when running `cargo deny check sources`.\n# More documentation about the 'sources' section can be found here:\n# 
https://embarkstudios.github.io/cargo-deny/checks/sources/cfg.html\n[sources]\nunknown-registry = \"warn\"\nunknown-git = \"warn\"\nallow-registry = [\"https://github.com/rust-lang/crates.io-index\"]\nallow-git = [\n    # Crate pinned to commit in origin repo due to opentelemetry version.\n    # TODO: Remove this once crate is fetched from crates.io again.\n    \"https://github.com/mattiapenati/tower-otel\",\n]\n\n[sources.allow-org]\ngithub = [\n    \"neondatabase\",\n]\ngitlab = []\nbitbucket = []\n"
  },
  {
    "path": "diesel.toml",
    "content": "# For documentation on how to configure this file,\n# see https://diesel.rs/guides/configuring-diesel-cli\n\n[print_schema]\nfile = \"storage_controller/src/schema.rs\"\ncustom_type_derives = [\"diesel::query_builder::QueryId\"]\n\n[migrations_directory]\ndir = \"storage_controller/migrations\"\n"
  },
  {
    "path": "docker-compose/README.md",
    "content": "# Example docker compose configuration\n\nThe configuration in this directory is used for testing Neon docker images: it is\nnot intended for deploying a usable system. To run a development environment where\nyou can experiment with a miniature Neon system, use `cargo neon` rather than container images.\n\nThis configuration does not start the storage controller, because the controller\nneeds a way to reconfigure running computes, and no such thing exists in this setup.\n\n## Generating the JWKS for a compute\n\n```shell\nopenssl genpkey -algorithm Ed25519 -out private-key.pem\nopenssl pkey -in private-key.pem -pubout -out public-key.pem\nopenssl pkey -pubin -inform pem -in public-key.pem -pubout -outform der -out public-key.der\nkey=\"$(xxd -plain -cols 32 -s -32 public-key.der)\"\nkey_id=\"$(printf '%s' \"$key\" | sha256sum | awk '{ print $1 }' | basenc --base64url --wrap=0)\"\nx=\"$(printf '%s' \"$key\" | basenc --base64url --wrap=0)\"\n```\n"
  },
  {
    "path": "docker-compose/compute_wrapper/Dockerfile",
    "content": "ARG REPOSITORY=ghcr.io/neondatabase\nARG COMPUTE_IMAGE=compute-node-v14\nARG TAG=latest\n\nFROM $REPOSITORY/${COMPUTE_IMAGE}:$TAG\n\nARG COMPUTE_IMAGE\n\nUSER root\nRUN echo 'Acquire::Retries \"5\";' > /etc/apt/apt.conf.d/80-retries && \\\n    apt-get update &&       \\\n    apt-get install -y curl \\\n                       jq   \\\n                       netcat-openbsd\n#This is required for the pg_hintplan test\nRUN mkdir -p /ext-src/pg_hint_plan-src /postgres/contrib/file_fdw /ext-src/postgis-src/ && chown postgres /ext-src/pg_hint_plan-src /postgres/contrib/file_fdw /ext-src/postgis-src\n\nUSER postgres\n"
  },
  {
    "path": "docker-compose/compute_wrapper/private-key.pem",
    "content": "-----BEGIN PRIVATE KEY-----\nMC4CAQAwBQYDK2VwBCIEIOmnRbzt2AJ0d+S3aU1hiYOl/tXpvz1FmWBfwHYBgOma\n-----END PRIVATE KEY-----\n"
  },
  {
    "path": "docker-compose/compute_wrapper/public-key.pem",
    "content": "-----BEGIN PUBLIC KEY-----\nMCowBQYDK2VwAyEADY0al/U0bgB3+9fUGk+3PKWnsck9OyxN5DjHIN6Xep0=\n-----END PUBLIC KEY-----\n"
  },
  {
    "path": "docker-compose/compute_wrapper/shell/compute.sh",
    "content": "#!/usr/bin/env bash\nset -eux\n\n# Generate a random tenant or timeline ID\n#\n# Takes a variable name as argument. The result is stored in that variable.\ngenerate_id() {\n    local -n resvar=${1}\n    printf -v resvar '%08x%08x%08x%08x' ${SRANDOM} ${SRANDOM} ${SRANDOM} ${SRANDOM}\n}\n\nPG_VERSION=${PG_VERSION:-14}\n\nreadonly CONFIG_FILE_ORG=/var/db/postgres/configs/config.json\nreadonly CONFIG_FILE=/tmp/config.json\n\n# Test that the first library path that the dynamic loader looks in is the path\n# that we use for custom compiled software\nfirst_path=\"$(ldconfig --verbose 2>/dev/null \\\n    | grep --invert-match ^$'\\t' \\\n    | cut --delimiter=: --fields=1 \\\n    | head --lines=1)\"\ntest \"${first_path}\" = '/usr/local/lib'\n\necho \"Waiting pageserver become ready.\"\nwhile ! nc -z pageserver 6400; do\n     sleep 1\ndone\necho \"Page server is ready.\"\n\ncp \"${CONFIG_FILE_ORG}\" \"${CONFIG_FILE}\"\n\n if [[ -n \"${TENANT_ID:-}\" && -n \"${TIMELINE_ID:-}\" ]]; then\n   tenant_id=${TENANT_ID}\n   timeline_id=${TIMELINE_ID}\nelse\n  echo \"Check if a tenant present\"\n  PARAMS=(\n       -X GET\n       -H \"Content-Type: application/json\"\n       \"http://pageserver:9898/v1/tenant\"\n  )\n  tenant_id=$(curl \"${PARAMS[@]}\" | jq -r .[0].id)\n  if [[ -z \"${tenant_id}\" || \"${tenant_id}\" = null ]]; then\n    echo \"Create a tenant\"\n    generate_id tenant_id\n    PARAMS=(\n         -X PUT\n         -H \"Content-Type: application/json\"\n         -d \"{\\\"mode\\\": \\\"AttachedSingle\\\", \\\"generation\\\": 1, \\\"tenant_conf\\\": {}}\"\n        \"http://pageserver:9898/v1/tenant/${tenant_id}/location_config\"\n    )\n    result=$(curl \"${PARAMS[@]}\")\n    printf '%s\\n' \"${result}\" | jq .\n  fi\n\n  if [[ \"${RUN_PARALLEL:-false}\" != \"true\" ]]; then\n    echo \"Check if a timeline present\"\n    PARAMS=(\n         -X GET\n         -H \"Content-Type: application/json\"\n        
\"http://pageserver:9898/v1/tenant/${tenant_id}/timeline\"\n    )\n    timeline_id=$(curl \"${PARAMS[@]}\" | jq -r .[0].timeline_id)\n  fi\n  if [[ -z \"${timeline_id:-}\" || \"${timeline_id:-}\" = null ]]; then\n    generate_id timeline_id\n    PARAMS=(\n        -sbf\n        -X POST\n        -H \"Content-Type: application/json\"\n        -d \"{\\\"new_timeline_id\\\": \\\"${timeline_id}\\\", \\\"pg_version\\\": ${PG_VERSION}}\"\n        \"http://pageserver:9898/v1/tenant/${tenant_id}/timeline/\"\n    )\n    result=$(curl \"${PARAMS[@]}\")\n    printf '%s\\n' \"${result}\" | jq .\n  fi\nfi\n\nif [[ ${PG_VERSION} -ge 17 ]]; then\n  ulid_extension=pgx_ulid\nelse\n  ulid_extension=ulid\nfi\necho \"Adding pgx_ulid\"\nshared_libraries=$(jq -r '.spec.cluster.settings[] | select(.name==\"shared_preload_libraries\").value' ${CONFIG_FILE})\nsed -i \"s|${shared_libraries}|${shared_libraries},${ulid_extension}|\" ${CONFIG_FILE}\necho \"Overwrite tenant id and timeline id in spec file\"\nsed -i \"s|TENANT_ID|${tenant_id}|\" ${CONFIG_FILE}\nsed -i \"s|TIMELINE_ID|${timeline_id}|\" ${CONFIG_FILE}\n\ncat ${CONFIG_FILE}\n\necho \"Start compute node\"\n/usr/local/bin/compute_ctl --pgdata /var/db/postgres/compute \\\n     -C \"postgresql://cloud_admin@localhost:55433/postgres\"  \\\n     -b /usr/local/bin/postgres                              \\\n     --compute-id \"compute-${RANDOM}\"                          \\\n     --config \"${CONFIG_FILE}\" \\\n     --dev\n"
  },
  {
    "path": "docker-compose/compute_wrapper/var/db/postgres/configs/config.json",
    "content": "{\n    \"spec\": {\n        \"format_version\": 1.0,\n\n        \"timestamp\": \"2022-10-12T18:00:00.000Z\",\n        \"operation_uuid\": \"0f657b36-4b0f-4a2d-9c2e-1dcd615e7d8c\",\n        \"suspend_timeout_seconds\": -1,\n\n        \"cluster\": {\n            \"cluster_id\": \"docker_compose\",\n            \"name\": \"docker_compose_test\",\n            \"state\": \"restarted\",\n            \"roles\": [\n                {\n                    \"name\": \"cloud_admin\",\n                    \"encrypted_password\": \"b093c0d3b281ba6da1eacc608620abd8\",\n                    \"options\": null\n                }\n            ],\n            \"databases\": [\n            ],\n            \"settings\": [\n                {\n                    \"name\": \"fsync\",\n                    \"value\": \"off\",\n                    \"vartype\": \"bool\"\n                },\n                {\n                    \"name\": \"wal_level\",\n                    \"value\": \"logical\",\n                    \"vartype\": \"enum\"\n                },\n                {\n                    \"name\": \"wal_log_hints\",\n                    \"value\": \"on\",\n                    \"vartype\": \"bool\"\n                },\n                {\n                    \"name\": \"log_connections\",\n                    \"value\": \"on\",\n                    \"vartype\": \"bool\"\n                },\n                {\n                    \"name\": \"port\",\n                    \"value\": \"55433\",\n                    \"vartype\": \"integer\"\n                },\n                {\n                    \"name\": \"shared_buffers\",\n                    \"value\": \"1MB\",\n                    \"vartype\": \"string\"\n                },\n                {\n                    \"name\": \"max_connections\",\n                    \"value\": \"100\",\n                    \"vartype\": \"integer\"\n                },\n                {\n                    \"name\": 
\"listen_addresses\",\n                    \"value\": \"0.0.0.0\",\n                    \"vartype\": \"string\"\n                },\n                {\n                    \"name\": \"max_wal_senders\",\n                    \"value\": \"10\",\n                    \"vartype\": \"integer\"\n                },\n                {\n                    \"name\": \"max_replication_slots\",\n                    \"value\": \"10\",\n                    \"vartype\": \"integer\"\n                },\n                {\n                    \"name\": \"wal_sender_timeout\",\n                    \"value\": \"5s\",\n                    \"vartype\": \"string\"\n                },\n                {\n                    \"name\": \"wal_keep_size\",\n                    \"value\": \"0\",\n                    \"vartype\": \"integer\"\n                },\n                {\n                    \"name\": \"password_encryption\",\n                    \"value\": \"md5\",\n                    \"vartype\": \"enum\"\n                },\n                {\n                    \"name\": \"restart_after_crash\",\n                    \"value\": \"off\",\n                    \"vartype\": \"bool\"\n                },\n                {\n                    \"name\": \"synchronous_standby_names\",\n                    \"value\": \"walproposer\",\n                    \"vartype\": \"string\"\n                },\n                {\n                    \"name\": \"shared_preload_libraries\",\n                    \"value\": \"neon,pg_cron,timescaledb,pg_stat_statements\",\n                    \"vartype\": \"string\"\n                },\n                {\n                    \"name\": \"neon.safekeepers\",\n                    \"value\": \"safekeeper1:5454,safekeeper2:5454,safekeeper3:5454\",\n                    \"vartype\": \"string\"\n                },\n                {\n                    \"name\": \"neon.timeline_id\",\n                    \"value\": \"TIMELINE_ID\",\n                    
\"vartype\": \"string\"\n                },\n                {\n                    \"name\": \"neon.tenant_id\",\n                    \"value\": \"TENANT_ID\",\n                    \"vartype\": \"string\"\n                },\n                {\n                    \"name\": \"neon.pageserver_connstring\",\n                    \"value\": \"host=pageserver port=6400\",\n                    \"vartype\": \"string\"\n                },\n                {\n                    \"name\": \"max_replication_write_lag\",\n                    \"value\": \"500MB\",\n                    \"vartype\": \"string\"\n                },\n                {\n                    \"name\": \"max_replication_flush_lag\",\n                    \"value\": \"10GB\",\n                    \"vartype\": \"string\"\n                },\n                {\n                    \"name\": \"cron.database\",\n                    \"value\": \"postgres\",\n                    \"vartype\": \"string\"\n                }\n            ]\n        },\n\n        \"delta_operations\": [\n        ]\n    },\n    \"compute_ctl_config\": {\n        \"jwks\": {\n            \"keys\": [\n                {\n                    \"use\": \"sig\",\n                    \"key_ops\": [\n                        \"verify\"\n                    ],\n                    \"alg\": \"EdDSA\",\n                    \"kid\": \"ZGIxMzAzOGY0YWQwODk2ODU1MTk1NzMxMDFkYmUyOWU2NzZkOWNjNjMyMGRkZGJjOWY0MjdjYWVmNzE1MjUyOAo=\",\n                    \"kty\": \"OKP\",\n                    \"crv\": \"Ed25519\",\n                    \"x\": \"MGQ4ZDFhOTdmNTM0NmUwMDc3ZmJkN2Q0MWE0ZmI3M2NhNWE3YjFjOTNkM2IyYzRkZTQzOGM3MjBkZTk3N2E5ZAo=\"\n                }\n            ]\n        }\n    }\n}\n"
  },
  {
    "path": "docker-compose/docker-compose.yml",
    "content": "services:\n  minio:\n    restart: always\n    image: quay.io/minio/minio:RELEASE.2022-10-20T00-55-09Z\n    ports:\n      - 9000:9000\n      - 9001:9001\n    environment:\n      - MINIO_ROOT_USER=minio\n      - MINIO_ROOT_PASSWORD=password\n    command: server /data --address :9000 --console-address \":9001\"\n\n  minio_create_buckets:\n    image: minio/mc\n    environment:\n      - MINIO_ROOT_USER=minio\n      - MINIO_ROOT_PASSWORD=password\n    entrypoint:\n      - \"/bin/sh\"\n      - \"-c\"\n    command:\n      - \"until (/usr/bin/mc alias set minio http://minio:9000 $$MINIO_ROOT_USER $$MINIO_ROOT_PASSWORD) do\n             echo 'Waiting to start minio...' && sleep 1;\n         done;\n         /usr/bin/mc mb minio/neon --region=eu-north-1;\n         exit 0;\"\n    depends_on:\n      - minio\n\n  pageserver:\n    restart: always\n    image: ${REPOSITORY:-ghcr.io/neondatabase}/neon:${TAG:-latest}\n    environment:\n      - AWS_ACCESS_KEY_ID=minio\n      - AWS_SECRET_ACCESS_KEY=password\n      #- RUST_BACKTRACE=1\n    ports:\n       #- 6400:6400  # pg protocol handler\n       - 9898:9898 # http endpoints\n    volumes:\n      - ./pageserver_config:/data/.neon/\n    depends_on:\n      - storage_broker\n      - minio_create_buckets\n\n  safekeeper1:\n    restart: always\n    image: ${REPOSITORY:-ghcr.io/neondatabase}/neon:${TAG:-latest}\n    environment:\n      - SAFEKEEPER_ADVERTISE_URL=safekeeper1:5454\n      - SAFEKEEPER_ID=1\n      - BROKER_ENDPOINT=http://storage_broker:50051\n      - AWS_ACCESS_KEY_ID=minio\n      - AWS_SECRET_ACCESS_KEY=password\n      #- RUST_BACKTRACE=1\n    ports:\n      #- 5454:5454 # pg protocol handler\n      - 7676:7676 # http endpoints\n    entrypoint:\n      - \"/bin/sh\"\n      - \"-c\"\n    command:\n      - \"safekeeper --listen-pg=$$SAFEKEEPER_ADVERTISE_URL\n                    --listen-http='0.0.0.0:7676'\n                    --id=$$SAFEKEEPER_ID\n                    --broker-endpoint=$$BROKER_ENDPOINT\n            
        -D /data\n                    --remote-storage=\\\"{endpoint='http://minio:9000',\n                                        bucket_name='neon',\n                                        bucket_region='eu-north-1',\n                                        prefix_in_bucket='/safekeeper/'}\\\"\"\n    depends_on:\n      - storage_broker\n      - minio_create_buckets\n\n  safekeeper2:\n    restart: always\n    image: ${REPOSITORY:-ghcr.io/neondatabase}/neon:${TAG:-latest}\n    environment:\n      - SAFEKEEPER_ADVERTISE_URL=safekeeper2:5454\n      - SAFEKEEPER_ID=2\n      - BROKER_ENDPOINT=http://storage_broker:50051\n      - AWS_ACCESS_KEY_ID=minio\n      - AWS_SECRET_ACCESS_KEY=password\n      #- RUST_BACKTRACE=1\n    ports:\n      #- 5454:5454 # pg protocol handler\n      - 7677:7676 # http endpoints\n    entrypoint:\n      - \"/bin/sh\"\n      - \"-c\"\n    command:\n      - \"safekeeper --listen-pg=$$SAFEKEEPER_ADVERTISE_URL\n                    --listen-http='0.0.0.0:7676'\n                    --id=$$SAFEKEEPER_ID\n                    --broker-endpoint=$$BROKER_ENDPOINT\n                    -D /data\n                    --remote-storage=\\\"{endpoint='http://minio:9000',\n                                        bucket_name='neon',\n                                        bucket_region='eu-north-1',\n                                        prefix_in_bucket='/safekeeper/'}\\\"\"\n    depends_on:\n      - storage_broker\n      - minio_create_buckets\n\n  safekeeper3:\n    restart: always\n    image: ${REPOSITORY:-ghcr.io/neondatabase}/neon:${TAG:-latest}\n    environment:\n      - SAFEKEEPER_ADVERTISE_URL=safekeeper3:5454\n      - SAFEKEEPER_ID=3\n      - BROKER_ENDPOINT=http://storage_broker:50051\n      - AWS_ACCESS_KEY_ID=minio\n      - AWS_SECRET_ACCESS_KEY=password\n      #- RUST_BACKTRACE=1\n    ports:\n      #- 5454:5454 # pg protocol handler\n      - 7678:7676 # http endpoints\n    entrypoint:\n      - \"/bin/sh\"\n      - \"-c\"\n    command:\n      - 
\"safekeeper --listen-pg=$$SAFEKEEPER_ADVERTISE_URL\n                    --listen-http='0.0.0.0:7676'\n                    --id=$$SAFEKEEPER_ID\n                    --broker-endpoint=$$BROKER_ENDPOINT\n                    -D /data\n                    --remote-storage=\\\"{endpoint='http://minio:9000',\n                                        bucket_name='neon',\n                                        bucket_region='eu-north-1',\n                                        prefix_in_bucket='/safekeeper/'}\\\"\"\n    depends_on:\n      - storage_broker\n      - minio_create_buckets\n\n  storage_broker:\n    restart: always\n    image: ${REPOSITORY:-ghcr.io/neondatabase}/neon:${TAG:-latest}\n    ports:\n      - 50051:50051\n    command:\n      - \"storage_broker\"\n      - \"--listen-addr=0.0.0.0:50051\"\n\n  compute1:\n    restart: always\n    build:\n      context: ./compute_wrapper/\n      args:\n        - REPOSITORY=${REPOSITORY:-ghcr.io/neondatabase}\n        - COMPUTE_IMAGE=compute-node-v${PG_VERSION:-16}\n        - TAG=${COMPUTE_TAG:-${TAG:-latest}}\n        - http_proxy=${http_proxy:-}\n        - https_proxy=${https_proxy:-}\n    image: built-compute\n    environment:\n      - PG_VERSION=${PG_VERSION:-16}\n      - TENANT_ID=${TENANT_ID:-}\n      - TIMELINE_ID=${TIMELINE_ID:-}\n      #- RUST_BACKTRACE=1\n    # Mount the test files directly, for faster editing cycle.\n    volumes:\n      - ./compute_wrapper/var/db/postgres/configs/:/var/db/postgres/configs/\n      - ./compute_wrapper/shell/:/shell/\n    ports:\n      - 55433:55433 # pg protocol handler\n      - 3080:3080 # http endpoints\n    entrypoint:\n      - \"/shell/compute.sh\"\n    # Ad an alias for compute1 for compatibility\n    networks:\n      default:\n        aliases:\n            - compute\n    depends_on:\n      - safekeeper1\n      - safekeeper2\n      - safekeeper3\n      - pageserver\n\n  compute_is_ready:\n    image: postgres:latest\n    environment:\n      - PARALLEL_COMPUTES=1\n    
entrypoint:\n      - \"/bin/sh\"\n      - \"-c\"\n    command:\n      - \"for i in $(seq 1 $${PARALLEL_COMPUTES}); do\n           until pg_isready -h compute$$i -p 55433 -U cloud_admin ; do\n             sleep 1;\n           done;\n         done;\n         echo All computes are started\"\n    depends_on:\n      - compute1\n\n  neon-test-extensions:\n    profiles: [\"test-extensions\"]\n    image: ${REPOSITORY:-ghcr.io/neondatabase}/neon-test-extensions-v${PG_TEST_VERSION:-${PG_VERSION:-16}}:${TEST_EXTENSIONS_TAG:-${TAG:-latest}}\n    environment:\n      - PGUSER=${PGUSER:-cloud_admin}\n      - PGPASSWORD=${PGPASSWORD:-cloud_admin}\n    entrypoint:\n      - \"/bin/bash\"\n      - \"-c\"\n    command:\n      - sleep 3600\n    depends_on:\n      - compute1\n"
  },
  {
    "path": "docker-compose/docker_compose_test.sh",
    "content": "#!/usr/bin/env bash\n\n# A basic test to ensure Docker images are built correctly.\n# Build a wrapper around the compute, start all services and runs a simple SQL query.\n# Repeats the process for all currenly supported Postgres versions.\n\n# Implicitly accepts `REPOSITORY` and `TAG` env vars that are passed into the compose file\n# Their defaults point at DockerHub `neondatabase/neon:latest` image.`,\n# to verify custom image builds (e.g pre-published ones).\n#\n# A test script for postgres extensions\n# Currently supports only v16+\n#\nset -eux -o pipefail\n\ncd \"$(dirname \"${0}\")\"\nexport COMPOSE_FILE='docker-compose.yml'\nexport COMPOSE_PROFILES=test-extensions\nexport PARALLEL_COMPUTES=${PARALLEL_COMPUTES:-1}\nREADY_MESSAGE=\"All computes are started\"\nCOMPUTES=()\nfor i in $(seq 1 \"${PARALLEL_COMPUTES}\"); do\n  COMPUTES+=(\"compute${i}\")\ndone\nCURRENT_TMPDIR=$(mktemp -d)\ntrap 'rm -rf ${CURRENT_TMPDIR} docker-compose-parallel.yml' EXIT\nif [[ ${PARALLEL_COMPUTES} -gt 1 ]]; then\n  export COMPOSE_FILE=docker-compose-parallel.yml\n  cp docker-compose.yml docker-compose-parallel.yml\n  # Replace the environment variable PARALLEL_COMPUTES with the actual value\n  yq eval -i \".services.compute_is_ready.environment |=  map(select(. | test(\\\"^PARALLEL_COMPUTES=\\\") | not)) + [\\\"PARALLEL_COMPUTES=${PARALLEL_COMPUTES}\\\"]\" ${COMPOSE_FILE}\n  for i in $(seq 2 \"${PARALLEL_COMPUTES}\"); do\n    # Duplicate compute1 as compute${i} for parallel execution\n    yq eval -i \".services.compute${i} = .services.compute1\" ${COMPOSE_FILE}\n    # We don't need these sections, so delete them\n    yq eval -i \"(del .services.compute${i}.build) | (del .services.compute${i}.ports) | (del .services.compute${i}.networks)\" ${COMPOSE_FILE}\n    # Let the compute 1 be the only dependence\n    yq eval -i \".services.compute${i}.depends_on = [\\\"compute1\\\"]\" ${COMPOSE_FILE}\n    # Set RUN_PARALLEL=true for compute2. 
They will generate tenant_id and timeline_id to avoid using the same as other computes\n    yq eval -i \".services.compute${i}.environment += [\\\"RUN_PARALLEL=true\\\"]\" ${COMPOSE_FILE}\n    # Remove TENANT_ID and TIMELINE_ID from the environment variables of the generated computes\n    # They will create new TENANT_ID and TIMELINE_ID anyway.\n    yq eval -i \".services.compute${i}.environment |= map(select(. | (test(\\\"^TENANT_ID=\\\") or test(\\\"^TIMELINE_ID=\\\")) | not))\" ${COMPOSE_FILE}\n  done\nfi\nPSQL_OPTION=\"-h localhost -U cloud_admin -p 55433 -d postgres\"\n\nfunction cleanup() {\n    echo \"show container information\"\n    docker ps\n    echo \"stop containers...\"\n    docker compose down\n}\n\nfor pg_version in ${TEST_VERSION_ONLY-14 15 16 17}; do\n    pg_version=${pg_version/v/}\n    echo \"clean up containers if exist\"\n    cleanup\n    PG_TEST_VERSION=$((pg_version < 16 ? 16 : pg_version))\n    PG_VERSION=${pg_version} PG_TEST_VERSION=${PG_TEST_VERSION} docker compose build compute1\n    PG_VERSION=${pg_version} PG_TEST_VERSION=${PG_TEST_VERSION} docker compose up --quiet-pull -d\n    echo \"wait until the compute is ready. timeout after 60s. \"\n    cnt=0\n    while sleep 3; do\n        # check timeout\n        (( cnt += 3 ))\n        if [[ ${cnt} -gt 60 ]]; then\n            echo \"timeout before the compute is ready.\"\n            exit 1\n        fi\n        if docker compose logs compute_is_ready | grep -q \"${READY_MESSAGE}\"; then\n            echo \"OK. 
The compute is ready to connect.\"\n            echo \"execute simple queries.\"\n            for compute in \"${COMPUTES[@]}\"; do\n              docker compose exec \"${compute}\" /bin/bash -c \"psql ${PSQL_OPTION} -c 'SELECT 1'\"\n            done\n            break\n        fi\n    done\n\n    if [[ ${pg_version} -ge 16 ]]; then\n        mkdir \"${CURRENT_TMPDIR}\"/{pg_hint_plan-src,file_fdw,postgis-src}\n        docker compose cp neon-test-extensions:/ext-src/postgis-src/raster/test \"${CURRENT_TMPDIR}/postgis-src/test\"\n        docker compose cp neon-test-extensions:/ext-src/postgis-src/regress/00-regress-install \"${CURRENT_TMPDIR}/postgis-src/00-regress-install\"\n        docker compose cp neon-test-extensions:/ext-src/pg_hint_plan-src/data \"${CURRENT_TMPDIR}/pg_hint_plan-src/data\"\n        docker compose cp neon-test-extensions:/postgres/contrib/file_fdw/data \"${CURRENT_TMPDIR}/file_fdw/data\"\n\n        for compute in \"${COMPUTES[@]}\"; do\n          # This is required for the pg_hint_plan test, to prevent flaky log message causing the test to fail\n          # It cannot be moved to Dockerfile now because the database directory is created after the start of the container\n          echo Adding dummy config on \"${compute}\"\n          docker compose exec \"${compute}\" touch /var/db/postgres/compute/compute_ctl_temp_override.conf\n          # Prepare for the PostGIS test\n          docker compose exec \"${compute}\" mkdir -p /tmp/pgis_reg/pgis_reg_tmp /ext-src/postgis-src/raster /ext-src/postgis-src/regress /ext-src/postgis-src/regress/00-regress-install\n          docker compose cp \"${CURRENT_TMPDIR}/postgis-src/test\" \"${compute}\":/ext-src/postgis-src/raster/test\n          docker compose cp \"${CURRENT_TMPDIR}/postgis-src/00-regress-install\" \"${compute}\":/ext-src/postgis-src/regress\n          # The following block copies the files for the pg_hintplan test to the compute node for the extension test in an isolated docker-compose environment\n 
         docker compose cp \"${CURRENT_TMPDIR}/pg_hint_plan-src/data\" \"${compute}\":/ext-src/pg_hint_plan-src/\n          # The following block does the same for the contrib/file_fdw test\n          docker compose cp \"${CURRENT_TMPDIR}/file_fdw/data\" \"${compute}\":/postgres/contrib/file_fdw/data\n        done\n        # Apply patches\n        docker compose exec -T neon-test-extensions bash -c \"(cd /postgres && patch -p1)\" <\"../compute/patches/contrib_pg${pg_version}.patch\"\n        # We are running tests now\n        rm -f testout.txt testout_contrib.txt\n        # We want to run the longest tests first to better utilize parallelization and reduce overall test time.\n        # Tests listed in the RUN_FIRST variable will be run before others.\n        # If parallelization is not used, this environment variable will be ignored.\n\n        docker compose exec -e USE_PGXS=1 -e SKIP=timescaledb-src,rdkit-src,pg_jsonschema-src,kq_imcx-src,wal2json_2_5-src,rag_jina_reranker_v1_tiny_en-src,rag_bge_small_en_v15-src \\\n        -e RUN_FIRST=hll-src,postgis-src,pgtap-src -e PARALLEL_COMPUTES=\"${PARALLEL_COMPUTES}\" \\\n        neon-test-extensions /run-tests.sh /ext-src | tee testout.txt && EXT_SUCCESS=1 || EXT_SUCCESS=0\n        docker compose exec -e SKIP=start-scripts,postgres_fdw,ltree_plpython,jsonb_plpython,jsonb_plperl,hstore_plpython,hstore_plperl,dblink,bool_plperl \\\n        -e PARALLEL_COMPUTES=\"${PARALLEL_COMPUTES}\" \\\n        neon-test-extensions /run-tests.sh /postgres/contrib | tee testout_contrib.txt && CONTRIB_SUCCESS=1 || CONTRIB_SUCCESS=0\n        if [[ ${EXT_SUCCESS} -eq 0 || ${CONTRIB_SUCCESS} -eq 0 ]]; then\n            CONTRIB_FAILED=\n            FAILED=\n            [[ ${EXT_SUCCESS} -eq 0 ]] && FAILED=$(tail -1 testout.txt | awk '{for(i=1;i<=NF;i++){print \"/ext-src/\"$i;}}')\n            [[ ${CONTRIB_SUCCESS} -eq 0 ]] && CONTRIB_FAILED=$(tail -1 testout_contrib.txt | awk '{for(i=0;i<=NF;i++){print \"/postgres/contrib/\"$i;}}')\n       
     for d in ${FAILED} ${CONTRIB_FAILED}; do\n                docker compose exec neon-test-extensions bash -c 'for file in $(find '\"${d}\"' -name regression.diffs -o -name regression.out); do cat ${file}; done' || [[ ${?} -eq 1 ]]\n            done\n        exit 1\n        fi\n    fi\ndone\n"
  },
  {
    "path": "docker-compose/ext-src/README.md",
    "content": "# PostgreSQL Extensions for Testing\n\nThis directory contains PostgreSQL extensions used primarily for:\n1. Testing extension upgrades between different Compute versions\n2. Running regression tests with regular users (mostly for cloud instances)\n\n## Directory Structure\n\nEach extension directory follows a standard structure:\n\n- `extension-name-src/` - Directory containing test files for the extension\n  - `test-upgrade.sh` - Script for testing upgrade scenarios\n  - `regular-test.sh` - Script for testing with regular users\n  - Additional test files depending on the extension\n\n## Available Extensions\n\nThis directory includes the following extensions:\n\n- `hll-src` - HyperLogLog, a fixed-size data structure for approximating cardinality\n- `hypopg-src` - Extension to create hypothetical indexes\n- `ip4r-src` - IPv4/v6 and subnet data types\n- `pg_cron-src` - Run periodic jobs in PostgreSQL\n- `pg_graphql-src` - GraphQL support for PostgreSQL\n- `pg_hint_plan-src` - Execution plan hints\n- `pg_ivm-src` - Incremental view maintenance\n- `pg_jsonschema-src` - JSON Schema validation\n- `pg_repack-src` - Reorganize tables with minimal locks\n- `pg_roaringbitmap-src` - Roaring bitmap implementation\n- `pg_semver-src` - Semantic version data type\n- `pg_session_jwt-src` - JWT authentication for PostgreSQL\n- `pg_tiktoken-src` - OpenAI Tiktoken tokenizer\n- `pg_uuidv7-src` - UUIDv7 implementation for PostgreSQL\n- `pgjwt-src` - JWT tokens for PostgreSQL\n- `pgrag-src` - Retrieval Augmented Generation for PostgreSQL\n- `pgtap-src` - Unit testing framework for PostgreSQL\n- `pgvector-src` - Vector similarity search\n- `pgx_ulid-src` - ULID data type\n- `plv8-src` - JavaScript language for PostgreSQL stored procedures\n- `postgresql-unit-src` - SI units for PostgreSQL\n- `prefix-src` - Prefix matching for strings\n- `rag_bge_small_en_v15-src` - BGE embedding model for RAG\n- `rag_jina_reranker_v1_tiny_en-src` - Jina reranker model for RAG\n- 
`rum-src` - RUM access method for text search\n\n## Usage\n\n### Extension Upgrade Testing\n\nThe extensions in this directory are used by the `test-upgrade.sh` script to test upgrading extensions between different versions of Neon Compute nodes. The script:\n\n1. Creates a database with extensions installed on an old Compute version\n2. Creates timelines for each extension\n3. Switches to a new Compute version and tests the upgrade process\n4. Verifies extension functionality after upgrade\n\n### Regular User Testing\n\nFor testing with regular users (particularly for cloud instances), each extension directory typically contains a `regular-test.sh` script that:\n\n1. Drops the database if it exists\n2. Creates a fresh test database\n3. Installs the extension\n4. Runs regression tests\n\nA note about pg_regress: Since pg_regress attempts to set `lc_messages` for the database by default, which is forbidden for regular users, we create databases manually and use the `--use-existing` option to bypass this limitation.\n\n### CI Workflows\n\nTwo main workflows use these extensions:\n\n1. **Cloud Extensions Test** - Tests extensions on Neon cloud projects\n2. **Force Test Upgrading of Extension** - Tests upgrading extensions between different Compute versions\n\nThese workflows are integrated into the build-and-test pipeline through shell scripts:\n\n- `docker_compose_test.sh` - Tests extensions in a Docker Compose environment\n       \n- `test_extensions_upgrade.sh` - Tests extension upgrades between different Compute versions\n\n## Adding New Extensions\n\nTo add a new extension for testing:\n\n1. Create a directory named `extension-name-src` in this directory\n2. Add at minimum:\n   - `regular-test.sh` for testing with regular users\n   - If `regular-test.sh` doesn't exist, the system will look for `neon-test.sh`\n   - If neither exists, it will try to run `make installcheck`\n   - `test-upgrade.sh` is only needed if you want to test upgrade scenarios\n3. 
Update the list of extensions in the `test_extensions_upgrade.sh` script if needed for upgrade testing\n\n### Patching Extension Sources\n\nIf you need to patch the extension sources:\n\n1. Place the patch file in the extension's directory\n2. Apply the patch in the appropriate script (`test-upgrade.sh`, `neon-test.sh`, `regular-test.sh`, or `Makefile`)\n3. The patch will be applied during the testing process\n"
  },
  {
    "path": "docker-compose/ext-src/alter_db.sh",
    "content": "#!/bin/bash\n# We need these settings to get the expected output results.\n# We cannot use the environment variables e.g. PGTZ due to\n# https://github.com/neondatabase/neon/issues/1287\nexport DATABASE=${1:-contrib_regression}\npsql -c \"ALTER DATABASE ${DATABASE} SET neon.allow_unstable_extensions='on'\" \\\n     -c \"ALTER DATABASE ${DATABASE} SET DateStyle='Postgres,MDY'\" \\\n     -c \"ALTER DATABASE ${DATABASE} SET TimeZone='America/Los_Angeles'\" \\\n"
  },
  {
    "path": "docker-compose/ext-src/h3-pg-src/neon-test.sh",
    "content": "#!/usr/bin/env bash\nset -ex\ncd \"$(dirname \"${0}\")\"\nPG_REGRESS=$(dirname \"$(pg_config --pgxs)\")/../test/regress/pg_regress\ndropdb --if-exists contrib_regression\ncreatedb contrib_regression\ncd h3_postgis/test\npsql -d contrib_regression -c \"CREATE EXTENSION postgis\" -c \"CREATE EXTENSION postgis_raster\" -c \"CREATE EXTENSION h3\" -c \"CREATE EXTENSION h3_postgis\"\nTESTS=$(echo sql/* | sed 's|sql/||g; s|\\.sql||g')\n${PG_REGRESS} --use-existing --dbname contrib_regression ${TESTS}\ncd ../../h3/test\nTESTS=$(echo sql/* | sed 's|sql/||g; s|\\.sql||g')\ndropdb --if-exists contrib_regression\ncreatedb contrib_regression\npsql -d contrib_regression -c \"CREATE EXTENSION h3\"\n${PG_REGRESS} --use-existing --dbname contrib_regression ${TESTS}\n"
  },
  {
    "path": "docker-compose/ext-src/h3-pg-src/test-upgrade.sh",
    "content": "#!/bin/sh\nset -ex\ncd \"$(dirname ${0})\"\nPG_REGRESS=$(dirname \"$(pg_config --pgxs)\")/../test/regress/pg_regress\ncd h3/test\nTESTS=$(echo sql/* | sed 's|sql/||g; s|\\.sql||g')\n${PG_REGRESS} --use-existing --inputdir=./ --bindir='/usr/local/pgsql/bin'  --dbname=contrib_regression  ${TESTS}"
  },
  {
    "path": "docker-compose/ext-src/hll-src/regular-test.sh",
    "content": "#!/bin/sh\nset -ex\ncd \"$(dirname ${0})\"\nPG_REGRESS=$(dirname \"$(pg_config --pgxs)\")/../test/regress/pg_regress\ndropdb --if-exists contrib_regression\ncreatedb contrib_regression\n${PG_REGRESS} --use-existing --inputdir=./ --bindir='/usr/local/pgsql/bin'    --dbname=contrib_regression setup add_agg agg_oob auto_sparse card_op cast_shape copy_binary cumulative_add_cardinality_correction cumulative_add_comprehensive_promotion cumulative_add_sparse_edge cumulative_add_sparse_random cumulative_add_sparse_step cumulative_union_comprehensive cumulative_union_explicit_explicit cumulative_union_explicit_promotion cumulative_union_probabilistic_probabilistic cumulative_union_sparse_full_representation cumulative_union_sparse_promotion cumulative_union_sparse_sparse disable_hashagg equal explicit_thresh hash hash_any meta_func murmur_bigint murmur_bytea nosparse notequal scalar_oob storedproc transaction typmod typmod_insert union_op"
  },
  {
    "path": "docker-compose/ext-src/hll-src/test-upgrade.sh",
    "content": "#!/bin/sh\nset -ex\ncd \"$(dirname ${0})\"\nPG_REGRESS=$(dirname \"$(pg_config --pgxs)\")/../test/regress/pg_regress\n${PG_REGRESS} --use-existing --inputdir=./ --bindir='/usr/local/pgsql/bin'    --dbname=contrib_regression add_agg agg_oob auto_sparse card_op cast_shape copy_binary cumulative_add_cardinality_correction cumulative_add_comprehensive_promotion cumulative_add_sparse_edge cumulative_add_sparse_random cumulative_add_sparse_step cumulative_union_comprehensive cumulative_union_explicit_explicit cumulative_union_explicit_promotion cumulative_union_probabilistic_probabilistic cumulative_union_sparse_full_representation cumulative_union_sparse_promotion cumulative_union_sparse_sparse disable_hashagg equal explicit_thresh hash hash_any meta_func murmur_bigint murmur_bytea nosparse notequal scalar_oob storedproc transaction typmod typmod_insert union_op"
  },
  {
    "path": "docker-compose/ext-src/hypopg-src/regular-test.sh",
    "content": "#!/bin/sh\nset -ex\ncd \"$(dirname ${0})\"\ndropdb --if-exists contrib_regression\ncreatedb contrib_regression\nPG_REGRESS=$(dirname \"$(pg_config --pgxs)\")/../test/regress/pg_regress\n${PG_REGRESS} --inputdir=./ --bindir='/usr/local/pgsql/bin' --use-existing --inputdir=test --dbname=contrib_regression hypopg hypo_brin hypo_index_part hypo_include hypo_hash hypo_hide_index"
  },
  {
    "path": "docker-compose/ext-src/hypopg-src/test-upgrade.patch",
    "content": "diff --git a/expected/hypopg.out b/expected/hypopg.out\nindex 90121d0..859260b 100644\n--- a/expected/hypopg.out\n+++ b/expected/hypopg.out\n@@ -11,7 +11,8 @@ BEGIN\n END;\n $_$\n LANGUAGE plpgsql;\n-CREATE EXTENSION hypopg;\n+CREATE EXTENSION IF NOT EXISTS hypopg;\n+NOTICE:  extension \"hypopg\" already exists, skipping\n CREATE TABLE hypo (id integer, val text, \"Id2\" bigint);\n INSERT INTO hypo SELECT i, 'line ' || i\n FROM generate_series(1,100000) f(i);\ndiff --git a/test/sql/hypopg.sql b/test/sql/hypopg.sql\nindex 99722b0..8d6bacb 100644\n--- a/test/sql/hypopg.sql\n+++ b/test/sql/hypopg.sql\n@@ -12,7 +12,7 @@ END;\n $_$\n LANGUAGE plpgsql;\n\n-CREATE EXTENSION hypopg;\n+CREATE EXTENSION IF NOT EXISTS hypopg;\n\n CREATE TABLE hypo (id integer, val text, \"Id2\" bigint);\n\n"
  },
  {
    "path": "docker-compose/ext-src/hypopg-src/test-upgrade.sh",
    "content": "#!/bin/sh\nset -ex\ncd \"$(dirname ${0})\"\npatch -p1 <test-upgrade.patch\nPG_REGRESS=$(dirname \"$(pg_config --pgxs)\")/../test/regress/pg_regress\n${PG_REGRESS} --inputdir=./ --bindir='/usr/local/pgsql/bin' --use-existing --inputdir=test --dbname=contrib_regression hypopg hypo_brin hypo_index_part hypo_include hypo_hash hypo_hide_index"
  },
  {
    "path": "docker-compose/ext-src/ip4r-src/regular-test.sh",
    "content": "#!/bin/sh\nset -ex\ncd \"$(dirname ${0})\"\ndropdb --if-exist contrib_regression\ncreatedb contrib_regression\nPG_REGRESS=$(dirname \"$(pg_config --pgxs)\")/../test/regress/pg_regress\n${PG_REGRESS} --use-existing --inputdir=./ --bindir='/usr/local/pgsql/bin'    --dbname=contrib_regression ip4r ip4r-softerr ip4r-v11"
  },
  {
    "path": "docker-compose/ext-src/ip4r-src/test-upgrade.patch",
    "content": "diff --git a/expected/ip4r.out b/expected/ip4r.out\nindex 7527af3..b38ed29 100644\n--- a/expected/ip4r.out\n+++ b/expected/ip4r.out\n@@ -1,6 +1,5 @@\n --\n /*CUT-HERE*/\n-CREATE EXTENSION ip4r;\n -- Check whether any of our opclasses fail amvalidate\n DO $d$\n   DECLARE\ndiff --git a/sql/ip4r.sql b/sql/ip4r.sql\nindex 65c49ec..24ade09 100644\n--- a/sql/ip4r.sql\n+++ b/sql/ip4r.sql\n@@ -1,7 +1,6 @@\n --\n\n /*CUT-HERE*/\n-CREATE EXTENSION ip4r;\n\n -- Check whether any of our opclasses fail amvalidate\n\n"
  },
  {
    "path": "docker-compose/ext-src/ip4r-src/test-upgrade.sh",
    "content": "#!/bin/sh\nset -ex\ncd \"$(dirname ${0})\"\npatch -p1 <test-upgrade.patch\nPG_REGRESS=$(dirname \"$(pg_config --pgxs)\")/../test/regress/pg_regress\n${PG_REGRESS} --use-existing --inputdir=./ --bindir='/usr/local/pgsql/bin'    --dbname=contrib_regression ip4r ip4r-softerr ip4r-v11"
  },
  {
    "path": "docker-compose/ext-src/online_advisor-src/neon-test.sh",
    "content": "#!/bin/sh\nset -ex\ncd \"$(dirname \"${0}\")\"\nif [ -f Makefile ]; then\n  make installcheck\nfi\n"
  },
  {
    "path": "docker-compose/ext-src/online_advisor-src/regular-test.sh",
    "content": "#!/bin/sh\nset -ex\ncd \"$(dirname ${0})\"\n[ -f Makefile ] || exit 0\ndropdb --if-exist contrib_regression\ncreatedb contrib_regression\nPG_REGRESS=$(dirname \"$(pg_config --pgxs)\")/../test/regress/pg_regress\nTESTS=$(echo sql/* | sed 's|sql/||g; s|\\.sql||g')\n${PG_REGRESS} --use-existing --inputdir=./ --bindir='/usr/local/pgsql/bin' --dbname=contrib_regression ${TESTS}\n"
  },
  {
    "path": "docker-compose/ext-src/pg_cron-src/regular-test.sh",
    "content": "#!/bin/sh\nset -ex\ncd \"$(dirname ${0})\"\ndropdb --if-exist contrib_regression\ncreatedb contrib_regression\nPG_REGRESS=$(dirname \"$(pg_config --pgxs)\")/../test/regress/pg_regress\n${PG_REGRESS} --use-existing --inputdir=./ --bindir='/usr/local/pgsql/bin'    --dbname=contrib_regression pg_cron-test"
  },
  {
    "path": "docker-compose/ext-src/pg_cron-src/test-upgrade.patch",
    "content": "diff --git a/expected/pg_cron-test.out b/expected/pg_cron-test.out\nindex d79d542..1663886 100644\n--- a/expected/pg_cron-test.out\n+++ b/expected/pg_cron-test.out\n@@ -1,30 +1,3 @@\n-CREATE EXTENSION pg_cron VERSION '1.0';\n-SELECT extversion FROM pg_extension WHERE extname='pg_cron';\n- extversion \n-------------\n- 1.0\n-(1 row)\n-\n--- Test binary compatibility with v1.4 function signature.\n-ALTER EXTENSION pg_cron UPDATE TO '1.4';\n-SELECT cron.unschedule(job_name := 'no_such_job');\n-ERROR:  could not find valid entry for job 'no_such_job'\n-SELECT cron.schedule('testjob', '* * * * *', 'SELECT 1');\n- schedule \n-----------\n-        1\n-(1 row)\n-\n-SELECT cron.unschedule('testjob');\n- unschedule \n-------------\n- t\n-(1 row)\n-\n--- Test cache invalidation\n-DROP EXTENSION pg_cron;\n-CREATE EXTENSION pg_cron VERSION '1.4';\n-ALTER EXTENSION pg_cron UPDATE;\n -- Vacuum every day at 10:00am (GMT)\n SELECT cron.schedule('0 10 * * *', 'VACUUM');\n  schedule \n@@ -300,8 +273,3 @@ SELECT jobid, jobname, schedule, command FROM cron.job ORDER BY jobid;\n SELECT cron.schedule('bad-last-dom-job1', '0 11 $foo * *', 'VACUUM FULL');\n ERROR:  invalid schedule: 0 11 $foo * *\n HINT:  Use cron format (e.g. 
5 4 * * *), or interval format '[1-59] seconds'\n--- cleaning\n-DROP EXTENSION pg_cron;\n-drop user pgcron_cront;\n-drop database pgcron_dbno;\n-drop database pgcron_dbyes;\ndiff --git a/sql/pg_cron-test.sql b/sql/pg_cron-test.sql\nindex 45f94d9..241cf73 100644\n--- a/sql/pg_cron-test.sql\n+++ b/sql/pg_cron-test.sql\n@@ -1,17 +1,3 @@\n-CREATE EXTENSION pg_cron VERSION '1.0';\n-SELECT extversion FROM pg_extension WHERE extname='pg_cron';\n--- Test binary compatibility with v1.4 function signature.\n-ALTER EXTENSION pg_cron UPDATE TO '1.4';\n-SELECT cron.unschedule(job_name := 'no_such_job');\n-SELECT cron.schedule('testjob', '* * * * *', 'SELECT 1');\n-SELECT cron.unschedule('testjob');\n-\n--- Test cache invalidation\n-DROP EXTENSION pg_cron;\n-CREATE EXTENSION pg_cron VERSION '1.4';\n-\n-ALTER EXTENSION pg_cron UPDATE;\n-\n -- Vacuum every day at 10:00am (GMT)\n SELECT cron.schedule('0 10 * * *', 'VACUUM');\n \n@@ -156,8 +142,3 @@ SELECT jobid, jobname, schedule, command FROM cron.job ORDER BY jobid;\n -- invalid last of day job\n SELECT cron.schedule('bad-last-dom-job1', '0 11 $foo * *', 'VACUUM FULL');\n \n--- cleaning\n-DROP EXTENSION pg_cron;\n-drop user pgcron_cront;\n-drop database pgcron_dbno;\n-drop database pgcron_dbyes;\n"
  },
  {
    "path": "docker-compose/ext-src/pg_cron-src/test-upgrade.sh",
    "content": "#!/bin/sh\nset -ex\ncd \"$(dirname ${0})\"\npatch -p1 <test-upgrade.patch\nPG_REGRESS=$(dirname \"$(pg_config --pgxs)\")/../test/regress/pg_regress\n${PG_REGRESS} --use-existing --inputdir=./ --bindir='/usr/local/pgsql/bin'    --dbname=contrib_regression pg_cron-test"
  },
  {
    "path": "docker-compose/ext-src/pg_graphql-src/neon-test.sh",
    "content": "#!/bin/bash\nset -ex\ncd \"$(dirname \"${0}\")\"\ndropdb --if-exists contrib_regression\ncreatedb contrib_regression\nPGXS=\"$(dirname \"$(pg_config --pgxs)\" )\"\nREGRESS=\"${PGXS}/../test/regress/pg_regress\"\nTESTDIR=\"test\"\nTESTS=$(ls \"${TESTDIR}/sql\" | sort )\nTESTS=${TESTS//\\.sql/}\npsql -v ON_ERROR_STOP=1 -f test/fixtures.sql -d contrib_regression\n${REGRESS} --use-existing --dbname=contrib_regression --inputdir=${TESTDIR} ${TESTS}\n\n"
  },
  {
    "path": "docker-compose/ext-src/pg_graphql-src/regular-test.sh",
    "content": "#!/bin/bash\nset -ex\ncd \"$(dirname \"${0}\")\"\nPGXS=\"$(dirname \"$(pg_config --pgxs)\" )\"\nREGRESS=\"${PGXS}/../test/regress/pg_regress\"\nTESTDIR=\"test\"\nTESTS=$(ls \"${TESTDIR}/sql\" | sort )\nTESTS=${TESTS//\\.sql/}\nTESTS=${TESTS/empty_mutations/}\nTESTS=${TESTS/function_return_row_is_selectable/}\nTESTS=${TESTS/issue_300/}\nTESTS=${TESTS/permissions_connection_column/}\nTESTS=${TESTS/permissions_functions/}\nTESTS=${TESTS/permissions_node_column/}\nTESTS=${TESTS/permissions_table_level/}\nTESTS=${TESTS/permissions_types/}\nTESTS=${TESTS/row_level_security/}\nTESTS=${TESTS/sqli_connection/}\ndropdb --if-exist contrib_regression\ncreatedb contrib_regression\n. ../alter_db.sh\npsql -v ON_ERROR_STOP=1 -f test/fixtures.sql -d contrib_regression\n${REGRESS} --use-existing --dbname=contrib_regression --inputdir=${TESTDIR} ${TESTS}\n\n"
  },
  {
    "path": "docker-compose/ext-src/pg_hint_plan-src/regular-test.sh",
    "content": "#!/bin/sh\nset -ex\ncd \"$(dirname ${0})\"\ndropdb --if-exist contrib_regression\ncreatedb contrib_regression\nPG_REGRESS=$(dirname \"$(pg_config --pgxs)\")/../test/regress/pg_regress\n${PG_REGRESS} --use-existing  --inputdir=./ --bindir='/usr/local/pgsql/bin'    --encoding=UTF8 --dbname=contrib_regression init base_plan pg_hint_plan ut-init ut-A ut-S ut-J ut-L ut-G ut-R ut-fdw ut-W ut-T ut-fini hints_anywhere plpgsql oldextversions"
  },
  {
    "path": "docker-compose/ext-src/pg_ivm-src/regular-test.sh",
    "content": "#!/bin/sh\nset -ex\ndropdb --if-exist contrib_regression\ncreatedb contrib_regression\ncd \"$(dirname ${0})\"\nPG_REGRESS=$(dirname \"$(pg_config --pgxs)\")/../test/regress/pg_regress\npatch -p1 <regular.patch\n${PG_REGRESS} --use-existing --inputdir=./ --bindir='/usr/local/pgsql/bin' --dbname=contrib_regression pg_ivm create_immv refresh_immv\npatch -R -p1 <regular.patch"
  },
  {
    "path": "docker-compose/ext-src/pg_ivm-src/regular.patch",
    "content": "diff --git a/expected/pg_ivm.out b/expected/pg_ivm.out\nindex e8798ee..4081680 100644\n--- a/expected/pg_ivm.out\n+++ b/expected/pg_ivm.out\n@@ -1363,61 +1363,6 @@ SELECT * FROM mv ORDER BY i;\n    |   2 |   4 |                 2 |                 2 |             2\n (1 row)\n \n-ROLLBACK;\n--- IMMV containing user defined type\n-BEGIN;\n-CREATE TYPE mytype;\n-CREATE FUNCTION mytype_in(cstring)\n- RETURNS mytype AS 'int4in'\n- LANGUAGE INTERNAL STRICT IMMUTABLE;\n-NOTICE:  return type mytype is only a shell\n-CREATE FUNCTION mytype_out(mytype)\n- RETURNS cstring AS 'int4out'\n- LANGUAGE INTERNAL STRICT IMMUTABLE;\n-NOTICE:  argument type mytype is only a shell\n-CREATE TYPE mytype (\n- LIKE = int4,\n- INPUT = mytype_in,\n- OUTPUT = mytype_out\n-);\n-CREATE FUNCTION mytype_eq(mytype, mytype)\n- RETURNS bool AS 'int4eq'\n- LANGUAGE INTERNAL STRICT IMMUTABLE;\n-CREATE FUNCTION mytype_lt(mytype, mytype)\n- RETURNS bool AS 'int4lt'\n- LANGUAGE INTERNAL STRICT IMMUTABLE;\n-CREATE FUNCTION mytype_cmp(mytype, mytype)\n- RETURNS integer AS 'btint4cmp'\n- LANGUAGE INTERNAL STRICT IMMUTABLE;\n-CREATE OPERATOR = (\n- leftarg = mytype, rightarg = mytype,\n- procedure = mytype_eq);\n-CREATE OPERATOR < (\n- leftarg = mytype, rightarg = mytype,\n- procedure = mytype_lt);\n-CREATE OPERATOR CLASS mytype_ops\n- DEFAULT FOR TYPE mytype USING btree AS\n- OPERATOR        1       <,\n- OPERATOR        3       = ,\n- FUNCTION\t\t1\t\tmytype_cmp(mytype,mytype);\n-CREATE TABLE t_mytype (x mytype);\n-SELECT create_immv('mv_mytype',\n- 'SELECT * FROM t_mytype');\n-NOTICE:  could not create an index on immv \"mv_mytype\" automatically\n-DETAIL:  This target list does not have all the primary key columns, or this view does not contain GROUP BY or DISTINCT clause.\n-HINT:  Create an index on the immv for efficient incremental maintenance.\n- create_immv \n--------------\n-           0\n-(1 row)\n-\n-INSERT INTO t_mytype VALUES ('1'::mytype);\n-SELECT * FROM mv_mytype;\n- x 
\n----\n- 1\n-(1 row)\n-\n ROLLBACK;\n -- outer join is not supported\n SELECT create_immv('mv(a,b)',\n@@ -1510,112 +1455,6 @@ SELECT create_immv('mv_ivm_only_values1', 'values(1)');\n ERROR:  VALUES is not supported on incrementally maintainable materialized view\n SELECT create_immv('mv_ivm_only_values2',  'SELECT * FROM (values(1)) AS tmp');\n ERROR:  VALUES is not supported on incrementally maintainable materialized view\n--- views containing base tables with Row Level Security\n-DROP USER IF EXISTS ivm_admin;\n-NOTICE:  role \"ivm_admin\" does not exist, skipping\n-DROP USER IF EXISTS ivm_user;\n-NOTICE:  role \"ivm_user\" does not exist, skipping\n-CREATE USER ivm_admin;\n-CREATE USER ivm_user;\n---- create a table with RLS\n-SET SESSION AUTHORIZATION ivm_admin;\n-CREATE TABLE rls_tbl(id int, data text, owner name);\n-INSERT INTO rls_tbl VALUES\n-  (1,'foo','ivm_user'),\n-  (2,'bar','postgres');\n-CREATE TABLE num_tbl(id int, num text);\n-INSERT INTO num_tbl VALUES\n-  (1,'one'),\n-  (2,'two'),\n-  (3,'three'),\n-  (4,'four'),\n-  (5,'five'),\n-  (6,'six');\n---- Users can access only their own rows\n-CREATE POLICY rls_tbl_policy ON rls_tbl FOR SELECT TO PUBLIC USING(owner = current_user);\n-ALTER TABLE rls_tbl ENABLE ROW LEVEL SECURITY;\n-GRANT ALL on rls_tbl TO PUBLIC;\n-GRANT ALL on num_tbl TO PUBLIC;\n---- create a view owned by ivm_user\n-SET SESSION AUTHORIZATION ivm_user;\n-SELECT create_immv('ivm_rls', 'SELECT * FROM rls_tbl');\n-NOTICE:  could not create an index on immv \"ivm_rls\" automatically\n-DETAIL:  This target list does not have all the primary key columns, or this view does not contain GROUP BY or DISTINCT clause.\n-HINT:  Create an index on the immv for efficient incremental maintenance.\n- create_immv \n--------------\n-           1\n-(1 row)\n-\n-SELECT id, data, owner FROM ivm_rls ORDER BY 1,2,3;\n- id | data |  owner   \n-----+------+----------\n-  1 | foo  | ivm_user\n-(1 row)\n-\n-RESET SESSION AUTHORIZATION;\n---- inserts rows owned 
by different users\n-INSERT INTO rls_tbl VALUES\n-  (3,'baz','ivm_user'),\n-  (4,'qux','postgres');\n-SELECT id, data, owner FROM ivm_rls ORDER BY 1,2,3;\n- id | data |  owner   \n-----+------+----------\n-  1 | foo  | ivm_user\n-  3 | baz  | ivm_user\n-(2 rows)\n-\n---- combination of diffent kinds of commands\n-WITH\n- i AS (INSERT INTO rls_tbl VALUES(5,'quux','postgres'), (6,'corge','ivm_user')),\n- u AS (UPDATE rls_tbl SET owner = 'postgres' WHERE id = 1),\n- u2 AS (UPDATE rls_tbl SET owner = 'ivm_user' WHERE id = 2)\n-SELECT;\n---\n-(1 row)\n-\n-SELECT id, data, owner FROM ivm_rls ORDER BY 1,2,3;\n- id | data  |  owner   \n-----+-------+----------\n-  2 | bar   | ivm_user\n-  3 | baz   | ivm_user\n-  6 | corge | ivm_user\n-(3 rows)\n-\n----\n-SET SESSION AUTHORIZATION ivm_user;\n-SELECT create_immv('ivm_rls2', 'SELECT * FROM rls_tbl JOIN num_tbl USING(id)');\n-NOTICE:  could not create an index on immv \"ivm_rls2\" automatically\n-DETAIL:  This target list does not have all the primary key columns, or this view does not contain GROUP BY or DISTINCT clause.\n-HINT:  Create an index on the immv for efficient incremental maintenance.\n- create_immv \n--------------\n-           3\n-(1 row)\n-\n-RESET SESSION AUTHORIZATION;\n-WITH\n- x AS (UPDATE rls_tbl SET data = data || '_2' where id in (3,4)),\n- y AS (UPDATE num_tbl SET num = num || '_2' where id in (3,4))\n-SELECT;\n---\n-(1 row)\n-\n-SELECT * FROM ivm_rls2 ORDER BY 1,2,3;\n- id | data  |  owner   |   num   \n-----+-------+----------+---------\n-  2 | bar   | ivm_user | two\n-  3 | baz_2 | ivm_user | three_2\n-  6 | corge | ivm_user | six\n-(3 rows)\n-\n-DROP TABLE rls_tbl CASCADE;\n-NOTICE:  drop cascades to 2 other objects\n-DETAIL:  drop cascades to table ivm_rls\n-drop cascades to table ivm_rls2\n-DROP TABLE num_tbl CASCADE;\n-DROP USER ivm_user;\n-DROP USER ivm_admin;\n -- automatic index creation\n BEGIN;\n CREATE TABLE base_a (i int primary key, j int);\ndiff --git a/sql/pg_ivm.sql 
b/sql/pg_ivm.sql\nindex d3c1a01..203213d 100644\n--- a/sql/pg_ivm.sql\n+++ b/sql/pg_ivm.sql\n@@ -454,53 +454,6 @@ DELETE FROM base_t WHERE v = 5;\n SELECT * FROM mv ORDER BY i;\n ROLLBACK;\n \n--- IMMV containing user defined type\n-BEGIN;\n-\n-CREATE TYPE mytype;\n-CREATE FUNCTION mytype_in(cstring)\n- RETURNS mytype AS 'int4in'\n- LANGUAGE INTERNAL STRICT IMMUTABLE;\n-CREATE FUNCTION mytype_out(mytype)\n- RETURNS cstring AS 'int4out'\n- LANGUAGE INTERNAL STRICT IMMUTABLE;\n-CREATE TYPE mytype (\n- LIKE = int4,\n- INPUT = mytype_in,\n- OUTPUT = mytype_out\n-);\n-\n-CREATE FUNCTION mytype_eq(mytype, mytype)\n- RETURNS bool AS 'int4eq'\n- LANGUAGE INTERNAL STRICT IMMUTABLE;\n-CREATE FUNCTION mytype_lt(mytype, mytype)\n- RETURNS bool AS 'int4lt'\n- LANGUAGE INTERNAL STRICT IMMUTABLE;\n-CREATE FUNCTION mytype_cmp(mytype, mytype)\n- RETURNS integer AS 'btint4cmp'\n- LANGUAGE INTERNAL STRICT IMMUTABLE;\n-\n-CREATE OPERATOR = (\n- leftarg = mytype, rightarg = mytype,\n- procedure = mytype_eq);\n-CREATE OPERATOR < (\n- leftarg = mytype, rightarg = mytype,\n- procedure = mytype_lt);\n-\n-CREATE OPERATOR CLASS mytype_ops\n- DEFAULT FOR TYPE mytype USING btree AS\n- OPERATOR        1       <,\n- OPERATOR        3       = ,\n- FUNCTION\t\t1\t\tmytype_cmp(mytype,mytype);\n-\n-CREATE TABLE t_mytype (x mytype);\n-SELECT create_immv('mv_mytype',\n- 'SELECT * FROM t_mytype');\n-INSERT INTO t_mytype VALUES ('1'::mytype);\n-SELECT * FROM mv_mytype;\n-\n-ROLLBACK;\n-\n -- outer join is not supported\n SELECT create_immv('mv(a,b)',\n     'SELECT a.i, b.i FROM mv_base_a a LEFT JOIN mv_base_b b ON a.i=b.i');\n@@ -579,71 +532,6 @@ SELECT create_immv('mv_ivm31', 'SELECT sum(i)/sum(j) FROM mv_base_a');\n SELECT create_immv('mv_ivm_only_values1', 'values(1)');\n SELECT create_immv('mv_ivm_only_values2',  'SELECT * FROM (values(1)) AS tmp');\n \n-\n--- views containing base tables with Row Level Security\n-DROP USER IF EXISTS ivm_admin;\n-DROP USER IF EXISTS ivm_user;\n-CREATE USER 
ivm_admin;\n-CREATE USER ivm_user;\n-\n---- create a table with RLS\n-SET SESSION AUTHORIZATION ivm_admin;\n-CREATE TABLE rls_tbl(id int, data text, owner name);\n-INSERT INTO rls_tbl VALUES\n-  (1,'foo','ivm_user'),\n-  (2,'bar','postgres');\n-CREATE TABLE num_tbl(id int, num text);\n-INSERT INTO num_tbl VALUES\n-  (1,'one'),\n-  (2,'two'),\n-  (3,'three'),\n-  (4,'four'),\n-  (5,'five'),\n-  (6,'six');\n-\n---- Users can access only their own rows\n-CREATE POLICY rls_tbl_policy ON rls_tbl FOR SELECT TO PUBLIC USING(owner = current_user);\n-ALTER TABLE rls_tbl ENABLE ROW LEVEL SECURITY;\n-GRANT ALL on rls_tbl TO PUBLIC;\n-GRANT ALL on num_tbl TO PUBLIC;\n-\n---- create a view owned by ivm_user\n-SET SESSION AUTHORIZATION ivm_user;\n-SELECT create_immv('ivm_rls', 'SELECT * FROM rls_tbl');\n-SELECT id, data, owner FROM ivm_rls ORDER BY 1,2,3;\n-RESET SESSION AUTHORIZATION;\n-\n---- inserts rows owned by different users\n-INSERT INTO rls_tbl VALUES\n-  (3,'baz','ivm_user'),\n-  (4,'qux','postgres');\n-SELECT id, data, owner FROM ivm_rls ORDER BY 1,2,3;\n-\n---- combination of diffent kinds of commands\n-WITH\n- i AS (INSERT INTO rls_tbl VALUES(5,'quux','postgres'), (6,'corge','ivm_user')),\n- u AS (UPDATE rls_tbl SET owner = 'postgres' WHERE id = 1),\n- u2 AS (UPDATE rls_tbl SET owner = 'ivm_user' WHERE id = 2)\n-SELECT;\n-SELECT id, data, owner FROM ivm_rls ORDER BY 1,2,3;\n-\n----\n-SET SESSION AUTHORIZATION ivm_user;\n-SELECT create_immv('ivm_rls2', 'SELECT * FROM rls_tbl JOIN num_tbl USING(id)');\n-RESET SESSION AUTHORIZATION;\n-\n-WITH\n- x AS (UPDATE rls_tbl SET data = data || '_2' where id in (3,4)),\n- y AS (UPDATE num_tbl SET num = num || '_2' where id in (3,4))\n-SELECT;\n-SELECT * FROM ivm_rls2 ORDER BY 1,2,3;\n-\n-DROP TABLE rls_tbl CASCADE;\n-DROP TABLE num_tbl CASCADE;\n-\n-DROP USER ivm_user;\n-DROP USER ivm_admin;\n-\n -- automatic index creation\n BEGIN;\n CREATE TABLE base_a (i int primary key, j int);\n"
  },
  {
    "path": "docker-compose/ext-src/pg_ivm-src/test-upgrade.patch",
    "content": "diff --git a/expected/pg_ivm.out b/expected/pg_ivm.out\nindex e8798ee..cca58d0 100644\n--- a/expected/pg_ivm.out\n+++ b/expected/pg_ivm.out\n@@ -1,4 +1,3 @@\n-CREATE EXTENSION pg_ivm;\n GRANT ALL ON SCHEMA public TO public;\n -- create a table to use as a basis for views and materialized views in various combinations\n CREATE TABLE mv_base_a (i int, j int);\ndiff --git a/sql/pg_ivm.sql b/sql/pg_ivm.sql\nindex d3c1a01..9382d7f 100644\n--- a/sql/pg_ivm.sql\n+++ b/sql/pg_ivm.sql\n@@ -1,4 +1,3 @@\n-CREATE EXTENSION pg_ivm;\n GRANT ALL ON SCHEMA public TO public;\n \n -- create a table to use as a basis for views and materialized views in various combinations\n"
  },
  {
    "path": "docker-compose/ext-src/pg_ivm-src/test-upgrade.sh",
    "content": "#!/bin/sh\nset -ex\ncd \"$(dirname ${0})\"\npatch -p1 <test-upgrade.patch\nPG_REGRESS=$(dirname \"$(pg_config --pgxs)\")/../test/regress/pg_regress\n${PG_REGRESS} --use-existing --inputdir=./ --bindir='/usr/local/pgsql/bin' --dbname=contrib_regression pg_ivm create_immv refresh_immv"
  },
  {
    "path": "docker-compose/ext-src/pg_jsonschema-src/Makefile",
    "content": "EXTENSION = pg_jsonschema\nDATA = pg_jsonschema--1.0.sql\nREGRESS = jsonschema_valid_api  jsonschema_edge_cases\n\nPG_CONFIG ?= pg_config\nPGXS := $(shell $(PG_CONFIG) --pgxs)\nPG_REGRESS := $(dir $(PGXS))../../src/test/regress/pg_regress\n.PHONY: installcheck\ninstallcheck:\n\tdropdb --if-exists contrib_regression\n\tcreatedb contrib_regression\n\tpsql -d contrib_regression -c \"CREATE EXTENSION $(EXTENSION)\"\n\t$(PG_REGRESS) --use-existing --dbname=contrib_regression $(REGRESS)\n"
  },
  {
    "path": "docker-compose/ext-src/pg_jsonschema-src/expected/jsonschema_edge_cases.out",
    "content": "-- Schema with enums, nulls, extra properties disallowed\nSELECT jsonschema_is_valid('{\n  \"type\": \"object\",\n  \"properties\": {\n    \"status\": { \"type\": \"string\", \"enum\": [\"active\", \"inactive\", \"pending\"] },\n    \"email\": { \"type\": [\"string\", \"null\"], \"format\": \"email\" }\n  },\n  \"required\": [\"status\"],\n  \"additionalProperties\": false\n}'::json);\n jsonschema_is_valid \n---------------------\n t\n(1 row)\n\n-- Valid enum and null email\nSELECT jsonschema_validation_errors(\n  '{\n    \"type\": \"object\",\n    \"properties\": {\n      \"status\": { \"type\": \"string\", \"enum\": [\"active\", \"inactive\", \"pending\"] },\n      \"email\": { \"type\": [\"string\", \"null\"], \"format\": \"email\" }\n    },\n    \"required\": [\"status\"],\n    \"additionalProperties\": false\n  }'::json,\n  '{\"status\": \"active\", \"email\": null}'::json\n);\n jsonschema_validation_errors \n------------------------------\n {}\n(1 row)\n\n-- Invalid enum value\nSELECT jsonschema_validation_errors(\n  '{\n    \"type\": \"object\",\n    \"properties\": {\n      \"status\": { \"type\": \"string\", \"enum\": [\"active\", \"inactive\", \"pending\"] },\n      \"email\": { \"type\": [\"string\", \"null\"], \"format\": \"email\" }\n    },\n    \"required\": [\"status\"],\n    \"additionalProperties\": false\n  }'::json,\n  '{\"status\": \"disabled\", \"email\": null}'::json\n);\n                     jsonschema_validation_errors                     \n----------------------------------------------------------------------\n {\"\\\"disabled\\\" is not one of [\\\"active\\\",\\\"inactive\\\",\\\"pending\\\"]\"}\n(1 row)\n\n-- Invalid email format (assuming format is validated)\nSELECT jsonschema_validation_errors(\n  '{\n    \"type\": \"object\",\n    \"properties\": {\n      \"status\": { \"type\": \"string\", \"enum\": [\"active\", \"inactive\", \"pending\"] },\n      \"email\": { \"type\": [\"string\", \"null\"], \"format\": \"email\" 
}\n    },\n    \"required\": [\"status\"],\n    \"additionalProperties\": false\n  }'::json,\n  '{\"status\": \"active\", \"email\": \"not-an-email\"}'::json\n);\n      jsonschema_validation_errors       \n-----------------------------------------\n {\"\\\"not-an-email\\\" is not a \\\"email\\\"\"}\n(1 row)\n\n-- Extra property not allowed\nSELECT jsonschema_validation_errors(\n  '{\n    \"type\": \"object\",\n    \"properties\": {\n      \"status\": { \"type\": \"string\", \"enum\": [\"active\", \"inactive\", \"pending\"] },\n      \"email\": { \"type\": [\"string\", \"null\"], \"format\": \"email\" }\n    },\n    \"required\": [\"status\"],\n    \"additionalProperties\": false\n  }'::json,\n  '{\"status\": \"active\", \"extra\": \"should not be here\"}'::json\n);\n                    jsonschema_validation_errors                    \n--------------------------------------------------------------------\n {\"Additional properties are not allowed ('extra' was unexpected)\"}\n(1 row)\n\n"
  },
  {
    "path": "docker-compose/ext-src/pg_jsonschema-src/expected/jsonschema_valid_api.out",
    "content": "-- Define schema\nSELECT jsonschema_is_valid('{\n  \"type\": \"object\",\n  \"properties\": {\n    \"username\": { \"type\": \"string\" },\n    \"age\": { \"type\": \"integer\" }\n  },\n  \"required\": [\"username\"]\n}'::json);\n jsonschema_is_valid \n---------------------\n t\n(1 row)\n\n-- Valid instance\nSELECT jsonschema_validation_errors(\n  '{\n    \"type\": \"object\",\n    \"properties\": {\n      \"username\": { \"type\": \"string\" },\n      \"age\": { \"type\": \"integer\" }\n    },\n    \"required\": [\"username\"]\n  }'::json,\n  '{\"username\": \"alice\", \"age\": 25}'::json\n);\n jsonschema_validation_errors \n------------------------------\n {}\n(1 row)\n\n-- Invalid instance: missing required \"username\"\nSELECT jsonschema_validation_errors(\n  '{\n    \"type\": \"object\",\n    \"properties\": {\n      \"username\": { \"type\": \"string\" },\n      \"age\": { \"type\": \"integer\" }\n    },\n    \"required\": [\"username\"]\n  }'::json,\n  '{\"age\": 25}'::json\n);\n      jsonschema_validation_errors       \n-----------------------------------------\n {\"\\\"username\\\" is a required property\"}\n(1 row)\n\n-- Invalid instance: wrong type for \"age\"\nSELECT jsonschema_validation_errors(\n  '{\n    \"type\": \"object\",\n    \"properties\": {\n      \"username\": { \"type\": \"string\" },\n      \"age\": { \"type\": \"integer\" }\n    },\n    \"required\": [\"username\"]\n  }'::json,\n  '{\"username\": \"bob\", \"age\": \"twenty\"}'::json\n);\n       jsonschema_validation_errors        \n-------------------------------------------\n {\"\\\"twenty\\\" is not of type \\\"integer\\\"\"}\n(1 row)\n\n"
  },
  {
    "path": "docker-compose/ext-src/pg_jsonschema-src/sql/jsonschema_edge_cases.sql",
    "content": "-- Schema with enums, nulls, extra properties disallowed\nSELECT jsonschema_is_valid('{\n  \"type\": \"object\",\n  \"properties\": {\n    \"status\": { \"type\": \"string\", \"enum\": [\"active\", \"inactive\", \"pending\"] },\n    \"email\": { \"type\": [\"string\", \"null\"], \"format\": \"email\" }\n  },\n  \"required\": [\"status\"],\n  \"additionalProperties\": false\n}'::json);\n\n-- Valid enum and null email\nSELECT jsonschema_validation_errors(\n  '{\n    \"type\": \"object\",\n    \"properties\": {\n      \"status\": { \"type\": \"string\", \"enum\": [\"active\", \"inactive\", \"pending\"] },\n      \"email\": { \"type\": [\"string\", \"null\"], \"format\": \"email\" }\n    },\n    \"required\": [\"status\"],\n    \"additionalProperties\": false\n  }'::json,\n  '{\"status\": \"active\", \"email\": null}'::json\n);\n\n-- Invalid enum value\nSELECT jsonschema_validation_errors(\n  '{\n    \"type\": \"object\",\n    \"properties\": {\n      \"status\": { \"type\": \"string\", \"enum\": [\"active\", \"inactive\", \"pending\"] },\n      \"email\": { \"type\": [\"string\", \"null\"], \"format\": \"email\" }\n    },\n    \"required\": [\"status\"],\n    \"additionalProperties\": false\n  }'::json,\n  '{\"status\": \"disabled\", \"email\": null}'::json\n);\n\n-- Invalid email format (assuming format is validated)\nSELECT jsonschema_validation_errors(\n  '{\n    \"type\": \"object\",\n    \"properties\": {\n      \"status\": { \"type\": \"string\", \"enum\": [\"active\", \"inactive\", \"pending\"] },\n      \"email\": { \"type\": [\"string\", \"null\"], \"format\": \"email\" }\n    },\n    \"required\": [\"status\"],\n    \"additionalProperties\": false\n  }'::json,\n  '{\"status\": \"active\", \"email\": \"not-an-email\"}'::json\n);\n\n-- Extra property not allowed\nSELECT jsonschema_validation_errors(\n  '{\n    \"type\": \"object\",\n    \"properties\": {\n      \"status\": { \"type\": \"string\", \"enum\": [\"active\", \"inactive\", 
\"pending\"] },\n      \"email\": { \"type\": [\"string\", \"null\"], \"format\": \"email\" }\n    },\n    \"required\": [\"status\"],\n    \"additionalProperties\": false\n  }'::json,\n  '{\"status\": \"active\", \"extra\": \"should not be here\"}'::json\n);\n"
  },
  {
    "path": "docker-compose/ext-src/pg_jsonschema-src/sql/jsonschema_valid_api.sql",
    "content": "-- Define schema\nSELECT jsonschema_is_valid('{\n  \"type\": \"object\",\n  \"properties\": {\n    \"username\": { \"type\": \"string\" },\n    \"age\": { \"type\": \"integer\" }\n  },\n  \"required\": [\"username\"]\n}'::json);\n\n-- Valid instance\nSELECT jsonschema_validation_errors(\n  '{\n    \"type\": \"object\",\n    \"properties\": {\n      \"username\": { \"type\": \"string\" },\n      \"age\": { \"type\": \"integer\" }\n    },\n    \"required\": [\"username\"]\n  }'::json,\n  '{\"username\": \"alice\", \"age\": 25}'::json\n);\n\n-- Invalid instance: missing required \"username\"\nSELECT jsonschema_validation_errors(\n  '{\n    \"type\": \"object\",\n    \"properties\": {\n      \"username\": { \"type\": \"string\" },\n      \"age\": { \"type\": \"integer\" }\n    },\n    \"required\": [\"username\"]\n  }'::json,\n  '{\"age\": 25}'::json\n);\n\n-- Invalid instance: wrong type for \"age\"\nSELECT jsonschema_validation_errors(\n  '{\n    \"type\": \"object\",\n    \"properties\": {\n      \"username\": { \"type\": \"string\" },\n      \"age\": { \"type\": \"integer\" }\n    },\n    \"required\": [\"username\"]\n  }'::json,\n  '{\"username\": \"bob\", \"age\": \"twenty\"}'::json\n);\n"
  },
  {
    "path": "docker-compose/ext-src/pg_repack-src/test-upgrade.sh",
    "content": "#!/bin/sh\nset -ex\ncd \"$(dirname ${0})\"\nPG_REGRESS=$(dirname \"$(pg_config --pgxs)\")/../test/regress/pg_regress\n${PG_REGRESS} --use-existing --inputdir=./regress --bindir='/usr/local/pgsql/bin' --dbname=contrib_regression repack-setup repack-run error-on-invalid-idx no-error-on-invalid-idx after-schema repack-check nosuper get_order_by trigger\n"
  },
  {
    "path": "docker-compose/ext-src/pg_roaringbitmap-src/regular-test.sh",
    "content": "#!/bin/sh\nset -ex\ncd \"$(dirname ${0})\"\ndropdb --if-exist contrib_regression\ncreatedb contrib_regression\nPG_REGRESS=$(dirname \"$(pg_config --pgxs)\")/../test/regress/pg_regress\n${PG_REGRESS} --use-existing --inputdir=./ --bindir='/usr/local/pgsql/bin'    --dbname=contrib_regression roaringbitmap"
  },
  {
    "path": "docker-compose/ext-src/pg_roaringbitmap-src/test-upgrade.patch",
    "content": "diff --git a/expected/roaringbitmap.out b/expected/roaringbitmap.out\nindex de70531..a5f7c15 100644\n--- a/expected/roaringbitmap.out\n+++ b/expected/roaringbitmap.out\n@@ -1,7 +1,6 @@\n --\n --  Test roaringbitmap extension\n --\n-CREATE EXTENSION if not exists roaringbitmap;\n -- Test input and output\n set roaringbitmap.output_format='array';\n set extra_float_digits = 0;\ndiff --git a/sql/roaringbitmap.sql b/sql/roaringbitmap.sql\nindex a0e9c74..84bc966 100644\n--- a/sql/roaringbitmap.sql\n+++ b/sql/roaringbitmap.sql\n@@ -2,8 +2,6 @@\n --  Test roaringbitmap extension\n --\n \n-CREATE EXTENSION if not exists roaringbitmap;\n-\n -- Test input and output\n \n set roaringbitmap.output_format='array';\n"
  },
  {
    "path": "docker-compose/ext-src/pg_roaringbitmap-src/test-upgrade.sh",
    "content": "#!/bin/sh\nset -ex\ncd \"$(dirname ${0})\"\npatch -p1 <test-upgrade.patch\nPG_REGRESS=$(dirname \"$(pg_config --pgxs)\")/../test/regress/pg_regress\n${PG_REGRESS} --use-existing --inputdir=./ --bindir='/usr/local/pgsql/bin'    --dbname=contrib_regression roaringbitmap"
  },
  {
    "path": "docker-compose/ext-src/pg_semver-src/regular-test.sh",
    "content": "#!/bin/bash\nset -ex\n# On v16 this test has to create a type, which is impossible without superuser access,\n# so skip the test there for now.\nif [[ \"${PG_VERSION}\" = v16 ]]; then\n  exit 0\nfi\ncd \"$(dirname ${0})\"\ndropdb --if-exist contrib_regression\ncreatedb contrib_regression\nPG_REGRESS=$(dirname \"$(pg_config --pgxs)\")/../test/regress/pg_regress\n${PG_REGRESS} --use-existing --inputdir=./ --bindir='/usr/local/pgsql/bin'    --inputdir=test --dbname=contrib_regression base corpus"
  },
  {
    "path": "docker-compose/ext-src/pg_semver-src/test-upgrade-v16.patch",
    "content": "diff --git a/test/sql/base.sql b/test/sql/base.sql\nindex af599d8..2eed91b 100644\n--- a/test/sql/base.sql\n+++ b/test/sql/base.sql\n@@ -2,7 +2,6 @@\n BEGIN;\n \n \\i test/pgtap-core.sql\n-\\i sql/semver.sql\n \n SELECT plan(334);\n --SELECT * FROM no_plan();\ndiff --git a/test/sql/corpus.sql b/test/sql/corpus.sql\nindex 1f5f637..a519905 100644\n--- a/test/sql/corpus.sql\n+++ b/test/sql/corpus.sql\n@@ -4,7 +4,6 @@ BEGIN;\n -- Test the SemVer corpus from https://regex101.com/r/Ly7O1x/3/.\n \n \\i test/pgtap-core.sql\n-\\i sql/semver.sql\n \n SELECT plan(71);\n --SELECT * FROM no_plan();\n"
  },
  {
    "path": "docker-compose/ext-src/pg_semver-src/test-upgrade-v17.patch",
    "content": "diff --git a/test/sql/base.sql b/test/sql/base.sql\nindex 53adb30..2eed91b 100644\n--- a/test/sql/base.sql\n+++ b/test/sql/base.sql\n@@ -2,7 +2,6 @@\n BEGIN;\n \n \\i test/pgtap-core.sql\n-CREATE EXTENSION semver;\n \n SELECT plan(334);\n --SELECT * FROM no_plan();\ndiff --git a/test/sql/corpus.sql b/test/sql/corpus.sql\nindex c0fe98e..39cdd2e 100644\n--- a/test/sql/corpus.sql\n+++ b/test/sql/corpus.sql\n@@ -4,7 +4,6 @@ BEGIN;\n -- Test the SemVer corpus from https://regex101.com/r/Ly7O1x/3/.\n \n \\i test/pgtap-core.sql\n-CREATE EXTENSION semver;\n \n SELECT plan(76);\n --SELECT * FROM no_plan();\n"
  },
  {
    "path": "docker-compose/ext-src/pg_semver-src/test-upgrade.sh",
    "content": "#!/bin/sh\nset -ex\ncd \"$(dirname ${0})\"\npatch -p1 <test-upgrade-${PG_VERSION}.patch\npsql -d contrib_regression -c \"DROP EXTENSION IF EXISTS pgtap\"\nPG_REGRESS=$(dirname \"$(pg_config --pgxs)\")/../test/regress/pg_regress\n${PG_REGRESS} --use-existing --inputdir=./ --bindir='/usr/local/pgsql/bin'    --inputdir=test --dbname=contrib_regression base corpus"
  },
  {
    "path": "docker-compose/ext-src/pg_session_jwt-src/Makefile",
    "content": "EXTENSION = pg_session_jwt\n\nREGRESS = basic_functions\nREGRESS_OPTS = --load-extension=$(EXTENSION)\nexport PGOPTIONS = -c pg_session_jwt.jwk={\"crv\":\"Ed25519\",\"kty\":\"OKP\",\"x\":\"R_Abz-63zJ00l-IraL5fQhwkhGVZCSooQFV5ntC3C7M\"}\n\nPG_CONFIG ?= pg_config\nPGXS := $(shell $(PG_CONFIG) --pgxs)\nPG_REGRESS := $(dir $(PGXS))../../src/test/regress/pg_regress\n.PHONY: installcheck\ninstallcheck:\n\tdropdb --if-exists contrib_regression\n\tcreatedb contrib_regression\n\tpsql -d contrib_regression -c \"CREATE EXTENSION $(EXTENSION)\"\n\t$(PG_REGRESS) --use-existing --dbname=contrib_regression $(REGRESS)\n"
  },
  {
    "path": "docker-compose/ext-src/pg_session_jwt-src/expected/basic_functions.out",
    "content": "-- Basic functionality tests for pg_session_jwt\n-- Test auth.init() function\nSELECT auth.init();\n init \n------\n \n(1 row)\n\n-- Test an invalid JWT\nSELECT auth.jwt_session_init('INVALID-JWT');\nERROR:  invalid JWT encoding\n-- Test creating a session with an expired JWT\nSELECT auth.jwt_session_init('eyJhbGciOiJFZERTQSJ9.eyJleHAiOjE3NDI1NjQ0MzIsImlhdCI6MTc0MjU2NDI1MiwianRpIjo0MjQyNDIsInN1YiI6InVzZXIxMjMifQ.A6FwKuaSduHB9O7Gz37g0uoD_U9qVS0JNtT7YABGVgB7HUD1AMFc9DeyhNntWBqncg8k5brv-hrNTuUh5JYMAw');\nERROR:  Token used after it has expired\nDETAIL:  exp=1742564432\n-- Test creating a session with a valid JWT\nSELECT auth.jwt_session_init('eyJhbGciOiJFZERTQSJ9.eyJleHAiOjQ4OTYxNjQyNTIsImlhdCI6MTc0MjU2NDI1MiwianRpIjo0MzQzNDMsInN1YiI6InVzZXIxMjMifQ.2TXVgjb6JSUq6_adlvp-m_SdOxZSyGS30RS9TLB0xu2N83dMSs2NybwE1NMU8Fb0tcAZR_ET7M2rSxbTrphfCg');\n jwt_session_init \n------------------\n \n(1 row)\n\n-- Test auth.session() function\nSELECT auth.session();\n                                 session                                 \n-------------------------------------------------------------------------\n {\"exp\": 4896164252, \"iat\": 1742564252, \"jti\": 434343, \"sub\": \"user123\"}\n(1 row)\n\n-- Test auth.user_id() function\nSELECT auth.user_id() AS user_id;\n user_id \n---------\n user123\n(1 row)\n\n"
  },
  {
    "path": "docker-compose/ext-src/pg_session_jwt-src/sql/basic_functions.sql",
    "content": "-- Basic functionality tests for pg_session_jwt\n\n-- Test auth.init() function\nSELECT auth.init();\n\n-- Test an invalid JWT\nSELECT auth.jwt_session_init('INVALID-JWT');\n\n-- Test creating a session with an expired JWT\nSELECT auth.jwt_session_init('eyJhbGciOiJFZERTQSJ9.eyJleHAiOjE3NDI1NjQ0MzIsImlhdCI6MTc0MjU2NDI1MiwianRpIjo0MjQyNDIsInN1YiI6InVzZXIxMjMifQ.A6FwKuaSduHB9O7Gz37g0uoD_U9qVS0JNtT7YABGVgB7HUD1AMFc9DeyhNntWBqncg8k5brv-hrNTuUh5JYMAw');\n\n-- Test creating a session with a valid JWT\nSELECT auth.jwt_session_init('eyJhbGciOiJFZERTQSJ9.eyJleHAiOjQ4OTYxNjQyNTIsImlhdCI6MTc0MjU2NDI1MiwianRpIjo0MzQzNDMsInN1YiI6InVzZXIxMjMifQ.2TXVgjb6JSUq6_adlvp-m_SdOxZSyGS30RS9TLB0xu2N83dMSs2NybwE1NMU8Fb0tcAZR_ET7M2rSxbTrphfCg');\n\n-- Test auth.session() function\nSELECT auth.session();\n\n-- Test auth.user_id() function\nSELECT auth.user_id() AS user_id;"
  },
  {
    "path": "docker-compose/ext-src/pg_tiktoken-src/Makefile",
    "content": "PG_CONFIG ?= pg_config\nPG_REGRESS = $(shell dirname $$($(PG_CONFIG) --pgxs))/../../src/test/regress/pg_regress\nREGRESS = pg_tiktoken\n\ninstallcheck: regression-test\n\nregression-test:\n\tdropdb --if-exists contrib_regression\n\tcreatedb contrib_regression\n\t$(PG_REGRESS) --inputdir=. --outputdir=. --use-existing --dbname=contrib_regression $(REGRESS)"
  },
  {
    "path": "docker-compose/ext-src/pg_tiktoken-src/expected/pg_tiktoken.out",
    "content": "-- Load the extension\nCREATE EXTENSION IF NOT EXISTS pg_tiktoken;\n-- Test encoding function\nSELECT tiktoken_encode('cl100k_base', 'Hello world!');\n tiktoken_encode \n-----------------\n {9906,1917,0}\n(1 row)\n\n-- Test token count function\nSELECT tiktoken_count('cl100k_base', 'Hello world!');\n tiktoken_count \n----------------\n              3\n(1 row)\n\n-- Test encoding function with a different model\nSELECT tiktoken_encode('r50k_base', 'PostgreSQL is amazing!');\n     tiktoken_encode     \n-------------------------\n {6307,47701,318,4998,0}\n(1 row)\n\n-- Test token count function with the same model\nSELECT tiktoken_count('r50k_base', 'PostgreSQL is amazing!');\n tiktoken_count \n----------------\n              5\n(1 row)\n\n-- Edge cases: Empty string\nSELECT tiktoken_encode('cl100k_base', '');\n tiktoken_encode \n-----------------\n {}\n(1 row)\n\nSELECT tiktoken_count('cl100k_base', '');\n tiktoken_count \n----------------\n              0\n(1 row)\n\n-- Edge cases: Long text\nSELECT tiktoken_count('cl100k_base', repeat('word ', 100));\n tiktoken_count \n----------------\n            101\n(1 row)\n\n-- Edge case: Invalid encoding\nSELECT tiktoken_encode('invalid_model', 'Test') AS should_fail;\nERROR:  'invalid_model': unknown model or encoder\n"
  },
  {
    "path": "docker-compose/ext-src/pg_tiktoken-src/sql/pg_tiktoken.sql",
    "content": "-- Load the extension\nCREATE EXTENSION IF NOT EXISTS pg_tiktoken;\n\n-- Test encoding function\nSELECT tiktoken_encode('cl100k_base', 'Hello world!');\n\n-- Test token count function\nSELECT tiktoken_count('cl100k_base', 'Hello world!');\n\n-- Test encoding function with a different model\nSELECT tiktoken_encode('r50k_base', 'PostgreSQL is amazing!');\n\n-- Test token count function with the same model\nSELECT tiktoken_count('r50k_base', 'PostgreSQL is amazing!');\n\n-- Edge cases: Empty string\nSELECT tiktoken_encode('cl100k_base', '');\nSELECT tiktoken_count('cl100k_base', '');\n\n-- Edge cases: Long text\nSELECT tiktoken_count('cl100k_base', repeat('word ', 100));\n\n-- Edge case: Invalid encoding\nSELECT tiktoken_encode('invalid_model', 'Test') AS should_fail;"
  },
  {
    "path": "docker-compose/ext-src/pg_uuidv7-src/regular-test.sh",
    "content": "#!/bin/sh\nset -ex\ncd \"$(dirname \"${0}\")\"\ndropdb --if-exist contrib_regression\ncreatedb contrib_regression\nPG_REGRESS=$(dirname \"$(pg_config --pgxs)\")/../test/regress/pg_regress\n${PG_REGRESS} --use-existing --inputdir=./ --bindir='/usr/local/pgsql/bin'    --inputdir=test --dbname=contrib_regression 001_setup 002_uuid_generate_v7 003_uuid_v7_to_timestamptz 004_uuid_timestamptz_to_v7 005_uuid_v7_to_timestamp 006_uuid_timestamp_to_v7"
  },
  {
    "path": "docker-compose/ext-src/pg_uuidv7-src/test-upgrade.sh",
    "content": "#!/bin/sh\nset -ex\ncd \"$(dirname ${0})\"\nPG_REGRESS=$(dirname \"$(pg_config --pgxs)\")/../test/regress/pg_regress\n${PG_REGRESS} --use-existing --inputdir=./ --bindir='/usr/local/pgsql/bin'    --inputdir=test --dbname=contrib_regression  002_uuid_generate_v7 003_uuid_v7_to_timestamptz 004_uuid_timestamptz_to_v7 005_uuid_v7_to_timestamp 006_uuid_timestamp_to_v7"
  },
  {
    "path": "docker-compose/ext-src/pgjwt-src/neon-test.sh",
    "content": "#!/bin/bash\nset -ex\ncd \"$(dirname \"${0}\")\"\ndropdb --if-exists contrib_regression\ncreatedb contrib_regression\npg_prove -d contrib_regression test.sql"
  },
  {
    "path": "docker-compose/ext-src/pgjwt-src/test-upgrade.patch",
    "content": "diff --git a/test.sql b/test.sql\nindex d7a0ca8..f15bc76 100644\n--- a/test.sql\n+++ b/test.sql\n@@ -9,9 +9,7 @@\n \\set ON_ERROR_STOP true\n \\set QUIET 1\n \n-CREATE EXTENSION pgcrypto;\n-CREATE EXTENSION pgtap;\n-CREATE EXTENSION pgjwt;\n+CREATE EXTENSION IF NOT EXISTS pgtap;\n \n BEGIN;\n SELECT plan(23);\n"
  },
  {
    "path": "docker-compose/ext-src/pgjwt-src/test-upgrade.sh",
    "content": "#!/bin/sh\nset -ex\ncd \"$(dirname ${0})\"\npatch -p1 <test-upgrade.patch\npg_prove -d contrib_regression test.sql"
  },
  {
    "path": "docker-compose/ext-src/pgrag-src/Makefile",
    "content": "EXTENSION = rag\nMODULE_big = rag\nOBJS = $(patsubst %.rs,%.o,$(wildcard src/*.rs))\n\nREGRESS = basic_functions text_processing api_keys chunking_functions document_processing embedding_api_functions voyageai_functions\nREGRESS_OPTS = --load-extension=vector --load-extension=rag\n\nPG_CONFIG = pg_config\nPGXS := $(shell $(PG_CONFIG) --pgxs)\ninclude $(PGXS)\n"
  },
  {
    "path": "docker-compose/ext-src/pgrag-src/expected/api_keys.out",
    "content": "-- API key function tests\nSELECT rag.anthropic_set_api_key('test_key');\n anthropic_set_api_key \n-----------------------\n \n(1 row)\n\nSELECT rag.anthropic_get_api_key();\n anthropic_get_api_key \n-----------------------\n test_key\n(1 row)\n\nSELECT rag.openai_set_api_key('test_key');\n openai_set_api_key \n--------------------\n \n(1 row)\n\nSELECT rag.openai_get_api_key();\n openai_get_api_key \n--------------------\n test_key\n(1 row)\n\nSELECT rag.fireworks_set_api_key('test_key');\n fireworks_set_api_key \n-----------------------\n \n(1 row)\n\nSELECT rag.fireworks_get_api_key();\n fireworks_get_api_key \n-----------------------\n test_key\n(1 row)\n\nSELECT rag.voyageai_set_api_key('test_key');\n voyageai_set_api_key \n----------------------\n \n(1 row)\n\nSELECT rag.voyageai_get_api_key();\n voyageai_get_api_key \n----------------------\n test_key\n(1 row)\n\n"
  },
  {
    "path": "docker-compose/ext-src/pgrag-src/expected/basic_functions.out",
    "content": "-- Basic function tests\nSELECT rag.markdown_from_html('<p>Hello</p>');\n markdown_from_html \n--------------------\n Hello\n(1 row)\n\nSELECT array_length(rag.chunks_by_character_count('the cat sat on the mat', 10, 5), 1);\n array_length \n--------------\n            3\n(1 row)\n\n"
  },
  {
    "path": "docker-compose/ext-src/pgrag-src/expected/chunking_functions.out",
    "content": "-- Chunking function tests\nSELECT rag.chunks_by_character_count('the cat sat on the mat', 10, 5);\n       chunks_by_character_count       \n---------------------------------------\n {\"the cat\",\"cat sat on\",\"on the mat\"}\n(1 row)\n\nSELECT rag.chunks_by_character_count('Lorem ipsum dolor sit amet, consectetur adipiscing elit. Sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.', 20, 10);\n                                                                                     chunks_by_character_count                                                                                      \n----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------\n {\"Lorem ipsum dolor\",\"dolor sit amet,\",\"amet, consectetur\",\"adipiscing elit.\",\"Sed do eiusmod\",\"do eiusmod tempor\",\"tempor incididunt ut\",\"ut labore et dolore\",\"et dolore magna\",\"magna aliqua.\"}\n(1 row)\n\nSELECT (rag.chunks_by_character_count('the cat', 10, 0))[1];\n chunks_by_character_count \n---------------------------\n the cat\n(1 row)\n\nSELECT rag.chunks_by_character_count('', 10, 5);\n chunks_by_character_count \n---------------------------\n {}\n(1 row)\n\nSELECT rag.chunks_by_character_count('a b c d e f g h i j k l m n o p', 5, 2);\n                    chunks_by_character_count                    \n-----------------------------------------------------------------\n {\"a b c\",\"c d e\",\"e f g\",\"g h i\",\"i j k\",\"k l m\",\"m n o\",\"o p\"}\n(1 row)\n\n"
  },
  {
    "path": "docker-compose/ext-src/pgrag-src/expected/document_processing.out",
    "content": "-- HTML to Markdown conversion tests\nSELECT rag.markdown_from_html('<p>Hello</p>');\n markdown_from_html \n--------------------\n Hello\n(1 row)\n\nSELECT rag.markdown_from_html('<p>Hello <i>world</i></p>');\n markdown_from_html \n--------------------\n Hello _world_\n(1 row)\n\nSELECT rag.markdown_from_html('<h1>Title</h1><p>Paragraph</p>');\n markdown_from_html \n--------------------\n # Title           +\n                   +\n Paragraph\n(1 row)\n\nSELECT rag.markdown_from_html('<ul><li>Item 1</li><li>Item 2</li></ul>');\n markdown_from_html \n--------------------\n *   Item 1        +\n *   Item 2\n(1 row)\n\nSELECT rag.markdown_from_html('<a href=\"https://example.com\">Link</a>');\n     markdown_from_html      \n-----------------------------\n [Link](https://example.com)\n(1 row)\n\n-- Note: text_from_pdf and text_from_docx require binary input which is harder to test in regression tests\n-- We'll test that the functions exist and have the right signature\nSELECT 'text_from_pdf_exists' AS test_name,\n       count(*) > 0 AS result\nFROM pg_proc\nWHERE proname = 'text_from_pdf'\n  AND pronamespace = (SELECT oid FROM pg_namespace WHERE nspname = 'rag');\n      test_name       | result \n----------------------+--------\n text_from_pdf_exists | t\n(1 row)\n\nSELECT 'text_from_docx_exists' AS test_name,\n       count(*) > 0 AS result\nFROM pg_proc\nWHERE proname = 'text_from_docx'\n  AND pronamespace = (SELECT oid FROM pg_namespace WHERE nspname = 'rag');\n       test_name       | result \n-----------------------+--------\n text_from_docx_exists | t\n(1 row)\n\n"
  },
  {
    "path": "docker-compose/ext-src/pgrag-src/expected/embedding_api_functions.out",
    "content": "-- Test embedding functions exist with correct signatures\n-- OpenAI embedding functions\nSELECT 'openai_text_embedding_exists' AS test_name,\n       count(*) > 0 AS result\nFROM pg_proc\nWHERE proname = 'openai_text_embedding'\n  AND pronamespace = (SELECT oid FROM pg_namespace WHERE nspname = 'rag');\n          test_name           | result \n------------------------------+--------\n openai_text_embedding_exists | t\n(1 row)\n\nSELECT 'openai_text_embedding_3_small_exists' AS test_name,\n       count(*) > 0 AS result\nFROM pg_proc\nWHERE proname = 'openai_text_embedding_3_small'\n  AND pronamespace = (SELECT oid FROM pg_namespace WHERE nspname = 'rag');\n              test_name               | result \n--------------------------------------+--------\n openai_text_embedding_3_small_exists | t\n(1 row)\n\nSELECT 'openai_text_embedding_3_large_exists' AS test_name,\n       count(*) > 0 AS result\nFROM pg_proc\nWHERE proname = 'openai_text_embedding_3_large'\n  AND pronamespace = (SELECT oid FROM pg_namespace WHERE nspname = 'rag');\n              test_name               | result \n--------------------------------------+--------\n openai_text_embedding_3_large_exists | t\n(1 row)\n\nSELECT 'openai_text_embedding_ada_002_exists' AS test_name,\n       count(*) > 0 AS result\nFROM pg_proc\nWHERE proname = 'openai_text_embedding_ada_002'\n  AND pronamespace = (SELECT oid FROM pg_namespace WHERE nspname = 'rag');\n              test_name               | result \n--------------------------------------+--------\n openai_text_embedding_ada_002_exists | t\n(1 row)\n\n-- Fireworks embedding functions\nSELECT 'fireworks_nomic_embed_text_v1_exists' AS test_name,\n       count(*) > 0 AS result\nFROM pg_proc\nWHERE proname = 'fireworks_nomic_embed_text_v1'\n  AND pronamespace = (SELECT oid FROM pg_namespace WHERE nspname = 'rag');\n              test_name               | result \n--------------------------------------+--------\n fireworks_nomic_embed_text_v1_exists 
| t\n(1 row)\n\nSELECT 'fireworks_nomic_embed_text_v15_exists' AS test_name,\n       count(*) > 0 AS result\nFROM pg_proc\nWHERE proname = 'fireworks_nomic_embed_text_v15'\n  AND pronamespace = (SELECT oid FROM pg_namespace WHERE nspname = 'rag');\n               test_name               | result \n---------------------------------------+--------\n fireworks_nomic_embed_text_v15_exists | t\n(1 row)\n\nSELECT 'fireworks_text_embedding_exists' AS test_name,\n       count(*) > 0 AS result\nFROM pg_proc\nWHERE proname = 'fireworks_text_embedding'\n  AND pronamespace = (SELECT oid FROM pg_namespace WHERE nspname = 'rag');\n            test_name            | result \n---------------------------------+--------\n fireworks_text_embedding_exists | t\n(1 row)\n\nSELECT 'fireworks_text_embedding_thenlper_gte_base_exists' AS test_name,\n       count(*) > 0 AS result\nFROM pg_proc\nWHERE proname = 'fireworks_text_embedding_thenlper_gte_base'\n  AND pronamespace = (SELECT oid FROM pg_namespace WHERE nspname = 'rag');\n                     test_name                     | result \n---------------------------------------------------+--------\n fireworks_text_embedding_thenlper_gte_base_exists | t\n(1 row)\n\nSELECT 'fireworks_text_embedding_thenlper_gte_large_exists' AS test_name,\n       count(*) > 0 AS result\nFROM pg_proc\nWHERE proname = 'fireworks_text_embedding_thenlper_gte_large'\n  AND pronamespace = (SELECT oid FROM pg_namespace WHERE nspname = 'rag');\n                     test_name                      | result \n----------------------------------------------------+--------\n fireworks_text_embedding_thenlper_gte_large_exists | t\n(1 row)\n\nSELECT 'fireworks_text_embedding_whereisai_uae_large_v1_exists' AS test_name,\n       count(*) > 0 AS result\nFROM pg_proc\nWHERE proname = 'fireworks_text_embedding_whereisai_uae_large_v1'\n  AND pronamespace = (SELECT oid FROM pg_namespace WHERE nspname = 'rag');\n                       test_name                        | result 
\n--------------------------------------------------------+--------\n fireworks_text_embedding_whereisai_uae_large_v1_exists | t\n(1 row)\n\n"
  },
  {
    "path": "docker-compose/ext-src/pgrag-src/expected/embedding_functions.out",
    "content": "BEGIN\nCREATE EXTENSION IF NOT EXISTS vector;\nDROP EXTENSION IF EXISTS rag CASCADE;\nCREATE EXTENSION rag CASCADE;\ntest_name|result\nopenai_embedding_dimensions_test|t\ntest_name|result\nfireworks_embedding_dimensions_test|t\nCOMMIT\n"
  },
  {
    "path": "docker-compose/ext-src/pgrag-src/expected/text_processing.out",
    "content": "-- Text processing function tests\nSELECT rag.markdown_from_html('<p>Hello <i>world</i></p>');\n markdown_from_html \n--------------------\n Hello _world_\n(1 row)\n\nSELECT rag.chunks_by_character_count('the cat sat on the mat', 10, 5);\n       chunks_by_character_count       \n---------------------------------------\n {\"the cat\",\"cat sat on\",\"on the mat\"}\n(1 row)\n\n"
  },
  {
    "path": "docker-compose/ext-src/pgrag-src/expected/voyageai_functions.out",
    "content": "-- Test VoyageAI API key functions\nSELECT 'voyageai_api_key_test' AS test_name, \n       (SELECT rag.voyageai_set_api_key('test_key') IS NULL) AS result;\n       test_name       | result \n-----------------------+--------\n voyageai_api_key_test | t\n(1 row)\n\nSELECT 'voyageai_get_api_key_test' AS test_name,\n       (SELECT rag.voyageai_get_api_key() = 'test_key') AS result;\n         test_name         | result \n---------------------------+--------\n voyageai_get_api_key_test | t\n(1 row)\n\n-- Test VoyageAI embedding functions exist\nSELECT 'voyageai_embedding_exists' AS test_name,\n       count(*) > 0 AS result\nFROM pg_proc\nWHERE proname = 'voyageai_embedding'\n  AND pronamespace = (SELECT oid FROM pg_namespace WHERE nspname = 'rag');\n         test_name         | result \n---------------------------+--------\n voyageai_embedding_exists | t\n(1 row)\n\nSELECT 'voyageai_embedding_3_exists' AS test_name,\n       count(*) > 0 AS result\nFROM pg_proc\nWHERE proname = 'voyageai_embedding_3'\n  AND pronamespace = (SELECT oid FROM pg_namespace WHERE nspname = 'rag');\n          test_name          | result \n-----------------------------+--------\n voyageai_embedding_3_exists | t\n(1 row)\n\nSELECT 'voyageai_embedding_3_lite_exists' AS test_name,\n       count(*) > 0 AS result\nFROM pg_proc\nWHERE proname = 'voyageai_embedding_3_lite'\n  AND pronamespace = (SELECT oid FROM pg_namespace WHERE nspname = 'rag');\n            test_name             | result \n----------------------------------+--------\n voyageai_embedding_3_lite_exists | t\n(1 row)\n\nSELECT 'voyageai_embedding_code_2_exists' AS test_name,\n       count(*) > 0 AS result\nFROM pg_proc\nWHERE proname = 'voyageai_embedding_code_2'\n  AND pronamespace = (SELECT oid FROM pg_namespace WHERE nspname = 'rag');\n            test_name             | result \n----------------------------------+--------\n voyageai_embedding_code_2_exists | t\n(1 row)\n\nSELECT 'voyageai_embedding_finance_2_exists' AS 
test_name,\n       count(*) > 0 AS result\nFROM pg_proc\nWHERE proname = 'voyageai_embedding_finance_2'\n  AND pronamespace = (SELECT oid FROM pg_namespace WHERE nspname = 'rag');\n              test_name              | result \n-------------------------------------+--------\n voyageai_embedding_finance_2_exists | t\n(1 row)\n\nSELECT 'voyageai_embedding_law_2_exists' AS test_name,\n       count(*) > 0 AS result\nFROM pg_proc\nWHERE proname = 'voyageai_embedding_law_2'\n  AND pronamespace = (SELECT oid FROM pg_namespace WHERE nspname = 'rag');\n            test_name            | result \n---------------------------------+--------\n voyageai_embedding_law_2_exists | t\n(1 row)\n\nSELECT 'voyageai_embedding_multilingual_2_exists' AS test_name,\n       count(*) > 0 AS result\nFROM pg_proc\nWHERE proname = 'voyageai_embedding_multilingual_2'\n  AND pronamespace = (SELECT oid FROM pg_namespace WHERE nspname = 'rag');\n                test_name                 | result \n------------------------------------------+--------\n voyageai_embedding_multilingual_2_exists | t\n(1 row)\n\n-- Test VoyageAI reranking functions exist\nSELECT 'voyageai_rerank_distance_exists' AS test_name,\n       count(*) > 0 AS result\nFROM pg_proc\nWHERE proname = 'voyageai_rerank_distance'\n  AND pronamespace = (SELECT oid FROM pg_namespace WHERE nspname = 'rag');\n            test_name            | result \n---------------------------------+--------\n voyageai_rerank_distance_exists | t\n(1 row)\n\nSELECT 'voyageai_rerank_score_exists' AS test_name,\n       count(*) > 0 AS result\nFROM pg_proc\nWHERE proname = 'voyageai_rerank_score'\n  AND pronamespace = (SELECT oid FROM pg_namespace WHERE nspname = 'rag');\n          test_name           | result \n------------------------------+--------\n voyageai_rerank_score_exists | t\n(1 row)\n\n-- Test VoyageAI function signatures\nSELECT 'voyageai_embedding_signature' AS test_name,\n       count(*) > 0 AS result\nFROM pg_proc\nWHERE proname = 
'voyageai_embedding'\n  AND pronamespace = (SELECT oid FROM pg_namespace WHERE nspname = 'rag')\n  AND pronargs = 3;\n          test_name           | result \n------------------------------+--------\n voyageai_embedding_signature | t\n(1 row)\n\nSELECT 'voyageai_rerank_distance_signature' AS test_name,\n       count(*) > 0 AS result\nFROM pg_proc\nWHERE proname = 'voyageai_rerank_distance'\n  AND pronamespace = (SELECT oid FROM pg_namespace WHERE nspname = 'rag')\n  AND pronargs IN (3, 4);\n             test_name              | result \n------------------------------------+--------\n voyageai_rerank_distance_signature | t\n(1 row)\n\nSELECT 'voyageai_rerank_score_signature' AS test_name,\n       count(*) > 0 AS result\nFROM pg_proc\nWHERE proname = 'voyageai_rerank_score'\n  AND pronamespace = (SELECT oid FROM pg_namespace WHERE nspname = 'rag')\n  AND pronargs IN (3, 4);\n            test_name            | result \n---------------------------------+--------\n voyageai_rerank_score_signature | t\n(1 row)\n\n"
  },
  {
    "path": "docker-compose/ext-src/pgrag-src/regular-test.sh",
    "content": "#!/bin/sh\nset -ex\ncd \"$(dirname \"${0}\")\"\ndropdb --if-exist contrib_regression\ncreatedb contrib_regression\n. ../alter_db.sh\npsql -d contrib_regression -c \"CREATE EXTENSION vector\" -c \"CREATE EXTENSION rag\"\nPG_REGRESS=$(dirname \"$(pg_config --pgxs)\")/../test/regress/pg_regress\n${PG_REGRESS} --inputdir=./ --bindir='/usr/local/pgsql/bin'    --use-existing --load-extension=vector --load-extension=rag --dbname=contrib_regression basic_functions text_processing api_keys chunking_functions document_processing embedding_api_functions voyageai_functions\n"
  },
  {
    "path": "docker-compose/ext-src/pgrag-src/sql/api_keys.sql",
    "content": "-- API key function tests\nSELECT rag.anthropic_set_api_key('test_key');\n\nSELECT rag.anthropic_get_api_key();\n\nSELECT rag.openai_set_api_key('test_key');\n\nSELECT rag.openai_get_api_key();\n\nSELECT rag.fireworks_set_api_key('test_key');\n\nSELECT rag.fireworks_get_api_key();\n\nSELECT rag.voyageai_set_api_key('test_key');\n\nSELECT rag.voyageai_get_api_key();\n"
  },
  {
    "path": "docker-compose/ext-src/pgrag-src/sql/basic_functions.sql",
    "content": "-- Basic function tests\nSELECT rag.markdown_from_html('<p>Hello</p>');\n\nSELECT array_length(rag.chunks_by_character_count('the cat sat on the mat', 10, 5), 1);\n"
  },
  {
    "path": "docker-compose/ext-src/pgrag-src/sql/chunking_functions.sql",
    "content": "-- Chunking function tests\nSELECT rag.chunks_by_character_count('the cat sat on the mat', 10, 5);\n\nSELECT rag.chunks_by_character_count('Lorem ipsum dolor sit amet, consectetur adipiscing elit. Sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.', 20, 10);\n\nSELECT (rag.chunks_by_character_count('the cat', 10, 0))[1];\n\nSELECT rag.chunks_by_character_count('', 10, 5);\n\nSELECT rag.chunks_by_character_count('a b c d e f g h i j k l m n o p', 5, 2);\n\n"
  },
  {
    "path": "docker-compose/ext-src/pgrag-src/sql/document_processing.sql",
    "content": "-- HTML to Markdown conversion tests\nSELECT rag.markdown_from_html('<p>Hello</p>');\n\nSELECT rag.markdown_from_html('<p>Hello <i>world</i></p>');\n\nSELECT rag.markdown_from_html('<h1>Title</h1><p>Paragraph</p>');\n\nSELECT rag.markdown_from_html('<ul><li>Item 1</li><li>Item 2</li></ul>');\n\nSELECT rag.markdown_from_html('<a href=\"https://example.com\">Link</a>');\n\n-- Note: text_from_pdf and text_from_docx require binary input which is harder to test in regression tests\n-- We'll test that the functions exist and have the right signature\nSELECT 'text_from_pdf_exists' AS test_name,\n       count(*) > 0 AS result\nFROM pg_proc\nWHERE proname = 'text_from_pdf'\n  AND pronamespace = (SELECT oid FROM pg_namespace WHERE nspname = 'rag');\n\nSELECT 'text_from_docx_exists' AS test_name,\n       count(*) > 0 AS result\nFROM pg_proc\nWHERE proname = 'text_from_docx'\n  AND pronamespace = (SELECT oid FROM pg_namespace WHERE nspname = 'rag');\n"
  },
  {
    "path": "docker-compose/ext-src/pgrag-src/sql/embedding_api_functions.sql",
    "content": "-- Test embedding functions exist with correct signatures\n-- OpenAI embedding functions\nSELECT 'openai_text_embedding_exists' AS test_name,\n       count(*) > 0 AS result\nFROM pg_proc\nWHERE proname = 'openai_text_embedding'\n  AND pronamespace = (SELECT oid FROM pg_namespace WHERE nspname = 'rag');\n\nSELECT 'openai_text_embedding_3_small_exists' AS test_name,\n       count(*) > 0 AS result\nFROM pg_proc\nWHERE proname = 'openai_text_embedding_3_small'\n  AND pronamespace = (SELECT oid FROM pg_namespace WHERE nspname = 'rag');\n\nSELECT 'openai_text_embedding_3_large_exists' AS test_name,\n       count(*) > 0 AS result\nFROM pg_proc\nWHERE proname = 'openai_text_embedding_3_large'\n  AND pronamespace = (SELECT oid FROM pg_namespace WHERE nspname = 'rag');\n\nSELECT 'openai_text_embedding_ada_002_exists' AS test_name,\n       count(*) > 0 AS result\nFROM pg_proc\nWHERE proname = 'openai_text_embedding_ada_002'\n  AND pronamespace = (SELECT oid FROM pg_namespace WHERE nspname = 'rag');\n\n-- Fireworks embedding functions\nSELECT 'fireworks_nomic_embed_text_v1_exists' AS test_name,\n       count(*) > 0 AS result\nFROM pg_proc\nWHERE proname = 'fireworks_nomic_embed_text_v1'\n  AND pronamespace = (SELECT oid FROM pg_namespace WHERE nspname = 'rag');\n\nSELECT 'fireworks_nomic_embed_text_v15_exists' AS test_name,\n       count(*) > 0 AS result\nFROM pg_proc\nWHERE proname = 'fireworks_nomic_embed_text_v15'\n  AND pronamespace = (SELECT oid FROM pg_namespace WHERE nspname = 'rag');\n\nSELECT 'fireworks_text_embedding_exists' AS test_name,\n       count(*) > 0 AS result\nFROM pg_proc\nWHERE proname = 'fireworks_text_embedding'\n  AND pronamespace = (SELECT oid FROM pg_namespace WHERE nspname = 'rag');\n\nSELECT 'fireworks_text_embedding_thenlper_gte_base_exists' AS test_name,\n       count(*) > 0 AS result\nFROM pg_proc\nWHERE proname = 'fireworks_text_embedding_thenlper_gte_base'\n  AND pronamespace = (SELECT oid FROM pg_namespace WHERE nspname = 
'rag');\n\nSELECT 'fireworks_text_embedding_thenlper_gte_large_exists' AS test_name,\n       count(*) > 0 AS result\nFROM pg_proc\nWHERE proname = 'fireworks_text_embedding_thenlper_gte_large'\n  AND pronamespace = (SELECT oid FROM pg_namespace WHERE nspname = 'rag');\n\nSELECT 'fireworks_text_embedding_whereisai_uae_large_v1_exists' AS test_name,\n       count(*) > 0 AS result\nFROM pg_proc\nWHERE proname = 'fireworks_text_embedding_whereisai_uae_large_v1'\n  AND pronamespace = (SELECT oid FROM pg_namespace WHERE nspname = 'rag');\n"
  },
  {
    "path": "docker-compose/ext-src/pgrag-src/sql/text_processing.sql",
    "content": "-- Text processing function tests\nSELECT rag.markdown_from_html('<p>Hello <i>world</i></p>');\n\nSELECT rag.chunks_by_character_count('the cat sat on the mat', 10, 5);\n"
  },
  {
    "path": "docker-compose/ext-src/pgrag-src/sql/voyageai_functions.sql",
    "content": "-- Test VoyageAI API key functions\nSELECT 'voyageai_api_key_test' AS test_name, \n       (SELECT rag.voyageai_set_api_key('test_key') IS NULL) AS result;\n\nSELECT 'voyageai_get_api_key_test' AS test_name,\n       (SELECT rag.voyageai_get_api_key() = 'test_key') AS result;\n\n-- Test VoyageAI embedding functions exist\nSELECT 'voyageai_embedding_exists' AS test_name,\n       count(*) > 0 AS result\nFROM pg_proc\nWHERE proname = 'voyageai_embedding'\n  AND pronamespace = (SELECT oid FROM pg_namespace WHERE nspname = 'rag');\n\nSELECT 'voyageai_embedding_3_exists' AS test_name,\n       count(*) > 0 AS result\nFROM pg_proc\nWHERE proname = 'voyageai_embedding_3'\n  AND pronamespace = (SELECT oid FROM pg_namespace WHERE nspname = 'rag');\n\nSELECT 'voyageai_embedding_3_lite_exists' AS test_name,\n       count(*) > 0 AS result\nFROM pg_proc\nWHERE proname = 'voyageai_embedding_3_lite'\n  AND pronamespace = (SELECT oid FROM pg_namespace WHERE nspname = 'rag');\n\nSELECT 'voyageai_embedding_code_2_exists' AS test_name,\n       count(*) > 0 AS result\nFROM pg_proc\nWHERE proname = 'voyageai_embedding_code_2'\n  AND pronamespace = (SELECT oid FROM pg_namespace WHERE nspname = 'rag');\n\nSELECT 'voyageai_embedding_finance_2_exists' AS test_name,\n       count(*) > 0 AS result\nFROM pg_proc\nWHERE proname = 'voyageai_embedding_finance_2'\n  AND pronamespace = (SELECT oid FROM pg_namespace WHERE nspname = 'rag');\n\nSELECT 'voyageai_embedding_law_2_exists' AS test_name,\n       count(*) > 0 AS result\nFROM pg_proc\nWHERE proname = 'voyageai_embedding_law_2'\n  AND pronamespace = (SELECT oid FROM pg_namespace WHERE nspname = 'rag');\n\nSELECT 'voyageai_embedding_multilingual_2_exists' AS test_name,\n       count(*) > 0 AS result\nFROM pg_proc\nWHERE proname = 'voyageai_embedding_multilingual_2'\n  AND pronamespace = (SELECT oid FROM pg_namespace WHERE nspname = 'rag');\n\n-- Test VoyageAI reranking functions exist\nSELECT 'voyageai_rerank_distance_exists' AS 
test_name,\n       count(*) > 0 AS result\nFROM pg_proc\nWHERE proname = 'voyageai_rerank_distance'\n  AND pronamespace = (SELECT oid FROM pg_namespace WHERE nspname = 'rag');\n\nSELECT 'voyageai_rerank_score_exists' AS test_name,\n       count(*) > 0 AS result\nFROM pg_proc\nWHERE proname = 'voyageai_rerank_score'\n  AND pronamespace = (SELECT oid FROM pg_namespace WHERE nspname = 'rag');\n\n-- Test VoyageAI function signatures\nSELECT 'voyageai_embedding_signature' AS test_name,\n       count(*) > 0 AS result\nFROM pg_proc\nWHERE proname = 'voyageai_embedding'\n  AND pronamespace = (SELECT oid FROM pg_namespace WHERE nspname = 'rag')\n  AND pronargs = 3;\n\nSELECT 'voyageai_rerank_distance_signature' AS test_name,\n       count(*) > 0 AS result\nFROM pg_proc\nWHERE proname = 'voyageai_rerank_distance'\n  AND pronamespace = (SELECT oid FROM pg_namespace WHERE nspname = 'rag')\n  AND pronargs IN (3, 4);\n\nSELECT 'voyageai_rerank_score_signature' AS test_name,\n       count(*) > 0 AS result\nFROM pg_proc\nWHERE proname = 'voyageai_rerank_score'\n  AND pronamespace = (SELECT oid FROM pg_namespace WHERE nspname = 'rag')\n  AND pronargs IN (3, 4);\n"
  },
  {
    "path": "docker-compose/ext-src/pgtap-src/regular-test.sh",
    "content": "#!/bin/sh\nset -ex\ncd \"$(dirname ${0})\"\nmake installcheck || true\ndropdb --if-exist contrib_regression\ncreatedb contrib_regression\nPG_REGRESS=$(dirname \"$(pg_config --pgxs)\")/../test/regress/pg_regress\nsed -i '/hastap/d' test/build/run.sch\nsed -Ei 's/\\b(aretap|enumtap|ownership|privs|usergroup)\\b//g' test/build/run.sch\n${PG_REGRESS} --use-existing --dbname=contrib_regression --inputdir=./ --bindir='/usr/local/pgsql/bin'    --inputdir=test --max-connections=879 --schedule test/schedule/main.sch   --schedule test/build/run.sch\n"
  },
  {
    "path": "docker-compose/ext-src/pgtap-src/test-upgrade.patch",
    "content": "diff --git a/Makefile b/Makefile\nindex f255fe6..0a0fa65 100644\n--- a/Makefile\n+++ b/Makefile\n@@ -346,7 +346,7 @@ test: test-serial test-parallel\n TB_DIR = test/build\n GENERATED_SCHEDULE_DEPS = $(TB_DIR)/all_tests $(TB_DIR)/exclude_tests\n REGRESS = --schedule $(TB_DIR)/run.sch # Set this again just to be safe\n-REGRESS_OPTS = --inputdir=test --max-connections=$(PARALLEL_CONN) --schedule $(SETUP_SCH) $(REGRESS_CONF)\n+REGRESS_OPTS = --use-existing --dbname=contrib_regression --inputdir=test --max-connections=$(PARALLEL_CONN) --schedule $(SETUP_SCH) $(REGRESS_CONF)\n SETUP_SCH = test/schedule/main.sch # schedule to use for test setup; this can be forcibly changed by some targets!\n IGNORE_TESTS = $(notdir $(EXCLUDE_TEST_FILES:.sql=))\n PARALLEL_TESTS = $(filter-out $(IGNORE_TESTS),$(filter-out $(SERIAL_TESTS),$(ALL_TESTS)))\ndiff --git a/test/schedule/create.sql b/test/schedule/create.sql\nindex ba355ed..7e250f5 100644\n--- a/test/schedule/create.sql\n+++ b/test/schedule/create.sql\n@@ -1,3 +1,2 @@\n \\unset ECHO\n \\i test/psql.sql\n-CREATE EXTENSION pgtap;\ndiff --git a/test/schedule/main.sch b/test/schedule/main.sch\nindex a8a5fbc..0463fc4 100644\n--- a/test/schedule/main.sch\n+++ b/test/schedule/main.sch\n@@ -1,2 +1 @@\n-test: build\n test: create\n"
  },
  {
    "path": "docker-compose/ext-src/pgtap-src/test-upgrade.sh",
    "content": "#!/bin/sh\nset -ex\ncd \"$(dirname ${0})\"\npatch -p1 <test-upgrade.patch\nmake installcheck"
  },
  {
    "path": "docker-compose/ext-src/pgvector-src/regular-test.sh",
    "content": "#!/bin/sh\nset -ex\ncd \"$(dirname ${0})\"\ndropdb --if-exist contrib_regression\ncreatedb contrib_regression\npsql -d contrib_regression -c \"CREATE EXTENSION vector\"\nPG_REGRESS=$(dirname \"$(pg_config --pgxs)\")/../test/regress/pg_regress\n${PG_REGRESS} --inputdir=./ --bindir='/usr/local/pgsql/bin' --inputdir=test --use-existing --dbname=contrib_regression bit btree cast copy halfvec hnsw_bit hnsw_halfvec hnsw_sparsevec hnsw_vector ivfflat_bit ivfflat_halfvec ivfflat_vector sparsevec vector_type"
  },
  {
    "path": "docker-compose/ext-src/pgvector-src/test-upgrade.sh",
    "content": "#!/bin/sh\nset -ex\ncd \"$(dirname ${0})\"\nPG_REGRESS=$(dirname \"$(pg_config --pgxs)\")/../test/regress/pg_regress\n${PG_REGRESS} --inputdir=./ --bindir='/usr/local/pgsql/bin' --inputdir=test --use-existing --dbname=contrib_regression bit btree cast copy halfvec hnsw_bit hnsw_halfvec hnsw_sparsevec hnsw_vector ivfflat_bit ivfflat_halfvec ivfflat_vector sparsevec vector_type"
  },
  {
    "path": "docker-compose/ext-src/pgx_ulid-src/Makefile",
    "content": "EXTENSION = pgx_ulid\n\nPGFILEDESC = \"pgx_ulid - ULID type for PostgreSQL\"\n\nPG_CONFIG ?= pg_config\nPGXS := $(shell $(PG_CONFIG) --pgxs)\nPG_REGRESS = $(dir $(PGXS))/../../src/test/regress/pg_regress\nPG_MAJOR_VERSION := $(word 2, $(subst ., , $(shell $(PG_CONFIG) --version)))\nifeq ($(shell test $(PG_MAJOR_VERSION) -lt 17; echo $$?),0)\n  REGRESS = 00_ulid_generation 01_ulid_conversions 03_ulid_errors\n  EXTNAME = ulid\nelse\n  REGRESS = 00_ulid_generation 01_ulid_conversions 02_ulid_conversions 03_ulid_errors\n  EXTNAME = pgx_ulid\nendif\n\n.PHONY: installcheck\ninstallcheck: regression-test\n\nregression-test:\n\tdropdb --if-exists contrib_regression\n\tcreatedb contrib_regression\n\t../alter_db.sh\n\tpsql -d contrib_regression -c \"CREATE EXTENSION $(EXTNAME)\"\n\t$(PG_REGRESS) --inputdir=. --outputdir=. --use-existing --dbname=contrib_regression $(REGRESS)\n"
  },
  {
    "path": "docker-compose/ext-src/pgx_ulid-src/expected/00_ulid_generation.out",
    "content": "-- Test basic ULID generation\n-- Test gen_ulid() function\nSELECT 'gen_ulid() returns a non-null value' as test_name,\n       gen_ulid() IS NOT NULL as result;\n              test_name              | result \n-------------------------------------+--------\n gen_ulid() returns a non-null value | t\n(1 row)\n\n-- Test that multiple calls to gen_ulid() return different values\nSELECT 'gen_ulid() returns unique values' as test_name,\n       gen_ulid() != gen_ulid() as result;\n            test_name             | result \n----------------------------------+--------\n gen_ulid() returns unique values | t\n(1 row)\n\n-- Test that gen_ulid() returns a value with the correct format\nSELECT 'gen_ulid() returns correctly formatted value' as test_name,\n       length(gen_ulid()::text) = 26 as result;\n                  test_name                   | result \n----------------------------------------------+--------\n gen_ulid() returns correctly formatted value | t\n(1 row)\n\n-- Test monotonic ULID generation\nSELECT 'gen_monotonic_ulid() returns a non-null value' as test_name,\n       gen_monotonic_ulid() IS NOT NULL as result;\n                   test_name                   | result \n-----------------------------------------------+--------\n gen_monotonic_ulid() returns a non-null value | t\n(1 row)\n\n-- Test that multiple calls to gen_monotonic_ulid() return different values\nSELECT 'gen_monotonic_ulid() returns unique values' as test_name,\n       gen_monotonic_ulid() != gen_monotonic_ulid() as result;\n                 test_name                  | result \n--------------------------------------------+--------\n gen_monotonic_ulid() returns unique values | t\n(1 row)\n\n-- Test that gen_monotonic_ulid() returns a value with the correct format\nSELECT 'gen_monotonic_ulid() returns correctly formatted value' as test_name,\n       length(gen_monotonic_ulid()::text) = 26 as result;\n                       test_name                        | result 
\n--------------------------------------------------------+--------\n gen_monotonic_ulid() returns correctly formatted value | t\n(1 row)\n\n-- Test that monotonic ULIDs are ordered correctly\nSELECT 'gen_monotonic_ulid() returns ordered values' as test_name,\n       u1 < u2 as result\nFROM (\n    SELECT gen_monotonic_ulid() as u1, gen_monotonic_ulid() as u2\n) subq;\n                  test_name                  | result \n---------------------------------------------+--------\n gen_monotonic_ulid() returns ordered values | t\n(1 row)\n\n"
  },
  {
    "path": "docker-compose/ext-src/pgx_ulid-src/expected/01_ulid_conversions.out",
    "content": "-- Create a test ULID value\nCREATE TEMP TABLE test_ulids AS\nSELECT '01GV5PA9EQG7D82Q3Y4PKBZSYV'::ulid as test_ulid;\n-- Test conversion to text\nSELECT 'ulid to text conversion' as test_name,\n       test_ulid::text = '01GV5PA9EQG7D82Q3Y4PKBZSYV' as result\nFROM test_ulids;\n        test_name        | result \n-------------------------+--------\n ulid to text conversion | t\n(1 row)\n\n-- Test conversion to UUID\nSELECT 'ulid to UUID conversion' as test_name,\n       test_ulid::uuid::text = '0186cb65-25d7-81da-815c-7e25a6bfe7db' as result\nFROM test_ulids;\n        test_name        | result \n-------------------------+--------\n ulid to UUID conversion | t\n(1 row)\n\n-- Test conversion to bytea\nSELECT 'ulid to bytea conversion' as test_name,\n       length(test_ulid::bytea) = 16 as result\nFROM test_ulids;\n        test_name         | result \n--------------------------+--------\n ulid to bytea conversion | t\n(1 row)\n\n-- Test conversion to timestamp\nSELECT 'ulid to timestamp conversion' as test_name,\n       to_char(test_ulid::timestamp, 'YYYY-MM-DD HH24:MI:SS.MS') = '2023-03-10 04:00:49.111' as result\nFROM test_ulids;\n          test_name           | result \n------------------------------+--------\n ulid to timestamp conversion | t\n(1 row)\n\n-- Test conversion from UUID\nSELECT 'UUID to ulid conversion' as test_name,\n       '0186cb65-25d7-81da-815c-7e25a6bfe7db'::uuid::ulid::text = '01GV5PA9EQG7D82Q3Y4PKBZSYV' as result;\n        test_name        | result \n-------------------------+--------\n UUID to ulid conversion | t\n(1 row)\n\n-- Test conversion from timestamp\nSELECT 'timestamp to ulid conversion' as test_name,\n       '2023-03-10 12:00:49.111'::timestamp::ulid::text = '01GV5PA9EQ0000000000000000' as result;\n          test_name           | result \n------------------------------+--------\n timestamp to ulid conversion | t\n(1 row)\n\n"
  },
  {
    "path": "docker-compose/ext-src/pgx_ulid-src/expected/02_ulid_conversions.out",
    "content": "-- Test conversion from timestamptz\nSELECT 'timestamptz to ulid conversion' as test_name,\n       '2023-03-10 04:00:49.111'::timestamptz::ulid::text = '01GV5PA9EQ0000000000000000' as result;\n           test_name            | result \n--------------------------------+--------\n timestamptz to ulid conversion | t\n(1 row)\n\n"
  },
  {
    "path": "docker-compose/ext-src/pgx_ulid-src/expected/03_ulid_errors.out",
    "content": "-- Test ULID error handling\n-- Test invalid ULID string (too short)\nSELECT '01GV5PA9EQG7D82Q3Y4PKBZSY'::ulid;\nERROR:  invalid input syntax for type ulid: \"01GV5PA9EQG7D82Q3Y4PKBZSY\": invalid length\nLINE 1: SELECT '01GV5PA9EQG7D82Q3Y4PKBZSY'::ulid;\n               ^\n-- Test invalid ULID string (invalid character)\nSELECT '01GV5PA9EQG7D82Q3Y4PKBZSYU'::ulid;\nERROR:  invalid input syntax for type ulid: \"01GV5PA9EQG7D82Q3Y4PKBZSYU\": invalid character\nLINE 1: SELECT '01GV5PA9EQG7D82Q3Y4PKBZSYU'::ulid;\n               ^\n-- Test NULL handling\nSELECT 'NULL to ulid conversion returns NULL' as test_name,\n       NULL::ulid IS NULL as result;\n              test_name               | result \n--------------------------------------+--------\n NULL to ulid conversion returns NULL | t\n(1 row)\n\n"
  },
  {
    "path": "docker-compose/ext-src/pgx_ulid-src/sql/00_ulid_generation.sql",
    "content": "-- Test basic ULID generation\n\n-- Test gen_ulid() function\nSELECT 'gen_ulid() returns a non-null value' as test_name,\n       gen_ulid() IS NOT NULL as result;\n\n-- Test that multiple calls to gen_ulid() return different values\nSELECT 'gen_ulid() returns unique values' as test_name,\n       gen_ulid() != gen_ulid() as result;\n\n-- Test that gen_ulid() returns a value with the correct format\nSELECT 'gen_ulid() returns correctly formatted value' as test_name,\n       length(gen_ulid()::text) = 26 as result;\n\n-- Test monotonic ULID generation\nSELECT 'gen_monotonic_ulid() returns a non-null value' as test_name,\n       gen_monotonic_ulid() IS NOT NULL as result;\n\n-- Test that multiple calls to gen_monotonic_ulid() return different values\nSELECT 'gen_monotonic_ulid() returns unique values' as test_name,\n       gen_monotonic_ulid() != gen_monotonic_ulid() as result;\n\n-- Test that gen_monotonic_ulid() returns a value with the correct format\nSELECT 'gen_monotonic_ulid() returns correctly formatted value' as test_name,\n       length(gen_monotonic_ulid()::text) = 26 as result;\n\n-- Test that monotonic ULIDs are ordered correctly\nSELECT 'gen_monotonic_ulid() returns ordered values' as test_name,\n       u1 < u2 as result\nFROM (\n    SELECT gen_monotonic_ulid() as u1, gen_monotonic_ulid() as u2\n) subq;\n"
  },
  {
    "path": "docker-compose/ext-src/pgx_ulid-src/sql/01_ulid_conversions.sql",
    "content": "-- Create a test ULID value\nCREATE TEMP TABLE test_ulids AS\nSELECT '01GV5PA9EQG7D82Q3Y4PKBZSYV'::ulid as test_ulid;\n\n-- Test conversion to text\nSELECT 'ulid to text conversion' as test_name,\n       test_ulid::text = '01GV5PA9EQG7D82Q3Y4PKBZSYV' as result\nFROM test_ulids;\n\n-- Test conversion to UUID\nSELECT 'ulid to UUID conversion' as test_name,\n       test_ulid::uuid::text = '0186cb65-25d7-81da-815c-7e25a6bfe7db' as result\nFROM test_ulids;\n\n-- Test conversion to bytea\nSELECT 'ulid to bytea conversion' as test_name,\n       length(test_ulid::bytea) = 16 as result\nFROM test_ulids;\n\n-- Test conversion to timestamp\nSELECT 'ulid to timestamp conversion' as test_name,\n       to_char(test_ulid::timestamp, 'YYYY-MM-DD HH24:MI:SS.MS') = '2023-03-10 04:00:49.111' as result\nFROM test_ulids;\n\n-- Test conversion from UUID\nSELECT 'UUID to ulid conversion' as test_name,\n       '0186cb65-25d7-81da-815c-7e25a6bfe7db'::uuid::ulid::text = '01GV5PA9EQG7D82Q3Y4PKBZSYV' as result;\n\n-- Test conversion from timestamp\nSELECT 'timestamp to ulid conversion' as test_name,\n       '2023-03-10 12:00:49.111'::timestamp::ulid::text = '01GV5PA9EQ0000000000000000' as result;\n\n"
  },
  {
    "path": "docker-compose/ext-src/pgx_ulid-src/sql/02_ulid_conversions.sql",
    "content": "-- Test conversion from timestamptz\nSELECT 'timestamptz to ulid conversion' as test_name,\n       '2023-03-10 04:00:49.111'::timestamptz::ulid::text = '01GV5PA9EQ0000000000000000' as result;\n"
  },
  {
    "path": "docker-compose/ext-src/pgx_ulid-src/sql/03_ulid_errors.sql",
    "content": "-- Test ULID error handling\n\n-- Test invalid ULID string (too short)\nSELECT '01GV5PA9EQG7D82Q3Y4PKBZSY'::ulid;\n\n-- Test invalid ULID string (invalid character)\nSELECT '01GV5PA9EQG7D82Q3Y4PKBZSYU'::ulid;\n\n-- Test NULL handling\nSELECT 'NULL to ulid conversion returns NULL' as test_name,\n       NULL::ulid IS NULL as result;\n\n"
  },
  {
    "path": "docker-compose/ext-src/plv8-src/regular-test.sh",
    "content": "#!/bin/bash\nset -ex\ncd \"$(dirname ${0})\"\ndropdb --if-exist contrib_regression\ncreatedb contrib_regression\n. ../alter_db.sh\nPG_REGRESS=$(dirname \"$(pg_config --pgxs)\")/../test/regress/pg_regress\nREGRESS=\"$(make -n installcheck | awk '{print substr($0,index($0,\"init-extension\"));}')\"\nREGRESS=\"${REGRESS/startup_perms/}\"\nREGRESS=\"${REGRESS/startup /}\"\nREGRESS=\"${REGRESS/find_function_perms/}\"\nREGRESS=\"${REGRESS/guc/}\"\n${PG_REGRESS} --inputdir=./ --bindir='/usr/local/pgsql/bin'  --use-existing --dbname=contrib_regression ${REGRESS}"
  },
  {
    "path": "docker-compose/ext-src/plv8-src/test-upgrade.sh",
    "content": "#!/bin/sh\nset -ex\ncd \"$(dirname ${0})\"\nPG_REGRESS=$(dirname \"$(pg_config --pgxs)\")/../test/regress/pg_regress\nREGRESS=\"$(make -n installcheck | awk '{print substr($0,index($0,\"init-extension\")+15);}')\"\n${PG_REGRESS} --inputdir=./ --bindir='/usr/local/pgsql/bin'  --use-existing --dbname=contrib_regression ${REGRESS}"
  },
  {
    "path": "docker-compose/ext-src/postgis-src/README-Neon.md",
    "content": "# PostGIS Testing in Neon\n\nThis directory contains configuration files and patches for running PostGIS tests in the Neon database environment.\n\n## Overview\n\nPostGIS is a spatial database extension for PostgreSQL that adds support for geographic objects. Testing PostGIS compatibility ensures that Neon's modifications to PostgreSQL don't break compatibility with this critical extension.\n\n## PostGIS Versions\n\n- PostgreSQL v17: PostGIS 3.5.0\n- PostgreSQL v14/v15/v16: PostGIS 3.3.3\n\n## Test Configuration\n\nThe test setup includes:\n\n- `postgis-no-upgrade-test.patch`: Disables upgrade tests by removing the upgrade test section from regress/runtest.mk\n- `postgis-regular-v16.patch`: Version-specific patch for PostgreSQL v16\n- `postgis-regular-v17.patch`: Version-specific patch for PostgreSQL v17\n- `regular-test.sh`: Script to run PostGIS tests as a regular user\n- `neon-test.sh`: Script to handle version-specific test configurations\n- `raster_outdb_template.sql`: Template for raster tests with explicit file paths\n\n## Excluded Tests\n\n**Important Note:** The test exclusions listed below are specifically for regular-user tests against staging instances. These exclusions are necessary because staging instances run with limited privileges and cannot perform operations requiring superuser access. 
Docker-compose based tests are not affected by these exclusions.\n\n### Tests Requiring Superuser Permissions\n\nThese tests cannot be run as a regular user:\n- `estimatedextent`\n- `regress/core/legacy`\n- `regress/core/typmod`\n- `regress/loader/TestSkipANALYZE`\n- `regress/loader/TestANALYZE`\n\n### Tests Requiring Filesystem Access\n\nThese tests need direct filesystem access that is only possible for superusers:\n- `loader/load_outdb`\n\n### Tests with Flaky Results\n\nThese tests have assumptions that don't always hold true:\n- `regress/core/computed_columns` - Assumes computed columns always outperform alternatives, which is not consistently true\n\n### Tests Requiring Tunable Parameter Modifications\n\nThese tests attempt to modify the `postgis.gdal_enabled_drivers` parameter, which is only accessible to superusers:\n- `raster/test/regress/rt_wkb`\n- `raster/test/regress/rt_addband`\n- `raster/test/regress/rt_setbandpath`\n- `raster/test/regress/rt_fromgdalraster`\n- `raster/test/regress/rt_asgdalraster`\n- `raster/test/regress/rt_astiff`\n- `raster/test/regress/rt_asjpeg`\n- `raster/test/regress/rt_aspng`\n- `raster/test/regress/permitted_gdal_drivers`\n- Loader tests: `BasicOutDB`, `Tiled10x10`, `Tiled10x10Copy`, `Tiled8x8`, `TiledAuto`, `TiledAutoSkipNoData`, `TiledAutoCopyn`\n\n### Topology Tests (v17 only)\n- `populate_topology_layer`\n- `renametopogeometrycolumn`\n\n## Other Modifications\n\n- Binary.sql tests are modified to use explicit file paths\n- Server-side SQL COPY commands (which require superuser privileges) are converted to client-side `\\copy` commands\n- Upgrade tests are disabled\n"
  },
  {
    "path": "docker-compose/ext-src/postgis-src/neon-test.sh",
    "content": "#!/bin/sh\nset -ex\ncd \"$(dirname \"$0\")\"\npatch -p1 <\"postgis-common-${PG_VERSION}.patch\"\ntrap 'echo Cleaning up; patch -R -p1 <postgis-common-${PG_VERSION}.patch' EXIT\nmake installcheck-base"
  },
  {
    "path": "docker-compose/ext-src/postgis-src/postgis-common-v16.patch",
    "content": "diff --git a/regress/core/tests.mk b/regress/core/tests.mk\nindex 3abd7bc..64a9254 100644\n--- a/regress/core/tests.mk\n+++ b/regress/core/tests.mk\n@@ -144,11 +144,6 @@ TESTS_SLOW = \\\n \t$(top_srcdir)/regress/core/concave_hull_hard \\\n \t$(top_srcdir)/regress/core/knn_recheck\n \n-ifeq ($(shell expr \"$(POSTGIS_PGSQL_VERSION)\" \">=\" 120),1)\n-\tTESTS += \\\n-\t\t$(top_srcdir)/regress/core/computed_columns\n-endif\n-\n ifeq ($(shell expr \"$(POSTGIS_GEOS_VERSION)\" \">=\" 30700),1)\n \t# GEOS-3.7 adds:\n \t# ST_FrechetDistance\ndiff --git a/regress/runtest.mk b/regress/runtest.mk\nindex c051f03..010e493 100644\n--- a/regress/runtest.mk\n+++ b/regress/runtest.mk\n@@ -24,16 +24,6 @@ check-regress:\n \n \tPOSTGIS_TOP_BUILD_DIR=$(abs_top_builddir) $(PERL) $(top_srcdir)/regress/run_test.pl $(RUNTESTFLAGS) $(RUNTESTFLAGS_INTERNAL) $(TESTS)\n \n-\t@if echo \"$(RUNTESTFLAGS)\" | grep -vq -- --upgrade; then \\\n-\t\techo \"Running upgrade test as RUNTESTFLAGS did not contain that\"; \\\n-\t\tPOSTGIS_TOP_BUILD_DIR=$(abs_top_builddir) $(PERL) $(top_srcdir)/regress/run_test.pl \\\n-      --upgrade \\\n-      $(RUNTESTFLAGS) \\\n-      $(RUNTESTFLAGS_INTERNAL) \\\n-      $(TESTS); \\\n-\telse \\\n-\t\techo \"Skipping upgrade test as RUNTESTFLAGS already requested upgrades\"; \\\n-\tfi\n \n check-long:\n \t$(PERL) $(top_srcdir)/regress/run_test.pl $(RUNTESTFLAGS) $(TESTS) $(TESTS_SLOW)\n"
  },
  {
    "path": "docker-compose/ext-src/postgis-src/postgis-common-v17.patch",
    "content": "diff --git a/regress/core/tests.mk b/regress/core/tests.mk\nindex 9e05244..90987df 100644\n--- a/regress/core/tests.mk\n+++ b/regress/core/tests.mk\n@@ -143,8 +143,7 @@ TESTS += \\\n \t$(top_srcdir)/regress/core/oriented_envelope \\\n \t$(top_srcdir)/regress/core/point_coordinates \\\n \t$(top_srcdir)/regress/core/out_geojson \\\n-  $(top_srcdir)/regress/core/wrapx \\\n-\t$(top_srcdir)/regress/core/computed_columns\n+  $(top_srcdir)/regress/core/wrapx\n \n # Slow slow tests\n TESTS_SLOW = \\\ndiff --git a/regress/runtest.mk b/regress/runtest.mk\nindex 4b95b7e..449d5a2 100644\n--- a/regress/runtest.mk\n+++ b/regress/runtest.mk\n@@ -24,16 +24,6 @@ check-regress:\n \n \t@POSTGIS_TOP_BUILD_DIR=$(abs_top_builddir) $(PERL) $(top_srcdir)/regress/run_test.pl $(RUNTESTFLAGS) $(RUNTESTFLAGS_INTERNAL) $(TESTS)\n \n-\t@if echo \"$(RUNTESTFLAGS)\" | grep -vq -- --upgrade; then \\\n-\t\techo \"Running upgrade test as RUNTESTFLAGS did not contain that\"; \\\n-\t\tPOSTGIS_TOP_BUILD_DIR=$(abs_top_builddir) $(PERL) $(top_srcdir)/regress/run_test.pl \\\n-      --upgrade \\\n-      $(RUNTESTFLAGS) \\\n-      $(RUNTESTFLAGS_INTERNAL) \\\n-      $(TESTS); \\\n-\telse \\\n-\t\techo \"Skipping upgrade test as RUNTESTFLAGS already requested upgrades\"; \\\n-\tfi\n \n check-long:\n \t$(PERL) $(top_srcdir)/regress/run_test.pl $(RUNTESTFLAGS) $(TESTS) $(TESTS_SLOW)\n"
  },
  {
    "path": "docker-compose/ext-src/postgis-src/postgis-regular-v16.patch",
    "content": "diff --git a/raster/test/regress/tests.mk b/raster/test/regress/tests.mk\nindex 00918e1..7e2b6cd 100644\n--- a/raster/test/regress/tests.mk\n+++ b/raster/test/regress/tests.mk\n@@ -17,9 +17,7 @@ override RUNTESTFLAGS_INTERNAL := \\\n   $(RUNTESTFLAGS_INTERNAL) \\\n   --after-upgrade-script $(top_srcdir)/raster/test/regress/hooks/hook-after-upgrade-raster.sql\n \n-RASTER_TEST_FIRST = \\\n-\t$(top_srcdir)/raster/test/regress/check_gdal \\\n-\t$(top_srcdir)/raster/test/regress/loader/load_outdb\n+RASTER_TEST_FIRST =\n \n RASTER_TEST_LAST = \\\n \t$(top_srcdir)/raster/test/regress/clean\n@@ -33,9 +31,7 @@ RASTER_TEST_IO = \\\n \n RASTER_TEST_BASIC_FUNC = \\\n \t$(top_srcdir)/raster/test/regress/rt_bytea \\\n-\t$(top_srcdir)/raster/test/regress/rt_wkb \\\n \t$(top_srcdir)/raster/test/regress/box3d \\\n-\t$(top_srcdir)/raster/test/regress/rt_addband \\\n \t$(top_srcdir)/raster/test/regress/rt_band \\\n \t$(top_srcdir)/raster/test/regress/rt_tile\n \n@@ -73,16 +69,10 @@ RASTER_TEST_BANDPROPS = \\\n \t$(top_srcdir)/raster/test/regress/rt_neighborhood \\\n \t$(top_srcdir)/raster/test/regress/rt_nearestvalue \\\n \t$(top_srcdir)/raster/test/regress/rt_pixelofvalue \\\n-\t$(top_srcdir)/raster/test/regress/rt_polygon \\\n-\t$(top_srcdir)/raster/test/regress/rt_setbandpath\n+\t$(top_srcdir)/raster/test/regress/rt_polygon\n \n RASTER_TEST_UTILITY = \\\n \t$(top_srcdir)/raster/test/regress/rt_utility \\\n-\t$(top_srcdir)/raster/test/regress/rt_fromgdalraster \\\n-\t$(top_srcdir)/raster/test/regress/rt_asgdalraster \\\n-\t$(top_srcdir)/raster/test/regress/rt_astiff \\\n-\t$(top_srcdir)/raster/test/regress/rt_asjpeg \\\n-\t$(top_srcdir)/raster/test/regress/rt_aspng \\\n \t$(top_srcdir)/raster/test/regress/rt_reclass \\\n \t$(top_srcdir)/raster/test/regress/rt_gdalwarp \\\n \t$(top_srcdir)/raster/test/regress/rt_gdalcontour \\\n@@ -120,21 +110,13 @@ RASTER_TEST_SREL = \\\n \n RASTER_TEST_BUGS = \\\n \t$(top_srcdir)/raster/test/regress/bug_test_car5 
\\\n-\t$(top_srcdir)/raster/test/regress/permitted_gdal_drivers \\\n \t$(top_srcdir)/raster/test/regress/tickets\n \n RASTER_TEST_LOADER = \\\n \t$(top_srcdir)/raster/test/regress/loader/Basic \\\n \t$(top_srcdir)/raster/test/regress/loader/Projected \\\n \t$(top_srcdir)/raster/test/regress/loader/BasicCopy \\\n-\t$(top_srcdir)/raster/test/regress/loader/BasicFilename \\\n-\t$(top_srcdir)/raster/test/regress/loader/BasicOutDB \\\n-\t$(top_srcdir)/raster/test/regress/loader/Tiled10x10 \\\n-\t$(top_srcdir)/raster/test/regress/loader/Tiled10x10Copy \\\n-\t$(top_srcdir)/raster/test/regress/loader/Tiled8x8 \\\n-\t$(top_srcdir)/raster/test/regress/loader/TiledAuto \\\n-\t$(top_srcdir)/raster/test/regress/loader/TiledAutoSkipNoData \\\n-\t$(top_srcdir)/raster/test/regress/loader/TiledAutoCopyn\n+\t$(top_srcdir)/raster/test/regress/loader/BasicFilename\n \n RASTER_TESTS := $(RASTER_TEST_FIRST) \\\n \t$(RASTER_TEST_METADATA) $(RASTER_TEST_IO) $(RASTER_TEST_BASIC_FUNC) \\\ndiff --git a/regress/core/binary.sql b/regress/core/binary.sql\nindex 7a36b65..ad78fc7 100644\n--- a/regress/core/binary.sql\n+++ b/regress/core/binary.sql\n@@ -1,4 +1,5 @@\n SET client_min_messages TO warning;\n+\n CREATE SCHEMA tm;\n \n CREATE TABLE tm.geoms (id serial, g geometry);\n@@ -31,24 +32,39 @@ SELECT st_force4d(g) FROM tm.geoms WHERE id < 15 ORDER BY id;\n INSERT INTO tm.geoms(g)\n SELECT st_setsrid(g,4326) FROM tm.geoms ORDER BY id;\n \n-COPY tm.geoms TO :tmpfile WITH BINARY;\n+-- define temp file path\n+\\set tmpfile '/tmp/postgis_binary_test.dat'\n+\n+-- export\n+\\set command '\\\\copy tm.geoms TO ':tmpfile' WITH (FORMAT BINARY)'\n+:command\n+\n+-- import\n CREATE TABLE tm.geoms_in AS SELECT * FROM tm.geoms LIMIT 0;\n-COPY tm.geoms_in FROM :tmpfile WITH BINARY;\n-SELECT 'geometry', count(*) FROM tm.geoms_in i, tm.geoms o WHERE i.id = o.id\n- AND ST_OrderingEquals(i.g, o.g);\n+\\set command '\\\\copy tm.geoms_in FROM ':tmpfile' WITH (FORMAT BINARY)'\n+:command\n+\n+SELECT 'geometry', 
count(*) FROM tm.geoms_in i, tm.geoms o\n+WHERE i.id = o.id AND ST_OrderingEquals(i.g, o.g);\n \n CREATE TABLE tm.geogs AS SELECT id,g::geography FROM tm.geoms\n WHERE geometrytype(g) NOT LIKE '%CURVE%'\n   AND geometrytype(g) NOT LIKE '%CIRCULAR%'\n   AND geometrytype(g) NOT LIKE '%SURFACE%'\n   AND geometrytype(g) NOT LIKE 'TRIANGLE%'\n-  AND geometrytype(g) NOT LIKE 'TIN%'\n-;\n+  AND geometrytype(g) NOT LIKE 'TIN%';\n \n-COPY tm.geogs TO :tmpfile WITH BINARY;\n+-- export\n+\\set command '\\\\copy tm.geogs TO ':tmpfile' WITH (FORMAT BINARY)'\n+:command\n+\n+-- import\n CREATE TABLE tm.geogs_in AS SELECT * FROM tm.geogs LIMIT 0;\n-COPY tm.geogs_in FROM :tmpfile WITH BINARY;\n-SELECT 'geometry', count(*) FROM tm.geogs_in i, tm.geogs o WHERE i.id = o.id\n- AND ST_OrderingEquals(i.g::geometry, o.g::geometry);\n+\\set command '\\\\copy tm.geogs_in FROM ':tmpfile' WITH (FORMAT BINARY)'\n+:command\n+\n+SELECT 'geometry', count(*) FROM tm.geogs_in i, tm.geogs o\n+WHERE i.id = o.id AND ST_OrderingEquals(i.g::geometry, o.g::geometry);\n \n DROP SCHEMA tm CASCADE;\n+\ndiff --git a/regress/core/tests.mk b/regress/core/tests.mk\nindex 64a9254..94903c3 100644\n--- a/regress/core/tests.mk\n+++ b/regress/core/tests.mk\n@@ -23,7 +23,6 @@ current_dir := $(dir $(abspath $(lastword $(MAKEFILE_LIST))))\n RUNTESTFLAGS_INTERNAL += \\\n   --before-upgrade-script $(top_srcdir)/regress/hooks/hook-before-upgrade.sql \\\n   --after-upgrade-script  $(top_srcdir)/regress/hooks/hook-after-upgrade.sql \\\n-  --after-create-script   $(top_srcdir)/regress/hooks/hook-after-create.sql \\\n   --before-uninstall-script $(top_srcdir)/regress/hooks/hook-before-uninstall.sql\n \n TESTS += \\\n@@ -40,7 +39,6 @@ TESTS += \\\n \t$(top_srcdir)/regress/core/dumppoints \\\n \t$(top_srcdir)/regress/core/dumpsegments \\\n \t$(top_srcdir)/regress/core/empty \\\n-\t$(top_srcdir)/regress/core/estimatedextent \\\n \t$(top_srcdir)/regress/core/forcecurve \\\n \t$(top_srcdir)/regress/core/flatgeobuf \\\n 
\t$(top_srcdir)/regress/core/geography \\\n@@ -55,7 +53,6 @@ TESTS += \\\n \t$(top_srcdir)/regress/core/out_marc21 \\\n \t$(top_srcdir)/regress/core/in_encodedpolyline \\\n \t$(top_srcdir)/regress/core/iscollection \\\n-\t$(top_srcdir)/regress/core/legacy \\\n \t$(top_srcdir)/regress/core/letters \\\n \t$(top_srcdir)/regress/core/long_xact \\\n \t$(top_srcdir)/regress/core/lwgeom_regress \\\n@@ -112,7 +109,6 @@ TESTS += \\\n \t$(top_srcdir)/regress/core/temporal_knn \\\n \t$(top_srcdir)/regress/core/tickets \\\n \t$(top_srcdir)/regress/core/twkb \\\n-\t$(top_srcdir)/regress/core/typmod \\\n \t$(top_srcdir)/regress/core/wkb \\\n \t$(top_srcdir)/regress/core/wkt \\\n \t$(top_srcdir)/regress/core/wmsservers \\\ndiff --git a/regress/loader/tests.mk b/regress/loader/tests.mk\nindex 1fc77ac..c3cb9de 100644\n--- a/regress/loader/tests.mk\n+++ b/regress/loader/tests.mk\n@@ -38,7 +38,5 @@ TESTS += \\\n \t$(top_srcdir)/regress/loader/Latin1 \\\n \t$(top_srcdir)/regress/loader/Latin1-implicit \\\n \t$(top_srcdir)/regress/loader/mfile \\\n-\t$(top_srcdir)/regress/loader/TestSkipANALYZE \\\n-\t$(top_srcdir)/regress/loader/TestANALYZE \\\n \t$(top_srcdir)/regress/loader/CharNoWidth\n \ndiff --git a/regress/run_test.pl b/regress/run_test.pl\nindex 0ec5b2d..1c331f4 100755\n--- a/regress/run_test.pl\n+++ b/regress/run_test.pl\n@@ -147,7 +147,6 @@ $ENV{\"LANG\"} = \"C\";\n # Add locale info to the psql options\n # Add pg12 precision suppression\n my $PGOPTIONS = $ENV{\"PGOPTIONS\"};\n-$PGOPTIONS .= \" -c lc_messages=C\";\n $PGOPTIONS .= \" -c client_min_messages=NOTICE\";\n $PGOPTIONS .= \" -c extra_float_digits=0\";\n $ENV{\"PGOPTIONS\"} = $PGOPTIONS;\n"
  },
  {
    "path": "docker-compose/ext-src/postgis-src/postgis-regular-v17.patch",
    "content": "diff --git a/raster/test/regress/tests.mk b/raster/test/regress/tests.mk\nindex 00918e1..7e2b6cd 100644\n--- a/raster/test/regress/tests.mk\n+++ b/raster/test/regress/tests.mk\n@@ -17,9 +17,7 @@ override RUNTESTFLAGS_INTERNAL := \\\n   $(RUNTESTFLAGS_INTERNAL) \\\n   --after-upgrade-script $(top_srcdir)/raster/test/regress/hooks/hook-after-upgrade-raster.sql\n \n-RASTER_TEST_FIRST = \\\n-\t$(top_srcdir)/raster/test/regress/check_gdal \\\n-\t$(top_srcdir)/raster/test/regress/loader/load_outdb\n+RASTER_TEST_FIRST =\n \n RASTER_TEST_LAST = \\\n \t$(top_srcdir)/raster/test/regress/clean\n@@ -33,9 +31,7 @@ RASTER_TEST_IO = \\\n \n RASTER_TEST_BASIC_FUNC = \\\n \t$(top_srcdir)/raster/test/regress/rt_bytea \\\n-\t$(top_srcdir)/raster/test/regress/rt_wkb \\\n \t$(top_srcdir)/raster/test/regress/box3d \\\n-\t$(top_srcdir)/raster/test/regress/rt_addband \\\n \t$(top_srcdir)/raster/test/regress/rt_band \\\n \t$(top_srcdir)/raster/test/regress/rt_tile\n \n@@ -73,16 +69,10 @@ RASTER_TEST_BANDPROPS = \\\n \t$(top_srcdir)/raster/test/regress/rt_neighborhood \\\n \t$(top_srcdir)/raster/test/regress/rt_nearestvalue \\\n \t$(top_srcdir)/raster/test/regress/rt_pixelofvalue \\\n-\t$(top_srcdir)/raster/test/regress/rt_polygon \\\n-\t$(top_srcdir)/raster/test/regress/rt_setbandpath\n+\t$(top_srcdir)/raster/test/regress/rt_polygon\n \n RASTER_TEST_UTILITY = \\\n \t$(top_srcdir)/raster/test/regress/rt_utility \\\n-\t$(top_srcdir)/raster/test/regress/rt_fromgdalraster \\\n-\t$(top_srcdir)/raster/test/regress/rt_asgdalraster \\\n-\t$(top_srcdir)/raster/test/regress/rt_astiff \\\n-\t$(top_srcdir)/raster/test/regress/rt_asjpeg \\\n-\t$(top_srcdir)/raster/test/regress/rt_aspng \\\n \t$(top_srcdir)/raster/test/regress/rt_reclass \\\n \t$(top_srcdir)/raster/test/regress/rt_gdalwarp \\\n \t$(top_srcdir)/raster/test/regress/rt_gdalcontour \\\n@@ -120,21 +110,13 @@ RASTER_TEST_SREL = \\\n \n RASTER_TEST_BUGS = \\\n \t$(top_srcdir)/raster/test/regress/bug_test_car5 
\\\n-\t$(top_srcdir)/raster/test/regress/permitted_gdal_drivers \\\n \t$(top_srcdir)/raster/test/regress/tickets\n \n RASTER_TEST_LOADER = \\\n \t$(top_srcdir)/raster/test/regress/loader/Basic \\\n \t$(top_srcdir)/raster/test/regress/loader/Projected \\\n \t$(top_srcdir)/raster/test/regress/loader/BasicCopy \\\n-\t$(top_srcdir)/raster/test/regress/loader/BasicFilename \\\n-\t$(top_srcdir)/raster/test/regress/loader/BasicOutDB \\\n-\t$(top_srcdir)/raster/test/regress/loader/Tiled10x10 \\\n-\t$(top_srcdir)/raster/test/regress/loader/Tiled10x10Copy \\\n-\t$(top_srcdir)/raster/test/regress/loader/Tiled8x8 \\\n-\t$(top_srcdir)/raster/test/regress/loader/TiledAuto \\\n-\t$(top_srcdir)/raster/test/regress/loader/TiledAutoSkipNoData \\\n-\t$(top_srcdir)/raster/test/regress/loader/TiledAutoCopyn\n+\t$(top_srcdir)/raster/test/regress/loader/BasicFilename\n \n RASTER_TESTS := $(RASTER_TEST_FIRST) \\\n \t$(RASTER_TEST_METADATA) $(RASTER_TEST_IO) $(RASTER_TEST_BASIC_FUNC) \\\ndiff --git a/regress/core/binary.sql b/regress/core/binary.sql\nindex 7a36b65..ad78fc7 100644\n--- a/regress/core/binary.sql\n+++ b/regress/core/binary.sql\n@@ -1,4 +1,5 @@\n SET client_min_messages TO warning;\n+\n CREATE SCHEMA tm;\n \n CREATE TABLE tm.geoms (id serial, g geometry);\n@@ -31,24 +32,39 @@ SELECT st_force4d(g) FROM tm.geoms WHERE id < 15 ORDER BY id;\n INSERT INTO tm.geoms(g)\n SELECT st_setsrid(g,4326) FROM tm.geoms ORDER BY id;\n \n-COPY tm.geoms TO :tmpfile WITH BINARY;\n+-- define temp file path\n+\\set tmpfile '/tmp/postgis_binary_test.dat'\n+\n+-- export\n+\\set command '\\\\copy tm.geoms TO ':tmpfile' WITH (FORMAT BINARY)'\n+:command\n+\n+-- import\n CREATE TABLE tm.geoms_in AS SELECT * FROM tm.geoms LIMIT 0;\n-COPY tm.geoms_in FROM :tmpfile WITH BINARY;\n-SELECT 'geometry', count(*) FROM tm.geoms_in i, tm.geoms o WHERE i.id = o.id\n- AND ST_OrderingEquals(i.g, o.g);\n+\\set command '\\\\copy tm.geoms_in FROM ':tmpfile' WITH (FORMAT BINARY)'\n+:command\n+\n+SELECT 'geometry', 
count(*) FROM tm.geoms_in i, tm.geoms o\n+WHERE i.id = o.id AND ST_OrderingEquals(i.g, o.g);\n \n CREATE TABLE tm.geogs AS SELECT id,g::geography FROM tm.geoms\n WHERE geometrytype(g) NOT LIKE '%CURVE%'\n   AND geometrytype(g) NOT LIKE '%CIRCULAR%'\n   AND geometrytype(g) NOT LIKE '%SURFACE%'\n   AND geometrytype(g) NOT LIKE 'TRIANGLE%'\n-  AND geometrytype(g) NOT LIKE 'TIN%'\n-;\n+  AND geometrytype(g) NOT LIKE 'TIN%';\n \n-COPY tm.geogs TO :tmpfile WITH BINARY;\n+-- export\n+\\set command '\\\\copy tm.geogs TO ':tmpfile' WITH (FORMAT BINARY)'\n+:command\n+\n+-- import\n CREATE TABLE tm.geogs_in AS SELECT * FROM tm.geogs LIMIT 0;\n-COPY tm.geogs_in FROM :tmpfile WITH BINARY;\n-SELECT 'geometry', count(*) FROM tm.geogs_in i, tm.geogs o WHERE i.id = o.id\n- AND ST_OrderingEquals(i.g::geometry, o.g::geometry);\n+\\set command '\\\\copy tm.geogs_in FROM ':tmpfile' WITH (FORMAT BINARY)'\n+:command\n+\n+SELECT 'geometry', count(*) FROM tm.geogs_in i, tm.geogs o\n+WHERE i.id = o.id AND ST_OrderingEquals(i.g::geometry, o.g::geometry);\n \n DROP SCHEMA tm CASCADE;\n+\ndiff --git a/regress/core/tests.mk b/regress/core/tests.mk\nindex 90987df..74fe3f1 100644\n--- a/regress/core/tests.mk\n+++ b/regress/core/tests.mk\n@@ -16,14 +16,13 @@ POSTGIS_PGSQL_VERSION=170\n POSTGIS_GEOS_VERSION=31101\n HAVE_JSON=yes\n HAVE_SPGIST=yes\n-INTERRUPTTESTS=yes\n+INTERRUPTTESTS=no\n \n current_dir := $(dir $(abspath $(lastword $(MAKEFILE_LIST))))\n \n RUNTESTFLAGS_INTERNAL += \\\n   --before-upgrade-script $(top_srcdir)/regress/hooks/hook-before-upgrade.sql \\\n   --after-upgrade-script  $(top_srcdir)/regress/hooks/hook-after-upgrade.sql \\\n-  --after-create-script   $(top_srcdir)/regress/hooks/hook-after-create.sql \\\n   --before-uninstall-script $(top_srcdir)/regress/hooks/hook-before-uninstall.sql\n \n TESTS += \\\n@@ -40,7 +39,6 @@ TESTS += \\\n \t$(top_srcdir)/regress/core/dumppoints \\\n \t$(top_srcdir)/regress/core/dumpsegments \\\n \t$(top_srcdir)/regress/core/empty 
\\\n-\t$(top_srcdir)/regress/core/estimatedextent \\\n \t$(top_srcdir)/regress/core/forcecurve \\\n \t$(top_srcdir)/regress/core/flatgeobuf \\\n \t$(top_srcdir)/regress/core/frechet \\\n@@ -60,7 +58,6 @@ TESTS += \\\n \t$(top_srcdir)/regress/core/out_marc21 \\\n \t$(top_srcdir)/regress/core/in_encodedpolyline \\\n \t$(top_srcdir)/regress/core/iscollection \\\n-\t$(top_srcdir)/regress/core/legacy \\\n \t$(top_srcdir)/regress/core/letters \\\n \t$(top_srcdir)/regress/core/lwgeom_regress \\\n \t$(top_srcdir)/regress/core/measures \\\n@@ -119,7 +116,6 @@ TESTS += \\\n \t$(top_srcdir)/regress/core/temporal_knn \\\n \t$(top_srcdir)/regress/core/tickets \\\n \t$(top_srcdir)/regress/core/twkb \\\n-\t$(top_srcdir)/regress/core/typmod \\\n \t$(top_srcdir)/regress/core/wkb \\\n \t$(top_srcdir)/regress/core/wkt \\\n \t$(top_srcdir)/regress/core/wmsservers \\\ndiff --git a/regress/loader/tests.mk b/regress/loader/tests.mk\nindex ac4f8ad..4bad4fc 100644\n--- a/regress/loader/tests.mk\n+++ b/regress/loader/tests.mk\n@@ -38,7 +38,5 @@ TESTS += \\\n \t$(top_srcdir)/regress/loader/Latin1 \\\n \t$(top_srcdir)/regress/loader/Latin1-implicit \\\n \t$(top_srcdir)/regress/loader/mfile \\\n-\t$(top_srcdir)/regress/loader/TestSkipANALYZE \\\n-\t$(top_srcdir)/regress/loader/TestANALYZE \\\n \t$(top_srcdir)/regress/loader/CharNoWidth \\\n \ndiff --git a/regress/run_test.pl b/regress/run_test.pl\nindex cac4b2e..4c7c82b 100755\n--- a/regress/run_test.pl\n+++ b/regress/run_test.pl\n@@ -238,7 +238,6 @@ $ENV{\"LANG\"} = \"C\";\n # Add locale info to the psql options\n # Add pg12 precision suppression\n my $PGOPTIONS = $ENV{\"PGOPTIONS\"};\n-$PGOPTIONS .= \" -c lc_messages=C\";\n $PGOPTIONS .= \" -c client_min_messages=NOTICE\";\n $PGOPTIONS .= \" -c extra_float_digits=0\";\n $ENV{\"PGOPTIONS\"} = $PGOPTIONS;\ndiff --git a/topology/test/tests.mk b/topology/test/tests.mk\nindex cbe2633..2c7c18f 100644\n--- a/topology/test/tests.mk\n+++ b/topology/test/tests.mk\n@@ -46,9 +46,7 @@ TESTS += \\\n 
\t$(top_srcdir)/topology/test/regress/legacy_query.sql \\\n \t$(top_srcdir)/topology/test/regress/legacy_validate.sql \\\n \t$(top_srcdir)/topology/test/regress/polygonize.sql \\\n-\t$(top_srcdir)/topology/test/regress/populate_topology_layer.sql \\\n \t$(top_srcdir)/topology/test/regress/removeunusedprimitives.sql \\\n-\t$(top_srcdir)/topology/test/regress/renametopogeometrycolumn.sql \\\n \t$(top_srcdir)/topology/test/regress/renametopology.sql \\\n \t$(top_srcdir)/topology/test/regress/share_sequences.sql \\\n \t$(top_srcdir)/topology/test/regress/sqlmm.sql \\\n"
  },
  {
    "path": "docker-compose/ext-src/postgis-src/raster_outdb_template.sql",
    "content": "--\n-- PostgreSQL database dump\n--\n\n-- Dumped from database version 17.4\n-- Dumped by pg_dump version 17.4\n\nSET statement_timeout = 0;\nSET lock_timeout = 0;\nSET idle_in_transaction_session_timeout = 0;\nSET transaction_timeout = 0;\nSET client_encoding = 'UTF8';\nSET standard_conforming_strings = on;\nSELECT pg_catalog.set_config('search_path', '', false);\nSET check_function_bodies = false;\nSET xmloption = content;\nSET client_min_messages = warning;\n\n--\n-- Name: raster_outdb_template; Type: TABLE; Schema: public; Owner: cloud_admin\n--\n\nCREATE TABLE public.raster_outdb_template (\n    rid integer,\n    rast public.raster\n);\n\n\nALTER TABLE public.raster_outdb_template OWNER TO cloud_admin;\n\n--\n-- Data for Name: raster_outdb_template; Type: TABLE DATA; Schema: public; Owner: cloud_admin\n--\n\nCOPY public.raster_outdb_template (rid, rast) FROM stdin;\n1\t0100000300000000000000F03F000000000000F0BF0000000000000000000000000000000000000000000000000000000000000000000000005A0032008400002F6578742D7372632F706F73746769732D7372632F726567726573732F2E2E2F7261737465722F746573742F726567726573732F6C6F616465722F746573747261737465722E746966008400012F6578742D7372632F706F73746769732D7372632F726567726573732F2E2E2F7261737465722F746573742F726567726573732F6C6F616465722F746573747261737465722E746966008400022F6578742D7372632F706F73746769732D7372632F726567726573732F2E2E2F7261737465722F746573742F726567726573732F6C6F616465722F746573747261737465722E74696600\n2\t0100000300000000000000F03F000000000000F0BF0000000000000000000000000000000000000000000000000000000000000000000000005A0032008400002F6578742D7372632F706F73746769732D7372632F726567726573732F2E2E2F7261737465722F746573742F726567726573732F6C6F616465722F746573747261737465722E746966008400012F6578742D7372632F706F73746769732D7372632F726567726573732F2E2E2F7261737465722F746573742F726567726573732F6C6F616465722F746573747261737465722E746966008400022F6578742D7372632F706F73746769732D7372632F726567726573732F2E2E2F72617374
65722F746573742F726567726573732F6C6F616465722F746573747261737465722E74696600\n3\t0100000200000000000000F03F000000000000F0BF0000000000000000000000000000000000000000000000000000000000000000000000005A003200440001010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010
10101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010
10101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010
10101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010
10101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101018400012F6578742D7372632F706F73746769732D7372632F726567726573732F2E2E2F7261737465722F746573742F726567726573732F6C6F616465722F746573747261737465722E74696600\n4\t0100000200000000000000F03F000000000000F0BF0000000000000000000000000000000000000000000000000000000000000000000000005A003200C4FF012F6578742D7372632F706F73746769732D7372632F726567726573732F2E2E2F7261737465722F746573742F726567726573732F6C6F616465722F746573747261737465722E746966004400010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101
01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101
01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101
01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101
01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101
0101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101\n\\.\n\n\n--\n-- PostgreSQL database dump complete\n--\n\n"
  },
  {
    "path": "docker-compose/ext-src/postgis-src/regular-test.sh",
    "content": "#!/bin/bash\nset -ex\ncd \"$(dirname \"${0}\")\"\ndropdb --if-exist contrib_regression\ncreatedb contrib_regression\npsql -d contrib_regression -c \"ALTER DATABASE contrib_regression SET TimeZone='UTC'\" \\\n     -c \"ALTER DATABASE contrib_regression SET DateStyle='ISO, MDY'\" \\\n     -c \"CREATE EXTENSION postgis SCHEMA public\" \\\n     -c \"CREATE EXTENSION postgis_topology\" \\\n     -c \"CREATE EXTENSION postgis_tiger_geocoder CASCADE\" \\\n     -c \"CREATE EXTENSION postgis_raster SCHEMA public\" \\\n     -c \"CREATE EXTENSION postgis_sfcgal SCHEMA public\"\npatch -p1 <\"postgis-common-${PG_VERSION}.patch\"\npatch -p1 <\"postgis-regular-${PG_VERSION}.patch\"\npsql -d contrib_regression -f raster_outdb_template.sql\ntrap 'patch -R -p1 <postgis-regular-${PG_VERSION}.patch && patch -R -p1 <\"postgis-common-${PG_VERSION}.patch\"' EXIT\nPOSTGIS_REGRESS_DB=contrib_regression RUNTESTFLAGS=--nocreate make installcheck-base"
  },
  {
    "path": "docker-compose/ext-src/postgresql-unit-src/regular-test.sh",
    "content": "#!/bin/sh\nset -ex\ncd \"$(dirname ${0})\"\ndropdb --if-exist contrib_regression\ncreatedb contrib_regression\nPG_REGRESS=$(dirname \"$(pg_config --pgxs)\")/../test/regress/pg_regress\n${PG_REGRESS} --inputdir=./ --bindir='/usr/local/pgsql/bin' --use-existing --dbname=contrib_regression extension tables unit binary unicode prefix units time temperature functions language_functions round derived compare aggregate iec custom crosstab convert"
  },
  {
    "path": "docker-compose/ext-src/postgresql-unit-src/test-upgrade.sh",
    "content": "#!/bin/sh\nset -ex\ncd \"$(dirname ${0})\"\nPG_REGRESS=$(dirname \"$(pg_config --pgxs)\")/../test/regress/pg_regress\n${PG_REGRESS} --inputdir=./ --bindir='/usr/local/pgsql/bin' --use-existing --dbname=contrib_regression extension tables unit binary unicode prefix units time temperature functions language_functions round derived compare aggregate iec custom crosstab convert"
  },
  {
    "path": "docker-compose/ext-src/prefix-src/regular-test.sh",
    "content": "#!/bin/sh\nset -ex\ncd \"$(dirname ${0})\"\ndropdb --if-exist contrib_regression\ncreatedb contrib_regression\nPG_REGRESS=$(dirname \"$(pg_config --pgxs)\")/../test/regress/pg_regress\n${PG_REGRESS} --use-existing --inputdir=./ --bindir='/usr/local/pgsql/bin'    --dbname=contrib_regression create_extension prefix falcon explain queries"
  },
  {
    "path": "docker-compose/ext-src/prefix-src/test-upgrade.sh",
    "content": "#!/bin/sh\nset -ex\ncd \"$(dirname ${0})\"\nPG_REGRESS=$(dirname \"$(pg_config --pgxs)\")/../test/regress/pg_regress\n${PG_REGRESS} --use-existing --inputdir=./ --bindir='/usr/local/pgsql/bin'    --dbname=contrib_regression prefix falcon explain queries"
  },
  {
    "path": "docker-compose/ext-src/rag_bge_small_en_v15-src/Makefile",
    "content": "EXTENSION = rag_bge_small_en_v15\nMODULE_big = rag_bge_small_en_v15\nOBJS = $(patsubst %.rs,%.o,$(wildcard src/*.rs))\n\nREGRESS = basic_functions embedding_functions basic_functions_enhanced embedding_functions_enhanced\n\nPG_CONFIG = pg_config\nPGXS := $(shell $(PG_CONFIG) --pgxs)\nPG_REGRESS := $(dir $(PGXS))../../src/test/regress/pg_regress\n.PHONY installcheck:\ninstallcheck:\n\tdropdb --if-exists contrib_regression\n\tcreatedb contrib_regression\n\t../alter_db.sh\n\tpsql -d contrib_regression -c \"CREATE EXTENSION vector\" -c \"CREATE EXTENSION rag_bge_small_en_v15\"\n\t$(PG_REGRESS) --use-existing --dbname=contrib_regression $(REGRESS)"
  },
  {
    "path": "docker-compose/ext-src/rag_bge_small_en_v15-src/expected/basic_functions.out",
    "content": "-- Basic function tests\nSELECT rag_bge_small_en_v15.chunks_by_token_count('the cat sat on the mat', 3, 2);\n                 chunks_by_token_count                  \n--------------------------------------------------------\n {\"the cat sat\",\"cat sat on\",\"sat on the\",\"on the mat\"}\n(1 row)\n\n"
  },
  {
    "path": "docker-compose/ext-src/rag_bge_small_en_v15-src/expected/basic_functions_enhanced.out",
    "content": "-- Basic function tests for chunks_by_token_count\nSELECT rag_bge_small_en_v15.chunks_by_token_count('the cat sat on the mat', 3, 2);\n                 chunks_by_token_count                  \n--------------------------------------------------------\n {\"the cat sat\",\"cat sat on\",\"sat on the\",\"on the mat\"}\n(1 row)\n\nSELECT rag_bge_small_en_v15.chunks_by_token_count('Lorem ipsum dolor sit amet, consectetur adipiscing elit. Sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.', 5, 2);\n                                                                              chunks_by_token_count                                                                              \n---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------\n {\"Lorem ipsum\",\"ipsum dolor sit\",\"sit amet,\",consectetur,\"adipiscing elit\",elit.,\"Sed do\",\"do eiusmod\",tempor,\"incididunt ut\",\"ut labore et\",\"et dolore magna\",\"magna aliqua.\"}\n(1 row)\n\nSELECT (rag_bge_small_en_v15.chunks_by_token_count('the cat', 5, 0))[1];\n chunks_by_token_count \n-----------------------\n the cat\n(1 row)\n\nSELECT rag_bge_small_en_v15.chunks_by_token_count('', 5, 2);\n chunks_by_token_count \n-----------------------\n {}\n(1 row)\n\nSELECT rag_bge_small_en_v15.chunks_by_token_count('a b c d e f g h i j k l m n o p', 3, 1);\n                      chunks_by_token_count                      \n-----------------------------------------------------------------\n {\"a b c\",\"c d e\",\"e f g\",\"g h i\",\"i j k\",\"k l m\",\"m n o\",\"o p\"}\n(1 row)\n\n"
  },
  {
    "path": "docker-compose/ext-src/rag_bge_small_en_v15-src/expected/embedding_functions.out",
    "content": "-- Embedding function tests\nSELECT 'embedding_for_passage_test' AS test_name, \n       vector_dims(rag_bge_small_en_v15.embedding_for_passage('the cat sat on the mat')) > 0 AS result;\n         test_name          | result \n----------------------------+--------\n embedding_for_passage_test | t\n(1 row)\n\nSELECT 'embedding_for_query_test' AS test_name,\n       vector_dims(rag_bge_small_en_v15.embedding_for_query('the cat sat on the mat')) > 0 AS result;\n        test_name         | result \n--------------------------+--------\n embedding_for_query_test | t\n(1 row)\n\n"
  },
  {
    "path": "docker-compose/ext-src/rag_bge_small_en_v15-src/expected/embedding_functions_enhanced.out",
    "content": "-- Embedding function tests\nSELECT 'embedding_for_passage_test_1' AS test_name, \n       vector_dims(rag_bge_small_en_v15.embedding_for_passage('the cat sat on the mat')) > 0 AS result;\n          test_name           | result \n------------------------------+--------\n embedding_for_passage_test_1 | t\n(1 row)\n\nSELECT 'embedding_for_passage_test_2' AS test_name,\n       vector_dims(rag_bge_small_en_v15.embedding_for_passage('Lorem ipsum dolor sit amet')) > 0 AS result;\n          test_name           | result \n------------------------------+--------\n embedding_for_passage_test_2 | t\n(1 row)\n\nSELECT 'embedding_for_passage_test_3' AS test_name,\n       vector_dims(rag_bge_small_en_v15.embedding_for_passage('')) > 0 AS result;\n          test_name           | result \n------------------------------+--------\n embedding_for_passage_test_3 | t\n(1 row)\n\nSELECT 'embedding_for_query_test_1' AS test_name,\n       vector_dims(rag_bge_small_en_v15.embedding_for_query('the cat sat on the mat')) > 0 AS result;\n         test_name          | result \n----------------------------+--------\n embedding_for_query_test_1 | t\n(1 row)\n\nSELECT 'embedding_for_query_test_2' AS test_name,\n       vector_dims(rag_bge_small_en_v15.embedding_for_query('Lorem ipsum dolor sit amet')) > 0 AS result;\n         test_name          | result \n----------------------------+--------\n embedding_for_query_test_2 | t\n(1 row)\n\nSELECT 'embedding_for_query_test_3' AS test_name,\n       vector_dims(rag_bge_small_en_v15.embedding_for_query('')) > 0 AS result;\n         test_name          | result \n----------------------------+--------\n embedding_for_query_test_3 | t\n(1 row)\n\n-- Test that passage and query embeddings have the same dimensions\nSELECT 'embedding_dimensions_match' AS test_name,\n       vector_dims(rag_bge_small_en_v15.embedding_for_passage('test')) = \n       vector_dims(rag_bge_small_en_v15.embedding_for_query('test')) AS result;\n         test_name          
| result \n----------------------------+--------\n embedding_dimensions_match | t\n(1 row)\n\n"
  },
  {
    "path": "docker-compose/ext-src/rag_bge_small_en_v15-src/sql/basic_functions.sql",
    "content": "-- Basic function tests\nSELECT rag_bge_small_en_v15.chunks_by_token_count('the cat sat on the mat', 3, 2);\n"
  },
  {
    "path": "docker-compose/ext-src/rag_bge_small_en_v15-src/sql/basic_functions_enhanced.sql",
    "content": "-- Basic function tests for chunks_by_token_count\nSELECT rag_bge_small_en_v15.chunks_by_token_count('the cat sat on the mat', 3, 2);\n\nSELECT rag_bge_small_en_v15.chunks_by_token_count('Lorem ipsum dolor sit amet, consectetur adipiscing elit. Sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.', 5, 2);\n\nSELECT (rag_bge_small_en_v15.chunks_by_token_count('the cat', 5, 0))[1];\n\nSELECT rag_bge_small_en_v15.chunks_by_token_count('', 5, 2);\n\nSELECT rag_bge_small_en_v15.chunks_by_token_count('a b c d e f g h i j k l m n o p', 3, 1);\n"
  },
  {
    "path": "docker-compose/ext-src/rag_bge_small_en_v15-src/sql/embedding_functions.sql",
    "content": "-- Embedding function tests\nSELECT 'embedding_for_passage_test' AS test_name, \n       vector_dims(rag_bge_small_en_v15.embedding_for_passage('the cat sat on the mat')) > 0 AS result;\n\nSELECT 'embedding_for_query_test' AS test_name,\n       vector_dims(rag_bge_small_en_v15.embedding_for_query('the cat sat on the mat')) > 0 AS result;\n"
  },
  {
    "path": "docker-compose/ext-src/rag_bge_small_en_v15-src/sql/embedding_functions_enhanced.sql",
    "content": "-- Embedding function tests\nSELECT 'embedding_for_passage_test_1' AS test_name, \n       vector_dims(rag_bge_small_en_v15.embedding_for_passage('the cat sat on the mat')) > 0 AS result;\n\nSELECT 'embedding_for_passage_test_2' AS test_name,\n       vector_dims(rag_bge_small_en_v15.embedding_for_passage('Lorem ipsum dolor sit amet')) > 0 AS result;\n\nSELECT 'embedding_for_passage_test_3' AS test_name,\n       vector_dims(rag_bge_small_en_v15.embedding_for_passage('')) > 0 AS result;\n\nSELECT 'embedding_for_query_test_1' AS test_name,\n       vector_dims(rag_bge_small_en_v15.embedding_for_query('the cat sat on the mat')) > 0 AS result;\n\nSELECT 'embedding_for_query_test_2' AS test_name,\n       vector_dims(rag_bge_small_en_v15.embedding_for_query('Lorem ipsum dolor sit amet')) > 0 AS result;\n\nSELECT 'embedding_for_query_test_3' AS test_name,\n       vector_dims(rag_bge_small_en_v15.embedding_for_query('')) > 0 AS result;\n\n-- Test that passage and query embeddings have the same dimensions\nSELECT 'embedding_dimensions_match' AS test_name,\n       vector_dims(rag_bge_small_en_v15.embedding_for_passage('test')) = \n       vector_dims(rag_bge_small_en_v15.embedding_for_query('test')) AS result;\n"
  },
  {
    "path": "docker-compose/ext-src/rag_jina_reranker_v1_tiny_en-src/Makefile",
    "content": "EXTENSION = rag_jina_reranker_v1_tiny_en\nMODULE_big = rag_jina_reranker_v1_tiny_en\nOBJS = $(patsubst %.rs,%.o,$(wildcard src/*.rs))\n\nREGRESS = reranking_functions reranking_functions_enhanced\n\nPG_CONFIG = pg_config\nPGXS := $(shell $(PG_CONFIG) --pgxs)\nPG_REGRESS := $(dir $(PGXS))../../src/test/regress/pg_regress\n.PHONY installcheck:\ninstallcheck:\n\tdropdb --if-exists contrib_regression\n\tcreatedb contrib_regression\n\t../alter_db.sh\n\tpsql -d contrib_regression -c \"CREATE EXTENSION vector\" -c \"CREATE EXTENSION rag_jina_reranker_v1_tiny_en\"\n\t$(PG_REGRESS) --use-existing --dbname=contrib_regression $(REGRESS)\n"
  },
  {
    "path": "docker-compose/ext-src/rag_jina_reranker_v1_tiny_en-src/expected/reranking_functions.out",
    "content": "-- Reranking function tests\nSELECT ROUND(rag_jina_reranker_v1_tiny_en.rerank_distance('the cat sat on the mat', 'the baboon played with the balloon')::NUMERIC,4);\n round  \n--------\n 0.8989\n(1 row)\n\nSELECT ARRAY(SELECT ROUND(x::NUMERIC,4) FROM unnest(rag_jina_reranker_v1_tiny_en.rerank_distance('the cat sat on the mat',\n    ARRAY['the baboon played with the balloon', 'the tanks fired at the buildings'])) AS x);\n      array      \n-----------------\n {0.8989,1.3018}\n(1 row)\n\nSELECT ROUND(rag_jina_reranker_v1_tiny_en.rerank_score('the cat sat on the mat', 'the baboon played with the balloon')::NUMERIC,4);\n  round  \n---------\n -0.8989\n(1 row)\n\nSELECT ARRAY(SELECT ROUND(x::NUMERIC,4) FROM unnest(rag_jina_reranker_v1_tiny_en.rerank_score('the cat sat on the mat',\n    ARRAY['the baboon played with the balloon', 'the tanks fired at the buildings'])) as x);\n       array       \n-------------------\n {-0.8989,-1.3018}\n(1 row)\n\n"
  },
  {
    "path": "docker-compose/ext-src/rag_jina_reranker_v1_tiny_en-src/expected/reranking_functions_enhanced.out",
    "content": "-- Reranking function tests - single passage\nSELECT ROUND(rag_jina_reranker_v1_tiny_en.rerank_distance('the cat sat on the mat', 'the baboon played with the balloon')::NUMERIC,4);\n round  \n--------\n 0.8989\n(1 row)\n\nSELECT ROUND(rag_jina_reranker_v1_tiny_en.rerank_distance('the cat sat on the mat', 'the tanks fired at the buildings')::NUMERIC,4);\n round  \n--------\n 1.3018\n(1 row)\n\nSELECT ROUND(rag_jina_reranker_v1_tiny_en.rerank_distance('query about cats', 'information about felines')::NUMERIC,4);\n round  \n--------\n 1.3133\n(1 row)\n\nSELECT ROUND(rag_jina_reranker_v1_tiny_en.rerank_distance('', 'empty query test')::NUMERIC,4);\n round  \n--------\n 0.7076\n(1 row)\n\n-- Reranking function tests - array of passages\nSELECT ARRAY(SELECT ROUND(x::NUMERIC,4) FROM unnest(rag_jina_reranker_v1_tiny_en.rerank_distance('the cat sat on the mat',\n    ARRAY['the baboon played with the balloon', 'the tanks fired at the buildings'])) AS x);\n      array      \n-----------------\n {0.8989,1.3018}\n(1 row)\n\nSELECT ARRAY(SELECT ROUND(x::NUMERIC,4) FROM unnest(rag_jina_reranker_v1_tiny_en.rerank_distance('query about programming',\n    ARRAY['Python is a programming language', 'Java is also a programming language', 'SQL is used for databases'])) AS x);\n         array          \n------------------------\n {0.1659,0.3348,0.1013}\n(1 row)\n\nSELECT rag_jina_reranker_v1_tiny_en.rerank_distance('empty array test', ARRAY[]::text[]);\n rerank_distance \n-----------------\n {}\n(1 row)\n\n-- Reranking score function tests - single passage\nSELECT ROUND(rag_jina_reranker_v1_tiny_en.rerank_score('the cat sat on the mat', 'the baboon played with the balloon')::NUMERIC,4);\n  round  \n---------\n -0.8989\n(1 row)\n\nSELECT ROUND(rag_jina_reranker_v1_tiny_en.rerank_score('the cat sat on the mat', 'the tanks fired at the buildings')::NUMERIC,4);\n  round  \n---------\n -1.3018\n(1 row)\n\nSELECT ROUND(rag_jina_reranker_v1_tiny_en.rerank_score('query about 
cats', 'information about felines')::NUMERIC,4);\n  round  \n---------\n -1.3133\n(1 row)\n\nSELECT ROUND(rag_jina_reranker_v1_tiny_en.rerank_score('', 'empty query test')::NUMERIC,4);\n  round  \n---------\n -0.7076\n(1 row)\n\n-- Reranking score function tests - array of passages\nSELECT ARRAY(SELECT ROUND(x::NUMERIC,4) FROM unnest(rag_jina_reranker_v1_tiny_en.rerank_score('the cat sat on the mat',\n    ARRAY['the baboon played with the balloon', 'the tanks fired at the buildings'])) AS x);\n       array       \n-------------------\n {-0.8989,-1.3018}\n(1 row)\n\nSELECT ARRAY(SELECT ROUND(x::NUMERIC,4) FROM unnest(rag_jina_reranker_v1_tiny_en.rerank_score('query about programming',\n    ARRAY['Python is a programming language', 'Java is also a programming language', 'SQL is used for databases'])) AS x);\n           array           \n---------------------------\n {-0.1659,-0.3348,-0.1013}\n(1 row)\n\nSELECT rag_jina_reranker_v1_tiny_en.rerank_score('empty array test', ARRAY[]::text[]);\n rerank_score \n--------------\n {}\n(1 row)\n\n"
  },
  {
    "path": "docker-compose/ext-src/rag_jina_reranker_v1_tiny_en-src/sql/reranking_functions.sql",
    "content": "-- Reranking function tests\nSELECT ROUND(rag_jina_reranker_v1_tiny_en.rerank_distance('the cat sat on the mat', 'the baboon played with the balloon')::NUMERIC,4);\n\nSELECT ARRAY(SELECT ROUND(x::NUMERIC,4) FROM unnest(rag_jina_reranker_v1_tiny_en.rerank_distance('the cat sat on the mat',\n    ARRAY['the baboon played with the balloon', 'the tanks fired at the buildings'])) AS x);\n\nSELECT ROUND(rag_jina_reranker_v1_tiny_en.rerank_score('the cat sat on the mat', 'the baboon played with the balloon')::NUMERIC,4);\n\nSELECT ARRAY(SELECT ROUND(x::NUMERIC,4) FROM unnest(rag_jina_reranker_v1_tiny_en.rerank_score('the cat sat on the mat',\n    ARRAY['the baboon played with the balloon', 'the tanks fired at the buildings'])) as x);"
  },
  {
    "path": "docker-compose/ext-src/rag_jina_reranker_v1_tiny_en-src/sql/reranking_functions_enhanced.sql",
    "content": "-- Reranking function tests - single passage\nSELECT ROUND(rag_jina_reranker_v1_tiny_en.rerank_distance('the cat sat on the mat', 'the baboon played with the balloon')::NUMERIC,4);\n\nSELECT ROUND(rag_jina_reranker_v1_tiny_en.rerank_distance('the cat sat on the mat', 'the tanks fired at the buildings')::NUMERIC,4);\n\nSELECT ROUND(rag_jina_reranker_v1_tiny_en.rerank_distance('query about cats', 'information about felines')::NUMERIC,4);\n\nSELECT ROUND(rag_jina_reranker_v1_tiny_en.rerank_distance('', 'empty query test')::NUMERIC,4);\n\n-- Reranking function tests - array of passages\nSELECT ARRAY(SELECT ROUND(x::NUMERIC,4) FROM unnest(rag_jina_reranker_v1_tiny_en.rerank_distance('the cat sat on the mat',\n    ARRAY['the baboon played with the balloon', 'the tanks fired at the buildings'])) AS x);\n\nSELECT ARRAY(SELECT ROUND(x::NUMERIC,4) FROM unnest(rag_jina_reranker_v1_tiny_en.rerank_distance('query about programming',\n    ARRAY['Python is a programming language', 'Java is also a programming language', 'SQL is used for databases'])) AS x);\n\nSELECT rag_jina_reranker_v1_tiny_en.rerank_distance('empty array test', ARRAY[]::text[]);\n\n-- Reranking score function tests - single passage\nSELECT ROUND(rag_jina_reranker_v1_tiny_en.rerank_score('the cat sat on the mat', 'the baboon played with the balloon')::NUMERIC,4);\n\nSELECT ROUND(rag_jina_reranker_v1_tiny_en.rerank_score('the cat sat on the mat', 'the tanks fired at the buildings')::NUMERIC,4);\n\nSELECT ROUND(rag_jina_reranker_v1_tiny_en.rerank_score('query about cats', 'information about felines')::NUMERIC,4);\n\nSELECT ROUND(rag_jina_reranker_v1_tiny_en.rerank_score('', 'empty query test')::NUMERIC,4);\n\n-- Reranking score function tests - array of passages\nSELECT ARRAY(SELECT ROUND(x::NUMERIC,4) FROM unnest(rag_jina_reranker_v1_tiny_en.rerank_score('the cat sat on the mat',\n    ARRAY['the baboon played with the balloon', 'the tanks fired at the buildings'])) AS x);\n\nSELECT ARRAY(SELECT 
ROUND(x::NUMERIC,4) FROM unnest(rag_jina_reranker_v1_tiny_en.rerank_score('query about programming',\n    ARRAY['Python is a programming language', 'Java is also a programming language', 'SQL is used for databases'])) AS x);\n\nSELECT rag_jina_reranker_v1_tiny_en.rerank_score('empty array test', ARRAY[]::text[]);\n"
  },
  {
    "path": "docker-compose/ext-src/rum-src/regular-test.sh",
    "content": "#!/bin/sh\nset -ex\ncd \"$(dirname ${0})\"\ndropdb --if-exist contrib_regression\ncreatedb contrib_regression\n. ../alter_db.sh\nPG_REGRESS=$(dirname \"$(pg_config --pgxs)\")/../test/regress/pg_regress\n${PG_REGRESS} --inputdir=./ --bindir='/usr/local/pgsql/bin' --use-existing --dbname=contrib_regression rum rum_hash ruminv timestamp orderby orderby_hash altorder altorder_hash limits int2 int4 int8 float4 float8 money oid time timetz date interval macaddr inet cidr text varchar char bytea bit varbit numeric rum_weight expr array"
  },
  {
    "path": "docker-compose/ext-src/rum-src/test-upgrade.patch",
    "content": "diff --git a/expected/rum.out b/expected/rum.out\nindex 5966d19..8860b79 100644\n--- a/expected/rum.out\n+++ b/expected/rum.out\n@@ -1,4 +1,3 @@\n-CREATE EXTENSION rum;\n CREATE TABLE test_rum( t text, a tsvector );\n CREATE TRIGGER tsvectorupdate\n BEFORE UPDATE OR INSERT ON test_rum\ndiff --git a/sql/rum.sql b/sql/rum.sql\nindex 8414bb9..898e6ab 100644\n--- a/sql/rum.sql\n+++ b/sql/rum.sql\n@@ -1,5 +1,3 @@\n-CREATE EXTENSION rum;\n-\n CREATE TABLE test_rum( t text, a tsvector );\n\n CREATE TRIGGER tsvectorupdate"
  },
  {
    "path": "docker-compose/ext-src/rum-src/test-upgrade.sh",
    "content": "#!/bin/sh\nset -ex\ncd \"$(dirname ${0})\"\npatch -p1 <test-upgrade.patch\nPG_REGRESS=$(dirname \"$(pg_config --pgxs)\")/../test/regress/pg_regress\n${PG_REGRESS} --inputdir=./ --bindir='/usr/local/pgsql/bin' --use-existing --dbname=contrib_regression rum rum_validate rum_hash ruminv timestamp orderby orderby_hash altorder altorder_hash limits int2 int4 int8 float4 float8 money oid time timetz date interval macaddr inet cidr text varchar char bytea bit varbit numeric rum_weight expr array"
  },
  {
    "path": "docker-compose/pageserver_config/identity.toml",
    "content": "id=1234\n"
  },
  {
    "path": "docker-compose/pageserver_config/pageserver.toml",
    "content": "broker_endpoint='http://storage_broker:50051'\npg_distrib_dir='/usr/local/'\nlisten_pg_addr='0.0.0.0:6400'\nlisten_http_addr='0.0.0.0:9898'\nremote_storage={ endpoint='http://minio:9000', bucket_name='neon', bucket_region='eu-north-1', prefix_in_bucket='/pageserver' }\ncontrol_plane_api='http://0.0.0.0:6666' # No storage controller in docker compose, specify a junk address\ncontrol_plane_emergency_mode=true\nvirtual_file_io_mode=\"buffered\" # the CI runners where we run the docker compose tests have slow disks\n"
  },
  {
    "path": "docker-compose/run-tests.sh",
    "content": "#!/usr/bin/env bash\nset -x\n\nif [[ -v BENCHMARK_CONNSTR ]]; then\n  uri_no_proto=\"${BENCHMARK_CONNSTR#postgres://}\"\n  uri_no_proto=\"${uri_no_proto#postgresql://}\"\n  if [[ $uri_no_proto == *\\?* ]]; then\n    base=\"${uri_no_proto%%\\?*}\"       # before '?'\n  else\n    base=\"$uri_no_proto\"\n  fi\n  if [[ $base =~ ^([^:]+):([^@]+)@([^:/]+):?([0-9]*)/(.+)$ ]]; then\n    export PGUSER=\"${BASH_REMATCH[1]}\"\n    export PGPASSWORD=\"${BASH_REMATCH[2]}\"\n    export PGHOST=\"${BASH_REMATCH[3]}\"\n    export PGPORT=\"${BASH_REMATCH[4]:-5432}\"\n    export PGDATABASE=\"${BASH_REMATCH[5]}\"\n    echo export PGUSER=\"${BASH_REMATCH[1]}\"\n    echo export PGPASSWORD=\"${BASH_REMATCH[2]}\"\n    echo export PGHOST=\"${BASH_REMATCH[3]}\"\n    echo export PGPORT=\"${BASH_REMATCH[4]:-5432}\"\n    echo export PGDATABASE=\"${BASH_REMATCH[5]}\"\n  else\n    echo \"Invalid PostgreSQL base URI\"\n    exit 1\n  fi\nfi\nREGULAR_USER=false\nPARALLEL_COMPUTES=${PARALLEL_COMPUTES:-1}\nwhile getopts pr arg; do\n  case ${arg} in\n  r)\n    REGULAR_USER=true\n    shift $((OPTIND-1))\n    ;;\n  *) :\n    ;;\n  esac\ndone\n\nextdir=${1}\n\ncd \"${extdir}\" || exit 2\nFAILED=\nexport FAILED_FILE=/tmp/failed\nrm -f ${FAILED_FILE}\nmapfile -t LIST < <( (echo -e \"${SKIP//\",\"/\"\\n\"}\"; ls) | sort | uniq -u)\nif [[ ${PARALLEL_COMPUTES} -gt 1 ]]; then\n  # Avoid errors if RUN_FIRST is not defined\n  RUN_FIRST=${RUN_FIRST:-}\n  # Move entries listed in the RUN_FIRST variable to the beginning\n  ORDERED_LIST=$(printf \"%s\\n\" \"${LIST[@]}\" | grep -x -Ff <(echo -e \"${RUN_FIRST//,/$'\\n'}\"); printf \"%s\\n\" \"${LIST[@]}\" | grep -vx -Ff <(echo -e \"${RUN_FIRST//,/$'\\n'}\"))\n  parallel -j\"${PARALLEL_COMPUTES}\" \"[[ -d {} ]] || exit 0\n                export PGHOST=compute{%}\n                if ! 
psql -c 'select 1'>/dev/null; then\n                  exit 1\n                fi\n                echo Running on \\${PGHOST}\n                if [[ -f ${extdir}/{}/neon-test.sh ]]; then\n                  echo Running from script\n                  ${extdir}/{}/neon-test.sh || echo {} >> ${FAILED_FILE};\n                else\n                  echo Running using make;\n                  USE_PGXS=1 make -C {} installcheck || echo {} >> ${FAILED_FILE};\n                fi\" ::: ${ORDERED_LIST}\n  [[ ! -f ${FAILED_FILE} ]] && exit 0\nelse\n  for d in \"${LIST[@]}\"; do\n      [ -d \"${d}\" ] || continue\n      if ! psql -w -c \"select 1\" >/dev/null; then\n        FAILED=\"${d} ${FAILED}\"\n        break\n      fi\n      if [[ ${REGULAR_USER} = true ]] && [ -f \"${d}\"/regular-test.sh ]; then\n        \"${d}/regular-test.sh\" || FAILED=\"${d} ${FAILED}\"\n        continue\n      fi\n\n      if [ -f \"${d}/neon-test.sh\" ]; then\n        \"${d}/neon-test.sh\" || FAILED=\"${d} ${FAILED}\"\n      else\n        USE_PGXS=1 make -C \"${d}\" installcheck || FAILED=\"${d} ${FAILED}\"\n      fi\n  done\n  [[ -z ${FAILED} ]]  && exit 0\nfi\nfor d in ${FAILED} $([[ ! -f ${FAILED_FILE} ]] || cat ${FAILED_FILE}); do\n  cat \"$(find $d -name regression.diffs)\"\ndone\nfor postgis_diff in /tmp/pgis_reg/*_diff; do\n  echo \"${postgis_diff}:\"\n  cat \"${postgis_diff}\"\ndone\necho \"${FAILED}\"\ncat ${FAILED_FILE}\nexit 1\n"
  },
  {
    "path": "docker-compose/test_extensions_upgrade.sh",
    "content": "#!/usr/bin/env bash\nset -eux -o pipefail\ncd \"$(dirname \"${0}\")\"\n# Takes a variable name as argument. The result is stored in that variable.\ngenerate_id() {\n    local -n resvar=$1\n    printf -v resvar '%08x%08x%08x%08x' $SRANDOM $SRANDOM $SRANDOM $SRANDOM\n}\necho \"${OLD_COMPUTE_TAG}\"\necho \"${NEW_COMPUTE_TAG}\"\necho \"${TEST_EXTENSIONS_TAG}\"\nif [ -z \"${OLD_COMPUTE_TAG:-}\" ] || [ -z \"${NEW_COMPUTE_TAG:-}\" ] || [ -z \"${TEST_EXTENSIONS_TAG:-}\" ]; then\n  echo OLD_COMPUTE_TAG, NEW_COMPUTE_TAG and TEST_EXTENSIONS_TAG must be set\n  exit 1\nfi\nexport PG_VERSION=${PG_VERSION:-16}\nexport PG_TEST_VERSION=${PG_VERSION}\n# Waits for compute node is ready\nfunction wait_for_ready {\n  TIME=0\n  while ! docker compose logs compute_is_ready | grep -q \"accepting connections\" && [ ${TIME} -le 300 ] ; do\n    ((TIME += 1 ))\n    sleep 1\n  done\n  if [ ${TIME} -gt 300 ]; then\n    echo Time is out.\n    exit 2\n  fi\n}\n# Creates extensions. Gets a string with space-separated extensions as a parameter\nfunction create_extensions() {\n  for ext in ${1}; do\n    docker compose exec neon-test-extensions psql -X -v ON_ERROR_STOP=1 -d contrib_regression -c \"CREATE EXTENSION IF NOT EXISTS ${ext} CASCADE\"\n  done\n}\n# Creates a new timeline. Gets the parent ID and an extension name as parameters.\n# Saves the timeline ID in the variable EXT_TIMELINE\nfunction create_timeline() {\n  generate_id new_timeline_id\n\n  PARAMS=(\n      -sbf\n      -X POST\n      -H \"Content-Type: application/json\"\n      -d \"{\\\"new_timeline_id\\\": \\\"${new_timeline_id}\\\", \\\"pg_version\\\": ${PG_VERSION}, \\\"ancestor_timeline_id\\\": \\\"${1}\\\"}\"\n      \"http://127.0.0.1:9898/v1/tenant/${tenant_id}/timeline/\"\n  )\n  result=$(curl \"${PARAMS[@]}\")\n  echo $result | jq .\n  EXT_TIMELINE[${2}]=${new_timeline_id}\n}\n# Checks if the timeline ID of the compute node is expected. 
Gets the timeline ID as a parameter\nfunction check_timeline() {\n    TID=$(docker compose exec neon-test-extensions psql -Aqt -c \"SHOW neon.timeline_id\")\n    if [ \"${TID}\" != \"${1}\" ]; then\n      echo Timeline mismatch\n      exit 1\n    fi\n}\n# Restarts the compute node with the required compute tag and timeline.\n# Accepts the tag for the compute node and the timeline as parameters.\nfunction restart_compute() {\n  docker compose down compute1 compute_is_ready\n  COMPUTE_TAG=${1} TENANT_ID=${tenant_id} TIMELINE_ID=${2} docker compose up --quiet-pull -d --build compute1 compute_is_ready\n  wait_for_ready\n  check_timeline ${2}\n}\ndeclare -A EXT_TIMELINE\nEXTENSIONS='[\n{\"extname\": \"plv8\", \"extdir\": \"plv8-src\"},\n{\"extname\": \"vector\", \"extdir\": \"pgvector-src\"},\n{\"extname\": \"unit\", \"extdir\": \"postgresql-unit-src\"},\n{\"extname\": \"hypopg\", \"extdir\": \"hypopg-src\"},\n{\"extname\": \"rum\", \"extdir\": \"rum-src\"},\n{\"extname\": \"ip4r\", \"extdir\": \"ip4r-src\"},\n{\"extname\": \"prefix\", \"extdir\": \"prefix-src\"},\n{\"extname\": \"hll\", \"extdir\": \"hll-src\"},\n{\"extname\": \"pg_cron\", \"extdir\": \"pg_cron-src\"},\n{\"extname\": \"pg_uuidv7\", \"extdir\": \"pg_uuidv7-src\"},\n{\"extname\": \"roaringbitmap\", \"extdir\": \"pg_roaringbitmap-src\"},\n{\"extname\": \"semver\", \"extdir\": \"pg_semver-src\"},\n{\"extname\": \"pg_ivm\", \"extdir\": \"pg_ivm-src\"},\n{\"extname\": \"pgjwt\", \"extdir\": \"pgjwt-src\"},\n{\"extname\": \"pgtap\", \"extdir\": \"pgtap-src\"},\n{\"extname\": \"pg_repack\", \"extdir\": \"pg_repack-src\"},\n{\"extname\": \"h3\", \"extdir\": \"h3-pg-src\"}\n]'\nEXTNAMES=$(echo ${EXTENSIONS} | jq -r '.[].extname' | paste -sd ' ' -)\nCOMPUTE_TAG=${NEW_COMPUTE_TAG} docker compose --profile test-extensions up --quiet-pull --build -d\nwait_for_ready\ndocker compose exec neon-test-extensions psql -c \"DROP DATABASE IF EXISTS contrib_regression\"\ndocker compose exec neon-test-extensions psql -c 
\"CREATE DATABASE contrib_regression\"\ncreate_extensions \"${EXTNAMES}\"\nquery=\"select json_object_agg(extname,extversion) from pg_extension where extname in ('${EXTNAMES// /\\',\\'}')\"\nnew_vers=$(docker compose exec neon-test-extensions psql -Aqt -d contrib_regression -c \"$query\")\ndocker compose --profile test-extensions down\nCOMPUTE_TAG=${OLD_COMPUTE_TAG} docker compose --profile test-extensions up --quiet-pull --build -d --force-recreate\nwait_for_ready\ndocker compose exec neon-test-extensions psql -c \"DROP DATABASE IF EXISTS contrib_regression\"\ndocker compose exec neon-test-extensions psql -c \"CREATE DATABASE contrib_regression\"\ntenant_id=$(docker compose exec neon-test-extensions psql -Aqt -c \"SHOW neon.tenant_id\")\nEXT_TIMELINE[\"main\"]=$(docker compose exec neon-test-extensions psql -Aqt -c \"SHOW neon.timeline_id\")\ncreate_timeline \"${EXT_TIMELINE[\"main\"]}\" init\nrestart_compute \"${OLD_COMPUTE_TAG}\" \"${EXT_TIMELINE[\"init\"]}\"\ncreate_extensions \"${EXTNAMES}\"\nif [ \"${FORCE_ALL_UPGRADE_TESTS:-false}\" = true ]; then\n  exts=\"${EXTNAMES}\"\nelse\n  query=\"select pge.extname from pg_extension pge join (select key as extname, value as extversion from json_each_text('${new_vers}')) x on pge.extname=x.extname and pge.extversion <> x.extversion\"\n  exts=$(docker compose exec neon-test-extensions psql -Aqt -d contrib_regression -c \"$query\")\nfi\nif [ -z \"${exts}\" ]; then\n  echo \"No extensions were upgraded\"\nelse\n  for ext in ${exts}; do\n    echo Testing ${ext}...\n    create_timeline \"${EXT_TIMELINE[\"main\"]}\" ${ext}\n    EXTDIR=$(echo ${EXTENSIONS} | jq -r '.[] | select(.extname==\"'${ext}'\") | .extdir')\n    restart_compute \"${OLD_COMPUTE_TAG}\" \"${EXT_TIMELINE[${ext}]}\"\n    docker compose exec neon-test-extensions psql -d contrib_regression -c \"CREATE EXTENSION ${ext} CASCADE\"\n    restart_compute \"${NEW_COMPUTE_TAG}\" \"${EXT_TIMELINE[${ext}]}\"\n    docker compose exec neon-test-extensions psql -d 
contrib_regression -c \"\\dx ${ext}\"\n    if ! docker compose exec neon-test-extensions sh -c /ext-src/${EXTDIR}/test-upgrade.sh; then\n      docker  compose exec neon-test-extensions  cat /ext-src/${EXTDIR}/regression.diffs\n      exit 1\n    fi\n    docker compose exec neon-test-extensions psql -d contrib_regression -c \"alter extension ${ext} update\"\n    docker compose exec neon-test-extensions psql -d contrib_regression -c \"\\dx ${ext}\"\n  done\nfi\n"
  },
  {
    "path": "docs/.gitignore",
    "content": "book\n"
  },
  {
    "path": "docs/SUMMARY.md",
    "content": "# Summary\n\n# Looking for `neon.tech` docs?\n\nThis page linkes to a selection of technical content about the open source code in this repository.\n\nPlease visit https://neon.tech/docs for documentation about using the Neon service, which is based on the code\nin this repository.\n\n# Architecture\n\n[Introduction]()\n- [Separation of Compute and Storage](./separation-compute-storage.md)\n\n- [Compute]()\n  - [Postgres changes](./core_changes.md)\n\n- [Pageserver](./pageserver.md)\n    - [Services](./pageserver-services.md)\n    - [Thread management](./pageserver-thread-mgmt.md)\n    - [WAL Redo](./pageserver-walredo.md)\n    - [Page cache](./pageserver-pagecache.md)\n    - [Storage](./pageserver-storage.md)\n    - [Compaction](./pageserver-compaction.md)\n    - [Processing a GetPage request](./pageserver-processing-getpage.md)\n    - [Processing WAL](./pageserver-processing-wal.md)\n\n- [WAL Service](walservice.md)\n  - [Consensus protocol](safekeeper-protocol.md)\n\n- [Source view](./sourcetree.md)\n  - [docker.md](./docker.md) — Docker images and building pipeline.\n  - [Error handling and logging](./error-handling.md)\n\n- [Glossary](./glossary.md)\n\n# Uncategorized\n\n- [authentication.md](./authentication.md)\n- [multitenancy.md](./multitenancy.md) — how multitenancy is organized in the pageserver and Zenith CLI.\n- [settings.md](./settings.md)\n#FIXME: move these under sourcetree.md\n#- [postgres_ffi/README.md](/libs/postgres_ffi/README.md)\n#- [test_runner/README.md](/test_runner/README.md)\n\n\n# RFCs\n\nMajor changes are documented in RFCS:\n- See [RFCs](./rfcs/README.md) for more information\n- view the RFCs at https://github.com/neondatabase/neon/tree/main/docs/rfcs\n"
  },
  {
    "path": "docs/authentication.md",
    "content": "## Authentication\n\n### Overview\nWe use JWT tokens in communication between almost all components (compute, pageserver, safekeeper, CLI) regardless of the protocol used (HTTP/PostgreSQL).\nstorage_broker currently has no authentication.\nAuthentication is optional and is disabled by default for easier debugging.\nIt is used in some tests, though.\nNote that we do not cover authentication with `pg.neon.tech` here.\n\nFor HTTP connections we use the Bearer authentication scheme.\nFor PostgreSQL connections we expect the token to be passed as a password.\nThere is a caveat for `psql`: it silently truncates passwords to 100 symbols, so to correctly pass JWT via `psql` you have to either use `PGPASSWORD` environment variable, or store password in `psql`'s config file.\n\nCurrent token scopes are described in `utils::auth::Scope`.\nThere are no expiration or rotation schemes.\n\n_TODO_: some scopes allow both access to server management API and to the data.\nThese probably should be split into multiple scopes.\n\nTokens should not occur in logs.\nThey may sometimes occur in configuration files, although this is discouraged\nbecause configs may be parsed and dumped into logs.\n\n#### Tokens generation and validation\nJWT tokens are signed using a private key.\nCompute/pageserver/safekeeper use the private key's public counterpart to validate JWT tokens.\nThese components should not have access to the private key and may only get tokens from their configuration or external clients. \n\nThe key pair is generated once for an installation of compute/pageserver/safekeeper, e.g. 
by `neon_local init`.\nThere is currently no way to rotate the key without bringing down all components.\n\n### Best practices\n\nSee [RFC 8725: JSON Web Token Best Current Practices](https://www.rfc-editor.org/rfc/rfc8725)\n\n\n### Token format\n\nThe JWT tokens in Neon use \"EdDSA\" as the algorithm (defined in [RFC8037](https://www.rfc-editor.org/rfc/rfc8037)).\n\nExample:\n\nHeader:\n\n```\n{\n  \"alg\": \"EdDSA\",\n  \"typ\": \"JWT\"\n}\n```\n\nPayload:\n\n```\n{\n  \"scope\": \"tenant\",  # \"tenant\", \"pageserverapi\", or \"safekeeperdata\"\n  \"tenant_id\": \"5204921ff44f09de8094a1390a6a50f6\",\n}\n```\n\n\nMeanings of scope:\n\n\"tenant\": Provides access to all data for a specific tenant\n\n\"pageserverapi\": Provides blanket access to all tenants on the pageserver plus pageserver-wide APIs.\nShould only be used e.g. for status check/tenant creation/list.\n\n\"safekeeperdata\": Provides blanket access to all data on the safekeeper plus safekeeper-wide APIs.\nShould only be used e.g. 
for status check.\nCurrently also used for connection from any pageserver to any safekeeper.\n\n\"generations_api\": Provides access to the upcall APIs served by the storage controller or the control plane.\n\n\"admin\": Provides access to the control plane and admin APIs of the storage controller.\n\n### CLI\nCLI generates a key pair during call to `neon_local init` with the following commands:\n\n```bash\nopenssl genpkey -algorithm ed25519 -out auth_private_key.pem\nopenssl pkey -in auth_private_key.pem -pubout -out auth_public_key.pem\n```\n\nConfiguration files for all components point to `public_key.pem` for JWT validation.\nHowever, authentication is disabled by default.\nThere is no way to automatically enable it everywhere, you have to configure each component individually.\n\nCLI also generates signed token (full access to Pageserver) and saves it in\nthe CLI's `config` file under `pageserver.auth_token`.\nNote that pageserver's config does not have any similar parameter.\nCLI is the only component which accesses that token.\nTechnically it could generate it from the private key on each run,\nbut it does not do that for some reason (_TODO_).\n\n### Compute\n#### Overview\nCompute is a per-timeline PostgreSQL instance, so it should not have\nany access to data of other tenants.\nAll tokens used by a compute are restricted to a specific tenant.\nThere is no auth isolation from other timelines of the same tenant,\nbut a non-rogue client never accesses another timeline even by an accident:\ntimeline IDs are random and hard to guess.\n\n#### Incoming connections\nAll incoming connections are from PostgreSQL clients.\nTheir authentication is just plain PostgreSQL authentication and out of scope for this document.\n\nThere is no administrative API except those provided by PostgreSQL.\n\n#### Outgoing connections\nCompute connects to Pageserver for getting pages. The connection string is\nconfigured by the `neon.pageserver_connstring` PostgreSQL GUC,\ne.g. 
`postgresql://no_user@localhost:15028`. If the `$NEON_AUTH_TOKEN`\nenvironment variable is set, it is used as the password for the connection. (The\npageserver uses JWT tokens for authentication, so the password is really a\ntoken.)\n\nCompute connects to Safekeepers to write and commit data. The list of safekeeper\naddresses is given in the `neon.safekeepers` GUC. The connections to the\nsafekeepers take the password from the `$NEON_AUTH_TOKEN` environment\nvariable, if set.\n\nThe `compute_ctl` binary that runs before the PostgreSQL server, and launches\nPostgreSQL, also makes a connection to the pageserver. It uses it to fetch the\ninitial \"base backup\" dump, to initialize the PostgreSQL data directory. It also\nuses `$NEON_AUTH_TOKEN` as the password for the connection.\n\n### Pageserver\n#### Overview\nPageserver keeps track of multiple tenants, each having multiple timelines.\nFor each timeline, it connects to the corresponding Safekeeper.\nInformation about \"corresponding Safekeeper\" is published by Safekeepers\nin the storage_broker, but they do not publish access tokens, otherwise what is\nthe point of authentication.\n\nPageserver keeps a connection to some set of Safekeepers, which\nmay or may not correspond to active Computes.\nHence, we cannot obtain a per-timeline access token from a Compute.\nE.g. if the timeline's Compute terminates before all WAL is\nconsumed by the Pageserver, the Pageserver continues consuming WAL.\n\nPageserver replicas' authentication is the same as the main's.\n\n#### Incoming connections\nPageserver listens for connections from computes.\nEach compute should present a token valid for the timeline's tenant.\n\nPageserver also has HTTP API: some parts are per-tenant,\nsome parts are server-wide, these are different scopes.\n\nAuthentication can be enabled separately for the HTTP mgmt API, and\nfor the libpq connections from compute. 
The `http_auth_type` and\n`pg_auth_type` configuration variables in Pageserver's config may\nhave one of these values:\n\n* `Trust` removes all authentication.\n* `NeonJWT` enables JWT validation.\n   Tokens are validated using the public key which lies in a PEM file\n   specified in the `auth_validation_public_key_path` config.\n\n#### Outgoing connections\nPageserver makes a connection to a Safekeeper for each active timeline.\nAs Pageserver may want to access any timeline it has on the disk,\nit is given a blanket JWT token to access any data on any Safekeeper.\nThis token is passed through an environment variable called `NEON_AUTH_TOKEN`\n(non-configurable as of writing this text).\n\nA better way _may be_ to store JWT token for each timeline next to it,\nbut maybe not.\n\n### Safekeeper\n#### Overview\nSafekeeper keeps track of multiple tenants, each having multiple timelines.\n\n#### Incoming connections\nSafekeeper accepts connections from Compute/Pageserver, each\nconnection corresponds to a specific timeline and requires\na corresponding JWT token.\n\nSafekeeper also has HTTP API: some parts are per-tenant,\nsome parts are server-wide, these are different scopes.\n\nThe `auth-validation-public-key-path` command line option controls\nthe authentication mode:\n\n* If the option is missing, there is no authentication or JWT token validation.\n* If the option is present, it should be a path to the public key PEM file used for JWT token validation.\n\n#### Outgoing connections\nNo connections are initiated by a Safekeeper.\n\n### In the source code\nTests do not use authentication by default.\nIf you need it, you can enable it by configuring the test's environment:\n\n```python\nneon_env_builder.auth_enabled = True\n```\n\nYou will have to generate tokens if you want to access components inside the test directly,\nuse `AuthKeys.generate_*_token` methods for that.\nIf you create a new scope, please create a new method to prevent typos in the scope's name.\n"
  },
  {
    "path": "docs/book.toml",
    "content": "[book]\nlanguage = \"en\"\nmultilingual = false\nsrc = \".\"\ntitle = \"Neon architecture\"\n"
  },
  {
    "path": "docs/consumption_metrics.md",
    "content": "### Overview\nPageserver and proxy periodically collect consumption metrics and push them to a HTTP endpoint.\n\nThis doc describes current implementation details.\nFor design details see [the RFC](./rfcs/021-metering.md) and [the discussion on Github](https://github.com/neondatabase/neon/pull/2884).\n\n- The metrics are collected in a separate thread, and the collection interval and endpoint are configurable.\n\n- Metrics are cached, so that we don't send unchanged metrics on every iteration.\n\n- Metrics are sent in batches of 1000 (see CHUNK_SIZE const) metrics max with no particular grouping guarantees.\n\nbatch format is\n```json\n\n{ \"events\" : [metric1, metric2, ...] }\n\n```\nSee metric format examples below.\n\n- All metrics values are in bytes, unless otherwise specified.\n\n- Currently no retries are implemented.\n\n### Pageserver metrics\n\n#### Configuration\nThe endpoint and the collection interval are specified in the pageserver config file (or can be passed as command line arguments):\n`metric_collection_endpoint` defaults to None, which means that metric collection is disabled by default.\n`metric_collection_interval` defaults to 10min\n\n#### Metrics\n\nCurrently, the following metrics are collected:\n\n- `written_size`\n\nAmount of WAL produced , by a timeline, i.e. 
last_record_lsn\nThis is an absolute, per-timeline metric.\n\n- `remote_storage_size`\n\nSize of the remote storage (S3) directory.\nThis is an absolute, per-tenant metric.\n\n- `timeline_logical_size`\n\nLogical size of the data in the timeline.\nThis is an absolute, per-timeline metric.\n\n- `synthetic_storage_size`\n\nSize of all tenant's branches including WAL.\nThis is the same metric that `tenant/{tenant_id}/size` endpoint returns.\nThis is an absolute, per-tenant metric.\n\nSynthetic storage size is calculated in a separate thread, so it might be slightly outdated.\n\n#### Format example\n\n```json\n{\n\"metric\": \"remote_storage_size\",\n\"type\": \"absolute\",\n\"time\": \"2022-12-28T11:07:19.317310284Z\",\n\"idempotency_key\": \"2022-12-28 11:07:19.317310324 UTC-1-4019\",\n\"value\": 12345454,\n\"tenant_id\": \"5d07d9ce9237c4cd845ea7918c0afa7d\",\n\"timeline_id\": \"a03ebb4f5922a1c56ff7485cc8854143\",\n}\n```\n\n`idempotency_key` is a unique key for each metric, so that we can deduplicate metrics.\nIt is a combination of the time, node_id and a random number.\n\n### Proxy consumption metrics\n\n#### Configuration\nThe endpoint and the collection interval can be passed as command line arguments for proxy:\n`metric_collection_endpoint` no default, which means that metric collection is disabled by default.\n`metric_collection_interval` no default\n\n#### Metrics\n\nCurrently, only one proxy metric is collected:\n\n- `proxy_io_bytes_per_client`\nOutbound traffic per client.\nThis is an incremental, per-endpoint metric.\n\n#### Format example\n\n```json\n{\n\"metric\": \"proxy_io_bytes_per_client\",\n\"type\": \"incremental\",\n\"start_time\": \"2022-12-28T11:07:19.317310284Z\",\n\"stop_time\": \"2022-12-28T11:07:19.317310284Z\",\n\"idempotency_key\": \"2022-12-28 11:07:19.317310324 UTC-1-4019\",\n\"value\": 12345454,\n\"endpoint_id\": \"5d07d9ce9237c4cd845ea7918c0afa7d\",\n}\n```\n\nThe metric is incremental, so the value is the difference between the 
current and the previous value.\nIf there is no previous value, the value is the current value and the `start_time` equals `stop_time`.\n\n### TODO\n\n- [ ] Handle errors better: currently if one tenant fails to gather metrics, the whole iteration fails and metrics are not sent for any tenant.\n- [ ] Add retries\n- [ ] Tune the interval\n"
  },
  {
    "path": "docs/core_changes.md",
    "content": "# Postgres core changes\n\nThis lists all the changes that have been made to the PostgreSQL\nsource tree, as a somewhat logical set of patches. The long-term goal\nis to eliminate all these changes, by submitting patches to upstream\nand refactoring code into extensions, so that you can run unmodified\nPostgreSQL against Neon storage.\n\nIn Neon, we run PostgreSQL in the compute nodes, but we also run a special WAL redo process in the\npage server. We currently use the same binary for both, with --wal-redo runtime flag to launch it in\nthe WAL redo mode. Some PostgreSQL changes are needed in the compute node, while others are just for\nthe WAL redo process.\n\nIn addition to core PostgreSQL changes, there is a Neon extension in the pgxn/neon directory that\nhooks into the smgr interface, and rmgr extension in pgxn/neon_rmgr. The extensions are loaded into\nthe Postgres processes with shared_preload_libraries. Most of the Neon-specific code is in the\nextensions, and for any new features, that is preferred over modifying core PostgreSQL code.\n\nBelow is a list of all the PostgreSQL source code changes, categorized into changes needed for\ncompute, and changes needed for the WAL redo process:\n\n# Changes for Compute node\n\n## Prefetching\n\nThere are changes in many places to perform prefetching, for example for sequential scans. Neon\ndoesn't benefit from OS readahead, and the latency to pageservers is quite high compared to local\ndisk, so prefetching is critical for performance, also for sequential scans.\n\n### How to get rid of the patch\n\nUpcoming \"streaming read\" work in v17 might simplify this. And async I/O work in v18 will hopefully\ndo more.\n\n\n## Add t_cid to heap WAL records\n\n```\n src/backend/access/heap/heapam.c                            |   26 +-\n src/include/access/heapam_xlog.h                            |    6 +-\n```\n\nWe have added a new t_cid field to heap WAL records. 
This changes the WAL record format, making Neon WAL format incompatible with vanilla PostgreSQL!\n\n### Problem we're trying to solve\n\nThe problem is that the XLOG_HEAP_INSERT record does not include the command id of the inserted row. And same with deletion/update. So in the primary, a row is inserted with current xmin + cmin. But in the replica, the cmin is always set to 1. That works in PostgreSQL, because the command id is only relevant to the inserting transaction itself. After commit/abort, no one cares about it anymore. But with Neon, we rely on WAL replay to reconstruct the page, even while the original transaction is still running.\n\n### How to get rid of the patch\n\nBite the bullet and submit the patch to PostgreSQL, to add the t_cid to the WAL records. It makes the WAL records larger, which could make this unpopular in the PostgreSQL community. However, it might simplify some logical decoding code; Andres Freund briefly mentioned in PGCon 2022 discussion on Heikki's Neon presentation that logical decoding currently needs to jump through some hoops to reconstruct the same information.\n\nUpdate from Heikki (2024-04-17): I tried to write an upstream patch for that, to use the t_cid field for logical decoding, but it was not as straightforward as it first sounded.\n\n### Alternatives\nPerhaps we could write an extra WAL record with the t_cid information, when a page is evicted that contains rows that were touched by a transaction that's still running. 
However, that seems very complicated.\n\n## Mark index builds that use buffer manager without logging explicitly\n\n```\n src/backend/access/gin/gininsert.c                          |    7 +\n src/backend/access/gist/gistbuild.c                         |   15 +-\n src/backend/access/spgist/spginsert.c                       |    8 +-\n\nalso some changes in src/backend/storage/smgr/smgr.c\n```\n\npgvector 0.6.0 also needs a similar change, which would be very nice to get rid of too.\n\nWhen a GIN index is built, for example, it is built by inserting the entries into the index more or\nless normally, but without WAL-logging anything. After the index has been built, we iterate through\nall pages and write them to the WAL. That doesn't work for Neon, because if a page is not WAL-logged\nand is evicted from the buffer cache, it is lost. We have a check to catch that in the Neon\nextension. To fix that, we've added a few functions to track explicitly when we're performing such\nan operation: `smgr_start_unlogged_build`, `smgr_finish_unlogged_build_phase_1` and\n`smgr_end_unlogged_build`.\n\n\n### How to get rid of the patch\n\nI think it would make sense to be more explicit about that in PostgreSQL too. So extract these\nchanges to a patch and post to pgsql-hackers.\n\nPerhaps we could deduce that an unlogged index build has started when we see a page being evicted\nwith zero LSN. How to be sure it's an unlogged index build rather than a bug? Currently we have a\ncheck for that and PANIC if we see a page with zero LSN being evicted. And how do we detect when the\nindex build has finished? 
See https://github.com/neondatabase/neon/pull/7440 for an attempt at that.\n\n## Track last-written page LSN\n\n```\n src/backend/commands/dbcommands.c                           |   17 +-\n\nAlso one call to SetLastWrittenPageLSN() in spginsert.c, maybe elsewhere too\n```\n\nWhenever a page is evicted from the buffer cache, we remember its LSN, so that we can use the same\nLSN in the GetPage@LSN request when reading the page back from the page server. The value is\nconservative: it would be correct to always use the last-inserted LSN, but it would be slow because\nthen the page server would need to wait for the recent WAL to be streamed and processed, before\nresponding to any GetPage@LSN request.\n\nThe last-written page LSN is mostly tracked in the smgrwrite() function, without core code changes,\nbut there are a few exceptions where we've had to add explicit calls to the Neon-specific\nSetLastWrittenPageLSN() function.\n\nThere's an open PR to track the LSN in a more fine-grained fashion:\nhttps://github.com/neondatabase/postgres/pull/177\n\nPostgreSQL v15 introduces a new method to do CREATE DATABASE that WAL-logs the database instead of\nrelying on copying files and a checkpoint. With that method, we probably won't need any special handling.\nThe old method is still available, though.\n\n### How to get rid of the patch\n\nWait until v15?\n\n\n## Allow startup without reading checkpoint record\n\nIn Neon, the compute node is stateless. So when we are launching a compute node, we need to provide\nsome dummy PG_DATADIR. Relation pages can be requested on demand from page server. But Postgres\nstill needs some non-relational data: control and configuration files, SLRUs,...  It is currently\nimplemented using basebackup (not to be confused with pg_basebackup) which is created by pageserver. 
It\nincludes in this tarball config/control files, SLRUs and required directories.\n\nAs pageserver does not have the original WAL segments, the basebackup tarball includes an empty WAL\nsegment to bootstrap the WAL writing, but it doesn't contain the checkpoint record.  There are some\nchanges in xlog.c, to allow starting the compute node without reading the last checkpoint record\nfrom WAL.\n\nThis includes code to read the `neon.signal` (also `zenith.signal`) file, which tells the startup \ncode the LSN to start at. When the `neon.signal` file is present, the startup uses that LSN\ninstead of the last checkpoint's LSN. The system is known to be consistent at that LSN, without \nany WAL redo.\n\n\n### How to get rid of the patch\n\n???\n\n\n### Alternatives\n\nInclude a fake checkpoint record in the tarball. Creating fake WAL is a bit risky, though; I'm\nafraid it might accidentally get streamed to the safekeepers and overwrite or corrupt the real WAL.\n\n## Disable sequence caching\n\n```\ndiff --git a/src/backend/commands/sequence.c b/src/backend/commands/sequence.c\nindex 0415df9ccb..9f9db3c8bc 100644\n--- a/src/backend/commands/sequence.c\n+++ b/src/backend/commands/sequence.c\n@@ -53,7 +53,9 @@\n  * so we pre-log a few fetches in advance. In the event of\n  * crash we can lose (skip over) as many values as we pre-logged.\n  */\n-#define SEQ_LOG_VALS   32\n+/* Neon XXX: to ensure sequence order of sequence in Zenith we need to WAL log each sequence update. */\n+/* #define SEQ_LOG_VALS        32 */\n+#define SEQ_LOG_VALS   0\n```\n\nDue to performance reasons Postgres don't want to log each fetching of a value from a sequence, so\nit pre-logs a few fetches in advance. In the event of crash we can lose (skip over) as many values\nas we pre-logged. 
But with Neon, because page with sequence value can be evicted from buffer cache,\nwe can get a gap in sequence values even without crash.\n\n### How to get rid of the patch\n\nMaybe we can just remove it, and accept the gaps. Or add some special handling for sequence\nrelations in the Neon extension, to WAL log the sequence page when it's about to be evicted. It\nwould be weird if the sequence moved backwards though, think of PITR.\n\nOr add a GUC for the amount to prefix to PostgreSQL, and force it to 1 in Neon.\n\n\n## Make smgr interface available to extensions\n\n```\n src/backend/storage/smgr/smgr.c                             |  203 +++---\n src/include/storage/smgr.h                                  |   72 +-\n```\n\n### How to get rid of the patch\n\nSubmit to upstream. This could be useful for the Disk Encryption patches too, or for compression.\n\nWe have submitted this to upstream, but it's moving at glacial a speed.\nhttps://commitfest.postgresql.org/47/4428/\n\n## Added relpersistence argument to smgropen()\n\n```\n src/backend/access/heap/heapam_handler.c                    |    2 +-\n src/backend/catalog/storage.c                               |   10 +-\n src/backend/commands/tablecmds.c                            |    2 +-\n src/backend/storage/smgr/md.c                               |    4 +-\n src/include/utils/rel.h                                     |    3 +-\n```\n\nNeon needs to treat unlogged relations differently from others, so the smgrread(), smgrwrite() etc.\nimplementations need to know the 'relpersistence' of the relation. To get that information where\nit's needed, we added the 'relpersistence' field to smgropen().\n\n### How to get rid of the patch\n\nMaybe 'relpersistence' would be useful in PostgreSQL for debugging purposes? Or simply for the\nbenefit of extensions like Neon. 
Should consider this in the patch to make smgr API usable to\nextensions.\n\n## Alternatives\n\nCurrently in Neon, unlogged tables live on local disk in the compute node, and are wiped away on\ncompute node restart. One alternative would be to instead WAL-log even unlogged tables, essentially\nignoring the UNLOGGED option. Or prohibit UNLOGGED tables completely. But would we still need the\nrelpersistence argument to handle index builds? See item on \"Mark index builds that use buffer\nmanager without logging explicitly\".\n\n## Use smgr and dbsize_hook for size calculations\n\n```\n src/backend/utils/adt/dbsize.c                              |   61 +-\n```\n\nIn PostgreSQL, the rel and db-size functions scan the data directory directly. That won't work in Neon.\n\n### How to get rid of the patch\n\nSend patch to PostgreSQL, to use smgr API functions for relation size calculation instead. Maybe as\npart of the general smgr API patch.\n\n\n\n# WAL redo process changes\n\nPageserver delegates complex WAL decoding duties to Postgres, which means that the latter might fall\nvictim to carefully designed malicious WAL records and start doing harmful things to the system.  To\nprevent this, the redo functions are executed in a separate process that is sandboxed with Linux\nSecure Computing mode (see seccomp(2) man page).\n\nAs an alternative to having a separate WAL redo process, we could rewrite all redo handlers in Rust\nThis is infeasible. However, it would take a lot of effort to rewrite them, ensure that you've done\nthe rewrite correctly, and once you've done that, it would be a lot of ongoing maintenance effort to\nkeep the rewritten code in sync over time, across new PostgreSQL versions. That's why we want to\nleverage PostgreSQL code.\n\nAnother alternative would be to harden all the PostgreSQL WAL redo functions so that it would be\nsafe to call them directly from Rust code, without needing the security sandbox. 
That's not feasible\nfor similar reasons as rewriting them in Rust.\n\n\n## Don't replay change in XLogReadBufferForRedo that are not for the target page we're replaying\n\n```\n src/backend/access/gin/ginxlog.c                            |   19 +-\n\nAlso some changes in xlog.c and xlogutils.c\n\nExample:\n\n@@ -415,21 +416,27 @@ ginRedoSplit(XLogReaderState *record)\n        if (!isLeaf)\n                ginRedoClearIncompleteSplit(record, 3);\n \n-       if (XLogReadBufferForRedo(record, 0, &lbuffer) != BLK_RESTORED)\n+       action = XLogReadBufferForRedo(record, 0, &lbuffer);\n+       if (action != BLK_RESTORED && action != BLK_DONE)\n                elog(ERROR, \"GIN split record did not contain a full-page image of left page\");\n```\n\n### Problem we're trying to solve\n\nIn PostgreSQL, if a WAL redo function calls XLogReadBufferForRedo() for a page that has a full-page\nimage, it always succeeds. However, the Neon WAL redo process is only concerned with replaying changes\nto a single page, so replaying any changes for other pages is a waste of cycles. We have modified\nXLogReadBufferForRedo() to return BLK_DONE for all other pages, to avoid the overhead. That is\nunexpected by code like the above.\n\n### How to get rid of the patch\n\nSubmit the changes to upstream, hope the community accepts them. There's no harm to PostgreSQL from\nthese changes, although it doesn't have any benefit either.\n\nTo make these changes useful to upstream PostgreSQL, we could implement a feature to look ahead the\nWAL, and detect truncated relations. 
Even in PostgreSQL, it is a waste of cycles to replay changes\nto pages that are later truncated away, so we could have XLogReadBufferForRedo() return BLK_DONE or\nBLK_NOTFOUND for pages that are known to be truncated away later in the WAL stream.\n\n### Alternatives\n\nMaybe we could revert this optimization, and restore pages other than the target page too.\n\n## Add predefined_sysidentifier flag to initdb\n\n```\n src/backend/bootstrap/bootstrap.c                           |   13 +-\n src/bin/initdb/initdb.c                                     |    4 +\n\nAnd some changes in xlog.c\n```\n\nThis is used to help with restoring a database when you have all the WAL, all the way back to\ninitdb, but no backup. You can reconstruct the missing backup by running initdb again, with the same\nsysidentifier.\n\n\n### How to get rid of the patch\n\nIgnore it. This is only needed for disaster recovery, so once we've eliminated all other Postgres\npatches, we can just keep it around as a patch or as separate branch in a repo.\n\n\n## pg_waldump flags to ignore errors\n\nAfter creating a new project or branch in Neon, the first timeline can begin in the middle of a WAL segment. 
pg_waldump chokes on that, so we added some flags to make it possible to ignore errors.\n\n### How to get rid of the patch\n\nLike previous one, ignore it.\n\n\n\n## Backpressure if pageserver doesn't ingest WAL fast enough\n\n```\n@@ -3200,6 +3202,7 @@ ProcessInterrupts(void)\n                return;\n        InterruptPending = false;\n \n+retry:\n        if (ProcDiePending)\n        {\n                ProcDiePending = false;\n@@ -3447,6 +3450,13 @@ ProcessInterrupts(void)\n \n        if (ParallelApplyMessagePending)\n                HandleParallelApplyMessages();\n+\n+       /* Call registered callback if any */\n+       if (ProcessInterruptsCallback)\n+       {\n+               if (ProcessInterruptsCallback())\n+                       goto retry;\n+       }\n }\n```\n\n\n### How to get rid of the patch\n\nSubmit a patch to upstream, for a hook in ProcessInterrupts. Could be useful for other extensions\ntoo.\n\n\n## SLRU on-demand download\n\n```\n src/backend/access/transam/slru.c | 105 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-------------\n 1 file changed, 92 insertions(+), 13 deletions(-)\n```\n\n### Problem we're trying to solve\n\nPreviously, SLRU files were included in the basebackup, but the total size of them can be large,\nseveral GB, and downloading them all made the startup time too long.\n\n### Alternatives\n\nFUSE hook or LD_PRELOAD trick to intercept the reads on SLRU files\n\n\n## WAL-log an all-zeros page as one large hole\n\n- In XLogRecordAssemble()\n\n### Problem we're trying to solve\n\nThis change was made in v16. Starting with v16, when PostgreSQL extends a relation, it first extends\nit with zeros, and it can extend the relation more than one block at a time. The all-zeros page is WAL-ogged, but it's very wasteful to include 8 kB of zeros in the WAL for that. This hack was made so that we WAL logged a compact record with a whole-page \"hole\". 
However, PostgreSQL has assertions that prevent such WAL records from being replayed, so this breaks compatibility such that unmodified PostgreSQL cannot process Neon-generated WAL.\n\n### How to get rid of the patch\n\nFind another compact representation for a full-page image of an all-zeros page. A compressed image perhaps.\n\n\n## Shut down walproposer after checkpointer\n\n```\n+                       /* Neon: Also allow walproposer background worker to be treated like a WAL sender, so that it's shut down last */\n+                       if ((bp->bkend_type == BACKEND_TYPE_NORMAL || bp->bkend_type == BACKEND_TYPE_BGWORKER) &&\n```\n\nThis change was needed so that postmaster shuts down the walproposer process only after the shutdown checkpoint record is written. Otherwise, the shutdown record will never make it to the safekeepers.\n\n### How to get rid of the patch\n\nDo a bigger refactoring of the postmaster state machine, such that a background worker can specify\nthe shutdown ordering by itself. The postmaster state machine has grown pretty complicated, and\nwould benefit from a refactoring for the sake of readability anyway.\n\n\n## EXPLAIN changes for prefetch and LFC\n\n### How to get rid of the patch\n\nKonstantin submitted a patch to -hackers already: https://commitfest.postgresql.org/47/4643/. 
Get that into a committable state.\n\n\n## On-demand download of extensions\n\n### How to get rid of the patch\n\nFUSE or LD_PRELOAD trickery to intercept reads?\n\n\n## Publication superuser checks\n\nWe have hacked CreatePublication so that neon_superuser can also create them.\n\n### How to get rid of the patch\n\nCreate an upstream patch with more fine-grained privileges for publications CREATE/DROP that can be GRANTed to users.\n\n\n## WAL log replication slots\n\n### How to get rid of the patch\n\nUtilize the upcoming v17 \"slot sync worker\", or a similar neon-specific background worker process, to periodically WAL-log the slots, or to export them somewhere else.\n\n\n## WAL-log replication snapshots\n\n### How to get rid of the patch\n\nWAL-log them periodically, from a background worker.\n\n\n## WAL-log relmapper files\n\nSimilarly to replication snapshot files, the CID mapping files generated during VACUUM FULL of a catalog table are WAL-logged.\n\n### How to get rid of the patch\n\nWAL-log them periodically, from a background worker.\n\n\n## XLogWaitForReplayOf()\n\n??\n\n\n\n\n# Not currently committed but proposed\n\n## Disable ring buffer buffer manager strategies\n\n### Why?\n\nPostgres tries to avoid cache flushing by bulk operations (copy, seqscan, vacuum,...).\nEven if there is free space in the buffer cache, pages may be evicted.\nThe negative effect of this can be partly compensated for by the file system cache, but in Neon,\nthe cost of requesting a page from the page server is much higher.\n\n### Alternatives?\n\nInstead of just prohibiting ring buffers, we may try to implement a more flexible eviction policy,\nfor example copying an evicted page from the ring buffer to some other buffer if there is free space\nin the buffer cache.\n\n## Disable marking page as dirty when hint bits are set.\n\n### Why?\n\nPostgres has to modify a page twice: the first time when some tuple is updated and the second time when\nhint bits are set. 
WAL-logging hint bit updates requires an FPI, which significantly increases the size of the WAL.\n\n### Alternatives?\n\nAdd a special WAL record for setting page hints.\n\n## Prewarming\n\n### Why?\n\nShort downtime (or, in other words, fast compute node restart time) is one of the key features of Neon.\nBut the overhead of a request-response round-trip for loading pages on demand can make the warm-up of a freshly started node quite slow.\nWe can capture the state of the compute node's buffer cache and send a bulk request for these pages at startup.\n"
  },
  {
    "path": "docs/docker.md",
    "content": "# Docker images of Neon\n\n## Images\n\nCurrently we build two main images:\n\n- [neondatabase/neon](https://hub.docker.com/repository/docker/neondatabase/neon) — image with pre-built `pageserver`, `safekeeper` and `proxy` binaries and all the required runtime dependencies. Built from [/Dockerfile](/Dockerfile).\n- [neondatabase/compute-node-v16](https://hub.docker.com/repository/docker/neondatabase/compute-node-v16) — compute node image with pre-built Postgres binaries from [neondatabase/postgres](https://github.com/neondatabase/postgres). Similar images exist for v15 and v14. Built from [/compute-node/Dockerfile](/compute/compute-node.Dockerfile).\n\n## Build pipeline\n\nWe build all images after a successful `release` tests run and push automatically to Docker Hub with two parallel CI jobs\n\n1. `neondatabase/compute-node-v17` (and -16, -v15, -v14)\n\n2. `neondatabase/neon`\n\n## Docker Compose example\n\nYou can see a [docker compose](https://docs.docker.com/compose/) example to create a neon cluster in [/docker-compose/docker-compose.yml](/docker-compose/docker-compose.yml). It creates the following containers.\n\n- pageserver x 1\n- safekeeper x 3\n- storage_broker x 1\n- compute x 1\n- MinIO x 1        # This is Amazon S3 compatible object storage\n\n### How to use\n\n1. create containers\n\nYou can specify version of neon cluster using following environment values.\n- PG_VERSION: postgres version for compute (default is 16 as of this writing)\n- TAG: the tag version of [docker image](https://registry.hub.docker.com/r/neondatabase/neon/tags), which is tagged in [CI test](/.github/workflows/build_and_test.yml). Default is 'latest'\n```\n$ cd docker-compose/\n$ docker-compose down   # remove the containers if exists\n$ PG_VERSION=16 TAG=latest docker-compose up --build -d  # You can specify the postgres and image version\nCreating network \"dockercompose_default\" with the default driver\nCreating docker-compose_storage_broker_1       ... 
done\n(...omit...)\n```\n\n2. connect compute node\n```\n$ psql postgresql://cloud_admin:cloud_admin@localhost:55433/postgres\npsql (16.3)\nType \"help\" for help.\n\npostgres=# CREATE TABLE t(key int primary key, value text);\nCREATE TABLE\npostgres=# insert into t values(1, 1);\nINSERT 0 1\npostgres=# select * from t;\n key | value\n-----+-------\n   1 | 1\n(1 row)\n\n```\n\n3. If you want to see the log, you can use `docker-compose logs` command.\n```\n# check the container name you want to see\n$ docker ps\nCONTAINER ID   IMAGE                                              COMMAND                  CREATED         STATUS         PORTS                                                                                      NAMES\n3582f6d76227   docker-compose_compute                             \"/shell/compute.sh\"      2 minutes ago   Up 2 minutes   0.0.0.0:3080->3080/tcp, :::3080->3080/tcp, 0.0.0.0:55433->55433/tcp, :::55433->55433/tcp   docker-compose_compute_1\n(...omit...)\n\n$ docker logs -f docker-compose_compute_1\n2022-10-21 06:15:48.757 GMT [56] LOG:  connection authorized: user=cloud_admin database=postgres application_name=psql\n2022-10-21 06:17:00.307 GMT [56] LOG:  [NEON_SMGR] libpagestore: connected to 'host=pageserver port=6400'\n(...omit...)\n```\n\n4. If you want to see durable data in MinIO which is s3 compatible storage\n\nAccess http://localhost:9001 and sign in.\n\n- Username: `minio`\n- Password: `password`\n\nYou can see durable pages and WAL data in `neon` bucket.\n"
  },
  {
    "path": "docs/error-handling.md",
    "content": "# Error handling and logging\n\n## Logging errors\n\nThe principle is that errors are logged when they are handled. If you\njust propagate an error to the caller in a function, you don't need to\nlog it; the caller will. But if you consume an error in a function,\nyou *must* log it (if it needs to be logged at all).\n\nFor example:\n\n```rust\nfn read_motd_file() -> std::io::Result<String> {\n    let mut f = File::open(\"/etc/motd\")?;\n    let mut result = String::new();\n    f.read_to_string(&mut result)?;\n    result\n}\n```\n\nOpening or reading the file could fail, but there is no need to log\nthe error here. The function merely propagates the error to the\ncaller, and it is up to the caller to log the error or propagate it\nfurther, if the failure is not expected. But if, for example, it is\nnormal that the \"/etc/motd\" file doesn't exist, the caller can choose\nto silently ignore the error, or log it as an INFO or DEBUG level\nmessage:\n\n```rust\nfn get_message_of_the_day() -> String {\n    // Get the motd from /etc/motd, or return the default proverb\n    match read_motd_file() {\n        Ok(motd) => motd,\n        Err(err)  => {\n            // It's normal that /etc/motd doesn't exist, but if we fail to\n            // read it for some other reason, that's unexpected. The message\n            // of the day isn't very important though, so we just WARN and\n            // continue with the default in any case.\n            if err.kind() != std::io::ErrorKind::NotFound {\n                 tracing::warn!(\"could not read \\\"/etc/motd\\\": {err:?}\");\n            }\n            \"An old error is always more popular than a new truth. - German proverb\"\n        }\n    }\n}\n```\n\n## Error types\n\nWe use the `anyhow` crate widely. 
It contains many convenient macros\nlike `bail!` and `ensure!` to construct and return errors, and to\npropagate many kinds of low-level errors, wrapped in `anyhow::Error`.\n\nA downside of `anyhow::Error` is that the caller cannot distinguish\nbetween different error cases. Most errors are propagated all the way\nto the mgmt API handler function, or the main loop that handles a\nconnection with the compute node, and they are all handled the same\nway: the error is logged and returned to the client as an HTTP or\nlibpq error.\n\nBut in some cases, we need to distinguish between errors and handle\nthem differently. For example, attaching a tenant to the pageserver\ncould fail either because the tenant has already been attached, or\nbecause we could not load its metadata from cloud storage. The first\ncase is more or less expected. The console sends the Attach request to\nthe pageserver, and the pageserver completes the operation, but the\nnetwork connection might be lost before the console receives the\nresponse. The console will retry the operation in that case, but the\ntenant has already been attached. It is important that the pageserver\nresponds with the HTTP 403 Already Exists error in that case, rather\nthan a generic HTTP 500 Internal Server Error.\n\nIf you need to distinguish between different kinds of errors, create a\nnew `Error` type. The `thiserror` crate is useful for that. But in\nmost cases `anyhow::Error` is good enough.\n\n## Panics\n\nDepending on where a panic happens, it can cause the whole pageserver\nor safekeeper to restart, or just a single tenant. In either case,\nthat is pretty bad and causes an outage. Avoid panics. Never use\n`unwrap()` or other calls that might panic, to verify inputs from the\nnetwork or from disk.\n\nIt is acceptable to use functions that might panic, like `unwrap()`, if\nit is obvious that it cannot panic. 
For example, if you have just\nchecked that a variable is not None, it is OK to call `unwrap()` on it,\nbut it is still preferable to use `expect(\"reason\")` instead to explain\nwhy the function cannot fail.\n\n`assert!` and `panic!` are reserved for checking clear invariants and\nvery obvious \"can't happen\" cases. When in doubt, use anyhow `ensure!`\nor `bail!` instead.\n\n## Error levels\n\n`tracing::Level` doesn't provide very clear guidelines on what the\ndifferent levels mean, or when to use which level. Here is how we use\nthem:\n\n### Error\n\nExamples:\n- could not open file \"foobar\"\n- invalid tenant id\n\nErrors are not expected to happen during normal operation. Incorrect\ninputs from a client can cause ERRORs. For example, if a client tries to\ncall a mgmt API that doesn't exist, or if a compute node passes\nan LSN that has already been garbage collected away.\n\nThese should *not* happen during normal operations. \"Normal\noperations\" is not a very precise concept. But for example, disk\nerrors are not expected to happen when the system is working, so those\ncount as Errors. However, if a TCP connection to a compute node is\nlost, that is not considered an Error, because it doesn't affect the\npageserver's or safekeeper's operation in any way, and happens fairly\nfrequently when compute nodes are shut down, or are killed abruptly\nbecause of errors in the compute.\n\n**Errors are monitored, and always need human investigation to determine\nthe cause.**\n\nWhether something should be logged at ERROR, WARNING or INFO level can\ndepend on the callers and clients. For example, it might be unexpected\nand a sign of a serious issue if the console calls the\n\"timeline_detail\" mgmt API for a timeline that doesn't exist. ERROR\nwould be appropriate in that case. 
But if the console routinely calls\nthe API after deleting a timeline, to check if the deletion has\ncompleted, then it would be totally normal and an INFO or DEBUG level\nmessage would be more appropriate. If a message is logged as an ERROR,\nbut it in fact happens frequently in production and never requires any\naction, it should probably be demoted to an INFO level message.\n\n### Warn\n\nExamples:\n- could not remove temporary file \"foobar.temp\"\n- unrecognized file \"foobar\" in timeline directory\n\nWarnings are similar to Errors, in that they should not happen\nwhen the system is operating normally. The difference between Error and\nWarning is that an Error means that the operation failed, whereas Warning\nmeans that something unexpected happened, but the operation continued anyway.\nFor example, if deleting a file fails because the file already didn't exist,\nit should be logged as Warning.\n\n> **Note:** The python regression tests, under `test_regress`, check the\n> pageserver log after each test for any ERROR and WARN lines. If there are\n> any ERRORs or WARNs that have not been explicitly listed in the test as\n> allowed, the test is marked a failed. This is to catch unexpected errors\n> e.g. in background operations, that don't cause immediate misbehaviour in\n> the tested functionality.\n\n### Info\n\nInfo level is used to log useful information when the system is\noperating normally. Info level is appropriate e.g. for logging state\nchanges, background operations, and network connections.\n\nExamples:\n- \"system is shutting down\"\n- \"tenant was created\"\n- \"retrying S3 upload\"\n\n### Debug & Trace\n\nDebug and Trace level messages are not printed to the log in our normal\nproduction configuration, but could be enabled for a specific server or\ntenant, to aid debugging. (Although we don't actually have that\ncapability as of this writing).\n\n## Context\n\nWe use logging \"spans\" to hold context information about the current\noperation. 
Almost every operation happens on a particular tenant and\ntimeline, so we enter a span with the \"tenant_id\" and \"timeline_id\"\nvery early when processing an incoming API request, for example. All\nbackground operations should also run in a span containing at least\nthose two fields, and any other parameters or information that might\nbe useful when debugging an error that might happen when performing\nthe operation.\n\nTODO: Spans are not captured in the Error when it is created, but when\nthe error is logged. It would be more useful to capture them at Error\ncreation. We should consider using `tracing_error::SpanTrace` to do\nthat.\n\n## Error message style\n\n### PostgreSQL extensions\n\nPostgreSQL has a style guide for writing error messages:\n\nhttps://www.postgresql.org/docs/current/error-style-guide.html\n\nFollow that guide when writing error messages in the PostgreSQL\nextensions.\n\n### Neon Rust code\n\n#### Anyhow Context\n\nWhen adding anyhow `context()`, use the form `present-tense-verb+action`.\n\nExample:\n- Bad: `file.metadata().context(\"could not get file metadata\")?;`\n- Good: `file.metadata().context(\"get file metadata\")?;`\n\n#### Logging Errors\n\nWhen logging any error `e`, use `could not {e:#}` or `failed to {e:#}`.\n\nIf `e` is an `anyhow` error and you want to log the backtrace that it contains,\nuse `{e:?}` instead of `{e:#}`.\n\n#### Rationale\n\nThe `{:#}` (\"alternate Display\") of an `anyhow` error chain is the concatenation of the contexts, joined with `: `.\n\nFor example, the following Rust code will result in output\n```\nERROR  failed to list users: load users from server: parse response: invalid json\n```\n\nThis is more concise / less noisy than what happens if you do `.context(\"could not ...\")?` at each level, i.e.:\n\n```\nERROR  could not list users: could not load users from server: could not parse response: invalid json\n```\n\n\n```rust\nfn main() {\n  match list_users().context(\"list users\") {\n    Ok(_) => ...,\n    
Err(e) => tracing::error!(\"failed to {e:#}\"),\n  }\n}\nfn list_users() {\n  http_get_users().context(\"load users from server\")?;\n}\nfn http_get_users() {\n  let response = client....?;\n  response.parse().context(\"parse response\")?; // fails with serde error \"invalid json\"\n}\n```\n"
  },
  {
    "path": "docs/glossary.md",
    "content": "# Glossary\n\n### Authentication\n\n### Backpressure\n\nBackpressure is used to limit the lag between pageserver and compute node or WAL service.\n\nIf compute node or WAL service run far ahead of Page Server,\nthe time of serving page requests increases. This may lead to timeout errors.\n\nTo tune backpressure limits use `max_replication_write_lag`, `max_replication_flush_lag` and `max_replication_apply_lag` settings.\nWhen lag between current LSN (pg_current_wal_flush_lsn() at compute node) and minimal write/flush/apply position of replica exceeds the limit\nbackends performing writes are blocked until the replica is caught up.\n### Base image (page image)\n\n### Basebackup\n\nA tarball with files needed to bootstrap a compute node[] and a corresponding command to create it.\nNOTE:It has nothing to do with PostgreSQL pg_basebackup.\n\n### Branch\n\nWe can create branch at certain LSN using `neon_local timeline branch` command.\nEach Branch lives in a corresponding timeline[] and has an ancestor[].\n\n\n### Checkpoint (PostgreSQL)\n\nNOTE: This is an overloaded term.\n\nA checkpoint record in the WAL marks a point in the WAL sequence at which it is guaranteed that all data files have been updated with all information from shared memory modified before that checkpoint;\n\n### Checkpoint (Layered repository)\n\nNOTE: This is an overloaded term.\n\nWhenever enough WAL has been accumulated in memory, the page server []\nwrites out the changes from the in-memory layer into a new delta layer file. This process\nis called \"checkpointing\".\n\nConfiguration parameter `checkpoint_distance` defines the distance\nfrom current LSN to perform checkpoint of in-memory layers.\nDefault is `DEFAULT_CHECKPOINT_DISTANCE`.\n\n### Compaction\n\nA background operation on layer files. 
Compaction takes a number of L0\nlayer files, each of which covers the whole key space and a range of\nLSN, and reshuffles the data in them into L1 files so that each file\ncovers the whole LSN range, but only part of the key space.\n\nCompaction should also opportunistically leave obsolete page versions\nfrom the L1 files, and materialize other page versions for faster\naccess. That hasn't been implemented as of this writing, though.\n\n\n### Compute node\n\nStateless Postgres node that stores data in pageserver.\n\n### Garbage collection\n\nThe process of removing old on-disk layers that are not needed by any timeline anymore.\n\n### Fork\n\nEach of the separate segmented file sets in which a relation is stored. The main fork is where the actual data resides. There also exist two secondary forks for metadata: the free space map and the visibility map.\n\n### Layer\n\nA layer contains data needed to reconstruct any page versions within the\nlayer's Segment and range of LSNs.\n\nThere are two kinds of layers, in-memory and on-disk layers. In-memory\nlayers are used to ingest incoming WAL, and provide fast access\nto the recent page versions. On-disk layers are stored as files on disk, and\nare immutable. See [pageserver-storage.md](./pageserver-storage.md) for more.\n\n### Layer file (on-disk layer)\n\nLayered repository on-disk format is based on immutable files.  The\nfiles are called \"layer files\". There are two kinds of layer files:\nimage files and delta files. 
An image file contains a \"snapshot\" of a\nrange of keys at a particular LSN, and a delta file contains WAL\nrecords applicable to a range of keys, in a range of LSNs.\n\n### Layer map\n\nThe layer map tracks what layers exist in a timeline.\n\n### Layered repository\n\nNeon repository implementation that keeps data in layers.\n\n### LSN\n\nThe Log Sequence Number (LSN) is a unique identifier of the WAL record[] in the WAL log.\nThe insert position is a byte offset into the logs, increasing monotonically with each new record.\nInternally, an LSN is a 64-bit integer, representing a byte position in the write-ahead log stream.\nIt is printed as two hexadecimal numbers of up to 8 digits each, separated by a slash.\nCheck also [PostgreSQL doc about pg_lsn type](https://www.postgresql.org/docs/devel/datatype-pg-lsn.html)\nValues can be compared to calculate the volume of WAL data that separates them, so they are used to measure the progress of replication and recovery.\n\nIn Postgres and Neon LSNs are used to describe certain points in WAL handling.\n\nPostgreSQL LSNs and functions to monitor them:\n* `pg_current_wal_insert_lsn()` - Returns the current write-ahead log insert location.\n* `pg_current_wal_lsn()` - Returns the current write-ahead log write location.\n* `pg_current_wal_flush_lsn()` - Returns the current write-ahead log flush location.\n* `pg_last_wal_receive_lsn()` - Returns the last write-ahead log location that has been received and synced to disk by streaming replication. While streaming replication is in progress this will increase monotonically.\n* `pg_last_wal_replay_lsn ()` - Returns the last write-ahead log location that has been replayed during recovery. If recovery is still in progress this will increase monotonically.\n[source PostgreSQL documentation](https://www.postgresql.org/docs/devel/functions-admin.html):\n\nNeon safekeeper LSNs. 
See [safekeeper protocol section](safekeeper-protocol.md) for more information.\n* `CommitLSN`: position in WAL confirmed by quorum safekeepers.\n* `RestartLSN`: position in WAL confirmed by all safekeepers.\n* `FlushLSN`: part of WAL persisted to the disk by safekeeper.\n* `VCL`: the largest LSN for which we can guarantee availability of all prior records.\n\nNeon pageserver LSNs:\n* `last_record_lsn` - the end of last processed WAL record.\n* `disk_consistent_lsn` - data is known to be fully flushed and fsync'd to local disk on pageserver up to this LSN.\n* `remote_consistent_lsn` - The last LSN that is synced to remote storage and is guaranteed to survive pageserver crash.\nTODO: use this name consistently in remote storage code. Now `disk_consistent_lsn` is used and meaning depends on the context.\n* `ancestor_lsn` - LSN of the branch point (the LSN at which this branch was created)\n\nTODO: add table that describes mapping between PostgreSQL (compute), safekeeper and pageserver LSNs.\n\n### Logical size\n\nThe pageserver tracks the \"logical size\" of a timeline. It is the\ntotal size of all relations in all Postgres databases on the\ntimeline. It includes all user and system tables, including their FSM\nand VM forks. But it does not include SLRUs, twophase files or any\nother such data or metadata that lives outside relations.\n\nThe logical size is calculated by the pageserver, and is sent to\nPostgreSQL via feedback messages to the safekeepers. PostgreSQL uses\nthe logical size to enforce the size limit in the free tier. The\nlogical size is also shown to users in the web console.\n\nThe logical size is not affected by branches or the physical layout of\nlayer files in the pageserver. 
If you have a database with 1 GB\nlogical size and you create a branch of it, both branches will have 1\nGB logical size, even though the branch is copy-on-write and won't\nconsume any extra physical disk space until you make changes to it.\n\n### Page (block)\n\nThe basic structure used to store relation data. All pages are of the same size.\nThis is the unit of data exchange between compute node and pageserver.\n\n### Pageserver\n\nNeon storage engine: repositories + wal receiver + page service + wal redo.\n\n### Page service\n\nThe Page Service listens for GetPage@LSN requests from the Compute Nodes,\nand responds with pages from the repository.\n\n\n### PITR (Point-in-time-recovery)\n\nPostgreSQL's ability to restore up to a specified LSN.\n\n### Primary node\n\n\n### Proxy\n\nPostgres protocol proxy/router.\nThis service listens psql port, can check auth via external service\nand create new databases and accounts (control plane API in our case).\n\n### Relation\n\nThe generic term in PostgreSQL for all objects in a database that have a name and a list of attributes defined in a specific order.\n\n### Replication slot\n\n\n### Replica node\n\n\n### Repository\n\nRepository stores multiple timelines, forked off from the same initial call to 'initdb'\nand has associated WAL redo service.\nOne repository corresponds to one Tenant.\n\n### Retention policy\n\nHow much history do we need to keep around for PITR and read-only nodes?\n\n### Segment\n\nA physical file that stores data for a given relation. File segments are\nlimited in size by a compile-time setting (1 gigabyte by default), so if a\nrelation exceeds that size, it is split into multiple segments.\n\n### SLRU\n\nSLRUs include pg_clog, pg_multixact/members, and\npg_multixact/offsets. There are other SLRUs in PostgreSQL, but\nthey don't need to be stored permanently (e.g. 
pg_subtrans),\nor we do not support them in neon yet (pg_commit_ts).\n\n### Tenant (Multitenancy)\nTenant represents a single customer, interacting with Neon.\nWal redo[] activity, timelines[], layers[] are managed for each tenant independently.\nOne pageserver[] can serve multiple tenants at once.\nOne safekeeper\n\nSee `docs/multitenancy.md` for more.\n\n### Timeline\n\nTimeline accepts page changes and serves get_page_at_lsn() and\nget_rel_size() requests. The term \"timeline\" is used internally\nin the system, but to users they are exposed as \"branches\", with\nhuman-friendly names.\n\nNOTE: this has nothing to do with PostgreSQL WAL timelines.\n\n### XLOG\n\nPostgreSQL alias for WAL[].\n\n### WAL (Write-ahead log)\n\nThe journal that keeps track of the changes in the database cluster as user- and system-invoked operations take place. It comprises many individual WAL records[] written sequentially to WAL files[].\n\n### WAL acceptor, WAL proposer\n\nIn the context of the consensus algorithm, the Postgres\ncompute node is also known as the WAL proposer, and the safekeeper is also known\nas the acceptor. Those are the standard terms in the Paxos algorithm.\n\n### WAL receiver (WAL decoder)\n\nThe WAL receiver connects to the external WAL safekeeping service (or\ndirectly to the primary) using PostgreSQL physical streaming\nreplication, and continuously receives WAL. It decodes the WAL records,\nand stores them to the repository.\n\nWe keep one WAL receiver active per timeline.\n\n### WAL record\n\nA low-level description of an individual data change.\n\n### WAL redo\n\nA service that runs PostgreSQL in a special wal_redo mode\nto apply given WAL records over an old page image and return new page image.\n\n### WAL safekeeper\n\nOne node that participates in the quorum. All the safekeepers\ntogether form the WAL service.\n\n### WAL segment (WAL file)\n\nAlso known as WAL segment or WAL segment file. 
Each of the sequentially-numbered files that provide storage space for WAL. The files are all of the same predefined size and are written in sequential order, interspersing changes as they occur in multiple simultaneous sessions.\n\n### WAL service\n\nThe service as whole that ensures that WAL is stored durably.\n\n### Web console\n\n"
  },
  {
    "path": "docs/multitenancy.md",
    "content": "## Multitenancy\n\n### Overview\n\nNeon supports multitenancy. One pageserver can serve multiple tenants at once. Tenants can be managed via neon_local CLI. During page server setup tenant can be created using ```neon_local init --create-tenant``` Also tenants can be added into the system on the fly without pageserver restart. This can be done using the following cli command: ```neon_local tenant create``` Tenants use random identifiers which can be represented as a 32 symbols hexadecimal string. So neon_local tenant create accepts desired tenant id as an optional argument. The concept of timelines/branches is working independently per tenant.\n\n### Tenants in other commands\n\nBy default during `neon_local init` new tenant is created on the pageserver. Newly created tenant's id is saved to cli config, so other commands can use it automatically if no direct argument `--tenant_id=<tenant_id>` is provided. So generally tenant_id more frequently appears in internal pageserver interface. Its commands take tenant_id argument to distinguish to which tenant operation should be applied. 
The CLI supports creation of new tenants.\n\nExamples for cli:\n\n```sh\nneon_local tenant list\n\nneon_local tenant create // generates new id\n\nneon_local tenant create ee6016ec31116c1b7c33dfdfca38892f\n\nneon_local pg create main // default tenant from neon init\n\nneon_local pg create main --tenant_id=ee6016ec31116c1b7c33dfdfca38892f\n\nneon_local branch --tenant_id=ee6016ec31116c1b7c33dfdfca38892f\n```\n\n### Data layout\n\nOn the page server tenants introduce one level of indirection, so the data directory is structured the following way:\n```\n<pageserver working directory>\n├── pageserver.log\n├── pageserver.pid\n├── pageserver.toml\n└── tenants\n   ├── 537cffa58a4fa557e49e19951b5a9d6b\n   ├── de182bc61fb11a5a6b390a8aed3a804a\n   └── ee6016ec31116c1b7c33dfdfca38891f\n```\nWal redo activity and timelines are managed for each tenant independently.\n\nFor the local environment (used, for example, in tests) there is also a new level of indirection for tenants. It touches the `pgdatadirs` directory. Now it contains a `tenants` subdirectory, so the structure looks the following way:\n\n```\npgdatadirs\n└── tenants\n   ├── de182bc61fb11a5a6b390a8aed3a804a\n   │  └── main\n   └── ee6016ec31116c1b7c33dfdfca38892f\n      └── main\n```\n\n### Changes to postgres\n\nTenant id is passed to postgres via GUC the same way as the timeline. Tenant id is added to commands issued to pageserver, namely: pagestream, callmemaybe. Tenant id also exists in the ServerInfo structure; this is needed to pass the value to wal receiver to be able to forward it to the pageserver.\n\n### Safety\n\nFor now a particular tenant can only appear on a particular pageserver. A set of safekeepers is also pinned to a particular (tenant_id, timeline_id) pair, so there can only be one writer for a particular (tenant_id, timeline_id).\n"
  },
  {
    "path": "docs/pageserver-compaction.md",
    "content": "# Pageserver Compaction\n\nLifted from <https://www.notion.so/neondatabase/Rough-Notes-on-Compaction-1baf189e004780859e65ef63b85cfa81?pvs=4>.\n\nUpdated 2025-03-26.\n\n## Pages and WAL\n\nPostgres stores data in 8 KB pages, identified by a page number.\n\nThe WAL contains a sequence of page writes: either images (complete page contents) or deltas (patches applied to images). Each write is identified by its byte position in the WAL, aka LSN. \n\nEach page version is thus identified by page@LSN. Postgres may read pages at past LSNs.\n\nPageservers ingest WAL by writing WAL records into a key/value store keyed by page@LSN.\n\nPageservers materialize pages for Postgres reads by finding the most recent page image and applying all subsequent page deltas, up to the read LSN.\n\n## Compaction: Why?\n\nPageservers store page@LSN keys in a key/value store using a custom variant of an LSM tree. Each timeline on each tenant shard has its own LSM tree.\n\nWhen Pageservers write new page@LSN entries, they are appended unordered to an ephemeral layer file. When the ephemeral layer file exceeds `checkpoint_distance` (default 256 MB), the key/value pairs are sorted by key and written out to a layer file (for efficient lookups).\n\nAs WAL writes continue, more layer files accumulate.\n\nReads must search through the layer files to find the page’s image and deltas. 
The more layer files accumulate, the more layer files reads must search through before they find a page image, aka read amplification.\n\nCompaction’s job is to:\n\n- Reduce read amplification by reorganizing and combining layer files.\n- Remove old garbage from layer files.\n\nAs part of this, it may combine several page deltas into a single page image where possible.\n\n## Compaction: How?\n\nNeon uses a non-standard variant of an LSM tree made up of two levels of layer files: L0 and L1.\n\nCompaction runs in two phases: L0→L1 compaction, and L1 image compaction.\n\nL0 contains a stack of L0 layers at decreasing LSN ranges. These have been flushed sequentially from ephemeral layers. Each L0 layer covers the entire page space (page 0 to ~infinity) and the LSN range that was ingested into it. L0 layers are therefore particularly bad for read amp, since every read must search all L0 layers below the read LSN. For example:\n\n```\n| Page 0-99 @ LSN 0400-04ff |\n| Page 0-99 @ LSN 0300-03ff |\n| Page 0-99 @ LSN 0200-02ff |\n| Page 0-99 @ LSN 0100-01ff |\n| Page 0-99 @ LSN 0000-00ff |\n```\n\nL0→L1 compaction takes the bottom-most chunk of L0 layer files of between `compaction_threshold` (default 10) and `compaction_upper_limit` (default 20) layers. It uses merge-sort to write out sorted L1 delta layers of size `compaction_target_size` (default 128 MB).\n\nL1 typically consists of a “bed” of image layers with materialized page images at a specific LSN, and then delta layers of various page/LSN ranges above them with page deltas. 
For example:\n\n```\nDelta layers:               |     30-84@0310-04ff      |\nDelta layers:    | 10-42@0200-02ff |           | 65-92@0174-02aa |\nImage layers: |    0-39@0100    |    40-79@0100    |    80-99@0100    |\n```\n\nL1 image compaction scans across the L1 keyspace at some LSN, materializes page images by reading the image and delta layers below the LSN (via vectored reads), and writes out new sorted image layers of roughly size `compaction_target_size` (default 128 MB) at that LSN.\n\nLayer files below the new image files’ LSN can be garbage collected when they are no longer needed for PITR.\n\nEven though the old layer files are not immediately garbage collected, the new image layers help with read amp because reads can stop traversing the layer stack as soon as they encounter a page image.\n\n## Compaction: When?\n\nPageservers run a `compaction_loop` background task for each tenant shard. Every `compaction_period` (default 20 seconds) it will wake up and check if any of the shard’s timelines need compaction. Additionally, L0 layer flushes will eagerly wake the compaction loop if the L0 count exceeds `compaction_threshold` (default 10).\n\nL0 compaction runs if the number of L0 layers exceeds `compaction_threshold` (default 10).\n\nL1 image compaction runs across sections of the L1 keyspace that have at least `image_creation_threshold` (default 3) delta layers overlapping image layers.\n\nAt most `CONCURRENT_BACKGROUND_TASKS` (default 3 / 4 * CPUs = 6) background tasks can run concurrently on a Pageserver, including compaction. 
Further compaction tasks must wait.\n\nBecause L0 layers cause the most read amp (they overlap the entire keyspace and only contain page deltas), they are aggressively compacted down:\n\n- L0 is compacted down across all tenant timelines before L1 compaction is attempted (`compaction_l0_first`).\n- L0 compaction uses a separate concurrency limit of `CONCURRENT_L0_COMPACTION_TASKS` (default 3 / 4 * CPUs = 6) to avoid waiting for other tasks (`compaction_l0_semaphore`).\n- If L0 compaction is needed on any tenant timeline, L1 image compaction will yield to start an immediate L0 compaction run (except for compaction run via admin APIs).\n\n## Backpressure\n\nWith sustained heavy write loads, new L0 layers may be flushed faster than they can be compacted down. This can cause an unbounded buildup of read amplification and compaction debt, which can take hours to resolve even after the writes stop.\n\nTo avoid this and allow compaction to keep up, layer flushes will slow writes down to apply backpressure on the workload:\n\n- At `l0_flush_delay_threshold` (default 30) L0 layers, layer flushes are delayed by the flush duration, such that they take 2x as long.\n- At `l0_flush_stall_threshold` (default disabled) L0 layers, layer flushes stall entirely until the L0 count falls back below the threshold. This is currently disabled because we don’t trust L0 compaction to be responsive enough.\n\nThis backpressure is propagated to the compute by waiting for layer flushes when WAL ingestion rolls the ephemeral layer. 
The compute will significantly slow down WAL writes at:\n\n- `max_replication_write_lag` (default 500 MB), when Pageserver WAL ingestion lags\n- `max_replication_flush_lag` (default 10 GB), when Pageserver L0 flushes lag\n\nCombined, this means that the compute will backpressure when there are 30 L0 layers (30 * 256 MB = 7.7 GB) and the Pageserver WAL ingestion lags the compute by 500 MB, for a total of ~8 GB L0+ephemeral compaction debt on a single shard.\n\nSince we only delay L0 flushes by 2x when backpressuring, and haven’t enabled stalls, it is still possible for read amp to increase unbounded if compaction is too slow (although we haven’t seen this in practice). But this is considered better than stalling flushes and causing unavailability for as long as it takes L0 compaction to react, since we don’t trust it to be fast enough — at the expense of continually increasing read latency and CPU usage for this tenant. We should either enable stalls when we have enough confidence in L0 compaction, or scale the flush delay by the number of L0 layers to apply increasing backpressure.\n\n## Circuit Breaker\n\nCompaction can fail, often repeatedly. This can happen e.g. due to data corruption, faulty hardware, S3 outages, etc.\n\nIf compaction fails, the compaction loop will naïvely try and fail again almost immediately. It may only fail after doing a significant amount of wasted work, while holding onto the background task semaphore.\n\nTo avoid repeatedly doing wasted work and starving out other compaction jobs, each tenant has a compaction circuit breaker. After 5 repeated compaction failures, the circuit breaker trips and disables compaction for the next 24 hours, before resetting the breaker and trying again. This disables compaction across all tenant timelines (faulty or not).\n\nDisabling compaction for a long time is dangerous, since it can lead to unbounded read amp and compaction debt, and continuous workload backpressure. 
However, continually failing would not help either. Tripped circuit breakers trigger an alert and must be investigated promptly."
  },
  {
    "path": "docs/pageserver-page-service.md",
    "content": "# Page Service\n\nThe Page Service listens for GetPage@LSN requests from the Compute Nodes,\nand responds with pages from the repository. On each GetPage@LSN request,\nit calls into the Repository function\n\nA separate thread is spawned for each incoming connection to the page\nservice. The page service uses the libpq protocol to communicate with\nthe client. The client is a Compute Postgres instance.\n"
  },
  {
    "path": "docs/pageserver-pagecache.md",
    "content": "# Page cache\n\nTODO:\n\n- shared across tenants\n- store pages from layer files\n- store pages from \"in-memory layer\"\n"
  },
  {
    "path": "docs/pageserver-processing-getpage.md",
    "content": "# Processing a GetPage request\n\nTODO:\n- sequence diagram that shows how a GetPage@LSN request is processed\n"
  },
  {
    "path": "docs/pageserver-processing-wal.md",
    "content": "# Processing WAL\n\nTODO:\n- diagram that shows how incoming WAL is processed\n- explain durability, what is fsync'd when, disk_consistent_lsn\n"
  },
  {
    "path": "docs/pageserver-services.md",
    "content": "# Services\n\nThe Page Server consists of multiple threads that operate on a shared\nrepository of page versions:\n```\n                                           | WAL\n                                           V\n                                   +--------------+\n                                   |              |\n                                   | WAL receiver |\n                                   |              |\n                                   +--------------+\n                                                                                 ......\n                  +---------+                              +--------+            .    .\n                  |         |                              |        |            .    .\n GetPage@LSN      |         |                              | backup |  ------->  . S3 .\n------------->    |  Page   |         repository           |        |            .    .\n                  | Service |                              +--------+            .    .\n   page           |         |                                                    ......\n<-------------    |         |\n                  +---------+     +-----------+     +--------------------+\n                                  | WAL redo  |     | Checkpointing,     |\n                  +----------+    | processes |     | Garbage collection |\n                  |          |    +-----------+     +--------------------+\n                  |   HTTP   |\n                  | mgmt API |\n                  |          |\n                  +----------+\n\nLegend:\n\n+--+\n|  |   A thread or multi-threaded service\n+--+\n\n--->   Data flow\n<---\n```\n\n## Page Service\n\nThe Page Service listens for GetPage@LSN requests from the Compute Nodes,\nand responds with pages from the repository. On each GetPage@LSN request,\nit calls into the Repository function\n\nA separate thread is spawned for each incoming connection to the page\nservice. 
The page service uses the libpq protocol to communicate with\nthe client. The client is a Compute Postgres instance.\n\n## WAL Receiver\n\nThe WAL receiver connects to the external WAL safekeeping service\nusing PostgreSQL physical streaming replication, and continuously\nreceives WAL. It decodes the WAL records, and stores them to the\nrepository.\n\n\n## Backup service\n\nThe backup service, responsible for storing pageserver recovery data externally.\n\nCurrently, pageserver stores its files in a filesystem directory it's pointed to.\nThat working directory could be rather ephemeral for such cases as \"a pageserver pod running in k8s with no persistent volumes attached\".\nTherefore, the server interacts with external, more reliable storage to back up and restore its state.\n\nThe code for storage support is extensible and can support arbitrary ones as long as they implement a certain Rust trait.\nThere are the following implementations present:\n* local filesystem — to use in tests mainly\n* AWS S3           - to use in production\n\nThe backup service is disabled by default and can be enabled to interact with a single remote storage.\n\nCLI examples:\n* Local FS: `${PAGESERVER_BIN} -c \"remote_storage={local_path='/some/local/path/'}\"`\n* AWS S3  : `env AWS_ACCESS_KEY_ID='SOMEKEYAAAAASADSAH*#' AWS_SECRET_ACCESS_KEY='SOMEsEcReTsd292v' ${PAGESERVER_BIN} -c \"remote_storage={bucket_name='some-sample-bucket',bucket_region='eu-north-1', prefix_in_bucket='/test_prefix/'}\"`\n\nFor Amazon AWS S3, a key id and secret access key could be located in `~/.aws/credentials` if awscli was ever configured to work with the desired bucket, on the AWS Settings page for a certain user. 
Also note, that the bucket names does not contain any protocols when used on AWS.\nFor local S3 installations, refer to their documentation for name format and credentials.\n\nSimilar to other pageserver settings, toml config file can be used to configure either of the storages as backup targets.\nRequired sections are:\n\n```toml\n[remote_storage]\nlocal_path = '/Users/someonetoignore/Downloads/tmp_dir/'\n```\n\nor\n\n```toml\n[remote_storage]\nbucket_name = 'some-sample-bucket'\nbucket_region = 'eu-north-1'\nprefix_in_bucket = '/test_prefix/'\n```\n\n`AWS_SECRET_ACCESS_KEY` and `AWS_ACCESS_KEY_ID` env variables can be used to specify the S3 credentials if needed.\n\nor\n\n```toml\n[remote_storage]\ncontainer_name = 'some-container-name'\nstorage_account = 'somestorageaccnt'\ncontainer_region = 'us-east'\nprefix_in_container = '/test-prefix/'\n```\n\nThe `AZURE_STORAGE_ACCESS_KEY` env variable can be used to specify the azure credentials if needed.\n\n## Repository background tasks\n\nThe Repository also has a few different background threads and tokio tasks that perform\nbackground duties like dumping accumulated WAL data from memory to disk, reorganizing\nfiles for performance (compaction), and garbage collecting old files.\n\n\nRepository\n----------\n\nThe repository stores all the page versions, or WAL records needed to\nreconstruct them. Each tenant has a separate Repository, which is\nstored in the .neon/tenants/<tenant_id> directory.\n\nRepository is an abstract trait, defined in `repository.rs`. It is\nimplemented by the LayeredRepository object in\n`layered_repository.rs`. There is only that one implementation of the\nRepository trait, but it's still a useful abstraction that keeps the\ninterface for the low-level storage functionality clean. The layered\nstorage format is described in [pageserver-storage.md](./pageserver-storage.md).\n\nEach repository consists of multiple Timelines. 
Timeline is a\nworkhorse that accepts page changes from the WAL, and serves\nget_page_at_lsn() and get_rel_size() requests. Note: this has nothing\nto do with PostgreSQL WAL timelines. The term \"timeline\" is mostly\ninterchangeable with \"branch\"; there is a one-to-one mapping from\nbranch to timeline. A timeline has a unique ID within the tenant,\nrepresented as a 16-byte hex string that never changes, whereas a\nbranch is a user-given name for a timeline.\n\nEach repository also has a WAL redo manager associated with it, see\n`walredo.rs`. The WAL redo manager is used to replay PostgreSQL WAL\nrecords, whenever we need to reconstruct a page version from WAL to\nsatisfy a GetPage@LSN request, or to avoid accumulating too much WAL\nfor a page. The WAL redo manager uses a Postgres process running in\nspecial Neon wal-redo mode to do the actual WAL redo, and\ncommunicates with the process using a pipe.\n\n\nCheckpointing / Garbage Collection\n----------------------------------\n\nPeriodically, the checkpointer thread wakes up and performs housekeeping\nduties on the repository. It has two duties:\n\n### Checkpointing\n\nFlush WAL that has accumulated in memory to disk, so that the old WAL\ncan be truncated away in the WAL safekeepers. Also, to free up memory\nfor receiving new WAL. This process is called \"checkpointing\". It's\nsimilar to checkpointing in PostgreSQL or other DBMSs, but in the page\nserver, checkpointing happens on a per-segment basis.\n\n### Garbage collection\n\nRemove old on-disk layer files that are no longer needed according to the\nPITR retention policy.\n\n\n\nTODO: Sharding\n--------------------\n\nWe should be able to run multiple Page Servers that handle sharded data.\n"
  },
  {
    "path": "docs/pageserver-storage.md",
    "content": "# Pageserver storage\n\nThe main responsibility of the Page Server is to process the incoming WAL, and\nreprocess it into a format that allows reasonably quick access to any page\nversion. The page server slices the incoming WAL per relation and page, and\npackages the sliced WAL into suitably-sized \"layer files\". The layer files\ncontain all the history of the database, back to some reasonable retention\nperiod. This system replaces the base backups and the WAL archive used in a\ntraditional PostgreSQL installation. The layer files are immutable, they are not\nmodified in-place after creation. New layer files are created for new incoming\nWAL, and old layer files are removed when they are no longer needed.\n\nThe on-disk format is based on immutable files. The page server receives a\nstream of incoming WAL, parses the WAL records to determine which pages they\napply to, and accumulates the incoming changes in memory. Whenever enough WAL\nhas been accumulated in memory, it is written out to a new immutable file. That\nprocess accumulates \"L0 delta files\" on disk. When enough L0 files have been\naccumulated, they are merged and re-partitioned into L1 files, and old files\nthat are no longer needed are removed by Garbage Collection (GC).\n\nThe incoming WAL contains updates to arbitrary pages in the system. 
The\ndistribution depends on the workload: the updates could be totally random, or\nthere could be a long stream of updates to a single relation when data is bulk\nloaded, for example, or something in between.\n\n```\nCloud Storage                   Page Server                           Safekeeper\n                        L1               L0             Memory            WAL\n\n+----+               +----+----+\n|AAAA|               |AAAA|AAAA|      +---+-----+         |\n+----+               +----+----+      |   |     |         |AA\n|BBBB|               |BBBB|BBBB|      |BB | AA  |         |BB\n+----+----+          +----+----+      |C  | BB  |         |CC\n|CCCC|CCCC|  <----   |CCCC|CCCC| <--- |D  | CC  |  <---   |DDD     <----   ADEBAABED\n+----+----+          +----+----+      |   | DDD |         |E\n|DDDD|DDDD|          |DDDD|DDDD|      |E  |     |         |\n+----+----+          +----+----+      |   |     |\n|EEEE|               |EEEE|EEEE|      +---+-----+\n+----+               +----+----+\n```\n\nIn this illustration, WAL is received as a stream from the Safekeeper, from the\nright.  It is immediately captured by the page server and stored quickly in\nmemory. The page server memory can be thought of as a quick \"reorder buffer\",\nused to hold the incoming WAL and reorder it so that we keep the WAL records for\nthe same page and relation close to each other.\n\nFrom the page server memory, whenever enough WAL has been accumulated, it is flushed\nto disk into a new L0 layer file, and the memory is released.\n\nWhen enough L0 files have been accumulated, they are merged together and sliced\nper key-space, producing a new set of files where each file contains a more\nnarrow key range, but larger LSN range.\n\nFrom the local disk, the layers are further copied to Cloud Storage, for\nlong-term archival. After a layer has been copied to Cloud Storage, it can be\nremoved from local disk, although we currently keep everything locally for fast\naccess. 
If a layer is needed that isn't found locally, it is fetched from Cloud\nStorage and stored on local disk. L0 and L1 files are both uploaded to Cloud\nStorage.\n\n# Layer map\n\nThe LayerMap tracks what layers exist in a timeline.\n\nCurrently, the layer map is just a resizable array (Vec). On a GetPage@LSN or\nother read request, the layer map scans through the array to find the right layer\nthat contains the data for the requested page. The read-code in LayeredTimeline\nis aware of the ancestor, and returns data from the ancestor timeline if it's\nnot found on the current timeline.\n\n# Different kinds of layers\n\nA layer can be in different states:\n\n- Open: a layer to which new WAL records can be appended.\n- Closed: a layer that is read-only; no new WAL records can be appended to it.\n- Historic: synonym for closed\n- InMemory: A layer that needs to be rebuilt from WAL on pageserver start.\nTo avoid OOM errors, InMemory layers can be spilled to disk into an ephemeral file.\n- OnDisk: A layer that is stored on disk. If its end-LSN is older than\n  disk_consistent_lsn, it is known to be fully flushed and fsync'd to local disk.\n- Frozen layer: an in-memory layer that is Closed.\n\nTODO: Clarify the difference between Closed, Historic and Frozen.\n\nThere are two kinds of OnDisk layers:\n- ImageLayer represents a snapshot of all the keys in a particular range, at one\n  particular LSN. Any keys that are not present in the ImageLayer are known not\n  to exist at that LSN.\n- DeltaLayer represents a collection of WAL records or page images in a range of\n  LSNs, for a range of keys.\n\n# Layer life cycle\n\nLSN range defined by start_lsn and end_lsn:\n- start_lsn is inclusive.\n- end_lsn is exclusive.\n\nFor an open in-memory layer, the end_lsn is MAX_LSN. For a frozen in-memory\nlayer or a delta layer, it is a valid end bound. 
An image layer represents a\nsnapshot at one LSN, so end_lsn is always the snapshot LSN + 1.\n\nEvery layer starts its life as an Open In-Memory layer. When the page server\nreceives the first WAL record for a timeline, it creates a new In-Memory layer\nfor it, and puts it into the layer map. Later, when the layer becomes full, its\ncontents are written to disk, as an on-disk layer.\n\nFlushing a layer is a two-step process: First, the layer is marked as closed, so\nthat it no longer accepts new WAL records, and a new in-memory layer is created\nto hold any WAL after that point. After this first step, the layer is in the Closed\nInMemory state. This first step is called \"freezing\" the layer.\n\nIn the second step, a new Delta layer is created, containing all the data from\nthe Frozen InMemory layer. When it has been created and flushed to disk, the\noriginal frozen layer is replaced with the new layer in the layer map, and the\noriginal frozen layer is dropped, releasing the memory.\n\n# Layer files (On-disk layers)\n\nThe files are called \"layer files\". Each layer file covers a range of keys, and\na range of LSNs (or a single LSN, in case of image layers). You can think of it\nas a rectangle in the two-dimensional key-LSN space. The layer files for each\ntimeline are stored in the timeline's subdirectory under\n`.neon/tenants/<tenant_id>/timelines`.\n\nThere are two kinds of layer files: images, and delta layers. An image file\ncontains a snapshot of all keys at a particular LSN, whereas a delta file\ncontains modifications to a segment - mostly in the form of WAL records - in a\nrange of LSNs.\n\nimage file:\n\n```\n    000000067F000032BE0000400000000070B6-000000067F000032BE0000400000000080B6__00000000346BC568\n              start key                          end key                           LSN\n```\n\n\nThe first parts define the key range that the layer covers. See\npgdatadir_mapping.rs for how the key space is used. 
The last part is the LSN.\n\ndelta file:\n\nDelta files are named similarly, but they cover a range of LSNs:\n\n```\n    000000067F000032BE0000400000000020B6-000000067F000032BE0000400000000030B6__000000578C6B29-0000000057A50051\n              start key                          end key                          start LSN     end LSN\n```\n\nA delta file contains all the key-values in the key-range that were updated in\nthe LSN range. If a key has not been modified, there is no trace of it in the\ndelta layer.\n\n\nA delta layer file can cover a part of the overall key space, as in the previous\nexample, or the whole key range like this:\n\n```\n    000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__000000578C6B29-0000000057A50051\n```\n\nA file that covers the whole key range is called a L0 file (Level 0), while a\nfile that covers only part of the key range is called a L1 file. The \"level\" of\na file is not explicitly stored anywhere, you can only distinguish them by\nlooking at the key range that a file covers. The read-path doesn't need to\ntreat L0 and L1 files any differently.\n\n\n## Notation used in this document\n\nFIXME: This is somewhat obsolete, the layer files cover a key-range rather than\na particular relation nowadays. However, the description on how you find a page\nversion, and how branching and GC works is still valid.\n\nThe full path of a delta file looks like this:\n\n```\n    .neon/tenants/941ddc8604413b88b3d208bddf90396c/timelines/4af489b06af8eed9e27a841775616962/rel_1663_13990_2609_0_10_000000000169C348_0000000001702000\n```\n\nFor simplicity, the examples below use a simplified notation for the\npaths.  The tenant ID is left out, the timeline ID is replaced with\nthe human-readable branch name, and spcnode+dbnode+relnode+forkum+segno\nwith a human-readable table name. The LSNs are also shorter. 
For\nexample, a base image file at LSN 100 and a delta file between 100-200\nfor 'orders' table on 'main' branch is represented like this:\n\n```\n    main/orders_100\n    main/orders_100_200\n```\n\n\n# Creating layer files\n\nLet's start with a simple example with a system that contains one\nbranch called 'main' and two tables, 'orders' and 'customers'. The end\nof WAL is currently at LSN 250. In this starting situation, you would\nhave these files on disk:\n\n```\n\tmain/orders_100\n\tmain/orders_100_200\n\tmain/orders_200\n\tmain/customers_100\n\tmain/customers_100_200\n\tmain/customers_200\n```\n\nIn addition to those files, the recent changes between LSN 200 and the\nend of WAL at 250 are kept in memory. If the page server crashes, the\nlatest records between 200-250 need to be re-read from the WAL.\n\nWhenever enough WAL has been accumulated in memory, the page server\nwrites out the changes in memory into new layer files. This process\nis called \"checkpointing\" (not to be confused with the PostgreSQL\ncheckpoints, that's a different thing). The page server only creates\nlayer files for relations that have been modified since the last\ncheckpoint. For example, if the current end of WAL is at LSN 450, and\nthe last checkpoint happened at LSN 400 but there hasn't been any\nrecent changes to 'customers' table, you would have these files on\ndisk:\n\n\tmain/orders_100\n\tmain/orders_100_200\n\tmain/orders_200\n\tmain/orders_200_300\n\tmain/orders_300\n\tmain/orders_300_400\n\tmain/orders_400\n\tmain/customers_100\n\tmain/customers_100_200\n\tmain/customers_200\n\nIf the customers table is modified later, a new file is created for it\nat the next checkpoint. 
The new file will cover the \"gap\" from the\nlast layer file, so the LSN ranges are always contiguous:\n\n```\n\tmain/orders_100\n\tmain/orders_100_200\n\tmain/orders_200\n\tmain/orders_200_300\n\tmain/orders_300\n\tmain/orders_300_400\n\tmain/orders_400\n\tmain/customers_100\n\tmain/customers_100_200\n\tmain/customers_200\n\tmain/customers_200_500\n\tmain/customers_500\n```\n\n## Reading page versions\n\nWhenever a GetPage@LSN request comes in from the compute node, the\npage server needs to reconstruct the requested page, as it was at the\nrequested LSN. To do that, the page server first checks the recent\nin-memory layer; if the requested page version is found there, it can\nbe returned immediately without looking at the files on\ndisk. Otherwise the page server needs to locate the layer file that\ncontains the requested page version.\n\nFor example, if a request comes in for table 'orders' at LSN 250, the\npage server would load the 'main/orders_200_300' file into memory, and\nreconstruct and return the requested page from it, as it was at\nLSN 250. Because the layer file consists of a full image of the\nrelation at the start LSN and the WAL, reconstructing the page\ninvolves replaying any WAL records applicable to the page between LSNs\n200-250, starting from the base image at LSN 200.\n\n# Multiple branches\n\nImagine that a child branch is created at LSN 250:\n\n```\n            @250\n    ----main--+-------------------------->\n               \\\n                +---child-------------->\n```\n\n\nThen, the 'orders' table is updated differently on the 'main' and\n'child' branches. 
You now have this situation on disk:\n\n```\n    main/orders_100\n    main/orders_100_200\n    main/orders_200\n    main/orders_200_300\n    main/orders_300\n    main/orders_300_400\n    main/orders_400\n    main/customers_100\n    main/customers_100_200\n    main/customers_200\n    child/orders_250_300\n    child/orders_300\n    child/orders_300_400\n    child/orders_400\n```\n\nBecause the 'customers' table hasn't been modified on the child\nbranch, there is no file for it there. If you request a page for it on\nthe 'child' branch, the page server will not find any layer file\nfor it in the 'child' directory, so it will recurse to look into the\nparent 'main' branch instead.\n\nFrom the 'child' branch's point of view, the history for each relation\nis linear, and the request's LSN identifies unambiguously which file\nyou need to look at. For example, the history for the 'orders' table\non the 'main' branch consists of these files:\n\n```\n    main/orders_100\n    main/orders_100_200\n    main/orders_200\n    main/orders_200_300\n    main/orders_300\n    main/orders_300_400\n    main/orders_400\n```\n\nAnd from the 'child' branch's point of view, it consists of these\nfiles:\n\n```\n    main/orders_100\n    main/orders_100_200\n    main/orders_200\n    main/orders_200_300\n    child/orders_250_300\n    child/orders_300\n    child/orders_300_400\n    child/orders_400\n```\n\nThe branch metadata includes the point where the child branch was\ncreated, LSN 250. If a page request comes with LSN 275, we read the\npage version from the 'child/orders_250_300' file. We might also\nneed to reconstruct the page version as it was at LSN 250, in order\nto replay the WAL up to LSN 275, using 'main/orders_200_300' and\n'main/orders_200'. 
The page versions between 250-300 in the\n'main/orders_200_300' file are ignored when operating on the child\nbranch.\n\nNote: It doesn't make any difference if the child branch is created\nwhen the end of the main branch was at LSN 250, or later when the tip of\nthe main branch had already moved on. The latter case, creating a\nbranch at a historic LSN, is how we support PITR in Neon.\n\n\n# Garbage collection\n\nIn this scheme, we keep creating new layer files over time. We also\nneed a mechanism to remove old files that are no longer needed,\nbecause disk space isn't infinite.\n\nWhat files are still needed? Currently, the page server supports PITR\nand branching from any branch at any LSN that is \"recent enough\" from\nthe tip of the branch.  \"Recent enough\" is defined as an LSN horizon,\nwhich by default is 64 MB.  (See DEFAULT_GC_HORIZON). For this\nexample, let's assume that the LSN horizon is 150 units.\n\nLet's look at the single branch scenario again. Imagine that the end\nof the branch is LSN 525, so that the GC horizon is currently at\n525-150 = 375\n\n```\n\tmain/orders_100\n\tmain/orders_100_200\n\tmain/orders_200\n\tmain/orders_200_300\n\tmain/orders_300\n\tmain/orders_300_400\n\tmain/orders_400\n\tmain/orders_400_500\n\tmain/orders_500\n\tmain/customers_100\n\tmain/customers_100_200\n\tmain/customers_200\n```\n\nWe can remove the following files because the end LSNs of those files are\nolder than GC horizon 375, and there are more recent layer files for the\ntable:\n\n```\n\tmain/orders_100       DELETE\n\tmain/orders_100_200   DELETE\n\tmain/orders_200       DELETE\n\tmain/orders_200_300   DELETE\n\tmain/orders_300       STILL NEEDED BY orders_300_400\n\tmain/orders_300_400   KEEP, NEWER THAN GC HORIZON\n\tmain/orders_400       ..\n\tmain/orders_400_500   ..\n\tmain/orders_500       ..\n\tmain/customers_100      DELETE\n\tmain/customers_100_200  DELETE\n\tmain/customers_200      KEEP, NO NEWER VERSION\n```\n\n'main/customers_200' is old enough, 
but it cannot be\nremoved because there is no newer layer file for the table.\n\nThings get slightly more complicated with multiple branches. All of\nthe above still holds, but in addition to recent files we must also\nretain older snapshot files that are still needed by child branches.\nFor example, if child branch is created at LSN 150, and the 'customers'\ntable is updated on the branch, you would have these files:\n\n```\n\tmain/orders_100        KEEP, NEEDED BY child BRANCH\n\tmain/orders_100_200    KEEP, NEEDED BY child BRANCH\n\tmain/orders_200        DELETE\n\tmain/orders_200_300    DELETE\n\tmain/orders_300        KEEP, NEWER THAN GC HORIZON\n\tmain/orders_300_400    KEEP, NEWER THAN GC HORIZON\n\tmain/orders_400        KEEP, NEWER THAN GC HORIZON\n\tmain/orders_400_500    KEEP, NEWER THAN GC HORIZON\n\tmain/orders_500        KEEP, NEWER THAN GC HORIZON\n\tmain/customers_100       DELETE\n\tmain/customers_100_200   DELETE\n\tmain/customers_200       KEEP, NO NEWER VERSION\n\tchild/customers_150_300  DELETE\n\tchild/customers_300      KEEP, NO NEWER VERSION\n```\n\nIn this situation, 'main/orders_100' and 'main/orders_100_200' cannot\nbe removed, even though they are older than the GC horizon, because\nthey are still needed by the child branch. 'main/orders_200'\nand 'main/orders_200_300' can still be removed.\n\nIf 'orders' is modified later on the 'child' branch, we will create a\nnew base image and delta file for it on the child:\n\n```\n\tmain/orders_100\n\tmain/orders_100_200\n\n\tmain/orders_300\n\tmain/orders_300_400\n\tmain/orders_400\n\tmain/orders_400_500\n\tmain/orders_500\n\tmain/customers_200\n\tchild/customers_300\n\tchild/orders_150_400\n\tchild/orders_400\n```\n\nAfter this, the 'main/orders_100' and 'main/orders_100_200' file could\nbe removed. It is no longer needed by the child branch, because there\nis a newer layer file there. TODO: This optimization hasn't been\nimplemented! 
The GC algorithm will currently keep the file on the\n'main' branch anyway, for as long as the child branch exists.\n\nTODO:\nDescribe GC and checkpoint interval settings.\n\n# TODO: On LSN ranges\n\nIn principle, each relation can be checkpointed separately, i.e. the\nLSN ranges of the files don't need to line up. So this would be legal:\n\n```\n\tmain/orders_100\n\tmain/orders_100_200\n\tmain/orders_200\n\tmain/orders_200_300\n\tmain/orders_300\n\tmain/orders_300_400\n\tmain/orders_400\n\tmain/customers_150\n\tmain/customers_150_250\n\tmain/customers_250\n\tmain/customers_250_500\n\tmain/customers_500\n```\n\nHowever, the code currently always checkpoints all relations together.\nSo that situation doesn't arise in practice.\n\nIt would also be OK to have overlapping LSN ranges for the same relation:\n\n\tmain/orders_100\n\tmain/orders_100_200\n\tmain/orders_200\n\tmain/orders_200_300\n\tmain/orders_300\n\tmain/orders_250_350\n\tmain/orders_350\n\tmain/orders_300_400\n\tmain/orders_400\n\nThe code that reads the layer files should cope with this, but this\nsituation doesn't arise either, because the checkpointing code never\ndoes that.  It could be useful, however, as a transient state when\ngarbage collecting around branch points, or explicit recovery\npoints. For example, if we start with this:\n\n```\n\tmain/orders_100\n\tmain/orders_100_200\n\tmain/orders_200\n\tmain/orders_200_300\n\tmain/orders_300\n```\n\nAnd there is a branch or explicit recovery point at LSN 150, we could\nreplace 'main/orders_100_200' with 'main/orders_150' to keep a\nlayer only at that exact point that's still needed, removing the\nother page versions around it. But such compaction has not been\nimplemented yet.\n"
  },
  {
    "path": "docs/pageserver-tenant-migration.md",
    "content": "## Pageserver tenant migration\n\n### Overview\n\nThis feature allows to migrate a timeline from one pageserver to another by utilizing remote storage capability.\n\n### Migration process\n\nPageserver implements two new http handlers: timeline attach and timeline detach.\nTimeline migration is performed in a following way:\n1. Timeline attach is called on a target pageserver. This asks pageserver to download latest checkpoint uploaded to s3.\n2. For now it is necessary to manually initialize replication stream via callmemaybe call so target pageserver initializes replication from safekeeper (it is desired to avoid this and initialize replication directly in attach handler, but this requires some refactoring (probably [#997](https://github.com/neondatabase/neon/issues/997)/[#1049](https://github.com/neondatabase/neon/issues/1049))\n3. Replication state can be tracked via timeline detail pageserver call.\n4. Compute node should be restarted with new pageserver connection string. Issue with multiple compute nodes for one timeline is handled on the safekeeper consensus level. So this is not a problem here.Currently responsibility for rescheduling the compute with updated config lies on external coordinator (console).\n5. Timeline is detached from old pageserver. On disk data is removed.\n\n\n### Implementation details\n\nNow safekeeper needs to track which pageserver it is replicating to. This introduces complications into replication code:\n* We need to distinguish different pageservers (now this is done by connection string which is imperfect and is covered here: https://github.com/neondatabase/neon/issues/1105). Callmemaybe subscription management also needs to track that (this is already implemented).\n* We need to track which pageserver is the primary. This is needed to avoid reconnections to non primary pageservers. Because we shouldn't reconnect to them when they decide to stop their walreceiver. 
I.e., this can happen when there is load on the compute and we are trying to detach the timeline from the old pageserver. In this case callmemaybe will try to reconnect to it because the replication termination condition is not met (a pageserver with an active compute may never catch up to the latest LSN, so there is always some WAL tail).\n"
  },
  {
    "path": "docs/pageserver-thread-mgmt.md",
    "content": "## Thread management\n\nThe pageserver uses Tokio for handling concurrency. Everything runs in\nTokio tasks, although some parts are written in blocking style and use\nspawn_blocking().\n\nWe currently use std blocking functions for disk I/O, however.  The\ncurrent model is that we consider disk I/Os to be short enough that we\nperform them while running in a Tokio task. Changing all the disk I/O\ncalls to async is a TODO.\n\nEach Tokio task is tracked by the `task_mgr` module. It maintains a\nregistry of tasks, and which tenant or timeline they are operating\non.\n\n### Handling shutdown\n\nWhen a tenant or timeline is deleted, we need to shut down all tasks\noperating on it, before deleting the data on disk. There's a function,\n`shutdown_tasks`, to request all tasks of a particular tenant or\ntimeline to shutdown. It will also wait for them to finish.\n\nA task registered in the task registry can check if it has been\nrequested to shut down, by calling `is_shutdown_requested()`. There's\nalso a `shutdown_watcher()` Future that can be used with `tokio::select!`\nor similar, to wake up on shutdown.\n\n\n### Async cancellation safety\n\nIn async Rust, futures can be \"cancelled\" at any await point, by\ndropping the Future. For example, `tokio::select!` returns as soon as\none of the Futures returns, and drops the others. `tokio::time::timeout`\nis another example. In the Rust ecosystem, some functions are\ncancellation-safe, meaning they can be safely dropped without\nside-effects, while others are not. See documentation of\n`tokio::select!` for examples.\n\nIn the pageserver and safekeeper, async code is *not*\ncancellation-safe by default. 
Unless otherwise marked, any async\nfunction that you call cannot be assumed to be async\ncancellation-safe, and must be polled to completion.\n\nThe downside of non-cancellation safe code is that you have to be very\ncareful when using `tokio::select!`, `tokio::time::timeout`, and other\nsuch functions that can cause a Future to be dropped. They can only be\nused with functions that are explicitly documented to be cancellation-safe,\nor you need to spawn a separate task to shield from the cancellation.\n\nAt the entry points to the code, we also take care to poll futures to\ncompletion, or shield the rest of the code from surprise cancellations\nby spawning a separate task. The code that handles incoming HTTP\nrequests, for example, spawns a separate task for each request,\nbecause Hyper will drop the request-handling Future if the HTTP\nconnection is lost.\n\n\n#### How to cancel, then?\n\nIf our code is not cancellation-safe, how do you cancel long-running\ntasks? Use CancellationTokens.\n\nTODO: More details on that. And we have an ongoing discussion on what\nto do if cancellations might come from multiple sources.\n\n#### Exceptions\nSome library functions are cancellation-safe, and are explicitly marked\nas such. For example, `utils::seqwait`.\n\n#### Rationale\n\nThe alternative would be to make all async code cancellation-safe,\nunless otherwise marked. That way, you could use `tokio::select!` more\nliberally. The reasons we didn't choose that are explained in this\nsection.\n\nWriting code in a cancellation-safe manner is tedious, as you need to\nscrutinize every `.await` and ensure that if the `.await` call never\nreturns, the system is in a safe, consistent state. In some ways, you\nneed to do that with `?` and early `returns`, too, but `.await`s are\neasier to miss. It is also easier to perform cleanup tasks when a\nfunction returns an `Err` than when an `.await` simply never\nreturns. 
You can use `scopeguard` and Drop guards to perform cleanup\ntasks, but it is more tedious. An `.await` that never returns is more\nsimilar to a panic.\n\nNote that even if you only use building blocks that themselves are\ncancellation-safe, it doesn't mean that the code as a whole is\ncancellation-safe. For example, consider the following code:\n\n```\nwhile let Some(i) = work_inbox.recv().await {\n\tif let Err(_) = results_outbox.send(i).await {\n\t\tprintln!(\"receiver dropped\");\n\t\treturn;\n\t}\n}\n```\n\nIt reads messages from one channel and sends them to another channel. If\nthis code is cancelled at the `results_outbox.send(i).await`, the\nmessage read from the receiver is lost. That may or may not be OK,\ndepending on the context.\n\nAnother reason to not require cancellation-safety is historical: we\nalready had a lot of async code that was not scrutinized for\ncancellation-safety when this issue was raised. Scrutinizing all\nexisting code is no fun.\n"
  },
  {
    "path": "docs/pageserver-walredo.md",
    "content": "# WAL Redo\n\nTo reconstruct a particular page version from an image of the page and\nsome WAL records, the pageserver needs to replay the WAL records. This\nhappens on-demand, when a GetPage@LSN request comes in, or as part of\nbackground jobs that reorganize data for faster access.\n\nIt's important that data cannot leak from one tenant to another, and\nthat a corrupt WAL record on one timeline doesn't affect other tenants\nor timelines.\n\n## Multi-tenant security\n\nIf you have direct access to the WAL directory, or if you have\nsuperuser access to a running PostgreSQL server, it's easy to\nconstruct a malicious or corrupt WAL record that causes the WAL redo\nfunctions to crash, or to execute arbitrary code. That is not a\nsecurity problem for PostgreSQL; if you have superuser access, you\nhave full access to the system anyway.\n\nThe Neon pageserver, however, is multi-tenant. It needs to execute WAL\nbelonging to different tenants in the same system, and malicious WAL\nin one tenant must not affect other tenants.\n\nA separate WAL redo process is launched for each tenant, and the\nprocess uses the seccomp(2) system call to restrict its access to the\nbare minimum needed to replay WAL records. The process does not have\naccess to the filesystem or network. It can only communicate with the\nparent pageserver process through a pipe.\n\nIf an attacker creates a malicious WAL record and injects it into the\nWAL stream of a timeline, he can take control of the WAL redo process\nin the pageserver. However, the WAL redo process cannot access the\nrest of the system. And because there is a separate WAL redo process\nfor each tenant, the hijacked WAL redo process can only see WAL and\ndata belonging to the same tenant, which the attacker would have\naccess to anyway.\n\n## WAL-redo process communication\n\nThe WAL redo process runs the 'postgres' executable, launched with a\nNeon-specific command-line option to put it into WAL-redo process\nmode.  
The pageserver controls the lifetime of the WAL redo processes,\nlaunching them as needed. If a tenant is detached from the pageserver,\nany WAL redo processes for that tenant are killed.\n\nThe pageserver communicates with each WAL redo process over its\nstdin/stdout/stderr. It works in a request-response model with a simple\ncustom protocol, described in walredo.rs. To replay a set of WAL\nrecords for a page, the pageserver sends the \"before\" image of the\npage and the WAL records over 'stdin', followed by a command to\nperform the replay. The WAL redo process responds with an \"after\"\nimage of the page.\n\n## Special handling of some records\n\nSome WAL record types are handled directly in the pageserver, by\nbespoke Rust code, and are not sent over to the WAL redo process.\nThis includes SLRU-related WAL records, like commit records. SLRUs\ndon't use the standard Postgres buffer manager, so dealing with them\nin the Neon WAL redo mode would require quite a few changes to\nPostgres code and special handling in the protocol anyway.\n\nSome record types that include a full-page-image (e.g. XLOG_FPI) are\nalso handled specially at the time incoming WAL is processed, and are\nstored as page images rather than WAL records.\n\n\n## Records that modify multiple pages\n\nSome Postgres WAL records modify multiple pages. Such WAL records are\nduplicated, so that a copy is stored for each affected page. This is\nsomewhat wasteful, but because most WAL records only affect one page,\nthe overhead is acceptable.\n\nThe WAL redo always happens for one particular page. If the WAL record\ncontains changes to other pages, they are ignored.\n"
  },
  {
    "path": "docs/pageserver.md",
    "content": "# Page server architecture\n\nThe Page Server has a few different duties:\n\n- Respond to GetPage@LSN requests from the Compute Nodes\n- Receive WAL from WAL safekeeper, and store it\n- Upload data to S3 to make it durable, download files from S3 as needed\n\nS3 is the main fault-tolerant storage of all data, as there are no Page Server\nreplicas. We use a separate fault-tolerant WAL service to reduce latency. It\nkeeps track of WAL records which are not synced to S3 yet.\n"
  },
  {
    "path": "docs/rfcs/001-cluster-size-limits.md",
    "content": "Cluster size limits\n==================\n\n## Summary\n\nOne of the resource consumption limits for free-tier users is a cluster size limit.\n\nTo enforce it, we need to calculate the timeline size and check if the limit is reached before relation create/extend operations.\nIf the limit is reached, the query must fail with some meaningful error/warning.\nWe may want to exempt some operations from the quota to allow users free space to fit back into the limit.\n\nThe stateless compute node that performs validation is separate from the storage that calculates the usage, so we need to exchange cluster size information between those components.\n\n## Motivation\n\nLimit the maximum size of a PostgreSQL instance to limit free tier users (and other tiers in the future).\nFirst of all, this is needed to control our free tier production costs.\nAnother reason to limit resources is risk management — we haven't (fully) tested and optimized neon for big clusters,\nso we don't want to give users access to the functionality that we don't think is ready.\n\n## Components\n\n* pageserver - calculate the size consumed by a timeline and add it to the feedback message.\n* safekeeper - pass feedback message from pageserver to compute.\n* compute - receive feedback message, enforce size limit based on GUC `neon.max_cluster_size`.\n* console - set and update `neon.max_cluster_size` setting\n\n## Proposed implementation\n\nFirst of all, it's necessary to define timeline size.\n\nThe current approach is to count all data, including SLRUs. (not including WAL)\nHere we think of it as a physical disk underneath the Postgres cluster.\nThis is how the `LOGICAL_TIMELINE_SIZE` metric is implemented in the pageserver.\n\nAlternatively, we could count only relation data. 
As in pg_database_size().\nThis approach is somewhat more user-friendly because it is the data that is really affected by the user.\nOn the other hand, it puts us in a weaker position than other services, i.e., RDS.\nWe will need to refactor the timeline_size counter or add another counter to implement it.\n\nTimeline size is updated during wal digestion. It is not versioned and is valid at the last_received_lsn moment.\nThen this size should be reported to compute node.\n\n`current_timeline_size` value is included in the walreceiver's custom feedback message: `ReplicationFeedback.`\n\n(PR about protocol changes https://github.com/neondatabase/neon/pull/1037).\n\nThis message is received by the safekeeper and propagated to compute node as a part of `AppendResponse`.\n\nFinally, when compute node receives the `current_timeline_size` from safekeeper (or from pageserver directly), it updates the global variable.\n\nAnd then every neon_extend() operation checks if limit is reached `(current_timeline_size > neon.max_cluster_size)` and throws `ERRCODE_DISK_FULL` error if so.\n(see Postgres error codes [https://www.postgresql.org/docs/devel/errcodes-appendix.html](https://www.postgresql.org/docs/devel/errcodes-appendix.html))\n\nTODO:\nWe can allow autovacuum processes to bypass this check, simply checking `IsAutoVacuumWorkerProcess()`.\nIt would be nice to allow manual VACUUM and VACUUM FULL to bypass the check, but it's uneasy to distinguish these operations at the low level.\nSee issues https://github.com/neondatabase/neon/issues/1245\nhttps://github.com/neondatabase/neon/issues/1445\n\nTODO:\nWe should warn users if the limit is soon to be reached.\n\n### **Reliability, failure modes and corner cases**\n\n1. `current_timeline_size` is valid at the last received and digested by pageserver lsn.\n\n    If pageserver lags behind compute node, `current_timeline_size` will lag too. 
This lag can be tuned using backpressure, but it is not expected to be 0 all the time.\n\n    So transactions that happen in this LSN range may cause the limit to be exceeded, especially operations that generate (e.g., CREATE DATABASE) or free (e.g., TRUNCATE) a lot of data pages while generating a small amount of WAL. Are there other operations like this?\n\n    Currently, CREATE DATABASE operations are restricted in the console. So this is not an issue.\n\n\n### **Security implications**\n\nWe treat compute as an untrusted component. That's why we try to isolate it with a secure container runtime or a VM.\nMalicious users may change the `neon.max_cluster_size` setting, so we need an extra size limit check.\nTo cover this case, we also monitor the compute node size in the console.\n"
  },
  {
    "path": "docs/rfcs/002-storage.md",
    "content": "# Neon storage node — alternative\n\n## **Design considerations**\n\nSimplify storage operations for people => Gain adoption/installs on laptops and small private installation => Attract customers to DBaaS by seamless integration between our tooling and cloud.\n\nProposed architecture addresses:\n\n- High availability -- tolerates n/2 - 1 failures\n- Multi-tenancy -- one storage for all databases\n- Elasticity -- increase storage size on the go by adding nodes\n- Snapshots / backups / PITR with S3 offload\n- Compression\n\nMinuses are:\n\n- Quite a lot of work\n- Single page access may touch few disk pages\n- Some bloat in data — may slowdown sequential scans\n\n## **Summary**\n\nStorage cluster is sharded key-value store with ordered keys. Key (****page_key****) is a tuple of `(pg_id, db_id, timeline_id, rel_id, forkno, segno, pageno, lsn)`. Value is either page or page diff/wal. Each chunk (chunk == shard) stores approx 50-100GB ~~and automatically splits in half when grows bigger then soft 100GB limit~~. by having a fixed range of pageno's it is responsible for. Chunks placement on storage nodes is stored in a separate metadata service, so chunk can be freely moved around the cluster if it is need. Chunk itself is a filesystem directory with following sub directories:\n\n```\n\n|-chunk_42/\n  |-store/ -- contains lsm with pages/pagediffs ranging from\n  |\t      page_key_lo to page_key_hi\n  |-wal/\n  |  |- db_1234/ db-specific wal files with pages from page_key_lo\n  |\t\t to page_key_hi\n  |\n  |-chunk.meta -- small file with snapshot references\n\t\t  (page_key_prefix+lsn+name)\n\t\t  and PITR regions (page_key_start, page_key_end)\n```\n\n## **Chunk**\n\nChunk is responsible for storing pages potentially from different databases and relations. 
Each page is addressed by a lexicographically ordered tuple (****page_key****) with following fields:\n\n- `pg_id` -- unique id of given postgres instance (or postgres cluster as it is called in postgres docs)\n- `db_id` -- database that was created by 'CREATE DATABASE' in a given postgres instance\n- `db_timeline` -- used to create Copy-on-Write instances from snapshots, described later\n- `rel_id` -- tuple of (relation_id, 0) for tables and (indexed_relation_id, rel_id) for indices. Done this way so table indices were closer to table itself on our global key space.\n- `(forkno, segno, pageno)` -- page coordinates in postgres data files\n- `lsn_timeline` -- postgres feature, increments when PITR was done.\n- `lsn` -- lsn of current page version.\n\nChunk stores pages and page diffs ranging from page_key_lo to page_key_hi. Processing node looks at page in wal record and sends record to a chunk responsible for this page range. When wal record arrives to a chunk it is initially stored in `chunk_id/wal/db_id/wal_segno.wal`. Then background process moves records from that wal files to the lsm tree in `chunk_id/store`. Or, more precisely, wal records would be materialized into lsm memtable and when that memtable is flushed to SSTable on disk we may trim the wal. That way some not durably (in the distributed sense) committed pages may enter the tree -- here we rely on processing node behavior: page request from processing node should contain proper lsm horizons so that storage node may respond with proper page version.\n\nLSM here is a usual LSM for variable-length values: at first data is stored in memory (we hold incoming wal records to be able to regenerate it after restart) at some balanced tree. When this tree grows big enough we dump it into disk file (SSTable) sorting records by key. Then SStables are mergesorted in the background to a different files. 
All file operation are sequential and do not require WAL for durability.\n\nContent of SSTable can be following:\n\n```jsx\n(pg_id, db_id, ... , pageno=42, lsn=100) (full 8k page data)\n(pg_id, db_id, ... , pageno=42, lsn=150) (per-page diff)\n(pg_id, db_id, ... , pageno=42, lsn=180) (per-page diff)\n(pg_id, db_id, ... , pageno=42, lsn=200) (per-page diff)\n(pg_id, db_id, ... , pageno=42, lsn=220) (full 8k page data)\n(pg_id, db_id, ... , pageno=42, lsn=250) (per-page diff)\n(pg_id, db_id, ... , pageno=42, lsn=270) (per-page diff)\n(pg_id, db_id, ... , pageno=5000, lsn=100) (full 8k page data)\n```\n\nSo query for `pageno=42 up to lsn=260` would need to find closest entry less then this key, iterate back to the latest full page and iterate forward to apply diffs. How often page is materialized in lsn-version sequence is up to us -- let's say each 5th version should be a full page.\n\n### **Page deletion**\n\nTo delete old pages we insert blind deletion marker `(pg_id, db_id, #trim_lsn < 150)` into a lsm tree. During merges such marker would indicate that all pages with smaller lsn should be discarded. Delete marker will travel down the tree levels hierarchy until it reaches last level. In non-PITR scenario where old page version are not needed at all such deletion marker would (in average) prevent old page versions propagation down the tree -- so all bloat would concentrate at higher tree layers without affecting bigger bottom layers.\n\n### **Recovery**\n\nUpon storage node restart recent WAL files are applied to appropriate pages and resulting pages stored in lsm memtable. So this should be fast since we are not writing anything to disk.\n\n### **Checkpointing**\n\nNo such mechanism is needed. 
Or we may look at the storage node as a kind of continuous checkpointer.\n\n### **Full page writes (torn page protection)**\n\nStorage node never updates individual pages, only merges SSTable, so torn pages are not an issue.\n\n### **Snapshot**\n\nThat is the part that I like about this design -- snapshot creation is instant and cheap operation that can have flexible granularity level: whole instance, database, table. Snapshot creation inserts a record in `chunk.meta` file with lsn of this snapshot and key prefix `(pg_id, db_id, db_timeline, rel_id, *)` that prohibits pages deletion within this range. Storage node may not know anything about page internals, but by changing number of fields in our prefix we may change snapshot granularity.\n\nIt is again useful to remap `rel_id` to `(indexed_relation_id, rel_id)` so that snapshot of relation would include its indices. Also table snapshot would trickily interact with catalog. Probably all table snapshots should hold also a catalog snapshot. And when node is started with such snapshot it should check that only tables from snapshot are queried. I assume here that for snapshot reading one needs to start a new postgres instance.\n\nStorage consumed by snapshot is proportional to the amount of data changed. We may have some heuristic (calculated based on cost of different storages) about when to offload old snapshot to s3. For example, if current database has more than 40% of changed pages with respect to previous snapshot then we may offload that snapshot to s3, and release this space.\n\n**Starting db from snapshot**\n\nWhen we are starting database from snapshot it can be done in two ways. First, we may create new db_id, move all the data from snapshot to a new db and start a database. Second option is to create Copy-on-Write (CoW) instance out of snapshot and read old pages from old snapshot and store new pages separately. 
That is why there is `db_timeline` key field near `db_id` -- CoW (🐮) database should create new `db_timeline` and remember old `db_timeline`. Such a database can have hashmap of pages that it has changed to query pages from proper snapshot on the first try. `db_timeline` is located near `db_id` so that new page versions generated by new instance would not bloat data of initial snapshot. It is not clear whether it is possible to effectively support \"stacked\" CoW snapshot, so we may disallow them. (Well, one way to support them is to move `db_timeline` close to `lsn` -- so we may scan neighboring pages and find right one. But again that way we bloat snapshot with unrelated data and may slowdown full scans that are happening in different database).\n\n**Snapshot export/import**\n\nOnce we may start CoW instances it is easy to run auxiliary postgres instance on this snapshot and run `COPY (...) TO stdout` or `pg_dump` and export data from the snapshot to some portable formats. Also we may start postgres on a new empty database and run `COPY FROM stdin`. This way we can initialize new non-CoW databases and transfer snapshots via network.\n\n### **PITR area**\n\nIn described scheme PITR is just a prohibition to delete any versions within some key prefix, either it is a database or a table key prefix. So PITR may have different settings for different tables, databases, etc.\n\nPITR is quite bloaty, so we may aggressively offload it to s3 -- we may push same (or bigger) SSTables to s3 and maintain lsm structure there.\n\n### **Compression**\n\nSince we are storing page diffs of variable sizes there is no structural dependency on a page size and we may compress it. Again that could be enabled only on pages with some key prefixes, so we may have this with db/table granularity.\n\n### **Chunk metadata**\n\nChunk metadata is a file that lies in the chunk directory and stores info about current snapshots and PITR regions. 
Chunk should always consult this data when merging SSTables and applying delete markers.\n\n### **Chunk splitting**\n\n*(NB: following paragraph is about how to avoid page splitting)*\n\nWhen a chunk hits some soft storage limit (let's say 100Gb) it should be split in half and global metadata about chunk boundaries should be updated. Here I assume that chunk split is a local operation happening on single node. Process of chunk splitting should look like following:\n\n1. Find separation key and spawn two new chunks with [lo, mid) [mid, hi) boundaries.\n\n2. Prohibit WAL deletion and old SSTables deletion on original chunk.\n\n3. On each lsm layer we would need to split only one SSTable, all other would fit within left or right range. Symlink/split those files to new chunks.\n\n4. Start WAL replay on new chunks.\n\n5. Update global metadata about new chunk boundaries.\n\n6. Eventually (metadata update should be pushed to processing node by metadata service) storage node will start sending WAL and page requests to the new nodes.\n\n7. New chunk may start serving read queries when following conditions are met:\n\na) it receives at least one WAL record from processing node\n\nb) it replayed all WAL up to the new received one\n\nc) checked by downlinks that there were no WAL gaps.\n\nChunk split as it is described here is quite fast operation when it is happening on the local disk -- vast majority of files will be just moved without copying anything. I suggest to keep split always local and not to mix it with chunk moving around cluster. So if we want to split some chunk but there is small amount of free space left on the device, we should first move some chunks away from the node and then proceed with splitting.\n\n### Fixed chunks\n\nAlternative strategy is not to split at all and have pageno-fixed chunk boundaries. When table is created we first materialize this chunk by storing first new pages only and the chunk is small. 
Then chunk is growing while table is filled, but it can't grow substantially bigger than the allowed pageno range, so at max it would be 1GB or whatever limit we want + some bloat due to snapshots and old page versions.\n\n### **Chunk lsm internals**\n\nSo how to implement chunk's lsm?\n\n- Write from scratch and use RocksDB to prototype/benchmark, then switch to own lsm implementation. RocksDB can provide some sanity check for performance of home-brewed implementation and it would be easier to prototype.\n- Use postgres as lego constructor. We may model memtable with postgres B-tree referencing some in-memory log of incoming records. SSTable merging may reuse postgres external merging algorithm, etc. One thing that would definitely not fit (or I didn't come up with an idea how to fit that) -- is multi-tenancy. If we are storing pages from different databases we can't use postgres buffer pool, since there is no db_id in the page header. We can add new field there but IMO it would be no go for committing that to vanilla.\n\nOther possibility is not to try to fit few databases in one storage node. But that way it is no go for multi-tenant cloud installation: we would need to run a lot of storage node instances on one physical storage node, each with its own local page cache. So that would be much closer to ordinary managed RDS.\n\nMulti-tenant storage makes sense even on a laptop, when you work with different databases, running tests with temp database, etc. 
And when installation grows bigger it starts to make more and more sense, so it seems important.\n\n# Storage fleet\n\n# **Storage fleet**\n\n- When databases are smaller than a chunk size we naturally can store them in one chunk (since their page_key would fit in some chunk's [lo, hi) range).\n\n<img width=\"937\" alt=\"Screenshot_2021-02-22_at_16 49 17\" src=\"https://user-images.githubusercontent.com/284219/108729836-ffcbd200-753b-11eb-9412-db802ec30021.png\">\n\nFew databases are stored in one chunk, replicated three times\n\n- When database can't fit into one storage node it can occupy lots of chunks that were split while database was growing. Chunk placement on nodes is controlled by us with some automatization, but we always may manually move chunks around the cluster.\n\n<img width=\"940\" alt=\"Screenshot_2021-02-22_at_16 49 10\" src=\"https://user-images.githubusercontent.com/284219/108729815-fb071e00-753b-11eb-86e0-be6703e47d82.png\">\n\nHere one big database occupies two sets of nodes. Also some chunks were moved around to restore replication factor after disk failure. In this case we also have \"sharded\" storage for a big database and issue wal writes to different chunks in parallel.\n\n## **Chunk placement strategies**\n\nThere are few scenarios where we may want to move chunks around the cluster:\n\n- disk usage on some node is big\n- some disk experienced a failure\n- some node experienced a failure or needs maintenance\n\n## **Chunk replication**\n\nChunk replication may be done by cloning page ranges with respect to some lsn from peer nodes, updating global metadata, waiting for WAL to come, replaying previous WAL and becoming online -- more or less like during chunk split.\n\n"
  },
  {
    "path": "docs/rfcs/003-laptop-cli.md",
    "content": "# Command line interface (end-user)\n\nNeon CLI as it is described here mostly resides on the same conceptual level as pg_ctl/initdb/pg_recvxlog/etc and replaces some of them in an opinionated way. I would also suggest bundling our patched postgres inside neon distribution at least at the start.\n\nThis proposal is focused on managing local installations. For cluster operations, different tooling would be needed. The point of integration between the two is storage URL: no matter how complex cluster setup is it may provide an endpoint where the user may push snapshots.\n\nThe most important concept here is a snapshot, which can be created/pushed/pulled/exported. Also, we may start temporary read-only postgres instance over any local snapshot. A more complex scenario would consist of several basic operations over snapshots.\n\n# Possible usage scenarios\n\n## Install neon, run a postgres\n\n```\n> brew install pg-neon \n> neon pg create # creates pgdata with default pattern pgdata$i\n> neon pg list\nID            PGDATA        USED    STORAGE            ENDPOINT\nprimary1      pgdata1       0G      neon-local       localhost:5432\n```\n\n## Import standalone postgres to neon\n\n```\n> neon snapshot import --from=basebackup://replication@localhost:5432/ oldpg\n[====================------------] 60% | 20MB/s\n> neon snapshot list\nID          SIZE        PARENT\noldpg       5G          -\n\n> neon pg create --snapshot oldpg\nStarted postgres on localhost:5432\n\n> neon pg list\nID            PGDATA        USED    STORAGE            ENDPOINT\nprimary1      pgdata1       5G      neon-local       localhost:5432\n\n> neon snapshot destroy oldpg\nOk\n```\n\nAlso, we may start snapshot import implicitly by looking at snapshot schema\n\n```\n> neon pg create --snapshot basebackup://replication@localhost:5432/\nDownloading snapshot... Done.\nStarted postgres on localhost:5432\nDestroying snapshot... 
Done.\n```\n\n## Pull snapshot with some publicly shared database\n\nSince we may export the whole snapshot as one big file (tar of basebackup, maybe with some manifest) it may be shared over conventional means: http, ssh, [git+lfs](https://docs.github.com/en/github/managing-large-files/about-git-large-file-storage).\n\n```\n> neon pg create --snapshot http://learn-postgres.com/movies_db.neon movies\n```\n\n## Create snapshot and push it to the cloud\n\n```\n> neon snapshot create pgdata1@snap1\n> neon snapshot push --to ssh://stas@neon.tech pgdata1@snap1\n```\n\n## Rollback database to the snapshot\n\nOne way to rollback the database is just to init a new database from the snapshot and destroy the old one. But creating a new database from a snapshot would require a copy of that snapshot which is time consuming operation. Another option that would be cool to support is the ability to create the copy-on-write database from the snapshot without copying data, and store updated pages in a separate location, however that way would have performance implications. 
So to properly rollback the database to the older state we have `neon pg checkout`.\n\n```\n> neon pg list\nID            PGDATA        USED    STORAGE            ENDPOINT\nprimary1      pgdata1       5G      neon-local       localhost:5432\n\n> neon snapshot create pgdata1@snap1\n\n> neon snapshot list\nID                    SIZE        PARENT\noldpg                 5G          -\npgdata1@snap1         6G          -\npgdata1@CURRENT       6G          -\n\n> neon pg checkout pgdata1@snap1\nStopping postgres on pgdata1.\nRolling back pgdata1@CURRENT to pgdata1@snap1.\nStarting postgres on pgdata1.\n\n> neon snapshot list\nID                    SIZE        PARENT\noldpg                 5G          -\npgdata1@snap1         6G          -\npgdata1@HEAD{0}       6G          -\npgdata1@CURRENT       6G          -\n```\n\nSome notes: pgdata1@CURRENT -- implicit snapshot representing the current state of the database in the data directory. When we are checking out some snapshot CURRENT will be set to this snapshot and the old CURRENT state will be named HEAD{0} (0 is the number of postgres timeline, it would be incremented after each such checkout).\n\n## Configure PITR area (Point In Time Recovery).\n\nPITR area acts like a continuous snapshot where you can reset the database to any point in time within this area (by area I mean some TTL period or some size limit, both possibly infinite).\n\n```\n> neon pitr create --storage s3tank --ttl 30d --name pitr_last_month\n```\n\nResetting the database to some state in the past would require creating a snapshot on some lsn / time in this PITR area.\n\n# Manual\n\n## storage\n\nStorage is either neon pagestore or s3. Users may create a database in a pagestore and create/move *snapshots* and *pitr regions* in both pagestore and s3. Storage is a concept similar to `git remote`. 
After installation, I imagine one local storage is available by default.\n\n**neon storage attach** -t [native|s3] -c key=value -n name\n\nAttaches/initializes storage. For --type=s3, user credentials and path should be provided. For --type=native we may support --path=/local/path and --url=neon.tech/stas/mystore. Other possible term for native is 'zstore'.\n\n\n**neon storage list**\n\nShow currently attached storages. For example:\n\n```\n> neon storage list\nNAME            USED    TYPE                OPTIONS          PATH\nlocal           5.1G    neon-local                         /opt/neon/store/local\nlocal.compr     20.4G   neon-local        compression=on    /opt/neon/store/local.compr\nzcloud          60G     neon-remote                        neon.tech/stas/mystore\ns3tank          80G     S3\n```\n\n**neon storage detach**\n\n**neon storage show**\n\n\n\n## pg\n\nManages postgres data directories and can start postgres instances with proper configuration. An experienced user may avoid using that (except pg create) and configure/run postgres by themselves.\n\nPg is a term for a single postgres running on some data. I'm trying to avoid separation of datadir management and postgres instance management -- both that concepts bundled here together.\n\n**neon pg create** [--no-start --snapshot --cow] -s storage-name -n pgdata\n\nCreates (initializes) new data directory in given storage and starts postgres. I imagine that storage for this operation may be only local and data movement to remote location happens through snapshots/pitr.\n\n--no-start: just init datadir without creating \n\n--snapshot snap: init from the snapshot. 
Snap is a name or URL (neon.tech/stas/mystore/snap1)\n\n--cow: initialize Copy-on-Write data directory on top of some snapshot (makes sense if it is a snapshot of currently running a database)\n\n**neon pg destroy**\n\n**neon pg start** [--replica] pgdata\n\nStart postgres with proper extensions preloaded/installed.\n\n**neon pg checkout**\n\nRollback data directory to some previous snapshot. \n\n**neon pg stop** pg_id\n\n**neon pg list**\n\n```\nROLE                 PGDATA        USED    STORAGE            ENDPOINT\nprimary              my_pg         5.1G    local              localhost:5432\nreplica-1                                                     localhost:5433\nreplica-2                                                     localhost:5434\nprimary              my_pg2        3.2G    local.compr        localhost:5435\n-                    my_pg3        9.2G    local.compr        -\n```\n\n**neon pg show**\n\n```\nmy_pg:\n    storage: local\n    space used on local: 5.1G\n    space used on all storages: 15.1G\n    snapshots:\n        on local:\n            snap1: 1G\n            snap2: 1G\n        on zcloud:\n            snap2: 1G\n        on s3tank:\n            snap5: 2G\n    pitr:\n        on s3tank:\n            pitr_one_month: 45G\n\n```\n\n**neon pg start-rest/graphql** pgdata\n\nStarts REST/GraphQL proxy on top of postgres master. Not sure we should do that, just an idea.\n\n\n## snapshot\n\nSnapshot creation is cheap -- no actual data is copied, we just start retaining old pages. Snapshot size means the amount of retained data, not all data. Snapshot name looks like pgdata_name@tag_name. tag_name is set by the user during snapshot creation. 
There are some reserved tag names: CURRENT represents the current state of the data directory; HEAD{i} represents the data directory state that resided in the database before i-th checkout.\n\n**neon snapshot create** pgdata_name@snap_name\n\nCreates a new snapshot in the same storage where pgdata_name exists.\n\n**neon snapshot push** --to url pgdata_name@snap_name\n\nProduces binary stream of a given snapshot. Under the hood starts temp read-only postgres over this snapshot and sends basebackup stream. Receiving side should start `neon snapshot recv` before push happens. If url has some special schema like neon:// receiving side may require auth and start `neon snapshot recv` on the go.\n\n**neon snapshot recv**\n\nStarts a port listening for a basebackup stream, prints connection info to stdout (so that user may use that in push command), and expects data on that socket.\n\n**neon snapshot pull** --from url or path\n\nConnects to a remote neon/s3/file and pulls snapshot. The remote site should be neon service or files in our format.\n\n**neon snapshot import** --from basebackup://<...>  or path\n\nCreates a new snapshot out of running postgres via basebackup protocol or basebackup files.\n\n**neon snapshot export**\n\nStarts read-only postgres over this snapshot and exports data in some format (pg_dump, or COPY TO on some/all tables). One of the options may be neon's own format which is handy for us (but I think just tar of basebackup would be okay).\n\n**neon snapshot diff** snap1 snap2\n\nShows size of data changed between two snapshots. We also may provide options to diff schema/data in tables. 
To do that start temp read-only postgreses.\n\n**neon snapshot destroy**\n\n## pitr\n\nPitr represents wal stream and ttl policy for that stream\n\nXXX: any suggestions on a better name?\n\n**neon pitr create** name\n\n--ttl = inf | period\n\n--size-limit = inf | limit\n\n--storage = storage_name\n\n**neon pitr extract-snapshot** pitr_name --lsn xxx\n\nCreates a snapshot out of some lsn in PITR area. The obtained snapshot may be managed with snapshot routines (move/send/export)\n\n**neon pitr gc** pitr_name\n\nForce garbage collection on some PITR area.\n\n**neon pitr list**\n\n**neon pitr destroy**\n\n\n## console\n\n**neon console**\n\nOpens browser targeted at web console with the more or less same functionality as described here.\n"
  },
  {
    "path": "docs/rfcs/004-durability.md",
    "content": "Durability & Consensus\n======================\n\nWhen a transaction commits, a commit record is generated in the WAL.\nWhen do we consider the WAL record as durable, so that we can\nacknowledge the commit to the client and be reasonably certain that we\nwill not lose the transaction?\n\nNeon uses a group of WAL safekeeper nodes to hold the generated WAL.\nA WAL record is considered durable, when it has been written to a\nmajority of WAL safekeeper nodes. In this document, I use 5\nsafekeepers, because I have five fingers. A WAL record is durable,\nwhen at least 3 safekeepers have written it to disk.\n\nFirst, assume that only one primary node can be running at a\ntime. This can be achieved by Kubernetes or etcd or some\ncloud-provider specific facility, or we can implement it\nourselves. These options are discussed in later chapters.  For now,\nassume that there is a Magic STONITH Fairy that ensures that.\n\nIn addition to the WAL safekeeper nodes, the WAL is archived in\nS3. 
WAL that has been archived to S3 can be removed from the\nsafekeepers, so the safekeepers don't need a lot of disk space.\n\n```\n                                +----------------+\n                        +-----> | WAL safekeeper |\n                        |       +----------------+\n                        |       +----------------+\n                        +-----> | WAL safekeeper |\n+------------+          |       +----------------+\n|  Primary   |          |       +----------------+\n| Processing | ---------+-----> | WAL safekeeper |\n|   Node     |          |       +----------------+\n+------------+          |       +----------------+\n            \\           +-----> | WAL safekeeper |\n             \\          |       +----------------+\n              \\         |       +----------------+\n               \\        +-----> | WAL safekeeper |\n                \\               +----------------+\n                 \\\n                  \\\n                   \\\n                    \\\n                     \\          +--------+\n                      \\         |        |\n                       +------> |   S3   |\n                                |        |\n                                +--------+\n\n```\nEvery WAL safekeeper holds a section of WAL, and a VCL value.\nThe WAL can be divided into three portions:\n\n```\n                                    VCL                   LSN\n                                     |                     |\n                                     V                     V\n.................ccccccccccccccccccccXXXXXXXXXXXXXXXXXXXXXXX\nArchived WAL       Completed WAL          In-flight WAL\n```\n\nNote that all this WAL kept in a safekeeper is a contiguous section.\nThis is different from Aurora: In Aurora, there can be holes in the\nWAL, and there is a Gossip protocol to fill the holes. That could be\nimplemented in the future, but let's keep it simple for now. WAL needs\nto be written to a safekeeper in order. 
However, during crash\nrecovery, In-flight WAL that has already been stored in a safekeeper\ncan be truncated or overwritten.\n\nThe Archived WAL has already been stored in S3, and can be removed from\nthe safekeeper.\n\nThe Completed WAL has been written to at least three safekeepers. The\nalgorithm ensures that it is not lost, when at most two nodes fail at\nthe same time.\n\nThe In-flight WAL has been persisted in the safekeeper, but if a crash\nhappens, it may still be overwritten or truncated.\n\n\nThe VCL point is determined in the Primary. It is not strictly\nnecessary to store it in the safekeepers, but it allows some\noptimizations and sanity checks and is probably generally useful for\nthe system as a whole. The VCL values stored in the safekeepers can lag\nbehind the VCL computed by the primary.\n\n\nPrimary node Normal operation\n-----------------------------\n\n1. Generate some WAL.\n\n2. Send the WAL to all the safekeepers that you can reach.\n\n3. As soon as a quorum of safekeepers have acknowledged that they have\n   received and durably stored the WAL up to that LSN, update local VCL\n   value in memory, and acknowledge commits to the clients.\n\n4. Send the new VCL to all the safekeepers that were part of the quorum.\n   (Optional)\n\n\nPrimary Crash recovery\n----------------------\n\nWhen a new Primary node starts up, before it can generate any new WAL\nit needs to contact a majority of the WAL safekeepers to compute the\nVCL. Remember that there is a Magic STONITH fairy that ensures that\nonly one node can be doing this at a time.\n\n1. Contact all WAL safekeepers. Find the Max((Epoch, LSN)) tuple among the ones you\n   can reach. This is the Winner safekeeper, and its LSN becomes the new VCL.\n\n2. Update the other safekeepers you can reach, by copying all the WAL\n   from the Winner, starting from each safekeeper's old VCL point. Any old\n   In-Flight WAL from previous Epoch is truncated away.\n\n3. 
Increment Epoch, and send the new Epoch to the quorum of\n   safekeepers.  (This ensures that if any of the safekeepers that we\n   could not reach later come back online, they will be considered as\n   older than this in any future recovery)\n\nYou can now start generating new WAL, starting from the newly-computed\nVCL.\n\nOptimizations\n-------------\n\nAs described, the Primary node sends all the WAL to all the WAL safekeepers. That\ncan be a lot of network traffic. Instead of sending the WAL directly from Primary,\nsome safekeepers can be daisy-chained off other safekeepers, or there can be a\nbroadcast mechanism among them. There should still be a direct connection from\neach safekeeper to the Primary for the acknowledgments though.\n\nSimilarly, the responsibility for archiving WAL to S3 can be delegated to one of\nthe safekeepers, to reduce the load on the primary.\n\n\nMagic STONITH fairy\n-------------------\n\nNow that we have a system that works as long as only one primary node is running at a time, how\ndo we ensure that?\n\n1. Use etcd to grant a lease on a key. The primary node is only allowed to operate as primary\n   when it's holding a valid lease. If the primary node dies, the lease expires after a timeout\n   period, and a new node is allowed to become the primary.\n\n2. Use S3 to store the lease. S3's consistency guarantees are more lenient, so in theory you\n   cannot do this safely. In practice, it would probably be OK if you make the lease times and\n   timeouts long enough. This has the advantage that we don't need to introduce a new\n   component to the architecture.\n\n3. Use Raft or Paxos, with the WAL safekeepers acting as the Acceptors to form the quorum. The\n   next chapter describes this option.\n\n\nBuilt-in Paxos\n--------------\n\nThe WAL safekeepers act as PAXOS Acceptors, and the Processing nodes\nas both Proposers and Learners.\n\nEach WAL safekeeper holds an Epoch value in addition to the VCL and\nthe WAL. 
Each request by the primary to safekeep WAL is accompanied by\nan Epoch value. If a safekeeper receives a request with Epoch that\ndoesn't match its current Accepted Epoch, it must ignore (NACK) it.\n(In different Paxos papers, Epochs are called \"terms\" or \"round\nnumbers\")\n\nWhen a node wants to become the primary, it generates a new Epoch\nvalue that is higher than any previously observed Epoch value, and\nglobally unique.\n\n\nAccepted Epoch: 555                VCL                   LSN\n                                     |                     |\n                                     V                     V\n.................ccccccccccccccccccccXXXXXXXXXXXXXXXXXXXXXXX\nArchived WAL       Completed WAL          In-flight WAL\n\n\nPrimary node startup:\n\n1. Contact all WAL safekeepers that you can reach (if you cannot\n   connect to a quorum of them, you can give up immediately). Find the\n   latest Epoch among them.\n\n2. Generate a new globally unique Epoch, greater than the latest Epoch\n   found in previous step.\n\n2. Send the new Epoch in a Prepare message to a quorum of\n   safekeepers. (PAXOS Prepare message)\n\n3. Each safekeeper responds with a Promise. If a safekeeper has\n   already made a promise with a higher Epoch, it doesn't respond (or\n   responds with a NACK). After making a promise, the safekeeper stops\n   responding to any write requests with earlier Epoch.\n\n4. Once you have received a majority of promises, you know that the\n   VCL cannot advance on the old Epoch anymore. This effectively kills\n   any old primary server.\n\n5. Find the highest written LSN among the quorum of safekeepers (these\n   can be included in the Promise messages already). This is the new\n   VCL.  If a new node starts the election process after this point,\n   it will compute the same or higher VCL.\n\n6. Copy the WAL from the safekeeper with the highest LSN to the other\n   safekeepers in the quorum, using the new Epoch. (PAXOS Accept\n   phase)\n\n7. 
You can now start generating new WAL starting from the VCL. If\n   another process starts the election process after this point and\n   gains control of a majority of the safekeepers, we will no longer\n   be able to advance the VCL.\n\n"
  },
  {
    "path": "docs/rfcs/005-zenith_local.md",
    "content": "# Neon local\n\nHere I list some objectives to keep in mind when discussing neon-local design and a proposal that brings all components together.  Your comments on both parts are very welcome.\n\n#### Why do we need it?\n- For distribution - this easy to use binary will help us to build adoption among developers.\n- For internal use - to test all components together.\n\nIn my understanding, we consider it to be just a mock-up version of neon-cloud.\n> Question: How much should we care about durability and security issues for a local setup?\n\n\n#### Why is it better than a simple local postgres?\n\n- Easy one-line setup. As simple as `cargo install neon && neon start`\n\n- Quick and cheap creation of compute nodes over the same storage.\n> Question: How can we describe a use-case for this feature?\n\n- Neon-local can work with S3 directly. \n\n- Push and pull images (snapshots) to remote S3 to exchange data with other users.\n\n- Quick and cheap snapshot checkouts to switch back and forth in the database history.\n> Question: Do we want it in the very first release? This feature seems quite complicated.\n\n#### Distribution:\n\nIdeally, just one binary that incorporates all elements we need.\n> Question: Let's discuss pros and cons of having a separate package with modified PostgreSQL.\n\n#### Components:\n\n- **neon-CLI** - interface for end-users.  Turns commands to REST requests and handles responses to show them in a user-friendly way.  \nCLI proposal is here https://github.com/neondatabase/rfcs/blob/003-laptop-cli.md/003-laptop-cli.md\nWIP code is here: https://github.com/neondatabase/postgres/tree/main/pageserver/src/bin/cli\n\n- **neon-console** - WEB UI with same functionality as CLI.\n>Note: not for the first release.\n\n- **neon-local** - entrypoint. Service that starts all other components and handles REST API requests. 
See REST API proposal below.\n    > Idea: spawn all other components as child processes, so that we could shut down everything by stopping neon-local.\n\n- **neon-pageserver** - consists of a storage and WAL-replaying service (modified PG in current implementation).\n> Question: Probably, for local setup we should be able to bypass page-storage and interact directly with S3 to avoid double caching in shared buffers and page-server?\n\nWIP code is here: https://github.com/neondatabase/postgres/tree/main/pageserver/src\n\n- **neon-S3** - stores base images of the database and WAL in S3 object storage. Import and export images from/to neon.\n> Question: How should it operate in a local setup? Will we manage it ourselves or ask user to provide credentials for existing S3 object storage (i.e. minio)?\n> Question: Do we use it together with local page store or are they interchangeable?\n\nWIP code is ???\n\n- **neon-safekeeper** - receives WAL from postgres, stores it durably, and confirms to Postgres that the \"sync\" has succeeded.\n> Question: How should it operate in a local setup? In my understanding it should push WAL directly to S3 (if we use it) or store all data locally (if we use local page storage). The latter option seems meaningless (extra overhead and no gain), but it is still good to test the system.\n\nWIP code is here: https://github.com/neondatabase/postgres/tree/main/src/bin/safekeeper\n\n- **neon-computenode** - bottomless PostgreSQL, ideally upstream, but for a start - our modified version. User can quickly create and destroy them and work with it as a regular postgres database.\n \n WIP code is in main branch and here: https://github.com/neondatabase/postgres/commits/compute_node\n\n#### REST API:\n\nService endpoint: `http://localhost:3000`\n\nResources:\n- /storages - Where data lives: neon-pageserver or neon-s3\n- /pgs - Postgres - neon-computenode\n- /snapshots - snapshots **TODO**\n\n>Question: Do we want to extend this API to manage neon components? I.e. 
start page-server, manage safekeepers and so on? Or they will be hardcoded to just start once and for all?\n\nMethods and their mapping to CLI:\n\n- /storages - neon-pageserver or neon-s3\n\nCLI  | REST API\n------------- | -------------\nstorage attach -n name --type [native\\s3]  --path=[datadir\\URL] | PUT  -d { \"name\": \"name\", \"type\": \"native\", \"path\": \"/tmp\" } /storages\nstorage detach -n name | DELETE /storages/:storage_name \nstorage list | GET /storages\nstorage show -n name | GET /storages/:storage_name \n\n\n- /pgs - neon-computenode\n\nCLI  | REST API\n------------- | -------------\npg create -n name --s storage_name | PUT  -d { \"name\": \"name\", \"storage_name\": \"storage_name\" } /pgs\npg destroy -n name | DELETE /pgs/:pg_name \npg start -n name --replica | POST -d {\"action\": \"start\", \"is_replica\":\"replica\"}  /pgs/:pg_name /actions\npg stop -n name | POST  -d {\"action\": \"stop\"}  /pgs/:pg_name /actions\npg promote -n name | POST  -d {\"action\": \"promote\"}  /pgs/:pg_name /actions\npg list | GET /pgs\npg show -n name | GET /pgs/:pg_name \n\n- /snapshots **TODO**\n\nCLI  | REST API\n------------- | -------------\n\n"
  },
  {
    "path": "docs/rfcs/006-laptop-cli-v2-CLI.md",
    "content": "Neon CLI allows you to operate database clusters (catalog clusters) and their commit history locally and in the cloud. Since ANSI calls them catalog clusters and cluster is a loaded term in the modern infrastructure we will call it \"catalog\".\n\n# CLI v2 (after chatting with Carl)\n\nNeon introduces the notion of a repository.\n\n```bash\nneon init\nneon clone neon://neon.tech/piedpiper/northwind -- clones a repo to the northwind directory\n```\n\nOnce you have a cluster catalog you can explore it\n\n```bash\nneon log -- returns a list of commits\nneon status -- returns if there are changes in the catalog that can be committed\nneon commit -- commits the changes and generates a new commit hash\nneon branch experimental <hash> -- creates a branch called experimental based on a given commit hash\n```\n\nTo make changes in the catalog you need to run compute nodes\n\n```bash\n-- here is how you start a compute node\nneon start /home/pipedpiper/northwind:main -- starts a compute instance\nneon start neon://neon.tech/northwind:main -- starts a compute instance in the cloud\n-- you can start a compute node against any hash or branch\nneon start /home/pipedpiper/northwind:experimental --port 8008 -- start another compute instance (on different port)\n-- you can start a compute node against any hash or branch\nneon start /home/pipedpiper/northwind:<hash> --port 8009 -- start another compute instance (on different port)\n\n-- After running some DML you can run \n-- neon status and see how there are two WAL streams one on top of \n-- the main branch\nneon status \n-- and another on top of the experimental branch\nneon status -b experimental\n\n-- you can commit each branch separately\nneon commit main\n-- or\nneon commit -c /home/pipedpiper/northwind:experimental\n```\n\nStarting compute instances against cloud environments\n\n```bash\n-- you can start a compute instance against the cloud environment\n-- in this case all of the changes will be streamed into the 
cloud\nneon start https://neon.tech/pipedpiper/northwind:main\nneon start https://neon.tech/pipedpiper/northwind:main\nneon status -c https://neon.tech/pipedpiper/northwind:main\nneon commit -c https://neon.tech/pipedpiper/northwind:main\nneon branch -c https://neon.tech/pipedpiper/northwind:<hash> experimental\n```\n\nPushing data into the cloud\n\n```bash\n-- pull all the commits from the cloud\nneon pull\n-- push all the commits to the cloud\nneon push\n```\n"
  },
  {
    "path": "docs/rfcs/006-laptop-cli-v2-repository-structure.md",
    "content": "# Repository format\n\nA Neon repository is similar to a traditional PostgreSQL backup\narchive, like a WAL-G bucket or pgbarman backup catalogue. It holds\nmultiple versions of a PostgreSQL database cluster.\n\nThe distinguishing feature is that you can launch a Neon Postgres\nserver directly against a branch in the repository, without having to\n\"restore\" it first. Also, Neon manages the storage automatically,\nthere is no separation between full and incremental backups nor WAL\narchive. Neon relies heavily on the WAL, and uses concepts similar\nto incremental backups and WAL archiving internally, but it is hidden\nfrom the user.\n\n## Directory structure, version 1\n\nThis first version is pretty straightforward but not very\nefficient. Just something to get us started.\n\nThe repository directory looks like this:\n\n    .neon/timelines/4543be3daeab2ed4e58a285cbb8dd1fce6970f8c/wal/\n    .neon/timelines/4543be3daeab2ed4e58a285cbb8dd1fce6970f8c/snapshots/<lsn>/\n    .neon/timelines/4543be3daeab2ed4e58a285cbb8dd1fce6970f8c/history\n    \n    .neon/refs/branches/mybranch\n    .neon/refs/tags/foo\n    .neon/refs/tags/bar\n    \n    .neon/datadirs/<timeline uuid>\n\n### Timelines\n\nA timeline is similar to PostgreSQL's timeline, but is identified by a\nUUID instead of a 32-bit timeline Id.  For user convenience, it can be\ngiven a name that refers to the UUID (called a branch).\n\nAll WAL is generated on a timeline. You can launch a read-only node\nagainst a tag or arbitrary LSN on a timeline, but in order to write,\nyou need to create a timeline.\n\nEach timeline is stored in a directory under .neon/timelines. It\nconsists of a WAL archive, containing all the WAL in the standard\nPostgreSQL format, under the wal/ subdirectory.\n\nThe 'snapshots/' subdirectory contains \"base backups\" of the data\ndirectory at different LSNs. 
Each snapshot is simply a copy of the\nPostgres data directory.\n\nWhen a new timeline is forked from a previous timeline, the ancestor\ntimeline's UUID is stored in the 'history' file.\n\n### Refs\n\nThere are two kinds of named objects in the repository: branches and\ntags.  A branch is a human-friendly name for a timeline UUID, and a\ntag is a human-friendly name for a specific LSN on a timeline\n(timeline UUID + LSN).  Like in git, these are just for user\nconvenience; you can also use timeline UUIDs and LSNs directly.\n\nRefs do have one additional purpose though: naming a timeline or LSN\nprevents it from being automatically garbage collected.\n\nThe refs directory contains a small text file for each tag/branch. It\ncontains the UUID of the timeline (and LSN, for tags).\n\n### Datadirs\n\n.neon/datadirs contains PostgreSQL data directories. You can launch\na Postgres instance on one of them with:\n\n```\n  postgres -D .neon/datadirs/4543be3daeab2ed4e58a285cbb8dd1fce6970f8c\n```\n\nAll the actual data is kept in the timeline directories, under\n.neon/timelines. The data directories are only needed for active\nPostgreSQL instances. After an instance is stopped, the data directory\ncan be safely removed. \"neon start\" will recreate it quickly from\nthe data in .neon/timelines, if it's missing.\n\n## Version 2\n\nThe format described above isn't very different from a traditional\ndaily base backup + WAL archive configuration. The main difference is\nthe nicer naming of branches and tags.\n\nThat's not very efficient. For performance, we need something like\nincremental backups that don't require making a full copy of all\ndata. So only store modified files or pages. And instead of having to\nreplay all WAL from the last snapshot, \"slice\" the WAL into\nper-relation WAL files and only recover what's needed when a table is\naccessed.\n\nIn version 2, the file format in the \"snapshots\" subdirectory gets\nmore advanced. The exact format is TODO. 
But it should support:\n- storing WAL records of individual relations/pages\n- storing a delta from an older snapshot\n- compression\n\n\n## Operations\n\n### Garbage collection\n\nWhen you run \"neon gc\", old timelines that are no longer needed are\nremoved. That involves collecting the list of \"unreachable\" objects,\nstarting from the named branches and tags.\n\nAlso, if enough WAL has been generated on a timeline since last\nsnapshot, a new snapshot or delta is created.\n\n### neon push/pull\n\nCompare the tags and branches on both servers, and copy missing ones.\nFor each branch, compare the timeline it points to in both servers. If\none is behind the other, copy the missing parts.\n\nFIXME: how do you prevent confusion if you have two clones of the same\nrepository, launch an instance on the same branch in both clones, and\nlater try to push/pull between them? Perhaps create a new timeline\nevery time you start up an instance? Then you would detect that the\ntimelines have diverged. That would match with the \"epoch\" concept\nthat we have in the WAL safekeeper.\n\n### neon checkout/commit\n\nIn this format, there is no concept of a \"working tree\", and hence no\nconcept of checking out or committing. All modifications are done on\na branch or a timeline. As soon as you launch a server, the changes are\nappended to the timeline.\n\nYou can easily fork off a temporary timeline to emulate a \"working tree\".\nYou can later remove it and have it garbage collected, or to \"commit\",\nre-point the branch to the new timeline.\n\nIf we want to have a worktree and \"neon checkout/commit\" concept, we can\nemulate that with a temporary timeline. Create the temporary timeline at\n\"neon checkout\", and have \"neon commit\" modify the branch to point to\nthe new timeline.\n"
  },
  {
    "path": "docs/rfcs/007-serverless-on-laptop.md",
    "content": "How it works now\n----------------\n\n1. Create repository, start page server on it\n\n```\n$ neon init\n...\ncreated main branch\nnew neon repository was created in .neon\n\n$ neon pageserver start\nStarting pageserver at '127.0.0.1:64000' in .neon\nPage server started\n```\n\n2. Create a branch, and start a Postgres instance on it\n\n```\n$ neon branch heikki main\nbranching at end of WAL: 0/15ECF68\n\n$ neon pg create heikki\nInitializing Postgres on timeline 76cf9279915be7797095241638e64644...\nExtracting base backup to create postgres instance: path=.neon/pgdatadirs/pg1 port=55432\n\n$ neon pg start pg1\nStarting postgres node at 'host=127.0.0.1 port=55432 user=heikki'\nwaiting for server to start.... done\nserver started\n```\n\n\n3. Connect to it and run queries\n\n```\n$ psql \"dbname=postgres port=55432\"\npsql (14devel)\nType \"help\" for help.\n\npostgres=# \n```\n\n\nProposal: Serverless on your Laptop\n-----------------------------------\n\nWe've been talking about doing the \"pg create\" step automatically at\n\"pg start\", to eliminate that step. What if we go further, go\nserverless on your laptop, so that the workflow becomes just:\n\n1. Create repository, start page server on it (same as before)\n\n```\n$ neon init\n...\ncreated main branch\nnew neon repository was created in .neon\n\n$ neon pageserver start\nStarting pageserver at '127.0.0.1:64000' in .neon\nPage server started\n```\n\n2. Create branch\n\n```\n$ neon branch heikki main\nbranching at end of WAL: 0/15ECF68\n```\n\n3. Connect to it:\n\n```\n$ psql \"dbname=postgres port=5432 branch=heikki\"\npsql (14devel)\nType \"help\" for help.\n\npostgres=# \n```\n\n\nThe trick behind the scenes is that when you launch the page server,\nit starts to listen on port 5432. When you connect to it with psql, it\nlooks at the 'branch' parameter that you passed in the connection\nstring. 
It automatically performs the \"pg create\" and \"pg start\" steps\nfor that branch, and then forwards the connection to the Postgres\ninstance that it launched. After you disconnect, if there are no more\nactive connections to the server running on the branch, it can\nautomatically shut it down again.\n\nThis is how serverless would work in the cloud. We can do it on your\nlaptop, too.\n"
  },
  {
    "path": "docs/rfcs/008-push-pull.md",
    "content": "# Push and pull between pageservers\n\nHere is a proposal about implementing push/pull mechanics between pageservers. We also want to be able to push/pull to S3 but that would depend on the exact storage format so we don't touch that in this proposal.\n\n## Origin management\n\nThe origin represents connection info for some remote pageserver. Let's use here same commands as git uses except using explicit list subcommand (git uses `origin -v` for that).\n\n```\nneon origin add <name> <connection_uri>\nneon origin list\nneon origin remove <name>\n```\n\nConnection URI a string of form `postgresql://user:pass@hostname:port` (https://www.postgresql.org/docs/13/libpq-connect.html#id-1.7.3.8.3.6). We can start with libpq password auth and later add support for client certs or require ssh as transport or invent some other kind of transport.\n\nBehind the scenes, this commands may update toml file inside .neon directory.\n\n## Push\n\n### Pushing branch\n\n```\nneon push mybranch cloudserver # push to eponymous branch in cloudserver\nneon push mybranch cloudserver:otherbranch # push to a different branch in cloudserver\n```\n\nExact mechanics would be slightly different in the following situations:\n\n1) Destination branch does not exist.\n\n    That is the simplest scenario. We can just create an empty branch (or timeline in internal terminology) and transfer all the pages/records that we have in our timeline. Right now each timeline is quite independent of other timelines so I suggest skipping any checks that there is a common ancestor and just fill it with data. 
Later, when CoW timelines land in the pageserver, we may add that check and decide whether this timeline belongs to this pageserver repository or not [*].\n\n    The exact mechanics may be the following:\n\n    * CLI asks local pageserver to perform push and hands over connection uri: `perform_push <branch_name> <uri>`.\n    * local pageserver connects to the remote pageserver and runs `branch_push <branch_name> <timeline_id>`\n        Handler for branch_push would create destination timeline and switch connection to copyboth mode.\n    * Sending pageserver may start iterator on that timeline and send all the records as copy messages.\n\n2) Destination branch exists and latest_valid_lsn is less than ours.\n\n    In this case, we need to send missing records. To do that we need to find all pages that were changed since that remote LSN. Right now we don't have any tracking mechanism for that, so let's just iterate over all records and send ones that are newer than remote LSN. Later we probably should add a sparse bitmap that would track changed pages to avoid full scan.\n\n3) Destination branch exists and latest_valid_lsn is bigger than ours.\n\n    In this case, we can't push to that branch. We can only pull.\n\n### Pulling branch\n\nHere we need to handle the same three cases, but also keep in mind that local pageserver can be behind NAT and we can't trivially re-use pushing by asking remote to 'perform_push' to our address. 
So we would need a new set of commands:\n\n* CLI calls `perform_pull <branch_name> <uri>` on local pageserver.\n* local pageserver calls `branch_pull <branch_name> <timeline_id>` on remote pageserver.\n* remote pageserver sends records in our direction\n\nBut despite the different set of commands, the code that performs iteration over records and the receiving code that inserts those records can be the same for both pull and push.\n\n\n\n[*] It looks to me that there are two different possible approaches to handling unrelated timelines:\n\n1) Allow storing unrelated timelines in one repo. Some timelines may have parents and some may not.\n2) Transparently create and manage several repositories in one pageserver.\n\nBut that is the topic for a separate RFC/discussion.\n"
  },
  {
    "path": "docs/rfcs/009-snapshot-first-storage-cli.md",
    "content": "While working on export/import commands, I understood that they fit really well into \"snapshot-first design\".\n\nWe may think about backups as snapshots in a different format (e.g. plain pgdata format, basebackup tar format, WAL-G format (if they want to support it) and so on). They use the same storage API, the only difference is the code that packs/unpacks files.\n\nEven if neon aims to maintain durability using its own snapshots, backups will be useful for uploading data from postgres to neon.\n\nSo here is an attempt to design consistent CLI for different usage scenarios:\n\n#### 1. Start empty pageserver.\nThat is what we have now.\nInit empty pageserver using `initdb` in temporary directory.\n\n`--storage_dest=FILE_PREFIX | S3_PREFIX |...` option defines object storage type, all other parameters are passed via env variables. Inspired by WAL-G style naming: https://wal-g.readthedocs.io/STORAGES/.\n\nSave `storage_dest` and other parameters in config.\nPush snapshots to `storage_dest` in background.\n\n```\nneon init --storage_dest=S3_PREFIX\nneon start\n```\n\n#### 2. Restart pageserver (manually or crash-recovery).\nTake `storage_dest` from pageserver config, start pageserver from latest snapshot in `storage_dest`.\nPush snapshots to `storage_dest` in background.\n\n```\nneon start\n```\n\n#### 3. Import.\nStart pageserver from existing snapshot.\nPath to snapshot provided via `--snapshot_path=FILE_PREFIX | S3_PREFIX | ...`\nDo not save `snapshot_path` and `snapshot_format` in config, as it is a one-time operation.\nSave `storage_dest` parameters in config.\nPush snapshots to `storage_dest` in background.\n```\n//I.e. we want to start neon on top of existing $PGDATA and use s3 as a persistent storage.\nneon init --snapshot_path=FILE_PREFIX --snapshot_format=pgdata --storage_dest=S3_PREFIX\nneon start\n```\nHow to pass credentials needed for `snapshot_path`?\n\n#### 4. 
Export.\nManually push snapshot to `snapshot_path` which differs from `storage_dest`\nOptionally set `snapshot_format`, which can be plain pgdata format or neon format.\n```\nneon export --snapshot_path=FILE_PREFIX --snapshot_format=pgdata\n```\n\n#### Notes and questions\n- safekeeper s3_offload should use same (similar) syntax for storage. How to set it in UI?\n- Why do we need `neon init` as a separate command? Can't we init everything at first start?\n- We can think of better names for all options.\n- Export to plain postgres format will be useless, if we are not 100% compatible on page level.\nI can recall at least one such difference - PD_WAL_LOGGED flag in pages.\n"
  },
  {
    "path": "docs/rfcs/009-snapshot-first-storage-pitr.md",
    "content": "# Preface\n\nGetPage@LSN can be called with older LSNs, and the page server needs\nto be able to reconstruct older page versions. That's needed for\nhaving read-only replicas that lag behind the primary, or that are\n\"anchored\" at an older LSN, and internally in the page server when you\nbranch at an older point in time. How do you do that?\n\nFor now, I'm not considering incremental snapshots at all. I don't\nthink that changes things. So whenever you create a snapshot or a\nsnapshot file, it contains an image of all the pages, there is no need\nto look at an older snapshot file.\n\nAlso, I'm imagining that this works on a per-relation basis, so that\neach snapshot file contains data for one relation. A \"relation\" is a\nfuzzy concept - it could actually be one 1 GB relation segment. Or it\ncould include all the different \"forks\" of a relation, or you could\ntreat each fork as a separate relation for storage purpose. And once\nwe have the \"non-relational\" work is finished, a \"relation\" could\nactually mean some other versioned object kept in the PostgreSQL data\ndirectory. Let's ignore that for now.\n\n# Eric's RFC:\n\nEvery now and then, you create a \"snapshot\". It means that you create\na new snapshot file for each relation that was modified after the last\nsnapshot, and write out the contents the relation as it is/was at the\nsnapshot LSN. Write-ahead log is stored separately in S3 by the WAL\nsafekeeping service, in the original PostgreSQL WAL file format.\n\n    SNAPSHOT @100       WAL\n       .                 |\n       .                 |\n       .                 |\n       .                 |\n    SNAPSHOT @200        |\n       .                 |\n       .                 |\n       .                 |\n       .                 |\n    SNAPSHOT @300        |\n       .                 |\n       .                 
V\n    IN-MEMORY @400\n\nIf a GetPage@LSN request comes from the primary, you return the latest\npage from the in-memory layer. If there is no trace of the page in\nmemory, it means that it hasn't been modified since the last snapshot,\nso you return the page from the latest snapshot, at LSN 300 in the\nabove example.\n\nPITR is implemented using the original WAL files:\n\nIf a GetPage@LSN request comes from a read replica with LSN 250, you\nread the image of the page from the snapshot at LSN 200, and you also\nscan the WAL between 200 and 250, and apply all WAL records for the\nrequested page, to reconstruct it at LSN 250.\n\nScanning the WAL naively for every GetPage@LSN request would be\nexpensive, so in practice you'd construct an in-memory data structure\nof all the WAL between 200 and 250 once that allows quickly looking up\nrecords for a given page.\n\n## Problems/questions\n\nI think you'll need to store the list of snapshot LSNs on each\ntimeline somewhere.\n\nIf the latest snapshot of a relation is at LSN 100, and you request a\npage at LSN 1000000, how do you know if there are some modifications\nto it between 100 and 1000000 that you need to replay? You can scan\nall the WAL between 100 and 1000000, but that would be expensive.\n\nYou can skip that, if you know that a snapshot was taken e.g. at LSN\n999900. Then you know that the fact that there is no snapshot file at\n999900 means that the relation hasn't been modified between\n100-999900.  Then you only need to scan the WAL between 999900 and\n1000000. However, there is no trace of a snapshot happening at LSN\n999900 in the snapshot file for this relation, so you need to get\nthat information from somewhere else.\n\nWhere do you get that information from? Perhaps you can scan all the\nother relations, and if you see a snapshot file for *any* relation at\nLSN 999900, you know that if there were modifications to this\nrelation, there would be a newer snapshot file for it, too. 
In other\nwords, the list of snapshots that have been taken can be constructed\nby scanning all relations and computing the union of all snapshot LSNs\nthat you see for any relation. But that's expensive so at least you\nshould keep that in memory, after computing it once. Also, if you rely\non that, it's not possible to have snapshots at different intervals\nfor different files. That seems limiting.\n\nAnother option is to explicitly store a list of snapshot LSNs in a\nseparate metadata file.\n\n\n# Current implementation in the 'layered_repo' branch:\n\nWe store snapshot files like in the RFC, but each snapshot file also\ncontains all the WAL in the range of LSNs, so that you don't need to\nfetch the WAL separately from S3. So you have \"layers\" like this:\n\n    SNAPSHOT+WAL 100-200\n          |\n          |\n          |\n          |\n    SNAPSHOT+WAL 200-300\n          |\n          |\n          |\n          |\n    IN-MEMORY 300-\n\nEach \"snapshot+WAL\" is a file that contains a snapshot - i.e. full\ncopy of each page in the relation, at the *start* LSN. In addition to\nthat, it contains all the WAL applicable to the relation from the\nstart LSN to the end LSN. With that, you can reconstruct any page\nversion in the range that the file covers.\n\n\n## Problems/questions\n\nI can see one potential performance issue here, compared to the RFC.\nLet's focus on a single relation for now. Imagine that you start from\nan empty relation, and you receive WAL from 100 to 200, containing\na bunch of inserts and updates to the relation. You now have all that\nWAL in memory:\n\n    memory:  WAL from 100-200\n\nWe decide that it's time to materialize that to a snapshot file on\ndisk.  We materialize full image of the relation as it was at LSN 100\nto the snapshot file, and include all of the WAL. 
Since the relation\nwas initially empty, the \"image\" at the beginning of the range is empty\ntoo.\n\nSo now you have one file on disk:\n\n    SNAPSHOT+WAL 100-200\n\nIt contains a full image of the relation at LSN 100 and all WAL\nbetween 100-200. (It's actually stored as a serialized BTreeMap of\npage versions, with the page images and WAL records all stored\ntogether in the same BTreeMap. But for this story, that's not\nimportant.)\n\nWe now receive more WAL updating the relation, up to LSN 300. We\ndecide it's time to materialize a new snapshot file, and we now have\ntwo files:\n\n    SNAPSHOT+WAL 100-200\n    SNAPSHOT+WAL 200-300\n\nNote that the latest \"full snapshot\" that we store on disk always lags\nbehind by one snapshot cycle. The first file contains a full image of\nthe relation at LSN 100, the second at LSN 200. When we have received\nWAL up to LSN 300, we write a materialized image at LSN 200. That\nseems a bit silly. In the design per your RFC, you would write\nsnapshots at LSNs 200 and 300, instead. That seems better.\n\n\n\n# Third option (not implemented yet)\n\nStore snapshot files like in the RFC, but also store per-relation\nWAL files that contain WAL in a range of LSNs for that relation.\n\n    SNAPSHOT @100   WAL 100-200\n       .                 |\n       .                 |\n       .                 |\n       .                 |\n    SNAPSHOT @200   WAL 200-300\n       .                 |\n       .                 |\n       .                 |\n       .                 |\n    SNAPSHOT @300\n       .\n       .\n    IN-MEMORY 300-\n\n\nThis could be the best of both worlds. The snapshot files would be\nindependent of the PostgreSQL WAL format. When it's time to write\nsnapshot file @300, you write a full image of the relation at LSN 300,\nand you write the WAL that you had accumulated between 200 and 300 to\na separate file. That way, you don't \"lag behind\" for one snapshot\ncycle like in the current implementation. 
But you still have the WAL\nfor a particular relation readily available alongside the snapshot\nfiles, and you don't need to track what snapshot LSNs exist\nseparately.\n\n(If we wanted to minimize the number of files, you could include the\nsnapshot @300 and the WAL between 200 and 300 in the same file, but I\nfeel it's probably better to keep them separate)\n\n\n\n# Further thoughts\n\nThere's no fundamental reason why the LSNs of the snapshot files and the\nranges of the WAL files would need to line up. So this would be possible\ntoo:\n\n    SNAPSHOT @100   WAL 100-150\n       .                 |\n       .                 |\n       .            WAL 150-250\n       .                 |\n    SNAPSHOT @200        |\n       .                 |\n       .            WAL 250-400\n       .                 |\n       .                 |\n    SNAPSHOT @300        |\n       .                 |\n       .                 |\n    IN-MEMORY 300-\n\nI'm not sure what the benefit of this would be. You could materialize\nadditional snapshot files in the middle of a range covered by a WAL\nfile, maybe? Might be useful to speed up access when you create a new\nbranch in the middle of an LSN range or if there's some other reason\nto believe that a particular LSN is \"interesting\" and there will be\na lot of requests using it.\n"
  },
  {
    "path": "docs/rfcs/009-snapshot-first-storage.md",
    "content": "# Snapshot-first storage architecture\n\nGoals:\n- Long-term storage of database pages.\n- Easy snapshots; simple snapshot and branch management.\n- Allow cloud-based snapshot/branch management.\n- Allow cloud-centric branching; decouple branch state from running pageserver.\n- Allow customer ownership of data via s3 permissions.\n- Provide same or better performance for typical workloads, vs plain postgres.\n\nNon-goals:\n- Service database reads from s3 (reads should be serviced from the pageserver cache).\n- Keep every version of every page / Implement point-in-time recovery (possibly a future paid feature, based on WAL replay from an existing snapshot).\n\n## Principle of operation\n\nThe database “lives in s3”. This means that all of the long term page storage is in s3, and the “live database”-- the version that lives in the pageserver-- is a set of “dirty pages” that haven’t yet been written back to s3.\n\nIn practice, this is mostly similar to storing frequent snapshots to s3 of a database that lives primarily elsewhere.\n\nThe main difference is that s3 is authoritative about which branches exist; pageservers consume branches, snapshots, and related metadata by reading them from s3. This allows cloud-based management of branches and snapshots, regardless of whether a pageserver is running or not.\n\nIt’s expected that a pageserver should keep a copy of all pages, to shield users from s3 latency. 
A cheap/slow pageserver that falls back to s3 for some reads would be possible, but doesn’t seem very useful right now.\n\nBecause s3 keeps all history, and the safekeeper(s) preserve any WAL records needed to reconstruct the most recent changes, the pageserver can store dirty pages in RAM or using non-durable local storage; this should allow very good write performance, since there is no need for fsync or journaling.\n\nObjects in s3 are immutable snapshots, never to be modified once written (only deleted).\n\nObjects in s3 are files, each containing a set of pages for some branch/relation/segment as of a specific time (LSN). A snapshot could be complete (meaning it has a copy of every page), or it could be incremental (containing only the pages that were modified since the previous snapshot). It’s expected that most snapshots are incremental to keep storage costs low.\n\nIt’s expected that the pageserver would upload new snapshot objects frequently, e.g. somewhere between 30 seconds and 15 minutes, depending on cost/performance balance.\n\nNo-longer needed snapshots can be “squashed”-- meaning snapshot N and snapshot N+1 can be read by some cloud agent software, which writes out a new object containing the combined set of pages (keeping only the newest version of each page) and then deletes the original snapshots.\n\nA pageserver only needs to store the set of pages needed to satisfy operations in flight: if a snapshot is still being written, the pageserver needs to hold historical pages so that snapshot captures a consistent moment in time (similar to what is needed to satisfy a slow replica).\n\nWAL records can be discarded once a snapshot has been stored to s3. (Unless we want to keep them longer as part of a point-in-time recovery feature.)\n\n## Pageserver operation\n\nTo start a pageserver from a stored snapshot, the pageserver downloads a set of snapshots sufficient to start handling requests. 
We assume this includes the latest copy of every page, though it might be possible to start handling requests early, and retrieve pages for the first time only when needed.\n\nTo halt a pageserver, one final snapshot should be written containing all pending WAL updates; then the pageserver and safekeepers can shut down.\n\nIt’s assumed there is some cloud management service that ensures only one pageserver is active and servicing writes to a given branch.\n\nThe pageserver needs to be able to track whether a given page has been modified since the last snapshot, and should be able to produce the set of dirty pages efficiently to create a new snapshot.\n\nThe pageserver need only store pages that are “reachable” from a particular LSN. For example, a page may be written four times, at LSN 100, 200, 300, and 400. If no snapshot is being created when LSN 200 is written, the page at LSN 100 can be discarded. If a snapshot is triggered when the pageserver is at LSN 299, the pageserver must preserve the page from LSN 200 until that snapshot is complete. As before, the page at LSN 300 can be discarded when the LSN 400 page is written (regardless of whether the LSN 200 snapshot has completed.)\n\nIf the pageserver is servicing multiple branches, those branches may contain common history. While it would be possible to serve branches with zero knowledge of their common history, a pageserver could save a lot of space using an awareness of branch history to share the common set of pages. 
Computing the “liveness” of a historical page may be tricky in the face of multiple branches.\n\nThe pageserver may store dirty pages to memory or to local block storage; any local block storage format is only temporary “overflow” storage, and is not expected to be readable by future software versions.\n\nThe pageserver may store clean pages (those that are captured in a snapshot) any way it likes: in memory, in a local filesystem (possibly keeping a local copy of the snapshot file), or using some custom storage format. Reading pages from s3 would be functional, but is expected to be prohibitively slow.\n\nThe mechanism for recovery after a pageserver failure is WAL redo. If we find that too slow in some situations (e.g. write-heavy workload causes long startup), we can write more frequent snapshots to keep the number of outstanding WAL records low. If that’s still not good enough, we could look at other options (e.g. redundant pageserver or an EBS page journal).\n\nA read-only pageserver is possible; such a pageserver could be a read-only cache of a specific snapshot, or could auto-update to the latest snapshot on some branch. Either way, no safekeeper is required. Multiple read-only pageservers could exist for a single branch or snapshot.\n\n## Cloud snapshot manager operation\n\nCloud software may wish to do the following operations (commanded by a user, or based on some pre-programmed policy or other cloud agent):\nCreate/delete/clone/rename a database\nCreate a new branch (possibly from a historical snapshot)\nStart/stop the pageserver/safekeeper on a branch\nList databases/branches/snapshots that are visible to this user account\n\nSome metadata operations (e.g. list branches/snapshots of a particular db) could be performed by scanning the contents of a bucket and inspecting the file headers of each snapshot object. 
This might not be fast enough; it might be necessary to build a metadata service that can respond more quickly to some queries.\n\nThis is especially true if there are public databases: there may be many thousands of buckets that are public, and scanning all of them is not a practical strategy for answering metadata queries.\n\n## Snapshot names, deletion and concurrency\n\nThere may be race conditions between operations-- in particular, a “squash” operation may replace two snapshot objects (A, B) with some combined object (C). Since C is logically equivalent to B, anything that attempts to access B should be able to seamlessly switch over to C. It’s assumed that concurrent delete won’t disrupt a read in flight, but it may be possible for some process to read B’s header, and then discover on the next operation that B is gone.\n\nFor this reason, any attempted read should attempt a fallback procedure (list objects; search list for an equivalent object) if an attempted read fails.  This requires a predictable naming scheme, e.g. `XXXX_YYYY_ZZZZ_DDDD`, where `XXXX` is the branch unique id, and `YYYY` and `ZZZZ` are the starting/ending LSN values.  `DDDD` is a timestamp indicating when the object was created; this is used to disambiguate a series of empty snapshots, or to help a snapshot policy engine understand which snapshots should be kept or discarded.\n\n## Branching\n\nA user may request a new branch from the cloud user interface. There is a sequence of things that needs to happen:\n- If the branch is supposed to be based on the latest contents, the pageserver should perform an immediate snapshot. 
This is the parent snapshot for the new branch.\n- Cloud software should create the new branch, by generating a new (random) unique branch identifier, and creating a placeholder snapshot object.\n    - The placeholder object is an empty snapshot containing only metadata (which anchors it to the right parent history) and no pages.\n    - The placeholder can be discarded when the first snapshot (containing data) is completed. Discarding is equivalent to squashing, when the snapshot contains no data.\n- If the branch needs to be started immediately, a pageserver should be notified that it needs to start servicing the branch. This may not be the same pageserver that services the parent branch, though the common history may make it the best choice.\n\nSome of these steps could be combined into the pageserver, but that process would not be possible under all cases (e.g. if no pageserver is currently running, or if the branch is based on an older snapshot, or if a different pageserver will be serving the new branch). Regardless of which software drives the process, the result should look the same.\n\n## Long-term file format\n\nSnapshot files (and any other object stored in s3) must be readable by future software versions.\n\nIt should be possible to build multiple tools (in addition to the pageserver) that can read and write this file format-- for example, to allow cloud snapshot management.\n\nFiles should contain the following metadata, in addition to the set of pages:\n- The version of the file format.\n- A unique identifier for this branch (should be worldwide-unique and unchanging).\n- Optionally, any human-readable names assigned to this branch (for management UI/debugging/logging).\n- For incremental snapshots, the identifier of the predecessor snapshot. 
For new branches, this will be the parent snapshot (the point at which history diverges).\n- The location of the predecessor branch snapshot, if different from this branch’s location.\n- The LSN range `(parent, latest]` for this snapshot. For complete snapshots, the parent LSN can be 0.\n- The UTC timestamp of the snapshot creation (which may be different from the time of its highest LSN, if the database is idle).\n- A SHA2 checksum over the entire file (excluding the checksum itself), to preserve file integrity.\n\nA file may contain no pages, and an empty LSN range (probably `(latest, latest]`?), which serves as a placeholder for either a newly-created branch, or a snapshot of an idle database.\n\nAny human-readable names stored in the file may fall out of date if database/branch renames are allowed; there may need to be a cloud metadata service to query (current name -> unique identifier). We may choose instead to not store human-readable names in the database, or treat them as debugging information only.\n\n## S3 semantics, and other kinds of storage\n\nFor development and testing, it may be easier to use other kinds of storage in place of s3. For example, a directory full of files can substitute for an s3 bucket with multiple objects. This mode is expected to match the s3 semantics (e.g. don’t edit existing files or use symlinks). Unit tests may omit files entirely and use an in-memory mock bucket.\n\nSome users may want to use a local or network filesystem in place of s3. This isn’t prohibited but it’s not a priority, either.\n\nAlternate implementations of s3 should be supported, including Google Cloud Storage.\n\nAzure Blob Storage should be supported. 
We assume (without evidence) that it’s semantically equivalent to s3 for this purpose.\n\nThe properties of s3 that we depend on are:\nlist objects\nstreaming read of entire object\nread byte range from object\nstreaming write new object (may use multipart upload for better reliability)\ndelete object (that should not disrupt an already-started read).\n\nUploaded files, restored backups, or s3 buckets controlled by users could contain malicious content. We should always validate that objects contain the content they’re supposed to. Incorrect, Corrupt or malicious-looking contents should cause software (cloud tools, pageserver) to fail gracefully.\n\n## Notes\n\nPossible simplifications, for a first draft implementation:\n- Assume that dirty pages fit in pageserver RAM. Can use kernel virtual memory to page out to disk if needed. Can improve this later.\n- Don’t worry about the details of the squashing process yet.\n- Don’t implement cloud metadata service; try to make everything work using basic s3 list-objects and reads.\n- Don’t implement rename, delete at first.\n- Don’t implement public/private, just use s3 permissions.\n- Don’t worry about sharing history yet-- each user has their own bucket and a full copy of all data.\n- Don’t worry about history that spans multiple buckets.\n- Don’t worry about s3 regions.\n- Don’t support user-writeable s3 buckets; users get only read-only access at most.\n\nOpen questions:\n- How important is point-in-time recovery? When should we add this? How should it work?\n- Should snapshot files use compression?\n- Should we use snapshots for async replication? A spare pageserver could stay mostly warmed up by consuming snapshots as they’re created.\n- Should manual snapshots, or snapshots triggered by branch creation, be named differently from snapshots that are triggered by a snapshot policy?\n- When a new branch is created, should it always be served by the same pageserver that owns its parent branch? 
When should we start a new pageserver?\n- How can pageserver software upgrade be done with minimal downtime?\n"
  },
  {
    "path": "docs/rfcs/010-storage_details.md",
    "content": "# Storage details\n\nHere I tried to describe the current state of thinking about our storage subsystem as I understand it. Feel free to correct me. Also, I tried to address items from Heikki's TODO and be specific on some of the details.\n\n## Overview\n\n![storage](images/storage.jpeg)\n\n### MemStore\n\nMemStore holds the data between `latest_snapshot_lsn` and `latest_lsn`. It consists of PageIndex that holds references to WAL records or pages, PageStore that stores recently materialized pages, and WalStore that stores recently received WAL.\n\n### PageIndex\n\nPageIndex is an ordered collection that maps `(BufferTag, LSN)` to one of the following references (by reference I mean some information that is needed to access that data, e.g. file_id and offset):\n\n* PageStoreRef -- page offset in the PageStore\n* LocalStoreRef -- snapshot_id and page offset inside of that snapshot\n* WalStoreRef -- offset (and size optionally) of WalRecord in WalStore\n\nPageIndex holds information about all the pages in all incremental snapshots and in the latest full snapshot. If we aren't using page compression inside snapshots we actually can avoid storing references to the full snapshot and calculate page offsets based on relation sizes metadata in the full snapshot (assuming that full snapshot stores pages sorted by page number). However, I would suggest embracing page compression from the beginning and treat all pages as variable-sized.\n\nWe assume that PageIndex is few orders of magnitude smaller than addressed data hence it should fit memory. We also don't care about crash tolerance as we can rebuild it from snapshots metadata and WAL records from WalStore or/and Safekeeper.\n\n### WalStore\n\nWalStore is a queue of recent WalRecords. I imagine that we can store recent WAL the same way as Postgres does -- as 16MB files on disk. 
On top of that, we can add some fixed-size cache that would keep some amount of segments in memory.\n\nFor now, we may rely on the Safekeeper to safely store that recent WAL. But generally, I think we can pack all S3 operations into the page server so that it would be also responsible for the recent WAL pushdown to S3 (and Safekeeper may just delete WAL that was confirmed as S3-durable by the page server).\n\n### PageStore\n\nPageStore is storage for recently materialized pages (or in other words cache of getPage results). It can also be implemented as a file-based queue with some memory cache on top of it.\n\nThere are a few possible options for PageStore:\n\na) we just add all recently materialized pages there (so several versions of the same page can be stored there) -- that is more or less how it happens now with the current RocksDB implementation.\n\nb) overwrite older pages with the newer pages -- if there is no replica we probably don't need older pages. During page overwrite, we would also need to change PageStoreRef back to WalStoreRef in PageIndex.\n\nI imagine that newly created pages would just be added to the back of PageStore (again in queue-like fashion) and this way there wouldn't be any meaningful ordering inside of that queue. When we are forming a new incremental snapshot we may prohibit any updates to the current set of pages in PageStore (giving up on single page version rule) and cut off that whole set when snapshot creation is complete.\n\nWith option b) we can also treat PageStore as an uncompleted incremental snapshot.\n\n### LocalStore\n\nLocalStore keeps the latest full snapshot and set of incremental snapshots on top of it. We add new snapshots when the number of changed pages grows bigger than a certain threshold.\n\n## Granularity\n\nBy granularity, I mean a set of pages that goes into a certain full snapshot. 
The following things should be taken into account:\n\n* can we shard big databases between page servers?\n* how much time will we spend applying WAL to access certain pages with older LSN's?\n* how many files do we create for a single database?\n\nI can think of the following options here:\n\n1. whole database goes to one full snapshot.\n    * +: we never create a lot of files for one database\n    * +: the approach is quite straightforward, moving data around is simple\n    * -: can not be sharded\n    * -: long recovery -- we always need to recover the whole database\n2. table segment is the unit of snapshotting\n    * +: straightforward for sharding\n    * +: individual segment can be quickly recovered with sliced WAL\n    * -: full snapshot can be really small (e.g. when the corresponding segment consists of a single page) and we can blow up the number of files. Then we would spend eternity in directory scans and the amount of metadata for sharding can be also quite big.\n3. range-partitioned snapshots -- snapshot includes all pages between [BuffTagLo, BuffTagHi] mixing different relations, databases, and potentially clusters (albeit from one tenant only). When full snapshot outgrows a certain limit (could be also a few gigabytes) we split the snapshot in two during the next full snapshot write. That approach would also require pages sorted by BuffTag inside our snapshots.\n    * +: addresses all mentioned issues\n    * -: harder to implement\n\nI think it is okay to start with table segments granularity and just check how we will perform in cases of lots of small tables and check whether there is any way besides option 3 to deal with it.\n\nBoth PageStore and WalStore should be \"sharded\" by this granularity level.\n\n## Security\n\nWe can generate different IAM keys for each tenant and potentially share them with users (in read-only mode?) or even allow users to provide their S3 buckets credentials.\n\nAlso, S3 backups are usually encrypted by per-tenant private keys. 
I'm not sure in what threat model such encryption would improve something (taking into account per-tenant IAM keys), but it seems that everybody is doing that (both AMZN and YNDX). Most likely that comes as a requirement about \"cold backups\" by some certification procedure.\n\n## Dynamics\n\n### WAL stream handling\n\nWhen a new WAL record is received we need to parse BufferTags in that record and insert them in PageIndex with WalStoreRef as a value.\n\n### getPage queries\n\nLook up the page in PageIndex. If the value is a page reference then just respond with that page. If the referenced value is WAL record then find the most recent page with the same BuffTag (that is why we need ordering in PageIndex); recover it by applying WAL records; save it in PageStore; respond with that page.\n\n### Starting page server without local data\n\n* build set of latest full snapshots and incremental snapshots on top of them\n* load all their metadata into PageIndex\n* Safekeeper should connect soon and we can ask for a WAL stream starting from the latest incremental snapshot\n* for databases that are connected to us through the Safekeeper we can start loading the set of the latest snapshots or we can do that lazily based on getPage request (I'd better avoid doing that lazily for now without some access stats from the previous run and just transfer all data for active database from S3 to LocalStore).\n\n### Starting page server with local data (aka restart or reboot)\n\n* check that local snapshot files are consistent with S3\n\n### Snapshot creation\n\nTrack size of future snapshots based on info in MemStore and when it exceeds some threshold (taking into account our granularity level) create a new incremental snapshot. Always emit incremental snapshots from MemStore.\n\nTo create a new snapshot we need to walk through WalStore to get the list of all changed pages, sort it, and get the latest versions of that pages from PageStore or by WAL replay. 
It makes sense to maintain that set in memory while we are receiving the WAL stream to avoid parsing WAL during snapshot creation.\n\nFull snapshot creation can be done by GC (or we can call that entity differently -- e.g. merger?) by merging the previous full snapshot with several incremental snapshots.\n\n### S3 pushdown\n\nWhen we have several full snapshots GC can push the old one with its increments to S3.\n\n### Branch creation\n\nCreate a new timeline and replay sliced WAL up to a requested point. When the page is not in PageIndex ask the parent timeline about a page. Relation sizes are tricky.\n\n## File formats\n\nAs far as I understand Bookfile/Aversion addresses versioning and serialization parts.\n\nAs for exact data that should go to snapshots I think it is the following for each snapshot:\n\n* format version number\n* set of key/values to interpret content (e.g. is page compression enabled, is that a full or incremental snapshot, previous snapshot id, is there WAL at the end of the file, etc) -- it is up to a reader to decide what to do if some keys are missing or some unknown keys are present. If we add something backward compatible to the file we can keep the version number.\n* array of [BuffTag, corresponding offset in file] for pages -- IIUC that is analogous to ToC in Bookfile\n* array of [(BuffTag, LSN), corresponding offset in file] for the WAL records\n* pages, one by one\n* WAL records, one by one\n\nIt is also important to be able to load metadata quickly since it would be one of the main factors impacting the time of page server start. E.g. if we would store/cache about 10TB of data per page server, the size of uncompressed page references would be about 30GB (10TB / ( 8192 bytes page size / ( ~18 bytes per ObjectTag + 8 bytes offset in the file))).\n\n1) Since our ToC/array of entries can be sorted by ObjectTag we can store the whole BufferTag only when relation_id is changed and store only delta-encoded offsets for a given relation. 
That would reduce the average per-page metadata size to something less than 4 bytes instead of 26 (assuming that pages would follow the same order and offset deltas would be small).\n2) It makes sense to keep ToC at the beginning of the file to avoid extra seeks to locate it. Doesn't matter too much with the local files but matters on S3 -- if we are accessing a lot of ~1GB files with the size of metadata ~1MB then the time to transfer this metadata would be comparable with access latency itself (which is about a half of a second). So by slurping metadata with one read of file header instead of N reads we can improve the speed of page server start by this N factor.\n\nI think both of those optimizations can be done later, but that is something to keep in mind when we are designing our storage serialization routines.\n\nAlso, there were some discussions about how to embed WAL in incremental snapshots. So far, the following ideas were mentioned:\n1. snapshot lsn=200, includes WAL in range 200-300\n2. snapshot lsn=200, includes WAL in range 100-200\n3. data snapshots are separated from WAL snapshots\n\nBoth options 2 and 3 look good. I'm inclined towards option 3 as it would allow us to apply different S3 pushdown strategies for data and WAL files (e.g. we may keep data snapshot until the next full snapshot, but we may push WAL snapshot to S3 just when they appeared if there are no replicas).\n"
  },
  {
    "path": "docs/rfcs/011-retention-policy.md",
    "content": "# User-visible timeline history\n\nThe user can specify a retention policy. The retention policy is\npresented to the user as a PITR period and snapshots. The PITR period\nis the amount of recent history that needs to be retained, as minutes,\nhours, or days. Within that period, you can create a branch or\nsnapshot at any point in time, open a compute node, and start running\nqueries. Internally, a PITR period is represented as a range of LSNs\n\nThe user can also create snapshots. A snapshot is a point in time,\ninternally represented by an LSN. The user gives the snapshot a name.\n\nThe user can also specify an interval, at which the system creates\nsnapshots automatically. For example, create a snapshot every night at\n2 AM. After some user-specified time, old automatically created\nsnapshots are removed.\n\n                     Snapshot       Snapshot\n         PITR        \"Monday\"       \"Tuesday\"        PITR\n    ----######----------+-------------+-------------######>\n\nIf there are multiple branches, you can specify different policies or\ndifferent branches.\n\nThe PITR period and user-visible snapshots together define the\nretention policy.\n\nNOTE: As presented here, this is probably overly flexible. In reality,\nwe want to keep the user interface simple. Only allow a PITR period at\nthe tip of a branch, for example. But that doesn't make much\ndifference to the internals.\n\n\n# Retention policy behind the scenes\n\nThe retention policy consists of points (for snapshots) and ranges\n(for PITR periods).\n\nThe system must be able to reconstruct any page within the retention\npolicy. Other page versions can be garbage collected away. We have a\nlot of flexibility on when to perform the garbage collection and how\naggressive it is.\n\n\n# Base images and WAL slices\n\nThe page versions are stored in two kinds of files: base images and\nWAL slices. A base image contains a dump of all the pages of one\nrelation at a specific LSN. 
A WAL slice contains all the WAL in an LSN\nrange.\n\n\n    |\n    |\n    |\n    | --Base img @100   +\n    |                   |\n    |                   | WAL slice\n    |                   | 100-200\n    |                   |\n    | --Base img @200   +\n    |                   |\n    |                   | WAL slice\n    |                   | 200-300\n    |                   |\n    |                   +\n    |\n    V\n\n\nTo recover a page e.g. at LSN 150, you need the base image at LSN 100,\nand the WAL slice 100-200.\n\nAll of this works at a per-relation or per-relation-segment basis. If\na relation is updated very frequently, we create base images and WAL\nslices for it more quickly. For a relation that's updated\ninfrequently, we hold the recent WAL for that relation longer, and\nonly write it out when we need to release the disk space occupied by\nthe original WAL. (We need a backstop like that, because until all the\nWAL/base images have been durably copied to S3, we must keep the\noriginal WAL for that period somewhere, in the WAL service or in S3.)\n\n\n# Branching\n\nInternally, branch points are also \"retention points\", in addition to\nthe user-visible snapshots. If a branch has been forked off at LSN\n100, we need to be able to reconstruct any page on the parent branch\nat that LSN, because it is needed by the child branch. If a page is\nmodified in the child, we don't need to keep that in the parent\nanymore, though.\n"
  },
  {
    "path": "docs/rfcs/012-background-tasks.md",
    "content": "# Eviction\n\n Write out in-memory layer to disk, into a delta layer.\n\n- To release memory\n- To make it possible to advance disk_consistent_lsn and allow the WAL\n  service to release some WAL.\n\n- Triggered if we are short on memory\n- Or if the oldest in-memory layer is so old that it's holding back\n  the WAL service from removing old WAL\n\n# Materialization\n\nCreate a new image layer of a segment, by performing WAL redo\n\n- To reduce the amount of WAL that needs to be replayed on a GetPage request.\n- To allow garbage collection of old layers\n\n- Triggered by distance to last full image of a page\n\n# Coalescing\n\nReplace N consecutive layers of a segment with one larger layer.\n\n- To reduce the number of small files that needs to be uploaded to S3\n\n\n# Bundling\n\nZip together multiple small files belonging to different segments.\n\n- To reduce the number of small files that needs to be uploaded to S3\n\n\n# Garbage collection\n\nRemove a layer that's older than the GC horizon, and isn't needed anymore.\n"
  },
  {
    "path": "docs/rfcs/013-term-history.md",
    "content": "# What\n\nCurrently, apart from WAL safekeeper persistently stores only two logical clock\ncounter (aka term) values, sourced from the same sequence. The first is bumped\nwhenever safekeeper gives vote to proposer (or acknowledges already elected one)\nand e.g. prevents electing two proposers with the same term -- it is actually\ncalled `term` in the code. The second, called `epoch`, reflects progress of log\nreceival and this might lag behind `term`; safekeeper switches to epoch `n` when\nit has received all committed log records from all `< n` terms. This roughly\ncorresponds to proposed in\n\nhttps://github.com/neondatabase/rfcs/pull/3/files\n\n\nThis makes our biggest our difference from Raft. In Raft, every log record is\nstamped with term in which it was generated; while we essentially store in\n`epoch` only the term of the highest record on this safekeeper -- when we know\nit -- because during recovery generally we don't, and `epoch` is bumped directly\nto the term of the proposer who performs the recovery when it is finished. It is\nnot immediately obvious that this simplification is safe. I thought and I still\nthink it is; model checking confirmed that. However, some details now make me\nbelieve it is better to keep full term switching history (which is equivalent to\nknowing term of each record).\n\n# Why\n\nWithout knowing full history (list of <term, LSN> pairs) of terms it is hard to\ndetermine the exact divergence point, and if we don't perform truncation at that\npoint safety becomes questionable. Consider the following history, with\nsafekeepers A, B, C, D, E. 
n_m means record created by proposer in term n with\nLSN m; (t=x, e=y) means safekeeper currently has term x and epoch y.\n\n1) P1 in term 1 writes 1.1 everywhere, which is committed, and some more only\non A.\n\n<pre>\nA(t=1, e=1) 1.1 1.2 1.3 1.4\nB(t=1, e=1) 1.1\nC(t=1, e=1) 1.1\nD(t=1, e=1) 1.1\nE(t=1, e=1) 1.1\n</pre>\n\n2) P2 is elected by CDE in term 2, epochStartLsn is 2, and writes 2.2, 2.3 on CD:\n\n<pre>\nA(t=1, e=1) 1.1 1.2 1.3 1.4\nB(t=1, e=1) 1.1\nC(t=2, e=2) 1.1 2.2 2.3\nD(t=2, e=2) 1.1 2.2 2.3\nE(t=2, e=1) 1.1\n</pre>\n\n\n3) P3 is elected by CDE in term 3, epochStartLsn is 4, and writes 3.4 on D:\n\n<pre>\nA(t=1, e=1) 1.1 1.2 1.3 1.4\nB(t=1, e=1) 1.1\nC(t=3, e=2) 1.1 2.2 2.3\nD(t=3, e=3) 1.1 2.2 2.3 3.4\nE(t=3, e=1) 1.1\n</pre>\n\n\nNow, A gets back and P3 starts recovering it. How it should proceed? There are\ntwo options.\n\n## Don't try to find divergence point at all\n\n...start sending WAL conservatively since the horizon (1.1), and truncate\nobsolete part of WAL only when recovery is finished, i.e. epochStartLsn (4) is\nreached, i.e. 2.3 transferred -- that's what https://github.com/neondatabase/neon/pull/505 proposes.\n\nThen the following is possible:\n\n4) P3 moves one record 2.2 to A.\n\n<pre>\nA(t=1, e=1) 1.1 <b>2.2</b> 1.3 1.4\nB(t=1, e=1) 1.1 1.2\nC(t=3, e=2) 1.1 2.2 2.3\nD(t=3, e=3) 1.1 2.2 2.3 3.4\nE(t=3, e=1) 1.1\n</pre>\n\nNow log of A is basically corrupted. Moreover, since ABE are all in epoch 1 and\nA's log is the longest one, they can elect P4 who will commit such log.\n\nNote that this particular history couldn't happen if we forbid to *create* new\nrecords in term n until majority of safekeepers switch to it. It would force CDE\nto switch to 2 before 2.2 is created, and A could never become donor while his\nlog is corrupted. 
Generally with this additional barrier I believe the algorithm\nbecomes safe, but\n - I don't like this kind of artificial barrier;\n - I also feel somewhat uncomfortable about even temporarily having intentionally\n   corrupted WAL;\n - I'd still model check the idea.\n\n## Find divergence point and truncate at it\n\nThen step 4 would delete 1.3 1.4 on A, and we are ok. The question is, how do we\ndo that? Without term switching history we have to resort to sending again since\nthe horizon and memcmp'ing records, which is inefficient and ugly. Or we can\nmaintain full history and determine truncation point by comparing 'wrong' and\n'right' histories -- much like pg_rewind does -- and perform truncation + start\nstreaming right there.\n\n# Proposal\n\n- Add term history as array of <term, LSN> pairs to safekeeper controlfile.\n- Return it to proposer with VoteResponse so 1) proposer can tell it to other\n  nodes and 2) determine personal streaming starting point. However, since we\n  don't append WAL and update controlfile atomically, let's first always update\n  controlfile but send only the history of what we really have (up to highest\n  term in history where begin_lsn >= end of wal; this highest term replaces\n  current `epoch`). We also send end of wal as we do now to determine the donor.\n- Create ProposerAnnouncement message which proposer sends before starting\n  streaming. It announces proposer as elected and\n  1) Truncates wrong part of WAL on safekeeper\n     (divergence point is already calculated at proposer, but can be\n     cross-verified here).\n  2) Communicates the 'right' history of its term (taken from donor). 
Seems\n     better to immediately put the history in the controlfile,\n\t though safekeeper might not have full WAL for previous terms in it --\n\t this way is simpler, and we can't update WAL and controlfile atomically anyway.\n\n\t This also constitutes analogue of current epoch bump for those safekeepers\n     which don't need recovery, which is important for sync-safekeepers (bump\n     epoch without waiting records from new term).\n- After ProposerAnnouncement proposer streams WAL since calculated starting\n  point -- only what is missing.\n\n\npros/cons:\n+ (more) clear safety of WAL truncation -- we get very close to Raft\n+ no unnecessary data sending (faster recovery for not-oldest-safekeepers, matters\n   only for 5+ nodes)\n+ adds some observability at safekeepers\n\n- complexity, but not that much\n\n\n# Misc\n\n- During model checking I did truncation on first locally non existent or\n  different record -- analogue of 'memcmp' variant described above.\n"
  },
  {
    "path": "docs/rfcs/014-safekeepers-gossip.md",
    "content": "# Safekeeper gossip\n\nExtracted from this [PR](https://github.com/neondatabase/rfcs/pull/13)\n\n## Motivation\n\nIn some situations, safekeeper (SK) needs coordination with other SK's that serve the same tenant:\n\n1. WAL deletion. SK needs to know what WAL was already safely replicated to delete it. Now we keep WAL indefinitely.\n2. Deciding on who is sending WAL to the pageserver. Now sending SK crash may lead to a livelock where nobody sends WAL to the pageserver.\n3. To enable SK to SK direct recovery without involving the compute\n\n## Summary\n\nCompute node has connection strings to each safekeeper. During each compute->safekeeper connection establishment, the compute node should pass down all that connection strings to each safekeeper. With that info, safekeepers may establish Postgres connections to each other and periodically send ping messages with LSN payload.\n\n## Components\n\nsafekeeper, compute, compute<->safekeeper protocol, possibly console (group SK addresses)\n\n## Proposed implementation\n\nEach safekeeper can periodically ping all its peers and share connectivity and liveness info. If the ping was not receiver for, let's say, four ping periods, we may consider sending safekeeper as dead. That would mean some of the alive safekeepers should connect to the pageserver. One way to decide which one exactly: `make_connection = my_node_id == min(alive_nodes)`\n\nSince safekeepers are multi-tenant, we may establish either per-tenant physical connections or per-safekeeper ones. So it makes sense to group \"logical\" connections between corresponding tenants on different nodes into a single physical connection. That means that we should implement an interconnect thread that maintains physical connections and periodically broadcasts info about all tenants.\n\nRight now console may assign any 3 SK addresses to a given compute node. That may lead to a high number of gossip connections between SK's. 
Instead, we can assign safekeeper triples to the compute node. But if we want to \"break\"/\" change\" group by an ad-hoc action, we can do it.\n\n### Corner cases\n\n- Current safekeeper may be alive but may not have connectivity to the pageserver\n\n  To address that, we need to gossip visibility info. Based on that info, we may define SK as alive only when it can connect to the pageserver.\n\n- Current safekeeper may be alive but may not have connectivity with the compute node.\n\n  We may broadcast last_received_lsn and presence of compute connection and decide who is alive based on that.\n\n- It is tricky to decide when to shut down gossip connections because we need to be sure that pageserver got all the committed (in the distributed sense, so local SK info is not enough) records, and it may never lose them. It is not a strict requirement since `--sync-safekeepers` that happen before the compute start will allow the pageserver to consume missing WAL, but it is better to do that in the background. So the condition may look like that: `majority_max(flush_lsn) == pageserver_s3_lsn` Here we rely on the two facts:\n    - that `--sync-safekeepers` happened after the compute shutdown, and it advanced local commit_lsn's allowing pageserver to consume that WAL.\n\n    - we wait for the `pageserver_s3_lsn` advancement to avoid pageserver's last_received_lsn/disk_consistent_lsn going backward due to the disk/hardware failure and subsequent S3 recovery\n\n    If those conditions are not met, we will have some gossip activity (but that may be okay).\n\n## Pros/cons\n\nPros:\n\n- distributed, does not introduce new services (like etcd), does not add console as a storage dependency\n- lays the foundation for gossip-based recovery\n\nCons:\n\n- Only compute knows a set of safekeepers, but they should communicate even without compute node. In case of safekeepers restart, we will lose that info and can't gossip anymore. 
Hence we can't trim some WAL tail until the compute node starts. Also, it is ugly.\n\n- If the console assigns a random set of safekeepers to each Postgres, we may end up in a situation where each safekeeper needs to have a connection with all other safekeepers. We can group safekeepers into isolated triples in the console to avoid that. Then \"mixing\" would happen only if we do rebalancing.\n\n## Alternative implementation\n\nWe can have a selected node (e.g., console) with everybody reporting to it.\n\n## Security implications\n\nWe don't increase the attack surface here. Communication can happen in a private network that is not exposed to users.\n\n## Scalability implications\n\nThe only thing that may grow as we grow the number of computes is the number of gossip connections. But if we group safekeepers and assign a compute node to a random SK triple, the number of connections would be constant.\n"
  },
  {
    "path": "docs/rfcs/014-storage-lsm.md",
    "content": "# Why LSM trees?\n\nIn general, an LSM tree has the nice property that random updates are\nfast, but the disk writes are sequential. When a new file is created,\nit is immutable. New files are created and old ones are deleted, but\nexisting files are never modified. That fits well with storing the\nfiles on S3.\n\nCurrently, we create a lot of small files. That is mostly a problem\nwith S3, because each GET/PUT operation is expensive, and LIST\noperation only returns 1000 objects at a time, and isn't free\neither. Currently, the files are \"archived\" together into larger\ncheckpoint files before they're uploaded to S3 to alleviate that\nproblem, but garbage collecting data from the archive files would be\ndifficult and we have not implemented it. This proposal addresses that\nproblem.\n\n\n# Overview\n\n\n```\n^ LSN\n|\n|      Memtable:     +-----------------------------+\n|                    |                             |\n|                    +-----------------------------+\n|\n|\n|            L0:     +-----------------------------+\n|                    |                             |\n|                    +-----------------------------+\n|\n|                    +-----------------------------+\n|                    |                             |\n|                    +-----------------------------+\n|\n|                    +-----------------------------+\n|                    |                             |\n|                    +-----------------------------+\n|\n|                    +-----------------------------+\n|                    |                             |\n|                    +-----------------------------+\n|\n|\n|           L1:      +-------+ +-----+ +--+  +-+\n|                    |       | |     | |  |  | |\n|                    |       | |     | |  |  | |\n|                    +-------+ +-----+ +--+  +-+\n|\n|                       +----+ +-----+ +--+  +----+\n|                       |    | |     | |  |  |    |\n|          
             |    | |     | |  |  |    |\n|                       +----+ +-----+ +--+  +----+\n|\n+--------------------------------------------------------------> Page ID\n\n\n+---+\n|   |   Layer file\n+---+\n```\n\n\n# Memtable\n\nWhen new WAL arrives, it is first put into the Memtable. Despite the\nname, the Memtable is not a purely in-memory data structure. It can\nspill to a temporary file on disk if the system is low on memory, and\nis accessed through a buffer cache.\n\nIf the page server crashes, the Memtable is lost. It is rebuilt by\nprocessing again the WAL that's newer than the latest layer in L0.\n\nThe size of the Memtable is configured by the \"checkpoint distance\"\nsetting. Because anything that hasn't been flushed to disk and\nuploaded to S3 yet needs to be kept in the safekeeper, the \"checkpoint\ndistance\" also determines the amount of WAL that needs to kept in the\nsafekeeper.\n\n# L0\n\nWhen the Memtable fills up, it is written out to a new file in L0. The\nfiles are immutable; when a file is created, it is never\nmodified. Each file in L0 is roughly 1 GB in size (*). Like the\nMemtable, each file in L0 covers the whole key range.\n\nWhen enough files have been accumulated in L0, compaction\nstarts. Compaction processes all the files in L0 and reshuffles the\ndata to create a new set of files in L1.\n\n\n(*) except in corner cases like if we want to shut down the page\nserver and want to flush out the memtable to disk even though it's not\nfull yet.\n\n\n# L1\n\nL1 consists of ~ 1 GB files like L0. But each file covers only part of\nthe overall key space, and a larger range of LSNs. This speeds up\nsearches. When you're looking for a given page, you need to check all\nthe files in L0, to see if they contain a page version for the requested\npage. But in L1, you only need to check the files whose key range covers\nthe requested page. 
This is particularly important at cold start, when\nchecking a file means downloading it from S3.\n\nPartitioning by key range also helps with garbage collection. If only a\npart of the database is updated, we will accumulate more files for\nthe hot part in L1, and old files can be removed without affecting the\ncold part.\n\n\n# Image layers\n\nSo far, we've only talked about delta layers. In addition to the delta\nlayers, we create image layers, when \"enough\" WAL has been accumulated\nfor some part of the database. Each image layer covers a 1 GB range of\nkey space. It contains images of the pages at a single LSN, a snapshot\nif you will.\n\nThe exact heuristic for what \"enough\" means is not clear yet. Maybe\ncreate a new image layer when 10 GB of WAL has been accumulated for a\n1 GB segment.\n\nThe image layers limit the number of layers that a search needs to\ncheck. That puts a cap on read latency, and it also allows garbage\ncollecting layers that are older than the GC horizon.\n\n\n# Partitioning scheme\n\nWhen compaction happens and creates a new set of files in L1, how do\nwe partition the data into the files?\n\n- Goal is that each file is ~ 1 GB in size\n- Try to match partition boundaries at relation boundaries. (See [1]\n  for how PebblesDB does this, and for why that's important)\n- Greedy algorithm\n\n# Additional Reading\n\n[1] Paper on PebblesDB and how it does partitioning.\nhttps://www.cs.utexas.edu/~rak/papers/sosp17-pebblesdb.pdf\n"
  },
  {
    "path": "docs/rfcs/015-storage-messaging.md",
    "content": "# Storage messaging\n\nCreated on 19.01.22\n\nInitially created [here](https://github.com/neondatabase/rfcs/pull/16) by @kelvich.\n\nThat it is an alternative to (014-safekeeper-gossip)[]\n\n## Motivation\n\nAs in 014-safekeeper-gossip we need to solve the following problems:\n\n* Trim WAL on safekeepers\n* Decide on which SK should push WAL to the S3\n* Decide on which SK should forward WAL to the pageserver\n* Decide on when to shut down SK<->pageserver connection\n\nThis RFC suggests a more generic and hopefully more manageable way to address those problems. However, unlike 014-safekeeper-gossip, it does not bring us any closer to safekeeper-to-safekeeper recovery but rather unties two sets of different issues we previously wanted to solve with gossip.\n\nAlso, with this approach, we would not need \"call me maybe\" anymore, and the pageserver will have all the data required to understand that it needs to reconnect to another safekeeper.\n\n## Summary\n\nInstead of p2p gossip, let's have a centralized broker where all the storage nodes report per-timeline state. Each storage node should have a `--broker-url=1.2.3.4` CLI param.\n\nHere I propose two ways to do that. After a lot of arguing with myself, I'm leaning towards the etcd approach. My arguments for it are in the pros/cons section. Both options require adding a Grpc client in our codebase either directly or as an etcd dependency.\n\n## Non-goals\n\nThat RFC does *not* suggest moving the compute to pageserver and compute to safekeeper mappings out of the console. The console is still the only place in the cluster responsible for the persistency of that info. So I'm implying that each pageserver and safekeeper exactly knows what timelines he serves, as it currently is. 
We need some mechanism for a new pageserver to discover mapping info, but that is out of the scope of this RFC.\n\n## Impacted components\n\npageserver, safekeeper\nadds either etcd or console as a storage dependency\n\n## Possible implementation: custom message broker in the console\n\nWe've decided to go with an etcd approach instead of the message broker.\n\n<details closed>\n<summary>Original suggestion</summary>\n<br>\nWe can add a Grpc service in the console that acts as a message broker since the console knows the addresses of all the components. The broker can ignore the payload and only redirect messages. So, for example, each safekeeper may send a message to the peering safekeepers or to the pageserver responsible for a given timeline.\n\nMessage format could be `{sender, destination, payload}`.\n\nThe destination is either:\n1. `sk_#{tenant}_#{timeline}` -- to be broadcasted on all safekeepers, responsible for that timeline, or\n2. `pserver_#{tenant}_#{timeline}` -- to be broadcasted on all pageservers, responsible for that timeline\n\nSender is either:\n1. `sk_#{sk_id}`, or\n2. `pserver_#{pserver_id}`\n\nI can think of the following behavior to address our original problems:\n\n* WAL trimming\n  Each safekeeper periodically broadcasts `(write_lsn, commit_lsn)` to all peering (peering == responsible for that timeline) safekeepers\n\n* Decide on which SK should push WAL to the S3\n\n  Each safekeeper periodically broadcasts `i_am_alive_#{current_timestamp}` message to all peering safekeepers. That way, safekeepers may maintain the vector of alive peers (loose one, with false negatives). Alive safekeeper with the minimal id pushes data to S3.\n\n* Decide on which SK should forward WAL to the pageserver\n\n  Each safekeeper periodically sends (write_lsn, commit_lsn, compute_connected) to the relevant pageservers. 
With that info, pageserver can maintain a view of the safekeepers state, connect to a random one, and detect the moments (e.g., one the safekeepers is not making progress or down) when it needs to reconnect to another safekeeper. Pageserver should resolve exact IP addresses through the console, e.g., exchange `#sk_#{sk_id}` to `4.5.6.7:6400`.\n\n  Pageserver connection to the safekeeper triggered by the state change `compute_connected: false -> true`. With that, we don't need \"call me maybe\" anymore.\n\n  Also, we don't have a \"peer address amnesia\" problem as in the gossip approach (with gossip, after a simultaneous reboot, safekeepers wouldn't know each other addresses until the next compute connection).\n\n* Decide on when to shutdown sk<->pageserver connection\n\n  Again, pageserver would have all the info to understand when to shut down the safekeeper connection.\n\n### Scalability\n\nOne node is enough (c) No, seriously, it is enough.\n\n### High Availability\n\nBroker lives in the console, so we can rely on k8s maintaining the console app alive.\n\nIf the console is down, we won't trim WAL and reconnect the pageserver to another safekeeper. But, at the same, if the console is down, we already can't accept new compute connections and start stopped computes, so we are making things a bit worse, but not dramatically.\n\n### Interactions\n\n```\n         .________________.\nsk_1 <-> |                | <-> pserver_1\n...      
| Console broker |     ...\nsk_n <-> |________________| <-> pserver_m\n```\n</details>\n\n\n## Implementation: etcd state store\n\nAlternatively, we can set up `etcd` and maintain the following data structure in it:\n\n```ruby\n\"compute_#{tenant}_#{timeline}\" => {\n    safekeepers => {\n        \"sk_#{sk_id}\" => {\n            write_lsn: \"0/AEDF130\",\n            commit_lsn: \"0/AEDF100\",\n            compute_connected: true,\n            last_updated: 1642621138,\n        },\n    }\n}\n```\n\nAs etcd doesn't support field updates in the nested objects that translates to the following set of keys:\n\n```ruby\n\"compute_#{tenant}_#{timeline}/safekeepers/sk_#{sk_id}/write_lsn\",\n\"compute_#{tenant}_#{timeline}/safekeepers/sk_#{sk_id}/commit_lsn\",\n...\n```\n\nEach storage node can subscribe to the relevant sets of keys and maintain a local view of that structure. So in terms of the data flow, everything is the same as in the previous approach. Still, we can avoid implementing the message broker and prevent runtime storage dependency on a console.\n\n### Safekeeper address discovery\n\nDuring the startup safekeeper should publish the address he is listening on as the part of `{\"sk_#{sk_id}\" => ip_address}`. Then the pageserver can resolve `sk_#{sk_id}` to the actual address. This way it would work both locally and in the cloud setup. Safekeeper should have `--advertised-address` CLI option so that we can listen on e.g. 0.0.0.0 but advertise something more useful.\n\n### Safekeeper behavior\n\nFor each timeline safekeeper periodically broadcasts `compute_#{tenant}_#{timeline}/safekeepers/sk_#{sk_id}/*` fields. It subscribes to changes of `compute_#{tenant}_#{timeline}` -- that way safekeeper will have an information about peering safekeepers.\nThat amount of information is enough to properly trim WAL. 
To decide on who is pushing the data to S3 safekeeper may use etcd leases or broadcast a timestamp and hence track who is alive.\n\n### Pageserver behavior\n\nPageserver subscribes to `compute_#{tenant}_#{timeline}` for each tenant it owns. With that info, pageserver can maintain a view of the safekeepers state, connect to a random one, and detect the moments (e.g., one the safekeepers is not making progress or down) when it needs to reconnect to another safekeeper. Pageserver should resolve exact IP addresses through the console, e.g., exchange `#sk_#{sk_id}` to `4.5.6.7:6400`.\n\nPageserver connection to the safekeeper can be triggered by the state change `compute_connected: false -> true`. With that, we don't need \"call me maybe\" anymore.\n\nAs an alternative to compute_connected, we can track timestamp of the latest message arrived to safekeeper from compute. Usually compute broadcasts KeepAlive to all safekeepers every second, so it'll be updated every second when connection is ok. Then the connection can be considered down when this timestamp isn't updated for a several seconds.\n\nThis will help to faster detect issues with safekeeper (and switch to another) in the following cases:\n\n      when compute failed but TCP connection stays alive until timeout (usually about a minute)\n      when safekeeper failed and didn't set compute_connected to false\n\nAnother way to deal with [2] is to process (write_lsn, commit_lsn, compute_connected) as a KeepAlive on the pageserver side and detect issues when sk_id don't send anything for some time. This way is fully compliant to this RFC.\n\nAlso, we don't have a \"peer address amnesia\" problem as in the gossip approach (with gossip, after a simultaneous reboot, safekeepers wouldn't know each other addresses until the next compute connection).\n\n### Interactions\n\n```\n         .________________.\nsk_1 <-> |                | <-> pserver_1\n...      
|      etcd      |     ...\nsk_n <-> |________________| <-> pserver_m\n```\n\n### Sequence diagrams for different workflows\n\n#### Cluster startup\n\n```mermaid\nsequenceDiagram\n    autonumber\n    participant C as Compute\n    participant SK1\n    participant SK2\n    participant SK3\n    participant PS1\n    participant PS2\n    participant O as Orchestrator\n    participant M as Metadata Service\n\n    PS1->>M: subscribe to updates to state of timeline N\n    C->>+SK1: WAL push\n    loop constantly update current lsns\n        SK1->>-M: I'm at lsn A\n    end\n    C->>+SK2: WAL push\n    loop constantly update current lsns\n        SK2->>-M: I'm at lsn B\n    end\n    C->>+SK3: WAL push\n    loop constantly update current lsns\n        SK3->>-M: I'm at lsn C\n    end\n    loop request pages\n        C->>+PS1: get_page@lsn\n        PS1->>-C: page image\n    end\n    M->>PS1: New compute appeared for timeline N. SK1 at A, SK2 at B, SK3 at C\n    note over PS1: Say SK1 at A=200, SK2 at B=150 SK3 at C=100 <br> so connect to SK1 because it is the most up to date one\n    PS1->>SK1: start replication\n```\n\n#### Behaviour of services during typical operations\n\n```mermaid\nsequenceDiagram\n    autonumber\n    participant C as Compute\n    participant SK1\n    participant SK2\n    participant SK3\n    participant PS1\n    participant PS2\n    participant O as Orchestrator\n    participant M as Metadata Service\n\n    note over C,M: Scenario 1: Pageserver checkpoint\n    note over PS1: Upload data to S3\n    PS1->>M: Update remote consistent lsn\n    M->>SK1: propagate remote consistent lsn update\n    note over SK1: truncate WAL up to remote consistent lsn\n    M->>SK2: propagate remote consistent lsn update\n    note over SK2: truncate WAL up to remote consistent lsn\n    M->>SK3: propagate remote consistent lsn update\n    note over SK3: truncate WAL up to remote consistent lsn\n    note over C,M: Scenario 2: SK1 finds itself lagging behind MAX(150 (SK2), 200 
(SK2)) - 100 (SK1) > THRESHOLD\n    SK1->>SK2: Fetch WAL delta between 100 (SK1) and 200 (SK2)\n    note over C,M: Scenario 3: PS1 detects that SK1 is lagging behind: Connection from SK1 is broken or there is no messages from it in 30 seconds.\n    note over PS1: e.g. SK2 is at 150, SK3 is at 100, chose SK2 as a new replication source\n    PS1->>SK2: start replication\n```\n\n#### Behaviour during timeline relocation\n\n```mermaid\nsequenceDiagram\n    autonumber\n    participant C as Compute\n    participant SK1\n    participant SK2\n    participant SK3\n    participant PS1\n    participant PS2\n    participant O as Orchestrator\n    participant M as Metadata Service\n\n    note over C,M: Timeline is being relocated from PS1 to PS2\n    O->>+PS2: Attach timeline\n    PS2->>-O: 202 Accepted if timeline exists in S3\n    note over PS2: Download timeline from S3\n     note over O: Poll for timeline download (or subscribe to metadata service)\n    loop wait for attach to complete\n        O->>PS2: timeline detail should answer that timeline is ready\n    end\n    PS2->>M: Register downloaded timeline\n    PS2->>M: Get safekeepers for timeline, subscribe to changes\n    PS2->>SK1: Start replication to catch up\n    note over O: PS2 caught up, time to switch compute\n    O->>C: Restart compute with new pageserver url in config\n    note over C: Wal push is restarted\n    loop request pages\n        C->>+PS2: get_page@lsn\n        PS2->>-C: page image\n    end\n    O->>PS1: detach timeline\n    note over C,M: Scenario 1: Attach call failed\n    O--xPS2: Attach timeline\n    note over O: The operation can be safely retried, <br> if we hit some threshold we can try another pageserver\n    note over C,M: Scenario 2: Attach succeeded but pageserver failed to download the data or start replication\n    loop wait for attach to complete\n        O--xPS2: timeline detail should answer that timeline is ready\n    end\n    note over O: Can wait for a timeout, and then try another 
pageserver <br> there should be a limit on number of different pageservers to try\n    note over C,M: Scenario 3: Detach fails\n    O--xPS1: Detach timeline\n    note over O: can be retried, if continues to fail might lead to data duplication in s3\n```\n\n# Pros/cons\n\n## Console broker/etcd vs gossip:\n\nGossip pros:\n* gossip allows running storage without the console or etcd\n\nConsole broker/etcd pros:\n* simpler\n* solves \"call me maybe\" as well\n* avoid possible N-to-N connection issues with gossip without grouping safekeepers in pre-defined triples\n\n## Console broker vs. etcd:\n\nInitially, I wanted to avoid etcd as a dependency mostly because I've seen how painful the ZooKeeper dependency was for ClickHouse: in each chat, at each conference, people were complaining about configuration and maintenance barriers with ZooKeeper. It was so bad that ClickHouse re-implemented ZooKeeper to embed it: https://clickhouse.com/docs/en/operations/clickhouse-keeper/.\n\nBut with etcd we are in a somewhat different situation:\n\n1. We don't need persistency and strong consistency guarantees for the data we store in etcd\n2. etcd uses Grpc as a protocol, and messages are pretty simple\n\nSo it looks like implementing an in-memory store with an etcd interface is straightforward _if we want that in the future_. At the same time, we can avoid implementing it right now, and we will be able to run a local neon installation with etcd running somewhere in the background (as opposed to building and running console, which in turn requires Postgres).\n"
  },
  {
    "path": "docs/rfcs/016-connection-routing.md",
    "content": "# Dispatching a connection\n\nFor each client connection, Neon service needs to authenticate the\nconnection, and route it to the right PostgreSQL instance.\n\n## Authentication\n\nThere are three different ways to authenticate:\n\n- anonymous; no authentication needed\n- PostgreSQL authentication\n- github single sign-on using browser\n\nIn anonymous access, the user doesn't need to perform any\nauthentication at all. This can be used e.g. in interactive PostgreSQL\ndocumentation, allowing you to run the examples very quickly. Similar\nto sqlfiddle.com.\n\nPostgreSQL authentication works the same as always. All the different\nPostgreSQL authentication options like SCRAM, kerberos, etc. are\navailable. [1]\n\nThe third option is to authenticate with github single sign-on. When\nyou open the connection in psql, you get a link that you open with\nyour browser. Opening the link redirects you to github authentication,\nand lets the connection to proceed. This is also known as \"Link auth\" [2].\n\n\n## Routing the connection\n\nWhen a client starts a connection, it needs to be routed to the\ncorrect PostgreSQL instance. Routing can be done by the proxy, acting\nas a man-in-the-middle, or the connection can be routed at the network\nlevel based on the hostname or IP address.\n\nEither way, Neon needs to identify which PostgreSQL instance the\nconnection should be routed to. If the instance is not already\nrunning, it needs to be started. Some connections always require a new\nPostgreSQL instance to be created, e.g. 
if you want to run a one-off\nquery against a particular point-in-time.\n\nThe PostgreSQL instance is identified by:\n- Neon account (possibly anonymous)\n- cluster (known as tenant in the storage?)\n- branch or snapshot name\n- timestamp (PITR)\n- primary or read-replica\n- one-off read replica\n- one-off writeable branch\n\nWhen you are using regular PostgreSQL authentication or anonymous\naccess, the connection URL needs to contain all the information needed\nfor the routing. With github single sign-on, the browser is involved\nand some details - the Neon account in particular - can be deduced\nfrom the authentication exchange.\n\nThere are three methods for identifying the PostgreSQL instance:\n\n- Browser interaction (link auth)\n- Options in the connection URL and the domain name\n- A pre-defined endpoint, identified by domain name or IP address\n\n### Link Auth\n\n    postgres://<username>@start.neon.tech/<dbname>\n\nThis gives you a link that you open in browser. Clicking the link\nperforms github authentication, and the Neon account name is\nprovided to the proxy behind the scenes. The proxy routes the\nconnection to the primary PostgreSQL instance in cluster called\n\"main\", branch \"main\".\n\nFurther ideas:\n- You could pre-define a different target for link auth\n  connections in the UI.\n- You could have a drop-down in the browser, allowing you to connect\n  to any cluster you want. Link Auth can be like Teleport.\n\n### Connection URL\n\nThe connection URL looks like this:\n\n    postgres://<username>@<cluster-id>.db.neon.tech/<dbname>\n\nBy default, this connects you to the primary PostgreSQL instance\nrunning on the \"main\" branch in the named cluster [3]. However, you can\nchange that by specifying options in the connection URL. 
The following\noptions are supported:\n\n| option name  | Description                                                                                       | Examples                                            |\n| ---          | ---                                                                                               | ---                                                 |\n| cluster      | Cluster name                                                                                      | cluster:myproject                                   |\n| branch       | Branch name                                                                                       | branch:main                                         |\n| timestamp    | Connect to an instance at given point-in-time.                                                    | timestamp:2022-04-08 timestamp:2022-04-08T11:42:16Z |\n| lsn          | Connect to an instance at given LSN                                                               | lsn:0/12FF0420                                      |\n| read-replica | Connect to a read-replica. If the parameter is 'new', a new instance is created for this session. | read-replica read-replica:new                       |\n\nFor example, to read branch 'testing' as it was on Mar 31, 2022, you could\nspecify a timestamp in the connection URL [4]:\n\n    postgres://alice@cluster-1234.db.neon.tech/postgres?options=branch:testing,timestamp:2022-03-31\n\nConnecting with cluster name and options can be disabled in the UI. If\ndisabled, you can only connect using a pre-defined endpoint.\n\n### Pre-defined Endpoint\n\nInstead of providing the cluster name, branch, and all those options\nin the connection URL, you can define a named endpoint with the same\noptions.\n\nIn the UI, click \"create endpoint\". 
Fill in the details:\n\n- Cluster name\n- Branch\n- timestamp or LSN\n- is this for the primary or for a read replica\n- etc.\n\nWhen you click Finish, a named endpoint is created. You can now use the endpoint ID to connect:\n\n    postgres://<username>@<endpoint-id>.endpoint.neon.tech/<dbname>\n\n\nAn endpoint can be assigned a static or dynamic IP address, so that\nyou can connect to it with clients that don't support TLS SNI. Maybe\nbypass the proxy altogether, but that ought to be invisible to the\nuser.\n\nYou can limit the range of source IP addresses that are allowed to\nconnect to an endpoint. An endpoint can also be exposed in an Amazon\nVPC, allowing direct connections from applications.\n\n\n# Footnotes\n\n[1] I'm not sure how feasible it is to set up and configure things like\nKerberos or LDAP in a cloud environment. But in principle I think we should\nallow customers to have the full power of PostgreSQL, including all\nauthentication options. However, it's up to the customer to configure\nit correctly.\n\n[2] Link auth is a way to both authenticate and to route the connection.\n\n[3] This assumes that cluster-ids are globally unique, across all\nNeon accounts.\n\n[4] The syntax accepted in the connection URL is limited by libpq. The\nonly way to pass arbitrary options to the server (or our proxy) is\nwith the \"options\" keyword, and the options must be percent-encoded. I\nthink the above would work but I haven't tested it.\n"
  },
  {
    "path": "docs/rfcs/017-console-split.md",
    "content": "# Splitting cloud console\n\nCreated on 17.06.2022\n\n## Summary\n\nCurrently we have `cloud` repository that contains code implementing public API for our clients as well as code for managing storage and internal infrastructure services. We can split everything user-related from everything storage-related to make it easier to test and maintain.\n\nThis RFC proposes to introduce a new control-plane service with HTTP API. The overall architecture will look like this:\n\n```markup\n.                    x\n       external area x internal area\n       (our clients) x (our services)\n                     x\n                     x                                                      ┌───────────────────────┐\n                     x ┌───────────────┐   >    ┌─────────────────────┐     │      Storage (EC2)    │\n                     x │  console db   │   >    │  control-plane db   │     │                       │\n                     x └───────────────┘   >    └─────────────────────┘     │ - safekeepers         │\n                     x         ▲           >               ▲                │ - pageservers         │\n                     x         │           >               │                │                       │\n┌──────────────────┐ x ┌───────┴───────┐   >               │                │     Dependencies      │\n│    browser UI    ├──►│               │   >    ┌──────────┴──────────┐     │                       │\n└──────────────────┘ x │               │   >    │                     │     │ - etcd                │\n                     x │    console    ├───────►│    control-plane    ├────►│ - S3                  │\n┌──────────────────┐ x │               │   >    │  (deployed in k8s)  │     │ - more?               
│\n│public API clients├──►│               │   >    │                     │     │                       │\n└──────────────────┘ x └───────┬───────┘   >    └──────────┬──────────┘     └───────────────────────┘\n                     x         │           >          ▲    │                            ▲\n                     x         │           >          │    │                            │\n                     x ┌───────┴───────┐   >          │    │                ┌───────────┴───────────┐\n                     x │ dependencies  │   >          │    │                │                       │\n                     x │- analytics    │   >          │    └───────────────►│       computes        │\n                     x │- auth         │   >          │                     │   (deployed in k8s)   │\n                     x │- billing      │   >          │                     │                       │\n                     x └───────────────┘   >          │                     └───────────────────────┘\n                     x                     >          │                                 ▲\n                     x                     >    ┌─────┴───────────────┐                 │\n┌──────────────────┐ x                     >    │                     │                 │\n│                  │ x                     >    │        proxy        ├─────────────────┘\n│     postgres     ├───────────────────────────►│  (deployed in k8s)  │\n│      users       │ x                     >    │                     │\n│                  │ x                     >    └─────────────────────┘\n└──────────────────┘ x                     >\n                                           >\n                                           >\n                             closed-source > open-source\n                                           >\n                                           >\n```\n\nNotes:\n\n- diagram is simplified in the less-important places\n- directed arrows are strict and mean that 
connections in the reverse direction are forbidden\n\nThis split is quite complex and this RFC proposes several smaller steps to achieve the larger goal: \n\n1. Start by refactoring the console code, the goal is to have console and control-plane code in the different directories without dependencies on each other.\n2. Do similar refactoring for tables in the console database, remove queries selecting data from both console and control-plane; move control-plane tables to a separate database.\n3. Implement control-plane HTTP API serving on a separate TCP port; make all console→control-plane calls to go through that HTTP API.\n4. Move control-plane source code to the neon repo; start control-plane as a separate service.\n\n## Motivation\n\nThese are the two most important problems we want to solve:\n\n- Publish open-source implementation of all our cloud/storage features\n- Make a unified control-plane that is used in all cloud (serverless) and local (tests) setups\n\nRight now we have some closed-source code in the cloud repo. That code contains implementation for running Neon computes in k8s and without that code it’s impossible to automatically scale PostgreSQL computes. That means that we don’t have an open-source serverless PostgreSQL at the moment.\n\nAfter splitting and open-sourcing control-plane service we will have source code and Docker images for all storage services. That control-plane service should have HTTP API for creating and managing tenants (including all our storage features), while proxy will listen for incoming connections and create computes on-demand.\n\nImproving our test suite is an important task, but requires a lot of prerequisites and may require a separate RFC. Possible implementation of that is described in the section [Next steps](#next-steps).\n\nAnother piece of motivation can be a better involvement of storage development team into a control-plane. 
By splitting control-plane from the console, it can be more convenient to test and develop control-plane while paying less attention to “business” features, such as user management, billing and analytics.\n\nFor example, console currently requires authentication providers such as GitHub OAuth to work at all, as well as nodejs to be able to build it locally. It will be more convenient to build and run it locally without these requirements.\n\n## Proposed implementation\n\n### Current state of things\n\nLet’s start with defining the current state of things at the moment of this proposal. We have three repositories containing source code:\n\n- open-source `postgres` — our fork of postgres\n- open-source `neon` — our main repository for storage source code\n- closed-source `cloud` — mostly console backend and UI frontend\n\nThis proposal aims not to change anything in the existing code in `neon` and `postgres` repositories, but to create control-plane service and move its source code from `cloud` to the `neon` repository. That means that we need to split code in `cloud` repo only, and will consider only this repository for exploring its source code.\n\nLet’s look at the miscellaneous things in the `cloud` repo which are NOT part of the console application, i.e. NOT the Go source code that is compiled to the `./console` binary. 
There we have:\n\n- command-line tools, such as cloudbench, neonadmin\n- markdown documentation\n- cloud operations scripts (helm, terraform, ansible)\n- configs and other things\n- e2e python tests\n- incidents playbooks\n- UI frontend\n- Make build scripts, code generation scripts\n- database migrations\n- swagger definitions\n\nAnd also let’s take a look at what we have in the console source code, which is the service we’d like to split:\n\n- API Servers\n    - Public API v2\n    - Management API v2\n    - Public API v1\n    - Admin API v1 (same port as Public API v1)\n    - Management API v1\n- Workers\n    - Monitor Compute Activity\n    - Watch Failed Operations\n    - Availability Checker\n    - Business Metrics Collector\n- Internal Services\n    - Auth Middleware, UserIsAdmin, Cookies\n    - Cable Websocket Server\n    - Admin Services\n        - Global Settings, Operations, Pageservers, Platforms, Projects, Safekeepers, Users\n    - Authenticate Proxy\n    - API Keys\n    - App Controller, serving UI HTML\n    - Auth Controller\n    - Branches\n    - Projects\n    - Psql Connect + Passwordless login\n    - Users\n    - Cloud Metrics\n    - User Metrics\n    - Invites\n    - Pageserver/Safekeeper management\n    - Operations, k8s/docker/common logic\n    - Platforms, Regions\n    - Project State\n    - Projects Roles, SCRAM\n    - Global Settings\n- Other things\n    - segment analytics integration\n    - sentry integration\n    - other common utilities packages\n\n### Drawing the splitting line\n\nThe most challenging and the most important thing is to define the line that will split new control-plane service from the existing cloud service. 
If we don’t get it right, then we can end up having a lot more issues without many benefits.\n\nWe propose to define that line as follows:\n\n- everything user-related stays in the console service\n- everything storage-related should be in the control-plane service\n- something that falls in between should be decided where to go, but most likely should stay in the console service\n- some similar parts should be in both services, such as admin/management/db_migrations\n\nWe call user-related all requests that can be connected to some user. The general idea is to not have any user_id in the control-plane service and operate exclusively on tenant_id+timeline_id, the same way as existing storage services work now (compute, safekeeper, pageserver).\n\nStorage-related things can be defined as doing any of the following:\n\n- using k8s API\n- doing requests to any of the storage services (proxy, compute, safekeeper, pageserver, etc.)\n- tracking current status of tenants/timelines, managing lifetime of computes\n\nBased on that idea, we can say that new control-plane service should have the following components:\n\n- single HTTP API for everything\n    - Create and manage tenants and timelines\n    - Manage global settings and storage configuration (regions, platforms, safekeepers, pageservers)\n    - Admin API for storage health inspection and debugging\n- Workers\n    - Monitor Compute Activity\n    - Watch Failed Operations\n    - Availability Checker\n- Internal Services\n    - Admin Services\n        - Global Settings, Operations, Pageservers, Platforms, Tenants, Safekeepers\n    - Authenticate Proxy\n    - Branches\n    - Psql Connect\n    - Cloud Metrics\n    - Pageserver/Safekeeper management\n    - Operations, k8s/docker/common logic\n    - Platforms, Regions\n    - Tenant State\n    - Compute Roles, SCRAM\n    - Global Settings\n\n---\n\nAnd other components should probably stay in the console service:\n\n- API Servers (no changes here)\n    - Public API 
v2\n    - Management API v2\n    - Public API v1\n    - Admin API v1 (same port as Public API v1)\n    - Management API v1\n- Workers\n    - Business Metrics Collector\n- Internal Services\n    - Auth Middleware, UserIsAdmin, Cookies\n    - Cable Websocket Server\n    - Admin Services\n        - Users admin stays the same\n        - Other admin services can redirect requests to the control-plane\n    - API Keys\n    - App Controller, serving UI HTML\n    - Auth Controller\n    - Projects\n    - User Metrics\n    - Invites\n    - Users\n    - Passwordless login\n- Other things\n    - segment analytics integration\n    - sentry integration\n    - other common utilities packages\n\nThere are also miscellaneous things that are useful for all kinds of services. So we can say that these things can be in both services:\n\n- markdown documentation\n- e2e python tests\n- make build scripts, code generation scripts\n- database migrations\n- swagger definitions\n\nThe single entrypoint to the storage should be control-plane API. After we define that API, we can have code-generated implementation for the client and for the server. 
The general idea is to move code implementing storage components from the console to the API implementation inside the new control-plane service.\n\nAfter the code is moved to the new service, we can fill the created void by making API calls to the new service:\n\n- authorization of the client\n- mapping user_id + project_id to the tenant_id\n- calling the control-plane API\n\n### control-plane API\n\nCurrently we have the following projects API in the console:\n\n```\nGET /projects/{project_id}\nPATCH /projects/{project_id}\nPOST /projects/{project_id}/branches\nGET /projects/{project_id}/databases\nPOST /projects/{project_id}/databases\nGET /projects/{project_id}/databases/{database_id}\nPUT /projects/{project_id}/databases/{database_id}\nDELETE /projects/{project_id}/databases/{database_id}\nPOST /projects/{project_id}/delete\nGET /projects/{project_id}/issue_token\nGET /projects/{project_id}/operations\nGET /projects/{project_id}/operations/{operation_id}\nPOST /projects/{project_id}/query\nGET /projects/{project_id}/roles\nPOST /projects/{project_id}/roles\nGET /projects/{project_id}/roles/{role_name}\nDELETE /projects/{project_id}/roles/{role_name}\nPOST /projects/{project_id}/roles/{role_name}/reset_password\nPOST /projects/{project_id}/start\nPOST /projects/{project_id}/stop\nPOST /psql_session/{psql_session_id}\n```\n\nIt looks fine and we probably already have clients relying on it. So we should not change it, at least for now. 
But most of these endpoints (if not all) are related to storage, and it can suggest us what control-plane API should look like:\n\n```\nGET /tenants/{tenant_id}\nPATCH /tenants/{tenant_id}\nPOST /tenants/{tenant_id}/branches\nGET /tenants/{tenant_id}/databases\nPOST /tenants/{tenant_id}/databases\nGET /tenants/{tenant_id}/databases/{database_id}\nPUT /tenants/{tenant_id}/databases/{database_id}\nDELETE /tenants/{tenant_id}/databases/{database_id}\nPOST /tenants/{tenant_id}/delete\nGET /tenants/{tenant_id}/issue_token\nGET /tenants/{tenant_id}/operations\nGET /tenants/{tenant_id}/operations/{operation_id}\nPOST /tenants/{tenant_id}/query\nGET /tenants/{tenant_id}/roles\nPOST /tenants/{tenant_id}/roles\nGET /tenants/{tenant_id}/roles/{role_name}\nDELETE /tenants/{tenant_id}/roles/{role_name}\nPOST /tenants/{tenant_id}/roles/{role_name}/reset_password\nPOST /tenants/{tenant_id}/start\nPOST /tenants/{tenant_id}/stop\nPOST /psql_session/{psql_session_id}\n```\n\nOne of the options here is to use gRPC instead of the HTTP, which has some useful features, but there are some strong points towards using plain HTTP:\n\n- HTTP API is easier to use for the clients\n- we already have HTTP API in pageserver/safekeeper/console\n- we probably want control-plane API to be similar to the console API, available in the cloud\n\n### Getting updates from the storage\n\nThere can be some valid cases, when we would like to know what is changed in the storage. For example, console might want to know when user has queried and started compute and when compute was scaled to zero after that, to know how much user should pay for the service. Another example is to get info about reaching the disk space limits. 
Yet another example is to do analytics, such as how many users had at least one active project in a month.\n\nAll of the above cases can happen without using the console, just by accessing compute through the proxy.\n\nTo solve this, we can have a log of events occurring in the storage (event logs). That is very similar to operations table we have right now, the only difference is that events are immutable and we cannot change them after saving to the database. For example, we might want to have events for the following activities:\n\n- We finished processing some HTTP API query, such as resetting the password\n- We changed some state, such as started or stopped a compute\n- Operation is created\n- Operation is started for the first time\n- Operation is failed for the first time\n- Operation is finished\n\nOnce we save these events to the database, we can create HTTP API to subscribe to these events. That API can look like this:\n\n```\nGET /events/<cursor>\n\n{\n  \"events\": [...],\n  \"next_cursor\": 123\n}\n```\n\nIt should be possible to replay event logs from some point of time, to get a state of almost anything from the storage services. That means that if we maintain some state in the control-plane database and we have a reason to have the same state in the console database, it is possible by polling events from the control-plane API and changing the state in the console database according to the events.\n\n### Next steps\n\nAfter implementing control-plane HTTP API and starting control-plane as a separate service, we might want to think of exploiting benefits of the new architecture, such as reorganizing test infrastructure. 
Possible options are listed in the [Next steps](#next-steps-1).\n\n## Non Goals\n\nRFC doesn’t cover the actual cloud deployment scripts and schemas, such as terraform, ansible, k8s yaml’s and so on.\n\n## Impacted components\n\nMostly console, but can also affect some storage service.\n\n## Scalability\n\nWe should support starting several instances of the new control-plane service at the same time.\n\nAt the same time, it should be possible to use only a single instance of control-plane, which can be useful for local tests.\n\n## Security implications\n\nNew control-plane service is an internal service, so no external requests can reach it. But at the same time, it contains API to do absolutely anything with any of the tenants. That means that a bad internal actor can potentially read and write all of the tenants. To make this safer, we can have one of these:\n\n- Simple option is to protect all requests with a single private key, so that no one can make requests without having that one key.\n- Another option is to have a separate token for every tenant and store these tokens in another secure place. 
This way it’s harder to access all tenants at once, because they have different tokens.\n\n## Alternative implementation\n\nThere was an idea to create a k8s operator for managing storage services and computes, but the author of this RFC is not really familiar with it.\n\nRegarding minor alternatives, there are other options for the name of the new control-plane service:\n\n- storage-ctl\n- cloud\n- cloud-ctl\n\n## Pros/cons of proposed approaches (TODO)\n\nPros:\n\n- All storage features are completely open-source\n- Better test coverage, less difference between cloud and local setups\n- Easier to develop storage and cloud features, because there is no need to set up console for that\n- Easier to deploy storage-only services to any cloud\n\nCons:\n\n- All storage features are completely open-source\n- Distributed services mean more code to connect different services and potential network issues\n- Console needs to have a dependency on storage API, there can be complications with developing new feature in a branch\n- More code to JOIN data from different services (console and control-plane)\n\n## Definition of Done\n\nWe have a new control-plane service running in the k8s. Source code for that control-plane service is located in the open-source neon repo.\n\n## Next steps\n\nAfter we’ve reached DoD, we can make further improvements.\n\nFirst thing that can benefit from the split is local testing. The same control-plane service can implement starting computes as local processes instead of k8s deployments. If it also supports starting pageservers/safekeepers/proxy for the local setup, then it can completely replace `./neon_local` binary, which is currently used for testing. 
The local testing environment can look like this:\n\n```\n┌─────────────────────┐     ┌───────────────────────┐\n│                     │     │      Storage (local)  │\n│  control-plane db   │     │                       │\n│   (local process)   │     │ - safekeepers         │\n│                     │     │ - pageservers         │\n└──────────▲──────────┘     │                       │\n           │                │     Dependencies      │\n┌──────────┴──────────┐     │                       │\n│                     │     │ - etcd                │\n│    control-plane    ├────►│ - S3                  │\n│   (local process)   │     │ - more?               │\n│                     │     │                       │\n└──────────┬──────────┘     └───────────────────────┘\n       ▲   │                            ▲\n       │   │                            │\n       │   │                ┌───────────┴───────────┐\n       │   │                │                       │\n       │   └───────────────►│       computes        │\n       │                    │   (local processes)   │\n       │                    │                       │\n┌──────┴──────────────┐     └───────────────────────┘\n│                     │                 ▲\n│        proxy        │                 │\n│   (local process)   ├─────────────────┘\n│                     │\n└─────────────────────┘\n```\n\nThe key thing here is that control-plane local service have the same API and almost the same implementation as the one deployed in the k8s. This allows to run the same e2e tests against both cloud and local setups.\n\nFor the python test_runner tests everything can stay mostly the same. To do that, we just need to replace `./neon_local` cli commands with API calls to the control-plane.\n\nThe benefit here will be in having fast local tests that are really close to our cloud setup. 
Bugs in k8s queries still cannot be found when running computes as local processes, but it should be really easy to start k8s locally (for example in k3s) and run the same tests with control-plane connected to the local k8s.\n\nTalking about console and UI tests, after the split there should be a way to test these without spinning up all the storage locally. New control-plane service has a well-defined API, allowing us to mock it. This way we can create UI tests to verify the right calls are issued after specific UI interactions and verify that we render correct messages when API returns errors."
  },
  {
    "path": "docs/rfcs/017-timeline-data-management.md",
    "content": "# Name\n\nTenant and timeline data management in pageserver\n\n## Summary\n\nThis RFC attempts to describe timeline-related data management as it's done now in pageserver, highlight current complexities caused by this and propose a set of changes to mitigate them.\n\nThe main goal is to prepare for future [on-demand layer downloads](https://github.com/neondatabase/neon/issues/2029), yet timeline data is one of the core primitive of pageserver, so a number of other RFCs are affected either.\nDue to that, this document won't have a single implementation, rather requiring a set of code changes to achieve the final state.\n\nRFC considers the repository at the `main` branch, commit [`28243d68e60ffc7e69f158522f589f7d2e09186d`](https://github.com/neondatabase/neon/tree/28243d68e60ffc7e69f158522f589f7d2e09186d) on the time of writing.\n\n## Motivation\n\nIn recent discussions, it became more clear that timeline-related code becomes harder to change: it consists of multiple disjoint modules, each requiring a synchronization to access.\nThe lower the code is, the complex the sync gets since many concurrent processes are involved and require orchestration to keep the data consistent.\nAs the number of modules and isolated data grows per timeline, more questions and corner cases arise:\n\n- https://github.com/neondatabase/neon/issues/1559\n  right now it's not straightened out what to do when the synchronization task fails for too many times: every separate module's data has to be treated differently.\n\n- https://github.com/neondatabase/neon/issues/1751\n  GC and compaction file activities are not well known outside their tasks code, causing race bugs\n\n- https://github.com/neondatabase/neon/issues/2003\n  Even the tenant management gets affected: we have to alter its state based on timeline state, yet the data for making the decision is separated and the synchronisation logic has bugs\n\n- more issues were brought in discussions, but apparently they were 
too specific to the code to mention them in the issues.\n  For instance, `tenant_mgr` itself is a static object that we can not mock anyhow, which reduces our capabilities to test the data synchronization logic.\n  In fact, we have zero Rust tests that cover the case of synchronizing more than one module's data.\n\nOn-demand layer downloads would require us to dynamically manage the layer files, which we are almost not doing at all on the module level, resulting in most of their APIs dealing with timelines, rather than the layer files.\nThe disjoint data would require data synchronization with possibly a chain of lock acquisitions, some async and some sync, and it would be hard to unit test it with the current code state.\n\nThis neither helps to easily start the on-demand download epic, nor makes it easy to add more timeline-related code on top, whatever the task is.\nWe have to develop a vision on a number of topics before progressing safely:\n\n- timeline and tenant data structure and how should we access it\n- sync and async worlds and in what way that should evolve\n- unit tests for the complex logic\n\nThis RFC aims to provide a general overview of the existing situation and propose ways to improve it.\nThe changes proposed are quite big and no single PR is expected to do the adjustments, they should gradually be done during the on-demand download work later.\n\n## What is a timeline and its data\n\nFirst, we need to define what data we want to manage per timeline.\nCurrently, the data every timeline operates on is:\n\n- a set of layer files, on the FS\n\n  Never updated files, created after pageserver's checkpoints and compaction runs, can be removed from the local FS due to compaction, gc or timeline deletion.\n\n- a set of layer files, on the remote storage\n\n  Identically named and placed in tenant subdirectories files on the remote storage (S3), copied by a special background sync thread\n\n- a `metadata` file, on the FS\n\n  Updated after every checkpoint 
with the newer `disk_consistent_lsn` and `latest_gc_cutoff_lsn` values. Used to quickly restore timeline's basic metadata on pageserver restart.\n  Also contains data about the ancestor, if the timeline was branched off another timeline.\n\n- an `index_part.json` file, on the remote storage\n\n  Contains `metadata` file contents and a list of layer files, available in the current S3 \"directory\" for the timeline.\n  Used to avoid potentially slow and expensive `S3 list` command, updated by the remote storage sync thread after every operation with the remote layer files.\n\n- LayerMap and PageCache, in memory\n\n  Dynamic, used to store and retrieve the page data for users.\n\n- timeline info, in memory\n\n  LSNs, walreceiver data, `RemoteTimelineIndex` and other data to share via HTTP API and internal processes.\n\n- metrics data, in memory\n\n  Data to push or provide to Prometheus, OpenTelemetry, etc.\n\nBesides the data, every timeline currently needs an etcd connection to receive WAL events and connect to safekeepers.\n\nA timeline could be an ancestor to another one, forming a dependency tree, which is implicit right now: every time relations are looked up in place, based on the corresponding `TimelineMetadata` struct contents.\nYet, there's knowledge on a tenant as a group of timelines, belonging to a single user which is used in GC and compaction tasks, run on every tenant.\n`tenant_mgr` manages tenant creation and its task startup, along with the remote storage sync for timeline layers.\n\nThe last file being managed per-tenant is the tenant config file, created and updated on the local FS to hold tenant-specific configuration between restarts.\nIt's not yet anyhow synchronized with the remote storage, so only exists on the local FS.\n\n### How the data is stored\n\nWe have multiple places where timeline data is stored:\n\n- `tenant_mgr` [holds](https://github.com/neondatabase/neon/blob/28243d68e60ffc7e69f158522f589f7d2e09186d/pageserver/src/tenant_mgr.rs#L43) a 
static `static ref TENANTS: RwLock<HashMap<ZTenantId, Tenant>>` with the `Tenant` having the `local_timelines: HashMap<ZTimelineId, Arc<DatadirTimelineImpl>>` inside\n\n- same `Tenant` above has actually two references to timelines: another via its `repo: Arc<RepositoryImpl>` with `pub type RepositoryImpl = LayeredRepository;` that [holds](https://github.com/neondatabase/neon/blob/28243d68e60ffc7e69f158522f589f7d2e09186d/pageserver/src/layered_repository.rs#L178) `Mutex<HashMap<ZTimelineId, LayeredTimelineEntry>>`\n\n- `RemoteTimelineIndex` [contains](https://github.com/neondatabase/neon/blob/28243d68e60ffc7e69f158522f589f7d2e09186d/pageserver/src/storage_sync/index.rs#L84) the metadata about timelines on the remote storage (S3) for sync reasons and possible HTTP API queries\n\n- `walreceiver` [stores](https://github.com/neondatabase/neon/blob/28243d68e60ffc7e69f158522f589f7d2e09186d/pageserver/src/walreceiver.rs#L60) the metadata for possible HTTP API queries and its [internal state](https://github.com/neondatabase/neon/blob/28243d68e60ffc7e69f158522f589f7d2e09186d/pageserver/src/walreceiver/connection_manager.rs#L245) with a reference to the timeline, its current connections and etcd subscription (if any)\n\n- `PageCache` contains timeline-related data, and is created globally for the whole pageserver\n\n- implicitly, we also have files on local FS, that contain timeline state. 
We operate on those files for some operations (GC, compaction), yet we don't synchronize the access to the files per se in any way: there are more high-level locks, ensuring only one of a group of operations is running at a time.\n\n  In practice though, `LayerMap` and layer files are tightly coupled together: current low-level code requires a timeline to be loaded into the memory to work with it, and the code removes the layer files after removing the entry from the `LayerMap` first.\n\nBased on this, a high-level pageserver's module diagram with data and entities could be:\n\n![timeline tenant state diagram](./images/017-timeline-data-management/timeline_tenant_state.svg)\n\nA few comments on the diagram:\n\n- the diagram does not show all the data and replaces a few newtypes and type aliases (for example, completely ignores \"unloaded\" timelines due to reasons described below)\n\n  It aims to show main data and means of synchronizing it.\n\n- modules tend to isolate their data inside and provide access to it via API\n\nDue to multitenancy, that results in a common pattern for storing both tenant and timeline data: `RwLock` or `Mutex` around the `HashMap<Id, Data>`, gc and compaction tasks also use the same lock pattern to ensure no concurrent runs are happening.\n\n- part of the modules is asynchronous, while the other is not, that complicates the data access\n\nCurrently, anything that's not related to tasks (walreceiver, storage sync, GC, compaction) is blocking.\n\nAsync tasks that try to access the data in the sync world, have to call `std::sync::Mutex::lock` method, which blocks the thread the calling async task runs on, also blocking other async tasks running in the same thread. 
Methods of `std::sync::RwLock` have the same issues, forcing async tasks either to block or spawn another, \"blocking\" task on a separate thread.\n\nSync tasks that try to access the data in the async world, cannot use `.await` hence have to have some `Runtime` doing those calls for them. [`tokio::sync::Mutex`](https://docs.rs/tokio/1.19.2/tokio/sync/struct.Mutex.html#method.blocking_lock) and [`tokio::sync::RwLock`](https://docs.rs/tokio/1.19.2/tokio/sync/struct.RwLock.html#method.blocking_read) provide an API to simplify such calls. Similarly, both `std::sync` and `tokio::sync` have channels that are able to communicate into one direction without blocking and requiring `.await` calls, hence can be used to connect both worlds without locking.\n\nSome modules are in transition, started as async \"blocking\" tasks and being fully synchronous in their entire code below the start. Current idea is to transfer them to the async further, but it's not yet done.\n\n- locks are used in two different ways:\n\n  - `RwLock<HashMap<..>>` ones to hold the shared data and ensure its atomic updates\n  - `Mutex<()>` for synchronizing the tasks, used to implicitly order the data access\n\n  The \"shared data\" locks of the first kind are mainly accessed briefly to either look up or alter the data, yet there are a few notable exceptions, such as\n  `latest_gc_cutoff_lsn: RwLock<Lsn>` that is explicitly held in a few places to prevent GC thread from progressing. Those are covered later in the data access diagrams.\n\n- some synchronizations are not yet implemented\n\nE.g. 
asynchronous storage sync module does not synchronize with almost synchronous GC and compaction tasks when the layer files are uploaded to the remote storage.\nThat occasionally results in the files being deleted before the storage upload task is run for this layer, but due to the incremental nature of the layer files, we can handle such situations without issues.\n\n- `LayeredRepository` covers lots of responsibilities: GC and compaction task synchronisation, timeline access (`local_timelines` in `Tenant` is not used directly before the timeline from the repository is accessed), layer flushing to FS, layer sync to remote storage scheduling, etc.\n\n### How is this data accessed?\n\nThere are multiple ways the data is accessed, from different sources:\n\n1. [HTTP requests](https://github.com/neondatabase/neon/blob/28243d68e60ffc7e69f158522f589f7d2e09186d/pageserver/src/http/routes.rs)\n\nHigh-level CRUD API for managing tenants, timelines and getting data about them.\nCurrent API list (modified for readability):\n\n```rust\n.get(\"/v1/status\", status_handler) // pageserver status\n.get(\"/v1/tenant\", tenant_list_handler)\n.post(\"/v1/tenant\", tenant_create_handler) // can create \"empty\" timelines or branch off the existing ones\n.get(\"/v1/tenant/:tenant_id\", tenant_status) // the only tenant public metadata\n.put(\"/v1/tenant/config\", tenant_config_handler) // tenant config data and local file manager\n.get(\"/v1/tenant/:tenant_id/timeline\", timeline_list_handler)\n.post(\"/v1/tenant/:tenant_id/timeline\", timeline_create_handler)\n.post(\"/v1/tenant/:tenant_id/attach\", tenant_attach_handler) // download entire tenant from the remote storage and load its timelines memory\n.post(\"/v1/tenant/:tenant_id/detach\", tenant_detach_handler) // delete all tenant timelines from memory, remote corresponding storage and local FS files\n.get(\"/v1/tenant/:tenant_id/timeline/:timeline_id\", 
timeline_detail_handler)\n.delete(\"/v1/tenant/:tenant_id/timeline/:timeline_id\", timeline_delete_handler)\n.get(\"/v1/tenant/:tenant_id/timeline/:timeline_id/wal_receiver\", wal_receiver_get_handler) // get walreceiver stats metadata\n```\n\nOverall, neither HTTP operation goes below `LayeredRepository` level and does not interact with layers: instead, they manage tenant and timeline entities, their configuration and metadata.\n\n`GET` data is small (relative to layer files contents), updated via brief `.write()/.lock()` calls and read via copying/cloning the data to release the lock soon.\nIt does not mean that the operations themselves are short, e.g. `tenant_attach_handler` downloads multiple files from the remote storage which might take time, yet the final data is inserted in memory via one brief write under the lock.\n\nNon-`GET` operations mostly follow the same rule, with two differences:\n\n- `tenant_detach_handler` has to wait for its background tasks to stop before shutting down, which requires more work with locks\n- `timeline_create_handler` currently requires GC to be paused before branching the timeline, which requires orchestrating too.\n  This is the only HTTP operation, able to load the timeline into memory: rest of the operations are reading the metadata or, as in `tenant_attach_handler`, schedule a deferred task to download timeline and load it into memory.\n\n\"Timeline data synchronization\" section below describes both complex cases in more details.\n\n2. 
[libpq requests](https://github.com/neondatabase/neon/blob/28243d68e60ffc7e69f158522f589f7d2e09186d/pageserver/src/page_service.rs)\n\nIs the main interface of pageserver, intended to handle libpq (and similar) requests.\nOperates on `LayeredTimeline` and, lower, `LayerMap` modules; all timelines accessed during the operation are loaded into memory immediately (if not loaded already), operations bail on timeline load errors.\n\n- `pagestream`\n\n  Page requests: `get_rel_exists`, `get_rel_size`, `get_page_at_lsn`, `get_db_size`\n\n  Main API points, intended to be used by `compute` to show the data to the user. All require requests to be made at certain Lsn, if this Lsn is not available in the memory, request processing is paused until that happens or bails after a timeout.\n\n- `basebackup` and `fullbackup`\n\n  Options to generate postgres-compatible backup archives.\n\n- `import basebackup`\n\n- `import wal`\n\n  Import the `pg_wal` section of the basebackup archive.\n\n- `get_last_record_rlsn`, `get_lsn_by_timestamp`\n\n\"Metadata\" retrieval methods that still require internal knowledge about layers.\n\n- `set`, `failpoints`, `show`\n\nUtility methods to support various edge cases or help with debugging/testing.\n\n- `do_gc`, `compact`, `checkpoint`\n\nManual triggers for corresponding tenant tasks (GC, compaction) and inmemory layer flushing on disk (checkpointing), with upload task scheduling as a follow-up.\n\nApart from loading into memory, every timeline layer has to be accessed using a specific set of locking primitives, especially if a write operation happens: otherwise, GC or compaction might spoil the data. User API is implicitly affected by this synchronization during branching, when a GC has to be orchestrated properly before the new timeline could be branched off the existing one.\nSee \"Timeline data synchronization\" section for the united synchronization diagram on the topic.\n\n3. 
internal access\n\nEntities within pageserver that update files on local FS and remote storage, metadata in memory; has to use internal data for those operations.\nPlaces that access internal, lower data are also required to have the corresponding timeline successfully loaded into memory and accessed with corresponding synchronization.\n\nIf ancestors' data is accessed via its child branch, it means more than one timeline has to be loaded into memory entirely and more locking primitives usage involved.\nRight now, all ancestors are resolved in-place: every place that has to check timeline's ancestor has to lock the timelines map, check if one is loaded into the memory, load it there or bail if it's not present, and get the information required and so on.\n\n- periodic GC and compaction tasks\n\nAlter metadata (GC info), in-memory data (layer relations, page caches, etc.) and layer files on disk.\nSame as its libpq counterparts, needs full synchronization with the low level layer management code.\n\n- storage sync task\n\nAlters metadata (`RemoteTimelineIndex`), layer files on remote storage (upload, delete) and local FS (download) and in-memory data (registers downloaded timelines in the repository).\nCurrently, does not know anything about layer files contents, rather focusing on the file structure and metadata file updates: due to the fact that the layer files cannot be updated (only created or deleted), storage sync is able to back up the files to the remote storage without further low-level synchronizations: only when the timeline is downloaded, a load operation is needed to run, possibly pausing GC and compaction tasks.\n\n- walreceiver and walingest task\n\nPer timeline, subscribes for etcd events from safekeeper and eventually spawns a walreceiver connection task to receive WAL from a safekeeper node.\nFills memory with data, eventually triggering a checkpoint task that creates a new layer file in the local FS and schedules a remote storage sync upload 
task.\nDuring WAL receiving, also updates a separate in-memory data structure with the walreceiver stats, used later via HTTP API.\n\nLayer updates require low-level set of sync primitives used to preserve the data consistency.\n\n- checkpoint (layer freeze) task\n\nPeriodic, short-lived tasks to generate a new layer file in the FS. Requires low level synchronization in the end, when the layer is being registered after creating and has additional mode to ensure only one concurrent compaction happens at a time.\n\n### Timeline data synchronization\n\nHere's a high-level timeline data access diagram, considering the synchronization locks, based on the state diagram above.\n\nFor brevity, diagrams do not show `RwLock<HashMap<..>>` data accesses, considering them almost instant to happen.\n`RwLock<LayerMap>` is close to be an exception to the previous rule, since it's taken in a multiple places to ensure all layers are inserted correctly.\nYet the only long operation in the current code is a `.write()` lock on the map during its creation, while all other lock usages tend to be short in the current code.\nNote though, that due to current \"working with loaded timeline only\", prevailing amount of the locks taken on the struct are `.write()` locks, not the `.read()` ones.\nTo simplify the diagrams, these accesses are now considered \"fast\" data access, not the synchronization attempts.\n\n`write_lock` synchronization diagram:\n\n![timeline data access synchronization(1)](./images/017-timeline-data-management/timeline_data_access_sync_1.svg)\n\nComments:\n\n- `write_lock: Mutex<()>` ensures that all timeline data being written into **in-memory layers** is done without races, one concurrent write at a time\n- `layer_flush_lock: Mutex<()>` and layer flushing seems to be slightly bloated with various ways to create a layer on disk and write it in memory\n  The lock itself seem to repeat `write_lock` purpose when it touches in-memory layers, and also to limit the on-disk 
layer creations.\n  Yet the latter is not really done consistently, since remote storage sync manages to download and register the new layers without touching the locks\n- `freeze_inmem_layer(true)` that touches both `write_lock` and `layer_flush_lock` seems not very aligned with the rest of the locks to those primitives; it also now restricts the layer creation concurrency even more, yet there are various `freeze_inmem_layer(false)` that are ignoring those restrictions at the same time\n\n![timeline data access synchronization(2)](./images/017-timeline-data-management/timeline_data_access_sync_2.svg)\n\nComments:\n\n- `partitioning: Mutex<(KeyPartitioning, Lsn)>` lock is a data sync lock that's not used to synchronize the tasks (all other such kinds were considered \"almost instant\" and omitted on the diagram), yet is very similar to what `write_lock` and `layer_flush_lock` do: it ensures the timeline in-memory data is up-to-date with the layer files state on disk, which is what `LayerMap` is for.\n\n- there are multiple locks that do similar task management operations:\n  - `gc_cs: Mutex<()>` and `latest_gc_cutoff_lsn: RwLock<Lsn>` ensure that branching and gc are not run concurrently\n  - `layer_removal_cs: Mutex<()>` lock ensures gc, compaction and timeline deletion via HTTP API do not run concurrently\n  - `file_lock: RwLock<()>` is used as a semaphore, to ensure \"all\" gc and compaction tasks are shut down and do not start\n    Yet that lock does take only gc and compaction from internal loops: libpq call is not cancelled and waited upon.\n\nThose operations do not seem to belong to a timeline. 
Moreover, some of those could be eliminated entirely due to duplication of their tasks.\n\n## Proposed implementation\n\n### How to structure timeline data access better\n\n- adjust tenant state handling\n\nCurrent [`TenantState`](https://github.com/neondatabase/neon/blob/28243d68e60ffc7e69f158522f589f7d2e09186d/pageserver/src/tenant_mgr.rs#L108) [changes](https://github.com/neondatabase/neon/blob/28243d68e60ffc7e69f158522f589f7d2e09186d/pageserver/src/tenant_mgr.rs#L317) mainly indicates whether GC and compaction tasks are running or not; another state, `Broken` shows only in case any timeline does not load during startup.\n\nWe could start both GC and compaction tasks at the time the tenant is created and adjust the tasks to throttle/sleep on timeline absence and wake up when the first one is added.\nThe latter becomes more important on download on demand, since we won't have the entire timeline in reach to verify its correctness. Moreover, if any network connection happens, the timeline could fail temporarily and entire tenant should be marked as broken due to that.\n\nSince nothing verifies the `TenantState` via HTTP API currently, it makes sense to remove the whole state entirely and don't write the code to synchronize its changes.\nInstead, we could indicate internal issues for every timeline and have a better API to \"stop\" timeline processing without deleting its data, making our API less restrictive.\n\n- remove the \"unloaded\" status for the timeline\n\nCurrent approach to timeline management [assumes](https://github.com/neondatabase/neon/blob/28243d68e60ffc7e69f158522f589f7d2e09186d/pageserver/src/layered_repository.rs#L486-L493)\n\n```rust\n#[derive(Clone)]\nenum LayeredTimelineEntry {\n    Loaded(Arc<LayeredTimeline>),\n    Unloaded {\n        id: ZTimelineId,\n        metadata: TimelineMetadata,\n    },\n}\n```\n\nsupposes that timelines have to be in `Unloaded` state.\n\nThe difference between both variants is whether its layer map was loaded from 
disk and kept in memory (Loaded) or not (Unloaded).\nThe idea behind such separation was to lazy load timelines in memory with all their layers only after its first access and potentially unload them later.\n\nYet now there's no public API methods, that deal with unloaded timelines' layers: all of them either bail when such timeline is worked on, or load it into memory and continue working.\nMoreover, every timeline in the local FS is loaded on pageserver startup now, so only two places where `Unloaded` variant is used are branching and timeline attach, with both loading the timeline into memory before the end of the operation.\nEven if that loading into memory bails for some reason, next GC or compaction task periodic run would load such timeline into memory.\nThere are a few timeline methods that return timeline metadata without loading its layers, but such metadata also comes from the `metadata` FS file, not the layer files (so no page info could be retrieved without loading the entire layer map first).\n\nWith the layer on-demand download, it's not feasible anymore to wait for the entire layer map to be loaded into the memory, since it might not even be available on the local FS when requested: `LayerMap` needs to be changed to contain metadata to retrieve the missing layers and handle partially present on the local FS timeline state.\n\nTo accommodate to that and move away from the redundant status, a timeline should always be \"loaded\" with its metadata read from the disk and its layer map prepared to be downloaded when requested, per layer.\n\nLayers in the layer map, on the other hand, could be in various state: loaded, unloaded, downloading, downloading failed, etc. 
and their state has to be handled instead, if we want to support on-demand download in the future.\n\nThis way, tenants and timelines could always try to serve requests and do their internal tasks periodically, trying to recover.\n\n- scale down the remote storage sync to per layer file, not per timeline as now\n\nDue to the reasons from the previous bullet, current remote storage model needs its timeline download approach to be changed.\nRight now, a timeline is marked as \"ready\" only after all its layers on the remote storage are downloaded on the local storage.\nWith the on-demand download approach, only remote storage timeline metadata should be downloaded from S3, leaving the rest of the layers ready for download if/when it's requested.\n\nNote: while the remote storage sync should operate per layer, it should stay global for all tenants, to better manage S3 limits and sync queue priorities.\nYet the only place using remote storage should be the layer map.\n\n- encapsulate `tenant_mgr` logic into a regular Rust struct, unite with part of the `Repository` and anything else needed to manage the timeline data in a single place and to test it independently\n\n[`Repository`](https://github.com/neondatabase/neon/blob/28243d68e60ffc7e69f158522f589f7d2e09186d/pageserver/src/repository.rs#L187) trait gets closer to `tenant_mgr` in terms of functionality: there are two background task-related functions, that are run on all timelines of a tenant: `gc_iteration` (it does allow running on a single timeline, but GC task runs it on all timelines) and `compaction_iteration` that are related to service tasks, not the data storage; and the metadata management functions, also not really related to the timeline contents.\n\n`tenant_mgr` proxies some of the `Repository` calls, yet both service tasks use `tenant_mgr` to access the data they need, creating a circular dependency between their APIs.\nTo avoid excessive synchronization between components, taking multiple locks for 
that and static state, we can organize the data access and updates in one place.\nOne potential benefit Rust gets from this is the ability to track and manage timeline resources, if all the related data is located in one place.\n\n- move `RemoteStorage` usage from `LayeredRepository` into `LayerMap`, as the rest of the layer-based entities (layer files, etc.)\n\nLayer == file in our model, since pageserver always either tries to load the LayerMap from disk for the timeline not in memory, or assumes the file contents matches its memory.\n`LayeredRepository` is one of the most loaded objects currently and not everything from it deserves unification with the `tenant_mgr`.\nIn particular, layer files need to be better prepared for future download on demand functionality, where every layer could be dynamically loaded and unloaded from memory and local FS.\nCurrent amount of locks and sync-async separation would make it hard to implement truly dynamic (un)loading; moreover, we would need retries with backoffs, since the unloaded layer files are most probably not available on the local FS either and network is not always reliable.\n\nOne of the solutions to the issue is already being developed for the remote storage sync: [SyncQueue](https://github.com/neondatabase/neon/blob/28243d68e60ffc7e69f158522f589f7d2e09186d/pageserver/src/storage_sync.rs#L463)\nThe queue is able to batch CRUD layer operations (both for local and remote FS contexts) and reorder them to increase the sync speed.\nSimilar approach could be generalized for all layer modifications, including in-memory ones such as GC or compaction: this way, we could manage all layer modifications and reads in one place with lesser locks and tests that are closer to unit tests.\n\n- change the approach to locking synchronization\n\nA number of locks in the timeline seem to be used to coordinate gc, compaction tasks and related processes.\nIt should be done in a task manager or other place, external to the 
timeline.\n\nTimeline contents still needs to be synchronized, considering the task work, so fields like `latest_gc_cutoff_lsn: RwLock<Lsn>` are expected to stay for that purpose, but general amount of locks should be reduced.\n\n### Putting it all together\n\nIf the proposal bullets are applied to the diagrams above, the state could be represented as:\n\n![proposed timeline tenant state](./images/017-timeline-data-management/proposed_timeline_tenant_state.svg)\n\nThe reorders aim to put all tasks into separated modules, with strictly defined interfaces and as little knowledge about other components as possible.\nThis way, all timeline data is now in the `data_storage`, including the GC, walreceiver, `RemoteTimelineIndex`, `LayerMap`, etc. with some API to get the data in the way,\nmore convenient for the data sync system inside.\nSo far, it seems that a few maps with `Arc<RwLock<SeparateData>>` are enough, with actual data operations added inside each `SeparateData` struct, if needed.\n\n`page_cache` is proposed to be placed into the same `data_storage` since it contains tenant timelines' data: this way, all metadata and data is in the same struct, simplifying things with Rust's borrow checker and allowing us to share internals between data modules and later might simplify timeline in-memory size tracking.\n\n`task_manager` is related to data storage and manages all tenant and timeline tasks, manages shared resources (runtimes, thread pools, etcd connection, etc.) 
and synchronizes tasks.\nAll locks such as `gc_cs` belong to this module tree, as primitives inherently related to the task synchronization.\nTasks have to access timelines and their metadata, but should do that through `data_storage` API and similar.\n\n`task_manager` should (re)start, stop and track all tasks that are run in it, selecting an appropriate runtime depending on a task kind (we have async/sync task separation, CPU and IO bound tasks separation, ...)\nSome locks such as `layer_removal_cs` one are not needed, if the only component that starts the tasks ensures they don't run concurrently.\n\n`LayeredTimeline` is still split into two parts, more high-level with whatever primitives needed to sync its state, and the actual state storage with `LayerMap` and other low level entities.\nOnly `LayerMap` knows what storage its layer files are taken from (inmem, local FS, etc.), and it's responsible for synchronizing the layers when needed, as well as reacting to sync events, successful or not.\n\nLast but not least, `tenant config file` has to be backed up into a remote storage, as tenant-specific information for all timelines.\nTenant and timelines have volatile information that's now partially mixed with constant information (e.g. 
fields in `metadata` file), that model should be better split and handled, in case we want to properly support its backups and synchronization.\n\n![proposed timeline data access synchronization(1)](./images/017-timeline-data-management/proposed_timeline_data_access_sync_1.svg)\n\nThere's still a need to keep inmemory layer buffer synchronized during layer freezing, yet that could happen on a layer level, not on a timeline level, as `write_lock` used to be, so we could lower the sync primitives one layer deeper, preparing us for download on demand feature, where multiple layers could be concurrently streamed and written from various data sources.\n\nFlushing the frozen layer requires creating a new layer on disk and further remote storage upload, so `LayerMap` has to get those flushed bytes and queue them later: no need to block in the timeline itself for anything again, rather locking on the layer level, if needed.\n\n![proposed timeline data access synchronization(2)](./images/017-timeline-data-management/proposed_timeline_data_access_sync_2.svg)\n\nLock diagrams legend:\n\n![lock diagrams legend](./images/017-timeline-data-management/lock_legend.svg)\n\nAfter the frozen layers are flushed, something has to ensure that the layer structure is intact, so a repartitioning lock is needed still, and could also guard the layer map structure changes, since both are needed either way.\nThis locking belongs to the `LowLevelLayeredTimeline` from the proposed data structure diagram, as the place with all such data being held.\n\nSimilarly, branching is still required to be done after certain Lsn in our current model, but this needs only one lock to synchronize and that could be the `gc_cs: Mutex<()>` lock.\nIt raises the question of where this lock has to be placed, it's the only place that requires pausing a GC task during external, HTTP request handling.\nThe right place for the lock seems to be the `task_manager` that could manage GC in more fine-grained way to 
accommodate the incoming branching request.\n\nThere's no explicit lock sync between GC, compaction or other mutually exclusive tasks: it is a job of the `task_manager` to ensure those are not run concurrently.\n"
  },
  {
    "path": "docs/rfcs/018-storage-messaging-2.md",
    "content": "# Storage messaging\n\nSafekeepers need to communicate to each other to\n* Trim WAL on safekeepers;\n* Decide on which SK should push WAL to the S3;\n* Decide on when to shut down SK<->pageserver connection;\n* Understand state of each other to perform peer recovery;\n\nPageservers need to communicate to safekeepers to decide which SK should provide\nWAL to the pageserver.\n\nThis is an iteration on [015-storage-messaging](https://github.com/neondatabase/neon/blob/main/docs/rfcs/015-storage-messaging.md) describing current situation,\npotential performance issue and ways to address it.\n\n## Background\n\nWhat we have currently is very close to etcd variant described in\n015-storage-messaging. Basically, we have single `SkTimelineInfo` message\nperiodically sent by all safekeepers to etcd for each timeline.\n* Safekeepers subscribe to it to learn status of peers (currently they subscribe to\n  'everything', but they can and should fetch data only for timelines they hold).\n* Pageserver subscribes to it (separate watch per timeline) to learn safekeepers\n  positions; based on that, it decides from which safekeepers to pull WAL.\n\nAlso, safekeepers use etcd elections API to make sure only single safekeeper\noffloads WAL.\n\nIt works, and callmemaybe is gone. However, this has a performance\nhazard. Currently deployed etcd can do about 6k puts per second (using its own\n`benchmark` tool); on my 6 core laptop, while running on tmpfs, this gets to\n35k. Making benchmark closer to our usage [etcd watch bench](https://github.com/arssher/etcd-client/blob/watch-bench/examples/watch_bench.rs),\nI get ~10k received messages per second with various number of publisher-subscribers\n(laptop, tmpfs). Diving this by 12 (3 sks generate msg, 1 ps + 3 sk consume them) we\nget about 800 active timelines, if message is sent each second. 
Not extremely\nlow, but quite reachable.\n\nA lot of idle watches seem to be ok though -- which is good, as pageserver\nsubscribes to all its timelines regardless of their activity.\n\nAlso, running etcd with fsyncs disabled is messy -- data dir must be wiped on\neach restart or there is a risk of corruption errors.\n\nThe reason is etcd doing much more than what we need; it is a fault tolerant\nstore with strong consistency, but I claim all we need here is just simplest pub\nsub with best effort delivery, because\n* We already have centralized source of truth for long running data, like which\n  tlis are on which nodes  -- the console.\n* Momentary data (safekeeper/pageserver progress) doesn't make sense to persist.\n  Instead of putting each change to broker, expecting it to reliably deliver, it\n  is better to just have constant flow of data for active timelines: 1) they\n  serve as natural heartbeats -- if node can't send, we shouldn't pull WAL from\n  it 2) it is simpler -- no need to track delivery to/from the broker.\n  Moreover, latency here is important: the faster we obtain fresh data, the\n  faster we can switch to proper safekeeper after failure.\n* As for WAL offloading leader election, it is trivial to achieve through these\n  heartbeats -- just take suitable node through deterministic rule (min node\n  id).  Once network is stable, this is a converging process (well, except\n  complicated failure topology, but even then making it converge is not\n  hard). Such elections bear some risk of several offloaders running\n  concurrently for a short period of time, but that's harmless.\n\n  Generally, if one needs strong consistency, electing leader per se is not\n  enough; it must be accompanied with number (logical clock ts), checked at\n  every action to track causality. 
s3 doesn't provide CAS, so it can't\n  differentiate old/new leader, this must be solved differently.\n\n  We could use etcd CAS (its most powerful/useful primitive actually) to issue\n  these leader numbers (and e.g. prefix files in s3), but currently I don't see\n  need for that.\n\n\nObviously best effort pub sub is much simpler and more performant; the one proposed is\n\n## gRPC broker\n\nI took tonic and [prototyped](https://github.com/neondatabase/neon/blob/asher/neon-broker/broker/src/broker.rs) the replacement of functionality we currently use\nwith grpc streams and tokio mpsc channels. The implementation description is at the file header.\n\nIt is just 500 lines of code and core functionality is complete. 1-1 pub sub\ngives about 120k received messages per second; having multiple subscribers in\ndifferent connections quickly scales to 1 million received messages per second.\nI had concerns about many concurrent streams in a single connection, but 2^20\nsubscribers still work (though eat memory, with 10 publishers 20GB are consumed;\nin this implementation each publisher holds full copy of all subscribers). There\nis `bench.rs` nearby which I used for testing.\n\n`SkTimelineInfo` is wired here, but another message can be added (e.g. if\npageservers want to communicate with each other) with templating.\n\n### Fault tolerance\n\nSince such broker is stateless, we can run it under k8s. Or add proxying to\nother members, with best-effort this is simple.\n\n### Security implications\n\nCommunication happens in a private network that is not exposed to users;\nadditionally we can add auth to the broker.\n\n## Alternative: get existing pub-sub\n\nWe could take some existing pub sub solution, e.g. RabbitMQ, Redis. But in this\ncase IMV simplicity of our own outweighs external dependency costs (RabbitMQ is\nmuch more complicated and needs VM; Redis Rust client maintenance is not\nideal...). 
Also note that projects like CockroachDB and TiDB are based on gRPC\nas well.\n\n## Alternative: direct communication\n\nApart from being transport, broker solves one more task: discovery, i.e. letting\nsafekeepers and pageservers find each other. We can let safekeepers know, for\neach timeline, both other safekeepers for this timeline and pageservers serving\nit. In this case direct communication is possible:\n - each safekeeper pushes to each other safekeeper status of timelines residing\n   on both of them, letting remove WAL, decide who offloads, decide on peer\n   recovery;\n - each safekeeper pushes to each pageserver status of timelines residing on\n   both of them, letting pageserver choose from which sk to pull WAL;\n\nIt was mostly described in [014-safekeeper-gossip](https://github.com/neondatabase/neon/blob/main/docs/rfcs/014-safekeepers-gossip.md), but I want to recap on that.\n\nThe main pro is one less dependency: fewer moving parts, easier to run Neon\nlocally/manually, fewer places to monitor. Fault tolerance for broker disappears,\nno kuber or something. To me this is a big thing.\n\nAlso (though not a big thing) idle watches for inactive timelines disappear:\nnaturally safekeepers learn about compute connection first and start pushing\nstatus to pageserver(s), notifying it should pull.\n\nImportantly, I think that eventually knowing and persisting peers and\npageservers on safekeepers is inevitable:\n- Knowing peer safekeepers for the timeline is required for correct\n  automatic membership change -- new member set must be hardened on old\n  majority before proceeding. It is required to get rid of sync-safekeepers\n  as well (peer recovery up to flush_lsn).\n- Knowing pageservers where the timeline is attached is needed to\n  1. Understand when to shut down activity on the timeline, i.e. push data to\n     the broker. We can have a lot of timelines sleeping quietly which\n\t shouldn't occupy resources.\n  2. 
Preserve WAL for these (currently we offload to s3 and take it from there,\n     but serving locally is better, and we get one less condition on which WAL\n     can be removed from s3).\n\nI suppose this membership data should be passed to safekeepers directly from the\nconsole because\n1. Console is the original source of this data, conceptually this is the\n   simplest way (rather than passing it through compute or something).\n2. We already have similar code for deleting timeline on safekeepers\n   (and attaching/detaching timeline on pageserver), this is a typical\n   action -- queue operation against storage node and execute it until it\n   completes (or timeline is dropped).\n\nCons of direct communication are\n- It is more complicated: each safekeeper should maintain set of peers it talks\n  to, and set of timelines for each such peer -- they ought to be multiplexed\n  into single connection.\n- Totally, we have O(n^2) connections instead of O(n) with broker schema\n  (still O(n) on each node). However, these are relatively stable, async and\n  thus not very expensive, I don't think this is a big problem. Up to 10k\n  storage nodes I doubt connection overhead would be noticeable.\n\nI'd use gRPC for direct communication, and in this sense gRPC based broker is a\nstep towards it.\n"
  },
  {
    "path": "docs/rfcs/019-tenant-timeline-lifecycles.md",
    "content": "# Managing Tenant and Timeline lifecycles\n\n## Summary\n\nThe pageserver has a Tenant object in memory for each tenant it manages, and a\nTimeline for each timeline. There are a lot of tasks that operate on the tenants\nand timelines with references to those objects. We have some mechanisms to track\nwhich tasks are operating on each Tenant and Timeline, and to request them to\nshutdown when a tenant or timeline is deleted, but it does not cover all uses,\nand as a result we have many race conditions around tenant/timeline shutdown.\n\n## Motivation\n\nWe have a bunch of race conditions that can produce weird errors and can be hard\nto track down.\n\n## Non Goals\n\nThis RFC only covers the problem of ensuring that a task/thread isn't operating\non a Tenant or Timeline. It does not cover what states, aside from Active and\nnon-Active, each Tenant and Timeline should have, or when exactly the transitions\nshould happen.\n\n## Impacted components (e.g. pageserver, safekeeper, console, etc)\n\nPageserver. Although I wonder if the safekeeper should have a similar mechanism.\n\n## Current situation\n\nMost pageserver tasks of are managed by task_mgr.rs:\n\n- LibpqEndpointListener\n- HttpEndPointListener\n- WalReceiverManager and -Connection\n- GarbageCollector and Compaction\n- InitialLogicalSizeCalculation\n\nIn addition to those tasks, the walreceiver performs some direct tokio::spawn\ncalls to spawn tasks that are not registered with 'task_mgr'. And all of these\ntasks can spawn extra operations with tokio spawn_blocking.\n\nWhenever a tenant or timeline is removed from the system, by pageserver\nshutdown, delete_timeline or tenant-detach operation, we rely on the task\nregistry in 'task_mgr.rs' to wait until there are no tasks operating on the\ntenant or timeline, before its Tenant/Timeline object is removed. That relies on\neach task to register itself with the tenant/timeline ID in\n'task_mgr.rs'. However, there are many gaps in that. 
For example,\nGarbageCollection and Compaction tasks are registered with the tenant, but when\nthey proceed to operate on a particular timeline of the tenant, they don't\nregister with timeline ID. Because of that, the timeline can be deleted while GC\nor compaction is running on it, causing failures in the GC or compaction (see\nhttps://github.com/neondatabase/neon/issues/2442).\n\nAnother problem is that the task registry only works for tokio Tasks. There is\nno way to register a piece of code that runs inside spawn_blocking(), for\nexample.\n\n## Proposed implementation\n\nThis \"voluntary\" registration of tasks is fragile. Let's use Rust language features\nto enforce that a tenant/timeline cannot be removed from the system when there is\nstill some code operating on it.\n\nLet's introduce new Guard objects for Tenant and Timeline, and do all actions through\nthe Guard object. Something like:\n\nTenantActiveGuard: Guard object over Arc<Tenant>. When you acquire the guard,\nthe code checks that the tenant is in Active state. If it's not, you get an\nerror. You can change the state of the tenant to Stopping while there are\nTenantActiveGuard objects still on it, to prevent new TenantActiveGuards from\nbeing acquired, but the Tenant cannot be removed until all the guards are gone.\n\nTenantMaintenanceGuard: Like TenantActiveGuard, but can be held even when the\ntenant is not in Active state. Used for operations like attach/detach. Perhaps\nallow only one such guard on a Tenant at a time.\n\nSimilarly for Timelines. We don't currently have a \"state\" on Timeline, but I think\nwe need at least two states: Active and Stopping. The Stopping state is used at\ndeletion, to prevent new TimelineActiveGuards from appearing, while you wait for\nexisting TimelineActiveGuards to die out.\n\nThe shutdown-signaling, using shutdown_watcher() and is_shutdown_requested(),\nprobably also needs changes to deal with the new Guards. 
The rule is that if you\nhave a TenantActiveGuard, and the tenant's state changes from Active to\nStopping, the is_shutdown_requested() function should return true, and\nshutdown_watcher() future should return.\n\nThis signaling doesn't necessarily need to cover all cases. For example, if you\nhave a block of code in spawn_blocking(), it might be acceptable if\nis_shutdown_requested() doesn't return true even though the tenant is in\nStopping state, as long as the code finishes reasonably fast.\n"
  },
  {
    "path": "docs/rfcs/020-pageserver-s3-coordination.md",
    "content": "# Coordinating access of multiple pageservers to the same s3 data\n\n## Motivation\n\nThere are some blind spots around coordinating access of multiple pageservers\nto the same s3 data. Currently this is applicable only to tenant relocation\ncase, but in the future we'll need to solve similar problems for\nreplica/standby pageservers.\n\n## Impacted components (e.g. pageserver, safekeeper, console, etc)\n\nPageserver\n\n## The problem\n\n### Relocation\n\nDuring relocation both pageservers can write to s3. This should be ok for all\ndata except the `index_part.json`. For index part it causes problems during\ncompaction/gc because they remove files from index/s3.\n\nImagine this case:\n\n```mermaid\nsequenceDiagram\n    autonumber\n    participant PS1\n    participant S3\n    participant PS2\n\n    PS1->>S3: Uploads L1, L2 <br/> Index contains L1 L2\n    PS2->>S3: Attach called, sees L1, L2\n    PS1->>S3: Compaction comes <br/> Removes L1, adds L3\n    note over S3: Index now L2, L3\n    PS2->>S3: Uploads new layer L4 <br/> (added to previous view of the index)\n    note over S3: Index now L1, L2, L4\n```\n\nAt this point it is not possible to restore from index, it contains L2 which\nis no longer available in s3 and doesn't contain L3 added by compaction by the\nfirst pageserver. So if any of the pageservers restart initial sync will fail\n(or in on-demand world it will fail a bit later during page request from\nmissing layer)\n\n### Standby pageserver\n\nAnother related case is standby pageserver. In this case second pageserver can\nbe used as a replica to scale reads and serve as a failover target in case\nfirst one fails.\n\nIn this mode second pageserver needs to have the same picture of s3 files to\nbe able to load layers on-demand. To accomplish that second pageserver\ncannot run gc/compaction jobs. Instead it needs to receive updates for index\ncontents. 
(There is no need to run walreceiver on the second pageserver then).\n\n## Observations\n\n- If both pageservers ingest wal then their layer set diverges, because layer\n  file generation is not deterministic\n- If one of the pageservers does not ingest wal (and just picks up layer\n  updates) then it lags behind and cannot really answer queries in the same\n  pace as the primary one\n- Can compaction help make layers deterministic? E g we do not upload level\n  zero layers and construction of higher levels should be deterministic.\n  This way we can guarantee that layer creation by timeout wont mess things up.\n  This way one pageserver uploads data and second one can just ingest it.\n  But we still need some form of election\n\n## Solutions\n\n### Manual orchestration\n\nOne possible solution for relocation case is to orchestrate background jobs\nfrom outside. The oracle who runs migration can turn off background jobs on\nPS1 before migration and then run migration -> enable them on PS2. The problem\ncomes if migration fails. In this case in order to resume background jobs\noracle needs to guarantee that PS2 doesn't run background jobs and if it doesn't\nrespond then PS1 is stuck unable to run compaction/gc. This cannot be solved\nwithout human ensuring that no upload from PS2 can happen. In order to be able\nto resolve this automatically CAS is required on S3 side so pageserver can\navoid overwriting index part if it is no longer the leading one\n\nNote that flag that disables background jobs needs to be persistent, because\notherwise pageserver restart will clean it\n\n### Avoid index_part.json\n\nIndex part consists of two parts, list of layers and metadata. List of layers\ncan be easily obtained by `ListObjects` S3 API method. But what to do with\nmetadata? 
Create metadata instance for each checkpoint and add some counter\nto the file name?\n\nBack to potentially long s3 ls.\n\n### Coordination based approach\n\nDo it like safekeepers chose leader for WAL upload. Ping each other and decide\nbased on some heuristics e g smallest node id. During relocation PS1 sends\n\"resign\" ping message so others can start election without waiting for a timeout.\n\nThis still leaves metadata question open and non deterministic layers are a\nproblem as well\n\n### Avoid metadata file\n\nOne way to eliminate metadata file is to store it in layer files under some\nspecial key. This may resonate with intention to keep all relation sizes in\nsome special segment to avoid initial download during size calculation.\nMaybe with that we can even store pre calculated value.\n\nAs a downside each checkpoint gets 512 bytes larger.\n\nIf we entirely avoid metadata file this opens up many approaches\n\n* * *\n\nDuring discussion it seems that we converged on the approach consisting of:\n\n- index files stored per pageserver in the same timeline directory. With that\n  index file name starts to look like: `<pageserver_node_id>_index_part.json`.\n  In such set up there are no concurrent overwrites of index file by different\n  pageservers.\n- For replica pageservers the solution would be for primary to broadcast index\n  changes to any followers with an ability to check index files in s3 and\n  restore the full state. To properly merge changes with index files we can use\n  a counter that is persisted in an index file, is incremented on every change\n  to it and passed along with broadcasted change. This way we can determine\n  whether we need to apply change to the index state or not.\n- Responsibility for running background jobs is assigned externally. Pageserver\n  keeps locally persistent flag for each tenant that indicates whether this\n  pageserver is considered as primary one or not. 
TODO what happens if we\n  crash and cannot start for some extended period of time? Control plane can\n  assign ownership to some other pageserver. Pageserver needs some way to check\n  if it's still the blessed one. Maybe by explicit request to control plane on\n  start.\n\nRequirement for deterministic layer generation was considered overly strict\nbecause of two reasons:\n\n- It can limit possible optimizations e g when pageserver wants to reshuffle\n  some data locally and doesn't want to coordinate this\n- The deterministic algorithm itself can change so during deployments for some\n  time there will be two different versions running at the same time which can\n  cause non determinism\n\n### External elections\n\nThe above case with lost state in this schema with externally managed\nleadership is represented like this:\n\nNote that here we keep objects list in the index file.\n\n```mermaid\nsequenceDiagram\n    autonumber\n    participant PS1\n    participant CP as Control Plane\n    participant S3\n    participant PS2\n\n    note over PS1,PS2: PS1 starts up and is still a leader\n    PS1->>CP: Am I still the leader for Tenant X?\n    activate CP\n    CP->>PS1: Yes\n    deactivate CP\n    PS1->>S3: Fetch PS1 index.\n    note over PS1: Continue operations, start background jobs\n    note over PS1,PS2: PS1 starts up and is not a leader anymore\n    PS1->>CP: Am I still the leader for Tenant X?\n    CP->>PS1: No\n    PS1->>PS2: Subscribe to index changes\n    PS1->>S3: Fetch PS1 and PS2 indexes\n    note over PS1: Combine index file to include layers <br> from both indexes to be able <br> to see newer files from leader (PS2)\n    note over PS1: Continue operations, do not start background jobs\n```\n\n### Internal elections\n\nTo manage leadership internally we can use broker to exchange pings so nodes\ncan decide on the leader roles. 
In case multiple pageservers are active leader\nis the one with lowest node id.\n\nOperations with internally managed elections:\n\n```mermaid\nsequenceDiagram\n    autonumber\n    participant PS1\n    participant S3\n\n    note over PS1: Starts up\n    note over PS1: Subscribes to changes, waits for two ping <br> timeouts to see if there is a leader\n    PS1->>S3: Fetch indexes from s3\n    alt there is a leader\n        note over PS1: do not start background jobs, <br> continue applying index updates\n    else there is no leader\n        note over PS1: start background jobs, <br> broadcast index changes\n    end\n\n    note over PS1,S3: Then the picture is similar to external elections <br> the difference is that follower can become a leader <br> if there are no pings after some timeout new leader gets elected\n```\n\n### Eviction\n\nWhen two pageservers operate on a tenant for extended period of time follower\ndoesn't perform write operations in s3. When layer is evicted follower relies\non updates from primary to get info about layers it needs to cover range for\nevicted layer.\n\nNote that it wont match evicted layer exactly, so layers will overlap and\nlookup code needs to correctly handle that.\n\n### Relocation flow\n\nActions become:\n\n- Attach tenant to new pageserver\n- New pageserver becomes follower since previous one is still leading\n- New pageserver starts replicating from safekeepers but does not upload layers\n- Detach is called on the old one\n- New pageserver becomes leader after it realizes that old one disappeared\n\n### Index File\n\nUsing `s3 ls` on startup simplifies things, but we still need metadata, so we\nneed to fetch index files anyway. If they contain list of files we can combine\nthem and avoid costly `s3 ls`\n\n### Remaining issues\n\n- More than one remote consistent lsn for safekeepers to know\n\nAnything else?\n\n### Proposed solution\n\nTo recap. 
On meeting we converged on approach with external elections but I\nthink it will be overall harder to manage and will introduce a dependency on\ncontrol plane for pageserver. Using separate index files for each pageserver\nconsisting of log of operations and a metadata snapshot should be enough.\n\n### What we need to get there?\n\n- Change index file structure to contain log of changes instead of just the\n  file list\n- Implement pinging/elections for pageservers\n"
  },
  {
    "path": "docs/rfcs/021-metering.md",
    "content": "# Consumption tracking\n\n\n# Goals\n\nThis proposal is made with two mostly but not entirely overlapping goals:\n\n* Collect info that is needed for consumption-based billing\n* Cross-check AWS bills\n\n\n# Metrics\n\nThere are six metrics to collect:\n\n* CPU time. Wall clock seconds * the current number of cores. We have a fixed ratio of memory to cores, so the current memory size is the function of the number of cores. Measured per each `endpoint`.\n\n* Traffic. In/out traffic on the proxy. Measured per each `endpoint`.\n\n* Written size. Amount of data we write. That is different from both traffic and storage size, as only during the writing we\n\n  a) occupy some disk bandwidth on safekeepers\n\n  b) necessarily cross AZ boundaries delivering WAL to all safekeepers\n\n  Each timeline/branch has at most one writer, so the data is collected per branch.\n\n* Synthetic storage size. That is what is exposed now with pageserver's `/v1/tenant/{}/size`. Looks like now it is per-tenant. (Side note: can we make it per branch to show as branch physical size in UI?)\n\n* Real storage size. That is the size of the tenant directory on the pageservers disk. Per-tenant.\n\n* S3 storage size. That is the size of the tenant data on S3. Per-tenant.\n\nThat info should be enough to build an internal model that predicts AWS price (hence tracking `written data` and `real storage size`). As for the billing model we probably can get away with mentioning only `CPU time`, `synthetic storage size`, and `traffic` consumption.\n\n# Services participating in metrics collection\n\n## Proxy\n\nFor actual implementation details check `/docs/consumption_metrics.md`\n\nProxy is the only place that knows about traffic flow, so it tracks it and reports it with quite a small interval, let's say 1 minute. A small interval is needed here since the proxy is stateless, and any restart will reset accumulated consumption. 
Also proxy should report deltas since the last report, not an absolute value of the counter. Such kind of events is easier to integrate over a period of time to get the amount of traffic during some time interval.\n\nExample event:\n\n```json\n{\n\"metric\": \"proxy_io_bytes_per_client\",\n\"type\": \"incremental\",\n\"start_time\": \"2022-12-28T11:07:19.317310284Z\",\n\"stop_time\": \"2022-12-28T11:07:19.317310284Z\",\n\"idempotency_key\": \"2022-12-28 11:07:19.317310324 UTC-1-4019\",\n\"value\": 12345454,\n\"endpoint_id\": \"5d07d9ce9237c4cd845ea7918c0afa7d\",\n}\n```\n\nSince we report deltas over some period of time, it makes sense to include `event_start_time`/`event_stop_time` where `event_start_time` is the time of the previous report. That will allow us to identify metering gaps better (e.g., failed send/delivery).\n\nWhen there is no active connection proxy can avoid reporting anything. Also, deltas are additive, so several console instances serving the same user and endpoint can report traffic without coordination.\n\n## Console\n\nThe console knows about start/stop events, so it knows the amount of CPU time allocated to each endpoint. It also knows about operation successes and failures and can avoid billing clients after unsuccessful 'suspend' events. The console doesn't know the current compute size within the allowed limits on the endpoint. 
So with CPU time, we do the following:\n\n* While we don't yet have the autoscaling console can report `cpu time` as the number of seconds since the last `start_compute` event.\n\n* When we have autoscaling, `autoscaler-agent` can report `cpu time`*`compute_units_count` in the same increments as the proxy reports traffic.\n\nExample event:\n\n```json\n{\n    \"metric\": \"effective_compute_seconds\",\n    \"type\": \"increment\",\n    \"endpoint_id\": \"blazing-warrior-34\",\n    \"event_start_time\": ...,\n    \"event_stop_time\": ...,\n    \"value\": 12345454,\n}\n```\n\nI'd also suggest reporting one value, `cpu time`*`compute_units_count`, instead of two separate fields as it makes event schema simpler (it is possible to treat it the same way as traffic) and preserves additivity.\n\n## Pageserver\n\nFor actual implementation details check `/docs/consumption_metrics.md`\n\nPageserver knows / has access to / can calculate the rest of the metrics:\n\n* Written size -- that is basically `last_received_lsn`,\n* Synthetic storage size -- there is a way to calculate it, albeit a costly one,\n* Real storage size -- there is a way to calculate it using a layer map or filesystem,\n* S3 storage size -- can calculate it by S3 API calls\n\nSome of those metrics are expensive to calculate, so the reporting period here is driven mainly by implementation details. We can set it to, for example, once per hour. Not a big deal since the pageserver is stateful, and all metrics can be reported as an absolute value, not increments. 
At the same time, a smaller reporting period improves UX, so it would be good to have something more real-time.\n\n`written size` is primarily a safekeeper-related metric, but since it is available on both pageserver and safekeeper, we can avoid reporting anything from the safekeeper.\n\nExample event:\n\n```json\n{\n\"metric\": \"remote_storage_size\",\n\"type\": \"absolute\",\n\"time\": \"2022-12-28T11:07:19.317310284Z\",\n\"idempotency_key\": \"2022-12-28 11:07:19.317310324 UTC-1-4019\",\n\"value\": 12345454,\n\"tenant_id\": \"5d07d9ce9237c4cd845ea7918c0afa7d\",\n\"timeline_id\": \"a03ebb4f5922a1c56ff7485cc8854143\",\n}\n```\n\n# Data collection\n\n## Push vs. pull\n\nWe already have pull-based Prometheus metrics, so it is tempting to use them here too. However, in our setup, it is hard to tell when some metric changes. For example, garbage collection will constantly free some disk space over a week, even if the project is down for that week. We could also iterate through all existing tenants/branches/endpoints, but that means some amount of code to do that properly and most likely we will end up with some per-metric hacks in the collector to cut out some of the tenants that are surely not changing that metric.\n\nWith the push model, it is easier to publish data only about actively changing metrics -- pageserver knows when it performs s3 offloads, garbage collection and starts/stops consuming data from the safekeeper; proxy knows about connected clients; console / autoscaler-agent knows about active cpu time.\n\nHence, let's go with a push-based model.\n\n## Common bus vs. proxying through the console\n\nWe can implement such push systems in a few ways:\n\na. Each component pushes its metrics to the \"common bus\", namely segment, Kafka, or something similar. That approach scales well, but it would be harder to test it locally, will introduce new dependencies, we will have to distribute secrets for that connection to all of the components, etc. 
We would also have to loop back some of the events and their aggregates to the console, as we want to show some of those metrics to the user in real-time.\n\nb. Each component can call HTTP `POST` with its events to the console, and the console can forward it to the segment for later integration with metronome / orb / onebill / etc. With that approach, only the console has to speak with segment. Also since that data passes through the console, the console can save the latest metrics values, so there is no need for constant feedback of those events back from the segment.\n\n# Implementation\n\nEach (proxy|pageserver|autoscaler-agent) sends consumption events to the single endpoint in the console:\n\n```json\nPOST /usage_events HTTP/1.1\nContent-Type: application/json\n\n[\n{\n\"metric\": \"remote_storage_size\",\n\"type\": \"absolute\",\n\"time\": \"2022-12-28T11:07:19.317310284Z\",\n\"idempotency_key\": \"2022-12-28 11:07:19.317310324 UTC-1-4019\",\n\"value\": 12345454,\n\"tenant_id\": \"5d07d9ce9237c4cd845ea7918c0afa7d\",\n\"timeline_id\": \"a03ebb4f5922a1c56ff7485cc8854143\",\n},\n...\n]\n```\n\n![data flow](./images/metering.jpg)\n\nEvents could be either:\n* `incremental` -- change in consumption since the previous event or service restart. That is `effective_cpu_seconds`, `traffic_in_bytes`, and `traffic_out_bytes`.\n* `absolute` -- that is the current value of a metric. All of the size-related metrics are absolute.\n\nEach service can post events at its own pace and bundle together data from different tenants/endpoints.\n\nThe console algorithm upon receipt of events could be the following:\n\n1. Create and send a segment event with the same content (possibly enriching it with tenant/timeline data for endpoint-based events).\n2. Update the latest state of per-tenant and per-endpoint metrics in the database.\n3. 
Check whether any of those metrics is above the allowed threshold and stop the project if necessary.\n\nSince all the data comes in batches, we can do the batch update to reduce the number of queries in the database. Proxy traffic is probably the most frequent metric, so with batching, we will have extra `number_of_proxies` requests to the database each minute. This is most likely fine for now but will generate many dead tuples in the console database. If that is the case, we can change step 2 to the following:\n\n2.1. Check if there is a $tenant_$metric / $endpoint_$metric key in Redis\n\n2.2. If no stored value is found and the metric is incremental, then fetch the current value from DWH (which keeps aggregated value for all the events) and publish it.\n\n2.3. Publish a new value (absolute metric) or add an increment to the stored value (incremental metric)\n\n## Consumption watchdog\n\nSince all the data goes through the console, we don't have to run any background thread/coroutines to check whether consumption is within the allowed limits. We only change consumption with `POST /usage_events`, so limit checks could be applied in the same handler.\n\n## Extensibility\n\nIf we need to add a new metric (e.g. s3 traffic or something else), the console code should, by default, process it and publish segment event, even if the metric name is unknown to the console.\n\n## Naming & schema\n\nEach metric name should end up with units -- now `_seconds` and `_bytes`, and segment event should always have `tenant_id` and `timeline_id`/`endpoint_id` where applicable.\n"
  },
  {
    "path": "docs/rfcs/022-pageserver-delete-from-s3.md",
    "content": "# Deleting pageserver part of tenants data from s3\n\nCreated on 08.03.23\n\n## Motivation\n\nCurrently we don't delete pageserver part of the data from s3 when project is deleted. (The same is true for safekeepers, but this outside of the scope of this RFC).\n\nThis RFC aims to spin a discussion to come to a robust deletion solution that wont put us in into a corner for features like postponed deletion (when we keep data for user to be able to restore a project if it was deleted by accident)\n\n## Summary\n\nTLDR; There are two options, one based on control plane issuing actual delete requests to s3 and the other one that keeps s3 stuff bound to pageserver. Each one has its pros and cons.\n\nThe decision is to stick with pageserver centric approach. For motivation see [Decision](#decision).\n\n## Components\n\npageserver, control-plane\n\n## Requirements\n\nDeletion should successfully finish (eventually) without leaving dangling files in presense of:\n\n- component restarts\n- component outage\n- pageserver loss\n\n## Proposed implementation\n\nBefore the options are discussed, note that deletion can be quite long process. For deletion from s3 the obvious choice is [DeleteObjects](https://docs.aws.amazon.com/AmazonS3/latest/API/API_DeleteObjects.html) API call. It allows to batch deletion of up to 1k objects in one API call. So deletion operation linearly depends on number of layer files.\n\nAnother design limitation is that there is no cheap `mv` operation available for s3. `mv` from `aws s3 mv` uses `copy(src, dst) + delete(src)`. So `mv`-like operation is not feasible as a building block because it actually amplifies the problem with both duration and resulting cost of the operation.\n\nThe case when there are multiple pageservers handling the same tenants is largely out of scope of the RFC. 
We still consider case with migration from one PS to another, but do not consider case when tenant exists on multiple pageservers for extended period of time. The case with multiple pageservers can be reduced to case with one pageservers by calling detach on all pageservers except the last one, for it actual delete needs to be called.\n\nFor simplicity lets look into deleting tenants. Differences in deletion process between tenants and timelines are mentioned in paragraph [\"Differences between tenants and timelines\"](#differences-between-tenants-and-timelines)\n\n### 1. Pageserver owns deletion machinery\n\n#### The sequence\n\nTLDR; With this approach control plane needs to call delete on a tenant and poll for progress. As much as possible is handled on pageserver. Lets see the sequence.\n\nHappy path:\n\n```mermaid\nsequenceDiagram\n    autonumber\n    participant CP as Control Plane\n    participant PS as Pageserver\n    participant S3\n\n    CP->>PS: Delete tenant\n    PS->>S3: Create deleted mark file at <br> /tenant/meta/deleted\n    PS->>PS: Create deleted mark file locally\n    PS->>CP: Accepted\n    PS->>PS: delete local files other than deleted mark\n    loop Delete layers for each timeline\n        PS->>S3: delete(..)\n        CP->>PS: Finished?\n        PS->>CP: False\n    end\n    PS->>S3: Delete mark file\n    PS->>PS: Delete local mark file\n\n    loop Poll for status\n        CP->>PS: Finished?\n        PS->>CP: True or False\n    end\n```\n\nWhy two mark files?\nRemote one is needed for cases when pageserver is lost during deletion so other pageserver can learn the deletion from s3 during attach.\n\nWhy local mark file is needed?\n\nIf we don't have one, we have two choices, delete local data before deleting the remote part or do that after.\n\nIf we delete local data before remote then during restart pageserver wont pick up remote tenant at all because nothing is available locally (pageserver looks for remote counterparts of locally available 
tenants).\n\nIf we delete local data after remote then at the end of the sequence when remote mark file is deleted if pageserver restart happens then the state is the same to situation when pageserver just missing data on remote without knowing the fact that this data is intended to be deleted. In this case the current behavior is upload everything local-only to remote.\n\nThus we need local record of tenant being deleted as well.\n\n##### Handle pageserver crashes\n\nLets explore sequences with various crash points.\n\nPageserver crashes before `deleted` mark file is persisted in s3:\n\n```mermaid\nsequenceDiagram\n    autonumber\n    participant CP as Control Plane\n    participant PS as Pageserver\n    participant S3\n\n    CP->>PS: Delete tenant\n    note over PS: Crash point 1.\n    CP->>PS: Retry delete request\n\n    PS->>S3: Create deleted mark file at <br> /tenant/meta/deleted\n    PS->>PS: Create deleted mark file locally\n\n    PS->>CP: Accepted\n\n    PS->>PS: delete local files other than deleted mark\n\n    loop Delete layers for each timeline\n        PS->>S3: delete(..)\n        CP->>PS: Finished?\n        PS->>CP: False\n    end\n    PS->>S3: Delete mark file\n    PS->>PS: Delete local mark file\n\n    CP->>PS: Finished?\n    PS->>CP: True\n```\n\nPageserver crashed when deleted mark was about to be persisted in s3, before Control Plane gets a response:\n\n```mermaid\nsequenceDiagram\n    autonumber\n    participant CP as Control Plane\n    participant PS as Pageserver\n    participant S3\n\n    CP->>PS: Delete tenant\n    PS->>S3: Create deleted mark file at <br> /tenant/meta/deleted\n\n    note over PS: Crash point 2.\n    note over PS: During startup we reconcile <br> with remote and see <br> whether the remote mark exists\n    alt Remote mark exists\n        PS->>PS: create local mark if its missing\n        PS->>PS: delete local files other than deleted mark\n        loop Delete layers for each timeline\n            PS->>S3: delete(..)\n       
 end\n\n        note over CP: Eventually console should <br> retry delete request\n\n        CP->>PS: Retry delete tenant\n        PS->>CP: Not modified\n    else Mark is missing\n        note over PS: Continue to operate the tenant as if deletion didn't happen\n\n        note over CP: Eventually console should <br> retry delete request\n\n        CP->>PS: Retry delete tenant\n        PS->>S3: Create deleted mark file at <br> /tenant/meta/deleted\n        PS->>CP: Accepted\n    end\n\n    PS->>PS: Continue with layer file deletions\n    loop Delete layers for each timeline\n        PS->>S3: delete(..)\n        CP->>PS: Finished?\n        PS->>CP: False\n    end\n\n    PS->>S3: Delete mark file\n    PS->>PS: Delete local mark file\n\n    CP->>PS: Finished?\n    PS->>CP: True\n```\n\nSimilar sequence applies when both local and remote marks were persisted but Control Plane still didn't receive a response.\n\nIf pageserver crashes after both mark files were deleted then it will reply to control plane status poll request with 404 which should be treated by control plane as success.\n\nThe same applies if pageserver crashes in the end, when remote mark is deleted but before local one gets deleted. In this case on restart pageserver moves forward with deletion of local mark and Control Plane will receive 404.\n\n##### Differences between tenants and timelines\n\nFor timeline the sequence is the same with the following differences:\n\n- remote delete mark file can be replaced with a boolean \"deleted\" flag in index_part.json\n- local deletion mark is not needed, because whole tenant is kept locally so situation described in motivation for local mark is impossible\n\n##### Handle pageserver loss\n\nIf pageserver is lost then the deleted tenant should be attached to a different pageserver and the delete request needs to be retried against the new pageserver. 
Then attach logic is shared with the one described for pageserver restarts (local deletion mark won't be available so needs to be created).\n\n##### Restrictions for tenant that is in progress of being deleted\n\nI propose to add another state to tenant/timeline - PendingDelete. This state shouldn't allow executing any operations aside from polling the deletion status.\n\n#### Summary\n\nPros:\n\n- Storage is not dependent on control plane. Storage can be restarted even if control plane is not working.\n- Allows for easier dogfooding, console can use Neon backed database as primary operational data store. If storage depends on control plane and control plane depends on storage we're stuck.\n- No need to share inner s3 workings with control plane. Pageserver presents api contract and S3 paths are not part of this contract.\n- No need to pass list of alive timelines to attach call. This will be solved by pageserver observing deleted flag. See [Differences between tenants and timelines](#differences-between-tenants-and-timelines).\n\nCons:\n\n- Logic is tricky, needs good testing\n- Anything else?\n\n### 2. Control plane owns deletion machinery\n\nIn this case the only action performed on pageserver is removal of local files.\n\nEverything else is done by control plane. The steps are as follows:\n\n1. Control plane marks tenant as \"delete pending\" in its database\n2. It lists the s3 for all the files and repeatedly calls delete until nothing is left behind\n3. When no files are left marks deletion as completed\n\nIn case of restart it selects all tenants marked as \"delete pending\" and continues the deletion.\n\nFor tenants it is simple. 
For timelines there are caveats.\n\nAssume that the same workflow is used for timelines.\n\nIf a tenant gets relocated during timeline deletion the attach call with its current logic will pick up deleted timeline in its half deleted state.\n\nAvailable options:\n\n- require list of alive timelines to be passed to attach call\n- use the same schema with flag in index_part.json (again part of the caveats around pageserver restart applies). In this case nothing stops pageserver from implementing deletion inside if we already have these deletion marks.\n\nWith first option the following problem becomes apparent:\n\nWho is the source of truth regarding timeline liveness?\n\nImagine:\n\n1. PS1 fails.\n2. PS2 gets assigned the tenant.\n3. New branch gets created.\n4. PS1 starts up (is it possible or we just recycle it?)\n5. PS1 is unaware of the new branch. It can either fall back to s3 ls, or ask control plane.\n\nSo here comes the dependency of storage on control plane. During restart storage needs to know which timelines are valid for operation. If there is nothing on s3 that can answer that question storage needs to ask control plane.\n\n#### Summary\n\nCons:\n\n- Potential thundering herd-like problem during storage restart (requests to control plane)\n- Potential increase in storage startup time (additional request to control plane)\n- Storage startup starts to depend on console\n- Erroneous attach call can attach tenant in half deleted state\n\nPros:\n\n- Easier to reason about if you don't have to account for pageserver restarts\n\n### Extra notes\n\nThere was a concern that having deletion code in pageserver is a little bit scary, but we need to have this code somewhere. So to me it is equally scary to have that in whatever place it ends up at.\n\nDelayed deletion can be done with both approaches. As discussed with Anna (@stepashka) this is only relevant for tenants (projects) not for timelines. 
For the first approach detach can be called immediately and deletion can be done later with attach + delete. With the second approach control plane needs to start the deletion whenever necessary.\n\n## Decision\n\nAfter discussion in comments I see that we settled on two options (though a bit different from the ones described in this RFC). First one is the same - pageserver owns as much as possible. The second option is that pageserver owns the deletion marks, but actual deletion happens in control plane by repeatedly calling ls + delete.\n\nTo my mind the only benefit of the latter approach is possible code reuse between safekeepers and pageservers. Otherwise poking around integrating s3 library into control plane, configuring shared knowledge about paths in s3 - are the downsides. Another downside of relying on control plane is the testing process. Control plane resides in different repository so it is quite hard to test pageserver related changes there. e2e test suite there doesn't support shutting down pageservers, which are separate docker containers there instead of just processes.\n\nWith pageserver owning everything we still give the retry logic to control plane but it's easier to duplicate if needed compared to sharing inner s3 workings. We will have needed tests for retry logic in neon repo.\n\nSo the decision is to proceed with pageserver centric approach.\n"
  },
  {
    "path": "docs/rfcs/023-the-state-of-pageserver-tenant-relocation.md",
    "content": "# The state of pageserver tenant relocation\n\nCreated on 17.03.23\n\n## Motivation\n\nThere were previous write ups on the subject. The design of tenant relocation was planned at the time when we had quite different landscape. I e there was no on-demand download/eviction. They were on the horizon but we still planned for cases when they were not available. Some other things have changed. Now safekeepers offload wal to s3 so we're not risking overflowing their disks. Having all of the above, it makes sense to recap and take a look at the options we have now, which adjustments we'd like to make to original process, etc.\n\nRelated (in chronological order):\n\n- Tracking issue with initial discussion: [#886](https://github.com/neondatabase/neon/issues/886)\n- [015. Storage Messaging](015-storage-messaging.md)\n- [020. Pageserver S3 Coordination](020-pageserver-s3-coordination.md)\n\n## Summary\n\nThe RFC consists of a walkthrough of prior art on tenant relocation and corresponding problems. It describes 3 approaches.\n\n1. Simplistic approach that uses ignore and is the fastest to implement. The main downside is a requirement of short downtime.\n2. More complicated approach that avoids even short downtime.\n3. Even more complicated approach that will allow multiple pageservers to operate concurrently on the same tenant possibly allowing for HA cluster topologies and horizontal scaling of reads (i e compute talks to multiple pageservers).\n\nThe order in which solutions are described is a bit different. We start from 2, then move to possible compromises (aka simplistic approach) and then move to discussing directions for solving HA/Pageserver replica case with 3.\n\n## Components\n\npageserver, control-plane, safekeepers (a bit)\n\n## Requirements\n\nRelocation procedure should move tenant from one pageserver to another without downtime introduced by storage side. 
For now restarting compute for applying new configuration is fine.\n\n- component restarts\n- component outage\n- pageserver loss\n\n## The original proposed implementation\n\nThe starting point is this sequence:\n\n```mermaid\nsequenceDiagram\n    autonumber\n    participant CP as Control Plane\n    participant PS1 as Pageserver 1\n    participant PS2 as Pageserver 2\n    participant S3\n\n    CP->>PS2: Attach tenant X\n    PS2->>S3: Fetch timelines, indexes for them\n    PS2->>CP: Accepted\n    CP->>CP: Change pageserver id in project\n    CP->>PS1: Detach\n```\n\nWhich problems do we have with naive approach?\n\n### Concurrent GC and Compaction\n\nThe problem is that they can run on both, PS1 and PS2. Consider this example from [Pageserver S3 Coordination RFC](020-pageserver-s3-coordination.md)\n\n```mermaid\nsequenceDiagram\n    autonumber\n    participant PS1\n    participant S3\n    participant PS2\n\n    PS1->>S3: Uploads L1, L2 <br/> Index contains L1 L2\n    PS2->>S3: Attach called, sees L1, L2\n    PS1->>S3: Compaction comes <br/> Removes L1, adds L3\n    note over S3: Index now L2, L3\n    PS2->>S3: Uploads new layer L4 <br/> (added to previous view of the index)\n    note over S3: Index now L1, L2, L4\n```\n\nAt this point it is not possible to restore the state from index, it contains L2 which\nis no longer available in s3 and doesn't contain L3 added by compaction by the\nfirst pageserver. So if any of the pageservers restart, initial sync will fail\n(or in on-demand world it will fail a bit later during page request from\nmissing layer)\n\nThe problem lies in shared index_part.json. Having intersecting layers from append only edits is expected to work, though this is an uncharted territory without tests.\n\n#### Options\n\nThere are several options on how to restrict concurrent access to index file.\n\nFirst and the simplest one is external orchestration. 
Control plane which runs migration can use special api call on pageserver to stop background processes (gc, compaction), and even possibly all uploads.\n\nSo the sequence becomes:\n\n```mermaid\nsequenceDiagram\n    autonumber\n    participant CP as Control Plane\n    participant PS1 as Pageserver 1\n    participant PS2 as Pageserver 2\n    participant S3\n\n    CP->>PS1: Pause background jobs, pause uploading new layers.\n    CP->>PS2: Attach tenant X.\n    PS2->>S3: Fetch timelines, index, start background operations\n    PS2->>CP: Accepted\n    CP->>CP: Monitor PS2 last record lsn, ensure OK lag\n    CP->>CP: Change pageserver id in project\n    CP->>PS1: Detach\n```\n\nThe downside of this sequence is the potential rollback process. What if something goes wrong on new pageserver? Can we safely roll back to source pageserver?\n\nThere are two questions:\n\n#### How can we detect that something went wrong?\n\nWe can run usual availability check (consists of compute startup and an update of one row).\nNote that we cant run separate compute for that before touching compute that client runs actual workload on, because we cant have two simultaneous computes running in read-write mode on the same timeline (enforced by safekeepers consensus algorithm). So we can either run some readonly check first (basebackup) and then change pageserver id and run availability check. If it failed we can roll it back to the old one.\n\n#### What can go wrong? And how we can safely roll-back?\n\nIn the sequence above during attach we start background processes/uploads. They change state in remote storage so it is possible that after rollback remote state will be different from one that was observed by source pageserver. 
So if target pageserver goes wild then source pageserver may fail to start with changed remote state.\n\nProposed option would be to implement a barrier (read-only) mode when pageserver does not update remote state.\n\nSo the sequence for happy path becomes this one:\n\n```mermaid\nsequenceDiagram\n    autonumber\n    participant CP as Control Plane\n    participant PS1 as Pageserver 1\n    participant PS2 as Pageserver 2\n    participant S3\n\n    CP->>PS1: Pause background jobs, pause uploading new layers.\n    CP->>PS2: Attach tenant X in remote readonly mode.\n    PS2->>S3: Fetch timelines, index\n    PS2->>CP: Accepted\n    CP->>CP: Monitor PS2 last record lsn, ensure OK lag\n    CP->>CP: Change pageserver id in project\n    CP->>CP: Run successful availability check\n    CP->>PS2: Start uploads, background tasks\n    CP->>PS1: Detach\n```\n\nWith this sequence we restrict any changes to remote storage to one pageserver. So there is no concurrent access at all, not only for index_part.json, but for everything else too. 
This approach makes it possible to roll back after failure on new pageserver.\n\nThe sequence with roll back process:\n\n```mermaid\nsequenceDiagram\n    autonumber\n    participant CP as Control Plane\n    participant PS1 as Pageserver 1\n    participant PS2 as Pageserver 2\n    participant S3\n\n    CP->>PS1: Pause background jobs, pause uploading new layers.\n    CP->>PS2: Attach tenant X in remote readonly mode.\n    PS2->>S3: Fetch timelines, index\n    PS2->>CP: Accepted\n    CP->>CP: Monitor PS2 last record lsn, ensure OK lag\n    CP->>CP: Change pageserver id in project\n    CP->>CP: Availability check Failed\n    CP->>CP: Change pageserver id back\n    CP->>PS1: Resume remote operations\n    CP->>PS2: Ignore (instead of detach for investigation purposes)\n```\n\n## Concurrent branch creation\n\nAnother problem is a possibility of concurrent branch creation calls.\n\nI e during migration create_branch can be called on old pageserver and newly created branch wont be seen on new pageserver. Prior art includes prototyping an approach of trying to mirror such branches, but currently it lost its importance, because now attach is fast because we don't need to download all data, and additionally to the best of my knowledge of control plane internals (cc @ololobus to confirm) operations on one project are executed sequentially, so it is not possible to have such case. So branch create operation will be executed only when relocation is completed. As a safety measure we can forbid branch creation for tenants that are in readonly remote state.\n\n## Simplistic approach\n\nThe difference of simplistic approach from one described above is that it calls ignore on source tenant first and then calls attach on target pageserver. 
Approach above does it in opposite order thus opening a possibility for race conditions we strive to avoid.\n\nThe approach largely follows this guide: <https://www.notion.so/neondatabase/Cloud-Ad-hoc-tenant-relocation-f687474f7bfc42269e6214e3acba25c7>\n\nThe happy path sequence:\n\n```mermaid\nsequenceDiagram\n    autonumber\n    participant CP as Control Plane\n    participant PS1 as Pageserver 1\n    participant PS2 as Pageserver 2\n    participant SK as Safekeeper\n    participant S3\n\n    CP->>CP: Enable maintenance mode\n    CP->>PS1: Ignore\n    CP->>PS2: Attach\n    PS2->>CP: Accepted\n    loop Delete layers for each timeline\n        CP->>PS2: Get last record lsn\n        CP->>SK: Get commit lsn\n        CP->>CP: OK? Timed out?\n    end\n    CP->>CP: Change pageserver id in project\n    CP->>CP: Run successful availability check\n    CP->>CP: Disable maintenance mode\n    CP->>PS1: Detach ignored\n```\n\nThe sequence contains exactly the same rollback problems as in previous approach described above. They can be resolved the same way.\n\nMost probably we'd like to move forward without this safety measure and implement it on top of this approach to make progress towards the downtime-less one.\n\n## Lease based approach\n\nIn order to allow for concurrent operation on the same data on remote storage for multiple pageservers we need to go further than external orchestration.\n\nNOTE: [020. Pageserver S3 Coordination](020-pageserver-s3-coordination.md) discusses one more approach that relies on duplication of index_part.json for each pageserver operating on the timeline. This approach still requires external coordination which makes certain things easier but requires additional bookkeeping to account for multiple index_part.json files. Discussion/comparison with proposed lease based approach\n\nThe problems are outlined in [020. 
Pageserver S3 Coordination](020-pageserver-s3-coordination.md) and suggested solution includes [Coordination based approach](020-pageserver-s3-coordination.md#coordination-based-approach). This way it will allow to do basic leader election for pageservers so they can decide which node will be responsible for running GC and compaction. The process is based on extensive communication via storage broker and consists of a lease that is taken by one of the pageservers that extends it to continue serving a leader role.\n\nThere are two options for ingesting new data into pageserver in follower role. One option is to avoid WAL ingestion at all and rely on notifications from leader to discover new layers on s3. Main downside of this approach is that follower will always lag behind the primary node because it wont have the last layer until it is uploaded to remote storage. In case of a primary failure follower will be required to reingest last segment (up to 256Mb of WAL currently) which slows down recovery. Additionally if compute is connected to follower pageserver it will observe latest data with a delay. Queries from compute will likely experience bigger delays when recent lsn is required.\n\nThe second option is to consume WAL stream on both pageservers. In this case the only problem is non deterministic layer generation. Additional bookkeeping will be required to deduplicate layers from primary with local ones. Some process needs to somehow merge them to remove duplicated data. Additionally we need to have good testing coverage to ensure that our implementation of `get_page@lsn` properly handles intersecting layers.\n\nThere is another tradeoff. Approaches may be different in amount of traffic between system components. With first approach there can be increased traffic between follower and remote storage. But only in case follower has some activity that actually requests pages (!). 
With the other approach the traffic increase will be permanent and will be caused by two WAL streams instead of one.\n\n## Summary\n\nProposed implementation strategy:\n\nGo with the simplest approach for now. Then work on tech debt, increase test coverage. Then gradually move forward to the second approach by implementing safety measures first, finishing with the switch of order between ignore and attach operations.\n\nAnd only then go to the lease based approach to solve HA/Pageserver replica use cases.\n"
  },
  {
    "path": "docs/rfcs/024-extension-loading.md",
    "content": "# Supporting custom user Extensions (Dynamic Extension Loading)\nCreated 2023-05-03\n\n## Motivation\n\nThere are many extensions in the PostgreSQL ecosystem, and not all extensions\nare of a quality that we can confidently support them. Additionally, our\ncurrent extension inclusion mechanism has several problems because we build all\nextensions into the primary Compute image: We build the extensions every time\nwe build the compute image regardless of whether we actually need to rebuild\nthe image, and the inclusion of these extensions in the image adds a hard\ndependency on all supported extensions - thus increasing the image size, and\nwith it the time it takes to download that image - increasing first start\nlatency.\n\nThis RFC proposes a dynamic loading mechanism that solves most of these\nproblems.\n\n## Summary\n\n`compute_ctl` is made responsible for loading extensions on-demand into\nthe container's file system for dynamically loaded extensions, and will also\nmake sure that the extensions in `shared_preload_libraries` are downloaded\nbefore the compute node starts.\n\n## Components\n\ncompute_ctl, PostgreSQL, neon (extension), Compute Host Node, Extension Store\n\n## Requirements\n\nCompute nodes with no extra extensions should not be negatively impacted by\nthe existence of support for many extensions.\n\nInstalling an extension into PostgreSQL should be easy.\n\nNon-preloaded extensions shouldn't impact startup latency.\n\nUninstalled extensions shouldn't impact query latency.\n\nA small latency penalty for dynamically loaded extensions is acceptable in\nthe first seconds of compute startup, but not in steady-state operations.\n\n## Proposed implementation\n\n### On-demand, JIT-loading of extensions\n\nBefore postgres starts we download \n- control files for all extensions available to that compute node;\n- all `shared_preload_libraries`;\n\nAfter postgres is running, `compute_ctl` listens for requests to load files.\nWhen PostgreSQL 
requests a file, `compute_ctl` downloads it.\n\nPostgreSQL requests files in the following cases:\n- When loading a preload library set in `local_preload_libraries`\n- When explicitly loading a library with `LOAD`\n- When creating an extension with `CREATE EXTENSION` (download SQL scripts, (optional) extension data files and (optional) library files)\n\n\n#### Summary\n\nPros:\n - Startup is only as slow as it takes to load all (shared_)preload_libraries\n - Supports BYO Extension\n\nCons:\n - O(sizeof(extensions)) IO requirement for loading all extensions.\n\n### Alternative solutions\n\n1. Allow users to add their extensions to the base image\n   \n   Pros:\n    - Easy to deploy\n\n   Cons:\n    - Doesn't scale - first start size is dependent on image size;\n    - All extensions are shared across all users: It doesn't allow users to\n      bring their own restrictive-licensed extensions\n\n2. Bring Your Own compute image\n   \n   Pros:\n    - Still easy to deploy\n    - User can bring own patched version of PostgreSQL\n\n   Cons:\n    - First start latency is O(sizeof(extensions image))\n    - Warm instance pool for skipping pod schedule latency is not feasible with\n      O(n) custom images\n    - Support channels are difficult to manage\n\n3. Download all user extensions in bulk on compute start\n   \n   Pros:\n    - Easy to deploy\n    - No startup latency issues for \"clean\" users.\n    - Warm instance pool for skipping pod schedule latency is possible\n\n   Cons:\n    - Downloading all extensions in advance takes a lot of time, thus startup\n      latency issues\n\n4. 
Store user's extensions in persistent storage\n   \n   Pros:\n    - Easy to deploy\n    - No startup latency issues\n    - Warm instance pool for skipping pod schedule latency is possible\n\n   Cons:\n    - EC2 instances have only limited number of attachments shared between EBS\n      volumes, direct-attached NVMe drives, and ENIs.\n    - Compute instance migration isn't trivially solved for EBS mounts (e.g.\n      the device is unavailable whilst moving the mount between instances).\n    - EBS can only mount on one instance at a time (except the expensive IO2\n      device type).\n\n5. Store user's extensions in network drive\n   \n   Pros:\n    - Easy to deploy\n    - Few startup latency issues\n    - Warm instance pool for skipping pod schedule latency is possible\n\n   Cons:\n    - We'd need networked drives, and a lot of them, which would store many\n      duplicate extensions.\n    - **UNCHECKED:** Compute instance migration may not work nicely with\n      networked IOs\n\n\n### Idea extensions\n\nThe extension store does not have to be S3 directly, but could be a Node-local\ncaching service on top of S3. 
This would reduce the load on the network for\npopular extensions.\n\n## Extension Storage implementation\n\nThe layout of the S3 bucket is as follows:\n```\n5615610098 // this is an extension build number\n├── v14\n│   ├── extensions\n│   │   ├── anon.tar.zst\n│   │   └── embedding.tar.zst\n│   └── ext_index.json\n└── v15\n    ├── extensions\n    │   ├── anon.tar.zst\n    │   └── embedding.tar.zst\n    └── ext_index.json\n5615261079\n├── v14\n│   ├── extensions\n│   │   └── anon.tar.zst\n│   └── ext_index.json\n└── v15\n    ├── extensions\n    │   └── anon.tar.zst\n    └── ext_index.json\n5623261088\n├── v14\n│   ├── extensions\n│   │   └── embedding.tar.zst\n│   └── ext_index.json\n└── v15\n    ├── extensions\n    │   └── embedding.tar.zst\n    └── ext_index.json\n```\n\nNote that build number cannot be part of prefix because we might need extensions\nfrom other build numbers.\n\n`ext_index.json` stores the control files and location of extension archives. \nIt also stores a list of public extensions and a library_index\n\nWe don't need to duplicate `extension.tar.zst`` files.\nWe only need to upload a new one if it is updated.\n(Although currently we just upload every time anyways, hopefully will change\nthis sometime)\n\n*access* is controlled by spec\n\nMore specifically, here is an example ext_index.json\n```\n{\n    \"public_extensions\": [\n        \"anon\",\n        \"pg_buffercache\"\n    ],\n    \"library_index\": {\n        \"anon\": \"anon\",\n        \"pg_buffercache\": \"pg_buffercache\"\n        // for more complex extensions like postgis\n        // we might have something like:\n        // address_standardizer: postgis\n        // postgis_tiger: postgis\n    },\n    \"extension_data\": {\n        \"pg_buffercache\": {\n            \"control_data\": {\n                \"pg_buffercache.control\": \"# pg_buffercache extension \\ncomment = 'examine the shared buffer cache' \\ndefault_version = '1.3' \\nmodule_pathname = '$libdir/pg_buffercache' 
\\nrelocatable = true \\ntrusted=true\"\n            },\n            \"archive_path\": \"5670669815/v14/extensions/pg_buffercache.tar.zst\"\n        },\n        \"anon\": {\n            \"control_data\": {\n                \"anon.control\": \"# PostgreSQL Anonymizer (anon) extension \\ncomment = 'Data anonymization tools' \\ndefault_version = '1.1.0' \\ndirectory='extension/anon' \\nrelocatable = false \\nrequires = 'pgcrypto' \\nsuperuser = false \\nmodule_pathname = '$libdir/anon' \\ntrusted = true \\n\"\n            },\n            \"archive_path\": \"5670669815/v14/extensions/anon.tar.zst\"\n        }\n    }\n}\n```\n\n### How to add new extension to the Extension Storage?\n\nSimply upload build artifacts to the S3 bucket.\nImplement a CI step for that, split from the compute-node-image build.\n\n### How do we deal with extension versions and updates?\n\nCurrently, we rebuild extensions on every compute-node-image build and store them in the `<build-version>` prefix.\nThis is needed to ensure that `/share` and `/lib` files are in sync.\n\nFor extension updates, we rely on the PostgreSQL extension versioning mechanism (sql update scripts) and extension authors to not break backwards compatibility within one major version of PostgreSQL.\n\n### Alternatives\n\nFor extensions written in trusted languages we can also adopt\n`dbdev` PostgreSQL Package Manager based on `pg_tle` by Supabase.\nThis will increase the number of supported extensions and decrease the amount of work required to support them.\n"
  },
  {
    "path": "docs/rfcs/024-user-mgmt.md",
    "content": "# Postgres user and database management\n\n(This supersedes the previous proposal that looked too complicated and desynchronization-prone)\n\nWe've accumulated a bunch of problems with our approach to role and database management, namely:\n\n1. we don't allow role and database creation from Postgres, and users are complaining about that\n2. fine-grained role management is not possible from either Postgres or the console\n\nRight now, we do store users and databases both in console and Postgres, and there are two main reasons for\nthat:\n\n* we want to be able to authenticate users in proxy against the console without Postgres' involvement. Otherwise,\nmalicious brute force attempts will wake up Postgres (expensive) and may exhaust the Postgres connections limit (denial of service).\n* it is handy when we can render console UI without waking up compute (e.g., show database list)\n\nThis RFC doesn't talk about giving root access to the database, which is blocked by a secure runtime setup.\n\n## Overview\n\n* Add Postgres extension that sends an HTTP request each time transaction that modifies users/databases is about to commit.\n* Add user management API to internal console API. Also, the console should put a JWT token into the compute so that it can access management API.\n\n## Postgres behavior\n\nThe default user role (@username) should have `CREATEROLE`, `CREATEDB`, and `BYPASSRLS` privileges. We expose the Postgres port\nto the open internet, so we need to check password strength. Now console generates strong passwords, so there is no risk of having weak passwords. With user-provided passwords, such risks exist.\n\nSince we store passwords in the console we should also send unencrypted password when role is created/changed. Hence communication with the console must be encrypted. Postgres also supports creating roles using hashes, in that case, we will not be able to get a raw password. 
So I can see the following options here:\n  * roles created via SQL will *not* have raw passwords in the console\n  * roles created via SQL will have raw passwords in the console, except ones that were created using hashes\n\nI'm leaning towards the second option here as it is a bit more consistent one -- if raw password storage is enabled then we store passwords in all cases where we can store them.\n\nTo send data about roles and databases from Postgres to the console we can create the following Postgres extension:\n\n  * Intercept role/database changes in `ProcessUtility_hook`. Here we have access to the query statement with the raw password. The hook handler itself should not dial the console immediately and rather stash info in some hashmap for later use.\n  * When the transaction is about to commit we execute collected role modifications (all as one -- console should either accept all or reject all, and hence API shouldn't be REST-like). If the console request fails we can roll back the transaction. This way if the transaction is committed we know for sure that console has this information. We can use `XACT_EVENT_PRE_COMMIT` and `XACT_EVENT_PARALLEL_PRE_COMMIT` for that.\n  * Extension should be mindful of the fact that it is possible to create and delete roles within the transaction.\n  * We also need to track who is database owner, some coding around may be needed to get the current user when the database is created.\n\n## Console user management API\n\nThe current public API has REST API for role management. We need to have some analog for the internal API (called mgmt API in the console code). But unlike public API here we want to have an atomic way to create several roles/databases (in cases when several roles were created in the same transaction). 
So something like that may work:\n\n```\ncurl -X PATCH /api/v1/roles_and_databases -d '\n[\n    {\"op\":\"create\", \"type\":\"role\", \"name\": \"kurt\", \"password\":\"lYgT3BlbkFJ2vBZrqv\"},\n    {\"op\":\"drop\", \"type\":\"role\", \"name\": \"trout\"},\n    {\"op\":\"alter\", \"type\":\"role\", \"name\": \"kilgore\", \"password\":\"3BlbkFJ2vB\"},\n    {\"op\":\"create\", \"type\":\"database\", \"name\": \"db2\", \"owner\": \"eliot\"},\n]\n'\n```\n\nMakes sense not to error out on duplicated create/delete operations (see failure modes)\n\n## Managing users from the console\n\nNow console puts a spec file with the list of databases/roles and delta operations in all the compute pods. `compute_ctl` then picks up that file and stubbornly executes deltas and checks data in the spec file is the same as in the Postgres. This way if the user creates a role in the UI we restart compute with a new spec file and during the start databases/roles are created. So if Postgres send an HTTP call each time role is created we need to break recursion in that case. We can do that based on application_name or some GUC or user (local == no HTTP hook).\n\nGenerally, we have several options when we are creating users via console:\n\n1. restart compute with a new spec file, execute local SQL command; cut recursion in the extension\n2. \"push\" spec files into running compute, execute local SQL command; cut recursion in the extension\n3. \"push\" spec files into running compute, execute local SQL command; let extension create those roles in the console\n4. avoid managing roles via spec files, send SQL commands to compute; let extension create those roles in the console\n\nThe last option is the most straightforward one, but with the raw password storage opt-out, we will not have the password to establish an SQL connection. Also, we need a spec for provisioning purposes and to address potential desync (but that is quite unlikely). So I think the easiest approach would be:\n\n1. 
keep role management like it is now and cut the recursion in the extension when SQL is executed by compute_ctl\n2. add a \"push\" endpoint to the compute_ctl to avoid compute restart during the `apply_config` operation -- that can be done as a follow up to avoid increasing scope too much\n\n## Failure modes\n\n* during role creation via SQL, the role was created in the console but the connection was dropped before Postgres got acknowledgment or some error happened after acknowledgment (out of disk space, deadlock, etc):\n\n  in that case, Postgres won't have a role that exists in the console. Compute restart will heal it (due to the spec file). Also if the console allows repeated creation/deletion the user can repeat the transaction.\n\n\n## Scalability\n\nOn my laptop, I can create 4200 roles per second. That corresponds to 363 million roles per day. Since each role creation ends up in the console database we can add some limit to the number of roles (could be reasonably big to not run into it often -- like 1k or 10k).\n"
  },
  {
    "path": "docs/rfcs/025-generation-numbers.md",
    "content": "# Pageserver: split-brain safety for remote storage through generation numbers\n\n## Summary\n\nA scheme of logical \"generation numbers\" for tenant attachment to pageservers is proposed, along with\nchanges to the remote storage format to include these generation numbers in S3 keys.\n\nUsing the control plane as the issuer of these generation numbers enables strong anti-split-brain\nproperties in the pageserver cluster without implementing a consensus mechanism directly\nin the pageservers.\n\n## Motivation\n\nCurrently, the pageserver's remote storage format does not provide a mechanism for addressing\nsplit brain conditions that may happen when replacing a node or when migrating\na tenant from one pageserver to another.\n\nFrom a remote storage perspective, a split brain condition occurs whenever two nodes both think\nthey have the same tenant attached, and both can write to S3. This can happen in the case of a\nnetwork partition, pathologically long delays (e.g. suspended VM), or software bugs.\n\nIn the current deployment model, control plane guarantees that a tenant is attached to one\npageserver at a time, thereby ruling out split-brain conditions resulting from dual\nattachment (however, there is always the risk of a control plane bug). This control\nplane guarantee prevents robust response to failures, as if a pageserver is unresponsive\nwe may not detach from it. 
The mechanism in this RFC fixes this, by making it safe to\nattach to a new, different pageserver even if an unresponsive pageserver may be running.\n\nFurther lack of safety during split-brain conditions blocks two important features where occasional\nsplit-brain conditions are part of the design assumptions:\n\n- seamless tenant migration ([RFC PR](https://github.com/neondatabase/neon/pull/5029))\n- automatic pageserver instance failure handling (aka \"failover\") (RFC TBD)\n\n### Prior art\n\n- 020-pageserver-s3-coordination.md\n- 023-the-state-of-pageserver-tenant-relocation.md\n- 026-pageserver-s3-mvcc.md\n\nThis RFC has broad similarities to the proposal to implement a MVCC scheme in\nS3 object names, but this RFC avoids a general purpose transaction scheme in\nfavour of more specialized \"generations\" that work like a transaction ID that\nalways has the same lifetime as a pageserver process or tenant attachment, whichever\nis shorter.\n\n## Requirements\n\n- Accommodate storage backends with no atomic or fencing capability (i.e. work within\n  S3's limitation that there are no atomics and clients can't be fenced)\n- Don't depend on any STONITH or node fencing in the compute layer (i.e. 
we will not\n  assume that we can reliably kill an EC2 instance and have it die)\n- Scoped per-tenant, not per-pageserver; for _seamless tenant migration_, we need\n  per-tenant granularity, and for _failover_, we likely want to spread the workload\n  of the failed pageserver instance to a number of peers, rather than monolithically\n  moving the entire workload to another machine.\n  We do not rule out the latter case, but should not constrain ourselves to it.\n\n## Design Tenets\n\nThese are not requirements, but are ideas that guide the following design:\n\n- Avoid implementing another consensus system: we already have a strongly consistent\n  database in the control plane that can do atomic operations where needed, and we also\n  have a Paxos implementation in the safekeeper.\n- Avoid locking in to specific models of how failover will work (e.g. do not assume that\n  all the tenants on a pageserver will fail over as a unit).\n- Be strictly correct when it comes to data integrity. 
Occasional failures of availability\n  are tolerable, occasional data loss is not.\n\n## Non Goals\n\nThe changes in this RFC intentionally isolate the design decision of how to define\nlogical generation numbers and object storage format in a way that is somewhat flexible with\nrespect to how actual orchestration of failover works.\n\nThis RFC intentionally does not cover:\n\n- Failure detection\n- Orchestration of failover\n- Standby modes to keep data ready for fast migration\n- Intentional multi-writer operation on tenants (multi-writer scenarios are assumed to be transient split-brain situations).\n- Sharding.\n\nThe interaction between this RFC and those features is discussed in [Appendix B](#appendix-b-interoperability-with-other-features)\n\n## Impacted Components\n\npageserver, control plane, safekeeper (optional)\n\n## Implementation Part 1: Correctness\n\n### Summary\n\n- A per-tenant **generation number** is introduced to uniquely identify tenant attachments to pageserver processes.\n\n  - This generation number increments each time the control plane modifies a tenant (`Project`)'s assigned pageserver, or when the assigned pageserver restarts.\n  - the control plane is the authority for generation numbers: only it may\n    increment a generation number.\n\n- **Object keys are suffixed** with the generation number\n- **Safety for multiply-attached tenants** is provided by the\n  generation number in the object key: the competing pageservers will not\n  try to write to the same keys.\n- **Safety in split brain for multiple nodes running with\n  the same node ID** is provided by the pageserver calling out to the control plane\n  on startup, to re-attach and thereby increment the generations of any attached tenants\n- **Safety for deletions** is achieved by deferring the DELETE from S3 to a point in time where the deleting node has validated with control plane that no attachment with a higher generation has a reference to the to-be-DELETEd key.\n- **The 
control plane is used to issue generation numbers** to avoid the need for\n  a built-in consensus system in the pageserver, although this could in principle\n  be changed without changing the storage format.\n\n### Generation numbers\n\nA generation number is associated with each tenant in the control plane,\nand each time the attachment status of the tenant changes, this is incremented.\nChanges in attachment status include:\n\n- Attaching the tenant to a different pageserver\n- A pageserver restarting, and \"re-attaching\" its tenants on startup\n\nThese increments of attachment generation provide invariants we need to avoid\nsplit-brain issues in storage:\n\n- If two pageservers have the same tenant attached, the attachments are guaranteed to have different generation numbers, because the generation would increment\n  while attaching the second one.\n- If there are multiple pageservers running with the same node ID, all the attachments on all pageservers are guaranteed to have different generation numbers, because the generation would increment\n  when the second node started and re-attached its tenants.\n\nAs long as the infrastructure does not transparently replace an underlying\nphysical machine, we are totally safe. See the later [unsafe case](#unsafe-case-on-badly-behaved-infrastructure) section for details.\n\n### Object Key Changes\n\n#### Generation suffix\n\nAll object keys (layer objects and index objects) will contain the attachment\ngeneration as a [suffix](#why-a-generation-suffix-rather-than-prefix).\nThis suffix is the primary mechanism for protecting against split-brain situations, and\nenabling safe multi-attachment of tenants:\n\n- Two pageservers running with the same node ID (e.g. 
after a failure, where there is\n  some rogue pageserver still running) will not try to write to the same objects, because at startup they will have re-attached tenants and thereby incremented\n  generation numbers.\n- Multiple attachments (to different pageservers) of the same tenant will not try to write to the same objects, as each attachment would have a distinct generation.\n\nThe generation is appended in hex format (8 byte string representing\nu32), to all our existing key names. A u32's range limit would permit\n27 restarts _per second_ over a 5 year system lifetime: orders of magnitude more than\nis realistic.\n\nThe exact meaning of the generation suffix can evolve over time if necessary, for\nexample if we chose to implement a failover mechanism internally to the pageservers\nrather than going via the control plane. The storage format just sees it as a number,\nwith the only semantic property being that the highest numbered index is the latest.\n\n#### Index changes\n\nSince object keys now include a generation suffix, the index of these keys must also be updated. IndexPart currently stores keys and LSNs sufficient to reconstruct key names: this would be extended to store the generation as well.\n\nThis will increase the size of the file, but only modestly: layers are already encoded as\ntheir string-ized form, so the overhead is about 10 bytes per layer. 
This will be less if/when\nthe index storage format is migrated to a binary format from JSON.\n\n#### Visibility\n\n_This section doesn't describe code changes, but extends on the consequences of the\nobject key changes given above_\n\n##### Visibility of objects to pageservers\n\nPageservers can of course list objects in S3 at any time, but in practice their\nvisible set is based on the contents of their LayerMap, which is initialized\nfrom the `index_part.json.???` that they load.\n\nStarting with the `index_part` from the most recent previous generation\n(see [loading index_part](#finding-the-remote-indices-for-timelines)), a pageserver\ninitially has visibility of all the objects that were referenced in the loaded index.\nThese objects are guaranteed to remain visible until the current generation is\nsuperseded, via pageservers in older generations avoiding deletions (see [deletion](#deletion)).\n\nThe \"most recent previous generation\" is _not_ necessarily the most recent\nin terms of walltime, it is the one that is readable at the time a new generation\nstarts. Consider the following sequence of a tenant being re-attached to different\npageserver nodes:\n\n- Create + attach on PS1 in generation 1\n- PS1 Do some work, write out index_part.json-0001\n- Attach to PS2 in generation 2\n- Read index_part.json-0001\n- PS2 starts doing some work...\n- Attach to PS3 in generation 3\n- Read index_part.json-0001\n- **...PS2 finishes its work: now it writes index_part.json-0002**\n- PS3 writes out index_part.json-0003\n\nIn the above sequence, the ancestry of indices is:\n\n```\n0001 -> 0002\n     |\n     -> 0003\n```\n\nThis is not an issue for safety: if the 0002 references some object that is\nnot in 0001, then 0003 simply does not see it, and will re-do whatever\nwork was required (e.g. ingesting WAL or doing compaction). 
Objects referenced\nby only the 0002 index will never be read by future attachment generations, and\nwill eventually be cleaned up by a scrub (see [scrubbing](#cleaning-up-orphan-objects-scrubbing)).\n\n##### Visibility of LSNs to clients\n\nBecause index_part.json is now written with a generation suffix, which data\nis visible depends on which generation the reader is operating in:\n\n- If one was passively reading from S3 from outside of a pageserver, the\n  visibility of data would depend on which index_part.json-<generation> file\n  one had chosen to read from.\n- If two pageservers have the same tenant attached, they may have different\n  data visible as they're independently replaying the WAL, and maintaining\n  independent LayerMaps that are written to independent index_part.json files.\n  Data does not have to be remotely committed to be visible.\n- For a pageserver writing with a stale generation, historic LSNs\n  remain readable until another pageserver (with a higher generation suffix)\n  decides to execute GC deletions. 
At this point, we may think of the stale\n  attachment's generation as having logically ended: during its existence\n  the generation had a consistent view of the world.\n- For a newly attached pageserver, its highest visible LSN may appear to\n  go backwards with respect to an earlier attachment, if that earlier\n  attachment had not uploaded all data to S3 before the new attachment.\n\n### Deletion\n\n#### Generation number validation\n\nWhile writes are de-conflicted by writers always using their own generation number in the key,\ndeletions are slightly more challenging: if a pageserver A is isolated, and the true active node is\npageserver B, then it is dangerous for A to do any object deletions, even of objects that it wrote\nitself, because pageserver B's metadata might reference those objects.\n\nWe solve this by inserting a \"generation validation\" step between the write of a remote index\nthat un-links a particular object from the index, and the actual deletion of the object, such\nthat deletions strictly obey the following ordering:\n\n1. Write out index_part.json: this guarantees that any subsequent reader of the metadata will\n   not try and read the object we unlinked.\n2. Call out to control plane to validate that the generation which we use for our attachment is still the latest.\n3. If step 2 passes, it is safe to delete the object. Why? The check-in with control plane\n   together with our visibility rules guarantees that any later generation\n   will use either the exact `index_part.json` that we uploaded in step 1, or a successor\n   of it; not an earlier one. In both cases, the `index_part.json` doesn't reference the\n   key we are deleting anymore, so, the key is invisible to any later attachment generation.\n   Hence it's safe to delete it.\n\nNote that at step 2 we are only confirming that deletions of objects _no longer referenced\nby the specific `index_part.json` written in step 1_ are safe. 
If we were attempting other deletions concurrently,\nthese would need their own generation validation step.\n\nIf step 2 fails, we may leak the object. This is safe, but has a cost: see [scrubbing](#cleaning-up-orphan-objects-scrubbing). We may avoid this entirely outside of node\nfailures, if we do proper flushing of deletions on clean shutdown and clean migration.\n\nTo avoid doing a huge number of control plane requests to perform generation validation,\nvalidation of many tenants will be done in a single request, and deletions will be queued up\nprior to validation: see [Persistent deletion queue](#persistent-deletion-queue) for more.\n\n#### `remote_consistent_lsn` updates\n\nRemote objects are not the only kind of deletion the pageserver does: it also indirectly deletes\nWAL data, by feeding back remote_consistent_lsn to safekeepers, as a signal to the safekeepers that\nthey may drop data below this LSN.\n\nFor the same reasons that deletion of objects must be guarded by an attachment generation number\nvalidation step, updates to `remote_consistent_lsn` are subject to the same rules, using\nan ordering as follows:\n\n1. upload the index_part that covers data up to LSN `L0` to S3\n2. Call out to control plane to validate that the generation which we use for our attachment is still the latest.\n3. advance the `remote_consistent_lsn` that we advertise to the safekeepers to `L0`\n\nIf step 2 fails, then the `remote_consistent_lsn` advertised\nto safekeepers will not advance again until a pageserver\nwith the latest generation is ready to do so.\n\n**Note:** at step 3 we are not advertising the _latest_ remote_consistent_lsn, we are\nadvertising the value in the index_part that we uploaded in step 1. 
This provides\na strong ordering guarantee.\n\nInternally to the pageserver, each timeline will have two remote_consistent_lsn values: the one that\nreflects its latest write to remote storage, and the one that reflects the most\nrecent validation of generation number. It is only the latter value that may\nbe advertised to the outside world (i.e. to the safekeeper).\n\nThe control plane remains unaware of `remote_consistent_lsn`: it only has to validate\nthe freshness of generation numbers, thereby granting the pageserver permission to\nshare the information with the safekeeper.\n\nFor convenience, in subsequent sections and RFCs we will use \"deletion\" to mean both deletion\nof objects in S3, and updates to the `remote_consistent_lsn`, as updates to the remote consistent\nLSN are de-facto deletions done via the safekeeper, and both kinds of deletion are subject to\nthe same generation validation requirement.\n\n### Pageserver attach/startup changes\n\n#### Attachment\n\nCalls to `/v1/tenant/{tenant_id}/attach` are augmented with an additional\n`generation` field in the body.\n\nThe pageserver does not persist this: a generation is only good for the lifetime\nof a process.\n\n#### Finding the remote indices for timelines\n\nBecause index files are now suffixed with generation numbers, the pageserver\ncannot always GET the remote index in one request, because it can't always\nknow a-priori what the latest remote index is.\n\nTypically, the most recent generation to write an index would be our own\ngeneration minus 1. However, this might not be the case: the previous\nnode might have started and acquired a generation number, and then crashed\nbefore writing out a remote index.\n\nIn the general case and as a fallback, the pageserver may list all the `index_part.json`\nfiles for a timeline, sort them by generation, and pick the highest that is `<=`\nits current generation for this attachment. 
The tenant should never load an index\nwith an attachment generation _newer_ than its own.\nThese two rules combined ensure that objects written by later generations are never visible to earlier generations.\n\nNote that if a given attachment picks an index part from an earlier generation (say n-2), but crashes & restarts before it writes its own generation's index part, next time it tries to pick an index part there may be an index part from generation n-1.\nIt would pick the n-1 index part in that case, because it's sorted higher than the previous one from generation n-2.\nSo, the above rules do not guarantee determinism in selecting the index part.\nTenants are allowed to be attached with stale attachment generations during a multiply-attached\nphase in a migration, and in this instance if the old location's pageserver restarts,\nit should not try and load the newer generation's index.\n\nTo summarize, on starting a timeline, the pageserver will:\n\n1. Issue a GET for index_part.json-<my generation - 1>\n2. If 1 failed, issue a ListObjectsv2 request for index_part.json\\* and\n   pick the newest one whose generation is not newer than our own.\n\nOne could optimize this further by using the control plane to record specifically\nwhich generation most recently wrote an index_part.json, if necessary, to increase\nthe probability of finding the index_part.json in one GET. One could also improve\nthe chances by having pageservers proactively write out index_part.json after they\nget a new generation ID.\n\n#### Re-attachment on startup\n\nOn startup, the pageserver will call out to a new control plane `/re-attach`\nAPI (see [Generation API](#generation-api)). 
This returns a list of\ntenants that should be attached to the pageserver, and their generation numbers, which\nthe control plane will increment before returning.\n\nThe pageserver should still scan its local disk on startup, but should _delete_\nany local content for tenants not indicated in the `/re-attach` response: their\nabsence is an implicit detach operation.\n\n**Note** if a tenant is omitted from the re-attach response, its local disk content\nwill be deleted. This will change in subsequent work, when the control plane gains\nthe concept of a secondary/standby location: a node with local content may revert\nto this status and retain some local content.\n\n#### Cleaning up previous generations' remote indices\n\nDeletion of old indices is not necessary for correctness, although it is necessary\nto avoid the ListObjects fallback in the previous section becoming ever more expensive.\n\nOnce the new attachment has written out its index_part.json, it may asynchronously clean up historic index_part.json\nobjects that were found.\n\nWe may choose to implement this deletion either as an explicit step after we\nwrite out index_part for the first time in a pageserver's lifetime, or for\nsimplicity just do it periodically as part of the background scrub (see [scrubbing](#cleaning-up-orphan-objects-scrubbing));\n\n### Control Plane Changes\n\n#### Store generations for attaching tenants\n\n- The `Project` table must store the generation number for use when\n  attaching the tenant to a new pageserver.\n- The `/v1/tenant/:tenant_id/attach` pageserver API will require the generation number,\n  which the control plane can supply by simply incrementing the `Project`'s\n  generation number each time the tenant is attached to a different server: the same database\n  transaction that changes the assigned pageserver should also change the generation number.\n\n#### Generation API\n\nThis section describes an API that could be provided directly by the control plane,\nor built 
as a separate microservice. In earlier parts of the RFC, when we\ndiscuss the control plane providing generation numbers, we are referring to this API.\n\nThe API endpoints used by the pageserver to acquire and validate generation\nnumbers are quite simple, and only require access to some persistent and\nlinearizable storage (such as a database).\n\nBuilding this into the control plane is proposed as a least-effort option to exploit existing infrastructure and implement generation number issuance in the same transaction that mandates it (i.e., the transaction that updates the `Project` assignment to another pageserver).\nHowever, this is not mandatory: this \"Generation Number Issuer\" could\nbe built as a microservice. In practice, we will write such a miniature service\nanyway, to enable E2E pageserver/compute testing without control plane.\n\nThe endpoints required by pageservers are:\n\n##### `/re-attach`\n\n- Request: `{node_id: <u32>}`\n- Response:\n  - 200 `{tenants: [{id: <TenantId>, gen: <u32>}]}`\n  - 404: unknown node_id\n  - (Future: 429: flapping detected, perhaps nodes are fighting for the same node ID,\n    or perhaps this node was in a retry loop)\n  - (On unknown tenants, omit tenant from `tenants` array)\n- Server behavior: query database for which tenants should be attached to this pageserver.\n  - for each tenant that should be attached, increment the attachment generation and\n    include the new generation in the response\n- Client behavior:\n  - for all tenants in the response, activate with the new generation number\n  - for any local disk content _not_ referenced in the response, act as if we\n    had been asked to detach it (i.e. 
delete local files)\n\n**Note** the `node_id` in this request will change in future if we move to ephemeral\nnode IDs, to be replaced with some correlation ID that helps the control plane realize\nif a process is running with the same storage as a previous pageserver process (e.g.\nwe might use EC2 instance ID, or we might just write some UUID to the disk the first\ntime we use it)\n\n##### `/validate`\n\n- Request: `{'tenants': [{tenant: <tenant id>, attach_gen: <gen>}, ...]}`\n- Response:\n  - 200 `{'tenants': [{tenant: <tenant id>, status: <bool>}...]}`\n  - (On unknown tenants, omit tenant from `tenants` array)\n- Purpose: enable the pageserver to discover for the given attachments whether they are still the latest.\n- Server behavior: this is a read-only operation: simply compare the generations in the request with\n  the generations known to the server, and set status to `true` if they match.\n- Client behavior: clients must not do deletions within a tenant's remote data until they have\n  received a response indicating the generation they hold for the attachment is current.\n\n#### Use of `/load` and `/ignore` APIs\n\nBecause the pageserver will be changed to only attach tenants on startup\nbased on the control plane's response to a `/re-attach` request, the load/ignore\nAPIs no longer make sense in their current form.\n\nThe `/load` API becomes functionally equivalent to attach, and will be removed:\nany location that used `/load` before should just attach instead.\n\nThe `/ignore` API is equivalent to detaching, but without deleting local files.\n\n### Timeline/Branch creation & deletion\n\nAll of the previous arguments for safety have described operations within\na timeline, where we may describe a sequence that includes updates to\nindex_part.json, and where reads and writes are coming from a postgres\nendpoint (writes via the safekeeper).\n\nCreating or destroying a timeline is a bit different, because writes\nare coming from the control plane.\n\nWe must 
be safe against scenarios such as:\n\n- A tenant is attached to pageserver B while pageserver A is\n  in the middle of servicing an RPC from the control plane to\n  create or delete a tenant.\n- A pageserver A has been sent a timeline creation request\n  but becomes unresponsive. The tenant is attached to a\n  different pageserver B, and the timeline creation request\n  is sent there too.\n\n#### Timeline Creation\n\nIf some very slow node tries to do a timeline creation _after_\na more recent generation node has already created the timeline\nand written some data into it, that must not cause harm. This\nis provided in timeline creations by the way all the objects\nwithin the timeline's remote path include a generation suffix:\na slow node in an old generation that attempts to \"create\" a timeline\nthat already exists will just emit an index_part.json with\nan old generation suffix.\n\nTimeline IDs are never reused, so we don't have\nto worry about the case of create/delete/create cycles. If they\nwere re-used during a disaster recovery \"un-delete\" of a timeline,\nthat special case can be handled by calling out to all available pageservers\nto check that they return 404 for the timeline, and to flush their\ndeletion queues in case they had any deletions pending from the\ntimeline.\n\nThe above makes it safe for control plane to change the assignment of\ntenant to pageserver in control plane while a timeline creation is ongoing.\nThe reason is that the creation request against the new assigned pageserver\nuses a new generation number. 
However, care must be taken by control plane\nto ensure that a \"timeline creation successful\" response from some pageserver\nis checked for the pageserver's generation for that timeline's tenant still being the latest.\nIf it is not the latest, the response does not constitute a successful timeline creation.\nIt is acceptable to discard such responses; the scrubber will clean up the S3 state.\nIt is better to issue a timeline deletion request to the stale attachment.\n\n#### Timeline Deletion\n\nTenant/timeline deletion operations are exempt from generation validation\non deletes, and therefore don't have to go through the same deletion\nqueue as GC/compaction layer deletions. This is because once a\ndelete is issued by the control plane, it is a promise that the\ncontrol plane will keep trying until the deletion is done, so even stale\npageservers are permitted to go ahead and delete the objects.\n\nThe implications of this for control plane are:\n\n- During timeline/tenant deletion, the control plane must wait for the deletion to\n  be truly complete (status 404) and also handle the case where the pageserver\n  becomes unavailable, either by waiting for a replacement with the same node_id,\n  or by re-attaching the tenant elsewhere.\n\n- The control plane must persist its intent to delete\n  a timeline/tenant before issuing any RPCs, and then once it starts, it must\n  keep retrying until the tenant/timeline is gone. This is already handled\n  by using a persistent `Operation` record that is retried indefinitely.\n\nTimeline deletion may result in a special kind of object leak, where\nthe latest generation attachment completes a deletion (including erasing\nall objects in the timeline path), but some slow/partitioned node is\nwriting into the timeline path with a stale generation number. 
This would\nnot be caught by any per-timeline scrubbing (see [scrubbing](#cleaning-up-orphan-objects-scrubbing)), since scrubbing happens on the\nattached pageserver, and once the timeline is deleted it isn't attached anywhere.\nThis scenario should be pretty rare, and the control plane can make it even\nrarer by ensuring that if a tenant is in a multi-attached state (e.g. during\nmigration), we wait for that to complete before processing the deletion. Beyond\nthat, we may implement some other top-level scrub of timelines in\nan external tool, to identify any tenant/timeline paths that are not found\nin the control plane database.\n\n#### Examples\n\n- Deletion, node restarts partway through:\n  - By the time we returned 202, we have written a remote delete marker\n  - Any subsequent incarnation of the same node_id will see the remote\n    delete marker and continue to process the deletion\n  - If the original pageserver is lost permanently and no replacement\n    with the same node_id is available, then the control plane must recover\n    by re-attaching the tenant to a different node.\n- Creation, node becomes unresponsive partway through.\n  - Control plane will see HTTP request timeout, keep re-issuing\n    request to whoever is the latest attachment point for the tenant\n    until it succeeds.\n  - Stale nodes may be trying to execute timeline creation: they will\n    write out index_part.json files with\n    stale attachment generation: these will be eventually cleaned up\n    by the same mechanism as other old indices.\n\n### Unsafe case on badly behaved infrastructure\n\nThis section is only relevant if running on a different environment\nthan EC2 machines with ephemeral disks.\n\nIf we ever run pageservers on infrastructure that might transparently restart\na pageserver while leaving an old process running (e.g. 
a VM gets rescheduled\nwithout the old one being fenced), then there is a risk of corruption, when\nthe control plane attaches the tenant, as follows:\n\n- If the control plane sends an `/attach` request to node A, then node A dies\n  and is replaced, and the control plane retries the request without\n  incrementing that attachment ID, then it could end up with two physical nodes\n  both using the same generation number.\n- This is not an issue when using EC2 instances with ephemeral storage, as long\n  as the control plane never re-uses a node ID, but it would need re-examining\n  if running on different infrastructure.\n- To robustly protect against this class of issue, we would either:\n  - add a \"node generation\" to distinguish between different processes holding the\n    same node_id.\n  - or, dispense with static node_id entirely and issue an ephemeral ID to each\n    pageserver process when it starts.\n\n## Implementation Part 2: Optimizations\n\n### Persistent deletion queue\n\nBetween writing out a new index_part.json that doesn't reference an object,\nand executing the deletion, an object passes through a window where it is\nonly referenced in memory, and could be leaked if the pageserver is stopped\nuncleanly. That introduces conflicting incentives: on the one hand, we would\nlike to delay and batch deletions to\n1. minimize the cost of the mandatory validation calls to control plane, and\n2. 
minimize cost for DeleteObjects requests.\nOn the other hand we would also like to minimize leakage by executing\ndeletions promptly.\n\nTo resolve this, we may make the deletion queue persistent\nand then execute the queued deletions in the background at a later time.\n\n_Note: The deletion queue's reason for existence is optimization rather than correctness,\nso there is a lot of flexibility in exactly how it should work,\nas long as it obeys the rule to validate generations before executing deletions,\nso the following details are not essential to the overall RFC._\n\n#### Scope\n\nThe deletion queue will be global per pageserver, not per-tenant. There\nare several reasons for this choice:\n\n- Use the queue as a central point to coalesce validation requests to the\n  control plane: this avoids individual `Timeline` objects ever touching\n  the control plane API, and avoids them having to know the rules about\n  validating deletions. This separation of concerns will avoid burdening\n  the already many-LoC `Timeline` type with even more responsibility.\n- Decouple the deletion queue from Tenant attachment lifetime: we may\n  \"hibernate\" an inactive tenant by tearing down its `Tenant`/`Timeline`\n  objects in the pageserver, without having to wait for deletions to be done.\n- Amortize the cost of I/O for the persistent queue, instead of having many\n  tiny queues.\n- Coalesce deletions into a smaller number of larger DeleteObjects calls\n\nBecause of the cost of doing I/O for persistence, and the desire to coalesce\ngeneration validation requests across tenants, and coalesce deletions into\nlarger DeleteObjects requests, there will be one deletion queue per pageserver\nrather than one per tenant. This has the added benefit that when deactivating\na tenant, we do not have to drain their deletion queue: deletions can proceed\nfor a tenant whose main `Tenant` object has been torn down.\n\n#### Flow of deletion\n\nThe flow of a deletion becomes:\n\n1. 
Need for deletion of an object (=> layer file) is identified.\n2. Unlink the object from all the places that reference it (=> `index_part.json`).\n3. Enqueue the deletion to a persistent queue.\n   Each entry is `tenant_id, attachment_generation, S3 key`.\n4. Validate & execute in batches:\n  4.1 For a batch of entries, call into control plane.\n  4.2 For the subset of entries that passed validation, execute a `DeleteObjects` S3 DELETE request for their S3 keys.\n\nAs outlined in the Part 1 on correctness, it is critical that deletions are only\nexecuted once the key is not referenced anywhere in S3.\nThis property is obviously upheld by the scheme above.\n\n#### We Accept Object Leakage In Acceptable Circumstances\n\nIf we crash in the flow above between (2) and (3), we lose track of the unreferenced object.\nFurther, enqueuing a single entry to the persistent queue may not be durable immediately, in order to amortize the cost of flushing to disk.\nThis is acceptable for now, it can be caught by [the scrubber](#cleaning-up-orphan-objects-scrubbing).\n\nThere are various measures we can take to improve this in the future.\n1. Cap amount of time until enqueued entry becomes durable (timeout for flush-to-disk)\n2. Proactively flush:\n    - On graceful shutdown, as we anticipate that some or\n      all of our attachments may be re-assigned while we are offline.\n    - On tenant detach.\n3. For each entry, keep track of whether it has passed (2).\n   Only admit entries to (4) once they have passed (2).\n   This requires re-writing / two queue entries (intent, commit) per deletion.\n\nThe important take-away with any of the above is that it's not\ndisastrous to leak objects in exceptional circumstances.\n\n#### Operations that may skip the queue\n\nDeletions of an entire timeline are [exempt](#Timeline-Deletion) from generation number validation. 
Once the\ncontrol plane sends the deletion request, there is no requirement to retain the readability\nof any data within the timeline, and all objects within the timeline path may be deleted\nat any time from the control plane's deletion request onwards.\n\nSince deletions of smaller timelines won't have enough objects to compose a full sized\nDeleteObjects request, it is still useful to send these through the last part of the\ndeletion pipeline to coalesce with other executing deletions: to enable this, the\ndeletion queue should expose two input channels: one for deletions that must be\nprocessed in a generation-aware way, and a fast path for timeline deletions, where\nthat fast path may skip validation and the persistent queue.\n\n### Cleaning up orphan objects (scrubbing)\n\nAn orphan object is any object which is no longer referenced by a running node or by metadata.\n\nExamples of how orphan objects arise:\n\n- A node PUTs a layer object, then crashes before it writes the\n  index_part.json that references that layer.\n- A stale node carries on running for some time, and writes out an unbounded number of\n  objects while it believes itself to be the rightful writer for a tenant.\n- A pageserver crashes between un-linking an object from the index, and persisting\n  the object to its deletion queue.\n\nOrphan objects are functionally harmless, but have a small cost due to S3 capacity consumed. We\nmay clean them up at some time in the future, by doing a ListObjectsv2 operation and cross\nreferencing with the latest metadata to identify objects which are not referenced.\n\nScrubbing will be done only by an attached pageserver (not some third party process), and deletions requested during scrub will go through the same\nvalidation as all other deletions: the attachment generation must be\nfresh. 
This avoids the possibility of a stale pageserver incorrectly\nthinking that an object written by a newer generation is stale, and deleting\nit.\n\nIt is not strictly necessary that scrubbing be done by an attached\npageserver: it could also be done externally. However, an external\nscrubber would still require the same validation procedure that\na pageserver's deletion queue performs, before actually erasing\nobjects.\n\n## Operational impact\n\n### Availability\n\nCoordination of generation numbers via the control plane introduces a dependency for certain\noperations:\n\n1. Starting new pageservers (or activating pageservers after a restart)\n2. Executing enqueued deletions\n3. Advertising updated `remote_consistent_lsn` to enable WAL trimming\n\nItem 1. would mean that some in-place restarts that previously would have resumed service even if the control plane were\nunavailable, will now not resume service to users until the control plane is available. We could\navoid this by having a timeout on communication with the control plane, and after some timeout,\nresume service with the previous generation numbers (assuming this was persisted to disk). However,\nthis is unlikely to be needed as the control plane is already an essential & highly available component. Also, having a node re-use an old generation number would complicate\nreasoning about the system, as it would break the invariant that a generation number uniquely identifies\na tenant's attachment to a given pageserver _process_: it would merely identify the tenant's attachment\nto the pageserver _machine_ or its _on-disk-state_.\n\nItem 2. is a non-issue operationally: it's harmless to delay deletions, the only impact of objects pending deletion is\nthe S3 capacity cost.\n\nItem 3. could be an issue if safekeepers are low on disk space and the control plane is unavailable for a long time. 
If this became an issue,\nwe could adjust the safekeeper to delete segments from local disk sooner, as soon as they're uploaded to S3, rather than waiting for\nremote_consistent_lsn to advance.\n\nFor a managed service, the general approach should be to make sure we are monitoring & responding fast enough\nthat control plane outages are bounded in time.\n\nThere is also the fact that control plane runs in a single region.\nThe latency for distant regions is not a big concern for us because all request types added by this RFC are either infrequent or not in the way of the data path.\nHowever, we lose region isolation for the operations listed above.\nThe ongoing work to split console and control will give us per-region control plane, and all operations in this RFC can be handled by these per-region control planes.\nWith that in mind, we accept the trade-offs outlined in this paragraph.\n\nWe will also implement an \"escape hatch\" config for generation numbers, where in a major disaster outage,\nwe may manually run pageservers with a hand-selected generation number, so that we can bring them online\nindependently of a control plane.\n\n### Rollout\n\nAlthough there is coupling between components, we may deploy most of the new data plane components\nindependently of the control plane: initially they can just use a static generation number.\n\n#### Phase 1\n\nThe pageserver is deployed with some special config to:\n\n- Always act like everything is generation 1 and do not wait for a control plane issued generation on attach\n- Skip the places in deletion and remote_consistent_lsn updates where we would call into control plane\n\n#### Phase 2\n\nThe control plane changes are deployed: control plane will now track and increment generation numbers.\n\n#### Phase 3\n\nThe pageserver is deployed with its control-plane-dependent changes enabled: it will now require\nthe control plane to service re-attach requests on startup, and handle generation\nvalidation requests.\n\n### 
On-disk backward compatibility\n\nBackward compatibility with existing data is straightforward:\n\n- When reading the index, we may assume that any layer whose metadata doesn't include\n  generations will have a path without generation suffix.\n- When locating the index file on attachment, we may use the \"fallback\" listing path\n  and if there is only an index without generation suffix, that is the one we load.\n\nIt is not necessary to re-write existing layers: even new index files will be able\nto represent generation-less layers.\n\n### On-disk forward compatibility\n\nWe will do a two phase rollout, probably over multiple releases because we will naturally\nhave some of the read-side code ready before the overall functionality is ready:\n\n1. Deploy pageservers which understand the new index format and generation suffixes\n   in keys, but do not write objects with generation numbers in the keys.\n2. Deploy pageservers that write objects with generation numbers in the keys.\n\nOld pageservers will be oblivious to generation numbers. That means that they can't\nread objects with generation numbers in the name. 
This is why the\nfirst step must deploy the ability to read, before the second step\nstarts writing them.\n\n# Frequently Asked Questions\n\n## Why a generation _suffix_ rather than _prefix_?\n\nThe choice is motivated by object listing, since one can list by prefix but not\nsuffix.\n\nIn [finding remote indices](#finding-the-remote-indices-for-timelines), we rely\non being able to do a prefix listing for `<tenant>/<timeline>/index_part.json*`.\nThat relies on the prefix listing.\n\nThe converse case of using a generation prefix and listing by generation is\nnot needed: one could imagine listing by generation while scrubbing (so that\na particular generation's layers could be scrubbed), but this is not part\nof normal operations, and the [scrubber](#cleaning-up-orphan-objects-scrubbing) probably won't work that way anyway.\n\n## Wouldn't it be simpler to have a separate deletion queue per timeline?\n\nFunctionally speaking, we could. That's how RemoteTimelineClient currently works,\nbut this approach does not map well to a long-lived persistent queue with\ngeneration validation.\n\nAnything we do per-timeline generates tiny random I/O, on a pageserver with\ntens of thousands of timelines operating: to be ready for high scale, we should:\n\n- A) Amortize costs where we can (e.g. a shared deletion queue)\n- B) Expect to put tenants into a quiescent state while they're not\n  busy: i.e. we shouldn't keep a tenant alive to service its deletion queue.\n\nThis was discussed in the [scope](#scope) part of the deletion queue section.\n\n# Appendix A: Examples of use in high availability/failover\n\nThe generation numbers proposed in this RFC are adaptable to a variety of different\nfailover scenarios and models. 
The sections below sketch how they would work in practice.\n\n### In-place restart of a pageserver\n\n\"In-place\" here means that the restart is done before any other element in the system\nhas taken action in response to the node being down.\n\n- After restart, the node issues a re-attach request to the control plane, and\n  receives new generation numbers for all its attached tenants.\n- Tenants may be activated with the generation number in the re-attach response.\n- If any of its attachments were in fact stale (i.e. had been reassigned to another\n  node while this node was offline), then\n  - the re-attach response will inform the pageserver about this by _not_\n    incrementing the generation for that attachment.\n  - This will implicitly block deletions in the tenant, but as an optimization\n    the pageserver should also proactively stop doing S3 uploads when it notices this stale-generation state.\n  - The control plane is expected to eventually detach this tenant from the\n    pageserver.\n\nIf the control plane does not include a tenant in the re-attach response,\nbut there is still local state for the tenant in the filesystem, the pageserver\ndeletes the local state in response and does not load/activate the tenant.\nSee the [earlier section on pageserver startup](#pageserver-attachstartup-changes) for details.\nControl plane can use this mechanism to clean up a pageserver that has been\ndown for so long that all its tenants were migrated away before it came back\nup again and asked for re-attach.\n\n### Failure of a pageserver\n\nIn this context, read \"failure\" as the most ambiguous possible case, where\na pageserver is unavailable to clients and control plane, but may still be executing and talking\nto S3.\n\n#### Case A: re-attachment to other nodes\n\n1. Let's say node 0 becomes unresponsive in a cluster of three nodes 0, 1, 2.\n2. 
Some external mechanism notices that the node is unavailable and initiates\n   movement of all tenants attached to that node to a different node according\n   to some distribution rule.\n   In this example, it would mean incrementing the generation\n   of all tenants that were attached to node 0, as each tenant's assigned pageserver changes.\n3. A tenant which is now attached to node 1 will _also_ still be attached to node\n   0, from the perspective of node 0. Node 0 will still be using its old generation,\n   node 1 will be using a newer generation.\n4. S3 writes will continue from nodes 0 and 1: there will be an index_part.json-00000001\n   _and_ an index_part.json-00000002. Objects written under the old suffix\n   after the new attachment was created do not matter from the rest of the system's\n   perspective: the endpoints are reading from the new attachment location. Objects\n   written by node 0 are just garbage that can be cleaned up at leisure. Node 0 will\n   not do any deletions because it can't synchronize with control plane, or if it could,\n   its deletion queue processing would get errors for the validation requests.\n\n#### Case B: direct node replacement with same node_id and drive\n\nThis is the scenario we would experience if running pageservers in some dynamic\nVM/container environment that would auto-replace a given node_id when it became\nunresponsive, with the node's storage supplied by some network block device\nthat is attached to the replacement VM/container.\n\n1. Let's say node 0 fails, and there may be some other peers but they aren't relevant.\n2. Some external mechanism notices that the node is unavailable, and creates\n   a \"new node 0\" (Node 0b) which is a physically separate server. The original node 0\n   (Node 0a) may still be running, because we do not assume the environment fences nodes.\n3. On startup, node 0b re-attaches and gets higher generation numbers for\n   all tenants.\n4. 
S3 writes continue from nodes 0a and 0b, but the writes do not collide due to different\n   generation in the suffix, and the writes from node 0a are not visible to the rest\n   of the system because endpoints are reading only from node 0b.\n\n# Appendix B: interoperability with other features\n\n## Sharded Keyspace\n\nThe design in this RFC maps neatly to a sharded keyspace design where subsets of the key space\nfor a tenant are assigned to different pageservers:\n\n- the \"unit of work\" for attachments becomes something like a TenantShard rather than a Tenant\n- TenantShards get generation numbers just as Tenants do.\n- Write workload (ingest, compaction) for a tenant is spread out across pageservers via\n  TenantShards, but each TenantShard still has exactly one valid writer at a time.\n\n## Read replicas\n\n_This section is about a passive reader of S3 pageserver state, not a postgres\nread replica_\n\nFor historical reads to LSNs below the remote persistent LSN, any node may act as a reader at any\ntime: remote data is logically immutable data, and the use of deferred deletion in this RFC helps\nmitigate the fact that remote data is not _physically_ immutable (i.e. the actual data for a given\npage moves around as compaction happens).\n\nA read replica needs to be aware of generations in remote data in order to read the latest\nmetadata (find the index_part.json with the latest suffix). It may either query this\nfrom the control plane, or find it with a ListObjectsv2 request.\n\n## Seamless migration\n\nTo make tenant migration totally seamless, we will probably want to intentionally double-attach\na tenant briefly, serving reads from the old node while waiting for the new node to be ready.\n\nThis RFC enables that double-attachment: two nodes may be attached at the same time, with the migration destination\nhaving a higher generation number. The old node will be able to ingest and serve reads, but not\ndo any deletes. 
The new node's attachment must also avoid deleting layers that the old node may\nstill use. A new piece of state\nwill be needed for this in the control plane's definition of an attachment.\n\n## Warm secondary locations\n\nTo enable faster tenant movement after a pageserver is lost, we will probably want to spend some\ndisk capacity on keeping standby locations populated with local disk data.\n\nThere's no conflict between this RFC and that: implementing warm secondary locations on a per-tenant basis\nwould be a separate change to the control plane to store standby location(s) for a tenant. Because\nthe standbys do not write to S3, they do not need to be assigned generation numbers. When a tenant is\nre-attached to a standby location, that would increment the tenant attachment generation and this\nwould work the same as any other attachment change, but with a warm cache.\n\n## Ephemeral node IDs\n\nThis RFC intentionally avoids changing anything fundamental about how pageservers are identified\nand registered with the control plane, to avoid coupling the implementation of pageserver split\nbrain protection with more fundamental changes in the management of the pageservers.\n\nMoving to ephemeral node IDs would provide an extra layer of\nresilience in the system, as it would prevent the control plane\naccidentally attaching to two physical nodes with the same\ngeneration, if somehow there were two physical nodes with\nthe same node IDs (currently we rely on EC2 guarantees to\neliminate this scenario). With ephemeral node IDs, there would be\nno possibility of that happening, no matter the behavior of\nunderlying infrastructure.\n\nNothing fundamental in the pageserver's handling of generations needs to change to handle ephemeral node IDs, since we hardly use the\n`node_id` anywhere. The `/re-attach` API would be extended\nto enable the pageserver to obtain its ephemeral ID, and provide\nsome correlation identifier (e.g. 
EC2 instance ID), to help the\ncontrol plane re-attach tenants to the same physical server that\npreviously had them attached.\n"
  },
  {
    "path": "docs/rfcs/026-pageserver-s3-mvcc.md",
    "content": "This is a copy from the [original Notion page](https://www.notion.so/neondatabase/Proposal-Pageserver-MVCC-S3-Storage-8a424c0c7ec5459e89d3e3f00e87657c?pvs=4), taken on 2023-08-16.\n\nThis is for archival mostly.\nThe RFC that we're likely to go with is https://github.com/neondatabase/neon/pull/4919.\n\n---\n\n# Proposal: Pageserver MVCC S3 Storage\n\ntl;dr: this proposal enables Control Plane to attach a tenant to a new pageserver without being 100% certain that it has been detached from the old pageserver. This enables us to automate failover if a pageserver dies (no human in the loop).\n\n# Problem Statement\n\nThe current Neon architecture requires the Control Plane to guarantee that a tenant is only attached to one pageserver at a time. If a tenant is attached to multiple pageservers simultaneously, the pageservers will overwrite each other’s changes in S3 for that tenant, resulting in data loss for that tenant.\n\nThe above imposes limitations on tenant relocation and future designs for high availability. For instance, Control Plane cannot relocate a tenant to another pageserver before it is 100% certain that the tenant is detached from the source pageserver. If the source pageserver is unresponsive, the tenant detach procedure cannot proceed, and Control Plane has no choice but to wait for either the source to become responsive again, or rely on a node failure detection mechanism to detect that the source pageserver is dead, and give permission to skip the detachment step. Either way, the tenant is unavailable for an extended period, and we have no means to improve it in the current architecture.\n\nNote that there is no 100% correct node failure detection mechanism, and even techniques to accelerate failure detection, such as *shoot-the-other-node-in-the-head*, have their limits. 
So, we currently rely on humans as node failure detectors: they get alerted via PagerDuty, assess the situation under high stress, and make the decision. If they make the wrong call, or the apparently dead pageserver somehow resurrects later, we’ll have data loss.\n\nAlso, by relying on humans, we’re [incurring needless unscalable toil](https://sre.google/sre-book/eliminating-toil/): as Neon grows, pageserver failures will become more and more frequent because our fleet grows. Each instance will need quick response time to minimize downtime for the affected tenants, which implies higher toil, higher resulting attrition, and/or higher personnel cost.\n\nLastly, there are foreseeable needs by operation and product such as zero-downtime relocation and automatic failover/HA. For such features, the ability to have a tenant purposefully or accidentally attached to more than one pageserver will greatly reduce risk of data loss, and improve availability.\n\n# High-Level Idea\n\nThe core idea is to evolve the per-Tenant S3 state to an MVCC-like scheme, allowing multiple pageservers to operate on the same tenant S3 state without interference. To make changes to S3, pageservers acquire long-running transactions from Control Plane. After opening a transaction, Pageservers make PUTs directly against S3, but the keys include the transaction ID, so overwrites never happen. Periodically, pageservers talk back to Control Plane to commit their transaction. This is where Control Plane enforces strict linearizability, favoring availability over work-conservation: commit is only granted if no transaction started after the one that’s requesting commit. 
Garbage collection is done through deadlists, and it’s simplified tremendously by the above commit grant/reject policy.\n\nMinimal changes are required for safekeepers to allow WAL for a single timeline to be consumed by more than one pageserver without premature truncation.\n\n**The above scheme makes it safe to attach tenants without a 100% correct node failure detection mechanism. Further, it makes it safe to interleave tenant-attachment to pageservers, unlocking new capabilities for (internal) product features:**\n\n- **Fast, Zero-Toil Failover on Network Partitions or Instance Failure**: if a pageserver is not reachable (network partition, hardware failure, overload) we want to spread its attached tenants to new pageservers to restore availability, within the range of *seconds*. We cannot afford gracious timeouts to maximize the probability that the unreachable pageserver has ceased writing to S3. This proposal enables us to attach the tenants to the replacement pageservers, and redirect their computes, without having to wait for confirmation that the unreachable pageserver has ceased writing to S3.\n- **Zero-Downtime Relocation:** we want to be able to relocate tenants to different pageservers with minimal availability or latency impact. This proposal enables us to attach the relocating Tenant to the destination Pageserver before detaching it from the source Pageserver. This can help minimize downtime because we can wait for the destination to catch up on WAL processing before redirecting Computes.\n\n# Design\n\nThe core idea is to evolve the per-Tenant S3 state to a per-tenant MVCC-like scheme.\n\nTo make S3 changes for a given tenant, Pageserver requests a transaction ID from control plane for that tenant. 
Without a transaction ID, Pageserver does not write to S3.\n\nOnce Pageserver has received a transaction ID, it is allowed to produce new objects and overwrite objects created in this transaction. Pageserver is not allowed to delete any objects; instead, it marks the object as deleted by appending the key to the transaction’s deadlist for later deletion. Commits of transactions are serialized through Control Plane: when Pageserver wants to commit a transaction, it sends an RPC to Control Plane. Control Plane responds with a commit grant or commit reject message. Commit grant means that the transaction’s changes are now visible to subsequent transactions. Commit reject means that the transaction’s changes are not and never will be visible to another Pageserver instance, and the rejected Pageserver is to cease further activity on that tenant.\n\n## Commit grant/reject policy\n\nFor the purposes of Pageserver, we want **linearizability** of a tenant’s S3 state. Since our transactions are scoped per tenant, it is sufficient for linearizability to grant commit if and only if no other transaction has been started since the commit-requesting transaction started.\n\nFor example, consider the case of a single tenant, attached to Pageserver A. Pageserver A has an open transaction but becomes unresponsive. Control Plane decides to relocate the tenant to another Pageserver B. It need *not* wait for A to be 100%-certainly down before B can start uploading to S3 for that tenant. Instead, B can start a new transaction right away, make progress, and get commit grants. What about A? The transaction is RejectPending in Control Plane until A eventually becomes responsive again, tries to commit, gets a rejection, acknowledges it, and thus its transaction becomes RejectAcknowledged. 
If A is definitively dead, operator can also force-transition from state RejectPending to RejectAcknowledged. But critically, Control Plane doesn’t have to wait for A’s transaction to become RejectAcknowledged before attaching the tenant to B.\n\n```mermaid\nsequenceDiagram\n\n   participant CP\n   participant A\n   participant S3\n   participant B\n\n\t CP -->> A: attach tenant\n   activate A\n\t A -->> CP: start txn\n\t CP -->> A: txn=23, last_committed_txn=22\n\n\n\t Note over CP,A: network partition\n\t CP --x A: heartbeat\n\t CP --x A: heartbeat\n\n\t Note over CP: relocate tenant to avoid downtime\n\t CP -->> B: attach tenant\n   activate B\n\t B -->> CP: start txn\n   Note over CP: mark A's txn 23 as RejectPending\n\t CP -->> B: txn=24, last-committed txn is 22\n\t B -->> S3: PUT X.layer.24<br>PUT index_part.json.24 referencing X.layer.24\n\t B -->> CP: request commit\n\t CP -->> B: granted\n   B -->> CP: start txn\n  CP -->> B: txn=25, last_committed_txn=22\n\n   A -->> S3: PUT Y.layer.23 <br> PUT index_part.json.23 referencing Y.layer.23\n  A --x CP: request commit\n\t A --x CP: request commit\n\n   Note over CP,A: partition is over\n\n   A -->> CP: request commit\n\n   Note over CP: most recently started txn is 25, not 23, reject\n\n   CP -->> A: reject\n   A -->> CP: acknowledge reject\n\n   Note over CP: mark A's txn 23 as RejectAcknowledged\n\n  deactivate A\n\n  B -->> S3: PUT 000-FFF_X-Y.layer.25<br>...\n\n  deactivate B\n\n\n```\n\nIf a Pageserver gets a rejection to a commit request, it acknowledges rejection and cedes further S3 uploads for the tenant, until it receives a `/detach` request for the tenant (control plane has most likely attached the tenant to another pageserver in the meantime).\n\nIn practice, Control Plane will probably extend the commit grant/reject schema above, taking into account the pageserver to which it last attached the tenant. 
In the above example, Control Plane could remember that the pageserver that is supposed to host the tenant is pageserver B, and reject start-txn and commit requests from pageserver A. It would also use such requests from A as a signal that A is reachable again, and retry the `/detach`.\n\n<aside>\n💡 A commit failure causes the tenant to become effectively `Broken`. Pageserver should persist this locally so it doesn’t bother ControlPlane for a new txn when Pageserver is restarted.\n\n</aside>\n\n## Visibility\n\nWe mentioned earlier that once a transaction commits, its changes are visible to subsequent transactions. But how does a given transaction know where to look for the data? There is no longer a single `index_part.json` per timeline, or a single `timelines/:timeline_id` prefix to look for; they’re all multi-versioned, suffixed by the txn number.\nThe solution is: at transaction start, Pageserver receives the last-committed transaction ID from Control Plane (`last_committed_txn` in the diagram). last_committed_txn is the upper bound for what is visible for the current transaction. Control Plane keeps track of each open transaction’s last_committed_txn for purposes of garbage collection (see later paragraph).\nEquipped with last_committed_txn, Pageserver then discovers\n\n- the current index part of a timeline at `tenants/:tenant_id/timelines/:timeline_id/index_part.json.$last_committed_txn`. The `index_part.json.$last_committed_txn` has the exact same contents as the current architecture’s index_part.json, i.e. full list of layers.\n- the list of existent timelines as part of the `attach` RPC from CP;\n\nThere is no other S3 state per tenant, so, that’s all the visibility required.\nAn alternative to receiving the list of existent timelines from CP is to introduce a proper **SetOfTimelines** object in S3, and multi-version it just like above. 
For example, we could have a `tenants/:tenant_id/timelines.json.$txn` file that references `index_part.json.$last_committed_txn`. It can be added later if more separation between CP and PS is desired.\n\nSo, the only MVCC’ed object types in this proposal are LayerFile and IndexPart (=individual timeline), but not the SetOfTimelines in a given tenant. Is this a problem? For example, the Pageserver’s garbage collection code needs to know the full set of timelines of a tenant. Otherwise it’ll make incorrect decisions. What if Pageserver A knows about timelines {R,S}, but another Pageserver B created an additional branch T, so, its set of timelines is {R,S,T}. Both pageservers will run GC code, and so, PS A may decide to delete a layer that’s still needed for branch T. This is not a problem with this proposal, because the effect of GC (i.e., layer deletion) is properly MVCC’ed.\n\n## Longevity Of Transactions & Availability\n\nPageserver depends on Control Plane to start a new transaction. If ControlPlane is down, no new transactions can be started.\n\nPageservers commit transactions based on a maximum amount of uncommitted changes that have accumulated in S3. A lower maximum increases dependence and load on ControlPlane which decreases availability. 
A higher maximum risks losing more work in the event of failover; the work will have to be re-done in a new transaction on the new node.\n\nPageservers persist the open txn id in local storage, so that they can resume the transaction after restart, without dependence on Control Plane.\n\n## **Operations**\n\n**PUTs:**\n\n- **layer files**\n    - current architecture: layer files are supposed to be write-once, but actually, there are edge-cases where we PUT the same layer file name twice; namely if we PUT the file to S3 but crash before uploading the index part that references it; then detach + attach, and re-run compaction, which is non-deterministic.\n    - this proposal: with transactions, we can now upload layers and index_part.json concurrently, just need to make sure layer file upload is done before we request txn commit.\n- **index part** upload: `index_part.json.$txn` may be created and subsequently overwritten multiple times in a transaction; it is an availability/work-loss trade-off how often to request a commit from CP.\n\n**DELETEs**: for deletion, we maintain a deadlist per transaction. It is located at `tenants/:tenant_id/deadlist/deadlist.json.$txn`. It is PUT once before the pageserver requests commit, and not changed after sending request to commit. An object created in the current txn need not (but can) be on the deadlist — it can be DELETEd immediately because it’s not visible to other transactions. 
An example use case would be an L0 layer that gets compacted within one transaction; or, if we ever start MVCC’ing the set of timelines of a tenant, a short-lived branch that is created & destroyed within one transaction.\n\n<aside>\n☝ **Deadlist Invariant:** if an object is on a deadlist of transaction T, it is not referenced from anywhere else in the full state visible to T or any later started transaction > T.\n\n</aside>\n\n### Rationale For Deadlist.json\n\nGiven that this proposal only MVCC’s layers and indexparts, one may ask why the deadlist isn’t part of indexpart. The reason is to not lose generality: the deadlist is just a list of keys; it is not necessary to understand the data format of the versioned object to process the deadlist. This is important for garbage collection / vacuuming, which we’ll come to in the next section.\n\n## Garbage Collection / Vacuuming\n\nAfter a transaction has reached reject-acknowledged state, Control Plane initiates a garbage collection procedure for the aborted transaction.\n\nControl Plane is in the unique position of knowing all transaction states. Here is a sketch of the exact transaction states and what Control Plane keeps track of.\n\n```\nstruct Tenant {\n  ...\n\n  txns: HashMap<TxnId, Transaction>,\n  // the most recently started txn's id; only most recently started can win\n  next_winner_txn: Option<TxnId>,\n}\nstruct Transaction {\n  id: TxnId, // immutable\n  last_committed_txn: TxnId, // immutable; the most recent txn in state `Committed`\n                             // when self was started\n  pageserver_id: PageserverId,\n  state: enum {\n    Open,\n    Committed,\n    RejectPending,\n    RejectAcknowledged, // invariant: we know all S3 activity has ceded\n    GarbageCollected,\n  }\n}\n```\n\nObject creations & deletions by a rejected transaction have never been visible to other transactions. That is true for both RejectPending and RejectAcknowledged states. 
The difference is that, in RejectPending, the pageserver may still be uploading to S3, whereas in RejectAcknowledged, Control Plane can be certain that all S3 activity in the name of that transaction has ceded. So, once a transaction reaches the RejectAcknowledged state, it is safe to DELETE all objects created by that transaction, and discard the transaction’s deadlists.\n\nA transaction T in state Committed has subsequent transactions that may or may not reference the objects it created. None of the subsequent transactions can reference the objects on T’s deadlist, though, as per the Deadlist Invariant (see previous section).\n\nSo, for garbage collection, we need to assess transactions in state Committed and RejectAcknowledged:\n\n- Committed: delete objects on the deadlist.\n    - We don’t need a LIST request here, the deadlist is sufficient. So, it’s really cheap.\n    - This is **not true MVCC garbage collection**; by deleting the objects on Committed transaction T’s deadlist, we might delete data referenced by other transactions that were concurrent with T, i.e., they started while T was still open. However, the fact that T is committed means that the other transactions are RejectPending or RejectAcknowledged, so, they don’t matter. Pageservers executing these doomed RejectPending transactions must handle 404 for GETs gracefully, e.g., by trying to commit txn so they observe the rejection they’re destined to get anyways. 404s for RejectAcknowledged are handled below.\n- RejectAcknowledged: delete all objects created in that txn, and discard deadlists.\n    - 404s / object-already-deleted type messages must be expected because of Committed garbage collection (see above)\n    - How to get this list of objects created in a txn? 
Open but solvable design question; Ideas:\n        - **Brute force**: within tenant prefix, search for all keys ending in `.$txn` and delete them.\n        - **WAL for PUTs**: before a txn PUTs an object, it logs to S3, or some other equivalently durable storage, that it’s going to do it. If we log to S3, this means we have to do an additional WAL PUT per “real” PUT.\n        - **LIST with reorg’ed S3 layout (preferred one right now):** layout S3 key space such that `$txn` comes first, i.e., `tenants/:tenant_id/$txn/timelines/:timeline_id/*.json.$txn`. That way, when we need to GC a RejectAcknowledged txn, we just LIST the entire `tenants/:tenant_id/$txn` prefix and delete it. The cost of GC for RejectAcknowledged transactions is thus proportional to the number of objects created in that transaction.\n\n## Branches\n\nThis proposal only MVCC’s layer files and index_part.json, but leaves the tenant object not-MVCCed. We argued earlier that it’s fine to ignore this for now, because\n\n1. Control Plane can act as source-of-truth for the set of timelines, and\n2. The only operation that makes decisions based on “set of timelines” is GC, which in turn only does layer deletions, and layer deletions ***are*** properly MVCC’ed.\n\nNow that we’ve introduced garbage collection, let’s elaborate a little more on (2). Recall our example from earlier: Pageserver A knows about timelines {R,S}, but another Pageserver B created an additional branch T, so, its set of timelines is {R,S,T}. Both pageservers will run GC code, and so, PS A may decide to delete a layer that’s still needed for branch T.\n\nHow does the MVCC’ing of layer files protect us here? If A decides to delete that layer, it’s just on A’s transaction’s deadlist, but still present in S3 and usable by B. If A commits first, B won’t be able to commit and the layers in timeline T will be vacuumed. 
If B commits first, A’s deadlist is discarded and the layer continues to exist.\n\n## Safekeeper Changes\n\nWe need to teach the safekeepers that there can be multiple pageservers requesting WAL for the same timeline, in order to prevent premature WAL truncation.\n\nIn the current architecture, the Safekeeper service currently assumes only one Pageserver and is allowed to prune WAL older than that Pageserver’s `remote_consistent_lsn`. Safekeeper currently learns the `remote_consistent_lsn` through the walreceiver protocol.\n\nSo, if we have a tenant attached to two pageservers at the same time, they will both try to stream WAL and the Safekeeper will get confused about which connection’s `remote_consistent_lsn` to use as a basis for WAL pruning.\n\nWhat do we need to change to make it work? We need to make sure that the Safekeepers only prune WAL up to the `remote_consistent_lsn` of the last-committed transaction.\n\nThe straight-forward way to get it is to re-design WAL pruning as follows:\n\n1. Pageserver reports remote_consistent_lsn as part of transaction commit to Control Plane.\n2. Control Plane makes sure transaction state update is persisted.\n3. Control Plane (asynchronous to transaction commit) reconciles with Safekeepers to ensure WAL pruning happens.\n\nThe above requires non-trivial changes, but, in the light of other planned projects such as restore-tenant-from-safekeeper-wal-backups, I think Control Plane will need to get involved in WAL pruning anyways.\n\n# How This Proposal Unlocks Future Features\n\nLet us revisit the example from the introduction where we were thinking about handling network partitions. Network partitions need to be solved first, because they’re unavoidable in distributed systems. We did that. 
Now let’s see how we can solve actual product problems:\n\n## **Fast, Zero-Toil Failover on Network Partitions or Instance Failure**\n\nThe “Problem Statement” section outlined the current architecture’s problems with regards to network partitions or instance failure: it requires a 100% correct node-dead detector to make decisions, which doesn’t exist in reality. We rely instead on human toil: an oncall engineer has to inspect the situation and make a decision, which may be incorrect and in any case take time in the order of minutes, which means equivalent downtime for users.\n\nWith this proposal, automatic failover for pageservers is trivial:\n\nIf a pageserver is unresponsive from Control Plane’s / Compute’s perspective, Control Plane does the following:\n\n- attach all tenants of the unresponsive pageserver to new pageservers\n- switch over these tenants’ computes immediately;\n\nAt this point, availability is restored and user pain relieved.\n\nWhat’s left is to somehow close the doomed transaction of the unresponsive pageserver, so that it becomes RejectAcknowledged, and GC can make progress. Since S3 is cheap, we can afford to wait a really long time here, especially if we put a soft bound on the amount of data a transaction may produce before it must commit. Procedure:\n\n1. Ensure the unresponsive pageserver is taken out of rotation for new attachments. That probably should happen as part of the routine above.\n2. Make a human operator investigate decide what to do (next morning, NO ONCALL ALERT):\n    1. Inspect the instance, investigate logs, understand root cause.\n    2. Try to re-establish connectivity between pageserver and Control Plane so that pageserver can retry commits, get rejected, ack rejection ⇒ enable GC.\n    3. Use below procedure to decommission pageserver.\n\n### Decommissioning A Pageserver (Dead or Alive-but-Unresponsive)\n\nThe solution, enabled by this proposal:\n\n1. 
Ensure that pageserver’s S3 credentials are revoked so that it cannot make new uploads, which wouldn’t be tracked anywhere.\n2. Let enough time pass for the S3 credential revocation to propagate. Amazon doesn’t give a guarantee here. As stated earlier, we can easily afford to wait here.\n3. Mark all Open and RejectPending transactions of that pageserver as RejectAcknowledged.\n\nRevocation of the S3 credentials is required so that, once we transition all the transactions of that pageserver to RejectAcknowledged, one garbage-collection pass is guaranteed to delete all objects that will ever exist for that pageserver. That way, we need not check *GarbageCollected* transactions ever again.\n\n## Workflow: Zero-Downtime Relocation\n\nWith zero-downtime relocation, the goal is to have the target pageserver warmed up, i.e., at the same `last_record_lsn` as the source pageserver, before switching over Computes from source to target pageserver.\n\nWith this proposal, it works like so:\n\n1. Grant source pageserver its last open transaction. This one is doomed to be rejected later, unless the relocation fails.\n2. Grant target pageserver its first open transaction.\n3. Have target pageserver catch up on WAL, streaming from last-committed-txn’s remote_consistent_lsn onwards.\n4. Once target pageserver reports `last_record_lsn` close enough to source pageserver, target pageserver requests commit.\n5. Drain compute traffic from source to target pageserver. (Source can still answer requests until it tries to commit and gets rejected, so, this will be quite smooth).\n\nNote that as soon as we complete step (4), the source pageserver’s transaction is doomed to be rejected later. Conversely, if the target can’t catch up fast enough, the source will make a transaction commit earlier. This will generally happen if there is a lot of write traffic coming in. 
The design space to make thing smooth here is large, but well explored in other areas of computing, e.g., VM live migration. We have all the important policy levers at hand, e.g.,\n\n- delaying source commits if we see target making progress\n- slowing down source consumption (need some signalling mechanism for it)\n- slowing down compute wal generation\n- …\n\nIt doesn’t really matter, what’s important is that two pageservers can overlap.\n\n# Additional Trade-Offs / Remarks Brought Up During Peer Review\n\nThis proposal was read by and discussed @Stas and @Dmitry Rodionov prior to publishing it with the broader team. (This does not mean they endorse this proposal!).\n\nIssues that we discussed:\n\n1. **Frequency of transactions:** If even idle tenants commit every 10min or so, that’s quite a lot of load on Control Plane. Can we minimize it by Equating Transaction Commit Period to Attachment Period? I.e. start txn on attach, commit on detach?\n    1. Would be nice, but, if a tenant is attached for 1 month, then PS dies, we lose 1 month of work.\n    2. ⇒ my solution to this problem: Adjusted this proposal to make transaction commit frequency proportional to amount of uncommitted data.\n        1. It’s ok to spend resources on active users, they pay us money to do it!\n        2. The amount of work per transaction is minimal.\n            1. In current Control Plane, it’s a small database transaction that is super unlikely to conflict with other transactions.\n            2. I have very little concerns about scalability of the commit workload on CP side because it's trivially horizontally scalable by sharding by tenant.\n        3. There's no super stringent availability requirement on control plane; if a txn can't commit because it can't reach the CP, PS can continue & retry in the background, speculating that it's CP downtime and not PS-partitioned-off scenario.\n        4. 
Without stringent availability requirement, there's flexibility for future changes to CP-side-implementation.\n2. **Does this proposal address mirroring / no-performance-degradation failover?**\n    1. No it doesn’t. It only provides the building block for attaching a tenant to a new pageserver without having to worry that the tenant is detached on the old pageserver.\n    2. A simple scheme to build no-performance-degradation failover on top of this proposal is to have an asynchronous read-only replica of a tenant on another pageserver in the same region.\n    3. Another more ambitious scheme to get no-performance-degradation would be [One-Pager: Layer File Spreading (Christian)](https://www.notion.so/One-Pager-Layer-File-Spreading-Christian-eb6b64182a214e11b3fceceee688d843?pvs=21); this proposal would be used in layer file spreading for risk-free automation of TenantLeader failover, which hasn’t been addressed there.\n    4. In any case, failover would restart from an older S3 state, and need to re-ingest WAL before being able to serve recently written pages.\n        1. Is that a show-stopper? I think not.\n        2. Is it suboptimal? Absolutely: if a pageserver instance fails, all its tenants will be distributed among the remaining pageservers (OK), and all these tenants will ask the safekeepers for WAL at the same time (BAD). So, pageserver instance failure will cause a load spike in safekeepers.\n            1. Personally I think that’s an OK trade-off to make.\n            2. There are countless options to avoid / mitigate the load spike. E.g., pro-actively streaming WAL to the standby read-only replica.\n\n3. **Does this proposal allow multiple writers for a tenant?**\n    1. In abstract terms, this proposal provides a linearized history for a given S3 prefix.\n    2. 
In concrete terms, this proposal provides a linearized history per tenant.\n    3. There can be multiple writers at a given time, but only one of them will win to become part of the linearized history.\n4. ************************************************************************************Alternative ideas mentioned during meetings that should be turned into a written proposal like this one:************************************************************************************\n    1. @Dmitry Rodionov : having linearized storage of index_part.json in some database that allows serializable transactions / atomic compare-and-swap PUT\n    2. @Dmitry Rodionov :\n    3. @Stas : something like this scheme, but somehow find a way to equate attachment duration with transaction duration, without losing work if pageserver dies months after attachment.\n"
  },
  {
    "path": "docs/rfcs/027-crash-consistent-layer-map-through-index-part.md",
    "content": "\n# Crash-Consistent Layer Map Updates By Leveraging `index_part.json`\n\n* Created on: Aug 23, 2023\n* Author: Christian Schwarz\n\n## Summary\n\nThis RFC describes a simple scheme to make layer map updates crash consistent by leveraging the `index_part.json` in remote storage.\nWithout such a mechanism, crashes can induce certain edge cases in which broadly held assumptions about system invariants don't hold.\n\n## Motivation\n\n### Background\n\nWe can currently easily make complex, atomic updates to the layer map by means of an RwLock.\nIf we crash or restart pageserver, we reconstruct the layer map from:\n1. local timeline directory contents\n2. remote `index_part.json` contents.\n\nThe function that is responsible for this is called `Timeline::load_layer_map()`.\nThe reconciliation process's behavior is the following:\n* local-only files will become part of the layer map as local-only layers and rescheduled for upload\n* For a file name that, by its name, is present locally and in the remote `index_part.json`, but where the local file has a different size (future: checksum) than the remote file, we will delete the local file and leave the remote file as a `RemoteLayer` in the layer map.\n\n### The Problem\n\nThere are are cases where we need to make an atomic update to the layer map that involves **more than one layer**.\nThe best example is compaction, where we need to insert the L1 layers generated from the L0 layers, and remove the L0 layers.\nAs stated above, making the update to the layer map in atomic way is trivial.\nBut, there is no system call API to make an atomic update to a directory that involves more than one file rename and deletion.\nCurrently, we issue the system calls one by one and hope we don't crash.\n\nWhat happens if we crash and restart in the middle of that system call sequence?\nWe will reconstruct the layer map according to the reconciliation process, taking as input whatever transitory state the timeline directory 
ended up in.\n\nWe cannot roll back or complete the timeline directory update during which we crashed, because we keep no record of the changes we plan to make.\n\n### Problem's Implications For Compaction\n\nThe implications of the above are primarily problematic for compaction.\nSpecifically, the part of it that compacts L0 layers into L1 layers.\n\nRemember that compaction takes a set of L0 layers and reshuffles the delta records in them into L1 layer files.\nOnce the L1 layer files are written to disk, it atomically removes the L0 layers from the layer map and adds the L1 layers to the layer map.\nIt then deletes the L0 layers locally, and schedules an upload of the L1 layers and an updated index part.\n\nIf we crash before deleting L0s, but after writing out L1s, the next compaction after restart will re-digest the L0s and produce new L1s.\nThis means the compaction after restart will **overwrite** the previously written L1s.\nCurrently we also schedule an S3 upload of the overwritten L1.\n\nIf the compaction algorithm doesn't change between the two compaction runs, is deterministic, and uses the same set of L0s as input, then the second run will produce identical L1s and the overwrites will go unnoticed.\n\n*However*:\n1. the file size of the overwritten L1s may not be identical, and\n2. the bit pattern of the overwritten L1s may not be identical, and,\n3. 
in the future, we may want to make the compaction code non-deterministic, influenced by past access patterns, or otherwise change it, resulting in L1 overwrites with a different set of delta records than before the overwrite\n\nThe items above are a problem for the [split-brain protection RFC](https://github.com/neondatabase/neon/pull/4919) because it assumes that layer files in S3 are only ever deleted, but never replaced (overPUTted).\n\nFor example, if an unresponsive node A becomes active again after control plane has relocated the tenant to a new node B, the node A may overwrite some L1s.\nBut node B based its world view on the version of node A's `index_part.json` from _before_ the overwrite.\nThat earlier `index_part.json` contained the file size of the pre-overwrite L1.\nIf the overwritten L1 has a different file size, node B will refuse to read data from the overwritten L1.\nEffectively, the data in the L1 has become inaccessible to node B.\nIf node B already uploaded an index part itself, all subsequent attachments will use node B's index part, and run into the same problem.\n\nIf we ever introduce checksums instead of checking just the file size, then a mismatching bit pattern (2) will cause similar problems.\n\nIn case of (1) and (2), where we know that the logical content of the layers is still the same, we can recover by manually patching the `index_part.json` of the new node to the overwritten L1's file size / checksum.\n\nBut if (3) ever happens, the logical content may be different, and, we could have truly lost data.\n\nGiven the above considerations, we should avoid making correctness of split-brain protection dependent on overwrites preserving _logical_ layer file contents.\n**It is a much cleaner separation of concerns to require that layer files are truly immutable in S3, i.e., PUT once and then only DELETEd, never overwritten (overPUTted).**\n\n## Design\n\nInstead of reconciling a layer map from local timeline directory contents and remote 
index part, this RFC proposes to view the remote index part as authoritative during timeline load.\nLocal layer files will be recognized if they match what's listed in remote index part, and removed otherwise.\n\nDuring **timeline load**, the only thing that matters is the remote index part content.\nEssentially, timeline load becomes much like attach, except we don't need to prefix-list the remote timelines.\nThe local timeline dir's `metadata` file does not matter.\nThe layer files in the local timeline dir are seen as a nice-to-have cache of layer files that are in the remote index part.\nAny layer files in the local timeline dir that aren't in the remote index part are removed during startup.\nThe `Timeline::load_layer_map()` no longer \"merges\" local timeline dir contents with the remote index part.\nInstead, it treats the remote index part as the authoritative layer map.\nIf the local timeline dir contains a layer that is in the remote index part, that's nice, and we'll re-use it if file size (and in the future, check sum) match what's stated in the index part.\nIf it doesn't match, we remove the file from the local timeline dir.\n\nAfter load, **at runtime**, nothing changes compared to what we did before this RFC.\nThe procedure for single- and multi-object changes is reproduced here for reference:\n* For any new layers that the change adds:\n  * Write them to a temporary location.\n  * While holding layer map lock:\n    * Move them to the final location.\n    * Insert into layer map.\n* Make the S3 changes.\n  We won't reproduce the remote timeline client method calls here because these are subject to change.\n  Instead we reproduce the sequence of s3 changes that must result for a given single-/multi-object change:\n    * PUT layer files inserted by the change.\n    * PUT an index part that has insertions and deletions of the change.\n    * DELETE the layer files that are deleted by the change.\n\nNote that it is safe for the DELETE to be deferred 
arbitrarily.\n* If it never happens, we leak the object, but, that's not a correctness concern.\n* As of #4938, we don't schedule the remote timeline client operation for deletion immediately, but, only when we drop the `LayerInner`.\n* With the [split-brain protection RFC](https://github.com/neondatabase/neon/pull/4919), the deletions will be written to deletion queue for processing when it's safe to do so (see the RFC for details).\n\n## How This Solves The Problem\n\nIf we crash before we've finished the S3 changes, then timeline load will reset layer map to the state that's in the S3 index part.\nThe S3 change sequence above is obviously crash-consistent.\nIf we crash before the index part PUT, then we leak the inserted layer files to S3.\nIf we crash after the index part PUT, we leak the to-be-DELETEd layer files to S3.\nLeaking is fine, it's a pre-existing condition and not addressed in this RFC.\n\nMulti-object changes that previously created and removed files in timeline dir are now atomic because the layer map updates are atomic and crash consistent:\n* atomic layer map update at runtime, currently by using an RwLock in write mode\n* atomic `index_part.json` update in S3, as per guarantee that S3 PUT is atomic\n* local timeline dir state:\n  * irrelevant for layer map content => irrelevant for atomic updates / crash consistency\n  * if we crash after index part PUT, local layer files will be used, so, no on-demand downloads needed for them\n  * if we crash before index part PUT, local layer files will be deleted\n\n## Trade-Offs\n\n### Fundamental\n\nIf we crash before finishing the index part PUT, we lose all the work that hasn't reached the S3 `index_part.json`:\n* wal ingest: we lose not-yet-uploaded L0s; load on the **safekeepers** + work for pageserver\n* compaction: we lose the entire compaction iteration work; need to re-do it again\n* gc: no change to what we have today\n\nIf the work is still deemed necessary after restart, the restarted restarted 
pageserver will re-do this work.\nThe amount of work to be re-done is capped to the lag of S3 changes to the local changes.\nAssuming upload queue allows for unlimited queue depth (that's what it does today), this means:\n* on-demand downloads that were needed to do the work: are likely still present, not lost\n* wal ingest: currently unbounded\n* L0 => L1 compaction: CPU time proportional to `O(sum(L0 size))` and upload work proportional to `O()`\n  * Compaction threshold is 10 L0s and each L0 can be up to 256M in size. Target size for L1 is 128M.\n  * In practice, most L0s are tiny due to 10-minute `DEFAULT_CHECKPOINT_TIMEOUT`.\n* image layer generation: CPU time `O(sum(input data))` + upload work `O(sum(new image layer size))`\n  * I have no intuition how expensive / long-running it is in reality.\n* gc: `update_gc_info` work (not substantial, AFAIK)\n\nTo limit the amount of lost upload work, and ingest work, we can limit the upload queue depth (see suggestions in the next sub-section).\nHowever, to limit the amount of lost CPU work, we would need a way to make the compaction/image-layer-generation algorithms interruptible & resumable.\nWe aren't there yet, the need for it is tracked by ([#4580](https://github.com/neondatabase/neon/issues/4580)).\nHowever, this RFC is not constraining the design space either.\n\n### Practical\n\n#### Pageserver Restarts\n\nPageserver crashes are very rare; it would likely be acceptable to re-do the lost work in that case.\nHowever, regular pageserver restarts happen frequently, e.g., during weekly deploys.\n\nIn general, pageserver restart faces the problem of tenants that \"take too long\" to shut down.\nThey are a problem because other tenants that shut down quickly are unavailable while we wait for the slow tenants to shut down.\nWe currently allot 10 seconds for graceful shutdown until we SIGKILL the pageserver process (as per `pageserver.service` unit file).\nA longer budget would expose tenants that are done early to a 
longer downtime.\nA short budget would risk throwing away more work that'd have to be re-done after restart.\n\nIn the context of this RFC, killing the process would mean losing the work that hasn't made it to S3.\nWe can mitigate this problem as follows:\n0. initially, by accepting that we need to do the work again\n1. short-term, introducing measures to cap the amount of in-flight work:\n\n   - cap upload queue length, use backpressure to slow down compaction\n   - disabling compaction/image-layer-generation X minutes before `systemctl restart pageserver`\n   - introducing a read-only shutdown state for tenants that are fast to shut down;\n     that state would be equivalent to the state of a tenant in hot standby / readonly mode.\n\n2. mid term, by not restarting pageserver in place, but using [*seamless tenant migration*](https://github.com/neondatabase/neon/pull/5029) to drain a pageserver's tenants before we restart it.\n\n#### `disk_consistent_lsn` can go backwards\n\n`disk_consistent_lsn` can go backwards across restarts if we crash before we've finished the index part PUT.\nNobody should care about it, because the only thing that matters is `remote_consistent_lsn`.\nCompute certainly doesn't care about `disk_consistent_lsn`.\n\n\n## Side-Effects Of This Design\n\n* local `metadata` is basically reduced to a cache of which timelines exist for this tenant; i.e., we can avoid a `ListObjects` requests for a tenant's timelines during tenant load.\n\n## Limitations\n\nMulti-object changes that span multiple timelines aren't covered by this RFC.\nThat's fine because we currently don't need them, as evidenced by the absence\nof a Pageserver operation that holds multiple timelines' layer map lock at a time.\n\n## Impacted components\n\nPrimarily pageservers.\n\nSafekeepers will experience more load when we need to re-ingest WAL because we've thrown away work.\nNo changes to safekeepers are needed.\n\n## Alternatives considered\n\n### Alternative 1: WAL\n\nWe could 
have a local WAL for timeline dir changes, as proposed here https://github.com/neondatabase/neon/issues/4418 and partially implemented here https://github.com/neondatabase/neon/pull/4422 .\nThe WAL would be used to\n1. make multi-object changes atomic\n2. replace `reconcile_with_remote()` reconciliation: scheduling of layer upload would be part of WAL replay.\n\nThe WAL is appealing in a local-first world, but, it's much more complex than the design described above:\n* New on-disk state to get right.\n* Forward- and backward-compatibility development costs in the future.\n\n### Alternative 2: Flow Everything Through `index_part.json`\n\nWe could have gone to the other extreme and **only** update the layer map whenever we've PUT `index_part.json`.\nI.e., layer map would always be the last-persisted S3 state.\nThat's axiomatically beautiful, not least because it fully separates the layer file production and consumption path (=> [layer file spreading proposal](https://www.notion.so/neondatabase/One-Pager-Layer-File-Spreading-Christian-eb6b64182a214e11b3fceceee688d843?pvs=4)).\nAnd it might make hot standbys / read-only pageservers less of a special case in the future.\n\nBut, I have some uncertainties with regard to WAL ingestion, because it needs to be able to do some reads for the logical size feedback to safekeepers.\n\nAnd it's silly that we wouldn't be able to use the results of compaction or image layer generation before we're done with the upload.\n\nLastly, a temporarily clogged-up upload queue (e.g. 
S3 is down) shouldn't immediately render ingestion unavailable.\n\n### Alternative 3: Sequence Numbers For Layers\n\nInstead of what's proposed in this RFC, we could use unique numbers to identify layer files:\n\n```\n# before\ntenants/$tenant/timelines/$timeline/$key_and_lsn_range\n# after\ntenants/$tenant/timelines/$timeline/$layer_file_id-$key_and_lsn_range\n```\n\nTo guarantee uniqueness, the unique number is a sequence number, stored in `index_part.json`.\n\nThis alternative does not solve atomic layer map updates.\nIn our crash-during-compaction scenario above, the compaction run after the crash will not overwrite the L1s, but write/PUT new files with new sequence numbers.\nIn fact, this alternative makes it worse because the data is now duplicated in the not-overwritten and overwritten L1 layer files.\nWe'd need to write a deduplication pass that checks if perfectly overlapping layers have identical contents.\n\nHowever, this alternative is appealing because it systematically prevents overwrites at a lower level than this RFC.\n\nSo, this alternative is sufficient for the needs of the split-brain safety RFC (immutable layer files locally and in S3).\nBut it doesn't solve the problems with crash-during-compaction outlined earlier in this RFC, and in fact, makes it much more acute.\nThe proposed design in this RFC addresses both.\n\nSo, if this alternative sounds appealing, we should implement the proposal in this RFC first, then implement this alternative on top.\nThat way, we avoid a phase where the crash-during-compaction problem is acute.\n\n## Related issues\n\n- https://github.com/neondatabase/neon/issues/4749\n- https://github.com/neondatabase/neon/issues/4418\n  - https://github.com/neondatabase/neon/pull/4422\n- https://github.com/neondatabase/neon/issues/5077\n- https://github.com/neondatabase/neon/issues/4088\n  - (re)resolutions:\n    - https://github.com/neondatabase/neon/pull/4696\n    - https://github.com/neondatabase/neon/pull/4094\n      - 
https://neondb.slack.com/archives/C033QLM5P7D/p1682519017949719\n\nNote that the test case introduced in https://github.com/neondatabase/neon/pull/4696/files#diff-13114949d1deb49ae394405d4c49558adad91150ba8a34004133653a8a5aeb76 will produce L1s with the same logical content, but, as outlined in the last paragraph of the _Problem Statement_ section above, we don't want to make that  assumption in order to fix the problem.\n\n\n## Implementation Plan\n\n1. Remove support for `remote_storage=None`, because we now rely on the existence of an index part.\n\n    - The nasty part here is to fix all the tests that fiddle with the local timeline directory.\n      Possibly they are just irrelevant with this change, but, each case will require inspection.\n\n2. Implement the design above.\n\n    - Initially, ship without the mitigations for restart and accept we will do some work twice.\n    - Measure the impact and implement one of the mitigations.\n\n"
  },
  {
    "path": "docs/rfcs/028-pageserver-migration.md",
    "content": "# Seamless tenant migration\n\n- Author: john@neon.tech\n- Created on 2023-08-11\n- Implemented on ..\n\n## Summary\n\nThe preceding [generation numbers RFC](025-generation-numbers.md) may be thought of as \"making tenant\nmigration safe\". Following that,\nthis RFC is about how those migrations are to be done:\n\n1. Seamlessly (without interruption to client availability)\n2. Quickly (enabling faster operations)\n3. Efficiently (minimizing I/O and $ cost)\n\nThese points are in priority order: if we have to sacrifice\nefficiency to make a migration seamless for clients, we will\ndo so, etc.\n\nThis is accomplished by introducing two high level changes:\n\n- A dual-attached state for tenants, used in a control-plane-orchestrated\n  migration procedure that preserves availability during a migration.\n- Warm secondary locations for tenants, where on-disk content is primed\n  for a fast migration of the tenant from its current attachment to this\n  secondary location.\n\n## Motivation\n\nMigrating tenants between pageservers is essential to operating a service\nat scale, in several contexts:\n\n1. Responding to a pageserver node failure by migrating tenants to other pageservers\n2. Balancing load and capacity across pageservers, for example when a user expands their\n   database and they need to migrate to a pageserver with more capacity.\n3. Restarting pageservers for upgrades and maintenance\n\nThe current situation steps for migration are:\n\n- detach from old node; skip if old node is dead; (the [skip part is still WIP](https://github.com/neondatabase/cloud/issues/5426)).\n- attach to new node\n- re-configure endpoints to use the new node\n\nOnce [generation numbers](025-generation-numbers.md) are implemented,\nthe detach step is no longer critical for correctness. 
So, we can\n\n- attach to a new node,\n- re-configure endpoints to use the new node, and then\n- detach from the old node.\n\nHowever, this still does not meet our seamless/fast/efficient goals:\n\n- Not fast: The new node will have to download potentially large amounts\n  of data from S3, which may take many minutes.\n- Not seamless: If we attach to a new pageserver before detaching an old one,\n  the new one might delete some objects that interrupt availability of reads on the old one.\n- Not efficient: the old pageserver will continue uploading\n  S3 content during the migration that will never be read.\n\nThe user expectations for availability are:\n\n- For planned maintenance, there should be zero availability\n  gap. This expectation is fulfilled by this RFC.\n- For unplanned changes (e.g. node failures), there should be\n  minimal availability gap. This RFC provides the _mechanism_\n  to fail over quickly, but does not provide the failure _detection_\n  nor failover _policy_.\n\n## Non Goals\n\n- Defining service tiers with different storage strategies: the same\n  level of HA & overhead will apply to all tenants. 
This doesn't rule out\n  adding such tiers in future.\n- Enabling pageserver failover in the absence of a control plane: the control\n  plane will remain the source of truth for what should be attached where.\n- Totally avoiding availability gaps on unplanned migrations during\n  a failure (we expect a small, bounded window of\n  read unavailability of very recent LSNs)\n- Workload balancing: this RFC defines the mechanism for moving tenants\n  around, not the higher level logic for deciding who goes where.\n- Defining all possible configuration flows for tenants: the migration process\n  defined in this RFC demonstrates the sufficiency of the pageserver API, but\n  is not the only kind of configuration change the control plane will ever do.\n  The APIs defined here should let the control plane move tenants around in\n  whatever way is needed while preserving data safety and read availability.\n\n## Impacted components\n\nPageserver, control plane\n\n## Terminology\n\n- **Attachment**: a tenant is _attached_ to a pageserver if it has\n  been issued a generation number, and is running an instance of\n  the `Tenant` type, ingesting the WAL, and available to serve\n  page reads.\n- **Location**: locations are a superset of attachments. A location\n  is a combination of a tenant and a pageserver. We may _attach_ at a _location_.\n\n- **Secondary location**: a location which is not currently attached.\n- **Warm secondary location**: a location which is not currently attached, but is endeavoring to maintain a warm local cache of layers. We avoid calling this a _warm standby_ to avoid confusion with similar postgres features.\n\n## Implementation (high level)\n\n### Warm secondary locations\n\nTo enable faster migrations, we will identify at least one _secondary location_\nfor each tenant. 
This secondary location will keep a warm cache of layers\nfor the tenant, so that if it is later attached, it can catch up with the\nlatest LSN quickly: rather than downloading everything, it only has to replay\nthe recent part of the WAL to advance from the remote_consistent_lsn to the\nmost recent LSN in the WAL.\n\nThe control plane is responsible for selecting secondary locations, and\ncalling into pageservers to configure tenants into a secondary mode at this\nnew location, as well as attaching the tenant in its existing primary location.\n\nThe attached pageserver for a tenant will publish a [layer heatmap](#layer-heatmap)\nto advise secondaries of which layers should be downloaded.\n\n### Location modes\n\nCurrently, we consider a tenant to be in one of two states on a pageserver:\n\n- Attached: active `Tenant` object, and layers on local disk\n- Detached: no layers on local disk, no runtime state.\n\nWe will extend this with finer-grained modes, whose purpose will become\nclear in later sections:\n\n- **AttachedSingle**: equivalent to the existing attached state.\n- **AttachedMulti**: like AttachedSingle, holds an up to date generation, but\n  does not do deletions.\n- **AttachedStale**: like AttachedSingle, but holds a stale generation and\n  does not do any remote storage operations.\n- **Secondary**: keep local state on disk, periodically update from S3.\n- **Detached**: equivalent to existing detached state.\n\nTo control these finer grained states, a new pageserver API endpoint will be added.\n\n### Cutover procedure\n\nDefine old location and new location as \"Node A\" and \"Node B\". Consider\nthe case where both nodes are available, and Node B was previously configured\nas a secondary location for the tenant we are migrating.\n\nThe cutover procedure is orchestrated by the control plane, calling into\nthe pageservers' APIs:\n\n1. Call to Node A requesting it to flush to S3 and enter AttachedStale state\n2. 
Increment generation, and call to Node B requesting it to enter AttachedMulti\n   state with the new generation.\n3. Call to Node B, requesting it to download the latest hot layers from remote storage,\n   according to the latest heatmap flushed by Node A.\n4. Wait for Node B's WAL ingestion to catch up with node A's\n5. Update endpoints to use node B instead of node A\n6. Call to node B requesting it to enter state AttachedSingle.\n7. Call to node A requesting it to enter state Secondary\n\nThe following table summarizes how the state of the system advances:\n\n|     Step      |     Node A     |     Node B     | Node used by endpoints |\n| :-----------: | :------------: | :------------: | :--------------------: |\n| 1 (_initial_) | AttachedSingle |   Secondary    |           A            |\n|       2       | AttachedStale  | AttachedMulti  |           A            |\n|       3       | AttachedStale  | AttachedMulti  |           A            |\n|       4       | AttachedStale  | AttachedMulti  |           A            |\n| 5 (_cutover_) | AttachedStale  | AttachedMulti  |           B            |\n|       6       | AttachedStale  | AttachedSingle |           B            |\n|  7 (_final_)  |   Secondary    | AttachedSingle |           B            |\n\nThe procedure described for a clean handover from a live node to a secondary\nis also used for failure cases and for migrations to a location that is not\nconfigured as a secondary, by simply skipping irrelevant steps, as described in\nthe following sections.\n\n#### Migration from an unresponsive node\n\nIf node A is unavailable, then all calls into\nnode A are skipped and we don't wait for B to catch up before\nswitching the endpoints to use B.\n\n#### Migration to a location that is not a secondary\n\nIf node B is initially in Detached state, the procedure is identical. 
Since Node B\nis coming from a Detached state rather than Secondary, the download of layers and\ncatch up with WAL will take much longer.\n\nWe might do this if:\n\n- Attached and secondary locations are both critically low on disk, and we need\n  to migrate to a third node with more resources available.\n- We are migrating a tenant which does not use secondary locations to save on cost.\n\n#### Permanent migration away from a node\n\nIn the final step of the migration, we generally request the original node to enter a Secondary\nstate. This is typical if we are doing a planned migration during maintenance, or to\nbalance CPU/network load away from a node.\n\nOne might also want to permanently migrate away: this can be done by simply removing the secondary\nlocation after the migration is complete, or as an optimization by substituting the Detached state\nfor the Secondary state in the final step.\n\n#### Cutover diagram\n\n```mermaid\nsequenceDiagram\nparticipant CP as Control plane\nparticipant A as Node A\nparticipant B as Node B\nparticipant E as Endpoint\n\nCP->>A: PUT Flush & go to AttachedStale\nnote right of A: A continues to ingest WAL\nCP->>B: PUT AttachedMulti\nCP->>B: PUT Download layers from latest heatmap\nnote right of B: B downloads from S3\nloop Poll until download complete\nCP->>B: GET download status\nend\nactivate B\nnote right of B: B ingests WAL\nloop Poll until catch up\nCP->>B: GET visible WAL\nCP->>A: GET visible WAL\nend\ndeactivate B\nCP->>E: Configure to use Node B\nE->>B: Connect for reads\nCP->>B: PUT AttachedSingle\nCP->>A: PUT Secondary\n```\n\n#### Cutover from an unavailable pageserver\n\nThis case is far simpler: we may skip straight to our intended\nend state.\n\n```mermaid\nsequenceDiagram\nparticipant A as Node A\nparticipant CP as Control plane\nparticipant B as Node B\nparticipant E as Endpoint\n\nnote right of A: Node A offline\nactivate A\nCP->>B: PUT AttachedSingle\nCP->>E: Configure to use Node B\nE->>B: Connect for 
reads\ndeactivate A\n```\n\n## Implementation (detail)\n\n### Purpose of AttachedMulti, AttachedStale\n\n#### AttachedMulti\n\nOrdinarily, an attached pageserver whose generation is the latest may delete\nlayers at will (e.g. during compaction). If a previous generation pageserver\nis also still attached, and in use by endpoints, then this layer deletion could\nlead to a loss of availability for the endpoint when reading from the previous\ngeneration pageserver.\n\nThe _AttachedMulti_ state simply disables deletions. These will be enqueued\nin `RemoteTimelineClient` until the control plane transitions the\nnode into AttachedSingle, which unblocks deletions.  Other remote storage operations\nsuch as uploads are not blocked.\n\nAttachedMulti is not required for data safety, only to preserve availability\non pageservers running with stale generations.\n\nA node enters AttachedMulti only when explicitly asked to by the control plane. It should\nonly remain in this state for the duration of a migration.\n\nIf a control plane bug leaves\nthe node in AttachedMulti for a long time, then we must avoid unbounded memory use from enqueued\ndeletions. This may be accomplished simply, by dropping enqueued deletions when some modest\nthreshold of delayed deletions (e.g. 10k layers per tenant) is reached. As with all deletions,\nit is safe to skip them, and the leaked objects will be eventually cleaned up by scrub or\nby timeline deletion.\n\nDuring AttachedMulti, the Tenant is free to drop layers from local disk in response to\ndisk pressure: only the deletion of remote layers is blocked.\n\n#### AttachedStale\n\nCurrently, a pageserver with a stale generation number will continue to\nupload layers, but be prevented from completing deletions. This is safe, but inefficient: layers uploaded by this stale generation\nwill not be read back by future generations of pageservers.\n\nThe _AttachedStale_ state disables S3 uploads. 
The stale pageserver\nwill continue to ingest the WAL and write layers to local disk, but will not\ndo any uploads to S3.\n\nA node may enter AttachedStale in two ways:\n\n- Explicitly, when control plane calls into the node at the start of a migration.\n- Implicitly, when the node tries to validate some deletions and discovers\n  that its generation is stale.\n\nThe AttachedStale state also disables sending consumption metrics from\nthat location: it is interpreted as an indication that some other pageserver\nis already attached or is about to be attached, and that new pageserver will\nbe responsible for sending consumption metrics.\n\n#### Disk Pressure & AttachedStale\n\nOver long periods of time, a tenant location in AttachedStale will accumulate data\non local disk, as it cannot evict any layers written since it entered the\nAttachedStale state. We rely on the control plane to revert the location to\nSecondary or Detached at the end of a migration.\n\nThis scenario is particularly noteworthy when evacuating all tenants on a pageserver:\nsince _all_ the attached tenants will go into AttachedStale, we will be doing no\nuploads at all, therefore ingested data will cause disk usage to increase continuously.\nUnder nominal conditions, the available disk space on pageservers should be sufficient\nto complete the evacuation before this becomes a problem, but we must also handle\nthe case where we hit a low disk situation while in this state.\n\nThe concept of disk pressure already exists in the pageserver: the `disk_usage_eviction_task`\ntouches each Tenant when it determines that a low-disk condition requires\nsome layer eviction. Having selected layers for eviction, the eviction\ntask calls `Timeline::evict_layers`.\n\n**Safety**: If evict_layers is called while in AttachedStale state, and some of the to-be-evicted\nlayers are not yet uploaded to S3, then the block on uploads will be lifted. 
This\nwill result in leaking some objects once a migration is complete, but will enable\nthe node to manage its disk space properly: if a node is left with some tenants\nin AttachedStale indefinitely due to a network partition or control plane bug,\nthese tenants will not cause a full disk condition.\n\n### Warm secondary updates\n\n#### Layer heatmap\n\nThe secondary location's job is to serve reads **with the same quality of service as the original location\nwas serving them around the time of a migration**. This does not mean the secondary\nlocation needs the whole set of layers: inactive layers that might soon\nbe evicted on the attached pageserver need not be downloaded by the\nsecondary. A totally idle tenant only needs to maintain enough on-disk\nstate to enable a fast cold start (i.e. the most recent image layers are\ntypically sufficient).\n\nTo enable this, we introduce the concept of a _layer heatmap_, which\nacts as an advisory input to secondary locations to decide which\nlayers to download from S3.\n\n#### Attached pageserver\n\nThe attached pageserver, if in state AttachedSingle, periodically\nuploads a serialized heat map to S3. It may skip this if there\nis no change since the last time it uploaded (e.g. if the tenant\nis totally idle).\n\nAdditionally, when the tenant is flushed to remote storage prior to a migration\n(the first step in [cutover procedure](#cutover-procedure)), \nthe heatmap is written out. 
This enables a future attached pageserver\nto get an up to date view when deciding which layers to download.\n\n#### Secondary location behavior\n\nSecondary warm locations run a simple loop, implemented separately from\nthe main `Tenant` type, which represents attached tenants:\n\n- Download the layer heatmap\n- Select any \"hot enough\" layers to download, if there is sufficient\n  free disk space.\n- Download layers, if they were not previously evicted (see below)\n- Download the latest index_part.json\n- Check if any layers currently on disk are no longer referenced by\n  IndexPart & delete them\n\nNote that the heatmap is only advisory: if a secondary location has plenty\nof disk space, it may choose to retain layers that aren't referenced\nby the heatmap, as long as they are still referenced by the IndexPart. Conversely,\nif a node is very low on disk space, it might opt to raise the heat threshold required\nto download a layer, until more disk space is available.\n\n#### Secondary locations & disk pressure\n\nSecondary locations are subject to eviction on disk pressure, just as\nattached locations are.  For eviction purposes, the access time of a\nlayer in a secondary location will be the access time given in the heatmap,\nrather than the literal time at which the local layer file was accessed.\n\nThe heatmap will indicate which layers are in local storage on the attached\nlocation.  The secondary will always attempt to get back to having that\nset of layers on disk, but to avoid flapping, it will remember the access\ntime of the layer it was most recently asked to evict, and layers whose\naccess time is below that will not be re-downloaded.\n\nThe resulting behavior is that after a layer is evicted from a secondary\nlocation, it is only re-downloaded once the attached pageserver accesses\nthe layer and uploads a heatmap reflecting that access time.  
On a pageserver\nrestart, the secondary location will attempt to download all layers in\nthe heatmap again, if they are not on local disk.\n\nThis behavior will be slightly different when secondary locations are\nused for \"low energy tenants\", but that is beyond the scope of this RFC.\n\n### Location configuration API\n\nCurrently, the `/tenant/<tenant_id>/config` API defines various\ntunables like compaction settings, which apply to the tenant irrespective\nof which pageserver it is running on.\n\nA new \"location config\" structure will be introduced, which defines\nconfiguration which is per-tenant, but local to a particular pageserver,\nsuch as the attachment mode and whether it is a secondary.\n\nThe pageserver will expose a new per-tenant API for setting\nthe state: `/tenant/<tenant_id>/location/config`.\n\nBody content:\n\n```\n{\n  state: 'enum{Detached, Secondary, AttachedSingle, AttachedMulti, AttachedStale}',\n  generation: Option<u32>,\n  configuration: `Option<TenantConfig>`\n  flush: bool\n}\n```\n\nExisting `/attach` and `/detach` endpoint will have the same\nbehavior as calling `/location/config` with `AttachedSingle` and `Detached`\nstates respectively. These endpoints will be deprecated and later\nremoved.\n\nThe generation attribute is mandatory for entering `AttachedSingle` or\n`AttachedMulti`.\n\nThe configuration attribute is mandatory when entering any state other\nthan `Detached`. This configuration is the same as the body for\nthe existing `/tenant/<tenant_id>/config` endpoint.\n\nThe `flush` argument indicates whether the pageservers should flush\nto S3 before proceeding: this only has any effect if the node is\ncurrently in AttachedSingle or AttachedMulti. 
This is used\nduring the first phase of migration, when transitioning the\nold pageserver to AttachedStale.\n\nThe `/re-attach` API response will be extended to include a `state` as\nwell as a `generation`, enabling the pageserver to enter the\ncorrect state for each tenant on startup.\n\n### Database schema for locations\n\nA new table `ProjectLocation`:\n\n- pageserver_id: int\n- tenant_id: TenantId\n- generation: Option<int>\n- state: `enum(Secondary, AttachedSingle, AttachedMulti)`\n\nNotes:\n\n- It is legal for a Project to have zero `ProjectLocation`s\n- The `pageserver` column in `Project` now means \"to which pageserver should\n  endpoints connect\", rather than simply which pageserver is attached.\n- The `generation` column in `Project` remains, and is incremented and used\n  to set the generation of `ProjectLocation` rows when they are set into\n  an attached state.\n- The `Detached` state is implicitly represented as the absence of\n  a `ProjectLocation`.\n\n### Executing migrations\n\nMigrations will be implemented as Go functions, within the\nexisting `Operation` framework in the control plane. 
These\noperations are persistent, such that they will always keep\ntrying until completion: this property is important to avoid\nleaving garbage behind on pageservers, such as AttachedStale\nlocations.\n\n### Recovery from failures during migration\n\nDuring migration, the control plane may encounter failures of either\nthe original or new pageserver, or both:\n\n- If the original fails, skip past waiting for the new pageserver\n  to catch up, and put it into AttachedSingle immediately.\n- If the new node fails, put the old pageserver into Secondary\n  and then back into AttachedSingle (this has the effect of\n  retaining on-disk state and granting it a fresh generation number).\n- If both nodes fail, keep trying until one of them is available\n  again.\n\n### Control plane -> Pageserver reconciliation\n\nA migration may be done while the old node is unavailable,\nin which case the old node may still be running in an AttachedStale\nstate.\n\nIn this case, it is undesirable to have the migration `Operation`\nstay alive until the old node eventually comes back online\nand can be cleaned up. To handle this, the control plane\nshould run a background reconciliation process to compare\na pageserver's attachments with the database, and clean up\nany that shouldn't be there any more.\n\nNote that there will be no work to do if the old node was really\noffline, as during startup it will call into `/re-attach` and\nbe updated that way. The reconciliation will only be needed\nif the node was unavailable but still running.\n\n## Alternatives considered\n\n### Only enabling secondary locations for tenants on a higher service tier\n\nThis will make sense in future, especially for tiny databases that may be\ndownloaded from S3 in milliseconds when needed.\n\nHowever, it is not wise to do it immediately, because pageservers contain\na mixture of higher and lower tier workloads. 
If we had 1 tenant with\na secondary location and 9 without, then those other 9 tenants will do\na lot of I/O as they try to recover from S3, which may degrade the\nservice of the tenant which had a secondary location.\n\nUntil we segregate tenant on different service tiers on different pageserver\nnodes, or implement & test QoS to ensure that tenants with secondaries are\nnot harmed by tenants without, we should use the same failover approach\nfor all the tenants.\n\n### Hot secondary locations (continuous WAL replay)\n\nInstead of secondary locations populating their caches from S3, we could\nhave them consume the WAL from safekeepers. The downsides of this would be:\n\n- Double load on safekeepers, which are a less scalable service than S3\n- Secondary locations' on-disk state would end up subtly different to\n  the remote state, which would make synchronizing with S3 more complex/expensive\n  when going into attached state.\n\nThe downside of only updating secondary locations from S3 is that we will\nhave a delay during migration from replaying the LSN range between what's\nin S3 and what's in the pageserver. This range will be very small on\nplanned migrations, as we have the old pageserver flush to S3 immediately\nbefore attaching the new pageserver. On unplanned migrations (old pageserver\nis unavailable), the range of LSNs to replay is bounded by the flush frequency\non the old pageserver. However, the migration doesn't have to wait for the\nreplay: it's just that not-yet-replayed LSNs will be unavailable for read\nuntil the new pageserver catches up.\n\nWe expect that pageserver reads of the most recent LSNs will be relatively\nrare, as for an active endpoint those pages will usually still be in the postgres\npage cache: this leads us to prefer synchronizing from S3 on secondary\nlocations, rather than consuming the WAL from safekeepers.\n\n### Cold secondary locations\n\nIt is not functionally necessary to keep warm caches on secondary locations at all. 
However, if we do not, then\nwe would experience a de-facto availability loss in unplanned migrations, as reads to the new node would take an extremely long time (many seconds, perhaps minutes).\n\nWarm caches on secondary locations are necessary to meet\nour availability goals.\n\n### Pageserver-granularity failover\n\nInstead of migrating tenants individually, we could have entire spare nodes,\nand on a node death, move all its work to one of these spares.\n\nThis approach is avoided for several reasons:\n\n- we would still need fine-grained tenant migration for other\n  purposes such as balancing load\n- by sharing the spare capacity over many peers rather than one spare node,\n  these peers may use the capacity for other purposes, until it is needed\n  to handle migrated tenants. e.g. for keeping a deeper cache of their\n  attached tenants.\n\n### Readonly during migration\n\nWe could simplify migrations by making both previous and new nodes go into a\nreadonly state, then flush remote content from the previous node, then activate\nattachment on the secondary node.\n\nThe downside to this approach is a potentially large gap in readability of\nrecent LSNs while loading data onto the new node. 
To avoid this, it is worthwhile\nto incur the extra cost of double-replaying the WAL onto old and new nodes' local\nstorage during a migration.\n\n### Peer-to-peer pageserver communication\n\nRather than uploading the heatmap to S3, attached pageservers could make it\navailable to peers.\n\nCurrently, pageservers have no peer to peer communication, so adding this\nfor heatmaps would incur significant overhead in deployment and configuration\nof the service, and ensuring that when a new pageserver is deployed, other\npageservers are updated to be aware of it.\n\nAs well as simplifying implementation, putting heatmaps in S3 will be useful\nfor future analytics purposes -- gathering aggregated statistics on activity\npatterns across many tenants may be done directly from data in S3.\n"
  },
  {
    "path": "docs/rfcs/029-getpage-throttling.md",
    "content": "# Per-Tenant GetPage@LSN Throttling\n\nAuthor: Christian Schwarz\nDate: Oct 24, 2023\n\n## Summary\n\nThis RFC proposes per-tenant throttling of GetPage@LSN requests inside Pageserver\nand the interactions with its client, i.e., the neon_smgr component in Compute.\n\nThe result of implementing & executing this RFC will be a fleet-wide upper limit for\n**\"the highest GetPage/second that Pageserver can support for a single tenant/shard\"**.\n\n## Background\n\n### GetPage@LSN Request Flow\n\nPageserver exposes its `page_service.rs` as a libpq listener.\nThe Computes' `neon_smgr` module connects to that libpq listener.\nOnce a connection is established, the protocol allows Compute to request page images at a given LSN.\nWe call these requests GetPage@LSN requests, or GetPage requests for short.\nOther request types can be sent, but these are low traffic compared to GetPage requests\nand are not the concern of this RFC.\n\nPageserver associates one libpq connection with one tokio task.\n\nPer connection/task, the pq protocol is handled by the common `postgres_backend` crate.\nIts `run_message_loop` function invokes the `page_service` specific `impl<IO> postgres_backend::Handler<IO> for PageServerHandler`.\nRequests are processed in the order in which they arrive via the TCP-based pq protocol.\nSo, there is no concurrent request processing within one connection/task.\n\nThere is a degree of natural pipelining:\nCompute can \"fill the pipe\" by sending more than one GetPage request into the libpq TCP stream.\nAnd Pageserver can fill the pipe with responses in the other direction.\nBoth directions are subject to the limit of tx/rx buffers, nodelay, TCP flow control, etc.\n\n### GetPage@LSN Access Pattern\n\nThe Compute has its own hierarchy of caches, specifically `shared_buffers` and the `local file cache` (LFC).\nCompute only issues GetPage requests to Pageserver if it encounters a miss in these caches.\n\nIf the working set stops fitting into Compute's 
caches, requests to Pageserver increase sharply -- the Compute starts *thrashing*.\n\n## Motivation\n\nIn INC-69, a tenant issued 155k GetPage/second for a period of 10 minutes and 60k GetPage/second for a period of 3h,\nthen dropping to ca 18k GetPage/second for a period of 9h.\n\nWe noticed this because of an internal GetPage latency SLO burn rate alert, i.e.,\nthe request latency profile during this period significantly exceeded what was acceptable according to the internal SLO.\n\nSadly, we do not have the observability data to determine the impact of this tenant on other tenants on the same Pageserver.\n\nHowever, here are some illustrative data points for the 155k period:\nThe tenant was responsible for >= 99% of the GetPage traffic and, frankly, the overall activity on this Pageserver instance.\nWe were serving pages at 10 Gb/s (`155k x 8 kbyte (PAGE_SZ) per second is 1.12GiB/s = 9.4Gb/s.`)\nThe CPU utilization of the instance was 75% user+system.\nPageserver page cache served 1.75M accesses/second at a hit rate of ca 90%.\nThe hit rate for materialized pages was ca. 
40%.\nCuriously, IOPS to the Instance Store NVMe were very low, rarely exceeding 100.\n\nThe fact that the IOPS were so low / the materialized page cache hit rate was so high suggests that **this tenant's compute's caches were thrashing**.\nThe compute was of type `k8s-pod`; hence, auto-scaling could/would not have helped remediate the thrashing by provisioning more RAM.\nThe consequence was that the **thrashing translated into excessive GetPage requests against Pageserver**.\n\nMy claim is that it was **unhealthy to serve this workload at the pace we did**:\n* it is likely that other tenants were/would have experienced high latencies (again, we sadly don't have per-tenant latency data to confirm this)\n* more importantly, it was **unsustainable** to serve traffic at this pace for multiple reasons:\n    * **predictability of performance**: when the working set grows, the pageserver materialized page cache hit rate drops.\n      At some point, we're bound by the EC2 Instance Store NVMe drive's IOPS limit.\n      The result is an **uneven** performance profile from the Compute perspective.\n\n    * **economics**: Neon currently does not charge for IOPS, only capacity.\n      **We cannot afford to undercut the market in IOPS/$ this drastically; it leads to adverse selection and perverse incentives.**\n      For example, the 155k IOPS, which we served for 10min, would cost ca. 6.5k$/month when provisioned as an io2 EBS volume.\n      Even the 18k IOPS, which we served for 9h, would cost ca. 
1.1k$/month when provisioned as an io2 EBS volume.\n      We charge 0$.\n      It could be economically advantageous to keep using a low-DRAM compute because Pageserver IOPS are fast enough and free.\n\n\nNote: It is helpful to think of Pageserver as a disk, because it's precisely where `neon_smgr` sits:\nvanilla Postgres gets its pages from disk, Neon Postgres gets them from Pageserver.\nSo, regarding the above performance & economic arguments, it is fair to say that we currently provide an \"as-fast-as-possible-IOPS\" disk that we charge for only by capacity.\n\n## Solution: Throttling GetPage Requests\n\n**The consequence of the above analysis must be that Pageserver throttles GetPage@LSN requests**.\nThat is, unless we want to start charging for provisioned GetPage@LSN/second.\nThrottling sets the correct incentive for a thrashing Compute to scale up its DRAM to the working set size.\nNeon Autoscaling will make this easy, [eventually](https://github.com/neondatabase/neon/pull/3913).\n\n## The Design Space\n\nWhat remains is the question about *policy* and *mechanism*:\n\n**Policy** concerns itself with the question of what limit applies to a given connection|timeline|tenant.\nCandidates are:\n\n* hard limit, same limit value per connection|timeline|tenant\n    * Per-tenant will provide an upper bound for the impact of a tenant on a given Pageserver instance.\n      This is a major operational pain point / risk right now.\n* hard limit, configurable per connection|timeline|tenant\n    * This outsources policy to console/control plane, with obvious advantages for flexible structuring of what service we offer to customers.\n    * Note that this is not a mechanism to guarantee a minimum provisioned rate, i.e., this is not a mechanism to guarantee a certain QoS for a tenant.\n* fair share among active connections|timelines|tenants per instance\n    * example: each connection|timeline|tenant gets a fair fraction of the machine's GetPage/second capacity\n    * NB: 
needs definition of \"active\", and knowledge of available GetPage/second capacity in advance\n* ...\n\n\nRegarding **mechanism**, it's clear that **backpressure** is the way to go.\nHowever, we must choose between\n* **implicit** backpressure through pq/TCP and\n* **explicit** rejection of requests + retries with exponential backoff\n\nFurther, there is the question of how throttling GetPage@LSN will affect the **internal GetPage latency SLO**:\nwhere do we measure the SLI for Pageserver's internal getpage latency SLO? Before or after the throttling?\n\nAnd when we eventually move the measurement point into the Computes (to avoid coordinated omission),\nhow do we avoid counting throttling-induced latency toward the internal getpage latency SLI/SLO?\n\n## Scope Of This RFC\n\n**This RFC proposes introducing a hard GetPage@LSN/second limit per tenant, with the same value applying to each tenant on a Pageserver**.\n\nThis proposal is easy to implement and significantly de-risks operating large Pageservers,\nbased on the assumption that extremely-high-GetPage-rate-episodes like the one from the \"Motivation\" section are uncorrelated between tenants.\n\nFor example, suppose we pick a limit that allows up to 10 tenants to go at limit rate.\nSuppose our Pageserver can serve 100k GetPage/second total at a 100% page cache miss rate.\nIf each tenant gets a hard limit of 10k GetPage/second, we can serve up to 10 tenants at limit speed without latency degradation.\n\nThe mechanism for backpressure will be TCP-based implicit backpressure.\nThe compute team isn't concerned about prefetch queue depth.\nPageserver will implement it by delaying the reading of requests from the libpq connection(s).\n\nThe rate limit will be implemented using a per-tenant token bucket.\nThe bucket will be shared among all connections to the tenant.\nThe bucket implementation supports starvation-preventing `await`ing.\nThe current candidate for the implementation is 
[`leaky_bucket`](https://docs.rs/leaky-bucket/).\nThe getpage@lsn benchmark that's being added in https://github.com/neondatabase/neon/issues/5771\ncan be used to evaluate the overhead of sharing the bucket among connections of a tenant.\nA possible technique to mitigate the impact of sharing the bucket would be to maintain a buffer of a few tokens per connection handler.\n\nRegarding metrics / the internal GetPage latency SLO:\nwe will measure the GetPage latency SLO _after_ the throttler and introduce a new metric to measure the amount of throttling, quantified by:\n- histogram that records the tenants' observations of queue depth before they start waiting (one such histogram per pageserver)\n- histogram that records the tenants' observations of time spent waiting (one such histogram per pageserver)\n\nFurther observability measures:\n- an INFO log message at frequency 1/min if the tenant/timeline/connection was throttled in that last minute.\n  The message will identify the tenant/timeline/connection to allow correlation with compute logs/stats.\n\nRollout will happen as follows:\n- deploy 1: implementation + config: disabled by default, ability to enable it per tenant through tenant_conf\n- experimentation in staging and later production to study impact & interaction with auto-scaling\n- determination of a sensible global default value\n  - the value will be chosen as high as possible ...\n  - ... 
but low enough to work towards this RFC's goal that one tenant should not be able to dominate a pageserver instance.\n- deploy 2: implementation fixes if any + config: enabled by default with the aforementioned global default\n- reset of the experimental per-tenant overrides\n- gain experience & lower the limit over time\n  - we stop lowering the limit as soon as this RFC's goal is achieved, i.e.,\n    once we decide that in practice the chosen value sufficiently de-risks operating large pageservers\n\nThe per-tenant override will remain for emergencies and testing.\nBut since Console doesn't preserve it during tenant migrations, it isn't durably configurable for the tenant.\n\nToward the upper layers of the Neon stack, the resulting limit will be\n**\"the highest GetPage/second that Pageserver can support for a single tenant\"**.\n\n### Rationale\n\nWe decided against error + retry because of worries about starvation.\n\n## Future Work\n\nEnable per-tenant emergency override of the limit via Console.\nShould be part of a more general framework to specify tenant config overrides.\n**NB:** this is **not** the right mechanism to _sell_ different max GetPage/second levels to users,\nor _auto-scale_ the GetPage/second levels. 
Such functionality will require a separate RFC that\nconcerns itself with GetPage/second capacity planning.\n\nCompute-side metrics for GetPage latency.\n\nBack-channel to inform Compute/Autoscaling/ControlPlane that the project is being throttled.\n\nCompute-side neon_smgr improvements to avoid sending the same GetPage request multiple times if multiple backends experience a cache miss.\n\nDealing with read-only endpoints: users use read-only endpoints to scale reads for a single tenant.\nPossibly there are also assumptions around read-only endpoints not affecting the primary read-write endpoint's performance.\nWith per-tenant rate limiting, we will not meet that expectation.\nHowever, we can currently only scale per tenant.\nSoon, we will have sharding (#5505), which will apply the throttling on a per-shard basis.\nBut, that's orthogonal to scaling reads: if many endpoints hit one shard, they share the same throttling limit.\nTo solve this properly, I think we'll need replicas for tenants / shard.\nTo performance-isolate a tenant's endpoints from each other, we'd then route them to different replicas.\n"
  },
  {
    "path": "docs/rfcs/029-pageserver-wal-disaster-recovery.md",
    "content": "# Name\n\nCreated on: 2023-09-08\nAuthor: Arpad Müller\n\n## Summary\n\nEnable the pageserver to recover from data corruption events by implementing\na feature to re-apply historic WAL records in parallel to the already occurring\nWAL replay.\n\nThe feature is outside of the user-visible backup and history story, and only\nserves as a second-level backup for the case that there is a bug in the\npageservers that corrupted the served pages.\n\nThe RFC proposes the addition of two new features:\n* recover a broken branch from WAL (downtime is allowed)\n* a test recovery system to recover random branches to make sure recovery works\n\n## Motivation\n\nThe historic WAL is currently stored in S3 even after it has been replayed by\nthe pageserver and thus been integrated into the pageserver's storage system.\nThis is done to defend from data corruption failures inside the pageservers.\n\nHowever, application of this WAL in the disaster recovery setting is currently\nvery manual and we want to automate this to make it easier.\n\n### Use cases\n\nThere are various use cases for this feature, like:\n\n* The main motivation is replaying in the instance of pageservers corrupting\n  data.\n* We might want to, beyond the user-visible history features, through our\n  support channels and upon customer request, in select instances, recover\n  historic versions beyond the range of history that we officially support.\n* Running the recovery process in the background for random tenant timelines\n  to figure out if there was a corruption of data (we would compare with what\n  the pageserver stores for the \"official\" timeline).\n* Using the WAL to arrive at historic pages we can then back up to S3 so that\n  WAL itself can be discarded, or at least not used for future replays.\n  Again, this sounds a lot like what the pageserver is already doing, but the\n  point is to provide a fallback to the service provided by the pageserver.\n\n## Design\n\n### Design 
constraints\n\nThe main design constraint is that the feature needs to be *simple* enough that\nthe number of bugs is as low, and reliability as high as possible: the main\ngoal of this endeavour is to achieve higher correctness than the pageserver.\n\nFor the background process, we cannot afford a downtime of the timeline that is\nbeing cloned, as we don't want to restrict ourselves to offline tenants only.\nIn the scenario where we want to recover from disasters or roll back to a\nhistoric lsn through support staff, downtimes are more affordable, and\ninevitable if the original had been subject to the corruption. Ideally, the\ntwo code paths would share code, so the solution would be designed for not\nrequiring downtimes.\n\n### API endpoint changes\n\nThis RFC proposes two API endpoint changes in the safekeeper and the\npageserver.\n\nRemember, the pageserver timeline API creation endpoint is to this URL:\n\n```\n/v1/tenant/{tenant_id}/timeline/\n```\n\nWhere `{tenant_id}` is the ID of the tenant the timeline is created for,\nand specified as part of the URL. The timeline ID is passed via the POST\nrequest body as the only required parameter `new_timeline_id`.\n\nThis proposal adds one optional parameter called\n`existing_initdb_timeline_id` to the request's json body. If the parameter\nis not specified, behaviour should be as existing, so the pageserver runs\ninitdb.\nIf the parameter is specified, it is expected to point to a timeline ID.\nIn fact that ID might match `new_timeline_id`, what's important is that\nS3 storage contains a matching initdb under the URL matching the given\ntenant and timeline.\n\nHaving both `ancestor_timeline_id` and `existing_initdb_timeline_id`\nspecified is illegal and will result in an HTTP error. 
This feature is\nonly meant for the \"main\" branch that doesn't have any ancestors\nof its own, as only here initdb is relevant.\n\nFor the safekeeper, we propose the addition of the following copy endpoint:\n\n```\n/v1/tenant/{tenant_id}/timeline/{source_timeline_id}/copy\n```\nIt is meant for POST requests with json, and the two URL parameters\n`tenant_id` and `source_timeline_id`. The json request body contains\nthe two required parameters `target_timeline_id` and `until_lsn`.\n\nAfter invoking, the copy endpoint starts a copy process of the WAL from\nthe source ID to the target ID. The lsn is updated according to the\nprogress of the API call.\n\n### Higher level features\n\nWe want the API changes to support the following higher level features:\n\n* recovery-after-corruption DR of the main timeline of a tenant. This\n  feature allows for downtime.\n* test DR of the main timeline into a special copy timeline. This feature\n  is meant to run against selected production tenants in the background,\n  without the user noticing, so it does not allow for downtime.\n\nThe recovery-after-corruption DR only needs the pageserver changes.\nIt works as follows:\n\n* delete the timeline from the pageservers via timeline deletion API\n* re-create it via timeline creation API (same ID as before) and set\n  `existing_initdb_timeline_id` to the same timeline ID\n\nThe test DR also requires the copy primitive and works as follows:\n\n* copy the WAL of the timeline to a new place\n* create a new timeline for the tenant\n\n## Non Goals\n\nAt the danger of being repetitive, the main goal of this feature is to be a\nbackup method, so reliability is very important. 
This implies that other\naspects like performance or space reduction are less important.\n\n### Corrupt WAL\n\nThe process suggested by this RFC assumes that the WAL is free of corruption.\nIn some instances, corruption can make it into WAL, like for example when\nhigher level components like postgres or the application first read corrupt\ndata, and then execute a write with data derived from that earlier read. That\nwritten data might then contain the corruption.\n\nCommon use cases can hit this quite easily. For example, an application reads\nsome counter, increments it, and then writes the new counter value to the\ndatabase.\nOn a lower level, the compute might put FPIs (Full Page Images) into the WAL,\nwhich have corrupt data for rows unrelated to the write operation at hand.\n\nSeparating corrupt writes from non-corrupt ones is a hard problem in general,\nand if the application was involved in making the corrupt write, a recovery\nwould also involve the application. Therefore, corruption that has made it into\nthe WAL is outside of the scope of this feature. However, the WAL replay can be\nissued to right before the point in time where the corruption occurred. Then the\ndata loss is isolated to post-corruption writes only.\n\n## Impacted components (e.g. pageserver, safekeeper, console, etc)\n\nMost changes would happen to the pageservers.\nFor the higher level features, maybe other components like the console would\nbe involved.\n\nWe need to make sure that the shadow timelines are not subject to the usual\nlimits and billing we apply to existing timelines.\n\n## Proposed implementation\n\nThe first problem to keep in mind is the reproducibility of `initdb`.\nSo an initial step would be to upload `initdb` snapshots to S3.\n\nAfter that, we'd have the endpoint spawn a background process which\nperforms the replay of the WAL to that new timeline. 
This process should\nfollow the existing workflows as closely as possible, just using the\nWAL records of a different timeline.\n\nThe timeline created will be in a special state that solely looks for WAL\nentries of the timeline it is trying to copy. Once the target LSN is reached,\nit turns into a normal timeline that also accepts writes to its own\ntimeline ID.\n\n### Scalability\n\nFor now we want to run this entire process on a single node, and as\nit is by nature linear, it's hard to parallelize. However, for the\nverification workloads, we can easily start the WAL replay in parallel\nfor different points in time. This is valuable especially for tenants\nwith large WAL records.\n\nCompare this with the tricks to make addition circuits execute with\nlower latency by making them perform the addition for both possible\nvalues of the carry bit, and then, in a second step, taking the\nresult for the carry bit that was actually obtained.\n\nThe other scalability dimension to consider is the WAL length, which\nis a growing question as tenants accumulate changes. There are\npossible approaches to this, including creating snapshots of the\npage files and uploading them to S3, but if we do this for every single\nbranch, we lose the cheap branching property.\n\n### Implementation by component\n\nThe proposed changes for the various components of the neon architecture\nare written up in this notion page:\n\nhttps://www.notion.so/neondatabase/Pageserver-disaster-recovery-one-pager-4ecfb5df16ce4f6bbfc3817ed1a6cbb2\n\n### Unresolved questions\n\nnone known (outside of the mentioned ones).\n"
  },
  {
    "path": "docs/rfcs/030-vectored-timeline-get.md",
    "content": "# Vectored Timeline Get\n\nCreated on: 2024-01-02\nAuthor: Christian Schwarz\n\n# Summary\n\nA brief RFC / GitHub Epic describing a vectored version of the `Timeline::get` method that is at the heart of Pageserver.\n\n**EDIT**: the implementation of this feature is described in [Vlad's (internal) tech talk](https://drive.google.com/file/d/1vfY24S869UP8lEUUDHRWKF1AJn8fpWoJ/view?usp=drive_link).\n\n# Motivation\n\nDuring basebackup, we issue many `Timeline::get` calls for SLRU pages that are *adjacent* in key space.\nFor an example, see\nhttps://github.com/neondatabase/neon/blob/5c88213eaf1b1e29c610a078d0b380f69ed49a7e/pageserver/src/basebackup.rs#L281-L302.\n\nEach of these `Timeline::get` calls must traverse the layer map to gather reconstruct data (`Timeline::get_reconstruct_data`) for the requested page number (`blknum` in the example).\nFor each layer visited by layer map traversal, we do a `DiskBtree` point lookup.\nIf it's negative (no entry), we resume layer map traversal.\nIf it's positive, we collect the result in our reconstruct data bag.\nIf the reconstruct data bag contents suffice to reconstruct the page, we're done with `get_reconstruct_data` and move on to walredo.\nOtherwise, we resume layer map traversal.\n\nDoing this many `Timeline::get` calls is quite inefficient because:\n\n1. We do the layer map traversal repeatedly, even if, e.g., all the data sits in the same image layer at the bottom of the stack.\n2. We may visit many DiskBtree inner pages multiple times for point lookup of different keys.\n   This is likely particularly bad for L0s which span the whole key space and hence must be visited by layer map traversal, but\n   may not contain the data we're looking for.\n3. 
Anecdotally, keys adjacent in keyspace and written simultaneously also end up physically adjacent in the layer files [^1].\n   So, to provide the reconstruct data for N adjacent keys, we would actually only _need_ to issue a single large read to the filesystem, instead of the N reads we currently do.\n   The filesystem, in turn, ideally stores the layer file physically contiguously, so our large read will turn into one IOP toward the disk.\n\n[^1]: https://www.notion.so/neondatabase/Christian-Investigation-Slow-Basebackups-Early-2023-12-34ea5c7dcdc1485d9ac3731da4d2a6fc?pvs=4#15ee4e143392461fa64590679c8f54c9\n\n# Solution\n\nWe should have a vectored aka batched aka scatter-gather style alternative API for `Timeline::get`. Having such an API  unlocks:\n\n* more efficient basebackup\n* batched IO during compaction (useful for strides of unchanged pages)\n* page_service: expose vectored get_page_at_lsn for compute (=> good for seqscan / prefetch)\n  * if [on-demand SLRU downloads](https://github.com/neondatabase/neon/pull/6151) land before vectored Timeline::get, on-demand SLRU downloads will still benefit from this API\n\n# DoD\n\nThere is a new variant of `Timeline::get`, called `Timeline::get_vectored`.\nIt takes as arguments an `lsn: Lsn` and a `src: &[KeyVec]` where `struct KeyVec { base: Key, count: usize }`.\n\nIt is up to the implementor to figure out a suitable and efficient way to return the reconstructed page images.\nIt is sufficient to simply return a `Vec<Bytes>`, but, likely more efficient solutions can be found after studying all the callers of `Timeline::get`.\n\nFunctionally, the behavior of `Timeline::get_vectored` is equivalent to\n\n```rust\nlet mut keys_iter: impl Iterator<Item=Key>\n  = src.map(|KeyVec{ base, count }| (base..base+count)).flatten();\nlet mut out = Vec::new();\nfor key in keys_iter {\n    let data = Timeline::get(key, lsn)?;\n    out.push(data);\n}\nreturn out;\n```\n\nHowever, unlike above, an ideal solution will\n\n* Visit each 
`struct Layer` at most once.\n* For each visited layer, call `Layer::get_value_reconstruct_data` at most once.\n  * This means, read each `DiskBtree` page at most once.\n* Facilitate merging of the reads we issue to the OS and eventually NVMe.\n\nEach of these items above represents a significant amount of work.\n\n## Performance\n\nIdeally, the **base performance** of a vectored get of a single page should be identical to the current `Timeline::get`.\nA reasonable constant overhead over current `Timeline::get` is acceptable.\n\nThe performance improvement for the vectored use case is demonstrated in some way, e.g., using the `pagebench` basebackup benchmark against a tenant with a lot of SLRU segments.\n\n# Implementation\n\nHigh-level set of tasks / changes to be made:\n\n- **Get clarity on API**:\n  - Define naive `Timeline::get_vectored` implementation & adopt it across pageserver.\n  - The tricky thing here will be the return type (e.g. `Vec<Bytes>` vs `impl Stream`).\n  - Start with something simple to explore the different usages of the API.\n    Then iterate with peers until we have something that is good enough.\n- **Vectored Layer Map traversal**\n  - Vectored `LayerMap::search` (take 1 LSN and N `Key`s instead of just 1 LSN and 1 `Key`)\n  - Refactor `Timeline::get_reconstruct_data` to hold & return state for N `Key`s instead of 1\n    - The slightly tricky part here is what to do about `cont_lsn` [after we've found some reconstruct data for some keys](https://github.com/neondatabase/neon/blob/d066dad84b076daf3781cdf9a692098889d3974e/pageserver/src/tenant/timeline.rs#L2378-L2385)\n      but need more.\n      Likely we'll need to keep track of `cont_lsn` per key and continue next iteration at `max(cont_lsn)` of all keys that still need data.\n- **Vectored `Layer::get_value_reconstruct_data` / `DiskBtree`**\n  - Current code calls it 
[here](https://github.com/neondatabase/neon/blob/d066dad84b076daf3781cdf9a692098889d3974e/pageserver/src/tenant/timeline.rs#L2378-L2384).\n  - Delta layers use `DiskBtreeReader::visit()` to collect the `(offset,len)` pairs for delta record blobs to load.\n  - Image layers use `DiskBtreeReader::get` to get the offset of the image blob to load. Underneath, that's just a `::visit()` call.\n  - What needs to happen to `DiskBtree::visit()`?\n    * Minimally\n      * take a single `KeyVec` instead of a single `Key` as argument, i.e., take a single contiguous key range to visit.\n      * Change the visit code to invoke the callback for all values in the `KeyVec`'s key range\n      * This should be good enough for what we've seen when investigating basebackup slowness, because there, the key ranges are contiguous.\n    * Ideally:\n      * Take a `&[KeyVec]`, sort it;\n      * during Btree traversal, peek at the next `KeyVec` range to determine whether we need to descend or back out.\n      * NB: this should be a straight-forward extension of the minimal solution above, as we'll already be checking for \"is there more key range in the requested `KeyVec`\".\n- **Facilitate merging of the reads we issue to the OS and eventually NVMe.**\n  - The `DiskBtree::visit` produces a set of offsets which we then read from a `VirtualFile` [here](https://github.com/neondatabase/neon/blob/292281c9dfb24152b728b1a846cc45105dac7fe0/pageserver/src/tenant/storage_layer/delta_layer.rs#L772-L804)\n    - [Delta layer reads](https://github.com/neondatabase/neon/blob/292281c9dfb24152b728b1a846cc45105dac7fe0/pageserver/src/tenant/storage_layer/delta_layer.rs#L772-L804)\n      - We hit (and rely on) `PageCache` and `VirtualFile` here (not great under pressure)\n    - [Image layer reads](https://github.com/neondatabase/neon/blob/292281c9dfb24152b728b1a846cc45105dac7fe0/pageserver/src/tenant/storage_layer/image_layer.rs#L429-L435)\n  - What needs to happen is the **vectorization of the `blob_io` 
interface and then the `VirtualFile` API**.\n  - That is tricky because\n    - the `VirtualFile` API, which sits underneath `blob_io`, is being touched by ongoing [io_uring work](https://github.com/neondatabase/neon/pull/5824)\n    - there's the question how IO buffers will be managed; currently this area relies heavily on `PageCache`, but there's controversy around the future of `PageCache`.\n      - The guiding principle here should be to avoid coupling this work to the `PageCache`.\n      - I.e., treat `PageCache` as an extra hop in the I/O chain, rather than as an integral part of buffer management.\n\n\nLet's see how we can improve by doing the first three items in above list first, then revisit.\n\n## Rollout / Feature Flags\n\nNo feature flags are required for this epic.\n\nAt the end of this epic, `Timeline::get` forwards to `Timeline::get_vectored`, i.e., it's an all-or-nothing type of change.\n\nIt is encouraged to deliver this feature incrementally, i.e., do many small PRs over multiple weeks.\nThat will help isolate performance regressions across weekly releases.\n\n# Interaction With Sharding\n\n[Sharding](https://github.com/neondatabase/neon/pull/5432) splits up the key space, see functions `is_key_local` / `key_to_shard_number`.\n\nJust as with `Timeline::get`, callers of `Timeline::get_vectored` are responsible for ensuring that they only ask for blocks of the given `struct Timeline`'s shard.\n\nGiven that this is already the case, there shouldn't be significant interaction/interference with sharding.\n\nHowever, let's have a safety check for this constraint (error or assertion) because there are currently few affordances at the higher layers of Pageserver for sharding<=>keyspace interaction.\nFor example, `KeySpace` is not broken up by shard stripe, so if someone naively converted the compaction code to issue a vectored get for a keyspace range it would violate this constraint.\n"
  },
  {
    "path": "docs/rfcs/031-sharding-static.md",
    "content": "# Sharding Phase 1: Static Key-space Sharding\n\n## Summary\n\nTo enable databases with sizes approaching the capacity of a pageserver's disk,\nit is necessary to break up the storage for the database, or _shard_ it.\n\nSharding in general is a complex area. This RFC aims to define an initial\ncapability that will permit creating large-capacity databases using a static configuration\ndefined at time of Tenant creation.\n\n## Motivation\n\nCurrently, all data for a Tenant, including all its timelines, is stored on a single\npageserver. The local storage required may be several times larger than the actual\ndatabase size, due to LSM write inflation.\n\nIf a database is larger than what one pageserver can hold, then it becomes impossible\nfor the pageserver to hold it in local storage, as it must do to provide service to\nclients.\n\n### Prior art\n\nIn Neon:\n\n- Layer File Spreading: https://www.notion.so/neondatabase/One-Pager-Layer-File-Spreading-Konstantin-21fd9b11b618475da5f39c61dd8ab7a4\n- Layer File SPreading: https://www.notion.so/neondatabase/One-Pager-Layer-File-Spreading-Christian-eb6b64182a214e11b3fceceee688d843\n- Key Space partitioning: https://www.notion.so/neondatabase/One-Pager-Key-Space-Partitioning-Stas-8e3a28a600a04a25a68523f42a170677\n\nPrior art in other distributed systems is too broad to capture here: pretty much\nany scale out storage system does something like this.\n\n## Requirements\n\n- Enable creating a large (for example, 16TiB) database without requiring dedicated\n  pageserver nodes.\n- Share read/write bandwidth costs for large databases across pageservers, as well\n  as storage capacity, in order to avoid large capacity databases acting as I/O hotspots\n  that disrupt service to other tenants.\n- Our data distribution scheme should handle sparse/nonuniform keys well, since postgres\n  does not write out a single contiguous ranges of page numbers.\n\n_Note: the definition of 'large database' is arbitrary, but the 
lower bound is to ensure that a database\nthat a user might create on a current-gen enterprise SSD should also work well on\nNeon. The upper bound is whatever postgres can handle: i.e. we must make sure that the\npageserver backend is not the limiting factor in the database size_.\n\n## Non Goals\n\n- Independently distributing timelines within the same tenant. If a tenant has many\n  timelines, then sharding may be a less efficient mechanism for distributing load than\n  sharing out timelines between pageservers.\n- Distributing work in the LSN dimension: this RFC focuses on the Key dimension only,\n  based on the idea that separate mechanisms will make sense for each dimension.\n\n## Impacted Components\n\npageserver, control plane, postgres/smgr\n\n## Terminology\n\n**Key**: a postgres page number, qualified by relation. In the sense that the pageserver is a versioned key-value store,\nthe page number is the key in that store. `Key` is a literal data type in existing code.\n\n**LSN dimension**: this just means the range of LSNs (history), when talking about the range\nof keys and LSNs as a two dimensional space.\n\n## Implementation\n\n### Key sharding vs. LSN sharding\n\nWhen we think of sharding across the two dimensional key/lsn space, this is an\nopportunity to think about how the two dimensions differ:\n\n- Sharding the key space distributes the _write_ workload of ingesting data\n  and compacting. This work must be carefully managed so that exactly one\n  node owns a given key.\n- Sharding the LSN space distributes the _historical read_ workload. This work\n  can be done by anyone without any special coordination, as long as they can\n  see the remote index and layers.\n\nThe key sharding is the harder part, and also the more urgent one, to support larger\ncapacity databases. Because distributing historical LSN read work is a relatively\nsimpler problem that most users don't have, we defer it to future work. 
It is anticipated\nthat some quite simple P2P offload model will enable distributing work for historical\nreads: a node which is low on space can call out to peer to ask it to download and\nserve reads from a historical layer.\n\n### Key mapping scheme\n\nHaving decided to focus on key sharding, we must next decide how we will map\nkeys to shards. It is proposed to use a \"wide striping\" approach, to obtain a good compromise\nbetween data locality and avoiding entire large relations mapping to the same shard.\n\nWe will define two spaces:\n\n- Key space: unsigned integer\n- Shard space: integer from 0 to N-1, where we have N shards.\n\n### Key -> Shard mapping\n\nKeys are currently defined in the pageserver's getpage@lsn interface as follows:\n\n```\npub struct Key {\n    pub field1: u8,\n    pub field2: u32,\n    pub field3: u32,\n    pub field4: u32,\n    pub field5: u8,\n    pub field6: u32,\n}\n\n\nfn rel_block_to_key(rel: RelTag, blknum: BlockNumber) -> Key {\n    Key {\n        field1: 0x00,\n        field2: rel.spcnode,\n        field3: rel.dbnode,\n        field4: rel.relnode,\n        field5: rel.forknum,\n        field6: blknum,\n    }\n}\n```\n\n_Note: keys for relation metadata are ignored here, as this data will be mirrored to all\nshards. 
For distribution purposes, we only care about user data keys_\n\nThe properties we want from our Key->Shard mapping are:\n\n- Locality in `blknum`, such that adjacent `blknum` will usually map to\n  the same stripe and consequently land on the same shard, even though the overall\n  collection of blocks in a relation will be spread over many stripes and therefore\n  many shards.\n- Avoid the same blknum on different relations landing on the same stripe, so that\n  with many small relations we do not end up aliasing data to the same stripe/shard.\n- Avoid vulnerability to aliasing in the values of relation identity fields, such that\n  if there are patterns in the value of `relnode`, these do not manifest as patterns\n  in data placement.\n\nTo accomplish this, the blknum is used to select a stripe, and stripes are\nassigned to shards in a pseudorandom order via a hash. The motivation for\npseudo-random distribution (rather than sequential mapping of stripe to shard)\nis to avoid I/O hotspots when sequentially reading multiple relations: we don't want\nall relations' stripes to touch pageservers in the same order.\n\nTo map a `Key` to a shard:\n\n- Hash the `Key` field 4 (relNode).\n- Divide field 6 (`blknum`) by the stripe size in pages, and combine the\n  hash of this with the hash from the previous step.\n- The total hash modulo the shard count gives the shard holding this key.\n\nWhy don't we use the other fields in the Key?\n\n- We ignore `forknum` for key mapping, because it distinguishes different classes of data\n  in the same relation, and we would like to keep the data in a relation together.\n- We would like to use spcNode and dbNode, but cannot. Postgres database creation operations can refer to an existing database as a template, such that the created\n  database's blocks differ only by spcNode and dbNode from the original. 
To enable running\n  this type of creation without cross-pageserver communication, we must ensure that these\n  blocks map to the same shard -- we do this by excluding spcNode and dbNode from the hash.\n\n### Data placement examples\n\nFor example, consider the extreme large databases cases of postgres data layout in a system with 8 shards\nand a stripe size of 32k pages:\n\n- A single large relation: `blknum` division will break the data up into 4096\n  stripes, which will be scattered across the shards.\n- 4096 relations of 32k pages each: each relation will map to exactly one stripe,\n  and that stripe will be placed according to the hash of the key field 4. The\n  data placement will be statistically uniform across shards.\n\nData placement will be more uneven on smaller databases:\n\n- A tenant with 2 shards and 2 relations of one stripe size each: there is a 50% chance\n  that both relations land on the same shard and no data lands on the other shard.\n- A tenant with 8 shards and one relation of size 12 stripes: 4 shards will have double\n  the data of the other four shards.\n\nThese uneven cases for small amounts of data do not matter, as long as the stripe size\nis an order of magnitude smaller than the amount of data we are comfortable holding\nin a single shard: if our system handles shard sizes up to 10-100GB, then it is not an issue if\na tenant has some shards with 256MB size and some shards with 512MB size, even though\nthe standard deviation of shard size within the tenant is very high. Our key mapping\nscheme provides a statistical guarantee that as the tenant's overall data size increases,\nuniformity of placement will improve.\n\n### Important Types\n\n#### `ShardIdentity`\n\nProvides the information needed to know whether a particular key belongs\nto a particular shard:\n\n- Layout version\n- Stripe size\n- Shard count\n- Shard index\n\nThis structure's size is constant. 
Note that if we had used a different key\nmapping scheme such as consistent hashing with explicit hash ranges assigned\nto each shard, then the ShardIdentity's size would grow with the shard count: the simpler\nkey mapping scheme used here enables a small fixed size ShardIdentity.\n\n### Pageserver changes\n\n#### Structural\n\nEverywhere the Pageserver currently deals with Tenants, it will move to dealing with\n`TenantShard`s, which are just a `Tenant` plus a `ShardIdentity` telling it which part\nof the keyspace it owns. An un-sharded tenant is just a `TenantShard` whose `ShardIdentity`\ncovers the whole keyspace.\n\nWhen the pageserver writes layers and index_part.json to remote storage, it must\ninclude the shard index & count in the name, to avoid collisions (the count is\nnecessary for future-proofing: the count will vary in time). These keys\nwill also include a generation number: the [generation numbers](025-generation-numbers.md) system will work\nexactly the same for TenantShards as it does for Tenants today: each shard will have\nits own generation number.\n\n#### Storage Format: Keys\n\nFor tenants with >1 shard, layer files implicitly become sparse: within the key\nrange described in the layer name, the layer file for a shard will only hold the\ncontent relevant to stripes assigned to the shard.\n\nFor this reason, the LayerFileName within a tenant is no longer unique: different shards\nmay use the same LayerFileName to refer to different data. 
We may solve this simply\nby including the shard number in the keys used for layers.\n\nThe shard number will be included as a prefix (as part of tenant ID), like this:\n\n`pageserver/v1/tenants/<tenant_id>-<shard_number><shard_count>/timelines/<timeline id>/<layer file name>-<generation>`\n\n`pageserver/v1/tenants/<tenant_id>-<shard_number><shard_count>/timelines/<timeline id>/index_part.json-<generation>`\n\nReasons for this particular format:\n\n- Use of a prefix is convenient for implementation (no need to carry the shard ID everywhere\n  we construct a layer file name), and enables efficient listing of index_parts within\n  a particular shard-timeline prefix.\n- Including the shard _count_ as well as shard number means that in future when we implement\n  shard splitting, it will be possible for a parent shard and one of its children to write\n  the same layer file without a name collision. For example, a parent shard 0_1 might split\n  into two (0_2, 1_2), and in the process of splitting shard 0_2 could write a layer or index_part\n  that is distinct from what shard 0_1 would have written at the same place.\n\nIn practice, we expect shard counts to be relatively small, so a `u8` will be sufficient,\nand therefore the shard part of the path can be a fixed-length hex string like `{:02X}{:02X}`,\nfor example a single-shard tenant's prefix will be `0001`.\n\nFor backward compatibility, we may define a special `ShardIdentity` that has shard_count==0,\nand use this as a cue to construct paths with no prefix at all.\n\n#### Storage Format: Indices\n\nIn the phase 1 described in this RFC, shards only reference layers they write themselves. 
However,\nwhen we implement shard splitting in future, it will be useful to enable shards to reference layers\nwritten by other shards (specifically the parent shard during a split), so that shards don't\nhave to exhaustively copy all data into their own shard-prefixed keys.\n\nTo enable this, the `IndexPart` structure will be extended to store the (shard number, shard count)\ntuple on each layer, such that it can construct paths for layers written by other shards. This\nnaturally raises the question of who \"owns\" such layers written by ancestral shards: this problem\nwill be addressed in phase 2.\n\nFor backward compatibility, any index entry without shard information will be assumed to be\nin the legacy ShardIdentity.\n\n#### WAL Ingest\n\nIn Phase 1, all shards will subscribe to the safekeeper to download WAL content. They will filter\nit down to the pages relevant to their shard:\n\n- For ordinary user data writes, only retain a write if it matches the ShardIdentity\n- For metadata describing relations etc, all shards retain these writes.\n\nThe pageservers must somehow give the safekeeper correct feedback on remote_consistent_lsn:\none solution here is for the 0th shard to periodically peek at the IndexParts for all the other shards,\nand have only the 0th shard populate remote_consistent_lsn. However, this is relatively\nexpensive: if the safekeeper can be made shard-aware then it could be taught to use\nthe min() of all shards' remote_consistent_lsns to decide when to trim the WAL.\n\n#### Compaction/GC\n\nNo changes needed.\n\nThe pageserver doesn't have to do anything special during compaction\nor GC. It is implicitly operating on the subset of keys that map to its ShardIdentity.\nThis will result in sparse layer files, containing keys only in the stripes that this\nshard owns. 
Where optimizations currently exist in compaction for spotting \"gaps\" in\nthe key range, these should be updated to ignore gaps that are due to sharding, to\navoid spuriously splitting up layers into stripe-sized pieces.\n\n### Compute Endpoints\n\nCompute endpoints will need to:\n\n- Accept a vector of connection strings as part of their configuration from the control plane\n- Route pageserver requests according to mapping the hash of key to the correct\n  entry in the vector of connection strings.\n\nDoing this in compute rather than routing requests via a single pageserver is\nnecessary to enable sharding tenants without adding latency from extra hops.\n\n### Control Plane\n\nTenants, or _Projects_ in the control plane, will each own a set of TenantShards (this will\nbe 1 for small tenants). Logic for placement of tenant shards is just the same as the current logic for placing\ntenants.\n\nTenant lifecycle operations like deletion will require fanning-out to all the shards\nin the tenant. 
The same goes for timeline creation and deletion: a timeline should\nnot be considered created until it has been created in all shards.\n\n#### Selectively enabling sharding for large tenants\n\nInitially, we will explicitly enable sharding for large tenants only.\n\nIn future, this hint mechanism will become optional when we implement automatic\nre-sharding of tenants.\n\n## Future Phases\n\nThis section exists to indicate what will likely come next after this phase.\n\nPhases 2a and 2b are amenable to execution in parallel.\n\n### Phase 2a: WAL fan-out\n\n**Problem**: when all shards consume the whole WAL, the network bandwidth used\nfor transmitting the WAL from safekeeper to pageservers is multiplied by a factor\nof the shard count.\n\nNetwork bandwidth is not our most pressing bottleneck, but it is likely to become\na problem if we set a modest shard count (~8) on a significant number of tenants,\nespecially as those larger tenants which we shard are also likely to have higher\nwrite bandwidth than average.\n\n### Phase 2b: Shard Splitting\n\n**Problem**: the number of shards in a tenant is defined at creation time and cannot\nbe changed. This causes excessive sharding for most small tenants, and an upper\nbound on scale for very large tenants.\n\nTo address this, a _splitting_ feature will later be added. One shard can split its\ndata into a number of children by doing a special compaction operation to generate\nimage layers broken up child-shard-wise, and then writing out an `index_part.json` for\neach child. 
This will then require external coordination (by the control plane) to\nsafely attach these new child shards and then move them around to distribute work.\nThe opposite _merging_ operation can also be imagined, but is unlikely to be implemented:\nonce a Tenant has been sharded, the marginal efficiency benefit of merging is unlikely to justify\nthe risk/complexity of implementing such a rarely-encountered scenario.\n\n### Phase N (future): distributed historical reads\n\n**Problem**: while sharding based on key is good for handling changes in overall\ndatabase size, it is less suitable for spiky/unpredictable changes in the read\nworkload to historical layers. Sudden increases in historical reads could result\nin sudden increases in local disk capacity required for a TenantShard.\n\nExample: the extreme case of this would be to run a tenant for a year, then create branches\nwith ancestors at monthly intervals. This could lead to a sudden 12x inflation in\nthe on-disk capacity footprint of a TenantShard, since it would be serving reads\nfrom all those disparate historical layers.\n\nIf we can respond fast enough, then key-sharding a tenant more finely can help with\nthis, but splitting may be a relatively expensive operation and the increased historical\nread load may be transient.\n\nA separate mechanism for handling heavy historical reads could be something like\na gossip mechanism for pageservers to communicate\nabout their workload, and then a getpageatlsn offload mechanism where one pageserver can\nask another to go read the necessary layers from remote storage to serve the read. This\nrequires relatively little coordination because it is read-only: any node can service any\nread. 
All reads to a particular shard would still flow through one node, but the\ndisk capacity & I/O impact of servicing the read would be distributed.\n\n## FAQ/Alternatives\n\n### Why stripe the data, rather than using contiguous ranges of keyspace for each shard?\n\nWhen a database is growing under a write workload, writes may predominantly hit the\nend of the keyspace, creating a bandwidth hotspot on that shard. Similarly, if the user\nis intensively re-writing a particular relation, if that relation lived in a particular\nshard then it would not achieve our goal of distributing the write work across shards.\n\n### Why not proxy read requests through one pageserver, so that endpoints don't have to change?\n\n1. This would not achieve scale-out of network bandwidth: a busy tenant with a large\n   database would still cause a load hotspot on the pageserver routing its read requests.\n2. The additional hop through the \"proxy\" pageserver would add latency and overall\n   resource cost (CPU, network bandwidth)\n\n### Layer File Spreading: use one pageserver as the owner of a tenant, and have it spread out work on a per-layer basis to peers\n\nIn this model, there would be no explicit sharding of work, but the pageserver to which\na tenant is attached would not hold all layers on its disk: instead, it would call out\nto peers to have them store some layers, and call out to those peers to request reads\nin those layers.\n\nThis mechanism will work well for distributing work in the LSN dimension, but in the key\nspace dimension it has the major limitation of requiring one node to handle all\nincoming writes, and compactions. Even if the write workload for a large database\nfits in one pageserver, it will still be a hotspot and such tenants may still\nde-facto require their own pageserver.\n"
  },
  {
    "path": "docs/rfcs/032-shard-splitting.md",
    "content": "# Shard splitting\n\n## Summary\n\nThis RFC describes a new pageserver API for splitting an existing tenant shard into\nmultiple shards, and describes how to use this API to safely increase the total\nshard count of a tenant.\n\n## Motivation\n\nIn the [sharding RFC](031-sharding-static.md), a mechanism was introduced to scale\ntenants beyond the capacity of a single pageserver by breaking up the key space\ninto stripes, and distributing these stripes across many pageservers. However,\nthe shard count was defined once at tenant creation time and not varied thereafter.\n\nIn practice, the expected size of a database is rarely known at creation time, and\nit is inefficient to enable sharding for very small tenants: we need to be\nable to create a tenant with a small number of shards (such as 1), and later expand\nwhen it becomes clear that the tenant has grown in size to a point where sharding\nis beneficial.\n\n### Prior art\n\nMany distributed systems have the problem of choosing how many shards to create for\ntenants that do not specify an expected size up-front. There are a couple of general\napproaches:\n\n- Write to a key space in order, and start a new shard when the highest key advances\n  past some point. This doesn't work well for Neon, because we write to our key space\n  in many different contiguous ranges (per relation), rather than in one contiguous\n  range. To adapt to this kind of model, we would need a sharding scheme where each\n  relation had its own range of shards, which would be inefficient for the common\n  case of databases with many small relations.\n- Monitor the system, and automatically re-shard at some size threshold. 
For\n  example in Ceph, the [pg_autoscaler](https://github.com/ceph/ceph/blob/49c27499af4ee9a90f69fcc6bf3597999d6efc7b/src/pybind/mgr/pg_autoscaler/module.py)\n  component monitors the size of each RADOS Pool, and adjusts the number of Placement\n  Groups (Ceph's shard equivalent).\n\n## Requirements\n\n- A configurable capacity limit per-shard is enforced.\n- Changes in shard count do not interrupt service beyond requiring postgres\n  to reconnect (i.e. milliseconds).\n- Human being does not have to choose shard count\n\n## Non Goals\n\n- Shard splitting is always a tenant-global operation: we will not enable splitting\n  one shard while leaving others intact.\n- The inverse operation (shard merging) is not described in this RFC. This is a lower\n  priority than splitting, because databases grow more often than they shrink, and\n  a database with many shards will still work properly if the stored data shrinks, just\n  with slightly more overhead (e.g. redundant WAL replication)\n- Shard splitting is only initiated based on capacity bounds, not load. Splitting\n  a tenant based on load will make sense for some medium-capacity, high-load workloads,\n  but is more complex to reason about and likely is not desirable until we have\n  shard merging to reduce the shard count again if the database becomes less busy.\n\n## Impacted Components\n\npageserver, storage controller\n\n(the _storage controller_ is the evolution of what was called `attachment_service` in our test environment)\n\n## Terminology\n\n**Parent** shards are the shards that exist before a split. **Child** shards are\nthe new shards created during a split.\n\n**Shard** is synonymous with _tenant shard_.\n\n**Shard Index** is the 2-tuple of shard number and shard count, written in\npaths as {:02x}{:02x}, e.g. 
`0001`.\n\n## Background\n\nIn the implementation section, a couple of existing aspects of sharding are important\nto remember:\n\n- Shard identifiers contain the shard number and count, so that \"shard 0 of 1\" (`0001`) is\n  a distinct shard from \"shard 0 of 2\" (`0002`). This is the case in key paths, local\n  storage paths, and remote index metadata.\n- Remote layer file paths contain the shard index of the shard that created them, and\n  remote indices contain the same index to enable building the layer file path. A shard's\n  index may reference layers that were created by another shard.\n- Local tenant shard directories include the shard index. All layers downloaded by\n  a tenant shard are stored in this shard-prefixed path, even if those layers were\n  initially created by another shard: tenant shards do not read and write one another's\n  paths.\n- The `Tenant` pageserver type represents one tenant _shard_, not the whole tenant.\n  This is for historical reasons and will be cleaned up in future, but the existing\n  name is used here to help comprehension when reading code.\n\n## Implementation\n\nNote: this section focuses on the correctness of the core split process. This will\nbe fairly inefficient in a naive implementation, and several important optimizations\nare described in a later section.\n\nThere are broadly two parts to the implementation:\n\n1. The pageserver split API, which splits one shard on one pageserver\n2. The overall tenant split process which is coordinated by the storage controller,\n   and calls into the pageserver split API as needed.\n\n### Pageserver Split API\n\nThe pageserver will expose a new API endpoint at `/v1/tenant/:tenant_shard_id/shard_split`\nthat takes the new total shard count in the body.\n\nThe pageserver split API operates on one tenant shard, on one pageserver. 
External\ncoordination is required to use it safely, this is described in the later\n'Split procedure' section.\n\n#### Preparation\n\nFirst identify the shard indices for the new child shards. These are deterministic,\ncalculated from the parent shard's index, and the number of children being created (this\nis an input to the API, and validated to be a power of two). In a trivial example, splitting\n0001 in two always results in 0002 and 0102.\n\nChild shard indices are chosen such that the children's parts of the keyspace will\nbe subsets of the parent's parts of the keyspace.\n\n#### Step 1: write new remote indices\n\nIn remote storage, splitting is very simple: we may just write new index_part.json\nobjects for each child shard, containing exactly the same layers as the parent shard.\n\nThe children will have more data than they need, but this avoids any exhaustive\nre-writing or copying of layer files.\n\nThe index key path includes a generation number: the parent shard's current\nattached generation number will also be used for the child shards' indices. This\nmakes the operation safely retryable: if everything crashes and restarts, we may\ncall the split API again on the parent shard, and the result will be some new remote\nindices for the child shards, under a higher generation number.\n\n#### Step 2: start new `Tenant` objects\n\nA new `Tenant` object may be instantiated for each child shard, while the parent\nshard still exists. 
When calling the tenant_spawn function for this object,\nthe remote index from step 1 will be read, and the child shard will start\nto ingest WAL to catch up from whatever was in the remote storage at step 1.\n\nWe now wait for child shards' WAL ingestion to catch up with the parent shard,\nso that we can safely tear down the parent shard without risking an availability\ngap to clients reading recent LSNs.\n\n#### Step 3: tear down parent `Tenant` object\n\nOnce child shards are running and have caught up with WAL ingest, we no longer\nneed the parent shard. Note that clients may still be using it -- when we\nshut it down, any page_service handlers will also shut down, causing clients\nto disconnect. When the client reconnects, it will re-lookup the tenant,\nand hit the child shard instead of the parent (shard lookup from page_service\nshould bias toward higher ShardCount shards).\n\nNote that at this stage the page service client has not yet been notified of\nany split. In the trivial single split example:\n\n- Shard 0001 is gone: Tenant object torn down\n- Shards 0002 and 0102 are running on the same pageserver where Shard 0001 used to live.\n- Clients will continue to connect to that server thinking that shard 0001 is there,\n  and all requests will work, because any key that was in shard 0001 is definitely\n  available in either shard 0002 or shard 0102.\n- Eventually, the storage controller (not the pageserver) will decide to migrate\n  some child shards away: at that point it will do a live migration, ensuring\n  that the client has an updated configuration before it detaches anything\n  from the original server.\n\n#### Complete\n\nWhen we send a 200 response to the split request, we are promising the caller:\n\n- That the child shards are persistent in remote storage\n- That the parent shard has been shut down\n\nThis enables the caller to proceed with the overall shard split operation, which\nmay involve other shards on other pageservers.\n\n### Storage 
Controller Split procedure\n\nSplitting a tenant requires calling the pageserver split API, and tracking\nenough state to ensure recovery + completion in the event of any component (pageserver\nor storage controller) crashing (or request timing out) during the split.\n\n1. call the split API on all existing shards. Ensure that the resulting\n   child shards are pinned to their pageservers until _all_ the split calls are done.\n   This pinning may be implemented as a \"split bit\" on the tenant shards, that\n   blocks any migrations, and also acts as a sign that if we restart, we must go\n   through some recovery steps to resume the split.\n2. Once all the split calls are done, we may unpin the child shards (clear\n   the split bit). The split is now complete: subsequent steps are just migrations,\n   not strictly part of the split.\n3. Try to schedule new pageserver locations for the child shards, using\n   a soft anti-affinity constraint to place shards from the same tenant onto different\n   pageservers.\n\nUpdating computes about the new shard count is not necessary until we migrate\nany of the child shards away from the parent's location.\n\n### Recovering from failures\n\n#### Rolling back an incomplete split\n\nAn incomplete shard split may be rolled back quite simply, by attaching the parent shards to pageservers,\nand detaching child shards. This will lose any WAL ingested into the children after the parents\nwere detached earlier, but the parents will catch up.\n\nNo special pageserver API is needed for this. From the storage controller's point of view, the\nprocedure is:\n\n1. For all parent shards in the tenant, ensure they are attached\n2. For all child shards, ensure they are not attached\n3. Drop child shards from the storage controller's database, and clear the split bit on the parent shards.\n\nAny remote storage content for child shards is left behind. This is similar to other cases where\nwe may leave garbage objects in S3 (e.g. 
when we upload a layer but crash before uploading an\nindex that references it). Future online scrub/cleanup functionality can remove these objects, or\nthey will be removed when the tenant is deleted, as tenant deletion lists all objects in the prefix,\nwhich would include any child shards that were rolled back.\n\nIf any timelines had been created on child shards, they will be lost when rolling back. To mitigate\nthis, we will **block timeline creation during splitting**, so that we can safely roll back until\nthe split is complete, without risking losing timelines.\n\nRolling back an incomplete split will happen automatically if a split fails due to some fatal\nreason, and will not be accessible via an API:\n\n- A pageserver fails to complete its split API request after too many retries\n- A pageserver returns a fatal unexpected error such as 400 or 500\n- The storage controller database returns a non-retryable error\n- Some internal invariant is violated in the storage controller split code\n\n#### Rolling back a complete split\n\nA complete shard split may be rolled back similarly to an incomplete split, with the following\nmodifications:\n\n- The parent shards will no longer exist in the storage controller database, so these must\n  be re-synthesized somehow: the hard part of this is figuring out the parent shards' generations. This\n  may be accomplished either by probing in S3, or by retaining some tombstone state for deleted\n  shards in the storage controller database.\n- Any timelines that were created after the split completed will disappear when rolling back\n  to the tenant shards. 
For this reason, rolling back after a complete split should only\n  be done due to serious issues where loss of recently created timelines is acceptable, or\n  in cases where we have confirmed that no timelines were created in the intervening period.\n- Parent shards' layers must not have been deleted: this property will come \"for free\" when\n  we first roll out sharding, by simply not implementing deletion of parent layers after\n  a split. When we do implement such deletion (see \"Cleaning up parent-shard layers\" in the\n  Optimizations section), it should apply a TTL to layers such that we have a\n  defined walltime window in which rollback will be possible.\n\nThe storage controller will expose an API for rolling back a complete split, for use\nin the field if we encounter some critical bug with a post-split tenant.\n\n#### Retrying API calls during Pageserver Restart\n\nWhen a pageserver restarts during a split API call, it may witness on-disk content for both parent and\nchild shards from an ongoing split. This does not intrinsically break anything, and the\npageserver may include all these shards in its `/re-attach` request to the storage controller.\n\nIn order to support such restarts, it is important that the storage controller stores\npersistent records of each child shard before it calls into a pageserver, as these child shards\nmay require generation increments via a `/re-attach` request.\n\nThe pageserver restart will also result in a failed API call from the storage controller's point\nof view. Recall that if _any_ pageserver fails to split, the overall split operation may not\ncomplete, and all shards must remain pinned to their current pageserver locations until the\nsplit is done.\n\nThe pageserver API calls during splitting will retry on transient errors, so that\nshort availability gaps do not result in a failure of the overall operation. 
The\nsplit in progress will be automatically rolled back if the threshold for API\nretries is reached (e.g. if a pageserver stays offline for longer than a typical\nrestart).\n\n#### Rollback on Storage Controller Restart\n\nOn startup, the storage controller will inspect the split bit for tenant shards that\nit loads from the database. If any splits are in progress:\n\n- Database content will be reverted to the parent shards\n- Child shards will be dropped from memory\n- The parent and child shards will be included in the general startup reconciliation that\n  the storage controller does: any child shards will be detached from pageservers because\n  they don't exist in the storage controller's expected set of shards, and parent shards\n  will be attached if they aren't already.\n\n#### Storage controller API request failures/retries\n\nThe split request handler will implement idempotency: if the [`Tenant`] requested to split\ndoesn't exist, we will check for the would-be child shards, and if they already exist,\nwe consider the request complete.\n\nIf a request is retried while the original request is still underway, then the split\nrequest handler will notice an InProgress marker in TenantManager, and return 503\nto encourage the client to backoff/retry. This is the same as the general pageserver\nAPI handling for calls that try to act on an InProgress shard.\n\n#### Compute start/restart during a split\n\nIf a compute starts up during split, it will be configured with the old sharding\nconfiguration. This will work for reads irrespective of the progress of the split\nas long as no child shards have been migrated away from their original location, and\nthis is guaranteed in the split procedure (see earlier section).\n\n#### Pageserver fails permanently during a split\n\nIf a pageserver permanently fails (i.e. 
the storage controller availability state for it\ngoes to Offline) while a split is in progress, the splitting operation will roll back, and\nduring the roll back it will skip any API calls to the offline pageserver. If the offline\npageserver becomes available again, any stale locations will be cleaned up via the normal reconciliation process (the `/re-attach` API).\n\n### Handling secondary locations\n\nFor correctness, it is not necessary to split secondary locations. We can simply detach\nthe secondary locations for parent shards, and then attach new secondary locations\nfor child shards.\n\nClearly this is not optimal, as it will result in re-downloads of layer files that\nwere already present on disk. See \"Splitting secondary locations\"\n\n### Conditions to trigger a split\n\nThe pageserver will expose a new API for reporting on shards that are candidates\nfor split: this will return a top-N report of the largest tenant shards by\nphysical size (remote size). This should exclude any tenants that are already\nat the maximum configured shard count.\n\nThe API would look something like:\n`/v1/top_n_tenant?shard_count_lt=8&sort_by=resident_size`\n\nThe storage controller will poll that API across all pageservers it manages at some appropriate interval (e.g. 60 seconds).\n\nA split operation will be started when the tenant exceeds some threshold. This threshold\nshould be _less than_ how large we actually want shards to be, perhaps much less. That's to\nminimize the amount of work involved in splitting -- if we want 100GiB shards, we shouldn't\nwait for a tenant to exceed 100GiB before we split anything. 
Some data analysis of existing\ntenant size distribution may be useful here: if we can make a statement like \"usually, if\na tenant has exceeded 20GiB they're probably going to exceed 100GiB later\", then we might\nmake our policy to split a tenant at 20GiB.\n\nThe finest split we can do is by factors of two, but we can do higher-cardinality splits\ntoo, and this will help to reduce the overhead of repeatedly re-splitting a tenant\nas it grows. An example of a very simple heuristic for early deployment of the splitting\nfeature would be: \"Split tenants into 8 shards when their physical size exceeds 64GiB\": that\nwould give us two kinds of tenant (1 shard and 8 shards), and the confidence that once we had\nsplit a tenant, it will not need re-splitting soon after.\n\n## Optimizations\n\n### Flush parent shard to remote storage during split\n\nAny data that is in WAL but not remote storage at time of split will need\nto be replayed by child shards when they start for the first time. To minimize\nthis work, we may flush the parent shard to remote storage before writing the\nremote indices for child shards.\n\nIt is important that this flush is subject to some time bounds: we may be splitting\nin response to a surge of write ingest, so it may be time-critical to split. A\nfew seconds to flush latest data should be sufficient to optimize common cases without\nrunning the risk of holding up a split for a harmful length of time when a parent\nshard is being written heavily. If the flush doesn't complete in time, we may proceed\nto shut down the parent shard and carry on with the split.\n\n### Hard linking parent layers into child shard directories\n\nBefore we start the Tenant objects for child shards, we may pre-populate their\nlocal storage directories with hard links to the layer files already present\nin the parent shard's local directory. 
When the child shard starts and downloads\nits remote index, it will find all those layer files already present on local disk.\n\nThis avoids wasting download capacity and makes splitting faster, but more importantly\nit avoids taking up a factor of N more disk space when splitting 1 shard into N.\n\nThis mechanism will work well in typical flows where shards are migrated away\npromptly after a split, but for the general case including what happens when\nlayers are evicted and re-downloaded after a split, see the 'Proactive compaction'\nsection below.\n\n### Filtering during compaction\n\nCompaction, especially image layer generation, should skip any keys that are\npresent in a shard's layer files, but do not match the shard's ShardIdentity's\nis_key_local() check. This avoids carrying around data for longer than necessary\nin post-split compactions.\n\nThis was already implemented in https://github.com/neondatabase/neon/pull/6246\n\n### Proactive compaction\n\nIn remote storage, there is little reason to rewrite any data on a shard split:\nall the children can reference parent layers via the very cheap write of the child\nindex_part.json.\n\nIn local storage, things are more nuanced. During the initial split there is no\ncapacity cost to duplicating parent layers, if we implement the hard linking\noptimization described above. However, as soon as any layers are evicted from\nlocal disk and re-downloaded, the downloaded layers will not be hard-links any more:\nthey'll have real capacity footprint. That isn't a problem if we migrate child shards\naway from the parent node swiftly, but it risks a significant over-use of local disk\nspace if we do not.\n\nFor example, if we did an 8-way split of a shard, and then _didn't_ migrate 7 of\nthe shards elsewhere, then churned all the layers in all the shards via eviction,\nthen we would blow up the storage capacity used on the node by 8x. 
If we're splitting\na 100GB shard, that could take the pageserver to the point of exhausting disk space.\n\nTo avoid this scenario, we could implement a special compaction mode where we just\nread historic layers, drop unwanted keys, and write back the layer file. This\nis pretty expensive, but useful if we have split a large shard and are not going to\nmigrate the child shards away.\n\nThe heuristic conditions for triggering such a compaction are:\n\n- A) eviction plus time: if a child shard\n  has existed for more than a time threshold, and has been requested to perform at least one eviction, then it becomes urgent for this child shard to execute a proactive compaction to reduce its storage footprint, at the cost of I/O load.\n- B) resident size plus time: we may inspect the resident layers and calculate how\n  many of them include the overhead of storing pre-split keys. After some time\n  threshold (different to the one in case A) we still have such layers occupying\n  local disk space, then we should proactively compact them.\n\n### Cleaning up parent-shard layers\n\nIt is functionally harmless to leave parent shard layers in remote storage indefinitely.\nThey would be cleaned up in the event of the tenant's deletion.\n\nAs an optimization to avoid leaking remote storage capacity (which costs money), we may\nlazily clean up parent shard layers once no child shards reference them.\n\nThis may be done _very_ lazily: e.g. check every PITR interval. The cleanup procedure is:\n\n- list all the key prefixes beginning with the tenant ID, and select those shard prefixes\n  which do not belong to the most-recently-split set of shards (_ancestral shards_, i.e. 
`shard_count < max(shard_count)` over all shards), and those shard prefixes which do have the latest shard count (_current shards_)\n- If there are no _ancestral shard_ prefixes found, we have nothing to clean up and\n  may drop out now.\n- find the latest-generation index for each _current shard_, read all and accumulate the set of layers belonging to ancestral shards referenced by these indices.\n- for all ancestral shards, list objects in the prefix and delete any layer which was not\n  referenced by a current shard.\n\nIf this cleanup is scheduled for 1-2 PITR periods after the split, there is a good chance that child shards will have written their own image layers covering the whole keyspace, such that all parent shard layers will be deletable.\n\nThe cleanup may be done by the scrubber (external process), or we may choose to have\nthe zeroth shard in the latest generation do the work -- there is no obstacle to one shard\nreading the other shard's indices at runtime, and we do not require visibility of the\nlatest index writes.\n\nCleanup should be artificially delayed by some period (for example 24 hours) to ensure\nthat we retain the option to roll back a split in case of bugs.\n\n### Splitting secondary locations\n\nWe may implement a pageserver API similar to the main splitting API, which does a simpler\noperation for secondary locations: it would not write anything to S3, instead it would simply\ncreate the child shard directory on local disk, hard link in directories from the parent,\nand set up the in memory (TenantSlot) state for the children.\n\nSimilar to attached locations, a subset of secondary locations will probably need re-locating\nafter the split is complete, to avoid leaving multiple child shards on the same pageservers,\nwhere they may use excessive space for the tenant.\n\n## FAQ/Alternatives\n\n### What should the thresholds be set to?\n\nShard size limit: the pre-sharding default capacity quota for databases was 200GiB, so this could be a 
starting point for the per-shard size limit.\n\nMax shard count:\n\n- The safekeeper overhead to sharding is currently O(N) network bandwidth because\n  the un-filtered WAL is sent to all shards. To avoid this growing out of control,\n  a limit of 8 shards should be temporarily imposed until WAL filtering is implemented\n  on the safekeeper.\n- there is also little benefit to increasing the shard count beyond the number\n  of pageservers in a region.\n\n### Is it worth just rewriting all the data during a split to simplify reasoning about space?\n"
  },
  {
    "path": "docs/rfcs/033-storage-controller-drain-and-fill.md",
    "content": "# Graceful Restarts of Storage Controller Managed Clusters\n\n## Summary\nThis RFC describes new storage controller APIs for draining and filling tenant shards from/on pageserver nodes.\nIt also covers how these new APIs should be used by an orchestrator (e.g. Ansible) in order to implement\ngraceful cluster restarts.\n\n## Motivation\n\nPageserver restarts cause read availablity downtime for tenants.\n\nFor example pageserver-3 @ us-east-1 was unavailable for a randomly\npicked tenant (which requested on-demand activation) for around 30 seconds\nduring the restart at 2024-04-03 16:37 UTC.\n\nNote that lots of shutdowns on loaded pageservers do not finish within the\n[10 second systemd enforced timeout](https://github.com/neondatabase/infra/blob/0a5280b383e43c063d43cbf87fa026543f6d6ad4/.github/ansible/systemd/pageserver.service#L16). This means we are shutting down without flushing ephemeral layers\nand have to reingest data in order to serve requests after restarting, potentially making first request latencies worse.\n\nThis problem is not yet very acutely felt in storage controller managed pageservers since\ntenant density is much lower there. However, we are planning on eventually migrating all\npageservers to storage controller management, so it makes sense to solve the issue proactively.\n\n## Requirements\n\n- Pageserver re-deployments cause minimal downtime for tenants\n- The storage controller exposes HTTP API hooks for draining and filling tenant shards\nfrom a given pageserver. Said hooks can be used by an orchestrator proces or a human operator.\n- The storage controller exposes some HTTP API to cancel draining and filling background operations.\n- Failures to drain or fill the node should not be fatal. 
In such cases, cluster restarts should proceed\nas usual (with downtime).\n- Progress of draining/filling is visible through metrics\n\n## Non Goals\n\n- Integration with the control plane\n- Graceful restarts for large non-HA tenants.\n\n## Impacted Components\n\n- storage controller\n- deployment orchestrator (i.e. Ansible)\n- pageserver (indirectly)\n\n## Terminology\n\n**Draining** is the process through which all tenant shards that can be migrated from a given pageserver\nare distributed across the rest of the cluster.\n\n**Filling** is the symmetric opposite of draining. In this process tenant shards are migrated onto a given\npageserver until the cluster reaches a reasonable, quiescent distribution of tenant shards across pageservers.\n\n**Node scheduling policies** act as constraints to the scheduler. For instance, when a\nnode is set in the `Pause` policy, no further shards will be scheduled on it.\n\n**Node** is a pageserver. The two terms are used interchangeably in this RFC.\n\n**Deployment orchestrator** is a generic term for whatever drives our deployments.\nCurrently, it's an Ansible playbook.\n\n## Background\n\n### Storage Controller Basics (skip if already familiar)\n\nFundamentally, the storage controller is a reconciler which aims to move from the observed mapping between pageservers and tenant shards to an intended mapping. Pageserver nodes and tenant shards metadata is durably persisted in a database, but note that the mapping between the two entities is not durably persisted. Instead, this mapping (*observed state*) is constructed at startup by sending `GET location_config` requests to registered pageservers.\n\nAn internal scheduler maps tenant shards to pageservers while respecting certain constraints. The result of scheduling is the *intent state*. 
When the intent state changes, a *reconciliation* will inform pageservers about the new assignment via `PUT location_config` requests and will notify the compute via the configured hook.\n\n### Background Optimizations\n\nThe storage controller performs scheduling optimizations in the background. It will\nmigrate attachments to warm secondaries and replace secondaries in order to balance\nthe cluster out.\n\n### Reconciliations Concurrency Limiting\n\nThere's a hard limit on the number of reconciles that the storage controller\ncan have in flight at any given time. To get an idea of scales, the limit is\n128 at the time of writing.\n\n## Implementation\n\nNote: this section focuses on the core functionality of the graceful restart process.\nIt doesn't necessarily describe the most efficient approach. Optimizations are described\nseparately in a later section.\n\n### Overall Flow\n\nThis section describes how to implement graceful restarts from the perspective\nof Ansible, the deployment orchestrator. Pageservers are already restarted sequentially.\nThe orchestrator shall implement the following epilogue and prologue steps for each\npageserver restart:\n\n#### Prologue\n\nThe orchestrator shall first fetch the pageserver node id from the control plane or\nthe pageserver it aims to restart directly. Next, it issues an HTTP request\nto the storage controller in order to start the drain of said pageserver node.\nAll error responses are retried with a short back-off. When a 202 (Accepted)\nHTTP code is returned, the drain has started. Now the orchestrator polls the\nnode status endpoint exposed by the storage controller in order to await the\nend of the drain process. When the `policy` field of the node status response\nbecomes `PauseForRestart`, the drain has completed and the orchestrator can\nproceed with restarting the pageserver.\n\nThe prologue is subject to an overall timeout. It will have a value in the ballpark\nof minutes. 
As storage controller managed pageservers become more loaded this timeout\nwill likely have to increase.\n\n#### Epilogue\n\nAfter restarting the pageserver, the orchestrator issues an HTTP request\nto the storage controller to kick off the filling process. This API call\nmay be retried for all error codes with a short backoff. This also serves\nas a synchronization primitive as the fill will be refused if the pageserver\nhas not yet re-attached to the storage controller. When a 202(Accepted) HTTP\ncode is returned, the fill has started. Now the orchestrator polls the node\nstatus endpoint exposed by the storage controller in order to await the end of\nthe filling process. When the `policy` field of the node status response becomes\n`Active`, the fill has completed and the orchestrator may proceed to the next pageserver.\n\nAgain, the epilogue is subject to an overall timeout. We can start off with\nusing the same timeout as for the prologue, but can also consider relying on\nthe storage controller's background optimizations with a shorter timeout.\n\nIn the case that the deployment orchestrator times out, it attempts to cancel\nthe fill. This operation shall be retried with a short back-off. If it ultimately\nfails it will require manual intervention to set the nodes scheduling policy to\n`NodeSchedulingPolicy::Active`. 
Not doing that is not immediately problematic,\nbut it constrains the scheduler as mentioned previously.\n\n### Node Scheduling Policy State Machine\n\nThe state machine below encodes the behaviours discussed above and\nthe various failover situations described in a later section.\n\nAssuming no failures and/or timeouts the flow should be:\n`Active -> Draining -> PauseForRestart -> Active -> Filling -> Active`\n\n```\n                          Operator requested drain\n               +-----------------------------------------+\n               |                                         |\n       +-------+-------+                         +-------v-------+\n       |               |                         |               |\n       |     Pause     |             +----------->    Draining   +----------+\n       |               |             |           |               |          |\n       +---------------+             |           +-------+-------+          |\n                                     |                   |                  |\n                                     |                   |                  |\n                      Drain requested|                   |                  |\n                                     |                   |Drain complete    | Drain failed\n                                     |                   |                  | Cancelled/PS reattach/Storcon restart\n                                     |                   |                  |\n                             +-------+-------+           |                  |\n                             |               |           |                  |\n               +-------------+    Active     <-----------+------------------+\n               |             |               |           |\nFill requested |             +---^---^-------+           |\n               |                 |   |                   |\n               |                 |   |                   |\n               |               
  |   |                   |\n               |   Fill completed|   |                   |\n               |                 |   |PS reattach        |\n               |                 |   |after restart      |\n       +-------v-------+         |   |           +-------v-------+\n       |               |         |   |           |               |\n       |    Filling    +---------+   +-----------+PauseForRestart|\n       |               |                         |               |\n       +---------------+                         +---------------+\n```\n\n### Draining/Filling APIs\n\nThe storage controller API to trigger the draining or filling of a given node is:\n`PUT /v1/control/node/:node_id/{drain,fill}`.\n\nThe following HTTP non-success return codes are used.\nAll of them are safely retriable from the perspective of the storage controller.\n- 404: Requested node was not found\n- 503: Requested node is known to the storage controller, but unavailable\n- 412: Drain precondition failed: there is no other node to drain to or the node's scheduling policy forbids draining\n- 409: A {drain, fill} is already in progress. Only one such background operation\nis allowed per node.\n\nWhen the drain is accepted and commenced a 202 HTTP code is returned.\n\nDrains and fills shall be cancellable by the deployment orchestrator or a\nhuman operator via: `DELETE /v1/control/node/:node_id/{drain,fill}`. A 200\nresponse is returned when the cancellation is successful. Errors are retriable.\n\n### Drain Process\n\nBefore accepting a drain request the following validations are applied:\n* Ensure that the node is known to the storage controller\n* Ensure that the scheduling policy is `NodeSchedulingPolicy::Active` or `NodeSchedulingPolicy::Pause`\n* Ensure that another drain or fill is not already running on the node\n* Ensure that a drain is possible (i.e. 
check that there is at least one\nschedulable node to drain to)\n\nAfter accepting the drain, the scheduling policy of the node is set to\n`NodeSchedulingPolicy::Draining` and persisted in both memory and the database.\nThis disallows the optimizer from adding or removing shards from the node which\nis desirable to avoid them racing.\n\nNext, a separate Tokio task is spawned to manage the draining. For each tenant\nshard attached to the node being drained, demote the node to a secondary and\nattempt to schedule the node away. Scheduling might fail due to unsatisfiable\nconstraints, but that is fine. Draining is a best effort process since it might\nnot always be possible to cut over all shards.\n\nImportantly, this task manages the concurrency of issued reconciles in order to\navoid drowning out the target pageservers and to allow other important reconciles\nto proceed.\n\nOnce the triggered reconciles have finished or timed out, set the node's scheduling\npolicy to `NodeSchedulingPolicy::PauseForRestart` to signal the end of the drain.\n\nA note on non HA tenants: These tenants do not have secondaries, so by the description\nabove, they would not be migrated. It makes sense to skip them (especially the large ones)\nsince, depending on tenant size, this might be more disruptive than the restart since the\npageserver we've moved to will need to on-demand download the entire working set for the tenant.\nWe can consider expanding to small non-HA tenants in the future.\n\n### Fill Process\n\nBefore accepting a fill request the following validations are applied:\n* Ensure that the node is known to the storage controller\n* Ensure that the scheduling policy is `NodeSchedulingPolicy::Active`.\nThis is the only acceptable policy for the fill starting state. 
When a node re-attaches,\nits scheduling policy is set to `NodeSchedulingPolicy::Active` if it was equal to\n`NodeSchedulingPolicy::PauseForRestart` or `NodeSchedulingPolicy::Draining` (possible end states for a node drain).\n* Ensure that another drain or fill is not already running on the node\n\nAfter accepting the fill, the scheduling policy of the node is set to\n`NodeSchedulingPolicy::Filling` and persisted in both memory and the database.\nThis disallows the optimizer from adding or removing shards from the node which\nis desirable to avoid them racing.\n\nNext, a separate Tokio task is spawned to manage the filling. For each tenant\nshard where the filled node is a secondary, promote the secondary. This is done\nuntil we run out of shards or the counts of attached shards become balanced across\nthe cluster.\n\nLike for draining, the concurrency of spawned reconciles is limited.\n\n### Failure Modes & Handling\n\nFailures are generally handled by transitioning back into the `Active`\n(neutral) state. This simplifies the implementation greatly at the\ncost of adding transitions to the state machine. For example, we\ncould detect the `Draining` state upon restart and proceed with a drain,\nbut how should the storage controller know that's what the orchestrator\nneeds still?\n\n#### Storage Controller Crash\n\nWhen the storage controller starts up, reset the node scheduling policy\nof all nodes in states `Draining`, `Filling` or `PauseForRestart` to\n`Active`. The rationale is that when the storage controller restarts,\nwe have lost context of what the deployment orchestrator wants. 
It also\nhas the benefit of making things easier to reason about.\n\n#### Pageserver Crash During Drain\n\nThe pageserver will attempt to re-attach during restart at which\npoint the node scheduling policy will be set back to `Active`, thus\nreenabling the scheduler to use the node.\n\n#### Non-drained Pageserver Crash During Drain\n\nWhat should happen when a pageserver we are draining to crashes during the\nprocess? Two reasonable options are: cancel the drain and focus on the failover\n*or* do both, but prioritise failover. Since the number of concurrent reconciles\nproduced by drains/fills is limited, we get the latter behaviour for free.\nMy suggestion is we take this approach, but the cancellation option is trivial\nto implement as well.\n\n#### Pageserver Crash During Fill\n\nThe pageserver will attempt to re-attach during restart at which\npoint the node scheduling policy will be set back to `Active`, thus\nreenabling the scheduler to use the node.\n\n#### Pageserver Goes unavailable During Drain/Fill\n\nThe drain and fill jobs handle this by stopping early. When the pageserver\nis detected as online by storage controller heartbeats, reset its scheduling\npolicy to `Active`. If a restart happens instead, see the pageserver crash\nfailure mode.\n\n#### Orchestrator Drain Times Out\n\nOrchestrator will still proceed with the restart.\nWhen the pageserver re-attaches, the scheduling policy is set back to\n`Active`.\n\n#### Orchestrator Fill Times Out\n\nOrchestrator will attempt to cancel the fill operation. If that fails,\nthe fill will continue until it quiesces and the node will be left\nin the `Filling` scheduling policy. This hinders the scheduler, but is\notherwise harmless. 
A human operator can handle this by setting the scheduling\npolicy to `Active`, or we can bake a fill timeout into the storage controller.\n\n## Optimizations\n\n### Location Warmth\n\nWhen cutting over to a secondary, the storage controller will wait for it to\nbecome \"warm\" (i.e. download enough of the tenant's data). This means that some\nreconciliations can take significantly longer than others and hold up precious\nreconciliation units. As an optimization, the drain stage can cut over only\ntenants that are already \"warm\". Similarly, the fill stage can prioritise the\n\"warmest\" tenants in the fill.\n\nGiven that the number of tenants managed by the storage controller will be fairly low\nfor the foreseeable future, the first implementation could simply query the tenants\nfor secondary status. This doesn't scale well with increasing tenant counts, so\neventually we will need new pageserver API endpoints to report the sets of\n\"warm\" and \"cold\" nodes.\n\n## Alternatives Considered\n\n### Draining and Filling Purely as Scheduling Constraints\n\nAt its core, the storage controller is a big background loop that detects changes\nin the environment and reacts to them. One could express draining and filling\nof nodes purely in terms of constraining the scheduler (as opposed to having\nsuch background tasks).\n\nWhile theoretically nice, I think that's harder to implement and more importantly operate and reason about.\nConsider cancellation of a drain/fill operation. We would have to update the scheduler state, create\nan entirely new schedule (intent state) and start work on applying that. It gets trickier if we wish\nto cancel the reconciliation tasks spawned by drain/fill nodes. How would we know which ones belong\nto the conceptual drain/fill? 
One could add labels to reconciliations, but it gets messy in my opinion.\n\nIt would also mean that reconciliations themselves have side effects that persist in the database\n(persist something to the database when the drain is done), which I'm not conceptually fond of.\n\n## Proof of Concept\n\nThis RFC is accompanied by a POC which implements nearly everything mentioned here\napart from the optimizations and some of the failure handling:\nhttps://github.com/neondatabase/neon/pull/7682\n"
  },
  {
    "path": "docs/rfcs/034-ancestor-deletion.md",
    "content": "# Ancestor Timeline Deletion\n\nCreated on: 2024-02-23\n\nAuthor: John Spray\n\n# Summary\n\nWhen a tenant creates a new timeline that they will treat as their 'main' history,\nit is awkward to permanently retain an 'old main' timeline as its ancestor. Currently\nthis is necessary because it is forbidden to delete a timeline which has descendents.\n\nA new pageserver API is proposed to 'adopt' data from a parent timeline into\none of its children, such that the link between ancestor and child can be severed,\nleaving the parent in a state where it may then be deleted.\n\n# Motivation\n\nRetaining parent timelines currently has two costs:\n\n- Cognitive load on users, who have to remember which is the \"real\" main timeline.\n- Storage capacity cost, as the parent timeline will retain layers up to the\n  child's timeline point, even if the child fully covers its keyspace with image\n  layers and will never actually read from the parent.\n\n# Solution\n\nA new pageserver API `PUT /v1/tenant/:tenant_id/timeline/:timeline_id/detach_ancestor`\nwill be added. The `timeline_id` in this URL is that of the _child_ timeline that we\nwish to detach from its parent.\n\nOn success, this API will leave the following state:\n\n- The detached child timeline will no longer have an ancestor, and will contain all\n  the data needed to service reads without recursing into an ancestor.\n- Any other children of the parent whose timeline points were at a lower LSN than\n  the detached child timeline will be modified to have the child timeline as their\n  new parent.\n- The parent timeline will still exist, but the child will no longer have it as an\n  ancestor. 
If this was the last timeline that depended on the parent, then the\n  parent will become deletable.\n\nThis API's implementation will consist of a series of retryable steps, such that\non failures/timeout it can safely be called again to reach the target state.\n\n## Example\n\n### Before\n\nThe user has \"rolled back\" their project to LSN X, resulting in a \"new main\"\ntimeline. The parent \"old main\" timeline still exists, and they would like\nto clean it up.\n\nThey have two other timelines A and B. A is from before the rollback point,\nand B is from after the rollback point.\n\n```\n----\"old main\" timeline-------X-------------------------------------------->\n                |             |                         |\n                |-> child A   |                         |\n                              |-> \"new main\" timeline   |\n                                                        -> child B\n\n```\n\n### After calling detach ancestor API\n\nThe \"new main\" timeline is no longer dependent on old main, and neither\nis child A, because it had a branch point before X.\n\nThe user may now choose to delete child B and \"old main\" to get to\na pristine state. Child B is likely to be unwanted since the user\nchose to roll back to X, and it branches from after X. 
However, we\ndon't assume this in the API; it is up to the user to delete it.\n\n```\n|----\"old main\" timeline---------------------------------------------------->\n                                                         |\n                                                         |\n                                                         |\n                                                         -> child B\n\n|----\"new main\" timeline--------->\n                 |\n                 |-> child A\n\n\n```\n\n### After removing timelines\n\nWe end up with a totally clean state that leaves no trace that a rollback\never happened: there is only one root timeline.\n\n```\n| ----\"new main\" timeline----------->\n                |\n                |-> child A\n\n\n```\n\n## Caveats\n\nImportant things for API users to bear in mind:\n\n- this API does not delete the parent timeline: you must still do that explicitly.\n- if there are other child timelines ahead of the branch point of the detached\n  child, the parent won't be deletable: you must either delete or detach those\n  children.\n- do _not_ simply loop over all children and detach them all: this can have an\n  extremely high storage cost. The detach ancestor API is intended for use on a single\n  timeline to make it the new \"main\".\n- The detach ancestor API should also not be\n  exposed directly to the user as button/API, because they might decide\n  to click it for all the children and thereby generate many copies of the\n  parent's data -- the detach ancestor API should be used as part\n  of a high level \"clean up after rollback\" feature.\n\n## `detach_ancestor` API implementation\n\nTerms used in the following sections:\n\n- \"the child\": the timeline whose ID is specified in the detach ancestor API URL, also\n  called \"new main\" in the example.\n- \"the parent\": the parent of \"the child\". 
Also called \"old main\" in the example.\n- \"the branch point\": the ancestor_lsn of \"the child\"\n\n### Phase 1: write out adopted layers to S3\n\nThe child will \"adopt\" layers from the parent, such that its end state contains\nall the parent's history as well as its own.\n\nFor all layers in the parent's layer map whose high LSN is below the branch\npoint, issue S3 CopyObject requests to duplicate them into the child timeline's\nprefix. Do not add them to the child's layer map yet.\n\nFor delta layers in the parent's layer map which straddle the branch point, read them\nand write out only content up to the branch point into new layer objects.\n\nThis is a long running operation if the parent has many layers: it should be\nimplemented in a way that resumes rather than restarting from scratch, if the API\ntimes out and is called again.\n\nAs an optimization, if there are no other timelines that will be adopted into\nthe child, _and_ the child's image layers already fully cover the branch LSN,\nthen we may skip adopting layers.\n\n### Phase 2: update the child's index\n\nHaving written out all needed layers in phase 1, atomically link them all\ninto the child's IndexPart and upload to S3. This may be done while the\nchild Timeline is still running.\n\n### Phase 3: modify timelines ancestry\n\nModify the child's ancestor to None, and upload its IndexPart to persist the change.\n\nFor all timelines which have the same parent as the child, and have a branch\npoint lower than our branch point, switch their ancestor_timeline to the child,\nand upload their IndexPart to persist the change.\n\n## Alternatives considered\n\n### Generate full image layer on child, rather than adopting parent deltas\n\nThis would work for the case of a single child, but would prevent re-targeting\nother timelines that depended on the parent. 
If we detached many children this\nway, the storage cost would become prohibitive (consider a 1TB database with\n100 child timelines: it would cost 100TiB if they all generated their own image layers).\n\n### Don't rewrite anything: just fake it in the API\n\nWe could add a layer of indirection that let a child \"pretend\" that it had no\nancestor, when in reality it still had the parent. The pageserver API could\naccept deletion of ancestor timelines, and just update child metadata to make\nthem look like they have no ancestor.\n\nThis would not achieve the desired reduction in storage cost, and may well be more\ncomplex to maintain than simply implementing the API described in this RFC.\n\n### Avoid copying objects: enable child index to use parent layers directly\n\nWe could teach IndexPart to store a TimelineId for each layer, such that a child\ntimeline could reference a parent's layers directly, rather than copying them\ninto the child's prefix.\n\nThis would impose a cost for the normal case of indices that only target the\ntimeline's own layers, add complexity, and break the useful simplifying\ninvariant that timelines \"own\" their own path. If child timelines were\nreferencing layers from the parent, we would have to ensure that the parent\nnever runs GC/compaction again, which would make the API less flexible (the\nproposal in this RFC enables deletion of the parent but doesn't require it.)\n\n## Performance\n\n### Adopting layers\n\n- CopyObject is a relatively cheap operation, but we may need to issue tens of thousands\n  of such requests: this can take up to tens of seconds and will compete for RemoteStorage\n  semaphore units with other activity on the pageserver.\n- If we are running on storage backend that doesn't implement CopyObject, then\n  this part will be much more expensive as we would stream all layer content\n  through the pageserver. 
This is no different to issuing a lot\n  of reads to a timeline that does not have a warm local cache: it will move\n  a lot of gigabytes, but that shouldn't break anything.\n- Generating truncated layers for delta that straddle the branch point will\n  require streaming read/write of all the layers in question.\n\n### Updating timeline ancestry\n\nThe simplest way to update timeline ancestry will probably be to stop and start\nall the Timeline objects: this is preferable to the complexity of making their\nancestry mutable at runtime.\n\nThere will be a corresponding \"stutter\" in the availability of the timelines,\nof the order 10-100ms, which is the time taken to upload their IndexPart, and\nrestart the Timeline.\n\n# Interaction with other features\n\n## Concurrent timeline creation\n\nIf new historic timelines are created using the parent as an ancestor while the\ndetach ancestor API is running, they will not be re-parented to the child. This\ndoesn't break anything, but it leaves the parent in a state where it might not\nbe possible to delete it.\n\nSince timeline creations are an explicit user action, this is not something we need to\nworry about as the storage layer: a user who wants to delete their parent timeline will not create\nnew children, and if they do, they can choose to delete those children to\nenable deleting the parent.\n\nFor the least surprise to the user, before starting the detach ancestor branch\noperation, the control plane should wait until all branches are created and not\nallow any branches to be created before the branch point on the ancestor branch\nwhile the operation is ongoing.\n\n## WAL based disaster recovery\n\nWAL based disaster recovery currently supports only restoring of the main\nbranch. Enabling WAL based disaster recovery in the future requires that we\nkeep a record which timeline generated the WAL and at which LSN was a parent\ndetached. 
Keep a list of timeline ids and the LSN in which they were detached in\nthe `index_part.json`. Limit the size of the list to 100 first entries, after\nwhich the WAL disaster recovery will not be possible.\n\n## Sharded tenants\n\nFor sharded tenants, calls to the detach ancestor API will pass through the storage\ncontroller, which will handle them the same as timeline creations: invoke first\non shard zero, and then on all the other shards.\n"
  },
  {
    "path": "docs/rfcs/035-safekeeper-dynamic-membership-change.md",
    "content": "# Safekeeper dynamic membership change\n\nTo quickly recover from safekeeper node failures and do rebalancing we need to\nbe able to change set of safekeepers the timeline resides on. The procedure must\nbe safe (not lose committed log) regardless of safekeepers and compute state. It\nshould be able to progress if any majority of old safekeeper set, any majority\nof new safekeeper set and compute are up and connected. This is known as a\nconsensus membership change. It always involves two phases: 1) switch old\nmajority to old + new configuration, preventing commits without acknowledge from\nthe new set 2) bootstrap the new set by ensuring majority of the new set has all\ndata which ever could have been committed before the first phase completed;\nafter that switch is safe to finish. Without two phases switch to the new set\nwhich quorum might not intersect with quorum of the old set (and typical case of\nABC -> ABD switch is an example of that, because quorums AC and BD don't\nintersect). Furthermore, procedure is typically carried out by the consensus\nleader, and so enumeration of configurations which establishes order between\nthem is done through consensus log.\n\nIn our case consensus leader is compute (walproposer), and we don't want to wake\nup all computes for the change. Neither we want to fully reimplement the leader\nlogic second time outside compute. 
Because of that the proposed algorithm relies\nfor issuing configurations on the external fault tolerant (distributed) strongly\nconsistent storage with simple API: CAS (compare-and-swap) on the single key.\nProperly configured postgres suits this.\n\nIn the system consensus is implemented at the timeline level, so algorithm below\napplies to the single timeline.\n\n## Algorithm\n\n### Definitions\n\nA configuration is\n\n```\nstruct Configuration {\n    generation: SafekeeperGeneration, // a number uniquely identifying configuration\n    sk_set: Vec<NodeId>, // current safekeeper set\n    new_sk_set: Optional<Vec<NodeId>>,\n}\n```\n\nConfiguration with `new_sk_set` present is used for the intermediate step during\nthe change and called joint configuration. Generations establish order of\nconfigurations: we say `c1` is higher than `c2` if `c1.generation` >\n`c2.generation`.\n\n### Persistently stored data changes\n\nSafekeeper starts storing its current configuration in the control file. Update\nof it is atomic, so in-memory value always matches the persistent one.\n\nExternal CAS providing storage (let's call it configuration storage here) also\nstores configuration for each timeline. It is initialized with generation 1 and\ninitial set of safekeepers during timeline creation. Executed CAS on it must\nnever be lost.\n\n### Compute <-> safekeeper protocol changes\n\n`ProposerGreeting` message carries walproposer's configuration if it is already\nestablished (see below), else null.  `AcceptorGreeting` message carries\nsafekeeper's current `Configuration`. All further messages (`VoteRequest`,\n`VoteResponse`, `ProposerElected`, `AppendRequest`, `AppendResponse`) carry\ngeneration number, of walproposer in case of wp->sk message or of safekeeper in\ncase of sk->wp message.\n\n### Safekeeper changes\n\nBasic rule: once safekeeper observes configuration higher than his own it\nimmediately switches to it. It must refuse all messages with lower generation\nthan his. 
It also refuses messages if it is not member of the current generation\n(that is, of either `sk_set` or `new_sk_set`), though it is likely not unsafe to\nprocess them (walproposer should ignore result anyway).\n\nIf there is non null configuration in `ProposerGreeting` and it is higher than\ncurrent safekeeper one, safekeeper switches to it.\n\nSafekeeper sends its current configuration in its first message to walproposer\n`AcceptorGreeting`. It refuses all other walproposer messages if the\nconfiguration generation in them is less than its current one. Namely, it\nrefuses to vote, to truncate WAL in `handle_elected` and to accept WAL. In\nresponse it sends its current configuration generation to let walproposer know.\n\nSafekeeper gets `PUT /v1/tenants/{tenant_id}/timelines/{timeline_id}/membership`\naccepting `Configuration`. Safekeeper switches to the given conf if it is higher than its\ncurrent one and ignores it otherwise. In any case it replies with\n```\nstruct TimelineMembershipSwitchResponse {\n    conf: Configuration,\n    term: Term,\n    last_log_term: Term,\n    flush_lsn: Lsn,\n}\n```\n\n### Compute (walproposer) changes\n\nBasic rule is that joint configuration requires votes from majorities in the\nboth `sk_set` and `new_sk_set`.\n\nCompute receives list of safekeepers to connect to from the control plane as\ncurrently and tries to communicate with all of them. However, the list does not\ndefine consensus members. Instead, on start walproposer tracks highest\nconfiguration it receives from `AcceptorGreeting`s. 
Once it assembles greetings\nfrom majority of `sk_set` and majority of `new_sk_set` (if it is present), it\nestablishes this configuration as its own and moves to voting.\n\nIt should stop talking to safekeepers not listed in the configuration at this\npoint, though it is not unsafe to continue doing so.\n\nTo be elected it must receive votes from both majorities if `new_sk_set` is present.\nSimilarly, to commit WAL it must receive flush acknowledge from both majorities.\n\nIf walproposer hears from safekeeper configuration higher than his own (i.e.\nrefusal to accept due to configuration change) it simply restarts.\n\n### Change algorithm\n\nThe following algorithm can be executed anywhere having access to configuration\nstorage and safekeepers. It is safe to interrupt / restart it and run multiple\ninstances of it concurrently, though likely one of them won't make\nprogress then. It accepts `desired_set: Vec<NodeId>` as input.\n\nAlgorithm will refuse to make the change if it encounters previous interrupted\nchange attempt, but in this case it will try to finish it.\n\nIt will eventually converge if old majority, new majority and configuration\nstorage are reachable.\n\n1) Fetch current timeline configuration from the configuration storage.\n2) If it is already joint one and `new_sk_set` is different from `desired_set`\n   refuse to change. However, assign joint conf to (in memory) var\n   `joint_conf` and proceed to step 4 to finish the ongoing change.\n3) Else, create joint `joint_conf: Configuration`: increment current conf number\n   `n` and put `desired_set` to `new_sk_set`. Persist it in the configuration\n   storage by doing CAS on the current generation: change happens only if\n   current configuration number is still `n`. Apart from guaranteeing uniqueness\n   of configurations, CAS linearizes them, ensuring that new configuration is\n   created only following the previous one when we know that the transition is\n   safe. 
Failed CAS aborts the procedure.\n4) Call `PUT` `configuration` on safekeepers from the current set,\n   delivering them `joint_conf`. Collecting responses from majority is required\n   to proceed. If any response returned generation higher than\n   `joint_conf.generation`, abort (another switch raced us). Otherwise, choose\n   max `<last_log_term, flush_lsn>` among responses and establish it as\n   (in memory) `sync_position`. Also choose max `term` and establish it as (in\n   memory) `sync_term`. We can't finish the switch until majority of the new set\n   catches up to this `sync_position` because data before it could be committed\n   without ack from the new set. Similarly, we'll bump term on new majority\n   to `sync_term` so that two computes with the same term are never elected.\n4) Initialize timeline on safekeeper(s) from `new_sk_set` where it\n   doesn't exist yet by doing `pull_timeline` from the majority of the\n   current set. Doing that on majority of `new_sk_set` is enough to\n   proceed, but it is reasonable to ensure that all `new_sk_set` members\n   are initialized -- if some of them are down why are we migrating there?\n5) Call `POST` `bump_term(sync_term)` on safekeepers from the new set.\n   Success on majority is enough.\n6) Repeatedly call `PUT` `configuration` on safekeepers from the new set,\n   delivering them `joint_conf` and collecting their positions. This will\n   switch them to the `joint_conf` which generally won't be needed\n   because `pull_timeline` already includes it and plus additionally would be\n   broadcast by compute. More importantly, we may proceed to the next step\n   only when `<last_log_term, flush_lsn>` on the majority of the new set reached\n   `sync_position`. Similarly, on the happy path no waiting is needed because\n   `pull_timeline` already includes it. However, we should double\n    check to be safe. 
For example, timeline could have been created earlier e.g.\n    manually or after try-to-migrate, abort, try-to-migrate-again sequence.\n7) Create `new_conf: Configuration` incrementing `joint_conf` generation and having new\n   safekeeper set as `sk_set` and None `new_sk_set`. Write it to configuration\n   storage under one more CAS.\n8) Call `PUT` `configuration` on safekeepers from the new set,\n   delivering them `new_conf`. It is enough to deliver it to the majority\n   of the new set; the rest can be updated by compute.\n\nI haven't put huge effort to make the description above very precise, because it\nis natural language prone to interpretations anyway. Instead I'd like to make TLA+\nspec of it.\n\nDescription above focuses on safety. To make the flow practical and live, here are a few more\nconsiderations.\n1) It makes sense to ping new set to ensure we are migrating to live node(s) before\n  step 3.\n2) If e.g. accidentally wrong new sk set has been specified, before CAS in step `6` is completed\n   it is safe to rollback to the old conf with one more CAS.\n3) On step 4 timeline might be already created on members of the new set for various reasons;\n   the simplest is the procedure restart. There are more complicated scenarios like mentioned\n   in step 5. Deleting and re-doing `pull_timeline` is generally unsafe without involving\n   generations, so seems simpler to treat existing timeline as success. However, this also\n   has a disadvantage: you might imagine a surpassingly unlikely schedule where condition in\n   the step 5 is never reached until compute is (re)awakened to synchronize new member(s).\n   I don't think we'll observe this in practice, but can add waking up compute if needed.\n4) In the end timeline should be locally deleted on the safekeeper(s) which are\n   in the old set but not in the new one, unless they are unreachable. 
To be\n   safe this also should be done under generation number (deletion proceeds only if\n   current configuration is <= than one in request and safekeeper is not member of it).\n5) If current conf fetched on step 1 is already not joint and members equal to `desired_set`,\n   jump to step 7, using it as `new_conf`.\n\n## Implementation\n\nThe procedure ought to be driven from somewhere. Obvious candidates are control\nplane and storage_controller; and as each of them already has db we don't want\nyet another storage. I propose to manage safekeepers in storage_controller\nbecause 1) since it is in rust it simplifies simulation testing (more on this\nbelow) 2) it already manages pageservers.\n\nThis assumes that migration will be fully usable only after we migrate all\ntenants/timelines to storage_controller. It is discussible whether we want also\nto manage pageserver attachments for all of these, but likely we do.\n\nThis requires us to define storcon <-> cplane interface and changes.\n\n### storage_controller <-> control plane interface and changes\n\nFirst of all, control plane should\n[change](https://neondb.slack.com/archives/C03438W3FLZ/p1719226543199829)\nstoring safekeepers per timeline instead of per tenant because we can't migrate\ntenants atomically.\n\nThe important question is how updated configuration is delivered from\nstorage_controller to control plane to provide it to computes. As always, there\nare two options, pull and push. Let's do it the same push as with pageserver\n`/notify-attach` because 1) it keeps storage_controller out of critical compute\nstart path 2) uniformity. It makes storage_controller responsible for retrying\nnotifying control plane until it succeeds.\n\nIt is not needed for the control plane to fully know the `Configuration`. 
It is\nenough for it to only be aware of the list of safekeepers in the latest\nconfiguration to supply it to compute, plus associated generation number to\nprotect from stale update requests and to also pass it to compute.\n\nSo, cplane `/notify-safekeepers` for the timeline can accept JSON like\n```\n{\n   tenant_id: String,\n   timeline_id: String,\n   generation: u32,\n   safekeepers: Vec<SafekeeperId>,\n}\n```\nwhere `SafekeeperId` is\n```\n{\n   node_id: u64,\n   host: String\n}\n```\nIn principle `host` is redundant, but may be useful for observability.\n\nThe request updates list of safekeepers in the db if the provided conf\ngeneration is higher (the cplane db should also store generations for this).\nSimilarly to\n[`/notify-attach`](https://www.notion.so/neondatabase/Storage-Controller-Control-Plane-interface-6de56dd310a043bfa5c2f5564fa98365),\nit should update db which makes the call successful, and then try to schedule\n`apply_config` if possible, it is ok if not. storage_controller should rate\nlimit calling the endpoint, but likely this won't be needed, as migration\nthroughput is limited by `pull_timeline`.\n\nTimeline (branch) creation in cplane should call storage_controller POST\n`tenant/:tenant_id/timeline` like it currently does for sharded tenants.\nResponse should be augmented with `safekeepers_generation` and `safekeepers`\nfields like described in `/notify-safekeepers` above. Initially (currently)\nthese fields may be absent; in this case cplane chooses safekeepers on its own\nlike it currently does. The call should be retried until it succeeds.\n\nTimeline deletion and tenant deletion in cplane should call appropriate\nstorage_controller endpoints like it currently does for sharded tenants. 
The\ncalls should be retried until they succeed.\n\nWhen compute receives safekeeper list from control plane it needs to know the\ngeneration to check whether it should be updated (note that compute may get\nsafekeeper list from either cplane or safekeepers). Currently `neon.safekeepers`\nGUC is just a comma separated list of `host:port`. Let's prefix it with\n`g#<generation>:` to this end, so it will look like\n```\ng#42:safekeeper-0.eu-central-1.aws.neon.tech:6401,safekeeper-2.eu-central-1.aws.neon.tech:6401,safekeeper-1.eu-central-1.aws.neon.tech:6401\n```\n\nTo summarize, list of cplane changes:\n- per tenant -> per timeline safekeepers management and addition of int `safekeeper_generation` field.\n- `/notify-safekeepers` endpoint.\n- Branch creation call may return list of safekeepers and when it is\n  present cplane should adopt it instead of choosing on its own like it does currently.\n- `neon.safekeepers` GUC should be prefixed with `g#<generation>:`.\n\n### storage_controller implementation\n\nIf desired, we may continue using current 'load everything on startup and keep\nin memory' approach: single timeline shouldn't take more than 100 bytes (it's 16\nbyte tenant_id, 16 byte timeline_id, int generation, vec of ~3 safekeeper ids\nplus some flags), so 10^6 of timelines shouldn't take more than 100MB.\n\nSimilar to pageserver attachment Intents storage_controller would have in-memory\n`MigrationRequest` (or its absence) for each timeline and pool of tasks trying\nto make these requests reality; this ensures one instance of storage_controller\nwon't do several migrations on the same timeline concurrently. In the first\nversion it is simpler to have more manual control and no retries, i.e. migration\nfailure removes the request. Later we can build retries and automatic\nscheduling/migration around. 
`MigrationRequest` is\n```\nenum MigrationRequest {\n    To(Vec<NodeId>),\n    FinishPending,\n}\n```\n\n`FinishPending` requests to run the procedure to ensure state is clean: current\nconfiguration is not joint and the majority of safekeepers are aware of it, but do\nnot attempt to migrate anywhere. If the current configuration fetched on step 1 is\nnot joint it jumps to step 7. It should be run at startup for all timelines (but\nsimilarly, in the first version it is ok to trigger it manually).\n\n#### Schema\n\n`safekeepers` table mirroring current `nodes` should be added, except that for\n`scheduling_policy`: it is enough to have at least in the beginning only 3\nfields: 1) `active` 2) `paused` (initially means only not assign new tlis there\n3) `decommissioned` (node is removed).\n\n`timelines` table:\n```\ntable! {\n    // timeline_id is primary key\n    timelines (tenant_id, timeline_id) {\n        timeline_id -> Varchar,\n        tenant_id -> Varchar,\n        start_lsn -> pg_lsn,\n        generation -> Int4,\n        sk_set -> Array<Int8>, // list of safekeeper ids\n        new_sk_set -> Nullable<Array<Int8>>, // list of safekeeper ids, null if not joint conf\n        cplane_notified_generation -> Int4,\n        sk_set_notified_generation -> Int4, // the generation a quorum of sk_set knows about\n        deleted_at -> Nullable<Timestamptz>,\n    }\n}\n```\n\n`start_lsn` is needed to create timeline on safekeepers properly, see below. We\nmight also want to add ancestor_timeline_id to preserve the hierarchy, but for\nthis RFC it is not needed.\n\n`cplane_notified_generation` and `sk_set_notified_generation` fields are used to\ntrack the last stage of the algorithm, when we need to notify safekeeper set and cplane\nwith the final configuration after it's already committed to DB.\n\nThe timeline is up-to-date (no migration in progress) if `new_sk_set` is null and\n`*_notified_generation` fields are up to date with `generation`. 
\n\nIt's possible to replace `*_notified_generation` with one boolean field `migration_completed`,\nbut for better observability it's nice to have them separately.\n\n#### API\n\nNode management is similar to pageserver:\n1) POST `/control/v1/safekeeper` inserts safekeeper.\n2) GET `/control/v1/safekeeper` lists safekeepers.\n3) GET `/control/v1/safekeeper/:node_id` gets safekeeper.\n4) PUT `/control/v1/safekeeper/:node_id/scheduling_policy` changes status to e.g.\n   `offline` or `decommissioned`. Initially it is simpler not to schedule any\n    migrations here.\n\nSafekeeper deploy scripts should register safekeeper at storage_controller as\nthey currently do with cplane, under the same id.\n\nTimeline creation/deletion will work through already existing POST and DELETE\n`tenant/:tenant_id/timeline`. Cplane is expected to retry both until they\nsucceed. See next section on the implementation details.\n\nWe don't want to block timeline creation/deletion when one safekeeper is down.\nCurrently this is crutched by compute implicitly creating timeline on any\nsafekeeper it is connected to. This creates ugly timeline state on safekeeper\nwhen timeline is created, but start LSN is not defined yet. Next section\ndescribes dealing with this.\n\nTenant deletion repeats timeline deletion for all timelines.\n\nMigration API: the first version is the simplest and the most imperative:\n1) PUT `/control/v1/safekeepers/migrate` schedules `MigrationRequest`s to move\nall timelines from one safekeeper to another. 
It accepts json\n```\n{\n    \"src_sk\": NodeId,\n    \"dst_sk\": NodeId,\n    \"limit\": Optional<u32>,\n}\n```\n\nReturns list of scheduled requests.\n\n2) PUT `/control/v1/tenant/:tenant_id/timeline/:timeline_id/safekeeper_migrate` schedules `MigrationRequest`\n   to move single timeline to given set of safekeepers:\n```\nstruct TimelineSafekeeperMigrateRequest {\n    \"new_sk_set\": Vec<NodeId>,\n}\n```\n\nIn the first version the handler migrates the timeline to `new_sk_set` synchronously.\nShould be retried until success.\n\nIn the future we might change it to asynchronous API and return scheduled request.\n\nSimilar call should be added for the tenant.\n\nIt would be great to have some way of subscribing to the results (apart from\nlooking at logs/metrics).\n\n3) GET `/control/v1/tenant/:tenant_id/timeline/:timeline_id/` should return\n   current in memory state of the timeline and pending `MigrationRequest`,\n   if any.\n\n4) PUT `/control/v1/tenant/:tenant_id/timeline/:timeline_id/safekeeper_migrate_abort` tries to abort the\n   migration by switching configuration from the joint to the one with (previous) `sk_set` under CAS\n   (incrementing generation as always).\n\n#### API implementation and reconciliation\n\nFor timeline creation/deletion we want to preserve the basic assumption that\nunreachable minority (1 sk of 3) doesn't block their completion, but eventually\nwe want to finish creation/deletion on nodes which missed it (unless they are\nremoved). Similarly for migration; it may and should finish even though excluded\nmembers missed their exclusion. And of course e.g. such pending exclusion on\nnode C after migration ABC -> ABD must not prevent next migration ABD -> ABE. As\nanother example, if some node missed timeline creation it clearly must not block\nmigration from it. Hence it is natural to have per safekeeper background\nreconciler which retries these ops until they succeed. 
There are 3 possible\noperation types, and the type is defined by timeline state (membership\nconfiguration and whether it is deleted) and safekeeper id: we may need to\ncreate timeline on sk (node added), locally delete it (node excluded, somewhat\nsimilar to detach) or globally delete it (timeline is deleted).\n\nNext, on storage controller restart in principle these pending operations can be\nfigured out by comparing safekeepers state against storcon state. But it seems\nbetter to me to materialize them in the database; it is not expensive, avoids\nthese startup scans which themselves can fail etc and makes it very easy to see\noutstanding work directly at the source of truth -- the db. So we can add table\n`safekeeper_timeline_pending_ops`\n```\ntable! {\n    // timeline_id, sk_id is primary key\n    safekeeper_timeline_pending_ops (sk_id, tenant_id, timeline_id) {\n        sk_id -> int8,\n        tenant_id -> Varchar,\n        timeline_id -> Varchar,\n        generation -> Int4,\n        op_type -> Varchar,\n    }\n}\n```\n\nWe load all pending ops from the table on startup into the memory.\nThe table is needed only to preserve the state between restarts.\n\n`op_type` can be `include` (seed from peers and ensure generation is up to\ndate), `exclude` (remove locally) and `delete`. Field is actually not strictly\nneeded as it can be computed from current configuration, but gives more explicit\nobservability.\n\n`generation` is necessary there because after op is done reconciler must remove\nit and not remove another row with higher gen which in theory might appear.\n\nAny insert of row should overwrite (remove) all rows with the same sk and\ntimeline id but lower `generation` as next op makes previous obsolete. 
Insertion\nof `op_type` `delete` overwrites all rows.\n\nAbout `exclude`: rather than adding explicit safekeeper http endpoint, it is\nreasonable to reuse membership switch endpoint: if safekeeper is not member\nof the configuration it locally removes the timeline on the switch. In this case\n404 should also be considered an 'ok' answer by the caller.\n\nSo, main loop of per sk reconcile reads `safekeeper_timeline_pending_ops`\njoined with timeline configuration to get current conf (with generation `n`)\nfor the safekeeper and does the jobs, infinitely retrying failures:\n1) If node is member (`include`):\n  - Check if timeline exists on it, if not, call pull_timeline on it from \n     other members\n  - Call switch configuration to the current\n2) If node is not member (`exclude`):\n  - Call switch configuration to the current, 404 is ok.\n3) If timeline is deleted (`delete`), call delete.\n\nIn cases 1 and 2 remove `safekeeper_timeline_pending_ops` for the sk and \ntimeline with generation <= `n` if `op_type` is not `delete`.\nIn case 3 also remove `safekeeper_timeline_pending_ops` \nentry + remove `timelines` entry if there is nothing left  in `safekeeper_timeline_pending_ops` for the timeline.\n\nLet's consider in details how APIs can be implemented from this angle.\n\nTimeline creation. It is assumed that cplane retries it until success, so all\nactions must be idempotent. Now, a tricky point here is timeline start LSN. For\nthe initial (tenant creation) call cplane doesn't know it. However, setting\nstart_lsn on safekeepers during creation is a good thing -- it provides a\nguarantee that walproposer can always find a common point in WAL histories of\nsafekeeper and its own, and so absence of it would be a clear sign of\ncorruption. The following sequence works:\n1) Create timeline (or observe that it exists) on pageserver,\n   figuring out last_record_lsn in response.\n2) Choose safekeepers and insert (ON CONFLICT DO NOTHING) timeline row into the\n   db. 
Note that last_record_lsn returned on the previous step is movable as it\n   changes once ingestion starts, insert must not overwrite it (as well as other\n   fields like membership conf). On the contrary, start_lsn used in the next\n   step must be set to the value in the db. cplane_notified_generation can be set\n   to 1 (initial generation) in insert to avoid notifying cplane about initial \n   conf as cplane will receive it in timeline creation request anyway.\n3) Issue timeline creation calls to at least majority of safekeepers. Using\n   majority here is not necessary but handy because it guarantees that any live\n   majority will have at least one sk with created timeline and so\n   reconciliation task can use pull_timeline shared with migration instead of\n   create timeline special init case. OFC if timeline already exists call is\n   ignored.\n4) For minority of safekeepers which could have missed creation insert\n   entries to `safekeeper_timeline_pending_ops`. We won't miss this insertion \n   because response to cplane is sent only after it has happened, and cplane \n   retries the call until 200 response.\n\n   There is a small question how request handler (timeline creation in this\n   case) would interact with per sk reconciler. In the current implementation\n   we first persist the request in the DB, and then send an in-memory request\n   to each safekeeper reconciler to process it.\n\nFor pg version / wal segment size: while we may persist them in `timelines`\ntable, it is not necessary as initial creation at step 3 can take them from\npageserver or cplane creation call and later pull_timeline will carry them\naround.\n\nTimeline migration.\n1) CAS to the db to create joint conf. Since this moment the migration is considered to be \n   \"in progress\". 
We can detect all \"in-progress\" migrations looking into the database.\n2) Do steps 4-6 from the algorithm, including `pull_timeline` onto `new_sk_set`, update membership\n   configuration on all safekeepers, notify cplane, etc. All operations are idempotent,\n   so we don't need to persist anything in the database at this stage. If any errors occur,\n   it's safe to retry or abort the migration.\n3) Once it becomes possible per alg description above, get out of joint conf\n   with another CAS. Also should insert `exclude` entries into `safekeeper_timeline_pending_ops`\n   in the same DB transaction. Adding `exclude` entries atomically is necessary because after\n   CAS we don't have the list of excluded safekeepers in the `timelines` table anymore, but we\n   need to have them persisted somewhere in case the migration is interrupted right after the CAS.\n4) Finish the migration. The final membership configuration is committed to the DB at this stage.\n   So, the migration can not be aborted anymore. But it can still be retried if the migration fails\n   past stage 3. To finish the migration we need to send the new membership configuration to\n   a new quorum of safekeepers, notify cplane with the new safekeeper list and schedule the `exclude`\n   requests to in-memory queue for safekeeper reconciler. If the algorithm is retried, it's\n   possible that we have already committed `exclude` requests to DB, but didn't send them to\n   the in-memory queue. In this case we need to read them from `safekeeper_timeline_pending_ops`\n   because it's the only place where they are persistent. The fields `sk_set_notified_generation`\n   and `cplane_notified_generation` are updated after each step. The migration is considered\n   fully completed when they match the `generation` field.\n\nIn practice, we can report \"success\" after stage 3 and do the \"finish\" step in per-timeline\nreconciler (if we implement it). 
But it's wise to at least try to finish them synchronously,\nso the timeline is always in a \"good state\" and doesn't require an old quorum to commit\nWAL after the migration reported \"success\".\n\nTimeline deletion: just set `deleted_at` on the timeline row and insert\n`safekeeper_timeline_pending_ops` entries in the same xact, the rest is done by\nper sk reconcilers.\n\nWhen node is removed (set to `decommissioned`), `safekeeper_timeline_pending_ops`\nfor it must be cleared in the same transaction.\n\n#### Dealing with multiple instances of storage_controller\n\nOperations described above executed concurrently might create some errors but do\nnot prevent progress, so while we normally don't want to run multiple instances\nof storage_controller it is fine to have it temporarily, e.g. during redeploy.\n\nTo harden against some controller instance creating some work in\n`safekeeper_timeline_pending_ops` and then disappearing without anyone picking up\nthe job, per sk reconcilers apart from explicit wakeups should scan for work\nperiodically. It is possible to remove that though if all db updates are\nprotected with leadership token/term -- then such scans are needed only after\nleadership is acquired.\n\nAny interactions with db update in-memory controller state, e.g. if migration\nrequest failed because different one is in progress, controller remembers that\nand tries to finish it.\n\n## Testing\n\n`neon_local` should be switched to use storage_controller, playing role of\ncontrol plane.\n\nThere should be following layers of tests:\n1) Model checked TLA+ spec specifies the algorithm and verifies its basic safety.\n\n2) To cover real code and at the same time test many schedules we should have\n   simulation tests. For that, configuration storage, storage_controller <->\n   safekeeper communication and pull_timeline need to be mocked and main switch\n   procedure wrapped as a node (thread) in simulation tests, using these\n   mocks. 
Test would inject migrations like it currently injects\n   safekeeper/walproposer restarts. Main assert is the same -- committed WAL must\n   not be lost.\n\n3) Since simulation testing injects at relatively high level points (not\n   syscalls), it omits some code, in particular `pull_timeline`. Thus it is\n   better to have basic tests covering whole system as well. Extended version of\n   `test_restarts_under_load` would do: start background load and do migration\n   under it, then restart endpoint and check that no reported commits\n   had been lost. I'd also add one more creating classic network split scenario, with\n   one compute talking to AC and another to BD while migration from nodes ABC to ABD\n   happens.\n\n4) Simple e2e test should ensure that full flow including cplane notification works.\n\n## Order of implementation and rollout\n\nNote that\n- Control plane parts and integration with it is fully independent from everything else\n  (tests would use simulation and neon_local).\n- It is reasonable to make compute <-> safekeepers protocol change\n  independent of enabling generations.\n- There is a lot of infra work making storage_controller aware of timelines and safekeepers\n  and its impl/rollout should be separate from migration itself.\n- Initially walproposer can just stop working while it observes joint configuration.\n  Such window would be typically very short anyway.\n- Obviously we want to test the whole thing thoroughly on staging and only then\n  gradually enable in prod.\n\nLet's have the following implementation bits for gradual rollout:\n- compute gets `neon.safekeepers_proto_version` flag.\n  Initially both compute and safekeepers will be able to talk both\n  versions so that we can delay force restart of them and for\n  simplicity of rollback in case it is needed.\n- storcon gets `--set-safekeepers` config option disabled by\n  default. 
Timeline creation request chooses safekeepers\n  (and returns them in response to cplane) only when it is set to\n  true.\n- control_plane [see above](storage_controller-<->-control-plane interface-and-changes)\n  prefixes `neon.safekeepers` GUC with generation number. When it is 0\n  (or prefix not present at all), walproposer behaves as currently, committing on\n  the provided safekeeper list -- generations are disabled.\n  If it is non 0 it follows this RFC's rules.\n- We provide a script for manual migration to storage controller.\n  It selects timeline(s) from control plane (specified or all of them) db\n  and calls special import endpoint on storage controller which is very\n  similar to timeline creation: it inserts into the db, sets\n  configuration to initial on the safekeepers, calls cplane\n  `notify-safekeepers`.\n\nThen the rollout for a region would be:\n- Current situation: safekeepers are chosen by control_plane.\n- We manually migrate some timelines, test moving them around.\n- Then we enable `--set-safekeepers` so that all new timelines\n  are on storage controller.\n- Finally migrate all existing timelines using the script (no\n  compute should be speaking old proto version at this point).\n\nUntil all timelines are managed by storcon we'd need to use current ad hoc\nscript to migrate if needed. To keep state clean, all storage controller managed\ntimelines must be migrated before that, or controller db and configurations\nstate of safekeepers dropped manually.\n\nVery rough implementation order:\n- Add concept of configurations to safekeepers (including control file),\n  implement v3 protocol.\n- Implement walproposer changes, including protocol.\n- Implement storcon part. Use it in neon_local (and pytest).\n- Make cplane store safekeepers per timeline instead of per tenant.\n- Implement cplane/storcon integration. Route branch creation/deletion\n  through storcon. Then we can test migration of new branches.\n- Finally import existing branches. 
Then we can drop cplane\n  safekeeper selection code. Gradually enable configurations at\n  computes and safekeepers. Before that, all computes must talk only\n  v3 protocol version.\n\n## Integration with evicted timelines\n\nCurrently, `pull_timeline` doesn't work correctly with evicted timelines because\ncopy would point to original partial file. To fix let's just do s3 copy of the\nfile. It is a bit stupid as generally unnecessary work, but it makes sense to\nimplement proper migration before doing smarter timeline archival. [Issue](https://github.com/neondatabase/neon/issues/8542)\n\n## Possible optimizations\n\nSteps above suggest walproposer restart (with re-election) and thus reconnection\nto safekeepers. Since by bumping term on new majority we ensure that leader\nterms are unique even across generation switches it is possible to preserve\nconnections. However, it is more complicated, reconnection is very fast and it\nis much more important to avoid compute restart than millisecond order of write\nstall.\n\nMultiple joint consensus: algorithm above rejects attempt to change membership\nwhile another attempt is in progress. It is possible to overlay them and AFAIK\nAurora does this but similarly I don't think this is needed.\n\n## Misc\n\nWe should use Compute <-> safekeeper protocol change to include other (long\nyearned) modifications:\n- send data in network order without putting whole structs to be arch independent\n- remove term_start_lsn from AppendRequest\n- add horizon to TermHistory\n- add to ProposerGreeting number of connection from this wp to sk\n"
  },
  {
    "path": "docs/rfcs/035-timeline-archive.md",
    "content": "# Timeline Archival\n\n## Summary\n\nThis RFC describes a mechanism for pageservers to eliminate local storage + compute work\nfor timelines which are not in use, in response to external API calls to \"archive\" a timeline.\n\nThe archived state roughly corresponds to fully offloading a timeline to object storage, such\nthat its cost is purely the cost of that object storage.\n\n## Motivation\n\nArchived timelines serve multiple purposes:\n- Act as a 'snapshot' for workloads that would like to retain restorable copies of their\n  database from longer ago than their PITR window.\n- Enable users to create huge numbers of branches (e.g. one per github PR) without having\n  to diligently clean them up later to avoid overloading the pageserver (currently we support\n  up to ~500 branches per tenant).\n\n### Prior art\n\nMost storage and database systems have some form of snapshot, which can be implemented several ways:\n1. full copies of data (e.g. an EBS snapshot to S3)\n2. shallow snapshots which are CoW relative to the original version of the data, e.g. on a typical NFS appliance, or a filesystem like CephFS.\n3. a series of snapshots which are CoW or de-duplicated relative to one another.\n\nToday's Neon branches are approximately like `2.`, although due to implementation details branches\noften end up storing much more data than they really need, as parent branches assume that all data\nat the branch point is needed.  
The layers pinned in the parent branch may have a much larger size\nthan the physical size of a compressed image layer representing the data at the branch point.\n\n## Requirements\n\n- Enter & exit the archived state in response to external admin API calls\n- API calls to modify the archived state are atomic and durable\n- An archived timeline should eventually (once out of PITR window) use an efficient compressed\n  representation, and avoid retaining arbitrarily large data in its parent branch.\n- Remote object GETs during tenant start may be O(N) with the number of _active_ branches,\n  but must not scale with the number of _archived_ branches.\n- Background I/O for archived branches should only be done a limited number of times to evolve them\n  to a long-term-efficient state (e.g. rewriting to image layers).  There should be no ongoing \"housekeeping\"\n  overhead for archived branches, including operations related to calculating sizes for billing.\n- The pageserver should put no load on the safekeeper for archived branches.\n- Performance of un-archiving a branch must make good use of S3/disk bandwidth to restore the branch\n  to a performant state in a short time (linear with the branch's logical size)\n\n## Non Goals\n\n- Archived branches are not a literal `fullbackup` postgres snapshot: they are still stored\n  in Neon's internal format.\n- Compute cold starts after activating an archived branch will not have comparable performance to\n  cold starts on an active branch.\n- Archived branches will not use any new/additional compression or de-duplication beyond what\n  is already implemented for image layers (zstd per page).\n- The pageserver will not \"auto start\" archived branches in response to page_service API requests: they\n  are only activated explicitly via the HTTP API.\n- We will not implement a total offload of archived timelines from safekeepers: their control file (small) will\n  remain on local disk, although existing eviction mechanisms will 
remove any segments from local disk.\n- We will not expose any prometheus metrics for archived timelines, or make them visible in any\n  detailed HTTP APIs other than the specific API for listing archived timelines.\n- A parent branch may not be archived unless all its children are.\n\n## Impacted Components\n\npageserver, storage controller\n\n## Terminology\n\n**Archived**: a branch is _archived_ when an HTTP API request to archive it has succeeded: the caller\nmay assume that this branch is now very cheap to store, although this may not be physically so until the\nbranch proceeds to the offloaded state.\n\n**Active** branches are branches which are available for use by page_service clients, and have a relatively\nhigh cost due to consuming local storage.\n\n**Offloaded** branches are a subset of _archived_ branches, which have had their local state removed such\nthat they now consume minimal runtime resources and have a cost similar to the cost of object storage.\n\n**Activate** (verb): transition from Archived to Active\n\n**Archive** (verb): transition from Active to Archived\n\n**Offload** (verb): transition from Archived to Offloaded\n\n**Offload manifest**: an object stored in S3 that describes timelines which pageservers do not load.\n\n**Warm up** (verb): operation done on an active branch, by downloading its active layers.  
Once a branch is\nwarmed up, good performance will be available to page_service clients.\n\n## Implementation\n\n### High level flow\n\nWe may think of a timeline which is archived and then activated as proceeding through a series of states:\n\n```mermaid\nstateDiagram\n  [*] --> Active(warm)\n  Active(warm) --> Archived\n  Archived --> Offloaded\n  Archived --> Active(warm)\n  Offloaded --> Active(cold)\n  Active(cold) --> Active(warm)\n```\n\nNote that the transition from Archived to Active(warm) is expected to be fairly rare: the most common lifecycles\nof branches will be:\n- Very frequent: Short lived branches: Active -> Deleted\n- Frequent: Long-lived branches: Active -> Archived -> Offloaded -> Deleted\n- Rare: Branches used to restore old state: Active ->Archived -> Offloaded -> Active\n\nThese states are _not_ all stored as a single physical state on the timeline, but rather represent the combination\nof:\n- the timeline's lifecycle state: active or archived, stored in the timeline's index\n- its offload state: whether pageserver has chosen to drop local storage of the timeline and write it into the\n  manifest of offloaded timelines.\n- cache state (whether it's warm or cold).\n\n### Storage format changes\n\nThere are two storage format changes:\n1. `index_part.json` gets a new attribute `state` that describes whether the timeline is to\n   be considered active or archived.\n2. A new tenant-level _manifest_ object `tenant_manifest-v1.json` describes which timelines a tenant does not need to load\n   at startup (and is available for storing other small, rarely changing tenant-wide attributes in future)\n\nThe manifest object will have a format like this:\n```\n{\n  \"offload_timelines\": [\n    {\n      \"timeline_id\": ...\n      \"last_record_lsn\": ...\n      \"last_record_lsn_time\": ...\n      \"pitr_interval\": ...\n      \"last_gc_lsn\": ...  # equal to last_record_lsn if this branch has no history (i.e. a snapshot)\n      \"logical_size\": ...  
# The size at last_record_lsn\n      \"physical_size\" ...\n      \"parent\": Option<{\n        \"timeline_id\"...\n        \"lsn\"... # Branch point LSN on the parent\n        \"requires_data\": bool # True if this branch depends on layers in its parent, identify it here\n\n      }>\n    }\n  ]\n}\n```\n\nThe information about a timeline in its offload state is intentionally minimal: just enough to decide:\n- Whether it requires [archive optimization](#archive-branch-optimization) by rewriting as a set of image layers: we may infer this\n  by checking if now > last_record_lsn_time - pitr_interval, and pitr_lsn < last_record_lsn.\n- Whether a parent branch should include this offloaded branch in its GC inputs to avoid removing\n  layers that the archived branch depends on\n- Whether requests to delete this `timeline_id` should be executed (i.e. if a deletion request\n  is received for a timeline_id that isn't in the set of live `Timelines` or in the manifest, then\n  we don't need to go to S3 for the deletion).\n- How much archived space to report in consumption metrics\n\nThe contents of the manifest's offload list will also be stored as an attribute of `Tenant`, such that the total\nset of timelines may be found by the union of `Tenant::timelines` (non-offloaded timelines) and `Tenant::offloaded`\n(offloaded timelines).\n\nFor split-brain protection, the manifest object will be written with a generation suffix, in the same way as\nindex_part objects are (see [generation numbers RFC](025-generation-numbers.md)).  This will add some complexity, but\ngive us total safety against two pageservers with the same tenant attached fighting over the object.  Existing code\nfor finding the latest generation and for cleaning up old generations (in the scrubber) will be generalized to cover\nthe manifest file.\n\n### API & Timeline state\n\nTimelines will store a lifecycle state (enum of Active or Archived) in their IndexPart.  
This will\nbe controlled by a new per-timeline `configure` endpoint.  This is intentionally generic naming, which\nmay be used in future to control other per-timeline attributes (e.g. in future we may make PITR interval\na per-timeline configuration).\n\n`PUT /v1/tenants/{tenant_id}/timelines/{timeline_id}/configure`\n```\n{\n  'state': 'active|archive'\n}\n```\n\nWhen archiving a timeline, this API will complete as soon as the timeline's state has been set in index_part, and that index has been uploaded.\n\nWhen activating a timeline, this API will complete as soon as the timeline's state has been set in index_part,\n**and** the `Timeline` object has been instantiated and activated.  This will require reading the timeline's\nindex, but not any data: it should be about as fast as a couple of small S3 requests.\n\nThe API will be available with identical path via the storage controller: calling this on a sharded tenant\nwill simply map the API call to all the shards.\n\nArchived timelines may never have descendent timelines which are active.  This will be enforced at the API level,\nsuch that activating a timeline requires that all its ancestors are active, and archiving a timeline requires\nthat all its descendents are archived.  It is the caller's responsibility to walk the hierarchy of timelines\nin the proper order if they would like to archive whole trees of branches.\n\nBecause archived timelines will be excluded from the usual timeline listing APIs, a new API specifically\nfor archived timelines will be added: this is for use in support/debug:\n\n```\nGET /v1/tenants/{tenant_id}/archived_timelines\n\n{\n  ...same per-timeline content as the tenant manifest...\n}\n\n```\n\n### Tenant attach changes\n\nCurrently, during Tenant::spawn we list all the timelines in the S3 bucket, and then for each timeline\nwe load their index_part.json.  
To avoid the number of GETs scaling linearly with the number of archived\ntimelines, we must have a single object that tells us which timelines do not need to be loaded.  The\nnumber of ListObjects requests while listing timelines will still scale O(N), but this is less problematic\nbecause each request covers 1000 timelines.\n\nThis is **not** literally the same as the set of timelines who have state=archived.  Rather, it is\nthe set of timelines which have been offloaded in the background after their state was set to archived.\n\nWe may simply skip loading these timelines: there will be no special state of `Timeline`, they just won't\nexist from the perspective of an active `Tenant` apart from in deletion: timeline deletion will need\nto check for offloaded timelines as well as active timelines, to avoid wrongly returning 404 on trying\nto delete an offloaded timeline.\n\n### Warm-up API\n\n`PUT /v1/tenants/{tenant_id}/timelines/{timeline_id}/download?wait_ms=1234`\n\nThis API will be similar to the existing `download_remote_layers` API, but smarter:\n- It will not download _all_ remote layers, just the visible set (i.e. layers needed for a read)\n- It will download layers in the visible set until reaching `wait_ms`, then return a struct describing progress\n  of downloads, so that the caller can poll.\n\nThe _visible set_ mentioned above will be calculated by the pageserver in the background, by taking the set\nof readable LSNs (i.e. branch points and heads of branches), and walking the layer map to work out which layers\ncan possibly be read from these LSNs.  This concept of layer visibility is more generally useful for cache\neviction and heatmaps, as well as in this specific case of warming up a timeline.\n\nThe caller does not have to wait for the warm up API, or call it at all.  
But it is strongly advised\nto call it, because otherwise populating local contents for a timeline can take a long time when waiting\nfor SQL queries to coincidentally hit all the layers, and during that time query latency remains quite\nvolatile.\n\n### Background work\n\nArchived branches are not subject to normal compaction.  Instead, when the compaction loop encounters\nan archived branch, it will consider rewriting the branch to just image layers if the branch has no history\n([archive branch optimization](#archive-branch-optimization)), or offloading the timeline from local disk\nif its state permits that.\n\nAdditionally, the tenant compaction task will walk the state of already offloaded timelines to consider\noptimizing their storage, e.g. if a timeline had some history when offloaded, but since then its PITR\nhas elapsed and it can now be rewritten to image layers.\n\n#### Archive branch offload\n\nRecall that when we archive a timeline via the HTTP API, this only sets a state: it doesn't do\nany actual work.\n\nThis work is done in the background compaction loop.  
It makes sense to tag this work on to the compaction\nloop, because it is spiritually aligned: offloading data for archived branches improves storage efficiency.\n\nThe condition for offload is simple:\n - a `Timeline` object exists with state `Archived`\n - the timeline does not have any non-offloaded children.\n \n Regarding the condition that children must be offloaded, this will always be eventually true, because\n we enforce at the API level that children of archived timelines must themselves be archived, and all\n archived timelines will eventually be offloaded.\n\nOffloading a timeline is simple:\n- Read the timeline's attributes that we will store in its offloaded state (especially its logical size)\n- Call `shutdown()` on the timeline and remove it from the `Tenant` (as if we were about to delete it)\n- Erase all the timeline's content from local storage (`remove_dir_all` on its path)\n- Write the tenant manifest to S3 to prevent this timeline being loaded on next start.\n\n#### Archive branch optimization (flattening)\n\nWhen we offloaded a branch, it might have had some history that prevented rewriting it to a single\npoint in time set of image layers.  For example, a branch might have several days of writes and a 7\nday PITR: when we archive it, it still has those days of history.\n\nOnce the PITR has expired, we have an opportunity to reduce the physical footprint of the branch by:\n- Writing compressed image layers within the archived branch, as these are more efficient as a way of storing\n  a point in time compared with delta layers\n- Updating the branch's offload metadata to indicate that this branch no longer depends on its ancestor\n  for data, i.e. the ancestor is free to GC layers files at+below the branch point\n\nFully compacting an archived branch into image layers at a single LSN may be thought of as *flattening* the\nbranch, such that it is now a one-dimensional keyspace rather than a two-dimensional key/lsn space. 
It becomes\na true snapshot at that LSN.\n\nIt is not always more efficient to flatten a branch than to keep some extra history on the parent: this\nis described in more detail in [optimizations](#delaying-storage-optimization-if-retaining-parent-layers-is-cheaper).\n\nArchive branch optimization should be done _before_ background offloads during compaction, because there may\nbe timelines which are ready to be offloaded but also would benefit from the optimization step before\nbeing offloaded.  For example, a branch which has already fallen out of PITR window and has no history\nof its own may be immediately re-written as a series of image layers before being offloaded.\n\n### Consumption metrics\n\nArchived timelines and offloaded timelines will be excluded from the synthetic size calculation, in anticipation\nthat billing structures based on consumption metrics are highly likely to apply different $/GB rates to archived\nvs. ordinary content.\n\nArchived and offloaded timelines' logical size will be reported under the existing `timeline_logical_size`\nvariant of `MetricsKey`: receivers are then free to bill on this metric as they please.\n\n### Secondary locations\n\nArchived timelines (including offloaded timelines) will be excluded from heatmaps, and thereby\nwhen a timeline is archived, after the next cycle of heatmap upload & secondary download, its contents\nwill be dropped from secondary locations.\n\n### Sharding\n\nArchiving or activating a timeline will be done symmetrically across all shards in a tenant, in\nthe same way that timeline creation and deletion is done.  
There are no special rules about ordering:\nthe storage controller may dispatch concurrent calls to all shards when archiving or activating a timeline.\n\nSince consumption metrics are only transmitted from shard zero, the state of archival on this shard\nwill be authoritative for consumption metrics.\n\n## Error cases\n\n### Errors in sharded tenants\n\nIf one shard in a tenant fails an operation but others succeed, the tenant may end up in a mixed\nstate, where a timeline is archived on some shards but not on others.  \n\nWe will not bother implementing a rollback mechanism for this: errors in archiving/activating a timeline\nare either transient (e.g. S3 unavailable, shutting down), or the fault of the caller (NotFound, BadRequest).\nIn the transient case callers are expected to retry until success, or to make appropriate API calls to clear\nup their mistake.  We rely on this good behavior of callers to eventually get timelines into a consistent\nstate across all shards.  If callers do leave a timeline in an inconsistent state across shards, this doesn't\nbreak anything, it's just \"weird\".\n\nThis is similar to the status quo for timeline creation and deletion: callers are expected to retry\nthese operations until they succeed.\n\n### Archiving/activating\n\nArchiving/activating a timeline can fail in a limited number of ways:\n1. I/O error storing/reading the timeline's updated index\n    - These errors are always retryable: a fundamental design assumption of the pageserver is that remote\n      storage errors are always transient. \n2. NotFound if the timeline doesn't exist\n    - Callers of the API are expected to avoid calling deletion and archival APIs concurrently.\n    - The storage controller has runtime locking to prevent races such as deleting a timeline while\n      archiving it.\n3. 
BadRequest if the rules around ancestors/descendents of archived timelines would be violated\n    - Callers are expected to do their own checks to avoid hitting this case.  If they make\n      a mistake and encounter this error, they should give up.\n\n### Offloading\n\nOffloading can only fail if remote storage is unavailable, which would prevent us from writing the\ntenant manifest.  In such error cases, we give up in the expectation that offloading will be tried \nagain at the next iteration of the compaction loop.\n\n### Archive branch optimization\n\nOptimization is a special form of compaction, so can encounter all the same errors as regular compaction\ncan: it should return Result<(), CompactionError>, and as with compaction it will be retried on\nthe next iteration of the compaction loop.\n\n## Optimizations\n\n### Delaying storage optimization if retaining parent layers is cheaper\n\nOptimizing archived branches to image layers and thereby enabling parent branch GC to progress\nis a safe default: archived branches cannot over-fill a pageserver's local disk, and once they\nare offloaded to S3 they're totally safe, inert things.\n\nHowever, in some cases it can be advantageous to retain extra history on their parent branch rather\nthan flattening the archived branch.  For example, if a 1TB parent branch is rather slow-changing (1GB\nof data per day), and archive branches are being created nightly, then writing out full 1TB image layers\nfor each nightly branch is inefficient compared with just keeping more history on the main branch.\n\nGetting this right requires consideration of:\n- Compaction: if keeping more history on the main branch is going to prompt the main branch's compaction to\n  write out extra image layers, then it might make more sense to just write out the image layers on\n  the archived branch.\n- Metadata bloat: keeping extra history on a parent branch doesn't just cost GB of storage, it makes\n  the layer map (and index_part) bigger.  
There are practical limits beyond which writing an indefinitely\n  large layer map can cause problems elsewhere.\n\nThis optimization can probably be implemented quite cheaply with some basic heuristics like:\n- don't bother doing optimization on an archive branch if the LSN distance between\n  its branch point and the end of the PITR window is <5% of the logical size of the archive branch.\n- ...but don't keep more history on the main branch than double the PITR\n\n### Creating a timeline in archived state (a snapshot)\n\nSometimes, one might want to create a branch with no history, which will not be written to\nbefore it is archived.  This is a snapshot, although we do not require a special snapshot API,\nsince a snapshot can be represented as a timeline with no history.\n\nThis can be accomplished by simply creating a timeline and then immediately archiving it, but\nthat is somewhat wasteful: this timeline will spin up various tasks and open a connection to the storage\nbroker to try and ingest WAL, before being shut down in the subsequent archival call.  
To explicitly\nsupport this common special case, we may add a parameter to the timeline creation API which\ncreates a timeline directly into the archived state.\n\nSuch a timeline creation will do exactly two I/Os at creation time:\n- write the index_part object to record the timeline's existence\n- when the timeline is offloaded in the next iteration of the compaction loop (~20s later),\n  write the tenant manifest.\n\nLater, when the timeline falls off the end of the PITR interval, the usual offload logic will wake\nup the 'snapshot' branch and write out image layers.\n\n## Future Work\n\n### Enabling `fullbackup` dumps from archive branches\n\nIt would be useful to be able to export an archive branch to another system, or for use in a local\npostgres database.\n\nThis could be implemented as a general capability for all branches, in which case it would \"just work\"\nfor archive branches by activating them.  However, downloading all the layers in a branch just to generate\na fullbackup is a bit inefficient: we could implement a special case for flattened archived branches\nwhich streams image layers from S3 and outputs the fullbackup stream without writing the layers out to disk.\n\nImplementing `fullbackup` is a bit more complicated than this because of sharding, but solving that problem\nis unrelated to the topic of archived branches (it probably involves having each shard write out a fullbackup \nstream to S3 in an intermediate format, and then having one node stitch them together).\n\n### Tagging layers from archived branches\n\nWhen we know a layer is an image layer written for an archived branch that has fallen off the PITR window,\nwe may add tags to the S3 objects to enable writing lifecycle policies that transition such layers to even\ncheaper storage.\n\nThis could be done for all archived layers, or it could be driven by the archival API, to give the pageserver\nexternal hints on which branches are likely to be reactivated, and which branches are good 
candidates for\ntagging for low performance storage.\n\nTagging+lifecycles is just one mechanism: one might also directly use S3 storage classes.  Other clouds' object\nstores have similar mechanisms.\n\n### Storing sequences of archive branches as deltas\n\nWhen archived branches are used as scheduled snapshots, we could store them even more efficiently\nby encoding them as deltas relative to each other (i.e. for nightly snapshots, when we do the\nstorage optimization for Tuesday's snapshot, we would read Monday's snapshot and store only the modified\npages). This is the kind of encoding that many backup storage systems use.\n\nThe utility of this depends a lot on the churn rate of the data, and the cost of doing the delta encoding\nvs. just writing out a simple stream of the entire database.  For smaller databases, writing out a full\ncopy is pretty trivial (e.g. writing a compressed copy of a 10GiB database to S3 can take under 10 seconds,\nso the complexity tradeoff of diff-encoding it is dubious).\n\nOne does not necessarily have to read-back the previous snapshot in order to encoded the next one: if the\npageserver knows about the schedule, it can intentionally retain extra history on the main branch so that\nwe can say: \"A branch exists from Monday night.  
I have Monday night's data still active in the main branch,\nso now I can read at the Monday LSN and the Tuesday LSN, calculate the delta, and store it as Tuesday's\ndelta snapshot\".\n\nClearly this all requires careful housekeeping to retain the relationship between branches that depend on\neach other: perhaps this would be done by making the archive branches have child/parent relationships with\neach other, or perhaps we would permit them to remain children of their original parent, but additionally\nhave a relationship with the snapshot they're encoded relative to.\n\nActivating a branch that is diff-encoded may require activating several earlier branches too, so figuring\nout how frequently to write a full copy is important.  This is essentially a zoomed-out version of what\nwe do with delta layers and image layers within a timeline, except each \"layer\" is a whole timeline.\n\n\n## FAQ/Alternatives\n\n### Store all timelines in the tenant manifest\n\nRather than special-casing offloaded timelines in the offload manifest, we could store a total\nmanifest of all timelines, eliminating the need for the pageserver to list timelines in S3 on\nstartup.\n\nThat would be a more invasive change (require hooking in to timeline creation), and would\ngenerate much more I/O to this manifest for tenants that had many branches _and_ frequent\ncreate/delete cycles for short lived branches.  
Restricting the manifest to offloaded timelines\nmeans that we only have to cope with the rate at which long-lived timelines are archived, rather\nthan the rate at which short-lived timelines are created & destroyed.\n\n### Automatically archiving/activating timelines without external API calls\n\nWe could implement TTL-driven offload of timelines, waking them up when a page request\narrives.\n\nThis has downsides:\n- Opacity: if we do TTL-driven offload inside the pageserver, then the end user doesn't\n  know which of their branches are in this state, and might get a surprise when they try\n  to use such a branch.\n- Price fluctuation: if the archival of a branch is used in end user pricing, then users\n  prefer clarity & consistency.  Ideally a branch's storage should cost the same from the moment it\n  is created, rather than having a usage-dependent storage price.\n- Complexity: enabling the page service to call up into the Tenant to activate a timeline\n  would be awkward, compared with an external entry point.\n\n### Make offloaded a state of Timeline\n\nTo reduce the operator-facing complexity of having some timeline APIs that only return\nnon-offloaded timelines, we could build the offloaded state into the Timeline type.\n\n`timeline.rs` is already one of the most egregiously long source files in the tree, so\nthis is rejected on the basis that we need to avoid making that complexity worse."
  },
  {
    "path": "docs/rfcs/036-physical-replication.md",
    "content": "# Physical Replication\n\nThis RFC is a bit special in that we have already implemented physical\nreplication a long time ago. However, we never properly wrote down all\nthe decisions and assumptions, and in the last months when more users\nhave started to use the feature, numerous issues have surfaced.\n\nThis RFC documents the design decisions that have been made.\n\n## Summary\n\nPostgreSQL has a feature called streaming replication, where a replica\nstreams WAL from the primary and continuously applies it. It is also\nknown as \"physical replication\", to distinguish it from logical\nreplication.  In PostgreSQL, a replica is initialized by taking a\nphysical backup of the primary. In Neon, the replica is initialized\nfrom a slim \"base backup\" from the pageserver, just like a primary,\nand the primary and the replicas connect to the same pageserver,\nsharing the storage.\n\nThere are two kinds of read-only replicas in Neon:\n- replicas that follow the primary, and\n- \"static\" replicas that are pinned at a particular LSN.\n\nA static replica is useful e.g. for performing time-travel queries and\nrunning one-off slow queries without affecting the primary. A replica\nthat follows the primary can be used e.g. to scale out read-only\nworkloads.\n\n## Motivation\n\nRead-only replicas allow offloading read-only queries. It's useful for\nisolation, if you want to make sure that read-only queries don't\naffect the primary, and it's also an easy way to provide guaranteed\nread-only access to an application, without having to mess with access\ncontrols.\n\n## Non Goals (if relevant)\n\nThis RFC is all about WAL-based *physical* replication. Logical\nreplication is a different feature.\n\nNeon also has the capability to launch \"static\" read-only nodes which\ndo not follow the primary, but are pinned to a particular LSN. They\ncan be used for long-running one-off queries, or for Point-in-time\nqueries. 
They work similarly to read replicas that follow the primary,\nbut some things are simpler: there are no concerns about cache\ninvalidation when the data changes on the primary, or worrying about\ntransactions that are in-progress on the primary.\n\n## Impacted components (e.g. pageserver, safekeeper, console, etc)\n\n- Control plane launches the replica\n- Replica Postgres instance connects to the safekeepers, to stream the WAL\n- The primary does not know about the standby, except for the hot standby feedback\n- The primary and replicas all connect to the same pageservers\n\n\n# Context\n\nSome useful things to know about hot standby and replicas in\nPostgreSQL.\n\n## PostgreSQL startup sequence\n\nThe terms \"running\" and \"start up\" are a little imprecise. PostgreSQL\nreplica startup goes through several stages:\n\n1. First, the process is started up, and various initialization steps\n   are performed, like initializing shared memory. If you try to\n   connect to the server in this stage, you get an error: ERROR: the\n   database system is starting up. This stage happens very quickly.\n\n2. Then the server reads the checkpoint record from the WAL and starts\n   the WAL replay starting from the checkpoint. This works differently\n   in Neon: we start the WAL replay at the basebackup LSN, not from a\n   checkpoint! If you connect to the server in this state, you get an\n   error: ERROR: the database system is not yet accepting\n   connections. We proceed to the next stage, when the WAL replay sees\n   a running-xacts record. Or in Neon, the \"CLOG scanning\" mechanism\n   can allow us to move directly to the next stage, with all the caveats\n   listed in this RFC.\n\n3. When the running-xacts information is established, the server\n   starts to accept connections normally.\n\nFrom PostgreSQL's point of view, the server is already running in\nstage 2, even though it's not accepting connections yet. Our\n`compute_ctl` does not consider it as running until stage 3. 
If the\ntransition from stage 2 to 3 doesn't happen fast enough, the control\nplane will mark the start operation as failed.\n\n\n## Decisions, Issues\n\n### Cache invalidation in replica\n\nWhen a read replica follows the primary in PostgreSQL, it needs to\nstream all the WAL from the primary and apply all the records, to keep\nthe local copy of the data consistent with the primary. In Neon, the\nreplica can fetch the updated page versions from the pageserver, so\nit's not necessary to apply all the WAL. However, it needs to ensure\nthat any pages that are currently in the Postgres buffer cache, or the\nLocal File Cache, are either updated, or thrown away so that the next\nread of the page will fetch the latest version.\n\nWe choose to apply the WAL records for pages that are already in the\nbuffer cache, and skip records for other pages. Somewhat arbitrarily,\nwe also apply records affecting catalog relations, fetching the old\npage version from the pageserver if necessary first. See\n`neon_redo_read_buffer_filter()` function.\n\nThe replica wouldn't necessarily need to see all the WAL records, only\nthe records that apply to cached pages. For simplicity, we do stream\nall the WAL to the replica, and the replica simply ignores WAL records\nthat require no action.\n\nLike in PostgreSQL, the read replica maintains a \"replay LSN\", which\nis the LSN up to which the replica has received and replayed the\nWAL. The replica can lag behind the primary, if it cannot quite keep\nup with the primary, or if a long-running query conflicts with changes\nthat are about to be applied, or even intentionally if the user wishes\nto see delayed data (see recovery_min_apply_delay). It's important\nthat the replica sees a consistent view of the whole cluster at the\nreplay LSN, when it's lagging behind.\n\nIn Neon, the replica connects to a safekeeper to get the WAL\nstream. 
That means that the safekeepers must be able to regurgitate\nthe original WAL as far back as the replay LSN of any running read\nreplica. (A static read-only node that does not follow the primary\ndoes not require a WAL stream however). The primary does not need to\nbe running, and when it is, the replicas don't incur any extra\noverhead to the primary (see hot standby feedback though).\n\n### In-progress transactions\n\nIn PostgreSQL, when a hot standby server starts up, it cannot\nimmediately open up for queries (see [PostgreSQL startup\nsequence]). It first needs to establish a complete list of in-progress\ntransactions, including subtransactions, that are running at the\nprimary, at the current replay LSN. Normally that happens quickly,\nwhen the replica sees a \"running-xacts\" WAL record, because the\nprimary writes a running-xacts WAL record at every checkpoint, and in\nPostgreSQL the replica always starts the WAL replay from a checkpoint\nREDO point. (A shutdown checkpoint WAL record also implies that all\nthe non-prepared transactions have ended.) If there are a lot of\nsubtransactions in progress, however, the standby might need to wait\nfor old transactions to complete before it can open up for queries.\n\nIn Neon that problem is worse: a replica can start at any LSN, so\nthere's no guarantee that it will see a running-xacts record any time\nsoon. In particular, if the primary is not running when the replica is\nstarted, it might never see a running-xacts record.\n\nTo make things worse, we initially missed this issue, and always\nstarted accepting queries at replica startup, even if it didn't have\nthe transaction information. That could lead to incorrect query\nresults and data corruption later. However, as we fixed that, we\nintroduced a new problem compared to what we had before: previously\nthe replica would always start up, but after fixing that bug, it might\nnot. 
In a superficial way, the old behavior was better (but could lead\nto serious issues later!). That made fixing that bug very hard,\nbecause as we fixed it, we made things (superficially) worse for\nothers.\n\nSee https://github.com/neondatabase/neon/pull/7288 which fixed the\nbug, and follow-up PRs https://github.com/neondatabase/neon/pull/8323\nand https://github.com/neondatabase/neon/pull/8484 to try to claw back\nthe cases that started to cause trouble after fixing it. As of this\nwriting, there are still cases where a replica might not immediately\nstart up, causing the control plane operation to fail; the remaining\nissues are tracked in https://github.com/neondatabase/neon/issues/6211.\n\nOne long-term fix for this is to switch to using so-called CSN\nsnapshots in read replicas. That would make it unnecessary to have the\nfull in-progress transaction list in the replica at startup time. See\nhttps://commitfest.postgresql.org/48/4912/ for a work-in-progress\npatch to upstream to implement that.\n\nAnother thing we could do is to teach the control plane about that\ndistinction between \"starting up\" and \"running but haven't received\nrunning-xacts information yet\", so that we could keep the replica\nwaiting longer in that stage, and also give any client connections the\nsame `ERROR: the database system is not yet accepting connections`\nerror that you get in standalone PostgreSQL in that state.\n\n\n### Recovery conflicts and Hot standby feedback\n\nIt's possible that a tuple version is vacuumed away in the primary,\neven though it is still needed by a running transaction in the\nreplica. This is called a \"recovery conflict\", and PostgreSQL provides\nvarious options for dealing with it. By default, the WAL replay will\nwait up to 30 s for the conflicting query to finish. 
After that, it\nwill kill the running query, so that the WAL replay can proceed.\n\nAnother way to avoid the situation is to enable the\n[`hot_standby_feedback`](https://www.postgresql.org/docs/current/runtime-config-replication.html#GUC-HOT-STANDBY-FEEDBACK)\noption. When it is enabled, the primary will refrain from vacuuming\ntuples that are still needed in the replica. That means potentially\nbloating the primary, which violates the usual rule that read replicas\ndon't affect the operations on the primary, which is why it's off by\ndefault. We leave it to users to decide if they want to turn it on,\nsame as PostgreSQL.\n\nNeon supports `hot_standby_feedback` by passing the feedback messages\nfrom the replica to the safekeepers, and from safekeepers to the\nprimary.\n\n### Relationship of settings between primary and replica\n\nIn order to enter hot standby mode, some configuration options need to\nbe set to the same or larger values in the standby, compared to the\nprimary.  See [explanation in the PostgreSQL\ndocs](https://www.postgresql.org/docs/current/hot-standby.html#HOT-STANDBY-ADMIN)\n\nIn Neon, we have this problem too. To prevent customers from hitting\nit, the control plane automatically adjusts the settings of a replica,\nso that they match or exceed the primary's settings (see\nhttps://github.com/neondatabase/cloud/issues/14903). However, you\ncan still hit the issue if the primary is restarted with larger\nsettings, while the replica is running.\n\n\n### Interaction with Pageserver GC\n\nThe read replica can lag behind the primary. If there are recovery\nconflicts or the replica cannot keep up for some reason, the lag can\nin principle grow indefinitely. The replica will issue all GetPage\nrequests to the pageservers at the current replay LSN, and needs to\nsee the old page versions. 
That will\ncause read errors in the compute, and can mean that the replica cannot\nmake progress with the replication anymore.\n\nThere is a mechanism for the replica to pass information about its replay\nLSN to the pageserver, so that the pageserver refrains from GC'ing\ndata that is still needed by the standby. It's called\n'standby_horizon' in the pageserver code, see\nhttps://github.com/neondatabase/neon/pull/7368. A separate \"lease\"\nmechanism is also in the works, where the replica could hold a lease\non the old LSN, preventing the pageserver from advancing the GC\nhorizon past that point. The difference is that the standby_horizon\nmechanism relies on a feedback message from replica to safekeeper,\nwhile the lease API is exposed directly from the pageserver. A static\nread-only node is not connected to safekeepers, so it cannot use the\nstandby_horizon mechanism.\n\n\n### Synchronous replication\n\nWe haven't put any effort into synchronous replication yet.\n\nPostgreSQL provides multiple levels of synchronicity. In the weaker\nlevels, a transaction is not acknowledged as committed to the client\nin the primary until the WAL has been streamed to a replica or flushed\nto disk there. Those modes don't make sense in Neon, because the\nsafekeepers handle durability.\n\n`synchronous_commit=remote_apply` mode would make sense. In that mode,\nthe commit is not acknowledged to the client until it has been\nreplayed in the replica. That ensures that after commit, you can see\nthe commit in the replica too (aka. read-your-write consistency).\n"
  },
  {
    "path": "docs/rfcs/037-storage-controller-restarts.md",
    "content": "# Rolling Storage Controller Restarts\n\n## Summary\n\nThis RFC describes the issues around the current storage controller restart procedure\nand describes an implementation which reduces downtime to a few milliseconds on the happy path.\n\n## Motivation\n\nStorage controller upgrades (restarts, more generally) can cause multi-second availability gaps.\nWhile the storage controller does not sit on the main data path, it's generally not acceptable\nto block management requests for extended periods of time (e.g. https://github.com/neondatabase/neon/issues/8034).\n\n### Current Implementation\n\nThe storage controller runs in a Kubernetes Deployment configured for one replica and strategy set to [Recreate](https://kubernetes.io/docs/concepts/workloads/controllers/deployment/#recreate-deployment).\nIn non Kubernetes terms, during an upgrade, the currently running storage controller is stopped and, only after,\na new instance is created.\n\nAt start-up, the storage controller calls into all the pageservers it manages (retrieved from DB) to learn the\nlatest locations of all tenant shards present on them. This is usually fast, but can push into tens of seconds\nunder unfavourable circumstances: pageservers are heavily loaded or unavailable.\n\n## Prior Art\n\nThere's probably as many ways of handling restarts gracefully as there are distributed systems. Some examples include:\n* Active/Standby architectures: Two or more instance of the same service run, but traffic is only routed to one of them.\nFor fail-over, traffic is routed to one of the standbys (which becomes active).\n* Consensus Algorithms (Raft, Paxos and friends): The part of consensus we care about here is leader election: peers communicate to each other\nand use a voting scheme that ensures the existence of a single leader (e.g. 
Raft epochs).\n\n## Requirements\n\n* Reduce storage controller unavailability during upgrades to milliseconds\n* Minimize the interval in which it's possible for more than one storage controller\nto issue reconciles.\n* Have one uniform implementation for restarts and upgrades\n* Fit in with the current Kubernetes deployment scheme\n\n## Non Goals\n\n* Implement our own consensus algorithm from scratch\n* Completely eliminate storage controller downtime. Instead we aim to reduce it to the point where it looks\nlike a transient error to the control plane\n\n## Impacted Components\n\n* storage controller\n* deployment orchestration (i.e. Ansible)\n* helm charts\n\n## Terminology\n\n* Observed State: in-memory mapping between tenant shards and their current pageserver locations - currently built up\nat start-up by querying pageservers\n* Deployment: Kubernetes [primitive](https://kubernetes.io/docs/concepts/workloads/controllers/deployment/) that models\na set of replicas\n\n## Implementation\n\n### High Level Flow\n\nAt a very high level the proposed idea is to start a new storage controller instance while\nthe previous one is still running and cut-over to it when it becomes ready. The new instance\nshould coordinate with the existing one and transition responsibility gracefully. While the controller\nhas built-in safety against split-brain situations (via generation numbers), we'd like to avoid such\nscenarios since they can lead to availability issues for tenants that underwent changes while two controllers\nwere operating at the same time and require operator intervention to remedy.\n\n### Kubernetes Deployment Configuration\n\nOn the Kubernetes configuration side, the proposal is to update the storage controller `Deployment`\nto use `spec.strategy.type = RollingUpdate`, `spec.strategy.rollingUpdate.maxSurge=1` and `spec.strategy.rollingUpdate.maxUnavailable=0`.\nUnder the hood, Kubernetes creates a new replica set and adds one pod to it (`maxSurge=1`). 
The old replica set does not\nscale down until the new replica set has one replica in the ready state (`maxUnavailable=0`).\n\nThe various possible failure scenarios are investigated in the [Handling Failures](#handling-failures) section.\n\n### Storage Controller Start-Up\n\nThis section describes the primitives required on the storage controller side and the flow of the happy path.\n\n#### Database Table For Leader Synchronization\n\nA new table should be added to the storage controller database for leader synchronization during startup.\nThis table will always contain at most one row. The proposed name for the table is `leader` and the schema\ncontains two elements:\n* `hostname`: represents the hostname for the current storage controller leader - should be addressible\nfrom other pods in the deployment\n* `start_timestamp`: holds the start timestamp for the current storage controller leader (UTC timezone) - only required\nfor failure case handling: see [Previous Leader Crashes Before New Leader Readiness](#previous-leader-crashes-before-new-leader-readiness)\n\nStorage controllers will read the leader row at start-up and then update it to mark themselves as the leader\nat the end of the start-up sequence. We want compare-and-exchange semantics for the update: avoid the\nsituation where two concurrent updates succeed and overwrite each other. The default Postgres isolation\nlevel is `READ COMMITTED`, which isn't strict enough here. This update transaction should use at least `REPEATABLE\nREAD` isolation level in order to [prevent lost updates](https://www.interdb.jp/pg/pgsql05/08.html). Currently,\nthe storage controller uses the stricter `SERIALIZABLE` isolation level for all transactions. 
This more than suits\nour needs here.\n\n```\nSTART TRANSACTION ISOLATION LEVEL REPEATABLE READ;\nUPDATE leader SET hostname=<new_hostname>, start_timestamp=<new_start_ts>\nWHERE hostname=<old_hostname> AND start_timestamp=<old_start_ts>;\n```\n\nIf the transaction fails or if no rows have been updated, then the compare-and-exchange is regarded as a failure.\n\n#### Step Down API\n\nA new HTTP endpoint should be added to the storage controller: `POST /control/v1/step_down`. Upon receiving this\nrequest the leader cancels any pending reconciles and goes into a mode where it replies with 503 to all other APIs\nand does not issue any location configurations to its pageservers. The successful HTTP response will return a serialized\nsnapshot of the observed state.\n\nIf other step down requests come in after the initial one, the request is handled and the observed state is returned (required\nfor failure scenario handling - see [Handling Failures](#handling-failures)).\n\n#### Graceful Restart Happy Path\n\nAt start-up, the first thing the storage controller does is retrieve the sole row from the new\n`leader` table. If such an entry exists, send a `/step_down` POST API call to the current leader.\nThis should be retried a few times with a short backoff (see [1]). The aspiring leader loads the\nobserved state into memory and the start-up sequence proceeds as usual, but *without* querying the\npageservers in order to build up the observed state.\n\nBefore doing any reconciliations or persistence change, update the `leader` database table as described in the [Database Table For Leader Synchronization](#database-table-for-leader-synchronization)\nsection. If this step fails, the storage controller process exits.\n\nNote that no row will exist in the `leader` table for the first graceful restart. In that case, force update the `leader` table\n(without the WHERE clause) and proceed with the pre-existing start-up procedure (i.e. 
build observed state by querying pageservers).\n\nSummary of proposed new start-up sequence:\n1. Call `/step_down`\n2. Perform any pending database migrations\n3. Load state from database\n4. Load observed state returned in step (1) into memory\n5. Do initial heartbeat round (may be moved after step 6)\n6. Mark self as leader by updating the database\n7. Reschedule and reconcile everything\n\nSome things to note from the steps above:\n* The storage controller makes no changes to the cluster state before step (6) (i.e. no location config\ncalls to the pageserver and no compute notifications)\n* Ask the current leader to step down before loading state from database so we don't get a lost update\nif the transactions overlap.\n* Before loading the observed state at step (4), cross-validate against the database. If validation fails,\nfall back to asking the pageservers about their current locations.\n* Database migrations should only run **after** the previous instance steps down (or the step down times out).\n\n\n[1] The API call might fail because there's no storage controller running (i.e. [restart](#storage-controller-crash-or-restart)),\nso we don't want to extend the unavailability period by much. We still want to retry since that's not the common case.\n\n### Handling Failures\n\n#### Storage Controller Crash Or Restart\n\nThe storage controller may crash or be restarted outside of roll-outs. When a new pod is created, its call to\n`/step_down` will fail since the previous leader is no longer reachable. In this case perform the pre-existing\nstart-up procedure and update the leader table (with the WHERE clause). If the update fails, the storage controller\nexits and consistency is maintained.\n\n#### Previous Leader Crashes Before New Leader Readiness\n\nWhen the previous leader (P1) crashes before the new leader (P2) passes the readiness check, Kubernetes will\nreconcile the old replica set and create a new pod for it (P1'). 
The `/step_down` API call will fail for P1'\n(see [2]).\n\nNow we have two cases to consider:\n* P2 updates the `leader` table first: The database update from P1' will fail and P1' will exit, or be terminated\nby Kubernetes depending on timings.\n* P1' updates the `leader` table first: The `hostname` field of the `leader` row stays the same, but the `start_timestamp` field changes.\nThe database update from P2 will fail (since `start_timestamp` does not match). P2 will exit and Kubernetes will\ncreate a new replacement pod for it (P2'). Now the entire dance starts again, but with P1' as the leader and P2' as the aspiring leader.\n\n[2] P1 and P1' may (more likely than not) be the same pod and have the same hostname. The implementation\nshould avoid this self reference and fail the API call at the client if the persisted hostname matches\nthe current one.\n\n#### Previous Leader Crashes After New Leader Readiness\n\nThe deployment's replica sets already satisfy the deployment's replica count requirements and the\nKubernetes deployment rollout will just clean up the dead pod.\n\n#### New Leader Crashes Before Passing Readiness Check\n\nThe deployment controller scales up the new replica sets by creating a new pod. The entire procedure is repeated\nwith the new pod.\n\n#### Network Partition Between New Pod and Previous Leader\n\nThis feels very unlikely, but should be considered in any case. P2 (the new aspiring leader) fails the `/step_down`\nAPI call into P1 (the current leader). P2 proceeds with the pre-existing startup procedure and updates the `leader` table.\nKubernetes will terminate P1, but there may be a brief period where both storage controllers can drive reconciles.\n\n### Dealing With Split Brain Scenarios\n\nAs we've seen in the previous section, we can end up with two storage controllers running at the same time. The split brain\nduration is not bounded since the Kubernetes controller might become partitioned from the pods (unlikely though). 
While these\nscenarios are not fatal, they can cause tenant unavailability, so we'd like to reduce the chances of this happening.\nThe rest of this section sketches some safety measures. It's likely overkill to implement all of them however.\n\n### Ensure Leadership Before Producing Side Effects\n\nThe storage controller has two types of side effects: location config requests into pageservers and compute notifications into the control plane.\nBefore issuing either, the storage controller could check that it is indeed still the leader by querying the database. Side effects might still be\napplied if they race with the database update, but the situation will eventually be detected. The storage controller process should terminate in these cases.\n\n### Leadership Lease\n\nUp until now, the leadership defined by this RFC is static. In order to bound the length of the split brain scenario, we could require the leadership\nto be renewed periodically. Two new columns would be added to the `leader` table:\n1. `last_renewed` - timestamp indicating when the lease was last renewed\n2. `lease_duration` - duration indicating the amount of time after which the lease expires\n\nThe leader periodically attempts to renew the lease by checking that it is in fact still the legitimate leader and updating `last_renewed` in the\nsame transaction. If the update fails, the process exits. New storage controller instances wishing to become leaders must wait for the current lease\nto expire before acquiring leadership if they have not successfully received a response to the `/step_down` request.\n\n### Notify Pageserver Of Storage Controller Term\n\nEach time that leadership changes, we can bump a `term` integer column in the `leader` table. This term uniquely identifies a leader.\nLocation config requests and re-attach responses can include this term. On the pageserver side, keep the latest term in memory and refuse\nanything which contains a stale term (i.e. 
smaller than the current one).\n\n### Observability\n\n* The storage controller should expose a metric which describes its state (`Active | WarmingUp | SteppedDown`).\nPer-region alerts should be added on this metric which trigger when:\n  + no storage controller has been in the `Active` state for an extended period of time\n  + more than one storage controller is in the `Active` state\n\n* An alert that periodically verifies that the `leader` table is in sync with the metric above would be very useful.\nWe'd have to expose the storage controller read-only database to Grafana (perhaps it is already done).\n\n## Alternatives\n\n### Kubernetes Leases\n\nKubernetes has a [lease primitive](https://kubernetes.io/docs/concepts/architecture/leases/) which can be used to implement leader election.\nOnly one instance may hold a lease at any given time. This lease needs to be periodically renewed and has an expiration period.\n\nIn our case, it would work something like this:\n* `/step_down` deletes the lease or stops it from renewing\n* lease acquisition becomes part of the start-up procedure\n\nThe kubert crate implements a [lightweight lease API](https://docs.rs/kubert/latest/kubert/lease/struct.LeaseManager.html), but it's still\nnot exactly trivial to implement.\n\nThis approach has the benefit of baked-in observability (`kubectl describe lease`), but:\n* We offload the responsibility to Kubernetes which makes it harder to debug when things go wrong.\n* More code surface than the simple \"row in database\" approach. Also, most of this code would be in\na dependency not subject to code review, etc.\n* Hard to test. Our testing infra does not run the storage controller in Kubernetes and changing it to do\nso is not simple and complicates the test set-up.\n\nTo my mind, the \"row in database\" approach is straightforward enough that we don't have to offload this\nto something external.\n"
  },
  {
    "path": "docs/rfcs/038-aux-file-v2.md",
    "content": "# AUX file v2\n\n## Summary\n\nThis is a retrospective RFC describing a new storage strategy for AUX files.\n\n## Motivation\n\nThe original aux file storage strategy stores everything in a single `AUX_FILES_KEY`.\nEvery time the compute node streams a `neon-file` record to the pageserver, it will\nupdate the aux file hash map, and then write the serialized hash map into the key.\nThis creates serious space bloat. There was a fix to log delta records (i.e., update\na key in the hash map) to the aux file key. In this way, the pageserver only stores\nthe deltas at each of the LSNs. However, this improved v1 storage strategy still\nrequires us to store everything in an aux file cache in memory, because we cannot\nfetch a single key (or file) from the compound `AUX_FILES_KEY`.\n\n### Prior art\n\nFor storing large amount of small files, we can use a key-value store where the key\nis the filename and the value is the file content.\n\n## Requirements\n\n- No space bloat, fixed space amplification.\n- No write bloat, fixed write amplification.\n\n## Impacted Components\n\npageserver\n\n## Sparse Keyspace\n\nIn pageserver, we had assumed the keyspaces are always contiguous. For example, if the keyspace 0x0000-0xFFFF\nexists in the pageserver, every single key in the key range would exist in the storage. Based on the prior\nassumption, there are code that traverses the keyspace by iterating every single key.\n\n```rust\nloop {\n    // do something\n    key = key.next();\n}\n```\n\nIf a keyspace is very large, for example, containing `2^64` keys, this loop will take infinite time to run.\nTherefore, we introduce the concept of sparse keyspace in this RFC. For a sparse keyspace, not every key would\nexist in the key range. Developers should not attempt to iterate every single key in the keyspace. 
Instead,\nthey should fetch all the layer files in the key range, and then do a merge of them.\n\nIn aux file v2, we store aux files within the sparse keyspace of the prefix `AUX_KEY_PREFIX`.\n\n## AUX v2 Keyspace and Key Mapping\n\nPageserver uses fixed-size keys. The key is 128b. In order to store files of arbitrary filenames into the\nkeyspace, we assign a predetermined prefix based on the directory storing the aux file, and use the FNV hash\nof the filename for the remaining bits of the key. The encoding scheme is defined in `encode_aux_file_key`.\n\nFor example, `pg_logical/mappings/test1` will be encoded as:\n\n```\n62 0000 01 01 7F8B83D94F7081693471ABF91C\n^ aux prefix\n        ^ assigned prefix of pg_logical/\n           ^ assigned prefix of mappings/\n              ^ 13B FNV hash of test1\n   ^ not used due to key representation\n```\n\nThe prefixes of the directories should be assigned every time we add a new type of aux file into the storage within `aux_file.rs`. Files in directories without an assigned prefix are put into the `0xFFFF` keyspace.\n\nNote that inside pageserver, there are two representations of the keys: the 18B full key representation\nand the 16B compact key representation. For the 18B representation, some fields have restricted ranges\nof values. Therefore, the aux keys only use the 16B compact portion of the full key.\n\nIt is possible that two files get mapped to the same key due to hash collision. Therefore, the value of\neach aux key is an array that contains all filenames and file content that should be stored in\nthis key.\n\nWe use `Value::Image` to store the aux keys. Therefore, page reconstruction works in the same way as before,\nand we do not need additional code to support reconstructing the value. 
We simply get the latest image from\nthe storage.\n\n## Inbound Logical Replication Key Mapping\n\nFor inbound logical replication, Postgres needs the `replorigin_checkpoint` file to store the data.\nThis file is not directly stored in the pageserver using the aux v2 mechanism. It is constructed during\ngenerating the basebackup by scanning the `REPL_ORIGIN_KEY_PREFIX` keyspace.\n\n## Sparse Keyspace Read Path\n\nThere are two places we need to read the aux files from the pageserver:\n\n* On the write path, when the compute node adds an aux file to the pageserver, we will retrieve the key from the storage, append the file to the hashed key, and write it back. The current `get` API already supports that.\n*  We use the vectored get API to retrieve all aux files during generating the basebackup. Because we need to scan a sparse keyspace, we slightly modified the vectored get path. The vectorized API used to always attempt to retrieve every single key within the requested key range, and therefore, we modified it in a way that keys within `NON_INHERITED_SPARSE_RANGE` will not trigger a missing key error. Furthermore, as aux file reads usually need all layer files intersecting with that key range within the branch and cover a big keyspace, it incurs large overhead for tracking keyspaces that have not been read. Therefore, for sparse keyspaces, we [do not track](https://github.com/neondatabase/neon/pull/9631) `unmapped_keyspace`.\n\n## Compaction and Image Layer Generation\n\nWith the addition of sparse keyspaces, we also modified the compaction code to accommodate the fact that sparse keyspaces do not have every single key stored in the storage.\n\n* L0 compaction: we modified the hole computation code so that it can handle sparse keyspaces when computing holes.\n* Image layer creation: instead of calling `key.next()` and getting/reconstructing images for every single key, we use the vectored get API to scan all keys in the keyspace at a given LSN. 
Image layers are only created if there are too many delta layers between the latest LSN and the last image layer we generated for sparse keyspaces. The created image layer always covers the full aux key range for now, and could be optimized later.\n\n## Migration\n\nWe decided not to make the new aux storage strategy (v2) compatible with the original one (v1). One feasible way of doing a seamless migration is to store new data in aux v2 while keeping old data in aux v1, but this complicates file deletions. We want all users to start with a clean state with no aux files in the storage, and therefore, we need to do manual migrations for users using aux v1 by using the [migration script](https://github.com/neondatabase/aux_v2_migration).\n\nDuring the period of migration, we store the aux policy in the `index_part.json` file. When a tenant is attached\nwith no policy set, the pageserver will scan the aux file keyspaces to identify the current aux policy being used (v1 or v2).\n\nIf a timeline has aux v1 files stored, it will use aux file policy v1 unless we do a manual migration for it. Otherwise, the default aux file policy for new timelines is aux v2. Users enrolled in logical replication before we set aux v2 as default use aux v1 policy. Users who tried setting up inbound replication (which was not supported at that time) may also create some file entries in aux v1 store, even if they did not enroll in the logical replication testing program.\n\nThe code for aux v2 migration is in https://github.com/neondatabase/aux_v2_migration. The toolkit scans all projects with logical replication enabled. For all these projects, it puts the computes into maintenance mode (suspending all of them), calls the migration API to switch the aux file policy on the pageserver (which drops all replication states), and restarts all the computes.\n"
  },
  {
    "path": "docs/rfcs/038-independent-compute-release.md",
    "content": "# Independent compute release\n\nCreated at: 2024-08-30. Author: Alexey Kondratov (@ololobus)\n\n## Summary\n\nThis document proposes an approach to fully independent compute release flow. It attempts to\ncover the following features:\n\n- Process is automated as much as possible to minimize human errors.\n- Compute<->storage protocol compatibility is ensured.\n- A transparent release history is available with an easy rollback strategy.\n- Although not in the scope of this document, there is a viable way to extend the proposed release\n  flow to achieve the canary and/or blue-green deployment strategies.\n\n## Motivation\n\nPreviously, the compute release was tightly coupled to the storage release. This meant that once\nsome storage nodes got restarted with a newer version, all new compute starts using these nodes\nautomatically got a new version. Thus, two releases happen in parallel, which increases the blast\nradius and makes ownership fuzzy.\n\nNow, we practice a manual v0 independent compute release flow -- after getting a new compute release\nimage and tag, we pin it region by region using Admin UI. It's better, but it still has its own flaws:\n\n1. It's a simple but fairly manual process, as you need to click through a few pages.\n2. It's prone to human errors, e.g., you could mistype or copy the wrong compute tag.\n3. We now require an additional approval in the Admin UI, which partially solves the 2.,\n   but also makes the whole process pretty annoying, as you constantly need to go back\n   and forth between two people.\n\n## Non-goals\n\nIt's not the goal of this document to propose a design for some general-purpose release tool like Helm.\nThe document considers how the current compute fleet is orchestrated at Neon. 
Even if we later\ndecide to split the control plane further (e.g., introduce a separate compute controller), the proposed\nrelease process shouldn't change much, i.e., the releases table and API will reside in\none of the parts.\n\nAchieving the canary and/or blue-green deploy strategies is out of the scope of this document. They\nwere kept in mind, though, so it's expected that the proposed approach will lay down the foundation\nfor implementing them in future iterations.\n\n## Impacted components\n\nCompute, control plane, CI, observability (some Grafana dashboards may require changes).\n\n## Prior art\n\nOne of the very close examples is how Helm tracks [releases history](https://helm.sh/docs/helm/helm_history/).\n\nIn the code:\n\n- [Release](https://github.com/helm/helm/blob/2b30cf4b61d587d3f7594102bb202b787b9918db/pkg/release/release.go#L20-L43)\n- [Release info](https://github.com/helm/helm/blob/2b30cf4b61d587d3f7594102bb202b787b9918db/pkg/release/info.go#L24-L40)\n- [Release status](https://github.com/helm/helm/blob/2b30cf4b61d587d3f7594102bb202b787b9918db/pkg/release/status.go#L18-L42)\n\nTL;DR it has several important attributes:\n\n- Revision -- unique release ID/primary key. It is not the same as the application version,\n  because the same version can be deployed several times, e.g., after a newer version rollback.\n- App version -- version of the application chart/code.\n- Config -- set of overrides to the default config of the application.\n- Status -- current status of the release in the history.\n- Timestamps -- tracks when a release was created and deployed.\n\n## Proposed implementation\n\n### Separate release branch\n\nWe will use a separate release branch, `release-compute`, to have a clean history for releases and commits.\nIn order to avoid confusion with storage releases, we will use a different prefix for compute [git release\ntags](https://github.com/neondatabase/neon/releases) -- `release-compute-XXXX`. 
We will use the same tag for\nDocker images as well. The `neondatabase/compute-node-v16:release-compute-XXXX` looks longer and a bit redundant,\nbut it's better to have image and git tags in sync.\n\nCurrently, control plane relies on the numeric compute and storage release versions to decide on compute->storage\ncompatibility. Once we implement this proposal, we should drop this code as release numbers will be completely\nindependent. The only constraint we want is that it must monotonically increase within the same release branch.\n\n### Compute config/settings manifest\n\nWe will create a new sub-directory `compute` and file `compute/manifest.yaml` with a structure:\n\n```yaml\npg_settings:\n  # Common settings for primaries and secondaries of all versions.\n  common:\n    wal_log_hints: \"off\"\n    max_wal_size: \"1024\"\n\n  per_version:\n    14:\n      # Common settings for both replica and primary of version PG 14\n      common:\n        shared_preload_libraries: \"neon,pg_stat_statements,extension_x\"\n    15:\n      common:\n        shared_preload_libraries: \"neon,pg_stat_statements,extension_x\"\n      # Settings that should be applied only to\n      replica:\n        # Available only starting Postgres 15th\n        recovery_prefetch: \"off\"\n    # ...\n    17:\n      common:\n        # For example, if third-party `extension_x` is not yet available for PG 17\n        shared_preload_libraries: \"neon,pg_stat_statements\"\n      replica:\n        recovery_prefetch: \"off\"\n```\n\n**N.B.** Setting value should be a string with `on|off` for booleans and a number (as a string)\nwithout units for all numeric settings. That's how the control plane currently operates.\n\nThe priority of settings will be (a higher number is a higher priority):\n\n1. Any static and hard-coded settings in the control plane\n2. `pg_settings->common`\n3. Per-version `common`\n4. Per-version `replica`\n5. Any per-user/project/endpoint overrides in the control plane\n6. 
Any dynamic setting calculated based on the compute size\n\n**N.B.** For simplicity, we do not do any custom logic for `shared_preload_libraries`, so it's completely\noverridden if specified on some level. Make sure that you include all necessary extensions in it when you\ndo any overrides.\n\n**N.B.** There is a tricky question about what to do with custom compute image pinning we sometimes\ndo for particular projects and customers. That's usually some ad-hoc work and images are based on\nthe latest compute image, so it's relatively safe to assume that we could use settings from the latest compute\nrelease. If for some reason that's not true, and further overrides are needed, it's also possible to do that\non the project level together with pinning the image, so it's on-call/engineer/support responsibility to\nensure that compute starts with the specified custom image. The only real risk is that compute image will get\nstale and settings from new releases will drift away, so eventually it will get something incompatible,\nbut i) this is some operational issue, as we do not want stale images anyway, and ii) base settings\nreceive something really new so rarely that the chance of this happening is very low. If we want to solve it completely,\nthen together with pinning the image we could also pin the matching release revision in the control plane.\n\nThe compute team will own the content of `compute/manifest.yaml`.\n\n### Control plane: releases table\n\nIn order to store information about releases, the control plane will use a table `compute_releases` with the following\nschema:\n\n```sql\nCREATE TABLE compute_releases (\n  -- Unique release ID\n  -- N.B. Revision won't be synchronized across all regions, because all control planes are technically independent\n  -- services. 
We have the same situation with Helm releases as well because they could be deployed and rolled back\n  -- independently in different clusters.\n  revision BIGSERIAL PRIMARY KEY,\n  -- Numeric version of the compute image, e.g. 9057\n  version BIGINT NOT NULL,\n  -- Compute image tag, e.g. `release-9057`\n  tag TEXT NOT NULL,\n  -- Current release status. Currently, it will be a simple enum\n  -- * `deployed` -- release is deployed and used for new compute starts.\n  --                 Exactly one release can have this status at a time.\n  -- * `superseded` -- release has been replaced by a newer one.\n  -- But we can always extend it in the future when we need more statuses\n  -- for more complex deployment strategies.\n  status TEXT NOT NULL,\n  -- Any additional metadata for compute in the corresponding release\n  manifest JSONB NOT NULL,\n  -- Timestamp when release record was created in the control plane database\n  created_at TIMESTAMP NOT NULL DEFAULT now(),\n  -- Timestamp when release deployment was finished\n  deployed_at TIMESTAMP\n);\n```\n\nWe keep track of the old releases not only for the sake of audit, but also because we usually have ~30% of\nold computes started using the image from one of the previous releases. Yet, when users want to reconfigure\nthem without restarting, the control plane needs to know what settings are applicable to them, so we also need\ninformation about the previous releases that are readily available. There could be some other auxiliary info\nneeded as well: supported extensions, compute flags, etc.\n\n**N.B.** Here, we can end up in an ambiguous situation when the same compute image is deployed twice, e.g.,\nit was deployed once, then rolled back, and then deployed again, potentially with a different manifest. Yet,\nwe could've started some computes with the first deployment and some with the second. 
Thus, when we need to\nlook up the manifest for the compute by its image tag, we will see two records in the table with the same tag,\nbut different revision numbers. We can assume that this could happen only in case of rollbacks, so we\ncan just take the latest revision for the given tag.\n\n### Control plane: management API\n\nThe control plane will implement new API methods to manage releases:\n\n1. `POST /management/api/v2/compute_releases` to create a new release. With payload\n\n   ```json\n    {\n      \"version\": 9057,\n      \"tag\": \"release-9057\",\n      \"manifest\": {}\n    }\n   ```\n\n   and response\n\n   ```json\n    {\n      \"revision\": 53,\n      \"version\": 9057,\n      \"tag\": \"release-9057\",\n      \"status\": \"deployed\",\n      \"manifest\": {},\n      \"created_at\": \"2024-08-15T15:52:01.0000Z\",\n      \"deployed_at\": \"2024-08-15T15:52:01.0000Z\"\n    }\n   ```\n\n   Here, we can actually mix-in custom (remote) extensions metadata into the `manifest`, so that the control plane\n   will get information about all available extensions not bundled into compute image. The corresponding\n   workflow in `neondatabase/build-custom-extensions` should produce it as an artifact and make\n   it accessible to the workflow in the `neondatabase/infra`. See the complete release flow below. Doing that,\n   we put a constraint that new custom extension requires new compute release, which is good for the safety,\n   but is not exactly what we want operational-wise (we want to be able to deploy new extensions without new\n   images). Yet, it can be solved incrementally: v0 -- do not do anything with extensions at all;\n   v1 -- put them into the same manifest; v2 -- make them separate entities with their own lifecycle.\n\n   **N.B.** This method is intended to be used in CI workflows, and CI/network can be flaky. It's reasonable\n   to assume that we could retry the request several times, even though it's already succeeded. 
Although it's\n   not a big deal to create several identical releases one-by-one, it's better to avoid it, so the control plane\n   should check if the latest release is identical and just return `304 Not Modified` in this case.\n\n2. `POST /management/api/v2/compute_releases/rollback` to rollback to any previously deployed release. With payload\n   including the revision of the release to rollback to:\n\n   ```json\n   {\n      \"revision\": 52\n   }\n   ```\n\n   Rollback marks the current release as `superseded` and creates a new release with all the same data as the\n   requested revision, but with a new revision number.\n\n   This rollback API is not strictly needed, as we can just use `infra` repo workflow to deploy any\n   available tag. It's still nice to have for on-call and any urgent matters, for example, if we need\n   to rollback and GitHub is down. It's much easier to specify only the revision number vs. crafting\n   all the necessary data for the new release payload.\n\n### Compute->storage compatibility tests\n\nIn order to safely release new compute versions independently from storage, we need to ensure that the currently\ndeployed storage is compatible with the new compute version. Currently, we maintain backward compatibility\nin storage, but newer computes may require a newer storage version.\n\nRemote end-to-end (e2e) tests [already accept](https://github.com/neondatabase/cloud/blob/e3468d433e0d73d02b7d7e738d027f509b522408/.github/workflows/testing.yml#L43-L48)\n`storage_image_tag` and `compute_image_tag` as separate inputs. That means that we could reuse e2e tests to ensure\ncompatibility between storage and compute:\n\n1. Pick the latest storage release tag and use it as `storage_image_tag`.\n2. Pick a new compute tag built in the current compute release PR and use it as `compute_image_tag`.\n   Here, we should use a temporary ECR image tag, because the final tag will be known only after the release PR is merged.\n3. 
Trigger e2e tests as usual.\n\n### Release flow\n\n```mermaid\n  sequenceDiagram\n\n  actor oncall as Compute on-call person\n  participant neon as neondatabase/neon\n\n  box private\n    participant cloud as neondatabase/cloud\n    participant exts as neondatabase/build-custom-extensions\n    participant infra as neondatabase/infra\n  end\n\n  box cloud\n    participant preprod as Pre-prod control plane\n    participant prod as Production control plane\n    participant k8s as Compute k8s\n  end\n\n  oncall ->> neon: Open release PR into release-compute\n\n  activate neon\n  neon ->> cloud: CI: trigger e2e compatibility tests\n  activate cloud\n  cloud -->> neon: CI: e2e tests pass\n  deactivate cloud\n  neon ->> neon: CI: pass PR checks, get approvals\n  deactivate neon\n\n  oncall ->> neon: Merge release PR into release-compute\n\n  activate neon\n  neon ->> neon: CI: pass checks, build and push images\n  neon ->> exts: CI: trigger extensions build\n  activate exts\n  exts -->> neon: CI: extensions are ready\n  deactivate exts\n  neon ->> neon: CI: create release tag\n  neon ->> infra: Trigger release workflow using the produced tag\n  deactivate neon\n\n  activate infra\n  infra ->> infra: CI: pass checks\n  infra ->> preprod: Release new compute image to pre-prod automatically <br/> POST /management/api/v2/compute_releases\n  activate preprod\n  preprod -->> infra: 200 OK\n  deactivate preprod\n\n  infra ->> infra: CI: wait for per-region production deploy approvals\n  oncall ->> infra: CI: approve deploys region by region\n  infra ->> k8s: Prewarm new compute image\n  infra ->> prod: POST /management/api/v2/compute_releases\n  activate prod\n  prod -->> infra: 200 OK\n  deactivate prod\n  deactivate infra\n```\n\n## Further work\n\nAs briefly mentioned in other sections, eventually, we would like to use more complex deployment strategies.\nFor example, we can pass a fraction of the total compute starts that should use the new release. 
Then we can\nmark the release as `partial` or `canary` and monitor its performance. If everything is fine, we can promote it\nto `deployed` status. If not, we can roll back to the previous one.\n\n## Alternatives\n\nIn theory, we can try using Helm as-is:\n\n1. Write a compute Helm chart. That will actually have only some config map, which the control plane can access and read.\n   N.B. We could reuse the control plane chart as well, but then it's not a fully independent release again and even more fuzzy.\n2. The control plane will read it and start using the new compute version for new starts.\n\nDrawbacks:\n\n1. Helm releases work best if the workload is controlled by the Helm chart itself. Then you can have different\n   deployment strategies like rolling update or canary or blue/green deployments. At Neon, the compute starts are controlled\n   by control plane, so it makes it much more tricky.\n2. Releases visibility will suffer, i.e. instead of a nice table in the control plane and Admin UI, we would need to use\n   `helm` cli and/or K8s UIs like K8sLens.\n3. We do not restart all computes shortly after the new version release. This means that for some features and compatibility\n   purpose (see above) control plane may need some auxiliary info from the previous releases.\n"
  },
  {
    "path": "docs/rfcs/040-Endpoint-Persistent-Unlogged-Files-Storage.md",
    "content": "# Memo: Endpoint Persistent Unlogged Files Storage\nCreated on 2024-11-05\nImplemented on N/A\n\n## Summary\nA design for a storage system that allows storage of files required to make\nNeon's Endpoints have a better experience at or after a reboot.\n\n## Motivation\nSeveral systems inside PostgreSQL (and Neon) need some persistent storage for\noptimal workings across reboots and restarts, but still work without.\nExamples are the query-level statistics files of `pg_stat_statements` in\n`pg_stat/pg_stat_statements.stat`, and `pg_prewarm`'s `autoprewarm.blocks`.\nWe need a storage system that can store and manage these files for each\nEndpoint, without necessarily granting users access to an unlimited storage\ndevice.\n\n## Goals\n- Store known files for Endpoints with reasonable persistence.  \n  _Data loss in this service, while annoying and bad for UX, won't lose any\n  customer's data._\n\n## Non Goals (if relevant)\n- This storage system does not need branching, file versioning, or other such\n  features. The files are as ephemeral to the timeline of the data as the\n  Endpoints that host the data.\n- This storage system does not need to store _all_ user files, only 'known'\n  user files.\n- This storage system does not need to be hosted fully inside Computes.  \n  _Instead, this will be a separate component similar to Pageserver,\n  SafeKeeper, the S3 proxy used for dynamically loaded extensions, etc._\n\n## Impacted components\n- Compute needs new code to load and store these files in its lifetime.\n- Control Plane needs to consider this new storage system when signalling\n  the deletion of an Endpoint, Timeline, or Tenant.\n- Control Plane needs to consider this new storage system when it resets\n  or re-assigns an endpoint's timeline/branch state.\n\nA new service is created: the Endpoint Persistent Unlogged Files Storage\nservice.  This could be integrated in e.g. 
Pageserver or Control Plane, or a\nseparately hosted service.\n\n## Proposed implementation\nEndpoint-related data files are managed by a newly designed service (which\noptionally is integrated in an existing service like Pageserver or Control\nPlane), which stores data directly into S3 or any blob storage of choice.\n\nUpon deletion of the Endpoint, or reassignment of the endpoint to a different\nbranch, this ephemeral data is dropped: the data stored may not match the\nstate of the branch's data after reassignment, and on endpoint deletion the\ndata won't have any use to the user.\n\nCompute gets credentials (JWT token with Tenant, Timeline & Endpoint claims)\nwhich it can use to authenticate to this new service and retrieve and store\ndata associated with this endpoint.  This limited scope reduces leaks of data\nacross endpoints and timeline resets, and limits the ability of endpoints to\nmess with other endpoints' data.\n\nThe path of this endpoint data in S3 is initially as follows:\n\n    s3://<regional-epufs-bucket>/\n      tenants/\n        <hex-tenant-id>/\n          timelines/\n            <hex-timeline-id>/\n              endpoints/\n                <endpoint-id>/\n                  pgdata/\n                    <file_path_in_pgdatadir>\n\nFor other blob storages an equivalent or similar path can be constructed.\n\n### Reliability, failure modes and corner cases (if relevant)\nReliability is important, but not critical to the workings of Neon.  The data\nstored in this service will, when lost, reduce performance, but won't be a\ncause of permanent data loss - only operational metadata is stored.\n\nMost, if not all, blob storage services have sufficiently high persistence\nguarantees to cater to our need for persistence and uptime. The only concern with\nblob storages is that the access latency is generally higher than local disk,\nbut for the object types stored (cache state, ...) 
I don't think this will be\nmuch of an issue.\n\n### Interaction/Sequence diagram (if relevant)\n\nIn these diagrams you can replace S3 with any persistent storage device of\nchoice, but S3 is chosen as representative name: The well-known and short name\nof AWS' blob storage. Azure Blob Storage should work too, but it has a much\nlonger name making it less practical for the diagrams.\n\nWrite data:\n\n```http\nPOST /tenants/<tenant-id>/timelines/<tl-id>/endpoints/<endpoint-id>/pgdata/<the-pgdata-path>\nHost: epufs.svc.neon.local\n\n<<<\n\n200 OK\n{\n  \"version\": \"<opaque>\", # opaque file version token, changes when the file contents change\n  \"size\": <bytes>,\n}\n```\n\n```mermaid\nsequenceDiagram\n    autonumber\n    participant co as Compute\n    participant ep as EPUFS\n    participant s3 as Blob Storage\n\n    co-->ep: Connect with credentials\n    co->>+ep: Store Unlogged Persistent File\n    opt is authenticated\n        ep->>s3: Write UPF to S3\n    end\n    ep->>-co: OK / Failure / Auth Failure\n    co-->ep: Cancel connection\n```\n\nRead data: (optional with cache-relevant request parameters, e.g. 
If-Modified-Since)\n```http\nGET /tenants/<tenant-id>/timelines/<tl-id>/endpoints/<endpoint-id>/pgdata/<the-pgdata-path>\nHost: epufs.svc.neon.local\n\n<<<\n\n200 OK\n\n<file data>\n```\n\n```mermaid\nsequenceDiagram\n    autonumber\n    participant co as Compute\n    participant ep as EPUFS\n    participant s3 as Blob Storage\n\n    co->>+ep: Read Unlogged Persistent File\n    opt is authenticated\n        ep->>+s3: Request UPF from storage\n        s3->>-ep: Receive UPF from storage\n    end\n    ep->>-co: OK(response) / Failure(storage, auth, ...)\n```\n\nCompute Startup:\n```mermaid\nsequenceDiagram\n    autonumber\n    participant co as Compute\n    participant ps as Pageserver\n    participant ep as EPUFS\n    participant es as Extension server\n\n    note over co: Bind endpoint ep-xxx\n    par Get basebackup\n        co->>+ps: Request basebackup @ LSN\n        ps-)ps: Construct basebackup\n        ps->>-co: Receive basebackup TAR @ LSN\n    and Get startup-critical Unlogged Persistent Files\n        co->>+ep: Get all UPFs of endpoint ep-xxx\n        ep-)ep: Retrieve and gather all UPFs\n        ep->>-co: TAR of UPFs\n    and Get startup-critical extensions\n        loop For every startup-critical extension\n            co->>es: Get critical extension\n            es->>co: Receive critical extension\n        end\n    end\n    note over co: Start compute\n```\n\nCPlane ops:\n```http\nDELETE /tenants/<tenant-id>/timelines/<timeline-id>/endpoints/<endpoint-id>\nHost: epufs.svc.neon.local\n\n<<<\n\n200 OK\n{\n  \"tenant\": \"<tenant-id>\",\n  \"timeline\": \"<timeline-id>\",\n  \"endpoint\": \"<endpoint-id>\",\n  \"deleted\": {\n    \"files\": <count>,\n    \"bytes\": <count>,\n  },\n}\n```\n\n```http\nDELETE /tenants/<tenant-id>/timelines/<timeline-id>\nHost: epufs.svc.neon.local\n\n<<<\n\n200 OK\n{\n  \"tenant\": \"<tenant-id>\",\n  \"timeline\": \"<timeline-id>\",\n  \"deleted\": {\n    \"files\": <count>,\n    \"bytes\": <count>,\n  
},\n}\n```\n\n```http\nDELETE /tenants/<tenant-id>\nHost: epufs.svc.neon.local\n\n<<<\n\n200 OK\n{\n  \"tenant\": \"<tenant-id>\",\n  \"deleted\": {\n    \"files\": <count>,\n    \"bytes\": <count>,\n  },\n}\n```\n\n```mermaid\nsequenceDiagram\n    autonumber\n    participant cp as Control Plane\n    participant ep as EPUFS\n    participant s3 as Blob Storage\n\n    alt Tenant deleted\n        cp-)ep: Tenant deleted\n        loop For every object associated with removed tenant\n            ep->>s3: Remove data of deleted tenant from Storage\n        end\n        opt\n            ep-)cp: Tenant cleanup complete\n        end\n    else Timeline deleted\n        cp-)ep: Timeline deleted\n        loop For every object associated with removed timeline\n            ep->>s3: Remove data of deleted timeline from Storage\n        end\n        opt\n            ep-)cp: Timeline cleanup complete\n        end\n    else Endpoint reassigned or removed\n        cp->>+ep: Endpoint reassigned\n        loop For every object associated with reassigned/removed endpoint\n            ep->>s3: Remove data from Storage\n        end\n        ep->>-cp: Cleanup complete\n    end\n```\n\n### Scalability (if relevant)\n\nProvisionally:  As this service is going to be part of compute startup, this\nservice should be able to quickly respond to all requests.  Therefore this\nservice is deployed to every AZ we host Computes in, and Computes communicate\n(generally) only to the EPUFS endpoint of the AZ they're hosted in.\n\nLocal caching of frequently restarted endpoints' data or metadata may be\nneeded for best performance.  However, due to the regional nature of stored\ndata but zonal nature of the service deployment, we should be careful when we\nimplement any local caching, as it is possible that computes in AZ 1 will\nupdate data originally written and thus cached by AZ 2.  
Cache version tests\nand invalidation is therefore required if we want to roll out caching to this\nservice, which is too broad a scope for an MVC.  This is why caching is left\nout of scope for this RFC, and should be considered separately after this RFC\nis implemented.\n\n### Security implications (if relevant)\nThis service must be able to authenticate users at least by Tenant ID,\nTimeline ID and Endpoint ID. This will use the existing JWT infrastructure of\nCompute, which will be upgraded to the extent needed to support Timeline- and\nEndpoint-based claims.\n\nThe service requires unlimited access to (a prefix of) a blob storage bucket,\nand thus must be hosted outside the Compute VM sandbox.\n\nA service that generates pre-signed request URLs for Compute to download the\ndata from that URL is likely problematic, too:  Compute would be able to write\nunlimited data to the bucket, or exfiltrate this signed URL to get read/write\naccess to specific objects in this bucket, which would still effectively give\nusers access to the S3 bucket (but with improved access logging).\n\nThere may be a use case for transferring data associated with one endpoint to\nanother endpoint (e.g. 
to make one endpoint warm its caches with the state of\nanother endpoint), but that's not currently in scope, and specific needs may\nbe solved through out-of-line communication of data or pre-signed URLs.\n\n### Unresolved questions (if relevant)\nCaching of files is not in the implementation scope of the document, but\nshould at some future point be considered to maximize performance.\n\n## Alternative implementation (if relevant)\nSeveral ideas have come up to solve this issue:\n\n### Use AUXfile\nOne prevalent idea was to WAL-log the files using our AUXfile mechanism.\n\nBenefits:\n\n+ We already have this storage mechanism\n\nDemerits:\n\n- It isn't available on read replicas\n- Additional WAL will be consumed during shutdown and after the shutdown\n  checkpoint, which needs PG modifications to work without panics.\n- It increases the data we need to manage in our versioned storage, thus\n  causing higher storage costs with higher retention due to duplication at\n  the storage layer.\n\n### Sign URLs for read/write operations, instead of proxying them\n\nBenefits:\n\n+ The service can be implemented with a much reduced IO budget\n\nDemerits:\n\n- Users could get access to these signed credentials\n- Not all blob storage services may implement URL signing\n\n### Give endpoints each their own directly accessed block volume\n\nBenefits:\n\n+ Easier to integrate for PostgreSQL\n\nDemerits:\n\n- Little control on data size and contents\n- Potentially problematic as we'd need to store data all across the pgdata\n  directory.\n- EBS is not a good candidate\n   - Attaches in 10s of seconds, if not more; i.e. 
too cold to start\n   - Shared EBS volumes are a no-go, as you'd have to schedule the endpoint\n     with users of the same EBS volumes, which can't work with VM migration\n   - EBS storage costs are very high (>80$/kilotenant when using a\n     volume/tenant)\n   - EBS volumes can't be mounted across AZ boundaries\n- Bucket per endpoint is unfeasible\n   - S3 buckets are priced at $20/month per 1k, which we could better spend\n     on developers.\n   - Allocating service accounts takes time (100s of ms), and service accounts\n     are a limited resource, too; so they're not a good candidate to allocate\n     on a per-endpoint basis.\n   - Giving credentials limited to prefix has similar issues as the pre-signed\n     URL approach.\n   - Bucket DNS lookup will fill DNS caches and put pressure on DNS lookup\n     much more than our current systems would.\n- Volumes bound by hypervisor are unlikely\n   - This requires significant investment and increased software on the\n     hypervisor.\n   - It is unclear if we can attach volumes after boot, i.e. 
for pooled\n     instances.\n\n### Put the files into a table\n\nBenefits:\n\n + Mostly already available in PostgreSQL\n\nDemerits:\n\n - Uses WAL\n   - Can't be used after shutdown checkpoint\n   - Needs a RW endpoint, and table & catalog access to write to this data\n - Gets hit with DB size limitations\n - Depending on user access:\n   - Inaccessible:  \n     The user doesn't have control over database size caused by\n     these systems.\n   - Accessible:  \n     The user can corrupt these files and cause the system to crash while\n     user-corrupted files are present, thus increasing on-call overhead.\n\n## Definition of Done (if relevant)\n\nThis project is done if we have:\n\n- One S3 bucket equivalent per region, which stores this per-endpoint data.\n- A new service endpoint in at least every AZ, which indirectly grants\n  endpoints access to the data stored for these endpoints in these buckets.\n- Compute writes & reads temp-data at shutdown and startup, respectively, for\n  at least the pg_prewarm or lfc_prewarm state files.\n- Cleanup of endpoint data is triggered when the endpoint is deleted or is\n  detached from its current timeline.\n"
  },
  {
    "path": "docs/rfcs/040-profiling.md",
    "content": "# CPU and Memory Profiling\n\nCreated 2025-01-12 by Erik Grinaker.\n\nSee also [internal user guide](https://www.notion.so/neondatabase/Storage-CPU-Memory-Profiling-14bf189e004780228ec7d04442742324?pvs=4).\n\n## Summary\n\nThis document proposes a standard cross-team pattern for CPU and memory profiling across\napplications and languages, using the [pprof](https://github.com/google/pprof) profile format.\n\nIt enables both ad hoc profiles via HTTP endpoints, and continuous profiling across the fleet via\n[Grafana Cloud Profiles](https://grafana.com/docs/grafana-cloud/monitor-applications/profiles/).\nContinuous profiling incurs an overhead of about 0.1% CPU usage and 3% slower heap allocations.\n\n## Motivation\n\nCPU and memory profiles are crucial observability tools for understanding performance issues,\nresource exhaustion, and resource costs. They allow answering questions like:\n\n* Why is this process using 100% CPU?\n* How do I make this go faster?\n* Why did this process run out of memory?\n* Why are we paying for all these CPU cores and memory chips?\n\nGo has [first-class support](https://pkg.go.dev/net/http/pprof) for profiling included in its\nstandard library, using the [pprof](https://github.com/google/pprof) profile format and associated\ntooling.\n\nThis is not the case for Rust and C, where obtaining profiles can be rather cumbersome. It requires\ninstalling and running additional tools like `perf` as root on production nodes, with analysis tools\nthat can be hard to use and often don't give good results. 
This is not only annoying, but can also\nsignificantly affect the resolution time of production incidents.\n\nThis proposal will:\n\n* Provide CPU and heap profiles in pprof format via HTTP API.\n* Record continuous profiles in Grafana for aggregate historical analysis.\n* Make it easy for anyone to see a flamegraph in less than one minute.\n* Be reasonably consistent across teams and services (Rust, Go, C).\n\n## Non Goals (For Now)\n\n* [Additional profile types](https://grafana.com/docs/pyroscope/next/configure-client/profile-types/)\n  like mutexes, locks, goroutines, etc.\n* [Runtime trace integration](https://grafana.com/docs/pyroscope/next/configure-client/trace-span-profiles/).\n* [Profile-guided optimization](https://en.wikipedia.org/wiki/Profile-guided_optimization).\n\n## Using Profiles\n\nReady-to-use profiles can be obtained using e.g. `curl`. For Rust services:\n\n```\n$ curl localhost:9898/profile/cpu >profile.pb.gz\n```\n\npprof profiles can be explored using the [`pprof`](https://github.com/google/pprof) web UI, which\nprovides flamegraphs, call graphs, plain text listings, and more:\n\n```\n$ pprof -http :6060 <profile>\n```\n\nSome endpoints (e.g. 
Rust-based ones) can also generate flamegraph SVGs directly:\n\n```\n$ curl localhost:9898/profile/cpu?format=svg >profile.svg\n$ open profile.svg\n```\n\nContinuous profiles are available in Grafana under Explore → Profiles → Explore Profiles\n(currently only in [staging](https://neonstaging.grafana.net/a/grafana-pyroscope-app/profiles-explorer)).\n\n## API Requirements\n\n* HTTP endpoints that return a profile in pprof format (with symbols).\n  * CPU: records a profile over the request time interval (`seconds` query parameter).\n  * Memory: returns the current in-use heap allocations.\n* Unauthenticated, as it should not expose user data or pose a denial-of-service risk.\n* Default sample frequency should not impact service (maximum 5% CPU overhead).\n* Linux-compatibility.\n\nNice to have:\n\n* Return flamegraph SVG directly from the HTTP endpoint if requested.\n* Configurable sample frequency for CPU profiles.\n* Historical heap allocations, by count and bytes.\n* macOS-compatibility.\n\n## Rust Profiling\n\n[`libs/utils/src/http/endpoint.rs`](https://github.com/neondatabase/neon/blob/8327f68043e692c77f70d6a6dafa463636c01578/libs/utils/src/http/endpoint.rs)\ncontains ready-to-use HTTP endpoints for CPU and memory profiling:\n[`profile_cpu_handler`](https://github.com/neondatabase/neon/blob/8327f68043e692c77f70d6a6dafa463636c01578/libs/utils/src/http/endpoint.rs#L338) and [`profile_heap_handler`](https://github.com/neondatabase/neon/blob/8327f68043e692c77f70d6a6dafa463636c01578/libs/utils/src/http/endpoint.rs#L416).\n\n### CPU\n\nCPU profiles are provided by [pprof-rs](https://github.com/tikv/pprof-rs) via\n[`profile_cpu_handler`](https://github.com/neondatabase/neon/blob/8327f68043e692c77f70d6a6dafa463636c01578/libs/utils/src/http/endpoint.rs#L338).\nExpose it unauthenticated at `/profile/cpu`.\n\nParameters:\n\n* `format`: profile output format (`pprof` or `svg`; default `pprof`).\n* `seconds`: duration to collect profile over, in seconds (default `5`).\n* 
`frequency`: how often to sample thread stacks, in Hz (default `99`).\n* `force`: if `true`, cancel a running profile and start a new one (default `false`).\n\nWorks on Linux and macOS.\n\n### Memory\n\nUse the jemalloc allocator via [`tikv-jemallocator`](https://github.com/tikv/jemallocator),\nand enable profiling with samples every 2 MB allocated:\n\n```rust\n#[global_allocator]\nstatic GLOBAL: tikv_jemallocator::Jemalloc = tikv_jemallocator::Jemalloc;\n\n#[allow(non_upper_case_globals)]\n#[export_name = \"malloc_conf\"]\npub static malloc_conf: &[u8] = b\"prof:true,prof_active:true,lg_prof_sample:21\\0\";\n```\n\npprof profiles are generated by\n[`jemalloc-pprof`](https://github.com/polarsignals/rust-jemalloc-pprof) via\n[`profile_heap_handler`](https://github.com/neondatabase/neon/blob/8327f68043e692c77f70d6a6dafa463636c01578/libs/utils/src/http/endpoint.rs#L416).\nExpose it unauthenticated at `/profile/heap`.\n\nParameters:\n\n* `format`: profile output format (`pprof`, `svg`, or `jemalloc`; default `pprof`).\n\nWorks on Linux only, due to [jemalloc limitations](https://github.com/jemalloc/jemalloc/issues/26).\n\n## Go Profiling\n\nThe Go standard library includes pprof profiling via HTTP API in\n[`net/http/pprof`](https://pkg.go.dev/net/http/pprof). Expose it unauthenticated at\n`/debug/pprof`.\n\nWorks on Linux and macOS.\n\n### CPU \n\nVia `/debug/pprof/profile`. Parameters:\n\n* `debug`: profile output format (`0` is pprof, `1` or above is plaintext; default `0`).\n* `seconds`: duration to collect profile over, in seconds (default `30`).\n\nDoes not support a frequency parameter (see [#57488](https://github.com/golang/go/issues/57488)),\nand defaults to 100 Hz. A lower frequency can be hardcoded via `SetCPUProfileRate`, but the default\nis likely ok (estimated 1% overhead).\n\n### Memory\n\nVia `/debug/pprof/heap`. 
Parameters:\n\n* `seconds`: take a delta profile over the given duration, in seconds (default `0`).\n* `gc`: if `1`, garbage collect before taking profile.\n\n## C Profiling\n\n[gperftools](https://github.com/gperftools/gperftools) provides in-process CPU and heap profiling\nwith pprof output.\n\nHowever, continuous profiling of PostgreSQL is expensive (many computes), and has limited value\nsince we don't own the internals anyway.\n\nAd hoc profiling might still be useful, but the compute team considers existing tooling sufficient,\nso this is not a priority at the moment.\n\n## Grafana Continuous Profiling\n\n[Grafana Alloy](https://grafana.com/docs/alloy/latest/) continually scrapes CPU and memory profiles\nacross the fleet, and archives them as time series. This can be used to analyze resource usage over\ntime, either in aggregate or zoomed in to specific events and nodes.\n\nProfiles are retained for 30 days. Profile ingestion volume for CPU+heap at 60-second intervals\nis about 0.5 GB/node/day, or about $0.25/node/day = $7.5/node/month ($0.50/GB).\n\nIt is currently enabled in [staging](https://neonstaging.grafana.net/a/grafana-pyroscope-app/profiles-explorer)\nfor Pageserver and Safekeeper.\n\n### Scraping\n\n* CPU profiling: 59 seconds at 19 Hz every 60 seconds.\n* Heap profiling: heap snapshot with 2 MB frequency every 60 seconds.\n\nThere are two main approaches that can be taken for CPU profiles:\n\n* Continuous low-frequency profiles (e.g. 19 Hz for 60 seconds every 60 seconds).\n* Occasional high-frequency profiles (e.g. 99 Hz for 5 seconds every 60 seconds).\n\nWe choose continuous low-frequency profiles where possible. This has a fixed low overhead, instead\nof a spiky high overhead. It likely also gives a more representative view of resource usage.\nHowever, a 19 Hz rate gives a minimum resolution of 52.6 ms per sample, which may be larger than the\nactual runtime of small functions. 
Note that Go does not support a frequency parameter, so we must\nuse a fixed frequency for all profiles via `SetCPUProfileRate()` (default 100 Hz).\n\nOnly one CPU profile can be taken at a time. With continuous profiling, one will always be running.\nTo allow also taking an ad hoc CPU profile, the Rust endpoint supports a `force` query parameter to\ncancel a running profile and start a new one.\n\n### Overhead\n\nWith Rust:\n\n* CPU profiles at 19 Hz frequency: 0.1% overhead.\n* Heap profiles at 2 MB frequency: 3% allocation overhead.\n* Profile call/encoding/symbolization: 20 ms every 60 seconds, or 0.03% of 1 CPU (for Pageserver).\n* Profile symbolization caches: 125 MB memory, or 0.4% of 32 GB (for Pageserver).\n\nBenchmarks with pprof-rs showed that the CPU time for taking a stack trace of a 40-frame stack was\n11 µs using the `frame-pointer` feature, and 1.4 µs using `libunwind` with DWARF. `libunwind` saw\nfrequent seg faults, so we use `frame-pointer` and build binaries with frame pointers (negligible\noverhead).\n\nCPU profiles work by installing an `ITIMER_PROF` for the process, which triggers a `SIGPROF` signal\nafter a given amount of cumulative CPU time across all CPUs. The signal handler will run for one\nof the currently executing threads and take a stack trace. Thus, a 19 Hz profile will take 1 stack\ntrace every 52.6 ms CPU time -- assuming 11 µs for a stack trace, this is 0.02% overhead, but\nlikely 0.1% in practice (given e.g. context switches).\n\nHeap profiles work by probabilistically taking a stack trace on allocations, adjusted for the\nallocation size. A 1 MB allocation takes about 15 µs in benchmarks, and a stack trace about 1 µs,\nso we can estimate that a 2 MB sampling frequency has about 3% allocation overhead -- this is \nconsistent with benchmarks. 
This is significantly larger than CPU profiles, but mitigated by the\nfact that performance-sensitive code will avoid allocations as far as possible.\n\nProfile symbolization uses in-memory caches for symbol lookups. These take about 125 MB for\nPageserver.\n\n## Alternatives Considered\n\n* eBPF profiles.\n  * Don't require instrumenting the binary.\n  * Use less resources.\n  * Can profile in kernel space too.\n  * Supported by Grafana.\n  * Less information about stack frames and spans.\n  * Limited tooling for local analysis.\n  * Does not support heap profiles.\n  * Does not work on macOS.\n\n* [Polar Signals](https://www.polarsignals.com) instead of Grafana.\n  * We already use Grafana for everything else. Appears good enough.\n"
  },
  {
    "path": "docs/rfcs/041-rel-sparse-keyspace.md",
    "content": "# Sparse Keyspace for Relation Directories\n\n## Summary\n\nThis is an RFC describing a new storage strategy for storing relation directories.\n\n## Motivation\n\nPostgres maintains a directory structure for databases and relations. In Neon, we store these information\nby serializing the directory data in a single key (see `pgdatadir_mapping.rs`).\n\n```rust\n// DbDir:\n// 00 00000000 00000000 00000000 00   00000000\n\n// RelDir:\n// 00 SPCNODE  DBNODE   00000000 00   00000001 (Postgres never uses relfilenode 0)\n```\n\nWe have a dedicated structure on the ingestion path to serialize the relation directory into this single key.\n\n```rust\n#[derive(Debug, Serialize, Deserialize, Default)]\npub(crate) struct RelDirectory {\n    // Set of relations that exist. (relfilenode, forknum)\n    //\n    // TODO: Store it as a btree or radix tree or something else that spans multiple\n    // key-value pairs, if you have a lot of relations\n    pub(crate) rels: HashSet<(Oid, u8)>,\n}\n```\n\nThe current codebase has the following three access patterns for the relation directory.\n\n1. Check if a relation exists.\n2. List all relations.\n3. Create/drop a relation.\n\nFor (1), we currently have to get the reldir key, deserialize it, and check whether the relation exists in the\nhash set. For (2), we get the reldir key and the hash set. For (3), we need first to get\nand deserialize the key, add the new relation record to the hash set, and then serialize it and write it back.\n\nIf we have 100k relations in a database, we would have a 100k-large hash set. Then, every\nrelation created and dropped would have deserialized and serialized this 100k-large hash set. This makes the\nrelation create/drop process to be quadratic. 
When we check if a relation exists in the ingestion path,\nwe would have to deserialize this super big 100k-large key before checking if a single relation exists.\n\nIn this RFC, we will propose a new way to store the reldir data in the sparse keyspace and propose how\nto seamlessly migrate users to use the new keyspace.\n\nThe PoC patch is implemented in [PR10316](https://github.com/neondatabase/neon/pull/10316).\n\n## Key Mapping\n\nWe will use the recently introduced sparse keyspace to store actual data. Sparse keyspace was proposed in\n[038-aux-file-v2.md](038-aux-file-v2.md). The original reldir has one single value of `HashSet<(Oid, u8)>`\nfor each of the databases (identified as `spcnode, dbnode`). We encode the `Oid` (`relnode, forknum`),\ninto the key.\n\n```plain\n(REL_DIR_KEY_PREFIX, spcnode, dbnode, relnode, forknum, 1) -> deleted\n(REL_DIR_KEY_PREFIX, spcnode, dbnode, relnode, forknum, 1) -> exists\n```\n\nAssume all reldir data are stored in this new keyspace; the 3 reldir operations we mentioned before can be\nimplemented as follows.\n\n1. Check if a relation exists: check if the key maps to \"exists\".\n2. List all relations: scan the sparse keyspace over the `rel_dir_key_prefix`. Extract relnode and forknum from the key.\n3. Create/drop a relation: write \"exists\" or \"deleted\" to the corresponding key of the relation. The delete tombstone will\n   be removed during image layer generation upon compaction.\n\nNote that \"exists\" and \"deleted\" will be encoded as a single byte as two variants of an enum.\nThe mapping is implemented as `rel_tag_sparse_key` in the PoC patch.\n\n## Changes to Sparse Keyspace\n\nPreviously, we only used sparse keyspaces for the aux files, which did not carry over when branching. The reldir\ninformation needs to be preserved from the parent branch to the child branch. Therefore, the read path needs\nto be updated accordingly to accommodate such \"inherited sparse keys\". 
This is done in\n[PR#10313](https://github.com/neondatabase/neon/pull/10313).\n\n## Coexistence of the Old and New Keyspaces\n\nMigrating to the new keyspace will be done gradually: when we flip a config item to enable the new reldir keyspace, the\ningestion path will start to write to the new keyspace and the old reldir data will be kept in the old one. The read\npath needs to combine the data from both keyspaces.\n\nTheoretically, we could do a rewrite at the startup time that scans all relation directories and copies that data into the\nnew keyspace. However, this could take a long time, especially if we have thousands of tenants doing the migration\nprocess simultaneously after the pageserver restarts. Therefore, we propose the coexistence strategy so that the\nmigration can happen seamlessly and imposes no potential downtime for the user.\n\nWith the coexistence assumption, the 3 reldir operations will be implemented as follows:\n\n1. Check if a relation exists\n   - Check the new keyspace if the key maps to any value. If it maps to \"exists\" or \"deleted\", directly\n    return it to the user.\n   - Otherwise, deserialize the old reldir key and get the result.\n2. List all relations: scan the sparse keyspace over the `rel_dir_key_prefix` and deserialize the old reldir key.\n   Combine them to obtain the final result.\n3. Create/drop a relation: write \"exists\" or \"deleted\" to the corresponding key of the relation into the new keyspace.\n   - We assume no overwrite of relations will happen (i.e., the user won't create a relation at the same Oid). This will be implemented as a runtime check.\n   - For relation creation, we add `sparse_reldir_tableX -> exists` to the keyspace.\n   - For relation drop, we first check if the relation is recorded in the old keyspace. If yes, we deserialize the old reldir key,\n    remove the relation, and then write it back. 
Otherwise, we put `sparse_reldir_tableX -> deleted` to the keyspace.\n   - The delete tombstone will be removed during image layer generation upon compaction.\n\nThis process ensures that the transition will not introduce any downtime and all new updates are written to the new keyspace. The total\namount of data in the storage would be `O(relations_modifications)` and we can guarantee `O(current_relations)` after compaction.\nThere could be some relations that exist in the old reldir key for a long time. Refer to the \"Full Migration\" section on how to deal\nwith them. Plus, for relation modifications, it will have `O(old_relations)` complexity until we do the full migration, which gives\nus `O(1)` complexity after fully opting in to the sparse keyspace.\n\nThe process also implies that a relation will only exist either in the old reldir key or in the new sparse keyspace. It is not possible\nfor a table to be recorded in the old reldir key while later having a delete tombstone for it in the sparse keyspace at any LSN.\n\nWe will introduce a config item and an index_part record to record the current status of the migration process.\n\n- Config item `enable_reldir_v2`: controls whether the ingestion path writes the reldir info into the new keyspace.\n- `index_part.json` field `reldir_v2_status`: whether the timeline has written any key into the new reldir keyspace.\n\nIf `enable_reldir_v2` is set to `true` and the timeline ingests the first key into the new reldir keyspace, it will update\n`index_part.json` to set `reldir_v2_status` to `Status::Migrating`. Even if `enable_reldir_v2` gets flipped back to\n`false` (i.e., when the pageserver restarts and such config isn't persisted), the read/write path will still\nread/write to the new keyspace to avoid data inconsistency. 
This also indicates that the migration is one-way only:\nonce v2 is enabled, the user cannot go back to v1.\n\n## Next Steps\n\n### Full Migration\n\nThis won't be implemented in the project's first phase but might be implemented in the future. Having both v1 and\nv2 existing in the system would force us to keep the code to deserialize the old reldir key forever. To entirely deprecate this\ncode path, we must ensure the timeline has no old reldir data.\n\nWe can trigger a special image layer generation process at the gc-horizon. The generated image layers will cover several keyspaces:\nthe old reldir key in each of the databases, and the new reldir sparse keyspace. It will remove the old reldir key while\ncopying them into the corresponding keys in the sparse keyspace in the resulting image. This special process happens in\nthe background during compaction. For example, assume this special process is triggered at LSN 0/180. The `create_image_layers`\nprocess discovers the following keys at this LSN.\n\n```plain\ndb1/reldir_key -> (table 1, table 2, table 3)\n...db1 rel keys\ndb2/reldir_key -> (table 4, table 5, table 6)\n...db2 rel keys\nsparse_reldir_db2_table7 -> exists\nsparse_reldir_db1_table8 -> deleted\n```\n\nIt will generate the following keys:\n\n```plain\ndb1/reldir_key -> () # we have to keep the key because it is part of `collect_keyspace`.\n...db1 rel keys\ndb2/reldir_key -> ()\n...db2 rel keys\n\n-- start image layer for the sparse keyspace at sparse_reldir_prefix at LSN 0/180\nsparse_reldir_db1_table1 -> exists\nsparse_reldir_db1_table2 -> exists\nsparse_reldir_db1_table3 -> exists\nsparse_reldir_db2_table4 -> exists\nsparse_reldir_db2_table5 -> exists\nsparse_reldir_db2_table6 -> exists\nsparse_reldir_db2_table7 -> exists\n-- end image layer for the sparse keyspace at sparse_reldir_prefix+1\n\n# The `sparse_reldir_db1_table8` key gets dropped as part of the image layer generation code for the sparse keyspace.\n# Note that the read path will stop 
reading if a key is not found in the image layer covering the key range so there\n# are no correctness issues.\n```\n\nWe must verify that no pending modifications to the old reldir exist in the delta/image layers above the gc-horizon before\nwe start this process (We can do a vectored read to get the full key history of the old reldir key and ensure there are no more images\nabove the gc-horizon). Otherwise, it will violate the property that \"a relation will only exist either in the old reldir key or\nin the new sparse keyspace\". After we run this migration process, we can mark `reldir_v2_status` in the `index_part.json` to\n`Status::Migrated`, and the read path won't need to read from the old reldir anymore. Once the status is set to `Migrated`, we\ndon't need to add the key into `collect_keyspace` and therefore all of them will be removed from all future image layers.\n\nThe migration process can be proactively triggered across all attached/detached tenants to help us fully remove the old reldir code.\n\n### Consolidate Relation Size Keys\n\nWe have relsize at the end of all relation nodes.\n\n```plain\n// RelSize:\n// 00 SPCNODE  DBNODE   RELNODE  FORK FFFFFFFF\n```\n\nThis means that computing logical size requires us to do several single-key gets across the keyspace,\npotentially requiring downloading many layer files. We could consolidate them into a single\nkeyspace, improving logical size calculation performance.\n\n### Migrate DBDir Keys\n\nWe assume the number of databases created by the users will be small, and therefore, the current way\nof storing the database directory would be acceptable. In the future, we could also migrate DBDir keys into\nthe sparse keyspace to support a large number of databases.\n"
  },
  {
    "path": "docs/rfcs/041-sharded-ingest.md",
    "content": "# \nCreated on Aug 2024\nImplemented on Jan 2025\n\n## Summary\n\nData in large tenants is split up between multiple pageservers according to key hashes, as\nintroduced in the [sharding RFC](031-sharding-static.md) and [shard splitting RFC](032-shard-splitting.md).\n\nWhereas currently we send all WAL to all pageserver shards, and each shard filters out the data that it needs,\nin this RFC we add a mechanism to filter the WAL on the safekeeper, so that each shard receives\nonly the data it needs.\n\nThis will place some extra CPU load on the safekeepers, in exchange for reducing the network bandwidth\nfor ingesting WAL back to scaling as O(1) with shard count, rather than O(N_shards).\n\n## Motivation\n\n1. Large databases require higher shard counts.  Whereas currently we run with up to 8 shards for tenants\nwith a few TB of storage, the next order of magnitude capacity increase will require tens of shards, such\nthat sending all WAL to all shards is impractical in terms of bandwidth.\n2. For contemporary database sizes (~2TB), the pageserver is the bottleneck for ingest: since each\n   shard has to decode and process the whole WAL, sharding doesn't fully relieve this bottleneck.  To achieve significantly higher ingest speeds, we need to filter the WAL earlier so that each pageserver\n   only has to process relevant parts.\n\n## Non Goals (if relevant)\n\nWe do not seek to introduce multiple WALs per timeline, or to share the work of handling a timeline's\nWAL across safekeepers (beyond simple 3x replication).  This RFC may be thought of as an incremental\nmove of the ingestion bottleneck up the stack: instead of high write rates bottlenecking on the\npageserver, they will bottleneck on the safekeeper.\n\n## Impacted components (e.g. 
pageserver, safekeeper, console, etc)\n\nSafekeeper, pageserver.\n\nThere will be no control plane or storage controller coordination needed, as pageservers will directly\nindicate their sharding parameters to the safekeeper when subscribing for WAL.\n\n## Proposed implementation\n\nTerminology:\n- \"Data pages\" refers to postgres relation blocks, and SLRU blocks.\n- \"Metadata pages\" refers to everything else the pageserver stores, such as relation sizes and\n  directories of relations.\n\n### Phase 1: Refactor ingest\n\nCurrently, pageserver ingest code is structured approximately as follows:\n1. `handle_walreceiver_connection` reads a stream of binary WAL records off a network\n   socket\n2. `WalIngest::ingest_record` to translate the record into a series of page-level modifications\n3. `DatadirModification` accumulates page updates from several `ingest_record` calls, and when\n   its `commit()` method is called, flushes these into a Timeline's open `InMemoryLayer`.\n\nThis process currently assumes access to a pageserver `Timeline` throughout `ingest_record` and\nfrom `DatadirModification`, which is used to do read-modify-write cycles on metadata pages\nsuch as relation sizes and the master DBDIR page.  It also assumes that records are ingested\nstrictly one after the other: they cannot be ingested in parallel because each record assumes\nthat earlier records' changes have already been applied to `Timeline`.\n\nThis code will be refactored to disentangle the simple, fast decode of relation page writes\nfrom the more complex logic for updating internal metadata.  An intermediate representation\ncalled `InterpretedWalRecords` will be introduced.  This is similar to the internal state of\na `DatadirModification`, but does not require access to a Timeline.  
Instead of storing\nmetadata updates as materialized writes to pages, it will accumulate these as abstract operations,\nfor example rather than including a write to a relation size key, this structure will include\nan operation that indicates \"Update relation _foo_'s size to the max of its current value and\n_bar_\", such that these may be applied later to a real Timeline.\n\nThe `DatadirModification` will be aware of the `EphemeralFile` format, so that as it accumulates\nsimple page writes of relation blocks, it can write them directly into a buffer in the serialized\nformat.  This will avoid the need to later deserialize/reserialize this data when passing the\nstructure between safekeeper and pageserver.\n\nThe new pipeline will be:\n1. `handle_walreceiver_connection` reads a stream of binary WAL records off a network socket\n2. An `InterpretedWalRecords` is generated from the incoming WAL records.  This does not\n   require a reference to a Timeline.\n3. The logic that is currently spread between `WalIngest` and `DatadirModification` for updating\n   metadata will be refactored to consume the metadata operations from the `InterpretedWalRecords`\n   and turn them into literal writes to metadata pages.  This part must be done sequentially.\n4. The resulting buffer of metadata page writes is combined with the buffer of relation block\n   writes, and written into the `InMemoryLayer`.\n\nImplemented in:\n1. https://github.com/neondatabase/neon/pull/9472\n2. https://github.com/neondatabase/neon/pull/9504\n3. 
https://github.com/neondatabase/neon/pull/9524\n\n### Phase 2: Decode & filter on safekeeper\n\nIn the previous phase, the ingest code was modified to be able to do most of its work without access to\na Timeline: this first stage of ingest simply converts a series of binary wal records into\na buffer of relation/SLRU page writes, and a buffer of abstract metadata writes.\n\nThe modified ingest code may be transplanted from pageserver to safekeeper (probably via a\nshared crate).  The safekeeper->pageserver network protocol is modified to:\n - in subscription requests, send the `ShardIdentity` from the pageserver to the safekeeper\n - in responses, transmit an `InterpretedWalRecords` instead of a raw `WalRecord`.\n - use the `ShardIdentity` to filter the `ProcessedWalIngest` to relevant content for\n   the subscribing shard before transmitting it.\n\nThe overall behavior of the pageserver->safekeeper interaction remains the same, in terms of\nconsistent LSN feedback, and connection management.  Only the payload of the subscriptions\nchanges, to express an LSN range of WAL as a filtered `ProcessedWalIngest` instead of the\nraw data.\n\nThe ingest code on the pageserver can now skip the part where it does the first phase of\nprocessing, as it will receive pre-processed, compressed data off the wire.\n\nNote that `InterpretedWalRecords` batches multiple `InterpretedWalRecord`(s) in the same network\nmessage. Safekeeper reads WAL in chunks of 16 blocks and then decodes as many Postgres WAL records\nas possible. Each Postgres WAL record maps to one `InterpretedWalRecord` for potentially multiple shards.\nHence, the size of the batch is given by the number of Postgres WAL records that fit in 16 blocks.\n\nThe protocol needs to support evolution. Protobuf was chosen here with the view that, in the future,\nwe may migrate it to gRPC altogether.\n\nImplemented in:\n1. https://github.com/neondatabase/neon/pull/9746\n2. 
https://github.com/neondatabase/neon/pull/9821\n\n### Phase 3: Fan out interpreted WAL\n\nIn the previous phase, the initial processing of WAL was moved to the safekeeper, but it is still\ndone once for each shard: this will generate O(N_shards) CPU work on the safekeeper (especially\nwhen considering converting to Protobuf format and compression).\n\nTo avoid this, we fan-out WAL from one (tenant, timeline, shard) to all other shards subscribed on\nthe same safekeeper. Under normal operation, the WAL will be read from disk, decoded and interpreted\n_only_ once per (safekeeper, timeline).\n\nWhen the first shard of a sharded timeline subscribes to a given safekeeper a task is spawned\nfor the WAL reader (`InterpretedWalReader`). This task reads WAL, decodes, interprets it and sends\nit to the sender (`InterpretedWalSender`). The sender is a future that is polled from the connection\ntask. When further shards subscribe on the safekeeper they will attach themselves to the existing WAL reader.\nThere's two cases to consider:\n1. The shard's requested `start_lsn` is ahead of the current position of the WAL reader. In this case, the shard\nwill start receiving data when the reader reaches that LSN. The intuition here is that there's little to gain\nby letting shards \"front-run\" since compute backpressure is based on the laggard LSN.\n2. The shard's requested `start_lsn` is below the current position of the WAL reader. In this case, the WAL reader\ngets reset to this requested position (same intuition). 
Special care is taken such that advanced shards do not receive\ninterpreted WAL records below their current position.\n\nThe approach above implies that there is at most one WAL reader per (tenant, timeline) on a given safekeeper at any point in time.\nIf this turns out to be operationally problematic, there's a trick we can deploy: `--max-delta-for-fanout` is an optional safekeeper\nargument that controls the max absolute delta between a new shard and the current WAL position of the WAL reader. If the absolute\ndelta is above that value, a new reader is spawned. Note that there's currently no concurrency control on the number of WAL readers,\nso it's recommended to use large values to avoid pushing CPU utilisation too high.\n\nUnsharded tenants do not spawn a separate task for the interpreted WAL reader since there's no benefit to it. Instead they poll\nthe reader and sender concurrently from the connection task.\n\nShard splits are interesting here because it is the only case when the same shard might have two subscriptions at the same time.\nThis is handled by giving readers a unique identifier. Both shards will receive the same data while respecting their requested start\nposition.\n\nImplemented in:\n1. https://github.com/neondatabase/neon/pull/10190\n\n## Deployment\n\nEach phase shall be deployed independently. 
Special care should be taken around protocol changes.\n\n## Observability Tips\n\n* The safekeeper logs the protocol requested by the pageserver\nalong with the pageserver ID, tenant, timeline and shard: `starting streaming from`.\n* There are metrics for the number of wal readers:\n  * `safekeeper_wal_readers{kind=\"task\", target=~\"pageserver.*\"}` gives the number of wal reader tasks for each SK\n  * `safekeeper_wal_readers{kind=\"future\", target=~\"pageserver.*\"}` gives the number of wal readers polled inline by each SK\n  * `safekeeper_interpreted_wal_reader_tasks` gives the number of wal reader tasks per tenant, timeline\n* Interesting log lines for the fan-out reader:\n  * `Spawning interpreted`: first shard creates the interpreted wal reader\n  * `Fanning out`: a subsequent shard attaches itself to an interpreted wal reader\n  * `Aborting interpreted`: all senders have finished and the reader task is being aborted\n\n## Future Optimizations\n\nThis section describes some improvement areas which may be revisited in the future.\n\n### Buffering of Interpreted WAL\n\nThe interpreted WAL reader may buffer interpreted WAL records in user space to help with serving\nsubscriptions that are lagging behind the current position of the reader.\n\nCounterpoints:\n* Safekeepers serve many thousands of timelines and allocating a buffer for each might be wasteful,\nespecially given that it would go unused on the happy path.\n* WAL is buffered in the kernel page cache. Usually we'd only pay the CPU cost of decoding and interpreting.\n\n### Tweaking the Pageserver Safekeeper Selection Algorithm\n\nWe could make the pageserver aware of which safekeepers already host shards for the timeline along\nwith their current WAL positions. 
The pageserver should then prefer safekeepers that are in the same\nAZ _and_ already have a shard with a position close to the desired start position.\n\nWe currently run one safekeeper per AZ, so the point is moot until that changes.\n\n### Pipelining first ingest phase\n\nThe first ingest phase is a stateless transformation of a binary WAL record into a pre-processed\noutput per shard.  To put multiple CPUs to work, we may pipeline this processing up to some defined buffer\ndepth.\n\n## Alternatives considered\n\n### Give safekeepers enough state to fully decode WAL\n\nIn this RFC, we only do the first phase of ingest on the safekeeper, because this is\nthe phase that is stateless.  Subsequent changes then happen on the pageserver, with\naccess to the `Timeline` state.\n\nWe could do more work on the safekeeper if we transmitted metadata state to the safekeeper\nwhen subscribing to the WAL: for example, by telling the safekeeper all the relation sizes,\nso that it could then generate all the metadata writes for relation sizes.\n\nWe avoid doing this for several reasons:\n1. Complexity: it's a more invasive protocol change\n2. Decoupling: having the safekeeper understand the `ProcessedWalIngest` already somewhat\n   infects it with knowledge of the pageserver, but this is mainly an abstract structure\n   that describes postgres writes.  However, if we taught the safekeeper about the exact\n   way that pageserver deals with metadata keys, this would be a much tighter coupling.\n3. 
Load: once the WAL has been processed to the point that it can be split between shards,\n   it is preferable to share out work on the remaining shards rather than adding extra CPU\n   load to the safekeeper.\n\n### Do pre-processing on the compute instead of the safekeeper\n\nSince our first stage of ingest is stateless, it could be done at any stage in the pipeline,\nall the way up to the compute.\n\nWe choose not to do this, because it is useful for the safekeeper to store the raw WAL rather\nthan just the preprocessed WAL:\n- The safekeeper still needs to be able to serve raw WAL back to postgres for e.g. physical replication\n- It simplifies our paxos implementation to have the offset in the write log be literally\n  the same as the LSN\n- Raw WAL must have a stable protocol since we might have to re-ingest it at arbitrary points in the future.\n  Storing raw WAL gives us more flexibility to evolve the pageserver, safekeeper protocol.\n\n### Do wal pre-processing on shard 0 or a separate service, send it to other shards from there\n\nIf we wanted to keep the safekeepers as entirely pure stores of raw WAL bytes, then\nwe could do the initial decode and shard-splitting in some other location:\n- Shard 0 could subscribe to the full WAL and then send writes to other shards\n- A new intermediate service between the safekeeper and pageserver could do the splitting.\n\nSo why not?\n- Extra network hop from shard 0 to the final destination shard\n- Clearly there is more infrastructure involved here compared with doing it inline on the safekeeper.\n- Safekeepers already have very light CPU load: typical cloud instance shapes with appropriate\n  disks for the safekeepers effectively have \"free\" CPU resources.\n- Doing extra work on shard 0 would complicate scheduling of shards on pageservers, because\n  shard 0 would have significantly higher CPU load under write workloads than other shards.\n"
  },
  {
    "path": "docs/rfcs/043-bottom-most-gc-compaction.md",
    "content": "# Bottommost Garbage-Collection Compaction\n\n## Summary\n\nThe goal of this doc is to propose a way to reliably collect garbages below the GC horizon. This process is called bottom-most garbage-collect-compaction, and is part of the broader legacy-enhanced compaction that we plan to implement in the future.\n\n## Motivation\n\nThe current GC algorithm will wait until the covering via image layers before collecting the garbages of a key region. Relying on image layer generation to generate covering images is not reliable. There are prior arts to generate feedbacks from the GC algorithm to the image generation process to accelerate garbage collection, but it slows down the system and creates write amplification.\n\n# Basic Idea\n\n![](images/036-bottom-most-gc-compaction/01-basic-idea.svg)\n\nThe idea of bottom-most compaction is simple: we rewrite all layers that are below or intersect with the GC horizon to produce a flat level of image layers at the GC horizon and deltas above the GC horizon. In this process,\n\n- All images and deltas ≤ GC horizon LSN will be dropped. This process collects garbages.\n- We produce images for all keys involved in the compaction process at the GC horizon.\n\nTherefore, it can precisely collect all garbages below the horizon, and reduce the space amplification, i.e., in the staircase pattern (test_gc_feedback).\n\n![The staircase pattern in test_gc_feedback in the original compaction algorithm. The goal is to collect garbage below the red horizontal line.](images/036-bottom-most-gc-compaction/12-staircase-test-gc-feedback.png)\n\nThe staircase pattern in test_gc_feedback in the original compaction algorithm. The goal is to collect garbage below the red horizontal line.\n\n# Branches\n\nWith branches, the bottom-most compaction should retain a snapshot of the keyspace at the `retain_lsn` so that the child branch can access data at the branch point. 
This requires some modifications to the basic bottom-most compaction algorithm that we sketched above. \n\n![](images/036-bottom-most-gc-compaction/03-retain-lsn.svg)\n\n## Single Timeline w/ Snapshots: handle `retain_lsn`\n\nFirst let’s look into the case where we create branches over the main branch but don’t write any data to them (aka “snapshots”).\n\nThe bottom-most compaction algorithm collects all deltas and images of a key and can make decisions on what data to retain. Given that we have a single key’s history as below:\n\n```\nLSN 0x10 -> A\nLSN 0x20 -> append B\nretain_lsn: 0x20\nLSN 0x30 -> append C\nLSN 0x40 -> append D\nretain_lsn: 0x40\nLSN 0x50 -> append E\nGC horizon: 0x50\nLSN 0x60 -> append F\n```\n\nThe algorithm will produce:\n\n```\nLSN 0x20 -> AB\n(drop all history below the earliest retain_lsn)\nLSN 0x40 -> ABCD\n(assume the cost of replaying 2 deltas is higher than storing the full image, we generate an image here)\nLSN 0x50 -> append E\n(replay one delta is cheap)\nLSN 0x60 -> append F\n(keep everything as-is above the GC horizon)\n```\n\n![](images/036-bottom-most-gc-compaction/05-btmgc-parent.svg)\n\nWhat happens is that we balance the space taken by each retain_lsn and the cost of replaying deltas during the bottom-most compaction process. This is controlled by a threshold. If `count(deltas) < $threshold`, the deltas will be retained. Otherwise, an image will be generated and the deltas will be dropped.\n\nIn the example above, the `$threshold` is 2.\n\n## Child Branches with data: pull + partial images\n\nIn the previous section we have shown how bottom-most compaction respects `retain_lsn` so that all data that was readable at branch creation remains readable. But branches can have data on their own, and that data can fall out of the branch’s PITR window. 
So, this section explains how we deal with that.\n\nWe will run the same bottom-most compaction for these branches, to ensure the space amplification on the child branch is reasonable. \n\n```\nbranch_lsn: 0x20\nLSN 0x30 -> append P\nLSN 0x40 -> append Q\nLSN 0x50 -> append R\nGC horizon: 0x50\nLSN 0x60 -> append S\n```\n\nNote that bottom-most compaction happens on a per-timeline basis. When it processes this key, it only reads the history from LSN 0x30 without a base image. Therefore, on child branches, the bottom-most compaction process will make image creation decisions based on the same `count(deltas) < $threshold` criteria, and if it decides to create an image, the base image will be retrieved from the ancestor branch.\n\n```\nbranch_lsn: 0x20\nLSN 0x50 -> ABPQR\n(we pull the image at LSN 0x20 from the ancestor branch to get AB, and then apply append PQ to the page; we replace the record at 0x40 with an image and drop the delta)\nGC horizon: 0x50\nLSN 0x60 -> append S\n```\n\n![](images/036-bottom-most-gc-compaction/06-btmgc-child.svg)\n\nNote that for child branches, we do not create image layers for the images when bottom-most compaction runs. Instead, we drop the 0x30/0x40/0x50 delta records and directly place the image ABPQR@0x50 into the delta layer, which serves as a sparse image layer. For child branches, if we create image layers, we will need to put all keys in the range into the image layer. This causes space bloat and slow compactions. In this proposal, the compaction process will only compact and process keys modified inside the child branch.\n\n# Result\n\nBottom-most compaction ensures all garbage under the GC horizon gets collected right away (compared with “eventually” in the current algorithm). Meanwhile, it generates images at each of the retain_lsn to ensure branch reads are fast. 
As we make per-key decisions on whether to generate an image or not, the theoretical lower bound of the storage space we need to retain for a branch is lower than before.\n\nBefore: min(sum(logs for each key), sum(image for each key)), for each partition — we always generate image layers on a key range\n\nAfter: sum(min(logs for each key, image for each key))\n\n# Compaction Trigger\n\nThe bottom-most compaction can be automatically triggered. The goal of the trigger is that it should ensure a constant factor for write amplification. Say that the user write 1GB of WAL into the system, we should write 1GB x C data to S3. The legacy compaction algorithm does not have such a constant factor C. The data we write to S3 is quadratic to the logical size of the database (see [A Theoretical View of Neon Storage](https://www.notion.so/A-Theoretical-View-of-Neon-Storage-8d7ad7555b0c41b2a3597fa780911194?pvs=21)).\n\nWe propose the following compaction trigger that generates a constant write amplification factor. Write amplification >= total writes to S3 / total user writes. We only analyze the write amplification caused by the bottom-most GC-compaction process, ignoring the legacy create image layers amplification.\n\nGiven that we have ***X*** bytes of the delta layers above the GC horizon, ***A*** bytes of the delta layers intersecting with the GC horizon, ***B*** bytes of the delta layers below the GC horizon, and ***C*** bytes of the image layers below the GC horizon.\n\nThe legacy GC + compaction loop will always keep ***A*** unchanged, reduce ***B and C*** when there are image layers covering the key range. This yields 0 write amplification (only file deletions) and extra ***B*** bytes of space.\n\n![](images/036-bottom-most-gc-compaction/09-btmgc-analysis-2.svg)\n\nThe bottom-most compaction proposed here will split ***A*** into deltas above the GC horizon and below the GC horizon. 
Everything below the GC horizon will be image layers after the compaction (not considering branches). Therefore, this yields ***A+C*** extra write traffic each iteration, plus 0 extra space.\n\n![](images/036-bottom-most-gc-compaction/07-btmgc-analysis-1.svg)\n\nAlso considering read amplification (below the GC horizon). When a read request reaches the GC horizon, the read amplification will be (A+B+C)/C=1+(A+B)/C. Reducing ***A*** and ***B*** can help reduce the read amplification below the GC horizon.\n\nThe metrics-based trigger will wait until a point that space amplification is not that large and write amplification is not that large before the compaction gets triggered. The trigger is defined as **(A+B)/C ≥ 1 (or some other ratio)**.\n\nTo reason about this trigger, consider the two cases:\n\n**Data Ingestion**\n\nUser keeps ingesting data into the database, which indicates that WAL size roughly equals to the database logical size. The compaction gets triggered only when the newly-written WAL roughly equals to the current bottom-most image size (=X). Therefore, it’s triggered when the database size gets doubled. This is a reasonable amount of work. Write amplification is 2X/X=1 for the X amount of data written.\n\n![](images/036-bottom-most-gc-compaction/10-btmgc-analysis-3.svg)\n\n**Updates/Deletion**\n\nIn this case, WAL size will be larger than the database logical size ***D***. The compaction gets triggered for every ***D*** bytes of WAL written. Therefore, for every ***D*** bytes of WAL, we rewrite the bottom-most layer, which produces an extra ***D*** bytes of write amplification. This incurs exactly 2x write amplification (by the write of D), 1.5x write amplification (if we count from the start of the process) and no space amplification. \n\n![](images/036-bottom-most-gc-compaction/11-btmgc-analysis-4.svg)\n\nNote that here I try to reason that write amplification is a constant (i.e., the data we write to S3 is proportional to the data the user write). 
The main problem with the current legacy compaction algorithm is that write amplification is proportional to the database size.\n\nThe next step is to optimize the write amplification above the GC horizon (i.e., change the image creation criteria, top-most compaction, or introduce tiered compaction), to ensure the write amplification of the whole system is a constant factor.\n\n20GB layers → +20GB layers → delete 20GB, need 40GB temporary space\n\n# Sub-Compactions\n\nThe gc-compaction algorithm may take a long time and we need to split the job into multiple sub-compaction jobs.\n\n![](images/036-bottom-most-gc-compaction/13-job-split.svg)\n\nAs in the figure, the auto-trigger schedules a compaction job covering the full keyspace below a specific LSN. In such case that we cannot finish compacting it in one run in a reasonable amount of time, the algorithm will vertically split it into multiple jobs (in this case, 5).\n\nEach gc-compaction job will create one level of delta layers and one flat level of image layers for each LSN. Those layers will be automatically split based on size, which means that if the sub-compaction job produces 1GB of deltas, it will produce 4 * 256MB delta layers. For those layers that is not fully contained within the sub-compaction job rectangles, it will be rewritten to only contain the keys outside of the key range.\n\n# Implementation\n\nThe main implementation of gc-compaction is in `compaction.rs`.\n\n* `compact_with_gc`: The main loop of gc-compaction. It takes a rectangle range of the layer map and compact that specific range. It selects layers intersecting with the rectangle, downloads the layers, creates the k-merge iterator to read those layers in the key-lsn order, and decide which keys to keep or insert a reconstructed page. The process is the basic unit of a gc-compaction and is not interruptable. If the process gets preempted by L0 compaction, it has to be restarted from scratch. 
For layers overlaps with the rectangle but not fully inside, the main loop will also rewrite them so that the new layer (or two layers if both left and right ends are outside of the rectangle) has the same LSN range as the original one but only contain the keys outside of the compaction range.\n* `gc_compaction_split_jobs`: Splits a big gc-compaction job into sub-compactions based on heuristics in the layer map. The function looks at the layer map and splits the compaction job based on the size of the layers so that each compaction job only pulls ~4GB of layer files.\n* `generate_key_retention` and `KeyHistoryRetention`: Implements the algorithm described in the \"basic idea\" and \"branch\" chapter of this RFC. It takes a vector of history of a key (key-lsn-value) and decides which LSNs of the key to retain. If there are too many deltas between two retain_lsns, it will reconstruct the page and insert an image into the compaction result. Also, we implement `KeyHistoryRetention::verify` to ensure the generated result is not corrupted -- all retain_lsns and all LSNs above the gc-horizon should be accessible.\n* `GcCompactionQueue`: the automatic trigger implementation for gc-compaction. `GcCompactionQueue::iteration` is called at the end of the tenant compaction loop. It will then call `trigger_auto_compaction` to decide whether to trigger a gc-compaction job for this tenant. If yes, the compaction-job will be added to the compaction queue, and the queue will be slowly drained once there are no other compaction jobs running. gc-compaction has the lowest priority. If a sub-compaction job is not successful or gets preempted by L0 compaction (see limitations for reasons why a compaction job would fail), it will _not_ be retried.\n* Changes to `index_part.json`: we added a `last_completed_lsn` field to the index part for the auto-trigger to decide when to trigger a compaction.\n* Changes to the read path: when gc-compaction updates the layer map, all reads need to wait. 
See `gc_compaction_layer_update_lock` and comments in the code path for more information.\n\nGc-compaction can also be scheduled over the HTTP API. Example:\n\n```\ncurl 'localhost:9898/v1/tenant/:tenant_id/timeline/:timeline_id/compact?enhanced_gc_bottom_most_compaction=true&dry_run=true' -X PUT -H \"Content-Type: application/json\" -d '{\"scheduled\": true, \"compact_key_range\": { \"start\": \"000000067F0000A0000002A1CF0100000000\", \"end\": \"000000067F0000A0000002A1D70100000000\" } }'\n```\n\nThe `dry_run` mode can be specified in the query string so that the compaction will go through all layers to estimate how much space can be saved without writing the compaction result into the layer map.\n\nThe auto-trigger is controlled by tenant-level flag `gc_compaction_enabled`. If this is set to false, no gc-compaction will be automatically scheduled on this tenant (but manual trigger still works).\n\n# Next Steps\n\nThere are still some limitations of gc-compaction itself that needs to be resolved and tested,\n\n- gc-compaction is currently only automatically triggered on root branches. We have not tested gc-compaction on child branches in staging.\n- gc-compaction will skip aux key regions because of the possible conflict with the assumption of aux file tombstones.\n- gc-compaction does not consider keyspaces at retain_lsns and only look at keys in the layers. This also causes us giving up some sub-compaction jobs because a key might have part of its history available due to traditional GC removing part of the history.\n- We limit gc-compaction to run over shards <= 150GB to avoid gc-compaction taking too much time blocking other compaction jobs. The sub-compaction split algorithm needs to be improved to be able to split vertically and horizontally. 
Also, we need to move the download layer process out of the compaction loop so that we don't block other compaction jobs for too long.\n- The compaction trigger always schedules gc-compaction from the lowest LSN to the gc-horizon. Currently we do not schedule compaction jobs that only select layers in the middle. Allowing this could potentially reduce the number of layers read/written throughout the process.\n- gc-compaction will give up if there are too many layers to rewrite or if there is not enough disk space for the compaction.\n- gc-compaction sometimes fails with \"no key produced during compaction\", which means that all existing keys within the compaction range can be collected; but we don't have a way to write this information back to the layer map -- we cannot generate an empty image layer.\n- We limit the maximum size of deltas for a single key to 512MB. If above this size, gc-compaction will give up. This can be resolved by changing `generate_key_retention` to be a stream instead of requiring to collect all the key history.\n\nIn the future,\n\n- Top-most compaction: ensure we always have an image coverage for the latest data (or near the latest data), so that reads will be fast at the latest LSN.\n- Tiered compaction on deltas: ensure read from any LSN is fast.\n- Per-timeline compaction → tenant-wide compaction?\n"
  },
  {
    "path": "docs/rfcs/044-feature-flag.md",
    "content": "# Storage Feature Flags\n\nIn this RFC, we will describe how we will implement per-tenant feature flags.\n\n## PostHog as Feature Flag Service\n\nBefore we start, let's talk about how current feature flag services work. PostHog is the feature flag service we are currently using across multiple user-facing components in the company. PostHog has two modes of operation: HTTP evaluation and server-side local evaluation.\n\nLet's assume we have a storage feature flag called gc-compaction and we want to roll it out to scale-tier users with resident size >= 10GB and <= 100GB.\n\n### Define User Profiles\n\nThe first step is to synchronize our user profiles to the PostHog service. We can simply assume that each tenant is a user in PostHog. Each user profile has some properties associated with it. In our case, it will be: plan type (free, scale, enterprise, etc); resident size (in bytes); primary pageserver (string); region (string).\n\n### Define Feature Flags\n\nWe would create a feature flag called gc-compaction in PostHog with 4 variants: disabled, stage-1, stage-2, fully-enabled. We will flip the feature flags from disabled to fully-enabled stage by stage for some percentage of our users.\n\n### Option 1: HTTP Evaluation Mode\n\nWhen using PostHog's HTTP evaluation mode, the client will make request to the PostHog service, asking for the value of a feature flag for a specific user.\n\n* Control plane will report the plan type to PostHog each time it attaches a tenant to the storcon or when the user upgrades/downgrades. It calls the PostHog profile API to associate tenant ID with the plan type. Assume we have X active tenants and such attach or plan change event happens each week, that would be 4X profile update requests per month.\n* Pageservers will report the resident size and the primary pageserver to the PostHog service. 
Assume we report resident size every 24 hours, that would be 30X requests per month.\n* Each tenant will request the state of the feature flag every 1 hour, that's 720X requests per month.\n* The Rust client would be easy to implement as we only need to call the `/decide` API on PostHog.\n\nUsing the HTTP evaluation mode we will issue 754X requests a month.\n\n### Option 2: Local Evaluation Mode\n\nWhen using PostHog's local evaluation mode, the client (usually the server in a browser/server architecture) will poll the feature flag configuration every 30s (default in the Python client) from PostHog. Such configuration contains data like:\n\n<details>\n\n<summary>Example JSON response from the PostHog local evaluation API</summary>\n\n```\n[\n    {\n        \"id\": 1,\n        \"name\": \"Beta Feature\",\n        \"key\": \"person-flag\",\n        \"is_simple_flag\": True,\n        \"active\": True,\n        \"filters\": {\n            \"groups\": [\n                {\n                    \"properties\": [\n                        {\n                            \"key\": \"location\",\n                            \"operator\": \"exact\",\n                            \"value\": [\"Straße\"],\n                            \"type\": \"person\",\n                        }\n                    ],\n                    \"rollout_percentage\": 100,\n                },\n                {\n                    \"properties\": [\n                        {\n                            \"key\": \"star\",\n                            \"operator\": \"exact\",\n                            \"value\": [\"ſun\"],\n                            \"type\": \"person\",\n                        }\n                    ],\n                    \"rollout_percentage\": 100,\n                },\n            ],\n        },\n    }\n]\n```\n\n</details>\n\nNote that the API only contains information like \"under what condition => rollout percentage\". 
The user is responsible to provide the properties required to the client for local evaluation, and the PostHog service (web UI) cannot know if a feature is enabled for the tenant or not until the client uses the `capture` API to report the result back. To control the rollout percentage, the user ID gets mapped to a float number in `[0, 1)` on a consistent hash ring. All values <= the percentage will get the feature enabled or set to the desired value.\n\nTo use the local evaluation mode, the system needs:\n\n* Assume each pageserver will poll PostHog for the local evaluation JSON every 5 minutes (instead of the 30s default as it's too frequent). That's 8640Y per month, Y is the number of pageservers. Local evaluation requests cost 10x more than the normal decide request, so that's 86400Y request units to bill.\n* Storcon needs to store the plan type in the database and pass that information to the pageserver when attaching the tenant.\n* Storcon also needs to update PostHog with the active tenants, for example, when the tenant gets detached/attached. Assume each active tenant gets detached/attached every week, that would be 4X requests per month.\n* We do not need to update bill type or resident size to PostHog as all these are evaluated locally.\n* After each local evaluation of the feature flag, we need to call PostHog's capture event API to update the result of the evaluation that the feature is enabled. We can do this when the flag gets changed compared with the last cached state in memory. 
That would be at least 4X (assume we do deployment every week so the cache gets cleared) and maybe an additional multiplier of 10, assuming we have 10 active features.\n\nIn this case, we will issue 86400Y + 40X requests per month.\n\nAssume X = 1,000,000 and Y = 100,\n\n|   | HTTP Evaluation  | Local Evaluation  |\n|---|---|---|\n| Latency of propagating the conditions/properties for feature flag  | 24 hours  | available locally  |\n| Latency of applying the feature flag  | 1 hour  | 5 minutes  |\n| Can properties be reported from different services |  Yes |  No  |\n| Do we need to sync billing info etc to pageserver |  No |  Yes  |\n| Cost | 75400$ / month | 4864$ / month |\n\n# Our Solution\n\nWe will use PostHog _only_ as a UI to configure the feature flags. Whether a feature is enabled or not can only be queried through storcon/pageserver instead of using the PostHog UI. (We could report it back to PostHog via `capture_event` but it costs $$$.) This allows us to ramp up the feature flag functionality fast at first. 
At the same time, it would also give us the option to migrate to our own solution once we want to have more properties and more complex evaluation rules in our system.\n\n* We will create several fake users (tenants) in PostHog that contain all the properties we will use for evaluating a feature flag (i.e., resident size, billing type, pageserver id, etc.)\n* We will use PostHog's local evaluation API to poll the configuration of the feature flags and evaluate them locally on each of the pageservers.\n* The evaluation result will not be reported back to PostHog.\n* Storcon needs to pull some information from cplane database.\n* To know if a feature is currently enabled or not, we need to call the storcon/pageserver API; and we won't be able to know if a feature has been enabled on a tenant before easily: we need to look at the Grafana logs.\n\nWe only need to pay for the 86400Y local evaluation requests (that would be setting X=0 in Option 2 => $864/month, and even less if we proxy it through storcon).\n\n## Implementation\n\n* Pageserver: implement a PostHog local evaluation client. The client will be shared across all tenants on the pageserver with a single API: `evaluate(tenant_id, feature_flag, properties) -> json`.\n* Storcon: if we need plan type as the evaluation condition, pull it from cplane database.\n* Storcon/Pageserver: implement an HTTP API `:tenant_id/feature/:feature` to retrieve the current feature flag status.\n* Storcon/Pageserver: a loop to update the feature flag spec on both storcon and pageserver. 
Pageserver loop will only be activated if storcon does not push the specs to the pageserver.\n\n## Difference from Tenant Config\n\n* Feature flags can be modified by percentage, and the default config for each feature flag can be modified in UI without going through the release process.\n* Feature flags are more flexible and won't be persisted anywhere and will be passed as plain JSON over the wire so that we do not need to handle backward/forward compatibility as in tenant config.\n* The expectation of tenant config is that once we add a flag we cannot remove it (or it will be hard to remove), but feature flags are more flexible.\n\n# Final Implementation\n\n* We added a new crate `posthog_lite_client` that supports local feature evaluations.\n* We set up two projects \"Storage (staging)\" and \"Storage (production)\" in the PostHog console.\n* Each pageserver reports 10 fake tenants to PostHog so that we can get all combinations of regions (and other properties) in the PostHog UI.\n* Supported properties: AZ, neon_region, pageserver, tenant_id.\n* You may use the \"Pageserver Feature Flags\" dashboard to see the evaluation status.\n* The feature flag spec is polled on storcon every 30s (in each of the regions) and storcon will propagate the spec to the pageservers.\n* The pageserver housekeeping loop updates the tenant-specific properties (e.g., remote size) for evaluation.\n\nEach tenant has a `feature_resolver` object. After you add a feature flag in the PostHog console, you can retrieve it with:\n\n```rust\n// Boolean flag\nself\n    .feature_resolver\n    .evaluate_boolean(\"flag\")\n    .is_ok()\n// Multivariate flag\nself\n    .feature_resolver\n    .evaluate_multivariate(\"gc-compaction-strategy\")\n    .ok();\n```\n\nThe user needs to handle the case where the evaluation result is an error. 
This can occur in a variety of cases:\n\n* During the pageserver start, the feature flag spec has not been retrieved.\n* No condition group is matched.\n* The feature flag spec contains an operand/operation not supported by the lite PostHog library.\n\nFor boolean flags, the return value is `Result<(), Error>`. `Ok(())` means the flag is evaluated to true. Otherwise,\nthere is either an error in evaluation or it does not match any groups.\n\nFor multivariate flags, the return value is `Result<String, Error>`. `Ok(variant)` indicates the flag is evaluated\nto a variant. Otherwise, there is either an error in evaluation or it does not match any groups.\n\nThe evaluation logic is documented in the PostHog lite library. It compares the consistent hash of a flag key + tenant_id\nwith the rollout percentage and determines which tenant to roll out a specific feature.\n\nUsers can use the feature flag evaluation API to get the flag evaluation result of a specific tenant for debugging purposes.\n\n```\ncurl \"http://localhost:9898/v1/tenant/:tenant_id/feature_flag?flag=:key&as=multivariate/boolean\"\n```\n\nBy default, the storcon pushes the feature flag specs to the pageservers every 30 seconds, which means that a change in feature flag in the\nPostHog UI will propagate to the pageservers within 30 seconds.\n\n# Future Works\n\n* Support dynamic tenant properties like logical size as the evaluation condition.\n* Support properties like `plan_type` (needs cplane to pass it down).\n* Report feature flag evaluation result back to PostHog (if the cost is okay).\n* Fast feature flag evaluation cache on critical paths (e.g., cache a feature flag result in `AtomicBool` and use it on the read path)."
  },
  {
    "path": "docs/rfcs/2025-02-14-storage-controller.md",
    "content": "\n## Summary\n\nThis is a retrospective RFC to document the design of the `storage-controller` service.\n\nThis service manages the physical mapping of Tenants and Timelines to Pageservers and Safekeepers.  It\nacts as the API for \"storage\" as an abstract concept: enabling other parts of the system to reason\nabout things like creating/deleting tenants and timelines without having to understand exactly which\npageserver and safekeeper to communicate, or any subtle rules about how to orchestrate these things.\n\nThe storage controller was implemented in the first half of 2024 as an essential part\nof storage sharding, especially [shard splitting](032-shard-splitting.md).\n\nIt initially managed only pageservers, but has extended in 2025 to also manage safekeepers.  In\nsome places you may seen unqualified references to 'nodes' -- those are pageservers.\n\n## Design Choices\n\n### Durability\n\nWe rely on an external postgres for all durable state.  No local storage is used.\n\nWe avoid any unnecessary I/O to durable storage.  For example:\n- most tracking of in-flight changes to the system is done in-memory rather than recording progress/steps in a database\n- When migrating tenant shards between pageservers we only touch the database to increment generation numbers,\n  we do not persist the total state of a tenant shard.\n\nBeing frugal with database I/O has two benefits:\n- It avoids the database becoming a practical scaling bottleneck (we expect in-memory scale issues to be hit\n  before we hit e.g. transactions-per-second issues)\n- It reduces cost when using a cloud database service to run the controller's postgres database.\n\nThe trade-off is that there is a \"bootstrapping\" problem: a controller can't be deployed in isolation, one\nmust first have some existing database system.  
In practice, we expect that Neon is deployed in one of the\nfollowing ways:\n- into a cloud which has a postgres service that can be used to run the controller\n- into a mature on-prem environment that has existing facilities for running databases\n- into a test/dev environment where a simple one-node vanilla postgres installation is sufficient\n\n### Consensus\n\nThe controller does _not_ implement any strong consensus mechanism of its own.  Instead:\n- Where strong consistency is required (for example, for pageserver generation numbers), this\n  responsibility is delegated to a transaction in our postgres database.\n- Highly available deploys are done using a simple in-database record of what controller instances\n  are available, distinguished by timestamps, rather than having controllers directly negotiate a leader.\n\nAvoiding strong consensus among controller processes is a cost saving (we avoid running three controllers\nall the time), and simplifies implementation (we do not have to phrase all configuration changes as e.g raft\ntransactions).\n\nThe trade-off is that under some circumstances a controller with partial network isolation can cause availability\nissues in the cluster, by making changes to pageserver state that might disagree with what the \"true\" active\ncontroller is trying to do.  The impact of this is bounded by our `controllers` database table, that enables\na rogue node to eventually realise that it is not the leader and step down.  If a rogue node can't reach\nthe database, then it implicitly stops making progress.  A rogue controller cannot durably damage the system\nbecause pageserver data and safekeeper configs are protected by generation numbers that are only updated\nvia postgres transactions (i.e. no controller \"trusts itself\" to independently make decisions about generations).\n\n### Scale\n\nWe design for high but not unlimited scale.  
The memory footprint of each tenant shard is small (~8kB), so\nit is realistic to scale up to a million attached shards on a server with modest resources.  Tenants in\na detached state (i.e. not active on pageservers) do not need to be managed by storage controller, and can\nbe relegated from memory to the database.\n\nTypically, a tenant shard is updated about once a week, when we do a deploy.  During deploys, we relocate\na few thousand tenants from each pageserver while it is restarted, so it is extremely rare for the controller\nto have to do O(N) work (on all shards at once).\n\nThere are places where we do O(N) work:\n- On normal startup, when loading from the database into memory\n- On unclean startup (with no handover of observed state from a previous controller), where we will\n  scan all shards on all pageservers.\n\nIt is important that these locations are written efficiently.  At high scale we should still expect runtimes\nof the order tens of seconds to complete a storage controller start.\n\nWhen the practical scale limit of a single storage controller is reached, just deploy another one with its\nown pageservers & safekeepers: each controller+its storage servers should be thought of as a logical cluster\nor \"cell\" of storage.\n\n# High Level Design\n\nThe storage controller is an in-memory system (i.e. state for all attached\ntenants is held in memory _as well as_ being represented in durable postgres storage).\n\n## Infrastructure\n\nThe storage controller is an async rust binary using tokio.\n\nThe storage controller is built around the `Service` type.  This implements\nall the entry points for the outside world's interaction with the controller (HTTP handlers are mostly thin wrappers of service functions),\nand holds most in-memory state (e.g. the list of tenant shards).\n\nThe state is held in a `ServiceInner` wrapped in a RwLock.  
This monolithic\nlock is used to simplify reasoning about code that mutates state: each function that takes a write lock may be thought of as a serializable transaction on the in-memory state.  This lock is clearly a bottleneck, but\nnevertheless is scalable to managing millions of tenants.\n\nPersistent state is held in a postgres database, and we use the `diesel` crate to provide database client functionality.  All database access is wrapped in the `Persistence` type -- this makes it easy to understand which\ncode is touching the database.  The database is only used when necessary, i.e. for state that cannot be recovered another way.  For example, we do not store the secondary pageserver locations of tenant shards in the database, rather we learn these at startup from running pageservers, and/or make scheduling decisions to fill in the gaps.  This adds some complexity, but massively reduces the load on the database, and enables running the storage controller with a very cheap postgres instance.\n\n## Pageserver tenant scheduling & reconciliation\n\n### Intent & observed state\n\nEach tenant shard is represented by type `TenantShard`, which has an 'intent' and 'observed' state.  Setting the\nintent state is called _scheduling_, and doing remote I/O to make observed\nstate match intent state is called _reconciliation_.\n\nThe `Scheduler` type is responsible for making choices about the intent\nstate, such as choosing a pageserver for a new tenant shard, or assigning\na replacement pageserver when the original one fails.\n\nThe observed state is updated after tenant reconciliation (see below), and\nhas the concept of a `None` state for a pageserver, indicating unknown state.  
This is used to ensure that we can safely clean up after we start\nbut do not finish a remote call to a pageserver, or if a pageserver restarts and we are uncertain of its state.\n\n### Tenant Reconciliation\n\nThe `Reconciler` type is responsible for updating pageservers to achieve\nthe intent state.  It is instantiated when `Service` determines that a shard requires reconciliation, and owned by a background tokio task that\nruns it to completion.  Reconciler does not have access to the `Service` state: it is populated with a snapshot of relevant information when constructed, and submits its results to a channel that `Service` consumes\nto update the tenant shard's observed state.\n\nThe Reconciler does have access to the database, but only uses it for\na single purpose: updating shards' generation numbers immediately before\nattaching them to a pageserver.\n\nOperations that change a tenant's scheduling will spawn a reconciler if\nnecessary, and there is also a background loop which checks every shard\nfor the need to reconcile -- this background loop ensures eventual progress\nif some earlier reconciliations failed for some reason.\n\nThe reconciler has a general purpose code path which will attach/detach from pageservers as necessary, and a special case path for live migrations.  The live migration case is more common in practice, and is taken whenever the current observed state indicates that we have a healthy attached location to migrate from.  This implements live migration as described in the earlier [live migration RFC](028-pageserver-migration.md).\n\n### Scheduling optimisation\n\nDuring the periodic background reconciliation loop, the controller also\nperforms _scheduling optimization_.  This is the process of looking for\nshards that are in sub-optimal locations, and moving them.\n\nTypically, this means:\n- Shards attached outside their preferred AZ (e.g. 
after a node failure), to migrate them back to their preferred AZ\n- Shards attached on the same pageserver as some other shards in the same\n  tenant, to migrate them elsewhere (e.g. after a shard split)\n\nScheduling optimisation is a multi-step process to ensure graceful cutovers, e.g. by creating new secondary location, waiting for it to\nwarm up, then cutting over.  This is not done as an explicit queue\nof operations, but rather by iteratively calling the optimisation\nfunction, which will recognise each intervening state as something\nthat can generate the next optimisation.\n\n### Pageserver heartbeats and failure\n\nThe `Heartbeater` type is responsible for detecting when a pageserver\nbecomes unavailable.  This is fed back into `Service` for action: when\na pageserver is marked unavailable, tenant shards on that pageserver are\nrescheduled and Reconcilers are spawned to cut them over to their new location.\n\n## Pageserver timeline CRUD operations\n\nBy CRUD operations, we mean creating and deleting timelines.  The authoritative storage for which timelines exist on the pageserver\nis in S3, and is governed by the pageserver's system of generation\nnumbers.  
Because a shard can be attached to multiple pageservers\nconcurrently, we need to handle this when doing timeline CRUD operations:\n- A timeline operation is only persistent if _after_ the ack from a pageserver, that pageserver's generation is still the latest.\n- For deletions in particular, they are only persistent if _all_ attached\n  locations have acked the deletion operation, since if only the latest one\n  has acked then the timeline could still return from the dead if some old-generation attachment writes an index for it.\n\n## Zero-downtime controller deployments\n\nWhen two storage controllers run at the same time, they coordinate via\nthe database to establish one leader, and the other controller may proxy\nrequests to this leader\n\nSee  [Storage controller restarts RFC](037-storage-controller-restarts.md).\n\nNote that this is not a strong consensus mechanism: the controller must also survive split-brain situations.  This is respected by code that\ne.g. increments version numbers, which uses database transactions that\ncheck the expected value before modifying it.  A split-brain situation can\nimpact availability (e.g. if two controllers are fighting over where to\nattach a shard), but it should never impact durability and data integrity.\n\n## Graceful drain & fill of pageservers during deploys\n\nThe storage controller has functionality for draining + filling pageservers\nwhile deploying new pageserver binaries, so that clients are not actively\nusing a pageserver while it restarts.\n\nSee [Graceful restarts RFC](033-storage-controller-drain-and-fill.md)\n\n## Safekeeper timeline scheduling\n\nThis is currently under development, see  [Safekeeper dynamic membership change RFC](035-safekeeper-dynamic-membership-change.md)."
  },
  {
    "path": "docs/rfcs/2025-03-17-compute-prewarm.md",
    "content": "# Compute rolling restart with prewarm\n\nCreated on 2025-03-17\nImplemented on _TBD_\nAuthor: Alexey Kondratov (@ololobus)\n\n## Summary\n\nThis RFC describes an approach to reduce performance degradation due to missing caches after compute node restart, i.e.:\n\n1. Rolling restart of the running instance via 'warm' replica.\n2. Auto-prewarm compute caches after unplanned restart or scale-to-zero.\n\n## Motivation\n\nNeon currently implements several features that guarantee high uptime of compute nodes:\n\n1. Storage high-availability (HA), i.e. each tenant shard has a secondary pageserver location, so we can quickly switch over compute to it in case of primary pageserver failure.\n2. Fast compute provisioning, i.e. we have a fleet of pre-created empty computes, that are ready to serve workload, so restarting unresponsive compute is very fast.\n3. Preemptive NeonVM compute provisioning in case of k8s node unavailability.\n\nThis helps us to be well-within the uptime SLO of 99.95% most of the time. Problems begin when we go up to multi-TB workloads and 32-64 CU computes.\nDuring restart, compute loses all caches: LFC, shared buffers, file system cache. Depending on the workload, it can take a lot of time to warm up the caches,\nso that performance could be degraded and might be even unacceptable for certain workloads. The latter means that although current approach works well for small to\nmedium workloads, we still have to do some additional work to avoid performance degradation after restart of large instances.\n\n## Non Goals\n\n- Details of the persistence storage for prewarm data are out of scope, there is a separate RFC for that: <https://github.com/neondatabase/neon/pull/9661>.\n- Complete compute/Postgres HA setup and flow. Although it was originally in scope of this RFC, during preliminary research it appeared to be a rabbit hole, so it's worth of a separate RFC.\n- Low-level implementation details for Postgres replica-to-primary promotion. 
There are a lot of things to think and care about: how to start walproposer, [logical replication failover](https://www.postgresql.org/docs/current/logical-replication-failover.html), and so on, but it's worth at least a separate one-pager design document if not RFC.\n\n## Impacted components\n\nPostgres, compute_ctl, Control plane, Endpoint storage for unlogged storage of compute files.\nFor the latter, we will need to implement a uniform abstraction layer on top of S3, ABS, etc., but\nS3 is used in text interchangeably with 'endpoint storage' for simplicity.\n\n## Proposed implementation\n\n### compute_ctl spec changes and auto-prewarm\n\nWe are going to extend the current compute spec with the following attributes\n\n```rust\nstruct ComputeSpec {\n    /// [All existing attributes]\n    ...\n    /// Whether to do auto-prewarm at start or not.\n    /// Default to `false`.\n    pub lfc_auto_prewarm: bool\n    /// Interval in seconds between automatic dumps of\n    /// LFC state into S3. Default `None`, which means 'off'.\n    pub lfc_dump_interval_sec: Option<i32>\n}\n```\n\nWhen `lfc_dump_interval_sec` is set to `N`, `compute_ctl` will periodically dump the LFC state\nand store it in S3, so that it could be used either for auto-prewarm after restart or by replica\nduring the rolling restart. For enabling periodic dumping, we should consider the following value\n`lfc_dump_interval_sec=300` (5 minutes), same as in the upstream's `pg_prewarm.autoprewarm_interval`.\n\nWhen `lfc_auto_prewarm` is set to `true`, `compute_ctl` will start prewarming the LFC upon restart\nif any of the previous states is present in S3.\n\n### compute_ctl API\n\n1. `POST /store_lfc_state` -- dump LFC state using Postgres SQL interface and store result in S3.\n    This has to be a blocking call, i.e. it will return only after the state is stored in S3.\n    If there is any concurrent request in progress, we should return `429 Too Many Requests`,\n    and let the caller retry.\n\n2. 
`GET /dump_lfc_state` -- dump LFC state using Postgres SQL interface and return it as is\n    in text format suitable for the future restore/prewarm. This API is not strictly needed at\n    the end state, but could be useful for a faster prototyping of a complete rolling restart flow\n    with prewarm, as it doesn't require persistent storage for LFC state.\n\n3. `POST /restore_lfc_state` -- restore/prewarm LFC state with request\n\n    ```yaml\n    RestoreLFCStateRequest:\n      oneOf:\n        - type: object\n          required:\n            - lfc_state\n          properties:\n            lfc_state:\n              type: string\n              description: Raw LFC content dumped with GET `/dump_lfc_state`\n        - type: object\n          required:\n            - lfc_cache_key\n          properties:\n            lfc_cache_key:\n              type: string\n              description: |\n                endpoint_id of the source endpoint on the same branch\n                to use as a 'donor' for LFC content. Compute will look up\n                LFC content dump in S3 using this key and do prewarm.\n    ```\n\n    where `lfc_state` and `lfc_cache_key` are mutually exclusive.\n\n    The actual prewarming will happen asynchronously, so the caller needs to check the\n    prewarm status using the compute's standard `GET /status` API.\n\n4. `GET /status` -- extend existing API with following attributes\n\n    ```rust\n    struct ComputeStatusResponse {\n        // [All existing attributes]\n        ...\n        pub prewarm_state: PrewarmState\n    }\n\n    /// Compute prewarm state. 
Will be stored in the shared Compute state\n    /// in compute_ctl\n    struct PrewarmState {\n        pub status: PrewarmStatus\n        /// Total number of pages to prewarm\n        pub pages_total: i64\n        /// Number of pages prewarmed so far\n        pub pages_processed: i64\n        /// Optional prewarm error\n        pub error: Option<String>\n    }\n\n    pub enum PrewarmStatus {\n        /// Prewarming was never requested on this compute\n        Off,\n        /// Prewarming was requested, but not started yet\n        Pending,\n        /// Prewarming is in progress. The caller should follow\n        /// `PrewarmState::pages_processed` and `PrewarmState::pages_total`.\n        InProgress,\n        /// Prewarming has been successfully completed\n        Completed,\n        /// Prewarming failed. The caller should look at\n        /// `PrewarmState::error` for the reason.\n        Failed,\n        /// It is intended to be used by auto-prewarm if none of\n        /// the previous LFC states is available in S3.\n        /// This is a distinct state from the `Failed` because\n        /// technically it's not a failure and could happen if\n        /// compute was restarted before it dumped anything into S3,\n        /// or just after the initial rollout of the feature.\n        Skipped,\n    }\n    ```\n\n5. `POST /promote` -- this is a **blocking** API call to promote compute replica into primary.\n    This API should be very similar to the existing `POST /configure` API, i.e. accept the\n    spec (primary spec, because originally compute was started as replica). 
It's a distinct\n    API method because semantics and response codes are different:\n\n    - If promotion is done successfully, it will return `200 OK`.\n    - If compute is already primary, the call will be no-op and `compute_ctl`\n      will return `412 Precondition Failed`.\n    - If, for some reason, second request reaches compute that is in progress of promotion,\n      it will respond with `429 Too Many Requests`.\n    - If compute hit any permanent failure during promotion `500 Internal Server Error`\n      will be returned.\n\n### Control plane operations\n\nThe complete flow will be present as a sequence diagram in the next section, but here\nwe just want to list some important steps that have to be done by control plane during\nthe rolling restart via warm replica, but without much of low-level implementation details.\n\n1. Register the 'intent' of the instance restart, but not yet interrupt any workload at\n    primary and also accept new connections. This may require some endpoint state machine\n    changes, e.g. introduction of the `pending_restart` state. Being in this state also\n    **mustn't prevent any other operations except restart**: suspend, live-reconfiguration\n    (e.g. due to notify-attach call from the storage controller), deletion.\n\n2. Start new replica compute on the same timeline and start prewarming it. This process\n    may take quite a while, so the same concurrency considerations as in 1. should be applied\n    here as well.\n\n3. When warm replica is ready, control plane should:\n\n    3.1. Terminate the primary compute. Starting from here, **this is a critical section**,\n        if anything goes off, the only option is to start the primary normally and proceed\n        with auto-prewarm.\n\n    3.2. Send cache invalidation message to all proxies, notifying them that all new connections\n        should request and wait for the new connection details. 
At this stage, proxy has to also\n        drop any existing connections to the old primary, so they didn't do stale reads.\n\n    3.3. Attach warm replica compute to the primary endpoint inside control plane metadata\n        database.\n\n    3.4. Promote replica to primary.\n\n    3.5. When everything is done, finalize the endpoint state to be just `active`.\n\n### Complete rolling restart flow\n\n```mermaid\n  sequenceDiagram\n\n  autonumber\n\n  participant proxy as Neon proxy\n\n  participant cplane as Control plane\n\n  participant primary as Compute (primary)\n  box Compute (replica)\n    participant ctl as compute_ctl\n    participant pg as Postgres\n  end\n\n  box Endpoint unlogged storage\n    participant s3proxy as Endpoint storage service\n    participant s3 as S3/ABS/etc.\n  end\n\n\n  cplane ->> primary: POST /store_lfc_state\n  primary -->> cplane: 200 OK\n\n  cplane ->> ctl: POST /restore_lfc_state\n  activate ctl\n  ctl -->> cplane: 202 Accepted\n\n  activate cplane\n  cplane ->> ctl: GET /status: poll prewarm status\n  ctl ->> s3proxy: GET /read_file\n  s3proxy ->> s3: read file\n  s3 -->> s3proxy: file content\n  s3proxy -->> ctl: 200 OK: file content\n\n  proxy ->> cplane: GET /proxy_wake_compute\n  cplane -->> proxy: 200 OK: old primary conninfo\n\n  ctl ->> pg: prewarm LFC\n  activate pg\n  pg -->> ctl: prewarm is completed\n  deactivate pg\n\n  ctl -->> cplane: 200 OK: prewarm is completed\n  deactivate ctl\n  deactivate cplane\n\n  cplane -->> cplane: reassign replica compute to endpoint,<br>start terminating the old primary compute\n  activate cplane\n  cplane ->> proxy: invalidate caches\n\n  proxy ->> cplane: GET /proxy_wake_compute\n\n  cplane -x primary: POST /terminate\n  primary -->> cplane: 200 OK\n  note over primary: old primary<br>compute terminated\n\n  cplane ->> ctl: POST /promote\n  activate ctl\n  ctl ->> pg: pg_ctl promote\n  activate pg\n  pg -->> ctl: done\n  deactivate pg\n  ctl -->> cplane: 200 OK\n  deactivate ctl\n\n  
cplane -->> cplane: finalize operation\n  cplane -->> proxy: 200 OK: new primary conninfo\n  deactivate cplane\n```\n\n### Network bandwidth and prewarm speed\n\nIt's currently known that pageserver can sustain about 3000 RPS per shard for a few running computes.\nLarge tenants are usually split into 8 shards, so the final formula may look like this:\n\n```text\n8 shards * 3000 RPS * 8 KB =~ 190 MB/s\n```\n\nso depending on the LFC size, prewarming will take at least:\n\n- ~5s for 1 GB\n- ~50s for 10 GB\n- ~9m for 100 GB\n- \\>1h for 1 TB\n\nIn total, one pageserver is normally capped by 30k RPS, so it obviously can't sustain many computes\ndoing prewarm at the same time. Later, we may need an additional mechanism for computes to throttle\nthe prewarming requests gracefully.\n\n### Reliability, failure modes and corner cases\n\nWe consider following failures while implementing this RFC:\n\n1. Compute got interrupted/crashed/restarted during prewarm. The caller -- control plane -- should\n    detect that and start prewarm from the beginning.\n\n2. Control plane promotion request timed out or hit network issues. If it never reached the\n    compute, control plane should just repeat it. If it did reach the compute, then during\n    retry control plane can hit `429` as previous request triggered the promotion already.\n    In this case, control plane needs to retry until either `200` or\n    permanent error `500` is returned.\n\n3. Compute got interrupted/crashed/restarted during promotion. At restart it will ask for\n    a spec from control plane, and its content should signal compute to start as **primary**,\n    so it's expected that control plane will continue polling for certain period of time and\n    will discover that compute is ready to accept connections if restart is fast enough.\n\n4. Any other unexpected failure or timeout during prewarming. 
This **failure mustn't be fatal**,\n    control plane has to report failure, terminate replica and keep primary running.\n\n5. Any other unexpected failure or timeout during promotion. Unfortunately, at this moment\n    we already have the primary node stopped, so the only option is to start primary again\n    and proceed with auto-prewarm.\n\n6. Any unexpected failure during auto-prewarm. This **failure mustn't be fatal**,\n    `compute_ctl` has to report the failure, but do not crash the compute.\n\n7. Control plane failed to confirm that old primary has terminated. This can happen, especially\n    in the future HA setup. In this case, control plane has to ensure that it sent VM deletion\n    and pod termination requests to k8s, so long-term we do not have two running primaries\n    on the same timeline.\n\n### Security implications\n\nThere are two security implications to consider:\n\n1. Access to `compute_ctl` API. It has to be accessible from the outside of compute, so all\n    new API methods have to be exposed on the **external** HTTP port and **must** be authenticated\n    with JWT.\n\n2. Read/write only your own LFC state data in S3. Although it's not really a security concern,\n    since LFC state is just a mapping of blocks present in LFC at certain moment in time;\n    it still has to be highly restricted, so that i) only computes on the same timeline can\n    read S3 state; ii) each compute can only write to the path that contains its `endpoint_id`.\n    Both of these must be validated by Endpoint storage service using the JWT token provided by `compute_ctl`.\n\n### Unresolved questions\n\n#### Billing, metrics and monitoring\n\nCurrently, we only label computes with `endpoint_id` after attaching them to the endpoint.\nIn this proposal, this means that temporary replica will remain unlabelled until it's promoted\nto primary. 
We can also hide it from users in the control plane API, but what to do with\nbilling and monitoring is still unclear.\n\nWe can probably mark it as 'billable' and tag with `project_id`, so it will be billed, but\nnot interfere in any way with the current primary monitoring.\n\nAnother thing to consider is how logs and metrics export will switch to the new compute.\nIt's expected that OpenTelemetry collector will auto-discover the new compute and start\nscraping metrics from it.\n\n#### Auto-prewarm\n\nIt's still an open question whether we need auto-prewarm at all. The author's gut-feeling is\nthat yes, we need it, but maybe not for all workloads, so it could end up exposed as a\nuser-controllable knob on the endpoint. There are two arguments for that:\n\n1. Auto-prewarm exists in upstream's `pg_prewarm`, _probably for a reason_.\n\n2. There could still be 2 flows when we cannot perform the rolling restart via the warm\n    replica: i) any failure or interruption during promotion; ii) wake up after scale-to-zero.\n    The latter might be challenged as well, i.e. one can argue that auto-prewarm may and will\n    compete with user-workload for storage resources. This is correct, but it might as well\n    reduce the time to get warm LFC and good performance.\n\n#### Low-level details of the replica promotion\n\nThere are many things to consider here, but three items just off the top of my head:\n\n1. How to properly start the `walproposer` inside Postgres.\n\n2. What to do with logical replication. Currently, we do not include logical replication slots\n    inside basebackup, because nobody advances them at replica, so they just prevent the WAL\n    deletion. Yet, we do need to have them at primary after promotion. 
Starting with Postgres 17,\n    there is a new feature called\n    [logical replication failover](https://www.postgresql.org/docs/current/logical-replication-failover.html)\n    and `synchronized_standby_slots` setting, but we need a plan for the older versions. Should we\n    request a new basebackup during promotion?\n\n3. How do we guarantee that replica will receive all the latest WAL from safekeepers? Do some\n    'shallow' version of sync safekeepers without data copying? Or just a standard version of\n    sync safekeepers?\n\n## Alternative implementation\n\nThe proposal already assumes one of the alternatives -- do not have any persistent storage for\nLFC state. This is possible to implement faster with the proposed API, but it means that\nwe do not implement auto-prewarm yet.\n\n## Definition of Done\n\nAt the end of implementing this RFC we should have two high-level settings that enable:\n\n1. Auto-prewarm of user computes upon restart.\n2. Perform primary compute restart via the warm replica promotion.\n\nIt also has to be decided what's the criteria for enabling one or both of these flows for\ncertain clients.\n"
  },
  {
    "path": "docs/rfcs/2025-04-30-direct-io-for-pageserver.md",
    "content": "# Direct IO For Pageserver\n\nDate: Apr 30, 2025\n\n## Summary\n\nThis document is a retroactive RFC. It\n- provides some background on what direct IO is,\n- motivates why Pageserver should be using it for its IO, and\n- describes how we changed Pageserver to use it.\n\nThe [initial proposal](https://github.com/neondatabase/neon/pull/8240) that kicked off the work can be found in this closed GitHub PR.\n\nPeople primarily involved in this project were:\n- Yuchen Liang <yuchen@neon.tech>\n- Vlad Lazar <vlad@neon.tech>\n- Christian Schwarz <christian@neon.tech>\n\n## Timeline\n\nFor posterity, here is the rough timeline of the development work that got us to where we are today.\n\n- Jan 2024: [integrate `tokio-epoll-uring`](https://github.com/neondatabase/neon/pull/5824) along with owned buffers API\n- March 2024: `tokio-epoll-uring` enabled in all regions in buffered IO mode\n- Feb 2024 to June 2024: PS PageCache Bypass For Data Blocks\n  - Feb 2024: [Vectored Get Implementation](https://github.com/neondatabase/neon/pull/6576) bypasses delta & image layer blocks for page requests\n  - Apr to June 2024: [Epic: bypass PageCache for user data blocks](https://github.com/neondatabase/neon/issues/7386) addresses remaining users\n- Aug to Nov 2024: direct IO: first code; preliminaries; read path coding; BufferedWriter; benchmarks show perf regressions too high, no-go.\n- Nov 2024 to Jan 2025: address perf regressions by developing page_service pipelining (aka batching) and concurrent IO ([Epic](https://github.com/neondatabase/neon/issues/9376))\n- Feb to March 2025: roll out batching, then concurrent+direct IO => read path and InMemoryLayer is now direct IO\n- Apr 2025: develop & roll out direct IO for the write path\n\n## Background: Terminology & Glossary\n\n**kernel page cache**: the Linux kernel's page cache is a write-back cache for filesystem contents.\nThe cached unit is memory-page-sized & aligned chunks of the files that are being cached (typically 
4k).\nThe cache lives in kernel memory and is not directly accessible from userspace.\n\n**Buffered IO**: an application's read/write system calls go through the kernel page cache.\nFor example, a 10 byte sized read or write to offset 5000 in a file will load the file contents\nat offset `[4096,8192)` into a free page in the kernel page cache. If necessary, it will evict\na page to make room (cf eviction). Then, the kernel performs a memory-to-memory copy of 10 bytes\nfrom/to the offset `904` (`5000 = 4096 + 904`) within the cached page. If it's a write, the kernel keeps\ntrack of the fact that the page is now \"dirty\" in some ancillary structure.\n\n**Writeback**: a buffered read/write syscall returns after the memory-to-memory copy. The modifications\nmade by e.g. write system calls are not even *issued* to disk, let alone durable. Instead, the kernel\nasynchronously writes back dirtied pages based on a variety of conditions. For us, the most relevant\nones are a) explicit request by userspace (`fsync`) and b) memory pressure.\n\n**Memory pressure**: the kernel page cache is a best effort service and a user of spare memory capacity.\nIf there is no free memory, the kernel page allocator will take pages used by page cache to satisfy allocations.\nBefore reusing a page like that, the page has to be written back (writeback, see above).\nThe far-reaching consequence of this is that **any allocation of anonymous memory can do IO** if the only\nway to get that memory is by eviction & re-using a dirty page cache page.\nNotably, this includes a simple `malloc` in userspace, because eventually that boils down to `mmap(..., MAP_ANON, ...)`.\nI refer to this effect as the \"malloc latency backscatter\" caused by buffered IO.\n\n**Direct IO** allows an application's read/write system calls to bypass the kernel page cache. The filesystem\nis still involved because it is ultimately in charge of mapping the concept of files & offsets within them\nto sectors on block devices. 
Typically, the filesystem poses size and alignment requirements for memory buffers\nand file offsets (statx `Dio_mem_align` / `Dio_offset_align`), see [this gist](https://gist.github.com/problame/1c35cac41b7cd617779f8aae50f97155).\nThe IO operations will fail at runtime with EINVAL if the alignment requirements are not met.\n\n**\"buffered\" vs \"direct\"**: the central distinction between buffered and direct IO is about who allocates and\nfills the IO buffers, and who controls when exactly the IOs are issued. In buffered IO, it's the syscall handlers,\nkernel page cache, and memory management subsystems (cf \"writeback\"). In direct IO, all of it is done by\nthe application.\nIt takes more effort by the application to program with direct instead of buffered IO.\nThe return is precise control over and a clear distinction between consumption/modification of memory vs disk.\n\n**Pageserver PageCache**: Pageserver has an additional `PageCache` (referred to as PS PageCache from here on, as opposed to \"kernel page cache\").\nIts caching unit is 8KiB blocks of the layer files written by Pageserver.\nA miss in PageCache is filled by reading from the filesystem, through the `VirtualFile` abstraction layer.\nThe default size is tiny (64MiB), very much like Postgres's `shared_buffers`.\nWe ran production at 128MiB for a long time but gradually moved it up to 2GiB over the past ~year.\n\n**VirtualFile** is Pageserver's abstraction for file IO, very similar to the facility in Postgres that bears the same name.\nIts historical purpose appears to be working around open file descriptor limitations, which is practically irrelevant on Linux.\nHowever, the facility in Pageserver is useful as an intermediary layer for metrics and abstracts over the different kinds of\nIO engines that Pageserver supports (`std-fs` vs `tokio-epoll-uring`).\n\n## Background: History Of Caching In Pageserver\n\nFor multiple years, Pageserver's `PageCache` was on the path of all read _and write_ IO.\nIt 
performed write-back to the kernel using buffered IO.\n\nWe converted it into a read-only cache of immutable data in [PR 4994](https://github.com/neondatabase/neon/pull/4994).\n\nThe introduction of `tokio-epoll-uring` required converting the code base to use owned IO buffers.\nThe `PageCache` pages are usable as owned IO buffers.\n\nWe then started bypassing PageCache for user data blocks.\nData blocks are the 8k blocks of data in layer files that hold the multiple `Value`s, as opposed to the disk btree index blocks that tell us which values exist in a file at what offsets.\nThe disk btree embedded in delta & image layers remains `PageCache`'d.\nEpics for that work were:\n- Vectored `Timeline::get` (cf RFC 30) skipped delta and image layer data block `PageCache`ing outright.\n- Epic https://github.com/neondatabase/neon/issues/7386 took care of the remaining users for data blocks:\n  - Materialized page cache (cached materialized pages; shown to be ~0% hit rate in practice)\n  - InMemoryLayer\n  - Compaction\n\nThe outcome of the above:\n1. All data blocks are always read through the `VirtualFile` APIs, hitting the kernel buffered read path (=> kernel page cache).\n2. 
Indirect blocks (=disk btree blocks) would be cached in the PS `PageCache`.\n\nIn production we size the PS `PageCache` to be 2GiB.\nThis drives the hit rate up to ~99.95% and the eviction rate / replacement rates down to less than 200/second on a 1-minute average, on the busiest machines.\nHigh baseline replacement rates are treated as a signal of resource exhaustion (page cache insufficient to host working set of the PS).\nThe response to this is to migrate tenants away, or increase PS `PageCache` size.\nIt is currently manual but could be automated, e.g., in Storage Controller.\n\nIn the future, we may eliminate the `PageCache` even for indirect blocks.\nFor example with an LRU cache that has as unit the entire disk btree content\ninstead of individual blocks.\n\n## High-Level Design\n\nSo, before work on this project started, all data block reads and the entire write path of Pageserver were using kernel-buffered IO, i.e., the kernel page cache.\nWe now want to get the kernel page cache out of the picture by using direct IO for all interaction with the filesystem.\nThis achieves the following system properties:\n\n**Predictable VirtualFile latencies**\n* With buffered IO, reads are sometimes fast, sometimes slow, depending on kernel page cache hit/miss.\n* With buffered IO, appends when writing out new layer files during ingest or compaction are sometimes fast, sometimes slow because of write-back backpressure.\n* With buffered IO, the \"malloc backscatter\" phenomenon pointed out in the Glossary section is not something we actively observe.\n  But we do have occasional spikes in Dirty memory amount and Memory PSI graphs, so it may already be affecting us to some degree.\n* By switching to direct IO, above operations will have the (predictable) device latency -- always.\n  Reads and appends always go to disk.\n  And malloc will not have to write back dirty data.\n\n**Explicitness & Tangibility of resource usage**\n* In a multi-tenant system, it is generally desirable 
and valuable to be *explicit* about the main resources we use for each tenant.\n* By using direct IO, we become explicit about the resources *disk IOPs*  and *memory capacity* in a way that was previously being conflated through the kernel page cache, outside our immediate control.\n* We will be able to build per-tenant observability of resource usage (\"what tenant is causing the actual IOs that are sent to the disk?\").\n* We will be able to build accounting & QoS by implementing an IO scheduler that is tenant aware. The kernel is not tenant-aware and can't do that.\n\n**CPU Efficiency**\n* The involvement of the kernel page cache means one additional memory-to-memory copy on read and write path.\n* Direct IO will eliminate that memory-to-memory copy, if we can make the userspace buffers used for the IO calls satisfy direct IO alignment requirements.\n\nThe **trade-off** is that we no longer get the theoretical benefits of the kernel page cache. These are:\n- read latency improvements for repeat reads of the same data (\"locality of reference\")\n  - asterisk: only if that state is still cache-resident by time of next access\n- write throughput by having kernel page cache batch small VFS writes into bigger disk writes\n  - asterisk: only if memory pressure is low enough that the kernel can afford to delay writeback\n\nWe are **happy to make this trade-off**:\n- Because of the advantages listed above.\n- Because we empirically have enough DRAM on Pageservers to serve metadata (=index blocks) from PS PageCache.\n  (At just 2GiB PS PageCache size, we average a 99.95% hit rate).\n  So, the latency of going to disk is only for data block reads, not the index traversal.\n- Because **the kernel page cache is ineffective** at high tenant density anyway (#tenants/pageserver instance).\n  And because dense packing of tenants will always be desirable to drive COGS down, we should design the system for it.\n  (See the appendix for a more detailed explanation why this is).\n- 
So, we accept that some reads that used to be fast by circumstance will have higher but **predictable** latency than before.\n\n### Desired End State\n\nThe desired end state of the project is as follows, and with some asterisks, we have achieved it.\n\nAll IOs of the Pageserver data path use direct IO, thereby bypassing the kernel page cache.\n\nIn particular, the \"data path\" includes\n- the wal ingest path\n- compaction\n- anything on the `Timeline::get` / `Timeline::get_vectored` path.\n\nThe production Pageserver config is tuned such that virtually all non-data blocks are cached in the PS PageCache.\nHit rate target is 99.95%.\n\nThere are no regressions to ingest latency.\n\nThe total \"wait-for-disk time\" contribution to random getpage request latency is `O(1 read IOP latency)`.\nWe accomplish that by having a near 100% PS PageCache hit rate so that layer index traversal effectively never needs to wait for IO.\nThereby, it can issue all the data block reads as it traverses the index, and only wait at the end of it (concurrent IO).\n\nThe amortized \"wait-for-disk time\" contribution of this direct IO proposal to a series of sequential getpage requests is `1/32 * read IOP latency` for each getpage request.\nWe accomplish this by server-side batching of up to 32 reads into a single `Timeline::get_vectored` call.\n(This is an ideal world where our batches are full - that's not the case in prod today because of lack of queue depth).\n\n## Design & Implementation\n\n### Prerequisites\n\nA lot of prerequisite work had to happen to enable use of direct IO.\n\nTo meet the \"wait-for-disk time\" requirements from the Desired End State section above, we implement for the read path:\n- page_service level server-side batching (config field `page_service_pipelining`)\n- concurrent IO (config field `get_vectored_concurrent_io`)\nThe work for both of these was tracked [in the epic](https://github.com/neondatabase/neon/issues/9376).\nServer-side batching will likely be obsoleted by the 
[#proj-compute-communicator](https://github.com/neondatabase/neon/pull/10799).\nThe Concurrent IO work is described in retroactive RFC `2025-04-30-pageserver-concurrent-io-on-read-path.md`.\nThe implementation is relatively brittle and needs further investment, see the `Future Work` section in that RFC.\n\nFor the write path, and especially WAL ingest, we need to hide write latency.\nWe accomplish this by implementing a (`BufferedWriter`) type that does double-buffering: flushes of the filled\nbuffer happen in a sidecar tokio task while new writes fill a new buffer.\nWe refactor InMemoryLayer as well as BlobWriter (=> delta and image layer writers) to use this new `BufferedWriter`.\nThe most comprehensive write-up of this work is in [the PR description](https://github.com/neondatabase/neon/pull/11558).\n\n### Ensuring Adherence to Alignment Requirements\n\nDirect IO puts requirements on\n- memory buffer alignment\n- io size (=memory buffer size)\n- file offset alignment\n\nThe requirements are specific to a combination of filesystem/block-device/architecture(hardware page size!).\n\nIn Neon production environments we currently use ext4 with Linux 6.1.X on AWS and Azure storage-optimized instances (locally attached NVMe).\nInstead of dynamic discovery using `statx`, we statically hard-code 512 bytes as the buffer/offset alignment and size-multiple.\nWe made this decision because:\n- a) it is compatible with all the environments we need to run in\n- b) our primary workload can be small-random-read-heavy (we do merge adjacent reads if possible, but the worst case is that all `Value`s that needs to be read are far apart)\n- c) 512-byte tail latency on the production instance types is much better than 4k (p99.9: 3x lower, p99.99 5x lower).\n- d) hard-coding at compile-time allows us to use the Rust type system to enforce the use of only aligned IO buffers, eliminating a source of runtime errors typically associated with direct IO.\n\nThis was [discussed 
here](https://neondb.slack.com/archives/C07BZ38E6SD/p1725036790965549?thread_ts=1725026845.455259&cid=C07BZ38E6SD).\n\nThe new `IoBufAligned` / `IoBufAlignedMut` marker traits indicate that a given buffer meets memory alignment requirements.\nAll `VirtualFile` APIs and several software layers built on top of them only accept buffers that implement those traits.\nImplementors of the marker traits are:\n- `IoBuffer` / `IoBufferMut`: used for most reads and writes\n- `PageWriteGuardBuf`: for filling PS PageCache pages (index blocks!)\n\nThe alignment requirement is infectious; it permeates bottom-up throughout the code base.\nWe stop the infection at roughly the same layers in the code base where we stopped permeating the\nuse of owned-buffers-style API for tokio-epoll-uring. The way the stopping works is by introducing\na memory-to-memory copy from/to some unaligned memory location on the stack/current/heap.\nThe places where we currently stop permeating are sort of arbitrary. For example, it would probably\nmake sense to replace more usage of `Bytes` that we know holds 8k pages with 8k-sized `IoBuffer`s.\n\nThe `IoBufAligned` / `IoBufAlignedMut` types do not protect us from the following types of runtime errors:\n- non-adherence to file offset alignment requirements\n- non-adherence to io size requirements\n\nThe following higher-level constructs ensure we meet the requirements:\n- read path: the `ChunkedVectoredReadBuilder` and `mod vectored_dio_read` ensure reads happen at aligned offsets and in appropriate size multiples.\n- write path: `BufferedWriter` only writes in multiples of the capacity, at offsets that are `start_offset+N*capacity`; see its doc comment.\n\nNote that these types are used always, regardless of whether direct IO is enabled or not.\nThere are some cases where this adds unnecessary overhead to buffered IO (e.g. 
all memcpy's inflated to multiples of 512).\nBut we could not identify meaningful impact in practice when we shipped these changes while we were still using buffered IO.\n\n### Configuration / Feature Flagging\n\nIn the previous section we described how all users of VirtualFile were changed to always adhere to direct IO alignment and size-multiple requirements.\nTo actually enable direct IO, all we need to do is set the `O_DIRECT` flag in `open` syscalls / io_uring operations.\n\nWe set `O_DIRECT` based on:\n- the VirtualFile API used to create/open the VirtualFile instance\n- the `virtual_file_io_mode` configuration flag\n- the OpenOptions `read` and/or `write` flags.\n\nThe VirtualFile APIs suffixed with `_v2` are the only ones that _may_ open with `O_DIRECT` depending on the other two factors in the above list.\nOther APIs never use `O_DIRECT`.\n(The name is bad and should really be `_maybe_direct_io`.)\n\nThe reason for having new APIs is because all code used VirtualFile but implementation and rollout happened in consecutive phases (read path, InMemoryLayer, write path).\nAt the VirtualFile level, context on whether an instance of VirtualFile is on read path, InMemoryLayer, or write path is not available.\n\nThe `_v2` APIs then make the decision to set `O_DIRECT` based on the `virtual_file_io_mode` flag and the OpenOptions `read`/`write` flags.\nThe result is the following runtime behavior:\n\n|what|OpenOptions|`v_f_io_mode`<br/>=`buffered`|`v_f_io_mode`<br/>=`direct`|`v_f_io_mode`<br/>=`direct-rw`|\n|-|-|-|-|-|\n|`DeltaLayerInner`|read|()|O_DIRECT|O_DIRECT|\n|`ImageLayerInner`|read|()|O_DIRECT|O_DIRECT|\n|`InMemoryLayer`|read + write|()|()*|O_DIRECT|\n|`DeltaLayerWriter`| write | () | () |  O_DIRECT |\n|`ImageLayerWriter`| write | () | () |  O_DIRECT |\n|`download_layer_file`|write |()|()|O_DIRECT|\n\nThe `InMemoryLayer` is marked with `*` because there was a period when it *did* use O_DIRECT under `=direct`.\nThat period was when we implemented and shipped 
the first version of `BufferedWriter`.\nWe used it in `InMemoryLayer` and `download_layer_file` but it was only sensitive to `v_f_io_mode` in `InMemoryLayer`.\nThe introduction of `=direct-rw`, and the switch of the remaining write path to `BufferedWriter`, happened later,\nin https://github.com/neondatabase/neon/pull/11558.\n\nNote that this way of feature flagging inside VirtualFile makes it less and less a general purpose POSIX file access abstraction.\nFor example, with `=direct-rw` enabled, it is no longer possible to open a `VirtualFile` without `O_DIRECT`. It'll always be set.\n\n## Correctness Validation\n\nThe correctness risks with this project were:\n- Memory safety issues in the `IoBuffer` / `IoBufferMut` implementation.\n  These types expose an API that is largely identical to that of the `bytes` crate and/or Vec.\n- Runtime errors (=> downtime / unavailability) because of non-adherence to alignment/size-multiple requirements, resulting in EINVAL on the read path.\n\nWe sadly do not have infrastructure to run pageserver under `cargo miri`.\nSo for memory safety issues, we relied on careful peer review.\n\nWe do assert the production-like alignment requirements in testing builds.\nHowever, these asserts were added retroactively.\nThe actual validation before rollout happened in staging and pre-prod.\nWe eventually enabled  `=direct`/`=direct-rw` for Rust unit tests and the regression test suite.\nI cannot recall a single instance of staging/pre-prod/production errors caused by non-adherence to alignment/size-multiple requirements.\nEvidently developer testing was good enough.\n\n## Performance Validation\n\nThe read path went through a lot of iterations of benchmarking in staging and pre-prod.\nThe benchmarks in those environments demonstrated performance regressions early in the implementation.\nIt was actually this performance testing that made us implement batching and concurrent IO to avoid unacceptable regressions.\n\nThe write path was much 
quicker to validate because `bench_ingest` covered all of the (less numerous) access patterns.\n\n## Future Work\n\nThere is minor and major follow-up work that can be considered in the future.\nCheck the (soon-to-be-closed) Epic https://github.com/neondatabase/neon/issues/8130's \"Follow-Ups\" section for a current list.\n\nRead Path:\n- PS PageCache hit rate is crucial to unlock concurrent IO and reasonable latency for random reads generally.\n  Instead of reactively sizing PS PageCache, we should estimate the required PS PageCache size\n  and potentially also use that to drive placement decisions of shards from StorageController\n  https://github.com/neondatabase/neon/issues/9288\n- ... unless we get rid of PS PageCache entirely and cache the index block in a more specialized cache.\n  But even then, an estimation of the working set would be helpful to figure out caching strategy.\n\nWrite Path:\n- BlobWriter and its users could switch back to a borrowed API  https://github.com/neondatabase/neon/issues/10129\n- ... 
unless we want to implement bypass mode for large writes https://github.com/neondatabase/neon/issues/10101\n- The `TempVirtualFile` introduced as part of this project could internalize more of the common usage pattern: https://github.com/neondatabase/neon/issues/11692\n- Reduce conditional compilation around `virtual_file_io_mode`: https://github.com/neondatabase/neon/issues/11676\n\nBoth:\n- A performance simulation mode that pads VirtualFile op latencies to typical NVMe latencies, even if the underlying storage is faster.\n  This would avoid misleadingly good performance on developer systems and in benchmarks on systems that are less busy than production hosts.\n  However, padding latencies at microsecond scale is non-trivial.\n\nMisc:\n- We should finish trimming VirtualFile's scope to be truly limited to core data path read & write.\n  Abstractions for reading & writing pageserver config, location config, heatmaps, etc, should use\n  APIs in a different package (`VirtualFile::crashsafe_overwrite` and `VirtualFile::read_to_string`\n  are good entrypoints for cleanup.) 
https://github.com/neondatabase/neon/issues/11809\n\n# Appendix\n\n## Why Kernel Page Cache Is Ineffective At Tenant High Density\n\nIn the High-Level Design section, we stated:\n\n> - Because **the kernel page cache is ineffective** at high tenant density anyway (#tenants/pageserver instance).\n\nThe reason is that the Pageserver workload sent from Computes is whatever is a Compute cache(s) miss.\nThat's either sequential scans or random reads.\nA random read workload simply causes cache thrashing because a packed Pageserver NVMe drive (`im4gn.2xlarge`) has ~100x more capacity than DRAM available.\nIt is a complete waste to have the kernel page cache cache data blocks in this case.\nSequential read workloads *can* benefit iff those pages have been updated recently (=no image layer yet) and together in time/LSN space.\nIn such cases, the WAL records of those updates likely sit on the same delta layer block.\nWhen Compute does a sequential scan, it sends a series of single-page requests for these individual pages.\nWhen Pageserver processes the second request in such a series, it goes to the same delta layer block and has a kernel page cache hit.\nThis dependence on kernel page cache for sequential scan performance is significant, but the solution is at a higher level than generic data block caching.\nWe can either add a small per-connection LRU cache for such delta layer blocks.\nOr we can merge those sequential requests into a larger vectored get request, which is designed to never read a block twice.\nThis amortizes the read latency for our delta layer block across the vectored get batch size (which currently is up to 32).\n\nThere are Pageserver-internal workloads that do sequential access (compaction, image layer generation), but these\n1. are not latency-critical and can do batched access outside of the `page_service` protocol constraints (image layer generation)\n2. 
don't actually need to reconstruct images and therefore can use totally different access methods (=> compaction can use k-way merge iterators with their own internal buffering / prefetching).\n"
  },
  {
    "path": "docs/rfcs/2025-04-30-pageserver-concurrent-io-on-read-path.md",
    "content": "# Concurrent IO for Pageserver Read Path\n\nDate: May 6, 2025\n\n## Summary\n\nThis document is a retroactive RFC on the Pageserver Concurrent IO work that happened in late 2024 / early 2025.\n\nThe gist of it is that Pageserver's `Timeline::get_vectored` now _issues_ the data block read operations against layer files\n_as it traverses the layer map_ and only _waits_ once, for all of them, after traversal is complete.\n\nAssuming good PS PageCache hit rates on the index blocks during traversal, this drives the \"wait-for-disk\" time\ncontribution down from `random_read_io_latency * O(number_of_values)` to `random_read_io_latency * O(1 + traversal)`.\n\nThe motivation for why this work had to happen when it happened was the switch of Pageserver to\n- not cache user data blocks in PS PageCache and\n- switch to use direct IO.\nMore context on this is given in the complementary RFC `./rfcs/2025-04-30-direct-io-for-pageserver.md`.\n\n### Refs\n\n- Epic: https://github.com/neondatabase/neon/issues/9378\n- Prototyping happened during the Lisbon 2024 Offsite hackathon: https://github.com/neondatabase/neon/pull/9002\n- Main implementation PR with good description: https://github.com/neondatabase/neon/issues/9378\n\nDesign and implementation by:\n- Vlad Lazar <vlad@neon.tech>\n- Christian Schwarz <christian@neon.tech>\n\n## Background & Motivation\n\nThe Pageserver read path (`Timeline::get_vectored`) consists of two high-level steps:\n- Retrieve the delta and image `Value`s required to reconstruct the requested Page@LSN (`Timeline::get_values_reconstruct_data`).\n- Pass these values to walredo to reconstruct the page images.\n\nThe read path used to be single-key but has been made multi-key some time ago.\n([Internal tech talk by Vlad](https://drive.google.com/file/d/1vfY24S869UP8lEUUDHRWKF1AJn8fpWoJ/view?usp=drive_link))\nHowever, for simplicity, most of this doc will explain things in terms of a single key being requested.\n\nThe `Value` retrieval step above 
can be broken down into the following functions:\n- **Traversal** of the layer map to figure out which `Value`s from which layer files are required for the page reconstruction.\n- **Read IO Planning**: planning of the read IOs that need to be issued to the layer files / filesystem / disk.\n  The main job here is to coalesce the small value reads into larger filesystem-level read operations.\n  This layer also takes care of direct IO alignment and size-multiple requirements (cf the Direct IO RFC for details.)\n  Check `struct VectoredReadPlanner` and `mod vectored_dio_read` for how it's done.\n- **Perform the read IO** using `tokio-epoll-uring`.\n\nBefore this project, above functions were sequentially interleaved, meaning:\n1. we would advance traversal, ...\n2. discover that we need to read a value, ...\n3. read it from disk using `tokio-epoll-uring`, ...\n4. goto 1 unless we're done.\n\nThis meant that if N `Value`s need to be read to reconstruct a page,\nthe time we spend waiting for disk will be `random_read_io_latency * O(number_of_values)`.\n\n## Design\n\nThe **traversal** and **read IO Planning** jobs still happen sequentially, layer by layer, as before.\nBut instead of performing the read IOs inline, we submit the IOs to a concurrent tokio task for execution.\nAfter the last read from the last layer is submitted, we wait for the IOs to complete.\n\nAssuming the filesystem / disk is able to actually process the submitted IOs without queuing,\nwe arrive at _time spent waiting for disk_ ~ `random_read_io_latency * O(1 + traversal)`.\n\nNote this whole RFC is concerned with the steady state where all layer files required for reconstruction are resident on local NVMe.\nTraversal will stall on on-demand layer download if a layer is not yet resident.\nIt cannot proceed without the layer being resident because its next step depends on the contents of the layer index.\n\n### Avoiding Waiting For IO During Traversal\n\nThe `traversal` component in above 
time-spent-waiting-for-disk estimation is dominant and needs to be minimized.\n\nBefore this project, traversal needed to perform IOs for the following:\n1. The time we are waiting on PS PageCache to page in the visited layers' disk btree index blocks.\n2. When visiting a delta layer, reading the data block that contains a `Value` for a requested key,\n   to determine whether the `Value::will_init` the page and therefore traversal can stop for this key.\n\nThe solution for (1) is to raise the PS PageCache size such that the hit rate is practically 100%.\n(Check out the `Background: History Of Caching In Pageserver` section in the RFC on Direct IO for more details.)\n\nThe solution for (2) is to source `will_init` from the disk btree index keys, which fortunately\nalready encode this bit of information since the introduction of the current storage/layer format.\n\n### Concurrent IOs, Submission & Completion\n\nTo separate IO submission from waiting for its completion,\nwe introduce the notion of an `IoConcurrency` struct through which IOs are issued.\n\nAn IO is an opaque future that\n- captures the `tx` side of a `oneshot` channel\n- performs the read IO by calling `VirtualFile::read_exact_at().await`\n- sends the result into the `tx`\n\nIssuing an IO means `Box`ing the future above and handing that `Box` over to the `IoConcurrency` struct.\n\nThe traversal code that submits the IO stores the corresponding `oneshot::Receiver`\nin the `VectoredValueReconstructState`, in the place where we previously stored\nthe sequentially read `img` and `records` fields.\n\nWhen we're done with traversal, we wait for all submitted IOs:\nfor each key, there is a future that awaits all the `oneshot::Receiver`s\nfor that key, and then calls into walredo to reconstruct the page image.\nWalredo is now invoked concurrently for each value instead of sequentially.\nWalredo itself remains unchanged.\n\nThe spawned IO futures are driven to completion by a sidecar tokio task that\nis 
separate from the task that performs all the layer visiting and spawning of IOs.\nThat task receives the IO futures via an unbounded mpsc channel and\ndrives them to completion inside a `FuturesUnordered`.\n\n### Error handling, Panics, Cancellation-Safety\n\nThere are two error classes during reconstruct data retrieval:\n* traversal errors: index lookup, move to next layer, and the like\n* value read IO errors\n\nA traversal error fails the entire `get_vectored` request, as before this PR.\nA value read error only fails reconstruction of that value.\n\nPanics and dropping of the `get_vectored` future before it completes\nleave the sidecar task running and do not cancel submitted IOs\n(see next section for details on sidecar task lifecycle).\nAll of this is safe, but, today's preference in the team is to close out\nall resource usage explicitly if possible, rather than cancelling + forgetting\nabout it on drop. So, there is a warning if we drop a\n`VectoredValueReconstructState`/`ValuesReconstructState` that still has uncompleted IOs.\n\n### Sidecar Task Lifecycle\n\nThe sidecar tokio task is spawned as part of the `IoConcurrency::spawn_from_conf` struct.\nThe `IoConcurrency` object acts as a handle through which IO futures are submitted.\n\nThe spawned tokio task holds the `Timeline::gate` open.\nIt is _not_ sensitive to `Timeline::cancel`, but instead to the `IoConcurrency` object being dropped.\n\nOnce the `IoConcurrency` struct is dropped, no new IO futures can come in\nbut already submitted IO futures will be driven to completion regardless.\nWe _could_ safely stop polling these futures because `tokio-epoll-uring` op futures are cancel-safe.\nBut the underlying kernel and hardware resources are not magically freed up by that.\nSo, again, in the interest of closing out all outstanding resource usage, we make timeline shutdown wait for sidecar tasks and their IOs to complete.\nUnder normal conditions, this should be in the low hundreds of microseconds.\n\nIt 
is advisable to make the `IoConcurrency` as long-lived as possible to minimize the amount of\ntokio task churn (=> lower pressure on tokio). Generally this means creating it \"high up\" in the call stack.\nThe pain with this is that the `IoConcurrency` reference needs to be propagated \"down\" to\nthe (short-lived) functions/scope where we issue the IOs.\nWe would like to use `RequestContext` for this propagation in the future (issue [here](https://github.com/neondatabase/neon/issues/10460)).\nFor now, we just add another argument to the relevant code paths.\n\n### Feature Gating\n\nThe `IoConcurrency` is an `enum` with two variants: `Sequential` and `SidecarTask`.\n\nThe behavior from before this project is available through `IoConcurrency::Sequential`,\nwhich awaits the IO futures in place, without \"spawning\" or \"submitting\" them anywhere.\n\nThe `get_vectored_concurrent_io` pageserver config variable determines the runtime value,\n**except** for the places that use `IoConcurrency::sequential` to get an `IoConcurrency` object.\n\n### Alternatives Explored & Caveats Encountered\n\nA few words on the rationale behind having a sidecar *task* and what\nalternatives were considered but abandoned.\n\n#### Why We Need A Sidecar *Task* / Why Just `FuturesUnordered` Doesn't Work\n\nWe explored to not have a sidecar task, and instead have a `FuturesUnordered` per\n`Timeline::get_vectored`. 
We would queue all IO futures in it and poll it for the\nfirst time after traversal is complete (i.e., at `collect_pending_ios`).\n\nThe obvious disadvantage, but not showstopper, is that we wouldn't be submitting\nIOs until traversal is complete.\n\nThe showstopper, however, is that deadlocks happen if we don't drive the\nIO futures to completion independently of the traversal task.\nThe reason is that both the IO futures and the traversal task may hold _some_,\n_and_ try to acquire _more_, shared limited resources.\nFor example, both the traversal task and IO future may try to acquire\n* a `VirtualFile` file descriptor cache slot async mutex (observed during impl)\n* a `tokio-epoll-uring` submission slot (observed during impl)\n* a `PageCache` slot (currently this is not the case but we may move more code into the IO futures in the future)\n\n#### Why We Don't Do `tokio::task`-per-IO-future\n\nAnother option is to spawn a short-lived `tokio::task` for each IO future.\nWe implemented and benchmarked it during development, but found little\nthroughput improvement and moderate mean & tail latency degradation.\nConcerns about pressure on the tokio scheduler led us to abandon this variant.\n\n## Future Work\n\nIn addition to what is listed here, also check the \"Punted\" list in the epic:\nhttps://github.com/neondatabase/neon/issues/9378\n\n### Enable `Timeline::get`\n\nThe only major code path that still uses `IoConcurrency::sequential` is `Timeline::get`.\nThe impact is that roughly the following parts of pageserver do not benefit yet:\n- parts of basebackup\n- reads performed by the ingest path\n- most internal operations that read metadata keys (e.g. 
`collect_keyspace`!)\n\nThe solution is to propagate `IoConcurrency` via `RequestContext`: https://github.com/neondatabase/neon/issues/10460\n\nThe tricky part is to figure out at which level of the code the `IoConcurrency` is spawned (and added to the RequestContext).\n\nAlso, propagation via `RequestContext` makes it harder to tell during development whether a given\npiece of code uses concurrent vs sequential mode: one has to recursively walk up the call tree to find the\nplace that puts the `IoConcurrency` into the `RequestContext`.\nWe'd have to use `::Sequential` as the conservative default value in a fresh `RequestContext`, and add some\nobservability to weed out places that fail to enrich with a properly spawned `IoConcurrency::spawn_from_conf`.\n\n### Concurrent On-Demand Downloads enabled by Detached Indices\n\nAs stated earlier, traversal stalls on on-demand download because its next step depends on the contents of the layer index.\nOnce we have separated indices from data blocks (=> https://github.com/neondatabase/neon/issues/11695)\nwe will only need to stall if the index is not resident. The download of the data blocks can happen concurrently or in the background. 
For example:\n- Move the `Layer::get_or_maybe_download().await` inside the IO futures.\n  This goes in the opposite direction of the next \"future work\" item below, but it's easy to do.\n- Serve the IO future directly from object storage and dispatch the layer download\n  to some other actor, e.g., an actor that is responsible for both downloads & eviction.\n\n### New `tokio-epoll-uring` API That Separates Submission & Wait-For-Completion\n\nInstead of the `$op().await`-style API, it would be useful to have a different `tokio-epoll-uring` API\nthat separates enqueuing (without necessarily `io_uring_enter`ing the kernel each time), submission,\nand then waiting for completion.\n\nThe `$op().await` API is too opaque, so we _have_ to stuff it into a `FuturesUnordered`.\n\nA split API as sketched above would allow traversal to ensure an IO operation is enqueued to the kernel/disk (and get back-pressure iff the io_uring squeue is full),\nwhile avoiding spending CPU cycles on processing completions while we're still traversing.\n\nThe idea gets muddied by the fact that we may self-deadlock if we submit too much without completing.\nSo, the submission part of the split API needs to process completions if squeue is full.\n\nIn any case, this split API is a precondition for addressing the bigger issue with the design presented here,\nwhich we discuss in the next section.\n\n### Opaque Futures Are Brittle\n\nThe use of opaque futures to represent submitted IOs is a clever hack to minimize changes & allow for near-perfect feature-gating.\nHowever, we take on **brittleness** because callers must guarantee that the submitted futures are independent.\nIn our experience, it is non-trivial to identify or rule out the interdependencies.\nSee the lengthy doc comment on the `IoConcurrency::spawn_io` method for more details.\n\nThe better interface and proper subsystem boundary is a _descriptive_ struct of what needs to be done (\"read this range from this VirtualFile into this buffer\")\nand get 
back a means to wait for completion.\nThe subsystem can thereby reason on its own about how operations may be related;\nunlike today, where the submitted opaque future can do just about anything.\n"
  },
  {
    "path": "docs/rfcs/2025-07-07-node-deletion-api-improvement.md",
    "content": "# Node deletion API improvement\n\nCreated on 2025-07-07\nImplemented on _TBD_\n\n## Summary\n\nThis RFC describes improvements to the storage controller API for gracefully deleting pageserver\nnodes.\n\n## Motivation\n\nThe basic node deletion API introduced in [#8226](https://github.com/neondatabase/neon/issues/8333)\nhas several limitations:\n\n- Deleted nodes can re-add themselves if they restart (e.g., a flaky node that keeps restarting and\nwe cannot reach via SSH to stop the pageserver). This issue has been resolved by tombstone\nmechanism in [#12036](https://github.com/neondatabase/neon/issues/12036)\n- Process of node deletion is not graceful, i.e. it just imitates a node failure\n\nIn this context, \"graceful\" node deletion means that users do not experience any disruption or\nnegative effects, provided the system remains in a healthy state (i.e., the remaining pageservers\ncan handle the workload and all requirements are met). To achieve this, the system must perform\nlive migration of all tenant shards from the node being deleted while the node is still running\nand continue processing all incoming requests. The node is removed only after all tenant shards\nhave been safely migrated.\n\nAlthough live migrations can be achieved with the drain functionality, it leads to incorrect shard\nplacement, such as not matching availability zones. This results in unnecessary work to optimize\nthe placement that was just recently performed.\n\nIf we delete a node before its tenant shards are fully moved, the new node won't have all the\nneeded data (e.g. heatmaps) ready. This means user requests to the new node will be much slower at\nfirst. If there are many tenant shards, this slowdown affects a huge amount of users.\n\nGraceful node deletion is more complicated and can introduce new issues. It takes longer because\nlive migration of each tenant shard can last several minutes. 
Using non-blocking accessors may\nalso cause deletion to wait if other processes are holding inner state lock. It also gets trickier\nbecause we need to handle other requests, like drain and fill, at the same time.\n\n## Impacted components (e.g. pageserver, safekeeper, console, etc)\n\n- storage controller\n- pageserver (indirectly)\n\n## Proposed implementation\n\n### Tombstones\n\nTo resolve the problem of deleted nodes re-adding themselves, a tombstone mechanism was introduced\nas part of the node stored information. Each node has a separate `NodeLifecycle` field with two\npossible states: `Active` and `Deleted`. When node deletion completes, the database row is not\ndeleted but instead has its `NodeLifecycle` column switched to `Deleted`. Nodes with `Deleted`\nlifecycle are treated as if the row is absent for most handlers, with several exceptions: reattach\nand register functionality must be aware of tombstones. Additionally, new debug handlers are\navailable for listing and deleting tombstones via the `/debug/v1/tombstone` path.\n\n### Gracefulness\n\nThe problem of making node deletion graceful is complex and involves several challenges:\n\n- **Cancellable**: The operation must be cancellable to allow administrators to abort the process\nif needed, e.g. if run by mistake.\n- **Non-blocking**: We don't want to block deployment operations like draining/filling on the node\ndeletion process. 
We need clear policies for handling concurrent operations: what happens when a\ndrain/fill request arrives while deletion is in progress, and what happens when a delete request\narrives while drain/fill is in progress.\n- **Persistent**: If the storage controller restarts during this long-running operation, we must\npreserve progress and automatically resume the deletion process after the storage controller\nrestarts.\n- **Migrated correctly**: We cannot simply use the existing drain mechanism for nodes scheduled\nfor deletion, as this would move shards to irrelevant locations. The drain process expects the\nnode to return, so it only moves shards to backup locations, not to their preferred AZs. It also\nleaves secondary locations unmoved. This could result in unnecessary load on the storage\ncontroller and inefficient resource utilization.\n- **Force option**: Administrators need the ability to force immediate, non-graceful deletion when\ntime constraints or emergency situations require it, bypassing the normal graceful migration\nprocess.\n\nSee below for a detailed breakdown of the proposed changes and mechanisms.\n\n#### Node lifecycle\n\nNew `NodeLifecycle` enum and a matching database field with these values:\n- `Active`: The normal state. All operations are allowed.\n- `ScheduledForDeletion`: The node is marked to be deleted soon. Deletion may be in progress or\nwill happen later, but the node will eventually be removed. All operations are allowed.\n- `Deleted`: The node is fully deleted. No operations are allowed, and the node cannot be brought\nback. The only action left is to remove its record from the database. 
Any attempt to register a\nnode in this state will fail.\n\nThis state persists across storage controller restarts.\n\n**State transition**\n```\n        +--------------------+\n    +---|       Active       |<---------------------+\n    |   +--------------------+                      |\n    |                     ^                         |\n    | start_node_delete   | cancel_node_delete      |\n    v                     |                         |\n  +----------------------------------+              |\n  |       ScheduledForDeletion       |              |\n  +----------------------------------+              |\n       |                                            |\n       |                              node_register |\n       |                                            |\n       | delete_node (at the finish)                |\n       |                                            |\n       v                                            |\n  +---------+         tombstone_delete        +----------+\n  | Deleted |-------------------------------->|  no row  |\n  +---------+                                 +----------+\n```\n\n#### NodeSchedulingPolicy::Deleting\n\nA `Deleting` variant to the `NodeSchedulingPolicy` enum. This means the deletion function is\nrunning for the node right now. Only one node can have the `Deleting` policy at a time.\n\nThe `NodeSchedulingPolicy::Deleting` state is persisted in the database. However, after a storage\ncontroller restart, any node previously marked as `Deleting` will have its scheduling policy reset\nto `Pause`. The policy will only transition back to `Deleting` when the deletion operation is\nactively started again, as triggered by the node's `NodeLifecycle::ScheduledForDeletion` state.\n\n`NodeSchedulingPolicy` transition details:\n1. When `node_delete` begins, set the policy to `NodeSchedulingPolicy::Deleting`.\n2. 
If `node_delete` is cancelled (for example, due to a concurrent drain operation), revert the\npolicy to its previous value. The policy is persisted in storcon DB.\n3. After `node_delete` completes, the final value of the scheduling policy is irrelevant, since\n`NodeLifecycle::Deleted` prevents any further access to this field.\n\nThe deletion process cannot be initiated for nodes currently undergoing deployment-related\noperations (`Draining`, `Filling`, or `PauseForRestart` policies). Deletion will only be triggered\nonce the node transitions to either the `Active` or `Pause` state.\n\n#### OperationTracker\n\nA replacement for `Option<OperationHandler> ongoing_operation`, the `OperationTracker` is a\ndedicated service state object responsible for managing all long-running node operations (drain,\nfill, delete) with robust concurrency control.\n\nKey responsibilities:\n- Orchestrates the execution of operations\n- Supports cancellation of currently running operations\n- Enforces operation constraints, e.g. allowing only single drain/fill operation at a time\n- Persists deletion state, enabling recovery of pending deletions across restarts\n- Ensures thread safety across concurrent requests\n\n#### Attached tenant shard processing\n\nWhen deleting a node, handle each attached tenant shard as follows:\n\n1. Pick the best node to become the new attached (the candidate).\n2. If the candidate already has this shard as a secondary:\n    - Create a new secondary for the shard on another suitable node.\n   Otherwise:\n    - Create a secondary for the shard on the candidate node.\n3. Wait until all secondaries are ready and pre-warmed.\n4. Promote the candidate's secondary to attached.\n5. Remove the secondary from the node being deleted.\n\nThis process safely moves all attached shards before deleting the node.\n\n#### Secondary tenant shard processing\n\nWhen deleting a node, handle each secondary tenant shard as follows:\n\n1. 
Choose the best node to become the new secondary.\n2. Create a secondary for the shard on that node.\n3. Wait until the new secondary is ready.\n4. Remove the secondary from the node being deleted.\n\nThis ensures all secondary shards are safely moved before deleting the node.\n\n### Reliability, failure modes and corner cases\n\nIn case of a storage controller failure and following restart, the system behavior depends on the\n`NodeLifecycle` state:\n\n- If `NodeLifecycle` is `Active`: No action is taken for this node.\n- If `NodeLifecycle` is `Deleted`: The node will not be re-added.\n- If `NodeLifecycle` is `ScheduledForDeletion`: A deletion background task will be launched for\nthis node.\n\nIn case of a pageserver node failure during deletion, the behavior depends on the `force` flag:\n- If `force` is set: The node deletion will proceed regardless of the node's availability.\n- If `force` is not set: The deletion will be retried a limited number of times. If the node\nremains unavailable, the deletion process will pause and automatically resume when the node\nbecomes healthy again.\n\n### Operations concurrency\n\nThe following sections describe the behavior when different types of requests arrive at the storage\ncontroller and how they interact with ongoing operations.\n\n#### Delete request\n\nHandler: `PUT /control/v1/node/:node_id/delete`\n\n1. If node lifecycle is `NodeLifecycle::ScheduledForDeletion`:\n    - Return `200 OK`: there is already an ongoing deletion request for this node\n2. Update & persist lifecycle to `NodeLifecycle::ScheduledForDeletion`\n3. Persist current scheduling policy\n4. If there is no active operation (drain/fill/delete):\n    - Run deletion process for this node\n\n#### Cancel delete request\n\nHandler: `DELETE /control/v1/node/:node_id/delete`\n\n1. If node lifecycle is not `NodeLifecycle::ScheduledForDeletion`:\n    - Return `404 Not Found`: there is no current deletion request for this node\n2. 
If the active operation is deleting this node, cancel it\n3. Update & persist lifecycle to `NodeLifecycle::Active`\n4. Restore the last scheduling policy from persistence\n\n#### Drain/fill request\n\n1. If there are already ongoing drain/fill processes:\n    - Return `409 Conflict`: queueing of drain/fill processes is not supported\n2. If there is an ongoing delete process:\n    - Cancel it and wait until it is cancelled\n3. Run the drain/fill process\n4. After the drain/fill process is cancelled or finished:\n    - Try to find another candidate to delete and run the deletion process for that node\n\n#### Drain/fill cancel request\n\n1. If the active operation is not the related process:\n    - Return `400 Bad Request`: cancellation request is incorrect, operations are not the same\n2. Cancel the active operation\n3. Try to find another candidate to delete and run the deletion process for that node\n\n## Definition of Done\n\n- [x] Fix flaky node scenario and introduce related debug handlers\n- [ ] Node deletion intent is persistent - a node will be eventually deleted after a deletion\nrequest regardless of draining/filling requests and restarts\n- [ ] Node deletion can be graceful - deletion completes only after moving all tenant shards to\nrecommended locations\n- [ ] Deploying does not break due to long deletions - drain/fill operations override deletion\nprocess and deletion resumes after drain/fill completes\n- [ ] `force` flag is implemented and provides fast, failure-tolerant node removal (e.g., when a\npageserver node does not respond)\n- [ ] Legacy delete handler code is removed from storage_controller, test_runner, and storcon_cli\n"
  },
  {
    "path": "docs/rfcs/README.md",
    "content": "# Neon RFCs\n\n## Overview\n\nThis directory contains Request for Comments documents, or RFCs, for\nfeatures or concepts that have been proposed. Alternative names:\ntechnical design doc, ERD, one-pager\n\nTo make a new proposal, create a new text file in this directory and\nopen a Pull Request with it. That gives others a chance and a forum\nto comment and discuss the design.\n\nWhen a feature is implemented and the code changes are committed, also\ninclude the corresponding RFC in this directory.\n\nSome of the RFCs in this directory have been implemented in some form\nor another, while others are on the roadmap, while still others are\njust obsolete and forgotten about. So read them with a grain of salt,\nbut hopefully even the ones that don't reflect reality give useful\ncontext information.\n\n## What\n\nWe use Tech Design RFC’s to summarize what we are planning to\nimplement in our system. These RFCs should be created for large or not\nobvious technical tasks, e.g. changes of the architecture or bigger\ntasks that could take over a week, changes that touch multiple\ncomponents or their interaction. 
RFCs should fit into a couple of\npages, but could be longer on occasion.\n\n## Why\n\nWe’re using RFCs to enable early review and collaboration, reduce\nuncertainties and risk, and save time during the implementation phase that\nfollows the Tech Design RFC.\n\nTech Design RFCs also aim to avoid bus factor and are an additional\nmeasure to keep more peers up to date & familiar with our design and\narchitecture.\n\nThis is a crucial part for ensuring collaboration across timezones and\nsetting up for success a distributed team that works on complex\ntopics.\n\n## Prior art\n\n- Rust: [https://github.com/rust-lang/rfcs/blob/master/0000-template.md](https://github.com/rust-lang/rfcs/blob/master/0000-template.md)\n- React.js: [https://github.com/reactjs/rfcs/blob/main/0000-template.md](https://github.com/reactjs/rfcs/blob/main/0000-template.md)\n- Google fuchsia: [https://fuchsia.dev/fuchsia-src/contribute/governance/rfcs/TEMPLATE](https://fuchsia.dev/fuchsia-src/contribute/governance/rfcs/TEMPLATE)\n- Apache: [https://cwiki.apache.org/confluence/display/GEODE/RFC+Template](https://cwiki.apache.org/confluence/display/GEODE/RFC+Template) / [https://cwiki.apache.org/confluence/display/GEODE/Lightweight+RFC+Process](https://cwiki.apache.org/confluence/display/GEODE/Lightweight+RFC+Process)\n\n## How\n\nRFC lifecycle:\n\n- Should be submitted in a pull request with the full RFC text in a committed markdown file and a copy of the Summary and Motivation sections also included in the PR body.\n- RFC should be published for review before most of the actual code is written. This isn’t a strict rule, don’t hesitate to experiment and build a POC in parallel with writing an RFC.\n- Add labels to the PR in the same manner as you do Issues. Example TBD\n- Request the review from your peers. 
Reviewing the RFCs from your peers is a priority, same as reviewing the actual code.\n- The Tech Design RFC should evolve based on the feedback received and further during the development phase if problems are discovered with the taken approach\n- RFCs stop evolving once the consensus is found or the proposal is implemented and merged.\n- RFCs are not intended as a documentation that’s kept up to date **after** the implementation is finished. Do not update the Tech Design RFC when merged functionality evolves later on. In such situation a new RFC may be appropriate.\n\n### RFC template\n\nUse template with `YYYY-MM-DD-copy-me.md` as a starting point. Timestamp prefix helps to avoid awkward 'id' collisions.\n\n```sh\ncp docs/rfcs/YYYY-MM-DD-copy-me.md docs/rfcs/$(date +\"%Y-%m-%d\")-<name>.md\n```\n\nNote, a lot of the sections are marked as ‘if relevant’. They are included into the template as a reminder and to help inspiration.\n"
  },
  {
    "path": "docs/rfcs/YYYY-MM-DD-copy-me.md",
    "content": "# Name\n\nCreated on YYYY-MM-DD\nImplemented on _TBD_\n\n## Summary\n\n## Motivation\n\n## Non Goals (if relevant)\n\n## Impacted components (e.g. pageserver, safekeeper, console, etc)\n\n## Proposed implementation\n\n### Reliability, failure modes and corner cases (if relevant)\n\n### Interaction/Sequence diagram (if relevant)\n\n### Scalability (if relevant)\n\n### Security implications (if relevant)\n\n### Unresolved questions (if relevant)\n\n## Alternative implementation (if relevant)\n\n## Pros/cons of proposed approaches (if relevant)\n\n## Definition of Done (if relevant)\n"
  },
  {
    "path": "docs/safekeeper-protocol.md",
    "content": "# WAL proposer-safekeeper communication consensus protocol.\n\n## General requirements and architecture\n\nThere is single stateless master and several safekeepers. Number of safekeepers is determined by redundancy level.\nTo minimize number of changes in Postgres core, we are using standard streaming replication from master (through WAL sender).\nThis replication stream is initiated by the WAL proposer process that runs in the PostgreSQL server, which broadcasts the WAL generated by PostgreSQL to safekeepers.\nTo provide durability we use synchronous replication at master (response to the commit statement is sent to the client\nonly when acknowledged by WAL receiver). WAL proposer sends this acknowledgment only when LSN of commit record is confirmed by quorum of safekeepers.\n\nWAL proposer tries to establish connections with safekeepers.\nAt any moment of time each safekeeper can serve exactly once proposer, but it can accept new connections.\n\nAny of safekeepers can be used as WAL server, producing replication stream. So both `Pagers` and `Replicas`\n(read-only computation nodes) can connect to safekeeper to receive WAL stream. Safekeepers is streaming WAL until\nit reaches min(`commitLSN`,`flushLSN`). Then replication is suspended until new data arrives from master.\n\n\n## Handshake\nThe goal of handshake is to collect quorum (to be able to perform recovery)\nand avoid split-brains caused by simultaneous presence of old and new master.\nProcedure of handshake consists of the following steps:\n\n1. Broadcast information about server to all safekeepers (wal segment size, system_id,...)\n2. Receive responses with information about safekeepers.\n3. Once quorum of handshake responses are received, propose new `NodeId(max(term)+1, server.uuid)`\nto all of them.\n4. 
On receiving proposed nodeId, safekeeper compares it with locally stored nodeId and if it is greater than or equal,\nthen accepts proposed nodeId and persists this choice in the local control file.\n5. If quorum of safekeepers approve proposed nodeId, then server assumes that handshake is successfully completed and switches to recovery stage.\n\n## Recovery\nProposer computes max(`restartLSN`) and max(`flushLSN`) from quorum of attached safekeepers.\n`RestartLSN` - is position in WAL which is known to be delivered to all safekeepers.\nIn other words: `restartLSN` can be also considered as cut-off horizon (all preceding WAL segments can be removed).\n`FlushLSN` is position flushed by safekeeper to the local persistent storage.\n\nIf max(`restartLSN`) != max(`flushLSN`), then recovery has to be performed.\nProposer creates replication channel with most advanced safekeeper (safekeeper with the largest `flushLSN`).\nThen it downloads all WAL messages between max(`restartLSN`)..max(`flushLSN`).\nMessages are inserted in L1-list (ordered by LSN). Then we locate position of each safekeeper in this list according\nto their `flushLSN`s. Safekeepers that are not yet connected (out of quorum) should start from the beginning of the list\n(corresponding to `restartLSN`).\n\nWe need to choose max(`flushLSN`) because voting quorum may be different from the quorum that committed the last message.\nSo we do not know whether the record with max(`flushLSN`) was committed by quorum or not. So we have to consider it committed\nto avoid loss of committed data.\n\nCalculated max(`flushLSN`) is called `VCL` (Volume Complete LSN). Since it is chosen among the quorum, there may be some other offline safekeeper with larger\n`VCL`. Once it becomes online, we need to overwrite its WAL beyond `VCL`. To support it, each safekeeper maintains\n`epoch` number. 
`Epoch` plays almost the same role as `term`, but algorithm of `epoch` bumping is different.\n`VCL` and new epoch are received by safekeeper from proposer during voting.\nBut safekeeper doesn't switch to new epoch immediately after voting.\nInstead, the safekeeper waits until a record with LSN > Max(`flushLSN`,`VCL`) is received.\nIt means that we restore all records from old generation and switch to new generation.\nWhen proposer calculates max(`FlushLSN`), it first compares `Epoch`. So actually we compare (`Epoch`,`FlushLSN`) pairs.\n\nLet's look at the examples. Consider that we have three safekeepers: S1, S2, S3. Si(N) means that i-th safekeeper has epoch=N.\nRi(x) - WAL record for resource X with LSN=i. Assume that we have the following state:\n\n```\nS1(1): R1(a)\nS2(1): R1(a),R2(b)\nS3(1): R1(a),R2(b),R3(c),R4(d)  - offline\n```\n\nProposer chooses quorum (S1,S2). VCL for them is 2. We download S2 to proposer and schedule its write to S1.\nAfter receiving record R5 the picture can be:\n\n```\nS1(2): R1(a),R2(b),R3(e)\nS2(2): R1(a),R2(b),R3(e)\nS3(1): R1(a),R2(b),R3(c),R4(d)  - offline\n```\n\nNow if server is crashed or restarted, we perform new voting and\nit doesn't matter which quorum we choose: (S1,S2), (S2,S3)...\nin any case VCL=3, because S3 has smaller epoch.\nR3(c) will be overwritten with R3(e):\n\n```\nS1(3): R1(a),R2(b),R3(e)\nS2(3): R1(a),R2(b),R3(e)\nS3(1): R1(a),R2(b),R3(e),R4(d)\n```\n\nEpoch of S3 will be adjusted once it overwrites R4:\n\n```\nS1(3): R1(a),R2(b),R3(e),R4(f)\nS2(3): R1(a),R2(b),R3(e),R4(f)\nS3(3): R1(a),R2(b),R3(e),R4(f)\n```\n\nCrash can happen before epoch was bumped. Let's return back to the initial position:\n\n```\nS1(1): R1(a)\nS2(1): R1(a),R2(b)\nS3(1): R1(a),R2(b),R3(c),R4(d)  - offline\n```\n\nAssume that we start recovery:\n\n```\nS1(1): R1(a),R2(b)\nS2(1): R1(a),R2(b)\nS3(1): R1(a),R2(b),R3(c),R4(d)  - offline\n```\n\nand then crash happens. 
During voting we choose quorum (S2,S3).\nNow they belong to the same epoch and S3 is most advanced among them.\nSo VCL is set to 4 and we recover S1 and S2 from S3:\n\n```\nS1(1): R1(a),R2(b),R3(c),R4(d)\nS2(1): R1(a),R2(b),R3(c),R4(d)\nS3(1): R1(a),R2(b),R3(c),R4(d)\n```\n\n## Main loop\nOnce recovery is completed, proposer switches to normal processing loop: it receives WAL stream from Postgres and appends WAL\nmessages to the list. At the same time it tries to push messages to safekeepers. Each safekeeper is associated\nwith some element in message list and once it acknowledged receiving of the message, position is moved forward.\nEach queue element contains acknowledgment mask, whose bits correspond to safekeepers.\nOnce all safekeepers acknowledged receiving of this message (by setting correspondent bit),\nthen element can be removed from queue and `restartLSN` is advanced forward.\n\nProposer maintains `restartLSN` and `commitLSN` based on the responses received from safekeepers.\n`RestartLSN` equals to the LSN of head message in the list. `CommitLSN` is `flushLSN[nSafekeepers-Quorum]` element\nin ordered array with `flushLSN`s of safekeepers. `CommitLSN` and `RestartLSN` are included in requests\nsent from proposer to safekeepers and stored in safekeepers control file.\nTo avoid overhead of extra fsync, this control file is not fsynced on each request. 
Flushing this file is performed\nperiodically, which means that `restartLSN`/`commitLSN` stored by safekeeper may be slightly deteriorated.\nIt is not critical because may only cause redundant processing of some WAL record.\nAnd `FlushLSN` is recalculated after node restart by scanning local WAL files.\n\n## Fault tolerance\nIf the WAL proposer process looses connection to safekeeper it tries to reestablish this connection using the same nodeId.\n\nRestart of PostgreSQL initiates new round of voting and switching new epoch.\n\n## Limitations\nRight now message queue is maintained in main memory and is not spilled to the disk.\nIt can cause memory overflow in case of presence of lagging safekeepers.\nIt is assumed that in case of losing local data by some safekeepers, it should be recovered using some external mechanism.\n\n\n## Glossary\n* `CommitLSN`: position in WAL confirmed by quorum safekeepers.\n* `RestartLSN`: position in WAL confirmed by all safekeepers.\n* `FlushLSN`: part of WAL persisted to the disk by safekeeper.\n* `NodeID`: pair (term,UUID)\n* `Pager`: Neon component restoring pages from WAL stream\n* `Replica`: read-only computation node\n* `VCL`: the largest LSN for which we can guarantee availability of all prior records.\n\n## Algorithm\n\n```python\nprocess WalProposer(safekeepers,server,curr_epoch,restart_lsn=0,message_queue={},feedbacks={})\n    function do_recovery(epoch,restart_lsn,VCL)\n        leader = i:safekeepers[i].state.epoch=epoch and safekeepers[i].state.flushLsn=VCL\n        wal_stream = safekeepers[leader].start_replication(restart_lsn,VCL)\n        do\n            message = wal_stream.read()\n            message_queue.append(message)\n        while message.startPos < VCL\n\n        for i in 1..safekeepers.size()\n            for message in message_queue\n                if message.endLsn < safekeepers[i].state.flushLsn\n                    message.delivered += i\n                else\n                    send_message(i, message)\n 
                   break\n    end function\n\n    function send_message(i,msg)\n        msg.restartLsn = restart_lsn\n        msg.commitLsn = get_commit_lsn()\n        safekeepers[i].send(msg, response_handler)\n    end function\n\n    function do_broadcast(message)\n        for i in 1..safekeepers.size()\n            if not safekeepers[i].sending()\n                send_message(i, message)\n    end function\n\n    function get_commit_lsn()\n        sorted_feedbacks = feedbacks.sort()\n        return sorted_feedbacks[safekeepers.size() - quorum]\n    end function\n\n    function response_handler(i,message,response)\n        feedbacks[i] = if response.epoch=curr_epoch then response.flushLsn else VCL\n        server.write(get_commit_lsn())\n\n        message.delivered += i\n        next_message = message_queue.next(message)\n        if next_message\n            send_message(i, next_message)\n\n        while message_queue.head.delivered.size() = safekeepers.size()\n            if restart_lsn < message_queue.head.beginLsn\n                restart_lsn = message_queue.head.endLsn\n            message_queue.pop_head()\n    end function\n\n    server_info = server.read()\n\n    safekeepers.write(server_info)\n    safekeepers.state = safekeepers.read()\n    next_term = max(safekeepers.state.nodeId.term)+1\n    restart_lsn = max(safekeepers.state.restartLsn)\n    epoch,VCL = max(safekeepers.state.epoch,safekeepers.state.flushLsn)\n    curr_epoch = epoch + 1\n\n    proposal = Proposal(NodeId(next_term,server.id),curr_epoch,VCL)\n    safekeepers.send(proposal)\n    responses = safekeepers.read()\n    if any responses.is_rejected()\n        exit()\n\n    for i in 1..safekeepers.size()\n        feedbacks[i].flushLsn = if epoch=safekeepers[i].state.epoch then safekeepers[i].state.flushLsn else restart_lsn\n\n    if restart_lsn != VCL\n        do_recovery(epoch,restart_lsn,VCL)\n\n    wal_stream = server.start_replication(VCL)\n    for ever\n        message = wal_stream.read()\n   
     message_queue.append(message)\n        do_broadcast(message)\nend process\n\nprocess safekeeper(gateway,state)\n    function handshake()\n        proposer = gateway.accept()\n        server_info = proposer.read()\n        proposer.write(state)\n        proposal = proposer.read()\n        if proposal.nodeId < state.nodeId\n            proposer.write(rejected)\n            return null\n        else\n            state.nodeId = proposal.nodeId\n            state.proposed_epoch = proposal.epoch\n            state.VCL = proposal.VCL\n            write_control_file(state)\n            proposer.write(accepted)\n            return proposer\n    end function\n\n    state = read_control_file()\n    state.flushLsn = locate_end_of_wal()\n\n    for ever\n        proposer = handshake()\n        if not proposer\n            continue\n        for ever\n            req = proposer.read()\n            if req.nodeId != state.nodeId\n                break\n            save_wal_file(req.data)\n            state.restartLsn = req.restartLsn\n            if state.epoch < state.proposed_epoch and req.endPos > max(state.flushLsn,state.VCL)\n                state.epoch = state.proposed_epoch\n            if req.endPos > state.flushLsn\n                state.flushLsn = req.endPos\n            save_control_file(state)\n            resp = Response(state.epoch,req.endPos)\n            proposer.write(resp)\n            notify_wal_sender(Min(req.commitLsn,req.endPos))\nend process\n```\n"
  },
  {
    "path": "docs/separation-compute-storage.md",
    "content": "# Separation of Compute and Storage\n\nTODO:\n\n- Read path\n- Write path\n- Durability model\n- API auth\n"
  },
  {
    "path": "docs/settings.md",
    "content": "## Pageserver\n\nPageserver is mainly configured via a `pageserver.toml` config file.\nIf there's no such file during `init` phase of the server, it creates the file itself. Without 'init', the file is read.\n\nThere's a possibility to pass an arbitrary config value to the pageserver binary as an argument: such values override\nthe values in the config file, if any are specified for the same key and get into the final config during init phase.\n\n### Config example\n\n```toml\n# Initial configuration file created by 'pageserver --init'\nlisten_pg_addr = '127.0.0.1:64000'\nlisten_http_addr = '127.0.0.1:9898'\n\ncheckpoint_distance = '268435456' # in bytes\ncheckpoint_timeout = '10m'\n\ngc_period = '1 hour'\ngc_horizon = '67108864'\n\nmax_file_descriptors = '100'\n\n# initial superuser role name to use when creating a new tenant\ninitial_superuser_name = 'cloud_admin'\n\nbroker_endpoint = 'http://127.0.0.1:50051'\n\n# [remote_storage]\n```\n\nThe config above shows default values for all basic pageserver settings, besides `broker_endpoint`: that one has to be set by the user,\nsee the corresponding section below.\nPageserver uses default values for all files that are missing in the config, so it's not a hard error to leave the config blank.\nYet, it validates the config values it can (e.g. postgres install dir) and errors if the validation fails, refusing to start.\n\nNote the `[remote_storage]` section: it's a [table](https://toml.io/en/v1.0.0#table) in TOML specification and\n\n- either has to be placed in the config after the table-less values such as `initial_superuser_name = 'cloud_admin'`\n\n- or can be placed anywhere if rewritten in identical form as [inline table](https://toml.io/en/v1.0.0#inline-table): `remote_storage = {foo = 2}`\n\n### Config values\n\nAll values can be passed as an argument to the pageserver binary, using the `-c` parameter and specified as a valid TOML string. 
All tables should be passed in the inline form.\n\nExample: `${PAGESERVER_BIN} -c \"checkpoint_timeout = '10 m'\" -c \"remote_storage={local_path='/some/local/path/'}\"`\n\nNote that TOML distinguishes between strings and integers, the former require single or double quotes around them.\n\n#### broker_endpoint\n\nA storage broker endpoint to connect and pull the information from. Default is\n`'http://127.0.0.1:50051'`. \n\n#### checkpoint_distance\n\n`checkpoint_distance` is the amount of incoming WAL that is held in\nthe open layer, before it's flushed to local disk. It puts an upper\nbound on how much WAL needs to be re-processed after a pageserver\ncrash. It is a soft limit, the pageserver can momentarily go above it,\nbut it will trigger a checkpoint operation to get it back below the\nlimit.\n\n`checkpoint_distance` also determines how much WAL needs to be kept\ndurable in the safekeeper. The safekeeper must have capacity to hold\nthis much WAL, with some headroom, otherwise you can get stuck in a\nsituation where the safekeeper is full and stops accepting new WAL,\nbut the pageserver is not flushing out and releasing the space in the\nsafekeeper because it hasn't reached checkpoint_distance yet.\n\n`checkpoint_distance` also controls how often the WAL is uploaded to\nS3.\n\nThe unit is # of bytes.\n\n#### checkpoint_timeout\n\nApart from `checkpoint_distance`, open layer flushing is also triggered\n`checkpoint_timeout` after the last flush. This makes WAL eventually uploaded to\ns3 when activity is stopped.\n\nThe default is 10m.\n\n#### compaction_period\n\nEvery `compaction_period` seconds, the page server checks if\nmaintenance operations, like compaction, are needed on the layer\nfiles. Default is 1 s, which should be fine.\n\n#### compaction_target_size\n\nFile sizes for L0 delta and L1 image layers. 
Default is 128MB.\n\n#### gc_horizon\n\n`gc_horizon` determines how much history is retained, to allow\nbranching and read replicas at an older point in time. The unit is #\nof bytes of WAL. Page versions older than this are garbage collected\naway.\n\n#### gc_period\n\nInterval at which garbage collection is triggered. Default is 1 hour.\n\n#### image_creation_threshold\n\nL0 delta layer threshold for L1 image layer creation. Default is 3.\n\n#### pitr_interval\n\nWAL retention duration for PITR branching. Default is 7 days.\n\n#### walreceiver_connect_timeout\n\nTime to wait to establish the wal receiver connection before failing.\n\n#### lagging_wal_timeout\n\nTime the pageserver did not get any WAL updates from safekeeper (if any).\nAvoids lagging pageserver preemptively by forcing to switch it from stalled connections.\n\n#### max_lsn_wal_lag\n\nDifference between Lsn values of the latest available WAL on safekeepers: if currently connected safekeeper starts to lag too long and too much,\nit gets swapped to the different one.\n\n#### initial_superuser_name\n\nName of the initial superuser role, passed to initdb when a new tenant\nis initialized. It doesn't affect anything after initialization. The\ndefault is 'cloud_admin'. Note: the console\ndepends on that, so if you change it, bad things will happen.\n\n#### page_cache_size\n\nSize of the page cache. Unit is\nnumber of 8 kB blocks. The default is 8192, which means 64 MB.\n\n#### max_file_descriptors\n\nMax number of file descriptors to hold open concurrently for accessing\nlayer files. 
This should be kept well below the process/container/OS\nlimit (see `ulimit -n`), as the pageserver also needs file descriptors\nfor other files and for sockets for incoming connections.\n\n#### pg_distrib_dir\n\nA directory with Postgres installation to use during pageserver activities.\nSince pageserver supports several postgres versions, `pg_distrib_dir` contains\na subdirectory for each version with naming convention `v{PG_MAJOR_VERSION}/`.\nInside that dir, a `bin/postgres` binary should be present.\n\nThe default distrib dir is `./pg_install/`.\n\n#### workdir (-D)\n\nA directory in the file system, where pageserver will store its files.\nThe default is `./.neon/`.\n\nThis parameter has a special CLI alias (`-D`) and can not be overridden with regular `-c` way.\n\n##### Remote storage\n\nThere's a way to automatically back up and restore some of the pageserver's data from working dir to the remote storage.\nThe backup system is disabled by default and can be enabled for either of the currently available storages:\n\n###### Local FS storage\n\nPageserver can back up and restore some of its workdir contents to another directory.\nFor that, only a path to that directory needs to be specified as a parameter:\n\n```toml\n[remote_storage]\nlocal_path = '/some/local/path/'\n```\n\n###### S3 storage\n\nPageserver can back up and restore some of its workdir contents to S3.\nFull set of S3 credentials is needed for that as parameters.\nConfiguration example:\n\n```toml\n[remote_storage]\n# Name of the bucket to connect to\nbucket_name = 'some-sample-bucket'\n\n# Name of the region where the bucket is located at\nbucket_region = 'eu-north-1'\n\n# A \"subfolder\" in the bucket, to use the same bucket separately by multiple pageservers at once.\n# Optional, pageserver uses entire bucket if the prefix is not specified.\nprefix_in_bucket = '/some/prefix/'\n\n# S3 API query limit to avoid getting errors/throttling from AWS.\nconcurrency_limit = 100\n```\n\nIf no IAM bucket 
access is used during the remote storage usage, use the `AWS_ACCESS_KEY_ID` and `AWS_SECRET_ACCESS_KEY` environment variables to set the access credentials.\n\n###### General remote storage configuration\n\nPageserver allows only one remote storage configured concurrently and errors if parameters from multiple different remote configurations are used.\nNo default values are used for the remote storage configuration parameters.\n\nBesides, there are parameters common for all types of remote storage that can be configured, those have defaults:\n\n```toml\n[remote_storage]\n# Max number of concurrent timeline synchronized (layers uploaded or downloaded) with the remote storage at the same time.\nmax_concurrent_syncs = 50\n\n# Max number of errors a single task can have before it's considered failed and not attempted to run anymore.\nmax_sync_errors = 10\n```\n\n## safekeeper\n\nTODO\n"
  },
  {
    "path": "docs/sourcetree.md",
    "content": "## Source tree layout\n\nBelow you will find a brief overview of each subdir in the source tree in alphabetical order.\n\n`storage_broker`:\n\nNeon storage broker, providing messaging between safekeepers and pageservers.\n[storage_broker.md](./storage_broker.md)\n\n`storage_controller`:\n\nNeon storage controller, manages a cluster of pageservers and exposes an API that enables\nmanaging a many-sharded tenant as a single entity.\n\n`/control_plane`:\n\nLocal control plane.\nFunctions to start, configure and stop pageserver and postgres instances running as a local processes.\nIntended to be used in integration tests and in CLI tools for local installations.\n\n`/docs`:\n\nDocumentation of the Neon features and concepts.\nNow it is mostly dev documentation.\n\n`/pageserver`:\n\nNeon storage service.\nThe pageserver has a few different duties:\n\n- Store and manage the data.\n- Generate a tarball with files needed to bootstrap ComputeNode.\n- Respond to GetPage@LSN requests from the Compute Nodes.\n- Receive WAL from the WAL service and decode it.\n- Replay WAL that's applicable to the chunks that the Page Server maintains\n\nFor more detailed info, see [pageserver-services.md](./pageserver-services.md)\n\n`/proxy`:\n\nPostgres protocol proxy/router.\nThis service listens psql port, can check auth via external service\nand create new databases and accounts (control plane API in our case).\n\n`/test_runner`:\n\nIntegration tests, written in Python using the `pytest` framework.\n\n`/vendor/postgres-v14` and `/vendor/postgres-v15`:\n\nPostgreSQL source tree per version, with the modifications needed for Neon.\n\n`/pgxn/neon`:\n\nPostgreSQL extension that implements storage manager API and network communications with remote page server.\n\n`/pgxn/neon_test_utils`:\n\nPostgreSQL extension that contains functions needed for testing and debugging.\n\n`/pgxn/neon_walredo`:\n\nLibrary to run Postgres as a \"WAL redo process\" in the 
pageserver.\n\n`/safekeeper`:\n\nThe neon WAL service that receives WAL from the primary compute node and streams it to the pageserver.\nIt acts as a holding area and redistribution center for recently generated WAL.\n\nFor more detailed info, see [walservice.md](./walservice.md)\n\n`/workspace_hack`:\nThe workspace_hack crate exists only to pin down some dependencies.\n\nWe use [cargo-hakari](https://crates.io/crates/cargo-hakari) for automation.\n\n`/libs`:\nUnites granular neon helper crates under the hood.\n\n`/libs/postgres_ffi`:\n\nUtility functions for interacting with PostgreSQL file formats.\nMisc constants, copied from PostgreSQL headers.\n\n`/libs/utils`:\nGeneric helpers that are shared between other crates in this repository.\nA subject for future modularization.\n\n`/libs/metrics`:\nHelpers for exposing Prometheus metrics from the server.\n\n### Adding dependencies\nWhen you add a Cargo dependency, you should update hakari manifest by running commands below and committing the updated `Cargo.lock` and `workspace_hack/`. There may be no changes, that's fine.\n\n```bash\ncargo hakari generate\ncargo hakari manage-deps\n```\n\nIf you don't have hakari installed (`error: no such subcommand: hakari`), install it by running `cargo install cargo-hakari`.\n\n### Checking Rust 3rd-parties\n[Cargo deny](https://embarkstudios.github.io/cargo-deny/index.html) is a cargo plugin that lets us lint project's dependency graph to ensure all dependencies conform to requirements. 
It detects security issues, matches licenses, and ensures crates only come from trusted sources.\n\n```bash\ncargo deny check\n```\n\n## Using Python\nNote that Debian/Ubuntu Python packages are stale, as it commonly happens,\nso manual installation of dependencies is not recommended.\n\nA single virtual environment with all dependencies is described in the single `Pipfile`.\n\n### Prerequisites\n- Install Python 3.11 (the minimal supported version) or greater.\n    - Our setup with poetry should work with newer python versions too. So feel free to open an issue with a `c/test-runner` label if something doesn't work as expected.\n    - If you have some trouble with other version you can resolve it by installing Python 3.11 separately, via [pyenv](https://github.com/pyenv/pyenv) or via system package manager e.g.:\n      ```bash\n      # In Ubuntu\n      sudo add-apt-repository ppa:deadsnakes/ppa\n      sudo apt update\n      sudo apt install python3.11\n      ```\n- Install `poetry`\n    - Exact version of `poetry` is not important, see installation instructions available at poetry's [website](https://python-poetry.org/docs/#installation).\n- Install dependencies via `./scripts/pysync`.\n    - Note that CI uses specific Python version (look for `PYTHON_VERSION` [here](https://github.com/neondatabase/docker-images/blob/main/rust/Dockerfile))\n      so if you have different version some linting tools can yield different result locally vs in the CI.\n    - You can explicitly specify which Python to use by running `poetry env use /path/to/python`, e.g. `poetry env use python3.11`.\n      This may also disable the `The currently activated Python version X.Y.Z is not supported by the project` warning.\n\nRun `poetry shell` to activate the virtual environment.\nAlternatively, use `poetry run` to run a single command in the venv, e.g. 
`poetry run pytest`.\n\n### Obligatory checks\nWe force code formatting via `ruff`, and type hints via `mypy`.\nRun the following commands in the repository's root (next to `pyproject.toml`):\n\n```bash\npoetry run ruff format . # All code is reformatted\npoetry run ruff check .  # Python linter\npoetry run mypy .        # Ensure there are no typing errors\n```\n\n**WARNING**: do not run `mypy` from a directory other than the root of the repository.\nOtherwise it will not find its configuration.\n\nAlso consider:\n\n* Running `pycodestyle` (or a linter of your choice) and fixing possible defects, if any.\n* Adding more type hints to your code to avoid `Any`.\n\n### Changing dependencies\nTo add new package or change an existing one you can use `poetry add` or `poetry update` or edit `pyproject.toml` manually. Do not forget to run `poetry lock` in the latter case.\n\nMore details are available in poetry's [documentation](https://python-poetry.org/docs/).\n\n## Configuring IDEs\nNeon consists of three projects in different languages which use different project models.\n\n* A bunch of Rust crates, all available from the root `Cargo.toml`.\n* Integration tests in Python in the `test_runner` directory. Some stand-alone Python scripts exist as well.\n* Postgres and our Postgres extensions in C built with Makefiles under `vendor/postgres` and `pgxn`.\n\n### CLion\nYou can use CLion with the [Rust plugin](https://plugins.jetbrains.com/plugin/8182-rust) to develop Neon. It should pick up Rust and Python projects whenever you open Neon's repository as a project. We have not tried setting up a debugger, though.\n\nC code requires some extra care, as it's built via Make, not CMake. Some of our developers have successfully used [compilation database](https://www.jetbrains.com/help/clion/compilation-database.html#compdb_generate) for CLion. It is a JSON file which lists all C source files and corresponding compilation keys. CLion can use it instead of `CMakeLists.txt`. 
To set up a project with a compilation database:\n\n1. Clone the Neon repository and install all dependencies, including Python. Do not open it with CLion just yet.\n2. Run the following commands in the repository's root:\n   ```bash\n   # Install a `compiledb` tool which can parse make's output and generate the compilation database.\n   poetry add -D compiledb\n   # Clean the build tree so we can rebuild from scratch.\n   # Unfortunately, our and Postgres Makefiles do not work well with either --dry-run or --assume-new,\n   # so we don't know a way to generate the compilation database without recompiling everything,\n   # see https://github.com/neondatabase/neon/issues/2378#issuecomment-1241421325\n   make distclean\n   # Rebuild the Postgres parts from scratch and save the compilation commands to the compilation database.\n   # You can alter the -j parameter to your liking.\n   # Note that we only build for a specific version of Postgres. The extension code is shared, but headers are\n   # different, so we set up CLion to only use a specific version of the headers.\n   make -j$(nproc) --print-directory postgres-v15 neon-pg-ext-v15 | poetry run compiledb --verbose --no-build\n   # Uninstall the tool\n   poetry remove -D compiledb\n   # Make sure the compile_commands.json file is not committed.\n   echo /compile_commands.json >>.git/info/exclude\n   ```\n3. Open CLion, click \"Open File or Project\" and choose the generated `compile_commands.json` file to be opened \"as a project\". You cannot add a compilation database into an existing CLion project, you have to create a new one. _Do not_ open the directory as a project, open the file.\n4. The newly created project should start indexing Postgres source code in C, as well as the C standard library. You may have to [configure the C compiler for the compilation database](https://www.jetbrains.com/help/clion/compilation-database.html#compdb_toolchain).\n5. Open the `Cargo.toml` file in an editor in the same project. 
CLion should pick up the hint and start indexing Rust code.\n6. Now you have a CLion project which knows about C files, Rust files. It should pick up Python files automatically as well.\n7. Set up correct code indentation in CLion's settings: Editor > Code Style > C/C++, choose the \"Project\" scheme on the top, and tick the \"Use tab character\" on the \"Tabs and Indents\" tab. Ensure that \"Tab size\" is 4.\n\nYou can also enable Cargo Clippy diagnostics and enable Rustfmt instead of built-in code formatter.\n\nWhenever you change layout of C files, you may need to regenerate the compilation database. No need to re-create the CLion project, changes should be picked up automatically.\n\nKnown issues (fixes and suggestions are welcome):\n\n* Test results may be hard to read in CLion, both for unit tests in Rust and integration tests in Python. Use command line to run them instead.\n* CLion does not support non-local Python interpreters, unlike PyCharm. E.g. if you use WSL, CLion does not see `poetry` and installed dependencies. Python support is limited.\n* Cargo Clippy diagnostics in CLion may take a lot of resources.\n* `poetry add -D` updates some packages and changes `poetry.lock` drastically even when followed by `poetry remove -D`. Feel free to `git checkout poetry.lock` and `./scripts/pysync` to revert these changes.\n"
  },
  {
    "path": "docs/storage_broker.md",
    "content": "# Storage broker\n\nStorage broker targets two issues:\n- Allowing safekeepers and pageservers learn which nodes also hold their\n  timelines, and timeline statuses there.\n- Avoiding O(n^2) connections between storage nodes while doing so.\n\nThis is used\n- By pageservers to determine the most advanced and alive safekeeper to pull WAL from.\n- By safekeepers to synchronize on the timeline: advance\n  `remote_consistent_lsn`, `backup_lsn`, choose who offloads WAL to s3.\n\nTechnically, it is a simple stateless pub-sub message broker based on tonic\n(grpc) making multiplexing easy. Since it is stateless, fault tolerance can be\nprovided by k8s; there is no built in replication support, though it is not hard\nto add.\n\nCurrently, the only message is `SafekeeperTimelineInfo`. Each safekeeper, for\neach active timeline, once in a while pushes timeline status to the broker.\nOther nodes subscribe and receive this info, using it per above.\n\nBroker serves /metrics on the same port as grpc service. \n\ngrpcurl can be used to check which values are currently being pushed:\n```\ngrpcurl -proto broker/proto/broker.proto -d '{\"all\":{}}' -plaintext localhost:50051 storage_broker.BrokerService/SubscribeSafekeeperInfo\n```\n"
  },
  {
    "path": "docs/storage_controller.md",
    "content": "# Storage Controller\n\n## Concepts\n\nThe storage controller sits between administrative API clients and pageservers, and handles the details of mapping tenants to pageserver tenant shards. For example, creating a tenant is one API call to the storage controller,\nwhich is mapped into many API calls to many pageservers (for multiple shards, and for secondary locations).\n\nIt implements a pageserver-compatible API that may be used for CRUD operations on tenants and timelines, translating these requests into appropriate operations on the shards within a tenant, which may be on many different pageservers. Using this API, the storage controller may be used in the same way as the pageserver's administrative HTTP API, hiding\nthe underlying details of how data is spread across multiple nodes.\n\nThe storage controller also manages generations, high availability (via secondary locations) and live migrations for tenants under its management. This is done with a reconciliation loop pattern, where tenants have an “intent” state and a “reconcile” task that tries to make the outside world match the intent.\n\n## APIs\n\nThe storage controller’s HTTP server implements four logically separate APIs:\n\n- `/v1/...` path is the pageserver-compatible API. This has to be at the path root because that’s where clients expect to find it on a pageserver.\n- `/control/v1/...` path is the storage controller’s API, which enables operations such as registering and management pageservers, or executing shard splits.\n- `/debug/v1/...` path contains endpoints which are either exclusively used in tests, or are for use by engineers when supporting a deployed system.\n- `/upcall/v1/...` path contains endpoints that are called by pageservers. This includes the `/re-attach` and `/validate` APIs used by pageservers\n  to ensure data safety with generation numbers.\n\nThe API is authenticated with a JWT token, and tokens must have scope `pageserverapi` (i.e. 
the same scope as pageservers’ APIs).\n\nSee the `http.rs` file in the source for where the HTTP APIs are implemented.\n\n## Database\n\nThe storage controller uses a postgres database to persist a subset of its state. Note that the storage controller does _not_ keep all its state in the database: this is a design choice to enable most operations to be done efficiently in memory, rather than having to read from the database. See `persistence.rs` for a more comprehensive comment explaining what we do and do not persist: a useful metaphor is that we persist objects like tenants and nodes, but we do not\npersist the _relationships_ between them: the attachment state of a tenant's shards to nodes is kept in memory and\nrebuilt on startup.\n\nThe file `persistence.rs` contains all the code for accessing the database, and has a large doc comment that goes into more detail about exactly what we persist and why.\n\nThe `diesel` crate is used for defining models & migrations.\n\nRunning a local cluster with `cargo neon` automatically starts a vanilla postgres process to host the storage controller’s database.\n\n### Diesel tip: migrations\n\nIf you need to modify the database schema, here’s how to create a migration:\n\n- Install the diesel CLI with `cargo install diesel_cli`\n- Use `diesel migration generate <name>` to create a new migration\n- Populate the SQL files in the `migrations/` subdirectory\n- Use `DATABASE_URL=... 
diesel migration run` to apply the migration you just wrote: this will update the `schema.rs` file automatically.\n  - This requires a running database: the easiest way to do that is to just run `cargo neon init ; cargo neon start`, which will leave a database available at `postgresql://localhost:1235/storage_controller`\n- Commit the migration files and the changes to schema.rs\n- If you need to iterate, you can rewind migrations with `diesel migration revert -a` and then `diesel migration run` again.\n- The migrations are built into the storage controller binary, and automatically run at startup after it is deployed, so once you’ve committed a migration no further steps are needed.\n\n## storcon_cli\n\nThe `storcon_cli` tool enables interactive management of the storage controller. This is usually\nonly necessary for debug, but may also be used to manage nodes (e.g. marking a node as offline).\n\n`storcon_cli --help` includes details on commands.\n\n# Deploying\n\nThis section is aimed at engineers deploying the storage controller outside of Neon's cloud platform, as\npart of a self-hosted system.\n\n_General note: since the default `neon_local` environment includes a storage controller, this is a useful\nreference when figuring out deployment._\n\n## Database\n\nIt is **essential** that the database used by the storage controller is durable (**do not store it on ephemeral\nlocal disk**). This database contains pageserver generation numbers, which are essential to data safety on the pageserver.\n\nThe resource requirements for the database are very low: a single CPU core and 1GiB of memory should work well for most deployments. 
The physical size of the database is typically under a gigabyte.\n\nSet the URL to the database using the `--database-url` CLI option.\n\nThere is no need to run migrations manually: the storage controller automatically applies migrations\nwhen it starts up.\n\n## Configure pageservers to use the storage controller\n\n1. The pageserver `control_plane_api` and `control_plane_api_token` should be set in the `pageserver.toml` file. The API setting should\n   point to the \"upcall\" prefix, for example `http://127.0.0.1:1234/upcall/v1/` is used in neon_local clusters.\n2. Create a `metadata.json` file in the same directory as `pageserver.toml`: this enables the pageserver to automatically register itself\n   with the storage controller when it starts up. See the example below for the format of this file.\n\n### Example `metadata.json`\n\n```\n{\"host\":\"acmehost.localdomain\",\"http_host\":\"acmehost.localdomain\",\"http_port\":9898,\"port\":64000}\n```\n\n- `port` and `host` refer to the _postgres_ port and host, and these must be accessible from wherever\n  postgres runs.\n- `http_port` and `http_host` refer to the pageserver's HTTP api, this must be accessible from where\n  the storage controller runs.\n\n## Handle compute notifications.\n\nThe storage controller independently moves tenant attachments between pageservers in response to\nchanges such as a pageserver node becoming unavailable, or the tenant's shard count changing. 
To enable\npostgres clients to handle such changes, the storage controller calls an API hook when a tenant's pageserver\nlocation changes.\n\nThe hook is configured using the storage controller's `--control-plane-url` CLI option, from which the hook URL is computed.\n\nCurrently, there are two hooks, each computed by appending the name to the provided control plane URL prefix:\n\n- `notify-attach`, called whenever attachment for pageservers changes\n- `notify-safekeepers`, called whenever attachment for safekeepers changes\n\nIf the hooks require JWT auth, the token may be provided with `--control-plane-jwt-token`.\nThe hooks will be invoked with a `PUT` request.\n\nIn the Neon cloud service, these hooks are implemented by Neon's internal cloud control plane. In `neon_local` systems,\nthe storage controller integrates directly with neon_local to reconfigure local postgres processes instead of calling\nthe compute hook.\n\nWhen implementing an on-premise Neon deployment, you must implement a service that handles the compute hooks. This is not complicated.\n\n### `notify-attach` body\n\nThe `notify-attach` request body follows the format of the `ComputeHookNotifyRequest` structure, provided below for convenience.\n\n```\nstruct ComputeHookNotifyRequestShard {\n    node_id: NodeId,\n    shard_number: ShardNumber,\n}\n\nstruct ComputeHookNotifyRequest {\n    tenant_id: TenantId,\n    stripe_size: Option<ShardStripeSize>,\n    shards: Vec<ComputeHookNotifyRequestShard>,\n}\n```\n\nWhen a notification is received:\n\n1. Modify postgres configuration for this tenant:\n\n   - set `neon.pageserver_connstring` to a comma-separated list of postgres connection strings to pageservers according to the `shards` list. The\n     shards identified by `NodeId` must be converted to the address+port of the node.\n   - if stripe_size is not None, set `neon.shard_stripe_size` to this value\n\n2. Send SIGHUP to postgres to reload configuration\n3. 
Respond with 200 to the notification request. Do not return success if postgres was not updated: if an error is returned, the controller\n   will retry the notification until it succeeds.\n\nExample body:\n\n```\n{\n  \"tenant_id\": \"1f359dd625e519a1a4e8d7509690f6fc\",\n  \"stripe_size\": 2048,\n  \"shards\": [\n      {\"node_id\": 344, \"shard_number\": 0},\n      {\"node_id\": 722, \"shard_number\": 1}\n  ]\n}\n```\n\n### `notify-safekeepers` body\n\nThe `notify-safekeepers` request body follows the format of the `SafekeepersNotifyRequest` structure, provided below for convenience.\n\n```\npub struct SafekeeperInfo {\n    pub id: NodeId,\n    pub hostname: String,\n}\n\npub struct SafekeepersNotifyRequest {\n    pub tenant_id: TenantId,\n    pub timeline_id: TimelineId,\n    pub generation: u32,\n    pub safekeepers: Vec<SafekeeperInfo>,\n}\n```\n\nWhen a notification is received:\n\n1. Modify postgres configuration for this tenant:\n\n   - set `neon.safekeeper_connstrings` to an array of postgres connection strings to safekeepers according to the `safekeepers` list. The\n     safekeepers identified by `NodeId` must be converted to the address+port of the respective safekeeper.\n     The hostname is provided for debugging purposes, so we reserve the right to change how we pass it.\n   - set `neon.safekeepers_generation` to the provided `generation` value.\n\n2. Send SIGHUP to postgres to reload configuration\n3. Respond with 200 to the notification request. Do not return success if postgres was not updated: if an error is returned, the controller\n   will retry the notification until it succeeds."
  },
  {
    "path": "docs/synthetic-size.md",
    "content": "# Synthetic size\n\nNeon storage has copy-on-write branching, which makes it difficult to\nanswer the question \"how large is my database\"? To give one reasonable\nanswer, we calculate _synthetic size_ for a project.\n\nThe calculation is called \"synthetic\", because it is based purely on\nthe user-visible logical size, which is the size that you would see on\na standalone PostgreSQL installation, and the amount of WAL, which is\nalso the same as what you'd see on a standalone PostgreSQL, for the\nsame set of updates.\n\nThe synthetic size does *not* depend on the actual physical size\nconsumed in the storage, or implementation details of the Neon storage\nlike garbage collection, compaction and compression.  There is a\nstrong *correlation* between the physical size and the synthetic size,\nbut the synthetic size is designed to be independent of the\nimplementation details, so that any improvements we make in the\nstorage system simply reduce our COGS. And vice versa: any bugs or bad\nimplementation where we keep more data than we would need to, do not\nchange the synthetic size or incur any costs to the user.\n\nThe synthetic size is calculated for the whole project. It is not\nstraightforward to attribute size to individual branches. See [What is\nthe size of an individual branch?](#what-is-the-size-of-an-individual-branch)\nfor a discussion of those difficulties.\n\nThe synthetic size is designed to:\n\n- Take into account the copy-on-write nature of the storage. For\n  example, if you create a branch, it doesn't immediately add anything\n  to the synthetic size. It starts to affect the synthetic size only\n  as it diverges from the parent branch.\n\n- Be independent of any implementation details of the storage, like\n  garbage collection, remote storage, or compression.\n\n## Terms & assumptions\n\n- logical size is the size of a branch *at a given point in\n  time*. 
It's the total size of all tables in all databases, as you\n  see with \"\\l+\" in psql for example, plus the Postgres SLRUs and some\n  small amount of metadata. Note that currently, Neon does not include\n  the SLRUs and metadata in the logical size. Refer to the comment in\n  [`get_current_logical_size_non_incremental()`](/pageserver/src/pgdatadir_mapping.rs#L813-L814).\n\n- a \"point in time\" is defined as an LSN value. You can convert a\n  timestamp to an LSN, but the storage internally works with LSNs.\n\n- PITR horizon can be set per-branch.\n\n- PITR horizon can be set as a time interval, e.g. 5 days or hours, or\n  as amount of WAL, in bytes.  If it's given as a time interval, it's\n  converted to an LSN for the calculation.\n\n- PITR horizon can be set to 0, if you don't want to retain any history.\n\n## Calculation\n\nInputs to the calculation are:\n- logical size of the database at different points in time,\n- amount of WAL generated, and\n- the PITR horizon settings\n\nThe synthetic size is based on an idealistic model of the storage\nsystem, where we pretend that the storage consists of two things:\n- snapshots, containing a full snapshot of the database, at a given\n  point in time, and\n- WAL.\n\nIn the simple case that the project contains just one branch (main),\nand a fixed PITR horizon, the synthetic size is the sum of:\n\n- the logical size of the branch *at the beginning of the PITR\n  horizon*, i.e. at the oldest point that you can still recover to, and\n- the size of the WAL covering the PITR horizon.\n\nThe snapshot allows you to recover to the beginning of the PITR\nhorizon, and the WAL allows you to recover from that point to any\npoint within the horizon.\n\n```\n                             WAL\n   -----------------------#########>\n                          ^\n                       snapshot\n\nLegend:\n  ##### PITR horizon. 
This is the region that you can still access\n        with Point-in-time query and you can still create branches\n        from.\n  ----- history that has fallen out of the PITR horizon, and can no\n        longer be accessed\n```\n\nNOTE: This is not how the storage system actually works! The actual\nimplementation is also based on snapshots and WAL, but the snapshots\nare taken for individual database pages and ranges of pages rather\nthan the whole database, and it is much more complicated. This model\nis a reasonable approximation, however, to make the synthetic size a\nuseful proxy for the actual storage consumption.\n\n\n## Example: Data is INSERTed\n\nFor example, let's assume that your database contained 10 GB of data\nat the beginning of the PITR horizon, and you have since then inserted\n5 GB of additional data into it. The additional insertions of 5 GB of\ndata consume roughly 5 GB of WAL. In that case, the synthetic size is:\n\n> 10 GB (snapshot) +  5 GB (WAL) = 15 GB\n\nIf you now set the PITR horizon on the project to 0, so that no\nhistorical data is retained, then the beginning of the PITR horizon would be\nat the end of the branch, so the size of the snapshot would be\ncalculated at the end of the branch, after the insertions. Then the\nsynthetic size is:\n\n> 15 GB (snapshot) + 0 GB (WAL) = 15 GB.\n\nIn this case, the synthetic size is the same, regardless of the PITR horizon,\nbecause all the history consists of inserts. The newly inserted data takes\nup the same amount of space, whether it's stored as part of the logical\nsnapshot, or as WAL. (*)\n\n(*) This is a rough approximation. In reality, the WAL contains\nheaders and other overhead, and on the other hand, the logical\nsnapshot includes empty space on pages, so the size of insertions in\nWAL can be smaller or greater than the size of the final table after\nthe insertions. 
But in most cases, it's in the same ballpark.\n\n## Example: Data is DELETEd\n\nLet's look at another example:\n\nLet's start again with a database that contains 10 GB of data. Then,\nyou DELETE 5 GB of the data, and run VACUUM to free up the space, so\nthat the logical size of the database is now only 5 GB.\n\nLet's assume that the WAL for the deletions and the vacuum take up\n100 MB of space. In that case, the synthetic size of the project is:\n\n> 10 GB (snapshot) + 100 MB (WAL) = 10.1 GB\n\nThis is much larger than the logical size of the database after the\ndeletions (5 GB). That's because the system still needs to retain the\ndeleted data, because it's still accessible to queries and branching\nin the PITR window.\n\nIf you now set the PITR horizon to 0 or just wait for time to pass so\nthat the data falls out of the PITR horizon, making the deleted data\ninaccessible, the synthetic size shrinks:\n\n> 5 GB (snapshot) + 0 GB (WAL) = 5 GB\n\n\n# Branching\n\nThings get more complicated with branching. Branches in Neon are\ncopy-on-write, which is also reflected in the synthetic size.\n\nWhen you create a branch, it doesn't immediately change the synthetic\nsize at all. The branch point is within the PITR horizon, and all the\ndata needed to recover to that point in time needs to be retained\nanyway.\n\nHowever, if you make modifications on the branch, the system needs to\nkeep the WAL of those modifications. The WAL is included in the\nsynthetic size.\n\n## Example: branch and INSERT\n\nLet's assume that you again start with a 10 GB database.\nOn the main branch, you insert 2 GB of data. 
Then you create\na branch at that point, and insert another 3 GB of data on the\nmain branch, and 1 GB of data on the child branch\n\n```\n  child                 +#####>\n                        |\n                        |    WAL\n  main    ---------###############>\n                   ^\n                snapshot\n```\n\nIn this case, the synthetic size consists of:\n- the snapshot at the beginning of the PITR horizon (10 GB)\n- the WAL on the main branch (2 GB + 3 GB = 5 GB)\n- the WAL on the child branch (1 GB)\n\nTotal: 16 GB\n\n# Diverging branches\n\nIf there is only a small amount of changes in the database on the\ndifferent branches, as in the previous example, the synthetic size\nconsists of a snapshot before the branch point, containing all the\nshared data, and the WAL on both branches. However, if the branches\ndiverge a lot, it is more efficient to store a separate snapshot of\nbranches.\n\n## Example: diverging branches\n\nYou start with a 10 GB database. You insert 5 GB of data on the main\nbranch. Then you create a branch, and immediately delete all the data\non the child branch and insert 5 GB of new data to it. Then you do the\nsame on the main branch. 
Let's assume\nthat the PITR horizon requires keeping the last 1 GB of WAL on\nboth branches.\n\n```\n                              snapshot\n                                  v     WAL\n  child                 +---------##############>\n                        |\n                        |\n  main     -------------+---------##############>\n                                  ^     WAL\n                              snapshot\n```\n\nIn this case, the synthetic size consists of:\n- snapshot at the beginning of the PITR horizon on the main branch (4 GB)\n- WAL on the main branch (1 GB)\n- snapshot at the beginning of the PITR horizon on the child branch (4 GB)\n- last 1 GB of WAL on the child branch (1 GB)\n\nTotal: 10 GB\n\nThe alternative way to store this would be to take only one snapshot\nat the branch point, and keep all the WAL on both\nbranches.  However, the size with that method would be larger, as it\nwould require one 10 GB snapshot, and 5 GB + 5 GB of WAL. It depends\non the amount of changes (WAL) on both branches, and the logical size\nat the branch point, which method would result in a smaller synthetic\nsize. On each branch point, the system performs the calculation with\nboth methods, and uses the method that is cheaper, i.e. the one that\nresults in a smaller synthetic size.\n\nOne way to think about this is that when you create a branch, it\nstarts out as a thin branch that only stores the WAL since the branch\npoint.  As you modify it, and the amount of WAL grows, at some point\nit becomes cheaper to store a completely new snapshot of the branch\nand truncate the WAL.\n\n\n# What is the size of an individual branch?\n\nSynthetic size is calculated for the whole project, and includes all\nbranches. 
There is no such thing as the size of a branch, because it\nis not straightforward to attribute the parts of size to individual\nbranches.\n\n## Example: attributing size to branches\n\n(copied from https://github.com/neondatabase/neon/pull/2884#discussion_r1029365278)\n\nImagine that you create two branches, A and B, at the same point from\nmain branch, and do a couple of small updates on both branches. Then\nsix months pass, and during those six months the data on the main\nbranch churns over completely multiple times. The retention period is,\nsay 1 month.\n\n```\n                      +------> A\n                     /\n--------------------*-------------------------------> main\n                     \\\n                      +--------> B\n```\n\nIn that situation, the synthetic tenant size would be calculated based\non a \"logical snapshot\" at the branch point, that is, the logical size\nof the database at that point. Plus the WAL on branches A and B. Let's\nsay that the snapshot size is 10 GB, and the WAL is 1 MB on both\nbranches A and B. So the total synthetic storage size is 10002\nMB. (Let's ignore the main branch for now, that would be just added to\nthe sum)\n\nHow would you break that down per branch? I can think of three\ndifferent ways to do it, and all of them have their own problems:\n\n### Subtraction method\n\nFor each branch, calculate how much smaller the total synthetic size\nwould be, if that branch didn't exist. In other words, how much would\nyou save if you dropped the branch. With this method, the size of\nbranches A and B is 1 MB.\n\nWith this method, the 10 GB shared logical snapshot is not included\nfor A nor B. So the size of all branches is not equal to the total\nsynthetic size of the tenant. If you drop branch A, you save 1 MB as\nyou'd expect, but also the size of B suddenly jumps from 1 MB to 10001\nMB, which might feel surprising.\n\n### Division method\n\nDivide the common parts evenly across all branches that need\nthem. 
With this method, the size of branches A and B would be 5001 MB.\n\nWith this method, the sum of all branches adds up to the total\nsynthetic size. But it's surprising in other ways: if you drop branch\nA, you might think that you save 5001 MB, but in reality you only save\n1 MB, and the size of branch B suddenly grows from 5001 to 10001 MB.\n\n### Addition method\n\nFor each branch, include all the snapshots and WAL that it depends on,\neven if some of them are shared by other branches. With this method,\nthe size of branches A and B would be 10001 MB.\n\nThe surprise with this method is that the sum of all the branches is\nlarger than the total synthetic size. And if you drop branch A, the\ntotal synthetic size doesn't fall by 10001 MB as you might think.\n\n# Alternatives\n\nA sort of cop-out method would be to show the whole tree of branches\ngraphically, and for each section of WAL or logical snapshot, display\nthe size of that section. You can then see which branches depend on\nwhich sections, which sections are shared etc. That would be good to\nhave in the UI anyway.\n\nOr perhaps calculate per-branch numbers using the subtraction method,\nand in addition to that, one more number for \"shared size\" that\nincludes all the data that is needed by more than one branch.\n\n## Which is the right method?\n\nThe bottom line is that it's not straightforward to attribute the\nsynthetic size to individual branches. There are things we can do, and\nall of those methods are pretty straightforward to implement, but they\nall have their own problems. What makes sense depends a lot on what\nyou want to do with the number, what question you are trying to\nanswer.\n"
  },
  {
    "path": "docs/tools.md",
    "content": "# Useful development tools\n\nThis readme contains some hints on how to set up some optional development tools.\n\n## ccls\n\n[ccls](https://github.com/MaskRay/ccls) is a c/c++ language server. It requires some setup\nto work well. There are different ways to do it but here's what works for me:\n1. Make a common parent directory for all your common neon projects. (for example, `~/src/neondatabase/`)\n2. Go to `vendor/postgres-v15`\n3. Run `make clean && ./configure`\n4. Install [bear](https://github.com/rizsotto/Bear), and run `bear -- make -j4`\n5. Copy the generated `compile_commands.json` to `~/src/neondatabase` (or equivalent)\n6. Run `touch ~/src/neondatabase/.ccls-root` this will make the `compile_commands.json` file discoverable in all subdirectories\n\nWith this setup you will get decent lsp mileage inside the postgres repo, and also any postgres extensions that you put in `~/src/neondatabase/`, like `pg_embedding`, or inside `~/src/neondatabase/neon/pgxn` as well.\n\nSome additional tips for various IDEs:\n\n### Emacs\n\nTo improve performance: `(setq lsp-lens-enable nil)`\n"
  },
  {
    "path": "docs/updating-postgres.md",
    "content": "# Updating Postgres\n\n## Minor Versions\n\nWhen upgrading to a new minor version of Postgres, please follow these steps:\n\n_Example: 15.4 is the new minor version to upgrade to from 15.3._\n\n1. Clone the Neon Postgres repository if you have not done so already.\n\n    ```shell\n    git clone git@github.com:neondatabase/postgres.git\n    ```\n\n1. Add the Postgres upstream remote.\n\n    ```shell\n    git remote add upstream https://git.postgresql.org/git/postgresql.git\n    ```\n\n1. Create a new branch based on the stable branch you are updating.\n\n    ```shell\n    git checkout -b my-branch-15 REL_15_STABLE_neon\n    ```\n\n1. Find the upstream release tags you're looking for. They are of the form `REL_X_Y`.\n\n1. Merge the upstream tag into the branch you created on the tag and resolve any conflicts.\n\n    ```shell\n    git fetch upstream REL_15_4\n    git merge REL_15_4\n    ```\n\n    In the commit message of the merge commit, mention if there were\n    any non-trivial conflicts or other issues.\n\n1. Run the Postgres test suite to make sure our commits have not affected\nPostgres in a negative way.\n\n    ```shell\n    make check\n    # OR\n    meson test -C builddir\n    ```\n\n1. Push your branch to the Neon Postgres repository.\n\n    ```shell\n    git push origin my-branch-15\n    ```\n\n1. Clone the Neon repository if you have not done so already.\n\n    ```shell\n    git clone git@github.com:neondatabase/neon.git\n    ```\n\n1. Create a new branch.\n\n1. Change the `revisions.json` file to point at the HEAD of your Postgres\nbranch.\n\n1. Update the Git submodule.\n\n    ```shell\n    git submodule set-branch --branch my-branch-15 vendor/postgres-v15\n    git submodule update --remote vendor/postgres-v15\n    ```\n\n1. Run the Neon test suite to make sure that Neon is still good to go on this\nminor Postgres release.\n\n    ```shell\n    ./scripts/poetry -k pg15\n    ```\n\n1. Commit your changes.\n\n1. 
Create a pull request, and wait for CI to go green.\n\n1. Push the Postgres branches with the merge commits into the Neon Postgres repository.\n\n    ```shell\n    git push origin my-branch-15:REL_15_STABLE_neon\n    ```\n\n1. Update your Neon PR to point at the branches.\n\n    ```shell\n    git submodule set-branch --branch REL_15_STABLE_neon vendor/postgres-v15\n    git commit --amend --no-edit\n    git push --force origin\n    ```\n\n1. Merge the pull request after getting approval(s) and CI completion.\n"
  },
  {
    "path": "docs/walservice.md",
    "content": "# WAL service\n\nThe neon WAL service acts as a holding area and redistribution\ncenter for recently generated WAL. The primary Postgres server streams\nthe WAL to the WAL safekeeper, and treats it like a (synchronous)\nreplica. A replication slot is used in the primary to prevent the\nprimary from discarding WAL that hasn't been streamed to the WAL\nservice yet.\n\n```\n+--------------+              +------------------+\n|              |     WAL      |                  |\n| Compute node |  ----------> |   WAL Service    |\n|              |              |                  |\n+--------------+              +------------------+\n                                     |\n                                     |\n                                     | WAL\n                                     |\n                                     |\n                                     V\n                              +--------------+\n                              |              |\n                              | Pageservers  |\n                              |              |\n                              +--------------+\n```\n\n\nThe WAL service consists of multiple WAL safekeepers that all store a\ncopy of the WAL. A WAL record is considered durable when the majority\nof safekeepers have received and stored the WAL to local disk. 
A\nconsensus algorithm based on Paxos is used to manage the quorum.\n\n```\n  +-------------------------------------------+\n  | WAL Service                               |\n  |                                           |\n  |                                           |\n  |  +------------+                           |\n  |  | safekeeper |                           |\n  |  +------------+                           |\n  |                                           |\n  |  +------------+                           |\n  |  | safekeeper |                           |\n  |  +------------+                           |\n  |                                           |\n  |  +------------+                           |\n  |  | safekeeper |                           |\n  |  +------------+                           |\n  |                                           |\n  +-------------------------------------------+\n```\n\nThe primary connects to the WAL safekeepers, so it works in a \"push\"\nfashion.  That's different from how streaming replication usually\nworks, where the replica initiates the connection. To do that, there\nis a component called the \"WAL proposer\". The WAL proposer is a\nbackground worker that runs in the primary Postgres server. It\nconnects to the WAL safekeeper, and sends all the WAL. (PostgreSQL's\narchive_command works in the \"push\" style, but it operates on a WAL\nsegment granularity. If PostgreSQL had a push style API for streaming,\nthe WAL proposer could be implemented using it.)\n\nThe Page Server connects to the WAL safekeeper, using the same\nstreaming replication protocol that's used between Postgres primary\nand standby. 
You can also connect the Page Server directly to a\nprimary PostgreSQL node for testing.\n\nIn a production installation, there are multiple WAL safekeepers\nrunning on different nodes, and there is a quorum mechanism using the\nPaxos algorithm to ensure that a piece of WAL is considered as durable\nonly after it has been flushed to disk on more than half of the WAL\nsafekeepers. The Paxos and crash recovery algorithm ensures that only\none primary node can be actively streaming WAL to the quorum of\nsafekeepers.\n\nSee [this section](safekeeper-protocol.md) for a more detailed description of\nthe consensus protocol. spec/ contains a TLA+ specification of it.\n\n# Q&A\n\nQ: Why have a separate service instead of connecting Page Server directly to a\n   primary PostgreSQL node?\nA: Page Server is a single server which can be lost. As our primary\n   fault-tolerant storage is S3, we do not want to wait for it before\n   committing a transaction. The WAL service acts as a temporary fault-tolerant\n   storage for recent data before it gets to the Page Server and then finally\n   to S3. Whenever WALs and pages are committed to S3, WAL's storage can be\n   trimmed.\n\nQ: What if the compute node evicts a page, needs it back, but the page is yet\n   to reach the Page Server?\nA: If the compute node has evicted a page, changes to it have been WAL-logged\n   (that's why it is called Write Ahead logging; there are some exceptions like\n   index builds, but these are exceptions). These WAL records will eventually\n   reach the Page Server. The Page Server notes that the compute node requests\n   pages with a very recent LSN and will not respond to the compute node until a\n   corresponding WAL is received from WAL safekeepers.\n\nQ: How long may Page Server wait for?\nA: Not too long, hopefully. If a page is evicted, it probably was not used for\n   a while, so the WAL service has had enough time to push changes to the Page\n   Server. 
To limit the lag, tune backpressure using `max_replication_*_lag` settings.\n\nQ: How do WAL safekeepers communicate with each other?\nA: They may only send each other messages via the compute node; they never\n   communicate directly with each other.\n\nQ: Why have a consensus algorithm if there is only a single compute node?\nA: Actually there may be moments with multiple PostgreSQL nodes running at the\n   same time. E.g. we are bringing one up and one down. We would like to avoid\n   simultaneous writes from different nodes, so there should be a consensus on\n   who is the primary node.\n\n# Terminology\n\nWAL service - The service as a whole that ensures that WAL is stored durably.\n\nWAL safekeeper - One node that participates in the quorum. All the safekeepers\ntogether form the WAL service.\n\nWAL acceptor, WAL proposer - In the context of the consensus algorithm, the Postgres\ncompute node is also known as the WAL proposer, and the safekeeper is also known\nas the acceptor. Those are the standard terms in the Paxos algorithm.\n"
  },
  {
    "path": "endpoint_storage/Cargo.toml",
    "content": "[package]\nname = \"endpoint_storage\"\nversion = \"0.0.1\"\nedition.workspace = true\nlicense.workspace = true\n[dependencies]\nanyhow.workspace = true\naxum-extra.workspace = true\naxum.workspace = true\ncamino.workspace = true\nclap.workspace = true\nfutures.workspace = true\njsonwebtoken.workspace = true\nprometheus.workspace = true\nremote_storage.workspace = true\nserde.workspace = true\nserde_json.workspace = true\ntokio-util.workspace = true\ntokio.workspace = true\ntracing.workspace = true\nutils = { path = \"../libs/utils\", default-features = false }\nworkspace_hack.workspace = true\n[dev-dependencies]\ncamino-tempfile.workspace = true\nhttp-body-util.workspace = true\nitertools.workspace = true\nrand.workspace = true\ntest-log.workspace = true\ntower.workspace = true\n"
  },
  {
    "path": "endpoint_storage/src/app.rs",
    "content": "use anyhow::anyhow;\nuse axum::body::{Body, Bytes};\nuse axum::response::{IntoResponse, Response};\nuse axum::{Router, http::StatusCode};\nuse endpoint_storage::{PrefixS3Path, S3Path, Storage, bad_request, internal_error, not_found, ok};\nuse remote_storage::TimeoutOrCancel;\nuse remote_storage::{DownloadError, DownloadOpts, GenericRemoteStorage, RemotePath};\nuse std::{sync::Arc, time::SystemTime, time::UNIX_EPOCH};\nuse tokio_util::sync::CancellationToken;\nuse tracing::{error, info};\nuse utils::backoff::retry;\n\npub fn app(state: Arc<Storage>) -> Router<()> {\n    use axum::routing::{delete as _delete, get as _get};\n    let delete_prefix = _delete(delete_prefix);\n    // NB: On any changes do not forget to update the OpenAPI spec\n    // in /endpoint_storage/src/openapi_spec.yml.\n    Router::new()\n        .route(\n            \"/{tenant_id}/{timeline_id}/{endpoint_id}/{*path}\",\n            _get(get).put(set).delete(delete),\n        )\n        .route(\n            \"/{tenant_id}/{timeline_id}/{endpoint_id}\",\n            delete_prefix.clone(),\n        )\n        .route(\"/{tenant_id}/{timeline_id}\", delete_prefix.clone())\n        .route(\"/{tenant_id}\", delete_prefix)\n        .route(\"/metrics\", _get(metrics))\n        .route(\"/status\", _get(async || StatusCode::OK.into_response()))\n        .with_state(state)\n}\n\ntype Result = anyhow::Result<Response, Response>;\ntype State = axum::extract::State<Arc<Storage>>;\n\nconst CONTENT_TYPE: &str = \"content-type\";\nconst APPLICATION_OCTET_STREAM: &str = \"application/octet-stream\";\nconst WARN_THRESHOLD: u32 = 3;\nconst MAX_RETRIES: u32 = 10;\n\nasync fn metrics() -> Result {\n    prometheus::TextEncoder::new()\n        .encode_to_string(&prometheus::gather())\n        .map(|s| s.into_response())\n        .map_err(|e| internal_error(e, \"/metrics\", \"collecting metrics\"))\n}\n\nasync fn get(S3Path { path }: S3Path, state: State) -> Result {\n    info!(%path, \"downloading\");\n    
let download_err = |err| {\n        if let DownloadError::NotFound = err {\n            info!(%path, %err, \"downloading\"); // 404 is not an issue of _this_ service\n            return not_found(&path);\n        }\n        internal_error(err, &path, \"downloading\")\n    };\n    let cancel = state.cancel.clone();\n    let opts = &DownloadOpts::default();\n\n    let stream = retry(\n        async || state.storage.download(&path, opts, &cancel).await,\n        DownloadError::is_permanent,\n        WARN_THRESHOLD,\n        MAX_RETRIES,\n        \"downloading\",\n        &cancel,\n    )\n    .await\n    .unwrap_or(Err(DownloadError::Cancelled))\n    .map_err(download_err)?\n    .download_stream;\n\n    Response::builder()\n        .status(StatusCode::OK)\n        .header(CONTENT_TYPE, APPLICATION_OCTET_STREAM)\n        .body(Body::from_stream(stream))\n        .map_err(|e| internal_error(e, path, \"reading response\"))\n}\n\n// Best solution for files is multipart upload, but remote_storage doesn't support it,\n// so we can either read Bytes in memory and push at once or forward BodyDataStream to\n// remote_storage. 
The latter may seem more performant, but BodyDataStream doesn't have a\n// guaranteed size() which may produce issues while uploading to s3.\n// So, currently we're going with an in-memory copy plus a boundary to prevent uploading\n// very large files.\nasync fn set(S3Path { path }: S3Path, state: State, bytes: Bytes) -> Result {\n    info!(%path, \"uploading\");\n    let request_len = bytes.len();\n    let max_len = state.max_upload_file_limit;\n    if request_len > max_len {\n        return Err(bad_request(\n            anyhow!(\"File size {request_len} exceeds max {max_len}\"),\n            \"uploading\",\n        ));\n    }\n\n    let cancel = state.cancel.clone();\n    let fun = async || {\n        let stream = bytes_to_stream(bytes.clone());\n        state\n            .storage\n            .upload(stream, request_len, &path, None, &cancel)\n            .await\n    };\n    retry(\n        fun,\n        TimeoutOrCancel::caused_by_cancel,\n        WARN_THRESHOLD,\n        MAX_RETRIES,\n        \"uploading\",\n        &cancel,\n    )\n    .await\n    .unwrap_or(Err(anyhow!(\"uploading cancelled\")))\n    .map_err(|e| internal_error(e, path, \"reading response\"))?;\n    Ok(ok())\n}\n\nasync fn delete(S3Path { path }: S3Path, state: State) -> Result {\n    info!(%path, \"deleting\");\n    let cancel = state.cancel.clone();\n    retry(\n        async || state.storage.delete(&path, &cancel).await,\n        TimeoutOrCancel::caused_by_cancel,\n        WARN_THRESHOLD,\n        MAX_RETRIES,\n        \"deleting\",\n        &cancel,\n    )\n    .await\n    .unwrap_or(Err(anyhow!(\"deleting cancelled\")))\n    .map_err(|e| internal_error(e, path, \"deleting\"))?;\n    Ok(ok())\n}\n\nasync fn delete_prefix(PrefixS3Path { path }: PrefixS3Path, state: State) -> Result {\n    info!(%path, \"deleting prefix\");\n    let cancel = state.cancel.clone();\n    retry(\n        async || state.storage.delete_prefix(&path, &cancel).await,\n        TimeoutOrCancel::caused_by_cancel,\n    
    WARN_THRESHOLD,\n        MAX_RETRIES,\n        \"deleting prefix\",\n        &cancel,\n    )\n    .await\n    .unwrap_or(Err(anyhow!(\"deleting prefix cancelled\")))\n    .map_err(|e| internal_error(e, path, \"deleting prefix\"))?;\n    Ok(ok())\n}\n\npub async fn check_storage_permissions(\n    client: &GenericRemoteStorage,\n    cancel: CancellationToken,\n) -> anyhow::Result<()> {\n    info!(\"storage permissions check\");\n\n    // as_nanos() as multiple instances proxying same bucket may be started at once\n    let now = SystemTime::now()\n        .duration_since(UNIX_EPOCH)?\n        .as_nanos()\n        .to_string();\n\n    let path = RemotePath::from_string(&format!(\"write_access_{now}\"))?;\n    info!(%path, \"uploading\");\n\n    let body = now.to_string();\n    let stream = bytes_to_stream(Bytes::from(body.clone()));\n    client\n        .upload(stream, body.len(), &path, None, &cancel)\n        .await?;\n\n    use tokio::io::AsyncReadExt;\n    info!(%path, \"downloading\");\n    let download_opts = DownloadOpts {\n        kind: remote_storage::DownloadKind::Small,\n        ..Default::default()\n    };\n    let mut body_read_buf = Vec::new();\n    let stream = client\n        .download(&path, &download_opts, &cancel)\n        .await?\n        .download_stream;\n    tokio_util::io::StreamReader::new(stream)\n        .read_to_end(&mut body_read_buf)\n        .await?;\n    let body_read = String::from_utf8(body_read_buf)?;\n    if body != body_read {\n        error!(%body, %body_read, \"File contents do not match\");\n        anyhow::bail!(\"Read back file doesn't match original\")\n    }\n\n    info!(%path, \"removing\");\n    client.delete(&path, &cancel).await\n}\n\nfn bytes_to_stream(bytes: Bytes) -> impl futures::Stream<Item = std::io::Result<Bytes>> {\n    futures::stream::once(futures::future::ready(Ok(bytes)))\n}\n\n#[cfg(test)]\nmod tests {\n    use super::*;\n    use axum::{body::Body, extract::Request, response::Response};\n    use 
http_body_util::BodyExt;\n    use itertools::iproduct;\n    use std::env::var;\n    use std::sync::Arc;\n    use std::time::Duration;\n    use test_log::test as testlog;\n    use tower::{Service, util::ServiceExt};\n    use utils::id::{TenantId, TimelineId};\n\n    // see libs/remote_storage/tests/test_real_s3.rs\n    const REAL_S3_ENV: &str = \"ENABLE_REAL_S3_REMOTE_STORAGE\";\n    const REAL_S3_BUCKET: &str = \"REMOTE_STORAGE_S3_BUCKET\";\n    const REAL_S3_REGION: &str = \"REMOTE_STORAGE_S3_REGION\";\n\n    async fn proxy() -> (Storage, Option<camino_tempfile::Utf8TempDir>) {\n        let cancel = CancellationToken::new();\n        let (dir, storage) = if var(REAL_S3_ENV).is_err() {\n            // tests execute in parallel and we need a new directory for each of them\n            let dir = camino_tempfile::tempdir().unwrap();\n            let fs =\n                remote_storage::LocalFs::new(dir.path().into(), Duration::from_secs(5)).unwrap();\n            (Some(dir), GenericRemoteStorage::LocalFs(fs))\n        } else {\n            // test_real_s3::create_s3_client is hard to reference, reimplementing here\n            let millis = SystemTime::now()\n                .duration_since(UNIX_EPOCH)\n                .unwrap()\n                .as_millis();\n            use rand::Rng;\n            let random = rand::rng().random::<u32>();\n\n            let s3_config = remote_storage::S3Config {\n                bucket_name: var(REAL_S3_BUCKET).unwrap(),\n                bucket_region: var(REAL_S3_REGION).unwrap(),\n                prefix_in_bucket: Some(format!(\"test_{millis}_{random:08x}/\")),\n                endpoint: None,\n                concurrency_limit: std::num::NonZeroUsize::new(100).unwrap(),\n                max_keys_per_list_response: None,\n                upload_storage_class: None,\n            };\n            let bucket = remote_storage::S3Bucket::new(&s3_config, Duration::from_secs(1))\n                .await\n                .unwrap();\n        
    (None, GenericRemoteStorage::AwsS3(Arc::new(bucket)))\n        };\n\n        let proxy = Storage {\n            auth: endpoint_storage::JwtAuth::new(TEST_PUB_KEY_ED25519).unwrap(),\n            storage,\n            cancel: cancel.clone(),\n            max_upload_file_limit: usize::MAX,\n        };\n        check_storage_permissions(&proxy.storage, cancel)\n            .await\n            .unwrap();\n        (proxy, dir)\n    }\n\n    // see libs/utils/src/auth.rs\n    const TEST_PUB_KEY_ED25519: &[u8] = b\"\n-----BEGIN PUBLIC KEY-----\nMCowBQYDK2VwAyEARYwaNBayR+eGI0iXB4s3QxE3Nl2g1iWbr6KtLWeVD/w=\n-----END PUBLIC KEY-----\n\";\n\n    const TEST_PRIV_KEY_ED25519: &[u8] = br#\"\n-----BEGIN PRIVATE KEY-----\nMC4CAQAwBQYDK2VwBCIEID/Drmc1AA6U/znNRWpF3zEGegOATQxfkdWxitcOMsIH\n-----END PRIVATE KEY-----\n\"#;\n\n    async fn request(req: Request<Body>) -> Response<Body> {\n        let (proxy, _) = proxy().await;\n        app(Arc::new(proxy))\n            .into_service()\n            .oneshot(req)\n            .await\n            .unwrap()\n    }\n\n    #[testlog(tokio::test)]\n    async fn status() {\n        let res = Request::builder()\n            .uri(\"/status\")\n            .body(Body::empty())\n            .map(request)\n            .unwrap()\n            .await;\n        assert_eq!(res.status(), StatusCode::OK);\n    }\n\n    fn routes() -> impl Iterator<Item = (&'static str, &'static str)> {\n        iproduct!(\n            vec![\"/1\", \"/1/2\", \"/1/2/3\", \"/1/2/3/4\"],\n            vec![\"GET\", \"PUT\", \"DELETE\"]\n        )\n    }\n\n    #[testlog(tokio::test)]\n    async fn no_token() {\n        for (uri, method) in routes() {\n            info!(%uri, %method);\n            let res = Request::builder()\n                .uri(uri)\n                .method(method)\n                .body(Body::empty())\n                .map(request)\n                .unwrap()\n                .await;\n            assert!(matches!(\n                res.status(),\n          
      StatusCode::METHOD_NOT_ALLOWED | StatusCode::BAD_REQUEST\n            ));\n        }\n    }\n\n    #[testlog(tokio::test)]\n    async fn invalid_token() {\n        for (uri, method) in routes() {\n            info!(%uri, %method);\n            let status = Request::builder()\n                .uri(uri)\n                .header(\"Authorization\", \"Bearer 123\")\n                .method(method)\n                .body(Body::empty())\n                .map(request)\n                .unwrap()\n                .await;\n            assert!(matches!(\n                status.status(),\n                StatusCode::METHOD_NOT_ALLOWED | StatusCode::BAD_REQUEST\n            ));\n        }\n    }\n\n    const TENANT_ID: TenantId =\n        TenantId::from_array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 2, 3, 4, 5, 6]);\n    const TIMELINE_ID: TimelineId =\n        TimelineId::from_array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 2, 3, 4, 5, 7]);\n    const ENDPOINT_ID: &str = \"ep-winter-frost-a662z3vg\";\n    fn token() -> String {\n        let claims = endpoint_storage::claims::EndpointStorageClaims {\n            tenant_id: TENANT_ID,\n            timeline_id: TIMELINE_ID,\n            endpoint_id: ENDPOINT_ID.into(),\n            exp: u64::MAX,\n        };\n        let key = jsonwebtoken::EncodingKey::from_ed_pem(TEST_PRIV_KEY_ED25519).unwrap();\n        let header = jsonwebtoken::Header::new(endpoint_storage::VALIDATION_ALGO);\n        jsonwebtoken::encode(&header, &claims, &key).unwrap()\n    }\n\n    #[testlog(tokio::test)]\n    async fn unauthorized() {\n        let (proxy, _) = proxy().await;\n        let mut app = app(Arc::new(proxy)).into_service();\n        let token = token();\n        let args = itertools::iproduct!(\n            vec![TENANT_ID.to_string(), TenantId::generate().to_string()],\n            vec![TIMELINE_ID.to_string(), TimelineId::generate().to_string()],\n            vec![ENDPOINT_ID, \"ep-ololo\"]\n        )\n        // first one is fully valid path, second 
path is valid for GET as\n        // read paths may have different endpoint if tenant and timeline matches\n        // (needed for prewarming RO->RW replica)\n        .skip(2);\n\n        for ((uri, method), (tenant, timeline, endpoint)) in iproduct!(routes(), args) {\n            info!(%uri, %method, %tenant, %timeline, %endpoint);\n            let request = Request::builder()\n                .uri(format!(\"/{tenant}/{timeline}/{endpoint}/sub/path/key\"))\n                .method(method)\n                .header(\"Authorization\", format!(\"Bearer {token}\"))\n                .body(Body::empty())\n                .unwrap();\n            let status = ServiceExt::ready(&mut app)\n                .await\n                .unwrap()\n                .call(request)\n                .await\n                .unwrap()\n                .status();\n            assert_eq!(status, StatusCode::UNAUTHORIZED);\n        }\n    }\n\n    #[testlog(tokio::test)]\n    async fn method_not_allowed() {\n        let token = token();\n        let iter = iproduct!(vec![\"\", \"/..\"], vec![\"GET\", \"PUT\"]);\n        for (key, method) in iter {\n            let status = Request::builder()\n                .uri(format!(\"/{TENANT_ID}/{TIMELINE_ID}/{ENDPOINT_ID}{key}\"))\n                .method(method)\n                .header(\"Authorization\", format!(\"Bearer {token}\"))\n                .body(Body::empty())\n                .map(request)\n                .unwrap()\n                .await\n                .status();\n            assert!(matches!(\n                status,\n                StatusCode::BAD_REQUEST | StatusCode::METHOD_NOT_ALLOWED\n            ));\n        }\n    }\n\n    async fn requests_chain(\n        chain: impl Iterator<Item = (String, &str, &'static str, StatusCode, bool)>,\n        token: impl Fn(&str) -> String,\n    ) {\n        let (proxy, _) = proxy().await;\n        let mut app = app(Arc::new(proxy)).into_service();\n        for (uri, method, body, 
expected_status, compare_body) in chain {\n            info!(%uri, %method, %body, %expected_status);\n            let bearer = format!(\"Bearer {}\", token(&uri));\n            let request = Request::builder()\n                .uri(uri)\n                .method(method)\n                .header(\"Authorization\", &bearer)\n                .body(Body::from(body))\n                .unwrap();\n            let response = ServiceExt::ready(&mut app)\n                .await\n                .unwrap()\n                .call(request)\n                .await\n                .unwrap();\n            assert_eq!(response.status(), expected_status);\n            if !compare_body {\n                continue;\n            }\n            let read_body = response.into_body().collect().await.unwrap().to_bytes();\n            assert_eq!(body, read_body);\n        }\n    }\n\n    #[testlog(tokio::test)]\n    async fn metrics() {\n        let uri = format!(\"/{TENANT_ID}/{TIMELINE_ID}/{ENDPOINT_ID}/key\");\n        let req = vec![\n            (uri.clone(), \"PUT\", \"body\", StatusCode::OK, false),\n            (uri.clone(), \"DELETE\", \"\", StatusCode::OK, false),\n        ];\n        requests_chain(req.into_iter(), |_| token()).await;\n\n        let res = Request::builder()\n            .uri(\"/metrics\")\n            .body(Body::empty())\n            .map(request)\n            .unwrap()\n            .await;\n        assert_eq!(res.status(), StatusCode::OK);\n        let body = res.into_body().collect().await.unwrap().to_bytes();\n        let body = String::from_utf8_lossy(&body);\n        tracing::debug!(%body);\n        // Storage metrics are not gathered for LocalFs\n        if var(REAL_S3_ENV).is_ok() {\n            assert!(body.contains(\"remote_storage_s3_deleted_objects_total\"));\n        }\n\n        #[cfg(target_os = \"linux\")]\n        assert!(body.contains(\"process_threads\"));\n    }\n\n    #[testlog(tokio::test)]\n    async fn insert_retrieve_remove() {\n        let 
uri = format!(\"/{TENANT_ID}/{TIMELINE_ID}/{ENDPOINT_ID}/key\");\n        let chain = vec![\n            (uri.clone(), \"GET\", \"\", StatusCode::NOT_FOUND, false),\n            (uri.clone(), \"PUT\", \"пыщьпыщь\", StatusCode::OK, false),\n            (uri.clone(), \"GET\", \"пыщьпыщь\", StatusCode::OK, true),\n            (uri.clone(), \"DELETE\", \"\", StatusCode::OK, false),\n            (uri, \"GET\", \"\", StatusCode::NOT_FOUND, false),\n        ];\n        requests_chain(chain.into_iter(), |_| token()).await;\n    }\n\n    #[testlog(tokio::test)]\n    async fn read_other_endpoint_data() {\n        let uri = format!(\"/{TENANT_ID}/{TIMELINE_ID}/other_endpoint/key\");\n        let chain = vec![\n            (uri.clone(), \"GET\", \"\", StatusCode::NOT_FOUND, false),\n            (uri.clone(), \"PUT\", \"\", StatusCode::UNAUTHORIZED, false),\n        ];\n        requests_chain(chain.into_iter(), |_| token()).await;\n    }\n\n    fn delete_prefix_token(uri: &str) -> String {\n        let parts = uri.split(\"/\").collect::<Vec<&str>>();\n        let claims = endpoint_storage::claims::DeletePrefixClaims {\n            tenant_id: parts.get(1).map(|c| c.parse().unwrap()).unwrap(),\n            timeline_id: parts.get(2).map(|c| c.parse().unwrap()),\n            endpoint_id: parts.get(3).map(ToString::to_string),\n            exp: u64::MAX,\n        };\n        let key = jsonwebtoken::EncodingKey::from_ed_pem(TEST_PRIV_KEY_ED25519).unwrap();\n        let header = jsonwebtoken::Header::new(endpoint_storage::VALIDATION_ALGO);\n        jsonwebtoken::encode(&header, &claims, &key).unwrap()\n    }\n\n    // Can't use single digit numbers as they won't be validated as TimelineId and EndpointId\n    #[testlog(tokio::test)]\n    async fn delete_prefix() {\n        let tenant_id =\n            TenantId::from_array([1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]).to_string();\n        let t2 = TimelineId::from_array([2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]);\n        
let t3 = TimelineId::from_array([3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]);\n        let t4 = TimelineId::from_array([4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]);\n        let f = |timeline, path| format!(\"/{tenant_id}/{timeline}{path}\");\n        // Why extra slash in string literals? Axum is weird with URIs:\n        // /1/2 and 1/2/ match different routes, thus first yields OK and second NOT_FOUND\n        //  as it matches /tenant/timeline/endpoint, see https://stackoverflow.com/a/75355932\n        // The cost of removing trailing slash is surprisingly high:\n        // * Add tower dependency with NormalizePath layer\n        // * wrap Router<()> in this layer https://github.com/tokio-rs/axum/discussions/2377\n        // * Rewrite make_service() -> into_make_service()\n        // * Rewrite oneshot() (not available for NormalizePath)\n        // I didn't manage to get it working correctly\n        let chain = vec![\n            // create 1/2/3/4, 1/2/3/5, delete prefix 1/2/3 -> empty\n            (f(t2, \"/3/4\"), \"PUT\", \"\", StatusCode::OK, false),\n            (f(t2, \"/3/4\"), \"PUT\", \"\", StatusCode::OK, false), // we can override file contents\n            (f(t2, \"/3/5\"), \"PUT\", \"\", StatusCode::OK, false),\n            (f(t2, \"/3\"), \"DELETE\", \"\", StatusCode::OK, false),\n            (f(t2, \"/3/4\"), \"GET\", \"\", StatusCode::NOT_FOUND, false),\n            (f(t2, \"/3/5\"), \"GET\", \"\", StatusCode::NOT_FOUND, false),\n            // create 1/2/3/4, 1/2/5/6, delete prefix 1/2/3 -> 1/2/5/6\n            (f(t2, \"/3/4\"), \"PUT\", \"\", StatusCode::OK, false),\n            (f(t2, \"/5/6\"), \"PUT\", \"\", StatusCode::OK, false),\n            (f(t2, \"/3\"), \"DELETE\", \"\", StatusCode::OK, false),\n            (f(t2, \"/3/4\"), \"GET\", \"\", StatusCode::NOT_FOUND, false),\n            (f(t2, \"/5/6\"), \"GET\", \"\", StatusCode::OK, false),\n            // create 1/2/3/4, 1/2/7/8, delete prefix 1/2 -> empty\n            
(f(t2, \"/3/4\"), \"PUT\", \"\", StatusCode::OK, false),\n            (f(t2, \"/7/8\"), \"PUT\", \"\", StatusCode::OK, false),\n            (f(t2, \"\"), \"DELETE\", \"\", StatusCode::OK, false),\n            (f(t2, \"/3/4\"), \"GET\", \"\", StatusCode::NOT_FOUND, false),\n            (f(t2, \"/7/8\"), \"GET\", \"\", StatusCode::NOT_FOUND, false),\n            // create 1/2/3/4, 1/2/5/6, 1/3/8/9, delete prefix 1/2/3 -> 1/2/5/6, 1/3/8/9\n            (f(t2, \"/3/4\"), \"PUT\", \"\", StatusCode::OK, false),\n            (f(t2, \"/5/6\"), \"PUT\", \"\", StatusCode::OK, false),\n            (f(t3, \"/8/9\"), \"PUT\", \"\", StatusCode::OK, false),\n            (f(t2, \"/3\"), \"DELETE\", \"\", StatusCode::OK, false),\n            (f(t2, \"/3/4\"), \"GET\", \"\", StatusCode::NOT_FOUND, false),\n            (f(t2, \"/5/6\"), \"GET\", \"\", StatusCode::OK, false),\n            (f(t3, \"/8/9\"), \"GET\", \"\", StatusCode::OK, false),\n            // create 1/4/5/6, delete prefix 1/2 -> 1/3/8/9, 1/4/5/6\n            (f(t4, \"/5/6\"), \"PUT\", \"\", StatusCode::OK, false),\n            (f(t2, \"\"), \"DELETE\", \"\", StatusCode::OK, false),\n            (f(t2, \"/3/4\"), \"GET\", \"\", StatusCode::NOT_FOUND, false),\n            (f(t2, \"/5/6\"), \"GET\", \"\", StatusCode::NOT_FOUND, false),\n            (f(t3, \"/8/9\"), \"GET\", \"\", StatusCode::OK, false),\n            (f(t4, \"/5/6\"), \"GET\", \"\", StatusCode::OK, false),\n            // delete prefix 1 -> empty\n            (format!(\"/{tenant_id}\"), \"DELETE\", \"\", StatusCode::OK, false),\n            (f(t2, \"/3/4\"), \"GET\", \"\", StatusCode::NOT_FOUND, false),\n            (f(t2, \"/5/6\"), \"GET\", \"\", StatusCode::NOT_FOUND, false),\n            (f(t3, \"/8/9\"), \"GET\", \"\", StatusCode::NOT_FOUND, false),\n            (f(t4, \"/5/6\"), \"GET\", \"\", StatusCode::NOT_FOUND, false),\n        ];\n        requests_chain(chain.into_iter(), delete_prefix_token).await;\n    }\n}\n"
  },
  {
    "path": "endpoint_storage/src/claims.rs",
    "content": "use serde::{Deserialize, Serialize};\nuse std::fmt::Display;\nuse utils::id::{EndpointId, TenantId, TimelineId};\n\n/// Claims to add, remove, or retrieve endpoint data. Used by compute_ctl\n#[derive(Deserialize, Serialize, PartialEq)]\npub struct EndpointStorageClaims {\n    pub tenant_id: TenantId,\n    pub timeline_id: TimelineId,\n    pub endpoint_id: EndpointId,\n    pub exp: u64,\n}\n\n/// Claims to remove tenant, timeline, or endpoint data. Used by control plane\n#[derive(Deserialize, Serialize, PartialEq)]\npub struct DeletePrefixClaims {\n    pub tenant_id: TenantId,\n    /// None when tenant is deleted (endpoint_id is also None in this case)\n    pub timeline_id: Option<TimelineId>,\n    /// None when timeline is deleted\n    pub endpoint_id: Option<EndpointId>,\n    pub exp: u64,\n}\n\nimpl Display for EndpointStorageClaims {\n    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {\n        write!(\n            f,\n            \"EndpointClaims(tenant_id={} timeline_id={} endpoint_id={} exp={})\",\n            self.tenant_id, self.timeline_id, self.endpoint_id, self.exp\n        )\n    }\n}\n\nimpl Display for DeletePrefixClaims {\n    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {\n        write!(\n            f,\n            \"DeletePrefixClaims(tenant_id={} timeline_id={} endpoint_id={}, exp={})\",\n            self.tenant_id,\n            self.timeline_id\n                .as_ref()\n                .map(ToString::to_string)\n                .unwrap_or(\"\".to_string()),\n            self.endpoint_id\n                .as_ref()\n                .map(ToString::to_string)\n                .unwrap_or(\"\".to_string()),\n            self.exp\n        )\n    }\n}\n"
  },
  {
    "path": "endpoint_storage/src/lib.rs",
    "content": "pub mod claims;\nuse crate::claims::{DeletePrefixClaims, EndpointStorageClaims};\nuse anyhow::Result;\nuse axum::extract::{FromRequestParts, Path};\nuse axum::response::{IntoResponse, Response};\nuse axum::{RequestPartsExt, http::StatusCode, http::request::Parts};\nuse axum_extra::TypedHeader;\nuse axum_extra::headers::{Authorization, authorization::Bearer};\nuse camino::Utf8PathBuf;\nuse jsonwebtoken::{DecodingKey, Validation};\nuse remote_storage::{GenericRemoteStorage, RemotePath};\nuse serde::{Deserialize, Serialize};\nuse std::fmt::Display;\nuse std::result::Result as StdResult;\nuse std::sync::Arc;\nuse tokio_util::sync::CancellationToken;\nuse tracing::{debug, error};\nuse utils::id::{EndpointId, TenantId, TimelineId};\n\n// simplified version of utils::auth::JwtAuth\npub struct JwtAuth {\n    decoding_key: DecodingKey,\n    validation: Validation,\n}\n\npub const VALIDATION_ALGO: jsonwebtoken::Algorithm = jsonwebtoken::Algorithm::EdDSA;\nimpl JwtAuth {\n    pub fn new(key: &[u8]) -> Result<Self> {\n        Ok(Self {\n            decoding_key: DecodingKey::from_ed_pem(key)?,\n            validation: Validation::new(VALIDATION_ALGO),\n        })\n    }\n\n    pub fn decode<T: serde::de::DeserializeOwned>(&self, token: &str) -> Result<T> {\n        Ok(jsonwebtoken::decode(token, &self.decoding_key, &self.validation).map(|t| t.claims)?)\n    }\n}\n\nfn normalize_key(key: &str) -> StdResult<Utf8PathBuf, String> {\n    let key = clean_utf8(&Utf8PathBuf::from(key));\n    if key.starts_with(\"..\") || key == \".\" || key == \"/\" {\n        return Err(format!(\"invalid key {key}\"));\n    }\n    match key.strip_prefix(\"/\").map(Utf8PathBuf::from) {\n        Ok(p) => Ok(p),\n        _ => Ok(key),\n    }\n}\n\n// Copied from path_clean crate with PathBuf->Utf8PathBuf\nfn clean_utf8(path: &camino::Utf8Path) -> Utf8PathBuf {\n    use camino::Utf8Component as Comp;\n    let mut out = Vec::new();\n    for comp in path.components() {\n        match comp 
{\n            Comp::CurDir => (),\n            Comp::ParentDir => match out.last() {\n                Some(Comp::RootDir) => (),\n                Some(Comp::Normal(_)) => {\n                    out.pop();\n                }\n                None | Some(Comp::CurDir) | Some(Comp::ParentDir) | Some(Comp::Prefix(_)) => {\n                    out.push(comp)\n                }\n            },\n            comp => out.push(comp),\n        }\n    }\n    if !out.is_empty() {\n        out.iter().collect()\n    } else {\n        Utf8PathBuf::from(\".\")\n    }\n}\n\npub struct Storage {\n    pub auth: JwtAuth,\n    pub storage: GenericRemoteStorage,\n    pub cancel: CancellationToken,\n    pub max_upload_file_limit: usize,\n}\n\n#[derive(Deserialize, Serialize)]\nstruct KeyRequest {\n    tenant_id: TenantId,\n    timeline_id: TimelineId,\n    endpoint_id: EndpointId,\n    path: String,\n}\n\n#[derive(Deserialize, Serialize, PartialEq)]\nstruct PrefixKeyRequest {\n    tenant_id: TenantId,\n    timeline_id: Option<TimelineId>,\n    endpoint_id: Option<EndpointId>,\n}\n\n#[derive(Debug, PartialEq)]\npub struct S3Path {\n    pub path: RemotePath,\n}\n\nimpl TryFrom<&KeyRequest> for S3Path {\n    type Error = String;\n    fn try_from(req: &KeyRequest) -> StdResult<Self, Self::Error> {\n        let KeyRequest {\n            tenant_id,\n            timeline_id,\n            endpoint_id,\n            path,\n        } = &req;\n        let prefix = format!(\"{tenant_id}/{timeline_id}/{endpoint_id}\",);\n        let path = Utf8PathBuf::from(prefix).join(normalize_key(path)?);\n        let path = RemotePath::new(&path).unwrap(); // unwrap() because the path is already relative\n        Ok(S3Path { path })\n    }\n}\n\nfn unauthorized(route: impl Display, claims: impl Display) -> Response {\n    debug!(%route, %claims, \"route doesn't match claims\");\n    StatusCode::UNAUTHORIZED.into_response()\n}\n\npub fn bad_request(err: impl Display, desc: &'static str) -> Response {\n    
debug!(%err, desc);\n    (StatusCode::BAD_REQUEST, err.to_string()).into_response()\n}\n\npub fn ok() -> Response {\n    StatusCode::OK.into_response()\n}\n\npub fn internal_error(err: impl Display, path: impl Display, desc: &'static str) -> Response {\n    error!(%err, %path, desc);\n    StatusCode::INTERNAL_SERVER_ERROR.into_response()\n}\n\npub fn not_found(key: impl ToString) -> Response {\n    (StatusCode::NOT_FOUND, key.to_string()).into_response()\n}\n\nimpl FromRequestParts<Arc<Storage>> for S3Path {\n    type Rejection = Response;\n    async fn from_request_parts(\n        parts: &mut Parts,\n        state: &Arc<Storage>,\n    ) -> Result<Self, Self::Rejection> {\n        let Path(path): Path<KeyRequest> = parts\n            .extract()\n            .await\n            .map_err(|e| bad_request(e, \"invalid route\"))?;\n        let TypedHeader(Authorization(bearer)) = parts\n            .extract::<TypedHeader<Authorization<Bearer>>>()\n            .await\n            .map_err(|e| bad_request(e, \"invalid token\"))?;\n        let claims: EndpointStorageClaims = state\n            .auth\n            .decode(bearer.token())\n            .map_err(|e| bad_request(e, \"decoding token\"))?;\n\n        // Read paths may have different endpoint ids. 
For readonly -> readwrite replica\n        // prewarming, endpoint must read other endpoint's data.\n        let endpoint_id = if parts.method == axum::http::Method::GET {\n            claims.endpoint_id.clone()\n        } else {\n            path.endpoint_id.clone()\n        };\n\n        let route = EndpointStorageClaims {\n            tenant_id: path.tenant_id,\n            timeline_id: path.timeline_id,\n            endpoint_id,\n            exp: claims.exp,\n        };\n        if route != claims {\n            return Err(unauthorized(route, claims));\n        }\n        (&path)\n            .try_into()\n            .map_err(|e| bad_request(e, \"invalid route\"))\n    }\n}\n\n#[derive(Debug, PartialEq)]\npub struct PrefixS3Path {\n    pub path: RemotePath,\n}\n\nimpl From<&DeletePrefixClaims> for PrefixS3Path {\n    fn from(path: &DeletePrefixClaims) -> Self {\n        let timeline_id = path\n            .timeline_id\n            .as_ref()\n            .map(ToString::to_string)\n            .unwrap_or(\"\".to_string());\n        let endpoint_id = path\n            .endpoint_id\n            .as_ref()\n            .map(ToString::to_string)\n            .unwrap_or(\"\".to_string());\n        let path = Utf8PathBuf::from(path.tenant_id.to_string())\n            .join(timeline_id)\n            .join(endpoint_id);\n        let path = RemotePath::new(&path).unwrap(); // unwrap() because the path is already relative\n        PrefixS3Path { path }\n    }\n}\n\nimpl FromRequestParts<Arc<Storage>> for PrefixS3Path {\n    type Rejection = Response;\n    async fn from_request_parts(\n        parts: &mut Parts,\n        state: &Arc<Storage>,\n    ) -> Result<Self, Self::Rejection> {\n        let Path(path) = parts\n            .extract::<Path<PrefixKeyRequest>>()\n            .await\n            .map_err(|e| bad_request(e, \"invalid route\"))?;\n        let TypedHeader(Authorization(bearer)) = parts\n            .extract::<TypedHeader<Authorization<Bearer>>>()\n            
.await\n            .map_err(|e| bad_request(e, \"invalid token\"))?;\n        let claims: DeletePrefixClaims = state\n            .auth\n            .decode(bearer.token())\n            .map_err(|e| bad_request(e, \"invalid token\"))?;\n        let route = DeletePrefixClaims {\n            tenant_id: path.tenant_id,\n            timeline_id: path.timeline_id,\n            endpoint_id: path.endpoint_id,\n            exp: claims.exp,\n        };\n        if route != claims {\n            return Err(unauthorized(route, claims));\n        }\n        Ok((&route).into())\n    }\n}\n\n#[cfg(test)]\nmod tests {\n    use super::*;\n\n    #[test]\n    fn normalize_key() {\n        let f = super::normalize_key;\n        assert_eq!(f(\"hello/world/..\").unwrap(), Utf8PathBuf::from(\"hello\"));\n        assert_eq!(\n            f(\"ololo/1/../../not_ololo\").unwrap(),\n            Utf8PathBuf::from(\"not_ololo\")\n        );\n        assert!(f(\"ololo/1/../../../\").is_err());\n        assert!(f(\".\").is_err());\n        assert!(f(\"../\").is_err());\n        assert!(f(\"\").is_err());\n        assert_eq!(f(\"/1/2/3\").unwrap(), Utf8PathBuf::from(\"1/2/3\"));\n        assert!(f(\"/1/2/3/../../../\").is_err());\n        assert!(f(\"/1/2/3/../../../../\").is_err());\n    }\n\n    const TENANT_ID: TenantId =\n        TenantId::from_array([1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 2, 3, 4, 5, 6]);\n    const TIMELINE_ID: TimelineId =\n        TimelineId::from_array([1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 2, 3, 4, 5, 7]);\n    const ENDPOINT_ID: &str = \"ep-winter-frost-a662z3vg\";\n\n    #[test]\n    fn s3_path() {\n        let auth = EndpointStorageClaims {\n            tenant_id: TENANT_ID,\n            timeline_id: TIMELINE_ID,\n            endpoint_id: ENDPOINT_ID.into(),\n            exp: u64::MAX,\n        };\n        let s3_path = |key| {\n            let path = &format!(\"{TENANT_ID}/{TIMELINE_ID}/{ENDPOINT_ID}/{key}\");\n            let path = RemotePath::from_string(path).unwrap();\n  
          S3Path { path }\n        };\n\n        let path = \"cache_key\".to_string();\n        let mut key_path = KeyRequest {\n            path,\n            tenant_id: auth.tenant_id,\n            timeline_id: auth.timeline_id,\n            endpoint_id: auth.endpoint_id,\n        };\n        assert_eq!(S3Path::try_from(&key_path).unwrap(), s3_path(key_path.path));\n\n        key_path.path = \"we/can/have/nested/paths\".to_string();\n        assert_eq!(S3Path::try_from(&key_path).unwrap(), s3_path(key_path.path));\n\n        key_path.path = \"../error/hello/../\".to_string();\n        assert!(S3Path::try_from(&key_path).is_err());\n    }\n\n    #[test]\n    fn prefix_s3_path() {\n        let mut path = DeletePrefixClaims {\n            tenant_id: TENANT_ID,\n            timeline_id: None,\n            endpoint_id: None,\n            exp: 0,\n        };\n        let prefix_path = |s: String| RemotePath::from_string(&s).unwrap();\n        assert_eq!(\n            PrefixS3Path::from(&path).path,\n            prefix_path(format!(\"{TENANT_ID}\"))\n        );\n\n        path.timeline_id = Some(TIMELINE_ID);\n        assert_eq!(\n            PrefixS3Path::from(&path).path,\n            prefix_path(format!(\"{TENANT_ID}/{TIMELINE_ID}\"))\n        );\n\n        path.endpoint_id = Some(ENDPOINT_ID.into());\n        assert_eq!(\n            PrefixS3Path::from(&path).path,\n            prefix_path(format!(\"{TENANT_ID}/{TIMELINE_ID}/{ENDPOINT_ID}\"))\n        );\n    }\n}\n"
  },
  {
    "path": "endpoint_storage/src/main.rs",
    "content": "//! `endpoint_storage` is a service which provides API for uploading and downloading\n//! files. It is used by compute and control plane for accessing LFC prewarm data.\n//! This service is deployed either as a separate component or as part of compute image\n//! for large computes.\nmod app;\nuse anyhow::Context;\nuse clap::Parser;\nuse std::net::{IpAddr, Ipv4Addr, SocketAddr};\nuse tracing::info;\nuse utils::logging;\n\n//see set()\nconst fn max_upload_file_limit() -> usize {\n    100 * 1024 * 1024\n}\n\nconst fn listen() -> SocketAddr {\n    SocketAddr::new(IpAddr::V4(Ipv4Addr::new(0, 0, 0, 0)), 51243)\n}\n\n#[derive(Parser)]\nstruct Args {\n    #[arg(exclusive = true)]\n    config_file: Option<String>,\n    #[arg(long, default_value = \"false\", requires = \"config\")]\n    /// to allow testing k8s helm chart where we don't have s3 credentials\n    no_s3_check_on_startup: bool,\n    #[arg(long, value_name = \"FILE\")]\n    /// inline config mode for k8s helm chart\n    config: Option<String>,\n}\n\n#[derive(serde::Deserialize)]\nstruct Config {\n    #[serde(default = \"listen\")]\n    listen: std::net::SocketAddr,\n    pemfile: camino::Utf8PathBuf,\n    #[serde(flatten)]\n    storage_kind: remote_storage::TypedRemoteStorageKind,\n    #[serde(default = \"max_upload_file_limit\")]\n    max_upload_file_limit: usize,\n}\n\n#[tokio::main]\nasync fn main() -> anyhow::Result<()> {\n    logging::init(\n        logging::LogFormat::Plain,\n        logging::TracingErrorLayerEnablement::EnableWithRustLogFilter,\n        logging::Output::Stdout,\n    )?;\n\n    let args = Args::parse();\n    let config: Config = if let Some(config_path) = args.config_file {\n        info!(\"Reading config from {config_path}\");\n        let config = std::fs::read_to_string(config_path)?;\n        serde_json::from_str(&config).context(\"parsing config\")?\n    } else if let Some(config) = args.config {\n        info!(\"Reading inline config\");\n        
serde_json::from_str(&config).context(\"parsing config\")?\n    } else {\n        anyhow::bail!(\"Supply either config file path or --config=inline-config\");\n    };\n\n    info!(\"Reading pemfile from {}\", config.pemfile.clone());\n    let pemfile = std::fs::read(config.pemfile.clone())?;\n    info!(\"Loading public key from {}\", config.pemfile.clone());\n    let auth = endpoint_storage::JwtAuth::new(&pemfile)?;\n\n    let listener = tokio::net::TcpListener::bind(config.listen).await.unwrap();\n    info!(\"listening on {}\", listener.local_addr().unwrap());\n\n    let storage =\n        remote_storage::GenericRemoteStorage::from_storage_kind(config.storage_kind).await?;\n    let cancel = tokio_util::sync::CancellationToken::new();\n    if !args.no_s3_check_on_startup {\n        app::check_storage_permissions(&storage, cancel.clone()).await?;\n    }\n\n    let proxy = std::sync::Arc::new(endpoint_storage::Storage {\n        auth,\n        storage,\n        cancel: cancel.clone(),\n        max_upload_file_limit: config.max_upload_file_limit,\n    });\n\n    tokio::spawn(utils::signals::signal_handler(cancel.clone()));\n    axum::serve(listener, app::app(proxy))\n        .with_graceful_shutdown(async move { cancel.cancelled().await })\n        .await?;\n    Ok(())\n}\n"
  },
  {
    "path": "endpoint_storage/src/openapi_spec.yml",
    "content": "openapi: \"3.0.2\"\ninfo:\n  title: Endpoint Storage API\n  description: Endpoint Storage API\n  version: \"1.0\"\n  license:\n    name: \"Apache\"\n    url: https://github.com/neondatabase/neon/blob/main/LICENSE\nservers:\n  - url: \"\"\npaths:\n  /status:\n    description: Healthcheck endpoint\n    get:\n      description: Healthcheck\n      security: []\n      responses:\n        \"200\":\n          description: OK\n\n  /{tenant_id}/{timeline_id}/{endpoint_id}/{key}:\n    parameters:\n      - name: tenant_id\n        in: path\n        required: true\n        schema:\n          type: string\n      - name: timeline_id\n        in: path\n        required: true\n        schema:\n          type: string\n      - name: endpoint_id\n        in: path\n        required: true\n        schema:\n          type: string\n      - name: key\n        in: path\n        required: true\n        schema:\n          type: string\n    get:\n      description: Get file from blob storage\n      responses:\n        \"200\":\n          description: \"File stream from blob storage\"\n          content:\n            application/octet-stream:\n              schema:\n                type: string\n                format: binary\n        \"400\":\n          description: File was not found\n        \"403\":\n          description: JWT does not authorize request to this route\n    put:\n      description: Insert file into blob storage. 
If file exists, override it\n      requestBody:\n        content:\n          application/octet-stream:\n            schema:\n              type: string\n              format: binary\n      responses:\n        \"200\":\n          description: File was inserted successfully\n        \"403\":\n          description: JWT does not authorize request to this route\n    delete:\n      description: Delete file from blob storage\n      responses:\n        \"200\":\n          description: File was successfully deleted or not found\n        \"403\":\n          description: JWT does not authorize request to this route\n\n  /{tenant_id}/{timeline_id}/{endpoint_id}:\n    parameters:\n      - name: tenant_id\n        in: path\n        required: true\n        schema:\n          type: string\n      - name: timeline_id\n        in: path\n        required: true\n        schema:\n          type: string\n      - name: endpoint_id\n        in: path\n        required: true\n        schema:\n          type: string\n    delete:\n      description: Delete endpoint data from blob storage\n      responses:\n        \"200\":\n          description: Endpoint data was deleted\n        \"403\":\n          description: JWT does not authorize request to this route\n\n  /{tenant_id}/{timeline_id}:\n    parameters:\n      - name: tenant_id\n        in: path\n        required: true\n        schema:\n          type: string\n      - name: timeline_id\n        in: path\n        required: true\n        schema:\n          type: string\n    delete:\n      description: Delete timeline data from blob storage\n      responses:\n        \"200\":\n          description: Timeline data was deleted\n        \"403\":\n          description: JWT does not authorize request to this route\n\n  /{tenant_id}:\n    parameters:\n      - name: tenant_id\n        in: path\n        required: true\n        schema:\n          type: string\n    delete:\n      description: Delete tenant data from blob storage\n      responses:\n    
    \"200\":\n          description: Tenant data was deleted\n        \"403\":\n          description: JWT does not authorize request to this route\n\ncomponents:\n  securitySchemes:\n    JWT:\n      type: http\n      scheme: bearer\n      bearerFormat: JWT\n\nsecurity:\n  - JWT: []\n"
  },
  {
    "path": "libs/compute_api/Cargo.toml",
    "content": "[package]\nname = \"compute_api\"\nversion = \"0.1.0\"\nedition = \"2024\"\nlicense.workspace = true\n\n[dependencies]\nanyhow.workspace = true\nchrono.workspace = true\nindexmap.workspace = true\njsonwebtoken.workspace = true\nserde.workspace = true\nserde_json.workspace = true\nregex.workspace = true\nurl.workspace = true\n\nutils = { path = \"../utils\" }\nremote_storage = { version = \"0.1\", path = \"../remote_storage/\" }\n"
  },
  {
    "path": "libs/compute_api/src/lib.rs",
    "content": "#![deny(unsafe_code)]\n#![deny(clippy::undocumented_unsafe_blocks)]\npub mod privilege;\npub mod requests;\npub mod responses;\npub mod spec;\n"
  },
  {
    "path": "libs/compute_api/src/privilege.rs",
    "content": "#[derive(Debug, Clone, serde::Deserialize, serde::Serialize)]\n#[serde(rename_all = \"UPPERCASE\")]\npub enum Privilege {\n    Select,\n    Insert,\n    Update,\n    Delete,\n    Truncate,\n    References,\n    Trigger,\n    Usage,\n    Create,\n    Connect,\n    Temporary,\n    Execute,\n}\n\nimpl Privilege {\n    pub fn as_str(&self) -> &'static str {\n        match self {\n            Privilege::Select => \"SELECT\",\n            Privilege::Insert => \"INSERT\",\n            Privilege::Update => \"UPDATE\",\n            Privilege::Delete => \"DELETE\",\n            Privilege::Truncate => \"TRUNCATE\",\n            Privilege::References => \"REFERENCES\",\n            Privilege::Trigger => \"TRIGGER\",\n            Privilege::Usage => \"USAGE\",\n            Privilege::Create => \"CREATE\",\n            Privilege::Connect => \"CONNECT\",\n            Privilege::Temporary => \"TEMPORARY\",\n            Privilege::Execute => \"EXECUTE\",\n        }\n    }\n}\n"
  },
  {
    "path": "libs/compute_api/src/requests.rs",
    "content": "//! Structs representing the JSON formats used in the compute_ctl's HTTP API.\nuse std::str::FromStr;\n\nuse serde::{Deserialize, Serialize};\n\nuse crate::privilege::Privilege;\nuse crate::responses::ComputeCtlConfig;\nuse crate::spec::{ComputeSpec, ExtVersion, PgIdent};\n\n/// The value to place in the [`ComputeClaims::audience`] claim.\npub static COMPUTE_AUDIENCE: &str = \"compute\";\n\n/// Available scopes for a compute's JWT.\n#[derive(Copy, Clone, Debug, Deserialize, Eq, PartialEq, Serialize)]\n#[serde(rename_all = \"snake_case\")]\npub enum ComputeClaimsScope {\n    /// An admin-scoped token allows access to all of `compute_ctl`'s authorized\n    /// facilities.\n    #[serde(rename = \"compute_ctl:admin\")]\n    Admin,\n}\n\nimpl FromStr for ComputeClaimsScope {\n    type Err = anyhow::Error;\n\n    fn from_str(s: &str) -> Result<Self, Self::Err> {\n        match s {\n            \"compute_ctl:admin\" => Ok(ComputeClaimsScope::Admin),\n            _ => Err(anyhow::anyhow!(\"invalid compute claims scope \\\"{s}\\\"\")),\n        }\n    }\n}\n\n/// When making requests to the `compute_ctl` external HTTP server, the client\n/// must specify a set of claims in `Authorization` header JWTs such that\n/// `compute_ctl` can authorize the request.\n#[derive(Clone, Debug, Deserialize, Serialize)]\n#[serde(rename = \"snake_case\")]\npub struct ComputeClaims {\n    /// The compute ID that will validate the token. 
The only case in which this\n    /// can be [`None`] is if [`Self::scope`] is\n    /// [`ComputeClaimsScope::Admin`].\n    pub compute_id: Option<String>,\n\n    /// The scope of what the token authorizes.\n    pub scope: Option<ComputeClaimsScope>,\n\n    /// The recipient the token is intended for.\n    ///\n    /// See [RFC 7519](https://www.rfc-editor.org/rfc/rfc7519#section-4.1.3) for\n    /// more information.\n    ///\n    /// TODO: Remove the [`Option`] wrapper when control plane learns to send\n    /// the claim.\n    #[serde(rename = \"aud\")]\n    pub audience: Option<Vec<String>>,\n}\n\n/// Request of the /configure API\n///\n/// We now pass only `spec` in the configuration request, but later we can\n/// extend it and add something like `restart: bool` or something else. So put\n/// `spec` into a struct initially to be more flexible in the future.\n#[derive(Debug, Deserialize, Serialize)]\npub struct ConfigurationRequest {\n    pub spec: ComputeSpec,\n    pub compute_ctl_config: ComputeCtlConfig,\n}\n\n#[derive(Deserialize, Debug)]\npub struct ExtensionInstallRequest {\n    pub extension: PgIdent,\n    pub database: PgIdent,\n    pub version: ExtVersion,\n}\n\n#[derive(Deserialize, Debug)]\npub struct SetRoleGrantsRequest {\n    pub database: PgIdent,\n    pub schema: PgIdent,\n    pub privileges: Vec<Privilege>,\n    pub role: PgIdent,\n}\n\n#[cfg(test)]\nmod test {\n    use std::str::FromStr;\n\n    use crate::requests::ComputeClaimsScope;\n\n    /// Confirm that whether we parse the scope by string or through serde, the\n    /// same values parse to the same enum variant.\n    #[test]\n    fn compute_request_scopes() {\n        const ADMIN_SCOPE: &str = \"compute_ctl:admin\";\n\n        let from_serde: ComputeClaimsScope =\n            serde_json::from_str(&format!(\"\\\"{ADMIN_SCOPE}\\\"\")).unwrap();\n        let from_str = ComputeClaimsScope::from_str(ADMIN_SCOPE).unwrap();\n\n        assert_eq!(from_serde, from_str);\n    }\n}\n"
  },
  {
    "path": "libs/compute_api/src/responses.rs",
    "content": "//! Structs representing the JSON formats used in the compute_ctl's HTTP API.\n\nuse chrono::{DateTime, Utc};\nuse jsonwebtoken::jwk::JwkSet;\nuse serde::{Deserialize, Serialize, Serializer};\nuse std::fmt::Display;\n\nuse crate::privilege::Privilege;\nuse crate::spec::{ComputeSpec, Database, ExtVersion, PgIdent, Role};\n\n#[derive(Serialize, Debug, Deserialize)]\npub struct GenericAPIError {\n    pub error: String,\n}\n\n/// All configuration parameters necessary for a compute. When\n/// [`ComputeConfig::spec`] is provided, it means that the compute is attached\n/// to a tenant. [`ComputeConfig::compute_ctl_config`] will always be provided\n/// and contains parameters necessary for operating `compute_ctl` independently\n/// of whether a tenant is attached to the compute or not.\n///\n/// This also happens to be the body of `compute_ctl`'s /configure request.\n#[derive(Debug, Deserialize, Serialize)]\npub struct ComputeConfig {\n    /// The compute spec\n    pub spec: Option<ComputeSpec>,\n\n    /// The compute_ctl configuration\n    #[allow(dead_code)]\n    pub compute_ctl_config: ComputeCtlConfig,\n}\n\nimpl From<ControlPlaneConfigResponse> for ComputeConfig {\n    fn from(value: ControlPlaneConfigResponse) -> Self {\n        Self {\n            spec: value.spec,\n            compute_ctl_config: value.compute_ctl_config,\n        }\n    }\n}\n\n#[derive(Debug, Clone, Serialize)]\npub struct ExtensionInstallResponse {\n    pub extension: PgIdent,\n    pub version: ExtVersion,\n}\n\n/// Status of the LFC prewarm process. 
The same state machine is reused for\n/// both autoprewarm (prewarm after compute/Postgres start using the previously\n/// stored LFC state) and explicit prewarming via API.\n#[derive(Serialize, Default, Debug, Clone)]\n#[serde(tag = \"status\", rename_all = \"snake_case\")]\npub enum LfcPrewarmState {\n    /// Default value when compute boots up.\n    #[default]\n    NotPrewarmed,\n    /// Prewarming thread is active and loading pages into LFC.\n    Prewarming,\n    /// We found requested LFC state in the endpoint storage and\n    /// completed prewarming successfully.\n    Completed {\n        total: i32,\n        prewarmed: i32,\n        skipped: i32,\n        state_download_time_ms: u32,\n        uncompress_time_ms: u32,\n        prewarm_time_ms: u32,\n    },\n    /// Unexpected error happened during prewarming. Note, `Not Found 404`\n    /// response from the endpoint storage is explicitly excluded here\n    /// because it can normally happen on the first compute start,\n    /// since LFC state is not available yet.\n    Failed { error: String },\n    /// We tried to fetch the corresponding LFC state from the endpoint storage,\n    /// but received `Not Found 404`. This should normally happen only during the\n    /// first endpoint start after creation with `autoprewarm: true`.\n    /// This may also happen if LFC is turned off or not initialized.\n    ///\n    /// During the orchestrated prewarm via API, when a caller explicitly\n    /// provides the LFC state key to prewarm from, it's the caller's responsibility\n    /// to handle this status as an error state in this case.\n    Skipped,\n    /// LFC prewarm was cancelled. 
Some pages in LFC cache may be prewarmed if query\n    /// has started working before cancellation\n    Cancelled,\n}\n\nimpl Display for LfcPrewarmState {\n    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {\n        match self {\n            LfcPrewarmState::NotPrewarmed => f.write_str(\"NotPrewarmed\"),\n            LfcPrewarmState::Prewarming => f.write_str(\"Prewarming\"),\n            LfcPrewarmState::Completed { .. } => f.write_str(\"Completed\"),\n            LfcPrewarmState::Skipped => f.write_str(\"Skipped\"),\n            LfcPrewarmState::Failed { error } => write!(f, \"Error({error})\"),\n            LfcPrewarmState::Cancelled => f.write_str(\"Cancelled\"),\n        }\n    }\n}\n\n#[derive(Serialize, Default, Debug, Clone)]\n#[serde(tag = \"status\", rename_all = \"snake_case\")]\npub enum LfcOffloadState {\n    #[default]\n    NotOffloaded,\n    Offloading,\n    Completed {\n        state_query_time_ms: u32,\n        compress_time_ms: u32,\n        state_upload_time_ms: u32,\n    },\n    Failed {\n        error: String,\n    },\n    /// LFC state was empty so it wasn't offloaded\n    Skipped,\n}\n\n#[derive(Serialize, Debug, Clone)]\n#[serde(tag = \"status\", rename_all = \"snake_case\")]\npub enum PromoteState {\n    NotPromoted,\n    Completed {\n        lsn_wait_time_ms: u32,\n        pg_promote_time_ms: u32,\n        reconfigure_time_ms: u32,\n    },\n    Failed {\n        error: String,\n    },\n}\n\n#[derive(Deserialize, Default, Debug)]\n#[serde(rename_all = \"snake_case\")]\npub struct PromoteConfig {\n    pub spec: ComputeSpec,\n    pub wal_flush_lsn: utils::lsn::Lsn,\n}\n\n/// Response of the /status API\n#[derive(Serialize, Debug, Deserialize)]\n#[serde(rename_all = \"snake_case\")]\npub struct ComputeStatusResponse {\n    pub start_time: DateTime<Utc>,\n    pub tenant: Option<String>,\n    pub timeline: Option<String>,\n    pub status: ComputeStatus,\n    #[serde(serialize_with = \"rfc3339_serialize\")]\n    pub 
last_active: Option<DateTime<Utc>>,\n    pub error: Option<String>,\n}\n\n#[derive(Serialize, Clone, Copy, Debug, Deserialize, PartialEq, Eq, Default)]\n#[serde(rename_all = \"snake_case\")]\npub enum TerminateMode {\n    #[default]\n    /// wait 30s till returning from /terminate to allow control plane to get the error\n    Fast,\n    /// return from /terminate immediately as soon as all components are terminated\n    Immediate,\n}\n\nimpl From<TerminateMode> for ComputeStatus {\n    fn from(mode: TerminateMode) -> Self {\n        match mode {\n            TerminateMode::Fast => ComputeStatus::TerminationPendingFast,\n            TerminateMode::Immediate => ComputeStatus::TerminationPendingImmediate,\n        }\n    }\n}\n\n#[derive(Serialize, Clone, Copy, Debug, Deserialize, PartialEq, Eq)]\n#[serde(rename_all = \"snake_case\")]\npub enum ComputeStatus {\n    // Spec wasn't provided at start, waiting for it to be\n    // provided by control-plane.\n    Empty,\n    // Compute configuration was requested.\n    ConfigurationPending,\n    // Compute node has spec and initial startup and\n    // configuration is in progress.\n    Init,\n    // Compute is configured and running.\n    Running,\n    // New spec is being applied.\n    Configuration,\n    // Either startup or configuration failed,\n    // compute will exit soon or is waiting for\n    // control-plane to terminate it.\n    Failed,\n    // Termination requested\n    TerminationPendingFast,\n    // Termination requested, without waiting 30s before returning from /terminate\n    TerminationPendingImmediate,\n    // Terminated Postgres\n    Terminated,\n    // A spec refresh is being requested\n    RefreshConfigurationPending,\n    // A spec refresh is being applied. 
We cannot refresh configuration again until the current\n    // refresh is done, i.e., signal_refresh_configuration() will return 500 error.\n    RefreshConfiguration,\n}\n\n#[derive(Deserialize, Serialize)]\npub struct TerminateResponse {\n    pub lsn: Option<utils::lsn::Lsn>,\n}\n\nimpl Display for ComputeStatus {\n    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {\n        match self {\n            ComputeStatus::Empty => f.write_str(\"empty\"),\n            ComputeStatus::ConfigurationPending => f.write_str(\"configuration-pending\"),\n            ComputeStatus::RefreshConfiguration => f.write_str(\"refresh-configuration\"),\n            ComputeStatus::RefreshConfigurationPending => {\n                f.write_str(\"refresh-configuration-pending\")\n            }\n            ComputeStatus::Init => f.write_str(\"init\"),\n            ComputeStatus::Running => f.write_str(\"running\"),\n            ComputeStatus::Configuration => f.write_str(\"configuration\"),\n            ComputeStatus::Failed => f.write_str(\"failed\"),\n            ComputeStatus::TerminationPendingFast => f.write_str(\"termination-pending-fast\"),\n            ComputeStatus::TerminationPendingImmediate => {\n                f.write_str(\"termination-pending-immediate\")\n            }\n            ComputeStatus::Terminated => f.write_str(\"terminated\"),\n        }\n    }\n}\n\npub fn rfc3339_serialize<S>(x: &Option<DateTime<Utc>>, s: S) -> Result<S::Ok, S::Error>\nwhere\n    S: Serializer,\n{\n    if let Some(x) = x {\n        x.to_rfc3339().serialize(s)\n    } else {\n        s.serialize_none()\n    }\n}\n\n/// Response of the /metrics.json API\n#[derive(Clone, Debug, Default, Serialize)]\npub struct ComputeMetrics {\n    /// Time spent waiting in pool\n    pub wait_for_spec_ms: u64,\n\n    /// Time spent checking if safekeepers are synced\n    pub sync_sk_check_ms: u64,\n\n    /// Time spent syncing safekeepers (walproposer.c).\n    /// In most cases this should be 
zero.\n    pub sync_safekeepers_ms: u64,\n\n    /// Time it took to establish a pg connection to the pageserver.\n    /// This is two roundtrips, so it's a good proxy for compute-pageserver\n    /// latency. The latency is usually 0.2ms, but it's not safe to assume\n    /// that.\n    pub pageserver_connect_micros: u64,\n\n    /// Time to get basebackup from pageserver and write it to disk.\n    pub basebackup_ms: u64,\n\n    /// Compressed size of basebackup received.\n    pub basebackup_bytes: u64,\n\n    /// Time spent starting Postgres. This includes initialization of shared\n    /// buffers, preloading extensions, and other pg operations.\n    pub start_postgres_ms: u64,\n\n    /// Time spent applying pg catalog updates that were made in the console\n    /// UI. This should be 0 when startup time matters, since cplane tries\n    /// to do these updates eagerly, and passes the skip_pg_catalog_updates\n    /// when it's safe to skip this step.\n    pub config_ms: u64,\n\n    /// Total time, from when we receive the spec to when we're ready to take\n    /// pg connections.\n    pub total_startup_ms: u64,\n    pub load_ext_ms: u64,\n    pub num_ext_downloaded: u64,\n    pub largest_ext_size: u64, // these are measured in bytes\n    pub total_ext_download_size: u64,\n}\n\n#[derive(Clone, Debug, Default, Serialize)]\npub struct CatalogObjects {\n    pub roles: Vec<Role>,\n    pub databases: Vec<Database>,\n}\n\n#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)]\npub struct ComputeCtlConfig {\n    /// Set of JSON web keys that the compute can use to authenticate\n    /// communication from the control plane.\n    pub jwks: JwkSet,\n    pub tls: Option<TlsConfig>,\n}\n\nimpl Default for ComputeCtlConfig {\n    fn default() -> Self {\n        Self {\n            jwks: JwkSet {\n                keys: Vec::default(),\n            },\n            tls: None,\n        }\n    }\n}\n\n#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)]\npub struct 
TlsConfig {\n    pub key_path: String,\n    pub cert_path: String,\n}\n\n/// Response of the `/computes/{compute_id}/spec` control-plane API.\n#[derive(Deserialize, Debug)]\npub struct ControlPlaneConfigResponse {\n    pub spec: Option<ComputeSpec>,\n    pub status: ControlPlaneComputeStatus,\n    pub compute_ctl_config: ComputeCtlConfig,\n}\n\n#[derive(Deserialize, Clone, Copy, Debug, PartialEq, Eq)]\n#[serde(rename_all = \"snake_case\")]\npub enum ControlPlaneComputeStatus {\n    // Compute is known to control-plane, but it's not\n    // yet attached to any timeline / endpoint.\n    Empty,\n    // Compute is attached to some timeline / endpoint and\n    // should be able to start with provided spec.\n    Attached,\n}\n\n#[derive(Clone, Debug, Default, Serialize)]\npub struct InstalledExtension {\n    pub extname: String,\n    pub version: String,\n    pub n_databases: u32, // Number of databases using this extension\n    pub owned_by_superuser: String,\n}\n\n#[derive(Clone, Debug, Default, Serialize)]\npub struct InstalledExtensions {\n    pub extensions: Vec<InstalledExtension>,\n}\n\n#[derive(Clone, Debug, Default, Serialize)]\npub struct ExtensionInstallResult {\n    pub extension: PgIdent,\n    pub version: ExtVersion,\n}\n#[derive(Clone, Debug, Default, Serialize)]\npub struct SetRoleGrantsResponse {\n    pub database: PgIdent,\n    pub schema: PgIdent,\n    pub privileges: Vec<Privilege>,\n    pub role: PgIdent,\n}\n"
  },
  {
    "path": "libs/compute_api/src/spec.rs",
    "content": "//! The ComputeSpec contains all the information needed to start up\n//! the right version of PostgreSQL, and connect it to the storage nodes.\n//! It can be passed as part of the `config.json`, or the control plane can\n//! provide it by calling the compute_ctl's `/compute_ctl` endpoint, or\n//! compute_ctl can fetch it by calling the control plane's API.\nuse std::collections::HashMap;\nuse std::fmt::Display;\n\nuse anyhow::anyhow;\nuse indexmap::IndexMap;\nuse regex::Regex;\nuse remote_storage::RemotePath;\nuse serde::{Deserialize, Serialize};\nuse url::Url;\nuse utils::id::{NodeId, TenantId, TimelineId};\nuse utils::lsn::Lsn;\nuse utils::shard::{ShardCount, ShardIndex, ShardNumber, ShardStripeSize};\n\nuse crate::responses::TlsConfig;\n\n/// String type alias representing Postgres identifier and\n/// intended to be used for DB / role names.\npub type PgIdent = String;\n\n/// String type alias representing Postgres extension version\npub type ExtVersion = String;\n\nfn default_reconfigure_concurrency() -> usize {\n    1\n}\n\n/// Cluster spec or configuration represented as an optional number of\n/// delta operations + final cluster state description.\n#[derive(Clone, Debug, Default, Deserialize, Serialize)]\npub struct ComputeSpec {\n    pub format_version: f32,\n\n    // The control plane also includes a 'timestamp' field in the JSON document,\n    // but we don't use it for anything. Serde will ignore unknown fields when\n    // deserializing it.\n    pub operation_uuid: Option<String>,\n\n    /// Compute features to enable. 
These feature flags are provided, when we\n    /// know all the details about client's compute, so they cannot be used\n    /// to change `Empty` compute behavior.\n    #[serde(default)]\n    pub features: Vec<ComputeFeature>,\n\n    /// If compute_ctl was passed `--resize-swap-on-bind`, a value of `Some(_)` instructs\n    /// compute_ctl to `/neonvm/bin/resize-swap` with the given size, when the spec is first\n    /// received.\n    ///\n    /// Both this field and `--resize-swap-on-bind` are required, so that the control plane's\n    /// spec generation doesn't need to be aware of the actual compute it's running on, while\n    /// guaranteeing gradual rollout of swap. Otherwise, without `--resize-swap-on-bind`, we could\n    /// end up trying to resize swap in VMs without it -- or end up *not* resizing swap, thus\n    /// giving every VM much more swap than it should have (32GiB).\n    ///\n    /// Eventually we may remove `--resize-swap-on-bind` and exclusively use `swap_size_bytes` for\n    /// enabling the swap resizing behavior once rollout is complete.\n    ///\n    /// See neondatabase/cloud#12047 for more.\n    #[serde(default)]\n    pub swap_size_bytes: Option<u64>,\n\n    /// If compute_ctl was passed `--set-disk-quota-for-fs`, a value of `Some(_)` instructs\n    /// compute_ctl to run `/neonvm/bin/set-disk-quota` with the given size and fs, when the\n    /// spec is first received.\n    ///\n    /// Both this field and `--set-disk-quota-for-fs` are required, so that the control plane's\n    /// spec generation doesn't need to be aware of the actual compute it's running on, while\n    /// guaranteeing gradual rollout of disk quota.\n    #[serde(default)]\n    pub disk_quota_bytes: Option<u64>,\n\n    /// Disables the vm-monitor behavior that resizes LFC on upscale/downscale, instead relying on\n    /// the initial size of LFC.\n    ///\n    /// This is intended for use when the LFC size is being overridden from the default but\n    /// autoscaling is 
still enabled, and we don't want the vm-monitor to interfere with the custom\n    /// LFC sizing.\n    #[serde(default)]\n    pub disable_lfc_resizing: Option<bool>,\n\n    /// Expected cluster state at the end of transition process.\n    pub cluster: Cluster,\n    pub delta_operations: Option<Vec<DeltaOp>>,\n\n    /// An optional hint that can be passed to speed up startup time if we know\n    /// that no pg catalog mutations (like role creation, database creation,\n    /// extension creation) need to be done on the actual database to start.\n    #[serde(default)] // Default false\n    pub skip_pg_catalog_updates: bool,\n\n    // Information needed to connect to the storage layer.\n    //\n    // `tenant_id`, `timeline_id` and `pageserver_connstring` are always needed.\n    //\n    // Depending on `mode`, this can be a primary read-write node, a read-only\n    // replica, or a read-only node pinned at an older LSN.\n    // `safekeeper_connstrings` must be set for a primary.\n    //\n    // For backwards compatibility, the control plane may leave out all of\n    // these, and instead set the \"neon.tenant_id\", \"neon.timeline_id\",\n    // etc. GUCs in cluster.settings. TODO: Once the control plane has been\n    // updated to fill these fields, we can make these non optional.\n    pub tenant_id: Option<TenantId>,\n    pub timeline_id: Option<TimelineId>,\n\n    /// Pageserver information can be passed in three different ways:\n    /// 1. Here in `pageserver_connection_info`\n    /// 2. In the `pageserver_connstring` field.\n    /// 3. in `cluster.settings`.\n    ///\n    /// The goal is to use method 1. everywhere. But for backwards-compatibility with old\n    /// versions of the control plane, `compute_ctl` will check 2. and 3. 
if the\n    /// `pageserver_connection_info` field is missing.\n    ///\n    /// If both `pageserver_connection_info` and `pageserver_connstring`+`shard_stripe_size` are\n    /// given, they must contain the same information.\n    pub pageserver_connection_info: Option<PageserverConnectionInfo>,\n\n    pub pageserver_connstring: Option<String>,\n\n    /// Stripe size for pageserver sharding, in pages. This is set together with the legacy\n    /// `pageserver_connstring` field. When the modern `pageserver_connection_info` field is used,\n    /// the stripe size is stored in `pageserver_connection_info.stripe_size` instead.\n    pub shard_stripe_size: Option<ShardStripeSize>,\n\n    // More neon ids that we expose to the compute_ctl\n    // and to postgres as neon extension GUCs.\n    pub project_id: Option<String>,\n    pub branch_id: Option<String>,\n    pub endpoint_id: Option<String>,\n\n    /// Safekeeper membership config generation. It is put in\n    /// neon.safekeepers GUC and serves two purposes:\n    /// 1) Non zero value forces walproposer to use membership configurations.\n    /// 2) If walproposer wants to update list of safekeepers to connect to\n    ///    taking them from some safekeeper mconf, it should check what value\n    ///    is newer by comparing the generation.\n    ///\n    /// Note: it could be SafekeeperGeneration, but this needs linking\n    /// compute_ctl with postgres_ffi.\n    #[serde(default)]\n    pub safekeepers_generation: Option<u32>,\n    #[serde(default)]\n    pub safekeeper_connstrings: Vec<String>,\n\n    #[serde(default)]\n    pub mode: ComputeMode,\n\n    /// If set, 'storage_auth_token' is used as the password to authenticate to\n    /// the pageserver and safekeepers.\n    pub storage_auth_token: Option<String>,\n\n    // information about available remote extensions\n    pub remote_extensions: Option<RemoteExtSpec>,\n\n    pub pgbouncer_settings: Option<IndexMap<String, String>>,\n\n    /// Local Proxy configuration 
used for JWT authentication\n    #[serde(default)]\n    pub local_proxy_config: Option<LocalProxySpec>,\n\n    /// Number of concurrent connections during the parallel RunInEachDatabase\n    /// phase of the apply config process.\n    ///\n    /// We need a higher concurrency during reconfiguration in case of many DBs,\n    /// but instance is already running and used by client. We can easily get out of\n    /// `max_connections` limit, and the current code won't handle that.\n    ///\n    /// Default is 1, but also allow control plane to override this value for specific\n    /// projects. It's also recommended to bump `superuser_reserved_connections` +=\n    /// `reconfigure_concurrency` for such projects to ensure that we always have\n    /// enough spare connections for reconfiguration process to succeed.\n    #[serde(default = \"default_reconfigure_concurrency\")]\n    pub reconfigure_concurrency: usize,\n\n    /// If set to true, the compute_ctl will drop all subscriptions before starting the\n    /// compute. This is needed when we start an endpoint on a branch, so that child\n    /// would not compete with parent branch subscriptions\n    /// over the same replication content from publisher.\n    #[serde(default)] // Default false\n    pub drop_subscriptions_before_start: bool,\n\n    /// Log level for compute audit logging\n    #[serde(default)]\n    pub audit_log_level: ComputeAudit,\n\n    /// Hostname and the port of the otel collector. 
Leave empty to disable Postgres logs forwarding.\n    /// Example: config-shy-breeze-123-collector-monitoring.neon-telemetry.svc.cluster.local:10514\n    pub logs_export_host: Option<String>,\n\n    /// Address of endpoint storage service\n    pub endpoint_storage_addr: Option<String>,\n    /// JWT for authorizing requests to endpoint storage service\n    pub endpoint_storage_token: Option<String>,\n\n    #[serde(default)]\n    /// Download LFC state from endpoint storage and pass it to Postgres on compute startup\n    pub autoprewarm: bool,\n\n    #[serde(default)]\n    /// Upload LFC state to endpoint storage periodically. Default value (None) means \"don't upload\"\n    pub offload_lfc_interval_seconds: Option<std::num::NonZeroU64>,\n\n    /// Suspend timeout in seconds.\n    ///\n    /// We use this value to derive other values, such as the installed extensions metric.\n    pub suspend_timeout_seconds: i64,\n\n    // Databricks specific options for compute instance.\n    pub databricks_settings: Option<DatabricksSettings>,\n}\n\n/// Feature flag to signal `compute_ctl` to enable certain experimental functionality.\n#[derive(Serialize, Clone, Copy, Debug, Deserialize, PartialEq, Eq)]\n#[serde(rename_all = \"snake_case\")]\npub enum ComputeFeature {\n    // XXX: Add more feature flags here.\n    /// Enable the experimental activity monitor logic, which uses `pg_stat_database` to\n    /// track short-lived connections as user activity.\n    ActivityMonitorExperimental,\n\n    /// Enable TLS functionality.\n    TlsExperimental,\n\n    /// This is a special feature flag that is used to represent unknown feature flags.\n    /// Basically all unknown to enum flags are represented as this one. 
See unit test\n    /// `parse_unknown_features()` for more details.\n    #[serde(other)]\n    UnknownFeature,\n}\n\n#[derive(Clone, Debug, Deserialize, Serialize, Eq, PartialEq)]\npub struct PageserverConnectionInfo {\n    /// NB: 0 for unsharded tenants, 1 for sharded tenants with 1 shard, following storage\n    pub shard_count: ShardCount,\n\n    /// INVARIANT: null if shard_count is 0, otherwise non-null and immutable\n    pub stripe_size: Option<ShardStripeSize>,\n\n    pub shards: HashMap<ShardIndex, PageserverShardInfo>,\n\n    /// If the compute supports both protocols, this indicates which one it should use.  The compute\n    /// may use other available protocols too, if it doesn't support the preferred one. The URLs\n    /// for the protocol specified here must be present for all shards, i.e. do not mark a protocol\n    /// as preferred if it cannot actually be used with all the pageservers.\n    #[serde(default)]\n    pub prefer_protocol: PageserverProtocol,\n}\n\n/// Extract PageserverConnectionInfo from a comma-separated list of libpq connection strings.\n///\n/// This is used for backwards-compatibility, to parse the legacy\n/// [ComputeSpec::pageserver_connstring] field, or the 'neon.pageserver_connstring' GUC. 
Nowadays,\n/// the 'pageserver_connection_info' field should be used instead.\nimpl PageserverConnectionInfo {\n    pub fn from_connstr(\n        connstr: &str,\n        stripe_size: Option<ShardStripeSize>,\n    ) -> Result<PageserverConnectionInfo, anyhow::Error> {\n        let shard_infos: Vec<_> = connstr\n            .split(',')\n            .map(|connstr| PageserverShardInfo {\n                pageservers: vec![PageserverShardConnectionInfo {\n                    id: None,\n                    libpq_url: Some(connstr.to_string()),\n                    grpc_url: None,\n                }],\n            })\n            .collect();\n\n        match shard_infos.len() {\n            0 => anyhow::bail!(\"empty connection string\"),\n            1 => {\n                // We assume that if there's only one connection string, it means \"unsharded\",\n                // rather than a sharded system with just a single shard. The latter is\n                // possible in principle, but we never do it.\n                let shard_count = ShardCount::unsharded();\n                let only_shard = shard_infos.first().unwrap().clone();\n                let shards = vec![(ShardIndex::unsharded(), only_shard)];\n                Ok(PageserverConnectionInfo {\n                    shard_count,\n                    stripe_size: None,\n                    shards: shards.into_iter().collect(),\n                    prefer_protocol: PageserverProtocol::Libpq,\n                })\n            }\n            n => {\n                if stripe_size.is_none() {\n                    anyhow::bail!(\"{n} shards but no stripe_size\");\n                }\n                let shard_count = ShardCount(n.try_into()?);\n                let shards = shard_infos\n                    .into_iter()\n                    .enumerate()\n                    .map(|(idx, shard_info)| {\n                        (\n                            ShardIndex {\n                                shard_count,\n                
                shard_number: ShardNumber(\n                                    idx.try_into().expect(\"shard number fits in u8\"),\n                                ),\n                            },\n                            shard_info,\n                        )\n                    })\n                    .collect();\n                Ok(PageserverConnectionInfo {\n                    shard_count,\n                    stripe_size,\n                    shards,\n                    prefer_protocol: PageserverProtocol::Libpq,\n                })\n            }\n        }\n    }\n\n    /// Convenience routine to get the connection string for a shard.\n    pub fn shard_url(\n        &self,\n        shard_number: ShardNumber,\n        protocol: PageserverProtocol,\n    ) -> anyhow::Result<&str> {\n        let shard_index = ShardIndex {\n            shard_number,\n            shard_count: self.shard_count,\n        };\n        let shard = self.shards.get(&shard_index).ok_or_else(|| {\n            anyhow::anyhow!(\"shard connection info missing for shard {}\", shard_index)\n        })?;\n\n        // Just use the first pageserver in the list. That's good enough for this\n        // convenience routine; if you need more control, like round robin policy or\n        // failover support, roll your own. 
(As of this writing, we never have more than\n        // one pageserver per shard anyway, but that will change in the future.)\n        let pageserver = shard\n            .pageservers\n            .first()\n            .ok_or(anyhow::anyhow!(\"must have at least one pageserver\"))?;\n\n        let result = match protocol {\n            PageserverProtocol::Grpc => pageserver\n                .grpc_url\n                .as_ref()\n                .ok_or(anyhow::anyhow!(\"no grpc_url for shard {shard_index}\"))?,\n            PageserverProtocol::Libpq => pageserver\n                .libpq_url\n                .as_ref()\n                .ok_or(anyhow::anyhow!(\"no libpq_url for shard {shard_index}\"))?,\n        };\n        Ok(result)\n    }\n}\n\n#[derive(Clone, Debug, Deserialize, Serialize, Eq, PartialEq)]\npub struct PageserverShardInfo {\n    pub pageservers: Vec<PageserverShardConnectionInfo>,\n}\n\n#[derive(Clone, Debug, Deserialize, Serialize, Eq, PartialEq)]\npub struct PageserverShardConnectionInfo {\n    pub id: Option<NodeId>,\n    pub libpq_url: Option<String>,\n    pub grpc_url: Option<String>,\n}\n\n#[derive(Clone, Debug, Default, Deserialize, Serialize)]\npub struct RemoteExtSpec {\n    pub public_extensions: Option<Vec<String>>,\n    pub custom_extensions: Option<Vec<String>>,\n    pub library_index: HashMap<String, String>,\n    pub extension_data: HashMap<String, ExtensionData>,\n}\n\n#[derive(Clone, Debug, Serialize, Deserialize)]\npub struct ExtensionData {\n    pub control_data: HashMap<String, String>,\n    pub archive_path: String,\n}\n\nimpl RemoteExtSpec {\n    pub fn get_ext(\n        &self,\n        ext_name: &str,\n        is_library: bool,\n        build_tag: &str,\n        pg_major_version: &str,\n    ) -> anyhow::Result<(String, RemotePath)> {\n        let mut real_ext_name = ext_name;\n        if is_library {\n            // sometimes library names might have a suffix like\n            // library.so or library.so.3. 
We strip this off\n            // because library_index is based on the name without the file extension\n            let strip_lib_suffix = Regex::new(r\"\\.so.*\").unwrap();\n            let lib_raw_name = strip_lib_suffix.replace(real_ext_name, \"\").to_string();\n\n            real_ext_name = self\n                .library_index\n                .get(&lib_raw_name)\n                .ok_or(anyhow::anyhow!(\"library {} is not found\", lib_raw_name))?;\n        }\n\n        // Check if extension is present in public or custom.\n        // If not, then it is not allowed to be used by this compute.\n        if !self\n            .public_extensions\n            .as_ref()\n            .is_some_and(|exts| exts.iter().any(|e| e == real_ext_name))\n            && !self\n                .custom_extensions\n                .as_ref()\n                .is_some_and(|exts| exts.iter().any(|e| e == real_ext_name))\n        {\n            return Err(anyhow::anyhow!(\"extension {} is not found\", real_ext_name));\n        }\n\n        match self.extension_data.get(real_ext_name) {\n            Some(_ext_data) => Ok((\n                real_ext_name.to_string(),\n                Self::build_remote_path(build_tag, pg_major_version, real_ext_name)?,\n            )),\n            None => Err(anyhow::anyhow!(\n                \"real_ext_name {} is not found\",\n                real_ext_name\n            )),\n        }\n    }\n\n    /// Get the architecture-specific portion of the remote extension path. 
We\n    /// use the Go naming convention due to Kubernetes.\n    fn get_arch() -> &'static str {\n        match std::env::consts::ARCH {\n            \"x86_64\" => \"amd64\",\n            \"aarch64\" => \"arm64\",\n            arch => arch,\n        }\n    }\n\n    /// Build a [`RemotePath`] for an extension.\n    fn build_remote_path(\n        build_tag: &str,\n        pg_major_version: &str,\n        ext_name: &str,\n    ) -> anyhow::Result<RemotePath> {\n        let arch = Self::get_arch();\n\n        // Construct the path to the extension archive\n        // BUILD_TAG/ARCH/PG_MAJOR_VERSION/extensions/EXTENSION_NAME.tar.zst\n        //\n        // Keep it in sync with path generation in\n        // https://github.com/neondatabase/build-custom-extensions/tree/main\n        RemotePath::from_string(&format!(\n            \"{build_tag}/{arch}/{pg_major_version}/extensions/{ext_name}.tar.zst\"\n        ))\n    }\n}\n\n#[derive(Clone, Copy, Debug, Default, Eq, PartialEq, Deserialize, Serialize)]\npub enum ComputeMode {\n    /// A read-write node\n    #[default]\n    Primary,\n    /// A read-only node, pinned at a particular LSN\n    Static(Lsn),\n    /// A read-only node that follows the tip of the branch in hot standby mode\n    ///\n    /// Future versions may want to distinguish between replicas with hot standby\n    /// feedback and other kinds of replication configurations.\n    Replica,\n}\n\nimpl ComputeMode {\n    /// Convert the compute mode to a string that can be used to identify the type of compute,\n    /// which means that if it's a static compute, the LSN will not be included.\n    pub fn to_type_str(&self) -> &'static str {\n        match self {\n            ComputeMode::Primary => \"primary\",\n            ComputeMode::Static(_) => \"static\",\n            ComputeMode::Replica => \"replica\",\n        }\n    }\n}\n\nimpl Display for ComputeMode {\n    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {\n        
f.write_str(self.to_type_str())\n    }\n}\n\n/// Log level for audit logging\n#[derive(Clone, Debug, Default, Eq, PartialEq, Deserialize, Serialize)]\npub enum ComputeAudit {\n    #[default]\n    Disabled,\n    // Deprecated, use Base instead\n    Log,\n    // (pgaudit.log = 'ddl', pgaudit.log_parameter='off')\n    // logged to the standard postgresql log stream\n    Base,\n    // Deprecated, use Full or Extended instead\n    Hipaa,\n    // (pgaudit.log = 'all, -misc', pgaudit.log_parameter='off')\n    // logged to separate files collected by rsyslog\n    // into dedicated log storage with strict access\n    Extended,\n    // (pgaudit.log='all', pgaudit.log_parameter='on'),\n    // logged to separate files collected by rsyslog\n    // into dedicated log storage with strict access.\n    Full,\n}\n\n#[derive(Clone, Debug, Default, Deserialize, Serialize, PartialEq, Eq)]\npub struct Cluster {\n    pub cluster_id: Option<String>,\n    pub name: Option<String>,\n    pub state: Option<String>,\n    pub roles: Vec<Role>,\n    pub databases: Vec<Database>,\n\n    /// Desired contents of 'postgresql.conf' file. (The 'compute_ctl'\n    /// tool may add additional settings to the final file.)\n    pub postgresql_conf: Option<String>,\n\n    /// Additional settings that will be appended to the 'postgresql.conf' file.\n    pub settings: GenericOptions,\n}\n\n/// Single cluster state changing operation that could not be represented as\n/// a static `Cluster` structure. 
For example:\n/// - DROP DATABASE\n/// - DROP ROLE\n/// - ALTER ROLE name RENAME TO new_name\n/// - ALTER DATABASE name RENAME TO new_name\n#[derive(Clone, Debug, Deserialize, Serialize)]\npub struct DeltaOp {\n    pub action: String,\n    pub name: PgIdent,\n    pub new_name: Option<PgIdent>,\n}\n\n/// Rust representation of Postgres role info with only those fields\n/// that matter for us.\n#[derive(Clone, Debug, Deserialize, Serialize, PartialEq, Eq)]\npub struct Role {\n    pub name: PgIdent,\n    pub encrypted_password: Option<String>,\n    pub options: GenericOptions,\n}\n\n/// Rust representation of Postgres database info with only those fields\n/// that matter for us.\n#[derive(Clone, Debug, Deserialize, Serialize, PartialEq, Eq)]\npub struct Database {\n    pub name: PgIdent,\n    pub owner: PgIdent,\n    pub options: GenericOptions,\n    // These are derived flags, not present in the spec file.\n    // They are never set by the control plane.\n    #[serde(skip_deserializing, default)]\n    pub restrict_conn: bool,\n    #[serde(skip_deserializing, default)]\n    pub invalid: bool,\n}\n\n/// Common type representing both SQL statement params with or without value,\n/// like `LOGIN` or `OWNER username` in the `CREATE/ALTER ROLE`, and config\n/// options like `wal_level = logical`.\n#[derive(Clone, Debug, Deserialize, Serialize, PartialEq, Eq)]\npub struct GenericOption {\n    pub name: String,\n    pub value: Option<String>,\n    pub vartype: String,\n}\n\n/// Postgres compute TLS settings.\n#[derive(Clone, Debug, Deserialize, Serialize, PartialEq)]\npub struct PgComputeTlsSettings {\n    // Absolute path to the certificate file for server-side TLS.\n    pub cert_file: String,\n    // Absolute path to the private key file for server-side TLS.\n    pub key_file: String,\n    // Absolute path to the certificate authority file for verifying client certificates.\n    pub ca_file: String,\n}\n\n/// Databricks specific options for compute instance.\n/// This is 
used to store any other settings that need to be propagated to Compute\n/// but should not be persisted to ComputeSpec in the database.\n#[derive(Clone, Debug, Deserialize, Serialize, PartialEq)]\npub struct DatabricksSettings {\n    pub pg_compute_tls_settings: PgComputeTlsSettings,\n    // Absolute file path to databricks_pg_hba.conf file.\n    pub databricks_pg_hba: String,\n    // Absolute file path to databricks_pg_ident.conf file.\n    pub databricks_pg_ident: String,\n    // Hostname portion of the Databricks workspace URL of the endpoint, or empty string if not known.\n    // A valid hostname is required for the compute instance to support PAT logins.\n    pub databricks_workspace_host: String,\n}\n\n/// Optional collection of `GenericOption`'s. Type alias allows us to\n/// declare a `trait` on it.\npub type GenericOptions = Option<Vec<GenericOption>>;\n\n/// Configures the local_proxy application with the relevant JWKS and roles it should\n/// use for authorizing connect requests using JWT.\n#[derive(Clone, Debug, Deserialize, Serialize)]\npub struct LocalProxySpec {\n    #[serde(default)]\n    #[serde(skip_serializing_if = \"Option::is_none\")]\n    pub jwks: Option<Vec<JwksSettings>>,\n    #[serde(default)]\n    #[serde(skip_serializing_if = \"Option::is_none\")]\n    pub tls: Option<TlsConfig>,\n}\n\n#[derive(Clone, Debug, Deserialize, Serialize)]\npub struct JwksSettings {\n    pub id: String,\n    pub role_names: Vec<String>,\n    pub jwks_url: String,\n    pub provider_name: String,\n    pub jwt_audience: Option<String>,\n}\n\n/// Protocol used to connect to a Pageserver.\n#[derive(Clone, Copy, Debug, Default, Deserialize, Serialize, PartialEq, Eq)]\npub enum PageserverProtocol {\n    /// The original protocol based on libpq and COPY. Uses postgresql:// or postgres:// scheme.\n    #[default]\n    #[serde(rename = \"libpq\")]\n    Libpq,\n    /// A newer, gRPC-based protocol. 
Uses grpc:// scheme.\n    #[serde(rename = \"grpc\")]\n    Grpc,\n}\n\nimpl PageserverProtocol {\n    /// Parses the protocol from a connstring scheme. Defaults to Libpq if no scheme is given.\n    /// Errors if the connstring is an invalid URL.\n    pub fn from_connstring(connstring: &str) -> anyhow::Result<Self> {\n        let scheme = match Url::parse(connstring) {\n            Ok(url) => url.scheme().to_lowercase(),\n            Err(url::ParseError::RelativeUrlWithoutBase) => return Ok(Self::default()),\n            Err(err) => return Err(anyhow!(\"invalid connstring URL: {err}\")),\n        };\n        match scheme.as_str() {\n            \"postgresql\" | \"postgres\" => Ok(Self::Libpq),\n            \"grpc\" => Ok(Self::Grpc),\n            scheme => Err(anyhow!(\"invalid protocol scheme: {scheme}\")),\n        }\n    }\n\n    /// Returns the URL scheme for the protocol, for use in connstrings.\n    pub fn scheme(&self) -> &'static str {\n        match self {\n            Self::Libpq => \"postgresql\",\n            Self::Grpc => \"grpc\",\n        }\n    }\n}\n\nimpl Display for PageserverProtocol {\n    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {\n        f.write_str(self.scheme())\n    }\n}\n\n#[cfg(test)]\nmod tests {\n    use std::fs::File;\n\n    use super::*;\n\n    #[test]\n    fn allow_installing_remote_extensions() {\n        let rspec: RemoteExtSpec = serde_json::from_value(serde_json::json!({\n            \"public_extensions\": null,\n            \"custom_extensions\": null,\n            \"library_index\": {},\n            \"extension_data\": {},\n        }))\n        .unwrap();\n\n        rspec\n            .get_ext(\"ext\", false, \"latest\", \"v17\")\n            .expect_err(\"Extension should not be found\");\n\n        let rspec: RemoteExtSpec = serde_json::from_value(serde_json::json!({\n            \"public_extensions\": [],\n            \"custom_extensions\": null,\n            \"library_index\": {},\n            
\"extension_data\": {},\n        }))\n        .unwrap();\n\n        rspec\n            .get_ext(\"ext\", false, \"latest\", \"v17\")\n            .expect_err(\"Extension should not be found\");\n\n        let rspec: RemoteExtSpec = serde_json::from_value(serde_json::json!({\n            \"public_extensions\": [],\n            \"custom_extensions\": [],\n            \"library_index\": {\n                \"ext\": \"ext\"\n            },\n            \"extension_data\": {\n                \"ext\": {\n                    \"control_data\": {\n                        \"ext.control\": \"\"\n                    },\n                    \"archive_path\": \"\"\n                }\n            },\n        }))\n        .unwrap();\n\n        rspec\n            .get_ext(\"ext\", false, \"latest\", \"v17\")\n            .expect_err(\"Extension should not be found\");\n\n        let rspec: RemoteExtSpec = serde_json::from_value(serde_json::json!({\n            \"public_extensions\": [],\n            \"custom_extensions\": [\"ext\"],\n            \"library_index\": {\n                \"ext\": \"ext\"\n            },\n            \"extension_data\": {\n                \"ext\": {\n                    \"control_data\": {\n                        \"ext.control\": \"\"\n                    },\n                    \"archive_path\": \"\"\n                }\n            },\n        }))\n        .unwrap();\n\n        rspec\n            .get_ext(\"ext\", false, \"latest\", \"v17\")\n            .expect(\"Extension should be found\");\n\n        let rspec: RemoteExtSpec = serde_json::from_value(serde_json::json!({\n            \"public_extensions\": [\"ext\"],\n            \"custom_extensions\": [],\n            \"library_index\": {\n                \"extlib\": \"ext\",\n            },\n            \"extension_data\": {\n                \"ext\": {\n                    \"control_data\": {\n                        \"ext.control\": \"\"\n                    },\n                    
\"archive_path\": \"\"\n                }\n            },\n        }))\n        .unwrap();\n\n        rspec\n            .get_ext(\"ext\", false, \"latest\", \"v17\")\n            .expect(\"Extension should be found\");\n\n        // test library index for the case when library name\n        // doesn't match the extension name\n        rspec\n            .get_ext(\"extlib\", true, \"latest\", \"v17\")\n            .expect(\"Library should be found\");\n    }\n\n    #[test]\n    fn remote_extension_path() {\n        let rspec: RemoteExtSpec = serde_json::from_value(serde_json::json!({\n            \"public_extensions\": [\"ext\"],\n            \"custom_extensions\": [],\n            \"library_index\": {\n                \"extlib\": \"ext\",\n            },\n            \"extension_data\": {\n                \"ext\": {\n                    \"control_data\": {\n                        \"ext.control\": \"\"\n                    },\n                    \"archive_path\": \"\"\n                }\n            },\n        }))\n        .unwrap();\n\n        let (_ext_name, ext_path) = rspec\n            .get_ext(\"ext\", false, \"latest\", \"v17\")\n            .expect(\"Extension should be found\");\n        // Starting with a forward slash would have consequences for the\n        // Url::join() that occurs when downloading a remote extension.\n        assert!(!ext_path.to_string().starts_with(\"/\"));\n        assert_eq!(\n            ext_path,\n            RemoteExtSpec::build_remote_path(\"latest\", \"v17\", \"ext\").unwrap()\n        );\n    }\n\n    #[test]\n    fn parse_spec_file() {\n        let file = File::open(\"tests/cluster_spec.json\").unwrap();\n        let spec: ComputeSpec = serde_json::from_reader(file).unwrap();\n\n        // Features list defaults to empty vector.\n        assert!(spec.features.is_empty());\n\n        // Reconfigure concurrency defaults to 1.\n        assert_eq!(spec.reconfigure_concurrency, 1);\n    }\n\n    #[test]\n    fn 
parse_unknown_fields() {\n        // Forward compatibility test\n        let file = File::open(\"tests/cluster_spec.json\").unwrap();\n        let mut json: serde_json::Value = serde_json::from_reader(file).unwrap();\n        let ob = json.as_object_mut().unwrap();\n        ob.insert(\"unknown_field_123123123\".into(), \"hello\".into());\n        let _spec: ComputeSpec = serde_json::from_value(json).unwrap();\n    }\n\n    #[test]\n    fn parse_unknown_features() {\n        // Test that unknown feature flags do not cause any errors.\n        let file = File::open(\"tests/cluster_spec.json\").unwrap();\n        let mut json: serde_json::Value = serde_json::from_reader(file).unwrap();\n        let ob = json.as_object_mut().unwrap();\n\n        // Add unknown feature flags.\n        let features = vec![\"foo_bar_feature\", \"baz_feature\"];\n        ob.insert(\"features\".into(), features.into());\n\n        let spec: ComputeSpec = serde_json::from_value(json).unwrap();\n\n        assert!(spec.features.len() == 2);\n        assert!(spec.features.contains(&ComputeFeature::UnknownFeature));\n        assert_eq!(spec.features, vec![ComputeFeature::UnknownFeature; 2]);\n    }\n\n    #[test]\n    fn parse_known_features() {\n        // Test that we can properly parse known feature flags.\n        let file = File::open(\"tests/cluster_spec.json\").unwrap();\n        let mut json: serde_json::Value = serde_json::from_reader(file).unwrap();\n        let ob = json.as_object_mut().unwrap();\n\n        // Add known feature flags.\n        let features = vec![\"activity_monitor_experimental\"];\n        ob.insert(\"features\".into(), features.into());\n\n        let spec: ComputeSpec = serde_json::from_value(json).unwrap();\n\n        assert_eq!(\n            spec.features,\n            vec![ComputeFeature::ActivityMonitorExperimental]\n        );\n    }\n}\n"
  },
  {
    "path": "libs/compute_api/tests/cluster_spec.json",
    "content": "{\n    \"format_version\": 1.0,\n\n    \"timestamp\": \"2021-05-23T18:25:43.511Z\",\n    \"operation_uuid\": \"0f657b36-4b0f-4a2d-9c2e-1dcd615e7d8b\",\n    \"suspend_timeout_seconds\": 3600,\n\n    \"cluster\": {\n        \"cluster_id\": \"test-cluster-42\",\n        \"name\": \"Zenith Test\",\n        \"state\": \"restarted\",\n        \"roles\": [\n            {\n                \"name\": \"postgres\",\n                \"encrypted_password\": \"6b1d16b78004bbd51fa06af9eda75972\",\n                \"options\": null\n            },\n            {\n                \"name\": \"alexk\",\n                \"encrypted_password\": null,\n                \"options\": null\n            },\n            {\n                \"name\": \"zenith \\\"new\\\"\",\n                \"encrypted_password\": \"5b1d16b78004bbd51fa06af9eda75972\",\n                \"options\": null\n            },\n            {\n                \"name\": \"zen\",\n                \"encrypted_password\": \"9b1d16b78004bbd51fa06af9eda75972\"\n            },\n            {\n                \"name\": \"\\\"name\\\";\\\\n select 1;\",\n                \"encrypted_password\": \"5b1d16b78004bbd51fa06af9eda75972\"\n            },\n            {\n                \"name\": \"MyRole\",\n                \"encrypted_password\": \"5b1d16b78004bbd51fa06af9eda75972\"\n            }\n        ],\n        \"databases\": [\n            {\n                \"name\": \"DB2\",\n                \"owner\": \"alexk\",\n                \"options\": [\n                    {\n                        \"name\": \"LC_COLLATE\",\n                        \"value\": \"C\",\n                        \"vartype\": \"string\"\n                    },\n                    {\n                        \"name\": \"LC_CTYPE\",\n                        \"value\": \"C\",\n                        \"vartype\": \"string\"\n                    },\n                    {\n                        \"name\": \"TEMPLATE\",\n                        
\"value\": \"template0\",\n                        \"vartype\": \"enum\"\n                    }\n                ]\n            },\n            {\n                \"name\": \"zenith\",\n                \"owner\": \"MyRole\"\n            },\n            {\n                \"name\": \"zen\",\n                \"owner\": \"zen\"\n            }\n        ],\n        \"settings\": [\n            {\n                \"name\": \"fsync\",\n                \"value\": \"off\",\n                \"vartype\": \"bool\"\n            },\n            {\n                \"name\": \"wal_level\",\n                \"value\": \"logical\",\n                \"vartype\": \"enum\"\n            },\n            {\n                \"name\": \"hot_standby\",\n                \"value\": \"on\",\n                \"vartype\": \"bool\"\n            },\n            {\n                \"name\": \"autoprewarm\",\n                \"value\": \"off\",\n                \"vartype\": \"bool\"\n            },\n            {\n                \"name\": \"offload_lfc_interval_seconds\",\n                \"value\": \"20\",\n                \"vartype\": \"integer\"\n            },\n            {\n                \"name\": \"neon.safekeepers\",\n                \"value\": \"127.0.0.1:6502,127.0.0.1:6503,127.0.0.1:6501\",\n                \"vartype\": \"string\"\n            },\n            {\n                \"name\": \"wal_log_hints\",\n                \"value\": \"on\",\n                \"vartype\": \"bool\"\n            },\n            {\n                \"name\": \"log_connections\",\n                \"value\": \"on\",\n                \"vartype\": \"bool\"\n            },\n            {\n                \"name\": \"shared_buffers\",\n                \"value\": \"32768\",\n                \"vartype\": \"integer\"\n            },\n            {\n                \"name\": \"port\",\n                \"value\": \"55432\",\n                \"vartype\": \"integer\"\n            },\n            {\n                
\"name\": \"max_connections\",\n                \"value\": \"100\",\n                \"vartype\": \"integer\"\n            },\n            {\n                \"name\": \"max_wal_senders\",\n                \"value\": \"10\",\n                \"vartype\": \"integer\"\n            },\n            {\n                \"name\": \"listen_addresses\",\n                \"value\": \"0.0.0.0\",\n                \"vartype\": \"string\"\n            },\n            {\n                \"name\": \"wal_sender_timeout\",\n                \"value\": \"0\",\n                \"vartype\": \"integer\"\n            },\n            {\n                \"name\": \"password_encryption\",\n                \"value\": \"md5\",\n                \"vartype\": \"enum\"\n            },\n            {\n                \"name\": \"maintenance_work_mem\",\n                \"value\": \"65536\",\n                \"vartype\": \"integer\"\n            },\n            {\n                \"name\": \"max_parallel_workers\",\n                \"value\": \"8\",\n                \"vartype\": \"integer\"\n            },\n            {\n                \"name\": \"max_worker_processes\",\n                \"value\": \"8\",\n                \"vartype\": \"integer\"\n            },\n            {\n                \"name\": \"neon.tenant_id\",\n                \"value\": \"b0554b632bd4d547a63b86c3630317e8\",\n                \"vartype\": \"string\"\n            },\n            {\n                \"name\": \"max_replication_slots\",\n                \"value\": \"10\",\n                \"vartype\": \"integer\"\n            },\n            {\n                \"name\": \"neon.timeline_id\",\n                \"value\": \"2414a61ffc94e428f14b5758fe308e13\",\n                \"vartype\": \"string\"\n            },\n            {\n                \"name\": \"shared_preload_libraries\",\n                \"value\": \"neon\",\n                \"vartype\": \"string\"\n            },\n            {\n                \"name\": 
\"synchronous_standby_names\",\n                \"value\": \"walproposer\",\n                \"vartype\": \"string\"\n            },\n            {\n                \"name\": \"neon.pageserver_connstring\",\n                \"value\": \"host=127.0.0.1 port=6400\",\n                \"vartype\": \"string\"\n            },\n            {\n                \"name\": \"test.escaping\",\n                \"value\": \"here's a backslash \\\\ and a quote ' and a double-quote \\\" hooray\",\n                \"vartype\": \"string\"\n            }\n        ]\n    },\n    \"delta_operations\": [\n        {\n            \"action\": \"delete_db\",\n            \"name\": \"zenith_test\"\n        },\n        {\n            \"action\": \"rename_db\",\n            \"name\": \"DB\",\n            \"new_name\": \"DB2\"\n        },\n        {\n            \"action\": \"delete_role\",\n            \"name\": \"zenith2\"\n        },\n        {\n            \"action\": \"rename_role\",\n            \"name\": \"zenith new\",\n            \"new_name\": \"zenith \\\"new\\\"\"\n        }\n    ],\n    \"remote_extensions\": {\n        \"library_index\": {\n          \"postgis-3\": \"postgis\",\n          \"libpgrouting-3.4\": \"postgis\",\n          \"postgis_raster-3\": \"postgis\",\n          \"postgis_sfcgal-3\": \"postgis\",\n          \"postgis_topology-3\": \"postgis\",\n          \"address_standardizer-3\": \"postgis\"\n        },\n        \"extension_data\": {\n          \"postgis\": {\n            \"archive_path\": \"5834329303/v15/extensions/postgis.tar.zst\",\n            \"control_data\": {\n              \"postgis.control\": \"# postgis extension\\ncomment = ''PostGIS geometry and geography spatial types and functions''\\ndefault_version = ''3.3.2''\\nmodule_pathname = ''$libdir/postgis-3''\\nrelocatable = false\\ntrusted = true\\n\",\n              \"pgrouting.control\": \"# pgRouting Extension\\ncomment = ''pgRouting Extension''\\ndefault_version = ''3.4.2''\\nmodule_pathname = 
''$libdir/libpgrouting-3.4''\\nrelocatable = true\\nrequires = ''plpgsql''\\nrequires = ''postgis''\\ntrusted = true\\n\",\n              \"postgis_raster.control\": \"# postgis_raster extension\\ncomment = ''PostGIS raster types and functions''\\ndefault_version = ''3.3.2''\\nmodule_pathname = ''$libdir/postgis_raster-3''\\nrelocatable = false\\nrequires = postgis\\ntrusted = true\\n\",\n              \"postgis_sfcgal.control\": \"# postgis topology extension\\ncomment = ''PostGIS SFCGAL functions''\\ndefault_version = ''3.3.2''\\nrelocatable = true\\nrequires = postgis\\ntrusted = true\\n\",\n              \"postgis_topology.control\": \"# postgis topology extension\\ncomment = ''PostGIS topology spatial types and functions''\\ndefault_version = ''3.3.2''\\nrelocatable = false\\nschema = topology\\nrequires = postgis\\ntrusted = true\\n\",\n              \"address_standardizer.control\": \"# address_standardizer extension\\ncomment = ''Used to parse an address into constituent elements. Generally used to support geocoding address normalization step.''\\ndefault_version = ''3.3.2''\\nrelocatable = true\\ntrusted = true\\n\",\n              \"postgis_tiger_geocoder.control\": \"# postgis tiger geocoder extension\\ncomment = ''PostGIS tiger geocoder and reverse geocoder''\\ndefault_version = ''3.3.2''\\nrelocatable = false\\nschema = tiger\\nrequires = ''postgis,fuzzystrmatch''\\nsuperuser= false\\ntrusted = true\\n\",\n              \"address_standardizer_data_us.control\": \"# address standardizer us dataset\\ncomment = ''Address Standardizer US dataset example''\\ndefault_version = ''3.3.2''\\nrelocatable = true\\ntrusted = true\\n\"\n            }\n          }\n        },\n        \"custom_extensions\": [\n        ],\n        \"public_extensions\": [\n          \"postgis\"\n        ]\n      },\n      \"pgbouncer_settings\": {\n        \"default_pool_size\": \"42\",\n        \"pool_mode\": \"session\"\n      }\n}\n"
  },
  {
    "path": "libs/consumption_metrics/Cargo.toml",
    "content": "[package]\nname = \"consumption_metrics\"\nversion = \"0.1.0\"\nedition = \"2024\"\nlicense = \"Apache-2.0\"\n\n[dependencies]\nchrono = { workspace = true, features = [\"serde\"] }\nrand.workspace = true\nserde.workspace = true\n"
  },
  {
    "path": "libs/consumption_metrics/src/lib.rs",
    "content": "//! Shared code for consumption metics collection\n#![deny(unsafe_code)]\n#![deny(clippy::undocumented_unsafe_blocks)]\nuse chrono::{DateTime, Utc};\nuse rand::Rng;\nuse serde::{Deserialize, Serialize};\n\n#[derive(Serialize, Deserialize, Debug, Clone, Copy, Eq, PartialEq, Ord, PartialOrd)]\n#[serde(tag = \"type\")]\npub enum EventType {\n    #[serde(rename = \"absolute\")]\n    Absolute { time: DateTime<Utc> },\n    #[serde(rename = \"incremental\")]\n    Incremental {\n        start_time: DateTime<Utc>,\n        stop_time: DateTime<Utc>,\n    },\n}\n\nimpl EventType {\n    pub fn absolute_time(&self) -> Option<&DateTime<Utc>> {\n        use EventType::*;\n        match self {\n            Absolute { time } => Some(time),\n            _ => None,\n        }\n    }\n\n    pub fn incremental_timerange(&self) -> Option<std::ops::Range<&DateTime<Utc>>> {\n        // these can most likely be thought of as Range or RangeFull, at least pageserver creates\n        // incremental ranges where the stop and next start are equal.\n        use EventType::*;\n        match self {\n            Incremental {\n                start_time,\n                stop_time,\n            } => Some(start_time..stop_time),\n            _ => None,\n        }\n    }\n\n    pub fn is_incremental(&self) -> bool {\n        matches!(self, EventType::Incremental { .. })\n    }\n\n    /// Returns the absolute time, or for incremental ranges, the stop time.\n    pub fn recorded_at(&self) -> &DateTime<Utc> {\n        use EventType::*;\n\n        match self {\n            Absolute { time } => time,\n            Incremental { stop_time, .. 
} => stop_time,\n        }\n    }\n}\n\n#[derive(Serialize, Deserialize, Debug, Clone, Eq, PartialEq, Ord, PartialOrd)]\npub struct Event<Extra, Metric> {\n    #[serde(flatten)]\n    #[serde(rename = \"type\")]\n    pub kind: EventType,\n\n    pub metric: Metric,\n    pub idempotency_key: String,\n    pub value: u64,\n\n    #[serde(flatten)]\n    pub extra: Extra,\n}\n\npub fn idempotency_key(node_id: &str) -> String {\n    IdempotencyKey::generate(node_id).to_string()\n}\n\n/// Downstream users will use these to detect upload retries.\npub struct IdempotencyKey<'a> {\n    now: chrono::DateTime<Utc>,\n    node_id: &'a str,\n    nonce: u16,\n}\n\nimpl std::fmt::Display for IdempotencyKey<'_> {\n    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {\n        write!(f, \"{}-{}-{:04}\", self.now, self.node_id, self.nonce)\n    }\n}\n\nimpl<'a> IdempotencyKey<'a> {\n    pub fn generate(node_id: &'a str) -> Self {\n        IdempotencyKey {\n            now: Utc::now(),\n            node_id,\n            nonce: rand::rng().random_range(0..=9999),\n        }\n    }\n\n    pub fn for_tests(now: DateTime<Utc>, node_id: &'a str, nonce: u16) -> Self {\n        IdempotencyKey {\n            now,\n            node_id,\n            nonce,\n        }\n    }\n}\n\n/// Split into chunks of 1000 metrics to avoid exceeding the max request size.\npub const CHUNK_SIZE: usize = 1000;\n\n// Just a wrapper around a slice of events\n// to serialize it as `{\"events\" : [ ] }\n#[derive(Debug, serde::Serialize, serde::Deserialize, PartialEq)]\npub struct EventChunk<'a, T: Clone + PartialEq> {\n    pub events: std::borrow::Cow<'a, [T]>,\n}\n"
  },
  {
    "path": "libs/desim/Cargo.toml",
    "content": "[package]\nname = \"desim\"\nversion = \"0.1.0\"\nedition.workspace = true\nlicense.workspace = true\n\n[dependencies]\nanyhow.workspace = true\nrand.workspace = true\ntracing.workspace = true\nbytes.workspace = true\nutils.workspace = true\nparking_lot.workspace = true\nhex.workspace = true\nsmallvec = { workspace = true, features = [\"write\"] }\n"
  },
  {
    "path": "libs/desim/README.md",
    "content": "# Discrete Event SIMulator\n\nThis is a library for running simulations of distributed systems. The main idea is borrowed from [FoundationDB](https://www.youtube.com/watch?v=4fFDFbi3toc).\n\nEach node runs as a separate thread. This library was not optimized for speed yet, but it's already much faster than running usual intergration tests in real time, because it uses virtual simulation time and can fast-forward time to skip intervals where all nodes are doing nothing but sleeping or waiting for something.\n\nThe original purpose for this library is to test walproposer and safekeeper implementation working together, in a scenarios close to the real world environment. This simulator is determenistic and can inject failures in networking without waiting minutes of wall-time to trigger timeout, which makes it easier to find bugs in our consensus implementation compared to using integration tests.\n"
  },
  {
    "path": "libs/desim/src/chan.rs",
    "content": "use std::collections::VecDeque;\nuse std::sync::Arc;\n\nuse parking_lot::{Mutex, MutexGuard};\n\nuse crate::executor::{self, PollSome, Waker};\n\n/// FIFO channel with blocking send and receive. Can be cloned and shared between threads.\n/// Blocking functions should be used only from threads that are managed by the executor.\npub struct Chan<T> {\n    shared: Arc<State<T>>,\n}\n\nimpl<T> Clone for Chan<T> {\n    fn clone(&self) -> Self {\n        Chan {\n            shared: self.shared.clone(),\n        }\n    }\n}\n\nimpl<T> Default for Chan<T> {\n    fn default() -> Self {\n        Self::new()\n    }\n}\n\nimpl<T> Chan<T> {\n    pub fn new() -> Chan<T> {\n        Chan {\n            shared: Arc::new(State {\n                queue: Mutex::new(VecDeque::new()),\n                waker: Waker::new(),\n            }),\n        }\n    }\n\n    /// Get a message from the front of the queue, block if the queue is empty.\n    /// If not called from the executor thread, it can block forever.\n    pub fn recv(&self) -> T {\n        self.shared.recv()\n    }\n\n    /// Panic if the queue is empty.\n    pub fn must_recv(&self) -> T {\n        self.shared\n            .try_recv()\n            .expect(\"message should've been ready\")\n    }\n\n    /// Get a message from the front of the queue, return None if the queue is empty.\n    /// Never blocks.\n    pub fn try_recv(&self) -> Option<T> {\n        self.shared.try_recv()\n    }\n\n    /// Send a message to the back of the queue.\n    pub fn send(&self, t: T) {\n        self.shared.send(t);\n    }\n}\n\nstruct State<T> {\n    queue: Mutex<VecDeque<T>>,\n    waker: Waker,\n}\n\nimpl<T> State<T> {\n    fn send(&self, t: T) {\n        self.queue.lock().push_back(t);\n        self.waker.wake_all();\n    }\n\n    fn try_recv(&self) -> Option<T> {\n        let mut q = self.queue.lock();\n        q.pop_front()\n    }\n\n    fn recv(&self) -> T {\n        // interrupt the receiver to prevent consuming everything at 
once\n        executor::yield_me(0);\n\n        let mut queue = self.queue.lock();\n        if let Some(t) = queue.pop_front() {\n            return t;\n        }\n        loop {\n            self.waker.wake_me_later();\n            if let Some(t) = queue.pop_front() {\n                return t;\n            }\n            MutexGuard::unlocked(&mut queue, || {\n                executor::yield_me(-1);\n            });\n        }\n    }\n}\n\nimpl<T> PollSome for Chan<T> {\n    /// Schedules a wakeup for the current thread.\n    fn wake_me(&self) {\n        self.shared.waker.wake_me_later();\n    }\n\n    /// Checks if chan has any pending messages.\n    fn has_some(&self) -> bool {\n        !self.shared.queue.lock().is_empty()\n    }\n}\n"
  },
  {
    "path": "libs/desim/src/executor.rs",
    "content": "use std::panic::AssertUnwindSafe;\nuse std::sync::atomic::{AtomicBool, AtomicU8, AtomicU32, Ordering};\nuse std::sync::{Arc, OnceLock, mpsc};\nuse std::thread::JoinHandle;\n\nuse tracing::{debug, error, trace};\n\nuse crate::time::Timing;\n\n/// Stores status of the running threads. Threads are registered in the runtime upon creation\n/// and deregistered upon termination.\npub struct Runtime {\n    // stores handles to all threads that are currently running\n    threads: Vec<ThreadHandle>,\n    // stores current time and pending wakeups\n    clock: Arc<Timing>,\n    // thread counter\n    thread_counter: AtomicU32,\n    // Thread step counter -- how many times all threads has been actually\n    // stepped (note that all world/time/executor/thread have slightly different\n    // meaning of steps). For observability.\n    pub step_counter: u64,\n}\n\nimpl Runtime {\n    /// Init new runtime, no running threads.\n    pub fn new(clock: Arc<Timing>) -> Self {\n        Self {\n            threads: Vec::new(),\n            clock,\n            thread_counter: AtomicU32::new(0),\n            step_counter: 0,\n        }\n    }\n\n    /// Spawn a new thread and register it in the runtime.\n    pub fn spawn<F>(&mut self, f: F) -> ExternalHandle\n    where\n        F: FnOnce() + Send + 'static,\n    {\n        let (tx, rx) = mpsc::channel();\n\n        let clock = self.clock.clone();\n        let tid = self.thread_counter.fetch_add(1, Ordering::SeqCst);\n        debug!(\"spawning thread-{}\", tid);\n\n        let join = std::thread::spawn(move || {\n            let _guard = tracing::info_span!(\"\", tid).entered();\n\n            let res = std::panic::catch_unwind(AssertUnwindSafe(|| {\n                with_thread_context(|ctx| {\n                    assert!(ctx.clock.set(clock).is_ok());\n                    ctx.id.store(tid, Ordering::SeqCst);\n                    tx.send(ctx.clone()).expect(\"failed to send thread context\");\n                    // suspend 
thread to put it to `threads` in sleeping state\n                    ctx.yield_me(0);\n                });\n\n                // start user-provided function\n                f();\n            }));\n            debug!(\"thread finished\");\n\n            if let Err(e) = res {\n                with_thread_context(|ctx| {\n                    if !ctx.allow_panic.load(std::sync::atomic::Ordering::SeqCst) {\n                        error!(\"thread panicked, terminating the process: {:?}\", e);\n                        std::process::exit(1);\n                    }\n\n                    debug!(\"thread panicked: {:?}\", e);\n                    let mut result = ctx.result.lock();\n                    if result.0 == -1 {\n                        *result = (256, format!(\"thread panicked: {e:?}\"));\n                    }\n                });\n            }\n\n            with_thread_context(|ctx| {\n                ctx.finish_me();\n            });\n        });\n\n        let ctx = rx.recv().expect(\"failed to receive thread context\");\n        let handle = ThreadHandle::new(ctx.clone(), join);\n\n        self.threads.push(handle);\n\n        ExternalHandle { ctx }\n    }\n\n    /// Returns true if there are any unfinished activity, such as running thread or pending events.\n    /// Otherwise returns false, which means all threads are blocked forever.\n    pub fn step(&mut self) -> bool {\n        trace!(\"runtime step\");\n\n        // have we run any thread?\n        let mut ran = false;\n\n        self.threads.retain(|thread: &ThreadHandle| {\n            let res = thread.ctx.wakeup.compare_exchange(\n                PENDING_WAKEUP,\n                NO_WAKEUP,\n                Ordering::SeqCst,\n                Ordering::SeqCst,\n            );\n            if res.is_err() {\n                // thread has no pending wakeups, leaving as is\n                return true;\n            }\n            ran = true;\n\n            trace!(\"entering thread-{}\", 
thread.ctx.tid());\n            let status = thread.step();\n            self.step_counter += 1;\n            trace!(\n                \"out of thread-{} with status {:?}\",\n                thread.ctx.tid(),\n                status\n            );\n\n            if status == Status::Sleep {\n                true\n            } else {\n                trace!(\"thread has finished\");\n                // removing the thread from the list\n                false\n            }\n        });\n\n        if !ran {\n            trace!(\"no threads were run, stepping clock\");\n            if let Some(ctx_to_wake) = self.clock.step() {\n                trace!(\"waking up thread-{}\", ctx_to_wake.tid());\n                ctx_to_wake.inc_wake();\n            } else {\n                return false;\n            }\n        }\n\n        true\n    }\n\n    /// Kill all threads. This is done by setting a flag in each thread context and waking it up.\n    pub fn crash_all_threads(&mut self) {\n        for thread in self.threads.iter() {\n            thread.ctx.crash_stop();\n        }\n\n        // all threads should be finished after a few steps\n        while !self.threads.is_empty() {\n            self.step();\n        }\n    }\n}\n\nimpl Drop for Runtime {\n    fn drop(&mut self) {\n        debug!(\"dropping the runtime\");\n        self.crash_all_threads();\n    }\n}\n\n#[derive(Clone)]\npub struct ExternalHandle {\n    ctx: Arc<ThreadContext>,\n}\n\nimpl ExternalHandle {\n    /// Returns true if thread has finished execution.\n    pub fn is_finished(&self) -> bool {\n        let status = self.ctx.mutex.lock();\n        *status == Status::Finished\n    }\n\n    /// Returns exitcode and message, which is available after thread has finished execution.\n    pub fn result(&self) -> (i32, String) {\n        let result = self.ctx.result.lock();\n        result.clone()\n    }\n\n    /// Returns thread id.\n    pub fn id(&self) -> u32 {\n        self.ctx.id.load(Ordering::SeqCst)\n    
}\n\n    /// Sets a flag to crash thread on the next wakeup.\n    pub fn crash_stop(&self) {\n        self.ctx.crash_stop();\n    }\n}\n\nstruct ThreadHandle {\n    ctx: Arc<ThreadContext>,\n    _join: JoinHandle<()>,\n}\n\nimpl ThreadHandle {\n    /// Create a new [`ThreadHandle`] and wait until thread will enter [`Status::Sleep`] state.\n    fn new(ctx: Arc<ThreadContext>, join: JoinHandle<()>) -> Self {\n        let mut status = ctx.mutex.lock();\n        // wait until thread will go into the first yield\n        while *status != Status::Sleep {\n            ctx.condvar.wait(&mut status);\n        }\n        drop(status);\n\n        Self { ctx, _join: join }\n    }\n\n    /// Allows thread to execute one step of its execution.\n    /// Returns [`Status`] of the thread after the step.\n    fn step(&self) -> Status {\n        let mut status = self.ctx.mutex.lock();\n        assert!(matches!(*status, Status::Sleep));\n\n        *status = Status::Running;\n        self.ctx.condvar.notify_all();\n\n        while *status == Status::Running {\n            self.ctx.condvar.wait(&mut status);\n        }\n\n        *status\n    }\n}\n\n#[derive(Clone, Copy, Debug, PartialEq, Eq)]\nenum Status {\n    /// Thread is running.\n    Running,\n    /// Waiting for event to complete, will be resumed by the executor step, once wakeup flag is set.\n    Sleep,\n    /// Thread finished execution.\n    Finished,\n}\n\nconst NO_WAKEUP: u8 = 0;\nconst PENDING_WAKEUP: u8 = 1;\n\npub struct ThreadContext {\n    id: AtomicU32,\n    // used to block thread until it is woken up\n    mutex: parking_lot::Mutex<Status>,\n    condvar: parking_lot::Condvar,\n    // used as a flag to indicate runtime that thread is ready to be woken up\n    wakeup: AtomicU8,\n    clock: OnceLock<Arc<Timing>>,\n    // execution result, set by exit() call\n    result: parking_lot::Mutex<(i32, String)>,\n    // determines if process should be killed on receiving panic\n    allow_panic: AtomicBool,\n    // acts as a 
signal that thread should crash itself on the next wakeup\n    crash_request: AtomicBool,\n}\n\nimpl ThreadContext {\n    pub(crate) fn new() -> Self {\n        Self {\n            id: AtomicU32::new(0),\n            mutex: parking_lot::Mutex::new(Status::Running),\n            condvar: parking_lot::Condvar::new(),\n            wakeup: AtomicU8::new(NO_WAKEUP),\n            clock: OnceLock::new(),\n            result: parking_lot::Mutex::new((-1, String::new())),\n            allow_panic: AtomicBool::new(false),\n            crash_request: AtomicBool::new(false),\n        }\n    }\n}\n\n// Functions for executor to control thread execution.\nimpl ThreadContext {\n    /// Set atomic flag to indicate that thread is ready to be woken up.\n    fn inc_wake(&self) {\n        self.wakeup.store(PENDING_WAKEUP, Ordering::SeqCst);\n    }\n\n    /// Internal function used for event queues.\n    pub(crate) fn schedule_wakeup(self: &Arc<Self>, after_ms: u64) {\n        self.clock\n            .get()\n            .unwrap()\n            .schedule_wakeup(after_ms, self.clone());\n    }\n\n    fn tid(&self) -> u32 {\n        self.id.load(Ordering::SeqCst)\n    }\n\n    fn crash_stop(&self) {\n        let status = self.mutex.lock();\n        if *status == Status::Finished {\n            debug!(\n                \"trying to crash thread-{}, which is already finished\",\n                self.tid()\n            );\n            return;\n        }\n        assert!(matches!(*status, Status::Sleep));\n        drop(status);\n\n        self.allow_panic.store(true, Ordering::SeqCst);\n        self.crash_request.store(true, Ordering::SeqCst);\n        // set a wakeup\n        self.inc_wake();\n        // it will panic on the next wakeup\n    }\n}\n\n// Internal functions.\nimpl ThreadContext {\n    /// Blocks thread until it's woken up by the executor. If `after_ms` is 0, is will be\n    /// woken on the next step. 
If `after_ms` > 0, wakeup is scheduled after that time.\n    /// Otherwise wakeup is not scheduled inside `yield_me`, and should be arranged before\n    /// calling this function.\n    fn yield_me(self: &Arc<Self>, after_ms: i64) {\n        let mut status = self.mutex.lock();\n        assert!(matches!(*status, Status::Running));\n\n        match after_ms.cmp(&0) {\n            std::cmp::Ordering::Less => {\n                // block until something wakes us up\n            }\n            std::cmp::Ordering::Equal => {\n                // tell executor that we are ready to be woken up\n                self.inc_wake();\n            }\n            std::cmp::Ordering::Greater => {\n                // schedule wakeup\n                self.clock\n                    .get()\n                    .unwrap()\n                    .schedule_wakeup(after_ms as u64, self.clone());\n            }\n        }\n\n        *status = Status::Sleep;\n        self.condvar.notify_all();\n\n        // wait until executor wakes us up\n        while *status != Status::Running {\n            self.condvar.wait(&mut status);\n        }\n\n        if self.crash_request.load(Ordering::SeqCst) {\n            panic!(\"crashed by request\");\n        }\n    }\n\n    /// Called only once, exactly before thread finishes execution.\n    fn finish_me(&self) {\n        let mut status = self.mutex.lock();\n        assert!(matches!(*status, Status::Running));\n\n        *status = Status::Finished;\n        {\n            let mut result = self.result.lock();\n            if result.0 == -1 {\n                *result = (0, \"finished normally\".to_owned());\n            }\n        }\n        self.condvar.notify_all();\n    }\n}\n\n/// Invokes the given closure with a reference to the current thread [`ThreadContext`].\n#[inline(always)]\nfn with_thread_context<T>(f: impl FnOnce(&Arc<ThreadContext>) -> T) -> T {\n    thread_local!(static THREAD_DATA: Arc<ThreadContext> = Arc::new(ThreadContext::new()));\n    
THREAD_DATA.with(f)\n}\n\n/// Waker is used to wake up threads that are blocked on condition.\n/// It keeps track of contexts [`Arc<ThreadContext>`] and can increment the counter\n/// of several contexts to send a notification.\npub struct Waker {\n    // contexts that are waiting for a notification\n    contexts: parking_lot::Mutex<smallvec::SmallVec<[Arc<ThreadContext>; 8]>>,\n}\n\nimpl Default for Waker {\n    fn default() -> Self {\n        Self::new()\n    }\n}\n\nimpl Waker {\n    pub fn new() -> Self {\n        Self {\n            contexts: parking_lot::Mutex::new(smallvec::SmallVec::new()),\n        }\n    }\n\n    /// Subscribe current thread to receive a wake notification later.\n    pub fn wake_me_later(&self) {\n        with_thread_context(|ctx| {\n            self.contexts.lock().push(ctx.clone());\n        });\n    }\n\n    /// Wake up all threads that are waiting for a notification and clear the list.\n    pub fn wake_all(&self) {\n        let mut v = self.contexts.lock();\n        for ctx in v.iter() {\n            ctx.inc_wake();\n        }\n        v.clear();\n    }\n}\n\n/// See [`ThreadContext::yield_me`].\npub fn yield_me(after_ms: i64) {\n    with_thread_context(|ctx| ctx.yield_me(after_ms))\n}\n\n/// Get current time.\npub fn now() -> u64 {\n    with_thread_context(|ctx| ctx.clock.get().unwrap().now())\n}\n\npub fn exit(code: i32, msg: String) -> ! 
{\n    with_thread_context(|ctx| {\n        ctx.allow_panic.store(true, Ordering::SeqCst);\n        let mut result = ctx.result.lock();\n        *result = (code, msg);\n        panic!(\"exit\");\n    })\n}\n\npub(crate) fn get_thread_ctx() -> Arc<ThreadContext> {\n    with_thread_context(|ctx| ctx.clone())\n}\n\n/// Trait for polling channels until they have something.\npub trait PollSome {\n    /// Schedule wakeup for message arrival.\n    fn wake_me(&self);\n\n    /// Check if channel has a ready message.\n    fn has_some(&self) -> bool;\n}\n\n/// Blocks current thread until one of the channels has a ready message. Returns\n/// index of the channel that has a message. If timeout is reached, returns None.\n///\n/// Negative timeout means block forever. Zero timeout means check channels and return\n/// immediately. Positive timeout means block until timeout is reached.\npub fn epoll_chans(chans: &[Box<dyn PollSome>], timeout: i64) -> Option<usize> {\n    let deadline = if timeout < 0 {\n        0\n    } else {\n        now() + timeout as u64\n    };\n\n    loop {\n        for chan in chans {\n            chan.wake_me()\n        }\n\n        for (i, chan) in chans.iter().enumerate() {\n            if chan.has_some() {\n                return Some(i);\n            }\n        }\n\n        if timeout < 0 {\n            // block until wakeup\n            yield_me(-1);\n        } else {\n            let current_time = now();\n            if current_time >= deadline {\n                return None;\n            }\n\n            yield_me((deadline - current_time) as i64);\n        }\n    }\n}\n"
  },
  {
    "path": "libs/desim/src/lib.rs",
    "content": "pub mod chan;\npub mod executor;\npub mod network;\npub mod node_os;\npub mod options;\npub mod proto;\npub mod time;\npub mod world;\n"
  },
  {
    "path": "libs/desim/src/network.rs",
    "content": "use std::cmp::Ordering;\nuse std::collections::{BinaryHeap, VecDeque};\nuse std::fmt::{self, Debug};\nuse std::ops::DerefMut;\nuse std::sync::{Arc, mpsc};\n\nuse parking_lot::lock_api::{MappedMutexGuard, MutexGuard};\nuse parking_lot::{Mutex, RawMutex};\nuse rand::rngs::StdRng;\nuse tracing::debug;\n\nuse super::chan::Chan;\nuse super::proto::AnyMessage;\nuse crate::executor::{self, ThreadContext};\nuse crate::options::NetworkOptions;\nuse crate::proto::{NetEvent, NodeEvent};\n\npub struct NetworkTask {\n    options: Arc<NetworkOptions>,\n    connections: Mutex<Vec<VirtualConnection>>,\n    /// min-heap of connections having something to deliver.\n    events: Mutex<BinaryHeap<Event>>,\n    task_context: Arc<ThreadContext>,\n}\n\nimpl NetworkTask {\n    pub fn start_new(options: Arc<NetworkOptions>, tx: mpsc::Sender<Arc<NetworkTask>>) {\n        let ctx = executor::get_thread_ctx();\n        let task = Arc::new(Self {\n            options,\n            connections: Mutex::new(Vec::new()),\n            events: Mutex::new(BinaryHeap::new()),\n            task_context: ctx,\n        });\n\n        // send the task upstream\n        tx.send(task.clone()).unwrap();\n\n        // start the task\n        task.start();\n    }\n\n    pub fn start_new_connection(self: &Arc<Self>, rng: StdRng, dst_accept: Chan<NodeEvent>) -> TCP {\n        let now = executor::now();\n        let connection_id = self.connections.lock().len();\n\n        let vc = VirtualConnection {\n            connection_id,\n            dst_accept,\n            dst_sockets: [Chan::new(), Chan::new()],\n            state: Mutex::new(ConnectionState {\n                buffers: [NetworkBuffer::new(None), NetworkBuffer::new(Some(now))],\n                rng,\n            }),\n        };\n        vc.schedule_timeout(self);\n        vc.send_connect(self);\n\n        let recv_chan = vc.dst_sockets[0].clone();\n        self.connections.lock().push(vc);\n\n        TCP {\n            net: 
self.clone(),\n            conn_id: connection_id,\n            dir: 0,\n            recv_chan,\n        }\n    }\n}\n\n// private functions\nimpl NetworkTask {\n    /// Schedule to wakeup network task (self) `after_ms` later to deliver\n    /// messages of connection `id`.\n    fn schedule(&self, id: usize, after_ms: u64) {\n        self.events.lock().push(Event {\n            time: executor::now() + after_ms,\n            conn_id: id,\n        });\n        self.task_context.schedule_wakeup(after_ms);\n    }\n\n    /// Get locked connection `id`.\n    fn get(&self, id: usize) -> MappedMutexGuard<'_, RawMutex, VirtualConnection> {\n        MutexGuard::map(self.connections.lock(), |connections| {\n            connections.get_mut(id).unwrap()\n        })\n    }\n\n    fn collect_pending_events(&self, now: u64, vec: &mut Vec<Event>) {\n        vec.clear();\n        let mut events = self.events.lock();\n        while let Some(event) = events.peek() {\n            if event.time > now {\n                break;\n            }\n            let event = events.pop().unwrap();\n            vec.push(event);\n        }\n    }\n\n    fn start(self: &Arc<Self>) {\n        debug!(\"started network task\");\n\n        let mut events = Vec::new();\n        loop {\n            let now = executor::now();\n            self.collect_pending_events(now, &mut events);\n\n            for event in events.drain(..) 
{\n                let conn = self.get(event.conn_id);\n                conn.process(self);\n            }\n\n            // block until wakeup\n            executor::yield_me(-1);\n        }\n    }\n}\n\n// 0 - from node(0) to node(1)\n// 1 - from node(1) to node(0)\ntype MessageDirection = u8;\n\nfn sender_str(dir: MessageDirection) -> &'static str {\n    match dir {\n        0 => \"client\",\n        1 => \"server\",\n        _ => unreachable!(),\n    }\n}\n\nfn receiver_str(dir: MessageDirection) -> &'static str {\n    match dir {\n        0 => \"server\",\n        1 => \"client\",\n        _ => unreachable!(),\n    }\n}\n\n/// Virtual connection between two nodes.\n/// Node 0 is the creator of the connection (client),\n/// and node 1 is the acceptor (server).\nstruct VirtualConnection {\n    connection_id: usize,\n    /// one-off chan, used to deliver Accept message to dst\n    dst_accept: Chan<NodeEvent>,\n    /// message sinks\n    dst_sockets: [Chan<NetEvent>; 2],\n    state: Mutex<ConnectionState>,\n}\n\nstruct ConnectionState {\n    buffers: [NetworkBuffer; 2],\n    rng: StdRng,\n}\n\nimpl VirtualConnection {\n    /// Notify the future about the possible timeout.\n    fn schedule_timeout(&self, net: &NetworkTask) {\n        if let Some(timeout) = net.options.keepalive_timeout {\n            net.schedule(self.connection_id, timeout);\n        }\n    }\n\n    /// Send the handshake (Accept) to the server.\n    fn send_connect(&self, net: &NetworkTask) {\n        let now = executor::now();\n        let mut state = self.state.lock();\n        let delay = net.options.connect_delay.delay(&mut state.rng);\n        let buffer = &mut state.buffers[0];\n        assert!(buffer.buf.is_empty());\n        assert!(!buffer.recv_closed);\n        assert!(!buffer.send_closed);\n        assert!(buffer.last_recv.is_none());\n\n        let delay = if let Some(ms) = delay {\n            ms\n        } else {\n            debug!(\"NET: TCP #{} dropped connect\", 
self.connection_id);\n            buffer.send_closed = true;\n            return;\n        };\n\n        // Send a message into the future.\n        buffer\n            .buf\n            .push_back((now + delay, AnyMessage::InternalConnect));\n        net.schedule(self.connection_id, delay);\n    }\n\n    /// Transmit some of the messages from the buffer to the nodes.\n    fn process(&self, net: &Arc<NetworkTask>) {\n        let now = executor::now();\n\n        let mut state = self.state.lock();\n\n        for direction in 0..2 {\n            self.process_direction(\n                net,\n                state.deref_mut(),\n                now,\n                direction as MessageDirection,\n                &self.dst_sockets[direction ^ 1],\n            );\n        }\n\n        // Close the one side of the connection by timeout if the node\n        // has not received any messages for a long time.\n        if let Some(timeout) = net.options.keepalive_timeout {\n            let mut to_close = [false, false];\n            for direction in 0..2 {\n                let buffer = &mut state.buffers[direction];\n                if buffer.recv_closed {\n                    continue;\n                }\n                if let Some(last_recv) = buffer.last_recv {\n                    if now - last_recv >= timeout {\n                        debug!(\n                            \"NET: connection {} timed out at {}\",\n                            self.connection_id,\n                            receiver_str(direction as MessageDirection)\n                        );\n                        let node_idx = direction ^ 1;\n                        to_close[node_idx] = true;\n                    }\n                }\n            }\n            drop(state);\n\n            for (node_idx, should_close) in to_close.iter().enumerate() {\n                if *should_close {\n                    self.close(node_idx);\n                }\n            }\n        }\n    }\n\n    /// Process 
messages in the buffer in the given direction.\n    fn process_direction(\n        &self,\n        net: &Arc<NetworkTask>,\n        state: &mut ConnectionState,\n        now: u64,\n        direction: MessageDirection,\n        to_socket: &Chan<NetEvent>,\n    ) {\n        let buffer = &mut state.buffers[direction as usize];\n        if buffer.recv_closed {\n            assert!(buffer.buf.is_empty());\n        }\n\n        while !buffer.buf.is_empty() && buffer.buf.front().unwrap().0 <= now {\n            let msg = buffer.buf.pop_front().unwrap().1;\n\n            buffer.last_recv = Some(now);\n            self.schedule_timeout(net);\n\n            if let AnyMessage::InternalConnect = msg {\n                // TODO: assert to_socket is the server\n                let server_to_client = TCP {\n                    net: net.clone(),\n                    conn_id: self.connection_id,\n                    dir: direction ^ 1,\n                    recv_chan: to_socket.clone(),\n                };\n                // special case, we need to deliver new connection to a separate channel\n                self.dst_accept.send(NodeEvent::Accept(server_to_client));\n            } else {\n                to_socket.send(NetEvent::Message(msg));\n            }\n        }\n    }\n\n    /// Try to send a message to the buffer, optionally dropping it and\n    /// determining delivery timestamp.\n    fn send(&self, net: &NetworkTask, direction: MessageDirection, msg: AnyMessage) {\n        let now = executor::now();\n        let mut state = self.state.lock();\n\n        let (delay, close) = if let Some(ms) = net.options.send_delay.delay(&mut state.rng) {\n            (ms, false)\n        } else {\n            (0, true)\n        };\n\n        let buffer = &mut state.buffers[direction as usize];\n        if buffer.send_closed {\n            debug!(\n                \"NET: TCP #{} dropped message {:?} (broken pipe)\",\n                self.connection_id, msg\n            );\n            
return;\n        }\n\n        if close {\n            debug!(\n                \"NET: TCP #{} dropped message {:?} (pipe just broke)\",\n                self.connection_id, msg\n            );\n            buffer.send_closed = true;\n            return;\n        }\n\n        if buffer.recv_closed {\n            debug!(\n                \"NET: TCP #{} dropped message {:?} (recv closed)\",\n                self.connection_id, msg\n            );\n            return;\n        }\n\n        // Send a message into the future.\n        buffer.buf.push_back((now + delay, msg));\n        net.schedule(self.connection_id, delay);\n    }\n\n    /// Close the connection. Only one side of the connection will be closed,\n    /// and no further messages will be delivered. The other side will not be notified.\n    fn close(&self, node_idx: usize) {\n        let mut state = self.state.lock();\n        let recv_buffer = &mut state.buffers[1 ^ node_idx];\n        if recv_buffer.recv_closed {\n            debug!(\n                \"NET: TCP #{} closed twice at {}\",\n                self.connection_id,\n                sender_str(node_idx as MessageDirection),\n            );\n            return;\n        }\n\n        debug!(\n            \"NET: TCP #{} closed at {}\",\n            self.connection_id,\n            sender_str(node_idx as MessageDirection),\n        );\n        recv_buffer.recv_closed = true;\n        for msg in recv_buffer.buf.drain(..) 
{\n            debug!(\n                \"NET: TCP #{} dropped message {:?} (closed)\",\n                self.connection_id, msg\n            );\n        }\n\n        let send_buffer = &mut state.buffers[node_idx];\n        send_buffer.send_closed = true;\n        drop(state);\n\n        // TODO: notify the other side?\n\n        self.dst_sockets[node_idx].send(NetEvent::Closed);\n    }\n}\n\nstruct NetworkBuffer {\n    /// Messages paired with time of delivery\n    buf: VecDeque<(u64, AnyMessage)>,\n    /// True if the connection is closed on the receiving side,\n    /// i.e. no more messages from the buffer will be delivered.\n    recv_closed: bool,\n    /// True if the connection is closed on the sending side,\n    /// i.e. no more messages will be added to the buffer.\n    send_closed: bool,\n    /// Last time a message was delivered from the buffer.\n    /// If None, it means that the server is the receiver and\n    /// it has not yet aware of this connection (i.e. has not\n    /// received the Accept).\n    last_recv: Option<u64>,\n}\n\nimpl NetworkBuffer {\n    fn new(last_recv: Option<u64>) -> Self {\n        Self {\n            buf: VecDeque::new(),\n            recv_closed: false,\n            send_closed: false,\n            last_recv,\n        }\n    }\n}\n\n/// Single end of a bidirectional network stream without reordering (TCP-like).\n/// Reads are implemented using channels, writes go to the buffer inside VirtualConnection.\npub struct TCP {\n    net: Arc<NetworkTask>,\n    conn_id: usize,\n    dir: MessageDirection,\n    recv_chan: Chan<NetEvent>,\n}\n\nimpl Debug for TCP {\n    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {\n        write!(f, \"TCP #{} ({})\", self.conn_id, sender_str(self.dir),)\n    }\n}\n\nimpl TCP {\n    /// Send a message to the other side. 
It's guaranteed that it will not arrive\n    /// before the arrival of all messages sent earlier.\n    pub fn send(&self, msg: AnyMessage) {\n        let conn = self.net.get(self.conn_id);\n        conn.send(&self.net, self.dir, msg);\n    }\n\n    /// Get a channel to receive incoming messages.\n    pub fn recv_chan(&self) -> Chan<NetEvent> {\n        self.recv_chan.clone()\n    }\n\n    pub fn connection_id(&self) -> usize {\n        self.conn_id\n    }\n\n    pub fn close(&self) {\n        let conn = self.net.get(self.conn_id);\n        conn.close(self.dir as usize);\n    }\n}\nstruct Event {\n    time: u64,\n    conn_id: usize,\n}\n\n// BinaryHeap is a max-heap, and we want a min-heap. Reverse the ordering here\n// to get that.\nimpl PartialOrd for Event {\n    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {\n        Some(self.cmp(other))\n    }\n}\n\nimpl Ord for Event {\n    fn cmp(&self, other: &Self) -> Ordering {\n        (other.time, other.conn_id).cmp(&(self.time, self.conn_id))\n    }\n}\n\nimpl PartialEq for Event {\n    fn eq(&self, other: &Self) -> bool {\n        (other.time, other.conn_id) == (self.time, self.conn_id)\n    }\n}\n\nimpl Eq for Event {}\n"
  },
  {
    "path": "libs/desim/src/node_os.rs",
    "content": "use std::sync::Arc;\n\nuse rand::Rng;\n\nuse super::chan::Chan;\nuse super::network::TCP;\nuse super::world::{Node, NodeId, World};\nuse crate::proto::NodeEvent;\n\n/// Abstraction with all functions (aka syscalls) available to the node.\n#[derive(Clone)]\npub struct NodeOs {\n    world: Arc<World>,\n    internal: Arc<Node>,\n}\n\nimpl NodeOs {\n    pub fn new(world: Arc<World>, internal: Arc<Node>) -> NodeOs {\n        NodeOs { world, internal }\n    }\n\n    /// Get the node id.\n    pub fn id(&self) -> NodeId {\n        self.internal.id\n    }\n\n    /// Opens a bidirectional connection with the other node. Always successful.\n    pub fn open_tcp(&self, dst: NodeId) -> TCP {\n        self.world.open_tcp(dst)\n    }\n\n    /// Returns a channel to receive node events (socket Accept and internal messages).\n    pub fn node_events(&self) -> Chan<NodeEvent> {\n        self.internal.node_events()\n    }\n\n    /// Get current time.\n    pub fn now(&self) -> u64 {\n        self.world.now()\n    }\n\n    /// Generate a random number in range [0, max).\n    pub fn random(&self, max: u64) -> u64 {\n        self.internal.rng.lock().random_range(0..max)\n    }\n\n    /// Append a new event to the world event log.\n    pub fn log_event(&self, data: String) {\n        self.internal.log_event(data)\n    }\n}\n"
  },
  {
    "path": "libs/desim/src/options.rs",
    "content": "use rand::Rng;\nuse rand::rngs::StdRng;\n\n/// Describes random delays and failures. Delay will be uniformly distributed in [min, max].\n/// Connection failure will occur with the probablity fail_prob.\n#[derive(Clone, Debug)]\npub struct Delay {\n    pub min: u64,\n    pub max: u64,\n    pub fail_prob: f64, // [0; 1]\n}\n\nimpl Delay {\n    /// Create a struct with no delay, no failures.\n    pub fn empty() -> Delay {\n        Delay {\n            min: 0,\n            max: 0,\n            fail_prob: 0.0,\n        }\n    }\n\n    /// Create a struct with a fixed delay.\n    pub fn fixed(ms: u64) -> Delay {\n        Delay {\n            min: ms,\n            max: ms,\n            fail_prob: 0.0,\n        }\n    }\n\n    /// Generate a random delay in range [min, max]. Return None if the\n    /// message should be dropped.\n    pub fn delay(&self, rng: &mut StdRng) -> Option<u64> {\n        if rng.random_bool(self.fail_prob) {\n            return None;\n        }\n        Some(rng.random_range(self.min..=self.max))\n    }\n}\n\n/// Describes network settings. All network packets will be subjected to the same delays and failures.\n#[derive(Clone, Debug)]\npub struct NetworkOptions {\n    /// Connection will be automatically closed after this timeout if no data is received.\n    pub keepalive_timeout: Option<u64>,\n    /// New connections will be delayed by this amount of time.\n    pub connect_delay: Delay,\n    /// Each message will be delayed by this amount of time.\n    pub send_delay: Delay,\n}\n"
  },
  {
    "path": "libs/desim/src/proto.rs",
    "content": "use std::fmt::Debug;\n\nuse bytes::Bytes;\nuse utils::lsn::Lsn;\n\nuse crate::network::TCP;\nuse crate::world::NodeId;\n\n/// Internal node events.\n#[derive(Debug)]\npub enum NodeEvent {\n    Accept(TCP),\n    Internal(AnyMessage),\n}\n\n/// Events that are coming from a network socket.\n#[derive(Clone, Debug)]\npub enum NetEvent {\n    Message(AnyMessage),\n    Closed,\n}\n\n/// Custom events generated throughout the simulation. Can be used by the test to verify the correctness.\n#[derive(Debug)]\npub struct SimEvent {\n    pub time: u64,\n    pub node: NodeId,\n    pub data: String,\n}\n\n/// Umbrella type for all possible flavours of messages. These events can be sent over network\n/// or to an internal node events channel.\n#[derive(Clone)]\npub enum AnyMessage {\n    /// Not used, empty placeholder.\n    None,\n    /// Used internally for notifying node about new incoming connection.\n    InternalConnect,\n    Just32(u32),\n    ReplCell(ReplCell),\n    Bytes(Bytes),\n    LSN(u64),\n}\n\nimpl Debug for AnyMessage {\n    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {\n        match self {\n            AnyMessage::None => write!(f, \"None\"),\n            AnyMessage::InternalConnect => write!(f, \"InternalConnect\"),\n            AnyMessage::Just32(v) => write!(f, \"Just32({v})\"),\n            AnyMessage::ReplCell(v) => write!(f, \"ReplCell({v:?})\"),\n            AnyMessage::Bytes(v) => write!(f, \"Bytes({})\", hex::encode(v)),\n            AnyMessage::LSN(v) => write!(f, \"LSN({})\", Lsn(*v)),\n        }\n    }\n}\n\n/// Used in reliable_copy_test.rs\n#[derive(Clone, Debug)]\npub struct ReplCell {\n    pub value: u32,\n    pub client_id: u32,\n    pub seqno: u32,\n}\n"
  },
  {
    "path": "libs/desim/src/time.rs",
    "content": "use std::cmp::Ordering;\nuse std::collections::BinaryHeap;\nuse std::ops::DerefMut;\nuse std::sync::Arc;\nuse std::sync::atomic::{AtomicU32, AtomicU64};\n\nuse parking_lot::Mutex;\nuse tracing::trace;\n\nuse crate::executor::ThreadContext;\n\n/// Holds current time and all pending wakeup events.\npub struct Timing {\n    /// Current world's time.\n    current_time: AtomicU64,\n    /// Pending timers.\n    queue: Mutex<BinaryHeap<Pending>>,\n    /// Global nonce. Makes picking events from binary heap queue deterministic\n    /// by appending a number to events with the same timestamp.\n    nonce: AtomicU32,\n    /// Used to schedule fake events.\n    fake_context: Arc<ThreadContext>,\n}\n\nimpl Default for Timing {\n    fn default() -> Self {\n        Self::new()\n    }\n}\n\nimpl Timing {\n    /// Create a new empty clock with time set to 0.\n    pub fn new() -> Timing {\n        Timing {\n            current_time: AtomicU64::new(0),\n            queue: Mutex::new(BinaryHeap::new()),\n            nonce: AtomicU32::new(0),\n            fake_context: Arc::new(ThreadContext::new()),\n        }\n    }\n\n    /// Return the current world's time.\n    pub fn now(&self) -> u64 {\n        self.current_time.load(std::sync::atomic::Ordering::SeqCst)\n    }\n\n    /// Tick-tock the global clock. 
Return the event ready to be processed\n    /// or move the clock forward and then return the event.\n    pub(crate) fn step(&self) -> Option<Arc<ThreadContext>> {\n        let mut queue = self.queue.lock();\n\n        if queue.is_empty() {\n            // no future events\n            return None;\n        }\n\n        if !self.is_event_ready(queue.deref_mut()) {\n            let next_time = queue.peek().unwrap().time;\n            self.current_time\n                .store(next_time, std::sync::atomic::Ordering::SeqCst);\n            trace!(\"rewind time to {}\", next_time);\n            assert!(self.is_event_ready(queue.deref_mut()));\n        }\n\n        Some(queue.pop().unwrap().wake_context)\n    }\n\n    /// Append an event to the queue, to wakeup the thread in `ms` milliseconds.\n    pub(crate) fn schedule_wakeup(&self, ms: u64, wake_context: Arc<ThreadContext>) {\n        self.nonce.fetch_add(1, std::sync::atomic::Ordering::SeqCst);\n        let nonce = self.nonce.load(std::sync::atomic::Ordering::SeqCst);\n        self.queue.lock().push(Pending {\n            time: self.now() + ms,\n            nonce,\n            wake_context,\n        })\n    }\n\n    /// Append a fake event to the queue, to prevent clocks from skipping this time.\n    pub fn schedule_fake(&self, ms: u64) {\n        self.queue.lock().push(Pending {\n            time: self.now() + ms,\n            nonce: 0,\n            wake_context: self.fake_context.clone(),\n        });\n    }\n\n    /// Return true if there is a ready event.\n    fn is_event_ready(&self, queue: &mut BinaryHeap<Pending>) -> bool {\n        queue.peek().is_some_and(|x| x.time <= self.now())\n    }\n\n    /// Clear all pending events.\n    pub(crate) fn clear(&self) {\n        self.queue.lock().clear();\n    }\n}\n\nstruct Pending {\n    time: u64,\n    nonce: u32,\n    wake_context: Arc<ThreadContext>,\n}\n\n// BinaryHeap is a max-heap, and we want a min-heap. 
Reverse the ordering here\n// to get that.\nimpl PartialOrd for Pending {\n    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {\n        Some(self.cmp(other))\n    }\n}\n\nimpl Ord for Pending {\n    fn cmp(&self, other: &Self) -> Ordering {\n        (other.time, other.nonce).cmp(&(self.time, self.nonce))\n    }\n}\n\nimpl PartialEq for Pending {\n    fn eq(&self, other: &Self) -> bool {\n        (other.time, other.nonce) == (self.time, self.nonce)\n    }\n}\n\nimpl Eq for Pending {}\n"
  },
  {
    "path": "libs/desim/src/world.rs",
    "content": "use std::ops::DerefMut;\nuse std::sync::{Arc, mpsc};\n\nuse parking_lot::Mutex;\nuse rand::SeedableRng;\nuse rand::rngs::StdRng;\n\nuse super::chan::Chan;\nuse super::network::TCP;\nuse super::node_os::NodeOs;\nuse crate::executor::{ExternalHandle, Runtime};\nuse crate::network::NetworkTask;\nuse crate::options::NetworkOptions;\nuse crate::proto::{NodeEvent, SimEvent};\nuse crate::time::Timing;\n\npub type NodeId = u32;\n\n/// World contains simulation state.\npub struct World {\n    nodes: Mutex<Vec<Arc<Node>>>,\n    /// Random number generator.\n    rng: Mutex<StdRng>,\n    /// Internal event log.\n    events: Mutex<Vec<SimEvent>>,\n    /// Separate task that processes all network messages.\n    network_task: Arc<NetworkTask>,\n    /// Runtime for running threads and moving time.\n    runtime: Mutex<Runtime>,\n    /// To get current time.\n    timing: Arc<Timing>,\n}\n\nimpl World {\n    pub fn new(seed: u64, options: Arc<NetworkOptions>) -> World {\n        let timing = Arc::new(Timing::new());\n        let mut runtime = Runtime::new(timing.clone());\n\n        let (tx, rx) = mpsc::channel();\n\n        runtime.spawn(move || {\n            // create and start network background thread, and send it back via the channel\n            NetworkTask::start_new(options, tx)\n        });\n\n        // wait for the network task to start\n        while runtime.step() {}\n\n        let network_task = rx.recv().unwrap();\n\n        World {\n            nodes: Mutex::new(Vec::new()),\n            rng: Mutex::new(StdRng::seed_from_u64(seed)),\n            events: Mutex::new(Vec::new()),\n            network_task,\n            runtime: Mutex::new(runtime),\n            timing,\n        }\n    }\n\n    pub fn step(&self) -> bool {\n        self.runtime.lock().step()\n    }\n\n    pub fn get_thread_step_count(&self) -> u64 {\n        self.runtime.lock().step_counter\n    }\n\n    /// Create a new random number generator.\n    pub fn new_rng(&self) -> StdRng {\n    
    let mut rng = self.rng.lock();\n        StdRng::from_rng(rng.deref_mut())\n    }\n\n    /// Create a new node.\n    pub fn new_node(self: &Arc<Self>) -> Arc<Node> {\n        let mut nodes = self.nodes.lock();\n        let id = nodes.len() as NodeId;\n        let node = Arc::new(Node::new(id, self.clone(), self.new_rng()));\n        nodes.push(node.clone());\n        node\n    }\n\n    /// Get an internal node state by id.\n    fn get_node(&self, id: NodeId) -> Option<Arc<Node>> {\n        let nodes = self.nodes.lock();\n        let num = id as usize;\n        if num < nodes.len() {\n            Some(nodes[num].clone())\n        } else {\n            None\n        }\n    }\n\n    pub fn stop_all(&self) {\n        self.runtime.lock().crash_all_threads();\n    }\n\n    /// Returns a writable end of a TCP connection, to send src->dst messages.\n    pub fn open_tcp(self: &Arc<World>, dst: NodeId) -> TCP {\n        // TODO: replace unwrap() with /dev/null socket.\n        let dst = self.get_node(dst).unwrap();\n        let dst_accept = dst.node_events.lock().clone();\n\n        let rng = self.new_rng();\n        self.network_task.start_new_connection(rng, dst_accept)\n    }\n\n    /// Get current time.\n    pub fn now(&self) -> u64 {\n        self.timing.now()\n    }\n\n    /// Get a copy of the internal clock.\n    pub fn clock(&self) -> Arc<Timing> {\n        self.timing.clone()\n    }\n\n    pub fn add_event(&self, node: NodeId, data: String) {\n        let time = self.now();\n        self.events.lock().push(SimEvent { time, node, data });\n    }\n\n    pub fn take_events(&self) -> Vec<SimEvent> {\n        let mut events = self.events.lock();\n        let mut res = Vec::new();\n        std::mem::swap(&mut res, &mut events);\n        res\n    }\n\n    pub fn deallocate(&self) {\n        self.stop_all();\n        self.timing.clear();\n        self.nodes.lock().clear();\n    }\n}\n\n/// Internal node state.\npub struct Node {\n    pub id: NodeId,\n    node_events: 
Mutex<Chan<NodeEvent>>,\n    world: Arc<World>,\n    pub(crate) rng: Mutex<StdRng>,\n}\n\nimpl Node {\n    pub fn new(id: NodeId, world: Arc<World>, rng: StdRng) -> Node {\n        Node {\n            id,\n            node_events: Mutex::new(Chan::new()),\n            world,\n            rng: Mutex::new(rng),\n        }\n    }\n\n    /// Spawn a new thread with this node context.\n    pub fn launch(self: &Arc<Self>, f: impl FnOnce(NodeOs) + Send + 'static) -> ExternalHandle {\n        let node = self.clone();\n        let world = self.world.clone();\n        self.world.runtime.lock().spawn(move || {\n            f(NodeOs::new(world, node.clone()));\n        })\n    }\n\n    /// Returns a channel to receive Accepts and internal messages.\n    pub fn node_events(&self) -> Chan<NodeEvent> {\n        self.node_events.lock().clone()\n    }\n\n    /// This will drop all in-flight Accept messages.\n    pub fn replug_node_events(&self, chan: Chan<NodeEvent>) {\n        *self.node_events.lock() = chan;\n    }\n\n    /// Append event to the world's log.\n    pub fn log_event(&self, data: String) {\n        self.world.add_event(self.id, data)\n    }\n}\n"
  },
  {
    "path": "libs/desim/tests/reliable_copy_test.rs",
    "content": "//! Simple test to verify that simulator is working.\n#[cfg(test)]\nmod reliable_copy_test {\n    use std::sync::Arc;\n\n    use anyhow::Result;\n    use desim::executor::{self, PollSome};\n    use desim::node_os::NodeOs;\n    use desim::options::{Delay, NetworkOptions};\n    use desim::proto::{AnyMessage, NetEvent, NodeEvent, ReplCell};\n    use desim::world::{NodeId, World};\n    use parking_lot::Mutex;\n    use tracing::info;\n\n    /// Disk storage trait and implementation.\n    pub trait Storage<T> {\n        fn flush_pos(&self) -> u32;\n        fn flush(&mut self) -> Result<()>;\n        fn write(&mut self, t: T);\n    }\n\n    #[derive(Clone)]\n    pub struct SharedStorage<T> {\n        pub state: Arc<Mutex<InMemoryStorage<T>>>,\n    }\n\n    impl<T> SharedStorage<T> {\n        pub fn new() -> Self {\n            Self {\n                state: Arc::new(Mutex::new(InMemoryStorage::new())),\n            }\n        }\n    }\n\n    impl<T> Storage<T> for SharedStorage<T> {\n        fn flush_pos(&self) -> u32 {\n            self.state.lock().flush_pos\n        }\n\n        fn flush(&mut self) -> Result<()> {\n            executor::yield_me(0);\n            self.state.lock().flush()\n        }\n\n        fn write(&mut self, t: T) {\n            executor::yield_me(0);\n            self.state.lock().write(t);\n        }\n    }\n\n    pub struct InMemoryStorage<T> {\n        pub data: Vec<T>,\n        pub flush_pos: u32,\n    }\n\n    impl<T> InMemoryStorage<T> {\n        pub fn new() -> Self {\n            Self {\n                data: Vec::new(),\n                flush_pos: 0,\n            }\n        }\n\n        pub fn flush(&mut self) -> Result<()> {\n            self.flush_pos = self.data.len() as u32;\n            Ok(())\n        }\n\n        pub fn write(&mut self, t: T) {\n            self.data.push(t);\n        }\n    }\n\n    /// Server implementation.\n    pub fn run_server(os: NodeOs, mut storage: Box<dyn Storage<u32>>) {\n        
info!(\"started server\");\n\n        let node_events = os.node_events();\n        let mut epoll_vec: Vec<Box<dyn PollSome>> = vec![Box::new(node_events.clone())];\n        let mut sockets = vec![];\n\n        loop {\n            let index = executor::epoll_chans(&epoll_vec, -1).unwrap();\n\n            if index == 0 {\n                let node_event = node_events.must_recv();\n                info!(\"got node event: {:?}\", node_event);\n                if let NodeEvent::Accept(tcp) = node_event {\n                    tcp.send(AnyMessage::Just32(storage.flush_pos()));\n                    epoll_vec.push(Box::new(tcp.recv_chan()));\n                    sockets.push(tcp);\n                }\n                continue;\n            }\n\n            let recv_chan = sockets[index - 1].recv_chan();\n            let socket = &sockets[index - 1];\n\n            let event = recv_chan.must_recv();\n            info!(\"got event: {:?}\", event);\n            if let NetEvent::Message(AnyMessage::ReplCell(cell)) = event {\n                if cell.seqno != storage.flush_pos() {\n                    info!(\"got out of order data: {:?}\", cell);\n                    continue;\n                }\n                storage.write(cell.value);\n                storage.flush().unwrap();\n                socket.send(AnyMessage::Just32(storage.flush_pos()));\n            }\n        }\n    }\n\n    /// Client copies all data from array to the remote node.\n    pub fn run_client(os: NodeOs, data: &[ReplCell], dst: NodeId) {\n        info!(\"started client\");\n\n        let mut delivered = 0;\n\n        let mut sock = os.open_tcp(dst);\n        let mut recv_chan = sock.recv_chan();\n\n        while delivered < data.len() {\n            let num = &data[delivered];\n            info!(\"sending data: {:?}\", num.clone());\n            sock.send(AnyMessage::ReplCell(num.clone()));\n\n            // loop {\n            let event = recv_chan.recv();\n            match event {\n                
NetEvent::Message(AnyMessage::Just32(flush_pos)) => {\n                    if flush_pos == 1 + delivered as u32 {\n                        delivered += 1;\n                    }\n                }\n                NetEvent::Closed => {\n                    info!(\"connection closed, reestablishing\");\n                    sock = os.open_tcp(dst);\n                    recv_chan = sock.recv_chan();\n                }\n                _ => {}\n            }\n\n            // }\n        }\n\n        let sock = os.open_tcp(dst);\n        for num in data {\n            info!(\"sending data: {:?}\", num.clone());\n            sock.send(AnyMessage::ReplCell(num.clone()));\n        }\n\n        info!(\"sent all data and finished client\");\n    }\n\n    /// Run test simulations.\n    #[test]\n    fn sim_example_reliable_copy() {\n        utils::logging::init(\n            utils::logging::LogFormat::Test,\n            utils::logging::TracingErrorLayerEnablement::Disabled,\n            utils::logging::Output::Stdout,\n        )\n        .expect(\"logging init failed\");\n\n        let delay = Delay {\n            min: 1,\n            max: 60,\n            fail_prob: 0.4,\n        };\n\n        let network = NetworkOptions {\n            keepalive_timeout: Some(50),\n            connect_delay: delay.clone(),\n            send_delay: delay.clone(),\n        };\n\n        for seed in 0..20 {\n            let u32_data: [u32; 5] = [1, 2, 3, 4, 5];\n            let data = u32_to_cells(&u32_data, 1);\n            let world = Arc::new(World::new(seed, Arc::new(network.clone())));\n\n            start_simulation(Options {\n                world,\n                time_limit: 1_000_000,\n                client_fn: Box::new(move |os, server_id| run_client(os, &data, server_id)),\n                u32_data,\n            });\n        }\n    }\n\n    pub struct Options {\n        pub world: Arc<World>,\n        pub time_limit: u64,\n        pub u32_data: [u32; 5],\n        pub client_fn: 
Box<dyn FnOnce(NodeOs, u32) + Send + 'static>,\n    }\n\n    pub fn start_simulation(options: Options) {\n        let world = options.world;\n\n        let client_node = world.new_node();\n        let server_node = world.new_node();\n        let server_id = server_node.id;\n\n        // start the client thread\n        client_node.launch(move |os| {\n            let client_fn = options.client_fn;\n            client_fn(os, server_id);\n        });\n\n        // start the server thread\n        let shared_storage = SharedStorage::new();\n        let server_storage = shared_storage.clone();\n        server_node.launch(move |os| run_server(os, Box::new(server_storage)));\n\n        while world.step() && world.now() < options.time_limit {}\n\n        let disk_data = shared_storage.state.lock().data.clone();\n        assert!(verify_data(&disk_data, &options.u32_data[..]));\n    }\n\n    pub fn u32_to_cells(data: &[u32], client_id: u32) -> Vec<ReplCell> {\n        let mut res = Vec::new();\n        for (i, _) in data.iter().enumerate() {\n            res.push(ReplCell {\n                client_id,\n                seqno: i as u32,\n                value: data[i],\n            });\n        }\n        res\n    }\n\n    fn verify_data(disk_data: &[u32], data: &[u32]) -> bool {\n        if disk_data.len() != data.len() {\n            return false;\n        }\n        for i in 0..data.len() {\n            if disk_data[i] != data[i] {\n                return false;\n            }\n        }\n        true\n    }\n}\n"
  },
  {
    "path": "libs/http-utils/Cargo.toml",
    "content": "[package]\nname = \"http-utils\"\nversion = \"0.1.0\"\nedition.workspace = true\nlicense.workspace = true\n\n[dependencies]\nanyhow.workspace = true\narc-swap.workspace = true\nbytes.workspace = true\ncamino.workspace = true\nfail.workspace = true\nfutures.workspace = true\nhyper0.workspace = true\nitertools.workspace = true\njemalloc_pprof.workspace = true\njsonwebtoken.workspace = true\nonce_cell.workspace = true\npprof.workspace = true\nregex.workspace = true\nrouterify.workspace = true\nrustls-pemfile.workspace = true\nrustls.workspace = true\nserde_json.workspace = true\nserde_path_to_error.workspace = true\nserde.workspace = true\nthiserror.workspace = true\ntokio-rustls.workspace = true\ntokio-util.workspace = true\ntokio.workspace = true\ntracing.workspace = true\nurl.workspace = true\nuuid.workspace = true\nx509-cert.workspace = true\n\n# to use tokio channels as streams, this is faster to compile than async_stream\n# why is it only here? no other crate should use it, streams are rarely needed.\ntokio-stream = { version = \"0.1.14\" }\n\nmetrics.workspace = true\nutils.workspace = true\nworkspace_hack = { version = \"0.1\", path = \"../../workspace_hack\" }\n"
  },
  {
    "path": "libs/http-utils/src/endpoint.rs",
    "content": "use std::future::Future;\nuse std::io::Write as _;\nuse std::str::FromStr;\nuse std::time::Duration;\n\nuse anyhow::{Context, anyhow};\nuse bytes::{Bytes, BytesMut};\nuse hyper::header::{AUTHORIZATION, CONTENT_DISPOSITION, CONTENT_TYPE, HeaderName};\nuse hyper::http::HeaderValue;\nuse hyper::{Body, Method, Request, Response};\nuse jsonwebtoken::TokenData;\nuse metrics::{Encoder, IntCounter, TextEncoder, register_int_counter};\nuse once_cell::sync::Lazy;\nuse pprof::ProfilerGuardBuilder;\nuse pprof::protos::Message as _;\nuse routerify::ext::RequestExt;\nuse routerify::{Middleware, RequestInfo, Router, RouterBuilder};\nuse tokio::sync::{Mutex, Notify, mpsc};\nuse tokio_stream::wrappers::ReceiverStream;\nuse tokio_util::io::ReaderStream;\nuse tracing::{Instrument, debug, info, info_span, warn};\nuse utils::auth::{AuthError, Claims, SwappableJwtAuth};\nuse utils::metrics_collector::{METRICS_COLLECTOR, METRICS_STALE_MILLIS};\n\nuse crate::error::{ApiError, api_error_handler, route_error_handler};\nuse crate::request::{get_query_param, parse_query_param};\n\nstatic SERVE_METRICS_COUNT: Lazy<IntCounter> = Lazy::new(|| {\n    register_int_counter!(\n        \"libmetrics_metric_handler_requests_total\",\n        \"Number of metric requests made\"\n    )\n    .expect(\"failed to define a metric\")\n});\n\nstatic X_REQUEST_ID_HEADER_STR: &str = \"x-request-id\";\n\nstatic X_REQUEST_ID_HEADER: HeaderName = HeaderName::from_static(X_REQUEST_ID_HEADER_STR);\n#[derive(Debug, Default, Clone)]\nstruct RequestId(String);\n\n/// Adds a tracing info_span! 
instrumentation around the handler events,\n/// logs the request start and end events for non-GET requests and non-200 responses.\n///\n/// Usage: Replace `my_handler` with `|r| request_span(r, my_handler)`\n///\n/// Use this to distinguish between logs of different HTTP requests: every request handler wrapped\n/// with this will get request info logged in the wrapping span, including the unique request ID.\n///\n/// This also handles errors, logging them and converting them to an HTTP error response.\n///\n/// NB: If the client disconnects, Hyper will drop the Future, without polling it to\n/// completion. In other words, the handler must be async cancellation safe! request_span\n/// prints a warning to the log when that happens, so that you have some trace of it in\n/// the log.\n///\n///\n/// There could be other ways to implement similar functionality:\n///\n/// * procmacros placed on top of all handler methods\n///   With all the drawbacks of procmacros, brings no difference implementation-wise,\n///   and little code reduction compared to the existing approach.\n///\n/// * Another `TraitExt` with e.g. the `get_with_span`, `post_with_span` methods to do similar logic,\n///   implemented for [`RouterBuilder`].\n///   Could be simpler, but we don't want to depend on [`routerify`] more, targeting to use other library later.\n///\n/// * In theory, a span guard could've been created in a pre-request middleware and placed into a global collection, to be dropped\n///   later, in a post-response middleware.\n///   Due to suspendable nature of the futures, would give contradictive results which is exactly the opposite of what `tracing-futures`\n///   tries to achive with its `.instrument` used in the current approach.\n///\n/// If needed, a declarative macro to substitute the |r| ... 
closure boilerplate could be introduced.\npub async fn request_span<R, H>(request: Request<Body>, handler: H) -> R::Output\nwhere\n    R: Future<Output = Result<Response<Body>, ApiError>> + Send + 'static,\n    H: FnOnce(Request<Body>) -> R + Send + Sync + 'static,\n{\n    let request_id = request.context::<RequestId>().unwrap_or_default().0;\n    let method = request.method();\n    let path = request.uri().path();\n    let request_span = info_span!(\"request\", %method, %path, %request_id);\n\n    let log_quietly = method == Method::GET;\n    async move {\n        let cancellation_guard = RequestCancelled::warn_when_dropped_without_responding();\n        if log_quietly {\n            debug!(\"Handling request\");\n        } else {\n            info!(\"Handling request\");\n        }\n\n        // No special handling for panics here. There's a `tracing_panic_hook` from another\n        // module to do that globally.\n        let res = handler(request).await;\n\n        cancellation_guard.disarm();\n\n        // Log the result if needed.\n        //\n        // We also convert any errors into an Ok response with HTTP error code here.\n        // `make_router` sets a last-resort error handler that would do the same, but\n        // we prefer to do it here, before we exit the request span, so that the error\n        // is still logged with the span.\n        //\n        // (Because we convert errors to Ok response, we never actually return an error,\n        // and we could declare the function to return the never type (`!`). 
However,\n        // using `routerify::RouterBuilder` requires a proper error type.)\n        match res {\n            Ok(response) => {\n                let response_status = response.status();\n                if log_quietly && response_status.is_success() {\n                    debug!(\"Request handled, status: {response_status}\");\n                } else {\n                    info!(\"Request handled, status: {response_status}\");\n                }\n                Ok(response)\n            }\n            Err(err) => Ok(api_error_handler(err)),\n        }\n    }\n    .instrument(request_span)\n    .await\n}\n\n/// Drop guard to WARN in case the request was dropped before completion.\nstruct RequestCancelled {\n    warn: Option<tracing::Span>,\n}\n\nimpl RequestCancelled {\n    /// Create the drop guard using the [`tracing::Span::current`] as the span.\n    fn warn_when_dropped_without_responding() -> Self {\n        RequestCancelled {\n            warn: Some(tracing::Span::current()),\n        }\n    }\n\n    /// Consume the drop guard without logging anything.\n    fn disarm(mut self) {\n        self.warn = None;\n    }\n}\n\nimpl Drop for RequestCancelled {\n    fn drop(&mut self) {\n        if std::thread::panicking() {\n            // we are unwinding due to panicking, assume we are not dropped for cancellation\n        } else if let Some(span) = self.warn.take() {\n            // the span has all of the info already, but the outer `.instrument(span)` has already\n            // been dropped, so we need to manually re-enter it for this message.\n            //\n            // this is what the instrument would do before polling so it is fine.\n            let _g = span.entered();\n            warn!(\"request was dropped before completing\");\n        }\n    }\n}\n\n/// An [`std::io::Write`] implementation on top of a channel sending [`bytes::Bytes`] chunks.\npub struct ChannelWriter {\n    buffer: BytesMut,\n    pub tx: 
mpsc::Sender<std::io::Result<Bytes>>,\n    written: usize,\n    /// Time spent waiting for the channel to make progress. It is not the same as time to upload a\n    /// buffer because we cannot know anything about that, but this should allow us to understand\n    /// the actual time taken without the time spent `std::thread::park`ed.\n    wait_time: std::time::Duration,\n}\n\nimpl ChannelWriter {\n    pub fn new(buf_len: usize, tx: mpsc::Sender<std::io::Result<Bytes>>) -> Self {\n        assert_ne!(buf_len, 0);\n        ChannelWriter {\n            // split about half off the buffer from the start, because we flush depending on\n            // capacity. first flush will come sooner than without this, but now resizes will\n            // have better chance of picking up the \"other\" half. not guaranteed of course.\n            buffer: BytesMut::with_capacity(buf_len).split_off(buf_len / 2),\n            tx,\n            written: 0,\n            wait_time: std::time::Duration::ZERO,\n        }\n    }\n\n    pub fn flush0(&mut self) -> std::io::Result<usize> {\n        let n = self.buffer.len();\n        if n == 0 {\n            return Ok(0);\n        }\n\n        tracing::trace!(n, \"flushing\");\n        let ready = self.buffer.split().freeze();\n\n        let wait_started_at = std::time::Instant::now();\n\n        // not ideal to call from blocking code to block_on, but we are sure that this\n        // operation does not spawn_blocking other tasks\n        let res: Result<(), ()> = tokio::runtime::Handle::current().block_on(async {\n            self.tx.send(Ok(ready)).await.map_err(|_| ())?;\n\n            // throttle sending to allow reuse of our buffer in `write`.\n            self.tx.reserve().await.map_err(|_| ())?;\n\n            // now the response task has picked up the buffer and hopefully started\n            // sending it to the client.\n            Ok(())\n        });\n\n        self.wait_time += wait_started_at.elapsed();\n\n        if res.is_err() 
{\n            return Err(std::io::ErrorKind::BrokenPipe.into());\n        }\n        self.written += n;\n        Ok(n)\n    }\n\n    pub fn flushed_bytes(&self) -> usize {\n        self.written\n    }\n\n    pub fn wait_time(&self) -> std::time::Duration {\n        self.wait_time\n    }\n}\n\nimpl std::io::Write for ChannelWriter {\n    fn write(&mut self, mut buf: &[u8]) -> std::io::Result<usize> {\n        let remaining = self.buffer.capacity() - self.buffer.len();\n\n        let out_of_space = remaining < buf.len();\n\n        let original_len = buf.len();\n\n        if out_of_space {\n            let can_still_fit = buf.len() - remaining;\n            self.buffer.extend_from_slice(&buf[..can_still_fit]);\n            buf = &buf[can_still_fit..];\n            self.flush0()?;\n        }\n\n        // assume that this will often under normal operation just move the pointer back to the\n        // beginning of allocation, because previous split off parts are already sent and\n        // dropped.\n        self.buffer.extend_from_slice(buf);\n        Ok(original_len)\n    }\n\n    fn flush(&mut self) -> std::io::Result<()> {\n        self.flush0().map(|_| ())\n    }\n}\n\npub async fn prometheus_metrics_handler(\n    req: Request<Body>,\n    force_metric_collection_on_scrape: bool,\n) -> Result<Response<Body>, ApiError> {\n    SERVE_METRICS_COUNT.inc();\n\n    // HADRON\n    let requested_use_latest = parse_query_param(&req, \"use_latest\")?;\n\n    let use_latest = match requested_use_latest {\n        None => force_metric_collection_on_scrape,\n        Some(true) => true,\n        Some(false) => {\n            if force_metric_collection_on_scrape {\n                // We don't cache in this case\n                true\n            } else {\n                false\n            }\n        }\n    };\n\n    let started_at = std::time::Instant::now();\n\n    let (tx, rx) = mpsc::channel(1);\n\n    let body = Body::wrap_stream(ReceiverStream::new(rx));\n\n    let mut 
writer = ChannelWriter::new(128 * 1024, tx);\n\n    let encoder = TextEncoder::new();\n\n    let response = Response::builder()\n        .status(200)\n        .header(CONTENT_TYPE, encoder.format_type())\n        .body(body)\n        .unwrap();\n\n    let span = info_span!(\"blocking\");\n    tokio::task::spawn_blocking(move || {\n        // there are situations where we lose scraped metrics under load, try to gather some clues\n        // since all nodes are queried this, keep the message count low.\n        let spawned_at = std::time::Instant::now();\n\n        let _span = span.entered();\n\n        // HADRON\n        let collected = if use_latest {\n            // Skip caching the results if we always force metric collection on scrape.\n            METRICS_COLLECTOR.run_once(!force_metric_collection_on_scrape)\n        } else {\n            METRICS_COLLECTOR.last_collected()\n        };\n\n        let gathered_at = std::time::Instant::now();\n\n        let res = encoder\n            .encode(&collected.metrics, &mut writer)\n            .and_then(|_| writer.flush().map_err(|e| e.into()));\n\n        // this instant is not when we finally got the full response sent, sending is done by hyper\n        // in another task.\n        let encoded_at = std::time::Instant::now();\n\n        let spawned_in = spawned_at - started_at;\n        let collected_in = gathered_at - spawned_at;\n        // remove the wait time here in case the tcp connection was clogged\n        let encoded_in = encoded_at - gathered_at - writer.wait_time();\n        let total = encoded_at - started_at;\n\n        // HADRON\n        let staleness_ms = (encoded_at - collected.collected_at).as_millis();\n        METRICS_STALE_MILLIS.set(staleness_ms as i64);\n\n        match res {\n            Ok(()) => {\n                tracing::info!(\n                    bytes = writer.flushed_bytes(),\n                    total_ms = total.as_millis(),\n                    spawning_ms = spawned_in.as_millis(),\n   
                 collection_ms = collected_in.as_millis(),\n                    encoding_ms = encoded_in.as_millis(),\n                    stalenss_ms = staleness_ms,\n                    \"responded /metrics\"\n                );\n            }\n            Err(e) => {\n                // there is a chance that this error is not the BrokenPipe we generate in the writer\n                // for \"closed connection\", but it is highly unlikely.\n                tracing::warn!(\n                    after_bytes = writer.flushed_bytes(),\n                    total_ms = total.as_millis(),\n                    spawning_ms = spawned_in.as_millis(),\n                    collection_ms = collected_in.as_millis(),\n                    encoding_ms = encoded_in.as_millis(),\n                    \"failed to write out /metrics response: {e:?}\"\n                );\n                // semantics of this error are quite... unclear. we want to error the stream out to\n                // abort the response to somehow notify the client that we failed.\n                //\n                // though, most likely the reason for failure is that the receiver is already gone.\n                drop(\n                    writer\n                        .tx\n                        .blocking_send(Err(std::io::ErrorKind::BrokenPipe.into())),\n                );\n            }\n        }\n    });\n\n    Ok(response)\n}\n\n/// Generates CPU profiles.\npub async fn profile_cpu_handler(req: Request<Body>) -> Result<Response<Body>, ApiError> {\n    enum Format {\n        Pprof,\n        Svg,\n    }\n\n    // Parameters.\n    let format = match get_query_param(&req, \"format\")?.as_deref() {\n        None => Format::Pprof,\n        Some(\"pprof\") => Format::Pprof,\n        Some(\"svg\") => Format::Svg,\n        Some(format) => return Err(ApiError::BadRequest(anyhow!(\"invalid format {format}\"))),\n    };\n    let seconds = match parse_query_param(&req, \"seconds\")? 
{\n        None => 5,\n        Some(seconds @ 1..=60) => seconds,\n        Some(_) => return Err(ApiError::BadRequest(anyhow!(\"duration must be 1-60 secs\"))),\n    };\n    let frequency_hz = match parse_query_param(&req, \"frequency\")? {\n        None => 99,\n        Some(1001..) => return Err(ApiError::BadRequest(anyhow!(\"frequency must be <=1000 Hz\"))),\n        Some(frequency) => frequency,\n    };\n    let force: bool = parse_query_param(&req, \"force\")?.unwrap_or_default();\n\n    // Take the profile.\n    static PROFILE_LOCK: Lazy<Mutex<()>> = Lazy::new(|| Mutex::new(()));\n    static PROFILE_CANCEL: Lazy<Notify> = Lazy::new(Notify::new);\n\n    let report = {\n        // Only allow one profiler at a time. If force is true, cancel a running profile (e.g. a\n        // Grafana continuous profile). We use a try_lock() loop when cancelling instead of waiting\n        // for a lock(), to avoid races where the notify isn't currently awaited.\n        let _lock = loop {\n            match PROFILE_LOCK.try_lock() {\n                Ok(lock) => break lock,\n                Err(_) if force => PROFILE_CANCEL.notify_waiters(),\n                Err(_) => {\n                    return Err(ApiError::Conflict(\n                        \"profiler already running (use ?force=true to cancel it)\".into(),\n                    ));\n                }\n            }\n            tokio::time::sleep(Duration::from_millis(1)).await; // don't busy-wait\n        };\n\n        let guard = ProfilerGuardBuilder::default()\n            .frequency(frequency_hz)\n            .blocklist(&[\"libc\", \"libgcc\", \"pthread\", \"vdso\"])\n            .build()\n            .map_err(|err| ApiError::InternalServerError(err.into()))?;\n\n        tokio::select! 
{\n            _ = tokio::time::sleep(Duration::from_secs(seconds)) => {},\n            _ = PROFILE_CANCEL.notified() => {},\n        };\n\n        guard\n            .report()\n            .build()\n            .map_err(|err| ApiError::InternalServerError(err.into()))?\n    };\n\n    // Return the report in the requested format.\n    match format {\n        Format::Pprof => {\n            let body = report\n                .pprof()\n                .map_err(|err| ApiError::InternalServerError(err.into()))?\n                .encode_to_vec();\n\n            Response::builder()\n                .status(200)\n                .header(CONTENT_TYPE, \"application/octet-stream\")\n                .header(CONTENT_DISPOSITION, \"attachment; filename=\\\"profile.pb\\\"\")\n                .body(Body::from(body))\n                .map_err(|err| ApiError::InternalServerError(err.into()))\n        }\n\n        Format::Svg => {\n            let mut body = Vec::new();\n            report\n                .flamegraph(&mut body)\n                .map_err(|err| ApiError::InternalServerError(err.into()))?;\n            Response::builder()\n                .status(200)\n                .header(CONTENT_TYPE, \"image/svg+xml\")\n                .body(Body::from(body))\n                .map_err(|err| ApiError::InternalServerError(err.into()))\n        }\n    }\n}\n\n/// Generates heap profiles.\n///\n/// This only works with jemalloc on Linux.\npub async fn profile_heap_handler(req: Request<Body>) -> Result<Response<Body>, ApiError> {\n    enum Format {\n        Jemalloc,\n        Pprof,\n        Svg,\n    }\n\n    // Parameters.\n    let format = match get_query_param(&req, \"format\")?.as_deref() {\n        None => Format::Pprof,\n        Some(\"jemalloc\") => Format::Jemalloc,\n        Some(\"pprof\") => Format::Pprof,\n        Some(\"svg\") => Format::Svg,\n        Some(format) => return Err(ApiError::BadRequest(anyhow!(\"invalid format {format}\"))),\n    };\n\n    // Obtain 
profiler handle.\n    let mut prof_ctl = jemalloc_pprof::PROF_CTL\n        .as_ref()\n        .ok_or(ApiError::InternalServerError(anyhow!(\n            \"heap profiling not enabled\"\n        )))?\n        .lock()\n        .await;\n    if !prof_ctl.activated() {\n        return Err(ApiError::InternalServerError(anyhow!(\n            \"heap profiling not enabled\"\n        )));\n    }\n\n    // Take and return the profile.\n    match format {\n        Format::Jemalloc => {\n            // NB: file is an open handle to a tempfile that's already deleted.\n            let file = tokio::task::spawn_blocking(move || prof_ctl.dump())\n                .await\n                .map_err(|join_err| ApiError::InternalServerError(join_err.into()))?\n                .map_err(ApiError::InternalServerError)?;\n            let stream = ReaderStream::new(tokio::fs::File::from_std(file));\n            Response::builder()\n                .status(200)\n                .header(CONTENT_TYPE, \"application/octet-stream\")\n                .header(CONTENT_DISPOSITION, \"attachment; filename=\\\"heap.dump\\\"\")\n                .body(Body::wrap_stream(stream))\n                .map_err(|err| ApiError::InternalServerError(err.into()))\n        }\n\n        Format::Pprof => {\n            let data = tokio::task::spawn_blocking(move || prof_ctl.dump_pprof())\n                .await\n                .map_err(|join_err| ApiError::InternalServerError(join_err.into()))?\n                .map_err(ApiError::InternalServerError)?;\n            Response::builder()\n                .status(200)\n                .header(CONTENT_TYPE, \"application/octet-stream\")\n                .header(CONTENT_DISPOSITION, \"attachment; filename=\\\"heap.pb.gz\\\"\")\n                .body(Body::from(data))\n                .map_err(|err| ApiError::InternalServerError(err.into()))\n        }\n\n        Format::Svg => {\n            let svg = tokio::task::spawn_blocking(move || prof_ctl.dump_flamegraph())\n           
     .await\n                .map_err(|join_err| ApiError::InternalServerError(join_err.into()))?\n                .map_err(ApiError::InternalServerError)?;\n            Response::builder()\n                .status(200)\n                .header(CONTENT_TYPE, \"image/svg+xml\")\n                .body(Body::from(svg))\n                .map_err(|err| ApiError::InternalServerError(err.into()))\n        }\n    }\n}\n\npub fn add_request_id_middleware<B: hyper::body::HttpBody + Send + Sync + 'static>()\n-> Middleware<B, ApiError> {\n    Middleware::pre(move |req| async move {\n        let request_id = match req.headers().get(&X_REQUEST_ID_HEADER) {\n            Some(request_id) => request_id\n                .to_str()\n                .expect(\"extract request id value\")\n                .to_owned(),\n            None => {\n                let request_id = uuid::Uuid::new_v4();\n                request_id.to_string()\n            }\n        };\n        req.set_context(RequestId(request_id));\n\n        Ok(req)\n    })\n}\n\nasync fn add_request_id_header_to_response(\n    mut res: Response<Body>,\n    req_info: RequestInfo,\n) -> Result<Response<Body>, ApiError> {\n    if let Some(request_id) = req_info.context::<RequestId>()\n        && let Ok(request_header_value) = HeaderValue::from_str(&request_id.0)\n    {\n        res.headers_mut()\n            .insert(&X_REQUEST_ID_HEADER, request_header_value);\n    };\n\n    Ok(res)\n}\n\npub fn make_router() -> RouterBuilder<hyper::Body, ApiError> {\n    Router::builder()\n        .middleware(add_request_id_middleware())\n        .middleware(Middleware::post_with_info(\n            add_request_id_header_to_response,\n        ))\n        .err_handler(route_error_handler)\n}\n\npub fn attach_openapi_ui(\n    router_builder: RouterBuilder<hyper::Body, ApiError>,\n    spec: &'static [u8],\n    spec_mount_path: &'static str,\n    ui_mount_path: &'static str,\n) -> RouterBuilder<hyper::Body, ApiError> {\n    router_builder\n        
.get(spec_mount_path,\n            move |r| request_span(r, move |_| async move {\n                Ok(Response::builder().body(Body::from(spec)).unwrap())\n            })\n        )\n        .get(ui_mount_path,\n             move |r| request_span(r, move |_| async move {\n                 Ok(Response::builder().body(Body::from(format!(r#\"\n                <!DOCTYPE html>\n                <html lang=\"en\">\n                <head>\n                <title>rweb</title>\n                <link href=\"https://cdn.jsdelivr.net/npm/swagger-ui-dist@3/swagger-ui.css\" rel=\"stylesheet\">\n                </head>\n                <body>\n                    <div id=\"swagger-ui\"></div>\n                    <script src=\"https://cdn.jsdelivr.net/npm/swagger-ui-dist@3/swagger-ui-bundle.js\" charset=\"UTF-8\"> </script>\n                    <script>\n                        window.onload = function() {{\n                        const ui = SwaggerUIBundle({{\n                            \"dom_id\": \"\\#swagger-ui\",\n                            presets: [\n                            SwaggerUIBundle.presets.apis,\n                            SwaggerUIBundle.SwaggerUIStandalonePreset\n                            ],\n                            layout: \"BaseLayout\",\n                            deepLinking: true,\n                            showExtensions: true,\n                            showCommonExtensions: true,\n                            url: \"{spec_mount_path}\",\n                        }})\n                        window.ui = ui;\n                    }};\n                </script>\n                </body>\n                </html>\n            \"#))).unwrap())\n             })\n        )\n}\n\nfn parse_token(header_value: &str) -> Result<&str, ApiError> {\n    // header must be in form Bearer <token>\n    let (prefix, token) = header_value\n        .split_once(' ')\n        .ok_or_else(|| ApiError::Unauthorized(\"malformed authorization header\".to_string()))?;\n  
  if prefix != \"Bearer\" {\n        return Err(ApiError::Unauthorized(\n            \"malformed authorization header\".to_string(),\n        ));\n    }\n    Ok(token)\n}\n\npub fn auth_middleware<B: hyper::body::HttpBody + Send + Sync + 'static>(\n    provide_auth: fn(&Request<Body>) -> Option<&SwappableJwtAuth>,\n) -> Middleware<B, ApiError> {\n    Middleware::pre(move |req| async move {\n        if let Some(auth) = provide_auth(&req) {\n            match req.headers().get(AUTHORIZATION) {\n                Some(value) => {\n                    let header_value = value.to_str().map_err(|_| {\n                        ApiError::Unauthorized(\"malformed authorization header\".to_string())\n                    })?;\n                    let token = parse_token(header_value)?;\n\n                    let data: TokenData<Claims> = auth.decode(token).map_err(|err| {\n                        warn!(\"Authentication error: {err}\");\n                        // Rely on From<AuthError> for ApiError impl\n                        err\n                    })?;\n                    req.set_context(data.claims);\n                }\n                None => {\n                    return Err(ApiError::Unauthorized(\n                        \"missing authorization header\".to_string(),\n                    ));\n                }\n            }\n        }\n        Ok(req)\n    })\n}\n\npub fn add_response_header_middleware<B>(\n    header: &str,\n    value: &str,\n) -> anyhow::Result<Middleware<B, ApiError>>\nwhere\n    B: hyper::body::HttpBody + Send + Sync + 'static,\n{\n    let name =\n        HeaderName::from_str(header).with_context(|| format!(\"invalid header name: {header}\"))?;\n    let value =\n        HeaderValue::from_str(value).with_context(|| format!(\"invalid header value: {value}\"))?;\n    Ok(Middleware::post_with_info(\n        move |mut response, request_info| {\n            let name = name.clone();\n            let value = value.clone();\n            async move {\n     
           let headers = response.headers_mut();\n                if headers.contains_key(&name) {\n                    warn!(\n                        \"{} response already contains header {:?}\",\n                        request_info.uri(),\n                        &name,\n                    );\n                } else {\n                    headers.insert(name, value);\n                }\n                Ok(response)\n            }\n        },\n    ))\n}\n\npub fn check_permission_with(\n    req: &Request<Body>,\n    check_permission: impl Fn(&Claims) -> Result<(), AuthError>,\n) -> Result<(), ApiError> {\n    match req.context::<Claims>() {\n        Some(claims) => Ok(check_permission(&claims)\n            .map_err(|_err| ApiError::Forbidden(\"JWT authentication error\".to_string()))?),\n        None => Ok(()), // claims is None because auth is disabled\n    }\n}\n\n#[cfg(test)]\nmod tests {\n    use std::future::poll_fn;\n    use std::net::{IpAddr, SocketAddr};\n\n    use hyper::service::Service;\n    use routerify::RequestServiceBuilder;\n\n    use super::*;\n\n    #[tokio::test]\n    async fn test_request_id_returned() {\n        let builder = RequestServiceBuilder::new(make_router().build().unwrap()).unwrap();\n        let remote_addr = SocketAddr::new(IpAddr::from_str(\"127.0.0.1\").unwrap(), 80);\n        let mut service = builder.build(remote_addr);\n        if let Err(e) = poll_fn(|ctx| service.poll_ready(ctx)).await {\n            panic!(\"request service is not ready: {e:?}\");\n        }\n\n        let mut req: Request<Body> = Request::default();\n        req.headers_mut()\n            .append(&X_REQUEST_ID_HEADER, HeaderValue::from_str(\"42\").unwrap());\n\n        let resp: Response<hyper::body::Body> = service.call(req).await.unwrap();\n\n        let header_val = resp.headers().get(&X_REQUEST_ID_HEADER).unwrap();\n\n        assert!(header_val == \"42\", \"response header mismatch\");\n    }\n\n    #[tokio::test]\n    async fn 
test_request_id_empty() {\n        let builder = RequestServiceBuilder::new(make_router().build().unwrap()).unwrap();\n        let remote_addr = SocketAddr::new(IpAddr::from_str(\"127.0.0.1\").unwrap(), 80);\n        let mut service = builder.build(remote_addr);\n        if let Err(e) = poll_fn(|ctx| service.poll_ready(ctx)).await {\n            panic!(\"request service is not ready: {e:?}\");\n        }\n\n        let req: Request<Body> = Request::default();\n        let resp: Response<hyper::body::Body> = service.call(req).await.unwrap();\n\n        let header_val = resp.headers().get(&X_REQUEST_ID_HEADER);\n\n        assert_ne!(header_val, None, \"response header should NOT be empty\");\n    }\n}\n"
  },
  {
    "path": "libs/http-utils/src/error.rs",
    "content": "use std::borrow::Cow;\nuse std::error::Error as StdError;\n\nuse hyper::{Body, Response, StatusCode, header};\nuse serde::{Deserialize, Serialize};\nuse thiserror::Error;\nuse tracing::{error, info, warn};\nuse utils::auth::AuthError;\n\n#[derive(Debug, Error)]\npub enum ApiError {\n    #[error(\"Bad request: {0:#?}\")]\n    BadRequest(anyhow::Error),\n\n    #[error(\"Forbidden: {0}\")]\n    Forbidden(String),\n\n    #[error(\"Unauthorized: {0}\")]\n    Unauthorized(String),\n\n    #[error(\"NotFound: {0}\")]\n    NotFound(Box<dyn StdError + Send + Sync + 'static>),\n\n    #[error(\"Conflict: {0}\")]\n    Conflict(String),\n\n    #[error(\"Precondition failed: {0}\")]\n    PreconditionFailed(Box<str>),\n\n    #[error(\"Resource temporarily unavailable: {0}\")]\n    ResourceUnavailable(Cow<'static, str>),\n\n    #[error(\"Too many requests: {0}\")]\n    TooManyRequests(Cow<'static, str>),\n\n    #[error(\"Shutting down\")]\n    ShuttingDown,\n\n    #[error(\"Timeout\")]\n    Timeout(Cow<'static, str>),\n\n    #[error(\"Request cancelled\")]\n    Cancelled,\n\n    #[error(transparent)]\n    InternalServerError(anyhow::Error),\n}\n\nimpl ApiError {\n    pub fn into_response(self) -> Response<Body> {\n        match self {\n            ApiError::BadRequest(err) => HttpErrorBody::response_from_msg_and_status(\n                format!(\"{err:#?}\"), // use debug printing so that we give the cause\n                StatusCode::BAD_REQUEST,\n            ),\n            ApiError::Forbidden(_) => {\n                HttpErrorBody::response_from_msg_and_status(self.to_string(), StatusCode::FORBIDDEN)\n            }\n            ApiError::Unauthorized(_) => HttpErrorBody::response_from_msg_and_status(\n                self.to_string(),\n                StatusCode::UNAUTHORIZED,\n            ),\n            ApiError::NotFound(_) => {\n                HttpErrorBody::response_from_msg_and_status(self.to_string(), StatusCode::NOT_FOUND)\n            }\n            
ApiError::Conflict(_) => {\n                HttpErrorBody::response_from_msg_and_status(self.to_string(), StatusCode::CONFLICT)\n            }\n            ApiError::PreconditionFailed(_) => HttpErrorBody::response_from_msg_and_status(\n                self.to_string(),\n                StatusCode::PRECONDITION_FAILED,\n            ),\n            ApiError::ShuttingDown => HttpErrorBody::response_from_msg_and_status(\n                \"Shutting down\".to_string(),\n                StatusCode::SERVICE_UNAVAILABLE,\n            ),\n            ApiError::ResourceUnavailable(err) => HttpErrorBody::response_from_msg_and_status(\n                err.to_string(),\n                StatusCode::SERVICE_UNAVAILABLE,\n            ),\n            ApiError::TooManyRequests(err) => HttpErrorBody::response_from_msg_and_status(\n                err.to_string(),\n                StatusCode::TOO_MANY_REQUESTS,\n            ),\n            ApiError::Timeout(err) => HttpErrorBody::response_from_msg_and_status(\n                err.to_string(),\n                StatusCode::REQUEST_TIMEOUT,\n            ),\n            ApiError::Cancelled => HttpErrorBody::response_from_msg_and_status(\n                self.to_string(),\n                StatusCode::INTERNAL_SERVER_ERROR,\n            ),\n            ApiError::InternalServerError(err) => HttpErrorBody::response_from_msg_and_status(\n                format!(\"{err:#}\"), // use alternative formatting so that we give the cause without backtrace\n                StatusCode::INTERNAL_SERVER_ERROR,\n            ),\n        }\n    }\n}\n\nimpl From<AuthError> for ApiError {\n    fn from(_value: AuthError) -> Self {\n        // Don't pass on the value of the AuthError as a precautionary measure.\n        // Being intentionally vague in public error communication hurts debugability\n        // but it is more secure.\n        ApiError::Forbidden(\"JWT authentication error\".to_string())\n    }\n}\n\n#[derive(Serialize, Deserialize)]\npub struct 
HttpErrorBody {\n    pub msg: String,\n}\n\nimpl HttpErrorBody {\n    pub fn from_msg(msg: String) -> Self {\n        HttpErrorBody { msg }\n    }\n\n    pub fn response_from_msg_and_status(msg: String, status: StatusCode) -> Response<Body> {\n        HttpErrorBody { msg }.to_response(status)\n    }\n\n    pub fn to_response(&self, status: StatusCode) -> Response<Body> {\n        Response::builder()\n            .status(status)\n            .header(header::CONTENT_TYPE, \"application/json\")\n            // we do not have nested maps with non string keys so serialization shouldn't fail\n            .body(Body::from(serde_json::to_string(self).unwrap()))\n            .unwrap()\n    }\n}\n\npub async fn route_error_handler(err: routerify::RouteError) -> Response<Body> {\n    match err.downcast::<ApiError>() {\n        Ok(api_error) => api_error_handler(*api_error),\n        Err(other_error) => {\n            // We expect all the request handlers to return an ApiError, so this should\n            // not be reached. 
But just in case.\n            error!(\"Error processing HTTP request: {other_error:?}\");\n            HttpErrorBody::response_from_msg_and_status(\n                other_error.to_string(),\n                StatusCode::INTERNAL_SERVER_ERROR,\n            )\n        }\n    }\n}\n\npub fn api_error_handler(api_error: ApiError) -> Response<Body> {\n    // Print a stack trace for Internal Server errors\n\n    match api_error {\n        ApiError::Forbidden(_) | ApiError::Unauthorized(_) => {\n            warn!(\"Error processing HTTP request: {api_error:#}\")\n        }\n        ApiError::ResourceUnavailable(_) => info!(\"Error processing HTTP request: {api_error:#}\"),\n        ApiError::NotFound(_) => info!(\"Error processing HTTP request: {api_error:#}\"),\n        ApiError::InternalServerError(_) => error!(\"Error processing HTTP request: {api_error:?}\"),\n        ApiError::ShuttingDown => info!(\"Shut down while processing HTTP request\"),\n        ApiError::Timeout(_) => info!(\"Timeout while processing HTTP request: {api_error:#}\"),\n        ApiError::Cancelled => info!(\"Request cancelled while processing HTTP request\"),\n        _ => info!(\"Error processing HTTP request: {api_error:#}\"),\n    }\n\n    api_error.into_response()\n}\n"
  },
  {
    "path": "libs/http-utils/src/failpoints.rs",
    "content": "use hyper::{Body, Request, Response, StatusCode};\nuse serde::{Deserialize, Serialize};\nuse tokio_util::sync::CancellationToken;\nuse utils::failpoint_support::apply_failpoint;\n\nuse crate::error::ApiError;\nuse crate::json::{json_request, json_response};\n\npub type ConfigureFailpointsRequest = Vec<FailpointConfig>;\n\n/// Information for configuring a single fail point\n#[derive(Debug, Serialize, Deserialize)]\npub struct FailpointConfig {\n    /// Name of the fail point\n    pub name: String,\n    /// List of actions to take, using the format described in `fail::cfg`\n    ///\n    /// We also support `actions = \"exit\"` to cause the fail point to immediately exit.\n    pub actions: String,\n}\n\n/// Configure failpoints through http.\npub async fn failpoints_handler(\n    mut request: Request<Body>,\n    _cancel: CancellationToken,\n) -> Result<Response<Body>, ApiError> {\n    if !fail::has_failpoints() {\n        return Err(ApiError::BadRequest(anyhow::anyhow!(\n            \"Cannot manage failpoints because neon was compiled without failpoints support\"\n        )));\n    }\n\n    let failpoints: ConfigureFailpointsRequest = json_request(&mut request).await?;\n    for fp in failpoints {\n        tracing::info!(\"cfg failpoint: {} {}\", fp.name, fp.actions);\n\n        // We recognize one extra \"action\" that's not natively recognized\n        // by the failpoints crate: exit, to immediately kill the process\n        let cfg_result = apply_failpoint(&fp.name, &fp.actions);\n\n        if let Err(err_msg) = cfg_result {\n            return Err(ApiError::BadRequest(anyhow::anyhow!(\n                \"Failed to configure failpoints: {err_msg}\"\n            )));\n        }\n    }\n\n    json_response(StatusCode::OK, ())\n}\n"
  },
  {
    "path": "libs/http-utils/src/json.rs",
    "content": "use anyhow::Context;\nuse bytes::Buf;\nuse hyper::{Body, Request, Response, StatusCode, header};\nuse serde::{Deserialize, Serialize};\n\nuse super::error::ApiError;\n\n/// Parse a json request body and deserialize it to the type `T`.\npub async fn json_request<T: for<'de> Deserialize<'de>>(\n    request: &mut Request<Body>,\n) -> Result<T, ApiError> {\n    let body = hyper::body::aggregate(request.body_mut())\n        .await\n        .context(\"Failed to read request body\")\n        .map_err(ApiError::BadRequest)?;\n\n    if body.remaining() == 0 {\n        return Err(ApiError::BadRequest(anyhow::anyhow!(\n            \"missing request body\"\n        )));\n    }\n\n    let mut deser = serde_json::de::Deserializer::from_reader(body.reader());\n\n    serde_path_to_error::deserialize(&mut deser)\n        // intentionally stringify because the debug version is not helpful in python logs\n        .map_err(|e| anyhow::anyhow!(\"Failed to parse json request: {e}\"))\n        .map_err(ApiError::BadRequest)\n}\n\n/// Parse a json request body and deserialize it to the type `T`. 
If the body is empty, return `T::default`.\npub async fn json_request_maybe<T: for<'de> Deserialize<'de> + Default>(\n    request: &mut Request<Body>,\n) -> Result<T, ApiError> {\n    let body = hyper::body::aggregate(request.body_mut())\n        .await\n        .context(\"Failed to read request body\")\n        .map_err(ApiError::BadRequest)?;\n\n    if body.remaining() == 0 {\n        return Ok(T::default());\n    }\n\n    let mut deser = serde_json::de::Deserializer::from_reader(body.reader());\n\n    serde_path_to_error::deserialize(&mut deser)\n        // intentionally stringify because the debug version is not helpful in python logs\n        .map_err(|e| anyhow::anyhow!(\"Failed to parse json request: {e}\"))\n        .map_err(ApiError::BadRequest)\n}\n\npub fn json_response<T: Serialize>(\n    status: StatusCode,\n    data: T,\n) -> Result<Response<Body>, ApiError> {\n    let json = serde_json::to_string(&data)\n        .context(\"Failed to serialize JSON response\")\n        .map_err(ApiError::InternalServerError)?;\n    let response = Response::builder()\n        .status(status)\n        .header(header::CONTENT_TYPE, \"application/json\")\n        .body(Body::from(json))\n        .map_err(|e| ApiError::InternalServerError(e.into()))?;\n    Ok(response)\n}\n"
  },
  {
    "path": "libs/http-utils/src/lib.rs",
    "content": "pub mod endpoint;\npub mod error;\npub mod failpoints;\npub mod json;\npub mod request;\npub mod server;\npub mod tls_certs;\n\nextern crate hyper0 as hyper;\n\n/// Current fast way to apply simple http routing in various Neon binaries.\n/// Re-exported for sake of uniform approach, that could be later replaced with better alternatives, if needed.\npub use routerify::{RequestServiceBuilder, RouterBuilder, RouterService, ext::RequestExt};\n"
  },
  {
    "path": "libs/http-utils/src/request.rs",
    "content": "use core::fmt;\nuse std::borrow::Cow;\nuse std::str::FromStr;\n\nuse anyhow::anyhow;\nuse hyper::body::HttpBody;\nuse hyper::{Body, Request};\nuse routerify::ext::RequestExt;\n\nuse super::error::ApiError;\n\npub fn get_request_param<'a>(\n    request: &'a Request<Body>,\n    param_name: &str,\n) -> Result<&'a str, ApiError> {\n    match request.param(param_name) {\n        Some(arg) => Ok(arg),\n        None => Err(ApiError::BadRequest(anyhow!(\n            \"no {param_name} specified in path param\",\n        ))),\n    }\n}\n\npub fn parse_request_param<T: FromStr>(\n    request: &Request<Body>,\n    param_name: &str,\n) -> Result<T, ApiError> {\n    match get_request_param(request, param_name)?.parse() {\n        Ok(v) => Ok(v),\n        Err(_) => Err(ApiError::BadRequest(anyhow!(\n            \"failed to parse {param_name}\",\n        ))),\n    }\n}\n\npub fn get_query_param<'a>(\n    request: &'a Request<Body>,\n    param_name: &str,\n) -> Result<Option<Cow<'a, str>>, ApiError> {\n    let query = match request.uri().query() {\n        Some(q) => q,\n        None => return Ok(None),\n    };\n    let values = url::form_urlencoded::parse(query.as_bytes())\n        .filter_map(|(k, v)| if k == param_name { Some(v) } else { None })\n        // we call .next() twice below. If it's None the first time, .fuse() ensures it's None afterwards\n        .fuse();\n\n    // Work around an issue with Alloy's pyroscope scrape where the \"seconds\"\n    // parameter is added several times. 
https://github.com/grafana/alloy/issues/3026\n    // TODO: revert after Alloy is fixed.\n    let value1 = values\n        .map(Ok)\n        .reduce(|acc, i| {\n            match acc {\n                Err(_) => acc,\n\n                // It's okay to have duplicates as along as they have the same value.\n                Ok(ref a) if a == &i.unwrap() => acc,\n\n                _ => Err(ApiError::BadRequest(anyhow!(\n                    \"param {param_name} specified more than once\"\n                ))),\n            }\n        })\n        .transpose()?;\n    // if values.next().is_some() {\n    //     return Err(ApiError::BadRequest(anyhow!(\n    //         \"param {param_name} specified more than once\"\n    //     )));\n    // }\n\n    Ok(value1)\n}\n\npub fn must_get_query_param<'a>(\n    request: &'a Request<Body>,\n    param_name: &str,\n) -> Result<Cow<'a, str>, ApiError> {\n    get_query_param(request, param_name)?.ok_or_else(|| {\n        ApiError::BadRequest(anyhow!(\"no {param_name} specified in query parameters\"))\n    })\n}\n\npub fn parse_query_param<E: fmt::Display, T: FromStr<Err = E>>(\n    request: &Request<Body>,\n    param_name: &str,\n) -> Result<Option<T>, ApiError> {\n    get_query_param(request, param_name)?\n        .map(|v| {\n            v.parse().map_err(|e| {\n                ApiError::BadRequest(anyhow!(\"cannot parse query param {param_name}: {e}\"))\n            })\n        })\n        .transpose()\n}\n\npub fn must_parse_query_param<E: fmt::Display, T: FromStr<Err = E>>(\n    request: &Request<Body>,\n    param_name: &str,\n) -> Result<T, ApiError> {\n    parse_query_param(request, param_name)?.ok_or_else(|| {\n        ApiError::BadRequest(anyhow!(\"no {param_name} specified in query parameters\"))\n    })\n}\n\npub async fn ensure_no_body(request: &mut Request<Body>) -> Result<(), ApiError> {\n    match request.body_mut().data().await {\n        Some(_) => Err(ApiError::BadRequest(anyhow!(\"Unexpected request body\"))),\n        
None => Ok(()),\n    }\n}\n\n#[cfg(test)]\nmod tests {\n    use super::*;\n\n    #[test]\n    fn test_get_query_param_duplicate() {\n        let req = Request::builder()\n            .uri(\"http://localhost:12345/testuri?testparam=1\")\n            .body(hyper::Body::empty())\n            .unwrap();\n        let value = get_query_param(&req, \"testparam\").unwrap();\n        assert_eq!(value.unwrap(), \"1\");\n\n        let req = Request::builder()\n            .uri(\"http://localhost:12345/testuri?testparam=1&testparam=1\")\n            .body(hyper::Body::empty())\n            .unwrap();\n        let value = get_query_param(&req, \"testparam\").unwrap();\n        assert_eq!(value.unwrap(), \"1\");\n\n        let req = Request::builder()\n            .uri(\"http://localhost:12345/testuri\")\n            .body(hyper::Body::empty())\n            .unwrap();\n        let value = get_query_param(&req, \"testparam\").unwrap();\n        assert!(value.is_none());\n\n        let req = Request::builder()\n            .uri(\"http://localhost:12345/testuri?testparam=1&testparam=2&testparam=3\")\n            .body(hyper::Body::empty())\n            .unwrap();\n        let value = get_query_param(&req, \"testparam\");\n        assert!(value.is_err());\n    }\n}\n"
  },
  {
    "path": "libs/http-utils/src/server.rs",
    "content": "use std::{error::Error, sync::Arc};\n\nuse futures::StreamExt;\nuse futures::stream::FuturesUnordered;\nuse hyper0::Body;\nuse hyper0::server::conn::Http;\nuse metrics::{IntCounterVec, register_int_counter_vec};\nuse once_cell::sync::Lazy;\nuse routerify::{RequestService, RequestServiceBuilder};\nuse tokio::io::{AsyncRead, AsyncWrite};\nuse tokio_rustls::TlsAcceptor;\nuse tokio_util::sync::CancellationToken;\nuse tracing::{error, info};\n\nuse crate::error::ApiError;\n\n/// A simple HTTP server over hyper library.\n/// You may want to use it instead of [`hyper0::server::Server`] because:\n/// 1. hyper0's Server was removed from hyper v1.\n///    It's recommended to replace hyepr0's Server with a manual loop, which is done here.\n/// 2. hyper0's Server doesn't support TLS out of the box, and there is no way\n///    to support it efficiently with the Accept trait that hyper0's Server uses.\n///    That's one of the reasons why it was removed from v1.\n///    <https://github.com/hyperium/hyper/blob/115339d3df50f20c8717680aa35f48858e9a6205/docs/ROADMAP.md#higher-level-client-and-server-problems>\npub struct Server {\n    request_service: Arc<RequestServiceBuilder<Body, ApiError>>,\n    listener: tokio::net::TcpListener,\n    tls_acceptor: Option<TlsAcceptor>,\n}\n\nstatic CONNECTION_STARTED_COUNT: Lazy<IntCounterVec> = Lazy::new(|| {\n    register_int_counter_vec!(\n        \"http_server_connection_started_total\",\n        \"Number of established http/https connections\",\n        &[\"scheme\"]\n    )\n    .expect(\"failed to define a metric\")\n});\n\nstatic CONNECTION_ERROR_COUNT: Lazy<IntCounterVec> = Lazy::new(|| {\n    register_int_counter_vec!(\n        \"http_server_connection_errors_total\",\n        \"Number of occured connection errors by type\",\n        &[\"type\"]\n    )\n    .expect(\"failed to define a metric\")\n});\n\nimpl Server {\n    pub fn new(\n        request_service: Arc<RequestServiceBuilder<Body, ApiError>>,\n        listener: 
std::net::TcpListener,\n        tls_acceptor: Option<TlsAcceptor>,\n    ) -> anyhow::Result<Self> {\n        // Note: caller of from_std is responsible for setting nonblocking mode.\n        listener.set_nonblocking(true)?;\n        let listener = tokio::net::TcpListener::from_std(listener)?;\n\n        Ok(Self {\n            request_service,\n            listener,\n            tls_acceptor,\n        })\n    }\n\n    pub async fn serve(self, cancel: CancellationToken) -> anyhow::Result<()> {\n        fn suppress_io_error(err: &std::io::Error) -> bool {\n            use std::io::ErrorKind::*;\n            matches!(err.kind(), ConnectionReset | ConnectionAborted | BrokenPipe)\n        }\n        fn suppress_hyper_error(err: &hyper0::Error) -> bool {\n            if err.is_incomplete_message() || err.is_closed() || err.is_timeout() {\n                return true;\n            }\n            if let Some(inner) = err.source()\n                && let Some(io) = inner.downcast_ref::<std::io::Error>()\n            {\n                return suppress_io_error(io);\n            }\n            false\n        }\n\n        let tcp_error_cnt = CONNECTION_ERROR_COUNT.with_label_values(&[\"tcp\"]);\n        let tls_error_cnt = CONNECTION_ERROR_COUNT.with_label_values(&[\"tls\"]);\n        let http_error_cnt = CONNECTION_ERROR_COUNT.with_label_values(&[\"http\"]);\n        let https_error_cnt = CONNECTION_ERROR_COUNT.with_label_values(&[\"https\"]);\n        let panic_error_cnt = CONNECTION_ERROR_COUNT.with_label_values(&[\"panic\"]);\n\n        let http_connection_cnt = CONNECTION_STARTED_COUNT.with_label_values(&[\"http\"]);\n        let https_connection_cnt = CONNECTION_STARTED_COUNT.with_label_values(&[\"https\"]);\n\n        let mut connections = FuturesUnordered::new();\n        loop {\n            tokio::select! 
{\n                stream = self.listener.accept() => {\n                    let (tcp_stream, remote_addr) = match stream {\n                        Ok(stream) => stream,\n                        Err(err) => {\n                            tcp_error_cnt.inc();\n                            if !suppress_io_error(&err) {\n                                info!(\"Failed to accept TCP connection: {err:#}\");\n                            }\n                            continue;\n                        }\n                    };\n\n                    let service = self.request_service.build(remote_addr);\n                    let tls_acceptor = self.tls_acceptor.clone();\n                    let cancel = cancel.clone();\n\n                    let tls_error_cnt = tls_error_cnt.clone();\n                    let http_error_cnt = http_error_cnt.clone();\n                    let https_error_cnt = https_error_cnt.clone();\n                    let http_connection_cnt = http_connection_cnt.clone();\n                    let https_connection_cnt = https_connection_cnt.clone();\n\n                    connections.push(tokio::spawn(\n                        async move {\n                            match tls_acceptor {\n                                Some(tls_acceptor) => {\n                                    // Handle HTTPS connection.\n                                    https_connection_cnt.inc();\n                                    let tls_stream = tokio::select! 
{\n                                        tls_stream = tls_acceptor.accept(tcp_stream) => tls_stream,\n                                        _ = cancel.cancelled() => return,\n                                    };\n                                    let tls_stream = match tls_stream {\n                                        Ok(tls_stream) => tls_stream,\n                                        Err(err) => {\n                                            tls_error_cnt.inc();\n                                            if !suppress_io_error(&err) {\n                                                info!(%remote_addr, \"Failed to accept TLS connection: {err:#}\");\n                                            }\n                                            return;\n                                        }\n                                    };\n                                    if let Err(err) = Self::serve_connection(tls_stream, service, cancel).await {\n                                        https_error_cnt.inc();\n                                        if !suppress_hyper_error(&err) {\n                                            info!(%remote_addr, \"Failed to serve HTTPS connection: {err:#}\");\n                                        }\n                                    }\n                                }\n                                None => {\n                                    // Handle HTTP connection.\n                                    http_connection_cnt.inc();\n                                    if let Err(err) = Self::serve_connection(tcp_stream, service, cancel).await {\n                                        http_error_cnt.inc();\n                                        if !suppress_hyper_error(&err) {\n                                            info!(%remote_addr, \"Failed to serve HTTP connection: {err:#}\");\n                                        }\n                                    }\n                                }\n       
                     };\n                        }));\n                 }\n                Some(conn) = connections.next() => {\n                    if let Err(err) = conn {\n                        panic_error_cnt.inc();\n                        error!(\"Connection panicked: {err:#}\");\n                    }\n                }\n                _ = cancel.cancelled() => {\n                    // Wait for graceful shutdown of all connections.\n                    while let Some(conn) = connections.next().await {\n                        if let Err(err) = conn {\n                            panic_error_cnt.inc();\n                            error!(\"Connection panicked: {err:#}\");\n                        }\n                    }\n                    break;\n                }\n            }\n        }\n        Ok(())\n    }\n\n    /// Serves HTTP connection with graceful shutdown.\n    async fn serve_connection<I>(\n        io: I,\n        service: RequestService<Body, ApiError>,\n        cancel: CancellationToken,\n    ) -> Result<(), hyper0::Error>\n    where\n        I: AsyncRead + AsyncWrite + Unpin + Send + 'static,\n    {\n        let mut conn = Http::new().serve_connection(io, service).with_upgrades();\n\n        tokio::select! {\n            res = &mut conn => res,\n            _ = cancel.cancelled() => {\n                Pin::new(&mut conn).graceful_shutdown();\n                // Note: connection should still be awaited for graceful shutdown to complete.\n                conn.await\n            }\n        }\n    }\n}\n"
  },
  {
    "path": "libs/http-utils/src/tls_certs.rs",
    "content": "use std::{sync::Arc, time::Duration};\n\nuse anyhow::Context;\nuse arc_swap::ArcSwap;\nuse camino::Utf8Path;\nuse metrics::{IntCounterVec, UIntGaugeVec, register_int_counter_vec, register_uint_gauge_vec};\nuse once_cell::sync::Lazy;\nuse rustls::{\n    pki_types::{CertificateDer, PrivateKeyDer, UnixTime},\n    server::{ClientHello, ResolvesServerCert},\n    sign::CertifiedKey,\n};\nuse x509_cert::der::Reader;\n\npub async fn load_cert_chain(filename: &Utf8Path) -> anyhow::Result<Vec<CertificateDer<'static>>> {\n    let cert_data = tokio::fs::read(filename)\n        .await\n        .context(format!(\"failed reading certificate file {filename:?}\"))?;\n    let mut reader = std::io::Cursor::new(&cert_data);\n\n    let cert_chain = rustls_pemfile::certs(&mut reader)\n        .collect::<Result<Vec<_>, _>>()\n        .context(format!(\"failed parsing certificate from file {filename:?}\"))?;\n\n    Ok(cert_chain)\n}\n\npub async fn load_private_key(filename: &Utf8Path) -> anyhow::Result<PrivateKeyDer<'static>> {\n    let key_data = tokio::fs::read(filename)\n        .await\n        .context(format!(\"failed reading private key file {filename:?}\"))?;\n    let mut reader = std::io::Cursor::new(&key_data);\n\n    let key = rustls_pemfile::private_key(&mut reader)\n        .context(format!(\"failed parsing private key from file {filename:?}\"))?;\n\n    key.ok_or(anyhow::anyhow!(\n        \"no private key found in {}\",\n        filename.as_str(),\n    ))\n}\n\npub async fn load_certified_key(\n    key_filename: &Utf8Path,\n    cert_filename: &Utf8Path,\n) -> anyhow::Result<CertifiedKey> {\n    let cert_chain = load_cert_chain(cert_filename).await?;\n    let key = load_private_key(key_filename).await?;\n\n    let key = rustls::crypto::ring::default_provider()\n        .key_provider\n        .load_private_key(key)?;\n\n    let certified_key = CertifiedKey::new(cert_chain, key);\n    certified_key.keys_match()?;\n    Ok(certified_key)\n}\n\n/// rustls's 
CertifiedKey with extra parsed fields used for metrics.\nstruct ParsedCertifiedKey {\n    certified_key: CertifiedKey,\n    expiration_time: UnixTime,\n}\n\n/// Parse expiration time from an X509 certificate.\nfn parse_expiration_time(cert: &CertificateDer<'_>) -> anyhow::Result<UnixTime> {\n    let parsed_cert = x509_cert::der::SliceReader::new(cert)\n        .context(\"Failed to parse cerficiate\")?\n        .decode::<x509_cert::Certificate>()\n        .context(\"Failed to parse cerficiate\")?;\n\n    Ok(UnixTime::since_unix_epoch(\n        parsed_cert\n            .tbs_certificate\n            .validity\n            .not_after\n            .to_unix_duration(),\n    ))\n}\n\nasync fn load_and_parse_certified_key(\n    key_filename: &Utf8Path,\n    cert_filename: &Utf8Path,\n) -> anyhow::Result<ParsedCertifiedKey> {\n    let certified_key = load_certified_key(key_filename, cert_filename).await?;\n    let expiration_time = parse_expiration_time(certified_key.end_entity_cert()?)?;\n    Ok(ParsedCertifiedKey {\n        certified_key,\n        expiration_time,\n    })\n}\n\nstatic CERT_EXPIRATION_TIME: Lazy<UIntGaugeVec> = Lazy::new(|| {\n    register_uint_gauge_vec!(\n        \"tls_certs_expiration_time_seconds\",\n        \"Expiration time of the loaded certificate since unix epoch in seconds\",\n        &[\"resolver_name\"]\n    )\n    .expect(\"failed to define a metric\")\n});\n\nstatic CERT_RELOAD_STARTED_COUNTER: Lazy<IntCounterVec> = Lazy::new(|| {\n    register_int_counter_vec!(\n        \"tls_certs_reload_started_total\",\n        \"Number of certificate reload loop iterations started\",\n        &[\"resolver_name\"]\n    )\n    .expect(\"failed to define a metric\")\n});\n\nstatic CERT_RELOAD_UPDATED_COUNTER: Lazy<IntCounterVec> = Lazy::new(|| {\n    register_int_counter_vec!(\n        \"tls_certs_reload_updated_total\",\n        \"Number of times the certificate was updated to the new one\",\n        &[\"resolver_name\"]\n    )\n    .expect(\"failed to 
define a metric\")\n});\n\nstatic CERT_RELOAD_FAILED_COUNTER: Lazy<IntCounterVec> = Lazy::new(|| {\n    register_int_counter_vec!(\n        \"tls_certs_reload_failed_total\",\n        \"Number of times the certificate reload failed\",\n        &[\"resolver_name\"]\n    )\n    .expect(\"failed to define a metric\")\n});\n\n/// Implementation of [`rustls::server::ResolvesServerCert`] which reloads certificates from\n/// the disk periodically.\n#[derive(Debug)]\npub struct ReloadingCertificateResolver {\n    certified_key: ArcSwap<CertifiedKey>,\n}\n\nimpl ReloadingCertificateResolver {\n    /// Creates a new Resolver by loading certificate and private key from FS and\n    /// creating tokio::task to reload them with provided reload_period.\n    /// resolver_name is used as metric's label.\n    pub async fn new(\n        resolver_name: &str,\n        key_filename: &Utf8Path,\n        cert_filename: &Utf8Path,\n        reload_period: Duration,\n    ) -> anyhow::Result<Arc<Self>> {\n        // Create metrics for current resolver.\n        let cert_expiration_time = CERT_EXPIRATION_TIME.with_label_values(&[resolver_name]);\n        let cert_reload_started_counter =\n            CERT_RELOAD_STARTED_COUNTER.with_label_values(&[resolver_name]);\n        let cert_reload_updated_counter =\n            CERT_RELOAD_UPDATED_COUNTER.with_label_values(&[resolver_name]);\n        let cert_reload_failed_counter =\n            CERT_RELOAD_FAILED_COUNTER.with_label_values(&[resolver_name]);\n\n        let parsed_key = load_and_parse_certified_key(key_filename, cert_filename).await?;\n\n        let this = Arc::new(Self {\n            certified_key: ArcSwap::from_pointee(parsed_key.certified_key),\n        });\n        cert_expiration_time.set(parsed_key.expiration_time.as_secs());\n\n        tokio::spawn({\n            let weak_this = Arc::downgrade(&this);\n            let key_filename = key_filename.to_owned();\n            let cert_filename = cert_filename.to_owned();\n            
async move {\n                let start = tokio::time::Instant::now() + reload_period;\n                let mut interval = tokio::time::interval_at(start, reload_period);\n                let mut last_reload_failed = false;\n                loop {\n                    interval.tick().await;\n                    let this = match weak_this.upgrade() {\n                        Some(this) => this,\n                        None => break, // Resolver has been destroyed, exit.\n                    };\n                    cert_reload_started_counter.inc();\n\n                    match load_and_parse_certified_key(&key_filename, &cert_filename).await {\n                        Ok(parsed_key) => {\n                            if parsed_key.certified_key.cert == this.certified_key.load().cert {\n                                tracing::debug!(\"Certificate has not changed since last reloading\");\n                            } else {\n                                tracing::info!(\"Certificate has been reloaded\");\n                                this.certified_key.store(Arc::new(parsed_key.certified_key));\n                                cert_expiration_time.set(parsed_key.expiration_time.as_secs());\n                                cert_reload_updated_counter.inc();\n                            }\n                            last_reload_failed = false;\n                        }\n                        Err(err) => {\n                            cert_reload_failed_counter.inc();\n                            // Note: Reloading certs may fail if it conflicts with the script updating\n                            // the files at the same time. 
Warn only if the error is persistent.\n                            if last_reload_failed {\n                                tracing::warn!(\"Error reloading certificate: {err:#}\");\n                            } else {\n                                tracing::info!(\"Error reloading certificate: {err:#}\");\n                            }\n                            last_reload_failed = true;\n                        }\n                    }\n                }\n            }\n        });\n\n        Ok(this)\n    }\n}\n\nimpl ResolvesServerCert for ReloadingCertificateResolver {\n    fn resolve(&self, _client_hello: ClientHello<'_>) -> Option<Arc<CertifiedKey>> {\n        Some(self.certified_key.load_full())\n    }\n}\n"
  },
  {
    "path": "libs/metrics/Cargo.toml",
    "content": "[package]\nname = \"metrics\"\nversion = \"0.1.0\"\nedition.workspace = true\nlicense.workspace = true\n\n[dependencies]\nprometheus.workspace = true\nlibc.workspace = true\nonce_cell.workspace = true\nchrono.workspace = true\ntwox-hash.workspace = true\nmeasured.workspace = true\n\n[target.'cfg(target_os = \"linux\")'.dependencies]\nprocfs.workspace = true\nmeasured-process.workspace = true\n\n[dev-dependencies]\nrand.workspace = true\nrand_distr = \"0.5\"\n"
  },
  {
    "path": "libs/metrics/src/hll.rs",
    "content": "//! HyperLogLog is an algorithm for the count-distinct problem,\n//! approximating the number of distinct elements in a multiset.\n//! Calculating the exact cardinality of the distinct elements\n//! of a multiset requires an amount of memory proportional to\n//! the cardinality, which is impractical for very large data sets.\n//! Probabilistic cardinality estimators, such as the HyperLogLog algorithm,\n//! use significantly less memory than this, but can only approximate the cardinality.\n\nuse std::hash::{BuildHasher, BuildHasherDefault, Hash};\nuse std::sync::atomic::AtomicU8;\n\nuse measured::LabelGroup;\nuse measured::label::{LabelGroupVisitor, LabelName, LabelValue, LabelVisitor};\nuse measured::metric::counter::CounterState;\nuse measured::metric::name::MetricNameEncoder;\nuse measured::metric::{Metric, MetricType, MetricVec};\nuse measured::text::TextEncoder;\nuse twox_hash::xxh3;\n\n/// Create an [`HyperLogLogVec`] and registers to default registry.\n#[macro_export(local_inner_macros)]\nmacro_rules! register_hll_vec {\n    ($N:literal, $OPTS:expr, $LABELS_NAMES:expr $(,)?) => {{\n        let hll_vec = $crate::HyperLogLogVec::<$N>::new($OPTS, $LABELS_NAMES).unwrap();\n        $crate::register(Box::new(hll_vec.clone())).map(|_| hll_vec)\n    }};\n\n    ($N:literal, $NAME:expr, $HELP:expr, $LABELS_NAMES:expr $(,)?) => {{ $crate::register_hll_vec!($N, $crate::opts!($NAME, $HELP), $LABELS_NAMES) }};\n}\n\n/// Create an [`HyperLogLog`] and registers to default registry.\n#[macro_export(local_inner_macros)]\nmacro_rules! register_hll {\n    ($N:literal, $OPTS:expr $(,)?) => {{\n        let hll = $crate::HyperLogLog::<$N>::with_opts($OPTS).unwrap();\n        $crate::register(Box::new(hll.clone())).map(|_| hll)\n    }};\n\n    ($N:literal, $NAME:expr, $HELP:expr $(,)?) 
=> {{ $crate::register_hll!($N, $crate::opts!($NAME, $HELP)) }};\n}\n\n/// HLL is a probabilistic cardinality measure.\n///\n/// How to use this time-series for a metric name `my_metrics_total_hll`:\n///\n/// ```promql\n/// # harmonic mean\n/// 1 / (\n///     sum (\n///         2 ^ -(\n///             # HLL merge operation\n///             max (my_metrics_total_hll{}) by (hll_shard, other_labels...)\n///         )\n///     ) without (hll_shard)\n/// )\n/// * alpha\n/// * shards_count\n/// * shards_count\n/// ```\n///\n/// If you want an estimate over time, you can use the following query:\n///\n/// ```promql\n/// # harmonic mean\n/// 1 / (\n///     sum (\n///         2 ^ -(\n///             # HLL merge operation\n///             max (\n///                 max_over_time(my_metrics_total_hll{}[$__rate_interval])\n///             ) by (hll_shard, other_labels...)\n///         )\n///     ) without (hll_shard)\n/// )\n/// * alpha\n/// * shards_count\n/// * shards_count\n/// ```\n///\n/// In the case of low cardinality, you might want to use the linear counting approximation:\n///\n/// ```promql\n/// # LinearCounting(m, V) = m log (m / V)\n/// shards_count * ln(shards_count /\n///     # calculate V = how many shards contain a 0\n///     count(max (proxy_connecting_endpoints{}) by (hll_shard, protocol) == 0) without (hll_shard)\n/// )\n/// ```\n///\n/// See <https://en.wikipedia.org/wiki/HyperLogLog#Practical_considerations> for estimates on alpha\npub type HyperLogLogVec<L, const N: usize> = MetricVec<HyperLogLogState<N>, L>;\npub type HyperLogLog<const N: usize> = Metric<HyperLogLogState<N>>;\n\npub struct HyperLogLogState<const N: usize> {\n    shards: [AtomicU8; N],\n}\nimpl<const N: usize> Default for HyperLogLogState<N> {\n    fn default() -> Self {\n        #[allow(clippy::declare_interior_mutable_const)]\n        const ZERO: AtomicU8 = AtomicU8::new(0);\n        Self { shards: [ZERO; N] }\n    }\n}\n\nimpl<const N: usize> MetricType for HyperLogLogState<N> {\n    
type Metadata = ();\n}\n\nimpl<const N: usize> HyperLogLogState<N> {\n    pub fn measure(&self, item: &(impl Hash + ?Sized)) {\n        // changing the hasher will break compatibility with previous measurements.\n        self.record(BuildHasherDefault::<xxh3::Hash64>::default().hash_one(item));\n    }\n\n    fn record(&self, hash: u64) {\n        let p = N.ilog2() as u8;\n        let j = hash & (N as u64 - 1);\n        let rho = (hash >> p).leading_zeros() as u8 + 1 - p;\n        self.shards[j as usize].fetch_max(rho, std::sync::atomic::Ordering::Relaxed);\n    }\n\n    fn take_sample(&self) -> [u8; N] {\n        self.shards.each_ref().map(|x| {\n            // We reset the counter to 0 so we can perform a cardinality measure over any time slice in prometheus.\n\n            // This seems like it would be a race condition,\n            // but HLL is not impacted by a write in one shard happening in between.\n            // This is because in PromQL we will be implementing a harmonic mean of all buckets.\n            // we will also merge samples in a time series using `max by (hll_shard)`.\n\n            // TODO: maybe we shouldn't reset this on every collect, instead, only after a time window.\n            // this would mean that a dev port-forwarding the metrics url won't break the sampling.\n            x.swap(0, std::sync::atomic::Ordering::Relaxed)\n        })\n    }\n}\n\nimpl<W: std::io::Write, const N: usize> measured::metric::MetricEncoding<TextEncoder<W>>\n    for HyperLogLogState<N>\n{\n    fn write_type(\n        name: impl MetricNameEncoder,\n        enc: &mut TextEncoder<W>,\n    ) -> Result<(), std::io::Error> {\n        enc.write_type(&name, measured::text::MetricType::Gauge)\n    }\n    fn collect_into(\n        &self,\n        _: &(),\n        labels: impl LabelGroup,\n        name: impl MetricNameEncoder,\n        enc: &mut TextEncoder<W>,\n    ) -> Result<(), std::io::Error> {\n        struct I64(i64);\n        impl LabelValue for I64 {\n        
    fn visit<V: LabelVisitor>(&self, v: V) -> V::Output {\n                v.write_int(self.0)\n            }\n        }\n\n        struct HllShardLabel {\n            hll_shard: i64,\n        }\n\n        impl LabelGroup for HllShardLabel {\n            fn visit_values(&self, v: &mut impl LabelGroupVisitor) {\n                const LE: &LabelName = LabelName::from_str(\"hll_shard\");\n                v.write_value(LE, &I64(self.hll_shard));\n            }\n        }\n\n        self.take_sample()\n            .into_iter()\n            .enumerate()\n            .try_for_each(|(hll_shard, val)| {\n                CounterState::new(val as u64).collect_into(\n                    &(),\n                    labels.by_ref().compose_with(HllShardLabel {\n                        hll_shard: hll_shard as i64,\n                    }),\n                    name.by_ref(),\n                    enc,\n                )\n            })\n    }\n}\n\n#[cfg(test)]\nmod tests {\n    use std::collections::HashSet;\n\n    use measured::FixedCardinalityLabel;\n    use measured::label::StaticLabelSet;\n    use rand::rngs::StdRng;\n    use rand::{Rng, SeedableRng};\n    use rand_distr::{Distribution, Zipf};\n\n    use crate::HyperLogLogVec;\n\n    #[derive(FixedCardinalityLabel, Clone, Copy)]\n    #[label(singleton = \"x\")]\n    enum Label {\n        A,\n        B,\n    }\n\n    fn collect(hll: &HyperLogLogVec<StaticLabelSet<Label>, 32>) -> ([u8; 32], [u8; 32]) {\n        // cannot go through the `hll.collect_family_into` interface yet...\n        // need to see if I can fix the conflicting impls problem in measured.\n        (\n            hll.get_metric(hll.with_labels(Label::A)).take_sample(),\n            hll.get_metric(hll.with_labels(Label::B)).take_sample(),\n        )\n    }\n\n    fn get_cardinality(samples: &[[u8; 32]]) -> f64 {\n        let mut buckets = [0.0; 32];\n        for &sample in samples {\n            for (i, m) in sample.into_iter().enumerate() {\n                
buckets[i] = f64::max(buckets[i], m as f64);\n            }\n        }\n\n        buckets\n            .into_iter()\n            .map(|f| 2.0f64.powf(-f))\n            .sum::<f64>()\n            .recip()\n            * 0.697\n            * 32.0\n            * 32.0\n    }\n\n    fn test_cardinality(n: usize, dist: impl Distribution<f64>) -> ([usize; 3], [f64; 3]) {\n        let hll = HyperLogLogVec::<StaticLabelSet<Label>, 32>::new();\n\n        let mut iter = StdRng::seed_from_u64(0x2024_0112).sample_iter(dist);\n        let mut set_a = HashSet::new();\n        let mut set_b = HashSet::new();\n\n        for x in iter.by_ref().take(n) {\n            set_a.insert(x.to_bits());\n            hll.get_metric(hll.with_labels(Label::A))\n                .measure(&x.to_bits());\n        }\n        for x in iter.by_ref().take(n) {\n            set_b.insert(x.to_bits());\n            hll.get_metric(hll.with_labels(Label::B))\n                .measure(&x.to_bits());\n        }\n        let merge = &set_a | &set_b;\n\n        let (a, b) = collect(&hll);\n        let len = get_cardinality(&[a, b]);\n        let len_a = get_cardinality(&[a]);\n        let len_b = get_cardinality(&[b]);\n\n        ([merge.len(), set_a.len(), set_b.len()], [len, len_a, len_b])\n    }\n\n    #[test]\n    fn test_cardinality_small() {\n        let (actual, estimate) = test_cardinality(100, Zipf::new(100.0, 1.2f64).unwrap());\n\n        assert_eq!(actual, [46, 30, 32]);\n        assert!(51.3 < estimate[0] && estimate[0] < 51.4);\n        assert!(44.0 < estimate[1] && estimate[1] < 44.1);\n        assert!(39.0 < estimate[2] && estimate[2] < 39.1);\n    }\n\n    #[test]\n    fn test_cardinality_medium() {\n        let (actual, estimate) = test_cardinality(10000, Zipf::new(10000.0, 1.2f64).unwrap());\n\n        assert_eq!(actual, [2529, 1618, 1629]);\n        assert!(2309.1 < estimate[0] && estimate[0] < 2309.2);\n        assert!(1566.6 < estimate[1] && estimate[1] < 1566.7);\n        assert!(1629.5 < 
estimate[2] && estimate[2] < 1629.6);\n    }\n\n    #[test]\n    fn test_cardinality_large() {\n        let (actual, estimate) =\n            test_cardinality(1_000_000, Zipf::new(1_000_000.0, 1.2f64).unwrap());\n\n        assert_eq!(actual, [129077, 79579, 79630]);\n        assert!(126067.2 < estimate[0] && estimate[0] < 126067.3);\n        assert!(83076.8 < estimate[1] && estimate[1] < 83076.9);\n        assert!(64251.2 < estimate[2] && estimate[2] < 64251.3);\n    }\n\n    #[test]\n    fn test_cardinality_small2() {\n        let (actual, estimate) = test_cardinality(100, Zipf::new(200.0, 0.8f64).unwrap());\n\n        assert_eq!(actual, [92, 58, 60]);\n        assert!(116.1 < estimate[0] && estimate[0] < 116.2);\n        assert!(81.7 < estimate[1] && estimate[1] < 81.8);\n        assert!(69.3 < estimate[2] && estimate[2] < 69.4);\n    }\n\n    #[test]\n    fn test_cardinality_medium2() {\n        let (actual, estimate) = test_cardinality(10000, Zipf::new(20000.0, 0.8f64).unwrap());\n\n        assert_eq!(actual, [8201, 5131, 5051]);\n        assert!(6846.4 < estimate[0] && estimate[0] < 6846.5);\n        assert!(5239.1 < estimate[1] && estimate[1] < 5239.2);\n        assert!(4292.8 < estimate[2] && estimate[2] < 4292.9);\n    }\n\n    #[test]\n    fn test_cardinality_large2() {\n        let (actual, estimate) =\n            test_cardinality(1_000_000, Zipf::new(2_000_000.0, 0.8f64).unwrap());\n\n        assert_eq!(actual, [777847, 482069, 482246]);\n        assert!(699437.4 < estimate[0] && estimate[0] < 699437.5);\n        assert!(374948.9 < estimate[1] && estimate[1] < 374949.0);\n        assert!(434609.7 < estimate[2] && estimate[2] < 434609.8);\n    }\n}\n"
  },
  {
    "path": "libs/metrics/src/launch_timestamp.rs",
    "content": "//! A timestamp captured at process startup to identify restarts of the process, e.g., in logs and metrics.\n\nuse std::fmt::Display;\n\nuse chrono::Utc;\n\nuse super::register_uint_gauge;\n\npub struct LaunchTimestamp(chrono::DateTime<Utc>);\n\nimpl LaunchTimestamp {\n    pub fn generate() -> Self {\n        LaunchTimestamp(Utc::now())\n    }\n}\n\nimpl Display for LaunchTimestamp {\n    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {\n        write!(f, \"{}\", self.0)\n    }\n}\n\npub fn set_launch_timestamp_metric(launch_ts: &'static LaunchTimestamp) {\n    let millis_since_epoch: u64 = launch_ts\n        .0\n        .timestamp_millis()\n        .try_into()\n        .expect(\"we're after the epoch, this should be positive\");\n    let metric = register_uint_gauge!(\n        \"libmetrics_launch_timestamp\",\n        \"Timestamp (millis since epoch) at wich the process launched.\"\n    )\n    .unwrap();\n    metric.set(millis_since_epoch);\n}\n"
  },
  {
    "path": "libs/metrics/src/lib.rs",
    "content": "//! We re-export those from prometheus crate to\n//! make sure that we use the same dep version everywhere.\n//! Otherwise, we might not see all metrics registered via\n//! a default registry.\n#![deny(clippy::undocumented_unsafe_blocks)]\n\nuse std::sync::RwLock;\n\nuse measured::label::{LabelGroupSet, LabelGroupVisitor, LabelName, NoLabels};\nuse measured::metric::counter::CounterState;\nuse measured::metric::gauge::GaugeState;\nuse measured::metric::group::Encoding;\nuse measured::metric::name::{MetricName, MetricNameEncoder};\nuse measured::metric::{MetricEncoding, MetricFamilyEncoding, MetricType};\nuse measured::{FixedCardinalityLabel, LabelGroup, MetricGroup};\nuse once_cell::sync::Lazy;\nuse prometheus::Registry;\nuse prometheus::core::{\n    Atomic, AtomicU64, Collector, GenericCounter, GenericCounterVec, GenericGauge, GenericGaugeVec,\n};\npub use prometheus::local::LocalHistogram;\npub use prometheus::{\n    Counter, CounterVec, Encoder, Error, Gauge, GaugeVec, Histogram, HistogramVec, IntCounter,\n    IntCounterVec, IntGauge, IntGaugeVec, TextEncoder, core, default_registry, exponential_buckets,\n    linear_buckets, opts, proto, register, register_counter_vec, register_gauge,\n    register_gauge_vec, register_histogram, register_histogram_vec, register_int_counter,\n    register_int_counter_vec, register_int_gauge, register_int_gauge_vec,\n};\n\npub mod launch_timestamp;\nmod wrappers;\npub use prometheus;\npub use wrappers::{CountedReader, CountedWriter};\nmod hll;\npub use hll::{HyperLogLog, HyperLogLogState, HyperLogLogVec};\n#[cfg(target_os = \"linux\")]\npub mod more_process_metrics;\n\npub type UIntGauge = GenericGauge<AtomicU64>;\npub type UIntGaugeVec = GenericGaugeVec<AtomicU64>;\n\n#[macro_export]\nmacro_rules! register_uint_gauge_vec {\n    ($NAME:expr, $HELP:expr, $LABELS_NAMES:expr $(,)?) 
=> {{\n        let gauge_vec = UIntGaugeVec::new($crate::opts!($NAME, $HELP), $LABELS_NAMES).unwrap();\n        $crate::register(Box::new(gauge_vec.clone())).map(|_| gauge_vec)\n    }};\n}\n\n#[macro_export]\nmacro_rules! register_uint_gauge {\n    ($NAME:expr, $HELP:expr $(,)?) => {{\n        let gauge = $crate::UIntGauge::new($NAME, $HELP).unwrap();\n        $crate::register(Box::new(gauge.clone())).map(|_| gauge)\n    }};\n}\n\n/// Special internal registry, to collect metrics independently from the default registry.\n/// Was introduced to fix deadlock with lazy registration of metrics in the default registry.\nstatic INTERNAL_REGISTRY: Lazy<Registry> = Lazy::new(Registry::new);\n\n/// Register a collector in the internal registry. MUST be called before the first call to `gather()`.\n///\n/// Otherwise, we can have a deadlock in the `gather()` call, trying to register a new collector\n/// while holding the lock.\npub fn register_internal(c: Box<dyn Collector>) -> prometheus::Result<()> {\n    INTERNAL_REGISTRY.register(c)\n}\n\n/// Gathers all Prometheus metrics and records the I/O stats just before that.\n///\n/// Metrics gathering is a relatively simple and standalone operation, so\n/// it might be fine to do it this way to keep things simple.\npub fn gather() -> Vec<prometheus::proto::MetricFamily> {\n    update_rusage_metrics();\n    let mut mfs = prometheus::gather();\n    let mut internal_mfs = INTERNAL_REGISTRY.gather();\n    mfs.append(&mut internal_mfs);\n    mfs\n}\n\nstatic DISK_IO_BYTES: Lazy<IntGaugeVec> = Lazy::new(|| {\n    register_int_gauge_vec!(\n        \"libmetrics_disk_io_bytes_total\",\n        \"Bytes written and read from disk, grouped by the operation (read|write)\",\n        &[\"io_operation\"]\n    )\n    .expect(\"Failed to register disk i/o bytes int gauge vec\")\n});\n\nstatic MAXRSS_KB: Lazy<IntGauge> = Lazy::new(|| {\n    register_int_gauge!(\n        \"libmetrics_maxrss_kb\",\n        \"Memory usage (Maximum Resident Set 
Size)\"\n    )\n    .expect(\"Failed to register maxrss_kb int gauge\")\n});\n\n/// Most common fsync latency is 50 µs - 100 µs, but it can be much higher,\n/// especially during many concurrent disk operations.\npub const DISK_FSYNC_SECONDS_BUCKETS: &[f64] =\n    &[0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1.0, 5.0, 10.0, 30.0];\n\n/// Constructs histogram buckets that are powers of two starting at 1 (i.e. 2^0), covering the end\n/// points. For example, passing start=5,end=20 yields 4,8,16,32 as does start=4,end=32.\npub fn pow2_buckets(start: usize, end: usize) -> Vec<f64> {\n    assert_ne!(start, 0);\n    assert!(start <= end);\n    let start = match start.checked_next_power_of_two() {\n        Some(n) if n == start => n, // start already power of two\n        Some(n) => n >> 1,          // power of two below start\n        None => panic!(\"start too large\"),\n    };\n    let end = end.checked_next_power_of_two().expect(\"end too large\");\n    std::iter::successors(Some(start), |n| n.checked_mul(2))\n        .take_while(|n| n <= &end)\n        .map(|n| n as f64)\n        .collect()\n}\n\npub struct InfoMetric<L: LabelGroup, M: MetricType = GaugeState> {\n    label: RwLock<L>,\n    metric: M,\n}\n\nimpl<L: LabelGroup> InfoMetric<L> {\n    pub fn new(label: L) -> Self {\n        Self::with_metric(label, GaugeState::new(1))\n    }\n}\n\nimpl<L: LabelGroup + Default> Default for InfoMetric<L, GaugeState> {\n    fn default() -> Self {\n        InfoMetric::new(L::default())\n    }\n}\n\nimpl<L: LabelGroup, M: MetricType<Metadata = ()>> InfoMetric<L, M> {\n    pub fn with_metric(label: L, metric: M) -> Self {\n        Self {\n            label: RwLock::new(label),\n            metric,\n        }\n    }\n\n    pub fn set_label(&self, label: L) {\n        *self.label.write().unwrap() = label;\n    }\n}\n\nimpl<L, M, E> MetricFamilyEncoding<E> for InfoMetric<L, M>\nwhere\n    L: LabelGroup,\n    M: MetricEncoding<E, Metadata = ()>,\n    E: Encoding,\n{\n    fn 
collect_family_into(\n        &self,\n        name: impl measured::metric::name::MetricNameEncoder,\n        enc: &mut E,\n    ) -> Result<(), E::Err> {\n        M::write_type(&name, enc)?;\n        self.metric\n            .collect_into(&(), &*self.label.read().unwrap(), name, enc)\n    }\n}\n\npub struct BuildInfo {\n    pub revision: &'static str,\n    pub build_tag: &'static str,\n}\n\nimpl LabelGroup for BuildInfo {\n    fn visit_values(&self, v: &mut impl LabelGroupVisitor) {\n        const REVISION: &LabelName = LabelName::from_str(\"revision\");\n        v.write_value(REVISION, &self.revision);\n        const BUILD_TAG: &LabelName = LabelName::from_str(\"build_tag\");\n        v.write_value(BUILD_TAG, &self.build_tag);\n    }\n}\n\n#[derive(MetricGroup)]\n#[metric(new(build_info: BuildInfo))]\npub struct NeonMetrics {\n    #[cfg(target_os = \"linux\")]\n    #[metric(namespace = \"process\")]\n    #[metric(init = measured_process::ProcessCollector::for_self())]\n    process: measured_process::ProcessCollector,\n\n    #[metric(namespace = \"libmetrics\")]\n    #[metric(init = LibMetrics::new(build_info))]\n    libmetrics: LibMetrics,\n}\n\n#[derive(MetricGroup)]\n#[metric(new(build_info: BuildInfo))]\npub struct LibMetrics {\n    #[metric(init = InfoMetric::new(build_info))]\n    build_info: InfoMetric<BuildInfo>,\n\n    #[metric(flatten)]\n    rusage: Rusage,\n\n    serve_count: CollectionCounter,\n}\n\nfn write_gauge<Enc: Encoding>(\n    x: i64,\n    labels: impl LabelGroup,\n    name: impl MetricNameEncoder,\n    enc: &mut Enc,\n) -> Result<(), Enc::Err>\nwhere\n    GaugeState: MetricEncoding<Enc>,\n{\n    GaugeState::new(x).collect_into(&(), labels, name, enc)\n}\n\n#[derive(Default)]\nstruct Rusage;\n\n#[derive(FixedCardinalityLabel, Clone, Copy)]\n#[label(singleton = \"io_operation\")]\nenum IoOp {\n    Read,\n    Write,\n}\n\nimpl<T: Encoding> MetricGroup<T> for Rusage\nwhere\n    GaugeState: MetricEncoding<T>,\n{\n    fn collect_group_into(&self, enc: 
&mut T) -> Result<(), T::Err> {\n        const DISK_IO: &MetricName = MetricName::from_str(\"disk_io_bytes_total\");\n        const MAXRSS: &MetricName = MetricName::from_str(\"maxrss_kb\");\n\n        let ru = get_rusage_stats();\n\n        enc.write_help(\n            DISK_IO,\n            \"Bytes written and read from disk, grouped by the operation (read|write)\",\n        )?;\n        GaugeState::write_type(DISK_IO, enc)?;\n        write_gauge(ru.ru_inblock * BYTES_IN_BLOCK, IoOp::Read, DISK_IO, enc)?;\n        write_gauge(ru.ru_oublock * BYTES_IN_BLOCK, IoOp::Write, DISK_IO, enc)?;\n\n        enc.write_help(MAXRSS, \"Memory usage (Maximum Resident Set Size)\")?;\n        GaugeState::write_type(MAXRSS, enc)?;\n        write_gauge(ru.ru_maxrss, IoOp::Read, MAXRSS, enc)?;\n\n        Ok(())\n    }\n}\n\n#[derive(Default)]\nstruct CollectionCounter(CounterState);\n\nimpl<T: Encoding> MetricFamilyEncoding<T> for CollectionCounter\nwhere\n    CounterState: MetricEncoding<T>,\n{\n    fn collect_family_into(\n        &self,\n        name: impl measured::metric::name::MetricNameEncoder,\n        enc: &mut T,\n    ) -> Result<(), T::Err> {\n        self.0.inc();\n        enc.write_help(&name, \"Number of metric requests made\")?;\n        self.0.collect_into(&(), NoLabels, name, enc)\n    }\n}\n\npub fn set_build_info_metric(revision: &str, build_tag: &str) {\n    let metric = register_int_gauge_vec!(\n        \"libmetrics_build_info\",\n        \"Build/version information\",\n        &[\"revision\", \"build_tag\"]\n    )\n    .expect(\"Failed to register build info metric\");\n    metric.with_label_values(&[revision, build_tag]).set(1);\n}\nconst BYTES_IN_BLOCK: i64 = 512;\n\n// Records I/O stats in a \"cross-platform\" way.\n// Compiles both on macOS and Linux, but current macOS implementation always returns 0 as values for I/O stats.\n// An alternative is to read procfs (`/proc/[pid]/io`) which does not work under macOS at all, hence abandoned.\n//\n// Uses 
https://www.freebsd.org/cgi/man.cgi?query=getrusage to retrieve the number of block operations\n// performed by the process.\n// We know the size of the block, so we can determine the I/O bytes out of it.\n// The value might be not 100% exact, but should be fine for Prometheus metrics in this case.\nfn update_rusage_metrics() {\n    let rusage_stats = get_rusage_stats();\n\n    DISK_IO_BYTES\n        .with_label_values(&[\"read\"])\n        .set(rusage_stats.ru_inblock * BYTES_IN_BLOCK);\n    DISK_IO_BYTES\n        .with_label_values(&[\"write\"])\n        .set(rusage_stats.ru_oublock * BYTES_IN_BLOCK);\n\n    // On macOS, the unit of maxrss is bytes; on Linux, it's kilobytes. https://stackoverflow.com/a/59915669\n    #[cfg(target_os = \"macos\")]\n    {\n        MAXRSS_KB.set(rusage_stats.ru_maxrss / 1024);\n    }\n    #[cfg(not(target_os = \"macos\"))]\n    {\n        MAXRSS_KB.set(rusage_stats.ru_maxrss);\n    }\n}\n\nfn get_rusage_stats() -> libc::rusage {\n    let mut rusage = std::mem::MaybeUninit::uninit();\n\n    // SAFETY: kernel will initialize the struct for us\n    unsafe {\n        let ret = libc::getrusage(libc::RUSAGE_SELF, rusage.as_mut_ptr());\n        assert!(ret == 0, \"getrusage failed: bad args\");\n        rusage.assume_init()\n    }\n}\n\n/// Create an [`IntCounterPairVec`] and registers to default registry.\n#[macro_export(local_inner_macros)]\nmacro_rules! register_int_counter_pair_vec {\n    ($NAME1:expr, $HELP1:expr, $NAME2:expr, $HELP2:expr, $LABELS_NAMES:expr $(,)?) => {{\n        match (\n            $crate::register_int_counter_vec!($NAME1, $HELP1, $LABELS_NAMES),\n            $crate::register_int_counter_vec!($NAME2, $HELP2, $LABELS_NAMES),\n        ) {\n            (Ok(inc), Ok(dec)) => Ok($crate::IntCounterPairVec::new(inc, dec)),\n            (Err(e), _) | (_, Err(e)) => Err(e),\n        }\n    }};\n}\n\n/// Create an [`IntCounterPair`] and registers to default registry.\n#[macro_export(local_inner_macros)]\nmacro_rules! 
register_int_counter_pair {\n    ($NAME1:expr, $HELP1:expr, $NAME2:expr, $HELP2:expr $(,)?) => {{\n        match (\n            $crate::register_int_counter!($NAME1, $HELP1),\n            $crate::register_int_counter!($NAME2, $HELP2),\n        ) {\n            (Ok(inc), Ok(dec)) => Ok($crate::IntCounterPair::new(inc, dec)),\n            (Err(e), _) | (_, Err(e)) => Err(e),\n        }\n    }};\n}\n\n/// A Pair of [`GenericCounterVec`]s. Like an [`GenericGaugeVec`] but will always observe changes\npub struct GenericCounterPairVec<P: Atomic> {\n    inc: GenericCounterVec<P>,\n    dec: GenericCounterVec<P>,\n}\n\n/// A Pair of [`GenericCounter`]s. Like an [`GenericGauge`] but will always observe changes\npub struct GenericCounterPair<P: Atomic> {\n    inc: GenericCounter<P>,\n    dec: GenericCounter<P>,\n}\n\nimpl<P: Atomic> GenericCounterPairVec<P> {\n    pub fn new(inc: GenericCounterVec<P>, dec: GenericCounterVec<P>) -> Self {\n        Self { inc, dec }\n    }\n\n    /// `get_metric_with_label_values` returns the [`GenericCounterPair<P>`] for the given slice\n    /// of label values (same order as the VariableLabels in Desc). 
If that combination of\n    /// label values is accessed for the first time, a new [`GenericCounterPair<P>`] is created.\n    ///\n    /// An error is returned if the number of label values is not the same as the\n    /// number of VariableLabels in Desc.\n    pub fn get_metric_with_label_values(\n        &self,\n        vals: &[&str],\n    ) -> prometheus::Result<GenericCounterPair<P>> {\n        Ok(GenericCounterPair {\n            inc: self.inc.get_metric_with_label_values(vals)?,\n            dec: self.dec.get_metric_with_label_values(vals)?,\n        })\n    }\n\n    /// `with_label_values` works as `get_metric_with_label_values`, but panics if an error\n    /// occurs.\n    pub fn with_label_values(&self, vals: &[&str]) -> GenericCounterPair<P> {\n        self.get_metric_with_label_values(vals).unwrap()\n    }\n\n    pub fn remove_label_values(&self, res: &mut [prometheus::Result<()>; 2], vals: &[&str]) {\n        res[0] = self.inc.remove_label_values(vals);\n        res[1] = self.dec.remove_label_values(vals);\n    }\n}\n\nimpl<P: Atomic> GenericCounterPair<P> {\n    pub fn new(inc: GenericCounter<P>, dec: GenericCounter<P>) -> Self {\n        Self { inc, dec }\n    }\n\n    /// Increment the gauge by 1, returning a guard that decrements by 1 on drop.\n    pub fn guard(&self) -> GenericCounterPairGuard<P> {\n        self.inc.inc();\n        GenericCounterPairGuard(self.dec.clone())\n    }\n\n    /// Increment the gauge by n, returning a guard that decrements by n on drop.\n    pub fn guard_by(&self, n: P::T) -> GenericCounterPairGuardBy<P> {\n        self.inc.inc_by(n);\n        GenericCounterPairGuardBy(self.dec.clone(), n)\n    }\n\n    /// Increase the gauge by 1.\n    #[inline]\n    pub fn inc(&self) {\n        self.inc.inc();\n    }\n\n    /// Decrease the gauge by 1.\n    #[inline]\n    pub fn dec(&self) {\n        self.dec.inc();\n    }\n\n    /// Add the given value to the gauge. 
(The value can be\n    /// negative, resulting in a decrement of the gauge.)\n    #[inline]\n    pub fn inc_by(&self, v: P::T) {\n        self.inc.inc_by(v);\n    }\n\n    /// Subtract the given value from the gauge. (The value can be\n    /// negative, resulting in an increment of the gauge.)\n    #[inline]\n    pub fn dec_by(&self, v: P::T) {\n        self.dec.inc_by(v);\n    }\n}\n\nimpl<P: Atomic> Clone for GenericCounterPair<P> {\n    fn clone(&self) -> Self {\n        Self {\n            inc: self.inc.clone(),\n            dec: self.dec.clone(),\n        }\n    }\n}\n\n/// Guard returned by [`GenericCounterPair::guard`]\npub struct GenericCounterPairGuard<P: Atomic>(GenericCounter<P>);\n\nimpl<P: Atomic> Drop for GenericCounterPairGuard<P> {\n    fn drop(&mut self) {\n        self.0.inc();\n    }\n}\n/// Guard returned by [`GenericCounterPair::guard_by`]\npub struct GenericCounterPairGuardBy<P: Atomic>(GenericCounter<P>, P::T);\n\nimpl<P: Atomic> Drop for GenericCounterPairGuardBy<P> {\n    fn drop(&mut self) {\n        self.0.inc_by(self.1);\n    }\n}\n\n/// A Pair of [`IntCounterVec`]s. Like an [`IntGaugeVec`] but will always observe changes\npub type IntCounterPairVec = GenericCounterPairVec<AtomicU64>;\n\n/// A Pair of [`IntCounter`]s. 
Like an [`IntGauge`] but will always observe changes\npub type IntCounterPair = GenericCounterPair<AtomicU64>;\n\n/// A guard for [`IntCounterPair`] that will decrement the gauge on drop\npub type IntCounterPairGuard = GenericCounterPairGuard<AtomicU64>;\n\npub trait CounterPairAssoc {\n    const INC_NAME: &'static MetricName;\n    const DEC_NAME: &'static MetricName;\n\n    const INC_HELP: &'static str;\n    const DEC_HELP: &'static str;\n\n    type LabelGroupSet: LabelGroupSet;\n}\n\npub struct CounterPairVec<A: CounterPairAssoc> {\n    vec: measured::metric::MetricVec<MeasuredCounterPairState, A::LabelGroupSet>,\n}\n\nimpl<A: CounterPairAssoc> Default for CounterPairVec<A>\nwhere\n    A::LabelGroupSet: Default,\n{\n    fn default() -> Self {\n        Self {\n            vec: Default::default(),\n        }\n    }\n}\n\nimpl<A: CounterPairAssoc> CounterPairVec<A> {\n    pub fn guard(\n        &self,\n        labels: <A::LabelGroupSet as LabelGroupSet>::Group<'_>,\n    ) -> MeasuredCounterPairGuard<'_, A> {\n        let id = self.vec.with_labels(labels);\n        self.vec.get_metric(id).inc.inc();\n        MeasuredCounterPairGuard { vec: &self.vec, id }\n    }\n    pub fn inc(&self, labels: <A::LabelGroupSet as LabelGroupSet>::Group<'_>) {\n        let id = self.vec.with_labels(labels);\n        self.vec.get_metric(id).inc.inc();\n    }\n    pub fn dec(&self, labels: <A::LabelGroupSet as LabelGroupSet>::Group<'_>) {\n        let id = self.vec.with_labels(labels);\n        self.vec.get_metric(id).dec.inc();\n    }\n    pub fn remove_metric(\n        &self,\n        labels: <A::LabelGroupSet as LabelGroupSet>::Group<'_>,\n    ) -> Option<MeasuredCounterPairState> {\n        let id = self.vec.with_labels(labels);\n        self.vec.remove_metric(id)\n    }\n\n    pub fn sample(&self, labels: <A::LabelGroupSet as LabelGroupSet>::Group<'_>) -> u64 {\n        let id = self.vec.with_labels(labels);\n        let metric = self.vec.get_metric(id);\n\n        let inc = 
metric.inc.count.load(std::sync::atomic::Ordering::Relaxed);\n        let dec = metric.dec.count.load(std::sync::atomic::Ordering::Relaxed);\n        inc.saturating_sub(dec)\n    }\n}\n\nimpl<T, A> ::measured::metric::group::MetricGroup<T> for CounterPairVec<A>\nwhere\n    T: ::measured::metric::group::Encoding,\n    A: CounterPairAssoc,\n    ::measured::metric::counter::CounterState: ::measured::metric::MetricEncoding<T>,\n{\n    fn collect_group_into(&self, enc: &mut T) -> Result<(), T::Err> {\n        // write decrement first to avoid a race condition where inc - dec < 0\n        T::write_help(enc, A::DEC_NAME, A::DEC_HELP)?;\n        self.vec\n            .collect_family_into(A::DEC_NAME, &mut Dec(&mut *enc))?;\n\n        T::write_help(enc, A::INC_NAME, A::INC_HELP)?;\n        self.vec\n            .collect_family_into(A::INC_NAME, &mut Inc(&mut *enc))?;\n\n        Ok(())\n    }\n}\n\n#[derive(MetricGroup, Default)]\npub struct MeasuredCounterPairState {\n    pub inc: CounterState,\n    pub dec: CounterState,\n}\n\nimpl measured::metric::MetricType for MeasuredCounterPairState {\n    type Metadata = ();\n}\n\npub struct MeasuredCounterPairGuard<'a, A: CounterPairAssoc> {\n    vec: &'a measured::metric::MetricVec<MeasuredCounterPairState, A::LabelGroupSet>,\n    id: measured::metric::LabelId<A::LabelGroupSet>,\n}\n\nimpl<A: CounterPairAssoc> Drop for MeasuredCounterPairGuard<'_, A> {\n    fn drop(&mut self) {\n        self.vec.get_metric(self.id).dec.inc();\n    }\n}\n\n/// [`MetricEncoding`] for [`MeasuredCounterPairState`] that only writes the inc counter to the inner encoder.\nstruct Inc<T>(T);\n/// [`MetricEncoding`] for [`MeasuredCounterPairState`] that only writes the dec counter to the inner encoder.\nstruct Dec<T>(T);\n\nimpl<T: Encoding> Encoding for Inc<T> {\n    type Err = T::Err;\n\n    fn write_help(&mut self, name: impl MetricNameEncoder, help: &str) -> Result<(), Self::Err> {\n        self.0.write_help(name, help)\n    }\n}\n\nimpl<T: Encoding> 
MetricEncoding<Inc<T>> for MeasuredCounterPairState\nwhere\n    CounterState: MetricEncoding<T>,\n{\n    fn write_type(name: impl MetricNameEncoder, enc: &mut Inc<T>) -> Result<(), T::Err> {\n        CounterState::write_type(name, &mut enc.0)\n    }\n    fn collect_into(\n        &self,\n        metadata: &(),\n        labels: impl LabelGroup,\n        name: impl MetricNameEncoder,\n        enc: &mut Inc<T>,\n    ) -> Result<(), T::Err> {\n        self.inc.collect_into(metadata, labels, name, &mut enc.0)\n    }\n}\n\nimpl<T: Encoding> Encoding for Dec<T> {\n    type Err = T::Err;\n\n    fn write_help(&mut self, name: impl MetricNameEncoder, help: &str) -> Result<(), Self::Err> {\n        self.0.write_help(name, help)\n    }\n}\n\n/// Write the dec counter to the encoder\nimpl<T: Encoding> MetricEncoding<Dec<T>> for MeasuredCounterPairState\nwhere\n    CounterState: MetricEncoding<T>,\n{\n    fn write_type(name: impl MetricNameEncoder, enc: &mut Dec<T>) -> Result<(), T::Err> {\n        CounterState::write_type(name, &mut enc.0)\n    }\n    fn collect_into(\n        &self,\n        metadata: &(),\n        labels: impl LabelGroup,\n        name: impl MetricNameEncoder,\n        enc: &mut Dec<T>,\n    ) -> Result<(), T::Err> {\n        self.dec.collect_into(metadata, labels, name, &mut enc.0)\n    }\n}\n\n#[cfg(test)]\nmod tests {\n    use super::*;\n\n    const POW2_BUCKETS_MAX: usize = 1 << (usize::BITS - 1);\n\n    #[test]\n    fn pow2_buckets_cases() {\n        assert_eq!(pow2_buckets(1, 1), vec![1.0]);\n        assert_eq!(pow2_buckets(1, 2), vec![1.0, 2.0]);\n        assert_eq!(pow2_buckets(1, 3), vec![1.0, 2.0, 4.0]);\n        assert_eq!(pow2_buckets(1, 4), vec![1.0, 2.0, 4.0]);\n        assert_eq!(pow2_buckets(1, 5), vec![1.0, 2.0, 4.0, 8.0]);\n        assert_eq!(pow2_buckets(1, 6), vec![1.0, 2.0, 4.0, 8.0]);\n        assert_eq!(pow2_buckets(1, 7), vec![1.0, 2.0, 4.0, 8.0]);\n        assert_eq!(pow2_buckets(1, 8), vec![1.0, 2.0, 4.0, 8.0]);\n        
assert_eq!(\n            pow2_buckets(1, 200),\n            vec![1.0, 2.0, 4.0, 8.0, 16.0, 32.0, 64.0, 128.0, 256.0]\n        );\n\n        assert_eq!(pow2_buckets(1, 8), vec![1.0, 2.0, 4.0, 8.0]);\n        assert_eq!(pow2_buckets(2, 8), vec![2.0, 4.0, 8.0]);\n        assert_eq!(pow2_buckets(3, 8), vec![2.0, 4.0, 8.0]);\n        assert_eq!(pow2_buckets(4, 8), vec![4.0, 8.0]);\n        assert_eq!(pow2_buckets(5, 8), vec![4.0, 8.0]);\n        assert_eq!(pow2_buckets(6, 8), vec![4.0, 8.0]);\n        assert_eq!(pow2_buckets(7, 8), vec![4.0, 8.0]);\n        assert_eq!(pow2_buckets(8, 8), vec![8.0]);\n        assert_eq!(pow2_buckets(20, 200), vec![16.0, 32.0, 64.0, 128.0, 256.0]);\n\n        // Largest valid values.\n        assert_eq!(\n            pow2_buckets(1, POW2_BUCKETS_MAX).len(),\n            usize::BITS as usize\n        );\n        assert_eq!(pow2_buckets(POW2_BUCKETS_MAX, POW2_BUCKETS_MAX).len(), 1);\n    }\n\n    #[test]\n    #[should_panic]\n    fn pow2_buckets_zero_start() {\n        pow2_buckets(0, 1);\n    }\n\n    #[test]\n    #[should_panic]\n    fn pow2_buckets_end_lt_start() {\n        pow2_buckets(2, 1);\n    }\n\n    #[test]\n    #[should_panic]\n    fn pow2_buckets_end_overflow_min() {\n        pow2_buckets(1, POW2_BUCKETS_MAX + 1);\n    }\n\n    #[test]\n    #[should_panic]\n    fn pow2_buckets_end_overflow_max() {\n        pow2_buckets(1, usize::MAX);\n    }\n}\n"
  },
  {
    "path": "libs/metrics/src/more_process_metrics.rs",
    "content": "//! process metrics that the [`::prometheus`] crate doesn't provide.\n\n// This module has heavy inspiration from the prometheus crate's `process_collector.rs`.\n\nuse once_cell::sync::Lazy;\nuse prometheus::Gauge;\n\nuse crate::UIntGauge;\n\npub struct Collector {\n    descs: Vec<prometheus::core::Desc>,\n    vmlck: crate::UIntGauge,\n    cpu_seconds_highres: Gauge,\n}\n\nconst NMETRICS: usize = 2;\n\nstatic CLK_TCK_F64: Lazy<f64> = Lazy::new(|| {\n    // SAFETY: libc::sysconf is safe, it merely returns a value.\n    let long = unsafe { libc::sysconf(libc::_SC_CLK_TCK) };\n    if long == -1 {\n        panic!(\"sysconf(_SC_CLK_TCK) failed\");\n    }\n    let convertible_to_f64: i32 =\n        i32::try_from(long).expect(\"sysconf(_SC_CLK_TCK) is larger than i32\");\n    convertible_to_f64 as f64\n});\n\nimpl prometheus::core::Collector for Collector {\n    fn desc(&self) -> Vec<&prometheus::core::Desc> {\n        self.descs.iter().collect()\n    }\n\n    fn collect(&self) -> Vec<prometheus::proto::MetricFamily> {\n        let Ok(myself) = procfs::process::Process::myself() else {\n            return vec![];\n        };\n        let mut mfs = Vec::with_capacity(NMETRICS);\n        if let Ok(status) = myself.status() {\n            if let Some(vmlck) = status.vmlck {\n                self.vmlck.set(vmlck);\n                mfs.extend(self.vmlck.collect())\n            }\n        }\n        if let Ok(stat) = myself.stat() {\n            let cpu_seconds = stat.utime + stat.stime;\n            self.cpu_seconds_highres\n                .set(cpu_seconds as f64 / *CLK_TCK_F64);\n            mfs.extend(self.cpu_seconds_highres.collect());\n        }\n        mfs\n    }\n}\n\nimpl Collector {\n    pub fn new() -> Self {\n        let mut descs = Vec::new();\n\n        let vmlck =\n            UIntGauge::new(\"libmetrics_process_status_vmlck\", \"/proc/self/status vmlck\").unwrap();\n        descs.extend(\n            prometheus::core::Collector::desc(&vmlck)\n  
              .into_iter()\n                .cloned(),\n        );\n\n        let cpu_seconds_highres = Gauge::new(\n            \"libmetrics_process_cpu_seconds_highres\",\n            \"Total user and system CPU time spent in seconds.\\\n             Sub-second resolution, hence better than `process_cpu_seconds_total`.\",\n        )\n        .unwrap();\n        descs.extend(\n            prometheus::core::Collector::desc(&cpu_seconds_highres)\n                .into_iter()\n                .cloned(),\n        );\n\n        Self {\n            descs,\n            vmlck,\n            cpu_seconds_highres,\n        }\n    }\n}\n\nimpl Default for Collector {\n    fn default() -> Self {\n        Self::new()\n    }\n}\n"
  },
  {
    "path": "libs/metrics/src/wrappers.rs",
    "content": "use std::io::{Read, Result, Write};\n\n/// A wrapper for an object implementing [Read]\n/// which allows a closure to observe the amount of bytes read.\n/// This is useful in conjunction with metrics (e.g. [IntCounter](crate::IntCounter)).\n///\n/// Example:\n///\n/// ```\n/// # use std::io::{Result, Read};\n/// # use metrics::{register_int_counter, IntCounter};\n/// # use metrics::CountedReader;\n/// # use once_cell::sync::Lazy;\n/// #\n/// # static INT_COUNTER: Lazy<IntCounter> = Lazy::new( || { register_int_counter!(\n/// #         \"int_counter\",\n/// #         \"let's count something!\"\n/// #     ).unwrap()\n/// # });\n/// #\n/// fn do_some_reads(stream: impl Read, count: usize) -> Result<Vec<u8>> {\n///     let mut reader = CountedReader::new(stream, |cnt| {\n///         // bump a counter each time we do a read\n///         INT_COUNTER.inc_by(cnt as u64);\n///     });\n///\n///     let mut proto_header = [0; 8];\n///     reader.read_exact(&mut proto_header)?;\n///     assert!(&proto_header == b\"deadbeef\");\n///\n///     let mut payload = vec![0; count];\n///     reader.read_exact(&mut payload)?;\n///     Ok(payload)\n/// }\n/// ```\n///\n/// NB: rapid concurrent bumping of an atomic counter might incur\n/// a performance penalty. 
Please make sure to amortize the amount\n/// of atomic operations by either using [BufReader](std::io::BufReader)\n/// or choosing a non-atomic (thread local) counter.\npub struct CountedReader<'a, T> {\n    reader: T,\n    update_counter: Box<dyn FnMut(usize) + Sync + Send + 'a>,\n}\n\nimpl<'a, T> CountedReader<'a, T> {\n    pub fn new(reader: T, update_counter: impl FnMut(usize) + Sync + Send + 'a) -> Self {\n        Self {\n            reader,\n            update_counter: Box::new(update_counter),\n        }\n    }\n\n    /// Get an immutable reference to the underlying [Read] implementor\n    pub fn inner(&self) -> &T {\n        &self.reader\n    }\n\n    /// Get a mutable reference to the underlying [Read] implementor\n    pub fn inner_mut(&mut self) -> &mut T {\n        &mut self.reader\n    }\n\n    /// Consume the wrapper and return the underlying [Read] implementor\n    pub fn into_inner(self) -> T {\n        self.reader\n    }\n}\n\nimpl<T: Read> Read for CountedReader<'_, T> {\n    fn read(&mut self, buf: &mut [u8]) -> Result<usize> {\n        let count = self.reader.read(buf)?;\n        (self.update_counter)(count);\n        Ok(count)\n    }\n}\n\n/// A wrapper for an object implementing [Write]\n/// which allows a closure to observe the amount of bytes written.\n/// This is useful in conjunction with metrics (e.g. 
[IntCounter](crate::IntCounter)).\n///\n/// Example:\n///\n/// ```\n/// # use std::io::{Result, Write};\n/// # use metrics::{register_int_counter, IntCounter};\n/// # use metrics::CountedWriter;\n/// # use once_cell::sync::Lazy;\n/// #\n/// # static INT_COUNTER: Lazy<IntCounter> = Lazy::new( || { register_int_counter!(\n/// #         \"int_counter\",\n/// #         \"let's count something!\"\n/// #     ).unwrap()\n/// # });\n/// #\n/// fn do_some_writes(stream: impl Write, payload: &[u8]) -> Result<()> {\n///     let mut writer = CountedWriter::new(stream, |cnt| {\n///         // bump a counter each time we do a write\n///         INT_COUNTER.inc_by(cnt as u64);\n///     });\n///\n///     let proto_header = b\"deadbeef\";\n///     writer.write_all(proto_header)?;\n///     writer.write_all(payload)\n/// }\n/// ```\n///\n/// NB: rapid concurrent bumping of an atomic counter might incur\n/// a performance penalty. Please make sure to amortize the amount\n/// of atomic operations by either using [BufWriter](std::io::BufWriter)\n/// or choosing a non-atomic (thread local) counter.\npub struct CountedWriter<'a, T> {\n    writer: T,\n    update_counter: Box<dyn FnMut(usize) + Sync + Send + 'a>,\n}\n\nimpl<'a, T> CountedWriter<'a, T> {\n    pub fn new(writer: T, update_counter: impl FnMut(usize) + Sync + Send + 'a) -> Self {\n        Self {\n            writer,\n            update_counter: Box::new(update_counter),\n        }\n    }\n\n    /// Get an immutable reference to the underlying [Write] implementor\n    pub fn inner(&self) -> &T {\n        &self.writer\n    }\n\n    /// Get a mutable reference to the underlying [Write] implementor\n    pub fn inner_mut(&mut self) -> &mut T {\n        &mut self.writer\n    }\n\n    /// Consume the wrapper and return the underlying [Write] implementor\n    pub fn into_inner(self) -> T {\n        self.writer\n    }\n}\n\nimpl<T: Write> Write for CountedWriter<'_, T> {\n    fn write(&mut self, buf: &[u8]) -> Result<usize> {\n        
let count = self.writer.write(buf)?;\n        (self.update_counter)(count);\n        Ok(count)\n    }\n\n    fn flush(&mut self) -> Result<()> {\n        self.writer.flush()\n    }\n}\n\n#[cfg(test)]\nmod tests {\n    use super::*;\n\n    #[test]\n    fn test_counted_reader() {\n        let stream = [0; 16];\n        let mut total = 0;\n        let mut reader = CountedReader::new(stream.as_ref(), |cnt| {\n            total += cnt;\n        });\n\n        let mut buffer = [0; 8];\n        reader.read_exact(&mut buffer).unwrap();\n        reader.read_exact(&mut buffer).unwrap();\n\n        drop(reader);\n        assert_eq!(total, stream.len());\n    }\n\n    #[test]\n    fn test_counted_writer() {\n        let mut stream = [0; 16];\n        let mut total = 0;\n        let mut writer = CountedWriter::new(stream.as_mut(), |cnt| {\n            total += cnt;\n        });\n\n        let buffer = [0; 8];\n        writer.write_all(&buffer).unwrap();\n        writer.write_all(&buffer).unwrap();\n\n        drop(writer);\n        assert_eq!(total, stream.len());\n    }\n\n    // This mimics the constraints of std::thread::spawn\n    fn assert_send_sync(_x: impl Sync + Send + 'static) {}\n\n    #[test]\n    fn test_send_sync_counted_reader() {\n        let stream: &[u8] = &[];\n        let mut reader = CountedReader::new(stream, |_| {});\n\n        assert_send_sync(move || {\n            reader.read_exact(&mut []).unwrap();\n        });\n    }\n\n    #[test]\n    fn test_send_sync_counted_writer() {\n        let stream = Vec::<u8>::new();\n        let mut writer = CountedWriter::new(stream, |_| {});\n\n        assert_send_sync(move || {\n            writer.write_all(&[]).unwrap();\n        });\n    }\n}\n"
  },
  {
    "path": "libs/neon-shmem/Cargo.toml",
    "content": "[package]\nname = \"neon-shmem\"\nversion = \"0.1.0\"\nedition.workspace = true\nlicense.workspace = true\n\n[dependencies]\nthiserror.workspace = true\nnix.workspace=true\nworkspace_hack = { version = \"0.1\", path = \"../../workspace_hack\" }\nlibc.workspace = true\nlock_api.workspace = true\nrustc-hash.workspace = true\n\n[target.'cfg(target_os = \"macos\")'.dependencies]\ntempfile = \"3.14.0\"\n\n[dev-dependencies]\nrand.workspace = true\nrand_distr = \"0.5.1\"\n"
  },
  {
    "path": "libs/neon-shmem/src/hash/core.rs",
    "content": "//! Simple hash table with chaining.\n\nuse std::hash::Hash;\nuse std::mem::MaybeUninit;\n\nuse crate::hash::entry::*;\n\n/// Invalid position within the map (either within the dictionary or bucket array).\npub(crate) const INVALID_POS: u32 = u32::MAX;\n\n/// Fundamental storage unit within the hash table. Either empty or contains a key-value pair.\n/// Always part of a chain of some kind (either a freelist if empty or a hash chain if full).\npub(crate) struct Bucket<K, V> {\n    /// Index of next bucket in the chain.\n    pub(crate) next: u32,\n    /// Key-value pair contained within bucket.\n    pub(crate) inner: Option<(K, V)>,\n}\n\n/// Core hash table implementation.\npub(crate) struct CoreHashMap<'a, K, V> {\n    /// Dictionary used to map hashes to bucket indices.\n    pub(crate) dictionary: &'a mut [u32],\n    /// Buckets containing key-value pairs.\n    pub(crate) buckets: &'a mut [Bucket<K, V>],\n    /// Head of the freelist.\n    pub(crate) free_head: u32,\n    /// Maximum index of a bucket allowed to be allocated. 
[`INVALID_POS`] if no limit.\n    pub(crate) alloc_limit: u32,\n    /// The number of currently occupied buckets.\n    pub(crate) buckets_in_use: u32,\n}\n\n/// Error for when there are no empty buckets left but one is needed.\n#[derive(Debug, PartialEq)]\npub struct FullError;\n\nimpl<'a, K: Clone + Hash + Eq, V> CoreHashMap<'a, K, V> {\n    const FILL_FACTOR: f32 = 0.60;\n\n    /// Estimate the size of data contained within the the hash map.\n    pub fn estimate_size(num_buckets: u32) -> usize {\n        let mut size = 0;\n\n        // buckets\n        size += size_of::<Bucket<K, V>>() * num_buckets as usize;\n\n        // dictionary\n        size += (f32::ceil((size_of::<u32>() * num_buckets as usize) as f32 / Self::FILL_FACTOR))\n            as usize;\n\n        size\n    }\n\n    pub fn new(\n        buckets: &'a mut [MaybeUninit<Bucket<K, V>>],\n        dictionary: &'a mut [MaybeUninit<u32>],\n    ) -> Self {\n        // Initialize the buckets\n        for i in 0..buckets.len() {\n            buckets[i].write(Bucket {\n                next: if i < buckets.len() - 1 {\n                    i as u32 + 1\n                } else {\n                    INVALID_POS\n                },\n                inner: None,\n            });\n        }\n\n        // Initialize the dictionary\n        for e in dictionary.iter_mut() {\n            e.write(INVALID_POS);\n        }\n\n        // TODO: use std::slice::assume_init_mut() once it stabilizes\n        let buckets =\n            unsafe { std::slice::from_raw_parts_mut(buckets.as_mut_ptr().cast(), buckets.len()) };\n        let dictionary = unsafe {\n            std::slice::from_raw_parts_mut(dictionary.as_mut_ptr().cast(), dictionary.len())\n        };\n\n        Self {\n            dictionary,\n            buckets,\n            free_head: 0,\n            buckets_in_use: 0,\n            alloc_limit: INVALID_POS,\n        }\n    }\n\n    /// Get the value associated with a key (if it exists) given its hash.\n    pub fn 
get_with_hash(&self, key: &K, hash: u64) -> Option<&V> {\n        let mut next = self.dictionary[hash as usize % self.dictionary.len()];\n        loop {\n            if next == INVALID_POS {\n                return None;\n            }\n\n            let bucket = &self.buckets[next as usize];\n            let (bucket_key, bucket_value) = bucket.inner.as_ref().expect(\"entry is in use\");\n            if bucket_key == key {\n                return Some(bucket_value);\n            }\n            next = bucket.next;\n        }\n    }\n\n    /// Get number of buckets in map.\n    pub fn get_num_buckets(&self) -> usize {\n        self.buckets.len()\n    }\n\n    /// Clears all entries from the hashmap.\n    ///\n    /// Does not reset any allocation limits, but does clear any entries beyond them.\n    pub fn clear(&mut self) {\n        for i in 0..self.buckets.len() {\n            self.buckets[i] = Bucket {\n                next: if i < self.buckets.len() - 1 {\n                    i as u32 + 1\n                } else {\n                    INVALID_POS\n                },\n                inner: None,\n            }\n        }\n        for i in 0..self.dictionary.len() {\n            self.dictionary[i] = INVALID_POS;\n        }\n\n        self.free_head = 0;\n        self.buckets_in_use = 0;\n    }\n\n    /// Find the position of an unused bucket via the freelist and initialize it.\n    pub(crate) fn alloc_bucket(&mut self, key: K, value: V) -> Result<u32, FullError> {\n        let mut pos = self.free_head;\n\n        // Find the first bucket we're *allowed* to use.\n        let mut prev = PrevPos::First(self.free_head);\n        while pos != INVALID_POS && pos >= self.alloc_limit {\n            let bucket = &mut self.buckets[pos as usize];\n            prev = PrevPos::Chained(pos);\n            pos = bucket.next;\n        }\n        if pos == INVALID_POS {\n            return Err(FullError);\n        }\n\n        // Repair the freelist.\n        match prev {\n          
  PrevPos::First(_) => {\n                let next_pos = self.buckets[pos as usize].next;\n                self.free_head = next_pos;\n            }\n            PrevPos::Chained(p) => {\n                if p != INVALID_POS {\n                    let next_pos = self.buckets[pos as usize].next;\n                    self.buckets[p as usize].next = next_pos;\n                }\n            }\n            _ => unreachable!(),\n        }\n\n        // Initialize the bucket.\n        let bucket = &mut self.buckets[pos as usize];\n        self.buckets_in_use += 1;\n        bucket.next = INVALID_POS;\n        bucket.inner = Some((key, value));\n\n        Ok(pos)\n    }\n}\n"
  },
  {
    "path": "libs/neon-shmem/src/hash/entry.rs",
    "content": "//! Equivalent of [`std::collections::hash_map::Entry`] for this hashmap.\n\nuse crate::hash::core::{CoreHashMap, FullError, INVALID_POS};\nuse crate::sync::{RwLockWriteGuard, ValueWriteGuard};\n\nuse std::hash::Hash;\nuse std::mem;\n\npub enum Entry<'a, 'b, K, V> {\n    Occupied(OccupiedEntry<'a, 'b, K, V>),\n    Vacant(VacantEntry<'a, 'b, K, V>),\n}\n\n/// Enum representing the previous position within a chain.\n#[derive(Clone, Copy)]\npub(crate) enum PrevPos {\n    /// Starting index within the dictionary.  \n    First(u32),\n    /// Regular index within the buckets.\n    Chained(u32),\n    /// Unknown - e.g. the associated entry was retrieved by index instead of chain.\n    Unknown(u64),\n}\n\npub struct OccupiedEntry<'a, 'b, K, V> {\n    /// Mutable reference to the map containing this entry.\n    pub(crate) map: RwLockWriteGuard<'b, CoreHashMap<'a, K, V>>,\n    /// The key of the occupied entry\n    pub(crate) _key: K,\n    /// The index of the previous entry in the chain.\n    pub(crate) prev_pos: PrevPos,\n    /// The position of the bucket in the [`CoreHashMap`] bucket array.\n    pub(crate) bucket_pos: u32,\n}\n\nimpl<K, V> OccupiedEntry<'_, '_, K, V> {\n    pub fn get(&self) -> &V {\n        &self.map.buckets[self.bucket_pos as usize]\n            .inner\n            .as_ref()\n            .unwrap()\n            .1\n    }\n\n    pub fn get_mut(&mut self) -> &mut V {\n        &mut self.map.buckets[self.bucket_pos as usize]\n            .inner\n            .as_mut()\n            .unwrap()\n            .1\n    }\n\n    /// Inserts a value into the entry, replacing (and returning) the existing value.\n    pub fn insert(&mut self, value: V) -> V {\n        let bucket = &mut self.map.buckets[self.bucket_pos as usize];\n        // This assumes inner is Some, which it must be for an OccupiedEntry\n        mem::replace(&mut bucket.inner.as_mut().unwrap().1, value)\n    }\n\n    /// Removes the entry from the hash map, returning the value 
originally stored within it.\n    ///\n    /// This may result in multiple bucket accesses if the entry was obtained by index as the\n    /// previous chain entry needs to be discovered in this case.\n    pub fn remove(mut self) -> V {\n        // If this bucket was queried by index, go ahead and follow its chain from the start.\n        let prev = if let PrevPos::Unknown(hash) = self.prev_pos {\n            let dict_idx = hash as usize % self.map.dictionary.len();\n            let mut prev = PrevPos::First(dict_idx as u32);\n            let mut curr = self.map.dictionary[dict_idx];\n            while curr != self.bucket_pos {\n                assert!(curr != INVALID_POS);\n                prev = PrevPos::Chained(curr);\n                curr = self.map.buckets[curr as usize].next;\n            }\n            prev\n        } else {\n            self.prev_pos\n        };\n\n        // CoreHashMap::remove returns Option<(K, V)>. We know it's Some for an OccupiedEntry.\n        let bucket = &mut self.map.buckets[self.bucket_pos as usize];\n\n        // unlink it from the chain\n        match prev {\n            PrevPos::First(dict_pos) => {\n                self.map.dictionary[dict_pos as usize] = bucket.next;\n            }\n            PrevPos::Chained(bucket_pos) => {\n                self.map.buckets[bucket_pos as usize].next = bucket.next;\n            }\n            _ => unreachable!(),\n        }\n\n        // and add it to the freelist\n        let free = self.map.free_head;\n        let bucket = &mut self.map.buckets[self.bucket_pos as usize];\n        let old_value = bucket.inner.take();\n        bucket.next = free;\n        self.map.free_head = self.bucket_pos;\n        self.map.buckets_in_use -= 1;\n\n        old_value.unwrap().1\n    }\n}\n\n/// An abstract view into a vacant entry within the map.\npub struct VacantEntry<'a, 'b, K, V> {\n    /// Mutable reference to the map containing this entry.\n    pub(crate) map: RwLockWriteGuard<'b, CoreHashMap<'a, K, 
V>>,\n    /// The key to be inserted into this entry.\n    pub(crate) key: K,\n    /// The position within the dictionary corresponding to the key's hash.\n    pub(crate) dict_pos: u32,\n}\n\nimpl<'b, K: Clone + Hash + Eq, V> VacantEntry<'_, 'b, K, V> {\n    /// Insert a value into the vacant entry, finding and populating an empty bucket in the process.\n    ///\n    /// # Errors\n    /// Will return [`FullError`] if there are no unoccupied buckets in the map.\n    pub fn insert(mut self, value: V) -> Result<ValueWriteGuard<'b, V>, FullError> {\n        let pos = self.map.alloc_bucket(self.key, value)?;\n        self.map.buckets[pos as usize].next = self.map.dictionary[self.dict_pos as usize];\n        self.map.dictionary[self.dict_pos as usize] = pos;\n\n        Ok(RwLockWriteGuard::map(self.map, |m| {\n            &mut m.buckets[pos as usize].inner.as_mut().unwrap().1\n        }))\n    }\n}\n"
  },
  {
    "path": "libs/neon-shmem/src/hash/tests.rs",
    "content": "use std::collections::BTreeMap;\nuse std::collections::HashSet;\nuse std::fmt::Debug;\nuse std::mem::MaybeUninit;\n\nuse crate::hash::Entry;\nuse crate::hash::HashMapAccess;\nuse crate::hash::HashMapInit;\nuse crate::hash::core::FullError;\n\nuse rand::seq::SliceRandom;\nuse rand::{Rng, RngCore};\nuse rand_distr::Zipf;\n\nconst TEST_KEY_LEN: usize = 16;\n\n#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)]\nstruct TestKey([u8; TEST_KEY_LEN]);\n\nimpl From<&TestKey> for u128 {\n    fn from(val: &TestKey) -> u128 {\n        u128::from_be_bytes(val.0)\n    }\n}\n\nimpl From<u128> for TestKey {\n    fn from(val: u128) -> TestKey {\n        TestKey(val.to_be_bytes())\n    }\n}\n\nimpl<'a> From<&'a [u8]> for TestKey {\n    fn from(bytes: &'a [u8]) -> TestKey {\n        TestKey(bytes.try_into().unwrap())\n    }\n}\n\nfn test_inserts<K: Into<TestKey> + Copy>(keys: &[K]) {\n    let w = HashMapInit::<TestKey, usize>::new_resizeable_named(100000, 120000, \"test_inserts\")\n        .attach_writer();\n\n    for (idx, k) in keys.iter().enumerate() {\n        let res = w.entry((*k).into());\n        match res {\n            Entry::Occupied(mut e) => {\n                e.insert(idx);\n            }\n            Entry::Vacant(e) => {\n                let res = e.insert(idx);\n                assert!(res.is_ok());\n            }\n        };\n    }\n\n    for (idx, k) in keys.iter().enumerate() {\n        let x = w.get(&(*k).into());\n        let value = x.as_deref().copied();\n        assert_eq!(value, Some(idx));\n    }\n}\n\n#[test]\nfn dense() {\n    // This exercises splitting a node with prefix\n    let keys: &[u128] = &[0, 1, 2, 3, 256];\n    test_inserts(keys);\n\n    // Dense keys\n    let mut keys: Vec<u128> = (0..10000).collect();\n    test_inserts(&keys);\n\n    // Do the same in random orders\n    for _ in 1..10 {\n        keys.shuffle(&mut rand::rng());\n        test_inserts(&keys);\n    }\n}\n\n#[test]\nfn sparse() {\n    // sparse 
keys\n    let mut keys: Vec<TestKey> = Vec::new();\n    let mut used_keys = HashSet::new();\n    for _ in 0..10000 {\n        loop {\n            let key = rand::random::<u128>();\n            if used_keys.contains(&key) {\n                continue;\n            }\n            used_keys.insert(key);\n            keys.push(key.into());\n            break;\n        }\n    }\n    test_inserts(&keys);\n}\n\n#[derive(Clone, Debug)]\nstruct TestOp(TestKey, Option<usize>);\n\nfn apply_op(\n    op: &TestOp,\n    map: &mut HashMapAccess<TestKey, usize>,\n    shadow: &mut BTreeMap<TestKey, usize>,\n) {\n    // apply the change to the shadow tree first\n    let shadow_existing = if let Some(v) = op.1 {\n        shadow.insert(op.0, v)\n    } else {\n        shadow.remove(&op.0)\n    };\n\n    let entry = map.entry(op.0);\n    let hash_existing = match op.1 {\n        Some(new) => match entry {\n            Entry::Occupied(mut e) => Some(e.insert(new)),\n            Entry::Vacant(e) => {\n                _ = e.insert(new).unwrap();\n                None\n            }\n        },\n        None => match entry {\n            Entry::Occupied(e) => Some(e.remove()),\n            Entry::Vacant(_) => None,\n        },\n    };\n\n    assert_eq!(shadow_existing, hash_existing);\n}\n\nfn do_random_ops(\n    num_ops: usize,\n    size: u32,\n    del_prob: f64,\n    writer: &mut HashMapAccess<TestKey, usize>,\n    shadow: &mut BTreeMap<TestKey, usize>,\n    rng: &mut rand::rngs::ThreadRng,\n) {\n    for i in 0..num_ops {\n        let key: TestKey = ((rng.next_u32() % size) as u128).into();\n        let op = TestOp(\n            key,\n            if rng.random_bool(del_prob) {\n                Some(i)\n            } else {\n                None\n            },\n        );\n        apply_op(&op, writer, shadow);\n    }\n}\n\nfn do_deletes(\n    num_ops: usize,\n    writer: &mut HashMapAccess<TestKey, usize>,\n    shadow: &mut BTreeMap<TestKey, usize>,\n) {\n    for _ in 0..num_ops {\n        
let (k, _) = shadow.pop_first().unwrap();\n        writer.remove(&k);\n    }\n}\n\nfn do_shrink(\n    writer: &mut HashMapAccess<TestKey, usize>,\n    shadow: &mut BTreeMap<TestKey, usize>,\n    from: u32,\n    to: u32,\n) {\n    assert!(writer.shrink_goal().is_none());\n    writer.begin_shrink(to);\n    assert_eq!(writer.shrink_goal(), Some(to as usize));\n    for i in to..from {\n        if let Some(entry) = writer.entry_at_bucket(i as usize) {\n            shadow.remove(&entry._key);\n            entry.remove();\n        }\n    }\n    let old_usage = writer.get_num_buckets_in_use();\n    writer.finish_shrink().unwrap();\n    assert!(writer.shrink_goal().is_none());\n    assert_eq!(writer.get_num_buckets_in_use(), old_usage);\n}\n\n#[test]\nfn random_ops() {\n    let mut writer =\n        HashMapInit::<TestKey, usize>::new_resizeable_named(100000, 120000, \"test_random\")\n            .attach_writer();\n    let mut shadow: std::collections::BTreeMap<TestKey, usize> = BTreeMap::new();\n\n    let distribution = Zipf::new(u128::MAX as f64, 1.1).unwrap();\n    let mut rng = rand::rng();\n    for i in 0..100000 {\n        let key: TestKey = (rng.sample(distribution) as u128).into();\n\n        let op = TestOp(key, if rng.random_bool(0.75) { Some(i) } else { None });\n\n        apply_op(&op, &mut writer, &mut shadow);\n    }\n}\n\n#[test]\nfn test_shuffle() {\n    let mut writer = HashMapInit::<TestKey, usize>::new_resizeable_named(1000, 1200, \"test_shuf\")\n        .attach_writer();\n    let mut shadow: std::collections::BTreeMap<TestKey, usize> = BTreeMap::new();\n    let mut rng = rand::rng();\n\n    do_random_ops(10000, 1000, 0.75, &mut writer, &mut shadow, &mut rng);\n    writer.shuffle();\n    do_random_ops(10000, 1000, 0.75, &mut writer, &mut shadow, &mut rng);\n}\n\n#[test]\nfn test_grow() {\n    let mut writer = HashMapInit::<TestKey, usize>::new_resizeable_named(1000, 2000, \"test_grow\")\n        .attach_writer();\n    let mut shadow: 
std::collections::BTreeMap<TestKey, usize> = BTreeMap::new();\n    let mut rng = rand::rng();\n\n    do_random_ops(10000, 1000, 0.75, &mut writer, &mut shadow, &mut rng);\n    let old_usage = writer.get_num_buckets_in_use();\n    writer.grow(1500).unwrap();\n    assert_eq!(writer.get_num_buckets_in_use(), old_usage);\n    assert_eq!(writer.get_num_buckets(), 1500);\n    do_random_ops(10000, 1500, 0.75, &mut writer, &mut shadow, &mut rng);\n}\n\n#[test]\nfn test_clear() {\n    let mut writer = HashMapInit::<TestKey, usize>::new_resizeable_named(1500, 2000, \"test_clear\")\n        .attach_writer();\n    let mut shadow: std::collections::BTreeMap<TestKey, usize> = BTreeMap::new();\n    let mut rng = rand::rng();\n    do_random_ops(2000, 1500, 0.75, &mut writer, &mut shadow, &mut rng);\n    writer.clear();\n    assert_eq!(writer.get_num_buckets_in_use(), 0);\n    assert_eq!(writer.get_num_buckets(), 1500);\n    while let Some((key, _)) = shadow.pop_first() {\n        assert!(writer.get(&key).is_none());\n    }\n    do_random_ops(2000, 1500, 0.75, &mut writer, &mut shadow, &mut rng);\n    for i in 0..(1500 - writer.get_num_buckets_in_use()) {\n        writer.insert((1500 + i as u128).into(), 0).unwrap();\n    }\n    assert_eq!(writer.insert(5000.into(), 0), Err(FullError {}));\n    writer.clear();\n    assert!(writer.insert(5000.into(), 0).is_ok());\n}\n\n#[test]\nfn test_idx_remove() {\n    let mut writer = HashMapInit::<TestKey, usize>::new_resizeable_named(1500, 2000, \"test_clear\")\n        .attach_writer();\n    let mut shadow: std::collections::BTreeMap<TestKey, usize> = BTreeMap::new();\n    let mut rng = rand::rng();\n    do_random_ops(2000, 1500, 0.25, &mut writer, &mut shadow, &mut rng);\n    for _ in 0..100 {\n        let idx = (rng.next_u32() % 1500) as usize;\n        if let Some(e) = writer.entry_at_bucket(idx) {\n            shadow.remove(&e._key);\n            e.remove();\n        }\n    }\n    while let Some((key, val)) = shadow.pop_first() {\n        
assert_eq!(*writer.get(&key).unwrap(), val);\n    }\n}\n\n#[test]\nfn test_idx_get() {\n    let mut writer = HashMapInit::<TestKey, usize>::new_resizeable_named(1500, 2000, \"test_clear\")\n        .attach_writer();\n    let mut shadow: std::collections::BTreeMap<TestKey, usize> = BTreeMap::new();\n    let mut rng = rand::rng();\n    do_random_ops(2000, 1500, 0.25, &mut writer, &mut shadow, &mut rng);\n    for _ in 0..100 {\n        let idx = (rng.next_u32() % 1500) as usize;\n        if let Some(pair) = writer.get_at_bucket(idx) {\n            {\n                let v: *const usize = &pair.1;\n                assert_eq!(writer.get_bucket_for_value(v), idx);\n            }\n            {\n                let v: *const usize = &pair.1;\n                assert_eq!(writer.get_bucket_for_value(v), idx);\n            }\n        }\n    }\n}\n\n#[test]\nfn test_shrink() {\n    let mut writer = HashMapInit::<TestKey, usize>::new_resizeable_named(1500, 2000, \"test_shrink\")\n        .attach_writer();\n    let mut shadow: std::collections::BTreeMap<TestKey, usize> = BTreeMap::new();\n    let mut rng = rand::rng();\n\n    do_random_ops(10000, 1500, 0.75, &mut writer, &mut shadow, &mut rng);\n    do_shrink(&mut writer, &mut shadow, 1500, 1000);\n    assert_eq!(writer.get_num_buckets(), 1000);\n    do_deletes(500, &mut writer, &mut shadow);\n    do_random_ops(10000, 500, 0.75, &mut writer, &mut shadow, &mut rng);\n    assert!(writer.get_num_buckets_in_use() <= 1000);\n}\n\n#[test]\nfn test_shrink_grow_seq() {\n    let mut writer =\n        HashMapInit::<TestKey, usize>::new_resizeable_named(1000, 20000, \"test_grow_seq\")\n            .attach_writer();\n    let mut shadow: std::collections::BTreeMap<TestKey, usize> = BTreeMap::new();\n    let mut rng = rand::rng();\n\n    do_random_ops(500, 1000, 0.1, &mut writer, &mut shadow, &mut rng);\n    eprintln!(\"Shrinking to 750\");\n    do_shrink(&mut writer, &mut shadow, 1000, 750);\n    do_random_ops(200, 1000, 0.5, &mut writer, 
&mut shadow, &mut rng);\n    eprintln!(\"Growing to 1500\");\n    writer.grow(1500).unwrap();\n    do_random_ops(600, 1500, 0.1, &mut writer, &mut shadow, &mut rng);\n    eprintln!(\"Shrinking to 200\");\n    while shadow.len() > 100 {\n        do_deletes(1, &mut writer, &mut shadow);\n    }\n    do_shrink(&mut writer, &mut shadow, 1500, 200);\n    do_random_ops(50, 1500, 0.25, &mut writer, &mut shadow, &mut rng);\n    eprintln!(\"Growing to 10k\");\n    writer.grow(10000).unwrap();\n    do_random_ops(10000, 5000, 0.25, &mut writer, &mut shadow, &mut rng);\n}\n\n#[test]\nfn test_bucket_ops() {\n    let writer = HashMapInit::<TestKey, usize>::new_resizeable_named(1000, 1200, \"test_bucket_ops\")\n        .attach_writer();\n    match writer.entry(1.into()) {\n        Entry::Occupied(mut e) => {\n            e.insert(2);\n        }\n        Entry::Vacant(e) => {\n            _ = e.insert(2).unwrap();\n        }\n    }\n    assert_eq!(writer.get_num_buckets_in_use(), 1);\n    assert_eq!(writer.get_num_buckets(), 1000);\n    assert_eq!(*writer.get(&1.into()).unwrap(), 2);\n    let pos = match writer.entry(1.into()) {\n        Entry::Occupied(e) => {\n            assert_eq!(e._key, 1.into());\n            e.bucket_pos as usize\n        }\n        Entry::Vacant(_) => {\n            panic!(\"Insert didn't affect entry\");\n        }\n    };\n    assert_eq!(writer.entry_at_bucket(pos).unwrap()._key, 1.into());\n    assert_eq!(*writer.get_at_bucket(pos).unwrap(), (1.into(), 2));\n    {\n        let ptr: *const usize = &*writer.get(&1.into()).unwrap();\n        assert_eq!(writer.get_bucket_for_value(ptr), pos);\n    }\n    writer.remove(&1.into());\n    assert!(writer.get(&1.into()).is_none());\n}\n\n#[test]\nfn test_shrink_zero() {\n    let mut writer =\n        HashMapInit::<TestKey, usize>::new_resizeable_named(1500, 2000, \"test_shrink_zero\")\n            .attach_writer();\n    writer.begin_shrink(0);\n    for i in 0..1500 {\n        writer.entry_at_bucket(i).map(|x| 
x.remove());\n    }\n    writer.finish_shrink().unwrap();\n    assert_eq!(writer.get_num_buckets_in_use(), 0);\n    let entry = writer.entry(1.into());\n    if let Entry::Vacant(v) = entry {\n        assert!(v.insert(2).is_err());\n    } else {\n        panic!(\"Somehow got non-vacant entry in empty map.\")\n    }\n    writer.grow(50).unwrap();\n    let entry = writer.entry(1.into());\n    if let Entry::Vacant(v) = entry {\n        assert!(v.insert(2).is_ok());\n    } else {\n        panic!(\"Somehow got non-vacant entry in empty map.\")\n    }\n    assert_eq!(writer.get_num_buckets_in_use(), 1);\n}\n\n#[test]\n#[should_panic]\nfn test_grow_oom() {\n    let writer = HashMapInit::<TestKey, usize>::new_resizeable_named(1500, 2000, \"test_grow_oom\")\n        .attach_writer();\n    writer.grow(20000).unwrap();\n}\n\n#[test]\n#[should_panic]\nfn test_shrink_bigger() {\n    let mut writer =\n        HashMapInit::<TestKey, usize>::new_resizeable_named(1500, 2500, \"test_shrink_bigger\")\n            .attach_writer();\n    writer.begin_shrink(2000);\n}\n\n#[test]\n#[should_panic]\nfn test_shrink_early_finish() {\n    let writer =\n        HashMapInit::<TestKey, usize>::new_resizeable_named(1500, 2500, \"test_shrink_early_finish\")\n            .attach_writer();\n    writer.finish_shrink().unwrap();\n}\n\n#[test]\n#[should_panic]\nfn test_shrink_fixed_size() {\n    let mut area = [MaybeUninit::uninit(); 10000];\n    let init_struct = HashMapInit::<TestKey, usize>::with_fixed(3, &mut area);\n    let mut writer = init_struct.attach_writer();\n    writer.begin_shrink(1);\n}\n"
  },
  {
    "path": "libs/neon-shmem/src/hash.rs",
    "content": "//! Resizable hash table implementation on top of byte-level storage (either a [`ShmemHandle`] or a fixed byte array).\n//!\n//! This hash table has two major components: the bucket array and the dictionary. Each bucket within the\n//! bucket array contains a `Option<(K, V)>` and an index of another bucket. In this way there is both an\n//! implicit freelist within the bucket array (`None` buckets point to other `None` entries) and various hash\n//! chains within the bucket array (a Some bucket will point to other Some buckets that had the same hash).\n//!\n//! Buckets are never moved unless they are within a region that is being shrunk, and so the actual hash-\n//! dependent component is done with the dictionary. When a new key is inserted into the map, a position\n//! within the dictionary is decided based on its hash, the data is inserted into an empty bucket based\n//! off of the freelist, and then the index of said bucket is placed in the dictionary.\n//!\n//! This map is resizable (if initialized on top of a [`ShmemHandle`]). Both growing and shrinking happen\n//! in-place and are at a high level achieved by expanding/reducing the bucket array and rebuilding the\n//! dictionary by rehashing all keys.\n//!\n//! 
Concurrency is managed very simply: the entire map is guarded by one shared-memory RwLock.\n\nuse std::hash::{BuildHasher, Hash};\nuse std::mem::MaybeUninit;\n\nuse crate::shmem::ShmemHandle;\nuse crate::{shmem, sync::*};\n\nmod core;\npub mod entry;\n\n#[cfg(test)]\nmod tests;\n\nuse core::{Bucket, CoreHashMap, INVALID_POS};\nuse entry::{Entry, OccupiedEntry, PrevPos, VacantEntry};\n\nuse thiserror::Error;\n\n/// Error type for a hashmap shrink operation.\n#[derive(Error, Debug)]\npub enum HashMapShrinkError {\n    /// There was an error encountered while resizing the memory area.\n    #[error(\"shmem resize failed: {0}\")]\n    ResizeError(shmem::Error),\n    /// Occupied entries in to-be-shrunk space were encountered beginning at the given index.\n    #[error(\"occupied entry in deallocated space found at {0}\")]\n    RemainingEntries(usize),\n}\n\n/// This represents a hash table that (possibly) lives in shared memory.\n/// If a new process is launched with fork(), the child process inherits\n/// this struct.\n#[must_use]\npub struct HashMapInit<'a, K, V, S = rustc_hash::FxBuildHasher> {\n    shmem_handle: Option<ShmemHandle>,\n    shared_ptr: *mut HashMapShared<'a, K, V>,\n    shared_size: usize,\n    hasher: S,\n    num_buckets: u32,\n}\n\n/// This is a per-process handle to a hash table that (possibly) lives in shared memory.\n/// If a child process is launched with fork(), the child process should\n/// get its own HashMapAccess by calling HashMapInit::attach_writer/reader().\n///\n/// XXX: We're not making use of it at the moment, but this struct could\n/// hold process-local information in the future.\npub struct HashMapAccess<'a, K, V, S = rustc_hash::FxBuildHasher> {\n    shmem_handle: Option<ShmemHandle>,\n    shared_ptr: *mut HashMapShared<'a, K, V>,\n    hasher: S,\n}\n\nunsafe impl<K: Sync, V: Sync, S> Sync for HashMapAccess<'_, K, V, S> {}\nunsafe impl<K: Send, V: Send, S> Send for HashMapAccess<'_, K, V, S> {}\n\nimpl<'a, K: Clone + Hash + Eq, V, 
S> HashMapInit<'a, K, V, S> {\n    /// Change the 'hasher' used by the hash table.\n    ///\n    /// NOTE: This must be called right after creating the hash table,\n    /// before inserting any entries and before calling attach_writer/reader.\n    /// Otherwise different accessors could be using different hash function,\n    /// with confusing results.\n    pub fn with_hasher<T: BuildHasher>(self, hasher: T) -> HashMapInit<'a, K, V, T> {\n        HashMapInit {\n            hasher,\n            shmem_handle: self.shmem_handle,\n            shared_ptr: self.shared_ptr,\n            shared_size: self.shared_size,\n            num_buckets: self.num_buckets,\n        }\n    }\n\n    /// Loosely (over)estimate the size needed to store a hash table with `num_buckets` buckets.\n    pub fn estimate_size(num_buckets: u32) -> usize {\n        // add some margin to cover alignment etc.\n        CoreHashMap::<K, V>::estimate_size(num_buckets) + size_of::<HashMapShared<K, V>>() + 1000\n    }\n\n    fn new(\n        num_buckets: u32,\n        shmem_handle: Option<ShmemHandle>,\n        area_ptr: *mut u8,\n        area_size: usize,\n        hasher: S,\n    ) -> Self {\n        let mut ptr: *mut u8 = area_ptr;\n        let end_ptr: *mut u8 = unsafe { ptr.add(area_size) };\n\n        // carve out area for the One Big Lock (TM) and the HashMapShared.\n        ptr = unsafe { ptr.add(ptr.align_offset(align_of::<libc::pthread_rwlock_t>())) };\n        let raw_lock_ptr = ptr;\n        ptr = unsafe { ptr.add(size_of::<libc::pthread_rwlock_t>()) };\n        ptr = unsafe { ptr.add(ptr.align_offset(align_of::<HashMapShared<K, V>>())) };\n        let shared_ptr: *mut HashMapShared<K, V> = ptr.cast();\n        ptr = unsafe { ptr.add(size_of::<HashMapShared<K, V>>()) };\n\n        // carve out the buckets\n        ptr = unsafe { ptr.byte_add(ptr.align_offset(align_of::<core::Bucket<K, V>>())) };\n        let buckets_ptr = ptr;\n        ptr = unsafe { ptr.add(size_of::<core::Bucket<K, V>>() * 
num_buckets as usize) };\n\n        // use remaining space for the dictionary\n        ptr = unsafe { ptr.byte_add(ptr.align_offset(align_of::<u32>())) };\n        assert!(ptr.addr() < end_ptr.addr());\n        let dictionary_ptr = ptr;\n        let dictionary_size = unsafe { end_ptr.byte_offset_from(ptr) / size_of::<u32>() as isize };\n        assert!(dictionary_size > 0);\n\n        let buckets =\n            unsafe { std::slice::from_raw_parts_mut(buckets_ptr.cast(), num_buckets as usize) };\n        let dictionary = unsafe {\n            std::slice::from_raw_parts_mut(dictionary_ptr.cast(), dictionary_size as usize)\n        };\n\n        let hashmap = CoreHashMap::new(buckets, dictionary);\n        unsafe {\n            let lock = RwLock::from_raw(PthreadRwLock::new(raw_lock_ptr.cast()), hashmap);\n            std::ptr::write(shared_ptr, lock);\n        }\n\n        Self {\n            num_buckets,\n            shmem_handle,\n            shared_ptr,\n            shared_size: area_size,\n            hasher,\n        }\n    }\n\n    /// Attach to a hash table for writing.\n    pub fn attach_writer(self) -> HashMapAccess<'a, K, V, S> {\n        HashMapAccess {\n            shmem_handle: self.shmem_handle,\n            shared_ptr: self.shared_ptr,\n            hasher: self.hasher,\n        }\n    }\n\n    /// Initialize a table for reading. Currently identical to [`HashMapInit::attach_writer`].\n    ///\n    /// This is a holdover from a previous implementation and is being kept around for\n    /// backwards compatibility reasons.\n    pub fn attach_reader(self) -> HashMapAccess<'a, K, V, S> {\n        self.attach_writer()\n    }\n}\n\n/// Hash table data that is actually stored in the shared memory area.\n///\n/// NOTE: We carve out the parts from a contiguous chunk. Growing and shrinking the hash table\n/// relies on the memory layout! 
The data structures are laid out in the contiguous shared memory\n/// area as follows:\n///\n/// [`libc::pthread_rwlock_t`]\n/// [`HashMapShared`]\n/// buckets\n/// dictionary\n///\n/// In between the above parts, there can be padding bytes to align the parts correctly.\ntype HashMapShared<'a, K, V> = RwLock<CoreHashMap<'a, K, V>>;\n\nimpl<'a, K, V> HashMapInit<'a, K, V, rustc_hash::FxBuildHasher>\nwhere\n    K: Clone + Hash + Eq,\n{\n    /// Place the hash table within a user-supplied fixed memory area.\n    pub fn with_fixed(num_buckets: u32, area: &'a mut [MaybeUninit<u8>]) -> Self {\n        Self::new(\n            num_buckets,\n            None,\n            area.as_mut_ptr().cast(),\n            area.len(),\n            rustc_hash::FxBuildHasher,\n        )\n    }\n\n    /// Place a new hash map in the given shared memory area\n    ///\n    /// # Panics\n    /// Will panic on failure to resize area to expected map size.\n    pub fn with_shmem(num_buckets: u32, shmem: ShmemHandle) -> Self {\n        let size = Self::estimate_size(num_buckets);\n        shmem\n            .set_size(size)\n            .expect(\"could not resize shared memory area\");\n        let ptr = shmem.data_ptr.as_ptr().cast();\n        Self::new(\n            num_buckets,\n            Some(shmem),\n            ptr,\n            size,\n            rustc_hash::FxBuildHasher,\n        )\n    }\n\n    /// Make a resizable hash map within a new shared memory area with the given name.\n    pub fn new_resizeable_named(num_buckets: u32, max_buckets: u32, name: &str) -> Self {\n        let size = Self::estimate_size(num_buckets);\n        let max_size = Self::estimate_size(max_buckets);\n        let shmem =\n            ShmemHandle::new(name, size, max_size).expect(\"failed to make shared memory area\");\n        let ptr = shmem.data_ptr.as_ptr().cast();\n\n        Self::new(\n            num_buckets,\n            Some(shmem),\n            ptr,\n            size,\n            
rustc_hash::FxBuildHasher,\n        )\n    }\n\n    /// Make a resizable hash map within a new anonymous shared memory area.\n    pub fn new_resizeable(num_buckets: u32, max_buckets: u32) -> Self {\n        use std::sync::atomic::{AtomicUsize, Ordering};\n        static COUNTER: AtomicUsize = AtomicUsize::new(0);\n        let val = COUNTER.fetch_add(1, Ordering::Relaxed);\n        let name = format!(\"neon_shmem_hmap{val}\");\n        Self::new_resizeable_named(num_buckets, max_buckets, &name)\n    }\n}\n\nimpl<'a, K, V, S: BuildHasher> HashMapAccess<'a, K, V, S>\nwhere\n    K: Clone + Hash + Eq,\n{\n    /// Hash a key using the map's hasher.\n    #[inline]\n    fn get_hash_value(&self, key: &K) -> u64 {\n        self.hasher.hash_one(key)\n    }\n\n    fn entry_with_hash(&self, key: K, hash: u64) -> Entry<'a, '_, K, V> {\n        let mut map = unsafe { self.shared_ptr.as_ref() }.unwrap().write();\n        let dict_pos = hash as usize % map.dictionary.len();\n        let first = map.dictionary[dict_pos];\n        if first == INVALID_POS {\n            // no existing entry\n            return Entry::Vacant(VacantEntry {\n                map,\n                key,\n                dict_pos: dict_pos as u32,\n            });\n        }\n\n        let mut prev_pos = PrevPos::First(dict_pos as u32);\n        let mut next = first;\n        loop {\n            let bucket = &mut map.buckets[next as usize];\n            let (bucket_key, _bucket_value) = bucket.inner.as_mut().expect(\"entry is in use\");\n            if *bucket_key == key {\n                // found existing entry\n                return Entry::Occupied(OccupiedEntry {\n                    map,\n                    _key: key,\n                    prev_pos,\n                    bucket_pos: next,\n                });\n            }\n\n            if bucket.next == INVALID_POS {\n                // No existing entry\n                return Entry::Vacant(VacantEntry {\n                    map,\n                   
 key,\n                    dict_pos: dict_pos as u32,\n                });\n            }\n            prev_pos = PrevPos::Chained(next);\n            next = bucket.next;\n        }\n    }\n\n    /// Get a reference to the corresponding value for a key.\n    pub fn get<'e>(&'e self, key: &K) -> Option<ValueReadGuard<'e, V>> {\n        let hash = self.get_hash_value(key);\n        let map = unsafe { self.shared_ptr.as_ref() }.unwrap().read();\n        RwLockReadGuard::try_map(map, |m| m.get_with_hash(key, hash)).ok()\n    }\n\n    /// Get a reference to the entry containing a key.\n    ///\n    /// NB: This takes a write lock as there's no way to distinguish whether the intention\n    /// is to use the entry for reading or for writing in advance.\n    pub fn entry(&self, key: K) -> Entry<'a, '_, K, V> {\n        let hash = self.get_hash_value(&key);\n        self.entry_with_hash(key, hash)\n    }\n\n    /// Remove a key. Returns the associated value if it existed.\n    pub fn remove(&self, key: &K) -> Option<V> {\n        let hash = self.get_hash_value(key);\n        match self.entry_with_hash(key.clone(), hash) {\n            Entry::Occupied(e) => Some(e.remove()),\n            Entry::Vacant(_) => None,\n        }\n    }\n\n    /// Insert/update a key. 
Returns the previous associated value if it existed.\n    ///\n    /// # Errors\n    /// Will return [`core::FullError`] if there is no more space left in the map.\n    pub fn insert(&self, key: K, value: V) -> Result<Option<V>, core::FullError> {\n        let hash = self.get_hash_value(&key);\n        match self.entry_with_hash(key.clone(), hash) {\n            Entry::Occupied(mut e) => Ok(Some(e.insert(value))),\n            Entry::Vacant(e) => {\n                _ = e.insert(value)?;\n                Ok(None)\n            }\n        }\n    }\n\n    /// Optionally return the entry for a bucket at a given index if it exists.\n    ///\n    /// Has more overhead than one would intuitively expect: performs both a clone of the key\n    /// due to the [`OccupiedEntry`] type owning the key and also a hash of the key in order\n    /// to enable repairing the hash chain if the entry is removed.\n    pub fn entry_at_bucket(&self, pos: usize) -> Option<OccupiedEntry<'a, '_, K, V>> {\n        let map = unsafe { self.shared_ptr.as_mut() }.unwrap().write();\n        if pos >= map.buckets.len() {\n            return None;\n        }\n\n        let entry = map.buckets[pos].inner.as_ref();\n        match entry {\n            Some((key, _)) => Some(OccupiedEntry {\n                _key: key.clone(),\n                bucket_pos: pos as u32,\n                prev_pos: entry::PrevPos::Unknown(self.get_hash_value(key)),\n                map,\n            }),\n            _ => None,\n        }\n    }\n\n    /// Returns the number of buckets in the table.\n    pub fn get_num_buckets(&self) -> usize {\n        let map = unsafe { self.shared_ptr.as_ref() }.unwrap().read();\n        map.get_num_buckets()\n    }\n\n    /// Return the key and value stored in bucket with given index. This can be used to\n    /// iterate through the hash map.\n    // TODO: An Iterator might be nicer. 
The communicator's clock algorithm needs to\n    // _slowly_ iterate through all buckets with its clock hand,  without holding a lock.\n    // If we switch to an Iterator, it must not hold the lock.\n    pub fn get_at_bucket(&self, pos: usize) -> Option<ValueReadGuard<'_, (K, V)>> {\n        let map = unsafe { self.shared_ptr.as_ref() }.unwrap().read();\n        if pos >= map.buckets.len() {\n            return None;\n        }\n        RwLockReadGuard::try_map(map, |m| m.buckets[pos].inner.as_ref()).ok()\n    }\n\n    /// Returns the index of the bucket a given value corresponds to.\n    pub fn get_bucket_for_value(&self, val_ptr: *const V) -> usize {\n        let map = unsafe { self.shared_ptr.as_ref() }.unwrap().read();\n\n        let origin = map.buckets.as_ptr();\n        let idx = (val_ptr as usize - origin as usize) / size_of::<Bucket<K, V>>();\n        assert!(idx < map.buckets.len());\n\n        idx\n    }\n\n    /// Returns the number of occupied buckets in the table.\n    pub fn get_num_buckets_in_use(&self) -> usize {\n        let map = unsafe { self.shared_ptr.as_ref() }.unwrap().read();\n        map.buckets_in_use as usize\n    }\n\n    /// Clears all entries in a table. Does not reset any shrinking operations.\n    pub fn clear(&self) {\n        let mut map = unsafe { self.shared_ptr.as_mut() }.unwrap().write();\n        map.clear();\n    }\n\n    /// Perform an in-place rehash of some region (0..`rehash_buckets`) of the table and reset\n    /// the `buckets` and `dictionary` slices to be as long as `num_buckets`. 
Resets the freelist\n    /// in the process.\n    fn rehash_dict(\n        &self,\n        inner: &mut CoreHashMap<'a, K, V>,\n        buckets_ptr: *mut core::Bucket<K, V>,\n        end_ptr: *mut u8,\n        num_buckets: u32,\n        rehash_buckets: u32,\n    ) {\n        inner.free_head = INVALID_POS;\n\n        let buckets;\n        let dictionary;\n        unsafe {\n            let buckets_end_ptr = buckets_ptr.add(num_buckets as usize);\n            let dictionary_ptr: *mut u32 = buckets_end_ptr\n                .byte_add(buckets_end_ptr.align_offset(align_of::<u32>()))\n                .cast();\n            let dictionary_size: usize =\n                end_ptr.byte_offset_from(buckets_end_ptr) as usize / size_of::<u32>();\n\n            buckets = std::slice::from_raw_parts_mut(buckets_ptr, num_buckets as usize);\n            dictionary = std::slice::from_raw_parts_mut(dictionary_ptr, dictionary_size);\n        }\n        for e in dictionary.iter_mut() {\n            *e = INVALID_POS;\n        }\n\n        for (i, bucket) in buckets.iter_mut().enumerate().take(rehash_buckets as usize) {\n            if bucket.inner.is_none() {\n                bucket.next = inner.free_head;\n                inner.free_head = i as u32;\n                continue;\n            }\n\n            let hash = self.hasher.hash_one(&bucket.inner.as_ref().unwrap().0);\n            let pos: usize = (hash % dictionary.len() as u64) as usize;\n            bucket.next = dictionary[pos];\n            dictionary[pos] = i as u32;\n        }\n\n        inner.dictionary = dictionary;\n        inner.buckets = buckets;\n    }\n\n    /// Rehash the map without growing or shrinking.\n    pub fn shuffle(&self) {\n        let mut map = unsafe { self.shared_ptr.as_mut() }.unwrap().write();\n        let num_buckets = map.get_num_buckets() as u32;\n        let size_bytes = HashMapInit::<K, V, S>::estimate_size(num_buckets);\n        let end_ptr: *mut u8 = unsafe { 
self.shared_ptr.byte_add(size_bytes).cast() };\n        let buckets_ptr = map.buckets.as_mut_ptr();\n        self.rehash_dict(&mut map, buckets_ptr, end_ptr, num_buckets, num_buckets);\n    }\n\n    /// Grow the number of buckets within the table.\n    ///\n    /// 1. Grows the underlying shared memory area\n    /// 2. Initializes new buckets and overwrites the current dictionary\n    /// 3. Rehashes the dictionary\n    ///\n    /// # Panics\n    /// Panics if called on a map initialized with [`HashMapInit::with_fixed`].\n    ///\n    /// # Errors\n    /// Returns an [`shmem::Error`] if any errors occur resizing the memory region.\n    pub fn grow(&self, num_buckets: u32) -> Result<(), shmem::Error> {\n        let mut map = unsafe { self.shared_ptr.as_mut() }.unwrap().write();\n        let old_num_buckets = map.buckets.len() as u32;\n\n        assert!(\n            num_buckets >= old_num_buckets,\n            \"grow called with a smaller number of buckets\"\n        );\n        if num_buckets == old_num_buckets {\n            return Ok(());\n        }\n        let shmem_handle = self\n            .shmem_handle\n            .as_ref()\n            .expect(\"grow called on a fixed-size hash table\");\n\n        let size_bytes = HashMapInit::<K, V, S>::estimate_size(num_buckets);\n        shmem_handle.set_size(size_bytes)?;\n        let end_ptr: *mut u8 = unsafe { shmem_handle.data_ptr.as_ptr().add(size_bytes) };\n\n        // Initialize new buckets. 
The new buckets are linked to the free list.\n        // NB: This overwrites the dictionary!\n        let buckets_ptr = map.buckets.as_mut_ptr();\n        unsafe {\n            for i in old_num_buckets..num_buckets {\n                let bucket = buckets_ptr.add(i as usize);\n                bucket.write(core::Bucket {\n                    next: if i < num_buckets - 1 {\n                        i + 1\n                    } else {\n                        map.free_head\n                    },\n                    inner: None,\n                });\n            }\n        }\n\n        self.rehash_dict(&mut map, buckets_ptr, end_ptr, num_buckets, old_num_buckets);\n        map.free_head = old_num_buckets;\n\n        Ok(())\n    }\n\n    /// Begin a shrink, limiting all new allocations to be in buckets with index below `num_buckets`.\n    ///\n    /// # Panics\n    /// Panics if called on a map initialized with [`HashMapInit::with_fixed`] or if `num_buckets` is\n    /// greater than the number of buckets in the map.\n    pub fn begin_shrink(&mut self, num_buckets: u32) {\n        let mut map = unsafe { self.shared_ptr.as_mut() }.unwrap().write();\n        assert!(\n            num_buckets <= map.get_num_buckets() as u32,\n            \"shrink called with a larger number of buckets\"\n        );\n        _ = self\n            .shmem_handle\n            .as_ref()\n            .expect(\"shrink called on a fixed-size hash table\");\n        map.alloc_limit = num_buckets;\n    }\n\n    /// If a shrink operation is underway, returns the target size of the map. 
Otherwise, returns None.\n    pub fn shrink_goal(&self) -> Option<usize> {\n        let map = unsafe { self.shared_ptr.as_mut() }.unwrap().read();\n        let goal = map.alloc_limit;\n        if goal == INVALID_POS {\n            None\n        } else {\n            Some(goal as usize)\n        }\n    }\n\n    /// Complete a shrink after caller has evicted entries, removing the unused buckets and rehashing.\n    ///\n    /// # Panics\n    /// The following cases result in a panic:\n    /// - Calling this function on a map initialized with [`HashMapInit::with_fixed`].\n    /// - Calling this function on a map when no shrink operation is in progress.\n    pub fn finish_shrink(&self) -> Result<(), HashMapShrinkError> {\n        let mut map = unsafe { self.shared_ptr.as_mut() }.unwrap().write();\n        assert!(\n            map.alloc_limit != INVALID_POS,\n            \"called finish_shrink when no shrink is in progress\"\n        );\n\n        let num_buckets = map.alloc_limit;\n\n        if map.get_num_buckets() == num_buckets as usize {\n            return Ok(());\n        }\n\n        assert!(\n            map.buckets_in_use <= num_buckets,\n            \"called finish_shrink before enough entries were removed\"\n        );\n\n        for i in (num_buckets as usize)..map.buckets.len() {\n            if map.buckets[i].inner.is_some() {\n                return Err(HashMapShrinkError::RemainingEntries(i));\n            }\n        }\n\n        let shmem_handle = self\n            .shmem_handle\n            .as_ref()\n            .expect(\"shrink called on a fixed-size hash table\");\n\n        let size_bytes = HashMapInit::<K, V, S>::estimate_size(num_buckets);\n        if let Err(e) = shmem_handle.set_size(size_bytes) {\n            return Err(HashMapShrinkError::ResizeError(e));\n        }\n        let end_ptr: *mut u8 = unsafe { shmem_handle.data_ptr.as_ptr().add(size_bytes) };\n        let buckets_ptr = map.buckets.as_mut_ptr();\n        self.rehash_dict(&mut 
map, buckets_ptr, end_ptr, num_buckets, num_buckets);\n        map.alloc_limit = INVALID_POS;\n\n        Ok(())\n    }\n}\n"
  },
  {
    "path": "libs/neon-shmem/src/lib.rs",
    "content": "pub mod hash;\npub mod shmem;\npub mod sync;\n"
  },
  {
    "path": "libs/neon-shmem/src/shmem.rs",
    "content": "//! Dynamically resizable contiguous chunk of shared memory\n\nuse std::num::NonZeroUsize;\nuse std::os::fd::{AsFd, BorrowedFd, OwnedFd};\nuse std::ptr::NonNull;\nuse std::sync::atomic::{AtomicUsize, Ordering};\n\nuse nix::errno::Errno;\nuse nix::sys::mman::MapFlags;\nuse nix::sys::mman::ProtFlags;\nuse nix::sys::mman::mmap as nix_mmap;\nuse nix::sys::mman::munmap as nix_munmap;\nuse nix::unistd::ftruncate as nix_ftruncate;\n\n/// `ShmemHandle` represents a shared memory area that can be shared by processes over `fork()`.\n/// Unlike shared memory allocated by Postgres, this area is resizable, up to `max_size` that's\n/// specified at creation.\n///\n/// The area is backed by an anonymous file created with `memfd_create()`. The full address space for\n/// `max_size` is reserved up-front with `mmap()`, but whenever you call [`ShmemHandle::set_size`],\n/// the underlying file is resized. Do not access the area beyond the current size. Currently, that\n/// will cause the file to be expanded, but we might use `mprotect()` etc. to enforce that in the\n/// future.\npub struct ShmemHandle {\n    /// memfd file descriptor\n    fd: OwnedFd,\n\n    max_size: usize,\n\n    // Pointer to the beginning of the shared memory area. The header is stored there.\n    shared_ptr: NonNull<SharedStruct>,\n\n    // Pointer to the beginning of the user data\n    pub data_ptr: NonNull<u8>,\n}\n\n/// This is stored at the beginning in the shared memory area.\nstruct SharedStruct {\n    max_size: usize,\n\n    /// Current size of the backing file. 
The high-order bit is used for the [`RESIZE_IN_PROGRESS`] flag.\n    current_size: AtomicUsize,\n}\n\nconst RESIZE_IN_PROGRESS: usize = 1 << 63;\n\nconst HEADER_SIZE: usize = std::mem::size_of::<SharedStruct>();\n\n/// Error type returned by the [`ShmemHandle`] functions.\n#[derive(thiserror::Error, Debug)]\n#[error(\"{msg}: {errno}\")]\npub struct Error {\n    pub msg: String,\n    pub errno: Errno,\n}\n\nimpl Error {\n    fn new(msg: &str, errno: Errno) -> Self {\n        Self {\n            msg: msg.to_string(),\n            errno,\n        }\n    }\n}\n\nimpl ShmemHandle {\n    /// Create a new shared memory area. To communicate between processes, the processes need to be\n    /// `fork()`'d after calling this, so that the `ShmemHandle` is inherited by all processes.\n    ///\n    /// If the `ShmemHandle` is dropped, the memory is unmapped from the current process. Other\n    /// processes can continue using it, however.\n    pub fn new(name: &str, initial_size: usize, max_size: usize) -> Result<Self, Error> {\n        // create the backing anonymous file.\n        let fd = create_backing_file(name)?;\n\n        Self::new_with_fd(fd, initial_size, max_size)\n    }\n\n    fn new_with_fd(fd: OwnedFd, initial_size: usize, max_size: usize) -> Result<Self, Error> {\n        // We reserve the high-order bit for the `RESIZE_IN_PROGRESS` flag, and the actual size\n        // is a little larger than this because of the SharedStruct header. 
Make the upper limit\n        // somewhat smaller than that, because with anything close to that, you'll run out of\n        // memory anyway.\n        assert!(max_size < 1 << 48, \"max size {max_size} too large\");\n\n        assert!(\n            initial_size <= max_size,\n            \"initial size {initial_size} larger than max size {max_size}\"\n        );\n\n        // The actual initial / max size is the one given by the caller, plus the size of\n        // 'SharedStruct'.\n        let initial_size = HEADER_SIZE + initial_size;\n        let max_size = NonZeroUsize::new(HEADER_SIZE + max_size).unwrap();\n\n        // Reserve address space for it with mmap\n        //\n        // TODO: Use MAP_HUGETLB if possible\n        let start_ptr = unsafe {\n            nix_mmap(\n                None,\n                max_size,\n                ProtFlags::PROT_READ | ProtFlags::PROT_WRITE,\n                MapFlags::MAP_SHARED,\n                &fd,\n                0,\n            )\n        }\n        .map_err(|e| Error::new(\"mmap failed\", e))?;\n\n        // Reserve space for the initial size\n        enlarge_file(fd.as_fd(), initial_size as u64)?;\n\n        // Initialize the header\n        let shared: NonNull<SharedStruct> = start_ptr.cast();\n        unsafe {\n            shared.write(SharedStruct {\n                max_size: max_size.into(),\n                current_size: AtomicUsize::new(initial_size),\n            });\n        }\n\n        // The user data begins after the header\n        let data_ptr = unsafe { start_ptr.cast().add(HEADER_SIZE) };\n\n        Ok(Self {\n            fd,\n            max_size: max_size.into(),\n            shared_ptr: shared,\n            data_ptr,\n        })\n    }\n\n    // return reference to the header\n    fn shared(&self) -> &SharedStruct {\n        unsafe { self.shared_ptr.as_ref() }\n    }\n\n    /// Resize the shared memory area. 
`new_size` must not be larger than the `max_size` specified\n    /// when creating the area.\n    ///\n    /// This may only be called from one process/thread at a time. A concurrent call is detected\n    /// and reported as an [`shmem::Error`](Error).\n    pub fn set_size(&self, new_size: usize) -> Result<(), Error> {\n        let new_size = new_size + HEADER_SIZE;\n        let shared = self.shared();\n\n        assert!(\n            new_size <= self.max_size,\n            \"new size ({new_size}) is greater than max size ({})\",\n            self.max_size\n        );\n\n        assert_eq!(self.max_size, shared.max_size);\n\n        // Lock the area by setting the bit in `current_size`\n        //\n        // Ordering::Relaxed would probably be sufficient here, as we don't access any other memory\n        // and the `posix_fallocate`/`ftruncate` call is surely a synchronization point anyway. But\n        // since this is not performance-critical, better safe than sorry.\n        let mut old_size = shared.current_size.load(Ordering::Acquire);\n        loop {\n            if (old_size & RESIZE_IN_PROGRESS) != 0 {\n                return Err(Error::new(\n                    \"concurrent resize detected\",\n                    Errno::UnknownErrno,\n                ));\n            }\n            // Keep publishing the old size while the resize runs, but with the flag bit set so\n            // that a concurrent `set_size` call is rejected above. The new size is only stored\n            // by the unlock below, once the backing file really has that size.\n            match shared.current_size.compare_exchange(\n                old_size,\n                old_size | RESIZE_IN_PROGRESS,\n                Ordering::Acquire,\n                Ordering::Relaxed,\n            ) {\n                Ok(_) => break,\n                Err(x) => old_size = x,\n            }\n        }\n\n        // Ok, we got the lock.\n        //\n        // NB: If anything goes wrong, we *must* clear the bit!\n        let result = {\n            use std::cmp::Ordering::{Equal, Greater, Less};\n            match new_size.cmp(&old_size) {\n                Less => nix_ftruncate(&self.fd, new_size as i64)\n                    .map_err(|e| Error::new(\"could not shrink shmem segment, ftruncate failed\", 
e)),\n                Equal => Ok(()),\n                Greater => enlarge_file(self.fd.as_fd(), new_size as u64),\n            }\n        };\n\n        // Unlock: store the final size with the flag bit cleared (the new size on success, the\n        // old size if the resize failed).\n        shared.current_size.store(\n            if result.is_ok() { new_size } else { old_size },\n            Ordering::Release,\n        );\n\n        result\n    }\n\n    /// Returns the current user-visible size of the shared memory segment.\n    ///\n    /// NOTE: a concurrent [`ShmemHandle::set_size()`] call can change the size at any time.\n    /// It is the caller's responsibility not to access the area beyond the current size.\n    pub fn current_size(&self) -> usize {\n        let total_current_size =\n            self.shared().current_size.load(Ordering::Relaxed) & !RESIZE_IN_PROGRESS;\n        total_current_size - HEADER_SIZE\n    }\n}\n\nimpl Drop for ShmemHandle {\n    fn drop(&mut self) {\n        // SAFETY: The pointer was obtained from mmap() with the given size.\n        // We unmap the entire region.\n        let _ = unsafe { nix_munmap(self.shared_ptr.cast(), self.max_size) };\n        // The fd is dropped automatically by OwnedFd.\n    }\n}\n\n/// Create a \"backing file\" for the shared memory area. On Linux, use `memfd_create()`, to create an\n/// anonymous in-memory file. On macos, fall back to a regular file. 
That's good enough for\n/// development and testing, but in production we want the file to stay in memory.\n///\n/// Disable unused variables warnings because `name` is unused in the macos path.\n#[allow(unused_variables)]\nfn create_backing_file(name: &str) -> Result<OwnedFd, Error> {\n    #[cfg(not(target_os = \"macos\"))]\n    {\n        nix::sys::memfd::memfd_create(name, nix::sys::memfd::MFdFlags::empty())\n            .map_err(|e| Error::new(\"memfd_create failed\", e))\n    }\n    #[cfg(target_os = \"macos\")]\n    {\n        let file = tempfile::tempfile().map_err(|e| {\n            Error::new(\n                \"could not create temporary file to back shmem area\",\n                nix::errno::Errno::from_raw(e.raw_os_error().unwrap_or(0)),\n            )\n        })?;\n        Ok(OwnedFd::from(file))\n    }\n}\n\nfn enlarge_file(fd: BorrowedFd, size: u64) -> Result<(), Error> {\n    // Use posix_fallocate() to enlarge the file. It reserves the space correctly, so that\n    // we don't get a segfault later when trying to actually use it.\n    #[cfg(not(target_os = \"macos\"))]\n    {\n        nix::fcntl::posix_fallocate(fd, 0, size as i64)\n            .map_err(|e| Error::new(\"could not grow shmem segment, posix_fallocate failed\", e))\n    }\n    // As a fallback on macos, which doesn't have posix_fallocate, use plain 'fallocate'\n    #[cfg(target_os = \"macos\")]\n    {\n        nix::unistd::ftruncate(fd, size as i64)\n            .map_err(|e| Error::new(\"could not grow shmem segment, ftruncate failed\", e))\n    }\n}\n\n#[cfg(test)]\nmod tests {\n    use super::*;\n\n    use nix::unistd::ForkResult;\n    use std::ops::Range;\n\n    /// check that all bytes in given range have the expected value.\n    fn assert_range(ptr: *const u8, expected: u8, range: Range<usize>) {\n        for i in range {\n            let b = unsafe { *(ptr.add(i)) };\n            assert_eq!(expected, b, \"unexpected byte at offset {i}\");\n        }\n    }\n\n    /// Write 'b' 
to all bytes in the given range\n    fn write_range(ptr: *mut u8, b: u8, range: Range<usize>) {\n        unsafe { std::ptr::write_bytes(ptr.add(range.start), b, range.end - range.start) };\n    }\n\n    // simple single-process test of growing and shrinking\n    #[test]\n    fn test_shmem_resize() -> Result<(), Error> {\n        let max_size = 1024 * 1024;\n        let init_struct = ShmemHandle::new(\"test_shmem_resize\", 0, max_size)?;\n\n        assert_eq!(init_struct.current_size(), 0);\n\n        // Initial grow\n        let size1 = 10000;\n        init_struct.set_size(size1).unwrap();\n        assert_eq!(init_struct.current_size(), size1);\n\n        // Write some data\n        let data_ptr = init_struct.data_ptr.as_ptr();\n        write_range(data_ptr, 0xAA, 0..size1);\n        assert_range(data_ptr, 0xAA, 0..size1);\n\n        // Shrink\n        let size2 = 5000;\n        init_struct.set_size(size2).unwrap();\n        assert_eq!(init_struct.current_size(), size2);\n\n        // Grow again\n        let size3 = 20000;\n        init_struct.set_size(size3).unwrap();\n        assert_eq!(init_struct.current_size(), size3);\n\n        // Try to read it. The area that was shrunk and grown again should read as all zeros now\n        assert_range(data_ptr, 0xAA, 0..5000);\n        assert_range(data_ptr, 0, 5000..size1);\n\n        // Try to grow beyond max_size\n        //let size4 = max_size + 1;\n        //assert!(init_struct.set_size(size4).is_err());\n\n        // Dropping init_struct should unmap the memory\n        drop(init_struct);\n\n        Ok(())\n    }\n\n    /// This is used in tests to coordinate between test processes. It's like `std::sync::Barrier`,\n    /// but is stored in the shared memory area and works across processes. It's implemented by\n    /// polling, because e.g. 
standard rust mutexes are not guaranteed to work across processes.\n    struct SimpleBarrier {\n        num_procs: usize,\n        count: AtomicUsize,\n    }\n\n    impl SimpleBarrier {\n        unsafe fn init(ptr: *mut SimpleBarrier, num_procs: usize) {\n            unsafe {\n                *ptr = SimpleBarrier {\n                    num_procs,\n                    count: AtomicUsize::new(0),\n                }\n            }\n        }\n\n        pub fn wait(&self) {\n            let old = self.count.fetch_add(1, Ordering::Relaxed);\n\n            let generation = old / self.num_procs;\n\n            let mut current = old + 1;\n            while current < (generation + 1) * self.num_procs {\n                std::thread::sleep(std::time::Duration::from_millis(10));\n                current = self.count.load(Ordering::Relaxed);\n            }\n        }\n    }\n\n    #[test]\n    fn test_multi_process() {\n        // Initialize\n        let max_size = 1_000_000_000_000;\n        let init_struct = ShmemHandle::new(\"test_multi_process\", 0, max_size).unwrap();\n        let ptr = init_struct.data_ptr.as_ptr();\n\n        // Store the SimpleBarrier in the first 1k of the area.\n        init_struct.set_size(10000).unwrap();\n        let barrier_ptr: *mut SimpleBarrier = unsafe {\n            ptr.add(ptr.align_offset(std::mem::align_of::<SimpleBarrier>()))\n                .cast()\n        };\n        unsafe { SimpleBarrier::init(barrier_ptr, 2) };\n        let barrier = unsafe { barrier_ptr.as_ref().unwrap() };\n\n        // Fork another test process. The code after this runs in both processes concurrently.\n        let fork_result = unsafe { nix::unistd::fork().unwrap() };\n\n        // In the parent, fill bytes between 1000..2000. 
In the child, between 2000..3000\n        if fork_result.is_parent() {\n            write_range(ptr, 0xAA, 1000..2000);\n        } else {\n            write_range(ptr, 0xBB, 2000..3000);\n        }\n        barrier.wait();\n        // Verify the contents. (in both processes)\n        assert_range(ptr, 0xAA, 1000..2000);\n        assert_range(ptr, 0xBB, 2000..3000);\n\n        // Grow, from the child this time\n        let size = 10_000_000;\n        if !fork_result.is_parent() {\n            init_struct.set_size(size).unwrap();\n        }\n        barrier.wait();\n\n        // make some writes at the end\n        if fork_result.is_parent() {\n            write_range(ptr, 0xAA, (size - 10)..size);\n        } else {\n            write_range(ptr, 0xBB, (size - 20)..(size - 10));\n        }\n        barrier.wait();\n\n        // Verify the contents. (This runs in both processes)\n        assert_range(ptr, 0, (size - 1000)..(size - 20));\n        assert_range(ptr, 0xBB, (size - 20)..(size - 10));\n        assert_range(ptr, 0xAA, (size - 10)..size);\n\n        if let ForkResult::Parent { child } = fork_result {\n            nix::sys::wait::waitpid(child, None).unwrap();\n        }\n    }\n}\n"
  },
  {
    "path": "libs/neon-shmem/src/sync.rs",
    "content": "//! Simple utilities akin to what's in [`std::sync`] but designed to work with shared memory.\n\nuse std::mem::MaybeUninit;\nuse std::ptr::NonNull;\n\nuse nix::errno::Errno;\n\npub type RwLock<T> = lock_api::RwLock<PthreadRwLock, T>;\npub type RwLockReadGuard<'a, T> = lock_api::RwLockReadGuard<'a, PthreadRwLock, T>;\npub type RwLockWriteGuard<'a, T> = lock_api::RwLockWriteGuard<'a, PthreadRwLock, T>;\npub type ValueReadGuard<'a, T> = lock_api::MappedRwLockReadGuard<'a, PthreadRwLock, T>;\npub type ValueWriteGuard<'a, T> = lock_api::MappedRwLockWriteGuard<'a, PthreadRwLock, T>;\n\n/// Shared memory read-write lock.\npub struct PthreadRwLock(Option<NonNull<libc::pthread_rwlock_t>>);\n\n/// Simple macro that calls a function in the libc namespace and panics if return value is nonzero.\nmacro_rules! libc_checked {\n    ($fn_name:ident ( $($arg:expr),* )) => {{\n        let res = libc::$fn_name($($arg),*);\n        if res != 0 {\n            panic!(\"{} failed with {}\", stringify!($fn_name), Errno::from_raw(res));\n        }\n    }};\n}\n\nimpl PthreadRwLock {\n    /// Creates a new `PthreadRwLock` on top of a pointer to a pthread rwlock.\n    ///\n    /// # Safety\n    /// `lock` must be non-null. 
Every unsafe operation will panic in the event of an error.\n    pub unsafe fn new(lock: *mut libc::pthread_rwlock_t) -> Self {\n        unsafe {\n            let mut attrs = MaybeUninit::uninit();\n            libc_checked!(pthread_rwlockattr_init(attrs.as_mut_ptr()));\n            libc_checked!(pthread_rwlockattr_setpshared(\n                attrs.as_mut_ptr(),\n                libc::PTHREAD_PROCESS_SHARED\n            ));\n            libc_checked!(pthread_rwlock_init(lock, attrs.as_mut_ptr()));\n            // Safety: POSIX specifies that \"any function affecting the attributes\n            // object (including destruction) shall not affect any previously\n            // initialized read-write locks\".\n            libc_checked!(pthread_rwlockattr_destroy(attrs.as_mut_ptr()));\n            Self(Some(NonNull::new_unchecked(lock)))\n        }\n    }\n\n    fn inner(&self) -> NonNull<libc::pthread_rwlock_t> {\n        match self.0 {\n            None => {\n                panic!(\"PthreadRwLock constructed badly - something likely used RawRwLock::INIT\")\n            }\n            Some(x) => x,\n        }\n    }\n}\n\nunsafe impl lock_api::RawRwLock for PthreadRwLock {\n    type GuardMarker = lock_api::GuardSend;\n    const INIT: Self = Self(None);\n\n    fn try_lock_shared(&self) -> bool {\n        unsafe {\n            let res = libc::pthread_rwlock_tryrdlock(self.inner().as_ptr());\n            match res {\n                0 => true,\n                // EBUSY: a writer holds the lock (or is queued for it). EAGAIN: the maximum\n                // number of read locks has been exceeded. Both mean \"not acquired\", which is\n                // an expected outcome of a try-lock and not an error.\n                libc::EBUSY | libc::EAGAIN => false,\n                _ => panic!(\n                    \"pthread_rwlock_tryrdlock failed with {}\",\n                    Errno::from_raw(res)\n                ),\n            }\n        }\n    }\n\n    fn try_lock_exclusive(&self) -> bool {\n        unsafe {\n            let res = libc::pthread_rwlock_trywrlock(self.inner().as_ptr());\n            match res {\n                0 => true,\n                // POSIX reports an already-held lock as EBUSY. EAGAIN is still accepted so\n                // that nothing that used to return `false` starts panicking.\n                libc::EBUSY | libc::EAGAIN => false,\n                _ => panic!(\"try_wrlock failed with {}\", 
Errno::from_raw(res)),\n            }\n        }\n    }\n\n    fn lock_shared(&self) {\n        unsafe {\n            libc_checked!(pthread_rwlock_rdlock(self.inner().as_ptr()));\n        }\n    }\n\n    fn lock_exclusive(&self) {\n        unsafe {\n            libc_checked!(pthread_rwlock_wrlock(self.inner().as_ptr()));\n        }\n    }\n\n    unsafe fn unlock_exclusive(&self) {\n        unsafe {\n            libc_checked!(pthread_rwlock_unlock(self.inner().as_ptr()));\n        }\n    }\n\n    unsafe fn unlock_shared(&self) {\n        unsafe {\n            libc_checked!(pthread_rwlock_unlock(self.inner().as_ptr()));\n        }\n    }\n}\n"
  },
  {
    "path": "libs/pageserver_api/Cargo.toml",
    "content": "[package]\nname = \"pageserver_api\"\nversion = \"0.1.0\"\nedition = \"2024\"\nlicense.workspace = true\n\n[features]\ndefault = [\"io-align-512\"]\n# See pageserver/Cargo.toml\ntesting = [\"dep:nix\"]\n# Direct IO alignment options (mutually exclusive)\nio-align-512 = []\nio-align-4k = []\n\n[dependencies]\nserde.workspace = true\nserde_with.workspace = true\nserde_json.workspace = true\nconst_format.workspace = true\nanyhow.workspace = true\nbytes.workspace = true\nbyteorder.workspace = true\nutils.workspace = true\npostgres_ffi_types.workspace = true\npostgres_versioninfo.workspace = true\nposthog_client_lite.workspace = true\nenum-map.workspace = true\nstrum.workspace = true\nstrum_macros.workspace = true\nhex.workspace = true\nhumantime.workspace = true\nthiserror.workspace = true\nhumantime-serde.workspace = true\nchrono = { workspace = true, features = [\"serde\"] }\nitertools.workspace = true\nstorage_broker.workspace = true\ncamino = { workspace = true, features = [\"serde1\"] }\nremote_storage.workspace = true\npostgres_backend.workspace = true\nnix = { workspace = true, optional = true }\nreqwest.workspace = true\nrand.workspace = true\ntracing.workspace = true\ntracing-utils.workspace = true\nonce_cell.workspace = true\n\n[dev-dependencies]\nbincode.workspace = true\nrand.workspace = true\n"
  },
  {
    "path": "libs/pageserver_api/src/config/tests.rs",
    "content": "use super::*;\n\n#[test]\nfn test_node_metadata_v1_backward_compatibilty() {\n    let v1 = serde_json::to_vec(&serde_json::json!({\n        \"host\": \"localhost\",\n        \"port\": 23,\n        \"http_host\": \"localhost\",\n        \"http_port\": 42,\n    }));\n\n    assert_eq!(\n        serde_json::from_slice::<NodeMetadata>(&v1.unwrap()).unwrap(),\n        NodeMetadata {\n            postgres_host: \"localhost\".to_string(),\n            postgres_port: 23,\n            grpc_host: None,\n            grpc_port: None,\n            http_host: \"localhost\".to_string(),\n            http_port: 42,\n            https_port: None,\n            other: HashMap::new(),\n        }\n    )\n}\n\n#[test]\nfn test_node_metadata_v2_backward_compatibilty() {\n    let v2 = serde_json::to_vec(&serde_json::json!({\n        \"host\": \"localhost\",\n        \"port\": 23,\n        \"http_host\": \"localhost\",\n        \"http_port\": 42,\n        \"https_port\": 123,\n    }));\n\n    assert_eq!(\n        serde_json::from_slice::<NodeMetadata>(&v2.unwrap()).unwrap(),\n        NodeMetadata {\n            postgres_host: \"localhost\".to_string(),\n            postgres_port: 23,\n            grpc_host: None,\n            grpc_port: None,\n            http_host: \"localhost\".to_string(),\n            http_port: 42,\n            https_port: Some(123),\n            other: HashMap::new(),\n        }\n    )\n}\n\n#[test]\nfn test_node_metadata_v3_backward_compatibilty() {\n    let v3 = serde_json::to_vec(&serde_json::json!({\n        \"host\": \"localhost\",\n        \"port\": 23,\n        \"grpc_host\": \"localhost\",\n        \"grpc_port\": 51,\n        \"http_host\": \"localhost\",\n        \"http_port\": 42,\n        \"https_port\": 123,\n    }));\n\n    assert_eq!(\n        serde_json::from_slice::<NodeMetadata>(&v3.unwrap()).unwrap(),\n        NodeMetadata {\n            postgres_host: \"localhost\".to_string(),\n            postgres_port: 23,\n            grpc_host: 
Some(\"localhost\".to_string()),\n            grpc_port: Some(51),\n            http_host: \"localhost\".to_string(),\n            http_port: 42,\n            https_port: Some(123),\n            other: HashMap::new(),\n        }\n    )\n}\n"
  },
  {
    "path": "libs/pageserver_api/src/config.rs",
    "content": "use camino::Utf8PathBuf;\n\n#[cfg(test)]\nmod tests;\n\nuse const_format::formatcp;\nuse posthog_client_lite::PostHogClientConfig;\nuse utils::serde_percent::Percent;\npub const DEFAULT_PG_LISTEN_PORT: u16 = 64000;\npub const DEFAULT_PG_LISTEN_ADDR: &str = formatcp!(\"127.0.0.1:{DEFAULT_PG_LISTEN_PORT}\");\npub const DEFAULT_HTTP_LISTEN_PORT: u16 = 9898;\npub const DEFAULT_HTTP_LISTEN_ADDR: &str = formatcp!(\"127.0.0.1:{DEFAULT_HTTP_LISTEN_PORT}\");\n// TODO: gRPC is disabled by default for now, but the port is used in neon_local.\npub const DEFAULT_GRPC_LISTEN_PORT: u16 = 51051; // storage-broker already uses 50051\n\nuse std::collections::HashMap;\nuse std::fmt::Display;\nuse std::num::{NonZeroU64, NonZeroUsize};\nuse std::str::FromStr;\nuse std::time::Duration;\n\nuse postgres_backend::AuthType;\nuse remote_storage::RemoteStorageConfig;\nuse serde_with::serde_as;\nuse utils::logging::LogFormat;\n\nuse crate::models::{ImageCompressionAlgorithm, LsnLease};\n\n// Certain metadata (e.g. externally-addressable name, AZ) is delivered\n// as a separate structure.  
This information is not needed by the pageserver\n// itself, it is only used for registering the pageserver with the control\n// plane and/or storage controller.\n#[derive(PartialEq, Eq, Debug, serde::Serialize, serde::Deserialize)]\npub struct NodeMetadata {\n    #[serde(rename = \"host\")]\n    pub postgres_host: String,\n    #[serde(rename = \"port\")]\n    pub postgres_port: u16,\n    pub grpc_host: Option<String>,\n    pub grpc_port: Option<u16>,\n    pub http_host: String,\n    pub http_port: u16,\n    pub https_port: Option<u16>,\n\n    // Deployment tools may write fields to the metadata file beyond what we\n    // use in this type: this type intentionally only names fields that require.\n    #[serde(flatten)]\n    pub other: HashMap<String, serde_json::Value>,\n}\n\nimpl Display for NodeMetadata {\n    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {\n        write!(\n            f,\n            \"postgresql://{}:{} \",\n            self.postgres_host, self.postgres_port\n        )?;\n        if let Some(grpc_host) = &self.grpc_host {\n            let grpc_port = self.grpc_port.unwrap_or_default();\n            write!(f, \"grpc://{grpc_host}:{grpc_port} \")?;\n        }\n        write!(f, \"http://{}:{} \", self.http_host, self.http_port)?;\n        write!(f, \"other:{:?}\", self.other)?;\n        Ok(())\n    }\n}\n\n/// PostHog integration config. 
This is used in pageserver, storcon, and neon_local.\n/// Ensure backward compatibility when adding new fields.\n#[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]\npub struct PostHogConfig {\n    /// PostHog project ID\n    #[serde(default)]\n    #[serde(skip_serializing_if = \"Option::is_none\")]\n    pub project_id: Option<String>,\n    /// Server-side (private) API key\n    #[serde(default)]\n    #[serde(skip_serializing_if = \"Option::is_none\")]\n    pub server_api_key: Option<String>,\n    /// Client-side (public) API key\n    #[serde(default)]\n    #[serde(skip_serializing_if = \"Option::is_none\")]\n    pub client_api_key: Option<String>,\n    /// Private API URL\n    #[serde(default)]\n    #[serde(skip_serializing_if = \"Option::is_none\")]\n    pub private_api_url: Option<String>,\n    /// Public API URL\n    #[serde(default)]\n    #[serde(skip_serializing_if = \"Option::is_none\")]\n    pub public_api_url: Option<String>,\n    /// Refresh interval for the feature flag spec.\n    /// The storcon will push the feature flag spec to the pageserver. 
If the pageserver does not receive\n    /// the spec for `refresh_interval`, it will fetch the spec from the PostHog API.\n    #[serde(default)]\n    #[serde(skip_serializing_if = \"Option::is_none\")]\n    #[serde(with = \"humantime_serde\")]\n    pub refresh_interval: Option<Duration>,\n}\n\nimpl PostHogConfig {\n    pub fn try_into_posthog_config(self) -> Result<PostHogClientConfig, &'static str> {\n        let Some(project_id) = self.project_id else {\n            return Err(\"project_id is required\");\n        };\n        let Some(server_api_key) = self.server_api_key else {\n            return Err(\"server_api_key is required\");\n        };\n        let Some(client_api_key) = self.client_api_key else {\n            return Err(\"client_api_key is required\");\n        };\n        let Some(private_api_url) = self.private_api_url else {\n            return Err(\"private_api_url is required\");\n        };\n        let Some(public_api_url) = self.public_api_url else {\n            return Err(\"public_api_url is required\");\n        };\n        Ok(PostHogClientConfig {\n            project_id,\n            server_api_key,\n            client_api_key,\n            private_api_url,\n            public_api_url,\n        })\n    }\n}\n\n/// `pageserver.toml`\n///\n/// We use serde derive with `#[serde(default)]` to generate a deserializer\n/// that fills in the default values for each config field.\n///\n/// If there cannot be a static default value because we need to make runtime\n/// checks to determine the default, make it an `Option` (which defaults to None).\n/// The runtime check should be done in the consuming crate, i.e., `pageserver`.\n///\n/// Unknown fields are silently ignored during deserialization.\n/// The alternative, which we used in the past, was to set `deny_unknown_fields`,\n/// which fails deserialization, and hence pageserver startup, if there is an unknown field.\n/// The reason we don't do that anymore is that it complicates\n/// usage of 
config fields for feature flagging, which we commonly do for\n/// region-by-region rollouts.\n/// The complications mainly arise because the `pageserver.toml` contents on a\n/// prod server have a separate lifecycle from the pageserver binary.\n/// For instance, `pageserver.toml` contents today are defined in the internal\n/// infra repo, and thus introducing a new config field to pageserver and\n/// rolling it out to prod servers are separate commits in separate repos\n/// that can't be made or rolled back atomically.\n/// Rollbacks in particular pose a risk with deny_unknown_fields because\n/// the old pageserver binary may reject a new config field, resulting in\n/// an outage unless the person doing the pageserver rollback remembers\n/// to also revert the commit that added the config field into the\n/// `pageserver.toml` templates in the internal infra repo.\n/// (A pre-deploy config check would eliminate this risk during rollbacks,\n///  cf [here](https://github.com/neondatabase/cloud/issues/24349).)\n/// In addition to this compatibility problem during emergency rollbacks,\n/// deny_unknown_fields adds further complications when decommissioning a feature\n/// flag: with deny_unknown_fields, we can't remove a flag from the [`ConfigToml`]\n/// until all prod servers' `pageserver.toml` files have been updated to a version\n/// that doesn't specify the flag. Otherwise new software would fail to start up.\n/// This adds the requirement for an intermediate step where the new config field\n/// is accepted but ignored, prolonging the decommissioning process by an entire\n/// release cycle.\n/// By contrast, with unknown fields silently ignored, decommissioning a feature\n/// flag is a one-step process: we can skip the intermediate step and straight\n/// remove the field from the [`ConfigToml`]. 
We leave the field in the\n/// `pageserver.toml` files on prod servers until we reach certainty that we\n/// will not roll back to old software whose behavior was dependent on config.\n/// Then we can remove the field from the templates in the internal infra repo.\n/// This process is [documented internally](\n/// https://docs.neon.build/storage/pageserver_configuration.html).\n///\n/// Note that the above relaxed compatibility for the config format does NOT APPLY\n/// TO THE STORAGE FORMAT. As general guidance, when introducing storage format\n/// changes, ensure that the potential rollback target version will be compatible\n/// with the new format. This must hold regardless of what flags are set in the `pageserver.toml`:\n/// any format version that exists in an environment must be compatible with the software that runs there.\n/// Use a pageserver.toml flag only to gate whether software _writes_ the new format.\n/// For more compatibility considerations, refer to [internal docs](\n/// https://docs.neon.build/storage/compat.html?highlight=compat#format-versions--compatibility)\n#[serde_as]\n#[derive(Clone, Debug, serde::Deserialize, serde::Serialize)]\n#[serde(default)]\npub struct ConfigToml {\n    // types mapped 1:1 into the runtime PageServerConfig type\n    pub listen_pg_addr: String,\n    pub listen_http_addr: String,\n    pub listen_https_addr: Option<String>,\n    pub listen_grpc_addr: Option<String>,\n    pub ssl_key_file: Utf8PathBuf,\n    pub ssl_cert_file: Utf8PathBuf,\n    #[serde(with = \"humantime_serde\")]\n    pub ssl_cert_reload_period: Duration,\n    pub ssl_ca_file: Option<Utf8PathBuf>,\n    pub availability_zone: Option<String>,\n    #[serde(with = \"humantime_serde\")]\n    pub wait_lsn_timeout: Duration,\n    #[serde(with = \"humantime_serde\")]\n    pub wal_redo_timeout: Duration,\n    pub superuser: String,\n    pub locale: String,\n    pub page_cache_size: usize,\n    pub max_file_descriptors: usize,\n    pub pg_distrib_dir: 
Option<Utf8PathBuf>,\n    #[serde_as(as = \"serde_with::DisplayFromStr\")]\n    pub http_auth_type: AuthType,\n    #[serde_as(as = \"serde_with::DisplayFromStr\")]\n    pub pg_auth_type: AuthType,\n    pub grpc_auth_type: AuthType,\n    pub auth_validation_public_key_path: Option<Utf8PathBuf>,\n    pub remote_storage: Option<RemoteStorageConfig>,\n    pub tenant_config: TenantConfigToml,\n    #[serde_as(as = \"serde_with::DisplayFromStr\")]\n    pub broker_endpoint: storage_broker::Uri,\n    #[serde(with = \"humantime_serde\")]\n    pub broker_keepalive_interval: Duration,\n    #[serde_as(as = \"serde_with::DisplayFromStr\")]\n    pub log_format: LogFormat,\n    pub concurrent_tenant_warmup: NonZeroUsize,\n    pub concurrent_tenant_size_logical_size_queries: NonZeroUsize,\n    #[serde(with = \"humantime_serde\")]\n    pub metric_collection_interval: Duration,\n    pub metric_collection_endpoint: Option<reqwest::Url>,\n    pub metric_collection_bucket: Option<RemoteStorageConfig>,\n    #[serde(with = \"humantime_serde\")]\n    pub synthetic_size_calculation_interval: Duration,\n    pub disk_usage_based_eviction: DiskUsageEvictionTaskConfig,\n    pub test_remote_failures: u64,\n    pub test_remote_failures_probability: u64,\n    pub ondemand_download_behavior_treat_error_as_warn: bool,\n    #[serde(with = \"humantime_serde\")]\n    pub background_task_maximum_delay: Duration,\n    pub control_plane_api: Option<reqwest::Url>,\n    pub control_plane_api_token: Option<String>,\n    pub control_plane_emergency_mode: bool,\n    /// Unstable feature: subject to change or removal without notice.\n    /// See <https://github.com/neondatabase/neon/pull/9218>.\n    pub import_pgdata_upcall_api: Option<reqwest::Url>,\n    /// Unstable feature: subject to change or removal without notice.\n    /// See <https://github.com/neondatabase/neon/pull/9218>.\n    pub import_pgdata_upcall_api_token: Option<String>,\n    /// Unstable feature: subject to change or removal without notice.\n 
   /// See <https://github.com/neondatabase/neon/pull/9218>.\n    pub import_pgdata_aws_endpoint_url: Option<reqwest::Url>,\n    pub heatmap_upload_concurrency: usize,\n    pub secondary_download_concurrency: usize,\n    pub virtual_file_io_engine: Option<crate::models::virtual_file::IoEngineKind>,\n    pub ingest_batch_size: u64,\n    pub max_vectored_read_bytes: MaxVectoredReadBytes,\n    pub max_get_vectored_keys: MaxGetVectoredKeys,\n    pub image_compression: ImageCompressionAlgorithm,\n    pub timeline_offloading: bool,\n    pub ephemeral_bytes_per_memory_kb: usize,\n    pub l0_flush: Option<crate::models::L0FlushConfig>,\n    pub virtual_file_io_mode: Option<crate::models::virtual_file::IoMode>,\n    #[serde(skip_serializing_if = \"Option::is_none\")]\n    pub no_sync: Option<bool>,\n    pub page_service_pipelining: PageServicePipeliningConfig,\n    pub get_vectored_concurrent_io: GetVectoredConcurrentIo,\n    pub enable_read_path_debugging: Option<bool>,\n    #[serde(skip_serializing_if = \"Option::is_none\")]\n    pub validate_wal_contiguity: Option<bool>,\n    #[serde(skip_serializing_if = \"Option::is_none\")]\n    pub load_previous_heatmap: Option<bool>,\n    #[serde(skip_serializing_if = \"Option::is_none\")]\n    pub generate_unarchival_heatmap: Option<bool>,\n    pub tracing: Option<Tracing>,\n    pub enable_tls_page_service_api: bool,\n    pub dev_mode: bool,\n    #[serde(skip_serializing_if = \"Option::is_none\")]\n    pub posthog_config: Option<PostHogConfig>,\n    pub timeline_import_config: TimelineImportConfig,\n    #[serde(skip_serializing_if = \"Option::is_none\")]\n    pub basebackup_cache_config: Option<BasebackupCacheConfig>,\n    #[serde(skip_serializing_if = \"Option::is_none\")]\n    pub image_layer_generation_large_timeline_threshold: Option<u64>,\n    pub force_metric_collection_on_scrape: bool,\n}\n\n#[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]\n#[serde(default)]\npub struct DiskUsageEvictionTaskConfig 
{\n    pub max_usage_pct: utils::serde_percent::Percent,\n    pub min_avail_bytes: u64,\n    #[serde(with = \"humantime_serde\")]\n    pub period: Duration,\n    #[cfg(feature = \"testing\")]\n    pub mock_statvfs: Option<statvfs::mock::Behavior>,\n    /// Select sorting for evicted layers\n    #[serde(default)]\n    pub eviction_order: EvictionOrder,\n    pub enabled: bool,\n}\n\nimpl Default for DiskUsageEvictionTaskConfig {\n    fn default() -> Self {\n        Self {\n            max_usage_pct: Percent::new(80).unwrap(),\n            min_avail_bytes: 2_000_000_000,\n            period: Duration::from_secs(60),\n            #[cfg(feature = \"testing\")]\n            mock_statvfs: None,\n            eviction_order: EvictionOrder::default(),\n            enabled: true,\n        }\n    }\n}\n\n#[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]\n#[serde(tag = \"mode\", rename_all = \"kebab-case\")]\npub enum PageServicePipeliningConfig {\n    Serial,\n    Pipelined(PageServicePipeliningConfigPipelined),\n}\n#[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]\npub struct PageServicePipeliningConfigPipelined {\n    /// Failed config parsing and validation if larger than `max_get_vectored_keys`.\n    pub max_batch_size: NonZeroUsize,\n    pub execution: PageServiceProtocolPipelinedExecutionStrategy,\n    // The default below is such that new versions of the software can start\n    // with the old configuration.\n    #[serde(default)]\n    pub batching: PageServiceProtocolPipelinedBatchingStrategy,\n}\n\n#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]\n#[serde(rename_all = \"kebab-case\")]\npub enum PageServiceProtocolPipelinedExecutionStrategy {\n    ConcurrentFutures,\n    Tasks,\n}\n\n#[derive(Default, Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]\n#[serde(rename_all = \"kebab-case\")]\npub enum PageServiceProtocolPipelinedBatchingStrategy {\n    /// All 
get page requests in a batch will be at the same LSN\n    #[default]\n    UniformLsn,\n    /// Get page requests in a batch may be at different LSN\n    ///\n    /// One key cannot be present more than once at different LSNs in\n    /// the same batch.\n    ScatteredLsn,\n}\n\n#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]\n#[serde(tag = \"mode\", rename_all = \"kebab-case\")]\npub enum GetVectoredConcurrentIo {\n    /// The read path is fully sequential: layers are visited\n    /// one after the other and IOs are issued and waited upon\n    /// from the same task that traverses the layers.\n    Sequential,\n    /// The read path still traverses layers sequentially, and\n    /// index blocks will be read into the PS PageCache from\n    /// that task, with waiting.\n    /// But data IOs are dispatched and waited upon from a sidecar\n    /// task so that the traversing task can continue to traverse\n    /// layers while the IOs are in flight.\n    /// If the PS PageCache miss rate is low, this improves\n    /// throughput dramatically.\n    SidecarTask,\n}\n\n#[derive(Debug, Copy, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]\npub struct Ratio {\n    pub numerator: usize,\n    pub denominator: usize,\n}\n\n#[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]\npub struct OtelExporterConfig {\n    pub endpoint: String,\n    pub protocol: OtelExporterProtocol,\n    #[serde(with = \"humantime_serde\")]\n    pub timeout: Duration,\n}\n\n#[derive(Debug, Copy, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]\n#[serde(rename_all = \"kebab-case\")]\npub enum OtelExporterProtocol {\n    Grpc,\n    HttpBinary,\n    HttpJson,\n}\n\n#[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]\npub struct Tracing {\n    pub sampling_ratio: Ratio,\n    pub export_config: OtelExporterConfig,\n}\n\nimpl From<&OtelExporterConfig> for tracing_utils::ExportConfig {\n    fn from(val: 
&OtelExporterConfig) -> Self {\n        tracing_utils::ExportConfig {\n            endpoint: Some(val.endpoint.clone()),\n            protocol: val.protocol.into(),\n            timeout: Some(val.timeout),\n        }\n    }\n}\n\nimpl From<OtelExporterProtocol> for tracing_utils::Protocol {\n    fn from(val: OtelExporterProtocol) -> Self {\n        match val {\n            OtelExporterProtocol::Grpc => tracing_utils::Protocol::Grpc,\n            OtelExporterProtocol::HttpJson => tracing_utils::Protocol::HttpJson,\n            OtelExporterProtocol::HttpBinary => tracing_utils::Protocol::HttpBinary,\n        }\n    }\n}\n\n#[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]\npub struct TimelineImportConfig {\n    pub import_job_concurrency: NonZeroUsize,\n    pub import_job_soft_size_limit: NonZeroUsize,\n    pub import_job_checkpoint_threshold: NonZeroUsize,\n    /// Max size of the remote storage partial read done by any job\n    pub import_job_max_byte_range_size: NonZeroUsize,\n}\n\n#[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]\n#[serde(default)]\npub struct BasebackupCacheConfig {\n    #[serde(with = \"humantime_serde\")]\n    pub cleanup_period: Duration,\n    /// Maximum total size of basebackup cache entries on disk in bytes.\n    /// The cache may slightly exceed this limit because we do not know\n    /// the exact size of the cache entry until it's written to disk.\n    pub max_total_size_bytes: u64,\n    // TODO(diko): support max_entry_size_bytes.\n    // pub max_entry_size_bytes: u64,\n    pub max_size_entries: usize,\n    /// Size of the channel used to send prepare requests to the basebackup cache worker.\n    /// If exceeded, new prepare requests will be dropped.\n    pub prepare_channel_size: usize,\n}\n\nimpl Default for BasebackupCacheConfig {\n    fn default() -> Self {\n        Self {\n            cleanup_period: Duration::from_secs(60),\n            max_total_size_bytes: 1024 * 1024 * 
1024, // 1 GiB\n            // max_entry_size_bytes: 16 * 1024 * 1024,   // 16 MiB\n            max_size_entries: 10000,\n            prepare_channel_size: 100,\n        }\n    }\n}\n\npub mod statvfs {\n    pub mod mock {\n        #[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]\n        #[serde(tag = \"type\")]\n        pub enum Behavior {\n            Success {\n                blocksize: u64,\n                total_blocks: u64,\n                name_filter: Option<utils::serde_regex::Regex>,\n            },\n            #[cfg(feature = \"testing\")]\n            Failure { mocked_error: MockedError },\n        }\n\n        #[cfg(feature = \"testing\")]\n        #[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]\n        #[allow(clippy::upper_case_acronyms)]\n        pub enum MockedError {\n            EIO,\n        }\n\n        #[cfg(feature = \"testing\")]\n        impl From<MockedError> for nix::Error {\n            fn from(e: MockedError) -> Self {\n                match e {\n                    MockedError::EIO => nix::Error::EIO,\n                }\n            }\n        }\n    }\n}\n\n#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]\n#[serde(tag = \"type\", content = \"args\")]\npub enum EvictionOrder {\n    RelativeAccessed {\n        highest_layer_count_loses_first: bool,\n    },\n}\n\nimpl Default for EvictionOrder {\n    fn default() -> Self {\n        Self::RelativeAccessed {\n            highest_layer_count_loses_first: true,\n        }\n    }\n}\n\n#[derive(Copy, Clone, Debug, PartialEq, Eq, serde::Serialize, serde::Deserialize)]\n#[serde(transparent)]\npub struct MaxVectoredReadBytes(pub NonZeroUsize);\n\n#[derive(Copy, Clone, Debug, PartialEq, Eq, serde::Serialize, serde::Deserialize)]\n#[serde(transparent)]\npub struct MaxGetVectoredKeys(NonZeroUsize);\n\nimpl MaxGetVectoredKeys {\n    pub fn get(&self) -> usize {\n        self.0.get()\n    }\n}\n\n/// 
Tenant-level configuration values, used for various purposes.\n#[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]\n#[serde(default)]\npub struct TenantConfigToml {\n    // Flush out an inmemory layer, if it's holding WAL older than this\n    // This puts a backstop on how much WAL needs to be re-digested if the\n    // page server crashes.\n    // This parameter actually determines L0 layer file size.\n    pub checkpoint_distance: u64,\n    // Inmemory layer is also flushed at least once in checkpoint_timeout to\n    // eventually upload WAL after activity is stopped.\n    #[serde(with = \"humantime_serde\")]\n    pub checkpoint_timeout: Duration,\n    // Target file size, when creating image and delta layers.\n    // This parameter determines L1 layer file size.\n    pub compaction_target_size: u64,\n    // How often to check if there's compaction work to be done.\n    // Duration::ZERO means automatic compaction is disabled.\n    #[serde(with = \"humantime_serde\")]\n    pub compaction_period: Duration,\n    /// Level0 delta layer threshold for compaction.\n    pub compaction_threshold: usize,\n    /// Controls the amount of L0 included in a single compaction iteration.\n    /// The unit is `checkpoint_distance`, i.e., a size.\n    /// We add L0s to the set of layers to compact until their cumulative\n    /// size exceeds `compaction_upper_limit * checkpoint_distance`.\n    pub compaction_upper_limit: usize,\n    pub compaction_algorithm: crate::models::CompactionAlgorithmSettings,\n    /// If true, enable shard ancestor compaction (enabled by default).\n    pub compaction_shard_ancestor: bool,\n    /// If true, compact down L0 across all tenant timelines before doing regular compaction. L0\n    /// compaction must be responsive to avoid read amp during heavy ingestion. Defaults to true.\n    pub compaction_l0_first: bool,\n    /// If true, use a separate semaphore (i.e. concurrency limit) for the L0 compaction pass. 
Only\n    /// has an effect if `compaction_l0_first` is true. Defaults to true.\n    pub compaction_l0_semaphore: bool,\n    /// Level0 delta layer threshold at which to delay layer flushes such that they take 2x as long,\n    /// and block on layer flushes during ephemeral layer rolls, for compaction backpressure. This\n    /// helps compaction keep up with WAL ingestion, and avoids read amplification blowing up.\n    /// Should be >compaction_threshold. 0 to disable. Defaults to 3x compaction_threshold.\n    pub l0_flush_delay_threshold: Option<usize>,\n    /// Level0 delta layer threshold at which to stall layer flushes. Must be >compaction_threshold\n    /// to avoid deadlock. 0 to disable. Disabled by default.\n    pub l0_flush_stall_threshold: Option<usize>,\n    // Determines how much history is retained, to allow\n    // branching and read replicas at an older point in time.\n    // The unit is #of bytes of WAL.\n    // Page versions older than this are garbage collected away.\n    pub gc_horizon: u64,\n    // Interval at which garbage collection is triggered.\n    // Duration::ZERO means automatic GC is disabled\n    #[serde(with = \"humantime_serde\")]\n    pub gc_period: Duration,\n    // Delta layer churn threshold to create L1 image layers.\n    pub image_creation_threshold: usize,\n    // HADRON\n    // When the timeout is reached, PageServer will (1) force compact any remaining L0 deltas and\n    // (2) create image layers if there are any L1 deltas.\n    #[serde(with = \"humantime_serde\")]\n    pub image_layer_force_creation_period: Option<Duration>,\n    // Determines how much history is retained, to allow\n    // branching and read replicas at an older point in time.\n    // The unit is time.\n    // Page versions older than this are garbage collected away.\n    #[serde(with = \"humantime_serde\")]\n    pub pitr_interval: Duration,\n    /// Maximum amount of time to wait while opening a connection to receive wal, before erroring.\n    
#[serde(with = \"humantime_serde\")]\n    pub walreceiver_connect_timeout: Duration,\n    /// Considers safekeepers stalled after no WAL updates were received longer than this threshold.\n    /// A stalled safekeeper will be changed to a newer one when it appears.\n    #[serde(with = \"humantime_serde\")]\n    pub lagging_wal_timeout: Duration,\n    /// Considers safekeepers lagging when their WAL is behind another safekeeper for more than this threshold.\n    /// A lagging safekeeper will be changed after `lagging_wal_timeout` time elapses since the last WAL update,\n    /// to avoid eager reconnects.\n    pub max_lsn_wal_lag: NonZeroU64,\n    pub eviction_policy: crate::models::EvictionPolicy,\n    pub min_resident_size_override: Option<u64>,\n    // See the corresponding metric's help string.\n    #[serde(with = \"humantime_serde\")]\n    pub evictions_low_residence_duration_metric_threshold: Duration,\n\n    /// If non-zero, the period between uploads of a heatmap from attached tenants.  
This\n    /// may be disabled if a Tenant will not have secondary locations: only secondary\n    /// locations will use the heatmap uploaded by attached locations.\n    #[serde(with = \"humantime_serde\")]\n    pub heatmap_period: Duration,\n\n    /// If true then SLRU segments are downloaded on demand, if false SLRU segments are included in basebackup\n    pub lazy_slru_download: bool,\n\n    pub timeline_get_throttle: crate::models::ThrottleConfig,\n\n    // How much WAL must be ingested before checking again whether a new image layer is required.\n    // Expressed in multiples of checkpoint distance.\n    pub image_layer_creation_check_threshold: u8,\n\n    // How many multiples of L0 `compaction_threshold` will preempt image layer creation and do L0 compaction.\n    // Set to 0 to disable preemption.\n    pub image_creation_preempt_threshold: usize,\n\n    /// The length for an explicit LSN lease request.\n    /// Layers needed to reconstruct pages at LSN will not be GC-ed during this interval.\n    #[serde(with = \"humantime_serde\")]\n    pub lsn_lease_length: Duration,\n\n    /// The length for an implicit LSN lease granted as part of `get_lsn_by_timestamp` request.\n    /// Layers needed to reconstruct pages at LSN will not be GC-ed during this interval.\n    #[serde(with = \"humantime_serde\")]\n    pub lsn_lease_length_for_ts: Duration,\n\n    /// Enable auto-offloading of timelines.\n    /// (either this flag or the pageserver-global one need to be set)\n    pub timeline_offloading: bool,\n\n    /// Enable rel_size_v2 for this tenant. 
Once enabled, the tenant will persist this information into\n    /// `index_part.json`, and it cannot be reversed.\n    pub rel_size_v2_enabled: bool,\n\n    // gc-compaction related configs\n    /// Enable automatic gc-compaction trigger on this tenant.\n    pub gc_compaction_enabled: bool,\n    /// Enable verification of gc-compaction results.\n    pub gc_compaction_verification: bool,\n    /// The initial threshold for gc-compaction in KB. Once the total size of layers below the gc-horizon is above this threshold,\n    /// gc-compaction will be triggered.\n    pub gc_compaction_initial_threshold_kb: u64,\n    /// The ratio that triggers the auto gc-compaction. If (the total size of layers between L2 LSN and gc-horizon) / (size below the L2 LSN)\n    /// is above this ratio, gc-compaction will be triggered.\n    pub gc_compaction_ratio_percent: u64,\n    /// Tenant level performance sampling ratio override. Controls the ratio of get page requests\n    /// that will get perf sampling for the tenant.\n    pub sampling_ratio: Option<Ratio>,\n\n    /// Capacity of relsize snapshot cache (used by replicas).\n    pub relsize_snapshot_cache_capacity: usize,\n\n    /// Enable preparing basebackup on XLOG_CHECKPOINT_SHUTDOWN and using it in basebackup requests.\n    // FIXME: Remove skip_serializing_if when the feature is stable.\n    #[serde(skip_serializing_if = \"std::ops::Not::not\")]\n    pub basebackup_cache_enabled: bool,\n}\n\npub mod defaults {\n    pub use storage_broker::DEFAULT_ENDPOINT as BROKER_DEFAULT_ENDPOINT;\n\n    use crate::models::ImageCompressionAlgorithm;\n\n    pub const DEFAULT_WAIT_LSN_TIMEOUT: &str = \"300 s\";\n    pub const DEFAULT_WAL_REDO_TIMEOUT: &str = \"60 s\";\n\n    pub const DEFAULT_SUPERUSER: &str = \"cloud_admin\";\n    pub const DEFAULT_LOCALE: &str = if cfg!(target_os = \"macos\") {\n        \"C\"\n    } else {\n        \"C.UTF-8\"\n    };\n\n    pub const DEFAULT_PAGE_CACHE_SIZE: usize = 8192;\n    pub const 
DEFAULT_MAX_FILE_DESCRIPTORS: usize = 100;\n\n    pub const DEFAULT_LOG_FORMAT: &str = \"plain\";\n\n    pub const DEFAULT_CONCURRENT_TENANT_WARMUP: usize = 8;\n\n    pub const DEFAULT_CONCURRENT_TENANT_SIZE_LOGICAL_SIZE_QUERIES: usize = 1;\n\n    pub const DEFAULT_METRIC_COLLECTION_INTERVAL: &str = \"10 min\";\n    pub const DEFAULT_METRIC_COLLECTION_ENDPOINT: Option<reqwest::Url> = None;\n    pub const DEFAULT_SYNTHETIC_SIZE_CALCULATION_INTERVAL: &str = \"10 min\";\n    pub const DEFAULT_BACKGROUND_TASK_MAXIMUM_DELAY: &str = \"10s\";\n\n    pub const DEFAULT_HEATMAP_UPLOAD_CONCURRENCY: usize = 8;\n    pub const DEFAULT_SECONDARY_DOWNLOAD_CONCURRENCY: usize = 1;\n\n    pub const DEFAULT_INGEST_BATCH_SIZE: u64 = 100;\n\n    /// Soft limit for the maximum size of a vectored read.\n    ///\n    /// This is determined by the largest NeonWalRecord that can exist (minus dbdir and reldir keys\n    /// which are bounded by the blob io limits only). As of this writing, that is a `NeonWalRecord::ClogSetCommitted` record,\n    /// with 32k xids. That's the max number of XIDS on a single CLOG page. 
The size of such a record\n    /// is `sizeof(Transactionid) * 32768 + (some fixed overhead from 'timestamp`, the Vec length and whatever extra serde serialization adds)`.\n    /// That is, slightly above 128 kB.\n    pub const DEFAULT_MAX_VECTORED_READ_BYTES: usize = 130 * 1024; // 130 KiB\n\n    pub const DEFAULT_MAX_GET_VECTORED_KEYS: usize = 32;\n\n    pub const DEFAULT_IMAGE_COMPRESSION: ImageCompressionAlgorithm =\n        ImageCompressionAlgorithm::Zstd { level: Some(1) };\n\n    pub const DEFAULT_EPHEMERAL_BYTES_PER_MEMORY_KB: usize = 0;\n\n    #[cfg(feature = \"io-align-4k\")]\n    pub const DEFAULT_IO_BUFFER_ALIGNMENT: usize = 4096;\n    #[cfg(all(feature = \"io-align-512\", not(feature = \"io-align-4k\")))]\n    pub const DEFAULT_IO_BUFFER_ALIGNMENT: usize = 512;\n    #[cfg(not(any(feature = \"io-align-512\", feature = \"io-align-4k\")))]\n    pub const DEFAULT_IO_BUFFER_ALIGNMENT: usize = 512;\n\n    pub const DEFAULT_SSL_KEY_FILE: &str = \"server.key\";\n    pub const DEFAULT_SSL_CERT_FILE: &str = \"server.crt\";\n}\n\nimpl Default for ConfigToml {\n    fn default() -> Self {\n        use defaults::*;\n\n        Self {\n            listen_pg_addr: (DEFAULT_PG_LISTEN_ADDR.to_string()),\n            listen_http_addr: (DEFAULT_HTTP_LISTEN_ADDR.to_string()),\n            listen_https_addr: (None),\n            listen_grpc_addr: None, // TODO: default to 127.0.0.1:51051\n            ssl_key_file: Utf8PathBuf::from(DEFAULT_SSL_KEY_FILE),\n            ssl_cert_file: Utf8PathBuf::from(DEFAULT_SSL_CERT_FILE),\n            ssl_cert_reload_period: Duration::from_secs(60),\n            ssl_ca_file: None,\n            availability_zone: (None),\n            wait_lsn_timeout: (humantime::parse_duration(DEFAULT_WAIT_LSN_TIMEOUT)\n                .expect(\"cannot parse default wait lsn timeout\")),\n            wal_redo_timeout: (humantime::parse_duration(DEFAULT_WAL_REDO_TIMEOUT)\n                .expect(\"cannot parse default wal redo timeout\")),\n            
superuser: (DEFAULT_SUPERUSER.to_string()),\n            locale: DEFAULT_LOCALE.to_string(),\n            page_cache_size: (DEFAULT_PAGE_CACHE_SIZE),\n            max_file_descriptors: (DEFAULT_MAX_FILE_DESCRIPTORS),\n            pg_distrib_dir: None, // Utf8PathBuf::from(\"./pg_install\"), // TODO: formerly, this was std::env::current_dir()\n            http_auth_type: (AuthType::Trust),\n            pg_auth_type: (AuthType::Trust),\n            grpc_auth_type: (AuthType::Trust),\n            auth_validation_public_key_path: (None),\n            remote_storage: None,\n            broker_endpoint: (storage_broker::DEFAULT_ENDPOINT\n                .parse()\n                .expect(\"failed to parse default broker endpoint\")),\n            broker_keepalive_interval: (humantime::parse_duration(\n                storage_broker::DEFAULT_KEEPALIVE_INTERVAL,\n            )\n            .expect(\"cannot parse default keepalive interval\")),\n            log_format: (LogFormat::from_str(DEFAULT_LOG_FORMAT).unwrap()),\n\n            concurrent_tenant_warmup: (NonZeroUsize::new(DEFAULT_CONCURRENT_TENANT_WARMUP)\n                .expect(\"Invalid default constant\")),\n            concurrent_tenant_size_logical_size_queries: NonZeroUsize::new(\n                DEFAULT_CONCURRENT_TENANT_SIZE_LOGICAL_SIZE_QUERIES,\n            )\n            .unwrap(),\n            metric_collection_interval: (humantime::parse_duration(\n                DEFAULT_METRIC_COLLECTION_INTERVAL,\n            )\n            .expect(\"cannot parse default metric collection interval\")),\n            synthetic_size_calculation_interval: (humantime::parse_duration(\n                DEFAULT_SYNTHETIC_SIZE_CALCULATION_INTERVAL,\n            )\n            .expect(\"cannot parse default synthetic size calculation interval\")),\n            metric_collection_endpoint: (DEFAULT_METRIC_COLLECTION_ENDPOINT),\n\n            metric_collection_bucket: (None),\n\n            disk_usage_based_eviction: 
DiskUsageEvictionTaskConfig::default(),\n\n            test_remote_failures: (0),\n            test_remote_failures_probability: (100),\n\n            ondemand_download_behavior_treat_error_as_warn: (false),\n\n            background_task_maximum_delay: (humantime::parse_duration(\n                DEFAULT_BACKGROUND_TASK_MAXIMUM_DELAY,\n            )\n            .unwrap()),\n\n            control_plane_api: (None),\n            control_plane_api_token: (None),\n            control_plane_emergency_mode: (false),\n\n            import_pgdata_upcall_api: (None),\n            import_pgdata_upcall_api_token: (None),\n            import_pgdata_aws_endpoint_url: (None),\n\n            heatmap_upload_concurrency: (DEFAULT_HEATMAP_UPLOAD_CONCURRENCY),\n            secondary_download_concurrency: (DEFAULT_SECONDARY_DOWNLOAD_CONCURRENCY),\n\n            ingest_batch_size: (DEFAULT_INGEST_BATCH_SIZE),\n\n            virtual_file_io_engine: None,\n\n            max_vectored_read_bytes: (MaxVectoredReadBytes(\n                NonZeroUsize::new(DEFAULT_MAX_VECTORED_READ_BYTES).unwrap(),\n            )),\n            max_get_vectored_keys: (MaxGetVectoredKeys(\n                NonZeroUsize::new(DEFAULT_MAX_GET_VECTORED_KEYS).unwrap(),\n            )),\n            image_compression: (DEFAULT_IMAGE_COMPRESSION),\n            timeline_offloading: true,\n            ephemeral_bytes_per_memory_kb: (DEFAULT_EPHEMERAL_BYTES_PER_MEMORY_KB),\n            l0_flush: None,\n            virtual_file_io_mode: None,\n            tenant_config: TenantConfigToml::default(),\n            no_sync: None,\n            page_service_pipelining: PageServicePipeliningConfig::Pipelined(\n                PageServicePipeliningConfigPipelined {\n                    max_batch_size: NonZeroUsize::new(32).unwrap(),\n                    execution: PageServiceProtocolPipelinedExecutionStrategy::ConcurrentFutures,\n                    batching: PageServiceProtocolPipelinedBatchingStrategy::ScatteredLsn,\n         
       },\n            ),\n            get_vectored_concurrent_io: GetVectoredConcurrentIo::SidecarTask,\n            enable_read_path_debugging: if cfg!(feature = \"testing\") {\n                Some(true)\n            } else {\n                None\n            },\n            validate_wal_contiguity: None,\n            load_previous_heatmap: None,\n            generate_unarchival_heatmap: None,\n            tracing: None,\n            enable_tls_page_service_api: false,\n            dev_mode: false,\n            timeline_import_config: TimelineImportConfig {\n                import_job_concurrency: NonZeroUsize::new(32).unwrap(),\n                import_job_soft_size_limit: NonZeroUsize::new(256 * 1024 * 1024).unwrap(),\n                import_job_checkpoint_threshold: NonZeroUsize::new(32).unwrap(),\n                import_job_max_byte_range_size: NonZeroUsize::new(4 * 1024 * 1024).unwrap(),\n            },\n            basebackup_cache_config: None,\n            posthog_config: None,\n            image_layer_generation_large_timeline_threshold: Some(2 * 1024 * 1024 * 1024),\n            force_metric_collection_on_scrape: true,\n        }\n    }\n}\n\npub mod tenant_conf_defaults {\n\n    // FIXME: This current value is very low. I would imagine something like 1 GB or 10 GB\n    // would be more appropriate. But a low value forces the code to be exercised more,\n    // which is good for now to trigger bugs.\n    // This parameter actually determines L0 layer file size.\n    pub const DEFAULT_CHECKPOINT_DISTANCE: u64 = 256 * 1024 * 1024;\n    pub const DEFAULT_CHECKPOINT_TIMEOUT: &str = \"10 m\";\n\n    // FIXME the below configs are only used by legacy algorithm. 
The new algorithm\n    // has different parameters.\n\n    // Target file size, when creating image and delta layers.\n    // This parameter determines L1 layer file size.\n    pub const DEFAULT_COMPACTION_TARGET_SIZE: u64 = 128 * 1024 * 1024;\n\n    pub const DEFAULT_COMPACTION_PERIOD: &str = \"20 s\";\n    pub const DEFAULT_COMPACTION_THRESHOLD: usize = 10;\n    pub const DEFAULT_COMPACTION_SHARD_ANCESTOR: bool = true;\n\n    // This value needs to be tuned to avoid OOM. We have 3/4*CPUs threads for L0 compaction, that's\n    // 3/4*8=6 on most of our pageservers. Compacting 10 layers requires a maximum of\n    // DEFAULT_CHECKPOINT_DISTANCE*10 memory, that's 2560MB. So with this config, we can get a maximum peak\n    // compaction usage of 15360MB.\n    pub const DEFAULT_COMPACTION_UPPER_LIMIT: usize = 10;\n    // Enable L0 compaction pass and semaphore by default. L0 compaction must be responsive to avoid\n    // read amp.\n    pub const DEFAULT_COMPACTION_L0_FIRST: bool = true;\n    pub const DEFAULT_COMPACTION_L0_SEMAPHORE: bool = true;\n\n    pub const DEFAULT_COMPACTION_ALGORITHM: crate::models::CompactionAlgorithm =\n        crate::models::CompactionAlgorithm::Legacy;\n\n    pub const DEFAULT_GC_HORIZON: u64 = 64 * 1024 * 1024;\n\n    // Large DEFAULT_GC_PERIOD is fine as long as PITR_INTERVAL is larger.\n    // If there's a need to decrease this value, first make sure that GC\n    // doesn't hold a layer map write lock for non-trivial operations.\n    // Relevant: https://github.com/neondatabase/neon/issues/3394\n    pub const DEFAULT_GC_PERIOD: &str = \"1 hr\";\n    pub const DEFAULT_IMAGE_CREATION_THRESHOLD: usize = 3;\n    // Currently, any value other than 0 will trigger image layer creation preemption immediately with L0 backpressure\n    // without looking at the exact number of L0 layers.\n    // It was expected to have the following behavior:\n    // > If there are more than threshold * compaction_threshold (that is 3 * 10 in the default config) 
L0 layers, image\n    // > layer creation will end immediately. Set to 0 to disable.\n    pub const DEFAULT_IMAGE_CREATION_PREEMPT_THRESHOLD: usize = 3;\n    pub const DEFAULT_PITR_INTERVAL: &str = \"7 days\";\n    pub const DEFAULT_WALRECEIVER_CONNECT_TIMEOUT: &str = \"10 seconds\";\n    pub const DEFAULT_WALRECEIVER_LAGGING_WAL_TIMEOUT: &str = \"10 seconds\";\n    // The default limit on WAL lag should be set to avoid causing disconnects under high throughput\n    // scenarios: since the broker stats are updated ~1/s, a value of 1GiB should be sufficient for\n    // throughputs up to 1GiB/s per timeline.\n    pub const DEFAULT_MAX_WALRECEIVER_LSN_WAL_LAG: u64 = 1024 * 1024 * 1024;\n    pub const DEFAULT_EVICTIONS_LOW_RESIDENCE_DURATION_METRIC_THRESHOLD: &str = \"24 hour\";\n    // By default ingest enough WAL for two new L0 layers before checking if new\n    // image layers should be created.\n    pub const DEFAULT_IMAGE_LAYER_CREATION_CHECK_THRESHOLD: u8 = 2;\n    pub const DEFAULT_GC_COMPACTION_ENABLED: bool = true;\n    pub const DEFAULT_GC_COMPACTION_VERIFICATION: bool = true;\n    pub const DEFAULT_GC_COMPACTION_INITIAL_THRESHOLD_KB: u64 = 5 * 1024 * 1024; // 5GB\n    pub const DEFAULT_GC_COMPACTION_RATIO_PERCENT: u64 = 100;\n    pub const DEFAULT_RELSIZE_SNAPSHOT_CACHE_CAPACITY: usize = 1000;\n}\n\nimpl Default for TenantConfigToml {\n    fn default() -> Self {\n        use tenant_conf_defaults::*;\n        Self {\n            checkpoint_distance: DEFAULT_CHECKPOINT_DISTANCE,\n            checkpoint_timeout: humantime::parse_duration(DEFAULT_CHECKPOINT_TIMEOUT)\n                .expect(\"cannot parse default checkpoint timeout\"),\n            compaction_target_size: DEFAULT_COMPACTION_TARGET_SIZE,\n            compaction_period: humantime::parse_duration(DEFAULT_COMPACTION_PERIOD)\n                .expect(\"cannot parse default compaction period\"),\n            compaction_threshold: DEFAULT_COMPACTION_THRESHOLD,\n            compaction_upper_limit: 
DEFAULT_COMPACTION_UPPER_LIMIT,\n            compaction_algorithm: crate::models::CompactionAlgorithmSettings {\n                kind: DEFAULT_COMPACTION_ALGORITHM,\n            },\n            compaction_shard_ancestor: DEFAULT_COMPACTION_SHARD_ANCESTOR,\n            compaction_l0_first: DEFAULT_COMPACTION_L0_FIRST,\n            compaction_l0_semaphore: DEFAULT_COMPACTION_L0_SEMAPHORE,\n            l0_flush_delay_threshold: None,\n            l0_flush_stall_threshold: None,\n            gc_horizon: DEFAULT_GC_HORIZON,\n            gc_period: humantime::parse_duration(DEFAULT_GC_PERIOD)\n                .expect(\"cannot parse default gc period\"),\n            image_creation_threshold: DEFAULT_IMAGE_CREATION_THRESHOLD,\n            image_layer_force_creation_period: None,\n            pitr_interval: humantime::parse_duration(DEFAULT_PITR_INTERVAL)\n                .expect(\"cannot parse default PITR interval\"),\n            walreceiver_connect_timeout: humantime::parse_duration(\n                DEFAULT_WALRECEIVER_CONNECT_TIMEOUT,\n            )\n            .expect(\"cannot parse default walreceiver connect timeout\"),\n            lagging_wal_timeout: humantime::parse_duration(DEFAULT_WALRECEIVER_LAGGING_WAL_TIMEOUT)\n                .expect(\"cannot parse default walreceiver lagging wal timeout\"),\n            max_lsn_wal_lag: NonZeroU64::new(DEFAULT_MAX_WALRECEIVER_LSN_WAL_LAG)\n                .expect(\"cannot parse default max walreceiver Lsn wal lag\"),\n            eviction_policy: crate::models::EvictionPolicy::NoEviction,\n            min_resident_size_override: None,\n            evictions_low_residence_duration_metric_threshold: humantime::parse_duration(\n                DEFAULT_EVICTIONS_LOW_RESIDENCE_DURATION_METRIC_THRESHOLD,\n            )\n            .expect(\"cannot parse default evictions_low_residence_duration_metric_threshold\"),\n            heatmap_period: Duration::ZERO,\n            lazy_slru_download: false,\n            
timeline_get_throttle: crate::models::ThrottleConfig::disabled(),\n            image_layer_creation_check_threshold: DEFAULT_IMAGE_LAYER_CREATION_CHECK_THRESHOLD,\n            image_creation_preempt_threshold: DEFAULT_IMAGE_CREATION_PREEMPT_THRESHOLD,\n            lsn_lease_length: LsnLease::DEFAULT_LENGTH,\n            lsn_lease_length_for_ts: LsnLease::DEFAULT_LENGTH_FOR_TS,\n            timeline_offloading: true,\n            rel_size_v2_enabled: false,\n            gc_compaction_enabled: DEFAULT_GC_COMPACTION_ENABLED,\n            gc_compaction_verification: DEFAULT_GC_COMPACTION_VERIFICATION,\n            gc_compaction_initial_threshold_kb: DEFAULT_GC_COMPACTION_INITIAL_THRESHOLD_KB,\n            gc_compaction_ratio_percent: DEFAULT_GC_COMPACTION_RATIO_PERCENT,\n            sampling_ratio: None,\n            relsize_snapshot_cache_capacity: DEFAULT_RELSIZE_SNAPSHOT_CACHE_CAPACITY,\n            basebackup_cache_enabled: false,\n        }\n    }\n}\n"
  },
  {
    "path": "libs/pageserver_api/src/controller_api.rs",
    "content": "use std::collections::{HashMap, HashSet};\nuse std::fmt::Display;\nuse std::net::IpAddr;\nuse std::str::FromStr;\nuse std::time::{Duration, Instant};\n\n/// Request/response types for the storage controller\n/// API (`/control/v1` prefix).  Implemented by the server\n/// in [`storage_controller::http`]\nuse serde::{Deserialize, Serialize};\nuse utils::id::{NodeId, TenantId, TimelineId};\nuse utils::lsn::Lsn;\n\nuse crate::models::{PageserverUtilization, ShardParameters, TenantConfig, TimelineInfo};\nuse crate::shard::{ShardStripeSize, TenantShardId};\n\n#[derive(Serialize, Deserialize, Debug)]\n#[serde(deny_unknown_fields)]\npub struct TenantCreateRequest {\n    pub new_tenant_id: TenantShardId,\n    #[serde(default)]\n    #[serde(skip_serializing_if = \"Option::is_none\")]\n    pub generation: Option<u32>,\n\n    // If omitted, create a single shard with TenantShardId::unsharded()\n    #[serde(default)]\n    #[serde(skip_serializing_if = \"ShardParameters::is_unsharded\")]\n    pub shard_parameters: ShardParameters,\n\n    #[serde(default)]\n    #[serde(skip_serializing_if = \"Option::is_none\")]\n    pub placement_policy: Option<PlacementPolicy>,\n\n    #[serde(flatten)]\n    pub config: TenantConfig, // as we have a flattened field, we should reject all unknown fields in it\n}\n\n#[derive(Serialize, Deserialize)]\npub struct TenantCreateResponseShard {\n    pub shard_id: TenantShardId,\n    pub node_id: NodeId,\n    pub generation: u32,\n}\n\n#[derive(Serialize, Deserialize)]\npub struct TenantCreateResponse {\n    pub shards: Vec<TenantCreateResponseShard>,\n}\n\n#[derive(Serialize, Deserialize, Debug, Clone)]\npub struct NodeRegisterRequest {\n    pub node_id: NodeId,\n\n    pub listen_pg_addr: String,\n    pub listen_pg_port: u16,\n    pub listen_grpc_addr: Option<String>,\n    pub listen_grpc_port: Option<u16>,\n\n    pub listen_http_addr: String,\n    pub listen_http_port: u16,\n    pub listen_https_port: Option<u16>,\n\n    pub 
availability_zone_id: AvailabilityZone,\n\n    // Reachable IP address of the PS/SK registering, if known.\n    // Hadron Cluster Coordinator will update the DNS record of the registering node\n    // with this IP address.\n    pub node_ip_addr: Option<IpAddr>,\n}\n\n#[derive(Serialize, Deserialize)]\npub struct NodeConfigureRequest {\n    pub node_id: NodeId,\n\n    pub availability: Option<NodeAvailabilityWrapper>,\n    pub scheduling: Option<NodeSchedulingPolicy>,\n}\n\n#[derive(Serialize, Deserialize)]\npub struct TenantPolicyRequest {\n    pub placement: Option<PlacementPolicy>,\n    pub scheduling: Option<ShardSchedulingPolicy>,\n}\n\n#[derive(Clone, Serialize, Deserialize, PartialEq, Eq, Hash, Debug, PartialOrd, Ord)]\npub struct AvailabilityZone(pub String);\n\nimpl Display for AvailabilityZone {\n    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {\n        write!(f, \"{}\", self.0)\n    }\n}\n\n#[derive(Serialize, Deserialize)]\npub struct ShardsPreferredAzsRequest {\n    #[serde(flatten)]\n    pub preferred_az_ids: HashMap<TenantShardId, Option<AvailabilityZone>>,\n}\n\n#[derive(Serialize, Deserialize)]\npub struct ShardsPreferredAzsResponse {\n    pub updated: Vec<TenantShardId>,\n}\n\n#[derive(Serialize, Deserialize, Debug)]\npub struct TenantLocateResponseShard {\n    pub shard_id: TenantShardId,\n    pub node_id: NodeId,\n\n    pub listen_pg_addr: String,\n    pub listen_pg_port: u16,\n    pub listen_grpc_addr: Option<String>,\n    pub listen_grpc_port: Option<u16>,\n\n    pub listen_http_addr: String,\n    pub listen_http_port: u16,\n    pub listen_https_port: Option<u16>,\n}\n\n#[derive(Serialize, Deserialize)]\npub struct TenantLocateResponse {\n    pub shards: Vec<TenantLocateResponseShard>,\n    pub shard_params: ShardParameters,\n}\n\n#[derive(Serialize, Deserialize, Debug)]\npub struct TenantDescribeResponse {\n    pub tenant_id: TenantId,\n    pub shards: Vec<TenantDescribeResponseShard>,\n    pub stripe_size: 
ShardStripeSize,\n    pub policy: PlacementPolicy,\n    pub config: TenantConfig,\n}\n\n#[derive(Serialize, Deserialize, Debug)]\npub struct TenantTimelineDescribeResponse {\n    pub shards: Vec<TimelineInfo>,\n    #[serde(skip_serializing_if = \"Option::is_none\")]\n    pub image_consistent_lsn: Option<Lsn>,\n}\n\n#[derive(Serialize, Deserialize, Debug)]\npub struct NodeShardResponse {\n    pub node_id: NodeId,\n    pub shards: Vec<NodeShard>,\n}\n\n#[derive(Serialize, Deserialize, Debug)]\npub struct NodeShard {\n    pub tenant_shard_id: TenantShardId,\n    /// Whether the shard is observed secondary on a specific node. True = yes, False = no, None = not on this node.\n    pub is_observed_secondary: Option<bool>,\n    /// Whether the shard is intended to be a secondary on a specific node. True = yes, False = no, None = not on this node.\n    pub is_intended_secondary: Option<bool>,\n}\n\n#[derive(Serialize, Deserialize)]\npub struct NodeDescribeResponse {\n    pub id: NodeId,\n\n    pub availability: NodeAvailabilityWrapper,\n    pub scheduling: NodeSchedulingPolicy,\n\n    pub availability_zone_id: String,\n\n    pub listen_http_addr: String,\n    pub listen_http_port: u16,\n    pub listen_https_port: Option<u16>,\n\n    pub listen_pg_addr: String,\n    pub listen_pg_port: u16,\n    pub listen_grpc_addr: Option<String>,\n    pub listen_grpc_port: Option<u16>,\n}\n\n#[derive(Serialize, Deserialize, Debug)]\npub struct TenantDescribeResponseShard {\n    pub tenant_shard_id: TenantShardId,\n\n    pub node_attached: Option<NodeId>,\n    pub node_secondary: Vec<NodeId>,\n\n    pub last_error: String,\n\n    /// A task is currently running to reconcile this tenant's intent state with the state on pageservers\n    pub is_reconciling: bool,\n    /// This shard failed in sending a compute notification to the cloud control plane, and a retry is pending.\n    pub is_pending_compute_notification: bool,\n    /// A shard split is currently underway\n    pub is_splitting: 
bool,\n    /// A timeline is being imported into this tenant\n    pub is_importing: bool,\n\n    pub scheduling_policy: ShardSchedulingPolicy,\n\n    pub preferred_az_id: Option<String>,\n}\n\n/// Migration request for a given tenant shard to a given node.\n///\n/// Explicitly migrating a particular shard is a low level operation\n/// TODO: higher level \"Reschedule tenant\" operation where the request\n/// specifies some constraints, e.g. asking it to get off particular node(s)\n#[derive(Serialize, Deserialize, Debug)]\npub struct TenantShardMigrateRequest {\n    pub node_id: NodeId,\n\n    /// Optionally, callers may specify the node they are migrating _from_, and the server will\n    /// reject the request if the shard is no longer attached there: this enables writing safer\n    /// clients that don't risk fighting with some other movement of the shard.\n    #[serde(default)]\n    pub origin_node_id: Option<NodeId>,\n\n    #[serde(default)]\n    pub migration_config: MigrationConfig,\n}\n\n#[derive(Serialize, Deserialize, Debug, PartialEq, Eq)]\npub struct MigrationConfig {\n    /// If true, the migration will be executed even if it is to a location with a sub-optimal scheduling\n    /// score: this is usually not what you want, and if you use this then you'll also need to set the\n    /// tenant's scheduling policy to Essential or Pause to avoid the optimiser reverting your migration.\n    ///\n    /// Default: false\n    #[serde(default)]\n    pub override_scheduler: bool,\n\n    /// If true, the migration will be done gracefully by creating a secondary location first and\n    /// waiting for it to warm up before cutting over.  If false, if there is no existing secondary\n    /// location at the destination, the tenant will be migrated immediately.  
If the tenant's data\n    /// can't be downloaded within [`Self::secondary_warmup_timeout`], then the migration will go\n    /// ahead but run with a cold cache that can severely reduce performance until it warms up.\n    ///\n    /// When doing a graceful migration, the migration API returns as soon as it is started.\n    ///\n    /// Default: true\n    #[serde(default = \"default_prewarm\")]\n    pub prewarm: bool,\n\n    /// For non-prewarm migrations which will immediately enter a cutover to the new node: how long to wait\n    /// overall for secondary warmup before cutting over\n    #[serde(default)]\n    #[serde(with = \"humantime_serde\")]\n    pub secondary_warmup_timeout: Option<Duration>,\n    /// For non-prewarm migrations which will immediately enter a cutover to the new node: how long to wait\n    /// within each secondary download poll call to pageserver.\n    #[serde(default)]\n    #[serde(with = \"humantime_serde\")]\n    pub secondary_download_request_timeout: Option<Duration>,\n}\n\nfn default_prewarm() -> bool {\n    true\n}\n\nimpl Default for MigrationConfig {\n    fn default() -> Self {\n        Self {\n            override_scheduler: false,\n            prewarm: default_prewarm(),\n            secondary_warmup_timeout: None,\n            secondary_download_request_timeout: None,\n        }\n    }\n}\n\n#[derive(Serialize, Clone, Debug)]\n#[serde(into = \"NodeAvailabilityWrapper\")]\npub enum NodeAvailability {\n    // Normal, happy state\n    Active(PageserverUtilization),\n    // Node is warming up, but we expect it to become available soon. Covers\n    // the time span between the re-attach response being composed on the storage controller\n    // and the first successful heartbeat after the processing of the re-attach response\n    // finishes on the pageserver.\n    WarmingUp(Instant),\n    // Offline: Tenants shouldn't try to attach here, but they may assume that their\n    // secondary locations on this node still exist.  
Newly added nodes are in this\n    // state until we successfully contact them.\n    Offline,\n}\n\nimpl PartialEq for NodeAvailability {\n    fn eq(&self, other: &Self) -> bool {\n        use NodeAvailability::*;\n        matches!(\n            (self, other),\n            (Active(_), Active(_)) | (Offline, Offline) | (WarmingUp(_), WarmingUp(_))\n        )\n    }\n}\n\nimpl Eq for NodeAvailability {}\n\n// This wrapper provides serde functionality and it should only be used to\n// communicate with external callers which don't know or care about the\n// utilisation score of the pageserver it is targeting.\n#[derive(Serialize, Deserialize, Clone, Copy, Debug)]\npub enum NodeAvailabilityWrapper {\n    Active,\n    WarmingUp,\n    Offline,\n}\n\nimpl From<NodeAvailabilityWrapper> for NodeAvailability {\n    fn from(val: NodeAvailabilityWrapper) -> Self {\n        match val {\n            // Assume the worst utilisation score to begin with. It will later be updated by\n            // the heartbeats.\n            NodeAvailabilityWrapper::Active => {\n                NodeAvailability::Active(PageserverUtilization::full())\n            }\n            NodeAvailabilityWrapper::WarmingUp => NodeAvailability::WarmingUp(Instant::now()),\n            NodeAvailabilityWrapper::Offline => NodeAvailability::Offline,\n        }\n    }\n}\n\nimpl From<NodeAvailability> for NodeAvailabilityWrapper {\n    fn from(val: NodeAvailability) -> Self {\n        match val {\n            NodeAvailability::Active(_) => NodeAvailabilityWrapper::Active,\n            NodeAvailability::WarmingUp(_) => NodeAvailabilityWrapper::WarmingUp,\n            NodeAvailability::Offline => NodeAvailabilityWrapper::Offline,\n        }\n    }\n}\n\n/// Scheduling policy enables us to selectively disable some automatic actions that the\n/// controller performs on a tenant shard. 
This is only set to a non-default value by\n/// human intervention, and it is reset to the default value (Active) when the tenant's\n/// placement policy is modified away from Attached.\n///\n/// The typical use of a non-Active scheduling policy is one of:\n/// - Pinning a shard to a node (i.e. migrating it there & setting a non-Active scheduling policy)\n/// - Working around a bug (e.g. if something is flapping and we need to stop it until the bug is fixed)\n///\n/// If you're not sure which policy to use to pin a shard to its current location, you probably\n/// want Pause.\n#[derive(Serialize, Deserialize, Clone, Copy, Eq, PartialEq, Debug)]\npub enum ShardSchedulingPolicy {\n    // Normal mode: the tenant's scheduled locations may be updated at will, including\n    // for non-essential optimization.\n    Active,\n\n    // Disable optimizations, but permit scheduling when necessary to fulfil the PlacementPolicy.\n    // For example, this still permits a node's attachment location to change to a secondary in\n    // response to a node failure, or to assign a new secondary if a node was removed.\n    Essential,\n\n    // No scheduling: leave the shard running wherever it currently is.  Even if the shard is\n    // unavailable, it will not be rescheduled to another node.\n    Pause,\n\n    // No reconciling: we will make no location_conf API calls to pageservers at all.  If the\n    // shard is unavailable, it stays that way.  
If a node fails, this shard doesn't get failed over.\n    Stop,\n}\n\nimpl Default for ShardSchedulingPolicy {\n    fn default() -> Self {\n        Self::Active\n    }\n}\n\n#[derive(Serialize, Deserialize, Clone, Copy, Eq, PartialEq, Debug)]\npub enum NodeLifecycle {\n    Active,\n    Deleted,\n}\n\nimpl FromStr for NodeLifecycle {\n    type Err = anyhow::Error;\n\n    fn from_str(s: &str) -> Result<Self, Self::Err> {\n        match s {\n            \"active\" => Ok(Self::Active),\n            \"deleted\" => Ok(Self::Deleted),\n            _ => Err(anyhow::anyhow!(\"Unknown node lifecycle '{s}'\")),\n        }\n    }\n}\n\nimpl From<NodeLifecycle> for String {\n    fn from(value: NodeLifecycle) -> String {\n        use NodeLifecycle::*;\n        match value {\n            Active => \"active\",\n            Deleted => \"deleted\",\n        }\n        .to_string()\n    }\n}\n\n#[derive(Serialize, Deserialize, Clone, Copy, Eq, PartialEq, Debug)]\npub enum NodeSchedulingPolicy {\n    Active,\n    Filling,\n    Pause,\n    PauseForRestart,\n    Draining,\n    Deleting,\n}\n\nimpl FromStr for NodeSchedulingPolicy {\n    type Err = anyhow::Error;\n\n    fn from_str(s: &str) -> Result<Self, Self::Err> {\n        match s {\n            \"active\" => Ok(Self::Active),\n            \"filling\" => Ok(Self::Filling),\n            \"pause\" => Ok(Self::Pause),\n            \"pause_for_restart\" => Ok(Self::PauseForRestart),\n            \"draining\" => Ok(Self::Draining),\n            \"deleting\" => Ok(Self::Deleting),\n            _ => Err(anyhow::anyhow!(\"Unknown scheduling state '{s}'\")),\n        }\n    }\n}\n\nimpl From<NodeSchedulingPolicy> for String {\n    fn from(value: NodeSchedulingPolicy) -> String {\n        use NodeSchedulingPolicy::*;\n        match value {\n            Active => \"active\",\n            Filling => \"filling\",\n            Pause => \"pause\",\n            PauseForRestart => \"pause_for_restart\",\n            Draining => \"draining\",\n       
     Deleting => \"deleting\",\n        }\n        .to_string()\n    }\n}\n\n#[derive(Serialize, Deserialize, Clone, Copy, Eq, PartialEq, Debug)]\npub enum SkSchedulingPolicy {\n    Active,\n    Activating,\n    Pause,\n    Decomissioned,\n}\n\nimpl FromStr for SkSchedulingPolicy {\n    type Err = anyhow::Error;\n\n    fn from_str(s: &str) -> Result<Self, Self::Err> {\n        Ok(match s {\n            \"active\" => Self::Active,\n            \"activating\" => Self::Activating,\n            \"pause\" => Self::Pause,\n            \"decomissioned\" => Self::Decomissioned,\n            _ => {\n                return Err(anyhow::anyhow!(\n                    \"Unknown scheduling policy '{s}', try active,pause,decomissioned\"\n                ));\n            }\n        })\n    }\n}\n\nimpl From<SkSchedulingPolicy> for String {\n    fn from(value: SkSchedulingPolicy) -> String {\n        use SkSchedulingPolicy::*;\n        match value {\n            Active => \"active\",\n            Activating => \"activating\",\n            Pause => \"pause\",\n            Decomissioned => \"decomissioned\",\n        }\n        .to_string()\n    }\n}\n\n/// Controls how tenant shards are mapped to locations on pageservers, e.g. whether\n/// to create secondary locations.\n#[derive(Clone, Serialize, Deserialize, Debug, PartialEq, Eq)]\npub enum PlacementPolicy {\n    /// Normal live state: one attached pageserver and zero or more secondaries.\n    Attached(usize),\n    /// Create one secondary mode locations. This is useful when onboarding\n    /// a tenant, or for an idle tenant that we might want to bring online quickly.\n    Secondary,\n\n    /// Do not attach to any pageservers.  
This is appropriate for tenants that\n    /// have been idle for a long time, where we do not mind some delay in making\n    /// them available in future.\n    Detached,\n}\n\nimpl PlacementPolicy {\n    pub fn want_secondaries(&self) -> usize {\n        match self {\n            PlacementPolicy::Attached(secondary_count) => *secondary_count,\n            PlacementPolicy::Secondary => 1,\n            PlacementPolicy::Detached => 0,\n        }\n    }\n}\n\n#[derive(Serialize, Deserialize, Debug)]\npub struct TenantShardMigrateResponse {}\n\n/// Metadata health record posted from scrubber.\n#[derive(Serialize, Deserialize, Debug)]\npub struct MetadataHealthRecord {\n    pub tenant_shard_id: TenantShardId,\n    pub healthy: bool,\n    pub last_scrubbed_at: chrono::DateTime<chrono::Utc>,\n}\n\n#[derive(Serialize, Deserialize, Debug)]\npub struct MetadataHealthUpdateRequest {\n    pub healthy_tenant_shards: HashSet<TenantShardId>,\n    pub unhealthy_tenant_shards: HashSet<TenantShardId>,\n}\n\n#[derive(Serialize, Deserialize, Debug)]\npub struct MetadataHealthUpdateResponse {}\n\n#[derive(Serialize, Deserialize, Debug)]\npub struct MetadataHealthListUnhealthyResponse {\n    pub unhealthy_tenant_shards: Vec<TenantShardId>,\n}\n\n#[derive(Serialize, Deserialize, Debug)]\npub struct MetadataHealthListOutdatedRequest {\n    #[serde(with = \"humantime_serde\")]\n    pub not_scrubbed_for: Duration,\n}\n\n#[derive(Serialize, Deserialize, Debug)]\npub struct MetadataHealthListOutdatedResponse {\n    pub health_records: Vec<MetadataHealthRecord>,\n}\n\n/// Publicly exposed safekeeper description\n#[derive(Serialize, Deserialize, Clone)]\npub struct SafekeeperDescribeResponse {\n    pub id: NodeId,\n    pub region_id: String,\n    /// 1 is special, it means just created (not currently posted to storcon).\n    /// Zero or negative is not really expected.\n    /// Otherwise the number from `release-$(number_of_commits_on_branch)` tag.\n    pub version: i64,\n    pub host: String,\n 
   pub port: i32,\n    pub http_port: i32,\n    pub https_port: Option<i32>,\n    pub availability_zone_id: String,\n    pub scheduling_policy: SkSchedulingPolicy,\n}\n\n#[derive(Serialize, Deserialize, Clone, Debug)]\npub struct TimelineSafekeeperPeer {\n    pub node_id: NodeId,\n    pub listen_http_addr: String,\n    pub http_port: i32,\n}\n\n#[derive(Serialize, Deserialize, Clone, Debug)]\npub struct SCSafekeeperTimeline {\n    // SC does not know the tenant id.\n    pub timeline_id: TimelineId,\n    pub peers: Vec<NodeId>,\n}\n\n#[derive(Serialize, Deserialize, Clone, Debug)]\npub struct SCSafekeeperTimelinesResponse {\n    pub timelines: Vec<SCSafekeeperTimeline>,\n    pub safekeeper_peers: Vec<TimelineSafekeeperPeer>,\n}\n\n#[derive(Serialize, Deserialize, Clone, Debug)]\npub struct SafekeeperTimeline {\n    pub tenant_id: TenantId,\n    pub timeline_id: TimelineId,\n    pub peers: Vec<NodeId>,\n}\n\n#[derive(Serialize, Deserialize, Clone, Debug)]\npub struct SafekeeperTimelinesResponse {\n    pub timelines: Vec<SafekeeperTimeline>,\n    pub safekeeper_peers: Vec<TimelineSafekeeperPeer>,\n}\n\n#[derive(Serialize, Deserialize, Clone)]\npub struct SafekeeperSchedulingPolicyRequest {\n    pub scheduling_policy: SkSchedulingPolicy,\n}\n\n/// Import request for safekeeper timelines.\n#[derive(Serialize, Deserialize, Clone)]\npub struct TimelineImportRequest {\n    pub tenant_id: TenantId,\n    pub timeline_id: TimelineId,\n    pub start_lsn: Lsn,\n    pub sk_set: Vec<NodeId>,\n    pub force_upsert: bool,\n}\n\n#[derive(serde::Serialize, serde::Deserialize, Clone)]\npub struct TimelineSafekeeperMigrateRequest {\n    pub new_sk_set: Vec<NodeId>,\n}\n\n#[cfg(test)]\nmod test {\n    use serde_json;\n\n    use super::*;\n\n    /// Check stability of PlacementPolicy's serialization\n    #[test]\n    fn placement_policy_encoding() -> anyhow::Result<()> {\n        let v = PlacementPolicy::Attached(1);\n        let encoded = serde_json::to_string(&v)?;\n        
assert_eq!(encoded, \"{\\\"Attached\\\":1}\");\n        assert_eq!(serde_json::from_str::<PlacementPolicy>(&encoded)?, v);\n\n        let v = PlacementPolicy::Detached;\n        let encoded = serde_json::to_string(&v)?;\n        assert_eq!(encoded, \"\\\"Detached\\\"\");\n        assert_eq!(serde_json::from_str::<PlacementPolicy>(&encoded)?, v);\n        Ok(())\n    }\n\n    #[test]\n    fn test_reject_unknown_field() {\n        let id = TenantId::generate();\n        let create_request = serde_json::json!({\n            \"new_tenant_id\": id.to_string(),\n            \"unknown_field\": \"unknown_value\".to_string(),\n        });\n        let err = serde_json::from_value::<TenantCreateRequest>(create_request).unwrap_err();\n        assert!(\n            err.to_string().contains(\"unknown field `unknown_field`\"),\n            \"expect unknown field `unknown_field` error, got: {err}\"\n        );\n    }\n\n    /// Check that a minimal migrate request with no config results in the expected default settings\n    #[test]\n    fn test_migrate_request_decode_defaults() {\n        let json = r#\"{\n            \"node_id\": 123\n        }\"#;\n\n        let request: TenantShardMigrateRequest = serde_json::from_str(json).unwrap();\n        assert_eq!(request.node_id, NodeId(123));\n        assert_eq!(request.origin_node_id, None);\n        assert!(!request.migration_config.override_scheduler);\n        assert!(request.migration_config.prewarm);\n        assert_eq!(request.migration_config.secondary_warmup_timeout, None);\n        assert_eq!(\n            request.migration_config.secondary_download_request_timeout,\n            None\n        );\n    }\n\n    /// Check that a partially specified migration config results in the expected default settings\n    #[test]\n    fn test_migration_config_decode_defaults() {\n        // Specify just one field of the config\n        let json = r#\"{\n        }\"#;\n\n        let config: MigrationConfig = 
serde_json::from_str(json).unwrap();\n\n        // Check each field's expected default value\n        assert!(!config.override_scheduler);\n        assert!(config.prewarm);\n        assert_eq!(config.secondary_warmup_timeout, None);\n        assert_eq!(config.secondary_download_request_timeout, None);\n        assert_eq!(config.secondary_warmup_timeout, None);\n\n        // Consistency check that the Default impl agrees with our serde defaults\n        assert_eq!(MigrationConfig::default(), config);\n    }\n}\n"
  },
  {
    "path": "libs/pageserver_api/src/key.rs",
    "content": "use std::fmt;\nuse std::ops::Range;\n\nuse anyhow::{Result, bail};\nuse byteorder::{BE, ByteOrder};\nuse bytes::Bytes;\nuse postgres_ffi_types::forknum::{FSM_FORKNUM, VISIBILITYMAP_FORKNUM};\nuse postgres_ffi_types::{Oid, RepOriginId};\nuse serde::{Deserialize, Serialize};\nuse utils::const_assert;\n\nuse crate::reltag::{BlockNumber, RelTag, SlruKind};\n\n/// Key used in the Repository kv-store.\n///\n/// The Repository treats this as an opaque struct, but see the code in pgdatadir_mapping.rs\n/// for what we actually store in these fields.\n#[derive(Debug, Clone, Copy, Hash, PartialEq, Eq, Ord, PartialOrd, Serialize, Deserialize)]\npub struct Key {\n    pub field1: u8,\n    pub field2: u32,\n    pub field3: u32,\n    pub field4: u32,\n    pub field5: u8,\n    pub field6: u32,\n}\n\n/// When working with large numbers of Keys in-memory, it is more efficient to handle them as i128 than as\n/// a struct of fields.\n#[derive(\n    Clone, Copy, Default, Hash, PartialEq, Eq, Ord, PartialOrd, Serialize, Deserialize, Debug,\n)]\npub struct CompactKey(i128);\n\n/// The storage key size.\npub const KEY_SIZE: usize = 18;\n\n/// The metadata key size. 2B fewer than the storage key size because field2 is not fully utilized.\n/// See [`Key::to_i128`] for more information on the encoding.\npub const METADATA_KEY_SIZE: usize = 16;\n\n/// The key prefix start range for the metadata keys. 
All keys with the first byte >= 0x60 is a metadata key.\npub const METADATA_KEY_BEGIN_PREFIX: u8 = 0x60;\npub const METADATA_KEY_END_PREFIX: u8 = 0x7F;\n\n/// The (reserved) key prefix of relation sizes.\npub const RELATION_SIZE_PREFIX: u8 = 0x61;\n\n/// The key prefix of AUX file keys.\npub const AUX_KEY_PREFIX: u8 = 0x62;\n\n/// The key prefix of ReplOrigin keys.\npub const REPL_ORIGIN_KEY_PREFIX: u8 = 0x63;\n\n/// The key prefix of db directory keys.\npub const DB_DIR_KEY_PREFIX: u8 = 0x64;\n\n/// The key prefix of rel directory keys.\npub const REL_DIR_KEY_PREFIX: u8 = 0x65;\n\n#[derive(Debug, Clone, Copy, Hash, PartialEq, Eq)]\npub enum RelDirExists {\n    Exists,\n    Removed,\n}\n\n#[derive(Debug)]\npub struct DecodeError;\n\nimpl fmt::Display for DecodeError {\n    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {\n        write!(f, \"invalid marker\")\n    }\n}\n\nimpl std::error::Error for DecodeError {}\n\nimpl RelDirExists {\n    /// The value of the rel directory keys that indicates the existence of a relation.\n    const REL_EXISTS_MARKER: Bytes = Bytes::from_static(b\"r\");\n\n    pub fn encode(&self) -> Bytes {\n        match self {\n            Self::Exists => Self::REL_EXISTS_MARKER.clone(),\n            Self::Removed => SPARSE_TOMBSTONE_MARKER.clone(),\n        }\n    }\n\n    pub fn decode_option(data: Option<impl AsRef<[u8]>>) -> Result<Self, DecodeError> {\n        match data {\n            Some(marker) if marker.as_ref() == Self::REL_EXISTS_MARKER => Ok(Self::Exists),\n            // Any other marker is invalid\n            Some(_) => Err(DecodeError),\n            None => Ok(Self::Removed),\n        }\n    }\n\n    pub fn decode(data: impl AsRef<[u8]>) -> Result<Self, DecodeError> {\n        let data = data.as_ref();\n        if data == Self::REL_EXISTS_MARKER {\n            Ok(Self::Exists)\n        } else if data == SPARSE_TOMBSTONE_MARKER {\n            Ok(Self::Removed)\n        } else {\n            Err(DecodeError)\n        
}\n    }\n}\n\n/// A tombstone in the sparse keyspace, which is an empty buffer.\npub const SPARSE_TOMBSTONE_MARKER: Bytes = Bytes::from_static(b\"\");\n\n/// Check if the key falls in the range of metadata keys.\npub const fn is_metadata_key_slice(key: &[u8]) -> bool {\n    key[0] >= METADATA_KEY_BEGIN_PREFIX && key[0] < METADATA_KEY_END_PREFIX\n}\n\nimpl Key {\n    /// Check if the key falls in the range of metadata keys.\n    pub const fn is_metadata_key(&self) -> bool {\n        self.field1 >= METADATA_KEY_BEGIN_PREFIX && self.field1 < METADATA_KEY_END_PREFIX\n    }\n\n    /// Encode a metadata key to a storage key.\n    pub fn from_metadata_key_fixed_size(key: &[u8; METADATA_KEY_SIZE]) -> Self {\n        assert!(is_metadata_key_slice(key), \"key not in metadata key range\");\n        // Metadata key space ends at 0x7F so it's fine to directly convert it to i128.\n        Self::from_i128(i128::from_be_bytes(*key))\n    }\n\n    /// Encode a metadata key to a storage key.\n    pub fn from_metadata_key(key: &[u8]) -> Self {\n        Self::from_metadata_key_fixed_size(key.try_into().expect(\"expect 16 byte metadata key\"))\n    }\n\n    /// Get the range of metadata keys.\n    pub const fn metadata_key_range() -> Range<Self> {\n        Key {\n            field1: METADATA_KEY_BEGIN_PREFIX,\n            field2: 0,\n            field3: 0,\n            field4: 0,\n            field5: 0,\n            field6: 0,\n        }..Key {\n            field1: METADATA_KEY_END_PREFIX,\n            field2: 0,\n            field3: 0,\n            field4: 0,\n            field5: 0,\n            field6: 0,\n        }\n    }\n\n    /// Get the range of aux keys.\n    pub fn metadata_aux_key_range() -> Range<Self> {\n        Key {\n            field1: AUX_KEY_PREFIX,\n            field2: 0,\n            field3: 0,\n            field4: 0,\n            field5: 0,\n            field6: 0,\n        }..Key {\n            field1: AUX_KEY_PREFIX + 1,\n            field2: 0,\n            
field3: 0,\n            field4: 0,\n            field5: 0,\n            field6: 0,\n        }\n    }\n\n    pub fn rel_dir_sparse_key_range() -> Range<Self> {\n        Key {\n            field1: REL_DIR_KEY_PREFIX,\n            field2: 0,\n            field3: 0,\n            field4: 0,\n            field5: 0,\n            field6: 0,\n        }..Key {\n            field1: REL_DIR_KEY_PREFIX + 1,\n            field2: 0,\n            field3: 0,\n            field4: 0,\n            field5: 0,\n            field6: 0,\n        }\n    }\n\n    /// This function checks more extensively what keys we can take on the write path.\n    /// If a key beginning with 00 does not have a global/default tablespace OID, it\n    /// will be rejected on the write path.\n    #[allow(dead_code)]\n    pub fn is_valid_key_on_write_path_strong(&self) -> bool {\n        use postgres_ffi_types::constants::{DEFAULTTABLESPACE_OID, GLOBALTABLESPACE_OID};\n        if !self.is_i128_representable() {\n            return false;\n        }\n        if self.field1 == 0\n            && !(self.field2 == GLOBALTABLESPACE_OID\n                || self.field2 == DEFAULTTABLESPACE_OID\n                || self.field2 == 0)\n        {\n            return false; // User defined tablespaces are not supported\n        }\n        true\n    }\n\n    /// This is a weaker version of `is_valid_key_on_write_path_strong` that simply\n    /// checks if the key is i128 representable. 
Note that some keys can be successfully\n    /// ingested into the pageserver, but will cause errors on generating basebackup.\n    pub fn is_valid_key_on_write_path(&self) -> bool {\n        self.is_i128_representable()\n    }\n\n    pub fn is_i128_representable(&self) -> bool {\n        self.field2 <= 0xFFFF || self.field2 == 0xFFFFFFFF || self.field2 == 0x22222222\n    }\n\n    /// 'field2' is used to store tablespaceid for relations and small enum numbers for other relish.\n    /// As long as Neon does not support tablespace (because of lack of access to local file system),\n    /// we can assume that only some predefined namespace OIDs are used which can fit in u16\n    pub fn to_i128(&self) -> i128 {\n        assert!(self.is_i128_representable(), \"invalid key: {self}\");\n        (((self.field1 & 0x7F) as i128) << 120)\n            | (((self.field2 & 0xFFFF) as i128) << 104)\n            | ((self.field3 as i128) << 72)\n            | ((self.field4 as i128) << 40)\n            | ((self.field5 as i128) << 32)\n            | self.field6 as i128\n    }\n\n    pub const fn from_i128(x: i128) -> Self {\n        Key {\n            field1: ((x >> 120) & 0x7F) as u8,\n            field2: ((x >> 104) & 0xFFFF) as u32,\n            field3: (x >> 72) as u32,\n            field4: (x >> 40) as u32,\n            field5: (x >> 32) as u8,\n            field6: x as u32,\n        }\n    }\n\n    pub fn to_compact(&self) -> CompactKey {\n        CompactKey(self.to_i128())\n    }\n\n    pub fn from_compact(k: CompactKey) -> Self {\n        Self::from_i128(k.0)\n    }\n\n    pub const fn next(&self) -> Key {\n        self.add(1)\n    }\n\n    pub const fn add(&self, x: u32) -> Key {\n        let mut key = *self;\n\n        let r = key.field6.overflowing_add(x);\n        key.field6 = r.0;\n        if r.1 {\n            let r = key.field5.overflowing_add(1);\n            key.field5 = r.0;\n            if r.1 {\n                let r = key.field4.overflowing_add(1);\n                
key.field4 = r.0;\n                if r.1 {\n                    let r = key.field3.overflowing_add(1);\n                    key.field3 = r.0;\n                    if r.1 {\n                        let r = key.field2.overflowing_add(1);\n                        key.field2 = r.0;\n                        if r.1 {\n                            let r = key.field1.overflowing_add(1);\n                            key.field1 = r.0;\n                            assert!(!r.1);\n                        }\n                    }\n                }\n            }\n        }\n        key\n    }\n\n    /// Convert a 18B slice to a key. This function should not be used for 16B metadata keys because `field2` is handled differently.\n    /// Use [`Key::from_i128`] instead if you want to handle 16B keys (i.e., metadata keys). There are some restrictions on `field2`,\n    /// and therefore not all 18B slices are valid page server keys.\n    pub fn from_slice(b: &[u8]) -> Self {\n        Key {\n            field1: b[0],\n            field2: u32::from_be_bytes(b[1..5].try_into().unwrap()),\n            field3: u32::from_be_bytes(b[5..9].try_into().unwrap()),\n            field4: u32::from_be_bytes(b[9..13].try_into().unwrap()),\n            field5: b[13],\n            field6: u32::from_be_bytes(b[14..18].try_into().unwrap()),\n        }\n    }\n\n    /// Convert a key to a 18B slice. 
This function should not be used for getting a 16B metadata key because `field2` is handled differently.\n    /// Use [`Key::to_i128`] instead if you want to get a 16B key (i.e., metadata keys).\n    pub fn write_to_byte_slice(&self, buf: &mut [u8]) {\n        buf[0] = self.field1;\n        BE::write_u32(&mut buf[1..5], self.field2);\n        BE::write_u32(&mut buf[5..9], self.field3);\n        BE::write_u32(&mut buf[9..13], self.field4);\n        buf[13] = self.field5;\n        BE::write_u32(&mut buf[14..18], self.field6);\n    }\n}\n\nimpl CompactKey {\n    pub fn raw(&self) -> i128 {\n        self.0\n    }\n}\n\nimpl From<i128> for CompactKey {\n    fn from(value: i128) -> Self {\n        Self(value)\n    }\n}\n\nimpl fmt::Display for Key {\n    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {\n        write!(\n            f,\n            \"{:02X}{:08X}{:08X}{:08X}{:02X}{:08X}\",\n            self.field1, self.field2, self.field3, self.field4, self.field5, self.field6\n        )\n    }\n}\n\nimpl fmt::Display for CompactKey {\n    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {\n        let k = Key::from_compact(*self);\n        k.fmt(f)\n    }\n}\n\nimpl Key {\n    pub const MIN: Key = Key {\n        field1: u8::MIN,\n        field2: u32::MIN,\n        field3: u32::MIN,\n        field4: u32::MIN,\n        field5: u8::MIN,\n        field6: u32::MIN,\n    };\n    pub const MAX: Key = Key {\n        field1: u8::MAX,\n        field2: u32::MAX,\n        field3: u32::MAX,\n        field4: u32::MAX,\n        field5: u8::MAX,\n        field6: u32::MAX,\n    };\n\n    pub fn from_hex(s: &str) -> Result<Self> {\n        if s.len() != 36 {\n            bail!(\"parse error\");\n        }\n        Ok(Key {\n            field1: u8::from_str_radix(&s[0..2], 16)?,\n            field2: u32::from_str_radix(&s[2..10], 16)?,\n            field3: u32::from_str_radix(&s[10..18], 16)?,\n            field4: u32::from_str_radix(&s[18..26], 16)?,\n            
field5: u8::from_str_radix(&s[26..28], 16)?,\n            field6: u32::from_str_radix(&s[28..36], 16)?,\n        })\n    }\n}\n\n// Layout of the Key address space\n//\n// The Key struct, used to address the underlying key-value store, consists of\n// 18 bytes, split into six fields. See 'Key' in repository.rs. We need to map\n// all the data and metadata keys into those 18 bytes.\n//\n// Principles for the mapping:\n//\n// - Things that are often accessed or modified together, should be close to\n//   each other in the key space. For example, if a relation is extended by one\n//   block, we create a new key-value pair for the block data, and update the\n//   relation size entry. Because of that, the RelSize key comes after all the\n//   RelBlocks of a relation: the RelSize and the last RelBlock are always next\n//   to each other.\n//\n// The key space is divided into four major sections, identified by the first\n// byte, and the form a hierarchy:\n//\n// 00 Relation data and metadata\n//\n//   DbDir    () -> (dbnode, spcnode)\n//   Filenodemap\n//   RelDir   -> relnode forknum\n//       RelBlocks\n//       RelSize\n//\n// 01 SLRUs\n//\n//   SlruDir  kind\n//   SlruSegBlocks segno\n//   SlruSegSize\n//\n// 02 pg_twophase\n//\n// 03 misc\n//    Controlfile\n//    checkpoint\n//    pg_version\n//\n// 04 aux files\n//\n// Below is a full list of the keyspace allocation:\n//\n// DbDir:\n// 00 00000000 00000000 00000000 00   00000000\n//\n// Filenodemap:\n// 00 SPCNODE  DBNODE   00000000 00   00000000\n//\n// RelDir:\n// 00 SPCNODE  DBNODE   00000000 00   00000001 (Postgres never uses relfilenode 0)\n//\n// RelBlock:\n// 00 SPCNODE  DBNODE   RELNODE  FORK BLKNUM\n//\n// RelSize:\n// 00 SPCNODE  DBNODE   RELNODE  FORK FFFFFFFF\n//\n// SlruDir:\n// 01 kind     00000000 00000000 00   00000000\n//\n// SlruSegBlock:\n// 01 kind     00000001 SEGNO    00   BLKNUM\n//\n// SlruSegSize:\n// 01 kind     00000001 SEGNO    00   FFFFFFFF\n//\n// TwoPhaseDir:\n// 02 00000000 00000000 
00000000 00   00000000\n//\n// TwoPhaseFile:\n//\n// 02 00000000 00000000 00XXXXXX XX   XXXXXXXX\n//\n//                        \\______XID_________/\n//\n// The 64-bit XID is stored a little awkwardly in field6, field5 and\n// field4. PostgreSQL v16 and below only stored a 32-bit XID, which\n// fit completely in field6, but starting with PostgreSQL v17, a full\n// 64-bit XID is used. Most pageserver code that accesses\n// TwoPhaseFiles now deals with 64-bit XIDs even on v16, the high bits\n// are just unused.\n//\n// ControlFile:\n// 03 00000000 00000000 00000000 00   00000000\n//\n// Checkpoint:\n// 03 00000000 00000000 00000000 00   00000001\n//\n// AuxFiles:\n// 03 00000000 00000000 00000000 00   00000002\n//\n\n//-- Section 01: relation data and metadata\n\npub const DBDIR_KEY: Key = Key {\n    field1: 0x00,\n    field2: 0,\n    field3: 0,\n    field4: 0,\n    field5: 0,\n    field6: 0,\n};\n\n#[inline(always)]\npub fn dbdir_key_range(spcnode: Oid, dbnode: Oid) -> Range<Key> {\n    Key {\n        field1: 0x00,\n        field2: spcnode,\n        field3: dbnode,\n        field4: 0,\n        field5: 0,\n        field6: 0,\n    }..Key {\n        field1: 0x00,\n        field2: spcnode,\n        field3: dbnode,\n        field4: 0xffffffff,\n        field5: 0xff,\n        field6: 0xffffffff,\n    }\n}\n\n#[inline(always)]\npub fn relmap_file_key(spcnode: Oid, dbnode: Oid) -> Key {\n    Key {\n        field1: 0x00,\n        field2: spcnode,\n        field3: dbnode,\n        field4: 0,\n        field5: 0,\n        field6: 0,\n    }\n}\n\n#[inline(always)]\npub fn rel_dir_to_key(spcnode: Oid, dbnode: Oid) -> Key {\n    Key {\n        field1: 0x00,\n        field2: spcnode,\n        field3: dbnode,\n        field4: 0,\n        field5: 0,\n        field6: 1,\n    }\n}\n\n#[inline(always)]\npub fn rel_tag_sparse_key(spcnode: Oid, dbnode: Oid, relnode: Oid, forknum: u8) -> Key {\n    Key {\n        field1: REL_DIR_KEY_PREFIX,\n        field2: spcnode,\n        field3: 
dbnode,\n        field4: relnode,\n        field5: forknum,\n        field6: 1,\n    }\n}\n\npub fn rel_tag_sparse_key_range(spcnode: Oid, dbnode: Oid) -> Range<Key> {\n    Key {\n        field1: REL_DIR_KEY_PREFIX,\n        field2: spcnode,\n        field3: dbnode,\n        field4: 0,\n        field5: 0,\n        field6: 0,\n    }..Key {\n        field1: REL_DIR_KEY_PREFIX,\n        field2: spcnode,\n        field3: dbnode,\n        field4: u32::MAX,\n        field5: u8::MAX,\n        field6: u32::MAX,\n    } // it's fine to exclude the last key b/c we only use field6 == 1\n}\n\n#[inline(always)]\npub fn rel_block_to_key(rel: RelTag, blknum: BlockNumber) -> Key {\n    Key {\n        field1: 0x00,\n        field2: rel.spcnode,\n        field3: rel.dbnode,\n        field4: rel.relnode,\n        field5: rel.forknum,\n        field6: blknum,\n    }\n}\n\n#[inline(always)]\npub fn rel_size_to_key(rel: RelTag) -> Key {\n    Key {\n        field1: 0x00,\n        field2: rel.spcnode,\n        field3: rel.dbnode,\n        field4: rel.relnode,\n        field5: rel.forknum,\n        field6: 0xffff_ffff,\n    }\n}\n\nimpl Key {\n    #[inline(always)]\n    pub fn is_rel_size_key(&self) -> bool {\n        self.field1 == 0 && self.field6 == u32::MAX\n    }\n}\n\n#[inline(always)]\npub fn rel_key_range(rel: RelTag) -> Range<Key> {\n    Key {\n        field1: 0x00,\n        field2: rel.spcnode,\n        field3: rel.dbnode,\n        field4: rel.relnode,\n        field5: rel.forknum,\n        field6: 0,\n    }..Key {\n        field1: 0x00,\n        field2: rel.spcnode,\n        field3: rel.dbnode,\n        field4: rel.relnode,\n        field5: rel.forknum + 1,\n        field6: 0,\n    }\n}\n\n//-- Section 02: SLRUs\n\n#[inline(always)]\npub fn slru_dir_to_key(kind: SlruKind) -> Key {\n    Key {\n        field1: 0x01,\n        field2: match kind {\n            SlruKind::Clog => 0x00,\n            SlruKind::MultiXactMembers => 0x01,\n            SlruKind::MultiXactOffsets => 0x02,\n   
     },\n        field3: 0,\n        field4: 0,\n        field5: 0,\n        field6: 0,\n    }\n}\n\n#[inline(always)]\npub fn slru_dir_kind(key: &Key) -> Option<Result<SlruKind, u32>> {\n    if key.field1 == 0x01\n        && key.field3 == 0\n        && key.field4 == 0\n        && key.field5 == 0\n        && key.field6 == 0\n    {\n        match key.field2 {\n            0 => Some(Ok(SlruKind::Clog)),\n            1 => Some(Ok(SlruKind::MultiXactMembers)),\n            2 => Some(Ok(SlruKind::MultiXactOffsets)),\n            x => Some(Err(x)),\n        }\n    } else {\n        None\n    }\n}\n\n#[inline(always)]\npub fn slru_block_to_key(kind: SlruKind, segno: u32, blknum: BlockNumber) -> Key {\n    Key {\n        field1: 0x01,\n        field2: match kind {\n            SlruKind::Clog => 0x00,\n            SlruKind::MultiXactMembers => 0x01,\n            SlruKind::MultiXactOffsets => 0x02,\n        },\n        field3: 1,\n        field4: segno,\n        field5: 0,\n        field6: blknum,\n    }\n}\n\n#[inline(always)]\npub fn slru_segment_size_to_key(kind: SlruKind, segno: u32) -> Key {\n    Key {\n        field1: 0x01,\n        field2: match kind {\n            SlruKind::Clog => 0x00,\n            SlruKind::MultiXactMembers => 0x01,\n            SlruKind::MultiXactOffsets => 0x02,\n        },\n        field3: 1,\n        field4: segno,\n        field5: 0,\n        field6: 0xffff_ffff,\n    }\n}\n\nimpl Key {\n    pub fn is_slru_segment_size_key(&self) -> bool {\n        self.field1 == 0x01\n            && self.field2 < 0x03\n            && self.field3 == 0x01\n            && self.field5 == 0\n            && self.field6 == u32::MAX\n    }\n\n    pub fn is_slru_dir_key(&self) -> bool {\n        slru_dir_kind(self).is_some()\n    }\n}\n\n#[inline(always)]\npub fn slru_segment_key_range(kind: SlruKind, segno: u32) -> Range<Key> {\n    let field2 = match kind {\n        SlruKind::Clog => 0x00,\n        SlruKind::MultiXactMembers => 0x01,\n        
SlruKind::MultiXactOffsets => 0x02,\n    };\n\n    Key {\n        field1: 0x01,\n        field2,\n        field3: 1,\n        field4: segno,\n        field5: 0,\n        field6: 0,\n    }..Key {\n        field1: 0x01,\n        field2,\n        field3: 1,\n        field4: segno,\n        field5: 1,\n        field6: 0,\n    }\n}\n\n//-- Section 03: pg_twophase\n\npub const TWOPHASEDIR_KEY: Key = Key {\n    field1: 0x02,\n    field2: 0,\n    field3: 0,\n    field4: 0,\n    field5: 0,\n    field6: 0,\n};\n\n#[inline(always)]\npub fn twophase_file_key(xid: u64) -> Key {\n    Key {\n        field1: 0x02,\n        field2: 0,\n        field3: 0,\n        field4: ((xid & 0xFFFFFF0000000000) >> 40) as u32,\n        field5: ((xid & 0x000000FF00000000) >> 32) as u8,\n        field6: (xid & 0x00000000FFFFFFFF) as u32,\n    }\n}\n\n#[inline(always)]\npub fn twophase_key_range(xid: u64) -> Range<Key> {\n    // 64-bit XIDs really should not overflow\n    let (next_xid, overflowed) = xid.overflowing_add(1);\n\n    Key {\n        field1: 0x02,\n        field2: 0,\n        field3: 0,\n        field4: ((xid & 0xFFFFFF0000000000) >> 40) as u32,\n        field5: ((xid & 0x000000FF00000000) >> 32) as u8,\n        field6: (xid & 0x00000000FFFFFFFF) as u32,\n    }..Key {\n        field1: 0x02,\n        field2: 0,\n        field3: u32::from(overflowed),\n        field4: ((next_xid & 0xFFFFFF0000000000) >> 40) as u32,\n        field5: ((next_xid & 0x000000FF00000000) >> 32) as u8,\n        field6: (next_xid & 0x00000000FFFFFFFF) as u32,\n    }\n}\n\n//-- Section 03: Control file\npub const CONTROLFILE_KEY: Key = Key {\n    field1: 0x03,\n    field2: 0,\n    field3: 0,\n    field4: 0,\n    field5: 0,\n    field6: 0,\n};\n\npub const CHECKPOINT_KEY: Key = Key {\n    field1: 0x03,\n    field2: 0,\n    field3: 0,\n    field4: 0,\n    field5: 0,\n    field6: 1,\n};\n\npub const AUX_FILES_KEY: Key = Key {\n    field1: 0x03,\n    field2: 0,\n    field3: 0,\n    field4: 0,\n    field5: 0,\n    
field6: 2,\n};\n\n#[inline(always)]\npub fn repl_origin_key(origin_id: RepOriginId) -> Key {\n    Key {\n        field1: REPL_ORIGIN_KEY_PREFIX,\n        field2: 0,\n        field3: 0,\n        field4: 0,\n        field5: 0,\n        field6: origin_id as u32,\n    }\n}\n\n/// Get the range of replorigin keys.\npub fn repl_origin_key_range() -> Range<Key> {\n    Key {\n        field1: REPL_ORIGIN_KEY_PREFIX,\n        field2: 0,\n        field3: 0,\n        field4: 0,\n        field5: 0,\n        field6: 0,\n    }..Key {\n        field1: REPL_ORIGIN_KEY_PREFIX,\n        field2: 0,\n        field3: 0,\n        field4: 0,\n        field5: 0,\n        field6: 0x10000,\n    }\n}\n\n// Reverse mappings for a few Keys.\n// These are needed by WAL redo manager.\n\n/// Non inherited range for vectored get.\npub const NON_INHERITED_RANGE: Range<Key> = AUX_FILES_KEY..AUX_FILES_KEY.next();\n/// Sparse keyspace range for vectored get. Missing key error will be ignored for this range.\npub const SPARSE_RANGE: Range<Key> = Key::metadata_key_range();\n\nimpl Key {\n    // AUX_FILES currently stores only data for logical replication (slots etc), and\n    // we don't preserve these on a branch because safekeepers can't follow timeline\n    // switch (and generally it likely should be optional), so ignore these.\n    #[inline(always)]\n    pub fn is_inherited_key(self) -> bool {\n        if self.is_sparse() {\n            self.is_inherited_sparse_key()\n        } else {\n            !NON_INHERITED_RANGE.contains(&self)\n        }\n    }\n\n    #[inline(always)]\n    pub fn is_sparse(self) -> bool {\n        self.field1 >= METADATA_KEY_BEGIN_PREFIX && self.field1 < METADATA_KEY_END_PREFIX\n    }\n\n    /// Check if the key belongs to the inherited keyspace.\n    fn is_inherited_sparse_key(self) -> bool {\n        debug_assert!(self.is_sparse());\n        self.field1 == RELATION_SIZE_PREFIX\n    }\n\n    pub const fn sparse_non_inherited_keyspace() -> Range<Key> {\n        // The two 
keys are adjacent; if we will have non-adjacent keys in the future, we should return a keyspace\n        const_assert!(AUX_KEY_PREFIX + 1 == REPL_ORIGIN_KEY_PREFIX);\n        Key {\n            field1: AUX_KEY_PREFIX,\n            field2: 0,\n            field3: 0,\n            field4: 0,\n            field5: 0,\n            field6: 0,\n        }..Key {\n            field1: REPL_ORIGIN_KEY_PREFIX + 1,\n            field2: 0,\n            field3: 0,\n            field4: 0,\n            field5: 0,\n            field6: 0,\n        }\n    }\n\n    #[inline(always)]\n    pub fn is_rel_fsm_block_key(self) -> bool {\n        self.field1 == 0x00\n            && self.field4 != 0\n            && self.field5 == FSM_FORKNUM\n            && self.field6 != 0xffffffff\n    }\n\n    #[inline(always)]\n    pub fn is_rel_vm_block_key(self) -> bool {\n        self.field1 == 0x00\n            && self.field4 != 0\n            && self.field5 == VISIBILITYMAP_FORKNUM\n            && self.field6 != 0xffffffff\n    }\n\n    #[inline(always)]\n    pub fn to_slru_block(self) -> anyhow::Result<(SlruKind, u32, BlockNumber)> {\n        Ok(match self.field1 {\n            0x01 => {\n                let kind = match self.field2 {\n                    0x00 => SlruKind::Clog,\n                    0x01 => SlruKind::MultiXactMembers,\n                    0x02 => SlruKind::MultiXactOffsets,\n                    _ => anyhow::bail!(\"unrecognized slru kind 0x{:02x}\", self.field2),\n                };\n                let segno = self.field4;\n                let blknum = self.field6;\n\n                (kind, segno, blknum)\n            }\n            _ => anyhow::bail!(\"unexpected value kind 0x{:02x}\", self.field1),\n        })\n    }\n\n    #[inline(always)]\n    pub fn is_slru_block_key(self) -> bool {\n        self.field1 == 0x01                // SLRU-related\n        && self.field3 == 0x00000001   // but not SlruDir\n        && self.field6 != 0xffffffff // and not SlruSegSize\n    }\n\n    
#[inline(always)]\n    pub fn is_rel_block_key(&self) -> bool {\n        self.field1 == 0x00 && self.field4 != 0 && self.field6 != 0xffffffff\n    }\n\n    #[inline(always)]\n    pub fn is_rel_block_of_rel(&self, rel: Oid) -> bool {\n        self.is_rel_block_key() && self.field4 == rel\n    }\n\n    #[inline(always)]\n    pub fn is_rel_dir_key(&self) -> bool {\n        self.field1 == 0x00\n            && self.field2 != 0\n            && self.field3 != 0\n            && self.field4 == 0\n            && self.field5 == 0\n            && self.field6 == 1\n    }\n\n    #[inline(always)]\n    pub fn is_aux_file_key(&self) -> bool {\n        self.field1 == AUX_KEY_PREFIX\n    }\n\n    /// Guaranteed to return `Ok()` if [`Self::is_rel_block_key`] returns `true` for `key`.\n    #[inline(always)]\n    pub fn to_rel_block(self) -> Result<(RelTag, BlockNumber), ToRelBlockError> {\n        Ok(match self.field1 {\n            0x00 => (\n                RelTag {\n                    spcnode: self.field2,\n                    dbnode: self.field3,\n                    relnode: self.field4,\n                    forknum: self.field5,\n                },\n                self.field6,\n            ),\n            _ => return Err(ToRelBlockError(self.field1)),\n        })\n    }\n}\n\nimpl std::str::FromStr for Key {\n    type Err = anyhow::Error;\n\n    fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {\n        Self::from_hex(s)\n    }\n}\n\n#[derive(Debug)]\npub struct ToRelBlockError(u8);\n\nimpl fmt::Display for ToRelBlockError {\n    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {\n        write!(f, \"unexpected value kind 0x{:02x}\", self.0)\n    }\n}\n\nimpl std::error::Error for ToRelBlockError {}\n\n#[cfg(test)]\nmod tests {\n    use std::str::FromStr;\n\n    use rand::{Rng, SeedableRng};\n\n    use super::AUX_KEY_PREFIX;\n    use crate::key::{Key, is_metadata_key_slice};\n\n    #[test]\n    fn display_fromstr_bijection() {\n        let mut rng = 
rand::rngs::StdRng::seed_from_u64(42);\n\n        let key = Key {\n            field1: rng.random(),\n            field2: rng.random(),\n            field3: rng.random(),\n            field4: rng.random(),\n            field5: rng.random(),\n            field6: rng.random(),\n        };\n\n        assert_eq!(key, Key::from_str(&format!(\"{key}\")).unwrap());\n    }\n\n    #[test]\n    fn test_metadata_keys() {\n        let mut metadata_key = vec![AUX_KEY_PREFIX];\n        metadata_key.extend_from_slice(&[0xFF; 15]);\n        let encoded_key = Key::from_metadata_key(&metadata_key);\n        let output_key = encoded_key.to_i128().to_be_bytes();\n        assert_eq!(metadata_key, output_key);\n        assert!(encoded_key.is_metadata_key());\n        assert!(is_metadata_key_slice(&metadata_key));\n    }\n\n    #[test]\n    fn test_possible_largest_key() {\n        Key::from_i128(0x7FFF_FFFF_FFFF_FFFF_FFFF_FFFF_FFFF_FFFF);\n        // TODO: put this key into the system and see if anything breaks.\n    }\n}\n"
  },
  {
    "path": "libs/pageserver_api/src/keyspace.rs",
    "content": "use std::ops::Range;\n\nuse itertools::Itertools;\n\nuse crate::key::Key;\nuse crate::shard::{ShardCount, ShardIdentity};\n\n///\n/// Represents a set of Keys, in a compact form.\n///\n#[derive(Clone, Debug, Default, PartialEq, Eq)]\npub struct KeySpace {\n    /// Contiguous ranges of keys that belong to the key space. In key order,\n    /// and with no overlap.\n    pub ranges: Vec<Range<Key>>,\n}\n\nimpl std::fmt::Display for KeySpace {\n    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {\n        write!(f, \"[\")?;\n        for range in &self.ranges {\n            write!(f, \"{}..{},\", range.start, range.end)?;\n        }\n        write!(f, \"]\")\n    }\n}\n\n/// A wrapper type for sparse keyspaces.\n#[derive(Clone, Debug, Default, PartialEq, Eq)]\npub struct SparseKeySpace(pub KeySpace);\n\n/// Represents a contiguous half-open range of the keyspace, masked according to a particular\n/// ShardNumber's stripes: within this range of keys, only some \"belong\" to the current\n/// shard.\n///\n/// When we iterate over keys within this object, we will skip any keys that don't belong\n/// to this shard.\n///\n/// The start + end keys may not belong to the shard: these specify where layer files should\n/// start  + end, but we will never actually read/write those keys.\n#[derive(Clone, Debug, PartialEq, Eq)]\npub struct ShardedRange<'a> {\n    pub shard_identity: &'a ShardIdentity,\n    pub range: Range<Key>,\n}\n\n// Calculate the size of a range within the blocks of the same relation, or spanning only the\n// top page in the previous relation's space.\npub fn contiguous_range_len(range: &Range<Key>) -> u32 {\n    debug_assert!(is_contiguous_range(range));\n    if range.start.field6 == 0xffffffff {\n        range.end.field6 + 1\n    } else {\n        range.end.field6 - range.start.field6\n    }\n}\n\n/// Return true if this key range includes only keys in the same relation's data blocks, or\n/// just spanning one relation and 
the logical size (0xffffffff) block of the relation before it.\n///\n/// Contiguous in this context means we know the keys are in use _somewhere_, but it might not\n/// be on our shard.  Later in ShardedRange we do the extra work to figure out how much\n/// of a given contiguous range is present on one shard.\n///\n/// This matters, because:\n/// - Within such ranges, keys are used contiguously.  Outside such ranges it is sparse.\n/// - Within such ranges, we may calculate distances using simple subtraction of field6.\npub fn is_contiguous_range(range: &Range<Key>) -> bool {\n    range.start.field1 == range.end.field1\n        && range.start.field2 == range.end.field2\n        && range.start.field3 == range.end.field3\n        && range.start.field4 == range.end.field4\n        && (range.start.field5 == range.end.field5\n            || (range.start.field6 == 0xffffffff && range.start.field5 + 1 == range.end.field5))\n}\n\nimpl<'a> ShardedRange<'a> {\n    pub fn new(range: Range<Key>, shard_identity: &'a ShardIdentity) -> Self {\n        Self {\n            shard_identity,\n            range,\n        }\n    }\n\n    /// Break up this range into chunks, each of which has at least one local key in it if the\n    /// total range has at least one local key.\n    pub fn fragment(self, target_nblocks: u32) -> Vec<(u32, Range<Key>)> {\n        // Optimization for single-key case (e.g. logical size keys)\n        if self.range.end == self.range.start.add(1) {\n            return vec![(\n                if self.shard_identity.is_key_disposable(&self.range.start) {\n                    0\n                } else {\n                    1\n                },\n                self.range,\n            )];\n        }\n\n        if !is_contiguous_range(&self.range) {\n            // Ranges that span relations are not fragmented.  
We only get these ranges as a result\n            // of operations that act on existing layers, so we trust that the existing range is\n            // reasonably small.\n            return vec![(u32::MAX, self.range)];\n        }\n\n        let mut fragments: Vec<(u32, Range<Key>)> = Vec::new();\n\n        let mut cursor = self.range.start;\n        while cursor < self.range.end {\n            let advance_by = self.distance_to_next_boundary(cursor);\n            let is_fragment_disposable = self.shard_identity.is_key_disposable(&cursor);\n\n            // If the previous fragment is undersized, then we seek to consume enough\n            // blocks to complete it.\n            let (want_blocks, merge_last_fragment) = match fragments.last_mut() {\n                Some(frag) if frag.0 < target_nblocks => (target_nblocks - frag.0, Some(frag)),\n                Some(frag) => {\n                    // Prev block is complete, want the full number.\n                    (\n                        target_nblocks,\n                        if is_fragment_disposable {\n                            // If this current range will be empty (not shard-local data), we will merge into previous\n                            Some(frag)\n                        } else {\n                            None\n                        },\n                    )\n                }\n                None => {\n                    // First iteration, want the full number\n                    (target_nblocks, None)\n                }\n            };\n\n            let advance_by = if is_fragment_disposable {\n                advance_by\n            } else {\n                std::cmp::min(advance_by, want_blocks)\n            };\n\n            let next_cursor = cursor.add(advance_by);\n\n            let this_frag = (\n                if is_fragment_disposable {\n                    0\n                } else {\n                    advance_by\n                },\n                cursor..next_cursor,\n     
       );\n            cursor = next_cursor;\n\n            if let Some(last_fragment) = merge_last_fragment {\n                // Previous fragment was short or this one is empty, merge into it\n                last_fragment.0 += this_frag.0;\n                last_fragment.1.end = this_frag.1.end;\n            } else {\n                fragments.push(this_frag);\n            }\n        }\n\n        fragments\n    }\n\n    /// Estimate the physical pages that are within this range, on this shard.  This returns\n    /// u32::MAX if the range spans relations: this return value should be interpreted as \"large\".\n    pub fn page_count(&self) -> u32 {\n        // Special cases for single keys like logical sizes\n        if self.range.end == self.range.start.add(1) {\n            return if self.shard_identity.is_key_disposable(&self.range.start) {\n                0\n            } else {\n                1\n            };\n        }\n\n        // We can only do an authentic calculation of contiguous key ranges\n        if !is_contiguous_range(&self.range) {\n            return u32::MAX;\n        }\n\n        // Special case for single sharded tenants: our logical and physical sizes are the same\n        if self.shard_identity.count < ShardCount::new(2) {\n            return contiguous_range_len(&self.range);\n        }\n\n        // Normal path: step through stripes and part-stripes in the range, evaluate whether each one belongs\n        // to Self, and add the stripe's block count to our total if so.\n        let mut result: u64 = 0;\n        let mut cursor = self.range.start;\n        while cursor < self.range.end {\n            // Count up to the next stripe_size boundary or end of range\n            let advance_by = self.distance_to_next_boundary(cursor);\n\n            // If the blocks in this stripe belong to us, add them to our count\n            if !self.shard_identity.is_key_disposable(&cursor) {\n                result += advance_by as u64;\n            
}\n\n            cursor = cursor.add(advance_by);\n        }\n\n        if result > u32::MAX as u64 {\n            u32::MAX\n        } else {\n            result as u32\n        }\n    }\n\n    /// Advance the cursor to the next potential fragment boundary: this is either\n    /// a stripe boundary, or the end of the range.\n    fn distance_to_next_boundary(&self, cursor: Key) -> u32 {\n        let distance_to_range_end = contiguous_range_len(&(cursor..self.range.end));\n\n        if self.shard_identity.count < ShardCount::new(2) {\n            // Optimization: don't bother stepping through stripes if the tenant isn't sharded.\n            return distance_to_range_end;\n        }\n\n        if cursor.field6 == 0xffffffff {\n            // We are wrapping from one relation's logical size to the next relation's first data block\n            return 1;\n        }\n\n        let stripe_index = cursor.field6 / self.shard_identity.stripe_size.0;\n        let stripe_remainder = self.shard_identity.stripe_size.0\n            - (cursor.field6 - stripe_index * self.shard_identity.stripe_size.0);\n\n        if cfg!(debug_assertions) {\n            // We should never overflow field5 and field6 -- our callers check this earlier\n            // and would have returned their u32::MAX cases if the input range violated this.\n            let next_cursor = cursor.add(stripe_remainder);\n            debug_assert!(\n                next_cursor.field1 == cursor.field1\n                    && next_cursor.field2 == cursor.field2\n                    && next_cursor.field3 == cursor.field3\n                    && next_cursor.field4 == cursor.field4\n                    && next_cursor.field5 == cursor.field5\n            )\n        }\n\n        std::cmp::min(stripe_remainder, distance_to_range_end)\n    }\n\n    /// Whereas `page_count` estimates the number of pages physically in this range on this shard,\n    /// this function simply calculates the number of pages in the space, without 
accounting for those\n    /// pages that would not actually be stored on this node.\n    ///\n    /// Don't use this function in code that works with physical entities like layer files.\n    pub fn raw_size(range: &Range<Key>) -> u32 {\n        if is_contiguous_range(range) {\n            contiguous_range_len(range)\n        } else {\n            u32::MAX\n        }\n    }\n}\n\nimpl KeySpace {\n    /// Create a key space with a single range.\n    pub fn single(key_range: Range<Key>) -> Self {\n        Self {\n            ranges: vec![key_range],\n        }\n    }\n\n    /// Partition a key space into roughly chunks of roughly 'target_size' bytes\n    /// in each partition.\n    ///\n    pub fn partition(\n        &self,\n        shard_identity: &ShardIdentity,\n        target_size: u64,\n        block_size: u64,\n    ) -> KeyPartitioning {\n        let target_nblocks = (target_size / block_size) as u32;\n\n        let mut parts = Vec::new();\n        let mut current_part = Vec::new();\n        let mut current_part_size: usize = 0;\n        for range in &self.ranges {\n            // While doing partitioning, wrap the range in ShardedRange so that our size calculations\n            // will respect shard striping rather than assuming all keys within a range are present.\n            let range = ShardedRange::new(range.clone(), shard_identity);\n\n            // Chunk up the range into parts that each contain up to target_size local blocks\n            for (frag_on_shard_size, frag_range) in range.fragment(target_nblocks) {\n                // If appending the next contiguous range in the keyspace to the current\n                // partition would cause it to be too large, and our current partition\n                // covers at least one block that is physically present in this shard,\n                // then start a new partition\n                if current_part_size + frag_on_shard_size as usize > target_nblocks as usize\n                    && current_part_size > 
0\n                {\n                    parts.push(KeySpace {\n                        ranges: current_part,\n                    });\n                    current_part = Vec::new();\n                    current_part_size = 0;\n                }\n                current_part.push(frag_range.start..frag_range.end);\n                current_part_size += frag_on_shard_size as usize;\n            }\n        }\n\n        // add last partition that wasn't full yet.\n        if !current_part.is_empty() {\n            parts.push(KeySpace {\n                ranges: current_part,\n            });\n        }\n\n        KeyPartitioning { parts }\n    }\n\n    pub fn is_empty(&self) -> bool {\n        self.total_raw_size() == 0\n    }\n\n    /// Merge another keyspace into the current one.\n    /// Note: the keyspaces must not overlap (enforced via assertions). To merge overlapping key ranges, use `KeySpaceRandomAccum`.\n    pub fn merge(&mut self, other: &KeySpace) {\n        let all_ranges = self\n            .ranges\n            .iter()\n            .merge_by(other.ranges.iter(), |lhs, rhs| lhs.start < rhs.start);\n\n        let mut accum = KeySpaceAccum::new();\n        let mut prev: Option<&Range<Key>> = None;\n        for range in all_ranges {\n            if let Some(prev) = prev {\n                let overlap =\n                    std::cmp::max(range.start, prev.start) < std::cmp::min(range.end, prev.end);\n                assert!(\n                    !overlap,\n                    \"Attempt to merge ovelapping keyspaces: {prev:?} overlaps {range:?}\"\n                );\n            }\n\n            accum.add_range(range.clone());\n            prev = Some(range);\n        }\n\n        self.ranges = accum.to_keyspace().ranges;\n    }\n\n    /// Remove all keys in `other` from `self`.\n    /// This can involve splitting or removing of existing ranges.\n    /// Returns the removed keyspace\n    pub fn remove_overlapping_with(&mut self, other: &KeySpace) -> KeySpace {\n 
       let (self_start, self_end) = match (self.start(), self.end()) {\n            (Some(start), Some(end)) => (start, end),\n            _ => {\n                // self is empty\n                return KeySpace::default();\n            }\n        };\n\n        // Key spaces are sorted by definition, so skip ahead to the first\n        // potentially intersecting range. Similarly, ignore ranges that start\n        // after the current keyspace ends.\n        let other_ranges = other\n            .ranges\n            .iter()\n            .skip_while(|range| self_start >= range.end)\n            .take_while(|range| self_end > range.start);\n\n        let mut removed_accum = KeySpaceRandomAccum::new();\n        for range in other_ranges {\n            while let Some(overlap_at) = self.overlaps_at(range) {\n                let overlapped = self.ranges[overlap_at].clone();\n\n                if overlapped.start < range.start && overlapped.end <= range.end {\n                    // Higher part of the range is completely overlapped.\n                    removed_accum.add_range(range.start..self.ranges[overlap_at].end);\n                    self.ranges[overlap_at].end = range.start;\n                }\n                if overlapped.start >= range.start && overlapped.end > range.end {\n                    // Lower part of the range is completely overlapped.\n                    removed_accum.add_range(self.ranges[overlap_at].start..range.end);\n                    self.ranges[overlap_at].start = range.end;\n                }\n                if overlapped.start < range.start && overlapped.end > range.end {\n                    // Middle part of the range is overlapped.\n                    removed_accum.add_range(range.clone());\n                    self.ranges[overlap_at].end = range.start;\n                    self.ranges\n                        .insert(overlap_at + 1, range.end..overlapped.end);\n                }\n                if overlapped.start >= range.start && 
overlapped.end <= range.end {\n                    // Whole range is overlapped\n                    removed_accum.add_range(self.ranges[overlap_at].clone());\n                    self.ranges.remove(overlap_at);\n                }\n            }\n        }\n\n        removed_accum.to_keyspace()\n    }\n\n    pub fn start(&self) -> Option<Key> {\n        self.ranges.first().map(|range| range.start)\n    }\n\n    pub fn end(&self) -> Option<Key> {\n        self.ranges.last().map(|range| range.end)\n    }\n\n    /// The size of the keyspace in pages, before accounting for sharding\n    pub fn total_raw_size(&self) -> usize {\n        self.ranges\n            .iter()\n            .map(|range| ShardedRange::raw_size(range) as usize)\n            .sum()\n    }\n\n    fn overlaps_at(&self, range: &Range<Key>) -> Option<usize> {\n        match self.ranges.binary_search_by_key(&range.end, |r| r.start) {\n            Ok(0) => None,\n            Err(0) => None,\n            Ok(index) if self.ranges[index - 1].end > range.start => Some(index - 1),\n            Err(index) if self.ranges[index - 1].end > range.start => Some(index - 1),\n            _ => None,\n        }\n    }\n\n    ///\n    /// Check if key space contains overlapping range\n    ///\n    pub fn overlaps(&self, range: &Range<Key>) -> bool {\n        self.overlaps_at(range).is_some()\n    }\n\n    /// Check if the keyspace contains a key\n    pub fn contains(&self, key: &Key) -> bool {\n        self.overlaps(&(*key..key.next()))\n    }\n}\n\n///\n/// Represents a partitioning of the key space.\n///\n/// The only kind of partitioning we do is to partition the key space into\n/// partitions that are roughly equal in physical size (see KeySpace::partition).\n/// But this data structure could represent any partitioning.\n///\n#[derive(Clone, Debug, Default)]\npub struct KeyPartitioning {\n    pub parts: Vec<KeySpace>,\n}\n\n/// Represents a partitioning of the sparse key space.\n#[derive(Clone, Debug, Default)]\npub 
struct SparseKeyPartitioning {\n    pub parts: Vec<SparseKeySpace>,\n}\n\nimpl KeyPartitioning {\n    pub fn new() -> Self {\n        KeyPartitioning { parts: Vec::new() }\n    }\n\n    /// Convert a key partitioning to a sparse partition.\n    pub fn into_sparse(self) -> SparseKeyPartitioning {\n        SparseKeyPartitioning {\n            parts: self.parts.into_iter().map(SparseKeySpace).collect(),\n        }\n    }\n}\n\nimpl SparseKeyPartitioning {\n    /// Note: use this function with caution. Attempt to handle a sparse keyspace in the same way as a dense keyspace will\n    /// cause long/dead loops.\n    pub fn into_dense(self) -> KeyPartitioning {\n        KeyPartitioning {\n            parts: self.parts.into_iter().map(|x| x.0).collect(),\n        }\n    }\n}\n\n///\n/// A helper object, to collect a set of keys and key ranges into a KeySpace\n/// object. This takes care of merging adjacent keys and key ranges into\n/// contiguous ranges.\n///\n#[derive(Clone, Debug, Default)]\npub struct KeySpaceAccum {\n    accum: Option<Range<Key>>,\n\n    ranges: Vec<Range<Key>>,\n    size: u64,\n}\n\nimpl KeySpaceAccum {\n    pub fn new() -> Self {\n        Self {\n            accum: None,\n            ranges: Vec::new(),\n            size: 0,\n        }\n    }\n\n    #[inline(always)]\n    pub fn add_key(&mut self, key: Key) {\n        self.add_range(singleton_range(key))\n    }\n\n    #[inline(always)]\n    pub fn add_range(&mut self, range: Range<Key>) {\n        self.size += ShardedRange::raw_size(&range) as u64;\n\n        match self.accum.as_mut() {\n            Some(accum) => {\n                if range.start == accum.end {\n                    accum.end = range.end;\n                } else {\n                    // TODO: to efficiently support small sharding stripe sizes, we should avoid starting\n                    // a new range here if the skipped region was all keys that don't belong on this shard.\n                    // 
(https://github.com/neondatabase/neon/issues/6247)\n                    assert!(range.start > accum.end);\n                    self.ranges.push(accum.clone());\n                    *accum = range;\n                }\n            }\n            None => self.accum = Some(range),\n        }\n    }\n\n    pub fn to_keyspace(mut self) -> KeySpace {\n        if let Some(accum) = self.accum.take() {\n            self.ranges.push(accum);\n        }\n        KeySpace {\n            ranges: self.ranges,\n        }\n    }\n\n    pub fn consume_keyspace(&mut self) -> KeySpace {\n        std::mem::take(self).to_keyspace()\n    }\n\n    // The total number of keys in this object, ignoring any sharding effects that might cause some of\n    // the keys to be omitted in storage on this shard.\n    pub fn raw_size(&self) -> u64 {\n        self.size\n    }\n}\n\n///\n/// A helper object, to collect a set of keys and key ranges into a KeySpace\n/// object. Key ranges may be inserted in any order and can overlap.\n///\n#[derive(Clone, Debug, Default)]\npub struct KeySpaceRandomAccum {\n    ranges: Vec<Range<Key>>,\n}\n\nimpl KeySpaceRandomAccum {\n    pub fn new() -> Self {\n        Self { ranges: Vec::new() }\n    }\n\n    pub fn add_key(&mut self, key: Key) {\n        self.add_range(singleton_range(key))\n    }\n\n    pub fn add_range(&mut self, range: Range<Key>) {\n        self.ranges.push(range);\n    }\n\n    pub fn add_keyspace(&mut self, keyspace: KeySpace) {\n        for range in keyspace.ranges {\n            self.add_range(range);\n        }\n    }\n\n    pub fn to_keyspace(mut self) -> KeySpace {\n        let mut ranges = Vec::new();\n        if !self.ranges.is_empty() {\n            self.ranges.sort_by_key(|r| r.start);\n            let mut start = self.ranges.first().unwrap().start;\n            let mut end = self.ranges.first().unwrap().end;\n            for r in self.ranges {\n                assert!(r.start >= start);\n                if r.start > end {\n               
     ranges.push(start..end);\n                    start = r.start;\n                    end = r.end;\n                } else if r.end > end {\n                    end = r.end;\n                }\n            }\n            ranges.push(start..end);\n        }\n        KeySpace { ranges }\n    }\n\n    pub fn consume_keyspace(&mut self) -> KeySpace {\n        let mut prev_accum = KeySpaceRandomAccum::new();\n        std::mem::swap(self, &mut prev_accum);\n\n        prev_accum.to_keyspace()\n    }\n}\n\npub fn singleton_range(key: Key) -> Range<Key> {\n    key..key.next()\n}\n\n#[cfg(test)]\nmod tests {\n    use std::fmt::Write;\n\n    use rand::{RngCore, SeedableRng};\n\n    use super::*;\n    use crate::shard::{DEFAULT_STRIPE_SIZE, ShardCount, ShardNumber, ShardStripeSize};\n\n    // Helper function to create a key range.\n    //\n    // Make the tests below less verbose.\n    fn kr(irange: Range<i128>) -> Range<Key> {\n        Key::from_i128(irange.start)..Key::from_i128(irange.end)\n    }\n\n    #[allow(dead_code)]\n    fn dump_keyspace(ks: &KeySpace) {\n        for r in ks.ranges.iter() {\n            println!(\"  {}..{}\", r.start.to_i128(), r.end.to_i128());\n        }\n    }\n\n    fn assert_ks_eq(actual: &KeySpace, expected: Vec<Range<Key>>) {\n        if actual.ranges != expected {\n            let mut msg = String::new();\n\n            writeln!(msg, \"expected:\").unwrap();\n            for r in &expected {\n                writeln!(msg, \"  {}..{}\", r.start.to_i128(), r.end.to_i128()).unwrap();\n            }\n            writeln!(msg, \"got:\").unwrap();\n            for r in &actual.ranges {\n                writeln!(msg, \"  {}..{}\", r.start.to_i128(), r.end.to_i128()).unwrap();\n            }\n            panic!(\"{}\", msg);\n        }\n    }\n\n    #[test]\n    fn keyspace_consume() {\n        let ranges = vec![kr(0..10), kr(20..35), kr(40..45)];\n\n        let mut accum = KeySpaceAccum::new();\n        for range in &ranges {\n            
accum.add_range(range.clone());\n        }\n\n        let expected_size: u64 = ranges\n            .iter()\n            .map(|r| ShardedRange::raw_size(r) as u64)\n            .sum();\n        assert_eq!(accum.raw_size(), expected_size);\n\n        assert_ks_eq(&accum.consume_keyspace(), ranges.clone());\n        assert_eq!(accum.raw_size(), 0);\n\n        assert_ks_eq(&accum.consume_keyspace(), vec![]);\n        assert_eq!(accum.raw_size(), 0);\n\n        for range in &ranges {\n            accum.add_range(range.clone());\n        }\n        assert_ks_eq(&accum.to_keyspace(), ranges);\n    }\n\n    #[test]\n    fn keyspace_add_range() {\n        // two separate ranges\n        //\n        // #####\n        //         #####\n        let mut ks = KeySpaceRandomAccum::default();\n        ks.add_range(kr(0..10));\n        ks.add_range(kr(20..30));\n        assert_ks_eq(&ks.to_keyspace(), vec![kr(0..10), kr(20..30)]);\n\n        // two separate ranges, added in reverse order\n        //\n        //         #####\n        // #####\n        let mut ks = KeySpaceRandomAccum::default();\n        ks.add_range(kr(20..30));\n        ks.add_range(kr(0..10));\n\n        // add range that is adjacent to the end of an existing range\n        //\n        // #####\n        //      #####\n        ks.add_range(kr(0..10));\n        ks.add_range(kr(10..30));\n        assert_ks_eq(&ks.to_keyspace(), vec![kr(0..30)]);\n\n        // add range that is adjacent to the start of an existing range\n        //\n        //      #####\n        // #####\n        let mut ks = KeySpaceRandomAccum::default();\n        ks.add_range(kr(10..30));\n        ks.add_range(kr(0..10));\n        assert_ks_eq(&ks.to_keyspace(), vec![kr(0..30)]);\n\n        // add range that overlaps with the end of an existing range\n        //\n        // #####\n        //    #####\n        let mut ks = KeySpaceRandomAccum::default();\n        ks.add_range(kr(0..10));\n        ks.add_range(kr(5..30));\n        
assert_ks_eq(&ks.to_keyspace(), vec![kr(0..30)]);\n\n        // add range that overlaps with the start of an existing range\n        //\n        //    #####\n        // #####\n        let mut ks = KeySpaceRandomAccum::default();\n        ks.add_range(kr(5..30));\n        ks.add_range(kr(0..10));\n        assert_ks_eq(&ks.to_keyspace(), vec![kr(0..30)]);\n\n        // add range that is fully covered by an existing range\n        //\n        // #########\n        //   #####\n        let mut ks = KeySpaceRandomAccum::default();\n        ks.add_range(kr(0..30));\n        ks.add_range(kr(10..20));\n        assert_ks_eq(&ks.to_keyspace(), vec![kr(0..30)]);\n\n        // add range that extends an existing range from both ends\n        //\n        //   #####\n        // #########\n        let mut ks = KeySpaceRandomAccum::default();\n        ks.add_range(kr(10..20));\n        ks.add_range(kr(0..30));\n        assert_ks_eq(&ks.to_keyspace(), vec![kr(0..30)]);\n\n        // add a range that overlaps with two existing ranges, joining them\n        //\n        // #####   #####\n        //    #######\n        let mut ks = KeySpaceRandomAccum::default();\n        ks.add_range(kr(0..10));\n        ks.add_range(kr(20..30));\n        ks.add_range(kr(5..25));\n        assert_ks_eq(&ks.to_keyspace(), vec![kr(0..30)]);\n    }\n\n    #[test]\n    fn keyspace_overlaps() {\n        let mut ks = KeySpaceRandomAccum::default();\n        ks.add_range(kr(10..20));\n        ks.add_range(kr(30..40));\n        let ks = ks.to_keyspace();\n\n        //        #####      #####\n        // xxxx\n        assert!(!ks.overlaps(&kr(0..5)));\n\n        //        #####      #####\n        //   xxxx\n        assert!(!ks.overlaps(&kr(5..9)));\n\n        //        #####      #####\n        //    xxxx\n        assert!(!ks.overlaps(&kr(5..10)));\n\n        //        #####      #####\n        //     xxxx\n        assert!(ks.overlaps(&kr(5..11)));\n\n        //        #####      #####\n        //        xxxx\n  
      assert!(ks.overlaps(&kr(10..15)));\n\n        //        #####      #####\n        //         xxxx\n        assert!(ks.overlaps(&kr(15..20)));\n\n        //        #####      #####\n        //           xxxx\n        assert!(ks.overlaps(&kr(15..25)));\n\n        //        #####      #####\n        //              xxxx\n        assert!(!ks.overlaps(&kr(22..28)));\n\n        //        #####      #####\n        //               xxxx\n        assert!(!ks.overlaps(&kr(25..30)));\n\n        //        #####      #####\n        //                      xxxx\n        assert!(ks.overlaps(&kr(35..35)));\n\n        //        #####      #####\n        //                        xxxx\n        assert!(!ks.overlaps(&kr(40..45)));\n\n        //        #####      #####\n        //                        xxxx\n        assert!(!ks.overlaps(&kr(45..50)));\n\n        //        #####      #####\n        //        xxxxxxxxxxx\n        assert!(ks.overlaps(&kr(0..30))); // XXXXX This fails currently!\n    }\n\n    #[test]\n    fn test_remove_full_overlapps() {\n        let mut key_space1 = KeySpace {\n            ranges: vec![\n                Key::from_i128(1)..Key::from_i128(4),\n                Key::from_i128(5)..Key::from_i128(8),\n                Key::from_i128(10)..Key::from_i128(12),\n            ],\n        };\n        let key_space2 = KeySpace {\n            ranges: vec![\n                Key::from_i128(2)..Key::from_i128(3),\n                Key::from_i128(6)..Key::from_i128(7),\n                Key::from_i128(11)..Key::from_i128(13),\n            ],\n        };\n        let removed = key_space1.remove_overlapping_with(&key_space2);\n        let removed_expected = KeySpace {\n            ranges: vec![\n                Key::from_i128(2)..Key::from_i128(3),\n                Key::from_i128(6)..Key::from_i128(7),\n                Key::from_i128(11)..Key::from_i128(12),\n            ],\n        };\n        assert_eq!(removed, removed_expected);\n\n        assert_eq!(\n            
key_space1.ranges,\n            vec![\n                Key::from_i128(1)..Key::from_i128(2),\n                Key::from_i128(3)..Key::from_i128(4),\n                Key::from_i128(5)..Key::from_i128(6),\n                Key::from_i128(7)..Key::from_i128(8),\n                Key::from_i128(10)..Key::from_i128(11)\n            ]\n        );\n    }\n\n    #[test]\n    fn test_remove_partial_overlaps() {\n        // Test partial ovelaps\n        let mut key_space1 = KeySpace {\n            ranges: vec![\n                Key::from_i128(1)..Key::from_i128(5),\n                Key::from_i128(7)..Key::from_i128(10),\n                Key::from_i128(12)..Key::from_i128(15),\n            ],\n        };\n        let key_space2 = KeySpace {\n            ranges: vec![\n                Key::from_i128(3)..Key::from_i128(6),\n                Key::from_i128(8)..Key::from_i128(11),\n                Key::from_i128(14)..Key::from_i128(17),\n            ],\n        };\n\n        let removed = key_space1.remove_overlapping_with(&key_space2);\n        let removed_expected = KeySpace {\n            ranges: vec![\n                Key::from_i128(3)..Key::from_i128(5),\n                Key::from_i128(8)..Key::from_i128(10),\n                Key::from_i128(14)..Key::from_i128(15),\n            ],\n        };\n        assert_eq!(removed, removed_expected);\n\n        assert_eq!(\n            key_space1.ranges,\n            vec![\n                Key::from_i128(1)..Key::from_i128(3),\n                Key::from_i128(7)..Key::from_i128(8),\n                Key::from_i128(12)..Key::from_i128(14),\n            ]\n        );\n    }\n\n    #[test]\n    fn test_remove_no_overlaps() {\n        let mut key_space1 = KeySpace {\n            ranges: vec![\n                Key::from_i128(1)..Key::from_i128(5),\n                Key::from_i128(7)..Key::from_i128(10),\n                Key::from_i128(12)..Key::from_i128(15),\n            ],\n        };\n        let key_space2 = KeySpace {\n            ranges: 
vec![\n                Key::from_i128(6)..Key::from_i128(7),\n                Key::from_i128(11)..Key::from_i128(12),\n                Key::from_i128(15)..Key::from_i128(17),\n            ],\n        };\n\n        let removed = key_space1.remove_overlapping_with(&key_space2);\n        let removed_expected = KeySpace::default();\n        assert_eq!(removed, removed_expected);\n\n        assert_eq!(\n            key_space1.ranges,\n            vec![\n                Key::from_i128(1)..Key::from_i128(5),\n                Key::from_i128(7)..Key::from_i128(10),\n                Key::from_i128(12)..Key::from_i128(15),\n            ]\n        );\n    }\n\n    #[test]\n    fn test_remove_one_range_overlaps_multiple() {\n        let mut key_space1 = KeySpace {\n            ranges: vec![\n                Key::from_i128(1)..Key::from_i128(3),\n                Key::from_i128(3)..Key::from_i128(6),\n                Key::from_i128(6)..Key::from_i128(10),\n                Key::from_i128(12)..Key::from_i128(15),\n                Key::from_i128(17)..Key::from_i128(20),\n                Key::from_i128(20)..Key::from_i128(30),\n                Key::from_i128(30)..Key::from_i128(40),\n            ],\n        };\n        let key_space2 = KeySpace {\n            ranges: vec![Key::from_i128(9)..Key::from_i128(19)],\n        };\n\n        let removed = key_space1.remove_overlapping_with(&key_space2);\n        let removed_expected = KeySpace {\n            ranges: vec![\n                Key::from_i128(9)..Key::from_i128(10),\n                Key::from_i128(12)..Key::from_i128(15),\n                Key::from_i128(17)..Key::from_i128(19),\n            ],\n        };\n        assert_eq!(removed, removed_expected);\n\n        assert_eq!(\n            key_space1.ranges,\n            vec![\n                Key::from_i128(1)..Key::from_i128(3),\n                Key::from_i128(3)..Key::from_i128(6),\n                Key::from_i128(6)..Key::from_i128(9),\n                
Key::from_i128(19)..Key::from_i128(20),\n                Key::from_i128(20)..Key::from_i128(30),\n                Key::from_i128(30)..Key::from_i128(40),\n            ]\n        );\n    }\n    #[test]\n    fn sharded_range_relation_gap() {\n        let shard_identity =\n            ShardIdentity::new(ShardNumber(0), ShardCount::new(4), DEFAULT_STRIPE_SIZE).unwrap();\n\n        let range = ShardedRange::new(\n            Range {\n                start: Key::from_hex(\"000000067F00000005000040100300000000\").unwrap(),\n                end: Key::from_hex(\"000000067F00000005000040130000004000\").unwrap(),\n            },\n            &shard_identity,\n        );\n\n        // Key range spans relations, expect MAX\n        assert_eq!(range.page_count(), u32::MAX);\n    }\n\n    #[test]\n    fn shard_identity_keyspaces_single_key() {\n        let shard_identity =\n            ShardIdentity::new(ShardNumber(1), ShardCount::new(4), DEFAULT_STRIPE_SIZE).unwrap();\n\n        let range = ShardedRange::new(\n            Range {\n                start: Key::from_hex(\"000000067f000000010000007000ffffffff\").unwrap(),\n                end: Key::from_hex(\"000000067f00000001000000700100000000\").unwrap(),\n            },\n            &shard_identity,\n        );\n        // Single-key range on logical size key\n        assert_eq!(range.page_count(), 1);\n    }\n\n    /// Test the helper that we use to identify ranges which go outside the data blocks of a single relation\n    #[test]\n    fn contiguous_range_check() {\n        assert!(!is_contiguous_range(\n            &(Key::from_hex(\"000000067f00000001000004df00fffffffe\").unwrap()\n                ..Key::from_hex(\"000000067f00000001000004df0100000003\").unwrap())\n        ),);\n\n        // The ranges goes all the way up to the 0xffffffff, including it: this is\n        // not considered a rel block range because 0xffffffff stores logical sizes,\n        // not blocks.\n        assert!(!is_contiguous_range(\n            
&(Key::from_hex(\"000000067f00000001000004df00fffffffe\").unwrap()\n                ..Key::from_hex(\"000000067f00000001000004df0100000000\").unwrap())\n        ),);\n\n        // Keys within the normal data region of a relation\n        assert!(is_contiguous_range(\n            &(Key::from_hex(\"000000067f00000001000004df0000000000\").unwrap()\n                ..Key::from_hex(\"000000067f00000001000004df0000000080\").unwrap())\n        ),);\n\n        // The logical size key of one forkno, then some blocks in the next\n        assert!(is_contiguous_range(\n            &(Key::from_hex(\"000000067f00000001000004df00ffffffff\").unwrap()\n                ..Key::from_hex(\"000000067f00000001000004df0100000080\").unwrap())\n        ),);\n    }\n\n    #[test]\n    fn shard_identity_keyspaces_forkno_gap() {\n        let shard_identity =\n            ShardIdentity::new(ShardNumber(1), ShardCount::new(4), DEFAULT_STRIPE_SIZE).unwrap();\n\n        let range = ShardedRange::new(\n            Range {\n                start: Key::from_hex(\"000000067f00000001000004df00fffffffe\").unwrap(),\n                end: Key::from_hex(\"000000067f00000001000004df0100000003\").unwrap(),\n            },\n            &shard_identity,\n        );\n\n        // Range spanning the end of one forkno and the start of the next: we do not attempt to\n        // calculate a valid size, because we have no way to know if they keys between start\n        // and end are actually in use.\n        assert_eq!(range.page_count(), u32::MAX);\n    }\n\n    #[test]\n    fn shard_identity_keyspaces_one_relation() {\n        for shard_number in 0..4 {\n            let shard_identity = ShardIdentity::new(\n                ShardNumber(shard_number),\n                ShardCount::new(4),\n                DEFAULT_STRIPE_SIZE,\n            )\n            .unwrap();\n\n            let range = ShardedRange::new(\n                Range {\n                    start: 
Key::from_hex(\"000000067f00000001000000ae0000000000\").unwrap(),\n                    end: Key::from_hex(\"000000067f00000001000000ae0000000001\").unwrap(),\n                },\n                &shard_identity,\n            );\n\n            // Very simple case: range covering block zero of one relation, where that block maps to shard zero\n            if shard_number == 0 {\n                assert_eq!(range.page_count(), 1);\n            } else {\n                // Other shards should perceive the range's size as zero\n                assert_eq!(range.page_count(), 0);\n            }\n        }\n    }\n\n    /// Test helper: construct a ShardedRange and call fragment() on it, returning\n    /// the total page count in the range and the fragments.\n    fn do_fragment(\n        range_start: Key,\n        range_end: Key,\n        shard_identity: &ShardIdentity,\n        target_nblocks: u32,\n    ) -> (u32, Vec<(u32, Range<Key>)>) {\n        let range = ShardedRange::new(\n            Range {\n                start: range_start,\n                end: range_end,\n            },\n            shard_identity,\n        );\n\n        let page_count = range.page_count();\n        let fragments = range.fragment(target_nblocks);\n\n        // Invariant: we always get at least one fragment\n        assert!(!fragments.is_empty());\n\n        // Invariant: the first/last fragment start/end should equal the input start/end\n        assert_eq!(fragments.first().unwrap().1.start, range_start);\n        assert_eq!(fragments.last().unwrap().1.end, range_end);\n\n        if page_count > 0 {\n            // Invariant: every fragment must contain at least one shard-local page, if the\n            // total range contains at least one shard-local page\n            let all_nonzero = fragments.iter().all(|f| f.0 > 0);\n            if !all_nonzero {\n                eprintln!(\"Found a zero-length fragment: {fragments:?}\");\n            }\n            assert!(all_nonzero);\n        } else 
{\n            // A range with no shard-local pages should always be returned as a single fragment\n            assert_eq!(fragments, vec![(0, range_start..range_end)]);\n        }\n\n        // Invariant: fragments must be ordered and non-overlapping\n        let mut last: Option<Range<Key>> = None;\n        for frag in &fragments {\n            if let Some(last) = last {\n                assert!(frag.1.start >= last.end);\n                assert!(frag.1.start > last.start);\n            }\n            last = Some(frag.1.clone())\n        }\n\n        // Invariant: fragments respect target_nblocks\n        for frag in &fragments {\n            assert!(frag.0 == u32::MAX || frag.0 <= target_nblocks);\n        }\n\n        (page_count, fragments)\n    }\n\n    /// Really simple tests for fragment(), on a range that just contains a single stripe\n    /// for a single tenant.\n    #[test]\n    fn sharded_range_fragment_simple() {\n        const SHARD_COUNT: u8 = 4;\n        const STRIPE_SIZE: u32 = DEFAULT_STRIPE_SIZE.0;\n\n        let shard_identity = ShardIdentity::new(\n            ShardNumber(0),\n            ShardCount::new(SHARD_COUNT),\n            ShardStripeSize(STRIPE_SIZE),\n        )\n        .unwrap();\n\n        // A range which we happen to know covers exactly one stripe which belongs to this shard\n        let input_start = Key::from_hex(\"000000067f00000001000000ae0000000000\").unwrap();\n        let mut input_end = input_start;\n        input_end.field6 += STRIPE_SIZE; // field6 is block number\n\n        // Ask for stripe_size blocks, we get the whole stripe\n        assert_eq!(\n            do_fragment(input_start, input_end, &shard_identity, STRIPE_SIZE),\n            (STRIPE_SIZE, vec![(STRIPE_SIZE, input_start..input_end)])\n        );\n\n        // Ask for more, we still get the whole stripe\n        assert_eq!(\n            do_fragment(input_start, input_end, &shard_identity, 10 * STRIPE_SIZE),\n            (STRIPE_SIZE, vec![(STRIPE_SIZE, 
input_start..input_end)])\n        );\n\n        // Ask for target_nblocks of half the stripe size, we get two halves\n        assert_eq!(\n            do_fragment(input_start, input_end, &shard_identity, STRIPE_SIZE / 2),\n            (\n                STRIPE_SIZE,\n                vec![\n                    (\n                        STRIPE_SIZE / 2,\n                        input_start..input_start.add(STRIPE_SIZE / 2)\n                    ),\n                    (STRIPE_SIZE / 2, input_start.add(STRIPE_SIZE / 2)..input_end)\n                ]\n            )\n        );\n    }\n\n    #[test]\n    fn sharded_range_fragment_multi_stripe() {\n        const SHARD_COUNT: u8 = 4;\n        const STRIPE_SIZE: u32 = DEFAULT_STRIPE_SIZE.0;\n        const RANGE_SIZE: u32 = SHARD_COUNT as u32 * STRIPE_SIZE;\n\n        let shard_identity = ShardIdentity::new(\n            ShardNumber(0),\n            ShardCount::new(SHARD_COUNT),\n            ShardStripeSize(STRIPE_SIZE),\n        )\n        .unwrap();\n\n        // A range which covers multiple stripes, exactly one of which belongs to the current shard.\n        let input_start = Key::from_hex(\"000000067f00000001000000ae0000000000\").unwrap();\n        let mut input_end = input_start;\n        input_end.field6 += RANGE_SIZE; // field6 is block number\n\n        // Ask for all the blocks, get a fragment that covers the whole range but reports\n        // its size to be just the blocks belonging to our shard.\n        assert_eq!(\n            do_fragment(input_start, input_end, &shard_identity, RANGE_SIZE),\n            (STRIPE_SIZE, vec![(STRIPE_SIZE, input_start..input_end)])\n        );\n\n        // Ask for a sub-stripe quantity that results in 3 fragments.\n        let limit = STRIPE_SIZE / 3 + 1;\n        assert_eq!(\n            do_fragment(input_start, input_end, &shard_identity, limit),\n            (\n                STRIPE_SIZE,\n                vec![\n                    (limit, 
input_start..input_start.add(limit)),\n                    (limit, input_start.add(limit)..input_start.add(2 * limit)),\n                    (\n                        STRIPE_SIZE - 2 * limit,\n                        input_start.add(2 * limit)..input_end\n                    ),\n                ]\n            )\n        );\n\n        // Try on a range that starts slightly after our owned stripe\n        assert_eq!(\n            do_fragment(input_start.add(1), input_end, &shard_identity, RANGE_SIZE),\n            (\n                STRIPE_SIZE - 1,\n                vec![(STRIPE_SIZE - 1, input_start.add(1)..input_end)]\n            )\n        );\n    }\n\n    /// Test our calculations work correctly when we start a range from the logical size key of\n    /// a previous relation.\n    #[test]\n    fn sharded_range_fragment_starting_from_logical_size() {\n        const SHARD_COUNT: u8 = 4;\n        const STRIPE_SIZE: u32 = DEFAULT_STRIPE_SIZE.0;\n        const RANGE_SIZE: u32 = SHARD_COUNT as u32 * STRIPE_SIZE;\n\n        let input_start = Key::from_hex(\"000000067f00000001000000ae00ffffffff\").unwrap();\n        let mut input_end = Key::from_hex(\"000000067f00000001000000ae0100000000\").unwrap();\n        input_end.field6 += RANGE_SIZE; // field6 is block number\n\n        // Shard 0 owns the first stripe in the relation, and the preceding logical size is shard local too\n        let shard_identity = ShardIdentity::new(\n            ShardNumber(0),\n            ShardCount::new(SHARD_COUNT),\n            ShardStripeSize(STRIPE_SIZE),\n        )\n        .unwrap();\n        assert_eq!(\n            do_fragment(input_start, input_end, &shard_identity, 2 * STRIPE_SIZE),\n            (\n                STRIPE_SIZE + 1,\n                vec![(STRIPE_SIZE + 1, input_start..input_end)]\n            )\n        );\n\n        // Shard 1 does not own the first stripe in the relation, but it does own the logical size (all shards\n        // store all logical sizes)\n        let 
shard_identity = ShardIdentity::new(\n            ShardNumber(1),\n            ShardCount::new(SHARD_COUNT),\n            ShardStripeSize(STRIPE_SIZE),\n        )\n        .unwrap();\n        assert_eq!(\n            do_fragment(input_start, input_end, &shard_identity, 2 * STRIPE_SIZE),\n            (1, vec![(1, input_start..input_end)])\n        );\n    }\n\n    /// Test that ShardedRange behaves properly when used on un-sharded data\n    #[test]\n    fn sharded_range_fragment_unsharded() {\n        let shard_identity = ShardIdentity::unsharded();\n\n        let input_start = Key::from_hex(\"000000067f00000001000000ae0000000000\").unwrap();\n        let input_end = Key::from_hex(\"000000067f00000001000000ae0000010000\").unwrap();\n        assert_eq!(\n            do_fragment(input_start, input_end, &shard_identity, 0x8000),\n            (\n                0x10000,\n                vec![\n                    (0x8000, input_start..input_start.add(0x8000)),\n                    (0x8000, input_start.add(0x8000)..input_start.add(0x10000))\n                ]\n            )\n        );\n    }\n\n    #[test]\n    fn sharded_range_fragment_cross_relation() {\n        let shard_identity = ShardIdentity::unsharded();\n\n        // A range that spans relations: expect fragmentation to give up and return a u32::MAX size\n        let input_start = Key::from_hex(\"000000067f00000001000000ae0000000000\").unwrap();\n        let input_end = Key::from_hex(\"000000068f00000001000000ae0000010000\").unwrap();\n        assert_eq!(\n            do_fragment(input_start, input_end, &shard_identity, 0x8000),\n            (u32::MAX, vec![(u32::MAX, input_start..input_end),])\n        );\n\n        // Same, but using a sharded identity\n        let shard_identity =\n            ShardIdentity::new(ShardNumber(0), ShardCount::new(4), DEFAULT_STRIPE_SIZE).unwrap();\n        assert_eq!(\n            do_fragment(input_start, input_end, &shard_identity, 0x8000),\n            (u32::MAX, 
vec![(u32::MAX, input_start..input_end),])\n        );\n    }\n\n    #[test]\n    fn sharded_range_fragment_tiny_nblocks() {\n        let shard_identity = ShardIdentity::unsharded();\n\n        // A small range within a single relation: expect fragments of at most target_nblocks blocks each\n        let input_start = Key::from_hex(\"000000067F00000001000004E10000000000\").unwrap();\n        let input_end = Key::from_hex(\"000000067F00000001000004E10000000038\").unwrap();\n        assert_eq!(\n            do_fragment(input_start, input_end, &shard_identity, 16),\n            (\n                0x38,\n                vec![\n                    (16, input_start..input_start.add(16)),\n                    (16, input_start.add(16)..input_start.add(32)),\n                    (16, input_start.add(32)..input_start.add(48)),\n                    (8, input_start.add(48)..input_end),\n                ]\n            )\n        );\n    }\n\n    #[test]\n    fn sharded_range_fragment_fuzz() {\n        // Use a fixed seed: we don't want to explicitly pick values, but we do want\n        // the test to be reproducible.\n        let mut prng = rand::rngs::StdRng::seed_from_u64(0xdeadbeef);\n\n        for _i in 0..1000 {\n            let shard_identity = if prng.next_u32() % 2 == 0 {\n                ShardIdentity::unsharded()\n            } else {\n                let shard_count = prng.next_u32() % 127 + 1;\n                ShardIdentity::new(\n                    ShardNumber((prng.next_u32() % shard_count) as u8),\n                    ShardCount::new(shard_count as u8),\n                    DEFAULT_STRIPE_SIZE,\n                )\n                .unwrap()\n            };\n\n            let target_nblocks = prng.next_u32() % 65536 + 1;\n\n            let start_offset = prng.next_u32() % 16384;\n\n            // Try ranges up to 4GiB in size, that are always at least 1\n            let range_size = prng.next_u32() % 8192 + 1;\n\n            // A range that spans relations: expect 
fragmentation to give up and return a u32::MAX size\n            let input_start = Key::from_hex(\"000000067F00000001000004E10000000000\")\n                .unwrap()\n                .add(start_offset);\n            let input_end = input_start.add(range_size);\n\n            // This test's main success conditions are the invariants baked into do_fragment\n            let (_total_size, fragments) =\n                do_fragment(input_start, input_end, &shard_identity, target_nblocks);\n\n            // Pick a random key within the range and check it appears in the output\n            let example_key = input_start.add(prng.next_u32() % range_size);\n\n            // Panic on unwrap if it isn't found\n            let example_key_frag = fragments\n                .iter()\n                .find(|f| f.1.contains(&example_key))\n                .unwrap();\n\n            // Check that the fragment containing our random key has a nonzero size if\n            // that key is shard-local\n            let example_key_local = !shard_identity.is_key_disposable(&example_key);\n            if example_key_local {\n                assert!(example_key_frag.0 > 0);\n            }\n        }\n    }\n}\n"
  },
  {
    "path": "libs/pageserver_api/src/lib.rs",
    "content": "#![deny(unsafe_code)]\n#![deny(clippy::undocumented_unsafe_blocks)]\n\npub mod controller_api;\npub mod key;\npub mod keyspace;\npub mod models;\npub mod pagestream_api;\npub mod reltag;\npub mod shard;\n/// Public API types\npub mod upcall_api;\n\npub mod config;\n"
  },
  {
    "path": "libs/pageserver_api/src/models/detach_ancestor.rs",
    "content": "use std::collections::HashSet;\n\nuse utils::id::TimelineId;\n\n#[derive(Debug, Default, PartialEq, serde::Serialize, serde::Deserialize)]\npub struct AncestorDetached {\n    pub reparented_timelines: HashSet<TimelineId>,\n}\n"
  },
  {
    "path": "libs/pageserver_api/src/models/partitioning.rs",
    "content": "use utils::lsn::Lsn;\n\nuse crate::keyspace::SparseKeySpace;\n\n#[derive(Debug, PartialEq, Eq)]\npub struct Partitioning {\n    pub keys: crate::keyspace::KeySpace,\n    pub sparse_keys: crate::keyspace::SparseKeySpace,\n    pub at_lsn: Lsn,\n}\n\nimpl serde::Serialize for Partitioning {\n    fn serialize<S>(&self, serializer: S) -> std::result::Result<S::Ok, S::Error>\n    where\n        S: serde::Serializer,\n    {\n        pub struct KeySpace<'a>(&'a crate::keyspace::KeySpace);\n\n        impl serde::Serialize for KeySpace<'_> {\n            fn serialize<S>(&self, serializer: S) -> std::result::Result<S::Ok, S::Error>\n            where\n                S: serde::Serializer,\n            {\n                use serde::ser::SerializeSeq;\n                let mut seq = serializer.serialize_seq(Some(self.0.ranges.len()))?;\n                for kr in &self.0.ranges {\n                    seq.serialize_element(&KeyRange(kr))?;\n                }\n                seq.end()\n            }\n        }\n\n        use serde::ser::SerializeMap;\n        let mut map = serializer.serialize_map(Some(2))?;\n        map.serialize_key(\"keys\")?;\n        map.serialize_value(&KeySpace(&self.keys))?;\n        map.serialize_key(\"sparse_keys\")?;\n        map.serialize_value(&KeySpace(&self.sparse_keys.0))?;\n        map.serialize_key(\"at_lsn\")?;\n        map.serialize_value(&WithDisplay(&self.at_lsn))?;\n        map.end()\n    }\n}\n\npub struct WithDisplay<'a, T>(&'a T);\n\nimpl<T: std::fmt::Display> serde::Serialize for WithDisplay<'_, T> {\n    fn serialize<S>(&self, serializer: S) -> std::result::Result<S::Ok, S::Error>\n    where\n        S: serde::Serializer,\n    {\n        serializer.collect_str(&self.0)\n    }\n}\n\npub struct KeyRange<'a>(&'a std::ops::Range<crate::key::Key>);\n\nimpl serde::Serialize for KeyRange<'_> {\n    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>\n    where\n        S: serde::Serializer,\n    {\n        use 
serde::ser::SerializeTuple;\n        let mut t = serializer.serialize_tuple(2)?;\n        t.serialize_element(&WithDisplay(&self.0.start))?;\n        t.serialize_element(&WithDisplay(&self.0.end))?;\n        t.end()\n    }\n}\n\nimpl<'a> serde::Deserialize<'a> for Partitioning {\n    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>\n    where\n        D: serde::Deserializer<'a>,\n    {\n        pub struct KeySpace(crate::keyspace::KeySpace);\n\n        impl<'de> serde::Deserialize<'de> for KeySpace {\n            fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>\n            where\n                D: serde::Deserializer<'de>,\n            {\n                #[serde_with::serde_as]\n                #[derive(serde::Deserialize)]\n                #[serde(transparent)]\n                struct Key(#[serde_as(as = \"serde_with::DisplayFromStr\")] crate::key::Key);\n\n                #[serde_with::serde_as]\n                #[derive(serde::Deserialize)]\n                struct Range(Key, Key);\n\n                let ranges: Vec<Range> = serde::Deserialize::deserialize(deserializer)?;\n                Ok(Self(crate::keyspace::KeySpace {\n                    ranges: ranges\n                        .into_iter()\n                        .map(|Range(start, end)| (start.0..end.0))\n                        .collect(),\n                }))\n            }\n        }\n\n        #[serde_with::serde_as]\n        #[derive(serde::Deserialize)]\n        struct De {\n            keys: KeySpace,\n            sparse_keys: KeySpace,\n            #[serde_as(as = \"serde_with::DisplayFromStr\")]\n            at_lsn: Lsn,\n        }\n\n        let de: De = serde::Deserialize::deserialize(deserializer)?;\n        Ok(Self {\n            at_lsn: de.at_lsn,\n            keys: de.keys.0,\n            sparse_keys: SparseKeySpace(de.sparse_keys.0),\n        })\n    }\n}\n\n#[cfg(test)]\nmod tests {\n    use super::*;\n\n    #[test]\n    fn test_serialization_roundtrip() {\n     
   let reference = r#\"\n        {\n            \"keys\": [\n              [\n                \"000000000000000000000000000000000000\",\n                \"000000000000000000000000000000000001\"\n              ],\n              [\n                \"000000067F00000001000000000000000000\",\n                \"000000067F00000001000000000000000002\"\n              ],\n              [\n                \"030000000000000000000000000000000000\",\n                \"030000000000000000000000000000000003\"\n              ]\n            ],\n            \"sparse_keys\": [\n              [\n                \"620000000000000000000000000000000000\",\n                \"620000000000000000000000000000000003\"\n              ]\n            ],\n            \"at_lsn\": \"0/2240160\"\n        }\n        \"#;\n\n        let de: Partitioning = serde_json::from_str(reference).unwrap();\n\n        let ser = serde_json::to_string(&de).unwrap();\n\n        let ser_de: serde_json::Value = serde_json::from_str(&ser).unwrap();\n\n        assert_eq!(\n            ser_de,\n            serde_json::from_str::<'_, serde_json::Value>(reference).unwrap()\n        );\n    }\n}\n"
  },
  {
    "path": "libs/pageserver_api/src/models/utilization.rs",
    "content": "use std::time::SystemTime;\n\nuse utils::serde_percent::Percent;\nuse utils::serde_system_time;\n\n/// Pageserver current utilization and scoring for how good candidate the pageserver would be for\n/// the next tenant.\n///\n/// See and maintain pageserver openapi spec for `/v1/utilization_score` as the truth.\n///\n/// `format: int64` fields must use `ser_saturating_u63` because openapi generated clients might\n/// not handle full u64 values properly.\n#[derive(serde::Serialize, serde::Deserialize, Debug, Clone)]\npub struct PageserverUtilization {\n    /// Used disk space (physical, ground truth from statfs())\n    #[serde(serialize_with = \"ser_saturating_u63\")]\n    pub disk_usage_bytes: u64,\n    /// Free disk space\n    #[serde(serialize_with = \"ser_saturating_u63\")]\n    pub free_space_bytes: u64,\n\n    /// Wanted disk space, based on the tenant shards currently present on this pageserver: this\n    /// is like disk_usage_bytes, but it is stable and does not change with the cache state of\n    /// tenants, whereas disk_usage_bytes may reach the disk eviction `max_usage_pct` and stay\n    /// there, or may be unrealistically low if the pageserver has attached tenants which haven't\n    /// downloaded layers yet.\n    #[serde(serialize_with = \"ser_saturating_u63\", default)]\n    pub disk_wanted_bytes: u64,\n\n    // What proportion of total disk space will this pageserver use before it starts evicting data?\n    #[serde(default = \"unity_percent\")]\n    pub disk_usable_pct: Percent,\n\n    // How many shards are currently on this node?\n    #[serde(default)]\n    pub shard_count: u32,\n\n    // How many shards should this node be able to handle at most?\n    #[serde(default)]\n    pub max_shard_count: u32,\n\n    /// Cached result of [`Self::score`]\n    pub utilization_score: Option<u64>,\n\n    /// When was this snapshot captured, pageserver local time.\n    ///\n    /// Use millis to give confidence that the value is regenerated often 
enough.\n    pub captured_at: serde_system_time::SystemTime,\n}\n\nfn unity_percent() -> Percent {\n    Percent::new(0).unwrap()\n}\n\npub type RawScore = u64;\n\nimpl PageserverUtilization {\n    const UTILIZATION_FULL: u64 = 1000000;\n\n    /// Calculate a utilization score.  The result is to be interpreted as a fraction of\n    /// Self::UTILIZATION_FULL.\n    ///\n    /// Lower values are more affine to scheduling more work on this node.\n    /// - UTILIZATION_FULL represents an ideal node which is fully utilized but should not receive any more work.\n    /// - 0.0 represents an empty node.\n    /// - Negative values are forbidden\n    /// - Values over UTILIZATION_FULL indicate an overloaded node, which may show degraded performance due to\n    ///   layer eviction.\n    pub fn score(&self) -> RawScore {\n        let disk_usable_capacity = ((self.disk_usage_bytes + self.free_space_bytes)\n            * self.disk_usable_pct.get() as u64)\n            / 100;\n        let disk_utilization_score =\n            self.disk_wanted_bytes * Self::UTILIZATION_FULL / disk_usable_capacity;\n\n        let shard_utilization_score =\n            self.shard_count as u64 * Self::UTILIZATION_FULL / self.max_shard_count as u64;\n        std::cmp::max(disk_utilization_score, shard_utilization_score)\n    }\n\n    pub fn cached_score(&mut self) -> RawScore {\n        match self.utilization_score {\n            None => {\n                let s = self.score();\n                self.utilization_score = Some(s);\n                s\n            }\n            Some(s) => s,\n        }\n    }\n\n    /// If a node is currently hosting more work than it can comfortably handle.  
This does not indicate that\n    /// it will fail, but it is a strong signal that more work should not be added unless there is no alternative.\n    ///\n    /// When a node is overloaded, we may override soft affinity preferences and do things like scheduling\n    /// into a node in a less desirable AZ, if all the nodes in the preferred AZ are overloaded.\n    pub fn is_overloaded(score: RawScore) -> bool {\n        // Why the factor of two?  This is unscientific but reflects behavior of real systems:\n        // - In terms of shard counts, a node's preferred max count is a soft limit intended to keep\n        //   startup and housekeeping jobs nice and responsive.  We can go to double this limit if needed\n        //   until some more nodes are deployed.\n        // - In terms of disk space, the node's utilization heuristic assumes every tenant needs to\n        //   hold its biggest timeline fully on disk, which tends to be an overestimate when\n        //   some tenants are very idle and have dropped layers from disk.  
In practice going up to\n        //   double is generally better than giving up and scheduling in a sub-optimal AZ.\n        score >= 2 * Self::UTILIZATION_FULL\n    }\n\n    pub fn adjust_shard_count_max(&mut self, shard_count: u32) {\n        if self.shard_count < shard_count {\n            self.shard_count = shard_count;\n\n            // Dirty cache: this will be calculated next time someone retrieves the score\n            self.utilization_score = None;\n        }\n    }\n\n    /// A utilization structure that has a full utilization score: use this as a placeholder when\n    /// you need a utilization but don't have real values yet.\n    pub fn full() -> Self {\n        Self {\n            disk_usage_bytes: 1,\n            free_space_bytes: 0,\n            disk_wanted_bytes: 1,\n            disk_usable_pct: Percent::new(100).unwrap(),\n            shard_count: 1,\n            max_shard_count: 1,\n            utilization_score: Some(Self::UTILIZATION_FULL),\n            captured_at: serde_system_time::SystemTime(SystemTime::now()),\n        }\n    }\n}\n\n/// Test helper\npub mod test_utilization {\n    use std::time::SystemTime;\n\n    use utils::serde_percent::Percent;\n    use utils::serde_system_time::{self};\n\n    use super::PageserverUtilization;\n\n    // Parameters of the imaginary node used for test utilization instances\n    const TEST_DISK_SIZE: u64 = 1024 * 1024 * 1024 * 1024;\n    const TEST_SHARDS_MAX: u32 = 1000;\n\n    /// Unit test helper.  Unconditionally compiled because cfg(test) doesn't carry across crates.  
Do\n    /// not abuse this function from non-test code.\n    ///\n    /// Emulates a node with a 1000 shard limit and a 1TB disk.\n    pub fn simple(shard_count: u32, disk_wanted_bytes: u64) -> PageserverUtilization {\n        PageserverUtilization {\n            disk_usage_bytes: disk_wanted_bytes,\n            free_space_bytes: TEST_DISK_SIZE - std::cmp::min(disk_wanted_bytes, TEST_DISK_SIZE),\n            disk_wanted_bytes,\n            disk_usable_pct: Percent::new(100).unwrap(),\n            shard_count,\n            max_shard_count: TEST_SHARDS_MAX,\n            utilization_score: None,\n            captured_at: serde_system_time::SystemTime(SystemTime::now()),\n        }\n    }\n}\n\n/// openapi knows only `format: int64`, so avoid outputting a non-parseable value by generated clients.\n///\n/// Instead of newtype, use this because a newtype would require handling deserializing values\n/// with the highest bit set which is properly parsed by serde formats, but would create a\n/// conundrum on how to handle and again serialize such values at type level. 
It will be a few\n/// years until we can use more than `i64::MAX` bytes on a disk.\nfn ser_saturating_u63<S: serde::Serializer>(value: &u64, serializer: S) -> Result<S::Ok, S::Error> {\n    const MAX_FORMAT_INT64: u64 = i64::MAX as u64;\n\n    let value = (*value).min(MAX_FORMAT_INT64);\n\n    serializer.serialize_u64(value)\n}\n\n#[cfg(test)]\nmod tests {\n    use std::time::Duration;\n\n    use super::*;\n\n    #[test]\n    fn u64_max_is_serialized_as_u63_max() {\n        let doc = PageserverUtilization {\n            disk_usage_bytes: u64::MAX,\n            free_space_bytes: 0,\n            disk_wanted_bytes: u64::MAX,\n            utilization_score: Some(13),\n            disk_usable_pct: Percent::new(90).unwrap(),\n            shard_count: 100,\n            max_shard_count: 200,\n            captured_at: serde_system_time::SystemTime(\n                std::time::SystemTime::UNIX_EPOCH + Duration::from_secs(1708509779),\n            ),\n        };\n\n        let s = serde_json::to_string(&doc).unwrap();\n\n        let expected = \"{\\\"disk_usage_bytes\\\":9223372036854775807,\\\"free_space_bytes\\\":0,\\\"disk_wanted_bytes\\\":9223372036854775807,\\\"disk_usable_pct\\\":90,\\\"shard_count\\\":100,\\\"max_shard_count\\\":200,\\\"utilization_score\\\":13,\\\"captured_at\\\":\\\"2024-02-21T10:02:59.000Z\\\"}\";\n\n        assert_eq!(s, expected);\n    }\n}\n"
  },
  {
    "path": "libs/pageserver_api/src/models.rs",
    "content": "pub mod detach_ancestor;\npub mod partitioning;\npub mod utilization;\n\nuse core::ops::Range;\nuse std::collections::HashMap;\nuse std::fmt::Display;\nuse std::num::{NonZeroU32, NonZeroU64, NonZeroUsize};\nuse std::str::FromStr;\nuse std::time::{Duration, SystemTime};\n\n#[cfg(feature = \"testing\")]\nuse camino::Utf8PathBuf;\nuse postgres_versioninfo::PgMajorVersion;\nuse serde::{Deserialize, Deserializer, Serialize, Serializer};\nuse serde_with::serde_as;\npub use utilization::PageserverUtilization;\nuse utils::id::{NodeId, TenantId, TimelineId};\nuse utils::lsn::Lsn;\nuse utils::{completion, serde_system_time};\n\nuse crate::config::Ratio;\nuse crate::key::{CompactKey, Key};\nuse crate::shard::{\n    DEFAULT_STRIPE_SIZE, ShardCount, ShardIdentity, ShardStripeSize, TenantShardId,\n};\n\n/// The state of a tenant in this pageserver.\n///\n/// ```mermaid\n/// stateDiagram-v2\n///\n///     [*] --> Attaching: spawn_attach()\n///\n///     Attaching --> Activating: activate()\n///     Activating --> Active: infallible\n///\n///     Attaching --> Broken: attach() failure\n///\n///     Active --> Stopping: set_stopping(), part of shutdown & detach\n///     Stopping --> Broken: late error in remove_tenant_from_memory\n///\n///     Broken --> [*]: ignore / detach / shutdown\n///     Stopping --> [*]: remove_from_memory complete\n///\n///     Active --> Broken: cfg(testing)-only tenant break point\n/// ```\n#[derive(\n    Clone,\n    PartialEq,\n    Eq,\n    serde::Serialize,\n    serde::Deserialize,\n    strum_macros::Display,\n    strum_macros::VariantNames,\n    strum_macros::AsRefStr,\n    strum_macros::IntoStaticStr,\n)]\n#[serde(tag = \"slug\", content = \"data\")]\npub enum TenantState {\n    /// This tenant is being attached to the pageserver.\n    ///\n    /// `set_stopping()` and `set_broken()` do not work in this state and wait for it to pass.\n    Attaching,\n    /// The tenant is transitioning from Loading/Attaching to Active.\n    ///\n    /// 
While in this state, the individual timelines are being activated.\n    ///\n    /// `set_stopping()` and `set_broken()` do not work in this state and wait for it to pass.\n    Activating(ActivatingFrom),\n    /// The tenant has finished activating and is open for business.\n    ///\n    /// Transitions out of this state are possible through `set_stopping()` and `set_broken()`.\n    Active,\n    /// The tenant is recognized by pageserver, but it is being detached or the\n    /// system is being shut down.\n    ///\n    /// Transitions out of this state are possible through `set_broken()`.\n    Stopping {\n        /// The barrier can be used to wait for shutdown to complete. The first caller to set\n        /// Some(Barrier) is responsible for driving shutdown to completion. Subsequent callers\n        /// will wait for the first caller's existing barrier.\n        ///\n        /// None is set when an attach is cancelled, to signal to shutdown that the attach has in\n        /// fact cancelled:\n        ///\n        /// 1. `shutdown` sees `TenantState::Attaching`, and cancels the tenant.\n        /// 2. `attach` sets `TenantState::Stopping(None)` and exits.\n        /// 3. 
`set_stopping` waits for `TenantState::Stopping(None)` and sets\n        ///    `TenantState::Stopping(Some)` to claim the barrier as the shutdown owner.\n        //\n        // Because of https://github.com/serde-rs/serde/issues/2105 this has to be a named field,\n        // otherwise it will not be skipped during deserialization\n        #[serde(skip)]\n        progress: Option<completion::Barrier>,\n    },\n    /// The tenant is recognized by the pageserver, but can no longer be used for\n    /// any operations.\n    ///\n    /// If the tenant fails to load or attach, it will transition to this state\n    /// and it is guaranteed that no background tasks are running in its name.\n    ///\n    /// The other way to transition into this state is from `Stopping` state\n    /// through `set_broken()` called from `remove_tenant_from_memory()`. That happens\n    /// if the cleanup future executed by `remove_tenant_from_memory()` fails.\n    Broken { reason: String, backtrace: String },\n}\n\nimpl TenantState {\n    pub fn attachment_status(&self) -> TenantAttachmentStatus {\n        use TenantAttachmentStatus::*;\n\n        // Below TenantState::Activating is used as \"transient\" or \"transparent\" state for\n        // attachment_status determining.\n        match self {\n            // The attach procedure writes the marker file before adding the Attaching tenant to the tenants map.\n            // So, technically, we can return Attached here.\n            // However, as soon as Console observes Attached, it will proceed with the Postgres-level health check.\n            // But, our attach task might still be fetching the remote timelines, etc.\n            // So, return `Maybe` while Attaching, making Console wait for the attach task to finish.\n            Self::Attaching | Self::Activating(ActivatingFrom::Attaching) => Maybe,\n            // We only reach Active after successful load / attach.\n            // So, call attachment status Attached.\n            
Self::Active => Attached,\n            // If the (initial or resumed) attach procedure fails, the tenant becomes Broken.\n            // However, it also becomes Broken if the regular load fails.\n            // From Console's perspective there's no practical difference\n            // because attachment_status is polled by console only during attach operation execution.\n            Self::Broken { reason, .. } => Failed {\n                reason: reason.to_owned(),\n            },\n            // Why is Stopping a Maybe case? Because, during pageserver shutdown,\n            // we set the Stopping state irrespective of whether the tenant\n            // has finished attaching or not.\n            Self::Stopping { .. } => Maybe,\n        }\n    }\n\n    pub fn broken_from_reason(reason: String) -> Self {\n        let backtrace_str: String = format!(\"{}\", std::backtrace::Backtrace::force_capture());\n        Self::Broken {\n            reason,\n            backtrace: backtrace_str,\n        }\n    }\n}\n\nimpl std::fmt::Debug for TenantState {\n    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {\n        match self {\n            Self::Broken { reason, backtrace } if !reason.is_empty() => {\n                write!(f, \"Broken due to: {reason}. 
Backtrace:\\n{backtrace}\")\n            }\n            _ => write!(f, \"{self}\"),\n        }\n    }\n}\n\n/// A temporary lease to a specific lsn inside a timeline.\n/// Access to the lsn is guaranteed by the pageserver until the expiration indicated by `valid_until`.\n#[serde_as]\n#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]\npub struct LsnLease {\n    #[serde_as(as = \"SystemTimeAsRfc3339Millis\")]\n    pub valid_until: SystemTime,\n}\n\nserde_with::serde_conv!(\n    SystemTimeAsRfc3339Millis,\n    SystemTime,\n    |time: &SystemTime| humantime::format_rfc3339_millis(*time).to_string(),\n    |value: String| -> Result<_, humantime::TimestampError> { humantime::parse_rfc3339(&value) }\n);\n\nimpl LsnLease {\n    /// The default length for an explicit LSN lease request (10 minutes).\n    pub const DEFAULT_LENGTH: Duration = Duration::from_secs(10 * 60);\n\n    /// The default length for an implicit LSN lease granted during\n    /// `get_lsn_by_timestamp` request (1 minute).\n    pub const DEFAULT_LENGTH_FOR_TS: Duration = Duration::from_secs(60);\n\n    /// Checks whether the lease is expired.\n    pub fn is_expired(&self, now: &SystemTime) -> bool {\n        now > &self.valid_until\n    }\n}\n\n/// Controls the detach ancestor behavior.\n/// - When set to `NoAncestorAndReparent`, we will only detach a branch if its ancestor is a root branch. 
It will automatically reparent any children of the ancestor before and at the branch point.\n/// - When set to `MultiLevelAndNoReparent`, we will detach a branch from multiple levels of ancestors, and no reparenting will happen at all.\n#[derive(Debug, Clone, Copy, Default)]\npub enum DetachBehavior {\n    #[default]\n    NoAncestorAndReparent,\n    MultiLevelAndNoReparent,\n}\n\nimpl std::str::FromStr for DetachBehavior {\n    type Err = &'static str;\n\n    fn from_str(s: &str) -> Result<Self, Self::Err> {\n        match s {\n            \"no_ancestor_and_reparent\" => Ok(DetachBehavior::NoAncestorAndReparent),\n            \"multi_level_and_no_reparent\" => Ok(DetachBehavior::MultiLevelAndNoReparent),\n            \"v1\" => Ok(DetachBehavior::NoAncestorAndReparent),\n            \"v2\" => Ok(DetachBehavior::MultiLevelAndNoReparent),\n            _ => Err(\"cannot parse detach behavior\"),\n        }\n    }\n}\n\nimpl std::fmt::Display for DetachBehavior {\n    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {\n        match self {\n            DetachBehavior::NoAncestorAndReparent => write!(f, \"no_ancestor_and_reparent\"),\n            DetachBehavior::MultiLevelAndNoReparent => write!(f, \"multi_level_and_no_reparent\"),\n        }\n    }\n}\n\n/// The only [`TenantState`] variants we could be `TenantState::Activating` from.\n///\n/// XXX: We used to have more variants here, but now it's just one, which makes this rather\n/// useless. 
Remove, once we've checked that there's no client code left that looks at this.\n#[derive(Clone, Copy, Debug, PartialEq, Eq, serde::Serialize, serde::Deserialize)]\npub enum ActivatingFrom {\n    /// Arrived to [`TenantState::Activating`] from [`TenantState::Attaching`]\n    Attaching,\n}\n\n/// A state of a timeline in pageserver's memory.\n#[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]\npub enum TimelineState {\n    /// The timeline is recognized by the pageserver but is not yet operational.\n    /// In particular, the walreceiver connection loop is not running for this timeline.\n    /// It will eventually transition to state Active or Broken.\n    Loading,\n    /// The timeline is fully operational.\n    /// It can be queried, and the walreceiver connection loop is running.\n    Active,\n    /// The timeline was previously Loading or Active but is shutting down.\n    /// It cannot transition back into any other state.\n    Stopping,\n    /// The timeline is broken and not operational (previous states: Loading or Active).\n    Broken { reason: String, backtrace: String },\n}\n\n#[serde_with::serde_as]\n#[derive(Debug, Clone, serde::Deserialize, serde::Serialize)]\npub struct CompactLsnRange {\n    pub start: Lsn,\n    pub end: Lsn,\n}\n\n#[serde_with::serde_as]\n#[derive(Debug, Clone, serde::Deserialize, serde::Serialize)]\npub struct CompactKeyRange {\n    #[serde_as(as = \"serde_with::DisplayFromStr\")]\n    pub start: Key,\n    #[serde_as(as = \"serde_with::DisplayFromStr\")]\n    pub end: Key,\n}\n\nimpl From<Range<Lsn>> for CompactLsnRange {\n    fn from(range: Range<Lsn>) -> Self {\n        Self {\n            start: range.start,\n            end: range.end,\n        }\n    }\n}\n\nimpl From<Range<Key>> for CompactKeyRange {\n    fn from(range: Range<Key>) -> Self {\n        Self {\n            start: range.start,\n            end: range.end,\n        }\n    }\n}\n\nimpl From<CompactLsnRange> for Range<Lsn> {\n    fn 
from(range: CompactLsnRange) -> Self {\n        range.start..range.end\n    }\n}\n\nimpl From<CompactKeyRange> for Range<Key> {\n    fn from(range: CompactKeyRange) -> Self {\n        range.start..range.end\n    }\n}\n\nimpl CompactLsnRange {\n    pub fn above(lsn: Lsn) -> Self {\n        Self {\n            start: lsn,\n            end: Lsn::MAX,\n        }\n    }\n}\n\n#[derive(Debug, Clone, Serialize)]\npub struct CompactInfoResponse {\n    pub compact_key_range: Option<CompactKeyRange>,\n    pub compact_lsn_range: Option<CompactLsnRange>,\n    pub sub_compaction: bool,\n    pub running: bool,\n    pub job_id: usize,\n}\n\n#[derive(Serialize, Deserialize, Clone)]\npub struct TimelineCreateRequest {\n    pub new_timeline_id: TimelineId,\n    #[serde(flatten)]\n    pub mode: TimelineCreateRequestMode,\n}\n\nimpl TimelineCreateRequest {\n    pub fn mode_tag(&self) -> &'static str {\n        match &self.mode {\n            TimelineCreateRequestMode::Branch { .. } => \"branch\",\n            TimelineCreateRequestMode::ImportPgdata { .. } => \"import\",\n            TimelineCreateRequestMode::Bootstrap { .. } => \"bootstrap\",\n        }\n    }\n\n    pub fn is_import(&self) -> bool {\n        matches!(self.mode, TimelineCreateRequestMode::ImportPgdata { .. 
})\n    }\n}\n\n#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)]\npub enum ShardImportStatus {\n    InProgress(Option<ShardImportProgress>),\n    Done,\n    Error(String),\n}\n\n#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)]\npub enum ShardImportProgress {\n    V1(ShardImportProgressV1),\n}\n\n#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)]\npub struct ShardImportProgressV1 {\n    /// Total number of jobs in the import plan\n    pub jobs: usize,\n    /// Number of jobs completed\n    pub completed: usize,\n    /// Hash of the plan\n    pub import_plan_hash: u64,\n    /// Soft limit for the job size\n    /// This needs to remain constant throughout the import\n    pub job_soft_size_limit: usize,\n}\n\nimpl ShardImportStatus {\n    pub fn is_terminal(&self) -> bool {\n        match self {\n            ShardImportStatus::InProgress(_) => false,\n            ShardImportStatus::Done | ShardImportStatus::Error(_) => true,\n        }\n    }\n}\n\n/// Storage controller specific extensions to [`TimelineInfo`].\n#[derive(Serialize, Deserialize, Clone)]\npub struct TimelineCreateResponseStorcon {\n    #[serde(flatten)]\n    pub timeline_info: TimelineInfo,\n\n    pub safekeepers: Option<SafekeepersInfo>,\n}\n\n/// Safekeepers as returned in timeline creation request to storcon or pushed to\n/// cplane in the post migration hook.\n#[derive(Serialize, Deserialize, Clone)]\npub struct SafekeepersInfo {\n    pub tenant_id: TenantId,\n    pub timeline_id: TimelineId,\n    pub generation: u32,\n    pub safekeepers: Vec<SafekeeperInfo>,\n}\n\n#[derive(Serialize, Deserialize, Clone, Debug)]\npub struct SafekeeperInfo {\n    pub id: NodeId,\n    pub hostname: String,\n}\n\n#[derive(Serialize, Deserialize, Clone)]\n#[serde(untagged)]\npub enum TimelineCreateRequestMode {\n    Branch {\n        ancestor_timeline_id: TimelineId,\n        #[serde(default)]\n        ancestor_start_lsn: Option<Lsn>,\n        // TODO: cplane sets this, but, the 
branching code always\n        // inherits the ancestor's pg_version. Earlier code wasn't\n        // using a flattened enum, so, it was an accepted field, and\n        // we continue to accept it by having it here.\n        pg_version: Option<PgMajorVersion>,\n        #[serde(default, skip_serializing_if = \"std::ops::Not::not\")]\n        read_only: bool,\n    },\n    ImportPgdata {\n        import_pgdata: TimelineCreateRequestModeImportPgdata,\n    },\n    // NB: Bootstrap is all-optional, and thus the serde(untagged) will cause serde to stop at Bootstrap.\n    // (serde picks the first matching enum variant, in declaration order).\n    Bootstrap {\n        #[serde(default)]\n        existing_initdb_timeline_id: Option<TimelineId>,\n        pg_version: Option<PgMajorVersion>,\n    },\n}\n\n#[derive(Serialize, Deserialize, Clone)]\npub struct TimelineCreateRequestModeImportPgdata {\n    pub location: ImportPgdataLocation,\n    pub idempotency_key: ImportPgdataIdempotencyKey,\n}\n\n#[derive(Serialize, Deserialize, Clone, Debug)]\npub enum ImportPgdataLocation {\n    #[cfg(feature = \"testing\")]\n    LocalFs { path: Utf8PathBuf },\n    AwsS3 {\n        region: String,\n        bucket: String,\n        /// A better name for this would be `prefix`; changing requires coordination with cplane.\n        /// See <https://github.com/neondatabase/cloud/issues/20646>.\n        key: String,\n    },\n}\n\n#[derive(Serialize, Deserialize, Clone)]\n#[serde(transparent)]\npub struct ImportPgdataIdempotencyKey(pub String);\n\nimpl ImportPgdataIdempotencyKey {\n    pub fn random() -> Self {\n        use rand::Rng;\n        use rand::distr::Alphanumeric;\n        Self(\n            rand::rng()\n                .sample_iter(&Alphanumeric)\n                .take(20)\n                .map(char::from)\n                .collect(),\n        )\n    }\n}\n\n#[derive(Serialize, Deserialize, Clone)]\npub struct LsnLeaseRequest {\n    pub lsn: Lsn,\n}\n\n#[derive(Serialize, 
Deserialize)]\npub struct TenantShardSplitRequest {\n    pub new_shard_count: u8,\n\n    // A tenant's stripe size is only meaningful the first time their shard count goes\n    // above 1: therefore during a split from 1->N shards, we may modify the stripe size.\n    //\n    // If this is set while the stripe count is being increased from an already >1 value,\n    // then the request will fail with 400.\n    pub new_stripe_size: Option<ShardStripeSize>,\n}\n\n#[derive(Serialize, Deserialize)]\npub struct TenantShardSplitResponse {\n    pub new_shards: Vec<TenantShardId>,\n}\n\n/// Parameters that apply to all shards in a tenant.  Used during tenant creation.\n#[derive(Clone, Copy, Serialize, Deserialize, Debug)]\n#[serde(deny_unknown_fields)]\npub struct ShardParameters {\n    pub count: ShardCount,\n    pub stripe_size: ShardStripeSize,\n}\n\nimpl ShardParameters {\n    pub fn is_unsharded(&self) -> bool {\n        self.count.is_unsharded()\n    }\n}\n\nimpl Default for ShardParameters {\n    fn default() -> Self {\n        Self {\n            count: ShardCount::new(0),\n            stripe_size: DEFAULT_STRIPE_SIZE,\n        }\n    }\n}\n\nimpl From<ShardIdentity> for ShardParameters {\n    fn from(identity: ShardIdentity) -> Self {\n        Self {\n            count: identity.count,\n            stripe_size: identity.stripe_size,\n        }\n    }\n}\n\n#[derive(Debug, Default, Clone, Eq, PartialEq)]\npub enum FieldPatch<T> {\n    Upsert(T),\n    Remove,\n    #[default]\n    Noop,\n}\n\nimpl<T> FieldPatch<T> {\n    fn is_noop(&self) -> bool {\n        matches!(self, FieldPatch::Noop)\n    }\n\n    pub fn apply(self, target: &mut Option<T>) {\n        match self {\n            Self::Upsert(v) => *target = Some(v),\n            Self::Remove => *target = None,\n            Self::Noop => {}\n        }\n    }\n\n    pub fn map<U, E, F: FnOnce(T) -> Result<U, E>>(self, map: F) -> Result<FieldPatch<U>, E> {\n        match self {\n            Self::Upsert(v) => 
Ok(FieldPatch::<U>::Upsert(map(v)?)),\n            Self::Remove => Ok(FieldPatch::<U>::Remove),\n            Self::Noop => Ok(FieldPatch::<U>::Noop),\n        }\n    }\n}\n\nimpl<'de, T: Deserialize<'de>> Deserialize<'de> for FieldPatch<T> {\n    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>\n    where\n        D: Deserializer<'de>,\n    {\n        Option::deserialize(deserializer).map(|opt| match opt {\n            None => FieldPatch::Remove,\n            Some(val) => FieldPatch::Upsert(val),\n        })\n    }\n}\n\nimpl<T: Serialize> Serialize for FieldPatch<T> {\n    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>\n    where\n        S: Serializer,\n    {\n        match self {\n            FieldPatch::Upsert(val) => serializer.serialize_some(val),\n            FieldPatch::Remove => serializer.serialize_none(),\n            FieldPatch::Noop => unreachable!(),\n        }\n    }\n}\n\n#[derive(Serialize, Deserialize, Debug, Default, Clone, Eq, PartialEq)]\n#[serde(default)]\npub struct TenantConfigPatch {\n    #[serde(skip_serializing_if = \"FieldPatch::is_noop\")]\n    pub checkpoint_distance: FieldPatch<u64>,\n    #[serde(skip_serializing_if = \"FieldPatch::is_noop\")]\n    pub checkpoint_timeout: FieldPatch<String>,\n    #[serde(skip_serializing_if = \"FieldPatch::is_noop\")]\n    pub compaction_target_size: FieldPatch<u64>,\n    #[serde(skip_serializing_if = \"FieldPatch::is_noop\")]\n    pub compaction_period: FieldPatch<String>,\n    #[serde(skip_serializing_if = \"FieldPatch::is_noop\")]\n    pub compaction_threshold: FieldPatch<usize>,\n    #[serde(skip_serializing_if = \"FieldPatch::is_noop\")]\n    pub compaction_upper_limit: FieldPatch<usize>,\n    // defer parsing compaction_algorithm, like eviction_policy\n    #[serde(skip_serializing_if = \"FieldPatch::is_noop\")]\n    pub compaction_algorithm: FieldPatch<CompactionAlgorithmSettings>,\n    #[serde(skip_serializing_if = \"FieldPatch::is_noop\")]\n    pub 
compaction_shard_ancestor: FieldPatch<bool>,\n    #[serde(skip_serializing_if = \"FieldPatch::is_noop\")]\n    pub compaction_l0_first: FieldPatch<bool>,\n    #[serde(skip_serializing_if = \"FieldPatch::is_noop\")]\n    pub compaction_l0_semaphore: FieldPatch<bool>,\n    #[serde(skip_serializing_if = \"FieldPatch::is_noop\")]\n    pub l0_flush_delay_threshold: FieldPatch<usize>,\n    #[serde(skip_serializing_if = \"FieldPatch::is_noop\")]\n    pub l0_flush_stall_threshold: FieldPatch<usize>,\n    #[serde(skip_serializing_if = \"FieldPatch::is_noop\")]\n    pub gc_horizon: FieldPatch<u64>,\n    #[serde(skip_serializing_if = \"FieldPatch::is_noop\")]\n    pub gc_period: FieldPatch<String>,\n    #[serde(skip_serializing_if = \"FieldPatch::is_noop\")]\n    pub image_creation_threshold: FieldPatch<usize>,\n    // HADRON\n    #[serde(skip_serializing_if = \"FieldPatch::is_noop\")]\n    pub image_layer_force_creation_period: FieldPatch<String>,\n    #[serde(skip_serializing_if = \"FieldPatch::is_noop\")]\n    pub pitr_interval: FieldPatch<String>,\n    #[serde(skip_serializing_if = \"FieldPatch::is_noop\")]\n    pub walreceiver_connect_timeout: FieldPatch<String>,\n    #[serde(skip_serializing_if = \"FieldPatch::is_noop\")]\n    pub lagging_wal_timeout: FieldPatch<String>,\n    #[serde(skip_serializing_if = \"FieldPatch::is_noop\")]\n    pub max_lsn_wal_lag: FieldPatch<NonZeroU64>,\n    #[serde(skip_serializing_if = \"FieldPatch::is_noop\")]\n    pub eviction_policy: FieldPatch<EvictionPolicy>,\n    #[serde(skip_serializing_if = \"FieldPatch::is_noop\")]\n    pub min_resident_size_override: FieldPatch<u64>,\n    #[serde(skip_serializing_if = \"FieldPatch::is_noop\")]\n    pub evictions_low_residence_duration_metric_threshold: FieldPatch<String>,\n    #[serde(skip_serializing_if = \"FieldPatch::is_noop\")]\n    pub heatmap_period: FieldPatch<String>,\n    #[serde(skip_serializing_if = \"FieldPatch::is_noop\")]\n    pub lazy_slru_download: FieldPatch<bool>,\n    
#[serde(skip_serializing_if = \"FieldPatch::is_noop\")]\n    pub timeline_get_throttle: FieldPatch<ThrottleConfig>,\n    #[serde(skip_serializing_if = \"FieldPatch::is_noop\")]\n    pub image_layer_creation_check_threshold: FieldPatch<u8>,\n    #[serde(skip_serializing_if = \"FieldPatch::is_noop\")]\n    pub image_creation_preempt_threshold: FieldPatch<usize>,\n    #[serde(skip_serializing_if = \"FieldPatch::is_noop\")]\n    pub lsn_lease_length: FieldPatch<String>,\n    #[serde(skip_serializing_if = \"FieldPatch::is_noop\")]\n    pub lsn_lease_length_for_ts: FieldPatch<String>,\n    #[serde(skip_serializing_if = \"FieldPatch::is_noop\")]\n    pub timeline_offloading: FieldPatch<bool>,\n    #[serde(skip_serializing_if = \"FieldPatch::is_noop\")]\n    pub rel_size_v2_enabled: FieldPatch<bool>,\n    #[serde(skip_serializing_if = \"FieldPatch::is_noop\")]\n    pub gc_compaction_enabled: FieldPatch<bool>,\n    #[serde(skip_serializing_if = \"FieldPatch::is_noop\")]\n    pub gc_compaction_verification: FieldPatch<bool>,\n    #[serde(skip_serializing_if = \"FieldPatch::is_noop\")]\n    pub gc_compaction_initial_threshold_kb: FieldPatch<u64>,\n    #[serde(skip_serializing_if = \"FieldPatch::is_noop\")]\n    pub gc_compaction_ratio_percent: FieldPatch<u64>,\n    #[serde(skip_serializing_if = \"FieldPatch::is_noop\")]\n    pub sampling_ratio: FieldPatch<Option<Ratio>>,\n    #[serde(skip_serializing_if = \"FieldPatch::is_noop\")]\n    pub relsize_snapshot_cache_capacity: FieldPatch<usize>,\n    #[serde(skip_serializing_if = \"FieldPatch::is_noop\")]\n    pub basebackup_cache_enabled: FieldPatch<bool>,\n}\n\n/// Like [`crate::config::TenantConfigToml`], but preserves the information\n/// about which parameters are set and which are not.\n///\n/// Used in many places, including durably stored ones.\n#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Default)]\n#[serde(default)] // this maps omitted fields in deserialization to None\npub struct TenantConfig {\n    
#[serde(skip_serializing_if = \"Option::is_none\")]\n    pub checkpoint_distance: Option<u64>,\n\n    #[serde(skip_serializing_if = \"Option::is_none\")]\n    #[serde(with = \"humantime_serde\")]\n    pub checkpoint_timeout: Option<Duration>,\n\n    #[serde(skip_serializing_if = \"Option::is_none\")]\n    pub compaction_target_size: Option<u64>,\n\n    #[serde(skip_serializing_if = \"Option::is_none\")]\n    #[serde(with = \"humantime_serde\")]\n    pub compaction_period: Option<Duration>,\n\n    #[serde(skip_serializing_if = \"Option::is_none\")]\n    pub compaction_threshold: Option<usize>,\n\n    #[serde(skip_serializing_if = \"Option::is_none\")]\n    pub compaction_upper_limit: Option<usize>,\n\n    #[serde(skip_serializing_if = \"Option::is_none\")]\n    pub compaction_algorithm: Option<CompactionAlgorithmSettings>,\n\n    #[serde(skip_serializing_if = \"Option::is_none\")]\n    pub compaction_shard_ancestor: Option<bool>,\n\n    #[serde(skip_serializing_if = \"Option::is_none\")]\n    pub compaction_l0_first: Option<bool>,\n\n    #[serde(skip_serializing_if = \"Option::is_none\")]\n    pub compaction_l0_semaphore: Option<bool>,\n\n    #[serde(skip_serializing_if = \"Option::is_none\")]\n    pub l0_flush_delay_threshold: Option<usize>,\n\n    #[serde(skip_serializing_if = \"Option::is_none\")]\n    pub l0_flush_stall_threshold: Option<usize>,\n\n    #[serde(skip_serializing_if = \"Option::is_none\")]\n    pub gc_horizon: Option<u64>,\n\n    #[serde(skip_serializing_if = \"Option::is_none\")]\n    #[serde(with = \"humantime_serde\")]\n    pub gc_period: Option<Duration>,\n\n    #[serde(skip_serializing_if = \"Option::is_none\")]\n    pub image_creation_threshold: Option<usize>,\n\n    // HADRON\n    #[serde(skip_serializing_if = \"Option::is_none\")]\n    #[serde(with = \"humantime_serde\")]\n    pub image_layer_force_creation_period: Option<Duration>,\n\n    #[serde(skip_serializing_if = \"Option::is_none\")]\n    #[serde(with = \"humantime_serde\")]\n    pub 
pitr_interval: Option<Duration>,\n\n    #[serde(skip_serializing_if = \"Option::is_none\")]\n    #[serde(with = \"humantime_serde\")]\n    pub walreceiver_connect_timeout: Option<Duration>,\n\n    #[serde(skip_serializing_if = \"Option::is_none\")]\n    #[serde(with = \"humantime_serde\")]\n    pub lagging_wal_timeout: Option<Duration>,\n\n    #[serde(skip_serializing_if = \"Option::is_none\")]\n    pub max_lsn_wal_lag: Option<NonZeroU64>,\n\n    #[serde(skip_serializing_if = \"Option::is_none\")]\n    pub eviction_policy: Option<EvictionPolicy>,\n\n    #[serde(skip_serializing_if = \"Option::is_none\")]\n    pub min_resident_size_override: Option<u64>,\n\n    #[serde(skip_serializing_if = \"Option::is_none\")]\n    #[serde(with = \"humantime_serde\")]\n    pub evictions_low_residence_duration_metric_threshold: Option<Duration>,\n\n    #[serde(skip_serializing_if = \"Option::is_none\")]\n    #[serde(with = \"humantime_serde\")]\n    pub heatmap_period: Option<Duration>,\n\n    #[serde(skip_serializing_if = \"Option::is_none\")]\n    pub lazy_slru_download: Option<bool>,\n\n    #[serde(skip_serializing_if = \"Option::is_none\")]\n    pub timeline_get_throttle: Option<ThrottleConfig>,\n\n    #[serde(skip_serializing_if = \"Option::is_none\")]\n    pub image_layer_creation_check_threshold: Option<u8>,\n\n    #[serde(skip_serializing_if = \"Option::is_none\")]\n    pub image_creation_preempt_threshold: Option<usize>,\n\n    #[serde(skip_serializing_if = \"Option::is_none\")]\n    #[serde(with = \"humantime_serde\")]\n    pub lsn_lease_length: Option<Duration>,\n\n    #[serde(skip_serializing_if = \"Option::is_none\")]\n    #[serde(with = \"humantime_serde\")]\n    pub lsn_lease_length_for_ts: Option<Duration>,\n\n    #[serde(skip_serializing_if = \"Option::is_none\")]\n    pub timeline_offloading: Option<bool>,\n\n    #[serde(skip_serializing_if = \"Option::is_none\")]\n    pub rel_size_v2_enabled: Option<bool>,\n\n    #[serde(skip_serializing_if = 
\"Option::is_none\")]\n    pub gc_compaction_enabled: Option<bool>,\n\n    #[serde(skip_serializing_if = \"Option::is_none\")]\n    pub gc_compaction_verification: Option<bool>,\n\n    #[serde(skip_serializing_if = \"Option::is_none\")]\n    pub gc_compaction_initial_threshold_kb: Option<u64>,\n\n    #[serde(skip_serializing_if = \"Option::is_none\")]\n    pub gc_compaction_ratio_percent: Option<u64>,\n\n    #[serde(skip_serializing_if = \"Option::is_none\")]\n    pub sampling_ratio: Option<Option<Ratio>>,\n\n    #[serde(skip_serializing_if = \"Option::is_none\")]\n    pub relsize_snapshot_cache_capacity: Option<usize>,\n\n    #[serde(skip_serializing_if = \"Option::is_none\")]\n    pub basebackup_cache_enabled: Option<bool>,\n}\n\nimpl TenantConfig {\n    pub fn apply_patch(\n        self,\n        patch: TenantConfigPatch,\n    ) -> Result<TenantConfig, humantime::DurationError> {\n        let Self {\n            mut checkpoint_distance,\n            mut checkpoint_timeout,\n            mut compaction_target_size,\n            mut compaction_period,\n            mut compaction_threshold,\n            mut compaction_upper_limit,\n            mut compaction_algorithm,\n            mut compaction_shard_ancestor,\n            mut compaction_l0_first,\n            mut compaction_l0_semaphore,\n            mut l0_flush_delay_threshold,\n            mut l0_flush_stall_threshold,\n            mut gc_horizon,\n            mut gc_period,\n            mut image_creation_threshold,\n            mut image_layer_force_creation_period,\n            mut pitr_interval,\n            mut walreceiver_connect_timeout,\n            mut lagging_wal_timeout,\n            mut max_lsn_wal_lag,\n            mut eviction_policy,\n            mut min_resident_size_override,\n            mut evictions_low_residence_duration_metric_threshold,\n            mut heatmap_period,\n            mut lazy_slru_download,\n            mut timeline_get_throttle,\n            mut 
image_layer_creation_check_threshold,\n            mut image_creation_preempt_threshold,\n            mut lsn_lease_length,\n            mut lsn_lease_length_for_ts,\n            mut timeline_offloading,\n            mut rel_size_v2_enabled,\n            mut gc_compaction_enabled,\n            mut gc_compaction_verification,\n            mut gc_compaction_initial_threshold_kb,\n            mut gc_compaction_ratio_percent,\n            mut sampling_ratio,\n            mut relsize_snapshot_cache_capacity,\n            mut basebackup_cache_enabled,\n        } = self;\n\n        patch.checkpoint_distance.apply(&mut checkpoint_distance);\n        patch\n            .checkpoint_timeout\n            .map(|v| humantime::parse_duration(&v))?\n            .apply(&mut checkpoint_timeout);\n        patch\n            .compaction_target_size\n            .apply(&mut compaction_target_size);\n        patch\n            .compaction_period\n            .map(|v| humantime::parse_duration(&v))?\n            .apply(&mut compaction_period);\n        patch.compaction_threshold.apply(&mut compaction_threshold);\n        patch\n            .compaction_upper_limit\n            .apply(&mut compaction_upper_limit);\n        patch.compaction_algorithm.apply(&mut compaction_algorithm);\n        patch\n            .compaction_shard_ancestor\n            .apply(&mut compaction_shard_ancestor);\n        patch.compaction_l0_first.apply(&mut compaction_l0_first);\n        patch\n            .compaction_l0_semaphore\n            .apply(&mut compaction_l0_semaphore);\n        patch\n            .l0_flush_delay_threshold\n            .apply(&mut l0_flush_delay_threshold);\n        patch\n            .l0_flush_stall_threshold\n            .apply(&mut l0_flush_stall_threshold);\n        patch.gc_horizon.apply(&mut gc_horizon);\n        patch\n            .gc_period\n            .map(|v| humantime::parse_duration(&v))?\n            .apply(&mut gc_period);\n        patch\n            
.image_creation_threshold\n            .apply(&mut image_creation_threshold);\n        // HADRON\n        patch\n            .image_layer_force_creation_period\n            .map(|v| humantime::parse_duration(&v))?\n            .apply(&mut image_layer_force_creation_period);\n        patch\n            .pitr_interval\n            .map(|v| humantime::parse_duration(&v))?\n            .apply(&mut pitr_interval);\n        patch\n            .walreceiver_connect_timeout\n            .map(|v| humantime::parse_duration(&v))?\n            .apply(&mut walreceiver_connect_timeout);\n        patch\n            .lagging_wal_timeout\n            .map(|v| humantime::parse_duration(&v))?\n            .apply(&mut lagging_wal_timeout);\n        patch.max_lsn_wal_lag.apply(&mut max_lsn_wal_lag);\n        patch.eviction_policy.apply(&mut eviction_policy);\n        patch\n            .min_resident_size_override\n            .apply(&mut min_resident_size_override);\n        patch\n            .evictions_low_residence_duration_metric_threshold\n            .map(|v| humantime::parse_duration(&v))?\n            .apply(&mut evictions_low_residence_duration_metric_threshold);\n        patch\n            .heatmap_period\n            .map(|v| humantime::parse_duration(&v))?\n            .apply(&mut heatmap_period);\n        patch.lazy_slru_download.apply(&mut lazy_slru_download);\n        patch\n            .timeline_get_throttle\n            .apply(&mut timeline_get_throttle);\n        patch\n            .image_layer_creation_check_threshold\n            .apply(&mut image_layer_creation_check_threshold);\n        patch\n            .image_creation_preempt_threshold\n            .apply(&mut image_creation_preempt_threshold);\n        patch\n            .lsn_lease_length\n            .map(|v| humantime::parse_duration(&v))?\n            .apply(&mut lsn_lease_length);\n        patch\n            .lsn_lease_length_for_ts\n            .map(|v| humantime::parse_duration(&v))?\n            
.apply(&mut lsn_lease_length_for_ts);\n        patch.timeline_offloading.apply(&mut timeline_offloading);\n        patch.rel_size_v2_enabled.apply(&mut rel_size_v2_enabled);\n        patch\n            .gc_compaction_enabled\n            .apply(&mut gc_compaction_enabled);\n        patch\n            .gc_compaction_verification\n            .apply(&mut gc_compaction_verification);\n        patch\n            .gc_compaction_initial_threshold_kb\n            .apply(&mut gc_compaction_initial_threshold_kb);\n        patch\n            .gc_compaction_ratio_percent\n            .apply(&mut gc_compaction_ratio_percent);\n        patch.sampling_ratio.apply(&mut sampling_ratio);\n        patch\n            .relsize_snapshot_cache_capacity\n            .apply(&mut relsize_snapshot_cache_capacity);\n        patch\n            .basebackup_cache_enabled\n            .apply(&mut basebackup_cache_enabled);\n\n        Ok(Self {\n            checkpoint_distance,\n            checkpoint_timeout,\n            compaction_target_size,\n            compaction_period,\n            compaction_threshold,\n            compaction_upper_limit,\n            compaction_algorithm,\n            compaction_shard_ancestor,\n            compaction_l0_first,\n            compaction_l0_semaphore,\n            l0_flush_delay_threshold,\n            l0_flush_stall_threshold,\n            gc_horizon,\n            gc_period,\n            image_creation_threshold,\n            image_layer_force_creation_period,\n            pitr_interval,\n            walreceiver_connect_timeout,\n            lagging_wal_timeout,\n            max_lsn_wal_lag,\n            eviction_policy,\n            min_resident_size_override,\n            evictions_low_residence_duration_metric_threshold,\n            heatmap_period,\n            lazy_slru_download,\n            timeline_get_throttle,\n            image_layer_creation_check_threshold,\n            image_creation_preempt_threshold,\n            lsn_lease_length,\n       
     lsn_lease_length_for_ts,\n            timeline_offloading,\n            rel_size_v2_enabled,\n            gc_compaction_enabled,\n            gc_compaction_verification,\n            gc_compaction_initial_threshold_kb,\n            gc_compaction_ratio_percent,\n            sampling_ratio,\n            relsize_snapshot_cache_capacity,\n            basebackup_cache_enabled,\n        })\n    }\n\n    pub fn merge(\n        &self,\n        global_conf: crate::config::TenantConfigToml,\n    ) -> crate::config::TenantConfigToml {\n        crate::config::TenantConfigToml {\n            checkpoint_distance: self\n                .checkpoint_distance\n                .unwrap_or(global_conf.checkpoint_distance),\n            checkpoint_timeout: self\n                .checkpoint_timeout\n                .unwrap_or(global_conf.checkpoint_timeout),\n            compaction_target_size: self\n                .compaction_target_size\n                .unwrap_or(global_conf.compaction_target_size),\n            compaction_period: self\n                .compaction_period\n                .unwrap_or(global_conf.compaction_period),\n            compaction_threshold: self\n                .compaction_threshold\n                .unwrap_or(global_conf.compaction_threshold),\n            compaction_upper_limit: self\n                .compaction_upper_limit\n                .unwrap_or(global_conf.compaction_upper_limit),\n            compaction_algorithm: self\n                .compaction_algorithm\n                .as_ref()\n                .unwrap_or(&global_conf.compaction_algorithm)\n                .clone(),\n            compaction_shard_ancestor: self\n                .compaction_shard_ancestor\n                .unwrap_or(global_conf.compaction_shard_ancestor),\n            compaction_l0_first: self\n                .compaction_l0_first\n                .unwrap_or(global_conf.compaction_l0_first),\n            compaction_l0_semaphore: self\n                
.compaction_l0_semaphore\n                .unwrap_or(global_conf.compaction_l0_semaphore),\n            l0_flush_delay_threshold: self\n                .l0_flush_delay_threshold\n                .or(global_conf.l0_flush_delay_threshold),\n            l0_flush_stall_threshold: self\n                .l0_flush_stall_threshold\n                .or(global_conf.l0_flush_stall_threshold),\n            gc_horizon: self.gc_horizon.unwrap_or(global_conf.gc_horizon),\n            gc_period: self.gc_period.unwrap_or(global_conf.gc_period),\n            image_creation_threshold: self\n                .image_creation_threshold\n                .unwrap_or(global_conf.image_creation_threshold),\n            image_layer_force_creation_period: self\n                .image_layer_force_creation_period\n                .or(global_conf.image_layer_force_creation_period),\n            pitr_interval: self.pitr_interval.unwrap_or(global_conf.pitr_interval),\n            walreceiver_connect_timeout: self\n                .walreceiver_connect_timeout\n                .unwrap_or(global_conf.walreceiver_connect_timeout),\n            lagging_wal_timeout: self\n                .lagging_wal_timeout\n                .unwrap_or(global_conf.lagging_wal_timeout),\n            max_lsn_wal_lag: self.max_lsn_wal_lag.unwrap_or(global_conf.max_lsn_wal_lag),\n            eviction_policy: self.eviction_policy.unwrap_or(global_conf.eviction_policy),\n            min_resident_size_override: self\n                .min_resident_size_override\n                .or(global_conf.min_resident_size_override),\n            evictions_low_residence_duration_metric_threshold: self\n                .evictions_low_residence_duration_metric_threshold\n                .unwrap_or(global_conf.evictions_low_residence_duration_metric_threshold),\n            heatmap_period: self.heatmap_period.unwrap_or(global_conf.heatmap_period),\n            lazy_slru_download: self\n                .lazy_slru_download\n                
.unwrap_or(global_conf.lazy_slru_download),\n            timeline_get_throttle: self\n                .timeline_get_throttle\n                .clone()\n                .unwrap_or(global_conf.timeline_get_throttle),\n            image_layer_creation_check_threshold: self\n                .image_layer_creation_check_threshold\n                .unwrap_or(global_conf.image_layer_creation_check_threshold),\n            image_creation_preempt_threshold: self\n                .image_creation_preempt_threshold\n                .unwrap_or(global_conf.image_creation_preempt_threshold),\n            lsn_lease_length: self\n                .lsn_lease_length\n                .unwrap_or(global_conf.lsn_lease_length),\n            lsn_lease_length_for_ts: self\n                .lsn_lease_length_for_ts\n                .unwrap_or(global_conf.lsn_lease_length_for_ts),\n            timeline_offloading: self\n                .timeline_offloading\n                .unwrap_or(global_conf.timeline_offloading),\n            rel_size_v2_enabled: self\n                .rel_size_v2_enabled\n                .unwrap_or(global_conf.rel_size_v2_enabled),\n            gc_compaction_enabled: self\n                .gc_compaction_enabled\n                .unwrap_or(global_conf.gc_compaction_enabled),\n            gc_compaction_verification: self\n                .gc_compaction_verification\n                .unwrap_or(global_conf.gc_compaction_verification),\n            gc_compaction_initial_threshold_kb: self\n                .gc_compaction_initial_threshold_kb\n                .unwrap_or(global_conf.gc_compaction_initial_threshold_kb),\n            gc_compaction_ratio_percent: self\n                .gc_compaction_ratio_percent\n                .unwrap_or(global_conf.gc_compaction_ratio_percent),\n            sampling_ratio: self.sampling_ratio.unwrap_or(global_conf.sampling_ratio),\n            relsize_snapshot_cache_capacity: self\n                .relsize_snapshot_cache_capacity\n                
.unwrap_or(global_conf.relsize_snapshot_cache_capacity),\n            basebackup_cache_enabled: self\n                .basebackup_cache_enabled\n                .unwrap_or(global_conf.basebackup_cache_enabled),\n        }\n    }\n}\n\n/// The policy for the aux file storage.\n///\n/// It can be switched through `switch_aux_file_policy` tenant config.\n/// When the first aux file is written, the policy will be persisted in the\n/// `index_part.json` file and has a limited migration path.\n///\n/// Currently, we only allow the following migration path:\n///\n/// Unset -> V1\n///       -> V2\n///       -> CrossValidation -> V2\n#[derive(\n    Eq,\n    PartialEq,\n    Debug,\n    Copy,\n    Clone,\n    strum_macros::EnumString,\n    strum_macros::Display,\n    serde_with::DeserializeFromStr,\n    serde_with::SerializeDisplay,\n)]\n#[strum(serialize_all = \"kebab-case\")]\npub enum AuxFilePolicy {\n    /// V1 aux file policy: store everything in AUX_FILE_KEY\n    #[strum(ascii_case_insensitive)]\n    V1,\n    /// V2 aux file policy: store in the AUX_FILE keyspace\n    #[strum(ascii_case_insensitive)]\n    V2,\n    /// Cross validation runs both formats on the write path and does validation\n    /// on the read path.\n    #[strum(ascii_case_insensitive)]\n    CrossValidation,\n}\n\n#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]\n#[serde(tag = \"kind\")]\npub enum EvictionPolicy {\n    NoEviction,\n    LayerAccessThreshold(EvictionPolicyLayerAccessThreshold),\n    OnlyImitiate(EvictionPolicyLayerAccessThreshold),\n}\n\nimpl EvictionPolicy {\n    pub fn discriminant_str(&self) -> &'static str {\n        match self {\n            EvictionPolicy::NoEviction => \"NoEviction\",\n            EvictionPolicy::LayerAccessThreshold(_) => \"LayerAccessThreshold\",\n            EvictionPolicy::OnlyImitiate(_) => \"OnlyImitiate\",\n        }\n    }\n}\n\n#[derive(\n    Eq,\n    PartialEq,\n    Debug,\n    Copy,\n    Clone,\n    strum_macros::EnumString,\n    
strum_macros::Display,\n    serde_with::DeserializeFromStr,\n    serde_with::SerializeDisplay,\n)]\n#[strum(serialize_all = \"kebab-case\")]\npub enum CompactionAlgorithm {\n    Legacy,\n    Tiered,\n}\n\n#[derive(\n    Debug, Clone, Copy, PartialEq, Eq, serde_with::DeserializeFromStr, serde_with::SerializeDisplay,\n)]\npub enum ImageCompressionAlgorithm {\n    // Disabled for writes, support decompressing during read path\n    Disabled,\n    /// Zstandard compression. Level 0 means and None mean the same (default level). Levels can be negative as well.\n    /// For details, see the [manual](http://facebook.github.io/zstd/zstd_manual.html).\n    Zstd {\n        level: Option<i8>,\n    },\n}\n\nimpl FromStr for ImageCompressionAlgorithm {\n    type Err = anyhow::Error;\n    fn from_str(s: &str) -> Result<Self, Self::Err> {\n        let mut components = s.split(['(', ')']);\n        let first = components\n            .next()\n            .ok_or_else(|| anyhow::anyhow!(\"empty string\"))?;\n        match first {\n            \"disabled\" => Ok(ImageCompressionAlgorithm::Disabled),\n            \"zstd\" => {\n                let level = if let Some(v) = components.next() {\n                    let v: i8 = v.parse()?;\n                    Some(v)\n                } else {\n                    None\n                };\n\n                Ok(ImageCompressionAlgorithm::Zstd { level })\n            }\n            _ => anyhow::bail!(\"invalid specifier '{first}'\"),\n        }\n    }\n}\n\nimpl Display for ImageCompressionAlgorithm {\n    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {\n        match self {\n            ImageCompressionAlgorithm::Disabled => write!(f, \"disabled\"),\n            ImageCompressionAlgorithm::Zstd { level } => {\n                if let Some(level) = level {\n                    write!(f, \"zstd({level})\")\n                } else {\n                    write!(f, \"zstd\")\n                }\n            }\n        }\n    
}\n}\n\n#[derive(Eq, PartialEq, Debug, Clone, Serialize, Deserialize)]\npub struct CompactionAlgorithmSettings {\n    pub kind: CompactionAlgorithm,\n}\n\n#[derive(Debug, PartialEq, Eq, Clone, Deserialize, Serialize)]\n#[serde(tag = \"mode\", rename_all = \"kebab-case\")]\npub enum L0FlushConfig {\n    #[serde(rename_all = \"snake_case\")]\n    Direct { max_concurrency: NonZeroUsize },\n}\n\n#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]\npub struct EvictionPolicyLayerAccessThreshold {\n    #[serde(with = \"humantime_serde\")]\n    pub period: Duration,\n    #[serde(with = \"humantime_serde\")]\n    pub threshold: Duration,\n}\n\n#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq)]\npub struct ThrottleConfig {\n    /// See [`ThrottleConfigTaskKinds`] for why we do the serde `rename`.\n    #[serde(rename = \"task_kinds\")]\n    pub enabled: ThrottleConfigTaskKinds,\n    pub initial: u32,\n    #[serde(with = \"humantime_serde\")]\n    pub refill_interval: Duration,\n    pub refill_amount: NonZeroU32,\n    pub max: u32,\n}\n\n/// Before <https://github.com/neondatabase/neon/pull/9962>\n/// the throttle was a per `Timeline::get`/`Timeline::get_vectored` call.\n/// The `task_kinds` field controlled which Pageserver \"Task Kind\"s\n/// were subject to the throttle.\n///\n/// After that PR, the throttle is applied at pagestream request level\n/// and the `task_kinds` field does not apply since the only task kind\n/// that us subject to the throttle is that of the page service.\n///\n/// However, we don't want to make a breaking config change right now\n/// because it means we have to migrate all the tenant configs.\n/// This will be done in a future PR.\n///\n/// In the meantime, we use emptiness / non-emptsiness of the `task_kinds`\n/// field to determine if the throttle is enabled or not.\n#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)]\n#[serde(transparent)]\npub struct ThrottleConfigTaskKinds(Vec<String>);\n\nimpl 
ThrottleConfigTaskKinds {\n    pub fn disabled() -> Self {\n        Self(vec![])\n    }\n    pub fn is_enabled(&self) -> bool {\n        !self.0.is_empty()\n    }\n}\n\nimpl ThrottleConfig {\n    pub fn disabled() -> Self {\n        Self {\n            enabled: ThrottleConfigTaskKinds::disabled(),\n            // other values don't matter with emtpy `task_kinds`.\n            initial: 0,\n            refill_interval: Duration::from_millis(1),\n            refill_amount: NonZeroU32::new(1).unwrap(),\n            max: 1,\n        }\n    }\n    /// The requests per second allowed  by the given config.\n    pub fn steady_rps(&self) -> f64 {\n        (self.refill_amount.get() as f64) / (self.refill_interval.as_secs_f64())\n    }\n}\n\n#[cfg(test)]\nmod throttle_config_tests {\n    use super::*;\n\n    #[test]\n    fn test_disabled_is_disabled() {\n        let config = ThrottleConfig::disabled();\n        assert!(!config.enabled.is_enabled());\n    }\n    #[test]\n    fn test_enabled_backwards_compat() {\n        let input = serde_json::json!({\n            \"task_kinds\": [\"PageRequestHandler\"],\n            \"initial\": 40000,\n            \"refill_interval\": \"50ms\",\n            \"refill_amount\": 1000,\n            \"max\": 40000,\n            \"fair\": true\n        });\n        let config: ThrottleConfig = serde_json::from_value(input).unwrap();\n        assert!(config.enabled.is_enabled());\n    }\n}\n\n/// A flattened analog of a `pagesever::tenant::LocationMode`, which\n/// lists out all possible states (and the virtual \"Detached\" state)\n/// in a flat form rather than using rust-style enums.\n#[derive(Serialize, Deserialize, Debug, Clone, Copy, Eq, PartialEq)]\npub enum LocationConfigMode {\n    AttachedSingle,\n    AttachedMulti,\n    AttachedStale,\n    Secondary,\n    Detached,\n}\n\n#[derive(Serialize, Deserialize, Debug, Clone, Eq, PartialEq)]\npub struct LocationConfigSecondary {\n    pub warm: bool,\n}\n\n/// An alternative representation of 
`pageserver::tenant::LocationConf`,\n/// for use in external-facing APIs.\n#[derive(Serialize, Deserialize, Debug, Clone, Eq, PartialEq)]\npub struct LocationConfig {\n    pub mode: LocationConfigMode,\n    /// If attaching, in what generation?\n    #[serde(default)]\n    pub generation: Option<u32>,\n\n    // If requesting mode `Secondary`, configuration for that.\n    #[serde(default)]\n    pub secondary_conf: Option<LocationConfigSecondary>,\n\n    // Shard parameters: if shard_count is nonzero, then other shard_* fields\n    // must be set accurately.\n    #[serde(default)]\n    pub shard_number: u8,\n    #[serde(default)]\n    pub shard_count: u8,\n    #[serde(default)]\n    pub shard_stripe_size: u32,\n\n    // This configuration only affects attached mode, but should be provided irrespective\n    // of the mode, as a secondary location might transition on startup if the response\n    // to the `/re-attach` control plane API requests it.\n    pub tenant_conf: TenantConfig,\n}\n\n#[derive(Serialize, Deserialize)]\npub struct LocationConfigListResponse {\n    pub tenant_shards: Vec<(TenantShardId, Option<LocationConfig>)>,\n}\n\n#[derive(Serialize)]\npub struct StatusResponse {\n    pub id: NodeId,\n}\n\n#[derive(Serialize, Deserialize, Debug)]\n#[serde(deny_unknown_fields)]\npub struct TenantLocationConfigRequest {\n    #[serde(flatten)]\n    pub config: LocationConfig, // as we have a flattened field, we should reject all unknown fields in it\n}\n\n#[derive(Serialize, Deserialize, Debug)]\n#[serde(deny_unknown_fields)]\npub struct TenantTimeTravelRequest {\n    pub shard_counts: Vec<ShardCount>,\n}\n\n#[derive(Serialize, Deserialize, Debug)]\n#[serde(deny_unknown_fields)]\npub struct TenantShardLocation {\n    pub shard_id: TenantShardId,\n    pub node_id: NodeId,\n}\n\n#[derive(Serialize, Deserialize, Debug)]\n#[serde(deny_unknown_fields)]\npub struct TenantLocationConfigResponse {\n    pub shards: Vec<TenantShardLocation>,\n    // If the shards' ShardCount 
count is >1, stripe_size will be set.\n    pub stripe_size: Option<ShardStripeSize>,\n}\n\n#[derive(Serialize, Deserialize, Debug)]\n#[serde(deny_unknown_fields)]\npub struct TenantConfigRequest {\n    pub tenant_id: TenantId,\n    #[serde(flatten)]\n    pub config: TenantConfig, // as we have a flattened field, we should reject all unknown fields in it\n}\n\nimpl std::ops::Deref for TenantConfigRequest {\n    type Target = TenantConfig;\n\n    fn deref(&self) -> &Self::Target {\n        &self.config\n    }\n}\n\nimpl TenantConfigRequest {\n    pub fn new(tenant_id: TenantId) -> TenantConfigRequest {\n        let config = TenantConfig::default();\n        TenantConfigRequest { tenant_id, config }\n    }\n}\n\n#[derive(Serialize, Deserialize, Debug)]\n#[serde(deny_unknown_fields)]\npub struct TenantConfigPatchRequest {\n    pub tenant_id: TenantId,\n    #[serde(flatten)]\n    pub config: TenantConfigPatch, // as we have a flattened field, we should reject all unknown fields in it\n}\n\n#[derive(Serialize, Deserialize, Debug)]\npub struct TenantWaitLsnRequest {\n    #[serde(flatten)]\n    pub timelines: HashMap<TimelineId, Lsn>,\n    pub timeout: Duration,\n}\n\n/// See [`TenantState::attachment_status`] and the OpenAPI docs for context.\n#[derive(Serialize, Deserialize, Clone)]\n#[serde(tag = \"slug\", content = \"data\", rename_all = \"snake_case\")]\npub enum TenantAttachmentStatus {\n    Maybe,\n    Attached,\n    Failed { reason: String },\n}\n\n#[derive(Serialize, Deserialize, Clone)]\npub struct TenantInfo {\n    pub id: TenantShardId,\n    // NB: intentionally not part of OpenAPI, we don't want to commit to a specific set of TenantState's\n    pub state: TenantState,\n    /// Sum of the size of all layer files.\n    /// If a layer is present in both local FS and S3, it counts only once.\n    pub current_physical_size: Option<u64>, // physical size is only included in `tenant_status` endpoint\n    pub attachment_status: TenantAttachmentStatus,\n    pub 
generation: u32,\n\n    /// Opaque explanation if gc is being blocked.\n    ///\n    /// Only looked up for the individual tenant detail, not the listing.\n    #[serde(skip_serializing_if = \"Option::is_none\")]\n    pub gc_blocking: Option<String>,\n}\n\n#[derive(Serialize, Deserialize, Clone)]\npub struct TenantDetails {\n    #[serde(flatten)]\n    pub tenant_info: TenantInfo,\n\n    pub walredo: Option<WalRedoManagerStatus>,\n\n    pub timelines: Vec<TimelineId>,\n}\n\n#[derive(Serialize, Deserialize, PartialEq, Eq, Clone, Copy, Debug)]\npub enum TimelineArchivalState {\n    Archived,\n    Unarchived,\n}\n\n#[derive(Serialize, Deserialize, PartialEq, Eq, Clone)]\npub enum TimelineVisibilityState {\n    Visible,\n    Invisible,\n}\n\n#[derive(Serialize, Deserialize, PartialEq, Eq, Clone)]\npub struct TimelineArchivalConfigRequest {\n    pub state: TimelineArchivalState,\n}\n\n#[derive(Serialize, Deserialize, PartialEq, Eq, Clone)]\npub struct TimelinePatchIndexPartRequest {\n    pub rel_size_migration: Option<RelSizeMigration>,\n    pub rel_size_migrated_at: Option<Lsn>,\n    pub gc_compaction_last_completed_lsn: Option<Lsn>,\n    pub applied_gc_cutoff_lsn: Option<Lsn>,\n    #[serde(default)]\n    pub force_index_update: bool,\n}\n\n#[derive(Debug, Serialize, Deserialize, Clone)]\npub struct TimelinesInfoAndOffloaded {\n    pub timelines: Vec<TimelineInfo>,\n    pub offloaded: Vec<OffloadedTimelineInfo>,\n}\n\n/// Analog of [`TimelineInfo`] for offloaded timelines.\n#[derive(Debug, Serialize, Deserialize, Clone)]\npub struct OffloadedTimelineInfo {\n    pub tenant_id: TenantShardId,\n    pub timeline_id: TimelineId,\n    /// Whether the timeline has a parent it has been branched off from or not\n    pub ancestor_timeline_id: Option<TimelineId>,\n    /// Whether to retain the branch lsn at the ancestor or not\n    pub ancestor_retain_lsn: Option<Lsn>,\n    /// The time point when the timeline was archived\n    pub archived_at: 
chrono::DateTime<chrono::Utc>,\n}\n\n#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]\n#[serde(rename_all = \"camelCase\")]\npub enum RelSizeMigration {\n    /// The tenant is using the old rel_size format.\n    /// Note that this enum is persisted as `Option<RelSizeMigration>` in the index part, so\n    /// `None` is the same as `Some(RelSizeMigration::Legacy)`.\n    Legacy,\n    /// The tenant is migrating to the new rel_size format. Both old and new rel_size format are\n    /// persisted in the storage. The read path will read both formats and validate them.\n    Migrating,\n    /// The tenant has migrated to the new rel_size format. Only the new rel_size format is persisted\n    /// in the storage, and the read path will not read the old format.\n    Migrated,\n}\n\n/// This represents the output of the \"timeline_detail\" and \"timeline_list\" API calls.\n#[derive(Debug, Serialize, Deserialize, Clone)]\npub struct TimelineInfo {\n    pub tenant_id: TenantShardId,\n    pub timeline_id: TimelineId,\n\n    pub ancestor_timeline_id: Option<TimelineId>,\n    pub ancestor_lsn: Option<Lsn>,\n    pub last_record_lsn: Lsn,\n    pub prev_record_lsn: Option<Lsn>,\n\n    /// The LSN up to which GC has advanced: older data may still exist but it is not available for clients.\n    /// This LSN is not suitable for deciding where to create branches etc: use [`TimelineInfo::min_readable_lsn`] instead,\n    /// as it is easier to reason about.\n    #[serde(default)]\n    pub applied_gc_cutoff_lsn: Lsn,\n\n    /// The upper bound of data which is either already GC'ed, or elegible to be GC'ed at any time based on PITR interval.\n    /// This LSN represents the \"end of history\" for this timeline, and callers should use it to figure out the oldest\n    /// LSN at which it is legal to create a branch or ephemeral endpoint.\n    ///\n    /// Note that holders of valid LSN leases may be able to create branches and read pages earlier\n    /// than this LSN, but new 
leases may not be taken out earlier than this LSN.\n    #[serde(default)]\n    pub min_readable_lsn: Lsn,\n\n    pub disk_consistent_lsn: Lsn,\n\n    /// The LSN that we have succesfully uploaded to remote storage\n    pub remote_consistent_lsn: Lsn,\n\n    /// The LSN that we are advertizing to safekeepers\n    pub remote_consistent_lsn_visible: Lsn,\n\n    /// The LSN from the start of the root timeline (never changes)\n    pub initdb_lsn: Lsn,\n\n    pub current_logical_size: u64,\n    pub current_logical_size_is_accurate: bool,\n\n    pub directory_entries_counts: Vec<u64>,\n\n    /// Sum of the size of all layer files.\n    /// If a layer is present in both local FS and S3, it counts only once.\n    pub current_physical_size: Option<u64>, // is None when timeline is Unloaded\n    pub current_logical_size_non_incremental: Option<u64>,\n\n    /// How many bytes of WAL are within this branch's pitr_interval.  If the pitr_interval goes\n    /// beyond the branch's branch point, we only count up to the branch point.\n    pub pitr_history_size: u64,\n\n    /// Whether this branch's branch point is within its ancestor's PITR interval (i.e. any\n    /// ancestor data used by this branch would have been retained anyway).  
If this is false, then\n    /// this branch may be imposing a cost on the ancestor by causing it to retain layers that it would\n    /// otherwise be able to GC.\n    pub within_ancestor_pitr: bool,\n\n    pub timeline_dir_layer_file_size_sum: Option<u64>,\n\n    pub wal_source_connstr: Option<String>,\n    pub last_received_msg_lsn: Option<Lsn>,\n    /// the timestamp (in microseconds) of the last received message\n    pub last_received_msg_ts: Option<u128>,\n    pub pg_version: PgMajorVersion,\n\n    pub state: TimelineState,\n\n    pub walreceiver_status: String,\n\n    // ALWAYS add new fields at the end of the struct with `Option` to ensure forward/backward compatibility.\n    // Backward compatibility: you will get a JSON not containing the newly-added field.\n    // Forward compatibility: a previous version of the pageserver will receive a JSON. serde::Deserialize does\n    // not deny unknown fields by default so it's safe to set the field to some value, though it won't be\n    // read.\n    /// Whether the timeline is archived.\n    pub is_archived: Option<bool>,\n\n    /// The status of the rel_size migration.\n    pub rel_size_migration: Option<RelSizeMigration>,\n    pub rel_size_migrated_at: Option<Lsn>,\n\n    /// Whether the timeline is invisible in synthetic size calculations.\n    pub is_invisible: Option<bool>,\n    // HADRON: the largest LSN below which all page updates have been included in the image layers.\n    #[serde(skip_serializing_if = \"Option::is_none\")]\n    pub image_consistent_lsn: Option<Lsn>,\n}\n\n#[derive(Debug, Clone, Serialize, Deserialize)]\npub struct LayerMapInfo {\n    pub in_memory_layers: Vec<InMemoryLayerInfo>,\n    pub historic_layers: Vec<HistoricLayerInfo>,\n}\n\n/// The residence status of a layer\n#[derive(Debug, Clone, Copy, Serialize, Deserialize)]\npub enum LayerResidenceStatus {\n    /// Residence status for a layer file that exists locally.\n    /// It may also exist on the remote, we don't care here.\n    
Resident,\n    /// Residence status for a layer file that only exists on the remote.\n    Evicted,\n}\n\n#[serde_as]\n#[derive(Debug, Clone, Serialize, Deserialize)]\npub struct LayerAccessStats {\n    #[serde_as(as = \"serde_with::TimestampMilliSeconds\")]\n    pub access_time: SystemTime,\n\n    #[serde_as(as = \"serde_with::TimestampMilliSeconds\")]\n    pub residence_time: SystemTime,\n\n    pub visible: bool,\n}\n\n#[derive(Debug, Clone, Serialize, Deserialize)]\n#[serde(tag = \"kind\")]\npub enum InMemoryLayerInfo {\n    Open { lsn_start: Lsn },\n    Frozen { lsn_start: Lsn, lsn_end: Lsn },\n}\n\n#[derive(Debug, Clone, Serialize, Deserialize)]\n#[serde(tag = \"kind\")]\npub enum HistoricLayerInfo {\n    Delta {\n        layer_file_name: String,\n        layer_file_size: u64,\n\n        lsn_start: Lsn,\n        lsn_end: Lsn,\n        remote: bool,\n        access_stats: LayerAccessStats,\n\n        l0: bool,\n    },\n    Image {\n        layer_file_name: String,\n        layer_file_size: u64,\n\n        lsn_start: Lsn,\n        remote: bool,\n        access_stats: LayerAccessStats,\n    },\n}\n\nimpl HistoricLayerInfo {\n    pub fn layer_file_name(&self) -> &str {\n        match self {\n            HistoricLayerInfo::Delta {\n                layer_file_name, ..\n            } => layer_file_name,\n            HistoricLayerInfo::Image {\n                layer_file_name, ..\n            } => layer_file_name,\n        }\n    }\n    pub fn is_remote(&self) -> bool {\n        match self {\n            HistoricLayerInfo::Delta { remote, .. } => *remote,\n            HistoricLayerInfo::Image { remote, .. } => *remote,\n        }\n    }\n    pub fn set_remote(&mut self, value: bool) {\n        let field = match self {\n            HistoricLayerInfo::Delta { remote, .. } => remote,\n            HistoricLayerInfo::Image { remote, .. 
} => remote,\n        };\n        *field = value;\n    }\n    pub fn layer_file_size(&self) -> u64 {\n        match self {\n            HistoricLayerInfo::Delta {\n                layer_file_size, ..\n            } => *layer_file_size,\n            HistoricLayerInfo::Image {\n                layer_file_size, ..\n            } => *layer_file_size,\n        }\n    }\n}\n\n#[derive(Debug, Serialize, Deserialize)]\npub struct DownloadRemoteLayersTaskSpawnRequest {\n    pub max_concurrent_downloads: NonZeroUsize,\n}\n\n#[derive(Debug, Serialize, Deserialize)]\npub struct IngestAuxFilesRequest {\n    pub aux_files: HashMap<String, String>,\n}\n\n#[derive(Debug, Serialize, Deserialize)]\npub struct ListAuxFilesRequest {\n    pub lsn: Lsn,\n}\n\n#[derive(Debug, Serialize, Deserialize, Clone)]\npub struct DownloadRemoteLayersTaskInfo {\n    pub task_id: String,\n    pub state: DownloadRemoteLayersTaskState,\n    pub total_layer_count: u64,         // stable once `completed`\n    pub successful_download_count: u64, // stable once `completed`\n    pub failed_download_count: u64,     // stable once `completed`\n}\n\n#[derive(Debug, Serialize, Deserialize, Clone)]\npub enum DownloadRemoteLayersTaskState {\n    Running,\n    Completed,\n    ShutDown,\n}\n\n#[derive(Debug, Serialize, Deserialize)]\npub struct TimelineGcRequest {\n    pub gc_horizon: Option<u64>,\n}\n\n#[derive(Debug, Clone, Serialize, Deserialize)]\npub struct WalRedoManagerProcessStatus {\n    pub pid: u32,\n}\n\n#[derive(Debug, Clone, Serialize, Deserialize)]\npub struct WalRedoManagerStatus {\n    pub last_redo_at: Option<chrono::DateTime<chrono::Utc>>,\n    pub process: Option<WalRedoManagerProcessStatus>,\n}\n\n/// The progress of a secondary tenant.\n///\n/// It is mostly useful when doing a long running download: e.g. 
initiating\n/// a download job, timing out while waiting for it to run, and then inspecting this status to understand\n/// what's happening.\n#[derive(Default, Debug, Serialize, Deserialize, Clone)]\npub struct SecondaryProgress {\n    /// The remote storage LastModified time of the heatmap object we last downloaded.\n    pub heatmap_mtime: Option<serde_system_time::SystemTime>,\n\n    /// The number of layers currently on-disk\n    pub layers_downloaded: usize,\n    /// The number of layers in the most recently seen heatmap\n    pub layers_total: usize,\n\n    /// The number of layer bytes currently on-disk\n    pub bytes_downloaded: u64,\n    /// The number of layer bytes in the most recently seen heatmap\n    pub bytes_total: u64,\n}\n\n#[derive(Serialize, Deserialize, Debug)]\npub struct TenantScanRemoteStorageShard {\n    pub tenant_shard_id: TenantShardId,\n    pub generation: Option<u32>,\n    pub stripe_size: Option<ShardStripeSize>,\n}\n\n#[derive(Serialize, Deserialize, Debug, Default)]\npub struct TenantScanRemoteStorageResponse {\n    pub shards: Vec<TenantScanRemoteStorageShard>,\n}\n\n#[derive(Serialize, Deserialize, Debug, Clone)]\n#[serde(rename_all = \"snake_case\")]\npub enum TenantSorting {\n    /// Total size of layers on local disk for all timelines in a shard.\n    ResidentSize,\n    /// The logical size of the largest timeline within a _tenant_ (not shard). Only tracked on\n    /// shard 0, contains the sum across all shards.\n    MaxLogicalSize,\n    /// The logical size of the largest timeline within a _tenant_ (not shard), divided by number of\n    /// shards. 
Only tracked on shard 0, and estimates the per-shard logical size.\n    MaxLogicalSizePerShard,\n}\n\nimpl Default for TenantSorting {\n    fn default() -> Self {\n        Self::ResidentSize\n    }\n}\n\n#[derive(Serialize, Deserialize, Debug, Clone)]\npub struct TopTenantShardsRequest {\n    // How would you like to sort the tenants?\n    pub order_by: TenantSorting,\n\n    // How many results?\n    pub limit: usize,\n\n    // Omit tenants with more than this many shards (e.g. if this is the max number of shards\n    // that the caller would ever split to)\n    pub where_shards_lt: Option<ShardCount>,\n\n    // Omit tenants where the ordering metric is less than this (this is an optimization to\n    // let us quickly exclude numerous tiny shards)\n    pub where_gt: Option<u64>,\n}\n\n#[derive(Serialize, Deserialize, Debug, PartialEq, Eq)]\npub struct TopTenantShardItem {\n    pub id: TenantShardId,\n\n    /// Total size of layers on local disk for all timelines in this shard.\n    pub resident_size: u64,\n\n    /// Total size of layers in remote storage for all timelines in this shard.\n    pub physical_size: u64,\n\n    /// The largest logical size of a timeline within this _tenant_ (not shard). This is only\n    /// tracked on shard 0, and contains the sum of the logical size across all shards.\n    pub max_logical_size: u64,\n\n    /// The largest logical size of a timeline within this _tenant_ (not shard) divided by number of\n    /// shards. 
This is only tracked on shard 0, and is only an estimate as we divide it evenly by\n    /// shard count, rounded up.\n    pub max_logical_size_per_shard: u64,\n}\n\n#[derive(Serialize, Deserialize, Debug, Default)]\npub struct TopTenantShardsResponse {\n    pub shards: Vec<TopTenantShardItem>,\n}\n\npub mod virtual_file {\n\n    #[derive(\n        Copy,\n        Clone,\n        PartialEq,\n        Eq,\n        Hash,\n        strum_macros::EnumString,\n        strum_macros::Display,\n        serde_with::DeserializeFromStr,\n        serde_with::SerializeDisplay,\n        Debug,\n    )]\n    #[strum(serialize_all = \"kebab-case\")]\n    pub enum IoEngineKind {\n        StdFs,\n        #[cfg(target_os = \"linux\")]\n        TokioEpollUring,\n    }\n\n    /// Direct IO modes for a pageserver.\n    #[derive(\n        Copy,\n        Clone,\n        PartialEq,\n        Eq,\n        Hash,\n        strum_macros::EnumString,\n        strum_macros::EnumIter,\n        strum_macros::Display,\n        serde_with::DeserializeFromStr,\n        serde_with::SerializeDisplay,\n        Debug,\n    )]\n    #[strum(serialize_all = \"kebab-case\")]\n    #[repr(u8)]\n    pub enum IoMode {\n        /// Uses buffered IO.\n        Buffered,\n        /// Uses direct IO for reads only.\n        Direct,\n        /// Use direct IO for reads and writes.\n        DirectRw,\n    }\n\n    impl IoMode {\n        pub fn preferred() -> Self {\n            IoMode::DirectRw\n        }\n    }\n\n    impl TryFrom<u8> for IoMode {\n        type Error = u8;\n\n        fn try_from(value: u8) -> Result<Self, Self::Error> {\n            Ok(match value {\n                v if v == (IoMode::Buffered as u8) => IoMode::Buffered,\n                v if v == (IoMode::Direct as u8) => IoMode::Direct,\n                v if v == (IoMode::DirectRw as u8) => IoMode::DirectRw,\n                x => return Err(x),\n            })\n        }\n    }\n}\n\n#[derive(Debug, Clone, Serialize, Deserialize)]\npub struct 
ScanDisposableKeysResponse {\n    pub disposable_count: usize,\n    pub not_disposable_count: usize,\n}\n\n// This is a cut-down version of TenantHistorySize from the pageserver crate, omitting fields\n// that require pageserver-internal types.  It is sufficient to get the total size.\n#[derive(Serialize, Deserialize, Debug)]\npub struct TenantHistorySize {\n    pub id: TenantId,\n    /// Size is a mixture of WAL and logical size, so the unit is bytes.\n    ///\n    /// Will be none if `?inputs_only=true` was given.\n    pub size: Option<u64>,\n}\n\n#[derive(Debug, Serialize, Deserialize)]\npub struct PageTraceEvent {\n    pub key: CompactKey,\n    pub effective_lsn: Lsn,\n    pub time: SystemTime,\n}\n\nimpl Default for PageTraceEvent {\n    fn default() -> Self {\n        Self {\n            key: Default::default(),\n            effective_lsn: Default::default(),\n            time: std::time::UNIX_EPOCH,\n        }\n    }\n}\n\n#[cfg(test)]\nmod tests {\n    use std::str::FromStr;\n\n    use serde_json::json;\n\n    use super::*;\n\n    #[test]\n    fn test_tenantinfo_serde() {\n        // Test serialization/deserialization of TenantInfo\n        let original_active = TenantInfo {\n            id: TenantShardId::unsharded(TenantId::generate()),\n            state: TenantState::Active,\n            current_physical_size: Some(42),\n            attachment_status: TenantAttachmentStatus::Attached,\n            generation: 1,\n            gc_blocking: None,\n        };\n        let expected_active = json!({\n            \"id\": original_active.id.to_string(),\n            \"state\": {\n                \"slug\": \"Active\",\n            },\n            \"current_physical_size\": 42,\n            \"attachment_status\": {\n                \"slug\":\"attached\",\n            },\n            \"generation\" : 1\n        });\n\n        let original_broken = TenantInfo {\n            id: TenantShardId::unsharded(TenantId::generate()),\n            state: TenantState::Broken 
{\n                reason: \"reason\".into(),\n                backtrace: \"backtrace info\".into(),\n            },\n            current_physical_size: Some(42),\n            attachment_status: TenantAttachmentStatus::Attached,\n            generation: 1,\n            gc_blocking: None,\n        };\n        let expected_broken = json!({\n            \"id\": original_broken.id.to_string(),\n            \"state\": {\n                \"slug\": \"Broken\",\n                \"data\": {\n                    \"backtrace\": \"backtrace info\",\n                    \"reason\": \"reason\",\n                }\n            },\n            \"current_physical_size\": 42,\n            \"attachment_status\": {\n                \"slug\":\"attached\",\n            },\n            \"generation\" : 1\n        });\n\n        assert_eq!(\n            serde_json::to_value(&original_active).unwrap(),\n            expected_active\n        );\n\n        assert_eq!(\n            serde_json::to_value(&original_broken).unwrap(),\n            expected_broken\n        );\n        assert!(format!(\"{:?}\", &original_broken.state).contains(\"reason\"));\n        assert!(format!(\"{:?}\", &original_broken.state).contains(\"backtrace info\"));\n    }\n\n    #[test]\n    fn test_reject_unknown_field() {\n        let id = TenantId::generate();\n        let config_request = json!({\n            \"tenant_id\": id.to_string(),\n            \"unknown_field\": \"unknown_value\".to_string(),\n        });\n        let err = serde_json::from_value::<TenantConfigRequest>(config_request).unwrap_err();\n        assert!(\n            err.to_string().contains(\"unknown field `unknown_field`\"),\n            \"expect unknown field `unknown_field` error, got: {err}\"\n        );\n    }\n\n    #[test]\n    fn tenantstatus_activating_serde() {\n        let states = [TenantState::Activating(ActivatingFrom::Attaching)];\n        let expected = \"[{\\\"slug\\\":\\\"Activating\\\",\\\"data\\\":\\\"Attaching\\\"}]\";\n\n  
      let actual = serde_json::to_string(&states).unwrap();\n\n        assert_eq!(actual, expected);\n\n        let parsed = serde_json::from_str::<Vec<TenantState>>(&actual).unwrap();\n\n        assert_eq!(states.as_slice(), &parsed);\n    }\n\n    #[test]\n    fn tenantstatus_activating_strum() {\n        // tests added, because we use these for metrics\n        let examples = [\n            (line!(), TenantState::Attaching, \"Attaching\"),\n            (\n                line!(),\n                TenantState::Activating(ActivatingFrom::Attaching),\n                \"Activating\",\n            ),\n            (line!(), TenantState::Active, \"Active\"),\n            (\n                line!(),\n                TenantState::Stopping { progress: None },\n                \"Stopping\",\n            ),\n            (\n                line!(),\n                TenantState::Stopping {\n                    progress: Some(completion::Barrier::default()),\n                },\n                \"Stopping\",\n            ),\n            (\n                line!(),\n                TenantState::Broken {\n                    reason: \"Example\".into(),\n                    backtrace: \"Looooong backtrace\".into(),\n                },\n                \"Broken\",\n            ),\n        ];\n\n        for (line, rendered, expected) in examples {\n            let actual: &'static str = rendered.into();\n            assert_eq!(actual, expected, \"example on {line}\");\n        }\n    }\n\n    #[test]\n    fn test_image_compression_algorithm_parsing() {\n        use ImageCompressionAlgorithm::*;\n        let cases = [\n            (\"disabled\", Disabled),\n            (\"zstd\", Zstd { level: None }),\n            (\"zstd(18)\", Zstd { level: Some(18) }),\n            (\"zstd(-3)\", Zstd { level: Some(-3) }),\n        ];\n\n        for (display, expected) in cases {\n            assert_eq!(\n                ImageCompressionAlgorithm::from_str(display).unwrap(),\n                
expected,\n                \"parsing works\"\n            );\n            assert_eq!(format!(\"{expected}\"), display, \"Display FromStr roundtrip\");\n\n            let ser = serde_json::to_string(&expected).expect(\"serialization\");\n            assert_eq!(\n                serde_json::from_str::<ImageCompressionAlgorithm>(&ser).unwrap(),\n                expected,\n                \"serde roundtrip\"\n            );\n\n            assert_eq!(\n                serde_json::Value::String(display.to_string()),\n                serde_json::to_value(expected).unwrap(),\n                \"Display is the serde serialization\"\n            );\n        }\n    }\n\n    #[test]\n    fn test_tenant_config_patch_request_serde() {\n        let patch_request = TenantConfigPatchRequest {\n            tenant_id: TenantId::from_str(\"17c6d121946a61e5ab0fe5a2fd4d8215\").unwrap(),\n            config: TenantConfigPatch {\n                checkpoint_distance: FieldPatch::Upsert(42),\n                gc_horizon: FieldPatch::Remove,\n                compaction_threshold: FieldPatch::Noop,\n                ..TenantConfigPatch::default()\n            },\n        };\n\n        let json = serde_json::to_string(&patch_request).unwrap();\n\n        let expected = r#\"{\"tenant_id\":\"17c6d121946a61e5ab0fe5a2fd4d8215\",\"checkpoint_distance\":42,\"gc_horizon\":null}\"#;\n        assert_eq!(json, expected);\n\n        let decoded: TenantConfigPatchRequest = serde_json::from_str(&json).unwrap();\n        assert_eq!(decoded.tenant_id, patch_request.tenant_id);\n        assert_eq!(decoded.config, patch_request.config);\n\n        // Now apply the patch to a config to demonstrate semantics\n\n        let base = TenantConfig {\n            checkpoint_distance: Some(28),\n            gc_horizon: Some(100),\n            compaction_target_size: Some(1024),\n            ..Default::default()\n        };\n\n        let expected = TenantConfig {\n            checkpoint_distance: Some(42),\n            
gc_horizon: None,\n            ..base.clone()\n        };\n\n        let patched = base.apply_patch(decoded.config).unwrap();\n\n        assert_eq!(patched, expected);\n    }\n}\n"
  },
  {
    "path": "libs/pageserver_api/src/pagestream_api.rs",
    "content": "//! Rust definitions of the libpq-based pagestream API\n//!\n//! See also the C implementation of the same API in pgxn/neon/pagestore_client.h\n\nuse std::io::{BufRead, Read};\n\nuse crate::reltag::RelTag;\n\nuse byteorder::{BigEndian, ReadBytesExt};\nuse bytes::{Buf, BufMut, Bytes, BytesMut};\nuse utils::lsn::Lsn;\n\n/// Block size.\n///\n/// XXX: We assume 8k block size in the SLRU fetch API. It's not great to hardcode\n/// that in the protocol, because Postgres supports different block sizes as a compile\n/// time option.\nconst BLCKSZ: usize = 8192;\n\n// Wrapped in libpq CopyData\n#[derive(PartialEq, Eq, Debug)]\npub enum PagestreamFeMessage {\n    Exists(PagestreamExistsRequest),\n    Nblocks(PagestreamNblocksRequest),\n    GetPage(PagestreamGetPageRequest),\n    DbSize(PagestreamDbSizeRequest),\n    GetSlruSegment(PagestreamGetSlruSegmentRequest),\n    #[cfg(feature = \"testing\")]\n    Test(PagestreamTestRequest),\n}\n\n// Wrapped in libpq CopyData\n#[derive(Debug, strum_macros::EnumProperty)]\npub enum PagestreamBeMessage {\n    Exists(PagestreamExistsResponse),\n    Nblocks(PagestreamNblocksResponse),\n    GetPage(PagestreamGetPageResponse),\n    Error(PagestreamErrorResponse),\n    DbSize(PagestreamDbSizeResponse),\n    GetSlruSegment(PagestreamGetSlruSegmentResponse),\n    #[cfg(feature = \"testing\")]\n    Test(PagestreamTestResponse),\n}\n\n// Keep in sync with `pagestore_client.h`\n#[repr(u8)]\nenum PagestreamFeMessageTag {\n    Exists = 0,\n    Nblocks = 1,\n    GetPage = 2,\n    DbSize = 3,\n    GetSlruSegment = 4,\n    /* future tags above this line */\n    /// For testing purposes, not available in production.\n    #[cfg(feature = \"testing\")]\n    Test = 99,\n}\n\n// Keep in sync with `pagestore_client.h`\n#[repr(u8)]\nenum PagestreamBeMessageTag {\n    Exists = 100,\n    Nblocks = 101,\n    GetPage = 102,\n    Error = 103,\n    DbSize = 104,\n    GetSlruSegment = 105,\n    /* future tags above this line */\n    /// For testing 
purposes, not available in production.\n    #[cfg(feature = \"testing\")]\n    Test = 199,\n}\n\nimpl TryFrom<u8> for PagestreamFeMessageTag {\n    type Error = u8;\n    fn try_from(value: u8) -> Result<Self, u8> {\n        match value {\n            0 => Ok(PagestreamFeMessageTag::Exists),\n            1 => Ok(PagestreamFeMessageTag::Nblocks),\n            2 => Ok(PagestreamFeMessageTag::GetPage),\n            3 => Ok(PagestreamFeMessageTag::DbSize),\n            4 => Ok(PagestreamFeMessageTag::GetSlruSegment),\n            #[cfg(feature = \"testing\")]\n            99 => Ok(PagestreamFeMessageTag::Test),\n            _ => Err(value),\n        }\n    }\n}\n\nimpl TryFrom<u8> for PagestreamBeMessageTag {\n    type Error = u8;\n    fn try_from(value: u8) -> Result<Self, u8> {\n        match value {\n            100 => Ok(PagestreamBeMessageTag::Exists),\n            101 => Ok(PagestreamBeMessageTag::Nblocks),\n            102 => Ok(PagestreamBeMessageTag::GetPage),\n            103 => Ok(PagestreamBeMessageTag::Error),\n            104 => Ok(PagestreamBeMessageTag::DbSize),\n            105 => Ok(PagestreamBeMessageTag::GetSlruSegment),\n            #[cfg(feature = \"testing\")]\n            199 => Ok(PagestreamBeMessageTag::Test),\n            _ => Err(value),\n        }\n    }\n}\n\n// A GetPage request contains two LSN values:\n//\n// request_lsn: Get the page version at this point in time.  Lsn::Max is a special value that means\n// \"get the latest version present\". It's used by the primary server, which knows that no one else\n// is writing WAL. 'not_modified_since' must be set to a proper value even if request_lsn is\n// Lsn::Max. Standby servers use the current replay LSN as the request LSN.\n//\n// not_modified_since: Hint to the pageserver that the client knows that the page has not been\n// modified between 'not_modified_since' and the request LSN. 
It's always correct to set\n// 'not_modified_since equal' to 'request_lsn' (unless Lsn::Max is used as the 'request_lsn'), but\n// passing an earlier LSN can speed up the request, by allowing the pageserver to process the\n// request without waiting for 'request_lsn' to arrive.\n//\n// The now-defunct V1 interface contained only one LSN, and a boolean 'latest' flag. The V1 interface was\n// sufficient for the primary; the 'lsn' was equivalent to the 'not_modified_since' value, and\n// 'latest' was set to true. The V2 interface was added because there was no correct way for a\n// standby to request a page at a particular non-latest LSN, and also include the\n// 'not_modified_since' hint. That led to an awkward choice of either using an old LSN in the\n// request, if the standby knows that the page hasn't been modified since, and risk getting an error\n// if that LSN has fallen behind the GC horizon, or requesting the current replay LSN, which could\n// require the pageserver unnecessarily to wait for the WAL to arrive up to that point. The new V2\n// interface allows sending both LSNs, and let the pageserver do the right thing. There was no\n// difference in the responses between V1 and V2.\n//\n// V3 version of protocol adds request ID to all requests. 
This request ID is also included in response\n// as well as other fields from requests, which allows to verify that we receive response for our request.\n// We copy fields from request to response to make checking more reliable: request ID is formed from process ID\n// and local counter, so in principle there can be duplicated requests IDs if process PID is reused.\n//\n#[derive(Debug, PartialEq, Eq, Clone, Copy)]\npub enum PagestreamProtocolVersion {\n    V2,\n    V3,\n}\n\npub type RequestId = u64;\n\n#[derive(Debug, Default, PartialEq, Eq, Clone, Copy)]\npub struct PagestreamRequest {\n    pub reqid: RequestId,\n    pub request_lsn: Lsn,\n    pub not_modified_since: Lsn,\n}\n\n#[derive(Debug, PartialEq, Eq, Clone, Copy)]\npub struct PagestreamExistsRequest {\n    pub hdr: PagestreamRequest,\n    pub rel: RelTag,\n}\n\n#[derive(Debug, PartialEq, Eq, Clone, Copy)]\npub struct PagestreamNblocksRequest {\n    pub hdr: PagestreamRequest,\n    pub rel: RelTag,\n}\n\n#[derive(Debug, Default, PartialEq, Eq, Clone, Copy)]\npub struct PagestreamGetPageRequest {\n    pub hdr: PagestreamRequest,\n    pub rel: RelTag,\n    pub blkno: u32,\n}\n\n#[derive(Debug, PartialEq, Eq, Clone, Copy)]\npub struct PagestreamDbSizeRequest {\n    pub hdr: PagestreamRequest,\n    pub dbnode: u32,\n}\n\n#[derive(Debug, PartialEq, Eq, Clone, Copy)]\npub struct PagestreamGetSlruSegmentRequest {\n    pub hdr: PagestreamRequest,\n    pub kind: u8,\n    pub segno: u32,\n}\n\n#[derive(Debug)]\npub struct PagestreamExistsResponse {\n    pub req: PagestreamExistsRequest,\n    pub exists: bool,\n}\n\n#[derive(Debug)]\npub struct PagestreamNblocksResponse {\n    pub req: PagestreamNblocksRequest,\n    pub n_blocks: u32,\n}\n\n#[derive(Debug)]\npub struct PagestreamGetPageResponse {\n    pub req: PagestreamGetPageRequest,\n    pub page: Bytes,\n}\n\n#[derive(Debug)]\npub struct PagestreamGetSlruSegmentResponse {\n    pub req: PagestreamGetSlruSegmentRequest,\n    pub segment: 
Bytes,\n}\n\n#[derive(Debug)]\npub struct PagestreamErrorResponse {\n    pub req: PagestreamRequest,\n    pub message: String,\n}\n\n#[derive(Debug)]\npub struct PagestreamDbSizeResponse {\n    pub req: PagestreamDbSizeRequest,\n    pub db_size: i64,\n}\n\n#[cfg(feature = \"testing\")]\n#[derive(Debug, PartialEq, Eq, Clone)]\npub struct PagestreamTestRequest {\n    pub hdr: PagestreamRequest,\n    pub batch_key: u64,\n    pub message: String,\n}\n\n#[cfg(feature = \"testing\")]\n#[derive(Debug)]\npub struct PagestreamTestResponse {\n    pub req: PagestreamTestRequest,\n}\n\nimpl PagestreamFeMessage {\n    /// Serialize a compute -> pageserver message. This is currently only used in testing\n    /// tools. Always uses protocol version 3.\n    pub fn serialize(&self) -> Bytes {\n        let mut bytes = BytesMut::new();\n\n        match self {\n            Self::Exists(req) => {\n                bytes.put_u8(PagestreamFeMessageTag::Exists as u8);\n                bytes.put_u64(req.hdr.reqid);\n                bytes.put_u64(req.hdr.request_lsn.0);\n                bytes.put_u64(req.hdr.not_modified_since.0);\n                bytes.put_u32(req.rel.spcnode);\n                bytes.put_u32(req.rel.dbnode);\n                bytes.put_u32(req.rel.relnode);\n                bytes.put_u8(req.rel.forknum);\n            }\n\n            Self::Nblocks(req) => {\n                bytes.put_u8(PagestreamFeMessageTag::Nblocks as u8);\n                bytes.put_u64(req.hdr.reqid);\n                bytes.put_u64(req.hdr.request_lsn.0);\n                bytes.put_u64(req.hdr.not_modified_since.0);\n                bytes.put_u32(req.rel.spcnode);\n                bytes.put_u32(req.rel.dbnode);\n                bytes.put_u32(req.rel.relnode);\n                bytes.put_u8(req.rel.forknum);\n            }\n\n            Self::GetPage(req) => {\n                bytes.put_u8(PagestreamFeMessageTag::GetPage as u8);\n                bytes.put_u64(req.hdr.reqid);\n                
bytes.put_u64(req.hdr.request_lsn.0);\n                bytes.put_u64(req.hdr.not_modified_since.0);\n                bytes.put_u32(req.rel.spcnode);\n                bytes.put_u32(req.rel.dbnode);\n                bytes.put_u32(req.rel.relnode);\n                bytes.put_u8(req.rel.forknum);\n                bytes.put_u32(req.blkno);\n            }\n\n            Self::DbSize(req) => {\n                bytes.put_u8(PagestreamFeMessageTag::DbSize as u8);\n                bytes.put_u64(req.hdr.reqid);\n                bytes.put_u64(req.hdr.request_lsn.0);\n                bytes.put_u64(req.hdr.not_modified_since.0);\n                bytes.put_u32(req.dbnode);\n            }\n\n            Self::GetSlruSegment(req) => {\n                bytes.put_u8(PagestreamFeMessageTag::GetSlruSegment as u8);\n                bytes.put_u64(req.hdr.reqid);\n                bytes.put_u64(req.hdr.request_lsn.0);\n                bytes.put_u64(req.hdr.not_modified_since.0);\n                bytes.put_u8(req.kind);\n                bytes.put_u32(req.segno);\n            }\n            #[cfg(feature = \"testing\")]\n            Self::Test(req) => {\n                bytes.put_u8(PagestreamFeMessageTag::Test as u8);\n                bytes.put_u64(req.hdr.reqid);\n                bytes.put_u64(req.hdr.request_lsn.0);\n                bytes.put_u64(req.hdr.not_modified_since.0);\n                bytes.put_u64(req.batch_key);\n                let message = req.message.as_bytes();\n                bytes.put_u64(message.len() as u64);\n                bytes.put_slice(message);\n            }\n        }\n\n        bytes.into()\n    }\n\n    pub fn parse<R: std::io::Read>(\n        body: &mut R,\n        protocol_version: PagestreamProtocolVersion,\n    ) -> anyhow::Result<PagestreamFeMessage> {\n        // these correspond to the NeonMessageTag enum in pagestore_client.h\n        //\n        // TODO: consider using protobuf or serde bincode for less error prone\n        // serialization.\n      
  let msg_tag = body.read_u8()?;\n        let (reqid, request_lsn, not_modified_since) = match protocol_version {\n            PagestreamProtocolVersion::V2 => (\n                0,\n                Lsn::from(body.read_u64::<BigEndian>()?),\n                Lsn::from(body.read_u64::<BigEndian>()?),\n            ),\n            PagestreamProtocolVersion::V3 => (\n                body.read_u64::<BigEndian>()?,\n                Lsn::from(body.read_u64::<BigEndian>()?),\n                Lsn::from(body.read_u64::<BigEndian>()?),\n            ),\n        };\n\n        match PagestreamFeMessageTag::try_from(msg_tag)\n            .map_err(|tag: u8| anyhow::anyhow!(\"invalid tag {tag}\"))?\n        {\n            PagestreamFeMessageTag::Exists => {\n                Ok(PagestreamFeMessage::Exists(PagestreamExistsRequest {\n                    hdr: PagestreamRequest {\n                        reqid,\n                        request_lsn,\n                        not_modified_since,\n                    },\n                    rel: RelTag {\n                        spcnode: body.read_u32::<BigEndian>()?,\n                        dbnode: body.read_u32::<BigEndian>()?,\n                        relnode: body.read_u32::<BigEndian>()?,\n                        forknum: body.read_u8()?,\n                    },\n                }))\n            }\n            PagestreamFeMessageTag::Nblocks => {\n                Ok(PagestreamFeMessage::Nblocks(PagestreamNblocksRequest {\n                    hdr: PagestreamRequest {\n                        reqid,\n                        request_lsn,\n                        not_modified_since,\n                    },\n                    rel: RelTag {\n                        spcnode: body.read_u32::<BigEndian>()?,\n                        dbnode: body.read_u32::<BigEndian>()?,\n                        relnode: body.read_u32::<BigEndian>()?,\n                        forknum: body.read_u8()?,\n                    },\n                }))\n            
}\n            PagestreamFeMessageTag::GetPage => {\n                Ok(PagestreamFeMessage::GetPage(PagestreamGetPageRequest {\n                    hdr: PagestreamRequest {\n                        reqid,\n                        request_lsn,\n                        not_modified_since,\n                    },\n                    rel: RelTag {\n                        spcnode: body.read_u32::<BigEndian>()?,\n                        dbnode: body.read_u32::<BigEndian>()?,\n                        relnode: body.read_u32::<BigEndian>()?,\n                        forknum: body.read_u8()?,\n                    },\n                    blkno: body.read_u32::<BigEndian>()?,\n                }))\n            }\n            PagestreamFeMessageTag::DbSize => {\n                Ok(PagestreamFeMessage::DbSize(PagestreamDbSizeRequest {\n                    hdr: PagestreamRequest {\n                        reqid,\n                        request_lsn,\n                        not_modified_since,\n                    },\n                    dbnode: body.read_u32::<BigEndian>()?,\n                }))\n            }\n            PagestreamFeMessageTag::GetSlruSegment => Ok(PagestreamFeMessage::GetSlruSegment(\n                PagestreamGetSlruSegmentRequest {\n                    hdr: PagestreamRequest {\n                        reqid,\n                        request_lsn,\n                        not_modified_since,\n                    },\n                    kind: body.read_u8()?,\n                    segno: body.read_u32::<BigEndian>()?,\n                },\n            )),\n            #[cfg(feature = \"testing\")]\n            PagestreamFeMessageTag::Test => Ok(PagestreamFeMessage::Test(PagestreamTestRequest {\n                hdr: PagestreamRequest {\n                    reqid,\n                    request_lsn,\n                    not_modified_since,\n                },\n                batch_key: body.read_u64::<BigEndian>()?,\n                message: {\n                   
 let len = body.read_u64::<BigEndian>()?;\n                    let mut buf = vec![0; len as usize];\n                    body.read_exact(&mut buf)?;\n                    String::from_utf8(buf)?\n                },\n            })),\n        }\n    }\n}\n\nimpl PagestreamBeMessage {\n    pub fn serialize(&self, protocol_version: PagestreamProtocolVersion) -> Bytes {\n        let mut bytes = BytesMut::new();\n\n        use PagestreamBeMessageTag as Tag;\n        match protocol_version {\n            PagestreamProtocolVersion::V2 => {\n                match self {\n                    Self::Exists(resp) => {\n                        bytes.put_u8(Tag::Exists as u8);\n                        bytes.put_u8(resp.exists as u8);\n                    }\n\n                    Self::Nblocks(resp) => {\n                        bytes.put_u8(Tag::Nblocks as u8);\n                        bytes.put_u32(resp.n_blocks);\n                    }\n\n                    Self::GetPage(resp) => {\n                        bytes.put_u8(Tag::GetPage as u8);\n                        bytes.put(&resp.page[..])\n                    }\n\n                    Self::Error(resp) => {\n                        bytes.put_u8(Tag::Error as u8);\n                        bytes.put(resp.message.as_bytes());\n                        bytes.put_u8(0); // null terminator\n                    }\n                    Self::DbSize(resp) => {\n                        bytes.put_u8(Tag::DbSize as u8);\n                        bytes.put_i64(resp.db_size);\n                    }\n\n                    Self::GetSlruSegment(resp) => {\n                        bytes.put_u8(Tag::GetSlruSegment as u8);\n                        bytes.put_u32((resp.segment.len() / BLCKSZ) as u32);\n                        bytes.put(&resp.segment[..]);\n                    }\n\n                    #[cfg(feature = \"testing\")]\n                    Self::Test(resp) => {\n                        bytes.put_u8(Tag::Test as u8);\n                        
bytes.put_u64(resp.req.batch_key);\n                        let message = resp.req.message.as_bytes();\n                        bytes.put_u64(message.len() as u64);\n                        bytes.put_slice(message);\n                    }\n                }\n            }\n            PagestreamProtocolVersion::V3 => {\n                match self {\n                    Self::Exists(resp) => {\n                        bytes.put_u8(Tag::Exists as u8);\n                        bytes.put_u64(resp.req.hdr.reqid);\n                        bytes.put_u64(resp.req.hdr.request_lsn.0);\n                        bytes.put_u64(resp.req.hdr.not_modified_since.0);\n                        bytes.put_u32(resp.req.rel.spcnode);\n                        bytes.put_u32(resp.req.rel.dbnode);\n                        bytes.put_u32(resp.req.rel.relnode);\n                        bytes.put_u8(resp.req.rel.forknum);\n                        bytes.put_u8(resp.exists as u8);\n                    }\n\n                    Self::Nblocks(resp) => {\n                        bytes.put_u8(Tag::Nblocks as u8);\n                        bytes.put_u64(resp.req.hdr.reqid);\n                        bytes.put_u64(resp.req.hdr.request_lsn.0);\n                        bytes.put_u64(resp.req.hdr.not_modified_since.0);\n                        bytes.put_u32(resp.req.rel.spcnode);\n                        bytes.put_u32(resp.req.rel.dbnode);\n                        bytes.put_u32(resp.req.rel.relnode);\n                        bytes.put_u8(resp.req.rel.forknum);\n                        bytes.put_u32(resp.n_blocks);\n                    }\n\n                    Self::GetPage(resp) => {\n                        bytes.put_u8(Tag::GetPage as u8);\n                        bytes.put_u64(resp.req.hdr.reqid);\n                        bytes.put_u64(resp.req.hdr.request_lsn.0);\n                        bytes.put_u64(resp.req.hdr.not_modified_since.0);\n                        bytes.put_u32(resp.req.rel.spcnode);\n         
               bytes.put_u32(resp.req.rel.dbnode);\n                        bytes.put_u32(resp.req.rel.relnode);\n                        bytes.put_u8(resp.req.rel.forknum);\n                        bytes.put_u32(resp.req.blkno);\n                        bytes.put(&resp.page[..])\n                    }\n\n                    Self::Error(resp) => {\n                        bytes.put_u8(Tag::Error as u8);\n                        bytes.put_u64(resp.req.reqid);\n                        bytes.put_u64(resp.req.request_lsn.0);\n                        bytes.put_u64(resp.req.not_modified_since.0);\n                        bytes.put(resp.message.as_bytes());\n                        bytes.put_u8(0); // null terminator\n                    }\n                    Self::DbSize(resp) => {\n                        bytes.put_u8(Tag::DbSize as u8);\n                        bytes.put_u64(resp.req.hdr.reqid);\n                        bytes.put_u64(resp.req.hdr.request_lsn.0);\n                        bytes.put_u64(resp.req.hdr.not_modified_since.0);\n                        bytes.put_u32(resp.req.dbnode);\n                        bytes.put_i64(resp.db_size);\n                    }\n\n                    Self::GetSlruSegment(resp) => {\n                        bytes.put_u8(Tag::GetSlruSegment as u8);\n                        bytes.put_u64(resp.req.hdr.reqid);\n                        bytes.put_u64(resp.req.hdr.request_lsn.0);\n                        bytes.put_u64(resp.req.hdr.not_modified_since.0);\n                        bytes.put_u8(resp.req.kind);\n                        bytes.put_u32(resp.req.segno);\n                        bytes.put_u32((resp.segment.len() / BLCKSZ) as u32);\n                        bytes.put(&resp.segment[..]);\n                    }\n\n                    #[cfg(feature = \"testing\")]\n                    Self::Test(resp) => {\n                        bytes.put_u8(Tag::Test as u8);\n                        bytes.put_u64(resp.req.hdr.reqid);\n              
          bytes.put_u64(resp.req.hdr.request_lsn.0);\n                        bytes.put_u64(resp.req.hdr.not_modified_since.0);\n                        bytes.put_u64(resp.req.batch_key);\n                        let message = resp.req.message.as_bytes();\n                        bytes.put_u64(message.len() as u64);\n                        bytes.put_slice(message);\n                    }\n                }\n            }\n        }\n        bytes.into()\n    }\n\n    pub fn deserialize(buf: Bytes) -> anyhow::Result<Self> {\n        let mut buf = buf.reader();\n        let msg_tag = buf.read_u8()?;\n\n        use PagestreamBeMessageTag as Tag;\n        let ok =\n            match Tag::try_from(msg_tag).map_err(|tag: u8| anyhow::anyhow!(\"invalid tag {tag}\"))? {\n                Tag::Exists => {\n                    let reqid = buf.read_u64::<BigEndian>()?;\n                    let request_lsn = Lsn(buf.read_u64::<BigEndian>()?);\n                    let not_modified_since = Lsn(buf.read_u64::<BigEndian>()?);\n                    let rel = RelTag {\n                        spcnode: buf.read_u32::<BigEndian>()?,\n                        dbnode: buf.read_u32::<BigEndian>()?,\n                        relnode: buf.read_u32::<BigEndian>()?,\n                        forknum: buf.read_u8()?,\n                    };\n                    let exists = buf.read_u8()? 
!= 0;\n                    Self::Exists(PagestreamExistsResponse {\n                        req: PagestreamExistsRequest {\n                            hdr: PagestreamRequest {\n                                reqid,\n                                request_lsn,\n                                not_modified_since,\n                            },\n                            rel,\n                        },\n                        exists,\n                    })\n                }\n                Tag::Nblocks => {\n                    let reqid = buf.read_u64::<BigEndian>()?;\n                    let request_lsn = Lsn(buf.read_u64::<BigEndian>()?);\n                    let not_modified_since = Lsn(buf.read_u64::<BigEndian>()?);\n                    let rel = RelTag {\n                        spcnode: buf.read_u32::<BigEndian>()?,\n                        dbnode: buf.read_u32::<BigEndian>()?,\n                        relnode: buf.read_u32::<BigEndian>()?,\n                        forknum: buf.read_u8()?,\n                    };\n                    let n_blocks = buf.read_u32::<BigEndian>()?;\n                    Self::Nblocks(PagestreamNblocksResponse {\n                        req: PagestreamNblocksRequest {\n                            hdr: PagestreamRequest {\n                                reqid,\n                                request_lsn,\n                                not_modified_since,\n                            },\n                            rel,\n                        },\n                        n_blocks,\n                    })\n                }\n                Tag::GetPage => {\n                    let reqid = buf.read_u64::<BigEndian>()?;\n                    let request_lsn = Lsn(buf.read_u64::<BigEndian>()?);\n                    let not_modified_since = Lsn(buf.read_u64::<BigEndian>()?);\n                    let rel = RelTag {\n                        spcnode: buf.read_u32::<BigEndian>()?,\n                        dbnode: 
buf.read_u32::<BigEndian>()?,\n                        relnode: buf.read_u32::<BigEndian>()?,\n                        forknum: buf.read_u8()?,\n                    };\n                    let blkno = buf.read_u32::<BigEndian>()?;\n                    let mut page = vec![0; 8192]; // TODO: use MaybeUninit\n                    buf.read_exact(&mut page)?;\n                    Self::GetPage(PagestreamGetPageResponse {\n                        req: PagestreamGetPageRequest {\n                            hdr: PagestreamRequest {\n                                reqid,\n                                request_lsn,\n                                not_modified_since,\n                            },\n                            rel,\n                            blkno,\n                        },\n                        page: page.into(),\n                    })\n                }\n                Tag::Error => {\n                    let reqid = buf.read_u64::<BigEndian>()?;\n                    let request_lsn = Lsn(buf.read_u64::<BigEndian>()?);\n                    let not_modified_since = Lsn(buf.read_u64::<BigEndian>()?);\n                    let mut msg = Vec::new();\n                    buf.read_until(0, &mut msg)?;\n                    let cstring = std::ffi::CString::from_vec_with_nul(msg)?;\n                    let rust_str = cstring.to_str()?;\n                    Self::Error(PagestreamErrorResponse {\n                        req: PagestreamRequest {\n                            reqid,\n                            request_lsn,\n                            not_modified_since,\n                        },\n                        message: rust_str.to_owned(),\n                    })\n                }\n                Tag::DbSize => {\n                    let reqid = buf.read_u64::<BigEndian>()?;\n                    let request_lsn = Lsn(buf.read_u64::<BigEndian>()?);\n                    let not_modified_since = Lsn(buf.read_u64::<BigEndian>()?);\n                
    let dbnode = buf.read_u32::<BigEndian>()?;\n                    let db_size = buf.read_i64::<BigEndian>()?;\n                    Self::DbSize(PagestreamDbSizeResponse {\n                        req: PagestreamDbSizeRequest {\n                            hdr: PagestreamRequest {\n                                reqid,\n                                request_lsn,\n                                not_modified_since,\n                            },\n                            dbnode,\n                        },\n                        db_size,\n                    })\n                }\n                Tag::GetSlruSegment => {\n                    let reqid = buf.read_u64::<BigEndian>()?;\n                    let request_lsn = Lsn(buf.read_u64::<BigEndian>()?);\n                    let not_modified_since = Lsn(buf.read_u64::<BigEndian>()?);\n                    let kind = buf.read_u8()?;\n                    let segno = buf.read_u32::<BigEndian>()?;\n                    let n_blocks = buf.read_u32::<BigEndian>()?;\n                    let mut segment = vec![0; n_blocks as usize * BLCKSZ];\n                    buf.read_exact(&mut segment)?;\n                    Self::GetSlruSegment(PagestreamGetSlruSegmentResponse {\n                        req: PagestreamGetSlruSegmentRequest {\n                            hdr: PagestreamRequest {\n                                reqid,\n                                request_lsn,\n                                not_modified_since,\n                            },\n                            kind,\n                            segno,\n                        },\n                        segment: segment.into(),\n                    })\n                }\n                #[cfg(feature = \"testing\")]\n                Tag::Test => {\n                    let reqid = buf.read_u64::<BigEndian>()?;\n                    let request_lsn = Lsn(buf.read_u64::<BigEndian>()?);\n                    let not_modified_since = 
Lsn(buf.read_u64::<BigEndian>()?);\n                    let batch_key = buf.read_u64::<BigEndian>()?;\n                    let len = buf.read_u64::<BigEndian>()?;\n                    let mut msg = vec![0; len as usize];\n                    buf.read_exact(&mut msg)?;\n                    let message = String::from_utf8(msg)?;\n                    Self::Test(PagestreamTestResponse {\n                        req: PagestreamTestRequest {\n                            hdr: PagestreamRequest {\n                                reqid,\n                                request_lsn,\n                                not_modified_since,\n                            },\n                            batch_key,\n                            message,\n                        },\n                    })\n                }\n            };\n        let remaining = buf.into_inner();\n        if !remaining.is_empty() {\n            anyhow::bail!(\n                \"remaining bytes in msg with tag={msg_tag}: {}\",\n                remaining.len()\n            );\n        }\n        Ok(ok)\n    }\n\n    pub fn kind(&self) -> &'static str {\n        match self {\n            Self::Exists(_) => \"Exists\",\n            Self::Nblocks(_) => \"Nblocks\",\n            Self::GetPage(_) => \"GetPage\",\n            Self::Error(_) => \"Error\",\n            Self::DbSize(_) => \"DbSize\",\n            Self::GetSlruSegment(_) => \"GetSlruSegment\",\n            #[cfg(feature = \"testing\")]\n            Self::Test(_) => \"Test\",\n        }\n    }\n}\n\n#[cfg(test)]\nmod tests {\n    use super::*;\n\n    #[test]\n    fn test_pagestream() {\n        // Test serialization/deserialization of PagestreamFeMessage\n        let messages = vec![\n            PagestreamFeMessage::Exists(PagestreamExistsRequest {\n                hdr: PagestreamRequest {\n                    reqid: 0,\n                    request_lsn: Lsn(4),\n                    not_modified_since: Lsn(3),\n                },\n                
rel: RelTag {\n                    forknum: 1,\n                    spcnode: 2,\n                    dbnode: 3,\n                    relnode: 4,\n                },\n            }),\n            PagestreamFeMessage::Nblocks(PagestreamNblocksRequest {\n                hdr: PagestreamRequest {\n                    reqid: 0,\n                    request_lsn: Lsn(4),\n                    not_modified_since: Lsn(4),\n                },\n                rel: RelTag {\n                    forknum: 1,\n                    spcnode: 2,\n                    dbnode: 3,\n                    relnode: 4,\n                },\n            }),\n            PagestreamFeMessage::GetPage(PagestreamGetPageRequest {\n                hdr: PagestreamRequest {\n                    reqid: 0,\n                    request_lsn: Lsn(4),\n                    not_modified_since: Lsn(3),\n                },\n                rel: RelTag {\n                    forknum: 1,\n                    spcnode: 2,\n                    dbnode: 3,\n                    relnode: 4,\n                },\n                blkno: 7,\n            }),\n            PagestreamFeMessage::DbSize(PagestreamDbSizeRequest {\n                hdr: PagestreamRequest {\n                    reqid: 0,\n                    request_lsn: Lsn(4),\n                    not_modified_since: Lsn(3),\n                },\n                dbnode: 7,\n            }),\n        ];\n        for msg in messages {\n            let bytes = msg.serialize();\n            let reconstructed =\n                PagestreamFeMessage::parse(&mut bytes.reader(), PagestreamProtocolVersion::V3)\n                    .unwrap();\n            assert!(msg == reconstructed);\n        }\n    }\n}\n"
  },
  {
    "path": "libs/pageserver_api/src/reltag.rs",
    "content": "use std::cmp::Ordering;\nuse std::fmt;\n\nuse postgres_ffi_types::Oid;\nuse postgres_ffi_types::constants::GLOBALTABLESPACE_OID;\nuse postgres_ffi_types::forknum::{MAIN_FORKNUM, forkname_to_number, forknumber_to_name};\nuse serde::{Deserialize, Serialize};\n\n///\n/// Relation data file segment id throughout the Postgres cluster.\n///\n/// Every data file in Postgres is uniquely identified by 4 numbers:\n/// - relation id / node (`relnode`)\n/// - database id (`dbnode`)\n/// - tablespace id (`spcnode`), in short this is a unique id of a separate\n///   directory to store data files.\n/// - forknumber (`forknum`) is used to split different kinds of data of the same relation\n///   between some set of files (`relnode`, `relnode_fsm`, `relnode_vm`).\n///\n/// In native Postgres code `RelFileNode` structure and individual `ForkNumber` value\n/// are used for the same purpose.\n/// [See more related comments here](https://github.com/postgres/postgres/blob/99c5852e20a0987eca1c38ba0c09329d4076b6a0/src/include/storage/relfilenode.h#L57).\n///\n// FIXME: should move 'forknum' as last field to keep this consistent with Postgres.\n// Then we could replace the custom Ord and PartialOrd implementations below with\n// deriving them. This will require changes in walredoproc.c.\n#[derive(Debug, Default, PartialEq, Eq, Hash, Clone, Copy, Serialize, Deserialize)]\npub struct RelTag {\n    pub forknum: u8,\n    pub spcnode: Oid,\n    pub dbnode: Oid,\n    pub relnode: Oid,\n}\n\n/// Block number within a relation or SLRU. 
This matches PostgreSQL's BlockNumber type.\npub type BlockNumber = u32;\n\nimpl PartialOrd for RelTag {\n    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {\n        Some(self.cmp(other))\n    }\n}\n\nimpl Ord for RelTag {\n    fn cmp(&self, other: &Self) -> Ordering {\n        // Custom ordering where we put forknum to the end of the list\n        let other_tup = (other.spcnode, other.dbnode, other.relnode, other.forknum);\n        (self.spcnode, self.dbnode, self.relnode, self.forknum).cmp(&other_tup)\n    }\n}\n\n/// Display RelTag in the same format that's used in most PostgreSQL debug messages:\n///\n/// ```text\n/// <spcnode>/<dbnode>/<relnode>[_fsm|_vm|_init]\n/// ```\nimpl fmt::Display for RelTag {\n    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {\n        if let Some(forkname) = forknumber_to_name(self.forknum) {\n            write!(\n                f,\n                \"{}/{}/{}_{}\",\n                self.spcnode, self.dbnode, self.relnode, forkname\n            )\n        } else {\n            write!(f, \"{}/{}/{}\", self.spcnode, self.dbnode, self.relnode)\n        }\n    }\n}\n\n#[derive(Debug, thiserror::Error)]\npub enum ParseRelTagError {\n    #[error(\"invalid forknum\")]\n    InvalidForknum(#[source] std::num::ParseIntError),\n    #[error(\"missing triplet member {}\", .0)]\n    MissingTripletMember(usize),\n    #[error(\"invalid triplet member {}\", .0)]\n    InvalidTripletMember(usize, #[source] std::num::ParseIntError),\n}\n\nimpl std::str::FromStr for RelTag {\n    type Err = ParseRelTagError;\n\n    fn from_str(s: &str) -> Result<Self, Self::Err> {\n        use ParseRelTagError::*;\n\n        // FIXME: in postgres logs this separator is dot\n        // Example:\n        //     could not read block 2 in rel 1663/208101/2620.1 from page server at lsn 0/2431E6F0\n        // with a regex we could get this more painlessly\n        let (triplet, forknum) = match s.split_once('_').or_else(|| s.split_once('.')) {\n          
  Some((t, f)) => {\n                let forknum = forkname_to_number(Some(f));\n                let forknum = if let Ok(f) = forknum {\n                    f\n                } else {\n                    f.parse::<u8>().map_err(InvalidForknum)?\n                };\n\n                (t, Some(forknum))\n            }\n            None => (s, None),\n        };\n\n        let mut split = triplet\n            .splitn(3, '/')\n            .enumerate()\n            .map(|(i, s)| s.parse::<u32>().map_err(|e| InvalidTripletMember(i, e)));\n        let spcnode = split.next().ok_or(MissingTripletMember(0))??;\n        let dbnode = split.next().ok_or(MissingTripletMember(1))??;\n        let relnode = split.next().ok_or(MissingTripletMember(2))??;\n\n        Ok(RelTag {\n            spcnode,\n            forknum: forknum.unwrap_or(MAIN_FORKNUM),\n            dbnode,\n            relnode,\n        })\n    }\n}\n\nimpl RelTag {\n    pub fn to_segfile_name(&self, segno: u32) -> String {\n        let mut name = if self.spcnode == GLOBALTABLESPACE_OID {\n            \"global/\".to_string()\n        } else {\n            format!(\"base/{}/\", self.dbnode)\n        };\n\n        name += &self.relnode.to_string();\n\n        if let Some(fork_name) = forknumber_to_name(self.forknum) {\n            name += \"_\";\n            name += fork_name;\n        }\n\n        if segno != 0 {\n            name += \".\";\n            name += &segno.to_string();\n        }\n\n        name\n    }\n\n    pub fn with_forknum(&self, forknum: u8) -> Self {\n        RelTag {\n            forknum,\n            spcnode: self.spcnode,\n            dbnode: self.dbnode,\n            relnode: self.relnode,\n        }\n    }\n}\n\n///\n/// Non-relation transaction status files (clog (a.k.a. 
pg_xact) and\n/// pg_multixact) in Postgres are handled by SLRU (Simple LRU) buffer,\n/// hence the name.\n///\n/// These files are global for a postgres instance.\n///\n/// These files are divided into segments, which are divided into\n/// pages of the same BLCKSZ as used for relation files.\n///\n#[derive(\n    Debug,\n    Clone,\n    Copy,\n    Hash,\n    Serialize,\n    Deserialize,\n    PartialEq,\n    Eq,\n    PartialOrd,\n    Ord,\n    strum_macros::EnumIter,\n    strum_macros::FromRepr,\n    enum_map::Enum,\n)]\n#[repr(u8)]\npub enum SlruKind {\n    Clog = 0,\n    MultiXactMembers,\n    MultiXactOffsets,\n}\n\nimpl fmt::Display for SlruKind {\n    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {\n        match self {\n            Self::Clog => write!(f, \"pg_xact\"),\n            Self::MultiXactMembers => write!(f, \"pg_multixact/members\"),\n            Self::MultiXactOffsets => write!(f, \"pg_multixact/offsets\"),\n        }\n    }\n}\n"
  },
  {
    "path": "libs/pageserver_api/src/shard.rs",
    "content": "//! See docs/rfcs/031-sharding-static.md for an overview of sharding.\n//!\n//! This module contains a variety of types used to represent the concept of sharding\n//! a Neon tenant across multiple physical shards.  Since there are quite a few of these,\n//! we provide a summary here.\n//!\n//! Types used to describe shards:\n//! - [`ShardCount`] describes how many shards make up a tenant, plus the magic `unsharded` value\n//!   which identifies a tenant which is not shard-aware.  This means its storage paths do not include\n//!   a shard suffix.\n//! - [`ShardNumber`] is simply the zero-based index of a shard within a tenant.\n//! - [`ShardIndex`] is the 2-tuple of `ShardCount` and `ShardNumber`, it's just like a `TenantShardId`\n//!   without the tenant ID.  This is useful for things that are implicitly scoped to a particular\n//!   tenant, such as layer files.\n//! - [`ShardIdentity`] is the full description of a particular shard's parameters, in sufficient\n//!   detail to convert a [`Key`] to a [`ShardNumber`] when deciding where to write/read.\n//! - The [`ShardSlug`] is a terse formatter for ShardCount and ShardNumber, written as\n//!   four hex digits.  An unsharded tenant is `0000`.\n//! - [`TenantShardId`] is the unique ID of a particular shard within a particular tenant\n//!\n//! Types used to describe the parameters for data distribution in a sharded tenant:\n//! - [`ShardStripeSize`] controls how long contiguous runs of [`Key`]s (stripes) are when distributed across\n//!   multiple shards.  Its value is given in 8kiB pages.\n//! - [`ShardLayout`] describes the data distribution scheme, and at time of writing is\n//!   always 1 (`LAYOUT_V1`): this is provided for future upgrades that might introduce different\n//!   data distribution schemes.\n//!\n//! Examples:\n//! - A legacy unsharded tenant has one shard with ShardCount(0), ShardNumber(0), and its slug is 0000\n//! 
- A single sharded tenant has one shard with ShardCount(1), ShardNumber(0), and its slug is 0001\n//! - In a tenant with 4 shards, each shard has ShardCount(N), ShardNumber(i) where i in 0..N-1 (inclusive),\n//!   and their slugs are 0004, 0104, 0204, and 0304.\n\nuse std::hash::{Hash, Hasher};\n\n#[doc(inline)]\npub use ::utils::shard::*;\nuse postgres_ffi_types::forknum::INIT_FORKNUM;\nuse serde::{Deserialize, Serialize};\nuse utils::critical;\n\nuse crate::key::Key;\nuse crate::models::ShardParameters;\n\n/// The ShardIdentity contains enough information to map a [`Key`] to a [`ShardNumber`],\n/// and to check whether that [`ShardNumber`] is the same as the current shard.\n#[derive(Clone, Copy, Serialize, Deserialize, Eq, PartialEq, Debug)]\npub struct ShardIdentity {\n    pub number: ShardNumber,\n    pub count: ShardCount,\n    pub stripe_size: ShardStripeSize,\n    layout: ShardLayout,\n}\n\n/// Hash implementation\n///\n/// The stripe size cannot change dynamically, so it can be ignored for efficiency reasons.\nimpl Hash for ShardIdentity {\n    fn hash<H: Hasher>(&self, state: &mut H) {\n        let ShardIdentity {\n            number,\n            count,\n            stripe_size: _,\n            layout: _,\n        } = self;\n\n        number.0.hash(state);\n        count.0.hash(state);\n    }\n}\n\n/// Layout version: for future upgrades where we might change how the key->shard mapping works\n#[derive(Clone, Copy, Serialize, Deserialize, Eq, PartialEq, Hash, Debug)]\npub struct ShardLayout(u8);\n\nconst LAYOUT_V1: ShardLayout = ShardLayout(1);\n/// ShardIdentity uses a magic layout value to indicate if it is unusable\nconst LAYOUT_BROKEN: ShardLayout = ShardLayout(255);\n\n/// The default stripe size in pages. 
16 MiB divided by 8 kiB page size.\n///\n/// A lower stripe size distributes ingest load better across shards, but reduces IO amortization.\n/// 16 MiB appears to be a reasonable balance: <https://github.com/neondatabase/neon/pull/10510>.\npub const DEFAULT_STRIPE_SIZE: ShardStripeSize = ShardStripeSize(16 * 1024 / 8);\n\n#[derive(thiserror::Error, Debug, PartialEq, Eq)]\npub enum ShardConfigError {\n    #[error(\"Invalid shard count\")]\n    InvalidCount,\n    #[error(\"Invalid shard number\")]\n    InvalidNumber,\n    #[error(\"Invalid stripe size\")]\n    InvalidStripeSize,\n}\n\nimpl ShardIdentity {\n    /// An identity with number=0 count=0 is a \"none\" identity, which represents legacy\n    /// tenants.  Modern single-shard tenants should not use this: they should\n    /// have number=0 count=1.\n    pub const fn unsharded() -> Self {\n        Self {\n            number: ShardNumber(0),\n            count: ShardCount(0),\n            layout: LAYOUT_V1,\n            stripe_size: DEFAULT_STRIPE_SIZE,\n        }\n    }\n\n    /// An unsharded identity with the given stripe size (if non-zero). 
This is typically used to\n    /// carry over a stripe size for an unsharded tenant from persistent storage.\n    pub fn unsharded_with_stripe_size(stripe_size: ShardStripeSize) -> Self {\n        let mut shard_identity = Self::unsharded();\n        if stripe_size.0 > 0 {\n            shard_identity.stripe_size = stripe_size;\n        }\n        shard_identity\n    }\n\n    /// A broken instance of this type is only used for `TenantState::Broken` tenants,\n    /// which are constructed in code paths that don't have access to proper configuration.\n    ///\n    /// A ShardIdentity in this state may not be used for anything, and should not be persisted.\n    /// Enforcement is via assertions, to avoid making our interface fallible for this\n    /// edge case: it is the Tenant's responsibility to avoid trying to do any I/O when in a broken\n    /// state, and by extension to avoid trying to do any page->shard resolution.\n    pub fn broken(number: ShardNumber, count: ShardCount) -> Self {\n        Self {\n            number,\n            count,\n            layout: LAYOUT_BROKEN,\n            stripe_size: DEFAULT_STRIPE_SIZE,\n        }\n    }\n\n    /// The \"unsharded\" value is distinct from simply having a single shard: it represents\n    /// a tenant which is not shard-aware at all, and whose storage paths will not include\n    /// a shard suffix.\n    pub fn is_unsharded(&self) -> bool {\n        self.number == ShardNumber(0) && self.count == ShardCount(0)\n    }\n\n    /// Count must be nonzero, and number must be < count. 
To construct\n    /// the legacy case (count==0), use Self::unsharded instead.\n    pub fn new(\n        number: ShardNumber,\n        count: ShardCount,\n        stripe_size: ShardStripeSize,\n    ) -> Result<Self, ShardConfigError> {\n        if count.0 == 0 {\n            Err(ShardConfigError::InvalidCount)\n        } else if number.0 > count.0 - 1 {\n            Err(ShardConfigError::InvalidNumber)\n        } else if stripe_size.0 == 0 {\n            Err(ShardConfigError::InvalidStripeSize)\n        } else {\n            Ok(Self {\n                number,\n                count,\n                layout: LAYOUT_V1,\n                stripe_size,\n            })\n        }\n    }\n\n    /// For use when creating ShardIdentity instances for new shards, where a creation request\n    /// specifies the ShardParameters that apply to all shards.\n    pub fn from_params(number: ShardNumber, params: ShardParameters) -> Self {\n        Self {\n            number,\n            count: params.count,\n            layout: LAYOUT_V1,\n            stripe_size: params.stripe_size,\n        }\n    }\n\n    /// Asserts that the given shard identities are equal. Changes to shard parameters will likely\n    /// result in data corruption.\n    pub fn assert_equal(&self, other: ShardIdentity) {\n        if self != &other {\n            // TODO: for now, we're conservative and just log errors in production. 
Turn this into a\n            // real assertion when we're confident it doesn't misfire, and also reject requests that\n            // attempt to change it with an error response.\n            critical!(\"shard identity mismatch: {self:?} != {other:?}\");\n        }\n    }\n\n    fn is_broken(&self) -> bool {\n        self.layout == LAYOUT_BROKEN\n    }\n\n    pub fn get_shard_number(&self, key: &Key) -> ShardNumber {\n        assert!(!self.is_broken());\n        key_to_shard_number(self.count, self.stripe_size, key)\n    }\n\n    /// Return true if the key is stored only on this shard. This does not include\n    /// global keys, see is_key_global().\n    ///\n    /// Shards must ingest _at least_ keys which return true from this check.\n    pub fn is_key_local(&self, key: &Key) -> bool {\n        assert!(!self.is_broken());\n        if self.count < ShardCount(2) || (key_is_shard0(key) && self.number == ShardNumber(0)) {\n            true\n        } else {\n            key_to_shard_number(self.count, self.stripe_size, key) == self.number\n        }\n    }\n\n    /// Return true if the key should be stored on all shards, not just one.\n    pub fn is_key_global(&self, key: &Key) -> bool {\n        if key.is_slru_block_key()\n            || key.is_slru_segment_size_key()\n            || key.is_aux_file_key()\n            || key.is_slru_dir_key()\n        {\n            // Special keys that are only stored on shard 0\n            false\n        } else if key.is_rel_block_key() {\n            // Ordinary relation blocks are distributed across shards\n            false\n        } else if key.is_rel_size_key() {\n            // All shards maintain rel size keys (although only shard 0 is responsible for\n            // keeping it strictly accurate, other shards just reflect the highest block they've ingested)\n            true\n        } else {\n            // For everything else, we assume it must be kept everywhere, because ingest code\n            // might assume this 
-- this covers functionality where the ingest code has\n            // not (yet) been made fully shard aware.\n            true\n        }\n    }\n\n    /// Return true if the key should be discarded if found in this shard's\n    /// data store, e.g. during compaction after a split.\n    ///\n    /// Shards _may_ drop keys which return true here, but are not obliged to.\n    pub fn is_key_disposable(&self, key: &Key) -> bool {\n        if self.count < ShardCount(2) {\n            // Fast path: unsharded tenant doesn't dispose of anything\n            return false;\n        }\n\n        if self.is_key_global(key) {\n            false\n        } else {\n            !self.is_key_local(key)\n        }\n    }\n\n    /// Obtains the shard number and count combined into a `ShardIndex`.\n    pub fn shard_index(&self) -> ShardIndex {\n        ShardIndex {\n            shard_count: self.count,\n            shard_number: self.number,\n        }\n    }\n\n    pub fn shard_slug(&self) -> String {\n        if self.count > ShardCount(0) {\n            format!(\"-{:02x}{:02x}\", self.number.0, self.count.0)\n        } else {\n            String::new()\n        }\n    }\n\n    /// Convenience for checking if this identity is the 0th shard in a tenant,\n    /// for special cases on shard 0 such as ingesting relation sizes.\n    pub fn is_shard_zero(&self) -> bool {\n        self.number == ShardNumber(0)\n    }\n}\n\n/// Whether this key is always held on shard 0 (e.g. shard 0 holds all SLRU keys\n/// in order to be able to serve basebackup requests without peer communication).\nfn key_is_shard0(key: &Key) -> bool {\n    // To decide what to shard out to shards >0, we apply a simple rule that only\n    // relation pages are distributed to shards other than shard zero. Everything else gets\n    // stored on shard 0.  
This guarantees that shard 0 can independently serve basebackup\n    // requests, and any request other than those for particular blocks in relations.\n    //\n    // The only exception to this rule is \"initfork\" data -- this relates to postgres's UNLOGGED table\n    // type. These are special relations, usually with only 0 or 1 blocks, and we store them on shard 0\n    // because they must be included in basebackups.\n    let is_initfork = key.field5 == INIT_FORKNUM;\n\n    !key.is_rel_block_key() || is_initfork\n}\n\n/// Provide the same result as the function in postgres `hashfn.h` with the same name\nfn murmurhash32(mut h: u32) -> u32 {\n    h ^= h >> 16;\n    h = h.wrapping_mul(0x85ebca6b);\n    h ^= h >> 13;\n    h = h.wrapping_mul(0xc2b2ae35);\n    h ^= h >> 16;\n    h\n}\n\n/// Provide the same result as the function in postgres `hashfn.h` with the same name\nfn hash_combine(mut a: u32, mut b: u32) -> u32 {\n    b = b.wrapping_add(0x9e3779b9);\n    b = b.wrapping_add(a << 6);\n    b = b.wrapping_add(a >> 2);\n\n    a ^= b;\n    a\n}\n\n/// Where a Key is to be distributed across shards, select the shard.  This function\n/// does not account for keys that should be broadcast across shards.\n///\n/// The hashing in this function must exactly match what we do in postgres smgr\n/// code.  
The resulting distribution of pages is intended to preserve locality within\n/// `stripe_size` ranges of contiguous block numbers in the same relation, while otherwise\n/// distributing data pseudo-randomly.\n///\n/// The mapping of key to shard is not stable across changes to ShardCount: this is intentional\n/// and will be handled at higher levels when shards are split.\npub fn key_to_shard_number(\n    count: ShardCount,\n    stripe_size: ShardStripeSize,\n    key: &Key,\n) -> ShardNumber {\n    // Fast path for un-sharded tenants or broadcast keys\n    if count < ShardCount(2) || key_is_shard0(key) {\n        return ShardNumber(0);\n    }\n\n    // relNode\n    let mut hash = murmurhash32(key.field4);\n    // blockNum/stripe size\n    hash = hash_combine(hash, murmurhash32(key.field6 / stripe_size.0));\n\n    ShardNumber((hash % count.0 as u32) as u8)\n}\n\n/// For debugging, while not exposing the internals.\n#[derive(Debug)]\n#[allow(unused)] // used by debug formatting by pagectl\nstruct KeyShardingInfo {\n    shard0: bool,\n    shard_number: ShardNumber,\n}\n\npub fn describe(\n    key: &Key,\n    shard_count: ShardCount,\n    stripe_size: ShardStripeSize,\n) -> impl std::fmt::Debug {\n    KeyShardingInfo {\n        shard0: key_is_shard0(key),\n        shard_number: key_to_shard_number(shard_count, stripe_size, key),\n    }\n}\n\n#[cfg(test)]\nmod tests {\n    use std::str::FromStr;\n\n    use utils::Hex;\n    use utils::id::TenantId;\n\n    use super::*;\n\n    const EXAMPLE_TENANT_ID: &str = \"1f359dd625e519a1a4e8d7509690f6fc\";\n\n    #[test]\n    fn tenant_shard_id_string() -> Result<(), hex::FromHexError> {\n        let example = TenantShardId {\n            tenant_id: TenantId::from_str(EXAMPLE_TENANT_ID).unwrap(),\n            shard_count: ShardCount(10),\n            shard_number: ShardNumber(7),\n        };\n\n        let encoded = format!(\"{example}\");\n\n        let expected = format!(\"{EXAMPLE_TENANT_ID}-070a\");\n        assert_eq!(&encoded, 
&expected);\n\n        let decoded = TenantShardId::from_str(&encoded)?;\n\n        assert_eq!(example, decoded);\n\n        Ok(())\n    }\n\n    #[test]\n    fn tenant_shard_id_binary() -> Result<(), hex::FromHexError> {\n        let example = TenantShardId {\n            tenant_id: TenantId::from_str(EXAMPLE_TENANT_ID).unwrap(),\n            shard_count: ShardCount(10),\n            shard_number: ShardNumber(7),\n        };\n\n        let encoded = bincode::serialize(&example).unwrap();\n        let expected: [u8; 18] = [\n            0x1f, 0x35, 0x9d, 0xd6, 0x25, 0xe5, 0x19, 0xa1, 0xa4, 0xe8, 0xd7, 0x50, 0x96, 0x90,\n            0xf6, 0xfc, 0x07, 0x0a,\n        ];\n        assert_eq!(Hex(&encoded), Hex(&expected));\n\n        let decoded = bincode::deserialize(&encoded).unwrap();\n\n        assert_eq!(example, decoded);\n\n        Ok(())\n    }\n\n    #[test]\n    fn tenant_shard_id_backward_compat() -> Result<(), hex::FromHexError> {\n        // Test that TenantShardId can decode a TenantId in human\n        // readable form\n        let example = TenantId::from_str(EXAMPLE_TENANT_ID).unwrap();\n        let encoded = format!(\"{example}\");\n\n        assert_eq!(&encoded, EXAMPLE_TENANT_ID);\n\n        let decoded = TenantShardId::from_str(&encoded)?;\n\n        assert_eq!(example, decoded.tenant_id);\n        assert_eq!(decoded.shard_count, ShardCount(0));\n        assert_eq!(decoded.shard_number, ShardNumber(0));\n\n        Ok(())\n    }\n\n    #[test]\n    fn tenant_shard_id_forward_compat() -> Result<(), hex::FromHexError> {\n        // Test that a legacy TenantShardId encodes into a form that\n        // can be decoded as TenantId\n        let example_tenant_id = TenantId::from_str(EXAMPLE_TENANT_ID).unwrap();\n        let example = TenantShardId::unsharded(example_tenant_id);\n        let encoded = format!(\"{example}\");\n\n        assert_eq!(&encoded, EXAMPLE_TENANT_ID);\n\n        let decoded = TenantId::from_str(&encoded)?;\n\n        
assert_eq!(example_tenant_id, decoded);\n\n        Ok(())\n    }\n\n    #[test]\n    fn tenant_shard_id_legacy_binary() -> Result<(), hex::FromHexError> {\n        // Unlike in human readable encoding, binary encoding does not\n        // do any special handling of legacy unsharded TenantIds: this test\n        // is equivalent to the main test for binary encoding, just verifying\n        // that the same behavior applies when we have used `unsharded()` to\n        // construct a TenantShardId.\n        let example = TenantShardId::unsharded(TenantId::from_str(EXAMPLE_TENANT_ID).unwrap());\n        let encoded = bincode::serialize(&example).unwrap();\n\n        let expected: [u8; 18] = [\n            0x1f, 0x35, 0x9d, 0xd6, 0x25, 0xe5, 0x19, 0xa1, 0xa4, 0xe8, 0xd7, 0x50, 0x96, 0x90,\n            0xf6, 0xfc, 0x00, 0x00,\n        ];\n        assert_eq!(Hex(&encoded), Hex(&expected));\n\n        let decoded = bincode::deserialize::<TenantShardId>(&encoded).unwrap();\n        assert_eq!(example, decoded);\n\n        Ok(())\n    }\n\n    #[test]\n    fn shard_identity_validation() -> Result<(), ShardConfigError> {\n        // Happy cases\n        ShardIdentity::new(ShardNumber(0), ShardCount(1), DEFAULT_STRIPE_SIZE)?;\n        ShardIdentity::new(ShardNumber(0), ShardCount(1), ShardStripeSize(1))?;\n        ShardIdentity::new(ShardNumber(254), ShardCount(255), ShardStripeSize(1))?;\n\n        assert_eq!(\n            ShardIdentity::new(ShardNumber(0), ShardCount(0), DEFAULT_STRIPE_SIZE),\n            Err(ShardConfigError::InvalidCount)\n        );\n        assert_eq!(\n            ShardIdentity::new(ShardNumber(10), ShardCount(10), DEFAULT_STRIPE_SIZE),\n            Err(ShardConfigError::InvalidNumber)\n        );\n        assert_eq!(\n            ShardIdentity::new(ShardNumber(11), ShardCount(10), DEFAULT_STRIPE_SIZE),\n            Err(ShardConfigError::InvalidNumber)\n        );\n        assert_eq!(\n            ShardIdentity::new(ShardNumber(255), ShardCount(255), 
DEFAULT_STRIPE_SIZE),\n            Err(ShardConfigError::InvalidNumber)\n        );\n        assert_eq!(\n            ShardIdentity::new(ShardNumber(0), ShardCount(1), ShardStripeSize(0)),\n            Err(ShardConfigError::InvalidStripeSize)\n        );\n\n        Ok(())\n    }\n\n    #[test]\n    fn shard_index_human_encoding() -> Result<(), hex::FromHexError> {\n        let example = ShardIndex {\n            shard_number: ShardNumber(13),\n            shard_count: ShardCount(17),\n        };\n        let expected: String = \"0d11\".to_string();\n        let encoded = format!(\"{example}\");\n        assert_eq!(&encoded, &expected);\n\n        let decoded = ShardIndex::from_str(&encoded)?;\n        assert_eq!(example, decoded);\n        Ok(())\n    }\n\n    #[test]\n    fn shard_index_binary_encoding() -> Result<(), hex::FromHexError> {\n        let example = ShardIndex {\n            shard_number: ShardNumber(13),\n            shard_count: ShardCount(17),\n        };\n        let expected: [u8; 2] = [0x0d, 0x11];\n\n        let encoded = bincode::serialize(&example).unwrap();\n        assert_eq!(Hex(&encoded), Hex(&expected));\n        let decoded = bincode::deserialize(&encoded).unwrap();\n        assert_eq!(example, decoded);\n\n        Ok(())\n    }\n\n    // These are only smoke tests to spot check that our implementation doesn't\n    // deviate from a few examples values: not aiming to validate the overall\n    // hashing algorithm.\n    #[test]\n    fn murmur_hash() {\n        assert_eq!(murmurhash32(0), 0);\n\n        assert_eq!(hash_combine(0xb1ff3b40, 0), 0xfb7923c9);\n    }\n\n    #[test]\n    fn shard_mapping() {\n        let key = Key {\n            field1: 0x00,\n            field2: 0x67f,\n            field3: 0x5,\n            field4: 0x400c,\n            field5: 0x00,\n            field6: 0x7d06,\n        };\n\n        let shard = key_to_shard_number(ShardCount(10), ShardStripeSize(32768), &key);\n        assert_eq!(shard, ShardNumber(8));\n    
}\n\n    #[test]\n    fn shard_id_split() {\n        let tenant_id = TenantId::generate();\n        let parent = TenantShardId::unsharded(tenant_id);\n\n        // Unsharded into 2\n        assert_eq!(\n            parent.split(ShardCount(2)),\n            vec![\n                TenantShardId {\n                    tenant_id,\n                    shard_count: ShardCount(2),\n                    shard_number: ShardNumber(0)\n                },\n                TenantShardId {\n                    tenant_id,\n                    shard_count: ShardCount(2),\n                    shard_number: ShardNumber(1)\n                }\n            ]\n        );\n\n        // Unsharded into 4\n        assert_eq!(\n            parent.split(ShardCount(4)),\n            vec![\n                TenantShardId {\n                    tenant_id,\n                    shard_count: ShardCount(4),\n                    shard_number: ShardNumber(0)\n                },\n                TenantShardId {\n                    tenant_id,\n                    shard_count: ShardCount(4),\n                    shard_number: ShardNumber(1)\n                },\n                TenantShardId {\n                    tenant_id,\n                    shard_count: ShardCount(4),\n                    shard_number: ShardNumber(2)\n                },\n                TenantShardId {\n                    tenant_id,\n                    shard_count: ShardCount(4),\n                    shard_number: ShardNumber(3)\n                }\n            ]\n        );\n\n        // count=1 into 2 (check this works the same as unsharded.)\n        let parent = TenantShardId {\n            tenant_id,\n            shard_count: ShardCount(1),\n            shard_number: ShardNumber(0),\n        };\n        assert_eq!(\n            parent.split(ShardCount(2)),\n            vec![\n                TenantShardId {\n                    tenant_id,\n                    shard_count: ShardCount(2),\n                    shard_number: 
ShardNumber(0)\n                },\n                TenantShardId {\n                    tenant_id,\n                    shard_count: ShardCount(2),\n                    shard_number: ShardNumber(1)\n                }\n            ]\n        );\n\n        // count=2 into count=8\n        let parent = TenantShardId {\n            tenant_id,\n            shard_count: ShardCount(2),\n            shard_number: ShardNumber(1),\n        };\n        assert_eq!(\n            parent.split(ShardCount(8)),\n            vec![\n                TenantShardId {\n                    tenant_id,\n                    shard_count: ShardCount(8),\n                    shard_number: ShardNumber(1)\n                },\n                TenantShardId {\n                    tenant_id,\n                    shard_count: ShardCount(8),\n                    shard_number: ShardNumber(3)\n                },\n                TenantShardId {\n                    tenant_id,\n                    shard_count: ShardCount(8),\n                    shard_number: ShardNumber(5)\n                },\n                TenantShardId {\n                    tenant_id,\n                    shard_count: ShardCount(8),\n                    shard_number: ShardNumber(7)\n                },\n            ]\n        );\n    }\n}\n"
  },
  {
    "path": "libs/pageserver_api/src/upcall_api.rs",
    "content": "//! Types in this file are for pageserver's upward-facing API calls to the storage controller,\n//! required for acquiring and validating tenant generation numbers.\n//!\n//! See docs/rfcs/025-generation-numbers.md\n\nuse serde::{Deserialize, Serialize};\nuse utils::generation::Generation;\nuse utils::id::{NodeId, TimelineId};\n\nuse crate::controller_api::NodeRegisterRequest;\nuse crate::models::{LocationConfigMode, ShardImportStatus};\nuse crate::shard::{ShardStripeSize, TenantShardId};\n\n/// Upcall message sent by the pageserver to the configured `control_plane_api` on\n/// startup.\n#[derive(Serialize, Deserialize)]\npub struct ReAttachRequest {\n    pub node_id: NodeId,\n\n    /// Optional inline self-registration: this is useful with the storage controller,\n    /// if the node already has a node_id set.\n    #[serde(skip_serializing_if = \"Option::is_none\", default)]\n    pub register: Option<NodeRegisterRequest>,\n\n    /// Hadron: Optional flag to indicate whether the node is starting with an empty local disk.\n    /// Will be set to true if the node couldn't find any local tenant data on startup, could be\n    /// due to the node starting for the first time or due to a local SSD failure/disk wipe event.\n    /// The flag may be used by the storage controller to update its observed state of the world\n    /// to make sure that it sends explicit location_config calls to the node following the\n    /// re-attach request.\n    pub empty_local_disk: Option<bool>,\n}\n\n#[derive(Serialize, Deserialize, Debug)]\npub struct ReAttachResponseTenant {\n    pub id: TenantShardId,\n    /// Mandatory if LocationConfigMode is None or set to an Attached* mode\n    pub r#gen: Option<u32>,\n    pub mode: LocationConfigMode,\n    pub stripe_size: ShardStripeSize,\n}\n#[derive(Serialize, Deserialize)]\npub struct ReAttachResponse {\n    pub tenants: Vec<ReAttachResponseTenant>,\n}\n\n#[derive(Serialize, Deserialize)]\npub struct ValidateRequestTenant {\n    
pub id: TenantShardId,\n    pub r#gen: u32,\n}\n\n#[derive(Serialize, Deserialize)]\npub struct ValidateRequest {\n    pub tenants: Vec<ValidateRequestTenant>,\n}\n\n#[derive(Serialize, Deserialize)]\npub struct ValidateResponse {\n    pub tenants: Vec<ValidateResponseTenant>,\n}\n\n#[derive(Serialize, Deserialize)]\npub struct ValidateResponseTenant {\n    pub id: TenantShardId,\n    pub valid: bool,\n}\n\n#[derive(Serialize, Deserialize)]\npub struct TimelineImportStatusRequest {\n    pub tenant_shard_id: TenantShardId,\n    pub timeline_id: TimelineId,\n    pub generation: Generation,\n}\n\n#[derive(Serialize, Deserialize)]\npub struct PutTimelineImportStatusRequest {\n    pub tenant_shard_id: TenantShardId,\n    pub timeline_id: TimelineId,\n    pub status: ShardImportStatus,\n    pub generation: Generation,\n}\n"
  },
  {
    "path": "libs/postgres_backend/Cargo.toml",
    "content": "[package]\nname = \"postgres_backend\"\nversion = \"0.1.0\"\nedition.workspace = true\nlicense.workspace = true\n\n[dependencies]\nanyhow.workspace = true\nbytes.workspace = true\nrustls.workspace = true\nserde.workspace = true\nthiserror.workspace = true\ntokio.workspace = true\ntokio-util.workspace = true\ntokio-rustls.workspace = true\ntracing.workspace = true\n\npq_proto.workspace = true\n\n[dev-dependencies]\nonce_cell.workspace = true\nrustls-pemfile.workspace = true\ntokio-postgres.workspace = true\ntokio-postgres-rustls.workspace = true\n"
  },
  {
    "path": "libs/postgres_backend/src/lib.rs",
    "content": "//! Server-side asynchronous Postgres connection, as limited as we need.\n//! To use, create PostgresBackend and run() it, passing the Handler\n//! implementation determining how to process the queries. Currently its API\n//! is rather narrow, but we can extend it once required.\n#![deny(unsafe_code)]\n#![deny(clippy::undocumented_unsafe_blocks)]\nuse std::future::Future;\nuse std::net::SocketAddr;\nuse std::os::fd::{AsRawFd, RawFd};\nuse std::pin::Pin;\nuse std::str::FromStr;\nuse std::sync::Arc;\nuse std::task::{Poll, ready};\nuse std::{fmt, io};\n\nuse anyhow::Context;\nuse bytes::Bytes;\nuse pq_proto::framed::{ConnectionError, Framed, FramedReader, FramedWriter};\nuse pq_proto::{\n    BeMessage, FeMessage, FeStartupPacket, ProtocolError, SQLSTATE_ADMIN_SHUTDOWN,\n    SQLSTATE_INTERNAL_ERROR, SQLSTATE_SUCCESSFUL_COMPLETION,\n};\nuse serde::{Deserialize, Serialize};\nuse tokio::io::{AsyncRead, AsyncWrite};\nuse tokio_rustls::TlsAcceptor;\nuse tokio_util::sync::CancellationToken;\nuse tracing::{debug, error, info, trace, warn};\n\n/// An error, occurred during query processing:\n/// either during the connection ([`ConnectionError`]) or before/after it.\n#[derive(thiserror::Error, Debug)]\npub enum QueryError {\n    /// The connection was lost while processing the query.\n    #[error(transparent)]\n    Disconnected(#[from] ConnectionError),\n    /// We were instructed to shutdown while processing the query\n    #[error(\"Shutting down\")]\n    Shutdown,\n    /// Query handler indicated that client should reconnect\n    #[error(\"Server requested reconnect\")]\n    Reconnect,\n    /// Query named an entity that was not found\n    #[error(\"Not found: {0}\")]\n    NotFound(std::borrow::Cow<'static, str>),\n    /// Authentication failure\n    #[error(\"Unauthorized: {0}\")]\n    Unauthorized(std::borrow::Cow<'static, str>),\n    #[error(\"Simulated Connection Error\")]\n    SimulatedConnectionError,\n    /// Some other error\n    
#[error(transparent)]\n    Other(#[from] anyhow::Error),\n}\n\nimpl From<io::Error> for QueryError {\n    fn from(e: io::Error) -> Self {\n        Self::Disconnected(ConnectionError::Io(e))\n    }\n}\n\nimpl QueryError {\n    pub fn pg_error_code(&self) -> &'static [u8; 5] {\n        match self {\n            Self::Disconnected(_) | Self::SimulatedConnectionError | Self::Reconnect => b\"08006\", // connection failure\n            Self::Shutdown => SQLSTATE_ADMIN_SHUTDOWN,\n            Self::Unauthorized(_) | Self::NotFound(_) => SQLSTATE_INTERNAL_ERROR,\n            Self::Other(_) => SQLSTATE_INTERNAL_ERROR, // internal error\n        }\n    }\n}\n\n/// Returns true if the given error is a normal consequence of a network issue,\n/// or the client closing the connection.\n///\n/// These errors can happen during normal operations,\n/// and don't indicate a bug in our code.\npub fn is_expected_io_error(e: &io::Error) -> bool {\n    use io::ErrorKind::*;\n    matches!(\n        e.kind(),\n        HostUnreachable\n            | NetworkUnreachable\n            | BrokenPipe\n            | ConnectionRefused\n            | ConnectionAborted\n            | ConnectionReset\n            | TimedOut,\n    )\n}\n\npub trait Handler<IO> {\n    /// Handle single query.\n    /// postgres_backend will issue ReadyForQuery after calling this (this\n    /// might be not what we want after CopyData streaming, but currently we don't\n    /// care). It will also flush out the output buffer.\n    fn process_query(\n        &mut self,\n        pgb: &mut PostgresBackend<IO>,\n        query_string: &str,\n    ) -> impl Future<Output = Result<(), QueryError>>;\n\n    /// Called on startup packet receipt, allows to process params.\n    ///\n    /// If Ok(false) is returned postgres_backend will skip auth -- that is needed for new users\n    /// creation in the proxy code. 
That is quite hacky and ad-hoc solution, may be we could allow\n    /// to override whole init logic in implementations.\n    fn startup(\n        &mut self,\n        _pgb: &mut PostgresBackend<IO>,\n        _sm: &FeStartupPacket,\n    ) -> Result<(), QueryError> {\n        Ok(())\n    }\n\n    /// Check auth jwt\n    fn check_auth_jwt(\n        &mut self,\n        _pgb: &mut PostgresBackend<IO>,\n        _jwt_response: &[u8],\n    ) -> Result<(), QueryError> {\n        Err(QueryError::Other(anyhow::anyhow!(\"JWT auth failed\")))\n    }\n}\n\n/// PostgresBackend protocol state.\n/// XXX: The order of the constructors matters.\n#[derive(Clone, Copy, PartialEq, Eq, PartialOrd)]\npub enum ProtoState {\n    /// Nothing happened yet.\n    Initialization,\n    /// Encryption handshake is done; waiting for encrypted Startup message.\n    Encrypted,\n    /// Waiting for password (auth token).\n    Authentication,\n    /// Performed handshake and auth, ReadyForQuery is issued.\n    Established,\n    Closed,\n}\n\n#[derive(Clone, Copy)]\npub enum ProcessMsgResult {\n    Continue,\n    Break,\n}\n\n/// Either plain TCP stream or encrypted one, implementing AsyncRead + AsyncWrite.\npub enum MaybeTlsStream<IO> {\n    Unencrypted(IO),\n    Tls(Box<tokio_rustls::server::TlsStream<IO>>),\n}\n\nimpl<IO: AsyncRead + AsyncWrite + Unpin> AsyncWrite for MaybeTlsStream<IO> {\n    fn poll_write(\n        self: Pin<&mut Self>,\n        cx: &mut std::task::Context<'_>,\n        buf: &[u8],\n    ) -> Poll<io::Result<usize>> {\n        match self.get_mut() {\n            Self::Unencrypted(stream) => Pin::new(stream).poll_write(cx, buf),\n            Self::Tls(stream) => Pin::new(stream).poll_write(cx, buf),\n        }\n    }\n    fn poll_flush(self: Pin<&mut Self>, cx: &mut std::task::Context<'_>) -> Poll<io::Result<()>> {\n        match self.get_mut() {\n            Self::Unencrypted(stream) => Pin::new(stream).poll_flush(cx),\n            Self::Tls(stream) => 
Pin::new(stream).poll_flush(cx),\n        }\n    }\n    fn poll_shutdown(\n        self: Pin<&mut Self>,\n        cx: &mut std::task::Context<'_>,\n    ) -> Poll<io::Result<()>> {\n        match self.get_mut() {\n            Self::Unencrypted(stream) => Pin::new(stream).poll_shutdown(cx),\n            Self::Tls(stream) => Pin::new(stream).poll_shutdown(cx),\n        }\n    }\n}\nimpl<IO: AsyncRead + AsyncWrite + Unpin> AsyncRead for MaybeTlsStream<IO> {\n    fn poll_read(\n        self: Pin<&mut Self>,\n        cx: &mut std::task::Context<'_>,\n        buf: &mut tokio::io::ReadBuf<'_>,\n    ) -> Poll<io::Result<()>> {\n        match self.get_mut() {\n            Self::Unencrypted(stream) => Pin::new(stream).poll_read(cx, buf),\n            Self::Tls(stream) => Pin::new(stream).poll_read(cx, buf),\n        }\n    }\n}\n\n#[derive(Debug, PartialEq, Eq, Clone, Copy, Serialize, Deserialize)]\npub enum AuthType {\n    Trust,\n    // This mimics postgres's AuthenticationCleartextPassword but instead of password expects JWT\n    NeonJWT,\n}\n\nimpl FromStr for AuthType {\n    type Err = anyhow::Error;\n\n    fn from_str(s: &str) -> Result<Self, Self::Err> {\n        match s {\n            \"Trust\" => Ok(Self::Trust),\n            \"NeonJWT\" => Ok(Self::NeonJWT),\n            _ => anyhow::bail!(\"invalid value \\\"{s}\\\" for auth type\"),\n        }\n    }\n}\n\nimpl fmt::Display for AuthType {\n    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {\n        f.write_str(match self {\n            AuthType::Trust => \"Trust\",\n            AuthType::NeonJWT => \"NeonJWT\",\n        })\n    }\n}\n\n/// Either full duplex Framed or write only half; the latter is left in\n/// PostgresBackend after call to `split`. In principle we could always store a\n/// pair of split handles, but that would force us to pay the splitting price\n/// (Arc and kinda mutex inside polling) for all uses (e.g. 
pageserver).\nenum MaybeWriteOnly<IO> {\n    Full(Framed<MaybeTlsStream<IO>>),\n    WriteOnly(FramedWriter<MaybeTlsStream<IO>>),\n    Broken, // temporary value palmed off during the split\n}\n\nimpl<IO: AsyncRead + AsyncWrite + Unpin> MaybeWriteOnly<IO> {\n    async fn read_startup_message(&mut self) -> Result<Option<FeStartupPacket>, ConnectionError> {\n        match self {\n            MaybeWriteOnly::Full(framed) => framed.read_startup_message().await,\n            MaybeWriteOnly::WriteOnly(_) => {\n                Err(io::Error::other(\"reading from write only half\").into())\n            }\n            MaybeWriteOnly::Broken => panic!(\"IO on invalid MaybeWriteOnly\"),\n        }\n    }\n\n    async fn read_message(&mut self) -> Result<Option<FeMessage>, ConnectionError> {\n        match self {\n            MaybeWriteOnly::Full(framed) => framed.read_message().await,\n            MaybeWriteOnly::WriteOnly(_) => {\n                Err(io::Error::other(\"reading from write only half\").into())\n            }\n            MaybeWriteOnly::Broken => panic!(\"IO on invalid MaybeWriteOnly\"),\n        }\n    }\n\n    fn write_message_noflush(&mut self, msg: &BeMessage<'_>) -> Result<(), ProtocolError> {\n        match self {\n            MaybeWriteOnly::Full(framed) => framed.write_message(msg),\n            MaybeWriteOnly::WriteOnly(framed_writer) => framed_writer.write_message_noflush(msg),\n            MaybeWriteOnly::Broken => panic!(\"IO on invalid MaybeWriteOnly\"),\n        }\n    }\n\n    async fn flush(&mut self) -> io::Result<()> {\n        match self {\n            MaybeWriteOnly::Full(framed) => framed.flush().await,\n            MaybeWriteOnly::WriteOnly(framed_writer) => framed_writer.flush().await,\n            MaybeWriteOnly::Broken => panic!(\"IO on invalid MaybeWriteOnly\"),\n        }\n    }\n\n    /// Cancellation safe as long as the underlying IO is cancellation safe.\n    async fn shutdown(&mut self) -> io::Result<()> {\n        match self {\n  
          MaybeWriteOnly::Full(framed) => framed.shutdown().await,\n            MaybeWriteOnly::WriteOnly(framed_writer) => framed_writer.shutdown().await,\n            MaybeWriteOnly::Broken => panic!(\"IO on invalid MaybeWriteOnly\"),\n        }\n    }\n}\n\npub struct PostgresBackend<IO> {\n    pub socket_fd: RawFd,\n    framed: MaybeWriteOnly<IO>,\n\n    pub state: ProtoState,\n\n    auth_type: AuthType,\n\n    peer_addr: SocketAddr,\n    pub tls_config: Option<Arc<rustls::ServerConfig>>,\n}\n\npub type PostgresBackendTCP = PostgresBackend<tokio::net::TcpStream>;\n\n/// Cast a byte slice to a string slice, dropping null terminator if there's one.\nfn cstr_to_str(bytes: &[u8]) -> anyhow::Result<&str> {\n    let without_null = bytes.strip_suffix(&[0]).unwrap_or(bytes);\n    std::str::from_utf8(without_null).map_err(|e| e.into())\n}\n\nimpl PostgresBackend<tokio::net::TcpStream> {\n    pub fn new(\n        socket: tokio::net::TcpStream,\n        auth_type: AuthType,\n        tls_config: Option<Arc<rustls::ServerConfig>>,\n    ) -> io::Result<Self> {\n        let peer_addr = socket.peer_addr()?;\n        let socket_fd = socket.as_raw_fd();\n        let stream = MaybeTlsStream::Unencrypted(socket);\n\n        Ok(Self {\n            socket_fd,\n            framed: MaybeWriteOnly::Full(Framed::new(stream)),\n            state: ProtoState::Initialization,\n            auth_type,\n            tls_config,\n            peer_addr,\n        })\n    }\n}\n\nimpl<IO: AsyncRead + AsyncWrite + Unpin> PostgresBackend<IO> {\n    pub fn new_from_io(\n        socket_fd: RawFd,\n        socket: IO,\n        peer_addr: SocketAddr,\n        auth_type: AuthType,\n        tls_config: Option<Arc<rustls::ServerConfig>>,\n    ) -> io::Result<Self> {\n        let stream = MaybeTlsStream::Unencrypted(socket);\n\n        Ok(Self {\n            socket_fd,\n            framed: MaybeWriteOnly::Full(Framed::new(stream)),\n            state: ProtoState::Initialization,\n            auth_type,\n    
        tls_config,\n            peer_addr,\n        })\n    }\n\n    pub fn get_peer_addr(&self) -> &SocketAddr {\n        &self.peer_addr\n    }\n\n    /// Read full message or return None if connection is cleanly closed with no\n    /// unprocessed data.\n    pub async fn read_message(&mut self) -> Result<Option<FeMessage>, ConnectionError> {\n        if let ProtoState::Closed = self.state {\n            Ok(None)\n        } else {\n            match self.framed.read_message().await {\n                Ok(m) => {\n                    trace!(\"read msg {:?}\", m);\n                    Ok(m)\n                }\n                Err(e) => {\n                    // remember not to try to read anymore\n                    self.state = ProtoState::Closed;\n                    Err(e)\n                }\n            }\n        }\n    }\n\n    /// Write message into internal output buffer, doesn't flush it. Technically\n    /// error type can be only ProtocolError here (if, unlikely, serialization\n    /// fails), but callers typically wrap it anyway.\n    pub fn write_message_noflush(\n        &mut self,\n        message: &BeMessage<'_>,\n    ) -> Result<&mut Self, ConnectionError> {\n        self.framed.write_message_noflush(message)?;\n        trace!(\"wrote msg {:?}\", message);\n        Ok(self)\n    }\n\n    /// Flush output buffer into the socket.\n    pub async fn flush(&mut self) -> io::Result<()> {\n        self.framed.flush().await\n    }\n\n    /// Polling version of `flush()`, saves the caller need to pin.\n    pub fn poll_flush(\n        &mut self,\n        cx: &mut std::task::Context<'_>,\n    ) -> Poll<Result<(), std::io::Error>> {\n        let flush_fut = std::pin::pin!(self.flush());\n        flush_fut.poll(cx)\n    }\n\n    /// Write message into internal output buffer and flush it to the stream.\n    pub async fn write_message(\n        &mut self,\n        message: &BeMessage<'_>,\n    ) -> Result<&mut Self, ConnectionError> {\n        
self.write_message_noflush(message)?;\n        self.flush().await?;\n        Ok(self)\n    }\n\n    /// Returns an AsyncWrite implementation that wraps all the data written\n    /// to it in CopyData messages, and writes them to the connection\n    ///\n    /// The caller is responsible for sending CopyOutResponse and CopyDone messages.\n    pub fn copyout_writer(&mut self) -> CopyDataWriter<IO> {\n        CopyDataWriter { pgb: self }\n    }\n\n    /// Wrapper for run_message_loop() that shuts down socket when we are done\n    pub async fn run(\n        mut self,\n        handler: &mut impl Handler<IO>,\n        cancel: &CancellationToken,\n    ) -> Result<(), QueryError> {\n        let ret = self.run_message_loop(handler, cancel).await;\n\n        tokio::select! {\n            _ = cancel.cancelled() => {\n                // do nothing; we most likely got already stopped by shutdown and will log it next.\n            }\n            _ = self.framed.shutdown() => {\n                // socket might be already closed, e.g. 
if previously received error,\n                // so ignore result.\n            },\n        }\n\n        match ret {\n            Ok(()) => Ok(()),\n            Err(QueryError::Shutdown) => {\n                info!(\"Stopped due to shutdown\");\n                Ok(())\n            }\n            Err(QueryError::Reconnect) => {\n                // Dropping out of this loop implicitly disconnects\n                info!(\"Stopped due to handler reconnect request\");\n                Ok(())\n            }\n            Err(QueryError::Disconnected(e)) => {\n                info!(\"Disconnected ({e:#})\");\n                // Disconnection is not an error: we just use it that way internally to drop\n                // out of loops.\n                Ok(())\n            }\n            e => e,\n        }\n    }\n\n    async fn run_message_loop(\n        &mut self,\n        handler: &mut impl Handler<IO>,\n        cancel: &CancellationToken,\n    ) -> Result<(), QueryError> {\n        trace!(\"postgres backend to {:?} started\", self.peer_addr);\n\n        tokio::select!(\n            biased;\n\n            _ = cancel.cancelled() => {\n                // We were requested to shut down.\n                tracing::info!(\"shutdown request received during handshake\");\n                return Err(QueryError::Shutdown)\n            },\n\n            handshake_r = self.handshake(handler) => {\n                handshake_r?;\n            }\n        );\n\n        // Authentication completed\n        let mut query_string = Bytes::new();\n        while let Some(msg) = tokio::select!(\n            biased;\n            _ = cancel.cancelled() => {\n                // We were requested to shut down.\n                tracing::info!(\"shutdown request received in run_message_loop\");\n                return Err(QueryError::Shutdown)\n            },\n            msg = self.read_message() => { msg },\n        )? 
{\n            trace!(\"got message {:?}\", msg);\n\n            let result = self.process_message(handler, msg, &mut query_string).await;\n            tokio::select!(\n                biased;\n                _ = cancel.cancelled() => {\n                    // We were requested to shut down.\n                    tracing::info!(\"shutdown request received during response flush\");\n\n                    // If we exited process_message with a shutdown error, there may be\n                    // some valid response content in our transmit buffer: permit sending\n                    // this within a short timeout.  This is a best effort thing so we don't\n                    // care about the result.\n                    tokio::time::timeout(std::time::Duration::from_millis(500), self.flush()).await.ok();\n\n                    return Err(QueryError::Shutdown)\n                },\n                flush_r = self.flush() => {\n                    flush_r?;\n                }\n            );\n\n            match result? 
{\n                ProcessMsgResult::Continue => {\n                    continue;\n                }\n                ProcessMsgResult::Break => break,\n            }\n        }\n\n        trace!(\"postgres backend to {:?} exited\", self.peer_addr);\n        Ok(())\n    }\n\n    /// Try to upgrade MaybeTlsStream into actual TLS one, performing handshake.\n    async fn tls_upgrade(\n        src: MaybeTlsStream<IO>,\n        tls_config: Arc<rustls::ServerConfig>,\n    ) -> anyhow::Result<MaybeTlsStream<IO>> {\n        match src {\n            MaybeTlsStream::Unencrypted(s) => {\n                let acceptor = TlsAcceptor::from(tls_config);\n                let tls_stream = acceptor.accept(s).await?;\n                Ok(MaybeTlsStream::Tls(Box::new(tls_stream)))\n            }\n            MaybeTlsStream::Tls(_) => {\n                anyhow::bail!(\"TLS already started\");\n            }\n        }\n    }\n\n    async fn start_tls(&mut self) -> anyhow::Result<()> {\n        // temporary replace stream with fake to cook TLS one, Indiana Jones style\n        match std::mem::replace(&mut self.framed, MaybeWriteOnly::Broken) {\n            MaybeWriteOnly::Full(framed) => {\n                let tls_config = self\n                    .tls_config\n                    .as_ref()\n                    .context(\"start_tls called without conf\")?\n                    .clone();\n                let tls_framed = framed\n                    .map_stream(|s| PostgresBackend::tls_upgrade(s, tls_config))\n                    .await?;\n                // push back ready TLS stream\n                self.framed = MaybeWriteOnly::Full(tls_framed);\n                Ok(())\n            }\n            MaybeWriteOnly::WriteOnly(_) => {\n                anyhow::bail!(\"TLS upgrade attempt in split state\")\n            }\n            MaybeWriteOnly::Broken => panic!(\"TLS upgrade on framed in invalid state\"),\n        }\n    }\n\n    /// Split off owned read part from which messages can be read 
in different\n    /// task/thread.\n    pub fn split(&mut self) -> anyhow::Result<PostgresBackendReader<IO>> {\n        // temporary replace stream with fake to cook split one, Indiana Jones style\n        match std::mem::replace(&mut self.framed, MaybeWriteOnly::Broken) {\n            MaybeWriteOnly::Full(framed) => {\n                let (reader, writer) = framed.split();\n                self.framed = MaybeWriteOnly::WriteOnly(writer);\n                Ok(PostgresBackendReader {\n                    reader,\n                    closed: false,\n                })\n            }\n            MaybeWriteOnly::WriteOnly(_) => {\n                anyhow::bail!(\"PostgresBackend is already split\")\n            }\n            MaybeWriteOnly::Broken => panic!(\"split on framed in invalid state\"),\n        }\n    }\n\n    /// Join read part back.\n    pub fn unsplit(&mut self, reader: PostgresBackendReader<IO>) -> anyhow::Result<()> {\n        // temporary replace stream with fake to cook joined one, Indiana Jones style\n        match std::mem::replace(&mut self.framed, MaybeWriteOnly::Broken) {\n            MaybeWriteOnly::Full(_) => {\n                anyhow::bail!(\"PostgresBackend is not split\")\n            }\n            MaybeWriteOnly::WriteOnly(writer) => {\n                let joined = Framed::unsplit(reader.reader, writer);\n                self.framed = MaybeWriteOnly::Full(joined);\n                // if reader encountered connection error, do not attempt reading anymore\n                if reader.closed {\n                    self.state = ProtoState::Closed;\n                }\n                Ok(())\n            }\n            MaybeWriteOnly::Broken => panic!(\"unsplit on framed in invalid state\"),\n        }\n    }\n\n    /// Perform handshake with the client, transitioning to Established.\n    /// In case of EOF during handshake logs this, sets state to Closed and returns Ok(()).\n    async fn handshake(&mut self, handler: &mut impl Handler<IO>) -> 
Result<(), QueryError> {\n        while self.state < ProtoState::Authentication {\n            match self.framed.read_startup_message().await? {\n                Some(msg) => {\n                    self.process_startup_message(handler, msg).await?;\n                }\n                None => {\n                    trace!(\n                        \"postgres backend to {:?} received EOF during handshake\",\n                        self.peer_addr\n                    );\n                    self.state = ProtoState::Closed;\n                    return Err(QueryError::Disconnected(ConnectionError::Protocol(\n                        ProtocolError::Protocol(\"EOF during handshake\".to_string()),\n                    )));\n                }\n            }\n        }\n\n        // Perform auth, if needed.\n        if self.state == ProtoState::Authentication {\n            match self.framed.read_message().await? {\n                Some(FeMessage::PasswordMessage(m)) => {\n                    assert!(self.auth_type == AuthType::NeonJWT);\n\n                    let (_, jwt_response) = m.split_last().context(\"protocol violation\")?;\n\n                    if let Err(e) = handler.check_auth_jwt(self, jwt_response) {\n                        self.write_message_noflush(&BeMessage::ErrorResponse(\n                            &short_error(&e),\n                            Some(e.pg_error_code()),\n                        ))?;\n                        return Err(e);\n                    }\n\n                    self.write_message_noflush(&BeMessage::AuthenticationOk)?\n                        .write_message_noflush(&BeMessage::CLIENT_ENCODING)?\n                        .write_message(&BeMessage::ReadyForQuery)\n                        .await?;\n                    self.state = ProtoState::Established;\n                }\n                Some(m) => {\n                    return Err(QueryError::Other(anyhow::anyhow!(\n                        \"Unexpected message {:?} while waiting 
for handshake\",\n                        m\n                    )));\n                }\n                None => {\n                    trace!(\n                        \"postgres backend to {:?} received EOF during auth\",\n                        self.peer_addr\n                    );\n                    self.state = ProtoState::Closed;\n                    return Err(QueryError::Disconnected(ConnectionError::Protocol(\n                        ProtocolError::Protocol(\"EOF during auth\".to_string()),\n                    )));\n                }\n            }\n        }\n\n        Ok(())\n    }\n\n    /// Process startup packet:\n    /// - transition to Established if auth type is trust\n    /// - transition to Authentication if auth type is NeonJWT.\n    /// - or perform TLS handshake -- then need to call this again to receive\n    ///   actual startup packet.\n    async fn process_startup_message(\n        &mut self,\n        handler: &mut impl Handler<IO>,\n        msg: FeStartupPacket,\n    ) -> Result<(), QueryError> {\n        assert!(self.state < ProtoState::Authentication);\n        let have_tls = self.tls_config.is_some();\n        match msg {\n            FeStartupPacket::SslRequest { direct } => {\n                debug!(\"SSL requested\");\n\n                if !direct {\n                    self.write_message(&BeMessage::EncryptionResponse(have_tls))\n                        .await?;\n                } else if !have_tls {\n                    return Err(QueryError::Other(anyhow::anyhow!(\n                        \"direct SSL negotiation but no TLS support\"\n                    )));\n                }\n\n                if have_tls {\n                    self.start_tls().await?;\n                    self.state = ProtoState::Encrypted;\n                }\n            }\n            FeStartupPacket::GssEncRequest => {\n                debug!(\"GSS requested\");\n                self.write_message(&BeMessage::EncryptionResponse(false))\n               
     .await?;\n            }\n            FeStartupPacket::StartupMessage { .. } => {\n                if have_tls && !matches!(self.state, ProtoState::Encrypted) {\n                    self.write_message(&BeMessage::ErrorResponse(\"must connect with TLS\", None))\n                        .await?;\n                    return Err(QueryError::Other(anyhow::anyhow!(\n                        \"client did not connect with TLS\"\n                    )));\n                }\n\n                // NB: startup() may change self.auth_type -- we are using that in proxy code\n                // to bypass auth for new users.\n                handler.startup(self, &msg)?;\n\n                match self.auth_type {\n                    AuthType::Trust => {\n                        self.write_message_noflush(&BeMessage::AuthenticationOk)?\n                            .write_message_noflush(&BeMessage::CLIENT_ENCODING)?\n                            .write_message_noflush(&BeMessage::INTEGER_DATETIMES)?\n                            // The async python driver requires a valid server_version\n                            .write_message_noflush(&BeMessage::server_version(\"14.1\"))?\n                            .write_message(&BeMessage::ReadyForQuery)\n                            .await?;\n                        self.state = ProtoState::Established;\n                    }\n                    AuthType::NeonJWT => {\n                        self.write_message(&BeMessage::AuthenticationCleartextPassword)\n                            .await?;\n                        self.state = ProtoState::Authentication;\n                    }\n                }\n            }\n            FeStartupPacket::CancelRequest { .. 
} => {\n                return Err(QueryError::Other(anyhow::anyhow!(\n                    \"Unexpected CancelRequest message during handshake\"\n                )));\n            }\n        }\n        Ok(())\n    }\n\n    // Proto looks like this:\n    // FeMessage::Query(\"pagestream_v2{FeMessage::CopyData(PagestreamFeMessage::GetPage(..))}\")\n\n    async fn process_message(\n        &mut self,\n        handler: &mut impl Handler<IO>,\n        msg: FeMessage,\n        unnamed_query_string: &mut Bytes,\n    ) -> Result<ProcessMsgResult, QueryError> {\n        // Allow only startup and password messages during auth. Otherwise client would be able to bypass auth\n        // TODO: change that to proper top-level match of protocol state with separate message handling for each state\n        assert!(self.state == ProtoState::Established);\n\n        match msg {\n            FeMessage::Query(body) => {\n                // remove null terminator\n                let query_string = cstr_to_str(&body)?;\n\n                trace!(\"got query {query_string:?}\");\n                if let Err(e) = handler.process_query(self, query_string).await {\n                    match e {\n                        err @ QueryError::Shutdown => {\n                            // Notify postgres of the connection shutdown at the libpq\n                            // protocol level. 
This avoids postgres having to tell apart\n                            // from an idle connection and a stale one, which is bug prone.\n                            let shutdown_error = short_error(&err);\n                            self.write_message_noflush(&BeMessage::ErrorResponse(\n                                &shutdown_error,\n                                Some(err.pg_error_code()),\n                            ))?;\n\n                            return Ok(ProcessMsgResult::Break);\n                        }\n                        QueryError::SimulatedConnectionError => {\n                            return Err(QueryError::SimulatedConnectionError);\n                        }\n                        err @ QueryError::Reconnect => {\n                            // Instruct the client to reconnect, stop processing messages\n                            // from this libpq connection and, finally, disconnect from the\n                            // server side (returning an Err achieves the later).\n                            //\n                            // Note the flushing is done by the caller.\n                            let reconnect_error = short_error(&err);\n                            self.write_message_noflush(&BeMessage::ErrorResponse(\n                                &reconnect_error,\n                                Some(err.pg_error_code()),\n                            ))?;\n\n                            return Err(err);\n                        }\n                        e => {\n                            log_query_error(query_string, &e);\n                            let short_error = short_error(&e);\n                            self.write_message_noflush(&BeMessage::ErrorResponse(\n                                &short_error,\n                                Some(e.pg_error_code()),\n                            ))?;\n                        }\n                    }\n                }\n                
self.write_message_noflush(&BeMessage::ReadyForQuery)?;\n            }\n\n            FeMessage::Parse(m) => {\n                *unnamed_query_string = m.query_string;\n                self.write_message_noflush(&BeMessage::ParseComplete)?;\n            }\n\n            FeMessage::Describe(_) => {\n                self.write_message_noflush(&BeMessage::ParameterDescription)?\n                    .write_message_noflush(&BeMessage::NoData)?;\n            }\n\n            FeMessage::Bind(_) => {\n                self.write_message_noflush(&BeMessage::BindComplete)?;\n            }\n\n            FeMessage::Close(_) => {\n                self.write_message_noflush(&BeMessage::CloseComplete)?;\n            }\n\n            FeMessage::Execute(_) => {\n                let query_string = cstr_to_str(unnamed_query_string)?;\n                trace!(\"got execute {query_string:?}\");\n                if let Err(e) = handler.process_query(self, query_string).await {\n                    log_query_error(query_string, &e);\n                    self.write_message_noflush(&BeMessage::ErrorResponse(\n                        &e.to_string(),\n                        Some(e.pg_error_code()),\n                    ))?;\n                }\n                // NOTE there is no ReadyForQuery message. 
This handler is used\n                // for basebackup and it uses CopyOut which doesn't require\n                // ReadyForQuery message and backend just switches back to\n                // processing mode after sending CopyDone or ErrorResponse.\n            }\n\n            FeMessage::Sync => {\n                self.write_message_noflush(&BeMessage::ReadyForQuery)?;\n            }\n\n            FeMessage::Terminate => {\n                return Ok(ProcessMsgResult::Break);\n            }\n\n            // We prefer explicit pattern matching to wildcards, because\n            // this helps us spot the places where new variants are missing\n            FeMessage::CopyData(_)\n            | FeMessage::CopyDone\n            | FeMessage::CopyFail\n            | FeMessage::PasswordMessage(_) => {\n                return Err(QueryError::Other(anyhow::anyhow!(\n                    \"unexpected message type: {msg:?}\",\n                )));\n            }\n        }\n\n        Ok(ProcessMsgResult::Continue)\n    }\n\n    /// - Log as info/error result of handling COPY stream and send back\n    ///   ErrorResponse if that makes sense.\n    /// - Shutdown the stream if we got Terminate.\n    /// - Then close the connection because we don't handle exiting from COPY\n    ///   stream normally.\n    pub async fn handle_copy_stream_end(&mut self, end: CopyStreamHandlerEnd) {\n        use CopyStreamHandlerEnd::*;\n\n        let expected_end = match &end {\n            ServerInitiated(_) | CopyDone | CopyFail | Terminate | EOF | Cancelled => true,\n            // The timeline doesn't exist and we have been requested to not auto-create it.\n            // Compute requests for timelines that haven't been created yet\n            // might reach us before the storcon request to create those timelines.\n            TimelineNoCreate => true,\n            CopyStreamHandlerEnd::Disconnected(ConnectionError::Io(io_error))\n                if is_expected_io_error(io_error) =>\n         
   {\n                true\n            }\n            _ => false,\n        };\n        if expected_end {\n            info!(\"terminated: {:#}\", end);\n        } else {\n            error!(\"terminated: {:?}\", end);\n        }\n\n        // Note: no current usages ever send this\n        if let CopyDone = &end {\n            if let Err(e) = self.write_message(&BeMessage::CopyDone).await {\n                error!(\"failed to send CopyDone: {}\", e);\n            }\n        }\n\n        let err_to_send_and_errcode = match &end {\n            ServerInitiated(_) => Some((end.to_string(), SQLSTATE_SUCCESSFUL_COMPLETION)),\n            Other(_) => Some((format!(\"{end:#}\"), SQLSTATE_INTERNAL_ERROR)),\n            // Note: CopyFail in duplex copy is somewhat unexpected (at least to\n            // PG walsender; evidently and per my docs reading client should\n            // finish it with CopyDone). It is not a problem to recover from it\n            // finishing the stream in both directions like we do, but note that\n            // sync rust-postgres client (which we don't use anymore) hangs if\n            // socket is not closed here.\n            // https://github.com/sfackler/rust-postgres/issues/755\n            // https://github.com/neondatabase/neon/issues/935\n            //\n            // Currently, the version of tokio_postgres replication patch we use\n            // sends this when it closes the stream (e.g. 
pageserver decided to\n            // switch conn to another safekeeper and client gets dropped).\n            // Moreover, seems like 'connection' task errors with 'unexpected\n            // message from server' when it receives ErrorResponse (anything but\n            // CopyData/CopyDone) back.\n            CopyFail => Some((end.to_string(), SQLSTATE_SUCCESSFUL_COMPLETION)),\n\n            // When cancelled, send no response: we must not risk blocking on sending that response\n            Cancelled => None,\n            _ => None,\n        };\n        if let Some((err, errcode)) = err_to_send_and_errcode {\n            if let Err(ee) = self\n                .write_message(&BeMessage::ErrorResponse(&err, Some(errcode)))\n                .await\n            {\n                error!(\"failed to send ErrorResponse: {}\", ee);\n            }\n        }\n\n        // Proper COPY stream finishing to continue using the connection is not\n        // implemented at the server side (we don't need it so far). To prevent\n        // further usages of the connection, close it.\n        self.framed.shutdown().await.ok();\n        self.state = ProtoState::Closed;\n    }\n}\n\npub struct PostgresBackendReader<IO> {\n    reader: FramedReader<MaybeTlsStream<IO>>,\n    closed: bool, // true if received error closing the connection\n}\n\nimpl<IO: AsyncRead + AsyncWrite + Unpin> PostgresBackendReader<IO> {\n    /// Read full message or return None if connection is cleanly closed with no\n    /// unprocessed data.\n    pub async fn read_message(&mut self) -> Result<Option<FeMessage>, ConnectionError> {\n        match self.reader.read_message().await {\n            Ok(m) => {\n                trace!(\"read msg {:?}\", m);\n                Ok(m)\n            }\n            Err(e) => {\n                self.closed = true;\n                Err(e)\n            }\n        }\n    }\n\n    /// Get CopyData contents of the next message in COPY stream or error\n    /// closing it. 
The error type is wider than actual errors which can happen\n    /// here -- it includes 'Other' and 'ServerInitiated', but that's ok for\n    /// current callers.\n    pub async fn read_copy_message(&mut self) -> Result<Bytes, CopyStreamHandlerEnd> {\n        match self.read_message().await? {\n            Some(msg) => match msg {\n                FeMessage::CopyData(m) => Ok(m),\n                FeMessage::CopyDone => Err(CopyStreamHandlerEnd::CopyDone),\n                FeMessage::CopyFail => Err(CopyStreamHandlerEnd::CopyFail),\n                FeMessage::Terminate => Err(CopyStreamHandlerEnd::Terminate),\n                _ => Err(CopyStreamHandlerEnd::from(ConnectionError::Protocol(\n                    ProtocolError::Protocol(format!(\"unexpected message in COPY stream {msg:?}\")),\n                ))),\n            },\n            None => Err(CopyStreamHandlerEnd::EOF),\n        }\n    }\n}\n\n///\n/// A futures::AsyncWrite implementation that wraps all data written to it in CopyData\n/// messages.\n///\npub struct CopyDataWriter<'a, IO> {\n    pgb: &'a mut PostgresBackend<IO>,\n}\n\nimpl<IO: AsyncRead + AsyncWrite + Unpin> AsyncWrite for CopyDataWriter<'_, IO> {\n    fn poll_write(\n        self: Pin<&mut Self>,\n        cx: &mut std::task::Context<'_>,\n        buf: &[u8],\n    ) -> Poll<Result<usize, std::io::Error>> {\n        let this = self.get_mut();\n\n        // It's not strictly required to flush between each message, but makes it easier\n        // to view in wireshark, and usually the messages that the callers write are\n        // decently-sized anyway.\n        if let Err(err) = ready!(this.pgb.poll_flush(cx)) {\n            return Poll::Ready(Err(err));\n        }\n\n        // CopyData\n        // XXX: if the input is large, we should split it into multiple messages.\n        // Not sure what the threshold should be, but the ultimate hard limit is that\n        // the length cannot exceed u32.\n        this.pgb\n            
.write_message_noflush(&BeMessage::CopyData(buf))\n            // write_message only writes to the buffer, so it can fail iff the\n            // message is invaid, but CopyData can't be invalid.\n            .map_err(|_| io::Error::other(\"failed to serialize CopyData\"))?;\n\n        Poll::Ready(Ok(buf.len()))\n    }\n\n    fn poll_flush(\n        self: Pin<&mut Self>,\n        cx: &mut std::task::Context<'_>,\n    ) -> Poll<Result<(), std::io::Error>> {\n        let this = self.get_mut();\n        this.pgb.poll_flush(cx)\n    }\n\n    fn poll_shutdown(\n        self: Pin<&mut Self>,\n        cx: &mut std::task::Context<'_>,\n    ) -> Poll<Result<(), std::io::Error>> {\n        let this = self.get_mut();\n        this.pgb.poll_flush(cx)\n    }\n}\n\npub fn short_error(e: &QueryError) -> String {\n    match e {\n        QueryError::Disconnected(connection_error) => connection_error.to_string(),\n        QueryError::Reconnect => \"reconnect\".to_string(),\n        QueryError::Shutdown => \"shutdown\".to_string(),\n        QueryError::NotFound(_) => \"not found\".to_string(),\n        QueryError::Unauthorized(_e) => \"JWT authentication error\".to_string(),\n        QueryError::SimulatedConnectionError => \"simulated connection error\".to_string(),\n        QueryError::Other(e) => format!(\"{e:#}\"),\n    }\n}\n\nfn log_query_error(query: &str, e: &QueryError) {\n    // If you want to change the log level of a specific error, also re-categorize it in `BasebackupQueryTimeOngoingRecording`.\n    match e {\n        QueryError::Disconnected(ConnectionError::Io(io_error)) => {\n            if is_expected_io_error(io_error) {\n                info!(\"query handler for '{query}' failed with expected io error: {io_error}\");\n            } else {\n                error!(\"query handler for '{query}' failed with io error: {io_error}\");\n            }\n        }\n        QueryError::Disconnected(other_connection_error) => {\n            error!(\n                \"query 
handler for '{query}' failed with connection error: {other_connection_error:?}\"\n            )\n        }\n        QueryError::SimulatedConnectionError => {\n            error!(\"query handler for query '{query}' failed due to a simulated connection error\")\n        }\n        QueryError::Reconnect => {\n            info!(\"query handler for '{query}' requested client to reconnect\")\n        }\n        QueryError::Shutdown => {\n            info!(\"query handler for '{query}' cancelled during tenant shutdown\")\n        }\n        QueryError::NotFound(reason) => {\n            info!(\"query handler for '{query}' entity not found: {reason}\")\n        }\n        QueryError::Unauthorized(e) => {\n            warn!(\"query handler for '{query}' failed with authentication error: {e}\");\n        }\n        QueryError::Other(e) => {\n            error!(\"query handler for '{query}' failed: {e:?}\");\n        }\n    }\n}\n\n/// Something finishing handling of COPY stream, see handle_copy_stream_end.\n/// This is not always a real error, but it allows to use ? and thiserror impls.\n#[derive(thiserror::Error, Debug)]\npub enum CopyStreamHandlerEnd {\n    /// Handler initiates the end of streaming.\n    #[error(\"{0}\")]\n    ServerInitiated(String),\n    #[error(\"received CopyDone\")]\n    CopyDone,\n    #[error(\"received CopyFail\")]\n    CopyFail,\n    #[error(\"received Terminate\")]\n    Terminate,\n    #[error(\"EOF on COPY stream\")]\n    EOF,\n    #[error(\"timeline not found, and allow_timeline_creation is false\")]\n    TimelineNoCreate,\n    /// The connection was lost\n    #[error(\"connection error: {0}\")]\n    Disconnected(#[from] ConnectionError),\n    #[error(\"Shutdown\")]\n    Cancelled,\n    /// Some other error\n    #[error(transparent)]\n    Other(#[from] anyhow::Error),\n}\n"
  },
  {
    "path": "libs/postgres_backend/tests/cert.pem",
    "content": "-----BEGIN CERTIFICATE-----\nMIIDbjCCAlagAwIBAgIUGHJukXa1bQathgBHC40+A18BsnYwDQYJKoZIhvcNAQEL\nBQAwYzELMAkGA1UEBhMCVVMxEzARBgNVBAgMCkNhbGlmb3JuaWExFjAUBgNVBAcM\nDVNhbiBGcmFuY2lzY28xEzARBgNVBAoMCk15IENvbXBhbnkxEjAQBgNVBAMMCWxv\nY2FsaG9zdDAgFw0yMTA4MTMxODQyMjBaGA8yMTIxMDcyMDE4NDIyMFowYzELMAkG\nA1UEBhMCVVMxEzARBgNVBAgMCkNhbGlmb3JuaWExFjAUBgNVBAcMDVNhbiBGcmFu\nY2lzY28xEzARBgNVBAoMCk15IENvbXBhbnkxEjAQBgNVBAMMCWxvY2FsaG9zdDCC\nASIwDQYJKoZIhvcNAQEBBQADggEPADCCAQoCggEBAOI9S+nh8ABMp5jpb7WWfAYr\ntGJ4C7gi9IPTVIRxSSrt5KglEysrOiKlhan1Ut2e8CCudztdXtCvT8/goJWlmxpF\nIQkErlCsOdGHeEJ0EZxoU1fMkBAQVf6Rb1JE9ladG2+D1e7yvxmMqfPVuU8lj+kN\nnESP+I3ESNCtuqgtfcErxu3TuhSzV2slSi5lrYQCwERgCevl6LUNd2mEaYdS4mmJ\n4RZqc2C4y7JO5wSDjga8GIBHJVo70HRVsvX7eE8r6tMP2HyGyonBitBKAc2QEQIv\ncLCuMOTtTBlYcMvTmJEOHFKwIJXm0XmQfAWeKFfyK7493fB4Gu+8Dc1xC+IHaTEC\nAwEAAaMYMBYwFAYDVR0RBA0wC4IJbG9jYWxob3N0MA0GCSqGSIb3DQEBCwUAA4IB\nAQBjY+g3eF8m8lEWz+QgKp88MhTdtJTsEsSz0GAi58SnEkuyxVOHjKEyjGKJWTtT\nICgmEzC85uaS7VBdftoYNmsbvNewGiisDGQRWCjOGM7lTaA4FQPADguexMvXh/nO\n9PQoTxtp7qwvGWO2mED6LWU6bjT3cL+XgrOwT9sticRTl6/BXV8wAmyxT0DkQ3nJ\nzbRuTP/G2kE0bRK++67kK0ovopRkX6Dl6di1EFlkAnPBC2d8tdcNTXYhkxZk4O0q\nGUolwiuWz/dtD3tZ2bx3vqzT7uIFHS4XP6Q3SRNWFTGhuvAc7DPvCZBqxy6odeyQ\nVxBgJtq+pNjYYkeaSQVQ+UMU\n-----END CERTIFICATE-----\n"
  },
  {
    "path": "libs/postgres_backend/tests/key.pem",
    "content": "-----BEGIN RSA PRIVATE KEY-----\nMIIEpAIBAAKCAQEA4j1L6eHwAEynmOlvtZZ8Biu0YngLuCL0g9NUhHFJKu3kqCUT\nKys6IqWFqfVS3Z7wIK53O11e0K9Pz+CglaWbGkUhCQSuUKw50Yd4QnQRnGhTV8yQ\nEBBV/pFvUkT2Vp0bb4PV7vK/GYyp89W5TyWP6Q2cRI/4jcRI0K26qC19wSvG7dO6\nFLNXayVKLmWthALARGAJ6+XotQ13aYRph1LiaYnhFmpzYLjLsk7nBIOOBrwYgEcl\nWjvQdFWy9ft4Tyvq0w/YfIbKicGK0EoBzZARAi9wsK4w5O1MGVhwy9OYkQ4cUrAg\nlebReZB8BZ4oV/Irvj3d8Hga77wNzXEL4gdpMQIDAQABAoIBAQClKycO+zpinZQG\nGPbLVa/6OVIaSZYUusBUtaaQgrxuMPusnlSeQZLR1JH/APGchvq8gWLe3k3ogPT9\nyPq0BhF0Xl+928L/dp1HkWWE7oQk8i1Wfiv27lY54iepoltN5KkxAsjfCC3oEz/I\nmpINbFjiRmN90rYdmd2nLA6H1Z5ntZQm5AcTo3OJZlTVN9eH9TV8f0AQRQgUJsL9\n75agSmj7euqZOqvvwfpsYzaZEhzMSG2QIcS3WglInbHy8c6ikZSm36J36wgsatMz\nCBZ6pMNtonRSKvAECQhBGEA73evtnGbLH0EY9KouN4KSHEHob89dGVeeXozksf9x\nQUE1/yOhAoGBAP818f7vIH6Z3QwWgTMwQsPBW+wNOIbTZrbZaihnz2K9XMu39TV6\nDWQHMsOlvg2QURZGwqB3jFn4wqZHmt7XYwk553E60kIw4hDvgpkkqmXVwK3kZASQ\nRRUax3hZ1gCWxpXlRZ1SvHNXjN9KEFwqQbR33XcxzC3TpSp0KYghT9jFAoGBAOLw\nagejqSF+f/5W1QhEKlM+tSlluo2sn5kKVkM4nNezFukb3pu5oScFjoQQGsoaz5aU\nkLlxW5h/aSxquhgcuo6I4Ux5dcgNm4QeonCCp+Qycn7tzyoJFL4odT9vYPQa5O9E\nhD9aSqhBBD1IIOS2T3vcW6VxibKZx1CRMDdRz119AoGALflr1L8DHYteNLVBJRWG\nkXkdtBJVooQmtr3Hz+uTgngWZWSIOc/45ZIeZPxQlmTvFpI8sWeX0wVrG0U+8vHe\nF2Vk+hLcmavwrZhX8HqYb6vn/+tq0R+kMj8Wu+mDEawXrh0VQ1gKNsUIzZisBc5e\n88G8FaLU41SDJniymqFVnvkCgYEA1ou/UfWRwg6b5tIkmKoI8aZJExgPpDzcrYyu\nPOLatLmlIUCt1b9K8V85evTWvtdWBd/yar8WfzeFMO69fGo8nOAfT3NMvJLQwblM\njN2Y6A4hXIpq3iyzpYsOPaiImn6KjQHTnSk5h5Pf9CeqoU8SGeEb629JZMYpPqvk\nT4hSaOkCgYBPaf51oSAstqdj0vxrsFS3EN3D8Fk0xQWt9Ss3ZGFAlTaEq5xoIk4k\nYfKVDv1S6/vlzbheIIzQ2lzVvG4AW+drQLsmEx5iMKvbNtFAur9kwUFU202Q2dki\nZQJ/JvjnPYFKxy+SVlLJ1h9RD9E3dgL/Ai7OUfbmX771vN0IQF7Z6Q==\n-----END RSA PRIVATE KEY-----\n"
  },
  {
    "path": "libs/postgres_backend/tests/simple_select.rs",
    "content": "use std::io::Cursor;\nuse std::sync::Arc;\n\n/// Test postgres_backend_async with tokio_postgres\nuse once_cell::sync::Lazy;\nuse postgres_backend::{AuthType, Handler, PostgresBackend, QueryError};\nuse pq_proto::{BeMessage, RowDescriptor};\nuse rustls::crypto::ring;\nuse tokio::io::{AsyncRead, AsyncWrite};\nuse tokio::net::{TcpListener, TcpStream};\nuse tokio_postgres::config::SslMode;\nuse tokio_postgres::tls::MakeTlsConnect;\nuse tokio_postgres::{Config, NoTls, SimpleQueryMessage};\nuse tokio_postgres_rustls::MakeRustlsConnect;\nuse tokio_util::sync::CancellationToken;\n\n// generate client, server test streams\nasync fn make_tcp_pair() -> (TcpStream, TcpStream) {\n    let listener = TcpListener::bind(\"127.0.0.1:0\").await.unwrap();\n    let addr = listener.local_addr().unwrap();\n    let client_stream = TcpStream::connect(addr).await.unwrap();\n    let (server_stream, _) = listener.accept().await.unwrap();\n    (client_stream, server_stream)\n}\n\nstruct TestHandler {}\n\nimpl<IO: AsyncRead + AsyncWrite + Unpin + Send> Handler<IO> for TestHandler {\n    // return single col 'hey' for any query\n    async fn process_query(\n        &mut self,\n        pgb: &mut PostgresBackend<IO>,\n        _query_string: &str,\n    ) -> Result<(), QueryError> {\n        pgb.write_message_noflush(&BeMessage::RowDescription(&[RowDescriptor::text_col(\n            b\"hey\",\n        )]))?\n        .write_message_noflush(&BeMessage::DataRow(&[Some(\"hey\".as_bytes())]))?\n        .write_message_noflush(&BeMessage::CommandComplete(b\"SELECT 1\"))?;\n        Ok(())\n    }\n}\n\n// test that basic select works\n#[tokio::test]\nasync fn simple_select() {\n    let (client_sock, server_sock) = make_tcp_pair().await;\n\n    // create and run pgbackend\n    let pgbackend =\n        PostgresBackend::new(server_sock, AuthType::Trust, None).expect(\"pgbackend creation\");\n\n    tokio::spawn(async move {\n        let mut handler = TestHandler {};\n        pgbackend.run(&mut 
handler, &CancellationToken::new()).await\n    });\n\n    let conf = Config::new();\n    let (client, connection) = conf.connect_raw(client_sock, NoTls).await.expect(\"connect\");\n    // The connection object performs the actual communication with the database,\n    // so spawn it off to run on its own.\n    tokio::spawn(async move {\n        if let Err(e) = connection.await {\n            eprintln!(\"connection error: {e}\");\n        }\n    });\n\n    let first_val = &(client.simple_query(\"SELECT 42;\").await.expect(\"select\"))[0];\n    if let SimpleQueryMessage::Row(row) = first_val {\n        let first_col = row.get(0).expect(\"first column\");\n        assert_eq!(first_col, \"hey\");\n    } else {\n        panic!(\"expected SimpleQueryMessage::Row\");\n    }\n}\n\nstatic KEY: Lazy<rustls::pki_types::PrivateKeyDer<'static>> = Lazy::new(|| {\n    let mut cursor = Cursor::new(include_bytes!(\"key.pem\"));\n    let key = rustls_pemfile::rsa_private_keys(&mut cursor)\n        .next()\n        .unwrap()\n        .unwrap();\n    rustls::pki_types::PrivateKeyDer::Pkcs1(key)\n});\n\nstatic CERT: Lazy<rustls::pki_types::CertificateDer<'static>> = Lazy::new(|| {\n    let mut cursor = Cursor::new(include_bytes!(\"cert.pem\"));\n\n    rustls_pemfile::certs(&mut cursor).next().unwrap().unwrap()\n});\n\n// test that basic select with ssl works\n#[tokio::test]\nasync fn simple_select_ssl() {\n    let (client_sock, server_sock) = make_tcp_pair().await;\n\n    let server_cfg =\n        rustls::ServerConfig::builder_with_provider(Arc::new(ring::default_provider()))\n            .with_safe_default_protocol_versions()\n            .expect(\"aws_lc_rs should support the default protocol versions\")\n            .with_no_client_auth()\n            .with_single_cert(vec![CERT.clone()], KEY.clone_key())\n            .unwrap();\n    let tls_config = Some(Arc::new(server_cfg));\n    let pgbackend =\n        PostgresBackend::new(server_sock, AuthType::Trust, 
tls_config).expect(\"pgbackend creation\");\n\n    tokio::spawn(async move {\n        let mut handler = TestHandler {};\n        pgbackend.run(&mut handler, &CancellationToken::new()).await\n    });\n\n    let client_cfg =\n        rustls::ClientConfig::builder_with_provider(Arc::new(ring::default_provider()))\n            .with_safe_default_protocol_versions()\n            .expect(\"aws_lc_rs should support the default protocol versions\")\n            .with_root_certificates({\n                let mut store = rustls::RootCertStore::empty();\n                store.add(CERT.clone()).unwrap();\n                store\n            })\n            .with_no_client_auth();\n    let mut make_tls_connect = tokio_postgres_rustls::MakeRustlsConnect::new(client_cfg);\n    let tls_connect = <MakeRustlsConnect as MakeTlsConnect<TcpStream>>::make_tls_connect(\n        &mut make_tls_connect,\n        \"localhost\",\n    )\n    .expect(\"make_tls_connect\");\n\n    let mut conf = Config::new();\n    conf.ssl_mode(SslMode::Require);\n    let (client, connection) = conf\n        .connect_raw(client_sock, tls_connect)\n        .await\n        .expect(\"connect\");\n    // The connection object performs the actual communication with the database,\n    // so spawn it off to run on its own.\n    tokio::spawn(async move {\n        if let Err(e) = connection.await {\n            eprintln!(\"connection error: {e}\");\n        }\n    });\n\n    let first_val = &(client.simple_query(\"SELECT 42;\").await.expect(\"select\"))[0];\n    if let SimpleQueryMessage::Row(row) = first_val {\n        let first_col = row.get(0).expect(\"first column\");\n        assert_eq!(first_col, \"hey\");\n    } else {\n        panic!(\"expected SimpleQueryMessage::Row\");\n    }\n}\n"
  },
  {
    "path": "libs/postgres_connection/Cargo.toml",
    "content": "[package]\nname = \"postgres_connection\"\nversion = \"0.1.0\"\nedition.workspace = true\nlicense.workspace = true\n\n[dependencies]\nanyhow.workspace = true\nitertools.workspace = true\ntokio-postgres.workspace = true\nurl.workspace = true\n\n[dev-dependencies]\nonce_cell.workspace = true\n"
  },
  {
    "path": "libs/postgres_connection/src/lib.rs",
    "content": "#![deny(unsafe_code)]\n#![deny(clippy::undocumented_unsafe_blocks)]\nuse std::borrow::Cow;\nuse std::fmt;\n\nuse anyhow::{Context, bail};\nuse itertools::Itertools;\nuse url::Host;\n\n/// Parses a string of format either `host:port` or `host` into a corresponding pair.\n///\n/// The `host` part should be a correct `url::Host`, while `port` (if present) should be\n/// a valid decimal u16 of digits only.\npub fn parse_host_port<S: AsRef<str>>(host_port: S) -> Result<(Host, Option<u16>), anyhow::Error> {\n    let (host, port) = match host_port.as_ref().rsplit_once(':') {\n        Some((host, port)) => (\n            host,\n            // +80 is a valid u16, but not a valid port\n            if port.chars().all(|c| c.is_ascii_digit()) {\n                Some(port.parse::<u16>().context(\"Unable to parse port\")?)\n            } else {\n                bail!(\"Port contains a non-ascii-digit\")\n            },\n        ),\n        None => (host_port.as_ref(), None), // No colons, no port specified\n    };\n    let host = Host::parse(host).context(\"Unable to parse host\")?;\n    Ok((host, port))\n}\n\n#[cfg(test)]\nmod tests_parse_host_port {\n    use url::Host;\n\n    use crate::parse_host_port;\n\n    #[test]\n    fn test_normal() {\n        let (host, port) = parse_host_port(\"hello:123\").unwrap();\n        assert_eq!(host, Host::Domain(\"hello\".to_owned()));\n        assert_eq!(port, Some(123));\n    }\n\n    #[test]\n    fn test_no_port() {\n        let (host, port) = parse_host_port(\"hello\").unwrap();\n        assert_eq!(host, Host::Domain(\"hello\".to_owned()));\n        assert_eq!(port, None);\n    }\n\n    #[test]\n    fn test_ipv6() {\n        let (host, port) = parse_host_port(\"[::1]:123\").unwrap();\n        assert_eq!(host, Host::<String>::Ipv6(std::net::Ipv6Addr::LOCALHOST));\n        assert_eq!(port, Some(123));\n    }\n\n    #[test]\n    fn test_invalid_host() {\n        assert!(parse_host_port(\"hello world\").is_err());\n    }\n\n  
  #[test]\n    fn test_invalid_port() {\n        assert!(parse_host_port(\"hello:+80\").is_err());\n    }\n}\n\n#[derive(Clone)]\npub struct PgConnectionConfig {\n    host: Host,\n    port: u16,\n    password: Option<String>,\n    options: Vec<String>,\n}\n\n/// A simplified PostgreSQL connection configuration. Supports only a subset of possible\n/// settings for simplicity. A password getter or `to_connection_string` methods are not\n/// added by design to avoid accidentally leaking password through logging, command line\n/// arguments to a child process, or likewise.\nimpl PgConnectionConfig {\n    pub fn new_host_port(host: Host, port: u16) -> Self {\n        PgConnectionConfig {\n            host,\n            port,\n            password: None,\n            options: vec![],\n        }\n    }\n\n    pub fn host(&self) -> &Host {\n        &self.host\n    }\n\n    pub fn port(&self) -> u16 {\n        self.port\n    }\n\n    pub fn set_host(mut self, h: Host) -> Self {\n        self.host = h;\n        self\n    }\n\n    pub fn set_port(mut self, p: u16) -> Self {\n        self.port = p;\n        self\n    }\n\n    pub fn set_password(mut self, s: Option<String>) -> Self {\n        self.password = s;\n        self\n    }\n\n    pub fn extend_options<I: IntoIterator<Item = S>, S: Into<String>>(mut self, i: I) -> Self {\n        self.options.extend(i.into_iter().map(|s| s.into()));\n        self\n    }\n\n    /// Return a `<host>:<port>` string.\n    pub fn raw_address(&self) -> String {\n        format!(\"{}:{}\", self.host(), self.port())\n    }\n\n    /// Build a client library-specific connection configuration.\n    /// Used for testing and when we need to add some obscure configuration\n    /// elements at the last moment.\n    pub fn to_tokio_postgres_config(&self) -> tokio_postgres::Config {\n        // Use `tokio_postgres::Config` instead of `postgres::Config` because\n        // the former supports more options to fiddle with later.\n        let mut config = 
tokio_postgres::Config::new();\n        config.host(&self.host().to_string()).port(self.port);\n        if let Some(password) = &self.password {\n            config.password(password);\n        }\n        if !self.options.is_empty() {\n            // These options are command-line options and should be escaped before being passed\n            // as an 'options' connection string parameter, see\n            // https://www.postgresql.org/docs/15/libpq-connect.html#LIBPQ-CONNECT-OPTIONS\n            //\n            // They will be space-separated, so each space inside an option should be escaped,\n            // and all backslashes should be escaped before that. Although we don't expect options\n            // with spaces at the moment, they're supported by PostgreSQL. Hence we support them\n            // in this typesafe interface.\n            //\n            // We use `Cow` to avoid allocations in the best case (no escaping). A fully imperative\n            // solution would require 1-2 allocations in the worst case as well, but it's harder to\n            // implement and this function is hardly a bottleneck. 
The function is only called around\n            // establishing a new connection.\n            #[allow(unstable_name_collisions)]\n            config.options(\n                &self\n                    .options\n                    .iter()\n                    .map(|s| {\n                        if s.contains(['\\\\', ' ']) {\n                            Cow::Owned(s.replace('\\\\', \"\\\\\\\\\").replace(' ', \"\\\\ \"))\n                        } else {\n                            Cow::Borrowed(s.as_str())\n                        }\n                    })\n                    .intersperse(Cow::Borrowed(\" \")) // TODO: use impl from std once it's stabilized\n                    .collect::<String>(),\n            );\n        }\n        config\n    }\n\n    /// Connect using postgres protocol with TLS disabled.\n    pub async fn connect_no_tls(\n        &self,\n    ) -> Result<\n        (\n            tokio_postgres::Client,\n            tokio_postgres::Connection<tokio_postgres::Socket, tokio_postgres::tls::NoTlsStream>,\n        ),\n        tokio_postgres::Error,\n    > {\n        self.to_tokio_postgres_config()\n            .connect(tokio_postgres::NoTls)\n            .await\n    }\n}\n\nimpl fmt::Display for PgConnectionConfig {\n    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {\n        // The password is intentionally hidden and not part of this display string.\n        write!(f, \"postgresql://{}:{}\", self.host, self.port)\n    }\n}\n\nimpl fmt::Debug for PgConnectionConfig {\n    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {\n        // We want `password: Some(REDACTED-STRING)`, not `password: Some(\"REDACTED-STRING\")`\n        // so even if the password is `REDACTED-STRING` (quite unlikely) there is no confusion.\n        // Hence `format_args!()`, it returns a \"safe\" string which is not escaped by `Debug`.\n        f.debug_struct(\"PgConnectionConfig\")\n            .field(\"host\", &self.host)\n            .field(\"port\", 
&self.port)\n            .field(\n                \"password\",\n                &self\n                    .password\n                    .as_ref()\n                    .map(|_| format_args!(\"REDACTED-STRING\")),\n            )\n            .finish()\n    }\n}\n\n#[cfg(test)]\nmod tests_pg_connection_config {\n    use once_cell::sync::Lazy;\n    use url::Host;\n\n    use crate::PgConnectionConfig;\n\n    static STUB_HOST: Lazy<Host> = Lazy::new(|| Host::Domain(\"stub.host.example\".to_owned()));\n\n    #[test]\n    fn test_no_password() {\n        let cfg = PgConnectionConfig::new_host_port(STUB_HOST.clone(), 123);\n        assert_eq!(cfg.host(), &*STUB_HOST);\n        assert_eq!(cfg.port(), 123);\n        assert_eq!(cfg.raw_address(), \"stub.host.example:123\");\n        assert_eq!(\n            format!(\"{cfg:?}\"),\n            \"PgConnectionConfig { host: Domain(\\\"stub.host.example\\\"), port: 123, password: None }\"\n        );\n    }\n\n    #[test]\n    fn test_ipv6() {\n        // May be a special case because hostname contains a colon.\n        let cfg = PgConnectionConfig::new_host_port(Host::parse(\"[::1]\").unwrap(), 123);\n        assert_eq!(\n            cfg.host(),\n            &Host::<String>::Ipv6(std::net::Ipv6Addr::LOCALHOST)\n        );\n        assert_eq!(cfg.port(), 123);\n        assert_eq!(cfg.raw_address(), \"[::1]:123\");\n        assert_eq!(\n            format!(\"{cfg:?}\"),\n            \"PgConnectionConfig { host: Ipv6(::1), port: 123, password: None }\"\n        );\n    }\n\n    #[test]\n    fn test_with_password() {\n        let cfg = PgConnectionConfig::new_host_port(STUB_HOST.clone(), 123)\n            .set_password(Some(\"password\".to_owned()));\n        assert_eq!(cfg.host(), &*STUB_HOST);\n        assert_eq!(cfg.port(), 123);\n        assert_eq!(cfg.raw_address(), \"stub.host.example:123\");\n        assert_eq!(\n            format!(\"{cfg:?}\"),\n            \"PgConnectionConfig { host: Domain(\\\"stub.host.example\\\"), 
port: 123, password: Some(REDACTED-STRING) }\"\n        );\n    }\n\n    #[test]\n    fn test_with_options() {\n        let cfg = PgConnectionConfig::new_host_port(STUB_HOST.clone(), 123).extend_options([\n            \"hello\",\n            \"world\",\n            \"with space\",\n            \"and \\\\ backslashes\",\n        ]);\n        assert_eq!(cfg.host(), &*STUB_HOST);\n        assert_eq!(cfg.port(), 123);\n        assert_eq!(cfg.raw_address(), \"stub.host.example:123\");\n        assert_eq!(\n            cfg.to_tokio_postgres_config().get_options(),\n            Some(\"hello world with\\\\ space and\\\\ \\\\\\\\\\\\ backslashes\")\n        );\n    }\n}\n"
  },
  {
    "path": "libs/postgres_ffi/Cargo.toml",
    "content": "[package]\nname = \"postgres_ffi\"\nversion = \"0.1.0\"\nedition.workspace = true\nlicense.workspace = true\n\n[dependencies]\nregex.workspace = true\nbytes.workspace = true\nanyhow.workspace = true\ncrc32c.workspace = true\nonce_cell.workspace = true\npprof.workspace = true\nthiserror.workspace = true\nserde.workspace = true\npostgres_ffi_types.workspace = true\nutils.workspace = true\ntracing.workspace = true\npostgres_versioninfo.workspace = true\n\n[dev-dependencies]\ncriterion.workspace = true\nenv_logger.workspace = true\npostgres.workspace = true\n\n[build-dependencies]\nanyhow.workspace = true\nbindgen.workspace = true\n\n[[bench]]\nname = \"waldecoder\"\nharness = false\n"
  },
  {
    "path": "libs/postgres_ffi/README.md",
    "content": "This module contains utilities for working with PostgreSQL file\nformats. It's a collection of structs that are auto-generated from the\nPostgreSQL header files using bindgen, and Rust functions to read and\nmanipulate them.\n\nThere are also a bunch of constants in `pg_constants.rs` that are copied\nfrom various PostgreSQL headers, rather than auto-generated. They mostly\nshould be auto-generated too, but that's a TODO.\n\nThe PostgreSQL on-disk file format is not portable across different\nCPU architectures and operating systems. It is also subject to change\nin each major PostgreSQL version. Currently, this module supports\nPostgreSQL v14, v15, v16 and v17: bindings and code that depend on them are\nversion-specific.\nThis code is organized in modules `postgres_ffi::v14`, `postgres_ffi::v15`,\n`postgres_ffi::v16` and `postgres_ffi::v17`. Version-independent code is explicitly\nexported into the shared `postgres_ffi` module.\n\n\nTODO: Currently, there is also some code that deals with WAL records\nin pageserver/src/waldecoder.rs.  That should be moved into this\nmodule. The rest of the codebase should not have intimate knowledge of\nPostgreSQL file formats or WAL layout, that knowledge should be\nencapsulated in this module.\n"
  },
  {
    "path": "libs/postgres_ffi/benches/README.md",
    "content": "## Benchmarks\n\nTo run benchmarks:\n\n```sh\n# All benchmarks.\ncargo bench --package postgres_ffi\n\n# Specific file.\ncargo bench --package postgres_ffi --bench waldecoder\n\n# Specific benchmark.\ncargo bench --package postgres_ffi --bench waldecoder complete_record/size=1024\n\n# List available benchmarks.\ncargo bench --package postgres_ffi --benches -- --list\n\n# Generate flamegraph profiles using pprof-rs, profiling for 10 seconds.\n# Output in target/criterion/*/profile/flamegraph.svg.\ncargo bench --package postgres_ffi --bench waldecoder complete_record/size=1024 -- --profile-time 10\n```\n\nAdditional charts and statistics are available in `target/criterion/report/index.html`.\n\nBenchmarks are automatically compared against the previous run. To compare against other runs, see\n`--baseline` and `--save-baseline`."
  },
  {
    "path": "libs/postgres_ffi/benches/waldecoder.rs",
    "content": "use std::ffi::CStr;\n\nuse criterion::{Bencher, Criterion, criterion_group, criterion_main};\nuse postgres_ffi::v17::wal_generator::LogicalMessageGenerator;\nuse postgres_ffi::v17::waldecoder_handler::WalStreamDecoderHandler;\nuse postgres_ffi::waldecoder::WalStreamDecoder;\nuse postgres_versioninfo::PgMajorVersion;\nuse pprof::criterion::{Output, PProfProfiler};\nuse utils::lsn::Lsn;\n\nconst KB: usize = 1024;\n\n// Register benchmarks with Criterion.\ncriterion_group!(\n    name = benches;\n    config = Criterion::default().with_profiler(PProfProfiler::new(100, Output::Flamegraph(None)));\n    targets = bench_complete_record,\n);\ncriterion_main!(benches);\n\n/// Benchmarks WalStreamDecoder::complete_record() for a logical message of varying size.\nfn bench_complete_record(c: &mut Criterion) {\n    let mut g = c.benchmark_group(\"complete_record\");\n    for size in [64, KB, 8 * KB, 128 * KB] {\n        // Kind of weird to change the group throughput per benchmark, but it's the only way\n        // to vary it per benchmark. It works.\n        g.throughput(criterion::Throughput::Bytes(size as u64));\n        g.bench_function(format!(\"size={size}\"), |b| run_bench(b, size).unwrap());\n    }\n\n    fn run_bench(b: &mut Bencher, size: usize) -> anyhow::Result<()> {\n        const PREFIX: &CStr = c\"\";\n        let value_size = LogicalMessageGenerator::make_value_size(size, PREFIX);\n        let value = vec![1; value_size];\n\n        let mut decoder = WalStreamDecoder::new(Lsn(0), PgMajorVersion::PG17);\n        let msg = LogicalMessageGenerator::new(PREFIX, &value)\n            .next()\n            .unwrap()\n            .encode(Lsn(0));\n        assert_eq!(msg.len(), size);\n\n        b.iter(|| {\n            let msg = msg.clone(); // Bytes::clone() is cheap\n            decoder.complete_record(msg).unwrap();\n        });\n\n        Ok(())\n    }\n}\n"
  },
  {
    "path": "libs/postgres_ffi/bindgen_deps.h",
    "content": "/*\n * This header file is the input to bindgen. It includes all the\n * PostgreSQL headers that we need to auto-generate Rust structs\n * from. If you need to expose a new struct to Rust code, add the\n * header here, and allowlist the struct in the build.rs file.\n */\n#include \"c.h\"\n#include \"catalog/pg_control.h\"\n#include \"access/xlog_internal.h\"\n\n#include \"storage/block.h\"\n#include \"storage/bufpage.h\"\n#include \"storage/off.h\"\n#include \"access/multixact.h\"\n"
  },
  {
    "path": "libs/postgres_ffi/build.rs",
    "content": "extern crate bindgen;\n\nuse std::env;\nuse std::path::PathBuf;\nuse std::process::Command;\n\nuse anyhow::{Context, anyhow};\nuse bindgen::callbacks::{DeriveInfo, ParseCallbacks};\n\n#[derive(Debug)]\nstruct PostgresFfiCallbacks;\n\nimpl ParseCallbacks for PostgresFfiCallbacks {\n    fn include_file(&self, filename: &str) {\n        // This does the equivalent of passing bindgen::CargoCallbacks\n        // to the builder .parse_callbacks() method.\n        let cargo_callbacks = bindgen::CargoCallbacks::new();\n        cargo_callbacks.include_file(filename)\n    }\n\n    // Add any custom #[derive] attributes to the data structures that bindgen\n    // creates.\n    fn add_derives(&self, derive_info: &DeriveInfo) -> Vec<String> {\n        // This is the list of data structures that we want to serialize/deserialize.\n        let serde_list = [\n            \"XLogRecord\",\n            \"XLogPageHeaderData\",\n            \"XLogLongPageHeaderData\",\n            \"CheckPoint\",\n            \"FullTransactionId\",\n            \"ControlFileData\",\n        ];\n\n        if serde_list.contains(&derive_info.name) {\n            vec![\n                \"Default\".into(), // Default allows us to easily fill the padding fields with 0.\n                \"Serialize\".into(),\n                \"Deserialize\".into(),\n            ]\n        } else {\n            vec![]\n        }\n    }\n}\n\nfn main() -> anyhow::Result<()> {\n    // Tell cargo to invalidate the built crate whenever the wrapper changes\n    println!(\"cargo:rerun-if-changed=bindgen_deps.h\");\n\n    // Finding the location of C headers for the Postgres server:\n    // - if POSTGRES_INSTALL_DIR is set look into it, otherwise look into `<project_root>/pg_install`\n    // - if there's a `bin/pg_config` file use it for getting include server, otherwise use `<project_root>/pg_install/{PG_MAJORVERSION}/include/postgresql/server`\n    let pg_install_dir = if let Some(postgres_install_dir) = 
env::var_os(\"POSTGRES_INSTALL_DIR\") {\n        postgres_install_dir.into()\n    } else {\n        PathBuf::from(\"pg_install\")\n    };\n\n    for pg_version in &[\"v14\", \"v15\", \"v16\", \"v17\"] {\n        let mut pg_install_dir_versioned = pg_install_dir.join(pg_version);\n        if pg_install_dir_versioned.is_relative() {\n            let cwd = env::current_dir().context(\"Failed to get current_dir\")?;\n            pg_install_dir_versioned = cwd.join(\"..\").join(\"..\").join(pg_install_dir_versioned);\n        }\n\n        let pg_config_bin = pg_install_dir_versioned.join(\"bin\").join(\"pg_config\");\n        let inc_server_path: String = if pg_config_bin.exists() {\n            let output = Command::new(pg_config_bin)\n                .arg(\"--includedir-server\")\n                .output()\n                .context(\"failed to execute `pg_config --includedir-server`\")?;\n\n            if !output.status.success() {\n                panic!(\"`pg_config --includedir-server` failed\")\n            }\n\n            String::from_utf8(output.stdout)\n                .context(\"pg_config output is not UTF-8\")?\n                .trim_end()\n                .into()\n        } else {\n            let server_path = pg_install_dir_versioned\n                .join(\"include\")\n                .join(\"postgresql\")\n                .join(\"server\")\n                .into_os_string();\n            server_path\n                .into_string()\n                .map_err(|s| anyhow!(\"Bad postgres server path {s:?}\"))?\n        };\n\n        // The bindgen::Builder is the main entry point\n        // to bindgen, and lets you build up options for\n        // the resulting bindings.\n        let bindings = bindgen::Builder::default()\n            //\n            // All the needed PostgreSQL headers are included from 'bindgen_deps.h'\n            //\n            .header(\"bindgen_deps.h\")\n            //\n            // Tell cargo to invalidate the built crate whenever 
any of the\n            // included header files changed.\n            //\n            .parse_callbacks(Box::new(PostgresFfiCallbacks))\n            //\n            // These are the types and constants that we want to generate bindings for\n            //\n            .allowlist_type(\"BlockNumber\")\n            .allowlist_type(\"OffsetNumber\")\n            .allowlist_type(\"XLogRecPtr\")\n            .allowlist_type(\"XLogSegNo\")\n            .allowlist_type(\"TimeLineID\")\n            .allowlist_type(\"MultiXactId\")\n            .allowlist_type(\"MultiXactOffset\")\n            .allowlist_type(\"MultiXactStatus\")\n            .allowlist_type(\"ControlFileData\")\n            .allowlist_type(\"CheckPoint\")\n            .allowlist_type(\"FullTransactionId\")\n            .allowlist_type(\"XLogRecord\")\n            .allowlist_type(\"XLogPageHeaderData\")\n            .allowlist_type(\"XLogLongPageHeaderData\")\n            .allowlist_var(\"XLOG_PAGE_MAGIC\")\n            .allowlist_var(\"PG_MAJORVERSION_NUM\")\n            .allowlist_var(\"PG_CONTROL_FILE_SIZE\")\n            .allowlist_var(\"PG_CONTROLFILEDATA_OFFSETOF_CRC\")\n            .allowlist_type(\"PageHeaderData\")\n            .allowlist_type(\"DBState\")\n            .allowlist_type(\"RelMapFile\")\n            .allowlist_type(\"RepOriginId\")\n            // Because structs are used for serialization, tell bindgen to emit\n            // explicit padding fields.\n            .explicit_padding(true)\n            //\n            .clang_arg(format!(\"-I{inc_server_path}\"))\n            //\n            // Finish the builder and generate the bindings.\n            //\n            .generate()\n            .context(\"Unable to generate bindings\")?;\n\n        // Write the bindings to the $OUT_DIR/bindings_$pg_version.rs file.\n        let out_path: PathBuf = env::var(\"OUT_DIR\")\n            .context(\"Couldn't read OUT_DIR environment variable var\")?\n            .into();\n        let filename = 
format!(\"bindings_{pg_version}.rs\");\n\n        bindings\n            .write_to_file(out_path.join(filename))\n            .context(\"Couldn't write bindings\")?;\n    }\n\n    Ok(())\n}\n"
  },
  {
    "path": "libs/postgres_ffi/samples/pg_hba.conf",
    "content": "# PostgreSQL Client Authentication Configuration File\n# ===================================================\n#\n# Refer to the \"Client Authentication\" section in the PostgreSQL\n# documentation for a complete description of this file.  A short\n# synopsis follows.\n#\n# This file controls: which hosts are allowed to connect, how clients\n# are authenticated, which PostgreSQL user names they can use, which\n# databases they can access.  Records take one of these forms:\n#\n# local         DATABASE  USER  METHOD  [OPTIONS]\n# host          DATABASE  USER  ADDRESS  METHOD  [OPTIONS]\n# hostssl       DATABASE  USER  ADDRESS  METHOD  [OPTIONS]\n# hostnossl     DATABASE  USER  ADDRESS  METHOD  [OPTIONS]\n# hostgssenc    DATABASE  USER  ADDRESS  METHOD  [OPTIONS]\n# hostnogssenc  DATABASE  USER  ADDRESS  METHOD  [OPTIONS]\n#\n# (The uppercase items must be replaced by actual values.)\n#\n# The first field is the connection type:\n# - \"local\" is a Unix-domain socket\n# - \"host\" is a TCP/IP socket (encrypted or not)\n# - \"hostssl\" is a TCP/IP socket that is SSL-encrypted\n# - \"hostnossl\" is a TCP/IP socket that is not SSL-encrypted\n# - \"hostgssenc\" is a TCP/IP socket that is GSSAPI-encrypted\n# - \"hostnogssenc\" is a TCP/IP socket that is not GSSAPI-encrypted\n#\n# DATABASE can be \"all\", \"sameuser\", \"samerole\", \"replication\", a\n# database name, or a comma-separated list thereof. The \"all\"\n# keyword does not match \"replication\". Access to replication\n# must be enabled in a separate record (see example below).\n#\n# USER can be \"all\", a user name, a group name prefixed with \"+\", or a\n# comma-separated list thereof.  In both the DATABASE and USER fields\n# you can also write a file name prefixed with \"@\" to include names\n# from a separate file.\n#\n# ADDRESS specifies the set of hosts the record matches.  
It can be a\n# host name, or it is made up of an IP address and a CIDR mask that is\n# an integer (between 0 and 32 (IPv4) or 128 (IPv6) inclusive) that\n# specifies the number of significant bits in the mask.  A host name\n# that starts with a dot (.) matches a suffix of the actual host name.\n# Alternatively, you can write an IP address and netmask in separate\n# columns to specify the set of hosts.  Instead of a CIDR-address, you\n# can write \"samehost\" to match any of the server's own IP addresses,\n# or \"samenet\" to match any address in any subnet that the server is\n# directly connected to.\n#\n# METHOD can be \"trust\", \"reject\", \"md5\", \"password\", \"scram-sha-256\",\n# \"gss\", \"sspi\", \"ident\", \"peer\", \"pam\", \"ldap\", \"radius\" or \"cert\".\n# Note that \"password\" sends passwords in clear text; \"md5\" or\n# \"scram-sha-256\" are preferred since they send encrypted passwords.\n#\n# OPTIONS are a set of options for the authentication in the format\n# NAME=VALUE.  The available options depend on the different\n# authentication methods -- refer to the \"Client Authentication\"\n# section in the documentation for a list of which options are\n# available for which authentication methods.\n#\n# Database and user names containing spaces, commas, quotes and other\n# special characters must be quoted.  Quoting one of the keywords\n# \"all\", \"sameuser\", \"samerole\" or \"replication\" makes the name lose\n# its special character, and just match a database or username with\n# that name.\n#\n# This file is read on server startup and when the server receives a\n# SIGHUP signal.  If you edit the file on a running system, you have to\n# SIGHUP the server for the changes to take effect, run \"pg_ctl reload\",\n# or execute \"SELECT pg_reload_conf()\".\n#\n# Put your actual configuration here\n# ----------------------------------\n#\n# If you want to allow non-local connections, you need to add more\n# \"host\" records.  
In that case you will also need to make PostgreSQL\n# listen on a non-local interface via the listen_addresses\n# configuration parameter, or via the -i or -h command line switches.\n\n# CAUTION: Configuring the system for local \"trust\" authentication\n# allows any local user to connect as any PostgreSQL user, including\n# the database superuser.  If you do not trust all your local users,\n# use another authentication method.\n\n\n# TYPE  DATABASE        USER            ADDRESS                 METHOD\n\n# \"local\" is for Unix domain socket connections only\nlocal   all             all                                     trust\n# IPv4 local connections:\nhost    all             all             127.0.0.1/32            trust\n# IPv6 local connections:\nhost    all             all             ::1/128                 trust\n# Allow replication connections from localhost, by a user with the\n# replication privilege.\nlocal   replication     all                                     trust\nhost    replication     all             127.0.0.1/32            trust\nhost    replication     all             ::1/128                 trust\n"
  },
  {
    "path": "libs/postgres_ffi/src/controlfile_utils.rs",
    "content": "//!\n//! Utilities for reading and writing the PostgreSQL control file.\n//!\n//! The PostgreSQL control file is one of the first things that the PostgreSQL\n//! server reads when it starts up. It indicates whether the server was shut\n//! down cleanly, or if it crashed or was restored from online backup so that\n//! WAL recovery needs to be performed. It also contains a copy of the latest\n//! checkpoint record and its location in the WAL.\n//!\n//! The control file also contains fields for detecting whether the\n//! data directory is compatible with a postgres binary. That includes\n//! a version number, configuration options that can be set at\n//! compilation time like the block size, and the platform's alignment\n//! and endianness information. (The PostgreSQL on-disk file format is\n//! not portable across platforms.)\n//!\n//! The control file is stored in the PostgreSQL data directory, as\n//! `global/pg_control`. The data stored in it is designed to be smaller than\n//! 512 bytes, on the assumption that it can be updated atomically. The actual\n//! file is larger, 8192 bytes, but the rest of it is just filled with zeros.\n//!\n//! See src/include/catalog/pg_control.h in the PostgreSQL sources for more\n//! information. You can use PostgreSQL's pg_controldata utility to view its\n//! 
contents.\n//!\nuse super::bindings::{ControlFileData, PG_CONTROL_FILE_SIZE};\n\nuse anyhow::{bail, Result};\nuse bytes::{Bytes, BytesMut};\n\n/// Equivalent to sizeof(ControlFileData) in C\nconst SIZEOF_CONTROLDATA: usize = size_of::<ControlFileData>();\n\nimpl ControlFileData {\n    /// Compute the offset of the `crc` field within the `ControlFileData` struct.\n    /// Equivalent to offsetof(ControlFileData, crc) in C.\n    const fn pg_control_crc_offset() -> usize {\n        std::mem::offset_of!(ControlFileData, crc)\n    }\n\n    ///\n    /// Interpret a slice of bytes as a Postgres control file.\n    ///\n    pub fn decode(buf: &[u8]) -> Result<ControlFileData> {\n        use utils::bin_ser::LeSer;\n\n        // Check that the slice has the expected size. The control file is\n        // padded with zeros up to a 512 byte sector size, so accept a\n        // larger size too, so that the caller can just pass the whole file\n        // contents without knowing the exact size of the struct.\n        if buf.len() < SIZEOF_CONTROLDATA {\n            bail!(\"control file is too short\");\n        }\n\n        // Compute the expected CRC of the content.\n        let OFFSETOF_CRC = Self::pg_control_crc_offset();\n        let expectedcrc = crc32c::crc32c(&buf[0..OFFSETOF_CRC]);\n\n        // Use serde to deserialize the input as a ControlFileData struct.\n        let controlfile = ControlFileData::des_prefix(buf)?;\n\n        // Check the CRC\n        if expectedcrc != controlfile.crc {\n            bail!(\n                \"invalid CRC in control file: expected {:08X}, was {:08X}\",\n                expectedcrc,\n                controlfile.crc\n            );\n        }\n\n        Ok(controlfile)\n    }\n\n    ///\n    /// Convert a struct representing a Postgres control file into raw bytes.\n    ///\n    /// The CRC is recomputed to match the contents of the fields.\n    pub fn encode(&self) -> Bytes {\n        use utils::bin_ser::LeSer;\n\n        // Serialize into a new 
buffer.\n        let b = self.ser().unwrap();\n\n        // Recompute the CRC\n        let OFFSETOF_CRC = Self::pg_control_crc_offset();\n        let newcrc = crc32c::crc32c(&b[0..OFFSETOF_CRC]);\n\n        let mut buf = BytesMut::with_capacity(PG_CONTROL_FILE_SIZE as usize);\n        buf.extend_from_slice(&b[0..OFFSETOF_CRC]);\n        buf.extend_from_slice(&newcrc.to_ne_bytes());\n        // Fill the rest of the control file with zeros.\n        buf.resize(PG_CONTROL_FILE_SIZE as usize, 0);\n\n        buf.into()\n    }\n}\n"
  },
  {
    "path": "libs/postgres_ffi/src/lib.rs",
    "content": "#![allow(non_upper_case_globals)]\n#![allow(non_camel_case_types)]\n#![allow(non_snake_case)]\n// bindgen creates some unsafe code with no doc comments.\n#![allow(clippy::missing_safety_doc)]\n// noted at 1.63 that in many cases there's u32 -> u32 transmutes in bindgen code.\n#![allow(clippy::useless_transmute)]\n// modules included with the postgres_ffi macro depend on the types of the specific version's\n// types, and trigger a too eager lint.\n#![allow(clippy::duplicate_mod)]\n#![deny(clippy::undocumented_unsafe_blocks)]\n\nuse bytes::Bytes;\nuse utils::bin_ser::SerializeError;\nuse utils::lsn::Lsn;\n\npub use postgres_versioninfo::PgMajorVersion;\n\nmacro_rules! postgres_ffi {\n    ($version:ident) => {\n        #[path = \".\"]\n        pub mod $version {\n            pub mod bindings {\n                // bindgen generates bindings for a lot of stuff we don't need\n                #![allow(dead_code)]\n                #![allow(unsafe_op_in_unsafe_fn)]\n                #![allow(clippy::undocumented_unsafe_blocks)]\n                #![allow(clippy::ptr_offset_with_cast)]\n\n                use serde::{Deserialize, Serialize};\n                include!(concat!(\n                    env!(\"OUT_DIR\"),\n                    \"/bindings_\",\n                    stringify!($version),\n                    \".rs\"\n                ));\n\n                include!(concat!(\"pg_constants_\", stringify!($version), \".rs\"));\n            }\n            pub mod controlfile_utils;\n            pub mod nonrelfile_utils;\n            pub mod wal_craft_test_export;\n            pub mod wal_generator;\n            pub mod waldecoder_handler;\n            pub mod xlog_utils;\n\n            pub const PG_MAJORVERSION: &str = stringify!($version);\n\n            // Re-export some symbols from bindings\n            pub use bindings::{CheckPoint, ControlFileData, DBState_DB_SHUTDOWNED, XLogRecord};\n\n            pub const ZERO_CHECKPOINT: bytes::Bytes =\n               
 bytes::Bytes::from_static(&[0u8; xlog_utils::SIZEOF_CHECKPOINT]);\n        }\n    };\n}\n\n#[macro_export]\nmacro_rules! for_all_postgres_versions {\n    ($macro:tt) => {\n        $macro!(v14);\n        $macro!(v15);\n        $macro!(v16);\n        $macro!(v17);\n    };\n}\n\nfor_all_postgres_versions! { postgres_ffi }\n\n/// dispatch_pgversion\n///\n/// Run a code block in a context where the postgres_ffi bindings for a\n/// specific (supported) PostgreSQL version are `use`-ed in scope under the pgv\n/// identifier.\n/// If the provided pg_version is not supported, we panic!(), unless the\n/// optional third argument was provided (in which case that code will provide\n/// the default handling instead).\n///\n/// Use like\n///\n/// dispatch_pgversion!(my_pgversion, { pgv::constants::XLOG_DBASE_CREATE })\n/// dispatch_pgversion!(my_pgversion, pgv::constants::XLOG_DBASE_CREATE)\n///\n/// Other uses are for macro-internal purposes only and strictly unsupported.\n///\n#[macro_export]\nmacro_rules! 
dispatch_pgversion {\n    ($version:expr, $code:expr) => {\n        dispatch_pgversion!($version, $code, panic!(\"Unknown PostgreSQL version {}\", $version))\n    };\n    ($version:expr, $code:expr, $invalid_pgver_handling:expr) => {\n        dispatch_pgversion!(\n            $version => $code,\n            default = $invalid_pgver_handling,\n            pgversions = [\n                $crate::PgMajorVersion::PG14 => v14,\n                $crate::PgMajorVersion::PG15 => v15,\n                $crate::PgMajorVersion::PG16 => v16,\n                $crate::PgMajorVersion::PG17 => v17,\n            ]\n        )\n    };\n    ($pgversion:expr => $code:expr,\n     default = $default:expr,\n     pgversions = [$($sv:pat => $vsv:ident),+ $(,)?]) => {\n        match ($pgversion.clone().into()) {\n            $($sv => {\n                use $crate::$vsv as pgv;\n                $code\n            },)+\n            #[allow(unreachable_patterns)]\n            _ => {\n                $default\n            }\n        }\n    };\n}\n\n#[macro_export]\nmacro_rules! enum_pgversion_dispatch {\n    ($name:expr, $typ:ident, $bind:ident, $code:block) => {\n        enum_pgversion_dispatch!(\n            name = $name,\n            bind = $bind,\n            typ = $typ,\n            code = $code,\n            pgversions = [\n                V14 : v14,\n                V15 : v15,\n                V16 : v16,\n                V17 : v17,\n            ]\n        )\n    };\n    (name = $name:expr,\n     bind = $bind:ident,\n     typ = $typ:ident,\n     code = $code:block,\n     pgversions = [$($variant:ident : $md:ident),+ $(,)?]) => {\n        match $name {\n            $(\n            self::$typ::$variant($bind) => {\n                use $crate::$md as pgv;\n                $code\n            }\n            ),+,\n        }\n    };\n}\n\n#[macro_export]\nmacro_rules! 
enum_pgversion {\n    {$name:ident, pgv :: $t:ident} => {\n        enum_pgversion!{\n            name = $name,\n            typ = $t,\n            pgversions = [\n                V14 : v14,\n                V15 : v15,\n                V16 : v16,\n                V17 : v17,\n            ]\n        }\n    };\n    {$name:ident, pgv :: $p:ident :: $t:ident} => {\n        enum_pgversion!{\n            name = $name,\n            path = $p,\n            typ = $t,\n            pgversions = [\n                V14 : v14,\n                V15 : v15,\n                V16 : v16,\n                V17 : v17,\n            ]\n        }\n    };\n    {name = $name:ident,\n     typ = $t:ident,\n     pgversions = [$($variant:ident : $md:ident),+ $(,)?]} => {\n        pub enum $name {\n            $($variant ( $crate::$md::$t )),+\n        }\n        impl self::$name {\n            pub fn pg_version(&self) -> PgMajorVersion {\n                enum_pgversion_dispatch!(self, $name, _ign, {\n                    pgv::bindings::MY_PGVERSION\n                })\n            }\n        }\n        $(\n        impl Into<self::$name> for $crate::$md::$t {\n            fn into(self) -> self::$name {\n                self::$name::$variant (self)\n            }\n        }\n        )+\n    };\n    {name = $name:ident,\n     path = $p:ident,\n     $(typ = $t:ident,)?\n     pgversions = [$($variant:ident : $md:ident),+ $(,)?]} => {\n        pub enum $name {\n            $($variant $(($crate::$md::$p::$t))?),+\n        }\n        impl $name {\n            pub fn pg_version(&self) -> PgMajorVersion {\n                enum_pgversion_dispatch!(self, $name, _ign, {\n                    pgv::bindings::MY_PGVERSION\n                })\n            }\n        }\n        $(\n        impl Into<$name> for $crate::$md::$p::$t {\n            fn into(self) -> $name {\n                $name::$variant (self)\n            }\n        }\n        )+\n    };\n}\n\npub mod pg_constants;\npub mod relfile_utils;\npub mod 
walrecord;\n\n// Export some widely used datatypes that are unlikely to change across Postgres versions\npub use v14::bindings::{\n    BlockNumber, CheckPoint, ControlFileData, MultiXactId, OffsetNumber, Oid, PageHeaderData,\n    RepOriginId, TimeLineID, TransactionId, XLogRecPtr, XLogRecord, XLogSegNo, uint32, uint64,\n};\n// Likewise for these, although the assumption that these don't change is a little more iffy.\npub use v14::bindings::{MultiXactOffset, MultiXactStatus};\npub use v14::xlog_utils::{\n    XLOG_SIZE_OF_XLOG_LONG_PHD, XLOG_SIZE_OF_XLOG_RECORD, XLOG_SIZE_OF_XLOG_SHORT_PHD,\n};\n\n// from pg_config.h. These can be changed with configure options --with-blocksize=BLOCKSIZE and\n// --with-segsize=SEGSIZE, but assume the defaults for now.\npub const BLCKSZ: u16 = 8192;\npub const RELSEG_SIZE: u32 = 1024 * 1024 * 1024 / (BLCKSZ as u32);\npub const XLOG_BLCKSZ: usize = 8192;\npub const WAL_SEGMENT_SIZE: usize = 16 * 1024 * 1024;\n\npub const MAX_SEND_SIZE: usize = XLOG_BLCKSZ * 16;\n\n// Export some version independent functions that are used outside of this mod\npub use v14::bindings::DBState_DB_SHUTDOWNED;\npub use v14::xlog_utils::{\n    XLogFileName, encode_logical_message, get_current_timestamp, to_pg_timestamp,\n    try_from_pg_timestamp,\n};\n\npub fn bkpimage_is_compressed(bimg_info: u8, version: PgMajorVersion) -> bool {\n    dispatch_pgversion!(version, pgv::bindings::bkpimg_is_compressed(bimg_info))\n}\n\npub fn generate_wal_segment(\n    segno: u64,\n    system_id: u64,\n    pg_version: PgMajorVersion,\n    lsn: Lsn,\n) -> Result<Bytes, SerializeError> {\n    assert_eq!(segno, lsn.segment_number(WAL_SEGMENT_SIZE));\n\n    dispatch_pgversion!(\n        pg_version,\n        pgv::xlog_utils::generate_wal_segment(segno, system_id, lsn)\n    )\n}\n\npub fn generate_pg_control(\n    pg_control_bytes: &[u8],\n    checkpoint_bytes: &[u8],\n    lsn: Lsn,\n    pg_version: PgMajorVersion,\n) -> anyhow::Result<(Bytes, u64, bool)> {\n    
dispatch_pgversion!(\n        pg_version,\n        pgv::xlog_utils::generate_pg_control(pg_control_bytes, checkpoint_bytes, lsn),\n        anyhow::bail!(\"Unknown version {}\", pg_version)\n    )\n}\n\n// PG timeline is always 1, changing it doesn't have any useful meaning in Neon.\n//\n// NOTE: this is not to be confused with Neon timelines; different concept!\n//\n// It's a shaky assumption, that it's always 1. We might import a\n// PostgreSQL data directory that has gone through timeline bumps,\n// for example. FIXME later.\npub const PG_TLI: u32 = 1;\n\n//  See TransactionIdIsNormal in transam.h\npub const fn transaction_id_is_normal(id: TransactionId) -> bool {\n    id > pg_constants::FIRST_NORMAL_TRANSACTION_ID\n}\n\n// See TransactionIdPrecedes in transam.c\npub const fn transaction_id_precedes(id1: TransactionId, id2: TransactionId) -> bool {\n    /*\n     * If either ID is a permanent XID then we can just do unsigned\n     * comparison.  If both are normal, do a modulo-2^32 comparison.\n     */\n\n    if !(transaction_id_is_normal(id1)) || !transaction_id_is_normal(id2) {\n        return id1 < id2;\n    }\n\n    let diff = id1.wrapping_sub(id2) as i32;\n    diff < 0\n}\n\n// Check if page is not yet initialized (port of Postgres PageIsInit() macro)\npub fn page_is_new(pg: &[u8]) -> bool {\n    pg[14] == 0 && pg[15] == 0 // pg_upper == 0\n}\n\n// ExtractLSN from page header\npub fn page_get_lsn(pg: &[u8]) -> Lsn {\n    Lsn(\n        ((u32::from_le_bytes(pg[0..4].try_into().unwrap()) as u64) << 32)\n            | u32::from_le_bytes(pg[4..8].try_into().unwrap()) as u64,\n    )\n}\n\npub fn page_set_lsn(pg: &mut [u8], lsn: Lsn) {\n    pg[0..4].copy_from_slice(&((lsn.0 >> 32) as u32).to_le_bytes());\n    pg[4..8].copy_from_slice(&(lsn.0 as u32).to_le_bytes());\n}\n\n// This is port of function with the same name from freespace.c.\n// The only difference is that it does not have \"level\" parameter because XLogRecordPageWithFreeSpace\n// always call it with 
level=FSM_BOTTOM_LEVEL\npub fn fsm_logical_to_physical(addr: BlockNumber) -> BlockNumber {\n    let mut leafno = addr;\n    const FSM_TREE_DEPTH: u32 = if pg_constants::SLOTS_PER_FSM_PAGE >= 1626 {\n        3\n    } else {\n        4\n    };\n\n    /* Count upper level nodes required to address the leaf page */\n    let mut pages: BlockNumber = 0;\n    for _l in 0..FSM_TREE_DEPTH {\n        pages += leafno + 1;\n        leafno /= pg_constants::SLOTS_PER_FSM_PAGE;\n    }\n    /* Turn the page count into 0-based block number */\n    pages - 1\n}\n\npub mod waldecoder {\n    use std::num::NonZeroU32;\n\n    use crate::PgMajorVersion;\n    use bytes::{Buf, Bytes, BytesMut};\n    use thiserror::Error;\n    use utils::lsn::Lsn;\n\n    pub enum State {\n        WaitingForRecord,\n        ReassemblingRecord {\n            recordbuf: BytesMut,\n            contlen: NonZeroU32,\n        },\n        SkippingEverything {\n            skip_until_lsn: Lsn,\n        },\n    }\n\n    pub struct WalStreamDecoder {\n        pub lsn: Lsn,\n        pub pg_version: PgMajorVersion,\n        pub inputbuf: BytesMut,\n        pub state: State,\n    }\n\n    #[derive(Error, Debug, Clone)]\n    #[error(\"{msg} at {lsn}\")]\n    pub struct WalDecodeError {\n        pub msg: String,\n        pub lsn: Lsn,\n    }\n\n    impl WalStreamDecoder {\n        pub fn new(lsn: Lsn, pg_version: PgMajorVersion) -> WalStreamDecoder {\n            WalStreamDecoder {\n                lsn,\n                pg_version,\n                inputbuf: BytesMut::new(),\n                state: State::WaitingForRecord,\n            }\n        }\n\n        // The latest LSN position fed to the decoder.\n        pub fn available(&self) -> Lsn {\n            self.lsn + self.inputbuf.remaining() as u64\n        }\n\n        /// Returns the LSN up to which the WAL decoder has processed.\n        ///\n        /// If [`Self::poll_decode`] returned a record, then this will return\n        /// the end LSN of said record.\n      
  pub fn lsn(&self) -> Lsn {\n            self.lsn\n        }\n\n        pub fn feed_bytes(&mut self, buf: &[u8]) {\n            self.inputbuf.extend_from_slice(buf);\n        }\n\n        pub fn poll_decode(&mut self) -> Result<Option<(Lsn, Bytes)>, WalDecodeError> {\n            dispatch_pgversion!(\n                self.pg_version,\n                {\n                    use pgv::waldecoder_handler::WalStreamDecoderHandler;\n                    self.poll_decode_internal()\n                },\n                Err(WalDecodeError {\n                    msg: format!(\"Unknown version {}\", self.pg_version),\n                    lsn: self.lsn,\n                })\n            )\n        }\n    }\n}\n"
  },
  {
    "path": "libs/postgres_ffi/src/nonrelfile_utils.rs",
    "content": "//!\n//! Common utilities for dealing with PostgreSQL non-relation files.\n//!\nuse crate::pg_constants;\nuse crate::transaction_id_precedes;\nuse bytes::BytesMut;\n\nuse super::bindings::MultiXactId;\n\npub fn transaction_id_set_status(xid: u32, status: u8, page: &mut BytesMut) {\n    tracing::trace!(\n        \"handle_apply_request for RM_XACT_ID-{} (1-commit, 2-abort, 3-sub_commit)\",\n        status\n    );\n\n    let byteno: usize =\n        ((xid % pg_constants::CLOG_XACTS_PER_PAGE) / pg_constants::CLOG_XACTS_PER_BYTE) as usize;\n\n    let bshift: u8 =\n        ((xid % pg_constants::CLOG_XACTS_PER_BYTE) * pg_constants::CLOG_BITS_PER_XACT as u32) as u8;\n\n    page[byteno] =\n        (page[byteno] & !(pg_constants::CLOG_XACT_BITMASK << bshift)) | (status << bshift);\n}\n\npub fn transaction_id_get_status(xid: u32, page: &[u8]) -> u8 {\n    let byteno: usize =\n        ((xid % pg_constants::CLOG_XACTS_PER_PAGE) / pg_constants::CLOG_XACTS_PER_BYTE) as usize;\n\n    let bshift: u8 =\n        ((xid % pg_constants::CLOG_XACTS_PER_BYTE) * pg_constants::CLOG_BITS_PER_XACT as u32) as u8;\n\n    (page[byteno] >> bshift) & pg_constants::CLOG_XACT_BITMASK\n}\n\n// See CLOGPagePrecedes in clog.c\npub const fn clogpage_precedes(page1: u32, page2: u32) -> bool {\n    let mut xid1 = page1 * pg_constants::CLOG_XACTS_PER_PAGE;\n    xid1 += pg_constants::FIRST_NORMAL_TRANSACTION_ID + 1;\n    let mut xid2 = page2 * pg_constants::CLOG_XACTS_PER_PAGE;\n    xid2 += pg_constants::FIRST_NORMAL_TRANSACTION_ID + 1;\n\n    transaction_id_precedes(xid1, xid2)\n        && transaction_id_precedes(xid1, xid2 + pg_constants::CLOG_XACTS_PER_PAGE - 1)\n}\n\n// See SlruMayDeleteSegment() in slru.c\npub fn slru_may_delete_clogsegment(segpage: u32, cutoff_page: u32) -> bool {\n    let seg_last_page = segpage + pg_constants::SLRU_PAGES_PER_SEGMENT - 1;\n\n    assert_eq!(segpage % pg_constants::SLRU_PAGES_PER_SEGMENT, 0);\n\n    clogpage_precedes(segpage, cutoff_page) && 
clogpage_precedes(seg_last_page, cutoff_page)\n}\n\n// Multixact utils\n\npub fn mx_offset_to_flags_offset(xid: MultiXactId) -> usize {\n    ((xid / pg_constants::MULTIXACT_MEMBERS_PER_MEMBERGROUP as u32)\n        % pg_constants::MULTIXACT_MEMBERGROUPS_PER_PAGE as u32\n        * pg_constants::MULTIXACT_MEMBERGROUP_SIZE as u32) as usize\n}\n\npub fn mx_offset_to_flags_bitshift(xid: MultiXactId) -> u16 {\n    (xid as u16) % pg_constants::MULTIXACT_MEMBERS_PER_MEMBERGROUP\n        * pg_constants::MXACT_MEMBER_BITS_PER_XACT\n}\n\n/* Location (byte offset within page) of TransactionId of given member */\npub fn mx_offset_to_member_offset(xid: MultiXactId) -> usize {\n    mx_offset_to_flags_offset(xid)\n        + (pg_constants::MULTIXACT_FLAGBYTES_PER_GROUP\n            + (xid as u16 % pg_constants::MULTIXACT_MEMBERS_PER_MEMBERGROUP) * 4) as usize\n}\n\nfn mx_offset_to_member_page(xid: u32) -> u32 {\n    xid / pg_constants::MULTIXACT_MEMBERS_PER_PAGE as u32\n}\n\npub fn mx_offset_to_member_segment(xid: u32) -> i32 {\n    (mx_offset_to_member_page(xid) / pg_constants::SLRU_PAGES_PER_SEGMENT) as i32\n}\n\n#[cfg(test)]\nmod tests {\n    use super::*;\n\n    #[test]\n    fn test_multixid_calc() {\n        // Check that the mx_offset_* functions produce the same values as the\n        // corresponding PostgreSQL C macros (MXOffsetTo*). 
These test values\n        // were generated by calling the PostgreSQL macros with a little C\n        // program.\n        assert_eq!(mx_offset_to_member_segment(0), 0);\n        assert_eq!(mx_offset_to_member_page(0), 0);\n        assert_eq!(mx_offset_to_flags_offset(0), 0);\n        assert_eq!(mx_offset_to_flags_bitshift(0), 0);\n        assert_eq!(mx_offset_to_member_offset(0), 4);\n        assert_eq!(mx_offset_to_member_segment(1), 0);\n        assert_eq!(mx_offset_to_member_page(1), 0);\n        assert_eq!(mx_offset_to_flags_offset(1), 0);\n        assert_eq!(mx_offset_to_flags_bitshift(1), 8);\n        assert_eq!(mx_offset_to_member_offset(1), 8);\n        assert_eq!(mx_offset_to_member_segment(123456789), 2358);\n        assert_eq!(mx_offset_to_member_page(123456789), 75462);\n        assert_eq!(mx_offset_to_flags_offset(123456789), 4780);\n        assert_eq!(mx_offset_to_flags_bitshift(123456789), 8);\n        assert_eq!(mx_offset_to_member_offset(123456789), 4788);\n        assert_eq!(mx_offset_to_member_segment(u32::MAX - 1), 82040);\n        assert_eq!(mx_offset_to_member_page(u32::MAX - 1), 2625285);\n        assert_eq!(mx_offset_to_flags_offset(u32::MAX - 1), 5160);\n        assert_eq!(mx_offset_to_flags_bitshift(u32::MAX - 1), 16);\n        assert_eq!(mx_offset_to_member_offset(u32::MAX - 1), 5172);\n        assert_eq!(mx_offset_to_member_segment(u32::MAX), 82040);\n        assert_eq!(mx_offset_to_member_page(u32::MAX), 2625285);\n        assert_eq!(mx_offset_to_flags_offset(u32::MAX), 5160);\n        assert_eq!(mx_offset_to_flags_bitshift(u32::MAX), 24);\n        assert_eq!(mx_offset_to_member_offset(u32::MAX), 5176);\n    }\n}\n"
  },
  {
    "path": "libs/postgres_ffi/src/pg_constants.rs",
    "content": "//!\n//! Misc constants, copied from PostgreSQL headers.\n//!\n//! Only place version-independent constants here.\n//!\n//! TODO: These probably should be auto-generated using bindgen,\n//! rather than copied by hand. Although on the other hand, it's nice\n//! to have them all here in one place, and have the ability to add\n//! comments on them.\n//!\n\nuse crate::{BLCKSZ, PageHeaderData};\n\n// Note: There are a few more widely-used constants in the postgres_ffi_types::constants crate.\n\n// From storage_xlog.h\npub const XLOG_SMGR_CREATE: u8 = 0x10;\npub const XLOG_SMGR_TRUNCATE: u8 = 0x20;\n\npub const SMGR_TRUNCATE_HEAP: u32 = 0x0001;\npub const SMGR_TRUNCATE_VM: u32 = 0x0002;\npub const SMGR_TRUNCATE_FSM: u32 = 0x0004;\n\n//\n// From bufpage.h\n//\n\n// Assumes 8 byte alignment\nconst SIZEOF_PAGE_HEADER_DATA: usize = size_of::<PageHeaderData>();\npub const MAXALIGN_SIZE_OF_PAGE_HEADER_DATA: usize = (SIZEOF_PAGE_HEADER_DATA + 7) & !7;\n\n//\n// constants from clog.h\n//\npub const CLOG_XACTS_PER_BYTE: u32 = 4;\npub const CLOG_XACTS_PER_PAGE: u32 = BLCKSZ as u32 * CLOG_XACTS_PER_BYTE;\npub const CLOG_BITS_PER_XACT: u8 = 2;\npub const CLOG_XACT_BITMASK: u8 = (1 << CLOG_BITS_PER_XACT) - 1;\n\npub const TRANSACTION_STATUS_COMMITTED: u8 = 0x01;\npub const TRANSACTION_STATUS_ABORTED: u8 = 0x02;\npub const TRANSACTION_STATUS_SUB_COMMITTED: u8 = 0x03;\n\npub const CLOG_ZEROPAGE: u8 = 0x00;\npub const CLOG_TRUNCATE: u8 = 0x10;\n\n//\n// Constants from visibilitymap.h, visibilitymapdefs.h and visibilitymap.c\n//\npub const SIZE_OF_PAGE_HEADER: u16 = 24;\npub const BITS_PER_BYTE: u16 = 8;\npub const HEAPBLOCKS_PER_PAGE: u32 =\n    (BLCKSZ - SIZE_OF_PAGE_HEADER) as u32 * 8 / BITS_PER_HEAPBLOCK as u32;\npub const HEAPBLOCKS_PER_BYTE: u16 = BITS_PER_BYTE / BITS_PER_HEAPBLOCK;\n\npub const fn HEAPBLK_TO_MAPBLOCK(x: u32) -> u32 {\n    x / HEAPBLOCKS_PER_PAGE\n}\npub const fn HEAPBLK_TO_MAPBYTE(x: u32) -> u32 {\n    (x % HEAPBLOCKS_PER_PAGE) / 
HEAPBLOCKS_PER_BYTE as u32\n}\npub const fn HEAPBLK_TO_OFFSET(x: u32) -> u32 {\n    (x % HEAPBLOCKS_PER_BYTE as u32) * BITS_PER_HEAPBLOCK as u32\n}\n\npub const BITS_PER_HEAPBLOCK: u16 = 2;\npub const VISIBILITYMAP_ALL_VISIBLE: u8 = 0x01;\npub const VISIBILITYMAP_ALL_FROZEN: u8 = 0x02;\npub const VISIBILITYMAP_VALID_BITS: u8 = 0x03;\n\n// From xact.h\npub const XLOG_XACT_COMMIT: u8 = 0x00;\npub const XLOG_XACT_PREPARE: u8 = 0x10;\npub const XLOG_XACT_ABORT: u8 = 0x20;\npub const XLOG_XACT_COMMIT_PREPARED: u8 = 0x30;\npub const XLOG_XACT_ABORT_PREPARED: u8 = 0x40;\n\n// From standbydefs.h\npub const XLOG_RUNNING_XACTS: u8 = 0x10;\n\n// From slru.h\npub const SLRU_PAGES_PER_SEGMENT: u32 = 32;\npub const SLRU_SEG_SIZE: usize = BLCKSZ as usize * SLRU_PAGES_PER_SEGMENT as usize;\n\n/* mask for filtering opcodes out of xl_info */\npub const XLOG_XACT_OPMASK: u8 = 0x70;\npub const XLOG_HEAP_OPMASK: u8 = 0x70;\n/* does this record have a 'xinfo' field or not */\npub const XLOG_XACT_HAS_INFO: u8 = 0x80;\n\n/*\n * The following flags, stored in xinfo, determine which information is\n * contained in commit/abort records.\n */\npub const XACT_XINFO_HAS_DBINFO: u32 = 1u32 << 0;\npub const XACT_XINFO_HAS_SUBXACTS: u32 = 1u32 << 1;\npub const XACT_XINFO_HAS_RELFILENODES: u32 = 1u32 << 2;\npub const XACT_XINFO_HAS_INVALS: u32 = 1u32 << 3;\npub const XACT_XINFO_HAS_TWOPHASE: u32 = 1u32 << 4;\npub const XACT_XINFO_HAS_ORIGIN: u32 = 1u32 << 5;\n// pub const XACT_XINFO_HAS_AE_LOCKS: u32 = 1u32 << 6;\n// pub const XACT_XINFO_HAS_GID: u32 = 1u32 << 7;\n\n// From pg_control.h and rmgrlist.h\npub const XLOG_NEXTOID: u8 = 0x30;\npub const XLOG_SWITCH: u8 = 0x40;\npub const XLOG_FPI_FOR_HINT: u8 = 0xA0;\npub const XLOG_FPI: u8 = 0xB0;\n\n// From multixact.h\npub const FIRST_MULTIXACT_ID: u32 = 1;\npub const MAX_MULTIXACT_ID: u32 = 0xFFFFFFFF;\npub const MAX_MULTIXACT_OFFSET: u32 = 0xFFFFFFFF;\n\npub const XLOG_MULTIXACT_ZERO_OFF_PAGE: u8 = 0x00;\npub const XLOG_MULTIXACT_ZERO_MEM_PAGE: u8 = 
0x10;\npub const XLOG_MULTIXACT_CREATE_ID: u8 = 0x20;\npub const XLOG_MULTIXACT_TRUNCATE_ID: u8 = 0x30;\n\npub const MULTIXACT_OFFSETS_PER_PAGE: u16 = BLCKSZ / 4;\npub const MXACT_MEMBER_BITS_PER_XACT: u16 = 8;\npub const MXACT_MEMBER_FLAGS_PER_BYTE: u16 = 1;\npub const MULTIXACT_FLAGBYTES_PER_GROUP: u16 = 4;\npub const MULTIXACT_MEMBERS_PER_MEMBERGROUP: u16 =\n    MULTIXACT_FLAGBYTES_PER_GROUP * MXACT_MEMBER_FLAGS_PER_BYTE;\n/* size in bytes of a complete group */\npub const MULTIXACT_MEMBERGROUP_SIZE: u16 =\n    4 * MULTIXACT_MEMBERS_PER_MEMBERGROUP + MULTIXACT_FLAGBYTES_PER_GROUP;\npub const MULTIXACT_MEMBERGROUPS_PER_PAGE: u16 = BLCKSZ / MULTIXACT_MEMBERGROUP_SIZE;\npub const MULTIXACT_MEMBERS_PER_PAGE: u16 =\n    MULTIXACT_MEMBERGROUPS_PER_PAGE * MULTIXACT_MEMBERS_PER_MEMBERGROUP;\n\n// From heapam_xlog.h\npub const XLOG_HEAP_INSERT: u8 = 0x00;\npub const XLOG_HEAP_DELETE: u8 = 0x10;\npub const XLOG_HEAP_UPDATE: u8 = 0x20;\npub const XLOG_HEAP_HOT_UPDATE: u8 = 0x40;\npub const XLOG_HEAP_LOCK: u8 = 0x60;\npub const XLOG_HEAP_INIT_PAGE: u8 = 0x80;\npub const XLOG_HEAP2_VISIBLE: u8 = 0x40;\npub const XLOG_HEAP2_MULTI_INSERT: u8 = 0x50;\npub const XLOG_HEAP2_LOCK_UPDATED: u8 = 0x60;\npub const XLH_LOCK_ALL_FROZEN_CLEARED: u8 = 0x01;\npub const XLH_INSERT_ALL_FROZEN_SET: u8 = (1 << 5) as u8;\npub const XLH_INSERT_ALL_VISIBLE_CLEARED: u8 = (1 << 0) as u8;\npub const XLH_UPDATE_OLD_ALL_VISIBLE_CLEARED: u8 = (1 << 0) as u8;\npub const XLH_UPDATE_NEW_ALL_VISIBLE_CLEARED: u8 = (1 << 1) as u8;\npub const XLH_DELETE_ALL_VISIBLE_CLEARED: u8 = (1 << 0) as u8;\n\n// From heapam_xlog.h\npub const XLOG_HEAP2_REWRITE: u8 = 0x00;\n\n// From replication/message.h\npub const XLOG_LOGICAL_MESSAGE: u8 = 0x00;\n\n// From rmgrlist.h\npub const RM_XLOG_ID: u8 = 0;\npub const RM_XACT_ID: u8 = 1;\npub const RM_SMGR_ID: u8 = 2;\npub const RM_CLOG_ID: u8 = 3;\npub const RM_DBASE_ID: u8 = 4;\npub const RM_TBLSPC_ID: u8 = 5;\npub const RM_MULTIXACT_ID: u8 = 6;\npub const RM_RELMAP_ID: u8 = 
7;\npub const RM_STANDBY_ID: u8 = 8;\npub const RM_HEAP2_ID: u8 = 9;\npub const RM_HEAP_ID: u8 = 10;\npub const RM_REPLORIGIN_ID: u8 = 19;\npub const RM_LOGICALMSG_ID: u8 = 21;\n\n// from neon_rmgr.h\npub const RM_NEON_ID: u8 = 134;\n\npub const XLOG_NEON_HEAP_INIT_PAGE: u8 = 0x80;\n\npub const XLOG_NEON_HEAP_INSERT: u8 = 0x00;\npub const XLOG_NEON_HEAP_DELETE: u8 = 0x10;\npub const XLOG_NEON_HEAP_UPDATE: u8 = 0x20;\npub const XLOG_NEON_HEAP_HOT_UPDATE: u8 = 0x30;\npub const XLOG_NEON_HEAP_LOCK: u8 = 0x40;\npub const XLOG_NEON_HEAP_MULTI_INSERT: u8 = 0x50;\n\npub const XLOG_NEON_HEAP_VISIBLE: u8 = 0x40;\n\n// from xlogreader.h\npub const XLR_INFO_MASK: u8 = 0x0F;\npub const XLR_RMGR_INFO_MASK: u8 = 0xF0;\n\npub const XLOG_TBLSPC_CREATE: u8 = 0x00;\npub const XLOG_TBLSPC_DROP: u8 = 0x10;\n\n//\n// from xlogrecord.h\n//\npub const XLR_MAX_BLOCK_ID: u8 = 32;\n\npub const XLR_BLOCK_ID_DATA_SHORT: u8 = 255;\npub const XLR_BLOCK_ID_DATA_LONG: u8 = 254;\npub const XLR_BLOCK_ID_ORIGIN: u8 = 253;\npub const XLR_BLOCK_ID_TOPLEVEL_XID: u8 = 252;\n\npub const BKPBLOCK_FORK_MASK: u8 = 0x0F;\npub const _BKPBLOCK_FLAG_MASK: u8 = 0xF0;\npub const BKPBLOCK_HAS_IMAGE: u8 = 0x10; /* block data is an XLogRecordBlockImage */\npub const BKPBLOCK_HAS_DATA: u8 = 0x20;\npub const BKPBLOCK_WILL_INIT: u8 = 0x40; /* redo will re-init the page */\npub const BKPBLOCK_SAME_REL: u8 = 0x80; /* RelFileNode omitted, same as previous */\n\n/* Information stored in bimg_info */\npub const BKPIMAGE_HAS_HOLE: u8 = 0x01; /* page image has \"hole\" */\n\n/* From transam.h */\npub const FIRST_NORMAL_TRANSACTION_ID: u32 = 3;\npub const INVALID_TRANSACTION_ID: u32 = 0;\n\n/* pg_control.h */\npub const XLOG_CHECKPOINT_SHUTDOWN: u8 = 0x00;\npub const XLOG_CHECKPOINT_ONLINE: u8 = 0x10;\npub const XLOG_PARAMETER_CHANGE: u8 = 0x60;\npub const XLOG_END_OF_RECOVERY: u8 = 0x90;\n\n/* From xlog.h */\npub const XLOG_REPLORIGIN_SET: u8 = 0x00;\npub const XLOG_REPLORIGIN_DROP: u8 = 0x10;\n\n/* xlog_internal.h */\npub 
const XLP_FIRST_IS_CONTRECORD: u16 = 0x0001;\npub const XLP_LONG_HEADER: u16 = 0x0002;\n\n/* From replication/slot.h */\npub const REPL_SLOT_ON_DISK_OFFSETOF_RESTART_LSN: usize = 4*4  /* offset of `slotdata` in ReplicationSlotOnDisk  */\n   + 64 /* NameData */  + 4*4;\n\n/* From fsm_internals.h */\nconst FSM_NODES_PER_PAGE: usize = BLCKSZ as usize - SIZEOF_PAGE_HEADER_DATA - 4;\nconst FSM_NON_LEAF_NODES_PER_PAGE: usize = BLCKSZ as usize / 2 - 1;\nconst FSM_LEAF_NODES_PER_PAGE: usize = FSM_NODES_PER_PAGE - FSM_NON_LEAF_NODES_PER_PAGE;\npub const SLOTS_PER_FSM_PAGE: u32 = FSM_LEAF_NODES_PER_PAGE as u32;\n\n/* From visibilitymap.c */\n\npub const VM_MAPSIZE: usize = BLCKSZ as usize - MAXALIGN_SIZE_OF_PAGE_HEADER_DATA;\npub const VM_BITS_PER_HEAPBLOCK: usize = 2;\npub const VM_HEAPBLOCKS_PER_BYTE: usize = 8 / VM_BITS_PER_HEAPBLOCK;\npub const VM_HEAPBLOCKS_PER_PAGE: usize = VM_MAPSIZE * VM_HEAPBLOCKS_PER_BYTE;\n\n/* From origin.c */\npub const REPLICATION_STATE_MAGIC: u32 = 0x1257DADE;\n\n// Don't include postgresql.conf as it is inconvenient on node start:\n// we need postgresql.conf before basebackup to synchronize safekeepers\n// so no point in overwriting it during backup restore. Rest of the files\n// here are not needed before backup so it is okay to edit them after.\npub const PGDATA_SPECIAL_FILES: [&str; 3] =\n    [\"pg_hba.conf\", \"pg_ident.conf\", \"postgresql.auto.conf\"];\n\npub static PG_HBA: &str = include_str!(\"../samples/pg_hba.conf\");\n"
  },
  {
    "path": "libs/postgres_ffi/src/pg_constants_v14.rs",
    "content": "use crate::PgMajorVersion;\n\npub const MY_PGVERSION: PgMajorVersion = PgMajorVersion::PG14;\n\npub const XLOG_DBASE_CREATE: u8 = 0x00;\npub const XLOG_DBASE_DROP: u8 = 0x10;\n\npub const BKPIMAGE_IS_COMPRESSED: u8 = 0x02; /* page image is compressed */\npub const BKPIMAGE_APPLY: u8 = 0x04; /* page image should be restored during replay */\npub const SIZEOF_RELMAPFILE: usize = 512; /* sizeof(RelMapFile) in relmapper.c */\n\n// List of subdirectories inside pgdata.\n// Copied from src/bin/initdb/initdb.c\npub const PGDATA_SUBDIRS: [&str; 22] = [\n    \"global\",\n    \"pg_wal/archive_status\",\n    \"pg_commit_ts\",\n    \"pg_dynshmem\",\n    \"pg_notify\",\n    \"pg_serial\",\n    \"pg_snapshots\",\n    \"pg_subtrans\",\n    \"pg_twophase\",\n    \"pg_multixact\",\n    \"pg_multixact/members\",\n    \"pg_multixact/offsets\",\n    \"base\",\n    \"base/1\",\n    \"pg_replslot\",\n    \"pg_tblspc\",\n    \"pg_stat\",\n    \"pg_stat_tmp\",\n    \"pg_xact\",\n    \"pg_logical\",\n    \"pg_logical/snapshots\",\n    \"pg_logical/mappings\",\n];\n\npub fn bkpimg_is_compressed(bimg_info: u8) -> bool {\n    (bimg_info & BKPIMAGE_IS_COMPRESSED) != 0\n}\n"
  },
  {
    "path": "libs/postgres_ffi/src/pg_constants_v15.rs",
    "content": "use crate::PgMajorVersion;\n\npub const MY_PGVERSION: PgMajorVersion = PgMajorVersion::PG15;\n\npub const XACT_XINFO_HAS_DROPPED_STATS: u32 = 1u32 << 8;\n\npub const XLOG_DBASE_CREATE_FILE_COPY: u8 = 0x00;\npub const XLOG_DBASE_CREATE_WAL_LOG: u8 = 0x10;\npub const XLOG_DBASE_DROP: u8 = 0x20;\n\npub const BKPIMAGE_APPLY: u8 = 0x02; /* page image should be restored during replay */\npub const BKPIMAGE_COMPRESS_PGLZ: u8 = 0x04; /* page image is compressed */\npub const BKPIMAGE_COMPRESS_LZ4: u8 = 0x08; /* page image is compressed */\npub const BKPIMAGE_COMPRESS_ZSTD: u8 = 0x10; /* page image is compressed */\n\npub const SIZEOF_RELMAPFILE: usize = 512; /* sizeof(RelMapFile) in relmapper.c */\n\npub use super::super::v14::bindings::PGDATA_SUBDIRS;\n\npub fn bkpimg_is_compressed(bimg_info: u8) -> bool {\n    const ANY_COMPRESS_FLAG: u8 = BKPIMAGE_COMPRESS_PGLZ | BKPIMAGE_COMPRESS_LZ4 | BKPIMAGE_COMPRESS_ZSTD;\n\n    (bimg_info & ANY_COMPRESS_FLAG) != 0\n}\n"
  },
  {
    "path": "libs/postgres_ffi/src/pg_constants_v16.rs",
    "content": "use crate::PgMajorVersion;\n\npub const MY_PGVERSION: PgMajorVersion = PgMajorVersion::PG16;\n\npub const XACT_XINFO_HAS_DROPPED_STATS: u32 = 1u32 << 8;\n\npub const XLOG_DBASE_CREATE_FILE_COPY: u8 = 0x00;\npub const XLOG_DBASE_CREATE_WAL_LOG: u8 = 0x10;\npub const XLOG_DBASE_DROP: u8 = 0x20;\n\npub const BKPIMAGE_APPLY: u8 = 0x02; /* page image should be restored during replay */\npub const BKPIMAGE_COMPRESS_PGLZ: u8 = 0x04; /* page image is compressed */\npub const BKPIMAGE_COMPRESS_LZ4: u8 = 0x08; /* page image is compressed */\npub const BKPIMAGE_COMPRESS_ZSTD: u8 = 0x10; /* page image is compressed */\n\npub const SIZEOF_RELMAPFILE: usize = 524; /* sizeof(RelMapFile) in relmapper.c */\n\npub use super::super::v14::bindings::PGDATA_SUBDIRS;\n\npub fn bkpimg_is_compressed(bimg_info: u8) -> bool {\n    const ANY_COMPRESS_FLAG: u8 = BKPIMAGE_COMPRESS_PGLZ | BKPIMAGE_COMPRESS_LZ4 | BKPIMAGE_COMPRESS_ZSTD;\n\n    (bimg_info & ANY_COMPRESS_FLAG) != 0\n}\n"
  },
  {
    "path": "libs/postgres_ffi/src/pg_constants_v17.rs",
    "content": "use crate::PgMajorVersion;\n\npub const MY_PGVERSION: PgMajorVersion = PgMajorVersion::PG17;\n\npub const XACT_XINFO_HAS_DROPPED_STATS: u32 = 1u32 << 8;\n\npub const XLOG_DBASE_CREATE_FILE_COPY: u8 = 0x00;\npub const XLOG_DBASE_CREATE_WAL_LOG: u8 = 0x10;\npub const XLOG_DBASE_DROP: u8 = 0x20;\n\npub const BKPIMAGE_APPLY: u8 = 0x02; /* page image should be restored during replay */\npub const BKPIMAGE_COMPRESS_PGLZ: u8 = 0x04; /* page image is compressed */\npub const BKPIMAGE_COMPRESS_LZ4: u8 = 0x08; /* page image is compressed */\npub const BKPIMAGE_COMPRESS_ZSTD: u8 = 0x10; /* page image is compressed */\n\npub const SIZEOF_RELMAPFILE: usize = 524; /* sizeof(RelMapFile) in relmapper.c */\n\n// List of subdirectories inside pgdata.\n// Copied from src/bin/initdb/initdb.c\npub const PGDATA_SUBDIRS: [&str; 23] = [\n    \"global\",\n    \"pg_wal/archive_status\",\n    \"pg_wal/summaries\",\n    \"pg_commit_ts\",\n    \"pg_dynshmem\",\n    \"pg_notify\",\n    \"pg_serial\",\n    \"pg_snapshots\",\n    \"pg_subtrans\",\n    \"pg_twophase\",\n    \"pg_multixact\",\n    \"pg_multixact/members\",\n    \"pg_multixact/offsets\",\n    \"base\",\n    \"base/1\",\n    \"pg_replslot\",\n    \"pg_tblspc\",\n    \"pg_stat\",\n    \"pg_stat_tmp\",\n    \"pg_xact\",\n    \"pg_logical\",\n    \"pg_logical/snapshots\",\n    \"pg_logical/mappings\",\n];\n\npub fn bkpimg_is_compressed(bimg_info: u8) -> bool {\n    const ANY_COMPRESS_FLAG: u8 = BKPIMAGE_COMPRESS_PGLZ | BKPIMAGE_COMPRESS_LZ4 | BKPIMAGE_COMPRESS_ZSTD;\n\n    (bimg_info & ANY_COMPRESS_FLAG) != 0\n}\n\n\npub const XLOG_HEAP2_PRUNE_ON_ACCESS: u8 = 0x10;\npub const XLOG_HEAP2_PRUNE_VACUUM_SCAN: u8 = 0x20;\npub const XLOG_HEAP2_PRUNE_VACUUM_CLEANUP: u8 = 0x30;\n\n\npub const XLOG_OVERWRITE_CONTRECORD: u8 = 0xD0;\npub const XLOG_CHECKPOINT_REDO: u8 = 0xE0;\n"
  },
  {
    "path": "libs/postgres_ffi/src/relfile_utils.rs",
    "content": "//!\n//! Common utilities for dealing with PostgreSQL relation files.\n//!\nuse once_cell::sync::OnceCell;\nuse regex::Regex;\n\nuse postgres_ffi_types::forknum::*;\n\n/// Parse a filename of a relation file. Returns (relfilenode, forknum, segno) tuple.\n///\n/// Formats:\n///\n/// ```text\n/// <oid>\n/// <oid>_<fork name>\n/// <oid>.<segment number>\n/// <oid>_<fork name>.<segment number>\n/// ```\n///\n/// See functions relpath() and _mdfd_segpath() in PostgreSQL sources.\n///\npub fn parse_relfilename(fname: &str) -> Result<(u32, u8, u32), FilePathError> {\n    static RELFILE_RE: OnceCell<Regex> = OnceCell::new();\n    RELFILE_RE.get_or_init(|| {\n        Regex::new(r\"^(?P<relnode>\\d+)(_(?P<forkname>[a-z]+))?(\\.(?P<segno>\\d+))?$\").unwrap()\n    });\n\n    let caps = RELFILE_RE\n        .get()\n        .unwrap()\n        .captures(fname)\n        .ok_or(FilePathError::InvalidFileName)?;\n\n    let relnode_str = caps.name(\"relnode\").unwrap().as_str();\n    let relnode = relnode_str\n        .parse::<u32>()\n        .map_err(|_e| FilePathError::InvalidFileName)?;\n\n    let forkname = caps.name(\"forkname\").map(|f| f.as_str());\n    let forknum = forkname_to_number(forkname)?;\n\n    let segno_match = caps.name(\"segno\");\n    let segno = if segno_match.is_none() {\n        0\n    } else {\n        segno_match\n            .unwrap()\n            .as_str()\n            .parse::<u32>()\n            .map_err(|_e| FilePathError::InvalidFileName)?\n    };\n\n    Ok((relnode, forknum, segno))\n}\n\n#[cfg(test)]\nmod tests {\n    use super::*;\n\n    #[test]\n    fn test_parse_valid_relfilenames() {\n        assert_eq!(parse_relfilename(\"1234\"), Ok((1234, 0, 0)));\n        assert_eq!(parse_relfilename(\"1234_fsm\"), Ok((1234, 1, 0)));\n        assert_eq!(parse_relfilename(\"1234_vm\"), Ok((1234, 2, 0)));\n        assert_eq!(parse_relfilename(\"1234_init\"), Ok((1234, 3, 0)));\n\n        assert_eq!(parse_relfilename(\"1234.12\"), Ok((1234, 0, 
12)));\n        assert_eq!(parse_relfilename(\"1234_fsm.12\"), Ok((1234, 1, 12)));\n        assert_eq!(parse_relfilename(\"1234_vm.12\"), Ok((1234, 2, 12)));\n        assert_eq!(parse_relfilename(\"1234_init.12\"), Ok((1234, 3, 12)));\n\n        // relfilenode is unsigned, so it can go up to 2^32-1\n        assert_eq!(parse_relfilename(\"3147483648\"), Ok((3147483648, 0, 0)));\n    }\n\n    #[test]\n    fn test_parse_invalid_relfilenames() {\n        assert_eq!(\n            parse_relfilename(\"foo\"),\n            Err(FilePathError::InvalidFileName)\n        );\n        assert_eq!(\n            parse_relfilename(\"1.2.3\"),\n            Err(FilePathError::InvalidFileName)\n        );\n        assert_eq!(\n            parse_relfilename(\"1234_invalid\"),\n            Err(FilePathError::InvalidForkName)\n        );\n        assert_eq!(\n            parse_relfilename(\"1234_\"),\n            Err(FilePathError::InvalidFileName)\n        );\n\n        // too large for u32\n        assert_eq!(\n            parse_relfilename(\"12345678901\"),\n            Err(FilePathError::InvalidFileName)\n        );\n        assert_eq!(\n            parse_relfilename(\"-1234\"),\n            Err(FilePathError::InvalidFileName)\n        );\n    }\n\n    #[test]\n    fn test_parse_weird_relfilenames() {\n        // we accept 0 for the relfilenode, but PostgreSQL should never do that.\n        assert_eq!(parse_relfilename(\"0\"), Ok((0, 0, 0)));\n\n        // PostgreSQL has a limit of 2^32-2 blocks in a table. With 8k block size and\n        // 1 GB segments, the max segment number is 32767. But we accept larger values\n        // currently.\n        assert_eq!(parse_relfilename(\"1.123456\"), Ok((1, 0, 123456)));\n    }\n}\n"
  },
  {
    "path": "libs/postgres_ffi/src/wal_craft_test_export.rs",
    "content": "//! This module is for WAL craft to test with postgres_ffi. It should not be imported in normal usage.\n\npub use super::PG_MAJORVERSION;\npub use super::xlog_utils::*;\npub use super::bindings::*;\npub use crate::WAL_SEGMENT_SIZE;\n"
  },
  {
    "path": "libs/postgres_ffi/src/wal_generator.rs",
    "content": "use std::ffi::{CStr, CString};\n\nuse bytes::{Bytes, BytesMut};\nuse crc32c::crc32c_append;\nuse utils::lsn::Lsn;\n\nuse super::bindings::{RmgrId, XLogLongPageHeaderData, XLogPageHeaderData, XLOG_PAGE_MAGIC};\nuse super::xlog_utils::{\n    XlLogicalMessage, XLOG_RECORD_CRC_OFFS, XLOG_SIZE_OF_XLOG_RECORD, XLP_BKP_REMOVABLE,\n    XLP_FIRST_IS_CONTRECORD,\n};\nuse super::XLogRecord;\nuse crate::pg_constants::{\n    RM_LOGICALMSG_ID, XLOG_LOGICAL_MESSAGE, XLP_LONG_HEADER, XLR_BLOCK_ID_DATA_LONG,\n    XLR_BLOCK_ID_DATA_SHORT,\n};\nuse crate::{WAL_SEGMENT_SIZE, XLOG_BLCKSZ};\n\n/// A WAL record payload. Will be prefixed by an XLogRecord header when encoded.\npub struct Record {\n    pub rmid: RmgrId,\n    pub info: u8,\n    pub data: Bytes,\n}\n\nimpl Record {\n    /// Encodes the WAL record including an XLogRecord header. prev_lsn is the start position of\n    /// the previous record in the WAL -- this is ignored by the Safekeeper, but not Postgres.\n    pub fn encode(&self, prev_lsn: Lsn) -> Bytes {\n        // Prefix data with block ID and length.\n        let data_header = Bytes::from(match self.data.len() {\n            0 => vec![],\n            1..=255 => vec![XLR_BLOCK_ID_DATA_SHORT, self.data.len() as u8],\n            256.. 
=> {\n                let len_bytes = (self.data.len() as u32).to_le_bytes();\n                [&[XLR_BLOCK_ID_DATA_LONG], len_bytes.as_slice()].concat()\n            }\n        });\n\n        // Construct the WAL record header.\n        let mut header = XLogRecord {\n            xl_tot_len: (XLOG_SIZE_OF_XLOG_RECORD + data_header.len() + self.data.len()) as u32,\n            xl_xid: 0,\n            xl_prev: prev_lsn.into(),\n            xl_info: self.info,\n            xl_rmid: self.rmid,\n            __bindgen_padding_0: [0; 2],\n            xl_crc: 0, // see below\n        };\n\n        // Compute the CRC checksum for the data, and the header up to the CRC field.\n        let mut crc = 0;\n        crc = crc32c_append(crc, &data_header);\n        crc = crc32c_append(crc, &self.data);\n        crc = crc32c_append(crc, &header.encode().unwrap()[0..XLOG_RECORD_CRC_OFFS]);\n        header.xl_crc = crc;\n\n        // Encode the final header and record.\n        let header = header.encode().unwrap();\n\n        [header, data_header, self.data.clone()].concat().into()\n    }\n}\n\n/// Generates WAL record payloads.\n///\n/// TODO: currently only provides LogicalMessageGenerator for trivial noop messages. Add a generator\n/// that creates a table and inserts rows.\npub trait RecordGenerator: Iterator<Item = Record> {}\n\nimpl<I: Iterator<Item = Record>> RecordGenerator for I {}\n\n/// Generates binary WAL for use in tests and benchmarks. The provided record generator constructs\n/// the WAL records. It is used as an iterator which yields encoded bytes for a single WAL record,\n/// including internal page headers if it spans pages. Concatenating the bytes will yield a\n/// complete, well-formed WAL, which can be chunked at segment boundaries if desired. Not optimized\n/// for performance.\n///\n/// The WAL format is version-dependant (see e.g. `XLOG_PAGE_MAGIC`), so make sure to import this\n/// for the appropriate Postgres version (e.g. 
`postgres_ffi::v17::wal_generator::WalGenerator`).\n///\n/// A WAL is split into 16 MB segments. Each segment is split into 8 KB pages, with headers.\n/// Records are arbitrary length, 8-byte aligned, and may span pages. The layout is e.g.:\n///\n/// |        Segment 1         |        Segment 2         |        Segment 3         |\n/// | Page 1 | Page 2 | Page 3 | Page 4 | Page 5 | Page 6 | Page 7 | Page 8 | Page 9 |\n/// | R1 |   R2  |R3|  R4  | R5  |  R6  |                 R7            | R8  |\n#[derive(Default)]\npub struct WalGenerator<R: RecordGenerator> {\n    /// Generates record payloads for the WAL.\n    pub record_generator: R,\n    /// Current LSN to append the next record at.\n    ///\n    /// Callers can modify this (and prev_lsn) to restart generation at a different LSN, but should\n    /// ensure that the LSN is on a valid record boundary (i.e. we can't start appending in the\n    /// middle on an existing record or header, or beyond the end of the existing WAL).\n    pub lsn: Lsn,\n    /// The starting LSN of the previous record. Used in WAL record headers. The Safekeeper doesn't\n    /// care about this, unlike Postgres, but we include it for completeness.\n    pub prev_lsn: Lsn,\n}\n\nimpl<R: RecordGenerator> WalGenerator<R> {\n    // Hardcode the sys and timeline ID. 
We can make them configurable if we care about them.\n    const SYS_ID: u64 = 0;\n    const TIMELINE_ID: u32 = 1;\n\n    /// Creates a new WAL generator with the given record generator.\n    pub fn new(record_generator: R, start_lsn: Lsn) -> WalGenerator<R> {\n        Self {\n            record_generator,\n            lsn: start_lsn,\n            prev_lsn: start_lsn,\n        }\n    }\n\n    /// Appends a record with an arbitrary payload at the current LSN, then increments the LSN.\n    /// Returns the WAL bytes for the record, including page headers and padding, and the start LSN.\n    fn append_record(&mut self, record: Record) -> (Lsn, Bytes) {\n        let record = record.encode(self.prev_lsn);\n        let record = Self::insert_pages(record, self.lsn);\n        let record = Self::pad_record(record, self.lsn);\n        let lsn = self.lsn;\n        self.prev_lsn = self.lsn;\n        self.lsn += record.len() as u64;\n        (lsn, record)\n    }\n\n    /// Inserts page headers on 8KB page boundaries. 
Takes the current LSN position where the record\n    /// is to be appended.\n    fn insert_pages(record: Bytes, mut lsn: Lsn) -> Bytes {\n        // Fast path: record fits in current page, and the page already has a header.\n        if lsn.remaining_in_block() as usize >= record.len() && lsn.block_offset() > 0 {\n            return record;\n        }\n\n        let mut pages = BytesMut::new();\n        let mut remaining = record.clone(); // Bytes::clone() is cheap\n        while !remaining.is_empty() {\n            // At new page boundary, inject page header.\n            if lsn.block_offset() == 0 {\n                let mut page_header = XLogPageHeaderData {\n                    xlp_magic: XLOG_PAGE_MAGIC as u16,\n                    xlp_info: XLP_BKP_REMOVABLE,\n                    xlp_tli: Self::TIMELINE_ID,\n                    xlp_pageaddr: lsn.0,\n                    xlp_rem_len: 0,\n                    __bindgen_padding_0: [0; 4],\n                };\n                // If the record was split across page boundaries, mark as continuation.\n                if remaining.len() < record.len() {\n                    page_header.xlp_rem_len = remaining.len() as u32;\n                    page_header.xlp_info |= XLP_FIRST_IS_CONTRECORD;\n                }\n                // At start of segment, use a long page header.\n                let page_header = if lsn.segment_offset(WAL_SEGMENT_SIZE) == 0 {\n                    page_header.xlp_info |= XLP_LONG_HEADER;\n                    XLogLongPageHeaderData {\n                        std: page_header,\n                        xlp_sysid: Self::SYS_ID,\n                        xlp_seg_size: WAL_SEGMENT_SIZE as u32,\n                        xlp_xlog_blcksz: XLOG_BLCKSZ as u32,\n                    }\n                    .encode()\n                    .unwrap()\n                } else {\n                    page_header.encode().unwrap()\n                };\n                pages.extend_from_slice(&page_header);\n            
    lsn += page_header.len() as u64;\n            }\n\n            // Append the record up to the next page boundary, if any.\n            let page_free = lsn.remaining_in_block() as usize;\n            let chunk = remaining.split_to(std::cmp::min(page_free, remaining.len()));\n            pages.extend_from_slice(&chunk);\n            lsn += chunk.len() as u64;\n        }\n        pages.freeze()\n    }\n\n    /// Records must be 8-byte aligned. Take an encoded record (including any injected page\n    /// boundaries), starting at the given LSN, and add any necessary padding at the end.\n    fn pad_record(record: Bytes, mut lsn: Lsn) -> Bytes {\n        lsn += record.len() as u64;\n        let padding = lsn.calc_padding(8u64) as usize;\n        if padding == 0 {\n            return record;\n        }\n        [record, Bytes::from(vec![0; padding])].concat().into()\n    }\n}\n\n/// Generates WAL records as an iterator.\nimpl<R: RecordGenerator> Iterator for WalGenerator<R> {\n    type Item = (Lsn, Bytes);\n\n    fn next(&mut self) -> Option<Self::Item> {\n        let record = self.record_generator.next()?;\n        Some(self.append_record(record))\n    }\n}\n\n/// Generates logical message records (effectively noops) with a fixed message.\npub struct LogicalMessageGenerator {\n    prefix: CString,\n    message: Vec<u8>,\n}\n\nimpl LogicalMessageGenerator {\n    const DB_ID: u32 = 0; // hardcoded for now\n    const RM_ID: RmgrId = RM_LOGICALMSG_ID;\n    const INFO: u8 = XLOG_LOGICAL_MESSAGE;\n\n    /// Creates a new LogicalMessageGenerator.\n    pub fn new(prefix: &CStr, message: &[u8]) -> Self {\n        Self {\n            prefix: prefix.to_owned(),\n            message: message.to_owned(),\n        }\n    }\n\n    /// Encodes a logical message.\n    fn encode(prefix: &CStr, message: &[u8]) -> Bytes {\n        let prefix = prefix.to_bytes_with_nul();\n        let header = XlLogicalMessage {\n            db_id: Self::DB_ID,\n            transactional: 0,\n            
prefix_size: prefix.len() as u64,\n            message_size: message.len() as u64,\n        };\n        [&header.encode(), prefix, message].concat().into()\n    }\n\n    /// Computes how large a value must be to get a record of the given size. Convenience method to\n    /// construct records of pre-determined size. Panics if the record size is too small.\n    pub fn make_value_size(record_size: usize, prefix: &CStr) -> usize {\n        let xlog_header_size = XLOG_SIZE_OF_XLOG_RECORD;\n        let lm_header_size = size_of::<XlLogicalMessage>();\n        let prefix_size = prefix.to_bytes_with_nul().len();\n        let data_header_size = match record_size - xlog_header_size - 2 {\n            0..=255 => 2,\n            256..=258 => panic!(\"impossible record_size {record_size}\"),\n            259.. => 5,\n        };\n        record_size\n            .checked_sub(xlog_header_size + lm_header_size + prefix_size + data_header_size)\n            .expect(\"record_size too small\")\n    }\n}\n\nimpl Iterator for LogicalMessageGenerator {\n    type Item = Record;\n\n    fn next(&mut self) -> Option<Self::Item> {\n        Some(Record {\n            rmid: Self::RM_ID,\n            info: Self::INFO,\n            data: Self::encode(&self.prefix, &self.message),\n        })\n    }\n}\n\nimpl WalGenerator<LogicalMessageGenerator> {\n    /// Convenience method for appending a WAL record with an arbitrary logical message at the\n    /// current WAL LSN position. Returns the start LSN and resulting WAL bytes.\n    pub fn append_logical_message(&mut self, prefix: &CStr, message: &[u8]) -> (Lsn, Bytes) {\n        let record = Record {\n            rmid: LogicalMessageGenerator::RM_ID,\n            info: LogicalMessageGenerator::INFO,\n            data: LogicalMessageGenerator::encode(prefix, message),\n        };\n        self.append_record(record)\n    }\n}\n"
  },
  {
    "path": "libs/postgres_ffi/src/waldecoder_handler.rs",
    "content": "//!\n//! Basic WAL stream decoding.\n//!\n//! This understands the WAL page and record format, enough to figure out where the WAL record\n//! boundaries are, and to reassemble WAL records that cross page boundaries.\n//!\n//! This functionality is needed by both the pageserver and the safekeepers. The pageserver needs\n//! to look deeper into the WAL records to also understand which blocks they modify, the code\n//! for that is in pageserver/src/walrecord.rs\n//!\nuse super::super::waldecoder::{State, WalDecodeError, WalStreamDecoder};\nuse super::bindings::{XLogLongPageHeaderData, XLogPageHeaderData, XLogRecord, XLOG_PAGE_MAGIC};\nuse super::xlog_utils::*;\nuse crate::WAL_SEGMENT_SIZE;\nuse bytes::{Buf, BufMut, Bytes, BytesMut};\nuse crc32c::*;\nuse std::cmp::min;\nuse std::num::NonZeroU32;\nuse utils::lsn::Lsn;\n\npub trait WalStreamDecoderHandler {\n    fn validate_page_header(&self, hdr: &XLogPageHeaderData) -> Result<(), WalDecodeError>;\n    fn poll_decode_internal(&mut self) -> Result<Option<(Lsn, Bytes)>, WalDecodeError>;\n    fn complete_record(&mut self, recordbuf: Bytes) -> Result<(Lsn, Bytes), WalDecodeError>;\n}\n\n//\n// This is a trick to support several postgres versions simultaneously.\n//\n// Page decoding code depends on postgres bindings, so it is compiled for each version.\n// Thus WalStreamDecoder implements several WalStreamDecoderHandler traits.\n// WalStreamDecoder poll_decode() method dispatches to the right handler based on the postgres version.\n// Other methods are internal and are not dispatched.\n//\n// It is similar to having several impl blocks for the same struct,\n// but the impls here are in different modules, so need to use a trait.\n//\nimpl WalStreamDecoderHandler for WalStreamDecoder {\n    fn validate_page_header(&self, hdr: &XLogPageHeaderData) -> Result<(), WalDecodeError> {\n        let validate_impl = || {\n            if hdr.xlp_magic != XLOG_PAGE_MAGIC as u16 {\n                return Err(format!(\n     
               \"invalid xlog page header: xlp_magic={}, expected {}\",\n                    hdr.xlp_magic, XLOG_PAGE_MAGIC\n                ));\n            }\n            if hdr.xlp_pageaddr != self.lsn.0 {\n                return Err(format!(\n                    \"invalid xlog page header: xlp_pageaddr={}, expected {}\",\n                    hdr.xlp_pageaddr, self.lsn\n                ));\n            }\n            match self.state {\n                State::WaitingForRecord => {\n                    if hdr.xlp_info & XLP_FIRST_IS_CONTRECORD != 0 {\n                        return Err(\n                            \"invalid xlog page header: unexpected XLP_FIRST_IS_CONTRECORD\".into(),\n                        );\n                    }\n                    if hdr.xlp_rem_len != 0 {\n                        return Err(format!(\n                            \"invalid xlog page header: xlp_rem_len={}, but it's not a contrecord\",\n                            hdr.xlp_rem_len\n                        ));\n                    }\n                }\n                State::ReassemblingRecord { contlen, .. } => {\n                    if hdr.xlp_info & XLP_FIRST_IS_CONTRECORD == 0 {\n                        return Err(\n                            \"invalid xlog page header: XLP_FIRST_IS_CONTRECORD expected, not found\"\n                                .into(),\n                        );\n                    }\n                    if hdr.xlp_rem_len != contlen.get() {\n                        return Err(format!(\n                            \"invalid xlog page header: xlp_rem_len={}, expected {}\",\n                            hdr.xlp_rem_len,\n                            contlen.get()\n                        ));\n                    }\n                }\n                State::SkippingEverything { .. 
} => {\n                    panic!(\"Should not be validating page header in the SkippingEverything state\");\n                }\n            };\n            Ok(())\n        };\n        validate_impl().map_err(|msg| WalDecodeError { msg, lsn: self.lsn })\n    }\n\n    /// Attempt to decode another WAL record from the input that has been fed to the\n    /// decoder so far.\n    ///\n    /// Returns one of the following:\n    ///     Ok(Some((Lsn, Bytes))): a tuple containing the LSN of the next record, and the record itself\n    ///     Ok(None): there is not enough data in the input buffer. Feed more by calling the `feed_bytes` function\n    ///     Err(WalDecodeError): an error occurred while decoding, meaning the input was invalid.\n    ///\n    fn poll_decode_internal(&mut self) -> Result<Option<(Lsn, Bytes)>, WalDecodeError> {\n        // Run state machine that validates page headers, and reassembles records\n        // that cross page boundaries.\n        loop {\n            // parse and verify page boundaries as we go\n            // However, we may have to skip some page headers if we're processing the XLOG_SWITCH record or skipping padding for whatever reason.\n            match self.state {\n                State::WaitingForRecord | State::ReassemblingRecord { .. 
} => {\n                    if self.lsn.segment_offset(WAL_SEGMENT_SIZE) == 0 {\n                        // parse long header\n\n                        if self.inputbuf.remaining() < XLOG_SIZE_OF_XLOG_LONG_PHD {\n                            return Ok(None);\n                        }\n\n                        let hdr = XLogLongPageHeaderData::from_bytes(&mut self.inputbuf).map_err(\n                            |e| WalDecodeError {\n                                msg: format!(\"long header deserialization failed {e}\"),\n                                lsn: self.lsn,\n                            },\n                        )?;\n\n                        self.validate_page_header(&hdr.std)?;\n\n                        self.lsn += XLOG_SIZE_OF_XLOG_LONG_PHD as u64;\n                    } else if self.lsn.block_offset() == 0 {\n                        if self.inputbuf.remaining() < XLOG_SIZE_OF_XLOG_SHORT_PHD {\n                            return Ok(None);\n                        }\n\n                        let hdr =\n                            XLogPageHeaderData::from_bytes(&mut self.inputbuf).map_err(|e| {\n                                WalDecodeError {\n                                    msg: format!(\"header deserialization failed {e}\"),\n                                    lsn: self.lsn,\n                                }\n                            })?;\n\n                        self.validate_page_header(&hdr)?;\n\n                        self.lsn += XLOG_SIZE_OF_XLOG_SHORT_PHD as u64;\n                    }\n                }\n                State::SkippingEverything { .. 
} => {}\n            }\n            // now read page contents\n            match &mut self.state {\n                State::WaitingForRecord => {\n                    // need to have at least the xl_tot_len field\n                    if self.inputbuf.remaining() < 4 {\n                        return Ok(None);\n                    }\n\n                    // peek xl_tot_len at the beginning of the record.\n                    // FIXME: assumes little-endian\n                    let xl_tot_len = (&self.inputbuf[0..4]).get_u32_le();\n                    if (xl_tot_len as usize) < XLOG_SIZE_OF_XLOG_RECORD {\n                        return Err(WalDecodeError {\n                            msg: format!(\"invalid xl_tot_len {xl_tot_len}\"),\n                            lsn: self.lsn,\n                        });\n                    }\n                    // Fast path for the common case that the whole record fits on the page.\n                    let pageleft = self.lsn.remaining_in_block() as u32;\n                    if self.inputbuf.remaining() >= xl_tot_len as usize && xl_tot_len <= pageleft {\n                        self.lsn += xl_tot_len as u64;\n                        let recordbuf = self.inputbuf.copy_to_bytes(xl_tot_len as usize);\n                        return Ok(Some(self.complete_record(recordbuf)?));\n                    } else {\n                        // Need to assemble the record from pieces. Remember the size of the\n                        // record, and loop back. On next iterations, we will reach the branch\n                        // below, and copy the part of the record that was on this or next page(s)\n                        // to 'recordbuf'.  
Subsequent iterations will skip page headers, and\n                        // append the continuations from the next pages to 'recordbuf'.\n                        self.state = State::ReassemblingRecord {\n                            recordbuf: BytesMut::with_capacity(xl_tot_len as usize),\n                            contlen: NonZeroU32::new(xl_tot_len).unwrap(),\n                        }\n                    }\n                }\n                State::ReassemblingRecord { recordbuf, contlen } => {\n                    // we're continuing a record, possibly from previous page.\n                    let pageleft = self.lsn.remaining_in_block() as u32;\n\n                    // read the rest of the record, or as much as fits on this page.\n                    let n = min(contlen.get(), pageleft) as usize;\n\n                    if self.inputbuf.remaining() < n {\n                        return Ok(None);\n                    }\n\n                    recordbuf.put(self.inputbuf.split_to(n));\n                    self.lsn += n as u64;\n                    *contlen = match NonZeroU32::new(contlen.get() - n as u32) {\n                        Some(x) => x,\n                        None => {\n                            // The record is now complete.\n                            let recordbuf = std::mem::replace(recordbuf, BytesMut::new()).freeze();\n                            return Ok(Some(self.complete_record(recordbuf)?));\n                        }\n                    }\n                }\n                State::SkippingEverything { skip_until_lsn } => {\n                    assert!(*skip_until_lsn >= self.lsn);\n                    let n = skip_until_lsn.0 - self.lsn.0;\n                    if self.inputbuf.remaining() < n as usize {\n                        return Ok(None);\n                    }\n                    self.inputbuf.advance(n as usize);\n                    self.lsn += n;\n                    self.state = State::WaitingForRecord;\n                
}\n            }\n        }\n    }\n\n    fn complete_record(&mut self, recordbuf: Bytes) -> Result<(Lsn, Bytes), WalDecodeError> {\n        // We now have a record in the 'recordbuf' local variable.\n        let xlogrec =\n            XLogRecord::from_slice(&recordbuf[0..XLOG_SIZE_OF_XLOG_RECORD]).map_err(|e| {\n                WalDecodeError {\n                    msg: format!(\"xlog record deserialization failed {e}\"),\n                    lsn: self.lsn,\n                }\n            })?;\n\n        let mut crc = 0;\n        crc = crc32c_append(crc, &recordbuf[XLOG_RECORD_CRC_OFFS + 4..]);\n        crc = crc32c_append(crc, &recordbuf[0..XLOG_RECORD_CRC_OFFS]);\n        if crc != xlogrec.xl_crc {\n            return Err(WalDecodeError {\n                msg: \"WAL record crc mismatch\".into(),\n                lsn: self.lsn,\n            });\n        }\n\n        // XLOG_SWITCH records are special. If we see one, we need to skip\n        // to the next WAL segment.\n        let next_lsn = if xlogrec.is_xlog_switch_record() {\n            tracing::trace!(\"saw xlog switch record at {}\", self.lsn);\n            self.lsn + self.lsn.calc_padding(WAL_SEGMENT_SIZE as u64)\n        } else {\n            // Pad to an 8-byte boundary\n            self.lsn.align()\n        };\n        self.state = State::SkippingEverything {\n            skip_until_lsn: next_lsn,\n        };\n\n        // We should return LSN of the next record, not the last byte of this record or\n        // the byte immediately after. Note that this handles both XLOG_SWITCH and usual\n        // records, the former \"spans\" until the next WAL segment (see test_xlog_switch).\n        Ok((next_lsn, recordbuf))\n    }\n}\n"
  },
  {
    "path": "libs/postgres_ffi/src/walrecord.rs",
    "content": "//! This module houses types used in decoding of PG WAL\n//! records.\n//!\n//! TODO: Generate separate types for each supported PG version\n\nuse bytes::{Buf, Bytes};\nuse postgres_ffi_types::TimestampTz;\nuse serde::{Deserialize, Serialize};\nuse utils::bin_ser::DeserializeError;\nuse utils::lsn::Lsn;\n\nuse crate::{\n    BLCKSZ, BlockNumber, MultiXactId, MultiXactOffset, MultiXactStatus, Oid, PgMajorVersion,\n    RepOriginId, TransactionId, XLOG_SIZE_OF_XLOG_RECORD, XLogRecord, pg_constants,\n};\n\n#[repr(C)]\n#[derive(Clone, Debug, Serialize, Deserialize)]\npub struct XlMultiXactCreate {\n    pub mid: MultiXactId,\n    /* new MultiXact's ID */\n    pub moff: MultiXactOffset,\n    /* its starting offset in members file */\n    pub nmembers: u32,\n    /* number of member XIDs */\n    pub members: Vec<MultiXactMember>,\n}\n\nimpl XlMultiXactCreate {\n    pub fn decode(buf: &mut Bytes) -> XlMultiXactCreate {\n        let mid = buf.get_u32_le();\n        let moff = buf.get_u32_le();\n        let nmembers = buf.get_u32_le();\n        let mut members = Vec::new();\n        for _ in 0..nmembers {\n            members.push(MultiXactMember::decode(buf));\n        }\n        XlMultiXactCreate {\n            mid,\n            moff,\n            nmembers,\n            members,\n        }\n    }\n}\n\n#[repr(C)]\n#[derive(Clone, Debug, Serialize, Deserialize)]\npub struct XlMultiXactTruncate {\n    pub oldest_multi_db: Oid,\n    /* to-be-truncated range of multixact offsets */\n    pub start_trunc_off: MultiXactId,\n    /* just for completeness' sake */\n    pub end_trunc_off: MultiXactId,\n\n    /* to-be-truncated range of multixact members */\n    pub start_trunc_memb: MultiXactOffset,\n    pub end_trunc_memb: MultiXactOffset,\n}\n\nimpl XlMultiXactTruncate {\n    pub fn decode(buf: &mut Bytes) -> XlMultiXactTruncate {\n        XlMultiXactTruncate {\n            oldest_multi_db: buf.get_u32_le(),\n            start_trunc_off: buf.get_u32_le(),\n            
end_trunc_off: buf.get_u32_le(),\n            start_trunc_memb: buf.get_u32_le(),\n            end_trunc_memb: buf.get_u32_le(),\n        }\n    }\n}\n\n#[repr(C)]\n#[derive(Clone, Debug, Serialize, Deserialize)]\npub struct XlRelmapUpdate {\n    pub dbid: Oid,   /* database ID, or 0 for shared map */\n    pub tsid: Oid,   /* database's tablespace, or pg_global */\n    pub nbytes: i32, /* size of relmap data */\n}\n\nimpl XlRelmapUpdate {\n    pub fn decode(buf: &mut Bytes) -> XlRelmapUpdate {\n        XlRelmapUpdate {\n            dbid: buf.get_u32_le(),\n            tsid: buf.get_u32_le(),\n            nbytes: buf.get_i32_le(),\n        }\n    }\n}\n\n#[repr(C)]\n#[derive(Clone, Debug, Serialize, Deserialize)]\npub struct XlReploriginDrop {\n    pub node_id: RepOriginId,\n}\n\nimpl XlReploriginDrop {\n    pub fn decode(buf: &mut Bytes) -> XlReploriginDrop {\n        XlReploriginDrop {\n            node_id: buf.get_u16_le(),\n        }\n    }\n}\n\n#[repr(C)]\n#[derive(Clone, Debug, Serialize, Deserialize)]\npub struct XlReploriginSet {\n    pub remote_lsn: Lsn,\n    pub node_id: RepOriginId,\n}\n\nimpl XlReploriginSet {\n    pub fn decode(buf: &mut Bytes) -> XlReploriginSet {\n        XlReploriginSet {\n            remote_lsn: Lsn(buf.get_u64_le()),\n            node_id: buf.get_u16_le(),\n        }\n    }\n}\n\n#[repr(C)]\n#[derive(Debug, Clone, Copy, Serialize, Deserialize)]\npub struct RelFileNode {\n    pub spcnode: Oid, /* tablespace */\n    pub dbnode: Oid,  /* database */\n    pub relnode: Oid, /* relation */\n}\n\n#[repr(C)]\n#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]\npub struct MultiXactMember {\n    pub xid: TransactionId,\n    pub status: MultiXactStatus,\n}\n\nimpl MultiXactMember {\n    pub fn decode(buf: &mut Bytes) -> MultiXactMember {\n        MultiXactMember {\n            xid: buf.get_u32_le(),\n            status: buf.get_u32_le(),\n        }\n    }\n}\n\n/// DecodedBkpBlock represents per-page data contained in a WAL 
record.\n#[derive(Default)]\npub struct DecodedBkpBlock {\n    /* Is this block ref in use? */\n    //in_use: bool,\n\n    /* Identify the block this refers to */\n    pub rnode_spcnode: u32,\n    pub rnode_dbnode: u32,\n    pub rnode_relnode: u32,\n    // Note that we have a few special forknum values for non-rel files.\n    pub forknum: u8,\n    pub blkno: u32,\n\n    /* copy of the fork_flags field from the XLogRecordBlockHeader */\n    pub flags: u8,\n\n    /* Information on full-page image, if any */\n    pub has_image: bool,\n    /* has image, even for consistency checking */\n    pub apply_image: bool,\n    /* has image that should be restored */\n    pub will_init: bool,\n    /* record doesn't need previous page version to apply */\n    //char\t   *bkp_image;\n    pub hole_offset: u16,\n    pub hole_length: u16,\n    pub bimg_offset: u32,\n    pub bimg_len: u16,\n    pub bimg_info: u8,\n\n    /* Buffer holding the rmgr-specific data associated with this block */\n    has_data: bool,\n    data_len: u16,\n}\n\nimpl DecodedBkpBlock {\n    pub fn new() -> DecodedBkpBlock {\n        Default::default()\n    }\n}\n\n#[derive(Default)]\npub struct DecodedWALRecord {\n    pub xl_xid: TransactionId,\n    pub xl_info: u8,\n    pub xl_rmid: u8,\n    pub record: Bytes, // raw XLogRecord\n\n    pub blocks: Vec<DecodedBkpBlock>,\n    pub main_data_offset: usize,\n    pub origin_id: u16,\n}\n\nimpl DecodedWALRecord {\n    /// Check if this WAL record represents a legacy \"copy\" database creation, which populates new relations\n    /// by reading other existing relations' data blocks.  
This is more complex to apply than new-style database\n    /// creations which simply include all the desired blocks in the WAL, so we need a helper function to detect this case.\n    pub fn is_dbase_create_copy(&self, pg_version: PgMajorVersion) -> bool {\n        if self.xl_rmid == pg_constants::RM_DBASE_ID {\n            let info = self.xl_info & pg_constants::XLR_RMGR_INFO_MASK;\n            match pg_version {\n                PgMajorVersion::PG14 => {\n                    // Postgres 14 database creations are always the legacy kind\n                    info == crate::v14::bindings::XLOG_DBASE_CREATE\n                }\n                PgMajorVersion::PG15 => info == crate::v15::bindings::XLOG_DBASE_CREATE_FILE_COPY,\n                PgMajorVersion::PG16 => info == crate::v16::bindings::XLOG_DBASE_CREATE_FILE_COPY,\n                PgMajorVersion::PG17 => info == crate::v17::bindings::XLOG_DBASE_CREATE_FILE_COPY,\n            }\n        } else {\n            false\n        }\n    }\n}\n\n/// Main routine to decode a WAL record and figure out which blocks are modified\n//\n// See xlogrecord.h for details\n// The overall layout of an XLOG record is:\n//\t\tFixed-size header (XLogRecord struct)\n//      XLogRecordBlockHeader struct\n//          If pg_constants::BKPBLOCK_HAS_IMAGE, an XLogRecordBlockImageHeader struct follows\n//\t           If pg_constants::BKPIMAGE_HAS_HOLE and pg_constants::BKPIMAGE_IS_COMPRESSED, an\n//\t           XLogRecordBlockCompressHeader struct follows.\n//          If pg_constants::BKPBLOCK_SAME_REL is not set, a RelFileNode follows\n//          BlockNumber follows\n//      XLogRecordBlockHeader struct\n//      ...\n//      XLogRecordDataHeader[Short|Long] struct\n//      block data\n//      block data\n//      ...\n//      main data\n//\n//\n// For performance reasons, the caller provides the DecodedWALRecord struct and the function just fills it in.\n// It would be more natural for this function to return a DecodedWALRecord as return 
value,\n// but reusing the caller-supplied struct avoids an allocation.\n// This code is in the hot path for digesting incoming WAL, and is very performance sensitive.\n//\npub fn decode_wal_record(\n    record: Bytes,\n    decoded: &mut DecodedWALRecord,\n    pg_version: PgMajorVersion,\n) -> anyhow::Result<()> {\n    let mut rnode_spcnode: u32 = 0;\n    let mut rnode_dbnode: u32 = 0;\n    let mut rnode_relnode: u32 = 0;\n    let mut got_rnode = false;\n    let mut origin_id: u16 = 0;\n\n    let mut buf = record.clone();\n\n    // 1. Parse XLogRecord struct\n\n    // FIXME: assume little-endian here\n    let xlogrec = XLogRecord::from_bytes(&mut buf)?;\n\n    tracing::trace!(\n        \"decode_wal_record xl_rmid = {} xl_info = {}\",\n        xlogrec.xl_rmid,\n        xlogrec.xl_info\n    );\n\n    let remaining: usize = xlogrec.xl_tot_len as usize - XLOG_SIZE_OF_XLOG_RECORD;\n\n    if buf.remaining() != remaining {\n        //TODO error\n    }\n\n    let mut max_block_id = 0;\n    let mut blocks_total_len: u32 = 0;\n    let mut main_data_len = 0;\n    let mut datatotal: u32 = 0;\n    decoded.blocks.clear();\n\n    // 2. 
Decode the headers.\n    // XLogRecordBlockHeaders if any,\n    // XLogRecordDataHeader[Short|Long]\n    while buf.remaining() > datatotal as usize {\n        let block_id = buf.get_u8();\n\n        match block_id {\n            pg_constants::XLR_BLOCK_ID_DATA_SHORT => {\n                /* XLogRecordDataHeaderShort */\n                main_data_len = buf.get_u8() as u32;\n                datatotal += main_data_len;\n            }\n\n            pg_constants::XLR_BLOCK_ID_DATA_LONG => {\n                /* XLogRecordDataHeaderLong */\n                main_data_len = buf.get_u32_le();\n                datatotal += main_data_len;\n            }\n\n            pg_constants::XLR_BLOCK_ID_ORIGIN => {\n                // RepOriginId is uint16\n                origin_id = buf.get_u16_le();\n            }\n\n            pg_constants::XLR_BLOCK_ID_TOPLEVEL_XID => {\n                // TransactionId is uint32\n                buf.advance(4);\n            }\n\n            0..=pg_constants::XLR_MAX_BLOCK_ID => {\n                /* XLogRecordBlockHeader */\n                let mut blk = DecodedBkpBlock::new();\n\n                if block_id <= max_block_id {\n                    // TODO\n                    //report_invalid_record(state,\n                    //\t\t\t  \"out-of-order block_id %u at %X/%X\",\n                    //\t\t\t  block_id,\n                    //\t\t\t  (uint32) (state->ReadRecPtr >> 32),\n                    //\t\t\t  (uint32) state->ReadRecPtr);\n                    //    goto err;\n                }\n                max_block_id = block_id;\n\n                let fork_flags: u8 = buf.get_u8();\n                blk.forknum = fork_flags & pg_constants::BKPBLOCK_FORK_MASK;\n                blk.flags = fork_flags;\n                blk.has_image = (fork_flags & pg_constants::BKPBLOCK_HAS_IMAGE) != 0;\n                blk.has_data = (fork_flags & pg_constants::BKPBLOCK_HAS_DATA) != 0;\n                blk.will_init = (fork_flags & 
pg_constants::BKPBLOCK_WILL_INIT) != 0;\n                blk.data_len = buf.get_u16_le();\n\n                /* TODO cross-check that the HAS_DATA flag is set iff data_length > 0 */\n\n                datatotal += blk.data_len as u32;\n                blocks_total_len += blk.data_len as u32;\n\n                if blk.has_image {\n                    blk.bimg_len = buf.get_u16_le();\n                    blk.hole_offset = buf.get_u16_le();\n                    blk.bimg_info = buf.get_u8();\n\n                    blk.apply_image = dispatch_pgversion!(\n                        pg_version,\n                        (blk.bimg_info & pgv::bindings::BKPIMAGE_APPLY) != 0\n                    );\n\n                    let blk_img_is_compressed =\n                        crate::bkpimage_is_compressed(blk.bimg_info, pg_version);\n\n                    if blk_img_is_compressed {\n                        tracing::debug!(\"compressed block image , pg_version = {}\", pg_version);\n                    }\n\n                    if blk_img_is_compressed {\n                        if blk.bimg_info & pg_constants::BKPIMAGE_HAS_HOLE != 0 {\n                            blk.hole_length = buf.get_u16_le();\n                        } else {\n                            blk.hole_length = 0;\n                        }\n                    } else {\n                        blk.hole_length = BLCKSZ - blk.bimg_len;\n                    }\n                    datatotal += blk.bimg_len as u32;\n                    blocks_total_len += blk.bimg_len as u32;\n\n                    /*\n                     * cross-check that hole_offset > 0, hole_length > 0 and\n                     * bimg_len < BLCKSZ if the HAS_HOLE flag is set.\n                     */\n                    if blk.bimg_info & pg_constants::BKPIMAGE_HAS_HOLE != 0\n                        && (blk.hole_offset == 0 || blk.hole_length == 0 || blk.bimg_len == BLCKSZ)\n                    {\n                        // TODO\n                   
     /*\n                        report_invalid_record(state,\n                                      \"pg_constants::BKPIMAGE_HAS_HOLE set, but hole offset %u length %u block image length %u at %X/%X\",\n                                      (unsigned int) blk->hole_offset,\n                                      (unsigned int) blk->hole_length,\n                                      (unsigned int) blk->bimg_len,\n                                      (uint32) (state->ReadRecPtr >> 32), (uint32) state->ReadRecPtr);\n                        goto err;\n                                     */\n                    }\n\n                    /*\n                     * cross-check that hole_offset == 0 and hole_length == 0 if\n                     * the HAS_HOLE flag is not set.\n                     */\n                    if blk.bimg_info & pg_constants::BKPIMAGE_HAS_HOLE == 0\n                        && (blk.hole_offset != 0 || blk.hole_length != 0)\n                    {\n                        // TODO\n                        /*\n                        report_invalid_record(state,\n                                      \"pg_constants::BKPIMAGE_HAS_HOLE not set, but hole offset %u length %u at %X/%X\",\n                                      (unsigned int) blk->hole_offset,\n                                      (unsigned int) blk->hole_length,\n                                      (uint32) (state->ReadRecPtr >> 32), (uint32) state->ReadRecPtr);\n                        goto err;\n                                     */\n                    }\n\n                    /*\n                     * cross-check that bimg_len < BLCKSZ if the IS_COMPRESSED\n                     * flag is set.\n                     */\n                    if !blk_img_is_compressed && blk.bimg_len == BLCKSZ {\n                        // TODO\n                        /*\n                        report_invalid_record(state,\n                                      
\"pg_constants::BKPIMAGE_IS_COMPRESSED set, but block image length %u at %X/%X\",\n                                      (unsigned int) blk->bimg_len,\n                                      (uint32) (state->ReadRecPtr >> 32), (uint32) state->ReadRecPtr);\n                        goto err;\n                                     */\n                    }\n\n                    /*\n                     * cross-check that bimg_len = BLCKSZ if neither HAS_HOLE nor\n                     * IS_COMPRESSED flag is set.\n                     */\n                    if blk.bimg_info & pg_constants::BKPIMAGE_HAS_HOLE == 0\n                        && !blk_img_is_compressed\n                        && blk.bimg_len != BLCKSZ\n                    {\n                        // TODO\n                        /*\n                        report_invalid_record(state,\n                                      \"neither pg_constants::BKPIMAGE_HAS_HOLE nor pg_constants::BKPIMAGE_IS_COMPRESSED set, but block image length is %u at %X/%X\",\n                                      (unsigned int) blk->data_len,\n                                      (uint32) (state->ReadRecPtr >> 32), (uint32) state->ReadRecPtr);\n                        goto err;\n                                     */\n                    }\n                }\n                if fork_flags & pg_constants::BKPBLOCK_SAME_REL == 0 {\n                    rnode_spcnode = buf.get_u32_le();\n                    rnode_dbnode = buf.get_u32_le();\n                    rnode_relnode = buf.get_u32_le();\n                    got_rnode = true;\n                } else if !got_rnode {\n                    // TODO\n                    /*\n                    report_invalid_record(state,\n                                    \"pg_constants::BKPBLOCK_SAME_REL set but no previous rel at %X/%X\",\n                                    (uint32) (state->ReadRecPtr >> 32), (uint32) state->ReadRecPtr);\n                    goto err;           */\n             
   }\n\n                blk.rnode_spcnode = rnode_spcnode;\n                blk.rnode_dbnode = rnode_dbnode;\n                blk.rnode_relnode = rnode_relnode;\n\n                blk.blkno = buf.get_u32_le();\n                tracing::trace!(\n                    \"this record affects {}/{}/{} blk {}\",\n                    rnode_spcnode,\n                    rnode_dbnode,\n                    rnode_relnode,\n                    blk.blkno\n                );\n\n                decoded.blocks.push(blk);\n            }\n\n            _ => {\n                // TODO: invalid block_id\n            }\n        }\n    }\n\n    // 3. Decode blocks.\n    let mut ptr = record.len() - buf.remaining();\n    for blk in decoded.blocks.iter_mut() {\n        if blk.has_image {\n            blk.bimg_offset = ptr as u32;\n            ptr += blk.bimg_len as usize;\n        }\n        if blk.has_data {\n            ptr += blk.data_len as usize;\n        }\n    }\n    // We don't need them, so just skip blocks_total_len bytes\n    buf.advance(blocks_total_len as usize);\n    assert_eq!(ptr, record.len() - buf.remaining());\n\n    let main_data_offset = (xlogrec.xl_tot_len - main_data_len) as usize;\n\n    // 4. 
Decode main_data\n    if main_data_len > 0 {\n        assert_eq!(buf.remaining(), main_data_len as usize);\n    }\n\n    decoded.xl_xid = xlogrec.xl_xid;\n    decoded.xl_info = xlogrec.xl_info;\n    decoded.xl_rmid = xlogrec.xl_rmid;\n    decoded.record = record;\n    decoded.origin_id = origin_id;\n    decoded.main_data_offset = main_data_offset;\n\n    Ok(())\n}\n\npub mod v14 {\n    use bytes::{Buf, Bytes};\n\n    use crate::{OffsetNumber, TransactionId};\n\n    #[repr(C)]\n    #[derive(Debug)]\n    pub struct XlHeapInsert {\n        pub offnum: OffsetNumber,\n        pub flags: u8,\n    }\n\n    impl XlHeapInsert {\n        pub fn decode(buf: &mut Bytes) -> XlHeapInsert {\n            XlHeapInsert {\n                offnum: buf.get_u16_le(),\n                flags: buf.get_u8(),\n            }\n        }\n    }\n\n    #[repr(C)]\n    #[derive(Debug)]\n    pub struct XlHeapMultiInsert {\n        pub flags: u8,\n        pub _padding: u8,\n        pub ntuples: u16,\n    }\n\n    impl XlHeapMultiInsert {\n        pub fn decode(buf: &mut Bytes) -> XlHeapMultiInsert {\n            XlHeapMultiInsert {\n                flags: buf.get_u8(),\n                _padding: buf.get_u8(),\n                ntuples: buf.get_u16_le(),\n            }\n        }\n    }\n\n    #[repr(C)]\n    #[derive(Debug)]\n    pub struct XlHeapDelete {\n        pub xmax: TransactionId,\n        pub offnum: OffsetNumber,\n        pub _padding: u16,\n        pub t_cid: u32,\n        pub infobits_set: u8,\n        pub flags: u8,\n    }\n\n    impl XlHeapDelete {\n        pub fn decode(buf: &mut Bytes) -> XlHeapDelete {\n            XlHeapDelete {\n                xmax: buf.get_u32_le(),\n                offnum: buf.get_u16_le(),\n                _padding: buf.get_u16_le(),\n                t_cid: buf.get_u32_le(),\n                infobits_set: buf.get_u8(),\n                flags: buf.get_u8(),\n            }\n        }\n    }\n\n    #[repr(C)]\n    #[derive(Debug)]\n    pub struct XlHeapUpdate {\n 
       pub old_xmax: TransactionId,\n        pub old_offnum: OffsetNumber,\n        pub old_infobits_set: u8,\n        pub flags: u8,\n        pub t_cid: u32,\n        pub new_xmax: TransactionId,\n        pub new_offnum: OffsetNumber,\n    }\n\n    impl XlHeapUpdate {\n        pub fn decode(buf: &mut Bytes) -> XlHeapUpdate {\n            XlHeapUpdate {\n                old_xmax: buf.get_u32_le(),\n                old_offnum: buf.get_u16_le(),\n                old_infobits_set: buf.get_u8(),\n                flags: buf.get_u8(),\n                t_cid: buf.get_u32_le(),\n                new_xmax: buf.get_u32_le(),\n                new_offnum: buf.get_u16_le(),\n            }\n        }\n    }\n\n    #[repr(C)]\n    #[derive(Debug)]\n    pub struct XlHeapLock {\n        pub locking_xid: TransactionId,\n        pub offnum: OffsetNumber,\n        pub _padding: u16,\n        pub t_cid: u32,\n        pub infobits_set: u8,\n        pub flags: u8,\n    }\n\n    impl XlHeapLock {\n        pub fn decode(buf: &mut Bytes) -> XlHeapLock {\n            XlHeapLock {\n                locking_xid: buf.get_u32_le(),\n                offnum: buf.get_u16_le(),\n                _padding: buf.get_u16_le(),\n                t_cid: buf.get_u32_le(),\n                infobits_set: buf.get_u8(),\n                flags: buf.get_u8(),\n            }\n        }\n    }\n\n    #[repr(C)]\n    #[derive(Debug)]\n    pub struct XlHeapLockUpdated {\n        pub xmax: TransactionId,\n        pub offnum: OffsetNumber,\n        pub infobits_set: u8,\n        pub flags: u8,\n    }\n\n    impl XlHeapLockUpdated {\n        pub fn decode(buf: &mut Bytes) -> XlHeapLockUpdated {\n            XlHeapLockUpdated {\n                xmax: buf.get_u32_le(),\n                offnum: buf.get_u16_le(),\n                infobits_set: buf.get_u8(),\n                flags: buf.get_u8(),\n            }\n        }\n    }\n\n    #[repr(C)]\n    #[derive(Debug)]\n    pub struct XlParameterChange {\n        pub 
max_connections: i32,\n        pub max_worker_processes: i32,\n        pub max_wal_senders: i32,\n        pub max_prepared_xacts: i32,\n        pub max_locks_per_xact: i32,\n        pub wal_level: i32,\n        pub wal_log_hints: bool,\n        pub track_commit_timestamp: bool,\n        pub _padding: [u8; 2],\n    }\n\n    impl XlParameterChange {\n        pub fn decode(buf: &mut Bytes) -> XlParameterChange {\n            XlParameterChange {\n                max_connections: buf.get_i32_le(),\n                max_worker_processes: buf.get_i32_le(),\n                max_wal_senders: buf.get_i32_le(),\n                max_prepared_xacts: buf.get_i32_le(),\n                max_locks_per_xact: buf.get_i32_le(),\n                wal_level: buf.get_i32_le(),\n                wal_log_hints: buf.get_u8() != 0,\n                track_commit_timestamp: buf.get_u8() != 0,\n                _padding: [buf.get_u8(), buf.get_u8()],\n            }\n        }\n    }\n}\n\npub mod v15 {\n    pub use super::v14::{\n        XlHeapDelete, XlHeapInsert, XlHeapLock, XlHeapLockUpdated, XlHeapMultiInsert, XlHeapUpdate,\n        XlParameterChange,\n    };\n}\n\npub mod v16 {\n    use bytes::{Buf, Bytes};\n\n    pub use super::v14::{XlHeapInsert, XlHeapLockUpdated, XlHeapMultiInsert, XlParameterChange};\n    use crate::{OffsetNumber, TransactionId};\n\n    pub struct XlHeapDelete {\n        pub xmax: TransactionId,\n        pub offnum: OffsetNumber,\n        pub infobits_set: u8,\n        pub flags: u8,\n    }\n\n    impl XlHeapDelete {\n        pub fn decode(buf: &mut Bytes) -> XlHeapDelete {\n            XlHeapDelete {\n                xmax: buf.get_u32_le(),\n                offnum: buf.get_u16_le(),\n                infobits_set: buf.get_u8(),\n                flags: buf.get_u8(),\n            }\n        }\n    }\n\n    #[repr(C)]\n    #[derive(Debug)]\n    pub struct XlHeapUpdate {\n        pub old_xmax: TransactionId,\n        pub old_offnum: OffsetNumber,\n        pub 
old_infobits_set: u8,\n        pub flags: u8,\n        pub new_xmax: TransactionId,\n        pub new_offnum: OffsetNumber,\n    }\n\n    impl XlHeapUpdate {\n        pub fn decode(buf: &mut Bytes) -> XlHeapUpdate {\n            XlHeapUpdate {\n                old_xmax: buf.get_u32_le(),\n                old_offnum: buf.get_u16_le(),\n                old_infobits_set: buf.get_u8(),\n                flags: buf.get_u8(),\n                new_xmax: buf.get_u32_le(),\n                new_offnum: buf.get_u16_le(),\n            }\n        }\n    }\n\n    #[repr(C)]\n    #[derive(Debug)]\n    pub struct XlHeapLock {\n        pub locking_xid: TransactionId,\n        pub offnum: OffsetNumber,\n        pub infobits_set: u8,\n        pub flags: u8,\n    }\n\n    impl XlHeapLock {\n        pub fn decode(buf: &mut Bytes) -> XlHeapLock {\n            XlHeapLock {\n                locking_xid: buf.get_u32_le(),\n                offnum: buf.get_u16_le(),\n                infobits_set: buf.get_u8(),\n                flags: buf.get_u8(),\n            }\n        }\n    }\n\n    /* Since PG16, we have the Neon RMGR (RM_NEON_ID) to manage Neon-flavored WAL. 
*/\n    pub mod rm_neon {\n        use bytes::{Buf, Bytes};\n\n        use crate::{OffsetNumber, TransactionId};\n\n        #[repr(C)]\n        #[derive(Debug)]\n        pub struct XlNeonHeapInsert {\n            pub offnum: OffsetNumber,\n            pub flags: u8,\n        }\n\n        impl XlNeonHeapInsert {\n            pub fn decode(buf: &mut Bytes) -> XlNeonHeapInsert {\n                XlNeonHeapInsert {\n                    offnum: buf.get_u16_le(),\n                    flags: buf.get_u8(),\n                }\n            }\n        }\n\n        #[repr(C)]\n        #[derive(Debug)]\n        pub struct XlNeonHeapMultiInsert {\n            pub flags: u8,\n            pub _padding: u8,\n            pub ntuples: u16,\n            pub t_cid: u32,\n        }\n\n        impl XlNeonHeapMultiInsert {\n            pub fn decode(buf: &mut Bytes) -> XlNeonHeapMultiInsert {\n                XlNeonHeapMultiInsert {\n                    flags: buf.get_u8(),\n                    _padding: buf.get_u8(),\n                    ntuples: buf.get_u16_le(),\n                    t_cid: buf.get_u32_le(),\n                }\n            }\n        }\n\n        #[repr(C)]\n        #[derive(Debug)]\n        pub struct XlNeonHeapDelete {\n            pub xmax: TransactionId,\n            pub offnum: OffsetNumber,\n            pub infobits_set: u8,\n            pub flags: u8,\n            pub t_cid: u32,\n        }\n\n        impl XlNeonHeapDelete {\n            pub fn decode(buf: &mut Bytes) -> XlNeonHeapDelete {\n                XlNeonHeapDelete {\n                    xmax: buf.get_u32_le(),\n                    offnum: buf.get_u16_le(),\n                    infobits_set: buf.get_u8(),\n                    flags: buf.get_u8(),\n                    t_cid: buf.get_u32_le(),\n                }\n            }\n        }\n\n        #[repr(C)]\n        #[derive(Debug)]\n        pub struct XlNeonHeapUpdate {\n            pub old_xmax: TransactionId,\n            pub old_offnum: 
OffsetNumber,\n            pub old_infobits_set: u8,\n            pub flags: u8,\n            pub t_cid: u32,\n            pub new_xmax: TransactionId,\n            pub new_offnum: OffsetNumber,\n        }\n\n        impl XlNeonHeapUpdate {\n            pub fn decode(buf: &mut Bytes) -> XlNeonHeapUpdate {\n                XlNeonHeapUpdate {\n                    old_xmax: buf.get_u32_le(),\n                    old_offnum: buf.get_u16_le(),\n                    old_infobits_set: buf.get_u8(),\n                    flags: buf.get_u8(),\n                    t_cid: buf.get_u32(),\n                    new_xmax: buf.get_u32_le(),\n                    new_offnum: buf.get_u16_le(),\n                }\n            }\n        }\n\n        #[repr(C)]\n        #[derive(Debug)]\n        pub struct XlNeonHeapLock {\n            pub locking_xid: TransactionId,\n            pub t_cid: u32,\n            pub offnum: OffsetNumber,\n            pub infobits_set: u8,\n            pub flags: u8,\n        }\n\n        impl XlNeonHeapLock {\n            pub fn decode(buf: &mut Bytes) -> XlNeonHeapLock {\n                XlNeonHeapLock {\n                    locking_xid: buf.get_u32_le(),\n                    t_cid: buf.get_u32_le(),\n                    offnum: buf.get_u16_le(),\n                    infobits_set: buf.get_u8(),\n                    flags: buf.get_u8(),\n                }\n            }\n        }\n    }\n}\n\npub mod v17 {\n    use bytes::{Buf, Bytes};\n\n    pub use super::v14::XlHeapLockUpdated;\n    pub use super::v16::{\n        XlHeapDelete, XlHeapInsert, XlHeapLock, XlHeapMultiInsert, XlHeapUpdate, XlParameterChange,\n        rm_neon,\n    };\n    pub use crate::TimeLineID;\n    pub use postgres_ffi_types::TimestampTz;\n\n    #[repr(C)]\n    #[derive(Debug)]\n    pub struct XlEndOfRecovery {\n        pub end_time: TimestampTz,\n        pub this_time_line_id: TimeLineID,\n        pub prev_time_line_id: TimeLineID,\n        pub wal_level: i32,\n    }\n\n    impl 
XlEndOfRecovery {\n        pub fn decode(buf: &mut Bytes) -> XlEndOfRecovery {\n            XlEndOfRecovery {\n                end_time: buf.get_i64_le(),\n                this_time_line_id: buf.get_u32_le(),\n                prev_time_line_id: buf.get_u32_le(),\n                wal_level: buf.get_i32_le(),\n            }\n        }\n    }\n}\n\n#[repr(C)]\n#[derive(Debug)]\npub struct XlSmgrCreate {\n    pub rnode: RelFileNode,\n    // FIXME: This is ForkNumber in storage_xlog.h. That's an enum. Does it have\n    // well-defined size?\n    pub forknum: u8,\n}\n\nimpl XlSmgrCreate {\n    pub fn decode(buf: &mut Bytes) -> XlSmgrCreate {\n        XlSmgrCreate {\n            rnode: RelFileNode {\n                spcnode: buf.get_u32_le(), /* tablespace */\n                dbnode: buf.get_u32_le(),  /* database */\n                relnode: buf.get_u32_le(), /* relation */\n            },\n            forknum: buf.get_u32_le() as u8,\n        }\n    }\n}\n\n#[repr(C)]\n#[derive(Clone, Debug, Serialize, Deserialize)]\npub struct XlSmgrTruncate {\n    pub blkno: BlockNumber,\n    pub rnode: RelFileNode,\n    pub flags: u32,\n}\n\nimpl XlSmgrTruncate {\n    pub fn decode(buf: &mut Bytes) -> XlSmgrTruncate {\n        XlSmgrTruncate {\n            blkno: buf.get_u32_le(),\n            rnode: RelFileNode {\n                spcnode: buf.get_u32_le(), /* tablespace */\n                dbnode: buf.get_u32_le(),  /* database */\n                relnode: buf.get_u32_le(), /* relation */\n            },\n            flags: buf.get_u32_le(),\n        }\n    }\n}\n\n#[repr(C)]\n#[derive(Debug)]\npub struct XlCreateDatabase {\n    pub db_id: Oid,\n    pub tablespace_id: Oid,\n    pub src_db_id: Oid,\n    pub src_tablespace_id: Oid,\n}\n\nimpl XlCreateDatabase {\n    pub fn decode(buf: &mut Bytes) -> XlCreateDatabase {\n        XlCreateDatabase {\n            db_id: buf.get_u32_le(),\n            tablespace_id: buf.get_u32_le(),\n            src_db_id: buf.get_u32_le(),\n            
src_tablespace_id: buf.get_u32_le(),\n        }\n    }\n}\n\n#[repr(C)]\n#[derive(Debug)]\npub struct XlDropDatabase {\n    pub db_id: Oid,\n    pub n_tablespaces: Oid, /* number of tablespace IDs */\n    pub tablespace_ids: Vec<Oid>,\n}\n\nimpl XlDropDatabase {\n    pub fn decode(buf: &mut Bytes) -> XlDropDatabase {\n        let mut rec = XlDropDatabase {\n            db_id: buf.get_u32_le(),\n            n_tablespaces: buf.get_u32_le(),\n            tablespace_ids: Vec::<Oid>::new(),\n        };\n\n        for _i in 0..rec.n_tablespaces {\n            let id = buf.get_u32_le();\n            rec.tablespace_ids.push(id);\n        }\n\n        rec\n    }\n}\n\n///\n/// Note: Parsing some fields is missing, because they're not needed.\n///\n/// This is similar to the xl_xact_parsed_commit and\n/// xl_xact_parsed_abort structs in PostgreSQL, but we use the same\n/// struct for commits and aborts.\n///\n#[derive(Clone, Debug, Serialize, Deserialize)]\npub struct XlXactParsedRecord {\n    pub xid: TransactionId,\n    pub info: u8,\n    pub xact_time: TimestampTz,\n    pub xinfo: u32,\n\n    pub db_id: Oid,\n    /* MyDatabaseId */\n    pub ts_id: Oid,\n    /* MyDatabaseTableSpace */\n    pub subxacts: Vec<TransactionId>,\n\n    pub xnodes: Vec<RelFileNode>,\n    pub origin_lsn: Lsn,\n}\n\nimpl XlXactParsedRecord {\n    /// Decode a XLOG_XACT_COMMIT/ABORT/COMMIT_PREPARED/ABORT_PREPARED\n    /// record. 
This should agree with the ParseCommitRecord and ParseAbortRecord\n    /// functions in PostgreSQL (in src/backend/access/rmgr/xactdesc.c)\n    pub fn decode(buf: &mut Bytes, mut xid: TransactionId, xl_info: u8) -> XlXactParsedRecord {\n        let info = xl_info & pg_constants::XLOG_XACT_OPMASK;\n        // The record starts with time of commit/abort\n        let xact_time = buf.get_i64_le();\n        let xinfo = if xl_info & pg_constants::XLOG_XACT_HAS_INFO != 0 {\n            buf.get_u32_le()\n        } else {\n            0\n        };\n        let db_id;\n        let ts_id;\n        if xinfo & pg_constants::XACT_XINFO_HAS_DBINFO != 0 {\n            db_id = buf.get_u32_le();\n            ts_id = buf.get_u32_le();\n        } else {\n            db_id = 0;\n            ts_id = 0;\n        }\n        let mut subxacts = Vec::<TransactionId>::new();\n        if xinfo & pg_constants::XACT_XINFO_HAS_SUBXACTS != 0 {\n            let nsubxacts = buf.get_i32_le();\n            for _i in 0..nsubxacts {\n                let subxact = buf.get_u32_le();\n                subxacts.push(subxact);\n            }\n        }\n        let mut xnodes = Vec::<RelFileNode>::new();\n        if xinfo & pg_constants::XACT_XINFO_HAS_RELFILENODES != 0 {\n            let nrels = buf.get_i32_le();\n            for _i in 0..nrels {\n                let spcnode = buf.get_u32_le();\n                let dbnode = buf.get_u32_le();\n                let relnode = buf.get_u32_le();\n                tracing::trace!(\n                    \"XLOG_XACT_COMMIT relfilenode {}/{}/{}\",\n                    spcnode,\n                    dbnode,\n                    relnode\n                );\n                xnodes.push(RelFileNode {\n                    spcnode,\n                    dbnode,\n                    relnode,\n                });\n            }\n        }\n\n        if xinfo & crate::v15::bindings::XACT_XINFO_HAS_DROPPED_STATS != 0 {\n            let nitems = buf.get_i32_le();\n            
tracing::debug!(\n                \"XLOG_XACT_COMMIT-XACT_XINFO_HAS_DROPPED_STAT nitems {}\",\n                nitems\n            );\n            let sizeof_xl_xact_stats_item = 12;\n            buf.advance((nitems * sizeof_xl_xact_stats_item).try_into().unwrap());\n        }\n\n        if xinfo & pg_constants::XACT_XINFO_HAS_INVALS != 0 {\n            let nmsgs = buf.get_i32_le();\n            let sizeof_shared_invalidation_message = 16;\n            buf.advance(\n                (nmsgs * sizeof_shared_invalidation_message)\n                    .try_into()\n                    .unwrap(),\n            );\n        }\n\n        if xinfo & pg_constants::XACT_XINFO_HAS_TWOPHASE != 0 {\n            xid = buf.get_u32_le();\n            tracing::debug!(\"XLOG_XACT_COMMIT-XACT_XINFO_HAS_TWOPHASE xid {}\", xid);\n        }\n\n        let origin_lsn = if xinfo & pg_constants::XACT_XINFO_HAS_ORIGIN != 0 {\n            Lsn(buf.get_u64_le())\n        } else {\n            Lsn::INVALID\n        };\n        XlXactParsedRecord {\n            xid,\n            info,\n            xact_time,\n            xinfo,\n            db_id,\n            ts_id,\n            subxacts,\n            xnodes,\n            origin_lsn,\n        }\n    }\n}\n\n#[repr(C)]\n#[derive(Debug)]\npub struct XlClogTruncate {\n    pub pageno: u32,\n    pub oldest_xid: TransactionId,\n    pub oldest_xid_db: Oid,\n}\n\nimpl XlClogTruncate {\n    pub fn decode(buf: &mut Bytes, pg_version: PgMajorVersion) -> XlClogTruncate {\n        XlClogTruncate {\n            pageno: if pg_version < PgMajorVersion::PG17 {\n                buf.get_u32_le()\n            } else {\n                buf.get_u64_le() as u32\n            },\n            oldest_xid: buf.get_u32_le(),\n            oldest_xid_db: buf.get_u32_le(),\n        }\n    }\n}\n\n#[repr(C)]\n#[derive(Debug)]\npub struct XlLogicalMessage {\n    pub db_id: Oid,\n    pub transactional: bool,\n    pub prefix_size: usize,\n    pub message_size: usize,\n}\n\nimpl 
XlLogicalMessage {\n    pub fn decode(buf: &mut Bytes) -> XlLogicalMessage {\n        XlLogicalMessage {\n            db_id: buf.get_u32_le(),\n            transactional: buf.get_u32_le() != 0, // 4-bytes alignment\n            prefix_size: buf.get_u64_le() as usize,\n            message_size: buf.get_u64_le() as usize,\n        }\n    }\n}\n\n#[repr(C)]\n#[derive(Debug)]\npub struct XlRunningXacts {\n    pub xcnt: u32,\n    pub subxcnt: u32,\n    pub subxid_overflow: bool,\n    pub next_xid: TransactionId,\n    pub oldest_running_xid: TransactionId,\n    pub latest_completed_xid: TransactionId,\n    pub xids: Vec<TransactionId>,\n}\n\nimpl XlRunningXacts {\n    pub fn decode(buf: &mut Bytes) -> XlRunningXacts {\n        let xcnt = buf.get_u32_le();\n        let subxcnt = buf.get_u32_le();\n        let subxid_overflow = buf.get_u32_le() != 0;\n        let next_xid = buf.get_u32_le();\n        let oldest_running_xid = buf.get_u32_le();\n        let latest_completed_xid = buf.get_u32_le();\n        let mut xids = Vec::new();\n        for _ in 0..(xcnt + subxcnt) {\n            xids.push(buf.get_u32_le());\n        }\n        XlRunningXacts {\n            xcnt,\n            subxcnt,\n            subxid_overflow,\n            next_xid,\n            oldest_running_xid,\n            latest_completed_xid,\n            xids,\n        }\n    }\n}\n\npub fn describe_postgres_wal_record(record: &Bytes) -> Result<String, DeserializeError> {\n    // TODO: It would be nice to use the PostgreSQL rmgrdesc infrastructure for this.\n    // Maybe use the postgres wal redo process, the same used for replaying WAL records?\n    // Or could we compile the rmgrdesc routines into the dump_layer_file() binary directly,\n    // without worrying about security?\n    //\n    // But for now, we have a hand-written code for a few common WAL record types here.\n\n    let mut buf = record.clone();\n\n    // 1. 
Parse XLogRecord struct\n\n    // FIXME: assume little-endian here\n    let xlogrec = XLogRecord::from_bytes(&mut buf)?;\n\n    let unknown_str: String;\n\n    let result: &str = match xlogrec.xl_rmid {\n        pg_constants::RM_HEAP2_ID => {\n            let info = xlogrec.xl_info & pg_constants::XLOG_HEAP_OPMASK;\n            match info {\n                pg_constants::XLOG_HEAP2_MULTI_INSERT => \"HEAP2 MULTI_INSERT\",\n                pg_constants::XLOG_HEAP2_VISIBLE => \"HEAP2 VISIBLE\",\n                _ => {\n                    unknown_str = format!(\"HEAP2 UNKNOWN_0x{info:02x}\");\n                    &unknown_str\n                }\n            }\n        }\n        pg_constants::RM_HEAP_ID => {\n            let info = xlogrec.xl_info & pg_constants::XLOG_HEAP_OPMASK;\n            match info {\n                pg_constants::XLOG_HEAP_INSERT => \"HEAP INSERT\",\n                pg_constants::XLOG_HEAP_DELETE => \"HEAP DELETE\",\n                pg_constants::XLOG_HEAP_UPDATE => \"HEAP UPDATE\",\n                pg_constants::XLOG_HEAP_HOT_UPDATE => \"HEAP HOT_UPDATE\",\n                _ => {\n                    unknown_str = format!(\"HEAP2 UNKNOWN_0x{info:02x}\");\n                    &unknown_str\n                }\n            }\n        }\n        pg_constants::RM_XLOG_ID => {\n            let info = xlogrec.xl_info & pg_constants::XLR_RMGR_INFO_MASK;\n            match info {\n                pg_constants::XLOG_FPI => \"XLOG FPI\",\n                pg_constants::XLOG_FPI_FOR_HINT => \"XLOG FPI_FOR_HINT\",\n                _ => {\n                    unknown_str = format!(\"XLOG UNKNOWN_0x{info:02x}\");\n                    &unknown_str\n                }\n            }\n        }\n        rmid => {\n            let info = xlogrec.xl_info & pg_constants::XLR_RMGR_INFO_MASK;\n\n            unknown_str = format!(\"UNKNOWN_RM_{rmid} INFO_0x{info:02x}\");\n            &unknown_str\n        }\n    };\n\n    Ok(String::from(result))\n}\n"
  },
  {
    "path": "libs/postgres_ffi/src/xlog_utils.rs",
    "content": "//\n// This file contains common utilities for dealing with PostgreSQL WAL files and\n// LSNs.\n//\n// Many of these functions have been copied from PostgreSQL, and rewritten in\n// Rust. That's why they don't follow the usual Rust naming conventions, they\n// have been named the same as the corresponding PostgreSQL functions instead.\n//\n\nuse super::super::waldecoder::WalStreamDecoder;\nuse super::bindings::{\n    CheckPoint, ControlFileData, DBState_DB_SHUTDOWNED, FullTransactionId, TimeLineID,\n    XLogLongPageHeaderData, XLogPageHeaderData, XLogRecPtr, XLogRecord, XLogSegNo, XLOG_PAGE_MAGIC,\n    MY_PGVERSION\n};\nuse postgres_ffi_types::TimestampTz;\nuse super::wal_generator::LogicalMessageGenerator;\nuse crate::pg_constants;\nuse crate::PG_TLI;\nuse crate::{uint32, uint64, Oid};\nuse crate::{WAL_SEGMENT_SIZE, XLOG_BLCKSZ};\n\nuse bytes::BytesMut;\nuse bytes::{Buf, Bytes};\n\nuse serde::Serialize;\nuse std::ffi::{CString, OsStr};\nuse std::fs::File;\nuse std::io::prelude::*;\nuse std::io::ErrorKind;\nuse std::io::SeekFrom;\nuse std::path::Path;\nuse std::time::SystemTime;\nuse utils::bin_ser::DeserializeError;\nuse utils::bin_ser::SerializeError;\n\nuse utils::lsn::Lsn;\n\npub const XLOG_FNAME_LEN: usize = 24;\npub const XLP_BKP_REMOVABLE: u16 = 0x0004;\npub const XLP_FIRST_IS_CONTRECORD: u16 = 0x0001;\npub const XLP_REM_LEN_OFFS: usize = 2 + 2 + 4 + 8;\npub const XLOG_RECORD_CRC_OFFS: usize = 4 + 4 + 8 + 1 + 1 + 2;\n\npub const XLOG_SIZE_OF_XLOG_SHORT_PHD: usize = size_of::<XLogPageHeaderData>();\npub const XLOG_SIZE_OF_XLOG_LONG_PHD: usize = size_of::<XLogLongPageHeaderData>();\npub const XLOG_SIZE_OF_XLOG_RECORD: usize = size_of::<XLogRecord>();\n#[allow(clippy::identity_op)]\npub const SIZE_OF_XLOG_RECORD_DATA_HEADER_SHORT: usize = 1 * 2;\n\n/// Interval of checkpointing metadata file. 
We should store metadata file to enforce\n/// predicate that checkpoint.nextXid is larger than any XID in WAL.\n/// But flushing checkpoint file for each transaction seems to be too expensive,\n/// so XID_CHECKPOINT_INTERVAL is used to forward align nextXid and so perform\n/// metadata checkpoint only once per XID_CHECKPOINT_INTERVAL transactions.\n/// XID_CHECKPOINT_INTERVAL should not be larger than BLCKSZ*CLOG_XACTS_PER_BYTE\n/// in order to let CLOG_TRUNCATE mechanism correctly extend CLOG.\nconst XID_CHECKPOINT_INTERVAL: u32 = 1024;\n\npub fn XLogSegmentsPerXLogId(wal_segsz_bytes: usize) -> XLogSegNo {\n    (0x100000000u64 / wal_segsz_bytes as u64) as XLogSegNo\n}\n\npub fn XLogSegNoOffsetToRecPtr(\n    segno: XLogSegNo,\n    offset: u32,\n    wal_segsz_bytes: usize,\n) -> XLogRecPtr {\n    segno * (wal_segsz_bytes as u64) + (offset as u64)\n}\n\npub fn XLogFileName(tli: TimeLineID, logSegNo: XLogSegNo, wal_segsz_bytes: usize) -> String {\n    format!(\n        \"{:>08X}{:>08X}{:>08X}\",\n        tli,\n        logSegNo / XLogSegmentsPerXLogId(wal_segsz_bytes),\n        logSegNo % XLogSegmentsPerXLogId(wal_segsz_bytes)\n    )\n}\n\npub fn XLogFromFileName(\n    fname: &OsStr,\n    wal_seg_size: usize,\n) -> anyhow::Result<(XLogSegNo, TimeLineID)> {\n    if let Some(fname_str) = fname.to_str() {\n        let tli = u32::from_str_radix(&fname_str[0..8], 16)?;\n        let log = u32::from_str_radix(&fname_str[8..16], 16)? as XLogSegNo;\n        let seg = u32::from_str_radix(&fname_str[16..24], 16)? 
as XLogSegNo;\n        Ok((log * XLogSegmentsPerXLogId(wal_seg_size) + seg, tli))\n    } else {\n        anyhow::bail!(\"non-ut8 filename: {:?}\", fname);\n    }\n}\n\npub fn IsXLogFileName(fname: &OsStr) -> bool {\n    if let Some(fname) = fname.to_str() {\n        fname.len() == XLOG_FNAME_LEN && fname.chars().all(|c| c.is_ascii_hexdigit())\n    } else {\n        false\n    }\n}\n\npub fn IsPartialXLogFileName(fname: &OsStr) -> bool {\n    if let Some(fname) = fname.to_str() {\n        fname.ends_with(\".partial\") && IsXLogFileName(OsStr::new(&fname[0..fname.len() - 8]))\n    } else {\n        false\n    }\n}\n\n/// If LSN points to the beginning of the page, then shift it to first record,\n/// otherwise align on 8-bytes boundary (required for WAL records)\npub fn normalize_lsn(lsn: Lsn, seg_sz: usize) -> Lsn {\n    if lsn.0 % XLOG_BLCKSZ as u64 == 0 {\n        let hdr_size = if lsn.0 % seg_sz as u64 == 0 {\n            XLOG_SIZE_OF_XLOG_LONG_PHD\n        } else {\n            XLOG_SIZE_OF_XLOG_SHORT_PHD\n        };\n        lsn + hdr_size as u64\n    } else {\n        lsn.align()\n    }\n}\n\n/// Generate a pg_control file, for a basebackup for starting up Postgres at the given LSN\n///\n/// 'pg_control_bytes' and 'checkpoint_bytes' are the contents of those keys persisted in\n/// the pageserver. They use the same format as the PostgreSQL control file and the\n/// checkpoint record, but see walingest.rs for how exactly they are kept up to date.\n/// 'lsn' is the LSN at which we're starting up.\n///\n/// Returns:\n/// - pg_control file contents\n/// - system_identifier, extracted from the persisted information\n/// - true, if we're starting up from a \"clean shutdown\", i.e. 
if there was a shutdown\n///   checkpoint at the given LSN\npub fn generate_pg_control(\n    pg_control_bytes: &[u8],\n    checkpoint_bytes: &[u8],\n    lsn: Lsn,\n) -> anyhow::Result<(Bytes, u64, bool)> {\n    let mut pg_control = ControlFileData::decode(pg_control_bytes)?;\n    let mut checkpoint = CheckPoint::decode(checkpoint_bytes)?;\n\n    // Generate new pg_control needed for bootstrap\n    //\n    // NB: In the checkpoint struct that we persist in the pageserver, we have a different\n    // convention for the 'redo' field than in PostgreSQL: On a shutdown checkpoint,\n    // 'redo' points the *end* of the checkpoint WAL record. On PostgreSQL, it points to\n    // the beginning. Furthermore, on an online checkpoint, 'redo' is set to 0.\n    //\n    // We didn't always have this convention however, and old persisted records will have\n    // old REDO values that point to some old LSN.\n    //\n    // The upshot is that if 'redo' is equal to the \"current\" LSN, there was a shutdown\n    // checkpoint record at that point in WAL, with no new WAL records after it. That case\n    // can be treated as starting from a clean shutdown. All other cases are treated as\n    // non-clean shutdown. In Neon, we don't do WAL replay at startup in either case, so\n    // that distinction doesn't matter very much. As of this writing, it only affects\n    // whether the persisted pg_stats information can be used or not.\n    //\n    // In the Checkpoint struct in the returned pg_control file, the redo pointer is\n    // always set to the LSN we're starting at, to hint that no WAL replay is required.\n    // (There's some neon-specific code in Postgres startup to make that work, though.\n    // Just setting the redo pointer is not sufficient.)\n    let was_shutdown = Lsn(checkpoint.redo) == lsn;\n    checkpoint.redo = normalize_lsn(lsn, WAL_SEGMENT_SIZE).0;\n\n    // We use DBState_DB_SHUTDOWNED even if it was not a clean shutdown.  
The\n    // neon-specific code at postgres startup ignores the state stored in the control\n    // file, similar to archive recovery in standalone PostgreSQL. Similarly, the\n    // checkPoint pointer is ignored, so just set it to 0.\n    pg_control.checkPoint = 0;\n    pg_control.checkPointCopy = checkpoint;\n    pg_control.state = DBState_DB_SHUTDOWNED;\n\n    Ok((pg_control.encode(), pg_control.system_identifier, was_shutdown))\n}\n\npub fn get_current_timestamp() -> TimestampTz {\n    to_pg_timestamp(SystemTime::now())\n}\n\n// Module to reduce the scope of the constants\nmod timestamp_conversions {\n    use std::time::Duration;\n\n    use anyhow::Context;\n\n    use super::*;\n\n    const UNIX_EPOCH_JDATE: u64 = 2440588; // == date2j(1970, 1, 1)\n    const POSTGRES_EPOCH_JDATE: u64 = 2451545; // == date2j(2000, 1, 1)\n    const SECS_PER_DAY: u64 = 86400;\n    const USECS_PER_SEC: u64 = 1000000;\n    const SECS_DIFF_UNIX_TO_POSTGRES_EPOCH: u64 =\n        (POSTGRES_EPOCH_JDATE - UNIX_EPOCH_JDATE) * SECS_PER_DAY;\n\n    pub fn to_pg_timestamp(time: SystemTime) -> TimestampTz {\n        match time.duration_since(SystemTime::UNIX_EPOCH) {\n            Ok(n) => {\n                ((n.as_secs() - SECS_DIFF_UNIX_TO_POSTGRES_EPOCH) * USECS_PER_SEC\n                    + n.subsec_micros() as u64) as i64\n            }\n            Err(_) => panic!(\"SystemTime before UNIX EPOCH!\"),\n        }\n    }\n\n    pub fn try_from_pg_timestamp(time: TimestampTz) -> anyhow::Result<SystemTime> {\n        let time: u64 = time\n            .try_into()\n            .context(\"timestamp before millenium (postgres epoch)\")?;\n        let since_unix_epoch = time + SECS_DIFF_UNIX_TO_POSTGRES_EPOCH * USECS_PER_SEC;\n        SystemTime::UNIX_EPOCH\n            .checked_add(Duration::from_micros(since_unix_epoch))\n            .context(\"SystemTime overflow\")\n    }\n}\n\npub use timestamp_conversions::{to_pg_timestamp, try_from_pg_timestamp};\n\n// Returns (aligned) end_lsn of the last 
record in data_dir with WAL segments.\n// start_lsn must point to some previously known record boundary (beginning of\n// the next record). If no valid record after is found, start_lsn is returned\n// back.\npub fn find_end_of_wal(\n    data_dir: &Path,\n    wal_seg_size: usize,\n    start_lsn: Lsn, // start reading WAL at this point; must point at record start_lsn.\n) -> anyhow::Result<Lsn> {\n    let mut result = start_lsn;\n    let mut curr_lsn = start_lsn;\n    let mut buf = [0u8; XLOG_BLCKSZ];\n    let pg_version = MY_PGVERSION;\n    tracing::debug!(\"find_end_of_wal PG_VERSION: {}\", pg_version);\n\n    let mut decoder = WalStreamDecoder::new(start_lsn, pg_version);\n\n    // loop over segments\n    loop {\n        let segno = curr_lsn.segment_number(wal_seg_size);\n        let seg_file_name = XLogFileName(PG_TLI, segno, wal_seg_size);\n        let seg_file_path = data_dir.join(seg_file_name);\n        match open_wal_segment(&seg_file_path)? {\n            None => {\n                // no more segments\n                tracing::debug!(\n                    \"find_end_of_wal reached end at {:?}, segment {:?} doesn't exist\",\n                    result, seg_file_path\n                );\n                return Ok(result);\n            }\n            Some(mut segment) => {\n                let seg_offs = curr_lsn.segment_offset(wal_seg_size);\n                segment.seek(SeekFrom::Start(seg_offs as u64))?;\n                // loop inside segment\n                while curr_lsn.segment_number(wal_seg_size) == segno {\n                    let bytes_read = segment.read(&mut buf)?;\n                    if bytes_read == 0 {\n                        tracing::debug!(\n                            \"find_end_of_wal reached end at {:?}, EOF in segment {:?} at offset {}\",\n                            result,\n                            seg_file_path,\n                            curr_lsn.segment_offset(wal_seg_size)\n                        );\n                        
return Ok(result);\n                    }\n                    curr_lsn += bytes_read as u64;\n                    decoder.feed_bytes(&buf[0..bytes_read]);\n\n                    // advance result past all completely read records\n                    loop {\n                        match decoder.poll_decode() {\n                            Ok(Some(record)) => result = record.0,\n                            Err(e) => {\n                                tracing::debug!(\n                                    \"find_end_of_wal reached end at {:?}, decode error: {:?}\",\n                                    result, e\n                                );\n                                return Ok(result);\n                            }\n                            Ok(None) => break, // need more data\n                        }\n                    }\n                }\n            }\n        }\n    }\n}\n\n// Open .partial or full WAL segment file, if present.\nfn open_wal_segment(seg_file_path: &Path) -> anyhow::Result<Option<File>> {\n    let mut partial_path = seg_file_path.to_owned();\n    partial_path.set_extension(\"partial\");\n    match File::open(partial_path) {\n        Ok(file) => Ok(Some(file)),\n        Err(e) => match e.kind() {\n            ErrorKind::NotFound => {\n                // .partial not found, try full\n                match File::open(seg_file_path) {\n                    Ok(file) => Ok(Some(file)),\n                    Err(e) => match e.kind() {\n                        ErrorKind::NotFound => Ok(None),\n                        _ => Err(e.into()),\n                    },\n                }\n            }\n            _ => Err(e.into()),\n        },\n    }\n}\n\nimpl XLogRecord {\n    pub fn from_slice(buf: &[u8]) -> Result<XLogRecord, DeserializeError> {\n        use utils::bin_ser::LeSer;\n        XLogRecord::des(buf)\n    }\n\n    pub fn from_bytes<B: Buf>(buf: &mut B) -> Result<XLogRecord, DeserializeError> {\n        use 
utils::bin_ser::LeSer;\n        XLogRecord::des_from(&mut buf.reader())\n    }\n\n    pub fn encode(&self) -> Result<Bytes, SerializeError> {\n        use utils::bin_ser::LeSer;\n        Ok(self.ser()?.into())\n    }\n\n    // Is this record an XLOG_SWITCH record? They need some special processing,\n    pub fn is_xlog_switch_record(&self) -> bool {\n        self.xl_info == pg_constants::XLOG_SWITCH && self.xl_rmid == pg_constants::RM_XLOG_ID\n    }\n}\n\nimpl XLogPageHeaderData {\n    pub fn from_bytes<B: Buf>(buf: &mut B) -> Result<XLogPageHeaderData, DeserializeError> {\n        use utils::bin_ser::LeSer;\n        XLogPageHeaderData::des_from(&mut buf.reader())\n    }\n\n    pub fn encode(&self) -> Result<Bytes, SerializeError> {\n        use utils::bin_ser::LeSer;\n        self.ser().map(|b| b.into())\n    }\n}\n\nimpl XLogLongPageHeaderData {\n    pub fn from_bytes<B: Buf>(buf: &mut B) -> Result<XLogLongPageHeaderData, DeserializeError> {\n        use utils::bin_ser::LeSer;\n        XLogLongPageHeaderData::des_from(&mut buf.reader())\n    }\n\n    pub fn encode(&self) -> Result<Bytes, SerializeError> {\n        use utils::bin_ser::LeSer;\n        self.ser().map(|b| b.into())\n    }\n}\n\npub const SIZEOF_CHECKPOINT: usize = size_of::<CheckPoint>();\n\nimpl CheckPoint {\n    pub fn encode(&self) -> Result<Bytes, SerializeError> {\n        use utils::bin_ser::LeSer;\n        Ok(self.ser()?.into())\n    }\n\n    pub fn decode(buf: &[u8]) -> Result<CheckPoint, DeserializeError> {\n        use utils::bin_ser::LeSer;\n        CheckPoint::des(buf)\n    }\n\n    /// Update next XID based on provided new_xid and stored epoch.\n    /// Next XID should be greater than new_xid. 
This handles 32-bit\n    /// XID wraparound correctly.\n    ///\n    /// Returns 'true' if the XID was updated.\n    pub fn update_next_xid(&mut self, xid: u32) -> bool {\n        // nextXid should be greater than any XID in WAL, so increment provided XID and check for wraparound.\n        let mut new_xid = std::cmp::max(\n            xid.wrapping_add(1),\n            pg_constants::FIRST_NORMAL_TRANSACTION_ID,\n        );\n        // To reduce number of metadata checkpoints, we forward align XID on XID_CHECKPOINT_INTERVAL.\n        // XID_CHECKPOINT_INTERVAL should not be larger than BLCKSZ*CLOG_XACTS_PER_BYTE\n        new_xid =\n            new_xid.wrapping_add(XID_CHECKPOINT_INTERVAL - 1) & !(XID_CHECKPOINT_INTERVAL - 1);\n        let full_xid = self.nextXid.value;\n        let old_xid = full_xid as u32;\n        if new_xid.wrapping_sub(old_xid) as i32 > 0 {\n            let mut epoch = full_xid >> 32;\n            if new_xid < old_xid {\n                // wrap-around\n                epoch += 1;\n            }\n            let nextXid = (epoch << 32) | new_xid as u64;\n\n            if nextXid != self.nextXid.value {\n                self.nextXid = FullTransactionId { value: nextXid };\n                return true;\n            }\n        }\n        false\n    }\n\n    /// Advance next multi-XID/offset to those given in arguments.\n    ///\n    /// It's important that this handles wraparound correctly. 
This should match the\n    /// MultiXactAdvanceNextMXact() logic in PostgreSQL's xlog_redo() function.\n    ///\n    /// Returns 'true' if the Checkpoint was updated.\n    pub fn update_next_multixid(&mut self, multi_xid: u32, multi_offset: u32) -> bool {\n        let mut modified = false;\n\n        if multi_xid.wrapping_sub(self.nextMulti) as i32 > 0 {\n            self.nextMulti = multi_xid;\n            modified = true;\n        }\n\n        if multi_offset.wrapping_sub(self.nextMultiOffset) as i32 > 0 {\n            self.nextMultiOffset = multi_offset;\n            modified = true;\n        }\n\n        modified\n    }\n}\n\n/// Generate new, empty WAL segment, with correct block headers at the first\n/// page of the segment and the page that contains the given LSN.\n/// We need this segment to start compute node.\npub fn generate_wal_segment(segno: u64, system_id: u64, lsn: Lsn) -> Result<Bytes, SerializeError> {\n    let mut seg_buf = BytesMut::with_capacity(WAL_SEGMENT_SIZE);\n\n    let pageaddr = XLogSegNoOffsetToRecPtr(segno, 0, WAL_SEGMENT_SIZE);\n\n    let page_off = lsn.block_offset();\n    let seg_off = lsn.segment_offset(WAL_SEGMENT_SIZE);\n\n    let first_page_only = seg_off < XLOG_BLCKSZ;\n    // If first records starts in the middle of the page, pretend in page header\n    // there is a fake record which ends where first real record starts. 
This\n    // makes pg_waldump etc happy.\n    let (shdr_rem_len, infoflags) = if first_page_only && seg_off > 0 {\n        assert!(seg_off >= XLOG_SIZE_OF_XLOG_LONG_PHD);\n        // xlp_rem_len doesn't include page header, hence the subtraction.\n        (\n            seg_off - XLOG_SIZE_OF_XLOG_LONG_PHD,\n            pg_constants::XLP_FIRST_IS_CONTRECORD,\n        )\n    } else {\n        (0, 0)\n    };\n\n    let hdr = XLogLongPageHeaderData {\n        std: {\n            XLogPageHeaderData {\n                xlp_magic: XLOG_PAGE_MAGIC as u16,\n                xlp_info: pg_constants::XLP_LONG_HEADER | infoflags,\n                xlp_tli: PG_TLI,\n                xlp_pageaddr: pageaddr,\n                xlp_rem_len: shdr_rem_len as u32,\n                ..Default::default() // Put 0 in padding fields.\n            }\n        },\n        xlp_sysid: system_id,\n        xlp_seg_size: WAL_SEGMENT_SIZE as u32,\n        xlp_xlog_blcksz: XLOG_BLCKSZ as u32,\n    };\n\n    let hdr_bytes = hdr.encode()?;\n    seg_buf.extend_from_slice(&hdr_bytes);\n\n    //zero out the rest of the file\n    seg_buf.resize(WAL_SEGMENT_SIZE, 0);\n\n    if !first_page_only {\n        let block_offset = lsn.page_offset_in_segment(WAL_SEGMENT_SIZE) as usize;\n        // see comments above about XLP_FIRST_IS_CONTRECORD and xlp_rem_len.\n        let (xlp_rem_len, xlp_info) = if page_off > 0 {\n            assert!(page_off >= XLOG_SIZE_OF_XLOG_SHORT_PHD as u64);\n            (\n                (page_off - XLOG_SIZE_OF_XLOG_SHORT_PHD as u64) as u32,\n                pg_constants::XLP_FIRST_IS_CONTRECORD,\n            )\n        } else {\n            (0, 0)\n        };\n        let header = XLogPageHeaderData {\n            xlp_magic: XLOG_PAGE_MAGIC as u16,\n            xlp_info,\n            xlp_tli: PG_TLI,\n            xlp_pageaddr: lsn.page_lsn().0,\n            xlp_rem_len,\n            ..Default::default() // Put 0 in padding fields.\n        };\n        let hdr_bytes = 
header.encode()?;\n\n        debug_assert!(seg_buf.len() > block_offset + hdr_bytes.len());\n        debug_assert_ne!(block_offset, 0);\n\n        seg_buf[block_offset..block_offset + hdr_bytes.len()].copy_from_slice(&hdr_bytes[..]);\n    }\n\n    Ok(seg_buf.freeze())\n}\n\n#[repr(C)]\n#[derive(Serialize)]\npub struct XlLogicalMessage {\n    pub db_id: Oid,\n    pub transactional: uint32, // bool, takes 4 bytes due to alignment in C structures\n    pub prefix_size: uint64,\n    pub message_size: uint64,\n}\n\nimpl XlLogicalMessage {\n    pub fn encode(&self) -> Bytes {\n        use utils::bin_ser::LeSer;\n        self.ser().unwrap().into()\n    }\n}\n\n/// Create new WAL record for non-transactional logical message.\n/// Used for creating artificial WAL for tests, as LogicalMessage\n/// record is basically no-op.\npub fn encode_logical_message(prefix: &str, message: &str) -> Bytes {\n    // This function can take untrusted input, so discard any NUL bytes in the prefix string.\n    let prefix = CString::new(prefix.replace('\\0', \"\")).expect(\"no NULs\");\n    let message = message.as_bytes();\n    LogicalMessageGenerator::new(&prefix, message)\n        .next()\n        .unwrap()\n        .encode(Lsn(0))\n}\n\n#[cfg(test)]\nmod tests {\n    use super::*;\n\n    #[test]\n    fn test_ts_conversion() {\n        let now = SystemTime::now();\n        let round_trip = try_from_pg_timestamp(to_pg_timestamp(now)).unwrap();\n\n        let now_since = now.duration_since(SystemTime::UNIX_EPOCH).unwrap();\n        let round_trip_since = round_trip.duration_since(SystemTime::UNIX_EPOCH).unwrap();\n        assert_eq!(now_since.as_micros(), round_trip_since.as_micros());\n\n        let now_pg = get_current_timestamp();\n        let round_trip_pg = to_pg_timestamp(try_from_pg_timestamp(now_pg).unwrap());\n\n        assert_eq!(now_pg, round_trip_pg);\n    }\n\n    // If you need to craft WAL and write tests for this module, put it at wal_craft crate.\n}\n"
  },
  {
    "path": "libs/postgres_ffi/wal_craft/Cargo.toml",
    "content": "[package]\nname = \"wal_craft\"\nversion = \"0.1.0\"\nedition.workspace = true\nlicense.workspace = true\n\n[dependencies]\nanyhow.workspace = true\nclap.workspace = true\nenv_logger.workspace = true\nlog.workspace = true\npostgres.workspace = true\npostgres_ffi.workspace = true\ncamino-tempfile.workspace = true\n\n[dev-dependencies]\nregex.workspace = true\nutils.workspace = true\n"
  },
  {
    "path": "libs/postgres_ffi/wal_craft/src/bin/wal_craft.rs",
    "content": "use std::path::PathBuf;\nuse std::str::FromStr;\n\nuse anyhow::*;\nuse clap::{Arg, ArgMatches, Command, value_parser};\nuse postgres::Client;\nuse postgres_ffi::PgMajorVersion;\nuse wal_craft::*;\n\nfn main() -> Result<()> {\n    env_logger::Builder::from_env(env_logger::Env::default().default_filter_or(\"wal_craft=info\"))\n        .init();\n    let arg_matches = cli().get_matches();\n\n    let wal_craft = |arg_matches: &ArgMatches, client: &mut Client| {\n        let intermediate_lsns = match arg_matches\n            .get_one::<String>(\"type\")\n            .map(|s| s.as_str())\n            .context(\"'type' is required\")?\n        {\n            Simple::NAME => Simple::craft(client)?,\n            LastWalRecordXlogSwitch::NAME => LastWalRecordXlogSwitch::craft(client)?,\n            LastWalRecordXlogSwitchEndsOnPageBoundary::NAME => {\n                LastWalRecordXlogSwitchEndsOnPageBoundary::craft(client)?\n            }\n            WalRecordCrossingSegmentFollowedBySmallOne::NAME => {\n                WalRecordCrossingSegmentFollowedBySmallOne::craft(client)?\n            }\n            LastWalRecordCrossingSegment::NAME => LastWalRecordCrossingSegment::craft(client)?,\n            a => panic!(\"Unknown --type argument: {a}\"),\n        };\n        let end_of_wal_lsn = client.pg_current_wal_insert_lsn()?;\n        for lsn in intermediate_lsns {\n            println!(\"intermediate_lsn = {lsn}\");\n        }\n        println!(\"end_of_wal = {end_of_wal_lsn}\");\n        Ok(())\n    };\n\n    match arg_matches.subcommand() {\n        None => panic!(\"No subcommand provided\"),\n        Some((\"print-postgres-config\", _)) => {\n            for cfg in REQUIRED_POSTGRES_CONFIG.iter() {\n                println!(\"{cfg}\");\n            }\n            Ok(())\n        }\n\n        Some((\"with-initdb\", arg_matches)) => {\n            let cfg = Conf {\n                pg_version: *arg_matches\n                    
.get_one::<PgMajorVersion>(\"pg-version\")\n                    .context(\"'pg-version' is required\")?,\n                pg_distrib_dir: arg_matches\n                    .get_one::<PathBuf>(\"pg-distrib-dir\")\n                    .context(\"'pg-distrib-dir' is required\")?\n                    .to_owned(),\n                datadir: arg_matches\n                    .get_one::<PathBuf>(\"datadir\")\n                    .context(\"'datadir' is required\")?\n                    .to_owned(),\n            };\n            cfg.initdb()?;\n            let srv = cfg.start_server()?;\n            wal_craft(arg_matches, &mut srv.connect_with_timeout()?)?;\n            srv.kill();\n            Ok(())\n        }\n        Some((\"in-existing\", arg_matches)) => wal_craft(\n            arg_matches,\n            &mut postgres::Config::from_str(\n                arg_matches\n                    .get_one::<String>(\"connection\")\n                    .context(\"'connection' is required\")?,\n            )\n            .context(\n                \"'connection' argument value could not be parsed as a postgres connection string\",\n            )?\n            .connect(postgres::NoTls)?,\n        ),\n        Some(_) => panic!(\"Unknown subcommand\"),\n    }\n}\n\nfn cli() -> Command {\n    let type_arg = &Arg::new(\"type\")\n        .help(\"Type of WAL to craft\")\n        .value_parser([\n            Simple::NAME,\n            LastWalRecordXlogSwitch::NAME,\n            LastWalRecordXlogSwitchEndsOnPageBoundary::NAME,\n            WalRecordCrossingSegmentFollowedBySmallOne::NAME,\n            LastWalRecordCrossingSegment::NAME,\n        ])\n        .required(true);\n\n    Command::new(\"Postgres WAL crafter\")\n        .about(\"Crafts Postgres databases with specific WAL properties\")\n        .subcommand(\n            Command::new(\"print-postgres-config\")\n                .about(\"Print the configuration required for PostgreSQL server before running this script\")\n        )\n      
  .subcommand(\n            Command::new(\"with-initdb\")\n                .about(\"Craft WAL in a new data directory first initialized with initdb\")\n                .arg(type_arg)\n                .arg(\n                    Arg::new(\"datadir\")\n                        .help(\"Data directory for the Postgres server\")\n                        .value_parser(value_parser!(PathBuf))\n                        .required(true)\n                )\n                .arg(\n                    Arg::new(\"pg-distrib-dir\")\n                        .long(\"pg-distrib-dir\")\n                        .value_parser(value_parser!(PathBuf))\n                        .help(\"Directory with Postgres distributions (bin and lib directories, e.g. pg_install containing subpath `v14/bin/postgresql`)\")\n                        .default_value(\"/usr/local\")\n                )\n                .arg(\n                    Arg::new(\"pg-version\")\n                    .long(\"pg-version\")\n                    .help(\"Postgres version to use for the initial tenant\")\n                    .value_parser(value_parser!(u32))\n                    .required(true)\n\n                )\n        )\n        .subcommand(\n            Command::new(\"in-existing\")\n                .about(\"Craft WAL at an existing recently created Postgres database. Note that server may append new WAL entries on shutdown.\")\n                .arg(type_arg)\n                .arg(\n                    Arg::new(\"connection\")\n                        .help(\"Connection string to the Postgres database to populate\")\n                        .required(true)\n                )\n        )\n}\n\n#[test]\nfn verify_cli() {\n    cli().debug_assert();\n}\n"
  },
  {
    "path": "libs/postgres_ffi/wal_craft/src/lib.rs",
    "content": "use std::ffi::OsStr;\nuse std::path::{Path, PathBuf};\nuse std::process::Command;\nuse std::time::{Duration, Instant};\n\nuse anyhow::{bail, ensure};\nuse camino_tempfile::{Utf8TempDir, tempdir};\nuse log::*;\nuse postgres::Client;\nuse postgres::types::PgLsn;\nuse postgres_ffi::{\n    PgMajorVersion, WAL_SEGMENT_SIZE, XLOG_BLCKSZ, XLOG_SIZE_OF_XLOG_LONG_PHD,\n    XLOG_SIZE_OF_XLOG_RECORD, XLOG_SIZE_OF_XLOG_SHORT_PHD,\n};\n\nmacro_rules! xlog_utils_test {\n    ($version:ident) => {\n        #[path = \".\"]\n        mod $version {\n            #[allow(unused_imports)]\n            pub use postgres_ffi::$version::wal_craft_test_export::*;\n            #[allow(clippy::duplicate_mod)]\n            #[cfg(test)]\n            mod xlog_utils_test;\n        }\n    };\n}\n\npostgres_ffi::for_all_postgres_versions! { xlog_utils_test }\n\npub struct Conf {\n    pub pg_version: PgMajorVersion,\n    pub pg_distrib_dir: PathBuf,\n    pub datadir: PathBuf,\n}\n\npub struct PostgresServer {\n    process: std::process::Child,\n    _unix_socket_dir: Utf8TempDir,\n    client_config: postgres::Config,\n}\n\npub static REQUIRED_POSTGRES_CONFIG: [&str; 4] = [\n    \"wal_keep_size=50MB\",            // Ensure old WAL is not removed\n    \"shared_preload_libraries=neon\", // can only be loaded at startup\n    // Disable background processes as much as possible\n    \"wal_writer_delay=10s\",\n    \"autovacuum=off\",\n];\n\nimpl Conf {\n    pub fn pg_distrib_dir(&self) -> anyhow::Result<PathBuf> {\n        let path = self.pg_distrib_dir.clone();\n\n        Ok(path.join(self.pg_version.v_str()))\n    }\n\n    fn pg_bin_dir(&self) -> anyhow::Result<PathBuf> {\n        Ok(self.pg_distrib_dir()?.join(\"bin\"))\n    }\n\n    fn pg_lib_dir(&self) -> anyhow::Result<PathBuf> {\n        Ok(self.pg_distrib_dir()?.join(\"lib\"))\n    }\n\n    pub fn wal_dir(&self) -> PathBuf {\n        self.datadir.join(\"pg_wal\")\n    }\n\n    fn new_pg_command(&self, command: impl AsRef<Path>) -> 
anyhow::Result<Command> {\n        let path = self.pg_bin_dir()?.join(command);\n        ensure!(path.exists(), \"Command {:?} does not exist\", path);\n        let mut cmd = Command::new(path);\n        cmd.env_clear()\n            .env(\"LD_LIBRARY_PATH\", self.pg_lib_dir()?)\n            .env(\"DYLD_LIBRARY_PATH\", self.pg_lib_dir()?)\n            .env(\n                \"ASAN_OPTIONS\",\n                std::env::var(\"ASAN_OPTIONS\").unwrap_or_default(),\n            )\n            .env(\n                \"UBSAN_OPTIONS\",\n                std::env::var(\"UBSAN_OPTIONS\").unwrap_or_default(),\n            );\n        Ok(cmd)\n    }\n\n    pub fn initdb(&self) -> anyhow::Result<()> {\n        if let Some(parent) = self.datadir.parent() {\n            info!(\"Pre-creating parent directory {:?}\", parent);\n            // Tests may be run concurrently and there may be a race to create `test_output/`.\n            // std::fs::create_dir_all is guaranteed to have no races with another thread creating directories.\n            std::fs::create_dir_all(parent)?;\n        }\n        info!(\n            \"Running initdb in {:?} with user \\\"postgres\\\"\",\n            self.datadir\n        );\n        let output = self\n            .new_pg_command(\"initdb\")?\n            .arg(\"--pgdata\")\n            .arg(&self.datadir)\n            .args([\"--username\", \"postgres\", \"--no-instructions\", \"--no-sync\"])\n            .output()?;\n        debug!(\"initdb output: {:?}\", output);\n        ensure!(\n            output.status.success(),\n            \"initdb failed, stdout and stderr follow:\\n{}{}\",\n            String::from_utf8_lossy(&output.stdout),\n            String::from_utf8_lossy(&output.stderr),\n        );\n        Ok(())\n    }\n\n    pub fn start_server(&self) -> anyhow::Result<PostgresServer> {\n        info!(\"Starting Postgres server in {:?}\", self.datadir);\n        let unix_socket_dir = tempdir()?; // We need a directory with a short name for 
Unix socket (up to 108 symbols)\n        let unix_socket_dir_path = unix_socket_dir.path().to_owned();\n        let server_process = self\n            .new_pg_command(\"postgres\")?\n            .args([\"-c\", \"listen_addresses=\"])\n            .arg(\"-k\")\n            .arg(&unix_socket_dir_path)\n            .arg(\"-D\")\n            .arg(&self.datadir)\n            .args(REQUIRED_POSTGRES_CONFIG.iter().flat_map(|cfg| [\"-c\", cfg]))\n            .spawn()?;\n        let server = PostgresServer {\n            process: server_process,\n            _unix_socket_dir: unix_socket_dir,\n            client_config: {\n                let mut c = postgres::Config::new();\n                c.host_path(&unix_socket_dir_path);\n                c.user(\"postgres\");\n                c.connect_timeout(Duration::from_millis(10000));\n                c\n            },\n        };\n        Ok(server)\n    }\n\n    pub fn pg_waldump(\n        &self,\n        first_segment_name: &OsStr,\n        last_segment_name: &OsStr,\n    ) -> anyhow::Result<std::process::Output> {\n        let first_segment_file = self.datadir.join(first_segment_name);\n        let last_segment_file = self.datadir.join(last_segment_name);\n        info!(\n            \"Running pg_waldump for {} .. 
{}\",\n            first_segment_file.display(),\n            last_segment_file.display()\n        );\n        let output = self\n            .new_pg_command(\"pg_waldump\")?\n            .args([&first_segment_file, &last_segment_file])\n            .output()?;\n        debug!(\"waldump output: {:?}\", output);\n        Ok(output)\n    }\n}\n\nimpl PostgresServer {\n    pub fn connect_with_timeout(&self) -> anyhow::Result<Client> {\n        let retry_until = Instant::now() + *self.client_config.get_connect_timeout().unwrap();\n        while Instant::now() < retry_until {\n            if let Ok(client) = self.client_config.connect(postgres::NoTls) {\n                return Ok(client);\n            }\n            std::thread::sleep(Duration::from_millis(100));\n        }\n        bail!(\"Connection timed out\");\n    }\n\n    pub fn kill(mut self) {\n        self.process.kill().unwrap();\n        self.process.wait().unwrap();\n    }\n}\n\nimpl Drop for PostgresServer {\n    fn drop(&mut self) {\n        match self.process.try_wait() {\n            Ok(Some(_)) => return,\n            Ok(None) => {\n                warn!(\"Server was not terminated, will be killed\");\n            }\n            Err(e) => {\n                error!(\"Unable to get status of the server: {}, will be killed\", e);\n            }\n        }\n        let _ = self.process.kill();\n    }\n}\n\npub trait PostgresClientExt: postgres::GenericClient {\n    fn pg_current_wal_insert_lsn(&mut self) -> anyhow::Result<PgLsn> {\n        Ok(self\n            .query_one(\"SELECT pg_current_wal_insert_lsn()\", &[])?\n            .get(0))\n    }\n    fn pg_current_wal_flush_lsn(&mut self) -> anyhow::Result<PgLsn> {\n        Ok(self\n            .query_one(\"SELECT pg_current_wal_flush_lsn()\", &[])?\n            .get(0))\n    }\n}\n\nimpl<C: postgres::GenericClient> PostgresClientExt for C {}\n\npub fn ensure_server_config(client: &mut impl postgres::GenericClient) -> anyhow::Result<()> {\n    
client.execute(\"create extension if not exists neon_test_utils\", &[])?;\n\n    let wal_keep_size: String = client.query_one(\"SHOW wal_keep_size\", &[])?.get(0);\n    ensure!(wal_keep_size == \"50MB\");\n    let wal_writer_delay: String = client.query_one(\"SHOW wal_writer_delay\", &[])?.get(0);\n    ensure!(wal_writer_delay == \"10s\");\n    let autovacuum: String = client.query_one(\"SHOW autovacuum\", &[])?.get(0);\n    ensure!(autovacuum == \"off\");\n\n    let wal_segment_size = client.query_one(\n        \"select cast(setting as bigint) as setting, unit \\\n         from pg_settings where name = 'wal_segment_size'\",\n        &[],\n    )?;\n    ensure!(\n        wal_segment_size.get::<_, String>(\"unit\") == \"B\",\n        \"Unexpected wal_segment_size unit\"\n    );\n    ensure!(\n        wal_segment_size.get::<_, i64>(\"setting\") == WAL_SEGMENT_SIZE as i64,\n        \"Unexpected wal_segment_size in bytes\"\n    );\n\n    Ok(())\n}\n\npub trait Crafter {\n    const NAME: &'static str;\n\n    /// Generates WAL using the client `client`. Returns a vector of some valid\n    /// \"interesting\" intermediate LSNs which one may start reading from.\n    /// test_end_of_wal uses this to check various starting points.\n    ///\n    /// Note that postgres is generally keen about writing some WAL. While we\n    /// try to disable it (autovacuum, big wal_writer_delay, etc) it is always\n    /// possible, e.g. xl_running_xacts are dumped each 15s. So checks about\n    /// stable WAL end would be flaky unless postgres is shut down. For this\n    /// reason returning potential end of WAL here is pointless. 
Most of the\n    /// time this doesn't happen though, so it is reasonable to create needed\n    /// WAL structure and immediately kill postgres like test_end_of_wal does.\n    fn craft(client: &mut impl postgres::GenericClient) -> anyhow::Result<Vec<PgLsn>>;\n}\n\n/// Wraps some WAL craft function, providing current LSN to it before the\n/// insertion and flushing WAL afterwards. Also pushes initial LSN to the\n/// result.\nfn craft_internal<C: postgres::GenericClient>(\n    client: &mut C,\n    f: impl Fn(&mut C, PgLsn) -> anyhow::Result<Vec<PgLsn>>,\n) -> anyhow::Result<Vec<PgLsn>> {\n    ensure_server_config(client)?;\n\n    let initial_lsn = client.pg_current_wal_insert_lsn()?;\n    info!(\"LSN initial = {}\", initial_lsn);\n\n    let mut intermediate_lsns = f(client, initial_lsn)?;\n    if !intermediate_lsns.starts_with(&[initial_lsn]) {\n        intermediate_lsns.insert(0, initial_lsn);\n    }\n\n    // Some records may be not flushed, e.g. non-transactional logical messages. Flush now.\n    //\n    // If the previous WAL record ended exactly at page boundary, pg_current_wal_insert_lsn\n    // returns the position just after the page header on the next page. That's where the next\n    // record will be inserted. But the page header hasn't actually been written to the WAL\n    // yet, and if you try to flush it, you get a \"request to flush past end of generated WAL\"\n    // error. 
Because of that, if the insert location is just after a page header, back off to\n    // previous page boundary.\n    let mut lsn = u64::from(client.pg_current_wal_insert_lsn()?);\n    if lsn % WAL_SEGMENT_SIZE as u64 == XLOG_SIZE_OF_XLOG_LONG_PHD as u64 {\n        lsn -= XLOG_SIZE_OF_XLOG_LONG_PHD as u64;\n    } else if lsn % XLOG_BLCKSZ as u64 == XLOG_SIZE_OF_XLOG_SHORT_PHD as u64 {\n        lsn -= XLOG_SIZE_OF_XLOG_SHORT_PHD as u64;\n    }\n    client.execute(\"select neon_xlogflush($1)\", &[&PgLsn::from(lsn)])?;\n    Ok(intermediate_lsns)\n}\n\npub struct Simple;\nimpl Crafter for Simple {\n    const NAME: &'static str = \"simple\";\n    fn craft(client: &mut impl postgres::GenericClient) -> anyhow::Result<Vec<PgLsn>> {\n        craft_internal(client, |client, _| {\n            client.execute(\"CREATE table t(x int)\", &[])?;\n            Ok(Vec::new())\n        })\n    }\n}\n\npub struct LastWalRecordXlogSwitch;\nimpl Crafter for LastWalRecordXlogSwitch {\n    const NAME: &'static str = \"last_wal_record_xlog_switch\";\n    fn craft(client: &mut impl postgres::GenericClient) -> anyhow::Result<Vec<PgLsn>> {\n        // Do not use craft_internal because here we end up with flush_lsn exactly on\n        // the segment boundary and insert_lsn after the initial page header, which is unusual.\n        ensure_server_config(client)?;\n\n        client.execute(\"CREATE table t(x int)\", &[])?;\n        let before_xlog_switch = client.pg_current_wal_insert_lsn()?;\n        // pg_switch_wal returns end of last record of the switched segment,\n        // i.e. 
end of SWITCH itself.\n        let xlog_switch_record_end: PgLsn = client.query_one(\"SELECT pg_switch_wal()\", &[])?.get(0);\n        let before_xlog_switch_u64 = u64::from(before_xlog_switch);\n        let next_segment = PgLsn::from(\n            before_xlog_switch_u64 - (before_xlog_switch_u64 % WAL_SEGMENT_SIZE as u64)\n                + WAL_SEGMENT_SIZE as u64,\n        );\n        ensure!(\n            xlog_switch_record_end <= next_segment,\n            \"XLOG_SWITCH record ended after the expected segment boundary: {} > {}\",\n            xlog_switch_record_end,\n            next_segment\n        );\n        Ok(vec![before_xlog_switch, xlog_switch_record_end])\n    }\n}\n\npub struct LastWalRecordXlogSwitchEndsOnPageBoundary;\n/// Craft xlog SWITCH record ending at page boundary.\nimpl Crafter for LastWalRecordXlogSwitchEndsOnPageBoundary {\n    const NAME: &'static str = \"last_wal_record_xlog_switch_ends_on_page_boundary\";\n    fn craft(client: &mut impl postgres::GenericClient) -> anyhow::Result<Vec<PgLsn>> {\n        // Do not use craft_internal because here we end up with flush_lsn exactly on\n        // the segment boundary and insert_lsn after the initial page header, which is unusual.\n        ensure_server_config(client)?;\n\n        client.execute(\"CREATE table t(x int)\", &[])?;\n\n        // Add padding so the XLOG_SWITCH record ends exactly on XLOG_BLCKSZ boundary.  
We\n        // will use carefully-sized logical messages to advance WAL insert location such\n        // that there is just enough space on the page for the XLOG_SWITCH record.\n        loop {\n            // We start with measuring how much WAL it takes for one logical message,\n            // considering all alignments and headers.\n            let before_lsn = client.pg_current_wal_insert_lsn()?;\n            client.execute(\n                \"SELECT pg_logical_emit_message(false, 'swch', REPEAT('a', 10))\",\n                &[],\n            )?;\n            let after_lsn = client.pg_current_wal_insert_lsn()?;\n\n            // Did the record cross a page boundary? If it did, start over. Crossing a\n            // page boundary adds to the apparent size of the record because of the page\n            // header, which throws off the calculation.\n            if u64::from(before_lsn) / XLOG_BLCKSZ as u64\n                != u64::from(after_lsn) / XLOG_BLCKSZ as u64\n            {\n                continue;\n            }\n            // base_size is the size of a logical message without the payload\n            let base_size = u64::from(after_lsn) - u64::from(before_lsn) - 10;\n\n            // Is there enough space on the page for another logical message and an\n            // XLOG_SWITCH? If not, start over.\n            let page_remain = XLOG_BLCKSZ as u64 - u64::from(after_lsn) % XLOG_BLCKSZ as u64;\n            if page_remain < base_size + XLOG_SIZE_OF_XLOG_RECORD as u64 {\n                continue;\n            }\n\n            // We will write another logical message, such that after the logical message\n            // record, there will be space for exactly one XLOG_SWITCH. How large should\n            // the logical message's payload be? 
An XLOG_SWITCH record has no data => its\n            // size is exactly XLOG_SIZE_OF_XLOG_RECORD.\n            let repeats = page_remain - base_size - XLOG_SIZE_OF_XLOG_RECORD as u64;\n\n            client.execute(\n                \"SELECT pg_logical_emit_message(false, 'swch', REPEAT('a', $1))\",\n                &[&(repeats as i32)],\n            )?;\n            info!(\n                \"current_wal_insert_lsn={}, XLOG_SIZE_OF_XLOG_RECORD={}\",\n                client.pg_current_wal_insert_lsn()?,\n                XLOG_SIZE_OF_XLOG_RECORD\n            );\n\n            // Emit the XLOG_SWITCH\n            let before_xlog_switch = client.pg_current_wal_insert_lsn()?;\n            let xlog_switch_record_end: PgLsn =\n                client.query_one(\"SELECT pg_switch_wal()\", &[])?.get(0);\n\n            if u64::from(xlog_switch_record_end) as usize % XLOG_BLCKSZ\n                != XLOG_SIZE_OF_XLOG_SHORT_PHD\n            {\n                warn!(\n                    \"XLOG_SWITCH message ended not on page boundary: {}, offset = {}, repeating\",\n                    xlog_switch_record_end,\n                    u64::from(xlog_switch_record_end) as usize % XLOG_BLCKSZ\n                );\n                continue;\n            }\n            return Ok(vec![before_xlog_switch, xlog_switch_record_end]);\n        }\n    }\n}\n\n/// Write ~16MB logical message; it should cross WAL segment.\nfn craft_seg_size_logical_message(\n    client: &mut impl postgres::GenericClient,\n    transactional: bool,\n) -> anyhow::Result<Vec<PgLsn>> {\n    craft_internal(client, |client, initial_lsn| {\n        ensure!(\n            initial_lsn < PgLsn::from(0x0200_0000 - 1024 * 1024),\n            \"Initial LSN is too far in the future\"\n        );\n\n        let message_lsn: PgLsn = client\n            .query_one(\n                \"select pg_logical_emit_message($1, 'big-16mb-msg', \\\n                 concat(repeat('abcd', 16 * 256 * 1024), 'end')) as message_lsn\",\n             
   &[&transactional],\n            )?\n            .get(\"message_lsn\");\n        ensure!(\n            message_lsn > PgLsn::from(0x0200_0000 + 4 * 8192),\n            \"Logical message did not cross the segment boundary\"\n        );\n        ensure!(\n            message_lsn < PgLsn::from(0x0400_0000),\n            \"Logical message crossed two segments\"\n        );\n\n        Ok(vec![message_lsn])\n    })\n}\n\npub struct WalRecordCrossingSegmentFollowedBySmallOne;\nimpl Crafter for WalRecordCrossingSegmentFollowedBySmallOne {\n    const NAME: &'static str = \"wal_record_crossing_segment_followed_by_small_one\";\n    fn craft(client: &mut impl postgres::GenericClient) -> anyhow::Result<Vec<PgLsn>> {\n        // Transactional message crossing WAL segment will be followed by small\n        // commit record.\n        craft_seg_size_logical_message(client, true)\n    }\n}\n\npub struct LastWalRecordCrossingSegment;\nimpl Crafter for LastWalRecordCrossingSegment {\n    const NAME: &'static str = \"last_wal_record_crossing_segment\";\n    fn craft(client: &mut impl postgres::GenericClient) -> anyhow::Result<Vec<PgLsn>> {\n        craft_seg_size_logical_message(client, false)\n    }\n}\n"
  },
  {
    "path": "libs/postgres_ffi/wal_craft/src/xlog_utils_test.rs",
    "content": "//! Tests for postgres_ffi xlog_utils module. Put it here to break cyclic dependency.\n\nuse super::*;\nuse crate::{error, info};\nuse regex::Regex;\nuse std::cmp::min;\nuse std::ffi::OsStr;\nuse std::fs::{self, File};\nuse std::io::Write;\nuse std::{env, str::FromStr};\nuse utils::const_assert;\nuse utils::lsn::Lsn;\n\nfn init_logging() {\n    let _ = env_logger::Builder::from_env(env_logger::Env::default().default_filter_or(format!(\n        \"crate=info,postgres_ffi::{PG_MAJORVERSION}::xlog_utils=trace\"\n    )))\n    .is_test(true)\n    .try_init();\n}\n\n/// Test that find_end_of_wal returns the same results as pg_waldump on various\n/// WALs created by Crafter.\nfn test_end_of_wal<C: crate::Crafter>(test_name: &str) {\n    use crate::*;\n\n    let pg_version = MY_PGVERSION;\n\n    // Craft some WAL\n    let top_path = PathBuf::from(env!(\"CARGO_MANIFEST_DIR\"))\n        .join(\"..\")\n        .join(\"..\")\n        .join(\"..\");\n    let cfg = Conf {\n        pg_version,\n        pg_distrib_dir: top_path.join(\"pg_install\"),\n        datadir: top_path.join(format!(\"test_output/{test_name}-{PG_MAJORVERSION}\")),\n    };\n    if cfg.datadir.exists() {\n        fs::remove_dir_all(&cfg.datadir).unwrap();\n    }\n    cfg.initdb().unwrap();\n    let srv = cfg.start_server().unwrap();\n    let intermediate_lsns = C::craft(&mut srv.connect_with_timeout().unwrap()).unwrap();\n    let intermediate_lsns: Vec<Lsn> = intermediate_lsns\n        .iter()\n        .map(|&lsn| u64::from(lsn).into())\n        .collect();\n    // Kill postgres. 
Note that it might have inserted to WAL something after\n    // 'craft' did its job.\n    srv.kill();\n\n    // Check find_end_of_wal on the initial WAL\n    let last_segment = cfg\n        .wal_dir()\n        .read_dir()\n        .unwrap()\n        .map(|f| f.unwrap().file_name())\n        .filter(|fname| IsXLogFileName(fname))\n        .max()\n        .unwrap();\n    let expected_end_of_wal = find_pg_waldump_end_of_wal(&cfg, &last_segment);\n    for start_lsn in intermediate_lsns\n        .iter()\n        .chain(std::iter::once(&expected_end_of_wal))\n    {\n        // Erase all WAL before `start_lsn` to ensure it's not used by `find_end_of_wal`.\n        // We assume that `start_lsn` is non-decreasing.\n        info!(\n            \"Checking with start_lsn={}, erasing WAL before it\",\n            start_lsn\n        );\n        for file in fs::read_dir(cfg.wal_dir()).unwrap().flatten() {\n            let fname = file.file_name();\n            if !IsXLogFileName(&fname) {\n                continue;\n            }\n            let (segno, _) = XLogFromFileName(&fname, WAL_SEGMENT_SIZE).unwrap();\n            let seg_start_lsn = XLogSegNoOffsetToRecPtr(segno, 0, WAL_SEGMENT_SIZE);\n            if seg_start_lsn > u64::from(*start_lsn) {\n                continue;\n            }\n            let mut f = File::options().write(true).open(file.path()).unwrap();\n            static ZEROS: [u8; WAL_SEGMENT_SIZE] = [0u8; WAL_SEGMENT_SIZE];\n            f.write_all(\n                &ZEROS[0..min(\n                    WAL_SEGMENT_SIZE,\n                    (u64::from(*start_lsn) - seg_start_lsn) as usize,\n                )],\n            )\n            .unwrap();\n        }\n        check_end_of_wal(&cfg, &last_segment, *start_lsn, expected_end_of_wal);\n    }\n}\n\nfn find_pg_waldump_end_of_wal(cfg: &crate::Conf, last_segment: &OsStr) -> Lsn {\n    // Get the actual end of WAL by pg_waldump\n    let waldump_output = cfg\n        
.pg_waldump(OsStr::new(\"000000010000000000000001\"), last_segment)\n        .unwrap()\n        .stderr;\n    let waldump_output = std::str::from_utf8(&waldump_output).unwrap();\n    let caps = match Regex::new(r\"invalid record length at (.+):\")\n        .unwrap()\n        .captures(waldump_output)\n    {\n        Some(caps) => caps,\n        None => {\n            error!(\"Unable to parse pg_waldump's stderr:\\n{}\", waldump_output);\n            panic!();\n        }\n    };\n    let waldump_wal_end = Lsn::from_str(caps.get(1).unwrap().as_str()).unwrap();\n    info!(\"waldump erred on {}\", waldump_wal_end);\n    waldump_wal_end\n}\n\nfn check_end_of_wal(\n    cfg: &crate::Conf,\n    last_segment: &OsStr,\n    start_lsn: Lsn,\n    expected_end_of_wal: Lsn,\n) {\n    // Check end_of_wal on non-partial WAL segment (we treat it as fully populated)\n    // let wal_end = find_end_of_wal(&cfg.wal_dir(), WAL_SEGMENT_SIZE, start_lsn).unwrap();\n    // info!(\n    //     \"find_end_of_wal returned wal_end={} with non-partial WAL segment\",\n    //     wal_end\n    // );\n    // assert_eq!(wal_end, expected_end_of_wal_non_partial);\n\n    // Rename file to partial to actually find last valid lsn, then rename it back.\n    fs::rename(\n        cfg.wal_dir().join(last_segment),\n        cfg.wal_dir()\n            .join(format!(\"{}.partial\", last_segment.to_str().unwrap())),\n    )\n    .unwrap();\n    let wal_end = find_end_of_wal(&cfg.wal_dir(), WAL_SEGMENT_SIZE, start_lsn).unwrap();\n    info!(\n        \"find_end_of_wal returned wal_end={} with partial WAL segment\",\n        wal_end\n    );\n    assert_eq!(wal_end, expected_end_of_wal);\n    fs::rename(\n        cfg.wal_dir()\n            .join(format!(\"{}.partial\", last_segment.to_str().unwrap())),\n        cfg.wal_dir().join(last_segment),\n    )\n    .unwrap();\n}\n\nconst_assert!(WAL_SEGMENT_SIZE == 16 * 1024 * 1024);\n\n#[test]\npub fn test_find_end_of_wal_simple() {\n    init_logging();\n    
test_end_of_wal::<crate::Simple>(\"test_find_end_of_wal_simple\");\n}\n\n#[test]\npub fn test_find_end_of_wal_crossing_segment_followed_by_small_one() {\n    init_logging();\n    test_end_of_wal::<crate::WalRecordCrossingSegmentFollowedBySmallOne>(\n        \"test_find_end_of_wal_crossing_segment_followed_by_small_one\",\n    );\n}\n\n#[test]\npub fn test_find_end_of_wal_last_crossing_segment() {\n    init_logging();\n    test_end_of_wal::<crate::LastWalRecordCrossingSegment>(\n        \"test_find_end_of_wal_last_crossing_segment\",\n    );\n}\n\n/// Check the math in update_next_xid\n///\n/// NOTE: These checks are sensitive to the value of XID_CHECKPOINT_INTERVAL,\n/// currently 1024.\n#[test]\npub fn test_update_next_xid() {\n    let checkpoint_buf = [0u8; size_of::<CheckPoint>()];\n    let mut checkpoint = CheckPoint::decode(&checkpoint_buf).unwrap();\n\n    checkpoint.nextXid = FullTransactionId { value: 10 };\n    assert_eq!(checkpoint.nextXid.value, 10);\n\n    // The input XID gets rounded up to the next XID_CHECKPOINT_INTERVAL\n    // boundary\n    checkpoint.update_next_xid(100);\n    assert_eq!(checkpoint.nextXid.value, 1024);\n\n    // No change\n    checkpoint.update_next_xid(500);\n    assert_eq!(checkpoint.nextXid.value, 1024);\n    checkpoint.update_next_xid(1023);\n    assert_eq!(checkpoint.nextXid.value, 1024);\n\n    // The function returns the *next* XID, given the highest XID seen so\n    // far. 
So when we pass 1024, the nextXid gets bumped up to the next\n    // XID_CHECKPOINT_INTERVAL boundary.\n    checkpoint.update_next_xid(1024);\n    assert_eq!(checkpoint.nextXid.value, 2048);\n}\n\n#[test]\npub fn test_update_next_multixid() {\n    let checkpoint_buf = [0u8; size_of::<CheckPoint>()];\n    let mut checkpoint = CheckPoint::decode(&checkpoint_buf).unwrap();\n\n    // simple case\n    checkpoint.nextMulti = 20;\n    checkpoint.nextMultiOffset = 20;\n    checkpoint.update_next_multixid(1000, 2000);\n    assert_eq!(checkpoint.nextMulti, 1000);\n    assert_eq!(checkpoint.nextMultiOffset, 2000);\n\n    // No change\n    checkpoint.update_next_multixid(500, 900);\n    assert_eq!(checkpoint.nextMulti, 1000);\n    assert_eq!(checkpoint.nextMultiOffset, 2000);\n\n    // Close to wraparound, but not wrapped around yet\n    checkpoint.nextMulti = 0xffff0000;\n    checkpoint.nextMultiOffset = 0xfffe0000;\n    checkpoint.update_next_multixid(0xffff00ff, 0xfffe00ff);\n    assert_eq!(checkpoint.nextMulti, 0xffff00ff);\n    assert_eq!(checkpoint.nextMultiOffset, 0xfffe00ff);\n\n    // Wraparound\n    checkpoint.update_next_multixid(1, 900);\n    assert_eq!(checkpoint.nextMulti, 1);\n    assert_eq!(checkpoint.nextMultiOffset, 900);\n\n    // Wraparound nextMulti to 0.\n    //\n    // It's a bit surprising that nextMulti can be 0, because that's a special value\n    // (InvalidMultiXactId). 
However, that's how Postgres does it at multi-xid wraparound:\n    // nextMulti wraps around to 0, but then when the next multi-xid is assigned, it skips\n    // the 0 and the next multi-xid actually assigned is 1.\n    checkpoint.nextMulti = 0xffff0000;\n    checkpoint.nextMultiOffset = 0xfffe0000;\n    checkpoint.update_next_multixid(0, 0xfffe00ff);\n    assert_eq!(checkpoint.nextMulti, 0);\n    assert_eq!(checkpoint.nextMultiOffset, 0xfffe00ff);\n\n    // Wraparound nextMultiOffset to 0\n    checkpoint.update_next_multixid(0, 0);\n    assert_eq!(checkpoint.nextMulti, 0);\n    assert_eq!(checkpoint.nextMultiOffset, 0);\n}\n\n#[test]\npub fn test_encode_logical_message() {\n    let expected = [\n        64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 21, 0, 0, 170, 34, 166, 227, 255, 38,\n        0, 0, 0, 0, 0, 0, 0, 0, 7, 0, 0, 0, 0, 0, 0, 0, 7, 0, 0, 0, 0, 0, 0, 0, 112, 114, 101, 102,\n        105, 120, 0, 109, 101, 115, 115, 97, 103, 101,\n    ];\n    let actual = encode_logical_message(\"prefix\", \"message\");\n    assert_eq!(expected, actual[..]);\n}\n"
  },
  {
    "path": "libs/postgres_ffi_types/Cargo.toml",
    "content": "[package]\nname = \"postgres_ffi_types\"\nversion = \"0.1.0\"\nedition.workspace = true\nlicense.workspace = true\n\n[dependencies]\nthiserror.workspace = true\nworkspace_hack = { version = \"0.1\", path = \"../../workspace_hack\" }\n\n[dev-dependencies]\n"
  },
  {
    "path": "libs/postgres_ffi_types/src/constants.rs",
    "content": "//! Misc constants, copied from PostgreSQL headers.\n//!\n//! Any constants included here must be the same in all PostgreSQL versions and unlikely to change\n//! in the future either!\n\n// From pg_tablespace_d.h\npub const DEFAULTTABLESPACE_OID: u32 = 1663;\npub const GLOBALTABLESPACE_OID: u32 = 1664;\n"
  },
  {
    "path": "libs/postgres_ffi_types/src/forknum.rs",
    "content": "// Fork numbers, from relpath.h\npub const MAIN_FORKNUM: u8 = 0;\npub const FSM_FORKNUM: u8 = 1;\npub const VISIBILITYMAP_FORKNUM: u8 = 2;\npub const INIT_FORKNUM: u8 = 3;\n\n#[derive(Debug, Clone, thiserror::Error, PartialEq, Eq)]\npub enum FilePathError {\n    #[error(\"invalid relation fork name\")]\n    InvalidForkName,\n    #[error(\"invalid relation data file name\")]\n    InvalidFileName,\n}\n\n/// Convert Postgres relation file's fork suffix to fork number.\npub fn forkname_to_number(forkname: Option<&str>) -> Result<u8, FilePathError> {\n    match forkname {\n        // \"main\" is not in filenames, it's implicit if the fork name is not present\n        None => Ok(MAIN_FORKNUM),\n        Some(\"fsm\") => Ok(FSM_FORKNUM),\n        Some(\"vm\") => Ok(VISIBILITYMAP_FORKNUM),\n        Some(\"init\") => Ok(INIT_FORKNUM),\n        Some(_) => Err(FilePathError::InvalidForkName),\n    }\n}\n\n/// Convert Postgres fork number to the right suffix of the relation data file.\npub fn forknumber_to_name(forknum: u8) -> Option<&'static str> {\n    match forknum {\n        MAIN_FORKNUM => None,\n        FSM_FORKNUM => Some(\"fsm\"),\n        VISIBILITYMAP_FORKNUM => Some(\"vm\"),\n        INIT_FORKNUM => Some(\"init\"),\n        _ => Some(\"UNKNOWN FORKNUM\"),\n    }\n}\n"
  },
  {
    "path": "libs/postgres_ffi_types/src/lib.rs",
    "content": "//! This package contains some PostgreSQL constants and datatypes that are the same in all versions\n//! of PostgreSQL and unlikely to change in the future either. These could be derived from the\n//! PostgreSQL headers with 'bindgen', but in order to avoid proliferating the dependency on bindgen\n//! and the PostgreSQL C headers to all services, we prefer to have this small stand-alone crate for\n//! them instead.\n//!\n//! Be mindful of what you add here, as these types are deeply ingrained in the APIs.\n\npub mod constants;\npub mod forknum;\n\npub type Oid = u32;\npub type RepOriginId = u16;\npub type TimestampTz = i64;\n"
  },
  {
    "path": "libs/postgres_initdb/Cargo.toml",
    "content": "[package]\nname = \"postgres_initdb\"\nversion = \"0.1.0\"\nedition.workspace = true\nlicense.workspace = true\n\n[dependencies]\nanyhow.workspace = true\ntokio.workspace = true\ncamino.workspace = true\nthiserror.workspace = true\npostgres_versioninfo.workspace = true\nworkspace_hack = { version = \"0.1\", path = \"../../workspace_hack\" }\n"
  },
  {
    "path": "libs/postgres_initdb/src/lib.rs",
    "content": "//! The canonical way we run `initdb` in Neon.\n//!\n//! initdb has implicit defaults that are dependent on the environment, e.g., locales & collations.\n//!\n//! This module's job is to eliminate the environment-dependence as much as possible.\n\nuse std::fmt;\n\nuse camino::Utf8Path;\nuse postgres_versioninfo::PgMajorVersion;\n\npub struct RunInitdbArgs<'a> {\n    pub superuser: &'a str,\n    pub locale: &'a str,\n    pub initdb_bin: &'a Utf8Path,\n    pub pg_version: PgMajorVersion,\n    pub library_search_path: &'a Utf8Path,\n    pub pgdata: &'a Utf8Path,\n}\n\n#[derive(thiserror::Error, Debug)]\npub enum Error {\n    Spawn(std::io::Error),\n    Failed {\n        status: std::process::ExitStatus,\n        stderr: Vec<u8>,\n    },\n    WaitOutput(std::io::Error),\n    Other(anyhow::Error),\n}\n\nimpl fmt::Display for Error {\n    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {\n        match self {\n            Error::Spawn(e) => write!(f, \"Error spawning command: {e:?}\"),\n            Error::Failed { status, stderr } => write!(\n                f,\n                \"Command failed with status {:?}: {}\",\n                status,\n                String::from_utf8_lossy(stderr)\n            ),\n            Error::WaitOutput(e) => write!(f, \"Error waiting for command output: {e:?}\"),\n            Error::Other(e) => write!(f, \"Error: {e:?}\"),\n        }\n    }\n}\n\npub async fn do_run_initdb(args: RunInitdbArgs<'_>) -> Result<(), Error> {\n    let RunInitdbArgs {\n        superuser,\n        locale,\n        initdb_bin: initdb_bin_path,\n        pg_version,\n        library_search_path,\n        pgdata,\n    } = args;\n    let mut initdb_command = tokio::process::Command::new(initdb_bin_path);\n    initdb_command\n        .args([\"--pgdata\", pgdata.as_ref()])\n        .args([\"--username\", superuser])\n        .args([\"--encoding\", \"utf8\"])\n        .args([\"--locale\", locale])\n        .arg(\"--no-instructions\")\n        
.arg(\"--no-sync\")\n        .env_clear()\n        .env(\"LD_LIBRARY_PATH\", library_search_path)\n        .env(\"DYLD_LIBRARY_PATH\", library_search_path)\n        .env(\n            \"ASAN_OPTIONS\",\n            std::env::var(\"ASAN_OPTIONS\").unwrap_or_default(),\n        )\n        .env(\n            \"UBSAN_OPTIONS\",\n            std::env::var(\"UBSAN_OPTIONS\").unwrap_or_default(),\n        )\n        .stdin(std::process::Stdio::null())\n        // stdout invocation produces the same output every time, we don't need it\n        .stdout(std::process::Stdio::null())\n        // we would be interested in the stderr output, if there was any\n        .stderr(std::process::Stdio::piped());\n\n    // Before version 15, only the libc provider was available.\n    if pg_version > PgMajorVersion::PG14 {\n        // Version 17 brought with it a builtin locale provider which only provides\n        // C and C.UTF-8. While being safer for collation purposes since it is\n        // guaranteed to be consistent throughout a major release, it is also more\n        // performant.\n        let locale_provider = if pg_version >= PgMajorVersion::PG17 {\n            \"builtin\"\n        } else {\n            \"libc\"\n        };\n\n        initdb_command.args([\"--locale-provider\", locale_provider]);\n    }\n\n    let initdb_proc = initdb_command.spawn().map_err(Error::Spawn)?;\n\n    // Ideally we'd select here with the cancellation token, but the problem is that\n    // we can't safely terminate initdb: it launches processes of its own, and killing\n    // initdb doesn't kill them. 
After we return from this function, we want the target\n    // directory to be able to be cleaned up.\n    // See https://github.com/neondatabase/neon/issues/6385\n    let initdb_output = initdb_proc\n        .wait_with_output()\n        .await\n        .map_err(Error::WaitOutput)?;\n    if !initdb_output.status.success() {\n        return Err(Error::Failed {\n            status: initdb_output.status,\n            stderr: initdb_output.stderr,\n        });\n    }\n\n    Ok(())\n}\n"
  },
  {
    "path": "libs/postgres_versioninfo/Cargo.toml",
    "content": "[package]\nname = \"postgres_versioninfo\"\nversion = \"0.1.0\"\nedition = \"2024\"\nlicense.workspace = true\n\n[dependencies]\nanyhow.workspace = true\nthiserror.workspace = true\nserde.workspace = true\nserde_repr.workspace = true\nworkspace_hack = { version = \"0.1\", path = \"../../workspace_hack\" }\n"
  },
  {
    "path": "libs/postgres_versioninfo/src/lib.rs",
    "content": "use serde::{Deserialize, Deserializer, Serialize, Serializer};\nuse serde_repr::{Deserialize_repr, Serialize_repr};\nuse std::fmt::{Display, Formatter};\nuse std::str::FromStr;\n\n/// An enum with one variant for each major version of PostgreSQL that we support.\n///\n#[derive(Debug, Clone, Copy, Ord, PartialOrd, Eq, PartialEq, Deserialize_repr, Serialize_repr)]\n#[repr(u32)]\npub enum PgMajorVersion {\n    PG14 = 14,\n    PG15 = 15,\n    PG16 = 16,\n    PG17 = 17,\n    // !!! When you add a new PgMajorVersion, don't forget to update PgMajorVersion::ALL\n}\n\n/// A full PostgreSQL version ID, in MMmmbb numerical format (Major/minor/bugfix)\n#[derive(Debug, Copy, Clone, Ord, PartialOrd, Eq, PartialEq)]\n#[repr(transparent)]\npub struct PgVersionId(u32);\n\nimpl PgVersionId {\n    pub const UNKNOWN: PgVersionId = PgVersionId(0);\n\n    pub fn from_full_pg_version(version: u32) -> PgVersionId {\n        match version {\n            0 => PgVersionId(version), // unknown version\n            140000..180000 => PgVersionId(version),\n            _ => panic!(\"Invalid full PostgreSQL version ID {version}\"),\n        }\n    }\n}\n\nimpl Display for PgVersionId {\n    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {\n        u32::fmt(&self.0, f)\n    }\n}\n\nimpl Serialize for PgVersionId {\n    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>\n    where\n        S: Serializer,\n    {\n        u32::serialize(&self.0, serializer)\n    }\n}\n\nimpl<'de> Deserialize<'de> for PgVersionId {\n    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>\n    where\n        D: Deserializer<'de>,\n    {\n        u32::deserialize(deserializer).map(PgVersionId)\n    }\n\n    fn deserialize_in_place<D>(deserializer: D, place: &mut Self) -> Result<(), D::Error>\n    where\n        D: Deserializer<'de>,\n    {\n        u32::deserialize_in_place(deserializer, &mut place.0)\n    }\n}\n\nimpl PgMajorVersion {\n    /// Get the numerical 
representation of the represented Major Version\n    pub const fn major_version_num(&self) -> u32 {\n        match self {\n            PgMajorVersion::PG14 => 14,\n            PgMajorVersion::PG15 => 15,\n            PgMajorVersion::PG16 => 16,\n            PgMajorVersion::PG17 => 17,\n        }\n    }\n\n    /// Get the contents of this version's PG_VERSION file.\n    ///\n    /// The PG_VERSION file is used to determine the PostgreSQL version that currently\n    /// owns the data in a PostgreSQL data directory.\n    pub fn versionfile_string(&self) -> &'static str {\n        match self {\n            PgMajorVersion::PG14 => \"14\",\n            PgMajorVersion::PG15 => \"15\",\n            PgMajorVersion::PG16 => \"16\\x0A\",\n            PgMajorVersion::PG17 => \"17\\x0A\",\n        }\n    }\n\n    /// Get the v{version} string of this major PostgreSQL version.\n    ///\n    /// Because this was hand-coded in various places, this was moved into a shared\n    /// implementation.\n    pub fn v_str(&self) -> String {\n        match self {\n            PgMajorVersion::PG14 => \"v14\",\n            PgMajorVersion::PG15 => \"v15\",\n            PgMajorVersion::PG16 => \"v16\",\n            PgMajorVersion::PG17 => \"v17\",\n        }\n        .to_string()\n    }\n\n    /// All currently supported major versions of PostgreSQL.\n    pub const ALL: &'static [PgMajorVersion] = &[\n        PgMajorVersion::PG14,\n        PgMajorVersion::PG15,\n        PgMajorVersion::PG16,\n        PgMajorVersion::PG17,\n    ];\n}\n\nimpl Display for PgMajorVersion {\n    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {\n        f.write_str(match self {\n            PgMajorVersion::PG14 => \"PgMajorVersion::PG14\",\n            PgMajorVersion::PG15 => \"PgMajorVersion::PG15\",\n            PgMajorVersion::PG16 => \"PgMajorVersion::PG16\",\n            PgMajorVersion::PG17 => \"PgMajorVersion::PG17\",\n        })\n    }\n}\n\n#[derive(Debug, 
thiserror::Error)]\n#[allow(dead_code)]\npub struct InvalidPgVersion(u32);\n\nimpl Display for InvalidPgVersion {\n    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {\n        write!(f, \"InvalidPgVersion({})\", self.0)\n    }\n}\n\nimpl TryFrom<PgVersionId> for PgMajorVersion {\n    type Error = InvalidPgVersion;\n\n    fn try_from(value: PgVersionId) -> Result<Self, Self::Error> {\n        Ok(match value.0 / 10000 {\n            14 => PgMajorVersion::PG14,\n            15 => PgMajorVersion::PG15,\n            16 => PgMajorVersion::PG16,\n            17 => PgMajorVersion::PG17,\n            _ => return Err(InvalidPgVersion(value.0)),\n        })\n    }\n}\n\nimpl From<PgMajorVersion> for PgVersionId {\n    fn from(value: PgMajorVersion) -> Self {\n        PgVersionId((value as u32) * 10000)\n    }\n}\n\n#[derive(Debug, PartialEq, Eq, thiserror::Error)]\npub struct PgMajorVersionParseError(String);\n\nimpl Display for PgMajorVersionParseError {\n    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {\n        write!(f, \"PgMajorVersionParseError({})\", self.0)\n    }\n}\n\nimpl FromStr for PgMajorVersion {\n    type Err = PgMajorVersionParseError;\n\n    fn from_str(s: &str) -> Result<Self, Self::Err> {\n        Ok(match s {\n            \"14\" => PgMajorVersion::PG14,\n            \"15\" => PgMajorVersion::PG15,\n            \"16\" => PgMajorVersion::PG16,\n            \"17\" => PgMajorVersion::PG17,\n            _ => return Err(PgMajorVersionParseError(s.to_string())),\n        })\n    }\n}\n"
  },
  {
    "path": "libs/posthog_client_lite/Cargo.toml",
    "content": "[package]\nname = \"posthog_client_lite\"\nversion = \"0.1.0\"\nedition = \"2024\"\nlicense.workspace = true\n\n[dependencies]\nanyhow.workspace = true\narc-swap.workspace = true\nreqwest.workspace = true\nserde_json.workspace = true\nserde.workspace = true\nsha2.workspace = true\nthiserror.workspace = true\ntokio = { workspace = true, features = [\"process\", \"sync\", \"fs\", \"rt\", \"io-util\", \"time\"] }\ntokio-util.workspace = true\ntracing-utils.workspace = true\ntracing.workspace = true\nworkspace_hack.workspace = true\n"
  },
  {
    "path": "libs/posthog_client_lite/src/background_loop.rs",
    "content": "//! A background loop that fetches feature flags from PostHog and updates the feature store.\n\nuse std::{\n    sync::Arc,\n    time::{Duration, SystemTime},\n};\n\nuse arc_swap::ArcSwap;\nuse tokio_util::sync::CancellationToken;\nuse tracing::{Instrument, info_span};\n\nuse crate::{\n    CaptureEvent, FeatureStore, LocalEvaluationResponse, PostHogClient, PostHogClientConfig,\n};\n\n/// A background loop that fetches feature flags from PostHog and updates the feature store.\npub struct FeatureResolverBackgroundLoop {\n    posthog_client: PostHogClient,\n    feature_store: ArcSwap<(SystemTime, Arc<FeatureStore>)>,\n    cancel: CancellationToken,\n}\n\nimpl FeatureResolverBackgroundLoop {\n    pub fn new(config: PostHogClientConfig, shutdown_pageserver: CancellationToken) -> Self {\n        Self {\n            posthog_client: PostHogClient::new(config),\n            feature_store: ArcSwap::new(Arc::new((\n                SystemTime::UNIX_EPOCH,\n                Arc::new(FeatureStore::new()),\n            ))),\n            cancel: shutdown_pageserver,\n        }\n    }\n\n    /// Update the feature store with a new feature flag spec bypassing the normal refresh loop.\n    pub fn update(&self, spec: String) -> anyhow::Result<()> {\n        let resp: LocalEvaluationResponse = serde_json::from_str(&spec)?;\n        self.update_feature_store_nofail(resp, \"http_propagate\");\n        Ok(())\n    }\n\n    fn update_feature_store_nofail(&self, resp: LocalEvaluationResponse, source: &'static str) {\n        let project_id = self.posthog_client.config.project_id.parse::<u64>().ok();\n        match FeatureStore::new_with_flags(resp.flags, project_id) {\n            Ok(feature_store) => {\n                self.feature_store\n                    .store(Arc::new((SystemTime::now(), Arc::new(feature_store))));\n                tracing::info!(\"Feature flag updated from {}\", source);\n            }\n            Err(e) => {\n                tracing::warn!(\"Cannot 
process feature flag spec from {}: {}\", source, e);\n            }\n        }\n    }\n\n    pub fn spawn(\n        self: Arc<Self>,\n        handle: &tokio::runtime::Handle,\n        refresh_period: Duration,\n        fake_tenants: Vec<CaptureEvent>,\n    ) {\n        let this = self.clone();\n        let cancel = self.cancel.clone();\n\n        // Main loop of updating the feature flags.\n        handle.spawn(\n            async move {\n                tracing::info!(\n                    \"Starting PostHog feature resolver with refresh period: {:?}\",\n                    refresh_period\n                );\n                let mut ticker = tokio::time::interval(refresh_period);\n                ticker.set_missed_tick_behavior(tokio::time::MissedTickBehavior::Skip);\n                loop {\n                    tokio::select! {\n                        _ = ticker.tick() => {}\n                        _ = cancel.cancelled() => break\n                    }\n                    {\n                        let last_update = this.feature_store.load().0;\n                        if let Ok(elapsed) = last_update.elapsed() {\n                            if elapsed < refresh_period {\n                                tracing::debug!(\n                                    \"Skipping feature flag refresh because it's too soon\"\n                                );\n                                continue;\n                            }\n                        }\n                    }\n                    let resp = match this\n                        .posthog_client\n                        .get_feature_flags_local_evaluation()\n                        .await\n                    {\n                        Ok(resp) => resp,\n                        Err(e) => {\n                            tracing::warn!(\"Cannot get feature flags: {}\", e);\n                            continue;\n                        }\n                    };\n                    
this.update_feature_store_nofail(resp, \"refresh_loop\");\n                }\n                tracing::info!(\"PostHog feature resolver stopped\");\n            }\n            .instrument(info_span!(\"posthog_feature_resolver\")),\n        );\n\n        // Report fake tenants to PostHog so that we have the combination of all the properties in the UI.\n        // Do one report per pageserver restart.\n        let this = self.clone();\n        handle.spawn(\n            async move {\n                tracing::info!(\"Starting PostHog feature reporter\");\n                for tenant in &fake_tenants {\n                    tracing::info!(\"Reporting fake tenant: {:?}\", tenant);\n                }\n                if let Err(e) = this.posthog_client.capture_event_batch(&fake_tenants).await {\n                    tracing::warn!(\"Cannot report fake tenants: {}\", e);\n                }\n            }\n            .instrument(info_span!(\"posthog_feature_reporter\")),\n        );\n    }\n\n    pub fn feature_store(&self) -> Arc<FeatureStore> {\n        self.feature_store.load().1.clone()\n    }\n}\n"
  },
  {
    "path": "libs/posthog_client_lite/src/lib.rs",
    "content": "//! A lite version of the PostHog client that only supports local evaluation of feature flags.\n\nmod background_loop;\n\npub use background_loop::FeatureResolverBackgroundLoop;\n\nuse std::collections::HashMap;\n\nuse serde::{Deserialize, Serialize};\nuse serde_json::json;\nuse sha2::Digest;\n\n#[derive(Debug, thiserror::Error)]\npub enum PostHogEvaluationError {\n    /// The feature flag is not available, for example, because the local evaluation data is not populated yet.\n    #[error(\"Feature flag not available: {0}\")]\n    NotAvailable(String),\n    #[error(\"No condition group is matched\")]\n    NoConditionGroupMatched,\n    /// Real errors, e.g., the rollout percentage does not add up to 100.\n    #[error(\"Failed to evaluate feature flag: {0}\")]\n    Internal(String),\n}\n\nimpl PostHogEvaluationError {\n    pub fn as_variant_str(&self) -> &'static str {\n        match self {\n            PostHogEvaluationError::NotAvailable(_) => \"not_available\",\n            PostHogEvaluationError::NoConditionGroupMatched => \"no_condition_group_matched\",\n            PostHogEvaluationError::Internal(_) => \"internal\",\n        }\n    }\n}\n\n#[derive(Deserialize)]\npub struct LocalEvaluationResponse {\n    pub flags: Vec<LocalEvaluationFlag>,\n}\n\n#[derive(Deserialize)]\npub struct LocalEvaluationFlag {\n    #[allow(dead_code)]\n    id: u64,\n    team_id: u64,\n    key: String,\n    filters: LocalEvaluationFlagFilters,\n    active: bool,\n}\n\n#[derive(Deserialize)]\npub struct LocalEvaluationFlagFilters {\n    groups: Vec<LocalEvaluationFlagFilterGroup>,\n    multivariate: Option<LocalEvaluationFlagMultivariate>,\n}\n\n#[derive(Deserialize)]\npub struct LocalEvaluationFlagFilterGroup {\n    variant: Option<String>,\n    properties: Option<Vec<LocalEvaluationFlagFilterProperty>>,\n    rollout_percentage: i64,\n}\n\n#[derive(Deserialize)]\npub struct LocalEvaluationFlagFilterProperty {\n    key: String,\n    value: 
PostHogFlagFilterPropertyValue,\n    operator: String,\n}\n\n#[derive(Debug, Serialize, Deserialize, Clone)]\n#[serde(untagged)]\npub enum PostHogFlagFilterPropertyValue {\n    String(String),\n    Number(f64),\n    Boolean(bool),\n    List(Vec<String>),\n}\n\n#[derive(Deserialize)]\npub struct LocalEvaluationFlagMultivariate {\n    variants: Vec<LocalEvaluationFlagMultivariateVariant>,\n}\n\n#[derive(Deserialize)]\npub struct LocalEvaluationFlagMultivariateVariant {\n    key: String,\n    rollout_percentage: i64,\n}\n\npub struct FeatureStore {\n    flags: HashMap<String, LocalEvaluationFlag>,\n}\n\nimpl Default for FeatureStore {\n    fn default() -> Self {\n        Self::new()\n    }\n}\n\nenum GroupEvaluationResult {\n    MatchedAndOverride(String),\n    MatchedAndEvaluate,\n    Unmatched,\n}\n\nimpl FeatureStore {\n    pub fn new() -> Self {\n        Self {\n            flags: HashMap::new(),\n        }\n    }\n\n    pub fn new_with_flags(\n        flags: Vec<LocalEvaluationFlag>,\n        project_id: Option<u64>,\n    ) -> Result<Self, &'static str> {\n        let mut store = Self::new();\n        store.set_flags(flags, project_id)?;\n        Ok(store)\n    }\n\n    pub fn set_flags(\n        &mut self,\n        flags: Vec<LocalEvaluationFlag>,\n        project_id: Option<u64>,\n    ) -> Result<(), &'static str> {\n        self.flags.clear();\n        for flag in flags {\n            if let Some(project_id) = project_id {\n                if flag.team_id != project_id {\n                    return Err(\n                        \"Retrieved a spec with different project id, wrong config? Discarding the feature flags.\",\n                    );\n                }\n            }\n            self.flags.insert(flag.key.clone(), flag);\n        }\n        Ok(())\n    }\n\n    /// Generate a consistent hash for a user ID (e.g., tenant ID).\n    ///\n    /// The implementation is different from PostHog SDK. 
In PostHog SDK, it is sha1 of `user_id.distinct_id.salt`.\n    /// However, as we do not upload all of our tenant IDs to PostHog, we do not have the PostHog distinct_id for a\n    /// tenant. Therefore, the way we compute it is sha256 of `user_id.feature_id.salt`.\n    fn consistent_hash(user_id: &str, flag_key: &str, salt: &str) -> f64 {\n        let mut hasher = sha2::Sha256::new();\n        hasher.update(user_id);\n        hasher.update(\".\");\n        hasher.update(flag_key);\n        hasher.update(\".\");\n        hasher.update(salt);\n        let hash = hasher.finalize();\n        let hash_int = u64::from_le_bytes(hash[..8].try_into().unwrap());\n        hash_int as f64 / u64::MAX as f64\n    }\n\n    /// Evaluate a condition. Returns an error if the condition cannot be evaluated due to parsing error or missing\n    /// property.\n    fn evaluate_condition(\n        &self,\n        operator: &str,\n        provided: &PostHogFlagFilterPropertyValue,\n        requested: &PostHogFlagFilterPropertyValue,\n    ) -> Result<bool, PostHogEvaluationError> {\n        match operator {\n            \"exact\" => {\n                let PostHogFlagFilterPropertyValue::String(provided) = provided else {\n                    // Left should be a string\n                    return Err(PostHogEvaluationError::Internal(format!(\n                        \"The left side of the condition is not a string: {provided:?}\"\n                    )));\n                };\n                let PostHogFlagFilterPropertyValue::List(requested) = requested else {\n                    // Right should be a list of string\n                    return Err(PostHogEvaluationError::Internal(format!(\n                        \"The right side of the condition is not a list: {requested:?}\"\n                    )));\n                };\n                Ok(requested.contains(provided))\n            }\n            \"lt\" | \"gt\" => {\n                let PostHogFlagFilterPropertyValue::String(requested) = 
requested else {\n                    // Right should be a string\n                    return Err(PostHogEvaluationError::Internal(format!(\n                        \"The right side of the condition is not a string: {requested:?}\"\n                    )));\n                };\n                let Ok(requested) = requested.parse::<f64>() else {\n                    return Err(PostHogEvaluationError::Internal(format!(\n                        \"Can not parse the right side of the condition as a number: {requested:?}\"\n                    )));\n                };\n                // Left can either be a number or a string\n                let provided = match provided {\n                    PostHogFlagFilterPropertyValue::Number(provided) => *provided,\n                    PostHogFlagFilterPropertyValue::String(provided) => {\n                        let Ok(provided) = provided.parse::<f64>() else {\n                            return Err(PostHogEvaluationError::Internal(format!(\n                                \"Can not parse the left side of the condition as a number: {provided:?}\"\n                            )));\n                        };\n                        provided\n                    }\n                    _ => {\n                        return Err(PostHogEvaluationError::Internal(format!(\n                            \"The left side of the condition is not a number or a string: {provided:?}\"\n                        )));\n                    }\n                };\n                match operator {\n                    \"lt\" => Ok(provided < requested),\n                    \"gt\" => Ok(provided > requested),\n                    op => Err(PostHogEvaluationError::Internal(format!(\n                        \"Unsupported operator: {op}\"\n                    ))),\n                }\n            }\n            _ => Err(PostHogEvaluationError::Internal(format!(\n                \"Unsupported operator: {operator}\"\n            ))),\n        }\n    
}\n\n    /// Evaluate a percentage.\n    fn evaluate_percentage(&self, mapped_user_id: f64, percentage: i64) -> bool {\n        mapped_user_id <= percentage as f64 / 100.0\n    }\n\n    /// Evaluate a filter group for a feature flag. Returns an error if there are errors during the evaluation.\n    ///\n    /// Return values:\n    /// Ok(GroupEvaluationResult::MatchedAndOverride(variant)): matched and evaluated to this value\n    /// Ok(GroupEvaluationResult::MatchedAndEvaluate): condition matched but no variant override, use the global rollout percentage\n    /// Ok(GroupEvaluationResult::Unmatched): condition unmatched\n    fn evaluate_group(\n        &self,\n        group: &LocalEvaluationFlagFilterGroup,\n        hash_on_group_rollout_percentage: f64,\n        provided_properties: &HashMap<String, PostHogFlagFilterPropertyValue>,\n    ) -> Result<GroupEvaluationResult, PostHogEvaluationError> {\n        if let Some(ref properties) = group.properties {\n            for property in properties {\n                if let Some(value) = provided_properties.get(&property.key) {\n                    // The user provided the property value\n                    if !self.evaluate_condition(\n                        property.operator.as_ref(),\n                        value,\n                        &property.value,\n                    )? 
{\n                        return Ok(GroupEvaluationResult::Unmatched);\n                    }\n                } else {\n                    // We cannot evaluate, the property is not available\n                    return Err(PostHogEvaluationError::NotAvailable(format!(\n                        \"The required property in the condition is not available: {}\",\n                        property.key\n                    )));\n                }\n            }\n        }\n\n        // The group has no condition matchers or we matched the properties\n        if self.evaluate_percentage(hash_on_group_rollout_percentage, group.rollout_percentage) {\n            if let Some(ref variant_override) = group.variant {\n                Ok(GroupEvaluationResult::MatchedAndOverride(\n                    variant_override.clone(),\n                ))\n            } else {\n                Ok(GroupEvaluationResult::MatchedAndEvaluate)\n            }\n        } else {\n            Ok(GroupEvaluationResult::Unmatched)\n        }\n    }\n\n    /// Evaluate a multivariate feature flag. Returns an error if the flag is not available or if there are errors\n    /// during the evaluation.\n    ///\n    /// The parsing logic is as follows:\n    ///\n    /// * Match each filter group.\n    ///   - If a group is matched, it will first determine whether the user is in the range of the group's rollout\n    ///     percentage. We will generate a consistent hash for the user ID on the group rollout percentage. 
This hash\n    ///     is shared across all groups.\n    ///   - If the hash falls within the group's rollout percentage, return the variant if it's overridden, or\n    ///   - Evaluate the variant using the global config and the global rollout percentage.\n    /// * Otherwise, continue with the next group until all groups are evaluated and no group is within the\n    ///   rollout percentage.\n    /// * If there are no matching groups, return an error.\n    ///\n    /// Example: we have a multivariate flag with 3 groups of the configured global rollout percentage: A (10%), B (20%), C (70%).\n    /// There is a single group with a condition that has a rollout percentage of 10% and it does not have a variant override.\n    /// Then, we will have 1% of the users evaluated to A, 2% to B, and 7% to C.\n    ///\n    /// Error handling: the caller should inspect the error and decide the behavior when a feature flag\n    /// cannot be evaluated (i.e., default to false if it cannot be resolved). The error should *not* be\n    /// propagated beyond where the feature flag gets resolved.\n    pub fn evaluate_multivariate(\n        &self,\n        flag_key: &str,\n        user_id: &str,\n        properties: &HashMap<String, PostHogFlagFilterPropertyValue>,\n    ) -> Result<String, PostHogEvaluationError> {\n        let hash_on_global_rollout_percentage =\n            Self::consistent_hash(user_id, flag_key, \"multivariate\");\n        let hash_on_group_rollout_percentage =\n            Self::consistent_hash(user_id, flag_key, \"within_group\");\n        self.evaluate_multivariate_inner(\n            flag_key,\n            hash_on_global_rollout_percentage,\n            hash_on_group_rollout_percentage,\n            properties,\n        )\n    }\n\n    /// Evaluate a boolean feature flag. 
Returns an error if the flag is not available or if there are errors\n    /// during the evaluation.\n    ///\n    /// The parsing logic is as follows:\n    ///\n    /// * Generate a consistent hash for the tenant-feature.\n    /// * Match each filter group.\n    ///   - If a group is matched, it will first determine whether the user is in the range of the rollout\n    ///     percentage.\n    ///   - If the hash falls within the group's rollout percentage, return true.\n    /// * Otherwise, continue with the next group until all groups are evaluated and no group is within the\n    ///   rollout percentage.\n    /// * If there are no matching groups, return an error.\n    ///\n    /// Returns `Ok(())` if the feature flag evaluates to true. In the future, it will return a payload.\n    ///\n    /// Error handling: the caller should inspect the error and decide the behavior when a feature flag\n    /// cannot be evaluated (i.e., default to false if it cannot be resolved). The error should *not* be\n    /// propagated beyond where the feature flag gets resolved.\n    pub fn evaluate_boolean(\n        &self,\n        flag_key: &str,\n        user_id: &str,\n        properties: &HashMap<String, PostHogFlagFilterPropertyValue>,\n    ) -> Result<(), PostHogEvaluationError> {\n        let hash_on_global_rollout_percentage = Self::consistent_hash(user_id, flag_key, \"boolean\");\n        self.evaluate_boolean_inner(flag_key, hash_on_global_rollout_percentage, properties)\n    }\n\n    /// Evaluate a multivariate feature flag. 
Note that we directly take the mapped user ID\n    /// (a consistent hash ranging from 0 to 1) so that it is easier to use it in the tests\n    /// and avoid duplicate computations.\n    ///\n    /// Use a different consistent hash for evaluating the group rollout percentage.\n    /// The behavior: if the condition is set to rolling out to 10% of the users, and\n    /// we set the variant A to 20% in the global config, then 2% of the total users will\n    /// be evaluated to variant A.\n    ///\n    /// Note that the hash to determine group rollout percentage is shared across all groups. So if we have two\n    /// exactly-the-same conditions with 10% and 20% rollout percentage respectively, a total of 20% of the users\n    /// will be evaluated (versus 30% if group evaluation is done independently).\n    pub(crate) fn evaluate_multivariate_inner(\n        &self,\n        flag_key: &str,\n        hash_on_global_rollout_percentage: f64,\n        hash_on_group_rollout_percentage: f64,\n        properties: &HashMap<String, PostHogFlagFilterPropertyValue>,\n    ) -> Result<String, PostHogEvaluationError> {\n        if let Some(flag_config) = self.flags.get(flag_key) {\n            if !flag_config.active {\n                return Err(PostHogEvaluationError::NotAvailable(format!(\n                    \"The feature flag is not active: {flag_key}\"\n                )));\n            }\n            let Some(ref multivariate) = flag_config.filters.multivariate else {\n                return Err(PostHogEvaluationError::Internal(format!(\n                    \"No multivariate available, should use evaluate_boolean?: {flag_key}\"\n                )));\n            };\n            // TODO: sort the groups so that variant overrides always get evaluated first and it follows the PostHog\n            // Python SDK behavior; for now we do not configure conditions without variant overrides in Neon so it\n            // does not matter.\n            for group in 
&flag_config.filters.groups {\n                match self.evaluate_group(group, hash_on_group_rollout_percentage, properties)? {\n                    GroupEvaluationResult::MatchedAndOverride(variant) => return Ok(variant),\n                    GroupEvaluationResult::MatchedAndEvaluate => {\n                        let mut percentage = 0;\n                        for variant in &multivariate.variants {\n                            percentage += variant.rollout_percentage;\n                            if self\n                                .evaluate_percentage(hash_on_global_rollout_percentage, percentage)\n                            {\n                                return Ok(variant.key.clone());\n                            }\n                        }\n                        // This should not happen because the rollout percentage always adds up to 100, but just in case that PostHog\n                        // returned invalid spec, we return an error.\n                        return Err(PostHogEvaluationError::Internal(format!(\n                            \"Rollout percentage does not add up to 100: {flag_key}\"\n                        )));\n                    }\n                    GroupEvaluationResult::Unmatched => continue,\n                }\n            }\n            // If no group is matched, the feature is not available, and up to the caller to decide what to do.\n            Err(PostHogEvaluationError::NoConditionGroupMatched)\n        } else {\n            // The feature flag is not available yet\n            Err(PostHogEvaluationError::NotAvailable(format!(\n                \"Not found in the local evaluation spec: {flag_key}\"\n            )))\n        }\n    }\n\n    /// Evaluate a boolean feature flag. 
Note that we directly take the mapped user ID\n    /// (a consistent hash ranging from 0 to 1) so that it is easier to use it in the tests\n    /// and avoid duplicate computations.\n    ///\n    /// Use a different consistent hash for evaluating the group rollout percentage.\n    /// The behavior: if the condition is set to rolling out to 10% of the users, and\n    /// we set the variant A to 20% in the global config, then 2% of the total users will\n    /// be evaluated to variant A.\n    ///\n    /// Note that the hash to determine group rollout percentage is shared across all groups. So if we have two\n    /// exactly-the-same conditions with 10% and 20% rollout percentage respectively, a total of 20% of the users\n    /// will be evaluated (versus 30% if group evaluation is done independently).\n    pub(crate) fn evaluate_boolean_inner(\n        &self,\n        flag_key: &str,\n        hash_on_global_rollout_percentage: f64,\n        properties: &HashMap<String, PostHogFlagFilterPropertyValue>,\n    ) -> Result<(), PostHogEvaluationError> {\n        if let Some(flag_config) = self.flags.get(flag_key) {\n            if !flag_config.active {\n                return Err(PostHogEvaluationError::NotAvailable(format!(\n                    \"The feature flag is not active: {flag_key}\"\n                )));\n            }\n            if flag_config.filters.multivariate.is_some() {\n                return Err(PostHogEvaluationError::Internal(format!(\n                    \"This looks like a multivariate flag, should use evaluate_multivariate?: {flag_key}\"\n                )));\n            };\n            // TODO: sort the groups so that variant overrides always get evaluated first and it follows the PostHog\n            // Python SDK behavior; for now we do not configure conditions without variant overrides in Neon so it\n            // does not matter.\n            for group in &flag_config.filters.groups {\n                match self.evaluate_group(group, 
hash_on_global_rollout_percentage, properties)? {\n                    GroupEvaluationResult::MatchedAndOverride(_) => {\n                        return Err(PostHogEvaluationError::Internal(format!(\n                            \"Boolean flag cannot have overrides: {flag_key}\"\n                        )));\n                    }\n                    GroupEvaluationResult::MatchedAndEvaluate => {\n                        return Ok(());\n                    }\n                    GroupEvaluationResult::Unmatched => continue,\n                }\n            }\n            // If no group is matched, the feature is not available, and up to the caller to decide what to do.\n            Err(PostHogEvaluationError::NoConditionGroupMatched)\n        } else {\n            // The feature flag is not available yet\n            Err(PostHogEvaluationError::NotAvailable(format!(\n                \"Not found in the local evaluation spec: {flag_key}\"\n            )))\n        }\n    }\n\n    /// Infer whether a feature flag is a boolean flag by checking if it has a multivariate filter.\n    pub fn is_feature_flag_boolean(&self, flag_key: &str) -> Result<bool, PostHogEvaluationError> {\n        if let Some(flag_config) = self.flags.get(flag_key) {\n            Ok(flag_config.filters.multivariate.is_none())\n        } else {\n            Err(PostHogEvaluationError::NotAvailable(format!(\n                \"Not found in the local evaluation spec: {flag_key}\"\n            )))\n        }\n    }\n}\n\npub struct PostHogClientConfig {\n    /// The server API key.\n    pub server_api_key: String,\n    /// The client API key.\n    pub client_api_key: String,\n    /// The project ID.\n    pub project_id: String,\n    /// The private API URL.\n    pub private_api_url: String,\n    /// The public API URL.\n    pub public_api_url: String,\n}\n\n/// A lite PostHog client.\n///\n/// At the point of writing this code, PostHog does not have a functional Rust client with feature flag support.\n/// 
This is a lite version that only supports local evaluation of feature flags and only supports those JSON specs\n/// that will be used within Neon.\n///\n/// PostHog is designed as a browser-server system: the browser (client) side uses the client key and is exposed\n/// to the end users; the server side uses a server key and is not exposed to the end users. The client and the\n/// server have different API keys and provide a different set of APIs. In Neon, we only have the server (that is\n/// pageserver), and it will use both the client API and the server API. So we need to store two API keys within\n/// our PostHog client.\n///\n/// The server API is used to fetch the feature flag specs. The client API is used to capture events in case we\n/// want to report the feature flag usage back to PostHog. The current plan is to use PostHog only as a UI to\n/// configure feature flags so it is very likely that the client API will not be used.\npub struct PostHogClient {\n    /// The config.\n    config: PostHogClientConfig,\n    /// The HTTP client.\n    client: reqwest::Client,\n}\n\n#[derive(Serialize, Debug)]\npub struct CaptureEvent {\n    pub event: String,\n    pub distinct_id: String,\n    pub properties: serde_json::Value,\n}\n\nimpl PostHogClient {\n    pub fn new(config: PostHogClientConfig) -> Self {\n        let client = reqwest::Client::new();\n        Self { config, client }\n    }\n\n    pub fn new_with_us_region(\n        server_api_key: String,\n        client_api_key: String,\n        project_id: String,\n    ) -> Self {\n        Self::new(PostHogClientConfig {\n            server_api_key,\n            client_api_key,\n            project_id,\n            private_api_url: \"https://us.posthog.com\".to_string(),\n            public_api_url: \"https://us.i.posthog.com\".to_string(),\n        })\n    }\n\n    /// Check if the server API key is a feature flag secure API key. 
This key can only be\n    /// used to fetch the feature flag specs and can only be used on an undocumented API\n    /// endpoint.\n    fn is_feature_flag_secure_api_key(&self) -> bool {\n        self.config.server_api_key.starts_with(\"phs_\")\n    }\n\n    /// Get the raw JSON spec, same as `get_feature_flags_local_evaluation` but without parsing.\n    pub async fn get_feature_flags_local_evaluation_raw(&self) -> anyhow::Result<String> {\n        // BASE_URL/api/projects/:project_id/feature_flags/local_evaluation\n        // with bearer token of self.server_api_key\n        // OR\n        // BASE_URL/api/feature_flag/local_evaluation/\n        // with bearer token of feature flag specific self.server_api_key\n        let url = if self.is_feature_flag_secure_api_key() {\n            // The new feature local evaluation secure API token\n            format!(\n                \"{}/api/feature_flag/local_evaluation\",\n                self.config.private_api_url\n            )\n        } else {\n            // The old personal API token\n            format!(\n                \"{}/api/projects/{}/feature_flags/local_evaluation\",\n                self.config.private_api_url, self.config.project_id\n            )\n        };\n        let response = self\n            .client\n            .get(url)\n            .bearer_auth(&self.config.server_api_key)\n            .send()\n            .await?;\n        let status = response.status();\n        let body = response.text().await?;\n        if !status.is_success() {\n            return Err(anyhow::anyhow!(\n                \"Failed to get feature flags: {}, {}\",\n                status,\n                body\n            ));\n        }\n        Ok(body)\n    }\n\n    /// Fetch the feature flag specs from the server.\n    ///\n    /// This is unfortunately an undocumented API at:\n    /// - 
<https://posthog.com/docs/feature-flags/local-evaluation>\n    ///\n    /// The handling logic in [`FeatureStore`] mostly follows the Python API implementation.\n    /// See `_compute_flag_locally` in <https://github.com/PostHog/posthog-python/blob/master/posthog/client.py>\n    pub async fn get_feature_flags_local_evaluation(\n        &self,\n    ) -> Result<LocalEvaluationResponse, anyhow::Error> {\n        let raw = self.get_feature_flags_local_evaluation_raw().await?;\n        Ok(serde_json::from_str(&raw)?)\n    }\n\n    /// Capture an event. This will only be used to report the feature flag usage back to PostHog, though\n    /// it also support a lot of other functionalities.\n    ///\n    /// <https://posthog.com/docs/api/capture>\n    pub async fn capture_event(\n        &self,\n        event: &str,\n        distinct_id: &str,\n        properties: &serde_json::Value,\n    ) -> anyhow::Result<()> {\n        // PUBLIC_URL/capture/\n        let url = format!(\"{}/capture/\", self.config.public_api_url);\n        let response = self\n            .client\n            .post(url)\n            .body(serde_json::to_string(&json!({\n                \"api_key\": self.config.client_api_key,\n                \"distinct_id\": distinct_id,\n                \"event\": event,\n                \"properties\": properties,\n            }))?)\n            .send()\n            .await?;\n        let status = response.status();\n        let body = response.text().await?;\n        if !status.is_success() {\n            return Err(anyhow::anyhow!(\n                \"Failed to capture events: {}, {}\",\n                status,\n                body\n            ));\n        }\n        Ok(())\n    }\n\n    pub async fn capture_event_batch(&self, events: &[CaptureEvent]) -> anyhow::Result<()> {\n        // PUBLIC_URL/batch/\n        let url = format!(\"{}/batch/\", self.config.public_api_url);\n        let response = self\n            .client\n            .post(url)\n            
.body(serde_json::to_string(&json!({\n                \"api_key\": self.config.client_api_key,\n                \"batch\": events,\n            }))?)\n            .send()\n            .await?;\n        let status = response.status();\n        let body = response.text().await?;\n        if !status.is_success() {\n            return Err(anyhow::anyhow!(\n                \"Failed to capture events: {}, {}\",\n                status,\n                body\n            ));\n        }\n        Ok(())\n    }\n}\n\n#[cfg(test)]\nmod tests {\n    use super::*;\n\n    fn data() -> &'static str {\n        r#\"{\n  \"flags\": [\n    {\n      \"id\": 141807,\n      \"team_id\": 152860,\n      \"name\": \"\",\n      \"key\": \"image-compaction-boundary\",\n      \"filters\": {\n        \"groups\": [\n          {\n            \"variant\": null,\n            \"properties\": [\n              {\n                \"key\": \"plan_type\",\n                \"type\": \"person\",\n                \"value\": [\n                  \"free\"\n                ],\n                \"operator\": \"exact\"\n              }\n            ],\n            \"rollout_percentage\": 40\n          },\n          {\n            \"variant\": null,\n            \"properties\": [],\n            \"rollout_percentage\": 10\n          }\n        ],\n        \"payloads\": {},\n        \"multivariate\": null\n      },\n      \"deleted\": false,\n      \"active\": true,\n      \"ensure_experience_continuity\": false,\n      \"has_encrypted_payloads\": false,\n      \"version\": 1\n    },\n    {\n      \"id\": 135586,\n      \"team_id\": 152860,\n      \"name\": \"\",\n      \"key\": \"boolean-flag\",\n      \"filters\": {\n        \"groups\": [\n          {\n            \"variant\": null,\n            \"properties\": [\n              {\n                \"key\": \"plan_type\",\n                \"type\": \"person\",\n                \"value\": [\n                  \"free\"\n                ],\n                
\"operator\": \"exact\"\n              }\n            ],\n            \"rollout_percentage\": 47\n          }\n        ],\n        \"payloads\": {},\n        \"multivariate\": null\n      },\n      \"deleted\": false,\n      \"active\": true,\n      \"ensure_experience_continuity\": false,\n      \"has_encrypted_payloads\": false,\n      \"version\": 1\n    },\n    {\n      \"id\": 132794,\n      \"team_id\": 152860,\n      \"name\": \"\",\n      \"key\": \"gc-compaction\",\n      \"filters\": {\n        \"groups\": [\n          {\n            \"variant\": \"enabled-stage-2\",\n            \"properties\": [\n              {\n                \"key\": \"plan_type\",\n                \"type\": \"person\",\n                \"value\": [\n                  \"free\"\n                ],\n                \"operator\": \"exact\"\n              },\n              {\n                \"key\": \"pageserver_remote_size\",\n                \"type\": \"person\",\n                \"value\": \"10000000\",\n                \"operator\": \"lt\"\n              }\n            ],\n             \"rollout_percentage\": 50\n          },\n          {\n            \"properties\": [\n              {\n                \"key\": \"plan_type\",\n                \"type\": \"person\",\n                \"value\": [\n                  \"free\"\n                ],\n                \"operator\": \"exact\"\n              },\n              {\n                \"key\": \"pageserver_remote_size\",\n                \"type\": \"person\",\n                \"value\": \"10000000\",\n                \"operator\": \"lt\"\n              }\n            ],\n            \"rollout_percentage\": 80\n          }\n        ],\n        \"payloads\": {},\n        \"multivariate\": {\n          \"variants\": [\n            {\n              \"key\": \"disabled\",\n              \"name\": \"\",\n              \"rollout_percentage\": 90\n            },\n            {\n              \"key\": \"enabled-stage-1\",\n              
\"name\": \"\",\n              \"rollout_percentage\": 10\n            },\n            {\n              \"key\": \"enabled-stage-2\",\n              \"name\": \"\",\n              \"rollout_percentage\": 0\n            },\n            {\n              \"key\": \"enabled-stage-3\",\n              \"name\": \"\",\n              \"rollout_percentage\": 0\n            },\n            {\n              \"key\": \"enabled\",\n              \"name\": \"\",\n              \"rollout_percentage\": 0\n            }\n          ]\n        }\n      },\n      \"deleted\": false,\n      \"active\": true,\n      \"ensure_experience_continuity\": false,\n      \"has_encrypted_payloads\": false,\n      \"version\": 7\n    }\n  ],\n  \"group_type_mapping\": {},\n  \"cohorts\": {}\n}\"#\n    }\n\n    #[test]\n    fn parse_local_evaluation() {\n        let data = data();\n        let _: LocalEvaluationResponse = serde_json::from_str(data).unwrap();\n    }\n\n    #[test]\n    fn evaluate_multivariate() {\n        let mut store = FeatureStore::new();\n        let response: LocalEvaluationResponse = serde_json::from_str(data()).unwrap();\n        store.set_flags(response.flags, None).unwrap();\n\n        // This lacks the required properties and cannot be evaluated.\n        let variant =\n            store.evaluate_multivariate_inner(\"gc-compaction\", 1.00, 0.40, &HashMap::new());\n        assert!(matches!(\n            variant,\n            Err(PostHogEvaluationError::NotAvailable(_))\n        ),);\n\n        let properties_unmatched = HashMap::from([\n            (\n                \"plan_type\".to_string(),\n                PostHogFlagFilterPropertyValue::String(\"paid\".to_string()),\n            ),\n            (\n                \"pageserver_remote_size\".to_string(),\n                PostHogFlagFilterPropertyValue::Number(1000.0),\n            ),\n        ]);\n\n        // This does not match any group so there will be an error.\n        let variant =\n            
store.evaluate_multivariate_inner(\"gc-compaction\", 1.00, 0.40, &properties_unmatched);\n        assert!(matches!(\n            variant,\n            Err(PostHogEvaluationError::NoConditionGroupMatched)\n        ),);\n        let variant =\n            store.evaluate_multivariate_inner(\"gc-compaction\", 0.80, 0.80, &properties_unmatched);\n        assert!(matches!(\n            variant,\n            Err(PostHogEvaluationError::NoConditionGroupMatched)\n        ),);\n\n        let properties = HashMap::from([\n            (\n                \"plan_type\".to_string(),\n                PostHogFlagFilterPropertyValue::String(\"free\".to_string()),\n            ),\n            (\n                \"pageserver_remote_size\".to_string(),\n                PostHogFlagFilterPropertyValue::Number(1000.0),\n            ),\n        ]);\n\n        // It matches the first group as 0.10 <= 0.50 and the properties are matched. Then it gets evaluated to the variant override.\n        let variant = store.evaluate_multivariate_inner(\"gc-compaction\", 0.10, 0.10, &properties);\n        assert_eq!(variant.unwrap(), \"enabled-stage-2\".to_string());\n\n        // It matches the second group as 0.50 <= 0.60 <= 0.80 and the properties are matched. 
Then it gets evaluated using the global percentage.\n        let variant = store.evaluate_multivariate_inner(\"gc-compaction\", 0.99, 0.60, &properties);\n        assert_eq!(variant.unwrap(), \"enabled-stage-1\".to_string());\n        let variant = store.evaluate_multivariate_inner(\"gc-compaction\", 0.80, 0.60, &properties);\n        assert_eq!(variant.unwrap(), \"disabled\".to_string());\n\n        // It matches the group conditions but not the group rollout percentage.\n        let variant = store.evaluate_multivariate_inner(\"gc-compaction\", 1.00, 0.90, &properties);\n        assert!(matches!(\n            variant,\n            Err(PostHogEvaluationError::NoConditionGroupMatched)\n        ),);\n    }\n\n    #[test]\n    fn evaluate_boolean_1() {\n        // The `boolean-flag` feature flag only has one group that matches on the free user.\n\n        let mut store = FeatureStore::new();\n        let response: LocalEvaluationResponse = serde_json::from_str(data()).unwrap();\n        store.set_flags(response.flags, None).unwrap();\n\n        // This lacks the required properties and cannot be evaluated.\n        let variant = store.evaluate_boolean_inner(\"boolean-flag\", 1.00, &HashMap::new());\n        assert!(matches!(\n            variant,\n            Err(PostHogEvaluationError::NotAvailable(_))\n        ),);\n\n        let properties_unmatched = HashMap::from([\n            (\n                \"plan_type\".to_string(),\n                PostHogFlagFilterPropertyValue::String(\"paid\".to_string()),\n            ),\n            (\n                \"pageserver_remote_size\".to_string(),\n                PostHogFlagFilterPropertyValue::Number(1000.0),\n            ),\n        ]);\n\n        // This does not match any group so there will be an error.\n        let variant = store.evaluate_boolean_inner(\"boolean-flag\", 1.00, &properties_unmatched);\n        assert!(matches!(\n            variant,\n            Err(PostHogEvaluationError::NoConditionGroupMatched)\n  
      ),);\n\n        let properties = HashMap::from([\n            (\n                \"plan_type\".to_string(),\n                PostHogFlagFilterPropertyValue::String(\"free\".to_string()),\n            ),\n            (\n                \"pageserver_remote_size\".to_string(),\n                PostHogFlagFilterPropertyValue::Number(1000.0),\n            ),\n        ]);\n\n        // It matches the only group as 0.10 <= 0.47 and the properties are matched, so the boolean flag evaluates successfully (there is no variant override).\n        let variant = store.evaluate_boolean_inner(\"boolean-flag\", 0.10, &properties);\n        assert!(variant.is_ok());\n\n        // It matches the group conditions but not the group rollout percentage.\n        let variant = store.evaluate_boolean_inner(\"boolean-flag\", 1.00, &properties);\n        assert!(matches!(\n            variant,\n            Err(PostHogEvaluationError::NoConditionGroupMatched)\n        ),);\n    }\n\n    #[test]\n    fn evaluate_boolean_2() {\n        // The `image-compaction-boundary` feature flag has one group that matches on the free user and a group that matches on all users.\n\n        let mut store = FeatureStore::new();\n        let response: LocalEvaluationResponse = serde_json::from_str(data()).unwrap();\n        store.set_flags(response.flags, None).unwrap();\n\n        // This lacks the required properties and cannot be evaluated.\n        let variant =\n            store.evaluate_boolean_inner(\"image-compaction-boundary\", 1.00, &HashMap::new());\n        assert!(matches!(\n            variant,\n            Err(PostHogEvaluationError::NotAvailable(_))\n        ),);\n\n        let properties_unmatched = HashMap::from([\n            (\n                \"plan_type\".to_string(),\n                PostHogFlagFilterPropertyValue::String(\"paid\".to_string()),\n            ),\n            (\n                \"pageserver_remote_size\".to_string(),\n                PostHogFlagFilterPropertyValue::Number(1000.0),\n            
),\n        ]);\n\n        // The properties do not match the filtered group, so only the all-users group (10% rollout) can apply.\n        let variant =\n            store.evaluate_boolean_inner(\"image-compaction-boundary\", 1.00, &properties_unmatched);\n        assert!(matches!(\n            variant,\n            Err(PostHogEvaluationError::NoConditionGroupMatched)\n        ),);\n        let variant =\n            store.evaluate_boolean_inner(\"image-compaction-boundary\", 0.05, &properties_unmatched);\n        assert!(variant.is_ok());\n\n        let properties = HashMap::from([\n            (\n                \"plan_type\".to_string(),\n                PostHogFlagFilterPropertyValue::String(\"free\".to_string()),\n            ),\n            (\n                \"pageserver_remote_size\".to_string(),\n                PostHogFlagFilterPropertyValue::Number(1000.0),\n            ),\n        ]);\n\n        // It matches the first group as 0.30 <= 0.40 and the properties are matched, so the boolean flag evaluates successfully (there is no variant override).\n        let variant = store.evaluate_boolean_inner(\"image-compaction-boundary\", 0.30, &properties);\n        assert!(variant.is_ok());\n\n        // It matches the group conditions but not the group rollout percentage.\n        let variant = store.evaluate_boolean_inner(\"image-compaction-boundary\", 1.00, &properties);\n        assert!(matches!(\n            variant,\n            Err(PostHogEvaluationError::NoConditionGroupMatched)\n        ),);\n\n        // It matches the second \"all\" group conditions.\n        let variant = store.evaluate_boolean_inner(\"image-compaction-boundary\", 0.09, &properties);\n        assert!(variant.is_ok());\n    }\n}\n"
  },
  {
    "path": "libs/pq_proto/Cargo.toml",
    "content": "[package]\nname = \"pq_proto\"\nversion = \"0.1.0\"\nedition.workspace = true\nlicense.workspace = true\n\n[dependencies]\nbytes.workspace = true\nbyteorder.workspace = true\nitertools.workspace = true\npostgres-protocol.workspace = true\nrand.workspace = true\ntokio = { workspace = true, features = [\"io-util\"] }\nthiserror.workspace = true\nserde.workspace = true\n"
  },
  {
    "path": "libs/pq_proto/src/framed.rs",
    "content": "//! Provides `Framed` -- writing/flushing and reading Postgres messages to/from\n//! the async stream based on (and buffered with) BytesMut. All functions are\n//! cancellation safe.\n//!\n//! It is similar to what tokio_util::codec::Framed with appropriate codec\n//! provides, but `FramedReader` and `FramedWriter` read/write parts can be used\n//! separately without using split from futures::stream::StreamExt (which\n//! allocates a [Box] in polling internally). tokio::io::split is used for splitting\n//! instead. Plus we customize error messages more than a single type for all io\n//! calls.\n//!\n//! [Box]: https://docs.rs/futures-util/0.3.26/src/futures_util/lock/bilock.rs.html#107\nuse std::future::Future;\nuse std::io::{self, ErrorKind};\n\nuse bytes::{Buf, BytesMut};\nuse tokio::io::{AsyncRead, AsyncReadExt, AsyncWrite, AsyncWriteExt, ReadHalf, WriteHalf};\n\nuse crate::{BeMessage, FeMessage, FeStartupPacket, ProtocolError};\n\nconst INITIAL_CAPACITY: usize = 8 * 1024;\n\n/// Error on postgres connection: either IO (physical transport error) or\n/// protocol violation.\n#[derive(thiserror::Error, Debug)]\npub enum ConnectionError {\n    #[error(transparent)]\n    Io(#[from] io::Error),\n    #[error(transparent)]\n    Protocol(#[from] ProtocolError),\n}\n\nimpl ConnectionError {\n    /// Proxy stream.rs uses only io::Error; provide it.\n    pub fn into_io_error(self) -> io::Error {\n        match self {\n            ConnectionError::Io(io) => io,\n            ConnectionError::Protocol(pe) => io::Error::other(pe.to_string()),\n        }\n    }\n}\n\n/// Wraps async io `stream`, providing messages to write/flush + read Postgres\n/// messages.\npub struct Framed<S> {\n    pub stream: S,\n    pub read_buf: BytesMut,\n    pub write_buf: BytesMut,\n}\n\nimpl<S> Framed<S> {\n    pub fn new(stream: S) -> Self {\n        Self {\n            stream,\n            read_buf: BytesMut::with_capacity(INITIAL_CAPACITY),\n            write_buf: 
BytesMut::with_capacity(INITIAL_CAPACITY),\n        }\n    }\n\n    /// Get a shared reference to the underlying stream.\n    pub fn get_ref(&self) -> &S {\n        &self.stream\n    }\n\n    /// Deconstruct into the underlying stream and read buffer.\n    pub fn into_inner(self) -> (S, BytesMut) {\n        (self.stream, self.read_buf)\n    }\n\n    /// Return new Framed with stream type transformed by async f, for TLS\n    /// upgrade.\n    pub async fn map_stream<S2, E, F, Fut>(self, f: F) -> Result<Framed<S2>, E>\n    where\n        F: FnOnce(S) -> Fut,\n        Fut: Future<Output = Result<S2, E>>,\n    {\n        let stream = f(self.stream).await?;\n        Ok(Framed {\n            stream,\n            read_buf: self.read_buf,\n            write_buf: self.write_buf,\n        })\n    }\n}\n\nimpl<S: AsyncRead + Unpin> Framed<S> {\n    pub async fn read_startup_message(\n        &mut self,\n    ) -> Result<Option<FeStartupPacket>, ConnectionError> {\n        read_message(&mut self.stream, &mut self.read_buf, FeStartupPacket::parse).await\n    }\n\n    pub async fn read_message(&mut self) -> Result<Option<FeMessage>, ConnectionError> {\n        read_message(&mut self.stream, &mut self.read_buf, FeMessage::parse).await\n    }\n}\n\nimpl<S: AsyncWrite + Unpin> Framed<S> {\n    /// Write next message to the output buffer; doesn't flush.\n    pub fn write_message(&mut self, msg: &BeMessage<'_>) -> Result<(), ProtocolError> {\n        BeMessage::write(&mut self.write_buf, msg)\n    }\n\n    /// Flush out the buffer. 
This function is cancellation safe: it can be\n    /// interrupted and flushing will be continued in the next call.\n    pub async fn flush(&mut self) -> Result<(), io::Error> {\n        flush(&mut self.stream, &mut self.write_buf).await\n    }\n\n    /// Flush out the buffer and shutdown the stream.\n    pub async fn shutdown(&mut self) -> Result<(), io::Error> {\n        shutdown(&mut self.stream, &mut self.write_buf).await\n    }\n}\n\nimpl<S: AsyncRead + AsyncWrite + Unpin> Framed<S> {\n    /// Split into owned read and write parts. Beware of potential issues with\n    /// using halves in different tasks on TLS stream:\n    /// <https://github.com/tokio-rs/tls/issues/40>\n    pub fn split(self) -> (FramedReader<S>, FramedWriter<S>) {\n        let (read_half, write_half) = tokio::io::split(self.stream);\n        let reader = FramedReader {\n            stream: read_half,\n            read_buf: self.read_buf,\n        };\n        let writer = FramedWriter {\n            stream: write_half,\n            write_buf: self.write_buf,\n        };\n        (reader, writer)\n    }\n\n    /// Join read and write parts back.\n    pub fn unsplit(reader: FramedReader<S>, writer: FramedWriter<S>) -> Self {\n        Self {\n            stream: reader.stream.unsplit(writer.stream),\n            read_buf: reader.read_buf,\n            write_buf: writer.write_buf,\n        }\n    }\n}\n\n/// Read-only version of `Framed`.\npub struct FramedReader<S> {\n    stream: ReadHalf<S>,\n    read_buf: BytesMut,\n}\n\nimpl<S: AsyncRead + Unpin> FramedReader<S> {\n    pub async fn read_message(&mut self) -> Result<Option<FeMessage>, ConnectionError> {\n        read_message(&mut self.stream, &mut self.read_buf, FeMessage::parse).await\n    }\n}\n\n/// Write-only version of `Framed`.\npub struct FramedWriter<S> {\n    stream: WriteHalf<S>,\n    write_buf: BytesMut,\n}\n\nimpl<S: AsyncWrite + Unpin> FramedWriter<S> {\n    /// Write next message to the output buffer; doesn't flush.\n    pub fn 
write_message_noflush(&mut self, msg: &BeMessage<'_>) -> Result<(), ProtocolError> {\n        BeMessage::write(&mut self.write_buf, msg)\n    }\n\n    /// Flush out the buffer. This function is cancellation safe: it can be\n    /// interrupted and flushing will be continued in the next call.\n    pub async fn flush(&mut self) -> Result<(), io::Error> {\n        flush(&mut self.stream, &mut self.write_buf).await\n    }\n\n    /// Flush out the buffer and shutdown the stream.\n    pub async fn shutdown(&mut self) -> Result<(), io::Error> {\n        shutdown(&mut self.stream, &mut self.write_buf).await\n    }\n}\n\n/// Read next message from the stream. Returns Ok(None), if EOF happened and we\n/// don't have remaining data in the buffer. This function is cancellation safe:\n/// you can drop future which is not yet complete and finalize reading message\n/// with the next call.\n///\n/// Parametrized to allow reading startup or usual message, having different\n/// format.\nasync fn read_message<S: AsyncRead + Unpin, M, P>(\n    stream: &mut S,\n    read_buf: &mut BytesMut,\n    parse: P,\n) -> Result<Option<M>, ConnectionError>\nwhere\n    P: Fn(&mut BytesMut) -> Result<Option<M>, ProtocolError>,\n{\n    loop {\n        if let Some(msg) = parse(read_buf)? {\n            return Ok(Some(msg));\n        }\n        // If we can't build a frame yet, try to read more data and try again.\n        // Make sure we've got room for at least one byte to read to ensure\n        // that we don't get a spurious 0 that looks like EOF.\n        read_buf.reserve(1);\n        if stream.read_buf(read_buf).await? 
== 0 {\n            if read_buf.has_remaining() {\n                return Err(io::Error::new(\n                    ErrorKind::UnexpectedEof,\n                    \"EOF with unprocessed data in the buffer\",\n                )\n                .into());\n            } else {\n                return Ok(None); // clean EOF\n            }\n        }\n    }\n}\n\n/// Cancellation safe as long as the AsyncWrite is cancellation safe.\nasync fn flush<S: AsyncWrite + Unpin>(\n    stream: &mut S,\n    write_buf: &mut BytesMut,\n) -> Result<(), io::Error> {\n    while write_buf.has_remaining() {\n        let bytes_written = stream.write_buf(write_buf).await?;\n        if bytes_written == 0 {\n            return Err(io::Error::new(\n                ErrorKind::WriteZero,\n                \"failed to write message\",\n            ));\n        }\n    }\n    stream.flush().await\n}\n\n/// Cancellation safe as long as the AsyncWrite is cancellation safe.\nasync fn shutdown<S: AsyncWrite + Unpin>(\n    stream: &mut S,\n    write_buf: &mut BytesMut,\n) -> Result<(), io::Error> {\n    flush(stream, write_buf).await?;\n    stream.shutdown().await\n}\n"
  },
  {
    "path": "libs/pq_proto/src/lib.rs",
    "content": "//! Postgres protocol messages serialization-deserialization. See\n//! <https://www.postgresql.org/docs/devel/protocol-message-formats.html>\n//! on message formats.\n#![deny(clippy::undocumented_unsafe_blocks)]\n\npub mod framed;\n\nuse std::borrow::Cow;\nuse std::{fmt, io, str};\n\nuse byteorder::{BigEndian, ReadBytesExt};\nuse bytes::{Buf, BufMut, Bytes, BytesMut};\nuse itertools::Itertools;\n// re-export for use in utils pageserver_feedback.rs\npub use postgres_protocol::PG_EPOCH;\nuse serde::{Deserialize, Serialize};\n\npub type Oid = u32;\npub type SystemId = u64;\n\npub const INT8_OID: Oid = 20;\npub const INT4_OID: Oid = 23;\npub const TEXT_OID: Oid = 25;\n\n#[derive(Debug)]\npub enum FeMessage {\n    // Simple query.\n    Query(Bytes),\n    // Extended query protocol.\n    Parse(FeParseMessage),\n    Describe(FeDescribeMessage),\n    Bind(FeBindMessage),\n    Execute(FeExecuteMessage),\n    Close(FeCloseMessage),\n    Sync,\n    Terminate,\n    CopyData(Bytes),\n    CopyDone,\n    CopyFail,\n    PasswordMessage(Bytes),\n}\n\n#[derive(Clone, Copy, PartialEq, PartialOrd)]\npub struct ProtocolVersion(u32);\n\nimpl ProtocolVersion {\n    pub const fn new(major: u16, minor: u16) -> Self {\n        Self(((major as u32) << 16) | minor as u32)\n    }\n    pub const fn minor(self) -> u16 {\n        self.0 as u16\n    }\n    pub const fn major(self) -> u16 {\n        (self.0 >> 16) as u16\n    }\n}\n\nimpl fmt::Debug for ProtocolVersion {\n    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {\n        f.debug_list()\n            .entry(&self.major())\n            .entry(&self.minor())\n            .finish()\n    }\n}\n\n#[derive(Debug)]\npub enum FeStartupPacket {\n    CancelRequest(CancelKeyData),\n    SslRequest {\n        direct: bool,\n    },\n    GssEncRequest,\n    StartupMessage {\n        version: ProtocolVersion,\n        params: StartupMessageParams,\n    },\n}\n\n#[derive(Debug, Clone, Default)]\npub struct 
StartupMessageParamsBuilder {\n    params: BytesMut,\n}\n\nimpl StartupMessageParamsBuilder {\n    /// Set parameter's value by its name.\n    /// name and value must not contain a \\0 byte\n    pub fn insert(&mut self, name: &str, value: &str) {\n        self.params.put(name.as_bytes());\n        self.params.put(&b\"\\0\"[..]);\n        self.params.put(value.as_bytes());\n        self.params.put(&b\"\\0\"[..]);\n    }\n\n    pub fn freeze(self) -> StartupMessageParams {\n        StartupMessageParams {\n            params: self.params.freeze(),\n        }\n    }\n}\n\n#[derive(Debug, Clone, Default)]\npub struct StartupMessageParams {\n    pub params: Bytes,\n}\n\nimpl StartupMessageParams {\n    /// Get parameter's value by its name.\n    pub fn get(&self, name: &str) -> Option<&str> {\n        self.iter().find_map(|(k, v)| (k == name).then_some(v))\n    }\n\n    /// Split command-line options according to PostgreSQL's logic,\n    /// taking into account all escape sequences but leaving them as-is.\n    /// [`None`] means that there's no `options` in [`Self`].\n    pub fn options_raw(&self) -> Option<impl Iterator<Item = &str>> {\n        self.get(\"options\").map(Self::parse_options_raw)\n    }\n\n    /// Split command-line options according to PostgreSQL's logic,\n    /// applying all escape sequences (using owned strings as needed).\n    /// [`None`] means that there's no `options` in [`Self`].\n    pub fn options_escaped(&self) -> Option<impl Iterator<Item = Cow<'_, str>>> {\n        self.get(\"options\").map(Self::parse_options_escaped)\n    }\n\n    /// Split command-line options according to PostgreSQL's logic,\n    /// taking into account all escape sequences but leaving them as-is.\n    pub fn parse_options_raw(input: &str) -> impl Iterator<Item = &str> {\n        // See `postgres: pg_split_opts`.\n        let mut last_was_escape = false;\n        input\n            .split(move |c: char| {\n                // We split by non-escaped whitespace symbols.\n  
              let should_split = c.is_ascii_whitespace() && !last_was_escape;\n                last_was_escape = c == '\\\\' && !last_was_escape;\n                should_split\n            })\n            .filter(|s| !s.is_empty())\n    }\n\n    /// Split command-line options according to PostgreSQL's logic,\n    /// applying all escape sequences (using owned strings as needed).\n    pub fn parse_options_escaped(input: &str) -> impl Iterator<Item = Cow<'_, str>> {\n        // See `postgres: pg_split_opts`.\n        Self::parse_options_raw(input).map(|s| {\n            let mut preserve_next_escape = false;\n            let escape = |c| {\n                // We should remove '\\\\' unless it's preceded by '\\\\'.\n                let should_remove = c == '\\\\' && !preserve_next_escape;\n                preserve_next_escape = should_remove;\n                should_remove\n            };\n\n            match s.contains('\\\\') {\n                true => Cow::Owned(s.replace(escape, \"\")),\n                false => Cow::Borrowed(s),\n            }\n        })\n    }\n\n    /// Iterate through key-value pairs in an arbitrary order.\n    pub fn iter(&self) -> impl Iterator<Item = (&str, &str)> {\n        let params =\n            std::str::from_utf8(&self.params).expect(\"should be validated as utf8 already\");\n        params.split_terminator('\\0').tuples()\n    }\n\n    // This function is mostly useful in tests.\n    #[doc(hidden)]\n    pub fn new<'a, const N: usize>(pairs: [(&'a str, &'a str); N]) -> Self {\n        let mut b = StartupMessageParamsBuilder::default();\n        for (k, v) in pairs {\n            b.insert(k, v)\n        }\n        b.freeze()\n    }\n}\n\n#[derive(Debug, Hash, PartialEq, Eq, Clone, Copy, Serialize, Deserialize)]\npub struct CancelKeyData {\n    pub backend_pid: i32,\n    pub cancel_key: i32,\n}\n\npub fn id_to_cancel_key(id: u64) -> CancelKeyData {\n    CancelKeyData {\n        backend_pid: (id >> 32) as i32,\n        cancel_key: (id & 
0xffffffff) as i32,\n    }\n}\n\nimpl fmt::Display for CancelKeyData {\n    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {\n        let hi = (self.backend_pid as u64) << 32;\n        let lo = (self.cancel_key as u64) & 0xffffffff;\n        let id = hi | lo;\n\n        // This format is more compact and might work better for logs.\n        f.debug_tuple(\"CancelKeyData\")\n            .field(&format_args!(\"{id:x}\"))\n            .finish()\n    }\n}\n\nuse rand::distr::{Distribution, StandardUniform};\nimpl Distribution<CancelKeyData> for StandardUniform {\n    fn sample<R: rand::Rng + ?Sized>(&self, rng: &mut R) -> CancelKeyData {\n        CancelKeyData {\n            backend_pid: rng.random(),\n            cancel_key: rng.random(),\n        }\n    }\n}\n\n// We only support the simple case of Parse on unnamed prepared statement and\n// no params\n#[derive(Debug)]\npub struct FeParseMessage {\n    pub query_string: Bytes,\n}\n\n#[derive(Debug)]\npub struct FeDescribeMessage {\n    pub kind: u8, // 'S' to describe a prepared statement; or 'P' to describe a portal.\n                  // we only support unnamed prepared stmt or portal\n}\n\n// we only support unnamed prepared stmt and portal\n#[derive(Debug)]\npub struct FeBindMessage;\n\n// we only support unnamed prepared stmt or portal\n#[derive(Debug)]\npub struct FeExecuteMessage {\n    /// max # of rows\n    pub maxrows: i32,\n}\n\n// we only support unnamed prepared stmt and portal\n#[derive(Debug)]\npub struct FeCloseMessage;\n\n/// An error occurred while parsing or serializing raw stream into Postgres\n/// messages.\n#[derive(thiserror::Error, Debug)]\npub enum ProtocolError {\n    /// Invalid packet was received from the client (e.g. 
unexpected message\n    /// type or broken len).\n    #[error(\"Protocol error: {0}\")]\n    Protocol(String),\n    /// Failed to parse or, (unlikely), serialize a protocol message.\n    #[error(\"Message parse error: {0}\")]\n    BadMessage(String),\n}\n\nimpl ProtocolError {\n    /// Proxy stream.rs uses only io::Error; provide it.\n    pub fn into_io_error(self) -> io::Error {\n        io::Error::other(self.to_string())\n    }\n}\n\nimpl FeMessage {\n    /// Read and parse one message from the `buf` input buffer. If there is at\n    /// least one valid message, returns it, advancing `buf`; redundant copies\n    /// are avoided, as thanks to `bytes` crate ptrs in parsed message point\n    /// directly into the `buf` (processed data is garbage collected after\n    /// parsed message is dropped).\n    ///\n    /// Returns None if `buf` doesn't contain enough data for a single message.\n    /// For efficiency, tries to reserve large enough space in `buf` for the\n    /// next message in this case to save the repeated calls.\n    ///\n    /// Returns a `ProtocolError` if the message is malformed (invalid length,\n    /// unknown message tag, or a body that fails to parse).\n    //\n    // Inspired by rust-postgres Message::parse.\n    pub fn parse(buf: &mut BytesMut) -> Result<Option<FeMessage>, ProtocolError> {\n        // Every message contains message type byte and 4 bytes len; can't do\n        // much without them.\n        if buf.len() < 5 {\n            let to_read = 5 - buf.len();\n            buf.reserve(to_read);\n            return Ok(None);\n        }\n\n        // We shouldn't advance `buf` as probably full message is not there yet,\n        // so can't directly use Bytes::get_u32 etc.\n        let tag = buf[0];\n        let len = (&buf[1..5]).read_u32::<BigEndian>().unwrap();\n        if len < 4 {\n            return Err(ProtocolError::Protocol(format!(\n                \"invalid message length {len}\"\n            )));\n        }\n\n        // length field includes itself, but not message 
type.\n        let total_len = len as usize + 1;\n        if buf.len() < total_len {\n            // Don't have full message yet.\n            let to_read = total_len - buf.len();\n            buf.reserve(to_read);\n            return Ok(None);\n        }\n\n        // got the message, advance buffer\n        let mut msg = buf.split_to(total_len).freeze();\n        msg.advance(5); // consume message type and len\n\n        match tag {\n            b'Q' => Ok(Some(FeMessage::Query(msg))),\n            b'P' => Ok(Some(FeParseMessage::parse(msg)?)),\n            b'D' => Ok(Some(FeDescribeMessage::parse(msg)?)),\n            b'E' => Ok(Some(FeExecuteMessage::parse(msg)?)),\n            b'B' => Ok(Some(FeBindMessage::parse(msg)?)),\n            b'C' => Ok(Some(FeCloseMessage::parse(msg)?)),\n            b'S' => Ok(Some(FeMessage::Sync)),\n            b'X' => Ok(Some(FeMessage::Terminate)),\n            b'd' => Ok(Some(FeMessage::CopyData(msg))),\n            b'c' => Ok(Some(FeMessage::CopyDone)),\n            b'f' => Ok(Some(FeMessage::CopyFail)),\n            b'p' => Ok(Some(FeMessage::PasswordMessage(msg))),\n            tag => Err(ProtocolError::Protocol(format!(\n                \"unknown message tag: {tag},'{msg:?}'\"\n            ))),\n        }\n    }\n}\n\nimpl FeStartupPacket {\n    /// Read and parse startup message from the `buf` input buffer. 
It is\n    /// different from [`FeMessage::parse`] because startup messages don't have\n    /// message type byte; otherwise, its comments apply.\n    pub fn parse(buf: &mut BytesMut) -> Result<Option<FeStartupPacket>, ProtocolError> {\n        /// <https://github.com/postgres/postgres/blob/ca481d3c9ab7bf69ff0c8d71ad3951d407f6a33c/src/include/libpq/pqcomm.h#L118>\n        const MAX_STARTUP_PACKET_LENGTH: usize = 10000;\n        const RESERVED_INVALID_MAJOR_VERSION: u16 = 1234;\n        /// <https://github.com/postgres/postgres/blob/ca481d3c9ab7bf69ff0c8d71ad3951d407f6a33c/src/include/libpq/pqcomm.h#L132>\n        const CANCEL_REQUEST_CODE: ProtocolVersion = ProtocolVersion::new(1234, 5678);\n        /// <https://github.com/postgres/postgres/blob/ca481d3c9ab7bf69ff0c8d71ad3951d407f6a33c/src/include/libpq/pqcomm.h#L166>\n        const NEGOTIATE_SSL_CODE: ProtocolVersion = ProtocolVersion::new(1234, 5679);\n        /// <https://github.com/postgres/postgres/blob/ca481d3c9ab7bf69ff0c8d71ad3951d407f6a33c/src/include/libpq/pqcomm.h#L167>\n        const NEGOTIATE_GSS_CODE: ProtocolVersion = ProtocolVersion::new(1234, 5680);\n\n        // <https://github.com/postgres/postgres/blob/04bcf9e19a4261fe9c7df37c777592c2e10c32a7/src/backend/tcop/backend_startup.c#L378-L382>\n        // First byte indicates standard SSL handshake message\n        // (It can't be a Postgres startup length because in network byte order\n        // that would be a startup packet hundreds of megabytes long)\n        if buf.first() == Some(&0x16) {\n            return Ok(Some(FeStartupPacket::SslRequest { direct: true }));\n        }\n\n        // need at least 4 bytes with packet len\n        if buf.len() < 4 {\n            let to_read = 4 - buf.len();\n            buf.reserve(to_read);\n            return Ok(None);\n        }\n\n        // We shouldn't advance `buf` as probably full message is not there yet,\n        // so can't directly use Bytes::get_u32 etc.\n        let len = 
(&buf[0..4]).read_u32::<BigEndian>().unwrap() as usize;\n        // The proposed replacement is `!(8..=MAX_STARTUP_PACKET_LENGTH).contains(&len)`\n        // which is less readable\n        #[allow(clippy::manual_range_contains)]\n        if len < 8 || len > MAX_STARTUP_PACKET_LENGTH {\n            return Err(ProtocolError::Protocol(format!(\n                \"invalid startup packet message length {len}\"\n            )));\n        }\n\n        if buf.len() < len {\n            // Don't have full message yet.\n            let to_read = len - buf.len();\n            buf.reserve(to_read);\n            return Ok(None);\n        }\n\n        // got the message, advance buffer\n        let mut msg = buf.split_to(len).freeze();\n        msg.advance(4); // consume len\n\n        let request_code = ProtocolVersion(msg.get_u32());\n        // StartupMessage, CancelRequest, SSLRequest etc are differentiated by request code.\n        let message = match request_code {\n            CANCEL_REQUEST_CODE => {\n                if msg.remaining() != 8 {\n                    return Err(ProtocolError::BadMessage(\n                        \"CancelRequest message is malformed, backend PID / secret key missing\"\n                            .to_owned(),\n                    ));\n                }\n                FeStartupPacket::CancelRequest(CancelKeyData {\n                    backend_pid: msg.get_i32(),\n                    cancel_key: msg.get_i32(),\n                })\n            }\n            NEGOTIATE_SSL_CODE => {\n                // Requested upgrade to SSL (aka TLS)\n                FeStartupPacket::SslRequest { direct: false }\n            }\n            NEGOTIATE_GSS_CODE => {\n                // Requested upgrade to GSSAPI\n                FeStartupPacket::GssEncRequest\n            }\n            version if version.major() == RESERVED_INVALID_MAJOR_VERSION => {\n                return Err(ProtocolError::Protocol(format!(\n                    \"Unrecognized request code 
{}\",\n                    version.minor()\n                )));\n            }\n            // TODO bail if protocol major_version is not 3?\n            version => {\n                // StartupMessage\n\n                let s = str::from_utf8(&msg).map_err(|_e| {\n                    ProtocolError::BadMessage(\"StartupMessage params: invalid utf-8\".to_owned())\n                })?;\n                let s = s.strip_suffix('\\0').ok_or_else(|| {\n                    ProtocolError::Protocol(\n                        \"StartupMessage params: missing null terminator\".to_string(),\n                    )\n                })?;\n\n                FeStartupPacket::StartupMessage {\n                    version,\n                    params: StartupMessageParams {\n                        params: msg.slice_ref(s.as_bytes()),\n                    },\n                }\n            }\n        };\n        Ok(Some(message))\n    }\n}\n\nimpl FeParseMessage {\n    fn parse(mut buf: Bytes) -> Result<FeMessage, ProtocolError> {\n        // FIXME: the rust-postgres driver uses a named prepared statement\n        // for copy_out(). We're not prepared to handle that correctly. 
For\n        // now, just ignore the statement name, assuming that the client never\n        // uses more than one prepared statement at a time.\n\n        let _pstmt_name = read_cstr(&mut buf)?;\n        let query_string = read_cstr(&mut buf)?;\n        if buf.remaining() < 2 {\n            return Err(ProtocolError::BadMessage(\n                \"Parse message is malformed, nparams missing\".to_string(),\n            ));\n        }\n        let nparams = buf.get_i16();\n\n        if nparams != 0 {\n            return Err(ProtocolError::BadMessage(\n                \"query params not implemented\".to_string(),\n            ));\n        }\n\n        Ok(FeMessage::Parse(FeParseMessage { query_string }))\n    }\n}\n\nimpl FeDescribeMessage {\n    fn parse(mut buf: Bytes) -> Result<FeMessage, ProtocolError> {\n        let kind = buf.get_u8();\n        let _pstmt_name = read_cstr(&mut buf)?;\n\n        // FIXME: see FeParseMessage::parse\n        if kind != b'S' {\n            return Err(ProtocolError::BadMessage(\n                \"only prepared statemement Describe is implemented\".to_string(),\n            ));\n        }\n\n        Ok(FeMessage::Describe(FeDescribeMessage { kind }))\n    }\n}\n\nimpl FeExecuteMessage {\n    fn parse(mut buf: Bytes) -> Result<FeMessage, ProtocolError> {\n        let portal_name = read_cstr(&mut buf)?;\n        if buf.remaining() < 4 {\n            return Err(ProtocolError::BadMessage(\n                \"FeExecuteMessage message is malformed, maxrows missing\".to_string(),\n            ));\n        }\n        let maxrows = buf.get_i32();\n\n        if !portal_name.is_empty() {\n            return Err(ProtocolError::BadMessage(\n                \"named portals not implemented\".to_string(),\n            ));\n        }\n        if maxrows != 0 {\n            return Err(ProtocolError::BadMessage(\n                \"row limit in Execute message not implemented\".to_string(),\n            ));\n        }\n\n        
Ok(FeMessage::Execute(FeExecuteMessage { maxrows }))\n    }\n}\n\nimpl FeBindMessage {\n    fn parse(mut buf: Bytes) -> Result<FeMessage, ProtocolError> {\n        let portal_name = read_cstr(&mut buf)?;\n        let _pstmt_name = read_cstr(&mut buf)?;\n\n        // FIXME: see FeParseMessage::parse\n        if !portal_name.is_empty() {\n            return Err(ProtocolError::BadMessage(\n                \"named portals not implemented\".to_string(),\n            ));\n        }\n\n        Ok(FeMessage::Bind(FeBindMessage))\n    }\n}\n\nimpl FeCloseMessage {\n    fn parse(mut buf: Bytes) -> Result<FeMessage, ProtocolError> {\n        let _kind = buf.get_u8();\n        let _pstmt_or_portal_name = read_cstr(&mut buf)?;\n\n        // FIXME: we do nothing with Close\n        Ok(FeMessage::Close(FeCloseMessage))\n    }\n}\n\n// Backend\n\n#[derive(Debug)]\npub enum BeMessage<'a> {\n    AuthenticationOk,\n    AuthenticationMD5Password([u8; 4]),\n    AuthenticationSasl(BeAuthenticationSaslMessage<'a>),\n    AuthenticationCleartextPassword,\n    BackendKeyData(CancelKeyData),\n    BindComplete,\n    CommandComplete(&'a [u8]),\n    CopyData(&'a [u8]),\n    CopyDone,\n    CopyFail,\n    CopyInResponse,\n    CopyOutResponse,\n    CopyBothResponse,\n    CloseComplete,\n    // None means column is NULL\n    DataRow(&'a [Option<&'a [u8]>]),\n    // None errcode means internal_error will be sent.\n    ErrorResponse(&'a str, Option<&'a [u8; 5]>),\n    /// Single byte - used in response to SSLRequest/GSSENCRequest.\n    EncryptionResponse(bool),\n    NoData,\n    ParameterDescription,\n    ParameterStatus {\n        name: &'a [u8],\n        value: &'a [u8],\n    },\n    ParseComplete,\n    ReadyForQuery,\n    RowDescription(&'a [RowDescriptor<'a>]),\n    XLogData(XLogDataBody<'a>),\n    NoticeResponse(&'a str),\n    NegotiateProtocolVersion {\n        version: ProtocolVersion,\n        options: &'a [&'a str],\n    },\n    KeepAlive(WalSndKeepAlive),\n    /// Batch of interpreted, 
shard filtered WAL records,\n    /// ready for the pageserver to ingest\n    InterpretedWalRecords(InterpretedWalRecordsBody<'a>),\n\n    Raw(u8, &'a [u8]),\n}\n\n/// Common shorthands.\nimpl<'a> BeMessage<'a> {\n    /// A [`BeMessage::ParameterStatus`] holding the client encoding, i.e. UTF-8.\n    /// This is a sensible default, given that:\n    ///  * rust strings only support this encoding out of the box.\n    ///  * tokio-postgres, postgres-jdbc (and probably more) mandate it.\n    ///\n    /// TODO: do we need to report `server_encoding` as well?\n    pub const CLIENT_ENCODING: Self = Self::ParameterStatus {\n        name: b\"client_encoding\",\n        value: b\"UTF8\",\n    };\n\n    pub const INTEGER_DATETIMES: Self = Self::ParameterStatus {\n        name: b\"integer_datetimes\",\n        value: b\"on\",\n    };\n\n    /// Build a [`BeMessage::ParameterStatus`] holding the server version.\n    pub fn server_version(version: &'a str) -> Self {\n        Self::ParameterStatus {\n            name: b\"server_version\",\n            value: version.as_bytes(),\n        }\n    }\n}\n\n#[derive(Debug)]\npub enum BeAuthenticationSaslMessage<'a> {\n    Methods(&'a [&'a str]),\n    Continue(&'a [u8]),\n    Final(&'a [u8]),\n}\n\n#[derive(Debug)]\npub enum BeParameterStatusMessage<'a> {\n    Encoding(&'a str),\n    ServerVersion(&'a str),\n}\n\n// One row description in RowDescription packet.\n#[derive(Debug)]\npub struct RowDescriptor<'a> {\n    pub name: &'a [u8],\n    pub tableoid: Oid,\n    pub attnum: i16,\n    pub typoid: Oid,\n    pub typlen: i16,\n    pub typmod: i32,\n    pub formatcode: i16,\n}\n\nimpl Default for RowDescriptor<'_> {\n    fn default() -> RowDescriptor<'static> {\n        RowDescriptor {\n            name: b\"\",\n            tableoid: 0,\n            attnum: 0,\n            typoid: 0,\n            typlen: 0,\n            typmod: 0,\n            formatcode: 0,\n        }\n    }\n}\n\nimpl RowDescriptor<'_> {\n    /// Convenience function to 
create a RowDescriptor message for an int8 column\n    pub const fn int8_col(name: &[u8]) -> RowDescriptor {\n        RowDescriptor {\n            name,\n            tableoid: 0,\n            attnum: 0,\n            typoid: INT8_OID,\n            typlen: 8,\n            typmod: 0,\n            formatcode: 0,\n        }\n    }\n\n    pub const fn text_col(name: &[u8]) -> RowDescriptor {\n        RowDescriptor {\n            name,\n            tableoid: 0,\n            attnum: 0,\n            typoid: TEXT_OID,\n            typlen: -1,\n            typmod: 0,\n            formatcode: 0,\n        }\n    }\n}\n\n#[derive(Debug)]\npub struct XLogDataBody<'a> {\n    pub wal_start: u64,\n    pub wal_end: u64, // current end of WAL on the server\n    pub timestamp: i64,\n    pub data: &'a [u8],\n}\n\n#[derive(Debug)]\npub struct WalSndKeepAlive {\n    pub wal_end: u64, // current end of WAL on the server\n    pub timestamp: i64,\n    pub request_reply: bool,\n}\n\n/// Batch of interpreted WAL records used in the interpreted\n/// safekeeper to pageserver protocol.\n///\n/// Note that the pageserver uses the RawInterpretedWalRecordsBody\n/// counterpart of this from the neondatabase/rust-postgres repo.\n/// If you're changing this struct, you likely need to change its\n/// twin as well.\n#[derive(Debug)]\npub struct InterpretedWalRecordsBody<'a> {\n    /// End of raw WAL in [`Self::data`]\n    pub streaming_lsn: u64,\n    /// Current end of WAL on the server\n    pub commit_lsn: u64,\n    pub data: &'a [u8],\n}\n\npub static HELLO_WORLD_ROW: BeMessage = BeMessage::DataRow(&[Some(b\"hello world\")]);\n\n// single text column\npub static SINGLE_COL_ROWDESC: BeMessage = BeMessage::RowDescription(&[RowDescriptor {\n    name: b\"data\",\n    tableoid: 0,\n    attnum: 0,\n    typoid: TEXT_OID,\n    typlen: -1,\n    typmod: 0,\n    formatcode: 0,\n}]);\n\n/// Call f() to write body of the message and prepend it with 4-byte len as\n/// prescribed by the protocol.\nfn 
write_body<R>(buf: &mut BytesMut, f: impl FnOnce(&mut BytesMut) -> R) -> R {\n    let base = buf.len();\n    buf.extend_from_slice(&[0; 4]);\n\n    let res = f(buf);\n\n    let size = i32::try_from(buf.len() - base).expect(\"message too big to transmit\");\n    (&mut buf[base..]).put_slice(&size.to_be_bytes());\n\n    res\n}\n\n/// Safe write of s into buf as cstring (String in the protocol).\nfn write_cstr(s: impl AsRef<[u8]>, buf: &mut BytesMut) -> Result<(), ProtocolError> {\n    let bytes = s.as_ref();\n    if bytes.contains(&0) {\n        return Err(ProtocolError::BadMessage(\n            \"string contains embedded null\".to_owned(),\n        ));\n    }\n    buf.put_slice(bytes);\n    buf.put_u8(0);\n    Ok(())\n}\n\n/// Read cstring from buf, advancing it.\npub fn read_cstr(buf: &mut Bytes) -> Result<Bytes, ProtocolError> {\n    let pos = buf\n        .iter()\n        .position(|x| *x == 0)\n        .ok_or_else(|| ProtocolError::BadMessage(\"missing cstring terminator\".to_owned()))?;\n    let result = buf.split_to(pos);\n    buf.advance(1); // drop the null terminator\n    Ok(result)\n}\n\npub const SQLSTATE_INTERNAL_ERROR: &[u8; 5] = b\"XX000\";\npub const SQLSTATE_ADMIN_SHUTDOWN: &[u8; 5] = b\"57P01\";\npub const SQLSTATE_SUCCESSFUL_COMPLETION: &[u8; 5] = b\"00000\";\n\nimpl BeMessage<'_> {\n    /// Serialize `message` to the given `buf`.\n    /// Apart from smart memory managemet, BytesMut is good here as msg len\n    /// precedes its body and it is handy to write it down first and then fill\n    /// the length. 
With Write we would have to either calc it manually or have\n    /// one more buffer.\n    pub fn write(buf: &mut BytesMut, message: &BeMessage) -> Result<(), ProtocolError> {\n        match message {\n            BeMessage::Raw(code, data) => {\n                buf.put_u8(*code);\n                write_body(buf, |b| b.put_slice(data))\n            }\n            BeMessage::AuthenticationOk => {\n                buf.put_u8(b'R');\n                write_body(buf, |buf| {\n                    buf.put_i32(0); // Specifies that the authentication was successful.\n                });\n            }\n\n            BeMessage::AuthenticationCleartextPassword => {\n                buf.put_u8(b'R');\n                write_body(buf, |buf| {\n                    buf.put_i32(3); // Specifies that clear text password is required.\n                });\n            }\n\n            BeMessage::AuthenticationMD5Password(salt) => {\n                buf.put_u8(b'R');\n                write_body(buf, |buf| {\n                    buf.put_i32(5); // Specifies that an MD5-encrypted password is required.\n                    buf.put_slice(&salt[..]);\n                });\n            }\n\n            BeMessage::AuthenticationSasl(msg) => {\n                buf.put_u8(b'R');\n                write_body(buf, |buf| {\n                    use BeAuthenticationSaslMessage::*;\n                    match msg {\n                        Methods(methods) => {\n                            buf.put_i32(10); // Specifies that SASL auth method is used.\n                            for method in methods.iter() {\n                                write_cstr(method, buf)?;\n                            }\n                            buf.put_u8(0); // zero terminator for the list\n                        }\n                        Continue(extra) => {\n                            buf.put_i32(11); // Continue SASL auth.\n                            buf.put_slice(extra);\n                        }\n               
         Final(extra) => {\n                            buf.put_i32(12); // Send final SASL message.\n                            buf.put_slice(extra);\n                        }\n                    }\n                    Ok(())\n                })?;\n            }\n\n            BeMessage::BackendKeyData(key_data) => {\n                buf.put_u8(b'K');\n                write_body(buf, |buf| {\n                    buf.put_i32(key_data.backend_pid);\n                    buf.put_i32(key_data.cancel_key);\n                });\n            }\n\n            BeMessage::BindComplete => {\n                buf.put_u8(b'2');\n                write_body(buf, |_| {});\n            }\n\n            BeMessage::CloseComplete => {\n                buf.put_u8(b'3');\n                write_body(buf, |_| {});\n            }\n\n            BeMessage::CommandComplete(cmd) => {\n                buf.put_u8(b'C');\n                write_body(buf, |buf| write_cstr(cmd, buf))?;\n            }\n\n            BeMessage::CopyData(data) => {\n                buf.put_u8(b'd');\n                write_body(buf, |buf| {\n                    buf.put_slice(data);\n                });\n            }\n\n            BeMessage::CopyDone => {\n                buf.put_u8(b'c');\n                write_body(buf, |_| {});\n            }\n\n            BeMessage::CopyFail => {\n                buf.put_u8(b'f');\n                write_body(buf, |_| {});\n            }\n\n            BeMessage::CopyInResponse => {\n                buf.put_u8(b'G');\n                write_body(buf, |buf| {\n                    buf.put_u8(1); // copy_is_binary\n                    buf.put_i16(0); // numAttributes\n                });\n            }\n\n            BeMessage::CopyOutResponse => {\n                buf.put_u8(b'H');\n                write_body(buf, |buf| {\n                    buf.put_u8(0); // copy_is_binary\n                    buf.put_i16(0); // numAttributes\n                });\n            }\n\n            
BeMessage::CopyBothResponse => {\n                buf.put_u8(b'W');\n                write_body(buf, |buf| {\n                    // doesn't matter, used only for replication\n                    buf.put_u8(0); // copy_is_binary\n                    buf.put_i16(0); // numAttributes\n                });\n            }\n\n            BeMessage::DataRow(vals) => {\n                buf.put_u8(b'D');\n                write_body(buf, |buf| {\n                    buf.put_u16(vals.len() as u16); // num of cols\n                    for val_opt in vals.iter() {\n                        if let Some(val) = val_opt {\n                            buf.put_u32(val.len() as u32);\n                            buf.put_slice(val);\n                        } else {\n                            buf.put_i32(-1);\n                        }\n                    }\n                });\n            }\n\n            // ErrorResponse is a zero-terminated array of zero-terminated fields.\n            // First byte of each field represents type of this field. 
Set just enough fields\n            // to satisfy rust-postgres client: 'S' -- severity, 'C' -- error, 'M' -- error\n            // message text.\n            BeMessage::ErrorResponse(error_msg, pg_error_code) => {\n                // 'E' signalizes ErrorResponse messages\n                buf.put_u8(b'E');\n                write_body(buf, |buf| {\n                    buf.put_u8(b'S'); // severity\n                    buf.put_slice(b\"ERROR\\0\");\n\n                    buf.put_u8(b'C'); // SQLSTATE error code\n                    buf.put_slice(&terminate_code(\n                        pg_error_code.unwrap_or(SQLSTATE_INTERNAL_ERROR),\n                    ));\n\n                    buf.put_u8(b'M'); // the message\n                    write_cstr(error_msg, buf)?;\n\n                    buf.put_u8(0); // terminator\n                    Ok(())\n                })?;\n            }\n\n            // NoticeResponse has the same format as ErrorResponse. From doc: \"The frontend should display the\n            // message but continue listening for ReadyForQuery or ErrorResponse\"\n            BeMessage::NoticeResponse(error_msg) => {\n                // For all the errors set Severity to Error and error code to\n                // 'internal error'.\n\n                // 'N' signalizes NoticeResponse messages\n                buf.put_u8(b'N');\n                write_body(buf, |buf| {\n                    buf.put_u8(b'S'); // severity\n                    buf.put_slice(b\"NOTICE\\0\");\n\n                    buf.put_u8(b'C'); // SQLSTATE error code\n                    buf.put_slice(&terminate_code(SQLSTATE_INTERNAL_ERROR));\n\n                    buf.put_u8(b'M'); // the message\n                    write_cstr(error_msg.as_bytes(), buf)?;\n\n                    buf.put_u8(0); // terminator\n                    Ok(())\n                })?;\n            }\n\n            BeMessage::NoData => {\n                buf.put_u8(b'n');\n                write_body(buf, |_| {});\n       
     }\n\n            BeMessage::EncryptionResponse(should_negotiate) => {\n                let response = if *should_negotiate { b'S' } else { b'N' };\n                buf.put_u8(response);\n            }\n\n            BeMessage::ParameterStatus { name, value } => {\n                buf.put_u8(b'S');\n                write_body(buf, |buf| {\n                    write_cstr(name, buf)?;\n                    write_cstr(value, buf)\n                })?;\n            }\n\n            BeMessage::ParameterDescription => {\n                buf.put_u8(b't');\n                write_body(buf, |buf| {\n                    // we don't support params, so always 0\n                    buf.put_i16(0);\n                });\n            }\n\n            BeMessage::ParseComplete => {\n                buf.put_u8(b'1');\n                write_body(buf, |_| {});\n            }\n\n            BeMessage::ReadyForQuery => {\n                buf.put_u8(b'Z');\n                write_body(buf, |buf| {\n                    buf.put_u8(b'I');\n                });\n            }\n\n            BeMessage::RowDescription(rows) => {\n                buf.put_u8(b'T');\n                write_body(buf, |buf| {\n                    buf.put_i16(rows.len() as i16); // # of fields\n                    for row in rows.iter() {\n                        write_cstr(row.name, buf)?;\n                        buf.put_i32(0); /* table oid */\n                        buf.put_i16(0); /* attnum */\n                        buf.put_u32(row.typoid);\n                        buf.put_i16(row.typlen);\n                        buf.put_i32(-1); /* typmod */\n                        buf.put_i16(0); /* format code */\n                    }\n                    Ok(())\n                })?;\n            }\n\n            BeMessage::XLogData(body) => {\n                buf.put_u8(b'd');\n                write_body(buf, |buf| {\n                    buf.put_u8(b'w');\n                    buf.put_u64(body.wal_start);\n              
      buf.put_u64(body.wal_end);\n                    buf.put_i64(body.timestamp);\n                    buf.put_slice(body.data);\n                });\n            }\n\n            BeMessage::KeepAlive(req) => {\n                buf.put_u8(b'd');\n                write_body(buf, |buf| {\n                    buf.put_u8(b'k');\n                    buf.put_u64(req.wal_end);\n                    buf.put_i64(req.timestamp);\n                    buf.put_u8(u8::from(req.request_reply));\n                });\n            }\n\n            BeMessage::NegotiateProtocolVersion { version, options } => {\n                buf.put_u8(b'v');\n                write_body(buf, |buf| {\n                    buf.put_u32(version.0);\n                    buf.put_u32(options.len() as u32);\n                    for option in options.iter() {\n                        write_cstr(option, buf)?;\n                    }\n                    Ok(())\n                })?\n            }\n\n            BeMessage::InterpretedWalRecords(rec) => {\n                // We use the COPY_DATA_TAG for our custom message\n                // since this tag is interpreted as raw bytes.\n                buf.put_u8(b'd');\n                write_body(buf, |buf| {\n                    buf.put_u8(b'0'); // matches INTERPRETED_WAL_RECORD_TAG in postgres-protocol\n                    // dependency\n                    buf.put_u64(rec.streaming_lsn);\n                    buf.put_u64(rec.commit_lsn);\n                    buf.put_slice(rec.data);\n                });\n            }\n        }\n        Ok(())\n    }\n}\n\nfn terminate_code(code: &[u8; 5]) -> [u8; 6] {\n    let mut terminated = [0; 6];\n    for (i, &elem) in code.iter().enumerate() {\n        terminated[i] = elem;\n    }\n\n    terminated\n}\n\n#[cfg(test)]\nmod tests {\n    use super::*;\n\n    #[test]\n    fn test_startup_message_params_options_escaped() {\n        fn split_options(params: &StartupMessageParams) -> Vec<Cow<'_, str>> {\n            params\n  
              .options_escaped()\n                .expect(\"options are None\")\n                .collect()\n        }\n\n        let make_params = |options| StartupMessageParams::new([(\"options\", options)]);\n\n        let params = StartupMessageParams::new([]);\n        assert!(params.options_escaped().is_none());\n\n        let params = make_params(\"\");\n        assert!(split_options(&params).is_empty());\n\n        let params = make_params(\"foo\");\n        assert_eq!(split_options(&params), [\"foo\"]);\n\n        let params = make_params(\" foo  bar \");\n        assert_eq!(split_options(&params), [\"foo\", \"bar\"]);\n\n        let params = make_params(\"foo\\\\ bar \\\\ \\\\\\\\ baz\\\\  lol\");\n        assert_eq!(split_options(&params), [\"foo bar\", \" \\\\\", \"baz \", \"lol\"]);\n    }\n\n    #[test]\n    fn parse_fe_startup_packet_regression() {\n        let data = [0, 0, 0, 7, 0, 0, 0, 0];\n        FeStartupPacket::parse(&mut BytesMut::from_iter(data)).unwrap_err();\n    }\n\n    #[test]\n    fn cancel_key_data() {\n        let key = CancelKeyData {\n            backend_pid: -1817212860,\n            cancel_key: -1183897012,\n        };\n        assert_eq!(format!(\"{key}\"), \"CancelKeyData(93af8844b96f2a4c)\");\n    }\n}\n"
  },
  {
    "path": "libs/proxy/README.md",
    "content": "This directory contains libraries that are specific for proxy.\n\nCurrently, it contains a significant fork/refactoring of rust-postgres that no longer reflects the API\nof the original library. Since it was so significant, it made sense to upgrade it to its own set of libraries.\n\nProxy needs unique access to the protocol, which explains why such heavy modifications were necessary.\n"
  },
  {
    "path": "libs/proxy/json/Cargo.toml",
    "content": "[package]\nname = \"json\"\nversion = \"0.1.0\"\nedition.workspace = true\nlicense.workspace = true\n\n[dependencies]\nryu = \"1\"\nitoa = \"1\"\n\n[dev-dependencies]\nfutures = \"0.3\"\n"
  },
  {
    "path": "libs/proxy/json/src/lib.rs",
    "content": "//! A JSON serialization lib, designed for more flexibility than `serde_json` offers.\n//!\n//! Features:\n//!\n//! ## Dynamic construction\n//!\n//! Sometimes you have dynamic values you want to serialize, that are not already in a serde-aware model like a struct or a Vec etc.\n//! To achieve this with serde, you need to implement a lot of different traits on a lot of different new-types.\n//! Because of this, it's often easier to give-in and pull all the data into a serde-aware model (`serde_json::Value` or some intermediate struct),\n//! but that is often not very efficient.\n//!\n//! This crate allows full control over the JSON encoding without needing to implement any extra traits. Just call the\n//! relevant functions, and it will guarantee a correctly encoded JSON value.\n//!\n//! ## Async construction\n//!\n//! Similar to the above, sometimes the values arrive asynchronously. Often collecting those values in memory\n//! is more expensive than writing them as JSON, since the overheads of `Vec` and `String` is much higher, however\n//! there are exceptions.\n//!\n//! Serializing to JSON all in one go is also more CPU intensive and can cause lag spikes,\n//! whereas serializing values incrementally spreads out the CPU load and reduces lag.\n//!\n//! ## Examples\n//!\n//! To represent the following JSON as a compact string\n//!\n//! ```json\n//! {\n//!   \"results\": {\n//!     \"rows\": [\n//!       {\n//!         \"id\": 1,\n//!         \"value\": null\n//!       },\n//!       {\n//!         \"id\": 2,\n//!         \"value\": \"hello\"\n//!       }\n//!     ]\n//!   }\n//! }\n//! ```\n//!\n//! We can use the following code:\n//!\n//! ```\n//! // create the outer object\n//! let s = json::value_to_string!(|v| json::value_as_object!(|v| {\n//!     // create an entry with key \"results\" and start an object value associated with it.\n//!     let results = v.key(\"results\");\n//!     json::value_as_object!(|results| {\n//!         
// create an entry with key \"rows\" and start a list value associated with it.\n//!         let rows = results.key(\"rows\");\n//!         json::value_as_list!(|rows| {\n//!             // create a list entry and start an object value associated with it.\n//!             let row = rows.entry();\n//!             json::value_as_object!(|row| {\n//!                 // add entry \"id\": 1\n//!                 row.entry(\"id\", 1);\n//!                 // add entry \"value\": null\n//!                 row.entry(\"value\", json::Null);\n//!             });\n//!\n//!             // create a list entry and start an object value associated with it.\n//!             let row = rows.entry();\n//!             json::value_as_object!(|row| {\n//!                 // add entry \"id\": 2\n//!                 row.entry(\"id\", 2);\n//!                 // add entry \"value\": \"hello\"\n//!                 row.entry(\"value\", \"hello\");\n//!             });\n//!         });\n//!     });\n//! }));\n//!\n//! assert_eq!(s, r#\"{\"results\":{\"rows\":[{\"id\":1,\"value\":null},{\"id\":2,\"value\":\"hello\"}]}}\"#);\n//! ```\n\nmod macros;\nmod str;\nmod value;\n\npub use value::{Null, ValueEncoder};\n\n#[must_use]\n/// Serialize a single json value.\npub struct ValueSer<'buf> {\n    buf: &'buf mut Vec<u8>,\n    start: usize,\n}\n\nimpl<'buf> ValueSer<'buf> {\n    /// Create a new json value serializer.\n    pub fn new(buf: &'buf mut Vec<u8>) -> Self {\n        Self { buf, start: 0 }\n    }\n\n    /// Borrow the underlying buffer\n    pub fn as_buffer(&self) -> &[u8] {\n        self.buf\n    }\n\n    #[inline]\n    pub fn value(self, e: impl ValueEncoder) {\n        e.encode(self);\n    }\n\n    /// Write raw bytes to the buf. 
This must be already JSON encoded.\n    #[inline]\n    pub fn write_raw_json(self, data: &[u8]) {\n        self.buf.extend_from_slice(data);\n        self.finish();\n    }\n\n    /// Start a new object serializer.\n    #[inline]\n    pub fn object(self) -> ObjectSer<'buf> {\n        ObjectSer::new(self)\n    }\n\n    /// Start a new list serializer.\n    #[inline]\n    pub fn list(self) -> ListSer<'buf> {\n        ListSer::new(self)\n    }\n\n    /// Finish the value ser.\n    #[inline]\n    fn finish(self) {\n        // don't trigger the drop handler which triggers a rollback.\n        // this won't cause memory leaks because `ValueSer` owns no allocations.\n        std::mem::forget(self);\n    }\n}\n\nimpl Drop for ValueSer<'_> {\n    fn drop(&mut self) {\n        self.buf.truncate(self.start);\n    }\n}\n\n#[must_use]\n/// Serialize a json object.\npub struct ObjectSer<'buf> {\n    value: ValueSer<'buf>,\n    start: usize,\n}\n\nimpl<'buf> ObjectSer<'buf> {\n    /// Start a new object serializer.\n    #[inline]\n    pub fn new(value: ValueSer<'buf>) -> Self {\n        value.buf.push(b'{');\n        let start = value.buf.len();\n        Self { value, start }\n    }\n\n    /// Borrow the underlying buffer\n    pub fn as_buffer(&self) -> &[u8] {\n        self.value.as_buffer()\n    }\n\n    /// Start a new object entry with the given string key, returning a [`ValueSer`] for the associated value.\n    #[inline]\n    pub fn key(&mut self, key: impl KeyEncoder) -> ValueSer<'_> {\n        key.write_key(self)\n    }\n\n    /// Write an entry (key-value pair) to the object.\n    #[inline]\n    pub fn entry(&mut self, key: impl KeyEncoder, val: impl ValueEncoder) {\n        self.key(key).value(val);\n    }\n\n    #[inline]\n    fn entry_inner(&mut self, f: impl FnOnce(&mut Vec<u8>)) -> ValueSer<'_> {\n        // track before the separator so that when the value is rolled back, it also removes the separator.\n        let start = self.value.buf.len();\n\n        // push separator 
if necessary\n        if self.value.buf.len() > self.start {\n            self.value.buf.push(b',');\n        }\n        // push key\n        f(self.value.buf);\n        // push value separator\n        self.value.buf.push(b':');\n\n        // return value writer.\n        ValueSer {\n            buf: self.value.buf,\n            start,\n        }\n    }\n\n    /// Reset the buffer back to before this object was started.\n    #[inline]\n    pub fn rollback(self) -> ValueSer<'buf> {\n        // Do not fully reset the value, only reset it to before the `{`.\n        // This ensures any `,` before this value are not clobbered.\n        self.value.buf.truncate(self.start - 1);\n        self.value\n    }\n\n    /// Finish the object ser.\n    #[inline]\n    pub fn finish(self) {\n        self.value.buf.push(b'}');\n        self.value.finish();\n    }\n}\n\npub trait KeyEncoder {\n    fn write_key<'a>(self, obj: &'a mut ObjectSer) -> ValueSer<'a>;\n}\n\n#[must_use]\n/// Serialize a json list.\npub struct ListSer<'buf> {\n    value: ValueSer<'buf>,\n    start: usize,\n}\n\nimpl<'buf> ListSer<'buf> {\n    /// Start a new list serializer.\n    #[inline]\n    pub fn new(value: ValueSer<'buf>) -> Self {\n        value.buf.push(b'[');\n        let start = value.buf.len();\n        Self { value, start }\n    }\n\n    /// Borrow the underlying buffer\n    pub fn as_buffer(&self) -> &[u8] {\n        self.value.as_buffer()\n    }\n\n    /// Write a value to the list.\n    #[inline]\n    pub fn push(&mut self, val: impl ValueEncoder) {\n        self.entry().value(val);\n    }\n\n    /// Start a new value entry in this list.\n    #[inline]\n    pub fn entry(&mut self) -> ValueSer<'_> {\n        // track before the separator so that when the value is rolled back, it also removes the separator.\n        let start = self.value.buf.len();\n\n        // push separator if necessary\n        if self.value.buf.len() > self.start {\n            self.value.buf.push(b',');\n        }\n\n        // 
return value writer.\n        ValueSer {\n            buf: self.value.buf,\n            start,\n        }\n    }\n\n    /// Reset the buffer back to before this object was started.\n    #[inline]\n    pub fn rollback(self) -> ValueSer<'buf> {\n        // Do not fully reset the value, only reset it to before the `[`.\n        // This ensures any `,` before this value are not clobbered.\n        self.value.buf.truncate(self.start - 1);\n        self.value\n    }\n\n    /// Finish the object ser.\n    #[inline]\n    pub fn finish(self) {\n        self.value.buf.push(b']');\n        self.value.finish();\n    }\n}\n\n#[cfg(test)]\nmod tests {\n    use crate::{Null, ValueSer};\n\n    #[test]\n    fn object() {\n        let mut buf = vec![];\n        let mut object = ValueSer::new(&mut buf).object();\n        object.entry(\"foo\", \"bar\");\n        object.entry(\"baz\", Null);\n        object.finish();\n\n        assert_eq!(buf, br#\"{\"foo\":\"bar\",\"baz\":null}\"#);\n    }\n\n    #[test]\n    fn list() {\n        let mut buf = vec![];\n        let mut list = ValueSer::new(&mut buf).list();\n        list.entry().value(\"bar\");\n        list.entry().value(Null);\n        list.finish();\n\n        assert_eq!(buf, br#\"[\"bar\",null]\"#);\n    }\n\n    #[test]\n    fn object_macro() {\n        let res = crate::value_to_string!(|obj| {\n            crate::value_as_object!(|obj| {\n                obj.entry(\"foo\", \"bar\");\n                obj.entry(\"baz\", Null);\n            })\n        });\n\n        assert_eq!(res, r#\"{\"foo\":\"bar\",\"baz\":null}\"#);\n    }\n\n    #[test]\n    fn list_macro() {\n        let res = crate::value_to_string!(|list| {\n            crate::value_as_list!(|list| {\n                list.entry().value(\"bar\");\n                list.entry().value(Null);\n            })\n        });\n\n        assert_eq!(res, r#\"[\"bar\",null]\"#);\n    }\n\n    #[test]\n    fn rollback_on_drop() {\n        let res = crate::value_to_string!(|list| {\n     
       crate::value_as_list!(|list| {\n                list.entry().value(\"bar\");\n\n                'cancel: {\n                    let nested_list = list.entry();\n                    crate::value_as_list!(|nested_list| {\n                        nested_list.entry().value(1);\n\n                        assert_eq!(nested_list.as_buffer(), br#\"[\"bar\",[1\"#);\n                        if true {\n                            break 'cancel;\n                        }\n                    })\n                }\n\n                assert_eq!(list.as_buffer(), br#\"[\"bar\"\"#);\n\n                list.entry().value(Null);\n            })\n        });\n\n        assert_eq!(res, r#\"[\"bar\",null]\"#);\n    }\n\n    #[test]\n    fn rollback_object() {\n        let res = crate::value_to_string!(|obj| {\n            crate::value_as_object!(|obj| {\n                let entry = obj.key(\"1\");\n                entry.value(1_i32);\n\n                let entry = obj.key(\"2\");\n                let entry = {\n                    let mut nested_obj = entry.object();\n                    nested_obj.entry(\"foo\", \"bar\");\n                    nested_obj.rollback()\n                };\n\n                entry.value(2_i32);\n            })\n        });\n\n        assert_eq!(res, r#\"{\"1\":1,\"2\":2}\"#);\n    }\n\n    #[test]\n    fn rollback_list() {\n        let res = crate::value_to_string!(|list| {\n            crate::value_as_list!(|list| {\n                let entry = list.entry();\n                entry.value(1_i32);\n\n                let entry = list.entry();\n                let entry = {\n                    let mut nested_list = entry.list();\n                    nested_list.push(\"foo\");\n                    nested_list.rollback()\n                };\n\n                entry.value(2_i32);\n            })\n        });\n\n        assert_eq!(res, r#\"[1,2]\"#);\n    }\n\n    #[test]\n    fn string_escaping() {\n        let mut buf = vec![];\n        let mut object = 
ValueSer::new(&mut buf).object();\n\n        let key = \"hello\";\n        let value = \"\\n world\";\n\n        object.entry(format_args!(\"{key:?}\"), value);\n        object.finish();\n\n        assert_eq!(buf, br#\"{\"\\\"hello\\\"\":\"\\n world\"}\"#);\n    }\n}\n"
  },
  {
    "path": "libs/proxy/json/src/macros.rs",
    "content": "//! # Examples\n//!\n//! ```\n//! use futures::{StreamExt, TryStream, TryStreamExt};\n//!\n//! async fn stream_to_json_list<S, T, E>(mut s: S) -> Result<String, E>\n//! where\n//!     S: TryStream<Ok = T, Error = E> + Unpin,\n//!     T: json::ValueEncoder\n//! {\n//!     Ok(json::value_to_string!(|val| json::value_as_list!(|val| {\n//!         // note how we can use `.await` and `?` in here.\n//!         while let Some(value) = s.try_next().await? {\n//!             val.push(value);\n//!         }\n//!     })))\n//! }\n//!\n//! let stream = futures::stream::iter([1, 2, 3]).map(Ok::<i32, ()>);\n//! let json_string = futures::executor::block_on(stream_to_json_list(stream)).unwrap();\n//! assert_eq!(json_string, \"[1,2,3]\");\n//! ```\n\n/// A helper to create a new JSON vec.\n///\n/// Implemented as a macro to preserve all control flow.\n#[macro_export]\nmacro_rules! value_to_vec {\n    (|$val:ident| $body:expr) => {{\n        let mut buf = vec![];\n        let $val = $crate::ValueSer::new(&mut buf);\n        let _: () = $body;\n        buf\n    }};\n}\n\n/// A helper to create a new JSON string.\n///\n/// Implemented as a macro to preserve all control flow.\n#[macro_export]\nmacro_rules! value_to_string {\n    (|$val:ident| $body:expr) => {{\n        ::std::string::String::from_utf8($crate::value_to_vec!(|$val| $body))\n            .expect(\"json should be valid utf8\")\n    }};\n}\n\n/// A helper that ensures the [`ObjectSer::finish`](crate::ObjectSer::finish) method is called on completion.\n///\n/// Consumes `$val` and assigns it as an [`ObjectSer`](crate::ObjectSer) serializer.\n/// The serializer is only 'finished' if the body completes.\n/// The serializer is rolled back if `break`/`return` escapes the body.\n///\n/// Implemented as a macro to preserve all control flow.\n#[macro_export]\nmacro_rules! 
value_as_object {\n    (|$val:ident| $body:expr) => {{\n        let mut obj = $crate::ObjectSer::new($val);\n\n        let $val = &mut obj;\n        let res = $body;\n\n        obj.finish();\n        res\n    }};\n}\n\n/// A helper that ensures the [`ListSer::finish`](crate::ListSer::finish) method is called on completion.\n///\n/// Consumes `$val` and assigns it as a [`ListSer`](crate::ListSer) serializer.\n/// The serializer is only 'finished' if the body completes.\n/// The serializer is rolled back if `break`/`return` escapes the body.\n///\n/// Implemented as a macro to preserve all control flow.\n#[macro_export]\nmacro_rules! value_as_list {\n    (|$val:ident| $body:expr) => {{\n        let mut list = $crate::ListSer::new($val);\n\n        let $val = &mut list;\n        let res = $body;\n\n        list.finish();\n        res\n    }};\n}\n"
  },
  {
    "path": "libs/proxy/json/src/str.rs",
    "content": "//! Helpers for serializing escaped strings.\n//!\n//! ## License\n//!\n//! <https://github.com/serde-rs/json/blob/c1826ebcccb1a520389c6b78ad3da15db279220d/src/ser.rs#L1514-L1552>\n//! <https://github.com/serde-rs/json/blob/c1826ebcccb1a520389c6b78ad3da15db279220d/src/ser.rs#L2081-L2157>\n//! Licensed by David Tolnay under MIT or Apache-2.0.\n//!\n//! With modifications by Conrad Ludgate on behalf of Databricks.\n\nuse std::fmt::{self, Write};\n\n/// Represents a character escape code in a type-safe manner.\npub enum CharEscape {\n    /// An escaped quote `\"`\n    Quote,\n    /// An escaped reverse solidus `\\`\n    ReverseSolidus,\n    // /// An escaped solidus `/`\n    // Solidus,\n    /// An escaped backspace character (usually escaped as `\\b`)\n    Backspace,\n    /// An escaped form feed character (usually escaped as `\\f`)\n    FormFeed,\n    /// An escaped line feed character (usually escaped as `\\n`)\n    LineFeed,\n    /// An escaped carriage return character (usually escaped as `\\r`)\n    CarriageReturn,\n    /// An escaped tab character (usually escaped as `\\t`)\n    Tab,\n    /// An escaped ASCII plane control character (usually escaped as\n    /// `\\u00XX` where `XX` are two hex characters)\n    AsciiControl(u8),\n}\n\nimpl CharEscape {\n    #[inline]\n    fn from_escape_table(escape: u8, byte: u8) -> CharEscape {\n        match escape {\n            self::BB => CharEscape::Backspace,\n            self::TT => CharEscape::Tab,\n            self::NN => CharEscape::LineFeed,\n            self::FF => CharEscape::FormFeed,\n            self::RR => CharEscape::CarriageReturn,\n            self::QU => CharEscape::Quote,\n            self::BS => CharEscape::ReverseSolidus,\n            self::UU => CharEscape::AsciiControl(byte),\n            _ => unreachable!(),\n        }\n    }\n}\n\npub(crate) fn format_escaped_str(writer: &mut Vec<u8>, value: &str) {\n    writer.reserve(2 + value.len());\n\n    writer.push(b'\"');\n\n    let rest = 
format_escaped_str_contents(writer, value);\n    writer.extend_from_slice(rest);\n\n    writer.push(b'\"');\n}\n\npub(crate) fn format_escaped_fmt(writer: &mut Vec<u8>, args: fmt::Arguments) {\n    writer.push(b'\"');\n\n    Collect { buf: writer }\n        .write_fmt(args)\n        .expect(\"formatting should not error\");\n\n    writer.push(b'\"');\n}\n\nstruct Collect<'buf> {\n    buf: &'buf mut Vec<u8>,\n}\n\nimpl fmt::Write for Collect<'_> {\n    fn write_str(&mut self, s: &str) -> fmt::Result {\n        let last = format_escaped_str_contents(self.buf, s);\n        self.buf.extend(last);\n        Ok(())\n    }\n}\n\n// writes any escape sequences, and returns the suffix still needed to be written.\nfn format_escaped_str_contents<'a>(writer: &mut Vec<u8>, value: &'a str) -> &'a [u8] {\n    let bytes = value.as_bytes();\n\n    let mut start = 0;\n\n    for (i, &byte) in bytes.iter().enumerate() {\n        let escape = ESCAPE[byte as usize];\n        if escape == 0 {\n            continue;\n        }\n\n        writer.extend_from_slice(&bytes[start..i]);\n\n        let char_escape = CharEscape::from_escape_table(escape, byte);\n        write_char_escape(writer, char_escape);\n\n        start = i + 1;\n    }\n\n    &bytes[start..]\n}\n\nconst BB: u8 = b'b'; // \\x08\nconst TT: u8 = b't'; // \\x09\nconst NN: u8 = b'n'; // \\x0A\nconst FF: u8 = b'f'; // \\x0C\nconst RR: u8 = b'r'; // \\x0D\nconst QU: u8 = b'\"'; // \\x22\nconst BS: u8 = b'\\\\'; // \\x5C\nconst UU: u8 = b'u'; // \\x00...\\x1F except the ones above\nconst __: u8 = 0;\n\n// Lookup table of escape sequences. A value of b'x' at index i means that byte\n// i is escaped as \"\\x\" in JSON. 
A value of 0 means that byte i is not escaped.\nstatic ESCAPE: [u8; 256] = [\n    //   1   2   3   4   5   6   7   8   9   A   B   C   D   E   F\n    UU, UU, UU, UU, UU, UU, UU, UU, BB, TT, NN, UU, FF, RR, UU, UU, // 0\n    UU, UU, UU, UU, UU, UU, UU, UU, UU, UU, UU, UU, UU, UU, UU, UU, // 1\n    __, __, QU, __, __, __, __, __, __, __, __, __, __, __, __, __, // 2\n    __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 3\n    __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 4\n    __, __, __, __, __, __, __, __, __, __, __, __, BS, __, __, __, // 5\n    __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 6\n    __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 7\n    __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 8\n    __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 9\n    __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // A\n    __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // B\n    __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // C\n    __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // D\n    __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // E\n    __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // F\n];\n\nfn write_char_escape(writer: &mut Vec<u8>, char_escape: CharEscape) {\n    let s = match char_escape {\n        CharEscape::Quote => b\"\\\\\\\"\",\n        CharEscape::ReverseSolidus => b\"\\\\\\\\\",\n        // CharEscape::Solidus => b\"\\\\/\",\n        CharEscape::Backspace => b\"\\\\b\",\n        CharEscape::FormFeed => b\"\\\\f\",\n        CharEscape::LineFeed => b\"\\\\n\",\n        CharEscape::CarriageReturn => b\"\\\\r\",\n        CharEscape::Tab => b\"\\\\t\",\n        CharEscape::AsciiControl(byte) => {\n            static HEX_DIGITS: [u8; 16] = *b\"0123456789abcdef\";\n            let bytes = &[\n                b'\\\\',\n                
b'u',\n                b'0',\n                b'0',\n                HEX_DIGITS[(byte >> 4) as usize],\n                HEX_DIGITS[(byte & 0xF) as usize],\n            ];\n            return writer.extend_from_slice(bytes);\n        }\n    };\n\n    writer.extend_from_slice(s);\n}\n"
  },
  {
    "path": "libs/proxy/json/src/value.rs",
    "content": "use core::fmt;\nuse std::collections::{BTreeMap, HashMap};\n\nuse crate::str::{format_escaped_fmt, format_escaped_str};\nuse crate::{KeyEncoder, ObjectSer, ValueSer, value_as_list, value_as_object};\n\n/// Write a value to the underlying json representation.\npub trait ValueEncoder {\n    fn encode(self, v: ValueSer<'_>);\n}\n\npub(crate) fn write_int(x: impl itoa::Integer, b: &mut Vec<u8>) {\n    b.extend_from_slice(itoa::Buffer::new().format(x).as_bytes());\n}\n\npub(crate) fn write_float(x: impl ryu::Float, b: &mut Vec<u8>) {\n    b.extend_from_slice(ryu::Buffer::new().format(x).as_bytes());\n}\n\nimpl<T: Copy + ValueEncoder> ValueEncoder for &T {\n    #[inline]\n    fn encode(self, v: ValueSer<'_>) {\n        T::encode(*self, v);\n    }\n}\n\nimpl ValueEncoder for &str {\n    #[inline]\n    fn encode(self, v: ValueSer<'_>) {\n        format_escaped_str(v.buf, self);\n        v.finish();\n    }\n}\n\nimpl ValueEncoder for fmt::Arguments<'_> {\n    #[inline]\n    fn encode(self, v: ValueSer<'_>) {\n        if let Some(s) = self.as_str() {\n            format_escaped_str(v.buf, s);\n        } else {\n            format_escaped_fmt(v.buf, self);\n        }\n        v.finish();\n    }\n}\n\nmacro_rules! int {\n    [$($t:ty),*] => {\n        $(\n            impl ValueEncoder for $t {\n                #[inline]\n                fn encode(self, v: ValueSer<'_>) {\n                    write_int(self, v.buf);\n                    v.finish();\n                }\n            }\n        )*\n    };\n}\n\nint![u8, u16, u32, u64, usize, u128];\nint![i8, i16, i32, i64, isize, i128];\n\nmacro_rules! 
float {\n    [$($t:ty),*] => {\n        $(\n            impl ValueEncoder for $t {\n                #[inline]\n                fn encode(self, v: ValueSer<'_>) {\n                    write_float(self, v.buf);\n                    v.finish();\n                }\n            }\n        )*\n    };\n}\n\nfloat![f32, f64];\n\nimpl ValueEncoder for bool {\n    #[inline]\n    fn encode(self, v: ValueSer<'_>) {\n        v.write_raw_json(if self { b\"true\" } else { b\"false\" });\n    }\n}\n\nimpl<T: ValueEncoder> ValueEncoder for Option<T> {\n    #[inline]\n    fn encode(self, v: ValueSer<'_>) {\n        match self {\n            Some(value) => value.encode(v),\n            None => Null.encode(v),\n        }\n    }\n}\n\nimpl KeyEncoder for &str {\n    #[inline]\n    fn write_key<'a>(self, obj: &'a mut ObjectSer) -> ValueSer<'a> {\n        let obj = &mut *obj;\n        obj.entry_inner(|b| format_escaped_str(b, self))\n    }\n}\n\nimpl KeyEncoder for fmt::Arguments<'_> {\n    #[inline]\n    fn write_key<'a>(self, obj: &'a mut ObjectSer) -> ValueSer<'a> {\n        if let Some(key) = self.as_str() {\n            obj.entry_inner(|b| format_escaped_str(b, key))\n        } else {\n            obj.entry_inner(|b| format_escaped_fmt(b, self))\n        }\n    }\n}\n\n/// Represents the JSON null value.\npub struct Null;\n\nimpl ValueEncoder for Null {\n    #[inline]\n    fn encode(self, v: ValueSer<'_>) {\n        v.write_raw_json(b\"null\");\n    }\n}\n\nimpl<T: ValueEncoder> ValueEncoder for Vec<T> {\n    #[inline]\n    fn encode(self, v: ValueSer<'_>) {\n        value_as_list!(|v| {\n            for t in self {\n                v.entry().value(t);\n            }\n        });\n    }\n}\n\nimpl<T: Copy + ValueEncoder> ValueEncoder for &[T] {\n    #[inline]\n    fn encode(self, v: ValueSer<'_>) {\n        value_as_list!(|v| {\n            for t in self {\n                v.entry().value(t);\n            }\n        });\n    }\n}\n\nimpl<K: KeyEncoder, V: ValueEncoder, S> 
ValueEncoder for HashMap<K, V, S> {\n    #[inline]\n    fn encode(self, o: ValueSer<'_>) {\n        value_as_object!(|o| {\n            for (k, v) in self {\n                o.entry(k, v);\n            }\n        });\n    }\n}\n\nimpl<K: KeyEncoder, V: ValueEncoder> ValueEncoder for BTreeMap<K, V> {\n    #[inline]\n    fn encode(self, o: ValueSer<'_>) {\n        value_as_object!(|o| {\n            for (k, v) in self {\n                o.entry(k, v);\n            }\n        });\n    }\n}\n"
  },
  {
    "path": "libs/proxy/postgres-protocol2/Cargo.toml",
    "content": "[package]\nname = \"postgres-protocol2\"\nversion = \"0.1.0\"\nedition = \"2024\"\nlicense = \"MIT/Apache-2.0\"\n\n[dependencies]\nbase64.workspace = true\nbyteorder.workspace = true\nbytes.workspace = true\nfallible-iterator.workspace = true\nhmac.workspace = true\nmemchr = \"2.0\"\nrand.workspace = true\nsha2.workspace = true\nstringprep = \"0.1\"\ntokio = { workspace = true, features = [\"rt\"] }\n\n[dev-dependencies]\ntokio = { workspace = true, features = [\"full\"] }\n"
  },
  {
    "path": "libs/proxy/postgres-protocol2/src/authentication/mod.rs",
    "content": "//! Authentication protocol support.\npub mod sasl;\n"
  },
  {
    "path": "libs/proxy/postgres-protocol2/src/authentication/sasl.rs",
    "content": "//! SASL-based authentication support.\n\nuse std::fmt::Write;\nuse std::{io, iter, mem, str};\n\nuse base64::Engine as _;\nuse base64::prelude::BASE64_STANDARD;\nuse hmac::{Hmac, Mac};\nuse rand::{self, Rng};\nuse sha2::digest::FixedOutput;\nuse sha2::{Digest, Sha256};\nuse tokio::task::yield_now;\n\nconst NONCE_LENGTH: usize = 24;\n\n/// The identifier of the SCRAM-SHA-256 SASL authentication mechanism.\npub const SCRAM_SHA_256: &str = \"SCRAM-SHA-256\";\n/// The identifier of the SCRAM-SHA-256-PLUS SASL authentication mechanism.\npub const SCRAM_SHA_256_PLUS: &str = \"SCRAM-SHA-256-PLUS\";\n\n// since postgres passwords are not required to exclude saslprep-prohibited\n// characters or even be valid UTF8, we run saslprep if possible and otherwise\n// return the raw password.\nfn normalize(pass: &[u8]) -> Vec<u8> {\n    let pass = match str::from_utf8(pass) {\n        Ok(pass) => pass,\n        Err(_) => return pass.to_vec(),\n    };\n\n    match stringprep::saslprep(pass) {\n        Ok(pass) => pass.into_owned().into_bytes(),\n        Err(_) => pass.as_bytes().to_vec(),\n    }\n}\n\npub(crate) async fn hi(str: &[u8], salt: &[u8], iterations: u32) -> [u8; 32] {\n    let mut hmac =\n        Hmac::<Sha256>::new_from_slice(str).expect(\"HMAC is able to accept all key sizes\");\n    hmac.update(salt);\n    hmac.update(&[0, 0, 0, 1]);\n    let mut prev = hmac.finalize().into_bytes();\n\n    let mut hi = prev;\n\n    for i in 1..iterations {\n        let mut hmac = Hmac::<Sha256>::new_from_slice(str).expect(\"already checked above\");\n        hmac.update(&prev);\n        prev = hmac.finalize().into_bytes();\n\n        for (hi, prev) in hi.iter_mut().zip(prev) {\n            *hi ^= prev;\n        }\n        // yield every ~250us\n        // hopefully reduces tail latencies\n        if i.is_multiple_of(1024) {\n            yield_now().await\n        }\n    }\n\n    hi.into()\n}\n\nenum ChannelBindingInner {\n    Unrequested,\n    Unsupported,\n    
TlsServerEndPoint(Vec<u8>),\n}\n\n/// The channel binding configuration for a SCRAM authentication exchange.\npub struct ChannelBinding(ChannelBindingInner);\n\nimpl ChannelBinding {\n    /// The server did not request channel binding.\n    pub fn unrequested() -> ChannelBinding {\n        ChannelBinding(ChannelBindingInner::Unrequested)\n    }\n\n    /// The server requested channel binding but the client is unable to provide it.\n    pub fn unsupported() -> ChannelBinding {\n        ChannelBinding(ChannelBindingInner::Unsupported)\n    }\n\n    /// The server requested channel binding and the client will use the `tls-server-end-point`\n    /// method.\n    pub fn tls_server_end_point(signature: Vec<u8>) -> ChannelBinding {\n        ChannelBinding(ChannelBindingInner::TlsServerEndPoint(signature))\n    }\n\n    fn gs2_header(&self) -> &'static str {\n        match self.0 {\n            ChannelBindingInner::Unrequested => \"y,,\",\n            ChannelBindingInner::Unsupported => \"n,,\",\n            ChannelBindingInner::TlsServerEndPoint(_) => \"p=tls-server-end-point,,\",\n        }\n    }\n\n    fn cbind_data(&self) -> &[u8] {\n        match self.0 {\n            ChannelBindingInner::Unrequested | ChannelBindingInner::Unsupported => &[],\n            ChannelBindingInner::TlsServerEndPoint(ref buf) => buf,\n        }\n    }\n}\n\n/// A pair of keys for the SCRAM-SHA-256 mechanism.\n/// See <https://datatracker.ietf.org/doc/html/rfc5802#section-3> for details.\n#[derive(Debug, Clone, Copy, PartialEq, Eq)]\npub struct ScramKeys<const N: usize> {\n    /// Used by server to authenticate client.\n    pub client_key: [u8; N],\n    /// Used by client to verify server's signature.\n    pub server_key: [u8; N],\n}\n\n/// Password or keys which were derived from it.\nenum Credentials<const N: usize> {\n    /// A regular password as a vector of bytes.\n    Password(Vec<u8>),\n    /// A precomputed pair of keys.\n    Keys(ScramKeys<N>),\n}\n\nenum State {\n    Update {\n     
   nonce: String,\n        password: Credentials<32>,\n        channel_binding: ChannelBinding,\n    },\n    Finish {\n        server_key: [u8; 32],\n        auth_message: String,\n    },\n    Done,\n}\n\n/// A type which handles the client side of the SCRAM-SHA-256/SCRAM-SHA-256-PLUS authentication\n/// process.\n///\n/// During the authentication process, if the backend sends an `AuthenticationSASL` message which\n/// includes `SCRAM-SHA-256` as an authentication mechanism, this type can be used.\n///\n/// After a `ScramSha256` is constructed, the buffer returned by the `message()` method should be\n/// sent to the backend in a `SASLInitialResponse` message along with the mechanism name.\n///\n/// The server will reply with an `AuthenticationSASLContinue` message. Its contents should be\n/// passed to the `update()` method, after which the buffer returned by the `message()` method\n/// should be sent to the backend in a `SASLResponse` message.\n///\n/// The server will reply with an `AuthenticationSASLFinal` message. 
Its contents should be passed\n/// to the `finish()` method, after which the authentication process is complete.\npub struct ScramSha256 {\n    message: String,\n    state: State,\n}\n\nfn nonce() -> String {\n    // rand 0.5's ThreadRng is cryptographically secure\n    let mut rng = rand::rng();\n    (0..NONCE_LENGTH)\n        .map(|_| {\n            let mut v = rng.random_range(0x21u8..0x7e);\n            if v == 0x2c {\n                v = 0x7e\n            }\n            v as char\n        })\n        .collect()\n}\n\nimpl ScramSha256 {\n    /// Constructs a new instance which will use the provided password for authentication.\n    pub fn new(password: &[u8], channel_binding: ChannelBinding) -> ScramSha256 {\n        let password = Credentials::Password(normalize(password));\n        ScramSha256::new_inner(password, channel_binding, nonce())\n    }\n\n    /// Constructs a new instance which will use the provided key pair for authentication.\n    pub fn new_with_keys(keys: ScramKeys<32>, channel_binding: ChannelBinding) -> ScramSha256 {\n        let password = Credentials::Keys(keys);\n        ScramSha256::new_inner(password, channel_binding, nonce())\n    }\n\n    fn new_inner(\n        password: Credentials<32>,\n        channel_binding: ChannelBinding,\n        nonce: String,\n    ) -> ScramSha256 {\n        ScramSha256 {\n            message: format!(\"{}n=,r={}\", channel_binding.gs2_header(), nonce),\n            state: State::Update {\n                nonce,\n                password,\n                channel_binding,\n            },\n        }\n    }\n\n    /// Returns the message which should be sent to the backend in an `SASLResponse` message.\n    pub fn message(&self) -> &[u8] {\n        if let State::Done = self.state {\n            panic!(\"invalid SCRAM state\");\n        }\n        self.message.as_bytes()\n    }\n\n    /// Updates the state machine with the response from the backend.\n    ///\n    /// This should be called when an 
`AuthenticationSASLContinue` message is received.\n    pub async fn update(&mut self, message: &[u8]) -> io::Result<()> {\n        let (client_nonce, password, channel_binding) =\n            match mem::replace(&mut self.state, State::Done) {\n                State::Update {\n                    nonce,\n                    password,\n                    channel_binding,\n                } => (nonce, password, channel_binding),\n                _ => return Err(io::Error::other(\"invalid SCRAM state\")),\n            };\n\n        let message =\n            str::from_utf8(message).map_err(|e| io::Error::new(io::ErrorKind::InvalidInput, e))?;\n\n        let parsed = Parser::new(message).server_first_message()?;\n\n        if !parsed.nonce.starts_with(&client_nonce) {\n            return Err(io::Error::new(io::ErrorKind::InvalidInput, \"invalid nonce\"));\n        }\n\n        let (client_key, server_key) = match password {\n            Credentials::Password(password) => {\n                let salt = match BASE64_STANDARD.decode(parsed.salt) {\n                    Ok(salt) => salt,\n                    Err(e) => return Err(io::Error::new(io::ErrorKind::InvalidInput, e)),\n                };\n\n                let salted_password = hi(&password, &salt, parsed.iteration_count).await;\n\n                let make_key = |name| {\n                    let mut hmac = Hmac::<Sha256>::new_from_slice(&salted_password)\n                        .expect(\"HMAC is able to accept all key sizes\");\n                    hmac.update(name);\n\n                    let mut key = [0u8; 32];\n                    key.copy_from_slice(hmac.finalize().into_bytes().as_slice());\n                    key\n                };\n\n                (make_key(b\"Client Key\"), make_key(b\"Server Key\"))\n            }\n            Credentials::Keys(keys) => (keys.client_key, keys.server_key),\n        };\n\n        let mut hash = Sha256::default();\n        hash.update(client_key);\n        let stored_key 
= hash.finalize_fixed();\n\n        let mut cbind_input = vec![];\n        cbind_input.extend(channel_binding.gs2_header().as_bytes());\n        cbind_input.extend(channel_binding.cbind_data());\n        let cbind_input = BASE64_STANDARD.encode(&cbind_input);\n\n        self.message.clear();\n        write!(&mut self.message, \"c={},r={}\", cbind_input, parsed.nonce).unwrap();\n\n        let auth_message = format!(\"n=,r={},{},{}\", client_nonce, message, self.message);\n\n        let mut hmac = Hmac::<Sha256>::new_from_slice(&stored_key)\n            .expect(\"HMAC is able to accept all key sizes\");\n        hmac.update(auth_message.as_bytes());\n        let client_signature = hmac.finalize().into_bytes();\n\n        let mut client_proof = client_key;\n        for (proof, signature) in client_proof.iter_mut().zip(client_signature) {\n            *proof ^= signature;\n        }\n\n        write!(\n            &mut self.message,\n            \",p={}\",\n            BASE64_STANDARD.encode(client_proof)\n        )\n        .unwrap();\n\n        self.state = State::Finish {\n            server_key,\n            auth_message,\n        };\n        Ok(())\n    }\n\n    /// Finalizes the authentication process.\n    ///\n    /// This should be called when the backend sends an `AuthenticationSASLFinal` message.\n    /// Authentication has only succeeded if this method returns `Ok(())`.\n    pub fn finish(&mut self, message: &[u8]) -> io::Result<()> {\n        let (server_key, auth_message) = match mem::replace(&mut self.state, State::Done) {\n            State::Finish {\n                server_key,\n                auth_message,\n            } => (server_key, auth_message),\n            _ => return Err(io::Error::other(\"invalid SCRAM state\")),\n        };\n\n        let message =\n            str::from_utf8(message).map_err(|e| io::Error::new(io::ErrorKind::InvalidInput, e))?;\n\n        let parsed = Parser::new(message).server_final_message()?;\n\n        let verifier = 
match parsed {\n            ServerFinalMessage::Error(e) => {\n                return Err(io::Error::other(format!(\"SCRAM error: {e}\")));\n            }\n            ServerFinalMessage::Verifier(verifier) => verifier,\n        };\n\n        let verifier = match BASE64_STANDARD.decode(verifier) {\n            Ok(verifier) => verifier,\n            Err(e) => return Err(io::Error::new(io::ErrorKind::InvalidInput, e)),\n        };\n\n        let mut hmac = Hmac::<Sha256>::new_from_slice(&server_key)\n            .expect(\"HMAC is able to accept all key sizes\");\n        hmac.update(auth_message.as_bytes());\n        hmac.verify_slice(&verifier)\n            .map_err(|_| io::Error::new(io::ErrorKind::InvalidInput, \"SCRAM verification error\"))\n    }\n}\n\nstruct Parser<'a> {\n    s: &'a str,\n    it: iter::Peekable<str::CharIndices<'a>>,\n}\n\nimpl<'a> Parser<'a> {\n    fn new(s: &'a str) -> Parser<'a> {\n        Parser {\n            s,\n            it: s.char_indices().peekable(),\n        }\n    }\n\n    fn eat(&mut self, target: char) -> io::Result<()> {\n        match self.it.next() {\n            Some((_, c)) if c == target => Ok(()),\n            Some((i, c)) => {\n                let m =\n                    format!(\"unexpected character at byte {i}: expected `{target}` but got `{c}\");\n                Err(io::Error::new(io::ErrorKind::InvalidInput, m))\n            }\n            None => Err(io::Error::new(\n                io::ErrorKind::UnexpectedEof,\n                \"unexpected EOF\",\n            )),\n        }\n    }\n\n    fn take_while<F>(&mut self, f: F) -> io::Result<&'a str>\n    where\n        F: Fn(char) -> bool,\n    {\n        let start = match self.it.peek() {\n            Some(&(i, _)) => i,\n            None => return Ok(\"\"),\n        };\n\n        loop {\n            match self.it.peek() {\n                Some(&(_, c)) if f(c) => {\n                    self.it.next();\n                }\n                Some(&(i, _)) => return 
Ok(&self.s[start..i]),\n                None => return Ok(&self.s[start..]),\n            }\n        }\n    }\n\n    fn printable(&mut self) -> io::Result<&'a str> {\n        self.take_while(|c| matches!(c, '\\x21'..='\\x2b' | '\\x2d'..='\\x7e'))\n    }\n\n    fn nonce(&mut self) -> io::Result<&'a str> {\n        self.eat('r')?;\n        self.eat('=')?;\n        self.printable()\n    }\n\n    fn base64(&mut self) -> io::Result<&'a str> {\n        self.take_while(|c| matches!(c, 'a'..='z' | 'A'..='Z' | '0'..='9' | '/' | '+' | '='))\n    }\n\n    fn salt(&mut self) -> io::Result<&'a str> {\n        self.eat('s')?;\n        self.eat('=')?;\n        self.base64()\n    }\n\n    fn posit_number(&mut self) -> io::Result<u32> {\n        let n = self.take_while(|c| c.is_ascii_digit())?;\n        n.parse()\n            .map_err(|e| io::Error::new(io::ErrorKind::InvalidInput, e))\n    }\n\n    fn iteration_count(&mut self) -> io::Result<u32> {\n        self.eat('i')?;\n        self.eat('=')?;\n        self.posit_number()\n    }\n\n    fn eof(&mut self) -> io::Result<()> {\n        match self.it.peek() {\n            Some(&(i, _)) => Err(io::Error::new(\n                io::ErrorKind::InvalidInput,\n                format!(\"unexpected trailing data at byte {i}\"),\n            )),\n            None => Ok(()),\n        }\n    }\n\n    fn server_first_message(&mut self) -> io::Result<ServerFirstMessage<'a>> {\n        let nonce = self.nonce()?;\n        self.eat(',')?;\n        let salt = self.salt()?;\n        self.eat(',')?;\n        let iteration_count = self.iteration_count()?;\n        self.eof()?;\n\n        Ok(ServerFirstMessage {\n            nonce,\n            salt,\n            iteration_count,\n        })\n    }\n\n    fn value(&mut self) -> io::Result<&'a str> {\n        self.take_while(|c| matches!(c, '\\0' | '=' | ','))\n    }\n\n    fn server_error(&mut self) -> io::Result<Option<&'a str>> {\n        match self.it.peek() {\n            Some(&(_, 'e')) => {}\n    
        _ => return Ok(None),\n        }\n\n        self.eat('e')?;\n        self.eat('=')?;\n        self.value().map(Some)\n    }\n\n    fn verifier(&mut self) -> io::Result<&'a str> {\n        self.eat('v')?;\n        self.eat('=')?;\n        self.base64()\n    }\n\n    fn server_final_message(&mut self) -> io::Result<ServerFinalMessage<'a>> {\n        let message = match self.server_error()? {\n            Some(error) => ServerFinalMessage::Error(error),\n            None => ServerFinalMessage::Verifier(self.verifier()?),\n        };\n        self.eof()?;\n        Ok(message)\n    }\n}\n\nstruct ServerFirstMessage<'a> {\n    nonce: &'a str,\n    salt: &'a str,\n    iteration_count: u32,\n}\n\nenum ServerFinalMessage<'a> {\n    Error(&'a str),\n    Verifier(&'a str),\n}\n\n#[cfg(test)]\nmod test {\n    use super::*;\n\n    #[test]\n    fn parse_server_first_message() {\n        let message = \"r=fyko+d2lbbFgONRv9qkxdawL3rfcNHYJY1ZVvWVs7j,s=QSXCR+Q6sek8bf92,i=4096\";\n        let message = Parser::new(message).server_first_message().unwrap();\n        assert_eq!(message.nonce, \"fyko+d2lbbFgONRv9qkxdawL3rfcNHYJY1ZVvWVs7j\");\n        assert_eq!(message.salt, \"QSXCR+Q6sek8bf92\");\n        assert_eq!(message.iteration_count, 4096);\n    }\n\n    // recorded auth exchange from psql\n    #[tokio::test]\n    async fn exchange() {\n        let password = \"foobar\";\n        let nonce = \"9IZ2O01zb9IgiIZ1WJ/zgpJB\";\n\n        let client_first = \"n,,n=,r=9IZ2O01zb9IgiIZ1WJ/zgpJB\";\n        let server_first = \"r=9IZ2O01zb9IgiIZ1WJ/zgpJBjx/oIRLs02gGSHcw1KEty3eY,s=fs3IXBy7U7+IvVjZ,i\\\n             =4096\";\n        let client_final = \"c=biws,r=9IZ2O01zb9IgiIZ1WJ/zgpJBjx/oIRLs02gGSHcw1KEty3eY,p=AmNKosjJzS3\\\n             1NTlQYNs5BTeQjdHdk7lOflDo5re2an8=\";\n        let server_final = \"v=U+ppxD5XUKtradnv8e2MkeupiA8FU87Sg8CXzXHDAzw=\";\n\n        let mut scram = ScramSha256::new_inner(\n            Credentials::Password(normalize(password.as_bytes())),\n            
ChannelBinding::unsupported(),\n            nonce.to_string(),\n        );\n        assert_eq!(str::from_utf8(scram.message()).unwrap(), client_first);\n\n        scram.update(server_first.as_bytes()).await.unwrap();\n        assert_eq!(str::from_utf8(scram.message()).unwrap(), client_final);\n\n        scram.finish(server_final.as_bytes()).unwrap();\n    }\n}\n"
  },
  {
    "path": "libs/proxy/postgres-protocol2/src/escape/mod.rs",
    "content": "//! Provides functions for escaping literals and identifiers for use\n//! in SQL queries.\n//!\n//! Prefer parameterized queries where possible. Do not escape\n//! parameters in a parameterized query.\n\n#[cfg(test)]\nmod test;\n\n/// Escape a literal and surround result with single quotes. Not\n/// recommended in most cases.\n///\n/// If input contains backslashes, result will be of the form `\n/// E'...'` so it is safe to use regardless of the setting of\n/// standard_conforming_strings.\npub fn escape_literal(input: &str) -> String {\n    escape_internal(input, false)\n}\n\n/// Escape an identifier and surround result with double quotes.\npub fn escape_identifier(input: &str) -> String {\n    escape_internal(input, true)\n}\n\n// Translation of PostgreSQL libpq's PQescapeInternal(). Does not\n// require a connection because input string is known to be valid\n// UTF-8.\n//\n// Escape arbitrary strings.  If as_ident is true, we escape the\n// result as an identifier; if false, as a literal.  The result is\n// returned in a newly allocated buffer.  
Unlike libpq, this\n// translation has no failure path: the input is already valid UTF-8,\n// so it always returns the escaped String.\nfn escape_internal(input: &str, as_ident: bool) -> String {\n    let mut num_backslashes = 0;\n    let mut num_quotes = 0;\n    let quote_char = if as_ident { '\"' } else { '\\'' };\n\n    // Scan the string for characters that must be escaped.\n    for ch in input.chars() {\n        if ch == quote_char {\n            num_quotes += 1;\n        } else if ch == '\\\\' {\n            num_backslashes += 1;\n        }\n    }\n\n    // Allocate output String.\n    let mut result_size = input.len() + num_quotes + 3; // two quotes, plus one spare byte (the NUL in libpq)\n    if !as_ident && num_backslashes > 0 {\n        result_size += num_backslashes + 2;\n    }\n\n    let mut output = String::with_capacity(result_size);\n\n    // If we are escaping a literal that contains backslashes, we use\n    // the escape string syntax so that the result is correct under\n    // either value of standard_conforming_strings.  We also emit a\n    // leading space in this case, to guard against the possibility\n    // that the result might be interpolated immediately following an\n    // identifier.\n    if !as_ident && num_backslashes > 0 {\n        output.push(' ');\n        output.push('E');\n    }\n\n    // Opening quote.\n    output.push(quote_char);\n\n    // Use fast path if possible.\n    //\n    // We've already verified that the input string is well-formed in\n    // the current encoding.  
If it contains no quotes and, in the\n    // case of literal-escaping, no backslashes, then we can just copy\n    // it directly to the output buffer, adding the necessary quotes.\n    //\n    // If not, we must rescan the input and process each character\n    // individually.\n    if num_quotes == 0 && (num_backslashes == 0 || as_ident) {\n        output.push_str(input);\n    } else {\n        for ch in input.chars() {\n            if ch == quote_char || (!as_ident && ch == '\\\\') {\n                output.push(ch);\n            }\n            output.push(ch);\n        }\n    }\n\n    output.push(quote_char);\n\n    output\n}\n"
  },
  {
    "path": "libs/proxy/postgres-protocol2/src/escape/test.rs",
    "content": "use crate::escape::{escape_identifier, escape_literal};\n\n#[test]\nfn test_escape_idenifier() {\n    assert_eq!(escape_identifier(\"foo\"), String::from(\"\\\"foo\\\"\"));\n    assert_eq!(escape_identifier(\"f\\\\oo\"), String::from(\"\\\"f\\\\oo\\\"\"));\n    assert_eq!(escape_identifier(\"f'oo\"), String::from(\"\\\"f'oo\\\"\"));\n    assert_eq!(escape_identifier(\"f\\\"oo\"), String::from(\"\\\"f\\\"\\\"oo\\\"\"));\n}\n\n#[test]\nfn test_escape_literal() {\n    assert_eq!(escape_literal(\"foo\"), String::from(\"'foo'\"));\n    assert_eq!(escape_literal(\"f\\\\oo\"), String::from(\" E'f\\\\\\\\oo'\"));\n    assert_eq!(escape_literal(\"f'oo\"), String::from(\"'f''oo'\"));\n    assert_eq!(escape_literal(\"f\\\"oo\"), String::from(\"'f\\\"oo'\"));\n}\n"
  },
  {
    "path": "libs/proxy/postgres-protocol2/src/lib.rs",
    "content": "//! Low level Postgres protocol APIs.\n//!\n//! This crate implements the low level components of Postgres's communication\n//! protocol, including message and value serialization and deserialization.\n//! It is designed to be used as a building block by higher level APIs such as\n//! `rust-postgres`, and should not typically be used directly.\n//!\n//! # Note\n//!\n//! This library assumes that the `client_encoding` backend parameter has been\n//! set to `UTF8`. It will most likely not behave properly if that is not the case.\n#![warn(missing_docs, clippy::all)]\n\nuse std::io;\n\nuse byteorder::{BigEndian, ByteOrder};\nuse bytes::{BufMut, BytesMut};\n\npub mod authentication;\npub mod escape;\npub mod message;\npub mod password;\npub mod types;\n\n/// A Postgres OID.\npub type Oid = u32;\n\n/// A Postgres Log Sequence Number (LSN).\npub type Lsn = u64;\n\n/// An enum indicating if a value is `NULL` or not.\npub enum IsNull {\n    /// The value is `NULL`.\n    Yes,\n    /// The value is not `NULL`.\n    No,\n}\n\nfn write_nullable<F, E>(serializer: F, buf: &mut BytesMut) -> Result<(), E>\nwhere\n    F: FnOnce(&mut BytesMut) -> Result<IsNull, E>,\n    E: From<io::Error>,\n{\n    let base = buf.len();\n    buf.put_i32(0);\n    let size = match serializer(buf)? {\n        IsNull::No => i32::from_usize(buf.len() - base - 4)?,\n        IsNull::Yes => -1,\n    };\n    BigEndian::write_i32(&mut buf[base..], size);\n\n    Ok(())\n}\n\ntrait FromUsize: Sized {\n    fn from_usize(x: usize) -> Result<Self, io::Error>;\n}\n\nmacro_rules! 
from_usize {\n    ($t:ty) => {\n        impl FromUsize for $t {\n            #[inline]\n            fn from_usize(x: usize) -> io::Result<$t> {\n                if x > <$t>::MAX as usize {\n                    Err(io::Error::new(\n                        io::ErrorKind::InvalidInput,\n                        \"value too large to transmit\",\n                    ))\n                } else {\n                    Ok(x as $t)\n                }\n            }\n        }\n    };\n}\n\nfrom_usize!(i16);\nfrom_usize!(i32);\n"
  },
  {
    "path": "libs/proxy/postgres-protocol2/src/message/backend.rs",
    "content": "#![allow(missing_docs)]\n\nuse std::io::{self, Read};\nuse std::ops::Range;\nuse std::{cmp, str};\n\nuse byteorder::{BigEndian, ByteOrder, ReadBytesExt};\nuse bytes::{Bytes, BytesMut};\nuse fallible_iterator::FallibleIterator;\nuse memchr::memchr;\n\nuse crate::Oid;\n\n// top-level message tags\nconst PARSE_COMPLETE_TAG: u8 = b'1';\nconst BIND_COMPLETE_TAG: u8 = b'2';\nconst CLOSE_COMPLETE_TAG: u8 = b'3';\npub const NOTIFICATION_RESPONSE_TAG: u8 = b'A';\nconst COPY_DONE_TAG: u8 = b'c';\nconst COMMAND_COMPLETE_TAG: u8 = b'C';\nconst COPY_DATA_TAG: u8 = b'd';\nconst DATA_ROW_TAG: u8 = b'D';\nconst ERROR_RESPONSE_TAG: u8 = b'E';\nconst COPY_IN_RESPONSE_TAG: u8 = b'G';\nconst COPY_OUT_RESPONSE_TAG: u8 = b'H';\nconst COPY_BOTH_RESPONSE_TAG: u8 = b'W';\nconst EMPTY_QUERY_RESPONSE_TAG: u8 = b'I';\nconst BACKEND_KEY_DATA_TAG: u8 = b'K';\npub const NO_DATA_TAG: u8 = b'n';\npub const NOTICE_RESPONSE_TAG: u8 = b'N';\nconst AUTHENTICATION_TAG: u8 = b'R';\nconst PORTAL_SUSPENDED_TAG: u8 = b's';\npub const PARAMETER_STATUS_TAG: u8 = b'S';\nconst PARAMETER_DESCRIPTION_TAG: u8 = b't';\nconst ROW_DESCRIPTION_TAG: u8 = b'T';\npub const READY_FOR_QUERY_TAG: u8 = b'Z';\n\n#[derive(Debug, Copy, Clone)]\npub struct Header {\n    tag: u8,\n    len: i32,\n}\n\n#[allow(clippy::len_without_is_empty)]\nimpl Header {\n    #[inline]\n    pub fn parse(buf: &[u8]) -> io::Result<Option<Header>> {\n        if buf.len() < 5 {\n            return Ok(None);\n        }\n\n        let tag = buf[0];\n        let len = BigEndian::read_i32(&buf[1..]);\n\n        if len < 4 {\n            return Err(io::Error::new(\n                io::ErrorKind::InvalidData,\n                \"invalid message length: header length < 4\",\n            ));\n        }\n\n        Ok(Some(Header { tag, len }))\n    }\n\n    #[inline]\n    pub fn tag(self) -> u8 {\n        self.tag\n    }\n\n    #[inline]\n    pub fn len(self) -> i32 {\n        self.len\n    }\n}\n\n/// An enum representing Postgres backend 
messages.\npub enum Message {\n    AuthenticationCleartextPassword,\n    AuthenticationGss,\n    AuthenticationKerberosV5,\n    AuthenticationMd5Password,\n    AuthenticationOk,\n    AuthenticationScmCredential,\n    AuthenticationSspi,\n    AuthenticationGssContinue,\n    AuthenticationSasl(AuthenticationSaslBody),\n    AuthenticationSaslContinue(AuthenticationSaslContinueBody),\n    AuthenticationSaslFinal(AuthenticationSaslFinalBody),\n    BackendKeyData(BackendKeyDataBody),\n    BindComplete,\n    CloseComplete,\n    CommandComplete(CommandCompleteBody),\n    CopyData,\n    CopyDone,\n    CopyInResponse,\n    CopyOutResponse,\n    CopyBothResponse,\n    DataRow(DataRowBody),\n    EmptyQueryResponse,\n    ErrorResponse(ErrorResponseBody),\n    NoData,\n    NoticeResponse(NoticeResponseBody),\n    NotificationResponse(NotificationResponseBody),\n    ParameterDescription(ParameterDescriptionBody),\n    ParameterStatus(ParameterStatusBody),\n    ParseComplete,\n    PortalSuspended,\n    ReadyForQuery(ReadyForQueryBody),\n    RowDescription(RowDescriptionBody),\n}\n\nimpl Message {\n    #[inline]\n    pub fn parse(buf: &mut BytesMut) -> io::Result<Option<Message>> {\n        if buf.len() < 5 {\n            let to_read = 5 - buf.len();\n            buf.reserve(to_read);\n            return Ok(None);\n        }\n\n        let tag = buf[0];\n        let len = (&buf[1..5]).read_u32::<BigEndian>().unwrap();\n\n        if len < 4 {\n            return Err(io::Error::new(\n                io::ErrorKind::InvalidInput,\n                \"invalid message length: parsing u32\",\n            ));\n        }\n\n        let total_len = len as usize + 1;\n        if buf.len() < total_len {\n            let to_read = total_len - buf.len();\n            buf.reserve(to_read);\n            return Ok(None);\n        }\n\n        let mut buf = Buffer {\n            bytes: buf.split_to(total_len).freeze(),\n            idx: 5,\n        };\n\n        let message = match tag {\n            
PARSE_COMPLETE_TAG => Message::ParseComplete,\n            BIND_COMPLETE_TAG => Message::BindComplete,\n            CLOSE_COMPLETE_TAG => Message::CloseComplete,\n            NOTIFICATION_RESPONSE_TAG => Message::NotificationResponse(NotificationResponseBody {}),\n            COPY_DONE_TAG => Message::CopyDone,\n            COMMAND_COMPLETE_TAG => {\n                let tag = buf.read_cstr()?;\n                Message::CommandComplete(CommandCompleteBody { tag })\n            }\n            COPY_DATA_TAG => Message::CopyData,\n            DATA_ROW_TAG => {\n                let len = buf.read_u16::<BigEndian>()?;\n                let storage = buf.read_all();\n                Message::DataRow(DataRowBody { storage, len })\n            }\n            ERROR_RESPONSE_TAG => {\n                let storage = buf.read_all();\n                Message::ErrorResponse(ErrorResponseBody { storage })\n            }\n            COPY_IN_RESPONSE_TAG => Message::CopyInResponse,\n            COPY_OUT_RESPONSE_TAG => Message::CopyOutResponse,\n            COPY_BOTH_RESPONSE_TAG => Message::CopyBothResponse,\n            EMPTY_QUERY_RESPONSE_TAG => Message::EmptyQueryResponse,\n            BACKEND_KEY_DATA_TAG => {\n                let process_id = buf.read_i32::<BigEndian>()?;\n                let secret_key = buf.read_i32::<BigEndian>()?;\n                Message::BackendKeyData(BackendKeyDataBody {\n                    process_id,\n                    secret_key,\n                })\n            }\n            NO_DATA_TAG => Message::NoData,\n            NOTICE_RESPONSE_TAG => {\n                let storage = buf.read_all();\n                Message::NoticeResponse(NoticeResponseBody { storage })\n            }\n            AUTHENTICATION_TAG => match buf.read_i32::<BigEndian>()? 
{\n                0 => Message::AuthenticationOk,\n                2 => Message::AuthenticationKerberosV5,\n                3 => Message::AuthenticationCleartextPassword,\n                5 => Message::AuthenticationMd5Password,\n                6 => Message::AuthenticationScmCredential,\n                7 => Message::AuthenticationGss,\n                8 => Message::AuthenticationGssContinue,\n                9 => Message::AuthenticationSspi,\n                10 => {\n                    let storage = buf.read_all();\n                    Message::AuthenticationSasl(AuthenticationSaslBody(storage))\n                }\n                11 => {\n                    let storage = buf.read_all();\n                    Message::AuthenticationSaslContinue(AuthenticationSaslContinueBody(storage))\n                }\n                12 => {\n                    let storage = buf.read_all();\n                    Message::AuthenticationSaslFinal(AuthenticationSaslFinalBody(storage))\n                }\n                tag => {\n                    return Err(io::Error::new(\n                        io::ErrorKind::InvalidInput,\n                        format!(\"unknown authentication tag `{tag}`\"),\n                    ));\n                }\n            },\n            PORTAL_SUSPENDED_TAG => Message::PortalSuspended,\n            PARAMETER_STATUS_TAG => {\n                let name = buf.read_cstr()?;\n                let value = buf.read_cstr()?;\n                Message::ParameterStatus(ParameterStatusBody { name, value })\n            }\n            PARAMETER_DESCRIPTION_TAG => {\n                let len = buf.read_u16::<BigEndian>()?;\n                let storage = buf.read_all();\n                Message::ParameterDescription(ParameterDescriptionBody { storage, len })\n            }\n            ROW_DESCRIPTION_TAG => {\n                let len = buf.read_u16::<BigEndian>()?;\n                let storage = buf.read_all();\n                
Message::RowDescription(RowDescriptionBody { storage, len })\n            }\n            READY_FOR_QUERY_TAG => {\n                let status = buf.read_u8()?;\n                Message::ReadyForQuery(ReadyForQueryBody { status })\n            }\n            tag => {\n                return Err(io::Error::new(\n                    io::ErrorKind::InvalidInput,\n                    format!(\"unknown message tag `{tag}`\"),\n                ));\n            }\n        };\n\n        if !buf.is_empty() {\n            return Err(io::Error::new(\n                io::ErrorKind::InvalidInput,\n                \"invalid message length: expected buffer to be empty\",\n            ));\n        }\n\n        Ok(Some(message))\n    }\n}\n\nstruct Buffer {\n    bytes: Bytes,\n    idx: usize,\n}\n\nimpl Buffer {\n    #[inline]\n    fn slice(&self) -> &[u8] {\n        &self.bytes[self.idx..]\n    }\n\n    #[inline]\n    fn is_empty(&self) -> bool {\n        self.slice().is_empty()\n    }\n\n    #[inline]\n    fn read_cstr(&mut self) -> io::Result<Bytes> {\n        match memchr(0, self.slice()) {\n            Some(pos) => {\n                let start = self.idx;\n                let end = start + pos;\n                let cstr = self.bytes.slice(start..end);\n                self.idx = end + 1;\n                Ok(cstr)\n            }\n            None => Err(io::Error::new(\n                io::ErrorKind::UnexpectedEof,\n                \"unexpected EOF\",\n            )),\n        }\n    }\n\n    #[inline]\n    fn read_all(&mut self) -> Bytes {\n        let buf = self.bytes.slice(self.idx..);\n        self.idx = self.bytes.len();\n        buf\n    }\n}\n\nimpl Read for Buffer {\n    #[inline]\n    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {\n        let len = {\n            let slice = self.slice();\n            let len = cmp::min(slice.len(), buf.len());\n            buf[..len].copy_from_slice(&slice[..len]);\n            len\n        };\n        self.idx += len;\n    
    Ok(len)\n    }\n}\n\npub struct AuthenticationMd5PasswordBody {\n    salt: [u8; 4],\n}\n\nimpl AuthenticationMd5PasswordBody {\n    #[inline]\n    pub fn salt(&self) -> [u8; 4] {\n        self.salt\n    }\n}\n\npub struct AuthenticationSaslBody(Bytes);\n\nimpl AuthenticationSaslBody {\n    #[inline]\n    pub fn mechanisms(&self) -> SaslMechanisms<'_> {\n        SaslMechanisms(&self.0)\n    }\n}\n\npub struct SaslMechanisms<'a>(&'a [u8]);\n\nimpl<'a> FallibleIterator for SaslMechanisms<'a> {\n    type Item = &'a str;\n    type Error = io::Error;\n\n    #[inline]\n    fn next(&mut self) -> io::Result<Option<&'a str>> {\n        let value_end = find_null(self.0, 0)?;\n        if value_end == 0 {\n            if self.0.len() != 1 {\n                return Err(io::Error::new(\n                    io::ErrorKind::InvalidData,\n                    \"invalid message length: expected to be at end of iterator for sasl\",\n                ));\n            }\n            Ok(None)\n        } else {\n            let value = get_str(&self.0[..value_end])?;\n            self.0 = &self.0[value_end + 1..];\n            Ok(Some(value))\n        }\n    }\n}\n\npub struct AuthenticationSaslContinueBody(Bytes);\n\nimpl AuthenticationSaslContinueBody {\n    #[inline]\n    pub fn data(&self) -> &[u8] {\n        &self.0\n    }\n}\n\npub struct AuthenticationSaslFinalBody(Bytes);\n\nimpl AuthenticationSaslFinalBody {\n    #[inline]\n    pub fn data(&self) -> &[u8] {\n        &self.0\n    }\n}\n\npub struct BackendKeyDataBody {\n    process_id: i32,\n    secret_key: i32,\n}\n\nimpl BackendKeyDataBody {\n    #[inline]\n    pub fn process_id(&self) -> i32 {\n        self.process_id\n    }\n\n    #[inline]\n    pub fn secret_key(&self) -> i32 {\n        self.secret_key\n    }\n}\n\npub struct CommandCompleteBody {\n    tag: Bytes,\n}\n\nimpl CommandCompleteBody {\n    #[inline]\n    pub fn tag(&self) -> io::Result<&str> {\n        get_str(&self.tag)\n    }\n}\n\n#[derive(Debug)]\npub struct 
DataRowBody {\n    storage: Bytes,\n    len: u16,\n}\n\nimpl DataRowBody {\n    #[inline]\n    pub fn ranges(&self) -> DataRowRanges<'_> {\n        DataRowRanges {\n            buf: &self.storage,\n            len: self.storage.len(),\n            remaining: self.len,\n        }\n    }\n\n    #[inline]\n    pub fn buffer(&self) -> &[u8] {\n        &self.storage\n    }\n}\n\npub struct DataRowRanges<'a> {\n    buf: &'a [u8],\n    len: usize,\n    remaining: u16,\n}\n\nimpl FallibleIterator for DataRowRanges<'_> {\n    type Item = Option<Range<usize>>;\n    type Error = io::Error;\n\n    #[inline]\n    fn next(&mut self) -> io::Result<Option<Option<Range<usize>>>> {\n        if self.remaining == 0 {\n            if self.buf.is_empty() {\n                return Ok(None);\n            } else {\n                return Err(io::Error::new(\n                    io::ErrorKind::InvalidInput,\n                    \"invalid message length: datarowrange is not empty\",\n                ));\n            }\n        }\n\n        self.remaining -= 1;\n        let len = self.buf.read_i32::<BigEndian>()?;\n        if len < 0 {\n            Ok(Some(None))\n        } else {\n            let len = len as usize;\n            if self.buf.len() < len {\n                return Err(io::Error::new(\n                    io::ErrorKind::UnexpectedEof,\n                    \"unexpected EOF\",\n                ));\n            }\n            let base = self.len - self.buf.len();\n            self.buf = &self.buf[len..];\n            Ok(Some(Some(base..base + len)))\n        }\n    }\n\n    #[inline]\n    fn size_hint(&self) -> (usize, Option<usize>) {\n        let len = self.remaining as usize;\n        (len, Some(len))\n    }\n}\n\npub struct ErrorResponseBody {\n    storage: Bytes,\n}\n\nimpl ErrorResponseBody {\n    #[inline]\n    pub fn fields(&self) -> ErrorFields<'_> {\n        ErrorFields { buf: &self.storage }\n    }\n}\n\npub struct ErrorFields<'a> {\n    buf: &'a [u8],\n}\n\nimpl<'a> 
FallibleIterator for ErrorFields<'a> {\n    type Item = ErrorField<'a>;\n    type Error = io::Error;\n\n    #[inline]\n    fn next(&mut self) -> io::Result<Option<ErrorField<'a>>> {\n        let type_ = self.buf.read_u8()?;\n        if type_ == 0 {\n            if self.buf.is_empty() {\n                return Ok(None);\n            } else {\n                return Err(io::Error::new(\n                    io::ErrorKind::InvalidInput,\n                    \"invalid message length: error fields is not drained\",\n                ));\n            }\n        }\n\n        let value_end = find_null(self.buf, 0)?;\n        let value = get_str(&self.buf[..value_end])?;\n        self.buf = &self.buf[value_end + 1..];\n\n        Ok(Some(ErrorField { type_, value }))\n    }\n}\n\npub struct ErrorField<'a> {\n    type_: u8,\n    value: &'a str,\n}\n\nimpl ErrorField<'_> {\n    #[inline]\n    pub fn type_(&self) -> u8 {\n        self.type_\n    }\n\n    #[inline]\n    pub fn value(&self) -> &str {\n        self.value\n    }\n}\n\npub struct NoticeResponseBody {\n    storage: Bytes,\n}\n\nimpl NoticeResponseBody {\n    #[inline]\n    pub fn fields(&self) -> ErrorFields<'_> {\n        ErrorFields { buf: &self.storage }\n    }\n\n    pub fn as_bytes(&self) -> &[u8] {\n        &self.storage\n    }\n}\n\npub struct NotificationResponseBody {}\n\npub struct ParameterDescriptionBody {\n    storage: Bytes,\n    len: u16,\n}\n\nimpl ParameterDescriptionBody {\n    #[inline]\n    pub fn parameters(&self) -> Parameters<'_> {\n        Parameters {\n            buf: &self.storage,\n            remaining: self.len,\n        }\n    }\n}\n\npub struct Parameters<'a> {\n    buf: &'a [u8],\n    remaining: u16,\n}\n\nimpl FallibleIterator for Parameters<'_> {\n    type Item = Oid;\n    type Error = io::Error;\n\n    #[inline]\n    fn next(&mut self) -> io::Result<Option<Oid>> {\n        if self.remaining == 0 {\n            if self.buf.is_empty() {\n                return Ok(None);\n            } 
else {\n                return Err(io::Error::new(\n                    io::ErrorKind::InvalidInput,\n                    \"invalid message length: parameters is not drained\",\n                ));\n            }\n        }\n\n        self.remaining -= 1;\n        self.buf.read_u32::<BigEndian>().map(Some)\n    }\n\n    #[inline]\n    fn size_hint(&self) -> (usize, Option<usize>) {\n        let len = self.remaining as usize;\n        (len, Some(len))\n    }\n}\n\npub struct ParameterStatusBody {\n    name: Bytes,\n    value: Bytes,\n}\n\nimpl ParameterStatusBody {\n    #[inline]\n    pub fn name(&self) -> io::Result<&str> {\n        get_str(&self.name)\n    }\n\n    #[inline]\n    pub fn value(&self) -> io::Result<&str> {\n        get_str(&self.value)\n    }\n}\n\npub struct ReadyForQueryBody {\n    status: u8,\n}\n\nimpl ReadyForQueryBody {\n    #[inline]\n    pub fn status(&self) -> u8 {\n        self.status\n    }\n}\n\npub struct RowDescriptionBody {\n    storage: Bytes,\n    len: u16,\n}\n\nimpl RowDescriptionBody {\n    #[inline]\n    pub fn fields(&self) -> Fields<'_> {\n        Fields {\n            buf: &self.storage,\n            remaining: self.len,\n        }\n    }\n}\n\npub struct Fields<'a> {\n    buf: &'a [u8],\n    remaining: u16,\n}\n\nimpl<'a> FallibleIterator for Fields<'a> {\n    type Item = Field<'a>;\n    type Error = io::Error;\n\n    #[inline]\n    fn next(&mut self) -> io::Result<Option<Field<'a>>> {\n        if self.remaining == 0 {\n            if self.buf.is_empty() {\n                return Ok(None);\n            } else {\n                return Err(io::Error::new(\n                    io::ErrorKind::InvalidInput,\n                    \"invalid message length: field is not drained\",\n                ));\n            }\n        }\n\n        self.remaining -= 1;\n        let name_end = find_null(self.buf, 0)?;\n        let name = get_str(&self.buf[..name_end])?;\n        self.buf = &self.buf[name_end + 1..];\n        let table_oid = 
self.buf.read_u32::<BigEndian>()?;\n        let column_id = self.buf.read_i16::<BigEndian>()?;\n        let type_oid = self.buf.read_u32::<BigEndian>()?;\n        let type_size = self.buf.read_i16::<BigEndian>()?;\n        let type_modifier = self.buf.read_i32::<BigEndian>()?;\n        let format = self.buf.read_i16::<BigEndian>()?;\n\n        Ok(Some(Field {\n            name,\n            table_oid,\n            column_id,\n            type_oid,\n            type_size,\n            type_modifier,\n            format,\n        }))\n    }\n}\n\npub struct Field<'a> {\n    name: &'a str,\n    table_oid: Oid,\n    column_id: i16,\n    type_oid: Oid,\n    type_size: i16,\n    type_modifier: i32,\n    format: i16,\n}\n\nimpl<'a> Field<'a> {\n    #[inline]\n    pub fn name(&self) -> &'a str {\n        self.name\n    }\n\n    #[inline]\n    pub fn table_oid(&self) -> Oid {\n        self.table_oid\n    }\n\n    #[inline]\n    pub fn column_id(&self) -> i16 {\n        self.column_id\n    }\n\n    #[inline]\n    pub fn type_oid(&self) -> Oid {\n        self.type_oid\n    }\n\n    #[inline]\n    pub fn type_size(&self) -> i16 {\n        self.type_size\n    }\n\n    #[inline]\n    pub fn type_modifier(&self) -> i32 {\n        self.type_modifier\n    }\n\n    #[inline]\n    pub fn format(&self) -> i16 {\n        self.format\n    }\n}\n\n#[inline]\nfn find_null(buf: &[u8], start: usize) -> io::Result<usize> {\n    match memchr(0, &buf[start..]) {\n        Some(pos) => Ok(pos + start),\n        None => Err(io::Error::new(\n            io::ErrorKind::UnexpectedEof,\n            \"unexpected EOF\",\n        )),\n    }\n}\n\n#[inline]\nfn get_str(buf: &[u8]) -> io::Result<&str> {\n    str::from_utf8(buf).map_err(|e| io::Error::new(io::ErrorKind::InvalidInput, e))\n}\n"
  },
  {
    "path": "libs/proxy/postgres-protocol2/src/message/frontend.rs",
    "content": "//! Frontend message serialization.\n#![allow(missing_docs)]\n\nuse std::error::Error;\nuse std::{io, marker};\n\nuse byteorder::{BigEndian, ByteOrder};\nuse bytes::{Buf, BufMut, BytesMut};\n\nuse crate::{FromUsize, IsNull, Oid, write_nullable};\n\n#[inline]\nfn write_body<F, E>(buf: &mut BytesMut, f: F) -> Result<(), E>\nwhere\n    F: FnOnce(&mut BytesMut) -> Result<(), E>,\n    E: From<io::Error>,\n{\n    let base = buf.len();\n    buf.extend_from_slice(&[0; 4]);\n\n    f(buf)?;\n\n    let size = i32::from_usize(buf.len() - base)?;\n    BigEndian::write_i32(&mut buf[base..], size);\n    Ok(())\n}\n\n#[derive(Debug)]\npub enum BindError {\n    Conversion(Box<dyn Error + marker::Sync + Send>),\n    Serialization(io::Error),\n}\n\nimpl From<Box<dyn Error + marker::Sync + Send>> for BindError {\n    #[inline]\n    fn from(e: Box<dyn Error + marker::Sync + Send>) -> BindError {\n        BindError::Conversion(e)\n    }\n}\n\nimpl From<io::Error> for BindError {\n    #[inline]\n    fn from(e: io::Error) -> BindError {\n        BindError::Serialization(e)\n    }\n}\n\n#[inline]\npub fn bind<I, J, F, T, K>(\n    portal: &str,\n    statement: &str,\n    formats: I,\n    values: J,\n    mut serializer: F,\n    result_formats: K,\n    buf: &mut BytesMut,\n) -> Result<(), BindError>\nwhere\n    I: IntoIterator<Item = i16>,\n    J: IntoIterator<Item = T>,\n    F: FnMut(T, &mut BytesMut) -> Result<IsNull, Box<dyn Error + marker::Sync + Send>>,\n    K: IntoIterator<Item = i16>,\n{\n    buf.put_u8(b'B');\n\n    write_body(buf, |buf| {\n        write_cstr(portal.as_bytes(), buf)?;\n        write_cstr(statement.as_bytes(), buf)?;\n        write_counted(\n            formats,\n            |f, buf| {\n                buf.put_i16(f);\n                Ok::<_, io::Error>(())\n            },\n            buf,\n        )?;\n        write_counted(\n            values,\n            |v, buf| write_nullable(|buf| serializer(v, buf), buf),\n            buf,\n        )?;\n       
 write_counted(\n            result_formats,\n            |f, buf| {\n                buf.put_i16(f);\n                Ok::<_, io::Error>(())\n            },\n            buf,\n        )?;\n\n        Ok(())\n    })\n}\n\n#[inline]\nfn write_counted<I, T, F, E>(items: I, mut serializer: F, buf: &mut BytesMut) -> Result<(), E>\nwhere\n    I: IntoIterator<Item = T>,\n    F: FnMut(T, &mut BytesMut) -> Result<(), E>,\n    E: From<io::Error>,\n{\n    let base = buf.len();\n    buf.extend_from_slice(&[0; 2]);\n    let mut count = 0;\n    for item in items {\n        serializer(item, buf)?;\n        count += 1;\n    }\n    let count = i16::from_usize(count)?;\n    BigEndian::write_i16(&mut buf[base..], count);\n\n    Ok(())\n}\n\n#[inline]\npub fn cancel_request(process_id: i32, secret_key: i32, buf: &mut BytesMut) {\n    write_body(buf, |buf| {\n        buf.put_i32(80_877_102);\n        buf.put_i32(process_id);\n        buf.put_i32(secret_key);\n        Ok::<_, io::Error>(())\n    })\n    .unwrap();\n}\n\n#[inline]\npub fn close(variant: u8, name: &str, buf: &mut BytesMut) -> io::Result<()> {\n    buf.put_u8(b'C');\n    write_body(buf, |buf| {\n        buf.put_u8(variant);\n        write_cstr(name.as_bytes(), buf)\n    })\n}\n\npub struct CopyData<T> {\n    buf: T,\n    len: i32,\n}\n\nimpl<T> CopyData<T>\nwhere\n    T: Buf,\n{\n    pub fn new(buf: T) -> io::Result<CopyData<T>> {\n        let len = buf\n            .remaining()\n            .checked_add(4)\n            .and_then(|l| i32::try_from(l).ok())\n            .ok_or_else(|| {\n                io::Error::new(io::ErrorKind::InvalidInput, \"message length overflow\")\n            })?;\n\n        Ok(CopyData { buf, len })\n    }\n\n    pub fn write(self, out: &mut BytesMut) {\n        out.put_u8(b'd');\n        out.put_i32(self.len);\n        out.put(self.buf);\n    }\n}\n\n#[inline]\npub fn copy_done(buf: &mut BytesMut) {\n    buf.put_u8(b'c');\n    write_body(buf, |_| Ok::<(), 
io::Error>(())).unwrap();\n}\n\n#[inline]\npub fn copy_fail(message: &str, buf: &mut BytesMut) -> io::Result<()> {\n    buf.put_u8(b'f');\n    write_body(buf, |buf| write_cstr(message.as_bytes(), buf))\n}\n\n#[inline]\npub fn describe(variant: u8, name: &str, buf: &mut BytesMut) -> io::Result<()> {\n    buf.put_u8(b'D');\n    write_body(buf, |buf| {\n        buf.put_u8(variant);\n        write_cstr(name.as_bytes(), buf)\n    })\n}\n\n#[inline]\npub fn execute(portal: &str, max_rows: i32, buf: &mut BytesMut) -> io::Result<()> {\n    buf.put_u8(b'E');\n    write_body(buf, |buf| {\n        write_cstr(portal.as_bytes(), buf)?;\n        buf.put_i32(max_rows);\n        Ok(())\n    })\n}\n\n#[inline]\npub fn parse<I>(name: &str, query: &str, param_types: I, buf: &mut BytesMut) -> io::Result<()>\nwhere\n    I: IntoIterator<Item = Oid>,\n{\n    buf.put_u8(b'P');\n    write_body(buf, |buf| {\n        write_cstr(name.as_bytes(), buf)?;\n        write_cstr(query.as_bytes(), buf)?;\n        write_counted(\n            param_types,\n            |t, buf| {\n                buf.put_u32(t);\n                Ok::<_, io::Error>(())\n            },\n            buf,\n        )?;\n        Ok(())\n    })\n}\n\n#[inline]\npub fn password_message(password: &[u8], buf: &mut BytesMut) -> io::Result<()> {\n    buf.put_u8(b'p');\n    write_body(buf, |buf| write_cstr(password, buf))\n}\n\n#[inline]\npub fn query(query: &str, buf: &mut BytesMut) -> io::Result<()> {\n    buf.put_u8(b'Q');\n    write_body(buf, |buf| write_cstr(query.as_bytes(), buf))\n}\n\n#[inline]\npub fn sasl_initial_response(mechanism: &str, data: &[u8], buf: &mut BytesMut) -> io::Result<()> {\n    buf.put_u8(b'p');\n    write_body(buf, |buf| {\n        write_cstr(mechanism.as_bytes(), buf)?;\n        let len = i32::from_usize(data.len())?;\n        buf.put_i32(len);\n        buf.put_slice(data);\n        Ok(())\n    })\n}\n\n#[inline]\npub fn sasl_response(data: &[u8], buf: &mut BytesMut) -> io::Result<()> {\n    
buf.put_u8(b'p');\n    write_body(buf, |buf| {\n        buf.put_slice(data);\n        Ok(())\n    })\n}\n\n#[inline]\npub fn ssl_request(buf: &mut BytesMut) {\n    write_body(buf, |buf| {\n        buf.put_i32(80_877_103);\n        Ok::<_, io::Error>(())\n    })\n    .unwrap();\n}\n\n#[inline]\npub fn startup_message(parameters: &StartupMessageParams, buf: &mut BytesMut) -> io::Result<()> {\n    write_body(buf, |buf| {\n        // postgres protocol version 3.0(196608) in bigger-endian\n        buf.put_i32(0x00_03_00_00);\n        buf.put_slice(&parameters.params);\n        buf.put_u8(0);\n        Ok(())\n    })\n}\n\n#[derive(Debug, Clone, Default, PartialEq, Eq)]\npub struct StartupMessageParams {\n    pub params: BytesMut,\n}\n\nimpl StartupMessageParams {\n    /// Set parameter's value by its name.\n    pub fn insert(&mut self, name: &str, value: &str) {\n        if name.contains('\\0') || value.contains('\\0') {\n            panic!(\"startup parameter name or value contained a null\")\n        }\n        self.params.put_slice(name.as_bytes());\n        self.params.put_u8(0);\n        self.params.put_slice(value.as_bytes());\n        self.params.put_u8(0);\n    }\n}\n\n#[inline]\npub fn sync(buf: &mut BytesMut) {\n    buf.put_u8(b'S');\n    write_body(buf, |_| Ok::<(), io::Error>(())).unwrap();\n}\n\n#[inline]\npub fn flush(buf: &mut BytesMut) {\n    buf.put_u8(b'H');\n    write_body(buf, |_| Ok::<(), io::Error>(())).unwrap();\n}\n\n#[inline]\npub fn terminate(buf: &mut BytesMut) {\n    buf.put_u8(b'X');\n    write_body(buf, |_| Ok::<(), io::Error>(())).unwrap();\n}\n\n#[inline]\nfn write_cstr(s: &[u8], buf: &mut BytesMut) -> Result<(), io::Error> {\n    if s.contains(&0) {\n        return Err(io::Error::new(\n            io::ErrorKind::InvalidInput,\n            \"string contains embedded null\",\n        ));\n    }\n    buf.put_slice(s);\n    buf.put_u8(0);\n    Ok(())\n}\n"
  },
  {
    "path": "libs/proxy/postgres-protocol2/src/message/mod.rs",
    "content": "//! Postgres message protocol support.\n//!\n//! See [Postgres's documentation][docs] for more information on message flow.\n//!\n//! [docs]: https://www.postgresql.org/docs/9.5/static/protocol-flow.html\n\npub mod backend;\npub mod frontend;\n"
  },
  {
    "path": "libs/proxy/postgres-protocol2/src/password/mod.rs",
    "content": "//! Functions to encrypt a password in the client.\n//!\n//! This is intended to be used by client applications that wish to\n//! send commands like `ALTER USER joe PASSWORD 'pwd'`. The password\n//! need not be sent in cleartext if it is encrypted on the client\n//! side. This is good because it ensures the cleartext password won't\n//! end up in logs pg_stat displays, etc.\n\nuse base64::Engine as _;\nuse base64::prelude::BASE64_STANDARD;\nuse hmac::{Hmac, Mac};\nuse rand::RngCore;\nuse sha2::digest::FixedOutput;\nuse sha2::{Digest, Sha256};\n\nuse crate::authentication::sasl;\n\n#[cfg(test)]\nmod test;\n\nconst SCRAM_DEFAULT_ITERATIONS: u32 = 4096;\nconst SCRAM_DEFAULT_SALT_LEN: usize = 16;\n\n/// Hash password using SCRAM-SHA-256 with a randomly-generated\n/// salt.\n///\n/// The client may assume the returned string doesn't contain any\n/// special characters that would require escaping in an SQL command.\npub async fn scram_sha_256(password: &[u8]) -> String {\n    let mut salt: [u8; SCRAM_DEFAULT_SALT_LEN] = [0; SCRAM_DEFAULT_SALT_LEN];\n    let mut rng = rand::rng();\n    rng.fill_bytes(&mut salt);\n    scram_sha_256_salt(password, salt).await\n}\n\n// Internal implementation of scram_sha_256 with a caller-provided\n// salt. This is useful for testing.\npub(crate) async fn scram_sha_256_salt(\n    password: &[u8],\n    salt: [u8; SCRAM_DEFAULT_SALT_LEN],\n) -> String {\n    // Prepare the password, per [RFC\n    // 4013](https://tools.ietf.org/html/rfc4013), if possible.\n    //\n    // Postgres treats passwords as byte strings (without embedded NUL\n    // bytes), but SASL expects passwords to be valid UTF-8.\n    //\n    // Follow the behavior of libpq's PQencryptPasswordConn(), and\n    // also the backend. 
If the password is not valid UTF-8, or if it\n    // contains prohibited characters (such as non-ASCII whitespace),\n    // just skip the SASLprep step and use the original byte\n    // sequence.\n    let prepared: Vec<u8> = match std::str::from_utf8(password) {\n        Ok(password_str) => {\n            match stringprep::saslprep(password_str) {\n                Ok(p) => p.into_owned().into_bytes(),\n                // contains invalid characters; skip saslprep\n                Err(_) => Vec::from(password),\n            }\n        }\n        // not valid UTF-8; skip saslprep\n        Err(_) => Vec::from(password),\n    };\n\n    // salt password\n    let salted_password = sasl::hi(&prepared, &salt, SCRAM_DEFAULT_ITERATIONS).await;\n\n    // client key\n    let mut hmac = Hmac::<Sha256>::new_from_slice(&salted_password)\n        .expect(\"HMAC is able to accept all key sizes\");\n    hmac.update(b\"Client Key\");\n    let client_key = hmac.finalize().into_bytes();\n\n    // stored key\n    let mut hash = Sha256::default();\n    hash.update(client_key.as_slice());\n    let stored_key = hash.finalize_fixed();\n\n    // server key\n    let mut hmac = Hmac::<Sha256>::new_from_slice(&salted_password)\n        .expect(\"HMAC is able to accept all key sizes\");\n    hmac.update(b\"Server Key\");\n    let server_key = hmac.finalize().into_bytes();\n\n    format!(\n        \"SCRAM-SHA-256${}:{}${}:{}\",\n        SCRAM_DEFAULT_ITERATIONS,\n        BASE64_STANDARD.encode(salt),\n        BASE64_STANDARD.encode(stored_key),\n        BASE64_STANDARD.encode(server_key)\n    )\n}\n"
  },
  {
    "path": "libs/proxy/postgres-protocol2/src/password/test.rs",
    "content": "use crate::password;\n\n#[tokio::test]\nasync fn test_encrypt_scram_sha_256() {\n    // Specify the salt to make the test deterministic. Any bytes will do.\n    let salt: [u8; 16] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];\n    assert_eq!(\n        password::scram_sha_256_salt(b\"secret\", salt).await,\n        \"SCRAM-SHA-256$4096:AQIDBAUGBwgJCgsMDQ4PEA==$8rrDg00OqaiWXJ7p+sCgHEIaBSHY89ZJl3mfIsf32oY=:05L1f+yZbiN8O0AnO40Og85NNRhvzTS57naKRWCcsIA=\"\n    );\n}\n"
  },
  {
    "path": "libs/proxy/postgres-protocol2/src/types/mod.rs",
    "content": "//! Conversions to and from Postgres's binary format for various types.\nuse std::boxed::Box as StdBox;\nuse std::error::Error;\nuse std::str;\n\nuse byteorder::{BigEndian, ReadBytesExt};\nuse bytes::{BufMut, BytesMut};\nuse fallible_iterator::FallibleIterator;\n\nuse crate::Oid;\n\n#[cfg(test)]\nmod test;\n\n/// Serializes a `TEXT`, `VARCHAR`, `CHAR(n)`, `NAME`, or `CITEXT` value.\n#[inline]\npub fn text_to_sql(v: &str, buf: &mut BytesMut) {\n    buf.put_slice(v.as_bytes());\n}\n\n/// Deserializes a `TEXT`, `VARCHAR`, `CHAR(n)`, `NAME`, or `CITEXT` value.\n#[inline]\npub fn text_from_sql(buf: &[u8]) -> Result<&str, StdBox<dyn Error + Sync + Send>> {\n    Ok(str::from_utf8(buf)?)\n}\n\n/// Deserializes a `\"char\"` value.\n#[inline]\npub fn char_from_sql(mut buf: &[u8]) -> Result<i8, StdBox<dyn Error + Sync + Send>> {\n    let v = buf.read_i8()?;\n    if !buf.is_empty() {\n        return Err(\"invalid buffer size\".into());\n    }\n    Ok(v)\n}\n\n/// Serializes an `OID` value.\n#[inline]\npub fn oid_to_sql(v: Oid, buf: &mut BytesMut) {\n    buf.put_u32(v);\n}\n\n/// Deserializes an `OID` value.\n#[inline]\npub fn oid_from_sql(mut buf: &[u8]) -> Result<Oid, StdBox<dyn Error + Sync + Send>> {\n    let v = buf.read_u32::<BigEndian>()?;\n    if !buf.is_empty() {\n        return Err(\"invalid buffer size\".into());\n    }\n    Ok(v)\n}\n\n/// A fallible iterator over `HSTORE` entries.\npub struct HstoreEntries<'a> {\n    remaining: i32,\n    buf: &'a [u8],\n}\n\nimpl<'a> FallibleIterator for HstoreEntries<'a> {\n    type Item = (&'a str, Option<&'a str>);\n    type Error = StdBox<dyn Error + Sync + Send>;\n\n    #[inline]\n    #[allow(clippy::type_complexity)]\n    fn next(\n        &mut self,\n    ) -> Result<Option<(&'a str, Option<&'a str>)>, StdBox<dyn Error + Sync + Send>> {\n        if self.remaining == 0 {\n            if !self.buf.is_empty() {\n                return Err(\"invalid buffer size\".into());\n            }\n            return 
Ok(None);\n        }\n\n        self.remaining -= 1;\n\n        let key_len = self.buf.read_i32::<BigEndian>()?;\n        if key_len < 0 {\n            return Err(\"invalid key length\".into());\n        }\n        let (key, buf) = self.buf.split_at(key_len as usize);\n        let key = str::from_utf8(key)?;\n        self.buf = buf;\n\n        let value_len = self.buf.read_i32::<BigEndian>()?;\n        let value = if value_len < 0 {\n            None\n        } else {\n            let (value, buf) = self.buf.split_at(value_len as usize);\n            let value = str::from_utf8(value)?;\n            self.buf = buf;\n            Some(value)\n        };\n\n        Ok(Some((key, value)))\n    }\n\n    #[inline]\n    fn size_hint(&self) -> (usize, Option<usize>) {\n        let len = self.remaining as usize;\n        (len, Some(len))\n    }\n}\n\n/// Deserializes an array value.\n#[inline]\npub fn array_from_sql(mut buf: &[u8]) -> Result<Array<'_>, StdBox<dyn Error + Sync + Send>> {\n    let dimensions = buf.read_i32::<BigEndian>()?;\n    if dimensions < 0 {\n        return Err(\"invalid dimension count\".into());\n    }\n\n    let mut r = buf;\n    let mut elements = 1i32;\n    for _ in 0..dimensions {\n        let len = r.read_i32::<BigEndian>()?;\n        if len < 0 {\n            return Err(\"invalid dimension size\".into());\n        }\n        let _lower_bound = r.read_i32::<BigEndian>()?;\n        elements = match elements.checked_mul(len) {\n            Some(elements) => elements,\n            None => return Err(\"too many array elements\".into()),\n        };\n    }\n\n    if dimensions == 0 {\n        elements = 0;\n    }\n\n    Ok(Array {\n        dimensions,\n        elements,\n        buf,\n    })\n}\n\n/// A Postgres array.\npub struct Array<'a> {\n    dimensions: i32,\n    elements: i32,\n    buf: &'a [u8],\n}\n\nimpl<'a> Array<'a> {\n    /// Returns an iterator over the dimensions of the array.\n    #[inline]\n    pub fn dimensions(&self) -> 
ArrayDimensions<'a> {\n        ArrayDimensions(&self.buf[..self.dimensions as usize * 8])\n    }\n\n    /// Returns an iterator over the values of the array.\n    #[inline]\n    pub fn values(&self) -> ArrayValues<'a> {\n        ArrayValues {\n            remaining: self.elements,\n            buf: &self.buf[self.dimensions as usize * 8..],\n        }\n    }\n}\n\n/// An iterator over the dimensions of an array.\npub struct ArrayDimensions<'a>(&'a [u8]);\n\nimpl FallibleIterator for ArrayDimensions<'_> {\n    type Item = ArrayDimension;\n    type Error = StdBox<dyn Error + Sync + Send>;\n\n    #[inline]\n    fn next(&mut self) -> Result<Option<ArrayDimension>, StdBox<dyn Error + Sync + Send>> {\n        if self.0.is_empty() {\n            return Ok(None);\n        }\n\n        let len = self.0.read_i32::<BigEndian>()?;\n        let lower_bound = self.0.read_i32::<BigEndian>()?;\n\n        Ok(Some(ArrayDimension { len, lower_bound }))\n    }\n\n    #[inline]\n    fn size_hint(&self) -> (usize, Option<usize>) {\n        let len = self.0.len() / 8;\n        (len, Some(len))\n    }\n}\n\n/// Information about a dimension of an array.\n#[derive(Debug, Copy, Clone, PartialEq, Eq)]\npub struct ArrayDimension {\n    /// The length of this dimension.\n    pub len: i32,\n\n    /// The base value used to index into this dimension.\n    pub lower_bound: i32,\n}\n\n/// An iterator over the values of an array, in row-major order.\npub struct ArrayValues<'a> {\n    remaining: i32,\n    buf: &'a [u8],\n}\n\nimpl<'a> FallibleIterator for ArrayValues<'a> {\n    type Item = Option<&'a [u8]>;\n    type Error = StdBox<dyn Error + Sync + Send>;\n\n    #[inline]\n    fn next(&mut self) -> Result<Option<Option<&'a [u8]>>, StdBox<dyn Error + Sync + Send>> {\n        if self.remaining == 0 {\n            if !self.buf.is_empty() {\n                return Err(\"invalid message length: arrayvalue not drained\".into());\n            }\n            return Ok(None);\n        }\n        
self.remaining -= 1;\n\n        let len = self.buf.read_i32::<BigEndian>()?;\n        let val = if len < 0 {\n            None\n        } else {\n            if self.buf.len() < len as usize {\n                return Err(\"invalid value length\".into());\n            }\n\n            let (val, buf) = self.buf.split_at(len as usize);\n            self.buf = buf;\n            Some(val)\n        };\n\n        Ok(Some(val))\n    }\n\n    fn size_hint(&self) -> (usize, Option<usize>) {\n        let len = self.remaining as usize;\n        (len, Some(len))\n    }\n}\n\n/// Serializes a Postgres ltree string\n#[inline]\npub fn ltree_to_sql(v: &str, buf: &mut BytesMut) {\n    // A version number is prepended to an ltree string per spec\n    buf.put_u8(1);\n    // Append the rest of the query\n    buf.put_slice(v.as_bytes());\n}\n\n/// Deserialize a Postgres ltree string\n#[inline]\npub fn ltree_from_sql(buf: &[u8]) -> Result<&str, StdBox<dyn Error + Sync + Send>> {\n    match buf {\n        // Remove the version number from the front of the ltree per spec\n        [1u8, rest @ ..] => Ok(str::from_utf8(rest)?),\n        _ => Err(\"ltree version 1 only supported\".into()),\n    }\n}\n\n/// Serializes a Postgres lquery string\n#[inline]\npub fn lquery_to_sql(v: &str, buf: &mut BytesMut) {\n    // A version number is prepended to an lquery string per spec\n    buf.put_u8(1);\n    // Append the rest of the query\n    buf.put_slice(v.as_bytes());\n}\n\n/// Deserialize a Postgres lquery string\n#[inline]\npub fn lquery_from_sql(buf: &[u8]) -> Result<&str, StdBox<dyn Error + Sync + Send>> {\n    match buf {\n        // Remove the version number from the front of the lquery per spec\n        [1u8, rest @ ..] 
=> Ok(str::from_utf8(rest)?),\n        _ => Err(\"lquery version 1 only supported\".into()),\n    }\n}\n\n/// Serializes a Postgres ltxtquery string\n#[inline]\npub fn ltxtquery_to_sql(v: &str, buf: &mut BytesMut) {\n    // A version number is prepended to an ltxtquery string per spec\n    buf.put_u8(1);\n    // Append the rest of the query\n    buf.put_slice(v.as_bytes());\n}\n\n/// Deserialize a Postgres ltxtquery string\n#[inline]\npub fn ltxtquery_from_sql(buf: &[u8]) -> Result<&str, StdBox<dyn Error + Sync + Send>> {\n    match buf {\n        // Remove the version number from the front of the ltxtquery per spec\n        [1u8, rest @ ..] => Ok(str::from_utf8(rest)?),\n        _ => Err(\"ltxtquery version 1 only supported\".into()),\n    }\n}\n"
  },
  {
    "path": "libs/proxy/postgres-protocol2/src/types/test.rs",
    "content": "use bytes::{Buf, BytesMut};\n\nuse super::*;\n\n#[test]\nfn ltree_sql() {\n    let mut query = vec![1u8];\n    query.extend_from_slice(\"A.B.C\".as_bytes());\n\n    let mut buf = BytesMut::new();\n\n    ltree_to_sql(\"A.B.C\", &mut buf);\n\n    assert_eq!(query.as_slice(), buf.chunk());\n}\n\n#[test]\nfn ltree_str() {\n    let mut query = vec![1u8];\n    query.extend_from_slice(\"A.B.C\".as_bytes());\n\n    assert!(ltree_from_sql(query.as_slice()).is_ok())\n}\n\n#[test]\nfn ltree_wrong_version() {\n    let mut query = vec![2u8];\n    query.extend_from_slice(\"A.B.C\".as_bytes());\n\n    assert!(ltree_from_sql(query.as_slice()).is_err())\n}\n\n#[test]\nfn lquery_sql() {\n    let mut query = vec![1u8];\n    query.extend_from_slice(\"A.B.C\".as_bytes());\n\n    let mut buf = BytesMut::new();\n\n    lquery_to_sql(\"A.B.C\", &mut buf);\n\n    assert_eq!(query.as_slice(), buf.chunk());\n}\n\n#[test]\nfn lquery_str() {\n    let mut query = vec![1u8];\n    query.extend_from_slice(\"A.B.C\".as_bytes());\n\n    assert!(lquery_from_sql(query.as_slice()).is_ok())\n}\n\n#[test]\nfn lquery_wrong_version() {\n    let mut query = vec![2u8];\n    query.extend_from_slice(\"A.B.C\".as_bytes());\n\n    assert!(lquery_from_sql(query.as_slice()).is_err())\n}\n\n#[test]\nfn ltxtquery_sql() {\n    let mut query = vec![1u8];\n    query.extend_from_slice(\"a & b*\".as_bytes());\n\n    let mut buf = BytesMut::new();\n\n    ltree_to_sql(\"a & b*\", &mut buf);\n\n    assert_eq!(query.as_slice(), buf.chunk());\n}\n\n#[test]\nfn ltxtquery_str() {\n    let mut query = vec![1u8];\n    query.extend_from_slice(\"a & b*\".as_bytes());\n\n    assert!(ltree_from_sql(query.as_slice()).is_ok())\n}\n\n#[test]\nfn ltxtquery_wrong_version() {\n    let mut query = vec![2u8];\n    query.extend_from_slice(\"a & b*\".as_bytes());\n\n    assert!(ltree_from_sql(query.as_slice()).is_err())\n}\n"
  },
  {
    "path": "libs/proxy/postgres-types2/Cargo.toml",
    "content": "[package]\nname = \"postgres-types2\"\nversion = \"0.1.0\"\nedition = \"2024\"\nlicense = \"MIT/Apache-2.0\"\n\n[dependencies]\nbytes.workspace = true\nfallible-iterator.workspace = true\npostgres-protocol2 = { path = \"../postgres-protocol2\" }\n"
  },
  {
    "path": "libs/proxy/postgres-types2/src/lib.rs",
    "content": "//! Conversions to and from Postgres types.\n//!\n//! This crate is used by the `tokio-postgres` and `postgres` crates. You normally don't need to depend directly on it\n//! unless you want to define your own `ToSql` or `FromSql` definitions.\n#![warn(clippy::all, missing_docs)]\n\nuse std::any::type_name;\nuse std::error::Error;\nuse std::fmt;\nuse std::sync::Arc;\n\nuse fallible_iterator::FallibleIterator;\n#[doc(inline)]\npub use postgres_protocol2::Oid;\nuse postgres_protocol2::types;\n\nuse crate::type_gen::{Inner, Other};\n\n/// Generates a simple implementation of `ToSql::accepts` which accepts the\n/// types passed to it.\nmacro_rules! accepts {\n    ($($expected:ident),+) => (\n        fn accepts(ty: &$crate::Type) -> bool {\n            matches!(*ty, $($crate::Type::$expected)|+)\n        }\n    )\n}\n\n// mod pg_lsn;\n#[doc(hidden)]\npub mod private;\n// mod special;\nmod type_gen;\n\n/// A Postgres type.\n#[derive(PartialEq, Eq, Clone, Hash)]\npub struct Type(Inner);\n\nimpl fmt::Debug for Type {\n    fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {\n        fmt::Debug::fmt(&self.0, fmt)\n    }\n}\n\nimpl fmt::Display for Type {\n    fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {\n        match self.schema() {\n            \"public\" | \"pg_catalog\" => {}\n            schema => write!(fmt, \"{schema}.\")?,\n        }\n        fmt.write_str(self.name())\n    }\n}\n\nimpl Type {\n    /// Creates a new `Type`.\n    pub fn new(name: String, oid: Oid, kind: Kind, schema: String) -> Type {\n        Type(Inner::Other(Arc::new(Other {\n            name,\n            oid,\n            kind,\n            schema,\n        })))\n    }\n\n    /// Returns the `Type` corresponding to the provided `Oid` if it\n    /// corresponds to a built-in type.\n    pub fn from_oid(oid: Oid) -> Option<Type> {\n        Inner::from_oid(oid).map(Type)\n    }\n\n    /// Returns the OID of the `Type`.\n    pub fn oid(&self) -> Oid {\n        
self.0.oid()\n    }\n\n    /// Returns the kind of this type.\n    pub fn kind(&self) -> &Kind {\n        self.0.kind()\n    }\n\n    /// Returns the schema of this type.\n    pub fn schema(&self) -> &str {\n        match self.0 {\n            Inner::Other(ref u) => &u.schema,\n            _ => \"pg_catalog\",\n        }\n    }\n\n    /// Returns the name of this type.\n    pub fn name(&self) -> &str {\n        self.0.name()\n    }\n}\n\n/// Represents the kind of a Postgres type.\n#[derive(Debug, Clone, PartialEq, Eq, Hash)]\n#[non_exhaustive]\npub enum Kind {\n    /// A simple type like `VARCHAR` or `INTEGER`.\n    Simple,\n    /// An enumerated type.\n    Enum,\n    /// A pseudo-type.\n    Pseudo,\n    /// An array type along with the type of its elements.\n    Array(Type),\n    /// A range type along with the type of its elements.\n    Range(Oid),\n    /// A multirange type along with the type of its elements.\n    Multirange(Type),\n    /// A domain type along with its underlying type.\n    Domain(Oid),\n    /// A composite type.\n    Composite(Oid),\n}\n\n/// Information about a field of a composite type.\n#[derive(Debug, Clone, PartialEq, Eq, Hash)]\npub struct Field {\n    name: String,\n    type_: Type,\n}\n\nimpl Field {\n    /// Creates a new `Field`.\n    pub fn new(name: String, type_: Type) -> Field {\n        Field { name, type_ }\n    }\n\n    /// Returns the name of the field.\n    pub fn name(&self) -> &str {\n        &self.name\n    }\n\n    /// Returns the type of the field.\n    pub fn type_(&self) -> &Type {\n        &self.type_\n    }\n}\n\n/// An error indicating that a `NULL` Postgres value was passed to a `FromSql`\n/// implementation that does not support `NULL` values.\n#[derive(Debug, Clone, Copy)]\npub struct WasNull;\n\nimpl fmt::Display for WasNull {\n    fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {\n        fmt.write_str(\"a Postgres value was `NULL`\")\n    }\n}\n\nimpl Error for WasNull {}\n\n/// An error 
indicating that a conversion was attempted between incompatible\n/// Rust and Postgres types.\n#[derive(Debug)]\npub struct WrongType {\n    postgres: Type,\n    rust: &'static str,\n}\n\nimpl fmt::Display for WrongType {\n    fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {\n        write!(\n            fmt,\n            \"cannot convert between the Rust type `{}` and the Postgres type `{}`\",\n            self.rust, self.postgres,\n        )\n    }\n}\n\nimpl Error for WrongType {}\n\nimpl WrongType {\n    /// Creates a new `WrongType` error.\n    pub fn new<T>(ty: Type) -> WrongType {\n        WrongType {\n            postgres: ty,\n            rust: type_name::<T>(),\n        }\n    }\n}\n\n/// An error indicating that a as_text conversion was attempted on a binary\n/// result.\n#[derive(Debug)]\npub struct WrongFormat {}\n\nimpl Error for WrongFormat {}\n\nimpl fmt::Display for WrongFormat {\n    fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {\n        write!(\n            fmt,\n            \"cannot read column as text while it is in binary format\"\n        )\n    }\n}\n\n/// A trait for types that can be created from a Postgres value.\npub trait FromSql<'a>: Sized {\n    /// Creates a new value of this type from a buffer of data of the specified\n    /// Postgres `Type` in its binary format.\n    ///\n    /// The caller of this method is responsible for ensuring that this type\n    /// is compatible with the Postgres `Type`.\n    fn from_sql(ty: &Type, raw: &'a [u8]) -> Result<Self, Box<dyn Error + Sync + Send>>;\n\n    /// Creates a new value of this type from a `NULL` SQL value.\n    ///\n    /// The caller of this method is responsible for ensuring that this type\n    /// is compatible with the Postgres `Type`.\n    ///\n    /// The default implementation returns `Err(Box::new(WasNull))`.\n    #[allow(unused_variables)]\n    fn from_sql_null(ty: &Type) -> Result<Self, Box<dyn Error + Sync + Send>> {\n        
Err(Box::new(WasNull))\n    }\n\n    /// A convenience function that delegates to `from_sql` and `from_sql_null` depending on the\n    /// value of `raw`.\n    fn from_sql_nullable(\n        ty: &Type,\n        raw: Option<&'a [u8]>,\n    ) -> Result<Self, Box<dyn Error + Sync + Send>> {\n        match raw {\n            Some(raw) => Self::from_sql(ty, raw),\n            None => Self::from_sql_null(ty),\n        }\n    }\n\n    /// Determines if a value of this type can be created from the specified\n    /// Postgres `Type`.\n    fn accepts(ty: &Type) -> bool;\n}\n\n/// A trait for types which can be created from a Postgres value without borrowing any data.\n///\n/// This is primarily useful for trait bounds on functions.\npub trait FromSqlOwned: for<'a> FromSql<'a> {}\n\nimpl<T> FromSqlOwned for T where T: for<'a> FromSql<'a> {}\n\nimpl<'a, T: FromSql<'a>> FromSql<'a> for Option<T> {\n    fn from_sql(ty: &Type, raw: &'a [u8]) -> Result<Option<T>, Box<dyn Error + Sync + Send>> {\n        <T as FromSql>::from_sql(ty, raw).map(Some)\n    }\n\n    fn from_sql_null(_: &Type) -> Result<Option<T>, Box<dyn Error + Sync + Send>> {\n        Ok(None)\n    }\n\n    fn accepts(ty: &Type) -> bool {\n        <T as FromSql>::accepts(ty)\n    }\n}\n\nimpl<'a, T: FromSql<'a>> FromSql<'a> for Vec<T> {\n    fn from_sql(ty: &Type, raw: &'a [u8]) -> Result<Vec<T>, Box<dyn Error + Sync + Send>> {\n        let member_type = match *ty.kind() {\n            Kind::Array(ref member) => member,\n            _ => panic!(\"expected array type\"),\n        };\n\n        let array = types::array_from_sql(raw)?;\n        if array.dimensions().count()? 
> 1 {\n            return Err(\"array contains too many dimensions\".into());\n        }\n\n        array\n            .values()\n            .map(|v| T::from_sql_nullable(member_type, v))\n            .collect()\n    }\n\n    fn accepts(ty: &Type) -> bool {\n        match *ty.kind() {\n            Kind::Array(ref inner) => T::accepts(inner),\n            _ => false,\n        }\n    }\n}\n\nimpl<'a> FromSql<'a> for String {\n    fn from_sql(ty: &Type, raw: &'a [u8]) -> Result<String, Box<dyn Error + Sync + Send>> {\n        <&str as FromSql>::from_sql(ty, raw).map(ToString::to_string)\n    }\n\n    fn accepts(ty: &Type) -> bool {\n        <&str as FromSql>::accepts(ty)\n    }\n}\n\nimpl<'a> FromSql<'a> for &'a str {\n    fn from_sql(ty: &Type, raw: &'a [u8]) -> Result<&'a str, Box<dyn Error + Sync + Send>> {\n        match *ty {\n            ref ty if ty.name() == \"ltree\" => types::ltree_from_sql(raw),\n            ref ty if ty.name() == \"lquery\" => types::lquery_from_sql(raw),\n            ref ty if ty.name() == \"ltxtquery\" => types::ltxtquery_from_sql(raw),\n            _ => types::text_from_sql(raw),\n        }\n    }\n\n    fn accepts(ty: &Type) -> bool {\n        match *ty {\n            Type::VARCHAR | Type::TEXT | Type::BPCHAR | Type::NAME | Type::UNKNOWN => true,\n            ref ty\n                if (ty.name() == \"citext\"\n                    || ty.name() == \"ltree\"\n                    || ty.name() == \"lquery\"\n                    || ty.name() == \"ltxtquery\") =>\n            {\n                true\n            }\n            _ => false,\n        }\n    }\n}\n\nmacro_rules! 
simple_from {\n    ($t:ty, $f:ident, $($expected:ident),+) => {\n        impl<'a> FromSql<'a> for $t {\n            fn from_sql(_: &Type, raw: &'a [u8]) -> Result<$t, Box<dyn Error + Sync + Send>> {\n                types::$f(raw)\n            }\n\n            accepts!($($expected),+);\n        }\n    }\n}\n\nsimple_from!(i8, char_from_sql, CHAR);\nsimple_from!(u32, oid_from_sql, OID);\n\n/// An enum representing the nullability of a Postgres value.\npub enum IsNull {\n    /// The value is NULL.\n    Yes,\n    /// The value is not NULL.\n    No,\n}\n\n/// Supported Postgres message format types\n///\n/// Using Text format in a message assumes a Postgres `SERVER_ENCODING` of `UTF8`\n#[derive(Clone, Copy, Debug, PartialEq)]\npub enum Format {\n    /// Text format (UTF-8)\n    Text,\n    /// Compact, typed binary format\n    Binary,\n}\n"
  },
  {
    "path": "libs/proxy/postgres-types2/src/private.rs",
    "content": "use std::error::Error;\n\npub use bytes::BytesMut;\n\nuse crate::{FromSql, Type};\n\npub fn read_be_i32(buf: &mut &[u8]) -> Result<i32, Box<dyn Error + Sync + Send>> {\n    if buf.len() < 4 {\n        return Err(\"invalid buffer size\".into());\n    }\n    let mut bytes = [0; 4];\n    bytes.copy_from_slice(&buf[..4]);\n    *buf = &buf[4..];\n    Ok(i32::from_be_bytes(bytes))\n}\n\npub fn read_value<'a, T>(\n    type_: &Type,\n    buf: &mut &'a [u8],\n) -> Result<T, Box<dyn Error + Sync + Send>>\nwhere\n    T: FromSql<'a>,\n{\n    let len = read_be_i32(buf)?;\n    let value = if len < 0 {\n        None\n    } else {\n        if len as usize > buf.len() {\n            return Err(\"invalid buffer size\".into());\n        }\n        let (head, tail) = buf.split_at(len as usize);\n        *buf = tail;\n        Some(head)\n    };\n    T::from_sql_nullable(type_, value)\n}\n"
  },
  {
    "path": "libs/proxy/postgres-types2/src/type_gen.rs",
    "content": "// Autogenerated file - DO NOT EDIT\nuse std::sync::Arc;\n\nuse crate::{Kind, Oid, Type};\n\n#[derive(PartialEq, Eq, Debug, Hash)]\npub struct Other {\n    pub name: String,\n    pub oid: Oid,\n    pub kind: Kind,\n    pub schema: String,\n}\n\n#[derive(PartialEq, Eq, Clone, Debug, Hash)]\npub enum Inner {\n    Bool,\n    Bytea,\n    Char,\n    Name,\n    Int8,\n    Int2,\n    Int2Vector,\n    Int4,\n    Regproc,\n    Text,\n    Oid,\n    Tid,\n    Xid,\n    Cid,\n    OidVector,\n    PgDdlCommand,\n    Json,\n    Xml,\n    XmlArray,\n    PgNodeTree,\n    JsonArray,\n    TableAmHandler,\n    Xid8Array,\n    IndexAmHandler,\n    Point,\n    Lseg,\n    Path,\n    Box,\n    Polygon,\n    Line,\n    LineArray,\n    Cidr,\n    CidrArray,\n    Float4,\n    Float8,\n    Unknown,\n    Circle,\n    CircleArray,\n    Macaddr8,\n    Macaddr8Array,\n    Money,\n    MoneyArray,\n    Macaddr,\n    Inet,\n    BoolArray,\n    ByteaArray,\n    CharArray,\n    NameArray,\n    Int2Array,\n    Int2VectorArray,\n    Int4Array,\n    RegprocArray,\n    TextArray,\n    TidArray,\n    XidArray,\n    CidArray,\n    OidVectorArray,\n    BpcharArray,\n    VarcharArray,\n    Int8Array,\n    PointArray,\n    LsegArray,\n    PathArray,\n    BoxArray,\n    Float4Array,\n    Float8Array,\n    PolygonArray,\n    OidArray,\n    Aclitem,\n    AclitemArray,\n    MacaddrArray,\n    InetArray,\n    Bpchar,\n    Varchar,\n    Date,\n    Time,\n    Timestamp,\n    TimestampArray,\n    DateArray,\n    TimeArray,\n    Timestamptz,\n    TimestamptzArray,\n    Interval,\n    IntervalArray,\n    NumericArray,\n    CstringArray,\n    Timetz,\n    TimetzArray,\n    Bit,\n    BitArray,\n    Varbit,\n    VarbitArray,\n    Numeric,\n    Refcursor,\n    RefcursorArray,\n    Regprocedure,\n    Regoper,\n    Regoperator,\n    Regclass,\n    Regtype,\n    RegprocedureArray,\n    RegoperArray,\n    RegoperatorArray,\n    RegclassArray,\n    RegtypeArray,\n    Record,\n    Cstring,\n    Any,\n    
Anyarray,\n    Void,\n    Trigger,\n    LanguageHandler,\n    Internal,\n    Anyelement,\n    RecordArray,\n    Anynonarray,\n    TxidSnapshotArray,\n    Uuid,\n    UuidArray,\n    TxidSnapshot,\n    FdwHandler,\n    PgLsn,\n    PgLsnArray,\n    TsmHandler,\n    PgNdistinct,\n    PgDependencies,\n    Anyenum,\n    TsVector,\n    Tsquery,\n    GtsVector,\n    TsVectorArray,\n    GtsVectorArray,\n    TsqueryArray,\n    Regconfig,\n    RegconfigArray,\n    Regdictionary,\n    RegdictionaryArray,\n    Jsonb,\n    JsonbArray,\n    AnyRange,\n    EventTrigger,\n    Int4Range,\n    Int4RangeArray,\n    NumRange,\n    NumRangeArray,\n    TsRange,\n    TsRangeArray,\n    TstzRange,\n    TstzRangeArray,\n    DateRange,\n    DateRangeArray,\n    Int8Range,\n    Int8RangeArray,\n    Jsonpath,\n    JsonpathArray,\n    Regnamespace,\n    RegnamespaceArray,\n    Regrole,\n    RegroleArray,\n    Regcollation,\n    RegcollationArray,\n    Int4multiRange,\n    NummultiRange,\n    TsmultiRange,\n    TstzmultiRange,\n    DatemultiRange,\n    Int8multiRange,\n    AnymultiRange,\n    AnycompatiblemultiRange,\n    PgBrinBloomSummary,\n    PgBrinMinmaxMultiSummary,\n    PgMcvList,\n    PgSnapshot,\n    PgSnapshotArray,\n    Xid8,\n    Anycompatible,\n    Anycompatiblearray,\n    Anycompatiblenonarray,\n    AnycompatibleRange,\n    Int4multiRangeArray,\n    NummultiRangeArray,\n    TsmultiRangeArray,\n    TstzmultiRangeArray,\n    DatemultiRangeArray,\n    Int8multiRangeArray,\n    Other(Arc<Other>),\n}\n\nimpl Inner {\n    pub fn from_oid(oid: Oid) -> Option<Inner> {\n        match oid {\n            16 => Some(Inner::Bool),\n            17 => Some(Inner::Bytea),\n            18 => Some(Inner::Char),\n            19 => Some(Inner::Name),\n            20 => Some(Inner::Int8),\n            21 => Some(Inner::Int2),\n            22 => Some(Inner::Int2Vector),\n            23 => Some(Inner::Int4),\n            24 => Some(Inner::Regproc),\n            25 => Some(Inner::Text),\n            26 => 
Some(Inner::Oid),\n            27 => Some(Inner::Tid),\n            28 => Some(Inner::Xid),\n            29 => Some(Inner::Cid),\n            30 => Some(Inner::OidVector),\n            32 => Some(Inner::PgDdlCommand),\n            114 => Some(Inner::Json),\n            142 => Some(Inner::Xml),\n            143 => Some(Inner::XmlArray),\n            194 => Some(Inner::PgNodeTree),\n            199 => Some(Inner::JsonArray),\n            269 => Some(Inner::TableAmHandler),\n            271 => Some(Inner::Xid8Array),\n            325 => Some(Inner::IndexAmHandler),\n            600 => Some(Inner::Point),\n            601 => Some(Inner::Lseg),\n            602 => Some(Inner::Path),\n            603 => Some(Inner::Box),\n            604 => Some(Inner::Polygon),\n            628 => Some(Inner::Line),\n            629 => Some(Inner::LineArray),\n            650 => Some(Inner::Cidr),\n            651 => Some(Inner::CidrArray),\n            700 => Some(Inner::Float4),\n            701 => Some(Inner::Float8),\n            705 => Some(Inner::Unknown),\n            718 => Some(Inner::Circle),\n            719 => Some(Inner::CircleArray),\n            774 => Some(Inner::Macaddr8),\n            775 => Some(Inner::Macaddr8Array),\n            790 => Some(Inner::Money),\n            791 => Some(Inner::MoneyArray),\n            829 => Some(Inner::Macaddr),\n            869 => Some(Inner::Inet),\n            1000 => Some(Inner::BoolArray),\n            1001 => Some(Inner::ByteaArray),\n            1002 => Some(Inner::CharArray),\n            1003 => Some(Inner::NameArray),\n            1005 => Some(Inner::Int2Array),\n            1006 => Some(Inner::Int2VectorArray),\n            1007 => Some(Inner::Int4Array),\n            1008 => Some(Inner::RegprocArray),\n            1009 => Some(Inner::TextArray),\n            1010 => Some(Inner::TidArray),\n            1011 => Some(Inner::XidArray),\n            1012 => Some(Inner::CidArray),\n            1013 => Some(Inner::OidVectorArray),\n 
           1014 => Some(Inner::BpcharArray),\n            1015 => Some(Inner::VarcharArray),\n            1016 => Some(Inner::Int8Array),\n            1017 => Some(Inner::PointArray),\n            1018 => Some(Inner::LsegArray),\n            1019 => Some(Inner::PathArray),\n            1020 => Some(Inner::BoxArray),\n            1021 => Some(Inner::Float4Array),\n            1022 => Some(Inner::Float8Array),\n            1027 => Some(Inner::PolygonArray),\n            1028 => Some(Inner::OidArray),\n            1033 => Some(Inner::Aclitem),\n            1034 => Some(Inner::AclitemArray),\n            1040 => Some(Inner::MacaddrArray),\n            1041 => Some(Inner::InetArray),\n            1042 => Some(Inner::Bpchar),\n            1043 => Some(Inner::Varchar),\n            1082 => Some(Inner::Date),\n            1083 => Some(Inner::Time),\n            1114 => Some(Inner::Timestamp),\n            1115 => Some(Inner::TimestampArray),\n            1182 => Some(Inner::DateArray),\n            1183 => Some(Inner::TimeArray),\n            1184 => Some(Inner::Timestamptz),\n            1185 => Some(Inner::TimestamptzArray),\n            1186 => Some(Inner::Interval),\n            1187 => Some(Inner::IntervalArray),\n            1231 => Some(Inner::NumericArray),\n            1263 => Some(Inner::CstringArray),\n            1266 => Some(Inner::Timetz),\n            1270 => Some(Inner::TimetzArray),\n            1560 => Some(Inner::Bit),\n            1561 => Some(Inner::BitArray),\n            1562 => Some(Inner::Varbit),\n            1563 => Some(Inner::VarbitArray),\n            1700 => Some(Inner::Numeric),\n            1790 => Some(Inner::Refcursor),\n            2201 => Some(Inner::RefcursorArray),\n            2202 => Some(Inner::Regprocedure),\n            2203 => Some(Inner::Regoper),\n            2204 => Some(Inner::Regoperator),\n            2205 => Some(Inner::Regclass),\n            2206 => Some(Inner::Regtype),\n            2207 => 
Some(Inner::RegprocedureArray),\n            2208 => Some(Inner::RegoperArray),\n            2209 => Some(Inner::RegoperatorArray),\n            2210 => Some(Inner::RegclassArray),\n            2211 => Some(Inner::RegtypeArray),\n            2249 => Some(Inner::Record),\n            2275 => Some(Inner::Cstring),\n            2276 => Some(Inner::Any),\n            2277 => Some(Inner::Anyarray),\n            2278 => Some(Inner::Void),\n            2279 => Some(Inner::Trigger),\n            2280 => Some(Inner::LanguageHandler),\n            2281 => Some(Inner::Internal),\n            2283 => Some(Inner::Anyelement),\n            2287 => Some(Inner::RecordArray),\n            2776 => Some(Inner::Anynonarray),\n            2949 => Some(Inner::TxidSnapshotArray),\n            2950 => Some(Inner::Uuid),\n            2951 => Some(Inner::UuidArray),\n            2970 => Some(Inner::TxidSnapshot),\n            3115 => Some(Inner::FdwHandler),\n            3220 => Some(Inner::PgLsn),\n            3221 => Some(Inner::PgLsnArray),\n            3310 => Some(Inner::TsmHandler),\n            3361 => Some(Inner::PgNdistinct),\n            3402 => Some(Inner::PgDependencies),\n            3500 => Some(Inner::Anyenum),\n            3614 => Some(Inner::TsVector),\n            3615 => Some(Inner::Tsquery),\n            3642 => Some(Inner::GtsVector),\n            3643 => Some(Inner::TsVectorArray),\n            3644 => Some(Inner::GtsVectorArray),\n            3645 => Some(Inner::TsqueryArray),\n            3734 => Some(Inner::Regconfig),\n            3735 => Some(Inner::RegconfigArray),\n            3769 => Some(Inner::Regdictionary),\n            3770 => Some(Inner::RegdictionaryArray),\n            3802 => Some(Inner::Jsonb),\n            3807 => Some(Inner::JsonbArray),\n            3831 => Some(Inner::AnyRange),\n            3838 => Some(Inner::EventTrigger),\n            3904 => Some(Inner::Int4Range),\n            3905 => Some(Inner::Int4RangeArray),\n            3906 => 
Some(Inner::NumRange),\n            3907 => Some(Inner::NumRangeArray),\n            3908 => Some(Inner::TsRange),\n            3909 => Some(Inner::TsRangeArray),\n            3910 => Some(Inner::TstzRange),\n            3911 => Some(Inner::TstzRangeArray),\n            3912 => Some(Inner::DateRange),\n            3913 => Some(Inner::DateRangeArray),\n            3926 => Some(Inner::Int8Range),\n            3927 => Some(Inner::Int8RangeArray),\n            4072 => Some(Inner::Jsonpath),\n            4073 => Some(Inner::JsonpathArray),\n            4089 => Some(Inner::Regnamespace),\n            4090 => Some(Inner::RegnamespaceArray),\n            4096 => Some(Inner::Regrole),\n            4097 => Some(Inner::RegroleArray),\n            4191 => Some(Inner::Regcollation),\n            4192 => Some(Inner::RegcollationArray),\n            4451 => Some(Inner::Int4multiRange),\n            4532 => Some(Inner::NummultiRange),\n            4533 => Some(Inner::TsmultiRange),\n            4534 => Some(Inner::TstzmultiRange),\n            4535 => Some(Inner::DatemultiRange),\n            4536 => Some(Inner::Int8multiRange),\n            4537 => Some(Inner::AnymultiRange),\n            4538 => Some(Inner::AnycompatiblemultiRange),\n            4600 => Some(Inner::PgBrinBloomSummary),\n            4601 => Some(Inner::PgBrinMinmaxMultiSummary),\n            5017 => Some(Inner::PgMcvList),\n            5038 => Some(Inner::PgSnapshot),\n            5039 => Some(Inner::PgSnapshotArray),\n            5069 => Some(Inner::Xid8),\n            5077 => Some(Inner::Anycompatible),\n            5078 => Some(Inner::Anycompatiblearray),\n            5079 => Some(Inner::Anycompatiblenonarray),\n            5080 => Some(Inner::AnycompatibleRange),\n            6150 => Some(Inner::Int4multiRangeArray),\n            6151 => Some(Inner::NummultiRangeArray),\n            6152 => Some(Inner::TsmultiRangeArray),\n            6153 => Some(Inner::TstzmultiRangeArray),\n            6155 => 
Some(Inner::DatemultiRangeArray),\n            6157 => Some(Inner::Int8multiRangeArray),\n            _ => None,\n        }\n    }\n\n    pub const fn const_oid(&self) -> Oid {\n        match *self {\n            Inner::Bool => 16,\n            Inner::Bytea => 17,\n            Inner::Char => 18,\n            Inner::Name => 19,\n            Inner::Int8 => 20,\n            Inner::Int2 => 21,\n            Inner::Int2Vector => 22,\n            Inner::Int4 => 23,\n            Inner::Regproc => 24,\n            Inner::Text => 25,\n            Inner::Oid => 26,\n            Inner::Tid => 27,\n            Inner::Xid => 28,\n            Inner::Cid => 29,\n            Inner::OidVector => 30,\n            Inner::PgDdlCommand => 32,\n            Inner::Json => 114,\n            Inner::Xml => 142,\n            Inner::XmlArray => 143,\n            Inner::PgNodeTree => 194,\n            Inner::JsonArray => 199,\n            Inner::TableAmHandler => 269,\n            Inner::Xid8Array => 271,\n            Inner::IndexAmHandler => 325,\n            Inner::Point => 600,\n            Inner::Lseg => 601,\n            Inner::Path => 602,\n            Inner::Box => 603,\n            Inner::Polygon => 604,\n            Inner::Line => 628,\n            Inner::LineArray => 629,\n            Inner::Cidr => 650,\n            Inner::CidrArray => 651,\n            Inner::Float4 => 700,\n            Inner::Float8 => 701,\n            Inner::Unknown => 705,\n            Inner::Circle => 718,\n            Inner::CircleArray => 719,\n            Inner::Macaddr8 => 774,\n            Inner::Macaddr8Array => 775,\n            Inner::Money => 790,\n            Inner::MoneyArray => 791,\n            Inner::Macaddr => 829,\n            Inner::Inet => 869,\n            Inner::BoolArray => 1000,\n            Inner::ByteaArray => 1001,\n            Inner::CharArray => 1002,\n            Inner::NameArray => 1003,\n            Inner::Int2Array => 1005,\n            Inner::Int2VectorArray => 1006,\n            
Inner::Int4Array => 1007,\n            Inner::RegprocArray => 1008,\n            Inner::TextArray => 1009,\n            Inner::TidArray => 1010,\n            Inner::XidArray => 1011,\n            Inner::CidArray => 1012,\n            Inner::OidVectorArray => 1013,\n            Inner::BpcharArray => 1014,\n            Inner::VarcharArray => 1015,\n            Inner::Int8Array => 1016,\n            Inner::PointArray => 1017,\n            Inner::LsegArray => 1018,\n            Inner::PathArray => 1019,\n            Inner::BoxArray => 1020,\n            Inner::Float4Array => 1021,\n            Inner::Float8Array => 1022,\n            Inner::PolygonArray => 1027,\n            Inner::OidArray => 1028,\n            Inner::Aclitem => 1033,\n            Inner::AclitemArray => 1034,\n            Inner::MacaddrArray => 1040,\n            Inner::InetArray => 1041,\n            Inner::Bpchar => 1042,\n            Inner::Varchar => 1043,\n            Inner::Date => 1082,\n            Inner::Time => 1083,\n            Inner::Timestamp => 1114,\n            Inner::TimestampArray => 1115,\n            Inner::DateArray => 1182,\n            Inner::TimeArray => 1183,\n            Inner::Timestamptz => 1184,\n            Inner::TimestamptzArray => 1185,\n            Inner::Interval => 1186,\n            Inner::IntervalArray => 1187,\n            Inner::NumericArray => 1231,\n            Inner::CstringArray => 1263,\n            Inner::Timetz => 1266,\n            Inner::TimetzArray => 1270,\n            Inner::Bit => 1560,\n            Inner::BitArray => 1561,\n            Inner::Varbit => 1562,\n            Inner::VarbitArray => 1563,\n            Inner::Numeric => 1700,\n            Inner::Refcursor => 1790,\n            Inner::RefcursorArray => 2201,\n            Inner::Regprocedure => 2202,\n            Inner::Regoper => 2203,\n            Inner::Regoperator => 2204,\n            Inner::Regclass => 2205,\n            Inner::Regtype => 2206,\n            Inner::RegprocedureArray => 
2207,\n            Inner::RegoperArray => 2208,\n            Inner::RegoperatorArray => 2209,\n            Inner::RegclassArray => 2210,\n            Inner::RegtypeArray => 2211,\n            Inner::Record => 2249,\n            Inner::Cstring => 2275,\n            Inner::Any => 2276,\n            Inner::Anyarray => 2277,\n            Inner::Void => 2278,\n            Inner::Trigger => 2279,\n            Inner::LanguageHandler => 2280,\n            Inner::Internal => 2281,\n            Inner::Anyelement => 2283,\n            Inner::RecordArray => 2287,\n            Inner::Anynonarray => 2776,\n            Inner::TxidSnapshotArray => 2949,\n            Inner::Uuid => 2950,\n            Inner::UuidArray => 2951,\n            Inner::TxidSnapshot => 2970,\n            Inner::FdwHandler => 3115,\n            Inner::PgLsn => 3220,\n            Inner::PgLsnArray => 3221,\n            Inner::TsmHandler => 3310,\n            Inner::PgNdistinct => 3361,\n            Inner::PgDependencies => 3402,\n            Inner::Anyenum => 3500,\n            Inner::TsVector => 3614,\n            Inner::Tsquery => 3615,\n            Inner::GtsVector => 3642,\n            Inner::TsVectorArray => 3643,\n            Inner::GtsVectorArray => 3644,\n            Inner::TsqueryArray => 3645,\n            Inner::Regconfig => 3734,\n            Inner::RegconfigArray => 3735,\n            Inner::Regdictionary => 3769,\n            Inner::RegdictionaryArray => 3770,\n            Inner::Jsonb => 3802,\n            Inner::JsonbArray => 3807,\n            Inner::AnyRange => 3831,\n            Inner::EventTrigger => 3838,\n            Inner::Int4Range => 3904,\n            Inner::Int4RangeArray => 3905,\n            Inner::NumRange => 3906,\n            Inner::NumRangeArray => 3907,\n            Inner::TsRange => 3908,\n            Inner::TsRangeArray => 3909,\n            Inner::TstzRange => 3910,\n            Inner::TstzRangeArray => 3911,\n            Inner::DateRange => 3912,\n            
Inner::DateRangeArray => 3913,\n            Inner::Int8Range => 3926,\n            Inner::Int8RangeArray => 3927,\n            Inner::Jsonpath => 4072,\n            Inner::JsonpathArray => 4073,\n            Inner::Regnamespace => 4089,\n            Inner::RegnamespaceArray => 4090,\n            Inner::Regrole => 4096,\n            Inner::RegroleArray => 4097,\n            Inner::Regcollation => 4191,\n            Inner::RegcollationArray => 4192,\n            Inner::Int4multiRange => 4451,\n            Inner::NummultiRange => 4532,\n            Inner::TsmultiRange => 4533,\n            Inner::TstzmultiRange => 4534,\n            Inner::DatemultiRange => 4535,\n            Inner::Int8multiRange => 4536,\n            Inner::AnymultiRange => 4537,\n            Inner::AnycompatiblemultiRange => 4538,\n            Inner::PgBrinBloomSummary => 4600,\n            Inner::PgBrinMinmaxMultiSummary => 4601,\n            Inner::PgMcvList => 5017,\n            Inner::PgSnapshot => 5038,\n            Inner::PgSnapshotArray => 5039,\n            Inner::Xid8 => 5069,\n            Inner::Anycompatible => 5077,\n            Inner::Anycompatiblearray => 5078,\n            Inner::Anycompatiblenonarray => 5079,\n            Inner::AnycompatibleRange => 5080,\n            Inner::Int4multiRangeArray => 6150,\n            Inner::NummultiRangeArray => 6151,\n            Inner::TsmultiRangeArray => 6152,\n            Inner::TstzmultiRangeArray => 6153,\n            Inner::DatemultiRangeArray => 6155,\n            Inner::Int8multiRangeArray => 6157,\n            Inner::Other(_) => u32::MAX,\n        }\n    }\n\n    pub fn oid(&self) -> Oid {\n        match *self {\n            Inner::Other(ref u) => u.oid,\n            _ => self.const_oid(),\n        }\n    }\n\n    pub fn kind(&self) -> &Kind {\n        match *self {\n            Inner::Bool => &Kind::Simple,\n            Inner::Bytea => &Kind::Simple,\n            Inner::Char => &Kind::Simple,\n            Inner::Name => &Kind::Simple,\n  
          Inner::Int8 => &Kind::Simple,\n            Inner::Int2 => &Kind::Simple,\n            Inner::Int2Vector => &Kind::Array(Type(Inner::Int2)),\n            Inner::Int4 => &Kind::Simple,\n            Inner::Regproc => &Kind::Simple,\n            Inner::Text => &Kind::Simple,\n            Inner::Oid => &Kind::Simple,\n            Inner::Tid => &Kind::Simple,\n            Inner::Xid => &Kind::Simple,\n            Inner::Cid => &Kind::Simple,\n            Inner::OidVector => &Kind::Array(Type(Inner::Oid)),\n            Inner::PgDdlCommand => &Kind::Pseudo,\n            Inner::Json => &Kind::Simple,\n            Inner::Xml => &Kind::Simple,\n            Inner::XmlArray => &Kind::Array(Type(Inner::Xml)),\n            Inner::PgNodeTree => &Kind::Simple,\n            Inner::JsonArray => &Kind::Array(Type(Inner::Json)),\n            Inner::TableAmHandler => &Kind::Pseudo,\n            Inner::Xid8Array => &Kind::Array(Type(Inner::Xid8)),\n            Inner::IndexAmHandler => &Kind::Pseudo,\n            Inner::Point => &Kind::Simple,\n            Inner::Lseg => &Kind::Simple,\n            Inner::Path => &Kind::Simple,\n            Inner::Box => &Kind::Simple,\n            Inner::Polygon => &Kind::Simple,\n            Inner::Line => &Kind::Simple,\n            Inner::LineArray => &Kind::Array(Type(Inner::Line)),\n            Inner::Cidr => &Kind::Simple,\n            Inner::CidrArray => &Kind::Array(Type(Inner::Cidr)),\n            Inner::Float4 => &Kind::Simple,\n            Inner::Float8 => &Kind::Simple,\n            Inner::Unknown => &Kind::Simple,\n            Inner::Circle => &Kind::Simple,\n            Inner::CircleArray => &Kind::Array(Type(Inner::Circle)),\n            Inner::Macaddr8 => &Kind::Simple,\n            Inner::Macaddr8Array => &Kind::Array(Type(Inner::Macaddr8)),\n            Inner::Money => &Kind::Simple,\n            Inner::MoneyArray => &Kind::Array(Type(Inner::Money)),\n            Inner::Macaddr => &Kind::Simple,\n            Inner::Inet => 
&Kind::Simple,\n            Inner::BoolArray => &Kind::Array(Type(Inner::Bool)),\n            Inner::ByteaArray => &Kind::Array(Type(Inner::Bytea)),\n            Inner::CharArray => &Kind::Array(Type(Inner::Char)),\n            Inner::NameArray => &Kind::Array(Type(Inner::Name)),\n            Inner::Int2Array => &Kind::Array(Type(Inner::Int2)),\n            Inner::Int2VectorArray => &Kind::Array(Type(Inner::Int2Vector)),\n            Inner::Int4Array => &Kind::Array(Type(Inner::Int4)),\n            Inner::RegprocArray => &Kind::Array(Type(Inner::Regproc)),\n            Inner::TextArray => &Kind::Array(Type(Inner::Text)),\n            Inner::TidArray => &Kind::Array(Type(Inner::Tid)),\n            Inner::XidArray => &Kind::Array(Type(Inner::Xid)),\n            Inner::CidArray => &Kind::Array(Type(Inner::Cid)),\n            Inner::OidVectorArray => &Kind::Array(Type(Inner::OidVector)),\n            Inner::BpcharArray => &Kind::Array(Type(Inner::Bpchar)),\n            Inner::VarcharArray => &Kind::Array(Type(Inner::Varchar)),\n            Inner::Int8Array => &Kind::Array(Type(Inner::Int8)),\n            Inner::PointArray => &Kind::Array(Type(Inner::Point)),\n            Inner::LsegArray => &Kind::Array(Type(Inner::Lseg)),\n            Inner::PathArray => &Kind::Array(Type(Inner::Path)),\n            Inner::BoxArray => &Kind::Array(Type(Inner::Box)),\n            Inner::Float4Array => &Kind::Array(Type(Inner::Float4)),\n            Inner::Float8Array => &Kind::Array(Type(Inner::Float8)),\n            Inner::PolygonArray => &Kind::Array(Type(Inner::Polygon)),\n            Inner::OidArray => &Kind::Array(Type(Inner::Oid)),\n            Inner::Aclitem => &Kind::Simple,\n            Inner::AclitemArray => &Kind::Array(Type(Inner::Aclitem)),\n            Inner::MacaddrArray => &Kind::Array(Type(Inner::Macaddr)),\n            Inner::InetArray => &Kind::Array(Type(Inner::Inet)),\n            Inner::Bpchar => &Kind::Simple,\n            Inner::Varchar => &Kind::Simple,\n       
     Inner::Date => &Kind::Simple,\n            Inner::Time => &Kind::Simple,\n            Inner::Timestamp => &Kind::Simple,\n            Inner::TimestampArray => &Kind::Array(Type(Inner::Timestamp)),\n            Inner::DateArray => &Kind::Array(Type(Inner::Date)),\n            Inner::TimeArray => &Kind::Array(Type(Inner::Time)),\n            Inner::Timestamptz => &Kind::Simple,\n            Inner::TimestamptzArray => &Kind::Array(Type(Inner::Timestamptz)),\n            Inner::Interval => &Kind::Simple,\n            Inner::IntervalArray => &Kind::Array(Type(Inner::Interval)),\n            Inner::NumericArray => &Kind::Array(Type(Inner::Numeric)),\n            Inner::CstringArray => &Kind::Array(Type(Inner::Cstring)),\n            Inner::Timetz => &Kind::Simple,\n            Inner::TimetzArray => &Kind::Array(Type(Inner::Timetz)),\n            Inner::Bit => &Kind::Simple,\n            Inner::BitArray => &Kind::Array(Type(Inner::Bit)),\n            Inner::Varbit => &Kind::Simple,\n            Inner::VarbitArray => &Kind::Array(Type(Inner::Varbit)),\n            Inner::Numeric => &Kind::Simple,\n            Inner::Refcursor => &Kind::Simple,\n            Inner::RefcursorArray => &Kind::Array(Type(Inner::Refcursor)),\n            Inner::Regprocedure => &Kind::Simple,\n            Inner::Regoper => &Kind::Simple,\n            Inner::Regoperator => &Kind::Simple,\n            Inner::Regclass => &Kind::Simple,\n            Inner::Regtype => &Kind::Simple,\n            Inner::RegprocedureArray => &Kind::Array(Type(Inner::Regprocedure)),\n            Inner::RegoperArray => &Kind::Array(Type(Inner::Regoper)),\n            Inner::RegoperatorArray => &Kind::Array(Type(Inner::Regoperator)),\n            Inner::RegclassArray => &Kind::Array(Type(Inner::Regclass)),\n            Inner::RegtypeArray => &Kind::Array(Type(Inner::Regtype)),\n            Inner::Record => &Kind::Pseudo,\n            Inner::Cstring => &Kind::Pseudo,\n            Inner::Any => &Kind::Pseudo,\n           
 Inner::Anyarray => &Kind::Pseudo,\n            Inner::Void => &Kind::Pseudo,\n            Inner::Trigger => &Kind::Pseudo,\n            Inner::LanguageHandler => &Kind::Pseudo,\n            Inner::Internal => &Kind::Pseudo,\n            Inner::Anyelement => &Kind::Pseudo,\n            Inner::RecordArray => &Kind::Pseudo,\n            Inner::Anynonarray => &Kind::Pseudo,\n            Inner::TxidSnapshotArray => &Kind::Array(Type(Inner::TxidSnapshot)),\n            Inner::Uuid => &Kind::Simple,\n            Inner::UuidArray => &Kind::Array(Type(Inner::Uuid)),\n            Inner::TxidSnapshot => &Kind::Simple,\n            Inner::FdwHandler => &Kind::Pseudo,\n            Inner::PgLsn => &Kind::Simple,\n            Inner::PgLsnArray => &Kind::Array(Type(Inner::PgLsn)),\n            Inner::TsmHandler => &Kind::Pseudo,\n            Inner::PgNdistinct => &Kind::Simple,\n            Inner::PgDependencies => &Kind::Simple,\n            Inner::Anyenum => &Kind::Pseudo,\n            Inner::TsVector => &Kind::Simple,\n            Inner::Tsquery => &Kind::Simple,\n            Inner::GtsVector => &Kind::Simple,\n            Inner::TsVectorArray => &Kind::Array(Type(Inner::TsVector)),\n            Inner::GtsVectorArray => &Kind::Array(Type(Inner::GtsVector)),\n            Inner::TsqueryArray => &Kind::Array(Type(Inner::Tsquery)),\n            Inner::Regconfig => &Kind::Simple,\n            Inner::RegconfigArray => &Kind::Array(Type(Inner::Regconfig)),\n            Inner::Regdictionary => &Kind::Simple,\n            Inner::RegdictionaryArray => &Kind::Array(Type(Inner::Regdictionary)),\n            Inner::Jsonb => &Kind::Simple,\n            Inner::JsonbArray => &Kind::Array(Type(Inner::Jsonb)),\n            Inner::AnyRange => &Kind::Pseudo,\n            Inner::EventTrigger => &Kind::Pseudo,\n            Inner::Int4Range => &const { Kind::Range(Inner::Int4.const_oid()) },\n            Inner::Int4RangeArray => &Kind::Array(Type(Inner::Int4Range)),\n            Inner::NumRange => 
&const { Kind::Range(Inner::Numeric.const_oid()) },\n            Inner::NumRangeArray => &Kind::Array(Type(Inner::NumRange)),\n            Inner::TsRange => &const { Kind::Range(Inner::Timestamp.const_oid()) },\n            Inner::TsRangeArray => &Kind::Array(Type(Inner::TsRange)),\n            Inner::TstzRange => &const { Kind::Range(Inner::Timestamptz.const_oid()) },\n            Inner::TstzRangeArray => &Kind::Array(Type(Inner::TstzRange)),\n            Inner::DateRange => &const { Kind::Range(Inner::Date.const_oid()) },\n            Inner::DateRangeArray => &Kind::Array(Type(Inner::DateRange)),\n            Inner::Int8Range => &const { Kind::Range(Inner::Int8.const_oid()) },\n            Inner::Int8RangeArray => &Kind::Array(Type(Inner::Int8Range)),\n            Inner::Jsonpath => &Kind::Simple,\n            Inner::JsonpathArray => &Kind::Array(Type(Inner::Jsonpath)),\n            Inner::Regnamespace => &Kind::Simple,\n            Inner::RegnamespaceArray => &Kind::Array(Type(Inner::Regnamespace)),\n            Inner::Regrole => &Kind::Simple,\n            Inner::RegroleArray => &Kind::Array(Type(Inner::Regrole)),\n            Inner::Regcollation => &Kind::Simple,\n            Inner::RegcollationArray => &Kind::Array(Type(Inner::Regcollation)),\n            Inner::Int4multiRange => &Kind::Multirange(Type(Inner::Int4)),\n            Inner::NummultiRange => &Kind::Multirange(Type(Inner::Numeric)),\n            Inner::TsmultiRange => &Kind::Multirange(Type(Inner::Timestamp)),\n            Inner::TstzmultiRange => &Kind::Multirange(Type(Inner::Timestamptz)),\n            Inner::DatemultiRange => &Kind::Multirange(Type(Inner::Date)),\n            Inner::Int8multiRange => &Kind::Multirange(Type(Inner::Int8)),\n            Inner::AnymultiRange => &Kind::Pseudo,\n            Inner::AnycompatiblemultiRange => &Kind::Pseudo,\n            Inner::PgBrinBloomSummary => &Kind::Simple,\n            Inner::PgBrinMinmaxMultiSummary => &Kind::Simple,\n            
Inner::PgMcvList => &Kind::Simple,\n            Inner::PgSnapshot => &Kind::Simple,\n            Inner::PgSnapshotArray => &Kind::Array(Type(Inner::PgSnapshot)),\n            Inner::Xid8 => &Kind::Simple,\n            Inner::Anycompatible => &Kind::Pseudo,\n            Inner::Anycompatiblearray => &Kind::Pseudo,\n            Inner::Anycompatiblenonarray => &Kind::Pseudo,\n            Inner::AnycompatibleRange => &Kind::Pseudo,\n            Inner::Int4multiRangeArray => &Kind::Array(Type(Inner::Int4multiRange)),\n            Inner::NummultiRangeArray => &Kind::Array(Type(Inner::NummultiRange)),\n            Inner::TsmultiRangeArray => &Kind::Array(Type(Inner::TsmultiRange)),\n            Inner::TstzmultiRangeArray => &Kind::Array(Type(Inner::TstzmultiRange)),\n            Inner::DatemultiRangeArray => &Kind::Array(Type(Inner::DatemultiRange)),\n            Inner::Int8multiRangeArray => &Kind::Array(Type(Inner::Int8multiRange)),\n            Inner::Other(ref u) => &u.kind,\n        }\n    }\n\n    pub fn name(&self) -> &str {\n        match *self {\n            Inner::Bool => \"bool\",\n            Inner::Bytea => \"bytea\",\n            Inner::Char => \"char\",\n            Inner::Name => \"name\",\n            Inner::Int8 => \"int8\",\n            Inner::Int2 => \"int2\",\n            Inner::Int2Vector => \"int2vector\",\n            Inner::Int4 => \"int4\",\n            Inner::Regproc => \"regproc\",\n            Inner::Text => \"text\",\n            Inner::Oid => \"oid\",\n            Inner::Tid => \"tid\",\n            Inner::Xid => \"xid\",\n            Inner::Cid => \"cid\",\n            Inner::OidVector => \"oidvector\",\n            Inner::PgDdlCommand => \"pg_ddl_command\",\n            Inner::Json => \"json\",\n            Inner::Xml => \"xml\",\n            Inner::XmlArray => \"_xml\",\n            Inner::PgNodeTree => \"pg_node_tree\",\n            Inner::JsonArray => \"_json\",\n            Inner::TableAmHandler => \"table_am_handler\",\n            
Inner::Xid8Array => \"_xid8\",\n            Inner::IndexAmHandler => \"index_am_handler\",\n            Inner::Point => \"point\",\n            Inner::Lseg => \"lseg\",\n            Inner::Path => \"path\",\n            Inner::Box => \"box\",\n            Inner::Polygon => \"polygon\",\n            Inner::Line => \"line\",\n            Inner::LineArray => \"_line\",\n            Inner::Cidr => \"cidr\",\n            Inner::CidrArray => \"_cidr\",\n            Inner::Float4 => \"float4\",\n            Inner::Float8 => \"float8\",\n            Inner::Unknown => \"unknown\",\n            Inner::Circle => \"circle\",\n            Inner::CircleArray => \"_circle\",\n            Inner::Macaddr8 => \"macaddr8\",\n            Inner::Macaddr8Array => \"_macaddr8\",\n            Inner::Money => \"money\",\n            Inner::MoneyArray => \"_money\",\n            Inner::Macaddr => \"macaddr\",\n            Inner::Inet => \"inet\",\n            Inner::BoolArray => \"_bool\",\n            Inner::ByteaArray => \"_bytea\",\n            Inner::CharArray => \"_char\",\n            Inner::NameArray => \"_name\",\n            Inner::Int2Array => \"_int2\",\n            Inner::Int2VectorArray => \"_int2vector\",\n            Inner::Int4Array => \"_int4\",\n            Inner::RegprocArray => \"_regproc\",\n            Inner::TextArray => \"_text\",\n            Inner::TidArray => \"_tid\",\n            Inner::XidArray => \"_xid\",\n            Inner::CidArray => \"_cid\",\n            Inner::OidVectorArray => \"_oidvector\",\n            Inner::BpcharArray => \"_bpchar\",\n            Inner::VarcharArray => \"_varchar\",\n            Inner::Int8Array => \"_int8\",\n            Inner::PointArray => \"_point\",\n            Inner::LsegArray => \"_lseg\",\n            Inner::PathArray => \"_path\",\n            Inner::BoxArray => \"_box\",\n            Inner::Float4Array => \"_float4\",\n            Inner::Float8Array => \"_float8\",\n            Inner::PolygonArray => \"_polygon\",\n    
        Inner::OidArray => \"_oid\",\n            Inner::Aclitem => \"aclitem\",\n            Inner::AclitemArray => \"_aclitem\",\n            Inner::MacaddrArray => \"_macaddr\",\n            Inner::InetArray => \"_inet\",\n            Inner::Bpchar => \"bpchar\",\n            Inner::Varchar => \"varchar\",\n            Inner::Date => \"date\",\n            Inner::Time => \"time\",\n            Inner::Timestamp => \"timestamp\",\n            Inner::TimestampArray => \"_timestamp\",\n            Inner::DateArray => \"_date\",\n            Inner::TimeArray => \"_time\",\n            Inner::Timestamptz => \"timestamptz\",\n            Inner::TimestamptzArray => \"_timestamptz\",\n            Inner::Interval => \"interval\",\n            Inner::IntervalArray => \"_interval\",\n            Inner::NumericArray => \"_numeric\",\n            Inner::CstringArray => \"_cstring\",\n            Inner::Timetz => \"timetz\",\n            Inner::TimetzArray => \"_timetz\",\n            Inner::Bit => \"bit\",\n            Inner::BitArray => \"_bit\",\n            Inner::Varbit => \"varbit\",\n            Inner::VarbitArray => \"_varbit\",\n            Inner::Numeric => \"numeric\",\n            Inner::Refcursor => \"refcursor\",\n            Inner::RefcursorArray => \"_refcursor\",\n            Inner::Regprocedure => \"regprocedure\",\n            Inner::Regoper => \"regoper\",\n            Inner::Regoperator => \"regoperator\",\n            Inner::Regclass => \"regclass\",\n            Inner::Regtype => \"regtype\",\n            Inner::RegprocedureArray => \"_regprocedure\",\n            Inner::RegoperArray => \"_regoper\",\n            Inner::RegoperatorArray => \"_regoperator\",\n            Inner::RegclassArray => \"_regclass\",\n            Inner::RegtypeArray => \"_regtype\",\n            Inner::Record => \"record\",\n            Inner::Cstring => \"cstring\",\n            Inner::Any => \"any\",\n            Inner::Anyarray => \"anyarray\",\n            Inner::Void => 
\"void\",\n            Inner::Trigger => \"trigger\",\n            Inner::LanguageHandler => \"language_handler\",\n            Inner::Internal => \"internal\",\n            Inner::Anyelement => \"anyelement\",\n            Inner::RecordArray => \"_record\",\n            Inner::Anynonarray => \"anynonarray\",\n            Inner::TxidSnapshotArray => \"_txid_snapshot\",\n            Inner::Uuid => \"uuid\",\n            Inner::UuidArray => \"_uuid\",\n            Inner::TxidSnapshot => \"txid_snapshot\",\n            Inner::FdwHandler => \"fdw_handler\",\n            Inner::PgLsn => \"pg_lsn\",\n            Inner::PgLsnArray => \"_pg_lsn\",\n            Inner::TsmHandler => \"tsm_handler\",\n            Inner::PgNdistinct => \"pg_ndistinct\",\n            Inner::PgDependencies => \"pg_dependencies\",\n            Inner::Anyenum => \"anyenum\",\n            Inner::TsVector => \"tsvector\",\n            Inner::Tsquery => \"tsquery\",\n            Inner::GtsVector => \"gtsvector\",\n            Inner::TsVectorArray => \"_tsvector\",\n            Inner::GtsVectorArray => \"_gtsvector\",\n            Inner::TsqueryArray => \"_tsquery\",\n            Inner::Regconfig => \"regconfig\",\n            Inner::RegconfigArray => \"_regconfig\",\n            Inner::Regdictionary => \"regdictionary\",\n            Inner::RegdictionaryArray => \"_regdictionary\",\n            Inner::Jsonb => \"jsonb\",\n            Inner::JsonbArray => \"_jsonb\",\n            Inner::AnyRange => \"anyrange\",\n            Inner::EventTrigger => \"event_trigger\",\n            Inner::Int4Range => \"int4range\",\n            Inner::Int4RangeArray => \"_int4range\",\n            Inner::NumRange => \"numrange\",\n            Inner::NumRangeArray => \"_numrange\",\n            Inner::TsRange => \"tsrange\",\n            Inner::TsRangeArray => \"_tsrange\",\n            Inner::TstzRange => \"tstzrange\",\n            Inner::TstzRangeArray => \"_tstzrange\",\n            Inner::DateRange => 
\"daterange\",\n            Inner::DateRangeArray => \"_daterange\",\n            Inner::Int8Range => \"int8range\",\n            Inner::Int8RangeArray => \"_int8range\",\n            Inner::Jsonpath => \"jsonpath\",\n            Inner::JsonpathArray => \"_jsonpath\",\n            Inner::Regnamespace => \"regnamespace\",\n            Inner::RegnamespaceArray => \"_regnamespace\",\n            Inner::Regrole => \"regrole\",\n            Inner::RegroleArray => \"_regrole\",\n            Inner::Regcollation => \"regcollation\",\n            Inner::RegcollationArray => \"_regcollation\",\n            Inner::Int4multiRange => \"int4multirange\",\n            Inner::NummultiRange => \"nummultirange\",\n            Inner::TsmultiRange => \"tsmultirange\",\n            Inner::TstzmultiRange => \"tstzmultirange\",\n            Inner::DatemultiRange => \"datemultirange\",\n            Inner::Int8multiRange => \"int8multirange\",\n            Inner::AnymultiRange => \"anymultirange\",\n            Inner::AnycompatiblemultiRange => \"anycompatiblemultirange\",\n            Inner::PgBrinBloomSummary => \"pg_brin_bloom_summary\",\n            Inner::PgBrinMinmaxMultiSummary => \"pg_brin_minmax_multi_summary\",\n            Inner::PgMcvList => \"pg_mcv_list\",\n            Inner::PgSnapshot => \"pg_snapshot\",\n            Inner::PgSnapshotArray => \"_pg_snapshot\",\n            Inner::Xid8 => \"xid8\",\n            Inner::Anycompatible => \"anycompatible\",\n            Inner::Anycompatiblearray => \"anycompatiblearray\",\n            Inner::Anycompatiblenonarray => \"anycompatiblenonarray\",\n            Inner::AnycompatibleRange => \"anycompatiblerange\",\n            Inner::Int4multiRangeArray => \"_int4multirange\",\n            Inner::NummultiRangeArray => \"_nummultirange\",\n            Inner::TsmultiRangeArray => \"_tsmultirange\",\n            Inner::TstzmultiRangeArray => \"_tstzmultirange\",\n            Inner::DatemultiRangeArray => \"_datemultirange\",\n            
Inner::Int8multiRangeArray => \"_int8multirange\",\n            Inner::Other(ref u) => &u.name,\n        }\n    }\n}\nimpl Type {\n    /// BOOL - boolean, &#39;true&#39;/&#39;false&#39;\n    pub const BOOL: Type = Type(Inner::Bool);\n\n    /// BYTEA - variable-length string, binary values escaped\n    pub const BYTEA: Type = Type(Inner::Bytea);\n\n    /// CHAR - single character\n    pub const CHAR: Type = Type(Inner::Char);\n\n    /// NAME - 63-byte type for storing system identifiers\n    pub const NAME: Type = Type(Inner::Name);\n\n    /// INT8 - ~18 digit integer, 8-byte storage\n    pub const INT8: Type = Type(Inner::Int8);\n\n    /// INT2 - -32 thousand to 32 thousand, 2-byte storage\n    pub const INT2: Type = Type(Inner::Int2);\n\n    /// INT2VECTOR - array of int2, used in system tables\n    pub const INT2_VECTOR: Type = Type(Inner::Int2Vector);\n\n    /// INT4 - -2 billion to 2 billion integer, 4-byte storage\n    pub const INT4: Type = Type(Inner::Int4);\n\n    /// REGPROC - registered procedure\n    pub const REGPROC: Type = Type(Inner::Regproc);\n\n    /// TEXT - variable-length string, no limit specified\n    pub const TEXT: Type = Type(Inner::Text);\n\n    /// OID - object identifier&#40;oid&#41;, maximum 4 billion\n    pub const OID: Type = Type(Inner::Oid);\n\n    /// TID - &#40;block, offset&#41;, physical location of tuple\n    pub const TID: Type = Type(Inner::Tid);\n\n    /// XID - transaction id\n    pub const XID: Type = Type(Inner::Xid);\n\n    /// CID - command identifier type, sequence in transaction id\n    pub const CID: Type = Type(Inner::Cid);\n\n    /// OIDVECTOR - array of oids, used in system tables\n    pub const OID_VECTOR: Type = Type(Inner::OidVector);\n\n    /// PG_DDL_COMMAND - internal type for passing CollectedCommand\n    pub const PG_DDL_COMMAND: Type = Type(Inner::PgDdlCommand);\n\n    /// JSON - JSON stored as text\n    pub const JSON: Type = Type(Inner::Json);\n\n    /// XML - XML content\n    pub const XML: Type = 
Type(Inner::Xml);\n\n    /// XML&#91;&#93;\n    pub const XML_ARRAY: Type = Type(Inner::XmlArray);\n\n    /// PG_NODE_TREE - string representing an internal node tree\n    pub const PG_NODE_TREE: Type = Type(Inner::PgNodeTree);\n\n    /// JSON&#91;&#93;\n    pub const JSON_ARRAY: Type = Type(Inner::JsonArray);\n\n    /// TABLE_AM_HANDLER\n    pub const TABLE_AM_HANDLER: Type = Type(Inner::TableAmHandler);\n\n    /// XID8&#91;&#93;\n    pub const XID8_ARRAY: Type = Type(Inner::Xid8Array);\n\n    /// INDEX_AM_HANDLER - pseudo-type for the result of an index AM handler function\n    pub const INDEX_AM_HANDLER: Type = Type(Inner::IndexAmHandler);\n\n    /// POINT - geometric point &#39;&#40;x, y&#41;&#39;\n    pub const POINT: Type = Type(Inner::Point);\n\n    /// LSEG - geometric line segment &#39;&#40;pt1,pt2&#41;&#39;\n    pub const LSEG: Type = Type(Inner::Lseg);\n\n    /// PATH - geometric path &#39;&#40;pt1,...&#41;&#39;\n    pub const PATH: Type = Type(Inner::Path);\n\n    /// BOX - geometric box &#39;&#40;lower left,upper right&#41;&#39;\n    pub const BOX: Type = Type(Inner::Box);\n\n    /// POLYGON - geometric polygon &#39;&#40;pt1,...&#41;&#39;\n    pub const POLYGON: Type = Type(Inner::Polygon);\n\n    /// LINE - geometric line\n    pub const LINE: Type = Type(Inner::Line);\n\n    /// LINE&#91;&#93;\n    pub const LINE_ARRAY: Type = Type(Inner::LineArray);\n\n    /// CIDR - network IP address/netmask, network address\n    pub const CIDR: Type = Type(Inner::Cidr);\n\n    /// CIDR&#91;&#93;\n    pub const CIDR_ARRAY: Type = Type(Inner::CidrArray);\n\n    /// FLOAT4 - single-precision floating point number, 4-byte storage\n    pub const FLOAT4: Type = Type(Inner::Float4);\n\n    /// FLOAT8 - double-precision floating point number, 8-byte storage\n    pub const FLOAT8: Type = Type(Inner::Float8);\n\n    /// UNKNOWN - pseudo-type representing an undetermined type\n    pub const UNKNOWN: Type = Type(Inner::Unknown);\n\n    /// CIRCLE - geometric circle 
&#39;&#40;center,radius&#41;&#39;\n    pub const CIRCLE: Type = Type(Inner::Circle);\n\n    /// CIRCLE&#91;&#93;\n    pub const CIRCLE_ARRAY: Type = Type(Inner::CircleArray);\n\n    /// MACADDR8 - XX:XX:XX:XX:XX:XX:XX:XX, MAC address\n    pub const MACADDR8: Type = Type(Inner::Macaddr8);\n\n    /// MACADDR8&#91;&#93;\n    pub const MACADDR8_ARRAY: Type = Type(Inner::Macaddr8Array);\n\n    /// MONEY - monetary amounts, &#36;d,ddd.cc\n    pub const MONEY: Type = Type(Inner::Money);\n\n    /// MONEY&#91;&#93;\n    pub const MONEY_ARRAY: Type = Type(Inner::MoneyArray);\n\n    /// MACADDR - XX:XX:XX:XX:XX:XX, MAC address\n    pub const MACADDR: Type = Type(Inner::Macaddr);\n\n    /// INET - IP address/netmask, host address, netmask optional\n    pub const INET: Type = Type(Inner::Inet);\n\n    /// BOOL&#91;&#93;\n    pub const BOOL_ARRAY: Type = Type(Inner::BoolArray);\n\n    /// BYTEA&#91;&#93;\n    pub const BYTEA_ARRAY: Type = Type(Inner::ByteaArray);\n\n    /// CHAR&#91;&#93;\n    pub const CHAR_ARRAY: Type = Type(Inner::CharArray);\n\n    /// NAME&#91;&#93;\n    pub const NAME_ARRAY: Type = Type(Inner::NameArray);\n\n    /// INT2&#91;&#93;\n    pub const INT2_ARRAY: Type = Type(Inner::Int2Array);\n\n    /// INT2VECTOR&#91;&#93;\n    pub const INT2_VECTOR_ARRAY: Type = Type(Inner::Int2VectorArray);\n\n    /// INT4&#91;&#93;\n    pub const INT4_ARRAY: Type = Type(Inner::Int4Array);\n\n    /// REGPROC&#91;&#93;\n    pub const REGPROC_ARRAY: Type = Type(Inner::RegprocArray);\n\n    /// TEXT&#91;&#93;\n    pub const TEXT_ARRAY: Type = Type(Inner::TextArray);\n\n    /// TID&#91;&#93;\n    pub const TID_ARRAY: Type = Type(Inner::TidArray);\n\n    /// XID&#91;&#93;\n    pub const XID_ARRAY: Type = Type(Inner::XidArray);\n\n    /// CID&#91;&#93;\n    pub const CID_ARRAY: Type = Type(Inner::CidArray);\n\n    /// OIDVECTOR&#91;&#93;\n    pub const OID_VECTOR_ARRAY: Type = Type(Inner::OidVectorArray);\n\n    /// BPCHAR&#91;&#93;\n    pub const BPCHAR_ARRAY: Type = 
Type(Inner::BpcharArray);\n\n    /// VARCHAR&#91;&#93;\n    pub const VARCHAR_ARRAY: Type = Type(Inner::VarcharArray);\n\n    /// INT8&#91;&#93;\n    pub const INT8_ARRAY: Type = Type(Inner::Int8Array);\n\n    /// POINT&#91;&#93;\n    pub const POINT_ARRAY: Type = Type(Inner::PointArray);\n\n    /// LSEG&#91;&#93;\n    pub const LSEG_ARRAY: Type = Type(Inner::LsegArray);\n\n    /// PATH&#91;&#93;\n    pub const PATH_ARRAY: Type = Type(Inner::PathArray);\n\n    /// BOX&#91;&#93;\n    pub const BOX_ARRAY: Type = Type(Inner::BoxArray);\n\n    /// FLOAT4&#91;&#93;\n    pub const FLOAT4_ARRAY: Type = Type(Inner::Float4Array);\n\n    /// FLOAT8&#91;&#93;\n    pub const FLOAT8_ARRAY: Type = Type(Inner::Float8Array);\n\n    /// POLYGON&#91;&#93;\n    pub const POLYGON_ARRAY: Type = Type(Inner::PolygonArray);\n\n    /// OID&#91;&#93;\n    pub const OID_ARRAY: Type = Type(Inner::OidArray);\n\n    /// ACLITEM - access control list\n    pub const ACLITEM: Type = Type(Inner::Aclitem);\n\n    /// ACLITEM&#91;&#93;\n    pub const ACLITEM_ARRAY: Type = Type(Inner::AclitemArray);\n\n    /// MACADDR&#91;&#93;\n    pub const MACADDR_ARRAY: Type = Type(Inner::MacaddrArray);\n\n    /// INET&#91;&#93;\n    pub const INET_ARRAY: Type = Type(Inner::InetArray);\n\n    /// BPCHAR - char&#40;length&#41;, blank-padded string, fixed storage length\n    pub const BPCHAR: Type = Type(Inner::Bpchar);\n\n    /// VARCHAR - varchar&#40;length&#41;, non-blank-padded string, variable storage length\n    pub const VARCHAR: Type = Type(Inner::Varchar);\n\n    /// DATE - date\n    pub const DATE: Type = Type(Inner::Date);\n\n    /// TIME - time of day\n    pub const TIME: Type = Type(Inner::Time);\n\n    /// TIMESTAMP - date and time\n    pub const TIMESTAMP: Type = Type(Inner::Timestamp);\n\n    /// TIMESTAMP&#91;&#93;\n    pub const TIMESTAMP_ARRAY: Type = Type(Inner::TimestampArray);\n\n    /// DATE&#91;&#93;\n    pub const DATE_ARRAY: Type = Type(Inner::DateArray);\n\n    /// TIME&#91;&#93;\n    pub 
const TIME_ARRAY: Type = Type(Inner::TimeArray);\n\n    /// TIMESTAMPTZ - date and time with time zone\n    pub const TIMESTAMPTZ: Type = Type(Inner::Timestamptz);\n\n    /// TIMESTAMPTZ&#91;&#93;\n    pub const TIMESTAMPTZ_ARRAY: Type = Type(Inner::TimestamptzArray);\n\n    /// INTERVAL - &#64; &lt;number&gt; &lt;units&gt;, time interval\n    pub const INTERVAL: Type = Type(Inner::Interval);\n\n    /// INTERVAL&#91;&#93;\n    pub const INTERVAL_ARRAY: Type = Type(Inner::IntervalArray);\n\n    /// NUMERIC&#91;&#93;\n    pub const NUMERIC_ARRAY: Type = Type(Inner::NumericArray);\n\n    /// CSTRING&#91;&#93;\n    pub const CSTRING_ARRAY: Type = Type(Inner::CstringArray);\n\n    /// TIMETZ - time of day with time zone\n    pub const TIMETZ: Type = Type(Inner::Timetz);\n\n    /// TIMETZ&#91;&#93;\n    pub const TIMETZ_ARRAY: Type = Type(Inner::TimetzArray);\n\n    /// BIT - fixed-length bit string\n    pub const BIT: Type = Type(Inner::Bit);\n\n    /// BIT&#91;&#93;\n    pub const BIT_ARRAY: Type = Type(Inner::BitArray);\n\n    /// VARBIT - variable-length bit string\n    pub const VARBIT: Type = Type(Inner::Varbit);\n\n    /// VARBIT&#91;&#93;\n    pub const VARBIT_ARRAY: Type = Type(Inner::VarbitArray);\n\n    /// NUMERIC - numeric&#40;precision, decimal&#41;, arbitrary precision number\n    pub const NUMERIC: Type = Type(Inner::Numeric);\n\n    /// REFCURSOR - reference to cursor &#40;portal name&#41;\n    pub const REFCURSOR: Type = Type(Inner::Refcursor);\n\n    /// REFCURSOR&#91;&#93;\n    pub const REFCURSOR_ARRAY: Type = Type(Inner::RefcursorArray);\n\n    /// REGPROCEDURE - registered procedure &#40;with args&#41;\n    pub const REGPROCEDURE: Type = Type(Inner::Regprocedure);\n\n    /// REGOPER - registered operator\n    pub const REGOPER: Type = Type(Inner::Regoper);\n\n    /// REGOPERATOR - registered operator &#40;with args&#41;\n    pub const REGOPERATOR: Type = Type(Inner::Regoperator);\n\n    /// REGCLASS - registered class\n    pub const REGCLASS: Type 
= Type(Inner::Regclass);\n\n    /// REGTYPE - registered type\n    pub const REGTYPE: Type = Type(Inner::Regtype);\n\n    /// REGPROCEDURE&#91;&#93;\n    pub const REGPROCEDURE_ARRAY: Type = Type(Inner::RegprocedureArray);\n\n    /// REGOPER&#91;&#93;\n    pub const REGOPER_ARRAY: Type = Type(Inner::RegoperArray);\n\n    /// REGOPERATOR&#91;&#93;\n    pub const REGOPERATOR_ARRAY: Type = Type(Inner::RegoperatorArray);\n\n    /// REGCLASS&#91;&#93;\n    pub const REGCLASS_ARRAY: Type = Type(Inner::RegclassArray);\n\n    /// REGTYPE&#91;&#93;\n    pub const REGTYPE_ARRAY: Type = Type(Inner::RegtypeArray);\n\n    /// RECORD - pseudo-type representing any composite type\n    pub const RECORD: Type = Type(Inner::Record);\n\n    /// CSTRING - C-style string\n    pub const CSTRING: Type = Type(Inner::Cstring);\n\n    /// ANY - pseudo-type representing any type\n    pub const ANY: Type = Type(Inner::Any);\n\n    /// ANYARRAY - pseudo-type representing a polymorphic array type\n    pub const ANYARRAY: Type = Type(Inner::Anyarray);\n\n    /// VOID - pseudo-type for the result of a function with no real result\n    pub const VOID: Type = Type(Inner::Void);\n\n    /// TRIGGER - pseudo-type for the result of a trigger function\n    pub const TRIGGER: Type = Type(Inner::Trigger);\n\n    /// LANGUAGE_HANDLER - pseudo-type for the result of a language handler function\n    pub const LANGUAGE_HANDLER: Type = Type(Inner::LanguageHandler);\n\n    /// INTERNAL - pseudo-type representing an internal data structure\n    pub const INTERNAL: Type = Type(Inner::Internal);\n\n    /// ANYELEMENT - pseudo-type representing a polymorphic base type\n    pub const ANYELEMENT: Type = Type(Inner::Anyelement);\n\n    /// RECORD&#91;&#93;\n    pub const RECORD_ARRAY: Type = Type(Inner::RecordArray);\n\n    /// ANYNONARRAY - pseudo-type representing a polymorphic base type that is not an array\n    pub const ANYNONARRAY: Type = Type(Inner::Anynonarray);\n\n    /// TXID_SNAPSHOT&#91;&#93;\n    pub 
const TXID_SNAPSHOT_ARRAY: Type = Type(Inner::TxidSnapshotArray);\n\n    /// UUID - UUID datatype\n    pub const UUID: Type = Type(Inner::Uuid);\n\n    /// UUID&#91;&#93;\n    pub const UUID_ARRAY: Type = Type(Inner::UuidArray);\n\n    /// TXID_SNAPSHOT - txid snapshot\n    pub const TXID_SNAPSHOT: Type = Type(Inner::TxidSnapshot);\n\n    /// FDW_HANDLER - pseudo-type for the result of an FDW handler function\n    pub const FDW_HANDLER: Type = Type(Inner::FdwHandler);\n\n    /// PG_LSN - PostgreSQL LSN datatype\n    pub const PG_LSN: Type = Type(Inner::PgLsn);\n\n    /// PG_LSN&#91;&#93;\n    pub const PG_LSN_ARRAY: Type = Type(Inner::PgLsnArray);\n\n    /// TSM_HANDLER - pseudo-type for the result of a tablesample method function\n    pub const TSM_HANDLER: Type = Type(Inner::TsmHandler);\n\n    /// PG_NDISTINCT - multivariate ndistinct coefficients\n    pub const PG_NDISTINCT: Type = Type(Inner::PgNdistinct);\n\n    /// PG_DEPENDENCIES - multivariate dependencies\n    pub const PG_DEPENDENCIES: Type = Type(Inner::PgDependencies);\n\n    /// ANYENUM - pseudo-type representing a polymorphic base type that is an enum\n    pub const ANYENUM: Type = Type(Inner::Anyenum);\n\n    /// TSVECTOR - text representation for text search\n    pub const TS_VECTOR: Type = Type(Inner::TsVector);\n\n    /// TSQUERY - query representation for text search\n    pub const TSQUERY: Type = Type(Inner::Tsquery);\n\n    /// GTSVECTOR - GiST index internal text representation for text search\n    pub const GTS_VECTOR: Type = Type(Inner::GtsVector);\n\n    /// TSVECTOR&#91;&#93;\n    pub const TS_VECTOR_ARRAY: Type = Type(Inner::TsVectorArray);\n\n    /// GTSVECTOR&#91;&#93;\n    pub const GTS_VECTOR_ARRAY: Type = Type(Inner::GtsVectorArray);\n\n    /// TSQUERY&#91;&#93;\n    pub const TSQUERY_ARRAY: Type = Type(Inner::TsqueryArray);\n\n    /// REGCONFIG - registered text search configuration\n    pub const REGCONFIG: Type = Type(Inner::Regconfig);\n\n    /// REGCONFIG&#91;&#93;\n    pub 
const REGCONFIG_ARRAY: Type = Type(Inner::RegconfigArray);\n\n    /// REGDICTIONARY - registered text search dictionary\n    pub const REGDICTIONARY: Type = Type(Inner::Regdictionary);\n\n    /// REGDICTIONARY&#91;&#93;\n    pub const REGDICTIONARY_ARRAY: Type = Type(Inner::RegdictionaryArray);\n\n    /// JSONB - Binary JSON\n    pub const JSONB: Type = Type(Inner::Jsonb);\n\n    /// JSONB&#91;&#93;\n    pub const JSONB_ARRAY: Type = Type(Inner::JsonbArray);\n\n    /// ANYRANGE - pseudo-type representing a range over a polymorphic base type\n    pub const ANY_RANGE: Type = Type(Inner::AnyRange);\n\n    /// EVENT_TRIGGER - pseudo-type for the result of an event trigger function\n    pub const EVENT_TRIGGER: Type = Type(Inner::EventTrigger);\n\n    /// INT4RANGE - range of integers\n    pub const INT4_RANGE: Type = Type(Inner::Int4Range);\n\n    /// INT4RANGE&#91;&#93;\n    pub const INT4_RANGE_ARRAY: Type = Type(Inner::Int4RangeArray);\n\n    /// NUMRANGE - range of numerics\n    pub const NUM_RANGE: Type = Type(Inner::NumRange);\n\n    /// NUMRANGE&#91;&#93;\n    pub const NUM_RANGE_ARRAY: Type = Type(Inner::NumRangeArray);\n\n    /// TSRANGE - range of timestamps without time zone\n    pub const TS_RANGE: Type = Type(Inner::TsRange);\n\n    /// TSRANGE&#91;&#93;\n    pub const TS_RANGE_ARRAY: Type = Type(Inner::TsRangeArray);\n\n    /// TSTZRANGE - range of timestamps with time zone\n    pub const TSTZ_RANGE: Type = Type(Inner::TstzRange);\n\n    /// TSTZRANGE&#91;&#93;\n    pub const TSTZ_RANGE_ARRAY: Type = Type(Inner::TstzRangeArray);\n\n    /// DATERANGE - range of dates\n    pub const DATE_RANGE: Type = Type(Inner::DateRange);\n\n    /// DATERANGE&#91;&#93;\n    pub const DATE_RANGE_ARRAY: Type = Type(Inner::DateRangeArray);\n\n    /// INT8RANGE - range of bigints\n    pub const INT8_RANGE: Type = Type(Inner::Int8Range);\n\n    /// INT8RANGE&#91;&#93;\n    pub const INT8_RANGE_ARRAY: Type = Type(Inner::Int8RangeArray);\n\n    /// JSONPATH - JSON path\n    pub 
const JSONPATH: Type = Type(Inner::Jsonpath);\n\n    /// JSONPATH&#91;&#93;\n    pub const JSONPATH_ARRAY: Type = Type(Inner::JsonpathArray);\n\n    /// REGNAMESPACE - registered namespace\n    pub const REGNAMESPACE: Type = Type(Inner::Regnamespace);\n\n    /// REGNAMESPACE&#91;&#93;\n    pub const REGNAMESPACE_ARRAY: Type = Type(Inner::RegnamespaceArray);\n\n    /// REGROLE - registered role\n    pub const REGROLE: Type = Type(Inner::Regrole);\n\n    /// REGROLE&#91;&#93;\n    pub const REGROLE_ARRAY: Type = Type(Inner::RegroleArray);\n\n    /// REGCOLLATION - registered collation\n    pub const REGCOLLATION: Type = Type(Inner::Regcollation);\n\n    /// REGCOLLATION&#91;&#93;\n    pub const REGCOLLATION_ARRAY: Type = Type(Inner::RegcollationArray);\n\n    /// INT4MULTIRANGE - multirange of integers\n    pub const INT4MULTI_RANGE: Type = Type(Inner::Int4multiRange);\n\n    /// NUMMULTIRANGE - multirange of numerics\n    pub const NUMMULTI_RANGE: Type = Type(Inner::NummultiRange);\n\n    /// TSMULTIRANGE - multirange of timestamps without time zone\n    pub const TSMULTI_RANGE: Type = Type(Inner::TsmultiRange);\n\n    /// TSTZMULTIRANGE - multirange of timestamps with time zone\n    pub const TSTZMULTI_RANGE: Type = Type(Inner::TstzmultiRange);\n\n    /// DATEMULTIRANGE - multirange of dates\n    pub const DATEMULTI_RANGE: Type = Type(Inner::DatemultiRange);\n\n    /// INT8MULTIRANGE - multirange of bigints\n    pub const INT8MULTI_RANGE: Type = Type(Inner::Int8multiRange);\n\n    /// ANYMULTIRANGE - pseudo-type representing a polymorphic base type that is a multirange\n    pub const ANYMULTI_RANGE: Type = Type(Inner::AnymultiRange);\n\n    /// ANYCOMPATIBLEMULTIRANGE - pseudo-type representing a multirange over a polymorphic common type\n    pub const ANYCOMPATIBLEMULTI_RANGE: Type = Type(Inner::AnycompatiblemultiRange);\n\n    /// PG_BRIN_BLOOM_SUMMARY - BRIN bloom summary\n    pub const PG_BRIN_BLOOM_SUMMARY: Type = Type(Inner::PgBrinBloomSummary);\n\n    /// 
PG_BRIN_MINMAX_MULTI_SUMMARY - BRIN minmax-multi summary\n    pub const PG_BRIN_MINMAX_MULTI_SUMMARY: Type = Type(Inner::PgBrinMinmaxMultiSummary);\n\n    /// PG_MCV_LIST - multivariate MCV list\n    pub const PG_MCV_LIST: Type = Type(Inner::PgMcvList);\n\n    /// PG_SNAPSHOT - snapshot\n    pub const PG_SNAPSHOT: Type = Type(Inner::PgSnapshot);\n\n    /// PG_SNAPSHOT&#91;&#93;\n    pub const PG_SNAPSHOT_ARRAY: Type = Type(Inner::PgSnapshotArray);\n\n    /// XID8 - full transaction id\n    pub const XID8: Type = Type(Inner::Xid8);\n\n    /// ANYCOMPATIBLE - pseudo-type representing a polymorphic common type\n    pub const ANYCOMPATIBLE: Type = Type(Inner::Anycompatible);\n\n    /// ANYCOMPATIBLEARRAY - pseudo-type representing an array of polymorphic common type elements\n    pub const ANYCOMPATIBLEARRAY: Type = Type(Inner::Anycompatiblearray);\n\n    /// ANYCOMPATIBLENONARRAY - pseudo-type representing a polymorphic common type that is not an array\n    pub const ANYCOMPATIBLENONARRAY: Type = Type(Inner::Anycompatiblenonarray);\n\n    /// ANYCOMPATIBLERANGE - pseudo-type representing a range over a polymorphic common type\n    pub const ANYCOMPATIBLE_RANGE: Type = Type(Inner::AnycompatibleRange);\n\n    /// INT4MULTIRANGE&#91;&#93;\n    pub const INT4MULTI_RANGE_ARRAY: Type = Type(Inner::Int4multiRangeArray);\n\n    /// NUMMULTIRANGE&#91;&#93;\n    pub const NUMMULTI_RANGE_ARRAY: Type = Type(Inner::NummultiRangeArray);\n\n    /// TSMULTIRANGE&#91;&#93;\n    pub const TSMULTI_RANGE_ARRAY: Type = Type(Inner::TsmultiRangeArray);\n\n    /// TSTZMULTIRANGE&#91;&#93;\n    pub const TSTZMULTI_RANGE_ARRAY: Type = Type(Inner::TstzmultiRangeArray);\n\n    /// DATEMULTIRANGE&#91;&#93;\n    pub const DATEMULTI_RANGE_ARRAY: Type = Type(Inner::DatemultiRangeArray);\n\n    /// INT8MULTIRANGE&#91;&#93;\n    pub const INT8MULTI_RANGE_ARRAY: Type = Type(Inner::Int8multiRangeArray);\n}\n"
  },
  {
    "path": "libs/proxy/subzero_core/.gitignore",
    "content": "target\nCargo.lock"
  },
  {
    "path": "libs/proxy/subzero_core/Cargo.toml",
    "content": "# This is a stub for the subzero-core crate.\n[package]\nname = \"subzero-core\"\nversion = \"3.0.1\"\nedition = \"2024\"\npublish = false # \"private\"!\n\n[features]\ndefault = []\npostgresql = []\n\n[dependencies]\n"
  },
  {
    "path": "libs/proxy/subzero_core/src/lib.rs",
    "content": "// This is a stub for the subzero-core crate.\n"
  },
  {
    "path": "libs/proxy/tokio-postgres2/Cargo.toml",
    "content": "[package]\nname = \"tokio-postgres2\"\nversion = \"0.1.0\"\nedition = \"2024\"\nlicense = \"MIT/Apache-2.0\"\n\n[dependencies]\nbytes.workspace = true\nfallible-iterator.workspace = true\nfutures-util = { workspace = true, features = [\"sink\"] }\ntracing.workspace = true\nparking_lot.workspace = true\npin-project-lite.workspace = true\npostgres-protocol2 = { path = \"../postgres-protocol2\" }\npostgres-types2 = { path = \"../postgres-types2\" }\ntokio = { workspace = true, features = [\"io-util\", \"time\", \"net\"] }\ntokio-util = { workspace = true, features = [\"codec\"] }\nserde = { workspace = true, features = [\"derive\"] }\n"
  },
  {
    "path": "libs/proxy/tokio-postgres2/src/cancel_query.rs",
    "content": "use tokio::net::TcpStream;\n\nuse crate::client::SocketConfig;\nuse crate::config::{Host, SslMode};\nuse crate::tls::MakeTlsConnect;\nuse crate::{Error, cancel_query_raw, connect_socket};\n\npub(crate) async fn cancel_query<T>(\n    config: SocketConfig,\n    ssl_mode: SslMode,\n    tls: T,\n    process_id: i32,\n    secret_key: i32,\n) -> Result<(), Error>\nwhere\n    T: MakeTlsConnect<TcpStream>,\n{\n    let hostname = match &config.host {\n        Host::Tcp(host) => &**host,\n    };\n    let tls = tls\n        .make_tls_connect(hostname)\n        .map_err(|e| Error::tls(e.into()))?;\n\n    let socket = connect_socket::connect_socket(\n        config.host_addr,\n        &config.host,\n        config.port,\n        config.connect_timeout,\n    )\n    .await?;\n\n    cancel_query_raw::cancel_query_raw(socket, ssl_mode, tls, process_id, secret_key).await\n}\n"
  },
  {
    "path": "libs/proxy/tokio-postgres2/src/cancel_query_raw.rs",
    "content": "use bytes::BytesMut;\nuse postgres_protocol2::message::frontend;\nuse tokio::io::{AsyncRead, AsyncWrite, AsyncWriteExt};\n\nuse crate::config::SslMode;\nuse crate::tls::TlsConnect;\nuse crate::{Error, connect_tls};\n\npub async fn cancel_query_raw<S, T>(\n    stream: S,\n    mode: SslMode,\n    tls: T,\n    process_id: i32,\n    secret_key: i32,\n) -> Result<(), Error>\nwhere\n    S: AsyncRead + AsyncWrite + Unpin,\n    T: TlsConnect<S>,\n{\n    let mut stream = connect_tls::connect_tls(stream, mode, tls).await?;\n\n    let mut buf = BytesMut::new();\n    frontend::cancel_request(process_id, secret_key, &mut buf);\n\n    stream.write_all(&buf).await.map_err(Error::io)?;\n    stream.flush().await.map_err(Error::io)?;\n    stream.shutdown().await.map_err(Error::io)?;\n\n    Ok(())\n}\n"
  },
  {
    "path": "libs/proxy/tokio-postgres2/src/cancel_token.rs",
    "content": "use serde::{Deserialize, Serialize};\nuse tokio::io::{AsyncRead, AsyncWrite};\nuse tokio::net::TcpStream;\n\nuse crate::client::SocketConfig;\nuse crate::config::SslMode;\nuse crate::tls::{MakeTlsConnect, TlsConnect};\nuse crate::{Error, cancel_query, cancel_query_raw};\n\n/// A cancellation token that allows easy cancellation of a query.\n#[derive(Clone)]\npub struct CancelToken {\n    pub socket_config: SocketConfig,\n    pub raw: RawCancelToken,\n}\n\n/// A raw cancellation token that allows cancellation of a query, given a fresh connection to postgres.\n#[derive(Debug, Clone, Serialize, Deserialize)]\npub struct RawCancelToken {\n    pub ssl_mode: SslMode,\n    pub process_id: i32,\n    pub secret_key: i32,\n}\n\nimpl CancelToken {\n    /// Attempts to cancel the in-progress query on the connection associated\n    /// with this `CancelToken`.\n    ///\n    /// The server provides no information about whether a cancellation attempt was successful or not. An error will\n    /// only be returned if the client was unable to connect to the database.\n    ///\n    /// Cancellation is inherently racy. 
There is no guarantee that the\n    /// cancellation request will reach the server before the query terminates\n    /// normally, or that the connection associated with this token is still\n    /// active.\n    ///\n    /// Requires the `runtime` Cargo feature (enabled by default).\n    pub async fn cancel_query<T>(&self, tls: T) -> Result<(), Error>\n    where\n        T: MakeTlsConnect<TcpStream>,\n    {\n        cancel_query::cancel_query(\n            self.socket_config.clone(),\n            self.raw.ssl_mode,\n            tls,\n            self.raw.process_id,\n            self.raw.secret_key,\n        )\n        .await\n    }\n}\n\nimpl RawCancelToken {\n    /// Like `cancel_query`, but uses a stream which is already connected to the server rather than opening a new\n    /// connection itself.\n    pub async fn cancel_query_raw<S, T>(&self, stream: S, tls: T) -> Result<(), Error>\n    where\n        S: AsyncRead + AsyncWrite + Unpin,\n        T: TlsConnect<S>,\n    {\n        cancel_query_raw::cancel_query_raw(\n            stream,\n            self.ssl_mode,\n            tls,\n            self.process_id,\n            self.secret_key,\n        )\n        .await\n    }\n}\n"
  },
  {
    "path": "libs/proxy/tokio-postgres2/src/client.rs",
    "content": "use std::collections::HashMap;\nuse std::fmt;\nuse std::net::IpAddr;\nuse std::task::{Context, Poll};\nuse std::time::Duration;\n\nuse bytes::BytesMut;\nuse fallible_iterator::FallibleIterator;\nuse futures_util::{TryStreamExt, future, ready};\nuse postgres_protocol2::message::backend::Message;\nuse postgres_protocol2::message::frontend;\nuse serde::{Deserialize, Serialize};\nuse tokio::sync::mpsc;\n\nuse crate::cancel_token::RawCancelToken;\nuse crate::codec::{BackendMessages, FrontendMessage, RecordNotices};\nuse crate::config::{Host, SslMode};\nuse crate::connection::gc_bytesmut;\nuse crate::query::RowStream;\nuse crate::simple_query::SimpleQueryStream;\nuse crate::types::{Oid, Type};\nuse crate::{\n    CancelToken, Error, ReadyForQueryStatus, SimpleQueryMessage, Transaction, TransactionBuilder,\n    query, simple_query,\n};\n\npub struct Responses {\n    /// new messages from conn\n    receiver: mpsc::Receiver<BackendMessages>,\n    /// current batch of messages\n    cur: BackendMessages,\n    /// number of total queries sent.\n    waiting: usize,\n    /// number of ReadyForQuery messages received.\n    received: usize,\n}\n\nimpl Responses {\n    pub fn poll_next(&mut self, cx: &mut Context<'_>) -> Poll<Result<Message, Error>> {\n        loop {\n            // get the next saved message\n            if let Some(message) = self.cur.next().map_err(Error::parse)? 
{\n                let received = self.received;\n\n                // increase the query head if this is the last message.\n                if let Message::ReadyForQuery(_) = message {\n                    self.received += 1;\n                }\n\n                // check if the client has skipped this query.\n                if received + 1 < self.waiting {\n                    // grab the next message.\n                    continue;\n                }\n\n                // convenience: turn the error message into a proper error.\n                let res = match message {\n                    Message::ErrorResponse(body) => Err(Error::db(body)),\n                    message => Ok(message),\n                };\n                return Poll::Ready(res);\n            }\n\n            // get the next batch of messages.\n            match ready!(self.receiver.poll_recv(cx)) {\n                Some(messages) => self.cur = messages,\n                None => return Poll::Ready(Err(Error::closed())),\n            }\n        }\n    }\n\n    pub async fn next(&mut self) -> Result<Message, Error> {\n        future::poll_fn(|cx| self.poll_next(cx)).await\n    }\n}\n\n/// A cache of type info and prepared statements for fetching type info\n/// (corresponding to the queries in the [crate::prepare] module).\n#[derive(Default)]\npub(crate) struct CachedTypeInfo {\n    /// Cache of types already looked up.\n    pub(crate) types: HashMap<Oid, Type>,\n}\n\npub struct InnerClient {\n    sender: mpsc::UnboundedSender<FrontendMessage>,\n    responses: Responses,\n\n    /// A buffer to use when writing out postgres commands.\n    buffer: BytesMut,\n}\n\nimpl InnerClient {\n    pub fn start(&mut self) -> Result<PartialQuery<'_>, Error> {\n        self.responses.waiting += 1;\n        Ok(PartialQuery(Some(self)))\n    }\n\n    pub fn send_simple_query(&mut self, query: &str) -> Result<&mut Responses, Error> {\n        self.responses.waiting += 1;\n\n        self.buffer.clear();\n        
// simple queries do not need sync.\n        frontend::query(query, &mut self.buffer).map_err(Error::encode)?;\n        let buf = self.buffer.split();\n        self.send_message(FrontendMessage::Raw(buf))\n    }\n\n    fn send_message(&mut self, messages: FrontendMessage) -> Result<&mut Responses, Error> {\n        self.sender.send(messages).map_err(|_| Error::closed())?;\n        Ok(&mut self.responses)\n    }\n}\n\npub struct PartialQuery<'a>(Option<&'a mut InnerClient>);\n\nimpl Drop for PartialQuery<'_> {\n    fn drop(&mut self) {\n        if let Some(client) = self.0.take() {\n            client.buffer.clear();\n            frontend::sync(&mut client.buffer);\n            let buf = client.buffer.split();\n            let _ = client.send_message(FrontendMessage::Raw(buf));\n        }\n    }\n}\n\nimpl<'a> PartialQuery<'a> {\n    pub fn send_with_flush<F>(&mut self, f: F) -> Result<&mut Responses, Error>\n    where\n        F: FnOnce(&mut BytesMut) -> Result<(), Error>,\n    {\n        let client = self.0.as_deref_mut().unwrap();\n\n        client.buffer.clear();\n        f(&mut client.buffer)?;\n        frontend::flush(&mut client.buffer);\n        let buf = client.buffer.split();\n        client.send_message(FrontendMessage::Raw(buf))\n    }\n\n    pub fn send_with_sync<F>(mut self, f: F) -> Result<&'a mut Responses, Error>\n    where\n        F: FnOnce(&mut BytesMut) -> Result<(), Error>,\n    {\n        let client = self.0.as_deref_mut().unwrap();\n\n        client.buffer.clear();\n        f(&mut client.buffer)?;\n        frontend::sync(&mut client.buffer);\n        let buf = client.buffer.split();\n        let _ = client.send_message(FrontendMessage::Raw(buf));\n\n        Ok(&mut self.0.take().unwrap().responses)\n    }\n}\n\n#[derive(Clone, Serialize, Deserialize)]\npub struct SocketConfig {\n    pub host_addr: Option<IpAddr>,\n    pub host: Host,\n    pub port: u16,\n    pub connect_timeout: Option<Duration>,\n}\n\n/// An asynchronous PostgreSQL 
client.\n///\n/// The client is one half of what is returned when a connection is established. Users interact with the database\n/// through this client object.\npub struct Client {\n    inner: InnerClient,\n    cached_typeinfo: CachedTypeInfo,\n\n    socket_config: SocketConfig,\n    ssl_mode: SslMode,\n    process_id: i32,\n    secret_key: i32,\n}\n\nimpl Client {\n    pub(crate) fn new(\n        sender: mpsc::UnboundedSender<FrontendMessage>,\n        receiver: mpsc::Receiver<BackendMessages>,\n        socket_config: SocketConfig,\n        ssl_mode: SslMode,\n        process_id: i32,\n        secret_key: i32,\n        write_buf: BytesMut,\n    ) -> Client {\n        Client {\n            inner: InnerClient {\n                sender,\n                responses: Responses {\n                    receiver,\n                    cur: BackendMessages::empty(),\n                    waiting: 0,\n                    received: 0,\n                },\n                buffer: write_buf,\n            },\n            cached_typeinfo: Default::default(),\n\n            socket_config,\n            ssl_mode,\n            process_id,\n            secret_key,\n        }\n    }\n\n    /// Returns process_id.\n    pub fn get_process_id(&self) -> i32 {\n        self.process_id\n    }\n\n    pub(crate) fn inner_mut(&mut self) -> &mut InnerClient {\n        &mut self.inner\n    }\n\n    pub fn record_notices(&mut self, limit: usize) -> mpsc::UnboundedReceiver<Box<str>> {\n        let (tx, rx) = mpsc::unbounded_channel();\n\n        let notices = RecordNotices { sender: tx, limit };\n        self.inner\n            .sender\n            .send(FrontendMessage::RecordNotices(notices))\n            .ok();\n\n        rx\n    }\n\n    /// Pass text directly to the Postgres backend to allow it to sort out typing itself and\n    /// to save a roundtrip\n    pub async fn query_raw_txt<S, I>(\n        &mut self,\n        statement: &str,\n        params: I,\n    ) -> Result<RowStream<'_>, Error>\n 
   where\n        S: AsRef<str>,\n        I: IntoIterator<Item = Option<S>>,\n        I::IntoIter: ExactSizeIterator,\n    {\n        query::query_txt(\n            &mut self.inner,\n            &mut self.cached_typeinfo,\n            statement,\n            params,\n        )\n        .await\n    }\n\n    /// Executes a sequence of SQL statements using the simple query protocol, returning the resulting rows.\n    ///\n    /// Statements should be separated by semicolons. If an error occurs, execution of the sequence will stop at that\n    /// point. The simple query protocol returns the values in rows as strings rather than in their binary encodings,\n    /// so the associated row type doesn't work with the `FromSql` trait. Rather than simply returning a list of the\n    /// rows, this method returns a list of an enum which indicates either the completion of one of the commands,\n    /// or a row of data. This preserves the framing between the separate statements in the request.\n    ///\n    /// # Warning\n    ///\n    /// Prepared statements should be used for any query which contains user-specified data, as they provide the\n    /// functionality to safely embed that data in the request. Do not form statements via string concatenation and pass\n    /// them to this method!\n    pub async fn simple_query(&mut self, query: &str) -> Result<Vec<SimpleQueryMessage>, Error> {\n        self.simple_query_raw(query).await?.try_collect().await\n    }\n\n    pub(crate) async fn simple_query_raw(\n        &mut self,\n        query: &str,\n    ) -> Result<SimpleQueryStream<'_>, Error> {\n        simple_query::simple_query(self.inner_mut(), query).await\n    }\n\n    /// Executes a sequence of SQL statements using the simple query protocol.\n    ///\n    /// Statements should be separated by semicolons. If an error occurs, execution of the sequence will stop at that\n    /// point. 
This is intended for use when, for example, initializing a database schema.\n    ///\n    /// # Warning\n    ///\n    /// Prepared statements should be use for any query which contains user-specified data, as they provided the\n    /// functionality to safely embed that data in the request. Do not form statements via string concatenation and pass\n    /// them to this method!\n    pub async fn batch_execute(&mut self, query: &str) -> Result<ReadyForQueryStatus, Error> {\n        simple_query::batch_execute(self.inner_mut(), query).await\n    }\n\n    /// Similar to `discard_all`, but it does not clear any query plans\n    ///\n    /// This runs in the background, so it can be executed without `await`ing.\n    pub fn reset_session_background(&mut self) -> Result<(), Error> {\n        // \"CLOSE ALL\": closes any cursors\n        // \"SET SESSION AUTHORIZATION DEFAULT\": resets the current_user back to the session_user\n        // \"RESET ALL\": resets any GUCs back to their session defaults.\n        // \"DEALLOCATE ALL\": deallocates any prepared statements\n        // \"UNLISTEN *\": stops listening on all channels\n        // \"SELECT pg_advisory_unlock_all();\": unlocks all advisory locks\n        // \"DISCARD TEMP;\": drops all temporary tables\n        // \"DISCARD SEQUENCES;\": deallocates all cached sequence state\n\n        let _responses = self.inner_mut().send_simple_query(\n            \"ROLLBACK;\n            CLOSE ALL;\n            SET SESSION AUTHORIZATION DEFAULT;\n            RESET ALL;\n            DEALLOCATE ALL;\n            UNLISTEN *;\n            SELECT pg_advisory_unlock_all();\n            DISCARD TEMP;\n            DISCARD SEQUENCES;\",\n        )?;\n\n        // Clean up memory usage.\n        gc_bytesmut(&mut self.inner_mut().buffer);\n\n        Ok(())\n    }\n\n    /// Begins a new database transaction.\n    ///\n    /// The transaction will roll back by default - use the `commit` method to commit it.\n    pub async fn transaction(&mut 
self) -> Result<Transaction<'_>, Error> {\n        struct RollbackIfNotDone<'me> {\n            client: &'me mut Client,\n            done: bool,\n        }\n\n        impl Drop for RollbackIfNotDone<'_> {\n            fn drop(&mut self) {\n                if self.done {\n                    return;\n                }\n\n                let _ = self.client.inner.send_simple_query(\"ROLLBACK\");\n            }\n        }\n\n        // This is done, as `Future` created by this method can be dropped after\n        // `RequestMessages` is synchronously send to the `Connection` by\n        // `batch_execute()`, but before `Responses` is asynchronously polled to\n        // completion. In that case `Transaction` won't be created and thus\n        // won't be rolled back.\n        {\n            let mut cleaner = RollbackIfNotDone {\n                client: self,\n                done: false,\n            };\n            cleaner.client.batch_execute(\"BEGIN\").await?;\n            cleaner.done = true;\n        }\n\n        Ok(Transaction::new(self))\n    }\n\n    /// Returns a builder for a transaction with custom settings.\n    ///\n    /// Unlike the `transaction` method, the builder can be used to control the transaction's isolation level and other\n    /// attributes.\n    pub fn build_transaction(&mut self) -> TransactionBuilder<'_> {\n        TransactionBuilder::new(self)\n    }\n\n    /// Constructs a cancellation token that can later be used to request cancellation of a query running on the\n    /// connection associated with this client.\n    pub fn cancel_token(&self) -> CancelToken {\n        CancelToken {\n            socket_config: self.socket_config.clone(),\n            raw: RawCancelToken {\n                ssl_mode: self.ssl_mode,\n                process_id: self.process_id,\n                secret_key: self.secret_key,\n            },\n        }\n    }\n\n    /// Determines if the connection to the server has already closed.\n    ///\n    /// In that 
case, all future queries will fail.\n    pub fn is_closed(&self) -> bool {\n        self.inner.sender.is_closed()\n    }\n}\n\nimpl fmt::Debug for Client {\n    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {\n        f.debug_struct(\"Client\").finish()\n    }\n}\n"
  },
  {
    "path": "libs/proxy/tokio-postgres2/src/codec.rs",
    "content": "use std::io;\n\nuse bytes::BytesMut;\nuse fallible_iterator::FallibleIterator;\nuse postgres_protocol2::message::backend;\nuse tokio::sync::mpsc::UnboundedSender;\nuse tokio_util::codec::{Decoder, Encoder};\n\npub enum FrontendMessage {\n    Raw(BytesMut),\n    RecordNotices(RecordNotices),\n}\n\npub struct RecordNotices {\n    pub sender: UnboundedSender<Box<str>>,\n    pub limit: usize,\n}\n\npub enum BackendMessage {\n    Normal {\n        messages: BackendMessages,\n        ready: bool,\n    },\n    Async(backend::Message),\n}\n\npub struct BackendMessages(BytesMut);\n\nimpl BackendMessages {\n    pub fn empty() -> BackendMessages {\n        BackendMessages(BytesMut::new())\n    }\n}\n\nimpl FallibleIterator for BackendMessages {\n    type Item = backend::Message;\n    type Error = io::Error;\n\n    fn next(&mut self) -> io::Result<Option<backend::Message>> {\n        backend::Message::parse(&mut self.0)\n    }\n}\n\npub struct PostgresCodec;\n\nimpl Encoder<BytesMut> for PostgresCodec {\n    type Error = io::Error;\n\n    fn encode(&mut self, item: BytesMut, dst: &mut BytesMut) -> io::Result<()> {\n        dst.unsplit(item);\n        Ok(())\n    }\n}\n\nimpl Decoder for PostgresCodec {\n    type Item = BackendMessage;\n    type Error = io::Error;\n\n    fn decode(&mut self, src: &mut BytesMut) -> Result<Option<BackendMessage>, io::Error> {\n        let mut idx = 0;\n\n        let mut ready = false;\n        while let Some(header) = backend::Header::parse(&src[idx..])? 
{\n            let len = header.len() as usize + 1;\n            if src[idx..].len() < len {\n                break;\n            }\n\n            match header.tag() {\n                backend::NOTICE_RESPONSE_TAG\n                | backend::NOTIFICATION_RESPONSE_TAG\n                | backend::PARAMETER_STATUS_TAG => {\n                    if idx == 0 {\n                        let message = backend::Message::parse(src)?.unwrap();\n                        return Ok(Some(BackendMessage::Async(message)));\n                    } else {\n                        break;\n                    }\n                }\n                _ => {}\n            }\n\n            idx += len;\n\n            if header.tag() == backend::READY_FOR_QUERY_TAG {\n                ready = true;\n                break;\n            }\n        }\n\n        if idx == 0 {\n            Ok(None)\n        } else {\n            Ok(Some(BackendMessage::Normal {\n                messages: BackendMessages(src.split_to(idx)),\n                ready,\n            }))\n        }\n    }\n}\n"
  },
  {
    "path": "libs/proxy/tokio-postgres2/src/config.rs",
    "content": "//! Connection configuration.\n\nuse std::net::IpAddr;\nuse std::time::Duration;\nuse std::{fmt, str};\n\npub use postgres_protocol2::authentication::sasl::ScramKeys;\nuse postgres_protocol2::message::frontend::StartupMessageParams;\nuse serde::{Deserialize, Serialize};\nuse tokio::io::{AsyncRead, AsyncWrite};\nuse tokio::net::TcpStream;\n\nuse crate::connect::connect;\nuse crate::connect_raw::{self, StartupStream};\nuse crate::connect_tls::connect_tls;\nuse crate::tls::{MakeTlsConnect, TlsConnect, TlsStream};\nuse crate::{Client, Connection, Error};\n\n/// TLS configuration.\n#[derive(Debug, Copy, Clone, PartialEq, Eq, Serialize, Deserialize)]\npub enum SslMode {\n    /// Do not use TLS.\n    Disable,\n    /// Attempt to connect with TLS but allow sessions without.\n    Prefer,\n    /// Require the use of TLS.\n    Require,\n}\n\n/// Channel binding configuration.\n#[derive(Debug, Copy, Clone, PartialEq, Eq)]\n#[non_exhaustive]\npub enum ChannelBinding {\n    /// Do not use channel binding.\n    Disable,\n    /// Attempt to use channel binding but allow sessions without.\n    Prefer,\n    /// Require the use of channel binding.\n    Require,\n}\n\n/// Replication mode configuration.\n#[derive(Debug, Copy, Clone, PartialEq, Eq)]\n#[non_exhaustive]\npub enum ReplicationMode {\n    /// Physical replication.\n    Physical,\n    /// Logical replication.\n    Logical,\n}\n\n/// A host specification.\n#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]\npub enum Host {\n    /// A TCP hostname.\n    Tcp(String),\n}\n\n/// Precomputed keys which may override password during auth.\n#[derive(Debug, Clone, Copy, PartialEq, Eq)]\npub enum AuthKeys {\n    /// A `ClientKey` & `ServerKey` pair for `SCRAM-SHA-256`.\n    ScramSha256(ScramKeys<32>),\n}\n\n/// Connection configuration.\n#[derive(Clone, PartialEq, Eq)]\npub struct Config {\n    pub(crate) host_addr: Option<IpAddr>,\n    pub(crate) host: Host,\n    pub(crate) port: u16,\n\n    pub(crate) 
password: Option<Vec<u8>>,\n    pub(crate) auth_keys: Option<Box<AuthKeys>>,\n    pub(crate) ssl_mode: SslMode,\n    pub(crate) connect_timeout: Option<Duration>,\n    pub(crate) channel_binding: ChannelBinding,\n    pub(crate) server_params: StartupMessageParams,\n\n    database: bool,\n    username: bool,\n}\n\nimpl Config {\n    /// Creates a new configuration.\n    pub fn new(host: String, port: u16) -> Config {\n        Config {\n            host_addr: None,\n            host: Host::Tcp(host),\n            port,\n            password: None,\n            auth_keys: None,\n            ssl_mode: SslMode::Prefer,\n            connect_timeout: None,\n            channel_binding: ChannelBinding::Prefer,\n            server_params: StartupMessageParams::default(),\n\n            database: false,\n            username: false,\n        }\n    }\n\n    /// Sets the user to authenticate with.\n    ///\n    /// Required.\n    pub fn user(&mut self, user: &str) -> &mut Config {\n        self.set_param(\"user\", user)\n    }\n\n    /// Gets the user to authenticate with, if one has been configured with\n    /// the `user` method.\n    pub fn user_is_set(&self) -> bool {\n        self.username\n    }\n\n    /// Sets the password to authenticate with.\n    pub fn password<T>(&mut self, password: T) -> &mut Config\n    where\n        T: AsRef<[u8]>,\n    {\n        self.password = Some(password.as_ref().to_vec());\n        self\n    }\n\n    /// Gets the password to authenticate with, if one has been configured with\n    /// the `password` method.\n    pub fn get_password(&self) -> Option<&[u8]> {\n        self.password.as_deref()\n    }\n\n    /// Sets precomputed protocol-specific keys to authenticate with.\n    /// When set, this option will override `password`.\n    /// See [`AuthKeys`] for more information.\n    pub fn auth_keys(&mut self, keys: AuthKeys) -> &mut Config {\n        self.auth_keys = Some(Box::new(keys));\n        self\n    }\n\n    /// Gets precomputed 
protocol-specific keys to authenticate with,\n    /// if any have been configured with the `auth_keys` method.\n    pub fn get_auth_keys(&self) -> Option<AuthKeys> {\n        self.auth_keys.as_deref().copied()\n    }\n\n    /// Sets the name of the database to connect to.\n    ///\n    /// Defaults to the user.\n    pub fn dbname(&mut self, dbname: &str) -> &mut Config {\n        self.set_param(\"database\", dbname)\n    }\n\n    /// Returns `true` if a database name has been configured\n    /// with the `dbname` method.\n    pub fn db_is_set(&self) -> bool {\n        self.database\n    }\n\n    pub fn set_param(&mut self, name: &str, value: &str) -> &mut Config {\n        if name == \"database\" {\n            self.database = true;\n        } else if name == \"user\" {\n            self.username = true;\n        }\n\n        self.server_params.insert(name, value);\n        self\n    }\n\n    pub fn set_host_addr(&mut self, addr: IpAddr) -> &mut Config {\n        self.host_addr = Some(addr);\n        self\n    }\n\n    pub fn get_host_addr(&self) -> Option<IpAddr> {\n        self.host_addr\n    }\n\n    /// Sets the SSL configuration.\n    ///\n    /// Defaults to `prefer`.\n    pub fn ssl_mode(&mut self, ssl_mode: SslMode) -> &mut Config {\n        self.ssl_mode = ssl_mode;\n        self\n    }\n\n    /// Gets the SSL configuration.\n    pub fn get_ssl_mode(&self) -> SslMode {\n        self.ssl_mode\n    }\n\n    /// Gets the host configured for the connection.\n    pub fn get_host(&self) -> &Host {\n        &self.host\n    }\n\n    /// Gets the port configured for the connection.\n    pub fn get_port(&self) -> u16 {\n        self.port\n    }\n\n    /// Sets the timeout applied to socket-level connection attempts.\n    ///\n    /// Note that hostnames can resolve to multiple IP addresses, and this timeout will apply to each address of each\n    /// host separately. 
Defaults to no limit.\n    pub fn connect_timeout(&mut self, connect_timeout: Duration) -> &mut Config {\n        self.connect_timeout = Some(connect_timeout);\n        self\n    }\n\n    /// Gets the connection timeout, if one has been set with the\n    /// `connect_timeout` method.\n    pub fn get_connect_timeout(&self) -> Option<&Duration> {\n        self.connect_timeout.as_ref()\n    }\n\n    /// Sets the channel binding behavior.\n    ///\n    /// Defaults to `prefer`.\n    pub fn channel_binding(&mut self, channel_binding: ChannelBinding) -> &mut Config {\n        self.channel_binding = channel_binding;\n        self\n    }\n\n    /// Gets the channel binding behavior.\n    pub fn get_channel_binding(&self) -> ChannelBinding {\n        self.channel_binding\n    }\n\n    /// Opens a connection to a PostgreSQL database.\n    ///\n    /// Requires the `runtime` Cargo feature (enabled by default).\n    pub async fn connect<T>(\n        &self,\n        tls: &T,\n    ) -> Result<(Client, Connection<TcpStream, T::Stream>), Error>\n    where\n        T: MakeTlsConnect<TcpStream>,\n    {\n        connect(tls, self).await\n    }\n\n    pub async fn tls_and_authenticate<S, T>(\n        &self,\n        stream: S,\n        tls: T,\n    ) -> Result<StartupStream<S, T::Stream>, Error>\n    where\n        S: AsyncRead + AsyncWrite + Unpin,\n        T: TlsConnect<S>,\n    {\n        let stream = connect_tls(stream, self.ssl_mode, tls).await?;\n        let mut stream = StartupStream::new(stream);\n        connect_raw::authenticate(&mut stream, self).await?;\n\n        Ok(stream)\n    }\n\n    pub fn authenticate<S, T>(\n        &self,\n        stream: &mut StartupStream<S, T>,\n    ) -> impl Future<Output = Result<(), Error>>\n    where\n        S: AsyncRead + AsyncWrite + Unpin,\n        T: TlsStream + Unpin,\n    {\n        connect_raw::authenticate(stream, self)\n    }\n}\n\n// Omit password from debug output\nimpl fmt::Debug for Config {\n    fn fmt(&self, f: &mut 
fmt::Formatter<'_>) -> fmt::Result {\n        struct Redaction {}\n        impl fmt::Debug for Redaction {\n            fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {\n                write!(f, \"_\")\n            }\n        }\n\n        f.debug_struct(\"Config\")\n            .field(\"password\", &self.password.as_ref().map(|_| Redaction {}))\n            .field(\"ssl_mode\", &self.ssl_mode)\n            .field(\"host\", &self.host)\n            .field(\"port\", &self.port)\n            .field(\"connect_timeout\", &self.connect_timeout)\n            .field(\"channel_binding\", &self.channel_binding)\n            .field(\"server_params\", &self.server_params)\n            .finish()\n    }\n}\n"
  },
  {
    "path": "libs/proxy/tokio-postgres2/src/connect.rs",
    "content": "use std::net::IpAddr;\n\nuse futures_util::TryStreamExt;\nuse postgres_protocol2::message::backend::Message;\nuse tokio::io::{AsyncRead, AsyncWrite};\nuse tokio::net::TcpStream;\nuse tokio::sync::mpsc;\n\nuse crate::client::SocketConfig;\nuse crate::config::{Host, SslMode};\nuse crate::connect_raw::StartupStream;\nuse crate::connect_socket::connect_socket;\nuse crate::tls::{MakeTlsConnect, TlsConnect};\nuse crate::{Client, Config, Connection, Error};\n\npub async fn connect<T>(\n    tls: &T,\n    config: &Config,\n) -> Result<(Client, Connection<TcpStream, T::Stream>), Error>\nwhere\n    T: MakeTlsConnect<TcpStream>,\n{\n    let hostname = match &config.host {\n        Host::Tcp(host) => host.as_str(),\n    };\n\n    let tls = tls\n        .make_tls_connect(hostname)\n        .map_err(|e| Error::tls(e.into()))?;\n\n    match connect_once(config.host_addr, &config.host, config.port, tls, config).await {\n        Ok((client, connection)) => Ok((client, connection)),\n        Err(e) => Err(e),\n    }\n}\n\nasync fn connect_once<T>(\n    host_addr: Option<IpAddr>,\n    host: &Host,\n    port: u16,\n    tls: T,\n    config: &Config,\n) -> Result<(Client, Connection<TcpStream, T::Stream>), Error>\nwhere\n    T: TlsConnect<TcpStream>,\n{\n    let socket = connect_socket(host_addr, host, port, config.connect_timeout).await?;\n    let stream = config.tls_and_authenticate(socket, tls).await?;\n    managed(\n        stream,\n        host_addr,\n        host.clone(),\n        port,\n        config.ssl_mode,\n        config.connect_timeout,\n    )\n    .await\n}\n\npub async fn managed<TlsStream>(\n    mut stream: StartupStream<TcpStream, TlsStream>,\n    host_addr: Option<IpAddr>,\n    host: Host,\n    port: u16,\n    ssl_mode: SslMode,\n    connect_timeout: Option<std::time::Duration>,\n) -> Result<(Client, Connection<TcpStream, TlsStream>), Error>\nwhere\n    TlsStream: AsyncRead + AsyncWrite + Unpin,\n{\n    let (process_id, secret_key) = 
wait_until_ready(&mut stream).await?;\n\n    let socket_config = SocketConfig {\n        host_addr,\n        host,\n        port,\n        connect_timeout,\n    };\n\n    let mut stream = stream.into_framed();\n    let write_buf = std::mem::take(stream.write_buffer_mut());\n\n    let (client_tx, conn_rx) = mpsc::unbounded_channel();\n    let (conn_tx, client_rx) = mpsc::channel(4);\n    let client = Client::new(\n        client_tx,\n        client_rx,\n        socket_config,\n        ssl_mode,\n        process_id,\n        secret_key,\n        write_buf,\n    );\n\n    let connection = Connection::new(stream, conn_tx, conn_rx);\n\n    Ok((client, connection))\n}\n\nasync fn wait_until_ready<S, T>(stream: &mut StartupStream<S, T>) -> Result<(i32, i32), Error>\nwhere\n    S: AsyncRead + AsyncWrite + Unpin,\n    T: AsyncRead + AsyncWrite + Unpin,\n{\n    let mut process_id = 0;\n    let mut secret_key = 0;\n\n    loop {\n        match stream.try_next().await.map_err(Error::io)? {\n            Some(Message::BackendKeyData(body)) => {\n                process_id = body.process_id();\n                secret_key = body.secret_key();\n            }\n            // These values are currently not used by `Client`/`Connection`. Ignore them.\n            Some(Message::ParameterStatus(_)) | Some(Message::NoticeResponse(_)) => {}\n            Some(Message::ReadyForQuery(_)) => return Ok((process_id, secret_key)),\n            Some(Message::ErrorResponse(body)) => return Err(Error::db(body)),\n            Some(_) => return Err(Error::unexpected_message()),\n            None => return Err(Error::closed()),\n        }\n    }\n}\n"
  },
  {
    "path": "libs/proxy/tokio-postgres2/src/connect_raw.rs",
    "content": "use std::io;\nuse std::pin::Pin;\nuse std::task::{Context, Poll, ready};\n\nuse bytes::BytesMut;\nuse fallible_iterator::FallibleIterator;\nuse futures_util::{SinkExt, Stream, TryStreamExt};\nuse postgres_protocol2::authentication::sasl;\nuse postgres_protocol2::authentication::sasl::ScramSha256;\nuse postgres_protocol2::message::backend::{AuthenticationSaslBody, Message};\nuse postgres_protocol2::message::frontend;\nuse tokio::io::{AsyncRead, AsyncWrite, ReadBuf};\nuse tokio_util::codec::{Framed, FramedParts};\n\nuse crate::Error;\nuse crate::codec::PostgresCodec;\nuse crate::config::{self, AuthKeys, Config};\nuse crate::connection::{GC_THRESHOLD, INITIAL_CAPACITY};\nuse crate::maybe_tls_stream::MaybeTlsStream;\nuse crate::tls::TlsStream;\n\npub struct StartupStream<S, T> {\n    inner: Framed<MaybeTlsStream<S, T>, PostgresCodec>,\n    read_buf: BytesMut,\n}\n\nimpl<S, T> Stream for StartupStream<S, T>\nwhere\n    S: AsyncRead + AsyncWrite + Unpin,\n    T: AsyncRead + AsyncWrite + Unpin,\n{\n    type Item = io::Result<Message>;\n\n    fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Option<Self::Item>> {\n        // We don't use `self.inner.poll_next()` as that might over-read into the read buffer.\n\n        // read 1 byte tag, 4 bytes length.\n        let header = ready!(self.as_mut().poll_fill_buf_exact(cx, 5)?);\n\n        let len = u32::from_be_bytes(header[1..5].try_into().unwrap());\n        if len < 4 {\n            return Poll::Ready(Some(Err(std::io::Error::other(\n                \"postgres message too small\",\n            ))));\n        }\n        if len >= 65536 {\n            return Poll::Ready(Some(Err(std::io::Error::other(\n                \"postgres message too large\",\n            ))));\n        }\n\n        // the tag is an additional byte.\n        let _message = ready!(self.as_mut().poll_fill_buf_exact(cx, len as usize + 1)?);\n\n        // Message::parse will remove all the bytes from the buffer.\n    
    Poll::Ready(Message::parse(&mut self.read_buf).transpose())\n    }\n}\n\nimpl<S, T> StartupStream<S, T>\nwhere\n    S: AsyncRead + AsyncWrite + Unpin,\n    T: AsyncRead + AsyncWrite + Unpin,\n{\n    /// Fill the buffer until it's the exact length provided. No additional data will be read from the socket.\n    ///\n    /// If the current buffer length is greater, nothing happens.\n    fn poll_fill_buf_exact(\n        self: Pin<&mut Self>,\n        cx: &mut Context<'_>,\n        len: usize,\n    ) -> Poll<Result<&[u8], std::io::Error>> {\n        let this = self.get_mut();\n        let mut stream = Pin::new(this.inner.get_mut());\n\n        let mut n = this.read_buf.len();\n        while n < len {\n            this.read_buf.resize(len, 0);\n\n            let mut buf = ReadBuf::new(&mut this.read_buf[..]);\n            buf.set_filled(n);\n\n            if stream.as_mut().poll_read(cx, &mut buf)?.is_pending() {\n                this.read_buf.truncate(n);\n                return Poll::Pending;\n            }\n\n            if buf.filled().len() == n {\n                return Poll::Ready(Err(std::io::Error::new(\n                    std::io::ErrorKind::UnexpectedEof,\n                    \"early eof\",\n                )));\n            }\n            n = buf.filled().len();\n\n            this.read_buf.truncate(n);\n        }\n\n        Poll::Ready(Ok(&this.read_buf[..len]))\n    }\n\n    pub fn into_framed(mut self) -> Framed<MaybeTlsStream<S, T>, PostgresCodec> {\n        *self.inner.read_buffer_mut() = self.read_buf;\n        self.inner\n    }\n\n    pub fn new(io: MaybeTlsStream<S, T>) -> Self {\n        let mut parts = FramedParts::new(io, PostgresCodec);\n        parts.write_buf = BytesMut::with_capacity(INITIAL_CAPACITY);\n\n        let mut inner = Framed::from_parts(parts);\n\n        // This is the default already, but nice to be explicit.\n        // We divide by two because writes will overshoot the boundary.\n        // We don't want constant overshoots 
to cause us to constantly re-shrink the buffer.\n        inner.set_backpressure_boundary(GC_THRESHOLD / 2);\n\n        Self {\n            inner,\n            read_buf: BytesMut::with_capacity(INITIAL_CAPACITY),\n        }\n    }\n}\n\npub(crate) async fn authenticate<S, T>(\n    stream: &mut StartupStream<S, T>,\n    config: &Config,\n) -> Result<(), Error>\nwhere\n    S: AsyncRead + AsyncWrite + Unpin,\n    T: TlsStream + Unpin,\n{\n    frontend::startup_message(&config.server_params, stream.inner.write_buffer_mut())\n        .map_err(Error::encode)?;\n\n    stream.inner.flush().await.map_err(Error::io)?;\n    match stream.try_next().await.map_err(Error::io)? {\n        Some(Message::AuthenticationOk) => {\n            can_skip_channel_binding(config)?;\n            return Ok(());\n        }\n        Some(Message::AuthenticationCleartextPassword) => {\n            can_skip_channel_binding(config)?;\n\n            let pass = config\n                .password\n                .as_ref()\n                .ok_or_else(|| Error::config(\"password missing\".into()))?;\n\n            frontend::password_message(pass, stream.inner.write_buffer_mut())\n                .map_err(Error::encode)?;\n        }\n        Some(Message::AuthenticationSasl(body)) => {\n            authenticate_sasl(stream, body, config).await?;\n        }\n        Some(Message::AuthenticationMd5Password)\n        | Some(Message::AuthenticationKerberosV5)\n        | Some(Message::AuthenticationScmCredential)\n        | Some(Message::AuthenticationGss)\n        | Some(Message::AuthenticationSspi) => {\n            return Err(Error::authentication(\n                \"unsupported authentication method\".into(),\n            ));\n        }\n        Some(Message::ErrorResponse(body)) => return Err(Error::db(body)),\n        Some(_) => return Err(Error::unexpected_message()),\n        None => return Err(Error::closed()),\n    }\n\n    stream.inner.flush().await.map_err(Error::io)?;\n    match 
stream.try_next().await.map_err(Error::io)? {\n        Some(Message::AuthenticationOk) => Ok(()),\n        Some(Message::ErrorResponse(body)) => Err(Error::db(body)),\n        Some(_) => Err(Error::unexpected_message()),\n        None => Err(Error::closed()),\n    }\n}\n\nfn can_skip_channel_binding(config: &Config) -> Result<(), Error> {\n    match config.channel_binding {\n        config::ChannelBinding::Disable | config::ChannelBinding::Prefer => Ok(()),\n        config::ChannelBinding::Require => Err(Error::authentication(\n            \"server did not use channel binding\".into(),\n        )),\n    }\n}\n\nasync fn authenticate_sasl<S, T>(\n    stream: &mut StartupStream<S, T>,\n    body: AuthenticationSaslBody,\n    config: &Config,\n) -> Result<(), Error>\nwhere\n    S: AsyncRead + AsyncWrite + Unpin,\n    T: TlsStream + Unpin,\n{\n    let mut has_scram = false;\n    let mut has_scram_plus = false;\n    let mut mechanisms = body.mechanisms();\n    while let Some(mechanism) = mechanisms.next().map_err(Error::parse)? 
{\n        match mechanism {\n            sasl::SCRAM_SHA_256 => has_scram = true,\n            sasl::SCRAM_SHA_256_PLUS => has_scram_plus = true,\n            _ => {}\n        }\n    }\n\n    let channel_binding = stream\n        .inner\n        .get_ref()\n        .channel_binding()\n        .tls_server_end_point\n        .filter(|_| config.channel_binding != config::ChannelBinding::Disable)\n        .map(sasl::ChannelBinding::tls_server_end_point);\n\n    let (channel_binding, mechanism) = if has_scram_plus {\n        match channel_binding {\n            Some(channel_binding) => (channel_binding, sasl::SCRAM_SHA_256_PLUS),\n            None => (sasl::ChannelBinding::unsupported(), sasl::SCRAM_SHA_256),\n        }\n    } else if has_scram {\n        match channel_binding {\n            Some(_) => (sasl::ChannelBinding::unrequested(), sasl::SCRAM_SHA_256),\n            None => (sasl::ChannelBinding::unsupported(), sasl::SCRAM_SHA_256),\n        }\n    } else {\n        return Err(Error::authentication(\"unsupported SASL mechanism\".into()));\n    };\n\n    if mechanism != sasl::SCRAM_SHA_256_PLUS {\n        can_skip_channel_binding(config)?;\n    }\n\n    let mut scram = if let Some(AuthKeys::ScramSha256(keys)) = config.get_auth_keys() {\n        ScramSha256::new_with_keys(keys, channel_binding)\n    } else if let Some(password) = config.get_password() {\n        ScramSha256::new(password, channel_binding)\n    } else {\n        return Err(Error::config(\"password or auth keys missing\".into()));\n    };\n\n    frontend::sasl_initial_response(mechanism, scram.message(), stream.inner.write_buffer_mut())\n        .map_err(Error::encode)?;\n\n    stream.inner.flush().await.map_err(Error::io)?;\n    let body = match stream.try_next().await.map_err(Error::io)? 
{\n        Some(Message::AuthenticationSaslContinue(body)) => body,\n        Some(Message::ErrorResponse(body)) => return Err(Error::db(body)),\n        Some(_) => return Err(Error::unexpected_message()),\n        None => return Err(Error::closed()),\n    };\n\n    scram\n        .update(body.data())\n        .await\n        .map_err(|e| Error::authentication(e.into()))?;\n\n    frontend::sasl_response(scram.message(), stream.inner.write_buffer_mut())\n        .map_err(Error::encode)?;\n\n    stream.inner.flush().await.map_err(Error::io)?;\n    let body = match stream.try_next().await.map_err(Error::io)? {\n        Some(Message::AuthenticationSaslFinal(body)) => body,\n        Some(Message::ErrorResponse(body)) => return Err(Error::db(body)),\n        Some(_) => return Err(Error::unexpected_message()),\n        None => return Err(Error::closed()),\n    };\n\n    scram\n        .finish(body.data())\n        .map_err(|e| Error::authentication(e.into()))?;\n\n    Ok(())\n}\n"
  },
  {
    "path": "libs/proxy/tokio-postgres2/src/connect_socket.rs",
    "content": "use std::future::Future;\nuse std::io;\nuse std::net::{IpAddr, SocketAddr};\nuse std::time::Duration;\n\nuse tokio::net::{self, TcpStream};\nuse tokio::time;\n\nuse crate::Error;\nuse crate::config::Host;\n\npub(crate) async fn connect_socket(\n    host_addr: Option<IpAddr>,\n    host: &Host,\n    port: u16,\n    connect_timeout: Option<Duration>,\n) -> Result<TcpStream, Error> {\n    match host {\n        Host::Tcp(host) => {\n            let addrs = match host_addr {\n                Some(addr) => vec![SocketAddr::new(addr, port)],\n                None => net::lookup_host((&**host, port))\n                    .await\n                    .map_err(Error::connect)?\n                    .collect(),\n            };\n\n            let mut last_err = None;\n\n            for addr in addrs {\n                let stream =\n                    match connect_with_timeout(TcpStream::connect(addr), connect_timeout).await {\n                        Ok(stream) => stream,\n                        Err(e) => {\n                            last_err = Some(e);\n                            continue;\n                        }\n                    };\n\n                stream.set_nodelay(true).map_err(Error::connect)?;\n\n                return Ok(stream);\n            }\n\n            Err(last_err.unwrap_or_else(|| {\n                Error::connect(io::Error::new(\n                    io::ErrorKind::InvalidInput,\n                    \"could not resolve any addresses\",\n                ))\n            }))\n        }\n    }\n}\n\nasync fn connect_with_timeout<F, T>(connect: F, timeout: Option<Duration>) -> Result<T, Error>\nwhere\n    F: Future<Output = io::Result<T>>,\n{\n    match timeout {\n        Some(timeout) => match time::timeout(timeout, connect).await {\n            Ok(Ok(socket)) => Ok(socket),\n            Ok(Err(e)) => Err(Error::connect(e)),\n            Err(_) => Err(Error::connect(io::Error::new(\n                io::ErrorKind::TimedOut,\n             
   \"connection timed out\",\n            ))),\n        },\n        None => match connect.await {\n            Ok(socket) => Ok(socket),\n            Err(e) => Err(Error::connect(e)),\n        },\n    }\n}\n"
  },
  {
    "path": "libs/proxy/tokio-postgres2/src/connect_tls.rs",
    "content": "use bytes::BytesMut;\nuse postgres_protocol2::message::frontend;\nuse tokio::io::{AsyncRead, AsyncReadExt, AsyncWrite, AsyncWriteExt};\n\nuse crate::Error;\nuse crate::config::SslMode;\nuse crate::maybe_tls_stream::MaybeTlsStream;\nuse crate::tls::TlsConnect;\nuse crate::tls::private::ForcePrivateApi;\n\npub async fn connect_tls<S, T>(\n    mut stream: S,\n    mode: SslMode,\n    tls: T,\n) -> Result<MaybeTlsStream<S, T::Stream>, Error>\nwhere\n    S: AsyncRead + AsyncWrite + Unpin,\n    T: TlsConnect<S>,\n{\n    match mode {\n        SslMode::Disable => return Ok(MaybeTlsStream::Raw(stream)),\n        SslMode::Prefer if !tls.can_connect(ForcePrivateApi) => {\n            return Ok(MaybeTlsStream::Raw(stream));\n        }\n        SslMode::Prefer | SslMode::Require => {}\n    }\n\n    let mut buf = BytesMut::new();\n    frontend::ssl_request(&mut buf);\n    stream.write_all(&buf).await.map_err(Error::io)?;\n\n    let mut buf = [0];\n    stream.read_exact(&mut buf).await.map_err(Error::io)?;\n\n    if buf[0] != b'S' {\n        if SslMode::Require == mode {\n            return Err(Error::tls(\"server does not support TLS\".into()));\n        } else {\n            return Ok(MaybeTlsStream::Raw(stream));\n        }\n    }\n\n    let stream = tls\n        .connect(stream)\n        .await\n        .map_err(|e| Error::tls(e.into()))?;\n\n    Ok(MaybeTlsStream::Tls(stream))\n}\n"
  },
  {
    "path": "libs/proxy/tokio-postgres2/src/connection.rs",
    "content": "use std::future::Future;\nuse std::pin::Pin;\nuse std::task::{Context, Poll};\n\nuse bytes::BytesMut;\nuse fallible_iterator::FallibleIterator;\nuse futures_util::{Sink, StreamExt, ready};\nuse postgres_protocol2::message::backend::{Message, NoticeResponseBody};\nuse postgres_protocol2::message::frontend;\nuse tokio::io::{AsyncRead, AsyncWrite};\nuse tokio::sync::mpsc;\nuse tokio_util::codec::Framed;\nuse tokio_util::sync::PollSender;\nuse tracing::trace;\n\nuse crate::Error;\nuse crate::codec::{\n    BackendMessage, BackendMessages, FrontendMessage, PostgresCodec, RecordNotices,\n};\nuse crate::maybe_tls_stream::MaybeTlsStream;\n\n#[derive(PartialEq, Debug)]\nenum State {\n    Active,\n    Closing,\n}\n\n/// A connection to a PostgreSQL database.\n///\n/// This is one half of what is returned when a new connection is established. It performs the actual IO with the\n/// server, and should generally be spawned off onto an executor to run in the background.\n///\n/// `Connection` implements `Future`, and only resolves when the connection is closed, either because a fatal error has\n/// occurred, or because its associated `Client` has dropped and all outstanding work has completed.\n#[must_use = \"futures do nothing unless polled\"]\npub struct Connection<S, T> {\n    stream: Framed<MaybeTlsStream<S, T>, PostgresCodec>,\n\n    sender: PollSender<BackendMessages>,\n    receiver: mpsc::UnboundedReceiver<FrontendMessage>,\n    notices: Option<RecordNotices>,\n\n    pending_response: Option<BackendMessages>,\n    state: State,\n}\n\npub const INITIAL_CAPACITY: usize = 2 * 1024;\npub const GC_THRESHOLD: usize = 16 * 1024;\n\n/// Garbage collect the [`BytesMut`] if it has too much spare capacity.\npub fn gc_bytesmut(buf: &mut BytesMut) {\n    // We use a different mode to shrink the buf when above the threshold.\n    // When above the threshold, we only re-allocate when the buf has 2x spare capacity.\n    let reclaim = 
GC_THRESHOLD.checked_sub(buf.len()).unwrap_or(buf.len());\n\n    // `try_reclaim` tries to get the capacity from any shared `BytesMut`s,\n    // before then comparing the length against the capacity.\n    if buf.try_reclaim(reclaim) {\n        let capacity = usize::max(buf.len(), INITIAL_CAPACITY);\n\n        // Allocate a new `BytesMut` so that we deallocate the old version.\n        let mut new = BytesMut::with_capacity(capacity);\n        new.extend_from_slice(buf);\n        *buf = new;\n    }\n}\n\npub enum Never {}\n\nimpl<S, T> Connection<S, T>\nwhere\n    S: AsyncRead + AsyncWrite + Unpin,\n    T: AsyncRead + AsyncWrite + Unpin,\n{\n    pub(crate) fn new(\n        stream: Framed<MaybeTlsStream<S, T>, PostgresCodec>,\n        sender: mpsc::Sender<BackendMessages>,\n        receiver: mpsc::UnboundedReceiver<FrontendMessage>,\n    ) -> Connection<S, T> {\n        Connection {\n            stream,\n            sender: PollSender::new(sender),\n            receiver,\n            notices: None,\n            pending_response: None,\n            state: State::Active,\n        }\n    }\n\n    /// Read and process messages from the connection to postgres.\n    /// client <- postgres\n    fn poll_read(&mut self, cx: &mut Context<'_>) -> Poll<Result<Never, Error>> {\n        loop {\n            let messages = match self.pending_response.take() {\n                Some(messages) => messages,\n                None => {\n                    let message = match self.stream.poll_next_unpin(cx) {\n                        Poll::Pending => return Poll::Pending,\n                        Poll::Ready(None) => return Poll::Ready(Err(Error::closed())),\n                        Poll::Ready(Some(Err(e))) => return Poll::Ready(Err(Error::io(e))),\n                        Poll::Ready(Some(Ok(message))) => message,\n                    };\n\n                    match message {\n                        BackendMessage::Async(Message::NoticeResponse(body)) => {\n                            
self.handle_notice(body)?;\n                            continue;\n                        }\n                        BackendMessage::Async(_) => continue,\n                        BackendMessage::Normal { messages, ready } => {\n                            // if we read a ReadyForQuery from postgres, let's try GC the read buffer.\n                            if ready {\n                                gc_bytesmut(self.stream.read_buffer_mut());\n                            }\n\n                            messages\n                        }\n                    }\n                }\n            };\n\n            match self.sender.poll_reserve(cx) {\n                Poll::Ready(Ok(())) => {\n                    let _ = self.sender.send_item(messages);\n                }\n                Poll::Ready(Err(_)) => {\n                    return Poll::Ready(Err(Error::closed()));\n                }\n                Poll::Pending => {\n                    self.pending_response = Some(messages);\n                    trace!(\"poll_read: waiting on sender\");\n                    return Poll::Pending;\n                }\n            }\n        }\n    }\n\n    fn handle_notice(&mut self, body: NoticeResponseBody) -> Result<(), Error> {\n        let Some(notices) = &mut self.notices else {\n            return Ok(());\n        };\n\n        let mut fields = body.fields();\n        while let Some(field) = fields.next().map_err(Error::parse)? 
{\n            // loop until we find the message field\n            if field.type_() == b'M' {\n                // if the message field is within the limit, send it.\n                if let Some(new_limit) = notices.limit.checked_sub(field.value().len()) {\n                    match notices.sender.send(field.value().into()) {\n                        // set the new limit.\n                        Ok(()) => notices.limit = new_limit,\n                        // closed.\n                        Err(_) => self.notices = None,\n                    }\n                }\n                break;\n            }\n        }\n\n        Ok(())\n    }\n\n    /// Fetch the next client request and enqueue the response sender.\n    fn poll_request(&mut self, cx: &mut Context<'_>) -> Poll<Option<FrontendMessage>> {\n        if self.receiver.is_closed() {\n            return Poll::Ready(None);\n        }\n\n        match self.receiver.poll_recv(cx) {\n            Poll::Ready(Some(request)) => {\n                trace!(\"polled new request\");\n                Poll::Ready(Some(request))\n            }\n            Poll::Ready(None) => Poll::Ready(None),\n            Poll::Pending => Poll::Pending,\n        }\n    }\n\n    /// Process client requests and write them to the postgres connection, flushing if necessary.\n    /// client -> postgres\n    fn poll_write(&mut self, cx: &mut Context<'_>) -> Poll<Result<(), Error>> {\n        loop {\n            if Pin::new(&mut self.stream)\n                .poll_ready(cx)\n                .map_err(Error::io)?\n                .is_pending()\n            {\n                trace!(\"poll_write: waiting on socket\");\n\n                // poll_ready is self-flushing.\n                return Poll::Pending;\n            }\n\n            match self.poll_request(cx) {\n                // send the message to postgres\n                Poll::Ready(Some(FrontendMessage::Raw(request))) => {\n                    Pin::new(&mut self.stream)\n                    
    .start_send(request)\n                        .map_err(Error::io)?;\n                }\n                Poll::Ready(Some(FrontendMessage::RecordNotices(notices))) => {\n                    self.notices = Some(notices)\n                }\n                // No more messages from the client, and no more responses to wait for.\n                // Send a terminate message to postgres\n                Poll::Ready(None) => {\n                    trace!(\"poll_write: at eof, terminating\");\n                    frontend::terminate(self.stream.write_buffer_mut());\n\n                    trace!(\"poll_write: sent eof, closing\");\n                    trace!(\"poll_write: done\");\n                    return Poll::Ready(Ok(()));\n                }\n                // Still waiting for a message from the client.\n                Poll::Pending => {\n                    trace!(\"poll_write: waiting on request\");\n                    ready!(self.poll_flush(cx))?;\n                    return Poll::Pending;\n                }\n            }\n        }\n    }\n\n    fn poll_flush(&mut self, cx: &mut Context<'_>) -> Poll<Result<(), Error>> {\n        match Pin::new(&mut self.stream)\n            .poll_flush(cx)\n            .map_err(Error::io)?\n        {\n            Poll::Ready(()) => {\n                trace!(\"poll_flush: flushed\");\n\n                // Since our codec prefers to share the buffer with the `Client`,\n                // if we don't release our share, then the `Client` would have to re-alloc\n                // the buffer when they next use it.\n                debug_assert!(self.stream.write_buffer().is_empty());\n                *self.stream.write_buffer_mut() = BytesMut::new();\n\n                Poll::Ready(Ok(()))\n            }\n            Poll::Pending => {\n                trace!(\"poll_flush: waiting on socket\");\n                Poll::Pending\n            }\n        }\n    }\n\n    fn poll_shutdown(&mut self, cx: &mut Context<'_>) -> 
Poll<Result<(), Error>> {\n        match Pin::new(&mut self.stream)\n            .poll_close(cx)\n            .map_err(Error::io)?\n        {\n            Poll::Ready(()) => {\n                trace!(\"poll_shutdown: complete\");\n                Poll::Ready(Ok(()))\n            }\n            Poll::Pending => {\n                trace!(\"poll_shutdown: waiting on socket\");\n                Poll::Pending\n            }\n        }\n    }\n\n    fn poll_message(&mut self, cx: &mut Context<'_>) -> Poll<Option<Result<Never, Error>>> {\n        if self.state != State::Closing {\n            // if the state is still active, try read from and write to postgres.\n            let Poll::Pending = self.poll_read(cx)?;\n            if self.poll_write(cx)?.is_ready() {\n                self.state = State::Closing;\n            }\n\n            // poll_read returned Pending.\n            // poll_write returned Pending or Ready(()).\n            // if poll_write returned Pending, there is nothing more to do until we are woken again.\n            // if poll_write returned Ready(()), there are no more client requests, so we fall through to shutdown.\n            if self.state != State::Closing {\n                return Poll::Pending;\n            }\n        }\n\n        match self.poll_shutdown(cx) {\n            Poll::Ready(Ok(())) => Poll::Ready(None),\n            Poll::Ready(Err(e)) => Poll::Ready(Some(Err(e))),\n            Poll::Pending => Poll::Pending,\n        }\n    }\n}\n\nimpl<S, T> Future for Connection<S, T>\nwhere\n    S: AsyncRead + AsyncWrite + Unpin,\n    T: AsyncRead + AsyncWrite + Unpin,\n{\n    type Output = Result<(), Error>;\n\n    fn poll(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Result<(), Error>> {\n        match self.poll_message(cx)? {\n            Poll::Ready(None) => Poll::Ready(Ok(())),\n            Poll::Pending => Poll::Pending,\n        }\n    }\n}\n"
  },
  {
    "path": "libs/proxy/tokio-postgres2/src/error/mod.rs",
    "content": "//! Errors.\n\nuse std::error::{self, Error as _Error};\nuse std::{fmt, io};\n\nuse fallible_iterator::FallibleIterator;\nuse postgres_protocol2::message::backend::{ErrorFields, ErrorResponseBody};\n\npub use self::sqlstate::*;\n\n#[allow(clippy::unreadable_literal)]\npub mod sqlstate;\n\n/// The severity of a Postgres error or notice.\n#[derive(Debug, Copy, Clone, PartialEq, Eq)]\npub enum Severity {\n    /// PANIC\n    Panic,\n    /// FATAL\n    Fatal,\n    /// ERROR\n    Error,\n    /// WARNING\n    Warning,\n    /// NOTICE\n    Notice,\n    /// DEBUG\n    Debug,\n    /// INFO\n    Info,\n    /// LOG\n    Log,\n}\n\nimpl fmt::Display for Severity {\n    fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {\n        let s = match *self {\n            Severity::Panic => \"PANIC\",\n            Severity::Fatal => \"FATAL\",\n            Severity::Error => \"ERROR\",\n            Severity::Warning => \"WARNING\",\n            Severity::Notice => \"NOTICE\",\n            Severity::Debug => \"DEBUG\",\n            Severity::Info => \"INFO\",\n            Severity::Log => \"LOG\",\n        };\n        fmt.write_str(s)\n    }\n}\n\nimpl Severity {\n    fn from_str(s: &str) -> Option<Severity> {\n        match s {\n            \"PANIC\" => Some(Severity::Panic),\n            \"FATAL\" => Some(Severity::Fatal),\n            \"ERROR\" => Some(Severity::Error),\n            \"WARNING\" => Some(Severity::Warning),\n            \"NOTICE\" => Some(Severity::Notice),\n            \"DEBUG\" => Some(Severity::Debug),\n            \"INFO\" => Some(Severity::Info),\n            \"LOG\" => Some(Severity::Log),\n            _ => None,\n        }\n    }\n}\n\n/// A Postgres error or notice.\n#[derive(Debug, Clone, PartialEq, Eq)]\npub struct DbError {\n    severity: String,\n    parsed_severity: Option<Severity>,\n    code: SqlState,\n    message: String,\n    detail: Option<String>,\n    hint: Option<String>,\n    position: Option<ErrorPosition>,\n    where_: 
Option<String>,\n    schema: Option<String>,\n    table: Option<String>,\n    column: Option<String>,\n    datatype: Option<String>,\n    constraint: Option<String>,\n    file: Option<String>,\n    line: Option<u32>,\n    routine: Option<String>,\n}\n\nimpl DbError {\n    pub fn new_test_error(code: SqlState, message: String) -> Self {\n        DbError {\n            severity: \"ERROR\".to_string(),\n            parsed_severity: Some(Severity::Error),\n            code,\n            message,\n            detail: None,\n            hint: None,\n            position: None,\n            where_: None,\n            schema: None,\n            table: None,\n            column: None,\n            datatype: None,\n            constraint: None,\n            file: None,\n            line: None,\n            routine: None,\n        }\n    }\n\n    pub(crate) fn parse(fields: &mut ErrorFields<'_>) -> io::Result<DbError> {\n        let mut severity = None;\n        let mut parsed_severity = None;\n        let mut code = None;\n        let mut message = None;\n        let mut detail = None;\n        let mut hint = None;\n        let mut normal_position = None;\n        let mut internal_position = None;\n        let mut internal_query = None;\n        let mut where_ = None;\n        let mut schema = None;\n        let mut table = None;\n        let mut column = None;\n        let mut datatype = None;\n        let mut constraint = None;\n        let mut file = None;\n        let mut line = None;\n        let mut routine = None;\n\n        while let Some(field) = fields.next()? 
{\n            match field.type_() {\n                b'S' => severity = Some(field.value().to_owned()),\n                b'C' => code = Some(SqlState::from_code(field.value())),\n                b'M' => message = Some(field.value().to_owned()),\n                b'D' => detail = Some(field.value().to_owned()),\n                b'H' => hint = Some(field.value().to_owned()),\n                b'P' => {\n                    normal_position = Some(field.value().parse::<u32>().map_err(|_| {\n                        io::Error::new(\n                            io::ErrorKind::InvalidInput,\n                            \"`P` field did not contain an integer\",\n                        )\n                    })?);\n                }\n                b'p' => {\n                    internal_position = Some(field.value().parse::<u32>().map_err(|_| {\n                        io::Error::new(\n                            io::ErrorKind::InvalidInput,\n                            \"`p` field did not contain an integer\",\n                        )\n                    })?);\n                }\n                b'q' => internal_query = Some(field.value().to_owned()),\n                b'W' => where_ = Some(field.value().to_owned()),\n                b's' => schema = Some(field.value().to_owned()),\n                b't' => table = Some(field.value().to_owned()),\n                b'c' => column = Some(field.value().to_owned()),\n                b'd' => datatype = Some(field.value().to_owned()),\n                b'n' => constraint = Some(field.value().to_owned()),\n                b'F' => file = Some(field.value().to_owned()),\n                b'L' => {\n                    line = Some(field.value().parse::<u32>().map_err(|_| {\n                        io::Error::new(\n                            io::ErrorKind::InvalidInput,\n                            \"`L` field did not contain an integer\",\n                        )\n                    })?);\n                }\n                b'R' 
=> routine = Some(field.value().to_owned()),\n                b'V' => {\n                    parsed_severity = Some(Severity::from_str(field.value()).ok_or_else(|| {\n                        io::Error::new(\n                            io::ErrorKind::InvalidInput,\n                            \"`V` field contained an invalid value\",\n                        )\n                    })?);\n                }\n                _ => {}\n            }\n        }\n\n        Ok(DbError {\n            severity: severity\n                .ok_or_else(|| io::Error::new(io::ErrorKind::InvalidInput, \"`S` field missing\"))?,\n            parsed_severity,\n            code: code\n                .ok_or_else(|| io::Error::new(io::ErrorKind::InvalidInput, \"`C` field missing\"))?,\n            message: message\n                .ok_or_else(|| io::Error::new(io::ErrorKind::InvalidInput, \"`M` field missing\"))?,\n            detail,\n            hint,\n            position: match normal_position {\n                Some(position) => Some(ErrorPosition::Original(position)),\n                None => match internal_position {\n                    Some(position) => Some(ErrorPosition::Internal {\n                        position,\n                        query: internal_query.ok_or_else(|| {\n                            io::Error::new(\n                                io::ErrorKind::InvalidInput,\n                                \"`q` field missing but `p` field present\",\n                            )\n                        })?,\n                    }),\n                    None => None,\n                },\n            },\n            where_,\n            schema,\n            table,\n            column,\n            datatype,\n            constraint,\n            file,\n            line,\n            routine,\n        })\n    }\n\n    /// The field contents are ERROR, FATAL, or PANIC (in an error message),\n    /// or WARNING, NOTICE, DEBUG, INFO, or LOG (in a notice message), or a\n  
  /// localized translation of one of these.\n    pub fn severity(&self) -> &str {\n        &self.severity\n    }\n\n    /// A parsed, nonlocalized version of `severity`. (PostgreSQL 9.6+)\n    pub fn parsed_severity(&self) -> Option<Severity> {\n        self.parsed_severity\n    }\n\n    /// The SQLSTATE code for the error.\n    pub fn code(&self) -> &SqlState {\n        &self.code\n    }\n\n    /// The primary human-readable error message.\n    ///\n    /// This should be accurate but terse (typically one line).\n    pub fn message(&self) -> &str {\n        &self.message\n    }\n\n    /// An optional secondary error message carrying more detail about the\n    /// problem.\n    ///\n    /// Might run to multiple lines.\n    pub fn detail(&self) -> Option<&str> {\n        self.detail.as_deref()\n    }\n\n    /// An optional suggestion what to do about the problem.\n    ///\n    /// This is intended to differ from `detail` in that it offers advice\n    /// (potentially inappropriate) rather than hard facts. Might run to\n    /// multiple lines.\n    pub fn hint(&self) -> Option<&str> {\n        self.hint.as_deref()\n    }\n\n    /// An optional error cursor position into either the original query string\n    /// or an internally generated query.\n    pub fn position(&self) -> Option<&ErrorPosition> {\n        self.position.as_ref()\n    }\n\n    /// An indication of the context in which the error occurred.\n    ///\n    /// Presently this includes a call stack traceback of active procedural\n    /// language functions and internally-generated queries. The trace is one\n    /// entry per line, most recent first.\n    pub fn where_(&self) -> Option<&str> {\n        self.where_.as_deref()\n    }\n\n    /// If the error was associated with a specific database object, the name\n    /// of the schema containing that object, if any. 
(PostgreSQL 9.3+)\n    pub fn schema(&self) -> Option<&str> {\n        self.schema.as_deref()\n    }\n\n    /// If the error was associated with a specific table, the name of the\n    /// table. (Refer to the schema name field for the name of the table's\n    /// schema.) (PostgreSQL 9.3+)\n    pub fn table(&self) -> Option<&str> {\n        self.table.as_deref()\n    }\n\n    /// If the error was associated with a specific table column, the name of\n    /// the column.\n    ///\n    /// (Refer to the schema and table name fields to identify the table.)\n    /// (PostgreSQL 9.3+)\n    pub fn column(&self) -> Option<&str> {\n        self.column.as_deref()\n    }\n\n    /// If the error was associated with a specific data type, the name of the\n    /// data type. (Refer to the schema name field for the name of the data\n    /// type's schema.) (PostgreSQL 9.3+)\n    pub fn datatype(&self) -> Option<&str> {\n        self.datatype.as_deref()\n    }\n\n    /// If the error was associated with a specific constraint, the name of the\n    /// constraint.\n    ///\n    /// Refer to fields listed above for the associated table or domain.\n    /// (For this purpose, indexes are treated as constraints, even if they\n    /// weren't created with constraint syntax.) 
(PostgreSQL 9.3+)\n    pub fn constraint(&self) -> Option<&str> {\n        self.constraint.as_deref()\n    }\n\n    /// The file name of the source-code location where the error was reported.\n    pub fn file(&self) -> Option<&str> {\n        self.file.as_deref()\n    }\n\n    /// The line number of the source-code location where the error was\n    /// reported.\n    pub fn line(&self) -> Option<u32> {\n        self.line\n    }\n\n    /// The name of the source-code routine reporting the error.\n    pub fn routine(&self) -> Option<&str> {\n        self.routine.as_deref()\n    }\n}\n\nimpl fmt::Display for DbError {\n    fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {\n        write!(fmt, \"{}: {}\", self.severity, self.message)?;\n        if let Some(detail) = &self.detail {\n            write!(fmt, \"\\nDETAIL: {detail}\")?;\n        }\n        if let Some(hint) = &self.hint {\n            write!(fmt, \"\\nHINT: {hint}\")?;\n        }\n        Ok(())\n    }\n}\n\nimpl error::Error for DbError {}\n\n/// Represents the position of an error in a query.\n#[derive(Clone, PartialEq, Eq, Debug)]\npub enum ErrorPosition {\n    /// A position in the original query.\n    Original(u32),\n    /// A position in an internally generated query.\n    Internal {\n        /// The byte position.\n        position: u32,\n        /// A query generated by the Postgres server.\n        query: String,\n    },\n}\n\n#[derive(Debug, PartialEq)]\nenum Kind {\n    Io,\n    UnexpectedMessage,\n    Tls,\n    ToSql(usize),\n    FromSql(usize),\n    Column(String),\n    Closed,\n    Db,\n    Parse,\n    Encode,\n    Authentication,\n    Config,\n    Connect,\n    Timeout,\n}\n\nstruct ErrorInner {\n    kind: Kind,\n    cause: Option<Box<dyn error::Error + Sync + Send>>,\n}\n\n/// An error communicating with the Postgres server.\npub struct Error(Box<ErrorInner>);\n\nimpl fmt::Debug for Error {\n    fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {\n        
fmt.debug_struct(\"Error\")\n            .field(\"kind\", &self.0.kind)\n            .field(\"cause\", &self.0.cause)\n            .finish()\n    }\n}\n\nimpl fmt::Display for Error {\n    fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {\n        match &self.0.kind {\n            Kind::Io => fmt.write_str(\"error communicating with the server\")?,\n            Kind::UnexpectedMessage => fmt.write_str(\"unexpected message from server\")?,\n            Kind::Tls => fmt.write_str(\"error performing TLS handshake\")?,\n            Kind::ToSql(idx) => write!(fmt, \"error serializing parameter {idx}\")?,\n            Kind::FromSql(idx) => write!(fmt, \"error deserializing column {idx}\")?,\n            Kind::Column(column) => write!(fmt, \"invalid column `{column}`\")?,\n            Kind::Closed => fmt.write_str(\"connection closed\")?,\n            Kind::Db => fmt.write_str(\"db error\")?,\n            Kind::Parse => fmt.write_str(\"error parsing response from server\")?,\n            Kind::Encode => fmt.write_str(\"error encoding message to server\")?,\n            Kind::Authentication => fmt.write_str(\"authentication error\")?,\n            Kind::Config => fmt.write_str(\"invalid configuration\")?,\n            Kind::Connect => fmt.write_str(\"error connecting to server\")?,\n            Kind::Timeout => fmt.write_str(\"timeout waiting for server\")?,\n        };\n        if let Some(ref cause) = self.0.cause {\n            write!(fmt, \": {cause}\")?;\n        }\n        Ok(())\n    }\n}\n\nimpl error::Error for Error {\n    fn source(&self) -> Option<&(dyn error::Error + 'static)> {\n        self.0.cause.as_ref().map(|e| &**e as _)\n    }\n}\n\nimpl Error {\n    /// Consumes the error, returning its cause.\n    pub fn into_source(self) -> Option<Box<dyn error::Error + Sync + Send>> {\n        self.0.cause\n    }\n\n    /// Returns the source of this error if it was a `DbError`.\n    ///\n    /// This is a simple convenience method.\n    pub fn 
as_db_error(&self) -> Option<&DbError> {\n        self.source().and_then(|e| e.downcast_ref::<DbError>())\n    }\n\n    /// Determines if the error was associated with closed connection.\n    pub fn is_closed(&self) -> bool {\n        self.0.kind == Kind::Closed\n    }\n\n    /// Returns the SQLSTATE error code associated with the error.\n    ///\n    /// This is a convenience method that downcasts the cause to a `DbError` and returns its code.\n    pub fn code(&self) -> Option<&SqlState> {\n        self.as_db_error().map(DbError::code)\n    }\n\n    fn new(kind: Kind, cause: Option<Box<dyn error::Error + Sync + Send>>) -> Error {\n        Error(Box::new(ErrorInner { kind, cause }))\n    }\n\n    pub fn closed() -> Error {\n        Error::new(Kind::Closed, None)\n    }\n\n    pub fn unexpected_message() -> Error {\n        Error::new(Kind::UnexpectedMessage, None)\n    }\n\n    #[allow(clippy::needless_pass_by_value)]\n    pub fn db(error: ErrorResponseBody) -> Error {\n        match DbError::parse(&mut error.fields()) {\n            Ok(e) => Error::new(Kind::Db, Some(Box::new(e))),\n            Err(e) => Error::new(Kind::Parse, Some(Box::new(e))),\n        }\n    }\n\n    pub(crate) fn parse(e: io::Error) -> Error {\n        Error::new(Kind::Parse, Some(Box::new(e)))\n    }\n\n    pub(crate) fn encode(e: io::Error) -> Error {\n        Error::new(Kind::Encode, Some(Box::new(e)))\n    }\n\n    #[allow(clippy::wrong_self_convention)]\n    pub(crate) fn to_sql(e: Box<dyn error::Error + Sync + Send>, idx: usize) -> Error {\n        Error::new(Kind::ToSql(idx), Some(e))\n    }\n\n    pub(crate) fn from_sql(e: Box<dyn error::Error + Sync + Send>, idx: usize) -> Error {\n        Error::new(Kind::FromSql(idx), Some(e))\n    }\n\n    pub(crate) fn column(column: String) -> Error {\n        Error::new(Kind::Column(column), None)\n    }\n\n    pub(crate) fn tls(e: Box<dyn error::Error + Sync + Send>) -> Error {\n        Error::new(Kind::Tls, Some(e))\n    }\n\n    pub fn 
io(e: io::Error) -> Error {\n        Error::new(Kind::Io, Some(Box::new(e)))\n    }\n\n    pub(crate) fn authentication(e: Box<dyn error::Error + Sync + Send>) -> Error {\n        Error::new(Kind::Authentication, Some(e))\n    }\n\n    pub(crate) fn config(e: Box<dyn error::Error + Sync + Send>) -> Error {\n        Error::new(Kind::Config, Some(e))\n    }\n\n    pub(crate) fn connect(e: io::Error) -> Error {\n        Error::new(Kind::Connect, Some(Box::new(e)))\n    }\n\n    #[doc(hidden)]\n    pub fn __private_api_timeout() -> Error {\n        Error::new(Kind::Timeout, None)\n    }\n}\n"
  },
  {
    "path": "libs/proxy/tokio-postgres2/src/error/sqlstate.rs",
    "content": "//! Rust repr for <https://www.postgresql.org/docs/current/errcodes-appendix.html>\n\n/// A SQLSTATE error code\n#[derive(PartialEq, Eq, Clone, Debug)]\npub struct SqlState([u8; 5]);\n\nimpl SqlState {\n    /// Creates a `SqlState` from its error code.\n    pub fn from_code(s: &str) -> SqlState {\n        let mut code = [b'0'; 5];\n        if s.len() == 5 {\n            code.copy_from_slice(s.as_bytes());\n        }\n        SqlState(code)\n    }\n\n    /// Returns the error code corresponding to the `SqlState`.\n    pub fn code(&self) -> &str {\n        std::str::from_utf8(&self.0).unwrap()\n    }\n\n    // Class 08 - Connection Exception\n\n    /// 08000\n    pub const CONNECTION_EXCEPTION: SqlState = SqlState(*b\"08000\");\n\n    /// 08003\n    pub const CONNECTION_DOES_NOT_EXIST: SqlState = SqlState(*b\"08003\");\n\n    /// 08006\n    pub const CONNECTION_FAILURE: SqlState = SqlState(*b\"08006\");\n\n    /// 08001\n    pub const SQLCLIENT_UNABLE_TO_ESTABLISH_SQLCONNECTION: SqlState = SqlState(*b\"08001\");\n\n    /// 08P01\n    pub const PROTOCOL_VIOLATION: SqlState = SqlState(*b\"08P01\");\n\n    // Class 22 - Data Exception\n\n    /// 22023\n    pub const INVALID_PARAMETER_VALUE: SqlState = SqlState(*b\"22023\");\n\n    // Class 3D - Invalid Catalog Name\n\n    /// 3D000\n    pub const INVALID_CATALOG_NAME: SqlState = SqlState(*b\"3D000\");\n\n    // Class 3F - Invalid Schema Name\n\n    /// 3F000\n    pub const INVALID_SCHEMA_NAME: SqlState = SqlState(*b\"3F000\");\n\n    // Class 40 - Transaction Rollback\n\n    /// 40001\n    pub const T_R_SERIALIZATION_FAILURE: SqlState = SqlState(*b\"40001\");\n\n    // Class 42 - Syntax Error or Access Rule Violation\n\n    /// 42601\n    pub const SYNTAX_ERROR: SqlState = SqlState(*b\"42601\");\n\n    // Class 53 - Insufficient Resources\n\n    /// 53200\n    pub const OUT_OF_MEMORY: SqlState = SqlState(*b\"53200\");\n\n    /// 53300\n    pub const TOO_MANY_CONNECTIONS: SqlState = 
SqlState(*b\"53300\");\n\n    // Class 57 - Operator Intervention\n\n    /// 57014\n    pub const QUERY_CANCELED: SqlState = SqlState(*b\"57014\");\n}\n\n#[cfg(test)]\nmod tests {\n    use super::SqlState;\n\n    #[test]\n    fn round_trip() {\n        let state = SqlState::from_code(\"08P01\");\n        assert_eq!(state, SqlState::PROTOCOL_VIOLATION);\n        assert_eq!(state.code(), \"08P01\");\n    }\n}\n"
  },
  {
    "path": "libs/proxy/tokio-postgres2/src/generic_client.rs",
    "content": "#![allow(async_fn_in_trait)]\n\nuse crate::query::RowStream;\nuse crate::{Client, Error, Transaction};\n\nmod private {\n    pub trait Sealed {}\n}\n\n/// A trait allowing abstraction over connections and transactions.\n///\n/// This trait is \"sealed\", and cannot be implemented outside of this crate.\npub trait GenericClient: private::Sealed {\n    /// Like `Client::query_raw_txt`.\n    async fn query_raw_txt<S, I>(\n        &mut self,\n        statement: &str,\n        params: I,\n    ) -> Result<RowStream<'_>, Error>\n    where\n        S: AsRef<str> + Sync + Send,\n        I: IntoIterator<Item = Option<S>> + Sync + Send,\n        I::IntoIter: ExactSizeIterator + Sync + Send;\n}\n\nimpl private::Sealed for Client {}\n\nimpl GenericClient for Client {\n    async fn query_raw_txt<S, I>(\n        &mut self,\n        statement: &str,\n        params: I,\n    ) -> Result<RowStream<'_>, Error>\n    where\n        S: AsRef<str> + Sync + Send,\n        I: IntoIterator<Item = Option<S>> + Sync + Send,\n        I::IntoIter: ExactSizeIterator + Sync + Send,\n    {\n        self.query_raw_txt(statement, params).await\n    }\n}\n\nimpl private::Sealed for Transaction<'_> {}\n\nimpl GenericClient for Transaction<'_> {\n    async fn query_raw_txt<S, I>(\n        &mut self,\n        statement: &str,\n        params: I,\n    ) -> Result<RowStream<'_>, Error>\n    where\n        S: AsRef<str> + Sync + Send,\n        I: IntoIterator<Item = Option<S>> + Sync + Send,\n        I::IntoIter: ExactSizeIterator + Sync + Send,\n    {\n        self.query_raw_txt(statement, params).await\n    }\n}\n"
  },
  {
    "path": "libs/proxy/tokio-postgres2/src/lib.rs",
    "content": "//! An asynchronous, pipelined, PostgreSQL client.\n#![warn(clippy::all)]\n\nuse postgres_protocol2::message::backend::ReadyForQueryBody;\n\npub use crate::cancel_token::{CancelToken, RawCancelToken};\npub use crate::client::{Client, SocketConfig};\npub use crate::config::Config;\npub use crate::connection::Connection;\npub use crate::error::Error;\npub use crate::generic_client::GenericClient;\npub use crate::query::RowStream;\npub use crate::row::{Row, SimpleQueryRow};\npub use crate::simple_query::SimpleQueryStream;\npub use crate::statement::{Column, Statement};\npub use crate::tls::NoTls;\npub use crate::transaction::Transaction;\npub use crate::transaction_builder::{IsolationLevel, TransactionBuilder};\n\n/// After executing a query, the connection will be in one of these states\n#[derive(Clone, Copy, Debug, PartialEq)]\n#[repr(u8)]\npub enum ReadyForQueryStatus {\n    /// Connection state is unknown\n    Unknown,\n    /// Connection is idle (no transactions)\n    Idle = b'I',\n    /// Connection is in a transaction block\n    Transaction = b'T',\n    /// Connection is in a failed transaction block\n    FailedTransaction = b'E',\n}\n\nimpl From<ReadyForQueryBody> for ReadyForQueryStatus {\n    fn from(value: ReadyForQueryBody) -> Self {\n        match value.status() {\n            b'I' => Self::Idle,\n            b'T' => Self::Transaction,\n            b'E' => Self::FailedTransaction,\n            _ => Self::Unknown,\n        }\n    }\n}\n\nmod cancel_query;\nmod cancel_query_raw;\nmod cancel_token;\nmod client;\nmod codec;\npub mod config;\npub mod connect;\npub mod connect_raw;\nmod connect_socket;\nmod connect_tls;\nmod connection;\npub mod error;\nmod generic_client;\npub mod maybe_tls_stream;\nmod prepare;\nmod query;\npub mod row;\nmod simple_query;\nmod statement;\npub mod tls;\nmod transaction;\nmod transaction_builder;\npub mod types;\n\n/// An asynchronous notification.\n#[derive(Clone, Debug)]\npub struct Notification {\n    
process_id: i32,\n    channel: String,\n    payload: String,\n}\n\nimpl Notification {\n    /// The process ID of the notifying backend process.\n    pub fn process_id(&self) -> i32 {\n        self.process_id\n    }\n\n    /// The name of the channel that the notify has been raised on.\n    pub fn channel(&self) -> &str {\n        &self.channel\n    }\n\n    /// The \"payload\" string passed from the notifying process.\n    pub fn payload(&self) -> &str {\n        &self.payload\n    }\n}\n\n/// Message returned by the `SimpleQuery` stream.\n#[derive(Debug)]\n#[non_exhaustive]\npub enum SimpleQueryMessage {\n    /// A row of data.\n    Row(SimpleQueryRow),\n    /// A statement in the query has completed.\n    ///\n    /// The number of rows modified or selected is returned.\n    CommandComplete(u64),\n}\n"
  },
  {
    "path": "libs/proxy/tokio-postgres2/src/maybe_tls_stream.rs",
    "content": "//! MaybeTlsStream.\n//!\n//! Represents a stream that may or may not be encrypted with TLS.\nuse std::io;\nuse std::pin::Pin;\nuse std::task::{Context, Poll};\n\nuse tokio::io::{AsyncRead, AsyncWrite, ReadBuf};\n\nuse crate::tls::{ChannelBinding, TlsStream};\n\n/// A stream that may or may not be encrypted with TLS.\npub enum MaybeTlsStream<S, T> {\n    /// An unencrypted stream.\n    Raw(S),\n    /// An encrypted stream.\n    Tls(T),\n}\n\nimpl<S, T> AsyncRead for MaybeTlsStream<S, T>\nwhere\n    S: AsyncRead + Unpin,\n    T: AsyncRead + Unpin,\n{\n    fn poll_read(\n        mut self: Pin<&mut Self>,\n        cx: &mut Context<'_>,\n        buf: &mut ReadBuf<'_>,\n    ) -> Poll<io::Result<()>> {\n        match &mut *self {\n            MaybeTlsStream::Raw(s) => Pin::new(s).poll_read(cx, buf),\n            MaybeTlsStream::Tls(s) => Pin::new(s).poll_read(cx, buf),\n        }\n    }\n}\n\nimpl<S, T> AsyncWrite for MaybeTlsStream<S, T>\nwhere\n    S: AsyncWrite + Unpin,\n    T: AsyncWrite + Unpin,\n{\n    fn poll_write(\n        mut self: Pin<&mut Self>,\n        cx: &mut Context<'_>,\n        buf: &[u8],\n    ) -> Poll<io::Result<usize>> {\n        match &mut *self {\n            MaybeTlsStream::Raw(s) => Pin::new(s).poll_write(cx, buf),\n            MaybeTlsStream::Tls(s) => Pin::new(s).poll_write(cx, buf),\n        }\n    }\n\n    fn poll_flush(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<io::Result<()>> {\n        match &mut *self {\n            MaybeTlsStream::Raw(s) => Pin::new(s).poll_flush(cx),\n            MaybeTlsStream::Tls(s) => Pin::new(s).poll_flush(cx),\n        }\n    }\n\n    fn poll_shutdown(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<io::Result<()>> {\n        match &mut *self {\n            MaybeTlsStream::Raw(s) => Pin::new(s).poll_shutdown(cx),\n            MaybeTlsStream::Tls(s) => Pin::new(s).poll_shutdown(cx),\n        }\n    }\n}\n\nimpl<S, T> TlsStream for MaybeTlsStream<S, T>\nwhere\n    S: AsyncRead + 
AsyncWrite + Unpin,\n    T: TlsStream + Unpin,\n{\n    fn channel_binding(&self) -> ChannelBinding {\n        match self {\n            MaybeTlsStream::Raw(_) => ChannelBinding::none(),\n            MaybeTlsStream::Tls(s) => s.channel_binding(),\n        }\n    }\n}\n"
  },
  {
    "path": "libs/proxy/tokio-postgres2/src/prepare.rs",
    "content": "use bytes::BytesMut;\nuse fallible_iterator::FallibleIterator;\nuse postgres_protocol2::IsNull;\nuse postgres_protocol2::message::backend::{Message, RowDescriptionBody};\nuse postgres_protocol2::message::frontend;\nuse postgres_protocol2::types::oid_to_sql;\nuse postgres_types2::Format;\n\nuse crate::client::{CachedTypeInfo, PartialQuery, Responses};\nuse crate::types::{Kind, Oid, Type};\nuse crate::{Column, Error, Row, Statement};\n\npub(crate) const TYPEINFO_QUERY: &str = \"\\\nSELECT t.typname, t.typtype, t.typelem, r.rngsubtype, t.typbasetype, n.nspname, t.typrelid\nFROM pg_catalog.pg_type t\nLEFT OUTER JOIN pg_catalog.pg_range r ON r.rngtypid = t.oid\nINNER JOIN pg_catalog.pg_namespace n ON t.typnamespace = n.oid\nWHERE t.oid = $1\n\";\n\n/// we need to make sure we close this prepared statement.\nstruct CloseStmt<'a, 'b> {\n    client: Option<&'a mut PartialQuery<'b>>,\n    name: &'static str,\n}\n\nimpl<'a> CloseStmt<'a, '_> {\n    fn close(mut self) -> Result<&'a mut Responses, Error> {\n        let client = self.client.take().unwrap();\n        client.send_with_flush(|buf| {\n            frontend::close(b'S', self.name, buf).map_err(Error::encode)?;\n            Ok(())\n        })\n    }\n}\n\nimpl Drop for CloseStmt<'_, '_> {\n    fn drop(&mut self) {\n        if let Some(client) = self.client.take() {\n            let _ = client.send_with_flush(|buf| {\n                frontend::close(b'S', self.name, buf).map_err(Error::encode)?;\n                Ok(())\n            });\n        }\n    }\n}\n\nasync fn prepare_typecheck(\n    client: &mut PartialQuery<'_>,\n    name: &'static str,\n    query: &str,\n) -> Result<Statement, Error> {\n    let responses = client.send_with_flush(|buf| {\n        frontend::parse(name, query, [], buf).map_err(Error::encode)?;\n        frontend::describe(b'S', name, buf).map_err(Error::encode)?;\n        Ok(())\n    })?;\n\n    match responses.next().await? 
{\n        Message::ParseComplete => {}\n        _ => return Err(Error::unexpected_message()),\n    }\n\n    match responses.next().await? {\n        Message::ParameterDescription(_) => {}\n        _ => return Err(Error::unexpected_message()),\n    };\n\n    let row_description = match responses.next().await? {\n        Message::RowDescription(body) => Some(body),\n        Message::NoData => None,\n        _ => return Err(Error::unexpected_message()),\n    };\n\n    let mut columns = vec![];\n    if let Some(row_description) = row_description {\n        let mut it = row_description.fields();\n        while let Some(field) = it.next().map_err(Error::parse)? {\n            let type_ = Type::from_oid(field.type_oid()).ok_or_else(Error::unexpected_message)?;\n            let column = Column::new(field.name().to_string(), type_, field);\n            columns.push(column);\n        }\n    }\n\n    Ok(Statement::new(name, columns))\n}\n\nfn try_from_cache(typecache: &CachedTypeInfo, oid: Oid) -> Option<Type> {\n    if let Some(type_) = Type::from_oid(oid) {\n        return Some(type_);\n    }\n\n    if let Some(type_) = typecache.types.get(&oid) {\n        return Some(type_.clone());\n    };\n\n    None\n}\n\npub async fn parse_row_description(\n    client: &mut PartialQuery<'_>,\n    typecache: &mut CachedTypeInfo,\n    row_description: Option<RowDescriptionBody>,\n) -> Result<Vec<Column>, Error> {\n    let mut columns = vec![];\n\n    if let Some(row_description) = row_description {\n        let mut it = row_description.fields();\n        while let Some(field) = it.next().map_err(Error::parse)? 
{\n            let type_ = try_from_cache(typecache, field.type_oid()).unwrap_or(Type::UNKNOWN);\n            let column = Column::new(field.name().to_string(), type_, field);\n            columns.push(column);\n        }\n    }\n\n    let all_known = columns.iter().all(|c| c.type_ != Type::UNKNOWN);\n    if all_known {\n        // all known, return early.\n        return Ok(columns);\n    }\n\n    let typeinfo = \"neon_proxy_typeinfo\";\n\n    // make sure to close the typeinfo statement before exiting.\n    let mut guard = CloseStmt {\n        name: typeinfo,\n        client: None,\n    };\n    let client = guard.client.insert(client);\n\n    // get the typeinfo statement.\n    let stmt = prepare_typecheck(client, typeinfo, TYPEINFO_QUERY).await?;\n\n    for column in &mut columns {\n        column.type_ = get_type(client, typecache, &stmt, column.type_oid()).await?;\n    }\n\n    // cancel the close guard.\n    let responses = guard.close()?;\n\n    match responses.next().await? {\n        Message::CloseComplete => {}\n        _ => return Err(Error::unexpected_message()),\n    }\n\n    Ok(columns)\n}\n\nasync fn get_type(\n    client: &mut PartialQuery<'_>,\n    typecache: &mut CachedTypeInfo,\n    stmt: &Statement,\n    mut oid: Oid,\n) -> Result<Type, Error> {\n    let mut stack = vec![];\n    let mut type_ = loop {\n        if let Some(type_) = try_from_cache(typecache, oid) {\n            break type_;\n        }\n\n        let row = exec(client, stmt, oid).await?;\n        if stack.len() > 8 {\n            return Err(Error::unexpected_message());\n        }\n\n        let name: String = row.try_get(0)?;\n        let type_: i8 = row.try_get(1)?;\n        let elem_oid: Oid = row.try_get(2)?;\n        let rngsubtype: Option<Oid> = row.try_get(3)?;\n        let basetype: Oid = row.try_get(4)?;\n        let schema: String = row.try_get(5)?;\n        let relid: Oid = row.try_get(6)?;\n\n        let kind = if type_ == b'e' as i8 {\n            Kind::Enum\n        } 
else if type_ == b'p' as i8 {\n            Kind::Pseudo\n        } else if basetype != 0 {\n            Kind::Domain(basetype)\n        } else if elem_oid != 0 {\n            stack.push((name, oid, schema));\n            oid = elem_oid;\n            continue;\n        } else if relid != 0 {\n            Kind::Composite(relid)\n        } else if let Some(rngsubtype) = rngsubtype {\n            Kind::Range(rngsubtype)\n        } else {\n            Kind::Simple\n        };\n\n        let type_ = Type::new(name, oid, kind, schema);\n        typecache.types.insert(oid, type_.clone());\n        break type_;\n    };\n\n    while let Some((name, oid, schema)) = stack.pop() {\n        type_ = Type::new(name, oid, Kind::Array(type_), schema);\n        typecache.types.insert(oid, type_.clone());\n    }\n\n    Ok(type_)\n}\n\n/// exec the typeinfo statement returning one row.\nasync fn exec(\n    client: &mut PartialQuery<'_>,\n    statement: &Statement,\n    param: Oid,\n) -> Result<Row, Error> {\n    let responses = client.send_with_flush(|buf| {\n        encode_bind(statement, param, \"\", buf);\n        frontend::execute(\"\", 0, buf).map_err(Error::encode)?;\n        Ok(())\n    })?;\n\n    match responses.next().await? {\n        Message::BindComplete => {}\n        _ => return Err(Error::unexpected_message()),\n    }\n\n    let row = match responses.next().await? {\n        Message::DataRow(body) => Row::new(statement.clone(), body, Format::Binary)?,\n        _ => return Err(Error::unexpected_message()),\n    };\n\n    match responses.next().await? 
{\n        Message::CommandComplete(_) => {}\n        _ => return Err(Error::unexpected_message()),\n    };\n\n    Ok(row)\n}\n\nfn encode_bind(statement: &Statement, param: Oid, portal: &str, buf: &mut BytesMut) {\n    frontend::bind(\n        portal,\n        statement.name(),\n        [Format::Binary as i16],\n        [param],\n        |param, buf| {\n            oid_to_sql(param, buf);\n            Ok(IsNull::No)\n        },\n        [Format::Binary as i16],\n        buf,\n    )\n    .unwrap();\n}\n"
  },
  {
    "path": "libs/proxy/tokio-postgres2/src/query.rs",
    "content": "use std::pin::Pin;\nuse std::task::{Context, Poll};\n\nuse bytes::BufMut;\nuse futures_util::{Stream, ready};\nuse postgres_protocol2::message::backend::Message;\nuse postgres_protocol2::message::frontend;\nuse postgres_types2::Format;\n\nuse crate::client::{CachedTypeInfo, InnerClient, Responses};\nuse crate::{Error, ReadyForQueryStatus, Row, Statement};\n\npub async fn query_txt<'a, S, I>(\n    client: &'a mut InnerClient,\n    typecache: &mut CachedTypeInfo,\n    query: &str,\n    params: I,\n) -> Result<RowStream<'a>, Error>\nwhere\n    S: AsRef<str>,\n    I: IntoIterator<Item = Option<S>>,\n    I::IntoIter: ExactSizeIterator,\n{\n    let params = params.into_iter();\n    let mut client = client.start()?;\n\n    // Flow:\n    // 1. Parse the query\n    // 2. Inspect the row description for OIDs\n    // 3. If there's any OIDs we don't already know about, perform the typeinfo routine\n    // 4. Execute the query\n    // 5. Sync.\n    //\n    // The typeinfo routine:\n    // 1. Parse the typeinfo query\n    // 2. Execute the query on each OID\n    // 3. If the result does not match an OID we know, repeat 2.\n\n    // parse the query and get type info\n    let responses = client.send_with_flush(|buf| {\n        frontend::parse(\n            \"\",                 // unnamed prepared statement\n            query,              // query to parse\n            std::iter::empty(), // give no type info\n            buf,\n        )\n        .map_err(Error::encode)?;\n        frontend::describe(b'S', \"\", buf).map_err(Error::encode)?;\n        Ok(())\n    })?;\n\n    match responses.next().await? {\n        Message::ParseComplete => {}\n        _ => return Err(Error::unexpected_message()),\n    }\n\n    match responses.next().await? {\n        Message::ParameterDescription(_) => {}\n        _ => return Err(Error::unexpected_message()),\n    };\n\n    let row_description = match responses.next().await? 
{\n        Message::RowDescription(body) => Some(body),\n        Message::NoData => None,\n        _ => return Err(Error::unexpected_message()),\n    };\n\n    let columns =\n        crate::prepare::parse_row_description(&mut client, typecache, row_description).await?;\n\n    let responses = client.send_with_sync(|buf| {\n        // Bind, pass params as text, retrieve as text\n        match frontend::bind(\n            \"\",                 // empty string selects the unnamed portal\n            \"\",                 // unnamed prepared statement\n            std::iter::empty(), // all parameters use the default format (text)\n            params,\n            |param, buf| match param {\n                Some(param) => {\n                    buf.put_slice(param.as_ref().as_bytes());\n                    Ok(postgres_protocol2::IsNull::No)\n                }\n                None => Ok(postgres_protocol2::IsNull::Yes),\n            },\n            Some(0), // all text\n            buf,\n        ) {\n            Ok(()) => Ok(()),\n            Err(frontend::BindError::Conversion(e)) => Err(Error::to_sql(e, 0)),\n            Err(frontend::BindError::Serialization(e)) => Err(Error::encode(e)),\n        }?;\n\n        // Execute\n        frontend::execute(\"\", 0, buf).map_err(Error::encode)?;\n\n        Ok(())\n    })?;\n\n    match responses.next().await? 
{\n        Message::BindComplete => {}\n        _ => return Err(Error::unexpected_message()),\n    }\n\n    Ok(RowStream {\n        responses,\n        statement: Statement::new(\"\", columns),\n        command_tag: None,\n        status: ReadyForQueryStatus::Unknown,\n        output_format: Format::Text,\n    })\n}\n\n/// A stream of table rows.\npub struct RowStream<'a> {\n    responses: &'a mut Responses,\n    output_format: Format,\n    pub statement: Statement,\n    pub command_tag: Option<String>,\n    pub status: ReadyForQueryStatus,\n}\n\nimpl Stream for RowStream<'_> {\n    type Item = Result<Row, Error>;\n\n    fn poll_next(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Option<Self::Item>> {\n        let this = self.get_mut();\n        loop {\n            match ready!(this.responses.poll_next(cx)?) {\n                Message::DataRow(body) => {\n                    return Poll::Ready(Some(Ok(Row::new(\n                        this.statement.clone(),\n                        body,\n                        this.output_format,\n                    )?)));\n                }\n                Message::EmptyQueryResponse | Message::PortalSuspended => {}\n                Message::CommandComplete(body) => {\n                    if let Ok(tag) = body.tag() {\n                        this.command_tag = Some(tag.to_string());\n                    }\n                }\n                Message::ReadyForQuery(status) => {\n                    this.status = status.into();\n                    return Poll::Ready(None);\n                }\n                _ => return Poll::Ready(Some(Err(Error::unexpected_message()))),\n            }\n        }\n    }\n}\n"
  },
  {
    "path": "libs/proxy/tokio-postgres2/src/row.rs",
    "content": "//! Rows.\n\nuse std::ops::Range;\nuse std::sync::Arc;\nuse std::{fmt, str};\n\nuse fallible_iterator::FallibleIterator;\nuse postgres_protocol2::message::backend::DataRowBody;\nuse postgres_types2::{Format, WrongFormat};\n\nuse crate::row::sealed::{AsName, Sealed};\nuse crate::simple_query::SimpleColumn;\nuse crate::statement::Column;\nuse crate::types::{FromSql, Type, WrongType};\nuse crate::{Error, Statement};\n\nmod sealed {\n    pub trait Sealed {}\n\n    pub trait AsName {\n        fn as_name(&self) -> &str;\n    }\n}\n\nimpl AsName for Column {\n    fn as_name(&self) -> &str {\n        self.name()\n    }\n}\n\nimpl AsName for String {\n    fn as_name(&self) -> &str {\n        self\n    }\n}\n\n/// A trait implemented by types that can index into columns of a row.\n///\n/// This cannot be implemented outside of this crate.\npub trait RowIndex: Sealed {\n    #[doc(hidden)]\n    fn __idx<T>(&self, columns: &[T]) -> Option<usize>\n    where\n        T: AsName;\n}\n\nimpl Sealed for usize {}\n\nimpl RowIndex for usize {\n    #[inline]\n    fn __idx<T>(&self, columns: &[T]) -> Option<usize>\n    where\n        T: AsName,\n    {\n        if *self >= columns.len() {\n            None\n        } else {\n            Some(*self)\n        }\n    }\n}\n\nimpl Sealed for str {}\n\nimpl RowIndex for str {\n    #[inline]\n    fn __idx<T>(&self, columns: &[T]) -> Option<usize>\n    where\n        T: AsName,\n    {\n        if let Some(idx) = columns.iter().position(|d| d.as_name() == self) {\n            return Some(idx);\n        };\n\n        // FIXME ASCII-only case insensitivity isn't really the right thing to\n        // do. 
Postgres itself uses a dubious wrapper around tolower and JDBC\n        // uses the US locale.\n        columns\n            .iter()\n            .position(|d| d.as_name().eq_ignore_ascii_case(self))\n    }\n}\n\nimpl<T> Sealed for &T where T: ?Sized + Sealed {}\n\nimpl<T> RowIndex for &T\nwhere\n    T: ?Sized + RowIndex,\n{\n    #[inline]\n    fn __idx<U>(&self, columns: &[U]) -> Option<usize>\n    where\n        U: AsName,\n    {\n        T::__idx(*self, columns)\n    }\n}\n\n/// A row of data returned from the database by a query.\npub struct Row {\n    statement: Statement,\n    output_format: Format,\n    body: DataRowBody,\n    ranges: Vec<Option<Range<usize>>>,\n}\n\nimpl fmt::Debug for Row {\n    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {\n        f.debug_struct(\"Row\")\n            .field(\"columns\", &self.columns())\n            .finish()\n    }\n}\n\nimpl Row {\n    pub(crate) fn new(\n        statement: Statement,\n        body: DataRowBody,\n        output_format: Format,\n    ) -> Result<Row, Error> {\n        let ranges = body.ranges().collect().map_err(Error::parse)?;\n        Ok(Row {\n            statement,\n            body,\n            ranges,\n            output_format,\n        })\n    }\n\n    /// Returns information about the columns of data in the row.\n    pub fn columns(&self) -> &[Column] {\n        self.statement.columns()\n    }\n\n    /// Determines if the row contains no values.\n    pub fn is_empty(&self) -> bool {\n        self.len() == 0\n    }\n\n    /// Returns the number of values in the row.\n    pub fn len(&self) -> usize {\n        self.columns().len()\n    }\n\n    /// Deserializes a value from the row.\n    ///\n    /// The value can be specified either by its numeric index in the row, or by its column name.\n    ///\n    /// # Panics\n    ///\n    /// Panics if the index is out of bounds or if the value cannot be converted to the specified type.\n    pub fn get<'a, I, T>(&'a self, idx: I) -> T\n    
where\n        I: RowIndex + fmt::Display,\n        T: FromSql<'a>,\n    {\n        match self.get_inner(&idx) {\n            Ok(ok) => ok,\n            Err(err) => panic!(\"error retrieving column {idx}: {err}\"),\n        }\n    }\n\n    /// Like `Row::get`, but returns a `Result` rather than panicking.\n    pub fn try_get<'a, I, T>(&'a self, idx: I) -> Result<T, Error>\n    where\n        I: RowIndex + fmt::Display,\n        T: FromSql<'a>,\n    {\n        self.get_inner(&idx)\n    }\n\n    fn get_inner<'a, I, T>(&'a self, idx: &I) -> Result<T, Error>\n    where\n        I: RowIndex + fmt::Display,\n        T: FromSql<'a>,\n    {\n        let idx = match idx.__idx(self.columns()) {\n            Some(idx) => idx,\n            None => return Err(Error::column(idx.to_string())),\n        };\n\n        let ty = self.columns()[idx].type_();\n        if !T::accepts(ty) {\n            return Err(Error::from_sql(\n                Box::new(WrongType::new::<T>(ty.clone())),\n                idx,\n            ));\n        }\n\n        FromSql::from_sql_nullable(ty, self.col_buffer(idx)).map_err(|e| Error::from_sql(e, idx))\n    }\n\n    /// Get the raw bytes for the column at the given index.\n    fn col_buffer(&self, idx: usize) -> Option<&[u8]> {\n        let range = self.ranges.get(idx)?.to_owned()?;\n        Some(&self.body.buffer()[range])\n    }\n\n    /// Interpret the column at the given index as text\n    ///\n    /// Useful when using query_raw_txt() which sets text transfer mode\n    pub fn as_text(&self, idx: usize) -> Result<Option<&str>, Error> {\n        if self.output_format == Format::Text {\n            match self.col_buffer(idx) {\n                Some(raw) => {\n                    FromSql::from_sql(&Type::TEXT, raw).map_err(|e| Error::from_sql(e, idx))\n                }\n                None => Ok(None),\n            }\n        } else {\n            Err(Error::from_sql(Box::new(WrongFormat {}), idx))\n        }\n    }\n\n    /// Row byte size\n    pub 
fn body_len(&self) -> usize {\n        self.body.buffer().len()\n    }\n}\n\nimpl AsName for SimpleColumn {\n    fn as_name(&self) -> &str {\n        self.name()\n    }\n}\n\n/// A row of data returned from the database by a simple query.\n#[derive(Debug)]\npub struct SimpleQueryRow {\n    columns: Arc<[SimpleColumn]>,\n    body: DataRowBody,\n    ranges: Vec<Option<Range<usize>>>,\n}\n\nimpl SimpleQueryRow {\n    #[allow(clippy::new_ret_no_self)]\n    pub(crate) fn new(\n        columns: Arc<[SimpleColumn]>,\n        body: DataRowBody,\n    ) -> Result<SimpleQueryRow, Error> {\n        let ranges = body.ranges().collect().map_err(Error::parse)?;\n        Ok(SimpleQueryRow {\n            columns,\n            body,\n            ranges,\n        })\n    }\n\n    /// Returns information about the columns of data in the row.\n    pub fn columns(&self) -> &[SimpleColumn] {\n        &self.columns\n    }\n\n    /// Determines if the row contains no values.\n    pub fn is_empty(&self) -> bool {\n        self.len() == 0\n    }\n\n    /// Returns the number of values in the row.\n    pub fn len(&self) -> usize {\n        self.columns.len()\n    }\n\n    /// Returns a value from the row.\n    ///\n    /// The value can be specified either by its numeric index in the row, or by its column name.\n    ///\n    /// # Panics\n    ///\n    /// Panics if the index is out of bounds or if the value cannot be converted to the specified type.\n    pub fn get<I>(&self, idx: I) -> Option<&str>\n    where\n        I: RowIndex + fmt::Display,\n    {\n        match self.get_inner(&idx) {\n            Ok(ok) => ok,\n            Err(err) => panic!(\"error retrieving column {idx}: {err}\"),\n        }\n    }\n\n    /// Like `SimpleQueryRow::get`, but returns a `Result` rather than panicking.\n    pub fn try_get<I>(&self, idx: I) -> Result<Option<&str>, Error>\n    where\n        I: RowIndex + fmt::Display,\n    {\n        self.get_inner(&idx)\n    }\n\n    fn get_inner<I>(&self, idx: &I) -> 
Result<Option<&str>, Error>\n    where\n        I: RowIndex + fmt::Display,\n    {\n        let idx = match idx.__idx(&self.columns) {\n            Some(idx) => idx,\n            None => return Err(Error::column(idx.to_string())),\n        };\n\n        let buf = self.ranges[idx].clone().map(|r| &self.body.buffer()[r]);\n        FromSql::from_sql_nullable(&Type::TEXT, buf).map_err(|e| Error::from_sql(e, idx))\n    }\n}\n"
  },
  {
    "path": "libs/proxy/tokio-postgres2/src/simple_query.rs",
    "content": "use std::pin::Pin;\nuse std::sync::Arc;\nuse std::task::{Context, Poll};\n\nuse fallible_iterator::FallibleIterator;\nuse futures_util::{Stream, ready};\nuse pin_project_lite::pin_project;\nuse postgres_protocol2::message::backend::Message;\nuse tracing::debug;\n\nuse crate::client::{InnerClient, Responses};\nuse crate::{Error, ReadyForQueryStatus, SimpleQueryMessage, SimpleQueryRow};\n\n/// Information about a column of a single query row.\n#[derive(Debug)]\npub struct SimpleColumn {\n    name: String,\n}\n\nimpl SimpleColumn {\n    pub(crate) fn new(name: String) -> SimpleColumn {\n        SimpleColumn { name }\n    }\n\n    /// Returns the name of the column.\n    pub fn name(&self) -> &str {\n        &self.name\n    }\n}\n\npub async fn simple_query<'a>(\n    client: &'a mut InnerClient,\n    query: &str,\n) -> Result<SimpleQueryStream<'a>, Error> {\n    debug!(\"executing simple query: {}\", query);\n\n    let responses = client.send_simple_query(query)?;\n\n    Ok(SimpleQueryStream {\n        responses,\n        columns: None,\n        status: ReadyForQueryStatus::Unknown,\n    })\n}\n\npub async fn batch_execute(\n    client: &mut InnerClient,\n    query: &str,\n) -> Result<ReadyForQueryStatus, Error> {\n    debug!(\"executing statement batch: {}\", query);\n\n    let responses = client.send_simple_query(query)?;\n\n    loop {\n        match responses.next().await? {\n            Message::ReadyForQuery(status) => return Ok(status.into()),\n            Message::CommandComplete(_)\n            | Message::EmptyQueryResponse\n            | Message::RowDescription(_)\n            | Message::DataRow(_) => {}\n            _ => return Err(Error::unexpected_message()),\n        }\n    }\n}\n\npin_project! 
{\n    /// A stream of simple query results.\n    pub struct SimpleQueryStream<'a> {\n        responses: &'a mut Responses,\n        columns: Option<Arc<[SimpleColumn]>>,\n        status: ReadyForQueryStatus,\n    }\n}\n\nimpl SimpleQueryStream<'_> {\n    /// Returns if the connection is ready for querying, with the status of the connection.\n    ///\n    /// This might be available only after the stream has been exhausted.\n    pub fn ready_status(&self) -> ReadyForQueryStatus {\n        self.status\n    }\n}\n\nimpl Stream for SimpleQueryStream<'_> {\n    type Item = Result<SimpleQueryMessage, Error>;\n\n    fn poll_next(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Option<Self::Item>> {\n        let this = self.project();\n        loop {\n            match ready!(this.responses.poll_next(cx)?) {\n                Message::CommandComplete(body) => {\n                    let rows = body\n                        .tag()\n                        .map_err(Error::parse)?\n                        .rsplit(' ')\n                        .next()\n                        .unwrap()\n                        .parse()\n                        .unwrap_or(0);\n                    return Poll::Ready(Some(Ok(SimpleQueryMessage::CommandComplete(rows))));\n                }\n                Message::EmptyQueryResponse => {\n                    return Poll::Ready(Some(Ok(SimpleQueryMessage::CommandComplete(0))));\n                }\n                Message::RowDescription(body) => {\n                    let columns = body\n                        .fields()\n                        .map(|f| Ok(SimpleColumn::new(f.name().to_string())))\n                        .collect::<Vec<_>>()\n                        .map_err(Error::parse)?\n                        .into();\n\n                    *this.columns = Some(columns);\n                }\n                Message::DataRow(body) => {\n                    let row = match &this.columns {\n                        Some(columns) => 
SimpleQueryRow::new(columns.clone(), body)?,\n                        None => return Poll::Ready(Some(Err(Error::unexpected_message()))),\n                    };\n                    return Poll::Ready(Some(Ok(SimpleQueryMessage::Row(row))));\n                }\n                Message::ReadyForQuery(s) => {\n                    *this.status = s.into();\n                    return Poll::Ready(None);\n                }\n                _ => return Poll::Ready(Some(Err(Error::unexpected_message()))),\n            }\n        }\n    }\n}\n"
  },
  {
    "path": "libs/proxy/tokio-postgres2/src/statement.rs",
    "content": "use std::fmt;\nuse std::sync::Arc;\n\nuse crate::types::Type;\nuse postgres_protocol2::Oid;\nuse postgres_protocol2::message::backend::Field;\n\nstruct StatementInner {\n    name: &'static str,\n    columns: Vec<Column>,\n}\n\n/// A prepared statement.\n///\n/// Prepared statements can only be used with the connection that created them.\n#[derive(Clone)]\npub struct Statement(Arc<StatementInner>);\n\nimpl Statement {\n    pub(crate) fn new(name: &'static str, columns: Vec<Column>) -> Statement {\n        Statement(Arc::new(StatementInner { name, columns }))\n    }\n\n    pub(crate) fn name(&self) -> &str {\n        self.0.name\n    }\n\n    /// Returns information about the columns returned when the statement is queried.\n    pub fn columns(&self) -> &[Column] {\n        &self.0.columns\n    }\n}\n\n/// Information about a column of a query.\npub struct Column {\n    name: String,\n    pub(crate) type_: Type,\n\n    // raw fields from RowDescription\n    table_oid: Oid,\n    column_id: i16,\n    format: i16,\n\n    // that better be stored in self.type_, but that is more radical refactoring\n    type_oid: Oid,\n    type_size: i16,\n    type_modifier: i32,\n}\n\nimpl Column {\n    pub(crate) fn new(name: String, type_: Type, raw_field: Field<'_>) -> Column {\n        Column {\n            name,\n            type_,\n            table_oid: raw_field.table_oid(),\n            column_id: raw_field.column_id(),\n            format: raw_field.format(),\n            type_oid: raw_field.type_oid(),\n            type_size: raw_field.type_size(),\n            type_modifier: raw_field.type_modifier(),\n        }\n    }\n\n    /// Returns the name of the column.\n    pub fn name(&self) -> &str {\n        &self.name\n    }\n\n    /// Returns the type of the column.\n    pub fn type_(&self) -> &Type {\n        &self.type_\n    }\n\n    /// Returns the table OID of the column.\n    pub fn table_oid(&self) -> Oid {\n        self.table_oid\n    }\n\n    /// Returns 
the column ID of the column.\n    pub fn column_id(&self) -> i16 {\n        self.column_id\n    }\n\n    /// Returns the format of the column.\n    pub fn format(&self) -> i16 {\n        self.format\n    }\n\n    /// Returns the type OID of the column.\n    pub fn type_oid(&self) -> Oid {\n        self.type_oid\n    }\n\n    /// Returns the type size of the column.\n    pub fn type_size(&self) -> i16 {\n        self.type_size\n    }\n\n    /// Returns the type modifier of the column.\n    pub fn type_modifier(&self) -> i32 {\n        self.type_modifier\n    }\n}\n\nimpl fmt::Debug for Column {\n    fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {\n        fmt.debug_struct(\"Column\")\n            .field(\"name\", &self.name)\n            .field(\"type\", &self.type_)\n            .finish()\n    }\n}\n"
  },
  {
    "path": "libs/proxy/tokio-postgres2/src/tls.rs",
    "content": "//! TLS support.\n\nuse std::error::Error;\nuse std::future::Future;\nuse std::pin::Pin;\nuse std::task::{Context, Poll};\nuse std::{fmt, io};\n\nuse tokio::io::{AsyncRead, AsyncWrite, ReadBuf};\n\npub(crate) mod private {\n    pub struct ForcePrivateApi;\n}\n\n/// Channel binding information returned from a TLS handshake.\npub struct ChannelBinding {\n    pub(crate) tls_server_end_point: Option<Vec<u8>>,\n}\n\nimpl ChannelBinding {\n    /// Creates a `ChannelBinding` containing no information.\n    pub fn none() -> ChannelBinding {\n        ChannelBinding {\n            tls_server_end_point: None,\n        }\n    }\n\n    /// Creates a `ChannelBinding` containing `tls-server-end-point` channel binding information.\n    pub fn tls_server_end_point(tls_server_end_point: Vec<u8>) -> ChannelBinding {\n        ChannelBinding {\n            tls_server_end_point: Some(tls_server_end_point),\n        }\n    }\n}\n\n/// A constructor of `TlsConnect`ors.\n///\n/// Requires the `runtime` Cargo feature (enabled by default).\npub trait MakeTlsConnect<S> {\n    /// The stream type created by the `TlsConnect` implementation.\n    type Stream: TlsStream + Unpin;\n    /// The `TlsConnect` implementation created by this type.\n    type TlsConnect: TlsConnect<S, Stream = Self::Stream>;\n    /// The error type returned by the `TlsConnect` implementation.\n    type Error: Into<Box<dyn Error + Sync + Send>>;\n\n    /// Creates a new `TlsConnect`or.\n    ///\n    /// The domain name is provided for certificate verification and SNI.\n    fn make_tls_connect(&self, domain: &str) -> Result<Self::TlsConnect, Self::Error>;\n}\n\n/// An asynchronous function wrapping a stream in a TLS session.\npub trait TlsConnect<S> {\n    /// The stream returned by the future.\n    type Stream: TlsStream + Unpin;\n    /// The error returned by the future.\n    type Error: Into<Box<dyn Error + Sync + Send>>;\n    /// The future returned by the connector.\n    type Future: Future<Output = 
Result<Self::Stream, Self::Error>>;\n\n    /// Returns a future performing a TLS handshake over the stream.\n    fn connect(self, stream: S) -> Self::Future;\n\n    #[doc(hidden)]\n    fn can_connect(&self, _: private::ForcePrivateApi) -> bool {\n        true\n    }\n}\n\n/// A TLS-wrapped connection to a PostgreSQL database.\npub trait TlsStream: AsyncRead + AsyncWrite {\n    /// Returns channel binding information for the session.\n    fn channel_binding(&self) -> ChannelBinding;\n}\n\n/// A `MakeTlsConnect` and `TlsConnect` implementation which simply returns an error.\n///\n/// This can be used when `sslmode` is `none` or `prefer`.\n#[derive(Debug, Copy, Clone)]\npub struct NoTls;\n\nimpl<S> MakeTlsConnect<S> for NoTls {\n    type Stream = NoTlsStream;\n    type TlsConnect = NoTls;\n    type Error = NoTlsError;\n\n    fn make_tls_connect(&self, _: &str) -> Result<NoTls, NoTlsError> {\n        Ok(NoTls)\n    }\n}\n\nimpl<S> TlsConnect<S> for NoTls {\n    type Stream = NoTlsStream;\n    type Error = NoTlsError;\n    type Future = NoTlsFuture;\n\n    fn connect(self, _: S) -> NoTlsFuture {\n        NoTlsFuture(())\n    }\n\n    fn can_connect(&self, _: private::ForcePrivateApi) -> bool {\n        false\n    }\n}\n\n/// The future returned by `NoTls`.\npub struct NoTlsFuture(());\n\nimpl Future for NoTlsFuture {\n    type Output = Result<NoTlsStream, NoTlsError>;\n\n    fn poll(self: Pin<&mut Self>, _: &mut Context<'_>) -> Poll<Self::Output> {\n        Poll::Ready(Err(NoTlsError(())))\n    }\n}\n\n/// The TLS \"stream\" type produced by the `NoTls` connector.\n///\n/// Since `NoTls` doesn't support TLS, this type is uninhabited.\npub enum NoTlsStream {}\n\nimpl AsyncRead for NoTlsStream {\n    fn poll_read(\n        self: Pin<&mut Self>,\n        _: &mut Context<'_>,\n        _: &mut ReadBuf<'_>,\n    ) -> Poll<io::Result<()>> {\n        match *self {}\n    }\n}\n\nimpl AsyncWrite for NoTlsStream {\n    fn poll_write(self: Pin<&mut Self>, _: &mut Context<'_>, _: 
&[u8]) -> Poll<io::Result<usize>> {\n        match *self {}\n    }\n\n    fn poll_flush(self: Pin<&mut Self>, _: &mut Context<'_>) -> Poll<io::Result<()>> {\n        match *self {}\n    }\n\n    fn poll_shutdown(self: Pin<&mut Self>, _: &mut Context<'_>) -> Poll<io::Result<()>> {\n        match *self {}\n    }\n}\n\nimpl TlsStream for NoTlsStream {\n    fn channel_binding(&self) -> ChannelBinding {\n        match *self {}\n    }\n}\n\n/// The error returned by `NoTls`.\n#[derive(Debug)]\npub struct NoTlsError(());\n\nimpl fmt::Display for NoTlsError {\n    fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {\n        fmt.write_str(\"no TLS implementation configured\")\n    }\n}\n\nimpl Error for NoTlsError {}\n"
  },
  {
    "path": "libs/proxy/tokio-postgres2/src/transaction.rs",
    "content": "use crate::query::RowStream;\nuse crate::{CancelToken, Client, Error, ReadyForQueryStatus};\n\n/// A representation of a PostgreSQL database transaction.\n///\n/// Transactions will implicitly roll back when dropped. Use the `commit` method to commit the changes made in the\n/// transaction. Transactions can be nested, with inner transactions implemented via safepoints.\npub struct Transaction<'a> {\n    client: &'a mut Client,\n    done: bool,\n}\n\nimpl Drop for Transaction<'_> {\n    fn drop(&mut self) {\n        if self.done {\n            return;\n        }\n\n        let _ = self.client.inner_mut().send_simple_query(\"ROLLBACK\");\n    }\n}\n\nimpl<'a> Transaction<'a> {\n    pub(crate) fn new(client: &'a mut Client) -> Transaction<'a> {\n        Transaction {\n            client,\n            done: false,\n        }\n    }\n\n    /// Consumes the transaction, committing all changes made within it.\n    pub async fn commit(mut self) -> Result<ReadyForQueryStatus, Error> {\n        self.done = true;\n        self.client.batch_execute(\"COMMIT\").await\n    }\n\n    /// Rolls the transaction back, discarding all changes made within it.\n    ///\n    /// This is equivalent to `Transaction`'s `Drop` implementation, but provides any error encountered to the caller.\n    pub async fn rollback(mut self) -> Result<ReadyForQueryStatus, Error> {\n        self.done = true;\n        self.client.batch_execute(\"ROLLBACK\").await\n    }\n\n    /// Like `Client::query_raw_txt`.\n    pub async fn query_raw_txt<S, I>(\n        &mut self,\n        statement: &str,\n        params: I,\n    ) -> Result<RowStream<'_>, Error>\n    where\n        S: AsRef<str>,\n        I: IntoIterator<Item = Option<S>>,\n        I::IntoIter: ExactSizeIterator,\n    {\n        self.client.query_raw_txt(statement, params).await\n    }\n\n    /// Like `Client::cancel_token`.\n    pub fn cancel_token(&self) -> CancelToken {\n        self.client.cancel_token()\n    }\n\n    /// Returns a 
reference to the underlying `Client`.\n    pub fn client(&self) -> &Client {\n        self.client\n    }\n\n    /// Returns a mutable reference to the underlying `Client`.\n    pub fn client_mut(&mut self) -> &mut Client {\n        self.client\n    }\n}\n"
  },
  {
    "path": "libs/proxy/tokio-postgres2/src/transaction_builder.rs",
    "content": "use crate::{Client, Error, Transaction};\n\n/// The isolation level of a database transaction.\n#[derive(Debug, Copy, Clone)]\n#[non_exhaustive]\npub enum IsolationLevel {\n    /// Equivalent to `ReadCommitted`.\n    ReadUncommitted,\n\n    /// An individual statement in the transaction will see rows committed before it began.\n    ReadCommitted,\n\n    /// All statements in the transaction will see the same view of rows committed before the first query in the\n    /// transaction.\n    RepeatableRead,\n\n    /// The reads and writes in this transaction must be able to be committed as an atomic \"unit\" with respect to reads\n    /// and writes of all other concurrent serializable transactions without interleaving.\n    Serializable,\n}\n\n/// A builder for database transactions.\npub struct TransactionBuilder<'a> {\n    client: &'a mut Client,\n    isolation_level: Option<IsolationLevel>,\n    read_only: Option<bool>,\n    deferrable: Option<bool>,\n}\n\nimpl<'a> TransactionBuilder<'a> {\n    pub(crate) fn new(client: &'a mut Client) -> TransactionBuilder<'a> {\n        TransactionBuilder {\n            client,\n            isolation_level: None,\n            read_only: None,\n            deferrable: None,\n        }\n    }\n\n    /// Sets the isolation level of the transaction.\n    pub fn isolation_level(mut self, isolation_level: IsolationLevel) -> Self {\n        self.isolation_level = Some(isolation_level);\n        self\n    }\n\n    /// Sets the access mode of the transaction.\n    pub fn read_only(mut self, read_only: bool) -> Self {\n        self.read_only = Some(read_only);\n        self\n    }\n\n    /// Sets the deferrability of the transaction.\n    ///\n    /// If the transaction is also serializable and read only, creation of the transaction may block, but when it\n    /// completes the transaction is able to run with less overhead and a guarantee that it will not be aborted due to\n    /// serialization failure.\n    pub fn 
deferrable(mut self, deferrable: bool) -> Self {\n        self.deferrable = Some(deferrable);\n        self\n    }\n\n    /// Begins the transaction.\n    ///\n    /// The transaction will roll back by default - use the `commit` method to commit it.\n    pub async fn start(self) -> Result<Transaction<'a>, Error> {\n        let mut query = \"START TRANSACTION\".to_string();\n        let mut first = true;\n\n        if let Some(level) = self.isolation_level {\n            first = false;\n\n            query.push_str(\" ISOLATION LEVEL \");\n            let level = match level {\n                IsolationLevel::ReadUncommitted => \"READ UNCOMMITTED\",\n                IsolationLevel::ReadCommitted => \"READ COMMITTED\",\n                IsolationLevel::RepeatableRead => \"REPEATABLE READ\",\n                IsolationLevel::Serializable => \"SERIALIZABLE\",\n            };\n            query.push_str(level);\n        }\n\n        if let Some(read_only) = self.read_only {\n            if !first {\n                query.push(',');\n            }\n            first = false;\n\n            let s = if read_only {\n                \" READ ONLY\"\n            } else {\n                \" READ WRITE\"\n            };\n            query.push_str(s);\n        }\n\n        if let Some(deferrable) = self.deferrable {\n            if !first {\n                query.push(',');\n            }\n\n            let s = if deferrable {\n                \" DEFERRABLE\"\n            } else {\n                \" NOT DEFERRABLE\"\n            };\n            query.push_str(s);\n        }\n\n        self.client.batch_execute(&query).await?;\n\n        Ok(Transaction::new(self.client))\n    }\n}\n"
  },
  {
    "path": "libs/proxy/tokio-postgres2/src/types.rs",
    "content": "//! Types.\n//!\n//! This module is a reexport of the `postgres_types` crate.\n\n#[doc(inline)]\npub use postgres_types2::*;\n"
  },
  {
    "path": "libs/remote_storage/Cargo.toml",
    "content": "[package]\nname = \"remote_storage\"\nversion = \"0.1.0\"\nedition = \"2024\"\nlicense.workspace = true\n\n[dependencies]\nanyhow.workspace = true\nasync-trait.workspace = true\nasync-stream.workspace = true\nonce_cell.workspace = true\naws-smithy-async.workspace = true\naws-smithy-types.workspace = true\naws-config.workspace = true\naws-sdk-s3.workspace = true\nbase64.workspace = true\nbytes.workspace = true\ncamino = { workspace = true, features = [\"serde1\"] }\nhumantime-serde.workspace = true\nhyper = { workspace = true, features = [\"client\"] }\nfutures.workspace = true\nreqwest = { workspace = true, features = [\"multipart\", \"stream\"] }\nchrono = { version = \"0.4\", default-features = false, features = [\"clock\"] }\nserde.workspace = true\nserde_json.workspace = true\ntokio = { workspace = true, features = [\"sync\", \"fs\", \"io-util\"] }\ntokio-stream.workspace = true\ntokio-util = { workspace = true, features = [\"compat\"] }\ntoml_edit.workspace = true\ntracing.workspace = true\nscopeguard.workspace = true\nmetrics.workspace = true\nutils = { path = \"../utils\", default-features = false }\npin-project-lite.workspace = true\n\nazure_core.workspace = true\nazure_identity.workspace = true\nazure_storage.workspace = true\nazure_storage_blobs.workspace = true\nfutures-util.workspace = true\nhttp-types.workspace = true\nhttp-body-util.workspace = true\nitertools.workspace = true\nsync_wrapper = { workspace = true, features = [\"futures\"] }\ngcp_auth = \"0.12.3\"\nurl.workspace = true\nhttp.workspace = true\nuuid.workspace = true\n\nbyteorder = \"1.4\"\nrand.workspace = true\n\n[dev-dependencies]\ncamino-tempfile.workspace = true\ntest-context.workspace = true\nrand.workspace = true\ntokio = { workspace = true, features = [\"test-util\"] }\n"
  },
  {
    "path": "libs/remote_storage/src/azure_blob.rs",
    "content": "//! Azure Blob Storage wrapper\n\nuse std::borrow::Cow;\nuse std::collections::HashMap;\nuse std::fmt::Display;\nuse std::num::NonZeroU32;\nuse std::pin::Pin;\nuse std::str::FromStr;\nuse std::sync::Arc;\nuse std::time::{Duration, SystemTime};\nuse std::{env, io};\n\nuse anyhow::{Context, Result, anyhow};\nuse azure_core::request_options::{IfMatchCondition, MaxResults, Metadata, Range};\nuse azure_core::{Continuable, HttpClient, RetryOptions, TransportOptions};\nuse azure_storage::StorageCredentials;\nuse azure_storage_blobs::blob::BlobBlockType;\nuse azure_storage_blobs::blob::BlockList;\nuse azure_storage_blobs::blob::{Blob, CopyStatus};\nuse azure_storage_blobs::container::operations::ListBlobsBuilder;\nuse azure_storage_blobs::prelude::ClientBuilder;\nuse azure_storage_blobs::{blob::operations::GetBlobBuilder, prelude::ContainerClient};\nuse base64::{Engine as _, engine::general_purpose::URL_SAFE};\nuse byteorder::{BigEndian, ByteOrder};\nuse bytes::Bytes;\nuse camino::Utf8Path;\nuse futures::FutureExt;\nuse futures::future::Either;\nuse futures::stream::Stream;\nuse futures_util::{StreamExt, TryStreamExt};\nuse http_types::{StatusCode, Url};\nuse scopeguard::ScopeGuard;\nuse tokio::fs::File;\nuse tokio::io::AsyncReadExt;\nuse tokio::io::AsyncSeekExt;\nuse tokio_util::sync::CancellationToken;\nuse tracing::debug;\nuse utils::backoff;\nuse utils::backoff::exponential_backoff_duration_seconds;\n\nuse super::REMOTE_STORAGE_PREFIX_SEPARATOR;\nuse crate::config::AzureConfig;\nuse crate::error::Cancelled;\nuse crate::metrics::{AttemptOutcome, RequestKind, start_measuring_requests};\nuse crate::{\n    ConcurrencyLimiter, Download, DownloadError, DownloadKind, DownloadOpts, Listing, ListingMode,\n    ListingObject, RemotePath, RemoteStorage, StorageMetadata, TimeTravelError, TimeoutOrCancel,\n    Version, VersionKind,\n};\n\npub struct AzureBlobStorage {\n    client: ContainerClient,\n    container_name: String,\n    prefix_in_container: 
Option<String>,\n    max_keys_per_list_response: Option<NonZeroU32>,\n    concurrency_limiter: ConcurrencyLimiter,\n    // Per-request timeout. Accessible for tests.\n    pub timeout: Duration,\n\n    // Alternative timeout used for metadata objects which are expected to be small\n    pub small_timeout: Duration,\n    /* BEGIN_HADRON */\n    pub put_block_size_mb: Option<usize>,\n    /* END_HADRON */\n}\n\nimpl AzureBlobStorage {\n    pub fn new(\n        azure_config: &AzureConfig,\n        timeout: Duration,\n        small_timeout: Duration,\n    ) -> Result<Self> {\n        debug!(\n            \"Creating azure remote storage for azure container {}\",\n            azure_config.container_name\n        );\n\n        // Use the storage account from the config by default, fall back to env var if not present.\n        let account = azure_config.storage_account.clone().unwrap_or_else(|| {\n            env::var(\"AZURE_STORAGE_ACCOUNT\").expect(\"missing AZURE_STORAGE_ACCOUNT\")\n        });\n\n        // If the `AZURE_STORAGE_ACCESS_KEY` env var has an access key, use that,\n        // otherwise try the token based credentials.\n        let credentials = if let Ok(access_key) = env::var(\"AZURE_STORAGE_ACCESS_KEY\") {\n            StorageCredentials::access_key(account.clone(), access_key)\n        } else {\n            let token_credential = azure_identity::create_default_credential()\n                .context(\"trying to obtain Azure default credentials\")?;\n            StorageCredentials::token_credential(token_credential)\n        };\n\n        let builder = ClientBuilder::new(account, credentials)\n            // we have an outer retry\n            .retry(RetryOptions::none())\n            // Customize transport to configure connection pooling\n            .transport(TransportOptions::new(Self::reqwest_client(\n                azure_config.conn_pool_size,\n            )));\n\n        let client = 
builder.container_client(azure_config.container_name.to_owned());\n\n        let max_keys_per_list_response =\n            if let Some(limit) = azure_config.max_keys_per_list_response {\n                Some(\n                    NonZeroU32::new(limit as u32)\n                        .ok_or_else(|| anyhow::anyhow!(\"max_keys_per_list_response can't be 0\"))?,\n                )\n            } else {\n                None\n            };\n\n        Ok(AzureBlobStorage {\n            client,\n            container_name: azure_config.container_name.to_owned(),\n            prefix_in_container: azure_config.prefix_in_container.to_owned(),\n            max_keys_per_list_response,\n            concurrency_limiter: ConcurrencyLimiter::new(azure_config.concurrency_limit.get()),\n            timeout,\n            small_timeout,\n            /* BEGIN_HADRON */\n            put_block_size_mb: azure_config.put_block_size_mb,\n            /* END_HADRON */\n        })\n    }\n\n    fn reqwest_client(conn_pool_size: usize) -> Arc<dyn HttpClient> {\n        let client = reqwest::ClientBuilder::new()\n            .pool_max_idle_per_host(conn_pool_size)\n            .build()\n            .expect(\"failed to build `reqwest` client\");\n        Arc::new(client)\n    }\n\n    pub fn relative_path_to_name(&self, path: &RemotePath) -> String {\n        assert_eq!(std::path::MAIN_SEPARATOR, REMOTE_STORAGE_PREFIX_SEPARATOR);\n        let path_string = path.get_path().as_str();\n        match &self.prefix_in_container {\n            Some(prefix) => {\n                if prefix.ends_with(REMOTE_STORAGE_PREFIX_SEPARATOR) {\n                    prefix.clone() + path_string\n                } else {\n                    format!(\"{prefix}{REMOTE_STORAGE_PREFIX_SEPARATOR}{path_string}\")\n                }\n            }\n            None => path_string.to_string(),\n        }\n    }\n\n    fn name_to_relative_path(&self, key: &str) -> RemotePath {\n        let relative_path =\n            match 
key.strip_prefix(self.prefix_in_container.as_deref().unwrap_or_default()) {\n                Some(stripped) => stripped,\n                // we rely on Azure to return properly prefixed paths\n                // for requests with a certain prefix\n                None => panic!(\n                    \"Key {key} does not start with container prefix {:?}\",\n                    self.prefix_in_container\n                ),\n            };\n        RemotePath(\n            relative_path\n                .split(REMOTE_STORAGE_PREFIX_SEPARATOR)\n                .collect(),\n        )\n    }\n\n    async fn download_for_builder(\n        &self,\n        builder: GetBlobBuilder,\n        timeout: Duration,\n        cancel: &CancellationToken,\n    ) -> Result<Download, DownloadError> {\n        let kind = RequestKind::Get;\n\n        let _permit = self.permit(kind, cancel).await?;\n        let cancel_or_timeout = crate::support::cancel_or_timeout(self.timeout, cancel.clone());\n        let cancel_or_timeout_ = crate::support::cancel_or_timeout(self.timeout, cancel.clone());\n\n        let mut etag = None;\n        let mut last_modified = None;\n        let mut metadata = HashMap::new();\n\n        let started_at = start_measuring_requests(kind);\n\n        let download = async {\n            let response = builder\n                // convert to concrete Pageable\n                .into_stream()\n                // convert to TryStream\n                .into_stream()\n                .map_err(to_download_error);\n\n            // apply per request timeout\n            let response = tokio_stream::StreamExt::timeout(response, timeout);\n\n            // flatten\n            let response = response.map(|res| match res {\n                Ok(res) => res,\n                Err(_elapsed) => Err(DownloadError::Timeout),\n            });\n\n            let mut response = Box::pin(response);\n\n            let Some(part) = response.next().await else {\n                return 
Err(DownloadError::Other(anyhow::anyhow!(\n                    \"Azure GET response contained no response body\"\n                )));\n            };\n            let part = part?;\n            if etag.is_none() {\n                etag = Some(part.blob.properties.etag);\n            }\n            if last_modified.is_none() {\n                last_modified = Some(part.blob.properties.last_modified.into());\n            }\n            if let Some(blob_meta) = part.blob.metadata {\n                metadata.extend(blob_meta.iter().map(|(k, v)| (k.to_owned(), v.to_owned())));\n            }\n\n            // unwrap safety: if these were None, bufs would be empty and we would have returned an error already\n            let etag = etag.unwrap();\n            let last_modified = last_modified.unwrap();\n\n            let tail_stream = response\n                .map(|part| match part {\n                    Ok(part) => Either::Left(part.data.map(|r| r.map_err(io::Error::other))),\n                    Err(e) => {\n                        Either::Right(futures::stream::once(async { Err(io::Error::other(e)) }))\n                    }\n                })\n                .flatten();\n            let stream = part\n                .data\n                .map(|r| r.map_err(io::Error::other))\n                .chain(sync_wrapper::SyncStream::new(tail_stream));\n            //.chain(SyncStream::from_pin(Box::pin(tail_stream)));\n\n            let download_stream = crate::support::DownloadStream::new(cancel_or_timeout_, stream);\n\n            Ok(Download {\n                download_stream: Box::pin(download_stream),\n                etag,\n                last_modified,\n                metadata: Some(StorageMetadata(metadata)),\n            })\n        };\n\n        let download = tokio::select! 
{\n            bufs = download => bufs,\n            cancel_or_timeout = cancel_or_timeout => match cancel_or_timeout {\n                TimeoutOrCancel::Timeout => return Err(DownloadError::Timeout),\n                TimeoutOrCancel::Cancel => return Err(DownloadError::Cancelled),\n            },\n        };\n        let started_at = ScopeGuard::into_inner(started_at);\n        let outcome = match &download {\n            Ok(_) => AttemptOutcome::Ok,\n            // At this level in the stack 404 and 304 responses do not indicate an error.\n            // There are expected cases where a blob may not exist or hasn't been modified since\n            // the last get (e.g. probing for timeline indices and heatmap downloads).\n            // Callers should handle errors if they are unexpected.\n            Err(DownloadError::NotFound | DownloadError::Unmodified) => AttemptOutcome::Ok,\n            Err(_) => AttemptOutcome::Err,\n        };\n        crate::metrics::BUCKET_METRICS\n            .req_seconds\n            .observe_elapsed(kind, outcome, started_at);\n        download\n    }\n\n    fn list_streaming_for_fn<T: Default + ListingCollector>(\n        &self,\n        prefix: Option<&RemotePath>,\n        mode: ListingMode,\n        max_keys: Option<NonZeroU32>,\n        cancel: &CancellationToken,\n        request_kind: RequestKind,\n        customize_builder: impl Fn(ListBlobsBuilder) -> ListBlobsBuilder,\n    ) -> impl Stream<Item = Result<T, DownloadError>> {\n        // get the passed prefix or, if it is not set, use the prefix_in_container value\n        let list_prefix = prefix.map(|p| self.relative_path_to_name(p)).or_else(|| {\n            self.prefix_in_container.clone().map(|mut s| {\n                if !s.ends_with(REMOTE_STORAGE_PREFIX_SEPARATOR) {\n                    s.push(REMOTE_STORAGE_PREFIX_SEPARATOR);\n                }\n                s\n            })\n        });\n\n        async_stream::stream! 
{\n            let _permit = self.permit(request_kind, cancel).await?;\n\n            let mut builder = self.client.list_blobs();\n\n            if let ListingMode::WithDelimiter = mode {\n                builder = builder.delimiter(REMOTE_STORAGE_PREFIX_SEPARATOR.to_string());\n            }\n\n            if let Some(prefix) = list_prefix {\n                builder = builder.prefix(Cow::from(prefix.to_owned()));\n            }\n\n            if let Some(limit) = self.max_keys_per_list_response {\n                builder = builder.max_results(MaxResults::new(limit));\n            }\n\n            builder = customize_builder(builder);\n\n            let mut next_marker = None;\n\n            let mut timeout_try_cnt = 1;\n\n            'outer: loop {\n                let mut builder = builder.clone();\n                if let Some(marker) = next_marker.clone() {\n                    builder = builder.marker(marker);\n                }\n                // Azure Blob Rust SDK does not expose the list blob API directly. Users have to use\n                // their pageable iterator wrapper that returns all keys as a stream. We want to have\n                // full control of paging, and therefore we only take the first item from the stream.\n                let mut response_stream = builder.into_stream();\n                let response = response_stream.next();\n                // Timeout mechanism: the Azure client will sometimes get stuck on a request, but retrying that request\n                // would immediately succeed. Therefore, we use an exponential backoff timeout to retry the request.\n                // (Usually, exponential backoff is used to determine the sleep time between two retries.) 
We\n                // start with 10.0 second timeout, and double the timeout for each failure, up to 5 failures.\n                // timeout = min(5 * (1.0+1.0)^n, self.timeout).\n                let this_timeout = (5.0 * exponential_backoff_duration_seconds(timeout_try_cnt, 1.0, self.timeout.as_secs_f64())).min(self.timeout.as_secs_f64());\n                let response = tokio::time::timeout(Duration::from_secs_f64(this_timeout), response);\n                let response = response.map(|res| {\n                    match res {\n                        Ok(Some(Ok(res))) => Ok(Some(res)),\n                        Ok(Some(Err(e)))  => Err(to_download_error(e)),\n                        Ok(None) => Ok(None),\n                        Err(_elasped) => Err(DownloadError::Timeout),\n                    }\n                });\n                let mut max_keys = max_keys.map(|mk| mk.get());\n                let next_item = tokio::select! {\n                    op = response => op,\n                    _ = cancel.cancelled() => Err(DownloadError::Cancelled),\n                };\n\n                if let Err(DownloadError::Timeout) = &next_item {\n                    timeout_try_cnt += 1;\n                    if timeout_try_cnt <= 5 {\n                        continue 'outer;\n                    }\n                }\n\n                let next_item = match next_item {\n                    Ok(next_item) => next_item,\n                    Err(e) => {\n                        // The error is potentially retryable, so we must rewind the loop after yielding.\n                        yield Err(e);\n                        continue 'outer;\n                    },\n                };\n\n                // Log a warning if we saw two timeouts in a row before a successful request\n                if timeout_try_cnt > 2 {\n                    tracing::warn!(\"Azure Blob Storage list timed out and succeeded after {} tries\", timeout_try_cnt);\n                }\n                
timeout_try_cnt = 1;\n\n                let Some(entry) = next_item else {\n                    // The list is complete, so yield it.\n                    break;\n                };\n\n                let mut res = T::default();\n                next_marker = entry.continuation();\n                let prefix_iter = entry\n                    .blobs\n                    .prefixes()\n                    .map(|prefix| self.name_to_relative_path(&prefix.name));\n                res.add_prefixes(self, prefix_iter);\n\n                let blob_iter = entry\n                    .blobs\n                    .blobs();\n\n                for key in blob_iter {\n                    res.add_blob(self, key);\n\n                    if let Some(mut mk) = max_keys {\n                        assert!(mk > 0);\n                        mk -= 1;\n                        if mk == 0 {\n                            yield Ok(res); // limit reached\n                            break 'outer;\n                        }\n                        max_keys = Some(mk);\n                    }\n                }\n                yield Ok(res);\n\n                // We are done here\n                if next_marker.is_none() {\n                    break;\n                }\n            }\n        }\n    }\n\n    async fn permit(\n        &self,\n        kind: RequestKind,\n        cancel: &CancellationToken,\n    ) -> Result<tokio::sync::SemaphorePermit<'_>, Cancelled> {\n        let acquire = self.concurrency_limiter.acquire(kind);\n\n        tokio::select! 
{\n            permit = acquire => Ok(permit.expect(\"never closed\")),\n            _ = cancel.cancelled() => Err(Cancelled),\n        }\n    }\n\n    pub fn container_name(&self) -> &str {\n        &self.container_name\n    }\n\n    async fn list_versions_with_permit(\n        &self,\n        _permit: &tokio::sync::SemaphorePermit<'_>,\n        prefix: Option<&RemotePath>,\n        mode: ListingMode,\n        max_keys: Option<NonZeroU32>,\n        cancel: &CancellationToken,\n    ) -> Result<crate::VersionListing, DownloadError> {\n        let customize_builder = |mut builder: ListBlobsBuilder| {\n            builder = builder.include_versions(true);\n            // We do not return this info back to `VersionListing` yet.\n            builder = builder.include_deleted(true);\n            builder\n        };\n        let kind = RequestKind::ListVersions;\n\n        let mut stream = std::pin::pin!(self.list_streaming_for_fn(\n            prefix,\n            mode,\n            max_keys,\n            cancel,\n            kind,\n            customize_builder\n        ));\n        let mut combined: crate::VersionListing =\n            stream.next().await.expect(\"At least one item required\")?;\n        while let Some(list) = stream.next().await {\n            let list = list?;\n            combined.versions.extend(list.versions.into_iter());\n        }\n        Ok(combined)\n    }\n}\n\ntrait ListingCollector {\n    fn add_prefixes(&mut self, abs: &AzureBlobStorage, prefix_it: impl Iterator<Item = RemotePath>);\n    fn add_blob(&mut self, abs: &AzureBlobStorage, blob: &Blob);\n}\n\nimpl ListingCollector for Listing {\n    fn add_prefixes(\n        &mut self,\n        _abs: &AzureBlobStorage,\n        prefix_it: impl Iterator<Item = RemotePath>,\n    ) {\n        self.prefixes.extend(prefix_it);\n    }\n    fn add_blob(&mut self, abs: &AzureBlobStorage, blob: &Blob) {\n        self.keys.push(ListingObject {\n            key: abs.name_to_relative_path(&blob.name),\n    
        last_modified: blob.properties.last_modified.into(),\n            size: blob.properties.content_length,\n        });\n    }\n}\n\nimpl ListingCollector for crate::VersionListing {\n    fn add_prefixes(\n        &mut self,\n        _abs: &AzureBlobStorage,\n        _prefix_it: impl Iterator<Item = RemotePath>,\n    ) {\n        // nothing\n    }\n    fn add_blob(&mut self, abs: &AzureBlobStorage, blob: &Blob) {\n        let id = crate::VersionId(blob.version_id.clone().expect(\"didn't find version ID\"));\n        self.versions.push(crate::Version {\n            key: abs.name_to_relative_path(&blob.name),\n            last_modified: blob.properties.last_modified.into(),\n            kind: crate::VersionKind::Version(id),\n        });\n    }\n}\n\nfn to_azure_metadata(metadata: StorageMetadata) -> Metadata {\n    let mut res = Metadata::new();\n    for (k, v) in metadata.0.into_iter() {\n        res.insert(k, v);\n    }\n    res\n}\n\nfn to_download_error(error: azure_core::Error) -> DownloadError {\n    if let Some(http_err) = error.as_http_error() {\n        match http_err.status() {\n            StatusCode::NotFound => DownloadError::NotFound,\n            StatusCode::NotModified => DownloadError::Unmodified,\n            StatusCode::BadRequest => DownloadError::BadInput(anyhow::Error::new(error)),\n            _ => DownloadError::Other(anyhow::Error::new(error)),\n        }\n    } else {\n        DownloadError::Other(error.into())\n    }\n}\n\nimpl RemoteStorage for AzureBlobStorage {\n    fn list_streaming(\n        &self,\n        prefix: Option<&RemotePath>,\n        mode: ListingMode,\n        max_keys: Option<NonZeroU32>,\n        cancel: &CancellationToken,\n    ) -> impl Stream<Item = Result<Listing, DownloadError>> {\n        let customize_builder = |builder| builder;\n        let kind = RequestKind::ListVersions;\n        self.list_streaming_for_fn(prefix, mode, max_keys, cancel, kind, customize_builder)\n    }\n\n    async fn list_versions(\n    
    &self,\n        prefix: Option<&RemotePath>,\n        mode: ListingMode,\n        max_keys: Option<NonZeroU32>,\n        cancel: &CancellationToken,\n    ) -> std::result::Result<crate::VersionListing, DownloadError> {\n        let kind = RequestKind::ListVersions;\n        let permit = self.permit(kind, cancel).await?;\n        self.list_versions_with_permit(&permit, prefix, mode, max_keys, cancel)\n            .await\n    }\n\n    async fn head_object(\n        &self,\n        key: &RemotePath,\n        cancel: &CancellationToken,\n    ) -> Result<ListingObject, DownloadError> {\n        let kind = RequestKind::Head;\n        let _permit = self.permit(kind, cancel).await?;\n\n        let started_at = start_measuring_requests(kind);\n\n        let blob_client = self.client.blob_client(self.relative_path_to_name(key));\n        let properties_future = blob_client.get_properties().into_future();\n\n        let properties_future = tokio::time::timeout(self.small_timeout, properties_future);\n\n        let res = tokio::select! {\n            res = properties_future => res,\n            _ = cancel.cancelled() => return Err(TimeoutOrCancel::Cancel.into()),\n        };\n\n        if let Ok(inner) = &res {\n            // do not incl. 
timeouts as errors in metrics but cancellations\n            let started_at = ScopeGuard::into_inner(started_at);\n            crate::metrics::BUCKET_METRICS\n                .req_seconds\n                .observe_elapsed(kind, inner, started_at);\n        }\n\n        let data = match res {\n            Ok(Ok(data)) => Ok(data),\n            Ok(Err(sdk)) => Err(to_download_error(sdk)),\n            Err(_timeout) => Err(DownloadError::Timeout),\n        }?;\n\n        let properties = data.blob.properties;\n        Ok(ListingObject {\n            key: key.to_owned(),\n            last_modified: SystemTime::from(properties.last_modified),\n            size: properties.content_length,\n        })\n    }\n\n    async fn upload(\n        &self,\n        from: impl Stream<Item = std::io::Result<Bytes>> + Send + Sync + 'static,\n        data_size_bytes: usize,\n        to: &RemotePath,\n        metadata: Option<StorageMetadata>,\n        cancel: &CancellationToken,\n    ) -> anyhow::Result<()> {\n        let kind = RequestKind::Put;\n        let _permit = self.permit(kind, cancel).await?;\n\n        let started_at = start_measuring_requests(kind);\n\n        let mut metadata_map = metadata.unwrap_or([].into());\n        let timeline_file_path = metadata_map.0.remove(\"databricks_azure_put_block\");\n\n        /* BEGIN_HADRON */\n        let op = async move {\n            let blob_client = self.client.blob_client(self.relative_path_to_name(to));\n            let put_block_size = self.put_block_size_mb.unwrap_or(0) * 1024 * 1024;\n            if timeline_file_path.is_none() || put_block_size == 0 {\n                // Use put_block_blob directly.\n                let from: Pin<\n                    Box<dyn Stream<Item = std::io::Result<Bytes>> + Send + Sync + 'static>,\n                > = Box::pin(from);\n                let from = NonSeekableStream::new(from, data_size_bytes);\n                let body = azure_core::Body::SeekableStream(Box::new(from));\n\n               
 let mut builder = blob_client.put_block_blob(body);\n                if !metadata_map.0.is_empty() {\n                    builder = builder.metadata(to_azure_metadata(metadata_map));\n                }\n                let fut = builder.into_future();\n                let fut = tokio::time::timeout(self.timeout, fut);\n                let result = fut.await;\n                match result {\n                    Ok(Ok(_response)) => return Ok(()),\n                    Ok(Err(azure)) => return Err(azure.into()),\n                    Err(_timeout) => return Err(TimeoutOrCancel::Timeout.into()),\n                };\n            }\n            // Upload chunks concurrently using Put Block.\n            // Each PutBlock uploads put_block_size bytes of the file.\n            let mut upload_futures: Vec<tokio::task::JoinHandle<Result<(), azure_core::Error>>> =\n                vec![];\n            let mut block_list = BlockList::default();\n            let mut start_bytes = 0u64;\n            let mut remaining_bytes = data_size_bytes;\n            let mut block_list_count = 0;\n\n            while remaining_bytes > 0 {\n                let block_size = std::cmp::min(remaining_bytes, put_block_size);\n                let end_bytes = start_bytes + block_size as u64;\n                let block_id = block_list_count;\n                let timeout = self.timeout;\n                let blob_client = blob_client.clone();\n                let timeline_file = timeline_file_path.clone().unwrap().clone();\n\n                let mut encoded_block_id = [0u8; 8];\n                BigEndian::write_u64(&mut encoded_block_id, block_id);\n                URL_SAFE.encode(encoded_block_id);\n\n                // Put one block.\n                let part_fut = async move {\n                    let mut file = File::open(Utf8Path::new(&timeline_file.clone())).await?;\n                    file.seek(io::SeekFrom::Start(start_bytes)).await?;\n                    let limited_reader = 
file.take(block_size as u64);\n                    let file_chunk_stream =\n                        tokio_util::io::ReaderStream::with_capacity(limited_reader, 1024 * 1024);\n                    let file_chunk_stream_pin: Pin<\n                        Box<dyn Stream<Item = std::io::Result<Bytes>> + Send + Sync + 'static>,\n                    > = Box::pin(file_chunk_stream);\n                    let stream_wrapper = NonSeekableStream::new(file_chunk_stream_pin, block_size);\n                    let body = azure_core::Body::SeekableStream(Box::new(stream_wrapper));\n                    // Azure put block takes URL-encoded block ids and all blocks must have the same byte length.\n                    // https://learn.microsoft.com/en-us/rest/api/storageservices/put-block?tabs=microsoft-entra-id#uri-parameters\n                    let builder = blob_client.put_block(encoded_block_id.to_vec(), body);\n                    let fut = builder.into_future();\n                    let fut = tokio::time::timeout(timeout, fut);\n                    let result = fut.await;\n                    tracing::debug!(\n                        \"azure put block id-{} size {} start {} end {} file {} response {:#?}\",\n                        block_id,\n                        block_size,\n                        start_bytes,\n                        end_bytes,\n                        timeline_file,\n                        result\n                    );\n                    match result {\n                        Ok(Ok(_response)) => Ok(()),\n                        Ok(Err(azure)) => Err(azure),\n                        Err(_timeout) => Err(azure_core::Error::new(\n                            azure_core::error::ErrorKind::Io,\n                            std::io::Error::new(\n                                std::io::ErrorKind::TimedOut,\n                                \"Operation timed out\",\n                            ),\n                        )),\n                    }\n            
    };\n                upload_futures.push(tokio::spawn(part_fut));\n\n                block_list_count += 1;\n                remaining_bytes -= block_size;\n                start_bytes += block_size as u64;\n\n                block_list\n                    .blocks\n                    .push(BlobBlockType::Uncommitted(encoded_block_id.to_vec().into()));\n            }\n\n            tracing::debug!(\n                \"azure put blocks {} total MB: {} chunk size MB: {}\",\n                block_list_count,\n                data_size_bytes / 1024 / 1024,\n                put_block_size / 1024 / 1024\n            );\n            // Wait for all blocks to be uploaded.\n            let upload_results = futures::future::try_join_all(upload_futures).await;\n            if upload_results.is_err() {\n                return Err(anyhow::anyhow!(format!(\n                    \"Failed to upload all blocks {:#?}\",\n                    upload_results.unwrap_err()\n                )));\n            }\n\n            // Commit the blocks.\n            let mut builder = blob_client.put_block_list(block_list);\n            if !metadata_map.0.is_empty() {\n                builder = builder.metadata(to_azure_metadata(metadata_map));\n            }\n            let fut = builder.into_future();\n            let fut = tokio::time::timeout(self.timeout, fut);\n            let result = fut.await;\n            tracing::debug!(\"azure put block list response {:#?}\", result);\n\n            match result {\n                Ok(Ok(_response)) => Ok(()),\n                Ok(Err(azure)) => Err(azure.into()),\n                Err(_timeout) => Err(TimeoutOrCancel::Timeout.into()),\n            }\n        };\n        /* END_HADRON */\n\n        let res = tokio::select! 
{\n            res = op => res,\n            _ = cancel.cancelled() => return Err(TimeoutOrCancel::Cancel.into()),\n        };\n\n        let outcome = match res {\n            Ok(_) => AttemptOutcome::Ok,\n            Err(_) => AttemptOutcome::Err,\n        };\n        let started_at = ScopeGuard::into_inner(started_at);\n        crate::metrics::BUCKET_METRICS\n            .req_seconds\n            .observe_elapsed(kind, outcome, started_at);\n        res\n    }\n\n    async fn download(\n        &self,\n        from: &RemotePath,\n        opts: &DownloadOpts,\n        cancel: &CancellationToken,\n    ) -> Result<Download, DownloadError> {\n        let blob_client = self.client.blob_client(self.relative_path_to_name(from));\n\n        let mut builder = blob_client.get();\n\n        if let Some(ref etag) = opts.etag {\n            builder = builder.if_match(IfMatchCondition::NotMatch(etag.to_string()));\n        }\n\n        if let Some(ref version_id) = opts.version_id {\n            let version_id = azure_storage_blobs::prelude::VersionId::new(version_id.0.clone());\n            builder = builder.blob_versioning(version_id);\n        }\n\n        if let Some((start, end)) = opts.byte_range() {\n            builder = builder.range(match end {\n                Some(end) => Range::Range(start..end),\n                None => Range::RangeFrom(start..),\n            });\n        }\n\n        let timeout = match opts.kind {\n            DownloadKind::Small => self.small_timeout,\n            DownloadKind::Large => self.timeout,\n        };\n\n        self.download_for_builder(builder, timeout, cancel).await\n    }\n\n    async fn delete(&self, path: &RemotePath, cancel: &CancellationToken) -> anyhow::Result<()> {\n        self.delete_objects(std::array::from_ref(path), cancel)\n            .await\n    }\n\n    async fn delete_objects(\n        &self,\n        paths: &[RemotePath],\n        cancel: &CancellationToken,\n    ) -> anyhow::Result<()> {\n        let kind = 
RequestKind::Delete;\n        let _permit = self.permit(kind, cancel).await?;\n        let started_at = start_measuring_requests(kind);\n\n        let op = async {\n            // TODO batch requests are not supported by the SDK\n            // https://github.com/Azure/azure-sdk-for-rust/issues/1068\n            for path in paths {\n                #[derive(Debug)]\n                enum AzureOrTimeout {\n                    AzureError(azure_core::Error),\n                    Timeout,\n                    Cancel,\n                }\n                impl Display for AzureOrTimeout {\n                    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {\n                        write!(f, \"{self:?}\")\n                    }\n                }\n                let warn_threshold = 3;\n                let max_retries = 5;\n                backoff::retry(\n                    || async {\n                        let blob_client = self.client.blob_client(self.relative_path_to_name(path));\n\n                        let request = blob_client.delete().into_future();\n\n                        let res = tokio::time::timeout(self.timeout, request).await;\n\n                        match res {\n                            Ok(Ok(_v)) => Ok(()),\n                            Ok(Err(azure_err)) => {\n                                if let Some(http_err) = azure_err.as_http_error() {\n                                    if http_err.status() == StatusCode::NotFound {\n                                        return Ok(());\n                                    }\n                                }\n                                Err(AzureOrTimeout::AzureError(azure_err))\n                            }\n                            Err(_elapsed) => Err(AzureOrTimeout::Timeout),\n                        }\n                    },\n                    |err| match err {\n                        AzureOrTimeout::AzureError(_) | AzureOrTimeout::Timeout => false,\n             
           AzureOrTimeout::Cancel => true,\n                    },\n                    warn_threshold,\n                    max_retries,\n                    \"deleting remote object\",\n                    cancel,\n                )\n                .await\n                .ok_or_else(|| AzureOrTimeout::Cancel)\n                .and_then(|x| x)\n                .map_err(|e| match e {\n                    AzureOrTimeout::AzureError(err) => anyhow::Error::from(err),\n                    AzureOrTimeout::Timeout => TimeoutOrCancel::Timeout.into(),\n                    AzureOrTimeout::Cancel => TimeoutOrCancel::Cancel.into(),\n                })?;\n            }\n            Ok(())\n        };\n\n        let res = tokio::select! {\n            res = op => res,\n            _ = cancel.cancelled() => return Err(TimeoutOrCancel::Cancel.into()),\n        };\n\n        let started_at = ScopeGuard::into_inner(started_at);\n        crate::metrics::BUCKET_METRICS\n            .req_seconds\n            .observe_elapsed(kind, &res, started_at);\n        res\n    }\n\n    fn max_keys_per_delete(&self) -> usize {\n        super::MAX_KEYS_PER_DELETE_AZURE\n    }\n\n    async fn copy(\n        &self,\n        from: &RemotePath,\n        to: &RemotePath,\n        cancel: &CancellationToken,\n    ) -> anyhow::Result<()> {\n        let kind = RequestKind::Copy;\n        let _permit = self.permit(kind, cancel).await?;\n        let started_at = start_measuring_requests(kind);\n\n        let timeout = tokio::time::sleep(self.timeout);\n\n        let mut copy_status = None;\n\n        let op = async {\n            let blob_client = self.client.blob_client(self.relative_path_to_name(to));\n\n            let source_url = format!(\n                \"{}/{}\",\n                self.client.url()?,\n                self.relative_path_to_name(from)\n            );\n\n            let builder = blob_client.copy(Url::from_str(&source_url)?);\n            let copy = builder.into_future();\n\n         
   let result = copy.await?;\n\n            copy_status = Some(result.copy_status);\n            loop {\n                match copy_status.as_ref().expect(\"we always set it to Some\") {\n                    CopyStatus::Aborted => {\n                        anyhow::bail!(\"Received abort for copy from {from} to {to}.\");\n                    }\n                    CopyStatus::Failed => {\n                        anyhow::bail!(\"Received failure response for copy from {from} to {to}.\");\n                    }\n                    CopyStatus::Success => return Ok(()),\n                    CopyStatus::Pending => (),\n                }\n                // The copy is taking longer. Waiting a second and then re-trying.\n                // TODO estimate time based on copy_progress and adjust time based on that\n                tokio::time::sleep(Duration::from_millis(1000)).await;\n                let properties = blob_client.get_properties().into_future().await?;\n                let Some(status) = properties.blob.properties.copy_status else {\n                    tracing::warn!(\"copy_status for copy is None!, from={from}, to={to}\");\n                    return Ok(());\n                };\n                copy_status = Some(status);\n            }\n        };\n\n        let res = tokio::select! 
{\n            res = op => res,\n            _ = cancel.cancelled() => return Err(anyhow::Error::new(TimeoutOrCancel::Cancel)),\n            _ = timeout => {\n                let e = anyhow::Error::new(TimeoutOrCancel::Timeout);\n                let e = e.context(format!(\"Timeout, last status: {copy_status:?}\"));\n                Err(e)\n            },\n        };\n\n        let started_at = ScopeGuard::into_inner(started_at);\n        crate::metrics::BUCKET_METRICS\n            .req_seconds\n            .observe_elapsed(kind, &res, started_at);\n        res\n    }\n\n    async fn time_travel_recover(\n        &self,\n        prefix: Option<&RemotePath>,\n        timestamp: SystemTime,\n        done_if_after: SystemTime,\n        cancel: &CancellationToken,\n        _complexity_limit: Option<NonZeroU32>,\n    ) -> Result<(), TimeTravelError> {\n        let msg = \"PLEASE NOTE: Azure Blob storage time-travel recovery may not work as expected \"\n            .to_string()\n            + \"for some specific files. If a file gets deleted but then overwritten and we want to recover \"\n            + \"to the time during the file was not present, this functionality will recover the file. Only \"\n            + \"use the functionality for services that can tolerate this. 
For example, recovering a state of the \"\n            + \"pageserver tenants.\";\n        tracing::error!(\"{}\", msg);\n\n        let kind = RequestKind::TimeTravel;\n        let permit = self.permit(kind, cancel).await?;\n\n        let mode = ListingMode::NoDelimiter;\n        let version_listing = self\n            .list_versions_with_permit(&permit, prefix, mode, None, cancel)\n            .await\n            .map_err(|err| match err {\n                DownloadError::Other(e) => TimeTravelError::Other(e),\n                DownloadError::Cancelled => TimeTravelError::Cancelled,\n                other => TimeTravelError::Other(other.into()),\n            })?;\n        let versions_and_deletes = version_listing.versions;\n\n        tracing::info!(\n            \"Built list for time travel with {} versions and deletions\",\n            versions_and_deletes.len()\n        );\n\n        // Work on the list of references instead of the objects directly,\n        // otherwise we get lifetime errors in the sort_by_key call below.\n        let mut versions_and_deletes = versions_and_deletes.iter().collect::<Vec<_>>();\n\n        versions_and_deletes.sort_by_key(|vd| (&vd.key, &vd.last_modified));\n\n        let mut vds_for_key = HashMap::<_, Vec<_>>::new();\n\n        for vd in &versions_and_deletes {\n            let Version { key, .. 
} = &vd;\n            let version_id = vd.version_id().map(|v| v.0.as_str());\n            if version_id == Some(\"null\") {\n                return Err(TimeTravelError::Other(anyhow!(\n                    \"Received ListVersions response for key={key} with version_id='null', \\\n                        indicating either disabled versioning, or legacy objects with null version id values\"\n                )));\n            }\n            tracing::trace!(\"Parsing version key={key} kind={:?}\", vd.kind);\n\n            vds_for_key.entry(key).or_default().push(vd);\n        }\n\n        let warn_threshold = 3;\n        let max_retries = 10;\n        let is_permanent = |e: &_| matches!(e, TimeTravelError::Cancelled);\n\n        for (key, versions) in vds_for_key {\n            let last_vd = versions.last().unwrap();\n            let key = self.relative_path_to_name(key);\n            if last_vd.last_modified > done_if_after {\n                tracing::debug!(\"Key {key} has version later than done_if_after, skipping\");\n                continue;\n            }\n            // the version we want to restore to.\n            let version_to_restore_to =\n                match versions.binary_search_by_key(&timestamp, |tpl| tpl.last_modified) {\n                    Ok(v) => v,\n                    Err(e) => e,\n                };\n            if version_to_restore_to == versions.len() {\n                tracing::debug!(\"Key {key} has no changes since timestamp, skipping\");\n                continue;\n            }\n            let mut do_delete = false;\n            if version_to_restore_to == 0 {\n                // All versions more recent, so the key didn't exist at the specified time point.\n                tracing::debug!(\n                    \"All {} versions more recent for {key}, deleting\",\n                    versions.len()\n                );\n                do_delete = true;\n            } else {\n                match &versions[version_to_restore_to - 
1] {\n                    Version {\n                        kind: VersionKind::Version(version_id),\n                        ..\n                    } => {\n                        let source_url = format!(\n                            \"{}/{}?versionid={}\",\n                            self.client\n                                .url()\n                                .map_err(|e| TimeTravelError::Other(anyhow!(\"{e}\")))?,\n                            key,\n                            version_id.0\n                        );\n                        tracing::debug!(\n                            \"Promoting old version {} for {key} at {}...\",\n                            version_id.0,\n                            source_url\n                        );\n                        backoff::retry(\n                            || async {\n                                let blob_client = self.client.blob_client(key.clone());\n                                let op = blob_client.copy(Url::from_str(&source_url).unwrap());\n                                tokio::select! 
{\n                                    res = op => res.map_err(|e| TimeTravelError::Other(e.into())),\n                                    _ = cancel.cancelled() => Err(TimeTravelError::Cancelled),\n                                }\n                            },\n                            is_permanent,\n                            warn_threshold,\n                            max_retries,\n                            \"copying object version for time_travel_recover\",\n                            cancel,\n                        )\n                        .await\n                        .ok_or_else(|| TimeTravelError::Cancelled)\n                        .and_then(|x| x)?;\n                        tracing::info!(?version_id, %key, \"Copied old version in Azure blob storage\");\n                    }\n                    Version {\n                        kind: VersionKind::DeletionMarker,\n                        ..\n                    } => {\n                        do_delete = true;\n                    }\n                }\n            };\n            if do_delete {\n                if matches!(last_vd.kind, VersionKind::DeletionMarker) {\n                    // Key has since been deleted (but there was some history), no need to do anything\n                    tracing::debug!(\"Key {key} already deleted, skipping.\");\n                } else {\n                    tracing::debug!(\"Deleting {key}...\");\n\n                    self.delete(&RemotePath::from_string(&key).unwrap(), cancel)\n                        .await\n                        .map_err(|e| {\n                            // delete_oid0 will use TimeoutOrCancel\n                            if TimeoutOrCancel::caused_by_cancel(&e) {\n                                TimeTravelError::Cancelled\n                            } else {\n                                TimeTravelError::Other(e)\n                            }\n                        })?;\n                }\n            }\n        }\n\n   
     Ok(())\n    }\n}\n\npin_project_lite::pin_project! {\n    /// Hack to work around not being able to stream once with azure sdk.\n    ///\n    /// Azure sdk clones streams around with the assumption that they are like\n    /// `Arc<tokio::fs::File>` (except not supporting tokio), however our streams are not like\n    /// that. For example for an `index_part.json` we just have a single chunk of [`Bytes`]\n    /// representing the whole serialized vec. It could be trivially cloneable and \"semi-trivially\"\n    /// seekable, but it is easier to just retry the request.\n    #[project = NonSeekableStreamProj]\n    enum NonSeekableStream<S> {\n        /// A stream wrapper's initial form.\n        ///\n        /// Mutex exists to allow moving when cloning. If the sdk changes to do less than 1\n        /// clone before first request, then this must be changed.\n        Initial {\n            inner: std::sync::Mutex<Option<tokio_util::compat::Compat<tokio_util::io::StreamReader<S, Bytes>>>>,\n            len: usize,\n        },\n        /// The actually readable variant, produced by cloning the Initial variant.\n        ///\n        /// The sdk currently always clones once, even without retry policy.\n        Actual {\n            #[pin]\n            inner: tokio_util::compat::Compat<tokio_util::io::StreamReader<S, Bytes>>,\n            len: usize,\n            read_any: bool,\n        },\n        /// Most likely unneeded, but left to make life easier, in case more clones are added.\n        Cloned {\n            len_was: usize,\n        }\n    }\n}\n\nimpl<S> NonSeekableStream<S>\nwhere\n    S: Stream<Item = std::io::Result<Bytes>> + Send + Sync + 'static,\n{\n    fn new(inner: S, len: usize) -> NonSeekableStream<S> {\n        use tokio_util::compat::TokioAsyncReadCompatExt;\n\n        let inner = tokio_util::io::StreamReader::new(inner).compat();\n        let inner = Some(inner);\n        let inner = std::sync::Mutex::new(inner);\n        
NonSeekableStream::Initial { inner, len }\n    }\n}\n\nimpl<S> std::fmt::Debug for NonSeekableStream<S> {\n    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {\n        match self {\n            Self::Initial { len, .. } => f.debug_struct(\"Initial\").field(\"len\", len).finish(),\n            Self::Actual { len, .. } => f.debug_struct(\"Actual\").field(\"len\", len).finish(),\n            Self::Cloned { len_was, .. } => f.debug_struct(\"Cloned\").field(\"len\", len_was).finish(),\n        }\n    }\n}\n\nimpl<S> futures::io::AsyncRead for NonSeekableStream<S>\nwhere\n    S: Stream<Item = std::io::Result<Bytes>>,\n{\n    fn poll_read(\n        self: std::pin::Pin<&mut Self>,\n        cx: &mut std::task::Context<'_>,\n        buf: &mut [u8],\n    ) -> std::task::Poll<std::io::Result<usize>> {\n        match self.project() {\n            NonSeekableStreamProj::Actual {\n                inner, read_any, ..\n            } => {\n                *read_any = true;\n                inner.poll_read(cx, buf)\n            }\n            // NonSeekableStream::Initial does not support reading because it is just much easier\n            // to have the mutex in place where one does not poll the contents, or that's how it\n            // seemed originally. 
If there is a version upgrade which changes the cloning, then\n            // that support needs to be hacked in.\n            //\n            // including {self:?} into the message would be useful, but unsure how to unproject.\n            _ => std::task::Poll::Ready(Err(std::io::Error::other(\n                \"cloned or initial values cannot be read\",\n            ))),\n        }\n    }\n}\n\nimpl<S> Clone for NonSeekableStream<S> {\n    /// Weird clone implementation exists to support the sdk doing cloning before issuing the first\n    /// request, see type documentation.\n    fn clone(&self) -> Self {\n        use NonSeekableStream::*;\n\n        match self {\n            Initial { inner, len } => {\n                if let Some(inner) = inner.lock().unwrap().take() {\n                    Actual {\n                        inner,\n                        len: *len,\n                        read_any: false,\n                    }\n                } else {\n                    Self::Cloned { len_was: *len }\n                }\n            }\n            Actual { len, .. } => Cloned { len_was: *len },\n            Cloned { len_was } => Cloned { len_was: *len_was },\n        }\n    }\n}\n\n#[async_trait::async_trait]\nimpl<S> azure_core::SeekableStream for NonSeekableStream<S>\nwhere\n    S: Stream<Item = std::io::Result<Bytes>> + Unpin + Send + Sync + 'static,\n{\n    async fn reset(&mut self) -> azure_core::error::Result<()> {\n        use NonSeekableStream::*;\n\n        let msg = match self {\n            Initial { inner, .. } => {\n                if inner.get_mut().unwrap().is_some() {\n                    return Ok(());\n                } else {\n                    \"reset after first clone is not supported\"\n                }\n            }\n            Actual { read_any, .. } if !*read_any => return Ok(()),\n            Actual { .. } => \"reset after reading is not supported\",\n            Cloned { .. 
} => \"reset after second clone is not supported\",\n        };\n        Err(azure_core::error::Error::new(\n            azure_core::error::ErrorKind::Io,\n            std::io::Error::other(msg),\n        ))\n    }\n\n    // Note: it is not documented if this should be the total or remaining length, total passes the\n    // tests.\n    fn len(&self) -> usize {\n        use NonSeekableStream::*;\n        match self {\n            Initial { len, .. } => *len,\n            Actual { len, .. } => *len,\n            Cloned { len_was, .. } => *len_was,\n        }\n    }\n}\n"
  },
  {
    "path": "libs/remote_storage/src/config.rs",
    "content": "use std::fmt::Debug;\nuse std::num::NonZeroUsize;\nuse std::str::FromStr;\nuse std::time::Duration;\n\nuse aws_sdk_s3::types::StorageClass;\nuse camino::Utf8PathBuf;\nuse serde::{Deserialize, Serialize};\n\nuse crate::{\n    DEFAULT_MAX_KEYS_PER_LIST_RESPONSE, DEFAULT_REMOTE_STORAGE_AZURE_CONCURRENCY_LIMIT,\n    DEFAULT_REMOTE_STORAGE_LOCALFS_CONCURRENCY_LIMIT, DEFAULT_REMOTE_STORAGE_S3_CONCURRENCY_LIMIT,\n};\n\n/// External backup storage configuration, enough for creating a client for that storage.\n#[derive(Debug, Clone, PartialEq, Eq, Deserialize, Serialize)]\npub struct RemoteStorageConfig {\n    /// The storage connection configuration.\n    #[serde(flatten)]\n    pub storage: RemoteStorageKind,\n    /// A common timeout enforced for all requests after concurrency limiter permit has been\n    /// acquired.\n    #[serde(\n        with = \"humantime_serde\",\n        default = \"default_timeout\",\n        skip_serializing_if = \"is_default_timeout\"\n    )]\n    pub timeout: Duration,\n    /// Alternative timeout used for metadata objects which are expected to be small\n    #[serde(\n        with = \"humantime_serde\",\n        default = \"default_small_timeout\",\n        skip_serializing_if = \"is_default_small_timeout\"\n    )]\n    pub small_timeout: Duration,\n}\n\nimpl RemoteStorageKind {\n    pub fn bucket_name(&self) -> Option<&str> {\n        match self {\n            RemoteStorageKind::LocalFs { .. } => None,\n            RemoteStorageKind::AwsS3(config) => Some(&config.bucket_name),\n            RemoteStorageKind::AzureContainer(config) => Some(&config.container_name),\n            RemoteStorageKind::GCS(config) => Some(&config.bucket_name),\n        }\n    }\n}\n\nimpl RemoteStorageConfig {\n    /// Helper to fetch the configured concurrency limit.\n    pub fn concurrency_limit(&self) -> usize {\n        match &self.storage {\n            RemoteStorageKind::LocalFs { .. 
} => DEFAULT_REMOTE_STORAGE_LOCALFS_CONCURRENCY_LIMIT,\n            RemoteStorageKind::AwsS3(c) => c.concurrency_limit.into(),\n            RemoteStorageKind::GCS(c) => c.concurrency_limit.into(),\n            RemoteStorageKind::AzureContainer(c) => c.concurrency_limit.into(),\n        }\n    }\n}\n\nfn default_timeout() -> Duration {\n    RemoteStorageConfig::DEFAULT_TIMEOUT\n}\n\nfn default_small_timeout() -> Duration {\n    RemoteStorageConfig::DEFAULT_SMALL_TIMEOUT\n}\n\nfn is_default_timeout(d: &Duration) -> bool {\n    *d == RemoteStorageConfig::DEFAULT_TIMEOUT\n}\n\nfn is_default_small_timeout(d: &Duration) -> bool {\n    *d == RemoteStorageConfig::DEFAULT_SMALL_TIMEOUT\n}\n\n/// A kind of a remote storage to connect to, with its connection configuration.\n#[derive(Debug, Clone, PartialEq, Eq, Deserialize, Serialize)]\n#[serde(untagged)]\npub enum RemoteStorageKind {\n    /// Storage based on local file system.\n    /// Specify a root folder to place all stored files into.\n    LocalFs { local_path: Utf8PathBuf },\n    /// AWS S3 based storage, storing all files in the S3 bucket\n    /// specified by the config\n    AwsS3(S3Config),\n    /// Azure Blob based storage, storing all files in the container\n    /// specified by the config\n    AzureContainer(AzureConfig),\n    /// Google Cloud based storage, storing all files in the GCS bucket\n    /// specified by the config\n    GCS(GCSConfig),\n}\n\n#[derive(Deserialize)]\n#[serde(tag = \"type\")]\n/// Version of RemoteStorageKind which deserializes with type: LocalFs | AwsS3 | AzureContainer\n/// Needed for endpoint storage service\npub enum TypedRemoteStorageKind {\n    LocalFs { local_path: Utf8PathBuf },\n    AwsS3(S3Config),\n    AzureContainer(AzureConfig),\n}\n\nimpl From<TypedRemoteStorageKind> for RemoteStorageKind {\n    fn from(value: TypedRemoteStorageKind) -> Self {\n        match value {\n            TypedRemoteStorageKind::LocalFs { local_path } => {\n                RemoteStorageKind::LocalFs { 
local_path }\n            }\n            TypedRemoteStorageKind::AwsS3(v) => RemoteStorageKind::AwsS3(v),\n            TypedRemoteStorageKind::AzureContainer(v) => RemoteStorageKind::AzureContainer(v),\n        }\n    }\n}\n\n/// AWS S3 bucket coordinates and access credentials to manage the bucket contents (read and write).\n#[derive(Clone, PartialEq, Eq, Deserialize, Serialize)]\npub struct S3Config {\n    /// Name of the bucket to connect to.\n    pub bucket_name: String,\n    /// The region where the bucket is located.\n    pub bucket_region: String,\n    /// A \"subfolder\" in the bucket, to use the same bucket separately by multiple remote storage users at once.\n    pub prefix_in_bucket: Option<String>,\n    /// A base URL to send S3 requests to.\n    /// By default, the endpoint is derived from a region name, assuming it's\n    /// an AWS S3 region name, erroring on wrong region name.\n    /// Endpoint provides a way to support other S3 flavors and their regions.\n    ///\n    /// Example: `http://127.0.0.1:5000`\n    pub endpoint: Option<String>,\n    /// AWS S3 has various limits on its API calls, which we must not exceed.\n    /// See [`DEFAULT_REMOTE_STORAGE_S3_CONCURRENCY_LIMIT`] for more details.\n    #[serde(default = \"default_remote_storage_s3_concurrency_limit\")]\n    pub concurrency_limit: NonZeroUsize,\n    #[serde(default = \"default_max_keys_per_list_response\")]\n    pub max_keys_per_list_response: Option<i32>,\n    #[serde(\n        deserialize_with = \"deserialize_storage_class\",\n        serialize_with = \"serialize_storage_class\",\n        default\n    )]\n    pub upload_storage_class: Option<StorageClass>,\n}\n\nfn default_remote_storage_s3_concurrency_limit() -> NonZeroUsize {\n    DEFAULT_REMOTE_STORAGE_S3_CONCURRENCY_LIMIT\n        .try_into()\n        .unwrap()\n}\n\nfn default_max_keys_per_list_response() -> Option<i32> {\n    DEFAULT_MAX_KEYS_PER_LIST_RESPONSE\n}\n\nfn default_azure_conn_pool_size() -> usize {\n    // By 
default, the Azure SDK does no connection pooling, due to historic reports of hard-to-reproduce issues\n    // (https://github.com/hyperium/hyper/issues/2312)\n    //\n    // However, using connection pooling is important to avoid exhausting client ports when\n    // doing huge numbers of requests (https://github.com/neondatabase/cloud/issues/20971)\n    //\n    // We therefore enable a modest pool size by default: this may be configured to zero if\n    // issues like the alleged upstream hyper issue appear.\n    8\n}\n\nimpl Debug for S3Config {\n    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {\n        f.debug_struct(\"S3Config\")\n            .field(\"bucket_name\", &self.bucket_name)\n            .field(\"bucket_region\", &self.bucket_region)\n            .field(\"prefix_in_bucket\", &self.prefix_in_bucket)\n            .field(\"concurrency_limit\", &self.concurrency_limit)\n            .field(\n                \"max_keys_per_list_response\",\n                &self.max_keys_per_list_response,\n            )\n            .finish()\n    }\n}\n\n#[derive(Clone, PartialEq, Eq, Deserialize, Serialize)]\npub struct GCSConfig {\n    /// Name of the bucket to connect to.\n    pub bucket_name: String,\n    /// A \"subfolder\" in the bucket, to use the same bucket separately by multiple remote storage users at once.\n    pub prefix_in_bucket: Option<String>,\n    #[serde(default = \"default_remote_storage_s3_concurrency_limit\")]\n    pub concurrency_limit: NonZeroUsize,\n    #[serde(default = \"default_max_keys_per_list_response\")]\n    pub max_keys_per_list_response: Option<i32>,\n}\n\nimpl Debug for GCSConfig {\n    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {\n        f.debug_struct(\"GCSConfig\")\n            .field(\"bucket_name\", &self.bucket_name)\n            .field(\"prefix_in_bucket\", &self.prefix_in_bucket)\n            .field(\"concurrency_limit\", &self.concurrency_limit)\n            .field(\n              
  \"max_keys_per_list_response\",\n                &self.max_keys_per_list_response,\n            )\n            .finish()\n    }\n}\n\n/// Azure  bucket coordinates and access credentials to manage the bucket contents (read and write).\n#[derive(Clone, PartialEq, Eq, Serialize, Deserialize)]\npub struct AzureConfig {\n    /// Name of the container to connect to.\n    pub container_name: String,\n    /// Name of the storage account the container is inside of\n    pub storage_account: Option<String>,\n    /// The region where the bucket is located at.\n    pub container_region: String,\n    /// A \"subfolder\" in the container, to use the same container separately by multiple remote storage users at once.\n    pub prefix_in_container: Option<String>,\n    /// Azure has various limits on its API calls, we need not to exceed those.\n    /// See [`DEFAULT_REMOTE_STORAGE_AZURE_CONCURRENCY_LIMIT`] for more details.\n    #[serde(default = \"default_remote_storage_azure_concurrency_limit\")]\n    pub concurrency_limit: NonZeroUsize,\n    #[serde(default = \"default_max_keys_per_list_response\")]\n    pub max_keys_per_list_response: Option<i32>,\n    #[serde(default = \"default_azure_conn_pool_size\")]\n    pub conn_pool_size: usize,\n    /* BEGIN_HADRON */\n    #[serde(default = \"default_azure_put_block_size_mb\")]\n    pub put_block_size_mb: Option<usize>,\n    /* END_HADRON */\n}\n\n/* BEGIN_HADRON */\nfn default_azure_put_block_size_mb() -> Option<usize> {\n    // Disable parallel upload by default.\n    Some(0)\n}\n/* END_HADRON */\n\nfn default_remote_storage_azure_concurrency_limit() -> NonZeroUsize {\n    NonZeroUsize::new(DEFAULT_REMOTE_STORAGE_AZURE_CONCURRENCY_LIMIT).unwrap()\n}\n\nimpl Debug for AzureConfig {\n    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {\n        f.debug_struct(\"AzureConfig\")\n            .field(\"bucket_name\", &self.container_name)\n            .field(\"storage_account\", &self.storage_account)\n            
.field(\"bucket_region\", &self.container_region)\n            .field(\"prefix_in_container\", &self.prefix_in_container)\n            .field(\"concurrency_limit\", &self.concurrency_limit)\n            .field(\n                \"max_keys_per_list_response\",\n                &self.max_keys_per_list_response,\n            )\n            /* BEGIN_HADRON */\n            .field(\"put_block_size_mb\", &self.put_block_size_mb)\n            /* END_HADRON */\n            .finish()\n    }\n}\n\nfn deserialize_storage_class<'de, D: serde::Deserializer<'de>>(\n    deserializer: D,\n) -> Result<Option<StorageClass>, D::Error> {\n    Option::<String>::deserialize(deserializer).and_then(|s| {\n        if let Some(s) = s {\n            use serde::de::Error;\n            let storage_class = StorageClass::from_str(&s).expect(\"infallible\");\n            #[allow(deprecated)]\n            if matches!(storage_class, StorageClass::Unknown(_)) {\n                return Err(D::Error::custom(format!(\n                    \"Specified storage class unknown to SDK: '{s}'. 
Allowed values: {:?}\",\n                    StorageClass::values()\n                )));\n            }\n            Ok(Some(storage_class))\n        } else {\n            Ok(None)\n        }\n    })\n}\n\nfn serialize_storage_class<S: serde::Serializer>(\n    val: &Option<StorageClass>,\n    serializer: S,\n) -> Result<S::Ok, S::Error> {\n    let val = val.as_ref().map(StorageClass::as_str);\n    Option::<&str>::serialize(&val, serializer)\n}\n\nimpl RemoteStorageConfig {\n    pub const DEFAULT_TIMEOUT: Duration = std::time::Duration::from_secs(120);\n    pub const DEFAULT_SMALL_TIMEOUT: Duration = std::time::Duration::from_secs(30);\n\n    pub fn from_toml(toml: &toml_edit::Item) -> anyhow::Result<RemoteStorageConfig> {\n        Ok(utils::toml_edit_ext::deserialize_item(toml)?)\n    }\n\n    pub fn from_toml_str(input: &str) -> anyhow::Result<RemoteStorageConfig> {\n        let toml_document = toml_edit::DocumentMut::from_str(input)?;\n        if let Some(item) = toml_document.get(\"remote_storage\") {\n            return Self::from_toml(item);\n        }\n        Self::from_toml(toml_document.as_item())\n    }\n}\n\n#[cfg(test)]\nmod tests {\n    use super::*;\n\n    fn parse(input: &str) -> anyhow::Result<RemoteStorageConfig> {\n        RemoteStorageConfig::from_toml_str(input)\n    }\n\n    #[test]\n    fn parse_localfs_config_with_timeout() {\n        let input = \"local_path = '.'\ntimeout = '5s'\";\n\n        let config = parse(input).unwrap();\n\n        assert_eq!(\n            config,\n            RemoteStorageConfig {\n                storage: RemoteStorageKind::LocalFs {\n                    local_path: Utf8PathBuf::from(\".\")\n                },\n                timeout: Duration::from_secs(5),\n                small_timeout: RemoteStorageConfig::DEFAULT_SMALL_TIMEOUT\n            }\n        );\n    }\n\n    #[test]\n    fn test_gcs_parsing() {\n        let toml = \"\\\n    bucket_name = 'foo-bar'\n    prefix_in_bucket = 'pageserver/'\n    \";\n\n   
     let config = parse(toml).unwrap();\n\n        assert_eq!(\n            config,\n            RemoteStorageConfig {\n                storage: RemoteStorageKind::GCS(GCSConfig {\n                    bucket_name: \"foo-bar\".into(),\n                    prefix_in_bucket: Some(\"pageserver/\".into()),\n                    max_keys_per_list_response: DEFAULT_MAX_KEYS_PER_LIST_RESPONSE,\n                    concurrency_limit: std::num::NonZero::new(100).unwrap(),\n                }),\n                timeout: Duration::from_secs(120),\n                small_timeout: RemoteStorageConfig::DEFAULT_SMALL_TIMEOUT\n            }\n        );\n    }\n\n    #[test]\n    fn test_s3_parsing() {\n        let toml = \"\\\n    bucket_name = 'foo-bar'\n    bucket_region = 'eu-central-1'\n    upload_storage_class = 'INTELLIGENT_TIERING'\n    timeout = '7s'\n    \";\n\n        let config = parse(toml).unwrap();\n\n        assert_eq!(\n            config,\n            RemoteStorageConfig {\n                storage: RemoteStorageKind::AwsS3(S3Config {\n                    bucket_name: \"foo-bar\".into(),\n                    bucket_region: \"eu-central-1\".into(),\n                    prefix_in_bucket: None,\n                    endpoint: None,\n                    concurrency_limit: default_remote_storage_s3_concurrency_limit(),\n                    max_keys_per_list_response: DEFAULT_MAX_KEYS_PER_LIST_RESPONSE,\n                    upload_storage_class: Some(StorageClass::IntelligentTiering),\n                }),\n                timeout: Duration::from_secs(7),\n                small_timeout: RemoteStorageConfig::DEFAULT_SMALL_TIMEOUT\n            }\n        );\n    }\n\n    #[test]\n    fn test_storage_class_serde_roundtrip() {\n        let classes = [\n            None,\n            Some(StorageClass::Standard),\n            Some(StorageClass::IntelligentTiering),\n        ];\n        for class in classes {\n            #[derive(Serialize, Deserialize)]\n            struct Wrapper 
{\n                #[serde(\n                    deserialize_with = \"deserialize_storage_class\",\n                    serialize_with = \"serialize_storage_class\"\n                )]\n                class: Option<StorageClass>,\n            }\n            let wrapped = Wrapper {\n                class: class.clone(),\n            };\n            let serialized = serde_json::to_string(&wrapped).unwrap();\n            let deserialized: Wrapper = serde_json::from_str(&serialized).unwrap();\n            assert_eq!(class, deserialized.class);\n        }\n    }\n\n    #[test]\n    fn test_azure_parsing() {\n        let toml = \"\\\n    container_name = 'foo-bar'\n    container_region = 'westeurope'\n    upload_storage_class = 'INTELLIGENT_TIERING'\n    timeout = '7s'\n    conn_pool_size = 8\n    put_block_size_mb = 1024\n    \";\n\n        let config = parse(toml).unwrap();\n\n        assert_eq!(\n            config,\n            RemoteStorageConfig {\n                storage: RemoteStorageKind::AzureContainer(AzureConfig {\n                    container_name: \"foo-bar\".into(),\n                    storage_account: None,\n                    container_region: \"westeurope\".into(),\n                    prefix_in_container: None,\n                    concurrency_limit: default_remote_storage_azure_concurrency_limit(),\n                    max_keys_per_list_response: DEFAULT_MAX_KEYS_PER_LIST_RESPONSE,\n                    conn_pool_size: 8,\n                    /* BEGIN_HADRON */\n                    put_block_size_mb: Some(1024),\n                    /* END_HADRON */\n                }),\n                timeout: Duration::from_secs(7),\n                small_timeout: RemoteStorageConfig::DEFAULT_SMALL_TIMEOUT\n            }\n        );\n    }\n}\n"
  },
  {
    "path": "libs/remote_storage/src/error.rs",
    "content": "/// Reasons for downloads or listings to fail.\n#[derive(Debug)]\npub enum DownloadError {\n    /// Validation or other error happened due to user input.\n    BadInput(anyhow::Error),\n    /// The file was not found in the remote storage.\n    NotFound,\n    /// The caller provided an ETag, and the file was not modified.\n    Unmodified,\n    /// A cancellation token aborted the download, typically during\n    /// tenant detach or process shutdown.\n    Cancelled,\n    /// A timeout happened while executing the request. Possible reasons:\n    /// - stuck tcp connection\n    ///\n    /// Concurrency control is not timed within timeout.\n    Timeout,\n    /// Some integrity/consistency check failed during download. This is used during\n    /// timeline loads to cancel the load of a tenant if some timeline detects fatal corruption.\n    Fatal(String),\n    /// The file was found in the remote storage, but the download failed.\n    Other(anyhow::Error),\n}\n\nimpl std::fmt::Display for DownloadError {\n    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {\n        match self {\n            DownloadError::BadInput(e) => {\n                write!(f, \"Failed to download a remote file due to user input: {e}\")\n            }\n            DownloadError::NotFound => write!(f, \"No file found for the remote object id given\"),\n            DownloadError::Unmodified => write!(f, \"File was not modified\"),\n            DownloadError::Cancelled => write!(f, \"Cancelled, shutting down\"),\n            DownloadError::Timeout => write!(f, \"timeout\"),\n            DownloadError::Fatal(why) => write!(f, \"Fatal read error: {why}\"),\n            DownloadError::Other(e) => write!(f, \"Failed to download a remote file: {e:?}\"),\n        }\n    }\n}\n\nimpl std::error::Error for DownloadError {}\n\nimpl DownloadError {\n    /// Returns true if the error should not be retried with backoff\n    pub fn is_permanent(&self) -> bool {\n        use 
DownloadError::*;\n        match self {\n            BadInput(_) | NotFound | Unmodified | Fatal(_) | Cancelled => true,\n            Timeout | Other(_) => false,\n        }\n    }\n\n    pub fn is_cancelled(&self) -> bool {\n        matches!(self, DownloadError::Cancelled)\n    }\n}\n\nimpl From<std::io::Error> for DownloadError {\n    fn from(value: std::io::Error) -> Self {\n        let needs_unwrap = value.kind() == std::io::ErrorKind::Other\n            && value\n                .get_ref()\n                .and_then(|x| x.downcast_ref::<DownloadError>())\n                .is_some();\n\n        if needs_unwrap {\n            *value\n                .into_inner()\n                .expect(\"just checked\")\n                .downcast::<DownloadError>()\n                .expect(\"just checked\")\n        } else {\n            DownloadError::Other(value.into())\n        }\n    }\n}\n\n#[derive(Debug)]\npub enum TimeTravelError {\n    /// Validation or other error happened due to user input.\n    BadInput(anyhow::Error),\n    /// The used remote storage does not have time travel recovery implemented\n    Unimplemented,\n    /// The number of versions/deletion markers is above our limit.\n    TooManyVersions,\n    /// A cancellation token aborted the process, typically during\n    /// request closure or process shutdown.\n    Cancelled,\n    /// Other errors\n    Other(anyhow::Error),\n}\n\nimpl std::fmt::Display for TimeTravelError {\n    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {\n        match self {\n            TimeTravelError::BadInput(e) => {\n                write!(\n                    f,\n                    \"Failed to time travel recover a prefix due to user input: {e}\"\n                )\n            }\n            TimeTravelError::Unimplemented => write!(\n                f,\n                \"time travel recovery is not implemented for the current storage backend\"\n            ),\n            TimeTravelError::Cancelled => 
write!(f, \"Cancelled, shutting down\"),\n            TimeTravelError::TooManyVersions => {\n                write!(f, \"Number of versions/delete markers above limit\")\n            }\n            TimeTravelError::Other(e) => write!(f, \"Failed to time travel recover a prefix: {e:?}\"),\n        }\n    }\n}\n\nimpl std::error::Error for TimeTravelError {}\n\n/// Plain cancelled error.\n///\n/// By design this type does not implement `std::error::Error` so it cannot be put as the root\n/// cause of `std::io::Error` or `anyhow::Error`. It should never need to be exposed out of this\n/// crate.\n///\n/// It exists to implement permit acquiring in `{Download,TimeTravel}Error` and `anyhow::Error` returning\n/// operations and ensuring that those get converted to proper versions with just `?`.\n#[derive(Debug)]\npub(crate) struct Cancelled;\n\nimpl From<Cancelled> for anyhow::Error {\n    fn from(_: Cancelled) -> Self {\n        anyhow::Error::new(TimeoutOrCancel::Cancel)\n    }\n}\n\nimpl From<Cancelled> for TimeTravelError {\n    fn from(_: Cancelled) -> Self {\n        TimeTravelError::Cancelled\n    }\n}\n\nimpl From<Cancelled> for TimeoutOrCancel {\n    fn from(_: Cancelled) -> Self {\n        TimeoutOrCancel::Cancel\n    }\n}\n\nimpl From<Cancelled> for DownloadError {\n    fn from(_: Cancelled) -> Self {\n        DownloadError::Cancelled\n    }\n}\n\n/// This type is used as the root cause for timeouts and cancellations with `anyhow::Error` returning\n/// RemoteStorage methods.\n///\n/// For use with `utils::backoff::retry` and `anyhow::Error` returning operations there is\n/// `TimeoutOrCancel::caused_by_cancel` method to query \"proper form\" errors.\n#[derive(Debug)]\npub enum TimeoutOrCancel {\n    Timeout,\n    Cancel,\n}\n\nimpl std::fmt::Display for TimeoutOrCancel {\n    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {\n        use TimeoutOrCancel::*;\n        match self {\n            Timeout => write!(f, \"timeout\"),\n        
    Cancel => write!(f, \"cancel\"),\n        }\n    }\n}\n\nimpl std::error::Error for TimeoutOrCancel {}\n\nimpl TimeoutOrCancel {\n    /// Returns true if the error was caused by [`TimeoutOrCancel::Cancel`].\n    pub fn caused_by_cancel(error: &anyhow::Error) -> bool {\n        error\n            .root_cause()\n            .downcast_ref::<Self>()\n            .is_some_and(Self::is_cancel)\n    }\n\n    pub fn is_cancel(&self) -> bool {\n        matches!(self, TimeoutOrCancel::Cancel)\n    }\n\n    pub fn is_timeout(&self) -> bool {\n        matches!(self, TimeoutOrCancel::Timeout)\n    }\n}\n\n/// This conversion is used when [`crate::support::DownloadStream`] notices a cancellation or\n/// timeout to wrap it in an `std::io::Error`.\nimpl From<TimeoutOrCancel> for std::io::Error {\n    fn from(value: TimeoutOrCancel) -> Self {\n        let e = DownloadError::from(value);\n        std::io::Error::other(e)\n    }\n}\n\nimpl From<TimeoutOrCancel> for DownloadError {\n    fn from(value: TimeoutOrCancel) -> Self {\n        use TimeoutOrCancel::*;\n\n        match value {\n            Timeout => DownloadError::Timeout,\n            Cancel => DownloadError::Cancelled,\n        }\n    }\n}\n"
  },
  {
    "path": "libs/remote_storage/src/gcs_bucket.rs",
    "content": "use crate::config::GCSConfig;\nuse crate::error::Cancelled;\npub(super) use crate::metrics::RequestKind;\nuse crate::metrics::{AttemptOutcome, start_counting_cancelled_wait, start_measuring_requests};\nuse crate::{\n    ConcurrencyLimiter, Download, DownloadError, DownloadOpts, GCS_SCOPES, Listing, ListingMode,\n    ListingObject, MAX_KEYS_PER_DELETE_GCS, REMOTE_STORAGE_PREFIX_SEPARATOR, RemotePath,\n    RemoteStorage, StorageMetadata, TimeTravelError, TimeoutOrCancel, GCSVersion, VersionId,\n    GCSVersionListing, \n};\nuse anyhow::Context;\nuse azure_core::Etag;\nuse bytes::Bytes;\nuse bytes::BytesMut;\nuse chrono::DateTime;\nuse futures::stream::Stream;\nuse futures::stream::TryStreamExt;\nuse futures_util::StreamExt;\nuse gcp_auth::{Token, TokenProvider};\nuse http::Method;\nuse http::StatusCode;\nuse reqwest::{Client, header};\nuse scopeguard::ScopeGuard;\nuse serde::{Deserialize, Deserializer, Serialize, de};\nuse std::collections::HashMap;\nuse std::fmt::Debug;\nuse std::num::{NonZeroU32, ParseIntError};\nuse std::pin::{Pin, pin};\nuse std::str::FromStr;\nuse std::sync::Arc;\nuse std::time::Duration;\nuse std::time::SystemTime;\nuse tokio_util::codec::{BytesCodec, FramedRead};\nuse tokio_util::sync::CancellationToken;\nuse tracing;\nuse url::{ParseError, Url};\nuse utils::backoff;\nuse uuid::Uuid;\n\n// ---------\nfn to_system_time(timestamp: Option<String>) -> Option<SystemTime> {\n    timestamp\n        .and_then(|s| DateTime::parse_from_rfc3339(&s).ok())\n        .map(|s| s.into())\n}\n\n// ---------\npub struct GCSBucket {\n    token_provider: Arc<dyn TokenProvider>,\n    bucket_name: String,\n    prefix_in_bucket: Option<String>,\n    max_keys_per_list_response: Option<i32>,\n    concurrency_limiter: ConcurrencyLimiter,\n    pub timeout: Duration,\n}\n\nstruct GetObjectRequest {\n    bucket: String,\n    key: String,\n    etag: Option<String>,\n    range: Option<String>,\n}\n\n// ---------\n\nimpl GCSBucket {\n    pub async fn 
new(remote_storage_config: &GCSConfig, timeout: Duration) -> anyhow::Result<Self> {\n        tracing::debug!(\n            \"creating remote storage for gcs bucket {}\",\n            remote_storage_config.bucket_name\n        );\n\n        // clean up 'prefix_in_bucket' if user provides '/pageserver' or 'pageserver/'\n        let prefix_in_bucket = remote_storage_config\n            .prefix_in_bucket\n            .as_deref()\n            .map(|prefix| {\n                let mut prefix = prefix;\n                while prefix.starts_with(REMOTE_STORAGE_PREFIX_SEPARATOR) {\n                    prefix = &prefix[1..];\n                }\n\n                let mut prefix = prefix.to_string();\n                if prefix.ends_with(REMOTE_STORAGE_PREFIX_SEPARATOR) {\n                    prefix.pop();\n                }\n\n                prefix\n            });\n\n        // get GOOGLE_APPLICATION_CREDENTIALS\n        let provider = gcp_auth::provider().await?;\n\n        Ok(GCSBucket {\n            token_provider: Arc::clone(&provider),\n            bucket_name: remote_storage_config.bucket_name.clone(),\n            prefix_in_bucket,\n            timeout,\n            max_keys_per_list_response: remote_storage_config.max_keys_per_list_response,\n            concurrency_limiter: ConcurrencyLimiter::new(\n                remote_storage_config.concurrency_limit.get(),\n            ),\n        })\n    }\n\n    // convert `RemotePath` -> `String`\n    pub fn relative_path_to_gcs_object(&self, path: &RemotePath) -> String {\n        let path_string = path.get_path().as_str();\n        match &self.prefix_in_bucket {\n            Some(prefix) => prefix.clone() + \"/\" + path_string,\n            None => path_string.to_string(),\n        }\n    }\n\n    // convert `String` -> `RemotePath`\n    pub fn gcs_object_to_relative_path(&self, key: &str) -> RemotePath {\n        let relative_path =\n            match key.strip_prefix(self.prefix_in_bucket.as_deref().unwrap_or_default()) 
{\n                Some(stripped) => stripped,\n                // we rely on GCS to return properly prefixed paths\n                // for requests with a certain prefix\n                None => panic!(\n                    \"Key {} does not start with bucket prefix {:?}\",\n                    key, self.prefix_in_bucket\n                ),\n            };\n        RemotePath(\n            relative_path\n                .split(REMOTE_STORAGE_PREFIX_SEPARATOR)\n                .collect(),\n        )\n    }\n\n    pub fn bucket_name(&self) -> &str {\n        &self.bucket_name\n    }\n\n    fn max_keys_per_delete(&self) -> usize {\n        MAX_KEYS_PER_DELETE_GCS\n    }\n\n    async fn permit(\n        &self,\n        kind: RequestKind,\n        cancel: &CancellationToken,\n    ) -> Result<tokio::sync::SemaphorePermit<'_>, Cancelled> {\n        let started_at = start_counting_cancelled_wait(kind);\n        let acquire = self.concurrency_limiter.acquire(kind);\n\n        let permit = tokio::select! {\n            permit = acquire => permit.expect(\"semaphore is never closed\"),\n            _ = cancel.cancelled() => return Err(Cancelled),\n        };\n\n        let started_at = ScopeGuard::into_inner(started_at);\n        crate::metrics::BUCKET_METRICS\n            .wait_seconds\n            .observe_elapsed(kind, started_at);\n\n        Ok(permit)\n    }\n\n    async fn owned_permit(\n        &self,\n        kind: RequestKind,\n        cancel: &CancellationToken,\n    ) -> Result<tokio::sync::OwnedSemaphorePermit, Cancelled> {\n        let started_at = start_counting_cancelled_wait(kind);\n        let acquire = self.concurrency_limiter.acquire_owned(kind);\n\n        let permit = tokio::select! 
{\n            permit = acquire => permit.expect(\"semaphore is never closed\"),\n            _ = cancel.cancelled() => return Err(Cancelled),\n        };\n\n        let started_at = ScopeGuard::into_inner(started_at);\n        crate::metrics::BUCKET_METRICS\n            .wait_seconds\n            .observe_elapsed(kind, started_at);\n        Ok(permit)\n    }\n\n    async fn list_versions_with_permit(\n        &self,\n        _permit: &tokio::sync::SemaphorePermit<'_>,\n        prefix: Option<&RemotePath>,\n        mode: ListingMode,\n        max_keys: Option<NonZeroU32>,\n        cancel: &CancellationToken,\n    ) -> Result<crate::GCSVersionListing, DownloadError> {\n\n        let warn_threshold = 3;\n        let max_retries = 10;\n        let is_permanent = |e: &_| matches!(e, DownloadError::Cancelled);\n        \n        // GCS only has versions, which may contain 'deleted_at'.\n        let mut versions = crate::GCSVersionListing::default();\n        let mut continuation_token = None;\n        let mut uri: String;\n        \n        let list_prefix = prefix\n            .map(|p| self.relative_path_to_gcs_object(p))\n            .or_else(|| {\n                self.prefix_in_bucket.clone().map(|mut s| {\n                    s.push(REMOTE_STORAGE_PREFIX_SEPARATOR);\n                    s\n                })\n            })\n            .unwrap();\n        \n        let mut versions_base_uri = format!(\n            \"https://storage.googleapis.com/storage/v1/b/{}/o?prefix={}&versions=true\",\n            self.bucket_name.clone(),\n            list_prefix,\n        );\n\n        if let ListingMode::WithDelimiter = mode {\n            versions_base_uri.push_str(&format!(\n                \"&delimiter={}\",\n                REMOTE_STORAGE_PREFIX_SEPARATOR.to_string()\n            ));\n        }\n       \n        loop {\n            \n            match &continuation_token {\n                Some(token) => {\n                    uri = format!(\"{}&pageToken={}\", 
&versions_base_uri, token);\n                },\n                None => {\n                    uri = versions_base_uri.clone();\n                },\n            }\n            \n            let mut req_uri = versions_base_uri.clone();\n\n            let response = backoff::retry(\n                || async {\n                     \n                    // fetch an array of results, keep looping to get them\n                    let op = Client::new()\n                        .get(&uri)\n                        .bearer_auth(\n                            self.token_provider\n                                .token(GCS_SCOPES)\n                                .await\n                                .map_err(|e: gcp_auth::Error| DownloadError::Other(e.into()))?\n                                .as_str()\n                        )\n                        .send();\n                    \n                        tokio::select! {\n                            res = op => res.map_err(|e| DownloadError::Other(e.into())),\n                            _ = cancel.cancelled() => Err(DownloadError::Cancelled),\n                        }\n\n                    },\n                    is_permanent,\n                    warn_threshold,\n                    max_retries,\n                    \"listing object versions\",\n                    cancel,\n                ) \n                .await\n                .ok_or_else(|| DownloadError::Cancelled)\n                .and_then(|x| x)?;\n                \n            let res = response.json::<GCSListResponse>()\n                .await\n                .map_err(|e| DownloadError::Other(e.into()))?;\n                    \n            // fill up our results vec, \n            continuation_token = res.next_page_token;\n            \n            let version_listing = \n                res.items\n                    .ok_or_else(|| DownloadError::Other(anyhow::anyhow!(\"no items returned\")))?\n                    .into_iter()\n                    
.map(| GCSObject { name, updated, time_deleted, generation, .. } | {\n                        // don't `filter_map`, a `None` for `last_modified` ('updated') is bad for\n                        // time travel, so catch it.\n                        if updated.is_none() {\n                           return Err(\n                               DownloadError::Other(\n                                   anyhow::anyhow!(\"no 'updated' field\")\n                               )\n                           )\n                        }\n                        Ok(\n                            GCSVersion {\n                                key: self.gcs_object_to_relative_path(&name),\n                                last_modified: to_system_time(updated).unwrap(),\n                                id: VersionId(generation.expect(\"no version id\")),\n                                time_deleted: to_system_time(time_deleted),\n                            }\n                        )\n                    }).collect::<Result<Vec<GCSVersion>, _>>();\n                \n            versions.versions.extend(version_listing?);\n\n            if let Some(max_keys) = max_keys {\n                if versions.versions.len() >= max_keys.get().try_into().unwrap() {\n                    return Err(DownloadError::Other(\n                        anyhow::anyhow!(\"max keys reached\") \n                    ));\n                }\n            }\n            \n            if continuation_token.is_none() {\n                break\n            }\n        }\n        \n        Ok(versions)\n        \n    }\n\n    async fn put_object(\n        &self,\n        byte_stream: impl Stream<Item = std::io::Result<Bytes>> + Send + Sync + 'static,\n        fs_size: usize,\n        to: &RemotePath,\n        cancel: &CancellationToken,\n        metadata: Option<StorageMetadata>,\n    ) -> anyhow::Result<()> {\n        let kind = RequestKind::Put;\n        let _permit = self.permit(kind, cancel).await?;\n        let 
started_at = start_measuring_requests(kind);\n\n        let multipart_uri = format!(\n            \"https://storage.googleapis.com/upload/storage/v1/b/{}/o?uploadType=multipart\",\n            self.bucket_name.clone()\n        );\n\n        let mut metadata = metadata.clone();\n        let gcs_path = self.relative_path_to_gcs_object(to);\n\n        // Always specify destination via `RemotePath` in multipart uploads\n        if metadata.is_none() {\n            metadata = Some(StorageMetadata::from([(\"name\", gcs_path.as_str())]));\n        } else {\n            metadata\n                .as_mut()\n                .map(|m| m.0.insert(\"name\".to_string(), gcs_path));\n        }\n\n        let metadata_body = serde_json::to_string(&metadata.map(|m| m.0))?;\n        let metadata_part = reqwest::multipart::Part::text(metadata_body)\n            .mime_str(\"application/json; charset=UTF-8\")?;\n\n        let stream_body = reqwest::Body::wrap_stream(byte_stream);\n        let data_part = reqwest::multipart::Part::stream_with_length(stream_body, fs_size as u64)\n            .mime_str(\"application/octet-stream\")?;\n\n        let mut form = reqwest::multipart::Form::new()\n            .part(\"metadata\", metadata_part)\n            .part(\"bodystream\", data_part);\n\n        let mut headers = header::HeaderMap::new();\n        headers.insert(\n            header::CONTENT_TYPE,\n            header::HeaderValue::from_str(&format!(\n                \"multipart/related; boundary={}\",\n                form.boundary()\n            ))?,\n        );\n\n        let upload = Client::new()\n            .post(multipart_uri)\n            .bearer_auth(self.token_provider.token(GCS_SCOPES).await?.as_str())\n            .multipart(form)\n            .headers(headers)\n            .send();\n\n        let upload = tokio::time::timeout(self.timeout, upload);\n\n        let res = tokio::select! 
{\n            res = upload => res,\n            _ = cancel.cancelled() => return Err(TimeoutOrCancel::Cancel.into()),\n        };\n\n        if let Ok(inner) = &res {\n            let started_at = ScopeGuard::into_inner(started_at);\n            crate::metrics::BUCKET_METRICS\n                .req_seconds\n                .observe_elapsed(kind, inner, started_at);\n        }\n        match res {\n            Ok(Ok(res)) => {\n                if !res.status().is_success() {\n                    match res.status() {\n                        _ => Err(anyhow::anyhow!(\"GCS PUT error \\n\\t {:?}\", res)),\n                    }\n                } else {\n                    let body = res\n                        .text()\n                        .await\n                        .map_err(|e: reqwest::Error| DownloadError::Other(e.into()))?;\n\n                    let resp: GCSObject = serde_json::from_str(&body)\n                        .map_err(|e: serde_json::Error| DownloadError::Other(e.into()))?;\n\n                    if !resp.size.is_some_and(|s| s == fs_size as i64) {\n                        // very unlikely\n                        return Err(anyhow::anyhow!(\n                            \"Boundary string from 'multipart/related' HTTP upload occurred in payload\"\n                        ));\n                    };\n\n                    Ok(())\n                }\n            }\n            Ok(Err(reqw)) => Err(reqw.into()),\n            Err(_timeout) => Err(TimeoutOrCancel::Timeout.into()),\n        }\n    }\n\n    async fn delete_oids(\n        &self,\n        delete_objects: &[String],\n        cancel: &CancellationToken,\n        _permit: &tokio::sync::SemaphorePermit<'_>,\n    ) -> anyhow::Result<()> {\n        let kind = RequestKind::Delete;\n        let mut cancel = std::pin::pin!(cancel.cancelled());\n\n        for chunk in delete_objects.chunks(MAX_KEYS_PER_DELETE_GCS) {\n            let started_at = start_measuring_requests(kind);\n\n            // 
Use this to report keys that didn't delete based on 'content_id'\n            let mut delete_objects_status = HashMap::new();\n\n            let mut form = reqwest::multipart::Form::new();\n            let bulk_uri = \"https://storage.googleapis.com/batch/storage/v1\";\n\n            for (index, path) in delete_objects.iter().enumerate() {\n                delete_objects_status.insert(index + 1, path.clone());\n\n                let path_to_delete: String =\n                    url::form_urlencoded::byte_serialize(path.trim_start_matches(\"/\").as_bytes())\n                        .collect();\n\n                let delete_req = format!(\n                    \"\n                    DELETE /storage/v1/b/{}/o/{} HTTP/1.1\\r\\n\\\n                    Content-Type: application/json\\r\\n\\\n                    accept: application/json\\r\\n\\\n                    content-length: 0\\r\\n\n                    \",\n                    self.bucket_name.clone(),\n                    path_to_delete\n                )\n                .trim()\n                .to_string();\n\n                let content_id = format!(\"<{}+{}>\", Uuid::new_v4(), index + 1);\n\n                let mut part_headers = header::HeaderMap::new();\n                part_headers.insert(\n                    header::CONTENT_TYPE,\n                    header::HeaderValue::from_static(\"application/http\"),\n                );\n                part_headers.insert(\n                    header::TRANSFER_ENCODING,\n                    header::HeaderValue::from_static(\"binary\"),\n                );\n                part_headers.insert(\n                    header::HeaderName::from_static(\"content-id\"),\n                    header::HeaderValue::from_str(&content_id)?,\n                );\n                let part = reqwest::multipart::Part::text(delete_req).headers(part_headers);\n\n                form = form.part(format!(\"request-{}\", index), part);\n            }\n\n            let mut headers = 
header::HeaderMap::new();\n            headers.insert(\n                header::CONTENT_TYPE,\n                header::HeaderValue::from_str(&format!(\n                    \"multipart/mixed; boundary={}\",\n                    form.boundary()\n                ))?,\n            );\n\n            let req = Client::new()\n                .post(bulk_uri)\n                .bearer_auth(self.token_provider.token(GCS_SCOPES).await?.as_str())\n                .multipart(form)\n                .headers(headers)\n                .send();\n\n            let resp = tokio::select! {\n                resp = req => resp,\n                _ = tokio::time::sleep(self.timeout) => return Err(TimeoutOrCancel::Timeout.into()),\n                _ = &mut cancel => return Err(TimeoutOrCancel::Cancel.into()),\n            };\n\n            let started_at = ScopeGuard::into_inner(started_at);\n            crate::metrics::BUCKET_METRICS\n                .req_seconds\n                .observe_elapsed(kind, &resp, started_at);\n\n            let resp = resp.context(\"request deletion\")?;\n\n            crate::metrics::BUCKET_METRICS\n                .deleted_objects_total\n                .inc_by(chunk.len() as u64);\n\n            let res_headers = resp.headers().to_owned();\n\n            let boundary = res_headers\n                .get(header::CONTENT_TYPE)\n                .unwrap()\n                .to_str()?\n                .split(\"=\")\n                .last()\n                .unwrap();\n\n            let res_body = resp.text().await?;\n\n            let parsed: HashMap<String, String> = res_body\n                .split(&format!(\"--{}\", boundary))\n                .filter_map(|c| {\n                    let mut lines = c.lines();\n\n                    let id = lines.find_map(|line| {\n                        line.strip_prefix(\"Content-ID:\")\n                            .and_then(|suf| suf.split('+').last())\n                            .and_then(|suf| suf.split('>').next())\n     
                       .map(|x| x.trim().to_string())\n                    });\n\n                    let status_code = lines.find_map(|line| {\n                        // Not sure if this protocol version shouldn't be so specific\n                        line.strip_prefix(\"HTTP/1.1\")\n                            .and_then(|x| x.split_whitespace().next())\n                            .map(|x| x.trim().to_string())\n                    });\n\n                    id.zip(status_code)\n                })\n                .collect();\n\n            // Gather failures\n            let errors: HashMap<usize, &String> = parsed\n                .iter()\n                .filter_map(|(x, y)| {\n                    let id = x.parse::<usize>().ok();\n                    if y == \"404\" {\n                        // GCS returns Error on 404, S3 doesn't. Warn and omit from failed count.\n                        // https://cloud.google.com/storage/docs/xml-api/delete-object\n                        tracing::warn!(\n                            \"DeleteObjects key {} {} NotFound. 
Already deleted.\",\n                            delete_objects_status.get(&id?).unwrap(),\n                            y\n                        );\n                        None\n                    } else if y.chars().next() != Some('2') {\n                        id.map(|v| (v, y))\n                    } else {\n                        None\n                    }\n                })\n                .collect();\n\n            if !errors.is_empty() {\n                // Report 10 of them like S3\n                const LOG_UP_TO_N_ERRORS: usize = 10;\n                for (id, code) in errors.iter().take(LOG_UP_TO_N_ERRORS) {\n                    tracing::warn!(\n                        \"DeleteObjects key {} failed with code: {}\",\n                        delete_objects_status.get(id).unwrap(),\n                        code\n                    );\n                }\n\n                return Err(anyhow::anyhow!(\n                    \"Failed to delete {}/{} objects\",\n                    errors.len(),\n                    chunk.len(),\n                ));\n            }\n        }\n\n        Ok(())\n    }\n\n    async fn head_object(\n        &self,\n        key: String,\n        cancel: &CancellationToken,\n    ) -> Result<GCSObject, DownloadError> {\n        let kind = RequestKind::Head;\n        let _permit = self.permit(kind, cancel).await?;\n\n        let encoded_path: String = url::form_urlencoded::byte_serialize(key.as_bytes()).collect();\n\n        let metadata_uri_mod = \"alt=json\";\n        let download_uri = format!(\n            \"https://storage.googleapis.com/storage/v1/b/{}/o/{}?{}\",\n            self.bucket_name.clone(),\n            encoded_path,\n            metadata_uri_mod\n        );\n\n        let head_future = Client::new()\n            .get(download_uri)\n            .bearer_auth(\n                self.token_provider\n                    .token(GCS_SCOPES)\n                    .await\n                    .map_err(|e: gcp_auth::Error| 
DownloadError::Other(e.into()))?\n                    .as_str(),\n            )\n            .send();\n\n        let started_at = start_measuring_requests(kind);\n\n        let head_future = tokio::time::timeout(self.timeout, head_future);\n\n        let res = tokio::select! {\n            res = head_future => res,\n            _ = cancel.cancelled() => return Err(TimeoutOrCancel::Cancel.into()),\n        };\n\n        let res = res.map_err(|_e| DownloadError::Timeout)?;\n\n        // do not incl. timeouts as errors in metrics but cancellations\n        let started_at = ScopeGuard::into_inner(started_at);\n        crate::metrics::BUCKET_METRICS\n            .req_seconds\n            .observe_elapsed(kind, &res, started_at);\n\n        let data = match res {\n            Ok(data) => {\n                if !data.status().is_success() {\n                    match data.status() {\n                        StatusCode::NOT_FOUND => return Err(DownloadError::NotFound),\n                        _ => {\n                            return Err(DownloadError::Other(anyhow::anyhow!(\n                                \"GCS head response contained no response body\"\n                            )));\n                        }\n                    }\n                } else {\n                    data\n                }\n            }\n            Err(e) => {\n                crate::metrics::BUCKET_METRICS.req_seconds.observe_elapsed(\n                    kind,\n                    AttemptOutcome::Err,\n                    started_at,\n                );\n\n                return Err(DownloadError::Other(\n                    anyhow::Error::new(e).context(\"error in HEAD of GCS object\"),\n                ));\n            }\n        };\n\n        let body = data\n            .text()\n            .await\n            .map_err(|e: reqwest::Error| DownloadError::Other(e.into()))?;\n\n        let resp: GCSObject = serde_json::from_str(&body)\n            .map_err(|e: serde_json::Error| 
DownloadError::Other(e.into()))?;\n\n        Ok(resp)\n    }\n\n    async fn list_objects_v2(&self, list_uri: String) -> anyhow::Result<reqwest::RequestBuilder> {\n        let res = Client::new()\n            .get(list_uri)\n            .bearer_auth(self.token_provider.token(GCS_SCOPES).await?.as_str());\n        Ok(res)\n    }\n\n    // need a 'bucket', a 'key', and a bytes 'range'.\n    async fn get_object(\n        &self,\n        request: GetObjectRequest,\n        cancel: &CancellationToken,\n    ) -> anyhow::Result<Download, DownloadError> {\n        let kind = RequestKind::Get;\n\n        let permit = self.owned_permit(kind, cancel).await?;\n\n        let started_at = start_measuring_requests(kind);\n\n        let encoded_path: String =\n            url::form_urlencoded::byte_serialize(request.key.as_bytes()).collect();\n\n        /// We do this in two parts:\n        /// 1. Serialize the metadata of the first request to get Etag, last modified, etc\n        /// 2. We do not .await the second request pass on the pinned stream to the 'get_object'\n        ///    caller\n        let metadata_uri_mod = \"alt=json\";\n        let download_uri = format!(\n            \"https://storage.googleapis.com/storage/v1/b/{}/o/{}?{}\",\n            self.bucket_name.clone(),\n            encoded_path,\n            metadata_uri_mod\n        );\n\n        let res = Client::new()\n            .get(download_uri)\n            .bearer_auth(\n                self.token_provider\n                    .token(GCS_SCOPES)\n                    .await\n                    .map_err(|e: gcp_auth::Error| DownloadError::Other(e.into()))?\n                    .as_str(),\n            )\n            .send();\n\n        let obj_metadata = tokio::select! 
{\n            res = res => res,\n            _ = tokio::time::sleep(self.timeout) => return Err(DownloadError::Timeout),\n            _ = cancel.cancelled() => return Err(DownloadError::Cancelled),\n        };\n\n        let resp = match obj_metadata {\n            Ok(resp) => {\n                if !resp.status().is_success() {\n                    match resp.status() {\n                        StatusCode::NOT_FOUND => return Err(DownloadError::NotFound),\n                        _ => {\n                            return Err(DownloadError::Other(anyhow::anyhow!(\n                                \"GCS GET response contained no response body\"\n                            )));\n                        }\n                    }\n                } else {\n                    resp\n                }\n            }\n            _ => {\n                return Err(DownloadError::Other(anyhow::anyhow!(\"download gcs object\")));\n            }\n        };\n\n        let body = resp\n            .text()\n            .await\n            .map_err(|e: reqwest::Error| DownloadError::Other(e.into()))?;\n\n        let resp: GCSObject = serde_json::from_str(&body)\n            .map_err(|e: serde_json::Error| DownloadError::Other(e.into()))?;\n\n        // 2. 
Byte Stream request\n        let mut headers = header::HeaderMap::new();\n        let bytes_range = match &request.range {\n           Some(s) => header::HeaderValue::from_str(s).unwrap(),\n           None => header::HeaderValue::from_static(\"bytes=0-\"),\n        };\n        \n        tracing::info!(\n            \"performing object download with {:?} range header\",\n            bytes_range\n        );\n        \n        headers.insert(header::RANGE, bytes_range);\n\n        let encoded_path: String =\n            url::form_urlencoded::byte_serialize(request.key.as_bytes()).collect();\n\n        let stream_uri_mod = \"alt=media\";\n        // See: https://cloud.google.com/storage/docs/streaming-downloads#stream_a_download\n        // REST APIs > JSON API > 1st bullet  point\n        let generation = resp\n            .generation\n            .expect(\"object did not contain generation number\");\n        let generation_mod = format!(\"generation={generation}\");\n        let stream_uri = format!(\n            \"https://storage.googleapis.com/storage/v1/b/{}/o/{}?{}&{}\",\n            self.bucket_name.clone(),\n            encoded_path,\n            stream_uri_mod,\n            generation_mod,\n        );\n\n        let mut req = Client::new()\n            .get(stream_uri)\n            .headers(headers)\n            .bearer_auth(\n                self.token_provider\n                    .token(GCS_SCOPES)\n                    .await\n                    .map_err(|e: gcp_auth::Error| DownloadError::Other(e.into()))?\n                    .as_str(),\n            )\n            .send();\n\n        let get_object = tokio::select! 
{\n            res = req => res,\n            _ = tokio::time::sleep(self.timeout) => return Err(DownloadError::Timeout),\n            _ = cancel.cancelled() => return Err(DownloadError::Cancelled),\n        };\n\n        let started_at = ScopeGuard::into_inner(started_at);\n\n        let object_output = match get_object {\n            Ok(object_output) => {\n                if !object_output.status().is_success() {\n                    match object_output.status() {\n                        StatusCode::NOT_FOUND => return Err(DownloadError::NotFound),\n                        _ => {\n                            return Err(DownloadError::Other(anyhow::anyhow!(\n                                \"GCS GET response contained no response body\"\n                            )));\n                        }\n                    }\n                } else {\n                    object_output\n                }\n            }\n            Err(e) => {\n                crate::metrics::BUCKET_METRICS.req_seconds.observe_elapsed(\n                    kind,\n                    AttemptOutcome::Err,\n                    started_at,\n                );\n\n                return Err(DownloadError::Other(\n                    anyhow::Error::new(e).context(\"download s3 object\"),\n                ));\n            }\n        };\n\n        let remaining = self.timeout.saturating_sub(started_at.elapsed());\n\n        let metadata = resp.metadata.map(StorageMetadata);\n\n        let etag = resp\n            .etag\n            .ok_or(DownloadError::Other(anyhow::anyhow!(\"Missing ETag header\")))?\n            .into();\n\n        let last_modified: SystemTime = to_system_time(resp.updated).unwrap_or(SystemTime::now());\n\n        // But let data stream pass through\n        Ok(Download {\n            download_stream: Box::pin(object_output.bytes_stream().map(|item| {\n                item.map_err(|e: reqwest::Error| std::io::Error::new(std::io::ErrorKind::Other, e))\n            })),\n     
       etag,\n            last_modified,\n            metadata,\n        })\n    }\n    \n    async fn copy_object(\n        &self, \n        from: &RemotePath,\n        to: &RemotePath,\n        cancel: &CancellationToken,\n        generation: Option<&String>,\n    ) -> anyhow::Result<reqwest::RequestBuilder> {\n\n        let copy_from_path: String =\n            url::form_urlencoded::byte_serialize(\n                self.relative_path_to_gcs_object(to)\n                    .trim_start_matches(\"/\")\n                    .as_bytes()\n            )\n            .collect();\n        \n        let copy_to_path: String =\n            url::form_urlencoded::byte_serialize(\n                self.relative_path_to_gcs_object(to)\n                    .trim_start_matches(\"/\")\n                    .as_bytes()\n            )\n            .collect();\n       \n       let mut copy_uri = format!(\n           \"https://storage.googleapis.com/storage/v1/b/{}/o/{}/rewriteTo/b/{}/o/{}\",\n           self.bucket_name.clone(),\n           copy_from_path,\n           self.bucket_name.clone(),\n           copy_to_path,\n       );\n      \n       if let Some(gen_id) = generation {\n           copy_uri += gen_id;\n       }        \n\n       Ok(\n           Client::new()\n               .post(copy_uri)\n               .bearer_auth(self.token_provider.token(GCS_SCOPES).await?.as_str())\n               .header(header::CONTENT_TYPE, \"application/json\")\n               .header(header::CONTENT_LENGTH, \"0\")\n       )\n    }\n\n    \n}\n\nimpl RemoteStorage for GCSBucket {\n    // ---------------------------------------\n    // Neon wrappers for GCS client functions\n    // ---------------------------------------\n\n    fn list_streaming(\n        &self,\n        prefix: Option<&RemotePath>,\n        mode: ListingMode,\n        max_keys: Option<NonZeroU32>,\n        cancel: &CancellationToken,\n    ) -> impl Stream<Item = Result<Listing, DownloadError>> {\n        let kind = 
RequestKind::List;\n\n        let mut max_keys = max_keys.map(|mk| mk.get() as i32);\n\n        let list_prefix = prefix\n            .map(|p| self.relative_path_to_gcs_object(p))\n            .or_else(|| {\n                self.prefix_in_bucket.clone().map(|mut s| {\n                    s.push(REMOTE_STORAGE_PREFIX_SEPARATOR);\n                    s\n                })\n            })\n            .unwrap();\n\n        let request_max_keys = self\n            .max_keys_per_list_response\n            .into_iter()\n            .chain(max_keys.into_iter())\n            .min()\n            // https://cloud.google.com/storage/docs/json_api/v1/objects/list?hl=en#parameters\n            .unwrap_or(1000);\n\n        // We pass URI in to `list_objects_v2` as we'll modify it with `NextPageToken`, hence\n        // `mut`\n        let mut list_uri = format!(\n            \"https://storage.googleapis.com/storage/v1/b/{}/o?prefix={}&maxResults={}\",\n            self.bucket_name.clone(),\n            list_prefix,\n            request_max_keys,\n        );\n\n        // on ListingMode:\n        // https://github.com/neondatabase/neon/blob/edc11253b65e12a10843711bd88ad277511396d7/libs/remote_storage/src/lib.rs#L158C1-L164C2\n        if let ListingMode::WithDelimiter = mode {\n            list_uri.push_str(&format!(\n                \"&delimiter={}\",\n                REMOTE_STORAGE_PREFIX_SEPARATOR.to_string()\n            ));\n        }\n\n        async_stream::stream! {\n\n            let mut continuation_token = None;\n\n            'outer: loop {\n                let started_at = start_measuring_requests(kind);\n\n                let request = self.list_objects_v2(list_uri.clone())\n                    .await\n                    .map_err(DownloadError::Other)?\n                    .send();\n\n                // this is like `await`\n                let response = tokio::select! 
{\n                    res = request => Ok(res),\n                    _ = tokio::time::sleep(self.timeout) => Err(DownloadError::Timeout),\n                    _ = cancel.cancelled() => Err(DownloadError::Cancelled),\n                }?;\n\n                // just mapping our `Result' error variant's type.\n                let response = response\n                    .context(\"Failed to list GCS prefixes\")\n                    .map_err(DownloadError::Other);\n\n                let started_at = ScopeGuard::into_inner(started_at);\n\n                crate::metrics::BUCKET_METRICS\n                    .req_seconds\n                    .observe_elapsed(kind, &response, started_at);\n\n                let response = match response {\n                    Ok(response) => response,\n                    Err(e) => {\n                        // The error is potentially retryable, so we must rewind the loop after yielding.\n                        yield Err(e);\n                        continue 'outer;\n                    },\n                };\n\n                let body = response.text()\n                    .await\n                    .map_err(|e: reqwest::Error| DownloadError::Other(e.into()))?;\n\n                let resp: GCSListResponse = serde_json::from_str(&body).map_err(|e: serde_json::Error| DownloadError::Other(e.into()))?;\n\n                let prefixes = resp.common_prefixes();\n                let keys = resp.contents();\n\n                tracing::debug!(\"list: {} prefixes, {} keys\", prefixes.len(), keys.len());\n\n                let mut result = Listing::default();\n\n                for res in keys.iter() {\n                    \n                   let last_modified: SystemTime = to_system_time(res.updated.clone()).unwrap_or(SystemTime::now());\n\n                   let size = res.size.unwrap_or(0) as u64;\n\n                   let key = res.name.clone();\n\n                   result.keys.push(\n                        ListingObject{\n                 
           key: self.gcs_object_to_relative_path(&key),\n                            last_modified,\n                            size\n                        }\n                   );\n\n                   if let Some(mut mk) = max_keys {\n                       assert!(mk > 0);\n                       mk -= 1;\n                       if mk == 0 {\n                          tracing::debug!(\"reached limit set by max_keys\");\n                          yield Ok(result);\n                          break 'outer;\n                       }\n                       max_keys = Some(mk);\n                   };\n                }\n\n                result.prefixes.extend(prefixes.iter().filter_map(|p| {\n                    Some(\n                        self.gcs_object_to_relative_path(\n                            p.trim_end_matches(REMOTE_STORAGE_PREFIX_SEPARATOR)\n                        ),\n                    )\n                }));\n\n                yield Ok(result);\n\n                continuation_token = match resp.next_page_token {\n                    Some(token) => {\n                        list_uri = list_uri + \"&pageToken=\" + &token;\n                        Some(token)\n                    },\n                    None => break\n                }\n            }\n        }\n    }\n    \n    async fn copy(\n        &self,\n        from: &RemotePath,\n        to: &RemotePath,\n        cancel: &CancellationToken,\n    ) -> anyhow::Result<()> {\n        let kind = RequestKind::Copy;\n\n        let _permit = self.permit(kind, cancel).await?;\n\n        let timeout = tokio::time::sleep(self.timeout);\n\n        let started_at = start_measuring_requests(kind);\n        \n        let op = self.copy_object(\n            from,\n            to, \n            cancel,\n            None\n        ).await?.send();\n\n        let res = tokio::select! 
{\n            res = op => res,\n            _ = timeout => return Err(TimeoutOrCancel::Timeout.into()),\n            _ = cancel.cancelled() => return Err(TimeoutOrCancel::Cancel.into()),\n        };\n\n        let started_at = ScopeGuard::into_inner(started_at);\n        crate::metrics::BUCKET_METRICS\n            .req_seconds\n            .observe_elapsed(kind, &res, started_at);\n\n        res?;\n\n        Ok(())\n    }\n\n\n\n    async fn upload(\n        &self,\n        from: impl Stream<Item = std::io::Result<Bytes>> + Send + Sync + 'static,\n        from_size_bytes: usize,\n        to: &RemotePath,\n        metadata: Option<StorageMetadata>,\n        cancel: &CancellationToken,\n    ) -> anyhow::Result<()> {\n        let kind = RequestKind::Put;\n        let _permit = self.permit(kind, cancel).await?;\n\n        let started_at = start_measuring_requests(kind);\n\n        let upload = self.put_object(from, from_size_bytes, to, cancel, metadata);\n\n        let upload = tokio::time::timeout(self.timeout, upload);\n\n        let res = tokio::select! {\n            res = upload => res,\n            _ = cancel.cancelled() => return Err(TimeoutOrCancel::Cancel.into()),\n        };\n\n        if let Ok(inner) = &res {\n            // do not incl. 
timeouts as errors in metrics but cancellations\n            let started_at = ScopeGuard::into_inner(started_at);\n            crate::metrics::BUCKET_METRICS\n                .req_seconds\n                .observe_elapsed(kind, inner, started_at);\n        }\n\n        match res {\n            Ok(Ok(_put)) => Ok(()),\n            Ok(Err(sdk)) => {\n                Err(sdk.into())\n            }\n            Err(_timeout) => Err(TimeoutOrCancel::Timeout.into()),\n        }\n    }\n\n    async fn download(\n        &self,\n        from: &RemotePath,\n        opts: &DownloadOpts,\n        cancel: &CancellationToken,\n    ) -> Result<Download, DownloadError> {\n        // if prefix is not none then download file `prefix/from`\n        // if prefix is none then download file `from`\n\n        self.get_object(\n            GetObjectRequest {\n                bucket: self.bucket_name.clone(),\n                key: self\n                    .relative_path_to_gcs_object(from)\n                    .trim_start_matches(\"/\")\n                    .to_string(),\n                etag: opts.etag.as_ref().map(|e| e.to_string()),\n                range: opts.byte_range_header(),\n            },\n            cancel,\n        )\n        .await\n    }\n\n    async fn delete_objects(\n        &self,\n        paths: &[RemotePath],\n        cancel: &CancellationToken,\n    ) -> anyhow::Result<()> {\n        let kind = RequestKind::Delete;\n        let permit = self.permit(kind, cancel).await?;\n\n        let mut delete_objects: Vec<String> = Vec::with_capacity(paths.len());\n\n        let delete_objects: Vec<String> = paths\n            .iter()\n            .map(|i| self.relative_path_to_gcs_object(i))\n            .collect();\n\n        self.delete_oids(&delete_objects, cancel, &permit).await\n    }\n\n    fn max_keys_per_delete(&self) -> usize {\n        MAX_KEYS_PER_DELETE_GCS\n    }\n\n    async fn delete(&self, path: &RemotePath, cancel: &CancellationToken) -> anyhow::Result<()> {\n 
       let paths = std::array::from_ref(path);\n        self.delete_objects(paths, cancel).await\n    }\n\n    async fn time_travel_recover(\n           &self,\n           prefix: Option<&RemotePath>,\n           timestamp: SystemTime,\n           done_if_after: SystemTime,\n           cancel: &CancellationToken,\n           complexity_limit: Option<NonZeroU32>,\n    ) -> Result<(), TimeTravelError> {\n        \n       let kind = RequestKind::TimeTravel;\n       let permit = self.permit(kind, cancel).await?;\n\n       tracing::trace!(\"Target time: {timestamp:?}, done_if_after {done_if_after:?}\");\n\n       let mode = ListingMode::NoDelimiter;\n       let version_listing = self\n           .list_versions_with_permit(&permit, prefix, mode, complexity_limit, cancel)\n           .await\n           .map_err(|err| match err {\n               DownloadError::Other(e) => TimeTravelError::Other(e),\n               DownloadError::Cancelled => TimeTravelError::Cancelled,\n               other => TimeTravelError::Other(other.into()),\n           })?;\n       let versions_and_deletes = version_listing.versions;\n\n       tracing::info!(\n           \"Built list for time travel with {} versions and deletions\",\n           versions_and_deletes.len()\n       );\n\n       // Work on the list of references instead of the objects directly,\n       // otherwise we get lifetime errors in the sort_by_key call below.\n       let mut versions_and_deletes = versions_and_deletes.iter().collect::<Vec<_>>();\n\n       versions_and_deletes.sort_by_key(|vd| (&vd.key, &vd.last_modified));\n\n       let mut vds_for_key = HashMap::<_, Vec<_>>::new();\n\n       for vd in &versions_and_deletes {\n           let GCSVersion { key, .. 
} = &vd;\n           if Some(vd.id.0.as_str()) == Some(\"null\") {\n               // TODO: check the behavior of using the SDK on a non-versioned container\n               return Err(TimeTravelError::Other(anyhow::anyhow!(\n                   \"Received ListVersions response for key={key} with version_id='null', \\\n                   indicating either disabled versioning, or legacy objects with null version id values\"\n               )));\n           }\n           tracing::trace!(\"Parsing version key={key} id={:?}\", vd.id);\n           vds_for_key.entry(key).or_default().push(vd);\n       }\n\n       let warn_threshold = 3;\n       let max_retries = 10;\n       let is_permanent = |e: &_| matches!(e, TimeTravelError::Cancelled);\n\n       for (key, versions) in vds_for_key {\n           let last_vd = versions.last().unwrap();\n           let key = self.relative_path_to_gcs_object(key);\n           if last_vd.last_modified > done_if_after {\n               /// Case 1: we have a recent object outside of our restore window.\n               tracing::trace!(\"Key {key} has version later than done_if_after, skipping\");\n               continue;\n           }\n           /// we get index in the array that we want whether its `v` or `e`\n           let version_to_restore_to =\n               match versions.binary_search_by_key(&timestamp, |tpl| tpl.last_modified) {\n                   Ok(v) => v,\n                   Err(e) => e,\n               };\n           \n           let mut do_delete = false;\n           if version_to_restore_to == 0 {\n               // All versions more recent, so the key didn't exist at the specified time point.\n               tracing::trace!(\n                   \"All {} versions more recent for {key}, deleting\",\n                   versions.len()\n               );\n               do_delete = true;\n               \n           } else {\n\n               let GCSVersion {\n                       id: VersionId(version_id),\n                  
     time_deleted: deletion_timestamp,\n                       ..\n                   } = &versions[version_to_restore_to - 1];\n               \n               // GCS only has 'timeDeleted', not a version object per delete + version. \n               // A version is either replaced by an object or removed -- stomped or dropped.\n               // If `timeDeleted` < `time_travel_timestamp`, obj was removed and ought to be deleted.\n               // If its `None`, that means we have the most current object, no-op.\n               // Else, it was the same as the `updated` / `timeCreated` of the subsequent version, and ought to be restored.\n               match &deletion_timestamp {\n                   \n                   Some(time) => {\n                       \n                       if time < &timestamp  {\n                          // Case 2: version was last marked deleted before `timestamp`\n                          do_delete = true;\n                          \n                       } else {\n                \n                          // Case 3:  restore state to this version via `copy_object`\n                          tracing::trace!(\"Copying old version {version_id} for {key}...\");\n                          \n                          let source_id =\n                              format!(\"?sourceGeneration={version_id}\");\n\n                          backoff::retry(\n                              || async {\n                                  \n                                  let key_path = self.gcs_object_to_relative_path(&key);\n                                  \n                                  let op = self.copy_object(\n                                      &key_path,\n                                      &key_path,\n                                      cancel,\n                                      Some(&source_id),\n                                  ).await.map_err(|e| TimeTravelError::Other(e.into()))?\n                               
   .send();\n                                  \n                                  tokio::select! {\n                                      res = op => res.map_err(|e| TimeTravelError::Other(e.into())),\n                                      _ = cancel.cancelled() => Err(TimeTravelError::Cancelled),\n                                  }\n                              },\n                              is_permanent,\n                              warn_threshold,\n                              max_retries,\n                              \"copying object version for time_travel_recover\",\n                              cancel,\n                          )\n                          .await\n                          .ok_or_else(|| TimeTravelError::Cancelled)\n                          .and_then(|x| {x})?;\n                          tracing::info!(%version_id, %key, \"Copied old version in GCS\");            \n                       }\n                   },\n                   _ => {\n                        tracing::info!(\"most current object version, skipping\");\n                   }\n               }\n           };\n           if do_delete {\n               tracing::trace!(\"Deleting {key}...\");\n               self.delete_oids(&[key], cancel, &permit)\n                   .await\n                   .map_err(|e| {\n                       // delete_oid0 will use TimeoutOrCancel\n                       if TimeoutOrCancel::caused_by_cancel(&e) {\n                           TimeTravelError::Cancelled\n                       } else {\n                           TimeTravelError::Other(e)\n                       }\n               })?;\n           }\n       }\n       Ok(())\n    }\n    async fn head_object(\n        &self,\n        key: &RemotePath,\n        cancel: &CancellationToken,\n    ) -> Result<ListingObject, DownloadError> {\n        let path = self\n            .relative_path_to_gcs_object(key)\n            .trim_start_matches(\"/\")\n            .to_string();\n\n   
     let resp = self.head_object(path.clone(), cancel).await?;\n\n        let last_modified: SystemTime = to_system_time(resp.updated).unwrap_or(SystemTime::now());\n\n        let Some(size) = resp.size else {\n            return Err(DownloadError::Other(anyhow::anyhow!(\n                \"Missing size (content length) header\"\n            )));\n        };\n\n        Ok(ListingObject {\n            key: self.gcs_object_to_relative_path(&path),\n            last_modified,\n            size: size as u64,\n        })\n    }\n\n    async fn list_versions(\n        &self,\n        prefix: Option<&RemotePath>,\n        mode: ListingMode,\n        max_keys: Option<NonZeroU32>,\n        cancel: &CancellationToken,\n    ) -> Result<crate::VersionListing, DownloadError> {\n        let kind = RequestKind::ListVersions;\n        let permit = self.permit(kind, cancel).await?;\n        Ok(\n            self.list_versions_with_permit(&permit, prefix, mode, max_keys, cancel)\n            .await?.into()\n        )\n    }\n}\n\n// ---------\n\n#[derive(Serialize, Deserialize, Debug)]\n#[serde(rename_all = \"snake_case\")]\npub struct GCSListResponse {\n    #[serde(rename = \"nextPageToken\")]\n    pub next_page_token: Option<String>,\n    pub items: Option<Vec<GCSObject>>,\n    pub prefixes: Option<Vec<String>>,\n}\n\nfn de_from_str<'de, D>(deserializer: D) -> Result<Option<i64>, D::Error>\nwhere\n    D: Deserializer<'de>,\n{\n    let s = Option::<String>::deserialize(deserializer)?;\n    match s {\n        Some(s) => i64::from_str(&s).map(Some).map_err(de::Error::custom),\n        None => Ok(None),\n    }\n}\n\n#[derive(Serialize, Deserialize, Debug)]\n#[serde(rename_all = \"snake_case\")]\npub struct GCSObject {\n    pub name: String,\n    pub bucket: String,\n    pub generation: Option<String>,\n    pub metageneration: String,\n    #[serde(rename = \"contentType\")]\n    pub content_type: Option<String>,\n    #[serde(rename = \"storageClass\")]\n    pub storage_class: String,\n  
  #[serde(deserialize_with = \"de_from_str\")]\n    pub size: Option<i64>,\n    #[serde(rename = \"md5Hash\")]\n    pub md5_hash: Option<String>,\n    pub crc32c: String,\n    pub etag: Option<String>,\n    #[serde(rename = \"timeCreated\")]\n    pub time_created: String,\n    pub updated: Option<String>,\n    #[serde(rename = \"timeStorageClassUpdated\")]\n    pub time_storage_class_updated: String,\n    #[serde(rename = \"timeDeleted\")]\n    pub time_deleted: Option<String>,\n    #[serde(rename = \"timeFinalized\")]\n    pub time_finalized: String,\n    pub metadata: Option<HashMap<String, String>>,\n}\n\nimpl GCSListResponse {\n    pub fn contents(&self) -> &[GCSObject] {\n        self.items.as_deref().unwrap_or_default()\n    }\n    pub fn common_prefixes(&self) -> &[String] {\n        self.prefixes.as_deref().unwrap_or_default()\n    }\n}\n"
  },
  {
    "path": "libs/remote_storage/src/lib.rs",
    "content": "//! A set of generic storage abstractions for the page server to use when backing up and restoring its state from the external storage.\n//! No other modules from this tree are supposed to be used directly by the external code.\n//!\n//! [`RemoteStorage`] trait a CRUD-like generic abstraction to use for adapting external storages with a few implementations:\n//!   * [`local_fs`] allows to use local file system as an external storage\n//!   * [`s3_bucket`] uses AWS S3 bucket as an external storage\n//!   * [`azure_blob`] allows to use Azure Blob storage as an external storage\n//!\n#![deny(unsafe_code)]\n#![deny(clippy::undocumented_unsafe_blocks)]\n\nmod azure_blob;\nmod config;\nmod error;\nmod gcs_bucket;\nmod local_fs;\nmod metrics;\nmod s3_bucket;\nmod simulate_failures;\nmod support;\n\nuse std::collections::HashMap;\nuse std::fmt::Debug;\nuse std::num::NonZeroU32;\nuse std::ops::Bound;\nuse std::pin::{Pin, pin};\nuse std::sync::Arc;\nuse std::time::SystemTime;\n\nuse anyhow::Context;\n/// Azure SDK's ETag type is a simple String wrapper: we use this internally instead of repeating it here.\npub use azure_core::Etag;\nuse bytes::Bytes;\nuse camino::{Utf8Path, Utf8PathBuf};\npub use config::TypedRemoteStorageKind;\npub use error::{DownloadError, TimeTravelError, TimeoutOrCancel};\nuse futures::StreamExt;\nuse futures::stream::Stream;\nuse itertools::Itertools as _;\nuse s3_bucket::RequestKind;\nuse serde::{Deserialize, Serialize};\nuse tokio::sync::Semaphore;\nuse tokio_util::sync::CancellationToken;\nuse tracing::info;\n\npub use self::azure_blob::AzureBlobStorage;\npub use self::gcs_bucket::GCSBucket;\npub use self::local_fs::LocalFs;\npub use self::s3_bucket::S3Bucket;\npub use self::simulate_failures::UnreliableWrapper;\npub use crate::config::{AzureConfig, GCSConfig, RemoteStorageConfig, RemoteStorageKind, S3Config};\n\n/// Default concurrency limit for S3 operations\n///\n/// Currently, sync happens with AWS S3, that has two limits on 
requests per second:\n/// ~200 RPS for IAM services\n/// <https://docs.aws.amazon.com/AmazonRDS/latest/AuroraUserGuide/UsingWithRDS.IAMDBAuth.html>\n/// ~3500 PUT/COPY/POST/DELETE or 5500 GET/HEAD S3 requests\n/// <https://aws.amazon.com/premiumsupport/knowledge-center/s3-request-limit-avoid-throttling/>\npub const DEFAULT_REMOTE_STORAGE_S3_CONCURRENCY_LIMIT: usize = 100;\n/// Set this limit analogously to the S3 limit\n///\n/// Here, a limit of max 20k concurrent connections was noted.\n/// <https://learn.microsoft.com/en-us/answers/questions/1301863/is-there-any-limitation-to-concurrent-connections>\npub const DEFAULT_REMOTE_STORAGE_AZURE_CONCURRENCY_LIMIT: usize = 100;\n/// Set this limit analogously to the S3 limit.\n///\n/// The local filesystem backend doesn't enforce a concurrency limit itself, but this also bounds\n/// the upload queue concurrency. Some tests create thousands of uploads, which slows down the\n/// quadratic scheduling of the upload queue, and there is no point spawning so many Tokio tasks.\npub const DEFAULT_REMOTE_STORAGE_LOCALFS_CONCURRENCY_LIMIT: usize = 100;\n/// No limits on the client side, which currenltly means 1000 for AWS S3.\n/// <https://docs.aws.amazon.com/AmazonS3/latest/API/API_ListObjectsV2.html#API_ListObjectsV2_RequestSyntax>\npub const DEFAULT_MAX_KEYS_PER_LIST_RESPONSE: Option<i32> = None;\n\n/// As defined in S3 docs\n///\n/// <https://docs.aws.amazon.com/AmazonS3/latest/API/API_DeleteObjects.html>\npub const MAX_KEYS_PER_DELETE_S3: usize = 1000;\n\n/// As defined in Azure docs\n///\n/// <https://learn.microsoft.com/en-us/rest/api/storageservices/blob-batch>\npub const MAX_KEYS_PER_DELETE_AZURE: usize = 256;\n\npub const MAX_KEYS_PER_DELETE_GCS: usize = 1000;\n\nconst REMOTE_STORAGE_PREFIX_SEPARATOR: char = '/';\n\nconst GCS_SCOPES: &[&str] = &[\"https://www.googleapis.com/auth/cloud-platform\"];\n\n/// Path on the remote storage, relative to some inner prefix.\n/// The prefix is an implementation detail, that allows 
representing local paths\n/// as the remote ones, stripping the local storage prefix away.\n#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]\npub struct RemotePath(Utf8PathBuf);\n\nimpl Serialize for RemotePath {\n    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>\n    where\n        S: serde::Serializer,\n    {\n        serializer.collect_str(self)\n    }\n}\n\nimpl<'de> Deserialize<'de> for RemotePath {\n    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>\n    where\n        D: serde::Deserializer<'de>,\n    {\n        let str = String::deserialize(deserializer)?;\n        Ok(Self(Utf8PathBuf::from(&str)))\n    }\n}\n\nimpl std::fmt::Display for RemotePath {\n    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {\n        std::fmt::Display::fmt(&self.0, f)\n    }\n}\n\nimpl RemotePath {\n    pub fn new(relative_path: &Utf8Path) -> anyhow::Result<Self> {\n        anyhow::ensure!(\n            relative_path.is_relative(),\n            \"Path {relative_path:?} is not relative\"\n        );\n        Ok(Self(relative_path.to_path_buf()))\n    }\n\n    pub fn from_string(relative_path: &str) -> anyhow::Result<Self> {\n        Self::new(Utf8Path::new(relative_path))\n    }\n\n    pub fn with_base(&self, base_path: &Utf8Path) -> Utf8PathBuf {\n        base_path.join(&self.0)\n    }\n\n    pub fn object_name(&self) -> Option<&str> {\n        self.0.file_name()\n    }\n\n    pub fn join(&self, path: impl AsRef<Utf8Path>) -> Self {\n        Self(self.0.join(path))\n    }\n\n    pub fn get_path(&self) -> &Utf8PathBuf {\n        &self.0\n    }\n\n    pub fn strip_prefix(&self, p: &RemotePath) -> Result<&Utf8Path, std::path::StripPrefixError> {\n        self.0.strip_prefix(&p.0)\n    }\n\n    pub fn add_trailing_slash(&self) -> Self {\n        // Unwrap safety inputs are guararnteed to be valid UTF-8\n        Self(format!(\"{}/\", self.0).try_into().unwrap())\n    }\n}\n\n/// We don't need callers to be able to pass 
arbitrary delimiters: just control\n/// whether listings will use a '/' separator or not.\n///\n/// The WithDelimiter mode will populate `prefixes` and `keys` in the result.  The\n/// NoDelimiter mode will only populate `keys`.\n#[derive(Copy, Clone)]\npub enum ListingMode {\n    WithDelimiter,\n    NoDelimiter,\n}\n\n#[derive(PartialEq, Eq, Debug, Clone)]\npub struct ListingObject {\n    pub key: RemotePath,\n    pub last_modified: SystemTime,\n    pub size: u64,\n}\n\n#[derive(Default)]\npub struct Listing {\n    pub prefixes: Vec<RemotePath>,\n    pub keys: Vec<ListingObject>,\n}\n\n#[derive(Default)]\npub struct VersionListing {\n    pub versions: Vec<Version>,\n}\n\n#[derive(Debug)]\npub struct Version {\n    pub key: RemotePath,\n    pub last_modified: SystemTime,\n    pub kind: VersionKind,\n}\n\nimpl Version {\n    pub fn version_id(&self) -> Option<&VersionId> {\n        match &self.kind {\n            VersionKind::Version(id) => Some(id),\n            VersionKind::DeletionMarker => None,\n        }\n    }\n}\n\n#[derive(Debug)]\npub enum VersionKind {\n    DeletionMarker,\n    Version(VersionId),\n}\n\n// I was going to do an `enum GenericVersion` but this feels cleaner.\n#[derive(Default)]\npub struct GCSVersionListing {\n    pub versions: Vec<GCSVersion>,\n}\n\n#[derive(Debug)]\npub struct GCSVersion {\n    pub key: RemotePath,\n    pub last_modified: SystemTime,\n    pub id: VersionId,\n    pub time_deleted: Option<SystemTime>,\n}\n\nimpl From<GCSVersionListing> for VersionListing {\n    fn from(gcs_listing: GCSVersionListing) -> Self {\n        let version_listing = gcs_listing\n            .versions\n            .into_iter()\n            .map(\n                |GCSVersion {\n                     key,\n                     last_modified,\n                     id,\n                     ..\n                 }| {\n                    Version {\n                        key,\n                        last_modified,\n                        kind: 
VersionKind::Version(VersionId(id.0)),\n                    }\n                },\n            )\n            .collect::<Vec<Version>>();\n\n        VersionListing {\n            versions: version_listing,\n        }\n    }\n}\n\n/// Options for downloads. The default value is a plain GET.\npub struct DownloadOpts {\n    /// If given, returns [`DownloadError::Unmodified`] if the object still has\n    /// the same ETag (using If-None-Match).\n    pub etag: Option<Etag>,\n    /// The start of the byte range to download, or unbounded.\n    pub byte_start: Bound<u64>,\n    /// The end of the byte range to download, or unbounded. Must be after the\n    /// start bound.\n    pub byte_end: Bound<u64>,\n    /// Optionally request a specific version of a key\n    pub version_id: Option<VersionId>,\n    /// Indicate whether we're downloading something small or large: this indirectly controls\n    /// timeouts: for something like an index/manifest/heatmap, we should time out faster than\n    /// for layer files\n    pub kind: DownloadKind,\n}\n\npub enum DownloadKind {\n    Large,\n    Small,\n}\n\n#[derive(Debug, Clone)]\npub struct VersionId(pub String);\n\nimpl Default for DownloadOpts {\n    fn default() -> Self {\n        Self {\n            etag: Default::default(),\n            byte_start: Bound::Unbounded,\n            byte_end: Bound::Unbounded,\n            version_id: None,\n            kind: DownloadKind::Large,\n        }\n    }\n}\n\nimpl DownloadOpts {\n    /// Returns the byte range with inclusive start and exclusive end, or None\n    /// if unbounded.\n    pub fn byte_range(&self) -> Option<(u64, Option<u64>)> {\n        if self.byte_start == Bound::Unbounded && self.byte_end == Bound::Unbounded {\n            return None;\n        }\n        let start = match self.byte_start {\n            Bound::Excluded(i) => i + 1,\n            Bound::Included(i) => i,\n            Bound::Unbounded => 0,\n        };\n        let end = match self.byte_end {\n            
Bound::Excluded(i) => Some(i),\n            Bound::Included(i) => Some(i + 1),\n            Bound::Unbounded => None,\n        };\n        if let Some(end) = end {\n            assert!(start < end, \"range end {end} at or before start {start}\");\n        }\n        Some((start, end))\n    }\n\n    /// Returns the byte range as an RFC 2616 Range header value with inclusive\n    /// bounds, or None if unbounded.\n    pub fn byte_range_header(&self) -> Option<String> {\n        self.byte_range()\n            .map(|(start, end)| (start, end.map(|end| end - 1))) // make end inclusive\n            .map(|(start, end)| match end {\n                Some(end) => format!(\"bytes={start}-{end}\"),\n                None => format!(\"bytes={start}-\"),\n            })\n    }\n}\n\n/// Storage (potentially remote) API to manage its state.\n/// This storage tries to be unaware of any layered repository context,\n/// providing basic CRUD operations for storage files.\n#[allow(async_fn_in_trait)]\npub trait RemoteStorage: Send + Sync + 'static {\n    /// List objects in remote storage, with semantics matching AWS S3's [`ListObjectsV2`].\n    ///\n    /// The stream is guaranteed to return at least one element, even in the case of errors\n    /// (in that case it's an `Err()`), or an empty `Listing`.\n    ///\n    /// The stream is not ending if it returns an error, as long as [`is_permanent`] returns false on the error.\n    /// The `next` function can be retried, and maybe in a future retry, there will be success.\n    ///\n    /// Note that the prefix is relative to any `prefix_in_bucket` configured for the client, not\n    /// from the absolute root of the bucket.\n    ///\n    /// `mode` configures whether to use a delimiter.  Without a delimiter, all keys\n    /// within the prefix are listed in the `keys` of the result.  
With a delimiter, any \"directories\" at the top level of\n    /// the prefix are returned in the `prefixes` of the result, and keys in the top level of the prefix are\n    /// returned in `keys`.\n    ///\n    /// `max_keys` controls the maximum number of keys that will be returned.  If this is None, this function\n    /// will iteratively call listobjects until it runs out of keys.  Note that this is not safe to use on\n    /// unlimited size buckets, as the full list of objects is allocated into a monolithic data structure.\n    ///\n    /// [`ListObjectsV2`]: <https://docs.aws.amazon.com/AmazonS3/latest/API/API_ListObjectsV2.html>\n    /// [`is_permanent`]: DownloadError::is_permanent\n    fn list_streaming(\n        &self,\n        prefix: Option<&RemotePath>,\n        mode: ListingMode,\n        max_keys: Option<NonZeroU32>,\n        cancel: &CancellationToken,\n    ) -> impl Stream<Item = Result<Listing, DownloadError>> + Send;\n\n    async fn list(\n        &self,\n        prefix: Option<&RemotePath>,\n        mode: ListingMode,\n        max_keys: Option<NonZeroU32>,\n        cancel: &CancellationToken,\n    ) -> Result<Listing, DownloadError> {\n        let mut stream = pin!(self.list_streaming(prefix, mode, max_keys, cancel));\n        let mut combined = stream.next().await.expect(\"At least one item required\")?;\n        while let Some(list) = stream.next().await {\n            let list = list?;\n            combined.keys.extend(list.keys.into_iter());\n            combined.prefixes.extend_from_slice(&list.prefixes);\n        }\n        Ok(combined)\n    }\n\n    async fn list_versions(\n        &self,\n        prefix: Option<&RemotePath>,\n        mode: ListingMode,\n        max_keys: Option<NonZeroU32>,\n        cancel: &CancellationToken,\n    ) -> Result<VersionListing, DownloadError>;\n\n    /// Obtain metadata information about an object.\n    async fn head_object(\n        &self,\n        key: &RemotePath,\n        cancel: &CancellationToken,\n 
   ) -> Result<ListingObject, DownloadError>;\n\n    /// Streams the local file contents into the remote storage entry.\n    ///\n    /// If the operation fails because of timeout or cancellation, the root cause of the error will be\n    /// set to `TimeoutOrCancel`.\n    async fn upload(\n        &self,\n        from: impl Stream<Item = std::io::Result<Bytes>> + Send + Sync + 'static,\n        // S3 PUT request requires the content length to be specified,\n        // otherwise it starts to fail with the concurrent connection count increasing.\n        data_size_bytes: usize,\n        to: &RemotePath,\n        metadata: Option<StorageMetadata>,\n        cancel: &CancellationToken,\n    ) -> anyhow::Result<()>;\n\n    /// Streams the remote storage entry contents.\n    ///\n    /// The returned download stream will obey initial timeout and cancellation signal by erroring\n    /// on whichever happens first. Only one of the reasons will fail the stream, which is usually\n    /// enough for `tokio::io::copy_buf` usage. If needed the error can be filtered out.\n    ///\n    /// Returns the metadata, if any was stored with the file previously.\n    async fn download(\n        &self,\n        from: &RemotePath,\n        opts: &DownloadOpts,\n        cancel: &CancellationToken,\n    ) -> Result<Download, DownloadError>;\n\n    /// Delete a single path from remote storage.\n    ///\n    /// If the operation fails because of timeout or cancellation, the root cause of the error will be\n    /// set to `TimeoutOrCancel`. In such situation it is unknown if the deletion went through.\n    async fn delete(&self, path: &RemotePath, cancel: &CancellationToken) -> anyhow::Result<()>;\n\n    /// Delete multiple paths from remote storage.\n    ///\n    /// If the operation fails because of timeout or cancellation, the root cause of the error will be\n    /// set to `TimeoutOrCancel`. 
In such situation it is unknown which deletions, if any, went\n    /// through.\n    async fn delete_objects(\n        &self,\n        paths: &[RemotePath],\n        cancel: &CancellationToken,\n    ) -> anyhow::Result<()>;\n\n    /// Returns the maximum number of keys that a call to [`Self::delete_objects`] can delete without chunking\n    ///\n    /// The value returned is only an optimization hint. One can pass a larger number of objects to\n    /// `delete_objects` as well.\n    ///\n    /// The value is guaranteed to be >= 1.\n    fn max_keys_per_delete(&self) -> usize;\n\n    /// Deletes all objects matching the given prefix.\n    ///\n    /// NB: this uses NoDelimiter and will match partial prefixes. For example, the prefix /a/b will\n    /// delete /a/b, /a/b/*, /a/bc, /a/bc/*, etc.\n    ///\n    /// If the operation fails because of timeout or cancellation, the root cause of the error will\n    /// be set to `TimeoutOrCancel`. In such situation it is unknown which deletions, if any, went\n    /// through.\n    async fn delete_prefix(\n        &self,\n        prefix: &RemotePath,\n        cancel: &CancellationToken,\n    ) -> anyhow::Result<()> {\n        let mut stream =\n            pin!(self.list_streaming(Some(prefix), ListingMode::NoDelimiter, None, cancel));\n        while let Some(result) = stream.next().await {\n            let keys = match result {\n                Ok(listing) if listing.keys.is_empty() => continue,\n                Ok(listing) => listing.keys.into_iter().map(|o| o.key).collect_vec(),\n                Err(DownloadError::Cancelled) => return Err(TimeoutOrCancel::Cancel.into()),\n                Err(DownloadError::Timeout) => return Err(TimeoutOrCancel::Timeout.into()),\n                Err(err) => return Err(err.into()),\n            };\n            tracing::info!(\"Deleting {} keys from remote storage\", keys.len());\n            self.delete_objects(&keys, cancel).await?;\n        }\n        Ok(())\n    }\n\n    /// Copy a remote 
object inside a bucket from one path to another.\n    async fn copy(\n        &self,\n        from: &RemotePath,\n        to: &RemotePath,\n        cancel: &CancellationToken,\n    ) -> anyhow::Result<()>;\n\n    /// Resets the content of everything with the given prefix to the given state\n    async fn time_travel_recover(\n        &self,\n        prefix: Option<&RemotePath>,\n        timestamp: SystemTime,\n        done_if_after: SystemTime,\n        cancel: &CancellationToken,\n        complexity_limit: Option<NonZeroU32>,\n    ) -> Result<(), TimeTravelError>;\n}\n\n/// Data part of an ongoing [`Download`].\n///\n/// `DownloadStream` is sensitive to the timeout and cancellation used with the original\n/// [`RemoteStorage::download`] request. The type yields `std::io::Result<Bytes>` to be compatible\n/// with `tokio::io::copy_buf`.\n// This has 'static because safekeepers do not use cancellation tokens (yet)\npub type DownloadStream =\n    Pin<Box<dyn Stream<Item = std::io::Result<Bytes>> + Send + Sync + 'static>>;\n\npub struct Download {\n    pub download_stream: DownloadStream,\n    /// The last time the file was modified (`last-modified` HTTP header)\n    pub last_modified: SystemTime,\n    /// A way to identify this specific version of the resource (`etag` HTTP header)\n    pub etag: Etag,\n    /// Extra key-value data, associated with the current remote file.\n    pub metadata: Option<StorageMetadata>,\n}\n\nimpl Debug for Download {\n    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {\n        f.debug_struct(\"Download\")\n            .field(\"metadata\", &self.metadata)\n            .finish()\n    }\n}\n\n/// Every storage, currently supported.\n/// Serves as a simple way to pass around the [`RemoteStorage`] without dealing with generics.\n// Require Clone for `Other` due to https://github.com/rust-lang/rust/issues/26925\n#[derive(Clone)]\npub enum GenericRemoteStorage<Other: Clone = Arc<UnreliableWrapper>> {\n    LocalFs(LocalFs),\n 
   AwsS3(Arc<S3Bucket>),\n    AzureBlob(Arc<AzureBlobStorage>),\n    Unreliable(Other),\n    GCS(Arc<GCSBucket>),\n}\n\nimpl<Other: RemoteStorage> GenericRemoteStorage<Arc<Other>> {\n    // See [`RemoteStorage::list`].\n    pub async fn list(\n        &self,\n        prefix: Option<&RemotePath>,\n        mode: ListingMode,\n        max_keys: Option<NonZeroU32>,\n        cancel: &CancellationToken,\n    ) -> Result<Listing, DownloadError> {\n        match self {\n            Self::LocalFs(s) => s.list(prefix, mode, max_keys, cancel).await,\n            Self::AwsS3(s) => s.list(prefix, mode, max_keys, cancel).await,\n            Self::AzureBlob(s) => s.list(prefix, mode, max_keys, cancel).await,\n            Self::Unreliable(s) => s.list(prefix, mode, max_keys, cancel).await,\n            Self::GCS(s) => s.list(prefix, mode, max_keys, cancel).await,\n        }\n    }\n\n    // See [`RemoteStorage::list_streaming`].\n    pub fn list_streaming<'a>(\n        &'a self,\n        prefix: Option<&'a RemotePath>,\n        mode: ListingMode,\n        max_keys: Option<NonZeroU32>,\n        cancel: &'a CancellationToken,\n    ) -> impl Stream<Item = Result<Listing, DownloadError>> + 'a + Send {\n        match self {\n            Self::LocalFs(s) => Box::pin(s.list_streaming(prefix, mode, max_keys, cancel))\n                as Pin<Box<dyn Stream<Item = Result<Listing, DownloadError>> + Send>>,\n            Self::AwsS3(s) => Box::pin(s.list_streaming(prefix, mode, max_keys, cancel)),\n            Self::AzureBlob(s) => Box::pin(s.list_streaming(prefix, mode, max_keys, cancel)),\n            Self::Unreliable(s) => Box::pin(s.list_streaming(prefix, mode, max_keys, cancel)),\n            Self::GCS(s) => Box::pin(s.list_streaming(prefix, mode, max_keys, cancel)),\n        }\n    }\n\n    // See [`RemoteStorage::list_versions`].\n    pub async fn list_versions<'a>(\n        &'a self,\n        prefix: Option<&'a RemotePath>,\n        mode: ListingMode,\n        max_keys: 
Option<NonZeroU32>,\n        cancel: &'a CancellationToken,\n    ) -> Result<VersionListing, DownloadError> {\n        match self {\n            Self::LocalFs(s) => s.list_versions(prefix, mode, max_keys, cancel).await,\n            Self::AwsS3(s) => s.list_versions(prefix, mode, max_keys, cancel).await,\n            Self::AzureBlob(s) => s.list_versions(prefix, mode, max_keys, cancel).await,\n            Self::Unreliable(s) => s.list_versions(prefix, mode, max_keys, cancel).await,\n            Self::GCS(s) => s.list_versions(prefix, mode, max_keys, cancel).await,\n        }\n    }\n\n    // See [`RemoteStorage::head_object`].\n    pub async fn head_object(\n        &self,\n        key: &RemotePath,\n        cancel: &CancellationToken,\n    ) -> Result<ListingObject, DownloadError> {\n        match self {\n            Self::LocalFs(s) => s.head_object(key, cancel).await,\n            Self::AwsS3(s) => s.head_object(key, cancel).await,\n            Self::AzureBlob(s) => s.head_object(key, cancel).await,\n            Self::Unreliable(s) => s.head_object(key, cancel).await,\n            Self::GCS(s) => s.head_object(key, cancel).await,\n        }\n    }\n\n    /// See [`RemoteStorage::upload`]\n    pub async fn upload(\n        &self,\n        from: impl Stream<Item = std::io::Result<Bytes>> + Send + Sync + 'static,\n        data_size_bytes: usize,\n        to: &RemotePath,\n        metadata: Option<StorageMetadata>,\n        cancel: &CancellationToken,\n    ) -> anyhow::Result<()> {\n        match self {\n            Self::LocalFs(s) => s.upload(from, data_size_bytes, to, metadata, cancel).await,\n            Self::AwsS3(s) => s.upload(from, data_size_bytes, to, metadata, cancel).await,\n            Self::AzureBlob(s) => s.upload(from, data_size_bytes, to, metadata, cancel).await,\n            Self::Unreliable(s) => s.upload(from, data_size_bytes, to, metadata, cancel).await,\n            Self::GCS(s) => s.upload(from, data_size_bytes, to, metadata, cancel).await,\n  
      }\n    }\n\n    /// See [`RemoteStorage::download`]\n    pub async fn download(\n        &self,\n        from: &RemotePath,\n        opts: &DownloadOpts,\n        cancel: &CancellationToken,\n    ) -> Result<Download, DownloadError> {\n        match self {\n            Self::LocalFs(s) => s.download(from, opts, cancel).await,\n            Self::AwsS3(s) => s.download(from, opts, cancel).await,\n            Self::AzureBlob(s) => s.download(from, opts, cancel).await,\n            Self::Unreliable(s) => s.download(from, opts, cancel).await,\n            Self::GCS(s) => s.download(from, opts, cancel).await,\n        }\n    }\n\n    /// See [`RemoteStorage::delete`]\n    pub async fn delete(\n        &self,\n        path: &RemotePath,\n        cancel: &CancellationToken,\n    ) -> anyhow::Result<()> {\n        match self {\n            Self::LocalFs(s) => s.delete(path, cancel).await,\n            Self::AwsS3(s) => s.delete(path, cancel).await,\n            Self::AzureBlob(s) => s.delete(path, cancel).await,\n            Self::Unreliable(s) => s.delete(path, cancel).await,\n            Self::GCS(s) => s.delete(path, cancel).await,\n        }\n    }\n\n    /// See [`RemoteStorage::delete_objects`]\n    pub async fn delete_objects(\n        &self,\n        paths: &[RemotePath],\n        cancel: &CancellationToken,\n    ) -> anyhow::Result<()> {\n        match self {\n            Self::LocalFs(s) => s.delete_objects(paths, cancel).await,\n            Self::AwsS3(s) => s.delete_objects(paths, cancel).await,\n            Self::AzureBlob(s) => s.delete_objects(paths, cancel).await,\n            Self::Unreliable(s) => s.delete_objects(paths, cancel).await,\n            Self::GCS(s) => s.delete_objects(paths, cancel).await,\n        }\n    }\n\n    /// [`RemoteStorage::max_keys_per_delete`]\n    pub fn max_keys_per_delete(&self) -> usize {\n        match self {\n            Self::LocalFs(s) => s.max_keys_per_delete(),\n            Self::AwsS3(s) => 
s.max_keys_per_delete(),\n            Self::AzureBlob(s) => s.max_keys_per_delete(),\n            Self::Unreliable(s) => s.max_keys_per_delete(),\n            Self::GCS(s) => s.max_keys_per_delete(),\n        }\n    }\n\n    /// See [`RemoteStorage::delete_prefix`]\n    pub async fn delete_prefix(\n        &self,\n        prefix: &RemotePath,\n        cancel: &CancellationToken,\n    ) -> anyhow::Result<()> {\n        match self {\n            Self::LocalFs(s) => s.delete_prefix(prefix, cancel).await,\n            Self::AwsS3(s) => s.delete_prefix(prefix, cancel).await,\n            Self::AzureBlob(s) => s.delete_prefix(prefix, cancel).await,\n            Self::Unreliable(s) => s.delete_prefix(prefix, cancel).await,\n            Self::GCS(s) => s.delete_prefix(prefix, cancel).await,\n        }\n    }\n\n    /// See [`RemoteStorage::copy`]\n    pub async fn copy_object(\n        &self,\n        from: &RemotePath,\n        to: &RemotePath,\n        cancel: &CancellationToken,\n    ) -> anyhow::Result<()> {\n        match self {\n            Self::LocalFs(s) => s.copy(from, to, cancel).await,\n            Self::AwsS3(s) => s.copy(from, to, cancel).await,\n            Self::AzureBlob(s) => s.copy(from, to, cancel).await,\n            Self::Unreliable(s) => s.copy(from, to, cancel).await,\n            Self::GCS(s) => s.copy(from, to, cancel).await,\n        }\n    }\n\n    /// See [`RemoteStorage::time_travel_recover`].\n    pub async fn time_travel_recover(\n        &self,\n        prefix: Option<&RemotePath>,\n        timestamp: SystemTime,\n        done_if_after: SystemTime,\n        cancel: &CancellationToken,\n        complexity_limit: Option<NonZeroU32>,\n    ) -> Result<(), TimeTravelError> {\n        match self {\n            Self::LocalFs(s) => {\n                s.time_travel_recover(prefix, timestamp, done_if_after, cancel, complexity_limit)\n                    .await\n            }\n            Self::AwsS3(s) => {\n                
s.time_travel_recover(prefix, timestamp, done_if_after, cancel, complexity_limit)\n                    .await\n            }\n            Self::AzureBlob(s) => {\n                s.time_travel_recover(prefix, timestamp, done_if_after, cancel, complexity_limit)\n                    .await\n            }\n            Self::Unreliable(s) => {\n                s.time_travel_recover(prefix, timestamp, done_if_after, cancel, complexity_limit)\n                    .await\n            }\n            Self::GCS(s) => {\n                s.time_travel_recover(prefix, timestamp, done_if_after, cancel, complexity_limit)\n                    .await\n            }\n        }\n    }\n}\n\nimpl GenericRemoteStorage {\n    pub async fn from_storage_kind(kind: TypedRemoteStorageKind) -> anyhow::Result<Self> {\n        Self::from_config(&RemoteStorageConfig {\n            storage: kind.into(),\n            timeout: RemoteStorageConfig::DEFAULT_TIMEOUT,\n            small_timeout: RemoteStorageConfig::DEFAULT_SMALL_TIMEOUT,\n        })\n        .await\n    }\n\n    pub async fn from_config(storage_config: &RemoteStorageConfig) -> anyhow::Result<Self> {\n        info!(\"RemoteStorageConfig: {:?}\", storage_config);\n\n        let timeout = storage_config.timeout;\n\n        // If someone overrides timeout to be small without adjusting small_timeout, then adjust it automatically\n        let small_timeout = std::cmp::min(storage_config.small_timeout, timeout);\n\n        info!(\n            \"RemoteStorageConfig's storage attribute: {:?}\",\n            storage_config.storage\n        );\n\n        Ok(match &storage_config.storage {\n            RemoteStorageKind::LocalFs { local_path: path } => {\n                info!(\"Using fs root '{path}' as a remote storage\");\n                Self::LocalFs(LocalFs::new(path.clone(), timeout)?)\n            }\n            RemoteStorageKind::AwsS3(s3_config) => {\n                // The profile and access key id are only printed here for debugging 
purposes,\n                // their values don't indicate the eventually taken choice for auth.\n                let profile = std::env::var(\"AWS_PROFILE\").unwrap_or_else(|_| \"<none>\".into());\n                let access_key_id =\n                    std::env::var(\"AWS_ACCESS_KEY_ID\").unwrap_or_else(|_| \"<none>\".into());\n                info!(\n                    \"Using s3 bucket '{}' in region '{}' as a remote storage, prefix in bucket: '{:?}', bucket endpoint: '{:?}', profile: {profile}, access_key_id: {access_key_id}\",\n                    s3_config.bucket_name,\n                    s3_config.bucket_region,\n                    s3_config.prefix_in_bucket,\n                    s3_config.endpoint\n                );\n                Self::AwsS3(Arc::new(S3Bucket::new(s3_config, timeout).await?))\n            }\n            RemoteStorageKind::AzureContainer(azure_config) => {\n                let storage_account = azure_config\n                    .storage_account\n                    .as_deref()\n                    .unwrap_or(\"<AZURE_STORAGE_ACCOUNT>\");\n                info!(\n                    \"Using azure container '{}' in account '{storage_account}' in region '{}' as a remote storage, prefix in container: '{:?}'\",\n                    azure_config.container_name,\n                    azure_config.container_region,\n                    azure_config.prefix_in_container\n                );\n                Self::AzureBlob(Arc::new(AzureBlobStorage::new(\n                    azure_config,\n                    timeout,\n                    small_timeout,\n                )?))\n            }\n            RemoteStorageKind::GCS(gcs_config) => {\n                let google_application_credentials =\n                    std::env::var(\"GOOGLE_APPLICATION_CREDENTIALS\")\n                        .unwrap_or_else(|_| \"<none>\".into());\n                info!(\n                    \"Using gcs bucket '{}' as a remote storage, prefix in bucket: '{:?}', 
GOOGLE_APPLICATION_CREDENTIALS: {google_application_credentials }\",\n                    gcs_config.bucket_name, gcs_config.prefix_in_bucket\n                );\n                Self::GCS(Arc::new(GCSBucket::new(gcs_config, timeout).await?))\n            }\n        })\n    }\n\n    /* BEGIN_HADRON */\n    pub fn unreliable_wrapper(s: Self, fail_first: u64, fail_probability: u64) -> Self {\n        Self::Unreliable(Arc::new(UnreliableWrapper::new(\n            s,\n            fail_first,\n            fail_probability,\n        )))\n    }\n    /* END_HADRON */\n\n    /// See [`RemoteStorage::upload`], which this method calls with `None` as metadata.\n    pub async fn upload_storage_object(\n        &self,\n        from: impl Stream<Item = std::io::Result<Bytes>> + Send + Sync + 'static,\n        from_size_bytes: usize,\n        to: &RemotePath,\n        cancel: &CancellationToken,\n    ) -> anyhow::Result<()> {\n        self.upload(from, from_size_bytes, to, None, cancel)\n            .await\n            .with_context(|| {\n                format!(\"Failed to upload data of length {from_size_bytes} to storage path {to:?}\")\n            })\n    }\n\n    /// The name of the bucket/container/etc.\n    pub fn bucket_name(&self) -> Option<&str> {\n        match self {\n            Self::LocalFs(_s) => None,\n            Self::AwsS3(s) => Some(s.bucket_name()),\n            Self::AzureBlob(s) => Some(s.container_name()),\n            Self::Unreliable(_s) => None,\n            Self::GCS(s) => Some(s.bucket_name()),\n        }\n    }\n}\n\n/// Extra set of key-value pairs that contain arbitrary metadata about the storage entry.\n/// Immutable, cannot be changed once the file is created.\n#[derive(Debug, Clone, PartialEq, Eq)]\npub struct StorageMetadata(HashMap<String, String>);\n\nimpl<const N: usize> From<[(&str, &str); N]> for StorageMetadata {\n    fn from(arr: [(&str, &str); N]) -> Self {\n        let map: HashMap<String, String> = arr\n            .iter()\n           
 .map(|(k, v)| (k.to_string(), v.to_string()))\n            .collect();\n        Self(map)\n    }\n}\n\nstruct ConcurrencyLimiter {\n    // Every request to S3 can be throttled or cancelled, if a certain number of requests per second is exceeded.\n    // Same goes to IAM, which is queried before every S3 request, if enabled. IAM has an even lower RPS threshold.\n    // This helps to ensure we don't exceed the thresholds.\n    write: Arc<Semaphore>,\n    read: Arc<Semaphore>,\n}\n\nimpl ConcurrencyLimiter {\n    fn for_kind(&self, kind: RequestKind) -> &Arc<Semaphore> {\n        match kind {\n            RequestKind::Get => &self.read,\n            RequestKind::Put => &self.write,\n            RequestKind::List => &self.read,\n            RequestKind::Delete => &self.write,\n            RequestKind::Copy => &self.write,\n            RequestKind::TimeTravel => &self.write,\n            RequestKind::Head => &self.read,\n            RequestKind::ListVersions => &self.read,\n        }\n    }\n\n    async fn acquire(\n        &self,\n        kind: RequestKind,\n    ) -> Result<tokio::sync::SemaphorePermit<'_>, tokio::sync::AcquireError> {\n        self.for_kind(kind).acquire().await\n    }\n\n    async fn acquire_owned(\n        &self,\n        kind: RequestKind,\n    ) -> Result<tokio::sync::OwnedSemaphorePermit, tokio::sync::AcquireError> {\n        Arc::clone(self.for_kind(kind)).acquire_owned().await\n    }\n\n    fn new(limit: usize) -> ConcurrencyLimiter {\n        Self {\n            read: Arc::new(Semaphore::new(limit)),\n            write: Arc::new(Semaphore::new(limit)),\n        }\n    }\n}\n\n#[cfg(test)]\nmod tests {\n    use super::*;\n\n    /// DownloadOpts::byte_range() should generate (inclusive, exclusive) ranges\n    /// with optional end bound, or None when unbounded.\n    #[test]\n    fn download_opts_byte_range() {\n        // Consider using test_case or a similar table-driven test framework.\n        let cases = [\n            // (byte_start, byte_end, 
expected)\n            (Bound::Unbounded, Bound::Unbounded, None),\n            (Bound::Unbounded, Bound::Included(7), Some((0, Some(8)))),\n            (Bound::Unbounded, Bound::Excluded(7), Some((0, Some(7)))),\n            (Bound::Included(3), Bound::Unbounded, Some((3, None))),\n            (Bound::Included(3), Bound::Included(7), Some((3, Some(8)))),\n            (Bound::Included(3), Bound::Excluded(7), Some((3, Some(7)))),\n            (Bound::Excluded(3), Bound::Unbounded, Some((4, None))),\n            (Bound::Excluded(3), Bound::Included(7), Some((4, Some(8)))),\n            (Bound::Excluded(3), Bound::Excluded(7), Some((4, Some(7)))),\n            // 1-sized ranges are fine, 0 aren't and will panic (separate test).\n            (Bound::Included(3), Bound::Included(3), Some((3, Some(4)))),\n            (Bound::Included(3), Bound::Excluded(4), Some((3, Some(4)))),\n        ];\n\n        for (byte_start, byte_end, expect) in cases {\n            let opts = DownloadOpts {\n                byte_start,\n                byte_end,\n                ..Default::default()\n            };\n            let result = opts.byte_range();\n            assert_eq!(\n                result, expect,\n                \"byte_start={byte_start:?} byte_end={byte_end:?}\"\n            );\n\n            // Check generated HTTP header, which uses an inclusive range.\n            let expect_header = expect.map(|(start, end)| match end {\n                Some(end) => format!(\"bytes={start}-{}\", end - 1), // inclusive end\n                None => format!(\"bytes={start}-\"),\n            });\n            assert_eq!(\n                opts.byte_range_header(),\n                expect_header,\n                \"byte_start={byte_start:?} byte_end={byte_end:?}\"\n            );\n        }\n    }\n\n    /// DownloadOpts::byte_range() zero-sized byte range should panic.\n    #[test]\n    #[should_panic]\n    fn download_opts_byte_range_zero() {\n        DownloadOpts {\n            byte_start: 
Bound::Included(3),\n            byte_end: Bound::Excluded(3),\n            ..Default::default()\n        }\n        .byte_range();\n    }\n\n    /// DownloadOpts::byte_range() negative byte range should panic.\n    #[test]\n    #[should_panic]\n    fn download_opts_byte_range_negative() {\n        DownloadOpts {\n            byte_start: Bound::Included(3),\n            byte_end: Bound::Included(2),\n            ..Default::default()\n        }\n        .byte_range();\n    }\n\n    #[test]\n    fn test_object_name() {\n        let k = RemotePath::new(Utf8Path::new(\"a/b/c\")).unwrap();\n        assert_eq!(k.object_name(), Some(\"c\"));\n\n        let k = RemotePath::new(Utf8Path::new(\"a/b/c/\")).unwrap();\n        assert_eq!(k.object_name(), Some(\"c\"));\n\n        let k = RemotePath::new(Utf8Path::new(\"a/\")).unwrap();\n        assert_eq!(k.object_name(), Some(\"a\"));\n\n        // XXX is it impossible to have an empty key?\n        let k = RemotePath::new(Utf8Path::new(\"\")).unwrap();\n        assert_eq!(k.object_name(), None);\n    }\n\n    #[test]\n    fn rempte_path_cannot_be_created_from_absolute_ones() {\n        let err = RemotePath::new(Utf8Path::new(\"/\")).expect_err(\"Should fail on absolute paths\");\n        assert_eq!(err.to_string(), \"Path \\\"/\\\" is not relative\");\n    }\n}\n"
  },
  {
    "path": "libs/remote_storage/src/local_fs.rs",
    "content": "//! Local filesystem acting as a remote storage.\n//! Multiple API users can use the same \"storage\" of this kind by using different storage roots.\n//!\n//! This storage used in tests, but can also be used in cases when a certain persistent\n//! volume is mounted to the local FS.\n\nuse std::collections::HashSet;\nuse std::io::ErrorKind;\nuse std::num::NonZeroU32;\nuse std::time::{Duration, SystemTime, UNIX_EPOCH};\n\nuse anyhow::{Context, bail, ensure};\nuse bytes::Bytes;\nuse camino::{Utf8Path, Utf8PathBuf};\nuse futures::stream::Stream;\nuse tokio::fs;\nuse tokio::io::{self, AsyncReadExt, AsyncSeekExt, AsyncWriteExt};\nuse tokio_util::io::ReaderStream;\nuse tokio_util::sync::CancellationToken;\nuse utils::crashsafe::path_with_suffix_extension;\n\nuse super::{RemoteStorage, StorageMetadata};\nuse crate::{\n    Download, DownloadError, DownloadOpts, Etag, Listing, ListingMode, ListingObject,\n    REMOTE_STORAGE_PREFIX_SEPARATOR, RemotePath, TimeTravelError, TimeoutOrCancel,\n};\n\nconst LOCAL_FS_TEMP_FILE_SUFFIX: &str = \"___temp\";\n\n#[derive(Debug, Clone)]\npub struct LocalFs {\n    storage_root: Utf8PathBuf,\n    timeout: Duration,\n}\n\nimpl LocalFs {\n    /// Attempts to create local FS storage, along with its root directory.\n    /// Storage root will be created (if does not exist) and transformed into an absolute path (if passed as relative).\n    pub fn new(mut storage_root: Utf8PathBuf, timeout: Duration) -> anyhow::Result<Self> {\n        if !storage_root.exists() {\n            std::fs::create_dir_all(&storage_root).with_context(|| {\n                format!(\"Failed to create all directories in the given root path {storage_root:?}\")\n            })?;\n        }\n        if !storage_root.is_absolute() {\n            storage_root = storage_root.canonicalize_utf8().with_context(|| {\n                format!(\"Failed to represent path {storage_root:?} as an absolute path\")\n            })?;\n        }\n\n        Ok(Self {\n            
storage_root,\n            timeout,\n        })\n    }\n\n    // mirrors S3Bucket::s3_object_to_relative_path\n    fn local_file_to_relative_path(&self, key: Utf8PathBuf) -> RemotePath {\n        let relative_path = key\n            .strip_prefix(&self.storage_root)\n            .expect(\"relative path must contain storage_root as prefix\");\n        RemotePath(relative_path.into())\n    }\n\n    async fn read_storage_metadata(\n        &self,\n        file_path: &Utf8Path,\n    ) -> anyhow::Result<Option<StorageMetadata>> {\n        let metadata_path = storage_metadata_path(file_path);\n        if metadata_path.exists() && metadata_path.is_file() {\n            let metadata_string = fs::read_to_string(&metadata_path).await.with_context(|| {\n                format!(\"Failed to read metadata from the local storage at '{metadata_path}'\")\n            })?;\n\n            serde_json::from_str(&metadata_string)\n                .with_context(|| {\n                    format!(\n                        \"Failed to deserialize metadata from the local storage at '{metadata_path}'\",\n                    )\n                })\n                .map(|metadata| Some(StorageMetadata(metadata)))\n        } else {\n            Ok(None)\n        }\n    }\n\n    #[cfg(test)]\n    async fn list_all(&self) -> anyhow::Result<Vec<RemotePath>> {\n        use std::future::Future;\n        use std::pin::Pin;\n        fn get_all_files<'a, P>(\n            directory_path: P,\n        ) -> Pin<Box<dyn Future<Output = anyhow::Result<Vec<Utf8PathBuf>>> + Send + Sync + 'a>>\n        where\n            P: AsRef<Utf8Path> + Send + Sync + 'a,\n        {\n            Box::pin(async move {\n                let directory_path = directory_path.as_ref();\n                if directory_path.exists() {\n                    if directory_path.is_dir() {\n                        let mut paths = Vec::new();\n                        let mut dir_contents = fs::read_dir(directory_path).await?;\n                 
       while let Some(dir_entry) = dir_contents.next_entry().await? {\n                            let file_type = dir_entry.file_type().await?;\n                            let entry_path =\n                                Utf8PathBuf::from_path_buf(dir_entry.path()).map_err(|pb| {\n                                    anyhow::Error::msg(format!(\n                                        \"non-Unicode path: {}\",\n                                        pb.to_string_lossy()\n                                    ))\n                                })?;\n                            if file_type.is_symlink() {\n                                tracing::debug!(\"{entry_path:?} is a symlink, skipping\")\n                            } else if file_type.is_dir() {\n                                paths.extend(get_all_files(&entry_path).await?.into_iter())\n                            } else {\n                                paths.push(entry_path);\n                            }\n                        }\n                        Ok(paths)\n                    } else {\n                        bail!(\"Path {directory_path:?} is not a directory\")\n                    }\n                } else {\n                    Ok(Vec::new())\n                }\n            })\n        }\n\n        Ok(get_all_files(&self.storage_root)\n            .await?\n            .into_iter()\n            .map(|path| {\n                path.strip_prefix(&self.storage_root)\n                    .context(\"Failed to strip storage root prefix\")\n                    .and_then(RemotePath::new)\n                    .expect(\n                        \"We list files for storage root, hence should be able to remote the prefix\",\n                    )\n            })\n            .collect())\n    }\n\n    // recursively lists all files in a directory,\n    // mirroring the `list_files` for `s3_bucket`\n    async fn list_recursive(&self, folder: Option<&RemotePath>) -> anyhow::Result<Vec<RemotePath>> {\n     
   let full_path = match folder {\n            Some(folder) => folder.with_base(&self.storage_root),\n            None => self.storage_root.clone(),\n        };\n\n        // If we were given a directory, we may use it as our starting point.\n        // Otherwise, we must go up to the first ancestor dir that exists.  This is because\n        // S3 object list prefixes can be arbitrary strings, but when reading\n        // the local filesystem we need a directory to start calling read_dir on.\n        let mut initial_dir = full_path.clone();\n\n        // If there's no trailing slash, we have to start looking from one above: even if\n        // `initial_dir` is a directory, we should still list any prefixes in the parent\n        // that start with the same string.\n        if !full_path.to_string().ends_with('/') {\n            initial_dir.pop();\n        }\n\n        loop {\n            // Did we make it to the root?\n            if initial_dir.parent().is_none() {\n                anyhow::bail!(\"list_files: failed to find valid ancestor dir for {full_path}\");\n            }\n\n            match fs::metadata(initial_dir.clone()).await {\n                Ok(meta) if meta.is_dir() => {\n                    // We found a directory, break\n                    break;\n                }\n                Ok(_meta) => {\n                    // It's not a directory: strip back to the parent\n                    initial_dir.pop();\n                }\n                Err(e) if e.kind() == ErrorKind::NotFound => {\n                    // It's not a file that exists: strip the prefix back to the parent directory\n                    initial_dir.pop();\n                }\n                Err(e) => {\n                    // Unexpected I/O error\n                    anyhow::bail!(e)\n                }\n            }\n        }\n        // Note that Utf8PathBuf starts_with only considers full path segments, but\n        // object prefixes are arbitrary strings, so we need the 
strings for doing\n        // starts_with later.\n        let prefix = full_path.as_str();\n\n        let mut files = vec![];\n        let mut directory_queue = vec![initial_dir];\n        while let Some(cur_folder) = directory_queue.pop() {\n            let mut entries = cur_folder.read_dir_utf8()?;\n            while let Some(Ok(entry)) = entries.next() {\n                let file_name = entry.file_name();\n                let full_file_name = cur_folder.join(file_name);\n                if full_file_name.as_str().starts_with(prefix) {\n                    let file_remote_path = self.local_file_to_relative_path(full_file_name.clone());\n                    files.push(file_remote_path);\n                    if full_file_name.is_dir() {\n                        directory_queue.push(full_file_name);\n                    }\n                }\n            }\n        }\n\n        Ok(files)\n    }\n\n    async fn upload0(\n        &self,\n        data: impl Stream<Item = std::io::Result<Bytes>> + Send + Sync,\n        data_size_bytes: usize,\n        to: &RemotePath,\n        metadata: Option<StorageMetadata>,\n        cancel: &CancellationToken,\n    ) -> anyhow::Result<()> {\n        let target_file_path = to.with_base(&self.storage_root);\n        create_target_directory(&target_file_path).await?;\n        // We need this dance with sort of durable rename (without fsyncs)\n        // to prevent partial uploads. 
This was really hit when pageserver shutdown\n        // cancelled the upload and partial file was left on the fs\n        // NOTE: Because the temp file suffix is always the same, this operation is racy.\n        // Two concurrent operations can lead to the following sequence:\n        // T1: write(temp)\n        // T2: write(temp) -> overwrites the content\n        // T1: rename(temp, dst) -> succeeds\n        // T2: rename(temp, dst) -> fails, temp no longer exists\n        // This can be solved by supplying unique temp suffix every time, but this situation\n        // is not normal in the first place, the error can help (and helped at least once)\n        // to discover bugs in upper level synchronization.\n        let temp_file_path =\n            path_with_suffix_extension(&target_file_path, LOCAL_FS_TEMP_FILE_SUFFIX);\n        let mut destination = io::BufWriter::new(\n            fs::OpenOptions::new()\n                .write(true)\n                .create(true)\n                .truncate(true)\n                .open(&temp_file_path)\n                .await\n                .with_context(|| {\n                    format!(\"Failed to open target fs destination at '{target_file_path}'\")\n                })?,\n        );\n\n        let from_size_bytes = data_size_bytes as u64;\n        let data = tokio_util::io::StreamReader::new(data);\n        let data = std::pin::pin!(data);\n        let mut buffer_to_read = data.take(from_size_bytes);\n\n        // alternatively we could just write the bytes to a file, but local_fs is a testing utility\n        let copy = io::copy_buf(&mut buffer_to_read, &mut destination);\n\n        let bytes_read = tokio::select! 
{\n            biased;\n            _ = cancel.cancelled() => {\n                let file = destination.into_inner();\n                // wait for the inflight operation(s) to complete so that there could be a next\n                // attempt right away and our writes are not directed to their file.\n                file.into_std().await;\n\n                // TODO: leave the temp or not? leaving is probably less racy. enabled truncate at\n                // least.\n                fs::remove_file(temp_file_path).await.context(\"remove temp_file_path after cancellation or timeout\")?;\n                return Err(TimeoutOrCancel::Cancel.into());\n            }\n            read = copy => read,\n        };\n\n        let bytes_read =\n            bytes_read.with_context(|| {\n                format!(\n                    \"Failed to upload file (write temp) to the local storage at '{temp_file_path}'\",\n                )\n            })?;\n\n        if bytes_read < from_size_bytes {\n            bail!(\n                \"Provided stream was shorter than expected: {bytes_read} vs {from_size_bytes} bytes\"\n            );\n        }\n        // Check if there is any extra data after the given size.\n        let mut from = buffer_to_read.into_inner();\n        let extra_read = from.read(&mut [1]).await?;\n        ensure!(\n            extra_read == 0,\n            \"Provided stream was larger than expected: expected {from_size_bytes} bytes\",\n        );\n\n        destination.flush().await.with_context(|| {\n            format!(\n                \"Failed to upload (flush temp) file to the local storage at '{temp_file_path}'\",\n            )\n        })?;\n\n        fs::rename(temp_file_path, &target_file_path)\n            .await\n            .with_context(|| {\n                format!(\n                    \"Failed to upload (rename) file to the local storage at '{target_file_path}'\",\n                )\n            })?;\n\n        if let Some(storage_metadata) = 
metadata {\n            // FIXME: we must not be using metadata much, since this would forget the old metadata\n            // for new writes? or perhaps metadata is sticky; could consider removing if it's never\n            // used.\n            let storage_metadata_path = storage_metadata_path(&target_file_path);\n            fs::write(\n                &storage_metadata_path,\n                serde_json::to_string(&storage_metadata.0)\n                    .context(\"Failed to serialize storage metadata as json\")?,\n            )\n            .await\n            .with_context(|| {\n                format!(\n                    \"Failed to write metadata to the local storage at '{storage_metadata_path}'\",\n                )\n            })?;\n        }\n\n        Ok(())\n    }\n}\n\nimpl RemoteStorage for LocalFs {\n    fn list_streaming(\n        &self,\n        prefix: Option<&RemotePath>,\n        mode: ListingMode,\n        max_keys: Option<NonZeroU32>,\n        cancel: &CancellationToken,\n    ) -> impl Stream<Item = Result<Listing, DownloadError>> {\n        let listing = self.list(prefix, mode, max_keys, cancel);\n        futures::stream::once(listing)\n    }\n\n    async fn list(\n        &self,\n        prefix: Option<&RemotePath>,\n        mode: ListingMode,\n        max_keys: Option<NonZeroU32>,\n        cancel: &CancellationToken,\n    ) -> Result<Listing, DownloadError> {\n        let op = async {\n            let mut result = Listing::default();\n\n            // Filter out directories: in S3 directories don't exist, only the keys within them do.\n            let keys = self\n                .list_recursive(prefix)\n                .await\n                .map_err(DownloadError::Other)?;\n            let mut objects = Vec::with_capacity(keys.len());\n            for key in keys {\n                let path = key.with_base(&self.storage_root);\n                let metadata = file_metadata(&path).await;\n                if let 
Err(DownloadError::NotFound) = metadata {\n                    // Race: if the file is deleted between listing and metadata check, ignore it.\n                    continue;\n                }\n                let metadata = metadata?;\n                if metadata.is_dir() {\n                    continue;\n                }\n                objects.push(ListingObject {\n                    key: key.clone(),\n                    last_modified: metadata.modified()?,\n                    size: metadata.len(),\n                });\n            }\n            let objects = objects;\n\n            if let ListingMode::NoDelimiter = mode {\n                result.keys = objects;\n            } else {\n                let mut prefixes = HashSet::new();\n                for object in objects {\n                    let key = object.key;\n                    // If the part after the prefix includes a \"/\", take only the first part and put it in `prefixes`.\n                    let relative_key = if let Some(prefix) = prefix {\n                        let mut prefix = prefix.clone();\n                        // We only strip the dirname of the prefix, so that when we strip it from the start of keys we\n                        // end up with full file/dir names.\n                        let prefix_full_local_path = prefix.with_base(&self.storage_root);\n                        let has_slash = prefix.0.to_string().ends_with('/');\n                        let strip_prefix = if prefix_full_local_path.is_dir() && has_slash {\n                            prefix\n                        } else {\n                            prefix.0.pop();\n                            prefix\n                        };\n\n                        RemotePath::new(key.strip_prefix(&strip_prefix).unwrap()).unwrap()\n                    } else {\n                        key\n                    };\n\n                    let relative_key = format!(\"{relative_key}\");\n                    if 
relative_key.contains(REMOTE_STORAGE_PREFIX_SEPARATOR) {\n                        let first_part = relative_key\n                            .split(REMOTE_STORAGE_PREFIX_SEPARATOR)\n                            .next()\n                            .unwrap()\n                            .to_owned();\n                        prefixes.insert(first_part);\n                    } else {\n                        result.keys.push(ListingObject {\n                            key: RemotePath::from_string(&relative_key).unwrap(),\n                            last_modified: object.last_modified,\n                            size: object.size,\n                        });\n                    }\n                }\n                result.prefixes = prefixes\n                    .into_iter()\n                    .map(|s| RemotePath::from_string(&s).unwrap())\n                    .collect();\n            }\n\n            if let Some(max_keys) = max_keys {\n                result.keys.truncate(max_keys.get() as usize);\n            }\n            Ok(result)\n        };\n\n        let timeout = async {\n            tokio::time::sleep(self.timeout).await;\n            Err(DownloadError::Timeout)\n        };\n\n        let cancelled = async {\n            cancel.cancelled().await;\n            Err(DownloadError::Cancelled)\n        };\n\n        tokio::select! 
{\n            res = op => res,\n            res = timeout => res,\n            res = cancelled => res,\n        }\n    }\n\n    async fn list_versions(\n        &self,\n        _prefix: Option<&RemotePath>,\n        _mode: ListingMode,\n        _max_keys: Option<NonZeroU32>,\n        _cancel: &CancellationToken,\n    ) -> Result<crate::VersionListing, DownloadError> {\n        unimplemented!()\n    }\n\n    async fn head_object(\n        &self,\n        key: &RemotePath,\n        _cancel: &CancellationToken,\n    ) -> Result<ListingObject, DownloadError> {\n        let target_file_path = key.with_base(&self.storage_root);\n        let metadata = file_metadata(&target_file_path).await?;\n        Ok(ListingObject {\n            key: key.clone(),\n            last_modified: metadata.modified()?,\n            size: metadata.len(),\n        })\n    }\n\n    async fn upload(\n        &self,\n        data: impl Stream<Item = std::io::Result<Bytes>> + Send + Sync,\n        data_size_bytes: usize,\n        to: &RemotePath,\n        metadata: Option<StorageMetadata>,\n        cancel: &CancellationToken,\n    ) -> anyhow::Result<()> {\n        let cancel = cancel.child_token();\n\n        let op = self.upload0(data, data_size_bytes, to, metadata, &cancel);\n        let mut op = std::pin::pin!(op);\n\n        // race the upload0 to the timeout; if it goes over, do a graceful shutdown\n        let (res, timeout) = tokio::select! 
{\n            res = &mut op => (res, false),\n            _ = tokio::time::sleep(self.timeout) => {\n                cancel.cancel();\n                (op.await, true)\n            }\n        };\n\n        match res {\n            Err(e) if timeout && TimeoutOrCancel::caused_by_cancel(&e) => {\n                // we caused this cancel (or they happened simultaneously) -- swap it out to\n                // Timeout\n                Err(TimeoutOrCancel::Timeout.into())\n            }\n            res => res,\n        }\n    }\n\n    async fn download(\n        &self,\n        from: &RemotePath,\n        opts: &DownloadOpts,\n        cancel: &CancellationToken,\n    ) -> Result<Download, DownloadError> {\n        let target_path = from.with_base(&self.storage_root);\n\n        let file_metadata = file_metadata(&target_path).await?;\n        let etag = mock_etag(&file_metadata);\n\n        if opts.etag.as_ref() == Some(&etag) {\n            return Err(DownloadError::Unmodified);\n        }\n\n        let mut file = fs::OpenOptions::new()\n            .read(true)\n            .open(&target_path)\n            .await\n            .with_context(|| {\n                format!(\"Failed to open source file {target_path:?} to use in the download\")\n            })\n            .map_err(DownloadError::Other)?;\n\n        let mut take = file_metadata.len();\n        if let Some((start, end)) = opts.byte_range() {\n            if start > 0 {\n                file.seek(io::SeekFrom::Start(start))\n                    .await\n                    .context(\"Failed to seek to the range start in a local storage file\")\n                    .map_err(DownloadError::Other)?;\n            }\n            if let Some(end) = end {\n                take = end - start;\n            }\n        }\n\n        let source = ReaderStream::new(file.take(take));\n\n        let metadata = self\n            .read_storage_metadata(&target_path)\n            .await\n            
.map_err(DownloadError::Other)?;\n\n        let cancel_or_timeout = crate::support::cancel_or_timeout(self.timeout, cancel.clone());\n        let source = crate::support::DownloadStream::new(cancel_or_timeout, source);\n\n        Ok(Download {\n            metadata,\n            last_modified: file_metadata\n                .modified()\n                .map_err(|e| DownloadError::Other(anyhow::anyhow!(e).context(\"Reading mtime\")))?,\n            etag,\n            download_stream: Box::pin(source),\n        })\n    }\n\n    async fn delete(&self, path: &RemotePath, _cancel: &CancellationToken) -> anyhow::Result<()> {\n        let file_path = path.with_base(&self.storage_root);\n        match fs::remove_file(&file_path).await {\n            Ok(()) => Ok(()),\n            // The file doesn't exist. This shouldn't yield an error to mirror S3's behaviour.\n            // See https://docs.aws.amazon.com/AmazonS3/latest/API/API_DeleteObject.html\n            // > If there isn't a null version, Amazon S3 does not remove any objects but will still respond that the command was successful.\n            Err(e) if e.kind() == ErrorKind::NotFound => Ok(()),\n            Err(e) => Err(anyhow::anyhow!(e)),\n        }\n    }\n\n    async fn delete_objects(\n        &self,\n        paths: &[RemotePath],\n        cancel: &CancellationToken,\n    ) -> anyhow::Result<()> {\n        for path in paths {\n            self.delete(path, cancel).await?\n        }\n        Ok(())\n    }\n\n    fn max_keys_per_delete(&self) -> usize {\n        super::MAX_KEYS_PER_DELETE_S3\n    }\n\n    async fn copy(\n        &self,\n        from: &RemotePath,\n        to: &RemotePath,\n        _cancel: &CancellationToken,\n    ) -> anyhow::Result<()> {\n        let from_path = from.with_base(&self.storage_root);\n        let to_path = to.with_base(&self.storage_root);\n        create_target_directory(&to_path).await?;\n        fs::copy(&from_path, &to_path)\n            .await\n            
.with_context(|| format!(\"Failed to copy file from '{from_path}' to '{to_path}'\"))?;\n        Ok(())\n    }\n\n    async fn time_travel_recover(\n        &self,\n        _prefix: Option<&RemotePath>,\n        _timestamp: SystemTime,\n        _done_if_after: SystemTime,\n        _cancel: &CancellationToken,\n        _complexity_limit: Option<NonZeroU32>,\n    ) -> Result<(), TimeTravelError> {\n        Err(TimeTravelError::Unimplemented)\n    }\n}\n\nfn storage_metadata_path(original_path: &Utf8Path) -> Utf8PathBuf {\n    path_with_suffix_extension(original_path, \"metadata\")\n}\n\nasync fn create_target_directory(target_file_path: &Utf8Path) -> anyhow::Result<()> {\n    let target_dir = match target_file_path.parent() {\n        Some(parent_dir) => parent_dir,\n        None => bail!(\"File path '{target_file_path}' has no parent directory\"),\n    };\n    if !target_dir.exists() {\n        fs::create_dir_all(target_dir).await?;\n    }\n    Ok(())\n}\n\nasync fn file_metadata(file_path: &Utf8Path) -> Result<std::fs::Metadata, DownloadError> {\n    tokio::fs::metadata(&file_path).await.map_err(|e| {\n        if e.kind() == ErrorKind::NotFound {\n            DownloadError::NotFound\n        } else {\n            DownloadError::BadInput(e.into())\n        }\n    })\n}\n\n// Use mtime as stand-in for ETag.  
We could calculate a meaningful one by md5'ing the contents of files we\n// read, but that's expensive and the local_fs test helper's whole reason for existence is to run small tests\n// quickly, with less overhead than using a mock S3 server.\nfn mock_etag(meta: &std::fs::Metadata) -> Etag {\n    let mtime = meta.modified().expect(\"Filesystem mtime missing\");\n    format!(\"{}\", mtime.duration_since(UNIX_EPOCH).unwrap().as_millis()).into()\n}\n\n#[cfg(test)]\nmod fs_tests {\n    use std::collections::HashMap;\n    use std::io::Write;\n    use std::ops::Bound;\n\n    use camino_tempfile::tempdir;\n\n    use super::*;\n\n    async fn read_and_check_metadata(\n        storage: &LocalFs,\n        remote_storage_path: &RemotePath,\n        expected_metadata: Option<&StorageMetadata>,\n    ) -> anyhow::Result<String> {\n        let cancel = CancellationToken::new();\n        let download = storage\n            .download(remote_storage_path, &DownloadOpts::default(), &cancel)\n            .await\n            .map_err(|e| anyhow::anyhow!(\"Download failed: {e}\"))?;\n        ensure!(\n            download.metadata.as_ref() == expected_metadata,\n            \"Unexpected metadata returned for the downloaded file\"\n        );\n\n        let contents = aggregate(download.download_stream).await?;\n\n        String::from_utf8(contents).map_err(anyhow::Error::new)\n    }\n\n    #[tokio::test]\n    async fn upload_file() -> anyhow::Result<()> {\n        let (storage, cancel) = create_storage()?;\n\n        let target_path_1 = upload_dummy_file(&storage, \"upload_1\", None, &cancel).await?;\n        assert_eq!(\n            storage.list_all().await?,\n            vec![target_path_1.clone()],\n            \"Should list a single file after first upload\"\n        );\n\n        let target_path_2 = upload_dummy_file(&storage, \"upload_2\", None, &cancel).await?;\n        assert_eq!(\n            list_files_sorted(&storage).await?,\n            vec![target_path_1.clone(), 
target_path_2.clone()],\n            \"Should list a two different files after second upload\"\n        );\n\n        Ok(())\n    }\n\n    #[tokio::test]\n    async fn upload_file_negatives() -> anyhow::Result<()> {\n        let (storage, cancel) = create_storage()?;\n\n        let id = RemotePath::new(Utf8Path::new(\"dummy\"))?;\n        let content = Bytes::from_static(b\"12345\");\n        let content = move || futures::stream::once(futures::future::ready(Ok(content.clone())));\n\n        // Check that you get an error if the size parameter doesn't match the actual\n        // size of the stream.\n        storage\n            .upload(content(), 0, &id, None, &cancel)\n            .await\n            .expect_err(\"upload with zero size succeeded\");\n        storage\n            .upload(content(), 4, &id, None, &cancel)\n            .await\n            .expect_err(\"upload with too short size succeeded\");\n        storage\n            .upload(content(), 6, &id, None, &cancel)\n            .await\n            .expect_err(\"upload with too large size succeeded\");\n\n        // Correct size is 5, this should succeed.\n        storage.upload(content(), 5, &id, None, &cancel).await?;\n\n        Ok(())\n    }\n\n    fn create_storage() -> anyhow::Result<(LocalFs, CancellationToken)> {\n        let storage_root = tempdir()?.path().to_path_buf();\n        LocalFs::new(storage_root, Duration::from_secs(120)).map(|s| (s, CancellationToken::new()))\n    }\n\n    #[tokio::test]\n    async fn download_file() -> anyhow::Result<()> {\n        let (storage, cancel) = create_storage()?;\n        let upload_name = \"upload_1\";\n        let upload_target = upload_dummy_file(&storage, upload_name, None, &cancel).await?;\n\n        let contents = read_and_check_metadata(&storage, &upload_target, None).await?;\n        assert_eq!(\n            dummy_contents(upload_name),\n            contents,\n            \"We should upload and download the same contents\"\n        );\n\n        
let non_existing_path = RemotePath::new(Utf8Path::new(\"somewhere/else\"))?;\n        match storage\n            .download(&non_existing_path, &DownloadOpts::default(), &cancel)\n            .await\n        {\n            Err(DownloadError::NotFound) => {} // Should get NotFound for non existing keys\n            other => panic!(\n                \"Should get a NotFound error when downloading non-existing storage files, but got: {other:?}\"\n            ),\n        }\n        Ok(())\n    }\n\n    #[tokio::test]\n    async fn download_file_range_positive() -> anyhow::Result<()> {\n        let (storage, cancel) = create_storage()?;\n        let upload_name = \"upload_1\";\n        let upload_target = upload_dummy_file(&storage, upload_name, None, &cancel).await?;\n\n        let full_range_download_contents =\n            read_and_check_metadata(&storage, &upload_target, None).await?;\n        assert_eq!(\n            dummy_contents(upload_name),\n            full_range_download_contents,\n            \"Download full range should return the whole upload\"\n        );\n\n        let uploaded_bytes = dummy_contents(upload_name).into_bytes();\n        let (first_part_local, second_part_local) = uploaded_bytes.split_at(3);\n\n        let first_part_download = storage\n            .download(\n                &upload_target,\n                &DownloadOpts {\n                    byte_end: Bound::Excluded(first_part_local.len() as u64),\n                    ..Default::default()\n                },\n                &cancel,\n            )\n            .await?;\n        assert!(\n            first_part_download.metadata.is_none(),\n            \"No metadata should be returned for no metadata upload\"\n        );\n\n        let first_part_remote = aggregate(first_part_download.download_stream).await?;\n        assert_eq!(\n            first_part_local, first_part_remote,\n            \"First part bytes should be returned when requested\"\n        );\n\n        let 
second_part_download = storage\n            .download(\n                &upload_target,\n                &DownloadOpts {\n                    byte_start: Bound::Included(first_part_local.len() as u64),\n                    byte_end: Bound::Excluded(\n                        (first_part_local.len() + second_part_local.len()) as u64,\n                    ),\n                    ..Default::default()\n                },\n                &cancel,\n            )\n            .await?;\n        assert!(\n            second_part_download.metadata.is_none(),\n            \"No metadata should be returned for no metadata upload\"\n        );\n\n        let second_part_remote = aggregate(second_part_download.download_stream).await?;\n        assert_eq!(\n            second_part_local, second_part_remote,\n            \"Second part bytes should be returned when requested\"\n        );\n\n        let suffix_bytes = storage\n            .download(\n                &upload_target,\n                &DownloadOpts {\n                    byte_start: Bound::Included(13),\n                    ..Default::default()\n                },\n                &cancel,\n            )\n            .await?\n            .download_stream;\n        let suffix_bytes = aggregate(suffix_bytes).await?;\n        let suffix = std::str::from_utf8(&suffix_bytes)?;\n        assert_eq!(upload_name, suffix);\n\n        let all_bytes = storage\n            .download(&upload_target, &DownloadOpts::default(), &cancel)\n            .await?\n            .download_stream;\n        let all_bytes = aggregate(all_bytes).await?;\n        let all_bytes = std::str::from_utf8(&all_bytes)?;\n        assert_eq!(dummy_contents(\"upload_1\"), all_bytes);\n\n        Ok(())\n    }\n\n    #[tokio::test]\n    #[should_panic(expected = \"at or before start\")]\n    async fn download_file_range_negative() {\n        let (storage, cancel) = create_storage().unwrap();\n        let upload_name = \"upload_1\";\n        let upload_target = 
upload_dummy_file(&storage, upload_name, None, &cancel)\n            .await\n            .unwrap();\n\n        storage\n            .download(\n                &upload_target,\n                &DownloadOpts {\n                    byte_start: Bound::Included(10),\n                    byte_end: Bound::Excluded(10),\n                    ..Default::default()\n                },\n                &cancel,\n            )\n            .await\n            .unwrap();\n    }\n\n    #[tokio::test]\n    async fn delete_file() -> anyhow::Result<()> {\n        let (storage, cancel) = create_storage()?;\n        let upload_name = \"upload_1\";\n        let upload_target = upload_dummy_file(&storage, upload_name, None, &cancel).await?;\n\n        storage.delete(&upload_target, &cancel).await?;\n        assert!(storage.list_all().await?.is_empty());\n\n        storage\n            .delete(&upload_target, &cancel)\n            .await\n            .expect(\"Should allow deleting non-existing storage files\");\n\n        Ok(())\n    }\n\n    #[tokio::test]\n    async fn file_with_metadata() -> anyhow::Result<()> {\n        let (storage, cancel) = create_storage()?;\n        let upload_name = \"upload_1\";\n        let metadata = StorageMetadata(HashMap::from([\n            (\"one\".to_string(), \"1\".to_string()),\n            (\"two\".to_string(), \"2\".to_string()),\n        ]));\n        let upload_target =\n            upload_dummy_file(&storage, upload_name, Some(metadata.clone()), &cancel).await?;\n\n        let full_range_download_contents =\n            read_and_check_metadata(&storage, &upload_target, Some(&metadata)).await?;\n        assert_eq!(\n            dummy_contents(upload_name),\n            full_range_download_contents,\n            \"We should upload and download the same contents\"\n        );\n\n        let uploaded_bytes = dummy_contents(upload_name).into_bytes();\n        let (first_part_local, _) = uploaded_bytes.split_at(3);\n\n        let 
partial_download_with_metadata = storage\n            .download(\n                &upload_target,\n                &DownloadOpts {\n                    byte_end: Bound::Excluded(first_part_local.len() as u64),\n                    ..Default::default()\n                },\n                &cancel,\n            )\n            .await?;\n        let first_part_remote = aggregate(partial_download_with_metadata.download_stream).await?;\n        assert_eq!(\n            first_part_local,\n            first_part_remote.as_slice(),\n            \"First part bytes should be returned when requested\"\n        );\n\n        assert_eq!(\n            partial_download_with_metadata.metadata,\n            Some(metadata),\n            \"We should get the same metadata back for partial download\"\n        );\n\n        Ok(())\n    }\n\n    #[tokio::test]\n    async fn list() -> anyhow::Result<()> {\n        // No delimiter: should recursively list everything\n        let (storage, cancel) = create_storage()?;\n        let child = upload_dummy_file(&storage, \"grandparent/parent/child\", None, &cancel).await?;\n        let child_sibling =\n            upload_dummy_file(&storage, \"grandparent/parent/child_sibling\", None, &cancel).await?;\n        let uncle = upload_dummy_file(&storage, \"grandparent/uncle\", None, &cancel).await?;\n\n        let listing = storage\n            .list(None, ListingMode::NoDelimiter, None, &cancel)\n            .await?;\n        assert!(listing.prefixes.is_empty());\n        assert_eq!(\n            listing\n                .keys\n                .into_iter()\n                .map(|o| o.key)\n                .collect::<HashSet<_>>(),\n            HashSet::from([uncle.clone(), child.clone(), child_sibling.clone()])\n        );\n\n        // Delimiter: should only go one deep\n        let listing = storage\n            .list(None, ListingMode::WithDelimiter, None, &cancel)\n            .await?;\n\n        assert_eq!(\n            listing.prefixes,\n       
     [RemotePath::from_string(\"timelines\").unwrap()].to_vec()\n        );\n        assert!(listing.keys.is_empty());\n\n        // Delimiter & prefix with a trailing slash\n        let listing = storage\n            .list(\n                Some(&RemotePath::from_string(\"timelines/some_timeline/grandparent/\").unwrap()),\n                ListingMode::WithDelimiter,\n                None,\n                &cancel,\n            )\n            .await?;\n        assert_eq!(\n            listing.keys.into_iter().map(|o| o.key).collect::<Vec<_>>(),\n            [RemotePath::from_string(\"uncle\").unwrap()].to_vec()\n        );\n        assert_eq!(\n            listing.prefixes,\n            [RemotePath::from_string(\"parent\").unwrap()].to_vec()\n        );\n\n        // Delimiter and prefix without a trailing slash\n        let listing = storage\n            .list(\n                Some(&RemotePath::from_string(\"timelines/some_timeline/grandparent\").unwrap()),\n                ListingMode::WithDelimiter,\n                None,\n                &cancel,\n            )\n            .await?;\n        assert_eq!(listing.keys, vec![]);\n        assert_eq!(\n            listing.prefixes,\n            [RemotePath::from_string(\"grandparent\").unwrap()].to_vec()\n        );\n\n        // Delimiter and prefix that's partway through a path component\n        let listing = storage\n            .list(\n                Some(&RemotePath::from_string(\"timelines/some_timeline/grandp\").unwrap()),\n                ListingMode::WithDelimiter,\n                None,\n                &cancel,\n            )\n            .await?;\n        assert_eq!(listing.keys, vec![]);\n        assert_eq!(\n            listing.prefixes,\n            [RemotePath::from_string(\"grandparent\").unwrap()].to_vec()\n        );\n\n        Ok(())\n    }\n\n    #[tokio::test]\n    async fn list_part_component() -> anyhow::Result<()> {\n        // No delimiter: should recursively list everything\n        let 
(storage, cancel) = create_storage()?;\n\n        // Imitates what happens in a tenant path when we have an unsharded path and a sharded path, and do a listing\n        // of the unsharded path: although there is a \"directory\" at the unsharded path, it should be handled as\n        // a freeform prefix.\n        let _child_a =\n            upload_dummy_file(&storage, \"grandparent/tenant-01/child\", None, &cancel).await?;\n        let _child_b =\n            upload_dummy_file(&storage, \"grandparent/tenant/child\", None, &cancel).await?;\n\n        // Delimiter and prefix that's partway through a path component\n        let listing = storage\n            .list(\n                Some(\n                    &RemotePath::from_string(\"timelines/some_timeline/grandparent/tenant\").unwrap(),\n                ),\n                ListingMode::WithDelimiter,\n                None,\n                &cancel,\n            )\n            .await?;\n        assert_eq!(listing.keys, vec![]);\n\n        let mut found_prefixes = listing.prefixes.clone();\n        found_prefixes.sort();\n        assert_eq!(\n            found_prefixes,\n            [\n                RemotePath::from_string(\"tenant\").unwrap(),\n                RemotePath::from_string(\"tenant-01\").unwrap(),\n            ]\n            .to_vec()\n        );\n\n        Ok(())\n    }\n\n    #[tokio::test]\n    async fn overwrite_shorter_file() -> anyhow::Result<()> {\n        let (storage, cancel) = create_storage()?;\n\n        let path = RemotePath::new(\"does/not/matter/file\".into())?;\n\n        let body = Bytes::from_static(b\"long file contents is long\");\n        {\n            let len = body.len();\n            let body =\n                futures::stream::once(futures::future::ready(std::io::Result::Ok(body.clone())));\n            storage.upload(body, len, &path, None, &cancel).await?;\n        }\n\n        let read = aggregate(\n            storage\n                .download(&path, 
&DownloadOpts::default(), &cancel)\n                .await?\n                .download_stream,\n        )\n        .await?;\n        assert_eq!(body, read);\n\n        let shorter = Bytes::from_static(b\"shorter body\");\n        {\n            let len = shorter.len();\n            let body =\n                futures::stream::once(futures::future::ready(std::io::Result::Ok(shorter.clone())));\n            storage.upload(body, len, &path, None, &cancel).await?;\n        }\n\n        let read = aggregate(\n            storage\n                .download(&path, &DownloadOpts::default(), &cancel)\n                .await?\n                .download_stream,\n        )\n        .await?;\n        assert_eq!(shorter, read);\n        Ok(())\n    }\n\n    #[tokio::test]\n    async fn cancelled_upload_can_later_be_retried() -> anyhow::Result<()> {\n        let (storage, cancel) = create_storage()?;\n\n        let path = RemotePath::new(\"does/not/matter/file\".into())?;\n\n        let body = Bytes::from_static(b\"long file contents is long\");\n        {\n            let len = body.len();\n            let body =\n                futures::stream::once(futures::future::ready(std::io::Result::Ok(body.clone())));\n            let cancel = cancel.child_token();\n            cancel.cancel();\n            let e = storage\n                .upload(body, len, &path, None, &cancel)\n                .await\n                .unwrap_err();\n\n            assert!(TimeoutOrCancel::caused_by_cancel(&e));\n        }\n\n        {\n            let len = body.len();\n            let body =\n                futures::stream::once(futures::future::ready(std::io::Result::Ok(body.clone())));\n            storage.upload(body, len, &path, None, &cancel).await?;\n        }\n\n        let read = aggregate(\n            storage\n                .download(&path, &DownloadOpts::default(), &cancel)\n                .await?\n                .download_stream,\n        )\n        .await?;\n        assert_eq!(body, 
read);\n\n        Ok(())\n    }\n\n    async fn upload_dummy_file(\n        storage: &LocalFs,\n        name: &str,\n        metadata: Option<StorageMetadata>,\n        cancel: &CancellationToken,\n    ) -> anyhow::Result<RemotePath> {\n        let from_path = storage\n            .storage_root\n            .join(\"timelines\")\n            .join(\"some_timeline\")\n            .join(name);\n        let (file, size) = create_file_for_upload(&from_path, &dummy_contents(name)).await?;\n\n        let relative_path = from_path\n            .strip_prefix(&storage.storage_root)\n            .context(\"Failed to strip storage root prefix\")\n            .and_then(RemotePath::new)\n            .with_context(|| {\n                format!(\n                    \"Failed to resolve remote part of path {:?} for base {:?}\",\n                    from_path, storage.storage_root\n                )\n            })?;\n\n        let file = tokio_util::io::ReaderStream::new(file);\n\n        storage\n            .upload(file, size, &relative_path, metadata, cancel)\n            .await?;\n        Ok(relative_path)\n    }\n\n    async fn create_file_for_upload(\n        path: &Utf8Path,\n        contents: &str,\n    ) -> anyhow::Result<(fs::File, usize)> {\n        std::fs::create_dir_all(path.parent().unwrap())?;\n        let mut file_for_writing = std::fs::OpenOptions::new()\n            .write(true)\n            .create_new(true)\n            .open(path)?;\n        write!(file_for_writing, \"{contents}\")?;\n        drop(file_for_writing);\n        let file_size = path.metadata()?.len() as usize;\n        Ok((\n            fs::OpenOptions::new().read(true).open(&path).await?,\n            file_size,\n        ))\n    }\n\n    fn dummy_contents(name: &str) -> String {\n        format!(\"contents for {name}\")\n    }\n\n    async fn list_files_sorted(storage: &LocalFs) -> anyhow::Result<Vec<RemotePath>> {\n        let mut files = storage.list_all().await?;\n        files.sort_by(|a, b| 
a.0.cmp(&b.0));\n        Ok(files)\n    }\n\n    async fn aggregate(\n        stream: impl Stream<Item = std::io::Result<Bytes>>,\n    ) -> anyhow::Result<Vec<u8>> {\n        use futures::stream::StreamExt;\n        let mut out = Vec::new();\n        let mut stream = std::pin::pin!(stream);\n        while let Some(res) = stream.next().await {\n            out.extend_from_slice(&res?[..]);\n        }\n        Ok(out)\n    }\n}\n"
  },
  {
    "path": "libs/remote_storage/src/metrics.rs",
    "content": "use metrics::{\n    Histogram, IntCounter, register_histogram_vec, register_int_counter, register_int_counter_vec,\n};\nuse once_cell::sync::Lazy;\n\npub(super) static BUCKET_METRICS: Lazy<BucketMetrics> = Lazy::new(Default::default);\n\n#[derive(Clone, Copy, Debug)]\npub(crate) enum RequestKind {\n    Get = 0,\n    Put = 1,\n    Delete = 2,\n    List = 3,\n    Copy = 4,\n    TimeTravel = 5,\n    Head = 6,\n    ListVersions = 7,\n}\n\nuse RequestKind::*;\nuse scopeguard::ScopeGuard;\n\nimpl RequestKind {\n    const fn as_str(&self) -> &'static str {\n        match self {\n            Get => \"get_object\",\n            Put => \"put_object\",\n            Delete => \"delete_object\",\n            List => \"list_objects\",\n            Copy => \"copy_object\",\n            TimeTravel => \"time_travel_recover\",\n            Head => \"head_object\",\n            ListVersions => \"list_versions\",\n        }\n    }\n    const fn as_index(&self) -> usize {\n        *self as usize\n    }\n}\n\nconst REQUEST_KIND_LIST: &[RequestKind] =\n    &[Get, Put, Delete, List, Copy, TimeTravel, Head, ListVersions];\n\nconst REQUEST_KIND_COUNT: usize = REQUEST_KIND_LIST.len();\npub(crate) struct RequestTyped<C>([C; REQUEST_KIND_COUNT]);\n\nimpl<C> RequestTyped<C> {\n    pub(crate) fn get(&self, kind: RequestKind) -> &C {\n        &self.0[kind.as_index()]\n    }\n\n    fn build_with(mut f: impl FnMut(RequestKind) -> C) -> Self {\n        let mut it = REQUEST_KIND_LIST.iter();\n        let arr = std::array::from_fn::<C, REQUEST_KIND_COUNT, _>(|index| {\n            let next = it.next().unwrap();\n            assert_eq!(index, next.as_index());\n            f(*next)\n        });\n\n        if let Some(next) = it.next() {\n            panic!(\"unexpected {next:?}\");\n        }\n\n        RequestTyped(arr)\n    }\n}\n\nimpl RequestTyped<Histogram> {\n    pub(crate) fn observe_elapsed(&self, kind: RequestKind, started_at: std::time::Instant) {\n        
self.get(kind).observe(started_at.elapsed().as_secs_f64())\n    }\n}\n\npub(crate) struct PassFailCancelledRequestTyped<C> {\n    success: RequestTyped<C>,\n    fail: RequestTyped<C>,\n    cancelled: RequestTyped<C>,\n}\n\n#[derive(Debug, Clone, Copy)]\npub(crate) enum AttemptOutcome {\n    Ok,\n    Err,\n    Cancelled,\n}\n\nimpl<T, E> From<&Result<T, E>> for AttemptOutcome {\n    fn from(value: &Result<T, E>) -> Self {\n        match value {\n            Ok(_) => AttemptOutcome::Ok,\n            Err(_) => AttemptOutcome::Err,\n        }\n    }\n}\n\nimpl AttemptOutcome {\n    pub(crate) fn as_str(&self) -> &'static str {\n        match self {\n            AttemptOutcome::Ok => \"ok\",\n            AttemptOutcome::Err => \"err\",\n            AttemptOutcome::Cancelled => \"cancelled\",\n        }\n    }\n}\n\nimpl<C> PassFailCancelledRequestTyped<C> {\n    pub(crate) fn get(&self, kind: RequestKind, outcome: AttemptOutcome) -> &C {\n        let target = match outcome {\n            AttemptOutcome::Ok => &self.success,\n            AttemptOutcome::Err => &self.fail,\n            AttemptOutcome::Cancelled => &self.cancelled,\n        };\n        target.get(kind)\n    }\n\n    fn build_with(mut f: impl FnMut(RequestKind, AttemptOutcome) -> C) -> Self {\n        let success = RequestTyped::build_with(|kind| f(kind, AttemptOutcome::Ok));\n        let fail = RequestTyped::build_with(|kind| f(kind, AttemptOutcome::Err));\n        let cancelled = RequestTyped::build_with(|kind| f(kind, AttemptOutcome::Cancelled));\n\n        PassFailCancelledRequestTyped {\n            success,\n            fail,\n            cancelled,\n        }\n    }\n}\n\nimpl PassFailCancelledRequestTyped<Histogram> {\n    pub(crate) fn observe_elapsed(\n        &self,\n        kind: RequestKind,\n        outcome: impl Into<AttemptOutcome>,\n        started_at: std::time::Instant,\n    ) {\n        self.get(kind, outcome.into())\n            .observe(started_at.elapsed().as_secs_f64())\n    
}\n}\n\n/// On drop (cancellation) count towards [`BucketMetrics::cancelled_waits`].\npub(crate) fn start_counting_cancelled_wait(\n    kind: RequestKind,\n) -> ScopeGuard<std::time::Instant, impl FnOnce(std::time::Instant), scopeguard::OnSuccess> {\n    scopeguard::guard_on_success(std::time::Instant::now(), move |_| {\n        crate::metrics::BUCKET_METRICS\n            .cancelled_waits\n            .get(kind)\n            .inc()\n    })\n}\n\n/// On drop (cancellation) add time to [`BucketMetrics::req_seconds`].\npub(crate) fn start_measuring_requests(\n    kind: RequestKind,\n) -> ScopeGuard<std::time::Instant, impl FnOnce(std::time::Instant), scopeguard::OnSuccess> {\n    scopeguard::guard_on_success(std::time::Instant::now(), move |started_at| {\n        crate::metrics::BUCKET_METRICS.req_seconds.observe_elapsed(\n            kind,\n            AttemptOutcome::Cancelled,\n            started_at,\n        )\n    })\n}\n\npub(crate) struct BucketMetrics {\n    /// Full request duration until successful completion, error or cancellation.\n    pub(crate) req_seconds: PassFailCancelledRequestTyped<Histogram>,\n    /// Total amount of seconds waited on queue.\n    pub(crate) wait_seconds: RequestTyped<Histogram>,\n\n    /// Track how many semaphore awaits were cancelled per request type.\n    ///\n    /// This is in case cancellations are happening more than expected.\n    pub(crate) cancelled_waits: RequestTyped<IntCounter>,\n\n    /// Total amount of deleted objects in batches or single requests.\n    pub(crate) deleted_objects_total: IntCounter,\n}\n\nimpl Default for BucketMetrics {\n    fn default() -> Self {\n        // first bucket 100 microseconds to count requests that do not need to wait at all\n        // and get a permit immediately\n        let buckets = [0.0001, 0.01, 0.10, 0.5, 1.0, 5.0, 10.0, 50.0, 100.0];\n\n        let req_seconds = register_histogram_vec!(\n            \"remote_storage_s3_request_seconds\",\n            \"Seconds to complete a 
request\",\n            &[\"request_type\", \"result\"],\n            buckets.to_vec(),\n        )\n        .unwrap();\n        let req_seconds = PassFailCancelledRequestTyped::build_with(|kind, outcome| {\n            req_seconds.with_label_values(&[kind.as_str(), outcome.as_str()])\n        });\n\n        let wait_seconds = register_histogram_vec!(\n            \"remote_storage_s3_wait_seconds\",\n            \"Seconds rate limited\",\n            &[\"request_type\"],\n            buckets.to_vec(),\n        )\n        .unwrap();\n        let wait_seconds =\n            RequestTyped::build_with(|kind| wait_seconds.with_label_values(&[kind.as_str()]));\n\n        let cancelled_waits = register_int_counter_vec!(\n            \"remote_storage_s3_cancelled_waits_total\",\n            \"Times a semaphore wait has been cancelled per request type\",\n            &[\"request_type\"],\n        )\n        .unwrap();\n        let cancelled_waits =\n            RequestTyped::build_with(|kind| cancelled_waits.with_label_values(&[kind.as_str()]));\n\n        let deleted_objects_total = register_int_counter!(\n            \"remote_storage_s3_deleted_objects_total\",\n            \"Amount of deleted objects in total\",\n        )\n        .unwrap();\n\n        Self {\n            req_seconds,\n            wait_seconds,\n            cancelled_waits,\n            deleted_objects_total,\n        }\n    }\n}\n"
  },
  {
    "path": "libs/remote_storage/src/s3_bucket.rs",
    "content": "//! AWS S3 storage wrapper around `rusoto` library.\n//!\n//! Respects `prefix_in_bucket` property from [`S3Config`],\n//! allowing multiple api users to independently work with the same S3 bucket, if\n//! their bucket prefixes are both specified and different.\n\nuse std::borrow::Cow;\nuse std::collections::HashMap;\nuse std::num::NonZeroU32;\nuse std::pin::Pin;\nuse std::sync::Arc;\nuse std::task::{Context, Poll};\nuse std::time::{Duration, SystemTime};\n\nuse anyhow::{Context as _, anyhow};\nuse aws_config::BehaviorVersion;\nuse aws_config::default_provider::credentials::DefaultCredentialsChain;\nuse aws_config::retry::{RetryConfigBuilder, RetryMode};\nuse aws_sdk_s3::Client;\nuse aws_sdk_s3::config::{AsyncSleep, IdentityCache, Region, SharedAsyncSleep};\nuse aws_sdk_s3::error::SdkError;\nuse aws_sdk_s3::operation::get_object::GetObjectError;\nuse aws_sdk_s3::operation::head_object::HeadObjectError;\nuse aws_sdk_s3::types::{Delete, ObjectIdentifier, StorageClass};\nuse aws_smithy_async::rt::sleep::TokioSleep;\nuse aws_smithy_types::body::SdkBody;\nuse aws_smithy_types::byte_stream::ByteStream;\nuse aws_smithy_types::date_time::ConversionError;\nuse bytes::Bytes;\nuse futures::stream::Stream;\nuse futures_util::StreamExt;\nuse http_body_util::StreamBody;\nuse http_types::StatusCode;\nuse hyper::body::Frame;\nuse scopeguard::ScopeGuard;\nuse tokio_util::sync::CancellationToken;\nuse utils::backoff;\n\nuse super::StorageMetadata;\nuse crate::config::S3Config;\nuse crate::error::Cancelled;\npub(super) use crate::metrics::RequestKind;\nuse crate::metrics::{AttemptOutcome, start_counting_cancelled_wait, start_measuring_requests};\nuse crate::support::PermitCarrying;\nuse crate::{\n    ConcurrencyLimiter, Download, DownloadError, DownloadOpts, Listing, ListingMode, ListingObject,\n    MAX_KEYS_PER_DELETE_S3, REMOTE_STORAGE_PREFIX_SEPARATOR, RemotePath, RemoteStorage,\n    TimeTravelError, TimeoutOrCancel, Version, VersionId, VersionKind, 
VersionListing,\n};\n\n/// AWS S3 storage.\npub struct S3Bucket {\n    client: Client,\n    bucket_name: String,\n    prefix_in_bucket: Option<String>,\n    max_keys_per_list_response: Option<i32>,\n    upload_storage_class: Option<StorageClass>,\n    concurrency_limiter: ConcurrencyLimiter,\n    // Per-request timeout. Accessible for tests.\n    pub timeout: Duration,\n}\n\nstruct GetObjectRequest {\n    bucket: String,\n    key: String,\n    etag: Option<String>,\n    range: Option<String>,\n    version_id: Option<String>,\n}\nimpl S3Bucket {\n    /// Creates the S3 storage, errors if incorrect AWS S3 configuration provided.\n    pub async fn new(remote_storage_config: &S3Config, timeout: Duration) -> anyhow::Result<Self> {\n        tracing::debug!(\n            \"Creating s3 remote storage for S3 bucket {}\",\n            remote_storage_config.bucket_name\n        );\n\n        let region = Region::new(remote_storage_config.bucket_region.clone());\n        let region_opt = Some(region.clone());\n\n        // https://docs.aws.amazon.com/sdkref/latest/guide/standardized-credentials.html\n        // https://docs.rs/aws-config/latest/aws_config/default_provider/credentials/struct.DefaultCredentialsChain.html\n        // Incomplete list of auth methods used by this:\n        // * \"AWS_ACCESS_KEY_ID\", \"AWS_SECRET_ACCESS_KEY\"\n        // * \"AWS_PROFILE\" / `aws sso login --profile <profile>`\n        // * \"AWS_WEB_IDENTITY_TOKEN_FILE\", \"AWS_ROLE_ARN\", \"AWS_ROLE_SESSION_NAME\"\n        // * http (ECS/EKS) container credentials\n        // * imds v2\n        let credentials_provider = DefaultCredentialsChain::builder()\n            .region(region)\n            .build()\n            .await;\n\n        // AWS SDK requires us to specify how the RetryConfig should sleep when it wants to back off\n        let sleep_impl: Arc<dyn AsyncSleep> = Arc::new(TokioSleep::new());\n\n        let sdk_config_loader: aws_config::ConfigLoader = aws_config::defaults(\n            
#[allow(deprecated)] /* TODO: https://github.com/neondatabase/neon/issues/7665 */\n            BehaviorVersion::v2023_11_09(),\n        )\n        .region(region_opt)\n        .identity_cache(IdentityCache::lazy().build())\n        .credentials_provider(credentials_provider)\n        .sleep_impl(SharedAsyncSleep::from(sleep_impl));\n\n        let sdk_config: aws_config::SdkConfig = std::thread::scope(|s| {\n            s.spawn(|| {\n                // TODO: make this function async.\n                tokio::runtime::Builder::new_current_thread()\n                    .enable_all()\n                    .build()\n                    .unwrap()\n                    .block_on(sdk_config_loader.load())\n            })\n            .join()\n            .unwrap()\n        });\n\n        let mut s3_config_builder = aws_sdk_s3::config::Builder::from(&sdk_config);\n\n        // Technically, the `remote_storage_config.endpoint` field only applies to S3 interactions.\n        // (In case we ever re-use the `sdk_config` for more than just the S3 client in the future)\n        if let Some(custom_endpoint) = remote_storage_config.endpoint.clone() {\n            s3_config_builder = s3_config_builder\n                .endpoint_url(custom_endpoint)\n                .force_path_style(true);\n        }\n\n        // We do our own retries (see [`backoff::retry`]).  However, for the AWS SDK to enable rate limiting in response to throttling\n        // responses (e.g. 429 on too many ListObjectsv2 requests), we must provide a retry config.  
We set it to use at most one\n        // attempt, and enable 'Adaptive' mode, which causes rate limiting to be enabled.\n        let mut retry_config = RetryConfigBuilder::new();\n        retry_config\n            .set_max_attempts(Some(1))\n            .set_mode(Some(RetryMode::Adaptive));\n        s3_config_builder = s3_config_builder.retry_config(retry_config.build());\n\n        let s3_config = s3_config_builder.build();\n        let client = aws_sdk_s3::Client::from_conf(s3_config);\n\n        let prefix_in_bucket = remote_storage_config\n            .prefix_in_bucket\n            .as_deref()\n            .map(|prefix| {\n                let mut prefix = prefix;\n                while prefix.starts_with(REMOTE_STORAGE_PREFIX_SEPARATOR) {\n                    prefix = &prefix[1..]\n                }\n\n                let mut prefix = prefix.to_string();\n                while prefix.ends_with(REMOTE_STORAGE_PREFIX_SEPARATOR) {\n                    prefix.pop();\n                }\n                prefix\n            });\n\n        Ok(Self {\n            client,\n            bucket_name: remote_storage_config.bucket_name.clone(),\n            max_keys_per_list_response: remote_storage_config.max_keys_per_list_response,\n            prefix_in_bucket,\n            concurrency_limiter: ConcurrencyLimiter::new(\n                remote_storage_config.concurrency_limit.get(),\n            ),\n            upload_storage_class: remote_storage_config.upload_storage_class.clone(),\n            timeout,\n        })\n    }\n\n    fn s3_object_to_relative_path(&self, key: &str) -> RemotePath {\n        let relative_path =\n            match key.strip_prefix(self.prefix_in_bucket.as_deref().unwrap_or_default()) {\n                Some(stripped) => stripped,\n                // we rely on AWS to return properly prefixed paths\n                // for requests with a certain prefix\n                None => panic!(\n                    \"Key {} does not start with bucket prefix 
{:?}\",\n                    key, self.prefix_in_bucket\n                ),\n            };\n        RemotePath(\n            relative_path\n                .split(REMOTE_STORAGE_PREFIX_SEPARATOR)\n                .collect(),\n        )\n    }\n\n    pub fn relative_path_to_s3_object(&self, path: &RemotePath) -> String {\n        assert_eq!(std::path::MAIN_SEPARATOR, REMOTE_STORAGE_PREFIX_SEPARATOR);\n        let path_string = path.get_path().as_str();\n        match &self.prefix_in_bucket {\n            Some(prefix) => prefix.clone() + \"/\" + path_string,\n            None => path_string.to_string(),\n        }\n    }\n\n    async fn permit(\n        &self,\n        kind: RequestKind,\n        cancel: &CancellationToken,\n    ) -> Result<tokio::sync::SemaphorePermit<'_>, Cancelled> {\n        let started_at = start_counting_cancelled_wait(kind);\n        let acquire = self.concurrency_limiter.acquire(kind);\n\n        let permit = tokio::select! {\n            permit = acquire => permit.expect(\"semaphore is never closed\"),\n            _ = cancel.cancelled() => return Err(Cancelled),\n        };\n\n        let started_at = ScopeGuard::into_inner(started_at);\n        crate::metrics::BUCKET_METRICS\n            .wait_seconds\n            .observe_elapsed(kind, started_at);\n\n        Ok(permit)\n    }\n\n    async fn owned_permit(\n        &self,\n        kind: RequestKind,\n        cancel: &CancellationToken,\n    ) -> Result<tokio::sync::OwnedSemaphorePermit, Cancelled> {\n        let started_at = start_counting_cancelled_wait(kind);\n        let acquire = self.concurrency_limiter.acquire_owned(kind);\n\n        let permit = tokio::select! 
{\n            permit = acquire => permit.expect(\"semaphore is never closed\"),\n            _ = cancel.cancelled() => return Err(Cancelled),\n        };\n\n        let started_at = ScopeGuard::into_inner(started_at);\n        crate::metrics::BUCKET_METRICS\n            .wait_seconds\n            .observe_elapsed(kind, started_at);\n        Ok(permit)\n    }\n\n    async fn download_object(\n        &self,\n        request: GetObjectRequest,\n        cancel: &CancellationToken,\n    ) -> Result<Download, DownloadError> {\n        let kind = RequestKind::Get;\n\n        let permit = self.owned_permit(kind, cancel).await?;\n\n        let started_at = start_measuring_requests(kind);\n\n        let mut builder = self\n            .client\n            .get_object()\n            .bucket(request.bucket)\n            .key(request.key)\n            .set_version_id(request.version_id)\n            .set_range(request.range);\n\n        if let Some(etag) = request.etag {\n            builder = builder.if_none_match(etag);\n        }\n\n        let get_object = builder.send();\n\n        let get_object = tokio::select! {\n            res = get_object => res,\n            _ = tokio::time::sleep(self.timeout) => return Err(DownloadError::Timeout),\n            _ = cancel.cancelled() => return Err(DownloadError::Cancelled),\n        };\n\n        let started_at = ScopeGuard::into_inner(started_at);\n\n        let object_output = match get_object {\n            Ok(object_output) => object_output,\n            Err(SdkError::ServiceError(e)) if matches!(e.err(), GetObjectError::NoSuchKey(_)) => {\n                // Count this in the AttemptOutcome::Ok bucket, because 404 is not\n                // an error: we expect to sometimes fetch an object and find it missing,\n                // e.g. 
when probing for timeline indices.\n                crate::metrics::BUCKET_METRICS.req_seconds.observe_elapsed(\n                    kind,\n                    AttemptOutcome::Ok,\n                    started_at,\n                );\n                return Err(DownloadError::NotFound);\n            }\n            Err(SdkError::ServiceError(e))\n                // aws_smithy_runtime_api::http::response::StatusCode isn't\n                // re-exported by any aws crates, so just check the numeric\n                // status against http_types::StatusCode instead of pulling it.\n                if e.raw().status().as_u16() == StatusCode::NotModified =>\n            {\n                // Count an unmodified file as a success.\n                crate::metrics::BUCKET_METRICS.req_seconds.observe_elapsed(\n                    kind,\n                    AttemptOutcome::Ok,\n                    started_at,\n                );\n                return Err(DownloadError::Unmodified);\n            }\n            Err(e) => {\n                crate::metrics::BUCKET_METRICS.req_seconds.observe_elapsed(\n                    kind,\n                    AttemptOutcome::Err,\n                    started_at,\n                );\n\n                return Err(DownloadError::Other(\n                    anyhow::Error::new(e).context(\"download s3 object\"),\n                ));\n            }\n        };\n\n        // even if we would have no timeout left, continue anyways. 
the caller can decide to ignore\n        // the errors considering timeouts and cancellation.\n        let remaining = self.timeout.saturating_sub(started_at.elapsed());\n\n        let metadata = object_output.metadata().cloned().map(StorageMetadata);\n        let etag = object_output\n            .e_tag\n            .ok_or(DownloadError::Other(anyhow::anyhow!(\"Missing ETag header\")))?\n            .into();\n        let last_modified = object_output\n            .last_modified\n            .ok_or(DownloadError::Other(anyhow::anyhow!(\n                \"Missing LastModified header\"\n            )))?\n            .try_into()\n            .map_err(|e: ConversionError| DownloadError::Other(e.into()))?;\n\n        let body = object_output.body;\n        let body = ByteStreamAsStream::from(body);\n        let body = PermitCarrying::new(permit, body);\n        let body = TimedDownload::new(started_at, body);\n\n        let cancel_or_timeout = crate::support::cancel_or_timeout(remaining, cancel.clone());\n        let body = crate::support::DownloadStream::new(cancel_or_timeout, body);\n\n        Ok(Download {\n            metadata,\n            etag,\n            last_modified,\n            download_stream: Box::pin(body),\n        })\n    }\n\n    async fn delete_oids(\n        &self,\n        _permit: &tokio::sync::SemaphorePermit<'_>,\n        delete_objects: &[ObjectIdentifier],\n        cancel: &CancellationToken,\n    ) -> anyhow::Result<()> {\n        let kind = RequestKind::Delete;\n        let mut cancel = std::pin::pin!(cancel.cancelled());\n\n        for chunk in delete_objects.chunks(MAX_KEYS_PER_DELETE_S3) {\n            let started_at = start_measuring_requests(kind);\n\n            let req = self\n                .client\n                .delete_objects()\n                .bucket(self.bucket_name.clone())\n                .delete(\n                    Delete::builder()\n                        .set_objects(Some(chunk.to_vec()))\n                        
.build()\n                        .context(\"build request\")?,\n                )\n                .send();\n\n            let resp = tokio::select! {\n                resp = req => resp,\n                _ = tokio::time::sleep(self.timeout) => return Err(TimeoutOrCancel::Timeout.into()),\n                _ = &mut cancel => return Err(TimeoutOrCancel::Cancel.into()),\n            };\n\n            let started_at = ScopeGuard::into_inner(started_at);\n            crate::metrics::BUCKET_METRICS\n                .req_seconds\n                .observe_elapsed(kind, &resp, started_at);\n\n            let resp = resp.context(\"request deletion\")?;\n            crate::metrics::BUCKET_METRICS\n                .deleted_objects_total\n                .inc_by(chunk.len() as u64);\n\n            if let Some(errors) = resp.errors {\n                // Log a bounded number of the errors within the response:\n                // these requests can carry 1000 keys so logging each one\n                // would be too verbose, especially as errors may lead us\n                // to retry repeatedly.\n                const LOG_UP_TO_N_ERRORS: usize = 10;\n                for e in errors.iter().take(LOG_UP_TO_N_ERRORS) {\n                    tracing::warn!(\n                        \"DeleteObjects key {} failed: {}: {}\",\n                        e.key.as_ref().map(Cow::from).unwrap_or(\"\".into()),\n                        e.code.as_ref().map(Cow::from).unwrap_or(\"\".into()),\n                        e.message.as_ref().map(Cow::from).unwrap_or(\"\".into())\n                    );\n                }\n\n                return Err(anyhow::anyhow!(\n                    \"Failed to delete {}/{} objects\",\n                    errors.len(),\n                    chunk.len(),\n                ));\n            }\n        }\n        Ok(())\n    }\n\n    async fn list_versions_with_permit(\n        &self,\n        _permit: &tokio::sync::SemaphorePermit<'_>,\n        prefix: 
Option<&RemotePath>,\n        mode: ListingMode,\n        max_keys: Option<NonZeroU32>,\n        cancel: &CancellationToken,\n    ) -> Result<crate::VersionListing, DownloadError> {\n        // get the passed prefix or if it is not set use prefix_in_bucket value\n        let prefix = prefix\n            .map(|p| self.relative_path_to_s3_object(p))\n            .or_else(|| self.prefix_in_bucket.clone());\n\n        let warn_threshold = 3;\n        let max_retries = 10;\n        let is_permanent = |e: &_| matches!(e, DownloadError::Cancelled);\n\n        let mut key_marker = None;\n        let mut version_id_marker = None;\n        let mut versions_and_deletes = Vec::new();\n\n        loop {\n            let response = backoff::retry(\n                || async {\n                    let mut request = self\n                        .client\n                        .list_object_versions()\n                        .bucket(self.bucket_name.clone())\n                        .set_prefix(prefix.clone())\n                        .set_key_marker(key_marker.clone())\n                        .set_version_id_marker(version_id_marker.clone());\n\n                    if let ListingMode::WithDelimiter = mode {\n                        request = request.delimiter(REMOTE_STORAGE_PREFIX_SEPARATOR.to_string());\n                    }\n\n                    let op = request.send();\n\n                    tokio::select! 
{\n                        res = op => res.map_err(|e| DownloadError::Other(e.into())),\n                        _ = cancel.cancelled() => Err(DownloadError::Cancelled),\n                    }\n                },\n                is_permanent,\n                warn_threshold,\n                max_retries,\n                \"listing object versions\",\n                cancel,\n            )\n            .await\n            .ok_or_else(|| DownloadError::Cancelled)\n            .and_then(|x| x)?;\n\n            tracing::trace!(\n                \"  Got List response version_id_marker={:?}, key_marker={:?}\",\n                response.version_id_marker,\n                response.key_marker\n            );\n            let versions = response\n                .versions\n                .unwrap_or_default()\n                .into_iter()\n                .map(|version| {\n                    let key = version.key.expect(\"response does not contain a key\");\n                    let key = self.s3_object_to_relative_path(&key);\n                    let version_id = VersionId(version.version_id.expect(\"needing version id\"));\n                    let last_modified =\n                        SystemTime::try_from(version.last_modified.expect(\"no last_modified\"))?;\n                    Ok(Version {\n                        key,\n                        last_modified,\n                        kind: crate::VersionKind::Version(version_id),\n                    })\n                });\n            let deletes = response\n                .delete_markers\n                .unwrap_or_default()\n                .into_iter()\n                .map(|version| {\n                    let key = version.key.expect(\"response does not contain a key\");\n                    let key = self.s3_object_to_relative_path(&key);\n                    let last_modified =\n                        SystemTime::try_from(version.last_modified.expect(\"no last_modified\"))?;\n                    Ok(Version 
{\n                        key,\n                        last_modified,\n                        kind: crate::VersionKind::DeletionMarker,\n                    })\n                });\n            itertools::process_results(versions.chain(deletes), |n_vds| {\n                versions_and_deletes.extend(n_vds)\n            })\n            .map_err(DownloadError::Other)?;\n            fn none_if_empty(v: Option<String>) -> Option<String> {\n                v.filter(|v| !v.is_empty())\n            }\n            version_id_marker = none_if_empty(response.next_version_id_marker);\n            key_marker = none_if_empty(response.next_key_marker);\n            if version_id_marker.is_none() {\n                // The final response is not supposed to be truncated\n                if response.is_truncated.unwrap_or_default() {\n                    return Err(DownloadError::Other(anyhow::anyhow!(\n                        \"Received truncated ListObjectVersions response for prefix={prefix:?}\"\n                    )));\n                }\n                break;\n            }\n            if let Some(max_keys) = max_keys {\n                if versions_and_deletes.len() >= max_keys.get().try_into().unwrap() {\n                    return Err(DownloadError::Other(anyhow::anyhow!(\"too many versions\")));\n                }\n            }\n        }\n        Ok(VersionListing {\n            versions: versions_and_deletes,\n        })\n    }\n\n    pub fn bucket_name(&self) -> &str {\n        &self.bucket_name\n    }\n}\n\npin_project_lite::pin_project! 
{\n    struct ByteStreamAsStream {\n        #[pin]\n        inner: aws_smithy_types::byte_stream::ByteStream\n    }\n}\n\nimpl From<aws_smithy_types::byte_stream::ByteStream> for ByteStreamAsStream {\n    fn from(inner: aws_smithy_types::byte_stream::ByteStream) -> Self {\n        ByteStreamAsStream { inner }\n    }\n}\n\nimpl Stream for ByteStreamAsStream {\n    type Item = std::io::Result<Bytes>;\n\n    fn poll_next(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Option<Self::Item>> {\n        // this does the std::io::ErrorKind::Other conversion\n        self.project().inner.poll_next(cx).map_err(|x| x.into())\n    }\n\n    // cannot implement size_hint because inner.size_hint is remaining size in bytes, which makes\n    // sense and Stream::size_hint does not really\n}\n\npin_project_lite::pin_project! {\n    /// Times and tracks the outcome of the request.\n    struct TimedDownload<S> {\n        started_at: std::time::Instant,\n        outcome: AttemptOutcome,\n        #[pin]\n        inner: S\n    }\n\n    impl<S> PinnedDrop for TimedDownload<S> {\n        fn drop(mut this: Pin<&mut Self>) {\n            crate::metrics::BUCKET_METRICS.req_seconds.observe_elapsed(RequestKind::Get, this.outcome, this.started_at);\n        }\n    }\n}\n\nimpl<S> TimedDownload<S> {\n    fn new(started_at: std::time::Instant, inner: S) -> Self {\n        TimedDownload {\n            started_at,\n            outcome: AttemptOutcome::Cancelled,\n            inner,\n        }\n    }\n}\n\nimpl<S: Stream<Item = std::io::Result<Bytes>>> Stream for TimedDownload<S> {\n    type Item = <S as Stream>::Item;\n\n    fn poll_next(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Option<Self::Item>> {\n        use std::task::ready;\n\n        let this = self.project();\n\n        let res = ready!(this.inner.poll_next(cx));\n        match &res {\n            Some(Ok(_)) => {}\n            Some(Err(_)) => *this.outcome = AttemptOutcome::Err,\n            None => *this.outcome = 
AttemptOutcome::Ok,\n        }\n\n        Poll::Ready(res)\n    }\n\n    fn size_hint(&self) -> (usize, Option<usize>) {\n        self.inner.size_hint()\n    }\n}\n\nimpl RemoteStorage for S3Bucket {\n    fn list_streaming(\n        &self,\n        prefix: Option<&RemotePath>,\n        mode: ListingMode,\n        max_keys: Option<NonZeroU32>,\n        cancel: &CancellationToken,\n    ) -> impl Stream<Item = Result<Listing, DownloadError>> {\n        let kind = RequestKind::List;\n        // s3 sdk wants i32\n        let mut max_keys = max_keys.map(|mk| mk.get() as i32);\n\n        // get the passed prefix or if it is not set use prefix_in_bucket value\n        let list_prefix = prefix\n            .map(|p| self.relative_path_to_s3_object(p))\n            .or_else(|| {\n                self.prefix_in_bucket.clone().map(|mut s| {\n                    s.push(REMOTE_STORAGE_PREFIX_SEPARATOR);\n                    s\n                })\n            });\n\n        async_stream::stream! {\n            let _permit = self.permit(kind, cancel).await?;\n\n            let mut continuation_token = None;\n            'outer: loop {\n                let started_at = start_measuring_requests(kind);\n\n                // min of two Options, returning Some if one is value and another is\n                // None (None is smaller than anything, so plain min doesn't work).\n                let request_max_keys = self\n                    .max_keys_per_list_response\n                    .into_iter()\n                    .chain(max_keys.into_iter())\n                    .min();\n                let mut request = self\n                    .client\n                    .list_objects_v2()\n                    .bucket(self.bucket_name.clone())\n                    .set_prefix(list_prefix.clone())\n                    .set_continuation_token(continuation_token.clone())\n                    .set_max_keys(request_max_keys);\n\n                if let ListingMode::WithDelimiter = mode {\n          
          request = request.delimiter(REMOTE_STORAGE_PREFIX_SEPARATOR.to_string());\n                }\n\n                let request = request.send();\n\n                let response = tokio::select! {\n                    res = request => Ok(res),\n                    _ = tokio::time::sleep(self.timeout) => Err(DownloadError::Timeout),\n                    _ = cancel.cancelled() => Err(DownloadError::Cancelled),\n                };\n\n                if let Err(DownloadError::Timeout) = &response {\n                    yield Err(DownloadError::Timeout);\n                    continue 'outer;\n                }\n\n                let response = response?; // always yield cancellation errors and stop the stream\n\n                let response = response\n                    .context(\"Failed to list S3 prefixes\")\n                    .map_err(DownloadError::Other);\n\n                let started_at = ScopeGuard::into_inner(started_at);\n\n                crate::metrics::BUCKET_METRICS\n                    .req_seconds\n                    .observe_elapsed(kind, &response, started_at);\n\n                let response = match response {\n                    Ok(response) => response,\n                    Err(e) => {\n                        // The error is potentially retryable, so we must rewind the loop after yielding.\n                        yield Err(e);\n                        continue 'outer;\n                    },\n                };\n\n                let keys = response.contents();\n                let prefixes = response.common_prefixes.as_deref().unwrap_or_default();\n\n                tracing::debug!(\"list: {} prefixes, {} keys\", prefixes.len(), keys.len());\n                let mut result = Listing::default();\n\n                for object in keys {\n                    let key = object.key().expect(\"response does not contain a key\");\n                    let key = self.s3_object_to_relative_path(key);\n\n                    let last_modified = 
match object.last_modified.map(SystemTime::try_from) {\n                        Some(Ok(t)) => t,\n                        Some(Err(_)) => {\n                            tracing::warn!(\"Remote storage last_modified {:?} for {} is out of bounds\",\n                                object.last_modified, key\n                        );\n                            SystemTime::now()\n                        },\n                        None => {\n                            SystemTime::now()\n                        }\n                    };\n\n                    let size = object.size.unwrap_or(0) as u64;\n\n                    result.keys.push(ListingObject{\n                        key,\n                        last_modified,\n                        size,\n                    });\n                    if let Some(mut mk) = max_keys {\n                        assert!(mk > 0);\n                        mk -= 1;\n                        if mk == 0 {\n                            // limit reached\n                            yield Ok(result);\n                            break 'outer;\n                        }\n                        max_keys = Some(mk);\n                    }\n                }\n\n                // S3 gives us prefixes like \"foo/\", we return them like \"foo\"\n                result.prefixes.extend(prefixes.iter().filter_map(|o| {\n                    Some(\n                        self.s3_object_to_relative_path(\n                            o.prefix()?\n                                .trim_end_matches(REMOTE_STORAGE_PREFIX_SEPARATOR),\n                        ),\n                    )\n                }));\n\n                yield Ok(result);\n\n                continuation_token = match response.next_continuation_token {\n                    Some(new_token) => Some(new_token),\n                    None => break,\n                };\n            }\n        }\n    }\n\n    async fn list_versions(\n        &self,\n        prefix: 
Option<&RemotePath>,\n        mode: ListingMode,\n        max_keys: Option<NonZeroU32>,\n        cancel: &CancellationToken,\n    ) -> Result<crate::VersionListing, DownloadError> {\n        let kind = RequestKind::ListVersions;\n        let permit = self.permit(kind, cancel).await?;\n        self.list_versions_with_permit(&permit, prefix, mode, max_keys, cancel)\n            .await\n    }\n\n    async fn head_object(\n        &self,\n        key: &RemotePath,\n        cancel: &CancellationToken,\n    ) -> Result<ListingObject, DownloadError> {\n        let kind = RequestKind::Head;\n        let _permit = self.permit(kind, cancel).await?;\n\n        let started_at = start_measuring_requests(kind);\n\n        let head_future = self\n            .client\n            .head_object()\n            .bucket(self.bucket_name())\n            .key(self.relative_path_to_s3_object(key))\n            .send();\n\n        let head_future = tokio::time::timeout(self.timeout, head_future);\n\n        let res = tokio::select! {\n            res = head_future => res,\n            _ = cancel.cancelled() => return Err(TimeoutOrCancel::Cancel.into()),\n        };\n\n        let res = res.map_err(|_e| DownloadError::Timeout)?;\n\n        // do not incl. timeouts as errors in metrics but cancellations\n        let started_at = ScopeGuard::into_inner(started_at);\n        crate::metrics::BUCKET_METRICS\n            .req_seconds\n            .observe_elapsed(kind, &res, started_at);\n\n        let data = match res {\n            Ok(object_output) => object_output,\n            Err(SdkError::ServiceError(e)) if matches!(e.err(), HeadObjectError::NotFound(_)) => {\n                // Count this in the AttemptOutcome::Ok bucket, because 404 is not\n                // an error: we expect to sometimes fetch an object and find it missing,\n                // e.g. 
when probing for timeline indices.\n                crate::metrics::BUCKET_METRICS.req_seconds.observe_elapsed(\n                    kind,\n                    AttemptOutcome::Ok,\n                    started_at,\n                );\n                return Err(DownloadError::NotFound);\n            }\n            Err(e) => {\n                crate::metrics::BUCKET_METRICS.req_seconds.observe_elapsed(\n                    kind,\n                    AttemptOutcome::Err,\n                    started_at,\n                );\n\n                return Err(DownloadError::Other(\n                    anyhow::Error::new(e).context(\"s3 head object\"),\n                ));\n            }\n        };\n\n        let (Some(last_modified), Some(size)) = (data.last_modified, data.content_length) else {\n            return Err(DownloadError::Other(anyhow!(\n                \"head_object doesn't contain last_modified or content_length\"\n            )))?;\n        };\n        Ok(ListingObject {\n            key: key.to_owned(),\n            last_modified: SystemTime::try_from(last_modified).map_err(|e| {\n                DownloadError::Other(anyhow!(\"can't convert time '{last_modified}': {e}\"))\n            })?,\n            size: size as u64,\n        })\n    }\n\n    async fn upload(\n        &self,\n        from: impl Stream<Item = std::io::Result<Bytes>> + Send + Sync + 'static,\n        from_size_bytes: usize,\n        to: &RemotePath,\n        metadata: Option<StorageMetadata>,\n        cancel: &CancellationToken,\n    ) -> anyhow::Result<()> {\n        let kind = RequestKind::Put;\n        let _permit = self.permit(kind, cancel).await?;\n\n        let started_at = start_measuring_requests(kind);\n\n        let body = StreamBody::new(from.map(|x| x.map(Frame::data)));\n        let bytes_stream = ByteStream::new(SdkBody::from_body_1_x(body));\n\n        let upload = self\n            .client\n            .put_object()\n            .bucket(self.bucket_name.clone())\n           
 .key(self.relative_path_to_s3_object(to))\n            .set_metadata(metadata.map(|m| m.0))\n            .set_storage_class(self.upload_storage_class.clone())\n            .content_length(from_size_bytes.try_into()?)\n            .body(bytes_stream)\n            .send();\n\n        let upload = tokio::time::timeout(self.timeout, upload);\n\n        let res = tokio::select! {\n            res = upload => res,\n            _ = cancel.cancelled() => return Err(TimeoutOrCancel::Cancel.into()),\n        };\n\n        if let Ok(inner) = &res {\n            // do not incl. timeouts as errors in metrics but cancellations\n            let started_at = ScopeGuard::into_inner(started_at);\n            crate::metrics::BUCKET_METRICS\n                .req_seconds\n                .observe_elapsed(kind, inner, started_at);\n        }\n\n        match res {\n            Ok(Ok(_put)) => Ok(()),\n            Ok(Err(sdk)) => Err(sdk.into()),\n            Err(_timeout) => Err(TimeoutOrCancel::Timeout.into()),\n        }\n    }\n\n    async fn copy(\n        &self,\n        from: &RemotePath,\n        to: &RemotePath,\n        cancel: &CancellationToken,\n    ) -> anyhow::Result<()> {\n        let kind = RequestKind::Copy;\n        let _permit = self.permit(kind, cancel).await?;\n\n        let timeout = tokio::time::sleep(self.timeout);\n\n        let started_at = start_measuring_requests(kind);\n\n        // we need to specify bucket_name as a prefix\n        let copy_source = format!(\n            \"{}/{}\",\n            self.bucket_name,\n            self.relative_path_to_s3_object(from)\n        );\n\n        let op = self\n            .client\n            .copy_object()\n            .bucket(self.bucket_name.clone())\n            .key(self.relative_path_to_s3_object(to))\n            .set_storage_class(self.upload_storage_class.clone())\n            .copy_source(copy_source)\n            .send();\n\n        let res = tokio::select! 
{\n            res = op => res,\n            _ = timeout => return Err(TimeoutOrCancel::Timeout.into()),\n            _ = cancel.cancelled() => return Err(TimeoutOrCancel::Cancel.into()),\n        };\n\n        let started_at = ScopeGuard::into_inner(started_at);\n        crate::metrics::BUCKET_METRICS\n            .req_seconds\n            .observe_elapsed(kind, &res, started_at);\n\n        res?;\n\n        Ok(())\n    }\n\n    async fn download(\n        &self,\n        from: &RemotePath,\n        opts: &DownloadOpts,\n        cancel: &CancellationToken,\n    ) -> Result<Download, DownloadError> {\n        // if prefix is not none then download file `prefix/from`\n        // if prefix is none then download file `from`\n        self.download_object(\n            GetObjectRequest {\n                bucket: self.bucket_name.clone(),\n                key: self.relative_path_to_s3_object(from),\n                etag: opts.etag.as_ref().map(|e| e.to_string()),\n                range: opts.byte_range_header(),\n                version_id: opts.version_id.as_ref().map(|v| v.0.to_owned()),\n            },\n            cancel,\n        )\n        .await\n    }\n\n    async fn delete_objects(\n        &self,\n        paths: &[RemotePath],\n        cancel: &CancellationToken,\n    ) -> anyhow::Result<()> {\n        let kind = RequestKind::Delete;\n        let permit = self.permit(kind, cancel).await?;\n        let mut delete_objects = Vec::with_capacity(paths.len());\n        for path in paths {\n            let obj_id = ObjectIdentifier::builder()\n                .set_key(Some(self.relative_path_to_s3_object(path)))\n                .build()\n                .context(\"convert path to oid\")?;\n            delete_objects.push(obj_id);\n        }\n\n        self.delete_oids(&permit, &delete_objects, cancel).await\n    }\n\n    fn max_keys_per_delete(&self) -> usize {\n        MAX_KEYS_PER_DELETE_S3\n    }\n\n    async fn delete(&self, path: &RemotePath, cancel: 
&CancellationToken) -> anyhow::Result<()> {\n        let paths = std::array::from_ref(path);\n        self.delete_objects(paths, cancel).await\n    }\n\n    async fn time_travel_recover(\n        &self,\n        prefix: Option<&RemotePath>,\n        timestamp: SystemTime,\n        done_if_after: SystemTime,\n        cancel: &CancellationToken,\n        complexity_limit: Option<NonZeroU32>,\n    ) -> Result<(), TimeTravelError> {\n        let kind = RequestKind::TimeTravel;\n        let permit = self.permit(kind, cancel).await?;\n\n        tracing::trace!(\"Target time: {timestamp:?}, done_if_after {done_if_after:?}\");\n\n        let mode = ListingMode::NoDelimiter;\n        let version_listing = self\n            .list_versions_with_permit(&permit, prefix, mode, complexity_limit, cancel)\n            .await\n            .map_err(|err| match err {\n                DownloadError::Other(e) => TimeTravelError::Other(e),\n                DownloadError::Cancelled => TimeTravelError::Cancelled,\n                other => TimeTravelError::Other(other.into()),\n            })?;\n        let versions_and_deletes = version_listing.versions;\n\n        tracing::info!(\n            \"Built list for time travel with {} versions and deletions\",\n            versions_and_deletes.len()\n        );\n\n        // Work on the list of references instead of the objects directly,\n        // otherwise we get lifetime errors in the sort_by_key call below.\n        let mut versions_and_deletes = versions_and_deletes.iter().collect::<Vec<_>>();\n\n        versions_and_deletes.sort_by_key(|vd| (&vd.key, &vd.last_modified));\n\n        let mut vds_for_key = HashMap::<_, Vec<_>>::new();\n\n        for vd in &versions_and_deletes {\n            let Version { key, .. 
} = &vd;\n            let version_id = vd.version_id().map(|v| v.0.as_str());\n            if version_id == Some(\"null\") {\n                // TODO: check the behavior of using the SDK on a non-versioned container\n                return Err(TimeTravelError::Other(anyhow!(\n                    \"Received ListVersions response for key={key} with version_id='null', \\\n                    indicating either disabled versioning, or legacy objects with null version id values\"\n                )));\n            }\n            tracing::trace!(\"Parsing version key={key} kind={:?}\", vd.kind);\n\n            vds_for_key.entry(key).or_default().push(vd);\n        }\n\n        let warn_threshold = 3;\n        let max_retries = 10;\n        let is_permanent = |e: &_| matches!(e, TimeTravelError::Cancelled);\n\n        for (key, versions) in vds_for_key {\n            let last_vd = versions.last().unwrap();\n            let key = self.relative_path_to_s3_object(key);\n            if last_vd.last_modified > done_if_after {\n                tracing::trace!(\"Key {key} has version later than done_if_after, skipping\");\n                continue;\n            }\n            // the version we want to restore to.\n            let version_to_restore_to =\n                match versions.binary_search_by_key(&timestamp, |tpl| tpl.last_modified) {\n                    Ok(v) => v,\n                    Err(e) => e,\n                };\n            if version_to_restore_to == versions.len() {\n                tracing::trace!(\"Key {key} has no changes since timestamp, skipping\");\n                continue;\n            }\n            let mut do_delete = false;\n            if version_to_restore_to == 0 {\n                // All versions more recent, so the key didn't exist at the specified time point.\n                tracing::trace!(\n                    \"All {} versions more recent for {key}, deleting\",\n                    versions.len()\n                );\n                
do_delete = true;\n            } else {\n                match &versions[version_to_restore_to - 1] {\n                    Version {\n                        kind: VersionKind::Version(version_id),\n                        ..\n                    } => {\n                        let version_id = &version_id.0;\n                        tracing::trace!(\"Copying old version {version_id} for {key}...\");\n                        // Restore the state to the last version by copying\n                        let source_id =\n                            format!(\"{}/{key}?versionId={version_id}\", self.bucket_name);\n\n                        backoff::retry(\n                            || async {\n                                let op = self\n                                    .client\n                                    .copy_object()\n                                    .bucket(self.bucket_name.clone())\n                                    .key(&key)\n                                    .set_storage_class(self.upload_storage_class.clone())\n                                    .copy_source(&source_id)\n                                    .send();\n\n                                tokio::select! 
{\n                                    res = op => res.map_err(|e| TimeTravelError::Other(e.into())),\n                                    _ = cancel.cancelled() => Err(TimeTravelError::Cancelled),\n                                }\n                            },\n                            is_permanent,\n                            warn_threshold,\n                            max_retries,\n                            \"copying object version for time_travel_recover\",\n                            cancel,\n                        )\n                        .await\n                        .ok_or_else(|| TimeTravelError::Cancelled)\n                        .and_then(|x| x)?;\n                        tracing::info!(%version_id, %key, \"Copied old version in S3\");\n                    }\n                    Version {\n                        kind: VersionKind::DeletionMarker,\n                        ..\n                    } => {\n                        do_delete = true;\n                    }\n                }\n            };\n            if do_delete {\n                if matches!(last_vd.kind, VersionKind::DeletionMarker) {\n                    // Key has since been deleted (but there was some history), no need to do anything\n                    tracing::trace!(\"Key {key} already deleted, skipping.\");\n                } else {\n                    tracing::trace!(\"Deleting {key}...\");\n\n                    let oid = ObjectIdentifier::builder()\n                        .key(key.to_owned())\n                        .build()\n                        .map_err(|e| TimeTravelError::Other(e.into()))?;\n\n                    self.delete_oids(&permit, &[oid], cancel)\n                        .await\n                        .map_err(|e| {\n                            // delete_oid0 will use TimeoutOrCancel\n                            if TimeoutOrCancel::caused_by_cancel(&e) {\n                                TimeTravelError::Cancelled\n                            
} else {\n                                TimeTravelError::Other(e)\n                            }\n                        })?;\n                }\n            }\n        }\n        Ok(())\n    }\n}\n\n#[cfg(test)]\nmod tests {\n    use std::num::NonZeroUsize;\n\n    use camino::Utf8Path;\n\n    use crate::{RemotePath, S3Bucket, S3Config};\n\n    #[tokio::test]\n    async fn relative_path() {\n        let all_paths = [\"\", \"some/path\", \"some/path/\"];\n        let all_paths: Vec<RemotePath> = all_paths\n            .iter()\n            .map(|x| RemotePath::new(Utf8Path::new(x)).expect(\"bad path\"))\n            .collect();\n        let prefixes = [\n            None,\n            Some(\"\"),\n            Some(\"test/prefix\"),\n            Some(\"test/prefix/\"),\n            Some(\"/test/prefix/\"),\n        ];\n        let expected_outputs = [\n            vec![\"\", \"some/path\", \"some/path/\"],\n            vec![\"/\", \"/some/path\", \"/some/path/\"],\n            vec![\n                \"test/prefix/\",\n                \"test/prefix/some/path\",\n                \"test/prefix/some/path/\",\n            ],\n            vec![\n                \"test/prefix/\",\n                \"test/prefix/some/path\",\n                \"test/prefix/some/path/\",\n            ],\n            vec![\n                \"test/prefix/\",\n                \"test/prefix/some/path\",\n                \"test/prefix/some/path/\",\n            ],\n        ];\n\n        for (prefix_idx, prefix) in prefixes.iter().enumerate() {\n            let config = S3Config {\n                bucket_name: \"bucket\".to_owned(),\n                bucket_region: \"region\".to_owned(),\n                prefix_in_bucket: prefix.map(str::to_string),\n                endpoint: None,\n                concurrency_limit: NonZeroUsize::new(100).unwrap(),\n                max_keys_per_list_response: Some(5),\n                upload_storage_class: None,\n            };\n            let storage = 
S3Bucket::new(&config, std::time::Duration::ZERO)\n                .await\n                .expect(\"remote storage init\");\n            for (test_path_idx, test_path) in all_paths.iter().enumerate() {\n                let result = storage.relative_path_to_s3_object(test_path);\n                let expected = expected_outputs[prefix_idx][test_path_idx];\n                assert_eq!(result, expected);\n            }\n        }\n    }\n}\n"
  },
  {
    "path": "libs/remote_storage/src/simulate_failures.rs",
    "content": "//! This module provides a wrapper around a real RemoteStorage implementation that\n//! causes the first N attempts at each upload or download operation to fail. For\n//! testing purposes.\nuse rand::Rng;\nuse std::cmp;\nuse std::collections::HashMap;\nuse std::collections::hash_map::Entry;\nuse std::num::NonZeroU32;\nuse std::sync::{Arc, Mutex};\nuse std::time::SystemTime;\n\nuse bytes::Bytes;\nuse futures::StreamExt;\nuse futures::stream::Stream;\nuse tokio_util::sync::CancellationToken;\n\nuse crate::{\n    Download, DownloadError, DownloadOpts, GenericRemoteStorage, Listing, ListingMode, RemotePath,\n    RemoteStorage, StorageMetadata, TimeTravelError,\n};\n\npub struct UnreliableWrapper {\n    inner: GenericRemoteStorage<Arc<VoidStorage>>,\n\n    // This many attempts of each operation will fail, then we let it succeed.\n    attempts_to_fail: u64,\n\n    // Tracks how many failed attempts of each operation has been made.\n    attempts: Mutex<HashMap<RemoteOp, u64>>,\n\n    /* BEGIN_HADRON */\n    // This is the probability (in percent) of failure for each operation, in the range [0, 100].\n    // The probability defaults to 100, which means that all operations will fail.\n    // Storage will fail by probability up to attempts_to_fail times.\n    attempt_failure_probability: u64,\n    /* END_HADRON */\n}\n\n/// Used to identify retries of different unique operation.\n#[derive(Debug, Hash, Eq, PartialEq)]\nenum RemoteOp {\n    ListPrefixes(Option<RemotePath>),\n    HeadObject(RemotePath),\n    Upload(RemotePath),\n    Download(RemotePath),\n    Delete(RemotePath),\n    DeleteObjects(Vec<RemotePath>),\n    TimeTravelRecover(Option<RemotePath>),\n}\n\nimpl UnreliableWrapper {\n    /// Wraps `inner` so that operations on it fail artificially.\n    ///\n    /// `attempts_to_fail` must be non-zero. `attempt_failure_probability` is a percentage and\n    /// is clamped to 100. Panics if `inner` is already an unreliable wrapper.\n    pub fn new(\n        inner: crate::GenericRemoteStorage,\n        attempts_to_fail: u64,\n        attempt_failure_probability: u64,\n    ) -> Self {\n        assert!(attempts_to_fail > 0);\n        let inner = match inner {\n            GenericRemoteStorage::AwsS3(s) => GenericRemoteStorage::AwsS3(s),\n            GenericRemoteStorage::AzureBlob(s) => GenericRemoteStorage::AzureBlob(s),\n            GenericRemoteStorage::LocalFs(s) => GenericRemoteStorage::LocalFs(s),\n            // We could also make this a no-op, as in, extract the inner of the passed generic remote storage\n            GenericRemoteStorage::Unreliable(_s) => {\n                panic!(\"Can't wrap unreliable wrapper unreliably\")\n            }\n            GenericRemoteStorage::GCS(s) => GenericRemoteStorage::GCS(s),\n        };\n        let actual_attempt_failure_probability = cmp::min(attempt_failure_probability, 100);\n        UnreliableWrapper {\n            inner,\n            attempts_to_fail,\n            attempt_failure_probability: actual_attempt_failure_probability,\n            attempts: Mutex::new(HashMap::new()),\n        }\n    }\n\n    ///\n    /// Common functionality for all operations.\n    ///\n    /// The first attempt of an operation always returns an error. On later attempts, while the\n    /// attempt counter is below 'attempts_to_fail', the operation fails with a probability of\n    /// 'attempt_failure_probability' percent. Otherwise the operation is allowed to go ahead\n    /// and the counter is cleared.\n    ///\n    fn attempt(&self, op: RemoteOp) -> anyhow::Result<u64> {\n        let mut attempts = self.attempts.lock().unwrap();\n        let mut rng = rand::rng();\n\n        match attempts.entry(op) {\n            Entry::Occupied(mut e) => {\n                let attempts_before_this = {\n                    let p = e.get_mut();\n                    *p += 1;\n                    *p\n                };\n\n                /* BEGIN_HADRON */\n                // If there are more attempts to fail, fail the request by probability.\n                // The roll is drawn from 0..100 (upper bound exclusive) so that a probability\n                // of 100 always fails and a probability of 0 never fails.\n                if (attempts_before_this < self.attempts_to_fail)\n                    && (rng.random_range(0..100) < self.attempt_failure_probability)\n                {\n                    let error =\n                        anyhow::anyhow!(\"simulated failure of remote operation {:?}\", e.key());\n                    Err(error)\n                } else {\n                    e.remove();\n                    Ok(attempts_before_this)\n                }\n                /* END_HADRON */\n            }\n            Entry::Vacant(e) => {\n                let error = anyhow::anyhow!(\"simulated failure of remote operation {:?}\", e.key());\n                e.insert(1);\n                Err(error)\n            }\n        }\n    }\n\n    async fn delete_inner(\n        &self,\n        path: &RemotePath,\n        attempt: bool,\n        cancel: &CancellationToken,\n    ) -> anyhow::Result<()> {\n        if attempt {\n            self.attempt(RemoteOp::Delete(path.clone()))?;\n        }\n        self.inner.delete(path, cancel).await\n    }\n}\n\n// We never construct this, so the type is not important, just has to not be UnreliableWrapper and impl RemoteStorage.\ntype VoidStorage = crate::LocalFs;\n\nimpl RemoteStorage for UnreliableWrapper {\n    fn list_streaming(\n        &self,\n        prefix: Option<&RemotePath>,\n        mode: ListingMode,\n        max_keys: Option<NonZeroU32>,\n        cancel: &CancellationToken,\n    ) -> impl Stream<Item = Result<Listing, DownloadError>> + Send {\n        async_stream::stream! {\n            self.attempt(RemoteOp::ListPrefixes(prefix.cloned()))\n                .map_err(DownloadError::Other)?;\n            let mut stream = self.inner\n                .list_streaming(prefix, mode, max_keys, cancel);\n            while let Some(item) = stream.next().await {\n                yield item;\n            }\n        }\n    }\n    async fn list(\n        &self,\n        prefix: Option<&RemotePath>,\n        mode: ListingMode,\n        max_keys: Option<NonZeroU32>,\n        cancel: &CancellationToken,\n    ) -> Result<Listing, DownloadError> {\n        self.attempt(RemoteOp::ListPrefixes(prefix.cloned()))\n            .map_err(DownloadError::Other)?;\n        self.inner.list(prefix, mode, max_keys, cancel).await\n    }\n\n    async fn list_versions(\n        &self,\n        prefix: Option<&RemotePath>,\n        mode: ListingMode,\n        max_keys: Option<NonZeroU32>,\n        cancel: &CancellationToken,\n    ) -> Result<crate::VersionListing, DownloadError> {\n        self.attempt(RemoteOp::ListPrefixes(prefix.cloned()))\n            .map_err(DownloadError::Other)?;\n        self.inner\n            .list_versions(prefix, mode, max_keys, cancel)\n            .await\n    }\n\n    async fn head_object(\n        &self,\n        key: &RemotePath,\n        cancel: &CancellationToken,\n    ) -> Result<crate::ListingObject, DownloadError> {\n        self.attempt(RemoteOp::HeadObject(key.clone()))\n            .map_err(DownloadError::Other)?;\n        self.inner.head_object(key, cancel).await\n    }\n\n    async fn upload(\n        &self,\n        data: impl Stream<Item = std::io::Result<Bytes>> + Send + Sync + 'static,\n        // S3 PUT request requires the content length to be specified,\n        // otherwise it starts to fail with the concurrent connection count increasing.\n        data_size_bytes: usize,\n        to: &RemotePath,\n        metadata: Option<StorageMetadata>,\n        cancel: &CancellationToken,\n    ) -> anyhow::Result<()> {\n        self.attempt(RemoteOp::Upload(to.clone()))?;\n        self.inner\n            .upload(data, data_size_bytes, to, metadata, cancel)\n            .await\n    }\n\n    async fn download(\n        &self,\n        from: &RemotePath,\n        opts: &DownloadOpts,\n        cancel: &CancellationToken,\n    ) -> Result<Download, DownloadError> {\n        // Note: We treat any byte range as an \"attempt\" of the same operation.\n        // We don't pay attention to the ranges. That's good enough for now.\n        self.attempt(RemoteOp::Download(from.clone()))\n            .map_err(DownloadError::Other)?;\n        self.inner.download(from, opts, cancel).await\n    }\n\n    async fn delete(&self, path: &RemotePath, cancel: &CancellationToken) -> anyhow::Result<()> {\n        self.delete_inner(path, true, cancel).await\n    }\n\n    async fn delete_objects(\n        &self,\n        paths: &[RemotePath],\n        cancel: &CancellationToken,\n    ) -> anyhow::Result<()> {\n        self.attempt(RemoteOp::DeleteObjects(paths.to_vec()))?;\n        let mut error_counter = 0;\n        for path in paths {\n            // Don't record an attempt because it was already recorded above\n            if (self.delete_inner(path, false, cancel).await).is_err() {\n                error_counter += 1;\n            }\n        }\n        if error_counter > 0 {\n            return Err(anyhow::anyhow!(\n                \"failed to delete {} objects\",\n                error_counter\n            ));\n        }\n        Ok(())\n    }\n\n    fn max_keys_per_delete(&self) -> usize {\n        self.inner.max_keys_per_delete()\n    }\n\n    async fn copy(\n        &self,\n        from: &RemotePath,\n        to: &RemotePath,\n        cancel: &CancellationToken,\n    ) -> anyhow::Result<()> {\n        // copy is equivalent to download + upload\n        self.attempt(RemoteOp::Download(from.clone()))?;\n        self.attempt(RemoteOp::Upload(to.clone()))?;\n        self.inner.copy_object(from, to, cancel).await\n    }\n\n    async fn time_travel_recover(\n        &self,\n        prefix: Option<&RemotePath>,\n        timestamp: SystemTime,\n        done_if_after: SystemTime,\n        cancel: &CancellationToken,\n        complexity_limit: Option<NonZeroU32>,\n    ) -> Result<(), TimeTravelError> {\n        self.attempt(RemoteOp::TimeTravelRecover(prefix.map(|p| p.to_owned())))\n            .map_err(TimeTravelError::Other)?;\n        self.inner\n            .time_travel_recover(prefix, timestamp, done_if_after, cancel, complexity_limit)\n            .await\n    }\n}\n"
  },
  {
    "path": "libs/remote_storage/src/support.rs",
    "content": "use std::future::Future;\nuse std::pin::Pin;\nuse std::task::{Context, Poll};\nuse std::time::Duration;\n\nuse bytes::Bytes;\nuse futures_util::Stream;\nuse tokio_util::sync::CancellationToken;\n\nuse crate::TimeoutOrCancel;\n\npin_project_lite::pin_project! {\n    /// A `Stream` adapter which carries a permit for the lifetime of the value.\n    pub(crate) struct PermitCarrying<S> {\n        permit: tokio::sync::OwnedSemaphorePermit,\n        #[pin]\n        inner: S,\n    }\n}\n\nimpl<S> PermitCarrying<S> {\n    pub(crate) fn new(permit: tokio::sync::OwnedSemaphorePermit, inner: S) -> Self {\n        Self { permit, inner }\n    }\n}\n\nimpl<S: Stream> Stream for PermitCarrying<S> {\n    type Item = <S as Stream>::Item;\n\n    fn poll_next(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Option<Self::Item>> {\n        self.project().inner.poll_next(cx)\n    }\n\n    fn size_hint(&self) -> (usize, Option<usize>) {\n        self.inner.size_hint()\n    }\n}\n\npin_project_lite::pin_project! 
{\n    pub(crate) struct DownloadStream<F, S> {\n        hit: bool,\n        #[pin]\n        cancellation: F,\n        #[pin]\n        inner: S,\n    }\n}\n\nimpl<F, S> DownloadStream<F, S> {\n    pub(crate) fn new(cancellation: F, inner: S) -> Self {\n        Self {\n            cancellation,\n            hit: false,\n            inner,\n        }\n    }\n}\n\n/// See documentation on [`crate::DownloadStream`] on rationale why `std::io::Error` is used.\nimpl<E, F, S> Stream for DownloadStream<F, S>\nwhere\n    std::io::Error: From<E>,\n    F: Future<Output = E>,\n    S: Stream<Item = std::io::Result<Bytes>>,\n{\n    type Item = <S as Stream>::Item;\n\n    fn poll_next(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Option<Self::Item>> {\n        let this = self.project();\n\n        if !*this.hit {\n            if let Poll::Ready(e) = this.cancellation.poll(cx) {\n                *this.hit = true;\n\n                // most likely this will be a std::io::Error wrapping a DownloadError\n                let e = Err(std::io::Error::from(e));\n                return Poll::Ready(Some(e));\n            }\n        } else {\n            // this would be perfectly valid behaviour for doing a graceful completion on the\n            // download for example, but not one we expect to do right now.\n            tracing::warn!(\"continuing polling after having cancelled or timeouted\");\n        }\n\n        this.inner.poll_next(cx)\n    }\n\n    fn size_hint(&self) -> (usize, Option<usize>) {\n        self.inner.size_hint()\n    }\n}\n\n/// Fires only on the first cancel or timeout, not on both.\npub(crate) fn cancel_or_timeout(\n    timeout: Duration,\n    cancel: CancellationToken,\n) -> impl std::future::Future<Output = TimeoutOrCancel> + 'static {\n    // futures are lazy, they don't do anything before being polled.\n    //\n    // \"precalculate\" the wanted deadline before returning the future, so that we can use pause\n    // failpoint to trigger a timeout in test.\n 
   let deadline = tokio::time::Instant::now() + timeout;\n    async move {\n        tokio::select! {\n            _ = tokio::time::sleep_until(deadline) => TimeoutOrCancel::Timeout,\n            _ = cancel.cancelled() => {\n                TimeoutOrCancel::Cancel\n            },\n        }\n    }\n}\n\n#[cfg(test)]\nmod tests {\n    use futures::stream::StreamExt;\n\n    use super::*;\n    use crate::DownloadError;\n\n    #[tokio::test(start_paused = true)]\n    async fn cancelled_download_stream() {\n        let inner = futures::stream::pending();\n        let timeout = Duration::from_secs(120);\n        let cancel = CancellationToken::new();\n\n        let stream = DownloadStream::new(cancel_or_timeout(timeout, cancel.clone()), inner);\n        let mut stream = std::pin::pin!(stream);\n\n        let mut first = stream.next();\n\n        tokio::select! {\n            _ = &mut first => unreachable!(\"we haven't yet cancelled nor is timeout passed\"),\n            _ = tokio::time::sleep(Duration::from_secs(1)) => {},\n        }\n\n        cancel.cancel();\n\n        let e = first.await.expect(\"there must be some\").unwrap_err();\n        assert!(matches!(e.kind(), std::io::ErrorKind::Other), \"{e:?}\");\n        let inner = e.get_ref().expect(\"inner should be set\");\n        assert!(\n            inner\n                .downcast_ref::<DownloadError>()\n                .is_some_and(|e| matches!(e, DownloadError::Cancelled)),\n            \"{inner:?}\"\n        );\n        let e = DownloadError::from(e);\n        assert!(matches!(e, DownloadError::Cancelled), \"{e:?}\");\n\n        tokio::select! 
{\n            _ = stream.next() => unreachable!(\"no timeout ever happens as we were already cancelled\"),\n            _ = tokio::time::sleep(Duration::from_secs(121)) => {},\n        }\n    }\n\n    #[tokio::test(start_paused = true)]\n    async fn timeouted_download_stream() {\n        let inner = futures::stream::pending();\n        let timeout = Duration::from_secs(120);\n        let cancel = CancellationToken::new();\n\n        let stream = DownloadStream::new(cancel_or_timeout(timeout, cancel.clone()), inner);\n        let mut stream = std::pin::pin!(stream);\n\n        // because the stream uses 120s timeout and we are paused, we advance to 120s right away.\n        let first = stream.next();\n\n        let e = first.await.expect(\"there must be some\").unwrap_err();\n        assert!(matches!(e.kind(), std::io::ErrorKind::Other), \"{e:?}\");\n        let inner = e.get_ref().expect(\"inner should be set\");\n        assert!(\n            inner\n                .downcast_ref::<DownloadError>()\n                .is_some_and(|e| matches!(e, DownloadError::Timeout)),\n            \"{inner:?}\"\n        );\n        let e = DownloadError::from(e);\n        assert!(matches!(e, DownloadError::Timeout), \"{e:?}\");\n\n        cancel.cancel();\n\n        tokio::select! 
{\n            _ = stream.next() => unreachable!(\"no cancellation ever happens because we already timed out\"),\n            _ = tokio::time::sleep(Duration::from_secs(121)) => {},\n        }\n    }\n\n    #[tokio::test]\n    async fn notified_but_pollable_after() {\n        let inner = futures::stream::once(futures::future::ready(Ok(bytes::Bytes::from_static(\n            b\"hello world\",\n        ))));\n        let timeout = Duration::from_secs(120);\n        let cancel = CancellationToken::new();\n\n        cancel.cancel();\n        let stream = DownloadStream::new(cancel_or_timeout(timeout, cancel.clone()), inner);\n        let mut stream = std::pin::pin!(stream);\n\n        let next = stream.next().await;\n        let ioe = next.unwrap().unwrap_err();\n        assert!(\n            matches!(\n                ioe.get_ref().unwrap().downcast_ref::<DownloadError>(),\n                Some(&DownloadError::Cancelled)\n            ),\n            \"{ioe:?}\"\n        );\n\n        let next = stream.next().await;\n        let bytes = next.unwrap().unwrap();\n        assert_eq!(&b\"hello world\"[..], bytes);\n    }\n}\n"
  },
  {
    "path": "libs/remote_storage/tests/common/mod.rs",
    "content": "use std::collections::HashSet;\nuse std::ops::ControlFlow;\nuse std::path::PathBuf;\nuse std::sync::Arc;\n\nuse anyhow::Context;\nuse bytes::Bytes;\nuse camino::Utf8Path;\nuse futures::stream::Stream;\nuse once_cell::sync::OnceCell;\nuse remote_storage::{Download, GenericRemoteStorage, RemotePath};\nuse tokio::task::JoinSet;\nuse tokio_util::sync::CancellationToken;\nuse tracing::{debug, error, info};\n\nstatic LOGGING_DONE: OnceCell<()> = OnceCell::new();\n\npub(crate) fn upload_stream(\n    content: std::borrow::Cow<'static, [u8]>,\n) -> (\n    impl Stream<Item = std::io::Result<Bytes>> + Send + Sync + 'static,\n    usize,\n) {\n    use std::borrow::Cow;\n\n    let content = match content {\n        Cow::Borrowed(x) => Bytes::from_static(x),\n        Cow::Owned(vec) => Bytes::from(vec),\n    };\n    wrap_stream(content)\n}\n\npub(crate) fn wrap_stream(\n    content: bytes::Bytes,\n) -> (\n    impl Stream<Item = std::io::Result<Bytes>> + Send + Sync + 'static,\n    usize,\n) {\n    let len = content.len();\n    let content = futures::future::ready(Ok(content));\n\n    (futures::stream::once(content), len)\n}\n\npub(crate) async fn download_to_vec(dl: Download) -> anyhow::Result<Vec<u8>> {\n    let mut buf = Vec::new();\n    tokio::io::copy_buf(\n        &mut tokio_util::io::StreamReader::new(dl.download_stream),\n        &mut buf,\n    )\n    .await?;\n    Ok(buf)\n}\n\n// Uploads files `folder{j}/blob_{i}.txt`. 
See test description for more details.\npub(crate) async fn upload_simple_remote_data(\n    client: &Arc<GenericRemoteStorage>,\n    upload_tasks_count: usize,\n) -> ControlFlow<HashSet<RemotePath>, HashSet<RemotePath>> {\n    info!(\"Creating {upload_tasks_count} remote files\");\n    let mut upload_tasks = JoinSet::new();\n    let cancel = CancellationToken::new();\n\n    for i in 1..upload_tasks_count + 1 {\n        let task_client = Arc::clone(client);\n        let cancel = cancel.clone();\n\n        upload_tasks.spawn(async move {\n            let blob_path = PathBuf::from(format!(\"folder{}/blob_{}.txt\", i / 7, i));\n            let blob_path = RemotePath::new(\n                Utf8Path::from_path(blob_path.as_path()).expect(\"must be valid blob path\"),\n            )\n            .with_context(|| format!(\"{blob_path:?} to RemotePath conversion\"))?;\n            debug!(\"Creating remote item {i} at path {blob_path:?}\");\n\n            let (data, len) = upload_stream(format!(\"remote blob data {i}\").into_bytes().into());\n            task_client\n                .upload(data, len, &blob_path, None, &cancel)\n                .await?;\n\n            Ok::<_, anyhow::Error>(blob_path)\n        });\n    }\n\n    let mut upload_tasks_failed = false;\n    let mut uploaded_blobs = HashSet::with_capacity(upload_tasks_count);\n    while let Some(task_run_result) = upload_tasks.join_next().await {\n        match task_run_result\n            .context(\"task join failed\")\n            .and_then(|task_result| task_result.context(\"upload task failed\"))\n        {\n            Ok(upload_path) => {\n                uploaded_blobs.insert(upload_path);\n            }\n            Err(e) => {\n                error!(\"Upload task failed: {e:?}\");\n                upload_tasks_failed = true;\n            }\n        }\n    }\n\n    if upload_tasks_failed {\n        ControlFlow::Break(uploaded_blobs)\n    } else {\n        ControlFlow::Continue(uploaded_blobs)\n    
}\n}\n\npub(crate) async fn cleanup(\n    client: &Arc<GenericRemoteStorage>,\n    objects_to_delete: HashSet<RemotePath>,\n) {\n    info!(\n        \"Removing {} objects from the remote storage during cleanup\",\n        objects_to_delete.len()\n    );\n    let cancel = CancellationToken::new();\n    let mut delete_tasks = JoinSet::new();\n    for object_to_delete in objects_to_delete {\n        let task_client = Arc::clone(client);\n        let cancel = cancel.clone();\n        delete_tasks.spawn(async move {\n            debug!(\"Deleting remote item at path {object_to_delete:?}\");\n            task_client\n                .delete(&object_to_delete, &cancel)\n                .await\n                .with_context(|| format!(\"{object_to_delete:?} removal\"))\n        });\n    }\n\n    while let Some(task_run_result) = delete_tasks.join_next().await {\n        match task_run_result {\n            Ok(task_result) => match task_result {\n                Ok(()) => {}\n                Err(e) => error!(\"Delete task failed: {e:?}\"),\n            },\n            Err(join_err) => error!(\"Delete task did not finish correctly: {join_err}\"),\n        }\n    }\n}\npub(crate) struct Uploads {\n    pub(crate) prefixes: HashSet<RemotePath>,\n    pub(crate) blobs: HashSet<RemotePath>,\n}\n\npub(crate) async fn upload_remote_data(\n    client: &Arc<GenericRemoteStorage>,\n    base_prefix_str: &'static str,\n    upload_tasks_count: usize,\n) -> ControlFlow<Uploads, Uploads> {\n    info!(\"Creating {upload_tasks_count} remote files\");\n    let mut upload_tasks = JoinSet::new();\n    let cancel = CancellationToken::new();\n\n    for i in 1..=upload_tasks_count {\n        let task_client = Arc::clone(client);\n        let cancel = cancel.clone();\n\n        upload_tasks.spawn(async move {\n            let prefix = format!(\"{base_prefix_str}/sub_prefix_{i}/\");\n            let blob_prefix = RemotePath::new(Utf8Path::new(&prefix))\n                .with_context(|| 
format!(\"{prefix:?} to RemotePath conversion\"))?;\n            let blob_path = blob_prefix.join(Utf8Path::new(&format!(\"blob_{i}\")));\n            debug!(\"Creating remote item {i} at path {blob_path:?}\");\n\n            let (data, data_len) =\n                upload_stream(format!(\"remote blob data {i}\").into_bytes().into());\n\n            /* BEGIN_HADRON */\n            let mut metadata = None;\n            if matches!(&*task_client, GenericRemoteStorage::AzureBlob(_)) {\n                let file_path = \"/tmp/dbx_upload_tmp_file.txt\";\n                {\n                    // Open the file in append mode\n                    let mut file = std::fs::OpenOptions::new()\n                        .append(true)\n                        .create(true) // Create the file if it doesn't exist\n                        .open(file_path)?;\n                    // Append some bytes to the file\n                    std::io::Write::write_all(\n                        &mut file,\n                        &format!(\"remote blob data {i}\").into_bytes(),\n                    )?;\n                    file.sync_all()?;\n                }\n                metadata = Some(remote_storage::StorageMetadata::from([(\n                    \"databricks_azure_put_block\",\n                    file_path,\n                )]));\n            }\n            /* END_HADRON */\n\n            task_client\n                .upload(data, data_len, &blob_path, metadata, &cancel)\n                .await?;\n\n            // TODO: Check upload is using the put_block upload.\n            // We cannot consume data here since data is moved inside the upload.\n            // let total_bytes = data.fold(0, |acc, chunk| async move {\n            //     acc + chunk.map(|bytes| bytes.len()).unwrap_or(0)\n            // }).await;\n            // assert_eq!(total_bytes, data_len);\n\n            Ok::<_, anyhow::Error>((blob_prefix, blob_path))\n        });\n    }\n\n    let mut upload_tasks_failed = false;\n   
 let mut uploaded_prefixes = HashSet::with_capacity(upload_tasks_count);\n    let mut uploaded_blobs = HashSet::with_capacity(upload_tasks_count);\n    while let Some(task_run_result) = upload_tasks.join_next().await {\n        match task_run_result\n            .context(\"task join failed\")\n            .and_then(|task_result| task_result.context(\"upload task failed\"))\n        {\n            Ok((upload_prefix, upload_path)) => {\n                uploaded_prefixes.insert(upload_prefix);\n                uploaded_blobs.insert(upload_path);\n            }\n            Err(e) => {\n                error!(\"Upload task failed: {e:?}\");\n                upload_tasks_failed = true;\n            }\n        }\n    }\n\n    let uploads = Uploads {\n        prefixes: uploaded_prefixes,\n        blobs: uploaded_blobs,\n    };\n    if upload_tasks_failed {\n        ControlFlow::Break(uploads)\n    } else {\n        ControlFlow::Continue(uploads)\n    }\n}\n\npub(crate) fn ensure_logging_ready() {\n    LOGGING_DONE.get_or_init(|| {\n        utils::logging::init(\n            utils::logging::LogFormat::Test,\n            utils::logging::TracingErrorLayerEnablement::Disabled,\n            utils::logging::Output::Stdout,\n        )\n        .expect(\"logging init failed\");\n    });\n}\n"
  },
  {
    "path": "libs/remote_storage/tests/common/tests.rs",
    "content": "use std::collections::HashSet;\nuse std::num::NonZeroU32;\nuse std::ops::Bound;\nuse std::sync::Arc;\n\nuse anyhow::Context;\nuse camino::Utf8Path;\nuse futures::StreamExt;\nuse remote_storage::{DownloadError, DownloadOpts, ListingMode, ListingObject, RemotePath};\nuse test_context::test_context;\nuse tokio_util::sync::CancellationToken;\nuse tracing::debug;\n\nuse super::{\n    MaybeEnabledStorage, MaybeEnabledStorageWithSimpleTestBlobs, MaybeEnabledStorageWithTestBlobs,\n};\nuse crate::common::{download_to_vec, upload_stream, wrap_stream};\n\n/// Tests that S3 client can list all prefixes, even if the response comes paginated and requires multiple S3 queries.\n/// Uses real S3 and requires [`ENABLE_REAL_S3_REMOTE_STORAGE_ENV_VAR_NAME`] and related S3 cred env vars specified.\n/// See the client creation in [`create_s3_client`] for details on the required env vars.\n/// If real S3 tests are disabled, the test passes, skipping any real test run: currently, there's no way to mark the test ignored in runtime with the\n/// default test framework, see https://github.com/rust-lang/rust/issues/68007 for details.\n///\n/// First, the test creates a set of S3 objects with keys `/${random_prefix_part}/${base_prefix_str}/sub_prefix_${i}/blob_${i}` in [`upload_remote_data`]\n/// where\n/// * `random_prefix_part` is set for the entire S3 client during the S3 client creation in [`create_s3_client`], to avoid multiple test runs interference\n/// * `base_prefix_str` is a common prefix to use in the client requests: we would want to ensure that the client is able to list nested prefixes inside the bucket\n///\n/// Then, verifies that the client does return correct prefixes when queried:\n/// * with no prefix, it lists everything after its `${random_prefix_part}/` — that should be `${base_prefix_str}` value only\n/// * with `${base_prefix_str}/` prefix, it lists every `sub_prefix_${i}`\n///\n/// In the `MaybeEnabledStorageWithTestBlobs::setup`, we set the 
`max_keys_in_list_response` param to limit the keys in a single response.\n/// This way, we are able to test the pagination, by ensuring all results are returned from the remote storage and avoid uploading too many blobs to S3,\n/// as the current default AWS S3 pagination limit is 1000.\n/// (see <https://docs.aws.amazon.com/AmazonS3/latest/API/API_ListObjectsV2.html#API_ListObjectsV2_RequestSyntax>).\n///\n/// Lastly, the test attempts to clean up and remove all uploaded S3 files.\n/// If any errors appear during the clean up, they get logged, but the test is not failed or stopped until clean up is finished.\n#[test_context(MaybeEnabledStorageWithTestBlobs)]\n#[tokio::test]\nasync fn pagination_should_work(ctx: &mut MaybeEnabledStorageWithTestBlobs) -> anyhow::Result<()> {\n    let ctx = match ctx {\n        MaybeEnabledStorageWithTestBlobs::Enabled(ctx) => ctx,\n        MaybeEnabledStorageWithTestBlobs::Disabled => return Ok(()),\n        MaybeEnabledStorageWithTestBlobs::UploadsFailed(e, _) => {\n            anyhow::bail!(\"S3 init failed: {e:?}\")\n        }\n    };\n\n    let cancel = CancellationToken::new();\n\n    let test_client = Arc::clone(&ctx.enabled.client);\n    let expected_remote_prefixes = ctx.remote_prefixes.clone();\n\n    let base_prefix = RemotePath::new(Utf8Path::new(ctx.enabled.base_prefix))\n        .context(\"common_prefix construction\")?;\n    let root_remote_prefixes = test_client\n        .list(None, ListingMode::WithDelimiter, None, &cancel)\n        .await?\n        .prefixes\n        .into_iter()\n        .collect::<HashSet<_>>();\n    assert_eq!(\n        root_remote_prefixes,\n        HashSet::from([base_prefix.clone()]),\n        \"remote storage root prefixes list mismatches with the uploads. 
Returned prefixes: {root_remote_prefixes:?}\"\n    );\n\n    let nested_remote_prefixes = test_client\n        .list(\n            Some(&base_prefix.add_trailing_slash()),\n            ListingMode::WithDelimiter,\n            None,\n            &cancel,\n        )\n        .await?\n        .prefixes\n        .into_iter()\n        .collect::<HashSet<_>>();\n    let remote_only_prefixes = nested_remote_prefixes\n        .difference(&expected_remote_prefixes)\n        .collect::<HashSet<_>>();\n    let missing_uploaded_prefixes = expected_remote_prefixes\n        .difference(&nested_remote_prefixes)\n        .collect::<HashSet<_>>();\n    assert_eq!(\n        remote_only_prefixes.len() + missing_uploaded_prefixes.len(),\n        0,\n        \"remote storage nested prefixes list mismatches with the uploads. Remote only prefixes: {remote_only_prefixes:?}, missing uploaded prefixes: {missing_uploaded_prefixes:?}\",\n    );\n\n    // list_streaming\n\n    let prefix_with_slash = base_prefix.add_trailing_slash();\n    let mut nested_remote_prefixes_st = test_client.list_streaming(\n        Some(&prefix_with_slash),\n        ListingMode::WithDelimiter,\n        None,\n        &cancel,\n    );\n    let mut nested_remote_prefixes_combined = HashSet::new();\n    let mut segments = 0;\n    let mut segment_max_size = 0;\n    while let Some(st) = nested_remote_prefixes_st.next().await {\n        let st = st?;\n        segment_max_size = segment_max_size.max(st.prefixes.len());\n        nested_remote_prefixes_combined.extend(st.prefixes.into_iter());\n        segments += 1;\n    }\n    assert!(segments > 1, \"less than 2 segments: {segments}\");\n    assert!(\n        segment_max_size * 2 <= nested_remote_prefixes_combined.len(),\n        \"double of segment_max_size={segment_max_size} larger number of remote prefixes of {}\",\n        nested_remote_prefixes_combined.len()\n    );\n    let remote_only_prefixes = nested_remote_prefixes_combined\n        
.difference(&expected_remote_prefixes)\n        .collect::<HashSet<_>>();\n    let missing_uploaded_prefixes = expected_remote_prefixes\n        .difference(&nested_remote_prefixes_combined)\n        .collect::<HashSet<_>>();\n    assert_eq!(\n        remote_only_prefixes.len() + missing_uploaded_prefixes.len(),\n        0,\n        \"remote storage nested prefixes list mismatches with the uploads. Remote only prefixes: {remote_only_prefixes:?}, missing uploaded prefixes: {missing_uploaded_prefixes:?}\",\n    );\n\n    Ok(())\n}\n\n/// Tests that S3 client can list all files in a folder, even if the response comes paginated and requirees multiple S3 queries.\n/// Uses real S3 and requires [`ENABLE_REAL_S3_REMOTE_STORAGE_ENV_VAR_NAME`] and related S3 cred env vars specified. Test will skip real code and pass if env vars not set.\n/// See `s3_pagination_should_work` for more information.\n///\n/// First, create a set of S3 objects with keys `random_prefix/folder{j}/blob_{i}.txt` in [`upload_remote_data`]\n/// Then performs the following queries:\n///    1. `list(None)`. This should return all files `random_prefix/folder{j}/blob_{i}.txt`\n///    2. `list(\"folder1\")`.  
This  should return all files `random_prefix/folder1/blob_{i}.txt`\n#[test_context(MaybeEnabledStorageWithSimpleTestBlobs)]\n#[tokio::test]\nasync fn list_no_delimiter_works(\n    ctx: &mut MaybeEnabledStorageWithSimpleTestBlobs,\n) -> anyhow::Result<()> {\n    let ctx = match ctx {\n        MaybeEnabledStorageWithSimpleTestBlobs::Enabled(ctx) => ctx,\n        MaybeEnabledStorageWithSimpleTestBlobs::Disabled => return Ok(()),\n        MaybeEnabledStorageWithSimpleTestBlobs::UploadsFailed(e, _) => {\n            anyhow::bail!(\"S3 init failed: {e:?}\")\n        }\n    };\n    let cancel = CancellationToken::new();\n    let test_client = Arc::clone(&ctx.enabled.client);\n    let base_prefix =\n        RemotePath::new(Utf8Path::new(\"folder1\")).context(\"common_prefix construction\")?;\n    let root_files = test_client\n        .list(None, ListingMode::NoDelimiter, None, &cancel)\n        .await\n        .context(\"client list root files failure\")?\n        .keys\n        .into_iter()\n        .map(|o| o.key)\n        .collect::<HashSet<_>>();\n    assert_eq!(\n        root_files,\n        ctx.remote_blobs.clone(),\n        \"remote storage list on root mismatches with the uploads.\"\n    );\n\n    // Test that max_keys limit works. 
In total there are about 21 files (see\n    // upload_simple_remote_data call in test_real_s3.rs).\n    let limited_root_files = test_client\n        .list(\n            None,\n            ListingMode::NoDelimiter,\n            Some(NonZeroU32::new(2).unwrap()),\n            &cancel,\n        )\n        .await\n        .context(\"client list root files failure\")?;\n    assert_eq!(limited_root_files.keys.len(), 2);\n\n    let nested_remote_files = test_client\n        .list(Some(&base_prefix), ListingMode::NoDelimiter, None, &cancel)\n        .await\n        .context(\"client list nested files failure\")?\n        .keys\n        .into_iter()\n        .map(|o| o.key)\n        .collect::<HashSet<_>>();\n    let trim_remote_blobs: HashSet<_> = ctx\n        .remote_blobs\n        .iter()\n        .map(|x| x.get_path())\n        .filter(|x| x.starts_with(\"folder1\"))\n        .map(|x| RemotePath::new(x).expect(\"must be valid path\"))\n        .collect();\n    assert_eq!(\n        nested_remote_files, trim_remote_blobs,\n        \"remote storage list on subdirrectory mismatches with the uploads.\"\n    );\n    Ok(())\n}\n\n/// Tests that giving a partial prefix returns all matches (e.g. 
\"/foo\" yields \"/foobar/baz\"),\n/// but only with NoDelimiter.\n#[test_context(MaybeEnabledStorageWithSimpleTestBlobs)]\n#[tokio::test]\nasync fn list_partial_prefix(\n    ctx: &mut MaybeEnabledStorageWithSimpleTestBlobs,\n) -> anyhow::Result<()> {\n    let ctx = match ctx {\n        MaybeEnabledStorageWithSimpleTestBlobs::Enabled(ctx) => ctx,\n        MaybeEnabledStorageWithSimpleTestBlobs::Disabled => return Ok(()),\n        MaybeEnabledStorageWithSimpleTestBlobs::UploadsFailed(e, _) => {\n            anyhow::bail!(\"S3 init failed: {e:?}\")\n        }\n    };\n\n    let cancel = CancellationToken::new();\n    let test_client = Arc::clone(&ctx.enabled.client);\n\n    // Prefix \"fold\" should match all \"folder{i}\" directories with NoDelimiter.\n    let objects: HashSet<_> = test_client\n        .list(\n            Some(&RemotePath::from_string(\"fold\")?),\n            ListingMode::NoDelimiter,\n            None,\n            &cancel,\n        )\n        .await?\n        .keys\n        .into_iter()\n        .map(|o| o.key)\n        .collect();\n    assert_eq!(&objects, &ctx.remote_blobs);\n\n    // Prefix \"fold\" matches nothing with WithDelimiter.\n    let objects: HashSet<_> = test_client\n        .list(\n            Some(&RemotePath::from_string(\"fold\")?),\n            ListingMode::WithDelimiter,\n            None,\n            &cancel,\n        )\n        .await?\n        .keys\n        .into_iter()\n        .map(|o| o.key)\n        .collect();\n    assert!(objects.is_empty());\n\n    // Prefix \"\" matches everything.\n    let objects: HashSet<_> = test_client\n        .list(\n            Some(&RemotePath::from_string(\"\")?),\n            ListingMode::NoDelimiter,\n            None,\n            &cancel,\n        )\n        .await?\n        .keys\n        .into_iter()\n        .map(|o| o.key)\n        .collect();\n    assert_eq!(&objects, &ctx.remote_blobs);\n\n    // Prefix \"\" matches nothing with WithDelimiter.\n    let objects: HashSet<_> = 
test_client\n        .list(\n            Some(&RemotePath::from_string(\"\")?),\n            ListingMode::WithDelimiter,\n            None,\n            &cancel,\n        )\n        .await?\n        .keys\n        .into_iter()\n        .map(|o| o.key)\n        .collect();\n    assert!(objects.is_empty());\n\n    // Prefix \"foo\" matches nothing.\n    let objects: HashSet<_> = test_client\n        .list(\n            Some(&RemotePath::from_string(\"foo\")?),\n            ListingMode::NoDelimiter,\n            None,\n            &cancel,\n        )\n        .await?\n        .keys\n        .into_iter()\n        .map(|o| o.key)\n        .collect();\n    assert!(objects.is_empty());\n\n    // Prefix \"folder2/blob\" matches.\n    let objects: HashSet<_> = test_client\n        .list(\n            Some(&RemotePath::from_string(\"folder2/blob\")?),\n            ListingMode::NoDelimiter,\n            None,\n            &cancel,\n        )\n        .await?\n        .keys\n        .into_iter()\n        .map(|o| o.key)\n        .collect();\n    let expect: HashSet<_> = ctx\n        .remote_blobs\n        .iter()\n        .filter(|o| o.get_path().starts_with(\"folder2\"))\n        .cloned()\n        .collect();\n    assert_eq!(&objects, &expect);\n\n    // Prefix \"folder2/foo\" matches nothing.\n    let objects: HashSet<_> = test_client\n        .list(\n            Some(&RemotePath::from_string(\"folder2/foo\")?),\n            ListingMode::NoDelimiter,\n            None,\n            &cancel,\n        )\n        .await?\n        .keys\n        .into_iter()\n        .map(|o| o.key)\n        .collect();\n    assert!(objects.is_empty());\n\n    Ok(())\n}\n\n#[test_context(MaybeEnabledStorage)]\n#[tokio::test]\nasync fn delete_non_exising_works(ctx: &mut MaybeEnabledStorage) -> anyhow::Result<()> {\n    let ctx = match ctx {\n        MaybeEnabledStorage::Enabled(ctx) => ctx,\n        MaybeEnabledStorage::Disabled => return Ok(()),\n    };\n\n    let cancel = 
CancellationToken::new();\n\n    let path = RemotePath::new(Utf8Path::new(\n        format!(\"{}/for_sure_there_is_nothing_there_really\", ctx.base_prefix).as_str(),\n    ))\n    .with_context(|| \"RemotePath conversion\")?;\n\n    ctx.client\n        .delete(&path, &cancel)\n        .await\n        .expect(\"should succeed\");\n\n    Ok(())\n}\n\n#[test_context(MaybeEnabledStorage)]\n#[tokio::test]\nasync fn delete_objects_works(ctx: &mut MaybeEnabledStorage) -> anyhow::Result<()> {\n    let ctx = match ctx {\n        MaybeEnabledStorage::Enabled(ctx) => ctx,\n        MaybeEnabledStorage::Disabled => return Ok(()),\n    };\n\n    let cancel = CancellationToken::new();\n\n    let path1 = RemotePath::new(Utf8Path::new(format!(\"{}/path1\", ctx.base_prefix).as_str()))\n        .with_context(|| \"RemotePath conversion\")?;\n\n    let path2 = RemotePath::new(Utf8Path::new(format!(\"{}/path2\", ctx.base_prefix).as_str()))\n        .with_context(|| \"RemotePath conversion\")?;\n\n    let path3 = RemotePath::new(Utf8Path::new(format!(\"{}/path3\", ctx.base_prefix).as_str()))\n        .with_context(|| \"RemotePath conversion\")?;\n\n    let (data, len) = upload_stream(\"remote blob data1\".as_bytes().into());\n    ctx.client.upload(data, len, &path1, None, &cancel).await?;\n\n    let (data, len) = upload_stream(\"remote blob data2\".as_bytes().into());\n    ctx.client.upload(data, len, &path2, None, &cancel).await?;\n\n    let (data, len) = upload_stream(\"remote blob data3\".as_bytes().into());\n    ctx.client.upload(data, len, &path3, None, &cancel).await?;\n\n    ctx.client.delete_objects(&[path1, path2], &cancel).await?;\n\n    let prefixes = ctx\n        .client\n        .list(None, ListingMode::WithDelimiter, None, &cancel)\n        .await?\n        .prefixes;\n\n    assert_eq!(prefixes.len(), 1);\n\n    ctx.client.delete_objects(&[path3], &cancel).await?;\n\n    Ok(())\n}\n\n/// Tests that delete_prefix() will delete all objects matching a prefix, including\n/// 
partial prefixes (i.e. \"/foo\" matches \"/foobar\").\n#[test_context(MaybeEnabledStorageWithSimpleTestBlobs)]\n#[tokio::test]\nasync fn delete_prefix(ctx: &mut MaybeEnabledStorageWithSimpleTestBlobs) -> anyhow::Result<()> {\n    let ctx = match ctx {\n        MaybeEnabledStorageWithSimpleTestBlobs::Enabled(ctx) => ctx,\n        MaybeEnabledStorageWithSimpleTestBlobs::Disabled => return Ok(()),\n        MaybeEnabledStorageWithSimpleTestBlobs::UploadsFailed(e, _) => {\n            anyhow::bail!(\"S3 init failed: {e:?}\")\n        }\n    };\n\n    let cancel = CancellationToken::new();\n    let test_client = Arc::clone(&ctx.enabled.client);\n\n    /// Asserts that the S3 listing matches the given paths.\n    macro_rules! assert_list {\n        ($expect:expr) => {{\n            let listing = test_client\n                .list(None, ListingMode::NoDelimiter, None, &cancel)\n                .await?\n                .keys\n                .into_iter()\n                .map(|o| o.key)\n                .collect();\n            assert_eq!($expect, listing);\n        }};\n    }\n\n    // We start with the full set of uploaded files.\n    let mut expect = ctx.remote_blobs.clone();\n\n    // Deleting a non-existing prefix should do nothing.\n    test_client\n        .delete_prefix(&RemotePath::from_string(\"xyz\")?, &cancel)\n        .await?;\n    assert_list!(expect);\n\n    // Prefixes are case-sensitive.\n    test_client\n        .delete_prefix(&RemotePath::from_string(\"Folder\")?, &cancel)\n        .await?;\n    assert_list!(expect);\n\n    // Deleting a path which overlaps with an existing object should do nothing. We pick the first\n    // path in the set as our common prefix.\n    let path = expect.iter().next().expect(\"empty set\").clone().join(\"xyz\");\n    test_client.delete_prefix(&path, &cancel).await?;\n    assert_list!(expect);\n\n    // Deleting an exact path should work. 
We pick the first path in the set.\n    let path = expect.iter().next().expect(\"empty set\").clone();\n    test_client.delete_prefix(&path, &cancel).await?;\n    expect.remove(&path);\n    assert_list!(expect);\n\n    // Deleting a prefix should delete all matching objects.\n    test_client\n        .delete_prefix(&RemotePath::from_string(\"folder0/blob_\")?, &cancel)\n        .await?;\n    expect.retain(|p| !p.get_path().as_str().starts_with(\"folder0/\"));\n    assert_list!(expect);\n\n    // Deleting a common prefix should delete all objects.\n    test_client\n        .delete_prefix(&RemotePath::from_string(\"fold\")?, &cancel)\n        .await?;\n    expect.clear();\n    assert_list!(expect);\n\n    Ok(())\n}\n\n#[test_context(MaybeEnabledStorage)]\n#[tokio::test]\nasync fn upload_download_works(ctx: &mut MaybeEnabledStorage) -> anyhow::Result<()> {\n    let MaybeEnabledStorage::Enabled(ctx) = ctx else {\n        return Ok(());\n    };\n\n    let cancel = CancellationToken::new();\n\n    let path = RemotePath::new(Utf8Path::new(format!(\"{}/file\", ctx.base_prefix).as_str()))\n        .with_context(|| \"RemotePath conversion\")?;\n\n    let orig = bytes::Bytes::from_static(\"remote blob data here\".as_bytes());\n\n    let (data, len) = wrap_stream(orig.clone());\n\n    ctx.client.upload(data, len, &path, None, &cancel).await?;\n\n    // Normal download request\n    let dl = ctx\n        .client\n        .download(&path, &DownloadOpts::default(), &cancel)\n        .await?;\n    let buf = download_to_vec(dl).await?;\n    assert_eq!(&buf, &orig);\n\n    // Full range (end specified)\n    let dl = ctx\n        .client\n        .download(\n            &path,\n            &DownloadOpts {\n                byte_start: Bound::Included(0),\n                byte_end: Bound::Excluded(len as u64),\n                ..Default::default()\n            },\n            &cancel,\n        )\n        .await?;\n    let buf = download_to_vec(dl).await?;\n    assert_eq!(&buf, 
&orig);\n\n    // partial range (end specified)\n    let dl = ctx\n        .client\n        .download(\n            &path,\n            &DownloadOpts {\n                byte_start: Bound::Included(4),\n                byte_end: Bound::Excluded(10),\n                ..Default::default()\n            },\n            &cancel,\n        )\n        .await?;\n    let buf = download_to_vec(dl).await?;\n    assert_eq!(&buf, &orig[4..10]);\n\n    // partial range (end beyond real end)\n    let dl = ctx\n        .client\n        .download(\n            &path,\n            &DownloadOpts {\n                byte_start: Bound::Included(8),\n                byte_end: Bound::Excluded(len as u64 * 100),\n                ..Default::default()\n            },\n            &cancel,\n        )\n        .await?;\n    let buf = download_to_vec(dl).await?;\n    assert_eq!(&buf, &orig[8..]);\n\n    // Partial range (end unspecified)\n    let dl = ctx\n        .client\n        .download(\n            &path,\n            &DownloadOpts {\n                byte_start: Bound::Included(4),\n                ..Default::default()\n            },\n            &cancel,\n        )\n        .await?;\n    let buf = download_to_vec(dl).await?;\n    assert_eq!(&buf, &orig[4..]);\n\n    // Full range (end unspecified)\n    let dl = ctx\n        .client\n        .download(\n            &path,\n            &DownloadOpts {\n                byte_start: Bound::Included(0),\n                ..Default::default()\n            },\n            &cancel,\n        )\n        .await?;\n    let buf = download_to_vec(dl).await?;\n    assert_eq!(&buf, &orig);\n\n    debug!(\"Cleanup: deleting file at path {path:?}\");\n    ctx.client\n        .delete(&path, &cancel)\n        .await\n        .with_context(|| format!(\"{path:?} removal\"))?;\n\n    Ok(())\n}\n\n/// Tests that conditional downloads work properly, by returning\n/// DownloadError::Unmodified when the object ETag matches the given 
ETag.\n#[test_context(MaybeEnabledStorage)]\n#[tokio::test]\nasync fn download_conditional(ctx: &mut MaybeEnabledStorage) -> anyhow::Result<()> {\n    let MaybeEnabledStorage::Enabled(ctx) = ctx else {\n        return Ok(());\n    };\n    let cancel = CancellationToken::new();\n\n    // Create a file.\n    let path = RemotePath::new(Utf8Path::new(format!(\"{}/file\", ctx.base_prefix).as_str()))?;\n    let data = bytes::Bytes::from_static(\"foo\".as_bytes());\n    let (stream, len) = wrap_stream(data);\n    ctx.client.upload(stream, len, &path, None, &cancel).await?;\n\n    // Download it to obtain its etag.\n    let mut opts = DownloadOpts::default();\n    let download = ctx.client.download(&path, &opts, &cancel).await?;\n\n    // Download with the etag yields DownloadError::Unmodified.\n    opts.etag = Some(download.etag);\n    let result = ctx.client.download(&path, &opts, &cancel).await;\n    assert!(\n        matches!(result, Err(DownloadError::Unmodified)),\n        \"expected DownloadError::Unmodified, got {result:?}\"\n    );\n\n    // Replace the file contents.\n    let data = bytes::Bytes::from_static(\"bar\".as_bytes());\n    let (stream, len) = wrap_stream(data);\n    ctx.client.upload(stream, len, &path, None, &cancel).await?;\n\n    // A download with the old etag should yield the new file.\n    let download = ctx.client.download(&path, &opts, &cancel).await?;\n    assert_ne!(download.etag, opts.etag.unwrap(), \"ETag did not change\");\n\n    // A download with the new etag should yield Unmodified again.\n    opts.etag = Some(download.etag);\n    let result = ctx.client.download(&path, &opts, &cancel).await;\n    assert!(\n        matches!(result, Err(DownloadError::Unmodified)),\n        \"expected DownloadError::Unmodified, got {result:?}\"\n    );\n\n    Ok(())\n}\n\n#[test_context(MaybeEnabledStorage)]\n#[tokio::test]\nasync fn copy_works(ctx: &mut MaybeEnabledStorage) -> anyhow::Result<()> {\n    let MaybeEnabledStorage::Enabled(ctx) = ctx else 
{\n        return Ok(());\n    };\n\n    let cancel = CancellationToken::new();\n\n    let path = RemotePath::new(Utf8Path::new(\n        format!(\"{}/file_to_copy\", ctx.base_prefix).as_str(),\n    ))\n    .with_context(|| \"RemotePath conversion\")?;\n    let path_dest = RemotePath::new(Utf8Path::new(\n        format!(\"{}/file_dest\", ctx.base_prefix).as_str(),\n    ))\n    .with_context(|| \"RemotePath conversion\")?;\n\n    let orig = bytes::Bytes::from_static(\"remote blob data content\".as_bytes());\n\n    let (data, len) = wrap_stream(orig.clone());\n\n    ctx.client.upload(data, len, &path, None, &cancel).await?;\n\n    // Normal download request\n    ctx.client.copy_object(&path, &path_dest, &cancel).await?;\n\n    let dl = ctx\n        .client\n        .download(&path_dest, &DownloadOpts::default(), &cancel)\n        .await?;\n    let buf = download_to_vec(dl).await?;\n    assert_eq!(&buf, &orig);\n\n    debug!(\"Cleanup: deleting file at path {path:?}\");\n    ctx.client\n        .delete_objects(&[path.clone(), path_dest.clone()], &cancel)\n        .await\n        .with_context(|| format!(\"{path:?} removal\"))?;\n\n    Ok(())\n}\n\n/// Tests that head_object works properly.\n#[test_context(MaybeEnabledStorage)]\n#[tokio::test]\nasync fn head_object(ctx: &mut MaybeEnabledStorage) -> anyhow::Result<()> {\n    let MaybeEnabledStorage::Enabled(ctx) = ctx else {\n        return Ok(());\n    };\n    let cancel = CancellationToken::new();\n\n    let path = RemotePath::new(Utf8Path::new(format!(\"{}/file\", ctx.base_prefix).as_str()))?;\n\n    // Errors on missing file.\n    let result = ctx.client.head_object(&path, &cancel).await;\n    assert!(\n        matches!(result, Err(DownloadError::NotFound)),\n        \"expected NotFound, got {result:?}\"\n    );\n\n    // Create the file.\n    let data = bytes::Bytes::from_static(\"foo\".as_bytes());\n    let (stream, len) = wrap_stream(data);\n    ctx.client.upload(stream, len, &path, None, &cancel).await?;\n\n    
// Fetch the head metadata.\n    let object = ctx.client.head_object(&path, &cancel).await?;\n    assert_eq!(\n        object,\n        ListingObject {\n            key: path.clone(),\n            last_modified: object.last_modified, // ignore\n            size: 3\n        }\n    );\n\n    // Wait for a couple of seconds, and then update the file to check the last\n    // modified timestamp.\n    tokio::time::sleep(std::time::Duration::from_secs(2)).await;\n\n    let data = bytes::Bytes::from_static(\"bar\".as_bytes());\n    let (stream, len) = wrap_stream(data);\n    ctx.client.upload(stream, len, &path, None, &cancel).await?;\n    let new = ctx.client.head_object(&path, &cancel).await?;\n\n    assert!(\n        !new.last_modified\n            .duration_since(object.last_modified)?\n            .is_zero(),\n        \"last_modified did not advance\"\n    );\n\n    Ok(())\n}\n"
  },
  {
    "path": "libs/remote_storage/tests/test_real_azure.rs",
    "content": "use std::collections::HashSet;\nuse std::env;\nuse std::num::NonZeroUsize;\nuse std::ops::ControlFlow;\nuse std::sync::Arc;\nuse std::time::{Duration, UNIX_EPOCH};\n\nuse anyhow::Context;\nuse remote_storage::{\n    AzureConfig, GenericRemoteStorage, RemotePath, RemoteStorageConfig, RemoteStorageKind,\n};\nuse test_context::AsyncTestContext;\nuse tracing::info;\n\nmod common;\n\n#[path = \"common/tests.rs\"]\nmod tests_azure;\n\nuse common::{cleanup, ensure_logging_ready, upload_remote_data, upload_simple_remote_data};\n\nconst ENABLE_REAL_AZURE_REMOTE_STORAGE_ENV_VAR_NAME: &str = \"ENABLE_REAL_AZURE_REMOTE_STORAGE\";\n\nconst BASE_PREFIX: &str = \"test\";\n\nstruct EnabledAzure {\n    client: Arc<GenericRemoteStorage>,\n    base_prefix: &'static str,\n}\n\nimpl EnabledAzure {\n    async fn setup(max_keys_in_list_response: Option<i32>) -> Self {\n        let client = create_azure_client(max_keys_in_list_response)\n            .await\n            .context(\"Azure client creation\")\n            .expect(\"Azure client creation failed\");\n\n        EnabledAzure {\n            client,\n            base_prefix: BASE_PREFIX,\n        }\n    }\n\n    #[allow(unused)] // this will be needed when moving the timeout integration tests back\n    fn configure_request_timeout(&mut self, timeout: Duration) {\n        match Arc::get_mut(&mut self.client).expect(\"outer Arc::get_mut\") {\n            GenericRemoteStorage::AzureBlob(azure) => {\n                let azure = Arc::get_mut(azure).expect(\"inner Arc::get_mut\");\n                azure.timeout = timeout;\n            }\n            _ => unreachable!(),\n        }\n    }\n}\n\nenum MaybeEnabledStorage {\n    Enabled(EnabledAzure),\n    Disabled,\n}\n\nimpl AsyncTestContext for MaybeEnabledStorage {\n    async fn setup() -> Self {\n        ensure_logging_ready();\n\n        if env::var(ENABLE_REAL_AZURE_REMOTE_STORAGE_ENV_VAR_NAME).is_err() {\n            info!(\n                \"`{}` env variable is not 
set, skipping the test\",\n                ENABLE_REAL_AZURE_REMOTE_STORAGE_ENV_VAR_NAME\n            );\n            return Self::Disabled;\n        }\n\n        Self::Enabled(EnabledAzure::setup(None).await)\n    }\n}\n\nenum MaybeEnabledStorageWithTestBlobs {\n    Enabled(AzureWithTestBlobs),\n    Disabled,\n    UploadsFailed(anyhow::Error, AzureWithTestBlobs),\n}\n\nstruct AzureWithTestBlobs {\n    enabled: EnabledAzure,\n    remote_prefixes: HashSet<RemotePath>,\n    remote_blobs: HashSet<RemotePath>,\n}\n\nimpl AsyncTestContext for MaybeEnabledStorageWithTestBlobs {\n    async fn setup() -> Self {\n        ensure_logging_ready();\n        if env::var(ENABLE_REAL_AZURE_REMOTE_STORAGE_ENV_VAR_NAME).is_err() {\n            info!(\n                \"`{}` env variable is not set, skipping the test\",\n                ENABLE_REAL_AZURE_REMOTE_STORAGE_ENV_VAR_NAME\n            );\n            return Self::Disabled;\n        }\n\n        let max_keys_in_list_response = 10;\n        let upload_tasks_count = 1 + (2 * usize::try_from(max_keys_in_list_response).unwrap());\n\n        let enabled = EnabledAzure::setup(Some(max_keys_in_list_response)).await;\n\n        match upload_remote_data(&enabled.client, enabled.base_prefix, upload_tasks_count).await {\n            ControlFlow::Continue(uploads) => {\n                info!(\"Remote objects created successfully\");\n\n                Self::Enabled(AzureWithTestBlobs {\n                    enabled,\n                    remote_prefixes: uploads.prefixes,\n                    remote_blobs: uploads.blobs,\n                })\n            }\n            ControlFlow::Break(uploads) => Self::UploadsFailed(\n                anyhow::anyhow!(\"One or multiple blobs failed to upload to Azure\"),\n                AzureWithTestBlobs {\n                    enabled,\n                    remote_prefixes: uploads.prefixes,\n                    remote_blobs: uploads.blobs,\n                },\n            ),\n        }\n    }\n\n    
async fn teardown(self) {\n        match self {\n            Self::Disabled => {}\n            Self::Enabled(ctx) | Self::UploadsFailed(_, ctx) => {\n                cleanup(&ctx.enabled.client, ctx.remote_blobs).await;\n            }\n        }\n    }\n}\n\nenum MaybeEnabledStorageWithSimpleTestBlobs {\n    Enabled(AzureWithSimpleTestBlobs),\n    Disabled,\n    UploadsFailed(anyhow::Error, AzureWithSimpleTestBlobs),\n}\nstruct AzureWithSimpleTestBlobs {\n    enabled: EnabledAzure,\n    remote_blobs: HashSet<RemotePath>,\n}\n\nimpl AsyncTestContext for MaybeEnabledStorageWithSimpleTestBlobs {\n    async fn setup() -> Self {\n        ensure_logging_ready();\n        if env::var(ENABLE_REAL_AZURE_REMOTE_STORAGE_ENV_VAR_NAME).is_err() {\n            info!(\n                \"`{}` env variable is not set, skipping the test\",\n                ENABLE_REAL_AZURE_REMOTE_STORAGE_ENV_VAR_NAME\n            );\n            return Self::Disabled;\n        }\n\n        let max_keys_in_list_response = 10;\n        let upload_tasks_count = 1 + (2 * usize::try_from(max_keys_in_list_response).unwrap());\n\n        let enabled = EnabledAzure::setup(Some(max_keys_in_list_response)).await;\n\n        match upload_simple_remote_data(&enabled.client, upload_tasks_count).await {\n            ControlFlow::Continue(uploads) => {\n                info!(\"Remote objects created successfully\");\n\n                Self::Enabled(AzureWithSimpleTestBlobs {\n                    enabled,\n                    remote_blobs: uploads,\n                })\n            }\n            ControlFlow::Break(uploads) => Self::UploadsFailed(\n                anyhow::anyhow!(\"One or multiple blobs failed to upload to Azure\"),\n                AzureWithSimpleTestBlobs {\n                    enabled,\n                    remote_blobs: uploads,\n                },\n            ),\n        }\n    }\n\n    async fn teardown(self) {\n        match self {\n            Self::Disabled => {}\n            
Self::Enabled(ctx) | Self::UploadsFailed(_, ctx) => {\n                cleanup(&ctx.enabled.client, ctx.remote_blobs).await;\n            }\n        }\n    }\n}\n\nasync fn create_azure_client(\n    max_keys_per_list_response: Option<i32>,\n) -> anyhow::Result<Arc<GenericRemoteStorage>> {\n    use rand::Rng;\n\n    let remote_storage_azure_container = env::var(\"REMOTE_STORAGE_AZURE_CONTAINER\").context(\n        \"`REMOTE_STORAGE_AZURE_CONTAINER` env var is not set, but real Azure tests are enabled\",\n    )?;\n    let remote_storage_azure_region = env::var(\"REMOTE_STORAGE_AZURE_REGION\").context(\n        \"`REMOTE_STORAGE_AZURE_REGION` env var is not set, but real Azure tests are enabled\",\n    )?;\n\n    // due to how time works, we've had test runners use the same nanos as bucket prefixes.\n    // millis is just a debugging aid for easier finding the prefix later.\n    let millis = std::time::SystemTime::now()\n        .duration_since(UNIX_EPOCH)\n        .context(\"random Azure test prefix part calculation\")?\n        .as_millis();\n\n    // because nanos can be the same for two threads so can millis, add randomness\n    let random = rand::rng().random::<u32>();\n\n    let remote_storage_config = RemoteStorageConfig {\n        storage: RemoteStorageKind::AzureContainer(AzureConfig {\n            container_name: remote_storage_azure_container,\n            storage_account: None,\n            container_region: remote_storage_azure_region,\n            prefix_in_container: Some(format!(\"test_{millis}_{random:08x}/\")),\n            concurrency_limit: NonZeroUsize::new(100).unwrap(),\n            max_keys_per_list_response,\n            conn_pool_size: 8,\n            /* BEGIN_HADRON */\n            put_block_size_mb: Some(1),\n            /* END_HADRON */\n        }),\n        timeout: RemoteStorageConfig::DEFAULT_TIMEOUT,\n        small_timeout: RemoteStorageConfig::DEFAULT_SMALL_TIMEOUT,\n    };\n    Ok(Arc::new(\n        
GenericRemoteStorage::from_config(&remote_storage_config)\n            .await\n            .context(\"remote storage init\")?,\n    ))\n}\n"
  },
  {
    "path": "libs/remote_storage/tests/test_real_gcs.rs",
    "content": "#![allow(dead_code)]\n#![allow(unused)]\n\nmod common;\n\nuse crate::common::{download_to_vec, upload_stream};\nuse anyhow::Context;\nuse camino::Utf8Path;\nuse futures::StreamExt;\nuse futures::stream::Stream;\nuse remote_storage::{\n    DownloadKind, DownloadOpts, GCSConfig, GenericRemoteStorage, ListingMode, RemotePath,\n    RemoteStorageConfig, RemoteStorageKind, StorageMetadata,\n};\nuse std::collections::HashMap;\n#[path = \"common/tests.rs\"]\nuse std::collections::HashSet;\nuse std::fmt::{Debug, Display};\nuse std::io::Cursor;\nuse std::ops::Bound;\nuse std::pin::pin;\nuse std::sync::Arc;\nuse std::time::Duration;\nuse std::time::SystemTime;\nuse test_context::{AsyncTestContext, test_context};\nuse tokio_util::sync::CancellationToken;\nuse utils::backoff;\n\n// A minimal working GCS client I can pass around in async context\n\nconst BASE_PREFIX: &str = \"test\";\n\nasync fn create_gcs_client() -> anyhow::Result<Arc<GenericRemoteStorage>> {\n    let bucket_name = std::env::var(\"GCS_TEST_BUCKET\").expect(\"GCS_TEST_BUCKET must be set\");\n    let gcs_config = GCSConfig {\n        bucket_name,\n        prefix_in_bucket: Some(\"testing-path/\".into()),\n        max_keys_per_list_response: Some(100),\n        concurrency_limit: std::num::NonZero::new(100).unwrap(),\n    };\n\n    let remote_storage_config = RemoteStorageConfig {\n        storage: RemoteStorageKind::GCS(gcs_config),\n        timeout: Duration::from_secs(120),\n        small_timeout: std::time::Duration::from_secs(120),\n    };\n    Ok(Arc::new(\n        GenericRemoteStorage::from_config(&remote_storage_config)\n            .await\n            .context(\"remote storage init\")?,\n    ))\n}\n\nstruct EnabledGCS {\n    client: Arc<GenericRemoteStorage>,\n    base_prefix: &'static str,\n}\n\nimpl EnabledGCS {\n    async fn setup() -> Self {\n        let client = create_gcs_client()\n            .await\n            .context(\"gcs client creation\")\n            .expect(\"gcs client 
creation failed\");\n        EnabledGCS {\n            client,\n            base_prefix: BASE_PREFIX,\n        }\n    }\n}\n\nimpl AsyncTestContext for EnabledGCS {\n    async fn setup() -> Self {\n        Self::setup().await\n    }\n}\n\n#[test_context(EnabledGCS)]\n#[tokio::test]\nasync fn gcs_get_object_bytes_range_header(ctx: &mut EnabledGCS) -> anyhow::Result<()> {\n    let cancel = CancellationToken::new();\n    let path = RemotePath::new(Utf8Path::new(\n        format!(\"{}/000000010000028000000086\", ctx.base_prefix).as_str(),\n    ))\n    .with_context(|| \"RemotePath conversion\")?;\n\n    let (data, len) = upload_stream(\"hello, world\".as_bytes().into());\n\n    ctx.client.upload(data, len, &path, None, &cancel).await?;\n\n    let opts = DownloadOpts {\n        byte_start: Bound::Included(7),\n        ..Default::default()\n    };\n    let dl_object = download_to_vec(ctx.client.download(&path, &opts, &cancel).await?).await?;\n    let s = String::from_utf8(dl_object).unwrap();\n    assert_eq!(5, s.len());\n    Ok(())\n}\n#[test_context(EnabledGCS)]\n#[tokio::test]\nasync fn gcs_test_suite(ctx: &mut EnabledGCS) -> anyhow::Result<()> {\n    // ------------------------------------------------\n    // --- `time_travel_recover`, showcasing `upload`, `delete_objects`, `copy`\n    // ------------------------------------------------\n\n    // Our test depends on discrepancies in the clock between S3 and the environment the tests\n    // run in. Therefore, wait a little bit before and after. 
The alternative would be\n    // to take the time from S3 response headers.\n    const WAIT_TIME: Duration = Duration::from_millis(3_000);\n\n    async fn retry<T, O, F, E>(op: O) -> Result<T, E>\n    where\n        E: Display + Debug + 'static,\n        O: FnMut() -> F,\n        F: Future<Output = Result<T, E>>,\n    {\n        let warn_threshold = 3;\n        let max_retries = 10;\n        backoff::retry(\n            op,\n            |_e| false,\n            warn_threshold,\n            max_retries,\n            \"test retry\",\n            &CancellationToken::new(),\n        )\n        .await\n        .expect(\"never cancelled\")\n    }\n\n    async fn time_point() -> SystemTime {\n        tokio::time::sleep(WAIT_TIME).await;\n        let ret = SystemTime::now();\n        tokio::time::sleep(WAIT_TIME).await;\n        ret\n    }\n\n    async fn list_files(\n        client: &Arc<GenericRemoteStorage>,\n        cancel: &CancellationToken,\n    ) -> anyhow::Result<HashSet<RemotePath>> {\n        Ok(\n            retry(|| client.list(None, ListingMode::NoDelimiter, None, cancel))\n                .await\n                .context(\"list root files failure\")?\n                .keys\n                .into_iter()\n                .map(|o| o.key)\n                .collect::<HashSet<_>>(),\n        )\n    }\n\n    let cancel = CancellationToken::new();\n\n    let path1 = RemotePath::new(Utf8Path::new(format!(\"{}/path1\", ctx.base_prefix).as_str()))\n        .with_context(|| \"RemotePath conversion\")?;\n\n    let path2 = RemotePath::new(Utf8Path::new(format!(\"{}/path2\", ctx.base_prefix).as_str()))\n        .with_context(|| \"RemotePath conversion\")?;\n\n    let path3 = RemotePath::new(Utf8Path::new(format!(\"{}/path3\", ctx.base_prefix).as_str()))\n        .with_context(|| \"RemotePath conversion\")?;\n\n    // ---------------- t0 ---------------\n    // Upload 'path1'\n    retry(|| {\n        let (data, len) = upload_stream(\"remote blob 
data1\".as_bytes().into());\n        ctx.client.upload(data, len, &path1, None, &cancel)\n    })\n    .await?;\n    let t0_files = list_files(&ctx.client, &cancel).await?;\n    let t0 = time_point().await;\n\n    // Show 'path1'\n    println!(\"at t0: {t0_files:?}\");\n\n    // Upload 'path2'\n    let old_data = \"remote blob data2\";\n    retry(|| {\n        let (data, len) = upload_stream(old_data.as_bytes().into());\n        ctx.client.upload(data, len, &path2, None, &cancel)\n    })\n    .await?;\n\n    // ---------------- t1 ---------------\n    // Show 'path1' and 'path2'\n    let t1_files = list_files(&ctx.client, &cancel).await?;\n    let t1 = time_point().await;\n    println!(\"at t1: {t1_files:?}\");\n\n    {\n        let opts = DownloadOpts::default();\n        let dl = retry(|| ctx.client.download(&path2, &opts, &cancel)).await?;\n        let last_modified = dl.last_modified;\n        let half_wt = WAIT_TIME.mul_f32(0.5);\n        let t0_hwt = t0 + half_wt;\n        let t1_hwt = t1 - half_wt;\n        if !(t0_hwt..=t1_hwt).contains(&last_modified) {\n            panic!(\n                \"last_modified={last_modified:?} is not between t0_hwt={t0_hwt:?} and t1_hwt={t1_hwt:?}. 
\\\n                This likely means a large clock discrepancy between S3 and the local clock.\"\n            );\n        }\n    }\n\n    // Upload 'path3'\n    retry(|| {\n        let (data, len) = upload_stream(\"remote blob data3\".as_bytes().into());\n        ctx.client.upload(data, len, &path3, None, &cancel)\n    })\n    .await?;\n\n    // Overwrite 'path2'\n    let new_data = \"new remote blob data2\";\n    retry(|| {\n        let (data, len) = upload_stream(new_data.as_bytes().into());\n        ctx.client.upload(data, len, &path2, None, &cancel)\n    })\n    .await?;\n\n    // Delete 'path1'\n    retry(|| ctx.client.delete(&path1, &cancel)).await?;\n\n    // Show 'path2' and `path3`\n    let t2_files = list_files(&ctx.client, &cancel).await?;\n    let t2 = time_point().await;\n    println!(\"at t2: {t2_files:?}\");\n\n    // No changes after recovery to t2 (no-op)\n    let t_final = time_point().await;\n    ctx.client\n        .time_travel_recover(None, t2, t_final, &cancel, None)\n        .await?;\n    let t2_files_recovered = list_files(&ctx.client, &cancel).await?;\n    println!(\"after recovery to t2: {t2_files_recovered:?}\");\n\n    assert_eq!(t2_files, t2_files_recovered);\n    let path2_recovered_t2 = download_to_vec(\n        ctx.client\n            .download(&path2, &DownloadOpts::default(), &cancel)\n            .await?,\n    )\n    .await?;\n    assert_eq!(path2_recovered_t2, new_data.as_bytes());\n\n    // after recovery to t1: path1 is back, path2 has the old content\n    let t_final = time_point().await;\n    ctx.client\n        .time_travel_recover(None, t1, t_final, &cancel, None)\n        .await?;\n    let t1_files_recovered = list_files(&ctx.client, &cancel).await?;\n    println!(\"after recovery to t1: {t1_files_recovered:?}\");\n    assert_eq!(t1_files, t1_files_recovered);\n    let path2_recovered_t1 = download_to_vec(\n        ctx.client\n            .download(&path2, &DownloadOpts::default(), &cancel)\n            .await?,\n    )\n   
 .await?;\n    assert_eq!(path2_recovered_t1, old_data.as_bytes());\n\n    // after recovery to t0: everything is gone except for path1\n    let t_final = time_point().await;\n    ctx.client\n        .time_travel_recover(None, t0, t_final, &cancel, None)\n        .await?;\n    let t0_files_recovered = list_files(&ctx.client, &cancel).await?;\n    println!(\"after recovery to t0: {t0_files_recovered:?}\");\n    assert_eq!(t0_files, t0_files_recovered);\n\n    // cleanup\n    let paths = &[path1, path2, path3];\n    retry(|| ctx.client.delete_objects(paths, &cancel)).await?;\n\n    Ok(())\n}\n"
  },
  {
    "path": "libs/remote_storage/tests/test_real_s3.rs",
    "content": "use std::collections::HashSet;\nuse std::env;\nuse std::fmt::{Debug, Display};\nuse std::future::Future;\nuse std::num::NonZeroUsize;\nuse std::ops::ControlFlow;\nuse std::sync::Arc;\nuse std::time::{Duration, SystemTime, UNIX_EPOCH};\n\nuse anyhow::Context;\nuse camino::Utf8Path;\nuse futures_util::StreamExt;\nuse remote_storage::{\n    DownloadError, DownloadOpts, GenericRemoteStorage, ListingMode, RemotePath,\n    RemoteStorageConfig, RemoteStorageKind, S3Config,\n};\nuse test_context::{AsyncTestContext, test_context};\nuse tokio::io::AsyncBufReadExt;\nuse tokio_util::sync::CancellationToken;\nuse tracing::info;\n\nuse crate::common::{download_to_vec, upload_stream};\n\nmod common;\n\n#[path = \"common/tests.rs\"]\nmod tests_s3;\n\nuse common::{cleanup, ensure_logging_ready, upload_remote_data, upload_simple_remote_data};\nuse utils::backoff;\n\nconst ENABLE_REAL_S3_REMOTE_STORAGE_ENV_VAR_NAME: &str = \"ENABLE_REAL_S3_REMOTE_STORAGE\";\nconst BASE_PREFIX: &str = \"test\";\n\n#[test_context(MaybeEnabledStorage)]\n#[tokio::test]\nasync fn s3_time_travel_recovery_works(ctx: &mut MaybeEnabledStorage) -> anyhow::Result<()> {\n    let ctx = match ctx {\n        MaybeEnabledStorage::Enabled(ctx) => ctx,\n        MaybeEnabledStorage::Disabled => return Ok(()),\n    };\n    // Our test depends on discrepancies in the clock between S3 and the environment the tests\n    // run in. Therefore, wait a little bit before and after. 
The alternative would be\n    // to take the time from S3 response headers.\n    const WAIT_TIME: Duration = Duration::from_millis(3_000);\n\n    async fn retry<T, O, F, E>(op: O) -> Result<T, E>\n    where\n        E: Display + Debug + 'static,\n        O: FnMut() -> F,\n        F: Future<Output = Result<T, E>>,\n    {\n        let warn_threshold = 3;\n        let max_retries = 10;\n        backoff::retry(\n            op,\n            |_e| false,\n            warn_threshold,\n            max_retries,\n            \"test retry\",\n            &CancellationToken::new(),\n        )\n        .await\n        .expect(\"never cancelled\")\n    }\n\n    async fn time_point() -> SystemTime {\n        tokio::time::sleep(WAIT_TIME).await;\n        let ret = SystemTime::now();\n        tokio::time::sleep(WAIT_TIME).await;\n        ret\n    }\n\n    async fn list_files(\n        client: &Arc<GenericRemoteStorage>,\n        cancel: &CancellationToken,\n    ) -> anyhow::Result<HashSet<RemotePath>> {\n        Ok(\n            retry(|| client.list(None, ListingMode::NoDelimiter, None, cancel))\n                .await\n                .context(\"list root files failure\")?\n                .keys\n                .into_iter()\n                .map(|o| o.key)\n                .collect::<HashSet<_>>(),\n        )\n    }\n\n    let cancel = CancellationToken::new();\n\n    let path1 = RemotePath::new(Utf8Path::new(format!(\"{}/path1\", ctx.base_prefix).as_str()))\n        .with_context(|| \"RemotePath conversion\")?;\n\n    let path2 = RemotePath::new(Utf8Path::new(format!(\"{}/path2\", ctx.base_prefix).as_str()))\n        .with_context(|| \"RemotePath conversion\")?;\n\n    let path3 = RemotePath::new(Utf8Path::new(format!(\"{}/path3\", ctx.base_prefix).as_str()))\n        .with_context(|| \"RemotePath conversion\")?;\n\n    retry(|| {\n        let (data, len) = upload_stream(\"remote blob data1\".as_bytes().into());\n        ctx.client.upload(data, len, &path1, None, &cancel)\n    
})\n    .await?;\n\n    let t0_files = list_files(&ctx.client, &cancel).await?;\n    let t0 = time_point().await;\n    println!(\"at t0: {t0_files:?}\");\n\n    let old_data = \"remote blob data2\";\n\n    retry(|| {\n        let (data, len) = upload_stream(old_data.as_bytes().into());\n        ctx.client.upload(data, len, &path2, None, &cancel)\n    })\n    .await?;\n\n    let t1_files = list_files(&ctx.client, &cancel).await?;\n    let t1 = time_point().await;\n    println!(\"at t1: {t1_files:?}\");\n\n    // A little check to ensure that our clock is not too far off from the S3 clock\n    {\n        let opts = DownloadOpts::default();\n        let dl = retry(|| ctx.client.download(&path2, &opts, &cancel)).await?;\n        let last_modified = dl.last_modified;\n        let half_wt = WAIT_TIME.mul_f32(0.5);\n        let t0_hwt = t0 + half_wt;\n        let t1_hwt = t1 - half_wt;\n        if !(t0_hwt..=t1_hwt).contains(&last_modified) {\n            panic!(\n                \"last_modified={last_modified:?} is not between t0_hwt={t0_hwt:?} and t1_hwt={t1_hwt:?}. 
\\\n                This likely means a large clock discrepancy between S3 and the local clock.\"\n            );\n        }\n    }\n\n    retry(|| {\n        let (data, len) = upload_stream(\"remote blob data3\".as_bytes().into());\n        ctx.client.upload(data, len, &path3, None, &cancel)\n    })\n    .await?;\n\n    let new_data = \"new remote blob data2\";\n\n    retry(|| {\n        let (data, len) = upload_stream(new_data.as_bytes().into());\n        ctx.client.upload(data, len, &path2, None, &cancel)\n    })\n    .await?;\n\n    retry(|| ctx.client.delete(&path1, &cancel)).await?;\n    let t2_files = list_files(&ctx.client, &cancel).await?;\n    let t2 = time_point().await;\n    println!(\"at t2: {t2_files:?}\");\n\n    // No changes after recovery to t2 (no-op)\n    let t_final = time_point().await;\n    ctx.client\n        .time_travel_recover(None, t2, t_final, &cancel, None)\n        .await?;\n    let t2_files_recovered = list_files(&ctx.client, &cancel).await?;\n    println!(\"after recovery to t2: {t2_files_recovered:?}\");\n    assert_eq!(t2_files, t2_files_recovered);\n    let path2_recovered_t2 = download_to_vec(\n        ctx.client\n            .download(&path2, &DownloadOpts::default(), &cancel)\n            .await?,\n    )\n    .await?;\n    assert_eq!(path2_recovered_t2, new_data.as_bytes());\n\n    // after recovery to t1: path1 is back, path2 has the old content\n    let t_final = time_point().await;\n    ctx.client\n        .time_travel_recover(None, t1, t_final, &cancel, None)\n        .await?;\n    let t1_files_recovered = list_files(&ctx.client, &cancel).await?;\n    println!(\"after recovery to t1: {t1_files_recovered:?}\");\n    assert_eq!(t1_files, t1_files_recovered);\n    let path2_recovered_t1 = download_to_vec(\n        ctx.client\n            .download(&path2, &DownloadOpts::default(), &cancel)\n            .await?,\n    )\n    .await?;\n    assert_eq!(path2_recovered_t1, old_data.as_bytes());\n\n    // after recovery to t0: 
everything is gone except for path1\n    let t_final = time_point().await;\n    ctx.client\n        .time_travel_recover(None, t0, t_final, &cancel, None)\n        .await?;\n    let t0_files_recovered = list_files(&ctx.client, &cancel).await?;\n    println!(\"after recovery to t0: {t0_files_recovered:?}\");\n    assert_eq!(t0_files, t0_files_recovered);\n\n    // cleanup\n\n    let paths = &[path1, path2, path3];\n    retry(|| ctx.client.delete_objects(paths, &cancel)).await?;\n\n    Ok(())\n}\n\nstruct EnabledS3 {\n    client: Arc<GenericRemoteStorage>,\n    base_prefix: &'static str,\n}\n\nimpl EnabledS3 {\n    async fn setup(max_keys_in_list_response: Option<i32>) -> Self {\n        let client = create_s3_client(max_keys_in_list_response)\n            .await\n            .context(\"S3 client creation\")\n            .expect(\"S3 client creation failed\");\n\n        EnabledS3 {\n            client,\n            base_prefix: BASE_PREFIX,\n        }\n    }\n\n    fn configure_request_timeout(&mut self, timeout: Duration) {\n        match Arc::get_mut(&mut self.client).expect(\"outer Arc::get_mut\") {\n            GenericRemoteStorage::AwsS3(s3) => {\n                let s3 = Arc::get_mut(s3).expect(\"inner Arc::get_mut\");\n                s3.timeout = timeout;\n            }\n            _ => unreachable!(),\n        }\n    }\n}\n\nenum MaybeEnabledStorage {\n    Enabled(EnabledS3),\n    Disabled,\n}\n\nimpl AsyncTestContext for MaybeEnabledStorage {\n    async fn setup() -> Self {\n        ensure_logging_ready();\n\n        if env::var(ENABLE_REAL_S3_REMOTE_STORAGE_ENV_VAR_NAME).is_err() {\n            info!(\n                \"`{}` env variable is not set, skipping the test\",\n                ENABLE_REAL_S3_REMOTE_STORAGE_ENV_VAR_NAME\n            );\n            return Self::Disabled;\n        }\n\n        Self::Enabled(EnabledS3::setup(None).await)\n    }\n}\n\nenum MaybeEnabledStorageWithTestBlobs {\n    Enabled(S3WithTestBlobs),\n    Disabled,\n    
UploadsFailed(anyhow::Error, S3WithTestBlobs),\n}\n\nstruct S3WithTestBlobs {\n    enabled: EnabledS3,\n    remote_prefixes: HashSet<RemotePath>,\n    remote_blobs: HashSet<RemotePath>,\n}\n\nimpl AsyncTestContext for MaybeEnabledStorageWithTestBlobs {\n    async fn setup() -> Self {\n        ensure_logging_ready();\n        if env::var(ENABLE_REAL_S3_REMOTE_STORAGE_ENV_VAR_NAME).is_err() {\n            info!(\n                \"`{}` env variable is not set, skipping the test\",\n                ENABLE_REAL_S3_REMOTE_STORAGE_ENV_VAR_NAME\n            );\n            return Self::Disabled;\n        }\n\n        let max_keys_in_list_response = 10;\n        let upload_tasks_count = 1 + (2 * usize::try_from(max_keys_in_list_response).unwrap());\n\n        let enabled = EnabledS3::setup(Some(max_keys_in_list_response)).await;\n\n        match upload_remote_data(&enabled.client, enabled.base_prefix, upload_tasks_count).await {\n            ControlFlow::Continue(uploads) => {\n                info!(\"Remote objects created successfully\");\n\n                Self::Enabled(S3WithTestBlobs {\n                    enabled,\n                    remote_prefixes: uploads.prefixes,\n                    remote_blobs: uploads.blobs,\n                })\n            }\n            ControlFlow::Break(uploads) => Self::UploadsFailed(\n                anyhow::anyhow!(\"One or multiple blobs failed to upload to S3\"),\n                S3WithTestBlobs {\n                    enabled,\n                    remote_prefixes: uploads.prefixes,\n                    remote_blobs: uploads.blobs,\n                },\n            ),\n        }\n    }\n\n    async fn teardown(self) {\n        match self {\n            Self::Disabled => {}\n            Self::Enabled(ctx) | Self::UploadsFailed(_, ctx) => {\n                cleanup(&ctx.enabled.client, ctx.remote_blobs).await;\n            }\n        }\n    }\n}\n\nenum MaybeEnabledStorageWithSimpleTestBlobs {\n    Enabled(S3WithSimpleTestBlobs),\n    
Disabled,\n    UploadsFailed(anyhow::Error, S3WithSimpleTestBlobs),\n}\nstruct S3WithSimpleTestBlobs {\n    enabled: EnabledS3,\n    remote_blobs: HashSet<RemotePath>,\n}\n\nimpl AsyncTestContext for MaybeEnabledStorageWithSimpleTestBlobs {\n    async fn setup() -> Self {\n        ensure_logging_ready();\n        if env::var(ENABLE_REAL_S3_REMOTE_STORAGE_ENV_VAR_NAME).is_err() {\n            info!(\n                \"`{}` env variable is not set, skipping the test\",\n                ENABLE_REAL_S3_REMOTE_STORAGE_ENV_VAR_NAME\n            );\n            return Self::Disabled;\n        }\n\n        let max_keys_in_list_response = 10;\n        let upload_tasks_count = 1 + (2 * usize::try_from(max_keys_in_list_response).unwrap());\n\n        let enabled = EnabledS3::setup(Some(max_keys_in_list_response)).await;\n\n        match upload_simple_remote_data(&enabled.client, upload_tasks_count).await {\n            ControlFlow::Continue(uploads) => {\n                info!(\"Remote objects created successfully\");\n\n                Self::Enabled(S3WithSimpleTestBlobs {\n                    enabled,\n                    remote_blobs: uploads,\n                })\n            }\n            ControlFlow::Break(uploads) => Self::UploadsFailed(\n                anyhow::anyhow!(\"One or multiple blobs failed to upload to S3\"),\n                S3WithSimpleTestBlobs {\n                    enabled,\n                    remote_blobs: uploads,\n                },\n            ),\n        }\n    }\n\n    async fn teardown(self) {\n        match self {\n            Self::Disabled => {}\n            Self::Enabled(ctx) | Self::UploadsFailed(_, ctx) => {\n                cleanup(&ctx.enabled.client, ctx.remote_blobs).await;\n            }\n        }\n    }\n}\n\nasync fn create_s3_client(\n    max_keys_per_list_response: Option<i32>,\n) -> anyhow::Result<Arc<GenericRemoteStorage>> {\n    use rand::Rng;\n\n    let remote_storage_s3_bucket = env::var(\"REMOTE_STORAGE_S3_BUCKET\")\n      
  .context(\"`REMOTE_STORAGE_S3_BUCKET` env var is not set, but real S3 tests are enabled\")?;\n    let remote_storage_s3_region = env::var(\"REMOTE_STORAGE_S3_REGION\")\n        .context(\"`REMOTE_STORAGE_S3_REGION` env var is not set, but real S3 tests are enabled\")?;\n\n    // due to how time works, we've had test runners use the same nanos as bucket prefixes.\n    // millis is just a debugging aid for easier finding the prefix later.\n    let millis = std::time::SystemTime::now()\n        .duration_since(UNIX_EPOCH)\n        .context(\"random s3 test prefix part calculation\")?\n        .as_millis();\n\n    // because nanos can be the same for two threads so can millis, add randomness\n    let random = rand::rng().random::<u32>();\n\n    let remote_storage_config = RemoteStorageConfig {\n        storage: RemoteStorageKind::AwsS3(S3Config {\n            bucket_name: remote_storage_s3_bucket,\n            bucket_region: remote_storage_s3_region,\n            prefix_in_bucket: Some(format!(\"test_{millis}_{random:08x}/\")),\n            endpoint: None,\n            concurrency_limit: NonZeroUsize::new(100).unwrap(),\n            max_keys_per_list_response,\n            upload_storage_class: None,\n        }),\n        timeout: RemoteStorageConfig::DEFAULT_TIMEOUT,\n        small_timeout: RemoteStorageConfig::DEFAULT_SMALL_TIMEOUT,\n    };\n    Ok(Arc::new(\n        GenericRemoteStorage::from_config(&remote_storage_config)\n            .await\n            .context(\"remote storage init\")?,\n    ))\n}\n\n#[test_context(MaybeEnabledStorage)]\n#[tokio::test]\nasync fn download_is_timeouted(ctx: &mut MaybeEnabledStorage) {\n    let MaybeEnabledStorage::Enabled(ctx) = ctx else {\n        return;\n    };\n\n    let cancel = CancellationToken::new();\n\n    let path = RemotePath::new(Utf8Path::new(\n        format!(\"{}/file_to_copy\", ctx.base_prefix).as_str(),\n    ))\n    .unwrap();\n\n    let len = upload_large_enough_file(&ctx.client, &path, &cancel).await;\n\n    
let timeout = std::time::Duration::from_secs(5);\n\n    ctx.configure_request_timeout(timeout);\n\n    let started_at = std::time::Instant::now();\n    let mut stream = ctx\n        .client\n        .download(&path, &DownloadOpts::default(), &cancel)\n        .await\n        .expect(\"download succeeds\")\n        .download_stream;\n\n    if started_at.elapsed().mul_f32(0.9) >= timeout {\n        tracing::warn!(\n            elapsed_ms = started_at.elapsed().as_millis(),\n            \"timeout might be too low, consumed most of it during headers\"\n        );\n    }\n\n    let first = stream\n        .next()\n        .await\n        .expect(\"should have the first blob\")\n        .expect(\"should have succeeded\");\n\n    tracing::info!(len = first.len(), \"downloaded first chunk\");\n\n    assert!(\n        first.len() < len,\n        \"uploaded file is too small, we downloaded all on first chunk\"\n    );\n\n    tokio::time::sleep(timeout).await;\n\n    {\n        let started_at = std::time::Instant::now();\n        let next = stream\n            .next()\n            .await\n            .expect(\"stream should not have ended yet\");\n\n        tracing::info!(\n            next.is_err = next.is_err(),\n            elapsed_ms = started_at.elapsed().as_millis(),\n            \"received item after timeout\"\n        );\n\n        let e = next.expect_err(\"expected an error, but got a chunk?\");\n\n        let inner = e.get_ref().expect(\"std::io::Error::inner should be set\");\n        assert!(\n            inner\n                .downcast_ref::<DownloadError>()\n                .is_some_and(|e| matches!(e, DownloadError::Timeout)),\n            \"{inner:?}\"\n        );\n    }\n\n    ctx.configure_request_timeout(RemoteStorageConfig::DEFAULT_TIMEOUT);\n\n    ctx.client.delete_objects(&[path], &cancel).await.unwrap()\n}\n\n#[test_context(MaybeEnabledStorage)]\n#[tokio::test]\nasync fn download_is_cancelled(ctx: &mut MaybeEnabledStorage) {\n    let 
MaybeEnabledStorage::Enabled(ctx) = ctx else {\n        return;\n    };\n\n    let cancel = CancellationToken::new();\n\n    let path = RemotePath::new(Utf8Path::new(\n        format!(\"{}/file_to_copy\", ctx.base_prefix).as_str(),\n    ))\n    .unwrap();\n\n    let file_len = upload_large_enough_file(&ctx.client, &path, &cancel).await;\n\n    {\n        let stream = ctx\n            .client\n            .download(&path, &DownloadOpts::default(), &cancel)\n            .await\n            .expect(\"download succeeds\")\n            .download_stream;\n\n        let mut reader = std::pin::pin!(tokio_util::io::StreamReader::new(stream));\n\n        let first = reader.fill_buf().await.expect(\"should have the first blob\");\n\n        let len = first.len();\n        tracing::info!(len, \"downloaded first chunk\");\n\n        assert!(\n            first.len() < file_len,\n            \"uploaded file is too small, we downloaded all on first chunk\"\n        );\n\n        reader.consume(len);\n\n        cancel.cancel();\n\n        let next = reader.fill_buf().await;\n\n        let e = next.expect_err(\"expected an error, but got a chunk?\");\n\n        let inner = e.get_ref().expect(\"std::io::Error::inner should be set\");\n        assert!(\n            inner\n                .downcast_ref::<DownloadError>()\n                .is_some_and(|e| matches!(e, DownloadError::Cancelled)),\n            \"{inner:?}\"\n        );\n\n        let e = DownloadError::from(e);\n\n        assert!(matches!(e, DownloadError::Cancelled), \"{e:?}\");\n    }\n\n    let cancel = CancellationToken::new();\n\n    ctx.client.delete_objects(&[path], &cancel).await.unwrap();\n}\n\n/// Upload a long enough file so that we cannot download it in single chunk\n///\n/// For s3 the first chunk seems to be less than 10kB, so this has a bit of a safety margin\nasync fn upload_large_enough_file(\n    client: &GenericRemoteStorage,\n    path: &RemotePath,\n    cancel: &CancellationToken,\n) -> usize {\n    
let header = bytes::Bytes::from_static(\"remote blob data content\".as_bytes());\n    let body = bytes::Bytes::from(vec![0u8; 1024]);\n    let contents = std::iter::once(header).chain(std::iter::repeat_n(body, 128));\n\n    let len = contents.clone().fold(0, |acc, next| acc + next.len());\n\n    let contents = futures::stream::iter(contents.map(std::io::Result::Ok));\n\n    client\n        .upload(contents, len, path, None, cancel)\n        .await\n        .expect(\"upload succeeds\");\n\n    len\n}\n"
  },
  {
    "path": "libs/safekeeper_api/Cargo.toml",
    "content": "[package]\nname = \"safekeeper_api\"\nversion = \"0.1.0\"\nedition = \"2024\"\nlicense.workspace = true\n\n[dependencies]\nanyhow.workspace = true\nconst_format.workspace = true\nserde.workspace = true\nserde_json.workspace = true\npostgres_ffi_types.workspace = true\npostgres_versioninfo.workspace = true\npq_proto.workspace = true\ntokio.workspace = true\nutils.workspace = true\npageserver_api.workspace = true\n"
  },
  {
    "path": "libs/safekeeper_api/src/lib.rs",
    "content": "#![deny(unsafe_code)]\n#![deny(clippy::undocumented_unsafe_blocks)]\nuse const_format::formatcp;\nuse pq_proto::SystemId;\nuse serde::{Deserialize, Serialize};\n\npub mod membership;\n/// Public API types\npub mod models;\n\npub use postgres_versioninfo::{PgMajorVersion, PgVersionId};\n\n/// Consensus logical timestamp. Note: it is a part of sk control file.\npub type Term = u64;\n/// With this term timeline is created initially. It\n/// is a normal term except wp is never elected with it.\npub const INITIAL_TERM: Term = 0;\n\n/// Information about Postgres. Safekeeper gets it once and then verifies all\n/// further connections from computes match. Note: it is a part of sk control\n/// file.\n#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]\npub struct ServerInfo {\n    /// Postgres server version\n    pub pg_version: PgVersionId,\n    pub system_id: SystemId,\n    pub wal_seg_size: u32,\n}\n\npub const DEFAULT_PG_LISTEN_PORT: u16 = 5454;\npub const DEFAULT_PG_LISTEN_ADDR: &str = formatcp!(\"127.0.0.1:{DEFAULT_PG_LISTEN_PORT}\");\n\npub const DEFAULT_HTTP_LISTEN_PORT: u16 = 7676;\npub const DEFAULT_HTTP_LISTEN_ADDR: &str = formatcp!(\"127.0.0.1:{DEFAULT_HTTP_LISTEN_PORT}\");\n"
  },
  {
    "path": "libs/safekeeper_api/src/membership.rs",
    "content": "//! Types defining safekeeper membership, see\n//! rfcs/035-safekeeper-dynamic-membership-change.md\n//! for details.\n\nuse std::collections::HashSet;\nuse std::fmt::Display;\n\nuse anyhow;\nuse anyhow::bail;\nuse serde::{Deserialize, Serialize};\nuse utils::id::NodeId;\n\n/// 1 is the first valid generation, 0 is used as\n/// a placeholder before we fully migrate to generations.\npub const INVALID_GENERATION: SafekeeperGeneration = SafekeeperGeneration::new(0);\npub const INITIAL_GENERATION: SafekeeperGeneration = SafekeeperGeneration::new(1);\n\n/// Number uniquely identifying safekeeper configuration.\n/// Note: it is a part of sk control file.\n///\n/// Like tenant generations, but for safekeepers.\n#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize)]\npub struct SafekeeperGeneration(u32);\n\nimpl SafekeeperGeneration {\n    pub const fn new(v: u32) -> Self {\n        Self(v)\n    }\n\n    #[track_caller]\n    pub fn previous(&self) -> Option<Self> {\n        Some(Self(self.0.checked_sub(1)?))\n    }\n\n    #[track_caller]\n    pub fn next(&self) -> Self {\n        Self(self.0 + 1)\n    }\n\n    pub fn into_inner(self) -> u32 {\n        self.0\n    }\n}\n\nimpl Display for SafekeeperGeneration {\n    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {\n        write!(f, \"{}\", self.0)\n    }\n}\n\n/// Membership is defined by ids so e.g. 
walproposer uses them to figure out\n/// quorums, but we also carry host and port to give wp idea where to connect.\n#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]\npub struct SafekeeperId {\n    pub id: NodeId,\n    pub host: String,\n    /// We include here only port for computes -- that is, pg protocol tenant\n    /// only port, or wide pg protocol port if the former is not configured.\n    pub pg_port: u16,\n}\n\nimpl Display for SafekeeperId {\n    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {\n        write!(f, \"[id={}, ep={}:{}]\", self.id, self.host, self.pg_port)\n    }\n}\n\n/// Set of safekeepers.\n#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]\n#[serde(transparent)]\npub struct MemberSet {\n    pub m: Vec<SafekeeperId>,\n}\n\nimpl MemberSet {\n    pub fn empty() -> Self {\n        MemberSet { m: Vec::new() }\n    }\n\n    pub fn new(members: Vec<SafekeeperId>) -> anyhow::Result<Self> {\n        let hs: HashSet<NodeId> = HashSet::from_iter(members.iter().map(|sk| sk.id));\n        if hs.len() != members.len() {\n            bail!(\"duplicate safekeeper id in the set {:?}\", members);\n        }\n        Ok(MemberSet { m: members })\n    }\n\n    pub fn contains(&self, sk: NodeId) -> bool {\n        self.m.iter().any(|m| m.id == sk)\n    }\n\n    pub fn add(&mut self, sk: SafekeeperId) -> anyhow::Result<()> {\n        if self.contains(sk.id) {\n            bail!(format!(\n                \"sk {} is already member of the set {}\",\n                sk.id, self\n            ));\n        }\n        self.m.push(sk);\n        Ok(())\n    }\n}\n\nimpl Display for MemberSet {\n    /// Display as a comma separated list of members.\n    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {\n        let sks_str = self.m.iter().map(|sk| sk.to_string()).collect::<Vec<_>>();\n        write!(f, \"({})\", sks_str.join(\", \"))\n    }\n}\n\n/// Safekeeper membership configuration.\n/// Note: it is a part of 
both control file and http API.\n#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]\npub struct Configuration {\n    /// Unique id.\n    pub generation: SafekeeperGeneration,\n    /// Current members of the configuration.\n    pub members: MemberSet,\n    /// Some means it is a joint conf.\n    pub new_members: Option<MemberSet>,\n}\n\nimpl Configuration {\n    /// Used for pre-generations timelines, will be removed eventually.\n    pub fn empty() -> Self {\n        Configuration {\n            generation: INVALID_GENERATION,\n            members: MemberSet::empty(),\n            new_members: None,\n        }\n    }\n\n    pub fn new(members: MemberSet) -> Self {\n        Configuration {\n            generation: INITIAL_GENERATION,\n            members,\n            new_members: None,\n        }\n    }\n\n    /// Is `sk_id` member of the configuration?\n    pub fn contains(&self, sk_id: NodeId) -> bool {\n        self.members.contains(sk_id) || self.new_members.as_ref().is_some_and(|m| m.contains(sk_id))\n    }\n}\n\nimpl Display for Configuration {\n    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {\n        write!(\n            f,\n            \"gen={}, members={}, new_members={}\",\n            self.generation,\n            self.members,\n            self.new_members\n                .as_ref()\n                .map(ToString::to_string)\n                .unwrap_or(String::from(\"none\"))\n        )\n    }\n}\n\n#[cfg(test)]\nmod tests {\n    use utils::id::NodeId;\n\n    use super::{MemberSet, SafekeeperId};\n\n    #[test]\n    fn test_member_set() {\n        let mut members = MemberSet::empty();\n        members\n            .add(SafekeeperId {\n                id: NodeId(42),\n                host: String::from(\"lala.org\"),\n                pg_port: 5432,\n            })\n            .unwrap();\n\n        members\n            .add(SafekeeperId {\n                id: NodeId(42),\n                host: String::from(\"lala.org\"),\n 
               pg_port: 5432,\n            })\n            .expect_err(\"duplicate must not be allowed\");\n\n        members\n            .add(SafekeeperId {\n                id: NodeId(43),\n                host: String::from(\"bubu.org\"),\n                pg_port: 5432,\n            })\n            .unwrap();\n\n        println!(\"members: {members}\");\n\n        let j = serde_json::to_string(&members).expect(\"failed to serialize\");\n        println!(\"members json: {j}\");\n        assert_eq!(\n            j,\n            r#\"[{\"id\":42,\"host\":\"lala.org\",\"pg_port\":5432},{\"id\":43,\"host\":\"bubu.org\",\"pg_port\":5432}]\"#\n        );\n    }\n}\n"
  },
  {
    "path": "libs/safekeeper_api/src/models.rs",
    "content": "//! Types used in safekeeper http API. Many of them are also reused internally.\n\nuse std::net::SocketAddr;\n\nuse pageserver_api::shard::ShardIdentity;\nuse postgres_ffi_types::TimestampTz;\nuse postgres_versioninfo::PgVersionId;\nuse serde::{Deserialize, Serialize};\nuse tokio::time::Instant;\nuse utils::id::{NodeId, TenantId, TenantTimelineId, TimelineId};\nuse utils::lsn::Lsn;\nuse utils::pageserver_feedback::PageserverFeedback;\n\nuse crate::membership::{Configuration, SafekeeperGeneration};\nuse crate::{ServerInfo, Term};\n\n#[derive(Debug, Serialize, Deserialize)]\npub struct SafekeeperStatus {\n    pub id: NodeId,\n}\n\n#[derive(Serialize, Deserialize, Clone)]\npub struct TimelineCreateRequest {\n    pub tenant_id: TenantId,\n    pub timeline_id: TimelineId,\n    pub mconf: Configuration,\n    pub pg_version: PgVersionId,\n    pub system_id: Option<u64>,\n    // By default WAL_SEGMENT_SIZE\n    pub wal_seg_size: Option<u32>,\n    pub start_lsn: Lsn,\n    // Normal creation should omit this field (start_lsn initializes all LSNs).\n    // However, we allow specifying custom value higher than start_lsn for\n    // manual recovery case, see test_s3_wal_replay.\n    pub commit_lsn: Option<Lsn>,\n}\n\n/// Same as TermLsn, but serializes LSN using display serializer\n/// in Postgres format, i.e. 0/FFFFFFFF. 
Used only for the API response.\n#[derive(Debug, Clone, Copy, Serialize, Deserialize)]\npub struct TermSwitchApiEntry {\n    pub term: Term,\n    pub lsn: Lsn,\n}\n\n/// Augment AcceptorState with last_log_term for convenience\n#[derive(Debug, Serialize, Deserialize)]\npub struct AcceptorStateStatus {\n    pub term: Term,\n    pub epoch: Term, // aka last_log_term, old `epoch` name is left for compatibility\n    pub term_history: Vec<TermSwitchApiEntry>,\n}\n\n/// Things safekeeper should know about timeline state on peers.\n/// Used as both model and internally.\n#[derive(Debug, Clone, Serialize, Deserialize)]\npub struct PeerInfo {\n    pub sk_id: NodeId,\n    pub term: Term,\n    /// Term of the last entry.\n    pub last_log_term: Term,\n    /// LSN of the last record.\n    pub flush_lsn: Lsn,\n    pub commit_lsn: Lsn,\n    /// Since which LSN safekeeper has WAL.\n    pub local_start_lsn: Lsn,\n    /// When info was received. Serde annotations are not very useful but make\n    /// the code compile -- we don't rely on this field externally.\n    #[serde(skip)]\n    #[serde(default = \"Instant::now\")]\n    pub ts: Instant,\n    pub pg_connstr: String,\n    pub http_connstr: String,\n    pub https_connstr: Option<String>,\n}\n\npub type FullTransactionId = u64;\n\n/// Hot standby feedback received from replica\n#[derive(Debug, Clone, Copy, Serialize, Deserialize)]\npub struct HotStandbyFeedback {\n    pub ts: TimestampTz,\n    pub xmin: FullTransactionId,\n    pub catalog_xmin: FullTransactionId,\n}\n\npub const INVALID_FULL_TRANSACTION_ID: FullTransactionId = 0;\n\nimpl HotStandbyFeedback {\n    pub fn empty() -> HotStandbyFeedback {\n        HotStandbyFeedback {\n            ts: 0,\n            xmin: 0,\n            catalog_xmin: 0,\n        }\n    }\n}\n\n/// Standby status update\n#[derive(Debug, Clone, Copy, Serialize, Deserialize)]\npub struct StandbyReply {\n    pub write_lsn: Lsn, // The location of the last WAL byte + 1 received and written to disk in the 
standby.\n    pub flush_lsn: Lsn, // The location of the last WAL byte + 1 flushed to disk in the standby.\n    pub apply_lsn: Lsn, // The location of the last WAL byte + 1 applied in the standby.\n    pub reply_ts: TimestampTz, // The client's system clock at the time of transmission, as microseconds since midnight on 2000-01-01.\n    pub reply_requested: bool,\n}\n\nimpl StandbyReply {\n    pub fn empty() -> Self {\n        StandbyReply {\n            write_lsn: Lsn::INVALID,\n            flush_lsn: Lsn::INVALID,\n            apply_lsn: Lsn::INVALID,\n            reply_ts: 0,\n            reply_requested: false,\n        }\n    }\n}\n\n#[derive(Debug, Clone, Copy, Serialize, Deserialize)]\npub struct StandbyFeedback {\n    pub reply: StandbyReply,\n    pub hs_feedback: HotStandbyFeedback,\n}\n\nimpl StandbyFeedback {\n    pub fn empty() -> Self {\n        StandbyFeedback {\n            reply: StandbyReply::empty(),\n            hs_feedback: HotStandbyFeedback::empty(),\n        }\n    }\n}\n\n/// Receiver is either pageserver or regular standby, which have different\n/// feedbacks.\n/// Used as both model and internally.\n#[derive(Debug, Clone, Copy, Serialize, Deserialize)]\npub enum ReplicationFeedback {\n    Pageserver(PageserverFeedback),\n    Standby(StandbyFeedback),\n}\n\n/// Uniquely identifies a WAL service connection. Logged in spans for\n/// observability.\npub type ConnectionId = u32;\n\n/// Serialize is used only for json'ing in API response. 
Also used internally.\n#[derive(Debug, Clone, Serialize, Deserialize)]\npub enum WalSenderState {\n    Vanilla(VanillaWalSenderState),\n    Interpreted(InterpretedWalSenderState),\n}\n\n#[derive(Debug, Clone, Serialize, Deserialize)]\npub struct VanillaWalSenderState {\n    pub ttid: TenantTimelineId,\n    pub addr: SocketAddr,\n    pub conn_id: ConnectionId,\n    // postgres application_name\n    pub appname: Option<String>,\n    pub feedback: ReplicationFeedback,\n}\n\n#[derive(Debug, Clone, Serialize, Deserialize)]\npub struct InterpretedWalSenderState {\n    pub ttid: TenantTimelineId,\n    pub shard: ShardIdentity,\n    pub addr: SocketAddr,\n    pub conn_id: ConnectionId,\n    // postgres application_name\n    pub appname: Option<String>,\n    pub feedback: ReplicationFeedback,\n}\n\n#[derive(Debug, Clone, Serialize, Deserialize)]\npub struct WalReceiverState {\n    /// None means it is recovery initiated by us (this safekeeper).\n    pub conn_id: Option<ConnectionId>,\n    pub status: WalReceiverStatus,\n}\n\n/// Walreceiver status. 
Currently only whether it passed voting stage and\n/// started receiving the stream, but it is easy to add more if needed.\n#[derive(Debug, Clone, Serialize, Deserialize)]\npub enum WalReceiverStatus {\n    Voting,\n    Streaming,\n}\n\n/// Info about timeline on safekeeper ready for reporting.\n#[derive(Debug, Serialize, Deserialize)]\npub struct TimelineStatus {\n    pub tenant_id: TenantId,\n    pub timeline_id: TimelineId,\n    pub mconf: Configuration,\n    pub acceptor_state: AcceptorStateStatus,\n    pub pg_info: ServerInfo,\n    pub flush_lsn: Lsn,\n    pub timeline_start_lsn: Lsn,\n    pub local_start_lsn: Lsn,\n    pub commit_lsn: Lsn,\n    pub backup_lsn: Lsn,\n    pub peer_horizon_lsn: Lsn,\n    pub remote_consistent_lsn: Lsn,\n    pub peers: Vec<PeerInfo>,\n    pub walsenders: Vec<WalSenderState>,\n    pub walreceivers: Vec<WalReceiverState>,\n}\n\n/// Request to switch membership configuration.\n#[derive(Clone, Serialize, Deserialize)]\n#[serde(transparent)]\npub struct TimelineMembershipSwitchRequest {\n    pub mconf: Configuration,\n}\n\n/// In response both previous and current configuration are sent.\n#[derive(Serialize, Deserialize)]\npub struct TimelineMembershipSwitchResponse {\n    pub previous_conf: Configuration,\n    pub current_conf: Configuration,\n    pub last_log_term: Term,\n    pub flush_lsn: Lsn,\n}\n\n#[derive(Clone, Copy, Serialize, Deserialize)]\npub struct TimelineDeleteResult {\n    pub dir_existed: bool,\n}\n\npub type TenantDeleteResult = std::collections::HashMap<String, TimelineDeleteResult>;\n\nfn lsn_invalid() -> Lsn {\n    Lsn::INVALID\n}\n\n/// Data about safekeeper's timeline, mirrors broker.proto.\n#[derive(Debug, Clone, Deserialize, Serialize)]\npub struct SkTimelineInfo {\n    /// Term.\n    pub term: Option<u64>,\n    /// Term of the last entry.\n    pub last_log_term: Option<u64>,\n    /// LSN of the last record.\n    #[serde(default = \"lsn_invalid\")]\n    pub flush_lsn: Lsn,\n    /// Up to which LSN safekeeper 
regards its WAL as committed.\n    #[serde(default = \"lsn_invalid\")]\n    pub commit_lsn: Lsn,\n    /// LSN up to which safekeeper has backed WAL.\n    #[serde(default = \"lsn_invalid\")]\n    pub backup_lsn: Lsn,\n    /// LSN of last checkpoint uploaded by pageserver.\n    #[serde(default = \"lsn_invalid\")]\n    pub remote_consistent_lsn: Lsn,\n    #[serde(default = \"lsn_invalid\")]\n    pub peer_horizon_lsn: Lsn,\n    #[serde(default = \"lsn_invalid\")]\n    pub local_start_lsn: Lsn,\n    /// A connection string to use for WAL receiving.\n    #[serde(default)]\n    pub safekeeper_connstr: Option<String>,\n    #[serde(default)]\n    pub http_connstr: Option<String>,\n    #[serde(default)]\n    pub https_connstr: Option<String>,\n    // Minimum of all active RO replicas flush LSN\n    #[serde(default = \"lsn_invalid\")]\n    pub standby_horizon: Lsn,\n}\n\n#[derive(Debug, Clone, Deserialize, Serialize)]\npub struct TimelineCopyRequest {\n    pub target_timeline_id: TimelineId,\n    pub until_lsn: Lsn,\n}\n\n#[derive(Debug, Clone, Deserialize, Serialize)]\npub struct TimelineTermBumpRequest {\n    /// bump to\n    pub term: Option<u64>,\n}\n\n#[derive(Debug, Clone, Deserialize, Serialize)]\npub struct TimelineTermBumpResponse {\n    // before the request\n    pub previous_term: u64,\n    pub current_term: u64,\n}\n\n#[derive(Debug, Clone, Deserialize, Serialize)]\npub struct SafekeeperUtilization {\n    pub timeline_count: u64,\n}\n\n/// pull_timeline request body.\n#[derive(Debug, Clone, Deserialize, Serialize)]\npub struct PullTimelineRequest {\n    pub tenant_id: TenantId,\n    pub timeline_id: TimelineId,\n    pub http_hosts: Vec<String>,\n    /// Membership configuration to switch to after pull.\n    /// It guarantees that if pull_timeline returns successfully, the timeline will\n    /// not be deleted by request with an older generation.\n    /// Storage controller always sets this field.\n    /// None is only allowed for manual pull_timeline requests.\n   
 pub mconf: Option<Configuration>,\n}\n\n#[derive(Debug, Serialize, Deserialize)]\npub struct PullTimelineResponse {\n    /// Donor safekeeper host.\n    /// None if no pull happened because the timeline already exists.\n    pub safekeeper_host: Option<String>,\n    // TODO: add more fields?\n}\n\n/// Response to a timeline locate request.\n/// Storcon-only API.\n#[derive(Serialize, Deserialize, Clone, Debug)]\npub struct TimelineLocateResponse {\n    pub generation: SafekeeperGeneration,\n    pub sk_set: Vec<NodeId>,\n    pub new_sk_set: Option<Vec<NodeId>>,\n}\n"
  },
  {
    "path": "libs/tenant_size_model/.gitignore",
    "content": "*.dot\n*.png\n*.svg\n"
  },
  {
    "path": "libs/tenant_size_model/Cargo.toml",
    "content": "[package]\nname = \"tenant_size_model\"\nversion = \"0.1.0\"\nedition.workspace = true\npublish = false\nlicense.workspace = true\n\n[dependencies]\nanyhow.workspace = true\nserde.workspace = true\nserde_json.workspace = true\n"
  },
  {
    "path": "libs/tenant_size_model/Makefile",
    "content": "all: 1.svg 2.svg 3.svg 4.svg 1.png 2.png 3.png 4.png\n\n../../target/debug/tenant_size_model: Cargo.toml src/main.rs src/lib.rs\n\tcargo build --bin tenant_size_model\n\n%.svg: %.dot\n\tdot -Tsvg $< > $@\n\n%.png: %.dot\n\tdot -Tpng $< > $@\n\n%.dot: ../../target/debug/tenant_size_model\n\t../../target/debug/tenant_size_model $* > $@\n"
  },
  {
    "path": "libs/tenant_size_model/README.md",
    "content": "# Logical size + WAL pricing\n\nThis is a simulator to calculate the tenant size in different scenarios,\nusing the \"Logical size + WAL\" method. Makefile produces diagrams used in a\nprivate presentation:\n\nhttps://docs.google.com/presentation/d/1OapE4k11xmcwMh7I7YvNWGC63yCRLh6udO9bXZ-fZmo/edit?usp=sharing\n"
  },
  {
    "path": "libs/tenant_size_model/src/calculation.rs",
    "content": "use crate::{SegmentMethod, SegmentSizeResult, SizeResult, StorageModel};\n\n//\n//                 *-g--*---D--->\n//                /\n//               /\n//              /                 *---b----*-B--->\n//             /                 /\n//            /                 /\n//      -----*--e---*-----f----* C\n//           E                  \\\n//                               \\\n//                                *--a---*---A-->\n//\n// If A and B need to be retained, is it cheaper to store\n// snapshot at C+a+b, or snapshots at A and B ?\n//\n// If D also needs to be retained, which is cheaper:\n//\n// 1. E+g+e+f+a+b\n// 2. D+C+a+b\n// 3. D+A+B\n\n/// `Segment` which has had its size calculated.\n#[derive(Clone, Debug)]\nstruct SegmentSize {\n    method: SegmentMethod,\n\n    // calculated size of this subtree, using this method\n    accum_size: u64,\n\n    seg_id: usize,\n    children: Vec<SegmentSize>,\n}\n\nstruct SizeAlternatives {\n    /// cheapest alternative if parent is available.\n    incremental: SegmentSize,\n\n    /// cheapest alternative if parent node is not available\n    non_incremental: Option<SegmentSize>,\n}\n\nimpl StorageModel {\n    pub fn calculate(&self) -> SizeResult {\n        // Build adjacency list. 'child_list' is indexed by segment id. 
Each entry\n        // contains a list of all child segments of the segment.\n        let mut roots: Vec<usize> = Vec::new();\n        let mut child_list: Vec<Vec<usize>> = Vec::new();\n        child_list.resize(self.segments.len(), Vec::new());\n\n        for (seg_id, seg) in self.segments.iter().enumerate() {\n            if let Some(parent_id) = seg.parent {\n                child_list[parent_id].push(seg_id);\n            } else {\n                roots.push(seg_id);\n            }\n        }\n\n        let mut segment_results = Vec::new();\n        segment_results.resize(\n            self.segments.len(),\n            SegmentSizeResult {\n                method: SegmentMethod::Skipped,\n                accum_size: 0,\n            },\n        );\n\n        let mut total_size = 0;\n        for root in roots {\n            if let Some(selected) = self.size_here(root, &child_list).non_incremental {\n                StorageModel::fill_selected_sizes(&selected, &mut segment_results);\n                total_size += selected.accum_size;\n            } else {\n                // Couldn't find any way to get this root. 
Error?\n            }\n        }\n\n        SizeResult {\n            // If total_size is 0, it means that the tenant has all timelines offloaded; we need to report 1\n            // here so that the data point shows up in the s3 files.\n            total_size: total_size.max(1),\n            segments: segment_results,\n        }\n    }\n\n    fn fill_selected_sizes(selected: &SegmentSize, result: &mut Vec<SegmentSizeResult>) {\n        result[selected.seg_id] = SegmentSizeResult {\n            method: selected.method,\n            accum_size: selected.accum_size,\n        };\n        // recurse to children\n        for child in selected.children.iter() {\n            StorageModel::fill_selected_sizes(child, result);\n        }\n    }\n\n    //\n    // This is the core of the sizing calculation.\n    //\n    // This is a recursive function, that for each Segment calculates the best way\n    // to reach all the Segments that are marked as needed in this subtree, under two\n    // different conditions:\n    // a) when the parent of this segment is available (as a snapshot or through WAL), and\n    // b) when the parent of this segment is not available.\n    //\n    fn size_here(&self, seg_id: usize, child_list: &Vec<Vec<usize>>) -> SizeAlternatives {\n        let seg = &self.segments[seg_id];\n        // First figure out the best way to get each child\n        let mut children = Vec::new();\n        for child_id in &child_list[seg_id] {\n            children.push(self.size_here(*child_id, child_list))\n        }\n\n        // Method 1. 
If this node is not needed, we can skip it as long as we\n        // take snapshots later in each sub-tree\n        let snapshot_later = if !seg.needed {\n            let mut snapshot_later = SegmentSize {\n                seg_id,\n                method: SegmentMethod::Skipped,\n                accum_size: 0,\n                children: Vec::new(),\n            };\n\n            let mut possible = true;\n            for child in children.iter() {\n                if let Some(non_incremental) = &child.non_incremental {\n                    snapshot_later.accum_size += non_incremental.accum_size;\n                    snapshot_later.children.push(non_incremental.clone())\n                } else {\n                    possible = false;\n                    break;\n                }\n            }\n            if possible { Some(snapshot_later) } else { None }\n        } else {\n            None\n        };\n\n        // Method 2. Get a snapshot here. This is assumed to be possible if the 'size' of\n        // this Segment was given.\n        let snapshot_here = if !seg.needed || seg.parent.is_none() {\n            if let Some(snapshot_size) = seg.size {\n                let mut snapshot_here = SegmentSize {\n                    seg_id,\n                    method: SegmentMethod::SnapshotHere,\n                    accum_size: snapshot_size,\n                    children: Vec::new(),\n                };\n                for child in children.iter() {\n                    snapshot_here.accum_size += child.incremental.accum_size;\n                    snapshot_here.children.push(child.incremental.clone())\n                }\n                Some(snapshot_here)\n            } else {\n                None\n            }\n        } else {\n            None\n        };\n\n        // Method 3. 
Use WAL to get here from parent\n        let wal_here = {\n            let mut wal_here = SegmentSize {\n                seg_id,\n                method: SegmentMethod::Wal,\n                accum_size: if let Some(parent_id) = seg.parent {\n                    seg.lsn - self.segments[parent_id].lsn\n                } else {\n                    0\n                },\n                children: Vec::new(),\n            };\n            for child in children {\n                wal_here.accum_size += child.incremental.accum_size;\n                wal_here.children.push(child.incremental)\n            }\n            wal_here\n        };\n\n        // If the parent is not available, what's the cheapest method involving\n        // a snapshot here or later?\n        let mut cheapest_non_incremental: Option<SegmentSize> = None;\n        if let Some(snapshot_here) = snapshot_here {\n            cheapest_non_incremental = Some(snapshot_here);\n        }\n        if let Some(snapshot_later) = snapshot_later {\n            // Use <=, to prefer skipping if the size is equal\n            if let Some(parent) = &cheapest_non_incremental {\n                if snapshot_later.accum_size <= parent.accum_size {\n                    cheapest_non_incremental = Some(snapshot_later);\n                }\n            } else {\n                cheapest_non_incremental = Some(snapshot_later);\n            }\n        }\n\n        // And what's the cheapest method, if the parent is available?\n        let cheapest_incremental = if let Some(cheapest_non_incremental) = &cheapest_non_incremental\n        {\n            // Is it cheaper to use a snapshot here or later, anyway?\n            // Use <, to prefer Wal over snapshot if the cost is the same\n            if wal_here.accum_size < cheapest_non_incremental.accum_size {\n                wal_here\n            } else {\n                cheapest_non_incremental.clone()\n            }\n        } else {\n            wal_here\n        };\n\n        
SizeAlternatives {\n            incremental: cheapest_incremental,\n            non_incremental: cheapest_non_incremental,\n        }\n    }\n}\n"
  },
  {
    "path": "libs/tenant_size_model/src/lib.rs",
    "content": "//! Synthetic size calculation\n#![deny(unsafe_code)]\n#![deny(clippy::undocumented_unsafe_blocks)]\n\nmod calculation;\npub mod svg;\n\n/// StorageModel is the input to the synthetic size calculation.\n///\n/// It represents a tree of timelines, with just the information that's needed\n/// for the calculation. This doesn't track timeline names or where each timeline\n/// begins and ends, for example. Instead, it consists of \"points of interest\"\n/// on the timelines. A point of interest could be the timeline start or end point,\n/// the oldest point on a timeline that needs to be retained because of PITR\n/// cutoff, or snapshot points named by the user. For each such point, and the\n/// edge connecting the points (implicit in Segment), we store information about\n/// whether we need to be able to recover to the point, and if known, the logical\n/// size at the point.\n///\n/// The segments must form a well-formed tree, with no loops.\n#[derive(serde::Serialize)]\npub struct StorageModel {\n    pub segments: Vec<Segment>,\n}\n\n/// Segment represents one point in the tree of branches, *and* the edge that leads\n/// to it (if any). We don't need separate structs for points and edges, because each\n/// point can have only one parent.\n///\n/// When 'needed' is true, it means that we need to be able to reconstruct\n/// any version between 'parent.lsn' and 'lsn'. 
If you want to represent that only\n/// a single point is needed, create two Segments with the same lsn, and mark only\n/// the child as needed.\n///\n#[derive(Clone, Debug, Eq, PartialEq, serde::Serialize, serde::Deserialize)]\npub struct Segment {\n    /// Previous segment index into [`StorageModel::segments`], if any.\n    pub parent: Option<usize>,\n\n    /// LSN at this point\n    pub lsn: u64,\n\n    /// Logical size at this node, if known.\n    pub size: Option<u64>,\n\n    /// If true, the segment from parent to this node is needed by `retention_period`\n    pub needed: bool,\n}\n\n/// Result of synthetic size calculation. Returned by StorageModel::calculate()\npub struct SizeResult {\n    pub total_size: u64,\n\n    // This has same length as the StorageModel::segments vector in the input.\n    // Each entry in this array corresponds to the entry with same index in\n    // StorageModel::segments.\n    pub segments: Vec<SegmentSizeResult>,\n}\n\n#[derive(Clone, Debug, Eq, PartialEq, serde::Serialize, serde::Deserialize)]\npub struct SegmentSizeResult {\n    pub method: SegmentMethod,\n    // calculated size of this subtree, using this method\n    pub accum_size: u64,\n}\n\n/// Different methods to retain history from a particular state\n#[derive(Clone, Copy, Debug, Eq, PartialEq, serde::Serialize, serde::Deserialize)]\npub enum SegmentMethod {\n    SnapshotHere, // A logical snapshot is needed after this segment\n    Wal,          // Keep WAL leading up to this node\n    Skipped,\n}\n"
  },
  {
    "path": "libs/tenant_size_model/src/svg.rs",
    "content": "use crate::{SegmentMethod, SegmentSizeResult, SizeResult, StorageModel};\nuse std::fmt::Write;\n\nconst SVG_WIDTH: f32 = 500.0;\n\n/// Different branch kind for SVG drawing.\n#[derive(PartialEq)]\npub enum SvgBranchKind {\n    Timeline,\n    Lease,\n}\n\nstruct SvgDraw<'a> {\n    storage: &'a StorageModel,\n    branches: &'a [String],\n    seg_to_branch: &'a [(usize, SvgBranchKind)],\n    sizes: &'a [SegmentSizeResult],\n\n    // layout\n    xscale: f32,\n    min_lsn: u64,\n    seg_coordinates: Vec<(f32, f32)>,\n}\n\nfn draw_legend(result: &mut String) -> anyhow::Result<()> {\n    writeln!(\n        result,\n        \"<circle cx=\\\"10\\\" cy=\\\"10\\\" r=\\\"5\\\" stroke=\\\"red\\\"/>\"\n    )?;\n    writeln!(result, \"<text x=\\\"20\\\" y=\\\"15\\\">logical snapshot</text>\")?;\n    writeln!(\n        result,\n        \"<line x1=\\\"5\\\" y1=\\\"30\\\" x2=\\\"15\\\" y2=\\\"30\\\" stroke-width=\\\"6\\\" stroke=\\\"black\\\" />\"\n    )?;\n    writeln!(\n        result,\n        \"<text x=\\\"20\\\" y=\\\"35\\\">WAL within retention period</text>\"\n    )?;\n    writeln!(\n        result,\n        \"<line x1=\\\"5\\\" y1=\\\"50\\\" x2=\\\"15\\\" y2=\\\"50\\\" stroke-width=\\\"3\\\" stroke=\\\"black\\\" />\"\n    )?;\n    writeln!(\n        result,\n        \"<text x=\\\"20\\\" y=\\\"55\\\">WAL retained to avoid copy</text>\"\n    )?;\n    writeln!(\n        result,\n        \"<line x1=\\\"5\\\" y1=\\\"70\\\" x2=\\\"15\\\" y2=\\\"70\\\" stroke-width=\\\"1\\\" stroke=\\\"gray\\\" />\"\n    )?;\n    writeln!(result, \"<text x=\\\"20\\\" y=\\\"75\\\">WAL not retained</text>\")?;\n    writeln!(\n        result,\n        \"<line x1=\\\"10\\\" y1=\\\"85\\\" x2=\\\"10\\\" y2=\\\"95\\\" stroke-width=\\\"3\\\" stroke=\\\"blue\\\" />\"\n    )?;\n    writeln!(result, \"<text x=\\\"20\\\" y=\\\"95\\\">LSN lease</text>\")?;\n    Ok(())\n}\n\npub fn draw_svg(\n    storage: &StorageModel,\n    branches: &[String],\n    seg_to_branch: &[(usize, SvgBranchKind)],\n    
sizes: &SizeResult,\n) -> anyhow::Result<String> {\n    let mut draw = SvgDraw {\n        storage,\n        branches,\n        seg_to_branch,\n        sizes: &sizes.segments,\n\n        xscale: 0.0,\n        min_lsn: 0,\n        seg_coordinates: Vec::new(),\n    };\n\n    let mut result = String::new();\n\n    writeln!(\n        result,\n        \"<svg xmlns=\\\"http://www.w3.org/2000/svg\\\" xmlns:xlink=\\\"http://www.w3.org/1999/xlink\\\" height=\\\"300\\\" width=\\\"500\\\">\"\n    )?;\n\n    draw.calculate_svg_layout();\n\n    // Draw the tree\n    for (seg_id, _seg) in storage.segments.iter().enumerate() {\n        draw.draw_seg_phase1(seg_id, &mut result)?;\n    }\n\n    // Draw snapshots\n    for (seg_id, _seg) in storage.segments.iter().enumerate() {\n        draw.draw_seg_phase2(seg_id, &mut result)?;\n    }\n\n    draw_legend(&mut result)?;\n\n    write!(result, \"</svg>\")?;\n\n    Ok(result)\n}\n\nimpl SvgDraw<'_> {\n    fn calculate_svg_layout(&mut self) {\n        // Find x scale\n        let segments = &self.storage.segments;\n        let min_lsn = segments.iter().map(|s| s.lsn).fold(u64::MAX, std::cmp::min);\n        let max_lsn = segments.iter().map(|s| s.lsn).fold(0, std::cmp::max);\n\n        // Start with 1 pixel = 1 byte. 
Double the scale until it fits into the image\n        let mut xscale = 1.0;\n        while (max_lsn - min_lsn) as f32 / xscale > SVG_WIDTH {\n            xscale *= 2.0;\n        }\n\n        // Layout the timelines on Y dimension.\n        // TODO\n        let mut y = 120.0;\n        let mut branch_y_coordinates = Vec::new();\n        for _branch in self.branches {\n            branch_y_coordinates.push(y);\n            y += 40.0;\n        }\n\n        // Calculate coordinates for each point\n        let seg_coordinates = std::iter::zip(segments, self.seg_to_branch)\n            .map(|(seg, (branch_id, _))| {\n                let x = (seg.lsn - min_lsn) as f32 / xscale;\n                let y = branch_y_coordinates[*branch_id];\n                (x, y)\n            })\n            .collect();\n\n        self.xscale = xscale;\n        self.min_lsn = min_lsn;\n        self.seg_coordinates = seg_coordinates;\n    }\n\n    /// Draws lines between points\n    fn draw_seg_phase1(&self, seg_id: usize, result: &mut String) -> anyhow::Result<()> {\n        let seg = &self.storage.segments[seg_id];\n\n        let wal_bytes = if let Some(parent_id) = seg.parent {\n            seg.lsn - self.storage.segments[parent_id].lsn\n        } else {\n            0\n        };\n\n        let style = match self.sizes[seg_id].method {\n            SegmentMethod::SnapshotHere => \"stroke-width=\\\"1\\\" stroke=\\\"gray\\\"\",\n            SegmentMethod::Wal if seg.needed && wal_bytes > 0 => {\n                \"stroke-width=\\\"6\\\" stroke=\\\"black\\\"\"\n            }\n            SegmentMethod::Wal => \"stroke-width=\\\"3\\\" stroke=\\\"black\\\"\",\n            SegmentMethod::Skipped => \"stroke-width=\\\"1\\\" stroke=\\\"gray\\\"\",\n        };\n        if let Some(parent_id) = seg.parent {\n            let (x1, y1) = self.seg_coordinates[parent_id];\n            let (x2, y2) = self.seg_coordinates[seg_id];\n\n            writeln!(\n                result,\n                \"<line 
x1=\\\"{x1}\\\" y1=\\\"{y1}\\\" x2=\\\"{x2}\\\" y2=\\\"{y2}\\\" {style}>\",\n            )?;\n            writeln!(\n                result,\n                \"  <title>{wal_bytes} bytes of WAL (seg {seg_id})</title>\"\n            )?;\n            writeln!(result, \"</line>\")?;\n        } else {\n            // draw a little dash to mark the starting point of this branch\n            let (x, y) = self.seg_coordinates[seg_id];\n            let (x1, y1) = (x, y - 5.0);\n            let (x2, y2) = (x, y + 5.0);\n\n            writeln!(\n                result,\n                \"<line x1=\\\"{x1}\\\" y1=\\\"{y1}\\\" x2=\\\"{x2}\\\" y2=\\\"{y2}\\\" {style}>\",\n            )?;\n            writeln!(result, \"  <title>(seg {seg_id})</title>\")?;\n            writeln!(result, \"</line>\")?;\n        }\n\n        Ok(())\n    }\n\n    /// Draw circles where snapshots are taken\n    fn draw_seg_phase2(&self, seg_id: usize, result: &mut String) -> anyhow::Result<()> {\n        let seg = &self.storage.segments[seg_id];\n\n        // draw a snapshot point if it's needed\n        let (coord_x, coord_y) = self.seg_coordinates[seg_id];\n\n        let (_, kind) = &self.seg_to_branch[seg_id];\n        if kind == &SvgBranchKind::Lease {\n            let (x1, y1) = (coord_x, coord_y - 10.0);\n            let (x2, y2) = (coord_x, coord_y + 10.0);\n\n            let style = \"stroke-width=\\\"3\\\" stroke=\\\"blue\\\"\";\n\n            writeln!(\n                result,\n                \"<line x1=\\\"{x1}\\\" y1=\\\"{y1}\\\" x2=\\\"{x2}\\\" y2=\\\"{y2}\\\" {style}>\",\n            )?;\n            writeln!(result, \"  <title>leased lsn at {}</title>\", seg.lsn)?;\n            writeln!(result, \"</line>\")?;\n        }\n\n        if self.sizes[seg_id].method == SegmentMethod::SnapshotHere {\n            writeln!(\n                result,\n                \"<circle cx=\\\"{coord_x}\\\" cy=\\\"{coord_y}\\\" r=\\\"5\\\" stroke=\\\"red\\\">\",\n            )?;\n            writeln!(\n    
            result,\n                \"  <title>logical size {}</title>\",\n                seg.size.unwrap()\n            )?;\n            write!(result, \"</circle>\")?;\n        }\n\n        Ok(())\n    }\n}\n"
  },
  {
    "path": "libs/tenant_size_model/tests/tests.rs",
    "content": "//! Tenant size model tests.\n\nuse tenant_size_model::{Segment, SizeResult, StorageModel};\n\nuse std::collections::HashMap;\n\nstruct ScenarioBuilder {\n    segments: Vec<Segment>,\n\n    /// Mapping from the branch name to the index of a segment describing its latest state.\n    branches: HashMap<String, usize>,\n}\n\nimpl ScenarioBuilder {\n    /// Creates a new storage with the given default branch name.\n    pub fn new(initial_branch: &str) -> ScenarioBuilder {\n        let init_segment = Segment {\n            parent: None,\n            lsn: 0,\n            size: Some(0),\n            needed: false, // determined later\n        };\n\n        ScenarioBuilder {\n            segments: vec![init_segment],\n            branches: HashMap::from([(initial_branch.into(), 0)]),\n        }\n    }\n\n    /// Advances the branch with the named operation, by the relative LSN and logical size bytes.\n    pub fn modify_branch(&mut self, branch: &str, lsn_bytes: u64, size_bytes: i64) {\n        let lastseg_id = *self.branches.get(branch).unwrap();\n        let newseg_id = self.segments.len();\n        let lastseg = &mut self.segments[lastseg_id];\n\n        let newseg = Segment {\n            parent: Some(lastseg_id),\n            lsn: lastseg.lsn + lsn_bytes,\n            size: Some((lastseg.size.unwrap() as i64 + size_bytes) as u64),\n            needed: false,\n        };\n\n        self.segments.push(newseg);\n        *self.branches.get_mut(branch).expect(\"read already\") = newseg_id;\n    }\n\n    pub fn insert(&mut self, branch: &str, bytes: u64) {\n        self.modify_branch(branch, bytes, bytes as i64);\n    }\n\n    pub fn update(&mut self, branch: &str, bytes: u64) {\n        self.modify_branch(branch, bytes, 0i64);\n    }\n\n    pub fn _delete(&mut self, branch: &str, bytes: u64) {\n        self.modify_branch(branch, bytes, -(bytes as i64));\n    }\n\n    /// Panics if the parent branch cannot be found.\n    pub fn branch(&mut self, parent: &str, 
name: &str) {\n        // Find the right segment\n        let branchseg_id = *self\n            .branches\n            .get(parent)\n            .expect(\"should had found the parent by key\");\n        let _branchseg = &mut self.segments[branchseg_id];\n\n        // Create branch name for it\n        self.branches.insert(name.to_string(), branchseg_id);\n    }\n\n    pub fn calculate(&mut self, retention_period: u64) -> (StorageModel, SizeResult) {\n        // Phase 1: Mark all the segments that need to be retained\n        for (_branch, &last_seg_id) in self.branches.iter() {\n            let last_seg = &self.segments[last_seg_id];\n            let cutoff_lsn = last_seg.lsn.saturating_sub(retention_period);\n            let mut seg_id = last_seg_id;\n            loop {\n                let seg = &mut self.segments[seg_id];\n                if seg.lsn <= cutoff_lsn {\n                    break;\n                }\n                seg.needed = true;\n                if let Some(prev_seg_id) = seg.parent {\n                    seg_id = prev_seg_id;\n                } else {\n                    break;\n                }\n            }\n        }\n\n        // Perform the calculation\n        let storage_model = StorageModel {\n            segments: self.segments.clone(),\n        };\n        let size_result = storage_model.calculate();\n        (storage_model, size_result)\n    }\n}\n\n// Main branch only. Some updates on it.\n#[test]\nfn scenario_1() {\n    // Create main branch\n    let mut scenario = ScenarioBuilder::new(\"main\");\n\n    // Bulk load 5 GB of data to it\n    scenario.insert(\"main\", 5_000);\n\n    // Stream of updates\n    for _ in 0..5 {\n        scenario.update(\"main\", 1_000);\n    }\n\n    // Calculate the synthetic size with retention horizon 1000\n    let (_model, result) = scenario.calculate(1000);\n\n    // The end of the branch is at LSN 10000. 
Need to retain\n    // a logical snapshot at LSN 9000, plus the WAL between 9000-10000.\n    // The logical snapshot has size 5000.\n    assert_eq!(result.total_size, 5000 + 1000);\n}\n\n// Main branch only. Some updates on it.\n#[test]\nfn scenario_2() {\n    // Create main branch\n    let mut scenario = ScenarioBuilder::new(\"main\");\n\n    // Bulk load 5 GB of data to it\n    scenario.insert(\"main\", 5_000);\n\n    // Stream of updates\n    for _ in 0..5 {\n        scenario.update(\"main\", 1_000);\n    }\n\n    // Branch\n    scenario.branch(\"main\", \"child\");\n    scenario.update(\"child\", 1_000);\n\n    // More updates on parent\n    scenario.update(\"main\", 1_000);\n\n    //\n    // The history looks like this now:\n    //\n    //         10000          11000\n    // *----*----*--------------*    main\n    //           |\n    //           |            11000\n    //           +--------------     child\n    //\n    //\n    // With retention horizon 1000, we need to retain logical snapshot\n    // at the branch point, size 5000, and the WAL from 10000-11000 on\n    // both branches.\n    let (_model, result) = scenario.calculate(1000);\n\n    assert_eq!(result.total_size, 5000 + 1000 + 1000);\n}\n\n// Like 2, but more updates on main\n#[test]\nfn scenario_3() {\n    // Create main branch\n    let mut scenario = ScenarioBuilder::new(\"main\");\n\n    // Bulk load 5 GB of data to it\n    scenario.insert(\"main\", 5_000);\n\n    // Stream of updates\n    for _ in 0..5 {\n        scenario.update(\"main\", 1_000);\n    }\n\n    // Branch\n    scenario.branch(\"main\", \"child\");\n    scenario.update(\"child\", 1_000);\n\n    // More updates on parent\n    for _ in 0..5 {\n        scenario.update(\"main\", 1_000);\n    }\n\n    //\n    // The history looks like this now:\n    //\n    //         10000                                 15000\n    // *----*----*------------------------------------*    main\n    //           |\n    //           |            11000\n 
   //           +--------------     child\n    //\n    //\n    // With retention horizon 1000, it's still cheapest to retain\n    // - snapshot at branch point (size 5000)\n    // - WAL on child between 10000-11000\n    // - WAL on main between 10000-15000\n    //\n    // This is in total 5000 + 1000 + 5000\n    //\n    let (_model, result) = scenario.calculate(1000);\n\n    assert_eq!(result.total_size, 5000 + 1000 + 5000);\n}\n\n// Diverged branches\n#[test]\nfn scenario_4() {\n    // Create main branch\n    let mut scenario = ScenarioBuilder::new(\"main\");\n\n    // Bulk load 5 GB of data to it\n    scenario.insert(\"main\", 5_000);\n\n    // Stream of updates\n    for _ in 0..5 {\n        scenario.update(\"main\", 1_000);\n    }\n\n    // Branch\n    scenario.branch(\"main\", \"child\");\n    scenario.update(\"child\", 1_000);\n\n    // More updates on parent\n    for _ in 0..8 {\n        scenario.update(\"main\", 1_000);\n    }\n\n    //\n    // The history looks like this now:\n    //\n    //         10000                                 18000\n    // *----*----*------------------------------------*    main\n    //           |\n    //           |            11000\n    //           +--------------     child\n    //\n    //\n    // With retention horizon 1000, it's now cheapest to retain\n    // separate snapshots on both branches:\n    // - snapshot on main branch at LSN 17000 (size 5000)\n    // - WAL on main between 17000-18000\n    // - snapshot on child branch at LSN 10000 (size 5000)\n    // - WAL on child between 10000-11000\n    //\n    // This is in total 5000 + 1000 + 5000 + 1000 = 12000\n    //\n    // (If we used the method from the previous scenario, and\n    // kept only snapshot at the branch point, we'd need to keep\n    // all the WAL between 10000-18000 on the main branch, so\n    // the total size would be 5000 + 1000 + 8000 = 14000. 
The\n    // calculation always picks the cheapest alternative)\n\n    let (_model, result) = scenario.calculate(1000);\n\n    assert_eq!(result.total_size, 5000 + 1000 + 5000 + 1000);\n}\n\n#[test]\nfn scenario_5() {\n    let mut scenario = ScenarioBuilder::new(\"a\");\n    scenario.insert(\"a\", 5000);\n    scenario.branch(\"a\", \"b\");\n    scenario.update(\"b\", 4000);\n    scenario.update(\"a\", 2000);\n    scenario.branch(\"a\", \"c\");\n    scenario.insert(\"c\", 4000);\n    scenario.insert(\"a\", 2000);\n\n    let (_model, result) = scenario.calculate(1000);\n\n    assert_eq!(result.total_size, 17000);\n}\n\n#[test]\nfn scenario_6() {\n    let branches = [\n        \"7ff1edab8182025f15ae33482edb590a\",\n        \"b1719e044db05401a05a2ed588a3ad3f\",\n        \"0xb68d6691c895ad0a70809470020929ef\",\n    ];\n\n    // compared to other scenarios, this one uses bytes instead of kB\n\n    let mut scenario = ScenarioBuilder::new(\"\");\n\n    scenario.branch(\"\", branches[0]); // at 0\n    scenario.modify_branch(branches[0], 108951064, 43696128); // at 108951064\n    scenario.branch(branches[0], branches[1]); // at 108951064\n    scenario.modify_branch(branches[1], 15560408, -1851392); // at 124511472\n    scenario.modify_branch(branches[0], 174464360, -1531904); // at 283415424\n    scenario.branch(branches[0], branches[2]); // at 283415424\n    scenario.modify_branch(branches[2], 15906192, 8192); // at 299321616\n    scenario.modify_branch(branches[0], 18909976, 32768); // at 302325400\n\n    let (model, result) = scenario.calculate(100_000);\n\n    // FIXME: We previously calculated 333_792_000. But with this PR, we get\n    // a much lower number. 
At a quick look at the model output and the\n    // calculations here, the new result seems correct to me.\n    eprintln!(\n        \" MODEL: {}\",\n        serde_json::to_string(&model.segments).unwrap()\n    );\n    eprintln!(\n        \"RESULT: {}\",\n        serde_json::to_string(&result.segments).unwrap()\n    );\n\n    assert_eq!(result.total_size, 136_236_928);\n}\n"
  },
  {
    "path": "libs/tracing-utils/Cargo.toml",
    "content": "[package]\nname = \"tracing-utils\"\nversion = \"0.1.0\"\nedition.workspace = true\nlicense.workspace = true\n\n[dependencies]\nhyper0.workspace = true\nopentelemetry = { workspace = true, features = [\"trace\"] }\nopentelemetry_sdk = { workspace = true, features = [\"rt-tokio\"] }\nopentelemetry-otlp = { workspace = true, default-features = false, features = [\"http-proto\", \"trace\", \"http\", \"reqwest-blocking-client\"] }\nopentelemetry-semantic-conventions.workspace = true\ntokio = { workspace = true, features = [\"rt\", \"rt-multi-thread\"] }\ntracing.workspace = true\ntracing-opentelemetry.workspace = true\ntracing-subscriber.workspace = true\npin-project-lite.workspace = true\n\n[dev-dependencies]\ntracing-subscriber.workspace = true    # For examples in docs\n"
  },
  {
    "path": "libs/tracing-utils/src/http.rs",
    "content": "//! Tracing wrapper for Hyper HTTP server\n\nuse std::future::Future;\n\nuse hyper0::{Body, HeaderMap, Request, Response};\nuse tracing::Instrument;\nuse tracing_opentelemetry::OpenTelemetrySpanExt;\n\n/// Configuration option for what to use as the \"otel.name\" field in the traces.\npub enum OtelName<'a> {\n    /// Use a constant string\n    Constant(&'a str),\n\n    /// Use the path from the request.\n    ///\n    /// That's very useful information, but is not appropriate if the\n    /// path contains parameters that differ on ever request, or worse,\n    /// sensitive information like usernames or email addresses.\n    ///\n    /// See <https://github.com/open-telemetry/opentelemetry-specification/blob/main/specification/trace/semantic_conventions/http.md#name>\n    UriPath,\n}\n\n/// Handle an incoming HTTP request using the given handler function,\n/// with OpenTelemetry tracing.\n///\n/// This runs 'handler' on the request in a new span, with fields filled in\n/// from the request. Notably, if the request contains tracing information,\n/// it is propagated to the span, so that this request is traced as part of\n/// the same trace.\n///\n/// XXX: Usually, this is handled by existing libraries, or built\n/// directly into HTTP servers. However, I couldn't find one for Hyper,\n/// so I had to write our own. 
OpenTelemetry website has a registry of\n/// instrumentation libraries at:\n/// <https://opentelemetry.io/registry/?language=rust&component=instrumentation>\n/// If a Hyper crate appears, consider switching to that.\npub async fn tracing_handler<F, R>(\n    req: Request<Body>,\n    handler: F,\n    otel_name: OtelName<'_>,\n) -> Response<Body>\nwhere\n    F: Fn(Request<Body>) -> R,\n    R: Future<Output = Response<Body>>,\n{\n    // Create a tracing span, with context propagated from the incoming\n    // request if any.\n    //\n    // See list of standard fields defined for HTTP requests at\n    // https://github.com/open-telemetry/opentelemetry-specification/blob/main/specification/trace/semantic_conventions/http.md\n    // We only fill in a few of the most useful ones here.\n    let otel_name = match otel_name {\n        OtelName::Constant(s) => s,\n        OtelName::UriPath => req.uri().path(),\n    };\n\n    let span = tracing::info_span!(\n        \"http request\",\n        otel.name= %otel_name,\n        http.method = %req.method(),\n        http.status_code = tracing::field::Empty,\n    );\n    let parent_ctx = extract_remote_context(req.headers());\n    span.set_parent(parent_ctx);\n\n    // Handle the request within the span\n    let response = handler(req).instrument(span.clone()).await;\n\n    // Fill in the fields from the response code\n    let status = response.status();\n    span.record(\"http.status_code\", status.as_str());\n    span.record(\n        \"otel.status_code\",\n        if status.is_success() { \"OK\" } else { \"ERROR\" },\n    );\n\n    response\n}\n\n// Extract remote tracing context from the HTTP headers\nfn extract_remote_context(headers: &HeaderMap) -> opentelemetry::Context {\n    struct HeaderExtractor<'a>(&'a HeaderMap);\n\n    impl opentelemetry::propagation::Extractor for HeaderExtractor<'_> {\n        fn get(&self, key: &str) -> Option<&str> {\n            self.0.get(key).and_then(|value| value.to_str().ok())\n        }\n\n   
     fn keys(&self) -> Vec<&str> {\n            self.0.keys().map(|value| value.as_str()).collect()\n        }\n    }\n    let extractor = HeaderExtractor(headers);\n    opentelemetry::global::get_text_map_propagator(|propagator| propagator.extract(&extractor))\n}\n"
  },
  {
    "path": "libs/tracing-utils/src/lib.rs",
    "content": "//! Helper functions to set up OpenTelemetry tracing.\n//!\n//! Example:\n//!\n//! ```rust,no_run\n//! use tracing_subscriber::prelude::*;\n//!\n//! #[tokio::main]\n//! async fn main() {\n//!     // Set up logging to stderr\n//!     let env_filter = tracing_subscriber::EnvFilter::try_from_default_env()\n//!         .unwrap_or_else(|_| tracing_subscriber::EnvFilter::new(\"info\"));\n//!     let fmt_layer = tracing_subscriber::fmt::layer()\n//!         .with_target(false)\n//!         .with_writer(std::io::stderr);\n//!\n//!     // Initialize OpenTelemetry. Exports tracing spans as OpenTelemetry traces\n//!     let provider = tracing_utils::init_tracing(\"my_application\", tracing_utils::ExportConfig::default());\n//!     let otlp_layer = provider.as_ref().map(tracing_utils::layer);\n//!\n//!     // Put it all together\n//!     tracing_subscriber::registry()\n//!         .with(env_filter)\n//!         .with(otlp_layer)\n//!         .with(fmt_layer)\n//!         .init();\n//! }\n//! 
```\n#![deny(clippy::undocumented_unsafe_blocks)]\n\npub mod http;\npub mod perf_span;\n\nuse opentelemetry::trace::TracerProvider;\nuse opentelemetry_otlp::WithExportConfig;\npub use opentelemetry_otlp::{ExportConfig, Protocol};\nuse opentelemetry_sdk::trace::SdkTracerProvider;\nuse tracing::level_filters::LevelFilter;\nuse tracing::{Dispatch, Subscriber};\nuse tracing_subscriber::Layer;\nuse tracing_subscriber::layer::SubscriberExt;\nuse tracing_subscriber::registry::LookupSpan;\n\npub type Provider = SdkTracerProvider;\n\n/// Set up OpenTelemetry exporter, using configuration from environment variables.\n///\n/// `service_name` is set as the OpenTelemetry 'service.name' resource (see\n/// <https://github.com/open-telemetry/opentelemetry-specification/blob/main/specification/resource/semantic_conventions/README.md#service>)\n///\n/// We try to follow the conventions for the environment variables specified in\n/// <https://opentelemetry.io/docs/reference/specification/sdk-environment-variables/>\n///\n/// However, we only support a subset of those options:\n///\n/// - OTEL_SDK_DISABLED is supported. The default is \"false\", meaning tracing\n///   is enabled by default. Set it to \"true\" to disable.\n///\n/// - We use the OTLP exporter, with HTTP protocol. Most of the OTEL_EXPORTER_OTLP_*\n///   settings specified in\n///   <https://opentelemetry.io/docs/reference/specification/protocol/exporter/>\n///   are supported, as they are handled by the `opentelemetry-otlp` crate.\n///   Settings related to other exporters have no effect.\n///\n/// - Some other settings are supported by the `opentelemetry` crate.\n///\n/// If you need some other setting, please test if it works first. 
And perhaps\n/// add a comment in the list above to save the effort of testing for the next\n/// person.\npub fn init_tracing(service_name: &str, export_config: ExportConfig) -> Option<Provider> {\n    if std::env::var(\"OTEL_SDK_DISABLED\") == Ok(\"true\".to_string()) {\n        return None;\n    };\n    Some(init_tracing_internal(\n        service_name.to_string(),\n        export_config,\n    ))\n}\n\npub fn layer<S>(p: &Provider) -> impl Layer<S>\nwhere\n    S: Subscriber + for<'span> LookupSpan<'span>,\n{\n    tracing_opentelemetry::layer().with_tracer(p.tracer(\"global\"))\n}\n\nfn init_tracing_internal(service_name: String, export_config: ExportConfig) -> Provider {\n    // Sets up exporter from the provided [`ExportConfig`] parameter.\n    // If the endpoint is not specified, it is loaded from the\n    // OTEL_EXPORTER_OTLP_ENDPOINT environment variable.\n    let exporter = opentelemetry_otlp::SpanExporter::builder()\n        .with_http()\n        .with_export_config(export_config)\n        .build()\n        .expect(\"could not initialize opentelemetry exporter\");\n\n    // TODO: opentelemetry::global::set_error_handler() with custom handler that\n    //       bypasses default tracing layers, but logs regular looking log\n    //       messages.\n\n    // Propagate trace information in the standard W3C TraceContext format.\n    opentelemetry::global::set_text_map_propagator(\n        opentelemetry_sdk::propagation::TraceContextPropagator::new(),\n    );\n\n    Provider::builder()\n        .with_batch_exporter(exporter)\n        .with_resource(\n            opentelemetry_sdk::Resource::builder()\n                .with_service_name(service_name)\n                .build(),\n        )\n        .build()\n}\n\npub enum OtelEnablement {\n    Disabled,\n    Enabled {\n        service_name: String,\n        export_config: ExportConfig,\n    },\n}\n\npub struct OtelGuard {\n    provider: Provider,\n    pub dispatch: Dispatch,\n}\n\nimpl Drop for OtelGuard {\n    fn 
drop(&mut self) {\n        _ = self.provider.shutdown();\n    }\n}\n\n/// Initializes OTEL infrastructure for performance tracing according to the provided configuration\n///\n/// Performance tracing is handled by a different [`tracing::Subscriber`]. This function returns\n/// an [`OtelGuard`] containing a [`tracing::Dispatch`] associated with a newly created subscriber.\n/// Applications should use this dispatch for their performance traces.\n///\n/// The lifetime of the guard should match that of the application. On drop, it tears down the\n/// OTEL infra.\npub fn init_performance_tracing(otel_enablement: OtelEnablement) -> Option<OtelGuard> {\n    match otel_enablement {\n        OtelEnablement::Disabled => None,\n        OtelEnablement::Enabled {\n            service_name,\n            export_config,\n        } => {\n            let provider = init_tracing(&service_name, export_config)?;\n\n            let otel_layer = layer(&provider).with_filter(LevelFilter::INFO);\n            let otel_subscriber = tracing_subscriber::registry().with(otel_layer);\n            let dispatch = Dispatch::new(otel_subscriber);\n\n            Some(OtelGuard { dispatch, provider })\n        }\n    }\n}\n"
  },
  {
    "path": "libs/tracing-utils/src/perf_span.rs",
    "content": "//! Crutch module to work around tracing infrastructure deficiencies\n//!\n//! We wish to collect granular request spans without impacting performance\n//! by much. Ideally, we should have zero overhead for a sampling rate of 0.\n//!\n//! The approach taken by the pageserver crate is to use a completely different\n//! span hierarchy for the performance spans. Spans are explicitly stored in\n//! the request context and use a different [`tracing::Subscriber`] in order\n//! to avoid expensive filtering.\n//!\n//! [`tracing::Span`] instances record their [`tracing::Dispatch`] and, implcitly,\n//! their [`tracing::Subscriber`] at creation time. However, upon exiting the span,\n//! the global default [`tracing::Dispatch`] is used. This is problematic if one\n//! wishes to juggle different subscribers.\n//!\n//! In order to work around this, this module provides a [`PerfSpan`] type which\n//! wraps a [`Span`] and sets the default subscriber when exiting the span. This\n//! achieves the correct routing.\n//!\n//! There's also a modified version of [`tracing::Instrument`] which works with\n//! 
[`PerfSpan`].\n\nuse core::{\n    future::Future,\n    marker::Sized,\n    mem::ManuallyDrop,\n    pin::Pin,\n    task::{Context, Poll},\n};\nuse pin_project_lite::pin_project;\nuse tracing::{Dispatch, span::Span};\n\n#[derive(Debug, Clone)]\npub struct PerfSpan {\n    inner: ManuallyDrop<Span>,\n    dispatch: Dispatch,\n}\n\n#[must_use = \"once a span has been entered, it should be exited\"]\npub struct PerfSpanEntered<'a> {\n    span: &'a PerfSpan,\n}\n\nimpl PerfSpan {\n    pub fn new(span: Span, dispatch: Dispatch) -> Self {\n        Self {\n            inner: ManuallyDrop::new(span),\n            dispatch,\n        }\n    }\n\n    pub fn enter(&self) -> PerfSpanEntered<'_> {\n        if let Some(ref id) = self.inner.id() {\n            self.dispatch.enter(id);\n        }\n\n        PerfSpanEntered { span: self }\n    }\n\n    pub fn inner(&self) -> &Span {\n        &self.inner\n    }\n}\n\nimpl Drop for PerfSpan {\n    fn drop(&mut self) {\n        // Bring the desired dispatch into scope before explicitly calling\n        // the span destructor. This routes the span exit to the correct\n        // [`tracing::Subscriber`].\n        let _dispatch_guard = tracing::dispatcher::set_default(&self.dispatch);\n        // SAFETY: ManuallyDrop in Drop implementation\n        unsafe { ManuallyDrop::drop(&mut self.inner) }\n    }\n}\n\nimpl Drop for PerfSpanEntered<'_> {\n    fn drop(&mut self) {\n        assert!(self.span.inner.id().is_some());\n\n        let _dispatch_guard = tracing::dispatcher::set_default(&self.span.dispatch);\n        self.span.dispatch.exit(&self.span.inner.id().unwrap());\n    }\n}\n\npub trait PerfInstrument: Sized {\n    fn instrument(self, span: PerfSpan) -> PerfInstrumented<Self> {\n        PerfInstrumented {\n            inner: ManuallyDrop::new(self),\n            span,\n        }\n    }\n}\n\npin_project! 
{\n    #[project = PerfInstrumentedProj]\n    #[derive(Debug, Clone)]\n    #[must_use = \"futures do nothing unless you `.await` or poll them\"]\n    pub struct PerfInstrumented<T> {\n        // `ManuallyDrop` is used here to to enter instrument `Drop` by entering\n        // `Span` and executing `ManuallyDrop::drop`.\n        #[pin]\n        inner: ManuallyDrop<T>,\n        span: PerfSpan,\n    }\n\n    impl<T> PinnedDrop for PerfInstrumented<T> {\n        fn drop(this: Pin<&mut Self>) {\n            let this = this.project();\n            let _enter = this.span.enter();\n            // SAFETY: 1. `Pin::get_unchecked_mut()` is safe, because this isn't\n            //             different from wrapping `T` in `Option` and calling\n            //             `Pin::set(&mut this.inner, None)`, except avoiding\n            //             additional memory overhead.\n            //         2. `ManuallyDrop::drop()` is safe, because\n            //            `PinnedDrop::drop()` is guaranteed to be called only\n            //            once.\n            unsafe { ManuallyDrop::drop(this.inner.get_unchecked_mut()) }\n        }\n    }\n}\n\nimpl<'a, T> PerfInstrumentedProj<'a, T> {\n    /// Get a mutable reference to the [`Span`] a pinned mutable reference to\n    /// the wrapped type.\n    fn span_and_inner_pin_mut(self) -> (&'a mut PerfSpan, Pin<&'a mut T>) {\n        // SAFETY: As long as `ManuallyDrop<T>` does not move, `T` won't move\n        //         and `inner` is valid, because `ManuallyDrop::drop` is called\n        //         only inside `Drop` of the `Instrumented`.\n        let inner = unsafe { self.inner.map_unchecked_mut(|v| &mut **v) };\n        (self.span, inner)\n    }\n}\n\nimpl<T: Future> Future for PerfInstrumented<T> {\n    type Output = T::Output;\n\n    fn poll(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Self::Output> {\n        let (span, inner) = self.project().span_and_inner_pin_mut();\n        let _enter = span.enter();\n        
inner.poll(cx)\n    }\n}\n\nimpl<T: Sized> PerfInstrument for T {}\n"
  },
  {
    "path": "libs/utils/Cargo.toml",
    "content": "[package]\nname = \"utils\"\nversion = \"0.1.0\"\nedition.workspace = true\nlicense.workspace = true\n\n[features]\ndefault = [\"rename_noreplace\"]\nrename_noreplace = []\n# Enables test-only APIs, incuding failpoints. In particular, enables the `fail_point!` macro,\n# which adds some runtime cost to run tests on outage conditions\ntesting = [\"fail/failpoints\"]\n\n[dependencies]\narc-swap.workspace = true\nsentry.workspace = true\nasync-compression.workspace = true\nanyhow.workspace = true\nbincode.workspace = true\nbytes.workspace = true\ncamino.workspace = true\nchrono.workspace = true\ndiatomic-waker.workspace = true\ngit-version.workspace = true\nhex = { workspace = true, features = [\"serde\"] }\nhumantime.workspace = true\nfail.workspace = true\nfutures = { workspace = true }\njsonwebtoken.workspace = true\nnix = { workspace = true, features = [\"ioctl\"] }\nonce_cell.workspace = true\npem.workspace = true\npin-project-lite.workspace = true\nregex.workspace = true\nserde.workspace = true\nserde_with.workspace = true\nserde_json.workspace = true\nsignal-hook.workspace = true\nthiserror.workspace = true\ntokio = { workspace = true, features = [\"signal\"] }\ntokio-tar.workspace = true\ntokio-util.workspace = true\ntoml_edit = { workspace = true, features = [\"serde\"] }\ntracing.workspace = true\ntracing-error.workspace = true\ntracing-subscriber = { workspace = true, features = [\"json\", \"registry\"] }\ntracing-utils.workspace = true\nrand.workspace = true\nscopeguard.workspace = true\nuuid.workspace = true\nstrum.workspace = true\nstrum_macros.workspace = true\nwalkdir.workspace = true\n\npq_proto.workspace = true\npostgres_connection.workspace = true\nmetrics.workspace = true\n\nconst_format.workspace = true\n\n[dev-dependencies]\nbyteorder.workspace = true\nbytes.workspace = true\ncriterion.workspace = true\nhex-literal.workspace = true\ncamino-tempfile.workspace = true\npprof.workspace = true\nserde_assert.workspace = true\ntokio = { 
workspace = true, features = [\"test-util\"] }\n\n[[bench]]\nname = \"benchmarks\"\nharness = false\n"
  },
  {
    "path": "libs/utils/benches/README.md",
    "content": "## Utils Benchmarks\n\nTo run benchmarks:\n\n```sh\n# All benchmarks.\ncargo bench --package utils\n\n# Specific file.\ncargo bench --package utils --bench benchmarks\n\n# Specific benchmark.\ncargo bench --package utils --bench benchmarks log_slow/enabled=true\n\n# List available benchmarks.\ncargo bench --package utils --benches -- --list\n\n# Generate flamegraph profiles using pprof-rs, profiling for 10 seconds.\n# Output in target/criterion/*/profile/flamegraph.svg.\ncargo bench --package utils --bench benchmarks log_slow/enabled=true --profile-time 10\n```\n\nAdditional charts and statistics are available in `target/criterion/report/index.html`.\n\nBenchmarks are automatically compared against the previous run. To compare against other runs, see\n`--baseline` and `--save-baseline`."
  },
  {
    "path": "libs/utils/benches/benchmarks.rs",
    "content": "use std::time::Duration;\n\nuse criterion::{Bencher, Criterion, criterion_group, criterion_main};\nuse pprof::criterion::{Output, PProfProfiler};\nuse utils::id;\nuse utils::logging::log_slow;\n\n// Register benchmarks with Criterion.\ncriterion_group!(\n    name = benches;\n    config = Criterion::default().with_profiler(PProfProfiler::new(100, Output::Flamegraph(None)));\n    targets = bench_id_stringify,\n    bench_log_slow,\n);\ncriterion_main!(benches);\n\npub fn bench_id_stringify(c: &mut Criterion) {\n    // Can only use public methods.\n    let ttid = id::TenantTimelineId::generate();\n\n    c.bench_function(\"id.to_string\", |b| {\n        b.iter(|| {\n            // FIXME measurement overhead?\n            //for _ in 0..1000 {\n            //    ttid.tenant_id.to_string();\n            //}\n            ttid.tenant_id.to_string();\n        })\n    });\n}\n\npub fn bench_log_slow(c: &mut Criterion) {\n    for enabled in [false, true] {\n        c.bench_function(&format!(\"log_slow/enabled={enabled}\"), |b| {\n            run_bench(b, enabled).unwrap()\n        });\n    }\n\n    // The actual benchmark.\n    fn run_bench(b: &mut Bencher, enabled: bool) -> anyhow::Result<()> {\n        const THRESHOLD: Duration = Duration::from_secs(1);\n\n        // Use a multi-threaded runtime to avoid thread parking overhead when yielding.\n        let runtime = tokio::runtime::Builder::new_multi_thread()\n            .enable_all()\n            .build()?;\n\n        // Test both with and without log_slow, since we're essentially measuring Tokio scheduling\n        // performance too. 
Use a simple noop future that yields once, to avoid any scheduler fast\n        // paths for a ready future.\n        if enabled {\n            b.iter(|| {\n                runtime.block_on(log_slow(\n                    \"ready\",\n                    THRESHOLD,\n                    std::pin::pin!(tokio::task::yield_now()),\n                ))\n            });\n        } else {\n            b.iter(|| runtime.block_on(tokio::task::yield_now()));\n        }\n\n        Ok(())\n    }\n}\n"
  },
  {
    "path": "libs/utils/scripts/restore_from_wal.sh",
    "content": "#!/usr/bin/env bash\n\nset -euxo pipefail\n\nPG_BIN=$1\nWAL_PATH=$2\nDATA_DIR=$3\nPORT=$4\nPG_VERSION=$5\nSYSID=$(od -A n -j 24 -N 8 -t d8 \"$WAL_PATH\"/000000010000000000000002* | cut -c 3-)\n\n# The way that initdb is invoked must match how the pageserver runs initdb.\nfunction initdb_with_args {\n    local cmd=(\n        \"$PG_BIN\"/initdb\n        -E utf8\n        -U cloud_admin\n        -D \"$DATA_DIR\"\n        --locale 'C.UTF-8'\n        --lc-collate 'C.UTF-8'\n        --lc-ctype 'C.UTF-8'\n        --lc-messages 'C.UTF-8'\n        --lc-monetary 'C.UTF-8'\n        --lc-numeric 'C.UTF-8'\n        --lc-time 'C.UTF-8'\n        --sysid=\"$SYSID\"\n    )\n\n    case \"$PG_VERSION\" in\n        14)\n            # Postgres 14 and below didn't support --locale-provider\n            ;;\n        15 | 16)\n            cmd+=(--locale-provider 'libc')\n            ;;\n        *)\n            # Postgres 17 added the builtin provider\n            cmd+=(--locale-provider 'builtin')\n            ;;\n    esac\n\n    eval env -i LD_LIBRARY_PATH=\"$PG_BIN\"/../lib ASAN_OPTIONS=\"${ASAN_OPTIONS-}\" UBSAN_OPTIONS=\"${UBSAN_OPTIONS-}\" \"${cmd[*]}\"\n}\n\nrm -fr \"$DATA_DIR\"\ninitdb_with_args\necho \"port=$PORT\" >> \"$DATA_DIR\"/postgresql.conf\necho \"shared_preload_libraries='\\$libdir/neon_rmgr.so'\" >> \"$DATA_DIR\"/postgresql.conf\nREDO_POS=0x$(\"$PG_BIN\"/pg_controldata -D \"$DATA_DIR\" | grep -F \"REDO location\"| cut -c 42-)\ndeclare -i WAL_SIZE=$REDO_POS+114\n\"$PG_BIN\"/pg_ctl -D \"$DATA_DIR\" -l \"$DATA_DIR/logfile.log\" start\n\"$PG_BIN\"/pg_ctl -D \"$DATA_DIR\" -l \"$DATA_DIR/logfile.log\" stop -m immediate\ncp \"$DATA_DIR\"/pg_wal/000000010000000000000001 \"$DATA_DIR\"\ncp \"$WAL_PATH\"/* \"$DATA_DIR\"/pg_wal/\nfor partial in \"$DATA_DIR\"/pg_wal/*.partial ; do mv \"$partial\" \"${partial%.partial}\" ; done\ndd if=\"$DATA_DIR\"/000000010000000000000001 of=\"$DATA_DIR\"/pg_wal/000000010000000000000001 bs=$WAL_SIZE count=1 conv=notrunc\nrm -f 
\"$DATA_DIR\"/000000010000000000000001\n"
  },
  {
    "path": "libs/utils/scripts/restore_from_wal_initdb.sh",
    "content": "#!/bin/bash\n\n# like restore_from_wal.sh, but takes existing initdb.tar.zst\n\nset -euxo pipefail\n\nPG_BIN=$1\nWAL_PATH=$2\nDATA_DIR=$3\nPORT=$4\necho \"port=$PORT\" >> \"$DATA_DIR\"/postgresql.conf\necho \"shared_preload_libraries='\\$libdir/neon_rmgr.so'\" >> \"$DATA_DIR\"/postgresql.conf\nREDO_POS=0x$(\"$PG_BIN\"/pg_controldata -D \"$DATA_DIR\" | grep -F \"REDO location\"| cut -c 42-)\ndeclare -i WAL_SIZE=$REDO_POS+114\n\"$PG_BIN\"/pg_ctl -D \"$DATA_DIR\" -l \"$DATA_DIR/logfile.log\" start\n\"$PG_BIN\"/pg_ctl -D \"$DATA_DIR\" -l \"$DATA_DIR/logfile.log\" stop -m immediate\ncp \"$DATA_DIR\"/pg_wal/000000010000000000000001 \"$DATA_DIR\"\ncp \"$WAL_PATH\"/* \"$DATA_DIR\"/pg_wal/\nfor partial in \"$DATA_DIR\"/pg_wal/*.partial ; do mv \"$partial\" \"${partial%.partial}\" ; done\ndd if=\"$DATA_DIR\"/000000010000000000000001 of=\"$DATA_DIR\"/pg_wal/000000010000000000000001 bs=$WAL_SIZE count=1 conv=notrunc\nrm -f \"$DATA_DIR\"/000000010000000000000001\n"
  },
  {
    "path": "libs/utils/src/auth.rs",
    "content": "// For details about authentication see docs/authentication.md\n\nuse std::borrow::Cow;\nuse std::fmt::Display;\nuse std::fs;\nuse std::sync::Arc;\n\nuse anyhow::Result;\nuse arc_swap::ArcSwap;\nuse camino::Utf8Path;\nuse jsonwebtoken::{\n    Algorithm, DecodingKey, EncodingKey, Header, TokenData, Validation, decode, encode,\n};\nuse pem::Pem;\nuse serde::{Deserialize, Deserializer, Serialize, de::DeserializeOwned};\nuse uuid::Uuid;\n\nuse crate::id::TenantId;\n\n/// Algorithm to use. We require EdDSA.\nconst STORAGE_TOKEN_ALGORITHM: Algorithm = Algorithm::EdDSA;\n\n#[derive(Debug, Serialize, Deserialize, Clone, Copy, PartialEq)]\n#[serde(rename_all = \"lowercase\")]\npub enum Scope {\n    /// Provides access to all data for a specific tenant (specified in `struct Claims` below)\n    // TODO: join these two?\n    Tenant,\n    /// Provides access to all data for a specific tenant, but based on endpoint ID. This token scope\n    /// is only used by compute to fetch the spec for a specific endpoint. The spec contains a Tenant-scoped\n    /// token authorizing access to all data of a tenant, so the spec-fetch API requires a TenantEndpoint\n    /// scope token to ensure that untrusted compute nodes can't fetch spec for arbitrary endpoints.\n    TenantEndpoint,\n    /// Provides blanket access to all tenants on the pageserver plus pageserver-wide APIs.\n    /// Should only be used e.g. for status check/tenant creation/list.\n    PageServerApi,\n    /// Provides blanket access to all data on the safekeeper plus safekeeper-wide APIs.\n    /// Should only be used e.g. 
for status check.\n    /// Currently also used for connection from any pageserver to any safekeeper.\n    SafekeeperData,\n    /// The scope used by pageservers in upcalls to storage controller and cloud control plane\n    #[serde(rename = \"generations_api\")]\n    GenerationsApi,\n    /// Allows access to control plane management API and all storage controller endpoints.\n    Admin,\n\n    /// Allows access to control plane & storage controller endpoints used in infrastructure automation (e.g. node registration)\n    Infra,\n\n    /// Allows access to storage controller APIs used by the scrubber, to interrogate the state\n    /// of a tenant & post scrub results.\n    Scrubber,\n\n    /// This scope is used for communication with other storage controller instances.\n    /// At the time of writing, this is only used for the step down request.\n    #[serde(rename = \"controller_peer\")]\n    ControllerPeer,\n}\n\nfn deserialize_empty_string_as_none_uuid<'de, D>(deserializer: D) -> Result<Option<Uuid>, D::Error>\nwhere\n    D: Deserializer<'de>,\n{\n    let opt = Option::<String>::deserialize(deserializer)?;\n    match opt.as_deref() {\n        Some(\"\") => Ok(None),\n        Some(s) => Uuid::parse_str(s)\n            .map(Some)\n            .map_err(serde::de::Error::custom),\n        None => Ok(None),\n    }\n}\n\n/// JWT payload. 
See docs/authentication.md for the format\n#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]\npub struct Claims {\n    #[serde(default)]\n    pub tenant_id: Option<TenantId>,\n    #[serde(\n        default,\n        skip_serializing_if = \"Option::is_none\",\n        // Neon control plane includes this field as empty in the claims.\n        // Consider it None in those cases.\n        deserialize_with = \"deserialize_empty_string_as_none_uuid\"\n    )]\n    pub endpoint_id: Option<Uuid>,\n    pub scope: Scope,\n}\n\nimpl Claims {\n    pub fn new(tenant_id: Option<TenantId>, scope: Scope) -> Self {\n        Self {\n            tenant_id,\n            scope,\n            endpoint_id: None,\n        }\n    }\n}\n\npub struct SwappableJwtAuth(ArcSwap<JwtAuth>);\n\nimpl SwappableJwtAuth {\n    pub fn new(jwt_auth: JwtAuth) -> Self {\n        SwappableJwtAuth(ArcSwap::new(Arc::new(jwt_auth)))\n    }\n    pub fn swap(&self, jwt_auth: JwtAuth) {\n        self.0.swap(Arc::new(jwt_auth));\n    }\n    pub fn decode<D: DeserializeOwned>(\n        &self,\n        token: &str,\n    ) -> std::result::Result<TokenData<D>, AuthError> {\n        self.0.load().decode(token)\n    }\n}\n\nimpl std::fmt::Debug for SwappableJwtAuth {\n    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {\n        write!(f, \"Swappable({:?})\", self.0.load())\n    }\n}\n\n#[derive(Clone, PartialEq, Eq, Hash, Debug)]\npub struct AuthError(pub Cow<'static, str>);\n\nimpl Display for AuthError {\n    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {\n        write!(f, \"{}\", self.0)\n    }\n}\n\npub struct JwtAuth {\n    decoding_keys: Vec<DecodingKey>,\n    validation: Validation,\n}\n\nimpl JwtAuth {\n    pub fn new(decoding_keys: Vec<DecodingKey>) -> Self {\n        let mut validation = Validation::default();\n        validation.algorithms = vec![STORAGE_TOKEN_ALGORITHM];\n        // The default 'required_spec_claims' is 'exp'. 
But we don't want to require\n        // expiration.\n        validation.required_spec_claims = [].into();\n        Self {\n            decoding_keys,\n            validation,\n        }\n    }\n\n    pub fn from_key_path(key_path: &Utf8Path) -> Result<Self> {\n        let metadata = key_path.metadata()?;\n        let decoding_keys = if metadata.is_dir() {\n            let mut keys = Vec::new();\n            for entry in fs::read_dir(key_path)? {\n                let path = entry?.path();\n                if !path.is_file() {\n                    // Ignore directories (don't recurse)\n                    continue;\n                }\n                let public_key = fs::read(path)?;\n                keys.push(DecodingKey::from_ed_pem(&public_key)?);\n            }\n            keys\n        } else if metadata.is_file() {\n            let public_key = fs::read(key_path)?;\n            vec![DecodingKey::from_ed_pem(&public_key)?]\n        } else {\n            anyhow::bail!(\"path is neither a directory or a file\")\n        };\n        if decoding_keys.is_empty() {\n            anyhow::bail!(\n                \"Configured for JWT auth with zero decoding keys. 
All JWT gated requests would be rejected.\"\n            );\n        }\n        Ok(Self::new(decoding_keys))\n    }\n\n    pub fn from_key(key: String) -> Result<Self> {\n        Ok(Self::new(vec![DecodingKey::from_ed_pem(key.as_bytes())?]))\n    }\n\n    /// Attempt to decode the token with the internal decoding keys.\n    ///\n    /// The function tries the stored decoding keys in succession,\n    /// and returns the first yielding a successful result.\n    /// If there is no working decoding key, it returns the last error.\n    pub fn decode<D: DeserializeOwned>(\n        &self,\n        token: &str,\n    ) -> std::result::Result<TokenData<D>, AuthError> {\n        let mut res = None;\n        for decoding_key in &self.decoding_keys {\n            res = Some(decode(token, decoding_key, &self.validation));\n            if let Some(Ok(res)) = res {\n                return Ok(res);\n            }\n        }\n        if let Some(res) = res {\n            res.map_err(|e| AuthError(Cow::Owned(e.to_string())))\n        } else {\n            Err(AuthError(Cow::Borrowed(\"no JWT decoding keys configured\")))\n        }\n    }\n}\n\nimpl std::fmt::Debug for JwtAuth {\n    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {\n        f.debug_struct(\"JwtAuth\")\n            .field(\"validation\", &self.validation)\n            .finish()\n    }\n}\n\n// this function is used only for testing purposes in CLI e g generate tokens during init\npub fn encode_from_key_file<S: Serialize>(claims: &S, pem: &Pem) -> Result<String> {\n    let key = EncodingKey::from_ed_der(pem.contents());\n    Ok(encode(&Header::new(STORAGE_TOKEN_ALGORITHM), claims, &key)?)\n}\n\n#[cfg(test)]\nmod tests {\n    use std::str::FromStr;\n\n    use super::*;\n\n    // Generated with:\n    //\n    // openssl genpkey -algorithm ed25519 -out ed25519-priv.pem\n    // openssl pkey -in ed25519-priv.pem -pubout -out ed25519-pub.pem\n    const TEST_PUB_KEY_ED25519: &str = r#\"\n-----BEGIN PUBLIC 
KEY-----\nMCowBQYDK2VwAyEARYwaNBayR+eGI0iXB4s3QxE3Nl2g1iWbr6KtLWeVD/w=\n-----END PUBLIC KEY-----\n\"#;\n\n    const TEST_PRIV_KEY_ED25519: &str = r#\"\n-----BEGIN PRIVATE KEY-----\nMC4CAQAwBQYDK2VwBCIEID/Drmc1AA6U/znNRWpF3zEGegOATQxfkdWxitcOMsIH\n-----END PRIVATE KEY-----\n\"#;\n\n    #[test]\n    fn test_decode() {\n        let expected_claims = Claims {\n            tenant_id: Some(TenantId::from_str(\"3d1f7595b468230304e0b73cecbcb081\").unwrap()),\n            scope: Scope::Tenant,\n            endpoint_id: None,\n        };\n\n        // A test token containing the following payload, signed using TEST_PRIV_KEY_ED25519:\n        //\n        // ```\n        // {\n        //   \"scope\": \"tenant\",\n        //   \"tenant_id\": \"3d1f7595b468230304e0b73cecbcb081\",\n        //   \"iss\": \"neon.controlplane\",\n        //   \"iat\": 1678442479\n        // }\n        // ```\n        //\n        let encoded_eddsa = \"eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJzY29wZSI6InRlbmFudCIsInRlbmFudF9pZCI6IjNkMWY3NTk1YjQ2ODIzMDMwNGUwYjczY2VjYmNiMDgxIiwiaXNzIjoibmVvbi5jb250cm9scGxhbmUiLCJpYXQiOjE2Nzg0NDI0Nzl9.rNheBnluMJNgXzSTTJoTNIGy4P_qe0JUHl_nVEGuDCTgHOThPVr552EnmKccrCKquPeW3c2YUk0Y9Oh4KyASAw\";\n\n        // Check it can be validated with the public key\n        let auth = JwtAuth::new(vec![\n            DecodingKey::from_ed_pem(TEST_PUB_KEY_ED25519.as_bytes()).unwrap(),\n        ]);\n        let claims_from_token: Claims = auth.decode(encoded_eddsa).unwrap().claims;\n        assert_eq!(claims_from_token, expected_claims);\n    }\n\n    #[test]\n    fn test_encode() {\n        let claims = Claims {\n            tenant_id: Some(TenantId::from_str(\"3d1f7595b468230304e0b73cecbcb081\").unwrap()),\n            scope: Scope::Tenant,\n            endpoint_id: None,\n        };\n\n        let pem = pem::parse(TEST_PRIV_KEY_ED25519).unwrap();\n        let encoded = encode_from_key_file(&claims, &pem).unwrap();\n\n        // decode it back\n        let auth = JwtAuth::new(vec![\n       
     DecodingKey::from_ed_pem(TEST_PUB_KEY_ED25519.as_bytes()).unwrap(),\n        ]);\n        let decoded: TokenData<Claims> = auth.decode(&encoded).unwrap();\n\n        assert_eq!(decoded.claims, claims);\n    }\n}\n"
  },
  {
    "path": "libs/utils/src/backoff.rs",
    "content": "use std::fmt::{Debug, Display};\nuse std::time::Duration;\n\nuse futures::Future;\nuse tokio_util::sync::CancellationToken;\n\npub const DEFAULT_BASE_BACKOFF_SECONDS: f64 = 0.1;\npub const DEFAULT_MAX_BACKOFF_SECONDS: f64 = 3.0;\n\npub async fn exponential_backoff(\n    n: u32,\n    base_increment: f64,\n    max_seconds: f64,\n    cancel: &CancellationToken,\n) {\n    let backoff_duration_seconds =\n        exponential_backoff_duration_seconds(n, base_increment, max_seconds);\n    if backoff_duration_seconds > 0.0 {\n        tracing::info!(\n            \"Backoff: waiting {backoff_duration_seconds} seconds before processing with the task\",\n        );\n\n        drop(\n            tokio::time::timeout(\n                std::time::Duration::from_secs_f64(backoff_duration_seconds),\n                cancel.cancelled(),\n            )\n            .await,\n        )\n    }\n}\n\npub fn exponential_backoff_duration(n: u32, base_increment: f64, max_seconds: f64) -> Duration {\n    let seconds = exponential_backoff_duration_seconds(n, base_increment, max_seconds);\n    Duration::from_secs_f64(seconds)\n}\n\npub fn exponential_backoff_duration_seconds(n: u32, base_increment: f64, max_seconds: f64) -> f64 {\n    if n == 0 {\n        0.0\n    } else {\n        (1.0 + base_increment).powf(f64::from(n)).min(max_seconds)\n    }\n}\n\n/// Retries passed operation until one of the following conditions are met:\n/// - encountered error is considered as permanent (non-retryable)\n/// - retries have been exhausted\n/// - cancellation token has been cancelled\n///\n/// `is_permanent` closure should be used to provide distinction between permanent/non-permanent\n/// errors. When attempts cross `warn_threshold` function starts to emit log warnings.\n/// `description` argument is added to log messages. 
Its value should identify what `op` is doing.\n/// `cancel` cancels new attempts and the backoff sleep.\n///\n/// If attempts fail, they are being logged with `{:#}` which works for anyhow, but does not work\n/// for any other error type. Final failed attempt is logged with `{:?}`.\n///\n/// Returns `None` if cancellation was noticed during backoff or the terminal result.\npub async fn retry<T, O, F, E>(\n    mut op: O,\n    is_permanent: impl Fn(&E) -> bool,\n    warn_threshold: u32,\n    max_retries: u32,\n    description: &str,\n    cancel: &CancellationToken,\n) -> Option<Result<T, E>>\nwhere\n    // Not std::error::Error because anyhow::Error doesn't implement it.\n    // For context see https://github.com/dtolnay/anyhow/issues/63\n    E: Display + Debug + 'static,\n    O: FnMut() -> F,\n    F: Future<Output = Result<T, E>>,\n{\n    let mut attempts = 0;\n    loop {\n        if cancel.is_cancelled() {\n            return None;\n        }\n\n        let result = op().await;\n        match &result {\n            Ok(_) => {\n                if attempts > 0 {\n                    tracing::info!(\"{description} succeeded after {attempts} retries\");\n                }\n                return Some(result);\n            }\n\n            // These are \"permanent\" errors that should not be retried.\n            Err(e) if is_permanent(e) => {\n                return Some(result);\n            }\n            // Assume that any other failure might be transient, and the operation might\n            // succeed if we just keep trying.\n            Err(err) if attempts < warn_threshold => {\n                tracing::info!(\"{description} failed, will retry (attempt {attempts}): {err:#}\");\n            }\n            Err(err) if attempts < max_retries => {\n                tracing::warn!(\"{description} failed, will retry (attempt {attempts}): {err:#}\");\n            }\n            Err(err) => {\n                // Operation failed `max_attempts` times. 
Time to give up.\n                tracing::warn!(\n                    \"{description} still failed after {attempts} retries, giving up: {err:?}\"\n                );\n                return Some(result);\n            }\n        }\n        // sleep and retry\n        exponential_backoff(\n            attempts,\n            DEFAULT_BASE_BACKOFF_SECONDS,\n            DEFAULT_MAX_BACKOFF_SECONDS,\n            cancel,\n        )\n        .await;\n        attempts += 1;\n    }\n}\n\n#[cfg(test)]\nmod tests {\n    use std::io;\n\n    use tokio::sync::Mutex;\n\n    use super::*;\n\n    #[test]\n    fn backoff_defaults_produce_growing_backoff_sequence() {\n        let mut current_backoff_value = None;\n\n        for i in 0..10_000 {\n            let new_backoff_value = exponential_backoff_duration_seconds(\n                i,\n                DEFAULT_BASE_BACKOFF_SECONDS,\n                DEFAULT_MAX_BACKOFF_SECONDS,\n            );\n\n            if let Some(old_backoff_value) = current_backoff_value.replace(new_backoff_value) {\n                assert!(\n                    old_backoff_value <= new_backoff_value,\n                    \"{i}th backoff value {new_backoff_value} is smaller than the previous one {old_backoff_value}\"\n                )\n            }\n        }\n\n        assert_eq!(\n            current_backoff_value.expect(\"Should have produced backoff values to compare\"),\n            DEFAULT_MAX_BACKOFF_SECONDS,\n            \"Given big enough of retries, backoff should reach its allowed max value\"\n        );\n    }\n\n    #[tokio::test(start_paused = true)]\n    async fn retry_always_error() {\n        let count = Mutex::new(0);\n        retry(\n            || async {\n                *count.lock().await += 1;\n                Result::<(), io::Error>::Err(io::Error::from(io::ErrorKind::Other))\n            },\n            |_e| false,\n            1,\n            1,\n            \"work\",\n            &CancellationToken::new(),\n        )\n        
.await\n        .expect(\"not cancelled\")\n        .expect_err(\"it can only fail\");\n\n        assert_eq!(*count.lock().await, 2);\n    }\n\n    #[tokio::test(start_paused = true)]\n    async fn retry_ok_after_err() {\n        let count = Mutex::new(0);\n        retry(\n            || async {\n                let mut locked = count.lock().await;\n                if *locked > 1 {\n                    Ok(())\n                } else {\n                    *locked += 1;\n                    Err(io::Error::from(io::ErrorKind::Other))\n                }\n            },\n            |_e| false,\n            2,\n            2,\n            \"work\",\n            &CancellationToken::new(),\n        )\n        .await\n        .expect(\"not cancelled\")\n        .expect(\"success on second try\");\n    }\n\n    #[tokio::test(start_paused = true)]\n    async fn dont_retry_permanent_errors() {\n        let count = Mutex::new(0);\n        let _ = retry(\n            || async {\n                let mut locked = count.lock().await;\n                if *locked > 1 {\n                    Ok(())\n                } else {\n                    *locked += 1;\n                    Err(io::Error::from(io::ErrorKind::Other))\n                }\n            },\n            |_e| true,\n            2,\n            2,\n            \"work\",\n            &CancellationToken::new(),\n        )\n        .await\n        .expect(\"was not cancellation\")\n        .expect_err(\"it was permanent error\");\n\n        assert_eq!(*count.lock().await, 1);\n    }\n}\n"
  },
  {
    "path": "libs/utils/src/bin_ser.rs",
    "content": "//! Utilities for binary serialization/deserialization.\n//!\n//! The [`BeSer`] trait allows us to define data structures\n//! that can match data structures that are sent over the wire\n//! in big-endian form with no packing.\n//!\n//! The [`LeSer`] trait does the same thing, in little-endian form.\n//!\n//! Note: you will get a compile error if you try to `use` both traits\n//! in the same module or scope. This is intended to be a safety\n//! mechanism: mixing big-endian and little-endian encoding in the same file\n//! is error-prone.\n\n#![warn(missing_docs)]\n\nuse std::io::{self, Read, Write};\n\nuse bincode::Options;\nuse serde::Serialize;\nuse serde::de::DeserializeOwned;\nuse thiserror::Error;\n\n/// An error that occurred during a deserialize operation\n///\n/// This could happen because the input data was too short,\n/// or because an invalid value was encountered.\n#[derive(Debug, Error)]\npub enum DeserializeError {\n    /// The deserializer isn't able to deserialize the supplied data.\n    #[error(\"deserialize error\")]\n    BadInput,\n    /// While deserializing from a `Read` source, an `io::Error` occurred.\n    #[error(\"deserialize error: {0}\")]\n    Io(io::Error),\n}\n\nimpl From<bincode::Error> for DeserializeError {\n    fn from(e: bincode::Error) -> Self {\n        match *e {\n            bincode::ErrorKind::Io(io_err) => DeserializeError::Io(io_err),\n            _ => DeserializeError::BadInput,\n        }\n    }\n}\n\n/// An error that occurred during a serialize operation\n///\n/// This probably means our [`Write`] failed, e.g. 
we tried\n/// to write beyond the end of a buffer.\n#[derive(Debug, Error)]\npub enum SerializeError {\n    /// The serializer isn't able to serialize the supplied data.\n    #[error(\"serialize error\")]\n    BadInput,\n    /// While serializing into a `Write` sink, an `io::Error` occurred.\n    #[error(\"serialize error: {0}\")]\n    Io(io::Error),\n}\n\nimpl From<bincode::Error> for SerializeError {\n    fn from(e: bincode::Error) -> Self {\n        match *e {\n            bincode::ErrorKind::Io(io_err) => SerializeError::Io(io_err),\n            _ => SerializeError::BadInput,\n        }\n    }\n}\n\n/// A shortcut that configures big-endian binary serialization\n///\n/// Properties:\n/// - Big endian\n/// - Fixed integer encoding (i.e. 1u32 is 00000001 not 01)\n///\n/// Does not allow trailing bytes in deserialization. If this is desired, you\n/// may set [`Options::allow_trailing_bytes`] to explicitly accommodate this.\npub fn be_coder() -> impl Options {\n    bincode::DefaultOptions::new()\n        .with_big_endian()\n        .with_fixint_encoding()\n}\n\n/// A shortcut that configures little-endian binary serialization\n///\n/// Properties:\n/// - Little endian\n/// - Fixed integer encoding (i.e. 1u32 is 00000001 not 01)\n///\n/// Does not allow trailing bytes in deserialization. If this is desired, you\n/// may set [`Options::allow_trailing_bytes`] to explicitly accommodate this.\npub fn le_coder() -> impl Options {\n    bincode::DefaultOptions::new()\n        .with_little_endian()\n        .with_fixint_encoding()\n}\n\n/// Binary serialize/deserialize helper functions (Big Endian)\n///\npub trait BeSer {\n    /// Serialize into a byte slice\n    fn ser_into_slice(&self, mut b: &mut [u8]) -> Result<(), SerializeError>\n    where\n        Self: Serialize,\n    {\n        // &mut [u8] implements Write, but `ser_into` needs a mutable\n        // reference to that. 
So we need the slightly awkward \"mutable\n        // reference to a mutable reference.\n        self.ser_into(&mut b)\n    }\n\n    /// Serialize into a borrowed writer\n    ///\n    /// This is useful for most `Write` types except `&mut [u8]`, which\n    /// can more easily use [`ser_into_slice`](Self::ser_into_slice).\n    fn ser_into<W: Write>(&self, w: &mut W) -> Result<(), SerializeError>\n    where\n        Self: Serialize,\n    {\n        be_coder().serialize_into(w, &self).map_err(|e| e.into())\n    }\n\n    /// Serialize into a new heap-allocated buffer\n    fn ser(&self) -> Result<Vec<u8>, SerializeError>\n    where\n        Self: Serialize,\n    {\n        be_coder().serialize(&self).map_err(|e| e.into())\n    }\n\n    /// Deserialize from the full contents of a byte slice\n    ///\n    /// See also: [`BeSer::des_prefix`]\n    fn des(buf: &[u8]) -> Result<Self, DeserializeError>\n    where\n        Self: DeserializeOwned,\n    {\n        be_coder()\n            .deserialize(buf)\n            .or(Err(DeserializeError::BadInput))\n    }\n\n    /// Deserialize from a prefix of the byte slice\n    ///\n    /// Uses as much of the byte slice as is necessary to deserialize the\n    /// type, but does not guarantee that the entire slice is used.\n    ///\n    /// See also: [`BeSer::des`]\n    fn des_prefix(buf: &[u8]) -> Result<Self, DeserializeError>\n    where\n        Self: DeserializeOwned,\n    {\n        be_coder()\n            .allow_trailing_bytes()\n            .deserialize(buf)\n            .or(Err(DeserializeError::BadInput))\n    }\n\n    /// Deserialize from a reader\n    fn des_from<R: Read>(r: &mut R) -> Result<Self, DeserializeError>\n    where\n        Self: DeserializeOwned,\n    {\n        be_coder().deserialize_from(r).map_err(|e| e.into())\n    }\n\n    /// Compute the serialized size of a data structure\n    ///\n    /// Note: it may be faster to serialize to a buffer and then measure the\n    /// buffer length, than to call 
`serialized_size` and then `ser_into`.\n    fn serialized_size(&self) -> Result<u64, SerializeError>\n    where\n        Self: Serialize,\n    {\n        be_coder().serialized_size(self).map_err(|e| e.into())\n    }\n}\n\n/// Binary serialize/deserialize helper functions (Little Endian)\n///\npub trait LeSer {\n    /// Serialize into a byte slice\n    fn ser_into_slice(&self, mut b: &mut [u8]) -> Result<(), SerializeError>\n    where\n        Self: Serialize,\n    {\n        // &mut [u8] implements Write, but `ser_into` needs a mutable\n        // reference to that. So we need the slightly awkward \"mutable\n        // reference to a mutable reference.\n        self.ser_into(&mut b)\n    }\n\n    /// Serialize into a borrowed writer\n    ///\n    /// This is useful for most `Write` types except `&mut [u8]`, which\n    /// can more easily use [`ser_into_slice`](Self::ser_into_slice).\n    fn ser_into<W: Write>(&self, w: &mut W) -> Result<(), SerializeError>\n    where\n        Self: Serialize,\n    {\n        le_coder().serialize_into(w, &self).map_err(|e| e.into())\n    }\n\n    /// Serialize into a new heap-allocated buffer\n    fn ser(&self) -> Result<Vec<u8>, SerializeError>\n    where\n        Self: Serialize,\n    {\n        le_coder().serialize(&self).map_err(|e| e.into())\n    }\n\n    /// Deserialize from the full contents of a byte slice\n    ///\n    /// See also: [`LeSer::des_prefix`]\n    fn des(buf: &[u8]) -> Result<Self, DeserializeError>\n    where\n        Self: DeserializeOwned,\n    {\n        le_coder()\n            .deserialize(buf)\n            .or(Err(DeserializeError::BadInput))\n    }\n\n    /// Deserialize from a prefix of the byte slice\n    ///\n    /// Uses as much of the byte slice as is necessary to deserialize the\n    /// type, but does not guarantee that the entire slice is used.\n    ///\n    /// See also: [`LeSer::des`]\n    fn des_prefix(buf: &[u8]) -> Result<Self, DeserializeError>\n    where\n        Self: DeserializeOwned,\n   
 {\n        le_coder()\n            .allow_trailing_bytes()\n            .deserialize(buf)\n            .or(Err(DeserializeError::BadInput))\n    }\n\n    /// Deserialize from a reader\n    fn des_from<R: Read>(r: &mut R) -> Result<Self, DeserializeError>\n    where\n        Self: DeserializeOwned,\n    {\n        le_coder().deserialize_from(r).map_err(|e| e.into())\n    }\n\n    /// Compute the serialized size of a data structure\n    ///\n    /// Note: it may be faster to serialize to a buffer and then measure the\n    /// buffer length, than to call `serialized_size` and then `ser_into`.\n    fn serialized_size(&self) -> Result<u64, SerializeError>\n    where\n        Self: Serialize,\n    {\n        le_coder().serialized_size(self).map_err(|e| e.into())\n    }\n}\n\n// Because usage of `BeSer` or `LeSer` can be done with *either* a Serialize or\n// DeserializeOwned implementation, the blanket implementation has to be for every type.\nimpl<T> BeSer for T {}\nimpl<T> LeSer for T {}\n\n#[cfg(test)]\nmod tests {\n    use std::io::Cursor;\n\n    use serde::{Deserialize, Serialize};\n\n    use super::DeserializeError;\n\n    #[derive(Debug, PartialEq, Eq, Serialize, Deserialize)]\n    pub struct ShortStruct {\n        a: u8,\n        b: u32,\n    }\n\n    const SHORT1: ShortStruct = ShortStruct { a: 7, b: 65536 };\n    const SHORT1_ENC_BE: &[u8] = &[7, 0, 1, 0, 0];\n    const SHORT1_ENC_BE_TRAILING: &[u8] = &[7, 0, 1, 0, 0, 255, 255, 255];\n    const SHORT1_ENC_LE: &[u8] = &[7, 0, 0, 1, 0];\n    const SHORT1_ENC_LE_TRAILING: &[u8] = &[7, 0, 0, 1, 0, 255, 255, 255];\n\n    const SHORT2: ShortStruct = ShortStruct {\n        a: 8,\n        b: 0x07030000,\n    };\n    const SHORT2_ENC_BE: &[u8] = &[8, 7, 3, 0, 0];\n    const SHORT2_ENC_BE_TRAILING: &[u8] = &[8, 7, 3, 0, 0, 0xff, 0xff, 0xff];\n    const SHORT2_ENC_LE: &[u8] = &[8, 0, 0, 3, 7];\n    const SHORT2_ENC_LE_TRAILING: &[u8] = &[8, 0, 0, 3, 7, 0xff, 0xff, 0xff];\n\n    #[derive(Debug, PartialEq, Eq, Serialize, 
Deserialize)]\n    struct NewTypeStruct(u32);\n    const NT1: NewTypeStruct = NewTypeStruct(414243);\n    const NT1_INNER: u32 = 414243;\n\n    #[derive(Debug, PartialEq, Eq, Serialize, Deserialize)]\n    pub struct LongMsg {\n        pub tag: u8,\n        pub blockpos: u32,\n        pub last_flush_position: u64,\n        pub apply: u64,\n        pub timestamp: i64,\n        pub reply_requested: u8,\n    }\n\n    const LONG1: LongMsg = LongMsg {\n        tag: 42,\n        blockpos: 0x1000_2000,\n        last_flush_position: 0x1234_2345_3456_4567,\n        apply: 0x9876_5432_10FE_DCBA,\n        timestamp: 0x7788_99AA_BBCC_DDFF,\n        reply_requested: 1,\n    };\n\n    #[test]\n    fn be_short() {\n        use super::BeSer;\n\n        assert_eq!(SHORT1.serialized_size().unwrap(), 5);\n\n        let encoded = SHORT1.ser().unwrap();\n        assert_eq!(encoded, SHORT1_ENC_BE);\n\n        let decoded = ShortStruct::des(SHORT2_ENC_BE).unwrap();\n        assert_eq!(decoded, SHORT2);\n\n        // with trailing data\n        let decoded = ShortStruct::des_prefix(SHORT2_ENC_BE_TRAILING).unwrap();\n        assert_eq!(decoded, SHORT2);\n        let err = ShortStruct::des(SHORT2_ENC_BE_TRAILING).unwrap_err();\n        assert!(matches!(err, DeserializeError::BadInput));\n\n        // serialize into a `Write` sink.\n        let mut buf = Cursor::new(vec![0xFF; 8]);\n        SHORT1.ser_into(&mut buf).unwrap();\n        assert_eq!(buf.into_inner(), SHORT1_ENC_BE_TRAILING);\n\n        // deserialize from a `Write` sink.\n        let mut buf = Cursor::new(SHORT2_ENC_BE);\n        let decoded = ShortStruct::des_from(&mut buf).unwrap();\n        assert_eq!(decoded, SHORT2);\n\n        // deserialize from a `Write` sink that terminates early.\n        let mut buf = Cursor::new([0u8; 4]);\n        let err = ShortStruct::des_from(&mut buf).unwrap_err();\n        assert!(matches!(err, DeserializeError::Io(_)));\n    }\n\n    #[test]\n    fn le_short() {\n        use super::LeSer;\n\n   
     assert_eq!(SHORT1.serialized_size().unwrap(), 5);\n\n        let encoded = SHORT1.ser().unwrap();\n        assert_eq!(encoded, SHORT1_ENC_LE);\n\n        let decoded = ShortStruct::des(SHORT2_ENC_LE).unwrap();\n        assert_eq!(decoded, SHORT2);\n\n        // with trailing data\n        let decoded = ShortStruct::des_prefix(SHORT2_ENC_LE_TRAILING).unwrap();\n        assert_eq!(decoded, SHORT2);\n        let err = ShortStruct::des(SHORT2_ENC_LE_TRAILING).unwrap_err();\n        assert!(matches!(err, DeserializeError::BadInput));\n\n        // serialize into a `Write` sink.\n        let mut buf = Cursor::new(vec![0xFF; 8]);\n        SHORT1.ser_into(&mut buf).unwrap();\n        assert_eq!(buf.into_inner(), SHORT1_ENC_LE_TRAILING);\n\n        // deserialize from a `Write` sink.\n        let mut buf = Cursor::new(SHORT2_ENC_LE);\n        let decoded = ShortStruct::des_from(&mut buf).unwrap();\n        assert_eq!(decoded, SHORT2);\n\n        // deserialize from a `Write` sink that terminates early.\n        let mut buf = Cursor::new([0u8; 4]);\n        let err = ShortStruct::des_from(&mut buf).unwrap_err();\n        assert!(matches!(err, DeserializeError::Io(_)));\n    }\n\n    #[test]\n    fn be_long() {\n        use super::BeSer;\n\n        assert_eq!(LONG1.serialized_size().unwrap(), 30);\n\n        let msg = LONG1;\n\n        let encoded = msg.ser().unwrap();\n        let expected = hex_literal::hex!(\n            \"2A 1000 2000 1234 2345 3456 4567 9876 5432 10FE DCBA 7788 99AA BBCC DDFF 01\"\n        );\n        assert_eq!(encoded, expected);\n\n        let msg2 = LongMsg::des(&encoded).unwrap();\n        assert_eq!(msg, msg2);\n    }\n\n    #[test]\n    fn le_long() {\n        use super::LeSer;\n\n        assert_eq!(LONG1.serialized_size().unwrap(), 30);\n\n        let msg = LONG1;\n\n        let encoded = msg.ser().unwrap();\n        let expected = hex_literal::hex!(\n            \"2A 0020 0010 6745 5634 4523 3412 BADC FE10 3254 7698 FFDD CCBB AA99 8877 
01\"\n        );\n        assert_eq!(encoded, expected);\n\n        let msg2 = LongMsg::des(&encoded).unwrap();\n        assert_eq!(msg, msg2);\n    }\n\n    #[test]\n    /// Ensure that newtype wrappers around u32 don't change the serialization format\n    fn be_nt() {\n        use super::BeSer;\n\n        assert_eq!(NT1.serialized_size().unwrap(), 4);\n\n        let msg = NT1;\n\n        let encoded = msg.ser().unwrap();\n        let expected = hex_literal::hex!(\"0006 5223\");\n        assert_eq!(encoded, expected);\n\n        assert_eq!(encoded, NT1_INNER.ser().unwrap());\n\n        let msg2 = NewTypeStruct::des(&encoded).unwrap();\n        assert_eq!(msg, msg2);\n    }\n\n    #[test]\n    /// Ensure that newtype wrappers around u32 don't change the serialization format\n    fn le_nt() {\n        use super::LeSer;\n\n        assert_eq!(NT1.serialized_size().unwrap(), 4);\n\n        let msg = NT1;\n\n        let encoded = msg.ser().unwrap();\n        let expected = hex_literal::hex!(\"2352 0600\");\n        assert_eq!(encoded, expected);\n\n        assert_eq!(encoded, NT1_INNER.ser().unwrap());\n\n        let msg2 = NewTypeStruct::des(&encoded).unwrap();\n        assert_eq!(msg, msg2);\n    }\n}\n"
  },
  {
    "path": "libs/utils/src/circuit_breaker.rs",
    "content": "use std::fmt::Display;\nuse std::time::{Duration, Instant};\n\nuse metrics::IntCounter;\n\n/// Circuit breakers are for operations that are expensive and fallible.\n///\n/// If a circuit breaker fails repeatedly, we will stop attempting it for some\n/// period of time, to avoid denial-of-service from retries, and\n/// to mitigate the log spam from repeated failures.\npub struct CircuitBreaker {\n    /// An identifier that enables us to log useful errors when a circuit is broken\n    name: String,\n\n    /// Consecutive failures since last success\n    fail_count: usize,\n\n    /// How many consecutive failures before we break the circuit\n    fail_threshold: usize,\n\n    /// If circuit is broken, when was it broken?\n    broken_at: Option<Instant>,\n\n    /// If set, we will auto-reset the circuit this long after it was broken.  If None, broken\n    /// circuits stay broken forever, or until success() is called.\n    reset_period: Option<Duration>,\n\n    /// If this is true, no actual circuit-breaking happens.  
This is for overriding a circuit breaker\n    /// to permit something to keep running even if it would otherwise have tripped it.\n    short_circuit: bool,\n}\n\nimpl CircuitBreaker {\n    pub fn new(name: String, fail_threshold: usize, reset_period: Option<Duration>) -> Self {\n        Self {\n            name,\n            fail_count: 0,\n            fail_threshold,\n            broken_at: None,\n            reset_period,\n            short_circuit: false,\n        }\n    }\n\n    /// Construct an unbreakable circuit breaker, for use in unit tests etc.\n    pub fn short_circuit() -> Self {\n        Self {\n            name: String::new(),\n            fail_threshold: 0,\n            fail_count: 0,\n            broken_at: None,\n            reset_period: None,\n            short_circuit: true,\n        }\n    }\n\n    pub fn fail<E>(&mut self, metric: &IntCounter, error: E)\n    where\n        E: Display,\n    {\n        if self.short_circuit {\n            return;\n        }\n\n        self.fail_count += 1;\n        if self.broken_at.is_none() && self.fail_count >= self.fail_threshold {\n            self.break_circuit(metric, error);\n        }\n    }\n\n    /// Call this after successfully executing an operation\n    pub fn success(&mut self, metric: &IntCounter) {\n        self.fail_count = 0;\n        if let Some(broken_at) = &self.broken_at {\n            tracing::info!(breaker=%self.name, \"Circuit breaker failure ended (was broken for {})\",\n                humantime::format_duration(broken_at.elapsed()));\n            self.broken_at = None;\n            metric.inc();\n        }\n    }\n\n    /// Call this before attempting an operation, and skip the operation if we are currently broken.\n    pub fn is_broken(&mut self) -> bool {\n        if self.short_circuit {\n            return false;\n        }\n\n        if let Some(broken_at) = self.broken_at {\n            match self.reset_period {\n                Some(reset_period) if broken_at.elapsed() > 
reset_period => {\n                    self.reset_circuit();\n                    false\n                }\n                _ => true,\n            }\n        } else {\n            false\n        }\n    }\n\n    fn break_circuit<E>(&mut self, metric: &IntCounter, error: E)\n    where\n        E: Display,\n    {\n        self.broken_at = Some(Instant::now());\n        tracing::error!(breaker=%self.name, \"Circuit breaker broken!  Last error: {error}\");\n        metric.inc();\n    }\n\n    fn reset_circuit(&mut self) {\n        self.broken_at = None;\n        self.fail_count = 0;\n    }\n}\n"
  },
  {
    "path": "libs/utils/src/completion.rs",
    "content": "use tokio_util::task::TaskTracker;\nuse tokio_util::task::task_tracker::TaskTrackerToken;\n\n/// While a reference is kept around, the associated [`Barrier::wait`] will wait.\n///\n/// Can be cloned, moved and kept around in futures as \"guard objects\".\n#[derive(Clone)]\npub struct Completion {\n    token: TaskTrackerToken,\n}\n\nimpl std::fmt::Debug for Completion {\n    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {\n        f.debug_struct(\"Completion\")\n            .field(\"siblings\", &self.token.task_tracker().len())\n            .finish()\n    }\n}\n\nimpl Completion {\n    /// Returns true if this completion is associated with the given barrier.\n    pub fn blocks(&self, barrier: &Barrier) -> bool {\n        TaskTracker::ptr_eq(self.token.task_tracker(), &barrier.0)\n    }\n\n    pub fn barrier(&self) -> Barrier {\n        Barrier(self.token.task_tracker().clone())\n    }\n}\n\n/// Barrier will wait until all clones of [`Completion`] have been dropped.\n#[derive(Clone)]\npub struct Barrier(TaskTracker);\n\nimpl std::fmt::Debug for Barrier {\n    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {\n        f.debug_struct(\"Barrier\")\n            .field(\"remaining\", &self.0.len())\n            .finish()\n    }\n}\n\nimpl Default for Barrier {\n    fn default() -> Self {\n        let (_, rx) = channel();\n        rx\n    }\n}\n\nimpl Barrier {\n    pub async fn wait(self) {\n        self.0.wait().await;\n    }\n\n    pub async fn maybe_wait(barrier: Option<Barrier>) {\n        if let Some(b) = barrier {\n            b.wait().await\n        }\n    }\n\n    /// Return true if a call to wait() would complete immediately\n    pub fn is_ready(&self) -> bool {\n        futures::future::FutureExt::now_or_never(self.0.wait()).is_some()\n    }\n}\n\nimpl PartialEq for Barrier {\n    fn eq(&self, other: &Self) -> bool {\n        TaskTracker::ptr_eq(&self.0, &other.0)\n    }\n}\n\nimpl Eq for Barrier {}\n\n/// 
Create new Guard and Barrier pair.\npub fn channel() -> (Completion, Barrier) {\n    let tracker = TaskTracker::new();\n    // otherwise wait never exits\n    tracker.close();\n\n    let token = tracker.token();\n    (Completion { token }, Barrier(tracker))\n}\n"
  },
  {
    "path": "libs/utils/src/crashsafe.rs",
    "content": "use std::borrow::Cow;\nuse std::fs::{self, File};\nuse std::io::{self, Write};\nuse std::os::fd::AsFd;\n\nuse camino::{Utf8Path, Utf8PathBuf};\n\n/// Similar to [`std::fs::create_dir`], except we fsync the\n/// created directory and its parent.\npub fn create_dir(path: impl AsRef<Utf8Path>) -> io::Result<()> {\n    let path = path.as_ref();\n\n    fs::create_dir(path)?;\n    fsync_file_and_parent(path)?;\n    Ok(())\n}\n\n/// Similar to [`std::fs::create_dir_all`], except we fsync all\n/// newly created directories and the pre-existing parent.\npub fn create_dir_all(path: impl AsRef<Utf8Path>) -> io::Result<()> {\n    let mut path = path.as_ref();\n\n    let mut dirs_to_create = Vec::new();\n\n    // Figure out which directories we need to create.\n    loop {\n        match path.metadata() {\n            Ok(metadata) if metadata.is_dir() => break,\n            Ok(_) => {\n                return Err(io::Error::new(\n                    io::ErrorKind::AlreadyExists,\n                    format!(\"non-directory found in path: {path}\"),\n                ));\n            }\n            Err(ref e) if e.kind() == io::ErrorKind::NotFound => {}\n            Err(e) => return Err(e),\n        }\n\n        dirs_to_create.push(path);\n\n        match path.parent() {\n            Some(parent) => path = parent,\n            None => {\n                return Err(io::Error::new(\n                    io::ErrorKind::InvalidInput,\n                    format!(\"can't find parent of path '{path}'\"),\n                ));\n            }\n        }\n    }\n\n    // Create directories from parent to child.\n    for &path in dirs_to_create.iter().rev() {\n        fs::create_dir(path)?;\n    }\n\n    // Fsync the created directories from child to parent.\n    for &path in dirs_to_create.iter() {\n        fsync(path)?;\n    }\n\n    // If we created any new directories, fsync the parent.\n    if !dirs_to_create.is_empty() {\n        fsync(path)?;\n    }\n\n    
Ok(())\n}\n\n/// Adds a suffix to the file(directory) name, either appending the suffix to the end of its extension,\n/// or if there's no extension, creates one and puts a suffix there.\npub fn path_with_suffix_extension(\n    original_path: impl AsRef<Utf8Path>,\n    suffix: &str,\n) -> Utf8PathBuf {\n    let new_extension = match original_path.as_ref().extension() {\n        Some(extension) => Cow::Owned(format!(\"{extension}.{suffix}\")),\n        None => Cow::Borrowed(suffix),\n    };\n    original_path.as_ref().with_extension(new_extension)\n}\n\npub fn fsync_file_and_parent(file_path: &Utf8Path) -> io::Result<()> {\n    let parent = file_path\n        .parent()\n        .ok_or_else(|| io::Error::other(format!(\"File {file_path:?} has no parent\")))?;\n\n    fsync(file_path)?;\n    fsync(parent)?;\n    Ok(())\n}\n\npub fn fsync(path: &Utf8Path) -> io::Result<()> {\n    File::open(path)\n        .map_err(|e| io::Error::new(e.kind(), format!(\"Failed to open the file {path:?}: {e}\")))\n        .and_then(|file| {\n            file.sync_all().map_err(|e| {\n                io::Error::new(\n                    e.kind(),\n                    format!(\"Failed to sync file {path:?} data and metadata: {e}\"),\n                )\n            })\n        })\n        .map_err(|e| io::Error::new(e.kind(), format!(\"Failed to fsync file {path:?}: {e}\")))\n}\n\npub async fn fsync_async(path: impl AsRef<Utf8Path>) -> Result<(), std::io::Error> {\n    tokio::fs::File::open(path.as_ref()).await?.sync_all().await\n}\n\npub async fn fsync_async_opt(\n    path: impl AsRef<Utf8Path>,\n    do_fsync: bool,\n) -> Result<(), std::io::Error> {\n    if do_fsync {\n        fsync_async(path.as_ref()).await?;\n    }\n    Ok(())\n}\n\n/// Like postgres' durable_rename, renames a file and issues fsyncs to make it durable. After\n/// returning, both the file and rename are guaranteed to be persisted. 
Both paths must be on the\n/// same file system.\n///\n/// Unlike postgres, it only fsyncs 1) the file to make contents durable, and 2) the directory to\n/// make the rename durable. This sequence ensures the target file will never be incomplete.\n///\n/// Postgres also:\n///\n/// * Fsyncs the target file, if it exists, before the rename, to ensure either the new or existing\n///   file survives a crash. Current callers don't need this as it should already be fsynced if\n///   durability is needed.\n///\n/// * Fsyncs the file after the rename. This can be required with certain OSes or file systems (e.g.\n///   NFS), but not on Linux with most common file systems like ext4 (which we currently use).\n///\n/// An audit of 8 other databases found that none fsynced the file after a rename:\n/// <https://github.com/neondatabase/neon/pull/9686#discussion_r1837180535>\n///\n/// eBPF probes confirmed that this is sufficient with ext4, XFS, and ZFS, but possibly not Btrfs:\n/// <https://github.com/neondatabase/neon/pull/9686#discussion_r1837926218>\n///\n/// virtual_file.rs has similar code, but it doesn't use vfs.\n///\n/// Useful links: <https://lwn.net/Articles/457667/>\n/// <https://www.postgresql.org/message-id/flat/56583BDD.9060302%402ndquadrant.com>\n/// <https://thunk.org/tytso/blog/2009/03/15/dont-fear-the-fsync/>\npub async fn durable_rename(\n    old_path: impl AsRef<Utf8Path>,\n    new_path: impl AsRef<Utf8Path>,\n    do_fsync: bool,\n) -> io::Result<()> {\n    // first fsync the file\n    fsync_async_opt(old_path.as_ref(), do_fsync).await?;\n\n    // Time to do the real deal.\n    tokio::fs::rename(old_path.as_ref(), new_path.as_ref()).await?;\n\n    // Now fsync the parent\n    let parent = match new_path.as_ref().parent() {\n        Some(p) => p,\n        None => Utf8Path::new(\"./\"), // assume current dir if there is no parent\n    };\n    fsync_async_opt(parent, do_fsync).await?;\n\n    Ok(())\n}\n\n/// Writes a file to the specified `final_path` in a crash 
safe fashion, using [`std::fs`].\n///\n/// The file is first written to the specified `tmp_path`, and in a second\n/// step, the `tmp_path` is renamed to the `final_path`. Intermediary fsync\n/// and atomic rename guarantee that, if we crash at any point, there will never\n/// be a partially written file at `final_path` (but maybe at `tmp_path`).\n///\n/// Callers are responsible for serializing calls of this function for a given `final_path`.\n/// If they don't, there may be an error due to conflicting `tmp_path`, or there will\n/// be no error and the content of `final_path` will be the \"winner\" caller's `content`.\n/// I.e., the atomicity guarantees still hold.\npub fn overwrite(\n    final_path: &Utf8Path,\n    tmp_path: &Utf8Path,\n    content: &[u8],\n) -> std::io::Result<()> {\n    let Some(final_path_parent) = final_path.parent() else {\n        return Err(std::io::Error::from_raw_os_error(\n            nix::errno::Errno::EINVAL as i32,\n        ));\n    };\n    std::fs::remove_file(tmp_path).or_else(crate::fs_ext::ignore_not_found)?;\n    let mut file = std::fs::OpenOptions::new()\n        .write(true)\n        // Use `create_new` so that, if we race with ourselves or something else,\n        // we bail out instead of causing damage.\n        .create_new(true)\n        .open(tmp_path)?;\n    file.write_all(content)?;\n    file.sync_all()?;\n    drop(file); // don't keep the fd open for longer than we have to\n\n    std::fs::rename(tmp_path, final_path)?;\n\n    let final_parent_dirfd = std::fs::OpenOptions::new()\n        .read(true)\n        .open(final_path_parent)?;\n\n    final_parent_dirfd.sync_all()?;\n    Ok(())\n}\n\n/// Syncs the filesystem for the given file descriptor.\n#[cfg_attr(target_os = \"macos\", allow(unused_variables))]\npub fn syncfs(fd: impl AsFd) -> anyhow::Result<()> {\n    // Linux guarantees durability for syncfs.\n    // POSIX doesn't have syncfs, and further does not actually guarantee durability of sync().\n    
#[cfg(target_os = \"linux\")]\n    {\n        use anyhow::Context;\n        nix::unistd::syncfs(fd).context(\"syncfs\")?;\n    }\n    #[cfg(target_os = \"macos\")]\n    {\n        // macOS is not a production platform for Neon, don't even bother.\n    }\n    #[cfg(not(any(target_os = \"linux\", target_os = \"macos\")))]\n    {\n        compile_error!(\"Unsupported OS\");\n    }\n    Ok(())\n}\n\n#[cfg(test)]\nmod tests {\n\n    use super::*;\n\n    #[test]\n    fn test_create_dir_fsyncd() {\n        let dir = camino_tempfile::tempdir().unwrap();\n\n        let existing_dir_path = dir.path();\n        let err = create_dir(existing_dir_path).unwrap_err();\n        assert_eq!(err.kind(), io::ErrorKind::AlreadyExists);\n\n        let child_dir = existing_dir_path.join(\"child\");\n        create_dir(child_dir).unwrap();\n\n        let nested_child_dir = existing_dir_path.join(\"child1\").join(\"child2\");\n        let err = create_dir(nested_child_dir).unwrap_err();\n        assert_eq!(err.kind(), io::ErrorKind::NotFound);\n    }\n\n    #[test]\n    fn test_create_dir_all_fsyncd() {\n        let dir = camino_tempfile::tempdir().unwrap();\n\n        let existing_dir_path = dir.path();\n        create_dir_all(existing_dir_path).unwrap();\n\n        let child_dir = existing_dir_path.join(\"child\");\n        assert!(!child_dir.exists());\n        create_dir_all(&child_dir).unwrap();\n        assert!(child_dir.exists());\n\n        let nested_child_dir = existing_dir_path.join(\"child1\").join(\"child2\");\n        assert!(!nested_child_dir.exists());\n        create_dir_all(&nested_child_dir).unwrap();\n        assert!(nested_child_dir.exists());\n\n        let file_path = existing_dir_path.join(\"file\");\n        std::fs::write(&file_path, b\"\").unwrap();\n\n        let err = create_dir_all(&file_path).unwrap_err();\n        assert_eq!(err.kind(), io::ErrorKind::AlreadyExists);\n\n        let invalid_dir_path = file_path.join(\"folder\");\n        
create_dir_all(invalid_dir_path).unwrap_err();\n    }\n\n    #[test]\n    fn test_path_with_suffix_extension() {\n        let p = Utf8PathBuf::from(\"/foo/bar\");\n        assert_eq!(\n            &path_with_suffix_extension(p, \"temp\").to_string(),\n            \"/foo/bar.temp\"\n        );\n        let p = Utf8PathBuf::from(\"/foo/bar\");\n        assert_eq!(\n            &path_with_suffix_extension(p, \"temp.temp\").to_string(),\n            \"/foo/bar.temp.temp\"\n        );\n        let p = Utf8PathBuf::from(\"/foo/bar.baz\");\n        assert_eq!(\n            &path_with_suffix_extension(p, \"temp.temp\").to_string(),\n            \"/foo/bar.baz.temp.temp\"\n        );\n        let p = Utf8PathBuf::from(\"/foo/bar.baz\");\n        assert_eq!(\n            &path_with_suffix_extension(p, \".temp\").to_string(),\n            \"/foo/bar.baz..temp\"\n        );\n        let p = Utf8PathBuf::from(\"/foo/bar/dir/\");\n        assert_eq!(\n            &path_with_suffix_extension(p, \".temp\").to_string(),\n            \"/foo/bar/dir..temp\"\n        );\n    }\n}\n"
  },
  {
    "path": "libs/utils/src/elapsed_accum.rs",
    "content": "use std::time::{Duration, Instant};\n\n#[derive(Default)]\npub struct ElapsedAccum {\n    accum: Duration,\n}\n\nimpl ElapsedAccum {\n    pub fn get(&self) -> Duration {\n        self.accum\n    }\n    pub fn guard(&mut self) -> impl Drop + '_ {\n        let start = Instant::now();\n        scopeguard::guard(start, |last_wait_at| {\n            self.accum += Instant::now() - last_wait_at;\n        })\n    }\n\n    pub async fn measure<Fut, O>(&mut self, fut: Fut) -> O\n    where\n        Fut: Future<Output = O>,\n    {\n        let _guard = self.guard();\n        fut.await\n    }\n}\n"
  },
  {
    "path": "libs/utils/src/env.rs",
    "content": "//! Wrapper around `std::env::var` for parsing environment variables.\n\nuse std::fmt::Display;\nuse std::str::FromStr;\n\n/// For types `V` that implement [`FromStr`].\npub fn var<V, E>(varname: &str) -> Option<V>\nwhere\n    V: FromStr<Err = E>,\n    E: Display,\n{\n    match std::env::var(varname) {\n        Ok(s) => Some(\n            s.parse()\n                .map_err(|e| {\n                    format!(\"failed to parse env var {varname} using FromStr::parse: {e:#}\")\n                })\n                .unwrap(),\n        ),\n        Err(std::env::VarError::NotPresent) => None,\n        Err(std::env::VarError::NotUnicode(_)) => {\n            panic!(\"env var {varname} is not unicode\")\n        }\n    }\n}\n\n/// For types `V` that implement [`serde::de::DeserializeOwned`].\npub fn var_serde_json_string<V>(varname: &str) -> Option<V>\nwhere\n    V: serde::de::DeserializeOwned,\n{\n    match std::env::var(varname) {\n        Ok(s) => Some({\n            let value = serde_json::Value::String(s);\n            serde_json::from_value(value)\n                .map_err(|e| {\n                    format!(\"failed to parse env var {varname} as a serde_json json string: {e:#}\")\n                })\n                .unwrap()\n        }),\n        Err(std::env::VarError::NotPresent) => None,\n        Err(std::env::VarError::NotUnicode(_)) => {\n            panic!(\"env var {varname} is not unicode\")\n        }\n    }\n}\n\n/* BEGIN_HADRON */\npub enum DeploymentMode {\n    Local,\n    Dev,\n    Staging,\n    Prod,\n}\n\npub fn get_deployment_mode() -> Option<DeploymentMode> {\n    match std::env::var(\"DEPLOYMENT_MODE\") {\n        Ok(env) => match env.as_str() {\n            \"development\" => Some(DeploymentMode::Dev),\n            \"staging\" => Some(DeploymentMode::Staging),\n            \"production\" => Some(DeploymentMode::Prod),\n            _ => {\n                tracing::error!(\"Unexpected DEPLOYMENT_MODE: {}\", env);\n                
None\n            }\n        },\n        Err(_) => {\n            // tracing::error!(\"DEPLOYMENT_MODE not set\");\n            None\n        }\n    }\n}\n\npub fn is_dev_or_staging() -> bool {\n    matches!(\n        get_deployment_mode(),\n        Some(DeploymentMode::Dev) | Some(DeploymentMode::Staging)\n    )\n}\n\npub enum TestingMode {\n    Chaos,\n    Stress,\n}\n\npub fn get_test_mode() -> Option<TestingMode> {\n    match std::env::var(\"HADRON_TEST_MODE\") {\n        Ok(env) => match env.as_str() {\n            \"chaos\" => Some(TestingMode::Chaos),\n            \"stress\" => Some(TestingMode::Stress),\n            _ => {\n                tracing::error!(\"Unexpected HADRON_TEST_MODE: {}\", env);\n                None\n            }\n        },\n        Err(_) => {\n            tracing::error!(\"HADRON_TEST_MODE not set\");\n            None\n        }\n    }\n}\n\npub fn is_chaos_testing() -> bool {\n    matches!(get_test_mode(), Some(TestingMode::Chaos))\n}\n/* END_HADRON */\n"
  },
  {
    "path": "libs/utils/src/error.rs",
    "content": "/// Create a reporter for an error that outputs similar to [`anyhow::Error`] with Display with alternative setting.\n///\n/// It can be used with `anyhow::Error` as well.\n///\n/// Why would one use this instead of converting to `anyhow::Error` on the spot? Because\n/// anyhow::Error would also capture a stacktrace on the spot, which you would later discard after\n/// formatting.\n///\n/// ## Usage\n///\n/// ```rust\n/// #[derive(Debug, thiserror::Error)]\n/// enum MyCoolError {\n///   #[error(\"should never happen\")]\n///   Bad(#[source] std::io::Error),\n/// }\n///\n/// # fn failing_call() -> Result<(), MyCoolError> { Err(MyCoolError::Bad(std::io::ErrorKind::PermissionDenied.into())) }\n///\n/// # fn main() {\n/// use utils::error::report_compact_sources;\n///\n/// if let Err(e) = failing_call() {\n///     let e = report_compact_sources(&e);\n///     assert_eq!(format!(\"{e}\"), \"should never happen: permission denied\");\n/// }\n/// # }\n/// ```\n///\n/// ## TODO\n///\n/// When we are able to describe return position impl trait in traits, this should of course be an\n/// extension trait. Until then avoid boxing with this more awkward interface.\npub fn report_compact_sources<E: std::error::Error>(e: &E) -> impl std::fmt::Display + '_ {\n    struct AnyhowDisplayAlternateAlike<'a, E>(&'a E);\n\n    impl<E: std::error::Error> std::fmt::Display for AnyhowDisplayAlternateAlike<'_, E> {\n        fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {\n            write!(f, \"{}\", self.0)?;\n\n            // why is E a generic parameter here? 
hope that rustc will see through a default\n            // Error::source implementation and leave the following out if there cannot be any\n            // sources:\n            Sources(self.0.source()).try_for_each(|src| write!(f, \": {src}\"))\n        }\n    }\n\n    struct Sources<'a>(Option<&'a (dyn std::error::Error + 'static)>);\n\n    impl<'a> Iterator for Sources<'a> {\n        type Item = &'a (dyn std::error::Error + 'static);\n\n        fn next(&mut self) -> Option<Self::Item> {\n            let rem = self.0;\n\n            let next = self.0.and_then(|x| x.source());\n            self.0 = next;\n            rem\n        }\n    }\n\n    AnyhowDisplayAlternateAlike(e)\n}\n\n#[cfg(test)]\nmod tests {\n    use super::report_compact_sources;\n\n    #[test]\n    fn report_compact_sources_examples() {\n        use std::fmt::Write;\n\n        #[derive(Debug, thiserror::Error)]\n        enum EvictionError {\n            #[error(\"cannot evict a remote layer\")]\n            CannotEvictRemoteLayer,\n            #[error(\"stat failed\")]\n            StatFailed(#[source] std::io::Error),\n            #[error(\"layer was no longer part of LayerMap\")]\n            LayerNotFound(#[source] anyhow::Error),\n        }\n\n        let examples = [\n            (\n                line!(),\n                EvictionError::CannotEvictRemoteLayer,\n                \"cannot evict a remote layer\",\n            ),\n            (\n                line!(),\n                EvictionError::StatFailed(std::io::ErrorKind::PermissionDenied.into()),\n                \"stat failed: permission denied\",\n            ),\n            (\n                line!(),\n                EvictionError::LayerNotFound(anyhow::anyhow!(\"foobar\")),\n                \"layer was no longer part of LayerMap: foobar\",\n            ),\n        ];\n\n        let mut s = String::new();\n\n        for (line, example, expected) in examples {\n            s.clear();\n\n            write!(s, \"{}\", 
report_compact_sources(&example)).expect(\"string grows\");\n\n            assert_eq!(s, expected, \"example on line {line}\");\n        }\n    }\n}\n"
  },
  {
    "path": "libs/utils/src/failpoint_support.rs",
    "content": "//! Failpoint support code shared between pageserver and safekeepers.\n\nuse tokio_util::sync::CancellationToken;\n\n/// Declare a failpoint that can be used with the `pause` failpoint action.\n/// We don't want to block the executor thread, hence, spawn_blocking + await.\n///\n/// Optionally pass a cancellation token, and this failpoint will drop out of\n/// its pause when the cancellation token fires. This is useful for testing\n/// cases where we would like to block something, but test its clean shutdown behavior.\n/// The macro evaluates to a Result in that case, where Ok(()) is the case\n/// where the failpoint was not paused, and Err() is the case where cancellation\n/// token fired while evaluating the failpoint.\n///\n/// Remember to unpause the failpoint in the test; until that happens, one of the\n/// limited number of spawn_blocking thread pool threads is leaked.\n#[macro_export]\nmacro_rules! pausable_failpoint {\n    ($name:literal) => {{\n        if cfg!(feature = \"testing\") {\n            let cancel = ::tokio_util::sync::CancellationToken::new();\n            let _ = $crate::pausable_failpoint!($name, &cancel);\n        }\n    }};\n    ($name:literal, $cancel:expr) => {{\n        if cfg!(feature = \"testing\") {\n            let failpoint_fut = ::tokio::task::spawn_blocking({\n                let current = ::tracing::Span::current();\n                move || {\n                    let _entered = current.entered();\n                    ::tracing::info!(\"at failpoint {}\", $name);\n                    ::fail::fail_point!($name);\n                }\n            });\n            let cancel_fut = async move {\n                $cancel.cancelled().await;\n            };\n            ::tokio::select! 
{\n                res = failpoint_fut => {\n                    res.expect(\"spawn_blocking\");\n                    // continue with execution\n                    Ok(())\n                },\n                _ = cancel_fut => {\n                    Err(())\n                }\n            }\n        } else {\n            Ok(())\n        }\n    }};\n}\n\npub use pausable_failpoint;\n\n/// use with fail::cfg(\"$name\", \"return(2000)\")\n///\n/// The effect is similar to a \"sleep(2000)\" action, i.e. we sleep for the\n/// specified time (in milliseconds). The main difference is that we use async\n/// tokio sleep function. Another difference is that we print lines to the log,\n/// which can be useful in tests to check that the failpoint was hit.\n///\n/// Optionally pass a cancellation token, and this failpoint will drop out of\n/// its sleep when the cancellation token fires.  This is useful for testing\n/// cases where we would like to block something, but test its clean shutdown behavior.\n#[macro_export]\nmacro_rules! __failpoint_sleep_millis_async {\n    ($name:literal) => {{\n        // If the failpoint is used with a \"return\" action, set should_sleep to the\n        // returned value (as string). Otherwise it's set to None.\n        let should_sleep = (|| {\n            ::fail::fail_point!($name, |x| x);\n            ::std::option::Option::None\n        })();\n\n        // Sleep if the action was a returned value\n        if let ::std::option::Option::Some(duration_str) = should_sleep {\n            $crate::failpoint_support::failpoint_sleep_helper($name, duration_str).await\n        }\n    }};\n    ($name:literal, $cancel:expr) => {{\n        // If the failpoint is used with a \"return\" action, set should_sleep to the\n        // returned value (as string). 
Otherwise it's set to None.\n        let should_sleep = (|| {\n            ::fail::fail_point!($name, |x| x);\n            ::std::option::Option::None\n        })();\n\n        // Sleep if the action was a returned value\n        if let ::std::option::Option::Some(duration_str) = should_sleep {\n            $crate::failpoint_support::failpoint_sleep_cancellable_helper(\n                $name,\n                duration_str,\n                $cancel,\n            )\n            .await\n        }\n    }};\n}\npub use __failpoint_sleep_millis_async as sleep_millis_async;\n\n// Helper function used by the macro. (A function has nicer scoping so we\n// don't need to decorate everything with \"::\")\n#[doc(hidden)]\npub async fn failpoint_sleep_helper(name: &'static str, duration_str: String) {\n    let millis = duration_str.parse::<u64>().unwrap();\n    let d = std::time::Duration::from_millis(millis);\n\n    tracing::info!(\"failpoint {:?}: sleeping for {:?}\", name, d);\n    tokio::time::sleep(d).await;\n    tracing::info!(\"failpoint {:?}: sleep done\", name);\n}\n\n// Helper function used by the macro. 
(A function has nicer scoping so we\n// don't need to decorate everything with \"::\")\n#[doc(hidden)]\npub async fn failpoint_sleep_cancellable_helper(\n    name: &'static str,\n    duration_str: String,\n    cancel: &CancellationToken,\n) {\n    let millis = duration_str.parse::<u64>().unwrap();\n    let d = std::time::Duration::from_millis(millis);\n\n    tracing::info!(\"failpoint {:?}: sleeping for {:?}\", name, d);\n    tokio::time::timeout(d, cancel.cancelled()).await.ok();\n    tracing::info!(\"failpoint {:?}: sleep done\", name);\n}\n\n/// Initialize the configured failpoints\n///\n/// You must call this function before any concurrent threads do operations.\npub fn init() -> fail::FailScenario<'static> {\n    // The failpoints lib provides support for parsing the `FAILPOINTS` env var.\n    // We want non-default behavior for `exit`, though, so, we handle it separately.\n    //\n    // Format for FAILPOINTS is \"name=actions\" separated by \";\".\n    let actions = std::env::var(\"FAILPOINTS\");\n    if actions.is_ok() {\n        // SAFETY: this function should before any threads start and access env vars concurrently\n        unsafe {\n            std::env::remove_var(\"FAILPOINTS\");\n        }\n    } else {\n        // let the library handle non-utf8, or nothing for not present\n    }\n\n    let scenario = fail::FailScenario::setup();\n\n    if let Ok(val) = actions {\n        val.split(';')\n            .enumerate()\n            .map(|(i, s)| s.split_once('=').ok_or((i, s)))\n            .for_each(|res| {\n                let (name, actions) = match res {\n                    Ok(t) => t,\n                    Err((i, s)) => {\n                        panic!(\n                            \"startup failpoints: missing action on the {}th failpoint; try `{s}=return`\",\n                            i + 1,\n                        );\n                    }\n                };\n                if let Err(e) = apply_failpoint(name, actions) {\n                   
 panic!(\"startup failpoints: failed to apply failpoint {name}={actions}: {e}\");\n                }\n            });\n    }\n\n    scenario\n}\n\npub fn apply_failpoint(name: &str, actions: &str) -> Result<(), String> {\n    if actions == \"exit\" {\n        fail::cfg_callback(name, exit_failpoint)\n    } else {\n        fail::cfg(name, actions)\n    }\n}\n\n#[inline(never)]\nfn exit_failpoint() {\n    tracing::info!(\"Exit requested by failpoint\");\n    std::process::exit(1);\n}\n"
  },
  {
    "path": "libs/utils/src/fs_ext/rename_noreplace.rs",
    "content": "use nix::NixPath;\n\n/// Rename a file without replacing an existing file.\n///\n/// This is a wrapper around platform-specific APIs.\npub fn rename_noreplace<P1: ?Sized + NixPath, P2: ?Sized + NixPath>(\n    src: &P1,\n    dst: &P2,\n) -> nix::Result<()> {\n    {\n        #[cfg(all(target_os = \"linux\", target_env = \"gnu\"))]\n        {\n            nix::fcntl::renameat2(\n                nix::fcntl::AT_FDCWD,\n                src,\n                nix::fcntl::AT_FDCWD,\n                dst,\n                nix::fcntl::RenameFlags::RENAME_NOREPLACE,\n            )\n        }\n        #[cfg(target_os = \"macos\")]\n        {\n            let res = src.with_nix_path(|src| {\n                dst.with_nix_path(|dst|\n                    // SAFETY: `src` and `dst` are valid C strings as per the NixPath trait and they outlive the call to renamex_np.\n                    unsafe {\n                        nix::libc::renamex_np(src.as_ptr(), dst.as_ptr(), nix::libc::RENAME_EXCL)\n                })\n            })??;\n            nix::errno::Errno::result(res).map(drop)\n        }\n        #[cfg(not(any(all(target_os = \"linux\", target_env = \"gnu\"), target_os = \"macos\")))]\n        {\n            std::compile_error!(\"OS does not support no-replace renames\");\n        }\n    }\n}\n\n#[cfg(test)]\nmod test {\n    use std::fs;\n    use std::path::PathBuf;\n\n    use super::*;\n\n    fn testdir() -> camino_tempfile::Utf8TempDir {\n        match crate::env::var(\"NEON_UTILS_RENAME_NOREPLACE_TESTDIR\") {\n            Some(path) => {\n                let path: camino::Utf8PathBuf = path;\n                camino_tempfile::tempdir_in(path).unwrap()\n            }\n            None => camino_tempfile::tempdir().unwrap(),\n        }\n    }\n\n    #[test]\n    fn test_absolute_paths() {\n        let testdir = testdir();\n        println!(\"testdir: {}\", testdir.path());\n\n        let src = testdir.path().join(\"src\");\n        let dst = 
testdir.path().join(\"dst\");\n\n        fs::write(&src, b\"\").unwrap();\n        fs::write(&dst, b\"\").unwrap();\n\n        let src = src.canonicalize().unwrap();\n        assert!(src.is_absolute());\n        let dst = dst.canonicalize().unwrap();\n        assert!(dst.is_absolute());\n\n        let result = rename_noreplace(&src, &dst);\n        assert_eq!(result.unwrap_err(), nix::Error::EEXIST);\n    }\n\n    #[test]\n    fn test_relative_paths() {\n        let testdir = testdir();\n        println!(\"testdir: {}\", testdir.path());\n\n        // this is fine because we run in nextest => process per test\n        std::env::set_current_dir(testdir.path()).unwrap();\n\n        let src = PathBuf::from(\"src\");\n        let dst = PathBuf::from(\"dst\");\n\n        fs::write(&src, b\"\").unwrap();\n        fs::write(&dst, b\"\").unwrap();\n\n        let result = rename_noreplace(&src, &dst);\n        assert_eq!(result.unwrap_err(), nix::Error::EEXIST);\n    }\n\n    #[test]\n    fn test_works_when_not_exists() {\n        let testdir = testdir();\n        println!(\"testdir: {}\", testdir.path());\n\n        let src = testdir.path().join(\"src\");\n        let dst = testdir.path().join(\"dst\");\n\n        fs::write(&src, b\"content\").unwrap();\n\n        rename_noreplace(src.as_std_path(), dst.as_std_path()).unwrap();\n        assert_eq!(\n            \"content\",\n            String::from_utf8(std::fs::read(&dst).unwrap()).unwrap()\n        );\n    }\n}\n"
  },
  {
    "path": "libs/utils/src/fs_ext.rs",
    "content": "/// Extensions to `std::fs` types.\nuse std::{fs, io, path::Path};\n\nuse anyhow::Context;\n\n#[cfg(feature = \"rename_noreplace\")]\nmod rename_noreplace;\n#[cfg(feature = \"rename_noreplace\")]\npub use rename_noreplace::rename_noreplace;\n\npub trait PathExt {\n    /// Returns an error if `self` is not a directory.\n    fn is_empty_dir(&self) -> io::Result<bool>;\n}\n\nimpl<P> PathExt for P\nwhere\n    P: AsRef<Path>,\n{\n    fn is_empty_dir(&self) -> io::Result<bool> {\n        Ok(fs::read_dir(self)?.next().is_none())\n    }\n}\n\npub async fn is_directory_empty(path: impl AsRef<Path>) -> anyhow::Result<bool> {\n    let mut dir = tokio::fs::read_dir(&path)\n        .await\n        .context(format!(\"read_dir({})\", path.as_ref().display()))?;\n    Ok(dir.next_entry().await?.is_none())\n}\n\npub async fn list_dir(path: impl AsRef<Path>) -> anyhow::Result<Vec<String>> {\n    let mut dir = tokio::fs::read_dir(&path)\n        .await\n        .context(format!(\"read_dir({})\", path.as_ref().display()))?;\n\n    let mut content = vec![];\n    while let Some(next) = dir.next_entry().await? 
{\n        let file_name = next.file_name();\n        content.push(file_name.to_string_lossy().to_string());\n    }\n\n    Ok(content)\n}\n\npub fn ignore_not_found(e: io::Error) -> io::Result<()> {\n    if e.kind() == io::ErrorKind::NotFound {\n        Ok(())\n    } else {\n        Err(e)\n    }\n}\n\npub fn ignore_absent_files<F>(fs_operation: F) -> io::Result<()>\nwhere\n    F: Fn() -> io::Result<()>,\n{\n    fs_operation().or_else(ignore_not_found)\n}\n\n#[cfg(test)]\nmod test {\n    use super::ignore_absent_files;\n    use crate::fs_ext::{is_directory_empty, list_dir};\n\n    #[test]\n    fn is_empty_dir() {\n        use super::PathExt;\n\n        let dir = camino_tempfile::tempdir().unwrap();\n        let dir_path = dir.path();\n\n        // test positive case\n        assert!(\n            dir_path.is_empty_dir().expect(\"test failure\"),\n            \"new tempdir should be empty\"\n        );\n\n        // invoke on a file to ensure it returns an error\n        let file_path = dir_path.join(\"testfile\");\n        let f = std::fs::File::create(&file_path).unwrap();\n        drop(f);\n        assert!(file_path.is_empty_dir().is_err());\n\n        // do it again on a path, we know to be nonexistent\n        std::fs::remove_file(&file_path).unwrap();\n        assert!(file_path.is_empty_dir().is_err());\n    }\n\n    #[tokio::test]\n    async fn is_empty_dir_async() {\n        let dir = camino_tempfile::tempdir().unwrap();\n        let dir_path = dir.path();\n\n        // test positive case\n        assert!(\n            is_directory_empty(dir_path).await.expect(\"test failure\"),\n            \"new tempdir should be empty\"\n        );\n\n        // invoke on a file to ensure it returns an error\n        let file_path = dir_path.join(\"testfile\");\n        let f = std::fs::File::create(&file_path).unwrap();\n        drop(f);\n        assert!(is_directory_empty(&file_path).await.is_err());\n\n        // do it again on a path, we know to be nonexistent\n       
 std::fs::remove_file(&file_path).unwrap();\n        assert!(is_directory_empty(file_path).await.is_err());\n    }\n\n    #[test]\n    fn ignore_absent_files_works() {\n        let dir = camino_tempfile::tempdir().unwrap();\n\n        let file_path = dir.path().join(\"testfile\");\n\n        ignore_absent_files(|| std::fs::remove_file(&file_path)).expect(\"should execute normally\");\n\n        let f = std::fs::File::create(&file_path).unwrap();\n        drop(f);\n\n        ignore_absent_files(|| std::fs::remove_file(&file_path)).expect(\"should execute normally\");\n\n        assert!(!file_path.exists());\n    }\n\n    #[tokio::test]\n    async fn list_dir_works() {\n        let dir = camino_tempfile::tempdir().unwrap();\n        let dir_path = dir.path();\n\n        assert!(list_dir(dir_path).await.unwrap().is_empty());\n\n        let file_path = dir_path.join(\"testfile\");\n        let _ = std::fs::File::create(&file_path).unwrap();\n\n        assert_eq!(&list_dir(dir_path).await.unwrap(), &[\"testfile\"]);\n\n        let another_dir_path = dir_path.join(\"testdir\");\n        std::fs::create_dir(another_dir_path).unwrap();\n\n        let expected = &[\"testdir\", \"testfile\"];\n        let mut actual = list_dir(dir_path).await.unwrap();\n        actual.sort();\n        assert_eq!(actual, expected);\n    }\n}\n"
  },
  {
    "path": "libs/utils/src/generation.rs",
    "content": "use std::fmt::Debug;\n\nuse serde::{Deserialize, Serialize};\n\n/// Tenant generations are used to provide split-brain safety and allow\n/// multiple pageservers to attach the same tenant concurrently.\n///\n/// See docs/rfcs/025-generation-numbers.md for detail on how generation\n/// numbers are used.\n#[derive(Copy, Clone, Eq, PartialEq, PartialOrd, Ord, Hash)]\npub enum Generation {\n    // The None Generation is used in the metadata of layers written before generations were\n    // introduced.  A running Tenant always has a valid generation, but the layer metadata may\n    // include None generations.\n    None,\n\n    Valid(u32),\n}\n\n/// The Generation type represents a number associated with a Tenant, which\n/// increments every time the tenant is attached to a new pageserver, or\n/// an attached pageserver restarts.\n///\n/// It is included as a suffix in S3 keys, as a protection against split-brain\n/// scenarios where pageservers might otherwise issue conflicting writes to\n/// remote storage\nimpl Generation {\n    pub const MAX: Self = Self::Valid(u32::MAX);\n\n    /// Create a new Generation that represents a legacy key format with\n    /// no generation suffix\n    pub fn none() -> Self {\n        Self::None\n    }\n\n    pub const fn new(v: u32) -> Self {\n        Self::Valid(v)\n    }\n\n    pub fn is_none(&self) -> bool {\n        matches!(self, Self::None)\n    }\n\n    #[track_caller]\n    pub fn get_suffix(&self) -> impl std::fmt::Display {\n        match self {\n            Self::Valid(v) => GenerationFileSuffix(Some(*v)),\n            Self::None => GenerationFileSuffix(None),\n        }\n    }\n\n    /// `suffix` is the part after \"-\" in a key\n    ///\n    /// Returns None if parsing was unsuccessful\n    pub fn parse_suffix(suffix: &str) -> Option<Generation> {\n        u32::from_str_radix(suffix, 16).map(Generation::new).ok()\n    }\n\n    #[track_caller]\n    pub fn previous(&self) -> Generation {\n        match self {\n 
           Self::Valid(n) => {\n                if *n == 0 {\n                    // Since a tenant may be upgraded from a pre-generations state, interpret the \"previous\" generation\n                    // to 0 as being \"no generation\".\n                    Self::None\n                } else {\n                    Self::Valid(n - 1)\n                }\n            }\n            Self::None => Self::None,\n        }\n    }\n\n    #[track_caller]\n    pub fn next(&self) -> Generation {\n        match self {\n            Self::Valid(n) => Self::Valid(*n + 1),\n            Self::None => Self::Valid(1),\n        }\n    }\n\n    pub fn into(self) -> Option<u32> {\n        if let Self::Valid(v) = self {\n            Some(v)\n        } else {\n            None\n        }\n    }\n}\n\nstruct GenerationFileSuffix(Option<u32>);\n\nimpl std::fmt::Display for GenerationFileSuffix {\n    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {\n        if let Some(g) = self.0 {\n            write!(f, \"-{g:08x}\")\n        } else {\n            Ok(())\n        }\n    }\n}\n\nimpl Serialize for Generation {\n    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>\n    where\n        S: serde::Serializer,\n    {\n        if let Self::Valid(v) = self {\n            v.serialize(serializer)\n        } else {\n            // We should never be asked to serialize a None. 
Structures\n            // that include an optional generation should convert None to an\n            // Option<Generation>::None\n            Err(serde::ser::Error::custom(format!(\n                \"Tried to serialize invalid generation ({self:?})\"\n            )))\n        }\n    }\n}\n\nimpl<'de> Deserialize<'de> for Generation {\n    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>\n    where\n        D: serde::Deserializer<'de>,\n    {\n        Ok(Self::Valid(u32::deserialize(deserializer)?))\n    }\n}\n\n// We intentionally do not implement Display for Generation, to reduce the\n// risk of a bug where the generation is used in a format!() string directly\n// instead of using get_suffix().\nimpl Debug for Generation {\n    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {\n        match self {\n            Self::Valid(v) => {\n                write!(f, \"{v:08x}\")\n            }\n            Self::None => {\n                write!(f, \"<none>\")\n            }\n        }\n    }\n}\n\n#[cfg(test)]\nmod test {\n    use super::*;\n\n    #[test]\n    fn generation_gt() {\n        // Important that a None generation compares less than a valid one, during upgrades from\n        // pre-generation systems.\n        assert!(Generation::none() < Generation::new(0));\n        assert!(Generation::none() < Generation::new(1));\n    }\n\n    #[test]\n    fn suffix_is_stable() {\n        use std::fmt::Write as _;\n\n        // the suffix must remain stable through-out the pageserver remote storage evolution and\n        // not be changed accidentially without thinking about migration\n        let examples = [\n            (line!(), Generation::None, \"\"),\n            (line!(), Generation::Valid(0), \"-00000000\"),\n            (line!(), Generation::Valid(u32::MAX), \"-ffffffff\"),\n        ];\n\n        let mut s = String::new();\n        for (line, gen_, expected) in examples {\n            s.clear();\n            write!(s, \"{}\", 
&gen_.get_suffix()).expect(\"string grows\");\n            assert_eq!(s, expected, \"example on {line}\");\n        }\n    }\n}\n"
  },
  {
    "path": "libs/utils/src/guard_arc_swap.rs",
    "content": "//! A wrapper around `ArcSwap` that ensures there is only one writer at a time and writes\n//! don't block reads.\n\nuse std::sync::Arc;\n\nuse arc_swap::ArcSwap;\nuse tokio::sync::TryLockError;\n\npub struct GuardArcSwap<T> {\n    inner: ArcSwap<T>,\n    guard: tokio::sync::Mutex<()>,\n}\n\npub struct Guard<'a, T> {\n    _guard: tokio::sync::MutexGuard<'a, ()>,\n    inner: &'a ArcSwap<T>,\n}\n\nimpl<T> GuardArcSwap<T> {\n    pub fn new(inner: T) -> Self {\n        Self {\n            inner: ArcSwap::new(Arc::new(inner)),\n            guard: tokio::sync::Mutex::new(()),\n        }\n    }\n\n    pub fn read(&self) -> Arc<T> {\n        self.inner.load_full()\n    }\n\n    pub async fn write_guard(&self) -> Guard<'_, T> {\n        Guard {\n            _guard: self.guard.lock().await,\n            inner: &self.inner,\n        }\n    }\n\n    pub fn try_write_guard(&self) -> Result<Guard<'_, T>, TryLockError> {\n        let guard = self.guard.try_lock()?;\n        Ok(Guard {\n            _guard: guard,\n            inner: &self.inner,\n        })\n    }\n}\n\nimpl<T> Guard<'_, T> {\n    pub fn read(&self) -> Arc<T> {\n        self.inner.load_full()\n    }\n\n    pub fn write(&mut self, value: T) {\n        self.inner.store(Arc::new(value));\n    }\n}\n"
  },
  {
    "path": "libs/utils/src/hex.rs",
    "content": "/// Useful type for asserting that expected bytes match reporting the bytes more readable\n/// array-syntax compatible hex bytes.\n///\n/// # Usage\n///\n/// ```\n/// use utils::Hex;\n///\n/// let actual = serialize_something();\n/// let expected = [0x68, 0x65, 0x6c, 0x6c, 0x6f, 0x20, 0x77, 0x6f, 0x72, 0x6c, 0x64];\n///\n/// // the type implements PartialEq and on mismatch, both sides are printed in 16 wide multiline\n/// // output suffixed with an array style length for easier comparisons.\n/// assert_eq!(Hex(&actual), Hex(&expected));\n///\n/// // with `let expected = [0x68];` the error would had been:\n/// // assertion `left == right` failed\n/// //  left: [0x68, 0x65, 0x6c, 0x6c, 0x6f, 0x20, 0x77, 0x6f, 0x72, 0x6c, 0x64; 11]\n/// // right: [0x68; 1]\n/// # fn serialize_something() -> Vec<u8> { \"hello world\".as_bytes().to_vec() }\n/// ```\npub struct Hex<S>(pub S);\n\nimpl<S: AsRef<[u8]>> std::fmt::Debug for Hex<S> {\n    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {\n        write!(f, \"[\")?;\n        let chunks = self.0.as_ref().chunks(16);\n        for (i, c) in chunks.enumerate() {\n            if i > 0 && !c.is_empty() {\n                writeln!(f, \", \")?;\n            }\n            for (j, b) in c.iter().enumerate() {\n                if j > 0 {\n                    write!(f, \", \")?;\n                }\n                write!(f, \"0x{b:02x}\")?;\n            }\n        }\n        write!(f, \"; {}]\", self.0.as_ref().len())\n    }\n}\n\nimpl<R: AsRef<[u8]>, L: AsRef<[u8]>> PartialEq<Hex<R>> for Hex<L> {\n    fn eq(&self, other: &Hex<R>) -> bool {\n        let left = self.0.as_ref();\n        let right = other.0.as_ref();\n\n        left == right\n    }\n}\n"
  },
  {
    "path": "libs/utils/src/id.rs",
    "content": "use std::fmt;\nuse std::num::ParseIntError;\nuse std::str::FromStr;\n\nuse anyhow::Context;\nuse hex::FromHex;\nuse rand::Rng;\nuse serde::de::Visitor;\nuse serde::{Deserialize, Serialize};\nuse thiserror::Error;\n\n#[derive(Error, Debug)]\npub enum IdError {\n    #[error(\"invalid id length {0}\")]\n    SliceParseError(usize),\n}\n\n/// Neon ID is a 128-bit random ID.\n/// Used to represent various identifiers. Provides handy utility methods and impls.\n///\n/// NOTE: It (de)serializes as an array of hex bytes, so the string representation would look\n/// like `[173,80,132,115,129,226,72,254,170,201,135,108,199,26,228,24]`.\n#[derive(Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)]\nstruct Id([u8; 16]);\n\nimpl Serialize for Id {\n    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>\n    where\n        S: serde::Serializer,\n    {\n        if serializer.is_human_readable() {\n            serializer.collect_str(self)\n        } else {\n            self.0.serialize(serializer)\n        }\n    }\n}\n\nimpl<'de> Deserialize<'de> for Id {\n    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>\n    where\n        D: serde::Deserializer<'de>,\n    {\n        struct IdVisitor {\n            is_human_readable_deserializer: bool,\n        }\n\n        impl<'de> Visitor<'de> for IdVisitor {\n            type Value = Id;\n\n            fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {\n                if self.is_human_readable_deserializer {\n                    formatter.write_str(\"value in form of hex string\")\n                } else {\n                    formatter.write_str(\"value in form of integer array([u8; 16])\")\n                }\n            }\n\n            fn visit_seq<A>(self, seq: A) -> Result<Self::Value, A::Error>\n            where\n                A: serde::de::SeqAccess<'de>,\n            {\n                let s = serde::de::value::SeqAccessDeserializer::new(seq);\n                let 
id: [u8; 16] = Deserialize::deserialize(s)?;\n                Ok(Id::from(id))\n            }\n\n            fn visit_str<E>(self, v: &str) -> Result<Self::Value, E>\n            where\n                E: serde::de::Error,\n            {\n                Id::from_str(v).map_err(E::custom)\n            }\n        }\n\n        if deserializer.is_human_readable() {\n            deserializer.deserialize_str(IdVisitor {\n                is_human_readable_deserializer: true,\n            })\n        } else {\n            deserializer.deserialize_tuple(\n                16,\n                IdVisitor {\n                    is_human_readable_deserializer: false,\n                },\n            )\n        }\n    }\n}\n\nimpl Id {\n    pub fn from_slice(src: &[u8]) -> Result<Id, IdError> {\n        if src.len() != 16 {\n            return Err(IdError::SliceParseError(src.len()));\n        }\n        let mut id_array = [0u8; 16];\n        id_array.copy_from_slice(src);\n        Ok(id_array.into())\n    }\n\n    pub fn as_arr(&self) -> [u8; 16] {\n        self.0\n    }\n\n    pub fn generate() -> Self {\n        let mut tli_buf = [0u8; 16];\n        rand::rng().fill(&mut tli_buf);\n        Id::from(tli_buf)\n    }\n\n    fn hex_encode(&self) -> String {\n        static HEX: &[u8] = b\"0123456789abcdef\";\n\n        let mut buf = vec![0u8; self.0.len() * 2];\n        for (&b, chunk) in self.0.as_ref().iter().zip(buf.chunks_exact_mut(2)) {\n            chunk[0] = HEX[((b >> 4) & 0xf) as usize];\n            chunk[1] = HEX[(b & 0xf) as usize];\n        }\n\n        // SAFETY: vec constructed out of `HEX`, it can only be ascii\n        unsafe { String::from_utf8_unchecked(buf) }\n    }\n}\n\nimpl FromStr for Id {\n    type Err = hex::FromHexError;\n\n    fn from_str(s: &str) -> Result<Id, Self::Err> {\n        Self::from_hex(s)\n    }\n}\n\n// this is needed for pretty serialization and deserialization of Id's using serde integration with hex crate\nimpl FromHex for Id {\n    
type Error = hex::FromHexError;\n\n    fn from_hex<T: AsRef<[u8]>>(hex: T) -> Result<Self, Self::Error> {\n        let mut buf: [u8; 16] = [0u8; 16];\n        hex::decode_to_slice(hex, &mut buf)?;\n        Ok(Id(buf))\n    }\n}\n\nimpl AsRef<[u8]> for Id {\n    fn as_ref(&self) -> &[u8] {\n        &self.0\n    }\n}\n\nimpl From<[u8; 16]> for Id {\n    fn from(b: [u8; 16]) -> Self {\n        Id(b)\n    }\n}\n\nimpl From<Id> for u128 {\n    fn from(id: Id) -> Self {\n        u128::from_le_bytes(id.0)\n    }\n}\n\nimpl fmt::Display for Id {\n    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {\n        f.write_str(&self.hex_encode())\n    }\n}\n\nimpl fmt::Debug for Id {\n    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {\n        f.write_str(&self.hex_encode())\n    }\n}\n\nmacro_rules! id_newtype {\n    ($t:ident) => {\n        impl $t {\n            pub fn from_slice(src: &[u8]) -> Result<$t, IdError> {\n                Ok($t(Id::from_slice(src)?))\n            }\n\n            pub fn as_arr(&self) -> [u8; 16] {\n                self.0.as_arr()\n            }\n\n            pub fn generate() -> Self {\n                $t(Id::generate())\n            }\n\n            pub const fn from_array(b: [u8; 16]) -> Self {\n                $t(Id(b))\n            }\n        }\n\n        impl FromStr for $t {\n            type Err = hex::FromHexError;\n\n            fn from_str(s: &str) -> Result<$t, Self::Err> {\n                let value = Id::from_str(s)?;\n                Ok($t(value))\n            }\n        }\n\n        impl From<[u8; 16]> for $t {\n            fn from(b: [u8; 16]) -> Self {\n                $t(Id::from(b))\n            }\n        }\n\n        impl FromHex for $t {\n            type Error = hex::FromHexError;\n\n            fn from_hex<T: AsRef<[u8]>>(hex: T) -> Result<Self, Self::Error> {\n                Ok($t(Id::from_hex(hex)?))\n            }\n        }\n\n        impl AsRef<[u8]> for $t {\n            fn as_ref(&self) -> &[u8] 
{\n                &self.0.0\n            }\n        }\n\n        impl From<$t> for u128 {\n            fn from(id: $t) -> Self {\n                u128::from(id.0)\n            }\n        }\n\n        impl fmt::Display for $t {\n            fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {\n                self.0.fmt(f)\n            }\n        }\n\n        impl fmt::Debug for $t {\n            fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {\n                self.0.fmt(f)\n            }\n        }\n    };\n}\n\n/// Neon timeline ID.\n///\n/// They are different from PostgreSQL timeline\n/// IDs, but serve a similar purpose: they differentiate\n/// between different \"histories\" of the same cluster.  However,\n/// PostgreSQL timeline IDs are a bit cumbersome, because they are only\n/// 32-bits wide, and they must be in ascending order in any given\n/// timeline history.  Those limitations mean that we cannot generate a\n/// new PostgreSQL timeline ID by just generating a random number. And\n/// that in turn is problematic for the \"pull/push\" workflow, where you\n/// have a local copy of a Neon repository, and you periodically sync\n/// the local changes with a remote server. When you work \"detached\"\n/// from the remote server, you cannot create a PostgreSQL timeline ID\n/// that's guaranteed to be different from all existing timelines in\n/// the remote server. For example, if two people are having a clone of\n/// the repository on their laptops, and they both create a new branch\n/// with different name. What timeline ID would they assign to their\n/// branches? If they pick the same one, and later try to push the\n/// branches to the same remote server, they will get mixed up.\n///\n/// To avoid those issues, Neon has its own concept of timelines that\n/// is separate from PostgreSQL timelines, and doesn't have those\n/// limitations. 
A Neon timeline is identified by a 128-bit ID, which\n/// is usually printed out as a hex string.\n///\n/// NOTE: It (de)serializes as an array of hex bytes, so the string representation would look\n/// like `[173,80,132,115,129,226,72,254,170,201,135,108,199,26,228,24]`.\n/// See [`Id`] for alternative ways to serialize it.\n#[derive(Clone, Copy, PartialEq, Eq, Hash, Ord, PartialOrd, Serialize, Deserialize)]\npub struct TimelineId(Id);\n\nid_newtype!(TimelineId);\n\nimpl TryFrom<Option<&str>> for TimelineId {\n    type Error = anyhow::Error;\n\n    fn try_from(value: Option<&str>) -> Result<Self, Self::Error> {\n        value\n            .unwrap_or_default()\n            .parse::<TimelineId>()\n            .with_context(|| format!(\"Could not parse timeline id from {value:?}\"))\n    }\n}\n\n/// Neon Tenant Id represents identifiar of a particular tenant.\n/// Is used for distinguishing requests and data belonging to different users.\n///\n/// NOTE: It (de)serializes as an array of hex bytes, so the string representation would look\n/// like `[173,80,132,115,129,226,72,254,170,201,135,108,199,26,228,24]`.\n/// See [`Id`] for alternative ways to serialize it.\n#[derive(Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize, PartialOrd, Ord)]\npub struct TenantId(Id);\n\nid_newtype!(TenantId);\n\n/// If needed, reuse small string from proxy/src/types.rc\npub type EndpointId = String;\n\n// A pair uniquely identifying Neon instance.\n#[derive(Debug, Clone, Copy, PartialOrd, Ord, PartialEq, Eq, Hash, Serialize, Deserialize)]\npub struct TenantTimelineId {\n    pub tenant_id: TenantId,\n    pub timeline_id: TimelineId,\n}\n\nimpl TenantTimelineId {\n    pub fn new(tenant_id: TenantId, timeline_id: TimelineId) -> Self {\n        TenantTimelineId {\n            tenant_id,\n            timeline_id,\n        }\n    }\n\n    pub fn generate() -> Self {\n        Self::new(TenantId::generate(), TimelineId::generate())\n    }\n\n    pub fn empty() -> Self {\n        
Self::new(TenantId::from([0u8; 16]), TimelineId::from([0u8; 16]))\n    }\n}\n\nimpl fmt::Display for TenantTimelineId {\n    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {\n        write!(f, \"{}/{}\", self.tenant_id, self.timeline_id)\n    }\n}\n\nimpl FromStr for TenantTimelineId {\n    type Err = anyhow::Error;\n\n    fn from_str(s: &str) -> Result<Self, Self::Err> {\n        let mut parts = s.split('/');\n        let tenant_id = parts\n            .next()\n            .ok_or_else(|| anyhow::anyhow!(\"TenantTimelineId must contain tenant_id\"))?\n            .parse()?;\n        let timeline_id = parts\n            .next()\n            .ok_or_else(|| anyhow::anyhow!(\"TenantTimelineId must contain timeline_id\"))?\n            .parse()?;\n        if parts.next().is_some() {\n            anyhow::bail!(\"TenantTimelineId must contain only tenant_id and timeline_id\");\n        }\n        Ok(TenantTimelineId::new(tenant_id, timeline_id))\n    }\n}\n\n// Unique ID of a storage node (safekeeper or pageserver). 
Supposed to be issued\n// by the console.\n#[derive(Clone, Copy, Eq, Ord, PartialEq, PartialOrd, Hash, Debug, Serialize, Deserialize)]\n#[serde(transparent)]\npub struct NodeId(pub u64);\n\nimpl fmt::Display for NodeId {\n    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {\n        write!(f, \"{}\", self.0)\n    }\n}\n\nimpl FromStr for NodeId {\n    type Err = ParseIntError;\n    fn from_str(s: &str) -> Result<Self, Self::Err> {\n        Ok(NodeId(u64::from_str(s)?))\n    }\n}\n\n#[cfg(test)]\nmod tests {\n    use serde_assert::{Deserializer, Serializer, Token, Tokens};\n\n    use super::*;\n    use crate::bin_ser::BeSer;\n\n    #[test]\n    fn test_id_serde_non_human_readable() {\n        let original_id = Id([\n            173, 80, 132, 115, 129, 226, 72, 254, 170, 201, 135, 108, 199, 26, 228, 24,\n        ]);\n        let expected_tokens = Tokens(vec![\n            Token::Tuple { len: 16 },\n            Token::U8(173),\n            Token::U8(80),\n            Token::U8(132),\n            Token::U8(115),\n            Token::U8(129),\n            Token::U8(226),\n            Token::U8(72),\n            Token::U8(254),\n            Token::U8(170),\n            Token::U8(201),\n            Token::U8(135),\n            Token::U8(108),\n            Token::U8(199),\n            Token::U8(26),\n            Token::U8(228),\n            Token::U8(24),\n            Token::TupleEnd,\n        ]);\n\n        let serializer = Serializer::builder().is_human_readable(false).build();\n        let serialized_tokens = original_id.serialize(&serializer).unwrap();\n        assert_eq!(serialized_tokens, expected_tokens);\n\n        let mut deserializer = Deserializer::builder()\n            .is_human_readable(false)\n            .tokens(serialized_tokens)\n            .build();\n        let deserialized_id = Id::deserialize(&mut deserializer).unwrap();\n        assert_eq!(deserialized_id, original_id);\n    }\n\n    #[test]\n    fn test_id_serde_human_readable() {\n        
let original_id = Id([\n            173, 80, 132, 115, 129, 226, 72, 254, 170, 201, 135, 108, 199, 26, 228, 24,\n        ]);\n        let expected_tokens = Tokens(vec![Token::Str(String::from(\n            \"ad50847381e248feaac9876cc71ae418\",\n        ))]);\n\n        let serializer = Serializer::builder().is_human_readable(true).build();\n        let serialized_tokens = original_id.serialize(&serializer).unwrap();\n        assert_eq!(serialized_tokens, expected_tokens);\n\n        let mut deserializer = Deserializer::builder()\n            .is_human_readable(true)\n            .tokens(Tokens(vec![Token::Str(String::from(\n                \"ad50847381e248feaac9876cc71ae418\",\n            ))]))\n            .build();\n        assert_eq!(Id::deserialize(&mut deserializer).unwrap(), original_id);\n    }\n\n    macro_rules! roundtrip_type {\n        ($type:ty, $expected_bytes:expr) => {{\n            let expected_bytes: [u8; 16] = $expected_bytes;\n            let original_id = <$type>::from(expected_bytes);\n\n            let ser_bytes = original_id.ser().unwrap();\n            assert_eq!(ser_bytes, expected_bytes);\n\n            let des_id = <$type>::des(&ser_bytes).unwrap();\n            assert_eq!(des_id, original_id);\n        }};\n    }\n\n    #[test]\n    fn test_id_bincode_serde() {\n        let expected_bytes = [\n            173, 80, 132, 115, 129, 226, 72, 254, 170, 201, 135, 108, 199, 26, 228, 24,\n        ];\n\n        roundtrip_type!(Id, expected_bytes);\n    }\n\n    #[test]\n    fn test_tenant_id_bincode_serde() {\n        let expected_bytes = [\n            173, 80, 132, 115, 129, 226, 72, 254, 170, 201, 135, 108, 199, 26, 228, 24,\n        ];\n\n        roundtrip_type!(TenantId, expected_bytes);\n    }\n\n    #[test]\n    fn test_timeline_id_bincode_serde() {\n        let expected_bytes = [\n            173, 80, 132, 115, 129, 226, 72, 254, 170, 201, 135, 108, 199, 26, 228, 24,\n        ];\n\n        roundtrip_type!(TimelineId, expected_bytes);\n   
 }\n}\n"
  },
  {
    "path": "libs/utils/src/ip_address.rs",
    "content": "use std::env::{VarError, var};\nuse std::error::Error;\nuse std::net::IpAddr;\nuse std::str::FromStr;\n\n/// Name of the environment variable containing the reachable IP address of the node. If set, the IP address contained in this\n/// environment variable is used as the reachable IP address of the pageserver or safekeeper node during node registration.\n/// In a Kubernetes environment, this environment variable should be set by Kubernetes to the Pod IP (specified in the Pod\n/// template).\npub const HADRON_NODE_IP_ADDRESS: &str = \"HADRON_NODE_IP_ADDRESS\";\n\n/// Read the reachable IP address of this page server from env var HADRON_NODE_IP_ADDRESS.\n/// In Kubernetes this environment variable is set to the Pod IP (specified in the Pod template).\npub fn read_node_ip_addr_from_env() -> Result<Option<IpAddr>, Box<dyn Error>> {\n    match var(HADRON_NODE_IP_ADDRESS) {\n        Ok(v) => {\n            if let Ok(addr) = IpAddr::from_str(&v) {\n                Ok(Some(addr))\n            } else {\n                Err(format!(\"Invalid IP address string: {v}. 
Cannot be parsed as either an IPv4 or an IPv6 address.\").into())\n            }\n        }\n        Err(VarError::NotPresent) => Ok(None),\n        Err(e) => Err(e.into()),\n    }\n}\n\n#[cfg(test)]\nmod tests {\n    use super::*;\n    use std::env;\n    use std::net::{Ipv4Addr, Ipv6Addr};\n\n    #[test]\n    fn test_read_node_ip_addr_from_env() {\n        // SAFETY: test code\n        unsafe {\n            // Test with a valid IPv4 address\n            env::set_var(HADRON_NODE_IP_ADDRESS, \"192.168.1.1\");\n            let result = read_node_ip_addr_from_env().unwrap();\n            assert_eq!(result, Some(IpAddr::V4(Ipv4Addr::new(192, 168, 1, 1))));\n\n            // Test with a valid IPv6 address\n            env::set_var(\n                HADRON_NODE_IP_ADDRESS,\n                \"2001:0db8:85a3:0000:0000:8a2e:0370:7334\",\n            );\n        }\n        let result = read_node_ip_addr_from_env().unwrap();\n        assert_eq!(\n            result,\n            Some(IpAddr::V6(\n                Ipv6Addr::from_str(\"2001:0db8:85a3:0000:0000:8a2e:0370:7334\").unwrap()\n            ))\n        );\n\n        // Test with an invalid IP address\n        // SAFETY: test code\n        unsafe {\n            env::set_var(HADRON_NODE_IP_ADDRESS, \"invalid_ip\");\n        }\n        let result = read_node_ip_addr_from_env();\n        assert!(result.is_err());\n\n        // Test with no environment variable set\n        // SAFETY: test code\n        unsafe {\n            env::remove_var(HADRON_NODE_IP_ADDRESS);\n        }\n        let result = read_node_ip_addr_from_env().unwrap();\n        assert_eq!(result, None);\n    }\n}\n"
  },
  {
    "path": "libs/utils/src/leaky_bucket.rs",
    "content": "//! This module implements the Generic Cell Rate Algorithm for a simplified\n//! version of the Leaky Bucket rate limiting system.\n//!\n//! # Leaky Bucket\n//!\n//! If the bucket is full, no new requests are allowed and are throttled/errored.\n//! If the bucket is partially full/empty, new requests are added to the bucket in\n//! terms of \"tokens\".\n//!\n//! Over time, tokens are removed from the bucket, naturally allowing new requests at a steady rate.\n//!\n//! The bucket size tunes the burst support. The drain rate tunes the steady-rate requests per second.\n//!\n//! # [GCRA](https://en.wikipedia.org/wiki/Generic_cell_rate_algorithm)\n//!\n//! GCRA is a continuous rate leaky-bucket impl that stores minimal state and requires\n//! no background jobs to drain tokens, as the design utilises timestamps to drain automatically over time.\n//!\n//! We store an \"empty_at\" timestamp as the only state. As time progresses, we will naturally approach\n//! the empty state. The full-bucket state is calculated from `empty_at - config.bucket_width`.\n//!\n//! 
Another explanation can be found here: <https://brandur.org/rate-limiting>\n\nuse std::sync::Mutex;\nuse std::sync::atomic::{AtomicU64, Ordering};\nuse std::time::Duration;\n\nuse tokio::sync::Notify;\nuse tokio::time::Instant;\n\n#[derive(Clone, Copy)]\npub struct LeakyBucketConfig {\n    /// This is the \"time cost\" of a single request unit.\n    /// Should loosely represent how long it takes to handle a request unit in active resource time.\n    /// Loosely speaking this is the inverse of the steady-rate requests-per-second\n    pub cost: Duration,\n\n    /// total size of the bucket\n    pub bucket_width: Duration,\n}\n\nimpl LeakyBucketConfig {\n    pub fn new(rps: f64, bucket_size: f64) -> Self {\n        let cost = Duration::from_secs_f64(rps.recip());\n        let bucket_width = cost.mul_f64(bucket_size);\n        Self { cost, bucket_width }\n    }\n}\n\npub struct LeakyBucketState {\n    /// Bucket is represented by `allow_at..empty_at` where `allow_at = empty_at - config.bucket_width`.\n    ///\n    /// At any given time, `empty_at - now` represents the number of tokens in the bucket, multiplied by the \"time_cost\".\n    /// Adding `n` tokens to the bucket is done by moving `empty_at` forward by `n * config.cost`.\n    /// If `now < allow_at`, the bucket is considered filled and cannot accept any more tokens.\n    /// Draining the bucket will happen naturally as `now` moves forward.\n    ///\n    /// Let `n` be some \"time cost\" for the request,\n    /// If now is after empty_at, the bucket is empty and the empty_at is reset to now,\n    /// If now is within the `bucket window + n`, we are within time budget.\n    /// If now is before the `bucket window + n`, we have run out of budget.\n    ///\n    /// This is inspired by the generic cell rate algorithm (GCRA) and works\n    /// exactly the same as a leaky-bucket.\n    pub empty_at: Instant,\n}\n\nimpl LeakyBucketState {\n    pub fn with_initial_tokens(config: &LeakyBucketConfig, initial_tokens: 
f64) -> Self {\n        LeakyBucketState {\n            empty_at: Instant::now() + config.cost.mul_f64(initial_tokens),\n        }\n    }\n\n    pub fn bucket_is_empty(&self, now: Instant) -> bool {\n        // if self.empty_at is after now, the bucket is not empty\n        self.empty_at <= now\n    }\n\n    /// Immediately adds tokens to the bucket, if there is space.\n    ///\n    /// In a scenario where you are waiting for available rate,\n    /// rather than just erroring immediately, `started` corresponds to when this waiting started.\n    ///\n    /// `n` is the number of tokens that will be filled in the bucket.\n    ///\n    /// # Errors\n    ///\n    /// If there is not enough space, no tokens are added. Instead, an error is returned with the time when\n    /// there will be space again.\n    pub fn add_tokens(\n        &mut self,\n        config: &LeakyBucketConfig,\n        started: Instant,\n        n: f64,\n    ) -> Result<(), Instant> {\n        let now = Instant::now();\n\n        // invariant: started <= now\n        debug_assert!(started <= now);\n\n        // If the bucket was empty when we started our search,\n        // we should update the `empty_at` value accordingly.\n        // this prevents us from having negative tokens in the bucket.\n        let mut empty_at = self.empty_at;\n        if empty_at < started {\n            empty_at = started;\n        }\n\n        let n = config.cost.mul_f64(n);\n        let new_empty_at = empty_at + n;\n        let allow_at = new_empty_at.checked_sub(config.bucket_width);\n\n        //                     empty_at\n        //          allow_at    |   new_empty_at\n        //           /          |   /\n        // -------o-[---------o-|--]---------\n        //   now1 ^      now2 ^\n        //\n        // at now1, the bucket would be completely filled if we add n tokens.\n        // at now2, the bucket would be partially filled if we add n tokens.\n\n        match allow_at {\n            Some(allow_at) if now < 
allow_at => Err(allow_at),\n            _ => {\n                self.empty_at = new_empty_at;\n                Ok(())\n            }\n        }\n    }\n}\n\npub struct RateLimiter {\n    pub config: LeakyBucketConfig,\n    pub sleep_counter: AtomicU64,\n    pub state: Mutex<LeakyBucketState>,\n    /// a queue to provide this fair ordering.\n    pub queue: Notify,\n}\n\nstruct Requeue<'a>(&'a Notify);\n\nimpl Drop for Requeue<'_> {\n    fn drop(&mut self) {\n        self.0.notify_one();\n    }\n}\n\nimpl RateLimiter {\n    pub fn with_initial_tokens(config: LeakyBucketConfig, initial_tokens: f64) -> Self {\n        RateLimiter {\n            sleep_counter: AtomicU64::new(0),\n            state: Mutex::new(LeakyBucketState::with_initial_tokens(\n                &config,\n                initial_tokens,\n            )),\n            config,\n            queue: {\n                let queue = Notify::new();\n                queue.notify_one();\n                queue\n            },\n        }\n    }\n\n    pub fn steady_rps(&self) -> f64 {\n        self.config.cost.as_secs_f64().recip()\n    }\n\n    /// returns true if we did throttle\n    pub async fn acquire(&self, count: usize) -> bool {\n        let start = tokio::time::Instant::now();\n\n        let start_count = self.sleep_counter.load(Ordering::Acquire);\n        let mut end_count = start_count;\n\n        // wait until we are the first in the queue\n        let mut notified = std::pin::pin!(self.queue.notified());\n        if !notified.as_mut().enable() {\n            notified.await;\n            end_count = self.sleep_counter.load(Ordering::Acquire);\n        }\n\n        // notify the next waiter in the queue when we are done.\n        let _guard = Requeue(&self.queue);\n\n        loop {\n            let res = self\n                .state\n                .lock()\n                .unwrap()\n                .add_tokens(&self.config, start, count as f64);\n            match res {\n                Ok(()) => 
return end_count > start_count,\n                Err(ready_at) => {\n                    struct Increment<'a>(&'a AtomicU64);\n\n                    impl Drop for Increment<'_> {\n                        fn drop(&mut self) {\n                            self.0.fetch_add(1, Ordering::AcqRel);\n                        }\n                    }\n\n                    // increment the counter after we finish sleeping (or cancel this task).\n                    // this ensures that tasks that have already started the acquire will observe\n                    // the new sleep count when they are allowed to resume on the notify.\n                    let _inc = Increment(&self.sleep_counter);\n                    end_count += 1;\n\n                    tokio::time::sleep_until(ready_at).await;\n                }\n            }\n        }\n    }\n}\n\n#[cfg(test)]\nmod tests {\n    use std::time::Duration;\n\n    use tokio::time::Instant;\n\n    use super::{LeakyBucketConfig, LeakyBucketState};\n\n    #[tokio::test(start_paused = true)]\n    async fn check() {\n        let config = LeakyBucketConfig {\n            // average 100rps\n            cost: Duration::from_millis(10),\n            // burst up to 100 requests\n            bucket_width: Duration::from_millis(1000),\n        };\n\n        let mut state = LeakyBucketState {\n            empty_at: Instant::now(),\n        };\n\n        // supports burst\n        {\n            // should work for 100 requests this instant\n            for _ in 0..100 {\n                state.add_tokens(&config, Instant::now(), 1.0).unwrap();\n            }\n            let ready = state.add_tokens(&config, Instant::now(), 1.0).unwrap_err();\n            assert_eq!(ready - Instant::now(), Duration::from_millis(10));\n        }\n\n        // doesn't overfill\n        {\n            // after 1s we should have an empty bucket again.\n            tokio::time::advance(Duration::from_secs(1)).await;\n            
assert!(state.bucket_is_empty(Instant::now()));\n\n            // after 1s more, we should not over count the tokens and allow more than 200 requests.\n            tokio::time::advance(Duration::from_secs(1)).await;\n            for _ in 0..100 {\n                state.add_tokens(&config, Instant::now(), 1.0).unwrap();\n            }\n            let ready = state.add_tokens(&config, Instant::now(), 1.0).unwrap_err();\n            assert_eq!(ready - Instant::now(), Duration::from_millis(10));\n        }\n\n        // supports sustained rate over a long period\n        {\n            tokio::time::advance(Duration::from_secs(1)).await;\n\n            // should sustain 100rps\n            for _ in 0..2000 {\n                tokio::time::advance(Duration::from_millis(10)).await;\n                state.add_tokens(&config, Instant::now(), 1.0).unwrap();\n            }\n        }\n\n        // supports requesting more tokens than can be stored in the bucket\n        // we just wait a little bit longer upfront.\n        {\n            // start the bucket completely empty\n            tokio::time::advance(Duration::from_secs(5)).await;\n            assert!(state.bucket_is_empty(Instant::now()));\n\n            // requesting 200 tokens of space should take 200*cost = 2s\n            // but we already have 1s available, so we wait 1s from start.\n            let start = Instant::now();\n\n            let ready = state.add_tokens(&config, start, 200.0).unwrap_err();\n            assert_eq!(ready - Instant::now(), Duration::from_secs(1));\n\n            tokio::time::advance(Duration::from_millis(500)).await;\n            let ready = state.add_tokens(&config, start, 200.0).unwrap_err();\n            assert_eq!(ready - Instant::now(), Duration::from_millis(500));\n\n            tokio::time::advance(Duration::from_millis(500)).await;\n            state.add_tokens(&config, start, 200.0).unwrap();\n\n            // bucket should be completely full now\n            let ready = 
state.add_tokens(&config, Instant::now(), 1.0).unwrap_err();\n            assert_eq!(ready - Instant::now(), Duration::from_millis(10));\n        }\n    }\n}\n"
  },
  {
    "path": "libs/utils/src/lib.rs",
    "content": "//! `utils` is intended to be a place to put code that is shared\n//! between other crates in this repository.\n#![deny(clippy::undocumented_unsafe_blocks)]\n\npub mod backoff;\n\n/// `Lsn` type implements common tasks on Log Sequence Numbers\npub mod lsn;\n/// SeqWait allows waiting for a future sequence number to arrive\npub mod seqwait;\n\n/// A simple Read-Copy-Update implementation.\npub mod simple_rcu;\n\n/// append only ordered map implemented with a Vec\npub mod vec_map;\n\npub mod bin_ser;\n\n// helper functions for creating and fsyncing\npub mod crashsafe;\n\n// common authentication routines\npub mod auth;\n\n// utility functions and helper traits for unified unique id generation/serialization etc.\npub mod id;\n\n// utility functions to obtain reachable IP addresses in PS/SK nodes.\npub mod ip_address;\n\npub mod shard;\n\nmod hex;\npub use hex::Hex;\n\n// definition of the Generation type for pageserver attachment APIs\npub mod generation;\n\n// common log initialisation routine\npub mod logging;\n\npub mod lock_file;\npub mod pid_file;\n\n// Utility for binding TcpListeners with proper socket options.\npub mod tcp_listener;\n\n// Default signal handling\npub mod sentry_init;\npub mod signals;\n\npub mod fs_ext;\n\npub mod measured_stream;\n\npub mod serde_percent;\npub mod serde_regex;\npub mod serde_system_time;\n\npub mod pageserver_feedback;\n\npub mod postgres_client;\n\npub mod tracing_span_assert;\n\npub mod leaky_bucket;\npub mod rate_limit;\n\n/// Simple once-barrier and a guard which keeps barrier awaiting.\npub mod completion;\n\n/// Reporting utilities\npub mod error;\n\n/// async timeout helper\npub mod timeout;\n\npub mod span;\npub mod sync;\n\npub mod failpoint_support;\n\npub mod yielding_loop;\n\npub mod zstd;\n\npub mod env;\n\npub mod poison;\n\npub mod toml_edit_ext;\n\npub mod circuit_breaker;\n\npub mod try_rcu;\n\npub mod guard_arc_swap;\n\npub mod elapsed_accum;\n\n#[cfg(target_os = \"linux\")]\npub mod 
linux_socket_ioctl;\n\npub mod metrics_collector;\n\n// Re-export used in macro. Avoids adding git-version as dep in target crates.\n#[doc(hidden)]\npub use git_version;\n\n/// This is a shortcut to embed git sha into binaries and avoid copying the same build script to all packages\n///\n/// we have several cases:\n/// * building locally from git repo\n/// * building in CI from git repo\n/// * building in docker (either in CI or locally)\n///\n/// One thing to note is that .git is not available in docker (and it is bad to include it there).\n/// When building locally, the `git_version` is used to query .git. When building on CI and docker,\n/// we don't build the actual PR branch commits, but always a \"phantom\" would be merge commit to\n/// the target branch -- the actual PR commit from which we build from is supplied as GIT_VERSION\n/// environment variable.\n///\n/// We ended up with this compromise between phantom would be merge commits vs. pull request branch\n/// heads due to old logs becoming more reliable (github could gc the phantom merge commit\n/// anytime) in #4641.\n///\n/// To avoid running buildscript every recompilation, we use rerun-if-env-changed option.\n/// So the build script will be run only when GIT_VERSION envvar has changed.\n///\n/// Why not to use buildscript to get git commit sha directly without procmacro from different crate?\n/// Caching and workspaces complicates that. In case `utils` is not\n/// recompiled due to caching then version may become outdated.\n/// git_version crate handles that case by introducing a dependency on .git internals via include_bytes! 
macro,\n/// so if we changed the index state git_version will pick that up and rerun the macro.\n///\n/// Note that with git_version prefix is `git:` and in case of git version from env its `git-env:`.\n///\n/// #############################################################################################\n/// TODO this macro is not the way the library is intended to be used, see <https://github.com/neondatabase/neon/issues/1565> for details.\n/// We used `cachepot` to reduce our current CI build times: <https://github.com/neondatabase/cloud/pull/1033#issuecomment-1100935036>\n/// Yet, it seems to ignore the GIT_VERSION env variable, passed to Docker build, even with build.rs that contains\n/// `println!(\"cargo:rerun-if-env-changed=GIT_VERSION\");` code for cachepot cache invalidation.\n/// The problem needs further investigation and regular `const` declaration instead of a macro.\n#[macro_export]\nmacro_rules! project_git_version {\n    ($const_identifier:ident) => {\n        // this should try GIT_VERSION first only then git_version::git_version!\n        const $const_identifier: &::core::primitive::str = {\n            const __COMMIT_FROM_GIT: &::core::primitive::str = $crate::git_version::git_version! {\n                prefix = \"\",\n                fallback = \"unknown\",\n                args = [\"--abbrev=40\", \"--always\", \"--dirty=-modified\"] // always use full sha\n            };\n\n            const __ARG: &[&::core::primitive::str; 2] = &match ::core::option_env!(\"GIT_VERSION\") {\n                ::core::option::Option::Some(x) => [\"git-env:\", x],\n                ::core::option::Option::None => [\"git:\", __COMMIT_FROM_GIT],\n            };\n\n            $crate::__const_format::concatcp!(__ARG[0], __ARG[1])\n        };\n    };\n}\n\n/// This is a shortcut to embed build tag into binaries and avoid copying the same build script to all packages\n#[macro_export]\nmacro_rules! 
project_build_tag {\n    ($const_identifier:ident) => {\n        const $const_identifier: &::core::primitive::str = {\n            const __ARG: &[&::core::primitive::str; 2] = &match ::core::option_env!(\"BUILD_TAG\") {\n                ::core::option::Option::Some(x) => [\"build_tag-env:\", x],\n                ::core::option::Option::None => [\"build_tag:\", \"\"],\n            };\n\n            $crate::__const_format::concatcp!(__ARG[0], __ARG[1])\n        };\n    };\n}\n\n/// Re-export for `project_git_version` macro\n#[doc(hidden)]\npub use const_format as __const_format;\n\n/// Same as `assert!`, but evaluated during compilation and gets optimized out in runtime.\n#[macro_export]\nmacro_rules! const_assert {\n    ($($args:tt)*) => {\n        const _: () = assert!($($args)*);\n    };\n}\n"
  },
  {
    "path": "libs/utils/src/linux_socket_ioctl.rs",
    "content": "//! Linux-specific socket ioctls.\n//!\n//! <https://elixir.bootlin.com/linux/v6.1.128/source/include/uapi/linux/sockios.h#L25-L27>\n\nuse std::io;\nuse std::mem::MaybeUninit;\nuse std::os::fd::RawFd;\nuse std::os::raw::c_int;\n\nuse nix::libc::{FIONREAD, TIOCOUTQ};\n\nunsafe fn do_ioctl(socket_fd: RawFd, cmd: nix::libc::Ioctl) -> io::Result<c_int> {\n    let mut inq: MaybeUninit<c_int> = MaybeUninit::uninit();\n    // SAFETY: encapsulating fn is unsafe, we require `socket_fd` to be a valid file descriptor\n    unsafe {\n        let err = nix::libc::ioctl(socket_fd, cmd, inq.as_mut_ptr());\n        if err == 0 {\n            Ok(inq.assume_init())\n        } else {\n            Err(io::Error::last_os_error())\n        }\n    }\n}\n\n/// # Safety\n///\n/// Caller must ensure that `socket_fd` is a valid TCP socket file descriptor.\npub unsafe fn inq(socket_fd: RawFd) -> io::Result<c_int> {\n    // SAFETY: encapsulating fn is unsafe\n    unsafe { do_ioctl(socket_fd, FIONREAD) }\n}\n\n/// # Safety\n///\n/// Caller must ensure that `socket_fd` is a valid TCP socket file descriptor.\npub unsafe fn outq(socket_fd: RawFd) -> io::Result<c_int> {\n    // SAFETY: encapsulating fn is unsafe\n    unsafe { do_ioctl(socket_fd, TIOCOUTQ) }\n}\n"
  },
  {
    "path": "libs/utils/src/lock_file.rs",
    "content": "//! A module to create and read lock files.\n//!\n//! File locking is done using [`nix::fcntl::Flock`] exclusive locks.\n//! The only consumer of this module is currently\n//! [`pid_file`](crate::pid_file). See the module-level comment\n//! there for potential pitfalls with lock files that are used\n//! to store PIDs (pidfiles).\n\nuse std::fs;\nuse std::io::{Read, Write};\nuse std::ops::Deref;\n\nuse anyhow::Context;\nuse camino::{Utf8Path, Utf8PathBuf};\nuse nix::errno::Errno::EAGAIN;\nuse nix::fcntl::{Flock, FlockArg};\n\nuse crate::crashsafe;\n\n/// A handle to an open and flocked, but not-yet-written lock file.\n/// Returned by [`create_exclusive`].\n#[must_use]\npub struct UnwrittenLockFile {\n    path: Utf8PathBuf,\n    file: Flock<fs::File>,\n}\n\n/// Returned by [`UnwrittenLockFile::write_content`].\n#[must_use]\npub struct LockFileGuard(Flock<fs::File>);\n\nimpl Deref for LockFileGuard {\n    type Target = fs::File;\n\n    fn deref(&self) -> &Self::Target {\n        &self.0\n    }\n}\n\nimpl UnwrittenLockFile {\n    /// Replace the content of this lock file with the byte representation of `contents`.\n    pub fn write_content(mut self, contents: String) -> anyhow::Result<LockFileGuard> {\n        self.file\n            .set_len(0)\n            .context(\"Failed to truncate lockfile\")?;\n        self.file\n            .write_all(contents.as_bytes())\n            .with_context(|| format!(\"Failed to write '{contents}' contents into lockfile\"))?;\n        crashsafe::fsync_file_and_parent(&self.path).context(\"fsync lockfile\")?;\n        Ok(LockFileGuard(self.file))\n    }\n}\n\n/// Creates and opens a lock file in the path, grabs an exclusive flock on it, and returns\n/// a handle that allows overwriting the locked file's content.\n///\n/// The exclusive lock is released when dropping the returned handle.\n///\n/// It is not an error if the file already exists.\n/// It is an error if the file is already locked.\npub fn 
create_exclusive(lock_file_path: &Utf8Path) -> anyhow::Result<UnwrittenLockFile> {\n    let lock_file = fs::OpenOptions::new()\n        .create(true) // O_CREAT\n        .truncate(true)\n        .write(true)\n        .open(lock_file_path)\n        .context(\"open lock file\")?;\n\n    let res = Flock::lock(lock_file, FlockArg::LockExclusiveNonblock);\n    match res {\n        Ok(lock_file) => Ok(UnwrittenLockFile {\n            path: lock_file_path.to_owned(),\n            file: lock_file,\n        }),\n        Err((_, EAGAIN)) => anyhow::bail!(\"file is already locked\"),\n        Err((_, e)) => Err(e).context(\"flock error\"),\n    }\n}\n\n/// Returned by [`read_and_hold_lock_file`].\n/// Check out the [`pid_file`](crate::pid_file) module for what the variants mean\n/// and potential caveats when lock files are used to store PIDs.\npub enum LockFileRead {\n    /// No file exists at the given path.\n    NotExist,\n    /// No other process held the lock file, so we grabbed an flock\n    /// on it and read its contents.\n    /// Release the flock by dropping the [`LockFileGuard`].\n    NotHeldByAnyProcess(LockFileGuard, String),\n    /// The file exists but another process was holding an flock on it.\n    LockedByOtherProcess {\n        not_locked_file: fs::File,\n        content: String,\n    },\n}\n\n/// Open & try to lock the lock file at the given `path`, returning a [handle][`LockFileRead`] to\n/// inspect its content.\n///\n/// It is not an `Err(...)` if the file does not exist or is already locked.\n/// Check the [`LockFileRead`] variants for details.\npub fn read_and_hold_lock_file(path: &Utf8Path) -> anyhow::Result<LockFileRead> {\n    let res = fs::OpenOptions::new().read(true).open(path);\n    let lock_file = match res {\n        Ok(f) => f,\n        Err(e) => match e.kind() {\n            std::io::ErrorKind::NotFound => return Ok(LockFileRead::NotExist),\n            _ => return Err(e).context(\"open lock file\"),\n        },\n    };\n    let res 
= Flock::lock(lock_file, FlockArg::LockExclusiveNonblock);\n    // We need the content regardless of lock success / failure.\n    // But, read it after flock so that, if it succeeded, the content is consistent.\n    match res {\n        Ok(mut locked_file) => {\n            let mut content = String::new();\n            locked_file\n                .read_to_string(&mut content)\n                .context(\"read lock file\")?;\n            Ok(LockFileRead::NotHeldByAnyProcess(\n                LockFileGuard(locked_file),\n                content,\n            ))\n        }\n        Err((mut not_locked_file, EAGAIN)) => {\n            let mut content = String::new();\n            not_locked_file\n                .read_to_string(&mut content)\n                .context(\"read lock file\")?;\n            Ok(LockFileRead::LockedByOtherProcess {\n                not_locked_file,\n                content,\n            })\n        }\n        Err((_, e)) => Err(e).context(\"flock error\"),\n    }\n}\n"
  },
  {
    "path": "libs/utils/src/logging.rs",
    "content": "use std::future::Future;\nuse std::pin::Pin;\nuse std::str::FromStr;\nuse std::time::Duration;\n\nuse anyhow::Context;\nuse metrics::{IntCounter, IntCounterVec};\nuse once_cell::sync::Lazy;\nuse strum_macros::{EnumString, VariantNames};\nuse tokio::time::Instant;\nuse tracing::{info, warn};\n\n/// Logs a critical error, similarly to `tracing::error!`. This will:\n///\n/// * Emit an ERROR log message with prefix \"CRITICAL:\" and a backtrace.\n/// * Trigger a pageable alert (via the metric below).\n/// * Increment libmetrics_tracing_event_count{level=\"critical\"}, and indirectly level=\"error\".\n/// * In debug builds, panic the process.\n///\n/// When including errors in the message, please use {err:?} to include the error cause and original\n/// backtrace.\n#[macro_export]\nmacro_rules! critical {\n    ($($arg:tt)*) => {{\n        if cfg!(debug_assertions) {\n            panic!($($arg)*);\n        }\n        // Increment both metrics\n        $crate::logging::TRACING_EVENT_COUNT_METRIC.inc_critical();\n        let backtrace = std::backtrace::Backtrace::capture();\n        tracing::error!(\"CRITICAL: {}\\n{backtrace}\", format!($($arg)*));\n    }};\n}\n\n#[macro_export]\nmacro_rules! 
critical_timeline {\n    ($tenant_shard_id:expr, $timeline_id:expr, $corruption_detected:expr, $($arg:tt)*) => {{\n        if cfg!(debug_assertions) {\n            panic!($($arg)*);\n        }\n        // Increment both metrics\n        $crate::logging::TRACING_EVENT_COUNT_METRIC.inc_critical();\n        $crate::logging::HADRON_CRITICAL_STORAGE_EVENT_COUNT_METRIC.inc(&$tenant_shard_id.to_string(), &$timeline_id.to_string());\n        if let Some(c) = $corruption_detected.as_ref() {\n            c.store(true, std::sync::atomic::Ordering::Relaxed);\n        }\n        let backtrace = std::backtrace::Backtrace::capture();\n        tracing::error!(\"CRITICAL: [tenant_shard_id: {}, timeline_id: {}] {}\\n{backtrace}\",\n                       $tenant_shard_id, $timeline_id, format!($($arg)*));\n    }};\n}\n\n#[derive(EnumString, strum_macros::Display, VariantNames, Eq, PartialEq, Debug, Clone, Copy)]\n#[strum(serialize_all = \"snake_case\")]\npub enum LogFormat {\n    Plain,\n    Json,\n    Test,\n}\n\nimpl LogFormat {\n    pub fn from_config(s: &str) -> anyhow::Result<LogFormat> {\n        use strum::VariantNames;\n        LogFormat::from_str(s).with_context(|| {\n            format!(\n                \"Unrecognized log format. Please specify one of: {:?}\",\n                LogFormat::VARIANTS\n            )\n        })\n    }\n}\n\npub struct TracingEventCountMetric {\n    /// CRITICAL is not a `tracing` log level. Instead, we increment it in the `critical!` macro,\n    /// and also emit it as a regular error. 
These are thus double-counted, but that seems fine.\n    critical: IntCounter,\n    error: IntCounter,\n    warn: IntCounter,\n    info: IntCounter,\n    debug: IntCounter,\n    trace: IntCounter,\n}\n\n// Begin Hadron: Add a HadronCriticalStorageEventCountMetric metric that is sliced by tenant_id and timeline_id\npub struct HadronCriticalStorageEventCountMetric {\n    critical: IntCounterVec,\n}\n\npub static HADRON_CRITICAL_STORAGE_EVENT_COUNT_METRIC: Lazy<HadronCriticalStorageEventCountMetric> =\n    Lazy::new(|| {\n        let vec = metrics::register_int_counter_vec!(\n            \"hadron_critical_storage_event_count\",\n            \"Number of critical storage events, by tenant_id and timeline_id\",\n            &[\"tenant_shard_id\", \"timeline_id\"]\n        )\n        .expect(\"failed to define metric\");\n        HadronCriticalStorageEventCountMetric::new(vec)\n    });\n\nimpl HadronCriticalStorageEventCountMetric {\n    fn new(vec: IntCounterVec) -> Self {\n        Self { critical: vec }\n    }\n\n    // Allow public access from `critical!` macro.\n    pub fn inc(&self, tenant_shard_id: &str, timeline_id: &str) {\n        self.critical\n            .with_label_values(&[tenant_shard_id, timeline_id])\n            .inc();\n    }\n}\n// End Hadron\n\npub static TRACING_EVENT_COUNT_METRIC: Lazy<TracingEventCountMetric> = Lazy::new(|| {\n    let vec = metrics::register_int_counter_vec!(\n        \"libmetrics_tracing_event_count\",\n        \"Number of tracing events, by level\",\n        &[\"level\"]\n    )\n    .expect(\"failed to define metric\");\n    TracingEventCountMetric::new(vec)\n});\n\nimpl TracingEventCountMetric {\n    fn new(vec: IntCounterVec) -> Self {\n        Self {\n            critical: vec.with_label_values(&[\"critical\"]),\n            error: vec.with_label_values(&[\"error\"]),\n            warn: vec.with_label_values(&[\"warn\"]),\n            info: vec.with_label_values(&[\"info\"]),\n            debug: 
vec.with_label_values(&[\"debug\"]),\n            trace: vec.with_label_values(&[\"trace\"]),\n        }\n    }\n\n    // Allow public access from `critical!` macro.\n    pub fn inc_critical(&self) {\n        self.critical.inc();\n    }\n\n    fn inc_for_level(&self, level: tracing::Level) {\n        let counter = match level {\n            tracing::Level::ERROR => &self.error,\n            tracing::Level::WARN => &self.warn,\n            tracing::Level::INFO => &self.info,\n            tracing::Level::DEBUG => &self.debug,\n            tracing::Level::TRACE => &self.trace,\n        };\n        counter.inc();\n    }\n}\n\nstruct TracingEventCountLayer(&'static TracingEventCountMetric);\n\nimpl<S> tracing_subscriber::layer::Layer<S> for TracingEventCountLayer\nwhere\n    S: tracing::Subscriber,\n{\n    fn on_event(\n        &self,\n        event: &tracing::Event<'_>,\n        _ctx: tracing_subscriber::layer::Context<'_, S>,\n    ) {\n        self.0.inc_for_level(*event.metadata().level());\n    }\n}\n\n/// Whether to add the `tracing_error` crate's `ErrorLayer`\n/// to the global tracing subscriber.\n///\npub enum TracingErrorLayerEnablement {\n    /// Do not add the `ErrorLayer`.\n    Disabled,\n    /// Add the `ErrorLayer` with the filter specified by RUST_LOG, defaulting to `info` if `RUST_LOG` is unset.\n    EnableWithRustLogFilter,\n}\n\n/// Where the logging should output to.\n#[derive(Clone, Copy)]\npub enum Output {\n    Stdout,\n    Stderr,\n}\n\npub fn init(\n    log_format: LogFormat,\n    tracing_error_layer_enablement: TracingErrorLayerEnablement,\n    output: Output,\n) -> anyhow::Result<()> {\n    // We fall back to printing all spans at info-level or above if\n    // the RUST_LOG environment variable is not set.\n    let rust_log_env_filter = || {\n        tracing_subscriber::EnvFilter::try_from_default_env()\n            .unwrap_or_else(|_| tracing_subscriber::EnvFilter::new(\"info\"))\n    };\n\n    // NB: the order of the with() calls does not 
matter.\n    // See https://docs.rs/tracing-subscriber/0.3.16/tracing_subscriber/layer/index.html#per-layer-filtering\n    use tracing_subscriber::prelude::*;\n    let r = tracing_subscriber::registry();\n    let r = r.with({\n        let log_layer = tracing_subscriber::fmt::layer()\n            .with_target(false)\n            .with_ansi(false)\n            .with_writer(move || -> Box<dyn std::io::Write> {\n                match output {\n                    Output::Stdout => Box::new(std::io::stdout()),\n                    Output::Stderr => Box::new(std::io::stderr()),\n                }\n            });\n        let log_layer = match log_format {\n            LogFormat::Json => log_layer.json().boxed(),\n            LogFormat::Plain => log_layer.boxed(),\n            LogFormat::Test => log_layer.with_test_writer().boxed(),\n        };\n        log_layer.with_filter(rust_log_env_filter())\n    });\n\n    let r = r.with(\n        TracingEventCountLayer(&TRACING_EVENT_COUNT_METRIC).with_filter(rust_log_env_filter()),\n    );\n    match tracing_error_layer_enablement {\n        TracingErrorLayerEnablement::EnableWithRustLogFilter => r\n            .with(tracing_error::ErrorLayer::default().with_filter(rust_log_env_filter()))\n            .init(),\n        TracingErrorLayerEnablement::Disabled => r.init(),\n    }\n\n    Ok(())\n}\n\n/// Disable the default rust panic hook by using `set_hook`.\n///\n/// For neon binaries, the assumption is that tracing is configured before with [`init`], after\n/// that sentry is configured (if needed). 
sentry will install it's own on top of this, always\n/// processing the panic before we log it.\n///\n/// When the return value is dropped, the hook is reverted to std default hook (prints to stderr).\n/// If the assumptions about the initialization order are not held, use\n/// [`TracingPanicHookGuard::forget`] but keep in mind, if tracing is stopped, then panics will be\n/// lost.\n#[must_use]\npub fn replace_panic_hook_with_tracing_panic_hook() -> TracingPanicHookGuard {\n    std::panic::set_hook(Box::new(tracing_panic_hook));\n    TracingPanicHookGuard::new()\n}\n\n/// Drop guard which restores the std panic hook on drop.\n///\n/// Tracing should not be used when it's not configured, but we cannot really latch on to any\n/// imaginary lifetime of tracing.\npub struct TracingPanicHookGuard {\n    act: bool,\n}\n\nimpl TracingPanicHookGuard {\n    fn new() -> Self {\n        TracingPanicHookGuard { act: true }\n    }\n\n    /// Make this hook guard not do anything when dropped.\n    pub fn forget(&mut self) {\n        self.act = false;\n    }\n}\n\nimpl Drop for TracingPanicHookGuard {\n    fn drop(&mut self) {\n        if self.act {\n            let _ = std::panic::take_hook();\n        }\n    }\n}\n\n/// Named symbol for our panic hook, which logs the panic.\nfn tracing_panic_hook(info: &std::panic::PanicHookInfo) {\n    // following rust 1.66.1 std implementation:\n    // https://github.com/rust-lang/rust/blob/90743e7298aca107ddaa0c202a4d3604e29bfeb6/library/std/src/panicking.rs#L235-L288\n    let location = info.location();\n\n    let msg = match info.payload().downcast_ref::<&'static str>() {\n        Some(s) => *s,\n        None => match info.payload().downcast_ref::<String>() {\n            Some(s) => &s[..],\n            None => \"Box<dyn Any>\",\n        },\n    };\n\n    let thread = std::thread::current();\n    let thread = thread.name().unwrap_or(\"<unnamed>\");\n    let backtrace = std::backtrace::Backtrace::capture();\n\n    let _entered = if let 
Some(location) = location {\n        tracing::error_span!(\"panic\", %thread, location = %PrettyLocation(location))\n    } else {\n        // very unlikely to hit here, but the guarantees of std could change\n        tracing::error_span!(\"panic\", %thread)\n    }\n    .entered();\n\n    if backtrace.status() == std::backtrace::BacktraceStatus::Captured {\n        // this has an annoying extra '\\n' in the end which anyhow doesn't do, but we cannot really\n        // get rid of it as we cannot get in between of std::fmt::Formatter<'_>; we could format to\n        // string, maybe even to a TLS one but tracing already does that.\n        tracing::error!(\"{msg}\\n\\nStack backtrace:\\n{backtrace}\");\n    } else {\n        tracing::error!(\"{msg}\");\n    }\n\n    // ensure that we log something on the panic if this hook is left after tracing has been\n    // unconfigured. worst case when teardown is racing the panic is to log the panic twice.\n    tracing::dispatcher::get_default(|d| {\n        if let Some(_none) = d.downcast_ref::<tracing::subscriber::NoSubscriber>() {\n            let location = location.map(PrettyLocation);\n            log_panic_to_stderr(thread, msg, location, &backtrace);\n        }\n    });\n}\n\n#[cold]\nfn log_panic_to_stderr(\n    thread: &str,\n    msg: &str,\n    location: Option<PrettyLocation<'_, '_>>,\n    backtrace: &std::backtrace::Backtrace,\n) {\n    eprintln!(\n        \"panic while tracing is unconfigured: thread '{thread}' panicked at '{msg}', {location:?}\\nStack backtrace:\\n{backtrace}\"\n    );\n}\n\nstruct PrettyLocation<'a, 'b>(&'a std::panic::Location<'b>);\n\nimpl std::fmt::Display for PrettyLocation<'_, '_> {\n    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {\n        write!(f, \"{}:{}:{}\", self.0.file(), self.0.line(), self.0.column())\n    }\n}\n\nimpl std::fmt::Debug for PrettyLocation<'_, '_> {\n    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {\n        <Self as 
std::fmt::Display>::fmt(self, f)\n    }\n}\n\n/// When you will store a secret but want to make sure it won't\n/// be accidentally logged, wrap it in a SecretString, whose Debug\n/// implementation does not expose the contents.\n#[derive(Clone, Eq, PartialEq)]\npub struct SecretString(String);\n\nimpl SecretString {\n    pub fn get_contents(&self) -> &str {\n        self.0.as_str()\n    }\n}\n\nimpl From<String> for SecretString {\n    fn from(s: String) -> Self {\n        Self(s)\n    }\n}\n\nimpl FromStr for SecretString {\n    type Err = std::convert::Infallible;\n\n    fn from_str(s: &str) -> Result<Self, Self::Err> {\n        Ok(Self(s.to_string()))\n    }\n}\n\nimpl std::fmt::Debug for SecretString {\n    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {\n        write!(f, \"[SECRET]\")\n    }\n}\n\n/// Logs a periodic message if a future is slow to complete.\n///\n/// This is performance-sensitive as it's used on the GetPage read path.\n///\n/// TODO: consider upgrading this to a warning, but currently it fires too often.\n#[inline]\npub async fn log_slow<O>(\n    name: &str,\n    threshold: Duration,\n    f: Pin<&mut impl Future<Output = O>>,\n) -> O {\n    monitor_slow_future(\n        threshold,\n        threshold, // period = threshold\n        f,\n        |MonitorSlowFutureCallback {\n             ready,\n             is_slow,\n             elapsed_total,\n             elapsed_since_last_callback: _,\n         }| {\n            if !is_slow {\n                return;\n            }\n            let elapsed = elapsed_total.as_secs_f64();\n            if ready {\n                info!(\"slow {name} completed after {elapsed:.3}s\");\n            } else {\n                info!(\"slow {name} still running after {elapsed:.3}s\");\n            }\n        },\n    )\n    .await\n}\n\n/// Logs a periodic warning if a future is slow to complete.\n#[inline]\npub async fn warn_slow<O>(\n    name: &str,\n    threshold: Duration,\n    f: Pin<&mut 
impl Future<Output = O>>,\n) -> O {\n    monitor_slow_future(\n        threshold,\n        threshold, // period = threshold\n        f,\n        |MonitorSlowFutureCallback {\n             ready,\n             is_slow,\n             elapsed_total,\n             elapsed_since_last_callback: _,\n         }| {\n            if !is_slow {\n                return;\n            }\n            let elapsed = elapsed_total.as_secs_f64();\n            if ready {\n                warn!(\"slow {name} completed after {elapsed:.3}s\");\n            } else {\n                warn!(\"slow {name} still running after {elapsed:.3}s\");\n            }\n        },\n    )\n    .await\n}\n\n/// Poll future `fut` to completion, invoking callback `cb` at the given `threshold` and every\n/// `period` afterwards, and also unconditionally when the future completes.\n#[inline]\npub async fn monitor_slow_future<F, O>(\n    threshold: Duration,\n    period: Duration,\n    mut fut: Pin<&mut F>,\n    mut cb: impl FnMut(MonitorSlowFutureCallback),\n) -> O\nwhere\n    F: Future<Output = O>,\n{\n    let started = Instant::now();\n    let mut attempt = 1;\n    let mut last_cb = started;\n    loop {\n        // NB: use timeout_at() instead of timeout() to avoid an extra clock reading in the common\n        // case where the timeout doesn't fire.\n        let deadline = started + threshold + (attempt - 1) * period;\n        // TODO: still call the callback if the future panics? 
Copy how we do it for the page_service flush_in_progress counter.\n        let res = tokio::time::timeout_at(deadline, &mut fut).await;\n        let now = Instant::now();\n        let elapsed_total = now - started;\n        cb(MonitorSlowFutureCallback {\n            ready: res.is_ok(),\n            is_slow: elapsed_total >= threshold,\n            elapsed_total,\n            elapsed_since_last_callback: now - last_cb,\n        });\n        last_cb = now;\n        if let Ok(output) = res {\n            return output;\n        }\n        attempt += 1;\n    }\n}\n\n/// See [`monitor_slow_future`].\npub struct MonitorSlowFutureCallback {\n    /// Whether the future completed. If true, there will be no more callbacks.\n    pub ready: bool,\n    /// Whether the future is taking `>=` the specified threshold duration to complete.\n    /// Monotonic: if true in one callback invocation, true in all subsequent ones.\n    pub is_slow: bool,\n    /// The time elapsed since the [`monitor_slow_future`] was first polled.\n    pub elapsed_total: Duration,\n    /// The time elapsed since the last callback invocation.\n    /// For the initial callback invocation, the time elapsed since the [`monitor_slow_future`] was first polled.\n    pub elapsed_since_last_callback: Duration,\n}\n\n#[cfg(test)]\nmod tests {\n    use metrics::IntCounterVec;\n    use metrics::core::Opts;\n\n    use crate::logging::{TracingEventCountLayer, TracingEventCountMetric};\n\n    #[test]\n    fn tracing_event_count_metric() {\n        let counter_vec =\n            IntCounterVec::new(Opts::new(\"testmetric\", \"testhelp\"), &[\"level\"]).unwrap();\n        let metric = Box::leak(Box::new(TracingEventCountMetric::new(counter_vec.clone())));\n        let layer = TracingEventCountLayer(metric);\n        use tracing_subscriber::prelude::*;\n\n        tracing::subscriber::with_default(tracing_subscriber::registry().with(layer), || {\n            tracing::trace!(\"foo\");\n            tracing::debug!(\"foo\");\n 
           tracing::info!(\"foo\");\n            tracing::warn!(\"foo\");\n            tracing::error!(\"foo\");\n        });\n\n        assert_eq!(counter_vec.with_label_values(&[\"trace\"]).get(), 1);\n        assert_eq!(counter_vec.with_label_values(&[\"debug\"]).get(), 1);\n        assert_eq!(counter_vec.with_label_values(&[\"info\"]).get(), 1);\n        assert_eq!(counter_vec.with_label_values(&[\"warn\"]).get(), 1);\n        assert_eq!(counter_vec.with_label_values(&[\"error\"]).get(), 1);\n    }\n}\n"
  },
  {
    "path": "libs/utils/src/lsn.rs",
    "content": "#![warn(missing_docs)]\n\nuse std::fmt;\nuse std::ops::{Add, AddAssign};\nuse std::str::FromStr;\nuse std::sync::atomic::{AtomicU64, Ordering};\n\nuse serde::de::Visitor;\nuse serde::{Deserialize, Serialize};\n\nuse crate::seqwait::MonotonicCounter;\n\n/// Transaction log block size in bytes\npub const XLOG_BLCKSZ: u32 = 8192;\n\n/// A Postgres LSN (Log Sequence Number), also known as an XLogRecPtr\n#[derive(Clone, Copy, Default, Eq, Ord, PartialEq, PartialOrd, Hash)]\npub struct Lsn(pub u64);\n\nimpl Serialize for Lsn {\n    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>\n    where\n        S: serde::Serializer,\n    {\n        if serializer.is_human_readable() {\n            serializer.collect_str(self)\n        } else {\n            self.0.serialize(serializer)\n        }\n    }\n}\n\nimpl<'de> Deserialize<'de> for Lsn {\n    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>\n    where\n        D: serde::Deserializer<'de>,\n    {\n        struct LsnVisitor {\n            is_human_readable_deserializer: bool,\n        }\n\n        impl Visitor<'_> for LsnVisitor {\n            type Value = Lsn;\n\n            fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {\n                if self.is_human_readable_deserializer {\n                    formatter.write_str(\n                        \"value in form of hex string({upper_u32_hex}/{lower_u32_hex}) representing u64 integer\",\n                    )\n                } else {\n                    formatter.write_str(\"value in form of integer(u64)\")\n                }\n            }\n\n            fn visit_u64<E>(self, v: u64) -> Result<Self::Value, E>\n            where\n                E: serde::de::Error,\n            {\n                Ok(Lsn(v))\n            }\n\n            fn visit_str<E>(self, v: &str) -> Result<Self::Value, E>\n            where\n                E: serde::de::Error,\n            {\n                Lsn::from_str(v).map_err(|e| 
E::custom(e))\n            }\n        }\n\n        if deserializer.is_human_readable() {\n            deserializer.deserialize_str(LsnVisitor {\n                is_human_readable_deserializer: true,\n            })\n        } else {\n            deserializer.deserialize_u64(LsnVisitor {\n                is_human_readable_deserializer: false,\n            })\n        }\n    }\n}\n\n/// Allows (de)serialization of an `Lsn` always as `u64`.\n///\n/// ### Example\n///\n/// ```rust\n/// # use serde::{Serialize, Deserialize};\n/// use utils::lsn::Lsn;\n///\n/// #[derive(PartialEq, Serialize, Deserialize, Debug)]\n/// struct Foo {\n///   #[serde(with = \"utils::lsn::serde_as_u64\")]\n///   always_u64: Lsn,\n/// }\n///\n/// let orig = Foo { always_u64: Lsn(1234) };\n///\n/// let res = serde_json::to_string(&orig).unwrap();\n/// assert_eq!(res, r#\"{\"always_u64\":1234}\"#);\n///\n/// let foo = serde_json::from_str::<Foo>(&res).unwrap();\n/// assert_eq!(foo, orig);\n/// ```\n///\npub mod serde_as_u64 {\n    use super::Lsn;\n\n    /// Serializes the Lsn as u64 disregarding the human readability of the format.\n    ///\n    /// Meant to be used via `#[serde(with = \"...\")]` or `#[serde(serialize_with = \"...\")]`.\n    pub fn serialize<S: serde::Serializer>(lsn: &Lsn, serializer: S) -> Result<S::Ok, S::Error> {\n        use serde::Serialize;\n        lsn.0.serialize(serializer)\n    }\n\n    /// Deserializes the Lsn as u64 disregarding the human readability of the format.\n    ///\n    /// Meant to be used via `#[serde(with = \"...\")]` or `#[serde(deserialize_with = \"...\")]`.\n    pub fn deserialize<'de, D: serde::Deserializer<'de>>(deserializer: D) -> Result<Lsn, D::Error> {\n        use serde::Deserialize;\n        u64::deserialize(deserializer).map(Lsn)\n    }\n}\n\n/// We tried to parse an LSN from a string, but failed\n#[derive(Debug, PartialEq, Eq, thiserror::Error)]\n#[error(\"LsnParseError\")]\npub struct LsnParseError;\n\nimpl Lsn {\n    /// Maximum possible 
value for an LSN\n    pub const MAX: Lsn = Lsn(u64::MAX);\n\n    /// Invalid value for InvalidXLogRecPtr, as defined in xlogdefs.h\n    pub const INVALID: Lsn = Lsn(0);\n\n    /// Subtract a number, returning None on overflow.\n    pub fn checked_sub<T: Into<u64>>(self, other: T) -> Option<Lsn> {\n        let other: u64 = other.into();\n        self.0.checked_sub(other).map(Lsn)\n    }\n\n    /// Subtract a number, saturating at numeric bounds instead of overflowing.\n    pub fn saturating_sub<T: Into<u64>>(self, other: T) -> Lsn {\n        Lsn(self.0.saturating_sub(other.into()))\n    }\n\n    /// Subtract a number, returning the difference as i128 to avoid overflow.\n    pub fn widening_sub<T: Into<u64>>(self, other: T) -> i128 {\n        let other: u64 = other.into();\n        i128::from(self.0) - i128::from(other)\n    }\n\n    /// Parse an LSN from a string in the form `0000000000000000`\n    pub fn from_hex<S>(s: S) -> Result<Self, LsnParseError>\n    where\n        S: AsRef<str>,\n    {\n        let s: &str = s.as_ref();\n        let n = u64::from_str_radix(s, 16).or(Err(LsnParseError))?;\n        Ok(Lsn(n))\n    }\n\n    /// Compute the offset into a segment\n    #[inline]\n    pub fn segment_offset(self, seg_sz: usize) -> usize {\n        (self.0 % seg_sz as u64) as usize\n    }\n\n    /// Compute LSN of the segment start.\n    #[inline]\n    pub fn segment_lsn(self, seg_sz: usize) -> Lsn {\n        Lsn(self.0 - (self.0 % seg_sz as u64))\n    }\n\n    /// Compute the segment number\n    #[inline]\n    pub fn segment_number(self, seg_sz: usize) -> u64 {\n        self.0 / seg_sz as u64\n    }\n\n    /// Compute the offset into a block\n    #[inline]\n    pub fn block_offset(self) -> u64 {\n        const BLCKSZ: u64 = XLOG_BLCKSZ as u64;\n        self.0 % BLCKSZ\n    }\n\n    /// Compute the LSN of the start of the block (page) that contains\n    /// this LSN\n    #[inline]\n    pub fn page_lsn(self) -> Lsn {\n        Lsn(self.0 - self.block_offset())\n    
}\n\n    /// Compute the block offset of the first byte of this Lsn within this\n    /// segment\n    #[inline]\n    pub fn page_offset_in_segment(self, seg_sz: usize) -> u64 {\n        (self.0 - self.block_offset()) - self.segment_lsn(seg_sz).0\n    }\n\n    /// Compute the bytes remaining in this block\n    ///\n    /// If the LSN is already at the block boundary, it will return `XLOG_BLCKSZ`.\n    #[inline]\n    pub fn remaining_in_block(self) -> u64 {\n        const BLCKSZ: u64 = XLOG_BLCKSZ as u64;\n        BLCKSZ - (self.0 % BLCKSZ)\n    }\n\n    /// Compute the bytes remaining to fill a chunk of some size\n    ///\n    /// If the LSN is already at the chunk boundary, it will return 0.\n    pub fn calc_padding<T: Into<u64>>(self, sz: T) -> u64 {\n        let sz: u64 = sz.into();\n        // By using wrapping_sub, we can subtract first and then mod second.\n        // If it's done the other way around, then we would return a full\n        // chunk size if we're already at the chunk boundary.\n        // (Regular subtraction will panic on overflow in debug builds.)\n        (sz.wrapping_sub(self.0)) % sz\n    }\n\n    /// Align LSN on 8-byte boundary (alignment of WAL records).\n    pub fn align(&self) -> Lsn {\n        Lsn((self.0 + 7) & !7)\n    }\n\n    /// Check whether the LSN is aligned on an 8-byte boundary (alignment of WAL records).\n    pub fn is_aligned(&self) -> bool {\n        *self == self.align()\n    }\n\n    /// Return if the LSN is valid\n    /// mimics postgres XLogRecPtrIsInvalid macro\n    pub fn is_valid(self) -> bool {\n        self != Lsn::INVALID\n    }\n}\n\nimpl From<u64> for Lsn {\n    fn from(n: u64) -> Self {\n        Lsn(n)\n    }\n}\n\nimpl From<Lsn> for u64 {\n    fn from(lsn: Lsn) -> u64 {\n        lsn.0\n    }\n}\n\nimpl FromStr for Lsn {\n    type Err = LsnParseError;\n\n    /// Parse an LSN from a string in the form `00000000/00000000`\n    ///\n    /// If the input string is missing the '/' character, then use `Lsn::from_hex`\n    fn from_str(s: 
&str) -> Result<Self, Self::Err> {\n        let mut splitter = s.trim().split('/');\n        if let (Some(left), Some(right), None) = (splitter.next(), splitter.next(), splitter.next())\n        {\n            let left_num = u32::from_str_radix(left, 16).map_err(|_| LsnParseError)?;\n            let right_num = u32::from_str_radix(right, 16).map_err(|_| LsnParseError)?;\n            Ok(Lsn(((left_num as u64) << 32) | right_num as u64))\n        } else {\n            Err(LsnParseError)\n        }\n    }\n}\n\nimpl fmt::Display for Lsn {\n    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {\n        write!(f, \"{:X}/{:X}\", self.0 >> 32, self.0 & 0xffffffff)\n    }\n}\n\nimpl fmt::Debug for Lsn {\n    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {\n        write!(f, \"{:X}/{:X}\", self.0 >> 32, self.0 & 0xffffffff)\n    }\n}\n\nimpl Add<u64> for Lsn {\n    type Output = Lsn;\n\n    fn add(self, other: u64) -> Self::Output {\n        // panic if the addition overflows.\n        Lsn(self.0.checked_add(other).unwrap())\n    }\n}\n\nimpl AddAssign<u64> for Lsn {\n    fn add_assign(&mut self, other: u64) {\n        // panic if the addition overflows.\n        self.0 = self.0.checked_add(other).unwrap();\n    }\n}\n\n/// An [`Lsn`] that can be accessed atomically.\npub struct AtomicLsn {\n    inner: AtomicU64,\n}\n\nimpl AtomicLsn {\n    /// Creates a new atomic `Lsn`.\n    pub fn new(val: u64) -> Self {\n        AtomicLsn {\n            inner: AtomicU64::new(val),\n        }\n    }\n\n    /// Atomically retrieve the `Lsn` value from memory.\n    pub fn load(&self) -> Lsn {\n        Lsn(self.inner.load(Ordering::Acquire))\n    }\n\n    /// Atomically store a new `Lsn` value to memory.\n    pub fn store(&self, lsn: Lsn) {\n        self.inner.store(lsn.0, Ordering::Release);\n    }\n\n    /// Adds to the current value, returning the previous value.\n    ///\n    /// This operation will panic on overflow.\n    pub fn fetch_add(&self, val: u64) -> Lsn {\n 
       let prev = self.inner.fetch_add(val, Ordering::AcqRel);\n        assert!(prev.checked_add(val).is_some(), \"AtomicLsn overflow\");\n        Lsn(prev)\n    }\n\n    /// Atomically sets the Lsn to the max of old and new value, returning the old value.\n    pub fn fetch_max(&self, lsn: Lsn) -> Lsn {\n        let prev = self.inner.fetch_max(lsn.0, Ordering::AcqRel);\n        Lsn(prev)\n    }\n}\n\nimpl From<Lsn> for AtomicLsn {\n    fn from(lsn: Lsn) -> Self {\n        Self::new(lsn.0)\n    }\n}\n\n/// Pair of LSN's pointing to the end of the last valid record and previous one\n#[derive(Debug, Clone, Copy)]\npub struct RecordLsn {\n    /// LSN at the end of the current record\n    pub last: Lsn,\n    /// LSN at the end of the previous record\n    pub prev: Lsn,\n}\n\n/// Expose `self.last` as counter to be able to use RecordLsn in SeqWait\nimpl MonotonicCounter<Lsn> for RecordLsn {\n    fn cnt_advance(&mut self, lsn: Lsn) {\n        assert!(self.last <= lsn);\n        let new_prev = self.last;\n        self.last = lsn;\n        self.prev = new_prev;\n    }\n    fn cnt_value(&self) -> Lsn {\n        self.last\n    }\n}\n\n/// Implements  [`rand::distr::uniform::UniformSampler`] so we can sample [`Lsn`]s.\n///\n/// This is used by the `pagebench` pageserver benchmarking tool.\npub struct LsnSampler(<u64 as rand::distr::uniform::SampleUniform>::Sampler);\n\nimpl rand::distr::uniform::SampleUniform for Lsn {\n    type Sampler = LsnSampler;\n}\n\nimpl rand::distr::uniform::UniformSampler for LsnSampler {\n    type X = Lsn;\n\n    fn new<B1, B2>(low: B1, high: B2) -> Result<Self, rand::distr::uniform::Error>\n    where\n        B1: rand::distr::uniform::SampleBorrow<Self::X> + Sized,\n        B2: rand::distr::uniform::SampleBorrow<Self::X> + Sized,\n    {\n        <u64 as rand::distr::uniform::SampleUniform>::Sampler::new(low.borrow().0, high.borrow().0)\n            .map(Self)\n    }\n\n    fn new_inclusive<B1, B2>(low: B1, high: B2) -> Result<Self, 
rand::distr::uniform::Error>\n    where\n        B1: rand::distr::uniform::SampleBorrow<Self::X> + Sized,\n        B2: rand::distr::uniform::SampleBorrow<Self::X> + Sized,\n    {\n        <u64 as rand::distr::uniform::SampleUniform>::Sampler::new_inclusive(\n            low.borrow().0,\n            high.borrow().0,\n        )\n        .map(Self)\n    }\n\n    fn sample<R: rand::prelude::Rng + ?Sized>(&self, rng: &mut R) -> Self::X {\n        Lsn(self.0.sample(rng))\n    }\n}\n\n#[cfg(test)]\nmod tests {\n    use serde_assert::{Deserializer, Serializer, Token, Tokens};\n\n    use super::*;\n    use crate::bin_ser::BeSer;\n\n    #[test]\n    fn test_lsn_strings() {\n        assert_eq!(\"12345678/AAAA5555\".parse(), Ok(Lsn(0x12345678AAAA5555)));\n        assert_eq!(\"aaaa/bbbb\".parse(), Ok(Lsn(0x0000AAAA0000BBBB)));\n        assert_eq!(\"1/A\".parse(), Ok(Lsn(0x000000010000000A)));\n        assert_eq!(\"0/0\".parse(), Ok(Lsn(0)));\n        \"ABCDEFG/12345678\".parse::<Lsn>().unwrap_err();\n        \"123456789/AAAA5555\".parse::<Lsn>().unwrap_err();\n        \"12345678/AAAA55550\".parse::<Lsn>().unwrap_err();\n        \"-1/0\".parse::<Lsn>().unwrap_err();\n        \"1/-1\".parse::<Lsn>().unwrap_err();\n\n        assert_eq!(format!(\"{}\", Lsn(0x12345678AAAA5555)), \"12345678/AAAA5555\");\n        assert_eq!(format!(\"{}\", Lsn(0x000000010000000A)), \"1/A\");\n\n        assert_eq!(\n            Lsn::from_hex(\"12345678AAAA5555\"),\n            Ok(Lsn(0x12345678AAAA5555))\n        );\n        assert_eq!(Lsn::from_hex(\"0\"), Ok(Lsn(0)));\n        assert_eq!(Lsn::from_hex(\"F12345678AAAA5555\"), Err(LsnParseError));\n\n        let expected_lsn = Lsn(0x3C490F8);\n        assert_eq!(\" 0/3C490F8\".parse(), Ok(expected_lsn));\n        assert_eq!(\"0/3C490F8 \".parse(), Ok(expected_lsn));\n        assert_eq!(\" 0/3C490F8 \".parse(), Ok(expected_lsn));\n    }\n\n    #[test]\n    fn test_lsn_math() {\n        assert_eq!(Lsn(1234) + 11u64, Lsn(1245));\n\n        assert_eq!(\n   
         {\n                let mut lsn = Lsn(1234);\n                lsn += 11u64;\n                lsn\n            },\n            Lsn(1245)\n        );\n\n        assert_eq!(Lsn(1234).checked_sub(1233u64), Some(Lsn(1)));\n        assert_eq!(Lsn(1234).checked_sub(1235u64), None);\n\n        assert_eq!(Lsn(1235).widening_sub(1234u64), 1);\n        assert_eq!(Lsn(1234).widening_sub(1235u64), -1);\n        assert_eq!(Lsn(u64::MAX).widening_sub(0u64), i128::from(u64::MAX));\n        assert_eq!(Lsn(0).widening_sub(u64::MAX), -i128::from(u64::MAX));\n\n        let seg_sz: usize = 16 * 1024 * 1024;\n        assert_eq!(Lsn(0x1000007).segment_offset(seg_sz), 7);\n        assert_eq!(Lsn(0x1000007).segment_number(seg_sz), 1u64);\n\n        assert_eq!(Lsn(0x4007).block_offset(), 7u64);\n        assert_eq!(Lsn(0x4000).block_offset(), 0u64);\n        assert_eq!(Lsn(0x4007).remaining_in_block(), 8185u64);\n        assert_eq!(Lsn(0x4000).remaining_in_block(), 8192u64);\n\n        assert_eq!(Lsn(0xffff01).calc_padding(seg_sz as u64), 255u64);\n        assert_eq!(Lsn(0x2000000).calc_padding(seg_sz as u64), 0u64);\n        assert_eq!(Lsn(0xffff01).calc_padding(8u32), 7u64);\n        assert_eq!(Lsn(0xffff00).calc_padding(8u32), 0u64);\n    }\n\n    #[test]\n    fn test_atomic_lsn() {\n        let lsn = AtomicLsn::new(0);\n        assert_eq!(lsn.fetch_add(1234), Lsn(0));\n        assert_eq!(lsn.load(), Lsn(1234));\n        lsn.store(Lsn(5678));\n        assert_eq!(lsn.load(), Lsn(5678));\n\n        assert_eq!(lsn.fetch_max(Lsn(6000)), Lsn(5678));\n        assert_eq!(lsn.fetch_max(Lsn(5000)), Lsn(6000));\n    }\n\n    #[test]\n    fn test_lsn_serde() {\n        let original_lsn = Lsn(0x0123456789abcdef);\n        let expected_readable_tokens = Tokens(vec![Token::U64(0x0123456789abcdef)]);\n        let expected_non_readable_tokens =\n            Tokens(vec![Token::Str(String::from(\"1234567/89ABCDEF\"))]);\n\n        // Testing human_readable ser/de\n        let serializer = 
Serializer::builder().is_human_readable(false).build();\n        let readable_ser_tokens = original_lsn.serialize(&serializer).unwrap();\n        assert_eq!(readable_ser_tokens, expected_readable_tokens);\n\n        let mut deserializer = Deserializer::builder()\n            .is_human_readable(false)\n            .tokens(readable_ser_tokens)\n            .build();\n        let des_lsn = Lsn::deserialize(&mut deserializer).unwrap();\n        assert_eq!(des_lsn, original_lsn);\n\n        // Testing NON human_readable ser/de\n        let serializer = Serializer::builder().is_human_readable(true).build();\n        let non_readable_ser_tokens = original_lsn.serialize(&serializer).unwrap();\n        assert_eq!(non_readable_ser_tokens, expected_non_readable_tokens);\n\n        let mut deserializer = Deserializer::builder()\n            .is_human_readable(true)\n            .tokens(non_readable_ser_tokens)\n            .build();\n        let des_lsn = Lsn::deserialize(&mut deserializer).unwrap();\n        assert_eq!(des_lsn, original_lsn);\n\n        // Testing mismatching ser/de\n        let serializer = Serializer::builder().is_human_readable(false).build();\n        let non_readable_ser_tokens = original_lsn.serialize(&serializer).unwrap();\n\n        let mut deserializer = Deserializer::builder()\n            .is_human_readable(true)\n            .tokens(non_readable_ser_tokens)\n            .build();\n        Lsn::deserialize(&mut deserializer).unwrap_err();\n\n        let serializer = Serializer::builder().is_human_readable(true).build();\n        let readable_ser_tokens = original_lsn.serialize(&serializer).unwrap();\n\n        let mut deserializer = Deserializer::builder()\n            .is_human_readable(false)\n            .tokens(readable_ser_tokens)\n            .build();\n        Lsn::deserialize(&mut deserializer).unwrap_err();\n    }\n\n    #[test]\n    fn test_lsn_ensure_roundtrip() {\n        let original_lsn = Lsn(0xaaaabbbb);\n\n        let serializer = 
Serializer::builder().is_human_readable(false).build();\n        let ser_tokens = original_lsn.serialize(&serializer).unwrap();\n\n        let mut deserializer = Deserializer::builder()\n            .is_human_readable(false)\n            .tokens(ser_tokens)\n            .build();\n\n        let des_lsn = Lsn::deserialize(&mut deserializer).unwrap();\n        assert_eq!(des_lsn, original_lsn);\n    }\n\n    #[test]\n    fn test_lsn_bincode_serde() {\n        let lsn = Lsn(0x0123456789abcdef);\n        let expected_bytes = [0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef];\n\n        let ser_bytes = lsn.ser().unwrap();\n        assert_eq!(ser_bytes, expected_bytes);\n\n        let des_lsn = Lsn::des(&ser_bytes).unwrap();\n        assert_eq!(des_lsn, lsn);\n    }\n\n    #[test]\n    fn test_lsn_bincode_ensure_roundtrip() {\n        let original_lsn = Lsn(0x01_02_03_04_05_06_07_08);\n        let expected_bytes = vec![0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08];\n\n        let ser_bytes = original_lsn.ser().unwrap();\n        assert_eq!(ser_bytes, expected_bytes);\n\n        let des_lsn = Lsn::des(&ser_bytes).unwrap();\n        assert_eq!(des_lsn, original_lsn);\n    }\n}\n"
  },
  {
    "path": "libs/utils/src/measured_stream.rs",
    "content": "use std::io::Read;\nuse std::pin::Pin;\nuse std::{io, task};\n\nuse pin_project_lite::pin_project;\nuse tokio::io::{AsyncRead, AsyncWrite, ReadBuf};\n\npin_project! {\n    /// This stream tracks all writes and calls user provided\n    /// callback when the underlying stream is flushed.\n    pub struct MeasuredStream<S, R, W> {\n        #[pin]\n        stream: S,\n        write_count: usize,\n        inc_read_count: R,\n        inc_write_count: W,\n    }\n}\n\nimpl<S, R, W> MeasuredStream<S, R, W> {\n    pub fn new(stream: S, inc_read_count: R, inc_write_count: W) -> Self {\n        Self {\n            stream,\n            write_count: 0,\n            inc_read_count,\n            inc_write_count,\n        }\n    }\n}\n\nimpl<S: AsyncRead + Unpin, R: FnMut(usize), W> AsyncRead for MeasuredStream<S, R, W> {\n    fn poll_read(\n        self: Pin<&mut Self>,\n        context: &mut task::Context<'_>,\n        buf: &mut ReadBuf<'_>,\n    ) -> task::Poll<io::Result<()>> {\n        let this = self.project();\n        let filled = buf.filled().len();\n        this.stream.poll_read(context, buf).map_ok(|()| {\n            let cnt = buf.filled().len() - filled;\n            // Increment the read count.\n            (this.inc_read_count)(cnt);\n        })\n    }\n}\n\nimpl<S: AsyncWrite + Unpin, R, W: FnMut(usize)> AsyncWrite for MeasuredStream<S, R, W> {\n    fn poll_write(\n        self: Pin<&mut Self>,\n        context: &mut task::Context<'_>,\n        buf: &[u8],\n    ) -> task::Poll<io::Result<usize>> {\n        let this = self.project();\n        this.stream.poll_write(context, buf).map_ok(|cnt| {\n            // Increment the write count.\n            *this.write_count += cnt;\n            cnt\n        })\n    }\n\n    fn poll_flush(\n        self: Pin<&mut Self>,\n        context: &mut task::Context<'_>,\n    ) -> task::Poll<io::Result<()>> {\n        let this = self.project();\n        this.stream.poll_flush(context).map_ok(|()| {\n            // Call 
the user provided callback and reset the write count.\n            (this.inc_write_count)(*this.write_count);\n            *this.write_count = 0;\n        })\n    }\n\n    fn poll_shutdown(\n        self: Pin<&mut Self>,\n        context: &mut task::Context<'_>,\n    ) -> task::Poll<io::Result<()>> {\n        self.project().stream.poll_shutdown(context)\n    }\n}\n\n/// Wrapper for a reader that counts bytes read.\n///\n/// Similar to MeasuredStream but it's one way and it's sync\npub struct MeasuredReader<R: Read> {\n    inner: R,\n    byte_count: usize,\n}\n\nimpl<R: Read> MeasuredReader<R> {\n    pub fn new(reader: R) -> Self {\n        Self {\n            inner: reader,\n            byte_count: 0,\n        }\n    }\n\n    pub fn get_byte_count(&self) -> usize {\n        self.byte_count\n    }\n}\n\nimpl<R: Read> Read for MeasuredReader<R> {\n    fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {\n        let result = self.inner.read(buf);\n        if let Ok(n_bytes) = result {\n            self.byte_count += n_bytes\n        }\n        result\n    }\n}\n"
  },
  {
    "path": "libs/utils/src/metrics_collector.rs",
    "content": "use std::{\n    sync::{Arc, RwLock},\n    time::{Duration, Instant},\n};\n\nuse metrics::{IntGauge, proto::MetricFamily, register_int_gauge};\nuse once_cell::sync::Lazy;\n\npub static METRICS_STALE_MILLIS: Lazy<IntGauge> = Lazy::new(|| {\n    register_int_gauge!(\n        \"metrics_metrics_stale_milliseconds\",\n        \"The current metrics stale time in milliseconds\"\n    )\n    .expect(\"failed to define a metric\")\n});\n\n#[derive(Debug)]\npub struct CollectedMetrics {\n    pub metrics: Vec<MetricFamily>,\n    pub collected_at: Instant,\n}\n\nimpl CollectedMetrics {\n    fn new(metrics: Vec<MetricFamily>) -> Self {\n        Self {\n            metrics,\n            collected_at: Instant::now(),\n        }\n    }\n}\n\n#[derive(Debug)]\npub struct MetricsCollector {\n    last_collected: RwLock<Arc<CollectedMetrics>>,\n}\n\nimpl MetricsCollector {\n    pub fn new() -> Self {\n        Self {\n            last_collected: RwLock::new(Arc::new(CollectedMetrics::new(vec![]))),\n        }\n    }\n\n    #[tracing::instrument(name = \"metrics_collector\", skip_all)]\n    pub fn run_once(&self, cache_metrics: bool) -> Arc<CollectedMetrics> {\n        let started = Instant::now();\n        let metrics = metrics::gather();\n        let collected = Arc::new(CollectedMetrics::new(metrics));\n        if cache_metrics {\n            let mut guard = self.last_collected.write().unwrap();\n            *guard = collected.clone();\n        }\n        tracing::info!(\n            \"Collected {} metric families in {} ms\",\n            collected.metrics.len(),\n            started.elapsed().as_millis()\n        );\n        collected\n    }\n\n    pub fn last_collected(&self) -> Arc<CollectedMetrics> {\n        self.last_collected.read().unwrap().clone()\n    }\n}\n\nimpl Default for MetricsCollector {\n    fn default() -> Self {\n        Self::new()\n    }\n}\n\n// Interval for metrics collection. 
Currently hard-coded to be the same as the metrics scrape interval from the obs agent\npub static METRICS_COLLECTION_INTERVAL: Duration = Duration::from_secs(30);\n\npub static METRICS_COLLECTOR: Lazy<MetricsCollector> = Lazy::new(MetricsCollector::default);\n"
  },
  {
    "path": "libs/utils/src/pageserver_feedback.rs",
    "content": "use std::time::{Duration, SystemTime};\n\nuse bytes::{Buf, BufMut, Bytes, BytesMut};\nuse pq_proto::{PG_EPOCH, read_cstr};\nuse serde::{Deserialize, Serialize};\nuse tracing::{trace, warn};\n\nuse crate::lsn::Lsn;\n\n/// Feedback pageserver sends to safekeeper and safekeeper resends to compute.\n///\n/// Serialized in custom flexible key/value format. In replication protocol, it\n/// is marked with NEON_STATUS_UPDATE_TAG_BYTE to differentiate from postgres\n/// Standby status update / Hot standby feedback messages.\n///\n/// serde Serialize is used only for human readable dump to json (e.g. in\n/// safekeepers debug_dump).\n#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]\npub struct PageserverFeedback {\n    /// Last known size of the timeline. Used to enforce timeline size limit.\n    pub current_timeline_size: u64,\n    /// LSN last received and ingested by the pageserver. Controls backpressure.\n    pub last_received_lsn: Lsn,\n    /// LSN up to which data is persisted by the pageserver to its local disc.\n    /// Controls backpressure.\n    pub disk_consistent_lsn: Lsn,\n    /// LSN up to which data is persisted by the pageserver on s3; safekeepers\n    /// consider WAL before it can be removed.\n    pub remote_consistent_lsn: Lsn,\n    // Serialize with RFC3339 format.\n    #[serde(with = \"serde_systemtime\")]\n    pub replytime: SystemTime,\n    /// Used to track feedbacks from different shards. 
Always zero for unsharded tenants.\n    pub shard_number: u32,\n    /// If true, the pageserver has detected corruption and the safekeeper and postgres\n    /// should stop sending WAL.\n    pub corruption_detected: bool,\n}\n\nimpl PageserverFeedback {\n    pub fn empty() -> PageserverFeedback {\n        PageserverFeedback {\n            current_timeline_size: 0,\n            last_received_lsn: Lsn::INVALID,\n            remote_consistent_lsn: Lsn::INVALID,\n            disk_consistent_lsn: Lsn::INVALID,\n            replytime: *PG_EPOCH,\n            shard_number: 0,\n            corruption_detected: false,\n        }\n    }\n\n    // Serialize PageserverFeedback using custom format\n    // to support protocol extensibility.\n    //\n    // Following layout is used:\n    // char - number of key-value pairs that follow.\n    //\n    // key-value pairs:\n    // null-terminated string - key,\n    // uint32 - value length in bytes\n    // value itself\n    //\n    // TODO: change serialized fields names once all computes migrate to rename.\n    pub fn serialize(&self, buf: &mut BytesMut) {\n        let buf_ptr = buf.len();\n        buf.put_u8(0); // # of keys, will be filled later\n        let mut nkeys = 0;\n\n        nkeys += 1;\n        buf.put_slice(b\"current_timeline_size\\0\");\n        buf.put_i32(8);\n        buf.put_u64(self.current_timeline_size);\n\n        nkeys += 1;\n        buf.put_slice(b\"ps_writelsn\\0\");\n        buf.put_i32(8);\n        buf.put_u64(self.last_received_lsn.0);\n\n        nkeys += 1;\n        buf.put_slice(b\"ps_flushlsn\\0\");\n        buf.put_i32(8);\n        buf.put_u64(self.disk_consistent_lsn.0);\n\n        nkeys += 1;\n        buf.put_slice(b\"ps_applylsn\\0\");\n        buf.put_i32(8);\n        buf.put_u64(self.remote_consistent_lsn.0);\n\n        let timestamp = self\n            .replytime\n            .duration_since(*PG_EPOCH)\n            .expect(\"failed to serialize pg_replytime earlier than PG_EPOCH\")\n            
.as_micros() as i64;\n\n        nkeys += 1;\n        buf.put_slice(b\"ps_replytime\\0\");\n        buf.put_i32(8);\n        buf.put_i64(timestamp);\n\n        if self.shard_number > 0 {\n            nkeys += 1;\n            buf.put_slice(b\"shard_number\\0\");\n            buf.put_i32(4);\n            buf.put_u32(self.shard_number);\n        }\n\n        if self.corruption_detected {\n            nkeys += 1;\n            buf.put_slice(b\"corruption_detected\\0\");\n            buf.put_i32(1);\n            buf.put_u8(1);\n        }\n\n        buf[buf_ptr] = nkeys;\n    }\n\n    // Deserialize PageserverFeedback message\n    // TODO: change serialized fields names once all computes migrate to rename.\n    pub fn parse(mut buf: Bytes) -> PageserverFeedback {\n        let mut rf = PageserverFeedback::empty();\n        let nfields = buf.get_u8();\n        for _ in 0..nfields {\n            let key = read_cstr(&mut buf).unwrap();\n            match key.as_ref() {\n                b\"current_timeline_size\" => {\n                    let len = buf.get_i32();\n                    assert_eq!(len, 8);\n                    rf.current_timeline_size = buf.get_u64();\n                }\n                b\"ps_writelsn\" => {\n                    let len = buf.get_i32();\n                    assert_eq!(len, 8);\n                    rf.last_received_lsn = Lsn(buf.get_u64());\n                }\n                b\"ps_flushlsn\" => {\n                    let len = buf.get_i32();\n                    assert_eq!(len, 8);\n                    rf.disk_consistent_lsn = Lsn(buf.get_u64());\n                }\n                b\"ps_applylsn\" => {\n                    let len = buf.get_i32();\n                    assert_eq!(len, 8);\n                    rf.remote_consistent_lsn = Lsn(buf.get_u64());\n                }\n                b\"ps_replytime\" => {\n                    let len = buf.get_i32();\n                    assert_eq!(len, 8);\n                    let raw_time = 
buf.get_i64();\n                    if raw_time > 0 {\n                        rf.replytime = *PG_EPOCH + Duration::from_micros(raw_time as u64);\n                    } else {\n                        rf.replytime = *PG_EPOCH - Duration::from_micros(-raw_time as u64);\n                    }\n                }\n                b\"shard_number\" => {\n                    let len = buf.get_i32();\n                    assert_eq!(len, 4);\n                    rf.shard_number = buf.get_u32();\n                }\n                b\"corruption_detected\" => {\n                    let len = buf.get_i32();\n                    assert_eq!(len, 1);\n                    rf.corruption_detected = buf.get_u8() != 0;\n                }\n                _ => {\n                    let len = buf.get_i32();\n                    warn!(\n                        \"PageserverFeedback parse. unknown key {} of len {len}. Skip it.\",\n                        String::from_utf8_lossy(key.as_ref())\n                    );\n                    buf.advance(len as usize);\n                }\n            }\n        }\n        trace!(\"PageserverFeedback parsed is {:?}\", rf);\n        rf\n    }\n}\n\nmod serde_systemtime {\n    use std::time::SystemTime;\n\n    use chrono::{DateTime, Utc};\n    use serde::{Deserialize, Deserializer, Serializer};\n\n    pub fn serialize<S>(ts: &SystemTime, serializer: S) -> Result<S::Ok, S::Error>\n    where\n        S: Serializer,\n    {\n        let chrono_dt: DateTime<Utc> = (*ts).into();\n        serializer.serialize_str(&chrono_dt.to_rfc3339())\n    }\n\n    pub fn deserialize<'de, D>(deserializer: D) -> Result<SystemTime, D::Error>\n    where\n        D: Deserializer<'de>,\n    {\n        let time: String = Deserialize::deserialize(deserializer)?;\n        Ok(DateTime::parse_from_rfc3339(&time)\n            .map_err(serde::de::Error::custom)?\n            .into())\n    }\n}\n\n#[cfg(test)]\nmod tests {\n    use super::*;\n\n    #[test]\n    fn 
test_replication_feedback_serialization() {\n        let mut rf = PageserverFeedback::empty();\n        // Fill rf with some values\n        rf.current_timeline_size = 12345678;\n        // Set rounded time to be able to compare it with deserialized value,\n        // because it is rounded up to microseconds during serialization.\n        rf.replytime = *PG_EPOCH + Duration::from_secs(100_000_000);\n        let mut data = BytesMut::new();\n        rf.serialize(&mut data);\n\n        let rf_parsed = PageserverFeedback::parse(data.freeze());\n        assert_eq!(rf, rf_parsed);\n    }\n\n    // Test that databricks-specific fields added to the PageserverFeedback message are serialized\n    // and deserialized correctly, in addition to the existing fields from upstream.\n    #[test]\n    fn test_replication_feedback_databricks_fields() {\n        let mut rf = PageserverFeedback::empty();\n        rf.current_timeline_size = 12345678;\n        rf.last_received_lsn = Lsn(23456789);\n        rf.disk_consistent_lsn = Lsn(34567890);\n        rf.remote_consistent_lsn = Lsn(45678901);\n        rf.replytime = *PG_EPOCH + Duration::from_secs(100_000_000);\n        rf.shard_number = 1;\n        rf.corruption_detected = true;\n\n        let mut data = BytesMut::new();\n        rf.serialize(&mut data);\n\n        let rf_parsed = PageserverFeedback::parse(data.freeze());\n        assert_eq!(rf, rf_parsed);\n    }\n\n    #[test]\n    fn test_replication_feedback_unknown_key() {\n        let mut rf = PageserverFeedback::empty();\n        // Fill rf with some values\n        rf.current_timeline_size = 12345678;\n        // Set rounded time to be able to compare it with deserialized value,\n        // because it is rounded up to microseconds during serialization.\n        rf.replytime = *PG_EPOCH + Duration::from_secs(100_000_000);\n        let mut data = BytesMut::new();\n        rf.serialize(&mut data);\n\n        // Add an extra field to the buffer and adjust number of keys\n        
data[0] += 1;\n        data.put_slice(b\"new_field_one\\0\");\n        data.put_i32(8);\n        data.put_u64(42);\n\n        // Parse serialized data and check that new field is not parsed\n        let rf_parsed = PageserverFeedback::parse(data.freeze());\n        assert_eq!(rf, rf_parsed);\n    }\n}\n"
  },
  {
    "path": "libs/utils/src/pid_file.rs",
    "content": "//! Abstraction to create & read pidfiles.\n//!\n//! A pidfile is a file in the filesystem that stores a process's PID.\n//! Its purpose is to implement a singleton behavior where only\n//! one process of some \"kind\" is supposed to be running at a given time.\n//! The \"kind\" is identified by the pidfile.\n//!\n//! During process startup, the process that is supposed to be a singleton\n//! must [claim][`claim_for_current_process`] the pidfile first.\n//! If that is unsuccessful, the process must not act as the singleton, i.e.,\n//! it must not access any of the resources that only the singleton may access.\n//!\n//! A common need is to signal a running singleton process, e.g., to make\n//! it shut down and exit.\n//! For that, we have to [`read`] the pidfile. The result of the `read` operation\n//! tells us if there is any singleton process, and if so, what PID it has.\n//! We can then proceed to signal it, although some caveats still apply.\n//! Read the function-level documentation of [`read`] for that.\n//!\n//! ## Never Remove Pidfiles\n//!\n//! It would be natural to assume that the process who claimed the pidfile\n//! should remove it upon exit to avoid leaving a stale pidfile in place.\n//! However, we already have a reliable way to detect staleness of the pidfile,\n//! i.e., the `flock` that [claiming][`claim_for_current_process`] puts on it.\n//!\n//! And further, removing pidfiles would introduce a **catastrophic race condition**\n//! where two processes are running that are supposed to be singletons.\n//! Suppose we were to remove our pidfile during process shutdown.\n//! Here is how the race plays out:\n//! - Suppose we have a service called `myservice` with pidfile `myservice.pidfile`.\n//! - Process `A` starts to shut down.\n//! - Process `B` is just starting up\n//!     - It `open(\"myservice.pid\", O_WRONLY|O_CREAT)` the file\n//!     - It blocks on `flock`\n//! 
- Process `A` removes the pidfile as the last step of its shutdown procedure\n//!     - `unlink(\"myservice.pid\")`\n//! - Process `A` exits\n//!     - This releases its `flock` and unblocks `B`\n//! - Process `B` still has the file descriptor for `myservice.pid` open\n//! - Process `B` writes its PID into `myservice.pid`.\n//! - But the `myservice.pid` file has been unlinked, so, there is no `myservice.pid`\n//!   in the directory.\n//! - Process `C` starts\n//!     - It `open(\"myservice.pid\", O_WRONLY|O_CREAT)` which creates a new file (new inode)\n//!     - It `flock`s the file, which, since it's a different file, does not block\n//!     - It writes its PID into the file\n//!\n//! At this point, `B` and `C` are running, which is hazardous.\n//! Moral of the story: don't unlink pidfiles, ever.\n\nuse std::ops::Deref;\n\nuse anyhow::Context;\nuse camino::Utf8Path;\nuse nix::unistd::Pid;\n\nuse crate::lock_file::{self, LockFileRead};\n\n/// Keeps a claim on a pidfile alive until it is dropped.\n/// Returned by [`claim_for_current_process`].\n#[must_use]\npub struct PidFileGuard(lock_file::LockFileGuard);\n\nimpl Deref for PidFileGuard {\n    type Target = lock_file::LockFileGuard;\n\n    fn deref(&self) -> &Self::Target {\n        &self.0\n    }\n}\n\n/// Try to claim `path` as a pidfile for the current process.\n///\n/// If another process has already claimed the pidfile, and it is still running,\n/// this function returns an error.\n/// Otherwise, the function `flock`s the file and updates its contents to the\n/// current process's PID.\n/// If the update fails, the flock is released and an error returned.\n/// On success, the function returns a [`PidFileGuard`] to keep the flock alive.\n///\n/// ### Maintaining A Claim\n///\n/// It is the caller's responsibility to maintain the claim.\n/// The claim ends as soon as the returned guard object is dropped.\n/// To maintain the claim for the remaining lifetime of the current process,\n/// use [`std::mem::forget`] or 
similar.\npub fn claim_for_current_process(path: &Utf8Path) -> anyhow::Result<PidFileGuard> {\n    let unwritten_lock_file = lock_file::create_exclusive(path).context(\"lock file\")?;\n    // if any of the next steps fail, we drop the file descriptor and thereby release the lock\n    let guard = unwritten_lock_file\n        .write_content(Pid::this().to_string())\n        .context(\"write pid to lock file\")?;\n    Ok(PidFileGuard(guard))\n}\n\n/// Returned by [`read`].\npub enum PidFileRead {\n    /// No file exists at the given path.\n    NotExist,\n    /// The given pidfile is currently not claimed by any process.\n    /// To determine this, the [`read`] operation acquired\n    /// an exclusive flock on the file. The lock is still held and responsibility\n    /// to release it is returned through the guard object.\n    /// Before releasing it, other [`claim_for_current_process`] or [`read`] calls\n    /// will fail.\n    ///\n    /// ### Caveats\n    ///\n    /// Do not unlink the pidfile from the filesystem. See module-comment for why.\n    NotHeldByAnyProcess(PidFileGuard),\n    /// The given pidfile is still claimed by another process whose PID is given\n    /// as part of this variant.\n    ///\n    /// ### Caveats\n    ///\n    /// 1. The other process might exit at any time, turning the given PID stale.\n    /// 2. There is a small window in which `claim_for_current_process` has already\n    ///    locked the file but not yet updated its contents. [`read`] will return\n    ///    this variant here, but with the old file contents, i.e., a stale PID.\n    ///\n    /// The kernel is free to recycle a PID once it has been `wait(2)`ed upon by\n    /// its creator. 
Thus, acting upon a stale PID, e.g., by issuing a `kill`\n    /// system call on it, bears the risk of killing an unrelated process.\n    /// This is an inherent limitation of using pidfiles.\n    /// The only race-free solution is to have a supervisor-process with a lifetime\n    /// that exceeds that of all of its child-processes (e.g., `runit`, `supervisord`).\n    LockedByOtherProcess(Pid),\n}\n\n/// Try to read the file at the given path as a pidfile that was previously created\n/// through [`claim_for_current_process`].\n///\n/// On success, this function returns a [`PidFileRead`].\n/// Check its docs for a description of the meaning of its different variants.\npub fn read(pidfile: &Utf8Path) -> anyhow::Result<PidFileRead> {\n    let res = lock_file::read_and_hold_lock_file(pidfile).context(\"read and hold pid file\")?;\n    let ret = match res {\n        LockFileRead::NotExist => PidFileRead::NotExist,\n        LockFileRead::NotHeldByAnyProcess(guard, _) => {\n            PidFileRead::NotHeldByAnyProcess(PidFileGuard(guard))\n        }\n        LockFileRead::LockedByOtherProcess {\n            not_locked_file: _not_locked_file,\n            content,\n        } => {\n            // XXX the read races with the write in claim_pid_file_for_pid().\n            // But pids are smaller than a page, so the kernel page cache will lock for us.\n            // The only problem is that we might get the old contents here.\n            // Can only fix that by implementing some scheme that downgrades the\n            // exclusive lock to shared lock in claim_pid_file_for_pid().\n            PidFileRead::LockedByOtherProcess(parse_pidfile_content(&content)?)\n        }\n    };\n    Ok(ret)\n}\n\nfn parse_pidfile_content(content: &str) -> anyhow::Result<Pid> {\n    let pid: i32 = content\n        .parse()\n        .map_err(|_| anyhow::anyhow!(\"parse pidfile content to PID\"))?;\n    if pid < 1 {\n        anyhow::bail!(\"bad value in pidfile '{pid}'\");\n    }\n    
Ok(Pid::from_raw(pid))\n}\n"
  },
  {
    "path": "libs/utils/src/poison.rs",
    "content": "//!  Protect a piece of state from reuse after it is left in an inconsistent state.\n//!\n//!  # Example\n//!\n//!  ```\n//!  # tokio::runtime::Builder::new_current_thread().enable_all().build().unwrap().block_on(async {\n//!  use utils::poison::Poison;\n//!  use std::time::Duration;\n//!\n//!  struct State {\n//!    clean: bool,\n//!  }\n//!  let state = tokio::sync::Mutex::new(Poison::new(\"mystate\", State { clean: true }));\n//!\n//!  let mut mutex_guard = state.lock().await;\n//!  let mut poison_guard = mutex_guard.check_and_arm()?;\n//!  let state = poison_guard.data_mut();\n//!  state.clean = false;\n//!  // If we get cancelled at this await point, subsequent check_and_arm() calls will fail.\n//!  tokio::time::sleep(Duration::from_secs(10)).await;\n//!  state.clean = true;\n//!  poison_guard.disarm();\n//!  # Ok::<(), utils::poison::Error>(())\n//!  # });\n//!  ```\n\nuse tracing::warn;\n\npub struct Poison<T> {\n    what: &'static str,\n    state: State,\n    data: T,\n}\n\n#[derive(Clone, Copy)]\nenum State {\n    Clean,\n    Armed,\n    Poisoned { at: chrono::DateTime<chrono::Utc> },\n}\n\nimpl<T> Poison<T> {\n    /// We log `what` `warning!` level if the [`Guard`] gets dropped without being [`Guard::disarm`]ed.\n    pub fn new(what: &'static str, data: T) -> Self {\n        Self {\n            what,\n            state: State::Clean,\n            data,\n        }\n    }\n\n    /// Check for poisoning and return a [`Guard`] that provides access to the wrapped state.\n    pub fn check_and_arm(&mut self) -> Result<Guard<T>, Error> {\n        match self.state {\n            State::Clean => {\n                self.state = State::Armed;\n                Ok(Guard(self))\n            }\n            State::Armed => unreachable!(\"transient state\"),\n            State::Poisoned { at } => Err(Error::Poisoned {\n                what: self.what,\n                at,\n            }),\n        }\n    }\n}\n\n/// Armed pointer to a [`Poison`].\n///\n/// 
Use [`Self::data`] and [`Self::data_mut`] to access the wrapped state.\n/// Once modifications are done, use [`Self::disarm`].\n/// If [`Guard`] gets dropped instead of calling [`Self::disarm`], the state is poisoned\n/// and subsequent calls to [`Poison::check_and_arm`] will fail with an error.\npub struct Guard<'a, T>(&'a mut Poison<T>);\n\nimpl<T> Guard<'_, T> {\n    pub fn data(&self) -> &T {\n        &self.0.data\n    }\n    pub fn data_mut(&mut self) -> &mut T {\n        &mut self.0.data\n    }\n\n    pub fn disarm(self) {\n        match self.0.state {\n            State::Clean => unreachable!(\"we set it to Armed in check_and_arm()\"),\n            State::Armed => {\n                self.0.state = State::Clean;\n            }\n            State::Poisoned { at } => {\n                unreachable!(\"we fail check_and_arm() if it's in that state: {at}\")\n            }\n        }\n    }\n}\n\nimpl<T> Drop for Guard<'_, T> {\n    fn drop(&mut self) {\n        match self.0.state {\n            State::Clean => {\n                // set by disarm()\n            }\n            State::Armed => {\n                // still armed => poison it\n                let at = chrono::Utc::now();\n                self.0.state = State::Poisoned { at };\n                warn!(at=?at, \"poisoning {}\", self.0.what);\n            }\n            State::Poisoned { at } => {\n                unreachable!(\"we fail check_and_arm() if it's in that state: {at}\")\n            }\n        }\n    }\n}\n\n#[derive(thiserror::Error, Debug)]\npub enum Error {\n    #[error(\"poisoned at {at}: {what}\")]\n    Poisoned {\n        what: &'static str,\n        at: chrono::DateTime<chrono::Utc>,\n    },\n}\n"
  },
  {
    "path": "libs/utils/src/postgres_client.rs",
    "content": "//! Postgres client connection code common to other crates (safekeeper and\n//! pageserver) which depends on tenant/timeline ids and thus not fitting into\n//! postgres_connection crate.\n\nuse anyhow::Context;\nuse postgres_connection::{PgConnectionConfig, parse_host_port};\n\nuse crate::id::TenantTimelineId;\n\n#[derive(Copy, Clone, PartialEq, Eq, Debug, serde::Serialize, serde::Deserialize)]\n#[serde(rename_all = \"kebab-case\")]\npub enum InterpretedFormat {\n    Bincode,\n    Protobuf,\n}\n\n#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]\n#[serde(rename_all = \"kebab-case\")]\npub enum Compression {\n    Zstd { level: i8 },\n}\n\n#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]\n#[serde(tag = \"type\", content = \"args\")]\n#[serde(rename_all = \"kebab-case\")]\npub enum PostgresClientProtocol {\n    /// Usual Postgres replication protocol\n    Vanilla,\n    /// Custom shard-aware protocol that replicates interpreted records.\n    /// Used to send wal from safekeeper to pageserver.\n    Interpreted {\n        format: InterpretedFormat,\n        compression: Option<Compression>,\n    },\n}\n\npub struct ConnectionConfigArgs<'a> {\n    pub protocol: PostgresClientProtocol,\n\n    pub ttid: TenantTimelineId,\n    pub shard_number: Option<u8>,\n    pub shard_count: Option<u8>,\n    pub shard_stripe_size: Option<u32>,\n\n    pub listen_pg_addr_str: &'a str,\n\n    pub auth_token: Option<&'a str>,\n    pub availability_zone: Option<&'a str>,\n}\n\nimpl<'a> ConnectionConfigArgs<'a> {\n    fn options(&'a self) -> Vec<String> {\n        let mut options = vec![\n            \"-c\".to_owned(),\n            format!(\"timeline_id={}\", self.ttid.timeline_id),\n            format!(\"tenant_id={}\", self.ttid.tenant_id),\n            format!(\n                \"protocol={}\",\n                serde_json::to_string(&self.protocol).unwrap()\n            ),\n        ];\n\n        if 
self.shard_number.is_some() {\n            assert!(self.shard_count.is_some());\n            assert!(self.shard_stripe_size.is_some());\n\n            options.push(format!(\"shard_count={}\", self.shard_count.unwrap()));\n            options.push(format!(\"shard_number={}\", self.shard_number.unwrap()));\n            options.push(format!(\n                \"shard_stripe_size={}\",\n                self.shard_stripe_size.unwrap()\n            ));\n        }\n\n        options\n    }\n}\n\n/// Create client config for fetching WAL from safekeeper on particular timeline.\n/// listen_pg_addr_str is in form host:\\[port\\].\npub fn wal_stream_connection_config(\n    args: ConnectionConfigArgs,\n) -> anyhow::Result<PgConnectionConfig> {\n    let (host, port) =\n        parse_host_port(args.listen_pg_addr_str).context(\"Unable to parse listen_pg_addr_str\")?;\n    let port = port.unwrap_or(5432);\n    let mut connstr = PgConnectionConfig::new_host_port(host, port)\n        .extend_options(args.options())\n        .set_password(args.auth_token.map(|s| s.to_owned()));\n\n    if let Some(availability_zone) = args.availability_zone {\n        connstr = connstr.extend_options([format!(\"availability_zone={availability_zone}\")]);\n    }\n\n    Ok(connstr)\n}\n"
  },
  {
    "path": "libs/utils/src/rate_limit.rs",
    "content": "//! A helper to rate limit operations.\n\nuse std::time::{Duration, Instant};\n\npub struct RateLimit {\n    last: Option<Instant>,\n    interval: Duration,\n    dropped: u64,\n}\n\npub struct RateLimitStats(u64);\n\nimpl std::fmt::Display for RateLimitStats {\n    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {\n        write!(f, \"{} dropped calls\", self.0)\n    }\n}\n\nimpl RateLimit {\n    pub const fn new(interval: Duration) -> Self {\n        Self {\n            last: None,\n            interval,\n            dropped: 0,\n        }\n    }\n\n    /// Call `f` if the rate limit allows.\n    /// Don't call it otherwise.\n    pub fn call<F: FnOnce()>(&mut self, f: F) {\n        self.call2(|_| f())\n    }\n\n    pub fn call2<F: FnOnce(RateLimitStats)>(&mut self, f: F) {\n        let now = Instant::now();\n        match self.last {\n            Some(last) if now - last <= self.interval => {\n                // ratelimit\n                self.dropped += 1;\n            }\n            _ => {\n                self.last = Some(now);\n                f(RateLimitStats(self.dropped));\n                self.dropped = 0;\n            }\n        }\n    }\n}\n\n#[cfg(test)]\nmod tests {\n    use std::sync::atomic::AtomicUsize;\n\n    #[test]\n    fn basics() {\n        use std::sync::atomic::Ordering::Relaxed;\n        use std::time::Duration;\n\n        use super::RateLimit;\n\n        let called = AtomicUsize::new(0);\n        let mut f = RateLimit::new(Duration::from_millis(100));\n\n        let cl = || {\n            called.fetch_add(1, Relaxed);\n        };\n\n        f.call(cl);\n        assert_eq!(called.load(Relaxed), 1);\n        f.call(cl);\n        assert_eq!(called.load(Relaxed), 1);\n        f.call(cl);\n        assert_eq!(called.load(Relaxed), 1);\n        std::thread::sleep(Duration::from_millis(100));\n        f.call(cl);\n        assert_eq!(called.load(Relaxed), 2);\n        f.call(cl);\n        
assert_eq!(called.load(Relaxed), 2);\n        std::thread::sleep(Duration::from_millis(100));\n        f.call(cl);\n        assert_eq!(called.load(Relaxed), 3);\n    }\n}\n"
  },
  {
    "path": "libs/utils/src/sentry_init.rs",
    "content": "use std::borrow::Cow;\nuse std::env;\n\nuse sentry::ClientInitGuard;\npub use sentry::release_name;\nuse tracing::{error, info};\n\n#[must_use]\npub fn init_sentry(\n    release_name: Option<Cow<'static, str>>,\n    extra_options: &[(&str, &str)],\n) -> Option<ClientInitGuard> {\n    let Ok(dsn) = env::var(\"SENTRY_DSN\") else {\n        info!(\"not initializing Sentry, no SENTRY_DSN given\");\n        return None;\n    };\n    let environment = env::var(\"SENTRY_ENVIRONMENT\").unwrap_or_else(|_| \"development\".into());\n\n    let guard = sentry::init((\n        dsn,\n        sentry::ClientOptions {\n            release: release_name.clone(),\n            environment: Some(environment.clone().into()),\n            ..Default::default()\n        },\n    ));\n    sentry::configure_scope(|scope| {\n        for &(key, value) in extra_options {\n            scope.set_extra(key, value.into());\n        }\n    });\n\n    if let Some(dsn) = guard.dsn() {\n        info!(\n            \"initialized Sentry for project {}, environment {}, release {} (using API {})\",\n            dsn.project_id(),\n            environment,\n            release_name.unwrap_or(Cow::Borrowed(\"None\")),\n            dsn.envelope_api_url(),\n        );\n    } else {\n        // This should panic during sentry::init(), but we may as well cover it.\n        error!(\"failed to initialize Sentry, invalid DSN\");\n    }\n\n    Some(guard)\n}\n"
  },
  {
    "path": "libs/utils/src/seqwait.rs",
    "content": "#![warn(missing_docs)]\n\nuse std::cmp::{Eq, Ordering};\nuse std::collections::BinaryHeap;\nuse std::mem;\nuse std::sync::Mutex;\nuse std::time::Duration;\n\nuse tokio::sync::watch::{self, channel};\nuse tokio::time::timeout;\n\n/// An error happened while waiting for a number\n#[derive(Debug, PartialEq, Eq, thiserror::Error)]\npub enum SeqWaitError {\n    /// The wait timeout was reached\n    #[error(\"seqwait timeout was reached\")]\n    Timeout,\n\n    /// [`SeqWait::shutdown`] was called\n    #[error(\"SeqWait::shutdown was called\")]\n    Shutdown,\n}\n\n/// Monotonically increasing value\n///\n/// It is handy to store some other fields under the same mutex in `SeqWait<S>`\n/// (e.g. store prev_record_lsn). So we allow SeqWait to be parametrized with\n/// any type that can expose counter. `V` is the type of exposed counter.\npub trait MonotonicCounter<V> {\n    /// Bump counter value and check that it goes forward\n    /// N.B.: new_val is an actual new value, not a difference.\n    fn cnt_advance(&mut self, new_val: V);\n\n    /// Get counter value\n    fn cnt_value(&self) -> V;\n}\n\n/// Heap of waiters, lowest numbers pop first.\nstruct Waiters<V>\nwhere\n    V: Ord,\n{\n    heap: BinaryHeap<Waiter<V>>,\n    /// Number of the first waiter in the heap, or None if there are no waiters.\n    status_channel: watch::Sender<Option<V>>,\n}\n\nimpl<V> Waiters<V>\nwhere\n    V: Ord + Copy,\n{\n    fn new() -> Self {\n        Waiters {\n            heap: BinaryHeap::new(),\n            status_channel: channel(None).0,\n        }\n    }\n\n    /// `status_channel` contains the number of the first waiter in the heap.\n    /// This function should be called whenever waiters heap changes.\n    fn update_status(&self) {\n        let first_waiter = self.heap.peek().map(|w| w.wake_num);\n        let _ = self.status_channel.send_replace(first_waiter);\n    }\n\n    /// Add new waiter to the heap, return a channel that will be notified when the number 
arrives.\n    fn add(&mut self, num: V) -> watch::Receiver<()> {\n        let (tx, rx) = channel(());\n        self.heap.push(Waiter {\n            wake_num: num,\n            wake_channel: tx,\n        });\n        self.update_status();\n        rx\n    }\n\n    /// Pop all waiters <= num from the heap. Collect channels in a vector,\n    /// so that caller can wake them up.\n    fn pop_leq(&mut self, num: V) -> Vec<watch::Sender<()>> {\n        let mut wake_these = Vec::new();\n        while let Some(n) = self.heap.peek() {\n            if n.wake_num > num {\n                break;\n            }\n            wake_these.push(self.heap.pop().unwrap().wake_channel);\n        }\n        if !wake_these.is_empty() {\n            self.update_status();\n        }\n        wake_these\n    }\n\n    /// Used on shutdown to efficiently drop all waiters.\n    fn take_all(&mut self) -> BinaryHeap<Waiter<V>> {\n        let heap = mem::take(&mut self.heap);\n        self.update_status();\n        heap\n    }\n}\n\nstruct Waiter<T>\nwhere\n    T: Ord,\n{\n    wake_num: T,                     // wake me when this number arrives ...\n    wake_channel: watch::Sender<()>, // ... by sending a message to this channel\n}\n\n// BinaryHeap is a max-heap, and we want a min-heap. 
Reverse the ordering here\n// to get that.\nimpl<T: Ord> PartialOrd for Waiter<T> {\n    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {\n        Some(self.cmp(other))\n    }\n}\n\nimpl<T: Ord> Ord for Waiter<T> {\n    fn cmp(&self, other: &Self) -> Ordering {\n        other.wake_num.cmp(&self.wake_num)\n    }\n}\n\nimpl<T: Ord> PartialEq for Waiter<T> {\n    fn eq(&self, other: &Self) -> bool {\n        other.wake_num == self.wake_num\n    }\n}\n\nimpl<T: Ord> Eq for Waiter<T> {}\n\n/// Internal components of a `SeqWait`\nstruct SeqWaitInt<S, V>\nwhere\n    S: MonotonicCounter<V>,\n    V: Ord,\n{\n    waiters: Waiters<V>,\n    current: S,\n    shutdown: bool,\n}\n\n/// A tool for waiting on a sequence number\n///\n/// This provides a way to wait for the arrival of a number.\n/// As soon as the number arrives by another caller calling\n/// [`advance`], the waiter will be woken up.\n///\n/// This implementation takes a blocking Mutex on both [`wait_for`]\n/// and [`advance`], meaning there may be unexpected executor blocking\n/// due to thread scheduling unfairness. 
There are probably better\n/// implementations, but we can probably live with this for now.\n///\n/// [`wait_for`]: SeqWait::wait_for\n/// [`advance`]: SeqWait::advance\n///\n/// `S` means Storage, `V` is type of counter that this storage exposes.\n///\npub struct SeqWait<S, V>\nwhere\n    S: MonotonicCounter<V>,\n    V: Ord,\n{\n    internal: Mutex<SeqWaitInt<S, V>>,\n}\n\nimpl<S, V> SeqWait<S, V>\nwhere\n    S: MonotonicCounter<V> + Copy,\n    V: Ord + Copy,\n{\n    /// Create a new `SeqWait`, initialized to a particular number\n    pub fn new(starting_num: S) -> Self {\n        let internal = SeqWaitInt {\n            waiters: Waiters::new(),\n            current: starting_num,\n            shutdown: false,\n        };\n        SeqWait {\n            internal: Mutex::new(internal),\n        }\n    }\n\n    /// Shut down a `SeqWait`, causing all waiters (present and\n    /// future) to return an error.\n    pub fn shutdown(&self) {\n        let waiters = {\n            // Prevent new waiters; wake all those that exist.\n            // Wake everyone with an error.\n            let mut internal = self.internal.lock().unwrap();\n\n            // Block any future waiters from starting\n            internal.shutdown = true;\n\n            // Take all waiters to drop them later.\n            internal.waiters.take_all()\n\n            // Drop the lock as we exit this scope.\n        };\n\n        // When we drop the waiters list, each Receiver will\n        // be woken with an error.\n        // This drop doesn't need to be explicit; it's done\n        // here to make it easier to read the code and understand\n        // the order of events.\n        drop(waiters);\n    }\n\n    /// Wait for a number to arrive\n    ///\n    /// This call won't complete until someone has called `advance`\n    /// with a number greater than or equal to the one we're waiting for.\n    ///\n    /// This function is async cancellation-safe.\n    pub async fn wait_for(&self, num: V) -> 
Result<(), SeqWaitError> {\n        match self.queue_for_wait(num) {\n            Ok(None) => Ok(()),\n            Ok(Some(mut rx)) => rx.changed().await.map_err(|_| SeqWaitError::Shutdown),\n            Err(e) => Err(e),\n        }\n    }\n\n    /// Wait for a number to arrive\n    ///\n    /// This call won't complete until someone has called `advance`\n    /// with a number greater than or equal to the one we're waiting for.\n    ///\n    /// If that hasn't happened after the specified timeout duration,\n    /// [`SeqWaitError::Timeout`] will be returned.\n    ///\n    /// This function is async cancellation-safe.\n    pub async fn wait_for_timeout(\n        &self,\n        num: V,\n        timeout_duration: Duration,\n    ) -> Result<(), SeqWaitError> {\n        match self.queue_for_wait(num) {\n            Ok(None) => Ok(()),\n            Ok(Some(mut rx)) => match timeout(timeout_duration, rx.changed()).await {\n                Ok(Ok(())) => Ok(()),\n                Ok(Err(_)) => Err(SeqWaitError::Shutdown),\n                Err(_) => Err(SeqWaitError::Timeout),\n            },\n            Err(e) => Err(e),\n        }\n    }\n\n    /// Check if [`Self::wait_for`] or [`Self::wait_for_timeout`] would wait if called with `num`.\n    pub fn would_wait_for(&self, num: V) -> Result<(), V> {\n        let internal = self.internal.lock().unwrap();\n        let cnt = internal.current.cnt_value();\n        drop(internal);\n        if cnt >= num { Ok(()) } else { Err(cnt) }\n    }\n\n    /// Register and return a channel that will be notified when a number arrives,\n    /// or None, if it has already arrived.\n    fn queue_for_wait(&self, num: V) -> Result<Option<watch::Receiver<()>>, SeqWaitError> {\n        let mut internal = self.internal.lock().unwrap();\n        if internal.current.cnt_value() >= num {\n            return Ok(None);\n        }\n        if internal.shutdown {\n            return Err(SeqWaitError::Shutdown);\n        }\n\n        // Add waiter channel 
to the queue.\n        let rx = internal.waiters.add(num);\n        // Drop the lock as we exit this scope.\n        Ok(Some(rx))\n    }\n\n    /// Announce a new number has arrived\n    ///\n    /// All waiters at this value or below will be woken.\n    ///\n    /// Returns the old number.\n    pub fn advance(&self, num: V) -> V {\n        let old_value;\n        let wake_these = {\n            let mut internal = self.internal.lock().unwrap();\n\n            old_value = internal.current.cnt_value();\n            if old_value >= num {\n                return old_value;\n            }\n            internal.current.cnt_advance(num);\n\n            // Pop all waiters <= num from the heap.\n            internal.waiters.pop_leq(num)\n        };\n\n        for tx in wake_these {\n            // This can fail if there are no receivers.\n            // We don't care; discard the error.\n            let _ = tx.send(());\n        }\n        old_value\n    }\n\n    /// Read the current value, without waiting.\n    pub fn load(&self) -> S {\n        self.internal.lock().unwrap().current\n    }\n\n    /// Get a Receiver for the current status.\n    ///\n    /// The current status is the number of the first waiter in the queue,\n    /// or None if there are no waiters.\n    ///\n    /// This receiver will be notified whenever the status changes.\n    /// It is useful for receiving notifications when the first waiter\n    /// starts waiting for a number, or when there are no more waiters left.\n    pub fn status_receiver(&self) -> watch::Receiver<Option<V>> {\n        self.internal\n            .lock()\n            .unwrap()\n            .waiters\n            .status_channel\n            .subscribe()\n    }\n}\n\n#[cfg(test)]\nmod tests {\n    use std::sync::Arc;\n\n    use super::*;\n\n    impl MonotonicCounter<i32> for i32 {\n        fn cnt_advance(&mut self, val: i32) {\n            assert!(*self <= val);\n            *self = val;\n        }\n        fn cnt_value(&self) -> i32 
{\n            *self\n        }\n    }\n\n    #[tokio::test]\n    async fn seqwait() {\n        let seq = Arc::new(SeqWait::new(0));\n        let seq2 = Arc::clone(&seq);\n        let seq3 = Arc::clone(&seq);\n        let jh1 = tokio::task::spawn(async move {\n            seq2.wait_for(42).await.expect(\"wait_for 42\");\n            let old = seq2.advance(100);\n            assert_eq!(old, 99);\n            seq2.wait_for_timeout(999, Duration::from_millis(100))\n                .await\n                .expect_err(\"no 999\");\n        });\n        let jh2 = tokio::task::spawn(async move {\n            seq3.wait_for(42).await.expect(\"wait_for 42\");\n            seq3.wait_for(0).await.expect(\"wait_for 0\");\n        });\n        tokio::time::sleep(Duration::from_millis(200)).await;\n        let old = seq.advance(99);\n        assert_eq!(old, 0);\n        seq.wait_for(100).await.expect(\"wait_for 100\");\n\n        // Calling advance with a smaller value is a no-op\n        assert_eq!(seq.advance(98), 100);\n        assert_eq!(seq.load(), 100);\n\n        jh1.await.unwrap();\n        jh2.await.unwrap();\n\n        seq.shutdown();\n    }\n\n    #[tokio::test]\n    async fn seqwait_timeout() {\n        let seq = Arc::new(SeqWait::new(0));\n        let seq2 = Arc::clone(&seq);\n        let jh = tokio::task::spawn(async move {\n            let timeout = Duration::from_millis(1);\n            let res = seq2.wait_for_timeout(42, timeout).await;\n            assert_eq!(res, Err(SeqWaitError::Timeout));\n        });\n        tokio::time::sleep(Duration::from_millis(200)).await;\n        // This will attempt to wake, but nothing will happen\n        // because the waiter already dropped its Receiver.\n        let old = seq.advance(99);\n        assert_eq!(old, 0);\n        jh.await.unwrap();\n\n        seq.shutdown();\n    }\n}\n"
  },
  {
    "path": "libs/utils/src/serde_percent.rs",
    "content": "//! A serde::Deserialize type for percentages.\n//!\n//! See [`Percent`] for details.\n\nuse serde::{Deserialize, Serialize};\n\n/// If the value is not an integer between 0 and 100,\n/// deserialization fails with a descriptive error.\n#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize)]\n#[serde(transparent)]\npub struct Percent(#[serde(deserialize_with = \"deserialize_pct_0_to_100\")] u8);\n\nimpl Percent {\n    pub const fn new(pct: u8) -> Option<Self> {\n        if pct <= 100 { Some(Percent(pct)) } else { None }\n    }\n\n    pub fn get(&self) -> u8 {\n        self.0\n    }\n}\n\nfn deserialize_pct_0_to_100<'de, D>(deserializer: D) -> Result<u8, D::Error>\nwhere\n    D: serde::de::Deserializer<'de>,\n{\n    let v: u8 = serde::de::Deserialize::deserialize(deserializer)?;\n    if v > 100 {\n        return Err(serde::de::Error::custom(\n            \"must be an integer between 0 and 100\",\n        ));\n    }\n    Ok(v)\n}\n\n#[cfg(test)]\nmod tests {\n    use super::Percent;\n\n    #[derive(serde::Deserialize, serde::Serialize, Debug, PartialEq, Eq)]\n    struct Foo {\n        bar: Percent,\n    }\n\n    #[test]\n    fn basics() {\n        let input = r#\"{ \"bar\": 50 }\"#;\n        let foo: Foo = serde_json::from_str(input).unwrap();\n        assert_eq!(foo.bar.get(), 50);\n    }\n    #[test]\n    fn null_handling() {\n        let input = r#\"{ \"bar\": null }\"#;\n        let res: Result<Foo, _> = serde_json::from_str(input);\n        assert!(res.is_err());\n    }\n    #[test]\n    fn zero() {\n        let input = r#\"{ \"bar\": 0 }\"#;\n        let foo: Foo = serde_json::from_str(input).unwrap();\n        assert_eq!(foo.bar.get(), 0);\n    }\n    #[test]\n    fn out_of_range_above() {\n        let input = r#\"{ \"bar\": 101 }\"#;\n        let res: Result<Foo, _> = serde_json::from_str(input);\n        assert!(res.is_err());\n    }\n    #[test]\n    fn out_of_range_below() {\n        let input = r#\"{ 
\"bar\": -1 }\"#;\n        let res: Result<Foo, _> = serde_json::from_str(input);\n        assert!(res.is_err());\n    }\n    #[test]\n    fn float() {\n        let input = r#\"{ \"bar\": 50.5 }\"#;\n        let res: Result<Foo, _> = serde_json::from_str(input);\n        assert!(res.is_err());\n    }\n    #[test]\n    fn string() {\n        let input = r#\"{ \"bar\": \"50 %\" }\"#;\n        let res: Result<Foo, _> = serde_json::from_str(input);\n        assert!(res.is_err());\n    }\n}\n"
  },
  {
    "path": "libs/utils/src/serde_regex.rs",
    "content": "//! A `serde::{Deserialize,Serialize}` type for regexes.\n\nuse std::ops::Deref;\n\n#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]\n#[serde(transparent)]\npub struct Regex(\n    #[serde(\n        deserialize_with = \"deserialize_regex\",\n        serialize_with = \"serialize_regex\"\n    )]\n    regex::Regex,\n);\n\nfn deserialize_regex<'de, D>(deserializer: D) -> Result<regex::Regex, D::Error>\nwhere\n    D: serde::de::Deserializer<'de>,\n{\n    let s: String = serde::de::Deserialize::deserialize(deserializer)?;\n    let re = regex::Regex::new(&s).map_err(serde::de::Error::custom)?;\n    Ok(re)\n}\n\nfn serialize_regex<S>(re: &regex::Regex, serializer: S) -> Result<S::Ok, S::Error>\nwhere\n    S: serde::ser::Serializer,\n{\n    serializer.collect_str(re.as_str())\n}\n\nimpl Deref for Regex {\n    type Target = regex::Regex;\n\n    fn deref(&self) -> &regex::Regex {\n        &self.0\n    }\n}\n\nimpl PartialEq for Regex {\n    fn eq(&self, other: &Regex) -> bool {\n        // comparing the automatons would be quite complicated\n        self.as_str() == other.as_str()\n    }\n}\n\nimpl Eq for Regex {}\n\n#[cfg(test)]\nmod tests {\n\n    #[test]\n    fn roundtrip() {\n        let input = r#\"\"foo.*bar\"\"#;\n        let re: super::Regex = serde_json::from_str(input).unwrap();\n        assert!(re.is_match(\"foo123bar\"));\n        assert!(!re.is_match(\"foo\"));\n        let output = serde_json::to_string(&re).unwrap();\n        assert_eq!(output, input);\n    }\n}\n"
  },
  {
    "path": "libs/utils/src/serde_system_time.rs",
    "content": "//! A `serde::{Deserialize,Serialize}` type for SystemTime with RFC3339 format and millisecond precision.\n\n#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, serde::Serialize, serde::Deserialize)]\n#[serde(transparent)]\npub struct SystemTime(\n    #[serde(\n        deserialize_with = \"deser_rfc3339_millis\",\n        serialize_with = \"ser_rfc3339_millis\"\n    )]\n    pub std::time::SystemTime,\n);\n\nfn ser_rfc3339_millis<S: serde::ser::Serializer>(\n    ts: &std::time::SystemTime,\n    serializer: S,\n) -> Result<S::Ok, S::Error> {\n    serializer.collect_str(&humantime::format_rfc3339_millis(*ts))\n}\n\nfn deser_rfc3339_millis<'de, D>(deserializer: D) -> Result<std::time::SystemTime, D::Error>\nwhere\n    D: serde::de::Deserializer<'de>,\n{\n    let s: String = serde::de::Deserialize::deserialize(deserializer)?;\n    humantime::parse_rfc3339(&s).map_err(serde::de::Error::custom)\n}\n\n#[cfg(test)]\nmod tests {\n    use super::*;\n\n    /// Helper function to make a SystemTime have millisecond precision by truncating additional nanoseconds.\n    fn to_millisecond_precision(time: SystemTime) -> SystemTime {\n        match time.0.duration_since(std::time::SystemTime::UNIX_EPOCH) {\n            Ok(duration) => {\n                let total_millis = duration.as_secs() * 1_000 + u64::from(duration.subsec_millis());\n                SystemTime(\n                    std::time::SystemTime::UNIX_EPOCH\n                        + std::time::Duration::from_millis(total_millis),\n                )\n            }\n            Err(_) => time,\n        }\n    }\n\n    #[test]\n    fn test_serialize_deserialize() {\n        let input = SystemTime(std::time::SystemTime::now());\n        let expected_serialized = format!(\"\\\"{}\\\"\", humantime::format_rfc3339_millis(input.0));\n        let serialized = serde_json::to_string(&input).unwrap();\n        assert_eq!(expected_serialized, serialized);\n        let deserialized: SystemTime = 
serde_json::from_str(&expected_serialized).unwrap();\n        assert_eq!(to_millisecond_precision(input), deserialized);\n    }\n}\n"
  },
  {
    "path": "libs/utils/src/shard.rs",
    "content": "//! See `pageserver_api::shard` for description on sharding.\n\nuse std::ops::RangeInclusive;\nuse std::str::FromStr;\n\nuse hex::FromHex;\nuse serde::{Deserialize, Serialize};\n\nuse crate::id::TenantId;\n\n#[derive(Ord, PartialOrd, Eq, PartialEq, Clone, Copy, Serialize, Deserialize, Debug, Hash)]\npub struct ShardNumber(pub u8);\n\n#[derive(Ord, PartialOrd, Eq, PartialEq, Clone, Copy, Serialize, Deserialize, Debug, Hash)]\npub struct ShardCount(pub u8);\n\n/// Combination of ShardNumber and ShardCount.\n///\n/// For use within the context of a particular tenant, when we need to know which shard we're\n/// dealing with, but do not need to know the full ShardIdentity (because we won't be doing\n/// any page->shard mapping), and do not need to know the fully qualified TenantShardId.\n#[derive(Eq, PartialEq, PartialOrd, Ord, Clone, Copy, Hash)]\npub struct ShardIndex {\n    pub shard_number: ShardNumber,\n    pub shard_count: ShardCount,\n}\n\n/// Stripe size as number of pages.\n///\n/// NB: don't implement Default, so callers don't lazily use it by mistake. 
See DEFAULT_STRIPE_SIZE.\n#[derive(Clone, Copy, Serialize, Deserialize, Eq, PartialEq, Debug)]\npub struct ShardStripeSize(pub u32);\n\n/// Formatting helper, for generating the `shard_id` label in traces.\npub struct ShardSlug<'a>(&'a TenantShardId);\n\n/// TenantShardId globally identifies a particular shard in a particular tenant.\n///\n/// These are written as `<TenantId>-<ShardSlug>`, for example:\n///   # The second shard in a two-shard tenant\n///   072f1291a5310026820b2fe4b2968934-0102\n///\n/// If the `ShardCount` is _unsharded_, the `TenantShardId` is written without\n/// a shard suffix and is equivalent to the encoding of a `TenantId`: this enables\n/// an unsharded [`TenantShardId`] to be used interchangeably with a [`TenantId`].\n///\n/// The human-readable encoding of an unsharded TenantShardId, such as used in API URLs,\n/// is both forward and backward compatible with TenantId: a legacy TenantId can be\n/// decoded as a TenantShardId, and when re-encoded it will be parseable\n/// as a TenantId.\n#[derive(Eq, PartialEq, PartialOrd, Ord, Clone, Copy, Hash)]\npub struct TenantShardId {\n    pub tenant_id: TenantId,\n    pub shard_number: ShardNumber,\n    pub shard_count: ShardCount,\n}\n\nimpl ShardCount {\n    pub const MAX: Self = Self(u8::MAX);\n    pub const MIN: Self = Self(0);\n\n    pub fn unsharded() -> Self {\n        ShardCount(0)\n    }\n\n    /// The internal value of a ShardCount may be zero, which means \"1 shard, but use\n    /// legacy format for TenantShardId that excludes the shard suffix\", also known\n    /// as [`TenantShardId::unsharded`].\n    ///\n    /// This method returns the actual number of shards, i.e. 
if our internal value is\n    /// zero, we return 1 (unsharded tenants have 1 shard).\n    pub fn count(&self) -> u8 {\n        if self.0 > 0 { self.0 } else { 1 }\n    }\n\n    /// The literal internal value: this is **not** the number of shards in the\n    /// tenant, as we have a special zero value for legacy unsharded tenants.  Use\n    /// [`Self::count`] if you want to know the cardinality of shards.\n    pub fn literal(&self) -> u8 {\n        self.0\n    }\n\n    /// Whether the `ShardCount` is for an unsharded tenant, so uses one shard but\n    /// uses the legacy format for `TenantShardId`. See also the documentation for\n    /// [`Self::count`].\n    pub fn is_unsharded(&self) -> bool {\n        self.0 == 0\n    }\n\n    /// `val` may be zero, or the number of shards in the tenant.  `val` is what\n    /// [`Self::literal`] would return.\n    pub const fn new(val: u8) -> Self {\n        Self(val)\n    }\n}\n\nimpl ShardNumber {\n    pub const MAX: Self = Self(u8::MAX);\n}\n\nimpl TenantShardId {\n    pub fn unsharded(tenant_id: TenantId) -> Self {\n        Self {\n            tenant_id,\n            shard_number: ShardNumber(0),\n            shard_count: ShardCount(0),\n        }\n    }\n\n    /// The range of all TenantShardId that belong to a particular TenantId.  
This is useful when\n    /// you have a BTreeMap of TenantShardId, and are querying by TenantId.\n    pub fn tenant_range(tenant_id: TenantId) -> RangeInclusive<Self> {\n        RangeInclusive::new(\n            Self {\n                tenant_id,\n                shard_number: ShardNumber(0),\n                shard_count: ShardCount(0),\n            },\n            Self {\n                tenant_id,\n                shard_number: ShardNumber::MAX,\n                shard_count: ShardCount::MAX,\n            },\n        )\n    }\n\n    pub fn range(&self) -> RangeInclusive<Self> {\n        RangeInclusive::new(*self, *self)\n    }\n\n    pub fn shard_slug(&self) -> impl std::fmt::Display + '_ {\n        ShardSlug(self)\n    }\n\n    /// Convenience for code that has special behavior on the 0th shard.\n    pub fn is_shard_zero(&self) -> bool {\n        self.shard_number == ShardNumber(0)\n    }\n\n    /// The \"unsharded\" value is distinct from simply having a single shard: it represents\n    /// a tenant which is not shard-aware at all, and whose storage paths will not include\n    /// a shard suffix.\n    pub fn is_unsharded(&self) -> bool {\n        self.shard_number == ShardNumber(0) && self.shard_count.is_unsharded()\n    }\n\n    /// Convenience for dropping the tenant_id and just getting the ShardIndex: this\n    /// is useful when logging from code that is already in a span that includes tenant ID, to\n    /// keep messages reasonably terse.\n    pub fn to_index(&self) -> ShardIndex {\n        ShardIndex {\n            shard_number: self.shard_number,\n            shard_count: self.shard_count,\n        }\n    }\n\n    /// Calculate the children of this TenantShardId when splitting the overall tenant into\n    /// the given number of shards.\n    pub fn split(&self, new_shard_count: ShardCount) -> Vec<TenantShardId> {\n        let effective_old_shard_count = std::cmp::max(self.shard_count.0, 1);\n        let mut child_shards = Vec::new();\n        for 
shard_number in 0..ShardNumber(new_shard_count.0).0 {\n            // Key mapping is based on a round robin mapping of key hash modulo shard count,\n            // so our child shards are the ones which the same keys would map to.\n            if shard_number % effective_old_shard_count == self.shard_number.0 {\n                child_shards.push(TenantShardId {\n                    tenant_id: self.tenant_id,\n                    shard_number: ShardNumber(shard_number),\n                    shard_count: new_shard_count,\n                })\n            }\n        }\n\n        child_shards\n    }\n}\n\nimpl std::fmt::Display for ShardNumber {\n    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {\n        self.0.fmt(f)\n    }\n}\n\nimpl std::fmt::Display for ShardCount {\n    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {\n        self.0.fmt(f)\n    }\n}\n\nimpl std::fmt::Display for ShardStripeSize {\n    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {\n        self.0.fmt(f)\n    }\n}\n\nimpl std::fmt::Display for ShardSlug<'_> {\n    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {\n        write!(\n            f,\n            \"{:02x}{:02x}\",\n            self.0.shard_number.0, self.0.shard_count.0\n        )\n    }\n}\n\nimpl std::fmt::Display for TenantShardId {\n    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {\n        if self.shard_count != ShardCount(0) {\n            write!(f, \"{}-{}\", self.tenant_id, self.shard_slug())\n        } else {\n            // Legacy case (shard_count == 0) -- format as just the tenant id.  
Note that this\n            // is distinct from the normal single shard case (shard count == 1).\n            self.tenant_id.fmt(f)\n        }\n    }\n}\n\nimpl std::fmt::Debug for TenantShardId {\n    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {\n        // Debug is the same as Display: the compact hex representation\n        write!(f, \"{self}\")\n    }\n}\n\nimpl std::str::FromStr for TenantShardId {\n    type Err = hex::FromHexError;\n\n    fn from_str(s: &str) -> Result<Self, Self::Err> {\n        // Expect format: 16 byte TenantId, '-', 1 byte shard number, 1 byte shard count\n        if s.len() == 32 {\n            // Legacy case: no shard specified\n            Ok(Self {\n                tenant_id: TenantId::from_str(s)?,\n                shard_number: ShardNumber(0),\n                shard_count: ShardCount(0),\n            })\n        } else if s.len() == 37 {\n            let bytes = s.as_bytes();\n            let tenant_id = TenantId::from_hex(&bytes[0..32])?;\n            let mut shard_parts: [u8; 2] = [0u8; 2];\n            hex::decode_to_slice(&bytes[33..37], &mut shard_parts)?;\n            Ok(Self {\n                tenant_id,\n                shard_number: ShardNumber(shard_parts[0]),\n                shard_count: ShardCount(shard_parts[1]),\n            })\n        } else {\n            Err(hex::FromHexError::InvalidStringLength)\n        }\n    }\n}\n\nimpl From<[u8; 18]> for TenantShardId {\n    fn from(b: [u8; 18]) -> Self {\n        let tenant_id_bytes: [u8; 16] = b[0..16].try_into().unwrap();\n\n        Self {\n            tenant_id: TenantId::from(tenant_id_bytes),\n            shard_number: ShardNumber(b[16]),\n            shard_count: ShardCount(b[17]),\n        }\n    }\n}\n\nimpl ShardIndex {\n    pub fn new(number: ShardNumber, count: ShardCount) -> Self {\n        Self {\n            shard_number: number,\n            shard_count: count,\n        }\n    }\n    pub fn unsharded() -> Self {\n        Self {\n      
      shard_number: ShardNumber(0),\n            shard_count: ShardCount(0),\n        }\n    }\n\n    /// The \"unsharded\" value is distinct from simply having a single shard: it represents\n    /// a tenant which is not shard-aware at all, and whose storage paths will not include\n    /// a shard suffix.\n    pub fn is_unsharded(&self) -> bool {\n        self.shard_number == ShardNumber(0) && self.shard_count == ShardCount(0)\n    }\n\n    /// For use in constructing remote storage paths: concatenate this with a TenantId\n    /// to get a fully qualified TenantShardId.\n    ///\n    /// Backward compat: this function returns an empty string if Self::is_unsharded, such\n    /// that the legacy pre-sharding remote key format is preserved.\n    pub fn get_suffix(&self) -> String {\n        if self.is_unsharded() {\n            \"\".to_string()\n        } else {\n            format!(\"-{:02x}{:02x}\", self.shard_number.0, self.shard_count.0)\n        }\n    }\n}\n\nimpl std::fmt::Display for ShardIndex {\n    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {\n        write!(f, \"{:02x}{:02x}\", self.shard_number.0, self.shard_count.0)\n    }\n}\n\nimpl std::fmt::Debug for ShardIndex {\n    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {\n        // Debug is the same as Display: the compact hex representation\n        write!(f, \"{self}\")\n    }\n}\n\nimpl std::str::FromStr for ShardIndex {\n    type Err = hex::FromHexError;\n\n    fn from_str(s: &str) -> Result<Self, Self::Err> {\n        // Expect format: 1 byte shard number, 1 byte shard count\n        if s.len() == 4 {\n            let bytes = s.as_bytes();\n            let mut shard_parts: [u8; 2] = [0u8; 2];\n            hex::decode_to_slice(bytes, &mut shard_parts)?;\n            Ok(Self {\n                shard_number: ShardNumber(shard_parts[0]),\n                shard_count: ShardCount(shard_parts[1]),\n            })\n        } else {\n            
Err(hex::FromHexError::InvalidStringLength)\n        }\n    }\n}\n\nimpl From<[u8; 2]> for ShardIndex {\n    fn from(b: [u8; 2]) -> Self {\n        Self {\n            shard_number: ShardNumber(b[0]),\n            shard_count: ShardCount(b[1]),\n        }\n    }\n}\n\nimpl Serialize for TenantShardId {\n    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>\n    where\n        S: serde::Serializer,\n    {\n        if serializer.is_human_readable() {\n            serializer.collect_str(self)\n        } else {\n            // Note: while human encoding of [`TenantShardId`] is backward and forward\n            // compatible, this binary encoding is not.\n            let mut packed: [u8; 18] = [0; 18];\n            packed[0..16].clone_from_slice(&self.tenant_id.as_arr());\n            packed[16] = self.shard_number.0;\n            packed[17] = self.shard_count.0;\n\n            packed.serialize(serializer)\n        }\n    }\n}\n\nimpl<'de> Deserialize<'de> for TenantShardId {\n    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>\n    where\n        D: serde::Deserializer<'de>,\n    {\n        struct IdVisitor {\n            is_human_readable_deserializer: bool,\n        }\n\n        impl<'de> serde::de::Visitor<'de> for IdVisitor {\n            type Value = TenantShardId;\n\n            fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {\n                if self.is_human_readable_deserializer {\n                    formatter.write_str(\"value in form of hex string\")\n                } else {\n                    formatter.write_str(\"value in form of integer array([u8; 18])\")\n                }\n            }\n\n            fn visit_seq<A>(self, seq: A) -> Result<Self::Value, A::Error>\n            where\n                A: serde::de::SeqAccess<'de>,\n            {\n                let s = serde::de::value::SeqAccessDeserializer::new(seq);\n                let id: [u8; 18] = Deserialize::deserialize(s)?;\n          
      Ok(TenantShardId::from(id))\n            }\n\n            fn visit_str<E>(self, v: &str) -> Result<Self::Value, E>\n            where\n                E: serde::de::Error,\n            {\n                TenantShardId::from_str(v).map_err(E::custom)\n            }\n        }\n\n        if deserializer.is_human_readable() {\n            deserializer.deserialize_str(IdVisitor {\n                is_human_readable_deserializer: true,\n            })\n        } else {\n            deserializer.deserialize_tuple(\n                18,\n                IdVisitor {\n                    is_human_readable_deserializer: false,\n                },\n            )\n        }\n    }\n}\n\nimpl Serialize for ShardIndex {\n    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>\n    where\n        S: serde::Serializer,\n    {\n        if serializer.is_human_readable() {\n            serializer.collect_str(self)\n        } else {\n            // Binary encoding is not used in index_part.json, but is included in anticipation of\n            // switching various structures (e.g. 
inter-process communication, remote metadata) to more\n            // compact binary encodings in future.\n            let mut packed: [u8; 2] = [0; 2];\n            packed[0] = self.shard_number.0;\n            packed[1] = self.shard_count.0;\n            packed.serialize(serializer)\n        }\n    }\n}\n\nimpl<'de> Deserialize<'de> for ShardIndex {\n    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>\n    where\n        D: serde::Deserializer<'de>,\n    {\n        struct IdVisitor {\n            is_human_readable_deserializer: bool,\n        }\n\n        impl<'de> serde::de::Visitor<'de> for IdVisitor {\n            type Value = ShardIndex;\n\n            fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {\n                if self.is_human_readable_deserializer {\n                    formatter.write_str(\"value in form of hex string\")\n                } else {\n                    formatter.write_str(\"value in form of integer array([u8; 2])\")\n                }\n            }\n\n            fn visit_seq<A>(self, seq: A) -> Result<Self::Value, A::Error>\n            where\n                A: serde::de::SeqAccess<'de>,\n            {\n                let s = serde::de::value::SeqAccessDeserializer::new(seq);\n                let id: [u8; 2] = Deserialize::deserialize(s)?;\n                Ok(ShardIndex::from(id))\n            }\n\n            fn visit_str<E>(self, v: &str) -> Result<Self::Value, E>\n            where\n                E: serde::de::Error,\n            {\n                ShardIndex::from_str(v).map_err(E::custom)\n            }\n        }\n\n        if deserializer.is_human_readable() {\n            deserializer.deserialize_str(IdVisitor {\n                is_human_readable_deserializer: true,\n            })\n        } else {\n            deserializer.deserialize_tuple(\n                2,\n                IdVisitor {\n                    is_human_readable_deserializer: false,\n                },\n         
   )\n        }\n    }\n}\n"
  },
  {
    "path": "libs/utils/src/signals.rs",
    "content": "pub use signal_hook::consts::TERM_SIGNALS;\npub use signal_hook::consts::signal::*;\nuse signal_hook::iterator::Signals;\nuse tokio::signal::unix::{SignalKind, signal};\nuse tracing::info;\n\npub enum Signal {\n    Quit,\n    Interrupt,\n    Terminate,\n}\n\nimpl Signal {\n    pub fn name(&self) -> &'static str {\n        match self {\n            Signal::Quit => \"SIGQUIT\",\n            Signal::Interrupt => \"SIGINT\",\n            Signal::Terminate => \"SIGTERM\",\n        }\n    }\n}\n\npub struct ShutdownSignals;\n\nimpl ShutdownSignals {\n    pub fn handle(mut handler: impl FnMut(Signal) -> anyhow::Result<()>) -> anyhow::Result<()> {\n        for raw_signal in Signals::new(TERM_SIGNALS)?.into_iter() {\n            let signal = match raw_signal {\n                SIGINT => Signal::Interrupt,\n                SIGTERM => Signal::Terminate,\n                SIGQUIT => Signal::Quit,\n                other => panic!(\"unknown signal: {other}\"),\n            };\n\n            handler(signal)?;\n        }\n\n        Ok(())\n    }\n}\n\n/// Runs in a loop since we want to be responsive to multiple signals\n/// even after triggering shutdown (e.g. a SIGQUIT after a slow SIGTERM shutdown)\n/// <https://github.com/neondatabase/neon/issues/9740>\npub async fn signal_handler(token: tokio_util::sync::CancellationToken) {\n    let mut sigint = signal(SignalKind::interrupt()).unwrap();\n    let mut sigterm = signal(SignalKind::terminate()).unwrap();\n    let mut sigquit = signal(SignalKind::quit()).unwrap();\n\n    loop {\n        let signal = tokio::select! {\n            _ = sigquit.recv() => {\n                info!(\"Got signal SIGQUIT. Terminating in immediate shutdown mode.\");\n                std::process::exit(111);\n            }\n            _ = sigint.recv() => \"SIGINT\",\n            _ = sigterm.recv() => \"SIGTERM\",\n        };\n\n        if !token.is_cancelled() {\n            info!(\"Got signal {signal}. 
Terminating gracefully in fast shutdown mode.\");\n            token.cancel();\n        } else {\n            info!(\"Got signal {signal}. Already shutting down.\");\n        }\n    }\n}\n"
  },
  {
    "path": "libs/utils/src/simple_rcu.rs",
    "content": "//!\n//! RCU stands for Read-Copy-Update. It's a synchronization mechanism somewhat\n//! similar to a lock, but it allows readers to \"hold on\" to an old value of RCU\n//! without blocking writers, and allows writing a new value without blocking\n//! readers. When you update the value, the new value is immediately visible\n//! to new readers, but the update waits until all existing readers have\n//! finished, so that on return, no one sees the old value anymore.\n//!\n//! This implementation isn't wait-free; it uses an RwLock that is held for a\n//! short duration when the value is read or updated.\n//!\n//! # Examples\n//!\n//! Read a value and do things with it while holding the guard:\n//!\n//! ```\n//! # let rcu = utils::simple_rcu::Rcu::new(1);\n//! {\n//!     let read = rcu.read();\n//!     println!(\"the current value is {}\", *read);\n//!     // exiting the scope drops the read-guard, and allows concurrent writers\n//!     // to finish.\n//! }\n//! ```\n//!\n//! Increment the value by one, and wait for old readers to finish:\n//!\n//! ```\n//! # async fn dox() {\n//! # let rcu = utils::simple_rcu::Rcu::new(1);\n//! let write_guard = rcu.lock_for_write();\n//!\n//! // NB: holding `write_guard` blocks new readers and writers. Keep this section short!\n//! let new_value = *write_guard + 1;\n//!\n//! let waitlist = write_guard.store_and_unlock(new_value); // consumes `write_guard`\n//!\n//! // Concurrent reads and writes are now possible again. Wait for all the readers\n//! // that still observe the old value to finish.\n//! waitlist.wait().await;\n//! # }\n//! 
```\n//!\n#![warn(missing_docs)]\n\nuse std::ops::Deref;\nuse std::sync::{Arc, RwLock, RwLockWriteGuard, Weak};\n\nuse tokio::sync::watch;\n\n/// Rcu allows multiple readers to read and hold onto a value without blocking\n/// (for very long).\n///\n/// Storing to the Rcu updates the value, making new readers immediately see\n/// the new value, but it also waits for all current readers to finish.\npub struct Rcu<V> {\n    inner: RwLock<RcuInner<V>>,\n}\n\nstruct RcuInner<V> {\n    current_cell: Arc<RcuCell<V>>,\n    old_cells: Vec<Weak<RcuCell<V>>>,\n}\n\n///\n/// RcuCell holds one value. It can be the latest one, or an old one.\n///\nstruct RcuCell<V> {\n    value: V,\n\n    /// A dummy channel. We never send anything to this channel. The point is\n    /// that when the RcuCell is dropped, any subscribed Receivers will be notified\n    /// that the channel is closed. Updaters can use this to wait out until the\n    /// RcuCell has been dropped, i.e. until the old value is no longer in use.\n    ///\n    /// We never send anything to this, we just need to hold onto it so that the\n    /// Receivers will be notified when it's dropped.\n    watch: watch::Sender<()>,\n}\n\nimpl<V> RcuCell<V> {\n    fn new(value: V) -> Self {\n        let (watch_sender, _) = watch::channel(());\n        RcuCell {\n            value,\n            watch: watch_sender,\n        }\n    }\n}\n\nimpl<V> Rcu<V> {\n    /// Create a new `Rcu`, initialized to `starting_val`\n    pub fn new(starting_val: V) -> Self {\n        let inner = RcuInner {\n            current_cell: Arc::new(RcuCell::new(starting_val)),\n            old_cells: Vec::new(),\n        };\n        Self {\n            inner: RwLock::new(inner),\n        }\n    }\n\n    ///\n    /// Read current value. 
A later writer's `RcuWaitList::wait()` will not complete until the returned\n    /// guard object is dropped.\n    ///\n    pub fn read(&self) -> RcuReadGuard<V> {\n        let current_cell = Arc::clone(&self.inner.read().unwrap().current_cell);\n        RcuReadGuard { cell: current_cell }\n    }\n\n    ///\n    /// Lock the current value for updating. Returns a guard object that can be\n    /// used to read the current value, and to store a new value.\n    ///\n    /// Note: holding the write-guard blocks concurrent readers, so you should\n    /// finish the update and drop the guard quickly! Multiple writers can be\n    /// waiting on [`RcuWaitList::wait`] at the same time, however.\n    ///\n    pub fn lock_for_write(&self) -> RcuWriteGuard<'_, V> {\n        let inner = self.inner.write().unwrap();\n        RcuWriteGuard { inner }\n    }\n}\n\n///\n/// Read guard returned by `read`\n///\npub struct RcuReadGuard<V> {\n    cell: Arc<RcuCell<V>>,\n}\n\nimpl<V> Deref for RcuReadGuard<V> {\n    type Target = V;\n\n    fn deref(&self) -> &V {\n        &self.cell.value\n    }\n}\n\n///\n/// Write guard returned by `lock_for_write`\n///\n/// NB: Holding this guard blocks all concurrent `read` and `lock_for_write` calls, so it should only be\n/// held for a short duration!\n///\n/// Calling [`Self::store_and_unlock`] consumes the guard, making new reads and new writes possible\n/// again.\n///\npub struct RcuWriteGuard<'a, V> {\n    inner: RwLockWriteGuard<'a, RcuInner<V>>,\n}\n\nimpl<V> Deref for RcuWriteGuard<'_, V> {\n    type Target = V;\n\n    fn deref(&self) -> &V {\n        &self.inner.current_cell.value\n    }\n}\n\nimpl<V> RcuWriteGuard<'_, V> {\n    ///\n    /// Store a new value. The new value will be written to the Rcu immediately,\n    /// and will be immediately seen by any `read` calls that start afterwards.\n    ///\n    /// Returns a list of readers that can see old values. 
You can call `wait()`\n    /// on it to wait for them to finish.\n    ///\n    pub fn store_and_unlock(mut self, new_val: V) -> RcuWaitList {\n        let new_cell = Arc::new(RcuCell::new(new_val));\n\n        let mut watches = Vec::new();\n        {\n            let old = std::mem::replace(&mut self.inner.current_cell, new_cell);\n            self.inner.old_cells.push(Arc::downgrade(&old));\n\n            // cleanup old cells that no longer have any readers, and collect\n            // the watches for any that do.\n            self.inner.old_cells.retain(|weak| {\n                if let Some(cell) = weak.upgrade() {\n                    watches.push(cell.watch.subscribe());\n                    true\n                } else {\n                    false\n                }\n            });\n        }\n        RcuWaitList(watches)\n    }\n}\n\n///\n/// List of readers who can still see old values.\n///\npub struct RcuWaitList(Vec<watch::Receiver<()>>);\n\nimpl RcuWaitList {\n    ///\n    /// Wait for old readers to finish.\n    ///\n    pub async fn wait(mut self) {\n        // after all the old_cells are no longer in use, we're done\n        for w in self.0.iter_mut() {\n            // This will block until the Receiver is closed. 
That happens when\n            // the RcuCell is dropped.\n            #[allow(clippy::single_match)]\n            match w.changed().await {\n                Ok(_) => panic!(\"changed() unexpectedly succeeded on dummy channel\"),\n                Err(_) => {\n                    // closed, which means that the cell has been dropped, and\n                    // its value is no longer in use\n                }\n            }\n        }\n    }\n}\n\n#[cfg(test)]\nmod tests {\n    use std::sync::Mutex;\n    use std::time::Duration;\n\n    use super::*;\n\n    #[tokio::test]\n    async fn two_writers() {\n        let rcu = Rcu::new(1);\n\n        let read1 = rcu.read();\n        assert_eq!(*read1, 1);\n\n        let write2 = rcu.lock_for_write();\n        assert_eq!(*write2, 1);\n        let wait2 = write2.store_and_unlock(2);\n\n        let read2 = rcu.read();\n        assert_eq!(*read2, 2);\n\n        let write3 = rcu.lock_for_write();\n        assert_eq!(*write3, 2);\n        let wait3 = write3.store_and_unlock(3);\n\n        // new reader can see the new value, and old readers continue to see the old values.\n        let read3 = rcu.read();\n        assert_eq!(*read3, 3);\n        assert_eq!(*read2, 2);\n        assert_eq!(*read1, 1);\n\n        let log = Arc::new(Mutex::new(Vec::new()));\n        // Wait for the old readers to finish in separate tasks.\n        let log_clone = Arc::clone(&log);\n        let task2 = tokio::spawn(async move {\n            wait2.wait().await;\n            log_clone.lock().unwrap().push(\"wait2 done\");\n        });\n        let log_clone = Arc::clone(&log);\n        let task3 = tokio::spawn(async move {\n            wait3.wait().await;\n            log_clone.lock().unwrap().push(\"wait3 done\");\n        });\n\n        // without this sleep the test can pass on accident if the writer is slow\n        tokio::time::sleep(Duration::from_millis(100)).await;\n\n        // Release first reader. 
This allows first write to finish, but calling\n        // wait() on the 'task3' would still block.\n        log.lock().unwrap().push(\"dropping read1\");\n        drop(read1);\n        task2.await.unwrap();\n\n        assert!(!task3.is_finished());\n\n        tokio::time::sleep(Duration::from_millis(100)).await;\n\n        // Release second reader, and finish second writer.\n        log.lock().unwrap().push(\"dropping read2\");\n        drop(read2);\n        task3.await.unwrap();\n\n        assert_eq!(\n            log.lock().unwrap().as_slice(),\n            &[\n                \"dropping read1\",\n                \"wait2 done\",\n                \"dropping read2\",\n                \"wait3 done\"\n            ]\n        );\n    }\n}\n"
  },
  {
    "path": "libs/utils/src/span.rs",
    "content": "//! Tracing span helpers.\n\n/// Records the given fields in the current span, as a single call. The fields must already have\n/// been declared for the span (typically with empty values).\n#[macro_export]\nmacro_rules! span_record {\n    ($($tokens:tt)*) => {$crate::span_record_in!(::tracing::Span::current(), $($tokens)*)};\n}\n\n/// Records the given fields in the given span, as a single call. The fields must already have been\n/// declared for the span (typically with empty values).\n#[macro_export]\nmacro_rules! span_record_in {\n    ($span:expr, $($tokens:tt)*) => {\n        if let Some(meta) = $span.metadata() {\n            $span.record_all(&tracing::valueset!(meta.fields(), $($tokens)*));\n        }\n    };\n}\n"
  },
  {
    "path": "libs/utils/src/sync/duplex/mpsc.rs",
    "content": "use tokio::sync::mpsc;\n\n/// A bi-directional channel.\npub struct Duplex<S, R> {\n    pub tx: mpsc::Sender<S>,\n    pub rx: mpsc::Receiver<R>,\n}\n\n/// Creates a bi-directional channel.\n///\n/// The channel will buffer up to the provided number of messages. Once the buffer is full,\n/// attempts to send new messages will wait until a message is received from the channel.\n/// The provided buffer capacity must be at least 1.\npub fn channel<A: Send, B: Send>(buffer: usize) -> (Duplex<A, B>, Duplex<B, A>) {\n    let (tx_a, rx_a) = mpsc::channel::<A>(buffer);\n    let (tx_b, rx_b) = mpsc::channel::<B>(buffer);\n\n    (Duplex { tx: tx_a, rx: rx_b }, Duplex { tx: tx_b, rx: rx_a })\n}\n\nimpl<S: Send, R: Send> Duplex<S, R> {\n    /// Sends a value, waiting until there is capacity.\n    ///\n    /// A successful send occurs when it is determined that the other end of the channel has not hung up already.\n    pub async fn send(&self, x: S) -> Result<(), mpsc::error::SendError<S>> {\n        self.tx.send(x).await\n    }\n\n    pub fn try_send(&self, x: S) -> Result<(), mpsc::error::TrySendError<S>> {\n        self.tx.try_send(x)\n    }\n\n    /// Receives the next value for this receiver.\n    ///\n    /// This method returns `None` if the channel has been closed and there are\n    /// no remaining messages in the channel's buffer.\n    pub async fn recv(&mut self) -> Option<R> {\n        self.rx.recv().await\n    }\n}\n"
  },
  {
    "path": "libs/utils/src/sync/duplex.rs",
    "content": "pub mod mpsc;\n"
  },
  {
    "path": "libs/utils/src/sync/gate.rs",
    "content": "use std::sync::Arc;\nuse std::sync::atomic::{AtomicBool, Ordering};\nuse std::time::Duration;\n\n/// Gates are a concurrency helper, primarily used for implementing safe shutdown.\n///\n/// Users of a resource call `enter()` to acquire a GateGuard, and the owner of\n/// the resource calls `close()` when they want to ensure that all holders of guards\n/// have released them, and that no future guards will be issued.\npub struct Gate {\n    inner: Arc<GateInner>,\n}\n\nimpl std::fmt::Debug for Gate {\n    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {\n        f.debug_struct(\"Gate\")\n            // use this for identification\n            .field(\"ptr\", &Arc::as_ptr(&self.inner))\n            .field(\"inner\", &self.inner)\n            .finish()\n    }\n}\n\nstruct GateInner {\n    sem: tokio::sync::Semaphore,\n    closing: std::sync::atomic::AtomicBool,\n}\n\nimpl std::fmt::Debug for GateInner {\n    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {\n        let avail = self.sem.available_permits();\n\n        let guards = u32::try_from(avail)\n            .ok()\n            // the sem only supports 32-bit ish amount, but lets play it safe\n            .and_then(|x| Gate::MAX_UNITS.checked_sub(x));\n\n        let closing = self.closing.load(Ordering::Relaxed);\n\n        if let Some(guards) = guards {\n            f.debug_struct(\"Gate\")\n                .field(\"remaining_guards\", &guards)\n                .field(\"closing\", &closing)\n                .finish()\n        } else {\n            f.debug_struct(\"Gate\")\n                .field(\"avail_permits\", &avail)\n                .field(\"closing\", &closing)\n                .finish()\n        }\n    }\n}\n\n/// RAII guard for a [`Gate`]: as long as this exists, calls to [`Gate::close`] will\n/// not complete.\n#[derive(Debug)]\npub struct GateGuard {\n    // Record the span where the gate was entered, so that we can identify who was blocking 
Gate::close\n    span_at_enter: tracing::Span,\n    gate: Arc<GateInner>,\n}\n\nimpl GateGuard {\n    pub fn try_clone(&self) -> Result<Self, GateError> {\n        Gate::enter_impl(self.gate.clone())\n    }\n}\n\nimpl Drop for GateGuard {\n    fn drop(&mut self) {\n        if self.gate.closing.load(Ordering::Relaxed) {\n            self.span_at_enter.in_scope(\n                || tracing::info!(gate = ?Arc::as_ptr(&self.gate), \"kept the gate from closing\"),\n            );\n        }\n\n        // when the permit was acquired, it was forgotten to allow us to manage its lifecycle\n        // manually, so \"return\" the permit now.\n        self.gate.sem.add_permits(1);\n    }\n}\n\n#[derive(Debug, thiserror::Error)]\npub enum GateError {\n    #[error(\"gate is closed\")]\n    GateClosed,\n}\n\nimpl GateError {\n    pub fn is_cancel(&self) -> bool {\n        match self {\n            GateError::GateClosed => true,\n        }\n    }\n}\n\nimpl Default for Gate {\n    fn default() -> Self {\n        Self {\n            inner: Arc::new(GateInner {\n                sem: tokio::sync::Semaphore::new(Self::MAX_UNITS as usize),\n                closing: AtomicBool::new(false),\n            }),\n        }\n    }\n}\n\nimpl Gate {\n    const MAX_UNITS: u32 = u32::MAX;\n\n    /// Acquire a guard that will prevent close() calls from completing. If close()\n    /// was already called, this will return an error which should be interpreted\n    /// as \"shutting down\".\n    ///\n    /// This function would typically be used from e.g. request handlers. 
While holding\n    /// the guard returned from this function, it is important to respect a CancellationToken\n    /// to avoid blocking close() indefinitely: typically types that contain a Gate will\n    /// also contain a CancellationToken.\n    pub fn enter(&self) -> Result<GateGuard, GateError> {\n        Self::enter_impl(self.inner.clone())\n    }\n\n    fn enter_impl(gate: Arc<GateInner>) -> Result<GateGuard, GateError> {\n        let permit = gate.sem.try_acquire().map_err(|_| GateError::GateClosed)?;\n\n        // we now have the permit, let's disable the normal raii functionality and leave\n        // \"returning\" the permit to our GateGuard::drop.\n        //\n        // this is done to avoid the need for multiple Arcs (one for semaphore, next for other\n        // fields).\n        permit.forget();\n\n        Ok(GateGuard {\n            span_at_enter: tracing::Span::current(),\n            gate,\n        })\n    }\n\n    /// Types with a shutdown() method and a gate should call this method at the\n    /// end of shutdown, to ensure that all GateGuard holders are done.\n    ///\n    /// This will wait for all guards to be destroyed.  
For this to complete promptly, it is\n    /// important that the holders of such guards are respecting a CancellationToken which has\n    /// been cancelled before entering this function.\n    pub async fn close(&self) {\n        let started_at = std::time::Instant::now();\n        let mut do_close = std::pin::pin!(self.do_close());\n\n        // with 1s we rarely saw anything, let's try if we get more gate closing reasons with 100ms\n        let nag_after = Duration::from_millis(100);\n\n        let Err(_timeout) = tokio::time::timeout(nag_after, &mut do_close).await else {\n            return;\n        };\n\n        tracing::info!(\n            gate = ?self.as_ptr(),\n            elapsed_ms = started_at.elapsed().as_millis(),\n            \"closing is taking longer than expected\"\n        );\n\n        // close operation is not trying to be cancellation safe as pageserver does not need it.\n        //\n        // note: \"closing\" is not checked in Gate::enter -- it exists just for observability,\n        // dropping of GateGuard after this will log who they were.\n        self.inner.closing.store(true, Ordering::Relaxed);\n\n        do_close.await;\n\n        tracing::info!(\n            gate = ?self.as_ptr(),\n            elapsed_ms = started_at.elapsed().as_millis(),\n            \"close completed\"\n        );\n    }\n\n    /// Used as an identity of a gate. This identity will be resolved to something useful when\n    /// it's actually closed in a hopefully sensible `tracing::Span` which will describe it even\n    /// more.\n    ///\n    /// `GateGuard::drop` also logs this pointer when it has realized it has been keeping the gate\n    /// open for too long.\n    fn as_ptr(&self) -> *const GateInner {\n        Arc::as_ptr(&self.inner)\n    }\n\n    /// Check if [`Self::close()`] has finished waiting for all [`Self::enter()`] users to finish.  
This\n    /// is usually analogous to \"Did shutdown finish?\" for types that include a Gate, whereas checking\n    /// the CancellationToken on such types is analogous to \"Did shutdown start?\"\n    pub fn close_complete(&self) -> bool {\n        self.inner.sem.is_closed()\n    }\n\n    #[tracing::instrument(level = tracing::Level::DEBUG, skip_all, fields(gate = ?self.as_ptr()))]\n    async fn do_close(&self) {\n        tracing::debug!(\"Closing Gate...\");\n\n        match self.inner.sem.acquire_many(Self::MAX_UNITS).await {\n            Ok(_permit) => {\n                // While holding all units, close the semaphore.  All subsequent calls to enter() will fail.\n                self.inner.sem.close();\n            }\n            Err(_closed) => {\n                // Semaphore closed: we are the only function that can do this, so it indicates a double-call.\n                // This is legal.  Timeline::shutdown for example is not protected from being called more than\n                // once.\n                tracing::debug!(\"Double close\")\n            }\n        }\n        tracing::debug!(\"Closed Gate.\")\n    }\n}\n\n#[cfg(test)]\nmod tests {\n    use super::*;\n\n    #[tokio::test]\n    async fn close_unused() {\n        // Having taken no guards, we should not be blocked in close\n        let gate = Gate::default();\n        gate.close().await;\n    }\n\n    #[tokio::test]\n    async fn close_idle() {\n        // If a guard is dropped before entering, close should not be blocked\n        let gate = Gate::default();\n        let guard = gate.enter().unwrap();\n        drop(guard);\n        gate.close().await;\n\n        // Entering a closed gate fails\n        gate.enter().expect_err(\"enter should fail after close\");\n    }\n\n    #[tokio::test(start_paused = true)]\n    async fn close_busy_gate() {\n        let gate = Gate::default();\n        let forever = Duration::from_secs(24 * 7 * 365);\n\n        let guard =\n            
tracing::info_span!(\"i am holding back the gate\").in_scope(|| gate.enter().unwrap());\n\n        let mut close_fut = std::pin::pin!(gate.close());\n\n        // Close should be waiting for guards to drop\n        tokio::time::timeout(forever, &mut close_fut)\n            .await\n            .unwrap_err();\n\n        // Attempting to enter() should fail, even though close isn't done yet.\n        gate.enter()\n            .expect_err(\"enter should fail after entering close\");\n\n        // this will now log, which we cannot verify except manually\n        drop(guard);\n\n        // Guard is gone, close should finish\n        close_fut.await;\n\n        // Attempting to enter() is still forbidden\n        gate.enter().expect_err(\"enter should fail finishing close\");\n    }\n\n    #[tokio::test(start_paused = true)]\n    async fn clone_gate_guard() {\n        let gate = Gate::default();\n        let forever = Duration::from_secs(24 * 7 * 365);\n\n        let guard1 = gate.enter().expect(\"gate isn't closed\");\n\n        let guard2 = guard1.try_clone().expect(\"gate isn't clsoed\");\n\n        let mut close_fut = std::pin::pin!(gate.close());\n\n        tokio::time::timeout(forever, &mut close_fut)\n            .await\n            .unwrap_err();\n\n        // we polled close_fut once, that should prevent all later enters and clones\n        gate.enter().unwrap_err();\n        guard1.try_clone().unwrap_err();\n        guard2.try_clone().unwrap_err();\n\n        // guard2 keeps gate open even if guard1 is closed\n        drop(guard1);\n        tokio::time::timeout(forever, &mut close_fut)\n            .await\n            .unwrap_err();\n\n        drop(guard2);\n\n        // now that the last guard is dropped, closing should complete\n        close_fut.await;\n\n        // entering is still forbidden\n        gate.enter().expect_err(\"enter should stilll fail\");\n    }\n}\n"
  },
  {
    "path": "libs/utils/src/sync/heavier_once_cell.rs",
    "content": "use std::sync::atomic::{AtomicUsize, Ordering};\nuse std::sync::{Arc, Mutex, MutexGuard};\n\nuse tokio::sync::Semaphore;\n\n/// Custom design like [`tokio::sync::OnceCell`] but using [`OwnedSemaphorePermit`] instead of\n/// `SemaphorePermit`.\n///\n/// Allows use of `take` which does not require holding an outer mutex guard\n/// for the duration of initialization.\n///\n/// Has no unsafe, builds upon [`tokio::sync::Semaphore`] and [`std::sync::Mutex`].\n///\n/// [`OwnedSemaphorePermit`]: tokio::sync::OwnedSemaphorePermit\npub struct OnceCell<T> {\n    inner: Mutex<Inner<T>>,\n    initializers: AtomicUsize,\n}\n\nimpl<T> Default for OnceCell<T> {\n    /// Create new uninitialized [`OnceCell`].\n    fn default() -> Self {\n        Self {\n            inner: Default::default(),\n            initializers: AtomicUsize::new(0),\n        }\n    }\n}\n\n/// Semaphore is the current state:\n/// - open semaphore means the value is `None`, not yet initialized\n/// - closed semaphore means the value has been initialized\n#[derive(Debug)]\nstruct Inner<T> {\n    init_semaphore: Arc<Semaphore>,\n    value: Option<T>,\n}\n\nimpl<T> Default for Inner<T> {\n    fn default() -> Self {\n        Self {\n            init_semaphore: Arc::new(Semaphore::new(1)),\n            value: None,\n        }\n    }\n}\n\nimpl<T> OnceCell<T> {\n    /// Creates an already initialized `OnceCell` with the given value.\n    pub fn new(value: T) -> Self {\n        let sem = Semaphore::new(1);\n        sem.close();\n        Self {\n            inner: Mutex::new(Inner {\n                init_semaphore: Arc::new(sem),\n                value: Some(value),\n            }),\n            initializers: AtomicUsize::new(0),\n        }\n    }\n\n    /// Returns a guard to an existing initialized value, or uniquely initializes the value before\n    /// returning the guard.\n    ///\n    /// Initializing might wait on any existing [`Guard::take_and_deinit`] deinitialization.\n    ///\n    /// 
Initialization is panic-safe and cancellation-safe.\n    pub async fn get_or_init<F, Fut, E>(&self, factory: F) -> Result<Guard<'_, T>, E>\n    where\n        F: FnOnce(InitPermit) -> Fut,\n        Fut: std::future::Future<Output = Result<(T, InitPermit), E>>,\n    {\n        loop {\n            let sem = {\n                let guard = self.inner.lock().unwrap();\n                if guard.value.is_some() {\n                    return Ok(Guard(guard));\n                }\n                guard.init_semaphore.clone()\n            };\n\n            {\n                let permit = {\n                    // increment the count for the duration of queued\n                    let _guard = CountWaitingInitializers::start(self);\n                    sem.acquire().await\n                };\n\n                let Ok(permit) = permit else {\n                    let guard = self.inner.lock().unwrap();\n                    if !Arc::ptr_eq(&sem, &guard.init_semaphore) {\n                        // there was a take_and_deinit in between\n                        continue;\n                    }\n                    assert!(\n                        guard.value.is_some(),\n                        \"semaphore got closed, must be initialized\"\n                    );\n                    return Ok(Guard(guard));\n                };\n\n                permit.forget();\n            }\n\n            let permit = InitPermit(sem);\n            let (value, _permit) = factory(permit).await?;\n\n            let guard = self.inner.lock().unwrap();\n\n            return Ok(Self::set0(value, guard));\n        }\n    }\n\n    /// Like [`Self::get_or_init_detached_measured`], but without the out parameter for time spent waiting.\n    pub async fn get_or_init_detached(&self) -> Result<Guard<'_, T>, InitPermit> {\n        self.get_or_init_detached_measured(None).await\n    }\n\n    /// Returns a guard to an existing initialized value, or returns a unique initialization\n    /// permit which can be used 
to initialize this `OnceCell` using `OnceCell::set`.\n    pub async fn get_or_init_detached_measured(\n        &self,\n        mut wait_time: Option<&mut crate::elapsed_accum::ElapsedAccum>,\n    ) -> Result<Guard<'_, T>, InitPermit> {\n        // It looks like OnceCell::get_or_init could be implemented using this method instead of\n        // duplication. However, that makes the future be !Send due to possibly holding on to the\n        // MutexGuard over an await point.\n        loop {\n            let sem = {\n                let guard = self.inner.lock().unwrap();\n                if guard.value.is_some() {\n                    return Ok(Guard(guard));\n                }\n                guard.init_semaphore.clone()\n            };\n            {\n                let permit = {\n                    // increment the count for the duration of queued\n                    let _guard = CountWaitingInitializers::start(self);\n                    let fut = sem.acquire();\n                    if let Some(wait_time) = wait_time.as_mut() {\n                        wait_time.measure(fut).await\n                    } else {\n                        fut.await\n                    }\n                };\n\n                let Ok(permit) = permit else {\n                    let guard = self.inner.lock().unwrap();\n                    if !Arc::ptr_eq(&sem, &guard.init_semaphore) {\n                        // there was a take_and_deinit in between\n                        continue;\n                    }\n                    assert!(\n                        guard.value.is_some(),\n                        \"semaphore got closed, must be initialized\"\n                    );\n                    return Ok(Guard(guard));\n                };\n\n                permit.forget();\n            }\n\n            let permit = InitPermit(sem);\n            return Err(permit);\n        }\n    }\n\n    /// Assuming a permit is held after previous call to [`Guard::take_and_deinit`], it can be 
used\n    /// to complete initializing the inner value.\n    ///\n    /// # Panics\n    ///\n    /// If the inner has already been initialized.\n    pub fn set(&self, value: T, _permit: InitPermit) -> Guard<'_, T> {\n        let guard = self.inner.lock().unwrap();\n\n        // cannot assert that this permit is for self.inner.semaphore, but we can assert it cannot\n        // give more permits right now.\n        if guard.init_semaphore.try_acquire().is_ok() {\n            drop(guard);\n            panic!(\"permit is of wrong origin\");\n        }\n\n        Self::set0(value, guard)\n    }\n\n    fn set0(value: T, mut guard: std::sync::MutexGuard<'_, Inner<T>>) -> Guard<'_, T> {\n        if guard.value.is_some() {\n            drop(guard);\n            unreachable!(\"we won permit, must not be initialized\");\n        }\n        guard.value = Some(value);\n        guard.init_semaphore.close();\n        Guard(guard)\n    }\n\n    /// Returns a guard to an existing initialized value, if any.\n    pub fn get(&self) -> Option<Guard<'_, T>> {\n        let guard = self.inner.lock().unwrap();\n        if guard.value.is_some() {\n            Some(Guard(guard))\n        } else {\n            None\n        }\n    }\n\n    /// Like [`Guard::take_and_deinit`], but will return `None` if this OnceCell was never\n    /// initialized.\n    pub fn take_and_deinit(&mut self) -> Option<(T, InitPermit)> {\n        let inner = self.inner.get_mut().unwrap();\n\n        inner.take_and_deinit()\n    }\n\n    /// Return the number of [`Self::get_or_init`] calls waiting for initialization to complete.\n    pub fn initializer_count(&self) -> usize {\n        self.initializers.load(Ordering::Relaxed)\n    }\n}\n\n/// DropGuard counter for queued tasks waiting to initialize, mainly accessible for the\n/// initializing task for example at the end of initialization.\nstruct CountWaitingInitializers<'a, T>(&'a OnceCell<T>);\n\nimpl<'a, T> CountWaitingInitializers<'a, T> {\n    fn start(target: 
&'a OnceCell<T>) -> Self {\n        target.initializers.fetch_add(1, Ordering::Relaxed);\n        CountWaitingInitializers(target)\n    }\n}\n\nimpl<T> Drop for CountWaitingInitializers<'_, T> {\n    fn drop(&mut self) {\n        self.0.initializers.fetch_sub(1, Ordering::Relaxed);\n    }\n}\n\n/// Uninteresting guard object to allow short-lived access to inspect or clone the held,\n/// initialized value.\n#[derive(Debug)]\npub struct Guard<'a, T>(MutexGuard<'a, Inner<T>>);\n\nimpl<T> std::ops::Deref for Guard<'_, T> {\n    type Target = T;\n\n    fn deref(&self) -> &Self::Target {\n        self.0\n            .value\n            .as_ref()\n            .expect(\"guard is not created unless value has been initialized\")\n    }\n}\n\nimpl<T> std::ops::DerefMut for Guard<'_, T> {\n    fn deref_mut(&mut self) -> &mut Self::Target {\n        self.0\n            .value\n            .as_mut()\n            .expect(\"guard is not created unless value has been initialized\")\n    }\n}\n\nimpl<T> Guard<'_, T> {\n    /// Take the current value, and a new permit for it's deinitialization.\n    ///\n    /// The permit will be on a semaphore part of the new internal value, and any following\n    /// [`OnceCell::get_or_init`] will wait on it to complete.\n    pub fn take_and_deinit(mut self) -> (T, InitPermit) {\n        self.0\n            .take_and_deinit()\n            .expect(\"guard is not created unless value has been initialized\")\n    }\n}\n\nimpl<T> Inner<T> {\n    pub fn take_and_deinit(&mut self) -> Option<(T, InitPermit)> {\n        let value = self.value.take()?;\n\n        let mut swapped = Inner::default();\n        let sem = swapped.init_semaphore.clone();\n        // acquire and forget right away, moving the control over to InitPermit\n        sem.try_acquire().expect(\"we just created this\").forget();\n        let permit = InitPermit(sem);\n        std::mem::swap(self, &mut swapped);\n        Some((value, permit))\n    }\n}\n\n/// Type held by OnceCell 
(de)initializing task.\n///\n/// On drop, this type will return the permit.\npub struct InitPermit(Arc<tokio::sync::Semaphore>);\n\nimpl std::fmt::Debug for InitPermit {\n    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {\n        let ptr = Arc::as_ptr(&self.0) as *const ();\n        f.debug_tuple(\"InitPermit\").field(&ptr).finish()\n    }\n}\n\nimpl Drop for InitPermit {\n    fn drop(&mut self) {\n        assert_eq!(\n            self.0.available_permits(),\n            0,\n            \"InitPermit should only exist as the unique permit\"\n        );\n        self.0.add_permits(1);\n    }\n}\n\n#[cfg(test)]\nmod tests {\n    use std::convert::Infallible;\n    use std::pin::{Pin, pin};\n    use std::time::Duration;\n\n    use futures::Future;\n\n    use super::*;\n\n    #[tokio::test]\n    async fn many_initializers() {\n        #[derive(Default, Debug)]\n        struct Counters {\n            factory_got_to_run: AtomicUsize,\n            future_polled: AtomicUsize,\n            winners: AtomicUsize,\n        }\n\n        let initializers = 100;\n\n        let cell = Arc::new(OnceCell::default());\n        let counters = Arc::new(Counters::default());\n        let barrier = Arc::new(tokio::sync::Barrier::new(initializers + 1));\n\n        let mut js = tokio::task::JoinSet::new();\n        for i in 0..initializers {\n            js.spawn({\n                let cell = cell.clone();\n                let counters = counters.clone();\n                let barrier = barrier.clone();\n\n                async move {\n                    barrier.wait().await;\n                    let won = {\n                        let g = cell\n                            .get_or_init(|permit| {\n                                counters.factory_got_to_run.fetch_add(1, Ordering::Relaxed);\n                                async {\n                                    counters.future_polled.fetch_add(1, Ordering::Relaxed);\n                                    Ok::<_, 
Infallible>((i, permit))\n                                }\n                            })\n                            .await\n                            .unwrap();\n\n                        *g == i\n                    };\n\n                    if won {\n                        counters.winners.fetch_add(1, Ordering::Relaxed);\n                    }\n                }\n            });\n        }\n\n        barrier.wait().await;\n\n        while let Some(next) = js.join_next().await {\n            next.expect(\"no panics expected\");\n        }\n\n        let mut counters = Arc::try_unwrap(counters).unwrap();\n\n        assert_eq!(*counters.factory_got_to_run.get_mut(), 1);\n        assert_eq!(*counters.future_polled.get_mut(), 1);\n        assert_eq!(*counters.winners.get_mut(), 1);\n    }\n\n    #[tokio::test(start_paused = true)]\n    async fn reinit_waits_for_deinit() {\n        // with the tokio::time paused, we will \"sleep\" for 1s while holding the reinitialization\n        let sleep_for = Duration::from_secs(1);\n        let initial = 42;\n        let reinit = 1;\n        let cell = Arc::new(OnceCell::new(initial));\n\n        let deinitialization_started = Arc::new(tokio::sync::Barrier::new(2));\n\n        let jh = tokio::spawn({\n            let cell = cell.clone();\n            let deinitialization_started = deinitialization_started.clone();\n            async move {\n                let (answer, _permit) = cell.get().expect(\"initialized to value\").take_and_deinit();\n                assert_eq!(answer, initial);\n\n                deinitialization_started.wait().await;\n                tokio::time::sleep(sleep_for).await;\n            }\n        });\n\n        deinitialization_started.wait().await;\n\n        let started_at = tokio::time::Instant::now();\n        cell.get_or_init(|permit| async { Ok::<_, Infallible>((reinit, permit)) })\n            .await\n            .unwrap();\n\n        let elapsed = started_at.elapsed();\n        assert!(\n   
         elapsed >= sleep_for,\n            \"initialization should had taken at least the time time slept with permit\"\n        );\n\n        jh.await.unwrap();\n\n        assert_eq!(*cell.get().unwrap(), reinit);\n    }\n\n    #[test]\n    fn reinit_with_deinit_permit() {\n        let cell = Arc::new(OnceCell::new(42));\n\n        let (mol, permit) = cell.get().unwrap().take_and_deinit();\n        cell.set(5, permit);\n        assert_eq!(*cell.get().unwrap(), 5);\n\n        let (five, permit) = cell.get().unwrap().take_and_deinit();\n        assert_eq!(5, five);\n        cell.set(mol, permit);\n        assert_eq!(*cell.get().unwrap(), 42);\n    }\n\n    #[tokio::test]\n    async fn initialization_attemptable_until_ok() {\n        let cell = OnceCell::default();\n\n        for _ in 0..10 {\n            cell.get_or_init(|_permit| async { Err(\"whatever error\") })\n                .await\n                .unwrap_err();\n        }\n\n        let g = cell\n            .get_or_init(|permit| async { Ok::<_, Infallible>((\"finally success\", permit)) })\n            .await\n            .unwrap();\n        assert_eq!(*g, \"finally success\");\n    }\n\n    #[tokio::test]\n    async fn initialization_is_cancellation_safe() {\n        let cell = OnceCell::default();\n\n        let barrier = tokio::sync::Barrier::new(2);\n\n        let initializer = cell.get_or_init(|permit| async {\n            barrier.wait().await;\n            futures::future::pending::<()>().await;\n\n            Ok::<_, Infallible>((\"never reached\", permit))\n        });\n\n        tokio::select! 
{\n            _ = initializer => { unreachable!(\"cannot complete; stuck in pending().await\") },\n            _ = barrier.wait() => {}\n        };\n\n        // now initializer is dropped\n\n        assert!(cell.get().is_none());\n\n        let g = cell\n            .get_or_init(|permit| async { Ok::<_, Infallible>((\"now initialized\", permit)) })\n            .await\n            .unwrap();\n        assert_eq!(*g, \"now initialized\");\n    }\n\n    #[tokio::test(start_paused = true)]\n    async fn reproduce_init_take_deinit_race() {\n        init_take_deinit_scenario(|cell, factory| {\n            Box::pin(async {\n                cell.get_or_init(factory).await.unwrap();\n            })\n        })\n        .await;\n    }\n\n    type BoxedInitFuture<T, E> = Pin<Box<dyn Future<Output = Result<(T, InitPermit), E>>>>;\n    type BoxedInitFunction<T, E> = Box<dyn Fn(InitPermit) -> BoxedInitFuture<T, E>>;\n\n    /// Reproduce an assertion failure.\n    ///\n    /// This has interesting generics to be generic between `get_or_init` and `get_mut_or_init`.\n    /// We currently only have one, but the structure is kept.\n    async fn init_take_deinit_scenario<F>(init_way: F)\n    where\n        F: for<'a> Fn(\n            &'a OnceCell<&'static str>,\n            BoxedInitFunction<&'static str, Infallible>,\n        ) -> Pin<Box<dyn Future<Output = ()> + 'a>>,\n    {\n        let cell = OnceCell::default();\n\n        // acquire the init_semaphore only permit to drive initializing tasks in order to waiting\n        // on the same semaphore.\n        let permit = cell\n            .inner\n            .lock()\n            .unwrap()\n            .init_semaphore\n            .clone()\n            .try_acquire_owned()\n            .unwrap();\n\n        let mut t1 = pin!(init_way(\n            &cell,\n            Box::new(|permit| Box::pin(async move { Ok((\"t1\", permit)) })),\n        ));\n\n        let mut t2 = pin!(init_way(\n            &cell,\n            
Box::new(|permit| Box::pin(async move { Ok((\"t2\", permit)) })),\n        ));\n\n        // drive t2 first to the init_semaphore -- the timeout will be hit once t2 future can\n        // no longer make progress\n        tokio::select! {\n            _ = &mut t2 => unreachable!(\"it cannot get permit\"),\n            _ = tokio::time::sleep(Duration::from_secs(3600 * 24 * 7 * 365)) => {}\n        }\n\n        // followed by t1 in the init_semaphore\n        tokio::select! {\n            _ = &mut t1 => unreachable!(\"it cannot get permit\"),\n            _ = tokio::time::sleep(Duration::from_secs(3600 * 24 * 7 * 365)) => {}\n        }\n\n        // now let t2 proceed and initialize\n        drop(permit);\n        t2.await;\n\n        let (s, permit) = { cell.get().unwrap().take_and_deinit() };\n        assert_eq!(\"t2\", s);\n\n        // now originally t1 would see the semaphore it has as closed. it cannot yet get a permit from\n        // the new one.\n        tokio::select! {\n            _ = &mut t1 => unreachable!(\"it cannot get permit\"),\n            _ = tokio::time::sleep(Duration::from_secs(3600 * 24 * 7 * 365)) => {}\n        }\n\n        // only now we get to initialize it\n        drop(permit);\n        t1.await;\n\n        assert_eq!(\"t1\", *cell.get().unwrap());\n    }\n\n    #[tokio::test(start_paused = true)]\n    async fn detached_init_smoke() {\n        let target = OnceCell::default();\n\n        let Err(permit) = target.get_or_init_detached().await else {\n            unreachable!(\"it is not initialized\")\n        };\n\n        tokio::time::timeout(\n            std::time::Duration::from_secs(3600 * 24 * 7 * 365),\n            target.get_or_init(|permit2| async { Ok::<_, Infallible>((11, permit2)) }),\n        )\n        .await\n        .expect_err(\"should timeout since we are already holding the permit\");\n\n        target.set(42, permit);\n\n        let (_answer, permit) = {\n            let guard = target\n                
.get_or_init(|permit| async { Ok::<_, Infallible>((11, permit)) })\n                .await\n                .unwrap();\n\n            assert_eq!(*guard, 42);\n\n            guard.take_and_deinit()\n        };\n\n        assert!(target.get().is_none());\n\n        target.set(11, permit);\n\n        assert_eq!(*target.get().unwrap(), 11);\n    }\n\n    #[tokio::test]\n    async fn take_and_deinit_on_mut() {\n        use std::convert::Infallible;\n\n        let mut target = OnceCell::<u32>::default();\n        assert!(target.take_and_deinit().is_none());\n\n        target\n            .get_or_init(|permit| async move { Ok::<_, Infallible>((42, permit)) })\n            .await\n            .unwrap();\n\n        let again = target.take_and_deinit();\n        assert!(matches!(again, Some((42, _))), \"{again:?}\");\n\n        assert!(target.take_and_deinit().is_none());\n    }\n}\n"
  },
  {
    "path": "libs/utils/src/sync/spsc_fold.rs",
    "content": "use core::future::poll_fn;\nuse core::task::Poll;\nuse std::sync::{Arc, Mutex};\n\nuse diatomic_waker::DiatomicWaker;\n\npub struct Sender<T> {\n    state: Arc<Inner<T>>,\n}\n\npub struct Receiver<T> {\n    state: Arc<Inner<T>>,\n}\n\nstruct Inner<T> {\n    wake_receiver: DiatomicWaker,\n    wake_sender: DiatomicWaker,\n    value: Mutex<State<T>>,\n}\n\nenum State<T> {\n    NoData,\n    HasData(T),\n    TryFoldFailed, // transient state\n    SenderWaitsForReceiverToConsume(T),\n    SenderGone(Option<T>),\n    ReceiverGone,\n    AllGone,\n    SenderDropping,   // transient state\n    ReceiverDropping, // transient state\n}\n\npub fn channel<T: Send>() -> (Sender<T>, Receiver<T>) {\n    let inner = Inner {\n        wake_receiver: DiatomicWaker::new(),\n        wake_sender: DiatomicWaker::new(),\n        value: Mutex::new(State::NoData),\n    };\n\n    let state = Arc::new(inner);\n    (\n        Sender {\n            state: state.clone(),\n        },\n        Receiver { state },\n    )\n}\n\n#[derive(Debug, thiserror::Error)]\npub enum SendError {\n    #[error(\"receiver is gone\")]\n    ReceiverGone,\n}\n\nimpl<T: Send> Sender<T> {\n    /// # Panics\n    ///\n    /// If `try_fold` panics,  any subsequent call to `send` panic.\n    pub async fn send<F>(&mut self, value: T, try_fold: F) -> Result<(), SendError>\n    where\n        F: Fn(&mut T, T) -> Result<(), T>,\n    {\n        let mut value = Some(value);\n        poll_fn(|cx| {\n            let mut guard = self.state.value.lock().unwrap();\n            match &mut *guard {\n                State::NoData => {\n                    *guard = State::HasData(value.take().unwrap());\n                    self.state.wake_receiver.notify();\n                    Poll::Ready(Ok(()))\n                }\n                State::HasData(_) => {\n                    let State::HasData(acc_mut) = &mut *guard else {\n                        unreachable!(\"this match arm guarantees that the guard is HasData\");\n      
              };\n                    match try_fold(acc_mut, value.take().unwrap()) {\n                        Ok(()) => {\n                            // no need to wake receiver, if it was waiting it already\n                            // got a wake-up when we transitioned from NoData to HasData\n                            Poll::Ready(Ok(()))\n                        }\n                        Err(unfoldable_value) => {\n                            value = Some(unfoldable_value);\n                            let State::HasData(acc) =\n                                std::mem::replace(&mut *guard, State::TryFoldFailed)\n                            else {\n                                unreachable!(\"this match arm guarantees that the guard is HasData\");\n                            };\n                            *guard = State::SenderWaitsForReceiverToConsume(acc);\n                            // SAFETY: send is single threaded due to `&mut self` requirement,\n                            // therefore register is not concurrent.\n                            unsafe {\n                                self.state.wake_sender.register(cx.waker());\n                            }\n                            Poll::Pending\n                        }\n                    }\n                }\n                State::SenderWaitsForReceiverToConsume(_data) => {\n                    // SAFETY: send is single threaded due to `&mut self` requirement,\n                    // therefore register is not concurrent.\n                    unsafe {\n                        self.state.wake_sender.register(cx.waker());\n                    }\n                    Poll::Pending\n                }\n                State::ReceiverGone => Poll::Ready(Err(SendError::ReceiverGone)),\n                State::SenderGone(_)\n                | State::AllGone\n                | State::SenderDropping\n                | State::ReceiverDropping\n                | State::TryFoldFailed => {\n         
           unreachable!();\n                }\n            }\n        })\n        .await\n    }\n}\n\nimpl<T> Drop for Sender<T> {\n    fn drop(&mut self) {\n        scopeguard::defer! {\n            self.state.wake_receiver.notify()\n        };\n        let Ok(mut guard) = self.state.value.lock() else {\n            return;\n        };\n        *guard = match std::mem::replace(&mut *guard, State::SenderDropping) {\n            State::NoData => State::SenderGone(None),\n            State::HasData(data) | State::SenderWaitsForReceiverToConsume(data) => {\n                State::SenderGone(Some(data))\n            }\n            State::ReceiverGone => State::AllGone,\n            State::TryFoldFailed\n            | State::SenderGone(_)\n            | State::AllGone\n            | State::SenderDropping\n            | State::ReceiverDropping => {\n                unreachable!(\"unreachable state {:?}\", guard.discriminant_str())\n            }\n        }\n    }\n}\n\n#[derive(Debug, thiserror::Error)]\npub enum RecvError {\n    #[error(\"sender is gone\")]\n    SenderGone,\n}\n\nimpl<T: Send> Receiver<T> {\n    pub async fn recv(&mut self) -> Result<T, RecvError> {\n        poll_fn(|cx| {\n            let mut guard = self.state.value.lock().unwrap();\n            match &mut *guard {\n                State::NoData => {\n                    // SAFETY: recv is single threaded due to `&mut self` requirement,\n                    // therefore register is not concurrent.\n                    unsafe {\n                        self.state.wake_receiver.register(cx.waker());\n                    }\n                    Poll::Pending\n                }\n                guard @ State::HasData(_)\n                | guard @ State::SenderWaitsForReceiverToConsume(_)\n                | guard @ State::SenderGone(Some(_)) => {\n                    let data = guard\n                        .take_data()\n                        .expect(\"in these states, data is guaranteed to be 
present\");\n                    self.state.wake_sender.notify();\n                    Poll::Ready(Ok(data))\n                }\n                State::SenderGone(None) => Poll::Ready(Err(RecvError::SenderGone)),\n                State::ReceiverGone\n                | State::AllGone\n                | State::SenderDropping\n                | State::ReceiverDropping\n                | State::TryFoldFailed => {\n                    unreachable!(\"unreachable state {:?}\", guard.discriminant_str());\n                }\n            }\n        })\n        .await\n    }\n}\n\nimpl<T> Drop for Receiver<T> {\n    fn drop(&mut self) {\n        scopeguard::defer! {\n            self.state.wake_sender.notify()\n        };\n        let Ok(mut guard) = self.state.value.lock() else {\n            return;\n        };\n        *guard = match std::mem::replace(&mut *guard, State::ReceiverDropping) {\n            State::NoData => State::ReceiverGone,\n            State::HasData(_) | State::SenderWaitsForReceiverToConsume(_) => State::ReceiverGone,\n            State::SenderGone(_) => State::AllGone,\n            State::TryFoldFailed\n            | State::ReceiverGone\n            | State::AllGone\n            | State::SenderDropping\n            | State::ReceiverDropping => {\n                unreachable!(\"unreachable state {:?}\", guard.discriminant_str())\n            }\n        }\n    }\n}\n\nimpl<T> State<T> {\n    fn take_data(&mut self) -> Option<T> {\n        match self {\n            State::HasData(_) => {\n                let State::HasData(data) = std::mem::replace(self, State::NoData) else {\n                    unreachable!(\"this match arm guarantees that the state is HasData\");\n                };\n                Some(data)\n            }\n            State::SenderWaitsForReceiverToConsume(_) => {\n                let State::SenderWaitsForReceiverToConsume(data) =\n                    std::mem::replace(self, State::NoData)\n                else {\n                   
 unreachable!(\n                        \"this match arm guarantees that the state is SenderWaitsForReceiverToConsume\"\n                    );\n                };\n                Some(data)\n            }\n            State::SenderGone(data) => Some(data.take().unwrap()),\n            State::NoData\n            | State::TryFoldFailed\n            | State::ReceiverGone\n            | State::AllGone\n            | State::SenderDropping\n            | State::ReceiverDropping => None,\n        }\n    }\n    fn discriminant_str(&self) -> &'static str {\n        match self {\n            State::NoData => \"NoData\",\n            State::HasData(_) => \"HasData\",\n            State::TryFoldFailed => \"TryFoldFailed\",\n            State::SenderWaitsForReceiverToConsume(_) => \"SenderWaitsForReceiverToConsume\",\n            State::SenderGone(_) => \"SenderGone\",\n            State::ReceiverGone => \"ReceiverGone\",\n            State::AllGone => \"AllGone\",\n            State::SenderDropping => \"SenderDropping\",\n            State::ReceiverDropping => \"ReceiverDropping\",\n        }\n    }\n}\n\n#[cfg(test)]\nmod tests {\n\n    use super::*;\n\n    const FOREVER: std::time::Duration = std::time::Duration::from_secs(u64::MAX);\n\n    #[tokio::test]\n    async fn test_send_recv() {\n        let (mut sender, mut receiver) = channel();\n\n        sender\n            .send(42, |acc, val| {\n                *acc += val;\n                Ok(())\n            })\n            .await\n            .unwrap();\n\n        let received = receiver.recv().await.unwrap();\n        assert_eq!(received, 42);\n    }\n\n    #[tokio::test]\n    async fn test_send_recv_with_fold() {\n        let (mut sender, mut receiver) = channel();\n\n        sender\n            .send(1, |acc, val| {\n                *acc += val;\n                Ok(())\n            })\n            .await\n            .unwrap();\n        sender\n            .send(2, |acc, val| {\n                *acc += val;\n           
     Ok(())\n            })\n            .await\n            .unwrap();\n\n        let received = receiver.recv().await.unwrap();\n        assert_eq!(received, 3);\n    }\n\n    #[tokio::test(start_paused = true)]\n    async fn test_sender_waits_for_receiver_if_try_fold_fails() {\n        let (mut sender, mut receiver) = channel();\n\n        sender.send(23, |_, _| panic!(\"first send\")).await.unwrap();\n\n        let send_fut = sender.send(42, |_, val| Err(val));\n        let mut send_fut = std::pin::pin!(send_fut);\n\n        tokio::select! {\n            _ = tokio::time::sleep(FOREVER) => {},\n            _ = &mut send_fut => {\n                panic!(\"send should not complete\");\n            },\n        }\n\n        let val = receiver.recv().await.unwrap();\n        assert_eq!(val, 23);\n\n        tokio::select! {\n            _ = tokio::time::sleep(FOREVER) => {\n                panic!(\"receiver should have consumed the value\");\n            },\n            _ = &mut send_fut => { },\n        }\n\n        let val = receiver.recv().await.unwrap();\n        assert_eq!(val, 42);\n    }\n\n    #[tokio::test(start_paused = true)]\n    async fn test_sender_errors_if_waits_for_receiver_and_receiver_drops() {\n        let (mut sender, receiver) = channel();\n\n        sender.send(23, |_, _| unreachable!()).await.unwrap();\n\n        let send_fut = sender.send(42, |_, val| Err(val));\n        let send_fut = std::pin::pin!(send_fut);\n\n        drop(receiver);\n\n        let result = send_fut.await;\n        assert!(matches!(result, Err(SendError::ReceiverGone)));\n    }\n\n    #[tokio::test(start_paused = true)]\n    async fn test_receiver_errors_if_waits_for_sender_and_sender_drops() {\n        let (sender, mut receiver) = channel::<()>();\n\n        let recv_fut = receiver.recv();\n        let recv_fut = std::pin::pin!(recv_fut);\n\n        drop(sender);\n\n        let result = recv_fut.await;\n        assert!(matches!(result, Err(RecvError::SenderGone)));\n    
}\n\n    #[tokio::test(start_paused = true)]\n    async fn test_receiver_errors_if_waits_for_sender_and_sender_drops_with_data() {\n        let (mut sender, mut receiver) = channel();\n\n        sender.send(42, |_, _| unreachable!()).await.unwrap();\n\n        {\n            let recv_fut = receiver.recv();\n            let recv_fut = std::pin::pin!(recv_fut);\n\n            drop(sender);\n\n            let val = recv_fut.await.unwrap();\n            assert_eq!(val, 42);\n        }\n\n        let result = receiver.recv().await;\n        assert!(matches!(result, Err(RecvError::SenderGone)));\n    }\n\n    #[tokio::test(start_paused = true)]\n    async fn test_receiver_waits_for_sender_if_no_data() {\n        let (mut sender, mut receiver) = channel();\n\n        let recv_fut = receiver.recv();\n        let mut recv_fut = std::pin::pin!(recv_fut);\n\n        tokio::select! {\n            _ = tokio::time::sleep(FOREVER) => {},\n            _ = &mut recv_fut => {\n                panic!(\"recv should not complete\");\n            },\n        }\n\n        sender.send(42, |_, _| Ok(())).await.unwrap();\n\n        let val = recv_fut.await.unwrap();\n        assert_eq!(val, 42);\n    }\n\n    #[tokio::test]\n    async fn test_receiver_gone_while_nodata() {\n        let (mut sender, receiver) = channel();\n        drop(receiver);\n\n        let result = sender.send(42, |_, _| Ok(())).await;\n        assert!(matches!(result, Err(SendError::ReceiverGone)));\n    }\n\n    #[tokio::test]\n    async fn test_sender_gone_while_nodata() {\n        let (sender, mut receiver) = super::channel::<usize>();\n        drop(sender);\n\n        let result = receiver.recv().await;\n        assert!(matches!(result, Err(RecvError::SenderGone)));\n    }\n\n    #[tokio::test(start_paused = true)]\n    async fn test_receiver_drops_after_sender_went_to_sleep() {\n        let (mut sender, receiver) = channel();\n        let state = receiver.state.clone();\n\n        sender.send(23, |_, _| 
unreachable!()).await.unwrap();\n\n        let send_task = tokio::spawn(async move { sender.send(42, |_, v| Err(v)).await });\n\n        tokio::time::sleep(FOREVER).await;\n\n        assert!(matches!(\n            &*state.value.lock().unwrap(),\n            &State::SenderWaitsForReceiverToConsume(_)\n        ));\n\n        drop(receiver);\n\n        let err = send_task\n            .await\n            .unwrap()\n            .expect_err(\"should unblock immediately\");\n        assert!(matches!(err, SendError::ReceiverGone));\n    }\n\n    #[tokio::test(start_paused = true)]\n    async fn test_sender_drops_after_receiver_went_to_sleep() {\n        let (sender, mut receiver) = channel::<usize>();\n        let state = sender.state.clone();\n\n        let recv_task = tokio::spawn(async move { receiver.recv().await });\n\n        tokio::time::sleep(FOREVER).await;\n\n        assert!(matches!(&*state.value.lock().unwrap(), &State::NoData));\n\n        drop(sender);\n\n        let err = recv_task.await.unwrap().expect_err(\"should error\");\n        assert!(matches!(err, RecvError::SenderGone));\n    }\n\n    #[tokio::test(start_paused = true)]\n    async fn test_receiver_drop_while_waiting_for_receiver_to_consume_unblocks_sender() {\n        let (mut sender, receiver) = channel();\n\n        let state = receiver.state.clone();\n\n        sender.send((), |_, _| unreachable!()).await.unwrap();\n\n        assert!(matches!(&*state.value.lock().unwrap(), &State::HasData(_)));\n\n        let unmergeable = sender.send((), |_, _| Err(()));\n        let mut unmergeable = std::pin::pin!(unmergeable);\n        tokio::select! 
{\n            _ = tokio::time::sleep(FOREVER) => {},\n            _ = &mut unmergeable => {\n                panic!(\"unmergeable should not complete\");\n            },\n        }\n\n        assert!(matches!(\n            &*state.value.lock().unwrap(),\n            &State::SenderWaitsForReceiverToConsume(_)\n        ));\n\n        drop(receiver);\n\n        assert!(matches!(\n            &*state.value.lock().unwrap(),\n            &State::ReceiverGone\n        ));\n\n        unmergeable.await.unwrap_err();\n    }\n}\n"
  },
  {
    "path": "libs/utils/src/sync.rs",
    "content": "pub mod heavier_once_cell;\n\npub mod duplex;\npub mod gate;\n\npub mod spsc_fold;\n"
  },
  {
    "path": "libs/utils/src/tcp_listener.rs",
    "content": "use std::io;\nuse std::net::{TcpListener, ToSocketAddrs};\n\nuse nix::sys::socket::setsockopt;\nuse nix::sys::socket::sockopt::ReuseAddr;\n\n/// Bind a [`TcpListener`] to addr with `SO_REUSEADDR` set to true.\npub fn bind<A: ToSocketAddrs>(addr: A) -> io::Result<TcpListener> {\n    let listener = TcpListener::bind(addr)?;\n\n    setsockopt(&listener, ReuseAddr, &true)?;\n\n    Ok(listener)\n}\n"
  },
  {
    "path": "libs/utils/src/timeout.rs",
    "content": "use std::time::Duration;\n\nuse tokio_util::sync::CancellationToken;\n\n#[derive(thiserror::Error, Debug)]\npub enum TimeoutCancellableError {\n    #[error(\"Timed out\")]\n    Timeout,\n    #[error(\"Cancelled\")]\n    Cancelled,\n}\n\n/// Wrap [`tokio::time::timeout`] with a CancellationToken.\n///\n/// This wrapper is appropriate for any long-running operation in a task\n/// that ought to respect a CancellationToken (which means most tasks).\n///\n/// The only time you should use a bare [`tokio::time::timeout`] is when the future `F`\n/// itself respects a CancellationToken: otherwise, always use this wrapper\n/// with your CancellationToken to ensure that your task does not hold up\n/// graceful shutdown.\npub async fn timeout_cancellable<F>(\n    duration: Duration,\n    cancel: &CancellationToken,\n    future: F,\n) -> Result<F::Output, TimeoutCancellableError>\nwhere\n    F: std::future::Future,\n{\n    tokio::select!(\n        r = tokio::time::timeout(duration, future) => {\n            r.map_err(|_| TimeoutCancellableError::Timeout)\n\n        },\n        _ = cancel.cancelled() => {\n            Err(TimeoutCancellableError::Cancelled)\n\n        }\n    )\n}\n"
  },
  {
    "path": "libs/utils/src/toml_edit_ext.rs",
    "content": "#[derive(Debug, thiserror::Error)]\npub enum Error {\n    #[error(\"item is not a document\")]\n    ItemIsNotADocument,\n    #[error(transparent)]\n    Serde(toml_edit::de::Error),\n}\n\npub fn deserialize_item<T>(item: &toml_edit::Item) -> Result<T, Error>\nwhere\n    T: serde::de::DeserializeOwned,\n{\n    let document: toml_edit::DocumentMut = match item {\n        toml_edit::Item::Table(toml) => toml.clone().into(),\n        toml_edit::Item::Value(toml_edit::Value::InlineTable(toml)) => {\n            toml.clone().into_table().into()\n        }\n        _ => return Err(Error::ItemIsNotADocument),\n    };\n\n    toml_edit::de::from_document(document).map_err(Error::Serde)\n}\n"
  },
  {
    "path": "libs/utils/src/tracing_span_assert.rs",
    "content": "//! Assert that the current [`tracing::Span`] has a given set of fields.\n//!\n//! Can only produce meaningful positive results when tracing has been configured as in example.\n//! Absence of `tracing_error::ErrorLayer` is not detected yet.\n//!\n//! `#[cfg(test)]` code will get a pass when using the `check_fields_present` macro in case tracing\n//! is completly unconfigured.\n//!\n//! # Usage\n//!\n//! ```rust\n//! # fn main() {\n//! use tracing_subscriber::prelude::*;\n//! let registry = tracing_subscriber::registry()\n//!    .with(tracing_error::ErrorLayer::default());\n//!\n//! // Register the registry as the global subscriber.\n//! // In this example, we'll only use it as a thread-local subscriber.\n//! let _guard = tracing::subscriber::set_default(registry);\n//!\n//! // Then, in the main code:\n//!\n//! let span = tracing::info_span!(\"TestSpan\", tenant_id = 1);\n//! let _guard = span.enter();\n//!\n//! // ... down the call stack\n//!\n//! use utils::tracing_span_assert::{check_fields_present, ConstExtractor};\n//! let extractor = ConstExtractor::new(\"tenant_id\");\n//! if let Err(missing) = check_fields_present!([&extractor]) {\n//!    // if you copypaste this to a custom assert method, remember to add #[track_caller]\n//!    // to get the \"user\" code location for the panic.\n//!    panic!(\"Missing fields: {missing:?}\");\n//! }\n//! # }\n//! ```\n//!\n//! 
Recommended reading: <https://docs.rs/tracing-subscriber/0.3.16/tracing_subscriber/layer/index.html#per-layer-filtering>\n//!\n\n#[derive(Debug)]\npub enum ExtractionResult {\n    Present,\n    Absent,\n}\n\npub trait Extractor: Send + Sync + std::fmt::Debug {\n    fn id(&self) -> &str;\n    fn extract(&self, fields: &tracing::field::FieldSet) -> ExtractionResult;\n}\n\n#[derive(Debug)]\npub struct ConstExtractor {\n    field_name: &'static str,\n}\n\nimpl ConstExtractor {\n    pub const fn new(field_name: &'static str) -> ConstExtractor {\n        ConstExtractor { field_name }\n    }\n}\nimpl Extractor for ConstExtractor {\n    fn id(&self) -> &str {\n        self.field_name\n    }\n    fn extract(&self, fields: &tracing::field::FieldSet) -> ExtractionResult {\n        if fields.iter().any(|f| f.name() == self.field_name) {\n            ExtractionResult::Present\n        } else {\n            ExtractionResult::Absent\n        }\n    }\n}\n\n/// Checks that the given extractors are satisfied with the current span hierarchy.\n///\n/// This should not be called directly, but used through [`check_fields_present`] which allows\n/// `Summary::Unconfigured` only when the calling crate is built with the `testing` feature, as a\n/// conservative default.\n#[doc(hidden)]\npub fn check_fields_present0<const L: usize>(\n    must_be_present: [&dyn Extractor; L],\n) -> Result<Summary, Vec<&dyn Extractor>> {\n    let mut missing = must_be_present.into_iter().collect::<Vec<_>>();\n    let trace = tracing_error::SpanTrace::capture();\n    trace.with_spans(|md, _formatted_fields| {\n        // when trying to understand the inner workings of how does the matching work, note that\n        // this closure might be called zero times if the span is disabled. 
normally it is called\n        // once per span hierarchy level.\n        missing.retain(|extractor| match extractor.extract(md.fields()) {\n            ExtractionResult::Present => false,\n            ExtractionResult::Absent => true,\n        });\n\n        // continue walking up until we've found all missing\n        !missing.is_empty()\n    });\n    if missing.is_empty() {\n        Ok(Summary::FoundEverything)\n    } else if !tracing_subscriber_configured() {\n        Ok(Summary::Unconfigured)\n    } else {\n        // we can still hit here if a tracing subscriber has been configured but the ErrorLayer is\n        // missing, which can be annoying. for this case, we could probably use\n        // SpanTrace::status().\n        //\n        // another way to end up here is with RUST_LOG=pageserver=off while configuring the\n        // logging, though I guess in that case the SpanTrace::status() == EMPTY would be valid.\n        // this case is covered by test `not_found_if_tracing_error_subscriber_has_wrong_filter`.\n        Err(missing)\n    }\n}\n\n/// Checks that the given extractors are satisfied with the current span hierarchy.\n///\n/// The macro is the preferred way of checking if fields exist while passing checks if a test does\n/// not have tracing configured.\n///\n/// Why mangled name? Because #[macro_export] will expose it at utils::__check_fields_present.\n/// However we can game a module namespaced macro for `use` purposes by re-exporting the\n/// #[macro_export] exported name with an alias (below).\n#[doc(hidden)]\n#[macro_export]\nmacro_rules! 
__check_fields_present {\n    ($extractors:expr) => {{\n        {\n            use $crate::tracing_span_assert::{check_fields_present0, Summary::*, Extractor};\n\n            match check_fields_present0($extractors) {\n                Ok(FoundEverything) => Ok(()),\n                Ok(Unconfigured) if cfg!(feature = \"testing\") => {\n                    // allow unconfigured in tests\n                    Ok(())\n                },\n                Ok(Unconfigured) => {\n                    panic!(r#\"utils::tracing_span_assert: outside of #[cfg(feature = \"testing\")] expected tracing to be configured with tracing_error::ErrorLayer\"#)\n                },\n                Err(missing) => Err(missing)\n            }\n        }\n    }}\n}\n\npub use crate::__check_fields_present as check_fields_present;\n\n/// Explanation for why the check was deemed ok.\n///\n/// Mainly useful for testing, or configuring per-crate behaviour as in with\n/// [`check_fields_present`].\n#[derive(Debug)]\npub enum Summary {\n    /// All extractors were found.\n    ///\n    /// Should only happen when tracing is properly configured.\n    FoundEverything,\n\n    /// Tracing has not been configured at all. 
This is ok for tests running without tracing set\n    /// up.\n    Unconfigured,\n}\n\nfn tracing_subscriber_configured() -> bool {\n    let mut noop_configured = false;\n    tracing::dispatcher::get_default(|d| {\n        // it is possible that this closure will not be invoked, but the current implementation\n        // always invokes it\n        noop_configured = d.is::<tracing::subscriber::NoSubscriber>();\n    });\n\n    !noop_configured\n}\n\n#[cfg(test)]\nmod tests {\n\n    use std::collections::HashSet;\n    use std::fmt::{self};\n    use std::hash::{Hash, Hasher};\n\n    use tracing_subscriber::prelude::*;\n\n    use super::*;\n\n    struct MemoryIdentity<'a>(&'a dyn Extractor);\n\n    impl MemoryIdentity<'_> {\n        fn as_ptr(&self) -> *const () {\n            self.0 as *const _ as *const ()\n        }\n    }\n    impl PartialEq for MemoryIdentity<'_> {\n        fn eq(&self, other: &Self) -> bool {\n            self.as_ptr() == other.as_ptr()\n        }\n    }\n    impl Eq for MemoryIdentity<'_> {}\n    impl Hash for MemoryIdentity<'_> {\n        fn hash<H: Hasher>(&self, state: &mut H) {\n            self.as_ptr().hash(state);\n        }\n    }\n    impl fmt::Debug for MemoryIdentity<'_> {\n        fn fmt(&self, f: &mut fmt::Formatter<'_>) -> std::fmt::Result {\n            write!(f, \"{:p}: {}\", self.as_ptr(), self.0.id())\n        }\n    }\n\n    struct Setup {\n        _current_thread_subscriber_guard: tracing::subscriber::DefaultGuard,\n        tenant_extractor: ConstExtractor,\n        timeline_extractor: ConstExtractor,\n    }\n\n    fn setup_current_thread() -> Setup {\n        let tenant_extractor = ConstExtractor::new(\"tenant_id\");\n        let timeline_extractor = ConstExtractor::new(\"timeline_id\");\n\n        let registry = tracing_subscriber::registry()\n            .with(tracing_subscriber::fmt::layer())\n            .with(tracing_error::ErrorLayer::default());\n\n        let guard = tracing::subscriber::set_default(registry);\n\n     
   Setup {\n            _current_thread_subscriber_guard: guard,\n            tenant_extractor,\n            timeline_extractor,\n        }\n    }\n\n    fn assert_missing(missing: Vec<&dyn Extractor>, expected: Vec<&dyn Extractor>) {\n        let missing: HashSet<MemoryIdentity> =\n            HashSet::from_iter(missing.into_iter().map(MemoryIdentity));\n        let expected: HashSet<MemoryIdentity> =\n            HashSet::from_iter(expected.into_iter().map(MemoryIdentity));\n        assert_eq!(missing, expected);\n    }\n\n    #[test]\n    fn positive_one_level() {\n        let setup = setup_current_thread();\n        let span = tracing::info_span!(\"root\", tenant_id = \"tenant-1\", timeline_id = \"timeline-1\");\n        let _guard = span.enter();\n        let res = check_fields_present0([&setup.tenant_extractor, &setup.timeline_extractor]);\n        assert!(matches!(res, Ok(Summary::FoundEverything)), \"{res:?}\");\n    }\n\n    #[test]\n    fn negative_one_level() {\n        let setup = setup_current_thread();\n        let span = tracing::info_span!(\"root\", timeline_id = \"timeline-1\");\n        let _guard = span.enter();\n        let missing = check_fields_present0([&setup.tenant_extractor, &setup.timeline_extractor])\n            .unwrap_err();\n        assert_missing(missing, vec![&setup.tenant_extractor]);\n    }\n\n    #[test]\n    fn positive_multiple_levels() {\n        let setup = setup_current_thread();\n\n        let span = tracing::info_span!(\"root\");\n        let _guard = span.enter();\n\n        let span = tracing::info_span!(\"child\", tenant_id = \"tenant-1\");\n        let _guard = span.enter();\n\n        let span = tracing::info_span!(\"grandchild\", timeline_id = \"timeline-1\");\n        let _guard = span.enter();\n\n        let res = check_fields_present0([&setup.tenant_extractor, &setup.timeline_extractor]);\n        assert!(matches!(res, Ok(Summary::FoundEverything)), \"{res:?}\");\n    }\n\n    #[test]\n    fn 
negative_multiple_levels() {\n        let setup = setup_current_thread();\n\n        let span = tracing::info_span!(\"root\");\n        let _guard = span.enter();\n\n        let span = tracing::info_span!(\"child\", timeline_id = \"timeline-1\");\n        let _guard = span.enter();\n\n        let missing = check_fields_present0([&setup.tenant_extractor]).unwrap_err();\n        assert_missing(missing, vec![&setup.tenant_extractor]);\n    }\n\n    #[test]\n    fn positive_subset_one_level() {\n        let setup = setup_current_thread();\n        let span = tracing::info_span!(\"root\", tenant_id = \"tenant-1\", timeline_id = \"timeline-1\");\n        let _guard = span.enter();\n        let res = check_fields_present0([&setup.tenant_extractor]);\n        assert!(matches!(res, Ok(Summary::FoundEverything)), \"{res:?}\");\n    }\n\n    #[test]\n    fn positive_subset_multiple_levels() {\n        let setup = setup_current_thread();\n\n        let span = tracing::info_span!(\"root\");\n        let _guard = span.enter();\n\n        let span = tracing::info_span!(\"child\", tenant_id = \"tenant-1\");\n        let _guard = span.enter();\n\n        let span = tracing::info_span!(\"grandchild\", timeline_id = \"timeline-1\");\n        let _guard = span.enter();\n\n        let res = check_fields_present0([&setup.tenant_extractor]);\n        assert!(matches!(res, Ok(Summary::FoundEverything)), \"{res:?}\");\n    }\n\n    #[test]\n    fn negative_subset_one_level() {\n        let setup = setup_current_thread();\n        let span = tracing::info_span!(\"root\", timeline_id = \"timeline-1\");\n        let _guard = span.enter();\n        let missing = check_fields_present0([&setup.tenant_extractor]).unwrap_err();\n        assert_missing(missing, vec![&setup.tenant_extractor]);\n    }\n\n    #[test]\n    fn negative_subset_multiple_levels() {\n        let setup = setup_current_thread();\n\n        let span = tracing::info_span!(\"root\");\n        let _guard = span.enter();\n\n       
 let span = tracing::info_span!(\"child\", timeline_id = \"timeline-1\");\n        let _guard = span.enter();\n\n        let missing = check_fields_present0([&setup.tenant_extractor]).unwrap_err();\n        assert_missing(missing, vec![&setup.tenant_extractor]);\n    }\n\n    #[test]\n    fn tracing_error_subscriber_not_set_up_straight_line() {\n        // no setup\n        let span = tracing::info_span!(\"foo\", e = \"some value\");\n        let _guard = span.enter();\n\n        let extractor = ConstExtractor::new(\"e\");\n        let res = check_fields_present0([&extractor]);\n        assert!(matches!(res, Ok(Summary::Unconfigured)), \"{res:?}\");\n\n        // similarly for a not found key\n        let extractor = ConstExtractor::new(\"foobar\");\n        let res = check_fields_present0([&extractor]);\n        assert!(matches!(res, Ok(Summary::Unconfigured)), \"{res:?}\");\n    }\n\n    #[test]\n    fn tracing_error_subscriber_not_set_up_with_instrument() {\n        // no setup\n\n        // demo a case where span entering is used to establish a parent child connection, but\n        // when we re-enter the subspan SpanTrace::with_spans iterates over nothing.\n        let span = tracing::info_span!(\"foo\", e = \"some value\");\n        let _guard = span.enter();\n\n        let subspan = tracing::info_span!(\"bar\", f = \"foobar\");\n        drop(_guard);\n\n        // normally this would work, but without any tracing-subscriber configured, both\n        // check_field_present find nothing\n        let _guard = subspan.enter();\n        let extractors: [&dyn Extractor; 2] =\n            [&ConstExtractor::new(\"e\"), &ConstExtractor::new(\"f\")];\n\n        let res = check_fields_present0(extractors);\n        assert!(matches!(res, Ok(Summary::Unconfigured)), \"{res:?}\");\n\n        // similarly for a not found key\n        let extractor = ConstExtractor::new(\"g\");\n        let res = check_fields_present0([&extractor]);\n        assert!(matches!(res, 
Ok(Summary::Unconfigured)), \"{res:?}\");\n    }\n\n    #[test]\n    fn tracing_subscriber_configured() {\n        // this will fail if any utils::logging::init callers appear, but let's hope they do not\n        // appear.\n        assert!(!super::tracing_subscriber_configured());\n\n        let _g = setup_current_thread();\n\n        assert!(super::tracing_subscriber_configured());\n    }\n\n    #[test]\n    fn not_found_when_disabled_by_filter() {\n        let r = tracing_subscriber::registry().with({\n            tracing_error::ErrorLayer::default().with_filter(tracing_subscriber::filter::filter_fn(\n                |md| !(md.is_span() && *md.level() == tracing::Level::INFO),\n            ))\n        });\n\n        let _guard = tracing::subscriber::set_default(r);\n\n        // this test is a rather tricky one, it has a number of possible outcomes depending on the\n        // execution order when executed with other tests even if no test sets the global default\n        // subscriber.\n\n        let span = tracing::info_span!(\"foo\", e = \"some value\");\n        let _guard = span.enter();\n\n        let extractors: [&dyn Extractor; 1] = [&ConstExtractor::new(\"e\")];\n\n        if span.is_disabled() {\n            // the tests are running single threaded, or we got lucky and no other tests subscriber\n            // was got to register their per-CALLSITE::META interest between `set_default` and\n            // creation of the span, thus the filter got to apply and registered interest of Never,\n            // so the span was never created.\n            //\n            // as the span is disabled, no keys were recorded to it, leading check_fields_present0\n            // to find an error.\n\n            let missing = check_fields_present0(extractors).unwrap_err();\n            assert_missing(missing, vec![extractors[0]]);\n        } else {\n            // when the span is enabled, it is because some other test is running at the same time,\n            // and 
that tests registry has filters which are interested in our above span.\n            //\n            // because the span is now enabled, all keys will be found for it. the\n            // tracing_error::SpanTrace does not consider layer filters during the span hierarchy\n            // walk (SpanTrace::with_spans), nor is the SpanTrace::status a reliable indicator in\n            // this test-induced issue.\n\n            let res = check_fields_present0(extractors);\n            assert!(matches!(res, Ok(Summary::FoundEverything)), \"{res:?}\");\n        }\n    }\n}\n"
  },
  {
    "path": "libs/utils/src/try_rcu.rs",
    "content": "//! Try RCU extension lifted from <https://github.com/vorner/arc-swap/issues/94#issuecomment-1987154023>\n\npub trait ArcSwapExt<T> {\n    /// [`ArcSwap::rcu`](arc_swap::ArcSwap::rcu), but with Result that short-circuits on error.\n    fn try_rcu<R, F, E>(&self, f: F) -> Result<T, E>\n    where\n        F: FnMut(&T) -> Result<R, E>,\n        R: Into<T>;\n}\n\nimpl<T, S> ArcSwapExt<T> for arc_swap::ArcSwapAny<T, S>\nwhere\n    T: arc_swap::RefCnt,\n    S: arc_swap::strategy::CaS<T>,\n{\n    fn try_rcu<R, F, E>(&self, mut f: F) -> Result<T, E>\n    where\n        F: FnMut(&T) -> Result<R, E>,\n        R: Into<T>,\n    {\n        fn ptr_eq<Base, A, B>(a: A, b: B) -> bool\n        where\n            A: arc_swap::AsRaw<Base>,\n            B: arc_swap::AsRaw<Base>,\n        {\n            let a = a.as_raw();\n            let b = b.as_raw();\n            std::ptr::eq(a, b)\n        }\n\n        let mut cur = self.load();\n        loop {\n            let new = f(&cur)?.into();\n            let prev = self.compare_and_swap(&*cur, new);\n            let swapped = ptr_eq(&*cur, &*prev);\n            if swapped {\n                return Ok(arc_swap::Guard::into_inner(prev));\n            } else {\n                cur = prev;\n            }\n        }\n    }\n}\n\n#[cfg(test)]\nmod tests {\n    use std::sync::Arc;\n\n    use arc_swap::ArcSwap;\n\n    use super::*;\n\n    #[test]\n    fn test_try_rcu_success() {\n        let swap = ArcSwap::from(Arc::new(42));\n\n        let result = swap.try_rcu(|value| -> Result<_, String> { Ok(**value + 1) });\n\n        assert!(result.is_ok());\n        assert_eq!(**swap.load(), 43);\n    }\n\n    #[test]\n    fn test_try_rcu_error() {\n        let swap = ArcSwap::from(Arc::new(42));\n\n        let result = swap.try_rcu(|value| -> Result<i32, _> {\n            if **value == 42 {\n                Err(\"err\")\n            } else {\n                Ok(**value + 1)\n            }\n        });\n\n        
assert!(result.is_err());\n        assert_eq!(result.unwrap_err(), \"err\");\n        assert_eq!(**swap.load(), 42);\n    }\n}\n"
  },
  {
    "path": "libs/utils/src/vec_map.rs",
    "content": "use std::alloc::Layout;\nuse std::cmp::Ordering;\nuse std::ops::RangeBounds;\n\n#[derive(Clone, Copy, Debug, PartialEq, Eq)]\npub enum VecMapOrdering {\n    Greater,\n    GreaterOrEqual,\n}\n\n/// Ordered map datastructure implemented in a Vec.\n///\n/// Append only - can only add keys that are larger than the\n/// current max key.\n/// Ordering can be adjusted using [`VecMapOrdering`]\n/// during `VecMap` construction.\n#[derive(Clone, Debug)]\npub struct VecMap<K, V> {\n    data: Vec<(K, V)>,\n    ordering: VecMapOrdering,\n}\n\nimpl<K, V> Default for VecMap<K, V> {\n    fn default() -> Self {\n        VecMap {\n            data: Default::default(),\n            ordering: VecMapOrdering::Greater,\n        }\n    }\n}\n\n#[derive(thiserror::Error, Debug)]\npub enum VecMapError {\n    #[error(\"Key violates ordering constraint\")]\n    InvalidKey,\n    #[error(\"Mismatched ordering constraints\")]\n    ExtendOrderingError,\n}\n\nimpl<K: Ord, V> VecMap<K, V> {\n    pub fn new(ordering: VecMapOrdering) -> Self {\n        Self {\n            data: Vec::new(),\n            ordering,\n        }\n    }\n\n    pub fn with_capacity(capacity: usize, ordering: VecMapOrdering) -> Self {\n        Self {\n            data: Vec::with_capacity(capacity),\n            ordering,\n        }\n    }\n\n    pub fn is_empty(&self) -> bool {\n        self.data.is_empty()\n    }\n\n    pub fn as_slice(&self) -> &[(K, V)] {\n        self.data.as_slice()\n    }\n\n    /// This function may panic if given a range where the lower bound is\n    /// greater than the upper bound.\n    pub fn slice_range<R: RangeBounds<K>>(&self, range: R) -> &[(K, V)] {\n        use std::ops::Bound::*;\n\n        let binary_search = |k: &K| self.data.binary_search_by_key(&k, extract_key);\n\n        let start_idx = match range.start_bound() {\n            Unbounded => 0,\n            Included(k) => binary_search(k).unwrap_or_else(std::convert::identity),\n            Excluded(k) => match 
binary_search(k) {\n                Ok(idx) => idx + 1,\n                Err(idx) => idx,\n            },\n        };\n\n        let end_idx = match range.end_bound() {\n            Unbounded => self.data.len(),\n            Included(k) => match binary_search(k) {\n                Ok(idx) => idx + 1,\n                Err(idx) => idx,\n            },\n            Excluded(k) => binary_search(k).unwrap_or_else(std::convert::identity),\n        };\n\n        &self.data[start_idx..end_idx]\n    }\n\n    /// Add a key value pair to the map.\n    /// If `key` is not respective of the `self` ordering the\n    /// pair will not be added and `InvalidKey` error will be returned.\n    pub fn append(&mut self, key: K, value: V) -> Result<usize, VecMapError> {\n        self.validate_key_order(&key)?;\n\n        let delta_size = self.instrument_vec_op(|vec| vec.push((key, value)));\n        Ok(delta_size)\n    }\n\n    /// Update the maximum key value pair or add a new key value pair to the map.\n    /// If `key` is not respective of the `self` ordering no updates or additions\n    /// will occur and `InvalidKey` error will be returned.\n    pub fn append_or_update_last(\n        &mut self,\n        key: K,\n        mut value: V,\n    ) -> Result<(Option<V>, usize), VecMapError> {\n        if let Some((last_key, last_value)) = self.data.last_mut() {\n            match key.cmp(last_key) {\n                Ordering::Less => return Err(VecMapError::InvalidKey),\n                Ordering::Equal => {\n                    std::mem::swap(last_value, &mut value);\n                    const DELTA_SIZE: usize = 0;\n                    return Ok((Some(value), DELTA_SIZE));\n                }\n                Ordering::Greater => {}\n            }\n        }\n\n        let delta_size = self.instrument_vec_op(|vec| vec.push((key, value)));\n        Ok((None, delta_size))\n    }\n\n    /// Move items from `other` to the end of `self`, leaving `other` empty.\n    /// If the `other` ordering is 
different from `self` ordering\n    /// `ExtendOrderingError` error will be returned.\n    /// If any keys in `other` is not respective of the ordering defined in\n    /// `self`, `InvalidKey` error will be returned and no mutation will occur.\n    pub fn extend(&mut self, other: &mut Self) -> Result<usize, VecMapError> {\n        if self.ordering != other.ordering {\n            return Err(VecMapError::ExtendOrderingError);\n        }\n\n        // `other` is internally ordered, so validating its first (smallest) key is sufficient.\n        let other_first_opt = other.data.first().map(extract_key);\n        if let Some(other_first) = other_first_opt {\n            self.validate_key_order(other_first)?;\n        }\n\n        let delta_size = self.instrument_vec_op(|vec| vec.append(&mut other.data));\n        Ok(delta_size)\n    }\n\n    /// Validate the current last key in `self` and key being\n    /// inserted against the order defined in `self`.\n    fn validate_key_order(&self, key: &K) -> Result<(), VecMapError> {\n        if let Some(last_key) = self.data.last().map(extract_key) {\n            match (&self.ordering, &key.cmp(last_key)) {\n                (VecMapOrdering::Greater, Ordering::Less | Ordering::Equal) => {\n                    return Err(VecMapError::InvalidKey);\n                }\n                (VecMapOrdering::Greater, Ordering::Greater) => {}\n                (VecMapOrdering::GreaterOrEqual, Ordering::Less) => {\n                    return Err(VecMapError::InvalidKey);\n                }\n                (VecMapOrdering::GreaterOrEqual, Ordering::Equal | Ordering::Greater) => {}\n            }\n        }\n\n        Ok(())\n    }\n\n    /// Instrument an operation on the underlying [`Vec`].\n    /// Will panic if the operation decreases capacity.\n    /// Returns the increase in memory usage caused by the op.\n    fn instrument_vec_op(&mut self, op: impl FnOnce(&mut Vec<(K, V)>)) -> usize {\n        let old_cap = self.data.capacity();\n        op(&mut self.data);\n        let new_cap = self.data.capacity();\n\n        match 
old_cap.cmp(&new_cap) {\n            Ordering::Less => {\n                let old_size = Layout::array::<(K, V)>(old_cap).unwrap().size();\n                let new_size = Layout::array::<(K, V)>(new_cap).unwrap().size();\n                new_size - old_size\n            }\n            Ordering::Equal => 0,\n            Ordering::Greater => panic!(\"VecMap capacity shouldn't ever decrease\"),\n        }\n    }\n\n    /// Similar to `from_iter` defined in `FromIter` trait except\n    /// that it accepts an [`VecMapOrdering`]\n    pub fn from_iter<I: IntoIterator<Item = (K, V)>>(iter: I, ordering: VecMapOrdering) -> Self {\n        let iter = iter.into_iter();\n        let initial_capacity = {\n            match iter.size_hint() {\n                (lower_bound, None) => lower_bound,\n                (_, Some(upper_bound)) => upper_bound,\n            }\n        };\n\n        let mut vec_map = VecMap::with_capacity(initial_capacity, ordering);\n        for (key, value) in iter {\n            vec_map\n                .append(key, value)\n                .expect(\"The passed collection needs to be sorted!\");\n        }\n\n        vec_map\n    }\n}\n\nimpl<K: Ord, V> IntoIterator for VecMap<K, V> {\n    type Item = (K, V);\n    type IntoIter = std::vec::IntoIter<(K, V)>;\n\n    fn into_iter(self) -> Self::IntoIter {\n        self.data.into_iter()\n    }\n}\n\nfn extract_key<K, V>(entry: &(K, V)) -> &K {\n    &entry.0\n}\n\n#[cfg(test)]\nmod tests {\n    use std::collections::BTreeMap;\n    use std::ops::Bound;\n\n    use super::{VecMap, VecMapOrdering};\n\n    #[test]\n    fn unbounded_range() {\n        let mut vec = VecMap::default();\n        vec.append(0, ()).unwrap();\n\n        assert_eq!(vec.slice_range(0..0), &[]);\n    }\n\n    #[test]\n    #[should_panic]\n    fn invalid_ordering_range() {\n        let mut vec = VecMap::default();\n        vec.append(0, ()).unwrap();\n\n        #[allow(clippy::reversed_empty_ranges)]\n        vec.slice_range(1..0);\n    }\n\n   
 #[test]\n    fn range_tests() {\n        let mut vec = VecMap::default();\n        vec.append(0, ()).unwrap();\n        vec.append(2, ()).unwrap();\n        vec.append(4, ()).unwrap();\n\n        assert_eq!(vec.slice_range(0..0), &[]);\n        assert_eq!(vec.slice_range(0..1), &[(0, ())]);\n        assert_eq!(vec.slice_range(0..2), &[(0, ())]);\n        assert_eq!(vec.slice_range(0..3), &[(0, ()), (2, ())]);\n\n        assert_eq!(vec.slice_range(..0), &[]);\n        assert_eq!(vec.slice_range(..1), &[(0, ())]);\n\n        assert_eq!(vec.slice_range(..3), &[(0, ()), (2, ())]);\n        assert_eq!(vec.slice_range(..3), &[(0, ()), (2, ())]);\n\n        assert_eq!(vec.slice_range(0..=0), &[(0, ())]);\n        assert_eq!(vec.slice_range(0..=1), &[(0, ())]);\n        assert_eq!(vec.slice_range(0..=2), &[(0, ()), (2, ())]);\n        assert_eq!(vec.slice_range(0..=3), &[(0, ()), (2, ())]);\n\n        assert_eq!(vec.slice_range(..=0), &[(0, ())]);\n        assert_eq!(vec.slice_range(..=1), &[(0, ())]);\n        assert_eq!(vec.slice_range(..=2), &[(0, ()), (2, ())]);\n        assert_eq!(vec.slice_range(..=3), &[(0, ()), (2, ())]);\n    }\n\n    struct BoundIter {\n        min: i32,\n        max: i32,\n\n        next: Option<Bound<i32>>,\n    }\n\n    impl BoundIter {\n        fn new(min: i32, max: i32) -> Self {\n            Self {\n                min,\n                max,\n\n                next: Some(Bound::Unbounded),\n            }\n        }\n    }\n\n    impl Iterator for BoundIter {\n        type Item = Bound<i32>;\n\n        fn next(&mut self) -> Option<Self::Item> {\n            let cur = self.next?;\n\n            self.next = match &cur {\n                Bound::Unbounded => Some(Bound::Included(self.min)),\n                Bound::Included(x) => {\n                    if *x >= self.max {\n                        Some(Bound::Excluded(self.min))\n                    } else {\n                        Some(Bound::Included(x + 1))\n                    }\n            
    }\n                Bound::Excluded(x) => {\n                    if *x >= self.max {\n                        None\n                    } else {\n                        Some(Bound::Excluded(x + 1))\n                    }\n                }\n            };\n\n            Some(cur)\n        }\n    }\n\n    #[test]\n    fn range_exhaustive() {\n        let map: BTreeMap<i32, ()> = (1..=7).step_by(2).map(|x| (x, ())).collect();\n        let mut vec = VecMap::default();\n        for &key in map.keys() {\n            vec.append(key, ()).unwrap();\n        }\n\n        const RANGE_MIN: i32 = 0;\n        const RANGE_MAX: i32 = 8;\n        for lower_bound in BoundIter::new(RANGE_MIN, RANGE_MAX) {\n            let ub_min = match lower_bound {\n                Bound::Unbounded => RANGE_MIN,\n                Bound::Included(x) => x,\n                Bound::Excluded(x) => x + 1,\n            };\n            for upper_bound in BoundIter::new(ub_min, RANGE_MAX) {\n                let map_range: Vec<(i32, ())> = map\n                    .range((lower_bound, upper_bound))\n                    .map(|(&x, _)| (x, ()))\n                    .collect();\n                let vec_slice = vec.slice_range((lower_bound, upper_bound));\n\n                assert_eq!(map_range, vec_slice);\n            }\n        }\n    }\n\n    #[test]\n    fn extend() {\n        let mut left = VecMap::default();\n        left.append(0, ()).unwrap();\n        assert_eq!(left.as_slice(), &[(0, ())]);\n\n        let mut empty = VecMap::default();\n        left.extend(&mut empty).unwrap();\n        assert_eq!(left.as_slice(), &[(0, ())]);\n        assert_eq!(empty.as_slice(), &[]);\n\n        let mut right = VecMap::default();\n        right.append(1, ()).unwrap();\n\n        left.extend(&mut right).unwrap();\n\n        assert_eq!(left.as_slice(), &[(0, ()), (1, ())]);\n        assert_eq!(right.as_slice(), &[]);\n\n        let mut zero_map = VecMap::default();\n        zero_map.append(0, ()).unwrap();\n\n     
   left.extend(&mut zero_map).unwrap_err();\n        assert_eq!(left.as_slice(), &[(0, ()), (1, ())]);\n        assert_eq!(zero_map.as_slice(), &[(0, ())]);\n\n        let mut one_map = VecMap::default();\n        one_map.append(1, ()).unwrap();\n\n        left.extend(&mut one_map).unwrap_err();\n        assert_eq!(left.as_slice(), &[(0, ()), (1, ())]);\n        assert_eq!(one_map.as_slice(), &[(1, ())]);\n\n        let mut map_greater_or_equal = VecMap::new(VecMapOrdering::GreaterOrEqual);\n        map_greater_or_equal.append(2, ()).unwrap();\n        map_greater_or_equal.append(2, ()).unwrap();\n\n        left.extend(&mut map_greater_or_equal).unwrap_err();\n        assert_eq!(left.as_slice(), &[(0, ()), (1, ())]);\n        assert_eq!(map_greater_or_equal.as_slice(), &[(2, ()), (2, ())]);\n    }\n\n    #[test]\n    fn extend_with_ordering() {\n        let mut left = VecMap::new(VecMapOrdering::GreaterOrEqual);\n        left.append(0, ()).unwrap();\n        assert_eq!(left.as_slice(), &[(0, ())]);\n\n        let mut greater_right = VecMap::new(VecMapOrdering::Greater);\n        greater_right.append(0, ()).unwrap();\n        left.extend(&mut greater_right).unwrap_err();\n        assert_eq!(left.as_slice(), &[(0, ())]);\n\n        let mut greater_or_equal_right = VecMap::new(VecMapOrdering::GreaterOrEqual);\n        greater_or_equal_right.append(2, ()).unwrap();\n        greater_or_equal_right.append(2, ()).unwrap();\n        left.extend(&mut greater_or_equal_right).unwrap();\n        assert_eq!(left.as_slice(), &[(0, ()), (2, ()), (2, ())]);\n    }\n\n    #[test]\n    fn vec_map_from_sorted() {\n        let vec = vec![(1, ()), (2, ()), (3, ()), (6, ())];\n        let vec_map = VecMap::from_iter(vec, VecMapOrdering::Greater);\n        assert_eq!(vec_map.as_slice(), &[(1, ()), (2, ()), (3, ()), (6, ())]);\n\n        let vec = vec![(1, ()), (2, ()), (3, ()), (3, ()), (6, ()), (6, ())];\n        let vec_map = VecMap::from_iter(vec, VecMapOrdering::GreaterOrEqual);\n    
    assert_eq!(\n            vec_map.as_slice(),\n            &[(1, ()), (2, ()), (3, ()), (3, ()), (6, ()), (6, ())]\n        );\n    }\n\n    #[test]\n    #[should_panic]\n    fn vec_map_from_unsorted_greater() {\n        let vec = vec![(1, ()), (2, ()), (2, ()), (3, ()), (6, ())];\n        let _ = VecMap::from_iter(vec, VecMapOrdering::Greater);\n    }\n\n    #[test]\n    #[should_panic]\n    fn vec_map_from_unsorted_greater_or_equal() {\n        let vec = vec![(1, ()), (2, ()), (3, ()), (6, ()), (5, ())];\n        let _ = VecMap::from_iter(vec, VecMapOrdering::GreaterOrEqual);\n    }\n}\n"
  },
  {
    "path": "libs/utils/src/yielding_loop.rs",
    "content": "use tokio_util::sync::CancellationToken;\n\n#[derive(thiserror::Error, Debug)]\npub enum YieldingLoopError {\n    #[error(\"Cancelled\")]\n    Cancelled,\n}\n\n/// Helper for long synchronous loops, e.g. over all tenants in the system.\n///\n/// Periodically yields to avoid blocking the executor, and after resuming\n/// checks the provided cancellation token to drop out promptly on shutdown.\n#[inline(always)]\npub async fn yielding_loop<I, T, F>(\n    interval: usize,\n    cancel: &CancellationToken,\n    iter: I,\n    mut visitor: F,\n) -> Result<(), YieldingLoopError>\nwhere\n    I: Iterator<Item = T>,\n    F: FnMut(T),\n{\n    for (i, item) in iter.enumerate() {\n        visitor(item);\n\n        if (i + 1) % interval == 0 {\n            tokio::task::yield_now().await;\n            if cancel.is_cancelled() {\n                return Err(YieldingLoopError::Cancelled);\n            }\n        }\n    }\n\n    Ok(())\n}\n"
  },
  {
    "path": "libs/utils/src/zstd.rs",
    "content": "use std::io::SeekFrom;\n\nuse anyhow::{Context, Result};\nuse async_compression::Level;\nuse async_compression::tokio::bufread::ZstdDecoder;\nuse async_compression::tokio::write::ZstdEncoder;\nuse async_compression::zstd::CParameter;\nuse camino::Utf8Path;\nuse nix::NixPath;\nuse tokio::fs::{File, OpenOptions};\nuse tokio::io::{AsyncBufRead, AsyncSeekExt, AsyncWriteExt};\nuse tokio_tar::{Archive, Builder, HeaderMode};\nuse walkdir::WalkDir;\n\n/// Creates a Zstandard tarball.\npub async fn create_zst_tarball(path: &Utf8Path, tarball: &Utf8Path) -> Result<(File, u64)> {\n    let file = OpenOptions::new()\n        .create(true)\n        .truncate(true)\n        .read(true)\n        .write(true)\n        .open(&tarball)\n        .await\n        .with_context(|| format!(\"tempfile creation {tarball}\"))?;\n\n    let mut paths = Vec::new();\n    for entry in WalkDir::new(path) {\n        let entry = entry?;\n        let metadata = entry.metadata().expect(\"error getting dir entry metadata\");\n        // Also allow directories so that we also get empty directories\n        if !(metadata.is_file() || metadata.is_dir()) {\n            continue;\n        }\n        let path = entry.into_path();\n        paths.push(path);\n    }\n    // Do a sort to get a more consistent listing\n    paths.sort_unstable();\n    let zstd = ZstdEncoder::with_quality_and_params(\n        file,\n        Level::Default,\n        &[CParameter::enable_long_distance_matching(true)],\n    );\n    let mut builder = Builder::new(zstd);\n    // Use reproducible header mode\n    builder.mode(HeaderMode::Deterministic);\n    for p in paths {\n        let rel_path = p.strip_prefix(path)?;\n        if rel_path.is_empty() {\n            // The top directory should not be compressed,\n            // the tar crate doesn't like that\n            continue;\n        }\n        builder.append_path_with_name(&p, rel_path).await?;\n    }\n    let mut zstd = builder.into_inner().await?;\n    
zstd.shutdown().await?;\n    let mut compressed = zstd.into_inner();\n    let compressed_len = compressed.metadata().await?.len();\n    compressed.seek(SeekFrom::Start(0)).await?;\n    Ok((compressed, compressed_len))\n}\n\n/// Extracts a Zstandard tarball into `path`.\npub async fn extract_zst_tarball(\n    path: &Utf8Path,\n    tarball: impl AsyncBufRead + Unpin,\n) -> Result<()> {\n    let decoder = Box::pin(ZstdDecoder::new(tarball));\n    let mut archive = Archive::new(decoder);\n    archive.unpack(path).await?;\n    Ok(())\n}\n"
  },
  {
    "path": "libs/utils/tests/bin_ser_test.rs",
    "content": "use std::io::Read;\n\nuse bytes::{Buf, BytesMut};\nuse hex_literal::hex;\nuse serde::Deserialize;\nuse utils::bin_ser::LeSer;\n\n#[derive(Debug, PartialEq, Eq, Deserialize)]\npub struct HeaderData {\n    magic: u16,\n    info: u16,\n    tli: u32,\n    pageaddr: u64,\n    len: u32,\n}\n\n// A manual implementation using BytesMut, just so we can\n// verify that we decode the same way.\npub fn decode_header_data(buf: &mut BytesMut) -> HeaderData {\n    HeaderData {\n        magic: buf.get_u16_le(),\n        info: buf.get_u16_le(),\n        tli: buf.get_u32_le(),\n        pageaddr: buf.get_u64_le(),\n        len: buf.get_u32_le(),\n    }\n}\n\npub fn decode2<R: Read>(reader: &mut R) -> HeaderData {\n    HeaderData::des_from(reader).unwrap()\n}\n\n#[test]\nfn test1() {\n    let raw1 = hex!(\"8940 7890 5534 7890  1289 5379 8378 7893  4207 8923 4712 3218\");\n    let mut buf1 = BytesMut::from(&raw1[..]);\n    let mut buf2 = &raw1[..];\n    let dec1 = decode_header_data(&mut buf1);\n    let dec2 = decode2(&mut buf2);\n    assert_eq!(dec1, dec2);\n    assert_eq!(buf1, buf2);\n}\n"
  },
  {
    "path": "libs/vm_monitor/Cargo.toml",
    "content": "[package]\nname = \"vm_monitor\"\nversion = \"0.1.0\"\nedition = \"2024\"\nlicense.workspace = true\n\n[[bin]]\nname = \"vm-monitor\"\npath = \"./src/bin/monitor.rs\"\n\n# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html\n\n[dependencies]\nanyhow.workspace = true\naxum.workspace = true\nclap.workspace = true\nfutures.workspace = true\nserde.workspace = true\nserde_json.workspace = true\nsysinfo.workspace = true\ntokio = { workspace = true, features = [\"rt-multi-thread\"] }\ntokio-postgres.workspace = true\ntokio-util.workspace = true\ntracing.workspace = true\ntracing-subscriber.workspace = true\n\n[target.'cfg(target_os = \"linux\")'.dependencies]\ncgroups-rs = \"0.3.3\"\n"
  },
  {
    "path": "libs/vm_monitor/README.md",
    "content": "# `vm-monitor`\n\nThe `vm-monitor` (or just monitor) is a core component of the autoscaling system,\nalong with the `autoscale-scheduler` and the `autoscaler-agent`s. The monitor has\ntwo primary roles: 1) notifying agents when immediate upscaling is necessary due\nto memory conditions and 2) managing Postgres' file cache and a cgroup to carry\nout upscaling and downscaling decisions.\n\n## More on scaling\n\nWe scale CPU and memory using NeonVM, our in-house QEMU tool for use with Kubernetes.\nTo control thresholds for receiving memory usage notifications, we start Postgres\nin the `neon-postgres` cgroup and set its `memory.{max,high}`.\n\n* See also: [`neondatabase/autoscaling`](https://github.com/neondatabase/autoscaling/)\n* See also: [`neondatabase/vm-monitor`](https://github.com/neondatabase/vm-monitor/),\nwhere initial development of the monitor happened. The repository is no longer\nmaintained but the commit history may be useful for debugging.\n\n## Structure\n\nThe `vm-monitor` is loosely comprised of a few systems. These are:\n* the server: this is just a simple `axum` server that accepts requests and\nupgrades them to websocket connections. The server only allows one connection at\na time. This means that upon receiving a new connection, the server will terminate\nthe old one if it exists.\n* the filecache: a struct that allows communication with the Postgres file cache.\nOn startup, we connect to the filecache and hold on to the connection for the\nentire monitor lifetime.\n* the cgroup watcher: the `CgroupWatcher` polls the `neon-postgres` cgroup's memory\nusage and sends rolling aggregates to the runner.\n* the runner: the runner marries the filecache and cgroup watcher together,\ncommunicating with the agent through the `Dispatcher`, and then calling filecache\nand cgroup watcher functions as needed to upscale and downscale.\n"
  },
  {
    "path": "libs/vm_monitor/src/bin/monitor.rs",
    "content": "// We expose a standalone binary _and_ start the monitor in `compute_ctl` so that\n// we can test the monitor as part of the entire autoscaling system in\n// neondatabase/autoscaling.\n//\n// The monitor was previously started by vm-builder, and for testing purposes,\n// we can mimic that setup with this binary.\n\n#[cfg(target_os = \"linux\")]\n#[tokio::main]\nasync fn main() -> anyhow::Result<()> {\n    use clap::Parser;\n    use tokio_util::sync::CancellationToken;\n    use tracing_subscriber::EnvFilter;\n    use vm_monitor::Args;\n\n    let subscriber = tracing_subscriber::fmt::Subscriber::builder()\n        .json()\n        .with_file(true)\n        .with_line_number(true)\n        .with_span_list(true)\n        .with_env_filter(EnvFilter::from_default_env())\n        .finish();\n    tracing::subscriber::set_global_default(subscriber)?;\n\n    let args: &'static Args = Box::leak(Box::new(Args::parse()));\n    let token = CancellationToken::new();\n    vm_monitor::start(args, token).await\n}\n\n#[cfg(not(target_os = \"linux\"))]\nfn main() {\n    panic!(\"the monitor requires cgroups, which are only available on linux\")\n}\n"
  },
  {
    "path": "libs/vm_monitor/src/cgroup.rs",
    "content": "use std::fmt::{self, Debug, Formatter};\nuse std::time::{Duration, Instant};\n\nuse anyhow::{Context, anyhow};\nuse cgroups_rs::Subsystem;\nuse cgroups_rs::hierarchies::{self, is_cgroup2_unified_mode};\nuse cgroups_rs::memory::MemController;\nuse tokio::sync::watch;\nuse tracing::{info, warn};\n\n/// Configuration for a `CgroupWatcher`\n#[derive(Debug, Clone)]\npub struct Config {\n    /// Interval at which we should be fetching memory statistics\n    memory_poll_interval: Duration,\n\n    /// The number of samples used in constructing aggregated memory statistics\n    memory_history_len: usize,\n    /// The number of most recent samples that will be periodically logged.\n    ///\n    /// Each sample is logged exactly once. Increasing this value means that recent samples will be\n    /// logged less frequently, and vice versa.\n    ///\n    /// For simplicity, this value must be greater than or equal to `memory_history_len`.\n    memory_history_log_interval: usize,\n    /// The max number of iterations to skip before logging the next iteration\n    memory_history_log_noskip_interval: Duration,\n}\n\nimpl Default for Config {\n    fn default() -> Self {\n        Self {\n            memory_poll_interval: Duration::from_millis(100),\n            memory_history_len: 5, // use 500ms of history for decision-making\n            memory_history_log_interval: 20, // but only log every ~2s (otherwise it's spammy)\n            memory_history_log_noskip_interval: Duration::from_secs(15), // but only if it's changed, or 60 seconds have passed\n        }\n    }\n}\n\n/// Responds to `MonitorEvents` to manage the cgroup: preventing it from being\n/// OOM killed or throttling.\n///\n/// The `CgroupWatcher` primarily achieves this by reading from a stream of\n/// `MonitorEvent`s. 
See `main_signals_loop` for details on how to keep the\n/// cgroup happy.\n#[derive(Debug)]\npub struct CgroupWatcher {\n    pub config: Config,\n\n    /// The actual cgroup we are watching and managing.\n    cgroup: cgroups_rs::Cgroup,\n}\n\nimpl CgroupWatcher {\n    /// Create a new `CgroupWatcher`.\n    #[tracing::instrument(skip_all, fields(%name))]\n    pub fn new(name: String) -> anyhow::Result<Self> {\n        // TODO: clarify exactly why we need v2\n        // Make sure cgroups v2 (aka unified) are supported\n        if !is_cgroup2_unified_mode() {\n            anyhow::bail!(\"cgroups v2 not supported\");\n        }\n        let cgroup = cgroups_rs::Cgroup::load(hierarchies::auto(), &name);\n\n        Ok(Self {\n            cgroup,\n            config: Default::default(),\n        })\n    }\n\n    /// The entrypoint for the `CgroupWatcher`.\n    #[tracing::instrument(skip_all)]\n    pub async fn watch(\n        &self,\n        updates: watch::Sender<(Instant, MemoryHistory)>,\n    ) -> anyhow::Result<()> {\n        // this requirement makes the code a bit easier to work with; see the config for more.\n        assert!(self.config.memory_history_len <= self.config.memory_history_log_interval);\n\n        let mut ticker = tokio::time::interval(self.config.memory_poll_interval);\n        ticker.set_missed_tick_behavior(tokio::time::MissedTickBehavior::Skip);\n        // ticker.reset_immediately(); // FIXME: enable this once updating to tokio >= 1.30.0\n\n        let mem_controller = self.memory()?;\n\n        // buffer for samples that will be logged. 
once full, it remains so.\n        let history_log_len = self.config.memory_history_log_interval;\n        let max_skip = self.config.memory_history_log_noskip_interval;\n        let mut history_log_buf = vec![MemoryStatus::zeroed(); history_log_len];\n        let mut last_logged_memusage = MemoryStatus::zeroed();\n\n        // Ensure that we're tracking a value that's definitely in the past, as Instant::now is only guaranteed to be non-decreasing on Rust's T1-supported systems.\n        let mut can_skip_logs_until = Instant::now() - max_skip;\n\n        for t in 0_u64.. {\n            ticker.tick().await;\n\n            let now = Instant::now();\n            let mem = Self::memory_usage(mem_controller);\n\n            let i = t as usize % history_log_len;\n            history_log_buf[i] = mem;\n\n            // We're taking *at most* memory_history_len values; we may be bounded by the total\n            // number of samples that have come in so far.\n            let samples_count = (t + 1).min(self.config.memory_history_len as u64) as usize;\n            // NB: in `ring_buf_recent_values_iter`, `i` is *inclusive*, which matches the fact\n            // that we just inserted a value there, so the end of the iterator will *include* the\n            // value at i, rather than stopping just short of it.\n            let samples = ring_buf_recent_values_iter(&history_log_buf, i, samples_count);\n\n            let summary = MemoryHistory {\n                avg_non_reclaimable: samples.map(|h| h.non_reclaimable).sum::<u64>()\n                    / samples_count as u64,\n                samples_count,\n                samples_span: self.config.memory_poll_interval * (samples_count - 1) as u32,\n            };\n\n            // Log the current history if it's time to do so. 
Because `history_log_buf` has length\n            // equal to the logging interval, we can just log the entire buffer every time we set\n            // the last entry, which also means that for this log line, we can ignore that it's a\n            // ring buffer (because all the entries are in order of increasing time).\n            //\n            // We skip logging the data if data hasn't meaningfully changed in a while, unless\n            // we've already ignored previous iterations for the last max_skip period.\n            if i == history_log_len - 1\n                && (now > can_skip_logs_until\n                    || !history_log_buf\n                        .iter()\n                        .all(|usage| last_logged_memusage.status_is_close_or_similar(usage)))\n            {\n                info!(\n                    history = ?MemoryStatus::debug_slice(&history_log_buf),\n                    summary = ?summary,\n                    \"Recent cgroup memory statistics history\"\n                );\n\n                can_skip_logs_until = now + max_skip;\n\n                last_logged_memusage = *history_log_buf.last().unwrap();\n            }\n\n            updates\n                .send((now, summary))\n                .context(\"failed to send MemoryHistory\")?;\n        }\n\n        unreachable!()\n    }\n\n    /// Get a handle on the memory subsystem.\n    fn memory(&self) -> anyhow::Result<&MemController> {\n        self.cgroup\n            .subsystems()\n            .iter()\n            .find_map(|sub| match sub {\n                Subsystem::Mem(c) => Some(c),\n                _ => None,\n            })\n            .ok_or_else(|| anyhow!(\"could not find memory subsystem\"))\n    }\n\n    /// Given a handle on the memory subsystem, returns the current memory information\n    fn memory_usage(mem_controller: &MemController) -> MemoryStatus {\n        let stat = mem_controller.memory_stat().stat;\n        MemoryStatus {\n            non_reclaimable: 
stat.active_anon + stat.inactive_anon,\n        }\n    }\n}\n\n// Helper function for `CgroupWatcher::watch`\nfn ring_buf_recent_values_iter<T>(\n    buf: &[T],\n    last_value_idx: usize,\n    count: usize,\n) -> impl '_ + Iterator<Item = &T> {\n    // Assertion carried over from `CgroupWatcher::watch`, to make the logic in this function\n    // easier (we only have to add `buf.len()` once, rather than a dynamic number of times).\n    assert!(count <= buf.len());\n\n    buf.iter()\n        // 'cycle' because the values could wrap around\n        .cycle()\n        // with 'cycle', this skip is more like 'offset', and functionally this is\n        // offsettting by 'last_value_idx - count (mod buf.len())', but we have to be\n        // careful to avoid underflow, so we pre-add buf.len().\n        // The '+ 1' is because `last_value_idx` is inclusive, rather than exclusive.\n        .skip((buf.len() + last_value_idx + 1 - count) % buf.len())\n        .take(count)\n}\n\n/// Summary of recent memory usage\n#[derive(Debug, Copy, Clone)]\npub struct MemoryHistory {\n    /// Rolling average of non-reclaimable memory usage samples over the last `history_period`\n    pub avg_non_reclaimable: u64,\n\n    /// The number of samples used to construct this summary\n    pub samples_count: usize,\n    /// Total timespan between the first and last sample used for this summary\n    pub samples_span: Duration,\n}\n\n#[derive(Debug, Copy, Clone)]\npub struct MemoryStatus {\n    non_reclaimable: u64,\n}\n\nimpl MemoryStatus {\n    fn zeroed() -> Self {\n        MemoryStatus { non_reclaimable: 0 }\n    }\n\n    fn debug_slice(slice: &[Self]) -> impl '_ + Debug {\n        struct DS<'a>(&'a [MemoryStatus]);\n\n        impl Debug for DS<'_> {\n            fn fmt(&self, f: &mut Formatter) -> fmt::Result {\n                f.debug_struct(\"[MemoryStatus]\")\n                    .field(\n                        \"non_reclaimable[..]\",\n                        &Fields(self.0, |stat: 
&MemoryStatus| {\n                            BytesToGB(stat.non_reclaimable)\n                        }),\n                    )\n                    .finish()\n            }\n        }\n\n        struct Fields<'a, F>(&'a [MemoryStatus], F);\n\n        impl<F: Fn(&MemoryStatus) -> T, T: Debug> Debug for Fields<'_, F> {\n            fn fmt(&self, f: &mut Formatter) -> fmt::Result {\n                f.debug_list().entries(self.0.iter().map(&self.1)).finish()\n            }\n        }\n\n        struct BytesToGB(u64);\n\n        impl Debug for BytesToGB {\n            fn fmt(&self, f: &mut Formatter) -> fmt::Result {\n                f.write_fmt(format_args!(\n                    \"{:.3}Gi\",\n                    self.0 as f64 / (1_u64 << 30) as f64\n                ))\n            }\n        }\n\n        DS(slice)\n    }\n\n    /// Check if the other memory status is a close or similar result.\n    /// Returns true if the larger value is not larger than the smaller value\n    /// by 1/8 of the smaller value, and within 128MiB.\n    /// See tests::check_similarity_behaviour for examples of behaviour\n    fn status_is_close_or_similar(&self, other: &MemoryStatus) -> bool {\n        let margin;\n        let diff;\n        if self.non_reclaimable >= other.non_reclaimable {\n            margin = other.non_reclaimable / 8;\n            diff = self.non_reclaimable - other.non_reclaimable;\n        } else {\n            margin = self.non_reclaimable / 8;\n            diff = other.non_reclaimable - self.non_reclaimable;\n        }\n\n        diff < margin && diff < 128 * 1024 * 1024\n    }\n}\n\n#[cfg(test)]\nmod tests {\n    #[test]\n    fn ring_buf_iter() {\n        let buf = vec![0_i32, 1, 2, 3, 4, 5, 6, 7, 8, 9];\n\n        let values = |offset, count| {\n            super::ring_buf_recent_values_iter(&buf, offset, count)\n                .copied()\n                .collect::<Vec<i32>>()\n        };\n\n        // Boundary conditions: start, end, and entire thing:\n       
 assert_eq!(values(0, 1), [0]);\n        assert_eq!(values(3, 4), [0, 1, 2, 3]);\n        assert_eq!(values(9, 4), [6, 7, 8, 9]);\n        assert_eq!(values(9, 10), [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]);\n\n        // \"normal\" operation: no wraparound\n        assert_eq!(values(7, 4), [4, 5, 6, 7]);\n\n        // wraparound:\n        assert_eq!(values(0, 4), [7, 8, 9, 0]);\n        assert_eq!(values(1, 4), [8, 9, 0, 1]);\n        assert_eq!(values(2, 4), [9, 0, 1, 2]);\n        assert_eq!(values(2, 10), [3, 4, 5, 6, 7, 8, 9, 0, 1, 2]);\n    }\n\n    #[test]\n    fn check_similarity_behaviour() {\n        // This all accesses private methods, so we can't actually run this\n        // as doctests, because doctests run as an external crate.\n        let mut small = super::MemoryStatus {\n            non_reclaimable: 1024,\n        };\n        let mut large = super::MemoryStatus {\n            non_reclaimable: 1024 * 1024 * 1024 * 1024,\n        };\n\n        // objects are self-similar, no matter the size\n        assert!(small.status_is_close_or_similar(&small));\n        assert!(large.status_is_close_or_similar(&large));\n\n        // inequality is symmetric\n        assert!(!small.status_is_close_or_similar(&large));\n        assert!(!large.status_is_close_or_similar(&small));\n\n        small.non_reclaimable = 64;\n        large.non_reclaimable = (small.non_reclaimable / 8) * 9;\n\n        // objects are self-similar, no matter the size\n        assert!(small.status_is_close_or_similar(&small));\n        assert!(large.status_is_close_or_similar(&large));\n\n        // values are similar if the larger value is larger by less than\n        // 12.5%, i.e. 
1/8 of the smaller value.\n        // In the example above, large is exactly 12.5% larger, so this doesn't\n        // match.\n        assert!(!small.status_is_close_or_similar(&large));\n        assert!(!large.status_is_close_or_similar(&small));\n\n        large.non_reclaimable -= 1;\n        assert!(large.status_is_close_or_similar(&large));\n\n        assert!(small.status_is_close_or_similar(&large));\n        assert!(large.status_is_close_or_similar(&small));\n\n        // The 1/8 rule only applies up to 128MiB of difference\n        small.non_reclaimable = 1024 * 1024 * 1024 * 1024;\n        large.non_reclaimable = small.non_reclaimable / 8 * 9;\n        assert!(small.status_is_close_or_similar(&small));\n        assert!(large.status_is_close_or_similar(&large));\n\n        assert!(!small.status_is_close_or_similar(&large));\n        assert!(!large.status_is_close_or_similar(&small));\n        // the large value is put just above the threshold\n        large.non_reclaimable = small.non_reclaimable + 128 * 1024 * 1024;\n        assert!(large.status_is_close_or_similar(&large));\n\n        assert!(!small.status_is_close_or_similar(&large));\n        assert!(!large.status_is_close_or_similar(&small));\n        // now below\n        large.non_reclaimable -= 1;\n        assert!(large.status_is_close_or_similar(&large));\n\n        assert!(small.status_is_close_or_similar(&large));\n        assert!(large.status_is_close_or_similar(&small));\n    }\n}\n"
  },
  {
    "path": "libs/vm_monitor/src/dispatcher.rs",
    "content": "//! Managing the websocket connection and other signals in the monitor.\n//!\n//! Contains types that manage the interaction (not data interchange, see `protocol`)\n//! between agent and monitor, allowing us to to process and send messages in a\n//! straightforward way. The dispatcher also manages that signals that come from\n//! the cgroup (requesting upscale), and the signals that go to the cgroup\n//! (notifying it of upscale).\n\nuse anyhow::{Context, bail};\nuse axum::extract::ws::{Message, Utf8Bytes, WebSocket};\nuse futures::stream::{SplitSink, SplitStream};\nuse futures::{SinkExt, StreamExt};\nuse tracing::{debug, info};\n\nuse crate::protocol::{\n    OutboundMsg, OutboundMsgKind, PROTOCOL_MAX_VERSION, PROTOCOL_MIN_VERSION, ProtocolRange,\n    ProtocolResponse, ProtocolVersion,\n};\n\n/// The central handler for all communications in the monitor.\n///\n/// The dispatcher has two purposes:\n/// 1. Manage the connection to the agent, sending and receiving messages.\n/// 2. Communicate with the cgroup manager, notifying it when upscale is received,\n///    and sending a message to the agent when the cgroup manager requests\n///    upscale.\n#[derive(Debug)]\npub struct Dispatcher {\n    /// We read agent messages of of `source`\n    pub(crate) source: SplitStream<WebSocket>,\n\n    /// We send messages to the agent through `sink`\n    sink: SplitSink<WebSocket, Message>,\n\n    /// The protocol version we have agreed to use with the agent. 
This is negotiated\n    /// during the creation of the dispatcher, and should be the highest shared protocol\n    /// version.\n    ///\n    // NOTE: currently unused, but will almost certainly be used in the futures\n    // as the protocol changes\n    #[allow(unused)]\n    pub(crate) proto_version: ProtocolVersion,\n}\n\nimpl Dispatcher {\n    /// Creates a new dispatcher using the passed-in connection.\n    ///\n    /// Performs a negotiation with the agent to determine the highest protocol\n    /// version that both support. This consists of two steps:\n    /// 1. Wait for the agent to sent the range of protocols it supports.\n    /// 2. Send a protocol version that works for us as well, or an error if there\n    ///    is no compatible version.\n    pub async fn new(stream: WebSocket) -> anyhow::Result<Self> {\n        let (mut sink, mut source) = stream.split();\n\n        // Figure out the highest protocol version we both support\n        info!(\"waiting for agent to send protocol version range\");\n        let Some(message) = source.next().await else {\n            bail!(\"websocket connection closed while performing protocol handshake\")\n        };\n\n        let message = message.context(\"failed to read protocol version range off connection\")?;\n\n        let Message::Text(message_text) = message else {\n            // All messages should be in text form, since we don't do any\n            // pinging/ponging. 
See nhooyr/websocket's implementation and the\n            // agent for more info\n            bail!(\"received non-text message during proocol handshake: {message:?}\")\n        };\n\n        let monitor_range = ProtocolRange {\n            min: PROTOCOL_MIN_VERSION,\n            max: PROTOCOL_MAX_VERSION,\n        };\n\n        let agent_range: ProtocolRange = serde_json::from_str(&message_text)\n            .context(\"failed to deserialize protocol version range\")?;\n\n        info!(range = ?agent_range, \"received protocol version range\");\n\n        let highest_shared_version = match monitor_range.highest_shared_version(&agent_range) {\n            Ok(version) => {\n                sink.send(Message::Text(Utf8Bytes::from(\n                    serde_json::to_string(&ProtocolResponse::Version(version)).unwrap(),\n                )))\n                .await\n                .context(\"failed to notify agent of negotiated protocol version\")?;\n                version\n            }\n            Err(e) => {\n                sink.send(Message::Text(Utf8Bytes::from(\n                    serde_json::to_string(&ProtocolResponse::Error(format!(\n                        \"Received protocol version range {agent_range} which does not overlap with {monitor_range}\"\n                    )))\n                    .unwrap(),\n                )))\n                .await\n                .context(\"failed to notify agent of no overlap between protocol version ranges\")?;\n                Err(e).context(\"error determining suitable protocol version range\")?\n            }\n        };\n\n        Ok(Self {\n            sink,\n            source,\n            proto_version: highest_shared_version,\n        })\n    }\n\n    /// Send a message to the agent.\n    ///\n    /// Although this function is small, it has one major benefit: it is the only\n    /// way to send data accross the connection, and you can only pass in a proper\n    /// `MonitorMessage`. 
Without safeguards like this, it's easy to accidentally\n    /// serialize the wrong thing and send it, since `self.sink.send` will take\n    /// any string.\n    pub async fn send(&mut self, message: OutboundMsg) -> anyhow::Result<()> {\n        if matches!(&message.inner, OutboundMsgKind::HealthCheck { .. }) {\n            debug!(?message, \"sending message\");\n        } else {\n            info!(?message, \"sending message\");\n        }\n\n        let json = serde_json::to_string(&message).context(\"failed to serialize message\")?;\n        self.sink\n            .send(Message::Text(Utf8Bytes::from(json)))\n            .await\n            .context(\"stream error sending message\")\n    }\n}\n"
  },
  {
    "path": "libs/vm_monitor/src/filecache.rs",
    "content": "//! Logic for configuring and scaling the Postgres file cache.\n\nuse std::num::NonZeroU64;\n\nuse anyhow::{Context, anyhow};\nuse tokio_postgres::types::ToSql;\nuse tokio_postgres::{Client, NoTls, Row};\nuse tokio_util::sync::CancellationToken;\nuse tracing::{error, info};\n\nuse crate::MiB;\n\n/// Manages Postgres' file cache by keeping a connection open.\n#[derive(Debug)]\npub struct FileCacheState {\n    client: Client,\n    conn_str: String,\n    pub(crate) config: FileCacheConfig,\n\n    /// A token for cancelling spawned threads during shutdown.\n    token: CancellationToken,\n}\n\n#[derive(Debug)]\npub struct FileCacheConfig {\n    /// The size of the file cache, in terms of the size of the resource it consumes\n    /// (currently: only memory)\n    ///\n    /// For example, setting `resource_multipler = 0.75` gives the cache a target size of 75% of total\n    /// resources.\n    ///\n    /// This value must be strictly between 0 and 1.\n    resource_multiplier: f64,\n\n    /// The required minimum amount of memory, in bytes, that must remain available\n    /// after subtracting the file cache.\n    ///\n    /// This value must be non-zero.\n    min_remaining_after_cache: NonZeroU64,\n\n    /// Controls the rate of increase in the file cache's size as it grows from zero\n    /// (when total resources equals min_remaining_after_cache) to the desired size based on\n    /// `resource_multiplier`.\n    ///\n    /// A `spread_factor` of zero means that all additional resources will go to the cache until it\n    /// reaches the desired size. Setting `spread_factor` to N roughly means \"for every 1 byte added to\n    /// the cache's size, N bytes are reserved for the rest of the system, until the cache gets to\n    /// its desired size\".\n    ///\n    /// This value must be >= 0, and must retain an increase that is more than what would be given by\n    /// `resource_multiplier`. 
For example, setting `resource_multiplier` = 0.75 but `spread_factor` = 1\n    /// would be invalid, because `spread_factor` would induce only 50% usage - never reaching the 75%\n    /// as desired by `resource_multiplier`.\n    ///\n    /// `spread_factor` is too large if `(spread_factor + 1) * resource_multiplier >= 1`.\n    spread_factor: f64,\n}\n\nimpl Default for FileCacheConfig {\n    fn default() -> Self {\n        Self {\n            resource_multiplier: 0.75,\n            // 256 MiB - lower than when in memory because overcommitting is safe; if we don't have\n            // memory, the kernel will just evict from its page cache, rather than e.g. killing\n            // everything.\n            min_remaining_after_cache: NonZeroU64::new(256 * MiB).unwrap(),\n            spread_factor: 0.1,\n        }\n    }\n}\n\nimpl FileCacheConfig {\n    /// Make sure fields of the config are consistent.\n    pub fn validate(&self) -> anyhow::Result<()> {\n        // Single field validity\n        anyhow::ensure!(\n            0.0 < self.resource_multiplier && self.resource_multiplier < 1.0,\n            \"resource_multiplier must be between 0.0 and 1.0 exclusive, got {}\",\n            self.resource_multiplier\n        );\n        anyhow::ensure!(\n            self.spread_factor >= 0.0,\n            \"spread_factor must be >= 0, got {}\",\n            self.spread_factor\n        );\n\n        // Check that `resource_multiplier` and `spread_factor` are valid w.r.t. each other.\n        //\n        // As shown in `calculate_cache_size`, we have two lines resulting from `resource_multiplier` and\n        // `spread_factor`, respectively. 
They are:\n        //\n        //                 `total`           `min_remaining_after_cache`\n        //   size = ————————————————————— - —————————————————————————————\n        //           `spread_factor` + 1         `spread_factor` + 1\n        //\n        // and\n        //\n        //   size = `resource_multiplier` × total\n        //\n        // .. where `total` is the total resources. These are isomorphic to the typical 'y = mx + b'\n        // form, with y = \"size\" and x = \"total\".\n        //\n        // These lines intersect at:\n        //\n        //               `min_remaining_after_cache`\n        //   ———————————————————————————————————————————————————\n        //    1 - `resource_multiplier` × (`spread_factor` + 1)\n        //\n        // We want to ensure that this value (a) exists, and (b) is >= `min_remaining_after_cache`. This is\n        // guaranteed when '`resource_multiplier` × (`spread_factor` + 1)' is less than 1.\n        // (We also need it to be >= 0, but that's already guaranteed.)\n\n        let intersect_factor = self.resource_multiplier * (self.spread_factor + 1.0);\n        anyhow::ensure!(\n            intersect_factor < 1.0,\n            \"incompatible resource_multipler and spread_factor\"\n        );\n        Ok(())\n    }\n\n    /// Calculate the desired size of the cache, given the total memory\n    pub fn calculate_cache_size(&self, total: u64) -> u64 {\n        // *Note*: all units are in bytes, until the very last line.\n        let available = total.saturating_sub(self.min_remaining_after_cache.get());\n        if available == 0 {\n            return 0;\n        }\n\n        // Conversions to ensure we don't overflow from floating-point ops\n        let size_from_spread =\n            i64::max(0, (available as f64 / (1.0 + self.spread_factor)) as i64) as u64;\n\n        let size_from_normal = (total as f64 * self.resource_multiplier) as u64;\n\n        let byte_size = u64::min(size_from_spread, 
size_from_normal);\n\n        // The file cache operates in units of mebibytes, so the sizes we produce should\n        // be rounded to a mebibyte. We round down to be conservative.\n        byte_size / MiB * MiB\n    }\n}\n\nimpl FileCacheState {\n    /// Connect to the file cache.\n    #[tracing::instrument(skip_all, fields(%conn_str, ?config))]\n    pub async fn new(\n        conn_str: &str,\n        config: FileCacheConfig,\n        token: CancellationToken,\n    ) -> anyhow::Result<Self> {\n        config.validate().context(\"file cache config is invalid\")?;\n\n        info!(conn_str, \"connecting to Postgres file cache\");\n        let client = FileCacheState::connect(conn_str, token.clone())\n            .await\n            .context(\"failed to connect to postgres file cache\")?;\n\n        let conn_str = conn_str.to_string();\n        Ok(Self {\n            client,\n            config,\n            conn_str,\n            token,\n        })\n    }\n\n    /// Connect to Postgres.\n    ///\n    /// Aborts the spawned thread if the kill signal is received. This is not\n    /// a method as it is called in [`FileCacheState::new`].\n    #[tracing::instrument(skip_all, fields(%conn_str))]\n    async fn connect(conn_str: &str, token: CancellationToken) -> anyhow::Result<Client> {\n        let (client, conn) = tokio_postgres::connect(conn_str, NoTls)\n            .await\n            .context(\"failed to connect to pg client\")?;\n\n        // The connection object performs the actual communication with the database,\n        // so spawn it off to run on its own. 
See tokio-postgres docs.\n        crate::spawn_with_cancel(\n            token,\n            |res| {\n                if let Err(e) = res {\n                    error!(error = format_args!(\"{e:#}\"), \"postgres error\");\n                }\n            },\n            conn,\n        );\n\n        Ok(client)\n    }\n\n    /// Execute a query with a retry if necessary.\n    ///\n    /// If the initial query fails, we restart the database connection and attempt\n    /// if again.\n    #[tracing::instrument(skip_all, fields(%statement))]\n    pub async fn query_with_retry(\n        &mut self,\n        statement: &str,\n        params: &[&(dyn ToSql + Sync)],\n    ) -> anyhow::Result<Vec<Row>> {\n        match self\n            .client\n            .query(statement, params)\n            .await\n            .context(\"failed to execute query\")\n        {\n            Ok(rows) => Ok(rows),\n            Err(e) => {\n                error!(error = format_args!(\"{e:#}\"), \"postgres error -> retrying\");\n\n                let client = FileCacheState::connect(&self.conn_str, self.token.clone())\n                    .await\n                    .context(\"failed to connect to postgres file cache\")?;\n                info!(\"successfully reconnected to postgres client\");\n\n                // Replace the old client and attempt the query with the new one\n                self.client = client;\n                self.client\n                    .query(statement, params)\n                    .await\n                    .context(\"failed to execute query a second time\")\n            }\n        }\n    }\n\n    /// Get the current size of the file cache.\n    #[tracing::instrument(skip_all)]\n    pub async fn get_file_cache_size(&mut self) -> anyhow::Result<u64> {\n        self.query_with_retry(\n            // The file cache GUC variable is in MiB, but the conversion with\n            // pg_size_bytes means that the end result we get is in bytes.\n            \"SELECT 
pg_size_bytes(current_setting('neon.file_cache_size_limit'));\",\n            &[],\n        )\n        .await\n        .context(\"failed to query pg for file cache size\")?\n        .first()\n        .ok_or_else(|| anyhow!(\"file cache size query returned no rows\"))?\n        // pg_size_bytes returns a bigint which is the same as an i64.\n        .try_get::<_, i64>(0)\n        // Since the size of the table is not negative, the cast is sound.\n        .map(|bytes| bytes as u64)\n        .context(\"failed to extract file cache size from query result\")\n    }\n\n    /// Attempt to set the file cache size, returning the size it was actually\n    /// set to.\n    #[tracing::instrument(skip_all, fields(%num_bytes))]\n    pub async fn set_file_cache_size(&mut self, num_bytes: u64) -> anyhow::Result<u64> {\n        let max_bytes = self\n            // The file cache GUC variable is in MiB, but the conversion with pg_size_bytes\n            // means that the end result we get is in bytes.\n            .query_with_retry(\n                \"SELECT pg_size_bytes(current_setting('neon.max_file_cache_size'));\",\n                &[],\n            )\n            .await\n            .context(\"failed to query pg for max file cache size\")?\n            .first()\n            .ok_or_else(|| anyhow!(\"max file cache size query returned no rows\"))?\n            .try_get::<_, i64>(0)\n            .map(|bytes| bytes as u64)\n            .context(\"failed to extract max file cache size from query result\")?;\n\n        let max_mb = max_bytes / MiB;\n        let num_mb = u64::min(num_bytes, max_bytes) / MiB;\n\n        let capped = if num_bytes > max_bytes {\n            \" (capped by maximum size)\"\n        } else {\n            \"\"\n        };\n\n        info!(\n            size = num_mb,\n            max = max_mb,\n            \"updating file cache size {capped}\",\n        );\n\n        // note: even though the normal ways to get the cache size produce values with trailing 
\"MB\"\n        // (hence why we call pg_size_bytes in `get_file_cache_size`'s query), the format\n        // it expects to set the value is \"integer number of MB\" without trailing units.\n        // For some reason, this *really* wasn't working with normal arguments, so that's\n        // why we're constructing the query here.\n        self.client\n            .query(\n                &format!(\"ALTER SYSTEM SET neon.file_cache_size_limit = {num_mb};\"),\n                &[],\n            )\n            .await\n            .context(\"failed to change file cache size limit\")?;\n\n        // must use pg_reload_conf to have the settings change take effect\n        self.client\n            .execute(\"SELECT pg_reload_conf();\", &[])\n            .await\n            .context(\"failed to reload config\")?;\n\n        Ok(num_mb * MiB)\n    }\n}\n"
  },
  {
    "path": "libs/vm_monitor/src/lib.rs",
    "content": "#![deny(unsafe_code)]\n#![deny(clippy::undocumented_unsafe_blocks)]\n#![cfg(target_os = \"linux\")]\n\nuse std::fmt::Debug;\nuse std::net::SocketAddr;\nuse std::time::Duration;\n\nuse anyhow::Context;\nuse axum::Router;\nuse axum::extract::ws::WebSocket;\nuse axum::extract::{State, WebSocketUpgrade};\nuse axum::response::Response;\nuse axum::routing::get;\nuse clap::Parser;\nuse futures::Future;\nuse runner::Runner;\nuse sysinfo::{RefreshKind, System, SystemExt};\nuse tokio::net::TcpListener;\nuse tokio::sync::broadcast;\nuse tokio::task::JoinHandle;\nuse tokio_util::sync::CancellationToken;\nuse tracing::{error, info};\n\n// Code that interfaces with agent\npub mod dispatcher;\npub mod protocol;\n\npub mod cgroup;\npub mod filecache;\npub mod runner;\n\n/// The vm-monitor is an autoscaling component started by compute_ctl.\n///\n/// It carries out autoscaling decisions (upscaling/downscaling) and responds to\n/// memory pressure by making requests to the autoscaler-agent.\n#[derive(Debug, Parser)]\npub struct Args {\n    /// The name of the cgroup we should monitor for memory.high events. This\n    /// is the cgroup that postgres should be running in.\n    #[arg(short, long)]\n    pub cgroup: Option<String>,\n\n    /// The connection string for the Postgres file cache we should manage.\n    #[arg(short, long)]\n    pub pgconnstr: Option<String>,\n\n    /// The address we should listen on for connection requests. For the\n    /// agent, this is 0.0.0.0:10301. For the informant, this is 127.0.0.1:10369.\n    #[arg(short, long)]\n    pub addr: String,\n}\n\nimpl Args {\n    pub fn addr(&self) -> &str {\n        &self.addr\n    }\n}\n\n/// The number of bytes in one mebibyte.\n#[allow(non_upper_case_globals)]\nconst MiB: u64 = 1 << 20;\n\n/// Convert a quantity in bytes to a quantity in mebibytes, generally for display\n/// purposes. 
(Most calculations in this crate use bytes directly)\npub fn bytes_to_mebibytes(bytes: u64) -> f32 {\n    (bytes as f32) / (MiB as f32)\n}\n\npub fn get_total_system_memory() -> u64 {\n    System::new_with_specifics(RefreshKind::new().with_memory()).total_memory()\n}\n\n/// Global app state for the Axum server\n#[derive(Debug, Clone)]\npub struct ServerState {\n    /// Used to close old connections.\n    ///\n    /// When a new connection is made, we send a message signalling to the old\n    /// connection to close.\n    pub sender: broadcast::Sender<()>,\n\n    /// Used to cancel all spawned threads in the monitor.\n    pub token: CancellationToken,\n\n    // The CLI args\n    pub args: &'static Args,\n}\n\n/// Spawn a thread that may get cancelled by the provided [`CancellationToken`].\n///\n/// This is mainly meant to be called with futures that will be pending for a very\n/// long time, or are not mean to return. If it is not desirable for the future to\n/// ever resolve, such as in the case of [`cgroup::CgroupWatcher::watch`], the error can\n/// be logged with `f`.\npub fn spawn_with_cancel<T, F>(\n    token: CancellationToken,\n    f: F,\n    future: T,\n) -> JoinHandle<Option<T::Output>>\nwhere\n    T: Future + Send + 'static,\n    T::Output: Send + 'static,\n    F: FnOnce(&T::Output) + Send + 'static,\n{\n    tokio::spawn(async move {\n        tokio::select! {\n            _ = token.cancelled() => {\n                info!(\"received global kill signal\");\n                None\n            }\n            res = future => {\n                f(&res);\n                Some(res)\n            }\n        }\n    })\n}\n\n/// The entrypoint to the binary.\n///\n/// Set up tracing, parse arguments, and start an http server.\npub async fn start(args: &'static Args, token: CancellationToken) -> anyhow::Result<()> {\n    // This channel is used to close old connections. 
When a new connection is\n    // made, we send a message signalling to the old connection to close.\n    let (sender, _) = tokio::sync::broadcast::channel::<()>(1);\n\n    let app = Router::new()\n        // This route gets upgraded to a websocket connection. We only support\n        // one connection at a time, which we enforce by killing old connections\n        // when we receive a new one.\n        .route(\"/monitor\", get(ws_handler))\n        .with_state(ServerState {\n            sender,\n            token,\n            args,\n        });\n\n    let addr_str = args.addr();\n    let addr: SocketAddr = addr_str.parse().expect(\"parsing address should not fail\");\n\n    let listener = TcpListener::bind(&addr)\n        .await\n        .with_context(|| format!(\"failed to bind to {addr}\"))?;\n    info!(addr_str, \"server bound\");\n    axum::serve(listener, app.into_make_service())\n        .await\n        .context(\"server exited\")?;\n\n    Ok(())\n}\n\n/// Handles incoming websocket connections.\n///\n/// If we are already to connected to an agent, we kill that old connection\n/// and accept the new one.\n#[tracing::instrument(name = \"/monitor\", skip_all, fields(?args))]\npub async fn ws_handler(\n    ws: WebSocketUpgrade,\n    State(ServerState {\n        sender,\n        token,\n        args,\n    }): State<ServerState>,\n) -> Response {\n    // Kill the old monitor\n    info!(\"closing old connection if there is one\");\n    let _ = sender.send(());\n\n    // Start the new one. Wow, the cycle of death and rebirth\n    let closer = sender.subscribe();\n    ws.on_upgrade(|ws| start_monitor(ws, args, closer, token))\n}\n\n/// Starts the monitor. 
If startup fails or the monitor exits, an error will\n/// be logged and our internal state will be reset to allow for new connections.\n#[tracing::instrument(skip_all)]\nasync fn start_monitor(\n    ws: WebSocket,\n    args: &Args,\n    kill: broadcast::Receiver<()>,\n    token: CancellationToken,\n) {\n    info!(\n        ?args,\n        \"accepted new websocket connection -> starting monitor\"\n    );\n    let timeout = Duration::from_secs(4);\n    let monitor = tokio::time::timeout(\n        timeout,\n        Runner::new(Default::default(), args, ws, kill, token),\n    )\n    .await;\n    let mut monitor = match monitor {\n        Ok(Ok(monitor)) => monitor,\n        Ok(Err(e)) => {\n            error!(error = format_args!(\"{e:#}\"), \"failed to create monitor\");\n            return;\n        }\n        Err(_) => {\n            error!(?timeout, \"creating monitor timed out\");\n            return;\n        }\n    };\n    info!(\"connected to agent\");\n\n    match monitor.run().await {\n        Ok(()) => info!(\"monitor was killed due to new connection\"),\n        Err(e) => error!(\n            error = format_args!(\"{e:#}\"),\n            \"monitor terminated unexpectedly\"\n        ),\n    }\n}\n"
  },
  {
    "path": "libs/vm_monitor/src/protocol.rs",
    "content": "//! Types representing protocols and actual agent-monitor messages.\n//!\n//! The pervasive use of serde modifiers throughout this module is to ease\n//! serialization on the go side. Because go does not have enums (which model\n//! messages well), it is harder to model messages, and we accomodate that with\n//! serde.\n//!\n//! *Note*: the agent sends and receives messages in different ways.\n//!\n//! The agent serializes messages in the form and then sends them. The use\n//! of `#[serde(tag = \"type\", content = \"content\")]` allows us to use `Type`\n//! to determine how to deserialize `Content`.\n//! ```ignore\n//! struct {\n//!     Content any\n//!     Type    string\n//!     Id      uint64\n//! }\n//! ```\n//! and receives messages in the form:\n//! ```ignore\n//! struct {\n//!     {fields embedded}\n//!     Type string\n//!     Id   uint64\n//! }\n//! ```\n//! After reading the type field, the agent will decode the entire message\n//! again, this time into the correct type using the embedded fields.\n//! Because the agent cannot just extract the json contained in a certain field\n//! (it initially deserializes to `map[string]interface{}`), we keep the fields\n//! at the top level, so the entire piece of json can be deserialized into a struct,\n//! 
such as a `DownscaleResult`, with the `Type` and `Id` fields ignored.\n\nuse core::fmt;\nuse std::cmp;\n\nuse serde::de::Error;\nuse serde::{Deserialize, Serialize};\n\n/// A Message we send to the agent.\n#[derive(Serialize, Deserialize, Debug, Clone)]\npub struct OutboundMsg {\n    #[serde(flatten)]\n    pub(crate) inner: OutboundMsgKind,\n    pub(crate) id: usize,\n}\n\nimpl OutboundMsg {\n    pub fn new(inner: OutboundMsgKind, id: usize) -> Self {\n        Self { inner, id }\n    }\n}\n\n/// The different underlying message types we can send to the agent.\n#[derive(Serialize, Deserialize, Debug, Clone)]\n#[serde(tag = \"type\")]\npub enum OutboundMsgKind {\n    /// Indicates that the agent sent an invalid message, i.e, we couldn't\n    /// properly deserialize it.\n    InvalidMessage { error: String },\n    /// Indicates that we experienced an internal error while processing a message.\n    /// For example, if a cgroup operation fails while trying to handle an upscale,\n    /// we return `InternalError`.\n    InternalError { error: String },\n    /// Returned to the agent once we have finished handling an upscale. If the\n    /// handling was unsuccessful, an `InternalError` will get returned instead.\n    /// *Note*: this is a struct variant because of the way go serializes struct{}\n    UpscaleConfirmation {},\n    /// Indicates to the monitor that we are urgently requesting resources.\n    /// *Note*: this is a struct variant because of the way go serializes struct{}\n    UpscaleRequest {},\n    /// Returned to the agent once we have finished attempting to downscale. 
If\n    /// an error occured trying to do so, an `InternalError` will get returned instead.\n    /// However, if we are simply unsuccessful (for example, do to needing the resources),\n    /// that gets included in the `DownscaleResult`.\n    DownscaleResult {\n        // FIXME for the future (once the informant is deprecated)\n        // As of the time of writing, the agent/informant version of this struct is\n        // called api.DownscaleResult. This struct has uppercase fields which are\n        // serialized as such. Thus, we serialize using uppercase names so we don't\n        // have to make a breaking change to the agent<->informant protocol. Once\n        // the informant has been superseded by the monitor, we can add the correct\n        // struct tags to api.DownscaleResult without causing a breaking change,\n        // since we don't need to support the agent<->informant protocol anymore.\n        #[serde(rename = \"Ok\")]\n        ok: bool,\n        #[serde(rename = \"Status\")]\n        status: String,\n    },\n    /// Part of the bidirectional heartbeat. The heartbeat is initiated by the\n    /// agent.\n    /// *Note*: this is a struct variant because of the way go serializes struct{}\n    HealthCheck {},\n}\n\n/// A message received form the agent.\n#[derive(Serialize, Deserialize, Debug, Clone)]\npub struct InboundMsg {\n    #[serde(flatten)]\n    pub(crate) inner: InboundMsgKind,\n    pub(crate) id: usize,\n}\n\n/// The different underlying message types we can receive from the agent.\n#[derive(Serialize, Deserialize, Debug, Clone)]\n#[serde(tag = \"type\", content = \"content\")]\npub enum InboundMsgKind {\n    /// Indicates that the we sent an invalid message, i.e, we couldn't\n    /// properly deserialize it.\n    InvalidMessage { error: String },\n    /// Indicates that the informan experienced an internal error while processing\n    /// a message. 
For example, if it failed to request upsacle from the agent, it\n    /// would return an `InternalError`.\n    InternalError { error: String },\n    /// Indicates to us that we have been granted more resources. We should respond\n    /// with an `UpscaleConfirmation` when done handling the resources (increasins\n    /// file cache size, cgorup memory limits).\n    UpscaleNotification { granted: Resources },\n    /// A request to reduce resource usage. We should response with a `DownscaleResult`,\n    /// when done.\n    DownscaleRequest { target: Resources },\n    /// Part of the bidirectional heartbeat. The heartbeat is initiated by the\n    /// agent.\n    /// *Note*: this is a struct variant because of the way go serializes struct{}\n    HealthCheck {},\n}\n\n/// Represents the resources granted to a VM.\n#[derive(Serialize, Deserialize, Debug, Clone, Copy)]\n// Renamed because the agent has multiple resources types:\n// `Resources` (milliCPU/memory slots)\n// `Allocation` (vCPU/bytes) <- what we correspond to\n#[serde(rename(serialize = \"Allocation\", deserialize = \"Allocation\"))]\npub struct Resources {\n    /// Number of vCPUs\n    pub(crate) cpu: f64,\n    /// Bytes of memory\n    pub(crate) mem: u64,\n}\n\nimpl Resources {\n    pub fn new(cpu: f64, mem: u64) -> Self {\n        Self { cpu, mem }\n    }\n}\n\npub const PROTOCOL_MIN_VERSION: ProtocolVersion = ProtocolVersion::V1_0;\npub const PROTOCOL_MAX_VERSION: ProtocolVersion = ProtocolVersion::V1_0;\n\n#[derive(Debug, Clone, Copy, PartialEq, PartialOrd, Ord, Eq, Serialize, Deserialize)]\npub struct ProtocolVersion(u8);\n\nimpl ProtocolVersion {\n    /// Represents v1.0 of the agent<-> monitor protocol - the initial version\n    ///\n    /// Currently the latest version.\n    const V1_0: ProtocolVersion = ProtocolVersion(1);\n}\n\nimpl fmt::Display for ProtocolVersion {\n    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {\n        match *self {\n            ProtocolVersion(0) => 
f.write_str(\"<invalid: zero>\"),\n            ProtocolVersion::V1_0 => f.write_str(\"v1.0\"),\n            other => write!(f, \"<unknown: {other}>\"),\n        }\n    }\n}\n\n/// A set of protocol bounds that determines what we are speaking.\n///\n/// These bounds are inclusive.\n#[derive(Debug)]\npub struct ProtocolRange {\n    pub min: ProtocolVersion,\n    pub max: ProtocolVersion,\n}\n\n// Use a custom deserialize impl to ensure that `self.min <= self.max`\nimpl<'de> Deserialize<'de> for ProtocolRange {\n    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>\n    where\n        D: serde::Deserializer<'de>,\n    {\n        #[derive(Deserialize)]\n        struct InnerProtocolRange {\n            min: ProtocolVersion,\n            max: ProtocolVersion,\n        }\n        let InnerProtocolRange { min, max } = InnerProtocolRange::deserialize(deserializer)?;\n        if min > max {\n            Err(D::Error::custom(format!(\n                \"min version = {min} is greater than max version = {max}\",\n            )))\n        } else {\n            Ok(ProtocolRange { min, max })\n        }\n    }\n}\n\nimpl fmt::Display for ProtocolRange {\n    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {\n        if self.min == self.max {\n            f.write_fmt(format_args!(\"{}\", self.max))\n        } else {\n            f.write_fmt(format_args!(\"{} to {}\", self.min, self.max))\n        }\n    }\n}\n\nimpl ProtocolRange {\n    /// Find the highest shared version between two `ProtocolRange`'s\n    pub fn highest_shared_version(&self, other: &Self) -> anyhow::Result<ProtocolVersion> {\n        // We first have to make sure the ranges are overlapping. 
Once we know\n        // this, we can merge the ranges by taking the max of the mins and the\n        // mins of the maxes.\n        if self.min > other.max {\n            anyhow::bail!(\n                \"Non-overlapping bounds: other.max = {} was less than self.min = {}\",\n                other.max,\n                self.min,\n            )\n        } else if self.max < other.min {\n            anyhow::bail!(\n                \"Non-overlappinng bounds: self.max = {} was less than other.min = {}\",\n                self.max,\n                other.min\n            )\n        } else {\n            Ok(cmp::min(self.max, other.max))\n        }\n    }\n}\n\n/// We send this to the monitor after negotiating which protocol to use\n#[derive(Serialize, Debug)]\n#[serde(rename_all = \"camelCase\")]\npub enum ProtocolResponse {\n    Error(String),\n    Version(ProtocolVersion),\n}\n"
  },
  {
    "path": "libs/vm_monitor/src/runner.rs",
    "content": "//! Exposes the `Runner`, which handles messages received from agent and\n//! sends upscale requests.\n//!\n//! This is the \"Monitor\" part of the monitor binary and is the main entrypoint for\n//! all functionality.\n\nuse std::fmt::Debug;\nuse std::time::{Duration, Instant};\n\nuse anyhow::{Context, bail};\nuse axum::extract::ws::{Message, WebSocket};\nuse futures::StreamExt;\nuse tokio::sync::{broadcast, watch};\nuse tokio_util::sync::CancellationToken;\nuse tracing::{debug, error, info, warn};\n\nuse crate::cgroup::{self, CgroupWatcher};\nuse crate::dispatcher::Dispatcher;\nuse crate::filecache::{FileCacheConfig, FileCacheState};\nuse crate::protocol::{InboundMsg, InboundMsgKind, OutboundMsg, OutboundMsgKind, Resources};\nuse crate::{Args, MiB, bytes_to_mebibytes, get_total_system_memory, spawn_with_cancel};\n\n/// Central struct that interacts with agent, dispatcher, and cgroup to handle\n/// signals from the agent.\n#[derive(Debug)]\npub struct Runner {\n    config: Config,\n    filecache: Option<FileCacheState>,\n    cgroup: Option<CgroupState>,\n    dispatcher: Dispatcher,\n\n    /// We \"mint\" new message ids by incrementing this counter and taking the value.\n    ///\n    /// **Note**: This counter is always odd, so that we avoid collisions between the IDs generated\n    /// by us vs the autoscaler-agent.\n    counter: usize,\n\n    last_upscale_request_at: Option<Instant>,\n\n    /// A signal to kill the main thread produced by `self.run()`. This is triggered\n    /// when the server receives a new connection. 
When the thread receives the\n    /// signal off this channel, it will gracefully shutdown.\n    kill: broadcast::Receiver<()>,\n}\n\n#[derive(Debug)]\nstruct CgroupState {\n    watcher: watch::Receiver<(Instant, cgroup::MemoryHistory)>,\n    /// If [`cgroup::MemoryHistory::avg_non_reclaimable`] exceeds `threshold`, we send upscale\n    /// requests.\n    threshold: u64,\n}\n\n/// Configuration for a `Runner`\n#[derive(Debug)]\npub struct Config {\n    /// `sys_buffer_bytes` gives the estimated amount of memory, in bytes, that the kernel uses before\n    /// handing out the rest to userspace. This value is the estimated difference between the\n    /// *actual* physical memory and the amount reported by `grep MemTotal /proc/meminfo`.\n    ///\n    /// For more information, refer to `man 5 proc`, which defines MemTotal as \"Total usable RAM\n    /// (i.e., physical RAM minus a few reserved bits and the kernel binary code)\".\n    ///\n    /// We only use `sys_buffer_bytes` when calculating the system memory from the *external* memory\n    /// size, rather than the self-reported memory size, according to the kernel.\n    ///\n    /// TODO: this field is only necessary while we still have to trust the autoscaler-agent's\n    /// upscale resource amounts (because we might not *actually* have been upscaled yet). 
This field\n    /// should be removed once we have a better solution there.\n    sys_buffer_bytes: u64,\n\n    /// Minimum fraction of total system memory reserved *before* the cgroup threshold; in\n    /// other words, providing a ceiling for the highest value of the threshold by enforcing that\n    /// there's at least `cgroup_min_overhead_fraction` of the total memory remaining beyond the\n    /// threshold.\n    ///\n    /// For example, a value of `0.1` means that 10% of total memory must remain after exceeding\n    /// the threshold, so the value of the cgroup threshold would always be capped at 90% of total\n    /// memory.\n    ///\n    /// The default value of `0.15` means that we *guarantee* sending upscale requests if the\n    /// cgroup is using more than 85% of total memory.\n    cgroup_min_overhead_fraction: f64,\n\n    cgroup_downscale_threshold_buffer_bytes: u64,\n}\n\nimpl Default for Config {\n    fn default() -> Self {\n        Self {\n            sys_buffer_bytes: 100 * MiB,\n            cgroup_min_overhead_fraction: 0.15,\n            cgroup_downscale_threshold_buffer_bytes: 100 * MiB,\n        }\n    }\n}\n\nimpl Config {\n    fn cgroup_threshold(&self, total_mem: u64) -> u64 {\n        // We want our threshold to be met gracefully instead of letting postgres get OOM-killed\n        // (or if there's room, spilling to swap).\n        // So we guarantee that there's at least `cgroup_min_overhead_fraction` of total memory\n        // remaining above the threshold.\n        (total_mem as f64 * (1.0 - self.cgroup_min_overhead_fraction)) as u64\n    }\n}\n\nimpl Runner {\n    /// Create a new monitor.\n    #[tracing::instrument(skip_all, fields(?config, ?args))]\n    pub async fn new(\n        config: Config,\n        args: &Args,\n        ws: WebSocket,\n        kill: broadcast::Receiver<()>,\n        token: CancellationToken,\n    ) -> anyhow::Result<Runner> {\n        anyhow::ensure!(\n            config.sys_buffer_bytes != 0,\n            
\"invalid monitor Config: sys_buffer_bytes cannot be 0\"\n        );\n\n        let dispatcher = Dispatcher::new(ws)\n            .await\n            .context(\"error creating new dispatcher\")?;\n\n        let mut state = Runner {\n            config,\n            filecache: None,\n            cgroup: None,\n            dispatcher,\n            counter: 1, // NB: must be odd, see the comment about the field for more.\n            last_upscale_request_at: None,\n            kill,\n        };\n\n        let mem = get_total_system_memory();\n\n        if let Some(connstr) = &args.pgconnstr {\n            info!(\"initializing file cache\");\n            let config = FileCacheConfig::default();\n\n            let mut file_cache = FileCacheState::new(connstr, config, token.clone())\n                .await\n                .context(\"failed to create file cache\")?;\n\n            let size = file_cache\n                .get_file_cache_size()\n                .await\n                .context(\"error getting file cache size\")?;\n\n            let new_size = file_cache.config.calculate_cache_size(mem);\n            info!(\n                initial = bytes_to_mebibytes(size),\n                new = bytes_to_mebibytes(new_size),\n                \"setting initial file cache size\",\n            );\n\n            // note: even if size == new_size, we want to explicitly set it, just\n            // to make sure that we have the permissions to do so\n            let actual_size = file_cache\n                .set_file_cache_size(new_size)\n                .await\n                .context(\"failed to set file cache size, possibly due to inadequate permissions\")?;\n            if actual_size != new_size {\n                info!(\"file cache size actually got set to {actual_size}\")\n            }\n\n            state.filecache = Some(file_cache);\n        }\n\n        if let Some(name) = &args.cgroup {\n            // Best not to set up cgroup stuff more than once, so we'll 
initialize cgroup state\n            // now, and then set limits later.\n            info!(\"initializing cgroup\");\n\n            let cgroup =\n                CgroupWatcher::new(name.clone()).context(\"failed to create cgroup manager\")?;\n\n            let init_value = cgroup::MemoryHistory {\n                avg_non_reclaimable: 0,\n                samples_count: 0,\n                samples_span: Duration::ZERO,\n            };\n            let (hist_tx, hist_rx) = watch::channel((Instant::now(), init_value));\n\n            spawn_with_cancel(token, |_| error!(\"cgroup watcher terminated\"), async move {\n                cgroup.watch(hist_tx).await\n            });\n\n            let threshold = state.config.cgroup_threshold(mem);\n            info!(threshold, \"set initial cgroup threshold\",);\n\n            state.cgroup = Some(CgroupState {\n                watcher: hist_rx,\n                threshold,\n            });\n        }\n\n        Ok(state)\n    }\n\n    /// Attempt to downscale filecache + cgroup\n    #[tracing::instrument(skip_all, fields(?target))]\n    pub async fn try_downscale(&mut self, target: Resources) -> anyhow::Result<(bool, String)> {\n        // Nothing to adjust\n        if self.cgroup.is_none() && self.filecache.is_none() {\n            info!(\"no action needed for downscale (no cgroup or file cache enabled)\");\n            return Ok((\n                true,\n                \"monitor is not managing cgroup or file cache\".to_string(),\n            ));\n        }\n\n        let requested_mem = target.mem;\n        let usable_system_memory = requested_mem.saturating_sub(self.config.sys_buffer_bytes);\n        let expected_file_cache_size = self\n            .filecache\n            .as_ref()\n            .map(|file_cache| file_cache.config.calculate_cache_size(usable_system_memory))\n            .unwrap_or(0);\n        if let Some(cgroup) = &self.cgroup {\n            let (last_time, last_history) = *cgroup.watcher.borrow();\n\n     
       // NB: The ordering of these conditions is intentional. During startup, we should deny\n            // downscaling until we have enough information to determine that it's safe to do so\n            // (i.e. enough samples have come in). But if it's been a while and we *still* haven't\n            // received any information, we should *fail* instead of just denying downscaling.\n            //\n            // `last_time` is set to `Instant::now()` on startup, so checking `last_time.elapsed()`\n            // serves double-duty: it trips if we haven't received *any* metrics for long enough,\n            // OR if we haven't received metrics *recently enough*.\n            //\n            // TODO: make the duration here configurable.\n            if last_time.elapsed() > Duration::from_secs(5) {\n                bail!(\n                    \"haven't gotten cgroup memory stats recently enough to determine downscaling information\"\n                );\n            } else if last_history.samples_count <= 1 {\n                let status = \"haven't received enough cgroup memory stats yet\";\n                info!(status, \"discontinuing downscale\");\n                return Ok((false, status.to_owned()));\n            }\n\n            let new_threshold = self.config.cgroup_threshold(usable_system_memory);\n\n            let current = last_history.avg_non_reclaimable;\n\n            if new_threshold < current + self.config.cgroup_downscale_threshold_buffer_bytes {\n                let status = format!(\n                    \"{}: {} MiB (new threshold) < {} (current usage) + {} (downscale buffer)\",\n                    \"calculated memory threshold too low\",\n                    bytes_to_mebibytes(new_threshold),\n                    bytes_to_mebibytes(current),\n                    bytes_to_mebibytes(self.config.cgroup_downscale_threshold_buffer_bytes)\n                );\n\n                info!(status, \"discontinuing downscale\");\n\n                return 
Ok((false, status));\n            }\n        }\n\n        // The downscaling has been approved. Downscale the file cache, then the cgroup.\n        let mut status = vec![];\n        if let Some(file_cache) = &mut self.filecache {\n            let actual_usage = file_cache\n                .set_file_cache_size(expected_file_cache_size)\n                .await\n                .context(\"failed to set file cache size\")?;\n            let message = format!(\n                \"set file cache size to {} MiB\",\n                bytes_to_mebibytes(actual_usage),\n            );\n            info!(\"downscale: {message}\");\n            status.push(message);\n        }\n\n        if let Some(cgroup) = &mut self.cgroup {\n            let new_threshold = self.config.cgroup_threshold(usable_system_memory);\n\n            let message = format!(\n                \"set cgroup memory threshold from {} MiB to {} MiB, of new total {} MiB\",\n                bytes_to_mebibytes(cgroup.threshold),\n                bytes_to_mebibytes(new_threshold),\n                bytes_to_mebibytes(usable_system_memory)\n            );\n            cgroup.threshold = new_threshold;\n            info!(\"downscale: {message}\");\n            status.push(message);\n        }\n\n        // TODO: make this status thing less jank\n        let status = status.join(\"; \");\n        Ok((true, status))\n    }\n\n    /// Handle new resources\n    #[tracing::instrument(skip_all, fields(?resources))]\n    pub async fn handle_upscale(&mut self, resources: Resources) -> anyhow::Result<()> {\n        if self.filecache.is_none() && self.cgroup.is_none() {\n            info!(\"no action needed for upscale (no cgroup or file cache enabled)\");\n            return Ok(());\n        }\n\n        let new_mem = resources.mem;\n        let usable_system_memory = new_mem.saturating_sub(self.config.sys_buffer_bytes);\n\n        if let Some(file_cache) = &mut self.filecache {\n            let expected_usage = 
file_cache.config.calculate_cache_size(usable_system_memory);\n            info!(\n                target = bytes_to_mebibytes(expected_usage),\n                total = bytes_to_mebibytes(new_mem),\n                \"updating file cache size\",\n            );\n\n            let actual_usage = file_cache\n                .set_file_cache_size(expected_usage)\n                .await\n                .context(\"failed to set file cache size\")?;\n\n            if actual_usage != expected_usage {\n                warn!(\n                    \"file cache was set to a different size that we wanted: target = {} Mib, actual= {} Mib\",\n                    bytes_to_mebibytes(expected_usage),\n                    bytes_to_mebibytes(actual_usage)\n                )\n            }\n        }\n\n        if let Some(cgroup) = &mut self.cgroup {\n            let new_threshold = self.config.cgroup_threshold(usable_system_memory);\n\n            info!(\n                \"set cgroup memory threshold from {} MiB to {} MiB of new total {} MiB\",\n                bytes_to_mebibytes(cgroup.threshold),\n                bytes_to_mebibytes(new_threshold),\n                bytes_to_mebibytes(usable_system_memory)\n            );\n            cgroup.threshold = new_threshold;\n        }\n\n        Ok(())\n    }\n\n    /// Take in a message and perform some action, such as downscaling or upscaling,\n    /// and return a message to be sent back.\n    #[tracing::instrument(skip_all, fields(%id, message = ?inner))]\n    pub async fn process_message(\n        &mut self,\n        InboundMsg { inner, id }: InboundMsg,\n    ) -> anyhow::Result<Option<OutboundMsg>> {\n        match inner {\n            InboundMsgKind::UpscaleNotification { granted } => {\n                self.handle_upscale(granted)\n                    .await\n                    .context(\"failed to handle upscale\")?;\n                Ok(Some(OutboundMsg::new(\n                    OutboundMsgKind::UpscaleConfirmation {},\n         
           id,\n                )))\n            }\n            InboundMsgKind::DownscaleRequest { target } => self\n                .try_downscale(target)\n                .await\n                .context(\"failed to downscale\")\n                .map(|(ok, status)| {\n                    Some(OutboundMsg::new(\n                        OutboundMsgKind::DownscaleResult { ok, status },\n                        id,\n                    ))\n                }),\n            InboundMsgKind::InvalidMessage { error } => {\n                warn!(\n                    error = format_args!(\"{error:#}\"),\n                    id, \"received notification of an invalid message we sent\"\n                );\n                Ok(None)\n            }\n            InboundMsgKind::InternalError { error } => {\n                warn!(\n                    error = format_args!(\"{error:#}\"),\n                    id, \"agent experienced an internal error\"\n                );\n                Ok(None)\n            }\n            InboundMsgKind::HealthCheck {} => {\n                Ok(Some(OutboundMsg::new(OutboundMsgKind::HealthCheck {}, id)))\n            }\n        }\n    }\n\n    // TODO: don't propagate errors, probably just warn!?\n    #[tracing::instrument(skip_all)]\n    pub async fn run(&mut self) -> anyhow::Result<()> {\n        info!(\"starting dispatcher\");\n        loop {\n            tokio::select! 
{\n                signal = self.kill.recv() => {\n                    match signal {\n                        Ok(()) => return Ok(()),\n                        Err(e) => bail!(\"failed to receive kill signal: {e}\")\n                    }\n                }\n\n                // New memory stats from the cgroup, *may* need to request upscaling, if we've\n                // exceeded the threshold\n                result = self.cgroup.as_mut().unwrap().watcher.changed(), if self.cgroup.is_some() => {\n                    result.context(\"failed to receive from cgroup memory stats watcher\")?;\n\n                    let cgroup = self.cgroup.as_ref().unwrap();\n\n                    let (_time, cgroup_mem_stat) = *cgroup.watcher.borrow();\n\n                    // If we haven't exceeded the threshold, then we're all ok\n                    if cgroup_mem_stat.avg_non_reclaimable < cgroup.threshold {\n                        continue;\n                    }\n\n                    // Otherwise, we generally want upscaling. 
But, if it's been less than 1 second\n                    // since the last time we requested upscaling, ignore the event, to avoid\n                    // spamming the agent.\n                    if let Some(t) = self.last_upscale_request_at {\n                        let elapsed = t.elapsed();\n                        if elapsed < Duration::from_secs(1) {\n                            // *Ideally* we'd like to log here that we're ignoring the fact the\n                            // memory stats are too high, but in practice this can result in\n                            // spamming the logs with repetitive messages about ignoring the signal\n                            //\n                            // See https://github.com/neondatabase/neon/issues/5865 for more.\n                            continue;\n                        }\n                    }\n\n                    self.last_upscale_request_at = Some(Instant::now());\n\n                    info!(\n                        avg_non_reclaimable = bytes_to_mebibytes(cgroup_mem_stat.avg_non_reclaimable),\n                        threshold = bytes_to_mebibytes(cgroup.threshold),\n                        \"cgroup memory stats are high enough to upscale, requesting upscale\",\n                    );\n\n                    self.counter += 2; // Increment, preserving parity (i.e. keep the\n                                       // counter odd). 
See the field comment for more.\n                    self.dispatcher\n                        .send(OutboundMsg::new(OutboundMsgKind::UpscaleRequest {}, self.counter))\n                        .await\n                        .context(\"failed to send message\")?;\n                },\n\n                // there is a message from the agent\n                msg = self.dispatcher.source.next() => {\n                    if let Some(msg) = msg {\n                        match &msg {\n                            Ok(msg) => {\n                                let message: InboundMsg = match msg {\n                                    Message::Text(text) => {\n                                        serde_json::from_str(text).context(\"failed to deserialize text message\")?\n                                    }\n                                    other => {\n                                        warn!(\n                                            // Don't use 'message' as a key as the\n                                            // string also uses that for its key\n                                            msg = ?other,\n                                            \"problem processing incoming message: agent should only send text messages but received different type\"\n                                        );\n                                        continue\n                                    },\n                                };\n\n                                if matches!(&message.inner, InboundMsgKind::HealthCheck { .. 
}) {\n                                    debug!(?msg, \"received message\");\n                                } else {\n                                    info!(?msg, \"received message\");\n                                }\n\n                                let out = match self.process_message(message.clone()).await {\n                                    Ok(Some(out)) => out,\n                                    Ok(None) => continue,\n                                    Err(e) => {\n                                        // use {:#} for our logging because the display impl only\n                                        // gives the outermost cause, and the debug impl\n                                        // pretty-prints the error, whereas {:#} contains all the\n                                        // causes, but is compact (no newlines).\n                                        warn!(error = format_args!(\"{e:#}\"), \"error handling message\");\n                                        OutboundMsg::new(\n                                            OutboundMsgKind::InternalError {\n                                                error: e.to_string(),\n                                            },\n                                            message.id\n                                        )\n                                    }\n                                };\n\n                                self.dispatcher\n                                    .send(out)\n                                    .await\n                                    .context(\"failed to send message\")?;\n                            }\n                            Err(e) => warn!(\n                                error = format_args!(\"{e:#}\"),\n                                msg = ?msg,\n                                \"received error message\"\n                            ),\n                        }\n                    } else {\n                        
anyhow::bail!(\"dispatcher connection closed\")\n                    }\n                }\n            }\n        }\n    }\n}\n"
  },
  {
    "path": "libs/wal_decoder/Cargo.toml",
    "content": "[package]\nname = \"wal_decoder\"\nversion = \"0.1.0\"\nedition.workspace = true\nlicense.workspace = true\n\n[features]\ntesting = [\"pageserver_api/testing\"]\n\n[dependencies]\nasync-compression.workspace = true\nanyhow.workspace = true\nbytes.workspace = true\npageserver_api.workspace = true\nprost.workspace = true\npostgres_ffi.workspace = true\npostgres_ffi_types.workspace = true\nserde.workspace = true\nthiserror.workspace = true\ntokio = { workspace = true, features = [\"io-util\"] }\ntracing.workspace = true\nutils.workspace = true\nworkspace_hack = { version = \"0.1\", path = \"../../workspace_hack\" }\n\n[build-dependencies]\ntonic-build.workspace = true\n\n[dev-dependencies]\ncriterion.workspace = true\ncamino.workspace = true\ncamino-tempfile.workspace = true\nremote_storage.workspace = true\ntokio-util.workspace = true\nserde_json.workspace = true\nfutures.workspace = true\ntikv-jemallocator.workspace = true\npprof.workspace = true\n\n[[bench]]\nname = \"bench_interpret_wal\"\nharness = false\n"
  },
  {
    "path": "libs/wal_decoder/benches/README.md",
    "content": "## WAL Decoding and Interpretation Benchmarks\n\nNote that these benchmarks pull WAL from a public bucket in S3\nas a preparation step. Hence, you need a way to auth with AWS.\nYou can achieve this by copying the `~/.aws/config` file from\nthe AWS SSO notion page and exporting `AWS_PROFILE=dev` when invoking\nthe benchmarks.\n\nTo run benchmarks:\n\n```sh\naws sso login --profile dev\n\n# All benchmarks.\nAWS_PROFILE=dev cargo bench --package wal_decoder\n\n# Specific file.\nAWS_PROFILE=dev cargo bench --package wal_decoder --bench bench_interpret_wal\n\n# Specific benchmark.\nAWS_PROFILE=dev cargo bench --package wal_decoder --bench bench_interpret_wal unsharded\n\n# List available benchmarks.\ncargo bench --package wal_decoder --benches -- --list\n\n# Generate flamegraph profiles using pprof-rs, profiling for 10 seconds.\n# Output in target/criterion/*/profile/flamegraph.svg.\nAWS_PROFILE=dev cargo bench --package wal_decoder --bench bench_interpret_wal unsharded -- --profile-time 10\n```\n\nAdditional charts and statistics are available in `target/criterion/report/index.html`.\n\nBenchmarks are automatically compared against the previous run. To compare against other runs, see\n`--baseline` and `--save-baseline`.\n"
  },
  {
    "path": "libs/wal_decoder/benches/bench_interpret_wal.rs",
    "content": "use std::env;\nuse std::num::NonZeroUsize;\nuse std::sync::Arc;\n\nuse anyhow::Context;\nuse camino::{Utf8Path, Utf8PathBuf};\nuse camino_tempfile::Utf8TempDir;\nuse criterion::{Criterion, criterion_group, criterion_main};\nuse futures::StreamExt;\nuse futures::stream::FuturesUnordered;\nuse pageserver_api::shard::{ShardIdentity, ShardStripeSize};\nuse postgres_ffi::waldecoder::WalStreamDecoder;\nuse postgres_ffi::{MAX_SEND_SIZE, PgMajorVersion, WAL_SEGMENT_SIZE};\nuse pprof::criterion::{Output, PProfProfiler};\nuse remote_storage::{\n    DownloadOpts, GenericRemoteStorage, ListingMode, RemoteStorageConfig, RemoteStorageKind,\n    S3Config,\n};\nuse serde::Deserialize;\nuse tokio_util::sync::CancellationToken;\nuse utils::lsn::Lsn;\nuse utils::shard::{ShardCount, ShardNumber};\nuse wal_decoder::models::InterpretedWalRecord;\n\nconst S3_BUCKET: &str = \"neon-github-public-dev\";\nconst S3_REGION: &str = \"eu-central-1\";\nconst BUCKET_PREFIX: &str = \"wal-snapshots/bulk-insert/\";\nconst METADATA_FILENAME: &str = \"metadata.json\";\n\n/// Use jemalloc, and configure it to sample allocations for profiles every 1 MB.\n/// This mirrors the configuration in bin/safekeeper.rs.\n#[global_allocator]\nstatic GLOBAL: tikv_jemallocator::Jemalloc = tikv_jemallocator::Jemalloc;\n\n#[allow(non_upper_case_globals)]\n#[unsafe(export_name = \"malloc_conf\")]\npub static malloc_conf: &[u8] = b\"prof:true,prof_active:true,lg_prof_sample:20\\0\";\n\nasync fn create_s3_client() -> anyhow::Result<Arc<GenericRemoteStorage>> {\n    let remote_storage_config = RemoteStorageConfig {\n        storage: RemoteStorageKind::AwsS3(S3Config {\n            bucket_name: S3_BUCKET.to_string(),\n            bucket_region: S3_REGION.to_string(),\n            prefix_in_bucket: Some(BUCKET_PREFIX.to_string()),\n            endpoint: None,\n            concurrency_limit: NonZeroUsize::new(100).unwrap(),\n            max_keys_per_list_response: None,\n            upload_storage_class: 
None,\n        }),\n        timeout: RemoteStorageConfig::DEFAULT_TIMEOUT,\n        small_timeout: RemoteStorageConfig::DEFAULT_SMALL_TIMEOUT,\n    };\n    Ok(Arc::new(\n        GenericRemoteStorage::from_config(&remote_storage_config)\n            .await\n            .context(\"remote storage init\")?,\n    ))\n}\n\nasync fn download_bench_data(\n    client: Arc<GenericRemoteStorage>,\n    cancel: &CancellationToken,\n) -> anyhow::Result<Utf8TempDir> {\n    let temp_dir_parent: Utf8PathBuf = env::current_dir().unwrap().try_into()?;\n    let temp_dir = camino_tempfile::tempdir_in(temp_dir_parent)?;\n\n    eprintln!(\"Downloading benchmark data to {temp_dir:?}\");\n\n    let listing = client\n        .list(None, ListingMode::NoDelimiter, None, cancel)\n        .await?;\n\n    let mut downloads = listing\n        .keys\n        .into_iter()\n        .map(|obj| {\n            let client = client.clone();\n            let temp_dir_path = temp_dir.path().to_owned();\n\n            async move {\n                let remote_path = obj.key;\n                let download = client\n                    .download(&remote_path, &DownloadOpts::default(), cancel)\n                    .await?;\n                let mut body = tokio_util::io::StreamReader::new(download.download_stream);\n\n                let file_name = remote_path.object_name().unwrap();\n                let file_path = temp_dir_path.join(file_name);\n                let file = tokio::fs::OpenOptions::new()\n                    .create(true)\n                    .truncate(true)\n                    .write(true)\n                    .open(&file_path)\n                    .await?;\n\n                let mut writer = tokio::io::BufWriter::new(file);\n                tokio::io::copy_buf(&mut body, &mut writer).await?;\n\n                Ok::<(), anyhow::Error>(())\n            }\n        })\n        .collect::<FuturesUnordered<_>>();\n\n    while let Some(download) = downloads.next().await {\n        download?;\n    
}\n\n    Ok(temp_dir)\n}\n\nstruct BenchmarkData {\n    wal: Vec<u8>,\n    meta: BenchmarkMetadata,\n}\n\n#[derive(Deserialize)]\nstruct BenchmarkMetadata {\n    pg_version: PgMajorVersion,\n    start_lsn: Lsn,\n}\n\nasync fn load_bench_data(path: &Utf8Path, input_size: usize) -> anyhow::Result<BenchmarkData> {\n    eprintln!(\"Loading benchmark data from {path:?}\");\n\n    let mut entries = tokio::fs::read_dir(path).await?;\n    let mut ordered_segment_paths = Vec::new();\n    let mut metadata = None;\n\n    while let Some(entry) = entries.next_entry().await? {\n        if entry.file_name() == METADATA_FILENAME {\n            let bytes = tokio::fs::read(entry.path()).await?;\n            metadata = Some(\n                serde_json::from_slice::<BenchmarkMetadata>(&bytes)\n                    .context(\"failed to deserialize metadata.json\")?,\n            );\n        } else {\n            ordered_segment_paths.push(entry.path());\n        }\n    }\n\n    ordered_segment_paths.sort();\n\n    let mut buffer = Vec::new();\n    for path in ordered_segment_paths {\n        if buffer.len() >= input_size {\n            break;\n        }\n\n        use async_compression::tokio::bufread::ZstdDecoder;\n        let file = tokio::fs::File::open(path).await?;\n        let reader = tokio::io::BufReader::new(file);\n        let decoder = ZstdDecoder::new(reader);\n        let mut reader = tokio::io::BufReader::new(decoder);\n        tokio::io::copy_buf(&mut reader, &mut buffer).await?;\n    }\n\n    buffer.truncate(input_size);\n\n    Ok(BenchmarkData {\n        wal: buffer,\n        meta: metadata.unwrap(),\n    })\n}\n\nfn criterion_benchmark(c: &mut Criterion) {\n    const INPUT_SIZE: usize = 128 * 1024 * 1024;\n\n    let setup_runtime = tokio::runtime::Builder::new_current_thread()\n        .enable_all()\n        .build()\n        .unwrap();\n\n    let (_temp_dir, bench_data) = setup_runtime.block_on(async move {\n        let cancel = CancellationToken::new();\n        let 
client = create_s3_client().await.unwrap();\n        let temp_dir = download_bench_data(client, &cancel).await.unwrap();\n        let bench_data = load_bench_data(temp_dir.path(), INPUT_SIZE).await.unwrap();\n\n        (temp_dir, bench_data)\n    });\n\n    eprintln!(\n        \"Benchmarking against {} MiB of WAL\",\n        INPUT_SIZE / 1024 / 1024\n    );\n\n    let mut group = c.benchmark_group(\"decode-interpret-wal\");\n    group.throughput(criterion::Throughput::Bytes(bench_data.wal.len() as u64));\n    group.sample_size(10);\n\n    group.bench_function(\"unsharded\", |b| {\n        b.iter(|| decode_interpret_main(&bench_data, &[ShardIdentity::unsharded()]))\n    });\n\n    let eight_shards = (0..8)\n        .map(|i| ShardIdentity::new(ShardNumber(i), ShardCount(8), ShardStripeSize(8)).unwrap())\n        .collect::<Vec<_>>();\n\n    group.bench_function(\"8/8-shards\", |b| {\n        b.iter(|| decode_interpret_main(&bench_data, &eight_shards))\n    });\n\n    let four_shards = eight_shards\n        .into_iter()\n        .filter(|s| s.number.0 % 2 == 0)\n        .collect::<Vec<_>>();\n    group.bench_function(\"4/8-shards\", |b| {\n        b.iter(|| decode_interpret_main(&bench_data, &four_shards))\n    });\n\n    let two_shards = four_shards\n        .into_iter()\n        .filter(|s| s.number.0 % 4 == 0)\n        .collect::<Vec<_>>();\n    group.bench_function(\"2/8-shards\", |b| {\n        b.iter(|| decode_interpret_main(&bench_data, &two_shards))\n    });\n}\n\nfn decode_interpret_main(bench: &BenchmarkData, shards: &[ShardIdentity]) {\n    let r = decode_interpret(bench, shards);\n    if let Err(e) = r {\n        panic!(\"{e:?}\");\n    }\n}\n\nfn decode_interpret(bench: &BenchmarkData, shard: &[ShardIdentity]) -> anyhow::Result<()> {\n    let mut decoder = WalStreamDecoder::new(bench.meta.start_lsn, bench.meta.pg_version);\n    let xlogoff: usize = bench.meta.start_lsn.segment_offset(WAL_SEGMENT_SIZE);\n\n    for chunk in 
bench.wal[xlogoff..].chunks(MAX_SEND_SIZE) {\n        decoder.feed_bytes(chunk);\n        while let Some((lsn, recdata)) = decoder.poll_decode().unwrap() {\n            assert!(lsn.is_aligned());\n            let _ = InterpretedWalRecord::from_bytes_filtered(\n                recdata,\n                shard,\n                lsn,\n                bench.meta.pg_version,\n            )\n            .unwrap();\n        }\n    }\n\n    Ok(())\n}\ncriterion_group!(\n    name=benches;\n    config=Criterion::default().with_profiler(PProfProfiler::new(100, Output::Flamegraph(None)));\n    targets=criterion_benchmark\n);\ncriterion_main!(benches);\n"
  },
  {
    "path": "libs/wal_decoder/build.rs",
    "content": "fn main() -> Result<(), Box<dyn std::error::Error>> {\n    // Generate rust code from .proto protobuf.\n    //\n    // Note: we previously tried to use deterministic location at proto/ for\n    // easy location, but apparently interference with cachepot sometimes fails\n    // the build then. Anyway, per cargo docs build script shouldn't output to\n    // anywhere but $OUT_DIR.\n    tonic_build::compile_protos(\"proto/interpreted_wal.proto\")\n        .unwrap_or_else(|e| panic!(\"failed to compile protos {e:?}\"));\n    Ok(())\n}\n"
  },
  {
    "path": "libs/wal_decoder/proto/interpreted_wal.proto",
    "content": "syntax = \"proto3\";\n\npackage interpreted_wal;\n\nmessage InterpretedWalRecords {\n  repeated InterpretedWalRecord records = 1;\n  optional uint64 next_record_lsn = 2;\n  optional uint64 raw_wal_start_lsn = 3;\n}\n\nmessage InterpretedWalRecord {\n  optional bytes metadata_record = 1;\n  SerializedValueBatch batch = 2;\n  uint64 next_record_lsn = 3;\n  bool flush_uncommitted = 4;\n  uint32 xid = 5;\n}\n\nmessage SerializedValueBatch {\n  bytes raw = 1;\n  repeated ValueMeta metadata = 2;\n  uint64 max_lsn = 3;\n  uint64 len = 4;\n}\n\nenum ValueMetaType {\n  Serialized = 0;\n  Observed = 1;\n}\n\nmessage ValueMeta {\n  ValueMetaType type = 1;\n  CompactKey key = 2;\n  uint64 lsn = 3;\n  optional uint64 batch_offset = 4;\n  optional uint64 len = 5;\n  optional bool will_init = 6;\n}\n\nmessage CompactKey {\n  uint64 high = 1;\n  uint64 low = 2;\n}\n\n"
  },
  {
    "path": "libs/wal_decoder/src/decoder.rs",
    "content": "//! This module contains logic for decoding and interpreting\n//! raw bytes which represent a raw Postgres WAL record.\n\nuse std::collections::HashMap;\n\nuse bytes::{Buf, Bytes};\nuse pageserver_api::key::rel_block_to_key;\nuse pageserver_api::reltag::{RelTag, SlruKind};\nuse pageserver_api::shard::ShardIdentity;\nuse postgres_ffi::walrecord::*;\nuse postgres_ffi::{PgMajorVersion, pg_constants};\nuse postgres_ffi_types::forknum::VISIBILITYMAP_FORKNUM;\nuse utils::lsn::Lsn;\n\nuse crate::models::*;\nuse crate::serialized_batch::SerializedValueBatch;\n\nimpl InterpretedWalRecord {\n    /// Decode and interpreted raw bytes which represent one Postgres WAL record.\n    /// Data blocks which do not match any of the provided shard identities are filtered out.\n    /// Shard 0 is a special case since it tracks all relation sizes. We only give it\n    /// the keys that are being written as that is enough for updating relation sizes.\n    pub fn from_bytes_filtered(\n        buf: Bytes,\n        shards: &[ShardIdentity],\n        next_record_lsn: Lsn,\n        pg_version: PgMajorVersion,\n    ) -> anyhow::Result<HashMap<ShardIdentity, InterpretedWalRecord>> {\n        let mut decoded = DecodedWALRecord::default();\n        decode_wal_record(buf, &mut decoded, pg_version)?;\n        let xid = decoded.xl_xid;\n\n        let flush_uncommitted = if decoded.is_dbase_create_copy(pg_version) {\n            FlushUncommittedRecords::Yes\n        } else {\n            FlushUncommittedRecords::No\n        };\n\n        let mut shard_records: HashMap<ShardIdentity, InterpretedWalRecord> =\n            HashMap::with_capacity(shards.len());\n        for shard in shards {\n            shard_records.insert(\n                *shard,\n                InterpretedWalRecord {\n                    metadata_record: None,\n                    batch: SerializedValueBatch::default(),\n                    next_record_lsn,\n                    flush_uncommitted,\n                    
xid,\n                },\n            );\n        }\n\n        MetadataRecord::from_decoded_filtered(\n            &decoded,\n            &mut shard_records,\n            next_record_lsn,\n            pg_version,\n        )?;\n        SerializedValueBatch::from_decoded_filtered(\n            decoded,\n            &mut shard_records,\n            next_record_lsn,\n            pg_version,\n        )?;\n\n        Ok(shard_records)\n    }\n}\n\nimpl MetadataRecord {\n    /// Populates the given `shard_records` with metadata records from this WAL record, if any,\n    /// discarding those belonging to other shards.\n    ///\n    /// Only metadata records relevant for the given shards are emitted. Currently, most metadata\n    /// records are broadcast to all shards for simplicity, but this should be improved.\n    fn from_decoded_filtered(\n        decoded: &DecodedWALRecord,\n        shard_records: &mut HashMap<ShardIdentity, InterpretedWalRecord>,\n        next_record_lsn: Lsn,\n        pg_version: PgMajorVersion,\n    ) -> anyhow::Result<()> {\n        // Note: this doesn't actually copy the bytes since\n        // the [`Bytes`] type implements it via a level of indirection.\n        let mut buf = decoded.record.clone();\n        buf.advance(decoded.main_data_offset);\n\n        // First, generate metadata records from the decoded WAL record.\n        let metadata_record = match decoded.xl_rmid {\n            pg_constants::RM_HEAP_ID | pg_constants::RM_HEAP2_ID => {\n                Self::decode_heapam_record(&mut buf, decoded, pg_version)?\n            }\n            pg_constants::RM_NEON_ID => Self::decode_neonmgr_record(&mut buf, decoded, pg_version)?,\n            // Handle other special record types\n            pg_constants::RM_SMGR_ID => Self::decode_smgr_record(&mut buf, decoded)?,\n            pg_constants::RM_DBASE_ID => Self::decode_dbase_record(&mut buf, decoded, pg_version)?,\n            pg_constants::RM_TBLSPC_ID => {\n                
tracing::trace!(\"XLOG_TBLSPC_CREATE/DROP is not handled yet\");\n                None\n            }\n            pg_constants::RM_CLOG_ID => Self::decode_clog_record(&mut buf, decoded, pg_version)?,\n            pg_constants::RM_XACT_ID => {\n                Self::decode_xact_record(&mut buf, decoded, next_record_lsn)?\n            }\n            pg_constants::RM_MULTIXACT_ID => {\n                Self::decode_multixact_record(&mut buf, decoded, pg_version)?\n            }\n            pg_constants::RM_RELMAP_ID => Self::decode_relmap_record(&mut buf, decoded)?,\n            // This is an odd duck. It needs to go to all shards.\n            // Since it uses the checkpoint image (that's initialized from CHECKPOINT_KEY\n            // in WalIngest::new), we have to send the whole DecodedWalRecord::record to\n            // the pageserver and decode it there.\n            //\n            // Alternatively, one can make the checkpoint part of the subscription protocol\n            // to the pageserver. 
This should work fine, but can be done at a later point.\n            pg_constants::RM_XLOG_ID => {\n                Self::decode_xlog_record(&mut buf, decoded, next_record_lsn)?\n            }\n            pg_constants::RM_LOGICALMSG_ID => {\n                Self::decode_logical_message_record(&mut buf, decoded)?\n            }\n            pg_constants::RM_STANDBY_ID => Self::decode_standby_record(&mut buf, decoded)?,\n            pg_constants::RM_REPLORIGIN_ID => Self::decode_replorigin_record(&mut buf, decoded)?,\n            _unexpected => {\n                // TODO: consider failing here instead of blindly doing something without\n                // understanding the protocol\n                None\n            }\n        };\n\n        // Next, filter the metadata record by shard.\n        for (shard, record) in shard_records.iter_mut() {\n            match metadata_record {\n                Some(\n                    MetadataRecord::Heapam(HeapamRecord::ClearVmBits(ref clear_vm_bits))\n                    | MetadataRecord::Neonrmgr(NeonrmgrRecord::ClearVmBits(ref clear_vm_bits)),\n                ) => {\n                    // Route VM page updates to the shards that own them. VM pages are stored in the VM fork\n                    // of the main relation. 
These are sharded and managed just like regular relation pages.\n                    // See: https://github.com/neondatabase/neon/issues/9855\n                    let is_local_vm_page = |heap_blk| {\n                        let vm_blk = pg_constants::HEAPBLK_TO_MAPBLOCK(heap_blk);\n                        shard.is_key_local(&rel_block_to_key(clear_vm_bits.vm_rel, vm_blk))\n                    };\n                    // Send the old and new VM page updates to their respective shards.\n                    let updated_old_heap_blkno = clear_vm_bits\n                        .old_heap_blkno\n                        .filter(|&blkno| is_local_vm_page(blkno));\n                    let updated_new_heap_blkno = clear_vm_bits\n                        .new_heap_blkno\n                        .filter(|&blkno| is_local_vm_page(blkno));\n                    // If neither VM page belongs to this shard, discard the record.\n                    if updated_old_heap_blkno.is_some() || updated_new_heap_blkno.is_some() {\n                        // Clone the record and update it for the current shard.\n                        let mut for_shard = metadata_record.clone();\n                        match for_shard {\n                            Some(\n                                MetadataRecord::Heapam(HeapamRecord::ClearVmBits(\n                                    ref mut clear_vm_bits,\n                                ))\n                                | MetadataRecord::Neonrmgr(NeonrmgrRecord::ClearVmBits(\n                                    ref mut clear_vm_bits,\n                                )),\n                            ) => {\n                                clear_vm_bits.old_heap_blkno = updated_old_heap_blkno;\n                                clear_vm_bits.new_heap_blkno = updated_new_heap_blkno;\n                                record.metadata_record = for_shard;\n                            }\n                            _ => {\n                                
unreachable!(\"for_shard is a clone of what we checked above\")\n                            }\n                        }\n                    }\n                }\n                Some(MetadataRecord::LogicalMessage(LogicalMessageRecord::Put(_))) => {\n                    // Filter LogicalMessage records (AUX files) to only be stored on shard zero\n                    if shard.is_shard_zero() {\n                        record.metadata_record = metadata_record;\n                        // No other shards should receive this record, so we stop traversing shards early.\n                        break;\n                    }\n                }\n                _ => {\n                    // All other metadata records are sent to all shards.\n                    record.metadata_record = metadata_record.clone();\n                }\n            }\n        }\n\n        Ok(())\n    }\n\n    fn decode_heapam_record(\n        buf: &mut Bytes,\n        decoded: &DecodedWALRecord,\n        pg_version: PgMajorVersion,\n    ) -> anyhow::Result<Option<MetadataRecord>> {\n        // Handle VM bit updates that are implicitly part of heap records.\n\n        // First, look at the record to determine which VM bits need\n        // to be cleared. 
If either of these variables is set, we\n        // need to clear the corresponding bits in the visibility map.\n        let mut new_heap_blkno: Option<u32> = None;\n        let mut old_heap_blkno: Option<u32> = None;\n        let mut flags = pg_constants::VISIBILITYMAP_VALID_BITS;\n\n        match pg_version {\n            PgMajorVersion::PG14 => {\n                if decoded.xl_rmid == pg_constants::RM_HEAP_ID {\n                    let info = decoded.xl_info & pg_constants::XLOG_HEAP_OPMASK;\n\n                    if info == pg_constants::XLOG_HEAP_INSERT {\n                        let xlrec = v14::XlHeapInsert::decode(buf);\n                        assert_eq!(0, buf.remaining());\n                        if (xlrec.flags & pg_constants::XLH_INSERT_ALL_VISIBLE_CLEARED) != 0 {\n                            new_heap_blkno = Some(decoded.blocks[0].blkno);\n                        }\n                    } else if info == pg_constants::XLOG_HEAP_DELETE {\n                        let xlrec = v14::XlHeapDelete::decode(buf);\n                        if (xlrec.flags & pg_constants::XLH_DELETE_ALL_VISIBLE_CLEARED) != 0 {\n                            new_heap_blkno = Some(decoded.blocks[0].blkno);\n                        }\n                    } else if info == pg_constants::XLOG_HEAP_UPDATE\n                        || info == pg_constants::XLOG_HEAP_HOT_UPDATE\n                    {\n                        let xlrec = v14::XlHeapUpdate::decode(buf);\n                        // the size of tuple data is inferred from the size of the record.\n                        // we can't validate the remaining number of bytes without parsing\n                        // the tuple data.\n                        if (xlrec.flags & pg_constants::XLH_UPDATE_OLD_ALL_VISIBLE_CLEARED) != 0 {\n                            old_heap_blkno = Some(decoded.blocks.last().unwrap().blkno);\n                        }\n                        if (xlrec.flags & 
pg_constants::XLH_UPDATE_NEW_ALL_VISIBLE_CLEARED) != 0 {\n                            // PostgreSQL only uses XLH_UPDATE_NEW_ALL_VISIBLE_CLEARED on a\n                            // non-HOT update where the new tuple goes to different page than\n                            // the old one. Otherwise, only XLH_UPDATE_OLD_ALL_VISIBLE_CLEARED is\n                            // set.\n                            new_heap_blkno = Some(decoded.blocks[0].blkno);\n                        }\n                    } else if info == pg_constants::XLOG_HEAP_LOCK {\n                        let xlrec = v14::XlHeapLock::decode(buf);\n                        if (xlrec.flags & pg_constants::XLH_LOCK_ALL_FROZEN_CLEARED) != 0 {\n                            old_heap_blkno = Some(decoded.blocks[0].blkno);\n                            flags = pg_constants::VISIBILITYMAP_ALL_FROZEN;\n                        }\n                    }\n                } else if decoded.xl_rmid == pg_constants::RM_HEAP2_ID {\n                    let info = decoded.xl_info & pg_constants::XLOG_HEAP_OPMASK;\n                    if info == pg_constants::XLOG_HEAP2_MULTI_INSERT {\n                        let xlrec = v14::XlHeapMultiInsert::decode(buf);\n\n                        let offset_array_len =\n                            if decoded.xl_info & pg_constants::XLOG_HEAP_INIT_PAGE > 0 {\n                                // the offsets array is omitted if XLOG_HEAP_INIT_PAGE is set\n                                0\n                            } else {\n                                size_of::<u16>() * xlrec.ntuples as usize\n                            };\n                        assert_eq!(offset_array_len, buf.remaining());\n\n                        if (xlrec.flags & pg_constants::XLH_INSERT_ALL_VISIBLE_CLEARED) != 0 {\n                            new_heap_blkno = Some(decoded.blocks[0].blkno);\n                        }\n                    } else if info == pg_constants::XLOG_HEAP2_LOCK_UPDATED {\n          
              let xlrec = v14::XlHeapLockUpdated::decode(buf);\n                        if (xlrec.flags & pg_constants::XLH_LOCK_ALL_FROZEN_CLEARED) != 0 {\n                            old_heap_blkno = Some(decoded.blocks[0].blkno);\n                            flags = pg_constants::VISIBILITYMAP_ALL_FROZEN;\n                        }\n                    }\n                } else {\n                    anyhow::bail!(\"Unknown RMGR {} for Heap decoding\", decoded.xl_rmid);\n                }\n            }\n            PgMajorVersion::PG15 => {\n                if decoded.xl_rmid == pg_constants::RM_HEAP_ID {\n                    let info = decoded.xl_info & pg_constants::XLOG_HEAP_OPMASK;\n\n                    if info == pg_constants::XLOG_HEAP_INSERT {\n                        let xlrec = v15::XlHeapInsert::decode(buf);\n                        assert_eq!(0, buf.remaining());\n                        if (xlrec.flags & pg_constants::XLH_INSERT_ALL_VISIBLE_CLEARED) != 0 {\n                            new_heap_blkno = Some(decoded.blocks[0].blkno);\n                        }\n                    } else if info == pg_constants::XLOG_HEAP_DELETE {\n                        let xlrec = v15::XlHeapDelete::decode(buf);\n                        if (xlrec.flags & pg_constants::XLH_DELETE_ALL_VISIBLE_CLEARED) != 0 {\n                            new_heap_blkno = Some(decoded.blocks[0].blkno);\n                        }\n                    } else if info == pg_constants::XLOG_HEAP_UPDATE\n                        || info == pg_constants::XLOG_HEAP_HOT_UPDATE\n                    {\n                        let xlrec = v15::XlHeapUpdate::decode(buf);\n                        // the size of tuple data is inferred from the size of the record.\n                        // we can't validate the remaining number of bytes without parsing\n                        // the tuple data.\n                        if (xlrec.flags & pg_constants::XLH_UPDATE_OLD_ALL_VISIBLE_CLEARED) != 0 {\n     
                       old_heap_blkno = Some(decoded.blocks.last().unwrap().blkno);\n                        }\n                        if (xlrec.flags & pg_constants::XLH_UPDATE_NEW_ALL_VISIBLE_CLEARED) != 0 {\n                            // PostgreSQL only uses XLH_UPDATE_NEW_ALL_VISIBLE_CLEARED on a\n                            // non-HOT update where the new tuple goes to different page than\n                            // the old one. Otherwise, only XLH_UPDATE_OLD_ALL_VISIBLE_CLEARED is\n                            // set.\n                            new_heap_blkno = Some(decoded.blocks[0].blkno);\n                        }\n                    } else if info == pg_constants::XLOG_HEAP_LOCK {\n                        let xlrec = v15::XlHeapLock::decode(buf);\n                        if (xlrec.flags & pg_constants::XLH_LOCK_ALL_FROZEN_CLEARED) != 0 {\n                            old_heap_blkno = Some(decoded.blocks[0].blkno);\n                            flags = pg_constants::VISIBILITYMAP_ALL_FROZEN;\n                        }\n                    }\n                } else if decoded.xl_rmid == pg_constants::RM_HEAP2_ID {\n                    let info = decoded.xl_info & pg_constants::XLOG_HEAP_OPMASK;\n                    if info == pg_constants::XLOG_HEAP2_MULTI_INSERT {\n                        let xlrec = v15::XlHeapMultiInsert::decode(buf);\n\n                        let offset_array_len =\n                            if decoded.xl_info & pg_constants::XLOG_HEAP_INIT_PAGE > 0 {\n                                // the offsets array is omitted if XLOG_HEAP_INIT_PAGE is set\n                                0\n                            } else {\n                                size_of::<u16>() * xlrec.ntuples as usize\n                            };\n                        assert_eq!(offset_array_len, buf.remaining());\n\n                        if (xlrec.flags & pg_constants::XLH_INSERT_ALL_VISIBLE_CLEARED) != 0 {\n                            
new_heap_blkno = Some(decoded.blocks[0].blkno);\n                        }\n                    } else if info == pg_constants::XLOG_HEAP2_LOCK_UPDATED {\n                        let xlrec = v15::XlHeapLockUpdated::decode(buf);\n                        if (xlrec.flags & pg_constants::XLH_LOCK_ALL_FROZEN_CLEARED) != 0 {\n                            old_heap_blkno = Some(decoded.blocks[0].blkno);\n                            flags = pg_constants::VISIBILITYMAP_ALL_FROZEN;\n                        }\n                    }\n                } else {\n                    anyhow::bail!(\"Unknown RMGR {} for Heap decoding\", decoded.xl_rmid);\n                }\n            }\n            PgMajorVersion::PG16 => {\n                if decoded.xl_rmid == pg_constants::RM_HEAP_ID {\n                    let info = decoded.xl_info & pg_constants::XLOG_HEAP_OPMASK;\n\n                    if info == pg_constants::XLOG_HEAP_INSERT {\n                        let xlrec = v16::XlHeapInsert::decode(buf);\n                        assert_eq!(0, buf.remaining());\n                        if (xlrec.flags & pg_constants::XLH_INSERT_ALL_VISIBLE_CLEARED) != 0 {\n                            new_heap_blkno = Some(decoded.blocks[0].blkno);\n                        }\n                    } else if info == pg_constants::XLOG_HEAP_DELETE {\n                        let xlrec = v16::XlHeapDelete::decode(buf);\n                        if (xlrec.flags & pg_constants::XLH_DELETE_ALL_VISIBLE_CLEARED) != 0 {\n                            new_heap_blkno = Some(decoded.blocks[0].blkno);\n                        }\n                    } else if info == pg_constants::XLOG_HEAP_UPDATE\n                        || info == pg_constants::XLOG_HEAP_HOT_UPDATE\n                    {\n                        let xlrec = v16::XlHeapUpdate::decode(buf);\n                        // the size of tuple data is inferred from the size of the record.\n                        // we can't validate the remaining number of bytes 
without parsing\n                        // the tuple data.\n                        if (xlrec.flags & pg_constants::XLH_UPDATE_OLD_ALL_VISIBLE_CLEARED) != 0 {\n                            old_heap_blkno = Some(decoded.blocks.last().unwrap().blkno);\n                        }\n                        if (xlrec.flags & pg_constants::XLH_UPDATE_NEW_ALL_VISIBLE_CLEARED) != 0 {\n                            // PostgreSQL only uses XLH_UPDATE_NEW_ALL_VISIBLE_CLEARED on a\n                            // non-HOT update where the new tuple goes to different page than\n                            // the old one. Otherwise, only XLH_UPDATE_OLD_ALL_VISIBLE_CLEARED is\n                            // set.\n                            new_heap_blkno = Some(decoded.blocks[0].blkno);\n                        }\n                    } else if info == pg_constants::XLOG_HEAP_LOCK {\n                        let xlrec = v16::XlHeapLock::decode(buf);\n                        if (xlrec.flags & pg_constants::XLH_LOCK_ALL_FROZEN_CLEARED) != 0 {\n                            old_heap_blkno = Some(decoded.blocks[0].blkno);\n                            flags = pg_constants::VISIBILITYMAP_ALL_FROZEN;\n                        }\n                    }\n                } else if decoded.xl_rmid == pg_constants::RM_HEAP2_ID {\n                    let info = decoded.xl_info & pg_constants::XLOG_HEAP_OPMASK;\n                    if info == pg_constants::XLOG_HEAP2_MULTI_INSERT {\n                        let xlrec = v16::XlHeapMultiInsert::decode(buf);\n\n                        let offset_array_len =\n                            if decoded.xl_info & pg_constants::XLOG_HEAP_INIT_PAGE > 0 {\n                                // the offsets array is omitted if XLOG_HEAP_INIT_PAGE is set\n                                0\n                            } else {\n                                size_of::<u16>() * xlrec.ntuples as usize\n                            };\n                        
assert_eq!(offset_array_len, buf.remaining());\n\n                        if (xlrec.flags & pg_constants::XLH_INSERT_ALL_VISIBLE_CLEARED) != 0 {\n                            new_heap_blkno = Some(decoded.blocks[0].blkno);\n                        }\n                    } else if info == pg_constants::XLOG_HEAP2_LOCK_UPDATED {\n                        let xlrec = v16::XlHeapLockUpdated::decode(buf);\n                        if (xlrec.flags & pg_constants::XLH_LOCK_ALL_FROZEN_CLEARED) != 0 {\n                            old_heap_blkno = Some(decoded.blocks[0].blkno);\n                            flags = pg_constants::VISIBILITYMAP_ALL_FROZEN;\n                        }\n                    }\n                } else {\n                    anyhow::bail!(\"Unknown RMGR {} for Heap decoding\", decoded.xl_rmid);\n                }\n            }\n            PgMajorVersion::PG17 => {\n                if decoded.xl_rmid == pg_constants::RM_HEAP_ID {\n                    let info = decoded.xl_info & pg_constants::XLOG_HEAP_OPMASK;\n\n                    if info == pg_constants::XLOG_HEAP_INSERT {\n                        let xlrec = v17::XlHeapInsert::decode(buf);\n                        assert_eq!(0, buf.remaining());\n                        if (xlrec.flags & pg_constants::XLH_INSERT_ALL_VISIBLE_CLEARED) != 0 {\n                            new_heap_blkno = Some(decoded.blocks[0].blkno);\n                        }\n                    } else if info == pg_constants::XLOG_HEAP_DELETE {\n                        let xlrec = v17::XlHeapDelete::decode(buf);\n                        if (xlrec.flags & pg_constants::XLH_DELETE_ALL_VISIBLE_CLEARED) != 0 {\n                            new_heap_blkno = Some(decoded.blocks[0].blkno);\n                        }\n                    } else if info == pg_constants::XLOG_HEAP_UPDATE\n                        || info == pg_constants::XLOG_HEAP_HOT_UPDATE\n                    {\n                        let xlrec = 
v17::XlHeapUpdate::decode(buf);\n                        // the size of tuple data is inferred from the size of the record.\n                        // we can't validate the remaining number of bytes without parsing\n                        // the tuple data.\n                        if (xlrec.flags & pg_constants::XLH_UPDATE_OLD_ALL_VISIBLE_CLEARED) != 0 {\n                            old_heap_blkno = Some(decoded.blocks.last().unwrap().blkno);\n                        }\n                        if (xlrec.flags & pg_constants::XLH_UPDATE_NEW_ALL_VISIBLE_CLEARED) != 0 {\n                            // PostgreSQL only uses XLH_UPDATE_NEW_ALL_VISIBLE_CLEARED on a\n                            // non-HOT update where the new tuple goes to different page than\n                            // the old one. Otherwise, only XLH_UPDATE_OLD_ALL_VISIBLE_CLEARED is\n                            // set.\n                            new_heap_blkno = Some(decoded.blocks[0].blkno);\n                        }\n                    } else if info == pg_constants::XLOG_HEAP_LOCK {\n                        let xlrec = v17::XlHeapLock::decode(buf);\n                        if (xlrec.flags & pg_constants::XLH_LOCK_ALL_FROZEN_CLEARED) != 0 {\n                            old_heap_blkno = Some(decoded.blocks[0].blkno);\n                            flags = pg_constants::VISIBILITYMAP_ALL_FROZEN;\n                        }\n                    }\n                } else if decoded.xl_rmid == pg_constants::RM_HEAP2_ID {\n                    let info = decoded.xl_info & pg_constants::XLOG_HEAP_OPMASK;\n                    if info == pg_constants::XLOG_HEAP2_MULTI_INSERT {\n                        let xlrec = v17::XlHeapMultiInsert::decode(buf);\n\n                        let offset_array_len =\n                            if decoded.xl_info & pg_constants::XLOG_HEAP_INIT_PAGE > 0 {\n                                // the offsets array is omitted if XLOG_HEAP_INIT_PAGE is set\n                       
         0\n                            } else {\n                                size_of::<u16>() * xlrec.ntuples as usize\n                            };\n                        assert_eq!(offset_array_len, buf.remaining());\n\n                        if (xlrec.flags & pg_constants::XLH_INSERT_ALL_VISIBLE_CLEARED) != 0 {\n                            new_heap_blkno = Some(decoded.blocks[0].blkno);\n                        }\n                    } else if info == pg_constants::XLOG_HEAP2_LOCK_UPDATED {\n                        let xlrec = v17::XlHeapLockUpdated::decode(buf);\n                        if (xlrec.flags & pg_constants::XLH_LOCK_ALL_FROZEN_CLEARED) != 0 {\n                            old_heap_blkno = Some(decoded.blocks[0].blkno);\n                            flags = pg_constants::VISIBILITYMAP_ALL_FROZEN;\n                        }\n                    }\n                } else {\n                    anyhow::bail!(\"Unknown RMGR {} for Heap decoding\", decoded.xl_rmid);\n                }\n            }\n        }\n\n        if new_heap_blkno.is_some() || old_heap_blkno.is_some() {\n            let vm_rel = RelTag {\n                forknum: VISIBILITYMAP_FORKNUM,\n                spcnode: decoded.blocks[0].rnode_spcnode,\n                dbnode: decoded.blocks[0].rnode_dbnode,\n                relnode: decoded.blocks[0].rnode_relnode,\n            };\n\n            Ok(Some(MetadataRecord::Heapam(HeapamRecord::ClearVmBits(\n                ClearVmBits {\n                    new_heap_blkno,\n                    old_heap_blkno,\n                    vm_rel,\n                    flags,\n                },\n            ))))\n        } else {\n            Ok(None)\n        }\n    }\n\n    fn decode_neonmgr_record(\n        buf: &mut Bytes,\n        decoded: &DecodedWALRecord,\n        pg_version: PgMajorVersion,\n    ) -> anyhow::Result<Option<MetadataRecord>> {\n        // Handle VM bit updates that are implicitly part of heap records.\n\n        // First, 
look at the record to determine which VM bits need\n        // to be cleared. If either of these variables is set, we\n        // need to clear the corresponding bits in the visibility map.\n        let mut new_heap_blkno: Option<u32> = None;\n        let mut old_heap_blkno: Option<u32> = None;\n        let mut flags = pg_constants::VISIBILITYMAP_VALID_BITS;\n\n        assert_eq!(decoded.xl_rmid, pg_constants::RM_NEON_ID);\n\n        match pg_version {\n            PgMajorVersion::PG16 | PgMajorVersion::PG17 => {\n                let info = decoded.xl_info & pg_constants::XLOG_HEAP_OPMASK;\n\n                match info {\n                    pg_constants::XLOG_NEON_HEAP_INSERT => {\n                        let xlrec = v17::rm_neon::XlNeonHeapInsert::decode(buf);\n                        assert_eq!(0, buf.remaining());\n                        if (xlrec.flags & pg_constants::XLH_INSERT_ALL_VISIBLE_CLEARED) != 0 {\n                            new_heap_blkno = Some(decoded.blocks[0].blkno);\n                        }\n                    }\n                    pg_constants::XLOG_NEON_HEAP_DELETE => {\n                        let xlrec = v17::rm_neon::XlNeonHeapDelete::decode(buf);\n                        if (xlrec.flags & pg_constants::XLH_DELETE_ALL_VISIBLE_CLEARED) != 0 {\n                            new_heap_blkno = Some(decoded.blocks[0].blkno);\n                        }\n                    }\n                    pg_constants::XLOG_NEON_HEAP_UPDATE\n                    | pg_constants::XLOG_NEON_HEAP_HOT_UPDATE => {\n                        let xlrec = v17::rm_neon::XlNeonHeapUpdate::decode(buf);\n                        // the size of tuple data is inferred from the size of the record.\n                        // we can't validate the remaining number of bytes without parsing\n                        // the tuple data.\n                        if (xlrec.flags & pg_constants::XLH_UPDATE_OLD_ALL_VISIBLE_CLEARED) != 0 {\n                            old_heap_blkno 
= Some(decoded.blocks.last().unwrap().blkno);\n                        }\n                        if (xlrec.flags & pg_constants::XLH_UPDATE_NEW_ALL_VISIBLE_CLEARED) != 0 {\n                            // PostgreSQL only uses XLH_UPDATE_NEW_ALL_VISIBLE_CLEARED on a\n                            // non-HOT update where the new tuple goes to different page than\n                            // the old one. Otherwise, only XLH_UPDATE_OLD_ALL_VISIBLE_CLEARED is\n                            // set.\n                            new_heap_blkno = Some(decoded.blocks[0].blkno);\n                        }\n                    }\n                    pg_constants::XLOG_NEON_HEAP_MULTI_INSERT => {\n                        let xlrec = v17::rm_neon::XlNeonHeapMultiInsert::decode(buf);\n\n                        let offset_array_len =\n                            if decoded.xl_info & pg_constants::XLOG_HEAP_INIT_PAGE > 0 {\n                                // the offsets array is omitted if XLOG_HEAP_INIT_PAGE is set\n                                0\n                            } else {\n                                size_of::<u16>() * xlrec.ntuples as usize\n                            };\n                        assert_eq!(offset_array_len, buf.remaining());\n\n                        if (xlrec.flags & pg_constants::XLH_INSERT_ALL_VISIBLE_CLEARED) != 0 {\n                            new_heap_blkno = Some(decoded.blocks[0].blkno);\n                        }\n                    }\n                    pg_constants::XLOG_NEON_HEAP_LOCK => {\n                        let xlrec = v17::rm_neon::XlNeonHeapLock::decode(buf);\n                        if (xlrec.flags & pg_constants::XLH_LOCK_ALL_FROZEN_CLEARED) != 0 {\n                            old_heap_blkno = Some(decoded.blocks[0].blkno);\n                            flags = pg_constants::VISIBILITYMAP_ALL_FROZEN;\n                        }\n                    }\n                    info => anyhow::bail!(\"Unknown WAL record type 
for Neon RMGR: {}\", info),\n                }\n            }\n            PgMajorVersion::PG15 | PgMajorVersion::PG14 => anyhow::bail!(\n                \"Neon RMGR has no known compatibility with PostgreSQL version {}\",\n                pg_version\n            ),\n        }\n\n        if new_heap_blkno.is_some() || old_heap_blkno.is_some() {\n            let vm_rel = RelTag {\n                forknum: VISIBILITYMAP_FORKNUM,\n                spcnode: decoded.blocks[0].rnode_spcnode,\n                dbnode: decoded.blocks[0].rnode_dbnode,\n                relnode: decoded.blocks[0].rnode_relnode,\n            };\n\n            Ok(Some(MetadataRecord::Neonrmgr(NeonrmgrRecord::ClearVmBits(\n                ClearVmBits {\n                    new_heap_blkno,\n                    old_heap_blkno,\n                    vm_rel,\n                    flags,\n                },\n            ))))\n        } else {\n            Ok(None)\n        }\n    }\n\n    fn decode_smgr_record(\n        buf: &mut Bytes,\n        decoded: &DecodedWALRecord,\n    ) -> anyhow::Result<Option<MetadataRecord>> {\n        let info = decoded.xl_info & pg_constants::XLR_RMGR_INFO_MASK;\n        if info == pg_constants::XLOG_SMGR_CREATE {\n            let create = XlSmgrCreate::decode(buf);\n            let rel = RelTag {\n                spcnode: create.rnode.spcnode,\n                dbnode: create.rnode.dbnode,\n                relnode: create.rnode.relnode,\n                forknum: create.forknum,\n            };\n\n            return Ok(Some(MetadataRecord::Smgr(SmgrRecord::Create(SmgrCreate {\n                rel,\n            }))));\n        } else if info == pg_constants::XLOG_SMGR_TRUNCATE {\n            let truncate = XlSmgrTruncate::decode(buf);\n            return Ok(Some(MetadataRecord::Smgr(SmgrRecord::Truncate(truncate))));\n        }\n\n        Ok(None)\n    }\n\n    fn decode_dbase_record(\n        buf: &mut Bytes,\n        decoded: &DecodedWALRecord,\n        pg_version: 
PgMajorVersion,\n    ) -> anyhow::Result<Option<MetadataRecord>> {\n        // TODO: Refactor this to avoid the duplication between postgres versions.\n\n        let info = decoded.xl_info & pg_constants::XLR_RMGR_INFO_MASK;\n        tracing::debug!(%info, %pg_version, \"handle RM_DBASE_ID\");\n\n        match pg_version {\n            PgMajorVersion::PG14 => {\n                if info == postgres_ffi::v14::bindings::XLOG_DBASE_CREATE {\n                    let createdb = XlCreateDatabase::decode(buf);\n                    tracing::debug!(\"XLOG_DBASE_CREATE v14\");\n\n                    let record = MetadataRecord::Dbase(DbaseRecord::Create(DbaseCreate {\n                        db_id: createdb.db_id,\n                        tablespace_id: createdb.tablespace_id,\n                        src_db_id: createdb.src_db_id,\n                        src_tablespace_id: createdb.src_tablespace_id,\n                    }));\n\n                    return Ok(Some(record));\n                } else if info == postgres_ffi::v14::bindings::XLOG_DBASE_DROP {\n                    let dropdb = XlDropDatabase::decode(buf);\n\n                    let record = MetadataRecord::Dbase(DbaseRecord::Drop(DbaseDrop {\n                        db_id: dropdb.db_id,\n                        tablespace_ids: dropdb.tablespace_ids,\n                    }));\n\n                    return Ok(Some(record));\n                }\n            }\n            PgMajorVersion::PG15 => {\n                if info == postgres_ffi::v15::bindings::XLOG_DBASE_CREATE_WAL_LOG {\n                    tracing::debug!(\"XLOG_DBASE_CREATE_WAL_LOG: noop\");\n                } else if info == postgres_ffi::v15::bindings::XLOG_DBASE_CREATE_FILE_COPY {\n                    // The XLOG record was renamed between v14 and v15,\n                    // but the record format is the same.\n                    // So we can reuse XlCreateDatabase here.\n                    tracing::debug!(\"XLOG_DBASE_CREATE_FILE_COPY\");\n\n        
            let createdb = XlCreateDatabase::decode(buf);\n                    let record = MetadataRecord::Dbase(DbaseRecord::Create(DbaseCreate {\n                        db_id: createdb.db_id,\n                        tablespace_id: createdb.tablespace_id,\n                        src_db_id: createdb.src_db_id,\n                        src_tablespace_id: createdb.src_tablespace_id,\n                    }));\n\n                    return Ok(Some(record));\n                } else if info == postgres_ffi::v15::bindings::XLOG_DBASE_DROP {\n                    let dropdb = XlDropDatabase::decode(buf);\n                    let record = MetadataRecord::Dbase(DbaseRecord::Drop(DbaseDrop {\n                        db_id: dropdb.db_id,\n                        tablespace_ids: dropdb.tablespace_ids,\n                    }));\n\n                    return Ok(Some(record));\n                }\n            }\n            PgMajorVersion::PG16 => {\n                if info == postgres_ffi::v16::bindings::XLOG_DBASE_CREATE_WAL_LOG {\n                    tracing::debug!(\"XLOG_DBASE_CREATE_WAL_LOG: noop\");\n                } else if info == postgres_ffi::v16::bindings::XLOG_DBASE_CREATE_FILE_COPY {\n                    // The XLOG record was renamed between v14 and v15,\n                    // but the record format is the same.\n                    // So we can reuse XlCreateDatabase here.\n                    tracing::debug!(\"XLOG_DBASE_CREATE_FILE_COPY\");\n\n                    let createdb = XlCreateDatabase::decode(buf);\n                    let record = MetadataRecord::Dbase(DbaseRecord::Create(DbaseCreate {\n                        db_id: createdb.db_id,\n                        tablespace_id: createdb.tablespace_id,\n                        src_db_id: createdb.src_db_id,\n                        src_tablespace_id: createdb.src_tablespace_id,\n                    }));\n\n                    return Ok(Some(record));\n                } else if info == 
postgres_ffi::v16::bindings::XLOG_DBASE_DROP {\n                    let dropdb = XlDropDatabase::decode(buf);\n                    let record = MetadataRecord::Dbase(DbaseRecord::Drop(DbaseDrop {\n                        db_id: dropdb.db_id,\n                        tablespace_ids: dropdb.tablespace_ids,\n                    }));\n\n                    return Ok(Some(record));\n                }\n            }\n            PgMajorVersion::PG17 => {\n                if info == postgres_ffi::v17::bindings::XLOG_DBASE_CREATE_WAL_LOG {\n                    tracing::debug!(\"XLOG_DBASE_CREATE_WAL_LOG: noop\");\n                } else if info == postgres_ffi::v17::bindings::XLOG_DBASE_CREATE_FILE_COPY {\n                    // The XLOG record was renamed between v14 and v15,\n                    // but the record format is the same.\n                    // So we can reuse XlCreateDatabase here.\n                    tracing::debug!(\"XLOG_DBASE_CREATE_FILE_COPY\");\n\n                    let createdb = XlCreateDatabase::decode(buf);\n                    let record = MetadataRecord::Dbase(DbaseRecord::Create(DbaseCreate {\n                        db_id: createdb.db_id,\n                        tablespace_id: createdb.tablespace_id,\n                        src_db_id: createdb.src_db_id,\n                        src_tablespace_id: createdb.src_tablespace_id,\n                    }));\n\n                    return Ok(Some(record));\n                } else if info == postgres_ffi::v17::bindings::XLOG_DBASE_DROP {\n                    let dropdb = XlDropDatabase::decode(buf);\n                    let record = MetadataRecord::Dbase(DbaseRecord::Drop(DbaseDrop {\n                        db_id: dropdb.db_id,\n                        tablespace_ids: dropdb.tablespace_ids,\n                    }));\n\n                    return Ok(Some(record));\n                }\n            }\n        }\n\n        Ok(None)\n    }\n\n    fn decode_clog_record(\n        buf: &mut Bytes,\n        
decoded: &DecodedWALRecord,\n        pg_version: PgMajorVersion,\n    ) -> anyhow::Result<Option<MetadataRecord>> {\n        let info = decoded.xl_info & !pg_constants::XLR_INFO_MASK;\n\n        if info == pg_constants::CLOG_ZEROPAGE {\n            let pageno = if pg_version < PgMajorVersion::PG17 {\n                buf.get_u32_le()\n            } else {\n                buf.get_u64_le() as u32\n            };\n            let segno = pageno / pg_constants::SLRU_PAGES_PER_SEGMENT;\n            let rpageno = pageno % pg_constants::SLRU_PAGES_PER_SEGMENT;\n\n            Ok(Some(MetadataRecord::Clog(ClogRecord::ZeroPage(\n                ClogZeroPage { segno, rpageno },\n            ))))\n        } else {\n            assert_eq!(info, pg_constants::CLOG_TRUNCATE);\n            let xlrec = XlClogTruncate::decode(buf, pg_version);\n\n            Ok(Some(MetadataRecord::Clog(ClogRecord::Truncate(\n                ClogTruncate {\n                    pageno: xlrec.pageno,\n                    oldest_xid: xlrec.oldest_xid,\n                    oldest_xid_db: xlrec.oldest_xid_db,\n                },\n            ))))\n        }\n    }\n\n    fn decode_xact_record(\n        buf: &mut Bytes,\n        decoded: &DecodedWALRecord,\n        lsn: Lsn,\n    ) -> anyhow::Result<Option<MetadataRecord>> {\n        let info = decoded.xl_info & pg_constants::XLOG_XACT_OPMASK;\n        let origin_id = decoded.origin_id;\n        let xl_xid = decoded.xl_xid;\n\n        if info == pg_constants::XLOG_XACT_COMMIT {\n            let parsed = XlXactParsedRecord::decode(buf, decoded.xl_xid, decoded.xl_info);\n            return Ok(Some(MetadataRecord::Xact(XactRecord::Commit(XactCommon {\n                parsed,\n                origin_id,\n                xl_xid,\n                lsn,\n            }))));\n        } else if info == pg_constants::XLOG_XACT_ABORT {\n            let parsed = XlXactParsedRecord::decode(buf, decoded.xl_xid, decoded.xl_info);\n            return 
Ok(Some(MetadataRecord::Xact(XactRecord::Abort(XactCommon {\n                parsed,\n                origin_id,\n                xl_xid,\n                lsn,\n            }))));\n        } else if info == pg_constants::XLOG_XACT_COMMIT_PREPARED {\n            let parsed = XlXactParsedRecord::decode(buf, decoded.xl_xid, decoded.xl_info);\n            return Ok(Some(MetadataRecord::Xact(XactRecord::CommitPrepared(\n                XactCommon {\n                    parsed,\n                    origin_id,\n                    xl_xid,\n                    lsn,\n                },\n            ))));\n        } else if info == pg_constants::XLOG_XACT_ABORT_PREPARED {\n            let parsed = XlXactParsedRecord::decode(buf, decoded.xl_xid, decoded.xl_info);\n            return Ok(Some(MetadataRecord::Xact(XactRecord::AbortPrepared(\n                XactCommon {\n                    parsed,\n                    origin_id,\n                    xl_xid,\n                    lsn,\n                },\n            ))));\n        } else if info == pg_constants::XLOG_XACT_PREPARE {\n            return Ok(Some(MetadataRecord::Xact(XactRecord::Prepare(\n                XactPrepare {\n                    xl_xid: decoded.xl_xid,\n                    data: Bytes::copy_from_slice(&buf[..]),\n                },\n            ))));\n        }\n\n        Ok(None)\n    }\n\n    fn decode_multixact_record(\n        buf: &mut Bytes,\n        decoded: &DecodedWALRecord,\n        pg_version: PgMajorVersion,\n    ) -> anyhow::Result<Option<MetadataRecord>> {\n        let info = decoded.xl_info & pg_constants::XLR_RMGR_INFO_MASK;\n\n        if info == pg_constants::XLOG_MULTIXACT_ZERO_OFF_PAGE\n            || info == pg_constants::XLOG_MULTIXACT_ZERO_MEM_PAGE\n        {\n            let pageno = if pg_version < PgMajorVersion::PG17 {\n                buf.get_u32_le()\n            } else {\n                buf.get_u64_le() as u32\n            };\n            let segno = pageno / 
pg_constants::SLRU_PAGES_PER_SEGMENT;\n            let rpageno = pageno % pg_constants::SLRU_PAGES_PER_SEGMENT;\n\n            let slru_kind = match info {\n                pg_constants::XLOG_MULTIXACT_ZERO_OFF_PAGE => SlruKind::MultiXactOffsets,\n                pg_constants::XLOG_MULTIXACT_ZERO_MEM_PAGE => SlruKind::MultiXactMembers,\n                _ => unreachable!(),\n            };\n\n            return Ok(Some(MetadataRecord::MultiXact(MultiXactRecord::ZeroPage(\n                MultiXactZeroPage {\n                    slru_kind,\n                    segno,\n                    rpageno,\n                },\n            ))));\n        } else if info == pg_constants::XLOG_MULTIXACT_CREATE_ID {\n            let xlrec = XlMultiXactCreate::decode(buf);\n            return Ok(Some(MetadataRecord::MultiXact(MultiXactRecord::Create(\n                xlrec,\n            ))));\n        } else if info == pg_constants::XLOG_MULTIXACT_TRUNCATE_ID {\n            let xlrec = XlMultiXactTruncate::decode(buf);\n            return Ok(Some(MetadataRecord::MultiXact(MultiXactRecord::Truncate(\n                xlrec,\n            ))));\n        }\n\n        Ok(None)\n    }\n\n    fn decode_relmap_record(\n        buf: &mut Bytes,\n        decoded: &DecodedWALRecord,\n    ) -> anyhow::Result<Option<MetadataRecord>> {\n        let update = XlRelmapUpdate::decode(buf);\n\n        let mut buf = decoded.record.clone();\n        buf.advance(decoded.main_data_offset);\n        // skip xl_relmap_update\n        buf.advance(12);\n\n        Ok(Some(MetadataRecord::Relmap(RelmapRecord::Update(\n            RelmapUpdate {\n                update,\n                buf: Bytes::copy_from_slice(&buf[..]),\n            },\n        ))))\n    }\n\n    fn decode_xlog_record(\n        buf: &mut Bytes,\n        decoded: &DecodedWALRecord,\n        lsn: Lsn,\n    ) -> anyhow::Result<Option<MetadataRecord>> {\n        let info = decoded.xl_info & pg_constants::XLR_RMGR_INFO_MASK;\n        
Ok(Some(MetadataRecord::Xlog(XlogRecord::Raw(RawXlogRecord {\n            info,\n            lsn,\n            buf: buf.clone(),\n        }))))\n    }\n\n    fn decode_logical_message_record(\n        buf: &mut Bytes,\n        decoded: &DecodedWALRecord,\n    ) -> anyhow::Result<Option<MetadataRecord>> {\n        let info = decoded.xl_info & pg_constants::XLR_RMGR_INFO_MASK;\n        if info == pg_constants::XLOG_LOGICAL_MESSAGE {\n            let xlrec = XlLogicalMessage::decode(buf);\n            let prefix = std::str::from_utf8(&buf[0..xlrec.prefix_size - 1])?;\n\n            #[cfg(feature = \"testing\")]\n            if prefix == \"neon-test\" {\n                return Ok(Some(MetadataRecord::LogicalMessage(\n                    LogicalMessageRecord::Failpoint,\n                )));\n            }\n\n            if let Some(path) = prefix.strip_prefix(\"neon-file:\") {\n                let buf_size = xlrec.prefix_size + xlrec.message_size;\n                let buf = Bytes::copy_from_slice(&buf[xlrec.prefix_size..buf_size]);\n                return Ok(Some(MetadataRecord::LogicalMessage(\n                    LogicalMessageRecord::Put(PutLogicalMessage {\n                        path: path.to_string(),\n                        buf,\n                    }),\n                )));\n            }\n        }\n\n        Ok(None)\n    }\n\n    fn decode_standby_record(\n        buf: &mut Bytes,\n        decoded: &DecodedWALRecord,\n    ) -> anyhow::Result<Option<MetadataRecord>> {\n        let info = decoded.xl_info & pg_constants::XLR_RMGR_INFO_MASK;\n        if info == pg_constants::XLOG_RUNNING_XACTS {\n            let xlrec = XlRunningXacts::decode(buf);\n            return Ok(Some(MetadataRecord::Standby(StandbyRecord::RunningXacts(\n                StandbyRunningXacts {\n                    oldest_running_xid: xlrec.oldest_running_xid,\n                },\n            ))));\n        }\n\n        Ok(None)\n    }\n\n    fn decode_replorigin_record(\n        buf: 
&mut Bytes,\n        decoded: &DecodedWALRecord,\n    ) -> anyhow::Result<Option<MetadataRecord>> {\n        let info = decoded.xl_info & pg_constants::XLR_RMGR_INFO_MASK;\n        if info == pg_constants::XLOG_REPLORIGIN_SET {\n            let xlrec = XlReploriginSet::decode(buf);\n            return Ok(Some(MetadataRecord::Replorigin(ReploriginRecord::Set(\n                xlrec,\n            ))));\n        } else if info == pg_constants::XLOG_REPLORIGIN_DROP {\n            let xlrec = XlReploriginDrop::decode(buf);\n            return Ok(Some(MetadataRecord::Replorigin(ReploriginRecord::Drop(\n                xlrec,\n            ))));\n        }\n\n        Ok(None)\n    }\n}\n"
  },
  {
    "path": "libs/wal_decoder/src/lib.rs",
    "content": "pub mod decoder;\npub mod models;\npub mod serialized_batch;\npub mod wire_format;\n"
  },
  {
    "path": "libs/wal_decoder/src/models/record.rs",
    "content": "//! This module defines the WAL record format used within the pageserver.\n\nuse bytes::Bytes;\nuse postgres_ffi::walrecord::{MultiXactMember, describe_postgres_wal_record};\nuse postgres_ffi::{MultiXactId, MultiXactOffset, TransactionId};\nuse postgres_ffi_types::TimestampTz;\nuse serde::{Deserialize, Serialize};\nuse utils::bin_ser::DeserializeError;\n\n/// Each update to a page is represented by a NeonWalRecord. It can be a wrapper\n/// around a PostgreSQL WAL record, or a custom neon-specific \"record\".\n#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]\npub enum NeonWalRecord {\n    /// Native PostgreSQL WAL record\n    Postgres { will_init: bool, rec: Bytes },\n\n    /// Clear bits in heap visibility map. ('flags' is bitmap of bits to clear)\n    ClearVisibilityMapFlags {\n        new_heap_blkno: Option<u32>,\n        old_heap_blkno: Option<u32>,\n        flags: u8,\n    },\n    /// Mark transaction IDs as committed on a CLOG page\n    ClogSetCommitted {\n        xids: Vec<TransactionId>,\n        timestamp: TimestampTz,\n    },\n    /// Mark transaction IDs as aborted on a CLOG page\n    ClogSetAborted { xids: Vec<TransactionId> },\n    /// Extend multixact offsets SLRU\n    MultixactOffsetCreate {\n        mid: MultiXactId,\n        moff: MultiXactOffset,\n    },\n    /// Extend multixact members SLRU.\n    MultixactMembersCreate {\n        moff: MultiXactOffset,\n        members: Vec<MultiXactMember>,\n    },\n    /// Update the map of AUX files, either writing or dropping an entry\n    AuxFile {\n        file_path: String,\n        content: Option<Bytes>,\n    },\n    // Truncate visibility map page\n    TruncateVisibilityMap {\n        trunc_byte: usize,\n        trunc_offs: usize,\n    },\n\n    /// A testing record for unit testing purposes. 
It supports appending data to an existing image, or clearing it.\n    #[cfg(feature = \"testing\")]\n    Test {\n        /// Append a string to the image.\n        append: String,\n        /// Clear the image before appending.\n        clear: bool,\n        /// Treat this record as an init record. `clear` should be set to true if this field is set\n        /// to true. This record does not need the history WALs to reconstruct. See [`NeonWalRecord::will_init`] and\n        /// its references in `timeline.rs`.\n        will_init: bool,\n        /// Only append the record if the current image is the same as the one specified in this field.\n        only_if: Option<String>,\n    },\n}\n\nimpl NeonWalRecord {\n    /// Does replaying this WAL record initialize the page from scratch, or does\n    /// it need to be applied over the previous image of the page?\n    pub fn will_init(&self) -> bool {\n        // If you change this function, you'll also need to change ValueBytes::will_init\n        match self {\n            NeonWalRecord::Postgres { will_init, rec: _ } => *will_init,\n            #[cfg(feature = \"testing\")]\n            NeonWalRecord::Test { will_init, .. 
} => *will_init,\n            // None of the special neon record types currently initialize the page\n            _ => false,\n        }\n    }\n\n    #[cfg(feature = \"testing\")]\n    pub fn wal_append(s: impl AsRef<str>) -> Self {\n        Self::Test {\n            append: s.as_ref().to_string(),\n            clear: false,\n            will_init: false,\n            only_if: None,\n        }\n    }\n\n    #[cfg(feature = \"testing\")]\n    pub fn wal_append_conditional(s: impl AsRef<str>, only_if: impl AsRef<str>) -> Self {\n        Self::Test {\n            append: s.as_ref().to_string(),\n            clear: false,\n            will_init: false,\n            only_if: Some(only_if.as_ref().to_string()),\n        }\n    }\n\n    #[cfg(feature = \"testing\")]\n    pub fn wal_clear(s: impl AsRef<str>) -> Self {\n        Self::Test {\n            append: s.as_ref().to_string(),\n            clear: true,\n            will_init: false,\n            only_if: None,\n        }\n    }\n\n    #[cfg(feature = \"testing\")]\n    pub fn wal_init(s: impl AsRef<str>) -> Self {\n        Self::Test {\n            append: s.as_ref().to_string(),\n            clear: true,\n            will_init: true,\n            only_if: None,\n        }\n    }\n}\n\n/// Build a human-readable string to describe a WAL record\n///\n/// For debugging purposes\npub fn describe_wal_record(rec: &NeonWalRecord) -> Result<String, DeserializeError> {\n    match rec {\n        NeonWalRecord::Postgres { will_init, rec } => Ok(format!(\n            \"will_init: {}, {}\",\n            will_init,\n            describe_postgres_wal_record(rec)?\n        )),\n        _ => Ok(format!(\"{rec:?}\")),\n    }\n}\n"
  },
  {
    "path": "libs/wal_decoder/src/models/value.rs",
    "content": "//! This module defines the value type used by the storage engine.\n//!\n//! A [`Value`] represents either a completely new value for one Key ([`Value::Image`]),\n//! or a \"delta\" of how to get from previous version of the value to the new one\n//! ([`Value::WalRecord`]).\n//!\n//! Note that the [`Value`] type is used for the permanent storage format, so any\n//! changes to it must be backwards compatible.\n\nuse bytes::Bytes;\nuse serde::{Deserialize, Serialize};\n\nuse crate::models::record::NeonWalRecord;\n\n#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]\npub enum Value {\n    /// An Image value contains a full copy of the value\n    Image(Bytes),\n    /// A WalRecord value contains a WAL record that needs to be\n    /// replayed to get the full value. Replaying the WAL record\n    /// might need a previous version of the value (if will_init()\n    /// returns false), or it may be replayed stand-alone (true).\n    WalRecord(NeonWalRecord),\n}\n\nimpl Value {\n    #[inline(always)]\n    pub fn is_image(&self) -> bool {\n        matches!(self, Value::Image(_))\n    }\n\n    #[inline(always)]\n    pub fn will_init(&self) -> bool {\n        match self {\n            Value::Image(_) => true,\n            Value::WalRecord(rec) => rec.will_init(),\n        }\n    }\n\n    #[inline(always)]\n    pub fn estimated_size(&self) -> usize {\n        match self {\n            Value::Image(image) => image.len(),\n            Value::WalRecord(NeonWalRecord::AuxFile {\n                content: Some(content),\n                ..\n            }) => content.len(),\n            Value::WalRecord(NeonWalRecord::Postgres { rec, .. }) => rec.len(),\n            Value::WalRecord(NeonWalRecord::ClogSetAborted { xids }) => xids.len() * 4,\n            Value::WalRecord(NeonWalRecord::ClogSetCommitted { xids, .. }) => xids.len() * 4,\n            Value::WalRecord(NeonWalRecord::MultixactMembersCreate { members, .. 
}) => {\n                members.len() * 8\n            }\n            _ => 8192, /* use image size as the estimation */\n        }\n    }\n}\n\n#[derive(Debug, PartialEq)]\npub enum InvalidInput {\n    TooShortValue,\n    TooShortPostgresRecord,\n}\n\n/// We could have a ValueRef where everything is `serde(borrow)`. Before implementing that, let's\n/// use this type for querying if a slice looks a particular way.\npub struct ValueBytes;\n\nimpl ValueBytes {\n    #[inline(always)]\n    pub fn will_init(raw: &[u8]) -> Result<bool, InvalidInput> {\n        if raw.len() < 12 {\n            return Err(InvalidInput::TooShortValue);\n        }\n\n        let value_discriminator = &raw[0..4];\n\n        if value_discriminator == [0, 0, 0, 0] {\n            // Value::Image always initializes\n            return Ok(true);\n        }\n\n        if value_discriminator != [0, 0, 0, 1] {\n            // not a Value::WalRecord(..)\n            return Ok(false);\n        }\n\n        let walrecord_discriminator = &raw[4..8];\n\n        if walrecord_discriminator != [0, 0, 0, 0] {\n            // only NeonWalRecord::Postgres can have will_init\n            return Ok(false);\n        }\n\n        if raw.len() < 17 {\n            return Err(InvalidInput::TooShortPostgresRecord);\n        }\n\n        Ok(raw[8] == 1)\n    }\n}\n\n#[cfg(test)]\nmod test {\n    use bytes::Bytes;\n    use utils::bin_ser::BeSer;\n\n    use super::*;\n\n    macro_rules! 
roundtrip {\n        ($orig:expr, $expected:expr) => {{\n            let orig: Value = $orig;\n\n            let actual = Value::ser(&orig).unwrap();\n            let expected: &[u8] = &$expected;\n\n            assert_eq!(utils::Hex(&actual), utils::Hex(expected));\n\n            let deser = Value::des(&actual).unwrap();\n\n            assert_eq!(orig, deser);\n        }};\n    }\n\n    #[test]\n    fn image_roundtrip() {\n        let image = Bytes::from_static(b\"foobar\");\n        let image = Value::Image(image);\n\n        #[rustfmt::skip]\n        let expected = [\n            // top level discriminator of 4 bytes\n            0x00, 0x00, 0x00, 0x00,\n            // 8 byte length\n            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06,\n            // foobar\n            0x66, 0x6f, 0x6f, 0x62, 0x61, 0x72\n        ];\n\n        roundtrip!(image, expected);\n\n        assert!(ValueBytes::will_init(&expected).unwrap());\n    }\n\n    #[test]\n    fn walrecord_postgres_roundtrip() {\n        let rec = NeonWalRecord::Postgres {\n            will_init: true,\n            rec: Bytes::from_static(b\"foobar\"),\n        };\n        let rec = Value::WalRecord(rec);\n\n        #[rustfmt::skip]\n        let expected = [\n            // flattened discriminator of total 8 bytes\n            0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00,\n            // will_init\n            0x01,\n            // 8 byte length\n            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06,\n            // foobar\n            0x66, 0x6f, 0x6f, 0x62, 0x61, 0x72\n        ];\n\n        roundtrip!(rec, expected);\n\n        assert!(ValueBytes::will_init(&expected).unwrap());\n    }\n\n    #[test]\n    fn bytes_inspection_too_short_image() {\n        let rec = Value::Image(Bytes::from_static(b\"\"));\n\n        #[rustfmt::skip]\n        let expected = [\n            // top level discriminator of 4 bytes\n            0x00, 0x00, 0x00, 0x00,\n            // 8 byte length\n            0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,\n        ];\n\n        roundtrip!(rec, expected);\n\n        assert!(ValueBytes::will_init(&expected).unwrap());\n        assert_eq!(expected.len(), 12);\n        for len in 0..12 {\n            assert_eq!(\n                ValueBytes::will_init(&expected[..len]).unwrap_err(),\n                InvalidInput::TooShortValue\n            );\n        }\n    }\n\n    #[test]\n    fn bytes_inspection_too_short_postgres_record() {\n        let rec = NeonWalRecord::Postgres {\n            will_init: false,\n            rec: Bytes::from_static(b\"\"),\n        };\n        let rec = Value::WalRecord(rec);\n\n        #[rustfmt::skip]\n        let expected = [\n            // flattened discriminator of total 8 bytes\n            0x00, 0x00, 0x00, 0x01,\n            0x00, 0x00, 0x00, 0x00,\n            // will_init\n            0x00,\n            // 8 byte length\n            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,\n        ];\n\n        roundtrip!(rec, expected);\n\n        assert!(!ValueBytes::will_init(&expected).unwrap());\n        assert_eq!(expected.len(), 17);\n        for len in 12..17 {\n            assert_eq!(\n                ValueBytes::will_init(&expected[..len]).unwrap_err(),\n                InvalidInput::TooShortPostgresRecord\n            )\n        }\n        for len in 0..12 {\n            assert_eq!(\n                ValueBytes::will_init(&expected[..len]).unwrap_err(),\n                InvalidInput::TooShortValue\n            )\n        }\n    }\n\n    #[test]\n    fn clear_visibility_map_flags_example() {\n        let rec = NeonWalRecord::ClearVisibilityMapFlags {\n            new_heap_blkno: Some(0x11),\n            old_heap_blkno: None,\n            flags: 0x03,\n        };\n        let rec = Value::WalRecord(rec);\n\n        #[rustfmt::skip]\n        let expected = [\n            // discriminators\n            0x00, 0x00, 0x00, 0x01,\n            0x00, 0x00, 0x00, 0x01,\n            // Some == 1 followed 
by 4 bytes\n            0x01, 0x00, 0x00, 0x00, 0x11,\n            // None == 0\n            0x00,\n            // flags\n            0x03\n        ];\n\n        roundtrip!(rec, expected);\n\n        assert!(!ValueBytes::will_init(&expected).unwrap());\n    }\n}\n"
  },
  {
    "path": "libs/wal_decoder/src/models.rs",
    "content": "//! This module houses types which represent decoded PG WAL records\n//! ready for the pageserver to interpret. They are derived from the original\n//! WAL records, so that each struct corresponds closely to one WAL record of\n//! a specific kind. They contain the same information as the original WAL records,\n//! but the values are already serialized in a [`SerializedValueBatch`], which\n//! is the format that the pageserver is expecting them in.\n//!\n//! The ingestion code uses these structs to help with parsing the WAL records,\n//! and it splits them into a stream of modifications to the key-value pairs that\n//! are ultimately stored in delta layers.  See also the split-out counterparts in\n//! [`postgres_ffi::walrecord`].\n//!\n//! The pipeline which processes WAL records is not super obvious, so let's follow\n//! the flow of an example XACT_COMMIT Postgres record:\n//!\n//! (Postgres XACT_COMMIT record)\n//! |\n//! |--> pageserver::walingest::WalIngest::decode_xact_record\n//!      |\n//!      |--> ([`XactRecord::Commit`])\n//!           |\n//!           |--> pageserver::walingest::WalIngest::ingest_xact_record\n//!                |\n//!                |--> (NeonWalRecord::ClogSetCommitted)\n//!                     |\n//!                     
|--> write to KV store within the pageserver\n\npub mod record;\npub mod value;\n\nuse bytes::Bytes;\nuse pageserver_api::reltag::{RelTag, SlruKind};\nuse postgres_ffi::walrecord::{\n    XlMultiXactCreate, XlMultiXactTruncate, XlRelmapUpdate, XlReploriginDrop, XlReploriginSet,\n    XlSmgrTruncate, XlXactParsedRecord,\n};\nuse postgres_ffi::{Oid, TransactionId};\nuse serde::{Deserialize, Serialize};\nuse utils::lsn::Lsn;\n\nuse crate::serialized_batch::SerializedValueBatch;\n\n// Code generated by protobuf.\npub mod proto {\n    // Tonic does derives as `#[derive(Clone, PartialEq, ::prost::Message)]`\n    // we don't use these types for anything but broker data transmission,\n    // so it's ok to ignore this one.\n    #![allow(clippy::derive_partial_eq_without_eq)]\n    // The generated ValueMeta has a `len` method generate for its `len` field.\n    #![allow(clippy::len_without_is_empty)]\n    include!(concat!(env!(\"OUT_DIR\"), concat!(\"/interpreted_wal.rs\")));\n}\n\n#[derive(Copy, Clone, Serialize, Deserialize)]\npub enum FlushUncommittedRecords {\n    Yes,\n    No,\n}\n\n/// A batch of interpreted WAL records\n#[derive(Serialize, Deserialize)]\npub struct InterpretedWalRecords {\n    pub records: Vec<InterpretedWalRecord>,\n    // Start LSN of the next record after the batch.\n    // Note that said record may not belong to the current shard.\n    pub next_record_lsn: Lsn,\n    // Inclusive start LSN of the PG WAL from which the interpreted\n    // WAL records were extracted. 
Note that this is not necessarily the\n    // start LSN of the first interpreted record in the batch.\n    pub raw_wal_start_lsn: Option<Lsn>,\n}\n\n/// An interpreted Postgres WAL record, ready to be handled by the pageserver\n#[derive(Serialize, Deserialize, Clone)]\npub struct InterpretedWalRecord {\n    /// Optional metadata record - may cause writes to metadata keys\n    /// in the storage engine\n    pub metadata_record: Option<MetadataRecord>,\n    /// A pre-serialized batch along with the required metadata for ingestion\n    /// by the pageserver\n    pub batch: SerializedValueBatch,\n    /// Byte offset within WAL for the start of the next PG WAL record.\n    /// Usually this is the end LSN of the current record, but in case of\n    /// XLOG SWITCH records it will be within the next segment.\n    pub next_record_lsn: Lsn,\n    /// Whether to flush all uncommitted modifications to the storage engine\n    /// before ingesting this record. This is currently only used for legacy PG\n    /// database creations which read pages from a template database. Such WAL\n    /// records require reading data blocks while ingesting, hence the need to flush.\n    pub flush_uncommitted: FlushUncommittedRecords,\n    /// Transaction id of the original PG WAL record\n    pub xid: TransactionId,\n}\n\nimpl InterpretedWalRecord {\n    /// Checks if the WAL record is empty\n    ///\n    /// An empty interpreted WAL record has no data or metadata and does not have to be sent to the\n    /// pageserver.\n    pub fn is_empty(&self) -> bool {\n        self.batch.is_empty()\n            && self.metadata_record.is_none()\n            && matches!(self.flush_uncommitted, FlushUncommittedRecords::No)\n    }\n\n    /// Checks if the WAL record is observed (i.e. 
contains only metadata\n    /// for observed values)\n    pub fn is_observed(&self) -> bool {\n        self.batch.is_observed()\n            && self.metadata_record.is_none()\n            && matches!(self.flush_uncommitted, FlushUncommittedRecords::No)\n    }\n}\n\n/// The interpreted part of the Postgres WAL record which requires metadata\n/// writes to the underlying storage engine.\n#[derive(Clone, Serialize, Deserialize)]\npub enum MetadataRecord {\n    Heapam(HeapamRecord),\n    Neonrmgr(NeonrmgrRecord),\n    Smgr(SmgrRecord),\n    Dbase(DbaseRecord),\n    Clog(ClogRecord),\n    Xact(XactRecord),\n    MultiXact(MultiXactRecord),\n    Relmap(RelmapRecord),\n    Xlog(XlogRecord),\n    LogicalMessage(LogicalMessageRecord),\n    Standby(StandbyRecord),\n    Replorigin(ReploriginRecord),\n}\n\n#[derive(Clone, Serialize, Deserialize)]\npub enum HeapamRecord {\n    ClearVmBits(ClearVmBits),\n}\n\n#[derive(Clone, Serialize, Deserialize)]\npub struct ClearVmBits {\n    pub new_heap_blkno: Option<u32>,\n    pub old_heap_blkno: Option<u32>,\n    pub vm_rel: RelTag,\n    pub flags: u8,\n}\n\n#[derive(Clone, Serialize, Deserialize)]\npub enum NeonrmgrRecord {\n    ClearVmBits(ClearVmBits),\n}\n\n#[derive(Clone, Serialize, Deserialize)]\npub enum SmgrRecord {\n    Create(SmgrCreate),\n    Truncate(XlSmgrTruncate),\n}\n\n#[derive(Clone, Serialize, Deserialize)]\npub struct SmgrCreate {\n    pub rel: RelTag,\n}\n\n#[derive(Clone, Serialize, Deserialize)]\npub enum DbaseRecord {\n    Create(DbaseCreate),\n    Drop(DbaseDrop),\n}\n\n#[derive(Clone, Serialize, Deserialize)]\npub struct DbaseCreate {\n    pub db_id: Oid,\n    pub tablespace_id: Oid,\n    pub src_db_id: Oid,\n    pub src_tablespace_id: Oid,\n}\n\n#[derive(Clone, Serialize, Deserialize)]\npub struct DbaseDrop {\n    pub db_id: Oid,\n    pub tablespace_ids: Vec<Oid>,\n}\n\n#[derive(Clone, Serialize, Deserialize)]\npub enum ClogRecord {\n    ZeroPage(ClogZeroPage),\n    Truncate(ClogTruncate),\n}\n\n#[derive(Clone, 
Serialize, Deserialize)]\npub struct ClogZeroPage {\n    pub segno: u32,\n    pub rpageno: u32,\n}\n\n#[derive(Clone, Serialize, Deserialize)]\npub struct ClogTruncate {\n    pub pageno: u32,\n    pub oldest_xid: TransactionId,\n    pub oldest_xid_db: Oid,\n}\n\n#[derive(Clone, Serialize, Deserialize)]\npub enum XactRecord {\n    Commit(XactCommon),\n    Abort(XactCommon),\n    CommitPrepared(XactCommon),\n    AbortPrepared(XactCommon),\n    Prepare(XactPrepare),\n}\n\n#[derive(Clone, Serialize, Deserialize)]\npub struct XactCommon {\n    pub parsed: XlXactParsedRecord,\n    pub origin_id: u16,\n    // Fields below are only used for logging\n    pub xl_xid: TransactionId,\n    pub lsn: Lsn,\n}\n\n#[derive(Clone, Serialize, Deserialize)]\npub struct XactPrepare {\n    pub xl_xid: TransactionId,\n    pub data: Bytes,\n}\n\n#[derive(Clone, Serialize, Deserialize)]\npub enum MultiXactRecord {\n    ZeroPage(MultiXactZeroPage),\n    Create(XlMultiXactCreate),\n    Truncate(XlMultiXactTruncate),\n}\n\n#[derive(Clone, Serialize, Deserialize)]\npub struct MultiXactZeroPage {\n    pub slru_kind: SlruKind,\n    pub segno: u32,\n    pub rpageno: u32,\n}\n\n#[derive(Clone, Serialize, Deserialize)]\npub enum RelmapRecord {\n    Update(RelmapUpdate),\n}\n\n#[derive(Clone, Serialize, Deserialize)]\npub struct RelmapUpdate {\n    pub update: XlRelmapUpdate,\n    pub buf: Bytes,\n}\n\n#[derive(Clone, Serialize, Deserialize)]\npub enum XlogRecord {\n    Raw(RawXlogRecord),\n}\n\n#[derive(Clone, Serialize, Deserialize)]\npub struct RawXlogRecord {\n    pub info: u8,\n    pub lsn: Lsn,\n    pub buf: Bytes,\n}\n\n#[derive(Clone, Serialize, Deserialize)]\npub enum LogicalMessageRecord {\n    Put(PutLogicalMessage),\n    #[cfg(feature = \"testing\")]\n    Failpoint,\n}\n\n#[derive(Clone, Serialize, Deserialize)]\npub struct PutLogicalMessage {\n    pub path: String,\n    pub buf: Bytes,\n}\n\n#[derive(Clone, Serialize, Deserialize)]\npub enum StandbyRecord {\n    
RunningXacts(StandbyRunningXacts),\n}\n\n#[derive(Clone, Serialize, Deserialize)]\npub struct StandbyRunningXacts {\n    pub oldest_running_xid: TransactionId,\n}\n\n#[derive(Clone, Serialize, Deserialize)]\npub enum ReploriginRecord {\n    Set(XlReploriginSet),\n    Drop(XlReploriginDrop),\n}\n"
  },
  {
    "path": "libs/wal_decoder/src/serialized_batch.rs",
    "content": "//! This module implements batch type for serialized [`crate::models::value::Value`]\n//! instances. Each batch contains a raw buffer (serialized values)\n//! and a list of metadata for each (key, LSN) tuple present in the batch.\n//!\n//! Such batches are created from decoded PG wal records and ingested\n//! by the pageserver by writing directly to the ephemeral file.\n\nuse std::collections::{BTreeSet, HashMap};\n\nuse bytes::{Bytes, BytesMut};\nuse pageserver_api::key::{CompactKey, Key, rel_block_to_key};\nuse pageserver_api::keyspace::KeySpace;\nuse pageserver_api::reltag::RelTag;\nuse pageserver_api::shard::ShardIdentity;\nuse postgres_ffi::walrecord::{DecodedBkpBlock, DecodedWALRecord};\nuse postgres_ffi::{BLCKSZ, PgMajorVersion, page_is_new, page_set_lsn, pg_constants};\nuse serde::{Deserialize, Serialize};\nuse utils::bin_ser::BeSer;\nuse utils::lsn::Lsn;\n\nuse crate::models::InterpretedWalRecord;\nuse crate::models::record::NeonWalRecord;\nuse crate::models::value::Value;\n\nstatic ZERO_PAGE: Bytes = Bytes::from_static(&[0u8; BLCKSZ as usize]);\n\n/// Accompanying metadata for the batch\n/// A value may be serialized and stored into the batch or just \"observed\".\n/// Shard 0 currently \"observes\" all values in order to accurately track\n/// relation sizes. 
In the case of \"observed\" values, we only need to know\n/// the key and LSN, so two types of metadata are supported to save on network\n/// bandwidth.\n#[derive(Serialize, Deserialize, Clone)]\npub enum ValueMeta {\n    Serialized(SerializedValueMeta),\n    Observed(ObservedValueMeta),\n}\n\nimpl ValueMeta {\n    pub fn key(&self) -> CompactKey {\n        match self {\n            Self::Serialized(ser) => ser.key,\n            Self::Observed(obs) => obs.key,\n        }\n    }\n\n    pub fn lsn(&self) -> Lsn {\n        match self {\n            Self::Serialized(ser) => ser.lsn,\n            Self::Observed(obs) => obs.lsn,\n        }\n    }\n}\n\n/// Wrapper around [`ValueMeta`] that implements ordering by\n/// (key, LSN) tuples\nstruct OrderedValueMeta(ValueMeta);\n\nimpl Ord for OrderedValueMeta {\n    fn cmp(&self, other: &Self) -> std::cmp::Ordering {\n        (self.0.key(), self.0.lsn()).cmp(&(other.0.key(), other.0.lsn()))\n    }\n}\n\nimpl PartialOrd for OrderedValueMeta {\n    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {\n        Some(self.cmp(other))\n    }\n}\n\nimpl PartialEq for OrderedValueMeta {\n    fn eq(&self, other: &Self) -> bool {\n        (self.0.key(), self.0.lsn()) == (other.0.key(), other.0.lsn())\n    }\n}\n\nimpl Eq for OrderedValueMeta {}\n\n/// Metadata for a [`Value`] serialized into the batch.\n#[derive(Serialize, Deserialize, Clone)]\npub struct SerializedValueMeta {\n    pub key: CompactKey,\n    pub lsn: Lsn,\n    /// Starting offset of the value for the (key, LSN) tuple\n    /// in [`SerializedValueBatch::raw`]\n    pub batch_offset: u64,\n    pub len: usize,\n    pub will_init: bool,\n}\n\n/// Metadata for a [`Value`] observed by the batch\n#[derive(Serialize, Deserialize, Clone)]\npub struct ObservedValueMeta {\n    pub key: CompactKey,\n    pub lsn: Lsn,\n}\n\n/// Batch of serialized [`Value`]s.\n#[derive(Serialize, Deserialize, Clone)]\npub struct SerializedValueBatch {\n    /// [`Value`]s serialized in 
EphemeralFile's native format,\n    /// ready for disk write by the pageserver\n    pub raw: Vec<u8>,\n\n    /// Metadata to make sense of the bytes in [`Self::raw`]\n    /// and represent \"observed\" values.\n    ///\n    /// Invariant: Metadata entries for any given key are ordered\n    /// by LSN. Note that entries for a key do not have to be contiguous.\n    pub metadata: Vec<ValueMeta>,\n\n    /// The highest LSN of any value in the batch\n    pub max_lsn: Lsn,\n\n    /// Number of values encoded by [`Self::raw`]\n    pub len: usize,\n}\n\nimpl Default for SerializedValueBatch {\n    fn default() -> Self {\n        Self {\n            raw: Default::default(),\n            metadata: Default::default(),\n            max_lsn: Lsn(0),\n            len: 0,\n        }\n    }\n}\n\nimpl SerializedValueBatch {\n    /// Populates the given `shard_records` with value batches from this WAL record, if any,\n    /// discarding those belonging to other shards.\n    ///\n    /// The batch will only contain values for keys targeting the specified\n    /// shard. Shard 0 is a special case, where any keys that don't belong to\n    /// it are \"observed\" by the batch (i.e. 
present in [`SerializedValueBatch::metadata`],\n    /// but absent from the raw buffer [`SerializedValueBatch::raw`]).\n    pub(crate) fn from_decoded_filtered(\n        decoded: DecodedWALRecord,\n        shard_records: &mut HashMap<ShardIdentity, InterpretedWalRecord>,\n        next_record_lsn: Lsn,\n        pg_version: PgMajorVersion,\n    ) -> anyhow::Result<()> {\n        // First determine how big the buffers need to be and allocate it up-front.\n        // This duplicates some of the work below, but it's empirically much faster.\n        for (shard, record) in shard_records.iter_mut() {\n            assert!(record.batch.is_empty());\n\n            let estimate = Self::estimate_buffer_size(&decoded, shard, pg_version);\n            record.batch.raw = Vec::with_capacity(estimate);\n        }\n\n        for blk in decoded.blocks.iter() {\n            let rel = RelTag {\n                spcnode: blk.rnode_spcnode,\n                dbnode: blk.rnode_dbnode,\n                relnode: blk.rnode_relnode,\n                forknum: blk.forknum,\n            };\n\n            let key = rel_block_to_key(rel, blk.blkno);\n\n            if !key.is_valid_key_on_write_path() {\n                anyhow::bail!(\n                    \"Unsupported key decoded at LSN {}: {}\",\n                    next_record_lsn,\n                    key\n                );\n            }\n\n            for (shard, record) in shard_records.iter_mut() {\n                let key_is_local = shard.is_key_local(&key);\n\n                tracing::debug!(\n                    lsn=%next_record_lsn,\n                    key=%key,\n                    \"ingest: shard decision {}\",\n                    if !key_is_local { \"drop\" } else { \"keep\" },\n                );\n\n                if !key_is_local {\n                    if shard.is_shard_zero() {\n                        // Shard 0 tracks relation sizes.  
Although we will not store this block, we will observe\n                        // its blkno in case it implicitly extends a relation.\n                        record\n                            .batch\n                            .metadata\n                            .push(ValueMeta::Observed(ObservedValueMeta {\n                                key: key.to_compact(),\n                                lsn: next_record_lsn,\n                            }))\n                    }\n\n                    continue;\n                }\n\n                // Instead of storing full-page-image WAL record,\n                // it is better to store extracted image: we can skip wal-redo\n                // in this case. Also some FPI records may contain multiple (up to 32) pages,\n                // so they have to be copied multiple times.\n                //\n                let val = if Self::block_is_image(&decoded, blk, pg_version) {\n                    // Extract page image from FPI record\n                    let img_len = blk.bimg_len as usize;\n                    let img_offs = blk.bimg_offset as usize;\n                    let mut image = BytesMut::with_capacity(BLCKSZ as usize);\n                    // TODO(vlad): skip the copy\n                    image.extend_from_slice(&decoded.record[img_offs..img_offs + img_len]);\n\n                    if blk.hole_length != 0 {\n                        let tail = image.split_off(blk.hole_offset as usize);\n                        image.resize(image.len() + blk.hole_length as usize, 0u8);\n                        image.unsplit(tail);\n                    }\n                    //\n                    // Match the logic of XLogReadBufferForRedoExtended:\n                    // The page may be uninitialized. 
If so, we can't set the LSN because\n                    // that would corrupt the page.\n                    //\n                    if !page_is_new(&image) {\n                        page_set_lsn(&mut image, next_record_lsn)\n                    }\n                    assert_eq!(image.len(), BLCKSZ as usize);\n\n                    Value::Image(image.freeze())\n                } else {\n                    Value::WalRecord(NeonWalRecord::Postgres {\n                        will_init: blk.will_init || blk.apply_image,\n                        rec: decoded.record.clone(),\n                    })\n                };\n\n                let relative_off = record.batch.raw.len() as u64;\n\n                val.ser_into(&mut record.batch.raw)\n                    .expect(\"Writing into in-memory buffer is infallible\");\n\n                let val_ser_size = record.batch.raw.len() - relative_off as usize;\n\n                record\n                    .batch\n                    .metadata\n                    .push(ValueMeta::Serialized(SerializedValueMeta {\n                        key: key.to_compact(),\n                        lsn: next_record_lsn,\n                        batch_offset: relative_off,\n                        len: val_ser_size,\n                        will_init: val.will_init(),\n                    }));\n                record.batch.max_lsn = std::cmp::max(record.batch.max_lsn, next_record_lsn);\n                record.batch.len += 1;\n            }\n        }\n\n        if cfg!(any(debug_assertions, test)) {\n            // Validate that the batches are correct\n            for record in shard_records.values() {\n                record.batch.validate_lsn_order();\n            }\n        }\n\n        Ok(())\n    }\n\n    /// Look into the decoded PG WAL record and determine\n    /// roughly how large the buffer for serialized values needs to be.\n    fn estimate_buffer_size(\n        decoded: &DecodedWALRecord,\n        shard: &ShardIdentity,\n        
pg_version: PgMajorVersion,\n    ) -> usize {\n        let mut estimate: usize = 0;\n\n        for blk in decoded.blocks.iter() {\n            let rel = RelTag {\n                spcnode: blk.rnode_spcnode,\n                dbnode: blk.rnode_dbnode,\n                relnode: blk.rnode_relnode,\n                forknum: blk.forknum,\n            };\n\n            let key = rel_block_to_key(rel, blk.blkno);\n\n            if !shard.is_key_local(&key) {\n                continue;\n            }\n\n            if Self::block_is_image(decoded, blk, pg_version) {\n                // 4 bytes for the Value::Image discriminator\n                // 8 bytes for encoding the size of the buffer\n                // BLCKSZ for the raw image\n                estimate += (4 + 8 + BLCKSZ) as usize;\n            } else {\n                // 4 bytes for the Value::WalRecord discriminator\n                // 4 bytes for the NeonWalRecord::Postgres discriminator\n                // 1 byte for NeonWalRecord::Postgres::will_init\n                // 8 bytes for encoding the size of the buffer\n                // length of the raw record\n                estimate += 8 + 1 + 8 + decoded.record.len();\n            }\n        }\n\n        estimate\n    }\n\n    fn block_is_image(\n        decoded: &DecodedWALRecord,\n        blk: &DecodedBkpBlock,\n        pg_version: PgMajorVersion,\n    ) -> bool {\n        blk.apply_image\n            && blk.has_image\n            && decoded.xl_rmid == pg_constants::RM_XLOG_ID\n            && (decoded.xl_info == pg_constants::XLOG_FPI\n            || decoded.xl_info == pg_constants::XLOG_FPI_FOR_HINT)\n            // compression of WAL is not yet supported: fall back to storing the original WAL record\n            && !postgres_ffi::bkpimage_is_compressed(blk.bimg_info, pg_version)\n            // do not materialize null pages because they will most likely soon be replaced with real data\n            && blk.bimg_len != 0\n    }\n\n    /// Encode a list of values 
and metadata into a serialized batch\n    ///\n    /// This is used by the pageserver ingest code to conveniently generate\n    /// batches for metadata writes.\n    pub fn from_values(batch: Vec<(CompactKey, Lsn, usize, Value)>) -> Self {\n        // Pre-allocate a big flat buffer to write into. This should be large but not huge: it is soft-limited in practice by\n        // [`crate::pgdatadir_mapping::DatadirModification::MAX_PENDING_BYTES`]\n        let buffer_size = batch.iter().map(|i| i.2).sum::<usize>();\n        let mut buf = Vec::<u8>::with_capacity(buffer_size);\n\n        let mut metadata: Vec<ValueMeta> = Vec::with_capacity(batch.len());\n        let mut max_lsn: Lsn = Lsn(0);\n        let len = batch.len();\n        for (key, lsn, val_ser_size, val) in batch {\n            let relative_off = buf.len() as u64;\n\n            val.ser_into(&mut buf)\n                .expect(\"Writing into in-memory buffer is infallible\");\n\n            metadata.push(ValueMeta::Serialized(SerializedValueMeta {\n                key,\n                lsn,\n                batch_offset: relative_off,\n                len: val_ser_size,\n                will_init: val.will_init(),\n            }));\n            max_lsn = std::cmp::max(max_lsn, lsn);\n        }\n\n        // Assert that we didn't do any extra allocations while building buffer.\n        debug_assert!(buf.len() <= buffer_size);\n\n        if cfg!(any(debug_assertions, test)) {\n            let batch = Self {\n                raw: buf,\n                metadata,\n                max_lsn,\n                len,\n            };\n\n            batch.validate_lsn_order();\n\n            return batch;\n        }\n\n        Self {\n            raw: buf,\n            metadata,\n            max_lsn,\n            len,\n        }\n    }\n\n    /// Add one value to the batch\n    ///\n    /// This is used by the pageserver ingest code to include metadata block\n    /// updates for a single key.\n    pub fn put(&mut self, 
key: CompactKey, value: Value, lsn: Lsn) {\n        let relative_off = self.raw.len() as u64;\n        value.ser_into(&mut self.raw).unwrap();\n\n        let val_ser_size = self.raw.len() - relative_off as usize;\n        self.metadata\n            .push(ValueMeta::Serialized(SerializedValueMeta {\n                key,\n                lsn,\n                batch_offset: relative_off,\n                len: val_ser_size,\n                will_init: value.will_init(),\n            }));\n\n        self.max_lsn = std::cmp::max(self.max_lsn, lsn);\n        self.len += 1;\n\n        if cfg!(any(debug_assertions, test)) {\n            self.validate_lsn_order();\n        }\n    }\n\n    /// Extend with the contents of another batch\n    ///\n    /// One batch is generated for each decoded PG WAL record.\n    /// They are then merged to accumulate reasonably sized writes.\n    pub fn extend(&mut self, mut other: SerializedValueBatch) {\n        let extend_batch_start_offset = self.raw.len() as u64;\n\n        self.raw.extend(other.raw);\n\n        // Shift the offsets in the batch we are extending with\n        other.metadata.iter_mut().for_each(|meta| match meta {\n            ValueMeta::Serialized(ser) => {\n                ser.batch_offset += extend_batch_start_offset;\n                if cfg!(debug_assertions) {\n                    let value_end = ser.batch_offset + ser.len as u64;\n                    assert!((value_end as usize) <= self.raw.len());\n                }\n            }\n            ValueMeta::Observed(_) => {}\n        });\n        self.metadata.extend(other.metadata);\n\n        self.max_lsn = std::cmp::max(self.max_lsn, other.max_lsn);\n\n        self.len += other.len;\n\n        if cfg!(any(debug_assertions, test)) {\n            self.validate_lsn_order();\n        }\n    }\n\n    /// Add zero images for the (key, LSN) tuples specified\n    ///\n    /// PG versions below 16 do not zero out pages before extending\n    /// a relation and may leave gaps. 
Such gaps need to be identified\n    /// by the pageserver ingest logic and get patched up here.\n    ///\n    /// Note that this function does not validate that the gaps have been\n    /// identified correctly (it does not know relation sizes), so it's up\n    /// to the call-site to do it properly.\n    pub fn zero_gaps(&mut self, gaps: Vec<(KeySpace, Lsn)>) {\n        // Implementation note:\n        //\n        // Values within [`SerializedValueBatch::raw`] do not have any ordering requirements,\n        // but the metadata entries should be ordered properly (see\n        // [`SerializedValueBatch::metadata`]).\n        //\n        // Exploiting this observation we do:\n        // 1. Drain all the metadata entries into an ordered set.\n        // The use of a BTreeSet keyed by (Key, Lsn) relies on the observation that Postgres never\n        // includes more than one update to the same block in the same WAL record.\n        // 2. For each (key, LSN) gap tuple, append a zero image to the raw buffer\n        // and add an index entry to the ordered metadata set.\n        // 3. 
Drain the ordered set back into a metadata vector\n\n        let mut ordered_metas = self\n            .metadata\n            .drain(..)\n            .map(OrderedValueMeta)\n            .collect::<BTreeSet<_>>();\n        for (keyspace, lsn) in gaps {\n            self.max_lsn = std::cmp::max(self.max_lsn, lsn);\n\n            for gap_range in keyspace.ranges {\n                let mut key = gap_range.start;\n                while key != gap_range.end {\n                    let relative_off = self.raw.len() as u64;\n\n                    // TODO(vlad): Can we be cheeky and write only one zero image, and\n                    // make all index entries requiring a zero page point to it?\n                    // Alternatively, we can change the index entry format to represent zero pages\n                    // without writing them at all.\n                    Value::Image(ZERO_PAGE.clone())\n                        .ser_into(&mut self.raw)\n                        .unwrap();\n                    let val_ser_size = self.raw.len() - relative_off as usize;\n\n                    ordered_metas.insert(OrderedValueMeta(ValueMeta::Serialized(\n                        SerializedValueMeta {\n                            key: key.to_compact(),\n                            lsn,\n                            batch_offset: relative_off,\n                            len: val_ser_size,\n                            will_init: true,\n                        },\n                    )));\n\n                    self.len += 1;\n\n                    key = key.next();\n                }\n            }\n        }\n\n        self.metadata = ordered_metas.into_iter().map(|ord| ord.0).collect();\n\n        if cfg!(any(debug_assertions, test)) {\n            self.validate_lsn_order();\n        }\n    }\n\n    /// Checks if the batch contains any serialized or observed values\n    pub fn is_empty(&self) -> bool {\n        !self.has_data() && self.metadata.is_empty()\n    }\n\n    /// Checks if the 
batch contains only observed values\n    pub fn is_observed(&self) -> bool {\n        !self.has_data() && !self.metadata.is_empty()\n    }\n\n    /// Checks if the batch contains data\n    ///\n    /// Note that if this returns false, it may still contain observed values or\n    /// a metadata record.\n    pub fn has_data(&self) -> bool {\n        let empty = self.raw.is_empty();\n\n        if cfg!(debug_assertions) && empty {\n            assert!(\n                self.metadata\n                    .iter()\n                    .all(|meta| matches!(meta, ValueMeta::Observed(_)))\n            );\n        }\n\n        !empty\n    }\n\n    /// Returns the number of values serialized in the batch\n    pub fn len(&self) -> usize {\n        self.len\n    }\n\n    /// Returns the size of the buffer wrapped by the batch\n    pub fn buffer_size(&self) -> usize {\n        self.raw.len()\n    }\n\n    pub fn updates_key(&self, key: &Key) -> bool {\n        self.metadata.iter().any(|meta| match meta {\n            ValueMeta::Serialized(ser) => key.to_compact() == ser.key,\n            ValueMeta::Observed(_) => false,\n        })\n    }\n\n    pub fn validate_lsn_order(&self) {\n        use std::collections::HashMap;\n\n        let mut last_seen_lsn_per_key: HashMap<CompactKey, Lsn> = HashMap::default();\n\n        for meta in self.metadata.iter() {\n            let lsn = meta.lsn();\n            let key = meta.key();\n\n            if let Some(prev_lsn) = last_seen_lsn_per_key.insert(key, lsn) {\n                assert!(\n                    lsn >= prev_lsn,\n                    \"Ordering violated by {}: {} < {}\",\n                    Key::from_compact(key),\n                    lsn,\n                    prev_lsn\n                );\n            }\n        }\n    }\n}\n\n#[cfg(all(test, feature = \"testing\"))]\nmod tests {\n    use super::*;\n\n    fn validate_batch(\n        batch: &SerializedValueBatch,\n        values: &[(CompactKey, Lsn, usize, Value)],\n        gaps: 
Option<&Vec<(KeySpace, Lsn)>>,\n    ) {\n        // Invariant 1: The metadata for a given entry in the batch\n        // is correct and can be used to deserialize back to the original value.\n        for (key, lsn, size, value) in values.iter() {\n            let meta = batch\n                .metadata\n                .iter()\n                .find(|meta| (meta.key(), meta.lsn()) == (*key, *lsn))\n                .unwrap();\n            let meta = match meta {\n                ValueMeta::Serialized(ser) => ser,\n                ValueMeta::Observed(_) => unreachable!(),\n            };\n\n            assert_eq!(meta.len, *size);\n            assert_eq!(meta.will_init, value.will_init());\n\n            let start = meta.batch_offset as usize;\n            let end = meta.batch_offset as usize + meta.len;\n            let value_from_batch = Value::des(&batch.raw[start..end]).unwrap();\n            assert_eq!(&value_from_batch, value);\n        }\n\n        let mut expected_buffer_size: usize = values.iter().map(|(_, _, size, _)| size).sum();\n        let mut gap_pages_count: usize = 0;\n\n        // Invariant 2: Zero pages were added for identified gaps and their metadata\n        // is correct.\n        if let Some(gaps) = gaps {\n            for (gap_keyspace, lsn) in gaps {\n                for gap_range in &gap_keyspace.ranges {\n                    let mut gap_key = gap_range.start;\n                    while gap_key != gap_range.end {\n                        let meta = batch\n                            .metadata\n                            .iter()\n                            .find(|meta| (meta.key(), meta.lsn()) == (gap_key.to_compact(), *lsn))\n                            .unwrap();\n                        let meta = match meta {\n                            ValueMeta::Serialized(ser) => ser,\n                            ValueMeta::Observed(_) => unreachable!(),\n                        };\n\n                        let zero_value = 
Value::Image(ZERO_PAGE.clone());\n                        let zero_value_size = zero_value.serialized_size().unwrap() as usize;\n\n                        assert_eq!(meta.len, zero_value_size);\n                        assert_eq!(meta.will_init, zero_value.will_init());\n\n                        let start = meta.batch_offset as usize;\n                        let end = meta.batch_offset as usize + meta.len;\n                        let value_from_batch = Value::des(&batch.raw[start..end]).unwrap();\n                        assert_eq!(value_from_batch, zero_value);\n\n                        gap_pages_count += 1;\n                        expected_buffer_size += zero_value_size;\n                        gap_key = gap_key.next();\n                    }\n                }\n            }\n        }\n\n        // Invariant 3: The length of the batch is equal to the number\n        // of values inserted, plus the number of gap pages. This extends\n        // to the raw buffer size.\n        assert_eq!(batch.len(), values.len() + gap_pages_count);\n        assert_eq!(expected_buffer_size, batch.buffer_size());\n\n        // Invariant 4: Metadata entries for any given key are sorted in LSN order.\n        batch.validate_lsn_order();\n    }\n\n    #[test]\n    fn test_creation_from_values() {\n        const LSN: Lsn = Lsn(0x10);\n        let key = Key::from_hex(\"110000000033333333444444445500000001\").unwrap();\n\n        let values = vec![\n            (\n                key.to_compact(),\n                LSN,\n                Value::WalRecord(NeonWalRecord::wal_append(\"foo\")),\n            ),\n            (\n                key.next().to_compact(),\n                LSN,\n                Value::WalRecord(NeonWalRecord::wal_append(\"bar\")),\n            ),\n            (\n                key.to_compact(),\n                Lsn(LSN.0 + 0x10),\n                Value::WalRecord(NeonWalRecord::wal_append(\"baz\")),\n            ),\n            (\n                
key.next().next().to_compact(),\n                LSN,\n                Value::WalRecord(NeonWalRecord::wal_append(\"taz\")),\n            ),\n        ];\n\n        let values = values\n            .into_iter()\n            .map(|(key, lsn, value)| (key, lsn, value.serialized_size().unwrap() as usize, value))\n            .collect::<Vec<_>>();\n        let batch = SerializedValueBatch::from_values(values.clone());\n\n        validate_batch(&batch, &values, None);\n\n        assert!(!batch.is_empty());\n    }\n\n    #[test]\n    fn test_put() {\n        const LSN: Lsn = Lsn(0x10);\n        let key = Key::from_hex(\"110000000033333333444444445500000001\").unwrap();\n\n        let values = vec![\n            (\n                key.to_compact(),\n                LSN,\n                Value::WalRecord(NeonWalRecord::wal_append(\"foo\")),\n            ),\n            (\n                key.next().to_compact(),\n                LSN,\n                Value::WalRecord(NeonWalRecord::wal_append(\"bar\")),\n            ),\n        ];\n\n        let mut values = values\n            .into_iter()\n            .map(|(key, lsn, value)| (key, lsn, value.serialized_size().unwrap() as usize, value))\n            .collect::<Vec<_>>();\n        let mut batch = SerializedValueBatch::from_values(values.clone());\n\n        validate_batch(&batch, &values, None);\n\n        let value = (\n            key.to_compact(),\n            Lsn(LSN.0 + 0x10),\n            Value::WalRecord(NeonWalRecord::wal_append(\"baz\")),\n        );\n        let serialized_size = value.2.serialized_size().unwrap() as usize;\n        let value = (value.0, value.1, serialized_size, value.2);\n        values.push(value.clone());\n        batch.put(value.0, value.3, value.1);\n\n        validate_batch(&batch, &values, None);\n\n        let value = (\n            key.next().next().to_compact(),\n            LSN,\n            Value::WalRecord(NeonWalRecord::wal_append(\"taz\")),\n        );\n        let serialized_size 
= value.2.serialized_size().unwrap() as usize;\n        let value = (value.0, value.1, serialized_size, value.2);\n        values.push(value.clone());\n        batch.put(value.0, value.3, value.1);\n\n        validate_batch(&batch, &values, None);\n    }\n\n    #[test]\n    fn test_extension() {\n        const LSN: Lsn = Lsn(0x10);\n        let key = Key::from_hex(\"110000000033333333444444445500000001\").unwrap();\n\n        let values = vec![\n            (\n                key.to_compact(),\n                LSN,\n                Value::WalRecord(NeonWalRecord::wal_append(\"foo\")),\n            ),\n            (\n                key.next().to_compact(),\n                LSN,\n                Value::WalRecord(NeonWalRecord::wal_append(\"bar\")),\n            ),\n            (\n                key.next().next().to_compact(),\n                LSN,\n                Value::WalRecord(NeonWalRecord::wal_append(\"taz\")),\n            ),\n        ];\n\n        let mut values = values\n            .into_iter()\n            .map(|(key, lsn, value)| (key, lsn, value.serialized_size().unwrap() as usize, value))\n            .collect::<Vec<_>>();\n        let mut batch = SerializedValueBatch::from_values(values.clone());\n\n        let other_values = vec![\n            (\n                key.to_compact(),\n                Lsn(LSN.0 + 0x10),\n                Value::WalRecord(NeonWalRecord::wal_append(\"foo\")),\n            ),\n            (\n                key.next().to_compact(),\n                Lsn(LSN.0 + 0x10),\n                Value::WalRecord(NeonWalRecord::wal_append(\"bar\")),\n            ),\n            (\n                key.next().next().to_compact(),\n                Lsn(LSN.0 + 0x10),\n                Value::WalRecord(NeonWalRecord::wal_append(\"taz\")),\n            ),\n        ];\n\n        let other_values = other_values\n            .into_iter()\n            .map(|(key, lsn, value)| (key, lsn, value.serialized_size().unwrap() as usize, value))\n           
 .collect::<Vec<_>>();\n        let other_batch = SerializedValueBatch::from_values(other_values.clone());\n\n        values.extend(other_values);\n        batch.extend(other_batch);\n\n        validate_batch(&batch, &values, None);\n    }\n\n    #[test]\n    fn test_gap_zeroing() {\n        const LSN: Lsn = Lsn(0x10);\n        let rel_foo_base_key = Key::from_hex(\"110000000033333333444444445500000001\").unwrap();\n\n        let rel_bar_base_key = {\n            let mut key = rel_foo_base_key;\n            key.field4 += 1;\n            key\n        };\n\n        let values = vec![\n            (\n                rel_foo_base_key.to_compact(),\n                LSN,\n                Value::WalRecord(NeonWalRecord::wal_append(\"foo1\")),\n            ),\n            (\n                rel_foo_base_key.add(1).to_compact(),\n                LSN,\n                Value::WalRecord(NeonWalRecord::wal_append(\"foo2\")),\n            ),\n            (\n                rel_foo_base_key.add(5).to_compact(),\n                LSN,\n                Value::WalRecord(NeonWalRecord::wal_append(\"foo3\")),\n            ),\n            (\n                rel_foo_base_key.add(1).to_compact(),\n                Lsn(LSN.0 + 0x10),\n                Value::WalRecord(NeonWalRecord::wal_append(\"foo4\")),\n            ),\n            (\n                rel_foo_base_key.add(10).to_compact(),\n                Lsn(LSN.0 + 0x10),\n                Value::WalRecord(NeonWalRecord::wal_append(\"foo5\")),\n            ),\n            (\n                rel_foo_base_key.add(11).to_compact(),\n                Lsn(LSN.0 + 0x10),\n                Value::WalRecord(NeonWalRecord::wal_append(\"foo6\")),\n            ),\n            (\n                rel_foo_base_key.add(12).to_compact(),\n                Lsn(LSN.0 + 0x10),\n                Value::WalRecord(NeonWalRecord::wal_append(\"foo7\")),\n            ),\n            (\n                rel_bar_base_key.to_compact(),\n                LSN,\n             
   Value::WalRecord(NeonWalRecord::wal_append(\"bar1\")),\n            ),\n            (\n                rel_bar_base_key.add(4).to_compact(),\n                LSN,\n                Value::WalRecord(NeonWalRecord::wal_append(\"bar2\")),\n            ),\n        ];\n\n        let values = values\n            .into_iter()\n            .map(|(key, lsn, value)| (key, lsn, value.serialized_size().unwrap() as usize, value))\n            .collect::<Vec<_>>();\n\n        let mut batch = SerializedValueBatch::from_values(values.clone());\n\n        let gaps = vec![\n            (\n                KeySpace {\n                    ranges: vec![\n                        rel_foo_base_key.add(2)..rel_foo_base_key.add(5),\n                        rel_bar_base_key.add(1)..rel_bar_base_key.add(4),\n                    ],\n                },\n                LSN,\n            ),\n            (\n                KeySpace {\n                    ranges: vec![rel_foo_base_key.add(6)..rel_foo_base_key.add(10)],\n                },\n                Lsn(LSN.0 + 0x10),\n            ),\n        ];\n\n        batch.zero_gaps(gaps.clone());\n        validate_batch(&batch, &values, Some(&gaps));\n    }\n}\n"
  },
  {
    "path": "libs/wal_decoder/src/wire_format.rs",
    "content": "use bytes::{BufMut, Bytes, BytesMut};\nuse pageserver_api::key::CompactKey;\nuse prost::{DecodeError, EncodeError, Message};\nuse tokio::io::AsyncWriteExt;\nuse utils::bin_ser::{BeSer, DeserializeError, SerializeError};\nuse utils::lsn::Lsn;\nuse utils::postgres_client::{Compression, InterpretedFormat};\n\nuse crate::models::{\n    FlushUncommittedRecords, InterpretedWalRecord, InterpretedWalRecords, MetadataRecord, proto,\n};\nuse crate::serialized_batch::{\n    ObservedValueMeta, SerializedValueBatch, SerializedValueMeta, ValueMeta,\n};\n\n#[derive(Debug, thiserror::Error)]\npub enum ToWireFormatError {\n    #[error(\"{0}\")]\n    Bincode(#[from] SerializeError),\n    #[error(\"{0}\")]\n    Protobuf(#[from] ProtobufSerializeError),\n    #[error(\"{0}\")]\n    Compression(#[from] std::io::Error),\n}\n\n#[derive(Debug, thiserror::Error)]\npub enum ProtobufSerializeError {\n    #[error(\"{0}\")]\n    MetadataRecord(#[from] SerializeError),\n    #[error(\"{0}\")]\n    Encode(#[from] EncodeError),\n}\n\n#[derive(Debug, thiserror::Error)]\npub enum FromWireFormatError {\n    #[error(\"{0}\")]\n    Bincode(#[from] DeserializeError),\n    #[error(\"{0}\")]\n    Protobuf(#[from] ProtobufDeserializeError),\n    #[error(\"{0}\")]\n    Decompress(#[from] std::io::Error),\n}\n\n#[derive(Debug, thiserror::Error)]\npub enum ProtobufDeserializeError {\n    #[error(\"{0}\")]\n    Transcode(#[from] TranscodeError),\n    #[error(\"{0}\")]\n    Decode(#[from] DecodeError),\n}\n\n#[derive(Debug, thiserror::Error)]\npub enum TranscodeError {\n    #[error(\"{0}\")]\n    BadInput(String),\n    #[error(\"{0}\")]\n    MetadataRecord(#[from] DeserializeError),\n}\n\npub trait ToWireFormat {\n    fn to_wire(\n        self,\n        format: InterpretedFormat,\n        compression: Option<Compression>,\n    ) -> impl std::future::Future<Output = Result<Bytes, ToWireFormatError>> + Send;\n}\n\npub trait FromWireFormat {\n    type T;\n    fn from_wire(\n        buf: &Bytes,\n    
    format: InterpretedFormat,\n        compression: Option<Compression>,\n    ) -> impl std::future::Future<Output = Result<Self::T, FromWireFormatError>> + Send;\n}\n\nimpl ToWireFormat for InterpretedWalRecords {\n    async fn to_wire(\n        self,\n        format: InterpretedFormat,\n        compression: Option<Compression>,\n    ) -> Result<Bytes, ToWireFormatError> {\n        use async_compression::Level;\n        use async_compression::tokio::write::ZstdEncoder;\n\n        let encode_res: Result<Bytes, ToWireFormatError> = match format {\n            InterpretedFormat::Bincode => {\n                let buf = BytesMut::new();\n                let mut buf = buf.writer();\n                self.ser_into(&mut buf)?;\n                Ok(buf.into_inner().freeze())\n            }\n            InterpretedFormat::Protobuf => {\n                let proto: proto::InterpretedWalRecords = self.try_into()?;\n                let mut buf = BytesMut::new();\n                proto\n                    .encode(&mut buf)\n                    .map_err(|e| ToWireFormatError::Protobuf(e.into()))?;\n\n                Ok(buf.freeze())\n            }\n        };\n\n        let buf = encode_res?;\n        let compressed_buf = match compression {\n            Some(Compression::Zstd { level }) => {\n                let mut encoder = ZstdEncoder::with_quality(\n                    Vec::with_capacity(buf.len() / 4),\n                    Level::Precise(level as i32),\n                );\n                encoder.write_all(&buf).await?;\n                encoder.shutdown().await?;\n                Bytes::from(encoder.into_inner())\n            }\n            None => buf,\n        };\n\n        Ok(compressed_buf)\n    }\n}\n\nimpl FromWireFormat for InterpretedWalRecords {\n    type T = Self;\n\n    async fn from_wire(\n        buf: &Bytes,\n        format: InterpretedFormat,\n        compression: Option<Compression>,\n    ) -> Result<Self, FromWireFormatError> {\n        let decompressed_buf 
= match compression {\n            Some(Compression::Zstd { .. }) => {\n                use async_compression::tokio::write::ZstdDecoder;\n                let mut decoded_buf = Vec::with_capacity(buf.len());\n                let mut decoder = ZstdDecoder::new(&mut decoded_buf);\n                decoder.write_all(buf).await?;\n                decoder.flush().await?;\n                Bytes::from(decoded_buf)\n            }\n            None => buf.clone(),\n        };\n\n        match format {\n            InterpretedFormat::Bincode => {\n                InterpretedWalRecords::des(&decompressed_buf).map_err(FromWireFormatError::Bincode)\n            }\n            InterpretedFormat::Protobuf => {\n                let proto = proto::InterpretedWalRecords::decode(decompressed_buf)\n                    .map_err(|e| FromWireFormatError::Protobuf(e.into()))?;\n                InterpretedWalRecords::try_from(proto)\n                    .map_err(|e| FromWireFormatError::Protobuf(e.into()))\n            }\n        }\n    }\n}\n\nimpl TryFrom<InterpretedWalRecords> for proto::InterpretedWalRecords {\n    type Error = SerializeError;\n\n    fn try_from(value: InterpretedWalRecords) -> Result<Self, Self::Error> {\n        let records = value\n            .records\n            .into_iter()\n            .map(proto::InterpretedWalRecord::try_from)\n            .collect::<Result<Vec<_>, _>>()?;\n        Ok(proto::InterpretedWalRecords {\n            records,\n            next_record_lsn: Some(value.next_record_lsn.0),\n            raw_wal_start_lsn: value.raw_wal_start_lsn.map(|l| l.0),\n        })\n    }\n}\n\nimpl TryFrom<InterpretedWalRecord> for proto::InterpretedWalRecord {\n    type Error = SerializeError;\n\n    fn try_from(value: InterpretedWalRecord) -> Result<Self, Self::Error> {\n        let metadata_record = value\n            .metadata_record\n            .map(|meta_rec| -> Result<Vec<u8>, Self::Error> {\n                let mut buf = Vec::new();\n                
meta_rec.ser_into(&mut buf)?;\n                Ok(buf)\n            })\n            .transpose()?;\n\n        Ok(proto::InterpretedWalRecord {\n            metadata_record,\n            batch: Some(proto::SerializedValueBatch::from(value.batch)),\n            next_record_lsn: value.next_record_lsn.0,\n            flush_uncommitted: matches!(value.flush_uncommitted, FlushUncommittedRecords::Yes),\n            xid: value.xid,\n        })\n    }\n}\n\nimpl From<SerializedValueBatch> for proto::SerializedValueBatch {\n    fn from(value: SerializedValueBatch) -> Self {\n        proto::SerializedValueBatch {\n            raw: value.raw,\n            metadata: value\n                .metadata\n                .into_iter()\n                .map(proto::ValueMeta::from)\n                .collect(),\n            max_lsn: value.max_lsn.0,\n            len: value.len as u64,\n        }\n    }\n}\n\nimpl From<ValueMeta> for proto::ValueMeta {\n    fn from(value: ValueMeta) -> Self {\n        match value {\n            ValueMeta::Observed(obs) => proto::ValueMeta {\n                r#type: proto::ValueMetaType::Observed.into(),\n                key: Some(proto::CompactKey::from(obs.key)),\n                lsn: obs.lsn.0,\n                batch_offset: None,\n                len: None,\n                will_init: None,\n            },\n            ValueMeta::Serialized(ser) => proto::ValueMeta {\n                r#type: proto::ValueMetaType::Serialized.into(),\n                key: Some(proto::CompactKey::from(ser.key)),\n                lsn: ser.lsn.0,\n                batch_offset: Some(ser.batch_offset),\n                len: Some(ser.len as u64),\n                will_init: Some(ser.will_init),\n            },\n        }\n    }\n}\n\nimpl From<CompactKey> for proto::CompactKey {\n    fn from(value: CompactKey) -> Self {\n        proto::CompactKey {\n            high: (value.raw() >> 64) as u64,\n            low: value.raw() as u64,\n        }\n    }\n}\n\nimpl 
TryFrom<proto::InterpretedWalRecords> for InterpretedWalRecords {\n    type Error = TranscodeError;\n\n    fn try_from(value: proto::InterpretedWalRecords) -> Result<Self, Self::Error> {\n        let records = value\n            .records\n            .into_iter()\n            .map(InterpretedWalRecord::try_from)\n            .collect::<Result<_, _>>()?;\n\n        Ok(InterpretedWalRecords {\n            records,\n            next_record_lsn: value\n                .next_record_lsn\n                .map(Lsn::from)\n                .expect(\"Always provided\"),\n            raw_wal_start_lsn: value.raw_wal_start_lsn.map(Lsn::from),\n        })\n    }\n}\n\nimpl TryFrom<proto::InterpretedWalRecord> for InterpretedWalRecord {\n    type Error = TranscodeError;\n\n    fn try_from(value: proto::InterpretedWalRecord) -> Result<Self, Self::Error> {\n        let metadata_record = value\n            .metadata_record\n            .map(|mrec| -> Result<_, DeserializeError> { MetadataRecord::des(&mrec) })\n            .transpose()?;\n\n        let batch = {\n            let batch = value.batch.ok_or_else(|| {\n                TranscodeError::BadInput(\"InterpretedWalRecord::batch missing\".to_string())\n            })?;\n\n            SerializedValueBatch::try_from(batch)?\n        };\n\n        Ok(InterpretedWalRecord {\n            metadata_record,\n            batch,\n            next_record_lsn: Lsn(value.next_record_lsn),\n            flush_uncommitted: if value.flush_uncommitted {\n                FlushUncommittedRecords::Yes\n            } else {\n                FlushUncommittedRecords::No\n            },\n            xid: value.xid,\n        })\n    }\n}\n\nimpl TryFrom<proto::SerializedValueBatch> for SerializedValueBatch {\n    type Error = TranscodeError;\n\n    fn try_from(value: proto::SerializedValueBatch) -> Result<Self, Self::Error> {\n        let metadata = value\n            .metadata\n            .into_iter()\n            .map(ValueMeta::try_from)\n           
 .collect::<Result<Vec<_>, _>>()?;\n\n        Ok(SerializedValueBatch {\n            raw: value.raw,\n            metadata,\n            max_lsn: Lsn(value.max_lsn),\n            len: value.len as usize,\n        })\n    }\n}\n\nimpl TryFrom<proto::ValueMeta> for ValueMeta {\n    type Error = TranscodeError;\n\n    fn try_from(value: proto::ValueMeta) -> Result<Self, Self::Error> {\n        match proto::ValueMetaType::try_from(value.r#type) {\n            Ok(proto::ValueMetaType::Serialized) => {\n                Ok(ValueMeta::Serialized(SerializedValueMeta {\n                    key: value\n                        .key\n                        .ok_or_else(|| {\n                            TranscodeError::BadInput(\"ValueMeta::key missing\".to_string())\n                        })?\n                        .into(),\n                    lsn: Lsn(value.lsn),\n                    batch_offset: value.batch_offset.ok_or_else(|| {\n                        TranscodeError::BadInput(\"ValueMeta::batch_offset missing\".to_string())\n                    })?,\n                    len: value.len.ok_or_else(|| {\n                        TranscodeError::BadInput(\"ValueMeta::len missing\".to_string())\n                    })? 
as usize,\n                    will_init: value.will_init.ok_or_else(|| {\n                        TranscodeError::BadInput(\"ValueMeta::will_init missing\".to_string())\n                    })?,\n                }))\n            }\n            Ok(proto::ValueMetaType::Observed) => Ok(ValueMeta::Observed(ObservedValueMeta {\n                key: value\n                    .key\n                    .ok_or_else(|| TranscodeError::BadInput(\"ValueMeta::key missing\".to_string()))?\n                    .into(),\n                lsn: Lsn(value.lsn),\n            })),\n            Err(_) => Err(TranscodeError::BadInput(format!(\n                \"Unexpected ValueMeta::type {}\",\n                value.r#type\n            ))),\n        }\n    }\n}\n\nimpl From<proto::CompactKey> for CompactKey {\n    fn from(value: proto::CompactKey) -> Self {\n        (((value.high as i128) << 64) | (value.low as i128)).into()\n    }\n}\n\n#[test]\nfn test_compact_key_with_large_relnode() {\n    use pageserver_api::key::Key;\n\n    let inputs = vec![\n        Key {\n            field1: 0,\n            field2: 0x100,\n            field3: 0x200,\n            field4: 0,\n            field5: 0x10,\n            field6: 0x5,\n        },\n        Key {\n            field1: 0,\n            field2: 0x100,\n            field3: 0x200,\n            field4: 0x007FFFFF,\n            field5: 0x10,\n            field6: 0x5,\n        },\n        Key {\n            field1: 0,\n            field2: 0x100,\n            field3: 0x200,\n            field4: 0x00800000,\n            field5: 0x10,\n            field6: 0x5,\n        },\n        Key {\n            field1: 0,\n            field2: 0x100,\n            field3: 0x200,\n            field4: 0x00800001,\n            field5: 0x10,\n            field6: 0x5,\n        },\n        Key {\n            field1: 0,\n            field2: 0xFFFFFFFF,\n            field3: 0xFFFFFFFF,\n            field4: 0xFFFFFFFF,\n            field5: 0x0,\n            field6: 0x0,\n  
      },\n    ];\n\n    for input in inputs {\n        assert!(input.is_valid_key_on_write_path());\n        let compact = input.to_compact();\n        let proto: proto::CompactKey = compact.into();\n        let from_proto: CompactKey = proto.into();\n\n        assert_eq!(\n            compact, from_proto,\n            \"Round trip failed for key with relnode={:#x}\",\n            input.field4\n        );\n    }\n}\n"
  },
  {
    "path": "libs/walproposer/Cargo.toml",
    "content": "[package]\nname = \"walproposer\"\nversion = \"0.1.0\"\nedition.workspace = true\nlicense.workspace = true\n\n[dependencies]\nanyhow.workspace = true\nutils.workspace = true\npostgres_ffi.workspace = true\n\n[build-dependencies]\nanyhow.workspace = true\nbindgen.workspace = true\n"
  },
  {
    "path": "libs/walproposer/bindgen_deps.h",
    "content": "#include \"postgres.h\"\n#include \"walproposer.h\"\n"
  },
  {
    "path": "libs/walproposer/build.rs",
    "content": "//! Links with walproposer, pgcommon, pgport and runs bindgen on walproposer.h\n//! to generate Rust bindings for it.\n\nuse std::env;\nuse std::path::PathBuf;\nuse std::process::Command;\n\nuse anyhow::{Context, anyhow};\n\nconst WALPROPOSER_PG_VERSION: &str = \"v17\";\n\nfn main() -> anyhow::Result<()> {\n    // Tell cargo to invalidate the built crate whenever the wrapper changes\n    println!(\"cargo:rerun-if-changed=bindgen_deps.h\");\n\n    let root_path = PathBuf::from(env!(\"CARGO_MANIFEST_DIR\")).join(\"../..\");\n\n    // Finding the location of built libraries and Postgres C headers:\n    // - if POSTGRES_INSTALL_DIR is set look into it, otherwise look into `<project_root>/pg_install`\n    // - if there's a `bin/pg_config` file use it for getting include server, otherwise use `<project_root>/pg_install/{PG_MAJORVERSION}/include/postgresql/server`\n    let pg_install_dir = if let Some(postgres_install_dir) = env::var_os(\"POSTGRES_INSTALL_DIR\") {\n        postgres_install_dir.into()\n    } else {\n        root_path.join(\"pg_install\")\n    };\n\n    let pg_install_abs = std::fs::canonicalize(pg_install_dir)?;\n    let walproposer_lib_dir = root_path.join(\"build/walproposer-lib\");\n    let walproposer_lib_search_str = walproposer_lib_dir\n        .to_str()\n        .ok_or(anyhow!(\"Bad non-UTF path\"))?;\n\n    let pgxn_neon = root_path.join(\"pgxn/neon\");\n    let pgxn_neon = std::fs::canonicalize(pgxn_neon)?;\n    let pgxn_neon = pgxn_neon.to_str().ok_or(anyhow!(\"Bad non-UTF path\"))?;\n\n    println!(\"cargo:rustc-link-lib=static=walproposer\");\n    println!(\"cargo:rustc-link-lib=static=pgport\");\n    println!(\"cargo:rustc-link-lib=static=pgcommon\");\n    println!(\"cargo:rustc-link-search={walproposer_lib_search_str}\");\n\n    // Rebuild crate when libwalproposer.a changes\n    println!(\"cargo:rerun-if-changed={walproposer_lib_search_str}/libwalproposer.a\");\n\n    let pg_config_bin = pg_install_abs\n        
.join(WALPROPOSER_PG_VERSION)\n        .join(\"bin\")\n        .join(\"pg_config\");\n    let inc_server_path: String = if pg_config_bin.exists() {\n        let output = Command::new(pg_config_bin)\n            .arg(\"--includedir-server\")\n            .output()\n            .context(\"failed to execute `pg_config --includedir-server`\")?;\n\n        if !output.status.success() {\n            panic!(\"`pg_config --includedir-server` failed\")\n        }\n\n        String::from_utf8(output.stdout)\n            .context(\"pg_config output is not UTF-8\")?\n            .trim_end()\n            .into()\n    } else {\n        let server_path = pg_install_abs\n            .join(WALPROPOSER_PG_VERSION)\n            .join(\"include\")\n            .join(\"postgresql\")\n            .join(\"server\")\n            .into_os_string();\n        server_path\n            .into_string()\n            .map_err(|s| anyhow!(\"Bad postgres server path {s:?}\"))?\n    };\n\n    let unwind_abi_functions = [\n        \"log_internal\",\n        \"recovery_download\",\n        \"start_streaming\",\n        \"finish_sync_safekeepers\",\n        \"wait_event_set\",\n        \"WalProposerStart\",\n    ];\n\n    // The bindgen::Builder is the main entry point\n    // to bindgen, and lets you build up options for\n    // the resulting bindings.\n    let mut builder = bindgen::Builder::default()\n        // The input header we would like to generate\n        // bindings for.\n        .header(\"bindgen_deps.h\")\n        // Tell cargo to invalidate the built crate whenever any of the\n        // included header files changed.\n        .parse_callbacks(Box::new(bindgen::CargoCallbacks::new()))\n        .allowlist_type(\"WalProposer\")\n        .allowlist_type(\"WalProposerConfig\")\n        .allowlist_type(\"walproposer_api\")\n        .allowlist_function(\"WalProposerCreate\")\n        .allowlist_function(\"WalProposerStart\")\n        .allowlist_function(\"WalProposerBroadcast\")\n        
.allowlist_function(\"WalProposerPoll\")\n        .allowlist_function(\"WalProposerFree\")\n        .allowlist_function(\"SafekeeperStateDesiredEvents\")\n        .allowlist_var(\"DEBUG5\")\n        .allowlist_var(\"DEBUG4\")\n        .allowlist_var(\"DEBUG3\")\n        .allowlist_var(\"DEBUG2\")\n        .allowlist_var(\"DEBUG1\")\n        .allowlist_var(\"LOG\")\n        .allowlist_var(\"INFO\")\n        .allowlist_var(\"NOTICE\")\n        .allowlist_var(\"WARNING\")\n        .allowlist_var(\"ERROR\")\n        .allowlist_var(\"FATAL\")\n        .allowlist_var(\"PANIC\")\n        .allowlist_var(\"PG_VERSION_NUM\")\n        .allowlist_var(\"WPEVENT\")\n        .allowlist_var(\"WL_LATCH_SET\")\n        .allowlist_var(\"WL_SOCKET_READABLE\")\n        .allowlist_var(\"WL_SOCKET_WRITEABLE\")\n        .allowlist_var(\"WL_TIMEOUT\")\n        .allowlist_var(\"WL_SOCKET_CLOSED\")\n        .allowlist_var(\"WL_SOCKET_MASK\")\n        .clang_arg(\"-DWALPROPOSER_LIB\")\n        .clang_arg(format!(\"-I{pgxn_neon}\"))\n        .clang_arg(format!(\"-I{inc_server_path}\"));\n\n    for name in unwind_abi_functions {\n        builder = builder.override_abi(bindgen::Abi::CUnwind, name);\n    }\n    let bindings = builder\n        // Finish the builder and generate the bindings.\n        .generate()\n        // Unwrap the Result and panic on failure.\n        .expect(\"Unable to generate bindings\");\n\n    // Write the bindings to the $OUT_DIR/bindings.rs file.\n    let out_path = PathBuf::from(env::var(\"OUT_DIR\").unwrap()).join(\"bindings.rs\");\n    bindings\n        .write_to_file(out_path)\n        .expect(\"Couldn't write bindings!\");\n\n    Ok(())\n}\n"
  },
  {
    "path": "libs/walproposer/src/api_bindings.rs",
    "content": "//! A C-Rust shim: defines implementation of C walproposer API, assuming wp\n//! callback_data stores Box to some Rust implementation.\n\n#![allow(dead_code)]\n\nuse std::ffi::{CStr, CString};\n\nuse crate::bindings::{\n    NeonWALReadResult, PGAsyncReadResult, PGAsyncWriteResult, Safekeeper, Size, StringInfoData,\n    TimestampTz, WalProposer, WalProposerConnStatusType, WalProposerConnectPollStatusType,\n    WalProposerExecStatusType, WalproposerShmemState, XLogRecPtr, uint32, walproposer_api,\n};\nuse crate::walproposer::{ApiImpl, StreamingCallback, WaitResult};\n\nextern \"C\" fn get_shmem_state(wp: *mut WalProposer) -> *mut WalproposerShmemState {\n    unsafe {\n        let callback_data = (*(*wp).config).callback_data;\n        let api = callback_data as *mut Box<dyn ApiImpl>;\n        (*api).get_shmem_state()\n    }\n}\n\nextern \"C-unwind\" fn start_streaming(wp: *mut WalProposer, startpos: XLogRecPtr) {\n    unsafe {\n        let callback_data = (*(*wp).config).callback_data;\n        let api = callback_data as *mut Box<dyn ApiImpl>;\n        let callback = StreamingCallback::new(wp);\n        (*api).start_streaming(startpos, &callback);\n    }\n}\n\nextern \"C\" fn get_flush_rec_ptr(wp: *mut WalProposer) -> XLogRecPtr {\n    unsafe {\n        let callback_data = (*(*wp).config).callback_data;\n        let api = callback_data as *mut Box<dyn ApiImpl>;\n        (*api).get_flush_rec_ptr()\n    }\n}\n\nextern \"C\" fn update_donor(wp: *mut WalProposer, donor: *mut Safekeeper, donor_lsn: XLogRecPtr) {\n    unsafe {\n        let callback_data = (*(*wp).config).callback_data;\n        let api = callback_data as *mut Box<dyn ApiImpl>;\n        (*api).update_donor(&mut (*donor), donor_lsn)\n    }\n}\n\nextern \"C\" fn get_current_timestamp(wp: *mut WalProposer) -> TimestampTz {\n    unsafe {\n        let callback_data = (*(*wp).config).callback_data;\n        let api = callback_data as *mut Box<dyn ApiImpl>;\n        (*api).get_current_timestamp()\n 
   }\n}\n\nextern \"C\" fn conn_error_message(sk: *mut Safekeeper) -> *mut ::std::os::raw::c_char {\n    unsafe {\n        let callback_data = (*(*(*sk).wp).config).callback_data;\n        let api = callback_data as *mut Box<dyn ApiImpl>;\n        let msg = (*api).conn_error_message(&mut (*sk));\n        let msg = CString::new(msg).unwrap();\n        // TODO: fix leaking error message\n        msg.into_raw()\n    }\n}\n\nextern \"C\" fn conn_status(sk: *mut Safekeeper) -> WalProposerConnStatusType {\n    unsafe {\n        let callback_data = (*(*(*sk).wp).config).callback_data;\n        let api = callback_data as *mut Box<dyn ApiImpl>;\n        (*api).conn_status(&mut (*sk))\n    }\n}\n\nextern \"C\" fn conn_connect_start(sk: *mut Safekeeper) {\n    unsafe {\n        let callback_data = (*(*(*sk).wp).config).callback_data;\n        let api = callback_data as *mut Box<dyn ApiImpl>;\n        (*api).conn_connect_start(&mut (*sk))\n    }\n}\n\nextern \"C\" fn conn_connect_poll(sk: *mut Safekeeper) -> WalProposerConnectPollStatusType {\n    unsafe {\n        let callback_data = (*(*(*sk).wp).config).callback_data;\n        let api = callback_data as *mut Box<dyn ApiImpl>;\n        (*api).conn_connect_poll(&mut (*sk))\n    }\n}\n\nextern \"C\" fn conn_send_query(sk: *mut Safekeeper, query: *mut ::std::os::raw::c_char) -> bool {\n    let query = unsafe { CStr::from_ptr(query) };\n    let query = query.to_str().unwrap();\n\n    unsafe {\n        let callback_data = (*(*(*sk).wp).config).callback_data;\n        let api = callback_data as *mut Box<dyn ApiImpl>;\n        (*api).conn_send_query(&mut (*sk), query)\n    }\n}\n\nextern \"C\" fn conn_get_query_result(sk: *mut Safekeeper) -> WalProposerExecStatusType {\n    unsafe {\n        let callback_data = (*(*(*sk).wp).config).callback_data;\n        let api = callback_data as *mut Box<dyn ApiImpl>;\n        (*api).conn_get_query_result(&mut (*sk))\n    }\n}\n\nextern \"C\" fn conn_flush(sk: *mut Safekeeper) -> 
::std::os::raw::c_int {\n    unsafe {\n        let callback_data = (*(*(*sk).wp).config).callback_data;\n        let api = callback_data as *mut Box<dyn ApiImpl>;\n        (*api).conn_flush(&mut (*sk))\n    }\n}\n\nextern \"C\" fn conn_finish(sk: *mut Safekeeper) {\n    unsafe {\n        let callback_data = (*(*(*sk).wp).config).callback_data;\n        let api = callback_data as *mut Box<dyn ApiImpl>;\n        (*api).conn_finish(&mut (*sk))\n    }\n}\n\nextern \"C\" fn conn_async_read(\n    sk: *mut Safekeeper,\n    buf: *mut *mut ::std::os::raw::c_char,\n    amount: *mut ::std::os::raw::c_int,\n) -> PGAsyncReadResult {\n    unsafe {\n        let callback_data = (*(*(*sk).wp).config).callback_data;\n        let api = callback_data as *mut Box<dyn ApiImpl>;\n\n        // This function has guarantee that returned buf will be valid until\n        // the next call. So we can store a Vec in each Safekeeper and reuse\n        // it on the next call.\n        let mut inbuf = take_vec_u8(&mut (*sk).inbuf).unwrap_or_default();\n        inbuf.clear();\n\n        let result = (*api).conn_async_read(&mut (*sk), &mut inbuf);\n\n        // Put a Vec back to sk->inbuf and return data ptr.\n        *amount = inbuf.len() as i32;\n        *buf = store_vec_u8(&mut (*sk).inbuf, inbuf);\n\n        result\n    }\n}\n\nextern \"C\" fn conn_async_write(\n    sk: *mut Safekeeper,\n    buf: *const ::std::os::raw::c_void,\n    size: usize,\n) -> PGAsyncWriteResult {\n    unsafe {\n        let buf = std::slice::from_raw_parts(buf as *const u8, size);\n        let callback_data = (*(*(*sk).wp).config).callback_data;\n        let api = callback_data as *mut Box<dyn ApiImpl>;\n        (*api).conn_async_write(&mut (*sk), buf)\n    }\n}\n\nextern \"C\" fn conn_blocking_write(\n    sk: *mut Safekeeper,\n    buf: *const ::std::os::raw::c_void,\n    size: usize,\n) -> bool {\n    unsafe {\n        let buf = std::slice::from_raw_parts(buf as *const u8, size);\n        let callback_data = 
(*(*(*sk).wp).config).callback_data;\n        let api = callback_data as *mut Box<dyn ApiImpl>;\n        (*api).conn_blocking_write(&mut (*sk), buf)\n    }\n}\n\nextern \"C-unwind\" fn recovery_download(wp: *mut WalProposer, sk: *mut Safekeeper) -> bool {\n    unsafe {\n        let callback_data = (*(*(*sk).wp).config).callback_data;\n        let api = callback_data as *mut Box<dyn ApiImpl>;\n\n        // currently `recovery_download` is always called right after election\n        (*api).after_election(&mut (*wp));\n\n        (*api).recovery_download(&mut (*wp), &mut (*sk))\n    }\n}\n\nextern \"C\" fn wal_reader_allocate(sk: *mut Safekeeper) {\n    unsafe {\n        let callback_data = (*(*(*sk).wp).config).callback_data;\n        let api = callback_data as *mut Box<dyn ApiImpl>;\n        (*api).wal_reader_allocate(&mut (*sk));\n    }\n}\n\n#[allow(clippy::unnecessary_cast)]\nextern \"C\" fn wal_read(\n    sk: *mut Safekeeper,\n    buf: *mut ::std::os::raw::c_char,\n    startptr: XLogRecPtr,\n    count: Size,\n    _errmsg: *mut *mut ::std::os::raw::c_char,\n) -> NeonWALReadResult {\n    unsafe {\n        let buf = std::slice::from_raw_parts_mut(buf as *mut u8, count);\n        let callback_data = (*(*(*sk).wp).config).callback_data;\n        let api = callback_data as *mut Box<dyn ApiImpl>;\n        // TODO: errmsg is not forwarded\n        (*api).wal_read(&mut (*sk), buf, startptr)\n    }\n}\n\nextern \"C\" fn wal_reader_events(sk: *mut Safekeeper) -> uint32 {\n    unsafe {\n        let callback_data = (*(*(*sk).wp).config).callback_data;\n        let api = callback_data as *mut Box<dyn ApiImpl>;\n        (*api).wal_reader_events(&mut (*sk))\n    }\n}\n\nextern \"C\" fn init_event_set(wp: *mut WalProposer) {\n    unsafe {\n        let callback_data = (*(*wp).config).callback_data;\n        let api = callback_data as *mut Box<dyn ApiImpl>;\n        (*api).init_event_set(&mut (*wp));\n    }\n}\n\nextern \"C\" fn update_event_set(sk: *mut Safekeeper, events: uint32) 
{\n    unsafe {\n        let callback_data = (*(*(*sk).wp).config).callback_data;\n        let api = callback_data as *mut Box<dyn ApiImpl>;\n        (*api).update_event_set(&mut (*sk), events);\n    }\n}\n\nextern \"C\" fn active_state_update_event_set(sk: *mut Safekeeper) {\n    unsafe {\n        let callback_data = (*(*(*sk).wp).config).callback_data;\n        let api = callback_data as *mut Box<dyn ApiImpl>;\n        (*api).active_state_update_event_set(&mut (*sk));\n    }\n}\n\nextern \"C\" fn add_safekeeper_event_set(sk: *mut Safekeeper, events: uint32) {\n    unsafe {\n        let callback_data = (*(*(*sk).wp).config).callback_data;\n        let api = callback_data as *mut Box<dyn ApiImpl>;\n        (*api).add_safekeeper_event_set(&mut (*sk), events);\n    }\n}\n\nextern \"C\" fn rm_safekeeper_event_set(sk: *mut Safekeeper) {\n    unsafe {\n        let callback_data = (*(*(*sk).wp).config).callback_data;\n        let api = callback_data as *mut Box<dyn ApiImpl>;\n        (*api).rm_safekeeper_event_set(&mut (*sk));\n    }\n}\n\nextern \"C-unwind\" fn wait_event_set(\n    wp: *mut WalProposer,\n    timeout: ::std::os::raw::c_long,\n    event_sk: *mut *mut Safekeeper,\n    events: *mut uint32,\n) -> ::std::os::raw::c_int {\n    unsafe {\n        let callback_data = (*(*wp).config).callback_data;\n        let api = callback_data as *mut Box<dyn ApiImpl>;\n        let result = (*api).wait_event_set(&mut (*wp), timeout);\n        match result {\n            WaitResult::Latch => {\n                *event_sk = std::ptr::null_mut();\n                *events = crate::bindings::WL_LATCH_SET;\n                1\n            }\n            WaitResult::Timeout => {\n                *event_sk = std::ptr::null_mut();\n                // WaitEventSetWait returns 0 for timeout.\n                *events = 0;\n                0\n            }\n            WaitResult::Network(sk, event_mask) => {\n                *event_sk = sk;\n                *events = event_mask;\n           
     1\n            }\n        }\n    }\n}\n\nextern \"C\" fn strong_random(\n    wp: *mut WalProposer,\n    buf: *mut ::std::os::raw::c_void,\n    len: usize,\n) -> bool {\n    unsafe {\n        let buf = std::slice::from_raw_parts_mut(buf as *mut u8, len);\n        let callback_data = (*(*wp).config).callback_data;\n        let api = callback_data as *mut Box<dyn ApiImpl>;\n        (*api).strong_random(buf)\n    }\n}\n\nextern \"C\" fn get_redo_start_lsn(wp: *mut WalProposer) -> XLogRecPtr {\n    unsafe {\n        let callback_data = (*(*wp).config).callback_data;\n        let api = callback_data as *mut Box<dyn ApiImpl>;\n        (*api).get_redo_start_lsn()\n    }\n}\n\nunsafe extern \"C-unwind\" fn finish_sync_safekeepers(wp: *mut WalProposer, lsn: XLogRecPtr) -> ! {\n    unsafe {\n        let callback_data = (*(*wp).config).callback_data;\n        let api = callback_data as *mut Box<dyn ApiImpl>;\n        (*api).finish_sync_safekeepers(lsn)\n    }\n}\n\nextern \"C\" fn process_safekeeper_feedback(wp: *mut WalProposer, sk: *mut Safekeeper) {\n    unsafe {\n        let callback_data = (*(*wp).config).callback_data;\n        let api = callback_data as *mut Box<dyn ApiImpl>;\n        (*api).process_safekeeper_feedback(&mut (*wp), &mut (*sk));\n    }\n}\n\nextern \"C-unwind\" fn log_internal(\n    wp: *mut WalProposer,\n    level: ::std::os::raw::c_int,\n    line: *const ::std::os::raw::c_char,\n) {\n    unsafe {\n        let callback_data = (*(*wp).config).callback_data;\n        let api = callback_data as *mut Box<dyn ApiImpl>;\n        let line = CStr::from_ptr(line);\n        let line = line.to_str().unwrap();\n        (*api).log_internal(&mut (*wp), Level::from(level as u32), line)\n    }\n}\n\n/* BEGIN_HADRON */\nextern \"C\" fn reset_safekeeper_statuses_for_metrics(wp: *mut WalProposer, num_safekeepers: u32) {\n    unsafe {\n        let callback_data = (*(*wp).config).callback_data;\n        let api = callback_data as *mut Box<dyn ApiImpl>;\n        if 
api.is_null() {\n            return;\n        }\n        (*api).reset_safekeeper_statuses_for_metrics(&mut (*wp), num_safekeepers);\n    }\n}\n\nextern \"C\" fn update_safekeeper_status_for_metrics(\n    wp: *mut WalProposer,\n    sk_index: u32,\n    status: u8,\n) {\n    unsafe {\n        let callback_data = (*(*wp).config).callback_data;\n        let api = callback_data as *mut Box<dyn ApiImpl>;\n        if api.is_null() {\n            return;\n        }\n        (*api).update_safekeeper_status_for_metrics(&mut (*wp), sk_index, status);\n    }\n}\n/* END_HADRON */\n\n#[derive(Debug, PartialEq)]\npub enum Level {\n    Debug5,\n    Debug4,\n    Debug3,\n    Debug2,\n    Debug1,\n    Log,\n    Info,\n    Notice,\n    Warning,\n    Error,\n    Fatal,\n    Panic,\n    WPEvent,\n}\n\nimpl Level {\n    pub fn from(elevel: u32) -> Level {\n        use crate::bindings::*;\n\n        match elevel {\n            DEBUG5 => Level::Debug5,\n            DEBUG4 => Level::Debug4,\n            DEBUG3 => Level::Debug3,\n            DEBUG2 => Level::Debug2,\n            DEBUG1 => Level::Debug1,\n            LOG => Level::Log,\n            INFO => Level::Info,\n            NOTICE => Level::Notice,\n            WARNING => Level::Warning,\n            ERROR => Level::Error,\n            FATAL => Level::Fatal,\n            PANIC => Level::Panic,\n            WPEVENT => Level::WPEvent,\n            _ => panic!(\"unknown log level {elevel}\"),\n        }\n    }\n}\n\npub(crate) fn create_api() -> walproposer_api {\n    walproposer_api {\n        get_shmem_state: Some(get_shmem_state),\n        start_streaming: Some(start_streaming),\n        get_flush_rec_ptr: Some(get_flush_rec_ptr),\n        update_donor: Some(update_donor),\n        get_current_timestamp: Some(get_current_timestamp),\n        conn_error_message: Some(conn_error_message),\n        conn_status: Some(conn_status),\n        conn_connect_start: Some(conn_connect_start),\n        conn_connect_poll: Some(conn_connect_poll),\n 
       conn_send_query: Some(conn_send_query),\n        conn_get_query_result: Some(conn_get_query_result),\n        conn_flush: Some(conn_flush),\n        conn_finish: Some(conn_finish),\n        conn_async_read: Some(conn_async_read),\n        conn_async_write: Some(conn_async_write),\n        conn_blocking_write: Some(conn_blocking_write),\n        recovery_download: Some(recovery_download),\n        wal_reader_allocate: Some(wal_reader_allocate),\n        wal_read: Some(wal_read),\n        wal_reader_events: Some(wal_reader_events),\n        init_event_set: Some(init_event_set),\n        update_event_set: Some(update_event_set),\n        active_state_update_event_set: Some(active_state_update_event_set),\n        add_safekeeper_event_set: Some(add_safekeeper_event_set),\n        rm_safekeeper_event_set: Some(rm_safekeeper_event_set),\n        wait_event_set: Some(wait_event_set),\n        strong_random: Some(strong_random),\n        get_redo_start_lsn: Some(get_redo_start_lsn),\n        finish_sync_safekeepers: Some(finish_sync_safekeepers),\n        process_safekeeper_feedback: Some(process_safekeeper_feedback),\n        log_internal: Some(log_internal),\n        /* BEGIN_HADRON */\n        reset_safekeeper_statuses_for_metrics: Some(reset_safekeeper_statuses_for_metrics),\n        update_safekeeper_status_for_metrics: Some(update_safekeeper_status_for_metrics),\n        /* END_HADRON */\n    }\n}\n\npub fn empty_shmem() -> crate::bindings::WalproposerShmemState {\n    let empty_feedback = crate::bindings::PageserverFeedback {\n        present: false,\n        currentClusterSize: 0,\n        last_received_lsn: 0,\n        disk_consistent_lsn: 0,\n        remote_consistent_lsn: 0,\n        replytime: 0,\n        shard_number: 0,\n        corruption_detected: false,\n    };\n\n    let empty_wal_rate_limiter = crate::bindings::WalRateLimiter {\n        effective_max_wal_bytes_per_second: crate::bindings::pg_atomic_uint32 { value: 0 },\n        should_limit: 
crate::bindings::pg_atomic_uint32 { value: 0 },\n        sent_bytes: 0,\n        batch_start_time_us: crate::bindings::pg_atomic_uint64 { value: 0 },\n        batch_end_time_us: crate::bindings::pg_atomic_uint64 { value: 0 },\n    };\n\n    crate::bindings::WalproposerShmemState {\n        propEpochStartLsn: crate::bindings::pg_atomic_uint64 { value: 0 },\n        donor_name: [0; 64],\n        donor_conninfo: [0; 1024],\n        donor_lsn: 0,\n        mutex: 0,\n        mineLastElectedTerm: crate::bindings::pg_atomic_uint64 { value: 0 },\n        backpressureThrottlingTime: crate::bindings::pg_atomic_uint64 { value: 0 },\n        currentClusterSize: crate::bindings::pg_atomic_uint64 { value: 0 },\n        shard_ps_feedback: [empty_feedback; 128],\n        num_shards: 0,\n        replica_promote: false,\n        min_ps_feedback: empty_feedback,\n        wal_rate_limiter: empty_wal_rate_limiter,\n        num_safekeepers: 0,\n        safekeeper_status: [0; 32],\n    }\n}\n\nimpl std::fmt::Display for Level {\n    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {\n        write!(f, \"{self:?}\")\n    }\n}\n\n/// Take ownership of `Vec<u8>` from StringInfoData.\n#[allow(clippy::unnecessary_cast)]\npub(crate) fn take_vec_u8(pg: &mut StringInfoData) -> Option<Vec<u8>> {\n    if pg.data.is_null() {\n        return None;\n    }\n\n    let ptr = pg.data as *mut u8;\n    let length = pg.len as usize;\n    let capacity = pg.maxlen as usize;\n\n    pg.data = std::ptr::null_mut();\n    pg.len = 0;\n    pg.maxlen = 0;\n\n    unsafe { Some(Vec::from_raw_parts(ptr, length, capacity)) }\n}\n\n/// Store `Vec<u8>` in StringInfoData.\nfn store_vec_u8(pg: &mut StringInfoData, vec: Vec<u8>) -> *mut ::std::os::raw::c_char {\n    let ptr = vec.as_ptr() as *mut ::std::os::raw::c_char;\n    let length = vec.len();\n    let capacity = vec.capacity();\n\n    assert!(pg.data.is_null());\n\n    pg.data = ptr;\n    pg.len = length as i32;\n    pg.maxlen = capacity as i32;\n\n    
std::mem::forget(vec);\n\n    ptr\n}\n"
  },
  {
    "path": "libs/walproposer/src/lib.rs",
    "content": "pub mod bindings {\n    #![allow(non_upper_case_globals)]\n    #![allow(non_camel_case_types)]\n    #![allow(non_snake_case)]\n    // bindgen creates some unsafe code with no doc comments.\n    #![allow(clippy::missing_safety_doc)]\n    // noted at 1.63 that in many cases there's a u32 -> u32 transmutes in bindgen code.\n    #![allow(clippy::useless_transmute)]\n\n    include!(concat!(env!(\"OUT_DIR\"), \"/bindings.rs\"));\n}\n\npub mod api_bindings;\npub mod walproposer;\n"
  },
  {
    "path": "libs/walproposer/src/walproposer.rs",
    "content": "#![allow(clippy::todo)]\n\nuse std::ffi::CString;\nuse std::str::FromStr;\n\nuse postgres_ffi::WAL_SEGMENT_SIZE;\nuse utils::id::TenantTimelineId;\nuse utils::lsn::Lsn;\n\nuse crate::api_bindings::{Level, create_api, take_vec_u8};\nuse crate::bindings::{\n    NeonWALReadResult, Safekeeper, WalProposer, WalProposerBroadcast, WalProposerConfig,\n    WalProposerCreate, WalProposerFree, WalProposerPoll, WalProposerStart,\n};\n\n/// Rust high-level wrapper for C walproposer API. Many methods are not required\n/// for simple cases, hence todo!() in default implementations.\n///\n/// Refer to `pgxn/neon/walproposer.h` for documentation.\npub trait ApiImpl {\n    fn get_shmem_state(&self) -> *mut crate::bindings::WalproposerShmemState {\n        todo!()\n    }\n\n    fn start_streaming(&self, _startpos: u64, _callback: &StreamingCallback) {\n        todo!()\n    }\n\n    fn get_flush_rec_ptr(&self) -> u64 {\n        todo!()\n    }\n\n    fn update_donor(&self, _donor: &mut Safekeeper, _donor_lsn: u64) {\n        todo!()\n    }\n\n    fn get_current_timestamp(&self) -> i64 {\n        todo!()\n    }\n\n    fn conn_error_message(&self, _sk: &mut Safekeeper) -> String {\n        todo!()\n    }\n\n    fn conn_status(&self, _sk: &mut Safekeeper) -> crate::bindings::WalProposerConnStatusType {\n        todo!()\n    }\n\n    fn conn_connect_start(&self, _sk: &mut Safekeeper) {\n        todo!()\n    }\n\n    fn conn_connect_poll(\n        &self,\n        _sk: &mut Safekeeper,\n    ) -> crate::bindings::WalProposerConnectPollStatusType {\n        todo!()\n    }\n\n    fn conn_send_query(&self, _sk: &mut Safekeeper, _query: &str) -> bool {\n        todo!()\n    }\n\n    fn conn_get_query_result(\n        &self,\n        _sk: &mut Safekeeper,\n    ) -> crate::bindings::WalProposerExecStatusType {\n        todo!()\n    }\n\n    fn conn_flush(&self, _sk: &mut Safekeeper) -> i32 {\n        todo!()\n    }\n\n    fn conn_finish(&self, _sk: &mut Safekeeper) {\n        
todo!()\n    }\n\n    fn conn_async_read(\n        &self,\n        _sk: &mut Safekeeper,\n        _vec: &mut Vec<u8>,\n    ) -> crate::bindings::PGAsyncReadResult {\n        todo!()\n    }\n\n    fn conn_async_write(\n        &self,\n        _sk: &mut Safekeeper,\n        _buf: &[u8],\n    ) -> crate::bindings::PGAsyncWriteResult {\n        todo!()\n    }\n\n    fn conn_blocking_write(&self, _sk: &mut Safekeeper, _buf: &[u8]) -> bool {\n        todo!()\n    }\n\n    fn recovery_download(&self, _wp: &mut WalProposer, _sk: &mut Safekeeper) -> bool {\n        todo!()\n    }\n\n    fn wal_reader_allocate(&self, _sk: &mut Safekeeper) -> NeonWALReadResult {\n        todo!()\n    }\n\n    fn wal_read(&self, _sk: &mut Safekeeper, _buf: &mut [u8], _startpos: u64) -> NeonWALReadResult {\n        todo!()\n    }\n\n    fn wal_reader_events(&self, _sk: &mut Safekeeper) -> u32 {\n        todo!()\n    }\n\n    fn init_event_set(&self, _wp: &mut WalProposer) {\n        todo!()\n    }\n\n    fn update_event_set(&self, _sk: &mut Safekeeper, _events_mask: u32) {\n        todo!()\n    }\n\n    fn active_state_update_event_set(&self, _sk: &mut Safekeeper) {\n        todo!()\n    }\n\n    fn add_safekeeper_event_set(&self, _sk: &mut Safekeeper, _events_mask: u32) {\n        todo!()\n    }\n\n    fn rm_safekeeper_event_set(&self, _sk: &mut Safekeeper) {\n        todo!()\n    }\n\n    fn wait_event_set(&self, _wp: &mut WalProposer, _timeout_millis: i64) -> WaitResult {\n        todo!()\n    }\n\n    fn strong_random(&self, _buf: &mut [u8]) -> bool {\n        todo!()\n    }\n\n    fn get_redo_start_lsn(&self) -> u64 {\n        todo!()\n    }\n\n    fn finish_sync_safekeepers(&self, _lsn: u64) -> ! 
{\n        todo!()\n    }\n\n    fn process_safekeeper_feedback(&mut self, _wp: &mut WalProposer, _sk: &mut Safekeeper) {\n        todo!()\n    }\n\n    fn log_internal(&self, _wp: &mut WalProposer, _level: Level, _msg: &str) {\n        todo!()\n    }\n\n    fn after_election(&self, _wp: &mut WalProposer) {\n        todo!()\n    }\n\n    /* BEGIN_HADRON */\n    fn reset_safekeeper_statuses_for_metrics(&self, _wp: &mut WalProposer, _num_safekeepers: u32) {\n        // Do nothing for testing purposes.\n    }\n\n    fn update_safekeeper_status_for_metrics(\n        &self,\n        _wp: &mut WalProposer,\n        _sk_index: u32,\n        _status: u8,\n    ) {\n        // Do nothing for testing purposes.\n    }\n    /* END_HADRON */\n}\n\n#[derive(Debug)]\npub enum WaitResult {\n    Latch,\n    Timeout,\n    Network(*mut Safekeeper, u32),\n}\n\n#[derive(Clone)]\npub struct Config {\n    /// Tenant and timeline id\n    pub ttid: TenantTimelineId,\n    /// List of safekeepers in format `host:port`\n    pub safekeepers_list: Vec<String>,\n    /// libpq connection info options\n    pub safekeeper_conninfo_options: String,\n    /// Safekeeper reconnect timeout in milliseconds\n    pub safekeeper_reconnect_timeout: i32,\n    /// Safekeeper connection timeout in milliseconds\n    pub safekeeper_connection_timeout: i32,\n    /// walproposer mode, finish when all safekeepers are synced or subscribe\n    /// to WAL streaming\n    pub sync_safekeepers: bool,\n}\n\n/// WalProposer main struct. 
C methods are reexported as Rust functions.\npub struct Wrapper {\n    wp: *mut WalProposer,\n    _safekeepers_list_vec: Vec<u8>,\n}\n\nimpl Wrapper {\n    pub fn new(api: Box<dyn ApiImpl>, config: Config) -> Wrapper {\n        let neon_tenant = CString::new(config.ttid.tenant_id.to_string())\n            .unwrap()\n            .into_raw();\n        let neon_timeline = CString::new(config.ttid.timeline_id.to_string())\n            .unwrap()\n            .into_raw();\n\n        let mut safekeepers_list_vec = CString::new(config.safekeepers_list.join(\",\"))\n            .unwrap()\n            .into_bytes_with_nul();\n        assert!(safekeepers_list_vec.len() == safekeepers_list_vec.capacity());\n        let safekeepers_list = safekeepers_list_vec.as_mut_ptr() as *mut std::ffi::c_char;\n        let safekeeper_conninfo_options = CString::from_str(&config.safekeeper_conninfo_options)\n            .unwrap()\n            .into_raw();\n\n        let callback_data = Box::into_raw(Box::new(api)) as *mut ::std::os::raw::c_void;\n\n        let c_config = WalProposerConfig {\n            neon_tenant,\n            neon_timeline,\n            safekeepers_list,\n            safekeeper_conninfo_options,\n            safekeeper_reconnect_timeout: config.safekeeper_reconnect_timeout,\n            safekeeper_connection_timeout: config.safekeeper_connection_timeout,\n            wal_segment_size: WAL_SEGMENT_SIZE as i32, // default 16MB\n            syncSafekeepers: config.sync_safekeepers,\n            systemId: 0,\n            pgTimeline: 1,\n            proto_version: 3,\n            callback_data,\n        };\n        let c_config = Box::into_raw(Box::new(c_config));\n\n        let api = create_api();\n        let wp = unsafe { WalProposerCreate(c_config, api) };\n        Wrapper {\n            wp,\n            _safekeepers_list_vec: safekeepers_list_vec,\n        }\n    }\n\n    pub fn start(&self) {\n        unsafe { WalProposerStart(self.wp) }\n    }\n}\n\nimpl Drop for 
Wrapper {\n    fn drop(&mut self) {\n        unsafe {\n            let config = (*self.wp).config;\n            drop(Box::from_raw(\n                (*config).callback_data as *mut Box<dyn ApiImpl>,\n            ));\n            drop(CString::from_raw((*config).neon_tenant));\n            drop(CString::from_raw((*config).neon_timeline));\n            drop(Box::from_raw(config));\n\n            for i in 0..(*self.wp).n_safekeepers {\n                let sk = &mut (*self.wp).safekeeper[i as usize];\n                take_vec_u8(&mut sk.inbuf);\n            }\n\n            WalProposerFree(self.wp);\n        }\n    }\n}\n\npub struct StreamingCallback {\n    wp: *mut WalProposer,\n}\n\nimpl StreamingCallback {\n    pub fn new(wp: *mut WalProposer) -> StreamingCallback {\n        StreamingCallback { wp }\n    }\n\n    pub fn broadcast(&self, startpos: Lsn, endpos: Lsn) {\n        unsafe { WalProposerBroadcast(self.wp, startpos.0, endpos.0) }\n    }\n\n    pub fn poll(&self) {\n        unsafe { WalProposerPoll(self.wp) }\n    }\n}\n\n#[cfg(test)]\nmod tests {\n    use core::panic;\n    use std::cell::{Cell, UnsafeCell};\n    use std::ffi::CString;\n    use std::sync::atomic::AtomicUsize;\n    use std::sync::mpsc::sync_channel;\n\n    use utils::id::TenantTimelineId;\n\n    use super::ApiImpl;\n    use crate::api_bindings::Level;\n    use crate::bindings::{NeonWALReadResult, PG_VERSION_NUM};\n    use crate::walproposer::Wrapper;\n\n    #[derive(Clone, Copy, Debug)]\n    struct WaitEventsData {\n        sk: *mut crate::bindings::Safekeeper,\n        event_mask: u32,\n    }\n\n    struct MockImpl {\n        // data to return from wait_event_set\n        wait_events: Cell<WaitEventsData>,\n        // walproposer->safekeeper messages\n        expected_messages: Vec<Vec<u8>>,\n        expected_ptr: AtomicUsize,\n        // safekeeper->walproposer messages\n        safekeeper_replies: Vec<Vec<u8>>,\n        replies_ptr: AtomicUsize,\n        // channel to send LSN to the main 
thread\n        sync_channel: std::sync::mpsc::SyncSender<u64>,\n        // Shmem state, used for storing donor info\n        shmem: UnsafeCell<crate::bindings::WalproposerShmemState>,\n    }\n\n    impl MockImpl {\n        fn check_walproposer_msg(&self, msg: &[u8]) {\n            let ptr = self\n                .expected_ptr\n                .fetch_add(1, std::sync::atomic::Ordering::SeqCst);\n\n            if ptr >= self.expected_messages.len() {\n                panic!(\"unexpected message from walproposer\");\n            }\n\n            let expected_msg = &self.expected_messages[ptr];\n            assert_eq!(msg, expected_msg.as_slice());\n        }\n\n        fn next_safekeeper_reply(&self) -> &[u8] {\n            let ptr = self\n                .replies_ptr\n                .fetch_add(1, std::sync::atomic::Ordering::SeqCst);\n\n            if ptr >= self.safekeeper_replies.len() {\n                panic!(\"no more safekeeper replies\");\n            }\n\n            &self.safekeeper_replies[ptr]\n        }\n    }\n\n    impl ApiImpl for MockImpl {\n        fn get_shmem_state(&self) -> *mut crate::bindings::WalproposerShmemState {\n            self.shmem.get()\n        }\n\n        fn get_current_timestamp(&self) -> i64 {\n            println!(\"get_current_timestamp\");\n            0\n        }\n\n        fn update_donor(&self, donor: &mut crate::bindings::Safekeeper, donor_lsn: u64) {\n            let mut shmem = unsafe { *self.get_shmem_state() };\n            shmem.propEpochStartLsn.value = donor_lsn;\n            shmem.donor_conninfo = donor.conninfo;\n            shmem.donor_lsn = donor_lsn;\n        }\n\n        fn conn_status(\n            &self,\n            _: &mut crate::bindings::Safekeeper,\n        ) -> crate::bindings::WalProposerConnStatusType {\n            println!(\"conn_status\");\n            crate::bindings::WalProposerConnStatusType_WP_CONNECTION_OK\n        }\n\n        fn conn_connect_start(&self, _: &mut 
crate::bindings::Safekeeper) {\n            println!(\"conn_connect_start\");\n        }\n\n        fn conn_connect_poll(\n            &self,\n            _: &mut crate::bindings::Safekeeper,\n        ) -> crate::bindings::WalProposerConnectPollStatusType {\n            println!(\"conn_connect_poll\");\n            crate::bindings::WalProposerConnectPollStatusType_WP_CONN_POLLING_OK\n        }\n\n        fn conn_send_query(&self, _: &mut crate::bindings::Safekeeper, query: &str) -> bool {\n            println!(\"conn_send_query: {query}\");\n            true\n        }\n\n        fn conn_get_query_result(\n            &self,\n            _: &mut crate::bindings::Safekeeper,\n        ) -> crate::bindings::WalProposerExecStatusType {\n            println!(\"conn_get_query_result\");\n            crate::bindings::WalProposerExecStatusType_WP_EXEC_SUCCESS_COPYBOTH\n        }\n\n        fn conn_async_read(\n            &self,\n            _: &mut crate::bindings::Safekeeper,\n            vec: &mut Vec<u8>,\n        ) -> crate::bindings::PGAsyncReadResult {\n            println!(\"conn_async_read\");\n            let reply = self.next_safekeeper_reply();\n            println!(\"conn_async_read result: {reply:?}\");\n            vec.extend_from_slice(reply);\n            crate::bindings::PGAsyncReadResult_PG_ASYNC_READ_SUCCESS\n        }\n\n        fn conn_blocking_write(&self, _: &mut crate::bindings::Safekeeper, buf: &[u8]) -> bool {\n            println!(\"conn_blocking_write: {buf:?}\");\n            self.check_walproposer_msg(buf);\n            true\n        }\n\n        fn recovery_download(\n            &self,\n            _wp: &mut crate::bindings::WalProposer,\n            _sk: &mut crate::bindings::Safekeeper,\n        ) -> bool {\n            true\n        }\n\n        fn wal_reader_allocate(&self, _: &mut crate::bindings::Safekeeper) -> NeonWALReadResult {\n            println!(\"wal_reader_allocate\");\n            
crate::bindings::NeonWALReadResult_NEON_WALREAD_SUCCESS\n        }\n\n        fn init_event_set(&self, _: &mut crate::bindings::WalProposer) {\n            println!(\"init_event_set\")\n        }\n\n        fn update_event_set(&self, sk: &mut crate::bindings::Safekeeper, event_mask: u32) {\n            println!(\n                \"update_event_set, sk={:?}, events_mask={:#b}\",\n                sk as *mut crate::bindings::Safekeeper, event_mask\n            );\n            self.wait_events.set(WaitEventsData { sk, event_mask });\n        }\n\n        fn add_safekeeper_event_set(&self, sk: &mut crate::bindings::Safekeeper, event_mask: u32) {\n            println!(\n                \"add_safekeeper_event_set, sk={:?}, events_mask={:#b}\",\n                sk as *mut crate::bindings::Safekeeper, event_mask\n            );\n            self.wait_events.set(WaitEventsData { sk, event_mask });\n        }\n\n        fn rm_safekeeper_event_set(&self, sk: &mut crate::bindings::Safekeeper) {\n            println!(\n                \"rm_safekeeper_event_set, sk={:?}\",\n                sk as *mut crate::bindings::Safekeeper\n            );\n        }\n\n        fn wait_event_set(\n            &self,\n            _: &mut crate::bindings::WalProposer,\n            timeout_millis: i64,\n        ) -> super::WaitResult {\n            let data = self.wait_events.get();\n            println!(\"wait_event_set, timeout_millis={timeout_millis}, res={data:?}\");\n            super::WaitResult::Network(data.sk, data.event_mask)\n        }\n\n        fn strong_random(&self, buf: &mut [u8]) -> bool {\n            println!(\"strong_random\");\n            buf.fill(0);\n            true\n        }\n\n        fn finish_sync_safekeepers(&self, lsn: u64) -> ! 
{\n            self.sync_channel.send(lsn).unwrap();\n            panic!(\"sync safekeepers finished at lsn={}\", lsn);\n        }\n\n        fn log_internal(&self, _wp: &mut crate::bindings::WalProposer, level: Level, msg: &str) {\n            println!(\"wp_log[{level}] {msg}\");\n        }\n\n        fn after_election(&self, _wp: &mut crate::bindings::WalProposer) {\n            println!(\"after_election\");\n        }\n    }\n\n    /// Test that walproposer can successfully connect to safekeeper and finish\n    /// sync_safekeepers. API is mocked in MockImpl.\n    ///\n    /// Run this test with valgrind to detect leaks:\n    /// `valgrind --leak-check=full target/debug/deps/walproposer-<build>`\n    #[test]\n    fn test_simple_sync_safekeepers() -> anyhow::Result<()> {\n        let ttid = TenantTimelineId::new(\n            \"9e4c8f36063c6c6e93bc20d65a820f3d\".parse()?,\n            \"9e4c8f36063c6c6e93bc20d65a820f3d\".parse()?,\n        );\n\n        let (sender, receiver) = sync_channel(1);\n\n        // Messages definitions are at walproposer.h\n        // xxx: it would be better to extract them from safekeeper crate and\n        // use serialization/deserialization here.\n        let greeting_tag = (b'g').to_be_bytes();\n        let tenant_id = CString::new(ttid.tenant_id.to_string())\n            .unwrap()\n            .into_bytes_with_nul();\n        let timeline_id = CString::new(ttid.timeline_id.to_string())\n            .unwrap()\n            .into_bytes_with_nul();\n        let mconf_gen = 0_u32.to_be_bytes();\n        let mconf_members_len = 0_u32.to_be_bytes();\n        let mconf_members_new_len = 0_u32.to_be_bytes();\n        let pg_version: [u8; 4] = PG_VERSION_NUM.to_be_bytes();\n        let system_id = 0_u64.to_be_bytes();\n        let wal_seg_size = 16777216_u32.to_be_bytes();\n\n        let proposer_greeting = [\n            greeting_tag.as_slice(),\n            tenant_id.as_slice(),\n            timeline_id.as_slice(),\n            
mconf_gen.as_slice(),\n            mconf_members_len.as_slice(),\n            mconf_members_new_len.as_slice(),\n            pg_version.as_slice(),\n            system_id.as_slice(),\n            wal_seg_size.as_slice(),\n        ]\n        .concat();\n\n        let voting_tag = (b'v').to_be_bytes();\n        let vote_request_term = 3_u64.to_be_bytes();\n        let vote_request = [\n            voting_tag.as_slice(),\n            mconf_gen.as_slice(),\n            vote_request_term.as_slice(),\n        ]\n        .concat();\n\n        let acceptor_greeting_term = 2_u64.to_be_bytes();\n        let acceptor_greeting_node_id = 1_u64.to_be_bytes();\n        let acceptor_greeting = [\n            greeting_tag.as_slice(),\n            acceptor_greeting_node_id.as_slice(),\n            mconf_gen.as_slice(),\n            mconf_members_len.as_slice(),\n            mconf_members_new_len.as_slice(),\n            acceptor_greeting_term.as_slice(),\n        ]\n        .concat();\n\n        let vote_response_term = 3_u64.to_be_bytes();\n        let vote_given = 1_u8.to_be_bytes();\n        let flush_lsn = 0x539_u64.to_be_bytes();\n        let truncate_lsn = 0x539_u64.to_be_bytes();\n        let th_len = 1_u32.to_be_bytes();\n        let th_term = 2_u64.to_be_bytes();\n        let th_lsn = 0x539_u64.to_be_bytes();\n        let vote_response = [\n            voting_tag.as_slice(),\n            mconf_gen.as_slice(),\n            vote_response_term.as_slice(),\n            vote_given.as_slice(),\n            flush_lsn.as_slice(),\n            truncate_lsn.as_slice(),\n            th_len.as_slice(),\n            th_term.as_slice(),\n            th_lsn.as_slice(),\n        ]\n        .concat();\n\n        let my_impl: Box<dyn ApiImpl> = Box::new(MockImpl {\n            wait_events: Cell::new(WaitEventsData {\n                sk: std::ptr::null_mut(),\n                event_mask: 0,\n            }),\n            expected_messages: vec![proposer_greeting, vote_request],\n            
expected_ptr: AtomicUsize::new(0),\n            safekeeper_replies: vec![acceptor_greeting, vote_response],\n            replies_ptr: AtomicUsize::new(0),\n            sync_channel: sender,\n            shmem: UnsafeCell::new(crate::api_bindings::empty_shmem()),\n        });\n        let config = crate::walproposer::Config {\n            ttid,\n            safekeepers_list: vec![\"localhost:5000\".to_string()],\n            safekeeper_conninfo_options: String::new(),\n            safekeeper_reconnect_timeout: 1000,\n            safekeeper_connection_timeout: 10000,\n            sync_safekeepers: true,\n        };\n\n        let wp = Wrapper::new(my_impl, config);\n\n        // walproposer will panic when it finishes sync_safekeepers\n        std::panic::catch_unwind(|| wp.start()).unwrap_err();\n        // validate the resulting LSN\n        assert_eq!(receiver.try_recv(), Ok(1337));\n        Ok(())\n        // drop() will free up resources here\n    }\n}\n"
  },
  {
    "path": "pageserver/Cargo.toml",
    "content": "[package]\nname = \"pageserver\"\nversion = \"0.1.0\"\nedition = \"2024\"\nlicense.workspace = true\n\n[features]\ndefault = []\n# Enables test-only APIs, incuding failpoints. In particular, enables the `fail_point!` macro,\n# which adds some runtime cost to run tests on outage conditions\ntesting = [\"fail/failpoints\", \"pageserver_api/testing\", \"wal_decoder/testing\", \"pageserver_client/testing\"]\n\n# Direct IO alignment options (propagated to pageserver_api)\nio-align-512 = [\"pageserver_api/io-align-512\"]\nio-align-4k = [\"pageserver_api/io-align-4k\"]\n\nfuzz-read-path = [\"testing\"]\n\n# Enables benchmarking only APIs\nbenchmarking = []\n\n[dependencies]\nanyhow.workspace = true\narc-swap.workspace = true\nasync-compression.workspace = true\nasync-stream.workspace = true\nbincode.workspace = true\nbit_field.workspace = true\nbyteorder.workspace = true\nbytes.workspace = true\ncamino-tempfile.workspace = true\ncamino.workspace = true\nchrono = { workspace = true, features = [\"serde\"] }\nclap = { workspace = true, features = [\"string\"] }\nconsumption_metrics.workspace = true\ncrc32c.workspace = true\neither.workspace = true\nenum-map.workspace = true\nenumset = { workspace = true, features = [\"serde\"]}\nfail.workspace = true\nfutures.workspace = true\nhashlink.workspace = true\nhex.workspace = true\nhttp.workspace = true\nhttp-utils.workspace = true\nhumantime-serde.workspace = true\nhumantime.workspace = true\nhyper0.workspace = true\nitertools.workspace = true\njsonwebtoken.workspace = true\nmd5.workspace = true\nmetrics.workspace = true\nnix.workspace = true\nnum_cpus.workspace = true # hack to get the number of worker threads tokio uses\nnum-traits.workspace = true\nonce_cell.workspace = true\npageserver_api.workspace = true\npageserver_client.workspace = true # for ResponseErrorMessageExt TOOD refactor that\npageserver_compaction.workspace = true\npageserver_page_api.workspace = true\npem.workspace = 
true\npin-project-lite.workspace = true\npostgres_backend.workspace = true\npostgres_connection.workspace = true\npostgres_ffi.workspace = true\npostgres_ffi_types.workspace = true\npostgres_initdb.workspace = true\npostgres-protocol.workspace = true\npostgres-types.workspace = true\nposthog_client_lite.workspace = true\npprof.workspace = true\npq_proto.workspace = true\nrand.workspace = true\nrange-set-blaze = { version = \"0.1.16\", features = [\"alloc\"] }\nregex.workspace = true\nremote_storage.workspace = true\nreqwest.workspace = true\nrpds.workspace = true\nrustls.workspace = true\nscopeguard.workspace = true\nsend-future.workspace = true\nserde_json = { workspace = true, features = [\"raw_value\"] }\nserde_path_to_error.workspace = true\nserde_with.workspace = true\nserde.workspace = true\nsmallvec.workspace = true\nstorage_broker.workspace = true\nstrum_macros.workspace = true\nstrum.workspace = true\nsysinfo.workspace = true\ntenant_size_model.workspace = true\nthiserror.workspace = true\ntikv-jemallocator.workspace = true\ntokio = { workspace = true, features = [\"process\", \"sync\", \"fs\", \"rt\", \"io-util\", \"time\"] }\ntokio-epoll-uring.workspace = true\ntokio-io-timeout.workspace = true\ntokio-postgres.workspace = true\ntokio-rustls.workspace = true\ntokio-stream.workspace = true\ntokio-tar.workspace = true\ntokio-util.workspace = true\ntoml_edit = { workspace = true, features = [ \"serde\" ] }\ntonic.workspace = true\ntonic-reflection.workspace = true\ntower.workspace = true\ntracing.workspace = true\ntracing-utils.workspace = true\nurl.workspace = true\nutils.workspace = true\nwal_decoder.workspace = true\nwalkdir.workspace = true\nworkspace_hack.workspace = true\ntwox-hash.workspace = true\n\n[target.'cfg(target_os = \"linux\")'.dependencies]\nprocfs.workspace = true\n\n[dev-dependencies]\nbase64.workspace = true\ncriterion.workspace = true\nhex-literal.workspace = true\ntokio = { workspace = true, features = [\"process\", \"sync\", \"fs\", 
\"rt\", \"io-util\", \"time\", \"test-util\"] }\nindoc.workspace = true\nuuid.workspace = true\nrstest.workspace = true\n\n[[bench]]\nname = \"bench_layer_map\"\nharness = false\n\n[[bench]]\nname = \"bench_walredo\"\nharness = false\n\n[[bench]]\nname = \"bench_ingest\"\nharness = false\nrequired-features = [\"benchmarking\"]\n\n[[bench]]\nname = \"upload_queue\"\nharness = false\n\n[[bench]]\nname = \"bench_metrics\"\nharness = false\n\n[[bin]]\nname = \"test_helper_slow_client_reads\"\nrequired-features = [ \"testing\" ]\n"
  },
  {
    "path": "pageserver/benches/README.md",
    "content": "## Pageserver Benchmarks\n\n# How to run\n\nTo run all benchmarks:\n`cargo bench`\n\nTo run a specific file:\n`cargo bench --bench bench_layer_map`\n\nTo run a specific function:\n`cargo bench --bench bench_layer_map -- real_map_uniform_queries`\n"
  },
  {
    "path": "pageserver/benches/bench_ingest.rs",
    "content": "use std::env;\nuse std::num::NonZeroUsize;\nuse std::sync::Arc;\n\nuse bytes::Bytes;\nuse camino::Utf8PathBuf;\nuse criterion::{Criterion, criterion_group, criterion_main};\nuse futures::stream::FuturesUnordered;\nuse pageserver::config::PageServerConf;\nuse pageserver::context::{DownloadBehavior, RequestContext};\nuse pageserver::keyspace::KeySpace;\nuse pageserver::l0_flush::{L0FlushConfig, L0FlushGlobalState};\nuse pageserver::task_mgr::TaskKind;\nuse pageserver::tenant::storage_layer::IoConcurrency;\nuse pageserver::tenant::storage_layer::{InMemoryLayer, ValuesReconstructState};\nuse pageserver::{page_cache, virtual_file};\nuse pageserver_api::config::GetVectoredConcurrentIo;\nuse pageserver_api::key::Key;\nuse pageserver_api::models::virtual_file::IoMode;\nuse pageserver_api::shard::TenantShardId;\nuse tokio_stream::StreamExt;\nuse tokio_util::sync::CancellationToken;\nuse utils::bin_ser::BeSer;\nuse utils::id::{TenantId, TimelineId};\nuse utils::lsn::Lsn;\nuse utils::sync::gate::Gate;\nuse wal_decoder::models::value::Value;\nuse wal_decoder::serialized_batch::SerializedValueBatch;\n\n// A very cheap hash for generating non-sequential keys.\nfn murmurhash32(mut h: u32) -> u32 {\n    h ^= h >> 16;\n    h = h.wrapping_mul(0x85ebca6b);\n    h ^= h >> 13;\n    h = h.wrapping_mul(0xc2b2ae35);\n    h ^= h >> 16;\n    h\n}\n\n#[derive(serde::Serialize, Clone, Copy, Debug, PartialEq)]\nenum KeyLayout {\n    /// Sequential unique keys\n    Sequential,\n    /// Random unique keys\n    Random,\n    /// Random keys, but only use the bits from the mask of them\n    RandomReuse(u32),\n}\n\n#[derive(serde::Serialize, Clone, Copy, Debug, PartialEq)]\nenum WriteDelta {\n    Yes,\n    No,\n}\n\n#[derive(serde::Serialize, Clone, Copy, Debug, PartialEq)]\nenum ConcurrentReads {\n    Yes,\n    No,\n}\n\nasync fn ingest(\n    conf: &'static PageServerConf,\n    put_size: usize,\n    put_count: usize,\n    key_layout: KeyLayout,\n    write_delta: WriteDelta,\n    
concurrent_reads: ConcurrentReads,\n) -> anyhow::Result<()> {\n    if concurrent_reads == ConcurrentReads::Yes {\n        assert_eq!(key_layout, KeyLayout::Sequential);\n    }\n\n    let mut lsn = utils::lsn::Lsn(1000);\n    let mut key = Key::from_i128(0x0);\n\n    let timeline_id = TimelineId::generate();\n    let tenant_id = TenantId::generate();\n    let tenant_shard_id = TenantShardId::unsharded(tenant_id);\n\n    tokio::fs::create_dir_all(conf.timeline_path(&tenant_shard_id, &timeline_id)).await?;\n\n    let ctx =\n        RequestContext::new(TaskKind::DebugTool, DownloadBehavior::Error).with_scope_debug_tools();\n\n    let gate = utils::sync::gate::Gate::default();\n    let cancel = CancellationToken::new();\n\n    let layer = Arc::new(\n        InMemoryLayer::create(\n            conf,\n            timeline_id,\n            tenant_shard_id,\n            lsn,\n            &gate,\n            &cancel,\n            &ctx,\n        )\n        .await?,\n    );\n\n    let data = Value::Image(Bytes::from(vec![0u8; put_size]));\n    let data_ser_size = data.serialized_size().unwrap() as usize;\n    let ctx = RequestContext::new(\n        pageserver::task_mgr::TaskKind::WalReceiverConnectionHandler,\n        pageserver::context::DownloadBehavior::Download,\n    );\n\n    const READ_BATCH_SIZE: u32 = 32;\n    let (tx, mut rx) = tokio::sync::watch::channel::<Option<Key>>(None);\n    let reader_cancel = CancellationToken::new();\n    let reader_handle = if concurrent_reads == ConcurrentReads::Yes {\n        Some(tokio::task::spawn({\n            let cancel = reader_cancel.clone();\n            let layer = layer.clone();\n            let ctx = ctx.attached_child();\n            async move {\n                let gate = Gate::default();\n                let gate_guard = gate.enter().unwrap();\n                let io_concurrency = IoConcurrency::spawn_from_conf(\n                    GetVectoredConcurrentIo::SidecarTask,\n                    gate_guard,\n                
);\n\n                rx.wait_for(|key| key.is_some()).await.unwrap();\n\n                while !cancel.is_cancelled() {\n                    let key = match *rx.borrow() {\n                        Some(some) => some,\n                        None => unreachable!(),\n                    };\n\n                    let mut start_key = key;\n                    start_key.field6 = key.field6.saturating_sub(READ_BATCH_SIZE);\n                    let key_range = start_key..key.next();\n\n                    let mut reconstruct_state = ValuesReconstructState::new(io_concurrency.clone());\n\n                    layer\n                        .get_values_reconstruct_data(\n                            KeySpace::single(key_range),\n                            Lsn(1)..Lsn(u64::MAX),\n                            &mut reconstruct_state,\n                            &ctx,\n                        )\n                        .await\n                        .unwrap();\n\n                    let mut collect_futs = std::mem::take(&mut reconstruct_state.keys)\n                        .into_values()\n                        .map(|state| state.sink_pending_ios())\n                        .collect::<FuturesUnordered<_>>();\n                    while collect_futs.next().await.is_some() {}\n                }\n\n                drop(io_concurrency);\n                gate.close().await;\n            }\n        }))\n    } else {\n        None\n    };\n\n    const BATCH_SIZE: usize = 16;\n    let mut batch = Vec::new();\n\n    for i in 0..put_count {\n        lsn += put_size as u64;\n\n        // Generate lots of keys within a single relation, which simulates the typical bulk ingest case: people\n        // usually care the most about write performance when they're blasting a huge batch of data into a huge table.\n        match key_layout {\n            KeyLayout::Sequential => {\n                // Use sequential order to illustrate the experience a user is likely to have\n                // 
when ingesting bulk data.\n                key.field6 = i as u32;\n            }\n            KeyLayout::Random => {\n                // Use random-order keys to avoid giving a false advantage to data structures that are\n                // faster when inserting on the end.\n                key.field6 = murmurhash32(i as u32);\n            }\n            KeyLayout::RandomReuse(mask) => {\n                // Use low bits only, to limit cardinality\n                key.field6 = murmurhash32(i as u32) & mask;\n            }\n        }\n\n        batch.push((key.to_compact(), lsn, data_ser_size, data.clone()));\n        if batch.len() >= BATCH_SIZE {\n            let last_key = Key::from_compact(batch.last().unwrap().0);\n\n            let this_batch = std::mem::take(&mut batch);\n            let serialized = SerializedValueBatch::from_values(this_batch);\n            layer.put_batch(serialized, &ctx).await?;\n\n            tx.send(Some(last_key)).unwrap();\n        }\n    }\n    if !batch.is_empty() {\n        let last_key = Key::from_compact(batch.last().unwrap().0);\n\n        let this_batch = std::mem::take(&mut batch);\n        let serialized = SerializedValueBatch::from_values(this_batch);\n        layer.put_batch(serialized, &ctx).await?;\n\n        tx.send(Some(last_key)).unwrap();\n    }\n    layer.freeze(lsn + 1).await;\n\n    if write_delta == WriteDelta::Yes {\n        let l0_flush_state = L0FlushGlobalState::new(L0FlushConfig::Direct {\n            max_concurrency: NonZeroUsize::new(1).unwrap(),\n        });\n        let (_desc, path) = layer\n            .write_to_disk(&ctx, None, l0_flush_state.inner(), &gate, cancel.clone())\n            .await?\n            .unwrap();\n        tokio::fs::remove_file(path).await?;\n    }\n\n    reader_cancel.cancel();\n    if let Some(handle) = reader_handle {\n        handle.await.unwrap();\n    }\n\n    Ok(())\n}\n\n/// Wrapper to instantiate a tokio runtime\nfn ingest_main(\n    conf: &'static PageServerConf,\n    
io_mode: IoMode,\n    put_size: usize,\n    put_count: usize,\n    key_layout: KeyLayout,\n    write_delta: WriteDelta,\n    concurrent_reads: ConcurrentReads,\n) {\n    pageserver::virtual_file::set_io_mode(io_mode);\n\n    let runtime = tokio::runtime::Builder::new_multi_thread()\n        .enable_all()\n        .build()\n        .unwrap();\n\n    runtime.block_on(async move {\n        let r = ingest(\n            conf,\n            put_size,\n            put_count,\n            key_layout,\n            write_delta,\n            concurrent_reads,\n        )\n        .await;\n        if let Err(e) = r {\n            panic!(\"{e:?}\");\n        }\n    });\n}\n\n/// Declare a series of benchmarks for the Pageserver's ingest write path.\n///\n/// This benchmark does not include WAL decode: it starts at InMemoryLayer::put_value, and ends either\n/// at freezing the ephemeral layer, or writing the ephemeral layer out to an L0 (depending on whether WriteDelta is set).\n///\n/// Genuine disk I/O is used, so expect results to differ depending on storage.  
However, when running on\n/// a fast disk, CPU is the bottleneck at time of writing.\nfn criterion_benchmark(c: &mut Criterion) {\n    let temp_dir_parent: Utf8PathBuf = env::current_dir().unwrap().try_into().unwrap();\n    let temp_dir = camino_tempfile::tempdir_in(temp_dir_parent).unwrap();\n    eprintln!(\"Data directory: {}\", temp_dir.path());\n\n    let conf: &'static PageServerConf = Box::leak(Box::new(\n        pageserver::config::PageServerConf::dummy_conf(temp_dir.path().to_path_buf()),\n    ));\n    virtual_file::init(\n        16384,\n        virtual_file::io_engine_for_bench(),\n        // immaterial, each `ingest_main` invocation below overrides this\n        conf.virtual_file_io_mode,\n        // without actually doing syncs, buffered writes have an unfair advantage over direct IO writes\n        virtual_file::SyncMode::Sync,\n    );\n    page_cache::init(conf.page_cache_size);\n\n    #[derive(serde::Serialize)]\n    struct ExplodedParameters {\n        io_mode: IoMode,\n        volume_mib: usize,\n        key_size: usize,\n        key_layout: KeyLayout,\n        write_delta: WriteDelta,\n        concurrent_reads: ConcurrentReads,\n    }\n    #[derive(Clone)]\n    struct HandPickedParameters {\n        volume_mib: usize,\n        key_size: usize,\n        key_layout: KeyLayout,\n        write_delta: WriteDelta,\n    }\n    let expect = vec![\n        // Small values (100b) tests\n        HandPickedParameters {\n            volume_mib: 128,\n            key_size: 100,\n            key_layout: KeyLayout::Sequential,\n            write_delta: WriteDelta::Yes,\n        },\n        HandPickedParameters {\n            volume_mib: 128,\n            key_size: 100,\n            key_layout: KeyLayout::Random,\n            write_delta: WriteDelta::Yes,\n        },\n        HandPickedParameters {\n            volume_mib: 128,\n            key_size: 100,\n            key_layout: KeyLayout::RandomReuse(0x3ff),\n            write_delta: WriteDelta::Yes,\n        
},\n        HandPickedParameters {\n            volume_mib: 128,\n            key_size: 100,\n            key_layout: KeyLayout::Sequential,\n            write_delta: WriteDelta::No,\n        },\n        // Large values (8k) tests\n        HandPickedParameters {\n            volume_mib: 128,\n            key_size: 8192,\n            key_layout: KeyLayout::Sequential,\n            write_delta: WriteDelta::Yes,\n        },\n        HandPickedParameters {\n            volume_mib: 128,\n            key_size: 8192,\n            key_layout: KeyLayout::Sequential,\n            write_delta: WriteDelta::No,\n        },\n    ];\n    let exploded_parameters = {\n        let mut out = Vec::new();\n        for concurrent_reads in [ConcurrentReads::Yes, ConcurrentReads::No] {\n            for param in expect.clone() {\n                let HandPickedParameters {\n                    volume_mib,\n                    key_size,\n                    key_layout,\n                    write_delta,\n                } = param;\n\n                if key_layout != KeyLayout::Sequential && concurrent_reads == ConcurrentReads::Yes {\n                    continue;\n                }\n\n                out.push(ExplodedParameters {\n                    io_mode: IoMode::DirectRw,\n                    volume_mib,\n                    key_size,\n                    key_layout,\n                    write_delta,\n                    concurrent_reads,\n                });\n            }\n        }\n        out\n    };\n    impl ExplodedParameters {\n        fn benchmark_id(&self) -> String {\n            let ExplodedParameters {\n                io_mode,\n                volume_mib,\n                key_size,\n                key_layout,\n                write_delta,\n                concurrent_reads,\n            } = self;\n            format!(\n                \"io_mode={io_mode:?} volume_mib={volume_mib:?} key_size_bytes={key_size:?} key_layout={key_layout:?} write_delta={write_delta:?} 
concurrent_reads={concurrent_reads:?}\"\n            )\n        }\n    }\n    let mut group = c.benchmark_group(\"ingest\");\n    for params in exploded_parameters {\n        let id = params.benchmark_id();\n        let ExplodedParameters {\n            io_mode,\n            volume_mib,\n            key_size,\n            key_layout,\n            write_delta,\n            concurrent_reads,\n        } = params;\n        let put_count = volume_mib * 1024 * 1024 / key_size;\n        group.throughput(criterion::Throughput::Bytes((key_size * put_count) as u64));\n        group.sample_size(10);\n        group.bench_function(id, |b| {\n            b.iter(|| {\n                ingest_main(\n                    conf,\n                    io_mode,\n                    key_size,\n                    put_count,\n                    key_layout,\n                    write_delta,\n                    concurrent_reads,\n                )\n            })\n        });\n    }\n}\n\ncriterion_group!(benches, criterion_benchmark);\ncriterion_main!(benches);\n\n/*\ncargo bench --bench bench_ingest\n\nim4gn.2xlarge:\n\ningest/io_mode=Buffered volume_mib=128 key_size_bytes=100 key_layout=Sequential write_delta=Yes\n                        time:   [1.2901 s 1.2943 s 1.2991 s]\n                        thrpt:  [98.533 MiB/s 98.892 MiB/s 99.220 MiB/s]\ningest/io_mode=Buffered volume_mib=128 key_size_bytes=100 key_layout=Random write_delta=Yes\n                        time:   [2.1387 s 2.1623 s 2.1845 s]\n                        thrpt:  [58.595 MiB/s 59.197 MiB/s 59.851 MiB/s]\ningest/io_mode=Buffered volume_mib=128 key_size_bytes=100 key_layout=RandomReuse(1023) write_delta=Y...\n                        time:   [1.2036 s 1.2074 s 1.2122 s]\n                        thrpt:  [105.60 MiB/s 106.01 MiB/s 106.35 MiB/s]\ningest/io_mode=Buffered volume_mib=128 key_size_bytes=100 key_layout=Sequential write_delta=No\n                        time:   [520.55 ms 521.46 ms 522.57 ms]\n                      
  thrpt:  [244.94 MiB/s 245.47 MiB/s 245.89 MiB/s]\ningest/io_mode=Buffered volume_mib=128 key_size_bytes=8192 key_layout=Sequential write_delta=Yes\n                        time:   [440.33 ms 442.24 ms 444.10 ms]\n                        thrpt:  [288.22 MiB/s 289.43 MiB/s 290.69 MiB/s]\ningest/io_mode=Buffered volume_mib=128 key_size_bytes=8192 key_layout=Sequential write_delta=No\n                        time:   [168.78 ms 169.42 ms 170.18 ms]\n                        thrpt:  [752.16 MiB/s 755.52 MiB/s 758.40 MiB/s]\ningest/io_mode=Direct volume_mib=128 key_size_bytes=100 key_layout=Sequential write_delta=Yes\n                        time:   [1.2978 s 1.3094 s 1.3227 s]\n                        thrpt:  [96.775 MiB/s 97.758 MiB/s 98.632 MiB/s]\ningest/io_mode=Direct volume_mib=128 key_size_bytes=100 key_layout=Random write_delta=Yes\n                        time:   [2.1976 s 2.2067 s 2.2154 s]\n                        thrpt:  [57.777 MiB/s 58.006 MiB/s 58.245 MiB/s]\ningest/io_mode=Direct volume_mib=128 key_size_bytes=100 key_layout=RandomReuse(1023) write_delta=Yes\n                        time:   [1.2103 s 1.2160 s 1.2233 s]\n                        thrpt:  [104.64 MiB/s 105.26 MiB/s 105.76 MiB/s]\ningest/io_mode=Direct volume_mib=128 key_size_bytes=100 key_layout=Sequential write_delta=No\n                        time:   [525.05 ms 526.37 ms 527.79 ms]\n                        thrpt:  [242.52 MiB/s 243.17 MiB/s 243.79 MiB/s]\ningest/io_mode=Direct volume_mib=128 key_size_bytes=8192 key_layout=Sequential write_delta=Yes\n                        time:   [443.06 ms 444.88 ms 447.15 ms]\n                        thrpt:  [286.26 MiB/s 287.72 MiB/s 288.90 MiB/s]\ningest/io_mode=Direct volume_mib=128 key_size_bytes=8192 key_layout=Sequential write_delta=No\n                        time:   [169.40 ms 169.80 ms 170.17 ms]\n                        thrpt:  [752.21 MiB/s 753.81 MiB/s 755.60 MiB/s]\ningest/io_mode=DirectRw volume_mib=128 key_size_bytes=100 
key_layout=Sequential write_delta=Yes\n                        time:   [1.2844 s 1.2915 s 1.2990 s]\n                        thrpt:  [98.536 MiB/s 99.112 MiB/s 99.657 MiB/s]\ningest/io_mode=DirectRw volume_mib=128 key_size_bytes=100 key_layout=Random write_delta=Yes\n                        time:   [2.1431 s 2.1663 s 2.1900 s]\n                        thrpt:  [58.446 MiB/s 59.087 MiB/s 59.726 MiB/s]\ningest/io_mode=DirectRw volume_mib=128 key_size_bytes=100 key_layout=RandomReuse(1023) write_delta=Y...\n                        time:   [1.1906 s 1.1926 s 1.1947 s]\n                        thrpt:  [107.14 MiB/s 107.33 MiB/s 107.51 MiB/s]\ningest/io_mode=DirectRw volume_mib=128 key_size_bytes=100 key_layout=Sequential write_delta=No\n                        time:   [516.86 ms 518.25 ms 519.47 ms]\n                        thrpt:  [246.40 MiB/s 246.98 MiB/s 247.65 MiB/s]\ningest/io_mode=DirectRw volume_mib=128 key_size_bytes=8192 key_layout=Sequential write_delta=Yes\n                        time:   [536.50 ms 536.53 ms 536.60 ms]\n                        thrpt:  [238.54 MiB/s 238.57 MiB/s 238.59 MiB/s]\ningest/io_mode=DirectRw volume_mib=128 key_size_bytes=8192 key_layout=Sequential write_delta=No\n                        time:   [267.77 ms 267.90 ms 268.04 ms]\n                        thrpt:  [477.53 MiB/s 477.79 MiB/s 478.02 MiB/s]\n\nHetzner AX102:\n\ningest/io_mode=Buffered volume_mib=128 key_size_bytes=100 key_layout=Sequential write_delta=Yes\n                        time:   [836.58 ms 861.93 ms 886.57 ms]\n                        thrpt:  [144.38 MiB/s 148.50 MiB/s 153.00 MiB/s]\ningest/io_mode=Buffered volume_mib=128 key_size_bytes=100 key_layout=Random write_delta=Yes\n                        time:   [1.2782 s 1.3191 s 1.3665 s]\n                        thrpt:  [93.668 MiB/s 97.037 MiB/s 100.14 MiB/s]\ningest/io_mode=Buffered volume_mib=128 key_size_bytes=100 key_layout=RandomReuse(1023) write_delta=Y...\n                        time:   [791.27 ms 807.08 ms 
822.95 ms]\n                        thrpt:  [155.54 MiB/s 158.60 MiB/s 161.77 MiB/s]\ningest/io_mode=Buffered volume_mib=128 key_size_bytes=100 key_layout=Sequential write_delta=No\n                        time:   [310.78 ms 314.66 ms 318.47 ms]\n                        thrpt:  [401.92 MiB/s 406.79 MiB/s 411.87 MiB/s]\ningest/io_mode=Buffered volume_mib=128 key_size_bytes=8192 key_layout=Sequential write_delta=Yes\n                        time:   [377.11 ms 387.77 ms 399.21 ms]\n                        thrpt:  [320.63 MiB/s 330.10 MiB/s 339.42 MiB/s]\ningest/io_mode=Buffered volume_mib=128 key_size_bytes=8192 key_layout=Sequential write_delta=No\n                        time:   [128.37 ms 132.96 ms 138.55 ms]\n                        thrpt:  [923.83 MiB/s 962.69 MiB/s 997.11 MiB/s]\ningest/io_mode=Direct volume_mib=128 key_size_bytes=100 key_layout=Sequential write_delta=Yes\n                        time:   [900.38 ms 914.88 ms 928.86 ms]\n                        thrpt:  [137.80 MiB/s 139.91 MiB/s 142.16 MiB/s]\ningest/io_mode=Direct volume_mib=128 key_size_bytes=100 key_layout=Random write_delta=Yes\n                        time:   [1.2538 s 1.2936 s 1.3313 s]\n                        thrpt:  [96.149 MiB/s 98.946 MiB/s 102.09 MiB/s]\ningest/io_mode=Direct volume_mib=128 key_size_bytes=100 key_layout=RandomReuse(1023) write_delta=Yes\n                        time:   [787.17 ms 803.89 ms 820.63 ms]\n                        thrpt:  [155.98 MiB/s 159.23 MiB/s 162.61 MiB/s]\ningest/io_mode=Direct volume_mib=128 key_size_bytes=100 key_layout=Sequential write_delta=No\n                        time:   [318.78 ms 321.89 ms 324.74 ms]\n                        thrpt:  [394.16 MiB/s 397.65 MiB/s 401.53 MiB/s]\ningest/io_mode=Direct volume_mib=128 key_size_bytes=8192 key_layout=Sequential write_delta=Yes\n                        time:   [374.01 ms 383.45 ms 393.20 ms]\n                        thrpt:  [325.53 MiB/s 333.81 MiB/s 342.24 MiB/s]\ningest/io_mode=Direct 
volume_mib=128 key_size_bytes=8192 key_layout=Sequential write_delta=No\n                        time:   [137.98 ms 141.31 ms 143.57 ms]\n                        thrpt:  [891.58 MiB/s 905.79 MiB/s 927.66 MiB/s]\ningest/io_mode=DirectRw volume_mib=128 key_size_bytes=100 key_layout=Sequential write_delta=Yes\n                        time:   [613.69 ms 622.48 ms 630.97 ms]\n                        thrpt:  [202.86 MiB/s 205.63 MiB/s 208.57 MiB/s]\ningest/io_mode=DirectRw volume_mib=128 key_size_bytes=100 key_layout=Random write_delta=Yes\n                        time:   [1.0299 s 1.0766 s 1.1273 s]\n                        thrpt:  [113.55 MiB/s 118.90 MiB/s 124.29 MiB/s]\ningest/io_mode=DirectRw volume_mib=128 key_size_bytes=100 key_layout=RandomReuse(1023) write_delta=Y...\n                        time:   [637.80 ms 647.78 ms 658.01 ms]\n                        thrpt:  [194.53 MiB/s 197.60 MiB/s 200.69 MiB/s]\ningest/io_mode=DirectRw volume_mib=128 key_size_bytes=100 key_layout=Sequential write_delta=No\n                        time:   [266.09 ms 267.20 ms 268.31 ms]\n                        thrpt:  [477.06 MiB/s 479.04 MiB/s 481.04 MiB/s]\ningest/io_mode=DirectRw volume_mib=128 key_size_bytes=8192 key_layout=Sequential write_delta=Yes\n                        time:   [269.34 ms 273.27 ms 277.69 ms]\n                        thrpt:  [460.95 MiB/s 468.40 MiB/s 475.24 MiB/s]\ningest/io_mode=DirectRw volume_mib=128 key_size_bytes=8192 key_layout=Sequential write_delta=No\n                        time:   [123.18 ms 124.24 ms 125.15 ms]\n                        thrpt:  [1022.8 MiB/s 1.0061 GiB/s 1.0148 GiB/s]\n*/\n"
  },
  {
    "path": "pageserver/benches/bench_layer_map.rs",
    "content": "use std::cmp::{max, min};\nuse std::fs::File;\nuse std::io::{BufRead, BufReader};\nuse std::path::PathBuf;\nuse std::str::FromStr;\nuse std::time::Instant;\n\nuse criterion::measurement::WallTime;\nuse criterion::{BenchmarkGroup, Criterion, black_box, criterion_group, criterion_main};\nuse pageserver::tenant::layer_map::LayerMap;\nuse pageserver::tenant::storage_layer::{LayerName, PersistentLayerDesc};\nuse pageserver_api::key::Key;\nuse pageserver_api::shard::TenantShardId;\nuse rand::prelude::{SeedableRng, StdRng};\nuse rand::seq::IndexedRandom;\nuse utils::id::{TenantId, TimelineId};\nuse utils::lsn::Lsn;\n\nfn fixture_path(relative: &str) -> PathBuf {\n    PathBuf::from(env!(\"CARGO_MANIFEST_DIR\")).join(relative)\n}\n\nfn build_layer_map(filename_dump: PathBuf) -> LayerMap {\n    let mut layer_map = LayerMap::default();\n\n    let mut min_lsn = Lsn(u64::MAX);\n    let mut max_lsn = Lsn(0);\n\n    let filenames = BufReader::new(File::open(filename_dump).unwrap()).lines();\n\n    let mut updates = layer_map.batch_update();\n    for fname in filenames {\n        let fname = fname.unwrap();\n        let fname = LayerName::from_str(&fname).unwrap();\n        let layer = PersistentLayerDesc::from(fname);\n\n        let lsn_range = layer.get_lsn_range();\n        min_lsn = min(min_lsn, lsn_range.start);\n        max_lsn = max(max_lsn, Lsn(lsn_range.end.0 - 1));\n\n        updates.insert_historic(layer);\n    }\n\n    println!(\"min: {min_lsn}, max: {max_lsn}\");\n\n    updates.flush();\n    layer_map\n}\n\n/// Construct a layer map query pattern for benchmarks\nfn uniform_query_pattern(layer_map: &LayerMap) -> Vec<(Key, Lsn)> {\n    // For each image layer we query one of the pages contained, at LSN right\n    // before the image layer was created. 
This gives us a somewhat uniform\n    // coverage of both the lsn and key space because image layers have\n    // approximately equal sizes and cover approximately equal WAL since\n    // last image.\n    layer_map\n        .iter_historic_layers()\n        .filter_map(|l| {\n            if l.is_incremental() {\n                None\n            } else {\n                let kr = l.get_key_range();\n                let lr = l.get_lsn_range();\n\n                let key_inside = kr.start.next();\n                let lsn_before = Lsn(lr.start.0 - 1);\n\n                Some((key_inside, lsn_before))\n            }\n        })\n        .collect()\n}\n\n// Benchmark using metadata extracted from our performance test environment, from\n// a project where we have run pgbench many times. The pgbench database was initialized\n// between each test run.\nfn bench_from_captest_env(c: &mut Criterion) {\n    // TODO consider compressing this file\n    let layer_map = build_layer_map(fixture_path(\"benches/odd-brook-layernames.txt\"));\n    let queries: Vec<(Key, Lsn)> = uniform_query_pattern(&layer_map);\n\n    // Test with uniform query pattern\n    c.bench_function(\"captest_uniform_queries\", |b| {\n        b.iter(|| {\n            for q in queries.clone().into_iter() {\n                black_box(layer_map.search(q.0, q.1));\n            }\n        });\n    });\n\n    // test with a key that corresponds to the RelDir entry. 
See pgdatadir_mapping.rs.\n    c.bench_function(\"captest_rel_dir_query\", |b| {\n        b.iter(|| {\n            let result = black_box(layer_map.search(\n                Key::from_hex(\"000000067F00008000000000000000000001\").unwrap(),\n                // This LSN is higher than any of the LSNs in the tree\n                Lsn::from_str(\"D0/80208AE1\").unwrap(),\n            ));\n            result.unwrap();\n        });\n    });\n}\n\n// Benchmark using metadata extracted from a real project that was taking\n// too long processing layer map queries.\nfn bench_from_real_project(c: &mut Criterion) {\n    // Init layer map\n    let now = Instant::now();\n    let layer_map = build_layer_map(fixture_path(\"benches/odd-brook-layernames.txt\"));\n    println!(\"Finished layer map init in {:?}\", now.elapsed());\n\n    // Choose uniformly distributed queries\n    let queries: Vec<(Key, Lsn)> = uniform_query_pattern(&layer_map);\n\n    // Define and name the benchmark function\n    let mut group = c.benchmark_group(\"real_map\");\n    group.bench_function(\"uniform_queries\", |b| {\n        b.iter(|| {\n            for q in queries.clone().into_iter() {\n                black_box(layer_map.search(q.0, q.1));\n            }\n        });\n    });\n    group.finish();\n}\n\n// Benchmark using synthetic data. Arrange image layers on stacked diagonal lines.\nfn bench_sequential(c: &mut Criterion) {\n    // Init layer map. Create 100_000 layers arranged in 1000 diagonal lines.\n    //\n    // TODO This code is pretty slow and runs even if we're only running other\n    //      benchmarks. 
It needs to be somewhere else, but it's not clear where.\n    //      Putting it inside the `bench_function` closure is not a solution\n    //      because then it runs multiple times during warmup.\n    let now = Instant::now();\n    let mut layer_map = LayerMap::default();\n    let mut updates = layer_map.batch_update();\n    for i in 0..100_000 {\n        let i32 = (i as u32) % 100;\n        let zero = Key::from_hex(\"000000000000000000000000000000000000\").unwrap();\n        let layer = PersistentLayerDesc::new_img(\n            TenantShardId::unsharded(TenantId::generate()),\n            TimelineId::generate(),\n            zero.add(10 * i32)..zero.add(10 * i32 + 1),\n            Lsn(i),\n            0,\n        );\n        updates.insert_historic(layer);\n    }\n    updates.flush();\n    println!(\"Finished layer map init in {:?}\", now.elapsed());\n\n    // Choose 100 uniformly random queries\n    let rng = &mut StdRng::seed_from_u64(1);\n    let queries: Vec<(Key, Lsn)> = uniform_query_pattern(&layer_map)\n        .choose_multiple(rng, 100)\n        .copied()\n        .collect();\n\n    // Define and name the benchmark function\n    let mut group = c.benchmark_group(\"sequential\");\n    group.bench_function(\"uniform_queries\", |b| {\n        b.iter(|| {\n            for q in queries.clone().into_iter() {\n                black_box(layer_map.search(q.0, q.1));\n            }\n        });\n    });\n    group.finish();\n}\n\nfn bench_visibility_with_map(\n    group: &mut BenchmarkGroup<WallTime>,\n    layer_map: LayerMap,\n    read_points: Vec<Lsn>,\n    bench_name: &str,\n) {\n    group.bench_function(bench_name, |b| {\n        b.iter(|| black_box(layer_map.get_visibility(read_points.clone())));\n    });\n}\n\n// Benchmark using synthetic data. Arrange image layers on stacked diagonal lines.\nfn bench_visibility(c: &mut Criterion) {\n    let mut group = c.benchmark_group(\"visibility\");\n    {\n        // Init layer map. 
Create 100_000 layers arranged in 1000 diagonal lines.\n        let now = Instant::now();\n        let mut layer_map = LayerMap::default();\n        let mut updates = layer_map.batch_update();\n        for i in 0..100_000 {\n            let i32 = (i as u32) % 100;\n            let zero = Key::from_hex(\"000000000000000000000000000000000000\").unwrap();\n            let layer = PersistentLayerDesc::new_img(\n                TenantShardId::unsharded(TenantId::generate()),\n                TimelineId::generate(),\n                zero.add(10 * i32)..zero.add(10 * i32 + 1),\n                Lsn(i),\n                0,\n            );\n            updates.insert_historic(layer);\n        }\n        updates.flush();\n        println!(\"Finished layer map init in {:?}\", now.elapsed());\n\n        let mut read_points = Vec::new();\n        for i in (0..100_000).step_by(1000) {\n            read_points.push(Lsn(i));\n        }\n\n        bench_visibility_with_map(&mut group, layer_map, read_points, \"sequential\");\n    }\n\n    {\n        let layer_map = build_layer_map(fixture_path(\"benches/odd-brook-layernames.txt\"));\n        let read_points = vec![Lsn(0x1C760FA190)];\n        bench_visibility_with_map(&mut group, layer_map, read_points, \"real_map\");\n\n        let layer_map = build_layer_map(fixture_path(\"benches/odd-brook-layernames.txt\"));\n        let read_points = vec![\n            Lsn(0x1C760FA190),\n            Lsn(0x000000931BEAD539),\n            Lsn(0x000000931BF63011),\n            Lsn(0x000000931B33AE68),\n            Lsn(0x00000038E67ABFA0),\n            Lsn(0x000000931B33AE68),\n            Lsn(0x000000914E3F38F0),\n            Lsn(0x000000931B33AE68),\n        ];\n        bench_visibility_with_map(&mut group, layer_map, read_points, \"real_map_many_branches\");\n    }\n\n    group.finish();\n}\n\ncriterion_group!(group_1, bench_from_captest_env);\ncriterion_group!(group_2, bench_from_real_project);\ncriterion_group!(group_3, 
bench_sequential);\ncriterion_group!(group_4, bench_visibility);\ncriterion_main!(group_1, group_2, group_3, group_4);\n"
  },
  {
    "path": "pageserver/benches/bench_metrics.rs",
    "content": "use criterion::{BenchmarkId, Criterion, criterion_group, criterion_main};\nuse utils::id::{TenantId, TimelineId};\n\n//\n// Demonstrates that repeat label values lookup is a multicore scalability bottleneck\n// that is worth avoiding.\n//\ncriterion_group!(\n    label_values,\n    label_values::bench_naive_usage,\n    label_values::bench_cache_label_values_lookup\n);\nmod label_values {\n    use super::*;\n\n    pub fn bench_naive_usage(c: &mut Criterion) {\n        let mut g = c.benchmark_group(\"label_values__naive_usage\");\n\n        for ntimelines in [1, 4, 8] {\n            g.bench_with_input(\n                BenchmarkId::new(\"ntimelines\", ntimelines),\n                &ntimelines,\n                |b, ntimelines| {\n                    b.iter_custom(|iters| {\n                        let barrier = std::sync::Barrier::new(*ntimelines + 1);\n\n                        let timelines = (0..*ntimelines)\n                            .map(|_| {\n                                (\n                                    TenantId::generate().to_string(),\n                                    \"0000\".to_string(),\n                                    TimelineId::generate().to_string(),\n                                )\n                            })\n                            .collect::<Vec<_>>();\n\n                        let metric_vec = metrics::UIntGaugeVec::new(\n                            metrics::opts!(\"testmetric\", \"testhelp\"),\n                            &[\"tenant_id\", \"shard_id\", \"timeline_id\"],\n                        )\n                        .unwrap();\n\n                        std::thread::scope(|s| {\n                            for (tenant_id, shard_id, timeline_id) in &timelines {\n                                s.spawn(|| {\n                                    barrier.wait();\n                                    for _ in 0..iters {\n                                        metric_vec\n                                  
          .with_label_values(&[tenant_id, shard_id, timeline_id])\n                                            .inc();\n                                    }\n                                    barrier.wait();\n                                });\n                            }\n                            barrier.wait();\n                            let start = std::time::Instant::now();\n                            barrier.wait();\n                            start.elapsed()\n                        })\n                    })\n                },\n            );\n        }\n        g.finish();\n    }\n\n    pub fn bench_cache_label_values_lookup(c: &mut Criterion) {\n        let mut g = c.benchmark_group(\"label_values__cache_label_values_lookup\");\n\n        for ntimelines in [1, 4, 8] {\n            g.bench_with_input(\n                BenchmarkId::new(\"ntimelines\", ntimelines),\n                &ntimelines,\n                |b, ntimelines| {\n                    b.iter_custom(|iters| {\n                        let barrier = std::sync::Barrier::new(*ntimelines + 1);\n\n                        let timelines = (0..*ntimelines)\n                            .map(|_| {\n                                (\n                                    TenantId::generate().to_string(),\n                                    \"0000\".to_string(),\n                                    TimelineId::generate().to_string(),\n                                )\n                            })\n                            .collect::<Vec<_>>();\n\n                        let metric_vec = metrics::UIntGaugeVec::new(\n                            metrics::opts!(\"testmetric\", \"testhelp\"),\n                            &[\"tenant_id\", \"shard_id\", \"timeline_id\"],\n                        )\n                        .unwrap();\n\n                        std::thread::scope(|s| {\n                            for (tenant_id, shard_id, timeline_id) in &timelines {\n                              
  s.spawn(|| {\n                                    let metric = metric_vec.with_label_values(&[\n                                        tenant_id,\n                                        shard_id,\n                                        timeline_id,\n                                    ]);\n                                    barrier.wait();\n                                    for _ in 0..iters {\n                                        metric.inc();\n                                    }\n                                    barrier.wait();\n                                });\n                            }\n                            barrier.wait();\n                            let start = std::time::Instant::now();\n                            barrier.wait();\n                            start.elapsed()\n                        })\n                    })\n                },\n            );\n        }\n        g.finish();\n    }\n}\n\n//\n// Demonstrates that even a single metric can be a scalability bottleneck\n// if multiple threads increment it concurrently but there's nothing we can do\n// about it without changing the metrics framework to use e.g. 
sharded counter atomics.\n//\ncriterion_group!(\n    single_metric_multicore_scalability,\n    single_metric_multicore_scalability::bench,\n);\nmod single_metric_multicore_scalability {\n    use super::*;\n\n    pub fn bench(c: &mut Criterion) {\n        let mut g = c.benchmark_group(\"single_metric_multicore_scalability\");\n\n        for nthreads in [1, 4, 8] {\n            g.bench_with_input(\n                BenchmarkId::new(\"nthreads\", nthreads),\n                &nthreads,\n                |b, nthreads| {\n                    b.iter_custom(|iters| {\n                        let barrier = std::sync::Barrier::new(*nthreads + 1);\n\n                        let metric = metrics::UIntGauge::new(\"testmetric\", \"testhelp\").unwrap();\n\n                        std::thread::scope(|s| {\n                            for _ in 0..*nthreads {\n                                s.spawn(|| {\n                                    barrier.wait();\n                                    for _ in 0..iters {\n                                        metric.inc();\n                                    }\n                                    barrier.wait();\n                                });\n                            }\n                            barrier.wait();\n                            let start = std::time::Instant::now();\n                            barrier.wait();\n                            start.elapsed()\n                        })\n                    })\n                },\n            );\n        }\n        g.finish();\n    }\n}\n\n//\n// Demonstrates that even if we cache label value, the propagation of such a cached metric value\n// by Clone'ing it is a scalability bottleneck.\n// The reason is that it's an Arc internally and thus there's contention on the reference count atomics.\n//\n// We can avoid that by having long-lived references per thread (= indirection).\n//\ncriterion_group!(\n    propagation_of_cached_label_value,\n    
propagation_of_cached_label_value::bench_naive,\n    propagation_of_cached_label_value::bench_long_lived_reference_per_thread,\n);\nmod propagation_of_cached_label_value {\n    use std::sync::Arc;\n\n    use super::*;\n\n    pub fn bench_naive(c: &mut Criterion) {\n        let mut g = c.benchmark_group(\"propagation_of_cached_label_value__naive\");\n\n        for nthreads in [1, 4, 8] {\n            g.bench_with_input(\n                BenchmarkId::new(\"nthreads\", nthreads),\n                &nthreads,\n                |b, nthreads| {\n                    b.iter_custom(|iters| {\n                        let barrier = std::sync::Barrier::new(*nthreads + 1);\n\n                        let metric = metrics::UIntGauge::new(\"testmetric\", \"testhelp\").unwrap();\n\n                        std::thread::scope(|s| {\n                            for _ in 0..*nthreads {\n                                s.spawn(|| {\n                                    barrier.wait();\n                                    for _ in 0..iters {\n                                        // propagating the metric means we'd clone it into the child RequestContext\n                                        let propagated = metric.clone();\n                                        // simulate some work\n                                        criterion::black_box(propagated);\n                                    }\n                                    barrier.wait();\n                                });\n                            }\n                            barrier.wait();\n                            let start = std::time::Instant::now();\n                            barrier.wait();\n                            start.elapsed()\n                        })\n                    })\n                },\n            );\n        }\n        g.finish();\n    }\n\n    pub fn bench_long_lived_reference_per_thread(c: &mut Criterion) {\n        let mut g =\n            
c.benchmark_group(\"propagation_of_cached_label_value__long_lived_reference_per_thread\");\n\n        for nthreads in [1, 4, 8] {\n            g.bench_with_input(\n                BenchmarkId::new(\"nthreads\", nthreads),\n                &nthreads,\n                |b, nthreads| {\n                    b.iter_custom(|iters| {\n                        let barrier = std::sync::Barrier::new(*nthreads + 1);\n\n                        let metric = metrics::UIntGauge::new(\"testmetric\", \"testhelp\").unwrap();\n\n                        std::thread::scope(|s| {\n                            for _ in 0..*nthreads {\n                                s.spawn(|| {\n                                    // This is the technique.\n                                    let this_threads_metric_reference = Arc::new(metric.clone());\n\n                                    barrier.wait();\n                                    for _ in 0..iters {\n                                        // propagating the metric means we'd clone it into the child RequestContext\n                                        let propagated = Arc::clone(&this_threads_metric_reference);\n                                        // simulate some work (include the pointer chase!)\n                                        criterion::black_box(&*propagated);\n                                    }\n                                    barrier.wait();\n                                });\n                            }\n                            barrier.wait();\n                            let start = std::time::Instant::now();\n                            barrier.wait();\n                            start.elapsed()\n                        })\n                    })\n                },\n            );\n        }\n    }\n}\n\ncriterion_group!(histograms, histograms::bench_bucket_scalability);\nmod histograms {\n    use std::time::Instant;\n\n    use criterion::{BenchmarkId, Criterion};\n    use 
metrics::core::Collector;\n\n    pub fn bench_bucket_scalability(c: &mut Criterion) {\n        let mut g = c.benchmark_group(\"bucket_scalability\");\n\n        for n in [1, 4, 8, 16, 32, 64, 128, 256] {\n            g.bench_with_input(BenchmarkId::new(\"nbuckets\", n), &n, |b, n| {\n                b.iter_custom(|iters| {\n                    let buckets: Vec<f64> = (0..*n).map(|i| i as f64 * 100.0).collect();\n                    let histo = metrics::Histogram::with_opts(\n                        metrics::prometheus::HistogramOpts::new(\"name\", \"help\")\n                            .buckets(buckets.clone()),\n                    )\n                    .unwrap();\n                    let start = Instant::now();\n                    for i in 0..usize::try_from(iters).unwrap() {\n                        histo.observe(buckets[i % buckets.len()]);\n                    }\n                    let elapsed = start.elapsed();\n                    // self-test\n                    let mfs = histo.collect();\n                    assert_eq!(mfs.len(), 1);\n                    let metrics = mfs[0].get_metric();\n                    assert_eq!(metrics.len(), 1);\n                    let histo = metrics[0].get_histogram();\n                    let buckets = histo.get_bucket();\n                    assert!(\n                        buckets\n                            .iter()\n                            .enumerate()\n                            .all(|(i, b)| b.get_cumulative_count()\n                                >= i as u64 * (iters / buckets.len() as u64))\n                    );\n                    elapsed\n                })\n            });\n        }\n    }\n}\n\ncriterion_main!(\n    label_values,\n    single_metric_multicore_scalability,\n    propagation_of_cached_label_value,\n    histograms,\n);\n\n/*\nRUST_BACKTRACE=full cargo bench --bench bench_metrics --  --discard-baseline --noplot\n\nResults on an im4gn.2xlarge 
instance\n\nlabel_values__naive_usage/ntimelines/1 time:   [178.71 ns 178.74 ns 178.76 ns]\nlabel_values__naive_usage/ntimelines/4 time:   [532.94 ns 539.59 ns 546.31 ns]\nlabel_values__naive_usage/ntimelines/8 time:   [1.1082 µs 1.1109 µs 1.1135 µs]\nlabel_values__cache_label_values_lookup/ntimelines/1 time:   [6.4116 ns 6.4119 ns 6.4123 ns]\nlabel_values__cache_label_values_lookup/ntimelines/4 time:   [6.3482 ns 6.3819 ns 6.4079 ns]\nlabel_values__cache_label_values_lookup/ntimelines/8 time:   [6.4213 ns 6.5279 ns 6.6293 ns]\nsingle_metric_multicore_scalability/nthreads/1 time:   [6.0102 ns 6.0104 ns 6.0106 ns]\nsingle_metric_multicore_scalability/nthreads/4 time:   [38.127 ns 38.275 ns 38.416 ns]\nsingle_metric_multicore_scalability/nthreads/8 time:   [73.698 ns 74.882 ns 75.864 ns]\npropagation_of_cached_label_value__naive/nthreads/1 time:   [14.424 ns 14.425 ns 14.426 ns]\npropagation_of_cached_label_value__naive/nthreads/4 time:   [100.71 ns 102.53 ns 104.35 ns]\npropagation_of_cached_label_value__naive/nthreads/8 time:   [211.50 ns 214.44 ns 216.87 ns]\npropagation_of_cached_label_value__long_lived_reference_per_thread/nthreads/1 time:   [14.135 ns 14.147 ns 14.160 ns]\npropagation_of_cached_label_value__long_lived_reference_per_thread/nthreads/4 time:   [14.243 ns 14.255 ns 14.268 ns]\npropagation_of_cached_label_value__long_lived_reference_per_thread/nthreads/8 time:   [14.470 ns 14.682 ns 14.895 ns]\nbucket_scalability/nbuckets/1     time:   [30.352 ns 30.353 ns 30.354 ns]\nbucket_scalability/nbuckets/4     time:   [30.464 ns 30.465 ns 30.467 ns]\nbucket_scalability/nbuckets/8     time:   [30.569 ns 30.575 ns 30.584 ns]\nbucket_scalability/nbuckets/16      time:   [30.961 ns 30.965 ns 30.969 ns]\nbucket_scalability/nbuckets/32      time:   [35.691 ns 35.707 ns 35.722 ns]\nbucket_scalability/nbuckets/64      time:   [47.829 ns 47.898 ns 47.974 ns]\nbucket_scalability/nbuckets/128     time:   [73.479 ns 73.512 ns 73.545 ns]\nbucket_scalability/nbuckets/256  
   time:   [127.92 ns 127.94 ns 127.96 ns]\n\nResults on an i3en.3xlarge instance\n\nlabel_values__naive_usage/ntimelines/1      time:   [117.32 ns 117.53 ns 117.74 ns]\nlabel_values__naive_usage/ntimelines/4      time:   [736.58 ns 741.12 ns 745.61 ns]\nlabel_values__naive_usage/ntimelines/8      time:   [1.4513 µs 1.4596 µs 1.4665 µs]\nlabel_values__cache_label_values_lookup/ntimelines/1      time:   [8.0964 ns 8.0979 ns 8.0995 ns]\nlabel_values__cache_label_values_lookup/ntimelines/4      time:   [8.1620 ns 8.2912 ns 8.4491 ns]\nlabel_values__cache_label_values_lookup/ntimelines/8      time:   [14.148 ns 14.237 ns 14.324 ns]\nsingle_metric_multicore_scalability/nthreads/1      time:   [8.0993 ns 8.1013 ns 8.1046 ns]\nsingle_metric_multicore_scalability/nthreads/4      time:   [80.039 ns 80.672 ns 81.297 ns]\nsingle_metric_multicore_scalability/nthreads/8      time:   [153.58 ns 154.23 ns 154.90 ns]\npropagation_of_cached_label_value__naive/nthreads/1     time:   [13.924 ns 13.926 ns 13.928 ns]\npropagation_of_cached_label_value__naive/nthreads/4     time:   [143.66 ns 145.27 ns 146.59 ns]\npropagation_of_cached_label_value__naive/nthreads/8     time:   [296.51 ns 297.90 ns 299.30 ns]\npropagation_of_cached_label_value__long_lived_reference_per_thread/nthreads/1     time:   [14.013 ns 14.149 ns 14.308 ns]\npropagation_of_cached_label_value__long_lived_reference_per_thread/nthreads/4     time:   [14.311 ns 14.625 ns 14.984 ns]\npropagation_of_cached_label_value__long_lived_reference_per_thread/nthreads/8     time:   [25.981 ns 26.227 ns 26.476 ns]\n\nResults on an Standard L16s v3 (16 vcpus, 128 GiB memory)  Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz\n\nlabel_values__naive_usage/ntimelines/1      time:   [101.63 ns 101.84 ns 102.06 ns]\nlabel_values__naive_usage/ntimelines/4      time:   [417.55 ns 424.73 ns 432.63 ns]\nlabel_values__naive_usage/ntimelines/8      time:   [874.91 ns 889.51 ns 904.25 ns]\nlabel_values__cache_label_values_lookup/ntimelines/1      
time:   [5.7724 ns 5.7760 ns 5.7804 ns]\nlabel_values__cache_label_values_lookup/ntimelines/4      time:   [7.8878 ns 7.9401 ns 8.0034 ns]\nlabel_values__cache_label_values_lookup/ntimelines/8      time:   [7.2621 ns 7.6354 ns 8.0337 ns]\nsingle_metric_multicore_scalability/nthreads/1      time:   [5.7710 ns 5.7744 ns 5.7785 ns]\nsingle_metric_multicore_scalability/nthreads/4      time:   [66.629 ns 66.994 ns 67.336 ns]\nsingle_metric_multicore_scalability/nthreads/8      time:   [130.85 ns 131.98 ns 132.91 ns]\npropagation_of_cached_label_value__naive/nthreads/1     time:   [11.540 ns 11.546 ns 11.553 ns]\npropagation_of_cached_label_value__naive/nthreads/4     time:   [131.22 ns 131.90 ns 132.56 ns]\npropagation_of_cached_label_value__naive/nthreads/8     time:   [260.99 ns 262.75 ns 264.26 ns]\npropagation_of_cached_label_value__long_lived_reference_per_thread/nthreads/1     time:   [11.544 ns 11.550 ns 11.557 ns]\npropagation_of_cached_label_value__long_lived_reference_per_thread/nthreads/4     time:   [11.568 ns 11.642 ns 11.763 ns]\npropagation_of_cached_label_value__long_lived_reference_per_thread/nthreads/8     time:   [13.416 ns 14.121 ns 14.886 ns]\n\nResults on an M4 MAX MacBook Pro   Total Number of Cores:\t14 (10 performance and 4 efficiency)\n\nlabel_values__naive_usage/ntimelines/1      time:   [52.711 ns 53.026 ns 53.381 ns]\nlabel_values__naive_usage/ntimelines/4      time:   [323.99 ns 330.40 ns 337.53 ns]\nlabel_values__naive_usage/ntimelines/8      time:   [1.1615 µs 1.1998 µs 1.2399 µs]\nlabel_values__cache_label_values_lookup/ntimelines/1      time:   [1.6635 ns 1.6715 ns 1.6809 ns]\nlabel_values__cache_label_values_lookup/ntimelines/4      time:   [1.7786 ns 1.7876 ns 1.8028 ns]\nlabel_values__cache_label_values_lookup/ntimelines/8      time:   [1.8195 ns 1.8371 ns 1.8665 ns]\nsingle_metric_multicore_scalability/nthreads/1      time:   [1.7764 ns 1.7909 ns 1.8079 ns]\nsingle_metric_multicore_scalability/nthreads/4      time:   [33.875 ns 
34.868 ns 35.923 ns]\nsingle_metric_multicore_scalability/nthreads/8      time:   [226.85 ns 235.30 ns 244.18 ns]\npropagation_of_cached_label_value__naive/nthreads/1     time:   [3.4337 ns 3.4491 ns 3.4660 ns]\npropagation_of_cached_label_value__naive/nthreads/4     time:   [69.486 ns 71.937 ns 74.472 ns]\npropagation_of_cached_label_value__naive/nthreads/8     time:   [434.87 ns 456.47 ns 477.84 ns]\npropagation_of_cached_label_value__long_lived_reference_per_thread/nthreads/1     time:   [3.3767 ns 3.3974 ns 3.4220 ns]\npropagation_of_cached_label_value__long_lived_reference_per_thread/nthreads/4     time:   [3.6105 ns 4.2355 ns 5.1463 ns]\npropagation_of_cached_label_value__long_lived_reference_per_thread/nthreads/8     time:   [4.0889 ns 4.9714 ns 6.0779 ns]\nbucket_scalability/nbuckets/1     time:   [4.8455 ns 4.8542 ns 4.8646 ns]\nbucket_scalability/nbuckets/4     time:   [4.5663 ns 4.5722 ns 4.5787 ns]\nbucket_scalability/nbuckets/8     time:   [4.5531 ns 4.5670 ns 4.5842 ns]\nbucket_scalability/nbuckets/16      time:   [4.6392 ns 4.6524 ns 4.6685 ns]\nbucket_scalability/nbuckets/32      time:   [6.0302 ns 6.0439 ns 6.0589 ns]\nbucket_scalability/nbuckets/64      time:   [10.608 ns 10.644 ns 10.691 ns]\nbucket_scalability/nbuckets/128     time:   [22.178 ns 22.316 ns 22.483 ns]\nbucket_scalability/nbuckets/256     time:   [42.190 ns 42.328 ns 42.492 ns]\n\nResults on a Hetzner AX102 AMD Ryzen 9 7950X3D 16-Core Processor\n\nlabel_values__naive_usage/ntimelines/1      time:   [64.510 ns 64.559 ns 64.610 ns]\nlabel_values__naive_usage/ntimelines/4      time:   [309.71 ns 326.09 ns 342.32 ns]\nlabel_values__naive_usage/ntimelines/8      time:   [776.92 ns 819.35 ns 856.93 ns]\nlabel_values__cache_label_values_lookup/ntimelines/1      time:   [1.2855 ns 1.2943 ns 1.3021 ns]\nlabel_values__cache_label_values_lookup/ntimelines/4      time:   [1.3865 ns 1.4139 ns 1.4441 ns]\nlabel_values__cache_label_values_lookup/ntimelines/8      time:   [1.5311 ns 1.5669 ns 
1.6046 ns]\nsingle_metric_multicore_scalability/nthreads/1      time:   [1.1927 ns 1.1981 ns 1.2049 ns]\nsingle_metric_multicore_scalability/nthreads/4      time:   [24.346 ns 25.439 ns 26.634 ns]\nsingle_metric_multicore_scalability/nthreads/8      time:   [58.666 ns 60.137 ns 61.486 ns]\npropagation_of_cached_label_value__naive/nthreads/1     time:   [2.7067 ns 2.7238 ns 2.7402 ns]\npropagation_of_cached_label_value__naive/nthreads/4     time:   [62.723 ns 66.214 ns 69.787 ns]\npropagation_of_cached_label_value__naive/nthreads/8     time:   [164.24 ns 170.10 ns 175.68 ns]\npropagation_of_cached_label_value__long_lived_reference_per_thread/nthreads/1     time:   [2.2915 ns 2.2960 ns 2.3012 ns]\npropagation_of_cached_label_value__long_lived_reference_per_thread/nthreads/4     time:   [2.5726 ns 2.6158 ns 2.6624 ns]\npropagation_of_cached_label_value__long_lived_reference_per_thread/nthreads/8     time:   [2.7068 ns 2.8243 ns 2.9824 ns]\nbucket_scalability/nbuckets/1     time:   [6.3998 ns 6.4288 ns 6.4684 ns]\nbucket_scalability/nbuckets/4     time:   [6.3603 ns 6.3620 ns 6.3637 ns]\nbucket_scalability/nbuckets/8     time:   [6.1646 ns 6.1654 ns 6.1667 ns]\nbucket_scalability/nbuckets/16      time:   [6.1341 ns 6.1391 ns 6.1454 ns]\nbucket_scalability/nbuckets/32      time:   [8.2206 ns 8.2254 ns 8.2301 ns]\nbucket_scalability/nbuckets/64      time:   [13.988 ns 13.994 ns 14.000 ns]\nbucket_scalability/nbuckets/128     time:   [28.180 ns 28.216 ns 28.251 ns]\nbucket_scalability/nbuckets/256     time:   [54.914 ns 54.931 ns 54.951 ns]\n\n*/\n"
  },
  {
    "path": "pageserver/benches/bench_walredo.rs",
    "content": "//! Quantify a single walredo manager's throughput under N concurrent callers.\n//!\n//! The benchmark implementation ([`bench_impl`]) is parametrized by\n//! - `redo_work` => an async closure that takes a `PostgresRedoManager` and performs one redo\n//! - `n_redos` => number of times the benchmark shell execute the `redo_work`\n//! - `nclients` => number of clients (more on this shortly).\n//!\n//! The benchmark impl sets up a multi-threaded tokio runtime with default parameters.\n//! It spawns `nclients` times [`client`] tokio tasks.\n//! Each task executes the `redo_work` `n_redos/nclients` times.\n//!\n//! We exercise the following combinations:\n//! - `redo_work = ping / short / medium``\n//! - `nclients = [1, 2, 4, 8, 16, 32, 64, 128]`\n//!\n//! We let `criterion` determine the `n_redos` using `iter_custom`.\n//! The idea is that for each `(redo_work, nclients)` combination,\n//! criterion will run the `bench_impl` multiple times with different `n_redos`.\n//! The `bench_impl` reports the aggregate wall clock time from the clients' perspective.\n//! Criterion will divide that by `n_redos` to compute the \"time per iteration\".\n//! In our case, \"time per iteration\" means \"time per redo_work execution\".\n//!\n//! NB: the way by which `iter_custom` determines the \"number of iterations\"\n//! is called sampling. Apparently the idea here is to detect outliers.\n//! We're not sure whether the current choice of sampling method makes sense.\n//! See https://bheisler.github.io/criterion.rs/book/user_guide/command_line_output.html#collecting-samples\n//!\n//! # Reference Numbers\n//!\n//! 2024-09-18 on im4gn.2xlarge\n//!\n//! ```text\n//! ping/1                  time:   [21.789 µs 21.918 µs 22.078 µs]\n//! ping/2                  time:   [27.686 µs 27.812 µs 27.970 µs]\n//! ping/4                  time:   [35.468 µs 35.671 µs 35.926 µs]\n//! ping/8                  time:   [59.682 µs 59.987 µs 60.363 µs]\n//! 
ping/16                 time:   [101.79 µs 102.37 µs 103.08 µs]\n//! ping/32                 time:   [184.18 µs 185.15 µs 186.36 µs]\n//! ping/64                 time:   [349.86 µs 351.45 µs 353.47 µs]\n//! ping/128                time:   [684.53 µs 687.98 µs 692.17 µs]\n//! short/1                 time:   [31.833 µs 32.126 µs 32.428 µs]\n//! short/2                 time:   [35.558 µs 35.756 µs 35.992 µs]\n//! short/4                 time:   [44.850 µs 45.138 µs 45.484 µs]\n//! short/8                 time:   [65.985 µs 66.379 µs 66.853 µs]\n//! short/16                time:   [127.06 µs 127.90 µs 128.87 µs]\n//! short/32                time:   [252.98 µs 254.70 µs 256.73 µs]\n//! short/64                time:   [497.13 µs 499.86 µs 503.26 µs]\n//! short/128               time:   [987.46 µs 993.45 µs 1.0004 ms]\n//! medium/1                time:   [137.91 µs 138.55 µs 139.35 µs]\n//! medium/2                time:   [192.00 µs 192.91 µs 194.07 µs]\n//! medium/4                time:   [389.62 µs 391.55 µs 394.01 µs]\n//! medium/8                time:   [776.80 µs 780.33 µs 784.77 µs]\n//! medium/16               time:   [1.5323 ms 1.5383 ms 1.5459 ms]\n//! medium/32               time:   [3.0120 ms 3.0226 ms 3.0350 ms]\n//! medium/64               time:   [5.7405 ms 5.7787 ms 5.8166 ms]\n//! medium/128              time:   [10.412 ms 10.574 ms 10.718 ms]\n//! 
```\n\nuse std::future::Future;\nuse std::sync::Arc;\nuse std::time::{Duration, Instant};\n\nuse anyhow::Context;\nuse bytes::{Buf, Bytes};\nuse criterion::{BenchmarkId, Criterion};\nuse once_cell::sync::Lazy;\nuse pageserver::config::PageServerConf;\nuse pageserver::walredo::{PostgresRedoManager, RedoAttemptType};\nuse pageserver_api::key::Key;\nuse pageserver_api::shard::TenantShardId;\nuse postgres_ffi::{BLCKSZ, PgMajorVersion};\nuse tokio::sync::Barrier;\nuse tokio::task::JoinSet;\nuse utils::id::TenantId;\nuse utils::lsn::Lsn;\nuse wal_decoder::models::record::NeonWalRecord;\n\nfn bench(c: &mut Criterion) {\n    macro_rules! bench_group {\n        ($name:expr, $redo_work:expr) => {{\n            let name: &str = $name;\n            let nclients = [1, 2, 4, 8, 16, 32, 64, 128];\n            for nclients in nclients {\n                let mut group = c.benchmark_group(name);\n                group.bench_with_input(\n                    BenchmarkId::from_parameter(nclients),\n                    &nclients,\n                    |b, nclients| {\n                        b.iter_custom(|iters| bench_impl($redo_work, iters, *nclients));\n                    },\n                );\n            }\n        }};\n    }\n    //\n    // benchmark the protocol implementation\n    //\n    let pg_version = PgMajorVersion::PG14;\n    bench_group!(\n        \"ping\",\n        Arc::new(move |mgr: Arc<PostgresRedoManager>| async move {\n            let _: () = mgr.ping(pg_version).await.unwrap();\n        })\n    );\n    //\n    // benchmarks with actual record redo\n    //\n    let make_redo_work = |req: &'static Request| {\n        Arc::new(move |mgr: Arc<PostgresRedoManager>| async move {\n            let page = req.execute(&mgr).await.unwrap();\n            assert_eq!(page.remaining(), BLCKSZ as usize);\n        })\n    };\n    bench_group!(\"short\", {\n        static REQUEST: Lazy<Request> = Lazy::new(Request::short_input);\n        make_redo_work(&REQUEST)\n    });\n    
bench_group!(\"medium\", {\n        static REQUEST: Lazy<Request> = Lazy::new(Request::medium_input);\n        make_redo_work(&REQUEST)\n    });\n}\ncriterion::criterion_group!(benches, bench);\ncriterion::criterion_main!(benches);\n\n// Returns the sum of each client's wall-clock time spent executing their share of the n_redos.\nfn bench_impl<F, Fut>(redo_work: Arc<F>, n_redos: u64, nclients: u64) -> Duration\nwhere\n    F: Fn(Arc<PostgresRedoManager>) -> Fut + Send + Sync + 'static,\n    Fut: Future<Output = ()> + Send + 'static,\n{\n    let repo_dir = camino_tempfile::tempdir_in(env!(\"CARGO_TARGET_TMPDIR\")).unwrap();\n\n    let conf = PageServerConf::dummy_conf(repo_dir.path().to_path_buf());\n    let conf = Box::leak(Box::new(conf));\n    let tenant_shard_id = TenantShardId::unsharded(TenantId::generate());\n\n    let rt = tokio::runtime::Builder::new_multi_thread()\n        .enable_all()\n        .build()\n        .unwrap();\n\n    let start = Arc::new(Barrier::new(nclients as usize));\n\n    let mut tasks = JoinSet::new();\n\n    let manager = PostgresRedoManager::new(conf, tenant_shard_id);\n    let manager = Arc::new(manager);\n\n    // divide the amount of work equally among the clients.\n    let nredos_per_client = n_redos / nclients;\n    for _ in 0..nclients {\n        rt.block_on(async {\n            tasks.spawn(client(\n                Arc::clone(&manager),\n                Arc::clone(&start),\n                Arc::clone(&redo_work),\n                nredos_per_client,\n            ))\n        });\n    }\n\n    rt.block_on(async move {\n        let mut total_wallclock_time = Duration::ZERO;\n        while let Some(res) = tasks.join_next().await {\n            total_wallclock_time += res.unwrap();\n        }\n        total_wallclock_time\n    })\n}\n\nasync fn client<F, Fut>(\n    mgr: Arc<PostgresRedoManager>,\n    start: Arc<Barrier>,\n    redo_work: Arc<F>,\n    n_redos: u64,\n) -> Duration\nwhere\n    F: Fn(Arc<PostgresRedoManager>) -> Fut + Send 
+ Sync + 'static,\n    Fut: Future<Output = ()> + Send + 'static,\n{\n    start.wait().await;\n    let start = Instant::now();\n    for _ in 0..n_redos {\n        redo_work(Arc::clone(&mgr)).await;\n        // The real pageserver will rarely if ever do 2 walredos in a row without\n        // yielding to the executor.\n        tokio::task::yield_now().await;\n    }\n    start.elapsed()\n}\n\nmacro_rules! lsn {\n    ($input:expr) => {{\n        let input = $input;\n        match <Lsn as std::str::FromStr>::from_str(input) {\n            Ok(lsn) => lsn,\n            Err(e) => panic!(\"failed to parse {}: {}\", input, e),\n        }\n    }};\n}\n\n/// Simple wrapper around `WalRedoManager::request_redo`.\n///\n/// In benchmarks this is cloned around.\n#[derive(Clone)]\nstruct Request {\n    key: Key,\n    lsn: Lsn,\n    base_img: Option<(Lsn, Bytes)>,\n    records: Vec<(Lsn, NeonWalRecord)>,\n    pg_version: PgMajorVersion,\n}\n\nimpl Request {\n    async fn execute(&self, manager: &PostgresRedoManager) -> anyhow::Result<Bytes> {\n        let Request {\n            key,\n            lsn,\n            base_img,\n            records,\n            pg_version,\n        } = self;\n\n        // TODO: avoid these clones\n        manager\n            .request_redo(\n                *key,\n                *lsn,\n                base_img.clone(),\n                records.clone(),\n                *pg_version,\n                RedoAttemptType::ReadPage,\n            )\n            .await\n            .context(\"request_redo\")\n    }\n\n    fn pg_record(will_init: bool, bytes: &'static [u8]) -> NeonWalRecord {\n        let rec = Bytes::from_static(bytes);\n        NeonWalRecord::Postgres { will_init, rec }\n    }\n\n    /// Short payload, 1132 bytes.\n    // pg_records are copypasted from log, where they are put with Debug impl of Bytes, which uses \\0\n    // for null bytes.\n    #[allow(clippy::octal_escapes)]\n    pub fn short_input() -> Request {\n        let pg_record = 
Self::pg_record;\n        Request {\n        key: Key {\n            field1: 0,\n            field2: 1663,\n            field3: 13010,\n            field4: 1259,\n            field5: 0,\n            field6: 0,\n        },\n        lsn: lsn!(\"0/16E2408\"),\n        base_img: None,\n        records: vec![\n            (\n                lsn!(\"0/16A9388\"),\n                pg_record(true, b\"j\\x03\\0\\0\\0\\x04\\0\\0\\xe8\\x7fj\\x01\\0\\0\\0\\0\\0\\n\\0\\0\\xd0\\x16\\x13Y\\0\\x10\\0\\04\\x03\\xd4\\0\\x05\\x7f\\x06\\0\\0\\xd22\\0\\0\\xeb\\x04\\0\\0\\0\\0\\0\\0\\xff\\x03\\0\\0\\0\\0\\x80\\xeca\\x01\\0\\0\\x01\\0\\xd4\\0\\xa0\\x1d\\0 \\x04 \\0\\0\\0\\0/\\0\\x01\\0\\xa0\\x9dX\\x01\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0.\\0\\x01\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\00\\x9f\\x9a\\x01P\\x9e\\xb2\\x01\\0\\x04\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\x02\\0!\\0\\x01\\x08 \\xff\\xff\\xff?\\0\\0\\0\\0\\0\\0@\\0\\0another_table\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\x98\\x08\\0\\0\\x02@\\0\\0\\0\\0\\0\\0\\n\\0\\0\\0\\x02\\0\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\x80\\xbf\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0pr\\x01\\0\\0\\0\\0\\0\\0\\0\\0\\x01d\\0\\0\\0\\0\\0\\0\\x04\\0\\0\\x01\\0\\0\\0\\0\\0\\0\\0\\x0c\\x02\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0/\\0!\\x80\\x03+ 
\\xff\\xff\\xff\\x7f\\0\\0\\0\\0\\0\\xdf\\x04\\0\\0pg_type\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\x0b\\0\\0\\0G\\0\\0\\0\\0\\0\\0\\0\\n\\0\\0\\0\\x02\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\x0e\\0\\0\\0\\0@\\x16D\\x0e\\0\\0\\0K\\x10\\0\\0\\x01\\0pr \\0\\0\\0\\0\\0\\0\\0\\0\\x01n\\0\\0\\0\\0\\0\\xd6\\x02\\0\\0\\x01\\0\\0\\0[\\x01\\0\\0\\0\\0\\0\\0\\0\\t\\x04\\0\\0\\x02\\0\\0\\0\\x01\\0\\0\\0\\n\\0\\0\\0\\n\\0\\0\\0\\x7f\\0\\0\\0\\0\\0\\0\\0\\n\\0\\0\\0\\x02\\0\\0\\0\\0\\0\\0C\\x01\\0\\0\\x15\\x01\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0.\\0!\\x80\\x03+ \\xff\\xff\\xff\\x7f\\0\\0\\0\\0\\0;\\n\\0\\0pg_statistic\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\x0b\\0\\0\\0\\xfd.\\0\\0\\0\\0\\0\\0\\n\\0\\0\\0\\x02\\0\\0\\0;\\n\\0\\0\\0\\0\\0\\0\\x13\\0\\0\\0\\0\\0\\xcbC\\x13\\0\\0\\0\\x18\\x0b\\0\\0\\x01\\0pr\\x1f\\0\\0\\0\\0\\0\\0\\0\\0\\x01n\\0\\0\\0\\0\\0\\xd6\\x02\\0\\0\\x01\\0\\0\\0C\\x01\\0\\0\\0\\0\\0\\0\\0\\t\\x04\\0\\0\\x01\\0\\0\\0\\x01\\0\\0\\0\\n\\0\\0\\0\\n\\0\\0\\0\\x7f\\0\\0\\0\\0\\0\\0\\x02\\0\\x01\"),\n            ),\n            (\n                lsn!(\"0/16D4080\"),\n                pg_record(false, b\"\\xbc\\0\\0\\0\\0\\0\\0\\0h?m\\x01\\0\\0\\0\\0p\\n\\0\\09\\x08\\xa3\\xea\\0 \\x8c\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\xeb\\x04\\0\\0\\0\\0\\0\\0\\xff\\x02\\0@\\0\\0another_table\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\x98\\x08\\0\\0\\x02@\\0\\0\\0\\0\\0\\0\\n\\0\\0\\0\\x02\\0\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\x05\\0\\0\\0\\0@zD\\x05\\0\\0\\0\\0\\0\\0\\0\\0\\0pr\\x01\\0\\0\\0\\0\\0\\0\\0\\0\\x01d\\0\\0\\0\\0\\0\\0\\x04\\0\\0\\x01\\0\\0\\0\\x02\\0\"),\n            ),\n        ],\n        pg_version: 
PgMajorVersion::PG14,\n    }\n    }\n\n    /// Medium sized payload, serializes as 26393 bytes.\n    // see [`short`]\n    #[allow(clippy::octal_escapes)]\n    pub fn medium_input() -> Request {\n        let pg_record = Self::pg_record;\n        Request {\n        key: Key {\n            field1: 0,\n            field2: 1663,\n            field3: 13010,\n            field4: 16384,\n            field5: 0,\n            field6: 0,\n        },\n        lsn: lsn!(\"0/16E2440\"),\n        base_img: None,\n        records: vec![\n            (lsn!(\"0/16B40A0\"), pg_record(true, b\"C\\0\\0\\0\\0\\x04\\0\\0(@k\\x01\\0\\0\\0\\0\\x80\\n\\0\\0\\x9c$2\\xb4\\0`\\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0\\0\\0\\0\\0\\0\\0\\0\\0\\x01\\0\\0\")),\n            (lsn!(\"0/16B40E8\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0X@k\\x01\\0\\0\\0\\0\\0\\n\\0\\0\\x8c\\xe7\\xaa}\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0\\x01\\0\\0\\0\\0\\0\\0\\0\\x02\\0\\0\")),\n            (lsn!(\"0/16B4130\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0\\xa0@k\\x01\\0\\0\\0\\0\\0\\n\\0\\0\\xb3\\xa9a\\x89\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0\\x02\\0\\0\\0\\0\\0\\0\\0\\x03\\0\\0\")),\n            (lsn!(\"0/16B4178\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0\\xe8@k\\x01\\0\\0\\0\\0\\0\\n\\0\\0Z\\xd8\\xd4W\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0\\x03\\0\\0\\0\\0\\0\\0\\0\\x04\\0\\0\")),\n            (lsn!(\"0/16B41C0\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\00Ak\\x01\\0\\0\\0\\0\\0\\n\\0\\0G%L\\xe1\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0\\x04\\0\\0\\0\\0\\0\\0\\0\\x05\\0\\0\")),\n            (lsn!(\"0/16B4208\"), pg_record(false, 
b\"C\\0\\0\\0\\0\\x04\\0\\0xAk\\x01\\0\\0\\0\\0\\0\\n\\0\\0\\xbf\\xe2Z\\xed\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0\\x05\\0\\0\\0\\0\\0\\0\\0\\x06\\0\\0\")),\n            (lsn!(\"0/16B4250\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0\\xc0Ak\\x01\\0\\0\\0\\0\\0\\n\\0\\0\\xcc\\xcc6}\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0\\x06\\0\\0\\0\\0\\0\\0\\0\\x07\\0\\0\")),\n            (lsn!(\"0/16B4298\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0\\x08Bk\\x01\\0\\0\\0\\0\\0\\n\\0\\0\\xdc\\t\\x18v\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0\\x07\\0\\0\\0\\0\\0\\0\\0\\x08\\0\\0\")),\n            (lsn!(\"0/16B42E0\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0PBk\\x01\\0\\0\\0\\0\\0\\n\\0\\0\\xe3\\\\\\xb0U\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0\\x08\\0\\0\\0\\0\\0\\0\\0\\t\\0\\0\")),\n            (lsn!(\"0/16B4328\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0\\x98Bk\\x01\\0\\0\\0\\0\\0\\n\\0\\0\\x83[\\xe8\\x90\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0\\t\\0\\0\\0\\0\\0\\0\\0\\n\\0\\0\")),\n            (lsn!(\"0/16B4370\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0\\xe0Bk\\x01\\0\\0\\0\\0\\0\\n\\0\\0$\\xd5m\\xad\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0\\n\\0\\0\\0\\0\\0\\0\\0\\x0b\\0\\0\")),\n            (lsn!(\"0/16B43B8\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0(Ck\\x01\\0\\0\\0\\0\\0\\n\\0\\0\\x94\\x93\\xe7-\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0\\x0b\\0\\0\\0\\0\\0\\0\\0\\x0c\\0\\0\")),\n            (lsn!(\"0/16B4400\"), pg_record(false, 
b\"C\\0\\0\\0\\0\\x04\\0\\0pCk\\x01\\0\\0\\0\\0\\0\\n\\0\\0\\xd0Y@\\xc5\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0\\x0c\\0\\0\\0\\0\\0\\0\\0\\r\\0\\0\")),\n            (lsn!(\"0/16B4448\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0\\xb8Ck\\x01\\0\\0\\0\\0\\0\\n\\0\\0\\xb0^\\x18\\0\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0\\r\\0\\0\\0\\0\\0\\0\\0\\x0e\\0\\0\")),\n            (lsn!(\"0/16B4490\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0\\0Dk\\x01\\0\\0\\0\\0\\0\\n\\0\\0\\x97,\\x15z\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0\\x0e\\0\\0\\0\\0\\0\\0\\0\\x0f\\0\\0\")),\n            (lsn!(\"0/16B44D8\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0HDk\\x01\\0\\0\\0\\0\\0\\n\\0\\0\\xfa\\x04\\xb1@\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0\\x0f\\0\\0\\0\\0\\0\\0\\0\\x10\\0\\0\")),\n            (lsn!(\"0/16B4520\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0\\x90Dk\\x01\\0\\0\\0\\0\\0\\n\\0\\0Z\\xd9\\xa49\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0\\x10\\0\\0\\0\\0\\0\\0\\0\\x11\\0\\0\")),\n            (lsn!(\"0/16B4568\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0\\xd8Dk\\x01\\0\\0\\0\\0\\0\\n\\0\\0\\xa2\\x1e\\xb25\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0\\x11\\0\\0\\0\\0\\0\\0\\0\\x12\\0\\0\")),\n            (lsn!(\"0/16B45B0\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0 Ek\\x01\\0\\0\\0\\0\\0\\n\\0\\0\\\\\\xa7\\x08V\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0\\x12\\0\\0\\0\\0\\0\\0\\0\\x13\\0\\0\")),\n            (lsn!(\"0/16B45F8\"), pg_record(false, 
b\"C\\0\\0\\0\\0\\x04\\0\\0hEk\\x01\\0\\0\\0\\0\\0\\n\\0\\0\\xb5\\xd6\\xbd\\x88\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0\\x13\\0\\0\\0\\0\\0\\0\\0\\x14\\0\\0\")),\n            (lsn!(\"0/16B4640\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0\\xb0Ek\\x01\\0\\0\\0\\0\\0\\n\\0\\0i\\xdcT\\xa9\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0\\x14\\0\\0\\0\\0\\0\\0\\0\\x15\\0\\0\")),\n            (lsn!(\"0/16B4688\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0\\xf8Ek\\x01\\0\\0\\0\\0\\0\\n\\0\\0\\x91\\x1bB\\xa5\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0\\x15\\0\\0\\0\\0\\0\\0\\0\\x16\\0\\0\")),\n            (lsn!(\"0/16B46D0\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0@Fk\\x01\\0\\0\\0\\0\\0\\n\\0\\0P[P\\x89\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0\\x16\\0\\0\\0\\0\\0\\0\\0\\x17\\0\\0\")),\n            (lsn!(\"0/16B4718\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0\\x88Fk\\x01\\0\\0\\0\\0\\0\\n\\0\\0\\xf2\\xf0\\0>\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0\\x17\\0\\0\\0\\0\\0\\0\\0\\x18\\0\\0\")),\n            (lsn!(\"0/16B4760\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0\\xd0Fk\\x01\\0\\0\\0\\0\\0\\n\\0\\0\\xcd\\xa5\\xa8\\x1d\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0\\x18\\0\\0\\0\\0\\0\\0\\0\\x19\\0\\0\")),\n            (lsn!(\"0/16B47A8\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0\\x18Gk\\x01\\0\\0\\0\\0\\0\\n\\0\\0lU\\x81O\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0\\x19\\0\\0\\0\\0\\0\\0\\0\\x1a\\0\\0\")),\n            (lsn!(\"0/16B47F0\"), pg_record(false, 
b\"C\\0\\0\\0\\0\\x04\\0\\0`Gk\\x01\\0\\0\\0\\0\\0\\n\\0\\0\\xcb\\xdb\\x04r\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0\\x1a\\0\\0\\0\\0\\0\\0\\0\\x1b\\0\\0\")),\n            (lsn!(\"0/16B4838\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0\\xa8Gk\\x01\\0\\0\\0\\0\\0\\n\\0\\0\\xbaj\\xffe\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0\\x1b\\0\\0\\0\\0\\0\\0\\0\\x1c\\0\\0\")),\n            (lsn!(\"0/16B4880\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0\\xf0Gk\\x01\\0\\0\\0\\0\\0\\n\\0\\0\\xfe\\xa0X\\x8d\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0\\x1c\\0\\0\\0\\0\\0\\0\\0\\x1d\\0\\0\")),\n            (lsn!(\"0/16B48C8\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\08Hk\\x01\\0\\0\\0\\0\\0\\n\\0\\0\\x06\\x9e_\\x0e\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0\\x1d\\0\\0\\0\\0\\0\\0\\0\\x1e\\0\\0\")),\n            (lsn!(\"0/16B4910\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0\\x80Hk\\x01\\0\\0\\0\\0\\0\\n\\0\\0u\\xb03\\x9e\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0\\x1e\\0\\0\\0\\0\\0\\0\\0\\x1f\\0\\0\")),\n            (lsn!(\"0/16B4958\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0\\xc8Hk\\x01\\0\\0\\0\\0\\0\\n\\0\\0\\xb6\\x1e\\xe3-\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0\\x1f\\0\\0\\0\\0\\0\\0\\0 \\0\\0\")),\n            (lsn!(\"0/16B49A0\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0\\x10Ik\\x01\\0\\0\\0\\0\\0\\n\\0\\0(\\xd2\\x8d\\xe1\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0 \\0\\0\\0\\0\\0\\0\\0!\\0\\0\")),\n            (lsn!(\"0/16B49E8\"), pg_record(false, 
b\"C\\0\\0\\0\\0\\x04\\0\\0XIk\\x01\\0\\0\\0\\0\\0\\n\\0\\0\\xd0\\x15\\x9b\\xed\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0!\\0\\0\\0\\0\\0\\0\\0\\\"\\0\\0\")),\n            (lsn!(\"0/16B4A30\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0\\xa0Ik\\x01\\0\\0\\0\\0\\0\\n\\0\\0\\xef[P\\x19\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0\\\"\\0\\0\\0\\0\\0\\0\\0#\\0\\0\")),\n            (lsn!(\"0/16B4A78\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0\\xe8Ik\\x01\\0\\0\\0\\0\\0\\n\\0\\0\\x06*\\xe5\\xc7\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0#\\0\\0\\0\\0\\0\\0\\0$\\0\\0\")),\n            (lsn!(\"0/16B4AC0\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\00Jk\\x01\\0\\0\\0\\0\\0\\n\\0\\0hNrZ\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0$\\0\\0\\0\\0\\0\\0\\0%\\0\\0\")),\n            (lsn!(\"0/16B4B08\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0xJk\\x01\\0\\0\\0\\0\\0\\n\\0\\0\\x90\\x89dV\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0%\\0\\0\\0\\0\\0\\0\\0&\\0\\0\")),\n            (lsn!(\"0/16B4B50\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0\\xc0Jk\\x01\\0\\0\\0\\0\\0\\n\\0\\0\\xe3\\xa7\\x08\\xc6\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0&\\0\\0\\0\\0\\0\\0\\0'\\0\\0\")),\n            (lsn!(\"0/16B4B98\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0\\x08Kk\\x01\\0\\0\\0\\0\\0\\n\\0\\0\\x80\\xfb)\\xe6\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0'\\0\\0\\0\\0\\0\\0\\0(\\0\\0\")),\n            (lsn!(\"0/16B4BE0\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0PKk\\x01\\0\\0\\0\\0\\0\\n\\0\\0\\xbf\\xae\\x81\\xc5\\0 
\\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0(\\0\\0\\0\\0\\0\\0\\0)\\0\\0\")),\n            (lsn!(\"0/16B4C28\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0\\x98Kk\\x01\\0\\0\\0\\0\\0\\n\\0\\0\\xdf\\xa9\\xd9\\0\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0)\\0\\0\\0\\0\\0\\0\\0*\\0\\0\")),\n            (lsn!(\"0/16B4C70\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0\\xe0Kk\\x01\\0\\0\\0\\0\\0\\n\\0\\0x'\\\\=\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0*\\0\\0\\0\\0\\0\\0\\0+\\0\\0\")),\n            (lsn!(\"0/16B4CB8\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0(Lk\\x01\\0\\0\\0\\0\\0\\n\\0\\0]\\xca\\xc6\\xc0\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0+\\0\\0\\0\\0\\0\\0\\0,\\0\\0\")),\n            (lsn!(\"0/16B4D00\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0pLk\\x01\\0\\0\\0\\0\\0\\n\\0\\0\\x19\\0a(\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0,\\0\\0\\0\\0\\0\\0\\0-\\0\\0\")),\n            (lsn!(\"0/16B4D48\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0\\xb8Lk\\x01\\0\\0\\0\\0\\0\\n\\0\\0y\\x079\\xed\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0-\\0\\0\\0\\0\\0\\0\\0.\\0\\0\")),\n            (lsn!(\"0/16B4D90\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0\\0Mk\\x01\\0\\0\\0\\0\\0\\n\\0\\0\\xcb\\xde$\\xea\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0.\\0\\0\\0\\0\\0\\0\\0/\\0\\0\")),\n            (lsn!(\"0/16B4DD8\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0HMk\\x01\\0\\0\\0\\0\\0\\n\\0\\0\\xa6\\xf6\\x80\\xd0\\0 
\\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0/\\0\\0\\0\\0\\0\\0\\00\\0\\0\")),\n            (lsn!(\"0/16B4E20\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0\\x90Mk\\x01\\0\\0\\0\\0\\0\\n\\0\\0\\x06+\\x95\\xa9\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\00\\0\\0\\0\\0\\0\\0\\01\\0\\0\")),\n            (lsn!(\"0/16B4E68\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0\\xd8Mk\\x01\\0\\0\\0\\0\\0\\n\\0\\0\\xfe\\xec\\x83\\xa5\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\01\\0\\0\\0\\0\\0\\0\\02\\0\\0\")),\n            (lsn!(\"0/16B4EB0\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0 Nk\\x01\\0\\0\\0\\0\\0\\n\\0\\0s\\xcc6\\xed\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\02\\0\\0\\0\\0\\0\\0\\03\\0\\0\")),\n            (lsn!(\"0/16B4EF8\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0hNk\\x01\\0\\0\\0\\0\\0\\n\\0\\0\\x9a\\xbd\\x833\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\03\\0\\0\\0\\0\\0\\0\\04\\0\\0\")),\n            (lsn!(\"0/16B4F40\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0\\xb0Nk\\x01\\0\\0\\0\\0\\0\\n\\0\\0F\\xb7j\\x12\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\04\\0\\0\\0\\0\\0\\0\\05\\0\\0\")),\n            (lsn!(\"0/16B4F88\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0\\xf8Nk\\x01\\0\\0\\0\\0\\0\\n\\0\\0\\xbep|\\x1e\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\05\\0\\0\\0\\0\\0\\0\\06\\0\\0\")),\n            (lsn!(\"0/16B4FD0\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0@Ok\\x01\\0\\0\\0\\0\\0\\n\\0\\0\\x0c\\xa9a\\x19\\0 
\\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\06\\0\\0\\0\\0\\0\\0\\07\\0\\0\")),\n            (lsn!(\"0/16B5018\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0\\x88Ok\\x01\\0\\0\\0\\0\\0\\n\\0\\0\\xae\\x021\\xae\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\07\\0\\0\\0\\0\\0\\0\\08\\0\\0\")),\n            (lsn!(\"0/16B5060\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0\\xd0Ok\\x01\\0\\0\\0\\0\\0\\n\\0\\0\\x91W\\x99\\x8d\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\08\\0\\0\\0\\0\\0\\0\\09\\0\\0\")),\n            (lsn!(\"0/16B50A8\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0\\x18Pk\\x01\\0\\0\\0\\0\\0\\n\\0\\0\\0\\xd4\\x0eS\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\09\\0\\0\\0\\0\\0\\0\\0:\\0\\0\")),\n            (lsn!(\"0/16B50F0\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0`Pk\\x01\\0\\0\\0\\0\\0\\n\\0\\0\\xa7Z\\x8bn\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0:\\0\\0\\0\\0\\0\\0\\0;\\0\\0\")),\n            (lsn!(\"0/16B5138\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0\\xa8Pk\\x01\\0\\0\\0\\0\\0\\n\\0\\0\\xd6\\xebpy\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0;\\0\\0\\0\\0\\0\\0\\0<\\0\\0\")),\n            (lsn!(\"0/16B5180\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0\\xf0Pk\\x01\\0\\0\\0\\0\\0\\n\\0\\0\\x92!\\xd7\\x91\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0<\\0\\0\\0\\0\\0\\0\\0=\\0\\0\")),\n            (lsn!(\"0/16B51C8\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\08Qk\\x01\\0\\0\\0\\0\\0\\n\\0\\03\\xd1\\xfe\\xc3\\0 
\\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0=\\0\\0\\0\\0\\0\\0\\0>\\0\\0\")),\n            (lsn!(\"0/16B5210\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0\\x80Qk\\x01\\0\\0\\0\\0\\0\\n\\0\\0@\\xff\\x92S\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0>\\0\\0\\0\\0\\0\\0\\0?\\0\\0\")),\n            (lsn!(\"0/16B5258\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0\\xc8Qk\\x01\\0\\0\\0\\0\\0\\n\\0\\0.*G\\xf7\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0?\\0\\0\\0\\0\\0\\0\\0@\\0\\0\")),\n            (lsn!(\"0/16B52A0\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0\\x10Rk\\x01\\0\\0\\0\\0\\0\\n\\0\\0=\\xb23T\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0@\\0\\0\\0\\0\\0\\0\\0A\\0\\0\")),\n            (lsn!(\"0/16B52E8\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0XRk\\x01\\0\\0\\0\\0\\0\\n\\0\\0\\xc5u%X\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0A\\0\\0\\0\\0\\0\\0\\0B\\0\\0\")),\n            (lsn!(\"0/16B5330\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0\\xa0Rk\\x01\\0\\0\\0\\0\\0\\n\\0\\0\\xfa;\\xee\\xac\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0B\\0\\0\\0\\0\\0\\0\\0C\\0\\0\")),\n            (lsn!(\"0/16B5378\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0\\xe8Rk\\x01\\0\\0\\0\\0\\0\\n\\0\\0\\x13J[r\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0C\\0\\0\\0\\0\\0\\0\\0D\\0\\0\")),\n            (lsn!(\"0/16B53C0\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\00Sk\\x01\\0\\0\\0\\0\\0\\n\\0\\0\\x0e\\xb7\\xc3\\xc4\\0 
\\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0D\\0\\0\\0\\0\\0\\0\\0E\\0\\0\")),\n            (lsn!(\"0/16B5408\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0xSk\\x01\\0\\0\\0\\0\\0\\n\\0\\0\\xf6p\\xd5\\xc8\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0E\\0\\0\\0\\0\\0\\0\\0F\\0\\0\")),\n            (lsn!(\"0/16B5450\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0\\xc0Sk\\x01\\0\\0\\0\\0\\0\\n\\0\\0\\x85^\\xb9X\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0F\\0\\0\\0\\0\\0\\0\\0G\\0\\0\")),\n            (lsn!(\"0/16B5498\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0\\x08Tk\\x01\\0\\0\\0\\0\\0\\n\\0\\0s\\xa9\\x88\\x05\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0G\\0\\0\\0\\0\\0\\0\\0H\\0\\0\")),\n            (lsn!(\"0/16B54E0\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0PTk\\x01\\0\\0\\0\\0\\0\\n\\0\\0L\\xfc &\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0H\\0\\0\\0\\0\\0\\0\\0I\\0\\0\")),\n            (lsn!(\"0/16B5528\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0\\x98Tk\\x01\\0\\0\\0\\0\\0\\n\\0\\0,\\xfbx\\xe3\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0I\\0\\0\\0\\0\\0\\0\\0J\\0\\0\")),\n            (lsn!(\"0/16B5570\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0\\xe0Tk\\x01\\0\\0\\0\\0\\0\\n\\0\\0\\x8bu\\xfd\\xde\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0J\\0\\0\\0\\0\\0\\0\\0K\\0\\0\")),\n            (lsn!(\"0/16B55B8\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0(Uk\\x01\\0\\0\\0\\0\\0\\n\\0\\0;3w^\\0 
\\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0K\\0\\0\\0\\0\\0\\0\\0L\\0\\0\")),\n            (lsn!(\"0/16B5600\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0pUk\\x01\\0\\0\\0\\0\\0\\n\\0\\0\\x7f\\xf9\\xd0\\xb6\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0L\\0\\0\\0\\0\\0\\0\\0M\\0\\0\")),\n            (lsn!(\"0/16B5648\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0\\xb8Uk\\x01\\0\\0\\0\\0\\0\\n\\0\\0\\x1f\\xfe\\x88s\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0M\\0\\0\\0\\0\\0\\0\\0N\\0\\0\")),\n            (lsn!(\"0/16B5690\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0\\0Vk\\x01\\0\\0\\0\\0\\0\\n\\0\\0\\xde\\xbe\\x9a_\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0N\\0\\0\\0\\0\\0\\0\\0O\\0\\0\")),\n            (lsn!(\"0/16B56D8\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0HVk\\x01\\0\\0\\0\\0\\0\\n\\0\\0\\xb3\\x96>e\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0O\\0\\0\\0\\0\\0\\0\\0P\\0\\0\")),\n            (lsn!(\"0/16B5720\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0\\x90Vk\\x01\\0\\0\\0\\0\\0\\n\\0\\0\\x13K+\\x1c\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0P\\0\\0\\0\\0\\0\\0\\0Q\\0\\0\")),\n            (lsn!(\"0/16B5768\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0\\xd8Vk\\x01\\0\\0\\0\\0\\0\\n\\0\\0\\xeb\\x8c=\\x10\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0Q\\0\\0\\0\\0\\0\\0\\0R\\0\\0\")),\n            (lsn!(\"0/16B57B0\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0 Wk\\x01\\0\\0\\0\\0\\0\\n\\0\\0\\x155\\x87s\\0 
\\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0R\\0\\0\\0\\0\\0\\0\\0S\\0\\0\")),\n            (lsn!(\"0/16B57F8\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0hWk\\x01\\0\\0\\0\\0\\0\\n\\0\\0\\xfcD2\\xad\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0S\\0\\0\\0\\0\\0\\0\\0T\\0\\0\")),\n            (lsn!(\"0/16B5840\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0\\xb0Wk\\x01\\0\\0\\0\\0\\0\\n\\0\\0 N\\xdb\\x8c\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0T\\0\\0\\0\\0\\0\\0\\0U\\0\\0\")),\n            (lsn!(\"0/16B5888\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0\\xf8Wk\\x01\\0\\0\\0\\0\\0\\n\\0\\0\\xd8\\x89\\xcd\\x80\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0U\\0\\0\\0\\0\\0\\0\\0V\\0\\0\")),\n            (lsn!(\"0/16B58D0\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0@Xk\\x01\\0\\0\\0\\0\\0\\n\\0\\03\\x9e\\xfeV\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0V\\0\\0\\0\\0\\0\\0\\0W\\0\\0\")),\n            (lsn!(\"0/16B5918\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0\\x88Xk\\x01\\0\\0\\0\\0\\0\\n\\0\\0\\x915\\xae\\xe1\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0W\\0\\0\\0\\0\\0\\0\\0X\\0\\0\")),\n            (lsn!(\"0/16B5960\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0\\xd0Xk\\x01\\0\\0\\0\\0\\0\\n\\0\\0\\xae`\\x06\\xc2\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0X\\0\\0\\0\\0\\0\\0\\0Y\\0\\0\")),\n            (lsn!(\"0/16B59A8\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0\\x18Yk\\x01\\0\\0\\0\\0\\0\\n\\0\\0\\x0f\\x90/\\x90\\0 
\\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0Y\\0\\0\\0\\0\\0\\0\\0Z\\0\\0\")),\n            (lsn!(\"0/16B59F0\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0`Yk\\x01\\0\\0\\0\\0\\0\\n\\0\\0\\xa8\\x1e\\xaa\\xad\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0Z\\0\\0\\0\\0\\0\\0\\0[\\0\\0\")),\n            (lsn!(\"0/16B5A38\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0\\xa8Yk\\x01\\0\\0\\0\\0\\0\\n\\0\\0\\xd9\\xafQ\\xba\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0[\\0\\0\\0\\0\\0\\0\\0\\\\\\0\\0\")),\n            (lsn!(\"0/16B5A80\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0\\xf0Yk\\x01\\0\\0\\0\\0\\0\\n\\0\\0\\x9de\\xf6R\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0\\\\\\0\\0\\0\\0\\0\\0\\0]\\0\\0\")),\n            (lsn!(\"0/16B5AC8\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\08Zk\\x01\\0\\0\\0\\0\\0\\n\\0\\0O\\x0c\\xd0+\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0]\\0\\0\\0\\0\\0\\0\\0^\\0\\0\")),\n            (lsn!(\"0/16B5B10\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0\\x80Zk\\x01\\0\\0\\0\\0\\0\\n\\0\\0<\\\"\\xbc\\xbb\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0^\\0\\0\\0\\0\\0\\0\\0_\\0\\0\")),\n            (lsn!(\"0/16B5B58\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0\\xc8Zk\\x01\\0\\0\\0\\0\\0\\n\\0\\0\\xff\\x8cl\\x08\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0_\\0\\0\\0\\0\\0\\0\\0`\\0\\0\")),\n            (lsn!(\"0/16B5BA0\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0\\x10[k\\x01\\0\\0\\0\\0\\0\\n\\0\\0a@\\x02\\xc4\\0 
\\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0`\\0\\0\\0\\0\\0\\0\\0a\\0\\0\")),\n            (lsn!(\"0/16B5BE8\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0X[k\\x01\\0\\0\\0\\0\\0\\n\\0\\0\\x99\\x87\\x14\\xc8\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0a\\0\\0\\0\\0\\0\\0\\0b\\0\\0\")),\n            (lsn!(\"0/16B5C30\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0\\xa0[k\\x01\\0\\0\\0\\0\\0\\n\\0\\0\\xa6\\xc9\\xdf<\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0b\\0\\0\\0\\0\\0\\0\\0c\\0\\0\")),\n            (lsn!(\"0/16B5C78\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0\\xe8[k\\x01\\0\\0\\0\\0\\0\\n\\0\\0O\\xb8j\\xe2\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0c\\0\\0\\0\\0\\0\\0\\0d\\0\\0\")),\n            (lsn!(\"0/16B5CC0\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\00\\\\k\\x01\\0\\0\\0\\0\\0\\n\\0\\0\\xc7\\xee\\xe2)\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0d\\0\\0\\0\\0\\0\\0\\0e\\0\\0\")),\n            (lsn!(\"0/16B5D08\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0x\\\\k\\x01\\0\\0\\0\\0\\0\\n\\0\\0?)\\xf4%\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0e\\0\\0\\0\\0\\0\\0\\0f\\0\\0\")),\n            (lsn!(\"0/16B5D50\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0\\xc0\\\\k\\x01\\0\\0\\0\\0\\0\\n\\0\\0L\\x07\\x98\\xb5\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0f\\0\\0\\0\\0\\0\\0\\0g\\0\\0\")),\n            (lsn!(\"0/16B5D98\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0\\x08]k\\x01\\0\\0\\0\\0\\0\\n\\0\\0/[\\xb9\\x95\\0 
\\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0g\\0\\0\\0\\0\\0\\0\\0h\\0\\0\")),\n            (lsn!(\"0/16B5DE0\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0P]k\\x01\\0\\0\\0\\0\\0\\n\\0\\0\\x10\\x0e\\x11\\xb6\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0h\\0\\0\\0\\0\\0\\0\\0i\\0\\0\")),\n            (lsn!(\"0/16B5E28\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0\\x98]k\\x01\\0\\0\\0\\0\\0\\n\\0\\0p\\tIs\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0i\\0\\0\\0\\0\\0\\0\\0j\\0\\0\")),\n            (lsn!(\"0/16B5E70\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0\\xe0]k\\x01\\0\\0\\0\\0\\0\\n\\0\\0\\xd7\\x87\\xccN\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0j\\0\\0\\0\\0\\0\\0\\0k\\0\\0\")),\n            (lsn!(\"0/16B5EB8\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0(^k\\x01\\0\\0\\0\\0\\0\\n\\0\\0\\x14XI\\xe5\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0k\\0\\0\\0\\0\\0\\0\\0l\\0\\0\")),\n            (lsn!(\"0/16B5F00\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0p^k\\x01\\0\\0\\0\\0\\0\\n\\0\\0P\\x92\\xee\\r\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0l\\0\\0\\0\\0\\0\\0\\0m\\0\\0\")),\n            (lsn!(\"0/16B5F48\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0\\xb8^k\\x01\\0\\0\\0\\0\\0\\n\\0\\00\\x95\\xb6\\xc8\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0m\\0\\0\\0\\0\\0\\0\\0n\\0\\0\")),\n            (lsn!(\"0/16B5F90\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0\\0_k\\x01\\0\\0\\0\\0\\0\\n\\0\\0\\x82L\\xab\\xcf\\0 
\\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0n\\0\\0\\0\\0\\0\\0\\0o\\0\\0\")),\n            (lsn!(\"0/16B5FD8\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0H_k\\x01\\0\\0\\0\\0\\0\\n\\0\\0\\xefd\\x0f\\xf5\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0o\\0\\0\\0\\0\\0\\0\\0p\\0\\0\")),\n            (lsn!(\"0/16B6038\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0\\x90_k\\x01\\0\\0\\0\\0\\0\\n\\0\\0O\\xb9\\x1a\\x8c\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0p\\0\\0\\0\\0\\0\\0\\0q\\0\\0\")),\n            (lsn!(\"0/16B6080\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0\\xd8_k\\x01\\0\\0\\0\\0\\0\\n\\0\\0\\xb7~\\x0c\\x80\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0q\\0\\0\\0\\0\\0\\0\\0r\\0\\0\")),\n            (lsn!(\"0/16B60C8\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\08`k\\x01\\0\\0\\0\\0\\0\\n\\0\\0\\xc9\\xc1bC\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0r\\0\\0\\0\\0\\0\\0\\0s\\0\\0\")),\n            (lsn!(\"0/16B6110\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0\\x80`k\\x01\\0\\0\\0\\0\\0\\n\\0\\0\\xc1xD\\x1b\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0s\\0\\0\\0\\0\\0\\0\\0t\\0\\0\")),\n            (lsn!(\"0/16B6158\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0\\xc8`k\\x01\\0\\0\\0\\0\\0\\n\\0\\0\\x96j\\xca\\xea\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0t\\0\\0\\0\\0\\0\\0\\0u\\0\\0\")),\n            (lsn!(\"0/16B61A0\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0\\x10ak\\x01\\0\\0\\0\\0\\0\\n\\0\\0$B\\xca\\xa1\\0 
\\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0u\\0\\0\\0\\0\\0\\0\\0v\\0\\0\")),\n            (lsn!(\"0/16B61E8\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0Xak\\x01\\0\\0\\0\\0\\0\\n\\0\\0\\xb6\\xa45\\xb7\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0v\\0\\0\\0\\0\\0\\0\\0w\\0\\0\")),\n            (lsn!(\"0/16B6230\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0\\xa0ak\\x01\\0\\0\\0\\0\\0\\n\\0\\0!g\\x1f+\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0w\\0\\0\\0\\0\\0\\0\\0x\\0\\0\")),\n            (lsn!(\"0/16B6278\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0\\xe8ak\\x01\\0\\0\\0\\0\\0\\n\\0\\0\\r\\xea\\x9e\\x11\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0x\\0\\0\\0\\0\\0\\0\\0y\\0\\0\")),\n            (lsn!(\"0/16B62C0\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\00bk\\x01\\0\\0\\0\\0\\0\\n\\0\\0\\xcc[\\x91q\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0y\\0\\0\\0\\0\\0\\0\\0z\\0\\0\")),\n            (lsn!(\"0/16B6308\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0xbk\\x01\\0\\0\\0\\0\\0\\n\\0\\0^\\xbdng\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0z\\0\\0\\0\\0\\0\\0\\0{\\0\\0\")),\n            (lsn!(\"0/16B6350\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0\\xc0bk\\x01\\0\\0\\0\\0\\0\\n\\0\\0V\\x04H?\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0{\\0\\0\\0\\0\\0\\0\\0|\\0\\0\")),\n            (lsn!(\"0/16B6398\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0\\x08ck\\x01\\0\\0\\0\\0\\0\\n\\0\\0X!\\xf9\\x90\\0 
\\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0|\\0\\0\\0\\0\\0\\0\\0}\\0\\0\")),\n            (lsn!(\"0/16B63E0\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0Pck\\x01\\0\\0\\0\\0\\0\\n\\0\\0\\xb3>\\xc6\\x85\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0}\\0\\0\\0\\0\\0\\0\\0~\\0\\0\")),\n            (lsn!(\"0/16B6428\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0\\x98ck\\x01\\0\\0\\0\\0\\0\\n\\0\\0\\xb9\\x18wZ\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0~\\0\\0\\0\\0\\0\\0\\0\\x7f\\0\\0\")),\n            (lsn!(\"0/16B6470\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0\\xe0ck\\x01\\0\\0\\0\\0\\0\\n\\0\\0\\xb8R\\xd2\\xfb\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0\\x7f\\0\\0\\0\\0\\0\\0\\0\\x80\\0\\0\")),\n            (lsn!(\"0/16B64B8\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0(dk\\x01\\0\\0\\0\\0\\0\\n\\0\\0\\xa2\\xbb\\xbb\\x9f\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0\\x80\\0\\0\\0\\0\\0\\0\\0\\x81\\0\\0\")),\n            (lsn!(\"0/16B6500\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0pdk\\x01\\0\\0\\0\\0\\0\\n\\0\\0I\\xa4\\x84\\x8a\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0\\x81\\0\\0\\0\\0\\0\\0\\0\\x82\\0\\0\")),\n            (lsn!(\"0/16B6548\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0\\xb8dk\\x01\\0\\0\\0\\0\\0\\n\\0\\0C\\x825U\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0\\x82\\0\\0\\0\\0\\0\\0\\0\\x83\\0\\0\")),\n            (lsn!(\"0/16B6590\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0\\0ek\\x01\\0\\0\\0\\0\\0\\n\\0\\0\\x8a\\xccb\\x9a\\0 
\\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0\\x83\\0\\0\\0\\0\\0\\0\\0\\x84\\0\\0\")),\n            (lsn!(\"0/16B65D8\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0Hek\\x01\\0\\0\\0\\0\\0\\n\\0\\0\\xdd\\xde\\xeck\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0\\x84\\0\\0\\0\\0\\0\\0\\0\\x85\\0\\0\")),\n            (lsn!(\"0/16B6620\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0\\x90ek\\x01\\0\\0\\0\\0\\0\\n\\0\\0\\xae\\x01\\x9d\\xb7\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0\\x85\\0\\0\\0\\0\\0\\0\\0\\x86\\0\\0\")),\n            (lsn!(\"0/16B6668\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0\\xd8ek\\x01\\0\\0\\0\\0\\0\\n\\0\\0<\\xe7b\\xa1\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0\\x86\\0\\0\\0\\0\\0\\0\\0\\x87\\0\\0\")),\n            (lsn!(\"0/16B66B0\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0 fk\\x01\\0\\0\\0\\0\\0\\n\\0\\0\\x19J6\\x81\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0\\x87\\0\\0\\0\\0\\0\\0\\0\\x88\\0\\0\")),\n            (lsn!(\"0/16B66F8\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0hfk\\x01\\0\\0\\0\\0\\0\\n\\0\\05\\xc7\\xb7\\xbb\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0\\x88\\0\\0\\0\\0\\0\\0\\0\\x89\\0\\0\")),\n            (lsn!(\"0/16B6740\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0\\xb0fk\\x01\\0\\0\\0\\0\\0\\n\\0\\0F\\x18\\xc6g\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0\\x89\\0\\0\\0\\0\\0\\0\\0\\x8a\\0\\0\")),\n            (lsn!(\"0/16B6788\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0\\xf8fk\\x01\\0\\0\\0\\0\\0\\n\\0\\0\\xd4\\xfe9q\\0 
\\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0\\x8a\\0\\0\\0\\0\\0\\0\\0\\x8b\\0\\0\")),\n            (lsn!(\"0/16B67D0\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0@gk\\x01\\0\\0\\0\\0\\0\\n\\0\\0\\x1d\\xb0n\\xbe\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0\\x8b\\0\\0\\0\\0\\0\\0\\0\\x8c\\0\\0\")),\n            (lsn!(\"0/16B6818\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0\\x88gk\\x01\\0\\0\\0\\0\\0\\n\\0\\0\\xd2b\\xae\\x86\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0\\x8c\\0\\0\\0\\0\\0\\0\\0\\x8d\\0\\0\")),\n            (lsn!(\"0/16B6860\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0\\xd0gk\\x01\\0\\0\\0\\0\\0\\n\\0\\09}\\x91\\x93\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0\\x8d\\0\\0\\0\\0\\0\\0\\0\\x8e\\0\\0\")),\n            (lsn!(\"0/16B68A8\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0\\x18hk\\x01\\0\\0\\0\\0\\0\\n\\0\\0\\xabb\\x7f\\n\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0\\x8e\\0\\0\\0\\0\\0\\0\\0\\x8f\\0\\0\")),\n            (lsn!(\"0/16B68F0\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0`hk\\x01\\0\\0\\0\\0\\0\\n\\0\\0\\xf3\\\"\\xa1\\x1b\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0\\x8f\\0\\0\\0\\0\\0\\0\\0\\x90\\0\\0\")),\n            (lsn!(\"0/16B6938\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0\\xa8hk\\x01\\0\\0\\0\\0\\0\\n\\0\\0@'\\x9d{\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0\\x90\\0\\0\\0\\0\\0\\0\\0\\x91\\0\\0\")),\n            (lsn!(\"0/16B6980\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0\\xf0hk\\x01\\0\\0\\0\\0\\0\\n\\0\\0\\xab8\\xa2n\\0 
\\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0\\x91\\0\\0\\0\\0\\0\\0\\0\\x92\\0\\0\")),\n            (lsn!(\"0/16B69C8\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\08ik\\x01\\0\\0\\0\\0\\0\\n\\0\\0`\\xe9b&\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0\\x92\\0\\0\\0\\0\\0\\0\\0\\x93\\0\\0\")),\n            (lsn!(\"0/16B6A10\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0\\x80ik\\x01\\0\\0\\0\\0\\0\\n\\0\\0hPD~\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0\\x93\\0\\0\\0\\0\\0\\0\\0\\x94\\0\\0\")),\n            (lsn!(\"0/16B6A58\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0\\xc8ik\\x01\\0\\0\\0\\0\\0\\n\\0\\0?B\\xca\\x8f\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0\\x94\\0\\0\\0\\0\\0\\0\\0\\x95\\0\\0\")),\n            (lsn!(\"0/16B6AA0\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0\\x10jk\\x01\\0\\0\\0\\0\\0\\n\\0\\0\\xfe\\xf3\\xc5\\xef\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0\\x95\\0\\0\\0\\0\\0\\0\\0\\x96\\0\\0\")),\n            (lsn!(\"0/16B6AE8\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0Xjk\\x01\\0\\0\\0\\0\\0\\n\\0\\0l\\x15:\\xf9\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0\\x96\\0\\0\\0\\0\\0\\0\\0\\x97\\0\\0\")),\n            (lsn!(\"0/16B6B30\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0\\xa0jk\\x01\\0\\0\\0\\0\\0\\n\\0\\0\\xfb\\xd6\\x10e\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0\\x97\\0\\0\\0\\0\\0\\0\\0\\x98\\0\\0\")),\n            (lsn!(\"0/16B6B78\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0\\xe8jk\\x01\\0\\0\\0\\0\\0\\n\\0\\0\\xd7[\\x91_\\0 
\\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0\\x98\\0\\0\\0\\0\\0\\0\\0\\x99\\0\\0\")),\n            (lsn!(\"0/16B6BC0\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\00kk\\x01\\0\\0\\0\\0\\0\\n\\0\\0es\\x91\\x14\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0\\x99\\0\\0\\0\\0\\0\\0\\0\\x9a\\0\\0\")),\n            (lsn!(\"0/16B6C08\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0xkk\\x01\\0\\0\\0\\0\\0\\n\\0\\0\\xf7\\x95n\\x02\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0\\x9a\\0\\0\\0\\0\\0\\0\\0\\x9b\\0\\0\")),\n            (lsn!(\"0/16B6C50\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0\\xc0kk\\x01\\0\\0\\0\\0\\0\\n\\0\\0\\xff,HZ\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0\\x9b\\0\\0\\0\\0\\0\\0\\0\\x9c\\0\\0\")),\n            (lsn!(\"0/16B6C98\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0\\x08lk\\x01\\0\\0\\0\\0\\0\\n\\0\\0d\\xa2\\xe9\\x88\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0\\x9c\\0\\0\\0\\0\\0\\0\\0\\x9d\\0\\0\")),\n            (lsn!(\"0/16B6CE0\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0Plk\\x01\\0\\0\\0\\0\\0\\n\\0\\0\\x8f\\xbd\\xd6\\x9d\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0\\x9d\\0\\0\\0\\0\\0\\0\\0\\x9e\\0\\0\")),\n            (lsn!(\"0/16B6D28\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0\\x98lk\\x01\\0\\0\\0\\0\\0\\n\\0\\0\\x85\\x9bgB\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0\\x9e\\0\\0\\0\\0\\0\\0\\0\\x9f\\0\\0\")),\n            (lsn!(\"0/16B6D70\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0\\xe0lk\\x01\\0\\0\\0\\0\\0\\n\\0\\0s]\\xcd\\xda\\0 
\\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0\\x9f\\0\\0\\0\\0\\0\\0\\0\\xa0\\0\\0\")),\n            (lsn!(\"0/16B6DB8\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0(mk\\x01\\0\\0\\0\\0\\0\\n\\0\\0\\xfeI\\x8a\\x0f\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0\\xa0\\0\\0\\0\\0\\0\\0\\0\\xa1\\0\\0\")),\n            (lsn!(\"0/16B6E00\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0pmk\\x01\\0\\0\\0\\0\\0\\n\\0\\0\\x15V\\xb5\\x1a\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0\\xa1\\0\\0\\0\\0\\0\\0\\0\\xa2\\0\\0\")),\n            (lsn!(\"0/16B6E48\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0\\xb8mk\\x01\\0\\0\\0\\0\\0\\n\\0\\0\\x1fp\\x04\\xc5\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0\\xa2\\0\\0\\0\\0\\0\\0\\0\\xa3\\0\\0\")),\n            (lsn!(\"0/16B6E90\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0\\0nk\\x01\\0\\0\\0\\0\\0\\n\\0\\0\\xa5\\xa7\\\\!\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0\\xa3\\0\\0\\0\\0\\0\\0\\0\\xa4\\0\\0\")),\n            (lsn!(\"0/16B6ED8\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0Hnk\\x01\\0\\0\\0\\0\\0\\n\\0\\0\\xf2\\xb5\\xd2\\xd0\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0\\xa4\\0\\0\\0\\0\\0\\0\\0\\xa5\\0\\0\")),\n            (lsn!(\"0/16B6F20\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0\\x90nk\\x01\\0\\0\\0\\0\\0\\n\\0\\0\\x81j\\xa3\\x0c\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0\\xa5\\0\\0\\0\\0\\0\\0\\0\\xa6\\0\\0\")),\n            (lsn!(\"0/16B6F68\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0\\xd8nk\\x01\\0\\0\\0\\0\\0\\n\\0\\0\\x13\\x8c\\\\\\x1a\\0 
\\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0\\xa6\\0\\0\\0\\0\\0\\0\\0\\xa7\\0\\0\")),\n            (lsn!(\"0/16B6FB0\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0 ok\\x01\\0\\0\\0\\0\\0\\n\\0\\0E\\xb8\\x07\\x11\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0\\xa7\\0\\0\\0\\0\\0\\0\\0\\xa8\\0\\0\")),\n            (lsn!(\"0/16B6FF8\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0hok\\x01\\0\\0\\0\\0\\0\\n\\0\\0i5\\x86+\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0\\xa8\\0\\0\\0\\0\\0\\0\\0\\xa9\\0\\0\")),\n            (lsn!(\"0/16B7040\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0\\xb0ok\\x01\\0\\0\\0\\0\\0\\n\\0\\0\\x1a\\xea\\xf7\\xf7\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0\\xa9\\0\\0\\0\\0\\0\\0\\0\\xaa\\0\\0\")),\n            (lsn!(\"0/16B7088\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0\\xf8ok\\x01\\0\\0\\0\\0\\0\\n\\0\\0\\x88\\x0c\\x08\\xe1\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0\\xaa\\0\\0\\0\\0\\0\\0\\0\\xab\\0\\0\")),\n            (lsn!(\"0/16B70D0\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0@pk\\x01\\0\\0\\0\\0\\0\\n\\0\\0q1\\xe1\\xa2\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0\\xab\\0\\0\\0\\0\\0\\0\\0\\xac\\0\\0\")),\n            (lsn!(\"0/16B7118\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0\\x88pk\\x01\\0\\0\\0\\0\\0\\n\\0\\0\\xbe\\xe3!\\x9a\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0\\xac\\0\\0\\0\\0\\0\\0\\0\\xad\\0\\0\")),\n            (lsn!(\"0/16B7160\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0\\xd0pk\\x01\\0\\0\\0\\0\\0\\n\\0\\0U\\xfc\\x1e\\x8f\\0 
\\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0\\xad\\0\\0\\0\\0\\0\\0\\0\\xae\\0\\0\")),\n            (lsn!(\"0/16B71A8\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0\\x18qk\\x01\\0\\0\\0\\0\\0\\n\\0\\0\\x9e-\\xde\\xc7\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0\\xae\\0\\0\\0\\0\\0\\0\\0\\xaf\\0\\0\")),\n            (lsn!(\"0/16B71F0\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0`qk\\x01\\0\\0\\0\\0\\0\\n\\0\\0\\xc6m\\0\\xd6\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0\\xaf\\0\\0\\0\\0\\0\\0\\0\\xb0\\0\\0\")),\n            (lsn!(\"0/16B7238\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0\\xa8qk\\x01\\0\\0\\0\\0\\0\\n\\0\\0uh<\\xb6\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0\\xb0\\0\\0\\0\\0\\0\\0\\0\\xb1\\0\\0\")),\n            (lsn!(\"0/16B7280\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0\\xf0qk\\x01\\0\\0\\0\\0\\0\\n\\0\\0\\x9ew\\x03\\xa3\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0\\xb1\\0\\0\\0\\0\\0\\0\\0\\xb2\\0\\0\")),\n            (lsn!(\"0/16B72C8\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\08rk\\x01\\0\\0\\0\\0\\0\\n\\0\\0&?\\xcc\\xc0\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0\\xb2\\0\\0\\0\\0\\0\\0\\0\\xb3\\0\\0\")),\n            (lsn!(\"0/16B7310\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0\\x80rk\\x01\\0\\0\\0\\0\\0\\n\\0\\0.\\x86\\xea\\x98\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0\\xb3\\0\\0\\0\\0\\0\\0\\0\\xb4\\0\\0\")),\n            (lsn!(\"0/16B7358\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0\\xc8rk\\x01\\0\\0\\0\\0\\0\\n\\0\\0y\\x94di\\0 
\\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0\\xb4\\0\\0\\0\\0\\0\\0\\0\\xb5\\0\\0\")),\n            (lsn!(\"0/16B73A0\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0\\x10sk\\x01\\0\\0\\0\\0\\0\\n\\0\\0\\xcb\\xbcd\\\"\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0\\xb5\\0\\0\\0\\0\\0\\0\\0\\xb6\\0\\0\")),\n            (lsn!(\"0/16B73E8\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0Xsk\\x01\\0\\0\\0\\0\\0\\n\\0\\0YZ\\x9b4\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0\\xb6\\0\\0\\0\\0\\0\\0\\0\\xb7\\0\\0\")),\n            (lsn!(\"0/16B7430\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0\\xa0sk\\x01\\0\\0\\0\\0\\0\\n\\0\\0\\xce\\x99\\xb1\\xa8\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0\\xb7\\0\\0\\0\\0\\0\\0\\0\\xb8\\0\\0\")),\n            (lsn!(\"0/16B7478\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0\\xe8sk\\x01\\0\\0\\0\\0\\0\\n\\0\\0\\xe2\\x140\\x92\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0\\xb8\\0\\0\\0\\0\\0\\0\\0\\xb9\\0\\0\")),\n            (lsn!(\"0/16B74C0\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\00tk\\x01\\0\\0\\0\\0\\0\\n\\0\\0\\xc5\\x97 \\xa4\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0\\xb9\\0\\0\\0\\0\\0\\0\\0\\xba\\0\\0\")),\n            (lsn!(\"0/16B7508\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0xtk\\x01\\0\\0\\0\\0\\0\\n\\0\\0Wq\\xdf\\xb2\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0\\xba\\0\\0\\0\\0\\0\\0\\0\\xbb\\0\\0\")),\n            (lsn!(\"0/16B7550\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0\\xc0tk\\x01\\0\\0\\0\\0\\0\\n\\0\\0_\\xc8\\xf9\\xea\\0 
\\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0\\xbb\\0\\0\\0\\0\\0\\0\\0\\xbc\\0\\0\")),\n            (lsn!(\"0/16B7598\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0\\x08uk\\x01\\0\\0\\0\\0\\0\\n\\0\\0Q\\xedHE\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0\\xbc\\0\\0\\0\\0\\0\\0\\0\\xbd\\0\\0\")),\n            (lsn!(\"0/16B75E0\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0Puk\\x01\\0\\0\\0\\0\\0\\n\\0\\0\\xba\\xf2wP\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0\\xbd\\0\\0\\0\\0\\0\\0\\0\\xbe\\0\\0\")),\n            (lsn!(\"0/16B7628\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0\\x98uk\\x01\\0\\0\\0\\0\\0\\n\\0\\0\\xb0\\xd4\\xc6\\x8f\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0\\xbe\\0\\0\\0\\0\\0\\0\\0\\xbf\\0\\0\")),\n            (lsn!(\"0/16B7670\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0\\xe0uk\\x01\\0\\0\\0\\0\\0\\n\\0\\0\\xebii\\0\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0\\xbf\\0\\0\\0\\0\\0\\0\\0\\xc0\\0\\0\")),\n            (lsn!(\"0/16B76B8\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0(vk\\x01\\0\\0\\0\\0\\0\\n\\0\\0\\xeb)4\\xba\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0\\xc0\\0\\0\\0\\0\\0\\0\\0\\xc1\\0\\0\")),\n            (lsn!(\"0/16B7700\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0pvk\\x01\\0\\0\\0\\0\\0\\n\\0\\0\\06\\x0b\\xaf\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0\\xc1\\0\\0\\0\\0\\0\\0\\0\\xc2\\0\\0\")),\n            (lsn!(\"0/16B7748\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0\\xb8vk\\x01\\0\\0\\0\\0\\0\\n\\0\\0\\n\\x10\\xbap\\0 
\\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0\\xc2\\0\\0\\0\\0\\0\\0\\0\\xc3\\0\\0\")),\n            (lsn!(\"0/16B7790\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0\\0wk\\x01\\0\\0\\0\\0\\0\\n\\0\\0\\xc3^\\xed\\xbf\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0\\xc3\\0\\0\\0\\0\\0\\0\\0\\xc4\\0\\0\")),\n            (lsn!(\"0/16B77D8\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0Hwk\\x01\\0\\0\\0\\0\\0\\n\\0\\0\\x94LcN\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0\\xc4\\0\\0\\0\\0\\0\\0\\0\\xc5\\0\\0\")),\n            (lsn!(\"0/16B7820\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0\\x90wk\\x01\\0\\0\\0\\0\\0\\n\\0\\0\\xe7\\x93\\x12\\x92\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0\\xc5\\0\\0\\0\\0\\0\\0\\0\\xc6\\0\\0\")),\n            (lsn!(\"0/16B7868\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0\\xd8wk\\x01\\0\\0\\0\\0\\0\\n\\0\\0uu\\xed\\x84\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0\\xc6\\0\\0\\0\\0\\0\\0\\0\\xc7\\0\\0\")),\n            (lsn!(\"0/16B78B0\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0 xk\\x01\\0\\0\\0\\0\\0\\n\\0\\0z\\x8f\\x98^\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0\\xc7\\0\\0\\0\\0\\0\\0\\0\\xc8\\0\\0\")),\n            (lsn!(\"0/16B78F8\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0hxk\\x01\\0\\0\\0\\0\\0\\n\\0\\0V\\x02\\x19d\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0\\xc8\\0\\0\\0\\0\\0\\0\\0\\xc9\\0\\0\")),\n            (lsn!(\"0/16B7940\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0\\xb0xk\\x01\\0\\0\\0\\0\\0\\n\\0\\0%\\xddh\\xb8\\0 
\\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0\\xc9\\0\\0\\0\\0\\0\\0\\0\\xca\\0\\0\")),\n            (lsn!(\"0/16B7988\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0\\xf8xk\\x01\\0\\0\\0\\0\\0\\n\\0\\0\\xb7;\\x97\\xae\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0\\xca\\0\\0\\0\\0\\0\\0\\0\\xcb\\0\\0\")),\n            (lsn!(\"0/16B79D0\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0@yk\\x01\\0\\0\\0\\0\\0\\n\\0\\0~u\\xc0a\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0\\xcb\\0\\0\\0\\0\\0\\0\\0\\xcc\\0\\0\")),\n            (lsn!(\"0/16B7A18\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0\\x88yk\\x01\\0\\0\\0\\0\\0\\n\\0\\0\\xb1\\xa7\\0Y\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0\\xcc\\0\\0\\0\\0\\0\\0\\0\\xcd\\0\\0\")),\n            (lsn!(\"0/16B7A60\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0\\xd0yk\\x01\\0\\0\\0\\0\\0\\n\\0\\0Z\\xb8?L\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0\\xcd\\0\\0\\0\\0\\0\\0\\0\\xce\\0\\0\")),\n            (lsn!(\"0/16B7AA8\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0\\x18zk\\x01\\0\\0\\0\\0\\0\\n\\0\\0\\xe2\\xf0\\xf0/\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0\\xce\\0\\0\\0\\0\\0\\0\\0\\xcf\\0\\0\")),\n            (lsn!(\"0/16B7AF0\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0`zk\\x01\\0\\0\\0\\0\\0\\n\\0\\0\\xba\\xb0.>\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0\\xcf\\0\\0\\0\\0\\0\\0\\0\\xd0\\0\\0\")),\n            (lsn!(\"0/16B7B38\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0\\xa8zk\\x01\\0\\0\\0\\0\\0\\n\\0\\0\\t\\xb5\\x12^\\0 
\\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0\\xd0\\0\\0\\0\\0\\0\\0\\0\\xd1\\0\\0\")),\n            (lsn!(\"0/16B7B80\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0\\xf0zk\\x01\\0\\0\\0\\0\\0\\n\\0\\0\\xe2\\xaa-K\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0\\xd1\\0\\0\\0\\0\\0\\0\\0\\xd2\\0\\0\")),\n            (lsn!(\"0/16B7BC8\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\08{k\\x01\\0\\0\\0\\0\\0\\n\\0\\0){\\xed\\x03\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0\\xd2\\0\\0\\0\\0\\0\\0\\0\\xd3\\0\\0\")),\n            (lsn!(\"0/16B7C10\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0\\x80{k\\x01\\0\\0\\0\\0\\0\\n\\0\\0!\\xc2\\xcb[\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0\\xd3\\0\\0\\0\\0\\0\\0\\0\\xd4\\0\\0\")),\n            (lsn!(\"0/16B7C58\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0\\xc8{k\\x01\\0\\0\\0\\0\\0\\n\\0\\0v\\xd0E\\xaa\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0\\xd4\\0\\0\\0\\0\\0\\0\\0\\xd5\\0\\0\")),\n            (lsn!(\"0/16B7CA0\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0\\x10|k\\x01\\0\\0\\0\\0\\0\\n\\0\\0QSU\\x9c\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0\\xd5\\0\\0\\0\\0\\0\\0\\0\\xd6\\0\\0\")),\n            (lsn!(\"0/16B7CE8\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0X|k\\x01\\0\\0\\0\\0\\0\\n\\0\\0\\xc3\\xb5\\xaa\\x8a\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0\\xd6\\0\\0\\0\\0\\0\\0\\0\\xd7\\0\\0\")),\n            (lsn!(\"0/16B7D30\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0\\xa0|k\\x01\\0\\0\\0\\0\\0\\n\\0\\0Tv\\x80\\x16\\0 
\\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0\\xd7\\0\\0\\0\\0\\0\\0\\0\\xd8\\0\\0\")),\n            (lsn!(\"0/16B7D78\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0\\xe8|k\\x01\\0\\0\\0\\0\\0\\n\\0\\0x\\xfb\\x01,\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0\\xd8\\0\\0\\0\\0\\0\\0\\0\\xd9\\0\\0\")),\n            (lsn!(\"0/16B7DC0\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\00}k\\x01\\0\\0\\0\\0\\0\\n\\0\\0\\xca\\xd3\\x01g\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0\\xd9\\0\\0\\0\\0\\0\\0\\0\\xda\\0\\0\")),\n            (lsn!(\"0/16B7E08\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0x}k\\x01\\0\\0\\0\\0\\0\\n\\0\\0X5\\xfeq\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0\\xda\\0\\0\\0\\0\\0\\0\\0\\xdb\\0\\0\")),\n            (lsn!(\"0/16B7E50\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0\\xc0}k\\x01\\0\\0\\0\\0\\0\\n\\0\\0P\\x8c\\xd8)\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0\\xdb\\0\\0\\0\\0\\0\\0\\0\\xdc\\0\\0\")),\n            (lsn!(\"0/16B7E98\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0\\x08~k\\x01\\0\\0\\0\\0\\0\\n\\0\\0-0f\\xad\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0\\xdc\\0\\0\\0\\0\\0\\0\\0\\xdd\\0\\0\")),\n            (lsn!(\"0/16B7EE0\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0P~k\\x01\\0\\0\\0\\0\\0\\n\\0\\0\\xc6/Y\\xb8\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0\\xdd\\0\\0\\0\\0\\0\\0\\0\\xde\\0\\0\")),\n            (lsn!(\"0/16B7F28\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0\\x98~k\\x01\\0\\0\\0\\0\\0\\n\\0\\0\\xcc\\t\\xe8g\\0 
\\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0\\xde\\0\\0\\0\\0\\0\\0\\0\\xdf\\0\\0\")),\n            (lsn!(\"0/16B7F70\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0\\xe0~k\\x01\\0\\0\\0\\0\\0\\n\\0\\0:\\xcfB\\xff\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0\\xdf\\0\\0\\0\\0\\0\\0\\0\\xe0\\0\\0\")),\n            (lsn!(\"0/16B7FB8\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0(\\x7fk\\x01\\0\\0\\0\\0\\0\\n\\0\\0\\xb7\\xdb\\x05*\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0\\xe0\\0\\0\\0\\0\\0\\0\\0\\xe1\\0\\0\")),\n            (lsn!(\"0/16B8000\"), pg_record(false, b\"C\\0\\0\\0\\0\\x04\\0\\0p\\x7fk\\x01\\0\\0\\0\\0\\0\\n\\0\\0\\\\\\xc4:?\\0 \\x12\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\xff\\x03\\x01\\0\\0\\x08\\x01\\0\\0\\0\\x18\\0\\xe1\\0\\0\\0\\0\\0\\0\\0\\xe2\\0\\0\")),\n            (lsn!(\"0/16CBD68\"), pg_record(false, b\"@ \\0\\0\\0\\0\\0\\0\\xc0|l\\x01\\0\\0\\0\\0@\\t\\0\\0\\xdf\\xb0\\x1a`\\0\\x12\\0\\0\\0 \\0\\0\\x04\\x7f\\x06\\0\\0\\xd22\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\x01\\x80\\0\\0\\0\\0\\0\\0\\xff\\x05\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\x18\\0\\0 \\0 \\x04 
\\0\\0\\0\\0\\x01\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0
\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\
0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\
\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0
\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\
0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\
\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0
\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\
0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\
\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0
\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\
0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\
\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\x04\\0\\0\\x01\")),\n        ],\n        pg_version: PgMajorVersion::PG14,\n    }\n    }\n}\n"
  },
  {
    "path": "pageserver/benches/large-layer-map-layernames.txt",
    "content": "000000000000000000000000000000000000-000000067F00008000000032090100000000__0000006CF69CD8B0\n000000000000000000000000000000000000-000000067F00008000000032090100000000__0000006F949B7C08\n000000000000000000000000000000000000-000000067F00008000000032090100000000__00000071F15CF6B0\n000000000000000000000000000000000000-000000067F00008000000032090100000000__00000072AEE2BFE0\n000000000000000000000000000000000000-000000067F00008000000032090100000000__000000756884A510\n000000000000000000000000000000000000-000000067F00008000000032090100000000__00000077B1836CA0\n000000000000000000000000000000000000-000000067F00008000000032090100000000__0000007D41715570\n000000000000000000000000000000000000-000000067F00008000000032090100000000__0000007F12B83FE8\n000000000000000000000000000000000000-000000067F00008000000032090100000000__00000083D5DE3FD0\n000000000000000000000000000000000000-000000067F00008000000032090100000000__000000873B520940\n000000000000000000000000000000000000-000000067F00008000000032090100000000__000000890CF51FE0\n000000000000000000000000000000000000-000000067F00008000000032090100000000__0000008C71903720\n000000000000000000000000000000000000-000000067F00008000000032090100000000__0000008E43487FF0\n000000000000000000000000000000000000-000000067F00008000000032090100000000__0000009445A06DC8\n000000000000000000000000000000000000-000000067F00008000000032090100000000__00000096187D1FC8\n000000000000000000000000000000000000-000000067F00008000000032090100000000__00000096E85806C0\n000000000000000000000000000000000000-000000067F00008000000032090100000000__0000009921F3B4A8\n000000000000000000000000000000000000-000000067F00008000000032090100000000__0000009B5229DFE8\n000000000000000000000000000000000000-000000067F00008000000032090100000000__0000009EBB11FFC0\n000000000000000000000000000000000000-000000067F00008000000032090100000000__000000A93DDE5FE0\n000000000000000000000000000000000000-000000067F00008000000032090100000000__000000AD3698E000\n0000000000000000000000000000000
00000-000000067F00008000000032090100000000__000000B3AC039FE8\n000000000000000000000000000000000000-000000067F00008000000032090100000000__000000B8606C92A0\n000000000000000000000000000000000000-000000067F00008000000032090100000000__000000BC59629F98\n000000000000000000000000000000000000-000000067F00008000000032090100000000__000000BD25E66810\n000000000000000000000000000000000000-000000067F00008000000032090100000000__000000BEF683BFD0\n000000000000000000000000000000000000-000000067F00008000000032090100000000__000000C14270A078\n000000000000000000000000000000000000-000000067F00008000000032090100000000__000000C3687EDFE8\n000000000000000000000000000000000000-000000067F00008000000032090100000000__000000C6C7BD8140\n000000000000000000000000000000000000-000000067F00008000000032090100000000__000000C896B8DFD8\n000000000000000000000000000000000000-000000067F00008000000032090100000000__000000CB82C2FF68\n000000000000000000000000000000000000-000000067F00008000000032090100000000__000000CD51009FE8\n000000000000000000000000000000000000-000000067F00008000000032090100000000__000000CF7E08BFD0\n000000000000000000000000000000000000-000000067F00008000000540090100000000__0000006AEF261AF8\n000000000000000000000000000000000000-000000067F00008000000560090100000000__0000006DA30DA180\n000000000000000000000000000000000000-000000067F00008000000580090100000000__0000006FAFE25518\n000000000000000000000000000000000000-000000067F000080000005E0090100000000__00000073AF75E930\n000000000000000000000000000000000000-000000067F00008000000620090100000000__00000078B2CB1C68\n000000000000000000000000000000000000-000000067F00008000000640090100000000__0000007B9877EF40\n000000000000000000000000000000000000-000000067F00008000000680090100000000__00000080E477E868\n000000000000000000000000000000000000-000000067F000080000006C0090100000000__00000085BE169568\n000000000000000000000000000000000000-000000067F00008000000700090100000000__0000008AF15FEF50\n000000000000000000000000000000000000-000000067F00008000000740090100000000__000
000902186B1D0\n000000000000000000000000000000000000-000000067F00008000000760090100000000__00000092CA5E4EA8\n000000000000000000000000000000000000-000000067F000080000007E0090100000000__0000009D34F8D4D8\n000000000000000000000000000000000000-000000067F00008000000820090100000000__000000A29F1D8950\n000000000000000000000000000000000000-000000067F00008000000860090100000000__000000A434813A68\n000000000000000000000000000000000000-000000067F000080000008C0090100000000__000000AAEBE534F8\n000000000000000000000000000000000000-000000067F00008000000960090100000000__000000B6C2E92A88\n000000000000000000000000000000000000-000000067F00008000000A20090100000000__000000C5745579F0\n000000000000000000000000000000000000-000000067F00008000000A60090100000000__000000CA2C877DC8\n000000000000000000000000000000000000-030000000000000000000000000000000002__000000AFB4666000\n000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__000000CF7DC97FD1-000000CF801FC221\n000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__000000CF801FC221-000000CF801FDB61\n000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__000000CF801FDB61-000000CF80201FA1\n000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__000000CF80201FA1-000000CF80203CC1\n000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__000000CF80203CC1-000000CF802067C1\n000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__000000CF802067C1-000000CF80208AE1\n000000067F000032AC000040040000000000-000000067F000080000005400C0000007DD8__0000006A5C770149-0000006ACEF98449\n000000067F000032AC000040040000000000-000000067F000080000005600C0000008077__0000006CF7781D19-0000006D69B48989\n000000067F000032AC000040040000000000-000000067F000080000005800C0000007A49__0000006F95E72491-0000006FA8EDF3B9\n000000067F000032AC000040040000000000-000000067F000080000005A00C0000007614__000000723877FF21-00000072A0D7CEA1\n000000067F000032AC000040040000000000-000000067F0
00080000005C00C0000016516__00000072A0D7CEA1-0000007318DDE691\n000000067F000032AC000040040000000000-000000067F000080000006000C0000008FB7__00000075687C3009-00000075E915EBC9\n000000067F000032AC000040040000000000-000000067F000080000006200C0000009441__0000007805801C41-00000078859FEA11\n000000067F000032AC000040040000000000-000000067F000080000006400C0000007987__0000007AA1DF6639-0000007B14D5C521\n000000067F000032AC000040040000000000-000000067F000080000006600C0000009381__0000007D41EA8D51-0000007DC21DE569\n000000067F000032AC000040040000000000-000000067F000080000006800C0000007D6A__0000007FDCDCE659-000000804F6BFFC1\n000000067F000032AC000040040000000000-000000067F000080000006801400000044E4__00000081AFAF5FD1-0000008215AFE5A9\n000000067F000032AC000040040000000000-000000067F000080000006C00C00000090F5__00000084A325AA01-00000085239DFB81\n000000067F000032AC000040040000000000-000000067F000080000006E00C00000096C8__000000873C9A2551-00000087BC75E5B1\n000000067F000032AC000040040000000000-000000067F000080000007000C000000955C__00000089D6B8EE99-0000008A56BBF739\n000000067F000032AC000040040000000000-000000067F000080000007200C000000933D__0000008C72843D41-0000008CF2BFFC89\n000000067F000032AC000040040000000000-000000067F000080000007400C00000090E9__0000008F10E3E189-0000008F915DE591\n000000067F000032AC000040040000000000-000000067F000080000007600C0000008180__00000091A6DD7A79-0000009228F7FA79\n000000067F000032AC000040040000000000-000000067F000080000007800C000000974C__0000009446B52FD1-00000094D67DF4F9\n000000067F000032AC000040040000000000-000000067F000080000007A00C000000974B__00000096E85829C9-00000098A7ADFC91\n000000067F000032AC000040040000000000-000000067F000080000007C00C0000007EA5__000000997F5D23C9-00000099F1C9FC71\n000000067F000032AC000040040000000000-000000067F000080000007E00C00000092CD__0000009C1E8CC879-0000009C9ED3F059\n000000067F000032AC000040040000000000-000000067F000080000008000C00000081F6__0000009EBBC72771-000000A154401909\n000000067F000032AC000040040000000000-000000067F000080000008200C00000
0974D__000000A154401909-000000A1E407F839\n000000067F000032AC000040040000000000-000000067F0000800000082014000000393C__000000A323C9E001-000000A37A60B1A9\n000000067F000032AC000040040000000000-000000067F000080000008600C0000009747__000000A37A60B1A9-000000A3CA47ECA9\n000000067F000032AC000040040000000000-000000067F000080000008801C0000009703__000000A5A081B661-000000A6503DE919\n000000067F000032AC000040040000000000-000000067F000080000008801C00000CF6B0__000000A6F001F909-000000A91D97FD49\n000000067F000032AC000040040000000000-000000067F000080000008C00C0000002330__000000A98AB7EE49-000000AA2597E9A1\n000000067F000032AC000040040000000000-000000067F000080000008E00C00000077B3__000000AB6533BFD9-000000ABF63DF511\n000000067F000032AC000040040000000000-000000067F000080000008E02A000000529F__000000AF5D587FE1-000000AFB4666001\n000000067F000032AC000040040000000000-000000067F000080000009004000000047E0__000000B18495C001-000000B1FA75F501\n000000067F000032AC000040040000000000-000000067F00008000000920140000005289__000000B3AB3B7FC9-000000B4208FF3D1\n000000067F000032AC000040040000000000-000000067F000080000009400C000008DEA4__000000B4E047E5A9-000000B5CED8CF79\n000000067F000032AC000040040000000000-000000067F000080000009600C000000974F__000000B5CED8CF79-000000B63EADE5B9\n000000067F000032AC000040040000000000-000000067F000080000009600C0000055A74__000000B808718889-000000B8606C92A1\n000000067F000032AC000040040000000000-000000067F000080000009800C0000009748__000000B8606C92A1-000000B8E03BF0B9\n000000067F000032AC000040040000000000-000000067F000080000009800C000010EC71__000000BA1FC3FB39-000000BA9685E7C1\n000000067F000032AC000040040000000000-000000067F000080000009A00C0000071F6F__000000BCEF79BE91-000000BD263A5849\n000000067F000032AC000040040000000000-000000067F000080000009C00C0000009749__000000BD263A5849-000000BDA607F261\n000000067F000032AC000040040000000000-000000067F000080000009E00C0000004916__000000BEF5F47FD1-000000BF48FFEB11\n000000067F000032AC000040040000000000-000000067F00008000000A000C0000008EF9__000000C19744E
959-000000C217F3F379\n000000067F000032AC000040040000000000-000000067F00008000000A200C0000009748__000000C430961E71-000000C4C05DDB29\n000000067F000032AC000040040000000000-000000067F00008000000A400C0000009743__000000C6C87B6329-000000C74849FAE1\n000000067F000032AC000040040000000000-000000067F00008000000A600C0000009746__000000C90726D0D9-000000C986F5F0D9\n000000067F000032AC000040040000000000-000000067F00008000000A600C000007A149__000000CB40C16489-000000CB82C37859\n000000067F000032AC000040040000000000-000000067F00008000000A800C0000009748__000000CB82C37859-000000CC11F5EDC9\n000000067F000032AC000040040000000000-000000067F00008000000A800F0100000003__000000CD51344F89-000000CDCC7BF889\n000000067F00008000000000000000000001-000000067F000080000005400C000004B479__0000006C98B77D29-0000006CF7781D19\n000000067F00008000000000000000000001-000000067F000080000005400C0000104BE4__0000006C1E7C73C1-0000006C98B77D29\n000000067F00008000000000000000000001-000000067F000080000005600C0000048643__0000006F3370DD59-0000006F95E72491\n000000067F00008000000000000000000001-000000067F000080000005600C0000100001__0000006EB935F989-0000006F3370DD59\n000000067F00008000000000000000000001-000000067F000080000005800C000005CF06__00000071F21624D1-000000723877FF21\n000000067F00008000000000000000000001-000000067F000080000005800C000009D78D__000000716A103FC9-00000071F21624D1\n000000067F00008000000000000000000001-000000067F000080000005800C00000CDE2D__00000070E8761431-000000716A103FC9\n000000067F00008000000000000000000001-000000067F000080000005E00C00000385D9__0000007318DDE691-0000007497B01FF9\n000000067F00008000000000000000000001-000000067F000080000005E00C0000050175__000000751253A4C1-00000075687C3009\n000000067F00008000000000000000000001-000000067F000080000005E00C00000AF576__0000007497B01FF9-000000751253A4C1\n000000067F00008000000000000000000001-000000067F000080000006000C0000051A02__00000077B2AD0F91-0000007805801C41\n000000067F00008000000000000000000001-000000067F000080000006000C00000C3C38__00000077391A8001-00000077B2AD0F91
\n000000067F00008000000000000000000001-000000067F000080000006000C00000C56C1__00000076A8CDE8F9-00000077391A8001\n000000067F00008000000000000000000001-000000067F000080000006200C000004811C__0000007A3F679FA1-0000007AA1DF6639\n000000067F00008000000000000000000001-000000067F000080000006200C0000107883__00000079C527F0D9-0000007A3F679FA1\n000000067F00008000000000000000000001-000000067F000080000006400C000004B4C9__0000007B14D5C521-0000007C73B53FC9\n000000067F00008000000000000000000001-000000067F000080000006400C000005258F__0000007CEE5A0B91-0000007D41EA8D51\n000000067F00008000000000000000000001-000000067F000080000006400C00000A887C__0000007C73B53FC9-0000007CEE5A0B91\n000000067F00008000000000000000000001-000000067F000080000006600C0000049742__0000007F7BE4E6F1-0000007FDCDCE659\n000000067F00008000000000000000000001-000000067F000080000006600C00000BC29F__0000007E71DBF8F9-0000007F11E4BFE9\n000000067F00008000000000000000000001-000000067F000080000006600C0000111C82__0000007F11E4BFE9-0000007F7BE4E6F1\n000000067F00008000000000000000000001-000000067F000080000006800C00000A8D4C__00000080EF2FF5B9-00000081AFAF5FD1\n000000067F00008000000000000000000001-000000067F000080000006A00C0000051984__000000844F1A6789-00000084A325AA01\n000000067F00008000000000000000000001-000000067F000080000006A00C00000703EC__00000082B573F579-00000083D5901FD9\n000000067F00008000000000000000000001-000000067F000080000006A00C00000C4CC8__00000083D5901FD9-000000844F1A6789\n000000067F00008000000000000000000001-000000067F000080000006C00C0000055EA3__00000086ED29E361-000000873C9A2551\n000000067F00008000000000000000000001-000000067F000080000006C00C00000BC102__00000085D35BF439-0000008673817FC9\n000000067F00008000000000000000000001-000000067F000080000006C00C00000BFB6E__0000008673817FC9-00000086ED29E361\n000000067F00008000000000000000000001-000000067F000080000006E00C0000054244__0000008985FD3611-00000089D6B8EE99\n000000067F00008000000000000000000001-000000067F000080000006E00C00000B6F42__000000890C5B6001-0000008985FD3611\n000000067F00008000
000000000000000001-000000067F000080000006E00C00000C5883__000000887C2DFE59-000000890C5B6001\n000000067F00008000000000000000000001-000000067F000080000007000C0000053C20__0000008C2045B721-0000008C72843D41\n000000067F00008000000000000000000001-000000067F000080000007000C00000B2B06__0000008AF67FEC19-0000008BA6803FC9\n000000067F00008000000000000000000001-000000067F000080000007000C00000BF157__0000008BA6803FC9-0000008C2045B721\n000000067F00008000000000000000000001-000000067F000080000007200C0000051312__0000008EBC4827C1-0000008F10E3E189\n000000067F00008000000000000000000001-000000067F000080000007200C00000BA086__0000008E42A19FD1-0000008EBC4827C1\n000000067F00008000000000000000000001-000000067F000080000007200C00000C58B0__0000008DB277FA49-0000008E42A19FD1\n000000067F00008000000000000000000001-000000067F000080000007400C000004DF08__000000914B2393B1-00000091A6DD7A79\n000000067F00008000000000000000000001-000000067F000080000007400C00000FCCA8__00000090D0E5EA29-000000914B2393B1\n000000067F00008000000000000000000001-000000067F000080000007600C00000544BA__0000009228F7FA79-00000093786F8001\n000000067F00008000000000000000000001-000000067F000080000007600C0000061028__0000009402435A49-0000009446B52FD1\n000000067F00008000000000000000000001-000000067F000080000007600C000008C52F__00000093786F8001-0000009402435A49\n000000067F00008000000000000000000001-000000067F000080000007800C000006D445__00000096AEF27399-00000096E85829C9\n000000067F00008000000000000000000001-000000067F000080000007800C000007B8BC__00000096193A8001-00000096AEF27399\n000000067F00008000000000000000000001-000000067F000080000007800C00000CD6B6__000000959635F2A9-00000096193A8001\n000000067F00008000000000000000000001-000000067F000080000007A00C000004B9A5__0000009921E47AA1-000000997F5D23C9\n000000067F00008000000000000000000001-000000067F000080000007A00C00000F720F__00000098A7ADFC91-0000009921E47AA1\n000000067F00008000000000000000000001-000000067F000080000007C00C0000052A9D__0000009BCB4E4461-0000009C1E8CC879\n000000067F00008000000000000000000001-0
00000067F000080000007C00C00000A9244__0000009A918DF181-0000009B51A8BBB9\n000000067F00008000000000000000000001-000000067F000080000007C00C00000BA258__0000009B51A8BBB9-0000009BCB4E4461\n000000067F00008000000000000000000001-000000067F000080000007E00C0000061ADC__0000009E781A9731-0000009EBBC72771\n000000067F00008000000000000000000001-000000067F000080000007E00C0000093E3A__0000009DEEE6BFF9-0000009E781A9731\n000000067F00008000000000000000000001-000000067F000080000007E00C00000B2704__0000009D3E97E549-0000009DEEE6BFF9\n000000067F00008000000000000000000001-000000067F000080000008200C000005D8FE__000000A1E407F839-000000A323C9E001\n000000067F00008000000000000000000001-000000067F000080000008600C000010ECC4__000000A539BDE561-000000A5A081B661\n000000067F00008000000000000000000001-000000067F000080000008A00C0000104A0C__000000A91D97FD49-000000A98AB7EE49\n000000067F00008000000000000000000001-000000067F000080000008C00C000005DA8C__000000AA2597E9A1-000000AB6533BFD9\n000000067F00008000000000000000000001-000000067F000080000008E00C00000BC018__000000AC9601EA19-000000AD36393FE9\n000000067F00008000000000000000000001-000000067F000080000008E0140000003E33__000000AD36393FE9-000000ADB047EAB9\n000000067F00008000000000000000000001-000000067F000080000008E022000008E3D1__000000AE6FFFE799-000000AF5D587FE1\n000000067F00008000000000000000000001-000000067F000080000009003800000C5213__000000B0F3EDEAC9-000000B18495C001\n000000067F00008000000000000000000001-000000067F000080000009200C000009567A__000000B2CA27F641-000000B3AB3B7FC9\n000000067F00008000000000000000000001-000000067F000080000009600C00000A93FD__000000B6DE71F5F9-000000B79E68FFF9\n000000067F00008000000000000000000001-000000067F000080000009600C020000000B__000000B79E68FFF9-000000B808718889\n000000067F00008000000000000000000001-000000067F000080000009A00C00000794DC__000000BC596B5D59-000000BCEF79BE91\n000000067F00008000000000000000000001-000000067F000080000009A00C00000D6C06__000000BBE607E8F1-000000BC596B5D59\n000000067F00008000000000000000000001-000000067F00008000000
9C00C00000B2921__000000BE45CBFBB9-000000BEF5F47FD1\n000000067F00008000000000000000000001-000000067F000080000009E00C0000050E55__000000C1426D92E1-000000C19744E959\n000000067F00008000000000000000000001-000000067F000080000009E00C000009FB21__000000BFF8BDFEE9-000000C0C8CA5FF1\n000000067F00008000000000000000000001-000000067F000080000009E00C00000C0C74__000000C0C8CA5FF1-000000C1426D92E1\n000000067F00008000000000000000000001-000000067F00008000000A000C000005635B__000000C3E17E01A1-000000C430961E71\n000000067F00008000000000000000000001-000000067F00008000000A000C00000B8B52__000000C367E48001-000000C3E17E01A1\n000000067F00008000000000000000000001-000000067F00008000000A000C00000BC072__000000C2C7B1ECC1-000000C367E48001\n000000067F00008000000000000000000001-000000067F00008000000A200C00000677D8__000000C689AF4AC1-000000C6C87B6329\n000000067F00008000000000000000000001-000000067F00008000000A200C00000933F0__000000C600A8FFF9-000000C689AF4AC1\n000000067F00008000000000000000000001-000000067F00008000000A200C00000BBC1F__000000C56021EB29-000000C600A8FFF9\n000000067F00008000000000000000000001-000000067F00008000000A400C00000C4AE6__000000C80801E859-000000C8993EBFF9\n000000067F00008000000000000000000001-000000067F00008000000A400C0000107F8F__000000C8993EBFF9-000000C90726D0D9\n000000067F00008000000000000000000001-000000067F00008000000A600C0000054BFB__000000C986F5F0D9-000000CAD5D7FFF1\n000000067F00008000000000000000000001-000000067F00008000000A600C00001117CB__000000CAD5D7FFF1-000000CB40C16489\n000000067F00008000000000000000000001-000000067F00008000000A800C00000BCB46__000000CCB1B9E181-000000CD51344F89\n000000067F00008000000000000000000001-000000067F00008000000AA00C0000078E97__000000CE6C3FED31-000000CF7DC97FD1\n000000067F00008000000004E10100000002-000000067F000080000005400C000004BA9C__0000006ACEF98449-0000006C1E7C73C1\n000000067F00008000000004E10100000002-000000067F000080000005800C0000071854__0000007048B1EC09-00000070E8761431\n000000067F00008000000004E10200000000-000000067F000080000005600C000004BA9D__000
0006D69B48989-0000006EB935F989\n000000067F00008000000004EB0100000002-000000067F00008000000A400C00000551FC__000000C74849FAE1-000000C80801E859\n000000067F000080000005200C000006C000-030000000000000000000000000000000002__000000687B67FC58\n000000067F00008000000520140000028A69-030000000000000000000000000000000002__0000006981B5FDC9-00000069FBEEB099\n000000067F0000800000052014000002C260-030000000000000000000000000000000002__00000069FBEEB099-0000006A5C770149\n000000067F000080000005400C0000000000-000000067F000080000005400C0000004000__0000006CF69CD8B0\n000000067F000080000005400C0000004000-000000067F000080000005400C0000008000__0000006CF69CD8B0\n000000067F000080000005400C0000008000-000000067F000080000005400C000000C000__0000006CF69CD8B0\n000000067F000080000005400C000000C000-000000067F000080000005400C0000010000__0000006CF69CD8B0\n000000067F000080000005400C0000010000-000000067F000080000005400C0000014000__0000006CF69CD8B0\n000000067F000080000005400C0000014000-000000067F000080000005400C0000018000__0000006CF69CD8B0\n000000067F000080000005400C0000018000-000000067F000080000005400C000001C000__0000006CF69CD8B0\n000000067F000080000005400C000001C000-000000067F000080000005400C0000020000__0000006CF69CD8B0\n000000067F000080000005400C0000020000-000000067F000080000005400C0000024000__0000006CF69CD8B0\n000000067F000080000005400C0000024000-000000067F000080000005400C0000028000__0000006CF69CD8B0\n000000067F000080000005400C0000028000-000000067F000080000005400C000002C000__0000006CF69CD8B0\n000000067F000080000005400C000002C000-000000067F000080000005400C0000030000__0000006CF69CD8B0\n000000067F000080000005400C0000030000-000000067F000080000005400C0000034000__0000006CF69CD8B0\n000000067F000080000005400C0000034000-000000067F000080000005400C0000038000__0000006CF69CD8B0\n000000067F000080000005400C0000038000-000000067F000080000005400C000003C000__0000006CF69CD8B0\n000000067F000080000005400C000003C000-000000067F000080000005400C0000040000__0000006CF69CD8B0\n000000067F000080000005400C0000040000-000000067F0000800000
05400C0000044000__0000006CF69CD8B0\n000000067F000080000005400C0000044000-000000067F000080000005400C0000048000__0000006CF69CD8B0\n000000067F000080000005400C0000048000-000000067F000080000005400C000004C000__0000006CF69CD8B0\n000000067F000080000005400C000004B483-000000067F000080000005400C00000967AD__0000006C98B77D29-0000006CF7781D19\n000000067F000080000005400C000004C000-000000067F000080000005400C0000050000__0000006CF69CD8B0\n000000067F000080000005400C0000050000-000000067F000080000005400C0000054000__0000006CF69CD8B0\n000000067F000080000005400C0000054000-000000067F000080000005400C0000058000__0000006CF69CD8B0\n000000067F000080000005400C0000054000-030000000000000000000000000000000002__0000006AEF261AF8\n000000067F000080000005400C0000058000-000000067F000080000005400C000005C000__0000006CF69CD8B0\n000000067F000080000005400C000005C000-000000067F000080000005400C0000060000__0000006CF69CD8B0\n000000067F000080000005400C0000060000-000000067F000080000005400C0000064000__0000006CF69CD8B0\n000000067F000080000005400C0000064000-000000067F000080000005400C0000068000__0000006CF69CD8B0\n000000067F000080000005400C0000068000-000000067F000080000005400C000006C000__0000006CF69CD8B0\n000000067F000080000005400C000006C000-000000067F000080000005400C0000070000__0000006CF69CD8B0\n000000067F000080000005400C0000070000-000000067F000080000005400C0000074000__0000006CF69CD8B0\n000000067F000080000005400C0000074000-000000067F000080000005400C0000078000__0000006CF69CD8B0\n000000067F000080000005400C0000078000-000000067F000080000005400C000007C000__0000006CF69CD8B0\n000000067F000080000005400C000007C000-000000067F000080000005400C0000080000__0000006CF69CD8B0\n000000067F000080000005400C0000080000-000000067F000080000005400C0000084000__0000006CF69CD8B0\n000000067F000080000005400C0000084000-000000067F000080000005400C0000088000__0000006CF69CD8B0\n000000067F000080000005400C0000088000-000000067F000080000005400C000008C000__0000006CF69CD8B0\n000000067F000080000005400C000008C000-000000067F000080000005400C0000090000__0000006CF69C
D8B0\n000000067F000080000005400C0000090000-000000067F000080000005400C0000094000__0000006CF69CD8B0\n000000067F000080000005400C0000094000-000000067F000080000005400C0000098000__0000006CF69CD8B0\n000000067F000080000005400C00000967BA-000000067F000080000005400C00000E2771__0000006C98B77D29-0000006CF7781D19\n000000067F000080000005400C0000098000-000000067F000080000005400C000009C000__0000006CF69CD8B0\n000000067F000080000005400C000009C000-000000067F000080000005400C00000A0000__0000006CF69CD8B0\n000000067F000080000005400C00000A0000-000000067F000080000005400C00000A4000__0000006CF69CD8B0\n000000067F000080000005400C00000A4000-000000067F000080000005400C00000A8000__0000006CF69CD8B0\n000000067F000080000005400C00000A8000-000000067F000080000005400C00000AC000__0000006CF69CD8B0\n000000067F000080000005400C00000AC000-000000067F000080000005400C00000B0000__0000006CF69CD8B0\n000000067F000080000005400C00000B0000-000000067F000080000005400C00000B4000__0000006CF69CD8B0\n000000067F000080000005400C00000B4000-000000067F000080000005400C00000B8000__0000006CF69CD8B0\n000000067F000080000005400C00000B8000-000000067F000080000005400C00000BC000__0000006CF69CD8B0\n000000067F000080000005400C00000BC000-000000067F000080000005400C00000C0000__0000006CF69CD8B0\n000000067F000080000005400C00000C0000-000000067F000080000005400C00000C4000__0000006CF69CD8B0\n000000067F000080000005400C00000C4000-000000067F000080000005400C00000C8000__0000006CF69CD8B0\n000000067F000080000005400C00000C8000-000000067F000080000005400C00000CC000__0000006CF69CD8B0\n000000067F000080000005400C00000CC000-000000067F000080000005400C00000D0000__0000006CF69CD8B0\n000000067F000080000005400C00000D0000-000000067F000080000005400C00000D4000__0000006CF69CD8B0\n000000067F000080000005400C00000D4000-000000067F000080000005400C00000D8000__0000006CF69CD8B0\n000000067F000080000005400C00000D8000-000000067F000080000005400C00000DC000__0000006CF69CD8B0\n000000067F000080000005400C00000DC000-000000067F000080000005400C00000E0000__0000006CF69CD8B0\n000000067F00008000000540
0C00000E0000-000000067F000080000005400C00000E4000__0000006CF69CD8B0\n000000067F000080000005400C00000E277B-000000067F00008000000540140000005B2E__0000006C98B77D29-0000006CF7781D19\n000000067F000080000005400C00000E4000-000000067F000080000005400C00000E8000__0000006CF69CD8B0\n000000067F000080000005400C00000E8000-000000067F000080000005400C00000EC000__0000006CF69CD8B0\n000000067F000080000005400C00000EC000-000000067F000080000005400C00000F0000__0000006CF69CD8B0\n000000067F000080000005400C00000F0000-000000067F000080000005400C00000F4000__0000006CF69CD8B0\n000000067F000080000005400C00000F4000-000000067F000080000005400C00000F8000__0000006CF69CD8B0\n000000067F000080000005400C00000F8000-000000067F000080000005400C00000FC000__0000006CF69CD8B0\n000000067F000080000005400C00000FC000-000000067F000080000005400C0000100000__0000006CF69CD8B0\n000000067F000080000005400C0000100000-000000067F000080000005400C0000104000__0000006CF69CD8B0\n000000067F000080000005400C0000104000-000000067F000080000005400C0000108000__0000006CF69CD8B0\n000000067F000080000005400C0000108000-000000067F000080000005400C000010C000__0000006CF69CD8B0\n000000067F000080000005400C000010C000-000000067F000080000005400C0000110000__0000006CF69CD8B0\n000000067F000080000005400C0000110000-000000067F00008000000540120100000000__0000006CF69CD8B0\n000000067F000080000005400C0100000000-000000067F00008000000540140000004760__0000006C1E7C73C1-0000006C98B77D29\n000000067F00008000000540140000004760-000000067F0000800000054014000000BB51__0000006C1E7C73C1-0000006C98B77D29\n000000067F00008000000540140000005B2F-000000067F0000800000054014000001A04C__0000006C98B77D29-0000006CF7781D19\n000000067F0000800000054014000000BB51-000000067F00008000000540140000012EFA__0000006C1E7C73C1-0000006C98B77D29\n000000067F00008000000540140000012EFA-000000067F0000800000054014000001A2E5__0000006C1E7C73C1-0000006C98B77D29\n000000067F0000800000054014000001A04E-000000067F0000800000054016000000022B__0000006C98B77D29-0000006CF7781D19\n000000067F0000800000054014000001A2E5-00000006
7F000080000005401400000216D5__0000006C1E7C73C1-0000006C98B77D29\n000000067F000080000005401400000216D5-000000067F00008000000540140000028AD9__0000006C1E7C73C1-0000006C98B77D29\n000000067F00008000000540140000028AD9-030000000000000000000000000000000002__0000006C1E7C73C1-0000006C98B77D29\n000000067F0000800000054016000000022B-030000000000000000000000000000000002__0000006C98B77D29-0000006CF7781D19\n000000067F000080000005600C0000000000-000000067F000080000005600C0000004000__0000006DA30DA180\n000000067F000080000005600C0000000000-000000067F000080000005600C0000004000__0000006F949B7C08\n000000067F000080000005600C0000004000-000000067F000080000005600C0000008000__0000006DA30DA180\n000000067F000080000005600C0000004000-000000067F000080000005600C0000008000__0000006F949B7C08\n000000067F000080000005600C0000008000-000000067F000080000005600C000000C000__0000006DA30DA180\n000000067F000080000005600C0000008000-000000067F000080000005600C000000C000__0000006F949B7C08\n000000067F000080000005600C0000008077-000000067F000080000005600C00000117CE__0000006CF7781D19-0000006D69B48989\n000000067F000080000005600C000000C000-000000067F000080000005600C0000010000__0000006DA30DA180\n000000067F000080000005600C000000C000-000000067F000080000005600C0000010000__0000006F949B7C08\n000000067F000080000005600C0000010000-000000067F000080000005600C0000014000__0000006DA30DA180\n000000067F000080000005600C0000010000-000000067F000080000005600C0000014000__0000006F949B7C08\n000000067F000080000005600C00000117CE-000000067F000080000005600C000001AF0A__0000006CF7781D19-0000006D69B48989\n000000067F000080000005600C0000014000-000000067F000080000005600C0000018000__0000006DA30DA180\n000000067F000080000005600C0000014000-000000067F000080000005600C0000018000__0000006F949B7C08\n000000067F000080000005600C0000018000-000000067F000080000005600C000001C000__0000006DA30DA180\n000000067F000080000005600C0000018000-000000067F000080000005600C000001C000__0000006F949B7C08\n000000067F000080000005600C000001AF0A-000000067F000080000005600C0000024670__0000006C
F7781D19-0000006D69B48989\n000000067F000080000005600C000001C000-000000067F000080000005600C0000020000__0000006DA30DA180\n000000067F000080000005600C000001C000-000000067F000080000005600C0000020000__0000006F949B7C08\n000000067F000080000005600C0000020000-000000067F000080000005600C0000024000__0000006DA30DA180\n000000067F000080000005600C0000020000-000000067F000080000005600C0000024000__0000006F949B7C08\n000000067F000080000005600C0000024000-000000067F000080000005600C0000028000__0000006DA30DA180\n000000067F000080000005600C0000024000-000000067F000080000005600C0000028000__0000006F949B7C08\n000000067F000080000005600C0000024670-000000067F000080000005600C000002DDD6__0000006CF7781D19-0000006D69B48989\n000000067F000080000005600C0000028000-000000067F000080000005600C000002C000__0000006DA30DA180\n000000067F000080000005600C0000028000-000000067F000080000005600C000002C000__0000006F949B7C08\n000000067F000080000005600C000002C000-000000067F000080000005600C0000030000__0000006DA30DA180\n000000067F000080000005600C000002C000-000000067F000080000005600C0000030000__0000006F949B7C08\n000000067F000080000005600C000002DDD6-000000067F000080000005600C000003752A__0000006CF7781D19-0000006D69B48989\n000000067F000080000005600C0000030000-000000067F000080000005600C0000034000__0000006DA30DA180\n000000067F000080000005600C0000030000-000000067F000080000005600C0000034000__0000006F949B7C08\n000000067F000080000005600C0000034000-000000067F000080000005600C0000038000__0000006DA30DA180\n000000067F000080000005600C0000034000-000000067F000080000005600C0000038000__0000006F949B7C08\n000000067F000080000005600C000003752A-000000067F000080000005600C0000040C90__0000006CF7781D19-0000006D69B48989\n000000067F000080000005600C0000038000-000000067F000080000005600C000003C000__0000006DA30DA180\n000000067F000080000005600C0000038000-000000067F000080000005600C000003C000__0000006F949B7C08\n000000067F000080000005600C000003C000-000000067F000080000005600C0000040000__0000006DA30DA180\n000000067F000080000005600C000003C000-000000067F000080000005600
C0000040000__0000006F949B7C08\n000000067F000080000005600C0000040000-000000067F000080000005600C0000044000__0000006DA30DA180\n000000067F000080000005600C0000040000-000000067F000080000005600C0000044000__0000006F949B7C08\n000000067F000080000005600C0000040C90-030000000000000000000000000000000002__0000006CF7781D19-0000006D69B48989\n000000067F000080000005600C0000044000-000000067F000080000005600C0000048000__0000006DA30DA180\n000000067F000080000005600C0000044000-000000067F000080000005600C0000048000__0000006F949B7C08\n000000067F000080000005600C0000048000-000000067F000080000005600C000004C000__0000006DA30DA180\n000000067F000080000005600C0000048000-000000067F000080000005600C000004C000__0000006F949B7C08\n000000067F000080000005600C0000048643-000000067F000080000005600C00000907F3__0000006F3370DD59-0000006F95E72491\n000000067F000080000005600C000004BA9D-000000067F000080000005600C00000551D2__0000006D69B48989-0000006EB935F989\n000000067F000080000005600C000004C000-000000067F000080000005600C0000050000__0000006DA30DA180\n000000067F000080000005600C000004C000-000000067F000080000005600C0000050000__0000006F949B7C08\n000000067F000080000005600C0000050000-000000067F000080000005600C0000054000__0000006DA30DA180\n000000067F000080000005600C0000050000-000000067F000080000005600C0000054000__0000006F949B7C08\n000000067F000080000005600C0000054000-000000067F000080000005600C0000058000__0000006DA30DA180\n000000067F000080000005600C0000054000-000000067F000080000005600C0000058000__0000006F949B7C08\n000000067F000080000005600C00000551D2-000000067F000080000005600C000005E90B__0000006D69B48989-0000006EB935F989\n000000067F000080000005600C0000058000-000000067F000080000005600C000005C000__0000006DA30DA180\n000000067F000080000005600C0000058000-000000067F000080000005600C000005C000__0000006F949B7C08\n000000067F000080000005600C000005C000-000000067F000080000005600C0000060000__0000006DA30DA180\n000000067F000080000005600C000005C000-000000067F000080000005600C0000060000__0000006F949B7C08\n000000067F000080000005600C000005E90B-0000
00067F000080000005600C000006802B__0000006D69B48989-0000006EB935F989\n000000067F000080000005600C0000060000-000000067F000080000005600C0000064000__0000006DA30DA180\n000000067F000080000005600C0000060000-000000067F000080000005600C0000064000__0000006F949B7C08\n000000067F000080000005600C0000064000-000000067F000080000005600C0000068000__0000006F949B7C08\n000000067F000080000005600C0000064000-030000000000000000000000000000000002__0000006DA30DA180\n000000067F000080000005600C0000068000-000000067F000080000005600C000006C000__0000006F949B7C08\n000000067F000080000005600C000006802B-000000067F000080000005600C0000071782__0000006D69B48989-0000006EB935F989\n000000067F000080000005600C000006C000-000000067F000080000005600C0000070000__0000006F949B7C08\n000000067F000080000005600C0000070000-000000067F000080000005600C0000074000__0000006F949B7C08\n000000067F000080000005600C0000071782-000000067F000080000005600C000007AEE8__0000006D69B48989-0000006EB935F989\n000000067F000080000005600C0000074000-000000067F000080000005600C0000078000__0000006F949B7C08\n000000067F000080000005600C0000078000-000000067F000080000005600C000007C000__0000006F949B7C08\n000000067F000080000005600C000007AEE8-000000067F000080000005600C000008460B__0000006D69B48989-0000006EB935F989\n000000067F000080000005600C000007C000-000000067F000080000005600C0000080000__0000006F949B7C08\n000000067F000080000005600C0000080000-000000067F000080000005600C0000084000__0000006F949B7C08\n000000067F000080000005600C0000084000-000000067F000080000005600C0000088000__0000006F949B7C08\n000000067F000080000005600C000008460B-000000067F000080000005600C000008DD71__0000006D69B48989-0000006EB935F989\n000000067F000080000005600C0000088000-000000067F000080000005600C000008C000__0000006F949B7C08\n000000067F000080000005600C000008C000-000000067F000080000005600C0000090000__0000006F949B7C08\n000000067F000080000005600C000008DD71-000000067F000080000005600C00000974D7__0000006D69B48989-0000006EB935F989\n000000067F000080000005600C0000090000-000000067F000080000005600C0000094000__0000
006F949B7C08\n000000067F000080000005600C00000907F5-000000067F000080000005600C00000D90E0__0000006F3370DD59-0000006F95E72491\n000000067F000080000005600C0000094000-000000067F000080000005600C0000098000__0000006F949B7C08\n000000067F000080000005600C00000974D7-000000067F000080000005600C00000A0C0B__0000006D69B48989-0000006EB935F989\n000000067F000080000005600C0000098000-000000067F000080000005600C000009C000__0000006F949B7C08\n000000067F000080000005600C000009C000-000000067F000080000005600C00000A0000__0000006F949B7C08\n000000067F000080000005600C00000A0000-000000067F000080000005600C00000A4000__0000006F949B7C08\n000000067F000080000005600C00000A0C0B-000000067F000080000005600C00000AA371__0000006D69B48989-0000006EB935F989\n000000067F000080000005600C00000A4000-000000067F000080000005600C00000A8000__0000006F949B7C08\n000000067F000080000005600C00000A8000-000000067F000080000005600C00000AC000__0000006F949B7C08\n000000067F000080000005600C00000AA371-000000067F000080000005600C00000B3AD7__0000006D69B48989-0000006EB935F989\n000000067F000080000005600C00000AC000-000000067F000080000005600C00000B0000__0000006F949B7C08\n000000067F000080000005600C00000B0000-000000067F000080000005600C00000B4000__0000006F949B7C08\n000000067F000080000005600C00000B3AD7-000000067F000080000005600C00000BD20B__0000006D69B48989-0000006EB935F989\n000000067F000080000005600C00000B4000-000000067F000080000005600C00000B8000__0000006F949B7C08\n000000067F000080000005600C00000B8000-000000067F000080000005600C00000BC000__0000006F949B7C08\n000000067F000080000005600C00000BC000-000000067F000080000005600C00000C0000__0000006F949B7C08\n000000067F000080000005600C00000BD20B-000000067F000080000005600C00000C6932__0000006D69B48989-0000006EB935F989\n000000067F000080000005600C00000C0000-000000067F000080000005600C00000C4000__0000006F949B7C08\n000000067F000080000005600C00000C4000-000000067F000080000005600C00000C8000__0000006F949B7C08\n000000067F000080000005600C00000C6932-000000067F000080000005600C00000D0098__0000006D69B48989-0000006EB935F989\n0000000
67F000080000005600C00000C8000-000000067F000080000005600C00000CC000__0000006F949B7C08\n000000067F000080000005600C00000CC000-000000067F000080000005600C00000D0000__0000006F949B7C08\n000000067F000080000005600C00000D0000-000000067F000080000005600C00000D4000__0000006F949B7C08\n000000067F000080000005600C00000D0098-000000067F000080000005600C00000D97FE__0000006D69B48989-0000006EB935F989\n000000067F000080000005600C00000D4000-000000067F000080000005600C00000D8000__0000006F949B7C08\n000000067F000080000005600C00000D8000-000000067F000080000005600C00000DC000__0000006F949B7C08\n000000067F000080000005600C00000D90F8-000000067F00008000000560140000002A9A__0000006F3370DD59-0000006F95E72491\n000000067F000080000005600C00000D97FE-000000067F000080000005600C00000E2F0B__0000006D69B48989-0000006EB935F989\n000000067F000080000005600C00000DC000-000000067F000080000005600C00000E0000__0000006F949B7C08\n000000067F000080000005600C00000E0000-000000067F000080000005600C00000E4000__0000006F949B7C08\n000000067F000080000005600C00000E2F0B-000000067F000080000005600C00000EC671__0000006D69B48989-0000006EB935F989\n000000067F000080000005600C00000E4000-000000067F000080000005600C00000E8000__0000006F949B7C08\n000000067F000080000005600C00000E8000-000000067F000080000005600C00000EC000__0000006F949B7C08\n000000067F000080000005600C00000EC000-000000067F000080000005600C00000F0000__0000006F949B7C08\n000000067F000080000005600C00000EC671-000000067F000080000005600C00000F5D9F__0000006D69B48989-0000006EB935F989\n000000067F000080000005600C00000F0000-000000067F000080000005600C00000F4000__0000006F949B7C08\n000000067F000080000005600C00000F4000-000000067F000080000005600C00000F8000__0000006F949B7C08\n000000067F000080000005600C00000F5D9F-000000067F000080000005600C00000FF505__0000006D69B48989-0000006EB935F989\n000000067F000080000005600C00000F8000-000000067F000080000005600C00000FC000__0000006F949B7C08\n000000067F000080000005600C00000FC000-000000067F000080000005600C0000100000__0000006F949B7C08\n000000067F000080000005600C00000FF505-00000006
7F000080000005600C0000108C10__0000006D69B48989-0000006EB935F989\n000000067F000080000005600C0000100000-000000067F000080000005600C0000104000__0000006F949B7C08\n000000067F000080000005600C0000100001-000000067F000080000005600C0000111BF7__0000006EB935F989-0000006F3370DD59\n000000067F000080000005600C0000104000-000000067F000080000005600C0000108000__0000006F949B7C08\n000000067F000080000005600C0000108000-000000067F000080000005600C000010C000__0000006F949B7C08\n000000067F000080000005600C0000108C10-000000067F000080000005600C0100000000__0000006D69B48989-0000006EB935F989\n000000067F000080000005600C000010C000-000000067F000080000005600C0000110000__0000006F949B7C08\n000000067F000080000005600C0000110000-000000067F00008000000560120100000000__0000006F949B7C08\n000000067F000080000005600C0000111BF7-000000067F0000800000056014000000451D__0000006EB935F989-0000006F3370DD59\n000000067F00008000000560140000002A9A-000000067F00008000000560140000016143__0000006F3370DD59-0000006F95E72491\n000000067F0000800000056014000000451D-000000067F0000800000056014000000B9A7__0000006EB935F989-0000006F3370DD59\n000000067F0000800000056014000000B9A7-000000067F00008000000560140000012DE3__0000006EB935F989-0000006F3370DD59\n000000067F00008000000560140000012DE3-000000067F0000800000056014000001A213__0000006EB935F989-0000006F3370DD59\n000000067F00008000000560140000016143-000000067F00008000000560140000029CE0__0000006F3370DD59-0000006F95E72491\n000000067F0000800000056014000001A213-000000067F00008000000560140000021666__0000006EB935F989-0000006F3370DD59\n000000067F00008000000560140000021666-000000067F00008000000560140000028A7C__0000006EB935F989-0000006F3370DD59\n000000067F00008000000560140000028A7C-030000000000000000000000000000000002__0000006EB935F989-0000006F3370DD59\n000000067F00008000000560140000029CE2-030000000000000000000000000000000002__0000006F3370DD59-0000006F95E72491\n000000067F000080000005800C0000000000-000000067F000080000005800C0000004000__0000006FAFE25518\n000000067F000080000005800C0000000000-000000067F0000800000
05800C0000004000__00000071F15CF6B0\n000000067F000080000005800C0000004000-000000067F000080000005800C0000008000__0000006FAFE25518\n000000067F000080000005800C0000004000-000000067F000080000005800C0000008000__00000071F15CF6B0\n000000067F000080000005800C0000007A49-030000000000000000000000000000000002__0000006F95E72491-0000006FA8EDF3B9\n000000067F000080000005800C0000008000-000000067F000080000005800C000000C000__0000006FAFE25518\n000000067F000080000005800C0000008000-000000067F000080000005800C000000C000__0000007168C9DFF8\n000000067F000080000005800C0000008000-000000067F000080000005800C000000C000__00000072377CDB60\n000000067F000080000005800C00000096DE-000000067F000080000005800C0000012E0C__0000006FA8EDF3B9-0000007048B1EC09\n000000067F000080000005800C000000C000-000000067F000080000005800C0000010000__0000007168C9DFF8\n000000067F000080000005800C000000C000-000000067F000080000005800C0000010000__00000072377CDB60\n000000067F000080000005800C000000C000-030000000000000000000000000000000002__0000006FAFE25518\n000000067F000080000005800C0000010000-000000067F000080000005800C0000014000__0000007168C9DFF8\n000000067F000080000005800C0000010000-000000067F000080000005800C0000014000__00000072377CDB60\n000000067F000080000005800C0000012E0C-000000067F000080000005800C000001C572__0000006FA8EDF3B9-0000007048B1EC09\n000000067F000080000005800C0000014000-000000067F000080000005800C0000018000__0000007168C9DFF8\n000000067F000080000005800C0000014000-000000067F000080000005800C0000018000__00000072377CDB60\n000000067F000080000005800C0000018000-000000067F000080000005800C000001C000__0000007168C9DFF8\n000000067F000080000005800C0000018000-000000067F000080000005800C000001C000__00000072377CDB60\n000000067F000080000005800C000001C000-000000067F000080000005800C0000020000__0000007168C9DFF8\n000000067F000080000005800C000001C000-000000067F000080000005800C0000020000__00000072377CDB60\n000000067F000080000005800C000001C572-000000067F000080000005800C0000025CD8__0000006FA8EDF3B9-0000007048B1EC09\n000000067F000080000005800C0000020000
-000000067F000080000005800C0000024000__0000007168C9DFF8\n000000067F000080000005800C0000020000-000000067F000080000005800C0000024000__00000072377CDB60\n000000067F000080000005800C0000024000-000000067F000080000005800C0000028000__0000007168C9DFF8\n000000067F000080000005800C0000024000-000000067F000080000005800C0000028000__00000072377CDB60\n000000067F000080000005800C0000025CD8-000000067F000080000005800C000002F40B__0000006FA8EDF3B9-0000007048B1EC09\n000000067F000080000005800C0000028000-000000067F000080000005800C000002C000__0000007168C9DFF8\n000000067F000080000005800C0000028000-000000067F000080000005800C000002C000__00000072377CDB60\n000000067F000080000005800C000002C000-000000067F000080000005800C0000030000__0000007168C9DFF8\n000000067F000080000005800C000002C000-000000067F000080000005800C0000030000__00000072377CDB60\n000000067F000080000005800C000002F40B-000000067F000080000005800C0000038B1E__0000006FA8EDF3B9-0000007048B1EC09\n000000067F000080000005800C0000030000-000000067F000080000005800C0000034000__0000007168C9DFF8\n000000067F000080000005800C0000030000-000000067F000080000005800C0000034000__00000072377CDB60\n000000067F000080000005800C0000034000-000000067F000080000005800C0000038000__0000007168C9DFF8\n000000067F000080000005800C0000034000-000000067F000080000005800C0000038000__00000072377CDB60\n000000067F000080000005800C0000038000-000000067F000080000005800C000003C000__0000007168C9DFF8\n000000067F000080000005800C0000038000-000000067F000080000005800C000003C000__00000072377CDB60\n000000067F000080000005800C0000038B1E-000000067F000080000005800C0000042284__0000006FA8EDF3B9-0000007048B1EC09\n000000067F000080000005800C000003C000-000000067F000080000005800C0000040000__0000007168C9DFF8\n000000067F000080000005800C000003C000-000000067F000080000005800C0000040000__00000072377CDB60\n000000067F000080000005800C0000040000-000000067F000080000005800C0000044000__0000007168C9DFF8\n000000067F000080000005800C0000040000-000000067F000080000005800C0000044000__00000072377CDB60\n000000067F000080000005800C000004
2284-000000067F000080000005800C000004B9EA__0000006FA8EDF3B9-0000007048B1EC09\n000000067F000080000005800C0000044000-000000067F000080000005800C0000048000__0000007168C9DFF8\n000000067F000080000005800C0000044000-000000067F000080000005800C0000048000__00000072377CDB60\n000000067F000080000005800C0000048000-000000067F000080000005800C000004C000__0000007168C9DFF8\n000000067F000080000005800C0000048000-000000067F000080000005800C000004C000__00000072377CDB60\n000000067F000080000005800C000004B9EA-000000067F000080000005800C000005510B__0000006FA8EDF3B9-0000007048B1EC09\n000000067F000080000005800C000004C000-000000067F000080000005800C0000050000__0000007168C9DFF8\n000000067F000080000005800C000004C000-000000067F000080000005800C0000050000__00000072377CDB60\n000000067F000080000005800C0000050000-000000067F000080000005800C0000054000__0000007168C9DFF8\n000000067F000080000005800C0000050000-000000067F000080000005800C0000054000__00000072377CDB60\n000000067F000080000005800C0000054000-000000067F000080000005800C0000058000__0000007168C9DFF8\n000000067F000080000005800C0000054000-000000067F000080000005800C0000058000__00000072377CDB60\n000000067F000080000005800C000005510B-000000067F000080000005800C000005E871__0000006FA8EDF3B9-0000007048B1EC09\n000000067F000080000005800C0000058000-000000067F000080000005800C000005C000__0000007168C9DFF8\n000000067F000080000005800C0000058000-000000067F000080000005800C000005C000__00000072377CDB60\n000000067F000080000005800C000005C000-000000067F000080000005800C0000060000__0000007168C9DFF8\n000000067F000080000005800C000005C000-000000067F000080000005800C0000060000__00000072377CDB60\n000000067F000080000005800C000005CF08-000000067F000080000005800C00000BAF56__00000071F21624D1-000000723877FF21\n000000067F000080000005800C000005E871-000000067F000080000005800C0000067F8B__0000006FA8EDF3B9-0000007048B1EC09\n000000067F000080000005800C0000060000-000000067F000080000005800C0000064000__0000007168C9DFF8\n000000067F000080000005800C0000060000-000000067F000080000005800C0000064000__00000072377C
DB60\n000000067F000080000005800C0000064000-000000067F000080000005800C0000068000__0000007168C9DFF8\n000000067F000080000005800C0000064000-000000067F000080000005800C0000068000__00000072377CDB60\n000000067F000080000005800C0000067F8B-000000067F000080000005800C0100000000__0000006FA8EDF3B9-0000007048B1EC09\n000000067F000080000005800C0000068000-000000067F000080000005800C000006C000__0000007168C9DFF8\n000000067F000080000005800C0000068000-000000067F000080000005800C000006C000__00000072377CDB60\n000000067F000080000005800C000006C000-000000067F000080000005800C0000070000__0000007168C9DFF8\n000000067F000080000005800C000006C000-000000067F000080000005800C0000070000__00000072377CDB60\n000000067F000080000005800C0000070000-000000067F000080000005800C0000074000__0000007168C9DFF8\n000000067F000080000005800C0000070000-000000067F000080000005800C0000074000__00000072377CDB60\n000000067F000080000005800C0000071854-000000067F000080000005800C000007AFBA__0000007048B1EC09-00000070E8761431\n000000067F000080000005800C0000074000-000000067F000080000005800C0000078000__0000007168C9DFF8\n000000067F000080000005800C0000074000-000000067F000080000005800C0000078000__00000072377CDB60\n000000067F000080000005800C0000078000-000000067F000080000005800C000007C000__0000007168C9DFF8\n000000067F000080000005800C0000078000-000000067F000080000005800C000007C000__00000072377CDB60\n000000067F000080000005800C000007AFBA-000000067F000080000005800C0000084720__0000007048B1EC09-00000070E8761431\n000000067F000080000005800C000007C000-000000067F000080000005800C0000080000__0000007168C9DFF8\n000000067F000080000005800C000007C000-000000067F000080000005800C0000080000__00000072377CDB60\n000000067F000080000005800C0000080000-000000067F000080000005800C0000084000__0000007168C9DFF8\n000000067F000080000005800C0000080000-000000067F000080000005800C0000084000__00000072377CDB60\n000000067F000080000005800C0000084000-000000067F000080000005800C0000088000__0000007168C9DFF8\n000000067F000080000005800C0000084000-000000067F000080000005800C0000088000__00000072
377CDB60\n000000067F000080000005800C0000084720-000000067F000080000005800C000008DE86__0000007048B1EC09-00000070E8761431\n000000067F000080000005800C0000088000-000000067F000080000005800C000008C000__0000007168C9DFF8\n000000067F000080000005800C0000088000-000000067F000080000005800C000008C000__00000072377CDB60\n000000067F000080000005800C000008C000-000000067F000080000005800C0000090000__0000007168C9DFF8\n000000067F000080000005800C000008C000-000000067F000080000005800C0000090000__00000072377CDB60\n000000067F000080000005800C000008DE86-000000067F000080000005800C00000975A6__0000007048B1EC09-00000070E8761431\n000000067F000080000005800C0000090000-000000067F000080000005800C0000094000__0000007168C9DFF8\n000000067F000080000005800C0000090000-000000067F000080000005800C0000094000__00000072377CDB60\n000000067F000080000005800C0000094000-000000067F000080000005800C0000098000__0000007168C9DFF8\n000000067F000080000005800C0000094000-000000067F000080000005800C0000098000__00000072377CDB60\n000000067F000080000005800C00000975A6-000000067F000080000005800C00000A0D0C__0000007048B1EC09-00000070E8761431\n000000067F000080000005800C0000098000-000000067F000080000005800C000009C000__0000007168C9DFF8\n000000067F000080000005800C0000098000-000000067F000080000005800C000009C000__00000072377CDB60\n000000067F000080000005800C000009C000-000000067F000080000005800C00000A0000__0000007168C9DFF8\n000000067F000080000005800C000009C000-000000067F000080000005800C00000A0000__00000072377CDB60\n000000067F000080000005800C000009D78D-000000067F000080000005800C0200000018__000000716A103FC9-00000071F21624D1\n000000067F000080000005800C00000A0000-000000067F000080000005800C00000A4000__0000007168C9DFF8\n000000067F000080000005800C00000A0000-000000067F000080000005800C00000A4000__00000072377CDB60\n000000067F000080000005800C00000A0D0C-000000067F000080000005800C00000AA472__0000007048B1EC09-00000070E8761431\n000000067F000080000005800C00000A4000-000000067F000080000005800C00000A8000__0000007168C9DFF8\n000000067F000080000005800C00000A4000-00000006
7F000080000005800C00000A8000__00000072377CDB60\n000000067F000080000005800C00000A8000-000000067F000080000005800C00000AC000__0000007168C9DFF8\n000000067F000080000005800C00000A8000-000000067F000080000005800C00000AC000__00000072377CDB60\n000000067F000080000005800C00000AA472-000000067F000080000005800C00000B3BB4__0000007048B1EC09-00000070E8761431\n000000067F000080000005800C00000AC000-000000067F000080000005800C00000B0000__0000007168C9DFF8\n000000067F000080000005800C00000AC000-000000067F000080000005800C00000B0000__00000072377CDB60\n000000067F000080000005800C00000B0000-000000067F000080000005800C00000B4000__0000007168C9DFF8\n000000067F000080000005800C00000B0000-000000067F000080000005800C00000B4000__00000072377CDB60\n000000067F000080000005800C00000B3BB4-000000067F000080000005800C00000BD30B__0000007048B1EC09-00000070E8761431\n000000067F000080000005800C00000B4000-000000067F000080000005800C00000B8000__0000007168C9DFF8\n000000067F000080000005800C00000B4000-000000067F000080000005800C00000B8000__00000072377CDB60\n000000067F000080000005800C00000B8000-000000067F000080000005800C00000BC000__0000007168C9DFF8\n000000067F000080000005800C00000B8000-000000067F000080000005800C00000BC000__00000072377CDB60\n000000067F000080000005800C00000BAF5F-000000067F000080000005801400000007C1__00000071F21624D1-000000723877FF21\n000000067F000080000005800C00000BC000-000000067F000080000005800C00000C0000__0000007168C9DFF8\n000000067F000080000005800C00000BC000-000000067F000080000005800C00000C0000__00000072377CDB60\n000000067F000080000005800C00000BD30B-000000067F000080000005800C00000C6A32__0000007048B1EC09-00000070E8761431\n000000067F000080000005800C00000C0000-000000067F000080000005800C00000C4000__0000007168C9DFF8\n000000067F000080000005800C00000C0000-000000067F000080000005800C00000C4000__00000072377CDB60\n000000067F000080000005800C00000C4000-000000067F000080000005800C00000C8000__0000007168C9DFF8\n000000067F000080000005800C00000C4000-000000067F000080000005800C00000C8000__00000072377CDB60\n000000067F00008000000580
0C00000C6A32-000000067F000080000005800C0100000000__0000007048B1EC09-00000070E8761431\n000000067F000080000005800C00000C8000-000000067F000080000005800C00000CC000__0000007168C9DFF8\n000000067F000080000005800C00000C8000-000000067F000080000005800C00000CC000__00000072377CDB60\n000000067F000080000005800C00000CC000-000000067F000080000005800C00000D0000__0000007168C9DFF8\n000000067F000080000005800C00000CC000-000000067F000080000005800C00000D0000__00000072377CDB60\n000000067F000080000005800C00000CDE2D-000000067F000080000005800C00000D754D__00000070E8761431-000000716A103FC9\n000000067F000080000005800C00000D0000-000000067F000080000005800C00000D4000__0000007168C9DFF8\n000000067F000080000005800C00000D0000-000000067F000080000005800C00000D4000__00000072377CDB60\n000000067F000080000005800C00000D4000-000000067F000080000005800C00000D8000__0000007168C9DFF8\n000000067F000080000005800C00000D4000-000000067F000080000005800C00000D8000__00000072377CDB60\n000000067F000080000005800C00000D754D-000000067F000080000005800C00000E0CB3__00000070E8761431-000000716A103FC9\n000000067F000080000005800C00000D8000-000000067F000080000005800C00000DC000__0000007168C9DFF8\n000000067F000080000005800C00000D8000-000000067F000080000005800C00000DC000__00000072377CDB60\n000000067F000080000005800C00000DC000-000000067F000080000005800C00000E0000__0000007168C9DFF8\n000000067F000080000005800C00000DC000-000000067F000080000005800C00000E0000__00000072377CDB60\n000000067F000080000005800C00000E0000-000000067F000080000005800C00000E4000__0000007168C9DFF8\n000000067F000080000005800C00000E0000-000000067F000080000005800C00000E4000__00000072377CDB60\n000000067F000080000005800C00000E0CB3-000000067F000080000005800C00000EA409__00000070E8761431-000000716A103FC9\n000000067F000080000005800C00000E4000-000000067F000080000005800C00000E8000__0000007168C9DFF8\n000000067F000080000005800C00000E4000-000000067F000080000005800C00000E8000__00000072377CDB60\n000000067F000080000005800C00000E8000-000000067F000080000005800C00000EC000__0000007168C9DFF8\n000
000067F000080000005800C00000E8000-000000067F000080000005800C00000EC000__00000072377CDB60\n000000067F000080000005800C00000EA409-000000067F000080000005800C00000F3B4B__00000070E8761431-000000716A103FC9\n000000067F000080000005800C00000EC000-000000067F000080000005800C00000F0000__0000007168C9DFF8\n000000067F000080000005800C00000EC000-000000067F000080000005800C00000F0000__00000072377CDB60\n000000067F000080000005800C00000F0000-000000067F000080000005800C00000F4000__0000007168C9DFF8\n000000067F000080000005800C00000F0000-000000067F000080000005800C00000F4000__00000072377CDB60\n000000067F000080000005800C00000F3B4B-000000067F000080000005800C00000FD2B1__00000070E8761431-000000716A103FC9\n000000067F000080000005800C00000F4000-000000067F000080000005800C00000F8000__0000007168C9DFF8\n000000067F000080000005800C00000F4000-000000067F000080000005800C00000F8000__00000072377CDB60\n000000067F000080000005800C00000F8000-000000067F000080000005800C00000FC000__0000007168C9DFF8\n000000067F000080000005800C00000F8000-000000067F000080000005800C00000FC000__00000072377CDB60\n000000067F000080000005800C00000FC000-000000067F000080000005800C0000100000__0000007168C9DFF8\n000000067F000080000005800C00000FC000-000000067F000080000005800C0000100000__00000072377CDB60\n000000067F000080000005800C00000FD2B1-000000067F000080000005800C00001069D8__00000070E8761431-000000716A103FC9\n000000067F000080000005800C0000100000-000000067F000080000005800C0000104000__0000007168C9DFF8\n000000067F000080000005800C0000100000-000000067F000080000005800C0000104000__00000072377CDB60\n000000067F000080000005800C0000104000-000000067F000080000005800C0000108000__0000007168C9DFF8\n000000067F000080000005800C0000104000-000000067F000080000005800C0000108000__00000072377CDB60\n000000067F000080000005800C00001069D8-000000067F000080000005800C000011010C__00000070E8761431-000000716A103FC9\n000000067F000080000005800C0000108000-000000067F000080000005800C000010C000__0000007168C9DFF8\n000000067F000080000005800C0000108000-000000067F000080000005800C000010C000__
00000072377CDB60\n000000067F000080000005800C000010C000-000000067F000080000005800C0000110000__0000007168C9DFF8\n000000067F000080000005800C000010C000-000000067F000080000005800C0000110000__00000072377CDB60\n000000067F000080000005800C0000110000-000000067F00008000000580120100000000__00000072377CDB60\n000000067F000080000005800C0000110000-030000000000000000000000000000000002__0000007168C9DFF8\n000000067F000080000005800C000011010C-01000000000000000100000002000000001E__00000070E8761431-000000716A103FC9\n000000067F000080000005800C0200000018-000000067F000080000005801400000059BE__000000716A103FC9-00000071F21624D1\n000000067F00008000000580140000000000-000000067F00008000000580140000004000__00000072377CDB60\n000000067F000080000005801400000007C3-000000067F00008000000580140000020462__00000071F21624D1-000000723877FF21\n000000067F00008000000580140000004000-000000067F00008000000580140000008000__00000072377CDB60\n000000067F000080000005801400000059BE-000000067F0000800000058014000000BF38__000000716A103FC9-00000071F21624D1\n000000067F00008000000580140000008000-000000067F0000800000058014000000C000__00000072377CDB60\n000000067F0000800000058014000000BF38-000000067F00008000000580140000012530__000000716A103FC9-00000071F21624D1\n000000067F0000800000058014000000C000-000000067F00008000000580140000010000__00000072377CDB60\n000000067F00008000000580140000010000-000000067F00008000000580140000014000__00000072377CDB60\n000000067F00008000000580140000012530-000000067F00008000000580140000018B50__000000716A103FC9-00000071F21624D1\n000000067F00008000000580140000014000-000000067F00008000000580140000018000__00000072377CDB60\n000000067F00008000000580140000018000-000000067F0000800000058014000001C000__00000072377CDB60\n000000067F00008000000580140000018B50-000000067F0000800000058014000001F0D3__000000716A103FC9-00000071F21624D1\n000000067F0000800000058014000001C000-000000067F00008000000580140000020000__00000072377CDB60\n000000067F0000800000058014000001F0D3-000000067F0000800000058014000002562B__000000716A103FC9-0000
0071F21624D1\n000000067F00008000000580140000020000-000000067F00008000000580140000024000__00000072377CDB60\n000000067F00008000000580140000020464-030000000000000000000000000000000002__00000071F21624D1-000000723877FF21\n000000067F00008000000580140000024000-000000067F00008000000580140000028000__00000072377CDB60\n000000067F0000800000058014000002562B-000000067F0000800000058014000002BC37__000000716A103FC9-00000071F21624D1\n000000067F00008000000580140000028000-000000067F0000800000058014000002C000__00000072377CDB60\n000000067F0000800000058014000002BC37-030000000000000000000000000000000002__000000716A103FC9-00000071F21624D1\n000000067F0000800000058014000002C000-030000000000000000000000000000000002__00000072377CDB60\n000000067F000080000005A00C0000007614-000000067F000080000005A00C000000ED44__000000723877FF21-00000072A0D7CEA1\n000000067F000080000005A00C000000ED44-000000067F000080000005A00C0000016337__000000723877FF21-00000072A0D7CEA1\n000000067F000080000005A00C0000016337-000000067F000080000005A014000000148C__000000723877FF21-00000072A0D7CEA1\n000000067F000080000005A014000000148C-000000067F000080000005C00C0000003207__000000723877FF21-00000072A0D7CEA1\n000000067F000080000005C00C0000003207-000000067F000080000005C00C000000C96D__000000723877FF21-00000072A0D7CEA1\n000000067F000080000005C00C000000C96D-030000000000000000000000000000000002__000000723877FF21-00000072A0D7CEA1\n000000067F000080000005C00C0000016516-000000067F000080000005C0140000001694__00000072A0D7CEA1-0000007318DDE691\n000000067F000080000005C0140000001694-000000067F000080000005E00C000000360C__00000072A0D7CEA1-0000007318DDE691\n000000067F000080000005E00C0000000000-000000067F000080000005E00C0000004000__00000073AF75E930\n000000067F000080000005E00C0000000000-000000067F000080000005E00C0000004000__000000756884A510\n000000067F000080000005E00C000000360C-000000067F000080000005E00C000000CD72__00000072A0D7CEA1-0000007318DDE691\n000000067F000080000005E00C0000004000-000000067F000080000005E00C0000008000__00000073AF75E930\n000000067F00008
0000005E00C0000004000-000000067F000080000005E00C0000008000__000000756884A510\n000000067F000080000005E00C0000008000-000000067F000080000005E00C000000C000__00000073AF75E930\n000000067F000080000005E00C0000008000-000000067F000080000005E00C000000C000__000000756884A510\n000000067F000080000005E00C000000C000-000000067F000080000005E00C0000010000__00000073AF75E930\n000000067F000080000005E00C000000C000-000000067F000080000005E00C0000010000__000000756884A510\n000000067F000080000005E00C000000CD72-000000067F000080000005E00C00000164D8__00000072A0D7CEA1-0000007318DDE691\n000000067F000080000005E00C0000010000-000000067F000080000005E00C0000014000__00000073AF75E930\n000000067F000080000005E00C0000010000-000000067F000080000005E00C0000014000__000000756884A510\n000000067F000080000005E00C0000014000-000000067F000080000005E00C0000018000__00000073AF75E930\n000000067F000080000005E00C0000014000-000000067F000080000005E00C0000018000__000000756884A510\n000000067F000080000005E00C00000164D8-000000067F000080000005E00C000001FC0B__00000072A0D7CEA1-0000007318DDE691\n000000067F000080000005E00C0000018000-000000067F000080000005E00C000001C000__00000073AF75E930\n000000067F000080000005E00C0000018000-000000067F000080000005E00C000001C000__000000756884A510\n000000067F000080000005E00C000001C000-000000067F000080000005E00C0000020000__00000073AF75E930\n000000067F000080000005E00C000001C000-000000067F000080000005E00C0000020000__000000756884A510\n000000067F000080000005E00C000001FC0B-000000067F000080000005E00C0000029319__00000072A0D7CEA1-0000007318DDE691\n000000067F000080000005E00C0000020000-000000067F000080000005E00C0000024000__00000073AF75E930\n000000067F000080000005E00C0000020000-000000067F000080000005E00C0000024000__000000756884A510\n000000067F000080000005E00C0000024000-000000067F000080000005E00C0000028000__00000073AF75E930\n000000067F000080000005E00C0000024000-000000067F000080000005E00C0000028000__000000756884A510\n000000067F000080000005E00C0000028000-000000067F000080000005E00C000002C000__00000073AF75E930\n000000067F0
00080000005E00C0000028000-000000067F000080000005E00C000002C000__000000756884A510\n000000067F000080000005E00C0000029319-030000000000000000000000000000000002__00000072A0D7CEA1-0000007318DDE691\n000000067F000080000005E00C000002C000-000000067F000080000005E00C0000030000__00000073AF75E930\n000000067F000080000005E00C000002C000-000000067F000080000005E00C0000030000__000000756884A510\n000000067F000080000005E00C0000030000-000000067F000080000005E00C0000034000__00000073AF75E930\n000000067F000080000005E00C0000030000-000000067F000080000005E00C0000034000__000000756884A510\n000000067F000080000005E00C0000034000-000000067F000080000005E00C0000038000__00000073AF75E930\n000000067F000080000005E00C0000034000-000000067F000080000005E00C0000038000__000000756884A510\n000000067F000080000005E00C0000038000-000000067F000080000005E00C000003C000__00000073AF75E930\n000000067F000080000005E00C0000038000-000000067F000080000005E00C000003C000__000000756884A510\n000000067F000080000005E00C00000385D9-000000067F000080000005E00C0000041D0A__0000007318DDE691-0000007497B01FF9\n000000067F000080000005E00C000003C000-000000067F000080000005E00C0000040000__00000073AF75E930\n000000067F000080000005E00C000003C000-000000067F000080000005E00C0000040000__000000756884A510\n000000067F000080000005E00C0000040000-000000067F000080000005E00C0000044000__00000073AF75E930\n000000067F000080000005E00C0000040000-000000067F000080000005E00C0000044000__000000756884A510\n000000067F000080000005E00C0000041D0A-000000067F000080000005E00C000004B470__0000007318DDE691-0000007497B01FF9\n000000067F000080000005E00C0000044000-000000067F000080000005E00C0000048000__00000073AF75E930\n000000067F000080000005E00C0000044000-000000067F000080000005E00C0000048000__000000756884A510\n000000067F000080000005E00C0000048000-000000067F000080000005E00C000004C000__00000073AF75E930\n000000067F000080000005E00C0000048000-000000067F000080000005E00C000004C000__000000756884A510\n000000067F000080000005E00C000004B470-000000067F000080000005E00C0000054BA9__0000007318DDE691-00000074
97B01FF9\n000000067F000080000005E00C000004C000-000000067F000080000005E00C0000050000__00000073AF75E930\n000000067F000080000005E00C000004C000-000000067F000080000005E00C0000050000__000000756884A510\n000000067F000080000005E00C0000050000-000000067F000080000005E00C0000054000__00000073AF75E930\n000000067F000080000005E00C0000050000-000000067F000080000005E00C0000054000__000000756884A510\n000000067F000080000005E00C000005017A-000000067F000080000005E00C000009FEAD__000000751253A4C1-00000075687C3009\n000000067F000080000005E00C0000054000-000000067F000080000005E00C0000058000__00000073AF75E930\n000000067F000080000005E00C0000054000-000000067F000080000005E00C0000058000__000000756884A510\n000000067F000080000005E00C0000054BA9-000000067F000080000005E00C000005E30B__0000007318DDE691-0000007497B01FF9\n000000067F000080000005E00C0000058000-000000067F000080000005E00C000005C000__00000073AF75E930\n000000067F000080000005E00C0000058000-000000067F000080000005E00C000005C000__000000756884A510\n000000067F000080000005E00C000005C000-000000067F000080000005E00C0000060000__00000073AF75E930\n000000067F000080000005E00C000005C000-000000067F000080000005E00C0000060000__000000756884A510\n000000067F000080000005E00C000005E30B-000000067F000080000005E00C0000067A2C__0000007318DDE691-0000007497B01FF9\n000000067F000080000005E00C0000060000-000000067F000080000005E00C0000064000__00000073AF75E930\n000000067F000080000005E00C0000060000-000000067F000080000005E00C0000064000__000000756884A510\n000000067F000080000005E00C0000064000-000000067F000080000005E00C0000068000__00000073AF75E930\n000000067F000080000005E00C0000064000-000000067F000080000005E00C0000068000__000000756884A510\n000000067F000080000005E00C0000067A2C-000000067F000080000005E00C0000071187__0000007318DDE691-0000007497B01FF9\n000000067F000080000005E00C0000068000-000000067F000080000005E00C000006C000__00000073AF75E930\n000000067F000080000005E00C0000068000-000000067F000080000005E00C000006C000__000000756884A510\n000000067F000080000005E00C000006C000-000000067F000080000005E00
C0000070000__00000073AF75E930\n000000067F000080000005E00C000006C000-000000067F000080000005E00C0000070000__000000756884A510\n000000067F000080000005E00C0000070000-000000067F000080000005E00C0000074000__00000073AF75E930\n000000067F000080000005E00C0000070000-000000067F000080000005E00C0000074000__000000756884A510\n000000067F000080000005E00C0000071187-000000067F000080000005E00C000007A8ED__0000007318DDE691-0000007497B01FF9\n000000067F000080000005E00C0000074000-000000067F000080000005E00C0000078000__00000073AF75E930\n000000067F000080000005E00C0000074000-000000067F000080000005E00C0000078000__000000756884A510\n000000067F000080000005E00C0000078000-000000067F000080000005E00C000007C000__00000073AF75E930\n000000067F000080000005E00C0000078000-000000067F000080000005E00C000007C000__000000756884A510\n000000067F000080000005E00C000007A8ED-000000067F000080000005E00C000008400B__0000007318DDE691-0000007497B01FF9\n000000067F000080000005E00C000007C000-000000067F000080000005E00C0000080000__00000073AF75E930\n000000067F000080000005E00C000007C000-000000067F000080000005E00C0000080000__000000756884A510\n000000067F000080000005E00C0000080000-000000067F000080000005E00C0000084000__00000073AF75E930\n000000067F000080000005E00C0000080000-000000067F000080000005E00C0000084000__000000756884A510\n000000067F000080000005E00C0000084000-000000067F000080000005E00C0000088000__00000073AF75E930\n000000067F000080000005E00C0000084000-000000067F000080000005E00C0000088000__000000756884A510\n000000067F000080000005E00C000008400B-000000067F000080000005E00C000008D771__0000007318DDE691-0000007497B01FF9\n000000067F000080000005E00C0000088000-000000067F000080000005E00C000008C000__000000756884A510\n000000067F000080000005E00C0000088000-030000000000000000000000000000000002__00000073AF75E930\n000000067F000080000005E00C000008C000-000000067F000080000005E00C0000090000__000000756884A510\n000000067F000080000005E00C000008D771-000000067F000080000005E00C0000096ED7__0000007318DDE691-0000007497B01FF9\n000000067F000080000005E00C0000090000-0000
00067F000080000005E00C0000094000__000000756884A510\n000000067F000080000005E00C0000094000-000000067F000080000005E00C0000098000__000000756884A510\n000000067F000080000005E00C0000096ED7-000000067F000080000005E00C00000A060B__0000007318DDE691-0000007497B01FF9\n000000067F000080000005E00C0000098000-000000067F000080000005E00C000009C000__000000756884A510\n000000067F000080000005E00C000009C000-000000067F000080000005E00C00000A0000__000000756884A510\n000000067F000080000005E00C000009FEB2-000000067F000080000005E00C00000EF4ED__000000751253A4C1-00000075687C3009\n000000067F000080000005E00C00000A0000-000000067F000080000005E00C00000A4000__000000756884A510\n000000067F000080000005E00C00000A060B-000000067F000080000005E00C00000A9D71__0000007318DDE691-0000007497B01FF9\n000000067F000080000005E00C00000A4000-000000067F000080000005E00C00000A8000__000000756884A510\n000000067F000080000005E00C00000A8000-000000067F000080000005E00C00000AC000__000000756884A510\n000000067F000080000005E00C00000A9D71-000000067F000080000005E00C00000B34D7__0000007318DDE691-0000007497B01FF9\n000000067F000080000005E00C00000AC000-000000067F000080000005E00C00000B0000__000000756884A510\n000000067F000080000005E00C00000AF576-000000067F000080000005E00C0200000023__0000007497B01FF9-000000751253A4C1\n000000067F000080000005E00C00000B0000-000000067F000080000005E00C00000B4000__000000756884A510\n000000067F000080000005E00C00000B34D7-000000067F000080000005E00C00000BCC0C__0000007318DDE691-0000007497B01FF9\n000000067F000080000005E00C00000B4000-000000067F000080000005E00C00000B8000__000000756884A510\n000000067F000080000005E00C00000B8000-000000067F000080000005E00C00000BC000__000000756884A510\n000000067F000080000005E00C00000BC000-000000067F000080000005E00C00000C0000__000000756884A510\n000000067F000080000005E00C00000BCC0C-000000067F000080000005E00C00000C6336__0000007318DDE691-0000007497B01FF9\n000000067F000080000005E00C00000C0000-000000067F000080000005E00C00000C4000__000000756884A510\n000000067F000080000005E00C00000C4000-000000067F000080000005E00
C00000C8000__000000756884A510\n000000067F000080000005E00C00000C6336-000000067F000080000005E00C00000CFA9C__0000007318DDE691-0000007497B01FF9\n000000067F000080000005E00C00000C8000-000000067F000080000005E00C00000CC000__000000756884A510\n000000067F000080000005E00C00000CC000-000000067F000080000005E00C00000D0000__000000756884A510\n000000067F000080000005E00C00000CFA9C-000000067F000080000005E00C00000D91AB__0000007318DDE691-0000007497B01FF9\n000000067F000080000005E00C00000D0000-000000067F000080000005E00C00000D4000__000000756884A510\n000000067F000080000005E00C00000D4000-000000067F000080000005E00C00000D8000__000000756884A510\n000000067F000080000005E00C00000D8000-000000067F000080000005E00C00000DC000__000000756884A510\n000000067F000080000005E00C00000D91AB-000000067F000080000005E00C00000E2911__0000007318DDE691-0000007497B01FF9\n000000067F000080000005E00C00000DC000-000000067F000080000005E00C00000E0000__000000756884A510\n000000067F000080000005E00C00000E0000-000000067F000080000005E00C00000E4000__000000756884A510\n000000067F000080000005E00C00000E2911-000000067F000080000005E00C00000EC077__0000007318DDE691-0000007497B01FF9\n000000067F000080000005E00C00000E4000-000000067F000080000005E00C00000E8000__000000756884A510\n000000067F000080000005E00C00000E8000-000000067F000080000005E00C00000EC000__000000756884A510\n000000067F000080000005E00C00000EC000-000000067F000080000005E00C00000F0000__000000756884A510\n000000067F000080000005E00C00000EC077-000000067F000080000005E00C00000F57A8__0000007318DDE691-0000007497B01FF9\n000000067F000080000005E00C00000EF4F1-000000067F000080000005E014000000BDDE__000000751253A4C1-00000075687C3009\n000000067F000080000005E00C00000F0000-000000067F000080000005E00C00000F4000__000000756884A510\n000000067F000080000005E00C00000F4000-000000067F000080000005E00C00000F8000__000000756884A510\n000000067F000080000005E00C00000F57A8-000000067F000080000005E00C00000FEF0A__0000007318DDE691-0000007497B01FF9\n000000067F000080000005E00C00000F8000-000000067F000080000005E00C00000FC000__00000075
6884A510\n000000067F000080000005E00C00000FC000-000000067F000080000005E00C0000100000__000000756884A510\n000000067F000080000005E00C00000FEF0A-000000067F000080000005E00C000010862B__0000007318DDE691-0000007497B01FF9\n000000067F000080000005E00C0000100000-000000067F000080000005E00C0000104000__000000756884A510\n000000067F000080000005E00C0000104000-000000067F000080000005E00C0000108000__000000756884A510\n000000067F000080000005E00C0000108000-000000067F000080000005E00C000010C000__000000756884A510\n000000067F000080000005E00C000010862B-000000067F000080000005E00C0000111C20__0000007318DDE691-0000007497B01FF9\n000000067F000080000005E00C000010C000-000000067F000080000005E00C0000110000__000000756884A510\n000000067F000080000005E00C0000110000-000000067F000080000005E0120100000000__000000756884A510\n000000067F000080000005E00C00FFFFFFFF-010000000000000001000000030000000002__0000007318DDE691-0000007497B01FF9\n000000067F000080000005E00C02FFFFFFFF-000000067F000080000005E0140000006C41__0000007497B01FF9-000000751253A4C1\n000000067F000080000005E0140000000000-000000067F000080000005E0140000004000__000000756884A510\n000000067F000080000005E0140000004000-000000067F000080000005E0140000008000__000000756884A510\n000000067F000080000005E0140000006C41-000000067F000080000005E014000000D890__0000007497B01FF9-000000751253A4C1\n000000067F000080000005E0140000008000-000000067F000080000005E014000000C000__000000756884A510\n000000067F000080000005E014000000BDDE-000000067F000080000005E0140000023A18__000000751253A4C1-00000075687C3009\n000000067F000080000005E014000000C000-000000067F000080000005E0140000010000__000000756884A510\n000000067F000080000005E014000000D890-000000067F000080000005E01400000144C8__0000007497B01FF9-000000751253A4C1\n000000067F000080000005E0140000010000-000000067F000080000005E0140000014000__000000756884A510\n000000067F000080000005E0140000014000-000000067F000080000005E0140000018000__000000756884A510\n000000067F000080000005E01400000144C8-000000067F000080000005E014000001B1AC__0000007497B01FF9-000000751253
A4C1\n000000067F000080000005E0140000018000-000000067F000080000005E014000001C000__000000756884A510\n000000067F000080000005E014000001B1AC-000000067F000080000005E0140000021E03__0000007497B01FF9-000000751253A4C1\n000000067F000080000005E014000001C000-000000067F000080000005E0140000020000__000000756884A510\n000000067F000080000005E0140000020000-000000067F000080000005E0140000024000__000000756884A510\n000000067F000080000005E0140000021E03-000000067F000080000005E0140000028A36__0000007497B01FF9-000000751253A4C1\n000000067F000080000005E0140000023A18-030000000000000000000000000000000002__000000751253A4C1-00000075687C3009\n000000067F000080000005E0140000024000-000000067F000080000005E0140000028000__000000756884A510\n000000067F000080000005E0140000028000-000000067F000080000005E014000002C000__000000756884A510\n000000067F000080000005E0140000028A36-030000000000000000000000000000000002__0000007497B01FF9-000000751253A4C1\n000000067F000080000005E014000002C000-030000000000000000000000000000000002__000000756884A510\n000000067F000080000006000C0000000000-000000067F000080000006000C0000004000__00000077B1836CA0\n000000067F000080000006000C0000004000-000000067F000080000006000C0000008000__00000077B1836CA0\n000000067F000080000006000C0000008000-000000067F000080000006000C000000C000__00000077B1836CA0\n000000067F000080000006000C0000008FB7-000000067F000080000006000C000001271D__00000075687C3009-00000075E915EBC9\n000000067F000080000006000C000000C000-000000067F000080000006000C0000010000__00000077B1836CA0\n000000067F000080000006000C0000010000-000000067F000080000006000C0000014000__00000077B1836CA0\n000000067F000080000006000C000001271D-000000067F000080000006000C000001BE83__00000075687C3009-00000075E915EBC9\n000000067F000080000006000C0000014000-000000067F000080000006000C0000018000__00000077B1836CA0\n000000067F000080000006000C0000018000-000000067F000080000006000C000001C000__00000077B1836CA0\n000000067F000080000006000C000001BE83-000000067F000080000006000C00000255B6__00000075687C3009-00000075E915EBC9\n000000067F00008
0000006000C000001C000-000000067F000080000006000C0000020000__00000077B1836CA0\n000000067F000080000006000C0000020000-000000067F000080000006000C0000024000__00000077B1836CA0\n000000067F000080000006000C0000024000-000000067F000080000006000C0000028000__00000077B1836CA0\n000000067F000080000006000C00000255B6-000000067F000080000006000C000002ED0B__00000075687C3009-00000075E915EBC9\n000000067F000080000006000C0000028000-000000067F000080000006000C000002C000__00000077B1836CA0\n000000067F000080000006000C000002C000-000000067F000080000006000C0000030000__00000077B1836CA0\n000000067F000080000006000C000002ED0B-000000067F000080000006000C000003842B__00000075687C3009-00000075E915EBC9\n000000067F000080000006000C0000030000-000000067F000080000006000C0000034000__00000077B1836CA0\n000000067F000080000006000C0000034000-000000067F000080000006000C0000038000__00000077B1836CA0\n000000067F000080000006000C0000038000-000000067F000080000006000C000003C000__00000077B1836CA0\n000000067F000080000006000C000003842B-000000067F000080000006000C0000041B80__00000075687C3009-00000075E915EBC9\n000000067F000080000006000C000003C000-000000067F000080000006000C0000040000__00000077B1836CA0\n000000067F000080000006000C0000040000-000000067F000080000006000C0000044000__00000077B1836CA0\n000000067F000080000006000C0000041B80-000000067F000080000006000C000004B2E6__00000075687C3009-00000075E915EBC9\n000000067F000080000006000C0000044000-000000067F000080000006000C0000048000__00000077B1836CA0\n000000067F000080000006000C0000048000-000000067F000080000006000C000004C000__0000007739203FF0\n000000067F000080000006000C000004B2E6-030000000000000000000000000000000002__00000075687C3009-00000075E915EBC9\n000000067F000080000006000C000004BAC2-000000067F000080000006000C00000551F7__00000075E915EBC9-00000076A8CDE8F9\n000000067F000080000006000C000004C000-000000067F000080000006000C0000050000__0000007739203FF0\n000000067F000080000006000C0000050000-000000067F000080000006000C0000054000__0000007739203FF0\n000000067F000080000006000C0000051A05-000000067F000080
000006000C00000A4D93__00000077B2AD0F91-0000007805801C41\n000000067F000080000006000C0000054000-000000067F000080000006000C0000058000__0000007739203FF0\n000000067F000080000006000C00000551F7-000000067F000080000006000C000005E90B__00000075E915EBC9-00000076A8CDE8F9\n000000067F000080000006000C0000058000-000000067F000080000006000C000005C000__0000007739203FF0\n000000067F000080000006000C000005C000-000000067F000080000006000C0000060000__0000007739203FF0\n000000067F000080000006000C000005E90B-000000067F000080000006000C000006802B__00000075E915EBC9-00000076A8CDE8F9\n000000067F000080000006000C0000060000-000000067F000080000006000C0000064000__0000007739203FF0\n000000067F000080000006000C0000064000-000000067F000080000006000C0000068000__0000007739203FF0\n000000067F000080000006000C0000068000-000000067F000080000006000C000006C000__0000007739203FF0\n000000067F000080000006000C000006802B-000000067F000080000006000C0000071782__00000075E915EBC9-00000076A8CDE8F9\n000000067F000080000006000C000006C000-000000067F000080000006000C0000070000__0000007739203FF0\n000000067F000080000006000C0000070000-000000067F000080000006000C0000074000__0000007739203FF0\n000000067F000080000006000C0000071782-000000067F000080000006000C000007AEE8__00000075E915EBC9-00000076A8CDE8F9\n000000067F000080000006000C0000074000-000000067F000080000006000C0000078000__0000007739203FF0\n000000067F000080000006000C0000078000-000000067F000080000006000C000007C000__0000007739203FF0\n000000067F000080000006000C000007AEE8-000000067F000080000006000C000008460B__00000075E915EBC9-00000076A8CDE8F9\n000000067F000080000006000C000007C000-000000067F000080000006000C0000080000__0000007739203FF0\n000000067F000080000006000C0000080000-000000067F000080000006000C0000084000__0000007739203FF0\n000000067F000080000006000C0000084000-000000067F000080000006000C0000088000__0000007739203FF0\n000000067F000080000006000C000008460B-000000067F000080000006000C000008DD71__00000075E915EBC9-00000076A8CDE8F9\n000000067F000080000006000C0000088000-000000067F000080000006000C000008C000_
_0000007739203FF0\n000000067F000080000006000C000008C000-000000067F000080000006000C0000090000__0000007739203FF0\n000000067F000080000006000C000008DD71-000000067F000080000006000C00000974D7__00000075E915EBC9-00000076A8CDE8F9\n000000067F000080000006000C0000090000-000000067F000080000006000C0000094000__0000007739203FF0\n000000067F000080000006000C0000094000-000000067F000080000006000C0000098000__0000007739203FF0\n000000067F000080000006000C00000974D7-000000067F000080000006000C00000A0C0B__00000075E915EBC9-00000076A8CDE8F9\n000000067F000080000006000C0000098000-000000067F000080000006000C000009C000__0000007739203FF0\n000000067F000080000006000C000009C000-000000067F000080000006000C00000A0000__0000007739203FF0\n000000067F000080000006000C00000A0000-000000067F000080000006000C00000A4000__0000007739203FF0\n000000067F000080000006000C00000A0C0B-000000067F000080000006000C00000AA371__00000075E915EBC9-00000076A8CDE8F9\n000000067F000080000006000C00000A4000-000000067F000080000006000C00000A8000__0000007739203FF0\n000000067F000080000006000C00000A4D95-000000067F000080000006000C00000F7C7B__00000077B2AD0F91-0000007805801C41\n000000067F000080000006000C00000A8000-000000067F000080000006000C00000AC000__0000007739203FF0\n000000067F000080000006000C00000AA371-000000067F000080000006000C00000B3AD7__00000075E915EBC9-00000076A8CDE8F9\n000000067F000080000006000C00000AC000-000000067F000080000006000C00000B0000__0000007739203FF0\n000000067F000080000006000C00000B0000-000000067F000080000006000C00000B4000__0000007739203FF0\n000000067F000080000006000C00000B3AD7-000000067F000080000006000C00000BD20B__00000075E915EBC9-00000076A8CDE8F9\n000000067F000080000006000C00000B4000-000000067F000080000006000C00000B8000__0000007739203FF0\n000000067F000080000006000C00000B8000-000000067F000080000006000C00000BC000__0000007739203FF0\n000000067F000080000006000C00000BC000-000000067F000080000006000C00000C0000__0000007739203FF0\n000000067F000080000006000C00000BD20B-000000067F000080000006000C0100000000__00000075E915EBC9-00000076A8CDE8F9\n00
0000067F000080000006000C00000C0000-000000067F000080000006000C00000C4000__0000007739203FF0\n000000067F000080000006000C00000C3C38-000000067F00008000000600140000001B38__00000077391A8001-00000077B2AD0F91\n000000067F000080000006000C00000C4000-000000067F000080000006000C00000C8000__0000007739203FF0\n000000067F000080000006000C00000C56C1-000000067F000080000006000C00000CEE0A__00000076A8CDE8F9-00000077391A8001\n000000067F000080000006000C00000C8000-000000067F000080000006000C00000CC000__0000007739203FF0\n000000067F000080000006000C00000CC000-000000067F000080000006000C00000D0000__0000007739203FF0\n000000067F000080000006000C00000CEE0A-000000067F000080000006000C00000D8520__00000076A8CDE8F9-00000077391A8001\n000000067F000080000006000C00000D0000-000000067F000080000006000C00000D4000__0000007739203FF0\n000000067F000080000006000C00000D4000-000000067F000080000006000C00000D8000__0000007739203FF0\n000000067F000080000006000C00000D8000-000000067F000080000006000C00000DC000__0000007739203FF0\n000000067F000080000006000C00000D8520-000000067F000080000006000C00000E1C86__00000076A8CDE8F9-00000077391A8001\n000000067F000080000006000C00000DC000-000000067F000080000006000C00000E0000__0000007739203FF0\n000000067F000080000006000C00000E0000-000000067F000080000006000C00000E4000__0000007739203FF0\n000000067F000080000006000C00000E1C86-000000067F000080000006000C00000EB3EC__00000076A8CDE8F9-00000077391A8001\n000000067F000080000006000C00000E4000-000000067F000080000006000C00000E8000__0000007739203FF0\n000000067F000080000006000C00000E8000-000000067F000080000006000C00000EC000__0000007739203FF0\n000000067F000080000006000C00000EB3EC-000000067F000080000006000C00000F4B0C__00000076A8CDE8F9-00000077391A8001\n000000067F000080000006000C00000EC000-000000067F000080000006000C00000F0000__0000007739203FF0\n000000067F000080000006000C00000F0000-000000067F000080000006000C00000F4000__0000007739203FF0\n000000067F000080000006000C00000F4000-000000067F000080000006000C00000F8000__0000007739203FF0\n000000067F000080000006000C00000F4B0C-000
000067F000080000006000C00000FE272__00000076A8CDE8F9-00000077391A8001\n000000067F000080000006000C00000F7C96-000000067F0000800000060014000000F3A9__00000077B2AD0F91-0000007805801C41\n000000067F000080000006000C00000F8000-000000067F000080000006000C00000FC000__0000007739203FF0\n000000067F000080000006000C00000FC000-000000067F000080000006000C0000100000__0000007739203FF0\n000000067F000080000006000C00000FE272-000000067F000080000006000C000010798F__00000076A8CDE8F9-00000077391A8001\n000000067F000080000006000C0000100000-000000067F000080000006000C0000104000__0000007739203FF0\n000000067F000080000006000C0000104000-000000067F000080000006000C0000108000__0000007739203FF0\n000000067F000080000006000C000010798F-000000067F000080000006000C00001110F5__00000076A8CDE8F9-00000077391A8001\n000000067F000080000006000C0000108000-000000067F000080000006000C000010C000__0000007739203FF0\n000000067F000080000006000C000010C000-000000067F000080000006000C0000110000__0000007739203FF0\n000000067F000080000006000C0000110000-030000000000000000000000000000000002__0000007739203FF0\n000000067F000080000006000C00001110F5-010000000000000001000000030000000006__00000076A8CDE8F9-00000077391A8001\n000000067F00008000000600140000001B38-000000067F00008000000600140000008758__00000077391A8001-00000077B2AD0F91\n000000067F00008000000600140000008758-000000067F0000800000060014000000F32F__00000077391A8001-00000077B2AD0F91\n000000067F0000800000060014000000F32F-000000067F00008000000600140000015EDC__00000077391A8001-00000077B2AD0F91\n000000067F0000800000060014000000F3A9-000000067F00008000000600140000028656__00000077B2AD0F91-0000007805801C41\n000000067F00008000000600140000015EDC-000000067F0000800000060014000001CB12__00000077391A8001-00000077B2AD0F91\n000000067F0000800000060014000001CB12-000000067F000080000006001400000236BC__00000077391A8001-00000077B2AD0F91\n000000067F000080000006001400000236BC-000000067F0000800000060014000002A294__00000077391A8001-00000077B2AD0F91\n000000067F00008000000600140000028657-03000000000000000000000000000000
0002__00000077B2AD0F91-0000007805801C41\n000000067F0000800000060014000002A294-030000000000000000000000000000000002__00000077391A8001-00000077B2AD0F91\n000000067F000080000006200C0000000000-000000067F000080000006200C0000004000__00000078B2CB1C68\n000000067F000080000006200C0000004000-000000067F000080000006200C0000008000__00000078B2CB1C68\n000000067F000080000006200C0000008000-000000067F000080000006200C000000C000__00000078B2CB1C68\n000000067F000080000006200C0000009441-000000067F000080000006200C0000012B8D__0000007805801C41-00000078859FEA11\n000000067F000080000006200C000000C000-000000067F000080000006200C0000010000__00000078B2CB1C68\n000000067F000080000006200C0000010000-000000067F000080000006200C0000014000__00000078B2CB1C68\n000000067F000080000006200C0000012B8D-000000067F000080000006200C000001C2F3__0000007805801C41-00000078859FEA11\n000000067F000080000006200C0000014000-000000067F000080000006200C0000018000__00000078B2CB1C68\n000000067F000080000006200C0000018000-000000067F000080000006200C000001C000__00000078B2CB1C68\n000000067F000080000006200C000001C000-000000067F000080000006200C0000020000__00000078B2CB1C68\n000000067F000080000006200C000001C2F3-000000067F000080000006200C0000025A0C__0000007805801C41-00000078859FEA11\n000000067F000080000006200C0000020000-000000067F000080000006200C0000024000__00000078B2CB1C68\n000000067F000080000006200C0000024000-000000067F000080000006200C0000028000__00000078B2CB1C68\n000000067F000080000006200C0000025A0C-000000067F000080000006200C000002F172__0000007805801C41-00000078859FEA11\n000000067F000080000006200C0000028000-000000067F000080000006200C000002C000__00000078B2CB1C68\n000000067F000080000006200C000002C000-000000067F000080000006200C0000030000__00000078B2CB1C68\n000000067F000080000006200C000002F172-000000067F000080000006200C00000388D8__0000007805801C41-00000078859FEA11\n000000067F000080000006200C0000030000-000000067F000080000006200C0000034000__00000078B2CB1C68\n000000067F000080000006200C0000034000-000000067F000080000006200C0000038000__00000078B2CB1C6
8\n000000067F000080000006200C0000038000-000000067F000080000006200C000003C000__00000078B2CB1C68\n000000067F000080000006200C00000388D8-000000067F000080000006200C0000042009__0000007805801C41-00000078859FEA11\n000000067F000080000006200C000003C000-000000067F000080000006200C0000040000__00000078B2CB1C68\n000000067F000080000006200C0000040000-000000067F000080000006200C0000044000__00000078B2CB1C68\n000000067F000080000006200C0000042009-000000067F000080000006200C000004B76F__0000007805801C41-00000078859FEA11\n000000067F000080000006200C0000044000-000000067F000080000006200C0000048000__00000078B2CB1C68\n000000067F000080000006200C0000048000-000000067F000080000006200C000004C000__00000078B2CB1C68\n000000067F000080000006200C0000048000-000000067F000080000006200C000004C000__0000007AA0A6FB48\n000000067F000080000006200C0000048121-000000067F000080000006200C0000090C08__0000007A3F679FA1-0000007AA1DF6639\n000000067F000080000006200C000004B76F-030000000000000000000000000000000002__0000007805801C41-00000078859FEA11\n000000067F000080000006200C000004BAC9-000000067F000080000006200C00000551FE__00000078859FEA11-00000079C527F0D9\n000000067F000080000006200C000004C000-000000067F000080000006200C0000050000__00000078B2CB1C68\n000000067F000080000006200C000004C000-000000067F000080000006200C0000050000__0000007AA0A6FB48\n000000067F000080000006200C0000050000-000000067F000080000006200C0000054000__00000078B2CB1C68\n000000067F000080000006200C0000050000-000000067F000080000006200C0000054000__0000007AA0A6FB48\n000000067F000080000006200C0000054000-000000067F000080000006200C0000058000__00000078B2CB1C68\n000000067F000080000006200C0000054000-000000067F000080000006200C0000058000__0000007AA0A6FB48\n000000067F000080000006200C00000551FE-000000067F000080000006200C000005E90C__00000078859FEA11-00000079C527F0D9\n000000067F000080000006200C0000058000-000000067F000080000006200C000005C000__00000078B2CB1C68\n000000067F000080000006200C0000058000-000000067F000080000006200C000005C000__0000007AA0A6FB48\n000000067F000080000006200C000005C00
0-000000067F000080000006200C0000060000__00000078B2CB1C68\n000000067F000080000006200C000005C000-000000067F000080000006200C0000060000__0000007AA0A6FB48\n000000067F000080000006200C000005E90C-000000067F000080000006200C000006802C__00000078859FEA11-00000079C527F0D9\n000000067F000080000006200C0000060000-000000067F000080000006200C0000064000__00000078B2CB1C68\n000000067F000080000006200C0000060000-000000067F000080000006200C0000064000__0000007AA0A6FB48\n000000067F000080000006200C0000064000-000000067F000080000006200C0000068000__0000007AA0A6FB48\n000000067F000080000006200C0000064000-030000000000000000000000000000000002__00000078B2CB1C68\n000000067F000080000006200C0000068000-000000067F000080000006200C000006C000__0000007AA0A6FB48\n000000067F000080000006200C000006802C-000000067F000080000006200C0000071783__00000078859FEA11-00000079C527F0D9\n000000067F000080000006200C000006C000-000000067F000080000006200C0000070000__0000007AA0A6FB48\n000000067F000080000006200C0000070000-000000067F000080000006200C0000074000__0000007AA0A6FB48\n000000067F000080000006200C0000071783-000000067F000080000006200C000007AEE9__00000078859FEA11-00000079C527F0D9\n000000067F000080000006200C0000074000-000000067F000080000006200C0000078000__0000007AA0A6FB48\n000000067F000080000006200C0000078000-000000067F000080000006200C000007C000__0000007AA0A6FB48\n000000067F000080000006200C000007AEE9-000000067F000080000006200C000008460B__00000078859FEA11-00000079C527F0D9\n000000067F000080000006200C000007C000-000000067F000080000006200C0000080000__0000007AA0A6FB48\n000000067F000080000006200C0000080000-000000067F000080000006200C0000084000__0000007AA0A6FB48\n000000067F000080000006200C0000084000-000000067F000080000006200C0000088000__0000007AA0A6FB48\n000000067F000080000006200C000008460B-000000067F000080000006200C000008DD71__00000078859FEA11-00000079C527F0D9\n000000067F000080000006200C0000088000-000000067F000080000006200C000008C000__0000007AA0A6FB48\n000000067F000080000006200C000008C000-000000067F000080000006200C0000090000__0000007AA0A6FB4
8\n000000067F000080000006200C000008DD71-000000067F000080000006200C00000974D7__00000078859FEA11-00000079C527F0D9\n000000067F000080000006200C0000090000-000000067F000080000006200C0000094000__0000007AA0A6FB48\n000000067F000080000006200C0000090C11-000000067F000080000006200C00000DA35B__0000007A3F679FA1-0000007AA1DF6639\n000000067F000080000006200C0000094000-000000067F000080000006200C0000098000__0000007AA0A6FB48\n000000067F000080000006200C00000974D7-000000067F000080000006200C00000A0C0B__00000078859FEA11-00000079C527F0D9\n000000067F000080000006200C0000098000-000000067F000080000006200C000009C000__0000007AA0A6FB48\n000000067F000080000006200C000009C000-000000067F000080000006200C00000A0000__0000007AA0A6FB48\n000000067F000080000006200C00000A0000-000000067F000080000006200C00000A4000__0000007AA0A6FB48\n000000067F000080000006200C00000A0C0B-000000067F000080000006200C00000AA371__00000078859FEA11-00000079C527F0D9\n000000067F000080000006200C00000A4000-000000067F000080000006200C00000A8000__0000007AA0A6FB48\n000000067F000080000006200C00000A8000-000000067F000080000006200C00000AC000__0000007AA0A6FB48\n000000067F000080000006200C00000AA371-000000067F000080000006200C00000B3AD7__00000078859FEA11-00000079C527F0D9\n000000067F000080000006200C00000AC000-000000067F000080000006200C00000B0000__0000007AA0A6FB48\n000000067F000080000006200C00000B0000-000000067F000080000006200C00000B4000__0000007AA0A6FB48\n000000067F000080000006200C00000B3AD7-000000067F000080000006200C00000BD20B__00000078859FEA11-00000079C527F0D9\n000000067F000080000006200C00000B4000-000000067F000080000006200C00000B8000__0000007AA0A6FB48\n000000067F000080000006200C00000B8000-000000067F000080000006200C00000BC000__0000007AA0A6FB48\n000000067F000080000006200C00000BC000-000000067F000080000006200C00000C0000__0000007AA0A6FB48\n000000067F000080000006200C00000BD20B-000000067F000080000006200C00000C6932__00000078859FEA11-00000079C527F0D9\n000000067F000080000006200C00000C0000-000000067F000080000006200C00000C4000__0000007AA0A6FB48\n000000067F00008000
0006200C00000C4000-000000067F000080000006200C00000C8000__0000007AA0A6FB48\n000000067F000080000006200C00000C6932-000000067F000080000006200C00000D0098__00000078859FEA11-00000079C527F0D9\n000000067F000080000006200C00000C8000-000000067F000080000006200C00000CC000__0000007AA0A6FB48\n000000067F000080000006200C00000CC000-000000067F000080000006200C00000D0000__0000007AA0A6FB48\n000000067F000080000006200C00000D0000-000000067F000080000006200C00000D4000__0000007AA0A6FB48\n000000067F000080000006200C00000D0098-000000067F000080000006200C00000D97FE__00000078859FEA11-00000079C527F0D9\n000000067F000080000006200C00000D4000-000000067F000080000006200C00000D8000__0000007AA0A6FB48\n000000067F000080000006200C00000D8000-000000067F000080000006200C00000DC000__0000007AA0A6FB48\n000000067F000080000006200C00000D97FE-000000067F000080000006200C00000E2F0B__00000078859FEA11-00000079C527F0D9\n000000067F000080000006200C00000DA36C-000000067F00008000000620140000002D07__0000007A3F679FA1-0000007AA1DF6639\n000000067F000080000006200C00000DC000-000000067F000080000006200C00000E0000__0000007AA0A6FB48\n000000067F000080000006200C00000E0000-000000067F000080000006200C00000E4000__0000007AA0A6FB48\n000000067F000080000006200C00000E2F0B-000000067F000080000006200C00000EC671__00000078859FEA11-00000079C527F0D9\n000000067F000080000006200C00000E4000-000000067F000080000006200C00000E8000__0000007AA0A6FB48\n000000067F000080000006200C00000E8000-000000067F000080000006200C00000EC000__0000007AA0A6FB48\n000000067F000080000006200C00000EC000-000000067F000080000006200C00000F0000__0000007AA0A6FB48\n000000067F000080000006200C00000EC671-000000067F000080000006200C00000F5D9F__00000078859FEA11-00000079C527F0D9\n000000067F000080000006200C00000F0000-000000067F000080000006200C00000F4000__0000007AA0A6FB48\n000000067F000080000006200C00000F4000-000000067F000080000006200C00000F8000__0000007AA0A6FB48\n000000067F000080000006200C00000F5D9F-000000067F000080000006200C00000FF505__00000078859FEA11-00000079C527F0D9\n000000067F000080000006200C00000F8000-00
0000067F000080000006200C00000FC000__0000007AA0A6FB48\n000000067F000080000006200C00000FC000-000000067F000080000006200C0000100000__0000007AA0A6FB48\n000000067F000080000006200C00000FF505-000000067F000080000006200C0000108C10__00000078859FEA11-00000079C527F0D9\n000000067F000080000006200C0000100000-000000067F000080000006200C0000104000__0000007AA0A6FB48\n000000067F000080000006200C0000104000-000000067F000080000006200C0000108000__0000007AA0A6FB48\n000000067F000080000006200C0000107883-000000067F000080000006200C01000000AF__00000079C527F0D9-0000007A3F679FA1\n000000067F000080000006200C0000108000-000000067F000080000006200C000010C000__0000007AA0A6FB48\n000000067F000080000006200C0000108C10-000000067F000080000006200C0100000000__00000078859FEA11-00000079C527F0D9\n000000067F000080000006200C000010C000-000000067F000080000006200C0000110000__0000007AA0A6FB48\n000000067F000080000006200C0000110000-000000067F00008000000620120100000000__0000007AA0A6FB48\n000000067F000080000006200C01000000AF-000000067F00008000000620140000004888__00000079C527F0D9-0000007A3F679FA1\n000000067F00008000000620140000002D0A-000000067F00008000000620140000016355__0000007A3F679FA1-0000007AA1DF6639\n000000067F00008000000620140000004888-000000067F0000800000062014000000BC11__00000079C527F0D9-0000007A3F679FA1\n000000067F0000800000062014000000BC11-000000067F00008000000620140000012FA7__00000079C527F0D9-0000007A3F679FA1\n000000067F00008000000620140000012FA7-000000067F0000800000062014000001A33D__00000079C527F0D9-0000007A3F679FA1\n000000067F00008000000620140000016357-000000067F00008000000620140000029C35__0000007A3F679FA1-0000007AA1DF6639\n000000067F0000800000062014000001A33D-000000067F000080000006201400000216B4__00000079C527F0D9-0000007A3F679FA1\n000000067F000080000006201400000216B4-000000067F00008000000620140000028A65__00000079C527F0D9-0000007A3F679FA1\n000000067F00008000000620140000028A65-030000000000000000000000000000000002__00000079C527F0D9-0000007A3F679FA1\n000000067F00008000000620140000029C38-0300000000000000000000000000000
00002__0000007A3F679FA1-0000007AA1DF6639\n000000067F000080000006400C0000000000-000000067F000080000006400C0000004000__0000007B9877EF40\n000000067F000080000006400C0000000000-000000067F000080000006400C0000004000__0000007D41715570\n000000067F000080000006400C0000004000-000000067F000080000006400C0000008000__0000007B9877EF40\n000000067F000080000006400C0000004000-000000067F000080000006400C0000008000__0000007D41715570\n000000067F000080000006400C0000007987-000000067F000080000006400C00000110ED__0000007AA1DF6639-0000007B14D5C521\n000000067F000080000006400C0000008000-000000067F000080000006400C000000C000__0000007B9877EF40\n000000067F000080000006400C0000008000-000000067F000080000006400C000000C000__0000007D41715570\n000000067F000080000006400C000000C000-000000067F000080000006400C0000010000__0000007B9877EF40\n000000067F000080000006400C000000C000-000000067F000080000006400C0000010000__0000007D41715570\n000000067F000080000006400C0000010000-000000067F000080000006400C0000014000__0000007B9877EF40\n000000067F000080000006400C0000010000-000000067F000080000006400C0000014000__0000007D41715570\n000000067F000080000006400C00000110ED-000000067F000080000006400C000001A80A__0000007AA1DF6639-0000007B14D5C521\n000000067F000080000006400C0000014000-000000067F000080000006400C0000018000__0000007B9877EF40\n000000067F000080000006400C0000014000-000000067F000080000006400C0000018000__0000007D41715570\n000000067F000080000006400C0000018000-000000067F000080000006400C000001C000__0000007B9877EF40\n000000067F000080000006400C0000018000-000000067F000080000006400C000001C000__0000007D41715570\n000000067F000080000006400C000001A80A-000000067F000080000006400C0000023F4A__0000007AA1DF6639-0000007B14D5C521\n000000067F000080000006400C000001C000-000000067F000080000006400C0000020000__0000007B9877EF40\n000000067F000080000006400C000001C000-000000067F000080000006400C0000020000__0000007D41715570\n000000067F000080000006400C0000020000-000000067F000080000006400C0000024000__0000007B9877EF40\n000000067F000080000006400C0000020000-000000067F
000080000006400C0000024000__0000007D41715570\n000000067F000080000006400C0000023F4A-000000067F000080000006400C000002D6B0__0000007AA1DF6639-0000007B14D5C521\n000000067F000080000006400C0000024000-000000067F000080000006400C0000028000__0000007B9877EF40\n000000067F000080000006400C0000024000-000000067F000080000006400C0000028000__0000007D41715570\n000000067F000080000006400C0000028000-000000067F000080000006400C000002C000__0000007B9877EF40\n000000067F000080000006400C0000028000-000000067F000080000006400C000002C000__0000007D41715570\n000000067F000080000006400C000002C000-000000067F000080000006400C0000030000__0000007B9877EF40\n000000067F000080000006400C000002C000-000000067F000080000006400C0000030000__0000007D41715570\n000000067F000080000006400C000002D6B0-000000067F000080000006400C0000036DD4__0000007AA1DF6639-0000007B14D5C521\n000000067F000080000006400C0000030000-000000067F000080000006400C0000034000__0000007B9877EF40\n000000067F000080000006400C0000030000-000000067F000080000006400C0000034000__0000007D41715570\n000000067F000080000006400C0000034000-000000067F000080000006400C0000038000__0000007B9877EF40\n000000067F000080000006400C0000034000-000000067F000080000006400C0000038000__0000007D41715570\n000000067F000080000006400C0000036DD4-000000067F000080000006400C000004050A__0000007AA1DF6639-0000007B14D5C521\n000000067F000080000006400C0000038000-000000067F000080000006400C000003C000__0000007B9877EF40\n000000067F000080000006400C0000038000-000000067F000080000006400C000003C000__0000007D41715570\n000000067F000080000006400C000003C000-000000067F000080000006400C0000040000__0000007B9877EF40\n000000067F000080000006400C000003C000-000000067F000080000006400C0000040000__0000007D41715570\n000000067F000080000006400C0000040000-000000067F000080000006400C0000044000__0000007B9877EF40\n000000067F000080000006400C0000040000-000000067F000080000006400C0000044000__0000007D41715570\n000000067F000080000006400C000004050A-030000000000000000000000000000000002__0000007AA1DF6639-0000007B14D5C521\n000000067F000080000006400C
0000044000-000000067F000080000006400C0000048000__0000007B9877EF40\n000000067F000080000006400C0000044000-000000067F000080000006400C0000048000__0000007D41715570\n000000067F000080000006400C0000048000-000000067F000080000006400C000004C000__0000007B9877EF40\n000000067F000080000006400C0000048000-000000067F000080000006400C000004C000__0000007D41715570\n000000067F000080000006400C000004B4C9-000000067F000080000006400C0000054C01__0000007B14D5C521-0000007C73B53FC9\n000000067F000080000006400C000004C000-000000067F000080000006400C0000050000__0000007B9877EF40\n000000067F000080000006400C000004C000-000000067F000080000006400C0000050000__0000007D41715570\n000000067F000080000006400C0000050000-000000067F000080000006400C0000054000__0000007B9877EF40\n000000067F000080000006400C0000050000-000000067F000080000006400C0000054000__0000007D41715570\n000000067F000080000006400C00000525C4-000000067F000080000006400C00000A47A7__0000007CEE5A0B91-0000007D41EA8D51\n000000067F000080000006400C0000054000-000000067F000080000006400C0000058000__0000007B9877EF40\n000000067F000080000006400C0000054000-000000067F000080000006400C0000058000__0000007D41715570\n000000067F000080000006400C0000054C01-000000067F000080000006400C000005E30C__0000007B14D5C521-0000007C73B53FC9\n000000067F000080000006400C0000058000-000000067F000080000006400C000005C000__0000007B9877EF40\n000000067F000080000006400C0000058000-000000067F000080000006400C000005C000__0000007D41715570\n000000067F000080000006400C000005C000-000000067F000080000006400C0000060000__0000007B9877EF40\n000000067F000080000006400C000005C000-000000067F000080000006400C0000060000__0000007D41715570\n000000067F000080000006400C000005E30C-000000067F000080000006400C0000067A2C__0000007B14D5C521-0000007C73B53FC9\n000000067F000080000006400C0000060000-000000067F000080000006400C0000064000__0000007B9877EF40\n000000067F000080000006400C0000060000-000000067F000080000006400C0000064000__0000007D41715570\n000000067F000080000006400C0000064000-000000067F000080000006400C0000068000__0000007B9877EF40\n00000
0067F000080000006400C0000064000-000000067F000080000006400C0000068000__0000007D41715570\n000000067F000080000006400C0000067A2C-000000067F000080000006400C0000071187__0000007B14D5C521-0000007C73B53FC9\n000000067F000080000006400C0000068000-000000067F000080000006400C000006C000__0000007B9877EF40\n000000067F000080000006400C0000068000-000000067F000080000006400C000006C000__0000007D41715570\n000000067F000080000006400C000006C000-000000067F000080000006400C0000070000__0000007B9877EF40\n000000067F000080000006400C000006C000-000000067F000080000006400C0000070000__0000007D41715570\n000000067F000080000006400C0000070000-000000067F000080000006400C0000074000__0000007B9877EF40\n000000067F000080000006400C0000070000-000000067F000080000006400C0000074000__0000007D41715570\n000000067F000080000006400C0000071187-000000067F000080000006400C000007A8ED__0000007B14D5C521-0000007C73B53FC9\n000000067F000080000006400C0000074000-000000067F000080000006400C0000078000__0000007B9877EF40\n000000067F000080000006400C0000074000-000000067F000080000006400C0000078000__0000007D41715570\n000000067F000080000006400C0000078000-000000067F000080000006400C000007C000__0000007B9877EF40\n000000067F000080000006400C0000078000-000000067F000080000006400C000007C000__0000007D41715570\n000000067F000080000006400C000007A8ED-000000067F000080000006400C000008400B__0000007B14D5C521-0000007C73B53FC9\n000000067F000080000006400C000007C000-000000067F000080000006400C0000080000__0000007B9877EF40\n000000067F000080000006400C000007C000-000000067F000080000006400C0000080000__0000007D41715570\n000000067F000080000006400C0000080000-000000067F000080000006400C0000084000__0000007B9877EF40\n000000067F000080000006400C0000080000-000000067F000080000006400C0000084000__0000007D41715570\n000000067F000080000006400C0000084000-000000067F000080000006400C0000088000__0000007B9877EF40\n000000067F000080000006400C0000084000-000000067F000080000006400C0000088000__0000007D41715570\n000000067F000080000006400C000008400B-000000067F000080000006400C000008D771__0000007B14D5C521-00
00007C73B53FC9\n000000067F000080000006400C0000088000-000000067F000080000006400C000008C000__0000007B9877EF40\n000000067F000080000006400C0000088000-000000067F000080000006400C000008C000__0000007D41715570\n000000067F000080000006400C000008C000-000000067F000080000006400C0000090000__0000007B9877EF40\n000000067F000080000006400C000008C000-000000067F000080000006400C0000090000__0000007D41715570\n000000067F000080000006400C000008D771-000000067F000080000006400C0000096ED7__0000007B14D5C521-0000007C73B53FC9\n000000067F000080000006400C0000090000-000000067F000080000006400C0000094000__0000007D41715570\n000000067F000080000006400C0000090000-030000000000000000000000000000000002__0000007B9877EF40\n000000067F000080000006400C0000094000-000000067F000080000006400C0000098000__0000007D41715570\n000000067F000080000006400C0000096ED7-000000067F000080000006400C00000A060B__0000007B14D5C521-0000007C73B53FC9\n000000067F000080000006400C0000098000-000000067F000080000006400C000009C000__0000007D41715570\n000000067F000080000006400C000009C000-000000067F000080000006400C00000A0000__0000007D41715570\n000000067F000080000006400C00000A0000-000000067F000080000006400C00000A4000__0000007D41715570\n000000067F000080000006400C00000A060B-000000067F000080000006400C00000A9D71__0000007B14D5C521-0000007C73B53FC9\n000000067F000080000006400C00000A4000-000000067F000080000006400C00000A8000__0000007D41715570\n000000067F000080000006400C00000A47B1-000000067F000080000006400C00000F593E__0000007CEE5A0B91-0000007D41EA8D51\n000000067F000080000006400C00000A8000-000000067F000080000006400C00000AC000__0000007D41715570\n000000067F000080000006400C00000A887C-000000067F000080000006400C020000001F__0000007C73B53FC9-0000007CEE5A0B91\n000000067F000080000006400C00000A9D71-000000067F000080000006400C00000B34D7__0000007B14D5C521-0000007C73B53FC9\n000000067F000080000006400C00000AC000-000000067F000080000006400C00000B0000__0000007D41715570\n000000067F000080000006400C00000B0000-000000067F000080000006400C00000B4000__0000007D41715570\n000000067F000080000006
400C00000B34D7-000000067F000080000006400C00000BCC0C__0000007B14D5C521-0000007C73B53FC9\n000000067F000080000006400C00000B4000-000000067F000080000006400C00000B8000__0000007D41715570\n000000067F000080000006400C00000B8000-000000067F000080000006400C00000BC000__0000007D41715570\n000000067F000080000006400C00000BC000-000000067F000080000006400C00000C0000__0000007D41715570\n000000067F000080000006400C00000BCC0C-000000067F000080000006400C00000C6336__0000007B14D5C521-0000007C73B53FC9\n000000067F000080000006400C00000C0000-000000067F000080000006400C00000C4000__0000007D41715570\n000000067F000080000006400C00000C4000-000000067F000080000006400C00000C8000__0000007D41715570\n000000067F000080000006400C00000C6336-000000067F000080000006400C00000CFA9C__0000007B14D5C521-0000007C73B53FC9\n000000067F000080000006400C00000C8000-000000067F000080000006400C00000CC000__0000007D41715570\n000000067F000080000006400C00000CC000-000000067F000080000006400C00000D0000__0000007D41715570\n000000067F000080000006400C00000CFA9C-000000067F000080000006400C00000D91AB__0000007B14D5C521-0000007C73B53FC9\n000000067F000080000006400C00000D0000-000000067F000080000006400C00000D4000__0000007D41715570\n000000067F000080000006400C00000D4000-000000067F000080000006400C00000D8000__0000007D41715570\n000000067F000080000006400C00000D8000-000000067F000080000006400C00000DC000__0000007D41715570\n000000067F000080000006400C00000D91AB-000000067F000080000006400C00000E2911__0000007B14D5C521-0000007C73B53FC9\n000000067F000080000006400C00000DC000-000000067F000080000006400C00000E0000__0000007D41715570\n000000067F000080000006400C00000E0000-000000067F000080000006400C00000E4000__0000007D41715570\n000000067F000080000006400C00000E2911-000000067F000080000006400C00000EC077__0000007B14D5C521-0000007C73B53FC9\n000000067F000080000006400C00000E4000-000000067F000080000006400C00000E8000__0000007D41715570\n000000067F000080000006400C00000E8000-000000067F000080000006400C00000EC000__0000007D41715570\n000000067F000080000006400C00000EC000-000000067F0000800000064
00C00000F0000__0000007D41715570\n000000067F000080000006400C00000EC077-000000067F000080000006400C00000F57A8__0000007B14D5C521-0000007C73B53FC9\n000000067F000080000006400C00000F0000-000000067F000080000006400C00000F4000__0000007D41715570\n000000067F000080000006400C00000F4000-000000067F000080000006400C00000F8000__0000007D41715570\n000000067F000080000006400C00000F57A8-000000067F000080000006400C00000FEF0A__0000007B14D5C521-0000007C73B53FC9\n000000067F000080000006400C00000F5940-000000067F0000800000064014000000E7FF__0000007CEE5A0B91-0000007D41EA8D51\n000000067F000080000006400C00000F8000-000000067F000080000006400C00000FC000__0000007D41715570\n000000067F000080000006400C00000FC000-000000067F000080000006400C0000100000__0000007D41715570\n000000067F000080000006400C00000FEF0A-000000067F000080000006400C000010862B__0000007B14D5C521-0000007C73B53FC9\n000000067F000080000006400C0000100000-000000067F000080000006400C0000104000__0000007D41715570\n000000067F000080000006400C0000104000-000000067F000080000006400C0000108000__0000007D41715570\n000000067F000080000006400C0000108000-000000067F000080000006400C000010C000__0000007D41715570\n000000067F000080000006400C000010862B-000000067F000080000006400C0000111C20__0000007B14D5C521-0000007C73B53FC9\n000000067F000080000006400C000010C000-000000067F000080000006400C0000110000__0000007D41715570\n000000067F000080000006400C0000110000-000000067F00008000000640120100000000__0000007D41715570\n000000067F000080000006400C00FFFFFFFF-01000000000000000100000003000000000D__0000007B14D5C521-0000007C73B53FC9\n000000067F000080000006400C020000001F-000000067F0000800000064014000000691F__0000007C73B53FC9-0000007CEE5A0B91\n000000067F00008000000640140000000000-000000067F00008000000640140000004000__0000007D41715570\n000000067F00008000000640140000004000-000000067F00008000000640140000008000__0000007D41715570\n000000067F0000800000064014000000691F-000000067F0000800000064014000000D68F__0000007C73B53FC9-0000007CEE5A0B91\n000000067F00008000000640140000008000-000000067F00008000000640140
00000C000__0000007D41715570\n000000067F0000800000064014000000C000-000000067F00008000000640140000010000__0000007D41715570\n000000067F0000800000064014000000D68F-000000067F00008000000640140000014406__0000007C73B53FC9-0000007CEE5A0B91\n000000067F0000800000064014000000E803-000000067F000080000006401400000274BB__0000007CEE5A0B91-0000007D41EA8D51\n000000067F00008000000640140000010000-000000067F00008000000640140000014000__0000007D41715570\n000000067F00008000000640140000014000-000000067F00008000000640140000018000__0000007D41715570\n000000067F00008000000640140000014406-000000067F0000800000064014000001B192__0000007C73B53FC9-0000007CEE5A0B91\n000000067F00008000000640140000018000-000000067F0000800000064014000001C000__0000007D41715570\n000000067F0000800000064014000001B192-000000067F00008000000640140000021F03__0000007C73B53FC9-0000007CEE5A0B91\n000000067F0000800000064014000001C000-000000067F00008000000640140000020000__0000007D41715570\n000000067F00008000000640140000020000-000000067F00008000000640140000024000__0000007D41715570\n000000067F00008000000640140000021F03-000000067F00008000000640140000028C6A__0000007C73B53FC9-0000007CEE5A0B91\n000000067F00008000000640140000024000-000000067F00008000000640140000028000__0000007D41715570\n000000067F000080000006401400000274BF-030000000000000000000000000000000002__0000007CEE5A0B91-0000007D41EA8D51\n000000067F00008000000640140000028000-000000067F0000800000064014000002C000__0000007D41715570\n000000067F00008000000640140000028C6A-030000000000000000000000000000000002__0000007C73B53FC9-0000007CEE5A0B91\n000000067F0000800000064014000002C000-030000000000000000000000000000000002__0000007D41715570\n000000067F000080000006600C0000000000-000000067F000080000006600C0000004000__0000007F12B83FE8\n000000067F000080000006600C0000004000-000000067F000080000006600C0000008000__0000007F12B83FE8\n000000067F000080000006600C0000008000-000000067F000080000006600C000000C000__0000007F12B83FE8\n000000067F000080000006600C0000009381-000000067F000080000006600C0000012AE7__0000007D41
EA8D51-0000007DC21DE569\n000000067F000080000006600C000000C000-000000067F000080000006600C0000010000__0000007F12B83FE8\n000000067F000080000006600C0000010000-000000067F000080000006600C0000014000__0000007F12B83FE8\n000000067F000080000006600C0000012AE7-000000067F000080000006600C000001C20B__0000007D41EA8D51-0000007DC21DE569\n000000067F000080000006600C0000014000-000000067F000080000006600C0000018000__0000007F12B83FE8\n000000067F000080000006600C0000018000-000000067F000080000006600C000001C000__0000007F12B83FE8\n000000067F000080000006600C000001C000-000000067F000080000006600C0000020000__0000007F12B83FE8\n000000067F000080000006600C000001C20B-000000067F000080000006600C000002593B__0000007D41EA8D51-0000007DC21DE569\n000000067F000080000006600C0000020000-000000067F000080000006600C0000024000__0000007F12B83FE8\n000000067F000080000006600C0000024000-000000067F000080000006600C0000028000__0000007F12B83FE8\n000000067F000080000006600C000002593B-000000067F000080000006600C000002F0A1__0000007D41EA8D51-0000007DC21DE569\n000000067F000080000006600C0000028000-000000067F000080000006600C000002C000__0000007F12B83FE8\n000000067F000080000006600C000002C000-000000067F000080000006600C0000030000__0000007F12B83FE8\n000000067F000080000006600C000002F0A1-000000067F000080000006600C00000387B6__0000007D41EA8D51-0000007DC21DE569\n000000067F000080000006600C0000030000-000000067F000080000006600C0000034000__0000007F12B83FE8\n000000067F000080000006600C0000034000-000000067F000080000006600C0000038000__0000007F12B83FE8\n000000067F000080000006600C0000038000-000000067F000080000006600C000003C000__0000007F12B83FE8\n000000067F000080000006600C00000387B6-000000067F000080000006600C0000041F1C__0000007D41EA8D51-0000007DC21DE569\n000000067F000080000006600C000003C000-000000067F000080000006600C0000040000__0000007F12B83FE8\n000000067F000080000006600C0000040000-000000067F000080000006600C0000044000__0000007F12B83FE8\n000000067F000080000006600C0000041F1C-000000067F000080000006600C000004B682__0000007D41EA8D51-0000007DC21DE569\n000000067F000
080000006600C0000044000-000000067F000080000006600C0000048000__0000007F12B83FE8\n000000067F000080000006600C0000048000-000000067F000080000006600C000004C000__0000007F108C1FD8\n000000067F000080000006600C0000048000-000000067F000080000006600C000004C000__0000007FDCA75700\n000000067F000080000006600C0000049743-000000067F000080000006600C0000093532__0000007F7BE4E6F1-0000007FDCDCE659\n000000067F000080000006600C000004B682-030000000000000000000000000000000002__0000007D41EA8D51-0000007DC21DE569\n000000067F000080000006600C000004BAC3-000000067F000080000006600C00000551F8__0000007DC21DE569-0000007E71DBF8F9\n000000067F000080000006600C000004C000-000000067F000080000006600C0000050000__0000007F108C1FD8\n000000067F000080000006600C000004C000-000000067F000080000006600C0000050000__0000007FDCA75700\n000000067F000080000006600C0000050000-000000067F000080000006600C0000054000__0000007F108C1FD8\n000000067F000080000006600C0000050000-000000067F000080000006600C0000054000__0000007FDCA75700\n000000067F000080000006600C0000054000-000000067F000080000006600C0000058000__0000007F108C1FD8\n000000067F000080000006600C0000054000-000000067F000080000006600C0000058000__0000007FDCA75700\n000000067F000080000006600C00000551F8-000000067F000080000006600C000005E90C__0000007DC21DE569-0000007E71DBF8F9\n000000067F000080000006600C0000058000-000000067F000080000006600C000005C000__0000007F108C1FD8\n000000067F000080000006600C0000058000-000000067F000080000006600C000005C000__0000007FDCA75700\n000000067F000080000006600C000005C000-000000067F000080000006600C0000060000__0000007F108C1FD8\n000000067F000080000006600C000005C000-000000067F000080000006600C0000060000__0000007FDCA75700\n000000067F000080000006600C000005E90C-000000067F000080000006600C000006802C__0000007DC21DE569-0000007E71DBF8F9\n000000067F000080000006600C0000060000-000000067F000080000006600C0000064000__0000007F108C1FD8\n000000067F000080000006600C0000060000-000000067F000080000006600C0000064000__0000007FDCA75700\n000000067F000080000006600C0000064000-000000067F000080000006600C00000
68000__0000007F108C1FD8\n000000067F000080000006600C0000064000-000000067F000080000006600C0000068000__0000007FDCA75700\n000000067F000080000006600C0000068000-000000067F000080000006600C000006C000__0000007F108C1FD8\n000000067F000080000006600C0000068000-000000067F000080000006600C000006C000__0000007FDCA75700\n000000067F000080000006600C000006802C-000000067F000080000006600C0000071783__0000007DC21DE569-0000007E71DBF8F9\n000000067F000080000006600C000006C000-000000067F000080000006600C0000070000__0000007F108C1FD8\n000000067F000080000006600C000006C000-000000067F000080000006600C0000070000__0000007FDCA75700\n000000067F000080000006600C0000070000-000000067F000080000006600C0000074000__0000007F108C1FD8\n000000067F000080000006600C0000070000-000000067F000080000006600C0000074000__0000007FDCA75700\n000000067F000080000006600C0000071783-000000067F000080000006600C000007AEE9__0000007DC21DE569-0000007E71DBF8F9\n000000067F000080000006600C0000074000-000000067F000080000006600C0000078000__0000007F108C1FD8\n000000067F000080000006600C0000074000-000000067F000080000006600C0000078000__0000007FDCA75700\n000000067F000080000006600C0000078000-000000067F000080000006600C000007C000__0000007F108C1FD8\n000000067F000080000006600C0000078000-000000067F000080000006600C000007C000__0000007FDCA75700\n000000067F000080000006600C000007AEE9-000000067F000080000006600C000008460B__0000007DC21DE569-0000007E71DBF8F9\n000000067F000080000006600C000007C000-000000067F000080000006600C0000080000__0000007F108C1FD8\n000000067F000080000006600C000007C000-000000067F000080000006600C0000080000__0000007FDCA75700\n000000067F000080000006600C0000080000-000000067F000080000006600C0000084000__0000007F108C1FD8\n000000067F000080000006600C0000080000-000000067F000080000006600C0000084000__0000007FDCA75700\n000000067F000080000006600C0000084000-000000067F000080000006600C0000088000__0000007F108C1FD8\n000000067F000080000006600C0000084000-000000067F000080000006600C0000088000__0000007FDCA75700\n000000067F000080000006600C000008460B-000000067F000080000006600C0
00008DD71__0000007DC21DE569-0000007E71DBF8F9\n000000067F000080000006600C0000088000-000000067F000080000006600C000008C000__0000007F108C1FD8\n000000067F000080000006600C0000088000-000000067F000080000006600C000008C000__0000007FDCA75700\n000000067F000080000006600C000008C000-000000067F000080000006600C0000090000__0000007F108C1FD8\n000000067F000080000006600C000008C000-000000067F000080000006600C0000090000__0000007FDCA75700\n000000067F000080000006600C000008DD71-000000067F000080000006600C00000974D7__0000007DC21DE569-0000007E71DBF8F9\n000000067F000080000006600C0000090000-000000067F000080000006600C0000094000__0000007F108C1FD8\n000000067F000080000006600C0000090000-000000067F000080000006600C0000094000__0000007FDCA75700\n000000067F000080000006600C0000093532-000000067F000080000006600C00000DD150__0000007F7BE4E6F1-0000007FDCDCE659\n000000067F000080000006600C0000094000-000000067F000080000006600C0000098000__0000007F108C1FD8\n000000067F000080000006600C0000094000-000000067F000080000006600C0000098000__0000007FDCA75700\n000000067F000080000006600C00000974D7-000000067F000080000006600C00000A0C0B__0000007DC21DE569-0000007E71DBF8F9\n000000067F000080000006600C0000098000-000000067F000080000006600C000009C000__0000007F108C1FD8\n000000067F000080000006600C0000098000-000000067F000080000006600C000009C000__0000007FDCA75700\n000000067F000080000006600C000009C000-000000067F000080000006600C00000A0000__0000007F108C1FD8\n000000067F000080000006600C000009C000-000000067F000080000006600C00000A0000__0000007FDCA75700\n000000067F000080000006600C00000A0000-000000067F000080000006600C00000A4000__0000007F108C1FD8\n000000067F000080000006600C00000A0000-000000067F000080000006600C00000A4000__0000007FDCA75700\n000000067F000080000006600C00000A0C0B-000000067F000080000006600C00000AA371__0000007DC21DE569-0000007E71DBF8F9\n000000067F000080000006600C00000A4000-000000067F000080000006600C00000A8000__0000007F108C1FD8\n000000067F000080000006600C00000A4000-000000067F000080000006600C00000A8000__0000007FDCA75700\n000000067F000080000006600C
00000A8000-000000067F000080000006600C00000AC000__0000007F108C1FD8\n000000067F000080000006600C00000A8000-000000067F000080000006600C00000AC000__0000007FDCA75700\n000000067F000080000006600C00000AA371-000000067F000080000006600C00000B3AD7__0000007DC21DE569-0000007E71DBF8F9\n000000067F000080000006600C00000AC000-000000067F000080000006600C00000B0000__0000007F108C1FD8\n000000067F000080000006600C00000AC000-000000067F000080000006600C00000B0000__0000007FDCA75700\n000000067F000080000006600C00000B0000-000000067F000080000006600C00000B4000__0000007F108C1FD8\n000000067F000080000006600C00000B0000-000000067F000080000006600C00000B4000__0000007FDCA75700\n000000067F000080000006600C00000B3AD7-000000067F000080000006600C0100000000__0000007DC21DE569-0000007E71DBF8F9\n000000067F000080000006600C00000B4000-000000067F000080000006600C00000B8000__0000007F108C1FD8\n000000067F000080000006600C00000B4000-000000067F000080000006600C00000B8000__0000007FDCA75700\n000000067F000080000006600C00000B8000-000000067F000080000006600C00000BC000__0000007F108C1FD8\n000000067F000080000006600C00000B8000-000000067F000080000006600C00000BC000__0000007FDCA75700\n000000067F000080000006600C00000BC000-000000067F000080000006600C00000C0000__0000007F108C1FD8\n000000067F000080000006600C00000BC000-000000067F000080000006600C00000C0000__0000007FDCA75700\n000000067F000080000006600C00000BC29F-000000067F000080000006600C00000C59CF__0000007E71DBF8F9-0000007F11E4BFE9\n000000067F000080000006600C00000C0000-000000067F000080000006600C00000C4000__0000007F108C1FD8\n000000067F000080000006600C00000C0000-000000067F000080000006600C00000C4000__0000007FDCA75700\n000000067F000080000006600C00000C4000-000000067F000080000006600C00000C8000__0000007F108C1FD8\n000000067F000080000006600C00000C4000-000000067F000080000006600C00000C8000__0000007FDCA75700\n000000067F000080000006600C00000C59CF-000000067F000080000006600C00000CF10B__0000007E71DBF8F9-0000007F11E4BFE9\n000000067F000080000006600C00000C8000-000000067F000080000006600C00000CC000__0000007F108C1FD8\n00000
0067F000080000006600C00000C8000-000000067F000080000006600C00000CC000__0000007FDCA75700\n000000067F000080000006600C00000CC000-000000067F000080000006600C00000D0000__0000007F108C1FD8\n000000067F000080000006600C00000CC000-000000067F000080000006600C00000D0000__0000007FDCA75700\n000000067F000080000006600C00000CF10B-000000067F000080000006600C00000D882C__0000007E71DBF8F9-0000007F11E4BFE9\n000000067F000080000006600C00000D0000-000000067F000080000006600C00000D4000__0000007F108C1FD8\n000000067F000080000006600C00000D0000-000000067F000080000006600C00000D4000__0000007FDCA75700\n000000067F000080000006600C00000D4000-000000067F000080000006600C00000D8000__0000007F108C1FD8\n000000067F000080000006600C00000D4000-000000067F000080000006600C00000D8000__0000007FDCA75700\n000000067F000080000006600C00000D8000-000000067F000080000006600C00000DC000__0000007F108C1FD8\n000000067F000080000006600C00000D8000-000000067F000080000006600C00000DC000__0000007FDCA75700\n000000067F000080000006600C00000D882C-000000067F000080000006600C00000E1F7F__0000007E71DBF8F9-0000007F11E4BFE9\n000000067F000080000006600C00000DC000-000000067F000080000006600C00000E0000__0000007F108C1FD8\n000000067F000080000006600C00000DC000-000000067F000080000006600C00000E0000__0000007FDCA75700\n000000067F000080000006600C00000DD152-000000067F00008000000660140000003DA8__0000007F7BE4E6F1-0000007FDCDCE659\n000000067F000080000006600C00000E0000-000000067F000080000006600C00000E4000__0000007F108C1FD8\n000000067F000080000006600C00000E0000-000000067F000080000006600C00000E4000__0000007FDCA75700\n000000067F000080000006600C00000E1F7F-000000067F000080000006600C00000EB6E5__0000007E71DBF8F9-0000007F11E4BFE9\n000000067F000080000006600C00000E4000-000000067F000080000006600C00000E8000__0000007F108C1FD8\n000000067F000080000006600C00000E4000-000000067F000080000006600C00000E8000__0000007FDCA75700\n000000067F000080000006600C00000E8000-000000067F000080000006600C00000EC000__0000007F108C1FD8\n000000067F000080000006600C00000E8000-000000067F000080000006600C00000EC000__00
00007FDCA75700\n000000067F000080000006600C00000EB6E5-000000067F000080000006600C00000F4E0C__0000007E71DBF8F9-0000007F11E4BFE9\n000000067F000080000006600C00000EC000-000000067F000080000006600C00000F0000__0000007F108C1FD8\n000000067F000080000006600C00000EC000-000000067F000080000006600C00000F0000__0000007FDCA75700\n000000067F000080000006600C00000F0000-000000067F000080000006600C00000F4000__0000007F108C1FD8\n000000067F000080000006600C00000F0000-000000067F000080000006600C00000F4000__0000007FDCA75700\n000000067F000080000006600C00000F4000-000000067F000080000006600C00000F8000__0000007F108C1FD8\n000000067F000080000006600C00000F4000-000000067F000080000006600C00000F8000__0000007FDCA75700\n000000067F000080000006600C00000F4E0C-000000067F000080000006600C00000FE572__0000007E71DBF8F9-0000007F11E4BFE9\n000000067F000080000006600C00000F8000-000000067F000080000006600C00000FC000__0000007F108C1FD8\n000000067F000080000006600C00000F8000-000000067F000080000006600C00000FC000__0000007FDCA75700\n000000067F000080000006600C00000FC000-000000067F000080000006600C0000100000__0000007F108C1FD8\n000000067F000080000006600C00000FC000-000000067F000080000006600C0000100000__0000007FDCA75700\n000000067F000080000006600C00000FE572-000000067F000080000006600C0000107CD8__0000007E71DBF8F9-0000007F11E4BFE9\n000000067F000080000006600C0000100000-000000067F000080000006600C0000104000__0000007F108C1FD8\n000000067F000080000006600C0000100000-000000067F000080000006600C0000104000__0000007FDCA75700\n000000067F000080000006600C0000104000-000000067F000080000006600C0000108000__0000007F108C1FD8\n000000067F000080000006600C0000104000-000000067F000080000006600C0000108000__0000007FDCA75700\n000000067F000080000006600C0000107CD8-000000067F000080000006600C000011140B__0000007E71DBF8F9-0000007F11E4BFE9\n000000067F000080000006600C0000108000-000000067F000080000006600C000010C000__0000007F108C1FD8\n000000067F000080000006600C0000108000-000000067F000080000006600C000010C000__0000007FDCA75700\n000000067F000080000006600C000010C000-000000067F000080000
006600C0000110000__0000007F108C1FD8\n000000067F000080000006600C000010C000-000000067F000080000006600C0000110000__0000007FDCA75700\n000000067F000080000006600C0000110000-000000067F00008000000660120100000000__0000007FDCA75700\n000000067F000080000006600C0000110000-030000000000000000000000000000000002__0000007F108C1FD8\n000000067F000080000006600C000011140B-010000000000000001000000030000000010__0000007E71DBF8F9-0000007F11E4BFE9\n000000067F000080000006600C0000111C82-000000067F0000800000066014000000535B__0000007F11E4BFE9-0000007F7BE4E6F1\n000000067F00008000000660140000000000-000000067F00008000000660140000004000__0000007FDCA75700\n000000067F00008000000660140000003DAA-000000067F00008000000660140000017C4D__0000007F7BE4E6F1-0000007FDCDCE659\n000000067F00008000000660140000004000-000000067F00008000000660140000008000__0000007FDCA75700\n000000067F0000800000066014000000535B-000000067F0000800000066014000000C839__0000007F11E4BFE9-0000007F7BE4E6F1\n000000067F00008000000660140000008000-000000067F0000800000066014000000C000__0000007FDCA75700\n000000067F0000800000066014000000C000-000000067F00008000000660140000010000__0000007FDCA75700\n000000067F0000800000066014000000C839-000000067F00008000000660140000013D42__0000007F11E4BFE9-0000007F7BE4E6F1\n000000067F00008000000660140000010000-000000067F00008000000660140000014000__0000007FDCA75700\n000000067F00008000000660140000013D42-000000067F0000800000066014000001B222__0000007F11E4BFE9-0000007F7BE4E6F1\n000000067F00008000000660140000014000-000000067F00008000000660140000018000__0000007FDCA75700\n000000067F00008000000660140000017C51-000000067F0000800000066014000002B9D0__0000007F7BE4E6F1-0000007FDCDCE659\n000000067F00008000000660140000018000-000000067F0000800000066014000001C000__0000007FDCA75700\n000000067F0000800000066014000001B222-000000067F00008000000660140000022704__0000007F11E4BFE9-0000007F7BE4E6F1\n000000067F0000800000066014000001C000-000000067F00008000000660140000020000__0000007FDCA75700\n000000067F00008000000660140000020000-000000067F0000800000066
0140000024000__0000007FDCA75700\n000000067F00008000000660140000022704-000000067F00008000000660140000029C2D__0000007F11E4BFE9-0000007F7BE4E6F1\n000000067F00008000000660140000024000-000000067F00008000000660140000028000__0000007FDCA75700\n000000067F00008000000660140000028000-000000067F0000800000066014000002C000__0000007FDCA75700\n000000067F00008000000660140000029C2D-030000000000000000000000000000000002__0000007F11E4BFE9-0000007F7BE4E6F1\n000000067F0000800000066014000002B9D1-030000000000000000000000000000000002__0000007F7BE4E6F1-0000007FDCDCE659\n000000067F0000800000066014000002C000-030000000000000000000000000000000002__0000007FDCA75700\n000000067F000080000006800C0000000000-000000067F000080000006800C0000004000__00000081AFEDBFE0\n000000067F000080000006800C0000004000-000000067F000080000006800C0000008000__00000081AFEDBFE0\n000000067F000080000006800C0000007D6A-000000067F000080000006800C00000114D0__0000007FDCDCE659-000000804F6BFFC1\n000000067F000080000006800C0000008000-000000067F000080000006800C000000C000__00000081AFEDBFE0\n000000067F000080000006800C000000C000-000000067F000080000006800C0000010000__00000081AFEDBFE0\n000000067F000080000006800C0000010000-000000067F000080000006800C0000014000__00000081AFEDBFE0\n000000067F000080000006800C00000114D0-000000067F000080000006800C000001AC0B__0000007FDCDCE659-000000804F6BFFC1\n000000067F000080000006800C0000014000-000000067F000080000006800C0000018000__00000081AFEDBFE0\n000000067F000080000006800C0000018000-000000067F000080000006800C000001C000__00000081AFEDBFE0\n000000067F000080000006800C000001AC0B-000000067F000080000006800C0000024348__0000007FDCDCE659-000000804F6BFFC1\n000000067F000080000006800C000001C000-000000067F000080000006800C0000020000__00000081AFEDBFE0\n000000067F000080000006800C0000020000-000000067F000080000006800C0000024000__00000081AFEDBFE0\n000000067F000080000006800C0000024000-000000067F000080000006800C0000028000__00000081AFEDBFE0\n000000067F000080000006800C0000024348-000000067F000080000006800C000002DAAE__0000007FDCDCE659-000000
804F6BFFC1\n000000067F000080000006800C0000028000-000000067F000080000006800C000002C000__00000081AFEDBFE0\n000000067F000080000006800C000002C000-000000067F000080000006800C0000030000__00000081AFEDBFE0\n000000067F000080000006800C000002DAAE-000000067F000080000006800C00000371D0__0000007FDCDCE659-000000804F6BFFC1\n000000067F000080000006800C0000030000-000000067F000080000006800C0000034000__00000081AFEDBFE0\n000000067F000080000006800C0000034000-000000067F000080000006800C0000038000__00000081AFEDBFE0\n000000067F000080000006800C00000371D0-000000067F000080000006800C000004090B__0000007FDCDCE659-000000804F6BFFC1\n000000067F000080000006800C0000038000-000000067F000080000006800C000003C000__00000081AFEDBFE0\n000000067F000080000006800C000003C000-000000067F000080000006800C0000040000__00000081AFEDBFE0\n000000067F000080000006800C0000040000-000000067F000080000006800C0000044000__00000081A164D628\n000000067F000080000006800C000004090B-030000000000000000000000000000000002__0000007FDCDCE659-000000804F6BFFC1\n000000067F000080000006800C0000042368-000000067F000080000006800C000004BACE__000000804F6BFFC1-00000080EF2FF5B9\n000000067F000080000006800C0000044000-000000067F000080000006800C0000048000__00000081A164D628\n000000067F000080000006800C0000048000-000000067F000080000006800C000004C000__00000081A164D628\n000000067F000080000006800C000004BACE-000000067F000080000006800C0000055202__000000804F6BFFC1-00000080EF2FF5B9\n000000067F000080000006800C000004C000-000000067F000080000006800C0000050000__00000081A164D628\n000000067F000080000006800C0000050000-000000067F000080000006800C0000054000__00000081A164D628\n000000067F000080000006800C0000054000-000000067F000080000006800C0000058000__00000081A164D628\n000000067F000080000006800C0000055202-000000067F000080000006800C000005E90D__000000804F6BFFC1-00000080EF2FF5B9\n000000067F000080000006800C0000058000-000000067F000080000006800C000005C000__00000081A164D628\n000000067F000080000006800C000005C000-000000067F000080000006800C0000060000__00000081A164D628\n000000067F000080000006800C
000005E90D-000000067F000080000006800C000006802B__000000804F6BFFC1-00000080EF2FF5B9\n000000067F000080000006800C0000060000-000000067F000080000006800C0000064000__00000081A164D628\n000000067F000080000006800C0000064000-000000067F000080000006800C0000068000__00000081A164D628\n000000067F000080000006800C0000068000-000000067F000080000006800C000006C000__00000081A164D628\n000000067F000080000006800C000006802B-000000067F000080000006800C0000071782__000000804F6BFFC1-00000080EF2FF5B9\n000000067F000080000006800C000006C000-000000067F000080000006800C0000070000__00000081A164D628\n000000067F000080000006800C0000070000-000000067F000080000006800C0000074000__00000081A164D628\n000000067F000080000006800C0000071782-000000067F000080000006800C000007AEE8__000000804F6BFFC1-00000080EF2FF5B9\n000000067F000080000006800C0000074000-000000067F000080000006800C0000078000__00000081A164D628\n000000067F000080000006800C0000078000-000000067F000080000006800C000007C000__00000081A164D628\n000000067F000080000006800C000007AEE8-000000067F000080000006800C000008460B__000000804F6BFFC1-00000080EF2FF5B9\n000000067F000080000006800C000007C000-000000067F000080000006800C0000080000__00000081A164D628\n000000067F000080000006800C0000080000-000000067F000080000006800C0000084000__00000081A164D628\n000000067F000080000006800C0000084000-000000067F000080000006800C0000088000__00000081A164D628\n000000067F000080000006800C000008460B-000000067F000080000006800C000008DD71__000000804F6BFFC1-00000080EF2FF5B9\n000000067F000080000006800C0000088000-000000067F000080000006800C000008C000__00000081A164D628\n000000067F000080000006800C000008C000-000000067F000080000006800C0000090000__00000081A164D628\n000000067F000080000006800C000008DD71-000000067F000080000006800C00000974D7__000000804F6BFFC1-00000080EF2FF5B9\n000000067F000080000006800C0000090000-000000067F000080000006800C0000094000__00000081A164D628\n000000067F000080000006800C0000094000-000000067F000080000006800C0000098000__00000081A164D628\n000000067F000080000006800C00000974D7-000000067F000080000006800C0
0000A0C0B__000000804F6BFFC1-00000080EF2FF5B9\n000000067F000080000006800C0000098000-000000067F000080000006800C000009C000__00000081A164D628\n000000067F000080000006800C000009C000-000000067F000080000006800C00000A0000__00000081A164D628\n000000067F000080000006800C00000A0000-000000067F000080000006800C00000A4000__00000081A164D628\n000000067F000080000006800C00000A0C0B-000000067F000080000006800C0100000000__000000804F6BFFC1-00000080EF2FF5B9\n000000067F000080000006800C00000A4000-000000067F000080000006800C00000A8000__00000081A164D628\n000000067F000080000006800C00000A8000-000000067F000080000006800C00000AC000__00000081A164D628\n000000067F000080000006800C00000A8D4C-000000067F000080000006800C00000B24B2__00000080EF2FF5B9-00000081AFAF5FD1\n000000067F000080000006800C00000AC000-000000067F000080000006800C00000B0000__00000081A164D628\n000000067F000080000006800C00000B0000-000000067F000080000006800C00000B4000__00000081A164D628\n000000067F000080000006800C00000B24B2-000000067F000080000006800C00000BBC0B__00000080EF2FF5B9-00000081AFAF5FD1\n000000067F000080000006800C00000B4000-000000067F000080000006800C00000B8000__00000081A164D628\n000000067F000080000006800C00000B8000-000000067F000080000006800C00000BC000__00000081A164D628\n000000067F000080000006800C00000BBC0B-000000067F000080000006800C00000C533F__00000080EF2FF5B9-00000081AFAF5FD1\n000000067F000080000006800C00000BC000-000000067F000080000006800C00000C0000__00000081A164D628\n000000067F000080000006800C00000C0000-000000067F000080000006800C00000C4000__00000081A164D628\n000000067F000080000006800C00000C4000-000000067F000080000006800C00000C8000__00000081A164D628\n000000067F000080000006800C00000C533F-000000067F000080000006800C00000CEAA5__00000080EF2FF5B9-00000081AFAF5FD1\n000000067F000080000006800C00000C8000-000000067F000080000006800C00000CC000__00000081A164D628\n000000067F000080000006800C00000CC000-000000067F000080000006800C00000D0000__00000081A164D628\n000000067F000080000006800C00000CEAA5-000000067F000080000006800C00000D81BE__00000080EF2FF5B9-00000081AF
AF5FD1\n000000067F000080000006800C00000D0000-000000067F000080000006800C00000D4000__00000081A164D628\n000000067F000080000006800C00000D4000-000000067F000080000006800C00000D8000__00000081A164D628\n000000067F000080000006800C00000D8000-000000067F000080000006800C00000DC000__00000081A164D628\n000000067F000080000006800C00000D81BE-000000067F000080000006800C00000E190B__00000080EF2FF5B9-00000081AFAF5FD1\n000000067F000080000006800C00000DC000-000000067F000080000006800C00000E0000__00000081A164D628\n000000067F000080000006800C00000E0000-000000067F000080000006800C00000E4000__00000081A164D628\n000000067F000080000006800C00000E190B-000000067F000080000006800C00000EB071__00000080EF2FF5B9-00000081AFAF5FD1\n000000067F000080000006800C00000E4000-000000067F000080000006800C00000E8000__00000081A164D628\n000000067F000080000006800C00000E8000-000000067F000080000006800C00000EC000__00000081A164D628\n000000067F000080000006800C00000EB071-000000067F000080000006800C00000F47AC__00000080EF2FF5B9-00000081AFAF5FD1\n000000067F000080000006800C00000EC000-000000067F000080000006800C00000F0000__00000081A164D628\n000000067F000080000006800C00000F0000-000000067F000080000006800C00000F4000__00000081A164D628\n000000067F000080000006800C00000F4000-000000067F000080000006800C00000F8000__00000081A164D628\n000000067F000080000006800C00000F47AC-000000067F000080000006800C00000FDF0A__00000080EF2FF5B9-00000081AFAF5FD1\n000000067F000080000006800C00000F8000-000000067F000080000006800C00000FC000__00000081A164D628\n000000067F000080000006800C00000FC000-000000067F000080000006800C0000100000__00000081A164D628\n000000067F000080000006800C00000FDF0A-000000067F000080000006800C000010762B__00000080EF2FF5B9-00000081AFAF5FD1\n000000067F000080000006800C0000100000-000000067F000080000006800C0000104000__00000081A164D628\n000000067F000080000006800C0000104000-000000067F000080000006800C0000108000__00000081A164D628\n000000067F000080000006800C000010762B-000000067F000080000006800C0000110D88__00000080EF2FF5B9-00000081AFAF5FD1\n000000067F000080000006800C0000
108000-030000000000000000000000000000000002__00000081A164D628\n000000067F000080000006800C0000110D88-010000000000000001000000030000000014__00000080EF2FF5B9-00000081AFAF5FD1\n000000067F000080000006801400000044E4-000000067F0000800000068014000000C3F5__00000081AFAF5FD1-0000008215AFE5A9\n000000067F0000800000068014000000C3F5-000000067F00008000000680140000014303__00000081AFAF5FD1-0000008215AFE5A9\n000000067F00008000000680140000014303-000000067F0000800000068014000001C214__00000081AFAF5FD1-0000008215AFE5A9\n000000067F0000800000068014000001C214-000000067F00008000000680140000024125__00000081AFAF5FD1-0000008215AFE5A9\n000000067F00008000000680140000024125-000000067F0000800000068014000002C035__00000081AFAF5FD1-0000008215AFE5A9\n000000067F0000800000068014000002C035-000000067F000080000006A00C00000072CA__00000081AFAF5FD1-0000008215AFE5A9\n000000067F000080000006A00C0000000000-000000067F000080000006A00C0000004000__00000083D5DE3FD0\n000000067F000080000006A00C0000004000-000000067F000080000006A00C0000008000__00000083D5DE3FD0\n000000067F000080000006A00C00000072CA-030000000000000000000000000000000002__00000081AFAF5FD1-0000008215AFE5A9\n000000067F000080000006A00C0000008000-000000067F000080000006A00C000000C000__00000083865C64B8\n000000067F000080000006A00C0000008000-000000067F000080000006A00C000000C000__00000084A1F03030\n000000067F000080000006A00C00000096E3-000000067F000080000006A00C0000012E0B__0000008215AFE5A9-00000082B573F579\n000000067F000080000006A00C000000C000-000000067F000080000006A00C0000010000__00000083865C64B8\n000000067F000080000006A00C000000C000-000000067F000080000006A00C0000010000__00000084A1F03030\n000000067F000080000006A00C0000010000-000000067F000080000006A00C0000014000__00000083865C64B8\n000000067F000080000006A00C0000010000-000000067F000080000006A00C0000014000__00000084A1F03030\n000000067F000080000006A00C0000012E0B-000000067F000080000006A00C000001C571__0000008215AFE5A9-00000082B573F579\n000000067F000080000006A00C0000014000-000000067F000080000006A00C0000018000__00000083865C64B8\n
000000067F000080000006A00C0000014000-000000067F000080000006A00C0000018000__00000084A1F03030\n000000067F000080000006A00C0000018000-000000067F000080000006A00C000001C000__00000083865C64B8\n000000067F000080000006A00C0000018000-000000067F000080000006A00C000001C000__00000084A1F03030\n000000067F000080000006A00C000001C000-000000067F000080000006A00C0000020000__00000083865C64B8\n000000067F000080000006A00C000001C000-000000067F000080000006A00C0000020000__00000084A1F03030\n000000067F000080000006A00C000001C571-000000067F000080000006A00C0000025CD7__0000008215AFE5A9-00000082B573F579\n000000067F000080000006A00C0000020000-000000067F000080000006A00C0000024000__00000083865C64B8\n000000067F000080000006A00C0000020000-000000067F000080000006A00C0000024000__00000084A1F03030\n000000067F000080000006A00C0000024000-000000067F000080000006A00C0000028000__00000083865C64B8\n000000067F000080000006A00C0000024000-000000067F000080000006A00C0000028000__00000084A1F03030\n000000067F000080000006A00C0000025CD7-000000067F000080000006A00C000002F40B__0000008215AFE5A9-00000082B573F579\n000000067F000080000006A00C0000028000-000000067F000080000006A00C000002C000__00000083865C64B8\n000000067F000080000006A00C0000028000-000000067F000080000006A00C000002C000__00000084A1F03030\n000000067F000080000006A00C000002C000-000000067F000080000006A00C0000030000__00000083865C64B8\n000000067F000080000006A00C000002C000-000000067F000080000006A00C0000030000__00000084A1F03030\n000000067F000080000006A00C000002F40B-000000067F000080000006A00C0000038B1E__0000008215AFE5A9-00000082B573F579\n000000067F000080000006A00C0000030000-000000067F000080000006A00C0000034000__00000083865C64B8\n000000067F000080000006A00C0000030000-000000067F000080000006A00C0000034000__00000084A1F03030\n000000067F000080000006A00C0000034000-000000067F000080000006A00C0000038000__00000083865C64B8\n000000067F000080000006A00C0000034000-000000067F000080000006A00C0000038000__00000084A1F03030\n000000067F000080000006A00C0000038000-000000067F000080000006A00C000003C000__00000083865C64
B8\n000000067F000080000006A00C0000038000-000000067F000080000006A00C000003C000__00000084A1F03030\n000000067F000080000006A00C0000038B1E-000000067F000080000006A00C0000042284__0000008215AFE5A9-00000082B573F579\n000000067F000080000006A00C000003C000-000000067F000080000006A00C0000040000__00000083865C64B8\n000000067F000080000006A00C000003C000-000000067F000080000006A00C0000040000__00000084A1F03030\n000000067F000080000006A00C0000040000-000000067F000080000006A00C0000044000__00000083865C64B8\n000000067F000080000006A00C0000040000-000000067F000080000006A00C0000044000__00000084A1F03030\n000000067F000080000006A00C0000042284-000000067F000080000006A00C000004B9EA__0000008215AFE5A9-00000082B573F579\n000000067F000080000006A00C0000044000-000000067F000080000006A00C0000048000__00000083865C64B8\n000000067F000080000006A00C0000044000-000000067F000080000006A00C0000048000__00000084A1F03030\n000000067F000080000006A00C0000048000-000000067F000080000006A00C000004C000__00000083865C64B8\n000000067F000080000006A00C0000048000-000000067F000080000006A00C000004C000__00000084A1F03030\n000000067F000080000006A00C000004B9EA-000000067F000080000006A00C000005510B__0000008215AFE5A9-00000082B573F579\n000000067F000080000006A00C000004C000-000000067F000080000006A00C0000050000__00000083865C64B8\n000000067F000080000006A00C000004C000-000000067F000080000006A00C0000050000__00000084A1F03030\n000000067F000080000006A00C0000050000-000000067F000080000006A00C0000054000__00000083865C64B8\n000000067F000080000006A00C0000050000-000000067F000080000006A00C0000054000__00000084A1F03030\n000000067F000080000006A00C000005198B-000000067F000080000006A00C00000A31A6__000000844F1A6789-00000084A325AA01\n000000067F000080000006A00C0000054000-000000067F000080000006A00C0000058000__00000083865C64B8\n000000067F000080000006A00C0000054000-000000067F000080000006A00C0000058000__00000084A1F03030\n000000067F000080000006A00C000005510B-000000067F000080000006A00C000005E871__0000008215AFE5A9-00000082B573F579\n000000067F000080000006A00C0000058000-000000067F0000
80000006A00C000005C000__00000083865C64B8\n000000067F000080000006A00C0000058000-000000067F000080000006A00C000005C000__00000084A1F03030\n000000067F000080000006A00C000005C000-000000067F000080000006A00C0000060000__00000083865C64B8\n000000067F000080000006A00C000005C000-000000067F000080000006A00C0000060000__00000084A1F03030\n000000067F000080000006A00C000005E871-000000067F000080000006A00C0000067F8B__0000008215AFE5A9-00000082B573F579\n000000067F000080000006A00C0000060000-000000067F000080000006A00C0000064000__00000083865C64B8\n000000067F000080000006A00C0000060000-000000067F000080000006A00C0000064000__00000084A1F03030\n000000067F000080000006A00C0000064000-000000067F000080000006A00C0000068000__00000083865C64B8\n000000067F000080000006A00C0000064000-000000067F000080000006A00C0000068000__00000084A1F03030\n000000067F000080000006A00C0000067F8B-000000067F000080000006A00C0100000000__0000008215AFE5A9-00000082B573F579\n000000067F000080000006A00C0000068000-000000067F000080000006A00C000006C000__00000083865C64B8\n000000067F000080000006A00C0000068000-000000067F000080000006A00C000006C000__00000084A1F03030\n000000067F000080000006A00C000006C000-000000067F000080000006A00C0000070000__00000083865C64B8\n000000067F000080000006A00C000006C000-000000067F000080000006A00C0000070000__00000084A1F03030\n000000067F000080000006A00C0000070000-000000067F000080000006A00C0000074000__00000083865C64B8\n000000067F000080000006A00C0000070000-000000067F000080000006A00C0000074000__00000084A1F03030\n000000067F000080000006A00C00000703EC-000000067F000080000006A00C0000079B0C__00000082B573F579-00000083D5901FD9\n000000067F000080000006A00C0000074000-000000067F000080000006A00C0000078000__00000083865C64B8\n000000067F000080000006A00C0000074000-000000067F000080000006A00C0000078000__00000084A1F03030\n000000067F000080000006A00C0000078000-000000067F000080000006A00C000007C000__00000083865C64B8\n000000067F000080000006A00C0000078000-000000067F000080000006A00C000007C000__00000084A1F03030\n000000067F000080000006A00C0000079B0C-000000067F
000080000006A00C0000083272__00000082B573F579-00000083D5901FD9\n000000067F000080000006A00C000007C000-000000067F000080000006A00C0000080000__00000083865C64B8\n000000067F000080000006A00C000007C000-000000067F000080000006A00C0000080000__00000084A1F03030\n000000067F000080000006A00C0000080000-000000067F000080000006A00C0000084000__00000083865C64B8\n000000067F000080000006A00C0000080000-000000067F000080000006A00C0000084000__00000084A1F03030\n000000067F000080000006A00C0000083272-000000067F000080000006A00C000008C9D8__00000082B573F579-00000083D5901FD9\n000000067F000080000006A00C0000084000-000000067F000080000006A00C0000088000__00000083865C64B8\n000000067F000080000006A00C0000084000-000000067F000080000006A00C0000088000__00000084A1F03030\n000000067F000080000006A00C0000088000-000000067F000080000006A00C000008C000__00000083865C64B8\n000000067F000080000006A00C0000088000-000000067F000080000006A00C000008C000__00000084A1F03030\n000000067F000080000006A00C000008C000-000000067F000080000006A00C0000090000__00000083865C64B8\n000000067F000080000006A00C000008C000-000000067F000080000006A00C0000090000__00000084A1F03030\n000000067F000080000006A00C000008C9D8-000000067F000080000006A00C0000096129__00000082B573F579-00000083D5901FD9\n000000067F000080000006A00C0000090000-000000067F000080000006A00C0000094000__00000083865C64B8\n000000067F000080000006A00C0000090000-000000067F000080000006A00C0000094000__00000084A1F03030\n000000067F000080000006A00C0000094000-000000067F000080000006A00C0000098000__00000083865C64B8\n000000067F000080000006A00C0000094000-000000067F000080000006A00C0000098000__00000084A1F03030\n000000067F000080000006A00C0000096129-000000067F000080000006A00C000009F88F__00000082B573F579-00000083D5901FD9\n000000067F000080000006A00C0000098000-000000067F000080000006A00C000009C000__00000083865C64B8\n000000067F000080000006A00C0000098000-000000067F000080000006A00C000009C000__00000084A1F03030\n000000067F000080000006A00C000009C000-000000067F000080000006A00C00000A0000__00000083865C64B8\n000000067F000080000006A00C
000009C000-000000067F000080000006A00C00000A0000__00000084A1F03030\n000000067F000080000006A00C000009F88F-000000067F000080000006A00C00000A8F9F__00000082B573F579-00000083D5901FD9\n000000067F000080000006A00C00000A0000-000000067F000080000006A00C00000A4000__00000083865C64B8\n000000067F000080000006A00C00000A0000-000000067F000080000006A00C00000A4000__00000084A1F03030\n000000067F000080000006A00C00000A31B0-000000067F000080000006A00C00000F4C19__000000844F1A6789-00000084A325AA01\n000000067F000080000006A00C00000A4000-000000067F000080000006A00C00000A8000__00000083865C64B8\n000000067F000080000006A00C00000A4000-000000067F000080000006A00C00000A8000__00000084A1F03030\n000000067F000080000006A00C00000A8000-000000067F000080000006A00C00000AC000__00000083865C64B8\n000000067F000080000006A00C00000A8000-000000067F000080000006A00C00000AC000__00000084A1F03030\n000000067F000080000006A00C00000A8F9F-000000067F000080000006A00C00000B2705__00000082B573F579-00000083D5901FD9\n000000067F000080000006A00C00000AC000-000000067F000080000006A00C00000B0000__00000083865C64B8\n000000067F000080000006A00C00000AC000-000000067F000080000006A00C00000B0000__00000084A1F03030\n000000067F000080000006A00C00000B0000-000000067F000080000006A00C00000B4000__00000083865C64B8\n000000067F000080000006A00C00000B0000-000000067F000080000006A00C00000B4000__00000084A1F03030\n000000067F000080000006A00C00000B2705-000000067F000080000006A00C00000BBE10__00000082B573F579-00000083D5901FD9\n000000067F000080000006A00C00000B4000-000000067F000080000006A00C00000B8000__00000083865C64B8\n000000067F000080000006A00C00000B4000-000000067F000080000006A00C00000B8000__00000084A1F03030\n000000067F000080000006A00C00000B8000-000000067F000080000006A00C00000BC000__00000083865C64B8\n000000067F000080000006A00C00000B8000-000000067F000080000006A00C00000BC000__00000084A1F03030\n000000067F000080000006A00C00000BBE10-000000067F000080000006A00C00000C5543__00000082B573F579-00000083D5901FD9\n000000067F000080000006A00C00000BC000-000000067F000080000006A00C00000C0000__000000
83865C64B8\n000000067F000080000006A00C00000BC000-000000067F000080000006A00C00000C0000__00000084A1F03030\n000000067F000080000006A00C00000C0000-000000067F000080000006A00C00000C4000__00000083865C64B8\n000000067F000080000006A00C00000C0000-000000067F000080000006A00C00000C4000__00000084A1F03030\n000000067F000080000006A00C00000C4000-000000067F000080000006A00C00000C8000__00000083865C64B8\n000000067F000080000006A00C00000C4000-000000067F000080000006A00C00000C8000__00000084A1F03030\n000000067F000080000006A00C00000C4CC8-000000067F000080000006A0140000001CBC__00000083D5901FD9-000000844F1A6789\n000000067F000080000006A00C00000C5543-000000067F000080000006A00C00000CECA9__00000082B573F579-00000083D5901FD9\n000000067F000080000006A00C00000C8000-000000067F000080000006A00C00000CC000__00000083865C64B8\n000000067F000080000006A00C00000C8000-000000067F000080000006A00C00000CC000__00000084A1F03030\n000000067F000080000006A00C00000CC000-000000067F000080000006A00C00000D0000__00000083865C64B8\n000000067F000080000006A00C00000CC000-000000067F000080000006A00C00000D0000__00000084A1F03030\n000000067F000080000006A00C00000CECA9-000000067F000080000006A00C00000D83C0__00000082B573F579-00000083D5901FD9\n000000067F000080000006A00C00000D0000-000000067F000080000006A00C00000D4000__00000083865C64B8\n000000067F000080000006A00C00000D0000-000000067F000080000006A00C00000D4000__00000084A1F03030\n000000067F000080000006A00C00000D4000-000000067F000080000006A00C00000D8000__00000083865C64B8\n000000067F000080000006A00C00000D4000-000000067F000080000006A00C00000D8000__00000084A1F03030\n000000067F000080000006A00C00000D8000-000000067F000080000006A00C00000DC000__00000083865C64B8\n000000067F000080000006A00C00000D8000-000000067F000080000006A00C00000DC000__00000084A1F03030\n000000067F000080000006A00C00000D83C0-000000067F000080000006A00C00000E1B0A__00000082B573F579-00000083D5901FD9\n000000067F000080000006A00C00000DC000-000000067F000080000006A00C00000E0000__00000083865C64B8\n000000067F000080000006A00C00000DC000-000000067F000080000006A
00C00000E0000__00000084A1F03030\n000000067F000080000006A00C00000E0000-000000067F000080000006A00C00000E4000__00000084A1F03030\n000000067F000080000006A00C00000E0000-030000000000000000000000000000000002__00000083865C64B8\n000000067F000080000006A00C00000E1B0A-000000067F000080000006A00C00000EB270__00000082B573F579-00000083D5901FD9\n000000067F000080000006A00C00000E4000-000000067F000080000006A00C00000E8000__00000084A1F03030\n000000067F000080000006A00C00000E8000-000000067F000080000006A00C00000EC000__00000084A1F03030\n000000067F000080000006A00C00000EB270-000000067F000080000006A00C00000F49AA__00000082B573F579-00000083D5901FD9\n000000067F000080000006A00C00000EC000-000000067F000080000006A00C00000F0000__00000084A1F03030\n000000067F000080000006A00C00000F0000-000000067F000080000006A00C00000F4000__00000084A1F03030\n000000067F000080000006A00C00000F4000-000000067F000080000006A00C00000F8000__00000084A1F03030\n000000067F000080000006A00C00000F49AA-000000067F000080000006A00C00000FE10A__00000082B573F579-00000083D5901FD9\n000000067F000080000006A00C00000F4C23-000000067F000080000006A014000000E1C2__000000844F1A6789-00000084A325AA01\n000000067F000080000006A00C00000F8000-000000067F000080000006A00C00000FC000__00000084A1F03030\n000000067F000080000006A00C00000FC000-000000067F000080000006A00C0000100000__00000084A1F03030\n000000067F000080000006A00C00000FE10A-000000067F000080000006A00C000010782C__00000082B573F579-00000083D5901FD9\n000000067F000080000006A00C0000100000-000000067F000080000006A00C0000104000__00000084A1F03030\n000000067F000080000006A00C0000104000-000000067F000080000006A00C0000108000__00000084A1F03030\n000000067F000080000006A00C000010782C-000000067F000080000006A00C0000110F88__00000082B573F579-00000083D5901FD9\n000000067F000080000006A00C0000108000-000000067F000080000006A00C000010C000__00000084A1F03030\n000000067F000080000006A00C000010C000-000000067F000080000006A00C0000110000__00000084A1F03030\n000000067F000080000006A00C0000110000-000000067F000080000006A0120100000000__00000084A1F03030\n00000
0067F000080000006A00C0000110F88-010000000000000001000000030000000014__00000082B573F579-00000083D5901FD9\n000000067F000080000006A0140000000000-000000067F000080000006A0140000004000__00000084A1F03030\n000000067F000080000006A0140000001CBC-000000067F000080000006A01400000088E1__00000083D5901FD9-000000844F1A6789\n000000067F000080000006A0140000004000-000000067F000080000006A0140000008000__00000084A1F03030\n000000067F000080000006A0140000008000-000000067F000080000006A014000000C000__00000084A1F03030\n000000067F000080000006A01400000088E1-000000067F000080000006A014000000F459__00000083D5901FD9-000000844F1A6789\n000000067F000080000006A014000000C000-000000067F000080000006A0140000010000__00000084A1F03030\n000000067F000080000006A014000000E1C2-000000067F000080000006A014000002682C__000000844F1A6789-00000084A325AA01\n000000067F000080000006A014000000F459-000000067F000080000006A0140000016068__00000083D5901FD9-000000844F1A6789\n000000067F000080000006A0140000010000-000000067F000080000006A0140000014000__00000084A1F03030\n000000067F000080000006A0140000014000-000000067F000080000006A0140000018000__00000084A1F03030\n000000067F000080000006A0140000016068-000000067F000080000006A014000001CC14__00000083D5901FD9-000000844F1A6789\n000000067F000080000006A0140000018000-000000067F000080000006A014000001C000__00000084A1F03030\n000000067F000080000006A014000001C000-000000067F000080000006A0140000020000__00000084A1F03030\n000000067F000080000006A014000001CC14-000000067F000080000006A014000002384E__00000083D5901FD9-000000844F1A6789\n000000067F000080000006A0140000020000-000000067F000080000006A0140000024000__00000084A1F03030\n000000067F000080000006A014000002384E-000000067F000080000006A014000002A467__00000083D5901FD9-000000844F1A6789\n000000067F000080000006A0140000024000-000000067F000080000006A0140000028000__00000084A1F03030\n000000067F000080000006A0140000026831-030000000000000000000000000000000002__000000844F1A6789-00000084A325AA01\n000000067F000080000006A0140000028000-000000067F000080000006A014000002C000__00000084A1
F03030\n000000067F000080000006A014000002A467-030000000000000000000000000000000002__00000083D5901FD9-000000844F1A6789\n000000067F000080000006A014000002C000-030000000000000000000000000000000002__00000084A1F03030\n000000067F000080000006C00C0000000000-000000067F000080000006C00C0000004000__00000086746BDFE0\n000000067F000080000006C00C0000004000-000000067F000080000006C00C0000008000__00000086746BDFE0\n000000067F000080000006C00C0000008000-000000067F000080000006C00C000000C000__00000086746BDFE0\n000000067F000080000006C00C00000090F5-000000067F000080000006C00C000001280C__00000084A325AA01-00000085239DFB81\n000000067F000080000006C00C000000C000-000000067F000080000006C00C0000010000__00000086746BDFE0\n000000067F000080000006C00C0000010000-000000067F000080000006C00C0000014000__00000086746BDFE0\n000000067F000080000006C00C000001280C-000000067F000080000006C00C000001BF72__00000084A325AA01-00000085239DFB81\n000000067F000080000006C00C0000014000-000000067F000080000006C00C0000018000__00000086746BDFE0\n000000067F000080000006C00C0000018000-000000067F000080000006C00C000001C000__00000086746BDFE0\n000000067F000080000006C00C000001BF72-000000067F000080000006C00C00000256D8__00000084A325AA01-00000085239DFB81\n000000067F000080000006C00C000001C000-000000067F000080000006C00C0000020000__00000086746BDFE0\n000000067F000080000006C00C0000020000-000000067F000080000006C00C0000024000__00000086746BDFE0\n000000067F000080000006C00C0000024000-000000067F000080000006C00C0000028000__00000086746BDFE0\n000000067F000080000006C00C00000256D8-000000067F000080000006C00C000002EE0B__00000084A325AA01-00000085239DFB81\n000000067F000080000006C00C0000028000-000000067F000080000006C00C000002C000__00000086746BDFE0\n000000067F000080000006C00C000002C000-000000067F000080000006C00C0000030000__00000086746BDFE0\n000000067F000080000006C00C000002EE0B-000000067F000080000006C00C0000038521__00000084A325AA01-00000085239DFB81\n000000067F000080000006C00C0000030000-000000067F000080000006C00C0000034000__00000086746BDFE0\n000000067F000080000006C00C0000
034000-000000067F000080000006C00C0000038000__00000086746BDFE0\n000000067F000080000006C00C0000038000-000000067F000080000006C00C000003C000__00000086746BDFE0\n000000067F000080000006C00C0000038521-000000067F000080000006C00C0000041C87__00000084A325AA01-00000085239DFB81\n000000067F000080000006C00C000003C000-000000067F000080000006C00C0000040000__00000086746BDFE0\n000000067F000080000006C00C0000040000-000000067F000080000006C00C0000044000__00000086746BDFE0\n000000067F000080000006C00C0000041C87-000000067F000080000006C00C000004B3ED__00000084A325AA01-00000085239DFB81\n000000067F000080000006C00C0000044000-000000067F000080000006C00C0000048000__00000086746BDFE0\n000000067F000080000006C00C0000048000-000000067F000080000006C00C000004C000__00000086720CFFF0\n000000067F000080000006C00C0000048000-000000067F000080000006C00C000004C000__000000873B520940\n000000067F000080000006C00C000004B3ED-030000000000000000000000000000000002__00000084A325AA01-00000085239DFB81\n000000067F000080000006C00C000004BAC4-000000067F000080000006C00C00000551F9__00000085239DFB81-00000085D35BF439\n000000067F000080000006C00C000004C000-000000067F000080000006C00C0000050000__00000086720CFFF0\n000000067F000080000006C00C000004C000-000000067F000080000006C00C0000050000__000000873B520940\n000000067F000080000006C00C0000050000-000000067F000080000006C00C0000054000__00000086720CFFF0\n000000067F000080000006C00C0000050000-000000067F000080000006C00C0000054000__000000873B520940\n000000067F000080000006C00C0000054000-000000067F000080000006C00C0000058000__00000086720CFFF0\n000000067F000080000006C00C0000054000-000000067F000080000006C00C0000058000__000000873B520940\n000000067F000080000006C00C00000551F9-000000067F000080000006C00C000005E90C__00000085239DFB81-00000085D35BF439\n000000067F000080000006C00C0000055EB3-000000067F000080000006C00C00000AB316__00000086ED29E361-000000873C9A2551\n000000067F000080000006C00C0000058000-000000067F000080000006C00C000005C000__00000086720CFFF0\n000000067F000080000006C00C0000058000-000000067F000080000006C00C00000
5C000__000000873B520940\n000000067F000080000006C00C000005C000-000000067F000080000006C00C0000060000__00000086720CFFF0\n000000067F000080000006C00C000005C000-000000067F000080000006C00C0000060000__000000873B520940\n000000067F000080000006C00C000005E90C-000000067F000080000006C00C000006802C__00000085239DFB81-00000085D35BF439\n000000067F000080000006C00C0000060000-000000067F000080000006C00C0000064000__00000086720CFFF0\n000000067F000080000006C00C0000060000-000000067F000080000006C00C0000064000__000000873B520940\n000000067F000080000006C00C0000064000-000000067F000080000006C00C0000068000__00000086720CFFF0\n000000067F000080000006C00C0000064000-000000067F000080000006C00C0000068000__000000873B520940\n000000067F000080000006C00C0000068000-000000067F000080000006C00C000006C000__00000086720CFFF0\n000000067F000080000006C00C0000068000-000000067F000080000006C00C000006C000__000000873B520940\n000000067F000080000006C00C000006802C-000000067F000080000006C00C0000071783__00000085239DFB81-00000085D35BF439\n000000067F000080000006C00C000006C000-000000067F000080000006C00C0000070000__00000086720CFFF0\n000000067F000080000006C00C000006C000-000000067F000080000006C00C0000070000__000000873B520940\n000000067F000080000006C00C0000070000-000000067F000080000006C00C0000074000__00000086720CFFF0\n000000067F000080000006C00C0000070000-000000067F000080000006C00C0000074000__000000873B520940\n000000067F000080000006C00C0000071783-000000067F000080000006C00C000007AEE9__00000085239DFB81-00000085D35BF439\n000000067F000080000006C00C0000074000-000000067F000080000006C00C0000078000__00000086720CFFF0\n000000067F000080000006C00C0000074000-000000067F000080000006C00C0000078000__000000873B520940\n000000067F000080000006C00C0000078000-000000067F000080000006C00C000007C000__00000086720CFFF0\n000000067F000080000006C00C0000078000-000000067F000080000006C00C000007C000__000000873B520940\n000000067F000080000006C00C000007AEE9-000000067F000080000006C00C000008460B__00000085239DFB81-00000085D35BF439\n000000067F000080000006C00C000007C000-000000067F
000080000006C00C0000080000__00000086720CFFF0\n000000067F000080000006C00C000007C000-000000067F000080000006C00C0000080000__000000873B520940\n000000067F000080000006C00C0000080000-000000067F000080000006C00C0000084000__00000086720CFFF0\n000000067F000080000006C00C0000080000-000000067F000080000006C00C0000084000__000000873B520940\n000000067F000080000006C00C0000084000-000000067F000080000006C00C0000088000__00000086720CFFF0\n000000067F000080000006C00C0000084000-000000067F000080000006C00C0000088000__000000873B520940\n000000067F000080000006C00C000008460B-000000067F000080000006C00C000008DD71__00000085239DFB81-00000085D35BF439\n000000067F000080000006C00C0000088000-000000067F000080000006C00C000008C000__00000086720CFFF0\n000000067F000080000006C00C0000088000-000000067F000080000006C00C000008C000__000000873B520940\n000000067F000080000006C00C000008C000-000000067F000080000006C00C0000090000__00000086720CFFF0\n000000067F000080000006C00C000008C000-000000067F000080000006C00C0000090000__000000873B520940\n000000067F000080000006C00C000008DD71-000000067F000080000006C00C00000974D7__00000085239DFB81-00000085D35BF439\n000000067F000080000006C00C0000090000-000000067F000080000006C00C0000094000__00000086720CFFF0\n000000067F000080000006C00C0000090000-000000067F000080000006C00C0000094000__000000873B520940\n000000067F000080000006C00C0000094000-000000067F000080000006C00C0000098000__00000086720CFFF0\n000000067F000080000006C00C0000094000-000000067F000080000006C00C0000098000__000000873B520940\n000000067F000080000006C00C00000974D7-000000067F000080000006C00C00000A0C0B__00000085239DFB81-00000085D35BF439\n000000067F000080000006C00C0000098000-000000067F000080000006C00C000009C000__00000086720CFFF0\n000000067F000080000006C00C0000098000-000000067F000080000006C00C000009C000__000000873B520940\n000000067F000080000006C00C000009C000-000000067F000080000006C00C00000A0000__00000086720CFFF0\n000000067F000080000006C00C000009C000-000000067F000080000006C00C00000A0000__000000873B520940\n000000067F000080000006C00C00000A0000-000000
067F000080000006C00C00000A4000__00000086720CFFF0\n000000067F000080000006C00C00000A0000-000000067F000080000006C00C00000A4000__000000873B520940\n000000067F000080000006C00C00000A0C0B-000000067F000080000006C00C00000AA371__00000085239DFB81-00000085D35BF439\n000000067F000080000006C00C00000A4000-000000067F000080000006C00C00000A8000__00000086720CFFF0\n000000067F000080000006C00C00000A4000-000000067F000080000006C00C00000A8000__000000873B520940\n000000067F000080000006C00C00000A8000-000000067F000080000006C00C00000AC000__00000086720CFFF0\n000000067F000080000006C00C00000A8000-000000067F000080000006C00C00000AC000__000000873B520940\n000000067F000080000006C00C00000AA371-000000067F000080000006C00C00000B3AD7__00000085239DFB81-00000085D35BF439\n000000067F000080000006C00C00000AB316-000000067F000080000006C00C00001015F1__00000086ED29E361-000000873C9A2551\n000000067F000080000006C00C00000AC000-000000067F000080000006C00C00000B0000__00000086720CFFF0\n000000067F000080000006C00C00000AC000-000000067F000080000006C00C00000B0000__000000873B520940\n000000067F000080000006C00C00000B0000-000000067F000080000006C00C00000B4000__00000086720CFFF0\n000000067F000080000006C00C00000B0000-000000067F000080000006C00C00000B4000__000000873B520940\n000000067F000080000006C00C00000B3AD7-000000067F000080000006C00C0100000000__00000085239DFB81-00000085D35BF439\n000000067F000080000006C00C00000B4000-000000067F000080000006C00C00000B8000__00000086720CFFF0\n000000067F000080000006C00C00000B4000-000000067F000080000006C00C00000B8000__000000873B520940\n000000067F000080000006C00C00000B8000-000000067F000080000006C00C00000BC000__00000086720CFFF0\n000000067F000080000006C00C00000B8000-000000067F000080000006C00C00000BC000__000000873B520940\n000000067F000080000006C00C00000BC000-000000067F000080000006C00C00000C0000__00000086720CFFF0\n000000067F000080000006C00C00000BC000-000000067F000080000006C00C00000C0000__000000873B520940\n000000067F000080000006C00C00000BC102-000000067F000080000006C00C00000C580D__00000085D35BF439-0000008673817FC9\n00000
0067F000080000006C00C00000BFB6E-000000067F000080000006C01400000016BC__0000008673817FC9-00000086ED29E361\n000000067F000080000006C00C00000C0000-000000067F000080000006C00C00000C4000__00000086720CFFF0\n000000067F000080000006C00C00000C0000-000000067F000080000006C00C00000C4000__000000873B520940\n000000067F000080000006C00C00000C4000-000000067F000080000006C00C00000C8000__00000086720CFFF0\n000000067F000080000006C00C00000C4000-000000067F000080000006C00C00000C8000__000000873B520940\n000000067F000080000006C00C00000C580D-000000067F000080000006C00C00000CEF73__00000085D35BF439-0000008673817FC9\n000000067F000080000006C00C00000C8000-000000067F000080000006C00C00000CC000__00000086720CFFF0\n000000067F000080000006C00C00000C8000-000000067F000080000006C00C00000CC000__000000873B520940\n000000067F000080000006C00C00000CC000-000000067F000080000006C00C00000D0000__00000086720CFFF0\n000000067F000080000006C00C00000CC000-000000067F000080000006C00C00000D0000__000000873B520940\n000000067F000080000006C00C00000CEF73-000000067F000080000006C00C00000D86D9__00000085D35BF439-0000008673817FC9\n000000067F000080000006C00C00000D0000-000000067F000080000006C00C00000D4000__00000086720CFFF0\n000000067F000080000006C00C00000D0000-000000067F000080000006C00C00000D4000__000000873B520940\n000000067F000080000006C00C00000D4000-000000067F000080000006C00C00000D8000__00000086720CFFF0\n000000067F000080000006C00C00000D4000-000000067F000080000006C00C00000D8000__000000873B520940\n000000067F000080000006C00C00000D8000-000000067F000080000006C00C00000DC000__00000086720CFFF0\n000000067F000080000006C00C00000D8000-000000067F000080000006C00C00000DC000__000000873B520940\n000000067F000080000006C00C00000D86D9-000000067F000080000006C00C00000E1E0C__00000085D35BF439-0000008673817FC9\n000000067F000080000006C00C00000DC000-000000067F000080000006C00C00000E0000__00000086720CFFF0\n000000067F000080000006C00C00000DC000-000000067F000080000006C00C00000E0000__000000873B520940\n000000067F000080000006C00C00000E0000-000000067F000080000006C00C00000E4000__00
000086720CFFF0\n000000067F000080000006C00C00000E0000-000000067F000080000006C00C00000E4000__000000873B520940\n000000067F000080000006C00C00000E1E0C-000000067F000080000006C00C00000EB572__00000085D35BF439-0000008673817FC9\n000000067F000080000006C00C00000E4000-000000067F000080000006C00C00000E8000__00000086720CFFF0\n000000067F000080000006C00C00000E4000-000000067F000080000006C00C00000E8000__000000873B520940\n000000067F000080000006C00C00000E8000-000000067F000080000006C00C00000EC000__00000086720CFFF0\n000000067F000080000006C00C00000E8000-000000067F000080000006C00C00000EC000__000000873B520940\n000000067F000080000006C00C00000EB572-000000067F000080000006C00C00000F4CD8__00000085D35BF439-0000008673817FC9\n000000067F000080000006C00C00000EC000-000000067F000080000006C00C00000F0000__00000086720CFFF0\n000000067F000080000006C00C00000EC000-000000067F000080000006C00C00000F0000__000000873B520940\n000000067F000080000006C00C00000F0000-000000067F000080000006C00C00000F4000__00000086720CFFF0\n000000067F000080000006C00C00000F0000-000000067F000080000006C00C00000F4000__000000873B520940\n000000067F000080000006C00C00000F4000-000000067F000080000006C00C00000F8000__00000086720CFFF0\n000000067F000080000006C00C00000F4000-000000067F000080000006C00C00000F8000__000000873B520940\n000000067F000080000006C00C00000F4CD8-000000067F000080000006C00C00000FE40B__00000085D35BF439-0000008673817FC9\n000000067F000080000006C00C00000F8000-000000067F000080000006C00C00000FC000__00000086720CFFF0\n000000067F000080000006C00C00000F8000-000000067F000080000006C00C00000FC000__000000873B520940\n000000067F000080000006C00C00000FC000-000000067F000080000006C00C0000100000__00000086720CFFF0\n000000067F000080000006C00C00000FC000-000000067F000080000006C00C0000100000__000000873B520940\n000000067F000080000006C00C00000FE40B-000000067F000080000006C00C0000107B27__00000085D35BF439-0000008673817FC9\n000000067F000080000006C00C0000100000-000000067F000080000006C00C0000104000__00000086720CFFF0\n000000067F000080000006C00C0000100000-000000067F000080000
006C00C0000104000__000000873B520940\n000000067F000080000006C00C00001015F3-000000067F000080000006C0140000013635__00000086ED29E361-000000873C9A2551\n000000067F000080000006C00C0000104000-000000067F000080000006C00C0000108000__00000086720CFFF0\n000000067F000080000006C00C0000104000-000000067F000080000006C00C0000108000__000000873B520940\n000000067F000080000006C00C0000107B27-000000067F000080000006C00C000011128D__00000085D35BF439-0000008673817FC9\n000000067F000080000006C00C0000108000-000000067F000080000006C00C000010C000__00000086720CFFF0\n000000067F000080000006C00C0000108000-000000067F000080000006C00C000010C000__000000873B520940\n000000067F000080000006C00C000010C000-000000067F000080000006C00C0000110000__00000086720CFFF0\n000000067F000080000006C00C000010C000-000000067F000080000006C00C0000110000__000000873B520940\n000000067F000080000006C00C0000110000-000000067F000080000006C0120100000000__000000873B520940\n000000067F000080000006C00C0000110000-030000000000000000000000000000000002__00000086720CFFF0\n000000067F000080000006C00C000011128D-010000000000000001000000030000000017__00000085D35BF439-0000008673817FC9\n000000067F000080000006C0140000000000-000000067F000080000006C0140000004000__000000873B520940\n000000067F000080000006C01400000016BC-000000067F000080000006C014000000830F__0000008673817FC9-00000086ED29E361\n000000067F000080000006C0140000004000-000000067F000080000006C0140000008000__000000873B520940\n000000067F000080000006C0140000008000-000000067F000080000006C014000000C000__000000873B520940\n000000067F000080000006C014000000830F-000000067F000080000006C014000000EF5B__0000008673817FC9-00000086ED29E361\n000000067F000080000006C014000000C000-000000067F000080000006C0140000010000__000000873B520940\n000000067F000080000006C014000000EF5B-000000067F000080000006C0140000015BA7__0000008673817FC9-00000086ED29E361\n000000067F000080000006C0140000010000-000000067F000080000006C0140000014000__000000873B520940\n000000067F000080000006C0140000013636-000000067F000080000006C014000002DB5F__00000086ED29E361-00
0000873C9A2551\n000000067F000080000006C0140000014000-000000067F000080000006C0140000018000__000000873B520940\n000000067F000080000006C0140000015BA7-000000067F000080000006C014000001C7F0__0000008673817FC9-00000086ED29E361\n000000067F000080000006C0140000018000-000000067F000080000006C014000001C000__000000873B520940\n000000067F000080000006C014000001C000-000000067F000080000006C0140000020000__000000873B520940\n000000067F000080000006C014000001C7F0-000000067F000080000006C0140000023430__0000008673817FC9-00000086ED29E361\n000000067F000080000006C0140000020000-000000067F000080000006C0140000024000__000000873B520940\n000000067F000080000006C0140000023430-000000067F000080000006C014000002A049__0000008673817FC9-00000086ED29E361\n000000067F000080000006C0140000024000-000000067F000080000006C0140000028000__000000873B520940\n000000067F000080000006C0140000028000-000000067F000080000006C014000002C000__000000873B520940\n000000067F000080000006C014000002A049-030000000000000000000000000000000002__0000008673817FC9-00000086ED29E361\n000000067F000080000006C014000002C000-030000000000000000000000000000000002__000000873B520940\n000000067F000080000006C014000002DB60-030000000000000000000000000000000002__00000086ED29E361-000000873C9A2551\n000000067F000080000006E00C0000000000-000000067F000080000006E00C0000004000__000000890CF51FE0\n000000067F000080000006E00C0000004000-000000067F000080000006E00C0000008000__000000890CF51FE0\n000000067F000080000006E00C0000008000-000000067F000080000006E00C000000C000__000000890CF51FE0\n000000067F000080000006E00C00000096C8-000000067F000080000006E00C0000012E0A__000000873C9A2551-00000087BC75E5B1\n000000067F000080000006E00C000000C000-000000067F000080000006E00C0000010000__000000890CF51FE0\n000000067F000080000006E00C0000010000-000000067F000080000006E00C0000014000__000000890CF51FE0\n000000067F000080000006E00C0000012E0A-000000067F000080000006E00C000001C570__000000873C9A2551-00000087BC75E5B1\n000000067F000080000006E00C0000014000-000000067F000080000006E00C0000018000__000000890CF51FE0\n00000
0067F000080000006E00C0000018000-000000067F000080000006E00C000001C000__000000890CF51FE0\n000000067F000080000006E00C000001C000-000000067F000080000006E00C0000020000__000000890CF51FE0\n000000067F000080000006E00C000001C570-000000067F000080000006E00C0000025CD6__000000873C9A2551-00000087BC75E5B1\n000000067F000080000006E00C0000020000-000000067F000080000006E00C0000024000__000000890CF51FE0\n000000067F000080000006E00C0000024000-000000067F000080000006E00C0000028000__000000890CF51FE0\n000000067F000080000006E00C0000025CD6-000000067F000080000006E00C000002F40A__000000873C9A2551-00000087BC75E5B1\n000000067F000080000006E00C0000028000-000000067F000080000006E00C000002C000__000000890CF51FE0\n000000067F000080000006E00C000002C000-000000067F000080000006E00C0000030000__000000890CF51FE0\n000000067F000080000006E00C000002F40A-000000067F000080000006E00C0000038B1D__000000873C9A2551-00000087BC75E5B1\n000000067F000080000006E00C0000030000-000000067F000080000006E00C0000034000__000000890CF51FE0\n000000067F000080000006E00C0000034000-000000067F000080000006E00C0000038000__000000890CF51FE0\n000000067F000080000006E00C0000038000-000000067F000080000006E00C000003C000__000000890CF51FE0\n000000067F000080000006E00C0000038B1D-000000067F000080000006E00C0000042283__000000873C9A2551-00000087BC75E5B1\n000000067F000080000006E00C000003C000-000000067F000080000006E00C0000040000__000000890CF51FE0\n000000067F000080000006E00C0000040000-000000067F000080000006E00C0000044000__000000890CF51FE0\n000000067F000080000006E00C0000042283-000000067F000080000006E00C000004B9E9__000000873C9A2551-00000087BC75E5B1\n000000067F000080000006E00C0000044000-000000067F000080000006E00C0000048000__000000890CF51FE0\n000000067F000080000006E00C0000048000-000000067F000080000006E00C000004C000__000000890AE2DFC8\n000000067F000080000006E00C0000048000-000000067F000080000006E00C000004C000__00000089D5AEF6E8\n000000067F000080000006E00C000004B9E9-030000000000000000000000000000000002__000000873C9A2551-00000087BC75E5B1\n000000067F000080000006E00C000004BACB-000000
067F000080000006E00C0000055200__00000087BC75E5B1-000000887C2DFE59\n000000067F000080000006E00C000004C000-000000067F000080000006E00C0000050000__000000890AE2DFC8\n000000067F000080000006E00C000004C000-000000067F000080000006E00C0000050000__00000089D5AEF6E8\n000000067F000080000006E00C0000050000-000000067F000080000006E00C0000054000__000000890AE2DFC8\n000000067F000080000006E00C0000050000-000000067F000080000006E00C0000054000__00000089D5AEF6E8\n000000067F000080000006E00C0000054000-000000067F000080000006E00C0000058000__000000890AE2DFC8\n000000067F000080000006E00C0000054000-000000067F000080000006E00C0000058000__00000089D5AEF6E8\n000000067F000080000006E00C0000054246-000000067F000080000006E00C00000A83ED__0000008985FD3611-00000089D6B8EE99\n000000067F000080000006E00C0000055200-000000067F000080000006E00C000005E90B__00000087BC75E5B1-000000887C2DFE59\n000000067F000080000006E00C0000058000-000000067F000080000006E00C000005C000__000000890AE2DFC8\n000000067F000080000006E00C0000058000-000000067F000080000006E00C000005C000__00000089D5AEF6E8\n000000067F000080000006E00C000005C000-000000067F000080000006E00C0000060000__000000890AE2DFC8\n000000067F000080000006E00C000005C000-000000067F000080000006E00C0000060000__00000089D5AEF6E8\n000000067F000080000006E00C000005E90B-000000067F000080000006E00C000006802B__00000087BC75E5B1-000000887C2DFE59\n000000067F000080000006E00C0000060000-000000067F000080000006E00C0000064000__000000890AE2DFC8\n000000067F000080000006E00C0000060000-000000067F000080000006E00C0000064000__00000089D5AEF6E8\n000000067F000080000006E00C0000064000-000000067F000080000006E00C0000068000__000000890AE2DFC8\n000000067F000080000006E00C0000064000-000000067F000080000006E00C0000068000__00000089D5AEF6E8\n000000067F000080000006E00C0000068000-000000067F000080000006E00C000006C000__000000890AE2DFC8\n000000067F000080000006E00C0000068000-000000067F000080000006E00C000006C000__00000089D5AEF6E8\n000000067F000080000006E00C000006802B-000000067F000080000006E00C0000071782__00000087BC75E5B1-000000887C2DFE59\n00000
0067F000080000006E00C000006C000-000000067F000080000006E00C0000070000__000000890AE2DFC8\n000000067F000080000006E00C000006C000-000000067F000080000006E00C0000070000__00000089D5AEF6E8\n000000067F000080000006E00C0000070000-000000067F000080000006E00C0000074000__000000890AE2DFC8\n000000067F000080000006E00C0000070000-000000067F000080000006E00C0000074000__00000089D5AEF6E8\n000000067F000080000006E00C0000071782-000000067F000080000006E00C000007AEE8__00000087BC75E5B1-000000887C2DFE59\n000000067F000080000006E00C0000074000-000000067F000080000006E00C0000078000__000000890AE2DFC8\n000000067F000080000006E00C0000074000-000000067F000080000006E00C0000078000__00000089D5AEF6E8\n000000067F000080000006E00C0000078000-000000067F000080000006E00C000007C000__000000890AE2DFC8\n000000067F000080000006E00C0000078000-000000067F000080000006E00C000007C000__00000089D5AEF6E8\n000000067F000080000006E00C000007AEE8-000000067F000080000006E00C000008460B__00000087BC75E5B1-000000887C2DFE59\n000000067F000080000006E00C000007C000-000000067F000080000006E00C0000080000__000000890AE2DFC8\n000000067F000080000006E00C000007C000-000000067F000080000006E00C0000080000__00000089D5AEF6E8\n000000067F000080000006E00C0000080000-000000067F000080000006E00C0000084000__000000890AE2DFC8\n000000067F000080000006E00C0000080000-000000067F000080000006E00C0000084000__00000089D5AEF6E8\n000000067F000080000006E00C0000084000-000000067F000080000006E00C0000088000__000000890AE2DFC8\n000000067F000080000006E00C0000084000-000000067F000080000006E00C0000088000__00000089D5AEF6E8\n000000067F000080000006E00C000008460B-000000067F000080000006E00C000008DD71__00000087BC75E5B1-000000887C2DFE59\n000000067F000080000006E00C0000088000-000000067F000080000006E00C000008C000__000000890AE2DFC8\n000000067F000080000006E00C0000088000-000000067F000080000006E00C000008C000__00000089D5AEF6E8\n000000067F000080000006E00C000008C000-000000067F000080000006E00C0000090000__000000890AE2DFC8\n000000067F000080000006E00C000008C000-000000067F000080000006E00C0000090000__00000089D5AEF6E8\n0
00000067F000080000006E00C000008DD71-000000067F000080000006E00C00000974D7__00000087BC75E5B1-000000887C2DFE59\n000000067F000080000006E00C0000090000-000000067F000080000006E00C0000094000__000000890AE2DFC8\n000000067F000080000006E00C0000090000-000000067F000080000006E00C0000094000__00000089D5AEF6E8\n000000067F000080000006E00C0000094000-000000067F000080000006E00C0000098000__000000890AE2DFC8\n000000067F000080000006E00C0000094000-000000067F000080000006E00C0000098000__00000089D5AEF6E8\n000000067F000080000006E00C00000974D7-000000067F000080000006E00C00000A0C0B__00000087BC75E5B1-000000887C2DFE59\n000000067F000080000006E00C0000098000-000000067F000080000006E00C000009C000__000000890AE2DFC8\n000000067F000080000006E00C0000098000-000000067F000080000006E00C000009C000__00000089D5AEF6E8\n000000067F000080000006E00C000009C000-000000067F000080000006E00C00000A0000__000000890AE2DFC8\n000000067F000080000006E00C000009C000-000000067F000080000006E00C00000A0000__00000089D5AEF6E8\n000000067F000080000006E00C00000A0000-000000067F000080000006E00C00000A4000__000000890AE2DFC8\n000000067F000080000006E00C00000A0000-000000067F000080000006E00C00000A4000__00000089D5AEF6E8\n000000067F000080000006E00C00000A0C0B-000000067F000080000006E00C00000AA371__00000087BC75E5B1-000000887C2DFE59\n000000067F000080000006E00C00000A4000-000000067F000080000006E00C00000A8000__000000890AE2DFC8\n000000067F000080000006E00C00000A4000-000000067F000080000006E00C00000A8000__00000089D5AEF6E8\n000000067F000080000006E00C00000A8000-000000067F000080000006E00C00000AC000__000000890AE2DFC8\n000000067F000080000006E00C00000A8000-000000067F000080000006E00C00000AC000__00000089D5AEF6E8\n000000067F000080000006E00C00000A8407-000000067F000080000006E00C00000FD787__0000008985FD3611-00000089D6B8EE99\n000000067F000080000006E00C00000AA371-000000067F000080000006E00C00000B3AD7__00000087BC75E5B1-000000887C2DFE59\n000000067F000080000006E00C00000AC000-000000067F000080000006E00C00000B0000__000000890AE2DFC8\n000000067F000080000006E00C00000AC000-000000067F000080000
006E00C00000B0000__00000089D5AEF6E8\n000000067F000080000006E00C00000B0000-000000067F000080000006E00C00000B4000__000000890AE2DFC8\n000000067F000080000006E00C00000B0000-000000067F000080000006E00C00000B4000__00000089D5AEF6E8\n000000067F000080000006E00C00000B3AD7-000000067F000080000006E00C00000BD20B__00000087BC75E5B1-000000887C2DFE59\n000000067F000080000006E00C00000B4000-000000067F000080000006E00C00000B8000__000000890AE2DFC8\n000000067F000080000006E00C00000B4000-000000067F000080000006E00C00000B8000__00000089D5AEF6E8\n000000067F000080000006E00C00000B6F42-000000067F000080000006E0140000000EEF__000000890C5B6001-0000008985FD3611\n000000067F000080000006E00C00000B8000-000000067F000080000006E00C00000BC000__000000890AE2DFC8\n000000067F000080000006E00C00000B8000-000000067F000080000006E00C00000BC000__00000089D5AEF6E8\n000000067F000080000006E00C00000BC000-000000067F000080000006E00C00000C0000__000000890AE2DFC8\n000000067F000080000006E00C00000BC000-000000067F000080000006E00C00000C0000__00000089D5AEF6E8\n000000067F000080000006E00C00000BD20B-000000067F000080000006E00C0100000000__00000087BC75E5B1-000000887C2DFE59\n000000067F000080000006E00C00000C0000-000000067F000080000006E00C00000C4000__000000890AE2DFC8\n000000067F000080000006E00C00000C0000-000000067F000080000006E00C00000C4000__00000089D5AEF6E8\n000000067F000080000006E00C00000C4000-000000067F000080000006E00C00000C8000__000000890AE2DFC8\n000000067F000080000006E00C00000C4000-000000067F000080000006E00C00000C8000__00000089D5AEF6E8\n000000067F000080000006E00C00000C5883-000000067F000080000006E00C00000CEFE9__000000887C2DFE59-000000890C5B6001\n000000067F000080000006E00C00000C8000-000000067F000080000006E00C00000CC000__000000890AE2DFC8\n000000067F000080000006E00C00000C8000-000000067F000080000006E00C00000CC000__00000089D5AEF6E8\n000000067F000080000006E00C00000CC000-000000067F000080000006E00C00000D0000__000000890AE2DFC8\n000000067F000080000006E00C00000CC000-000000067F000080000006E00C00000D0000__00000089D5AEF6E8\n000000067F000080000006E00C00000CEFE
9-000000067F000080000006E00C00000D872B__000000887C2DFE59-000000890C5B6001\n000000067F000080000006E00C00000D0000-000000067F000080000006E00C00000D4000__000000890AE2DFC8\n000000067F000080000006E00C00000D0000-000000067F000080000006E00C00000D4000__00000089D5AEF6E8\n000000067F000080000006E00C00000D4000-000000067F000080000006E00C00000D8000__000000890AE2DFC8\n000000067F000080000006E00C00000D4000-000000067F000080000006E00C00000D8000__00000089D5AEF6E8\n000000067F000080000006E00C00000D8000-000000067F000080000006E00C00000DC000__000000890AE2DFC8\n000000067F000080000006E00C00000D8000-000000067F000080000006E00C00000DC000__00000089D5AEF6E8\n000000067F000080000006E00C00000D872B-000000067F000080000006E00C00000E1E91__000000887C2DFE59-000000890C5B6001\n000000067F000080000006E00C00000DC000-000000067F000080000006E00C00000E0000__000000890AE2DFC8\n000000067F000080000006E00C00000DC000-000000067F000080000006E00C00000E0000__00000089D5AEF6E8\n000000067F000080000006E00C00000E0000-000000067F000080000006E00C00000E4000__000000890AE2DFC8\n000000067F000080000006E00C00000E0000-000000067F000080000006E00C00000E4000__00000089D5AEF6E8\n000000067F000080000006E00C00000E1E91-000000067F000080000006E00C00000EB5F7__000000887C2DFE59-000000890C5B6001\n000000067F000080000006E00C00000E4000-000000067F000080000006E00C00000E8000__000000890AE2DFC8\n000000067F000080000006E00C00000E4000-000000067F000080000006E00C00000E8000__00000089D5AEF6E8\n000000067F000080000006E00C00000E8000-000000067F000080000006E00C00000EC000__000000890AE2DFC8\n000000067F000080000006E00C00000E8000-000000067F000080000006E00C00000EC000__00000089D5AEF6E8\n000000067F000080000006E00C00000EB5F7-000000067F000080000006E00C00000F4D0C__000000887C2DFE59-000000890C5B6001\n000000067F000080000006E00C00000EC000-000000067F000080000006E00C00000F0000__000000890AE2DFC8\n000000067F000080000006E00C00000EC000-000000067F000080000006E00C00000F0000__00000089D5AEF6E8\n000000067F000080000006E00C00000F0000-000000067F000080000006E00C00000F4000__000000890AE2DFC8\n000000067F0000
80000006E00C00000F0000-000000067F000080000006E00C00000F4000__00000089D5AEF6E8\n000000067F000080000006E00C00000F4000-000000067F000080000006E00C00000F8000__000000890AE2DFC8\n000000067F000080000006E00C00000F4000-000000067F000080000006E00C00000F8000__00000089D5AEF6E8\n000000067F000080000006E00C00000F4D0C-000000067F000080000006E00C00000FE472__000000887C2DFE59-000000890C5B6001\n000000067F000080000006E00C00000F8000-000000067F000080000006E00C00000FC000__000000890AE2DFC8\n000000067F000080000006E00C00000F8000-000000067F000080000006E00C00000FC000__00000089D5AEF6E8\n000000067F000080000006E00C00000FC000-000000067F000080000006E00C0000100000__000000890AE2DFC8\n000000067F000080000006E00C00000FC000-000000067F000080000006E00C0000100000__00000089D5AEF6E8\n000000067F000080000006E00C00000FD78D-000000067F000080000006E0140000011DB5__0000008985FD3611-00000089D6B8EE99\n000000067F000080000006E00C00000FE472-000000067F000080000006E00C0000107B8E__000000887C2DFE59-000000890C5B6001\n000000067F000080000006E00C0000100000-000000067F000080000006E00C0000104000__000000890AE2DFC8\n000000067F000080000006E00C0000100000-000000067F000080000006E00C0000104000__00000089D5AEF6E8\n000000067F000080000006E00C0000104000-000000067F000080000006E00C0000108000__000000890AE2DFC8\n000000067F000080000006E00C0000104000-000000067F000080000006E00C0000108000__00000089D5AEF6E8\n000000067F000080000006E00C0000107B8E-000000067F000080000006E00C00001112F4__000000887C2DFE59-000000890C5B6001\n000000067F000080000006E00C0000108000-000000067F000080000006E00C000010C000__000000890AE2DFC8\n000000067F000080000006E00C0000108000-000000067F000080000006E00C000010C000__00000089D5AEF6E8\n000000067F000080000006E00C000010C000-000000067F000080000006E00C0000110000__000000890AE2DFC8\n000000067F000080000006E00C000010C000-000000067F000080000006E00C0000110000__00000089D5AEF6E8\n000000067F000080000006E00C0000110000-000000067F000080000006E0120100000000__00000089D5AEF6E8\n000000067F000080000006E00C0000110000-030000000000000000000000000000000002__000000890AE
2DFC8\n000000067F000080000006E00C00001112F4-01000000000000000100000003000000001A__000000887C2DFE59-000000890C5B6001\n000000067F000080000006E0140000000000-000000067F000080000006E0140000004000__00000089D5AEF6E8\n000000067F000080000006E0140000000EEF-000000067F000080000006E0140000007C4F__000000890C5B6001-0000008985FD3611\n000000067F000080000006E0140000004000-000000067F000080000006E0140000008000__00000089D5AEF6E8\n000000067F000080000006E0140000007C4F-000000067F000080000006E014000000E97E__000000890C5B6001-0000008985FD3611\n000000067F000080000006E0140000008000-000000067F000080000006E014000000C000__00000089D5AEF6E8\n000000067F000080000006E014000000C000-000000067F000080000006E0140000010000__00000089D5AEF6E8\n000000067F000080000006E014000000E97E-000000067F000080000006E01400000156DC__000000890C5B6001-0000008985FD3611\n000000067F000080000006E0140000010000-000000067F000080000006E0140000014000__00000089D5AEF6E8\n000000067F000080000006E0140000011DB5-000000067F000080000006E014000002B9CE__0000008985FD3611-00000089D6B8EE99\n000000067F000080000006E0140000014000-000000067F000080000006E0140000018000__00000089D5AEF6E8\n000000067F000080000006E01400000156DC-000000067F000080000006E014000001C468__000000890C5B6001-0000008985FD3611\n000000067F000080000006E0140000018000-000000067F000080000006E014000001C000__00000089D5AEF6E8\n000000067F000080000006E014000001C000-000000067F000080000006E0140000020000__00000089D5AEF6E8\n000000067F000080000006E014000001C468-000000067F000080000006E01400000231D5__000000890C5B6001-0000008985FD3611\n000000067F000080000006E0140000020000-000000067F000080000006E0140000024000__00000089D5AEF6E8\n000000067F000080000006E01400000231D5-000000067F000080000006E0140000029F96__000000890C5B6001-0000008985FD3611\n000000067F000080000006E0140000024000-000000067F000080000006E0140000028000__00000089D5AEF6E8\n000000067F000080000006E0140000028000-000000067F000080000006E014000002C000__00000089D5AEF6E8\n000000067F000080000006E0140000029F96-030000000000000000000000000000000002__000000890C5B600
1-0000008985FD3611\n000000067F000080000006E014000002B9D0-030000000000000000000000000000000002__0000008985FD3611-00000089D6B8EE99\n000000067F000080000006E014000002C000-030000000000000000000000000000000002__00000089D5AEF6E8\n000000067F000080000007000C0000000000-000000067F000080000007000C0000004000__0000008BA730BFE8\n000000067F000080000007000C0000004000-000000067F000080000007000C0000008000__0000008BA730BFE8\n000000067F000080000007000C0000008000-000000067F000080000007000C000000C000__0000008BA730BFE8\n000000067F000080000007000C000000955C-000000067F000080000007000C0000012CC2__00000089D6B8EE99-0000008A56BBF739\n000000067F000080000007000C000000C000-000000067F000080000007000C0000010000__0000008BA730BFE8\n000000067F000080000007000C0000010000-000000067F000080000007000C0000014000__0000008BA730BFE8\n000000067F000080000007000C0000012CC2-000000067F000080000007000C000001C40A__00000089D6B8EE99-0000008A56BBF739\n000000067F000080000007000C0000014000-000000067F000080000007000C0000018000__0000008BA730BFE8\n000000067F000080000007000C0000018000-000000067F000080000007000C000001C000__0000008BA730BFE8\n000000067F000080000007000C000001C000-000000067F000080000007000C0000020000__0000008BA730BFE8\n000000067F000080000007000C000001C40A-000000067F000080000007000C0000025B39__00000089D6B8EE99-0000008A56BBF739\n000000067F000080000007000C0000020000-000000067F000080000007000C0000024000__0000008BA730BFE8\n000000067F000080000007000C0000024000-000000067F000080000007000C0000028000__0000008BA730BFE8\n000000067F000080000007000C0000025B39-000000067F000080000007000C000002F29F__00000089D6B8EE99-0000008A56BBF739\n000000067F000080000007000C0000028000-000000067F000080000007000C000002C000__0000008BA730BFE8\n000000067F000080000007000C000002C000-000000067F000080000007000C0000030000__0000008BA730BFE8\n000000067F000080000007000C000002F29F-000000067F000080000007000C00000389B3__00000089D6B8EE99-0000008A56BBF739\n000000067F000080000007000C0000030000-000000067F000080000007000C0000034000__0000008BA730BFE8\n000000067F00008000
0007000C0000034000-000000067F000080000007000C0000038000__0000008BA730BFE8\n000000067F000080000007000C0000038000-000000067F000080000007000C000003C000__0000008BA730BFE8\n000000067F000080000007000C00000389B3-000000067F000080000007000C0000042119__00000089D6B8EE99-0000008A56BBF739\n000000067F000080000007000C000003C000-000000067F000080000007000C0000040000__0000008BA730BFE8\n000000067F000080000007000C0000040000-000000067F000080000007000C0000044000__0000008BA730BFE8\n000000067F000080000007000C0000042119-000000067F000080000007000C000004B87F__00000089D6B8EE99-0000008A56BBF739\n000000067F000080000007000C0000044000-000000067F000080000007000C0000048000__0000008BA730BFE8\n000000067F000080000007000C0000048000-000000067F000080000007000C000004C000__0000008B9669EDB0\n000000067F000080000007000C0000048000-000000067F000080000007000C000004C000__0000008C71903720\n000000067F000080000007000C000004B87F-030000000000000000000000000000000002__00000089D6B8EE99-0000008A56BBF739\n000000067F000080000007000C000004BAD3-000000067F000080000007000C0000055207__0000008A56BBF739-0000008AF67FEC19\n000000067F000080000007000C000004C000-000000067F000080000007000C0000050000__0000008B9669EDB0\n000000067F000080000007000C000004C000-000000067F000080000007000C0000050000__0000008C71903720\n000000067F000080000007000C0000050000-000000067F000080000007000C0000054000__0000008B9669EDB0\n000000067F000080000007000C0000050000-000000067F000080000007000C0000054000__0000008C71903720\n000000067F000080000007000C0000053C23-000000067F000080000007000C00000A6F76__0000008C2045B721-0000008C72843D41\n000000067F000080000007000C0000054000-000000067F000080000007000C0000058000__0000008B9669EDB0\n000000067F000080000007000C0000054000-000000067F000080000007000C0000058000__0000008C71903720\n000000067F000080000007000C0000055207-000000067F000080000007000C000005E912__0000008A56BBF739-0000008AF67FEC19\n000000067F000080000007000C0000058000-000000067F000080000007000C000005C000__0000008B9669EDB0\n000000067F000080000007000C0000058000-000000067F000080000
007000C000005C000__0000008C71903720\n000000067F000080000007000C000005C000-000000067F000080000007000C0000060000__0000008B9669EDB0\n000000067F000080000007000C000005C000-000000067F000080000007000C0000060000__0000008C71903720\n000000067F000080000007000C000005E912-000000067F000080000007000C000006802C__0000008A56BBF739-0000008AF67FEC19\n000000067F000080000007000C0000060000-000000067F000080000007000C0000064000__0000008B9669EDB0\n000000067F000080000007000C0000060000-000000067F000080000007000C0000064000__0000008C71903720\n000000067F000080000007000C0000064000-000000067F000080000007000C0000068000__0000008B9669EDB0\n000000067F000080000007000C0000064000-000000067F000080000007000C0000068000__0000008C71903720\n000000067F000080000007000C0000068000-000000067F000080000007000C000006C000__0000008B9669EDB0\n000000067F000080000007000C0000068000-000000067F000080000007000C000006C000__0000008C71903720\n000000067F000080000007000C000006802C-000000067F000080000007000C0000071783__0000008A56BBF739-0000008AF67FEC19\n000000067F000080000007000C000006C000-000000067F000080000007000C0000070000__0000008B9669EDB0\n000000067F000080000007000C000006C000-000000067F000080000007000C0000070000__0000008C71903720\n000000067F000080000007000C0000070000-000000067F000080000007000C0000074000__0000008B9669EDB0\n000000067F000080000007000C0000070000-000000067F000080000007000C0000074000__0000008C71903720\n000000067F000080000007000C0000071783-000000067F000080000007000C000007AEE9__0000008A56BBF739-0000008AF67FEC19\n000000067F000080000007000C0000074000-000000067F000080000007000C0000078000__0000008B9669EDB0\n000000067F000080000007000C0000074000-000000067F000080000007000C0000078000__0000008C71903720\n000000067F000080000007000C0000078000-000000067F000080000007000C000007C000__0000008B9669EDB0\n000000067F000080000007000C0000078000-000000067F000080000007000C000007C000__0000008C71903720\n000000067F000080000007000C000007AEE9-000000067F000080000007000C000008460B__0000008A56BBF739-0000008AF67FEC19\n000000067F000080000007000C000007C00
0-000000067F000080000007000C0000080000__0000008B9669EDB0\n000000067F000080000007000C000007C000-000000067F000080000007000C0000080000__0000008C71903720\n000000067F000080000007000C0000080000-000000067F000080000007000C0000084000__0000008B9669EDB0\n000000067F000080000007000C0000080000-000000067F000080000007000C0000084000__0000008C71903720\n000000067F000080000007000C0000084000-000000067F000080000007000C0000088000__0000008B9669EDB0\n000000067F000080000007000C0000084000-000000067F000080000007000C0000088000__0000008C71903720\n000000067F000080000007000C000008460B-000000067F000080000007000C000008DD71__0000008A56BBF739-0000008AF67FEC19\n000000067F000080000007000C0000088000-000000067F000080000007000C000008C000__0000008B9669EDB0\n000000067F000080000007000C0000088000-000000067F000080000007000C000008C000__0000008C71903720\n000000067F000080000007000C000008C000-000000067F000080000007000C0000090000__0000008B9669EDB0\n000000067F000080000007000C000008C000-000000067F000080000007000C0000090000__0000008C71903720\n000000067F000080000007000C000008DD71-000000067F000080000007000C00000974D7__0000008A56BBF739-0000008AF67FEC19\n000000067F000080000007000C0000090000-000000067F000080000007000C0000094000__0000008B9669EDB0\n000000067F000080000007000C0000090000-000000067F000080000007000C0000094000__0000008C71903720\n000000067F000080000007000C0000094000-000000067F000080000007000C0000098000__0000008B9669EDB0\n000000067F000080000007000C0000094000-000000067F000080000007000C0000098000__0000008C71903720\n000000067F000080000007000C00000974D7-000000067F000080000007000C00000A0C0B__0000008A56BBF739-0000008AF67FEC19\n000000067F000080000007000C0000098000-000000067F000080000007000C000009C000__0000008B9669EDB0\n000000067F000080000007000C0000098000-000000067F000080000007000C000009C000__0000008C71903720\n000000067F000080000007000C000009C000-000000067F000080000007000C00000A0000__0000008B9669EDB0\n000000067F000080000007000C000009C000-000000067F000080000007000C00000A0000__0000008C71903720\n000000067F000080000007000C00000
A0000-000000067F000080000007000C00000A4000__0000008B9669EDB0\n000000067F000080000007000C00000A0000-000000067F000080000007000C00000A4000__0000008C71903720\n000000067F000080000007000C00000A0C0B-000000067F000080000007000C00000AA371__0000008A56BBF739-0000008AF67FEC19\n000000067F000080000007000C00000A4000-000000067F000080000007000C00000A8000__0000008B9669EDB0\n000000067F000080000007000C00000A4000-000000067F000080000007000C00000A8000__0000008C71903720\n000000067F000080000007000C00000A6F77-000000067F000080000007000C00000FA170__0000008C2045B721-0000008C72843D41\n000000067F000080000007000C00000A8000-000000067F000080000007000C00000AC000__0000008B9669EDB0\n000000067F000080000007000C00000A8000-000000067F000080000007000C00000AC000__0000008C71903720\n000000067F000080000007000C00000AA371-000000067F000080000007000C0100000000__0000008A56BBF739-0000008AF67FEC19\n000000067F000080000007000C00000AC000-000000067F000080000007000C00000B0000__0000008B9669EDB0\n000000067F000080000007000C00000AC000-000000067F000080000007000C00000B0000__0000008C71903720\n000000067F000080000007000C00000B0000-000000067F000080000007000C00000B4000__0000008B9669EDB0\n000000067F000080000007000C00000B0000-000000067F000080000007000C00000B4000__0000008C71903720\n000000067F000080000007000C00000B2B06-000000067F000080000007000C00000BC211__0000008AF67FEC19-0000008BA6803FC9\n000000067F000080000007000C00000B4000-000000067F000080000007000C00000B8000__0000008B9669EDB0\n000000067F000080000007000C00000B4000-000000067F000080000007000C00000B8000__0000008C71903720\n000000067F000080000007000C00000B8000-000000067F000080000007000C00000BC000__0000008B9669EDB0\n000000067F000080000007000C00000B8000-000000067F000080000007000C00000BC000__0000008C71903720\n000000067F000080000007000C00000BC000-000000067F000080000007000C00000C0000__0000008B9669EDB0\n000000067F000080000007000C00000BC000-000000067F000080000007000C00000C0000__0000008C71903720\n000000067F000080000007000C00000BC211-000000067F000080000007000C00000C5941__0000008AF67FEC19-0000008BA68
03FC9\n000000067F000080000007000C00000BF157-000000067F000080000007001400000016B2__0000008BA6803FC9-0000008C2045B721\n000000067F000080000007000C00000C0000-000000067F000080000007000C00000C4000__0000008B9669EDB0\n000000067F000080000007000C00000C0000-000000067F000080000007000C00000C4000__0000008C71903720\n000000067F000080000007000C00000C4000-000000067F000080000007000C00000C8000__0000008B9669EDB0\n000000067F000080000007000C00000C4000-000000067F000080000007000C00000C8000__0000008C71903720\n000000067F000080000007000C00000C5941-000000067F000080000007000C00000CF0A7__0000008AF67FEC19-0000008BA6803FC9\n000000067F000080000007000C00000C8000-000000067F000080000007000C00000CC000__0000008B9669EDB0\n000000067F000080000007000C00000C8000-000000067F000080000007000C00000CC000__0000008C71903720\n000000067F000080000007000C00000CC000-000000067F000080000007000C00000D0000__0000008B9669EDB0\n000000067F000080000007000C00000CC000-000000067F000080000007000C00000D0000__0000008C71903720\n000000067F000080000007000C00000CF0A7-000000067F000080000007000C00000D87BC__0000008AF67FEC19-0000008BA6803FC9\n000000067F000080000007000C00000D0000-000000067F000080000007000C00000D4000__0000008B9669EDB0\n000000067F000080000007000C00000D0000-000000067F000080000007000C00000D4000__0000008C71903720\n000000067F000080000007000C00000D4000-000000067F000080000007000C00000D8000__0000008B9669EDB0\n000000067F000080000007000C00000D4000-000000067F000080000007000C00000D8000__0000008C71903720\n000000067F000080000007000C00000D8000-000000067F000080000007000C00000DC000__0000008B9669EDB0\n000000067F000080000007000C00000D8000-000000067F000080000007000C00000DC000__0000008C71903720\n000000067F000080000007000C00000D87BC-000000067F000080000007000C00000E1F0A__0000008AF67FEC19-0000008BA6803FC9\n000000067F000080000007000C00000DC000-000000067F000080000007000C00000E0000__0000008B9669EDB0\n000000067F000080000007000C00000DC000-000000067F000080000007000C00000E0000__0000008C71903720\n000000067F000080000007000C00000E0000-000000067F000080000007000C00
000E4000__0000008B9669EDB0\n000000067F000080000007000C00000E0000-000000067F000080000007000C00000E4000__0000008C71903720\n000000067F000080000007000C00000E1F0A-000000067F000080000007000C00000EB670__0000008AF67FEC19-0000008BA6803FC9\n000000067F000080000007000C00000E4000-000000067F000080000007000C00000E8000__0000008B9669EDB0\n000000067F000080000007000C00000E4000-000000067F000080000007000C00000E8000__0000008C71903720\n000000067F000080000007000C00000E8000-000000067F000080000007000C00000EC000__0000008B9669EDB0\n000000067F000080000007000C00000E8000-000000067F000080000007000C00000EC000__0000008C71903720\n000000067F000080000007000C00000EB670-000000067F000080000007000C00000F4DA7__0000008AF67FEC19-0000008BA6803FC9\n000000067F000080000007000C00000EC000-000000067F000080000007000C00000F0000__0000008B9669EDB0\n000000067F000080000007000C00000EC000-000000067F000080000007000C00000F0000__0000008C71903720\n000000067F000080000007000C00000F0000-000000067F000080000007000C00000F4000__0000008B9669EDB0\n000000067F000080000007000C00000F0000-000000067F000080000007000C00000F4000__0000008C71903720\n000000067F000080000007000C00000F4000-000000067F000080000007000C00000F8000__0000008B9669EDB0\n000000067F000080000007000C00000F4000-000000067F000080000007000C00000F8000__0000008C71903720\n000000067F000080000007000C00000F4DA7-000000067F000080000007000C00000FE509__0000008AF67FEC19-0000008BA6803FC9\n000000067F000080000007000C00000F8000-000000067F000080000007000C00000FC000__0000008B9669EDB0\n000000067F000080000007000C00000F8000-000000067F000080000007000C00000FC000__0000008C71903720\n000000067F000080000007000C00000FA175-000000067F00008000000700140000010412__0000008C2045B721-0000008C72843D41\n000000067F000080000007000C00000FC000-000000067F000080000007000C0000100000__0000008B9669EDB0\n000000067F000080000007000C00000FC000-000000067F000080000007000C0000100000__0000008C71903720\n000000067F000080000007000C00000FE509-000000067F000080000007000C0000107C2B__0000008AF67FEC19-0000008BA6803FC9\n000000067F000080000007000C0
000100000-000000067F000080000007000C0000104000__0000008B9669EDB0\n000000067F000080000007000C0000100000-000000067F000080000007000C0000104000__0000008C71903720\n000000067F000080000007000C0000104000-000000067F000080000007000C0000108000__0000008B9669EDB0\n000000067F000080000007000C0000104000-000000067F000080000007000C0000108000__0000008C71903720\n000000067F000080000007000C0000107C2B-000000067F000080000007000C0000111385__0000008AF67FEC19-0000008BA6803FC9\n000000067F000080000007000C0000108000-000000067F000080000007000C000010C000__0000008C71903720\n000000067F000080000007000C0000108000-030000000000000000000000000000000002__0000008B9669EDB0\n000000067F000080000007000C000010C000-000000067F000080000007000C0000110000__0000008C71903720\n000000067F000080000007000C0000110000-000000067F00008000000700120100000000__0000008C71903720\n000000067F000080000007000C0000111385-01000000000000000100000003000000001E__0000008AF67FEC19-0000008BA6803FC9\n000000067F00008000000700140000000000-000000067F00008000000700140000004000__0000008C71903720\n000000067F000080000007001400000016B2-000000067F000080000007001400000082A6__0000008BA6803FC9-0000008C2045B721\n000000067F00008000000700140000004000-000000067F00008000000700140000008000__0000008C71903720\n000000067F00008000000700140000008000-000000067F0000800000070014000000C000__0000008C71903720\n000000067F000080000007001400000082A6-000000067F0000800000070014000000EED0__0000008BA6803FC9-0000008C2045B721\n000000067F0000800000070014000000C000-000000067F00008000000700140000010000__0000008C71903720\n000000067F0000800000070014000000EED0-000000067F00008000000700140000015ADC__0000008BA6803FC9-0000008C2045B721\n000000067F00008000000700140000010000-000000067F00008000000700140000014000__0000008C71903720\n000000067F0000800000070014000001041E-000000067F000080000007001400000294B8__0000008C2045B721-0000008C72843D41\n000000067F00008000000700140000014000-000000067F00008000000700140000018000__0000008C71903720\n000000067F00008000000700140000015ADC-000000067F000080000007001400
0001C6D6__0000008BA6803FC9-0000008C2045B721\n000000067F00008000000700140000018000-000000067F0000800000070014000001C000__0000008C71903720\n000000067F0000800000070014000001C000-000000067F00008000000700140000020000__0000008C71903720\n000000067F0000800000070014000001C6D6-000000067F000080000007001400000232FD__0000008BA6803FC9-0000008C2045B721\n000000067F00008000000700140000020000-000000067F00008000000700140000024000__0000008C71903720\n000000067F000080000007001400000232FD-000000067F00008000000700140000029F07__0000008BA6803FC9-0000008C2045B721\n000000067F00008000000700140000024000-000000067F00008000000700140000028000__0000008C71903720\n000000067F00008000000700140000028000-000000067F0000800000070014000002C000__0000008C71903720\n000000067F000080000007001400000294BA-030000000000000000000000000000000002__0000008C2045B721-0000008C72843D41\n000000067F00008000000700140000029F07-030000000000000000000000000000000002__0000008BA6803FC9-0000008C2045B721\n000000067F0000800000070014000002C000-030000000000000000000000000000000002__0000008C71903720\n000000067F000080000007200C0000000000-000000067F000080000007200C0000004000__0000008E43487FF0\n000000067F000080000007200C0000004000-000000067F000080000007200C0000008000__0000008E43487FF0\n000000067F000080000007200C0000008000-000000067F000080000007200C000000C000__0000008E43487FF0\n000000067F000080000007200C000000933D-000000067F000080000007200C0000012AA3__0000008C72843D41-0000008CF2BFFC89\n000000067F000080000007200C000000C000-000000067F000080000007200C0000010000__0000008E43487FF0\n000000067F000080000007200C0000010000-000000067F000080000007200C0000014000__0000008E43487FF0\n000000067F000080000007200C0000012AA3-000000067F000080000007200C000001C209__0000008C72843D41-0000008CF2BFFC89\n000000067F000080000007200C0000014000-000000067F000080000007200C0000018000__0000008E43487FF0\n000000067F000080000007200C0000018000-000000067F000080000007200C000001C000__0000008E43487FF0\n000000067F000080000007200C000001C000-000000067F000080000007200C0000020000__0000008E434
87FF0\n000000067F000080000007200C000001C209-000000067F000080000007200C0000025939__0000008C72843D41-0000008CF2BFFC89\n000000067F000080000007200C0000020000-000000067F000080000007200C0000024000__0000008E43487FF0\n000000067F000080000007200C0000024000-000000067F000080000007200C0000028000__0000008E43487FF0\n000000067F000080000007200C0000025939-000000067F000080000007200C000002F09F__0000008C72843D41-0000008CF2BFFC89\n000000067F000080000007200C0000028000-000000067F000080000007200C000002C000__0000008E43487FF0\n000000067F000080000007200C000002C000-000000067F000080000007200C0000030000__0000008E43487FF0\n000000067F000080000007200C000002F09F-000000067F000080000007200C00000387B4__0000008C72843D41-0000008CF2BFFC89\n000000067F000080000007200C0000030000-000000067F000080000007200C0000034000__0000008E43487FF0\n000000067F000080000007200C0000034000-000000067F000080000007200C0000038000__0000008E43487FF0\n000000067F000080000007200C0000038000-000000067F000080000007200C000003C000__0000008E43487FF0\n000000067F000080000007200C00000387B4-000000067F000080000007200C0000041F1A__0000008C72843D41-0000008CF2BFFC89\n000000067F000080000007200C000003C000-000000067F000080000007200C0000040000__0000008E43487FF0\n000000067F000080000007200C0000040000-000000067F000080000007200C0000044000__0000008E43487FF0\n000000067F000080000007200C0000041F1A-000000067F000080000007200C000004B680__0000008C72843D41-0000008CF2BFFC89\n000000067F000080000007200C0000044000-000000067F000080000007200C0000048000__0000008E43487FF0\n000000067F000080000007200C0000048000-000000067F000080000007200C000004C000__0000008E3CDF59C0\n000000067F000080000007200C0000048000-000000067F000080000007200C000004C000__0000008F10EA21C8\n000000067F000080000007200C000004B680-030000000000000000000000000000000002__0000008C72843D41-0000008CF2BFFC89\n000000067F000080000007200C000004BACE-000000067F000080000007200C0000055202__0000008CF2BFFC89-0000008DB277FA49\n000000067F000080000007200C000004C000-000000067F000080000007200C0000050000__0000008E3CDF59C0\n000000067F0000
80000007200C000004C000-000000067F000080000007200C0000050000__0000008F10EA21C8\n000000067F000080000007200C0000050000-000000067F000080000007200C0000054000__0000008E3CDF59C0\n000000067F000080000007200C0000050000-000000067F000080000007200C0000054000__0000008F10EA21C8\n000000067F000080000007200C000005131D-000000067F000080000007200C00000A2138__0000008EBC4827C1-0000008F10E3E189\n000000067F000080000007200C0000054000-000000067F000080000007200C0000058000__0000008E3CDF59C0\n000000067F000080000007200C0000054000-000000067F000080000007200C0000058000__0000008F10EA21C8\n000000067F000080000007200C0000055202-000000067F000080000007200C000005E90D__0000008CF2BFFC89-0000008DB277FA49\n000000067F000080000007200C0000058000-000000067F000080000007200C000005C000__0000008E3CDF59C0\n000000067F000080000007200C0000058000-000000067F000080000007200C000005C000__0000008F10EA21C8\n000000067F000080000007200C000005C000-000000067F000080000007200C0000060000__0000008E3CDF59C0\n000000067F000080000007200C000005C000-000000067F000080000007200C0000060000__0000008F10EA21C8\n000000067F000080000007200C000005E90D-000000067F000080000007200C000006802B__0000008CF2BFFC89-0000008DB277FA49\n000000067F000080000007200C0000060000-000000067F000080000007200C0000064000__0000008E3CDF59C0\n000000067F000080000007200C0000060000-000000067F000080000007200C0000064000__0000008F10EA21C8\n000000067F000080000007200C0000064000-000000067F000080000007200C0000068000__0000008E3CDF59C0\n000000067F000080000007200C0000064000-000000067F000080000007200C0000068000__0000008F10EA21C8\n000000067F000080000007200C0000068000-000000067F000080000007200C000006C000__0000008E3CDF59C0\n000000067F000080000007200C0000068000-000000067F000080000007200C000006C000__0000008F10EA21C8\n000000067F000080000007200C000006802B-000000067F000080000007200C0000071782__0000008CF2BFFC89-0000008DB277FA49\n000000067F000080000007200C000006C000-000000067F000080000007200C0000070000__0000008E3CDF59C0\n000000067F000080000007200C000006C000-000000067F000080000007200C0000070000__0000008F10E
A21C8\n000000067F000080000007200C0000070000-000000067F000080000007200C0000074000__0000008E3CDF59C0\n000000067F000080000007200C0000070000-000000067F000080000007200C0000074000__0000008F10EA21C8\n000000067F000080000007200C0000071782-000000067F000080000007200C000007AEE8__0000008CF2BFFC89-0000008DB277FA49\n000000067F000080000007200C0000074000-000000067F000080000007200C0000078000__0000008E3CDF59C0\n000000067F000080000007200C0000074000-000000067F000080000007200C0000078000__0000008F10EA21C8\n000000067F000080000007200C0000078000-000000067F000080000007200C000007C000__0000008E3CDF59C0\n000000067F000080000007200C0000078000-000000067F000080000007200C000007C000__0000008F10EA21C8\n000000067F000080000007200C000007AEE8-000000067F000080000007200C000008460B__0000008CF2BFFC89-0000008DB277FA49\n000000067F000080000007200C000007C000-000000067F000080000007200C0000080000__0000008E3CDF59C0\n000000067F000080000007200C000007C000-000000067F000080000007200C0000080000__0000008F10EA21C8\n000000067F000080000007200C0000080000-000000067F000080000007200C0000084000__0000008E3CDF59C0\n000000067F000080000007200C0000080000-000000067F000080000007200C0000084000__0000008F10EA21C8\n000000067F000080000007200C0000084000-000000067F000080000007200C0000088000__0000008E3CDF59C0\n000000067F000080000007200C0000084000-000000067F000080000007200C0000088000__0000008F10EA21C8\n000000067F000080000007200C000008460B-000000067F000080000007200C000008DD71__0000008CF2BFFC89-0000008DB277FA49\n000000067F000080000007200C0000088000-000000067F000080000007200C000008C000__0000008E3CDF59C0\n000000067F000080000007200C0000088000-000000067F000080000007200C000008C000__0000008F10EA21C8\n000000067F000080000007200C000008C000-000000067F000080000007200C0000090000__0000008E3CDF59C0\n000000067F000080000007200C000008C000-000000067F000080000007200C0000090000__0000008F10EA21C8\n000000067F000080000007200C000008DD71-000000067F000080000007200C00000974D7__0000008CF2BFFC89-0000008DB277FA49\n000000067F000080000007200C0000090000-000000067F000080000007200C00
00094000__0000008E3CDF59C0\n000000067F000080000007200C0000090000-000000067F000080000007200C0000094000__0000008F10EA21C8\n000000067F000080000007200C0000094000-000000067F000080000007200C0000098000__0000008E3CDF59C0\n000000067F000080000007200C0000094000-000000067F000080000007200C0000098000__0000008F10EA21C8\n000000067F000080000007200C00000974D7-000000067F000080000007200C00000A0C0B__0000008CF2BFFC89-0000008DB277FA49\n000000067F000080000007200C0000098000-000000067F000080000007200C000009C000__0000008E3CDF59C0\n000000067F000080000007200C0000098000-000000067F000080000007200C000009C000__0000008F10EA21C8\n000000067F000080000007200C000009C000-000000067F000080000007200C00000A0000__0000008E3CDF59C0\n000000067F000080000007200C000009C000-000000067F000080000007200C00000A0000__0000008F10EA21C8\n000000067F000080000007200C00000A0000-000000067F000080000007200C00000A4000__0000008E3CDF59C0\n000000067F000080000007200C00000A0000-000000067F000080000007200C00000A4000__0000008F10EA21C8\n000000067F000080000007200C00000A0C0B-000000067F000080000007200C00000AA371__0000008CF2BFFC89-0000008DB277FA49\n000000067F000080000007200C00000A2138-000000067F000080000007200C00000F342E__0000008EBC4827C1-0000008F10E3E189\n000000067F000080000007200C00000A4000-000000067F000080000007200C00000A8000__0000008E3CDF59C0\n000000067F000080000007200C00000A4000-000000067F000080000007200C00000A8000__0000008F10EA21C8\n000000067F000080000007200C00000A8000-000000067F000080000007200C00000AC000__0000008E3CDF59C0\n000000067F000080000007200C00000A8000-000000067F000080000007200C00000AC000__0000008F10EA21C8\n000000067F000080000007200C00000AA371-000000067F000080000007200C00000B3AD7__0000008CF2BFFC89-0000008DB277FA49\n000000067F000080000007200C00000AC000-000000067F000080000007200C00000B0000__0000008E3CDF59C0\n000000067F000080000007200C00000AC000-000000067F000080000007200C00000B0000__0000008F10EA21C8\n000000067F000080000007200C00000B0000-000000067F000080000007200C00000B4000__0000008E3CDF59C0\n000000067F000080000007200C00000B0000-0000000
67F000080000007200C00000B4000__0000008F10EA21C8\n000000067F000080000007200C00000B3AD7-000000067F000080000007200C00000BD20B__0000008CF2BFFC89-0000008DB277FA49\n000000067F000080000007200C00000B4000-000000067F000080000007200C00000B8000__0000008E3CDF59C0\n000000067F000080000007200C00000B4000-000000067F000080000007200C00000B8000__0000008F10EA21C8\n000000067F000080000007200C00000B8000-000000067F000080000007200C00000BC000__0000008E3CDF59C0\n000000067F000080000007200C00000B8000-000000067F000080000007200C00000BC000__0000008F10EA21C8\n000000067F000080000007200C00000BA086-000000067F00008000000720140000001101__0000008E42A19FD1-0000008EBC4827C1\n000000067F000080000007200C00000BC000-000000067F000080000007200C00000C0000__0000008E3CDF59C0\n000000067F000080000007200C00000BC000-000000067F000080000007200C00000C0000__0000008F10EA21C8\n000000067F000080000007200C00000BD20B-000000067F000080000007200C0100000000__0000008CF2BFFC89-0000008DB277FA49\n000000067F000080000007200C00000C0000-000000067F000080000007200C00000C4000__0000008E3CDF59C0\n000000067F000080000007200C00000C0000-000000067F000080000007200C00000C4000__0000008F10EA21C8\n000000067F000080000007200C00000C4000-000000067F000080000007200C00000C8000__0000008E3CDF59C0\n000000067F000080000007200C00000C4000-000000067F000080000007200C00000C8000__0000008F10EA21C8\n000000067F000080000007200C00000C58B0-000000067F000080000007200C00000CF00A__0000008DB277FA49-0000008E42A19FD1\n000000067F000080000007200C00000C8000-000000067F000080000007200C00000CC000__0000008E3CDF59C0\n000000067F000080000007200C00000C8000-000000067F000080000007200C00000CC000__0000008F10EA21C8\n000000067F000080000007200C00000CC000-000000067F000080000007200C00000D0000__0000008E3CDF59C0\n000000067F000080000007200C00000CC000-000000067F000080000007200C00000D0000__0000008F10EA21C8\n000000067F000080000007200C00000CF00A-000000067F000080000007200C00000D871F__0000008DB277FA49-0000008E42A19FD1\n000000067F000080000007200C00000D0000-000000067F000080000007200C00000D4000__0000008E3CDF59C0\n000000
067F000080000007200C00000D0000-000000067F000080000007200C00000D4000__0000008F10EA21C8\n000000067F000080000007200C00000D4000-000000067F000080000007200C00000D8000__0000008E3CDF59C0\n000000067F000080000007200C00000D4000-000000067F000080000007200C00000D8000__0000008F10EA21C8\n000000067F000080000007200C00000D8000-000000067F000080000007200C00000DC000__0000008E3CDF59C0\n000000067F000080000007200C00000D8000-000000067F000080000007200C00000DC000__0000008F10EA21C8\n000000067F000080000007200C00000D871F-000000067F000080000007200C00000E1E85__0000008DB277FA49-0000008E42A19FD1\n000000067F000080000007200C00000DC000-000000067F000080000007200C00000E0000__0000008E3CDF59C0\n000000067F000080000007200C00000DC000-000000067F000080000007200C00000E0000__0000008F10EA21C8\n000000067F000080000007200C00000E0000-000000067F000080000007200C00000E4000__0000008E3CDF59C0\n000000067F000080000007200C00000E0000-000000067F000080000007200C00000E4000__0000008F10EA21C8\n000000067F000080000007200C00000E1E85-000000067F000080000007200C00000EB5EB__0000008DB277FA49-0000008E42A19FD1\n000000067F000080000007200C00000E4000-000000067F000080000007200C00000E8000__0000008E3CDF59C0\n000000067F000080000007200C00000E4000-000000067F000080000007200C00000E8000__0000008F10EA21C8\n000000067F000080000007200C00000E8000-000000067F000080000007200C00000EC000__0000008E3CDF59C0\n000000067F000080000007200C00000E8000-000000067F000080000007200C00000EC000__0000008F10EA21C8\n000000067F000080000007200C00000EB5EB-000000067F000080000007200C00000F4D0C__0000008DB277FA49-0000008E42A19FD1\n000000067F000080000007200C00000EC000-000000067F000080000007200C00000F0000__0000008E3CDF59C0\n000000067F000080000007200C00000EC000-000000067F000080000007200C00000F0000__0000008F10EA21C8\n000000067F000080000007200C00000F0000-000000067F000080000007200C00000F4000__0000008E3CDF59C0\n000000067F000080000007200C00000F0000-000000067F000080000007200C00000F4000__0000008F10EA21C8\n000000067F000080000007200C00000F342F-000000067F0000800000072014000000D54C__0000008EBC4827C1-000
0008F10E3E189\n000000067F000080000007200C00000F4000-000000067F000080000007200C00000F8000__0000008E3CDF59C0\n000000067F000080000007200C00000F4000-000000067F000080000007200C00000F8000__0000008F10EA21C8\n000000067F000080000007200C00000F4D0C-000000067F000080000007200C00000FE472__0000008DB277FA49-0000008E42A19FD1\n000000067F000080000007200C00000F8000-000000067F000080000007200C00000FC000__0000008E3CDF59C0\n000000067F000080000007200C00000F8000-000000067F000080000007200C00000FC000__0000008F10EA21C8\n000000067F000080000007200C00000FC000-000000067F000080000007200C0000100000__0000008E3CDF59C0\n000000067F000080000007200C00000FC000-000000067F000080000007200C0000100000__0000008F10EA21C8\n000000067F000080000007200C00000FE472-000000067F000080000007200C0000107B8E__0000008DB277FA49-0000008E42A19FD1\n000000067F000080000007200C0000100000-000000067F000080000007200C0000104000__0000008E3CDF59C0\n000000067F000080000007200C0000100000-000000067F000080000007200C0000104000__0000008F10EA21C8\n000000067F000080000007200C0000104000-000000067F000080000007200C0000108000__0000008E3CDF59C0\n000000067F000080000007200C0000104000-000000067F000080000007200C0000108000__0000008F10EA21C8\n000000067F000080000007200C0000107B8E-000000067F000080000007200C00001112F4__0000008DB277FA49-0000008E42A19FD1\n000000067F000080000007200C0000108000-000000067F000080000007200C000010C000__0000008E3CDF59C0\n000000067F000080000007200C0000108000-000000067F000080000007200C000010C000__0000008F10EA21C8\n000000067F000080000007200C000010C000-000000067F000080000007200C0000110000__0000008F10EA21C8\n000000067F000080000007200C000010C000-030000000000000000000000000000000002__0000008E3CDF59C0\n000000067F000080000007200C0000110000-000000067F00008000000720120100000000__0000008F10EA21C8\n000000067F000080000007200C00001112F4-010000000000000001000000040000000001__0000008DB277FA49-0000008E42A19FD1\n000000067F00008000000720140000000000-000000067F00008000000720140000004000__0000008F10EA21C8\n000000067F00008000000720140000001101-000000067F0000800000
0720140000007E82__0000008E42A19FD1-0000008EBC4827C1\n000000067F00008000000720140000004000-000000067F00008000000720140000008000__0000008F10EA21C8\n000000067F00008000000720140000007E82-000000067F0000800000072014000000EB9D__0000008E42A19FD1-0000008EBC4827C1\n000000067F00008000000720140000008000-000000067F0000800000072014000000C000__0000008F10EA21C8\n000000067F0000800000072014000000C000-000000067F00008000000720140000010000__0000008F10EA21C8\n000000067F0000800000072014000000D54D-000000067F00008000000720140000025E6D__0000008EBC4827C1-0000008F10E3E189\n000000067F0000800000072014000000EB9D-000000067F00008000000720140000015866__0000008E42A19FD1-0000008EBC4827C1\n000000067F00008000000720140000010000-000000067F00008000000720140000014000__0000008F10EA21C8\n000000067F00008000000720140000014000-000000067F00008000000720140000018000__0000008F10EA21C8\n000000067F00008000000720140000015866-000000067F0000800000072014000001C591__0000008E42A19FD1-0000008EBC4827C1\n000000067F00008000000720140000018000-000000067F0000800000072014000001C000__0000008F10EA21C8\n000000067F0000800000072014000001C000-000000067F00008000000720140000020000__0000008F10EA21C8\n000000067F0000800000072014000001C591-000000067F0000800000072014000002326E__0000008E42A19FD1-0000008EBC4827C1\n000000067F00008000000720140000020000-000000067F00008000000720140000024000__0000008F10EA21C8\n000000067F0000800000072014000002326E-000000067F00008000000720140000029F59__0000008E42A19FD1-0000008EBC4827C1\n000000067F00008000000720140000024000-000000067F00008000000720140000028000__0000008F10EA21C8\n000000067F00008000000720140000025E75-030000000000000000000000000000000002__0000008EBC4827C1-0000008F10E3E189\n000000067F00008000000720140000028000-000000067F0000800000072014000002C000__0000008F10EA21C8\n000000067F00008000000720140000029F59-030000000000000000000000000000000002__0000008E42A19FD1-0000008EBC4827C1\n000000067F0000800000072014000002C000-030000000000000000000000000000000002__0000008F10EA21C8\n000000067F000080000007400C0000000000-0000000
67F000080000007400C0000004000__00000091A67E3E18\n000000067F000080000007400C0000004000-000000067F000080000007400C0000008000__00000091A67E3E18\n000000067F000080000007400C0000008000-000000067F000080000007400C000000C000__00000091A67E3E18\n000000067F000080000007400C00000090E9-000000067F000080000007400C000001280C__0000008F10E3E189-0000008F915DE591\n000000067F000080000007400C000000C000-000000067F000080000007400C0000010000__00000091A67E3E18\n000000067F000080000007400C0000010000-000000067F000080000007400C0000014000__00000091A67E3E18\n000000067F000080000007400C000001280C-000000067F000080000007400C000001BF72__0000008F10E3E189-0000008F915DE591\n000000067F000080000007400C0000014000-000000067F000080000007400C0000018000__00000091A67E3E18\n000000067F000080000007400C0000018000-000000067F000080000007400C000001C000__00000091A67E3E18\n000000067F000080000007400C000001BF72-000000067F000080000007400C00000256D8__0000008F10E3E189-0000008F915DE591\n000000067F000080000007400C000001C000-000000067F000080000007400C0000020000__00000091A67E3E18\n000000067F000080000007400C0000020000-000000067F000080000007400C0000024000__00000091A67E3E18\n000000067F000080000007400C0000024000-000000067F000080000007400C0000028000__00000091A67E3E18\n000000067F000080000007400C00000256D8-000000067F000080000007400C000002EE0B__0000008F10E3E189-0000008F915DE591\n000000067F000080000007400C0000028000-000000067F000080000007400C000002C000__00000091A67E3E18\n000000067F000080000007400C000002C000-000000067F000080000007400C0000030000__00000091A67E3E18\n000000067F000080000007400C000002EE0B-000000067F000080000007400C0000038521__0000008F10E3E189-0000008F915DE591\n000000067F000080000007400C0000030000-000000067F000080000007400C0000034000__00000091A67E3E18\n000000067F000080000007400C0000034000-000000067F000080000007400C0000038000__00000091A67E3E18\n000000067F000080000007400C0000038000-000000067F000080000007400C000003C000__00000091A67E3E18\n000000067F000080000007400C0000038521-000000067F000080000007400C0000041C87__0000008F10E3E189-0000008
F915DE591\n000000067F000080000007400C000003C000-000000067F000080000007400C0000040000__00000091A67E3E18\n000000067F000080000007400C0000040000-000000067F000080000007400C0000044000__00000091A67E3E18\n000000067F000080000007400C0000041C87-000000067F000080000007400C000004B3ED__0000008F10E3E189-0000008F915DE591\n000000067F000080000007400C0000044000-000000067F000080000007400C0000048000__00000091A67E3E18\n000000067F000080000007400C0000048000-000000067F000080000007400C000004C000__000000914B20A810\n000000067F000080000007400C000004B3ED-030000000000000000000000000000000002__0000008F10E3E189-0000008F915DE591\n000000067F000080000007400C000004BAC9-000000067F000080000007400C00000551FE__0000008F915DE591-000000903121F569\n000000067F000080000007400C000004C000-000000067F000080000007400C0000050000__000000914B20A810\n000000067F000080000007400C000004DF0B-000000067F000080000007400C000009B41F__000000914B2393B1-00000091A6DD7A79\n000000067F000080000007400C0000050000-000000067F000080000007400C0000054000__000000914B20A810\n000000067F000080000007400C0000054000-000000067F000080000007400C0000058000__000000914B20A810\n000000067F000080000007400C00000551FE-000000067F000080000007400C000005E90C__0000008F915DE591-000000903121F569\n000000067F000080000007400C0000058000-000000067F000080000007400C000005C000__000000914B20A810\n000000067F000080000007400C000005C000-000000067F000080000007400C0000060000__000000914B20A810\n000000067F000080000007400C000005E90C-000000067F000080000007400C000006802C__0000008F915DE591-000000903121F569\n000000067F000080000007400C0000060000-000000067F000080000007400C0000064000__000000914B20A810\n000000067F000080000007400C0000064000-000000067F000080000007400C0000068000__000000914B20A810\n000000067F000080000007400C0000068000-000000067F000080000007400C000006C000__000000914B20A810\n000000067F000080000007400C000006802C-000000067F000080000007400C0000071783__0000008F915DE591-000000903121F569\n000000067F000080000007400C000006C000-000000067F000080000007400C0000070000__000000914B20A810\n000000067F
000080000007400C0000070000-000000067F000080000007400C0000074000__000000914B20A810\n000000067F000080000007400C0000071783-000000067F000080000007400C000007AEE9__0000008F915DE591-000000903121F569\n000000067F000080000007400C0000074000-000000067F000080000007400C0000078000__000000914B20A810\n000000067F000080000007400C0000078000-000000067F000080000007400C000007C000__000000914B20A810\n000000067F000080000007400C000007AEE9-000000067F000080000007400C000008460B__0000008F915DE591-000000903121F569\n000000067F000080000007400C000007C000-000000067F000080000007400C0000080000__000000914B20A810\n000000067F000080000007400C0000080000-000000067F000080000007400C0000084000__000000914B20A810\n000000067F000080000007400C0000084000-000000067F000080000007400C0000088000__000000914B20A810\n000000067F000080000007400C000008460B-000000067F000080000007400C000008DD71__0000008F915DE591-000000903121F569\n000000067F000080000007400C0000088000-000000067F000080000007400C000008C000__000000914B20A810\n000000067F000080000007400C000008C000-000000067F000080000007400C0000090000__000000914B20A810\n000000067F000080000007400C000008DD71-000000067F000080000007400C00000974D7__0000008F915DE591-000000903121F569\n000000067F000080000007400C0000090000-000000067F000080000007400C0000094000__000000914B20A810\n000000067F000080000007400C0000094000-000000067F000080000007400C0000098000__000000914B20A810\n000000067F000080000007400C00000974D7-000000067F000080000007400C00000A0C0B__0000008F915DE591-000000903121F569\n000000067F000080000007400C0000098000-000000067F000080000007400C000009C000__000000914B20A810\n000000067F000080000007400C000009B420-000000067F000080000007400C00000E830A__000000914B2393B1-00000091A6DD7A79\n000000067F000080000007400C000009C000-000000067F000080000007400C00000A0000__000000914B20A810\n000000067F000080000007400C00000A0000-000000067F000080000007400C00000A4000__000000914B20A810\n000000067F000080000007400C00000A0C0B-000000067F000080000007400C00000AA371__0000008F915DE591-000000903121F569\n000000067F000080000007400C00000
A4000-000000067F000080000007400C00000A8000__000000914B20A810\n000000067F000080000007400C00000A8000-000000067F000080000007400C00000AC000__00000090DFD64240\n000000067F000080000007400C00000AA371-000000067F000080000007400C0100000000__0000008F915DE591-000000903121F569\n000000067F000080000007400C00000AA4EC-000000067F000080000007400C00000B3C0C__000000903121F569-00000090D0E5EA29\n000000067F000080000007400C00000AC000-000000067F000080000007400C00000B0000__00000090DFD64240\n000000067F000080000007400C00000B0000-000000067F000080000007400C00000B4000__00000090DFD64240\n000000067F000080000007400C00000B3C0C-000000067F000080000007400C00000BD372__000000903121F569-00000090D0E5EA29\n000000067F000080000007400C00000B4000-000000067F000080000007400C00000B8000__00000090DFD64240\n000000067F000080000007400C00000B8000-000000067F000080000007400C00000BC000__00000090DFD64240\n000000067F000080000007400C00000BC000-000000067F000080000007400C00000C0000__00000090DFD64240\n000000067F000080000007400C00000BD372-000000067F000080000007400C00000C6AD8__000000903121F569-00000090D0E5EA29\n000000067F000080000007400C00000C0000-000000067F000080000007400C00000C4000__00000090DFD64240\n000000067F000080000007400C00000C4000-000000067F000080000007400C00000C8000__00000090DFD64240\n000000067F000080000007400C00000C6AD8-000000067F000080000007400C00000D020B__000000903121F569-00000090D0E5EA29\n000000067F000080000007400C00000C8000-000000067F000080000007400C00000CC000__00000090DFD64240\n000000067F000080000007400C00000CC000-000000067F000080000007400C00000D0000__00000090DFD64240\n000000067F000080000007400C00000D0000-000000067F000080000007400C00000D4000__00000090DFD64240\n000000067F000080000007400C00000D020B-000000067F000080000007400C00000D9971__000000903121F569-00000090D0E5EA29\n000000067F000080000007400C00000D4000-000000067F000080000007400C00000D8000__00000090DFD64240\n000000067F000080000007400C00000D8000-000000067F000080000007400C00000DC000__00000090DFD64240\n000000067F000080000007400C00000D9971-000000067F000080000007400C00000E
30D7__000000903121F569-00000090D0E5EA29\n000000067F000080000007400C00000DC000-000000067F000080000007400C00000E0000__00000090DFD64240\n000000067F000080000007400C00000E0000-000000067F000080000007400C00000E4000__00000090DFD64240\n000000067F000080000007400C00000E30D7-000000067F000080000007400C00000EC80B__000000903121F569-00000090D0E5EA29\n000000067F000080000007400C00000E4000-000000067F000080000007400C00000E8000__00000090DFD64240\n000000067F000080000007400C00000E8000-000000067F000080000007400C00000EC000__00000090DFD64240\n000000067F000080000007400C00000E8314-000000067F00008000000740140000008178__000000914B2393B1-00000091A6DD7A79\n000000067F000080000007400C00000EC000-000000067F000080000007400C00000F0000__00000090DFD64240\n000000067F000080000007400C00000EC80B-000000067F000080000007400C00000F5F38__000000903121F569-00000090D0E5EA29\n000000067F000080000007400C00000F0000-000000067F000080000007400C00000F4000__00000090DFD64240\n000000067F000080000007400C00000F4000-000000067F000080000007400C00000F8000__00000090DFD64240\n000000067F000080000007400C00000F5F38-000000067F000080000007400C00000FF69E__000000903121F569-00000090D0E5EA29\n000000067F000080000007400C00000F8000-000000067F000080000007400C00000FC000__00000090DFD64240\n000000067F000080000007400C00000FC000-000000067F000080000007400C0000100000__00000090DFD64240\n000000067F000080000007400C00000FCCA8-000000067F000080000007400C00001119BA__00000090D0E5EA29-000000914B2393B1\n000000067F000080000007400C00000FF69E-000000067F000080000007400C0000108DAF__000000903121F569-00000090D0E5EA29\n000000067F000080000007400C0000100000-000000067F000080000007400C0000104000__00000090DFD64240\n000000067F000080000007400C0000104000-000000067F000080000007400C0000108000__00000090DFD64240\n000000067F000080000007400C0000108000-000000067F000080000007400C000010C000__00000090DFD64240\n000000067F000080000007400C0000108DAF-000000067F000080000007400C0100000000__000000903121F569-00000090D0E5EA29\n000000067F000080000007400C000010C000-000000067F000080000007400C0000110000
__00000090DFD64240\n000000067F000080000007400C0000110000-030000000000000000000000000000000002__00000090DFD64240\n000000067F000080000007400C00001119BA-000000067F00008000000740140000004326__00000090D0E5EA29-000000914B2393B1\n000000067F00008000000740140000004326-000000067F0000800000074014000000B7EE__00000090D0E5EA29-000000914B2393B1\n000000067F00008000000740140000008179-000000067F0000800000074014000001D4B7__000000914B2393B1-00000091A6DD7A79\n000000067F0000800000074014000000B7EE-000000067F00008000000740140000012CCD__00000090D0E5EA29-000000914B2393B1\n000000067F00008000000740140000012CCD-000000067F0000800000074014000001A16B__00000090D0E5EA29-000000914B2393B1\n000000067F0000800000074014000001A16B-000000067F000080000007401400000215C9__00000090D0E5EA29-000000914B2393B1\n000000067F0000800000074014000001D4BA-030000000000000000000000000000000002__000000914B2393B1-00000091A6DD7A79\n000000067F000080000007401400000215C9-000000067F00008000000740140000028A4A__00000090D0E5EA29-000000914B2393B1\n000000067F00008000000740140000028A4A-030000000000000000000000000000000002__00000090D0E5EA29-000000914B2393B1\n000000067F000080000007600C0000000000-000000067F000080000007600C0000004000__00000092CA5E4EA8\n000000067F000080000007600C0000000000-000000067F000080000007600C0000004000__0000009445A06DC8\n000000067F000080000007600C0000004000-000000067F000080000007600C0000008000__00000092CA5E4EA8\n000000067F000080000007600C0000004000-000000067F000080000007600C0000008000__0000009445A06DC8\n000000067F000080000007600C0000008000-000000067F000080000007600C000000C000__00000092CA5E4EA8\n000000067F000080000007600C0000008000-000000067F000080000007600C000000C000__0000009445A06DC8\n000000067F000080000007600C0000008180-000000067F000080000007600C00000118E6__00000091A6DD7A79-0000009228F7FA79\n000000067F000080000007600C000000C000-000000067F000080000007600C0000010000__00000092CA5E4EA8\n000000067F000080000007600C000000C000-000000067F000080000007600C0000010000__0000009445A06DC8\n000000067F000080000007600C0000010000-000000
067F000080000007600C0000014000__00000092CA5E4EA8\n000000067F000080000007600C0000010000-000000067F000080000007600C0000014000__0000009445A06DC8\n000000067F000080000007600C00000118E6-000000067F000080000007600C000001B00A__00000091A6DD7A79-0000009228F7FA79\n000000067F000080000007600C0000014000-000000067F000080000007600C0000018000__00000092CA5E4EA8\n000000067F000080000007600C0000014000-000000067F000080000007600C0000018000__0000009445A06DC8\n000000067F000080000007600C0000018000-000000067F000080000007600C000001C000__00000092CA5E4EA8\n000000067F000080000007600C0000018000-000000067F000080000007600C000001C000__0000009445A06DC8\n000000067F000080000007600C000001B00A-000000067F000080000007600C0000024745__00000091A6DD7A79-0000009228F7FA79\n000000067F000080000007600C000001C000-000000067F000080000007600C0000020000__00000092CA5E4EA8\n000000067F000080000007600C000001C000-000000067F000080000007600C0000020000__0000009445A06DC8\n000000067F000080000007600C0000020000-000000067F000080000007600C0000024000__00000092CA5E4EA8\n000000067F000080000007600C0000020000-000000067F000080000007600C0000024000__0000009445A06DC8\n000000067F000080000007600C0000024000-000000067F000080000007600C0000028000__00000092CA5E4EA8\n000000067F000080000007600C0000024000-000000067F000080000007600C0000028000__0000009445A06DC8\n000000067F000080000007600C0000024745-000000067F000080000007600C000002DEAB__00000091A6DD7A79-0000009228F7FA79\n000000067F000080000007600C0000028000-000000067F000080000007600C000002C000__00000092CA5E4EA8\n000000067F000080000007600C0000028000-000000067F000080000007600C000002C000__0000009445A06DC8\n000000067F000080000007600C000002C000-000000067F000080000007600C0000030000__00000092CA5E4EA8\n000000067F000080000007600C000002C000-000000067F000080000007600C0000030000__0000009445A06DC8\n000000067F000080000007600C000002DEAB-000000067F000080000007600C00000375CB__00000091A6DD7A79-0000009228F7FA79\n000000067F000080000007600C0000030000-000000067F000080000007600C0000034000__00000092CA5E4EA8\n000000067F000080000007
600C0000030000-000000067F000080000007600C0000034000__0000009445A06DC8\n000000067F000080000007600C0000034000-000000067F000080000007600C0000038000__00000092CA5E4EA8\n000000067F000080000007600C0000034000-000000067F000080000007600C0000038000__0000009445A06DC8\n000000067F000080000007600C00000375CB-000000067F000080000007600C0000040D0B__00000091A6DD7A79-0000009228F7FA79\n000000067F000080000007600C0000038000-000000067F000080000007600C000003C000__00000092CA5E4EA8\n000000067F000080000007600C0000038000-000000067F000080000007600C000003C000__0000009445A06DC8\n000000067F000080000007600C000003C000-000000067F000080000007600C0000040000__00000092CA5E4EA8\n000000067F000080000007600C000003C000-000000067F000080000007600C0000040000__0000009445A06DC8\n000000067F000080000007600C0000040000-000000067F000080000007600C0000044000__00000092CA5E4EA8\n000000067F000080000007600C0000040000-000000067F000080000007600C0000044000__0000009445A06DC8\n000000067F000080000007600C0000040D0B-000000067F000080000007600C000004A471__00000091A6DD7A79-0000009228F7FA79\n000000067F000080000007600C0000044000-000000067F000080000007600C0000048000__00000092CA5E4EA8\n000000067F000080000007600C0000044000-000000067F000080000007600C0000048000__0000009445A06DC8\n000000067F000080000007600C0000048000-000000067F000080000007600C000004C000__00000092CA5E4EA8\n000000067F000080000007600C0000048000-000000067F000080000007600C000004C000__0000009445A06DC8\n000000067F000080000007600C000004A471-030000000000000000000000000000000002__00000091A6DD7A79-0000009228F7FA79\n000000067F000080000007600C000004C000-000000067F000080000007600C0000050000__00000092CA5E4EA8\n000000067F000080000007600C000004C000-000000067F000080000007600C0000050000__0000009445A06DC8\n000000067F000080000007600C0000050000-000000067F000080000007600C0000054000__00000092CA5E4EA8\n000000067F000080000007600C0000050000-000000067F000080000007600C0000054000__0000009445A06DC8\n000000067F000080000007600C0000054000-000000067F000080000007600C0000058000__00000092CA5E4EA8\n000000067F00008000
0007600C0000054000-000000067F000080000007600C0000058000__0000009445A06DC8\n000000067F000080000007600C00000544BA-000000067F000080000007600C000005DC0A__0000009228F7FA79-00000093786F8001\n000000067F000080000007600C0000058000-000000067F000080000007600C000005C000__00000092CA5E4EA8\n000000067F000080000007600C0000058000-000000067F000080000007600C000005C000__0000009445A06DC8\n000000067F000080000007600C000005C000-000000067F000080000007600C0000060000__00000092CA5E4EA8\n000000067F000080000007600C000005C000-000000067F000080000007600C0000060000__0000009445A06DC8\n000000067F000080000007600C000005DC0A-000000067F000080000007600C000006732B__0000009228F7FA79-00000093786F8001\n000000067F000080000007600C0000060000-000000067F000080000007600C0000064000__00000092CA5E4EA8\n000000067F000080000007600C0000060000-000000067F000080000007600C0000064000__0000009445A06DC8\n000000067F000080000007600C0000061031-000000067F000080000007600C00000C1159__0000009402435A49-0000009446B52FD1\n000000067F000080000007600C0000064000-000000067F000080000007600C0000068000__00000092CA5E4EA8\n000000067F000080000007600C0000064000-000000067F000080000007600C0000068000__0000009445A06DC8\n000000067F000080000007600C000006732B-000000067F000080000007600C0000070A91__0000009228F7FA79-00000093786F8001\n000000067F000080000007600C0000068000-000000067F000080000007600C000006C000__00000092CA5E4EA8\n000000067F000080000007600C0000068000-000000067F000080000007600C000006C000__0000009445A06DC8\n000000067F000080000007600C000006C000-000000067F000080000007600C0000070000__00000092CA5E4EA8\n000000067F000080000007600C000006C000-000000067F000080000007600C0000070000__0000009445A06DC8\n000000067F000080000007600C0000070000-000000067F000080000007600C0000074000__00000092CA5E4EA8\n000000067F000080000007600C0000070000-000000067F000080000007600C0000074000__0000009445A06DC8\n000000067F000080000007600C0000070A91-000000067F000080000007600C000007A1F7__0000009228F7FA79-00000093786F8001\n000000067F000080000007600C0000074000-000000067F000080000007600C0000078000
__00000092CA5E4EA8\n000000067F000080000007600C0000074000-000000067F000080000007600C0000078000__0000009445A06DC8\n000000067F000080000007600C0000078000-000000067F000080000007600C000007C000__00000092CA5E4EA8\n000000067F000080000007600C0000078000-000000067F000080000007600C000007C000__0000009445A06DC8\n000000067F000080000007600C000007A1F7-000000067F000080000007600C000008390C__0000009228F7FA79-00000093786F8001\n000000067F000080000007600C000007C000-000000067F000080000007600C0000080000__00000092CA5E4EA8\n000000067F000080000007600C000007C000-000000067F000080000007600C0000080000__0000009445A06DC8\n000000067F000080000007600C0000080000-000000067F000080000007600C0000084000__00000092CA5E4EA8\n000000067F000080000007600C0000080000-000000067F000080000007600C0000084000__0000009445A06DC8\n000000067F000080000007600C000008390C-000000067F000080000007600C000008D072__0000009228F7FA79-00000093786F8001\n000000067F000080000007600C0000084000-000000067F000080000007600C0000088000__00000092CA5E4EA8\n000000067F000080000007600C0000084000-000000067F000080000007600C0000088000__0000009445A06DC8\n000000067F000080000007600C0000088000-000000067F000080000007600C000008C000__00000092CA5E4EA8\n000000067F000080000007600C0000088000-000000067F000080000007600C000008C000__0000009445A06DC8\n000000067F000080000007600C000008C000-000000067F000080000007600C0000090000__00000092CA5E4EA8\n000000067F000080000007600C000008C000-000000067F000080000007600C0000090000__0000009445A06DC8\n000000067F000080000007600C000008C52F-000000067F000080000007600C000010B57A__00000093786F8001-0000009402435A49\n000000067F000080000007600C000008D072-000000067F000080000007600C000009679A__0000009228F7FA79-00000093786F8001\n000000067F000080000007600C0000090000-000000067F000080000007600C0000094000__00000092CA5E4EA8\n000000067F000080000007600C0000090000-000000067F000080000007600C0000094000__0000009445A06DC8\n000000067F000080000007600C0000094000-000000067F000080000007600C0000098000__00000092CA5E4EA8\n000000067F000080000007600C0000094000-000000067F00008
0000007600C0000098000__0000009445A06DC8\n000000067F000080000007600C000009679A-000000067F000080000007600C000009FF00__0000009228F7FA79-00000093786F8001\n000000067F000080000007600C0000098000-000000067F000080000007600C000009C000__00000092CA5E4EA8\n000000067F000080000007600C0000098000-000000067F000080000007600C000009C000__0000009445A06DC8\n000000067F000080000007600C000009C000-000000067F000080000007600C00000A0000__00000092CA5E4EA8\n000000067F000080000007600C000009C000-000000067F000080000007600C00000A0000__0000009445A06DC8\n000000067F000080000007600C000009FF00-000000067F000080000007600C00000A960B__0000009228F7FA79-00000093786F8001\n000000067F000080000007600C00000A0000-000000067F000080000007600C00000A4000__00000092CA5E4EA8\n000000067F000080000007600C00000A0000-000000067F000080000007600C00000A4000__0000009445A06DC8\n000000067F000080000007600C00000A4000-000000067F000080000007600C00000A8000__00000092CA5E4EA8\n000000067F000080000007600C00000A4000-000000067F000080000007600C00000A8000__0000009445A06DC8\n000000067F000080000007600C00000A8000-000000067F000080000007600C00000AC000__0000009445A06DC8\n000000067F000080000007600C00000A8000-030000000000000000000000000000000002__00000092CA5E4EA8\n000000067F000080000007600C00000A960B-000000067F000080000007600C00000B2D55__0000009228F7FA79-00000093786F8001\n000000067F000080000007600C00000AC000-000000067F000080000007600C00000B0000__0000009445A06DC8\n000000067F000080000007600C00000B0000-000000067F000080000007600C00000B4000__0000009445A06DC8\n000000067F000080000007600C00000B2D55-000000067F000080000007600C00000BC4BB__0000009228F7FA79-00000093786F8001\n000000067F000080000007600C00000B4000-000000067F000080000007600C00000B8000__0000009445A06DC8\n000000067F000080000007600C00000B8000-000000067F000080000007600C00000BC000__0000009445A06DC8\n000000067F000080000007600C00000BC000-000000067F000080000007600C00000C0000__0000009445A06DC8\n000000067F000080000007600C00000BC4BB-000000067F000080000007600C00000C5BEA__0000009228F7FA79-00000093786F8001\n000000067F0000
80000007600C00000C0000-000000067F000080000007600C00000C4000__0000009445A06DC8\n000000067F000080000007600C00000C115D-000000067F0000800000076014000000333A__0000009402435A49-0000009446B52FD1\n000000067F000080000007600C00000C4000-000000067F000080000007600C00000C8000__0000009445A06DC8\n000000067F000080000007600C00000C5BEA-000000067F000080000007600C00000CF30B__0000009228F7FA79-00000093786F8001\n000000067F000080000007600C00000C8000-000000067F000080000007600C00000CC000__0000009445A06DC8\n000000067F000080000007600C00000CC000-000000067F000080000007600C00000D0000__0000009445A06DC8\n000000067F000080000007600C00000CF30B-000000067F000080000007600C00000D8A2B__0000009228F7FA79-00000093786F8001\n000000067F000080000007600C00000D0000-000000067F000080000007600C00000D4000__0000009445A06DC8\n000000067F000080000007600C00000D4000-000000067F000080000007600C00000D8000__0000009445A06DC8\n000000067F000080000007600C00000D8000-000000067F000080000007600C00000DC000__0000009445A06DC8\n000000067F000080000007600C00000D8A2B-000000067F000080000007600C00000E217C__0000009228F7FA79-00000093786F8001\n000000067F000080000007600C00000DC000-000000067F000080000007600C00000E0000__0000009445A06DC8\n000000067F000080000007600C00000E0000-000000067F000080000007600C00000E4000__0000009445A06DC8\n000000067F000080000007600C00000E217C-000000067F000080000007600C00000EB8E2__0000009228F7FA79-00000093786F8001\n000000067F000080000007600C00000E4000-000000067F000080000007600C00000E8000__0000009445A06DC8\n000000067F000080000007600C00000E8000-000000067F000080000007600C00000EC000__0000009445A06DC8\n000000067F000080000007600C00000EB8E2-000000067F000080000007600C00000F500B__0000009228F7FA79-00000093786F8001\n000000067F000080000007600C00000EC000-000000067F000080000007600C00000F0000__0000009445A06DC8\n000000067F000080000007600C00000F0000-000000067F000080000007600C00000F4000__0000009445A06DC8\n000000067F000080000007600C00000F4000-000000067F000080000007600C00000F8000__0000009445A06DC8\n000000067F000080000007600C00000F500B-000000067F00008
0000007600C00000FE771__0000009228F7FA79-00000093786F8001\n000000067F000080000007600C00000F8000-000000067F000080000007600C00000FC000__0000009445A06DC8\n000000067F000080000007600C00000FC000-000000067F000080000007600C0000100000__0000009445A06DC8\n000000067F000080000007600C00000FE771-000000067F000080000007600C0000107ED7__0000009228F7FA79-00000093786F8001\n000000067F000080000007600C0000100000-000000067F000080000007600C0000104000__0000009445A06DC8\n000000067F000080000007600C0000104000-000000067F000080000007600C0000108000__0000009445A06DC8\n000000067F000080000007600C0000107ED7-000000067F000080000007600C000011160C__0000009228F7FA79-00000093786F8001\n000000067F000080000007600C0000108000-000000067F000080000007600C000010C000__0000009445A06DC8\n000000067F000080000007600C000010B57A-000000067F00008000000760140000003D14__00000093786F8001-0000009402435A49\n000000067F000080000007600C000010C000-000000067F000080000007600C0000110000__0000009445A06DC8\n000000067F000080000007600C0000110000-000000067F00008000000760120100000000__0000009445A06DC8\n000000067F000080000007600C000011160C-010000000000000001000000040000000008__0000009228F7FA79-00000093786F8001\n000000067F00008000000760140000000000-000000067F00008000000760140000004000__0000009445A06DC8\n000000067F00008000000760140000003354-000000067F00008000000760140000023CAB__0000009402435A49-0000009446B52FD1\n000000067F00008000000760140000003D14-000000067F0000800000076014000000A251__00000093786F8001-0000009402435A49\n000000067F00008000000760140000004000-000000067F00008000000760140000008000__0000009445A06DC8\n000000067F00008000000760140000008000-000000067F0000800000076014000000C000__0000009445A06DC8\n000000067F0000800000076014000000A251-000000067F000080000007601400000107AC__00000093786F8001-0000009402435A49\n000000067F0000800000076014000000C000-000000067F00008000000760140000010000__0000009445A06DC8\n000000067F00008000000760140000010000-000000067F00008000000760140000014000__0000009445A06DC8\n000000067F000080000007601400000107AC-000000067F000080000
00760140000016CC4__00000093786F8001-0000009402435A49\n000000067F00008000000760140000014000-000000067F00008000000760140000018000__0000009445A06DC8\n000000067F00008000000760140000016CC4-000000067F0000800000076014000001D272__00000093786F8001-0000009402435A49\n000000067F00008000000760140000018000-000000067F0000800000076014000001C000__0000009445A06DC8\n000000067F0000800000076014000001C000-000000067F00008000000760140000020000__0000009445A06DC8\n000000067F0000800000076014000001D272-000000067F000080000007601400000237C3__00000093786F8001-0000009402435A49\n000000067F00008000000760140000020000-000000067F00008000000760140000024000__0000009445A06DC8\n000000067F000080000007601400000237C3-000000067F00008000000760140000029CC5__00000093786F8001-0000009402435A49\n000000067F00008000000760140000023CB3-030000000000000000000000000000000002__0000009402435A49-0000009446B52FD1\n000000067F00008000000760140000024000-000000067F00008000000760140000028000__0000009445A06DC8\n000000067F00008000000760140000028000-000000067F0000800000076014000002C000__0000009445A06DC8\n000000067F00008000000760140000029CC5-030000000000000000000000000000000002__00000093786F8001-0000009402435A49\n000000067F0000800000076014000002C000-030000000000000000000000000000000002__0000009445A06DC8\n000000067F000080000007800C0000000000-000000067F000080000007800C0000004000__00000096187D1FC8\n000000067F000080000007800C0000000000-000000067F000080000007800C0000004000__00000096E85806C0\n000000067F000080000007800C0000004000-000000067F000080000007800C0000008000__00000096187D1FC8\n000000067F000080000007800C0000004000-000000067F000080000007800C0000008000__00000096E85806C0\n000000067F000080000007800C0000008000-000000067F000080000007800C000000C000__00000096187D1FC8\n000000067F000080000007800C0000008000-000000067F000080000007800C000000C000__00000096E85806C0\n000000067F000080000007800C000000974C-000000067F000080000007800C0000012EB2__0000009446B52FD1-00000094D67DF4F9\n000000067F000080000007800C000000C000-000000067F000080000007800C0000010000__00
000096187D1FC8\n000000067F000080000007800C000000C000-000000067F000080000007800C0000010000__00000096E85806C0\n000000067F000080000007800C0000010000-000000067F000080000007800C0000014000__00000096187D1FC8\n000000067F000080000007800C0000010000-000000067F000080000007800C0000014000__00000096E85806C0\n000000067F000080000007800C0000012EB2-000000067F000080000007800C000001C60B__0000009446B52FD1-00000094D67DF4F9\n000000067F000080000007800C0000014000-000000067F000080000007800C0000018000__00000096187D1FC8\n000000067F000080000007800C0000014000-000000067F000080000007800C0000018000__00000096E85806C0\n000000067F000080000007800C0000018000-000000067F000080000007800C000001C000__00000096187D1FC8\n000000067F000080000007800C0000018000-000000067F000080000007800C000001C000__00000096E85806C0\n000000067F000080000007800C000001C000-000000067F000080000007800C0000020000__00000096187D1FC8\n000000067F000080000007800C000001C000-000000067F000080000007800C0000020000__00000096E85806C0\n000000067F000080000007800C000001C60B-000000067F000080000007800C0000025D39__0000009446B52FD1-00000094D67DF4F9\n000000067F000080000007800C0000020000-000000067F000080000007800C0000024000__00000096187D1FC8\n000000067F000080000007800C0000020000-000000067F000080000007800C0000024000__00000096E85806C0\n000000067F000080000007800C0000024000-000000067F000080000007800C0000028000__00000096187D1FC8\n000000067F000080000007800C0000024000-000000067F000080000007800C0000028000__00000096E85806C0\n000000067F000080000007800C0000025D39-000000067F000080000007800C000002F49F__0000009446B52FD1-00000094D67DF4F9\n000000067F000080000007800C0000028000-000000067F000080000007800C000002C000__00000096187D1FC8\n000000067F000080000007800C0000028000-000000067F000080000007800C000002C000__00000096E85806C0\n000000067F000080000007800C000002C000-000000067F000080000007800C0000030000__00000096187D1FC8\n000000067F000080000007800C000002C000-000000067F000080000007800C0000030000__00000096E85806C0\n000000067F000080000007800C000002F49F-000000067F000080000007800C0000038BB2
__0000009446B52FD1-00000094D67DF4F9\n000000067F000080000007800C0000030000-000000067F000080000007800C0000034000__00000096187D1FC8\n000000067F000080000007800C0000030000-000000067F000080000007800C0000034000__00000096E85806C0\n000000067F000080000007800C0000034000-000000067F000080000007800C0000038000__00000096187D1FC8\n000000067F000080000007800C0000034000-000000067F000080000007800C0000038000__00000096E85806C0\n000000067F000080000007800C0000038000-000000067F000080000007800C000003C000__00000096187D1FC8\n000000067F000080000007800C0000038000-000000067F000080000007800C000003C000__00000096E85806C0\n000000067F000080000007800C0000038BB2-000000067F000080000007800C0000042318__0000009446B52FD1-00000094D67DF4F9\n000000067F000080000007800C000003C000-000000067F000080000007800C0000040000__00000096187D1FC8\n000000067F000080000007800C000003C000-000000067F000080000007800C0000040000__00000096E85806C0\n000000067F000080000007800C0000040000-000000067F000080000007800C0000044000__00000096187D1FC8\n000000067F000080000007800C0000040000-000000067F000080000007800C0000044000__00000096E85806C0\n000000067F000080000007800C0000042318-000000067F000080000007800C000004BA7E__0000009446B52FD1-00000094D67DF4F9\n000000067F000080000007800C0000044000-000000067F000080000007800C0000048000__00000096187D1FC8\n000000067F000080000007800C0000044000-000000067F000080000007800C0000048000__00000096E85806C0\n000000067F000080000007800C0000048000-000000067F000080000007800C000004C000__00000096187D1FC8\n000000067F000080000007800C0000048000-000000067F000080000007800C000004C000__00000096E85806C0\n000000067F000080000007800C000004BA7E-000000067F000080000007800C00000551B3__0000009446B52FD1-00000094D67DF4F9\n000000067F000080000007800C000004C000-000000067F000080000007800C0000050000__00000096187D1FC8\n000000067F000080000007800C000004C000-000000067F000080000007800C0000050000__00000096E85806C0\n000000067F000080000007800C0000050000-000000067F000080000007800C0000054000__00000096187D1FC8\n000000067F000080000007800C0000050000-000000067F00008
0000007800C0000054000__00000096E85806C0\n000000067F000080000007800C0000054000-000000067F000080000007800C0000058000__0000009614F1FFE8\n000000067F000080000007800C0000054000-000000067F000080000007800C0000058000__00000096E85806C0\n000000067F000080000007800C00000551B3-030000000000000000000000000000000002__0000009446B52FD1-00000094D67DF4F9\n000000067F000080000007800C000005523E-000000067F000080000007800C000005E9A4__00000094D67DF4F9-000000959635F2A9\n000000067F000080000007800C0000058000-000000067F000080000007800C000005C000__0000009614F1FFE8\n000000067F000080000007800C0000058000-000000067F000080000007800C000005C000__00000096E85806C0\n000000067F000080000007800C000005C000-000000067F000080000007800C0000060000__0000009614F1FFE8\n000000067F000080000007800C000005C000-000000067F000080000007800C0000060000__00000096E85806C0\n000000067F000080000007800C000005E9A4-000000067F000080000007800C000006810A__00000094D67DF4F9-000000959635F2A9\n000000067F000080000007800C0000060000-000000067F000080000007800C0000064000__0000009614F1FFE8\n000000067F000080000007800C0000060000-000000067F000080000007800C0000064000__00000096E85806C0\n000000067F000080000007800C0000064000-000000067F000080000007800C0000068000__0000009614F1FFE8\n000000067F000080000007800C0000064000-000000067F000080000007800C0000068000__00000096E85806C0\n000000067F000080000007800C0000068000-000000067F000080000007800C000006C000__0000009614F1FFE8\n000000067F000080000007800C0000068000-000000067F000080000007800C000006C000__00000096E85806C0\n000000067F000080000007800C000006810A-000000067F000080000007800C0000071870__00000094D67DF4F9-000000959635F2A9\n000000067F000080000007800C000006C000-000000067F000080000007800C0000070000__0000009614F1FFE8\n000000067F000080000007800C000006C000-000000067F000080000007800C0000070000__00000096E85806C0\n000000067F000080000007800C000006D446-000000067F000080000007800C00000D9B82__00000096AEF27399-00000096E85829C9\n000000067F000080000007800C0000070000-000000067F000080000007800C0000074000__0000009614F1FFE8\n000000067F0000
80000007800C0000070000-000000067F000080000007800C0000074000__00000096E85806C0\n000000067F000080000007800C0000071870-000000067F000080000007800C000007AFD6__00000094D67DF4F9-000000959635F2A9\n000000067F000080000007800C0000074000-000000067F000080000007800C0000078000__0000009614F1FFE8\n000000067F000080000007800C0000074000-000000067F000080000007800C0000078000__00000096E85806C0\n000000067F000080000007800C0000078000-000000067F000080000007800C000007C000__0000009614F1FFE8\n000000067F000080000007800C0000078000-000000067F000080000007800C000007C000__00000096E85806C0\n000000067F000080000007800C000007AFD6-000000067F000080000007800C000008470B__00000094D67DF4F9-000000959635F2A9\n000000067F000080000007800C000007B8DE-000000067F000080000007800C00000F73DA__00000096193A8001-00000096AEF27399\n000000067F000080000007800C000007C000-000000067F000080000007800C0000080000__0000009614F1FFE8\n000000067F000080000007800C000007C000-000000067F000080000007800C0000080000__00000096E85806C0\n000000067F000080000007800C0000080000-000000067F000080000007800C0000084000__0000009614F1FFE8\n000000067F000080000007800C0000080000-000000067F000080000007800C0000084000__00000096E85806C0\n000000067F000080000007800C0000084000-000000067F000080000007800C0000088000__0000009614F1FFE8\n000000067F000080000007800C0000084000-000000067F000080000007800C0000088000__00000096E85806C0\n000000067F000080000007800C000008470B-000000067F000080000007800C000008DE71__00000094D67DF4F9-000000959635F2A9\n000000067F000080000007800C0000088000-000000067F000080000007800C000008C000__0000009614F1FFE8\n000000067F000080000007800C0000088000-000000067F000080000007800C000008C000__00000096E85806C0\n000000067F000080000007800C000008C000-000000067F000080000007800C0000090000__0000009614F1FFE8\n000000067F000080000007800C000008C000-000000067F000080000007800C0000090000__00000096E85806C0\n000000067F000080000007800C000008DE71-000000067F000080000007800C0000097591__00000094D67DF4F9-000000959635F2A9\n000000067F000080000007800C0000090000-000000067F000080000007800C000009
4000__0000009614F1FFE8\n000000067F000080000007800C0000090000-000000067F000080000007800C0000094000__00000096E85806C0\n000000067F000080000007800C0000094000-000000067F000080000007800C0000098000__0000009614F1FFE8\n000000067F000080000007800C0000094000-000000067F000080000007800C0000098000__00000096E85806C0\n000000067F000080000007800C0000097591-000000067F000080000007800C00000A0CF7__00000094D67DF4F9-000000959635F2A9\n000000067F000080000007800C0000098000-000000067F000080000007800C000009C000__0000009614F1FFE8\n000000067F000080000007800C0000098000-000000067F000080000007800C000009C000__00000096E85806C0\n000000067F000080000007800C000009C000-000000067F000080000007800C00000A0000__0000009614F1FFE8\n000000067F000080000007800C000009C000-000000067F000080000007800C00000A0000__00000096E85806C0\n000000067F000080000007800C00000A0000-000000067F000080000007800C00000A4000__0000009614F1FFE8\n000000067F000080000007800C00000A0000-000000067F000080000007800C00000A4000__00000096E85806C0\n000000067F000080000007800C00000A0CF7-000000067F000080000007800C00000AA40B__00000094D67DF4F9-000000959635F2A9\n000000067F000080000007800C00000A4000-000000067F000080000007800C00000A8000__0000009614F1FFE8\n000000067F000080000007800C00000A4000-000000067F000080000007800C00000A8000__00000096E85806C0\n000000067F000080000007800C00000A8000-000000067F000080000007800C00000AC000__0000009614F1FFE8\n000000067F000080000007800C00000A8000-000000067F000080000007800C00000AC000__00000096E85806C0\n000000067F000080000007800C00000AA40B-000000067F000080000007800C00000B3B4D__00000094D67DF4F9-000000959635F2A9\n000000067F000080000007800C00000AC000-000000067F000080000007800C00000B0000__0000009614F1FFE8\n000000067F000080000007800C00000AC000-000000067F000080000007800C00000B0000__00000096E85806C0\n000000067F000080000007800C00000B0000-000000067F000080000007800C00000B4000__0000009614F1FFE8\n000000067F000080000007800C00000B0000-000000067F000080000007800C00000B4000__00000096E85806C0\n000000067F000080000007800C00000B3B4D-000000067F000080000007800C00
000BD2B3__00000094D67DF4F9-000000959635F2A9\n000000067F000080000007800C00000B4000-000000067F000080000007800C00000B8000__0000009614F1FFE8\n000000067F000080000007800C00000B4000-000000067F000080000007800C00000B8000__00000096E85806C0\n000000067F000080000007800C00000B8000-000000067F000080000007800C00000BC000__0000009614F1FFE8\n000000067F000080000007800C00000B8000-000000067F000080000007800C00000BC000__00000096E85806C0\n000000067F000080000007800C00000BC000-000000067F000080000007800C00000C0000__0000009614F1FFE8\n000000067F000080000007800C00000BC000-000000067F000080000007800C00000C0000__00000096E85806C0\n000000067F000080000007800C00000BD2B3-000000067F000080000007800C00000C69DA__00000094D67DF4F9-000000959635F2A9\n000000067F000080000007800C00000C0000-000000067F000080000007800C00000C4000__0000009614F1FFE8\n000000067F000080000007800C00000C0000-000000067F000080000007800C00000C4000__00000096E85806C0\n000000067F000080000007800C00000C4000-000000067F000080000007800C00000C8000__0000009614F1FFE8\n000000067F000080000007800C00000C4000-000000067F000080000007800C00000C8000__00000096E85806C0\n000000067F000080000007800C00000C69DA-000000067F000080000007800C0100000000__00000094D67DF4F9-000000959635F2A9\n000000067F000080000007800C00000C8000-000000067F000080000007800C00000CC000__0000009614F1FFE8\n000000067F000080000007800C00000C8000-000000067F000080000007800C00000CC000__00000096E85806C0\n000000067F000080000007800C00000CC000-000000067F000080000007800C00000D0000__0000009614F1FFE8\n000000067F000080000007800C00000CC000-000000067F000080000007800C00000D0000__00000096E85806C0\n000000067F000080000007800C00000CD6B6-000000067F000080000007800C00000D6C18__000000959635F2A9-00000096193A8001\n000000067F000080000007800C00000D0000-000000067F000080000007800C00000D4000__0000009614F1FFE8\n000000067F000080000007800C00000D0000-000000067F000080000007800C00000D4000__00000096E85806C0\n000000067F000080000007800C00000D4000-000000067F000080000007800C00000D8000__0000009614F1FFE8\n000000067F000080000007800C00000D4000-0000000
67F000080000007800C00000D8000__00000096E85806C0\n000000067F000080000007800C00000D6C18-000000067F000080000007800C00000E0179__000000959635F2A9-00000096193A8001\n000000067F000080000007800C00000D8000-000000067F000080000007800C00000DC000__0000009614F1FFE8\n000000067F000080000007800C00000D8000-000000067F000080000007800C00000DC000__00000096E85806C0\n000000067F000080000007800C00000D9BA3-000000067F00008000000780140000013481__00000096AEF27399-00000096E85829C9\n000000067F000080000007800C00000DC000-000000067F000080000007800C00000E0000__0000009614F1FFE8\n000000067F000080000007800C00000DC000-000000067F000080000007800C00000E0000__00000096E85806C0\n000000067F000080000007800C00000E0000-000000067F000080000007800C00000E4000__0000009614F1FFE8\n000000067F000080000007800C00000E0000-000000067F000080000007800C00000E4000__00000096E85806C0\n000000067F000080000007800C00000E0179-000000067F000080000007800C00000E96DC__000000959635F2A9-00000096193A8001\n000000067F000080000007800C00000E4000-000000067F000080000007800C00000E8000__0000009614F1FFE8\n000000067F000080000007800C00000E4000-000000067F000080000007800C00000E8000__00000096E85806C0\n000000067F000080000007800C00000E8000-000000067F000080000007800C00000EC000__0000009614F1FFE8\n000000067F000080000007800C00000E8000-000000067F000080000007800C00000EC000__00000096E85806C0\n000000067F000080000007800C00000E96DC-000000067F000080000007800C00000F2C3E__000000959635F2A9-00000096193A8001\n000000067F000080000007800C00000EC000-000000067F000080000007800C00000F0000__0000009614F1FFE8\n000000067F000080000007800C00000EC000-000000067F000080000007800C00000F0000__00000096E85806C0\n000000067F000080000007800C00000F0000-000000067F000080000007800C00000F4000__0000009614F1FFE8\n000000067F000080000007800C00000F0000-000000067F000080000007800C00000F4000__00000096E85806C0\n000000067F000080000007800C00000F2C3E-000000067F000080000007800C00000FC1A0__000000959635F2A9-00000096193A8001\n000000067F000080000007800C00000F4000-000000067F000080000007800C00000F8000__0000009614F1FFE8\n000000
067F000080000007800C00000F4000-000000067F000080000007800C00000F8000__00000096E85806C0\n000000067F000080000007800C00000F73E3-000000067F00008000000780140000003F18__00000096193A8001-00000096AEF27399\n000000067F000080000007800C00000F8000-000000067F000080000007800C00000FC000__0000009614F1FFE8\n000000067F000080000007800C00000F8000-000000067F000080000007800C00000FC000__00000096E85806C0\n000000067F000080000007800C00000FC000-000000067F000080000007800C0000100000__0000009614F1FFE8\n000000067F000080000007800C00000FC000-000000067F000080000007800C0000100000__00000096E85806C0\n000000067F000080000007800C00000FC1A0-000000067F000080000007800C00001057C1__000000959635F2A9-00000096193A8001\n000000067F000080000007800C0000100000-000000067F000080000007800C0000104000__0000009614F1FFE8\n000000067F000080000007800C0000100000-000000067F000080000007800C0000104000__00000096E85806C0\n000000067F000080000007800C0000104000-000000067F000080000007800C0000108000__0000009614F1FFE8\n000000067F000080000007800C0000104000-000000067F000080000007800C0000108000__00000096E85806C0\n000000067F000080000007800C00001057C1-000000067F000080000007800C000010EF0B__000000959635F2A9-00000096193A8001\n000000067F000080000007800C0000108000-000000067F000080000007800C000010C000__0000009614F1FFE8\n000000067F000080000007800C0000108000-000000067F000080000007800C000010C000__00000096E85806C0\n000000067F000080000007800C000010C000-000000067F000080000007800C0000110000__0000009614F1FFE8\n000000067F000080000007800C000010C000-000000067F000080000007800C0000110000__00000096E85806C0\n000000067F000080000007800C000010EF0B-01000000000000000100000004000000000B__000000959635F2A9-00000096193A8001\n000000067F000080000007800C0000110000-000000067F00008000000780120100000000__00000096E85806C0\n000000067F000080000007800C0000110000-030000000000000000000000000000000002__0000009614F1FFE8\n000000067F00008000000780140000000000-000000067F00008000000780140000004000__00000096E85806C0\n000000067F00008000000780140000003F18-000000067F00008000000780140000009ED4__000
00096193A8001-00000096AEF27399\n000000067F00008000000780140000004000-000000067F00008000000780140000008000__00000096E85806C0\n000000067F00008000000780140000008000-000000067F0000800000078014000000C000__00000096E85806C0\n000000067F00008000000780140000009ED4-000000067F0000800000078014000000FE9A__00000096193A8001-00000096AEF27399\n000000067F0000800000078014000000C000-000000067F00008000000780140000010000__00000096E85806C0\n000000067F0000800000078014000000FE9A-000000067F00008000000780140000015DD1__00000096193A8001-00000096AEF27399\n000000067F00008000000780140000010000-000000067F00008000000780140000014000__00000096E85806C0\n000000067F00008000000780140000013481-030000000000000000000000000000000002__00000096AEF27399-00000096E85829C9\n000000067F00008000000780140000014000-000000067F00008000000780140000018000__00000096E85806C0\n000000067F00008000000780140000015DD1-000000067F0000800000078014000001BD7E__00000096193A8001-00000096AEF27399\n000000067F00008000000780140000018000-000000067F0000800000078014000001C000__00000096E85806C0\n000000067F0000800000078014000001BD7E-000000067F00008000000780140000021CF0__00000096193A8001-00000096AEF27399\n000000067F0000800000078014000001C000-000000067F00008000000780140000020000__00000096E85806C0\n000000067F00008000000780140000020000-000000067F00008000000780140000024000__00000096E85806C0\n000000067F00008000000780140000021CF0-000000067F00008000000780140000027CF8__00000096193A8001-00000096AEF27399\n000000067F00008000000780140000024000-000000067F00008000000780140000028000__00000096E85806C0\n000000067F00008000000780140000027CF8-000000067F0000800000078014000002DC88__00000096193A8001-00000096AEF27399\n000000067F00008000000780140000028000-000000067F0000800000078014000002C000__00000096E85806C0\n000000067F0000800000078014000002C000-030000000000000000000000000000000002__00000096E85806C0\n000000067F0000800000078014000002DC88-030000000000000000000000000000000002__00000096193A8001-00000096AEF27399\n000000067F000080000007A00C0000000000-000000067F000080000007A00C00
00004000__0000009921F3B4A8\n000000067F000080000007A00C0000004000-000000067F000080000007A00C0000008000__0000009921F3B4A8\n000000067F000080000007A00C0000008000-000000067F000080000007A00C000000C000__0000009921F3B4A8\n000000067F000080000007A00C000000974B-000000067F000080000007A00C0000012EB1__00000096E85829C9-00000098A7ADFC91\n000000067F000080000007A00C000000C000-000000067F000080000007A00C0000010000__0000009921F3B4A8\n000000067F000080000007A00C0000010000-000000067F000080000007A00C0000014000__0000009921F3B4A8\n000000067F000080000007A00C0000012EB1-000000067F000080000007A00C000001C60B__00000096E85829C9-00000098A7ADFC91\n000000067F000080000007A00C0000014000-000000067F000080000007A00C0000018000__0000009921F3B4A8\n000000067F000080000007A00C0000018000-000000067F000080000007A00C000001C000__0000009921F3B4A8\n000000067F000080000007A00C000001C000-000000067F000080000007A00C0000020000__0000009921F3B4A8\n000000067F000080000007A00C000001C60B-000000067F000080000007A00C0000025D39__00000096E85829C9-00000098A7ADFC91\n000000067F000080000007A00C0000020000-000000067F000080000007A00C0000024000__0000009921F3B4A8\n000000067F000080000007A00C0000024000-000000067F000080000007A00C0000028000__0000009921F3B4A8\n000000067F000080000007A00C0000025D39-000000067F000080000007A00C000002F49F__00000096E85829C9-00000098A7ADFC91\n000000067F000080000007A00C0000028000-000000067F000080000007A00C000002C000__0000009921F3B4A8\n000000067F000080000007A00C000002C000-000000067F000080000007A00C0000030000__0000009921F3B4A8\n000000067F000080000007A00C000002F49F-000000067F000080000007A00C0000038BB2__00000096E85829C9-00000098A7ADFC91\n000000067F000080000007A00C0000030000-000000067F000080000007A00C0000034000__0000009921F3B4A8\n000000067F000080000007A00C0000034000-000000067F000080000007A00C0000038000__0000009921F3B4A8\n000000067F000080000007A00C0000038000-000000067F000080000007A00C000003C000__0000009921F3B4A8\n000000067F000080000007A00C0000038BB2-000000067F000080000007A00C0000042318__00000096E85829C9-00000098A7ADFC91\n000000067F
000080000007A00C000003C000-000000067F000080000007A00C0000040000__0000009921F3B4A8\n000000067F000080000007A00C0000040000-000000067F000080000007A00C0000044000__0000009921F3B4A8\n000000067F000080000007A00C0000042318-000000067F000080000007A00C000004BA7E__00000096E85829C9-00000098A7ADFC91\n000000067F000080000007A00C0000044000-000000067F000080000007A00C0000048000__0000009921F3B4A8\n000000067F000080000007A00C0000048000-000000067F000080000007A00C000004C000__0000009921F3B4A8\n000000067F000080000007A00C000004B9B2-000000067F000080000007A00C0000097B6D__0000009921E47AA1-000000997F5D23C9\n000000067F000080000007A00C000004BA7E-000000067F000080000007A00C00000551B3__00000096E85829C9-00000098A7ADFC91\n000000067F000080000007A00C000004C000-000000067F000080000007A00C0000050000__0000009921F3B4A8\n000000067F000080000007A00C0000050000-000000067F000080000007A00C0000054000__0000009921F3B4A8\n000000067F000080000007A00C0000054000-000000067F000080000007A00C0000058000__0000009921F3B4A8\n000000067F000080000007A00C00000551B3-000000067F000080000007A00C000005E90A__00000096E85829C9-00000098A7ADFC91\n000000067F000080000007A00C0000058000-000000067F000080000007A00C000005C000__0000009921F3B4A8\n000000067F000080000007A00C000005C000-000000067F000080000007A00C0000060000__0000009921F3B4A8\n000000067F000080000007A00C000005E90A-000000067F000080000007A00C000006802C__00000096E85829C9-00000098A7ADFC91\n000000067F000080000007A00C0000060000-000000067F000080000007A00C0000064000__0000009921F3B4A8\n000000067F000080000007A00C0000064000-000000067F000080000007A00C0000068000__0000009921F3B4A8\n000000067F000080000007A00C0000068000-000000067F000080000007A00C000006C000__0000009921F3B4A8\n000000067F000080000007A00C000006802C-000000067F000080000007A00C0000071783__00000096E85829C9-00000098A7ADFC91\n000000067F000080000007A00C000006C000-000000067F000080000007A00C0000070000__0000009921F3B4A8\n000000067F000080000007A00C0000070000-000000067F000080000007A00C0000074000__0000009921F3B4A8\n000000067F000080000007A00C0000071783-000000067F0
00080000007A00C000007AEE8__00000096E85829C9-00000098A7ADFC91\n000000067F000080000007A00C0000074000-000000067F000080000007A00C0000078000__0000009921F3B4A8\n000000067F000080000007A00C0000078000-000000067F000080000007A00C000007C000__0000009921F3B4A8\n000000067F000080000007A00C000007AEE8-000000067F000080000007A00C000008460B__00000096E85829C9-00000098A7ADFC91\n000000067F000080000007A00C000007C000-000000067F000080000007A00C0000080000__0000009921F3B4A8\n000000067F000080000007A00C0000080000-000000067F000080000007A00C0000084000__0000009921F3B4A8\n000000067F000080000007A00C0000084000-000000067F000080000007A00C0000088000__0000009921F3B4A8\n000000067F000080000007A00C000008460B-000000067F000080000007A00C000008DD71__00000096E85829C9-00000098A7ADFC91\n000000067F000080000007A00C0000088000-000000067F000080000007A00C000008C000__0000009921F3B4A8\n000000067F000080000007A00C000008C000-000000067F000080000007A00C0000090000__0000009921F3B4A8\n000000067F000080000007A00C000008DD71-000000067F000080000007A00C00000974D7__00000096E85829C9-00000098A7ADFC91\n000000067F000080000007A00C0000090000-000000067F000080000007A00C0000094000__0000009921F3B4A8\n000000067F000080000007A00C0000094000-000000067F000080000007A00C0000098000__0000009921F3B4A8\n000000067F000080000007A00C00000974D7-000000067F000080000007A00C00000A0C0B__00000096E85829C9-00000098A7ADFC91\n000000067F000080000007A00C0000097B7A-000000067F000080000007A00C00000E3627__0000009921E47AA1-000000997F5D23C9\n000000067F000080000007A00C0000098000-000000067F000080000007A00C000009C000__0000009921F3B4A8\n000000067F000080000007A00C000009C000-000000067F000080000007A00C00000A0000__0000009921F3B4A8\n000000067F000080000007A00C00000A0000-000000067F000080000007A00C00000A4000__0000009921F3B4A8\n000000067F000080000007A00C00000A0C0B-000000067F000080000007A00C00000AA371__00000096E85829C9-00000098A7ADFC91\n000000067F000080000007A00C00000A4000-000000067F000080000007A00C00000A8000__0000009921F3B4A8\n000000067F000080000007A00C00000A8000-000000067F000080000007A00C00000A
C000__0000009921F3B4A8\n000000067F000080000007A00C00000AA371-000000067F000080000007A00C00000B3AD7__00000096E85829C9-00000098A7ADFC91\n000000067F000080000007A00C00000AC000-000000067F000080000007A00C00000B0000__0000009921F3B4A8\n000000067F000080000007A00C00000B0000-000000067F000080000007A00C00000B4000__0000009921F3B4A8\n000000067F000080000007A00C00000B3AD7-000000067F000080000007A00C00000BD20B__00000096E85829C9-00000098A7ADFC91\n000000067F000080000007A00C00000B4000-000000067F000080000007A00C00000B8000__0000009921F3B4A8\n000000067F000080000007A00C00000B8000-000000067F000080000007A00C00000BC000__0000009921F3B4A8\n000000067F000080000007A00C00000BC000-000000067F000080000007A00C00000C0000__0000009921F3B4A8\n000000067F000080000007A00C00000BD20B-000000067F000080000007A00C00000C6932__00000096E85829C9-00000098A7ADFC91\n000000067F000080000007A00C00000C0000-000000067F000080000007A00C00000C4000__0000009921F3B4A8\n000000067F000080000007A00C00000C4000-000000067F000080000007A00C00000C8000__0000009921F3B4A8\n000000067F000080000007A00C00000C6932-000000067F000080000007A00C00000D0098__00000096E85829C9-00000098A7ADFC91\n000000067F000080000007A00C00000C8000-000000067F000080000007A00C00000CC000__0000009921F3B4A8\n000000067F000080000007A00C00000CC000-000000067F000080000007A00C00000D0000__0000009921F3B4A8\n000000067F000080000007A00C00000D0000-000000067F000080000007A00C00000D4000__0000009921F3B4A8\n000000067F000080000007A00C00000D0098-000000067F000080000007A00C00000D97FE__00000096E85829C9-00000098A7ADFC91\n000000067F000080000007A00C00000D4000-000000067F000080000007A00C00000D8000__0000009921F3B4A8\n000000067F000080000007A00C00000D8000-000000067F000080000007A00C00000DC000__0000009921F3B4A8\n000000067F000080000007A00C00000D97FE-000000067F000080000007A00C00000E2F0B__00000096E85829C9-00000098A7ADFC91\n000000067F000080000007A00C00000DC000-000000067F000080000007A00C00000E0000__0000009921F3B4A8\n000000067F000080000007A00C00000E0000-000000067F000080000007A00C00000E4000__0000009921F3B4A8\n000000067F0000
80000007A00C00000E2F0B-000000067F000080000007A00C00000EC671__00000096E85829C9-00000098A7ADFC91\n000000067F000080000007A00C00000E364A-000000067F000080000007A01400000065FE__0000009921E47AA1-000000997F5D23C9\n000000067F000080000007A00C00000E4000-000000067F000080000007A00C00000E8000__0000009921F3B4A8\n000000067F000080000007A00C00000E8000-000000067F000080000007A00C00000EC000__0000009921F3B4A8\n000000067F000080000007A00C00000EC000-000000067F000080000007A00C00000F0000__0000009921F3B4A8\n000000067F000080000007A00C00000EC671-000000067F000080000007A00C00000F5D9F__00000096E85829C9-00000098A7ADFC91\n000000067F000080000007A00C00000F0000-000000067F000080000007A00C00000F4000__0000009921F3B4A8\n000000067F000080000007A00C00000F4000-000000067F000080000007A00C00000F8000__0000009921F3B4A8\n000000067F000080000007A00C00000F5D9F-000000067F000080000007A00C00000FF505__00000096E85829C9-00000098A7ADFC91\n000000067F000080000007A00C00000F720F-000000067F000080000007A00C0000111692__00000098A7ADFC91-0000009921E47AA1\n000000067F000080000007A00C00000F8000-000000067F000080000007A00C00000FC000__0000009921F3B4A8\n000000067F000080000007A00C00000FC000-000000067F000080000007A00C0000100000__0000009921F3B4A8\n000000067F000080000007A00C00000FF505-000000067F000080000007A00C0000108C10__00000096E85829C9-00000098A7ADFC91\n000000067F000080000007A00C0000100000-000000067F000080000007A00C0000104000__0000009921F3B4A8\n000000067F000080000007A00C0000104000-000000067F000080000007A00C0000108000__0000009921F3B4A8\n000000067F000080000007A00C0000108000-000000067F000080000007A00C000010C000__0000009921F3B4A8\n000000067F000080000007A00C0000108C10-030000000000000000000000000000000002__00000096E85829C9-00000098A7ADFC91\n000000067F000080000007A00C000010C000-000000067F000080000007A00C0000110000__0000009921F3B4A8\n000000067F000080000007A00C0000110000-000000067F000080000007A0120100000000__0000009921F3B4A8\n000000067F000080000007A00C0000111692-000000067F000080000007A01400000040E7__00000098A7ADFC91-0000009921E47AA1\n000000067F00008000
0007A0140000000000-000000067F000080000007A0140000004000__0000009921F3B4A8\n000000067F000080000007A0140000004000-000000067F000080000007A0140000008000__0000009921F3B4A8\n000000067F000080000007A01400000040E7-000000067F000080000007A014000000B5F6__00000098A7ADFC91-0000009921E47AA1\n000000067F000080000007A0140000006601-000000067F000080000007A014000001B4CB__0000009921E47AA1-000000997F5D23C9\n000000067F000080000007A0140000008000-000000067F000080000007A014000000C000__0000009921F3B4A8\n000000067F000080000007A014000000B5F6-000000067F000080000007A0140000012AFC__00000098A7ADFC91-0000009921E47AA1\n000000067F000080000007A014000000C000-000000067F000080000007A0140000010000__0000009921F3B4A8\n000000067F000080000007A0140000010000-000000067F000080000007A0140000014000__0000009921F3B4A8\n000000067F000080000007A0140000012AFC-000000067F000080000007A0140000019F9B__00000098A7ADFC91-0000009921E47AA1\n000000067F000080000007A0140000014000-000000067F000080000007A0140000018000__0000009921F3B4A8\n000000067F000080000007A0140000018000-000000067F000080000007A014000001C000__0000009921F3B4A8\n000000067F000080000007A0140000019F9B-000000067F000080000007A01400000214BE__00000098A7ADFC91-0000009921E47AA1\n000000067F000080000007A014000001B4CB-030000000000000000000000000000000002__0000009921E47AA1-000000997F5D23C9\n000000067F000080000007A014000001C000-000000067F000080000007A0140000020000__0000009921F3B4A8\n000000067F000080000007A0140000020000-000000067F000080000007A0140000024000__0000009921F3B4A8\n000000067F000080000007A01400000214BE-000000067F000080000007A01400000289C9__00000098A7ADFC91-0000009921E47AA1\n000000067F000080000007A0140000024000-000000067F000080000007A0140000028000__0000009921F3B4A8\n000000067F000080000007A0140000028000-000000067F000080000007A014000002C000__0000009921F3B4A8\n000000067F000080000007A01400000289C9-030000000000000000000000000000000002__00000098A7ADFC91-0000009921E47AA1\n000000067F000080000007A014000002C000-030000000000000000000000000000000002__0000009921F3B4A8\n000000067F000080000007
C00C0000000000-000000067F000080000007C00C0000004000__0000009B5229DFE8\n000000067F000080000007C00C0000004000-000000067F000080000007C00C0000008000__0000009B5229DFE8\n000000067F000080000007C00C0000007EA5-000000067F000080000007C00C00000115FE__000000997F5D23C9-00000099F1C9FC71\n000000067F000080000007C00C0000008000-000000067F000080000007C00C000000C000__0000009B5229DFE8\n000000067F000080000007C00C000000C000-000000067F000080000007C00C0000010000__0000009B5229DFE8\n000000067F000080000007C00C0000010000-000000067F000080000007C00C0000014000__0000009B5229DFE8\n000000067F000080000007C00C00000115FE-000000067F000080000007C00C000001AD0C__000000997F5D23C9-00000099F1C9FC71\n000000067F000080000007C00C0000014000-000000067F000080000007C00C0000018000__0000009B5229DFE8\n000000067F000080000007C00C0000018000-000000067F000080000007C00C000001C000__0000009B5229DFE8\n000000067F000080000007C00C000001AD0C-000000067F000080000007C00C0000024472__000000997F5D23C9-00000099F1C9FC71\n000000067F000080000007C00C000001C000-000000067F000080000007C00C0000020000__0000009B5229DFE8\n000000067F000080000007C00C0000020000-000000067F000080000007C00C0000024000__0000009B5229DFE8\n000000067F000080000007C00C0000024000-000000067F000080000007C00C0000028000__0000009B5229DFE8\n000000067F000080000007C00C0000024472-000000067F000080000007C00C000002DBD8__000000997F5D23C9-00000099F1C9FC71\n000000067F000080000007C00C0000028000-000000067F000080000007C00C000002C000__0000009B5229DFE8\n000000067F000080000007C00C000002C000-000000067F000080000007C00C0000030000__0000009B5229DFE8\n000000067F000080000007C00C000002DBD8-000000067F000080000007C00C000003732B__000000997F5D23C9-00000099F1C9FC71\n000000067F000080000007C00C0000030000-000000067F000080000007C00C0000034000__0000009B5229DFE8\n000000067F000080000007C00C0000034000-000000067F000080000007C00C0000038000__0000009B5229DFE8\n000000067F000080000007C00C000003732B-000000067F000080000007C00C0000040A91__000000997F5D23C9-00000099F1C9FC71\n000000067F000080000007C00C0000038000-000000067F000080000007C
00C000003C000__0000009B5229DFE8\n000000067F000080000007C00C000003C000-000000067F000080000007C00C0000040000__0000009B5229DFE8\n000000067F000080000007C00C0000040000-000000067F000080000007C00C0000044000__0000009B40525F80\n000000067F000080000007C00C0000040000-000000067F000080000007C00C0000044000__0000009C1E3799F0\n000000067F000080000007C00C0000040A91-030000000000000000000000000000000002__000000997F5D23C9-00000099F1C9FC71\n000000067F000080000007C00C0000042360-000000067F000080000007C00C000004BAC6__00000099F1C9FC71-0000009A918DF181\n000000067F000080000007C00C0000044000-000000067F000080000007C00C0000048000__0000009B40525F80\n000000067F000080000007C00C0000044000-000000067F000080000007C00C0000048000__0000009C1E3799F0\n000000067F000080000007C00C0000048000-000000067F000080000007C00C000004C000__0000009B40525F80\n000000067F000080000007C00C0000048000-000000067F000080000007C00C000004C000__0000009C1E3799F0\n000000067F000080000007C00C000004BAC6-000000067F000080000007C00C00000551FB__00000099F1C9FC71-0000009A918DF181\n000000067F000080000007C00C000004C000-000000067F000080000007C00C0000050000__0000009B40525F80\n000000067F000080000007C00C000004C000-000000067F000080000007C00C0000050000__0000009C1E3799F0\n000000067F000080000007C00C0000050000-000000067F000080000007C00C0000054000__0000009B40525F80\n000000067F000080000007C00C0000050000-000000067F000080000007C00C0000054000__0000009C1E3799F0\n000000067F000080000007C00C0000052AA4-000000067F000080000007C00C00000A4244__0000009BCB4E4461-0000009C1E8CC879\n000000067F000080000007C00C0000054000-000000067F000080000007C00C0000058000__0000009B40525F80\n000000067F000080000007C00C0000054000-000000067F000080000007C00C0000058000__0000009C1E3799F0\n000000067F000080000007C00C00000551FB-000000067F000080000007C00C000005E90B__00000099F1C9FC71-0000009A918DF181\n000000067F000080000007C00C0000058000-000000067F000080000007C00C000005C000__0000009B40525F80\n000000067F000080000007C00C0000058000-000000067F000080000007C00C000005C000__0000009C1E3799F0\n000000067F000080000007
C00C000005C000-000000067F000080000007C00C0000060000__0000009B40525F80\n000000067F000080000007C00C000005C000-000000067F000080000007C00C0000060000__0000009C1E3799F0\n000000067F000080000007C00C000005E90B-000000067F000080000007C00C000006802B__00000099F1C9FC71-0000009A918DF181\n000000067F000080000007C00C0000060000-000000067F000080000007C00C0000064000__0000009B40525F80\n000000067F000080000007C00C0000060000-000000067F000080000007C00C0000064000__0000009C1E3799F0\n000000067F000080000007C00C0000064000-000000067F000080000007C00C0000068000__0000009B40525F80\n000000067F000080000007C00C0000064000-000000067F000080000007C00C0000068000__0000009C1E3799F0\n000000067F000080000007C00C0000068000-000000067F000080000007C00C000006C000__0000009B40525F80\n000000067F000080000007C00C0000068000-000000067F000080000007C00C000006C000__0000009C1E3799F0\n000000067F000080000007C00C000006802B-000000067F000080000007C00C0000071782__00000099F1C9FC71-0000009A918DF181\n000000067F000080000007C00C000006C000-000000067F000080000007C00C0000070000__0000009B40525F80\n000000067F000080000007C00C000006C000-000000067F000080000007C00C0000070000__0000009C1E3799F0\n000000067F000080000007C00C0000070000-000000067F000080000007C00C0000074000__0000009B40525F80\n000000067F000080000007C00C0000070000-000000067F000080000007C00C0000074000__0000009C1E3799F0\n000000067F000080000007C00C0000071782-000000067F000080000007C00C000007AEE8__00000099F1C9FC71-0000009A918DF181\n000000067F000080000007C00C0000074000-000000067F000080000007C00C0000078000__0000009B40525F80\n000000067F000080000007C00C0000074000-000000067F000080000007C00C0000078000__0000009C1E3799F0\n000000067F000080000007C00C0000078000-000000067F000080000007C00C000007C000__0000009B40525F80\n000000067F000080000007C00C0000078000-000000067F000080000007C00C000007C000__0000009C1E3799F0\n000000067F000080000007C00C000007AEE8-000000067F000080000007C00C000008460B__00000099F1C9FC71-0000009A918DF181\n000000067F000080000007C00C000007C000-000000067F000080000007C00C0000080000__0000009B40525F80\n0
00000067F000080000007C00C000007C000-000000067F000080000007C00C0000080000__0000009C1E3799F0\n000000067F000080000007C00C0000080000-000000067F000080000007C00C0000084000__0000009B40525F80\n000000067F000080000007C00C0000080000-000000067F000080000007C00C0000084000__0000009C1E3799F0\n000000067F000080000007C00C0000084000-000000067F000080000007C00C0000088000__0000009B40525F80\n000000067F000080000007C00C0000084000-000000067F000080000007C00C0000088000__0000009C1E3799F0\n000000067F000080000007C00C000008460B-000000067F000080000007C00C000008DD71__00000099F1C9FC71-0000009A918DF181\n000000067F000080000007C00C0000088000-000000067F000080000007C00C000008C000__0000009B40525F80\n000000067F000080000007C00C0000088000-000000067F000080000007C00C000008C000__0000009C1E3799F0\n000000067F000080000007C00C000008C000-000000067F000080000007C00C0000090000__0000009B40525F80\n000000067F000080000007C00C000008C000-000000067F000080000007C00C0000090000__0000009C1E3799F0\n000000067F000080000007C00C000008DD71-000000067F000080000007C00C00000974D7__00000099F1C9FC71-0000009A918DF181\n000000067F000080000007C00C0000090000-000000067F000080000007C00C0000094000__0000009B40525F80\n000000067F000080000007C00C0000090000-000000067F000080000007C00C0000094000__0000009C1E3799F0\n000000067F000080000007C00C0000094000-000000067F000080000007C00C0000098000__0000009B40525F80\n000000067F000080000007C00C0000094000-000000067F000080000007C00C0000098000__0000009C1E3799F0\n000000067F000080000007C00C00000974D7-000000067F000080000007C00C00000A0C0B__00000099F1C9FC71-0000009A918DF181\n000000067F000080000007C00C0000098000-000000067F000080000007C00C000009C000__0000009B40525F80\n000000067F000080000007C00C0000098000-000000067F000080000007C00C000009C000__0000009C1E3799F0\n000000067F000080000007C00C000009C000-000000067F000080000007C00C00000A0000__0000009B40525F80\n000000067F000080000007C00C000009C000-000000067F000080000007C00C00000A0000__0000009C1E3799F0\n000000067F000080000007C00C00000A0000-000000067F000080000007C00C00000A4000__0000009B40525F8
0\n000000067F000080000007C00C00000A0000-000000067F000080000007C00C00000A4000__0000009C1E3799F0\n000000067F000080000007C00C00000A0C0B-000000067F000080000007C00C0100000000__00000099F1C9FC71-0000009A918DF181\n000000067F000080000007C00C00000A4000-000000067F000080000007C00C00000A8000__0000009B40525F80\n000000067F000080000007C00C00000A4000-000000067F000080000007C00C00000A8000__0000009C1E3799F0\n000000067F000080000007C00C00000A424C-000000067F000080000007C00C00000F5B43__0000009BCB4E4461-0000009C1E8CC879\n000000067F000080000007C00C00000A8000-000000067F000080000007C00C00000AC000__0000009B40525F80\n000000067F000080000007C00C00000A8000-000000067F000080000007C00C00000AC000__0000009C1E3799F0\n000000067F000080000007C00C00000A9244-000000067F000080000007C00C00000B2991__0000009A918DF181-0000009B51A8BBB9\n000000067F000080000007C00C00000AC000-000000067F000080000007C00C00000B0000__0000009B40525F80\n000000067F000080000007C00C00000AC000-000000067F000080000007C00C00000B0000__0000009C1E3799F0\n000000067F000080000007C00C00000B0000-000000067F000080000007C00C00000B4000__0000009B40525F80\n000000067F000080000007C00C00000B0000-000000067F000080000007C00C00000B4000__0000009C1E3799F0\n000000067F000080000007C00C00000B2991-000000067F000080000007C00C00000BC0F7__0000009A918DF181-0000009B51A8BBB9\n000000067F000080000007C00C00000B4000-000000067F000080000007C00C00000B8000__0000009B40525F80\n000000067F000080000007C00C00000B4000-000000067F000080000007C00C00000B8000__0000009C1E3799F0\n000000067F000080000007C00C00000B8000-000000067F000080000007C00C00000BC000__0000009B40525F80\n000000067F000080000007C00C00000B8000-000000067F000080000007C00C00000BC000__0000009C1E3799F0\n000000067F000080000007C00C00000BA258-000000067F000080000007C01400000011E2__0000009B51A8BBB9-0000009BCB4E4461\n000000067F000080000007C00C00000BC000-000000067F000080000007C00C00000C0000__0000009B40525F80\n000000067F000080000007C00C00000BC000-000000067F000080000007C00C00000C0000__0000009C1E3799F0\n000000067F000080000007C00C00000BC0F7-000000067F00008
0000007C00C00000C580C__0000009A918DF181-0000009B51A8BBB9\n000000067F000080000007C00C00000C0000-000000067F000080000007C00C00000C4000__0000009B40525F80\n000000067F000080000007C00C00000C0000-000000067F000080000007C00C00000C4000__0000009C1E3799F0\n000000067F000080000007C00C00000C4000-000000067F000080000007C00C00000C8000__0000009B40525F80\n000000067F000080000007C00C00000C4000-000000067F000080000007C00C00000C8000__0000009C1E3799F0\n000000067F000080000007C00C00000C580C-000000067F000080000007C00C00000CEF72__0000009A918DF181-0000009B51A8BBB9\n000000067F000080000007C00C00000C8000-000000067F000080000007C00C00000CC000__0000009B40525F80\n000000067F000080000007C00C00000C8000-000000067F000080000007C00C00000CC000__0000009C1E3799F0\n000000067F000080000007C00C00000CC000-000000067F000080000007C00C00000D0000__0000009B40525F80\n000000067F000080000007C00C00000CC000-000000067F000080000007C00C00000D0000__0000009C1E3799F0\n000000067F000080000007C00C00000CEF72-000000067F000080000007C00C00000D86D8__0000009A918DF181-0000009B51A8BBB9\n000000067F000080000007C00C00000D0000-000000067F000080000007C00C00000D4000__0000009B40525F80\n000000067F000080000007C00C00000D0000-000000067F000080000007C00C00000D4000__0000009C1E3799F0\n000000067F000080000007C00C00000D4000-000000067F000080000007C00C00000D8000__0000009B40525F80\n000000067F000080000007C00C00000D4000-000000067F000080000007C00C00000D8000__0000009C1E3799F0\n000000067F000080000007C00C00000D8000-000000067F000080000007C00C00000DC000__0000009B40525F80\n000000067F000080000007C00C00000D8000-000000067F000080000007C00C00000DC000__0000009C1E3799F0\n000000067F000080000007C00C00000D86D8-000000067F000080000007C00C00000E1E0B__0000009A918DF181-0000009B51A8BBB9\n000000067F000080000007C00C00000DC000-000000067F000080000007C00C00000E0000__0000009B40525F80\n000000067F000080000007C00C00000DC000-000000067F000080000007C00C00000E0000__0000009C1E3799F0\n000000067F000080000007C00C00000E0000-000000067F000080000007C00C00000E4000__0000009B40525F80\n000000067F000080000007C00C00000
E0000-000000067F000080000007C00C00000E4000__0000009C1E3799F0\n000000067F000080000007C00C00000E1E0B-000000067F000080000007C00C00000EB571__0000009A918DF181-0000009B51A8BBB9\n000000067F000080000007C00C00000E4000-000000067F000080000007C00C00000E8000__0000009B40525F80\n000000067F000080000007C00C00000E4000-000000067F000080000007C00C00000E8000__0000009C1E3799F0\n000000067F000080000007C00C00000E8000-000000067F000080000007C00C00000EC000__0000009B40525F80\n000000067F000080000007C00C00000E8000-000000067F000080000007C00C00000EC000__0000009C1E3799F0\n000000067F000080000007C00C00000EB571-000000067F000080000007C00C00000F4CD7__0000009A918DF181-0000009B51A8BBB9\n000000067F000080000007C00C00000EC000-000000067F000080000007C00C00000F0000__0000009B40525F80\n000000067F000080000007C00C00000EC000-000000067F000080000007C00C00000F0000__0000009C1E3799F0\n000000067F000080000007C00C00000F0000-000000067F000080000007C00C00000F4000__0000009B40525F80\n000000067F000080000007C00C00000F0000-000000067F000080000007C00C00000F4000__0000009C1E3799F0\n000000067F000080000007C00C00000F4000-000000067F000080000007C00C00000F8000__0000009B40525F80\n000000067F000080000007C00C00000F4000-000000067F000080000007C00C00000F8000__0000009C1E3799F0\n000000067F000080000007C00C00000F4CD7-000000067F000080000007C00C00000FE40B__0000009A918DF181-0000009B51A8BBB9\n000000067F000080000007C00C00000F5B56-000000067F000080000007C014000000EB5A__0000009BCB4E4461-0000009C1E8CC879\n000000067F000080000007C00C00000F8000-000000067F000080000007C00C00000FC000__0000009B40525F80\n000000067F000080000007C00C00000F8000-000000067F000080000007C00C00000FC000__0000009C1E3799F0\n000000067F000080000007C00C00000FC000-000000067F000080000007C00C0000100000__0000009B40525F80\n000000067F000080000007C00C00000FC000-000000067F000080000007C00C0000100000__0000009C1E3799F0\n000000067F000080000007C00C00000FE40B-000000067F000080000007C00C0000107B27__0000009A918DF181-0000009B51A8BBB9\n000000067F000080000007C00C0000100000-000000067F000080000007C00C0000104000__0000009B405
25F80\n000000067F000080000007C00C0000100000-000000067F000080000007C00C0000104000__0000009C1E3799F0\n000000067F000080000007C00C0000104000-000000067F000080000007C00C0000108000__0000009B40525F80\n000000067F000080000007C00C0000104000-000000067F000080000007C00C0000108000__0000009C1E3799F0\n000000067F000080000007C00C0000107B27-000000067F000080000007C00C000011128D__0000009A918DF181-0000009B51A8BBB9\n000000067F000080000007C00C0000108000-000000067F000080000007C00C000010C000__0000009C1E3799F0\n000000067F000080000007C00C0000108000-030000000000000000000000000000000002__0000009B40525F80\n000000067F000080000007C00C000010C000-000000067F000080000007C00C0000110000__0000009C1E3799F0\n000000067F000080000007C00C0000110000-000000067F000080000007C0120100000000__0000009C1E3799F0\n000000067F000080000007C00C000011128D-010000000000000001000000040000000012__0000009A918DF181-0000009B51A8BBB9\n000000067F000080000007C0140000000000-000000067F000080000007C0140000004000__0000009C1E3799F0\n000000067F000080000007C01400000011E2-000000067F000080000007C0140000007F04__0000009B51A8BBB9-0000009BCB4E4461\n000000067F000080000007C0140000004000-000000067F000080000007C0140000008000__0000009C1E3799F0\n000000067F000080000007C0140000007F04-000000067F000080000007C014000000EC12__0000009B51A8BBB9-0000009BCB4E4461\n000000067F000080000007C0140000008000-000000067F000080000007C014000000C000__0000009C1E3799F0\n000000067F000080000007C014000000C000-000000067F000080000007C0140000010000__0000009C1E3799F0\n000000067F000080000007C014000000EB5A-000000067F000080000007C0140000027B5C__0000009BCB4E4461-0000009C1E8CC879\n000000067F000080000007C014000000EC12-000000067F000080000007C0140000015910__0000009B51A8BBB9-0000009BCB4E4461\n000000067F000080000007C0140000010000-000000067F000080000007C0140000014000__0000009C1E3799F0\n000000067F000080000007C0140000014000-000000067F000080000007C0140000018000__0000009C1E3799F0\n000000067F000080000007C0140000015910-000000067F000080000007C014000001C5BB__0000009B51A8BBB9-0000009BCB4E4461\n000000067F0000
80000007C0140000018000-000000067F000080000007C014000001C000__0000009C1E3799F0\n000000067F000080000007C014000001C000-000000067F000080000007C0140000020000__0000009C1E3799F0\n000000067F000080000007C014000001C5BB-000000067F000080000007C0140000023298__0000009B51A8BBB9-0000009BCB4E4461\n000000067F000080000007C0140000020000-000000067F000080000007C0140000024000__0000009C1E3799F0\n000000067F000080000007C0140000023298-000000067F000080000007C0140000029F9A__0000009B51A8BBB9-0000009BCB4E4461\n000000067F000080000007C0140000024000-000000067F000080000007C0140000028000__0000009C1E3799F0\n000000067F000080000007C0140000027B5E-030000000000000000000000000000000002__0000009BCB4E4461-0000009C1E8CC879\n000000067F000080000007C0140000028000-000000067F000080000007C014000002C000__0000009C1E3799F0\n000000067F000080000007C0140000029F9A-030000000000000000000000000000000002__0000009B51A8BBB9-0000009BCB4E4461\n000000067F000080000007C014000002C000-030000000000000000000000000000000002__0000009C1E3799F0\n000000067F000080000007E00C0000000000-000000067F000080000007E00C0000004000__0000009DEF760000\n000000067F000080000007E00C0000004000-000000067F000080000007E00C0000008000__0000009DEF760000\n000000067F000080000007E00C0000008000-000000067F000080000007E00C000000C000__0000009DEF760000\n000000067F000080000007E00C00000092CD-000000067F000080000007E00C0000012A0A__0000009C1E8CC879-0000009C9ED3F059\n000000067F000080000007E00C000000C000-000000067F000080000007E00C0000010000__0000009DEF760000\n000000067F000080000007E00C0000010000-000000067F000080000007E00C0000014000__0000009DEF760000\n000000067F000080000007E00C0000012A0A-000000067F000080000007E00C000001C170__0000009C1E8CC879-0000009C9ED3F059\n000000067F000080000007E00C0000014000-000000067F000080000007E00C0000018000__0000009DEF760000\n000000067F000080000007E00C0000018000-000000067F000080000007E00C000001C000__0000009DEF760000\n000000067F000080000007E00C000001C000-000000067F000080000007E00C0000020000__0000009DEF760000\n000000067F000080000007E00C000001C170-000000067F00008
0000007E00C00000258D6__0000009C1E8CC879-0000009C9ED3F059\n000000067F000080000007E00C0000020000-000000067F000080000007E00C0000024000__0000009DEF760000\n000000067F000080000007E00C0000024000-000000067F000080000007E00C0000028000__0000009DEF760000\n000000067F000080000007E00C00000258D6-000000067F000080000007E00C000002F00B__0000009C1E8CC879-0000009C9ED3F059\n000000067F000080000007E00C0000028000-000000067F000080000007E00C000002C000__0000009DEF760000\n000000067F000080000007E00C000002C000-000000067F000080000007E00C0000030000__0000009DEF760000\n000000067F000080000007E00C000002F00B-000000067F000080000007E00C0000038720__0000009C1E8CC879-0000009C9ED3F059\n000000067F000080000007E00C0000030000-000000067F000080000007E00C0000034000__0000009DEF760000\n000000067F000080000007E00C0000034000-000000067F000080000007E00C0000038000__0000009DEF760000\n000000067F000080000007E00C0000038000-000000067F000080000007E00C000003C000__0000009DEF760000\n000000067F000080000007E00C0000038720-000000067F000080000007E00C0000041E86__0000009C1E8CC879-0000009C9ED3F059\n000000067F000080000007E00C000003C000-000000067F000080000007E00C0000040000__0000009DEF760000\n000000067F000080000007E00C0000040000-000000067F000080000007E00C0000044000__0000009DEF760000\n000000067F000080000007E00C0000041E86-000000067F000080000007E00C000004B5EC__0000009C1E8CC879-0000009C9ED3F059\n000000067F000080000007E00C0000044000-000000067F000080000007E00C0000048000__0000009DEF760000\n000000067F000080000007E00C0000048000-000000067F000080000007E00C000004C000__0000009DDBE10620\n000000067F000080000007E00C0000048000-000000067F000080000007E00C000004C000__0000009EBB11FFC0\n000000067F000080000007E00C000004B5EC-030000000000000000000000000000000002__0000009C1E8CC879-0000009C9ED3F059\n000000067F000080000007E00C000004BACA-000000067F000080000007E00C00000551FF__0000009C9ED3F059-0000009D3E97E549\n000000067F000080000007E00C000004C000-000000067F000080000007E00C0000050000__0000009DDBE10620\n000000067F000080000007E00C000004C000-000000067F000080000007E00C0000050000
__0000009EBB11FFC0\n000000067F000080000007E00C0000050000-000000067F000080000007E00C0000054000__0000009DDBE10620\n000000067F000080000007E00C0000050000-000000067F000080000007E00C0000054000__0000009EBB11FFC0\n000000067F000080000007E00C0000054000-000000067F000080000007E00C0000058000__0000009DDBE10620\n000000067F000080000007E00C0000054000-000000067F000080000007E00C0000058000__0000009EBB11FFC0\n000000067F000080000007E00C00000551FF-000000067F000080000007E00C000005E90C__0000009C9ED3F059-0000009D3E97E549\n000000067F000080000007E00C0000058000-000000067F000080000007E00C000005C000__0000009DDBE10620\n000000067F000080000007E00C0000058000-000000067F000080000007E00C000005C000__0000009EBB11FFC0\n000000067F000080000007E00C000005C000-000000067F000080000007E00C0000060000__0000009DDBE10620\n000000067F000080000007E00C000005C000-000000067F000080000007E00C0000060000__0000009EBB11FFC0\n000000067F000080000007E00C000005E90C-000000067F000080000007E00C000006802C__0000009C9ED3F059-0000009D3E97E549\n000000067F000080000007E00C0000060000-000000067F000080000007E00C0000064000__0000009DDBE10620\n000000067F000080000007E00C0000060000-000000067F000080000007E00C0000064000__0000009EBB11FFC0\n000000067F000080000007E00C0000061AE1-000000067F000080000007E00C00000C2A6C__0000009E781A9731-0000009EBBC72771\n000000067F000080000007E00C0000064000-000000067F000080000007E00C0000068000__0000009DDBE10620\n000000067F000080000007E00C0000064000-000000067F000080000007E00C0000068000__0000009EBB11FFC0\n000000067F000080000007E00C0000068000-000000067F000080000007E00C000006C000__0000009DDBE10620\n000000067F000080000007E00C0000068000-000000067F000080000007E00C000006C000__0000009EBB11FFC0\n000000067F000080000007E00C000006802C-000000067F000080000007E00C0000071783__0000009C9ED3F059-0000009D3E97E549\n000000067F000080000007E00C000006C000-000000067F000080000007E00C0000070000__0000009DDBE10620\n000000067F000080000007E00C000006C000-000000067F000080000007E00C0000070000__0000009EBB11FFC0\n000000067F000080000007E00C0000070000-000000067F00008
0000007E00C0000074000__0000009DDBE10620\n000000067F000080000007E00C0000070000-000000067F000080000007E00C0000074000__0000009EBB11FFC0\n000000067F000080000007E00C0000071783-000000067F000080000007E00C000007AEE9__0000009C9ED3F059-0000009D3E97E549\n000000067F000080000007E00C0000074000-000000067F000080000007E00C0000078000__0000009DDBE10620\n000000067F000080000007E00C0000074000-000000067F000080000007E00C0000078000__0000009EBB11FFC0\n000000067F000080000007E00C0000078000-000000067F000080000007E00C000007C000__0000009DDBE10620\n000000067F000080000007E00C0000078000-000000067F000080000007E00C000007C000__0000009EBB11FFC0\n000000067F000080000007E00C000007AEE9-000000067F000080000007E00C000008460B__0000009C9ED3F059-0000009D3E97E549\n000000067F000080000007E00C000007C000-000000067F000080000007E00C0000080000__0000009DDBE10620\n000000067F000080000007E00C000007C000-000000067F000080000007E00C0000080000__0000009EBB11FFC0\n000000067F000080000007E00C0000080000-000000067F000080000007E00C0000084000__0000009DDBE10620\n000000067F000080000007E00C0000080000-000000067F000080000007E00C0000084000__0000009EBB11FFC0\n000000067F000080000007E00C0000084000-000000067F000080000007E00C0000088000__0000009DDBE10620\n000000067F000080000007E00C0000084000-000000067F000080000007E00C0000088000__0000009EBB11FFC0\n000000067F000080000007E00C000008460B-000000067F000080000007E00C000008DD71__0000009C9ED3F059-0000009D3E97E549\n000000067F000080000007E00C0000088000-000000067F000080000007E00C000008C000__0000009DDBE10620\n000000067F000080000007E00C0000088000-000000067F000080000007E00C000008C000__0000009EBB11FFC0\n000000067F000080000007E00C000008C000-000000067F000080000007E00C0000090000__0000009DDBE10620\n000000067F000080000007E00C000008C000-000000067F000080000007E00C0000090000__0000009EBB11FFC0\n000000067F000080000007E00C000008DD71-000000067F000080000007E00C00000974D7__0000009C9ED3F059-0000009D3E97E549\n000000067F000080000007E00C0000090000-000000067F000080000007E00C0000094000__0000009DDBE10620\n000000067F000080000007E00C00000
90000-000000067F000080000007E00C0000094000__0000009EBB11FFC0\n000000067F000080000007E00C0000093E3A-000000067F000080000007E00C0000111CED__0000009DEEE6BFF9-0000009E781A9731\n000000067F000080000007E00C0000094000-000000067F000080000007E00C0000098000__0000009DDBE10620\n000000067F000080000007E00C0000094000-000000067F000080000007E00C0000098000__0000009EBB11FFC0\n000000067F000080000007E00C00000974D7-000000067F000080000007E00C00000A0C0B__0000009C9ED3F059-0000009D3E97E549\n000000067F000080000007E00C0000098000-000000067F000080000007E00C000009C000__0000009DDBE10620\n000000067F000080000007E00C0000098000-000000067F000080000007E00C000009C000__0000009EBB11FFC0\n000000067F000080000007E00C000009C000-000000067F000080000007E00C00000A0000__0000009DDBE10620\n000000067F000080000007E00C000009C000-000000067F000080000007E00C00000A0000__0000009EBB11FFC0\n000000067F000080000007E00C00000A0000-000000067F000080000007E00C00000A4000__0000009DDBE10620\n000000067F000080000007E00C00000A0000-000000067F000080000007E00C00000A4000__0000009EBB11FFC0\n000000067F000080000007E00C00000A0C0B-000000067F000080000007E00C00000AA371__0000009C9ED3F059-0000009D3E97E549\n000000067F000080000007E00C00000A4000-000000067F000080000007E00C00000A8000__0000009DDBE10620\n000000067F000080000007E00C00000A4000-000000067F000080000007E00C00000A8000__0000009EBB11FFC0\n000000067F000080000007E00C00000A8000-000000067F000080000007E00C00000AC000__0000009DDBE10620\n000000067F000080000007E00C00000A8000-000000067F000080000007E00C00000AC000__0000009EBB11FFC0\n000000067F000080000007E00C00000AA371-000000067F000080000007E00C0100000000__0000009C9ED3F059-0000009D3E97E549\n000000067F000080000007E00C00000AC000-000000067F000080000007E00C00000B0000__0000009DDBE10620\n000000067F000080000007E00C00000AC000-000000067F000080000007E00C00000B0000__0000009EBB11FFC0\n000000067F000080000007E00C00000B0000-000000067F000080000007E00C00000B4000__0000009DDBE10620\n000000067F000080000007E00C00000B0000-000000067F000080000007E00C00000B4000__0000009EBB11FFC0\n000000067F
000080000007E00C00000B2704-000000067F000080000007E00C00000BBE0F__0000009D3E97E549-0000009DEEE6BFF9\n000000067F000080000007E00C00000B4000-000000067F000080000007E00C00000B8000__0000009DDBE10620\n000000067F000080000007E00C00000B4000-000000067F000080000007E00C00000B8000__0000009EBB11FFC0\n000000067F000080000007E00C00000B8000-000000067F000080000007E00C00000BC000__0000009DDBE10620\n000000067F000080000007E00C00000B8000-000000067F000080000007E00C00000BC000__0000009EBB11FFC0\n000000067F000080000007E00C00000BBE0F-000000067F000080000007E00C00000C5542__0000009D3E97E549-0000009DEEE6BFF9\n000000067F000080000007E00C00000BC000-000000067F000080000007E00C00000C0000__0000009DDBE10620\n000000067F000080000007E00C00000BC000-000000067F000080000007E00C00000C0000__0000009EBB11FFC0\n000000067F000080000007E00C00000C0000-000000067F000080000007E00C00000C4000__0000009DDBE10620\n000000067F000080000007E00C00000C0000-000000067F000080000007E00C00000C4000__0000009EBB11FFC0\n000000067F000080000007E00C00000C2A75-000000067F000080000007E0140000004415__0000009E781A9731-0000009EBBC72771\n000000067F000080000007E00C00000C4000-000000067F000080000007E00C00000C8000__0000009DDBE10620\n000000067F000080000007E00C00000C4000-000000067F000080000007E00C00000C8000__0000009EBB11FFC0\n000000067F000080000007E00C00000C5542-000000067F000080000007E00C00000CECA8__0000009D3E97E549-0000009DEEE6BFF9\n000000067F000080000007E00C00000C8000-000000067F000080000007E00C00000CC000__0000009DDBE10620\n000000067F000080000007E00C00000C8000-000000067F000080000007E00C00000CC000__0000009EBB11FFC0\n000000067F000080000007E00C00000CC000-000000067F000080000007E00C00000D0000__0000009DDBE10620\n000000067F000080000007E00C00000CC000-000000067F000080000007E00C00000D0000__0000009EBB11FFC0\n000000067F000080000007E00C00000CECA8-000000067F000080000007E00C00000D83BF__0000009D3E97E549-0000009DEEE6BFF9\n000000067F000080000007E00C00000D0000-000000067F000080000007E00C00000D4000__0000009DDBE10620\n000000067F000080000007E00C00000D0000-000000067F000080000007E00C00
000D4000__0000009EBB11FFC0\n000000067F000080000007E00C00000D4000-000000067F000080000007E00C00000D8000__0000009DDBE10620\n000000067F000080000007E00C00000D4000-000000067F000080000007E00C00000D8000__0000009EBB11FFC0\n000000067F000080000007E00C00000D8000-000000067F000080000007E00C00000DC000__0000009DDBE10620\n000000067F000080000007E00C00000D8000-000000067F000080000007E00C00000DC000__0000009EBB11FFC0\n000000067F000080000007E00C00000D83BF-000000067F000080000007E00C00000E1B0A__0000009D3E97E549-0000009DEEE6BFF9\n000000067F000080000007E00C00000DC000-000000067F000080000007E00C00000E0000__0000009DDBE10620\n000000067F000080000007E00C00000DC000-000000067F000080000007E00C00000E0000__0000009EBB11FFC0\n000000067F000080000007E00C00000E0000-000000067F000080000007E00C00000E4000__0000009DDBE10620\n000000067F000080000007E00C00000E0000-000000067F000080000007E00C00000E4000__0000009EBB11FFC0\n000000067F000080000007E00C00000E1B0A-000000067F000080000007E00C00000EB270__0000009D3E97E549-0000009DEEE6BFF9\n000000067F000080000007E00C00000E4000-000000067F000080000007E00C00000E8000__0000009DDBE10620\n000000067F000080000007E00C00000E4000-000000067F000080000007E00C00000E8000__0000009EBB11FFC0\n000000067F000080000007E00C00000E8000-000000067F000080000007E00C00000EC000__0000009DDBE10620\n000000067F000080000007E00C00000E8000-000000067F000080000007E00C00000EC000__0000009EBB11FFC0\n000000067F000080000007E00C00000EB270-000000067F000080000007E00C00000F49AA__0000009D3E97E549-0000009DEEE6BFF9\n000000067F000080000007E00C00000EC000-000000067F000080000007E00C00000F0000__0000009DDBE10620\n000000067F000080000007E00C00000EC000-000000067F000080000007E00C00000F0000__0000009EBB11FFC0\n000000067F000080000007E00C00000F0000-000000067F000080000007E00C00000F4000__0000009DDBE10620\n000000067F000080000007E00C00000F0000-000000067F000080000007E00C00000F4000__0000009EBB11FFC0\n000000067F000080000007E00C00000F4000-000000067F000080000007E00C00000F8000__0000009DDBE10620\n000000067F000080000007E00C00000F4000-000000067F000080000007E0
0C00000F8000__0000009EBB11FFC0\n000000067F000080000007E00C00000F49AA-000000067F000080000007E00C00000FE10A__0000009D3E97E549-0000009DEEE6BFF9\n000000067F000080000007E00C00000F8000-000000067F000080000007E00C00000FC000__0000009DDBE10620\n000000067F000080000007E00C00000F8000-000000067F000080000007E00C00000FC000__0000009EBB11FFC0\n000000067F000080000007E00C00000FC000-000000067F000080000007E00C0000100000__0000009DDBE10620\n000000067F000080000007E00C00000FC000-000000067F000080000007E00C0000100000__0000009EBB11FFC0\n000000067F000080000007E00C00000FE10A-000000067F000080000007E00C000010782C__0000009D3E97E549-0000009DEEE6BFF9\n000000067F000080000007E00C0000100000-000000067F000080000007E00C0000104000__0000009DDBE10620\n000000067F000080000007E00C0000100000-000000067F000080000007E00C0000104000__0000009EBB11FFC0\n000000067F000080000007E00C0000104000-000000067F000080000007E00C0000108000__0000009EBB11FFC0\n000000067F000080000007E00C0000104000-030000000000000000000000000000000002__0000009DDBE10620\n000000067F000080000007E00C000010782C-000000067F000080000007E00C0000110F88__0000009D3E97E549-0000009DEEE6BFF9\n000000067F000080000007E00C0000108000-000000067F000080000007E00C000010C000__0000009EBB11FFC0\n000000067F000080000007E00C000010C000-000000067F000080000007E00C0000110000__0000009EBB11FFC0\n000000067F000080000007E00C0000110000-000000067F000080000007E0120100000000__0000009EBB11FFC0\n000000067F000080000007E00C0000110F88-010000000000000001000000040000000015__0000009D3E97E549-0000009DEEE6BFF9\n000000067F000080000007E00C0000111CED-000000067F000080000007E0140000004818__0000009DEEE6BFF9-0000009E781A9731\n000000067F000080000007E0140000000000-000000067F000080000007E0140000004000__0000009EBB11FFC0\n000000067F000080000007E0140000004000-000000067F000080000007E0140000008000__0000009EBB11FFC0\n000000067F000080000007E0140000004418-000000067F000080000007E0140000025351__0000009E781A9731-0000009EBBC72771\n000000067F000080000007E0140000004818-000000067F000080000007E014000000AD57__0000009DEEE6BFF9-0000009
E781A9731\n000000067F000080000007E0140000008000-000000067F000080000007E014000000C000__0000009EBB11FFC0\n000000067F000080000007E014000000AD57-000000067F000080000007E0140000011291__0000009DEEE6BFF9-0000009E781A9731\n000000067F000080000007E014000000C000-000000067F000080000007E0140000010000__0000009EBB11FFC0\n000000067F000080000007E0140000010000-000000067F000080000007E0140000014000__0000009EBB11FFC0\n000000067F000080000007E0140000011291-000000067F000080000007E0140000017809__0000009DEEE6BFF9-0000009E781A9731\n000000067F000080000007E0140000014000-000000067F000080000007E0140000018000__0000009EBB11FFC0\n000000067F000080000007E0140000017809-000000067F000080000007E014000001DD22__0000009DEEE6BFF9-0000009E781A9731\n000000067F000080000007E0140000018000-000000067F000080000007E014000001C000__0000009EBB11FFC0\n000000067F000080000007E014000001C000-000000067F000080000007E0140000020000__0000009EBB11FFC0\n000000067F000080000007E014000001DD22-000000067F000080000007E0140000024244__0000009DEEE6BFF9-0000009E781A9731\n000000067F000080000007E0140000020000-000000067F000080000007E0140000024000__0000009EBB11FFC0\n000000067F000080000007E0140000024000-000000067F000080000007E0140000028000__0000009EBB11FFC0\n000000067F000080000007E0140000024244-000000067F000080000007E014000002A798__0000009DEEE6BFF9-0000009E781A9731\n000000067F000080000007E0140000025355-030000000000000000000000000000000002__0000009E781A9731-0000009EBBC72771\n000000067F000080000007E0140000028000-000000067F000080000007E014000002C000__0000009EBB11FFC0\n000000067F000080000007E014000002A798-030000000000000000000000000000000002__0000009DEEE6BFF9-0000009E781A9731\n000000067F000080000007E014000002C000-030000000000000000000000000000000002__0000009EBB11FFC0\n000000067F000080000008000C00000081F6-000000067F000080000008000C0000010448__0000009EBBC72771-000000A154401909\n000000067F000080000008000C0000010448-000000067F000080000008000C000001870A__0000009EBBC72771-000000A154401909\n000000067F000080000008000C000001870A-000000067F000080000008000C000002
0905__0000009EBBC72771-000000A154401909\n000000067F000080000008000C0000020905-000000067F000080000008000C0000028AF3__0000009EBBC72771-000000A154401909\n000000067F000080000008000C0000028AF3-000000067F000080000008000C0000030CEA__0000009EBBC72771-000000A154401909\n000000067F000080000008000C0000030CEA-000000067F000080000008000C0000038EB6__0000009EBBC72771-000000A154401909\n000000067F000080000008000C0000038EB6-000000067F000080000008000C00000410B5__0000009EBBC72771-000000A154401909\n000000067F000080000008000C00000410B5-000000067F000080000008000C00000492CB__0000009EBBC72771-000000A154401909\n000000067F000080000008000C00000492CB-000000067F000080000008000C00000514F8__0000009EBBC72771-000000A154401909\n000000067F000080000008000C00000514F8-000000067F000080000008000C000005977B__0000009EBBC72771-000000A154401909\n000000067F000080000008000C000005977B-000000067F000080000008000C00000619C6__0000009EBBC72771-000000A154401909\n000000067F000080000008000C00000619C6-000000067F000080000008000C0000069B6B__0000009EBBC72771-000000A154401909\n000000067F000080000008000C0000069B6B-000000067F000080000008000C0000071DBE__0000009EBBC72771-000000A154401909\n000000067F000080000008000C0000071DBE-000000067F000080000008000C0000079F8E__0000009EBBC72771-000000A154401909\n000000067F000080000008000C0000079F8E-000000067F000080000008000C00000821D7__0000009EBBC72771-000000A154401909\n000000067F000080000008000C00000821D7-000000067F000080000008000C000008A3AB__0000009EBBC72771-000000A154401909\n000000067F000080000008000C000008A3AB-000000067F000080000008000C0000092556__0000009EBBC72771-000000A154401909\n000000067F000080000008000C0000092556-000000067F000080000008000C000009A744__0000009EBBC72771-000000A154401909\n000000067F000080000008000C000009A744-000000067F000080000008000C00000A29B0__0000009EBBC72771-000000A154401909\n000000067F000080000008000C00000A29B0-000000067F000080000008000C00000AAC4B__0000009EBBC72771-000000A154401909\n000000067F000080000008000C00000AAC4B-000000067F000080000008000C00000B2E21__0000009EBBC727
71-000000A154401909\n000000067F000080000008000C00000B2E21-000000067F000080000008000C00000BB0DB__0000009EBBC72771-000000A154401909\n000000067F000080000008000C00000BB0DB-000000067F000080000008000C00000C331B__0000009EBBC72771-000000A154401909\n000000067F000080000008000C00000C331B-000000067F000080000008000C00000CB4D2__0000009EBBC72771-000000A154401909\n000000067F000080000008000C00000CB4D2-000000067F000080000008000C00000D3754__0000009EBBC72771-000000A154401909\n000000067F000080000008000C00000D3754-000000067F000080000008000C00000DB9C6__0000009EBBC72771-000000A154401909\n000000067F000080000008000C00000DB9C6-000000067F000080000008000C00000E3BC1__0000009EBBC72771-000000A154401909\n000000067F000080000008000C00000E3BC1-000000067F000080000008000C00000EBE00__0000009EBBC72771-000000A154401909\n000000067F000080000008000C00000EBE00-000000067F000080000008000C00000F3F63__0000009EBBC72771-000000A154401909\n000000067F000080000008000C00000F3F63-000000067F000080000008000C00000FC160__0000009EBBC72771-000000A154401909\n000000067F000080000008000C00000FC160-000000067F000080000008000C0000104448__0000009EBBC72771-000000A154401909\n000000067F000080000008000C0000104448-000000067F000080000008000C000010C675__0000009EBBC72771-000000A154401909\n000000067F000080000008000C000010C675-000000067F000080000008000C020000000B__0000009EBBC72771-000000A154401909\n000000067F000080000008000C020000000B-000000067F00008000000800140000003ED1__0000009EBBC72771-000000A154401909\n000000067F00008000000800140000003ED1-000000067F00008000000800140000009486__0000009EBBC72771-000000A154401909\n000000067F00008000000800140000009486-000000067F0000800000080014000000EA73__0000009EBBC72771-000000A154401909\n000000067F0000800000080014000000EA73-000000067F0000800000080014000001404D__0000009EBBC72771-000000A154401909\n000000067F0000800000080014000001404D-000000067F000080000008001400000195A4__0000009EBBC72771-000000A154401909\n000000067F000080000008001400000195A4-000000067F0000800000080014000001EBB4__0000009EBBC72771-000000A154401909\
n000000067F0000800000080014000001EBB4-000000067F000080000008001400000241E2__0000009EBBC72771-000000A154401909\n000000067F000080000008001400000241E2-000000067F00008000000800140000029762__0000009EBBC72771-000000A154401909\n000000067F00008000000800140000029762-030000000000000000000000000000000002__0000009EBBC72771-000000A154401909\n000000067F000080000008200C0000000000-000000067F000080000008200C0000004000__000000A29F1D8950\n000000067F000080000008200C0000004000-000000067F000080000008200C0000008000__000000A29F1D8950\n000000067F000080000008200C0000008000-000000067F000080000008200C000000C000__000000A29F1D8950\n000000067F000080000008200C000000974D-000000067F000080000008200C0000012EB3__000000A154401909-000000A1E407F839\n000000067F000080000008200C000000C000-000000067F000080000008200C0000010000__000000A29F1D8950\n000000067F000080000008200C0000010000-000000067F000080000008200C0000014000__000000A29F1D8950\n000000067F000080000008200C0000012EB3-000000067F000080000008200C000001C60A__000000A154401909-000000A1E407F839\n000000067F000080000008200C0000014000-000000067F000080000008200C0000018000__000000A29F1D8950\n000000067F000080000008200C0000018000-000000067F000080000008200C000001C000__000000A29F1D8950\n000000067F000080000008200C000001C000-000000067F000080000008200C0000020000__000000A29F1D8950\n000000067F000080000008200C000001C60A-000000067F000080000008200C0000025D38__000000A154401909-000000A1E407F839\n000000067F000080000008200C0000020000-000000067F000080000008200C0000024000__000000A29F1D8950\n000000067F000080000008200C0000024000-000000067F000080000008200C0000028000__000000A29F1D8950\n000000067F000080000008200C0000025D38-000000067F000080000008200C000002F49E__000000A154401909-000000A1E407F839\n000000067F000080000008200C0000028000-000000067F000080000008200C000002C000__000000A29F1D8950\n000000067F000080000008200C000002C000-000000067F000080000008200C0000030000__000000A29F1D8950\n000000067F000080000008200C000002F49E-000000067F000080000008200C0000038BB1__000000A154401909-000000A1E407F839\n000
000067F000080000008200C0000030000-000000067F000080000008200C0000034000__000000A29F1D8950\n000000067F000080000008200C0000034000-000000067F000080000008200C0000038000__000000A29F1D8950\n000000067F000080000008200C0000038000-000000067F000080000008200C000003C000__000000A29F1D8950\n000000067F000080000008200C0000038BB1-000000067F000080000008200C0000042317__000000A154401909-000000A1E407F839\n000000067F000080000008200C000003C000-000000067F000080000008200C0000040000__000000A29F1D8950\n000000067F000080000008200C0000040000-000000067F000080000008200C0000044000__000000A29F1D8950\n000000067F000080000008200C0000042317-000000067F000080000008200C000004BA7D__000000A154401909-000000A1E407F839\n000000067F000080000008200C0000044000-000000067F000080000008200C0000048000__000000A29F1D8950\n000000067F000080000008200C0000048000-000000067F000080000008200C000004C000__000000A29F1D8950\n000000067F000080000008200C000004BA7D-000000067F000080000008200C00000551B2__000000A154401909-000000A1E407F839\n000000067F000080000008200C000004C000-000000067F000080000008200C0000050000__000000A29F1D8950\n000000067F000080000008200C0000050000-000000067F000080000008200C0000054000__000000A29F1D8950\n000000067F000080000008200C0000054000-000000067F000080000008200C0000058000__000000A29F1D8950\n000000067F000080000008200C00000551B2-030000000000000000000000000000000002__000000A154401909-000000A1E407F839\n000000067F000080000008200C0000058000-000000067F000080000008200C000005C000__000000A29F1D8950\n000000067F000080000008200C000005C000-000000067F000080000008200C0000060000__000000A29F1D8950\n000000067F000080000008200C000005D8FE-000000067F000080000008200C000006700C__000000A1E407F839-000000A323C9E001\n000000067F000080000008200C0000060000-000000067F000080000008200C0000064000__000000A29F1D8950\n000000067F000080000008200C0000064000-000000067F000080000008200C0000068000__000000A29F1D8950\n000000067F000080000008200C000006700C-000000067F000080000008200C000007076D__000000A1E407F839-000000A323C9E001\n000000067F000080000008200C0000068000-0000
00067F000080000008200C000006C000__000000A29F1D8950\n000000067F000080000008200C000006C000-000000067F000080000008200C0000070000__000000A29F1D8950\n000000067F000080000008200C0000070000-000000067F000080000008200C0000074000__000000A29F1D8950\n000000067F000080000008200C000007076D-000000067F000080000008200C0000079ED3__000000A1E407F839-000000A323C9E001\n000000067F000080000008200C0000074000-000000067F000080000008200C0000078000__000000A29F1D8950\n000000067F000080000008200C0000078000-000000067F000080000008200C000007C000__000000A29F1D8950\n000000067F000080000008200C0000079ED3-000000067F000080000008200C000008360A__000000A1E407F839-000000A323C9E001\n000000067F000080000008200C000007C000-000000067F000080000008200C0000080000__000000A29F1D8950\n000000067F000080000008200C0000080000-000000067F000080000008200C0000084000__000000A29F1D8950\n000000067F000080000008200C000008360A-000000067F000080000008200C000008CD70__000000A1E407F839-000000A323C9E001\n000000067F000080000008200C0000084000-000000067F000080000008200C0000088000__000000A29F1D8950\n000000067F000080000008200C0000088000-000000067F000080000008200C000008C000__000000A29F1D8950\n000000067F000080000008200C000008C000-000000067F000080000008200C0000090000__000000A29F1D8950\n000000067F000080000008200C000008CD70-000000067F000080000008200C00000964D6__000000A1E407F839-000000A323C9E001\n000000067F000080000008200C0000090000-000000067F000080000008200C0000094000__000000A29F1D8950\n000000067F000080000008200C0000094000-000000067F000080000008200C0000098000__000000A29F1D8950\n000000067F000080000008200C00000964D6-000000067F000080000008200C000009FC0B__000000A1E407F839-000000A323C9E001\n000000067F000080000008200C0000098000-000000067F000080000008200C000009C000__000000A29F1D8950\n000000067F000080000008200C000009C000-000000067F000080000008200C00000A0000__000000A29F1D8950\n000000067F000080000008200C000009FC0B-000000067F000080000008200C00000A9319__000000A1E407F839-000000A323C9E001\n000000067F000080000008200C00000A0000-000000067F000080000008200C00000A4000__0000
00A29F1D8950\n000000067F000080000008200C00000A4000-000000067F000080000008200C00000A8000__000000A29F1D8950\n000000067F000080000008200C00000A8000-000000067F000080000008200C00000AC000__000000A29F1D8950\n000000067F000080000008200C00000A9319-000000067F000080000008200C00000B2A7F__000000A1E407F839-000000A323C9E001\n000000067F000080000008200C00000AC000-000000067F000080000008200C00000B0000__000000A29F1D8950\n000000067F000080000008200C00000B0000-000000067F000080000008200C00000B4000__000000A29F1D8950\n000000067F000080000008200C00000B2A7F-000000067F000080000008200C00000BC1E5__000000A1E407F839-000000A323C9E001\n000000067F000080000008200C00000B4000-000000067F000080000008200C00000B8000__000000A29F1D8950\n000000067F000080000008200C00000B8000-000000067F000080000008200C00000BC000__000000A29F1D8950\n000000067F000080000008200C00000BC000-000000067F000080000008200C00000C0000__000000A29F1D8950\n000000067F000080000008200C00000BC1E5-000000067F000080000008200C00000C590C__000000A1E407F839-000000A323C9E001\n000000067F000080000008200C00000C0000-010000000000000000000000000000000001__000000A29F1D8950\n000000067F000080000008200C00000C590C-000000067F000080000008200C00000CF071__000000A1E407F839-000000A323C9E001\n000000067F000080000008200C00000CF071-000000067F000080000008200C00000D8786__000000A1E407F839-000000A323C9E001\n000000067F000080000008200C00000D8786-000000067F000080000008200C00000E1EEC__000000A1E407F839-000000A323C9E001\n000000067F000080000008200C00000E1EEC-000000067F000080000008200C00000EB60C__000000A1E407F839-000000A323C9E001\n000000067F000080000008200C00000EB60C-000000067F000080000008200C00000F4D43__000000A1E407F839-000000A323C9E001\n000000067F000080000008200C00000F4D43-000000067F000080000008200C00000FE4A9__000000A1E407F839-000000A323C9E001\n000000067F000080000008200C00000FE4A9-000000067F000080000008200C0000107BC5__000000A1E407F839-000000A323C9E001\n000000067F000080000008200C0000107BC5-000000067F000080000008200C000011130B__000000A1E407F839-000000A323C9E001\n000000067F000080000008200C000011
130B-01000000000000000100000004000000001C__000000A1E407F839-000000A323C9E001\n000000067F0000800000082014000000393C-000000067F0000800000082014000000B84D__000000A323C9E001-000000A37A60B1A9\n000000067F0000800000082014000000B84D-000000067F0000800000082014000001375E__000000A323C9E001-000000A37A60B1A9\n000000067F0000800000082014000001375E-000000067F0000800000082014000001B66D__000000A323C9E001-000000A37A60B1A9\n000000067F0000800000082014000001B66D-000000067F0000800000082014000002357E__000000A323C9E001-000000A37A60B1A9\n000000067F0000800000082014000002357E-000000067F0000800000082014000002B48D__000000A323C9E001-000000A37A60B1A9\n000000067F0000800000082014000002B48D-030000000000000000000000000000000002__000000A323C9E001-000000A37A60B1A9\n000000067F000080000008600C0000000000-000000067F000080000008600C0000004000__000000A434813A68\n000000067F000080000008600C0000004000-000000067F000080000008600C0000008000__000000A434813A68\n000000067F000080000008600C0000008000-000000067F000080000008600C000000C000__000000A434813A68\n000000067F000080000008600C0000009747-000000067F000080000008600C0000012EAD__000000A37A60B1A9-000000A3CA47ECA9\n000000067F000080000008600C000000C000-000000067F000080000008600C0000010000__000000A434813A68\n000000067F000080000008600C0000010000-000000067F000080000008600C0000014000__000000A434813A68\n000000067F000080000008600C0000012EAD-000000067F000080000008600C000001C60A__000000A37A60B1A9-000000A3CA47ECA9\n000000067F000080000008600C0000014000-000000067F000080000008600C0000018000__000000A434813A68\n000000067F000080000008600C0000018000-000000067F000080000008600C000001C000__000000A434813A68\n000000067F000080000008600C000001C000-000000067F000080000008600C0000020000__000000A434813A68\n000000067F000080000008600C000001C60A-000000067F000080000008600C0000025D38__000000A37A60B1A9-000000A3CA47ECA9\n000000067F000080000008600C0000020000-000000067F000080000008600C0000024000__000000A434813A68\n000000067F000080000008600C0000024000-000000067F000080000008600C0000028000__000000A434813A68\n00
0000067F000080000008600C0000025D38-000000067F000080000008600C000002F49E__000000A37A60B1A9-000000A3CA47ECA9\n000000067F000080000008600C0000028000-000000067F000080000008600C000002C000__000000A434813A68\n000000067F000080000008600C000002C000-000000067F000080000008600C0000030000__000000A434813A68\n000000067F000080000008600C000002F49E-030000000000000000000000000000000002__000000A37A60B1A9-000000A3CA47ECA9\n000000067F000080000008600C000002F4CA-000000067F000080000008600C0000038BDD__000000A3CA47ECA9-000000A539BDE561\n000000067F000080000008600C0000030000-000000067F000080000008600C0000034000__000000A434813A68\n000000067F000080000008600C0000034000-000000067F000080000008600C0000038000__000000A434813A68\n000000067F000080000008600C0000038000-000000067F000080000008600C000003C000__000000A434813A68\n000000067F000080000008600C0000038BDD-000000067F000080000008600C000004230B__000000A3CA47ECA9-000000A539BDE561\n000000067F000080000008600C000003C000-000000067F000080000008600C0000040000__000000A434813A68\n000000067F000080000008600C0000040000-000000067F000080000008600C0000044000__000000A434813A68\n000000067F000080000008600C000004230B-000000067F000080000008600C000004BA71__000000A3CA47ECA9-000000A539BDE561\n000000067F000080000008600C0000044000-000000067F000080000008600C0000048000__000000A434813A68\n000000067F000080000008600C0000048000-000000067F000080000008600C000004C000__000000A434813A68\n000000067F000080000008600C000004BA71-000000067F000080000008600C00000551A6__000000A3CA47ECA9-000000A539BDE561\n000000067F000080000008600C000004C000-000000067F000080000008600C0000050000__000000A434813A68\n000000067F000080000008600C0000050000-000000067F000080000008600C0000054000__000000A434813A68\n000000067F000080000008600C0000054000-000000067F000080000008600C0000058000__000000A434813A68\n000000067F000080000008600C00000551A6-000000067F000080000008600C000005E90A__000000A3CA47ECA9-000000A539BDE561\n000000067F000080000008600C0000058000-000000067F000080000008600C000005C000__000000A434813A68\n000000067F0000800000086
00C000005C000-000000067F000080000008600C0000060000__000000A434813A68\n000000067F000080000008600C000005E90A-000000067F000080000008600C000006802C__000000A3CA47ECA9-000000A539BDE561\n000000067F000080000008600C0000060000-000000067F000080000008600C0000064000__000000A434813A68\n000000067F000080000008600C0000064000-000000067F000080000008600C0000068000__000000A434813A68\n000000067F000080000008600C0000068000-000000067F000080000008600C000006C000__000000A434813A68\n000000067F000080000008600C000006802C-000000067F000080000008600C0000071783__000000A3CA47ECA9-000000A539BDE561\n000000067F000080000008600C000006C000-030000000000000000000000000000000002__000000A434813A68\n000000067F000080000008600C0000071783-000000067F000080000008600C000007AEE9__000000A3CA47ECA9-000000A539BDE561\n000000067F000080000008600C000007AEE9-000000067F000080000008600C000008460B__000000A3CA47ECA9-000000A539BDE561\n000000067F000080000008600C000008460B-000000067F000080000008600C000008DD71__000000A3CA47ECA9-000000A539BDE561\n000000067F000080000008600C000008DD71-000000067F000080000008600C00000974D7__000000A3CA47ECA9-000000A539BDE561\n000000067F000080000008600C00000974D7-000000067F000080000008600C00000A0C0B__000000A3CA47ECA9-000000A539BDE561\n000000067F000080000008600C00000A0C0B-000000067F000080000008600C00000AA371__000000A3CA47ECA9-000000A539BDE561\n000000067F000080000008600C00000AA371-000000067F000080000008600C00000B3AD7__000000A3CA47ECA9-000000A539BDE561\n000000067F000080000008600C00000B3AD7-000000067F000080000008600C00000BD20B__000000A3CA47ECA9-000000A539BDE561\n000000067F000080000008600C00000BD20B-000000067F000080000008600C00000C6932__000000A3CA47ECA9-000000A539BDE561\n000000067F000080000008600C00000C6932-000000067F000080000008600C00000D0098__000000A3CA47ECA9-000000A539BDE561\n000000067F000080000008600C00000D0098-000000067F000080000008600C00000D97FE__000000A3CA47ECA9-000000A539BDE561\n000000067F000080000008600C00000D97FE-000000067F000080000008600C00000E2F0B__000000A3CA47ECA9-000000A539BDE561\n000000067F00008000
0008600C00000E2F0B-000000067F000080000008600C00000EC671__000000A3CA47ECA9-000000A539BDE561\n000000067F000080000008600C00000EC671-000000067F000080000008600C00000F5D9F__000000A3CA47ECA9-000000A539BDE561\n000000067F000080000008600C00000F5D9F-000000067F000080000008600C00000FF505__000000A3CA47ECA9-000000A539BDE561\n000000067F000080000008600C00000FF505-000000067F000080000008600C0000108C10__000000A3CA47ECA9-000000A539BDE561\n000000067F000080000008600C0000108C10-000000067F000080000008600C0100000000__000000A3CA47ECA9-000000A539BDE561\n000000067F000080000008600C000010ECC4-000000067F00008000000860140000002607__000000A539BDE561-000000A5A081B661\n000000067F00008000000860140000002607-000000067F0000800000086014000000A518__000000A539BDE561-000000A5A081B661\n000000067F0000800000086014000000A518-000000067F00008000000860140000012429__000000A539BDE561-000000A5A081B661\n000000067F00008000000860140000012429-000000067F0000800000086014000001A338__000000A539BDE561-000000A5A081B661\n000000067F0000800000086014000001A338-000000067F00008000000860140000022249__000000A539BDE561-000000A5A081B661\n000000067F00008000000860140000022249-000000067F0000800000086014000002A159__000000A539BDE561-000000A5A081B661\n000000067F0000800000086014000002A159-030000000000000000000000000000000002__000000A539BDE561-000000A5A081B661\n000000067F000080000008801C0000009703-000000067F000080000008801C0000012E0E__000000A5A081B661-000000A6503DE919\n000000067F000080000008801C0000012E0E-000000067F000080000008801C000001C574__000000A5A081B661-000000A6503DE919\n000000067F000080000008801C000001C574-000000067F000080000008801C0000025CDA__000000A5A081B661-000000A6503DE919\n000000067F000080000008801C0000025CDA-000000067F000080000008801C000002F40A__000000A5A081B661-000000A6503DE919\n000000067F000080000008801C000002F40A-000000067F000080000008801C0000038B1D__000000A5A081B661-000000A6503DE919\n000000067F000080000008801C0000038B1D-000000067F000080000008801C0000042283__000000A5A081B661-000000A6503DE919\n000000067F000080000008801C0000042283-0
00000067F000080000008801C000004B9E9__000000A5A081B661-000000A6503DE919\n000000067F000080000008801C000004B9E9-000000067F000080000008801C000005510B__000000A5A081B661-000000A6503DE919\n000000067F000080000008801C000005510B-000000067F000080000008801C000005E871__000000A5A081B661-000000A6503DE919\n000000067F000080000008801C000005E871-000000067F000080000008801C0000067F8B__000000A5A081B661-000000A6503DE919\n000000067F000080000008801C0000067F8B-030000000000000000000000000000000002__000000A5A081B661-000000A6503DE919\n000000067F000080000008801C0000068000-000000067F000080000008801C000006C000__000000A76EC5DFE8\n000000067F000080000008801C00000680F7-000000067F000080000008801C000007180C__000000A6503DE919-000000A6F001F909\n000000067F000080000008801C000006C000-000000067F000080000008801C0000070000__000000A76EC5DFE8\n000000067F000080000008801C0000070000-000000067F000080000008801C0000074000__000000A76EC5DFE8\n000000067F000080000008801C000007180C-000000067F000080000008801C000007AF72__000000A6503DE919-000000A6F001F909\n000000067F000080000008801C0000074000-000000067F000080000008801C0000078000__000000A76EC5DFE8\n000000067F000080000008801C0000078000-000000067F000080000008801C000007C000__000000A76F097A80\n000000067F000080000008801C000007AF72-000000067F000080000008801C00000846D8__000000A6503DE919-000000A6F001F909\n000000067F000080000008801C000007C000-000000067F000080000008801C0000080000__000000A76F097A80\n000000067F000080000008801C0000080000-000000067F000080000008801C0000084000__000000A76F097A80\n000000067F000080000008801C0000084000-000000067F000080000008801C0000088000__000000A76F097A80\n000000067F000080000008801C00000846D8-000000067F000080000008801C000008DE0B__000000A6503DE919-000000A6F001F909\n000000067F000080000008801C0000088000-000000067F000080000008801C000008C000__000000A76F097A80\n000000067F000080000008801C000008C000-000000067F000080000008801C0000090000__000000A76F097A80\n000000067F000080000008801C000008DE0B-000000067F000080000008801C000009752B__000000A6503DE919-000000A6F001F909\n00000006
7F000080000008801C0000090000-000000067F000080000008801C0000094000__000000A76F097A80\n000000067F000080000008801C0000094000-000000067F000080000008801C0000098000__000000A76F097A80\n000000067F000080000008801C000009752B-000000067F000080000008801C00000A0C91__000000A6503DE919-000000A6F001F909\n000000067F000080000008801C0000098000-000000067F000080000008801C000009C000__000000A76F097A80\n000000067F000080000008801C000009C000-000000067F000080000008801C00000A0000__000000A76F097A80\n000000067F000080000008801C00000A0000-000000067F000080000008801C00000A4000__000000A76F097A80\n000000067F000080000008801C00000A0C91-000000067F000080000008801C00000AA3F7__000000A6503DE919-000000A6F001F909\n000000067F000080000008801C00000A4000-000000067F000080000008801C00000A8000__000000A76F097A80\n000000067F000080000008801C00000A8000-000000067F000080000008801C00000AC000__000000A76F097A80\n000000067F000080000008801C00000AA3F7-000000067F000080000008801C00000B3B0C__000000A6503DE919-000000A6F001F909\n000000067F000080000008801C00000AC000-000000067F000080000008801C00000B0000__000000A76F097A80\n000000067F000080000008801C00000B0000-000000067F000080000008801C00000B4000__000000A76F097A80\n000000067F000080000008801C00000B3B0C-000000067F000080000008801C00000BD272__000000A6503DE919-000000A6F001F909\n000000067F000080000008801C00000B4000-000000067F000080000008801C00000B8000__000000A76F097A80\n000000067F000080000008801C00000B8000-000000067F000080000008801C00000BC000__000000A76F097A80\n000000067F000080000008801C00000BC000-000000067F000080000008801C00000C0000__000000A76F097A80\n000000067F000080000008801C00000BD272-000000067F000080000008801C00000C6999__000000A6503DE919-000000A6F001F909\n000000067F000080000008801C00000C0000-000000067F000080000008801C00000C4000__000000A76F097A80\n000000067F000080000008801C00000C4000-000000067F000080000008801C00000C8000__000000A76F097A80\n000000067F000080000008801C00000C6999-000000067F000080000008801C0100000000__000000A6503DE919-000000A6F001F909\n000000067F000080000008801C00000C8000-000000067
F000080000008801C00000CC000__000000A76F097A80\n000000067F000080000008801C00000CC000-000000067F000080000008801C00000D0000__000000A76F097A80\n000000067F000080000008801C00000CF6B0-000000067F000080000008801C00000D8DC1__000000A6F001F909-000000A91D97FD49\n000000067F000080000008801C00000D0000-000000067F000080000008801C00000D4000__000000A76F097A80\n000000067F000080000008801C00000D4000-000000067F000080000008801C00000D8000__000000A76F097A80\n000000067F000080000008801C00000D8000-000000067F000080000008801C00000DC000__000000A76F097A80\n000000067F000080000008801C00000D8DC1-000000067F000080000008801C00000E250B__000000A6F001F909-000000A91D97FD49\n000000067F000080000008801C00000DC000-000000067F000080000008801C00000E0000__000000A76F097A80\n000000067F000080000008801C00000E0000-000000067F000080000008801C00000E4000__000000A76F097A80\n000000067F000080000008801C00000E250B-000000067F000080000008801C00000EBC71__000000A6F001F909-000000A91D97FD49\n000000067F000080000008801C00000E4000-000000067F000080000008801C00000E8000__000000A76F097A80\n000000067F000080000008801C00000E8000-000000067F000080000008801C00000EC000__000000A76F097A80\n000000067F000080000008801C00000EBC71-000000067F000080000008801C00000F53A5__000000A6F001F909-000000A91D97FD49\n000000067F000080000008801C00000EC000-000000067F000080000008801C00000F0000__000000A76F097A80\n000000067F000080000008801C00000F0000-000000067F000080000008801C00000F4000__000000A76F097A80\n000000067F000080000008801C00000F4000-000000067F000080000008801C00000F8000__000000A76F097A80\n000000067F000080000008801C00000F53A5-000000067F000080000008801C00000FEB0B__000000A6F001F909-000000A91D97FD49\n000000067F000080000008801C00000F8000-000000067F000080000008801C00000FC000__000000A76F097A80\n000000067F000080000008801C00000FC000-000000067F000080000008801C0000100000__000000A76F097A80\n000000067F000080000008801C00000FEB0B-000000067F000080000008801C000010822C__000000A6F001F909-000000A91D97FD49\n000000067F000080000008801C0000100000-000000067F000080000008801C0000104000__000000A76
F097A80\n000000067F000080000008801C0000104000-000000067F000080000008801C0000108000__000000A76F097A80\n000000067F000080000008801C0000108000-000000067F000080000008801C000010C000__000000A76F097A80\n000000067F000080000008801C000010822C-000000067F000080000008801C0000111982__000000A6F001F909-000000A91D97FD49\n000000067F000080000008801C000010C000-000000067F000080000008801C0000110000__000000A76F097A80\n000000067F000080000008801C0000110000-030000000000000000000000000000000002__000000A76F097A80\n000000067F000080000008801C0000111982-000000067F000080000008A00C00000084EA__000000A6F001F909-000000A91D97FD49\n000000067F000080000008A00C00000084EA-000000067F000080000008A00C0000011C0C__000000A6F001F909-000000A91D97FD49\n000000067F000080000008A00C0000011C0C-000000067F000080000008A00C000001B372__000000A6F001F909-000000A91D97FD49\n000000067F000080000008A00C000001B372-000000067F000080000008A00C0000024AD8__000000A6F001F909-000000A91D97FD49\n000000067F000080000008A00C0000024AD8-000000067F000080000008A00C000002E20B__000000A6F001F909-000000A91D97FD49\n000000067F000080000008A00C000002E20B-000000067F000080000008A00C0000037928__000000A6F001F909-000000A91D97FD49\n000000067F000080000008A00C0000037928-000000067F000080000008A00C000004108E__000000A6F001F909-000000A91D97FD49\n000000067F000080000008A00C000004108E-000000067F000080000008A00C000004A7F4__000000A6F001F909-000000A91D97FD49\n000000067F000080000008A00C000004A7F4-000000067F000080000008A00C0000053F0B__000000A6F001F909-000000A91D97FD49\n000000067F000080000008A00C0000053F0B-000000067F000080000008A00C000005D671__000000A6F001F909-000000A91D97FD49\n000000067F000080000008A00C000005D671-000000067F000080000008A00C0000066D95__000000A6F001F909-000000A91D97FD49\n000000067F000080000008A00C0000066D95-000000067F000080000008A00C00000704FB__000000A6F001F909-000000A91D97FD49\n000000067F000080000008A00C00000704FB-000000067F000080000008A00C0000079C0B__000000A6F001F909-000000A91D97FD49\n000000067F000080000008A00C0000079C0B-000000067F000080000008A00C0000083351__0000
00A6F001F909-000000A91D97FD49\n000000067F000080000008A00C0000083351-000000067F000080000008A00C000008CAB7__000000A6F001F909-000000A91D97FD49\n000000067F000080000008A00C000008CAB7-000000067F000080000008A00C00000961E2__000000A6F001F909-000000A91D97FD49\n000000067F000080000008A00C00000961E2-000000067F000080000008A00C000009F90B__000000A6F001F909-000000A91D97FD49\n000000067F000080000008A00C000009F90B-000000067F000080000008A00C00000A902B__000000A6F001F909-000000A91D97FD49\n000000067F000080000008A00C00000A902B-000000067F000080000008A00C00000B2779__000000A6F001F909-000000A91D97FD49\n000000067F000080000008A00C00000B2779-000000067F000080000008A00C00000BBEDF__000000A6F001F909-000000A91D97FD49\n000000067F000080000008A00C00000BBEDF-000000067F000080000008A00C00000C560A__000000A6F001F909-000000A91D97FD49\n000000067F000080000008A00C00000C560A-000000067F000080000008A00C00000CED70__000000A6F001F909-000000A91D97FD49\n000000067F000080000008A00C00000CED70-000000067F000080000008A00C00000D84D6__000000A6F001F909-000000A91D97FD49\n000000067F000080000008A00C00000D84D6-000000067F000080000008A00C00000E1C0A__000000A6F001F909-000000A91D97FD49\n000000067F000080000008A00C00000E1C0A-000000067F000080000008A00C00000EB370__000000A6F001F909-000000A91D97FD49\n000000067F000080000008A00C00000EB370-000000067F000080000008A00C00000F4AD6__000000A6F001F909-000000A91D97FD49\n000000067F000080000008A00C00000F4AD6-000000067F000080000008A00C00000FE20B__000000A6F001F909-000000A91D97FD49\n000000067F000080000008A00C00000FE20B-030000000000000000000000000000000002__000000A6F001F909-000000A91D97FD49\n000000067F000080000008A00C0000104A0C-000000067F000080000008A00C000010DF6E__000000A91D97FD49-000000A98AB7EE49\n000000067F000080000008A00C000010DF6E-000000067F000080000008A0140000001A21__000000A91D97FD49-000000A98AB7EE49\n000000067F000080000008A0140000001A21-000000067F000080000008A0140000009932__000000A91D97FD49-000000A98AB7EE49\n000000067F000080000008A0140000009932-000000067F000080000008A0140000011843__000000A91D97FD49-000000A
98AB7EE49\n000000067F000080000008A0140000011843-000000067F000080000008A0140000019753__000000A91D97FD49-000000A98AB7EE49\n000000067F000080000008A0140000019753-000000067F000080000008A0140000021664__000000A91D97FD49-000000A98AB7EE49\n000000067F000080000008A0140000021664-01000000000000000100000004000000001C__000000A91D97FD49-000000A98AB7EE49\n000000067F000080000008C00C0000000000-000000067F000080000008C00C0000004000__000000AAEBE534F8\n000000067F000080000008C00C0000002330-000000067F000080000008C00C000000BA96__000000A98AB7EE49-000000AA2597E9A1\n000000067F000080000008C00C0000004000-000000067F000080000008C00C0000008000__000000AAEBE534F8\n000000067F000080000008C00C0000008000-000000067F000080000008C00C000000C000__000000AAEBE534F8\n000000067F000080000008C00C000000BA96-000000067F000080000008C00C00000151CB__000000A98AB7EE49-000000AA2597E9A1\n000000067F000080000008C00C000000C000-000000067F000080000008C00C0000010000__000000AAEBE534F8\n000000067F000080000008C00C0000010000-000000067F000080000008C00C0000014000__000000AAEBE534F8\n000000067F000080000008C00C0000014000-000000067F000080000008C00C0000018000__000000AAEBE534F8\n000000067F000080000008C00C00000151CB-000000067F000080000008C00C000001E90B__000000A98AB7EE49-000000AA2597E9A1\n000000067F000080000008C00C0000018000-000000067F000080000008C00C000001C000__000000AAEBE534F8\n000000067F000080000008C00C000001C000-000000067F000080000008C00C0000020000__000000AAEBE534F8\n000000067F000080000008C00C000001E90B-000000067F000080000008C00C000002802C__000000A98AB7EE49-000000AA2597E9A1\n000000067F000080000008C00C0000020000-000000067F000080000008C00C0000024000__000000AAEBE534F8\n000000067F000080000008C00C0000024000-000000067F000080000008C00C0000028000__000000AAEBE534F8\n000000067F000080000008C00C0000028000-000000067F000080000008C00C000002C000__000000AAEBE534F8\n000000067F000080000008C00C000002802C-000000067F000080000008C00C0000031783__000000A98AB7EE49-000000AA2597E9A1\n000000067F000080000008C00C000002C000-000000067F000080000008C00C0000030000__000000AAEBE
534F8\n000000067F000080000008C00C0000030000-000000067F000080000008C00C0000034000__000000AAEBE534F8\n000000067F000080000008C00C0000031783-000000067F000080000008C00C000003AEE9__000000A98AB7EE49-000000AA2597E9A1\n000000067F000080000008C00C0000034000-000000067F000080000008C00C0000038000__000000AAEBE534F8\n000000067F000080000008C00C0000038000-000000067F000080000008C00C000003C000__000000AAEBE534F8\n000000067F000080000008C00C000003AEE9-000000067F000080000008C00C000004460B__000000A98AB7EE49-000000AA2597E9A1\n000000067F000080000008C00C000003C000-000000067F000080000008C00C0000040000__000000AAEBE534F8\n000000067F000080000008C00C0000040000-000000067F000080000008C00C0000044000__000000AAEBE534F8\n000000067F000080000008C00C0000044000-000000067F000080000008C00C0000048000__000000AAEBE534F8\n000000067F000080000008C00C000004460B-000000067F000080000008C00C000004DD71__000000A98AB7EE49-000000AA2597E9A1\n000000067F000080000008C00C0000048000-000000067F000080000008C00C000004C000__000000AAEBE534F8\n000000067F000080000008C00C000004C000-000000067F000080000008C00C0000050000__000000AAEBE534F8\n000000067F000080000008C00C000004DD71-030000000000000000000000000000000002__000000A98AB7EE49-000000AA2597E9A1\n000000067F000080000008C00C0000050000-000000067F000080000008C00C0000054000__000000AAEBE534F8\n000000067F000080000008C00C0000054000-000000067F000080000008C00C0000058000__000000AAEBE534F8\n000000067F000080000008C00C0000058000-000000067F000080000008C00C000005C000__000000AAEBE534F8\n000000067F000080000008C00C000005C000-000000067F000080000008C00C0000060000__000000AAEBE534F8\n000000067F000080000008C00C000005DA8C-000000067F000080000008C00C00000671AE__000000AA2597E9A1-000000AB6533BFD9\n000000067F000080000008C00C0000060000-000000067F000080000008C00C0000064000__000000AAEBE534F8\n000000067F000080000008C00C0000064000-000000067F000080000008C00C0000068000__000000AAEBE534F8\n000000067F000080000008C00C00000671AE-000000067F000080000008C00C000007090A__000000AA2597E9A1-000000AB6533BFD9\n000000067F000080000008C00C00000
68000-000000067F000080000008C00C000006C000__000000AAEBE534F8\n000000067F000080000008C00C000006C000-000000067F000080000008C00C0000070000__000000AAEBE534F8\n000000067F000080000008C00C0000070000-000000067F000080000008C00C0000074000__000000AAEBE534F8\n000000067F000080000008C00C000007090A-000000067F000080000008C00C000007A070__000000AA2597E9A1-000000AB6533BFD9\n000000067F000080000008C00C0000074000-000000067F000080000008C00C0000078000__000000AAEBE534F8\n000000067F000080000008C00C0000078000-000000067F000080000008C00C000007C000__000000AAEBE534F8\n000000067F000080000008C00C000007A070-000000067F000080000008C00C00000837B4__000000AA2597E9A1-000000AB6533BFD9\n000000067F000080000008C00C000007C000-000000067F000080000008C00C0000080000__000000AAEBE534F8\n000000067F000080000008C00C0000080000-000000067F000080000008C00C0000084000__000000AAEBE534F8\n000000067F000080000008C00C00000837B4-000000067F000080000008C00C000008CF0A__000000AA2597E9A1-000000AB6533BFD9\n000000067F000080000008C00C0000084000-000000067F000080000008C00C0000088000__000000AAEBE534F8\n000000067F000080000008C00C0000088000-000000067F000080000008C00C000008C000__000000AAEBE534F8\n000000067F000080000008C00C000008C000-000000067F000080000008C00C0000090000__000000AAEBE534F8\n000000067F000080000008C00C000008CF0A-000000067F000080000008C00C0000096670__000000AA2597E9A1-000000AB6533BFD9\n000000067F000080000008C00C0000090000-000000067F000080000008C00C0000094000__000000AAEBE534F8\n000000067F000080000008C00C0000094000-000000067F000080000008C00C0000098000__000000AAEBE534F8\n000000067F000080000008C00C0000096670-000000067F000080000008C00C000009FDD6__000000AA2597E9A1-000000AB6533BFD9\n000000067F000080000008C00C0000098000-000000067F000080000008C00C000009C000__000000AAEBE534F8\n000000067F000080000008C00C000009C000-000000067F000080000008C00C00000A0000__000000AAEBE534F8\n000000067F000080000008C00C000009FDD6-000000067F000080000008C00C00000A952A__000000AA2597E9A1-000000AB6533BFD9\n000000067F000080000008C00C00000A0000-000000067F000080000008C00C00000A
4000__000000AAEBE534F8\n000000067F000080000008C00C00000A4000-000000067F000080000008C00C00000A8000__000000AAEBE534F8\n000000067F000080000008C00C00000A8000-000000067F000080000008C00C00000AC000__000000AAEBE534F8\n000000067F000080000008C00C00000A952A-000000067F000080000008C00C00000B2C90__000000AA2597E9A1-000000AB6533BFD9\n000000067F000080000008C00C00000AC000-000000067F000080000008C00C00000B0000__000000AAEBE534F8\n000000067F000080000008C00C00000B0000-000000067F000080000008C00C00000B4000__000000AAEBE534F8\n000000067F000080000008C00C00000B2C90-000000067F000080000008C00C00000BC3F6__000000AA2597E9A1-000000AB6533BFD9\n000000067F000080000008C00C00000B4000-000000067F000080000008C00C00000B8000__000000AAEBE534F8\n000000067F000080000008C00C00000B8000-000000067F000080000008C00C00000BC000__000000AAEBE534F8\n000000067F000080000008C00C00000BC000-000000067F000080000008C00C00000C0000__000000AAEBE534F8\n000000067F000080000008C00C00000BC3F6-000000067F000080000008C00C00000C5B0C__000000AA2597E9A1-000000AB6533BFD9\n000000067F000080000008C00C00000C0000-000000067F000080000008C00C00000C4000__000000AAEBE534F8\n000000067F000080000008C00C00000C4000-000000067F000080000008C00C00000C8000__000000AAEBE534F8\n000000067F000080000008C00C00000C5B0C-000000067F000080000008C00C00000CF272__000000AA2597E9A1-000000AB6533BFD9\n000000067F000080000008C00C00000C8000-030000000000000000000000000000000002__000000AAEBE534F8\n000000067F000080000008C00C00000CF272-000000067F000080000008C00C00000D8986__000000AA2597E9A1-000000AB6533BFD9\n000000067F000080000008C00C00000D8986-000000067F000080000008C00C00000E20EC__000000AA2597E9A1-000000AB6533BFD9\n000000067F000080000008C00C00000E20EC-000000067F000080000008C00C00000EB80A__000000AA2597E9A1-000000AB6533BFD9\n000000067F000080000008C00C00000EB80A-000000067F000080000008C00C00000F4F40__000000AA2597E9A1-000000AB6533BFD9\n000000067F000080000008C00C00000F4F40-000000067F000080000008C00C00000FE6A6__000000AA2597E9A1-000000AB6533BFD9\n000000067F000080000008C00C00000FE6A6-000000067F000080000
008C00C0000107DC1__000000AA2597E9A1-000000AB6533BFD9\n000000067F000080000008C00C0000107DC1-000000067F000080000008C00C000011150A__000000AA2597E9A1-000000AB6533BFD9\n000000067F000080000008C00C000011150A-01000000000000000100000004000000001C__000000AA2597E9A1-000000AB6533BFD9\n000000067F000080000008E00C0000000000-000000067F000080000008E00C0000004000__000000AD3698E000\n000000067F000080000008E00C0000004000-000000067F000080000008E00C0000008000__000000AD3698E000\n000000067F000080000008E00C00000077B3-000000067F000080000008E00C0000010F0A__000000AB6533BFD9-000000ABF63DF511\n000000067F000080000008E00C0000008000-000000067F000080000008E00C000000C000__000000AD3698E000\n000000067F000080000008E00C000000C000-000000067F000080000008E00C0000010000__000000AD3698E000\n000000067F000080000008E00C0000010000-000000067F000080000008E00C0000014000__000000AD3698E000\n000000067F000080000008E00C0000010F0A-000000067F000080000008E00C000001A670__000000AB6533BFD9-000000ABF63DF511\n000000067F000080000008E00C0000014000-000000067F000080000008E00C0000018000__000000AD3698E000\n000000067F000080000008E00C0000018000-000000067F000080000008E00C000001C000__000000AD3698E000\n000000067F000080000008E00C000001A670-000000067F000080000008E00C0000023DB1__000000AB6533BFD9-000000ABF63DF511\n000000067F000080000008E00C000001C000-000000067F000080000008E00C0000020000__000000AD3698E000\n000000067F000080000008E00C0000020000-000000067F000080000008E00C0000024000__000000AD3698E000\n000000067F000080000008E00C0000023DB1-000000067F000080000008E00C000002D50A__000000AB6533BFD9-000000ABF63DF511\n000000067F000080000008E00C0000024000-000000067F000080000008E00C0000028000__000000AD3698E000\n000000067F000080000008E00C0000028000-000000067F000080000008E00C000002C000__000000AD3698E000\n000000067F000080000008E00C000002C000-000000067F000080000008E00C0000030000__000000AD3698E000\n000000067F000080000008E00C000002D50A-000000067F000080000008E00C0000036C30__000000AB6533BFD9-000000ABF63DF511\n000000067F000080000008E00C0000030000-000000067F000080000008E
00C0000034000__000000AD3698E000\n000000067F000080000008E00C0000034000-000000067F000080000008E00C0000038000__000000AD3698E000\n000000067F000080000008E00C0000036C30-000000067F000080000008E00C0000040393__000000AB6533BFD9-000000ABF63DF511\n000000067F000080000008E00C0000038000-000000067F000080000008E00C000003C000__000000AD3698E000\n000000067F000080000008E00C000003C000-000000067F000080000008E00C0000040000__000000AD3698E000\n000000067F000080000008E00C0000040000-000000067F000080000008E00C0000044000__000000AD3698E000\n000000067F000080000008E00C0000040393-000000067F000080000008E00C0000049AF9__000000AB6533BFD9-000000ABF63DF511\n000000067F000080000008E00C0000044000-000000067F000080000008E00C0000048000__000000AD3698E000\n000000067F000080000008E00C0000048000-000000067F000080000008E00C000004C000__000000AD3698E000\n000000067F000080000008E00C0000049AF9-000000067F000080000008E00C000005320C__000000AB6533BFD9-000000ABF63DF511\n000000067F000080000008E00C000004C000-000000067F000080000008E00C0000050000__000000AD3698E000\n000000067F000080000008E00C0000050000-000000067F000080000008E00C0000054000__000000AD3698E000\n000000067F000080000008E00C000005320C-030000000000000000000000000000000002__000000AB6533BFD9-000000ABF63DF511\n000000067F000080000008E00C0000054000-000000067F000080000008E00C0000058000__000000AD34AF7FD8\n000000067F000080000008E00C000005523E-000000067F000080000008E00C000005E9A4__000000ABF63DF511-000000AC9601EA19\n000000067F000080000008E00C0000058000-000000067F000080000008E00C000005C000__000000AD34AF7FD8\n000000067F000080000008E00C000005C000-000000067F000080000008E00C0000060000__000000AD34AF7FD8\n000000067F000080000008E00C000005E9A4-000000067F000080000008E00C000006810A__000000ABF63DF511-000000AC9601EA19\n000000067F000080000008E00C0000060000-000000067F000080000008E00C0000064000__000000AD34AF7FD8\n000000067F000080000008E00C0000064000-000000067F000080000008E00C0000068000__000000AD34AF7FD8\n000000067F000080000008E00C0000068000-000000067F000080000008E00C000006C000__000000AD34AF7FD8\n00000
0067F000080000008E00C000006810A-000000067F000080000008E00C0000071870__000000ABF63DF511-000000AC9601EA19\n000000067F000080000008E00C000006C000-000000067F000080000008E00C0000070000__000000AD34AF7FD8\n000000067F000080000008E00C0000070000-000000067F000080000008E00C0000074000__000000AD34AF7FD8\n000000067F000080000008E00C0000071870-000000067F000080000008E00C000007AFD6__000000ABF63DF511-000000AC9601EA19\n000000067F000080000008E00C0000074000-000000067F000080000008E00C0000078000__000000AD34AF7FD8\n000000067F000080000008E00C0000078000-000000067F000080000008E00C000007C000__000000AD34AF7FD8\n000000067F000080000008E00C000007AFD6-000000067F000080000008E00C000008470B__000000ABF63DF511-000000AC9601EA19\n000000067F000080000008E00C000007C000-000000067F000080000008E00C0000080000__000000AD34AF7FD8\n000000067F000080000008E00C0000080000-000000067F000080000008E00C0000084000__000000AD34AF7FD8\n000000067F000080000008E00C0000084000-000000067F000080000008E00C0000088000__000000AD34AF7FD8\n000000067F000080000008E00C000008470B-000000067F000080000008E00C000008DE71__000000ABF63DF511-000000AC9601EA19\n000000067F000080000008E00C0000088000-000000067F000080000008E00C000008C000__000000AD34AF7FD8\n000000067F000080000008E00C000008C000-000000067F000080000008E00C0000090000__000000AD34AF7FD8\n000000067F000080000008E00C000008DE71-000000067F000080000008E00C0000097591__000000ABF63DF511-000000AC9601EA19\n000000067F000080000008E00C0000090000-000000067F000080000008E00C0000094000__000000AD34AF7FD8\n000000067F000080000008E00C0000094000-000000067F000080000008E00C0000098000__000000AD34AF7FD8\n000000067F000080000008E00C0000097591-000000067F000080000008E00C00000A0CF7__000000ABF63DF511-000000AC9601EA19\n000000067F000080000008E00C0000098000-000000067F000080000008E00C000009C000__000000AD34AF7FD8\n000000067F000080000008E00C000009C000-000000067F000080000008E00C00000A0000__000000AD34AF7FD8\n000000067F000080000008E00C00000A0000-000000067F000080000008E00C00000A4000__000000AD34AF7FD8\n000000067F000080000008E00C00000A0CF7-000000
067F000080000008E00C00000AA40B__000000ABF63DF511-000000AC9601EA19\n000000067F000080000008E00C00000A4000-000000067F000080000008E00C00000A8000__000000AD34AF7FD8\n000000067F000080000008E00C00000A8000-000000067F000080000008E00C00000AC000__000000AD34AF7FD8\n000000067F000080000008E00C00000AA40B-000000067F000080000008E00C00000B3B4D__000000ABF63DF511-000000AC9601EA19\n000000067F000080000008E00C00000AC000-000000067F000080000008E00C00000B0000__000000AD34AF7FD8\n000000067F000080000008E00C00000B0000-000000067F000080000008E00C00000B4000__000000AD34AF7FD8\n000000067F000080000008E00C00000B3B4D-000000067F000080000008E00C0100000000__000000ABF63DF511-000000AC9601EA19\n000000067F000080000008E00C00000B4000-000000067F000080000008E00C00000B8000__000000AD34AF7FD8\n000000067F000080000008E00C00000B8000-000000067F000080000008E00C00000BC000__000000AD34AF7FD8\n000000067F000080000008E00C00000BC000-000000067F000080000008E00C00000C0000__000000AD34AF7FD8\n000000067F000080000008E00C00000BC018-000000067F000080000008E00C00000C5749__000000AC9601EA19-000000AD36393FE9\n000000067F000080000008E00C00000C0000-000000067F000080000008E00C00000C4000__000000AD34AF7FD8\n000000067F000080000008E00C00000C4000-000000067F000080000008E00C00000C8000__000000AD34AF7FD8\n000000067F000080000008E00C00000C5749-000000067F000080000008E00C00000CEEAF__000000AC9601EA19-000000AD36393FE9\n000000067F000080000008E00C00000C8000-000000067F000080000008E00C00000CC000__000000AD34AF7FD8\n000000067F000080000008E00C00000CC000-000000067F000080000008E00C00000D0000__000000AD34AF7FD8\n000000067F000080000008E00C00000CEEAF-000000067F000080000008E00C00000D85C5__000000AC9601EA19-000000AD36393FE9\n000000067F000080000008E00C00000D0000-000000067F000080000008E00C00000D4000__000000AD34AF7FD8\n000000067F000080000008E00C00000D4000-000000067F000080000008E00C00000D8000__000000AD34AF7FD8\n000000067F000080000008E00C00000D8000-000000067F000080000008E00C00000DC000__000000AD34AF7FD8\n000000067F000080000008E00C00000D85C5-000000067F000080000008E00C00000E1D0B__000000
AC9601EA19-000000AD36393FE9\n000000067F000080000008E00C00000DC000-000000067F000080000008E00C00000E0000__000000AD34AF7FD8\n000000067F000080000008E00C00000E0000-000000067F000080000008E00C00000E4000__000000AD34AF7FD8\n000000067F000080000008E00C00000E1D0B-000000067F000080000008E00C00000EB471__000000AC9601EA19-000000AD36393FE9\n000000067F000080000008E00C00000E4000-000000067F000080000008E00C00000E8000__000000AD34AF7FD8\n000000067F000080000008E00C00000E8000-000000067F000080000008E00C00000EC000__000000AD34AF7FD8\n000000067F000080000008E00C00000EB471-000000067F000080000008E00C00000F4BAA__000000AC9601EA19-000000AD36393FE9\n000000067F000080000008E00C00000EC000-000000067F000080000008E00C00000F0000__000000AD34AF7FD8\n000000067F000080000008E00C00000F0000-000000067F000080000008E00C00000F4000__000000AD34AF7FD8\n000000067F000080000008E00C00000F4000-000000067F000080000008E00C00000F8000__000000AD34AF7FD8\n000000067F000080000008E00C00000F4BAA-000000067F000080000008E00C00000FE30A__000000AC9601EA19-000000AD36393FE9\n000000067F000080000008E00C00000F8000-000000067F000080000008E00C00000FC000__000000AD34AF7FD8\n000000067F000080000008E00C00000FC000-000000067F000080000008E00C0000100000__000000AD34AF7FD8\n000000067F000080000008E00C00000FE30A-000000067F000080000008E00C0000107A2C__000000AC9601EA19-000000AD36393FE9\n000000067F000080000008E00C0000100000-000000067F000080000008E00C0000104000__000000AD34AF7FD8\n000000067F000080000008E00C0000104000-000000067F000080000008E00C0000108000__000000AD34AF7FD8\n000000067F000080000008E00C0000107A2C-000000067F000080000008E00C0000111187__000000AC9601EA19-000000AD36393FE9\n000000067F000080000008E00C0000108000-000000067F000080000008E00C000010C000__000000AD34AF7FD8\n000000067F000080000008E00C000010C000-000000067F000080000008E00C0000110000__000000AD34AF7FD8\n000000067F000080000008E00C0000110000-030000000000000000000000000000000002__000000AD34AF7FD8\n000000067F000080000008E00C0000111187-01000000000000000100000004000000001C__000000AC9601EA19-000000AD36393FE9\n000000067
F000080000008E0140000003E33-000000067F000080000008E014000000BD44__000000AD36393FE9-000000ADB047EAB9\n000000067F000080000008E014000000BD44-000000067F000080000008E0140000013C54__000000AD36393FE9-000000ADB047EAB9\n000000067F000080000008E0140000013C54-000000067F000080000008E014000001BB63__000000AD36393FE9-000000ADB047EAB9\n000000067F000080000008E014000001BB63-000000067F000080000008E0140000023A74__000000AD36393FE9-000000ADB047EAB9\n000000067F000080000008E0140000023A74-000000067F000080000008E014000002B984__000000AD36393FE9-000000ADB047EAB9\n000000067F000080000008E014000002B984-000000067F000080000008E0220000006AD0__000000AD36393FE9-000000ADB047EAB9\n000000067F000080000008E0220000000000-000000067F000080000008E0220000004000__000000AF5D7D4000\n000000067F000080000008E0220000004000-000000067F000080000008E0220000008000__000000AF5D7D4000\n000000067F000080000008E0220000006AD0-000000067F000080000008E022000001020C__000000AD36393FE9-000000ADB047EAB9\n000000067F000080000008E0220000008000-000000067F000080000008E022000000C000__000000AF5D7D4000\n000000067F000080000008E022000000C000-000000067F000080000008E0220000010000__000000AF5D7D4000\n000000067F000080000008E0220000010000-000000067F000080000008E0220000014000__000000AF5D7D4000\n000000067F000080000008E022000001020C-01000000000000000100000004000000001C__000000AD36393FE9-000000ADB047EAB9\n000000067F000080000008E0220000014000-000000067F000080000008E0220000018000__000000AF56604248\n000000067F000080000008E02200000151DD-000000067F000080000008E022000001E90B__000000ADB047EAB9-000000AE6FFFE799\n000000067F000080000008E0220000018000-000000067F000080000008E022000001C000__000000AF56604248\n000000067F000080000008E022000001C000-000000067F000080000008E0220000020000__000000AF56604248\n000000067F000080000008E022000001E90B-000000067F000080000008E022000002802C__000000ADB047EAB9-000000AE6FFFE799\n000000067F000080000008E0220000020000-000000067F000080000008E0220000024000__000000AF56604248\n000000067F000080000008E0220000024000-000000067F000080000008E022000002800
0__000000AF56604248\n000000067F000080000008E0220000028000-000000067F000080000008E022000002C000__000000AF56604248\n000000067F000080000008E022000002802C-000000067F000080000008E0220000031783__000000ADB047EAB9-000000AE6FFFE799\n000000067F000080000008E022000002C000-000000067F000080000008E0220000030000__000000AF56604248\n000000067F000080000008E0220000030000-000000067F000080000008E0220000034000__000000AF56604248\n000000067F000080000008E0220000031783-000000067F000080000008E022000003AEE9__000000ADB047EAB9-000000AE6FFFE799\n000000067F000080000008E0220000034000-000000067F000080000008E0220000038000__000000AF56604248\n000000067F000080000008E0220000038000-000000067F000080000008E022000003C000__000000AF56604248\n000000067F000080000008E022000003AEE9-000000067F000080000008E022000004460B__000000ADB047EAB9-000000AE6FFFE799\n000000067F000080000008E022000003C000-000000067F000080000008E0220000040000__000000AF56604248\n000000067F000080000008E0220000040000-000000067F000080000008E0220000044000__000000AF56604248\n000000067F000080000008E0220000044000-000000067F000080000008E0220000048000__000000AF56604248\n000000067F000080000008E022000004460B-000000067F000080000008E022000004DD71__000000ADB047EAB9-000000AE6FFFE799\n000000067F000080000008E0220000048000-000000067F000080000008E022000004C000__000000AF56604248\n000000067F000080000008E022000004C000-000000067F000080000008E0220000050000__000000AF56604248\n000000067F000080000008E022000004DD71-000000067F000080000008E02200000574D7__000000ADB047EAB9-000000AE6FFFE799\n000000067F000080000008E0220000050000-000000067F000080000008E0220000054000__000000AF56604248\n000000067F000080000008E0220000054000-000000067F000080000008E0220000058000__000000AF56604248\n000000067F000080000008E02200000574D7-000000067F000080000008E0220000060C0B__000000ADB047EAB9-000000AE6FFFE799\n000000067F000080000008E0220000058000-000000067F000080000008E022000005C000__000000AF56604248\n000000067F000080000008E022000005C000-000000067F000080000008E0220000060000__000000AF56604248\n000000067F0000800
00008E0220000060000-000000067F000080000008E0220000064000__000000AF56604248\n000000067F000080000008E0220000060C0B-000000067F000080000008E022000006A371__000000ADB047EAB9-000000AE6FFFE799\n000000067F000080000008E0220000064000-000000067F000080000008E0220000068000__000000AF56604248\n000000067F000080000008E0220000068000-000000067F000080000008E022000006C000__000000AF56604248\n000000067F000080000008E022000006A371-000000067F000080000008E0220000073AD7__000000ADB047EAB9-000000AE6FFFE799\n000000067F000080000008E022000006C000-000000067F000080000008E0220000070000__000000AF56604248\n000000067F000080000008E0220000070000-000000067F000080000008E0220000074000__000000AF56604248\n000000067F000080000008E0220000073AD7-000000067F000080000008E022000007D20B__000000ADB047EAB9-000000AE6FFFE799\n000000067F000080000008E0220000074000-000000067F000080000008E0220000078000__000000AF56604248\n000000067F000080000008E0220000078000-000000067F000080000008E022000007C000__000000AF56604248\n000000067F000080000008E022000007C000-000000067F000080000008E0220000080000__000000AF56604248\n000000067F000080000008E022000007D20B-000000067F000080000008E0220000086932__000000ADB047EAB9-000000AE6FFFE799\n000000067F000080000008E0220000080000-000000067F000080000008E0220000084000__000000AF56604248\n000000067F000080000008E0220000084000-000000067F000080000008E0220000088000__000000AF56604248\n000000067F000080000008E0220000086932-000000067F000080000008E0220100000000__000000ADB047EAB9-000000AE6FFFE799\n000000067F000080000008E0220000088000-000000067F000080000008E022000008C000__000000AF56604248\n000000067F000080000008E022000008C000-000000067F000080000008E0220000090000__000000AF56604248\n000000067F000080000008E022000008E3D1-000000067F000080000008E022000009797E__000000AE6FFFE799-000000AF5D587FE1\n000000067F000080000008E0220000090000-000000067F000080000008E0220000094000__000000AF56604248\n000000067F000080000008E0220000094000-000000067F000080000008E0220000098000__000000AF56604248\n000000067F000080000008E022000009797E-000000067F00008000
0008E02200000A10E4__000000AE6FFFE799-000000AF5D587FE1\n000000067F000080000008E0220000098000-000000067F000080000008E022000009C000__000000AF56604248\n000000067F000080000008E022000009C000-000000067F000080000008E02200000A0000__000000AF56604248\n000000067F000080000008E02200000A0000-000000067F000080000008E02200000A4000__000000AF56604248\n000000067F000080000008E02200000A10E4-000000067F000080000008E02200000AA80B__000000AE6FFFE799-000000AF5D587FE1\n000000067F000080000008E02200000A4000-000000067F000080000008E02200000A8000__000000AF56604248\n000000067F000080000008E02200000A8000-000000067F000080000008E02200000AC000__000000AF56604248\n000000067F000080000008E02200000AA80B-000000067F000080000008E02200000B3F4B__000000AE6FFFE799-000000AF5D587FE1\n000000067F000080000008E02200000AC000-000000067F000080000008E02200000B0000__000000AF56604248\n000000067F000080000008E02200000B0000-000000067F000080000008E02200000B4000__000000AF56604248\n000000067F000080000008E02200000B3F4B-000000067F000080000008E02200000BD6B1__000000AE6FFFE799-000000AF5D587FE1\n000000067F000080000008E02200000B4000-000000067F000080000008E02200000B8000__000000AF56604248\n000000067F000080000008E02200000B8000-000000067F000080000008E02200000BC000__000000AF56604248\n000000067F000080000008E02200000BC000-000000067F000080000008E02200000C0000__000000AF56604248\n000000067F000080000008E02200000BD6B1-000000067F000080000008E02200000C6DD5__000000AE6FFFE799-000000AF5D587FE1\n000000067F000080000008E02200000C0000-000000067F000080000008E02200000C4000__000000AF56604248\n000000067F000080000008E02200000C4000-000000067F000080000008E02200000C8000__000000AF56604248\n000000067F000080000008E02200000C6DD5-000000067F000080000008E02200000D050B__000000AE6FFFE799-000000AF5D587FE1\n000000067F000080000008E02200000C8000-000000067F000080000008E02200000CC000__000000AF56604248\n000000067F000080000008E02200000CC000-000000067F000080000008E02200000D0000__000000AF56604248\n000000067F000080000008E02200000D0000-000000067F000080000008E02200000D4000__000000AF56604248\n
000000067F000080000008E02200000D050B-000000067F000080000008E02200000D9C71__000000AE6FFFE799-000000AF5D587FE1\n000000067F000080000008E02200000D4000-000000067F000080000008E02200000D8000__000000AF56604248\n000000067F000080000008E02200000D8000-000000067F000080000008E02200000DC000__000000AF56604248\n000000067F000080000008E02200000D9C71-000000067F000080000008E02200000E33B8__000000AE6FFFE799-000000AF5D587FE1\n000000067F000080000008E02200000DC000-000000067F000080000008E02200000E0000__000000AF56604248\n000000067F000080000008E02200000E0000-000000067F000080000008E02200000E4000__000000AF56604248\n000000067F000080000008E02200000E33B8-000000067F000080000008E02200000ECB09__000000AE6FFFE799-000000AF5D587FE1\n000000067F000080000008E02200000E4000-000000067F000080000008E02200000E8000__000000AF56604248\n000000067F000080000008E02200000E8000-000000067F000080000008E02200000EC000__000000AF56604248\n000000067F000080000008E02200000EC000-000000067F000080000008E02200000F0000__000000AF56604248\n000000067F000080000008E02200000ECB09-000000067F000080000008E02200000F626F__000000AE6FFFE799-000000AF5D587FE1\n000000067F000080000008E02200000F0000-000000067F000080000008E02200000F4000__000000AF56604248\n000000067F000080000008E02200000F4000-000000067F000080000008E02200000F8000__000000AF56604248\n000000067F000080000008E02200000F626F-000000067F000080000008E02200000FF9D5__000000AE6FFFE799-000000AF5D587FE1\n000000067F000080000008E02200000F8000-000000067F000080000008E02200000FC000__000000AF56604248\n000000067F000080000008E02200000FC000-000000067F000080000008E0220000100000__000000AF56604248\n000000067F000080000008E02200000FF9D5-000000067F000080000008E022000010912A__000000AE6FFFE799-000000AF5D587FE1\n000000067F000080000008E0220000100000-000000067F000080000008E0220000104000__000000AF56604248\n000000067F000080000008E0220000104000-000000067F000080000008E0220000108000__000000AF56604248\n000000067F000080000008E0220000108000-000000067F000080000008E022000010C000__000000AF56604248\n000000067F000080000008E022000010912A-0
00000067F000080000008E0220000111C20__000000AE6FFFE799-000000AF5D587FE1\n000000067F000080000008E022000010C000-030000000000000000000000000000000002__000000AF56604248\n000000067F000080000008E02200FFFFFFFF-01000000000000000100000004000000001C__000000AE6FFFE799-000000AF5D587FE1\n000000067F000080000008E02A000000529F-000000067F000080000008E02A000000D1B0__000000AF5D587FE1-000000AFB4666001\n000000067F000080000008E02A000000D1B0-000000067F000080000008E02A00000150BF__000000AF5D587FE1-000000AFB4666001\n000000067F000080000008E02A00000150BF-000000067F000080000008E02A000001CFD0__000000AF5D587FE1-000000AFB4666001\n000000067F000080000008E02A000001CFD0-000000067F000080000008E02A0000024EE1__000000AF5D587FE1-000000AFB4666001\n000000067F000080000008E02A0000024EE1-000000067F000080000008E02A000002CDF1__000000AF5D587FE1-000000AFB4666001\n000000067F000080000008E02A000002CDF1-030000000000000000000000000000000002__000000AF5D587FE1-000000AFB4666001\n000000067F00008000000900380000000000-000000067F0000800000090038000000970B__000000AFB4666001-000000B05429F579\n000000067F0000800000090038000000970B-000000067F00008000000900380000012E71__000000AFB4666001-000000B05429F579\n000000067F00008000000900380000012E71-000000067F0000800000090038000001C5D7__000000AFB4666001-000000B05429F579\n000000067F0000800000090038000001C5D7-000000067F00008000000900380000025D2B__000000AFB4666001-000000B05429F579\n000000067F00008000000900380000025D2B-000000067F0000800000090038000002F491__000000AFB4666001-000000B05429F579\n000000067F0000800000090038000002F491-000000067F00008000000900380000038BA4__000000AFB4666001-000000B05429F579\n000000067F00008000000900380000038BA4-000000067F0000800000090038000004230A__000000AFB4666001-000000B05429F579\n000000067F0000800000090038000004230A-000000067F0000800000090038000004BA70__000000AFB4666001-000000B05429F579\n000000067F0000800000090038000004BA70-000000067F000080000009003800000551A5__000000AFB4666001-000000B05429F579\n000000067F000080000009003800000551A5-000000067F0000800000090038000005E909__
000000AFB4666001-000000B05429F579\n000000067F0000800000090038000005C000-000000067F00008000000900380000060000__000000B18434BFD0\n000000067F0000800000090038000005E909-000000067F000080000009003B0100000000__000000AFB4666001-000000B05429F579\n000000067F0000800000090038000005EA0C-000000067F00008000000900380000068125__000000B05429F579-000000B0F3EDEAC9\n000000067F00008000000900380000060000-000000067F00008000000900380000064000__000000B18434BFD0\n000000067F00008000000900380000064000-000000067F00008000000900380000068000__000000B18434BFD0\n000000067F00008000000900380000068000-000000067F0000800000090038000006C000__000000B18434BFD0\n000000067F00008000000900380000068125-000000067F0000800000090038000007188B__000000B05429F579-000000B0F3EDEAC9\n000000067F0000800000090038000006C000-000000067F00008000000900380000070000__000000B18434BFD0\n000000067F00008000000900380000070000-000000067F00008000000900380000074000__000000B18434BFD0\n000000067F0000800000090038000007188B-000000067F0000800000090038000007AFF1__000000B05429F579-000000B0F3EDEAC9\n000000067F00008000000900380000074000-000000067F00008000000900380000078000__000000B18434BFD0\n000000067F00008000000900380000078000-000000067F0000800000090038000007C000__000000B18434BFD0\n000000067F0000800000090038000007AFF1-000000067F0000800000090038000008470C__000000B05429F579-000000B0F3EDEAC9\n000000067F0000800000090038000007C000-000000067F00008000000900380000080000__000000B18434BFD0\n000000067F00008000000900380000080000-000000067F00008000000900380000084000__000000B18434BFD0\n000000067F00008000000900380000084000-000000067F00008000000900380000088000__000000B18434BFD0\n000000067F0000800000090038000008470C-000000067F0000800000090038000008DE72__000000B05429F579-000000B0F3EDEAC9\n000000067F00008000000900380000088000-000000067F0000800000090038000008C000__000000B18434BFD0\n000000067F0000800000090038000008C000-000000067F00008000000900380000090000__000000B18434BFD0\n000000067F0000800000090038000008DE72-000000067F00008000000900380000097592__000000B05429F579-0000
00B0F3EDEAC9\n000000067F00008000000900380000090000-000000067F00008000000900380000094000__000000B18434BFD0\n000000067F00008000000900380000094000-000000067F00008000000900380000098000__000000B18434BFD0\n000000067F00008000000900380000097592-000000067F000080000009003800000A0CF8__000000B05429F579-000000B0F3EDEAC9\n000000067F00008000000900380000098000-000000067F0000800000090038000009C000__000000B18434BFD0\n000000067F0000800000090038000009C000-000000067F000080000009003800000A0000__000000B18434BFD0\n000000067F000080000009003800000A0000-000000067F000080000009003800000A4000__000000B18434BFD0\n000000067F000080000009003800000A0CF8-000000067F000080000009003800000AA40C__000000B05429F579-000000B0F3EDEAC9\n000000067F000080000009003800000A4000-000000067F000080000009003800000A8000__000000B18434BFD0\n000000067F000080000009003800000A8000-000000067F000080000009003800000AC000__000000B18434BFD0\n000000067F000080000009003800000AA40C-000000067F000080000009003800000B3B4E__000000B05429F579-000000B0F3EDEAC9\n000000067F000080000009003800000AC000-000000067F000080000009003800000B0000__000000B18434BFD0\n000000067F000080000009003800000B0000-000000067F000080000009003800000B4000__000000B18434BFD0\n000000067F000080000009003800000B3B4E-000000067F000080000009003800000BD2B4__000000B05429F579-000000B0F3EDEAC9\n000000067F000080000009003800000B4000-000000067F000080000009003800000B8000__000000B18434BFD0\n000000067F000080000009003800000B8000-000000067F000080000009003800000BC000__000000B18434BFD0\n000000067F000080000009003800000BC000-000000067F000080000009003800000C0000__000000B18434BFD0\n000000067F000080000009003800000BD2B4-000000067F00008000000900380100000000__000000B05429F579-000000B0F3EDEAC9\n000000067F000080000009003800000C0000-000000067F000080000009003800000C4000__000000B18434BFD0\n000000067F000080000009003800000C4000-000000067F000080000009003800000C8000__000000B18434BFD0\n000000067F000080000009003800000C5213-000000067F000080000009003800000CE979__000000B0F3EDEAC9-000000B18495C001\n000000067F00008000000900
3800000C8000-000000067F000080000009003800000CC000__000000B18434BFD0\n000000067F000080000009003800000CC000-000000067F000080000009003800000D0000__000000B18434BFD0\n000000067F000080000009003800000CE979-000000067F000080000009003800000D80DF__000000B0F3EDEAC9-000000B18495C001\n000000067F000080000009003800000D0000-000000067F000080000009003800000D4000__000000B18434BFD0\n000000067F000080000009003800000D4000-000000067F000080000009003800000D8000__000000B18434BFD0\n000000067F000080000009003800000D8000-000000067F000080000009003800000DC000__000000B18434BFD0\n000000067F000080000009003800000D80DF-000000067F000080000009003800000E180A__000000B0F3EDEAC9-000000B18495C001\n000000067F000080000009003800000DC000-000000067F000080000009003800000E0000__000000B18434BFD0\n000000067F000080000009003800000E0000-000000067F000080000009003800000E4000__000000B18434BFD0\n000000067F000080000009003800000E180A-000000067F000080000009003800000EAF70__000000B0F3EDEAC9-000000B18495C001\n000000067F000080000009003800000E4000-000000067F000080000009003800000E8000__000000B18434BFD0\n000000067F000080000009003800000E8000-000000067F000080000009003800000EC000__000000B18434BFD0\n000000067F000080000009003800000EAF70-000000067F000080000009003800000F46D6__000000B0F3EDEAC9-000000B18495C001\n000000067F000080000009003800000EC000-000000067F000080000009003800000F0000__000000B18434BFD0\n000000067F000080000009003800000F0000-000000067F000080000009003800000F4000__000000B18434BFD0\n000000067F000080000009003800000F4000-000000067F000080000009003800000F8000__000000B18434BFD0\n000000067F000080000009003800000F46D6-000000067F000080000009003800000FDE0B__000000B0F3EDEAC9-000000B18495C001\n000000067F000080000009003800000F8000-000000067F000080000009003800000FC000__000000B18434BFD0\n000000067F000080000009003800000FC000-000000067F00008000000900380000100000__000000B18434BFD0\n000000067F000080000009003800000FDE0B-000000067F0000800000090038000010752B__000000B0F3EDEAC9-000000B18495C001\n000000067F00008000000900380000100000-000000067F000080000009003
80000104000__000000B18434BFD0\n000000067F00008000000900380000104000-000000067F00008000000900380000108000__000000B18434BFD0\n000000067F0000800000090038000010752B-000000067F00008000000900380000110C91__000000B0F3EDEAC9-000000B18495C001\n000000067F00008000000900380000108000-000000067F0000800000090038000010C000__000000B18434BFD0\n000000067F0000800000090038000010C000-000000067F00008000000900380000110000__000000B18434BFD0\n000000067F00008000000900380000110000-030000000000000000000000000000000002__000000B18434BFD0\n000000067F00008000000900380000110C91-01000000000000000100000004000000001C__000000B0F3EDEAC9-000000B18495C001\n000000067F000080000009004000000047E0-000000067F0000800000090040000000C6F1__000000B18495C001-000000B1FA75F501\n000000067F0000800000090040000000C6F1-000000067F00008000000900400000014600__000000B18495C001-000000B1FA75F501\n000000067F00008000000900400000014600-000000067F0000800000090040000001C511__000000B18495C001-000000B1FA75F501\n000000067F0000800000090040000001C511-000000067F00008000000900400000024421__000000B18495C001-000000B1FA75F501\n000000067F00008000000900400000024421-000000067F0000800000090040000002C331__000000B18495C001-000000B1FA75F501\n000000067F0000800000090040000002C331-000000067F000080000009200C0000007658__000000B18495C001-000000B1FA75F501\n000000067F000080000009200C0000000000-000000067F000080000009200C0000004000__000000B3AC039FE8\n000000067F000080000009200C0000004000-000000067F000080000009200C0000008000__000000B3AC039FE8\n000000067F000080000009200C0000007658-000000067F000080000009200C0000010DB5__000000B18495C001-000000B1FA75F501\n000000067F000080000009200C0000008000-000000067F000080000009200C000000C000__000000B3AC039FE8\n000000067F000080000009200C000000C000-000000067F000080000009200C0000010000__000000B3AC039FE8\n000000067F000080000009200C0000010000-000000067F000080000009200C0000014000__000000B3A3EC82C8\n000000067F000080000009200C0000010DB5-030000000000000000000000000000000002__000000B18495C001-000000B1FA75F501\n000000067F000080000009200C000001
2E97-000000067F000080000009200C000001C5FD__000000B1FA75F501-000000B2CA27F641\n000000067F000080000009200C0000014000-000000067F000080000009200C0000018000__000000B3A3EC82C8\n000000067F000080000009200C0000018000-000000067F000080000009200C000001C000__000000B3A3EC82C8\n000000067F000080000009200C000001C000-000000067F000080000009200C0000020000__000000B3A3EC82C8\n000000067F000080000009200C000001C5FD-000000067F000080000009200C0000025D0C__000000B1FA75F501-000000B2CA27F641\n000000067F000080000009200C0000020000-000000067F000080000009200C0000024000__000000B3A3EC82C8\n000000067F000080000009200C0000024000-000000067F000080000009200C0000028000__000000B3A3EC82C8\n000000067F000080000009200C0000025D0C-000000067F000080000009200C000002F472__000000B1FA75F501-000000B2CA27F641\n000000067F000080000009200C0000028000-000000067F000080000009200C000002C000__000000B3A3EC82C8\n000000067F000080000009200C000002C000-000000067F000080000009200C0000030000__000000B3A3EC82C8\n000000067F000080000009200C000002F472-000000067F000080000009200C0000038B85__000000B1FA75F501-000000B2CA27F641\n000000067F000080000009200C0000030000-000000067F000080000009200C0000034000__000000B3A3EC82C8\n000000067F000080000009200C0000034000-000000067F000080000009200C0000038000__000000B3A3EC82C8\n000000067F000080000009200C0000038000-000000067F000080000009200C000003C000__000000B3A3EC82C8\n000000067F000080000009200C0000038B85-000000067F000080000009200C00000422EB__000000B1FA75F501-000000B2CA27F641\n000000067F000080000009200C000003C000-000000067F000080000009200C0000040000__000000B3A3EC82C8\n000000067F000080000009200C0000040000-000000067F000080000009200C0000044000__000000B3A3EC82C8\n000000067F000080000009200C00000422EB-000000067F000080000009200C000004BA0C__000000B1FA75F501-000000B2CA27F641\n000000067F000080000009200C0000044000-000000067F000080000009200C0000048000__000000B3A3EC82C8\n000000067F000080000009200C0000048000-000000067F000080000009200C000004C000__000000B3A3EC82C8\n000000067F000080000009200C000004BA0C-000000067F000080000009200C0000055
141__000000B1FA75F501-000000B2CA27F641\n000000067F000080000009200C000004C000-000000067F000080000009200C0000050000__000000B3A3EC82C8\n000000067F000080000009200C0000050000-000000067F000080000009200C0000054000__000000B3A3EC82C8\n000000067F000080000009200C0000054000-000000067F000080000009200C0000058000__000000B3A3EC82C8\n000000067F000080000009200C0000055141-000000067F000080000009200C000005E8A7__000000B1FA75F501-000000B2CA27F641\n000000067F000080000009200C0000058000-000000067F000080000009200C000005C000__000000B3A3EC82C8\n000000067F000080000009200C000005C000-000000067F000080000009200C0000060000__000000B3A3EC82C8\n000000067F000080000009200C000005E8A7-000000067F000080000009200C0000067FC1__000000B1FA75F501-000000B2CA27F641\n000000067F000080000009200C0000060000-000000067F000080000009200C0000064000__000000B3A3EC82C8\n000000067F000080000009200C0000064000-000000067F000080000009200C0000068000__000000B3A3EC82C8\n000000067F000080000009200C0000067FC1-000000067F000080000009200C0000071709__000000B1FA75F501-000000B2CA27F641\n000000067F000080000009200C0000068000-000000067F000080000009200C000006C000__000000B3A3EC82C8\n000000067F000080000009200C000006C000-000000067F000080000009200C0000070000__000000B3A3EC82C8\n000000067F000080000009200C0000070000-000000067F000080000009200C0000074000__000000B3A3EC82C8\n000000067F000080000009200C0000071709-000000067F000080000009200C000007AE6F__000000B1FA75F501-000000B2CA27F641\n000000067F000080000009200C0000074000-000000067F000080000009200C0000078000__000000B3A3EC82C8\n000000067F000080000009200C0000078000-000000067F000080000009200C000007C000__000000B3A3EC82C8\n000000067F000080000009200C000007AE6F-000000067F000080000009200C00000845AB__000000B1FA75F501-000000B2CA27F641\n000000067F000080000009200C000007C000-000000067F000080000009200C0000080000__000000B3A3EC82C8\n000000067F000080000009200C0000080000-000000067F000080000009200C0000084000__000000B3A3EC82C8\n000000067F000080000009200C0000084000-000000067F000080000009200C0000088000__000000B3A3EC82C8\n000000067F00008
0000009200C00000845AB-000000067F000080000009200C000008DD09__000000B1FA75F501-000000B2CA27F641\n000000067F000080000009200C0000088000-000000067F000080000009200C000008C000__000000B3A3EC82C8\n000000067F000080000009200C000008C000-000000067F000080000009200C0000090000__000000B3A3EC82C8\n000000067F000080000009200C000008DD09-000000067F000080000009200C0100000000__000000B1FA75F501-000000B2CA27F641\n000000067F000080000009200C0000090000-000000067F000080000009200C0000094000__000000B3A3EC82C8\n000000067F000080000009200C0000094000-000000067F000080000009200C0000098000__000000B3A3EC82C8\n000000067F000080000009200C000009567A-000000067F000080000009200C000009EDE0__000000B2CA27F641-000000B3AB3B7FC9\n000000067F000080000009200C0000098000-000000067F000080000009200C000009C000__000000B3A3EC82C8\n000000067F000080000009200C000009C000-000000067F000080000009200C00000A0000__000000B3A3EC82C8\n000000067F000080000009200C000009EDE0-000000067F000080000009200C00000A852B__000000B2CA27F641-000000B3AB3B7FC9\n000000067F000080000009200C00000A0000-000000067F000080000009200C00000A4000__000000B3A3EC82C8\n000000067F000080000009200C00000A4000-000000067F000080000009200C00000A8000__000000B3A3EC82C8\n000000067F000080000009200C00000A8000-000000067F000080000009200C00000AC000__000000B3A3EC82C8\n000000067F000080000009200C00000A852B-000000067F000080000009200C00000B1C91__000000B2CA27F641-000000B3AB3B7FC9\n000000067F000080000009200C00000AC000-000000067F000080000009200C00000B0000__000000B3A3EC82C8\n000000067F000080000009200C00000B0000-000000067F000080000009200C00000B4000__000000B3A3EC82C8\n000000067F000080000009200C00000B1C91-000000067F000080000009200C00000BB3F7__000000B2CA27F641-000000B3AB3B7FC9\n000000067F000080000009200C00000B4000-000000067F000080000009200C00000B8000__000000B3A3EC82C8\n000000067F000080000009200C00000B8000-000000067F000080000009200C00000BC000__000000B3A3EC82C8\n000000067F000080000009200C00000BB3F7-000000067F000080000009200C00000C4B0C__000000B2CA27F641-000000B3AB3B7FC9\n000000067F000080000009200C00000BC000
-000000067F000080000009200C00000C0000__000000B3A3EC82C8\n000000067F000080000009200C00000C0000-000000067F000080000009200C00000C4000__000000B3A3EC82C8\n000000067F000080000009200C00000C4000-000000067F000080000009200C00000C8000__000000B3A3EC82C8\n000000067F000080000009200C00000C4B0C-000000067F000080000009200C00000CE272__000000B2CA27F641-000000B3AB3B7FC9\n000000067F000080000009200C00000C8000-000000067F000080000009200C00000CC000__000000B3A3EC82C8\n000000067F000080000009200C00000CC000-000000067F000080000009200C00000D0000__000000B3A3EC82C8\n000000067F000080000009200C00000CE272-000000067F000080000009200C00000D798F__000000B2CA27F641-000000B3AB3B7FC9\n000000067F000080000009200C00000D0000-000000067F000080000009200C00000D4000__000000B3A3EC82C8\n000000067F000080000009200C00000D4000-000000067F000080000009200C00000D8000__000000B3A3EC82C8\n000000067F000080000009200C00000D798F-000000067F000080000009200C00000E10F5__000000B2CA27F641-000000B3AB3B7FC9\n000000067F000080000009200C00000D8000-000000067F000080000009200C00000DC000__000000B3A3EC82C8\n000000067F000080000009200C00000DC000-000000067F000080000009200C00000E0000__000000B3A3EC82C8\n000000067F000080000009200C00000E0000-000000067F000080000009200C00000E4000__000000B3A3EC82C8\n000000067F000080000009200C00000E10F5-000000067F000080000009200C00000EA80B__000000B2CA27F641-000000B3AB3B7FC9\n000000067F000080000009200C00000E4000-000000067F000080000009200C00000E8000__000000B3A3EC82C8\n000000067F000080000009200C00000E8000-000000067F000080000009200C00000EC000__000000B3A3EC82C8\n000000067F000080000009200C00000EA80B-000000067F000080000009200C00000F3F4B__000000B2CA27F641-000000B3AB3B7FC9\n000000067F000080000009200C00000EC000-000000067F000080000009200C00000F0000__000000B3A3EC82C8\n000000067F000080000009200C00000F0000-000000067F000080000009200C00000F4000__000000B3A3EC82C8\n000000067F000080000009200C00000F3F4B-000000067F000080000009200C00000FD6B1__000000B2CA27F641-000000B3AB3B7FC9\n000000067F000080000009200C00000F4000-000000067F000080000009200C00000F8000_
_000000B3A3EC82C8\n000000067F000080000009200C00000F8000-000000067F000080000009200C00000FC000__000000B3A3EC82C8\n000000067F000080000009200C00000FC000-000000067F000080000009200C0000100000__000000B3A3EC82C8\n000000067F000080000009200C00000FD6B1-000000067F000080000009200C0000106DD5__000000B2CA27F641-000000B3AB3B7FC9\n000000067F000080000009200C0000100000-000000067F000080000009200C0000104000__000000B3A3EC82C8\n000000067F000080000009200C0000104000-000000067F000080000009200C0000108000__000000B3A3EC82C8\n000000067F000080000009200C0000106DD5-000000067F000080000009200C000011050B__000000B2CA27F641-000000B3AB3B7FC9\n000000067F000080000009200C0000108000-000000067F000080000009200C000010C000__000000B3A3EC82C8\n000000067F000080000009200C000010C000-030000000000000000000000000000000002__000000B3A3EC82C8\n000000067F000080000009200C000011050B-01000000000000000100000004000000001C__000000B2CA27F641-000000B3AB3B7FC9\n000000067F00008000000920140000005289-000000067F0000800000092014000000D19A__000000B3AB3B7FC9-000000B4208FF3D1\n000000067F0000800000092014000000D19A-000000067F000080000009201400000150A9__000000B3AB3B7FC9-000000B4208FF3D1\n000000067F000080000009201400000150A9-000000067F0000800000092014000001CFBA__000000B3AB3B7FC9-000000B4208FF3D1\n000000067F0000800000092014000001CFBA-000000067F00008000000920140000024ECB__000000B3AB3B7FC9-000000B4208FF3D1\n000000067F00008000000920140000024ECB-000000067F0000800000092014000002CDDB__000000B3AB3B7FC9-000000B4208FF3D1\n000000067F0000800000092014000002CDDB-000000067F000080000009400C000000830C__000000B3AB3B7FC9-000000B4208FF3D1\n000000067F000080000009400C0000000000-000000067F000080000009400C0000004000__000000B5CED8CF78\n000000067F000080000009400C0000004000-000000067F000080000009400C0000008000__000000B5CED8CF78\n000000067F000080000009400C0000008000-000000067F000080000009400C000000C000__000000B5CED8CF78\n000000067F000080000009400C000000830C-000000067F000080000009400C0000011A72__000000B3AB3B7FC9-000000B4208FF3D1\n000000067F000080000009400C000000C000-0000000
67F000080000009400C0000010000__000000B5CED8CF78\n000000067F000080000009400C0000010000-000000067F000080000009400C0000014000__000000B568835548\n000000067F000080000009400C0000011A72-030000000000000000000000000000000002__000000B3AB3B7FC9-000000B4208FF3D1\n000000067F000080000009400C0000012E51-000000067F000080000009400C000001C5B7__000000B4208FF3D1-000000B43089EC11\n000000067F000080000009400C0000012E51-000000067F000080000009400C000001C5B7__000000B4208FF3D1-000000B4E047E5A9\n000000067F000080000009400C0000014000-000000067F000080000009400C0000018000__000000B568835548\n000000067F000080000009400C0000018000-000000067F000080000009400C000001C000__000000B568835548\n000000067F000080000009400C000001C000-000000067F000080000009400C0000020000__000000B568835548\n000000067F000080000009400C000001C5B7-000000067F000080000009400C0000025D1D__000000B4208FF3D1-000000B4E047E5A9\n000000067F000080000009400C000001C5B7-000000067F000080000009400C0100000000__000000B4208FF3D1-000000B43089EC11\n000000067F000080000009400C0000020000-000000067F000080000009400C0000024000__000000B568835548\n000000067F000080000009400C0000024000-000000067F000080000009400C0000028000__000000B568835548\n000000067F000080000009400C0000025D1D-000000067F000080000009400C000002F483__000000B4208FF3D1-000000B4E047E5A9\n000000067F000080000009400C0000028000-000000067F000080000009400C000002C000__000000B568835548\n000000067F000080000009400C000002C000-000000067F000080000009400C0000030000__000000B568835548\n000000067F000080000009400C000002F483-000000067F000080000009400C0000038B96__000000B4208FF3D1-000000B4E047E5A9\n000000067F000080000009400C0000030000-000000067F000080000009400C0000034000__000000B568835548\n000000067F000080000009400C0000034000-000000067F000080000009400C0000038000__000000B568835548\n000000067F000080000009400C0000038000-000000067F000080000009400C000003C000__000000B568835548\n000000067F000080000009400C0000038B96-000000067F000080000009400C00000422FC__000000B4208FF3D1-000000B4E047E5A9\n000000067F000080000009400C000003C000-000000067F0
00080000009400C0000040000__000000B568835548\n000000067F000080000009400C0000040000-000000067F000080000009400C0000044000__000000B568835548\n000000067F000080000009400C00000422FC-000000067F000080000009400C000004BA0C__000000B4208FF3D1-000000B4E047E5A9\n000000067F000080000009400C0000044000-000000067F000080000009400C0000048000__000000B568835548\n000000067F000080000009400C0000048000-000000067F000080000009400C000004C000__000000B568835548\n000000067F000080000009400C000004BA0C-000000067F000080000009400C0000055141__000000B4208FF3D1-000000B4E047E5A9\n000000067F000080000009400C000004C000-000000067F000080000009400C0000050000__000000B568835548\n000000067F000080000009400C0000050000-000000067F000080000009400C0000054000__000000B568835548\n000000067F000080000009400C0000054000-000000067F000080000009400C0000058000__000000B568835548\n000000067F000080000009400C0000055141-000000067F000080000009400C000005E8A7__000000B4208FF3D1-000000B4E047E5A9\n000000067F000080000009400C0000058000-000000067F000080000009400C000005C000__000000B568835548\n000000067F000080000009400C000005C000-000000067F000080000009400C0000060000__000000B568835548\n000000067F000080000009400C000005E8A7-000000067F000080000009400C0000067FC1__000000B4208FF3D1-000000B4E047E5A9\n000000067F000080000009400C0000060000-000000067F000080000009400C0000064000__000000B568835548\n000000067F000080000009400C0000064000-000000067F000080000009400C0000068000__000000B568835548\n000000067F000080000009400C0000067FC1-000000067F000080000009400C0000071709__000000B4208FF3D1-000000B4E047E5A9\n000000067F000080000009400C0000068000-000000067F000080000009400C000006C000__000000B568835548\n000000067F000080000009400C000006C000-000000067F000080000009400C0000070000__000000B568835548\n000000067F000080000009400C0000070000-000000067F000080000009400C0000074000__000000B568835548\n000000067F000080000009400C0000071709-000000067F000080000009400C000007AE6F__000000B4208FF3D1-000000B4E047E5A9\n000000067F000080000009400C0000074000-000000067F000080000009400C0000078000__000000B5688
35548\n000000067F000080000009400C0000078000-000000067F000080000009400C000007C000__000000B568835548\n000000067F000080000009400C000007AE6F-000000067F000080000009400C00000845AB__000000B4208FF3D1-000000B4E047E5A9\n000000067F000080000009400C000007C000-000000067F000080000009400C0000080000__000000B568835548\n000000067F000080000009400C0000080000-000000067F000080000009400C0000084000__000000B568835548\n000000067F000080000009400C0000084000-000000067F000080000009400C0000088000__000000B568835548\n000000067F000080000009400C00000845AB-000000067F000080000009400C0100000000__000000B4208FF3D1-000000B4E047E5A9\n000000067F000080000009400C0000088000-000000067F000080000009400C000008C000__000000B568835548\n000000067F000080000009400C000008C000-000000067F000080000009400C0000090000__000000B568835548\n000000067F000080000009400C000008DEA4-000000067F000080000009400C00000975C4__000000B4E047E5A9-000000B5CED8CF79\n000000067F000080000009400C0000090000-000000067F000080000009400C0000094000__000000B568835548\n000000067F000080000009400C0000094000-000000067F000080000009400C0000098000__000000B568835548\n000000067F000080000009400C00000975C4-000000067F000080000009400C00000A0D0A__000000B4E047E5A9-000000B5CED8CF79\n000000067F000080000009400C0000098000-000000067F000080000009400C000009C000__000000B568835548\n000000067F000080000009400C000009C000-000000067F000080000009400C00000A0000__000000B568835548\n000000067F000080000009400C00000A0000-000000067F000080000009400C00000A4000__000000B568835548\n000000067F000080000009400C00000A0D0A-000000067F000080000009400C00000AA470__000000B4E047E5A9-000000B5CED8CF79\n000000067F000080000009400C00000A4000-000000067F000080000009400C00000A8000__000000B568835548\n000000067F000080000009400C00000A8000-000000067F000080000009400C00000AC000__000000B568835548\n000000067F000080000009400C00000AA470-000000067F000080000009400C00000B3BB2__000000B4E047E5A9-000000B5CED8CF79\n000000067F000080000009400C00000AC000-000000067F000080000009400C00000B0000__000000B568835548\n000000067F000080000009400C00000
B0000-000000067F000080000009400C00000B4000__000000B568835548\n000000067F000080000009400C00000B3BB2-000000067F000080000009400C00000BD30A__000000B4E047E5A9-000000B5CED8CF79\n000000067F000080000009400C00000B4000-000000067F000080000009400C00000B8000__000000B568835548\n000000067F000080000009400C00000B8000-000000067F000080000009400C00000BC000__000000B568835548\n000000067F000080000009400C00000BC000-000000067F000080000009400C00000C0000__000000B568835548\n000000067F000080000009400C00000BD30A-000000067F000080000009400C00000C6A30__000000B4E047E5A9-000000B5CED8CF79\n000000067F000080000009400C00000C0000-000000067F000080000009400C00000C4000__000000B568835548\n000000067F000080000009400C00000C4000-000000067F000080000009400C00000C8000__000000B568835548\n000000067F000080000009400C00000C6A30-000000067F000080000009400C00000D0194__000000B4E047E5A9-000000B5CED8CF79\n000000067F000080000009400C00000C8000-000000067F000080000009400C00000CC000__000000B568835548\n000000067F000080000009400C00000CC000-000000067F000080000009400C00000D0000__000000B568835548\n000000067F000080000009400C00000D0000-000000067F000080000009400C00000D4000__000000B568835548\n000000067F000080000009400C00000D0194-000000067F000080000009400C00000D98FA__000000B4E047E5A9-000000B5CED8CF79\n000000067F000080000009400C00000D4000-030000000000000000000000000000000002__000000B568835548\n000000067F000080000009400C00000D98FA-000000067F000080000009400C00000E300D__000000B4E047E5A9-000000B5CED8CF79\n000000067F000080000009400C00000E300D-000000067F000080000009400C00000EC773__000000B4E047E5A9-000000B5CED8CF79\n000000067F000080000009400C00000EC773-000000067F000080000009400C00000F5ED9__000000B4E047E5A9-000000B5CED8CF79\n000000067F000080000009400C00000F5ED9-000000067F000080000009400C00000FF60C__000000B4E047E5A9-000000B5CED8CF79\n000000067F000080000009400C00000FF60C-000000067F000080000009400C0000108D1D__000000B4E047E5A9-000000B5CED8CF79\n000000067F000080000009400C0000108D1D-000000067F000080000009400C0000111C20__000000B4E047E5A9-000000B5CED8CF79\n0
00000067F000080000009400C00FFFFFFFF-030000000000000000000000000000000002__000000B4E047E5A9-000000B5CED8CF79\n000000067F000080000009600C0000000000-000000067F000080000009600C0000004000__000000B79F439FE0\n000000067F000080000009600C0000004000-000000067F000080000009600C0000008000__000000B79F439FE0\n000000067F000080000009600C0000008000-000000067F000080000009600C000000C000__000000B79F439FE0\n000000067F000080000009600C000000974F-000000067F000080000009600C0000012EB5__000000B5CED8CF79-000000B63EADE5B9\n000000067F000080000009600C000000C000-000000067F000080000009600C0000010000__000000B79F439FE0\n000000067F000080000009600C0000010000-000000067F000080000009600C0000014000__000000B79F439FE0\n000000067F000080000009600C0000012EB5-000000067F000080000009600C000001C60A__000000B5CED8CF79-000000B63EADE5B9\n000000067F000080000009600C0000014000-000000067F000080000009600C0000018000__000000B79F439FE0\n000000067F000080000009600C0000018000-000000067F000080000009600C000001C000__000000B79F439FE0\n000000067F000080000009600C000001C000-000000067F000080000009600C0000020000__000000B79F439FE0\n000000067F000080000009600C000001C60A-000000067F000080000009600C0000025D38__000000B5CED8CF79-000000B63EADE5B9\n000000067F000080000009600C0000020000-000000067F000080000009600C0000024000__000000B79F439FE0\n000000067F000080000009600C0000024000-000000067F000080000009600C0000028000__000000B79F439FE0\n000000067F000080000009600C0000025D38-000000067F000080000009600C000002F49E__000000B5CED8CF79-000000B63EADE5B9\n000000067F000080000009600C0000028000-000000067F000080000009600C000002C000__000000B79F439FE0\n000000067F000080000009600C000002C000-000000067F000080000009600C0000030000__000000B79F439FE0\n000000067F000080000009600C000002F49E-000000067F000080000009600C0000038BB1__000000B5CED8CF79-000000B63EADE5B9\n000000067F000080000009600C0000030000-000000067F000080000009600C0000034000__000000B79F439FE0\n000000067F000080000009600C0000034000-000000067F000080000009600C0000038000__000000B79F439FE0\n000000067F000080000009600C0000038000-00
0000067F000080000009600C000003C000__000000B79F439FE0\n000000067F000080000009600C0000038BB1-000000067F000080000009600C0000042317__000000B5CED8CF79-000000B63EADE5B9\n000000067F000080000009600C000003C000-000000067F000080000009600C0000040000__000000B79F439FE0\n000000067F000080000009600C0000040000-000000067F000080000009600C0000044000__000000B79D17BFD0\n000000067F000080000009600C0000040000-000000067F000080000009600C0000044000__000000B8606C92A0\n000000067F000080000009600C0000042317-030000000000000000000000000000000002__000000B5CED8CF79-000000B63EADE5B9\n000000067F000080000009600C000004236E-000000067F000080000009600C000004BAD4__000000B63EADE5B9-000000B6DE71F5F9\n000000067F000080000009600C0000044000-000000067F000080000009600C0000048000__000000B79D17BFD0\n000000067F000080000009600C0000044000-000000067F000080000009600C0000048000__000000B8606C92A0\n000000067F000080000009600C0000048000-000000067F000080000009600C000004C000__000000B79D17BFD0\n000000067F000080000009600C0000048000-000000067F000080000009600C000004C000__000000B8606C92A0\n000000067F000080000009600C000004BAD4-000000067F000080000009600C0000055208__000000B63EADE5B9-000000B6DE71F5F9\n000000067F000080000009600C000004C000-000000067F000080000009600C0000050000__000000B79D17BFD0\n000000067F000080000009600C000004C000-000000067F000080000009600C0000050000__000000B8606C92A0\n000000067F000080000009600C0000050000-000000067F000080000009600C0000054000__000000B79D17BFD0\n000000067F000080000009600C0000050000-000000067F000080000009600C0000054000__000000B8606C92A0\n000000067F000080000009600C0000054000-000000067F000080000009600C0000058000__000000B79D17BFD0\n000000067F000080000009600C0000054000-000000067F000080000009600C0000058000__000000B8606C92A0\n000000067F000080000009600C0000055208-000000067F000080000009600C000005E96E__000000B63EADE5B9-000000B6DE71F5F9\n000000067F000080000009600C0000055A77-000000067F000080000009600C00000AAEA5__000000B808718889-000000B8606C92A1\n000000067F000080000009600C0000058000-000000067F000080000009600C000005C000__00
0000B79D17BFD0\n000000067F000080000009600C0000058000-000000067F000080000009600C000005C000__000000B8606C92A0\n000000067F000080000009600C000005C000-000000067F000080000009600C0000060000__000000B79D17BFD0\n000000067F000080000009600C000005C000-000000067F000080000009600C0000060000__000000B8606C92A0\n000000067F000080000009600C000005E96E-000000067F000080000009600C00000680D4__000000B63EADE5B9-000000B6DE71F5F9\n000000067F000080000009600C0000060000-000000067F000080000009600C0000064000__000000B79D17BFD0\n000000067F000080000009600C0000060000-000000067F000080000009600C0000064000__000000B8606C92A0\n000000067F000080000009600C0000064000-000000067F000080000009600C0000068000__000000B79D17BFD0\n000000067F000080000009600C0000064000-000000067F000080000009600C0000068000__000000B8606C92A0\n000000067F000080000009600C0000068000-000000067F000080000009600C000006C000__000000B79D17BFD0\n000000067F000080000009600C0000068000-000000067F000080000009600C000006C000__000000B8606C92A0\n000000067F000080000009600C00000680D4-000000067F000080000009600C000007180B__000000B63EADE5B9-000000B6DE71F5F9\n000000067F000080000009600C000006C000-000000067F000080000009600C0000070000__000000B79D17BFD0\n000000067F000080000009600C000006C000-000000067F000080000009600C0000070000__000000B8606C92A0\n000000067F000080000009600C0000070000-000000067F000080000009600C0000074000__000000B79D17BFD0\n000000067F000080000009600C0000070000-000000067F000080000009600C0000074000__000000B8606C92A0\n000000067F000080000009600C000007180B-000000067F000080000009600C000007AF71__000000B63EADE5B9-000000B6DE71F5F9\n000000067F000080000009600C0000074000-000000067F000080000009600C0000078000__000000B79D17BFD0\n000000067F000080000009600C0000074000-000000067F000080000009600C0000078000__000000B8606C92A0\n000000067F000080000009600C0000078000-000000067F000080000009600C000007C000__000000B79D17BFD0\n000000067F000080000009600C0000078000-000000067F000080000009600C000007C000__000000B8606C92A0\n000000067F000080000009600C000007AF71-000000067F000080000009600C00000846D7
__000000B63EADE5B9-000000B6DE71F5F9\n000000067F000080000009600C000007C000-000000067F000080000009600C0000080000__000000B79D17BFD0\n000000067F000080000009600C000007C000-000000067F000080000009600C0000080000__000000B8606C92A0\n000000067F000080000009600C0000080000-000000067F000080000009600C0000084000__000000B79D17BFD0\n000000067F000080000009600C0000080000-000000067F000080000009600C0000084000__000000B8606C92A0\n000000067F000080000009600C0000084000-000000067F000080000009600C0000088000__000000B79D17BFD0\n000000067F000080000009600C0000084000-000000067F000080000009600C0000088000__000000B8606C92A0\n000000067F000080000009600C00000846D7-000000067F000080000009600C000008DE0C__000000B63EADE5B9-000000B6DE71F5F9\n000000067F000080000009600C0000088000-000000067F000080000009600C000008C000__000000B79D17BFD0\n000000067F000080000009600C0000088000-000000067F000080000009600C000008C000__000000B8606C92A0\n000000067F000080000009600C000008C000-000000067F000080000009600C0000090000__000000B79D17BFD0\n000000067F000080000009600C000008C000-000000067F000080000009600C0000090000__000000B8606C92A0\n000000067F000080000009600C000008DE0C-000000067F000080000009600C000009752C__000000B63EADE5B9-000000B6DE71F5F9\n000000067F000080000009600C0000090000-000000067F000080000009600C0000094000__000000B79D17BFD0\n000000067F000080000009600C0000090000-000000067F000080000009600C0000094000__000000B8606C92A0\n000000067F000080000009600C0000094000-000000067F000080000009600C0000098000__000000B79D17BFD0\n000000067F000080000009600C0000094000-000000067F000080000009600C0000098000__000000B8606C92A0\n000000067F000080000009600C000009752C-000000067F000080000009600C00000A0C92__000000B63EADE5B9-000000B6DE71F5F9\n000000067F000080000009600C0000098000-000000067F000080000009600C000009C000__000000B79D17BFD0\n000000067F000080000009600C0000098000-000000067F000080000009600C000009C000__000000B8606C92A0\n000000067F000080000009600C000009C000-000000067F000080000009600C00000A0000__000000B79D17BFD0\n000000067F000080000009600C000009C000-000000067F00008
0000009600C00000A0000__000000B8606C92A0\n000000067F000080000009600C00000A0000-000000067F000080000009600C00000A4000__000000B79D17BFD0\n000000067F000080000009600C00000A0000-000000067F000080000009600C00000A4000__000000B8606C92A0\n000000067F000080000009600C00000A0C92-000000067F000080000009600C0100000000__000000B63EADE5B9-000000B6DE71F5F9\n000000067F000080000009600C00000A4000-000000067F000080000009600C00000A8000__000000B79D17BFD0\n000000067F000080000009600C00000A4000-000000067F000080000009600C00000A8000__000000B8606C92A0\n000000067F000080000009600C00000A8000-000000067F000080000009600C00000AC000__000000B79D17BFD0\n000000067F000080000009600C00000A8000-000000067F000080000009600C00000AC000__000000B8606C92A0\n000000067F000080000009600C00000A93FD-000000067F000080000009600C00000B2B0C__000000B6DE71F5F9-000000B79E68FFF9\n000000067F000080000009600C00000AAEA5-000000067F000080000009600C0000101445__000000B808718889-000000B8606C92A1\n000000067F000080000009600C00000AC000-000000067F000080000009600C00000B0000__000000B79D17BFD0\n000000067F000080000009600C00000AC000-000000067F000080000009600C00000B0000__000000B8606C92A0\n000000067F000080000009600C00000B0000-000000067F000080000009600C00000B4000__000000B79D17BFD0\n000000067F000080000009600C00000B0000-000000067F000080000009600C00000B4000__000000B8606C92A0\n000000067F000080000009600C00000B2B0C-000000067F000080000009600C00000BC272__000000B6DE71F5F9-000000B79E68FFF9\n000000067F000080000009600C00000B4000-000000067F000080000009600C00000B8000__000000B79D17BFD0\n000000067F000080000009600C00000B4000-000000067F000080000009600C00000B8000__000000B8606C92A0\n000000067F000080000009600C00000B8000-000000067F000080000009600C00000BC000__000000B79D17BFD0\n000000067F000080000009600C00000B8000-000000067F000080000009600C00000BC000__000000B8606C92A0\n000000067F000080000009600C00000BC000-000000067F000080000009600C00000C0000__000000B79D17BFD0\n000000067F000080000009600C00000BC000-000000067F000080000009600C00000C0000__000000B8606C92A0\n000000067F000080000009600C00000
BC272-000000067F000080000009600C00000C59A2__000000B6DE71F5F9-000000B79E68FFF9\n000000067F000080000009600C00000C0000-000000067F000080000009600C00000C4000__000000B79D17BFD0\n000000067F000080000009600C00000C0000-000000067F000080000009600C00000C4000__000000B8606C92A0\n000000067F000080000009600C00000C4000-000000067F000080000009600C00000C8000__000000B79D17BFD0\n000000067F000080000009600C00000C4000-000000067F000080000009600C00000C8000__000000B8606C92A0\n000000067F000080000009600C00000C59A2-000000067F000080000009600C00000CF108__000000B6DE71F5F9-000000B79E68FFF9\n000000067F000080000009600C00000C8000-000000067F000080000009600C00000CC000__000000B79D17BFD0\n000000067F000080000009600C00000C8000-000000067F000080000009600C00000CC000__000000B8606C92A0\n000000067F000080000009600C00000CC000-000000067F000080000009600C00000D0000__000000B79D17BFD0\n000000067F000080000009600C00000CC000-000000067F000080000009600C00000D0000__000000B8606C92A0\n000000067F000080000009600C00000CF108-000000067F000080000009600C00000D882B__000000B6DE71F5F9-000000B79E68FFF9\n000000067F000080000009600C00000D0000-000000067F000080000009600C00000D4000__000000B79D17BFD0\n000000067F000080000009600C00000D0000-000000067F000080000009600C00000D4000__000000B8606C92A0\n000000067F000080000009600C00000D4000-000000067F000080000009600C00000D8000__000000B79D17BFD0\n000000067F000080000009600C00000D4000-000000067F000080000009600C00000D8000__000000B8606C92A0\n000000067F000080000009600C00000D8000-000000067F000080000009600C00000DC000__000000B79D17BFD0\n000000067F000080000009600C00000D8000-000000067F000080000009600C00000DC000__000000B8606C92A0\n000000067F000080000009600C00000D882B-000000067F000080000009600C00000E1F7E__000000B6DE71F5F9-000000B79E68FFF9\n000000067F000080000009600C00000DC000-000000067F000080000009600C00000E0000__000000B79D17BFD0\n000000067F000080000009600C00000DC000-000000067F000080000009600C00000E0000__000000B8606C92A0\n000000067F000080000009600C00000E0000-000000067F000080000009600C00000E4000__000000B79D17BFD0\n000000067F
000080000009600C00000E0000-000000067F000080000009600C00000E4000__000000B8606C92A0\n000000067F000080000009600C00000E1F7E-000000067F000080000009600C00000EB6E4__000000B6DE71F5F9-000000B79E68FFF9\n000000067F000080000009600C00000E4000-000000067F000080000009600C00000E8000__000000B79D17BFD0\n000000067F000080000009600C00000E4000-000000067F000080000009600C00000E8000__000000B8606C92A0\n000000067F000080000009600C00000E8000-000000067F000080000009600C00000EC000__000000B79D17BFD0\n000000067F000080000009600C00000E8000-000000067F000080000009600C00000EC000__000000B8606C92A0\n000000067F000080000009600C00000EB6E4-000000067F000080000009600C00000F4E0B__000000B6DE71F5F9-000000B79E68FFF9\n000000067F000080000009600C00000EC000-000000067F000080000009600C00000F0000__000000B79D17BFD0\n000000067F000080000009600C00000EC000-000000067F000080000009600C00000F0000__000000B8606C92A0\n000000067F000080000009600C00000F0000-000000067F000080000009600C00000F4000__000000B79D17BFD0\n000000067F000080000009600C00000F0000-000000067F000080000009600C00000F4000__000000B8606C92A0\n000000067F000080000009600C00000F4000-000000067F000080000009600C00000F8000__000000B79D17BFD0\n000000067F000080000009600C00000F4000-000000067F000080000009600C00000F8000__000000B8606C92A0\n000000067F000080000009600C00000F4E0B-000000067F000080000009600C00000FE571__000000B6DE71F5F9-000000B79E68FFF9\n000000067F000080000009600C00000F8000-000000067F000080000009600C00000FC000__000000B79D17BFD0\n000000067F000080000009600C00000F8000-000000067F000080000009600C00000FC000__000000B8606C92A0\n000000067F000080000009600C00000FC000-000000067F000080000009600C0000100000__000000B79D17BFD0\n000000067F000080000009600C00000FC000-000000067F000080000009600C0000100000__000000B8606C92A0\n000000067F000080000009600C00000FE571-000000067F000080000009600C0000107CD7__000000B6DE71F5F9-000000B79E68FFF9\n000000067F000080000009600C0000100000-000000067F000080000009600C0000104000__000000B79D17BFD0\n000000067F000080000009600C0000100000-000000067F000080000009600C0000104000__000000B
8606C92A0\n000000067F000080000009600C000010144D-000000067F0000800000096014000000E7D9__000000B808718889-000000B8606C92A1\n000000067F000080000009600C0000104000-000000067F000080000009600C0000108000__000000B79D17BFD0\n000000067F000080000009600C0000104000-000000067F000080000009600C0000108000__000000B8606C92A0\n000000067F000080000009600C0000107CD7-000000067F000080000009600C000011140C__000000B6DE71F5F9-000000B79E68FFF9\n000000067F000080000009600C0000108000-000000067F000080000009600C000010C000__000000B79D17BFD0\n000000067F000080000009600C0000108000-000000067F000080000009600C000010C000__000000B8606C92A0\n000000067F000080000009600C000010C000-000000067F000080000009600C0000110000__000000B79D17BFD0\n000000067F000080000009600C000010C000-000000067F000080000009600C0000110000__000000B8606C92A0\n000000067F000080000009600C0000110000-000000067F00008000000960120100000000__000000B8606C92A0\n000000067F000080000009600C0000110000-030000000000000000000000000000000002__000000B79D17BFD0\n000000067F000080000009600C000011140C-01000000000000000100000004000000001C__000000B6DE71F5F9-000000B79E68FFF9\n000000067F000080000009600C020000000B-000000067F0000800000096014000000571F__000000B79E68FFF9-000000B808718889\n000000067F00008000000960140000000000-000000067F00008000000960140000004000__000000B8606C92A0\n000000067F00008000000960140000004000-000000067F00008000000960140000008000__000000B8606C92A0\n000000067F0000800000096014000000571F-000000067F0000800000096014000000CB61__000000B79E68FFF9-000000B808718889\n000000067F00008000000960140000008000-000000067F0000800000096014000000C000__000000B8606C92A0\n000000067F0000800000096014000000C000-000000067F00008000000960140000010000__000000B8606C92A0\n000000067F0000800000096014000000CB61-000000067F00008000000960140000013F98__000000B79E68FFF9-000000B808718889\n000000067F0000800000096014000000E7DB-000000067F00008000000960140000022A8D__000000B808718889-000000B8606C92A1\n000000067F00008000000960140000010000-000000067F00008000000960140000014000__000000B8606C92A0\n000000067F
00008000000960140000013F98-000000067F0000800000096014000001B3C2__000000B79E68FFF9-000000B808718889\n000000067F00008000000960140000014000-000000067F00008000000960140000018000__000000B8606C92A0\n000000067F00008000000960140000018000-000000067F0000800000096014000001C000__000000B8606C92A0\n000000067F0000800000096014000001B3C2-000000067F000080000009601400000227FC__000000B79E68FFF9-000000B808718889\n000000067F0000800000096014000001C000-000000067F00008000000960140000020000__000000B8606C92A0\n000000067F00008000000960140000020000-000000067F00008000000960140000024000__000000B8606C92A0\n000000067F000080000009601400000227FC-000000067F00008000000960140000029BD8__000000B79E68FFF9-000000B808718889\n000000067F00008000000960140000022A8D-030000000000000000000000000000000002__000000B808718889-000000B8606C92A1\n000000067F00008000000960140000024000-000000067F00008000000960140000028000__000000B8606C92A0\n000000067F00008000000960140000028000-000000067F0000800000096014000002C000__000000B8606C92A0\n000000067F00008000000960140000029BD8-030000000000000000000000000000000002__000000B79E68FFF9-000000B808718889\n000000067F0000800000096014000002C000-030000000000000000000000000000000002__000000B8606C92A0\n000000067F000080000009800C0000009748-000000067F000080000009800C0000012EAE__000000B8606C92A1-000000B8E03BF0B9\n000000067F000080000009800C0000012EAE-000000067F000080000009800C000001C60A__000000B8606C92A1-000000B8E03BF0B9\n000000067F000080000009800C000001C60A-000000067F000080000009800C0000025D38__000000B8606C92A1-000000B8E03BF0B9\n000000067F000080000009800C0000025D38-000000067F000080000009800C000002F49E__000000B8606C92A1-000000B8E03BF0B9\n000000067F000080000009800C000002F49E-000000067F000080000009800C0000038BB1__000000B8606C92A1-000000B8E03BF0B9\n000000067F000080000009800C0000038BB1-000000067F000080000009800C0000042317__000000B8606C92A1-000000B8E03BF0B9\n000000067F000080000009800C0000042317-000000067F000080000009800C000004BA7D__000000B8606C92A1-000000B8E03BF0B9\n000000067F000080000009800C000004BA7D-03
0000000000000000000000000000000002__000000B8606C92A1-000000B8E03BF0B9\n000000067F000080000009800C000004BAD2-000000067F000080000009800C0000055206__000000B8E03BF0B9-000000B97FFFFFE9\n000000067F000080000009800C0000055206-000000067F000080000009800C000005E911__000000B8E03BF0B9-000000B97FFFFFE9\n000000067F000080000009800C000005E911-000000067F000080000009800C000006802B__000000B8E03BF0B9-000000B97FFFFFE9\n000000067F000080000009800C000006802B-000000067F000080000009800C0000071782__000000B8E03BF0B9-000000B97FFFFFE9\n000000067F000080000009800C0000071782-000000067F000080000009800C000007AEE8__000000B8E03BF0B9-000000B97FFFFFE9\n000000067F000080000009800C000007AEE8-000000067F000080000009800C000008460B__000000B8E03BF0B9-000000B97FFFFFE9\n000000067F000080000009800C000008460B-000000067F000080000009800C000008DD71__000000B8E03BF0B9-000000B97FFFFFE9\n000000067F000080000009800C000008DD71-000000067F000080000009800C00000974D7__000000B8E03BF0B9-000000B97FFFFFE9\n000000067F000080000009800C00000974D7-000000067F000080000009800C00000A0C0B__000000B8E03BF0B9-000000B97FFFFFE9\n000000067F000080000009800C00000A0C0B-000000067F000080000009800C00000AA371__000000B8E03BF0B9-000000B97FFFFFE9\n000000067F000080000009800C00000A8000-000000067F000080000009800C00000AC000__000000BA2E67EA20\n000000067F000080000009800C00000AA371-000000067F000080000009800C0100000000__000000B8E03BF0B9-000000B97FFFFFE9\n000000067F000080000009800C00000AA4F5-000000067F000080000009800C00000B3C0B__000000B97FFFFFE9-000000BA1FC3FB39\n000000067F000080000009800C00000AC000-000000067F000080000009800C00000B0000__000000BA2E67EA20\n000000067F000080000009800C00000B0000-000000067F000080000009800C00000B4000__000000BA2E67EA20\n000000067F000080000009800C00000B3C0B-000000067F000080000009800C00000BD371__000000B97FFFFFE9-000000BA1FC3FB39\n000000067F000080000009800C00000B4000-000000067F000080000009800C00000B8000__000000BA2E67EA20\n000000067F000080000009800C00000B8000-000000067F000080000009800C00000BC000__000000BA2E67EA20\n000000067F000080000009800C00000BC0
00-000000067F000080000009800C00000C0000__000000BA2E67EA20\n000000067F000080000009800C00000BD371-000000067F000080000009800C00000C6AD7__000000B97FFFFFE9-000000BA1FC3FB39\n000000067F000080000009800C00000C0000-000000067F000080000009800C00000C4000__000000BA2E67EA20\n000000067F000080000009800C00000C4000-000000067F000080000009800C00000C8000__000000BA2E67EA20\n000000067F000080000009800C00000C6AD7-000000067F000080000009800C00000D020B__000000B97FFFFFE9-000000BA1FC3FB39\n000000067F000080000009800C00000C8000-000000067F000080000009800C00000CC000__000000BA2E67EA20\n000000067F000080000009800C00000CC000-000000067F000080000009800C00000D0000__000000BA2E67EA20\n000000067F000080000009800C00000D0000-000000067F000080000009800C00000D4000__000000BA2E67EA20\n000000067F000080000009800C00000D020B-000000067F000080000009800C00000D9971__000000B97FFFFFE9-000000BA1FC3FB39\n000000067F000080000009800C00000D4000-000000067F000080000009800C00000D8000__000000BA2E67EA20\n000000067F000080000009800C00000D8000-000000067F000080000009800C00000DC000__000000BA2E67EA20\n000000067F000080000009800C00000D9971-000000067F000080000009800C00000E30D7__000000B97FFFFFE9-000000BA1FC3FB39\n000000067F000080000009800C00000DC000-000000067F000080000009800C00000E0000__000000BA2E67EA20\n000000067F000080000009800C00000E0000-000000067F000080000009800C00000E4000__000000BA2E67EA20\n000000067F000080000009800C00000E30D7-000000067F000080000009800C00000EC80B__000000B97FFFFFE9-000000BA1FC3FB39\n000000067F000080000009800C00000E4000-000000067F000080000009800C00000E8000__000000BA2E67EA20\n000000067F000080000009800C00000E8000-000000067F000080000009800C00000EC000__000000BA2E67EA20\n000000067F000080000009800C00000EC000-000000067F000080000009800C00000F0000__000000BA2E67EA20\n000000067F000080000009800C00000EC80B-000000067F000080000009800C00000F5F38__000000B97FFFFFE9-000000BA1FC3FB39\n000000067F000080000009800C00000F0000-000000067F000080000009800C00000F4000__000000BA2E67EA20\n000000067F000080000009800C00000F4000-000000067F000080000009800C00000F800
0__000000BA2E67EA20\n000000067F000080000009800C00000F5F38-000000067F000080000009800C00000FF69E__000000B97FFFFFE9-000000BA1FC3FB39\n000000067F000080000009800C00000F8000-000000067F000080000009800C00000FC000__000000BA2E67EA20\n000000067F000080000009800C00000FC000-000000067F000080000009800C0000100000__000000BA2E67EA20\n000000067F000080000009800C00000FF69E-000000067F000080000009800C0000108DAF__000000B97FFFFFE9-000000BA1FC3FB39\n000000067F000080000009800C0000100000-000000067F000080000009800C0000104000__000000BA2E67EA20\n000000067F000080000009800C0000104000-000000067F000080000009800C0000108000__000000BA2E67EA20\n000000067F000080000009800C0000108000-000000067F000080000009800C000010C000__000000BA2E67EA20\n000000067F000080000009800C0000108DAF-000000067F000080000009800F0100000003__000000B97FFFFFE9-000000BA1FC3FB39\n000000067F000080000009800C000010C000-000000067F000080000009800C0000110000__000000BA2E67EA20\n000000067F000080000009800C000010EC71-000000067F000080000009801400000025C3__000000BA1FC3FB39-000000BA9685E7C1\n000000067F000080000009800C0000110000-030000000000000000000000000000000002__000000BA2E67EA20\n000000067F000080000009801400000025C3-000000067F0000800000098014000000A4D3__000000BA1FC3FB39-000000BA9685E7C1\n000000067F0000800000098014000000A4D3-000000067F000080000009801400000123E4__000000BA1FC3FB39-000000BA9685E7C1\n000000067F000080000009801400000123E4-000000067F0000800000098014000001A2F3__000000BA1FC3FB39-000000BA9685E7C1\n000000067F0000800000098014000001A2F3-000000067F00008000000980140000022204__000000BA1FC3FB39-000000BA9685E7C1\n000000067F00008000000980140000022204-000000067F0000800000098014000002A114__000000BA1FC3FB39-000000BA9685E7C1\n000000067F0000800000098014000002A114-000000067F000080000009A00C0000004DB3__000000BA1FC3FB39-000000BA9685E7C1\n000000067F000080000009A00C0000000000-000000067F000080000009A00C0000004000__000000BCEF79BE90\n000000067F000080000009A00C0000004000-000000067F000080000009A00C0000008000__000000BCEF79BE90\n000000067F000080000009A00C0000004DB3-03000
0000000000000000000000000000002__000000BA1FC3FB39-000000BA9685E7C1\n000000067F000080000009A00C0000008000-000000067F000080000009A00C000000C000__000000BC59629F98\n000000067F000080000009A00C0000008000-000000067F000080000009A00C000000C000__000000BD25E66810\n000000067F000080000009A00C00000096E8-000000067F000080000009A00C0000012E0B__000000BA9685E7C1-000000BB4643FBD1\n000000067F000080000009A00C000000C000-000000067F000080000009A00C0000010000__000000BC59629F98\n000000067F000080000009A00C000000C000-000000067F000080000009A00C0000010000__000000BD25E66810\n000000067F000080000009A00C0000010000-000000067F000080000009A00C0000014000__000000BC59629F98\n000000067F000080000009A00C0000010000-000000067F000080000009A00C0000014000__000000BD25E66810\n000000067F000080000009A00C0000012E0B-000000067F000080000009A00C000001C571__000000BA9685E7C1-000000BB4643FBD1\n000000067F000080000009A00C0000014000-000000067F000080000009A00C0000018000__000000BC59629F98\n000000067F000080000009A00C0000014000-000000067F000080000009A00C0000018000__000000BD25E66810\n000000067F000080000009A00C0000018000-000000067F000080000009A00C000001C000__000000BC59629F98\n000000067F000080000009A00C0000018000-000000067F000080000009A00C000001C000__000000BD25E66810\n000000067F000080000009A00C000001C000-000000067F000080000009A00C0000020000__000000BC59629F98\n000000067F000080000009A00C000001C000-000000067F000080000009A00C0000020000__000000BD25E66810\n000000067F000080000009A00C000001C571-000000067F000080000009A00C0000025CD7__000000BA9685E7C1-000000BB4643FBD1\n000000067F000080000009A00C0000020000-000000067F000080000009A00C0000024000__000000BC59629F98\n000000067F000080000009A00C0000020000-000000067F000080000009A00C0000024000__000000BD25E66810\n000000067F000080000009A00C0000024000-000000067F000080000009A00C0000028000__000000BC59629F98\n000000067F000080000009A00C0000024000-000000067F000080000009A00C0000028000__000000BD25E66810\n000000067F000080000009A00C0000025CD7-000000067F000080000009A00C000002F40B__000000BA9685E7C1-000000BB4643FBD1\n0000
00067F000080000009A00C0000028000-000000067F000080000009A00C000002C000__000000BC59629F98\n000000067F000080000009A00C0000028000-000000067F000080000009A00C000002C000__000000BD25E66810\n000000067F000080000009A00C000002C000-000000067F000080000009A00C0000030000__000000BC59629F98\n000000067F000080000009A00C000002C000-000000067F000080000009A00C0000030000__000000BD25E66810\n000000067F000080000009A00C000002F40B-000000067F000080000009A00C0000038B1E__000000BA9685E7C1-000000BB4643FBD1\n000000067F000080000009A00C0000030000-000000067F000080000009A00C0000034000__000000BC59629F98\n000000067F000080000009A00C0000030000-000000067F000080000009A00C0000034000__000000BD25E66810\n000000067F000080000009A00C0000034000-000000067F000080000009A00C0000038000__000000BC59629F98\n000000067F000080000009A00C0000034000-000000067F000080000009A00C0000038000__000000BD25E66810\n000000067F000080000009A00C0000038000-000000067F000080000009A00C000003C000__000000BC59629F98\n000000067F000080000009A00C0000038000-000000067F000080000009A00C000003C000__000000BD25E66810\n000000067F000080000009A00C0000038B1E-000000067F000080000009A00C0000042284__000000BA9685E7C1-000000BB4643FBD1\n000000067F000080000009A00C000003C000-000000067F000080000009A00C0000040000__000000BC59629F98\n000000067F000080000009A00C000003C000-000000067F000080000009A00C0000040000__000000BD25E66810\n000000067F000080000009A00C0000040000-000000067F000080000009A00C0000044000__000000BC59629F98\n000000067F000080000009A00C0000040000-000000067F000080000009A00C0000044000__000000BD25E66810\n000000067F000080000009A00C0000042284-000000067F000080000009A00C000004B9EA__000000BA9685E7C1-000000BB4643FBD1\n000000067F000080000009A00C0000044000-000000067F000080000009A00C0000048000__000000BC59629F98\n000000067F000080000009A00C0000044000-000000067F000080000009A00C0000048000__000000BD25E66810\n000000067F000080000009A00C0000048000-000000067F000080000009A00C000004C000__000000BC59629F98\n000000067F000080000009A00C0000048000-000000067F000080000009A00C000004C000__000000BD25E66810\n
000000067F000080000009A00C000004B9EA-000000067F000080000009A00C000005510B__000000BA9685E7C1-000000BB4643FBD1\n000000067F000080000009A00C000004C000-000000067F000080000009A00C0000050000__000000BC59629F98\n000000067F000080000009A00C000004C000-000000067F000080000009A00C0000050000__000000BD25E66810\n000000067F000080000009A00C0000050000-000000067F000080000009A00C0000054000__000000BC59629F98\n000000067F000080000009A00C0000050000-000000067F000080000009A00C0000054000__000000BD25E66810\n000000067F000080000009A00C0000054000-000000067F000080000009A00C0000058000__000000BC59629F98\n000000067F000080000009A00C0000054000-000000067F000080000009A00C0000058000__000000BD25E66810\n000000067F000080000009A00C000005510B-000000067F000080000009A00C000005E871__000000BA9685E7C1-000000BB4643FBD1\n000000067F000080000009A00C0000058000-000000067F000080000009A00C000005C000__000000BC59629F98\n000000067F000080000009A00C0000058000-000000067F000080000009A00C000005C000__000000BD25E66810\n000000067F000080000009A00C000005C000-000000067F000080000009A00C0000060000__000000BC59629F98\n000000067F000080000009A00C000005C000-000000067F000080000009A00C0000060000__000000BD25E66810\n000000067F000080000009A00C000005E871-000000067F000080000009A00C0000067F8B__000000BA9685E7C1-000000BB4643FBD1\n000000067F000080000009A00C0000060000-000000067F000080000009A00C0000064000__000000BC59629F98\n000000067F000080000009A00C0000060000-000000067F000080000009A00C0000064000__000000BD25E66810\n000000067F000080000009A00C0000064000-000000067F000080000009A00C0000068000__000000BC59629F98\n000000067F000080000009A00C0000064000-000000067F000080000009A00C0000068000__000000BD25E66810\n000000067F000080000009A00C0000067F8B-000000067F000080000009A00C00000716F1__000000BA9685E7C1-000000BB4643FBD1\n000000067F000080000009A00C0000068000-000000067F000080000009A00C000006C000__000000BC59629F98\n000000067F000080000009A00C0000068000-000000067F000080000009A00C000006C000__000000BD25E66810\n000000067F000080000009A00C000006C000-000000067F000080000009A00C000007000
0__000000BC59629F98\n000000067F000080000009A00C000006C000-000000067F000080000009A00C0000070000__000000BD25E66810\n000000067F000080000009A00C0000070000-000000067F000080000009A00C0000074000__000000BC53F74828\n000000067F000080000009A00C0000070000-000000067F000080000009A00C0000074000__000000BD25E66810\n000000067F000080000009A00C00000716F1-000000067F000080000009A00C0100000000__000000BA9685E7C1-000000BB4643FBD1\n000000067F000080000009A00C0000071875-000000067F000080000009A00C000007AFDB__000000BB4643FBD1-000000BBE607E8F1\n000000067F000080000009A00C0000071F8D-000000067F000080000009A00C00000E4F8F__000000BCEF79BE91-000000BD263A5849\n000000067F000080000009A00C0000074000-000000067F000080000009A00C0000078000__000000BC53F74828\n000000067F000080000009A00C0000074000-000000067F000080000009A00C0000078000__000000BD25E66810\n000000067F000080000009A00C0000078000-000000067F000080000009A00C000007C000__000000BC53F74828\n000000067F000080000009A00C0000078000-000000067F000080000009A00C000007C000__000000BD25E66810\n000000067F000080000009A00C00000794E0-000000067F000080000009A00C00000F2480__000000BC596B5D59-000000BCEF79BE91\n000000067F000080000009A00C000007AFDB-000000067F000080000009A00C000008470A__000000BB4643FBD1-000000BBE607E8F1\n000000067F000080000009A00C000007C000-000000067F000080000009A00C0000080000__000000BC53F74828\n000000067F000080000009A00C000007C000-000000067F000080000009A00C0000080000__000000BD25E66810\n000000067F000080000009A00C0000080000-000000067F000080000009A00C0000084000__000000BC53F74828\n000000067F000080000009A00C0000080000-000000067F000080000009A00C0000084000__000000BD25E66810\n000000067F000080000009A00C0000084000-000000067F000080000009A00C0000088000__000000BC53F74828\n000000067F000080000009A00C0000084000-000000067F000080000009A00C0000088000__000000BD25E66810\n000000067F000080000009A00C000008470A-000000067F000080000009A00C000008DE70__000000BB4643FBD1-000000BBE607E8F1\n000000067F000080000009A00C0000088000-000000067F000080000009A00C000008C000__000000BC53F74828\n000000067F0000800
00009A00C0000088000-000000067F000080000009A00C000008C000__000000BD25E66810\n000000067F000080000009A00C000008C000-000000067F000080000009A00C0000090000__000000BC53F74828\n000000067F000080000009A00C000008C000-000000067F000080000009A00C0000090000__000000BD25E66810\n000000067F000080000009A00C000008DE70-000000067F000080000009A00C0000097590__000000BB4643FBD1-000000BBE607E8F1\n000000067F000080000009A00C0000090000-000000067F000080000009A00C0000094000__000000BC53F74828\n000000067F000080000009A00C0000090000-000000067F000080000009A00C0000094000__000000BD25E66810\n000000067F000080000009A00C0000094000-000000067F000080000009A00C0000098000__000000BC53F74828\n000000067F000080000009A00C0000094000-000000067F000080000009A00C0000098000__000000BD25E66810\n000000067F000080000009A00C0000097590-000000067F000080000009A00C00000A0CF6__000000BB4643FBD1-000000BBE607E8F1\n000000067F000080000009A00C0000098000-000000067F000080000009A00C000009C000__000000BC53F74828\n000000067F000080000009A00C0000098000-000000067F000080000009A00C000009C000__000000BD25E66810\n000000067F000080000009A00C000009C000-000000067F000080000009A00C00000A0000__000000BC53F74828\n000000067F000080000009A00C000009C000-000000067F000080000009A00C00000A0000__000000BD25E66810\n000000067F000080000009A00C00000A0000-000000067F000080000009A00C00000A4000__000000BC53F74828\n000000067F000080000009A00C00000A0000-000000067F000080000009A00C00000A4000__000000BD25E66810\n000000067F000080000009A00C00000A0CF6-000000067F000080000009A00C00000AA40B__000000BB4643FBD1-000000BBE607E8F1\n000000067F000080000009A00C00000A4000-000000067F000080000009A00C00000A8000__000000BC53F74828\n000000067F000080000009A00C00000A4000-000000067F000080000009A00C00000A8000__000000BD25E66810\n000000067F000080000009A00C00000A8000-000000067F000080000009A00C00000AC000__000000BC53F74828\n000000067F000080000009A00C00000A8000-000000067F000080000009A00C00000AC000__000000BD25E66810\n000000067F000080000009A00C00000AA40B-000000067F000080000009A00C00000B3B4D__000000BB4643FBD1-000000BBE607E8
F1\n000000067F000080000009A00C00000AC000-000000067F000080000009A00C00000B0000__000000BC53F74828\n000000067F000080000009A00C00000AC000-000000067F000080000009A00C00000B0000__000000BD25E66810\n000000067F000080000009A00C00000B0000-000000067F000080000009A00C00000B4000__000000BC53F74828\n000000067F000080000009A00C00000B0000-000000067F000080000009A00C00000B4000__000000BD25E66810\n000000067F000080000009A00C00000B3B4D-000000067F000080000009A00C00000BD2B3__000000BB4643FBD1-000000BBE607E8F1\n000000067F000080000009A00C00000B4000-000000067F000080000009A00C00000B8000__000000BC53F74828\n000000067F000080000009A00C00000B4000-000000067F000080000009A00C00000B8000__000000BD25E66810\n000000067F000080000009A00C00000B8000-000000067F000080000009A00C00000BC000__000000BC53F74828\n000000067F000080000009A00C00000B8000-000000067F000080000009A00C00000BC000__000000BD25E66810\n000000067F000080000009A00C00000BC000-000000067F000080000009A00C00000C0000__000000BC53F74828\n000000067F000080000009A00C00000BC000-000000067F000080000009A00C00000C0000__000000BD25E66810\n000000067F000080000009A00C00000BD2B3-000000067F000080000009A00C00000C69D9__000000BB4643FBD1-000000BBE607E8F1\n000000067F000080000009A00C00000C0000-000000067F000080000009A00C00000C4000__000000BC53F74828\n000000067F000080000009A00C00000C0000-000000067F000080000009A00C00000C4000__000000BD25E66810\n000000067F000080000009A00C00000C4000-000000067F000080000009A00C00000C8000__000000BC53F74828\n000000067F000080000009A00C00000C4000-000000067F000080000009A00C00000C8000__000000BD25E66810\n000000067F000080000009A00C00000C69D9-000000067F000080000009A00C00000D010C__000000BB4643FBD1-000000BBE607E8F1\n000000067F000080000009A00C00000C8000-000000067F000080000009A00C00000CC000__000000BC53F74828\n000000067F000080000009A00C00000C8000-000000067F000080000009A00C00000CC000__000000BD25E66810\n000000067F000080000009A00C00000CC000-000000067F000080000009A00C00000D0000__000000BC53F74828\n000000067F000080000009A00C00000CC000-000000067F000080000009A00C00000D0000__000000BD25
E66810\n000000067F000080000009A00C00000D0000-000000067F000080000009A00C00000D4000__000000BC53F74828\n000000067F000080000009A00C00000D0000-000000067F000080000009A00C00000D4000__000000BD25E66810\n000000067F000080000009A00C00000D010C-000000067F000080000009A00C0100000000__000000BB4643FBD1-000000BBE607E8F1\n000000067F000080000009A00C00000D4000-000000067F000080000009A00C00000D8000__000000BC53F74828\n000000067F000080000009A00C00000D4000-000000067F000080000009A00C00000D8000__000000BD25E66810\n000000067F000080000009A00C00000D6C06-000000067F000080000009A00C00000E0166__000000BBE607E8F1-000000BC596B5D59\n000000067F000080000009A00C00000D8000-000000067F000080000009A00C00000DC000__000000BC53F74828\n000000067F000080000009A00C00000D8000-000000067F000080000009A00C00000DC000__000000BD25E66810\n000000067F000080000009A00C00000DC000-000000067F000080000009A00C00000E0000__000000BC53F74828\n000000067F000080000009A00C00000DC000-000000067F000080000009A00C00000E0000__000000BD25E66810\n000000067F000080000009A00C00000E0000-000000067F000080000009A00C00000E4000__000000BC53F74828\n000000067F000080000009A00C00000E0000-000000067F000080000009A00C00000E4000__000000BD25E66810\n000000067F000080000009A00C00000E0166-000000067F000080000009A00C00000E96C9__000000BBE607E8F1-000000BC596B5D59\n000000067F000080000009A00C00000E4000-000000067F000080000009A00C00000E8000__000000BC53F74828\n000000067F000080000009A00C00000E4000-000000067F000080000009A00C00000E8000__000000BD25E66810\n000000067F000080000009A00C00000E4F97-000000067F000080000009A0140000019842__000000BCEF79BE91-000000BD263A5849\n000000067F000080000009A00C00000E8000-000000067F000080000009A00C00000EC000__000000BC53F74828\n000000067F000080000009A00C00000E8000-000000067F000080000009A00C00000EC000__000000BD25E66810\n000000067F000080000009A00C00000E96C9-000000067F000080000009A00C00000F2C2B__000000BBE607E8F1-000000BC596B5D59\n000000067F000080000009A00C00000EC000-000000067F000080000009A00C00000F0000__000000BC53F74828\n000000067F000080000009A00C00000EC000-000000067F
000080000009A00C00000F0000__000000BD25E66810\n000000067F000080000009A00C00000F0000-000000067F000080000009A00C00000F4000__000000BC53F74828\n000000067F000080000009A00C00000F0000-000000067F000080000009A00C00000F4000__000000BD25E66810\n000000067F000080000009A00C00000F248B-000000067F000080000009A0140000004031__000000BC596B5D59-000000BCEF79BE91\n000000067F000080000009A00C00000F2C2B-000000067F000080000009A00C00000FC18E__000000BBE607E8F1-000000BC596B5D59\n000000067F000080000009A00C00000F4000-000000067F000080000009A00C00000F8000__000000BC53F74828\n000000067F000080000009A00C00000F4000-000000067F000080000009A00C00000F8000__000000BD25E66810\n000000067F000080000009A00C00000F8000-000000067F000080000009A00C00000FC000__000000BC53F74828\n000000067F000080000009A00C00000F8000-000000067F000080000009A00C00000FC000__000000BD25E66810\n000000067F000080000009A00C00000FC000-000000067F000080000009A00C0000100000__000000BC53F74828\n000000067F000080000009A00C00000FC000-000000067F000080000009A00C0000100000__000000BD25E66810\n000000067F000080000009A00C00000FC18E-000000067F000080000009A00C00001056F2__000000BBE607E8F1-000000BC596B5D59\n000000067F000080000009A00C0000100000-000000067F000080000009A00C0000104000__000000BC53F74828\n000000067F000080000009A00C0000100000-000000067F000080000009A00C0000104000__000000BD25E66810\n000000067F000080000009A00C0000104000-000000067F000080000009A00C0000108000__000000BC53F74828\n000000067F000080000009A00C0000104000-000000067F000080000009A00C0000108000__000000BD25E66810\n000000067F000080000009A00C00001056F2-000000067F000080000009A00C000010EC54__000000BBE607E8F1-000000BC596B5D59\n000000067F000080000009A00C0000108000-000000067F000080000009A00C000010C000__000000BC53F74828\n000000067F000080000009A00C0000108000-000000067F000080000009A00C000010C000__000000BD25E66810\n000000067F000080000009A00C000010C000-000000067F000080000009A00C0000110000__000000BC53F74828\n000000067F000080000009A00C000010C000-000000067F000080000009A00C0000110000__000000BD25E66810\n000000067F000080000009A00C
000010EC54-010000000000000001000000040000000020__000000BBE607E8F1-000000BC596B5D59\n000000067F000080000009A00C0000110000-000000067F000080000009A0120100000000__000000BD25E66810\n000000067F000080000009A00C0000110000-030000000000000000000000000000000002__000000BC53F74828\n000000067F000080000009A0140000000000-000000067F000080000009A0140000004000__000000BD25E66810\n000000067F000080000009A0140000004000-000000067F000080000009A0140000008000__000000BD25E66810\n000000067F000080000009A0140000004031-000000067F000080000009A0140000009FC7__000000BC596B5D59-000000BCEF79BE91\n000000067F000080000009A0140000008000-000000067F000080000009A014000000C000__000000BD25E66810\n000000067F000080000009A0140000009FC7-000000067F000080000009A014000000FF53__000000BC596B5D59-000000BCEF79BE91\n000000067F000080000009A014000000C000-000000067F000080000009A0140000010000__000000BD25E66810\n000000067F000080000009A014000000FF53-000000067F000080000009A0140000015F1C__000000BC596B5D59-000000BCEF79BE91\n000000067F000080000009A0140000010000-000000067F000080000009A0140000014000__000000BD25E66810\n000000067F000080000009A0140000014000-000000067F000080000009A0140000018000__000000BD25E66810\n000000067F000080000009A0140000015F1C-000000067F000080000009A014000001BED0__000000BC596B5D59-000000BCEF79BE91\n000000067F000080000009A0140000018000-000000067F000080000009A014000001C000__000000BD25E66810\n000000067F000080000009A0140000019844-030000000000000000000000000000000002__000000BCEF79BE91-000000BD263A5849\n000000067F000080000009A014000001BED0-000000067F000080000009A0140000021E6C__000000BC596B5D59-000000BCEF79BE91\n000000067F000080000009A014000001C000-000000067F000080000009A0140000020000__000000BD25E66810\n000000067F000080000009A0140000020000-000000067F000080000009A0140000024000__000000BD25E66810\n000000067F000080000009A0140000021E6C-000000067F000080000009A0140000027DB1__000000BC596B5D59-000000BCEF79BE91\n000000067F000080000009A0140000024000-000000067F000080000009A0140000028000__000000BD25E66810\n000000067F000080000009A0140000
027DB1-000000067F000080000009A014000002DC9E__000000BC596B5D59-000000BCEF79BE91\n000000067F000080000009A0140000028000-000000067F000080000009A014000002C000__000000BD25E66810\n000000067F000080000009A014000002C000-030000000000000000000000000000000002__000000BD25E66810\n000000067F000080000009A01400FFFFFFFF-030000000000000000000000000000000002__000000BC596B5D59-000000BCEF79BE91\n000000067F000080000009C00C0000000000-000000067F000080000009C00C0000004000__000000BEF683BFD0\n000000067F000080000009C00C0000004000-000000067F000080000009C00C0000008000__000000BEF683BFD0\n000000067F000080000009C00C0000008000-000000067F000080000009C00C000000C000__000000BEF683BFD0\n000000067F000080000009C00C0000009749-000000067F000080000009C00C0000012EAF__000000BD263A5849-000000BDA607F261\n000000067F000080000009C00C000000C000-000000067F000080000009C00C0000010000__000000BEF683BFD0\n000000067F000080000009C00C0000010000-000000067F000080000009C00C0000014000__000000BEF683BFD0\n000000067F000080000009C00C0000012EAF-000000067F000080000009C00C000001C60B__000000BD263A5849-000000BDA607F261\n000000067F000080000009C00C0000014000-000000067F000080000009C00C0000018000__000000BEF683BFD0\n000000067F000080000009C00C0000018000-000000067F000080000009C00C000001C000__000000BEF683BFD0\n000000067F000080000009C00C000001C000-000000067F000080000009C00C0000020000__000000BEF683BFD0\n000000067F000080000009C00C000001C60B-000000067F000080000009C00C0000025D39__000000BD263A5849-000000BDA607F261\n000000067F000080000009C00C0000020000-000000067F000080000009C00C0000024000__000000BEF683BFD0\n000000067F000080000009C00C0000024000-000000067F000080000009C00C0000028000__000000BEF683BFD0\n000000067F000080000009C00C0000025D39-000000067F000080000009C00C000002F49F__000000BD263A5849-000000BDA607F261\n000000067F000080000009C00C0000028000-000000067F000080000009C00C000002C000__000000BEF683BFD0\n000000067F000080000009C00C000002C000-000000067F000080000009C00C0000030000__000000BEF683BFD0\n000000067F000080000009C00C000002F49F-000000067F000080000009C00C00000
38BB2__000000BD263A5849-000000BDA607F261\n000000067F000080000009C00C0000030000-000000067F000080000009C00C0000034000__000000BEF683BFD0\n000000067F000080000009C00C0000034000-000000067F000080000009C00C0000038000__000000BEF683BFD0\n000000067F000080000009C00C0000038000-000000067F000080000009C00C000003C000__000000BEF683BFD0\n000000067F000080000009C00C0000038BB2-000000067F000080000009C00C0000042318__000000BD263A5849-000000BDA607F261\n000000067F000080000009C00C000003C000-000000067F000080000009C00C0000040000__000000BEF683BFD0\n000000067F000080000009C00C0000040000-000000067F000080000009C00C0000044000__000000BEF683BFD0\n000000067F000080000009C00C0000042318-000000067F000080000009C00C000004BA7E__000000BD263A5849-000000BDA607F261\n000000067F000080000009C00C0000044000-000000067F000080000009C00C0000048000__000000BEF683BFD0\n000000067F000080000009C00C0000048000-000000067F000080000009C00C000004C000__000000BEF06884C8\n000000067F000080000009C00C000004BA7E-030000000000000000000000000000000002__000000BD263A5849-000000BDA607F261\n000000067F000080000009C00C000004BAC3-000000067F000080000009C00C00000551F8__000000BDA607F261-000000BE45CBFBB9\n000000067F000080000009C00C000004C000-000000067F000080000009C00C0000050000__000000BEF06884C8\n000000067F000080000009C00C0000050000-000000067F000080000009C00C0000054000__000000BEF06884C8\n000000067F000080000009C00C0000054000-000000067F000080000009C00C0000058000__000000BEF06884C8\n000000067F000080000009C00C00000551F8-000000067F000080000009C00C000005E90C__000000BDA607F261-000000BE45CBFBB9\n000000067F000080000009C00C0000058000-000000067F000080000009C00C000005C000__000000BEF06884C8\n000000067F000080000009C00C000005C000-000000067F000080000009C00C0000060000__000000BEF06884C8\n000000067F000080000009C00C000005E90C-000000067F000080000009C00C000006802C__000000BDA607F261-000000BE45CBFBB9\n000000067F000080000009C00C0000060000-000000067F000080000009C00C0000064000__000000BEF06884C8\n000000067F000080000009C00C0000064000-000000067F000080000009C00C0000068000__000000BEF06884
C8\n000000067F000080000009C00C0000068000-000000067F000080000009C00C000006C000__000000BEF06884C8\n000000067F000080000009C00C000006802C-000000067F000080000009C00C0000071783__000000BDA607F261-000000BE45CBFBB9\n000000067F000080000009C00C000006C000-000000067F000080000009C00C0000070000__000000BEF06884C8\n000000067F000080000009C00C0000070000-000000067F000080000009C00C0000074000__000000BEF06884C8\n000000067F000080000009C00C0000071783-000000067F000080000009C00C000007AEE9__000000BDA607F261-000000BE45CBFBB9\n000000067F000080000009C00C0000074000-000000067F000080000009C00C0000078000__000000BEF06884C8\n000000067F000080000009C00C0000078000-000000067F000080000009C00C000007C000__000000BEF06884C8\n000000067F000080000009C00C000007AEE9-000000067F000080000009C00C000008460B__000000BDA607F261-000000BE45CBFBB9\n000000067F000080000009C00C000007C000-000000067F000080000009C00C0000080000__000000BEF06884C8\n000000067F000080000009C00C0000080000-000000067F000080000009C00C0000084000__000000BEF06884C8\n000000067F000080000009C00C0000084000-000000067F000080000009C00C0000088000__000000BEF06884C8\n000000067F000080000009C00C000008460B-000000067F000080000009C00C000008DD71__000000BDA607F261-000000BE45CBFBB9\n000000067F000080000009C00C0000088000-000000067F000080000009C00C000008C000__000000BEF06884C8\n000000067F000080000009C00C000008C000-000000067F000080000009C00C0000090000__000000BEF06884C8\n000000067F000080000009C00C000008DD71-000000067F000080000009C00C00000974D7__000000BDA607F261-000000BE45CBFBB9\n000000067F000080000009C00C0000090000-000000067F000080000009C00C0000094000__000000BEF06884C8\n000000067F000080000009C00C0000094000-000000067F000080000009C00C0000098000__000000BEF06884C8\n000000067F000080000009C00C00000974D7-000000067F000080000009C00C00000A0C0B__000000BDA607F261-000000BE45CBFBB9\n000000067F000080000009C00C0000098000-000000067F000080000009C00C000009C000__000000BEF06884C8\n000000067F000080000009C00C000009C000-000000067F000080000009C00C00000A0000__000000BEF06884C8\n000000067F000080000009C00C00000A00
00-000000067F000080000009C00C00000A4000__000000BEF06884C8\n000000067F000080000009C00C00000A0C0B-000000067F000080000009C00C00000AA371__000000BDA607F261-000000BE45CBFBB9\n000000067F000080000009C00C00000A4000-000000067F000080000009C00C00000A8000__000000BEF06884C8\n000000067F000080000009C00C00000A8000-000000067F000080000009C00C00000AC000__000000BEF06884C8\n000000067F000080000009C00C00000AA371-000000067F000080000009C00C0100000000__000000BDA607F261-000000BE45CBFBB9\n000000067F000080000009C00C00000AC000-000000067F000080000009C00C00000B0000__000000BEF06884C8\n000000067F000080000009C00C00000B0000-000000067F000080000009C00C00000B4000__000000BEF06884C8\n000000067F000080000009C00C00000B2921-000000067F000080000009C00C00000BC087__000000BE45CBFBB9-000000BEF5F47FD1\n000000067F000080000009C00C00000B4000-000000067F000080000009C00C00000B8000__000000BEF06884C8\n000000067F000080000009C00C00000B8000-000000067F000080000009C00C00000BC000__000000BEF06884C8\n000000067F000080000009C00C00000BC000-000000067F000080000009C00C00000C0000__000000BEF06884C8\n000000067F000080000009C00C00000BC087-000000067F000080000009C00C00000C57B8__000000BE45CBFBB9-000000BEF5F47FD1\n000000067F000080000009C00C00000C0000-000000067F000080000009C00C00000C4000__000000BEF06884C8\n000000067F000080000009C00C00000C4000-000000067F000080000009C00C00000C8000__000000BEF06884C8\n000000067F000080000009C00C00000C57B8-000000067F000080000009C00C00000CEF09__000000BE45CBFBB9-000000BEF5F47FD1\n000000067F000080000009C00C00000C8000-000000067F000080000009C00C00000CC000__000000BEF06884C8\n000000067F000080000009C00C00000CC000-000000067F000080000009C00C00000D0000__000000BEF06884C8\n000000067F000080000009C00C00000CEF09-000000067F000080000009C00C00000D862B__000000BE45CBFBB9-000000BEF5F47FD1\n000000067F000080000009C00C00000D0000-000000067F000080000009C00C00000D4000__000000BEF06884C8\n000000067F000080000009C00C00000D4000-000000067F000080000009C00C00000D8000__000000BEF06884C8\n000000067F000080000009C00C00000D8000-000000067F000080000009C00C00000DC00
0__000000BEF06884C8\n000000067F000080000009C00C00000D862B-000000067F000080000009C00C00000E1D7F__000000BE45CBFBB9-000000BEF5F47FD1\n000000067F000080000009C00C00000DC000-000000067F000080000009C00C00000E0000__000000BEF06884C8\n000000067F000080000009C00C00000E0000-000000067F000080000009C00C00000E4000__000000BEF06884C8\n000000067F000080000009C00C00000E1D7F-000000067F000080000009C00C00000EB4E5__000000BE45CBFBB9-000000BEF5F47FD1\n000000067F000080000009C00C00000E4000-000000067F000080000009C00C00000E8000__000000BEF06884C8\n000000067F000080000009C00C00000E8000-000000067F000080000009C00C00000EC000__000000BEF06884C8\n000000067F000080000009C00C00000EB4E5-000000067F000080000009C00C00000F4C0B__000000BE45CBFBB9-000000BEF5F47FD1\n000000067F000080000009C00C00000EC000-000000067F000080000009C00C00000F0000__000000BEF06884C8\n000000067F000080000009C00C00000F0000-000000067F000080000009C00C00000F4000__000000BEF06884C8\n000000067F000080000009C00C00000F4000-000000067F000080000009C00C00000F8000__000000BEF06884C8\n000000067F000080000009C00C00000F4C0B-000000067F000080000009C00C00000FE371__000000BE45CBFBB9-000000BEF5F47FD1\n000000067F000080000009C00C00000F8000-000000067F000080000009C00C00000FC000__000000BEF06884C8\n000000067F000080000009C00C00000FC000-000000067F000080000009C00C0000100000__000000BEF06884C8\n000000067F000080000009C00C00000FE371-000000067F000080000009C00C0000107AD7__000000BE45CBFBB9-000000BEF5F47FD1\n000000067F000080000009C00C0000100000-000000067F000080000009C00C0000104000__000000BEF06884C8\n000000067F000080000009C00C0000104000-000000067F000080000009C00C0000108000__000000BEF06884C8\n000000067F000080000009C00C0000107AD7-000000067F000080000009C00C000011120B__000000BE45CBFBB9-000000BEF5F47FD1\n000000067F000080000009C00C0000108000-000000067F000080000009C00C000010C000__000000BEF06884C8\n000000067F000080000009C00C000010C000-030000000000000000000000000000000002__000000BEF06884C8\n000000067F000080000009C00C000011120B-010000000000000001000000050000000003__000000BE45CBFBB9-000000BEF5F47FD1\n
000000067F000080000009E00C0000000000-000000067F000080000009E00C0000004000__000000C0C9769FD8\n000000067F000080000009E00C0000004000-000000067F000080000009E00C0000008000__000000C0C9769FD8\n000000067F000080000009E00C0000004916-000000067F000080000009E00C000000E07C__000000BEF5F47FD1-000000BF48FFEB11\n000000067F000080000009E00C0000008000-000000067F000080000009E00C000000C000__000000C0C9769FD8\n000000067F000080000009E00C000000C000-000000067F000080000009E00C0000010000__000000C0C9769FD8\n000000067F000080000009E00C000000E07C-000000067F000080000009E00C000001779A__000000BEF5F47FD1-000000BF48FFEB11\n000000067F000080000009E00C0000010000-000000067F000080000009E00C0000014000__000000C0C9769FD8\n000000067F000080000009E00C0000014000-000000067F000080000009E00C0000018000__000000C0C9769FD8\n000000067F000080000009E00C000001779A-000000067F000080000009E00C0000020F00__000000BEF5F47FD1-000000BF48FFEB11\n000000067F000080000009E00C0000018000-000000067F000080000009E00C000001C000__000000C0C9769FD8\n000000067F000080000009E00C000001C000-000000067F000080000009E00C0000020000__000000C0C9769FD8\n000000067F000080000009E00C0000020000-000000067F000080000009E00C0000024000__000000C0C9769FD8\n000000067F000080000009E00C0000020F00-000000067F000080000009E00C000002A60B__000000BEF5F47FD1-000000BF48FFEB11\n000000067F000080000009E00C0000024000-000000067F000080000009E00C0000028000__000000C0C9769FD8\n000000067F000080000009E00C0000028000-000000067F000080000009E00C000002C000__000000C0C9769FD8\n000000067F000080000009E00C000002A60B-030000000000000000000000000000000002__000000BEF5F47FD1-000000BF48FFEB11\n000000067F000080000009E00C000002C000-000000067F000080000009E00C0000030000__000000C0B597E900\n000000067F000080000009E00C000002C000-000000067F000080000009E00C0000030000__000000C1972392A8\n000000067F000080000009E00C000002F506-000000067F000080000009E00C0000038C11__000000BF48FFEB11-000000BFF8BDFEE9\n000000067F000080000009E00C0000030000-000000067F000080000009E00C0000034000__000000C0B597E900\n000000067F000080000009E00C0000030000-0
00000067F000080000009E00C0000034000__000000C1972392A8\n000000067F000080000009E00C0000034000-000000067F000080000009E00C0000038000__000000C0B597E900\n000000067F000080000009E00C0000034000-000000067F000080000009E00C0000038000__000000C1972392A8\n000000067F000080000009E00C0000038000-000000067F000080000009E00C000003C000__000000C0B597E900\n000000067F000080000009E00C0000038000-000000067F000080000009E00C000003C000__000000C1972392A8\n000000067F000080000009E00C0000038C11-000000067F000080000009E00C0000042361__000000BF48FFEB11-000000BFF8BDFEE9\n000000067F000080000009E00C000003C000-000000067F000080000009E00C0000040000__000000C0B597E900\n000000067F000080000009E00C000003C000-000000067F000080000009E00C0000040000__000000C1972392A8\n000000067F000080000009E00C0000040000-000000067F000080000009E00C0000044000__000000C0B597E900\n000000067F000080000009E00C0000040000-000000067F000080000009E00C0000044000__000000C1972392A8\n000000067F000080000009E00C0000042361-000000067F000080000009E00C000004BAC7__000000BF48FFEB11-000000BFF8BDFEE9\n000000067F000080000009E00C0000044000-000000067F000080000009E00C0000048000__000000C0B597E900\n000000067F000080000009E00C0000044000-000000067F000080000009E00C0000048000__000000C1972392A8\n000000067F000080000009E00C0000048000-000000067F000080000009E00C000004C000__000000C0B597E900\n000000067F000080000009E00C0000048000-000000067F000080000009E00C000004C000__000000C1972392A8\n000000067F000080000009E00C000004BAC7-000000067F000080000009E00C00000551FC__000000BF48FFEB11-000000BFF8BDFEE9\n000000067F000080000009E00C000004C000-000000067F000080000009E00C0000050000__000000C0B597E900\n000000067F000080000009E00C000004C000-000000067F000080000009E00C0000050000__000000C1972392A8\n000000067F000080000009E00C0000050000-000000067F000080000009E00C0000054000__000000C0B597E900\n000000067F000080000009E00C0000050000-000000067F000080000009E00C0000054000__000000C1972392A8\n000000067F000080000009E00C0000050E89-000000067F000080000009E00C00000A18A0__000000C1426D92E1-000000C19744E959\n000000067F0000800
00009E00C0000054000-000000067F000080000009E00C0000058000__000000C0B597E900\n000000067F000080000009E00C0000054000-000000067F000080000009E00C0000058000__000000C1972392A8\n000000067F000080000009E00C00000551FC-000000067F000080000009E00C000005E90B__000000BF48FFEB11-000000BFF8BDFEE9\n000000067F000080000009E00C0000058000-000000067F000080000009E00C000005C000__000000C0B597E900\n000000067F000080000009E00C0000058000-000000067F000080000009E00C000005C000__000000C1972392A8\n000000067F000080000009E00C000005C000-000000067F000080000009E00C0000060000__000000C0B597E900\n000000067F000080000009E00C000005C000-000000067F000080000009E00C0000060000__000000C1972392A8\n000000067F000080000009E00C000005E90B-000000067F000080000009E00C000006802B__000000BF48FFEB11-000000BFF8BDFEE9\n000000067F000080000009E00C0000060000-000000067F000080000009E00C0000064000__000000C0B597E900\n000000067F000080000009E00C0000060000-000000067F000080000009E00C0000064000__000000C1972392A8\n000000067F000080000009E00C0000064000-000000067F000080000009E00C0000068000__000000C0B597E900\n000000067F000080000009E00C0000064000-000000067F000080000009E00C0000068000__000000C1972392A8\n000000067F000080000009E00C0000068000-000000067F000080000009E00C000006C000__000000C0B597E900\n000000067F000080000009E00C0000068000-000000067F000080000009E00C000006C000__000000C1972392A8\n000000067F000080000009E00C000006802B-000000067F000080000009E00C0000071782__000000BF48FFEB11-000000BFF8BDFEE9\n000000067F000080000009E00C000006C000-000000067F000080000009E00C0000070000__000000C0B597E900\n000000067F000080000009E00C000006C000-000000067F000080000009E00C0000070000__000000C1972392A8\n000000067F000080000009E00C0000070000-000000067F000080000009E00C0000074000__000000C0B597E900\n000000067F000080000009E00C0000070000-000000067F000080000009E00C0000074000__000000C1972392A8\n000000067F000080000009E00C0000071782-000000067F000080000009E00C000007AEE8__000000BF48FFEB11-000000BFF8BDFEE9\n000000067F000080000009E00C0000074000-000000067F000080000009E00C0000078000__000000C0B597E9
00\n000000067F000080000009E00C0000074000-000000067F000080000009E00C0000078000__000000C1972392A8\n000000067F000080000009E00C0000078000-000000067F000080000009E00C000007C000__000000C0B597E900\n000000067F000080000009E00C0000078000-000000067F000080000009E00C000007C000__000000C1972392A8\n000000067F000080000009E00C000007AEE8-000000067F000080000009E00C000008460B__000000BF48FFEB11-000000BFF8BDFEE9\n000000067F000080000009E00C000007C000-000000067F000080000009E00C0000080000__000000C0B597E900\n000000067F000080000009E00C000007C000-000000067F000080000009E00C0000080000__000000C1972392A8\n000000067F000080000009E00C0000080000-000000067F000080000009E00C0000084000__000000C0B597E900\n000000067F000080000009E00C0000080000-000000067F000080000009E00C0000084000__000000C1972392A8\n000000067F000080000009E00C0000084000-000000067F000080000009E00C0000088000__000000C0B597E900\n000000067F000080000009E00C0000084000-000000067F000080000009E00C0000088000__000000C1972392A8\n000000067F000080000009E00C000008460B-000000067F000080000009E00C000008DD71__000000BF48FFEB11-000000BFF8BDFEE9\n000000067F000080000009E00C0000088000-000000067F000080000009E00C000008C000__000000C0B597E900\n000000067F000080000009E00C0000088000-000000067F000080000009E00C000008C000__000000C1972392A8\n000000067F000080000009E00C000008C000-000000067F000080000009E00C0000090000__000000C0B597E900\n000000067F000080000009E00C000008C000-000000067F000080000009E00C0000090000__000000C1972392A8\n000000067F000080000009E00C000008DD71-000000067F000080000009E00C00000974D7__000000BF48FFEB11-000000BFF8BDFEE9\n000000067F000080000009E00C0000090000-000000067F000080000009E00C0000094000__000000C0B597E900\n000000067F000080000009E00C0000090000-000000067F000080000009E00C0000094000__000000C1972392A8\n000000067F000080000009E00C0000094000-000000067F000080000009E00C0000098000__000000C0B597E900\n000000067F000080000009E00C0000094000-000000067F000080000009E00C0000098000__000000C1972392A8\n000000067F000080000009E00C00000974D7-000000067F000080000009E00C0100000000__000000BF48
FFEB11-000000BFF8BDFEE9\n000000067F000080000009E00C0000098000-000000067F000080000009E00C000009C000__000000C0B597E900\n000000067F000080000009E00C0000098000-000000067F000080000009E00C000009C000__000000C1972392A8\n000000067F000080000009E00C000009C000-000000067F000080000009E00C00000A0000__000000C0B597E900\n000000067F000080000009E00C000009C000-000000067F000080000009E00C00000A0000__000000C1972392A8\n000000067F000080000009E00C000009FB21-000000067F000080000009E00C00000A9230__000000BFF8BDFEE9-000000C0C8CA5FF1\n000000067F000080000009E00C00000A0000-000000067F000080000009E00C00000A4000__000000C0B597E900\n000000067F000080000009E00C00000A0000-000000067F000080000009E00C00000A4000__000000C1972392A8\n000000067F000080000009E00C00000A18A4-000000067F000080000009E00C00000F2B76__000000C1426D92E1-000000C19744E959\n000000067F000080000009E00C00000A4000-000000067F000080000009E00C00000A8000__000000C0B597E900\n000000067F000080000009E00C00000A4000-000000067F000080000009E00C00000A8000__000000C1972392A8\n000000067F000080000009E00C00000A8000-000000067F000080000009E00C00000AC000__000000C0B597E900\n000000067F000080000009E00C00000A8000-000000067F000080000009E00C00000AC000__000000C1972392A8\n000000067F000080000009E00C00000A9230-000000067F000080000009E00C00000B297D__000000BFF8BDFEE9-000000C0C8CA5FF1\n000000067F000080000009E00C00000AC000-000000067F000080000009E00C00000B0000__000000C0B597E900\n000000067F000080000009E00C00000AC000-000000067F000080000009E00C00000B0000__000000C1972392A8\n000000067F000080000009E00C00000B0000-000000067F000080000009E00C00000B4000__000000C0B597E900\n000000067F000080000009E00C00000B0000-000000067F000080000009E00C00000B4000__000000C1972392A8\n000000067F000080000009E00C00000B297D-000000067F000080000009E00C00000BC0E3__000000BFF8BDFEE9-000000C0C8CA5FF1\n000000067F000080000009E00C00000B4000-000000067F000080000009E00C00000B8000__000000C0B597E900\n000000067F000080000009E00C00000B4000-000000067F000080000009E00C00000B8000__000000C1972392A8\n000000067F000080000009E00C00000B8000-000000067F
000080000009E00C00000BC000__000000C0B597E900\n000000067F000080000009E00C00000B8000-000000067F000080000009E00C00000BC000__000000C1972392A8\n000000067F000080000009E00C00000BC000-000000067F000080000009E00C00000C0000__000000C0B597E900\n000000067F000080000009E00C00000BC000-000000067F000080000009E00C00000C0000__000000C1972392A8\n000000067F000080000009E00C00000BC0E3-000000067F000080000009E00C00000C580C__000000BFF8BDFEE9-000000C0C8CA5FF1\n000000067F000080000009E00C00000C0000-000000067F000080000009E00C00000C4000__000000C0B597E900\n000000067F000080000009E00C00000C0000-000000067F000080000009E00C00000C4000__000000C1972392A8\n000000067F000080000009E00C00000C0C74-000000067F000080000009E0140000001880__000000C0C8CA5FF1-000000C1426D92E1\n000000067F000080000009E00C00000C4000-000000067F000080000009E00C00000C8000__000000C0B597E900\n000000067F000080000009E00C00000C4000-000000067F000080000009E00C00000C8000__000000C1972392A8\n000000067F000080000009E00C00000C580C-000000067F000080000009E00C00000CEF71__000000BFF8BDFEE9-000000C0C8CA5FF1\n000000067F000080000009E00C00000C8000-000000067F000080000009E00C00000CC000__000000C0B597E900\n000000067F000080000009E00C00000C8000-000000067F000080000009E00C00000CC000__000000C1972392A8\n000000067F000080000009E00C00000CC000-000000067F000080000009E00C00000D0000__000000C0B597E900\n000000067F000080000009E00C00000CC000-000000067F000080000009E00C00000D0000__000000C1972392A8\n000000067F000080000009E00C00000CEF71-000000067F000080000009E00C00000D86D7__000000BFF8BDFEE9-000000C0C8CA5FF1\n000000067F000080000009E00C00000D0000-000000067F000080000009E00C00000D4000__000000C0B597E900\n000000067F000080000009E00C00000D0000-000000067F000080000009E00C00000D4000__000000C1972392A8\n000000067F000080000009E00C00000D4000-000000067F000080000009E00C00000D8000__000000C0B597E900\n000000067F000080000009E00C00000D4000-000000067F000080000009E00C00000D8000__000000C1972392A8\n000000067F000080000009E00C00000D8000-000000067F000080000009E00C00000DC000__000000C0B597E900\n000000067F000080000009E00C
00000D8000-000000067F000080000009E00C00000DC000__000000C1972392A8\n000000067F000080000009E00C00000D86D7-000000067F000080000009E00C00000E1E0C__000000BFF8BDFEE9-000000C0C8CA5FF1\n000000067F000080000009E00C00000DC000-000000067F000080000009E00C00000E0000__000000C0B597E900\n000000067F000080000009E00C00000DC000-000000067F000080000009E00C00000E0000__000000C1972392A8\n000000067F000080000009E00C00000E0000-000000067F000080000009E00C00000E4000__000000C0B597E900\n000000067F000080000009E00C00000E0000-000000067F000080000009E00C00000E4000__000000C1972392A8\n000000067F000080000009E00C00000E1E0C-000000067F000080000009E00C00000EB572__000000BFF8BDFEE9-000000C0C8CA5FF1\n000000067F000080000009E00C00000E4000-000000067F000080000009E00C00000E8000__000000C0B597E900\n000000067F000080000009E00C00000E4000-000000067F000080000009E00C00000E8000__000000C1972392A8\n000000067F000080000009E00C00000E8000-000000067F000080000009E00C00000EC000__000000C0B597E900\n000000067F000080000009E00C00000E8000-000000067F000080000009E00C00000EC000__000000C1972392A8\n000000067F000080000009E00C00000EB572-000000067F000080000009E00C00000F4CD8__000000BFF8BDFEE9-000000C0C8CA5FF1\n000000067F000080000009E00C00000EC000-000000067F000080000009E00C00000F0000__000000C0B597E900\n000000067F000080000009E00C00000EC000-000000067F000080000009E00C00000F0000__000000C1972392A8\n000000067F000080000009E00C00000F0000-000000067F000080000009E00C00000F4000__000000C0B597E900\n000000067F000080000009E00C00000F0000-000000067F000080000009E00C00000F4000__000000C1972392A8\n000000067F000080000009E00C00000F2B77-000000067F000080000009E014000000D3EB__000000C1426D92E1-000000C19744E959\n000000067F000080000009E00C00000F4000-000000067F000080000009E00C00000F8000__000000C0B597E900\n000000067F000080000009E00C00000F4000-000000067F000080000009E00C00000F8000__000000C1972392A8\n000000067F000080000009E00C00000F4CD8-000000067F000080000009E00C00000FE40B__000000BFF8BDFEE9-000000C0C8CA5FF1\n000000067F000080000009E00C00000F8000-000000067F000080000009E00C00000FC000__000000
C0B597E900\n000000067F000080000009E00C00000F8000-000000067F000080000009E00C00000FC000__000000C1972392A8\n000000067F000080000009E00C00000FC000-000000067F000080000009E00C0000100000__000000C0B597E900\n000000067F000080000009E00C00000FC000-000000067F000080000009E00C0000100000__000000C1972392A8\n000000067F000080000009E00C00000FE40B-000000067F000080000009E00C0000107B27__000000BFF8BDFEE9-000000C0C8CA5FF1\n000000067F000080000009E00C0000100000-000000067F000080000009E00C0000104000__000000C0B597E900\n000000067F000080000009E00C0000100000-000000067F000080000009E00C0000104000__000000C1972392A8\n000000067F000080000009E00C0000104000-000000067F000080000009E00C0000108000__000000C1972392A8\n000000067F000080000009E00C0000104000-030000000000000000000000000000000002__000000C0B597E900\n000000067F000080000009E00C0000107B27-000000067F000080000009E00C000011128D__000000BFF8BDFEE9-000000C0C8CA5FF1\n000000067F000080000009E00C0000108000-000000067F000080000009E00C000010C000__000000C1972392A8\n000000067F000080000009E00C000010C000-000000067F000080000009E00C0000110000__000000C1972392A8\n000000067F000080000009E00C0000110000-000000067F000080000009E0120100000000__000000C1972392A8\n000000067F000080000009E00C000011128D-010000000000000001000000050000000003__000000BFF8BDFEE9-000000C0C8CA5FF1\n000000067F000080000009E0140000000000-000000067F000080000009E0140000004000__000000C1972392A8\n000000067F000080000009E0140000001880-000000067F000080000009E014000000842E__000000C0C8CA5FF1-000000C1426D92E1\n000000067F000080000009E0140000004000-000000067F000080000009E0140000008000__000000C1972392A8\n000000067F000080000009E0140000008000-000000067F000080000009E014000000C000__000000C1972392A8\n000000067F000080000009E014000000842E-000000067F000080000009E014000000F011__000000C0C8CA5FF1-000000C1426D92E1\n000000067F000080000009E014000000C000-000000067F000080000009E0140000010000__000000C1972392A8\n000000067F000080000009E014000000D3EB-000000067F000080000009E014000002578F__000000C1426D92E1-000000C19744E959\n000000067F000080000009E014
000000F011-000000067F000080000009E0140000015BD8__000000C0C8CA5FF1-000000C1426D92E1\n000000067F000080000009E0140000010000-000000067F000080000009E0140000014000__000000C1972392A8\n000000067F000080000009E0140000014000-000000067F000080000009E0140000018000__000000C1972392A8\n000000067F000080000009E0140000015BD8-000000067F000080000009E014000001C7C5__000000C0C8CA5FF1-000000C1426D92E1\n000000067F000080000009E0140000018000-000000067F000080000009E014000001C000__000000C1972392A8\n000000067F000080000009E014000001C000-000000067F000080000009E0140000020000__000000C1972392A8\n000000067F000080000009E014000001C7C5-000000067F000080000009E014000002337F__000000C0C8CA5FF1-000000C1426D92E1\n000000067F000080000009E0140000020000-000000067F000080000009E0140000024000__000000C1972392A8\n000000067F000080000009E014000002337F-000000067F000080000009E0140000029F4A__000000C0C8CA5FF1-000000C1426D92E1\n000000067F000080000009E0140000024000-000000067F000080000009E0140000028000__000000C1972392A8\n000000067F000080000009E0140000025790-030000000000000000000000000000000002__000000C1426D92E1-000000C19744E959\n000000067F000080000009E0140000028000-000000067F000080000009E014000002C000__000000C1972392A8\n000000067F000080000009E0140000029F4A-030000000000000000000000000000000002__000000C0C8CA5FF1-000000C1426D92E1\n000000067F000080000009E014000002C000-030000000000000000000000000000000002__000000C1972392A8\n000000067F00008000000A000C0000000000-000000067F00008000000A000C0000004000__000000C3687EDFE8\n000000067F00008000000A000C0000004000-000000067F00008000000A000C0000008000__000000C3687EDFE8\n000000067F00008000000A000C0000008000-000000067F00008000000A000C000000C000__000000C3687EDFE8\n000000067F00008000000A000C0000008EF9-000000067F00008000000A000C000001260C__000000C19744E959-000000C217F3F379\n000000067F00008000000A000C000000C000-000000067F00008000000A000C0000010000__000000C3687EDFE8\n000000067F00008000000A000C0000010000-000000067F00008000000A000C0000014000__000000C3687EDFE8\n000000067F00008000000A000C000001260C-000000067F
00008000000A000C000001BD72__000000C19744E959-000000C217F3F379\n000000067F00008000000A000C0000014000-000000067F00008000000A000C0000018000__000000C3687EDFE8\n000000067F00008000000A000C0000018000-000000067F00008000000A000C000001C000__000000C3687EDFE8\n000000067F00008000000A000C000001BD72-000000067F00008000000A000C00000254D8__000000C19744E959-000000C217F3F379\n000000067F00008000000A000C000001C000-000000067F00008000000A000C0000020000__000000C3687EDFE8\n000000067F00008000000A000C0000020000-000000067F00008000000A000C0000024000__000000C3687EDFE8\n000000067F00008000000A000C0000024000-000000067F00008000000A000C0000028000__000000C3687EDFE8\n000000067F00008000000A000C00000254D8-000000067F00008000000A000C000002EC0B__000000C19744E959-000000C217F3F379\n000000067F00008000000A000C0000028000-000000067F00008000000A000C000002C000__000000C3687EDFE8\n000000067F00008000000A000C000002C000-000000067F00008000000A000C0000030000__000000C3687EDFE8\n000000067F00008000000A000C000002EC0B-000000067F00008000000A000C0000038322__000000C19744E959-000000C217F3F379\n000000067F00008000000A000C0000030000-000000067F00008000000A000C0000034000__000000C3687EDFE8\n000000067F00008000000A000C0000034000-000000067F00008000000A000C0000038000__000000C3687EDFE8\n000000067F00008000000A000C0000038000-000000067F00008000000A000C000003C000__000000C3687EDFE8\n000000067F00008000000A000C0000038322-000000067F00008000000A000C0000041A88__000000C19744E959-000000C217F3F379\n000000067F00008000000A000C000003C000-000000067F00008000000A000C0000040000__000000C3687EDFE8\n000000067F00008000000A000C0000040000-000000067F00008000000A000C0000044000__000000C3687EDFE8\n000000067F00008000000A000C0000041A88-000000067F00008000000A000C000004B1EE__000000C19744E959-000000C217F3F379\n000000067F00008000000A000C0000044000-000000067F00008000000A000C0000048000__000000C3687EDFE8\n000000067F00008000000A000C0000048000-000000067F00008000000A000C000004C000__000000C366619FD8\n000000067F00008000000A000C0000048000-000000067F00008000000A000C000004C000__000000C42F
E73810\n000000067F00008000000A000C000004B1EE-030000000000000000000000000000000002__000000C19744E959-000000C217F3F379\n000000067F00008000000A000C000004BACE-000000067F00008000000A000C0000055202__000000C217F3F379-000000C2C7B1ECC1\n000000067F00008000000A000C000004C000-000000067F00008000000A000C0000050000__000000C366619FD8\n000000067F00008000000A000C000004C000-000000067F00008000000A000C0000050000__000000C42FE73810\n000000067F00008000000A000C0000050000-000000067F00008000000A000C0000054000__000000C366619FD8\n000000067F00008000000A000C0000050000-000000067F00008000000A000C0000054000__000000C42FE73810\n000000067F00008000000A000C0000054000-000000067F00008000000A000C0000058000__000000C366619FD8\n000000067F00008000000A000C0000054000-000000067F00008000000A000C0000058000__000000C42FE73810\n000000067F00008000000A000C0000055202-000000067F00008000000A000C000005E90D__000000C217F3F379-000000C2C7B1ECC1\n000000067F00008000000A000C0000056365-000000067F00008000000A000C00000ACA1A__000000C3E17E01A1-000000C430961E71\n000000067F00008000000A000C0000058000-000000067F00008000000A000C000005C000__000000C366619FD8\n000000067F00008000000A000C0000058000-000000067F00008000000A000C000005C000__000000C42FE73810\n000000067F00008000000A000C000005C000-000000067F00008000000A000C0000060000__000000C366619FD8\n000000067F00008000000A000C000005C000-000000067F00008000000A000C0000060000__000000C42FE73810\n000000067F00008000000A000C000005E90D-000000067F00008000000A000C000006802B__000000C217F3F379-000000C2C7B1ECC1\n000000067F00008000000A000C0000060000-000000067F00008000000A000C0000064000__000000C366619FD8\n000000067F00008000000A000C0000060000-000000067F00008000000A000C0000064000__000000C42FE73810\n000000067F00008000000A000C0000064000-000000067F00008000000A000C0000068000__000000C366619FD8\n000000067F00008000000A000C0000064000-000000067F00008000000A000C0000068000__000000C42FE73810\n000000067F00008000000A000C0000068000-000000067F00008000000A000C000006C000__000000C366619FD8\n000000067F00008000000A000C0000068000-000000067F
00008000000A000C000006C000__000000C42FE73810\n000000067F00008000000A000C000006802B-000000067F00008000000A000C0000071782__000000C217F3F379-000000C2C7B1ECC1\n000000067F00008000000A000C000006C000-000000067F00008000000A000C0000070000__000000C366619FD8\n000000067F00008000000A000C000006C000-000000067F00008000000A000C0000070000__000000C42FE73810\n000000067F00008000000A000C0000070000-000000067F00008000000A000C0000074000__000000C366619FD8\n000000067F00008000000A000C0000070000-000000067F00008000000A000C0000074000__000000C42FE73810\n000000067F00008000000A000C0000071782-000000067F00008000000A000C000007AEE8__000000C217F3F379-000000C2C7B1ECC1\n000000067F00008000000A000C0000074000-000000067F00008000000A000C0000078000__000000C366619FD8\n000000067F00008000000A000C0000074000-000000067F00008000000A000C0000078000__000000C42FE73810\n000000067F00008000000A000C0000078000-000000067F00008000000A000C000007C000__000000C366619FD8\n000000067F00008000000A000C0000078000-000000067F00008000000A000C000007C000__000000C42FE73810\n000000067F00008000000A000C000007AEE8-000000067F00008000000A000C000008460B__000000C217F3F379-000000C2C7B1ECC1\n000000067F00008000000A000C000007C000-000000067F00008000000A000C0000080000__000000C366619FD8\n000000067F00008000000A000C000007C000-000000067F00008000000A000C0000080000__000000C42FE73810\n000000067F00008000000A000C0000080000-000000067F00008000000A000C0000084000__000000C366619FD8\n000000067F00008000000A000C0000080000-000000067F00008000000A000C0000084000__000000C42FE73810\n000000067F00008000000A000C0000084000-000000067F00008000000A000C0000088000__000000C366619FD8\n000000067F00008000000A000C0000084000-000000067F00008000000A000C0000088000__000000C42FE73810\n000000067F00008000000A000C000008460B-000000067F00008000000A000C000008DD71__000000C217F3F379-000000C2C7B1ECC1\n000000067F00008000000A000C0000088000-000000067F00008000000A000C000008C000__000000C366619FD8\n000000067F00008000000A000C0000088000-000000067F00008000000A000C000008C000__000000C42FE73810\n000000067F00008000000A000C
000008C000-000000067F00008000000A000C0000090000__000000C366619FD8\n000000067F00008000000A000C000008C000-000000067F00008000000A000C0000090000__000000C42FE73810\n000000067F00008000000A000C000008DD71-000000067F00008000000A000C00000974D7__000000C217F3F379-000000C2C7B1ECC1\n000000067F00008000000A000C0000090000-000000067F00008000000A000C0000094000__000000C366619FD8\n000000067F00008000000A000C0000090000-000000067F00008000000A000C0000094000__000000C42FE73810\n000000067F00008000000A000C0000094000-000000067F00008000000A000C0000098000__000000C366619FD8\n000000067F00008000000A000C0000094000-000000067F00008000000A000C0000098000__000000C42FE73810\n000000067F00008000000A000C00000974D7-000000067F00008000000A000C00000A0C0B__000000C217F3F379-000000C2C7B1ECC1\n000000067F00008000000A000C0000098000-000000067F00008000000A000C000009C000__000000C366619FD8\n000000067F00008000000A000C0000098000-000000067F00008000000A000C000009C000__000000C42FE73810\n000000067F00008000000A000C000009C000-000000067F00008000000A000C00000A0000__000000C366619FD8\n000000067F00008000000A000C000009C000-000000067F00008000000A000C00000A0000__000000C42FE73810\n000000067F00008000000A000C00000A0000-000000067F00008000000A000C00000A4000__000000C366619FD8\n000000067F00008000000A000C00000A0000-000000067F00008000000A000C00000A4000__000000C42FE73810\n000000067F00008000000A000C00000A0C0B-000000067F00008000000A000C00000AA371__000000C217F3F379-000000C2C7B1ECC1\n000000067F00008000000A000C00000A4000-000000067F00008000000A000C00000A8000__000000C366619FD8\n000000067F00008000000A000C00000A4000-000000067F00008000000A000C00000A8000__000000C42FE73810\n000000067F00008000000A000C00000A8000-000000067F00008000000A000C00000AC000__000000C366619FD8\n000000067F00008000000A000C00000A8000-000000067F00008000000A000C00000AC000__000000C42FE73810\n000000067F00008000000A000C00000AA371-000000067F00008000000A000C00000B3AD7__000000C217F3F379-000000C2C7B1ECC1\n000000067F00008000000A000C00000AC000-000000067F00008000000A000C00000B0000__000000C366619FD8\n00000
0067F00008000000A000C00000AC000-000000067F00008000000A000C00000B0000__000000C42FE73810\n000000067F00008000000A000C00000ACA25-000000067F00008000000A000C0000102D7C__000000C3E17E01A1-000000C430961E71\n000000067F00008000000A000C00000B0000-000000067F00008000000A000C00000B4000__000000C366619FD8\n000000067F00008000000A000C00000B0000-000000067F00008000000A000C00000B4000__000000C42FE73810\n000000067F00008000000A000C00000B3AD7-000000067F00008000000A000C0100000000__000000C217F3F379-000000C2C7B1ECC1\n000000067F00008000000A000C00000B4000-000000067F00008000000A000C00000B8000__000000C366619FD8\n000000067F00008000000A000C00000B4000-000000067F00008000000A000C00000B8000__000000C42FE73810\n000000067F00008000000A000C00000B8000-000000067F00008000000A000C00000BC000__000000C366619FD8\n000000067F00008000000A000C00000B8000-000000067F00008000000A000C00000BC000__000000C42FE73810\n000000067F00008000000A000C00000B8B52-000000067F00008000000A00140000001132__000000C367E48001-000000C3E17E01A1\n000000067F00008000000A000C00000BC000-000000067F00008000000A000C00000C0000__000000C366619FD8\n000000067F00008000000A000C00000BC000-000000067F00008000000A000C00000C0000__000000C42FE73810\n000000067F00008000000A000C00000BC072-000000067F00008000000A000C00000C57A3__000000C2C7B1ECC1-000000C367E48001\n000000067F00008000000A000C00000C0000-000000067F00008000000A000C00000C4000__000000C366619FD8\n000000067F00008000000A000C00000C0000-000000067F00008000000A000C00000C4000__000000C42FE73810\n000000067F00008000000A000C00000C4000-000000067F00008000000A000C00000C8000__000000C366619FD8\n000000067F00008000000A000C00000C4000-000000067F00008000000A000C00000C8000__000000C42FE73810\n000000067F00008000000A000C00000C57A3-000000067F00008000000A000C00000CEF09__000000C2C7B1ECC1-000000C367E48001\n000000067F00008000000A000C00000C8000-000000067F00008000000A000C00000CC000__000000C366619FD8\n000000067F00008000000A000C00000C8000-000000067F00008000000A000C00000CC000__000000C42FE73810\n000000067F00008000000A000C00000CC000-000000067F00008000000A0
00C00000D0000__000000C366619FD8\n000000067F00008000000A000C00000CC000-000000067F00008000000A000C00000D0000__000000C42FE73810\n000000067F00008000000A000C00000CEF09-000000067F00008000000A000C00000D862B__000000C2C7B1ECC1-000000C367E48001\n000000067F00008000000A000C00000D0000-000000067F00008000000A000C00000D4000__000000C366619FD8\n000000067F00008000000A000C00000D0000-000000067F00008000000A000C00000D4000__000000C42FE73810\n000000067F00008000000A000C00000D4000-000000067F00008000000A000C00000D8000__000000C366619FD8\n000000067F00008000000A000C00000D4000-000000067F00008000000A000C00000D8000__000000C42FE73810\n000000067F00008000000A000C00000D8000-000000067F00008000000A000C00000DC000__000000C366619FD8\n000000067F00008000000A000C00000D8000-000000067F00008000000A000C00000DC000__000000C42FE73810\n000000067F00008000000A000C00000D862B-000000067F00008000000A000C00000E1D7F__000000C2C7B1ECC1-000000C367E48001\n000000067F00008000000A000C00000DC000-000000067F00008000000A000C00000E0000__000000C366619FD8\n000000067F00008000000A000C00000DC000-000000067F00008000000A000C00000E0000__000000C42FE73810\n000000067F00008000000A000C00000E0000-000000067F00008000000A000C00000E4000__000000C366619FD8\n000000067F00008000000A000C00000E0000-000000067F00008000000A000C00000E4000__000000C42FE73810\n000000067F00008000000A000C00000E1D7F-000000067F00008000000A000C00000EB4E5__000000C2C7B1ECC1-000000C367E48001\n000000067F00008000000A000C00000E4000-000000067F00008000000A000C00000E8000__000000C366619FD8\n000000067F00008000000A000C00000E4000-000000067F00008000000A000C00000E8000__000000C42FE73810\n000000067F00008000000A000C00000E8000-000000067F00008000000A000C00000EC000__000000C366619FD8\n000000067F00008000000A000C00000E8000-000000067F00008000000A000C00000EC000__000000C42FE73810\n000000067F00008000000A000C00000EB4E5-000000067F00008000000A000C00000F4C0B__000000C2C7B1ECC1-000000C367E48001\n000000067F00008000000A000C00000EC000-000000067F00008000000A000C00000F0000__000000C366619FD8\n000000067F00008000000A000C00000EC000-00
0000067F00008000000A000C00000F0000__000000C42FE73810\n000000067F00008000000A000C00000F0000-000000067F00008000000A000C00000F4000__000000C366619FD8\n000000067F00008000000A000C00000F0000-000000067F00008000000A000C00000F4000__000000C42FE73810\n000000067F00008000000A000C00000F4000-000000067F00008000000A000C00000F8000__000000C366619FD8\n000000067F00008000000A000C00000F4000-000000067F00008000000A000C00000F8000__000000C42FE73810\n000000067F00008000000A000C00000F4C0B-000000067F00008000000A000C00000FE371__000000C2C7B1ECC1-000000C367E48001\n000000067F00008000000A000C00000F8000-000000067F00008000000A000C00000FC000__000000C366619FD8\n000000067F00008000000A000C00000F8000-000000067F00008000000A000C00000FC000__000000C42FE73810\n000000067F00008000000A000C00000FC000-000000067F00008000000A000C0000100000__000000C366619FD8\n000000067F00008000000A000C00000FC000-000000067F00008000000A000C0000100000__000000C42FE73810\n000000067F00008000000A000C00000FE371-000000067F00008000000A000C0000107AD7__000000C2C7B1ECC1-000000C367E48001\n000000067F00008000000A000C0000100000-000000067F00008000000A000C0000104000__000000C366619FD8\n000000067F00008000000A000C0000100000-000000067F00008000000A000C0000104000__000000C42FE73810\n000000067F00008000000A000C0000102D7F-000000067F00008000000A0014000001409C__000000C3E17E01A1-000000C430961E71\n000000067F00008000000A000C0000104000-000000067F00008000000A000C0000108000__000000C366619FD8\n000000067F00008000000A000C0000104000-000000067F00008000000A000C0000108000__000000C42FE73810\n000000067F00008000000A000C0000107AD7-000000067F00008000000A000C000011120B__000000C2C7B1ECC1-000000C367E48001\n000000067F00008000000A000C0000108000-000000067F00008000000A000C000010C000__000000C366619FD8\n000000067F00008000000A000C0000108000-000000067F00008000000A000C000010C000__000000C42FE73810\n000000067F00008000000A000C000010C000-000000067F00008000000A000C0000110000__000000C366619FD8\n000000067F00008000000A000C000010C000-000000067F00008000000A000C0000110000__000000C42FE73810\n000000067F00008000
000A000C0000110000-000000067F00008000000A00120100000000__000000C42FE73810\n000000067F00008000000A000C0000110000-030000000000000000000000000000000002__000000C366619FD8\n000000067F00008000000A000C000011120B-010000000000000001000000050000000007__000000C2C7B1ECC1-000000C367E48001\n000000067F00008000000A00140000000000-000000067F00008000000A00140000004000__000000C42FE73810\n000000067F00008000000A00140000001132-000000067F00008000000A00140000007E49__000000C367E48001-000000C3E17E01A1\n000000067F00008000000A00140000004000-000000067F00008000000A00140000008000__000000C42FE73810\n000000067F00008000000A00140000007E49-000000067F00008000000A0014000000EBBC__000000C367E48001-000000C3E17E01A1\n000000067F00008000000A00140000008000-000000067F00008000000A0014000000C000__000000C42FE73810\n000000067F00008000000A0014000000C000-000000067F00008000000A00140000010000__000000C42FE73810\n000000067F00008000000A0014000000EBBC-000000067F00008000000A00140000015925__000000C367E48001-000000C3E17E01A1\n000000067F00008000000A00140000010000-000000067F00008000000A00140000014000__000000C42FE73810\n000000067F00008000000A00140000014000-000000067F00008000000A00140000018000__000000C42FE73810\n000000067F00008000000A0014000001409F-000000067F00008000000A0016000000020E__000000C3E17E01A1-000000C430961E71\n000000067F00008000000A00140000015925-000000067F00008000000A0014000001C612__000000C367E48001-000000C3E17E01A1\n000000067F00008000000A00140000018000-000000067F00008000000A0014000001C000__000000C42FE73810\n000000067F00008000000A0014000001C000-000000067F00008000000A00140000020000__000000C42FE73810\n000000067F00008000000A0014000001C612-000000067F00008000000A00140000023364__000000C367E48001-000000C3E17E01A1\n000000067F00008000000A00140000020000-000000067F00008000000A00140000024000__000000C42FE73810\n000000067F00008000000A00140000023364-000000067F00008000000A0014000002A070__000000C367E48001-000000C3E17E01A1\n000000067F00008000000A00140000024000-000000067F00008000000A00140000028000__000000C42FE73810\n000000067F00008000000A
00140000028000-000000067F00008000000A0014000002C000__000000C42FE73810\n000000067F00008000000A0014000002A070-030000000000000000000000000000000002__000000C367E48001-000000C3E17E01A1\n000000067F00008000000A0014000002C000-030000000000000000000000000000000002__000000C42FE73810\n000000067F00008000000A0016000000020E-030000000000000000000000000000000002__000000C3E17E01A1-000000C430961E71\n000000067F00008000000A200C0000000000-000000067F00008000000A200C0000004000__000000C601294000\n000000067F00008000000A200C0000004000-000000067F00008000000A200C0000008000__000000C601294000\n000000067F00008000000A200C0000008000-000000067F00008000000A200C000000C000__000000C601294000\n000000067F00008000000A200C0000009748-000000067F00008000000A200C0000012EAE__000000C430961E71-000000C4C05DDB29\n000000067F00008000000A200C000000C000-000000067F00008000000A200C0000010000__000000C601294000\n000000067F00008000000A200C0000010000-000000067F00008000000A200C0000014000__000000C601294000\n000000067F00008000000A200C0000012EAE-000000067F00008000000A200C000001C60A__000000C430961E71-000000C4C05DDB29\n000000067F00008000000A200C0000014000-000000067F00008000000A200C0000018000__000000C601294000\n000000067F00008000000A200C0000018000-000000067F00008000000A200C000001C000__000000C601294000\n000000067F00008000000A200C000001C000-000000067F00008000000A200C0000020000__000000C601294000\n000000067F00008000000A200C000001C60A-000000067F00008000000A200C0000025D38__000000C430961E71-000000C4C05DDB29\n000000067F00008000000A200C0000020000-000000067F00008000000A200C0000024000__000000C601294000\n000000067F00008000000A200C0000024000-000000067F00008000000A200C0000028000__000000C601294000\n000000067F00008000000A200C0000025D38-000000067F00008000000A200C000002F49E__000000C430961E71-000000C4C05DDB29\n000000067F00008000000A200C0000028000-000000067F00008000000A200C000002C000__000000C601294000\n000000067F00008000000A200C000002C000-000000067F00008000000A200C0000030000__000000C601294000\n000000067F00008000000A200C000002F49E-000000067F00008000000A2
00C0000038BB1__000000C430961E71-000000C4C05DDB29\n000000067F00008000000A200C0000030000-000000067F00008000000A200C0000034000__000000C601294000\n000000067F00008000000A200C0000034000-000000067F00008000000A200C0000038000__000000C601294000\n000000067F00008000000A200C0000038000-000000067F00008000000A200C000003C000__000000C601294000\n000000067F00008000000A200C0000038BB1-000000067F00008000000A200C0000042317__000000C430961E71-000000C4C05DDB29\n000000067F00008000000A200C000003C000-000000067F00008000000A200C0000040000__000000C601294000\n000000067F00008000000A200C0000040000-000000067F00008000000A200C0000044000__000000C601294000\n000000067F00008000000A200C0000042317-000000067F00008000000A200C000004BA7D__000000C430961E71-000000C4C05DDB29\n000000067F00008000000A200C0000044000-000000067F00008000000A200C0000048000__000000C601294000\n000000067F00008000000A200C0000048000-000000067F00008000000A200C000004C000__000000C601294000\n000000067F00008000000A200C000004BA7D-000000067F00008000000A200C00000551B2__000000C430961E71-000000C4C05DDB29\n000000067F00008000000A200C000004C000-000000067F00008000000A200C0000050000__000000C601294000\n000000067F00008000000A200C0000050000-000000067F00008000000A200C0000054000__000000C601294000\n000000067F00008000000A200C0000054000-000000067F00008000000A200C0000058000__000000C5FED35FC8\n000000067F00008000000A200C0000054000-000000067F00008000000A200C0000058000__000000C6C7BD8140\n000000067F00008000000A200C00000551B2-030000000000000000000000000000000002__000000C430961E71-000000C4C05DDB29\n000000067F00008000000A200C0000055230-000000067F00008000000A200C000005E996__000000C4C05DDB29-000000C56021EB29\n000000067F00008000000A200C0000058000-000000067F00008000000A200C000005C000__000000C5FED35FC8\n000000067F00008000000A200C0000058000-000000067F00008000000A200C000005C000__000000C6C7BD8140\n000000067F00008000000A200C000005C000-000000067F00008000000A200C0000060000__000000C5FED35FC8\n000000067F00008000000A200C000005C000-000000067F00008000000A200C0000060000__000000C6C7BD8140\n00000
0067F00008000000A200C000005E996-000000067F00008000000A200C00000680FC__000000C4C05DDB29-000000C56021EB29\n000000067F00008000000A200C0000060000-000000067F00008000000A200C0000064000__000000C5FED35FC8\n000000067F00008000000A200C0000060000-000000067F00008000000A200C0000064000__000000C6C7BD8140\n000000067F00008000000A200C0000064000-000000067F00008000000A200C0000068000__000000C5FED35FC8\n000000067F00008000000A200C0000064000-000000067F00008000000A200C0000068000__000000C6C7BD8140\n000000067F00008000000A200C00000677DB-000000067F00008000000A200C00000CF739__000000C689AF4AC1-000000C6C87B6329\n000000067F00008000000A200C0000068000-000000067F00008000000A200C000006C000__000000C5FED35FC8\n000000067F00008000000A200C0000068000-000000067F00008000000A200C000006C000__000000C6C7BD8140\n000000067F00008000000A200C00000680FC-000000067F00008000000A200C000007180C__000000C4C05DDB29-000000C56021EB29\n000000067F00008000000A200C000006C000-000000067F00008000000A200C0000070000__000000C5FED35FC8\n000000067F00008000000A200C000006C000-000000067F00008000000A200C0000070000__000000C6C7BD8140\n000000067F00008000000A200C0000070000-000000067F00008000000A200C0000074000__000000C5FED35FC8\n000000067F00008000000A200C0000070000-000000067F00008000000A200C0000074000__000000C6C7BD8140\n000000067F00008000000A200C000007180C-000000067F00008000000A200C000007AF72__000000C4C05DDB29-000000C56021EB29\n000000067F00008000000A200C0000074000-000000067F00008000000A200C0000078000__000000C5FED35FC8\n000000067F00008000000A200C0000074000-000000067F00008000000A200C0000078000__000000C6C7BD8140\n000000067F00008000000A200C0000078000-000000067F00008000000A200C000007C000__000000C5FED35FC8\n000000067F00008000000A200C0000078000-000000067F00008000000A200C000007C000__000000C6C7BD8140\n000000067F00008000000A200C000007AF72-000000067F00008000000A200C00000846D8__000000C4C05DDB29-000000C56021EB29\n000000067F00008000000A200C000007C000-000000067F00008000000A200C0000080000__000000C5FED35FC8\n000000067F00008000000A200C000007C000-000000067F00008000000A2
00C0000080000__000000C6C7BD8140\n000000067F00008000000A200C0000080000-000000067F00008000000A200C0000084000__000000C5FED35FC8\n000000067F00008000000A200C0000080000-000000067F00008000000A200C0000084000__000000C6C7BD8140\n000000067F00008000000A200C0000084000-000000067F00008000000A200C0000088000__000000C5FED35FC8\n000000067F00008000000A200C0000084000-000000067F00008000000A200C0000088000__000000C6C7BD8140\n000000067F00008000000A200C00000846D8-000000067F00008000000A200C000008DE0B__000000C4C05DDB29-000000C56021EB29\n000000067F00008000000A200C0000088000-000000067F00008000000A200C000008C000__000000C5FED35FC8\n000000067F00008000000A200C0000088000-000000067F00008000000A200C000008C000__000000C6C7BD8140\n000000067F00008000000A200C000008C000-000000067F00008000000A200C0000090000__000000C5FED35FC8\n000000067F00008000000A200C000008C000-000000067F00008000000A200C0000090000__000000C6C7BD8140\n000000067F00008000000A200C000008DE0B-000000067F00008000000A200C000009752B__000000C4C05DDB29-000000C56021EB29\n000000067F00008000000A200C0000090000-000000067F00008000000A200C0000094000__000000C5FED35FC8\n000000067F00008000000A200C0000090000-000000067F00008000000A200C0000094000__000000C6C7BD8140\n000000067F00008000000A200C00000933F0-000000067F00008000000A200C0000110901__000000C600A8FFF9-000000C689AF4AC1\n000000067F00008000000A200C0000094000-000000067F00008000000A200C0000098000__000000C5FED35FC8\n000000067F00008000000A200C0000094000-000000067F00008000000A200C0000098000__000000C6C7BD8140\n000000067F00008000000A200C000009752B-000000067F00008000000A200C00000A0C91__000000C4C05DDB29-000000C56021EB29\n000000067F00008000000A200C0000098000-000000067F00008000000A200C000009C000__000000C5FED35FC8\n000000067F00008000000A200C0000098000-000000067F00008000000A200C000009C000__000000C6C7BD8140\n000000067F00008000000A200C000009C000-000000067F00008000000A200C00000A0000__000000C5FED35FC8\n000000067F00008000000A200C000009C000-000000067F00008000000A200C00000A0000__000000C6C7BD8140\n000000067F00008000000A200C00000A0000-00
0000067F00008000000A200C00000A4000__000000C5FED35FC8\n000000067F00008000000A200C00000A0000-000000067F00008000000A200C00000A4000__000000C6C7BD8140\n000000067F00008000000A200C00000A0C91-000000067F00008000000A200C00000AA3F7__000000C4C05DDB29-000000C56021EB29\n000000067F00008000000A200C00000A4000-000000067F00008000000A200C00000A8000__000000C5FED35FC8\n000000067F00008000000A200C00000A4000-000000067F00008000000A200C00000A8000__000000C6C7BD8140\n000000067F00008000000A200C00000A8000-000000067F00008000000A200C00000AC000__000000C5FED35FC8\n000000067F00008000000A200C00000A8000-000000067F00008000000A200C00000AC000__000000C6C7BD8140\n000000067F00008000000A200C00000AA3F7-000000067F00008000000A200C00000B3B0C__000000C4C05DDB29-000000C56021EB29\n000000067F00008000000A200C00000AC000-000000067F00008000000A200C00000B0000__000000C5FED35FC8\n000000067F00008000000A200C00000AC000-000000067F00008000000A200C00000B0000__000000C6C7BD8140\n000000067F00008000000A200C00000B0000-000000067F00008000000A200C00000B4000__000000C5FED35FC8\n000000067F00008000000A200C00000B0000-000000067F00008000000A200C00000B4000__000000C6C7BD8140\n000000067F00008000000A200C00000B3B0C-000000067F00008000000A200C0100000000__000000C4C05DDB29-000000C56021EB29\n000000067F00008000000A200C00000B4000-000000067F00008000000A200C00000B8000__000000C5FED35FC8\n000000067F00008000000A200C00000B4000-000000067F00008000000A200C00000B8000__000000C6C7BD8140\n000000067F00008000000A200C00000B8000-000000067F00008000000A200C00000BC000__000000C5FED35FC8\n000000067F00008000000A200C00000B8000-000000067F00008000000A200C00000BC000__000000C6C7BD8140\n000000067F00008000000A200C00000BBC1F-000000067F00008000000A200C00000C5353__000000C56021EB29-000000C600A8FFF9\n000000067F00008000000A200C00000BC000-000000067F00008000000A200C00000C0000__000000C5FED35FC8\n000000067F00008000000A200C00000BC000-000000067F00008000000A200C00000C0000__000000C6C7BD8140\n000000067F00008000000A200C00000C0000-000000067F00008000000A200C00000C4000__000000C5FED35FC8\n000000067F00008000
000A200C00000C0000-000000067F00008000000A200C00000C4000__000000C6C7BD8140\n000000067F00008000000A200C00000C4000-000000067F00008000000A200C00000C8000__000000C5FED35FC8\n000000067F00008000000A200C00000C4000-000000067F00008000000A200C00000C8000__000000C6C7BD8140\n000000067F00008000000A200C00000C5353-000000067F00008000000A200C00000CEAB9__000000C56021EB29-000000C600A8FFF9\n000000067F00008000000A200C00000C8000-000000067F00008000000A200C00000CC000__000000C5FED35FC8\n000000067F00008000000A200C00000C8000-000000067F00008000000A200C00000CC000__000000C6C7BD8140\n000000067F00008000000A200C00000CC000-000000067F00008000000A200C00000D0000__000000C5FED35FC8\n000000067F00008000000A200C00000CC000-000000067F00008000000A200C00000D0000__000000C6C7BD8140\n000000067F00008000000A200C00000CEAB9-000000067F00008000000A200C00000D81D2__000000C56021EB29-000000C600A8FFF9\n000000067F00008000000A200C00000CF742-000000067F00008000000A2014000000B47B__000000C689AF4AC1-000000C6C87B6329\n000000067F00008000000A200C00000D0000-000000067F00008000000A200C00000D4000__000000C5FED35FC8\n000000067F00008000000A200C00000D0000-000000067F00008000000A200C00000D4000__000000C6C7BD8140\n000000067F00008000000A200C00000D4000-000000067F00008000000A200C00000D8000__000000C5FED35FC8\n000000067F00008000000A200C00000D4000-000000067F00008000000A200C00000D8000__000000C6C7BD8140\n000000067F00008000000A200C00000D8000-000000067F00008000000A200C00000DC000__000000C5FED35FC8\n000000067F00008000000A200C00000D8000-000000067F00008000000A200C00000DC000__000000C6C7BD8140\n000000067F00008000000A200C00000D81D2-000000067F00008000000A200C00000E190B__000000C56021EB29-000000C600A8FFF9\n000000067F00008000000A200C00000DC000-000000067F00008000000A200C00000E0000__000000C5FED35FC8\n000000067F00008000000A200C00000DC000-000000067F00008000000A200C00000E0000__000000C6C7BD8140\n000000067F00008000000A200C00000E0000-000000067F00008000000A200C00000E4000__000000C5FED35FC8\n000000067F00008000000A200C00000E0000-000000067F00008000000A200C00000E4000__000000C6C7BD814
0\n000000067F00008000000A200C00000E190B-000000067F00008000000A200C00000EB071__000000C56021EB29-000000C600A8FFF9\n000000067F00008000000A200C00000E4000-000000067F00008000000A200C00000E8000__000000C5FED35FC8\n000000067F00008000000A200C00000E4000-000000067F00008000000A200C00000E8000__000000C6C7BD8140\n000000067F00008000000A200C00000E8000-000000067F00008000000A200C00000EC000__000000C5FED35FC8\n000000067F00008000000A200C00000E8000-000000067F00008000000A200C00000EC000__000000C6C7BD8140\n000000067F00008000000A200C00000EB071-000000067F00008000000A200C00000F47AC__000000C56021EB29-000000C600A8FFF9\n000000067F00008000000A200C00000EC000-000000067F00008000000A200C00000F0000__000000C5FED35FC8\n000000067F00008000000A200C00000EC000-000000067F00008000000A200C00000F0000__000000C6C7BD8140\n000000067F00008000000A200C00000F0000-000000067F00008000000A200C00000F4000__000000C5FED35FC8\n000000067F00008000000A200C00000F0000-000000067F00008000000A200C00000F4000__000000C6C7BD8140\n000000067F00008000000A200C00000F4000-000000067F00008000000A200C00000F8000__000000C5FED35FC8\n000000067F00008000000A200C00000F4000-000000067F00008000000A200C00000F8000__000000C6C7BD8140\n000000067F00008000000A200C00000F47AC-000000067F00008000000A200C00000FDF0A__000000C56021EB29-000000C600A8FFF9\n000000067F00008000000A200C00000F8000-000000067F00008000000A200C00000FC000__000000C5FED35FC8\n000000067F00008000000A200C00000F8000-000000067F00008000000A200C00000FC000__000000C6C7BD8140\n000000067F00008000000A200C00000FC000-000000067F00008000000A200C0000100000__000000C5FED35FC8\n000000067F00008000000A200C00000FC000-000000067F00008000000A200C0000100000__000000C6C7BD8140\n000000067F00008000000A200C00000FDF0A-000000067F00008000000A200C000010762B__000000C56021EB29-000000C600A8FFF9\n000000067F00008000000A200C0000100000-000000067F00008000000A200C0000104000__000000C5FED35FC8\n000000067F00008000000A200C0000100000-000000067F00008000000A200C0000104000__000000C6C7BD8140\n000000067F00008000000A200C0000104000-000000067F00008000000A200C000010
8000__000000C5FED35FC8\n000000067F00008000000A200C0000104000-000000067F00008000000A200C0000108000__000000C6C7BD8140\n000000067F00008000000A200C000010762B-000000067F00008000000A200C0000110D88__000000C56021EB29-000000C600A8FFF9\n000000067F00008000000A200C0000108000-000000067F00008000000A200C000010C000__000000C5FED35FC8\n000000067F00008000000A200C0000108000-000000067F00008000000A200C000010C000__000000C6C7BD8140\n000000067F00008000000A200C000010C000-000000067F00008000000A200C0000110000__000000C5FED35FC8\n000000067F00008000000A200C000010C000-000000067F00008000000A200C0000110000__000000C6C7BD8140\n000000067F00008000000A200C0000110000-000000067F00008000000A20120100000000__000000C6C7BD8140\n000000067F00008000000A200C0000110000-030000000000000000000000000000000002__000000C5FED35FC8\n000000067F00008000000A200C0000110901-000000067F00008000000A201400000047CD__000000C600A8FFF9-000000C689AF4AC1\n000000067F00008000000A200C0000110D88-01000000000000000100000005000000000A__000000C56021EB29-000000C600A8FFF9\n000000067F00008000000A20140000000000-000000067F00008000000A20140000004000__000000C6C7BD8140\n000000067F00008000000A20140000004000-000000067F00008000000A20140000008000__000000C6C7BD8140\n000000067F00008000000A201400000047CD-000000067F00008000000A2014000000ADA8__000000C600A8FFF9-000000C689AF4AC1\n000000067F00008000000A20140000008000-000000067F00008000000A2014000000C000__000000C6C7BD8140\n000000067F00008000000A2014000000ADA8-000000067F00008000000A201400000113B8__000000C600A8FFF9-000000C689AF4AC1\n000000067F00008000000A2014000000B47C-010000000000000001000000050100000000__000000C689AF4AC1-000000C6C87B6329\n000000067F00008000000A2014000000C000-000000067F00008000000A20140000010000__000000C6C7BD8140\n000000067F00008000000A20140000010000-000000067F00008000000A20140000014000__000000C6C7BD8140\n000000067F00008000000A201400000113B8-000000067F00008000000A20140000017969__000000C600A8FFF9-000000C689AF4AC1\n000000067F00008000000A20140000014000-000000067F00008000000A20140000018000__000000C6C7BD814
0\n000000067F00008000000A20140000017969-000000067F00008000000A2014000001DF7E__000000C600A8FFF9-000000C689AF4AC1\n000000067F00008000000A20140000018000-000000067F00008000000A2014000001C000__000000C6C7BD8140\n000000067F00008000000A2014000001C000-000000067F00008000000A20140000020000__000000C6C7BD8140\n000000067F00008000000A2014000001DF7E-000000067F00008000000A2014000002457D__000000C600A8FFF9-000000C689AF4AC1\n000000067F00008000000A20140000020000-000000067F00008000000A20140000024000__000000C6C7BD8140\n000000067F00008000000A20140000024000-000000067F00008000000A20140000028000__000000C6C7BD8140\n000000067F00008000000A2014000002457D-000000067F00008000000A2014000002AB1D__000000C600A8FFF9-000000C689AF4AC1\n000000067F00008000000A20140000028000-000000067F00008000000A2014000002C000__000000C6C7BD8140\n000000067F00008000000A2014000002AB1D-030000000000000000000000000000000002__000000C600A8FFF9-000000C689AF4AC1\n000000067F00008000000A2014000002C000-030000000000000000000000000000000002__000000C6C7BD8140\n000000067F00008000000A400C0000000000-000000067F00008000000A400C0000004000__000000C896B8DFD8\n000000067F00008000000A400C0000004000-000000067F00008000000A400C0000008000__000000C896B8DFD8\n000000067F00008000000A400C0000008000-000000067F00008000000A400C000000C000__000000C896B8DFD8\n000000067F00008000000A400C0000009743-000000067F00008000000A400C0000012EA9__000000C6C87B6329-000000C74849FAE1\n000000067F00008000000A400C000000C000-000000067F00008000000A400C0000010000__000000C896B8DFD8\n000000067F00008000000A400C0000010000-000000067F00008000000A400C0000014000__000000C896B8DFD8\n000000067F00008000000A400C0000012EA9-000000067F00008000000A400C000001C60A__000000C6C87B6329-000000C74849FAE1\n000000067F00008000000A400C0000014000-000000067F00008000000A400C0000018000__000000C896B8DFD8\n000000067F00008000000A400C0000018000-000000067F00008000000A400C000001C000__000000C896B8DFD8\n000000067F00008000000A400C000001C000-000000067F00008000000A400C0000020000__000000C896B8DFD8\n000000067F00008000000A400C000001C60
A-000000067F00008000000A400C0000025D38__000000C6C87B6329-000000C74849FAE1\n000000067F00008000000A400C0000020000-000000067F00008000000A400C0000024000__000000C896B8DFD8\n000000067F00008000000A400C0000024000-000000067F00008000000A400C0000028000__000000C896B8DFD8\n000000067F00008000000A400C0000025D38-000000067F00008000000A400C000002F49E__000000C6C87B6329-000000C74849FAE1\n000000067F00008000000A400C0000028000-000000067F00008000000A400C000002C000__000000C896B8DFD8\n000000067F00008000000A400C000002C000-000000067F00008000000A400C0000030000__000000C896B8DFD8\n000000067F00008000000A400C000002F49E-000000067F00008000000A400C0000038BB1__000000C6C87B6329-000000C74849FAE1\n000000067F00008000000A400C0000030000-000000067F00008000000A400C0000034000__000000C896B8DFD8\n000000067F00008000000A400C0000034000-000000067F00008000000A400C0000038000__000000C896B8DFD8\n000000067F00008000000A400C0000038000-000000067F00008000000A400C000003C000__000000C896B8DFD8\n000000067F00008000000A400C0000038BB1-000000067F00008000000A400C0000042317__000000C6C87B6329-000000C74849FAE1\n000000067F00008000000A400C000003C000-000000067F00008000000A400C0000040000__000000C896B8DFD8\n000000067F00008000000A400C0000040000-000000067F00008000000A400C0000044000__000000C896B8DFD8\n000000067F00008000000A400C0000042317-000000067F00008000000A400C000004BA7D__000000C6C87B6329-000000C74849FAE1\n000000067F00008000000A400C0000044000-000000067F00008000000A400C0000048000__000000C896B8DFD8\n000000067F00008000000A400C0000048000-000000067F00008000000A400C000004C000__000000C896B8DFD8\n000000067F00008000000A400C000004BA7D-030000000000000000000000000000000002__000000C6C87B6329-000000C74849FAE1\n000000067F00008000000A400C000004C000-000000067F00008000000A400C0000050000__000000C896B8DFD8\n000000067F00008000000A400C0000050000-000000067F00008000000A400C0000054000__000000C896B8DFD8\n000000067F00008000000A400C0000054000-000000067F00008000000A400C0000058000__000000C896B8DFD8\n000000067F00008000000A400C00000551FC-000000067F00008000000A400C000005E90B
__000000C74849FAE1-000000C80801E859\n000000067F00008000000A400C0000058000-000000067F00008000000A400C000005C000__000000C896B8DFD8\n000000067F00008000000A400C000005C000-000000067F00008000000A400C0000060000__000000C896B8DFD8\n000000067F00008000000A400C000005E90B-000000067F00008000000A400C000006802B__000000C74849FAE1-000000C80801E859\n000000067F00008000000A400C0000060000-000000067F00008000000A400C0000064000__000000C896B8DFD8\n000000067F00008000000A400C0000064000-000000067F00008000000A400C0000068000__000000C896B8DFD8\n000000067F00008000000A400C0000068000-000000067F00008000000A400C000006C000__000000C896B8DFD8\n000000067F00008000000A400C000006802B-000000067F00008000000A400C0000071782__000000C74849FAE1-000000C80801E859\n000000067F00008000000A400C000006C000-000000067F00008000000A400C0000070000__000000C896B8DFD8\n000000067F00008000000A400C0000070000-000000067F00008000000A400C0000074000__000000C896B8DFD8\n000000067F00008000000A400C0000071782-000000067F00008000000A400C000007AEE8__000000C74849FAE1-000000C80801E859\n000000067F00008000000A400C0000074000-000000067F00008000000A400C0000078000__000000C896B8DFD8\n000000067F00008000000A400C0000078000-000000067F00008000000A400C000007C000__000000C896B8DFD8\n000000067F00008000000A400C000007AEE8-000000067F00008000000A400C000008460B__000000C74849FAE1-000000C80801E859\n000000067F00008000000A400C000007C000-000000067F00008000000A400C0000080000__000000C896B8DFD8\n000000067F00008000000A400C0000080000-000000067F00008000000A400C0000084000__000000C896B8DFD8\n000000067F00008000000A400C0000084000-000000067F00008000000A400C0000088000__000000C896B8DFD8\n000000067F00008000000A400C000008460B-000000067F00008000000A400C000008DD71__000000C74849FAE1-000000C80801E859\n000000067F00008000000A400C0000088000-000000067F00008000000A400C000008C000__000000C896B8DFD8\n000000067F00008000000A400C000008C000-000000067F00008000000A400C0000090000__000000C896B8DFD8\n000000067F00008000000A400C000008DD71-000000067F00008000000A400C00000974D7__000000C74849FAE1-000000C80801E859\n0
00000067F00008000000A400C0000090000-000000067F00008000000A400C0000094000__000000C896B8DFD8\n000000067F00008000000A400C0000094000-000000067F00008000000A400C0000098000__000000C896B8DFD8\n000000067F00008000000A400C00000974D7-000000067F00008000000A400C00000A0C0B__000000C74849FAE1-000000C80801E859\n000000067F00008000000A400C0000098000-000000067F00008000000A400C000009C000__000000C896B8DFD8\n000000067F00008000000A400C000009C000-000000067F00008000000A400C00000A0000__000000C896B8DFD8\n000000067F00008000000A400C00000A0000-000000067F00008000000A400C00000A4000__000000C896B8DFD8\n000000067F00008000000A400C00000A0C0B-000000067F00008000000A400C00000AA371__000000C74849FAE1-000000C80801E859\n000000067F00008000000A400C00000A4000-000000067F00008000000A400C00000A8000__000000C896B8DFD8\n000000067F00008000000A400C00000A8000-000000067F00008000000A400C00000AC000__000000C896B8DFD8\n000000067F00008000000A400C00000AA371-000000067F00008000000A400C00000B3AD7__000000C74849FAE1-000000C80801E859\n000000067F00008000000A400C00000AC000-000000067F00008000000A400C00000B0000__000000C896B8DFD8\n000000067F00008000000A400C00000B0000-000000067F00008000000A400C00000B4000__000000C896B8DFD8\n000000067F00008000000A400C00000B3AD7-000000067F00008000000A400C00000BD20B__000000C74849FAE1-000000C80801E859\n000000067F00008000000A400C00000B4000-000000067F00008000000A400C00000B8000__000000C896B8DFD8\n000000067F00008000000A400C00000B8000-000000067F00008000000A400C00000BC000__000000C896B8DFD8\n000000067F00008000000A400C00000BC000-000000067F00008000000A400C00000C0000__000000C896B8DFD8\n000000067F00008000000A400C00000BD20B-000000067F00008000000A400C0100000000__000000C74849FAE1-000000C80801E859\n000000067F00008000000A400C00000C0000-000000067F00008000000A400C00000C4000__000000C896B8DFD8\n000000067F00008000000A400C00000C4000-000000067F00008000000A400C00000C8000__000000C896B8DFD8\n000000067F00008000000A400C00000C4AE6-000000067F00008000000A400C00000CE20C__000000C80801E859-000000C8993EBFF9\n000000067F00008000000A400C00000C8000-00
0000067F00008000000A400C00000CC000__000000C896B8DFD8\n000000067F00008000000A400C00000CC000-000000067F00008000000A400C00000D0000__000000C896B8DFD8\n000000067F00008000000A400C00000CE20C-000000067F00008000000A400C00000D7929__000000C80801E859-000000C8993EBFF9\n000000067F00008000000A400C00000D0000-000000067F00008000000A400C00000D4000__000000C896B8DFD8\n000000067F00008000000A400C00000D4000-000000067F00008000000A400C00000D8000__000000C896B8DFD8\n000000067F00008000000A400C00000D7929-000000067F00008000000A400C00000E108F__000000C80801E859-000000C8993EBFF9\n000000067F00008000000A400C00000D8000-000000067F00008000000A400C00000DC000__000000C896B8DFD8\n000000067F00008000000A400C00000DC000-000000067F00008000000A400C00000E0000__000000C896B8DFD8\n000000067F00008000000A400C00000E0000-000000067F00008000000A400C00000E4000__000000C896B8DFD8\n000000067F00008000000A400C00000E108F-000000067F00008000000A400C00000EA7F5__000000C80801E859-000000C8993EBFF9\n000000067F00008000000A400C00000E4000-000000067F00008000000A400C00000E8000__000000C896B8DFD8\n000000067F00008000000A400C00000E8000-000000067F00008000000A400C00000EC000__000000C896B8DFD8\n000000067F00008000000A400C00000EA7F5-000000067F00008000000A400C00000F3F0B__000000C80801E859-000000C8993EBFF9\n000000067F00008000000A400C00000EC000-000000067F00008000000A400C00000F0000__000000C896B8DFD8\n000000067F00008000000A400C00000F0000-000000067F00008000000A400C00000F4000__000000C896B8DFD8\n000000067F00008000000A400C00000F3F0B-000000067F00008000000A400C00000FD671__000000C80801E859-000000C8993EBFF9\n000000067F00008000000A400C00000F4000-000000067F00008000000A400C00000F8000__000000C896B8DFD8\n000000067F00008000000A400C00000F8000-000000067F00008000000A400C00000FC000__000000C896B8DFD8\n000000067F00008000000A400C00000FC000-000000067F00008000000A400C0000100000__000000C896B8DFD8\n000000067F00008000000A400C00000FD671-000000067F00008000000A400C0000106D95__000000C80801E859-000000C8993EBFF9\n000000067F00008000000A400C0000100000-000000067F00008000000A400C0000104000__00
0000C896B8DFD8\n000000067F00008000000A400C0000104000-000000067F00008000000A400C0000108000__000000C896B8DFD8\n000000067F00008000000A400C0000106D95-000000067F00008000000A400C00001104FB__000000C80801E859-000000C8993EBFF9\n000000067F00008000000A400C0000107F8F-000000067F00008000000A40140000005626__000000C8993EBFF9-000000C90726D0D9\n000000067F00008000000A400C0000108000-000000067F00008000000A400C000010C000__000000C896B8DFD8\n000000067F00008000000A400C000010C000-000000067F00008000000A400C0000110000__000000C896B8DFD8\n000000067F00008000000A400C0000110000-030000000000000000000000000000000002__000000C896B8DFD8\n000000067F00008000000A400C00001104FB-01000000000000000100000005000000000D__000000C80801E859-000000C8993EBFF9\n000000067F00008000000A40140000005626-000000067F00008000000A4014000000C7F9__000000C8993EBFF9-000000C90726D0D9\n000000067F00008000000A4014000000C7F9-000000067F00008000000A401400000139F8__000000C8993EBFF9-000000C90726D0D9\n000000067F00008000000A401400000139F8-000000067F00008000000A4014000001ABE9__000000C8993EBFF9-000000C90726D0D9\n000000067F00008000000A4014000001ABE9-000000067F00008000000A40140000021DF4__000000C8993EBFF9-000000C90726D0D9\n000000067F00008000000A40140000021DF4-000000067F00008000000A40140000028FA9__000000C8993EBFF9-000000C90726D0D9\n000000067F00008000000A40140000028FA9-030000000000000000000000000000000002__000000C8993EBFF9-000000C90726D0D9\n000000067F00008000000A600C0000000000-000000067F00008000000A600C0000004000__000000CA2C877DC8\n000000067F00008000000A600C0000000000-000000067F00008000000A600C0000004000__000000CB82C2FF68\n000000067F00008000000A600C0000004000-000000067F00008000000A600C0000008000__000000CA2C877DC8\n000000067F00008000000A600C0000004000-000000067F00008000000A600C0000008000__000000CB82C2FF68\n000000067F00008000000A600C0000008000-000000067F00008000000A600C000000C000__000000CA2C877DC8\n000000067F00008000000A600C0000008000-000000067F00008000000A600C000000C000__000000CB82C2FF68\n000000067F00008000000A600C0000009746-000000067F00008000000A600C0
000012EAC__000000C90726D0D9-000000C986F5F0D9\n000000067F00008000000A600C000000C000-000000067F00008000000A600C0000010000__000000CA2C877DC8\n000000067F00008000000A600C000000C000-000000067F00008000000A600C0000010000__000000CB82C2FF68\n000000067F00008000000A600C0000010000-000000067F00008000000A600C0000014000__000000CA2C877DC8\n000000067F00008000000A600C0000010000-000000067F00008000000A600C0000014000__000000CB82C2FF68\n000000067F00008000000A600C0000012EAC-000000067F00008000000A600C000001C60A__000000C90726D0D9-000000C986F5F0D9\n000000067F00008000000A600C0000014000-000000067F00008000000A600C0000018000__000000CA2C877DC8\n000000067F00008000000A600C0000014000-000000067F00008000000A600C0000018000__000000CB82C2FF68\n000000067F00008000000A600C0000018000-000000067F00008000000A600C000001C000__000000CA2C877DC8\n000000067F00008000000A600C0000018000-000000067F00008000000A600C000001C000__000000CB82C2FF68\n000000067F00008000000A600C000001C000-000000067F00008000000A600C0000020000__000000CA2C877DC8\n000000067F00008000000A600C000001C000-000000067F00008000000A600C0000020000__000000CB82C2FF68\n000000067F00008000000A600C000001C60A-000000067F00008000000A600C0000025D38__000000C90726D0D9-000000C986F5F0D9\n000000067F00008000000A600C0000020000-000000067F00008000000A600C0000024000__000000CA2C877DC8\n000000067F00008000000A600C0000020000-000000067F00008000000A600C0000024000__000000CB82C2FF68\n000000067F00008000000A600C0000024000-000000067F00008000000A600C0000028000__000000CA2C877DC8\n000000067F00008000000A600C0000024000-000000067F00008000000A600C0000028000__000000CB82C2FF68\n000000067F00008000000A600C0000025D38-000000067F00008000000A600C000002F49E__000000C90726D0D9-000000C986F5F0D9\n000000067F00008000000A600C0000028000-000000067F00008000000A600C000002C000__000000CA2C877DC8\n000000067F00008000000A600C0000028000-000000067F00008000000A600C000002C000__000000CB82C2FF68\n000000067F00008000000A600C000002C000-000000067F00008000000A600C0000030000__000000CA2C877DC8\n000000067F00008000000A600C000002C000-000000
067F00008000000A600C0000030000__000000CB82C2FF68\n000000067F00008000000A600C000002F49E-000000067F00008000000A600C0000038BB1__000000C90726D0D9-000000C986F5F0D9\n000000067F00008000000A600C0000030000-000000067F00008000000A600C0000034000__000000CA2C877DC8\n000000067F00008000000A600C0000030000-000000067F00008000000A600C0000034000__000000CB82C2FF68\n000000067F00008000000A600C0000034000-000000067F00008000000A600C0000038000__000000CA2C877DC8\n000000067F00008000000A600C0000034000-000000067F00008000000A600C0000038000__000000CB82C2FF68\n000000067F00008000000A600C0000038000-000000067F00008000000A600C000003C000__000000CA2C877DC8\n000000067F00008000000A600C0000038000-000000067F00008000000A600C000003C000__000000CB82C2FF68\n000000067F00008000000A600C0000038BB1-000000067F00008000000A600C0000042317__000000C90726D0D9-000000C986F5F0D9\n000000067F00008000000A600C000003C000-000000067F00008000000A600C0000040000__000000CA2C877DC8\n000000067F00008000000A600C000003C000-000000067F00008000000A600C0000040000__000000CB82C2FF68\n000000067F00008000000A600C0000040000-000000067F00008000000A600C0000044000__000000CA2C877DC8\n000000067F00008000000A600C0000040000-000000067F00008000000A600C0000044000__000000CB82C2FF68\n000000067F00008000000A600C0000042317-000000067F00008000000A600C000004BA7D__000000C90726D0D9-000000C986F5F0D9\n000000067F00008000000A600C0000044000-000000067F00008000000A600C0000048000__000000CA2C877DC8\n000000067F00008000000A600C0000044000-000000067F00008000000A600C0000048000__000000CB82C2FF68\n000000067F00008000000A600C0000048000-000000067F00008000000A600C000004C000__000000CA2C877DC8\n000000067F00008000000A600C0000048000-000000067F00008000000A600C000004C000__000000CB82C2FF68\n000000067F00008000000A600C000004BA7D-030000000000000000000000000000000002__000000C90726D0D9-000000C986F5F0D9\n000000067F00008000000A600C000004C000-000000067F00008000000A600C0000050000__000000CA2C877DC8\n000000067F00008000000A600C000004C000-000000067F00008000000A600C0000050000__000000CB82C2FF68\n000000067F00008000000A
600C0000050000-000000067F00008000000A600C0000054000__000000CA2C877DC8\n000000067F00008000000A600C0000050000-000000067F00008000000A600C0000054000__000000CB82C2FF68\n000000067F00008000000A600C0000054000-000000067F00008000000A600C0000058000__000000CA2C877DC8\n000000067F00008000000A600C0000054000-000000067F00008000000A600C0000058000__000000CB82C2FF68\n000000067F00008000000A600C0000054BFB-000000067F00008000000A600C000005E30C__000000C986F5F0D9-000000CAD5D7FFF1\n000000067F00008000000A600C0000058000-000000067F00008000000A600C000005C000__000000CA2C877DC8\n000000067F00008000000A600C0000058000-000000067F00008000000A600C000005C000__000000CB82C2FF68\n000000067F00008000000A600C000005C000-000000067F00008000000A600C0000060000__000000CA2C877DC8\n000000067F00008000000A600C000005C000-000000067F00008000000A600C0000060000__000000CB82C2FF68\n000000067F00008000000A600C000005E30C-000000067F00008000000A600C0000067A2B__000000C986F5F0D9-000000CAD5D7FFF1\n000000067F00008000000A600C0000060000-000000067F00008000000A600C0000064000__000000CA2C877DC8\n000000067F00008000000A600C0000060000-000000067F00008000000A600C0000064000__000000CB82C2FF68\n000000067F00008000000A600C0000064000-000000067F00008000000A600C0000068000__000000CA2C877DC8\n000000067F00008000000A600C0000064000-000000067F00008000000A600C0000068000__000000CB82C2FF68\n000000067F00008000000A600C0000067A2B-000000067F00008000000A600C0000071186__000000C986F5F0D9-000000CAD5D7FFF1\n000000067F00008000000A600C0000068000-000000067F00008000000A600C000006C000__000000CA2C877DC8\n000000067F00008000000A600C0000068000-000000067F00008000000A600C000006C000__000000CB82C2FF68\n000000067F00008000000A600C000006C000-000000067F00008000000A600C0000070000__000000CA2C877DC8\n000000067F00008000000A600C000006C000-000000067F00008000000A600C0000070000__000000CB82C2FF68\n000000067F00008000000A600C0000070000-000000067F00008000000A600C0000074000__000000CA2C877DC8\n000000067F00008000000A600C0000070000-000000067F00008000000A600C0000074000__000000CB82C2FF68\n000000067F00008000
000A600C0000071186-000000067F00008000000A600C000007A8EC__000000C986F5F0D9-000000CAD5D7FFF1\n000000067F00008000000A600C0000074000-000000067F00008000000A600C0000078000__000000CA2C877DC8\n000000067F00008000000A600C0000074000-000000067F00008000000A600C0000078000__000000CB82C2FF68\n000000067F00008000000A600C0000078000-000000067F00008000000A600C000007C000__000000CA2C877DC8\n000000067F00008000000A600C0000078000-000000067F00008000000A600C000007C000__000000CB82C2FF68\n000000067F00008000000A600C000007A149-000000067F00008000000A600C00000F5F42__000000CB40C16489-000000CB82C37859\n000000067F00008000000A600C000007A8EC-000000067F00008000000A600C000008400A__000000C986F5F0D9-000000CAD5D7FFF1\n000000067F00008000000A600C000007C000-000000067F00008000000A600C0000080000__000000CA2C877DC8\n000000067F00008000000A600C000007C000-000000067F00008000000A600C0000080000__000000CB82C2FF68\n000000067F00008000000A600C0000080000-000000067F00008000000A600C0000084000__000000CA2C877DC8\n000000067F00008000000A600C0000080000-000000067F00008000000A600C0000084000__000000CB82C2FF68\n000000067F00008000000A600C0000084000-000000067F00008000000A600C0000088000__000000CA2C877DC8\n000000067F00008000000A600C0000084000-000000067F00008000000A600C0000088000__000000CB82C2FF68\n000000067F00008000000A600C000008400A-000000067F00008000000A600C000008D770__000000C986F5F0D9-000000CAD5D7FFF1\n000000067F00008000000A600C0000088000-000000067F00008000000A600C000008C000__000000CA2C877DC8\n000000067F00008000000A600C0000088000-000000067F00008000000A600C000008C000__000000CB82C2FF68\n000000067F00008000000A600C000008C000-000000067F00008000000A600C0000090000__000000CA2C877DC8\n000000067F00008000000A600C000008C000-000000067F00008000000A600C0000090000__000000CB82C2FF68\n000000067F00008000000A600C000008D770-000000067F00008000000A600C0000096ED6__000000C986F5F0D9-000000CAD5D7FFF1\n000000067F00008000000A600C0000090000-000000067F00008000000A600C0000094000__000000CA2C877DC8\n000000067F00008000000A600C0000090000-000000067F00008000000A600C0000094000
__000000CB82C2FF68\n000000067F00008000000A600C0000094000-000000067F00008000000A600C0000098000__000000CA2C877DC8\n000000067F00008000000A600C0000094000-000000067F00008000000A600C0000098000__000000CB82C2FF68\n000000067F00008000000A600C0000096ED6-000000067F00008000000A600C00000A060B__000000C986F5F0D9-000000CAD5D7FFF1\n000000067F00008000000A600C0000098000-000000067F00008000000A600C000009C000__000000CA2C877DC8\n000000067F00008000000A600C0000098000-000000067F00008000000A600C000009C000__000000CB82C2FF68\n000000067F00008000000A600C000009C000-000000067F00008000000A600C00000A0000__000000CA2C877DC8\n000000067F00008000000A600C000009C000-000000067F00008000000A600C00000A0000__000000CB82C2FF68\n000000067F00008000000A600C00000A0000-000000067F00008000000A600C00000A4000__000000CA2C877DC8\n000000067F00008000000A600C00000A0000-000000067F00008000000A600C00000A4000__000000CB82C2FF68\n000000067F00008000000A600C00000A060B-000000067F00008000000A600C00000A9D71__000000C986F5F0D9-000000CAD5D7FFF1\n000000067F00008000000A600C00000A4000-000000067F00008000000A600C00000A8000__000000CA2C877DC8\n000000067F00008000000A600C00000A4000-000000067F00008000000A600C00000A8000__000000CB82C2FF68\n000000067F00008000000A600C00000A8000-000000067F00008000000A600C00000AC000__000000CA2C877DC8\n000000067F00008000000A600C00000A8000-000000067F00008000000A600C00000AC000__000000CB82C2FF68\n000000067F00008000000A600C00000A9D71-000000067F00008000000A600C00000B34D7__000000C986F5F0D9-000000CAD5D7FFF1\n000000067F00008000000A600C00000AC000-000000067F00008000000A600C00000B0000__000000CB82C2FF68\n000000067F00008000000A600C00000AC000-030000000000000000000000000000000002__000000CA2C877DC8\n000000067F00008000000A600C00000B0000-000000067F00008000000A600C00000B4000__000000CB82C2FF68\n000000067F00008000000A600C00000B34D7-000000067F00008000000A600C00000BCC0C__000000C986F5F0D9-000000CAD5D7FFF1\n000000067F00008000000A600C00000B4000-000000067F00008000000A600C00000B8000__000000CB82C2FF68\n000000067F00008000000A600C00000B8000-000000067F00008
000000A600C00000BC000__000000CB82C2FF68\n000000067F00008000000A600C00000BC000-000000067F00008000000A600C00000C0000__000000CB82C2FF68\n000000067F00008000000A600C00000BCC0C-000000067F00008000000A600C00000C6336__000000C986F5F0D9-000000CAD5D7FFF1\n000000067F00008000000A600C00000C0000-000000067F00008000000A600C00000C4000__000000CB82C2FF68\n000000067F00008000000A600C00000C4000-000000067F00008000000A600C00000C8000__000000CB82C2FF68\n000000067F00008000000A600C00000C6336-000000067F00008000000A600C00000CFA9C__000000C986F5F0D9-000000CAD5D7FFF1\n000000067F00008000000A600C00000C8000-000000067F00008000000A600C00000CC000__000000CB82C2FF68\n000000067F00008000000A600C00000CC000-000000067F00008000000A600C00000D0000__000000CB82C2FF68\n000000067F00008000000A600C00000CFA9C-000000067F00008000000A600C00000D91AB__000000C986F5F0D9-000000CAD5D7FFF1\n000000067F00008000000A600C00000D0000-000000067F00008000000A600C00000D4000__000000CB82C2FF68\n000000067F00008000000A600C00000D4000-000000067F00008000000A600C00000D8000__000000CB82C2FF68\n000000067F00008000000A600C00000D8000-000000067F00008000000A600C00000DC000__000000CB82C2FF68\n000000067F00008000000A600C00000D91AB-000000067F00008000000A600C00000E2911__000000C986F5F0D9-000000CAD5D7FFF1\n000000067F00008000000A600C00000DC000-000000067F00008000000A600C00000E0000__000000CB82C2FF68\n000000067F00008000000A600C00000E0000-000000067F00008000000A600C00000E4000__000000CB82C2FF68\n000000067F00008000000A600C00000E2911-000000067F00008000000A600C00000EC077__000000C986F5F0D9-000000CAD5D7FFF1\n000000067F00008000000A600C00000E4000-000000067F00008000000A600C00000E8000__000000CB82C2FF68\n000000067F00008000000A600C00000E8000-000000067F00008000000A600C00000EC000__000000CB82C2FF68\n000000067F00008000000A600C00000EC000-000000067F00008000000A600C00000F0000__000000CB82C2FF68\n000000067F00008000000A600C00000EC077-000000067F00008000000A600C00000F57A8__000000C986F5F0D9-000000CAD5D7FFF1\n000000067F00008000000A600C00000F0000-000000067F00008000000A600C00000F4000__000000CB82C2FF6
8\n000000067F00008000000A600C00000F4000-000000067F00008000000A600C00000F8000__000000CB82C2FF68\n000000067F00008000000A600C00000F57A8-000000067F00008000000A600C00000FEF0A__000000C986F5F0D9-000000CAD5D7FFF1\n000000067F00008000000A600C00000F5F4F-000000067F00008000000A60140000011158__000000CB40C16489-000000CB82C37859\n000000067F00008000000A600C00000F8000-000000067F00008000000A600C00000FC000__000000CB82C2FF68\n000000067F00008000000A600C00000FC000-000000067F00008000000A600C0000100000__000000CB82C2FF68\n000000067F00008000000A600C00000FEF0A-000000067F00008000000A600C000010862B__000000C986F5F0D9-000000CAD5D7FFF1\n000000067F00008000000A600C0000100000-000000067F00008000000A600C0000104000__000000CB82C2FF68\n000000067F00008000000A600C0000104000-000000067F00008000000A600C0000108000__000000CB82C2FF68\n000000067F00008000000A600C0000108000-000000067F00008000000A600C000010C000__000000CB82C2FF68\n000000067F00008000000A600C000010862B-000000067F00008000000A600C0000111C20__000000C986F5F0D9-000000CAD5D7FFF1\n000000067F00008000000A600C000010C000-000000067F00008000000A600C0000110000__000000CB82C2FF68\n000000067F00008000000A600C0000110000-000000067F00008000000A60120100000000__000000CB82C2FF68\n000000067F00008000000A600C00001117CB-000000067F00008000000A6014000000499B__000000CAD5D7FFF1-000000CB40C16489\n000000067F00008000000A600C00FFFFFFFF-01000000000000000100000005000000000E__000000C986F5F0D9-000000CAD5D7FFF1\n000000067F00008000000A60140000000000-000000067F00008000000A60140000004000__000000CB82C2FF68\n000000067F00008000000A60140000004000-000000067F00008000000A60140000008000__000000CB82C2FF68\n000000067F00008000000A6014000000499B-000000067F00008000000A6014000000BD4E__000000CAD5D7FFF1-000000CB40C16489\n000000067F00008000000A60140000008000-000000067F00008000000A6014000000C000__000000CB82C2FF68\n000000067F00008000000A6014000000BD4E-000000067F00008000000A601400000130ED__000000CAD5D7FFF1-000000CB40C16489\n000000067F00008000000A6014000000C000-000000067F00008000000A60140000010000__000000CB82C2FF68\n0
00000067F00008000000A60140000010000-000000067F00008000000A60140000014000__000000CB82C2FF68\n000000067F00008000000A60140000011159-000000067F00008000000A60140000029BB2__000000CB40C16489-000000CB82C37859\n000000067F00008000000A601400000130ED-000000067F00008000000A6014000001A4BD__000000CAD5D7FFF1-000000CB40C16489\n000000067F00008000000A60140000014000-000000067F00008000000A60140000018000__000000CB82C2FF68\n000000067F00008000000A60140000018000-000000067F00008000000A6014000001C000__000000CB82C2FF68\n000000067F00008000000A6014000001A4BD-000000067F00008000000A60140000021886__000000CAD5D7FFF1-000000CB40C16489\n000000067F00008000000A6014000001C000-000000067F00008000000A60140000020000__000000CB82C2FF68\n000000067F00008000000A60140000020000-000000067F00008000000A60140000024000__000000CB82C2FF68\n000000067F00008000000A60140000021886-000000067F00008000000A60140000028C0A__000000CAD5D7FFF1-000000CB40C16489\n000000067F00008000000A60140000024000-000000067F00008000000A60140000028000__000000CB82C2FF68\n000000067F00008000000A60140000028000-000000067F00008000000A6014000002C000__000000CB82C2FF68\n000000067F00008000000A60140000028C0A-030000000000000000000000000000000002__000000CAD5D7FFF1-000000CB40C16489\n000000067F00008000000A60140000029BB2-030000000000000000000000000000000002__000000CB40C16489-000000CB82C37859\n000000067F00008000000A6014000002C000-030000000000000000000000000000000002__000000CB82C2FF68\n000000067F00008000000A800C0000000000-000000067F00008000000A800C0000004000__000000CD51009FE8\n000000067F00008000000A800C0000004000-000000067F00008000000A800C0000008000__000000CD51009FE8\n000000067F00008000000A800C0000008000-000000067F00008000000A800C000000C000__000000CD51009FE8\n000000067F00008000000A800C0000009748-000000067F00008000000A800C0000012EAE__000000CB82C37859-000000CC11F5EDC9\n000000067F00008000000A800C000000C000-000000067F00008000000A800C0000010000__000000CD51009FE8\n000000067F00008000000A800C0000010000-000000067F00008000000A800C0000014000__000000CD51009FE8\n000000067F00008000000A
800C0000012EAE-000000067F00008000000A800C000001C60A__000000CB82C37859-000000CC11F5EDC9\n000000067F00008000000A800C0000014000-000000067F00008000000A800C0000018000__000000CD51009FE8\n000000067F00008000000A800C0000018000-000000067F00008000000A800C000001C000__000000CD51009FE8\n000000067F00008000000A800C000001C000-000000067F00008000000A800C0000020000__000000CD51009FE8\n000000067F00008000000A800C000001C60A-000000067F00008000000A800C0000025D38__000000CB82C37859-000000CC11F5EDC9\n000000067F00008000000A800C0000020000-000000067F00008000000A800C0000024000__000000CD51009FE8\n000000067F00008000000A800C0000024000-000000067F00008000000A800C0000028000__000000CD51009FE8\n000000067F00008000000A800C0000025D38-000000067F00008000000A800C000002F49E__000000CB82C37859-000000CC11F5EDC9\n000000067F00008000000A800C0000028000-000000067F00008000000A800C000002C000__000000CD51009FE8\n000000067F00008000000A800C000002C000-000000067F00008000000A800C0000030000__000000CD51009FE8\n000000067F00008000000A800C000002F49E-000000067F00008000000A800C0000038BB1__000000CB82C37859-000000CC11F5EDC9\n000000067F00008000000A800C0000030000-000000067F00008000000A800C0000034000__000000CD51009FE8\n000000067F00008000000A800C0000034000-000000067F00008000000A800C0000038000__000000CD51009FE8\n000000067F00008000000A800C0000038000-000000067F00008000000A800C000003C000__000000CD51009FE8\n000000067F00008000000A800C0000038BB1-000000067F00008000000A800C0000042317__000000CB82C37859-000000CC11F5EDC9\n000000067F00008000000A800C000003C000-000000067F00008000000A800C0000040000__000000CD51009FE8\n000000067F00008000000A800C0000040000-000000067F00008000000A800C0000044000__000000CD51009FE8\n000000067F00008000000A800C0000042317-000000067F00008000000A800C000004BA7D__000000CB82C37859-000000CC11F5EDC9\n000000067F00008000000A800C0000044000-000000067F00008000000A800C0000048000__000000CD51009FE8\n000000067F00008000000A800C0000048000-000000067F00008000000A800C000004C000__000000CD51009FE8\n000000067F00008000000A800C000004BA7D-000000067F00008000000A8
00C0000054CA0__000000CB82C37859-000000CC11F5EDC9\n000000067F00008000000A800C000004C000-000000067F00008000000A800C0000050000__000000CD51009FE8\n000000067F00008000000A800C0000050000-000000067F00008000000A800C0000054000__000000CD51009FE8\n000000067F00008000000A800C0000054000-000000067F00008000000A800C0000058000__000000CD51009FE8\n000000067F00008000000A800C0000054C9F-000000067F00008000000A800C000005E405__000000CC11F5EDC9-000000CCB1B9E181\n000000067F00008000000A800C0000058000-000000067F00008000000A800C000005C000__000000CD51009FE8\n000000067F00008000000A800C000005C000-000000067F00008000000A800C0000060000__000000CD51009FE8\n000000067F00008000000A800C000005E405-000000067F00008000000A800C0000067B10__000000CC11F5EDC9-000000CCB1B9E181\n000000067F00008000000A800C0000060000-000000067F00008000000A800C0000064000__000000CD51009FE8\n000000067F00008000000A800C0000064000-000000067F00008000000A800C0000068000__000000CD51009FE8\n000000067F00008000000A800C0000067B10-000000067F00008000000A800C0000071276__000000CC11F5EDC9-000000CCB1B9E181\n000000067F00008000000A800C0000068000-000000067F00008000000A800C000006C000__000000CD51009FE8\n000000067F00008000000A800C000006C000-000000067F00008000000A800C0000070000__000000CD51009FE8\n000000067F00008000000A800C0000070000-000000067F00008000000A800C0000074000__000000CD51009FE8\n000000067F00008000000A800C0000071276-000000067F00008000000A800C000007A9DC__000000CC11F5EDC9-000000CCB1B9E181\n000000067F00008000000A800C0000074000-000000067F00008000000A800C0000078000__000000CD51009FE8\n000000067F00008000000A800C0000078000-000000067F00008000000A800C000007C000__000000CD51009FE8\n000000067F00008000000A800C000007A9DC-000000067F00008000000A800C000008410B__000000CC11F5EDC9-000000CCB1B9E181\n000000067F00008000000A800C000007C000-000000067F00008000000A800C0000080000__000000CD51009FE8\n000000067F00008000000A800C0000080000-000000067F00008000000A800C0000084000__000000CD51009FE8\n000000067F00008000000A800C0000084000-000000067F00008000000A800C0000088000__000000CD51009FE8\n00000
0067F00008000000A800C000008410B-000000067F00008000000A800C000008D871__000000CC11F5EDC9-000000CCB1B9E181\n000000067F00008000000A800C0000088000-000000067F00008000000A800C000008C000__000000CD51009FE8\n000000067F00008000000A800C000008C000-000000067F00008000000A800C0000090000__000000CD51009FE8\n000000067F00008000000A800C000008D871-000000067F00008000000A800C0000096F94__000000CC11F5EDC9-000000CCB1B9E181\n000000067F00008000000A800C0000090000-000000067F00008000000A800C0000094000__000000CD51009FE8\n000000067F00008000000A800C0000094000-000000067F00008000000A800C0000098000__000000CD51009FE8\n000000067F00008000000A800C0000096F94-000000067F00008000000A800C00000A06FA__000000CC11F5EDC9-000000CCB1B9E181\n000000067F00008000000A800C0000098000-000000067F00008000000A800C000009C000__000000CD51009FE8\n000000067F00008000000A800C000009C000-000000067F00008000000A800C00000A0000__000000CD51009FE8\n000000067F00008000000A800C00000A0000-000000067F00008000000A800C00000A4000__000000CD51009FE8\n000000067F00008000000A800C00000A06FA-000000067F00008000000A800C00000A9E0D__000000CC11F5EDC9-000000CCB1B9E181\n000000067F00008000000A800C00000A4000-000000067F00008000000A800C00000A8000__000000CD51009FE8\n000000067F00008000000A800C00000A8000-000000067F00008000000A800C00000AC000__000000CD51009FE8\n000000067F00008000000A800C00000A9E0D-000000067F00008000000A800C00000B3553__000000CC11F5EDC9-000000CCB1B9E181\n000000067F00008000000A800C00000AC000-000000067F00008000000A800C00000B0000__000000CD51009FE8\n000000067F00008000000A800C00000B0000-000000067F00008000000A800C00000B4000__000000CD51009FE8\n000000067F00008000000A800C00000B3553-000000067F00008000000A800C0100000000__000000CC11F5EDC9-000000CCB1B9E181\n000000067F00008000000A800C00000B4000-000000067F00008000000A800C00000B8000__000000CD51009FE8\n000000067F00008000000A800C00000B8000-000000067F00008000000A800C00000BC000__000000CD51009FE8\n000000067F00008000000A800C00000BC000-000000067F00008000000A800C00000C0000__000000CD51009FE8\n000000067F00008000000A800C00000BCB46-000000
067F00008000000A800C00000C62AC__000000CCB1B9E181-000000CD51344F89\n000000067F00008000000A800C00000C0000-000000067F00008000000A800C00000C4000__000000CD51009FE8\n000000067F00008000000A800C00000C4000-000000067F00008000000A800C00000C8000__000000CD51009FE8\n000000067F00008000000A800C00000C62AC-000000067F00008000000A800C00000CFA09__000000CCB1B9E181-000000CD51344F89\n000000067F00008000000A800C00000C8000-000000067F00008000000A800C00000CC000__000000CD51009FE8\n000000067F00008000000A800C00000CC000-000000067F00008000000A800C00000D0000__000000CD51009FE8\n000000067F00008000000A800C00000CFA09-000000067F00008000000A800C00000D9118__000000CCB1B9E181-000000CD51344F89\n000000067F00008000000A800C00000D0000-000000067F00008000000A800C00000D4000__000000CD51009FE8\n000000067F00008000000A800C00000D4000-000000067F00008000000A800C00000D8000__000000CD51009FE8\n000000067F00008000000A800C00000D8000-000000067F00008000000A800C00000DC000__000000CD51009FE8\n000000067F00008000000A800C00000D9118-000000067F00008000000A800C00000E287E__000000CCB1B9E181-000000CD51344F89\n000000067F00008000000A800C00000DC000-000000067F00008000000A800C00000E0000__000000CD51009FE8\n000000067F00008000000A800C00000E0000-000000067F00008000000A800C00000E4000__000000CD51009FE8\n000000067F00008000000A800C00000E287E-000000067F00008000000A800C00000EBFE4__000000CCB1B9E181-000000CD51344F89\n000000067F00008000000A800C00000E4000-000000067F00008000000A800C00000E8000__000000CD51009FE8\n000000067F00008000000A800C00000E8000-000000067F00008000000A800C00000EC000__000000CD51009FE8\n000000067F00008000000A800C00000EBFE4-000000067F00008000000A800C00000F570B__000000CCB1B9E181-000000CD51344F89\n000000067F00008000000A800C00000EC000-000000067F00008000000A800C00000F0000__000000CD51009FE8\n000000067F00008000000A800C00000F0000-000000067F00008000000A800C00000F4000__000000CD51009FE8\n000000067F00008000000A800C00000F4000-000000067F00008000000A800C00000F8000__000000CD51009FE8\n000000067F00008000000A800C00000F570B-000000067F00008000000A800C00000FEE71__000000
CCB1B9E181-000000CD51344F89\n000000067F00008000000A800C00000F8000-000000067F00008000000A800C00000FC000__000000CD51009FE8\n000000067F00008000000A800C00000FC000-000000067F00008000000A800C0000100000__000000CD51009FE8\n000000067F00008000000A800C00000FEE71-000000067F00008000000A800C0000108587__000000CCB1B9E181-000000CD51344F89\n000000067F00008000000A800C0000100000-000000067F00008000000A800C0000104000__000000CD51009FE8\n000000067F00008000000A800C0000104000-000000067F00008000000A800C0000108000__000000CD51009FE8\n000000067F00008000000A800C0000108000-000000067F00008000000A800C000010C000__000000CD51009FE8\n000000067F00008000000A800C0000108587-000000067F00008000000A800C0000111C20__000000CCB1B9E181-000000CD51344F89\n000000067F00008000000A800C000010C000-000000067F00008000000A800C0000110000__000000CD51009FE8\n000000067F00008000000A800C0000110000-030000000000000000000000000000000002__000000CD51009FE8\n000000067F00008000000A800C00FFFFFFFF-010000000000000001000000050000000011__000000CCB1B9E181-000000CD51344F89\n000000067F00008000000A800C00FFFFFFFF-030000000000000000000000000000000002__000000CB82C37859-000000CC11F5EDC9\n000000067F00008000000A800F0200000000-000000067F00008000000A80140000007ADF__000000CD51344F89-000000CDCC7BF889\n000000067F00008000000A80140000007ADF-000000067F00008000000A8014000000F7D0__000000CD51344F89-000000CDCC7BF889\n000000067F00008000000A8014000000F7D0-000000067F00008000000A801400000176D0__000000CD51344F89-000000CDCC7BF889\n000000067F00008000000A801400000176D0-000000067F00008000000A8014000001F5D2__000000CD51344F89-000000CDCC7BF889\n000000067F00008000000A8014000001F5D2-000000067F00008000000A801400000274D5__000000CD51344F89-000000CDCC7BF889\n000000067F00008000000A801400000274D5-000000067F00008000000AA00C0000001863__000000CD51344F89-000000CDCC7BF889\n000000067F00008000000AA00C0000000000-000000067F00008000000AA00C0000004000__000000CF7E08BFD0\n000000067F00008000000AA00C0000001863-000000067F00008000000AA00C000000AFC9__000000CD51344F89-000000CDCC7BF889\n000000067F0000800
0000AA00C0000004000-000000067F00008000000AA00C0000008000__000000CF7E08BFD0\n000000067F00008000000AA00C0000008000-000000067F00008000000AA00C000000C000__000000CF7E08BFD0\n000000067F00008000000AA00C000000AFC9-030000000000000000000000000000000002__000000CD51344F89-000000CDCC7BF889\n000000067F00008000000AA00C000000C000-000000067F00008000000AA00C0000010000__000000CF7E08BFD0\n000000067F00008000000AA00C0000010000-000000067F00008000000AA00C0000014000__000000CF7B8D3FD0\n000000067F00008000000AA00C00000126EC-000000067F00008000000AA00C000001BE0C__000000CDCC7BF889-000000CE6C3FED31\n000000067F00008000000AA00C0000014000-000000067F00008000000AA00C0000018000__000000CF7B8D3FD0\n000000067F00008000000AA00C0000018000-000000067F00008000000AA00C000001C000__000000CF7B8D3FD0\n000000067F00008000000AA00C000001BE0C-000000067F00008000000AA00C000002553F__000000CDCC7BF889-000000CE6C3FED31\n000000067F00008000000AA00C000001C000-000000067F00008000000AA00C0000020000__000000CF7B8D3FD0\n000000067F00008000000AA00C0000020000-000000067F00008000000AA00C0000024000__000000CF7B8D3FD0\n000000067F00008000000AA00C0000024000-000000067F00008000000AA00C0000028000__000000CF7B8D3FD0\n000000067F00008000000AA00C000002553F-000000067F00008000000AA00C000002ECA5__000000CDCC7BF889-000000CE6C3FED31\n000000067F00008000000AA00C0000028000-000000067F00008000000AA00C000002C000__000000CF7B8D3FD0\n000000067F00008000000AA00C000002C000-000000067F00008000000AA00C0000030000__000000CF7B8D3FD0\n000000067F00008000000AA00C000002ECA5-000000067F00008000000AA00C00000383BC__000000CDCC7BF889-000000CE6C3FED31\n000000067F00008000000AA00C0000030000-000000067F00008000000AA00C0000034000__000000CF7B8D3FD0\n000000067F00008000000AA00C0000034000-000000067F00008000000AA00C0000038000__000000CF7B8D3FD0\n000000067F00008000000AA00C0000038000-000000067F00008000000AA00C000003C000__000000CF7B8D3FD0\n000000067F00008000000AA00C00000383BC-000000067F00008000000AA00C0000041B0A__000000CDCC7BF889-000000CE6C3FED31\n000000067F00008000000AA00C000003C000-000000067F00008000
000AA00C0000040000__000000CF7B8D3FD0\n000000067F00008000000AA00C0000040000-000000067F00008000000AA00C0000044000__000000CF7B8D3FD0\n000000067F00008000000AA00C0000041B0A-000000067F00008000000AA00C000004B270__000000CDCC7BF889-000000CE6C3FED31\n000000067F00008000000AA00C0000044000-000000067F00008000000AA00C0000048000__000000CF7B8D3FD0\n000000067F00008000000AA00C0000048000-000000067F00008000000AA00C000004C000__000000CF7B8D3FD0\n000000067F00008000000AA00C000004B270-000000067F00008000000AA00C00000549AA__000000CDCC7BF889-000000CE6C3FED31\n000000067F00008000000AA00C000004C000-000000067F00008000000AA00C0000050000__000000CF7B8D3FD0\n000000067F00008000000AA00C0000050000-000000067F00008000000AA00C0000054000__000000CF7B8D3FD0\n000000067F00008000000AA00C0000054000-000000067F00008000000AA00C0000058000__000000CF7B8D3FD0\n000000067F00008000000AA00C00000549AA-000000067F00008000000AA00C000005E10B__000000CDCC7BF889-000000CE6C3FED31\n000000067F00008000000AA00C0000058000-000000067F00008000000AA00C000005C000__000000CF7B8D3FD0\n000000067F00008000000AA00C000005C000-000000067F00008000000AA00C0000060000__000000CF7B8D3FD0\n000000067F00008000000AA00C000005E10B-000000067F00008000000AA00C000006782C__000000CDCC7BF889-000000CE6C3FED31\n000000067F00008000000AA00C0000060000-000000067F00008000000AA00C0000064000__000000CF7B8D3FD0\n000000067F00008000000AA00C0000064000-000000067F00008000000AA00C0000068000__000000CF7B8D3FD0\n000000067F00008000000AA00C000006782C-000000067F00008000000AA00C0000070F88__000000CDCC7BF889-000000CE6C3FED31\n000000067F00008000000AA00C0000068000-000000067F00008000000AA00C000006C000__000000CF7B8D3FD0\n000000067F00008000000AA00C000006C000-000000067F00008000000AA00C0000070000__000000CF7B8D3FD0\n000000067F00008000000AA00C0000070000-000000067F00008000000AA00C0000074000__000000CF7B8D3FD0\n000000067F00008000000AA00C0000070F88-000000067F00008000000AA00C0100000000__000000CDCC7BF889-000000CE6C3FED31\n000000067F00008000000AA00C0000074000-000000067F00008000000AA00C0000078000__000000CF7B8D3FD0\n
000000067F00008000000AA00C0000078000-000000067F00008000000AA00C000007C000__000000CF7B8D3FD0\n000000067F00008000000AA00C0000078E97-000000067F00008000000AA00C00000823F9__000000CE6C3FED31-000000CF7DC97FD1\n000000067F00008000000AA00C000007C000-000000067F00008000000AA00C0000080000__000000CF7B8D3FD0\n000000067F00008000000AA00C0000080000-000000067F00008000000AA00C0000084000__000000CF7B8D3FD0\n000000067F00008000000AA00C00000823F9-000000067F00008000000AA00C000008BA8A__000000CE6C3FED31-000000CF7DC97FD1\n000000067F00008000000AA00C0000084000-000000067F00008000000AA00C0000088000__000000CF7B8D3FD0\n000000067F00008000000AA00C0000088000-000000067F00008000000AA00C000008C000__000000CF7B8D3FD0\n000000067F00008000000AA00C000008BA8A-000000067F00008000000AA00C00000951BF__000000CE6C3FED31-000000CF7DC97FD1\n000000067F00008000000AA00C000008C000-000000067F00008000000AA00C0000090000__000000CF7B8D3FD0\n000000067F00008000000AA00C0000090000-000000067F00008000000AA00C0000094000__000000CF7B8D3FD0\n000000067F00008000000AA00C0000094000-000000067F00008000000AA00C0000098000__000000CF7B8D3FD0\n000000067F00008000000AA00C00000951BF-000000067F00008000000AA00C000009E90A__000000CE6C3FED31-000000CF7DC97FD1\n000000067F00008000000AA00C0000098000-000000067F00008000000AA00C000009C000__000000CF7B8D3FD0\n000000067F00008000000AA00C000009C000-000000067F00008000000AA00C00000A0000__000000CF7B8D3FD0\n000000067F00008000000AA00C000009E90A-000000067F00008000000AA00C00000A802B__000000CE6C3FED31-000000CF7DC97FD1\n000000067F00008000000AA00C00000A0000-000000067F00008000000AA00C00000A4000__000000CF7B8D3FD0\n000000067F00008000000AA00C00000A4000-000000067F00008000000AA00C00000A8000__000000CF7B8D3FD0\n000000067F00008000000AA00C00000A8000-000000067F00008000000AA00C00000AC000__000000CF7B8D3FD0\n000000067F00008000000AA00C00000A802B-000000067F00008000000AA00C00000B1782__000000CE6C3FED31-000000CF7DC97FD1\n000000067F00008000000AA00C00000AC000-000000067F00008000000AA00C00000B0000__000000CF7B8D3FD0\n000000067F00008000000AA00C00000B0000-0
00000067F00008000000AA00C00000B4000__000000CF7B8D3FD0\n000000067F00008000000AA00C00000B1782-000000067F00008000000AA00C00000BAEE8__000000CE6C3FED31-000000CF7DC97FD1\n000000067F00008000000AA00C00000B4000-000000067F00008000000AA00C00000B8000__000000CF7B8D3FD0\n000000067F00008000000AA00C00000B8000-000000067F00008000000AA00C00000BC000__000000CF7B8D3FD0\n000000067F00008000000AA00C00000BAEE8-000000067F00008000000AA00C00000C460C__000000CE6C3FED31-000000CF7DC97FD1\n000000067F00008000000AA00C00000BC000-000000067F00008000000AA00C00000C0000__000000CF7B8D3FD0\n000000067F00008000000AA00C00000C0000-000000067F00008000000AA00C00000C4000__000000CF7B8D3FD0\n000000067F00008000000AA00C00000C4000-000000067F00008000000AA00C00000C8000__000000CF7B8D3FD0\n000000067F00008000000AA00C00000C460C-000000067F00008000000AA00C00000CDD72__000000CE6C3FED31-000000CF7DC97FD1\n000000067F00008000000AA00C00000C8000-000000067F00008000000AA00C00000CC000__000000CF7B8D3FD0\n000000067F00008000000AA00C00000CC000-000000067F00008000000AA00C00000D0000__000000CF7B8D3FD0\n000000067F00008000000AA00C00000CDD72-000000067F00008000000AA00C00000D74D8__000000CE6C3FED31-000000CF7DC97FD1\n000000067F00008000000AA00C00000D0000-000000067F00008000000AA00C00000D4000__000000CF7B8D3FD0\n000000067F00008000000AA00C00000D4000-000000067F00008000000AA00C00000D8000__000000CF7B8D3FD0\n000000067F00008000000AA00C00000D74D8-000000067F00008000000AA00C00000E0C0B__000000CE6C3FED31-000000CF7DC97FD1\n000000067F00008000000AA00C00000D8000-000000067F00008000000AA00C00000DC000__000000CF7B8D3FD0\n000000067F00008000000AA00C00000DC000-000000067F00008000000AA00C00000E0000__000000CF7B8D3FD0\n000000067F00008000000AA00C00000E0000-000000067F00008000000AA00C00000E4000__000000CF7B8D3FD0\n000000067F00008000000AA00C00000E0C0B-000000067F00008000000AA00C00000EA371__000000CE6C3FED31-000000CF7DC97FD1\n000000067F00008000000AA00C00000E4000-000000067F00008000000AA00C00000E8000__000000CF7B8D3FD0\n000000067F00008000000AA00C00000E8000-000000067F00008000000AA00C00000EC000__0
00000CF7B8D3FD0\n000000067F00008000000AA00C00000EA371-000000067F00008000000AA00C00000F3AD7__000000CE6C3FED31-000000CF7DC97FD1\n000000067F00008000000AA00C00000EC000-000000067F00008000000AA00C00000F0000__000000CF7B8D3FD0\n000000067F00008000000AA00C00000F0000-000000067F00008000000AA00C00000F4000__000000CF7B8D3FD0\n000000067F00008000000AA00C00000F3AD7-000000067F00008000000AA00C00000FD20B__000000CE6C3FED31-000000CF7DC97FD1\n000000067F00008000000AA00C00000F4000-000000067F00008000000AA00C00000F8000__000000CF7B8D3FD0\n000000067F00008000000AA00C00000F8000-000000067F00008000000AA00C00000FC000__000000CF7B8D3FD0\n000000067F00008000000AA00C00000FC000-000000067F00008000000AA00C0000100000__000000CF7B8D3FD0\n000000067F00008000000AA00C00000FD20B-000000067F00008000000AA00C0000106932__000000CE6C3FED31-000000CF7DC97FD1\n000000067F00008000000AA00C0000100000-000000067F00008000000AA00C0000104000__000000CF7B8D3FD0\n000000067F00008000000AA00C0000104000-000000067F00008000000AA00C0000108000__000000CF7B8D3FD0\n000000067F00008000000AA00C0000106932-000000067F00008000000AA00C0000110098__000000CE6C3FED31-000000CF7DC97FD1\n000000067F00008000000AA00C0000108000-000000067F00008000000AA00C000010C000__000000CF7B8D3FD0\n000000067F00008000000AA00C000010C000-000000067F00008000000AA00C0000110000__000000CF7B8D3FD0\n000000067F00008000000AA00C0000110000-030000000000000000000000000000000002__000000CF7B8D3FD0\n000000067F00008000000AA00C0000110098-010000000000000001000000050000000012__000000CE6C3FED31-000000CF7DC97FD1\n010000000000000001000000000000000000-030000000000000000000000000000000002__000000A29F1D8950\n030000000000000000000000000000000001-030000000000000000000000000000000002__000000C689AF4AC1-000000C6C87B6329\n"
  },
  {
    "path": "pageserver/benches/odd-brook-layernames.txt",
    "content": "000000000000000000000000000000000000-000000067F00004002000089C30100000000__0000001C760FA190\n000000000000000000000000000000000000-000000067F00004002000089C30100000000__00000038E67ABFA0\n000000000000000000000000000000000000-000000067F00004002000089C30100000000__0000003903F1CFE8\n000000000000000000000000000000000000-000000067F00004002000089C30100000000__0000003B99F7F8A0\n000000000000000000000000000000000000-000000067F00004002000089C30100000000__0000005D2FFFFB38\n000000000000000000000000000000000000-000000067F00004002000089C30100000000__00000073AD3FE6B8\n000000000000000000000000000000000000-000000067F00004002000089C30100000000__000000914E3F38F0\n000000000000000000000000000000000000-000000067F00004002000089C30100000000__000000931B33AE68\n000000000000000000000000000000000000-000000067F00004002000089C30100000000__000000931B9AFDF8\n000000000000000000000000000000000000-000000067F0000400200008A4F0100000000__000000009E3FE898\n000000000000000000000000000000000000-030000000000000000000000000000000002__0000000001696070-00000000016E8B31\n000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__000000931C070601-000000931C075661\n000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__000000931C075661-000000931C0794A1\n000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__000000931C0794A1-000000931C07C709\n000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__000000931C07C709-000000931C07FED1\n000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__000000931C07FED1-000000931C081909\n000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__000000931C081909-000000931C083E31\n000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__000000931C083E31-000000931C088149\n000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__000000931C088149-000000931C088409\n000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFF
FFFFFFFFF__000000931C088409-000000931C0887F1\n000000067F000032AC000040040000000000-000000067F0000400200008A5900000080D3__00000000016F8AC9-000000007119E789\n000000067F000032AC000040040000000000-000000067F000040020000A0000000008989__0000001C725D0191-0000002070591C61\n000000067F000032AC000040040000000000-000000067F000040020000C0000000007F72__00000038ED8FA069-0000003ABA685F11\n000000067F000032AC000040040000000000-000000067F000040020000E000000000899C__0000003ABA698781-0000003B6A0FFB09\n000000067F000032AC000040040000000000-000000067F000040020000E0000000F4FCF9__00000056FC37F3D9-000000572A7B4CD9\n000000067F000032AC000040040000000000-000000067F0000400200010000000000899C__000000572A7C74A1-0000005CA7BBD6F9\n000000067F000032AC000040040000000000-000000067F00004002000140000000008988__000000739A920D71-0000008D2DB5E0C1\n000000067F000032AC000040040000000000-000000067F00004002000140000000F32D01__0000008FAC75E259-000000900BB52179\n000000067F000032AC000040040000000000-000000067F00004002000160000000007F7A__000000900BB52179-0000009046EDA719\n000000067F000032AC000040040000000000-000000067F00004002000160000000037E1D__0000009046EDA719-000000914E3FE031\n000000067F000032AC000040040000000000-000000067F00004002000180000000007F7A__000000914E3FE031-000000919CCE8B21\n000000067F000032AC000040040000000000-000000067F0000400200018000000014F52F__00000092D346E5E9-000000931B991E09\n000000067F000032AC000040040000000000-030000000000000000000000000000000002__00000000016E8B31-00000000016F8AC9\n000000067F000032AC000040040000000000-030000000000000000000000000000000002__0000001C725A5929-0000001C725C25F1\n000000067F000032AC000040040000000000-030000000000000000000000000000000002__0000001C725C25F1-0000001C725D0191\n000000067F000032AC000040040000000000-030000000000000000000000000000000002__00000038ECF55FD9-00000038ED8D1E61\n000000067F000032AC000040040000000000-030000000000000000000000000000000002__00000038ED8D1E61-00000038ED8E5D49\n000000067F000032AC000040040000000000-030000000000000000000000000000000002__00000038E
D8E5D49-00000038ED8FA069\n000000067F000032AC000040040000000000-030000000000000000000000000000000002__0000003ABA685F11-0000003ABA698781\n000000067F000032AC000040040000000000-030000000000000000000000000000000002__000000572A7B4CD9-000000572A7C74A1\n000000067F000032AC000040040000000000-030000000000000000000000000000000002__000000739A8D1299-000000739A8E6EF9\n000000067F000032AC000040040000000000-030000000000000000000000000000000002__000000739A8E6EF9-000000739A8FC4B9\n000000067F000032AC000040040000000000-030000000000000000000000000000000002__000000739A8FC4B9-000000739A920D71\n000000067F000032AC000040040000000000-030000000000000000000000000000000002__000000931B991E09-000000931B9AAA89\n000000067F000032AC000040040000000000-030000000000000000000000000000000002__000000931B9AAA89-000000931B9D7EF9\n000000067F000032AC000040040000000000-030000000000000000000000000000000002__000000931B9D7EF9-000000931B9E97C9\n000000067F000032AC000040040000000000-030000000000000000000000000000000002__000000931B9E97C9-000000931BA45F31\n000000067F000032AC000040040000000000-030000000000000000000000000000000002__000000931BA45F31-000000931BA69491\n000000067F000032AC000040040000000000-030000000000000000000000000000000002__000000931BA69491-000000931BA85AD9\n000000067F000032AC000040040000000000-030000000000000000000000000000000002__000000931BA85AD9-000000931BAB3D49\n000000067F000032AC000040040000000000-030000000000000000000000000000000002__000000931BAB3D49-000000931BAD4F09\n000000067F000032AC000040040000000000-030000000000000000000000000000000002__000000931BAD4F09-000000931BAFBE51\n000000067F000032AC000040040000000000-030000000000000000000000000000000002__000000931BAFBE51-000000931BB20A89\n000000067F000032AC000040040000000000-030000000000000000000000000000000002__000000931BB20A89-000000931BB445C9\n000000067F000032AC000040040000000000-030000000000000000000000000000000002__000000931BB445C9-000000931BB6C539\n000000067F000032AC000040040000000000-030000000000000000000000000000000002__000000931BB6C539-000000931BB9
4A11\n000000067F000032AC000040040000000000-030000000000000000000000000000000002__000000931BB94A11-000000931BBC0179\n000000067F000032AC000040040000000000-030000000000000000000000000000000002__000000931BBC0179-000000931BBE4B21\n000000067F000032AC000040040000000000-030000000000000000000000000000000002__000000931BBE4B21-000000931BC0FCC9\n000000067F000032AC000040040000000000-030000000000000000000000000000000002__000000931BC0FCC9-000000931BC36E61\n000000067F000032AC000040040000000000-030000000000000000000000000000000002__000000931BC36E61-000000931BC579B1\n000000067F000032AC000040040000000000-030000000000000000000000000000000002__000000931BC579B1-000000931BC790F1\n000000067F000032AC000040040000000000-030000000000000000000000000000000002__000000931BC790F1-000000931BC96EC9\n000000067F000032AC000040040000000000-030000000000000000000000000000000002__000000931BC96EC9-000000931BCB5D09\n000000067F000032AC000040040000000000-030000000000000000000000000000000002__000000931BCB5D09-000000931BCD7991\n000000067F000032AC000040040000000000-030000000000000000000000000000000002__000000931BCD7991-000000931BCF66C9\n000000067F000032AC000040040000000000-030000000000000000000000000000000002__000000931BCF66C9-000000931BD15B61\n000000067F000032AC000040040000000000-030000000000000000000000000000000002__000000931BD15B61-000000931BD3B251\n000000067F000032AC000040040000000000-030000000000000000000000000000000002__000000931BD3B251-000000931BD5E7D9\n000000067F000032AC000040040000000000-030000000000000000000000000000000002__000000931BD5E7D9-000000931BD82A51\n000000067F000032AC000040040000000000-030000000000000000000000000000000002__000000931BD82A51-000000931BDA7A71\n000000067F000032AC000040040000000000-030000000000000000000000000000000002__000000931BDA7A71-000000931BDD2F29\n000000067F000032AC000040040000000000-030000000000000000000000000000000002__000000931BDD2F29-000000931BDF89D1\n000000067F000032AC000040040000000000-030000000000000000000000000000000002__000000931BDF89D1-000000931BE1D831\n000000067F0000
32AC000040040000000000-030000000000000000000000000000000002__000000931BE1D831-000000931BE40719\n000000067F000032AC000040040000000000-030000000000000000000000000000000002__000000931BE40719-000000931BE6B0D1\n000000067F000032AC000040040000000000-030000000000000000000000000000000002__000000931BE6B0D1-000000931BE887A9\n000000067F000032AC000040040000000000-030000000000000000000000000000000002__000000931BE887A9-000000931BEAD539\n000000067F000032AC000040040000000000-030000000000000000000000000000000002__000000931BEAD539-000000931BEC56B9\n000000067F000032AC000040040000000000-030000000000000000000000000000000002__000000931BEC56B9-000000931BEE27D9\n000000067F000032AC000040040000000000-030000000000000000000000000000000002__000000931BEE27D9-000000931BF00151\n000000067F000032AC000040040000000000-030000000000000000000000000000000002__000000931BF00151-000000931BF24059\n000000067F000032AC000040040000000000-030000000000000000000000000000000002__000000931BF24059-000000931BF3EB61\n000000067F000032AC000040040000000000-030000000000000000000000000000000002__000000931BF3EB61-000000931BF63011\n000000067F000032AC000040040000000000-030000000000000000000000000000000002__000000931BF63011-000000931BF84BB9\n000000067F000032AC000040040000000000-030000000000000000000000000000000002__000000931BF84BB9-000000931BFAAFF1\n000000067F000032AC000040040000000000-030000000000000000000000000000000002__000000931BFAAFF1-000000931BFD3511\n000000067F000032AC000040040000000000-030000000000000000000000000000000002__000000931BFD3511-000000931BFF93D9\n000000067F000032AC000040040000000000-030000000000000000000000000000000002__000000931BFF93D9-000000931C01DAE1\n000000067F000032AC000040040000000000-030000000000000000000000000000000002__000000931C01DAE1-000000931C045291\n000000067F000032AC000040040000000000-030000000000000000000000000000000002__000000931C045291-000000931C070601\n000000067F00004002000000000000000001-000000067F0000400200008A590000F1B7DD__0000001BE353E181-0000001C725A5929\n000000067F000040020000000000000000
01-000000067F000040020000A0000000F11587__000000384463E2C1-00000038E1E2FE19\n000000067F00004002000000000000000001-000000067F0000400200010000000030067A__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000000000000000001-010000000000000001000000000000000001__00000038E5077EE1-00000038E68FBE49\n000000067F00004002000000000000000001-010000000000000001000000000000000001__00000038E99BFDE9-00000038EAFDDF91\n000000067F000040020000840E0100000000-000000067F0000400200008A590000044853__000000007119E789-0000000120C1DDF9\n000000067F0000400200008A590000000000-000000067F0000400200008A590000004000__000000009E3FE898\n000000067F0000400200008A590000000000-000000067F0000400200008A590000004000__0000001C760FA190\n000000067F0000400200008A590000000000-000000067F0000400200008A590000004000__00000038E67ABFA0\n000000067F0000400200008A590000000000-000000067F0000400200008A590000004000__0000003903F1CFE8\n000000067F0000400200008A590000000000-000000067F0000400200008A590000004000__0000003B99F7F8A0\n000000067F0000400200008A590000000000-000000067F0000400200008A590000004000__0000005D2FFFFB38\n000000067F0000400200008A590000000000-000000067F0000400200008A590000004000__00000073AD3FE6B8\n000000067F0000400200008A590000000000-000000067F0000400200008A590000004000__000000914E3F38F0\n000000067F0000400200008A590000000000-000000067F0000400200008A590000004000__000000931B33AE68\n000000067F0000400200008A590000000000-000000067F0000400200008A590000004000__000000931B9AFDF8\n000000067F0000400200008A590000004000-000000067F0000400200008A590000008000__000000009E3FE898\n000000067F0000400200008A590000004000-000000067F0000400200008A590000008000__0000001C760FA190\n000000067F0000400200008A590000004000-000000067F0000400200008A590000008000__00000038E67ABFA0\n000000067F0000400200008A590000004000-000000067F0000400200008A590000008000__0000003903F1CFE8\n000000067F0000400200008A590000004000-000000067F0000400200008A590000008000__0000003B99F7F8A0\n000000067F0000400200008A590000004000-000000067F0000400200008A590000008000__0000005D2FFFFB
38\n000000067F0000400200008A590000004000-000000067F0000400200008A590000008000__00000073AD3FE6B8\n000000067F0000400200008A590000004000-000000067F0000400200008A590000008000__000000914E3F38F0\n000000067F0000400200008A590000004000-000000067F0000400200008A590000008000__000000931B33AE68\n000000067F0000400200008A590000004000-000000067F0000400200008A590000008000__000000931B9AFDF8\n000000067F0000400200008A590000008000-000000067F0000400200008A59000000C000__000000009E3FE898\n000000067F0000400200008A590000008000-000000067F0000400200008A59000000C000__0000001C760FA190\n000000067F0000400200008A590000008000-000000067F0000400200008A59000000C000__00000038E67ABFA0\n000000067F0000400200008A590000008000-000000067F0000400200008A59000000C000__0000003903F1CFE8\n000000067F0000400200008A590000008000-000000067F0000400200008A59000000C000__0000003B99F7F8A0\n000000067F0000400200008A590000008000-000000067F0000400200008A59000000C000__0000005D2FFFFB38\n000000067F0000400200008A590000008000-000000067F0000400200008A59000000C000__00000073AD3FE6B8\n000000067F0000400200008A590000008000-000000067F0000400200008A59000000C000__000000914E3F38F0\n000000067F0000400200008A590000008000-000000067F0000400200008A59000000C000__000000931B33AE68\n000000067F0000400200008A590000008000-000000067F0000400200008A59000000C000__000000931B9AFDF8\n000000067F0000400200008A5900000080D3-000000067F0000400200008A590000010AB3__00000000016F8AC9-000000007119E789\n000000067F0000400200008A59000000C000-000000067F0000400200008A590000010000__000000009E3FE898\n000000067F0000400200008A59000000C000-000000067F0000400200008A590000010000__0000001C760FA190\n000000067F0000400200008A59000000C000-000000067F0000400200008A590000010000__00000038E67ABFA0\n000000067F0000400200008A59000000C000-000000067F0000400200008A590000010000__0000003903F1CFE8\n000000067F0000400200008A59000000C000-000000067F0000400200008A590000010000__0000003B99F7F8A0\n000000067F0000400200008A59000000C000-000000067F0000400200008A590000010000__0000005D2FFFFB38\n000000067F0000400200008A59
000000C000-000000067F0000400200008A590000010000__00000073AD3FE6B8\n000000067F0000400200008A59000000C000-000000067F0000400200008A590000010000__000000914E3F38F0\n000000067F0000400200008A59000000C000-000000067F0000400200008A590000010000__000000931B33AE68\n000000067F0000400200008A59000000C000-000000067F0000400200008A590000010000__000000931B9AFDF8\n000000067F0000400200008A590000010000-000000067F0000400200008A590000014000__000000009E3FE898\n000000067F0000400200008A590000010000-000000067F0000400200008A590000014000__0000001C760FA190\n000000067F0000400200008A590000010000-000000067F0000400200008A590000014000__00000038E67ABFA0\n000000067F0000400200008A590000010000-000000067F0000400200008A590000014000__0000003903F1CFE8\n000000067F0000400200008A590000010000-000000067F0000400200008A590000014000__0000003B99F7F8A0\n000000067F0000400200008A590000010000-000000067F0000400200008A590000014000__0000005D2FFFFB38\n000000067F0000400200008A590000010000-000000067F0000400200008A590000014000__00000073AD3FE6B8\n000000067F0000400200008A590000010000-000000067F0000400200008A590000014000__000000914E3F38F0\n000000067F0000400200008A590000010000-000000067F0000400200008A590000014000__000000931B33AE68\n000000067F0000400200008A590000010000-000000067F0000400200008A590000014000__000000931B9AFDF8\n000000067F0000400200008A590000010AB3-000000067F0000400200008A5900000194AF__00000000016F8AC9-000000007119E789\n000000067F0000400200008A590000014000-000000067F0000400200008A590000018000__000000009E3FE898\n000000067F0000400200008A590000014000-000000067F0000400200008A590000018000__0000001C760FA190\n000000067F0000400200008A590000014000-000000067F0000400200008A590000018000__00000038E67ABFA0\n000000067F0000400200008A590000014000-000000067F0000400200008A590000018000__0000003903F1CFE8\n000000067F0000400200008A590000014000-000000067F0000400200008A590000018000__0000003B99F7F8A0\n000000067F0000400200008A590000014000-000000067F0000400200008A590000018000__0000005D2FFFFB38\n000000067F0000400200008A590000014000-000000067F000040020
0008A590000018000__00000073AD3FE6B8\n000000067F0000400200008A590000014000-000000067F0000400200008A590000018000__000000914E3F38F0\n000000067F0000400200008A590000014000-000000067F0000400200008A590000018000__000000931B33AE68\n000000067F0000400200008A590000014000-000000067F0000400200008A590000018000__000000931B9AFDF8\n000000067F0000400200008A590000018000-000000067F0000400200008A59000001C000__000000009E3FE898\n000000067F0000400200008A590000018000-000000067F0000400200008A59000001C000__0000001C760FA190\n000000067F0000400200008A590000018000-000000067F0000400200008A59000001C000__00000038E67ABFA0\n000000067F0000400200008A590000018000-000000067F0000400200008A59000001C000__0000003903F1CFE8\n000000067F0000400200008A590000018000-000000067F0000400200008A59000001C000__0000003B99F7F8A0\n000000067F0000400200008A590000018000-000000067F0000400200008A59000001C000__0000005D2FFFFB38\n000000067F0000400200008A590000018000-000000067F0000400200008A59000001C000__00000073AD3FE6B8\n000000067F0000400200008A590000018000-000000067F0000400200008A59000001C000__000000914E3F38F0\n000000067F0000400200008A590000018000-000000067F0000400200008A59000001C000__000000931B33AE68\n000000067F0000400200008A590000018000-000000067F0000400200008A59000001C000__000000931B9AFDF8\n000000067F0000400200008A5900000194AF-000000067F0000400200008A590000021EB4__00000000016F8AC9-000000007119E789\n000000067F0000400200008A59000001C000-000000067F0000400200008A590000020000__000000009E3FE898\n000000067F0000400200008A59000001C000-000000067F0000400200008A590000020000__0000001C760FA190\n000000067F0000400200008A59000001C000-000000067F0000400200008A590000020000__00000038E67ABFA0\n000000067F0000400200008A59000001C000-000000067F0000400200008A590000020000__0000003903F1CFE8\n000000067F0000400200008A59000001C000-000000067F0000400200008A590000020000__0000003B99F7F8A0\n000000067F0000400200008A59000001C000-000000067F0000400200008A590000020000__0000005D2FFFFB38\n000000067F0000400200008A59000001C000-000000067F0000400200008A590000020000__00000073AD3
FE6B8\n000000067F0000400200008A59000001C000-000000067F0000400200008A590000020000__000000914E3F38F0\n000000067F0000400200008A59000001C000-000000067F0000400200008A590000020000__000000931B33AE68\n000000067F0000400200008A59000001C000-000000067F0000400200008A590000020000__000000931B9AFDF8\n000000067F0000400200008A590000020000-000000067F0000400200008A590000024000__000000009E3FE898\n000000067F0000400200008A590000020000-000000067F0000400200008A590000024000__0000001C760FA190\n000000067F0000400200008A590000020000-000000067F0000400200008A590000024000__00000038E67ABFA0\n000000067F0000400200008A590000020000-000000067F0000400200008A590000024000__0000003903F1CFE8\n000000067F0000400200008A590000020000-000000067F0000400200008A590000024000__0000003B99F7F8A0\n000000067F0000400200008A590000020000-000000067F0000400200008A590000024000__0000005D2FFFFB38\n000000067F0000400200008A590000020000-000000067F0000400200008A590000024000__00000073AD3FE6B8\n000000067F0000400200008A590000020000-000000067F0000400200008A590000024000__000000914E3F38F0\n000000067F0000400200008A590000020000-000000067F0000400200008A590000024000__000000931B33AE68\n000000067F0000400200008A590000020000-000000067F0000400200008A590000024000__000000931B9AFDF8\n000000067F0000400200008A590000021EB4-000000067F0000400200008A59000002A89D__00000000016F8AC9-000000007119E789\n000000067F0000400200008A590000024000-000000067F0000400200008A590000028000__000000009E3FE898\n000000067F0000400200008A590000024000-000000067F0000400200008A590000028000__0000001C760FA190\n000000067F0000400200008A590000024000-000000067F0000400200008A590000028000__00000038E67ABFA0\n000000067F0000400200008A590000024000-000000067F0000400200008A590000028000__0000003903F1CFE8\n000000067F0000400200008A590000024000-000000067F0000400200008A590000028000__0000003B99F7F8A0\n000000067F0000400200008A590000024000-000000067F0000400200008A590000028000__0000005D2FFFFB38\n000000067F0000400200008A590000024000-000000067F0000400200008A590000028000__00000073AD3FE6B8\n000000067F0000400200008
A590000024000-000000067F0000400200008A590000028000__000000914E3F38F0\n000000067F0000400200008A590000024000-000000067F0000400200008A590000028000__000000931B33AE68\n000000067F0000400200008A590000024000-000000067F0000400200008A590000028000__000000931B9AFDF8\n000000067F0000400200008A590000028000-000000067F0000400200008A59000002C000__000000009E3FE898\n000000067F0000400200008A590000028000-000000067F0000400200008A59000002C000__0000001C760FA190\n000000067F0000400200008A590000028000-000000067F0000400200008A59000002C000__00000038E67ABFA0\n000000067F0000400200008A590000028000-000000067F0000400200008A59000002C000__0000003903F1CFE8\n000000067F0000400200008A590000028000-000000067F0000400200008A59000002C000__0000003B99F7F8A0\n000000067F0000400200008A590000028000-000000067F0000400200008A59000002C000__0000005D2FFFFB38\n000000067F0000400200008A590000028000-000000067F0000400200008A59000002C000__00000073AD3FE6B8\n000000067F0000400200008A590000028000-000000067F0000400200008A59000002C000__000000914E3F38F0\n000000067F0000400200008A590000028000-000000067F0000400200008A59000002C000__000000931B33AE68\n000000067F0000400200008A590000028000-000000067F0000400200008A59000002C000__000000931B9AFDF8\n000000067F0000400200008A59000002A89D-000000067F0000400200008A590000033278__00000000016F8AC9-000000007119E789\n000000067F0000400200008A59000002C000-000000067F0000400200008A590000030000__000000009E3FE898\n000000067F0000400200008A59000002C000-000000067F0000400200008A590000030000__0000001C760FA190\n000000067F0000400200008A59000002C000-000000067F0000400200008A590000030000__00000038E67ABFA0\n000000067F0000400200008A59000002C000-000000067F0000400200008A590000030000__0000003903F1CFE8\n000000067F0000400200008A59000002C000-000000067F0000400200008A590000030000__0000003B99F7F8A0\n000000067F0000400200008A59000002C000-000000067F0000400200008A590000030000__0000005D2FFFFB38\n000000067F0000400200008A59000002C000-000000067F0000400200008A590000030000__00000073AD3FE6B8\n000000067F0000400200008A59000002C000-000000067F000040
0200008A590000030000__000000914E3F38F0\n000000067F0000400200008A59000002C000-000000067F0000400200008A590000030000__000000931B33AE68\n000000067F0000400200008A59000002C000-000000067F0000400200008A590000030000__000000931B9AFDF8\n000000067F0000400200008A590000030000-000000067F0000400200008A590000034000__000000009E3FE898\n000000067F0000400200008A590000030000-000000067F0000400200008A590000034000__0000001C760FA190\n000000067F0000400200008A590000030000-000000067F0000400200008A590000034000__00000038E67ABFA0\n000000067F0000400200008A590000030000-000000067F0000400200008A590000034000__0000003903F1CFE8\n000000067F0000400200008A590000030000-000000067F0000400200008A590000034000__0000003B99F7F8A0\n000000067F0000400200008A590000030000-000000067F0000400200008A590000034000__0000005D2FFFFB38\n000000067F0000400200008A590000030000-000000067F0000400200008A590000034000__00000073AD3FE6B8\n000000067F0000400200008A590000030000-000000067F0000400200008A590000034000__000000914E3F38F0\n000000067F0000400200008A590000030000-000000067F0000400200008A590000034000__000000931B33AE68\n000000067F0000400200008A590000030000-000000067F0000400200008A590000034000__000000931B9AFDF8\n000000067F0000400200008A590000033278-000000067F0000400200008A59000003BC3A__00000000016F8AC9-000000007119E789\n000000067F0000400200008A590000034000-000000067F0000400200008A590000038000__000000009E3FE898\n000000067F0000400200008A590000034000-000000067F0000400200008A590000038000__0000001C760FA190\n000000067F0000400200008A590000034000-000000067F0000400200008A590000038000__00000038E67ABFA0\n000000067F0000400200008A590000034000-000000067F0000400200008A590000038000__0000003903F1CFE8\n000000067F0000400200008A590000034000-000000067F0000400200008A590000038000__0000003B99F7F8A0\n000000067F0000400200008A590000034000-000000067F0000400200008A590000038000__0000005D2FFFFB38\n000000067F0000400200008A590000034000-000000067F0000400200008A590000038000__00000073AD3FE6B8\n000000067F0000400200008A590000034000-000000067F0000400200008A590000038000__00000091
4E3F38F0\n000000067F0000400200008A590000034000-000000067F0000400200008A590000038000__000000931B33AE68\n000000067F0000400200008A590000034000-000000067F0000400200008A590000038000__000000931B9AFDF8\n000000067F0000400200008A590000038000-000000067F0000400200008A59000003C000__000000009E3FE898\n000000067F0000400200008A590000038000-000000067F0000400200008A59000003C000__0000001C760FA190\n000000067F0000400200008A590000038000-000000067F0000400200008A59000003C000__00000038E67ABFA0\n000000067F0000400200008A590000038000-000000067F0000400200008A59000003C000__0000003903F1CFE8\n000000067F0000400200008A590000038000-000000067F0000400200008A59000003C000__0000003B99F7F8A0\n000000067F0000400200008A590000038000-000000067F0000400200008A59000003C000__0000005D2FFFFB38\n000000067F0000400200008A590000038000-000000067F0000400200008A59000003C000__00000073AD3FE6B8\n000000067F0000400200008A590000038000-000000067F0000400200008A59000003C000__000000914E3F38F0\n000000067F0000400200008A590000038000-000000067F0000400200008A59000003C000__000000931B33AE68\n000000067F0000400200008A590000038000-000000067F0000400200008A59000003C000__000000931B9AFDF8\n000000067F0000400200008A59000003BC3A-030000000000000000000000000000000002__00000000016F8AC9-000000007119E789\n000000067F0000400200008A59000003C000-000000067F0000400200008A590000040000__000000009E3FE898\n000000067F0000400200008A59000003C000-000000067F0000400200008A590000040000__0000001C760FA190\n000000067F0000400200008A59000003C000-000000067F0000400200008A590000040000__00000038E67ABFA0\n000000067F0000400200008A59000003C000-000000067F0000400200008A590000040000__0000003903F1CFE8\n000000067F0000400200008A59000003C000-000000067F0000400200008A590000040000__0000003B99F7F8A0\n000000067F0000400200008A59000003C000-000000067F0000400200008A590000040000__0000005D2FFFFB38\n000000067F0000400200008A59000003C000-000000067F0000400200008A590000040000__00000073AD3FE6B8\n000000067F0000400200008A59000003C000-000000067F0000400200008A590000040000__000000914E3F38F0\n000000067F0000400200
008A59000003C000-000000067F0000400200008A590000040000__000000931B33AE68\n000000067F0000400200008A59000003C000-000000067F0000400200008A590000040000__000000931B9AFDF8\n000000067F0000400200008A590000040000-000000067F0000400200008A590000044000__000000009E3FE898\n000000067F0000400200008A590000040000-000000067F0000400200008A590000044000__0000001C760FA190\n000000067F0000400200008A590000040000-000000067F0000400200008A590000044000__00000038E67ABFA0\n000000067F0000400200008A590000040000-000000067F0000400200008A590000044000__0000003903F1CFE8\n000000067F0000400200008A590000040000-000000067F0000400200008A590000044000__0000003B99F7F8A0\n000000067F0000400200008A590000040000-000000067F0000400200008A590000044000__0000005D2FFFFB38\n000000067F0000400200008A590000040000-000000067F0000400200008A590000044000__00000073AD3FE6B8\n000000067F0000400200008A590000040000-000000067F0000400200008A590000044000__000000914E3F38F0\n000000067F0000400200008A590000040000-000000067F0000400200008A590000044000__000000931B33AE68\n000000067F0000400200008A590000040000-000000067F0000400200008A590000044000__000000931B9AFDF8\n000000067F0000400200008A590000044000-000000067F0000400200008A590000048000__000000009E3FE898\n000000067F0000400200008A590000044000-000000067F0000400200008A590000048000__0000001C760FA190\n000000067F0000400200008A590000044000-000000067F0000400200008A590000048000__00000038E67ABFA0\n000000067F0000400200008A590000044000-000000067F0000400200008A590000048000__0000003903F1CFE8\n000000067F0000400200008A590000044000-000000067F0000400200008A590000048000__0000003B99F7F8A0\n000000067F0000400200008A590000044000-000000067F0000400200008A590000048000__0000005D2FFFFB38\n000000067F0000400200008A590000044000-000000067F0000400200008A590000048000__00000073AD3FE6B8\n000000067F0000400200008A590000044000-000000067F0000400200008A590000048000__000000914E3F38F0\n000000067F0000400200008A590000044000-000000067F0000400200008A590000048000__000000931B33AE68\n000000067F0000400200008A590000044000-000000067F0000400200008A590000
048000__000000931B9AFDF8\n000000067F0000400200008A590000044853-000000067F0000400200008A59000004D22E__000000007119E789-0000000120C1DDF9\n000000067F0000400200008A590000048000-000000067F0000400200008A59000004C000__000000009E3FE898\n000000067F0000400200008A590000048000-000000067F0000400200008A59000004C000__0000001C760FA190\n000000067F0000400200008A590000048000-000000067F0000400200008A59000004C000__00000038E67ABFA0\n000000067F0000400200008A590000048000-000000067F0000400200008A59000004C000__0000003903F1CFE8\n000000067F0000400200008A590000048000-000000067F0000400200008A59000004C000__0000003B99F7F8A0\n000000067F0000400200008A590000048000-000000067F0000400200008A59000004C000__0000005D2FFFFB38\n000000067F0000400200008A590000048000-000000067F0000400200008A59000004C000__00000073AD3FE6B8\n000000067F0000400200008A590000048000-000000067F0000400200008A59000004C000__000000914E3F38F0\n000000067F0000400200008A590000048000-000000067F0000400200008A59000004C000__000000931B33AE68\n000000067F0000400200008A590000048000-000000067F0000400200008A59000004C000__000000931B9AFDF8\n000000067F0000400200008A59000004C000-000000067F0000400200008A590000050000__000000009E3FE898\n000000067F0000400200008A59000004C000-000000067F0000400200008A590000050000__0000001C760FA190\n000000067F0000400200008A59000004C000-000000067F0000400200008A590000050000__00000038E67ABFA0\n000000067F0000400200008A59000004C000-000000067F0000400200008A590000050000__0000003903F1CFE8\n000000067F0000400200008A59000004C000-000000067F0000400200008A590000050000__0000003B99F7F8A0\n000000067F0000400200008A59000004C000-000000067F0000400200008A590000050000__0000005D2FFFFB38\n000000067F0000400200008A59000004C000-000000067F0000400200008A590000050000__00000073AD3FE6B8\n000000067F0000400200008A59000004C000-000000067F0000400200008A590000050000__000000914E3F38F0\n000000067F0000400200008A59000004C000-000000067F0000400200008A590000050000__000000931B33AE68\n000000067F0000400200008A59000004C000-000000067F0000400200008A590000050000__000000931B9AFDF8\n0000
00067F0000400200008A59000004D22E-000000067F0000400200008A590000055C2F__000000007119E789-0000000120C1DDF9\n000000067F0000400200008A590000050000-000000067F0000400200008A590000054000__000000009E3FE898\n000000067F0000400200008A590000050000-000000067F0000400200008A590000054000__0000001C760FA190\n000000067F0000400200008A590000050000-000000067F0000400200008A590000054000__00000038E67ABFA0\n000000067F0000400200008A590000050000-000000067F0000400200008A590000054000__0000003903F1CFE8\n000000067F0000400200008A590000050000-000000067F0000400200008A590000054000__0000003B99F7F8A0\n000000067F0000400200008A590000050000-000000067F0000400200008A590000054000__0000005D2FFFFB38\n000000067F0000400200008A590000050000-000000067F0000400200008A590000054000__00000073AD3FE6B8\n000000067F0000400200008A590000050000-000000067F0000400200008A590000054000__000000914E3F38F0\n000000067F0000400200008A590000050000-000000067F0000400200008A590000054000__000000931B33AE68\n000000067F0000400200008A590000050000-000000067F0000400200008A590000054000__000000931B9AFDF8\n000000067F0000400200008A590000054000-000000067F0000400200008A590000058000__0000001C760FA190\n000000067F0000400200008A590000054000-000000067F0000400200008A590000058000__00000038E67ABFA0\n000000067F0000400200008A590000054000-000000067F0000400200008A590000058000__0000003903F1CFE8\n000000067F0000400200008A590000054000-000000067F0000400200008A590000058000__0000003B99F7F8A0\n000000067F0000400200008A590000054000-000000067F0000400200008A590000058000__0000005D2FFFFB38\n000000067F0000400200008A590000054000-000000067F0000400200008A590000058000__00000073AD3FE6B8\n000000067F0000400200008A590000054000-000000067F0000400200008A590000058000__000000914E3F38F0\n000000067F0000400200008A590000054000-000000067F0000400200008A590000058000__000000931B33AE68\n000000067F0000400200008A590000054000-000000067F0000400200008A590000058000__000000931B9AFDF8\n000000067F0000400200008A590000054000-030000000000000000000000000000000002__000000009E3FE898\n000000067F0000400200008A590000055C
2F-000000067F0000400200008A59000005E61C__000000007119E789-0000000120C1DDF9\n000000067F0000400200008A590000058000-000000067F0000400200008A59000005C000__0000001C760FA190\n000000067F0000400200008A590000058000-000000067F0000400200008A59000005C000__00000038E67ABFA0\n000000067F0000400200008A590000058000-000000067F0000400200008A59000005C000__0000003903F1CFE8\n000000067F0000400200008A590000058000-000000067F0000400200008A59000005C000__0000003B99F7F8A0\n000000067F0000400200008A590000058000-000000067F0000400200008A59000005C000__0000005D2FFFFB38\n000000067F0000400200008A590000058000-000000067F0000400200008A59000005C000__00000073AD3FE6B8\n000000067F0000400200008A590000058000-000000067F0000400200008A59000005C000__000000914E3F38F0\n000000067F0000400200008A590000058000-000000067F0000400200008A59000005C000__000000931B33AE68\n000000067F0000400200008A590000058000-000000067F0000400200008A59000005C000__000000931B9AFDF8\n000000067F0000400200008A59000005C000-000000067F0000400200008A590000060000__0000001C760FA190\n000000067F0000400200008A59000005C000-000000067F0000400200008A590000060000__00000038E67ABFA0\n000000067F0000400200008A59000005C000-000000067F0000400200008A590000060000__0000003903F1CFE8\n000000067F0000400200008A59000005C000-000000067F0000400200008A590000060000__0000003B99F7F8A0\n000000067F0000400200008A59000005C000-000000067F0000400200008A590000060000__0000005D2FFFFB38\n000000067F0000400200008A59000005C000-000000067F0000400200008A590000060000__00000073AD3FE6B8\n000000067F0000400200008A59000005C000-000000067F0000400200008A590000060000__000000914E3F38F0\n000000067F0000400200008A59000005C000-000000067F0000400200008A590000060000__000000931B33AE68\n000000067F0000400200008A59000005C000-000000067F0000400200008A590000060000__000000931B9AFDF8\n000000067F0000400200008A59000005E61C-000000067F0000400200008A590000066FFD__000000007119E789-0000000120C1DDF9\n000000067F0000400200008A590000060000-000000067F0000400200008A590000064000__0000001C760FA190\n000000067F0000400200008A590000060000-000000067F
0000400200008A590000064000__00000038E67ABFA0\n000000067F0000400200008A590000060000-000000067F0000400200008A590000064000__0000003903F1CFE8\n000000067F0000400200008A590000060000-000000067F0000400200008A590000064000__0000003B99F7F8A0\n000000067F0000400200008A590000060000-000000067F0000400200008A590000064000__0000005D2FFFFB38\n000000067F0000400200008A590000060000-000000067F0000400200008A590000064000__00000073AD3FE6B8\n000000067F0000400200008A590000060000-000000067F0000400200008A590000064000__000000914E3F38F0\n000000067F0000400200008A590000060000-000000067F0000400200008A590000064000__000000931B33AE68\n000000067F0000400200008A590000060000-000000067F0000400200008A590000064000__000000931B9AFDF8\n000000067F0000400200008A590000064000-000000067F0000400200008A590000068000__0000001C760FA190\n000000067F0000400200008A590000064000-000000067F0000400200008A590000068000__00000038E67ABFA0\n000000067F0000400200008A590000064000-000000067F0000400200008A590000068000__0000003903F1CFE8\n000000067F0000400200008A590000064000-000000067F0000400200008A590000068000__0000003B99F7F8A0\n000000067F0000400200008A590000064000-000000067F0000400200008A590000068000__0000005D2FFFFB38\n000000067F0000400200008A590000064000-000000067F0000400200008A590000068000__00000073AD3FE6B8\n000000067F0000400200008A590000064000-000000067F0000400200008A590000068000__000000914E3F38F0\n000000067F0000400200008A590000064000-000000067F0000400200008A590000068000__000000931B33AE68\n000000067F0000400200008A590000064000-000000067F0000400200008A590000068000__000000931B9AFDF8\n000000067F0000400200008A590000066FFD-000000067F0000400200008A59000006F9CB__000000007119E789-0000000120C1DDF9\n000000067F0000400200008A590000068000-000000067F0000400200008A59000006C000__0000001C760FA190\n000000067F0000400200008A590000068000-000000067F0000400200008A59000006C000__00000038E67ABFA0\n000000067F0000400200008A590000068000-000000067F0000400200008A59000006C000__0000003903F1CFE8\n000000067F0000400200008A590000068000-000000067F0000400200008A59000006C000__00
00003B99F7F8A0\n000000067F0000400200008A590000068000-000000067F0000400200008A59000006C000__0000005D2FFFFB38\n000000067F0000400200008A590000068000-000000067F0000400200008A59000006C000__00000073AD3FE6B8\n000000067F0000400200008A590000068000-000000067F0000400200008A59000006C000__000000914E3F38F0\n000000067F0000400200008A590000068000-000000067F0000400200008A59000006C000__000000931B33AE68\n000000067F0000400200008A590000068000-000000067F0000400200008A59000006C000__000000931B9AFDF8\n000000067F0000400200008A59000006C000-000000067F0000400200008A590000070000__0000001C760FA190\n000000067F0000400200008A59000006C000-000000067F0000400200008A590000070000__00000038E67ABFA0\n000000067F0000400200008A59000006C000-000000067F0000400200008A590000070000__0000003903F1CFE8\n000000067F0000400200008A59000006C000-000000067F0000400200008A590000070000__0000003B99F7F8A0\n000000067F0000400200008A59000006C000-000000067F0000400200008A590000070000__0000005D2FFFFB38\n000000067F0000400200008A59000006C000-000000067F0000400200008A590000070000__00000073AD3FE6B8\n000000067F0000400200008A59000006C000-000000067F0000400200008A590000070000__000000914E3F38F0\n000000067F0000400200008A59000006C000-000000067F0000400200008A590000070000__000000931B33AE68\n000000067F0000400200008A59000006C000-000000067F0000400200008A590000070000__000000931B9AFDF8\n000000067F0000400200008A59000006F9CB-000000067F0000400200008A590000078388__000000007119E789-0000000120C1DDF9\n000000067F0000400200008A590000070000-000000067F0000400200008A590000074000__0000001C760FA190\n000000067F0000400200008A590000070000-000000067F0000400200008A590000074000__00000038E67ABFA0\n000000067F0000400200008A590000070000-000000067F0000400200008A590000074000__0000003903F1CFE8\n000000067F0000400200008A590000070000-000000067F0000400200008A590000074000__0000003B99F7F8A0\n000000067F0000400200008A590000070000-000000067F0000400200008A590000074000__0000005D2FFFFB38\n000000067F0000400200008A590000070000-000000067F0000400200008A590000074000__00000073AD3FE6B8\n000000067F0000
400200008A590000070000-000000067F0000400200008A590000074000__000000914E3F38F0\n000000067F0000400200008A590000070000-000000067F0000400200008A590000074000__000000931B33AE68\n000000067F0000400200008A590000070000-000000067F0000400200008A590000074000__000000931B9AFDF8\n000000067F0000400200008A590000074000-000000067F0000400200008A590000078000__0000001C760FA190\n000000067F0000400200008A590000074000-000000067F0000400200008A590000078000__00000038E67ABFA0\n000000067F0000400200008A590000074000-000000067F0000400200008A590000078000__0000003903F1CFE8\n000000067F0000400200008A590000074000-000000067F0000400200008A590000078000__0000003B99F7F8A0\n000000067F0000400200008A590000074000-000000067F0000400200008A590000078000__0000005D2FFFFB38\n000000067F0000400200008A590000074000-000000067F0000400200008A590000078000__00000073AD3FE6B8\n000000067F0000400200008A590000074000-000000067F0000400200008A590000078000__000000914E3F38F0\n000000067F0000400200008A590000074000-000000067F0000400200008A590000078000__000000931B33AE68\n000000067F0000400200008A590000074000-000000067F0000400200008A590000078000__000000931B9AFDF8\n000000067F0000400200008A590000078000-000000067F0000400200008A59000007C000__0000001C760FA190\n000000067F0000400200008A590000078000-000000067F0000400200008A59000007C000__00000038E67ABFA0\n000000067F0000400200008A590000078000-000000067F0000400200008A59000007C000__0000003903F1CFE8\n000000067F0000400200008A590000078000-000000067F0000400200008A59000007C000__0000003B99F7F8A0\n000000067F0000400200008A590000078000-000000067F0000400200008A59000007C000__0000005D2FFFFB38\n000000067F0000400200008A590000078000-000000067F0000400200008A59000007C000__00000073AD3FE6B8\n000000067F0000400200008A590000078000-000000067F0000400200008A59000007C000__000000914E3F38F0\n000000067F0000400200008A590000078000-000000067F0000400200008A59000007C000__000000931B33AE68\n000000067F0000400200008A590000078000-000000067F0000400200008A59000007C000__000000931B9AFDF8\n000000067F0000400200008A590000078388-000000067F0000400200008A
590000080D43__000000007119E789-0000000120C1DDF9\n000000067F0000400200008A59000007C000-000000067F0000400200008A590000080000__0000001C760FA190\n000000067F0000400200008A59000007C000-000000067F0000400200008A590000080000__00000038E67ABFA0\n000000067F0000400200008A59000007C000-000000067F0000400200008A590000080000__0000003903F1CFE8\n000000067F0000400200008A59000007C000-000000067F0000400200008A590000080000__0000003B99F7F8A0\n000000067F0000400200008A59000007C000-000000067F0000400200008A590000080000__0000005D2FFFFB38\n000000067F0000400200008A59000007C000-000000067F0000400200008A590000080000__00000073AD3FE6B8\n000000067F0000400200008A59000007C000-000000067F0000400200008A590000080000__000000914E3F38F0\n000000067F0000400200008A59000007C000-000000067F0000400200008A590000080000__000000931B33AE68\n000000067F0000400200008A59000007C000-000000067F0000400200008A590000080000__000000931B9AFDF8\n000000067F0000400200008A590000080000-000000067F0000400200008A590000084000__0000001C760FA190\n000000067F0000400200008A590000080000-000000067F0000400200008A590000084000__00000038E67ABFA0\n000000067F0000400200008A590000080000-000000067F0000400200008A590000084000__0000003903F1CFE8\n000000067F0000400200008A590000080000-000000067F0000400200008A590000084000__0000003B99F7F8A0\n000000067F0000400200008A590000080000-000000067F0000400200008A590000084000__0000005D2FFFFB38\n000000067F0000400200008A590000080000-000000067F0000400200008A590000084000__00000073AD3FE6B8\n000000067F0000400200008A590000080000-000000067F0000400200008A590000084000__000000914E3F38F0\n000000067F0000400200008A590000080000-000000067F0000400200008A590000084000__000000931B33AE68\n000000067F0000400200008A590000080000-000000067F0000400200008A590000084000__000000931B9AFDF8\n000000067F0000400200008A590000080D43-000000067F0000400200008A590000089730__000000007119E789-0000000120C1DDF9\n000000067F0000400200008A590000084000-000000067F0000400200008A590000088000__0000001C760FA190\n000000067F0000400200008A590000084000-000000067F0000400200008A590000088000_
_00000038E67ABFA0\n000000067F0000400200008A590000084000-000000067F0000400200008A590000088000__0000003903F1CFE8\n000000067F0000400200008A590000084000-000000067F0000400200008A590000088000__0000003B99F7F8A0\n000000067F0000400200008A590000084000-000000067F0000400200008A590000088000__0000005D2FFFFB38\n000000067F0000400200008A590000084000-000000067F0000400200008A590000088000__00000073AD3FE6B8\n000000067F0000400200008A590000084000-000000067F0000400200008A590000088000__000000914E3F38F0\n000000067F0000400200008A590000084000-000000067F0000400200008A590000088000__000000931B33AE68\n000000067F0000400200008A590000084000-000000067F0000400200008A590000088000__000000931B9AFDF8\n000000067F0000400200008A590000088000-000000067F0000400200008A59000008C000__0000001C760FA190\n000000067F0000400200008A590000088000-000000067F0000400200008A59000008C000__00000038E67ABFA0\n000000067F0000400200008A590000088000-000000067F0000400200008A59000008C000__0000003903F1CFE8\n000000067F0000400200008A590000088000-000000067F0000400200008A59000008C000__0000003B99F7F8A0\n000000067F0000400200008A590000088000-000000067F0000400200008A59000008C000__0000005D2FFFFB38\n000000067F0000400200008A590000088000-000000067F0000400200008A59000008C000__00000073AD3FE6B8\n000000067F0000400200008A590000088000-000000067F0000400200008A59000008C000__000000914E3F38F0\n000000067F0000400200008A590000088000-000000067F0000400200008A59000008C000__000000931B33AE68\n000000067F0000400200008A590000088000-000000067F0000400200008A59000008C000__000000931B9AFDF8\n000000067F0000400200008A590000089730-000000067F0000400200008A590000092129__000000007119E789-0000000120C1DDF9\n000000067F0000400200008A59000008C000-000000067F0000400200008A590000090000__0000001C760FA190\n000000067F0000400200008A59000008C000-000000067F0000400200008A590000090000__00000038E67ABFA0\n000000067F0000400200008A59000008C000-000000067F0000400200008A590000090000__0000003903F1CFE8\n000000067F0000400200008A59000008C000-000000067F0000400200008A590000090000__0000003B99F7F8A0\n000000067F0
000400200008A59000008C000-000000067F0000400200008A590000090000__0000005D2FFFFB38\n000000067F0000400200008A59000008C000-000000067F0000400200008A590000090000__00000073AD3FE6B8\n000000067F0000400200008A59000008C000-000000067F0000400200008A590000090000__000000914E3F38F0\n000000067F0000400200008A59000008C000-000000067F0000400200008A590000090000__000000931B33AE68\n000000067F0000400200008A59000008C000-000000067F0000400200008A590000090000__000000931B9AFDF8\n000000067F0000400200008A590000090000-000000067F0000400200008A590000094000__0000001C760FA190\n000000067F0000400200008A590000090000-000000067F0000400200008A590000094000__00000038E67ABFA0\n000000067F0000400200008A590000090000-000000067F0000400200008A590000094000__0000003903F1CFE8\n000000067F0000400200008A590000090000-000000067F0000400200008A590000094000__0000003B99F7F8A0\n000000067F0000400200008A590000090000-000000067F0000400200008A590000094000__0000005D2FFFFB38\n000000067F0000400200008A590000090000-000000067F0000400200008A590000094000__00000073AD3FE6B8\n000000067F0000400200008A590000090000-000000067F0000400200008A590000094000__000000914E3F38F0\n000000067F0000400200008A590000090000-000000067F0000400200008A590000094000__000000931B33AE68\n000000067F0000400200008A590000090000-000000067F0000400200008A590000094000__000000931B9AFDF8\n000000067F0000400200008A590000092129-000000067F0000400200008A59000009AB12__000000007119E789-0000000120C1DDF9\n000000067F0000400200008A590000094000-000000067F0000400200008A590000098000__0000001C760FA190\n000000067F0000400200008A590000094000-000000067F0000400200008A590000098000__00000038E67ABFA0\n000000067F0000400200008A590000094000-000000067F0000400200008A590000098000__0000003903F1CFE8\n000000067F0000400200008A590000094000-000000067F0000400200008A590000098000__0000003B99F7F8A0\n000000067F0000400200008A590000094000-000000067F0000400200008A590000098000__0000005D2FFFFB38\n000000067F0000400200008A590000094000-000000067F0000400200008A590000098000__00000073AD3FE6B8\n000000067F0000400200008A590000094000-0000
00067F0000400200008A590000098000__000000914E3F38F0\n000000067F0000400200008A590000094000-000000067F0000400200008A590000098000__000000931B33AE68\n000000067F0000400200008A590000094000-000000067F0000400200008A590000098000__000000931B9AFDF8\n000000067F0000400200008A590000098000-000000067F0000400200008A59000009C000__0000001C725A2400\n000000067F0000400200008A590000098000-000000067F0000400200008A59000009C000__0000001C760FA190\n000000067F0000400200008A590000098000-000000067F0000400200008A59000009C000__00000038E67ABFA0\n000000067F0000400200008A590000098000-000000067F0000400200008A59000009C000__0000003903F1CFE8\n000000067F0000400200008A590000098000-000000067F0000400200008A59000009C000__0000003B99F7F8A0\n000000067F0000400200008A590000098000-000000067F0000400200008A59000009C000__0000005D2FFFFB38\n000000067F0000400200008A590000098000-000000067F0000400200008A59000009C000__00000073AD3FE6B8\n000000067F0000400200008A590000098000-000000067F0000400200008A59000009C000__000000914E3F38F0\n000000067F0000400200008A590000098000-000000067F0000400200008A59000009C000__000000931B33AE68\n000000067F0000400200008A590000098000-000000067F0000400200008A59000009C000__000000931B9AFDF8\n000000067F0000400200008A59000009AB12-000000067F0000400200008A590100000000__000000007119E789-0000000120C1DDF9\n000000067F0000400200008A59000009AE54-000000067F0000400200008A5900000A3836__0000000120C1DDF9-00000001C071E001\n000000067F0000400200008A59000009C000-000000067F0000400200008A5900000A0000__0000001C725A2400\n000000067F0000400200008A59000009C000-000000067F0000400200008A5900000A0000__0000001C760FA190\n000000067F0000400200008A59000009C000-000000067F0000400200008A5900000A0000__00000038E67ABFA0\n000000067F0000400200008A59000009C000-000000067F0000400200008A5900000A0000__0000003903F1CFE8\n000000067F0000400200008A59000009C000-000000067F0000400200008A5900000A0000__0000003B99F7F8A0\n000000067F0000400200008A59000009C000-000000067F0000400200008A5900000A0000__0000005D2FFFFB38\n000000067F0000400200008A59000009C000-000000067F0000400
200008A5900000A0000__00000073AD3FE6B8\n000000067F0000400200008A59000009C000-000000067F0000400200008A5900000A0000__000000914E3F38F0\n000000067F0000400200008A59000009C000-000000067F0000400200008A5900000A0000__000000931B33AE68\n000000067F0000400200008A59000009C000-000000067F0000400200008A5900000A0000__000000931B9AFDF8\n000000067F0000400200008A5900000A0000-000000067F0000400200008A5900000A4000__0000001C725A2400\n000000067F0000400200008A5900000A0000-000000067F0000400200008A5900000A4000__0000001C760FA190\n000000067F0000400200008A5900000A0000-000000067F0000400200008A5900000A4000__00000038E67ABFA0\n000000067F0000400200008A5900000A0000-000000067F0000400200008A5900000A4000__0000003903F1CFE8\n000000067F0000400200008A5900000A0000-000000067F0000400200008A5900000A4000__0000003B99F7F8A0\n000000067F0000400200008A5900000A0000-000000067F0000400200008A5900000A4000__0000005D2FFFFB38\n000000067F0000400200008A5900000A0000-000000067F0000400200008A5900000A4000__00000073AD3FE6B8\n000000067F0000400200008A5900000A0000-000000067F0000400200008A5900000A4000__000000914E3F38F0\n000000067F0000400200008A5900000A0000-000000067F0000400200008A5900000A4000__000000931B33AE68\n000000067F0000400200008A5900000A0000-000000067F0000400200008A5900000A4000__000000931B9AFDF8\n000000067F0000400200008A5900000A3836-000000067F0000400200008A5900000AC1F4__0000000120C1DDF9-00000001C071E001\n000000067F0000400200008A5900000A4000-000000067F0000400200008A5900000A8000__0000001C725A2400\n000000067F0000400200008A5900000A4000-000000067F0000400200008A5900000A8000__0000001C760FA190\n000000067F0000400200008A5900000A4000-000000067F0000400200008A5900000A8000__00000038E67ABFA0\n000000067F0000400200008A5900000A4000-000000067F0000400200008A5900000A8000__0000003903F1CFE8\n000000067F0000400200008A5900000A4000-000000067F0000400200008A5900000A8000__0000003B99F7F8A0\n000000067F0000400200008A5900000A4000-000000067F0000400200008A5900000A8000__0000005D2FFFFB38\n000000067F0000400200008A5900000A4000-000000067F0000400200008A5900000A8000__00000073A
D3FE6B8\n000000067F0000400200008A5900000A4000-000000067F0000400200008A5900000A8000__000000914E3F38F0\n000000067F0000400200008A5900000A4000-000000067F0000400200008A5900000A8000__000000931B33AE68\n000000067F0000400200008A5900000A4000-000000067F0000400200008A5900000A8000__000000931B9AFDF8\n000000067F0000400200008A5900000A8000-000000067F0000400200008A5900000AC000__0000001C725A2400\n000000067F0000400200008A5900000A8000-000000067F0000400200008A5900000AC000__0000001C760FA190\n000000067F0000400200008A5900000A8000-000000067F0000400200008A5900000AC000__00000038E67ABFA0\n000000067F0000400200008A5900000A8000-000000067F0000400200008A5900000AC000__0000003903F1CFE8\n000000067F0000400200008A5900000A8000-000000067F0000400200008A5900000AC000__0000003B99F7F8A0\n000000067F0000400200008A5900000A8000-000000067F0000400200008A5900000AC000__0000005D2FFFFB38\n000000067F0000400200008A5900000A8000-000000067F0000400200008A5900000AC000__00000073AD3FE6B8\n000000067F0000400200008A5900000A8000-000000067F0000400200008A5900000AC000__000000914E3F38F0\n000000067F0000400200008A5900000A8000-000000067F0000400200008A5900000AC000__000000931B33AE68\n000000067F0000400200008A5900000A8000-000000067F0000400200008A5900000AC000__000000931B9AFDF8\n000000067F0000400200008A5900000AC000-000000067F0000400200008A5900000B0000__0000001C725A2400\n000000067F0000400200008A5900000AC000-000000067F0000400200008A5900000B0000__0000001C760FA190\n000000067F0000400200008A5900000AC000-000000067F0000400200008A5900000B0000__00000038E67ABFA0\n000000067F0000400200008A5900000AC000-000000067F0000400200008A5900000B0000__0000003903F1CFE8\n000000067F0000400200008A5900000AC000-000000067F0000400200008A5900000B0000__0000003B99F7F8A0\n000000067F0000400200008A5900000AC000-000000067F0000400200008A5900000B0000__0000005D2FFFFB38\n000000067F0000400200008A5900000AC000-000000067F0000400200008A5900000B0000__00000073AD3FE6B8\n000000067F0000400200008A5900000AC000-000000067F0000400200008A5900000B0000__000000914E3F38F0\n000000067F0000400200008A5900000AC000-0
00000067F0000400200008A5900000B0000__000000931B33AE68\n000000067F0000400200008A5900000AC000-000000067F0000400200008A5900000B0000__000000931B9AFDF8\n000000067F0000400200008A5900000AC1F4-000000067F0000400200008A5900000B4BC0__0000000120C1DDF9-00000001C071E001\n000000067F0000400200008A5900000B0000-000000067F0000400200008A5900000B4000__0000001C725A2400\n000000067F0000400200008A5900000B0000-000000067F0000400200008A5900000B4000__0000001C760FA190\n000000067F0000400200008A5900000B0000-000000067F0000400200008A5900000B4000__00000038E67ABFA0\n000000067F0000400200008A5900000B0000-000000067F0000400200008A5900000B4000__0000003903F1CFE8\n000000067F0000400200008A5900000B0000-000000067F0000400200008A5900000B4000__0000003B99F7F8A0\n000000067F0000400200008A5900000B0000-000000067F0000400200008A5900000B4000__0000005D2FFFFB38\n000000067F0000400200008A5900000B0000-000000067F0000400200008A5900000B4000__00000073AD3FE6B8\n000000067F0000400200008A5900000B0000-000000067F0000400200008A5900000B4000__000000914E3F38F0\n000000067F0000400200008A5900000B0000-000000067F0000400200008A5900000B4000__000000931B33AE68\n000000067F0000400200008A5900000B0000-000000067F0000400200008A5900000B4000__000000931B9AFDF8\n000000067F0000400200008A5900000B4000-000000067F0000400200008A5900000B8000__0000001C725A2400\n000000067F0000400200008A5900000B4000-000000067F0000400200008A5900000B8000__0000001C760FA190\n000000067F0000400200008A5900000B4000-000000067F0000400200008A5900000B8000__00000038E67ABFA0\n000000067F0000400200008A5900000B4000-000000067F0000400200008A5900000B8000__0000003903F1CFE8\n000000067F0000400200008A5900000B4000-000000067F0000400200008A5900000B8000__0000003B99F7F8A0\n000000067F0000400200008A5900000B4000-000000067F0000400200008A5900000B8000__0000005D2FFFFB38\n000000067F0000400200008A5900000B4000-000000067F0000400200008A5900000B8000__00000073AD3FE6B8\n000000067F0000400200008A5900000B4000-000000067F0000400200008A5900000B8000__000000914E3F38F0\n000000067F0000400200008A5900000B4000-000000067F0000400200008A5900000
B8000__000000931B33AE68\n000000067F0000400200008A5900000B4000-000000067F0000400200008A5900000B8000__000000931B9AFDF8\n000000067F0000400200008A5900000B4BC0-000000067F0000400200008A5900000BD58B__0000000120C1DDF9-00000001C071E001\n000000067F0000400200008A5900000B8000-000000067F0000400200008A5900000BC000__0000001C725A2400\n000000067F0000400200008A5900000B8000-000000067F0000400200008A5900000BC000__0000001C760FA190\n000000067F0000400200008A5900000B8000-000000067F0000400200008A5900000BC000__00000038E67ABFA0\n000000067F0000400200008A5900000B8000-000000067F0000400200008A5900000BC000__0000003903F1CFE8\n000000067F0000400200008A5900000B8000-000000067F0000400200008A5900000BC000__0000003B99F7F8A0\n000000067F0000400200008A5900000B8000-000000067F0000400200008A5900000BC000__0000005D2FFFFB38\n000000067F0000400200008A5900000B8000-000000067F0000400200008A5900000BC000__00000073AD3FE6B8\n000000067F0000400200008A5900000B8000-000000067F0000400200008A5900000BC000__000000914E3F38F0\n000000067F0000400200008A5900000B8000-000000067F0000400200008A5900000BC000__000000931B33AE68\n000000067F0000400200008A5900000B8000-000000067F0000400200008A5900000BC000__000000931B9AFDF8\n000000067F0000400200008A5900000BC000-000000067F0000400200008A5900000C0000__0000001C725A2400\n000000067F0000400200008A5900000BC000-000000067F0000400200008A5900000C0000__0000001C760FA190\n000000067F0000400200008A5900000BC000-000000067F0000400200008A5900000C0000__00000038E67ABFA0\n000000067F0000400200008A5900000BC000-000000067F0000400200008A5900000C0000__0000003903F1CFE8\n000000067F0000400200008A5900000BC000-000000067F0000400200008A5900000C0000__0000003B99F7F8A0\n000000067F0000400200008A5900000BC000-000000067F0000400200008A5900000C0000__0000005D2FFFFB38\n000000067F0000400200008A5900000BC000-000000067F0000400200008A5900000C0000__00000073AD3FE6B8\n000000067F0000400200008A5900000BC000-000000067F0000400200008A5900000C0000__000000914E3F38F0\n000000067F0000400200008A5900000BC000-000000067F0000400200008A5900000C0000__000000931B33AE68\n00000
0067F0000400200008A5900000BC000-000000067F0000400200008A5900000C0000__000000931B9AFDF8\n000000067F0000400200008A5900000BD58B-000000067F0000400200008A5900000C5F89__0000000120C1DDF9-00000001C071E001\n000000067F0000400200008A5900000C0000-000000067F0000400200008A5900000C4000__0000001C725A2400\n000000067F0000400200008A5900000C0000-000000067F0000400200008A5900000C4000__0000001C760FA190\n000000067F0000400200008A5900000C0000-000000067F0000400200008A5900000C4000__00000038E67ABFA0\n000000067F0000400200008A5900000C0000-000000067F0000400200008A5900000C4000__0000003903F1CFE8\n000000067F0000400200008A5900000C0000-000000067F0000400200008A5900000C4000__0000003B99F7F8A0\n000000067F0000400200008A5900000C0000-000000067F0000400200008A5900000C4000__0000005D2FFFFB38\n000000067F0000400200008A5900000C0000-000000067F0000400200008A5900000C4000__00000073AD3FE6B8\n000000067F0000400200008A5900000C0000-000000067F0000400200008A5900000C4000__000000914E3F38F0\n000000067F0000400200008A5900000C0000-000000067F0000400200008A5900000C4000__000000931B33AE68\n000000067F0000400200008A5900000C0000-000000067F0000400200008A5900000C4000__000000931B9AFDF8\n000000067F0000400200008A5900000C4000-000000067F0000400200008A5900000C8000__0000001C725A2400\n000000067F0000400200008A5900000C4000-000000067F0000400200008A5900000C8000__0000001C760FA190\n000000067F0000400200008A5900000C4000-000000067F0000400200008A5900000C8000__00000038E67ABFA0\n000000067F0000400200008A5900000C4000-000000067F0000400200008A5900000C8000__0000003903F1CFE8\n000000067F0000400200008A5900000C4000-000000067F0000400200008A5900000C8000__0000003B99F7F8A0\n000000067F0000400200008A5900000C4000-000000067F0000400200008A5900000C8000__0000005D2FFFFB38\n000000067F0000400200008A5900000C4000-000000067F0000400200008A5900000C8000__00000073AD3FE6B8\n000000067F0000400200008A5900000C4000-000000067F0000400200008A5900000C8000__000000914E3F38F0\n000000067F0000400200008A5900000C4000-000000067F0000400200008A5900000C8000__000000931B33AE68\n000000067F0000400200008A5900000C400
0-000000067F0000400200008A5900000C8000__000000931B9AFDF8\n000000067F0000400200008A5900000C5F89-000000067F0000400200008A5900000CE983__0000000120C1DDF9-00000001C071E001\n000000067F0000400200008A5900000C8000-000000067F0000400200008A5900000CC000__0000001C725A2400\n000000067F0000400200008A5900000C8000-000000067F0000400200008A5900000CC000__0000001C760FA190\n000000067F0000400200008A5900000C8000-000000067F0000400200008A5900000CC000__00000038E67ABFA0\n000000067F0000400200008A5900000C8000-000000067F0000400200008A5900000CC000__0000003903F1CFE8\n000000067F0000400200008A5900000C8000-000000067F0000400200008A5900000CC000__0000003B99F7F8A0\n000000067F0000400200008A5900000C8000-000000067F0000400200008A5900000CC000__0000005D2FFFFB38\n000000067F0000400200008A5900000C8000-000000067F0000400200008A5900000CC000__00000073AD3FE6B8\n000000067F0000400200008A5900000C8000-000000067F0000400200008A5900000CC000__000000914E3F38F0\n000000067F0000400200008A5900000C8000-000000067F0000400200008A5900000CC000__000000931B33AE68\n000000067F0000400200008A5900000C8000-000000067F0000400200008A5900000CC000__000000931B9AFDF8\n000000067F0000400200008A5900000CC000-000000067F0000400200008A5900000D0000__0000001C725A2400\n000000067F0000400200008A5900000CC000-000000067F0000400200008A5900000D0000__0000001C760FA190\n000000067F0000400200008A5900000CC000-000000067F0000400200008A5900000D0000__00000038E67ABFA0\n000000067F0000400200008A5900000CC000-000000067F0000400200008A5900000D0000__0000003903F1CFE8\n000000067F0000400200008A5900000CC000-000000067F0000400200008A5900000D0000__0000003B99F7F8A0\n000000067F0000400200008A5900000CC000-000000067F0000400200008A5900000D0000__0000005D2FFFFB38\n000000067F0000400200008A5900000CC000-000000067F0000400200008A5900000D0000__00000073AD3FE6B8\n000000067F0000400200008A5900000CC000-000000067F0000400200008A5900000D0000__000000914E3F38F0\n000000067F0000400200008A5900000CC000-000000067F0000400200008A5900000D0000__000000931B33AE68\n000000067F0000400200008A5900000CC000-000000067F0000400200008A5900
000D0000__000000931B9AFDF8\n000000067F0000400200008A5900000CE983-000000067F0000400200008A5900000D736F__0000000120C1DDF9-00000001C071E001\n000000067F0000400200008A5900000D0000-000000067F0000400200008A5900000D4000__0000001C725A2400\n000000067F0000400200008A5900000D0000-000000067F0000400200008A5900000D4000__0000001C760FA190\n000000067F0000400200008A5900000D0000-000000067F0000400200008A5900000D4000__00000038E67ABFA0\n000000067F0000400200008A5900000D0000-000000067F0000400200008A5900000D4000__0000003903F1CFE8\n000000067F0000400200008A5900000D0000-000000067F0000400200008A5900000D4000__0000003B99F7F8A0\n000000067F0000400200008A5900000D0000-000000067F0000400200008A5900000D4000__0000005D2FFFFB38\n000000067F0000400200008A5900000D0000-000000067F0000400200008A5900000D4000__00000073AD3FE6B8\n000000067F0000400200008A5900000D0000-000000067F0000400200008A5900000D4000__000000914E3F38F0\n000000067F0000400200008A5900000D0000-000000067F0000400200008A5900000D4000__000000931B33AE68\n000000067F0000400200008A5900000D0000-000000067F0000400200008A5900000D4000__000000931B9AFDF8\n000000067F0000400200008A5900000D4000-000000067F0000400200008A5900000D8000__0000001C725A2400\n000000067F0000400200008A5900000D4000-000000067F0000400200008A5900000D8000__0000001C760FA190\n000000067F0000400200008A5900000D4000-000000067F0000400200008A5900000D8000__00000038E67ABFA0\n000000067F0000400200008A5900000D4000-000000067F0000400200008A5900000D8000__0000003903F1CFE8\n000000067F0000400200008A5900000D4000-000000067F0000400200008A5900000D8000__0000003B99F7F8A0\n000000067F0000400200008A5900000D4000-000000067F0000400200008A5900000D8000__0000005D2FFFFB38\n000000067F0000400200008A5900000D4000-000000067F0000400200008A5900000D8000__00000073AD3FE6B8\n000000067F0000400200008A5900000D4000-000000067F0000400200008A5900000D8000__000000914E3F38F0\n000000067F0000400200008A5900000D4000-000000067F0000400200008A5900000D8000__000000931B33AE68\n000000067F0000400200008A5900000D4000-000000067F0000400200008A5900000D8000__000000931B9AFDF8\n00
0000067F0000400200008A5900000D736F-000000067F0000400200008A5900000DFD47__0000000120C1DDF9-00000001C071E001\n000000067F0000400200008A5900000D8000-000000067F0000400200008A5900000DC000__0000001C725A2400\n000000067F0000400200008A5900000D8000-000000067F0000400200008A5900000DC000__0000001C760FA190\n000000067F0000400200008A5900000D8000-000000067F0000400200008A5900000DC000__00000038E67ABFA0\n000000067F0000400200008A5900000D8000-000000067F0000400200008A5900000DC000__0000003903F1CFE8\n000000067F0000400200008A5900000D8000-000000067F0000400200008A5900000DC000__0000003B99F7F8A0\n000000067F0000400200008A5900000D8000-000000067F0000400200008A5900000DC000__0000005D2FFFFB38\n000000067F0000400200008A5900000D8000-000000067F0000400200008A5900000DC000__00000073AD3FE6B8\n000000067F0000400200008A5900000D8000-000000067F0000400200008A5900000DC000__000000914E3F38F0\n000000067F0000400200008A5900000D8000-000000067F0000400200008A5900000DC000__000000931B33AE68\n000000067F0000400200008A5900000D8000-000000067F0000400200008A5900000DC000__000000931B9AFDF8\n000000067F0000400200008A5900000DC000-000000067F0000400200008A5900000E0000__0000001C725A2400\n000000067F0000400200008A5900000DC000-000000067F0000400200008A5900000E0000__0000001C760FA190\n000000067F0000400200008A5900000DC000-000000067F0000400200008A5900000E0000__00000038E67ABFA0\n000000067F0000400200008A5900000DC000-000000067F0000400200008A5900000E0000__0000003903F1CFE8\n000000067F0000400200008A5900000DC000-000000067F0000400200008A5900000E0000__0000003B99F7F8A0\n000000067F0000400200008A5900000DC000-000000067F0000400200008A5900000E0000__0000005D2FFFFB38\n000000067F0000400200008A5900000DC000-000000067F0000400200008A5900000E0000__00000073AD3FE6B8\n000000067F0000400200008A5900000DC000-000000067F0000400200008A5900000E0000__000000914E3F38F0\n000000067F0000400200008A5900000DC000-000000067F0000400200008A5900000E0000__000000931B33AE68\n000000067F0000400200008A5900000DC000-000000067F0000400200008A5900000E0000__000000931B9AFDF8\n000000067F0000400200008A5900000D
FD47-000000067F0000400200008A5900000E870D__0000000120C1DDF9-00000001C071E001\n000000067F0000400200008A5900000E0000-000000067F0000400200008A5900000E4000__0000001C725A2400\n000000067F0000400200008A5900000E0000-000000067F0000400200008A5900000E4000__0000001C760FA190\n000000067F0000400200008A5900000E0000-000000067F0000400200008A5900000E4000__00000038E67ABFA0\n000000067F0000400200008A5900000E0000-000000067F0000400200008A5900000E4000__0000003903F1CFE8\n000000067F0000400200008A5900000E0000-000000067F0000400200008A5900000E4000__0000003B99F7F8A0\n000000067F0000400200008A5900000E0000-000000067F0000400200008A5900000E4000__0000005D2FFFFB38\n000000067F0000400200008A5900000E0000-000000067F0000400200008A5900000E4000__00000073AD3FE6B8\n000000067F0000400200008A5900000E0000-000000067F0000400200008A5900000E4000__000000914E3F38F0\n000000067F0000400200008A5900000E0000-000000067F0000400200008A5900000E4000__000000931B33AE68\n000000067F0000400200008A5900000E0000-000000067F0000400200008A5900000E4000__000000931B9AFDF8\n000000067F0000400200008A5900000E4000-000000067F0000400200008A5900000E8000__0000001C725A2400\n000000067F0000400200008A5900000E4000-000000067F0000400200008A5900000E8000__0000001C760FA190\n000000067F0000400200008A5900000E4000-000000067F0000400200008A5900000E8000__00000038E67ABFA0\n000000067F0000400200008A5900000E4000-000000067F0000400200008A5900000E8000__0000003903F1CFE8\n000000067F0000400200008A5900000E4000-000000067F0000400200008A5900000E8000__0000003B99F7F8A0\n000000067F0000400200008A5900000E4000-000000067F0000400200008A5900000E8000__0000005D2FFFFB38\n000000067F0000400200008A5900000E4000-000000067F0000400200008A5900000E8000__00000073AD3FE6B8\n000000067F0000400200008A5900000E4000-000000067F0000400200008A5900000E8000__000000914E3F38F0\n000000067F0000400200008A5900000E4000-000000067F0000400200008A5900000E8000__000000931B33AE68\n000000067F0000400200008A5900000E4000-000000067F0000400200008A5900000E8000__000000931B9AFDF8\n000000067F0000400200008A5900000E8000-000000067F0000400200008A5
900000EC000__0000001C725A2400\n000000067F0000400200008A5900000E8000-000000067F0000400200008A5900000EC000__0000001C760FA190\n000000067F0000400200008A5900000E8000-000000067F0000400200008A5900000EC000__00000038E67ABFA0\n000000067F0000400200008A5900000E8000-000000067F0000400200008A5900000EC000__0000003903F1CFE8\n000000067F0000400200008A5900000E8000-000000067F0000400200008A5900000EC000__0000003B99F7F8A0\n000000067F0000400200008A5900000E8000-000000067F0000400200008A5900000EC000__0000005D2FFFFB38\n000000067F0000400200008A5900000E8000-000000067F0000400200008A5900000EC000__00000073AD3FE6B8\n000000067F0000400200008A5900000E8000-000000067F0000400200008A5900000EC000__000000914E3F38F0\n000000067F0000400200008A5900000E8000-000000067F0000400200008A5900000EC000__000000931B33AE68\n000000067F0000400200008A5900000E8000-000000067F0000400200008A5900000EC000__000000931B9AFDF8\n000000067F0000400200008A5900000E870D-000000067F0000400200008A5900000F10C9__0000000120C1DDF9-00000001C071E001\n000000067F0000400200008A5900000EC000-000000067F0000400200008A5900000F0000__0000001C725A2400\n000000067F0000400200008A5900000EC000-000000067F0000400200008A5900000F0000__0000001C760FA190\n000000067F0000400200008A5900000EC000-000000067F0000400200008A5900000F0000__00000038E67ABFA0\n000000067F0000400200008A5900000EC000-000000067F0000400200008A5900000F0000__0000003903F1CFE8\n000000067F0000400200008A5900000EC000-000000067F0000400200008A5900000F0000__0000003B99F7F8A0\n000000067F0000400200008A5900000EC000-000000067F0000400200008A5900000F0000__0000005D2FFFFB38\n000000067F0000400200008A5900000EC000-000000067F0000400200008A5900000F0000__00000073AD3FE6B8\n000000067F0000400200008A5900000EC000-000000067F0000400200008A5900000F0000__000000914E3F38F0\n000000067F0000400200008A5900000EC000-000000067F0000400200008A5900000F0000__000000931B33AE68\n000000067F0000400200008A5900000EC000-000000067F0000400200008A5900000F0000__000000931B9AFDF8\n000000067F0000400200008A5900000F0000-000000067F0000400200008A5900000F4000__000000028BBFFDB8\
n000000067F0000400200008A5900000F0000-000000067F0000400200008A5900000F4000__0000001C760FA190\n000000067F0000400200008A5900000F0000-000000067F0000400200008A5900000F4000__00000038E67ABFA0\n000000067F0000400200008A5900000F0000-000000067F0000400200008A5900000F4000__0000003903F1CFE8\n000000067F0000400200008A5900000F0000-000000067F0000400200008A5900000F4000__0000003B99F7F8A0\n000000067F0000400200008A5900000F0000-000000067F0000400200008A5900000F4000__0000005D2FFFFB38\n000000067F0000400200008A5900000F0000-000000067F0000400200008A5900000F4000__00000073AD3FE6B8\n000000067F0000400200008A5900000F0000-000000067F0000400200008A5900000F4000__000000914E3F38F0\n000000067F0000400200008A5900000F0000-000000067F0000400200008A5900000F4000__000000931B33AE68\n000000067F0000400200008A5900000F0000-000000067F0000400200008A5900000F4000__000000931B9AFDF8\n000000067F0000400200008A5900000F10C9-000000067F0000400200008A590100000000__0000000120C1DDF9-00000001C071E001\n000000067F0000400200008A5900000F13A6-000000067F0000400200008A5900000F9D70__00000001C071E001-000000027019FBC1\n000000067F0000400200008A5900000F4000-000000067F0000400200008A5900000F8000__000000028BBFFDB8\n000000067F0000400200008A5900000F4000-000000067F0000400200008A5900000F8000__0000001C760FA190\n000000067F0000400200008A5900000F4000-000000067F0000400200008A5900000F8000__00000038E67ABFA0\n000000067F0000400200008A5900000F4000-000000067F0000400200008A5900000F8000__0000003903F1CFE8\n000000067F0000400200008A5900000F4000-000000067F0000400200008A5900000F8000__0000003B99F7F8A0\n000000067F0000400200008A5900000F4000-000000067F0000400200008A5900000F8000__0000005D2FFFFB38\n000000067F0000400200008A5900000F4000-000000067F0000400200008A5900000F8000__00000073AD3FE6B8\n000000067F0000400200008A5900000F4000-000000067F0000400200008A5900000F8000__000000914E3F38F0\n000000067F0000400200008A5900000F4000-000000067F0000400200008A5900000F8000__000000931B33AE68\n000000067F0000400200008A5900000F4000-000000067F0000400200008A5900000F8000__000000931B9AFDF8\n000000067F00
00400200008A5900000F8000-000000067F0000400200008A5900000FC000__000000028BBFFDB8\n000000067F0000400200008A5900000F8000-000000067F0000400200008A5900000FC000__0000001C760FA190\n000000067F0000400200008A5900000F8000-000000067F0000400200008A5900000FC000__00000038E67ABFA0\n000000067F0000400200008A5900000F8000-000000067F0000400200008A5900000FC000__0000003903F1CFE8\n000000067F0000400200008A5900000F8000-000000067F0000400200008A5900000FC000__0000003B99F7F8A0\n000000067F0000400200008A5900000F8000-000000067F0000400200008A5900000FC000__0000005D2FFFFB38\n000000067F0000400200008A5900000F8000-000000067F0000400200008A5900000FC000__00000073AD3FE6B8\n000000067F0000400200008A5900000F8000-000000067F0000400200008A5900000FC000__000000914E3F38F0\n000000067F0000400200008A5900000F8000-000000067F0000400200008A5900000FC000__000000931B33AE68\n000000067F0000400200008A5900000F8000-000000067F0000400200008A5900000FC000__000000931B9AFDF8\n000000067F0000400200008A5900000F9D70-000000067F0000400200008A59000010275E__00000001C071E001-000000027019FBC1\n000000067F0000400200008A5900000FC000-000000067F0000400200008A590000100000__000000028BBFFDB8\n000000067F0000400200008A5900000FC000-000000067F0000400200008A590000100000__0000001C760FA190\n000000067F0000400200008A5900000FC000-000000067F0000400200008A590000100000__00000038E67ABFA0\n000000067F0000400200008A5900000FC000-000000067F0000400200008A590000100000__0000003903F1CFE8\n000000067F0000400200008A5900000FC000-000000067F0000400200008A590000100000__0000003B99F7F8A0\n000000067F0000400200008A5900000FC000-000000067F0000400200008A590000100000__0000005D2FFFFB38\n000000067F0000400200008A5900000FC000-000000067F0000400200008A590000100000__00000073AD3FE6B8\n000000067F0000400200008A5900000FC000-000000067F0000400200008A590000100000__000000914E3F38F0\n000000067F0000400200008A5900000FC000-000000067F0000400200008A590000100000__000000931B33AE68\n000000067F0000400200008A5900000FC000-000000067F0000400200008A590000100000__000000931B9AFDF8\n000000067F0000400200008A590000100000-00000
0067F0000400200008A590000104000__000000028BBFFDB8\n000000067F0000400200008A590000100000-000000067F0000400200008A590000104000__0000001C760FA190\n000000067F0000400200008A590000100000-000000067F0000400200008A590000104000__00000038E67ABFA0\n000000067F0000400200008A590000100000-000000067F0000400200008A590000104000__0000003903F1CFE8\n000000067F0000400200008A590000100000-000000067F0000400200008A590000104000__0000003B99F7F8A0\n000000067F0000400200008A590000100000-000000067F0000400200008A590000104000__0000005D2FFFFB38\n000000067F0000400200008A590000100000-000000067F0000400200008A590000104000__00000073AD3FE6B8\n000000067F0000400200008A590000100000-000000067F0000400200008A590000104000__000000914E3F38F0\n000000067F0000400200008A590000100000-000000067F0000400200008A590000104000__000000931B33AE68\n000000067F0000400200008A590000100000-000000067F0000400200008A590000104000__000000931B9AFDF8\n000000067F0000400200008A59000010275E-000000067F0000400200008A59000010B151__00000001C071E001-000000027019FBC1\n000000067F0000400200008A590000104000-000000067F0000400200008A590000108000__000000028BBFFDB8\n000000067F0000400200008A590000104000-000000067F0000400200008A590000108000__0000001C760FA190\n000000067F0000400200008A590000104000-000000067F0000400200008A590000108000__00000038E67ABFA0\n000000067F0000400200008A590000104000-000000067F0000400200008A590000108000__0000003903F1CFE8\n000000067F0000400200008A590000104000-000000067F0000400200008A590000108000__0000003B99F7F8A0\n000000067F0000400200008A590000104000-000000067F0000400200008A590000108000__0000005D2FFFFB38\n000000067F0000400200008A590000104000-000000067F0000400200008A590000108000__00000073AD3FE6B8\n000000067F0000400200008A590000104000-000000067F0000400200008A590000108000__000000914E3F38F0\n000000067F0000400200008A590000104000-000000067F0000400200008A590000108000__000000931B33AE68\n000000067F0000400200008A590000104000-000000067F0000400200008A590000108000__000000931B9AFDF8\n000000067F0000400200008A590000108000-000000067F0000400200008A59000010C00
0__000000028BBFFDB8\n000000067F0000400200008A590000108000-000000067F0000400200008A59000010C000__0000001C760FA190\n000000067F0000400200008A590000108000-000000067F0000400200008A59000010C000__00000038E67ABFA0\n000000067F0000400200008A590000108000-000000067F0000400200008A59000010C000__0000003903F1CFE8\n000000067F0000400200008A590000108000-000000067F0000400200008A59000010C000__0000003B99F7F8A0\n000000067F0000400200008A590000108000-000000067F0000400200008A59000010C000__0000005D2FFFFB38\n000000067F0000400200008A590000108000-000000067F0000400200008A59000010C000__00000073AD3FE6B8\n000000067F0000400200008A590000108000-000000067F0000400200008A59000010C000__000000914E3F38F0\n000000067F0000400200008A590000108000-000000067F0000400200008A59000010C000__000000931B33AE68\n000000067F0000400200008A590000108000-000000067F0000400200008A59000010C000__000000931B9AFDF8\n000000067F0000400200008A59000010B151-000000067F0000400200008A590000113B39__00000001C071E001-000000027019FBC1\n000000067F0000400200008A59000010C000-000000067F0000400200008A590000110000__000000028BBFFDB8\n000000067F0000400200008A59000010C000-000000067F0000400200008A590000110000__0000001C760FA190\n000000067F0000400200008A59000010C000-000000067F0000400200008A590000110000__00000038E67ABFA0\n000000067F0000400200008A59000010C000-000000067F0000400200008A590000110000__0000003903F1CFE8\n000000067F0000400200008A59000010C000-000000067F0000400200008A590000110000__0000003B99F7F8A0\n000000067F0000400200008A59000010C000-000000067F0000400200008A590000110000__0000005D2FFFFB38\n000000067F0000400200008A59000010C000-000000067F0000400200008A590000110000__00000073AD3FE6B8\n000000067F0000400200008A59000010C000-000000067F0000400200008A590000110000__000000914E3F38F0\n000000067F0000400200008A59000010C000-000000067F0000400200008A590000110000__000000931B33AE68\n000000067F0000400200008A59000010C000-000000067F0000400200008A590000110000__000000931B9AFDF8\n000000067F0000400200008A590000110000-000000067F0000400200008A590000114000__000000028BBFFDB8\n000000067
F0000400200008A590000110000-000000067F0000400200008A590000114000__0000001C760FA190\n000000067F0000400200008A590000110000-000000067F0000400200008A590000114000__00000038E67ABFA0\n000000067F0000400200008A590000110000-000000067F0000400200008A590000114000__0000003903F1CFE8\n000000067F0000400200008A590000110000-000000067F0000400200008A590000114000__0000003B99F7F8A0\n000000067F0000400200008A590000110000-000000067F0000400200008A590000114000__0000005D2FFFFB38\n000000067F0000400200008A590000110000-000000067F0000400200008A590000114000__00000073AD3FE6B8\n000000067F0000400200008A590000110000-000000067F0000400200008A590000114000__000000914E3F38F0\n000000067F0000400200008A590000110000-000000067F0000400200008A590000114000__000000931B33AE68\n000000067F0000400200008A590000110000-000000067F0000400200008A590000114000__000000931B9AFDF8\n000000067F0000400200008A590000113B39-000000067F0000400200008A59000011C515__00000001C071E001-000000027019FBC1\n000000067F0000400200008A590000114000-000000067F0000400200008A590000118000__000000028BBFFDB8\n000000067F0000400200008A590000114000-000000067F0000400200008A590000118000__0000001C760FA190\n000000067F0000400200008A590000114000-000000067F0000400200008A590000118000__00000038E67ABFA0\n000000067F0000400200008A590000114000-000000067F0000400200008A590000118000__0000003903F1CFE8\n000000067F0000400200008A590000114000-000000067F0000400200008A590000118000__0000003B99F7F8A0\n000000067F0000400200008A590000114000-000000067F0000400200008A590000118000__0000005D2FFFFB38\n000000067F0000400200008A590000114000-000000067F0000400200008A590000118000__00000073AD3FE6B8\n000000067F0000400200008A590000114000-000000067F0000400200008A590000118000__000000914E3F38F0\n000000067F0000400200008A590000114000-000000067F0000400200008A590000118000__000000931B33AE68\n000000067F0000400200008A590000114000-000000067F0000400200008A590000118000__000000931B9AFDF8\n000000067F0000400200008A590000118000-000000067F0000400200008A59000011C000__000000028BBFFDB8\n000000067F0000400200008A590000118000-00
0000067F0000400200008A59000011C000__0000001C760FA190\n000000067F0000400200008A590000118000-000000067F0000400200008A59000011C000__00000038E67ABFA0\n000000067F0000400200008A590000118000-000000067F0000400200008A59000011C000__0000003903F1CFE8\n000000067F0000400200008A590000118000-000000067F0000400200008A59000011C000__0000003B99F7F8A0\n000000067F0000400200008A590000118000-000000067F0000400200008A59000011C000__0000005D2FFFFB38\n000000067F0000400200008A590000118000-000000067F0000400200008A59000011C000__00000073AD3FE6B8\n000000067F0000400200008A590000118000-000000067F0000400200008A59000011C000__000000914E3F38F0\n000000067F0000400200008A590000118000-000000067F0000400200008A59000011C000__000000931B33AE68\n000000067F0000400200008A590000118000-000000067F0000400200008A59000011C000__000000931B9AFDF8\n000000067F0000400200008A59000011C000-000000067F0000400200008A590000120000__000000028BBFFDB8\n000000067F0000400200008A59000011C000-000000067F0000400200008A590000120000__0000001C760FA190\n000000067F0000400200008A59000011C000-000000067F0000400200008A590000120000__00000038E67ABFA0\n000000067F0000400200008A59000011C000-000000067F0000400200008A590000120000__0000003903F1CFE8\n000000067F0000400200008A59000011C000-000000067F0000400200008A590000120000__0000003B99F7F8A0\n000000067F0000400200008A59000011C000-000000067F0000400200008A590000120000__0000005D2FFFFB38\n000000067F0000400200008A59000011C000-000000067F0000400200008A590000120000__00000073AD3FE6B8\n000000067F0000400200008A59000011C000-000000067F0000400200008A590000120000__000000914E3F38F0\n000000067F0000400200008A59000011C000-000000067F0000400200008A590000120000__000000931B33AE68\n000000067F0000400200008A59000011C000-000000067F0000400200008A590000120000__000000931B9AFDF8\n000000067F0000400200008A59000011C515-000000067F0000400200008A590000124EDB__00000001C071E001-000000027019FBC1\n000000067F0000400200008A590000120000-000000067F0000400200008A590000124000__000000028BBFFDB8\n000000067F0000400200008A590000120000-000000067F0000400200008A59000012
4000__0000001C760FA190\n000000067F0000400200008A590000120000-000000067F0000400200008A590000124000__00000038E67ABFA0\n000000067F0000400200008A590000120000-000000067F0000400200008A590000124000__0000003903F1CFE8\n000000067F0000400200008A590000120000-000000067F0000400200008A590000124000__0000003B99F7F8A0\n000000067F0000400200008A590000120000-000000067F0000400200008A590000124000__0000005D2FFFFB38\n000000067F0000400200008A590000120000-000000067F0000400200008A590000124000__00000073AD3FE6B8\n000000067F0000400200008A590000120000-000000067F0000400200008A590000124000__000000914E3F38F0\n000000067F0000400200008A590000120000-000000067F0000400200008A590000124000__000000931B33AE68\n000000067F0000400200008A590000120000-000000067F0000400200008A590000124000__000000931B9AFDF8\n000000067F0000400200008A590000124000-000000067F0000400200008A590000128000__000000028BBFFDB8\n000000067F0000400200008A590000124000-000000067F0000400200008A590000128000__0000001C760FA190\n000000067F0000400200008A590000124000-000000067F0000400200008A590000128000__00000038E67ABFA0\n000000067F0000400200008A590000124000-000000067F0000400200008A590000128000__0000003903F1CFE8\n000000067F0000400200008A590000124000-000000067F0000400200008A590000128000__0000003B99F7F8A0\n000000067F0000400200008A590000124000-000000067F0000400200008A590000128000__0000005D2FFFFB38\n000000067F0000400200008A590000124000-000000067F0000400200008A590000128000__00000073AD3FE6B8\n000000067F0000400200008A590000124000-000000067F0000400200008A590000128000__000000914E3F38F0\n000000067F0000400200008A590000124000-000000067F0000400200008A590000128000__000000931B33AE68\n000000067F0000400200008A590000124000-000000067F0000400200008A590000128000__000000931B9AFDF8\n000000067F0000400200008A590000124EDB-000000067F0000400200008A59000012D89B__00000001C071E001-000000027019FBC1\n000000067F0000400200008A590000128000-000000067F0000400200008A59000012C000__000000028BBFFDB8\n000000067F0000400200008A590000128000-000000067F0000400200008A59000012C000__0000001C760FA190\n000000
067F0000400200008A590000128000-000000067F0000400200008A59000012C000__00000038E67ABFA0\n000000067F0000400200008A590000128000-000000067F0000400200008A59000012C000__0000003903F1CFE8\n000000067F0000400200008A590000128000-000000067F0000400200008A59000012C000__0000003B99F7F8A0\n000000067F0000400200008A590000128000-000000067F0000400200008A59000012C000__0000005D2FFFFB38\n000000067F0000400200008A590000128000-000000067F0000400200008A59000012C000__00000073AD3FE6B8\n000000067F0000400200008A590000128000-000000067F0000400200008A59000012C000__000000914E3F38F0\n000000067F0000400200008A590000128000-000000067F0000400200008A59000012C000__000000931B33AE68\n000000067F0000400200008A590000128000-000000067F0000400200008A59000012C000__000000931B9AFDF8\n000000067F0000400200008A59000012C000-000000067F0000400200008A590000130000__000000028BBFFDB8\n000000067F0000400200008A59000012C000-000000067F0000400200008A590000130000__0000001C760FA190\n000000067F0000400200008A59000012C000-000000067F0000400200008A590000130000__00000038E67ABFA0\n000000067F0000400200008A59000012C000-000000067F0000400200008A590000130000__0000003903F1CFE8\n000000067F0000400200008A59000012C000-000000067F0000400200008A590000130000__0000003B99F7F8A0\n000000067F0000400200008A59000012C000-000000067F0000400200008A590000130000__0000005D2FFFFB38\n000000067F0000400200008A59000012C000-000000067F0000400200008A590000130000__00000073AD3FE6B8\n000000067F0000400200008A59000012C000-000000067F0000400200008A590000130000__000000914E3F38F0\n000000067F0000400200008A59000012C000-000000067F0000400200008A590000130000__000000931B33AE68\n000000067F0000400200008A59000012C000-000000067F0000400200008A590000130000__000000931B9AFDF8\n000000067F0000400200008A59000012D89B-000000067F0000400200008A590000136269__00000001C071E001-000000027019FBC1\n000000067F0000400200008A590000130000-000000067F0000400200008A590000134000__000000028BBFFDB8\n000000067F0000400200008A590000130000-000000067F0000400200008A590000134000__0000001C760FA190\n000000067F0000400200008A590000130000
-000000067F0000400200008A590000134000__00000038E67ABFA0\n000000067F0000400200008A590000130000-000000067F0000400200008A590000134000__0000003903F1CFE8\n000000067F0000400200008A590000130000-000000067F0000400200008A590000134000__0000003B99F7F8A0\n000000067F0000400200008A590000130000-000000067F0000400200008A590000134000__0000005D2FFFFB38\n000000067F0000400200008A590000130000-000000067F0000400200008A590000134000__00000073AD3FE6B8\n000000067F0000400200008A590000130000-000000067F0000400200008A590000134000__000000914E3F38F0\n000000067F0000400200008A590000130000-000000067F0000400200008A590000134000__000000931B33AE68\n000000067F0000400200008A590000130000-000000067F0000400200008A590000134000__000000931B9AFDF8\n000000067F0000400200008A590000134000-000000067F0000400200008A590000138000__000000028BBFFDB8\n000000067F0000400200008A590000134000-000000067F0000400200008A590000138000__0000001C760FA190\n000000067F0000400200008A590000134000-000000067F0000400200008A590000138000__00000038E67ABFA0\n000000067F0000400200008A590000134000-000000067F0000400200008A590000138000__0000003903F1CFE8\n000000067F0000400200008A590000134000-000000067F0000400200008A590000138000__0000003B99F7F8A0\n000000067F0000400200008A590000134000-000000067F0000400200008A590000138000__0000005D2FFFFB38\n000000067F0000400200008A590000134000-000000067F0000400200008A590000138000__00000073AD3FE6B8\n000000067F0000400200008A590000134000-000000067F0000400200008A590000138000__000000914E3F38F0\n000000067F0000400200008A590000134000-000000067F0000400200008A590000138000__000000931B33AE68\n000000067F0000400200008A590000134000-000000067F0000400200008A590000138000__000000931B9AFDF8\n000000067F0000400200008A590000136269-000000067F0000400200008A59000013EC56__00000001C071E001-000000027019FBC1\n000000067F0000400200008A590000138000-000000067F0000400200008A59000013C000__000000028BBFFDB8\n000000067F0000400200008A590000138000-000000067F0000400200008A59000013C000__0000001C760FA190\n000000067F0000400200008A590000138000-000000067F0000400200008A59000
013C000__00000038E67ABFA0\n000000067F0000400200008A590000138000-000000067F0000400200008A59000013C000__0000003903F1CFE8\n000000067F0000400200008A590000138000-000000067F0000400200008A59000013C000__0000003B99F7F8A0\n000000067F0000400200008A590000138000-000000067F0000400200008A59000013C000__0000005D2FFFFB38\n000000067F0000400200008A590000138000-000000067F0000400200008A59000013C000__00000073AD3FE6B8\n000000067F0000400200008A590000138000-000000067F0000400200008A59000013C000__000000914E3F38F0\n000000067F0000400200008A590000138000-000000067F0000400200008A59000013C000__000000931B33AE68\n000000067F0000400200008A590000138000-000000067F0000400200008A59000013C000__000000931B9AFDF8\n000000067F0000400200008A59000013C000-000000067F0000400200008A590000140000__000000028BBFFDB8\n000000067F0000400200008A59000013C000-000000067F0000400200008A590000140000__0000001C760FA190\n000000067F0000400200008A59000013C000-000000067F0000400200008A590000140000__00000038E67ABFA0\n000000067F0000400200008A59000013C000-000000067F0000400200008A590000140000__0000003903F1CFE8\n000000067F0000400200008A59000013C000-000000067F0000400200008A590000140000__0000003B99F7F8A0\n000000067F0000400200008A59000013C000-000000067F0000400200008A590000140000__0000005D2FFFFB38\n000000067F0000400200008A59000013C000-000000067F0000400200008A590000140000__00000073AD3FE6B8\n000000067F0000400200008A59000013C000-000000067F0000400200008A590000140000__000000914E3F38F0\n000000067F0000400200008A59000013C000-000000067F0000400200008A590000140000__000000931B33AE68\n000000067F0000400200008A59000013C000-000000067F0000400200008A590000140000__000000931B9AFDF8\n000000067F0000400200008A59000013EC56-000000067F0000400200008A590000147647__00000001C071E001-000000027019FBC1\n000000067F0000400200008A590000140000-000000067F0000400200008A590000144000__000000028BBFFDB8\n000000067F0000400200008A590000140000-000000067F0000400200008A590000144000__0000001C760FA190\n000000067F0000400200008A590000140000-000000067F0000400200008A590000144000__00000038E67ABFA0\n000
000067F0000400200008A590000140000-000000067F0000400200008A590000144000__0000003903F1CFE8\n000000067F0000400200008A590000140000-000000067F0000400200008A590000144000__0000003B99F7F8A0\n000000067F0000400200008A590000140000-000000067F0000400200008A590000144000__0000005D2FFFFB38\n000000067F0000400200008A590000140000-000000067F0000400200008A590000144000__00000073AD3FE6B8\n000000067F0000400200008A590000140000-000000067F0000400200008A590000144000__000000914E3F38F0\n000000067F0000400200008A590000140000-000000067F0000400200008A590000144000__000000931B33AE68\n000000067F0000400200008A590000140000-000000067F0000400200008A590000144000__000000931B9AFDF8\n000000067F0000400200008A590000144000-000000067F0000400200008A590000148000__000000028BBFFDB8\n000000067F0000400200008A590000144000-000000067F0000400200008A590000148000__0000001C760FA190\n000000067F0000400200008A590000144000-000000067F0000400200008A590000148000__00000038E67ABFA0\n000000067F0000400200008A590000144000-000000067F0000400200008A590000148000__0000003903F1CFE8\n000000067F0000400200008A590000144000-000000067F0000400200008A590000148000__0000003B99F7F8A0\n000000067F0000400200008A590000144000-000000067F0000400200008A590000148000__0000005D2FFFFB38\n000000067F0000400200008A590000144000-000000067F0000400200008A590000148000__00000073AD3FE6B8\n000000067F0000400200008A590000144000-000000067F0000400200008A590000148000__000000914E3F38F0\n000000067F0000400200008A590000144000-000000067F0000400200008A590000148000__000000931B33AE68\n000000067F0000400200008A590000144000-000000067F0000400200008A590000148000__000000931B9AFDF8\n000000067F0000400200008A590000147647-000000067F0000400200008A590000150027__00000001C071E001-000000027019FBC1\n000000067F0000400200008A590000148000-000000067F0000400200008A59000014C000__000000028BBFFDB8\n000000067F0000400200008A590000148000-000000067F0000400200008A59000014C000__0000001C760FA190\n000000067F0000400200008A590000148000-000000067F0000400200008A59000014C000__00000038E67ABFA0\n000000067F0000400200008A590000148
000-000000067F0000400200008A59000014C000__0000003903F1CFE8\n000000067F0000400200008A590000148000-000000067F0000400200008A59000014C000__0000003B99F7F8A0\n000000067F0000400200008A590000148000-000000067F0000400200008A59000014C000__0000005D2FFFFB38\n000000067F0000400200008A590000148000-000000067F0000400200008A59000014C000__00000073AD3FE6B8\n000000067F0000400200008A590000148000-000000067F0000400200008A59000014C000__000000914E3F38F0\n000000067F0000400200008A590000148000-000000067F0000400200008A59000014C000__000000931B33AE68\n000000067F0000400200008A590000148000-000000067F0000400200008A59000014C000__000000931B9AFDF8\n000000067F0000400200008A59000014C000-000000067F0000400200008A590000150000__000000028BBFFDB8\n000000067F0000400200008A59000014C000-000000067F0000400200008A590000150000__0000001C760FA190\n000000067F0000400200008A59000014C000-000000067F0000400200008A590000150000__00000038E67ABFA0\n000000067F0000400200008A59000014C000-000000067F0000400200008A590000150000__0000003903F1CFE8\n000000067F0000400200008A59000014C000-000000067F0000400200008A590000150000__0000003B99F7F8A0\n000000067F0000400200008A59000014C000-000000067F0000400200008A590000150000__0000005D2FFFFB38\n000000067F0000400200008A59000014C000-000000067F0000400200008A590000150000__00000073AD3FE6B8\n000000067F0000400200008A59000014C000-000000067F0000400200008A590000150000__000000914E3F38F0\n000000067F0000400200008A59000014C000-000000067F0000400200008A590000150000__000000931B33AE68\n000000067F0000400200008A59000014C000-000000067F0000400200008A590000150000__000000931B9AFDF8\n000000067F0000400200008A590000150000-000000067F0000400200008A590000154000__000000028BBFFDB8\n000000067F0000400200008A590000150000-000000067F0000400200008A590000154000__0000001C760FA190\n000000067F0000400200008A590000150000-000000067F0000400200008A590000154000__00000038E67ABFA0\n000000067F0000400200008A590000150000-000000067F0000400200008A590000154000__0000003903F1CFE8\n000000067F0000400200008A590000150000-000000067F0000400200008A590000154000__00000
03B99F7F8A0\n000000067F0000400200008A590000150000-000000067F0000400200008A590000154000__0000005D2FFFFB38\n000000067F0000400200008A590000150000-000000067F0000400200008A590000154000__00000073AD3FE6B8\n000000067F0000400200008A590000150000-000000067F0000400200008A590000154000__000000914E3F38F0\n000000067F0000400200008A590000150000-000000067F0000400200008A590000154000__000000931B33AE68\n000000067F0000400200008A590000150000-000000067F0000400200008A590000154000__000000931B9AFDF8\n000000067F0000400200008A590000150027-000000067F0000400200008A590100000000__00000001C071E001-000000027019FBC1\n000000067F0000400200008A590000150355-000000067F0000400200008A590000158D32__000000027019FBC1-000000030FC9ED71\n000000067F0000400200008A590000154000-000000067F0000400200008A590000158000__000000028BBFFDB8\n000000067F0000400200008A590000154000-000000067F0000400200008A590000158000__0000001C760FA190\n000000067F0000400200008A590000154000-000000067F0000400200008A590000158000__00000038E67ABFA0\n000000067F0000400200008A590000154000-000000067F0000400200008A590000158000__0000003903F1CFE8\n000000067F0000400200008A590000154000-000000067F0000400200008A590000158000__0000003B99F7F8A0\n000000067F0000400200008A590000154000-000000067F0000400200008A590000158000__0000005D2FFFFB38\n000000067F0000400200008A590000154000-000000067F0000400200008A590000158000__00000073AD3FE6B8\n000000067F0000400200008A590000154000-000000067F0000400200008A590000158000__000000914E3F38F0\n000000067F0000400200008A590000154000-000000067F0000400200008A590000158000__000000931B33AE68\n000000067F0000400200008A590000154000-000000067F0000400200008A590000158000__000000931B9AFDF8\n000000067F0000400200008A590000158000-000000067F0000400200008A59000015C000__000000028BBFFDB8\n000000067F0000400200008A590000158000-000000067F0000400200008A59000015C000__0000001C760FA190\n000000067F0000400200008A590000158000-000000067F0000400200008A59000015C000__00000038E67ABFA0\n000000067F0000400200008A590000158000-000000067F0000400200008A59000015C000__0000003903F1CFE8\n
000000067F0000400200008A590000158000-000000067F0000400200008A59000015C000__0000003B99F7F8A0\n000000067F0000400200008A590000158000-000000067F0000400200008A59000015C000__0000005D2FFFFB38\n000000067F0000400200008A590000158000-000000067F0000400200008A59000015C000__00000073AD3FE6B8\n000000067F0000400200008A590000158000-000000067F0000400200008A59000015C000__000000914E3F38F0\n000000067F0000400200008A590000158000-000000067F0000400200008A59000015C000__000000931B33AE68\n000000067F0000400200008A590000158000-000000067F0000400200008A59000015C000__000000931B9AFDF8\n000000067F0000400200008A590000158D32-000000067F0000400200008A5900001616F5__000000027019FBC1-000000030FC9ED71\n000000067F0000400200008A59000015C000-000000067F0000400200008A590000160000__0000001C760FA190\n000000067F0000400200008A59000015C000-000000067F0000400200008A590000160000__00000038E67ABFA0\n000000067F0000400200008A59000015C000-000000067F0000400200008A590000160000__0000003903F1CFE8\n000000067F0000400200008A59000015C000-000000067F0000400200008A590000160000__0000003B99F7F8A0\n000000067F0000400200008A59000015C000-000000067F0000400200008A590000160000__0000005D2FFFFB38\n000000067F0000400200008A59000015C000-000000067F0000400200008A590000160000__00000073AD3FE6B8\n000000067F0000400200008A59000015C000-000000067F0000400200008A590000160000__000000914E3F38F0\n000000067F0000400200008A59000015C000-000000067F0000400200008A590000160000__000000931B33AE68\n000000067F0000400200008A59000015C000-000000067F0000400200008A590000160000__000000931B9AFDF8\n000000067F0000400200008A59000015C000-030000000000000000000000000000000002__000000028BBFFDB8\n000000067F0000400200008A590000160000-000000067F0000400200008A590000164000__0000001C760FA190\n000000067F0000400200008A590000160000-000000067F0000400200008A590000164000__00000038E67ABFA0\n000000067F0000400200008A590000160000-000000067F0000400200008A590000164000__0000003903F1CFE8\n000000067F0000400200008A590000160000-000000067F0000400200008A590000164000__0000003B99F7F8A0\n000000067F0000400200008A590000
160000-000000067F0000400200008A590000164000__0000005D2FFFFB38\n000000067F0000400200008A590000160000-000000067F0000400200008A590000164000__00000073AD3FE6B8\n000000067F0000400200008A590000160000-000000067F0000400200008A590000164000__000000914E3F38F0\n000000067F0000400200008A590000160000-000000067F0000400200008A590000164000__000000931B33AE68\n000000067F0000400200008A590000160000-000000067F0000400200008A590000164000__000000931B9AFDF8\n000000067F0000400200008A5900001616F5-000000067F0000400200008A59000016A0B7__000000027019FBC1-000000030FC9ED71\n000000067F0000400200008A590000164000-000000067F0000400200008A590000168000__0000001C760FA190\n000000067F0000400200008A590000164000-000000067F0000400200008A590000168000__00000038E67ABFA0\n000000067F0000400200008A590000164000-000000067F0000400200008A590000168000__0000003903F1CFE8\n000000067F0000400200008A590000164000-000000067F0000400200008A590000168000__0000003B99F7F8A0\n000000067F0000400200008A590000164000-000000067F0000400200008A590000168000__0000005D2FFFFB38\n000000067F0000400200008A590000164000-000000067F0000400200008A590000168000__00000073AD3FE6B8\n000000067F0000400200008A590000164000-000000067F0000400200008A590000168000__000000914E3F38F0\n000000067F0000400200008A590000164000-000000067F0000400200008A590000168000__000000931B33AE68\n000000067F0000400200008A590000164000-000000067F0000400200008A590000168000__000000931B9AFDF8\n000000067F0000400200008A590000168000-000000067F0000400200008A59000016C000__0000001C760FA190\n000000067F0000400200008A590000168000-000000067F0000400200008A59000016C000__00000038E67ABFA0\n000000067F0000400200008A590000168000-000000067F0000400200008A59000016C000__0000003903F1CFE8\n000000067F0000400200008A590000168000-000000067F0000400200008A59000016C000__0000003B99F7F8A0\n000000067F0000400200008A590000168000-000000067F0000400200008A59000016C000__0000005D2FFFFB38\n000000067F0000400200008A590000168000-000000067F0000400200008A59000016C000__00000073AD3FE6B8\n000000067F0000400200008A590000168000-000000067F0000400200008
A59000016C000__000000914E3F38F0\n000000067F0000400200008A590000168000-000000067F0000400200008A59000016C000__000000931B33AE68\n000000067F0000400200008A590000168000-000000067F0000400200008A59000016C000__000000931B9AFDF8\n000000067F0000400200008A59000016A0B7-000000067F0000400200008A590000172A96__000000027019FBC1-000000030FC9ED71\n000000067F0000400200008A59000016C000-000000067F0000400200008A590000170000__0000001C760FA190\n000000067F0000400200008A59000016C000-000000067F0000400200008A590000170000__00000038E67ABFA0\n000000067F0000400200008A59000016C000-000000067F0000400200008A590000170000__0000003903F1CFE8\n000000067F0000400200008A59000016C000-000000067F0000400200008A590000170000__0000003B99F7F8A0\n000000067F0000400200008A59000016C000-000000067F0000400200008A590000170000__0000005D2FFFFB38\n000000067F0000400200008A59000016C000-000000067F0000400200008A590000170000__00000073AD3FE6B8\n000000067F0000400200008A59000016C000-000000067F0000400200008A590000170000__000000914E3F38F0\n000000067F0000400200008A59000016C000-000000067F0000400200008A590000170000__000000931B33AE68\n000000067F0000400200008A59000016C000-000000067F0000400200008A590000170000__000000931B9AFDF8\n000000067F0000400200008A590000170000-000000067F0000400200008A590000174000__0000001C760FA190\n000000067F0000400200008A590000170000-000000067F0000400200008A590000174000__00000038E67ABFA0\n000000067F0000400200008A590000170000-000000067F0000400200008A590000174000__0000003903F1CFE8\n000000067F0000400200008A590000170000-000000067F0000400200008A590000174000__0000003B99F7F8A0\n000000067F0000400200008A590000170000-000000067F0000400200008A590000174000__0000005D2FFFFB38\n000000067F0000400200008A590000170000-000000067F0000400200008A590000174000__00000073AD3FE6B8\n000000067F0000400200008A590000170000-000000067F0000400200008A590000174000__000000914E3F38F0\n000000067F0000400200008A590000170000-000000067F0000400200008A590000174000__000000931B33AE68\n000000067F0000400200008A590000170000-000000067F0000400200008A590000174000__000000931B9AFDF
8\n000000067F0000400200008A590000172A96-000000067F0000400200008A59000017B48B__000000027019FBC1-000000030FC9ED71\n000000067F0000400200008A590000174000-000000067F0000400200008A590000178000__0000001C760FA190\n000000067F0000400200008A590000174000-000000067F0000400200008A590000178000__00000038E67ABFA0\n000000067F0000400200008A590000174000-000000067F0000400200008A590000178000__0000003903F1CFE8\n000000067F0000400200008A590000174000-000000067F0000400200008A590000178000__0000003B99F7F8A0\n000000067F0000400200008A590000174000-000000067F0000400200008A590000178000__0000005D2FFFFB38\n000000067F0000400200008A590000174000-000000067F0000400200008A590000178000__00000073AD3FE6B8\n000000067F0000400200008A590000174000-000000067F0000400200008A590000178000__000000914E3F38F0\n000000067F0000400200008A590000174000-000000067F0000400200008A590000178000__000000931B33AE68\n000000067F0000400200008A590000174000-000000067F0000400200008A590000178000__000000931B9AFDF8\n000000067F0000400200008A590000178000-000000067F0000400200008A59000017C000__0000001C760FA190\n000000067F0000400200008A590000178000-000000067F0000400200008A59000017C000__00000038E67ABFA0\n000000067F0000400200008A590000178000-000000067F0000400200008A59000017C000__0000003903F1CFE8\n000000067F0000400200008A590000178000-000000067F0000400200008A59000017C000__0000003B99F7F8A0\n000000067F0000400200008A590000178000-000000067F0000400200008A59000017C000__0000005D2FFFFB38\n000000067F0000400200008A590000178000-000000067F0000400200008A59000017C000__00000073AD3FE6B8\n000000067F0000400200008A590000178000-000000067F0000400200008A59000017C000__000000914E3F38F0\n000000067F0000400200008A590000178000-000000067F0000400200008A59000017C000__000000931B33AE68\n000000067F0000400200008A590000178000-000000067F0000400200008A59000017C000__000000931B9AFDF8\n000000067F0000400200008A59000017B48B-000000067F0000400200008A590000183E80__000000027019FBC1-000000030FC9ED71\n000000067F0000400200008A59000017C000-000000067F0000400200008A590000180000__0000001C760FA190\n000000067F
0000400200008A59000017C000-000000067F0000400200008A590000180000__00000038E67ABFA0\n000000067F0000400200008A59000017C000-000000067F0000400200008A590000180000__0000003903F1CFE8\n000000067F0000400200008A59000017C000-000000067F0000400200008A590000180000__0000003B99F7F8A0\n000000067F0000400200008A59000017C000-000000067F0000400200008A590000180000__0000005D2FFFFB38\n000000067F0000400200008A59000017C000-000000067F0000400200008A590000180000__00000073AD3FE6B8\n000000067F0000400200008A59000017C000-000000067F0000400200008A590000180000__000000914E3F38F0\n000000067F0000400200008A59000017C000-000000067F0000400200008A590000180000__000000931B33AE68\n000000067F0000400200008A59000017C000-000000067F0000400200008A590000180000__000000931B9AFDF8\n000000067F0000400200008A590000180000-000000067F0000400200008A590000184000__0000001C760FA190\n000000067F0000400200008A590000180000-000000067F0000400200008A590000184000__00000038E67ABFA0\n000000067F0000400200008A590000180000-000000067F0000400200008A590000184000__0000003903F1CFE8\n000000067F0000400200008A590000180000-000000067F0000400200008A590000184000__0000003B99F7F8A0\n000000067F0000400200008A590000180000-000000067F0000400200008A590000184000__0000005D2FFFFB38\n000000067F0000400200008A590000180000-000000067F0000400200008A590000184000__00000073AD3FE6B8\n000000067F0000400200008A590000180000-000000067F0000400200008A590000184000__000000914E3F38F0\n000000067F0000400200008A590000180000-000000067F0000400200008A590000184000__000000931B33AE68\n000000067F0000400200008A590000180000-000000067F0000400200008A590000184000__000000931B9AFDF8\n000000067F0000400200008A590000183E80-000000067F0000400200008A59000018C866__000000027019FBC1-000000030FC9ED71\n000000067F0000400200008A590000184000-000000067F0000400200008A590000188000__0000001C760FA190\n000000067F0000400200008A590000184000-000000067F0000400200008A590000188000__00000038E67ABFA0\n000000067F0000400200008A590000184000-000000067F0000400200008A590000188000__0000003903F1CFE8\n000000067F0000400200008A590000184000-000
000067F0000400200008A590000188000__0000003B99F7F8A0\n000000067F0000400200008A590000184000-000000067F0000400200008A590000188000__0000005D2FFFFB38\n000000067F0000400200008A590000184000-000000067F0000400200008A590000188000__00000073AD3FE6B8\n000000067F0000400200008A590000184000-000000067F0000400200008A590000188000__000000914E3F38F0\n000000067F0000400200008A590000184000-000000067F0000400200008A590000188000__000000931B33AE68\n000000067F0000400200008A590000184000-000000067F0000400200008A590000188000__000000931B9AFDF8\n000000067F0000400200008A590000188000-000000067F0000400200008A59000018C000__0000001C760FA190\n000000067F0000400200008A590000188000-000000067F0000400200008A59000018C000__00000038E67ABFA0\n000000067F0000400200008A590000188000-000000067F0000400200008A59000018C000__0000003903F1CFE8\n000000067F0000400200008A590000188000-000000067F0000400200008A59000018C000__0000003B99F7F8A0\n000000067F0000400200008A590000188000-000000067F0000400200008A59000018C000__0000005D2FFFFB38\n000000067F0000400200008A590000188000-000000067F0000400200008A59000018C000__00000073AD3FE6B8\n000000067F0000400200008A590000188000-000000067F0000400200008A59000018C000__000000914E3F38F0\n000000067F0000400200008A590000188000-000000067F0000400200008A59000018C000__000000931B33AE68\n000000067F0000400200008A590000188000-000000067F0000400200008A59000018C000__000000931B9AFDF8\n000000067F0000400200008A59000018C000-000000067F0000400200008A590000190000__0000001C760FA190\n000000067F0000400200008A59000018C000-000000067F0000400200008A590000190000__00000038E67ABFA0\n000000067F0000400200008A59000018C000-000000067F0000400200008A590000190000__0000003903F1CFE8\n000000067F0000400200008A59000018C000-000000067F0000400200008A590000190000__0000003B99F7F8A0\n000000067F0000400200008A59000018C000-000000067F0000400200008A590000190000__0000005D2FFFFB38\n000000067F0000400200008A59000018C000-000000067F0000400200008A590000190000__00000073AD3FE6B8\n000000067F0000400200008A59000018C000-000000067F0000400200008A590000190000__000000914E3F
38F0\n000000067F0000400200008A59000018C000-000000067F0000400200008A590000190000__000000931B33AE68\n000000067F0000400200008A59000018C000-000000067F0000400200008A590000190000__000000931B9AFDF8\n000000067F0000400200008A59000018C866-000000067F0000400200008A590000195243__000000027019FBC1-000000030FC9ED71\n000000067F0000400200008A590000190000-000000067F0000400200008A590000194000__0000001C760FA190\n000000067F0000400200008A590000190000-000000067F0000400200008A590000194000__00000038E67ABFA0\n000000067F0000400200008A590000190000-000000067F0000400200008A590000194000__0000003903F1CFE8\n000000067F0000400200008A590000190000-000000067F0000400200008A590000194000__0000003B99F7F8A0\n000000067F0000400200008A590000190000-000000067F0000400200008A590000194000__0000005D2FFFFB38\n000000067F0000400200008A590000190000-000000067F0000400200008A590000194000__00000073AD3FE6B8\n000000067F0000400200008A590000190000-000000067F0000400200008A590000194000__000000914E3F38F0\n000000067F0000400200008A590000190000-000000067F0000400200008A590000194000__000000931B33AE68\n000000067F0000400200008A590000190000-000000067F0000400200008A590000194000__000000931B9AFDF8\n000000067F0000400200008A590000194000-000000067F0000400200008A590000198000__0000001C760FA190\n000000067F0000400200008A590000194000-000000067F0000400200008A590000198000__00000038E67ABFA0\n000000067F0000400200008A590000194000-000000067F0000400200008A590000198000__0000003903F1CFE8\n000000067F0000400200008A590000194000-000000067F0000400200008A590000198000__0000003B99F7F8A0\n000000067F0000400200008A590000194000-000000067F0000400200008A590000198000__0000005D2FFFFB38\n000000067F0000400200008A590000194000-000000067F0000400200008A590000198000__00000073AD3FE6B8\n000000067F0000400200008A590000194000-000000067F0000400200008A590000198000__000000914E3F38F0\n000000067F0000400200008A590000194000-000000067F0000400200008A590000198000__000000931B33AE68\n000000067F0000400200008A590000194000-000000067F0000400200008A590000198000__000000931B9AFDF8\n000000067F0000400200008A
590000195243-000000067F0000400200008A59000019DC01__000000027019FBC1-000000030FC9ED71\n000000067F0000400200008A590000198000-000000067F0000400200008A59000019C000__0000001C760FA190\n000000067F0000400200008A590000198000-000000067F0000400200008A59000019C000__00000038E67ABFA0\n000000067F0000400200008A590000198000-000000067F0000400200008A59000019C000__0000003903F1CFE8\n000000067F0000400200008A590000198000-000000067F0000400200008A59000019C000__0000003B99F7F8A0\n000000067F0000400200008A590000198000-000000067F0000400200008A59000019C000__0000005D2FFFFB38\n000000067F0000400200008A590000198000-000000067F0000400200008A59000019C000__00000073AD3FE6B8\n000000067F0000400200008A590000198000-000000067F0000400200008A59000019C000__000000914E3F38F0\n000000067F0000400200008A590000198000-000000067F0000400200008A59000019C000__000000931B33AE68\n000000067F0000400200008A590000198000-000000067F0000400200008A59000019C000__000000931B9AFDF8\n000000067F0000400200008A59000019C000-000000067F0000400200008A5900001A0000__0000001C760FA190\n000000067F0000400200008A59000019C000-000000067F0000400200008A5900001A0000__00000038E67ABFA0\n000000067F0000400200008A59000019C000-000000067F0000400200008A5900001A0000__0000003903F1CFE8\n000000067F0000400200008A59000019C000-000000067F0000400200008A5900001A0000__0000003B99F7F8A0\n000000067F0000400200008A59000019C000-000000067F0000400200008A5900001A0000__0000005D2FFFFB38\n000000067F0000400200008A59000019C000-000000067F0000400200008A5900001A0000__00000073AD3FE6B8\n000000067F0000400200008A59000019C000-000000067F0000400200008A5900001A0000__000000914E3F38F0\n000000067F0000400200008A59000019C000-000000067F0000400200008A5900001A0000__000000931B33AE68\n000000067F0000400200008A59000019C000-000000067F0000400200008A5900001A0000__000000931B9AFDF8\n000000067F0000400200008A59000019DC01-000000067F0000400200008A5900001A65B5__000000027019FBC1-000000030FC9ED71\n000000067F0000400200008A5900001A0000-000000067F0000400200008A5900001A4000__0000001C760FA190\n000000067F0000400200008A5900001A0000-
000000067F0000400200008A5900001A4000__00000038E67ABFA0\n000000067F0000400200008A5900001A0000-000000067F0000400200008A5900001A4000__0000003903F1CFE8\n000000067F0000400200008A5900001A0000-000000067F0000400200008A5900001A4000__0000003B99F7F8A0\n000000067F0000400200008A5900001A0000-000000067F0000400200008A5900001A4000__0000005D2FFFFB38\n000000067F0000400200008A5900001A0000-000000067F0000400200008A5900001A4000__00000073AD3FE6B8\n000000067F0000400200008A5900001A0000-000000067F0000400200008A5900001A4000__000000914E3F38F0\n000000067F0000400200008A5900001A0000-000000067F0000400200008A5900001A4000__000000931B33AE68\n000000067F0000400200008A5900001A0000-000000067F0000400200008A5900001A4000__000000931B9AFDF8\n000000067F0000400200008A5900001A4000-000000067F0000400200008A5900001A8000__0000001C725A2400\n000000067F0000400200008A5900001A4000-000000067F0000400200008A5900001A8000__0000001C760FA190\n000000067F0000400200008A5900001A4000-000000067F0000400200008A5900001A8000__00000038E67ABFA0\n000000067F0000400200008A5900001A4000-000000067F0000400200008A5900001A8000__0000003903F1CFE8\n000000067F0000400200008A5900001A4000-000000067F0000400200008A5900001A8000__0000003B99F7F8A0\n000000067F0000400200008A5900001A4000-000000067F0000400200008A5900001A8000__0000005D2FFFFB38\n000000067F0000400200008A5900001A4000-000000067F0000400200008A5900001A8000__00000073AD3FE6B8\n000000067F0000400200008A5900001A4000-000000067F0000400200008A5900001A8000__000000914E3F38F0\n000000067F0000400200008A5900001A4000-000000067F0000400200008A5900001A8000__000000931B33AE68\n000000067F0000400200008A5900001A4000-000000067F0000400200008A5900001A8000__000000931B9AFDF8\n000000067F0000400200008A5900001A65B5-000000067F0000400200008A590100000000__000000027019FBC1-000000030FC9ED71\n000000067F0000400200008A5900001A6891-000000067F0000400200008A5900001AF277__000000030FC9ED71-00000003AF79E5E9\n000000067F0000400200008A5900001A8000-000000067F0000400200008A5900001AC000__0000001C725A2400\n000000067F0000400200008A5900001A8000-000000067F000
0400200008A5900001AC000__0000001C760FA190\n000000067F0000400200008A5900001A8000-000000067F0000400200008A5900001AC000__00000038E67ABFA0\n000000067F0000400200008A5900001A8000-000000067F0000400200008A5900001AC000__0000003903F1CFE8\n000000067F0000400200008A5900001A8000-000000067F0000400200008A5900001AC000__0000003B99F7F8A0\n000000067F0000400200008A5900001A8000-000000067F0000400200008A5900001AC000__0000005D2FFFFB38\n000000067F0000400200008A5900001A8000-000000067F0000400200008A5900001AC000__00000073AD3FE6B8\n000000067F0000400200008A5900001A8000-000000067F0000400200008A5900001AC000__000000914E3F38F0\n000000067F0000400200008A5900001A8000-000000067F0000400200008A5900001AC000__000000931B33AE68\n000000067F0000400200008A5900001A8000-000000067F0000400200008A5900001AC000__000000931B9AFDF8\n000000067F0000400200008A5900001AC000-000000067F0000400200008A5900001B0000__0000001C725A2400\n000000067F0000400200008A5900001AC000-000000067F0000400200008A5900001B0000__0000001C760FA190\n000000067F0000400200008A5900001AC000-000000067F0000400200008A5900001B0000__00000038E67ABFA0\n000000067F0000400200008A5900001AC000-000000067F0000400200008A5900001B0000__0000003903F1CFE8\n000000067F0000400200008A5900001AC000-000000067F0000400200008A5900001B0000__0000003B99F7F8A0\n000000067F0000400200008A5900001AC000-000000067F0000400200008A5900001B0000__0000005D2FFFFB38\n000000067F0000400200008A5900001AC000-000000067F0000400200008A5900001B0000__00000073AD3FE6B8\n000000067F0000400200008A5900001AC000-000000067F0000400200008A5900001B0000__000000914E3F38F0\n000000067F0000400200008A5900001AC000-000000067F0000400200008A5900001B0000__000000931B33AE68\n000000067F0000400200008A5900001AC000-000000067F0000400200008A5900001B0000__000000931B9AFDF8\n000000067F0000400200008A5900001AF277-000000067F0000400200008A5900001B7C62__000000030FC9ED71-00000003AF79E5E9\n000000067F0000400200008A5900001B0000-000000067F0000400200008A5900001B4000__0000001C725A2400\n000000067F0000400200008A5900001B0000-000000067F0000400200008A5900001B4000__00000
01C760FA190\n000000067F0000400200008A5900001B0000-000000067F0000400200008A5900001B4000__00000038E67ABFA0\n000000067F0000400200008A5900001B0000-000000067F0000400200008A5900001B4000__0000003903F1CFE8\n000000067F0000400200008A5900001B0000-000000067F0000400200008A5900001B4000__0000003B99F7F8A0\n000000067F0000400200008A5900001B0000-000000067F0000400200008A5900001B4000__0000005D2FFFFB38\n000000067F0000400200008A5900001B0000-000000067F0000400200008A5900001B4000__00000073AD3FE6B8\n000000067F0000400200008A5900001B0000-000000067F0000400200008A5900001B4000__000000914E3F38F0\n000000067F0000400200008A5900001B0000-000000067F0000400200008A5900001B4000__000000931B33AE68\n000000067F0000400200008A5900001B0000-000000067F0000400200008A5900001B4000__000000931B9AFDF8\n000000067F0000400200008A5900001B4000-000000067F0000400200008A5900001B8000__0000001C725A2400\n000000067F0000400200008A5900001B4000-000000067F0000400200008A5900001B8000__0000001C760FA190\n000000067F0000400200008A5900001B4000-000000067F0000400200008A5900001B8000__00000038E67ABFA0\n000000067F0000400200008A5900001B4000-000000067F0000400200008A5900001B8000__0000003903F1CFE8\n000000067F0000400200008A5900001B4000-000000067F0000400200008A5900001B8000__0000003B99F7F8A0\n000000067F0000400200008A5900001B4000-000000067F0000400200008A5900001B8000__0000005D2FFFFB38\n000000067F0000400200008A5900001B4000-000000067F0000400200008A5900001B8000__00000073AD3FE6B8\n000000067F0000400200008A5900001B4000-000000067F0000400200008A5900001B8000__000000914E3F38F0\n000000067F0000400200008A5900001B4000-000000067F0000400200008A5900001B8000__000000931B33AE68\n000000067F0000400200008A5900001B4000-000000067F0000400200008A5900001B8000__000000931B9AFDF8\n000000067F0000400200008A5900001B7C62-000000067F0000400200008A5900001C0650__000000030FC9ED71-00000003AF79E5E9\n000000067F0000400200008A5900001B8000-000000067F0000400200008A5900001BC000__0000001C725A2400\n000000067F0000400200008A5900001B8000-000000067F0000400200008A5900001BC000__0000001C760FA190\n000000067F0000400
200008A5900001B8000-000000067F0000400200008A5900001BC000__00000038E67ABFA0\n000000067F0000400200008A5900001B8000-000000067F0000400200008A5900001BC000__0000003903F1CFE8\n000000067F0000400200008A5900001B8000-000000067F0000400200008A5900001BC000__0000003B99F7F8A0\n000000067F0000400200008A5900001B8000-000000067F0000400200008A5900001BC000__0000005D2FFFFB38\n000000067F0000400200008A5900001B8000-000000067F0000400200008A5900001BC000__00000073AD3FE6B8\n000000067F0000400200008A5900001B8000-000000067F0000400200008A5900001BC000__000000914E3F38F0\n000000067F0000400200008A5900001B8000-000000067F0000400200008A5900001BC000__000000931B33AE68\n000000067F0000400200008A5900001B8000-000000067F0000400200008A5900001BC000__000000931B9AFDF8\n000000067F0000400200008A5900001BC000-000000067F0000400200008A5900001C0000__0000001C725A2400\n000000067F0000400200008A5900001BC000-000000067F0000400200008A5900001C0000__0000001C760FA190\n000000067F0000400200008A5900001BC000-000000067F0000400200008A5900001C0000__00000038E67ABFA0\n000000067F0000400200008A5900001BC000-000000067F0000400200008A5900001C0000__0000003903F1CFE8\n000000067F0000400200008A5900001BC000-000000067F0000400200008A5900001C0000__0000003B99F7F8A0\n000000067F0000400200008A5900001BC000-000000067F0000400200008A5900001C0000__0000005D2FFFFB38\n000000067F0000400200008A5900001BC000-000000067F0000400200008A5900001C0000__00000073AD3FE6B8\n000000067F0000400200008A5900001BC000-000000067F0000400200008A5900001C0000__000000914E3F38F0\n000000067F0000400200008A5900001BC000-000000067F0000400200008A5900001C0000__000000931B33AE68\n000000067F0000400200008A5900001BC000-000000067F0000400200008A5900001C0000__000000931B9AFDF8\n000000067F0000400200008A5900001C0000-000000067F0000400200008A5900001C4000__0000001C725A2400\n000000067F0000400200008A5900001C0000-000000067F0000400200008A5900001C4000__0000001C760FA190\n000000067F0000400200008A5900001C0000-000000067F0000400200008A5900001C4000__00000038E67ABFA0\n000000067F0000400200008A5900001C0000-000000067F0000400200008A590
0001C4000__0000003903F1CFE8\n000000067F0000400200008A5900001C0000-000000067F0000400200008A5900001C4000__0000003B99F7F8A0\n000000067F0000400200008A5900001C0000-000000067F0000400200008A5900001C4000__0000005D2FFFFB38\n000000067F0000400200008A5900001C0000-000000067F0000400200008A5900001C4000__00000073AD3FE6B8\n000000067F0000400200008A5900001C0000-000000067F0000400200008A5900001C4000__000000914E3F38F0\n000000067F0000400200008A5900001C0000-000000067F0000400200008A5900001C4000__000000931B33AE68\n000000067F0000400200008A5900001C0000-000000067F0000400200008A5900001C4000__000000931B9AFDF8\n000000067F0000400200008A5900001C0650-000000067F0000400200008A5900001C9029__000000030FC9ED71-00000003AF79E5E9\n000000067F0000400200008A5900001C4000-000000067F0000400200008A5900001C8000__0000001C725A2400\n000000067F0000400200008A5900001C4000-000000067F0000400200008A5900001C8000__0000001C760FA190\n000000067F0000400200008A5900001C4000-000000067F0000400200008A5900001C8000__00000038E67ABFA0\n000000067F0000400200008A5900001C4000-000000067F0000400200008A5900001C8000__0000003903F1CFE8\n000000067F0000400200008A5900001C4000-000000067F0000400200008A5900001C8000__0000003B99F7F8A0\n000000067F0000400200008A5900001C4000-000000067F0000400200008A5900001C8000__0000005D2FFFFB38\n000000067F0000400200008A5900001C4000-000000067F0000400200008A5900001C8000__00000073AD3FE6B8\n000000067F0000400200008A5900001C4000-000000067F0000400200008A5900001C8000__000000914E3F38F0\n000000067F0000400200008A5900001C4000-000000067F0000400200008A5900001C8000__000000931B33AE68\n000000067F0000400200008A5900001C4000-000000067F0000400200008A5900001C8000__000000931B9AFDF8\n000000067F0000400200008A5900001C8000-000000067F0000400200008A5900001CC000__0000001C725A2400\n000000067F0000400200008A5900001C8000-000000067F0000400200008A5900001CC000__0000001C760FA190\n000000067F0000400200008A5900001C8000-000000067F0000400200008A5900001CC000__00000038E67ABFA0\n000000067F0000400200008A5900001C8000-000000067F0000400200008A5900001CC000__0000003903F1CFE8\n0
00000067F0000400200008A5900001C8000-000000067F0000400200008A5900001CC000__0000003B99F7F8A0\n000000067F0000400200008A5900001C8000-000000067F0000400200008A5900001CC000__0000005D2FFFFB38\n000000067F0000400200008A5900001C8000-000000067F0000400200008A5900001CC000__00000073AD3FE6B8\n000000067F0000400200008A5900001C8000-000000067F0000400200008A5900001CC000__000000914E3F38F0\n000000067F0000400200008A5900001C8000-000000067F0000400200008A5900001CC000__000000931B33AE68\n000000067F0000400200008A5900001C8000-000000067F0000400200008A5900001CC000__000000931B9AFDF8\n000000067F0000400200008A5900001C9029-000000067F0000400200008A5900001D19FA__000000030FC9ED71-00000003AF79E5E9\n000000067F0000400200008A5900001CC000-000000067F0000400200008A5900001D0000__0000001C725A2400\n000000067F0000400200008A5900001CC000-000000067F0000400200008A5900001D0000__0000001C760FA190\n000000067F0000400200008A5900001CC000-000000067F0000400200008A5900001D0000__00000038E67ABFA0\n000000067F0000400200008A5900001CC000-000000067F0000400200008A5900001D0000__0000003903F1CFE8\n000000067F0000400200008A5900001CC000-000000067F0000400200008A5900001D0000__0000003B99F7F8A0\n000000067F0000400200008A5900001CC000-000000067F0000400200008A5900001D0000__0000005D2FFFFB38\n000000067F0000400200008A5900001CC000-000000067F0000400200008A5900001D0000__00000073AD3FE6B8\n000000067F0000400200008A5900001CC000-000000067F0000400200008A5900001D0000__000000914E3F38F0\n000000067F0000400200008A5900001CC000-000000067F0000400200008A5900001D0000__000000931B33AE68\n000000067F0000400200008A5900001CC000-000000067F0000400200008A5900001D0000__000000931B9AFDF8\n000000067F0000400200008A5900001D0000-000000067F0000400200008A5900001D4000__0000001C725A2400\n000000067F0000400200008A5900001D0000-000000067F0000400200008A5900001D4000__0000001C760FA190\n000000067F0000400200008A5900001D0000-000000067F0000400200008A5900001D4000__00000038E67ABFA0\n000000067F0000400200008A5900001D0000-000000067F0000400200008A5900001D4000__0000003903F1CFE8\n000000067F0000400200008A5900001
D0000-000000067F0000400200008A5900001D4000__0000003B99F7F8A0\n000000067F0000400200008A5900001D0000-000000067F0000400200008A5900001D4000__0000005D2FFFFB38\n000000067F0000400200008A5900001D0000-000000067F0000400200008A5900001D4000__00000073AD3FE6B8\n000000067F0000400200008A5900001D0000-000000067F0000400200008A5900001D4000__000000914E3F38F0\n000000067F0000400200008A5900001D0000-000000067F0000400200008A5900001D4000__000000931B33AE68\n000000067F0000400200008A5900001D0000-000000067F0000400200008A5900001D4000__000000931B9AFDF8\n000000067F0000400200008A5900001D19FA-000000067F0000400200008A5900001DA3D0__000000030FC9ED71-00000003AF79E5E9\n000000067F0000400200008A5900001D4000-000000067F0000400200008A5900001D8000__0000001C725A2400\n000000067F0000400200008A5900001D4000-000000067F0000400200008A5900001D8000__0000001C760FA190\n000000067F0000400200008A5900001D4000-000000067F0000400200008A5900001D8000__00000038E67ABFA0\n000000067F0000400200008A5900001D4000-000000067F0000400200008A5900001D8000__0000003903F1CFE8\n000000067F0000400200008A5900001D4000-000000067F0000400200008A5900001D8000__0000003B99F7F8A0\n000000067F0000400200008A5900001D4000-000000067F0000400200008A5900001D8000__0000005D2FFFFB38\n000000067F0000400200008A5900001D4000-000000067F0000400200008A5900001D8000__00000073AD3FE6B8\n000000067F0000400200008A5900001D4000-000000067F0000400200008A5900001D8000__000000914E3F38F0\n000000067F0000400200008A5900001D4000-000000067F0000400200008A5900001D8000__000000931B33AE68\n000000067F0000400200008A5900001D4000-000000067F0000400200008A5900001D8000__000000931B9AFDF8\n000000067F0000400200008A5900001D8000-000000067F0000400200008A5900001DC000__0000001C725A2400\n000000067F0000400200008A5900001D8000-000000067F0000400200008A5900001DC000__0000001C760FA190\n000000067F0000400200008A5900001D8000-000000067F0000400200008A5900001DC000__00000038E67ABFA0\n000000067F0000400200008A5900001D8000-000000067F0000400200008A5900001DC000__0000003903F1CFE8\n000000067F0000400200008A5900001D8000-000000067F0000400200008A
5900001DC000__0000003B99F7F8A0\n000000067F0000400200008A5900001D8000-000000067F0000400200008A5900001DC000__0000005D2FFFFB38\n000000067F0000400200008A5900001D8000-000000067F0000400200008A5900001DC000__00000073AD3FE6B8\n000000067F0000400200008A5900001D8000-000000067F0000400200008A5900001DC000__000000914E3F38F0\n000000067F0000400200008A5900001D8000-000000067F0000400200008A5900001DC000__000000931B33AE68\n000000067F0000400200008A5900001D8000-000000067F0000400200008A5900001DC000__000000931B9AFDF8\n000000067F0000400200008A5900001DA3D0-000000067F0000400200008A5900001E2D99__000000030FC9ED71-00000003AF79E5E9\n000000067F0000400200008A5900001DC000-000000067F0000400200008A5900001E0000__0000001C725A2400\n000000067F0000400200008A5900001DC000-000000067F0000400200008A5900001E0000__0000001C760FA190\n000000067F0000400200008A5900001DC000-000000067F0000400200008A5900001E0000__00000038E67ABFA0\n000000067F0000400200008A5900001DC000-000000067F0000400200008A5900001E0000__0000003903F1CFE8\n000000067F0000400200008A5900001DC000-000000067F0000400200008A5900001E0000__0000003B99F7F8A0\n000000067F0000400200008A5900001DC000-000000067F0000400200008A5900001E0000__0000005D2FFFFB38\n000000067F0000400200008A5900001DC000-000000067F0000400200008A5900001E0000__00000073AD3FE6B8\n000000067F0000400200008A5900001DC000-000000067F0000400200008A5900001E0000__000000914E3F38F0\n000000067F0000400200008A5900001DC000-000000067F0000400200008A5900001E0000__000000931B33AE68\n000000067F0000400200008A5900001DC000-000000067F0000400200008A5900001E0000__000000931B9AFDF8\n000000067F0000400200008A5900001E0000-000000067F0000400200008A5900001E4000__0000001C725A2400\n000000067F0000400200008A5900001E0000-000000067F0000400200008A5900001E4000__0000001C760FA190\n000000067F0000400200008A5900001E0000-000000067F0000400200008A5900001E4000__00000038E67ABFA0\n000000067F0000400200008A5900001E0000-000000067F0000400200008A5900001E4000__0000003903F1CFE8\n000000067F0000400200008A5900001E0000-000000067F0000400200008A5900001E4000__0000003B99F7F8A0
\n000000067F0000400200008A5900001E0000-000000067F0000400200008A5900001E4000__0000005D2FFFFB38\n000000067F0000400200008A5900001E0000-000000067F0000400200008A5900001E4000__00000073AD3FE6B8\n000000067F0000400200008A5900001E0000-000000067F0000400200008A5900001E4000__000000914E3F38F0\n000000067F0000400200008A5900001E0000-000000067F0000400200008A5900001E4000__000000931B33AE68\n000000067F0000400200008A5900001E0000-000000067F0000400200008A5900001E4000__000000931B9AFDF8\n000000067F0000400200008A5900001E2D99-000000067F0000400200008A5900001EB784__000000030FC9ED71-00000003AF79E5E9\n000000067F0000400200008A5900001E4000-000000067F0000400200008A5900001E8000__0000001C725A2400\n000000067F0000400200008A5900001E4000-000000067F0000400200008A5900001E8000__0000001C760FA190\n000000067F0000400200008A5900001E4000-000000067F0000400200008A5900001E8000__00000038E67ABFA0\n000000067F0000400200008A5900001E4000-000000067F0000400200008A5900001E8000__0000003903F1CFE8\n000000067F0000400200008A5900001E4000-000000067F0000400200008A5900001E8000__0000003B99F7F8A0\n000000067F0000400200008A5900001E4000-000000067F0000400200008A5900001E8000__0000005D2FFFFB38\n000000067F0000400200008A5900001E4000-000000067F0000400200008A5900001E8000__00000073AD3FE6B8\n000000067F0000400200008A5900001E4000-000000067F0000400200008A5900001E8000__000000914E3F38F0\n000000067F0000400200008A5900001E4000-000000067F0000400200008A5900001E8000__000000931B33AE68\n000000067F0000400200008A5900001E4000-000000067F0000400200008A5900001E8000__000000931B9AFDF8\n000000067F0000400200008A5900001E8000-000000067F0000400200008A5900001EC000__0000001C725A2400\n000000067F0000400200008A5900001E8000-000000067F0000400200008A5900001EC000__0000001C760FA190\n000000067F0000400200008A5900001E8000-000000067F0000400200008A5900001EC000__00000038E67ABFA0\n000000067F0000400200008A5900001E8000-000000067F0000400200008A5900001EC000__0000003903F1CFE8\n000000067F0000400200008A5900001E8000-000000067F0000400200008A5900001EC000__0000003B99F7F8A0\n000000067F0000400200008A5900
001E8000-000000067F0000400200008A5900001EC000__0000005D2FFFFB38\n000000067F0000400200008A5900001E8000-000000067F0000400200008A5900001EC000__00000073AD3FE6B8\n000000067F0000400200008A5900001E8000-000000067F0000400200008A5900001EC000__000000914E3F38F0\n000000067F0000400200008A5900001E8000-000000067F0000400200008A5900001EC000__000000931B33AE68\n000000067F0000400200008A5900001E8000-000000067F0000400200008A5900001EC000__000000931B9AFDF8\n000000067F0000400200008A5900001EB784-000000067F0000400200008A5900001F4172__000000030FC9ED71-00000003AF79E5E9\n000000067F0000400200008A5900001EC000-000000067F0000400200008A5900001F0000__0000001C725A2400\n000000067F0000400200008A5900001EC000-000000067F0000400200008A5900001F0000__0000001C760FA190\n000000067F0000400200008A5900001EC000-000000067F0000400200008A5900001F0000__00000038E67ABFA0\n000000067F0000400200008A5900001EC000-000000067F0000400200008A5900001F0000__0000003903F1CFE8\n000000067F0000400200008A5900001EC000-000000067F0000400200008A5900001F0000__0000003B99F7F8A0\n000000067F0000400200008A5900001EC000-000000067F0000400200008A5900001F0000__0000005D2FFFFB38\n000000067F0000400200008A5900001EC000-000000067F0000400200008A5900001F0000__00000073AD3FE6B8\n000000067F0000400200008A5900001EC000-000000067F0000400200008A5900001F0000__000000914E3F38F0\n000000067F0000400200008A5900001EC000-000000067F0000400200008A5900001F0000__000000931B33AE68\n000000067F0000400200008A5900001EC000-000000067F0000400200008A5900001F0000__000000931B9AFDF8\n000000067F0000400200008A5900001F0000-000000067F0000400200008A5900001F4000__0000001C725A2400\n000000067F0000400200008A5900001F0000-000000067F0000400200008A5900001F4000__0000001C760FA190\n000000067F0000400200008A5900001F0000-000000067F0000400200008A5900001F4000__00000038E67ABFA0\n000000067F0000400200008A5900001F0000-000000067F0000400200008A5900001F4000__0000003903F1CFE8\n000000067F0000400200008A5900001F0000-000000067F0000400200008A5900001F4000__0000003B99F7F8A0\n000000067F0000400200008A5900001F0000-000000067F00004002000
08A5900001F4000__0000005D2FFFFB38\n000000067F0000400200008A5900001F0000-000000067F0000400200008A5900001F4000__00000073AD3FE6B8\n000000067F0000400200008A5900001F0000-000000067F0000400200008A5900001F4000__000000914E3F38F0\n000000067F0000400200008A5900001F0000-000000067F0000400200008A5900001F4000__000000931B33AE68\n000000067F0000400200008A5900001F0000-000000067F0000400200008A5900001F4000__000000931B9AFDF8\n000000067F0000400200008A5900001F4000-000000067F0000400200008A5900001F8000__0000001C725A2400\n000000067F0000400200008A5900001F4000-000000067F0000400200008A5900001F8000__0000001C760FA190\n000000067F0000400200008A5900001F4000-000000067F0000400200008A5900001F8000__00000038E67ABFA0\n000000067F0000400200008A5900001F4000-000000067F0000400200008A5900001F8000__0000003903F1CFE8\n000000067F0000400200008A5900001F4000-000000067F0000400200008A5900001F8000__0000003B99F7F8A0\n000000067F0000400200008A5900001F4000-000000067F0000400200008A5900001F8000__0000005D2FFFFB38\n000000067F0000400200008A5900001F4000-000000067F0000400200008A5900001F8000__00000073AD3FE6B8\n000000067F0000400200008A5900001F4000-000000067F0000400200008A5900001F8000__000000914E3F38F0\n000000067F0000400200008A5900001F4000-000000067F0000400200008A5900001F8000__000000931B33AE68\n000000067F0000400200008A5900001F4000-000000067F0000400200008A5900001F8000__000000931B9AFDF8\n000000067F0000400200008A5900001F4172-000000067F0000400200008A5900001FCB6A__000000030FC9ED71-00000003AF79E5E9\n000000067F0000400200008A5900001F8000-000000067F0000400200008A5900001FC000__0000001C725A2400\n000000067F0000400200008A5900001F8000-000000067F0000400200008A5900001FC000__0000001C760FA190\n000000067F0000400200008A5900001F8000-000000067F0000400200008A5900001FC000__00000038E67ABFA0\n000000067F0000400200008A5900001F8000-000000067F0000400200008A5900001FC000__0000003903F1CFE8\n000000067F0000400200008A5900001F8000-000000067F0000400200008A5900001FC000__0000003B99F7F8A0\n000000067F0000400200008A5900001F8000-000000067F0000400200008A5900001FC000__0000005D2FFFF
B38\n000000067F0000400200008A5900001F8000-000000067F0000400200008A5900001FC000__00000073AD3FE6B8\n000000067F0000400200008A5900001F8000-000000067F0000400200008A5900001FC000__000000914E3F38F0\n000000067F0000400200008A5900001F8000-000000067F0000400200008A5900001FC000__000000931B33AE68\n000000067F0000400200008A5900001F8000-000000067F0000400200008A5900001FC000__000000931B9AFDF8\n000000067F0000400200008A5900001FC000-000000067F0000400200008A590000200000__0000000478F987C0\n000000067F0000400200008A5900001FC000-000000067F0000400200008A590000200000__0000001C760FA190\n000000067F0000400200008A5900001FC000-000000067F0000400200008A590000200000__00000038E67ABFA0\n000000067F0000400200008A5900001FC000-000000067F0000400200008A590000200000__0000003903F1CFE8\n000000067F0000400200008A5900001FC000-000000067F0000400200008A590000200000__0000003B99F7F8A0\n000000067F0000400200008A5900001FC000-000000067F0000400200008A590000200000__0000005D2FFFFB38\n000000067F0000400200008A5900001FC000-000000067F0000400200008A590000200000__00000073AD3FE6B8\n000000067F0000400200008A5900001FC000-000000067F0000400200008A590000200000__000000914E3F38F0\n000000067F0000400200008A5900001FC000-000000067F0000400200008A590000200000__000000931B33AE68\n000000067F0000400200008A5900001FC000-000000067F0000400200008A590000200000__000000931B9AFDF8\n000000067F0000400200008A5900001FCB6A-000000067F0000400200008A590100000000__000000030FC9ED71-00000003AF79E5E9\n000000067F0000400200008A5900001FCE37-000000067F0000400200008A59000020580F__00000003AF79E5E9-000000044F29F379\n000000067F0000400200008A590000200000-000000067F0000400200008A590000204000__0000000478F987C0\n000000067F0000400200008A590000200000-000000067F0000400200008A590000204000__0000001C760FA190\n000000067F0000400200008A590000200000-000000067F0000400200008A590000204000__00000038E67ABFA0\n000000067F0000400200008A590000200000-000000067F0000400200008A590000204000__0000003903F1CFE8\n000000067F0000400200008A590000200000-000000067F0000400200008A590000204000__0000003B99F7F8A0\n00000006
7F0000400200008A590000200000-000000067F0000400200008A590000204000__0000005D2FFFFB38\n000000067F0000400200008A590000200000-000000067F0000400200008A590000204000__00000073AD3FE6B8\n000000067F0000400200008A590000200000-000000067F0000400200008A590000204000__000000914E3F38F0\n000000067F0000400200008A590000200000-000000067F0000400200008A590000204000__000000931B33AE68\n000000067F0000400200008A590000200000-000000067F0000400200008A590000204000__000000931B9AFDF8\n000000067F0000400200008A590000204000-000000067F0000400200008A590000208000__0000000478F987C0\n000000067F0000400200008A590000204000-000000067F0000400200008A590000208000__0000001C760FA190\n000000067F0000400200008A590000204000-000000067F0000400200008A590000208000__00000038E67ABFA0\n000000067F0000400200008A590000204000-000000067F0000400200008A590000208000__0000003903F1CFE8\n000000067F0000400200008A590000204000-000000067F0000400200008A590000208000__0000003B99F7F8A0\n000000067F0000400200008A590000204000-000000067F0000400200008A590000208000__0000005D2FFFFB38\n000000067F0000400200008A590000204000-000000067F0000400200008A590000208000__00000073AD3FE6B8\n000000067F0000400200008A590000204000-000000067F0000400200008A590000208000__000000914E3F38F0\n000000067F0000400200008A590000204000-000000067F0000400200008A590000208000__000000931B33AE68\n000000067F0000400200008A590000204000-000000067F0000400200008A590000208000__000000931B9AFDF8\n000000067F0000400200008A59000020580F-000000067F0000400200008A59000020E1DF__00000003AF79E5E9-000000044F29F379\n000000067F0000400200008A590000208000-000000067F0000400200008A59000020C000__0000000478F987C0\n000000067F0000400200008A590000208000-000000067F0000400200008A59000020C000__0000001C760FA190\n000000067F0000400200008A590000208000-000000067F0000400200008A59000020C000__00000038E67ABFA0\n000000067F0000400200008A590000208000-000000067F0000400200008A59000020C000__0000003903F1CFE8\n000000067F0000400200008A590000208000-000000067F0000400200008A59000020C000__0000003B99F7F8A0\n000000067F0000400200008A590000208000-0
00000067F0000400200008A59000020C000__0000005D2FFFFB38\n000000067F0000400200008A590000208000-000000067F0000400200008A59000020C000__00000073AD3FE6B8\n000000067F0000400200008A590000208000-000000067F0000400200008A59000020C000__000000914E3F38F0\n000000067F0000400200008A590000208000-000000067F0000400200008A59000020C000__000000931B33AE68\n000000067F0000400200008A590000208000-000000067F0000400200008A59000020C000__000000931B9AFDF8\n000000067F0000400200008A59000020C000-000000067F0000400200008A590000210000__0000000478F987C0\n000000067F0000400200008A59000020C000-000000067F0000400200008A590000210000__0000001C760FA190\n000000067F0000400200008A59000020C000-000000067F0000400200008A590000210000__00000038E67ABFA0\n000000067F0000400200008A59000020C000-000000067F0000400200008A590000210000__0000003903F1CFE8\n000000067F0000400200008A59000020C000-000000067F0000400200008A590000210000__0000003B99F7F8A0\n000000067F0000400200008A59000020C000-000000067F0000400200008A590000210000__0000005D2FFFFB38\n000000067F0000400200008A59000020C000-000000067F0000400200008A590000210000__00000073AD3FE6B8\n000000067F0000400200008A59000020C000-000000067F0000400200008A590000210000__000000914E3F38F0\n000000067F0000400200008A59000020C000-000000067F0000400200008A590000210000__000000931B33AE68\n000000067F0000400200008A59000020C000-000000067F0000400200008A590000210000__000000931B9AFDF8\n000000067F0000400200008A59000020E1DF-000000067F0000400200008A590000216BBF__00000003AF79E5E9-000000044F29F379\n000000067F0000400200008A590000210000-000000067F0000400200008A590000214000__0000000478F987C0\n000000067F0000400200008A590000210000-000000067F0000400200008A590000214000__0000001C760FA190\n000000067F0000400200008A590000210000-000000067F0000400200008A590000214000__00000038E67ABFA0\n000000067F0000400200008A590000210000-000000067F0000400200008A590000214000__0000003903F1CFE8\n000000067F0000400200008A590000210000-000000067F0000400200008A590000214000__0000003B99F7F8A0\n000000067F0000400200008A590000210000-000000067F0000400200008A5900002
14000__0000005D2FFFFB38\n000000067F0000400200008A590000210000-000000067F0000400200008A590000214000__00000073AD3FE6B8\n000000067F0000400200008A590000210000-000000067F0000400200008A590000214000__000000914E3F38F0\n000000067F0000400200008A590000210000-000000067F0000400200008A590000214000__000000931B33AE68\n000000067F0000400200008A590000210000-000000067F0000400200008A590000214000__000000931B9AFDF8\n000000067F0000400200008A590000214000-000000067F0000400200008A590000218000__0000000478F987C0\n000000067F0000400200008A590000214000-000000067F0000400200008A590000218000__0000001C760FA190\n000000067F0000400200008A590000214000-000000067F0000400200008A590000218000__00000038E67ABFA0\n000000067F0000400200008A590000214000-000000067F0000400200008A590000218000__0000003903F1CFE8\n000000067F0000400200008A590000214000-000000067F0000400200008A590000218000__0000003B99F7F8A0\n000000067F0000400200008A590000214000-000000067F0000400200008A590000218000__0000005D2FFFFB38\n000000067F0000400200008A590000214000-000000067F0000400200008A590000218000__00000073AD3FE6B8\n000000067F0000400200008A590000214000-000000067F0000400200008A590000218000__000000914E3F38F0\n000000067F0000400200008A590000214000-000000067F0000400200008A590000218000__000000931B33AE68\n000000067F0000400200008A590000214000-000000067F0000400200008A590000218000__000000931B9AFDF8\n000000067F0000400200008A590000216BBF-000000067F0000400200008A59000021F588__00000003AF79E5E9-000000044F29F379\n000000067F0000400200008A590000218000-000000067F0000400200008A59000021C000__0000000478F987C0\n000000067F0000400200008A590000218000-000000067F0000400200008A59000021C000__0000001C760FA190\n000000067F0000400200008A590000218000-000000067F0000400200008A59000021C000__00000038E67ABFA0\n000000067F0000400200008A590000218000-000000067F0000400200008A59000021C000__0000003903F1CFE8\n000000067F0000400200008A590000218000-000000067F0000400200008A59000021C000__0000003B99F7F8A0\n000000067F0000400200008A590000218000-000000067F0000400200008A59000021C000__0000005D2FFFFB38\n00000
0067F0000400200008A590000218000-000000067F0000400200008A59000021C000__00000073AD3FE6B8\n000000067F0000400200008A590000218000-000000067F0000400200008A59000021C000__000000914E3F38F0\n000000067F0000400200008A590000218000-000000067F0000400200008A59000021C000__000000931B33AE68\n000000067F0000400200008A590000218000-000000067F0000400200008A59000021C000__000000931B9AFDF8\n000000067F0000400200008A59000021C000-000000067F0000400200008A590000220000__0000000478F987C0\n000000067F0000400200008A59000021C000-000000067F0000400200008A590000220000__0000001C760FA190\n000000067F0000400200008A59000021C000-000000067F0000400200008A590000220000__00000038E67ABFA0\n000000067F0000400200008A59000021C000-000000067F0000400200008A590000220000__0000003903F1CFE8\n000000067F0000400200008A59000021C000-000000067F0000400200008A590000220000__0000003B99F7F8A0\n000000067F0000400200008A59000021C000-000000067F0000400200008A590000220000__0000005D2FFFFB38\n000000067F0000400200008A59000021C000-000000067F0000400200008A590000220000__00000073AD3FE6B8\n000000067F0000400200008A59000021C000-000000067F0000400200008A590000220000__000000914E3F38F0\n000000067F0000400200008A59000021C000-000000067F0000400200008A590000220000__000000931B33AE68\n000000067F0000400200008A59000021C000-000000067F0000400200008A590000220000__000000931B9AFDF8\n000000067F0000400200008A59000021F588-000000067F0000400200008A590000227F75__00000003AF79E5E9-000000044F29F379\n000000067F0000400200008A590000220000-000000067F0000400200008A590000224000__0000000478F987C0\n000000067F0000400200008A590000220000-000000067F0000400200008A590000224000__0000001C760FA190\n000000067F0000400200008A590000220000-000000067F0000400200008A590000224000__00000038E67ABFA0\n000000067F0000400200008A590000220000-000000067F0000400200008A590000224000__0000003903F1CFE8\n000000067F0000400200008A590000220000-000000067F0000400200008A590000224000__0000003B99F7F8A0\n000000067F0000400200008A590000220000-000000067F0000400200008A590000224000__0000005D2FFFFB38\n000000067F0000400200008A59000022000
0-000000067F0000400200008A590000224000__00000073AD3FE6B8\n000000067F0000400200008A590000220000-000000067F0000400200008A590000224000__000000914E3F38F0\n000000067F0000400200008A590000220000-000000067F0000400200008A590000224000__000000931B33AE68\n000000067F0000400200008A590000220000-000000067F0000400200008A590000224000__000000931B9AFDF8\n000000067F0000400200008A590000224000-000000067F0000400200008A590000228000__0000000478F987C0\n000000067F0000400200008A590000224000-000000067F0000400200008A590000228000__0000001C760FA190\n000000067F0000400200008A590000224000-000000067F0000400200008A590000228000__00000038E67ABFA0\n000000067F0000400200008A590000224000-000000067F0000400200008A590000228000__0000003903F1CFE8\n000000067F0000400200008A590000224000-000000067F0000400200008A590000228000__0000003B99F7F8A0\n000000067F0000400200008A590000224000-000000067F0000400200008A590000228000__0000005D2FFFFB38\n000000067F0000400200008A590000224000-000000067F0000400200008A590000228000__00000073AD3FE6B8\n000000067F0000400200008A590000224000-000000067F0000400200008A590000228000__000000914E3F38F0\n000000067F0000400200008A590000224000-000000067F0000400200008A590000228000__000000931B33AE68\n000000067F0000400200008A590000224000-000000067F0000400200008A590000228000__000000931B9AFDF8\n000000067F0000400200008A590000227F75-000000067F0000400200008A590000230964__00000003AF79E5E9-000000044F29F379\n000000067F0000400200008A590000228000-000000067F0000400200008A59000022C000__0000000478F987C0\n000000067F0000400200008A590000228000-000000067F0000400200008A59000022C000__0000001C760FA190\n000000067F0000400200008A590000228000-000000067F0000400200008A59000022C000__00000038E67ABFA0\n000000067F0000400200008A590000228000-000000067F0000400200008A59000022C000__0000003903F1CFE8\n000000067F0000400200008A590000228000-000000067F0000400200008A59000022C000__0000003B99F7F8A0\n000000067F0000400200008A590000228000-000000067F0000400200008A59000022C000__0000005D2FFFFB38\n000000067F0000400200008A590000228000-000000067F0000400200008A5900
0022C000__00000073AD3FE6B8\n000000067F0000400200008A590000228000-000000067F0000400200008A59000022C000__000000914E3F38F0\n000000067F0000400200008A590000228000-000000067F0000400200008A59000022C000__000000931B33AE68\n000000067F0000400200008A590000228000-000000067F0000400200008A59000022C000__000000931B9AFDF8\n000000067F0000400200008A59000022C000-000000067F0000400200008A590000230000__0000000478F987C0\n000000067F0000400200008A59000022C000-000000067F0000400200008A590000230000__0000001C760FA190\n000000067F0000400200008A59000022C000-000000067F0000400200008A590000230000__00000038E67ABFA0\n000000067F0000400200008A59000022C000-000000067F0000400200008A590000230000__0000003903F1CFE8\n000000067F0000400200008A59000022C000-000000067F0000400200008A590000230000__0000003B99F7F8A0\n000000067F0000400200008A59000022C000-000000067F0000400200008A590000230000__0000005D2FFFFB38\n000000067F0000400200008A59000022C000-000000067F0000400200008A590000230000__00000073AD3FE6B8\n000000067F0000400200008A59000022C000-000000067F0000400200008A590000230000__000000914E3F38F0\n000000067F0000400200008A59000022C000-000000067F0000400200008A590000230000__000000931B33AE68\n000000067F0000400200008A59000022C000-000000067F0000400200008A590000230000__000000931B9AFDF8\n000000067F0000400200008A590000230000-000000067F0000400200008A590000234000__0000000478F987C0\n000000067F0000400200008A590000230000-000000067F0000400200008A590000234000__0000001C760FA190\n000000067F0000400200008A590000230000-000000067F0000400200008A590000234000__00000038E67ABFA0\n000000067F0000400200008A590000230000-000000067F0000400200008A590000234000__0000003903F1CFE8\n000000067F0000400200008A590000230000-000000067F0000400200008A590000234000__0000003B99F7F8A0\n000000067F0000400200008A590000230000-000000067F0000400200008A590000234000__0000005D2FFFFB38\n000000067F0000400200008A590000230000-000000067F0000400200008A590000234000__00000073AD3FE6B8\n000000067F0000400200008A590000230000-000000067F0000400200008A590000234000__000000914E3F38F0\n000000067F000040020
0008A590000230000-000000067F0000400200008A590000234000__000000931B33AE68\n000000067F0000400200008A590000230000-000000067F0000400200008A590000234000__000000931B9AFDF8\n000000067F0000400200008A590000230964-000000067F0000400200008A590000239354__00000003AF79E5E9-000000044F29F379\n000000067F0000400200008A590000234000-000000067F0000400200008A590000238000__0000000478F987C0\n000000067F0000400200008A590000234000-000000067F0000400200008A590000238000__0000001C760FA190\n000000067F0000400200008A590000234000-000000067F0000400200008A590000238000__00000038E67ABFA0\n000000067F0000400200008A590000234000-000000067F0000400200008A590000238000__0000003903F1CFE8\n000000067F0000400200008A590000234000-000000067F0000400200008A590000238000__0000003B99F7F8A0\n000000067F0000400200008A590000234000-000000067F0000400200008A590000238000__0000005D2FFFFB38\n000000067F0000400200008A590000234000-000000067F0000400200008A590000238000__00000073AD3FE6B8\n000000067F0000400200008A590000234000-000000067F0000400200008A590000238000__000000914E3F38F0\n000000067F0000400200008A590000234000-000000067F0000400200008A590000238000__000000931B33AE68\n000000067F0000400200008A590000234000-000000067F0000400200008A590000238000__000000931B9AFDF8\n000000067F0000400200008A590000238000-000000067F0000400200008A59000023C000__0000000478F987C0\n000000067F0000400200008A590000238000-000000067F0000400200008A59000023C000__0000001C760FA190\n000000067F0000400200008A590000238000-000000067F0000400200008A59000023C000__00000038E67ABFA0\n000000067F0000400200008A590000238000-000000067F0000400200008A59000023C000__0000003903F1CFE8\n000000067F0000400200008A590000238000-000000067F0000400200008A59000023C000__0000003B99F7F8A0\n000000067F0000400200008A590000238000-000000067F0000400200008A59000023C000__0000005D2FFFFB38\n000000067F0000400200008A590000238000-000000067F0000400200008A59000023C000__00000073AD3FE6B8\n000000067F0000400200008A590000238000-000000067F0000400200008A59000023C000__000000914E3F38F0\n000000067F0000400200008A590000238000-000000067F00
00400200008A59000023C000__000000931B33AE68\n000000067F0000400200008A590000238000-000000067F0000400200008A59000023C000__000000931B9AFDF8\n000000067F0000400200008A590000239354-000000067F0000400200008A590000241D2A__00000003AF79E5E9-000000044F29F379\n000000067F0000400200008A59000023C000-000000067F0000400200008A590000240000__0000000478F987C0\n000000067F0000400200008A59000023C000-000000067F0000400200008A590000240000__0000001C760FA190\n000000067F0000400200008A59000023C000-000000067F0000400200008A590000240000__00000038E67ABFA0\n000000067F0000400200008A59000023C000-000000067F0000400200008A590000240000__0000003903F1CFE8\n000000067F0000400200008A59000023C000-000000067F0000400200008A590000240000__0000003B99F7F8A0\n000000067F0000400200008A59000023C000-000000067F0000400200008A590000240000__0000005D2FFFFB38\n000000067F0000400200008A59000023C000-000000067F0000400200008A590000240000__00000073AD3FE6B8\n000000067F0000400200008A59000023C000-000000067F0000400200008A590000240000__000000914E3F38F0\n000000067F0000400200008A59000023C000-000000067F0000400200008A590000240000__000000931B33AE68\n000000067F0000400200008A59000023C000-000000067F0000400200008A590000240000__000000931B9AFDF8\n000000067F0000400200008A590000240000-000000067F0000400200008A590000244000__0000000478F987C0\n000000067F0000400200008A590000240000-000000067F0000400200008A590000244000__0000001C760FA190\n000000067F0000400200008A590000240000-000000067F0000400200008A590000244000__00000038E67ABFA0\n000000067F0000400200008A590000240000-000000067F0000400200008A590000244000__0000003903F1CFE8\n000000067F0000400200008A590000240000-000000067F0000400200008A590000244000__0000003B99F7F8A0\n000000067F0000400200008A590000240000-000000067F0000400200008A590000244000__0000005D2FFFFB38\n000000067F0000400200008A590000240000-000000067F0000400200008A590000244000__00000073AD3FE6B8\n000000067F0000400200008A590000240000-000000067F0000400200008A590000244000__000000914E3F38F0\n000000067F0000400200008A590000240000-000000067F0000400200008A590000244000__0000
00931B33AE68\n000000067F0000400200008A590000240000-000000067F0000400200008A590000244000__000000931B9AFDF8\n000000067F0000400200008A590000241D2A-000000067F0000400200008A59000024A6FE__00000003AF79E5E9-000000044F29F379\n000000067F0000400200008A590000244000-000000067F0000400200008A590000248000__0000000478F987C0\n000000067F0000400200008A590000244000-000000067F0000400200008A590000248000__0000001C760FA190\n000000067F0000400200008A590000244000-000000067F0000400200008A590000248000__00000038E67ABFA0\n000000067F0000400200008A590000244000-000000067F0000400200008A590000248000__0000003903F1CFE8\n000000067F0000400200008A590000244000-000000067F0000400200008A590000248000__0000003B99F7F8A0\n000000067F0000400200008A590000244000-000000067F0000400200008A590000248000__0000005D2FFFFB38\n000000067F0000400200008A590000244000-000000067F0000400200008A590000248000__00000073AD3FE6B8\n000000067F0000400200008A590000244000-000000067F0000400200008A590000248000__000000914E3F38F0\n000000067F0000400200008A590000244000-000000067F0000400200008A590000248000__000000931B33AE68\n000000067F0000400200008A590000244000-000000067F0000400200008A590000248000__000000931B9AFDF8\n000000067F0000400200008A590000248000-000000067F0000400200008A59000024C000__0000000478F987C0\n000000067F0000400200008A590000248000-000000067F0000400200008A59000024C000__0000001C760FA190\n000000067F0000400200008A590000248000-000000067F0000400200008A59000024C000__00000038E67ABFA0\n000000067F0000400200008A590000248000-000000067F0000400200008A59000024C000__0000003903F1CFE8\n000000067F0000400200008A590000248000-000000067F0000400200008A59000024C000__0000003B99F7F8A0\n000000067F0000400200008A590000248000-000000067F0000400200008A59000024C000__0000005D2FFFFB38\n000000067F0000400200008A590000248000-000000067F0000400200008A59000024C000__00000073AD3FE6B8\n000000067F0000400200008A590000248000-000000067F0000400200008A59000024C000__000000914E3F38F0\n000000067F0000400200008A590000248000-000000067F0000400200008A59000024C000__000000931B33AE68\n000000067F000040
0200008A590000248000-000000067F0000400200008A59000024C000__000000931B9AFDF8\n000000067F0000400200008A59000024A6FE-000000067F0000400200008A5900002530C1__00000003AF79E5E9-000000044F29F379\n000000067F0000400200008A59000024C000-000000067F0000400200008A590000250000__0000000478F987C0\n000000067F0000400200008A59000024C000-000000067F0000400200008A590000250000__0000001C760FA190\n000000067F0000400200008A59000024C000-000000067F0000400200008A590000250000__00000038E67ABFA0\n000000067F0000400200008A59000024C000-000000067F0000400200008A590000250000__0000003903F1CFE8\n000000067F0000400200008A59000024C000-000000067F0000400200008A590000250000__0000003B99F7F8A0\n000000067F0000400200008A59000024C000-000000067F0000400200008A590000250000__0000005D2FFFFB38\n000000067F0000400200008A59000024C000-000000067F0000400200008A590000250000__00000073AD3FE6B8\n000000067F0000400200008A59000024C000-000000067F0000400200008A590000250000__000000914E3F38F0\n000000067F0000400200008A59000024C000-000000067F0000400200008A590000250000__000000931B33AE68\n000000067F0000400200008A59000024C000-000000067F0000400200008A590000250000__000000931B9AFDF8\n000000067F0000400200008A590000250000-000000067F0000400200008A590000254000__0000000478F987C0\n000000067F0000400200008A590000250000-000000067F0000400200008A590000254000__0000001C760FA190\n000000067F0000400200008A590000250000-000000067F0000400200008A590000254000__00000038E67ABFA0\n000000067F0000400200008A590000250000-000000067F0000400200008A590000254000__0000003903F1CFE8\n000000067F0000400200008A590000250000-000000067F0000400200008A590000254000__0000003B99F7F8A0\n000000067F0000400200008A590000250000-000000067F0000400200008A590000254000__0000005D2FFFFB38\n000000067F0000400200008A590000250000-000000067F0000400200008A590000254000__00000073AD3FE6B8\n000000067F0000400200008A590000250000-000000067F0000400200008A590000254000__000000914E3F38F0\n000000067F0000400200008A590000250000-000000067F0000400200008A590000254000__000000931B33AE68\n000000067F0000400200008A590000250000-000000067
F0000400200008A590000254000__000000931B9AFDF8\n000000067F0000400200008A5900002530C1-000000067F0000400200008A590100000000__00000003AF79E5E9-000000044F29F379\n000000067F0000400200008A59000025338B-000000067F0000400200008A59000025BD50__000000044F29F379-00000004FED1E2E1\n000000067F0000400200008A590000254000-000000067F0000400200008A590000258000__0000000478F987C0\n000000067F0000400200008A590000254000-000000067F0000400200008A590000258000__0000001C760FA190\n000000067F0000400200008A590000254000-000000067F0000400200008A590000258000__00000038E67ABFA0\n000000067F0000400200008A590000254000-000000067F0000400200008A590000258000__0000003903F1CFE8\n000000067F0000400200008A590000254000-000000067F0000400200008A590000258000__0000003B99F7F8A0\n000000067F0000400200008A590000254000-000000067F0000400200008A590000258000__0000005D2FFFFB38\n000000067F0000400200008A590000254000-000000067F0000400200008A590000258000__00000073AD3FE6B8\n000000067F0000400200008A590000254000-000000067F0000400200008A590000258000__000000914E3F38F0\n000000067F0000400200008A590000254000-000000067F0000400200008A590000258000__000000931B33AE68\n000000067F0000400200008A590000254000-000000067F0000400200008A590000258000__000000931B9AFDF8\n000000067F0000400200008A590000258000-000000067F0000400200008A59000025C000__0000000478F987C0\n000000067F0000400200008A590000258000-000000067F0000400200008A59000025C000__0000001C760FA190\n000000067F0000400200008A590000258000-000000067F0000400200008A59000025C000__00000038E67ABFA0\n000000067F0000400200008A590000258000-000000067F0000400200008A59000025C000__0000003903F1CFE8\n000000067F0000400200008A590000258000-000000067F0000400200008A59000025C000__0000003B99F7F8A0\n000000067F0000400200008A590000258000-000000067F0000400200008A59000025C000__0000005D2FFFFB38\n000000067F0000400200008A590000258000-000000067F0000400200008A59000025C000__00000073AD3FE6B8\n000000067F0000400200008A590000258000-000000067F0000400200008A59000025C000__000000914E3F38F0\n000000067F0000400200008A590000258000-000000067F000040020000
8A59000025C000__000000931B33AE68\n000000067F0000400200008A590000258000-000000067F0000400200008A59000025C000__000000931B9AFDF8\n000000067F0000400200008A59000025BD50-000000067F0000400200008A59000026473E__000000044F29F379-00000004FED1E2E1\n000000067F0000400200008A59000025C000-000000067F0000400200008A590000260000__0000000478F987C0\n000000067F0000400200008A59000025C000-000000067F0000400200008A590000260000__0000001C760FA190\n000000067F0000400200008A59000025C000-000000067F0000400200008A590000260000__00000038E67ABFA0\n000000067F0000400200008A59000025C000-000000067F0000400200008A590000260000__0000003903F1CFE8\n000000067F0000400200008A59000025C000-000000067F0000400200008A590000260000__0000003B99F7F8A0\n000000067F0000400200008A59000025C000-000000067F0000400200008A590000260000__0000005D2FFFFB38\n000000067F0000400200008A59000025C000-000000067F0000400200008A590000260000__00000073AD3FE6B8\n000000067F0000400200008A59000025C000-000000067F0000400200008A590000260000__000000914E3F38F0\n000000067F0000400200008A59000025C000-000000067F0000400200008A590000260000__000000931B33AE68\n000000067F0000400200008A59000025C000-000000067F0000400200008A590000260000__000000931B9AFDF8\n000000067F0000400200008A590000260000-000000067F0000400200008A590000264000__0000000478F987C0\n000000067F0000400200008A590000260000-000000067F0000400200008A590000264000__0000001C760FA190\n000000067F0000400200008A590000260000-000000067F0000400200008A590000264000__00000038E67ABFA0\n000000067F0000400200008A590000260000-000000067F0000400200008A590000264000__0000003903F1CFE8\n000000067F0000400200008A590000260000-000000067F0000400200008A590000264000__0000003B99F7F8A0\n000000067F0000400200008A590000260000-000000067F0000400200008A590000264000__0000005D2FFFFB38\n000000067F0000400200008A590000260000-000000067F0000400200008A590000264000__00000073AD3FE6B8\n000000067F0000400200008A590000260000-000000067F0000400200008A590000264000__000000914E3F38F0\n000000067F0000400200008A590000260000-000000067F0000400200008A590000264000__000000931B33AE
68\n000000067F0000400200008A590000260000-000000067F0000400200008A590000264000__000000931B9AFDF8\n000000067F0000400200008A590000264000-000000067F0000400200008A590000268000__0000000478F987C0\n000000067F0000400200008A590000264000-000000067F0000400200008A590000268000__0000001C760FA190\n000000067F0000400200008A590000264000-000000067F0000400200008A590000268000__00000038E67ABFA0\n000000067F0000400200008A590000264000-000000067F0000400200008A590000268000__0000003903F1CFE8\n000000067F0000400200008A590000264000-000000067F0000400200008A590000268000__0000003B99F7F8A0\n000000067F0000400200008A590000264000-000000067F0000400200008A590000268000__0000005D2FFFFB38\n000000067F0000400200008A590000264000-000000067F0000400200008A590000268000__00000073AD3FE6B8\n000000067F0000400200008A590000264000-000000067F0000400200008A590000268000__000000914E3F38F0\n000000067F0000400200008A590000264000-000000067F0000400200008A590000268000__000000931B33AE68\n000000067F0000400200008A590000264000-000000067F0000400200008A590000268000__000000931B9AFDF8\n000000067F0000400200008A59000026473E-000000067F0000400200008A59000026D126__000000044F29F379-00000004FED1E2E1\n000000067F0000400200008A590000268000-000000067F0000400200008A59000026C000__0000001C760FA190\n000000067F0000400200008A590000268000-000000067F0000400200008A59000026C000__00000038E67ABFA0\n000000067F0000400200008A590000268000-000000067F0000400200008A59000026C000__0000003903F1CFE8\n000000067F0000400200008A590000268000-000000067F0000400200008A59000026C000__0000003B99F7F8A0\n000000067F0000400200008A590000268000-000000067F0000400200008A59000026C000__0000005D2FFFFB38\n000000067F0000400200008A590000268000-000000067F0000400200008A59000026C000__00000073AD3FE6B8\n000000067F0000400200008A590000268000-000000067F0000400200008A59000026C000__000000914E3F38F0\n000000067F0000400200008A590000268000-000000067F0000400200008A59000026C000__000000931B33AE68\n000000067F0000400200008A590000268000-000000067F0000400200008A59000026C000__000000931B9AFDF8\n000000067F0000400200008A59
0000268000-030000000000000000000000000000000002__0000000478F987C0\n000000067F0000400200008A59000026C000-000000067F0000400200008A590000270000__0000001C760FA190\n000000067F0000400200008A59000026C000-000000067F0000400200008A590000270000__00000038E67ABFA0\n000000067F0000400200008A59000026C000-000000067F0000400200008A590000270000__0000003903F1CFE8\n000000067F0000400200008A59000026C000-000000067F0000400200008A590000270000__0000003B99F7F8A0\n000000067F0000400200008A59000026C000-000000067F0000400200008A590000270000__0000005D2FFFFB38\n000000067F0000400200008A59000026C000-000000067F0000400200008A590000270000__00000073AD3FE6B8\n000000067F0000400200008A59000026C000-000000067F0000400200008A590000270000__000000914E3F38F0\n000000067F0000400200008A59000026C000-000000067F0000400200008A590000270000__000000931B33AE68\n000000067F0000400200008A59000026C000-000000067F0000400200008A590000270000__000000931B9AFDF8\n000000067F0000400200008A59000026D126-000000067F0000400200008A590000275B09__000000044F29F379-00000004FED1E2E1\n000000067F0000400200008A590000270000-000000067F0000400200008A590000274000__0000001C760FA190\n000000067F0000400200008A590000270000-000000067F0000400200008A590000274000__00000038E67ABFA0\n000000067F0000400200008A590000270000-000000067F0000400200008A590000274000__0000003903F1CFE8\n000000067F0000400200008A590000270000-000000067F0000400200008A590000274000__0000003B99F7F8A0\n000000067F0000400200008A590000270000-000000067F0000400200008A590000274000__0000005D2FFFFB38\n000000067F0000400200008A590000270000-000000067F0000400200008A590000274000__00000073AD3FE6B8\n000000067F0000400200008A590000270000-000000067F0000400200008A590000274000__000000914E3F38F0\n000000067F0000400200008A590000270000-000000067F0000400200008A590000274000__000000931B33AE68\n000000067F0000400200008A590000270000-000000067F0000400200008A590000274000__000000931B9AFDF8\n000000067F0000400200008A590000274000-000000067F0000400200008A590000278000__0000001C760FA190\n000000067F0000400200008A590000274000-000000067F000040020
0008A590000278000__00000038E67ABFA0\n000000067F0000400200008A590000274000-000000067F0000400200008A590000278000__0000003903F1CFE8\n000000067F0000400200008A590000274000-000000067F0000400200008A590000278000__0000003B99F7F8A0\n000000067F0000400200008A590000274000-000000067F0000400200008A590000278000__0000005D2FFFFB38\n000000067F0000400200008A590000274000-000000067F0000400200008A590000278000__00000073AD3FE6B8\n000000067F0000400200008A590000274000-000000067F0000400200008A590000278000__000000914E3F38F0\n000000067F0000400200008A590000274000-000000067F0000400200008A590000278000__000000931B33AE68\n000000067F0000400200008A590000274000-000000067F0000400200008A590000278000__000000931B9AFDF8\n000000067F0000400200008A590000275B09-000000067F0000400200008A59000027E4E0__000000044F29F379-00000004FED1E2E1\n000000067F0000400200008A590000278000-000000067F0000400200008A59000027C000__0000001C760FA190\n000000067F0000400200008A590000278000-000000067F0000400200008A59000027C000__00000038E67ABFA0\n000000067F0000400200008A590000278000-000000067F0000400200008A59000027C000__0000003903F1CFE8\n000000067F0000400200008A590000278000-000000067F0000400200008A59000027C000__0000003B99F7F8A0\n000000067F0000400200008A590000278000-000000067F0000400200008A59000027C000__0000005D2FFFFB38\n000000067F0000400200008A590000278000-000000067F0000400200008A59000027C000__00000073AD3FE6B8\n000000067F0000400200008A590000278000-000000067F0000400200008A59000027C000__000000914E3F38F0\n000000067F0000400200008A590000278000-000000067F0000400200008A59000027C000__000000931B33AE68\n000000067F0000400200008A590000278000-000000067F0000400200008A59000027C000__000000931B9AFDF8\n000000067F0000400200008A59000027C000-000000067F0000400200008A590000280000__0000001C760FA190\n000000067F0000400200008A59000027C000-000000067F0000400200008A590000280000__00000038E67ABFA0\n000000067F0000400200008A59000027C000-000000067F0000400200008A590000280000__0000003903F1CFE8\n000000067F0000400200008A59000027C000-000000067F0000400200008A590000280000__0000003B99F
7F8A0\n000000067F0000400200008A59000027C000-000000067F0000400200008A590000280000__0000005D2FFFFB38\n000000067F0000400200008A59000027C000-000000067F0000400200008A590000280000__00000073AD3FE6B8\n000000067F0000400200008A59000027C000-000000067F0000400200008A590000280000__000000914E3F38F0\n000000067F0000400200008A59000027C000-000000067F0000400200008A590000280000__000000931B33AE68\n000000067F0000400200008A59000027C000-000000067F0000400200008A590000280000__000000931B9AFDF8\n000000067F0000400200008A59000027E4E0-000000067F0000400200008A590000286EB2__000000044F29F379-00000004FED1E2E1\n000000067F0000400200008A590000280000-000000067F0000400200008A590000284000__0000001C760FA190\n000000067F0000400200008A590000280000-000000067F0000400200008A590000284000__00000038E67ABFA0\n000000067F0000400200008A590000280000-000000067F0000400200008A590000284000__0000003903F1CFE8\n000000067F0000400200008A590000280000-000000067F0000400200008A590000284000__0000003B99F7F8A0\n000000067F0000400200008A590000280000-000000067F0000400200008A590000284000__0000005D2FFFFB38\n000000067F0000400200008A590000280000-000000067F0000400200008A590000284000__00000073AD3FE6B8\n000000067F0000400200008A590000280000-000000067F0000400200008A590000284000__000000914E3F38F0\n000000067F0000400200008A590000280000-000000067F0000400200008A590000284000__000000931B33AE68\n000000067F0000400200008A590000280000-000000067F0000400200008A590000284000__000000931B9AFDF8\n000000067F0000400200008A590000284000-000000067F0000400200008A590000288000__0000001C760FA190\n000000067F0000400200008A590000284000-000000067F0000400200008A590000288000__00000038E67ABFA0\n000000067F0000400200008A590000284000-000000067F0000400200008A590000288000__0000003903F1CFE8\n000000067F0000400200008A590000284000-000000067F0000400200008A590000288000__0000003B99F7F8A0\n000000067F0000400200008A590000284000-000000067F0000400200008A590000288000__0000005D2FFFFB38\n000000067F0000400200008A590000284000-000000067F0000400200008A590000288000__00000073AD3FE6B8\n000000067F0000400200008
A590000284000-000000067F0000400200008A590000288000__000000914E3F38F0\n000000067F0000400200008A590000284000-000000067F0000400200008A590000288000__000000931B33AE68\n000000067F0000400200008A590000284000-000000067F0000400200008A590000288000__000000931B9AFDF8\n000000067F0000400200008A590000286EB2-000000067F0000400200008A59000028F86E__000000044F29F379-00000004FED1E2E1\n000000067F0000400200008A590000288000-000000067F0000400200008A59000028C000__0000001C760FA190\n000000067F0000400200008A590000288000-000000067F0000400200008A59000028C000__00000038E67ABFA0\n000000067F0000400200008A590000288000-000000067F0000400200008A59000028C000__0000003903F1CFE8\n000000067F0000400200008A590000288000-000000067F0000400200008A59000028C000__0000003B99F7F8A0\n000000067F0000400200008A590000288000-000000067F0000400200008A59000028C000__0000005D2FFFFB38\n000000067F0000400200008A590000288000-000000067F0000400200008A59000028C000__00000073AD3FE6B8\n000000067F0000400200008A590000288000-000000067F0000400200008A59000028C000__000000914E3F38F0\n000000067F0000400200008A590000288000-000000067F0000400200008A59000028C000__000000931B33AE68\n000000067F0000400200008A590000288000-000000067F0000400200008A59000028C000__000000931B9AFDF8\n000000067F0000400200008A59000028C000-000000067F0000400200008A590000290000__0000001C760FA190\n000000067F0000400200008A59000028C000-000000067F0000400200008A590000290000__00000038E67ABFA0\n000000067F0000400200008A59000028C000-000000067F0000400200008A590000290000__0000003903F1CFE8\n000000067F0000400200008A59000028C000-000000067F0000400200008A590000290000__0000003B99F7F8A0\n000000067F0000400200008A59000028C000-000000067F0000400200008A590000290000__0000005D2FFFFB38\n000000067F0000400200008A59000028C000-000000067F0000400200008A590000290000__00000073AD3FE6B8\n000000067F0000400200008A59000028C000-000000067F0000400200008A590000290000__000000914E3F38F0\n000000067F0000400200008A59000028C000-000000067F0000400200008A590000290000__000000931B33AE68\n000000067F0000400200008A59000028C000-000000067F000040
0200008A590000290000__000000931B9AFDF8\n000000067F0000400200008A59000028F86E-000000067F0000400200008A590000298236__000000044F29F379-00000004FED1E2E1\n000000067F0000400200008A590000290000-000000067F0000400200008A590000294000__0000001C760FA190\n000000067F0000400200008A590000290000-000000067F0000400200008A590000294000__00000038E67ABFA0\n000000067F0000400200008A590000290000-000000067F0000400200008A590000294000__0000003903F1CFE8\n000000067F0000400200008A590000290000-000000067F0000400200008A590000294000__0000003B99F7F8A0\n000000067F0000400200008A590000290000-000000067F0000400200008A590000294000__0000005D2FFFFB38\n000000067F0000400200008A590000290000-000000067F0000400200008A590000294000__00000073AD3FE6B8\n000000067F0000400200008A590000290000-000000067F0000400200008A590000294000__000000914E3F38F0\n000000067F0000400200008A590000290000-000000067F0000400200008A590000294000__000000931B33AE68\n000000067F0000400200008A590000290000-000000067F0000400200008A590000294000__000000931B9AFDF8\n000000067F0000400200008A590000294000-000000067F0000400200008A590000298000__0000001C760FA190\n000000067F0000400200008A590000294000-000000067F0000400200008A590000298000__00000038E67ABFA0\n000000067F0000400200008A590000294000-000000067F0000400200008A590000298000__0000003903F1CFE8\n000000067F0000400200008A590000294000-000000067F0000400200008A590000298000__0000003B99F7F8A0\n000000067F0000400200008A590000294000-000000067F0000400200008A590000298000__0000005D2FFFFB38\n000000067F0000400200008A590000294000-000000067F0000400200008A590000298000__00000073AD3FE6B8\n000000067F0000400200008A590000294000-000000067F0000400200008A590000298000__000000914E3F38F0\n000000067F0000400200008A590000294000-000000067F0000400200008A590000298000__000000931B33AE68\n000000067F0000400200008A590000294000-000000067F0000400200008A590000298000__000000931B9AFDF8\n000000067F0000400200008A590000298000-000000067F0000400200008A59000029C000__0000001C760FA190\n000000067F0000400200008A590000298000-000000067F0000400200008A59000029C000__00000038
E67ABFA0\n000000067F0000400200008A590000298000-000000067F0000400200008A59000029C000__0000003903F1CFE8\n000000067F0000400200008A590000298000-000000067F0000400200008A59000029C000__0000003B99F7F8A0\n000000067F0000400200008A590000298000-000000067F0000400200008A59000029C000__0000005D2FFFFB38\n000000067F0000400200008A590000298000-000000067F0000400200008A59000029C000__00000073AD3FE6B8\n000000067F0000400200008A590000298000-000000067F0000400200008A59000029C000__000000914E3F38F0\n000000067F0000400200008A590000298000-000000067F0000400200008A59000029C000__000000931B33AE68\n000000067F0000400200008A590000298000-000000067F0000400200008A59000029C000__000000931B9AFDF8\n000000067F0000400200008A590000298236-000000067F0000400200008A5900002A0C2B__000000044F29F379-00000004FED1E2E1\n000000067F0000400200008A59000029C000-000000067F0000400200008A5900002A0000__0000001C760FA190\n000000067F0000400200008A59000029C000-000000067F0000400200008A5900002A0000__00000038E67ABFA0\n000000067F0000400200008A59000029C000-000000067F0000400200008A5900002A0000__0000003903F1CFE8\n000000067F0000400200008A59000029C000-000000067F0000400200008A5900002A0000__0000003B99F7F8A0\n000000067F0000400200008A59000029C000-000000067F0000400200008A5900002A0000__0000005D2FFFFB38\n000000067F0000400200008A59000029C000-000000067F0000400200008A5900002A0000__00000073AD3FE6B8\n000000067F0000400200008A59000029C000-000000067F0000400200008A5900002A0000__000000914E3F38F0\n000000067F0000400200008A59000029C000-000000067F0000400200008A5900002A0000__000000931B33AE68\n000000067F0000400200008A59000029C000-000000067F0000400200008A5900002A0000__000000931B9AFDF8\n000000067F0000400200008A5900002A0000-000000067F0000400200008A5900002A4000__0000001C760FA190\n000000067F0000400200008A5900002A0000-000000067F0000400200008A5900002A4000__00000038E67ABFA0\n000000067F0000400200008A5900002A0000-000000067F0000400200008A5900002A4000__0000003903F1CFE8\n000000067F0000400200008A5900002A0000-000000067F0000400200008A5900002A4000__0000003B99F7F8A0\n000000067F0000400200
008A5900002A0000-000000067F0000400200008A5900002A4000__0000005D2FFFFB38\n000000067F0000400200008A5900002A0000-000000067F0000400200008A5900002A4000__00000073AD3FE6B8\n000000067F0000400200008A5900002A0000-000000067F0000400200008A5900002A4000__000000914E3F38F0\n000000067F0000400200008A5900002A0000-000000067F0000400200008A5900002A4000__000000931B33AE68\n000000067F0000400200008A5900002A0000-000000067F0000400200008A5900002A4000__000000931B9AFDF8\n000000067F0000400200008A5900002A0C2B-000000067F0000400200008A5900002A961E__000000044F29F379-00000004FED1E2E1\n000000067F0000400200008A5900002A4000-000000067F0000400200008A5900002A8000__0000001C760FA190\n000000067F0000400200008A5900002A4000-000000067F0000400200008A5900002A8000__00000038E67ABFA0\n000000067F0000400200008A5900002A4000-000000067F0000400200008A5900002A8000__0000003903F1CFE8\n000000067F0000400200008A5900002A4000-000000067F0000400200008A5900002A8000__0000003B99F7F8A0\n000000067F0000400200008A5900002A4000-000000067F0000400200008A5900002A8000__0000005D2FFFFB38\n000000067F0000400200008A5900002A4000-000000067F0000400200008A5900002A8000__00000073AD3FE6B8\n000000067F0000400200008A5900002A4000-000000067F0000400200008A5900002A8000__000000914E3F38F0\n000000067F0000400200008A5900002A4000-000000067F0000400200008A5900002A8000__000000931B33AE68\n000000067F0000400200008A5900002A4000-000000067F0000400200008A5900002A8000__000000931B9AFDF8\n000000067F0000400200008A5900002A8000-000000067F0000400200008A5900002AC000__0000001C760FA190\n000000067F0000400200008A5900002A8000-000000067F0000400200008A5900002AC000__00000038E67ABFA0\n000000067F0000400200008A5900002A8000-000000067F0000400200008A5900002AC000__0000003903F1CFE8\n000000067F0000400200008A5900002A8000-000000067F0000400200008A5900002AC000__0000003B99F7F8A0\n000000067F0000400200008A5900002A8000-000000067F0000400200008A5900002AC000__0000005D2FFFFB38\n000000067F0000400200008A5900002A8000-000000067F0000400200008A5900002AC000__00000073AD3FE6B8\n000000067F0000400200008A5900002A8000-000000067F000
0400200008A5900002AC000__000000914E3F38F0\n000000067F0000400200008A5900002A8000-000000067F0000400200008A5900002AC000__000000931B33AE68\n000000067F0000400200008A5900002A8000-000000067F0000400200008A5900002AC000__000000931B9AFDF8\n000000067F0000400200008A5900002A961E-000000067F0000400200008A5900002B2001__000000044F29F379-00000004FED1E2E1\n000000067F0000400200008A5900002AC000-000000067F0000400200008A5900002B0000__0000001C760FA190\n000000067F0000400200008A5900002AC000-000000067F0000400200008A5900002B0000__00000038E67ABFA0\n000000067F0000400200008A5900002AC000-000000067F0000400200008A5900002B0000__0000003903F1CFE8\n000000067F0000400200008A5900002AC000-000000067F0000400200008A5900002B0000__0000003B99F7F8A0\n000000067F0000400200008A5900002AC000-000000067F0000400200008A5900002B0000__0000005D2FFFFB38\n000000067F0000400200008A5900002AC000-000000067F0000400200008A5900002B0000__00000073AD3FE6B8\n000000067F0000400200008A5900002AC000-000000067F0000400200008A5900002B0000__000000914E3F38F0\n000000067F0000400200008A5900002AC000-000000067F0000400200008A5900002B0000__000000931B33AE68\n000000067F0000400200008A5900002AC000-000000067F0000400200008A5900002B0000__000000931B9AFDF8\n000000067F0000400200008A5900002B0000-000000067F0000400200008A5900002B4000__0000001C725A2400\n000000067F0000400200008A5900002B0000-000000067F0000400200008A5900002B4000__0000001C760FA190\n000000067F0000400200008A5900002B0000-000000067F0000400200008A5900002B4000__00000038E67ABFA0\n000000067F0000400200008A5900002B0000-000000067F0000400200008A5900002B4000__0000003903F1CFE8\n000000067F0000400200008A5900002B0000-000000067F0000400200008A5900002B4000__0000003B99F7F8A0\n000000067F0000400200008A5900002B0000-000000067F0000400200008A5900002B4000__0000005D2FFFFB38\n000000067F0000400200008A5900002B0000-000000067F0000400200008A5900002B4000__00000073AD3FE6B8\n000000067F0000400200008A5900002B0000-000000067F0000400200008A5900002B4000__000000914E3F38F0\n000000067F0000400200008A5900002B0000-000000067F0000400200008A5900002B4000__00000
0931B33AE68\n000000067F0000400200008A5900002B0000-000000067F0000400200008A5900002B4000__000000931B9AFDF8\n000000067F0000400200008A5900002B2001-000000067F0000400200008A590100000000__000000044F29F379-00000004FED1E2E1\n000000067F0000400200008A5900002B2344-000000067F0000400200008A5900002BAD21__00000004FED1E2E1-000000059E81EB61\n000000067F0000400200008A5900002B4000-000000067F0000400200008A5900002B8000__0000001C725A2400\n000000067F0000400200008A5900002B4000-000000067F0000400200008A5900002B8000__0000001C760FA190\n000000067F0000400200008A5900002B4000-000000067F0000400200008A5900002B8000__00000038E67ABFA0\n000000067F0000400200008A5900002B4000-000000067F0000400200008A5900002B8000__0000003903F1CFE8\n000000067F0000400200008A5900002B4000-000000067F0000400200008A5900002B8000__0000003B99F7F8A0\n000000067F0000400200008A5900002B4000-000000067F0000400200008A5900002B8000__0000005D2FFFFB38\n000000067F0000400200008A5900002B4000-000000067F0000400200008A5900002B8000__00000073AD3FE6B8\n000000067F0000400200008A5900002B4000-000000067F0000400200008A5900002B8000__000000914E3F38F0\n000000067F0000400200008A5900002B4000-000000067F0000400200008A5900002B8000__000000931B33AE68\n000000067F0000400200008A5900002B4000-000000067F0000400200008A5900002B8000__000000931B9AFDF8\n000000067F0000400200008A5900002B8000-000000067F0000400200008A5900002BC000__0000001C725A2400\n000000067F0000400200008A5900002B8000-000000067F0000400200008A5900002BC000__0000001C760FA190\n000000067F0000400200008A5900002B8000-000000067F0000400200008A5900002BC000__00000038E67ABFA0\n000000067F0000400200008A5900002B8000-000000067F0000400200008A5900002BC000__0000003903F1CFE8\n000000067F0000400200008A5900002B8000-000000067F0000400200008A5900002BC000__0000003B99F7F8A0\n000000067F0000400200008A5900002B8000-000000067F0000400200008A5900002BC000__0000005D2FFFFB38\n000000067F0000400200008A5900002B8000-000000067F0000400200008A5900002BC000__00000073AD3FE6B8\n000000067F0000400200008A5900002B8000-000000067F0000400200008A5900002BC000__000000914E3F38F0\n
000000067F0000400200008A5900002B8000-000000067F0000400200008A5900002BC000__000000931B33AE68\n000000067F0000400200008A5900002B8000-000000067F0000400200008A5900002BC000__000000931B9AFDF8\n000000067F0000400200008A5900002BAD21-000000067F0000400200008A5900002C36DD__00000004FED1E2E1-000000059E81EB61\n000000067F0000400200008A5900002BC000-000000067F0000400200008A5900002C0000__0000001C725A2400\n000000067F0000400200008A5900002BC000-000000067F0000400200008A5900002C0000__0000001C760FA190\n000000067F0000400200008A5900002BC000-000000067F0000400200008A5900002C0000__00000038E67ABFA0\n000000067F0000400200008A5900002BC000-000000067F0000400200008A5900002C0000__0000003903F1CFE8\n000000067F0000400200008A5900002BC000-000000067F0000400200008A5900002C0000__0000003B99F7F8A0\n000000067F0000400200008A5900002BC000-000000067F0000400200008A5900002C0000__0000005D2FFFFB38\n000000067F0000400200008A5900002BC000-000000067F0000400200008A5900002C0000__00000073AD3FE6B8\n000000067F0000400200008A5900002BC000-000000067F0000400200008A5900002C0000__000000914E3F38F0\n000000067F0000400200008A5900002BC000-000000067F0000400200008A5900002C0000__000000931B33AE68\n000000067F0000400200008A5900002BC000-000000067F0000400200008A5900002C0000__000000931B9AFDF8\n000000067F0000400200008A5900002C0000-000000067F0000400200008A5900002C4000__0000001C725A2400\n000000067F0000400200008A5900002C0000-000000067F0000400200008A5900002C4000__0000001C760FA190\n000000067F0000400200008A5900002C0000-000000067F0000400200008A5900002C4000__00000038E67ABFA0\n000000067F0000400200008A5900002C0000-000000067F0000400200008A5900002C4000__0000003903F1CFE8\n000000067F0000400200008A5900002C0000-000000067F0000400200008A5900002C4000__0000003B99F7F8A0\n000000067F0000400200008A5900002C0000-000000067F0000400200008A5900002C4000__0000005D2FFFFB38\n000000067F0000400200008A5900002C0000-000000067F0000400200008A5900002C4000__00000073AD3FE6B8\n000000067F0000400200008A5900002C0000-000000067F0000400200008A5900002C4000__000000914E3F38F0\n000000067F0000400200008A590000
2C0000-000000067F0000400200008A5900002C4000__000000931B33AE68\n000000067F0000400200008A5900002C0000-000000067F0000400200008A5900002C4000__000000931B9AFDF8\n000000067F0000400200008A5900002C36DD-000000067F0000400200008A5900002CC0AA__00000004FED1E2E1-000000059E81EB61\n000000067F0000400200008A5900002C4000-000000067F0000400200008A5900002C8000__0000001C725A2400\n000000067F0000400200008A5900002C4000-000000067F0000400200008A5900002C8000__0000001C760FA190\n000000067F0000400200008A5900002C4000-000000067F0000400200008A5900002C8000__00000038E67ABFA0\n000000067F0000400200008A5900002C4000-000000067F0000400200008A5900002C8000__0000003903F1CFE8\n000000067F0000400200008A5900002C4000-000000067F0000400200008A5900002C8000__0000003B99F7F8A0\n000000067F0000400200008A5900002C4000-000000067F0000400200008A5900002C8000__0000005D2FFFFB38\n000000067F0000400200008A5900002C4000-000000067F0000400200008A5900002C8000__00000073AD3FE6B8\n000000067F0000400200008A5900002C4000-000000067F0000400200008A5900002C8000__000000914E3F38F0\n000000067F0000400200008A5900002C4000-000000067F0000400200008A5900002C8000__000000931B33AE68\n000000067F0000400200008A5900002C4000-000000067F0000400200008A5900002C8000__000000931B9AFDF8\n000000067F0000400200008A5900002C8000-000000067F0000400200008A5900002CC000__0000001C725A2400\n000000067F0000400200008A5900002C8000-000000067F0000400200008A5900002CC000__0000001C760FA190\n000000067F0000400200008A5900002C8000-000000067F0000400200008A5900002CC000__00000038E67ABFA0\n000000067F0000400200008A5900002C8000-000000067F0000400200008A5900002CC000__0000003903F1CFE8\n000000067F0000400200008A5900002C8000-000000067F0000400200008A5900002CC000__0000003B99F7F8A0\n000000067F0000400200008A5900002C8000-000000067F0000400200008A5900002CC000__0000005D2FFFFB38\n000000067F0000400200008A5900002C8000-000000067F0000400200008A5900002CC000__00000073AD3FE6B8\n000000067F0000400200008A5900002C8000-000000067F0000400200008A5900002CC000__000000914E3F38F0\n000000067F0000400200008A5900002C8000-000000067F0000400200008
A5900002CC000__000000931B33AE68\n000000067F0000400200008A5900002C8000-000000067F0000400200008A5900002CC000__000000931B9AFDF8\n000000067F0000400200008A5900002CC000-000000067F0000400200008A5900002D0000__0000001C725A2400\n000000067F0000400200008A5900002CC000-000000067F0000400200008A5900002D0000__0000001C760FA190\n000000067F0000400200008A5900002CC000-000000067F0000400200008A5900002D0000__00000038E67ABFA0\n000000067F0000400200008A5900002CC000-000000067F0000400200008A5900002D0000__0000003903F1CFE8\n000000067F0000400200008A5900002CC000-000000067F0000400200008A5900002D0000__0000003B99F7F8A0\n000000067F0000400200008A5900002CC000-000000067F0000400200008A5900002D0000__0000005D2FFFFB38\n000000067F0000400200008A5900002CC000-000000067F0000400200008A5900002D0000__00000073AD3FE6B8\n000000067F0000400200008A5900002CC000-000000067F0000400200008A5900002D0000__000000914E3F38F0\n000000067F0000400200008A5900002CC000-000000067F0000400200008A5900002D0000__000000931B33AE68\n000000067F0000400200008A5900002CC000-000000067F0000400200008A5900002D0000__000000931B9AFDF8\n000000067F0000400200008A5900002CC0AA-000000067F0000400200008A5900002D4A82__00000004FED1E2E1-000000059E81EB61\n000000067F0000400200008A5900002D0000-000000067F0000400200008A5900002D4000__0000001C725A2400\n000000067F0000400200008A5900002D0000-000000067F0000400200008A5900002D4000__0000001C760FA190\n000000067F0000400200008A5900002D0000-000000067F0000400200008A5900002D4000__00000038E67ABFA0\n000000067F0000400200008A5900002D0000-000000067F0000400200008A5900002D4000__0000003903F1CFE8\n000000067F0000400200008A5900002D0000-000000067F0000400200008A5900002D4000__0000003B99F7F8A0\n000000067F0000400200008A5900002D0000-000000067F0000400200008A5900002D4000__0000005D2FFFFB38\n000000067F0000400200008A5900002D0000-000000067F0000400200008A5900002D4000__00000073AD3FE6B8\n000000067F0000400200008A5900002D0000-000000067F0000400200008A5900002D4000__000000914E3F38F0\n000000067F0000400200008A5900002D0000-000000067F0000400200008A5900002D4000__000000931B33AE6
8\n000000067F0000400200008A5900002D0000-000000067F0000400200008A5900002D4000__000000931B9AFDF8\n000000067F0000400200008A5900002D4000-000000067F0000400200008A5900002D8000__0000001C725A2400\n000000067F0000400200008A5900002D4000-000000067F0000400200008A5900002D8000__0000001C760FA190\n000000067F0000400200008A5900002D4000-000000067F0000400200008A5900002D8000__00000038E67ABFA0\n000000067F0000400200008A5900002D4000-000000067F0000400200008A5900002D8000__0000003903F1CFE8\n000000067F0000400200008A5900002D4000-000000067F0000400200008A5900002D8000__0000003B99F7F8A0\n000000067F0000400200008A5900002D4000-000000067F0000400200008A5900002D8000__0000005D2FFFFB38\n000000067F0000400200008A5900002D4000-000000067F0000400200008A5900002D8000__00000073AD3FE6B8\n000000067F0000400200008A5900002D4000-000000067F0000400200008A5900002D8000__000000914E3F38F0\n000000067F0000400200008A5900002D4000-000000067F0000400200008A5900002D8000__000000931B33AE68\n000000067F0000400200008A5900002D4000-000000067F0000400200008A5900002D8000__000000931B9AFDF8\n000000067F0000400200008A5900002D4A82-000000067F0000400200008A5900002DD480__00000004FED1E2E1-000000059E81EB61\n000000067F0000400200008A5900002D8000-000000067F0000400200008A5900002DC000__0000001C725A2400\n000000067F0000400200008A5900002D8000-000000067F0000400200008A5900002DC000__0000001C760FA190\n000000067F0000400200008A5900002D8000-000000067F0000400200008A5900002DC000__00000038E67ABFA0\n000000067F0000400200008A5900002D8000-000000067F0000400200008A5900002DC000__0000003903F1CFE8\n000000067F0000400200008A5900002D8000-000000067F0000400200008A5900002DC000__0000003B99F7F8A0\n000000067F0000400200008A5900002D8000-000000067F0000400200008A5900002DC000__0000005D2FFFFB38\n000000067F0000400200008A5900002D8000-000000067F0000400200008A5900002DC000__00000073AD3FE6B8\n000000067F0000400200008A5900002D8000-000000067F0000400200008A5900002DC000__000000914E3F38F0\n000000067F0000400200008A5900002D8000-000000067F0000400200008A5900002DC000__000000931B33AE68\n000000067F0000400200008A590
0002D8000-000000067F0000400200008A5900002DC000__000000931B9AFDF8\n000000067F0000400200008A5900002DC000-000000067F0000400200008A5900002E0000__0000001C725A2400\n000000067F0000400200008A5900002DC000-000000067F0000400200008A5900002E0000__0000001C760FA190\n000000067F0000400200008A5900002DC000-000000067F0000400200008A5900002E0000__00000038E67ABFA0\n000000067F0000400200008A5900002DC000-000000067F0000400200008A5900002E0000__0000003903F1CFE8\n000000067F0000400200008A5900002DC000-000000067F0000400200008A5900002E0000__0000003B99F7F8A0\n000000067F0000400200008A5900002DC000-000000067F0000400200008A5900002E0000__0000005D2FFFFB38\n000000067F0000400200008A5900002DC000-000000067F0000400200008A5900002E0000__00000073AD3FE6B8\n000000067F0000400200008A5900002DC000-000000067F0000400200008A5900002E0000__000000914E3F38F0\n000000067F0000400200008A5900002DC000-000000067F0000400200008A5900002E0000__000000931B33AE68\n000000067F0000400200008A5900002DC000-000000067F0000400200008A5900002E0000__000000931B9AFDF8\n000000067F0000400200008A5900002DD480-000000067F0000400200008A5900002E5E6E__00000004FED1E2E1-000000059E81EB61\n000000067F0000400200008A5900002E0000-000000067F0000400200008A5900002E4000__0000001C725A2400\n000000067F0000400200008A5900002E0000-000000067F0000400200008A5900002E4000__0000001C760FA190\n000000067F0000400200008A5900002E0000-000000067F0000400200008A5900002E4000__00000038E67ABFA0\n000000067F0000400200008A5900002E0000-000000067F0000400200008A5900002E4000__0000003903F1CFE8\n000000067F0000400200008A5900002E0000-000000067F0000400200008A5900002E4000__0000003B99F7F8A0\n000000067F0000400200008A5900002E0000-000000067F0000400200008A5900002E4000__0000005D2FFFFB38\n000000067F0000400200008A5900002E0000-000000067F0000400200008A5900002E4000__00000073AD3FE6B8\n000000067F0000400200008A5900002E0000-000000067F0000400200008A5900002E4000__000000914E3F38F0\n000000067F0000400200008A5900002E0000-000000067F0000400200008A5900002E4000__000000931B33AE68\n000000067F0000400200008A5900002E0000-000000067F0000400200
008A5900002E4000__000000931B9AFDF8\n000000067F0000400200008A5900002E4000-000000067F0000400200008A5900002E8000__0000001C725A2400\n000000067F0000400200008A5900002E4000-000000067F0000400200008A5900002E8000__0000001C760FA190\n000000067F0000400200008A5900002E4000-000000067F0000400200008A5900002E8000__00000038E67ABFA0\n000000067F0000400200008A5900002E4000-000000067F0000400200008A5900002E8000__0000003903F1CFE8\n000000067F0000400200008A5900002E4000-000000067F0000400200008A5900002E8000__0000003B99F7F8A0\n000000067F0000400200008A5900002E4000-000000067F0000400200008A5900002E8000__0000005D2FFFFB38\n000000067F0000400200008A5900002E4000-000000067F0000400200008A5900002E8000__00000073AD3FE6B8\n000000067F0000400200008A5900002E4000-000000067F0000400200008A5900002E8000__000000914E3F38F0\n000000067F0000400200008A5900002E4000-000000067F0000400200008A5900002E8000__000000931B33AE68\n000000067F0000400200008A5900002E4000-000000067F0000400200008A5900002E8000__000000931B9AFDF8\n000000067F0000400200008A5900002E5E6E-000000067F0000400200008A5900002EE857__00000004FED1E2E1-000000059E81EB61\n000000067F0000400200008A5900002E8000-000000067F0000400200008A5900002EC000__0000001C725A2400\n000000067F0000400200008A5900002E8000-000000067F0000400200008A5900002EC000__0000001C760FA190\n000000067F0000400200008A5900002E8000-000000067F0000400200008A5900002EC000__00000038E67ABFA0\n000000067F0000400200008A5900002E8000-000000067F0000400200008A5900002EC000__0000003903F1CFE8\n000000067F0000400200008A5900002E8000-000000067F0000400200008A5900002EC000__0000003B99F7F8A0\n000000067F0000400200008A5900002E8000-000000067F0000400200008A5900002EC000__0000005D2FFFFB38\n000000067F0000400200008A5900002E8000-000000067F0000400200008A5900002EC000__00000073AD3FE6B8\n000000067F0000400200008A5900002E8000-000000067F0000400200008A5900002EC000__000000914E3F38F0\n000000067F0000400200008A5900002E8000-000000067F0000400200008A5900002EC000__000000931B33AE68\n000000067F0000400200008A5900002E8000-000000067F0000400200008A5900002EC000__000000931B9A
FDF8\n000000067F0000400200008A5900002EC000-000000067F0000400200008A5900002F0000__0000001C725A2400\n000000067F0000400200008A5900002EC000-000000067F0000400200008A5900002F0000__0000001C760FA190\n000000067F0000400200008A5900002EC000-000000067F0000400200008A5900002F0000__00000038E67ABFA0\n000000067F0000400200008A5900002EC000-000000067F0000400200008A5900002F0000__0000003903F1CFE8\n000000067F0000400200008A5900002EC000-000000067F0000400200008A5900002F0000__0000003B99F7F8A0\n000000067F0000400200008A5900002EC000-000000067F0000400200008A5900002F0000__0000005D2FFFFB38\n000000067F0000400200008A5900002EC000-000000067F0000400200008A5900002F0000__00000073AD3FE6B8\n000000067F0000400200008A5900002EC000-000000067F0000400200008A5900002F0000__000000914E3F38F0\n000000067F0000400200008A5900002EC000-000000067F0000400200008A5900002F0000__000000931B33AE68\n000000067F0000400200008A5900002EC000-000000067F0000400200008A5900002F0000__000000931B9AFDF8\n000000067F0000400200008A5900002EE857-000000067F0000400200008A5900002F722B__00000004FED1E2E1-000000059E81EB61\n000000067F0000400200008A5900002F0000-000000067F0000400200008A5900002F4000__0000001C725A2400\n000000067F0000400200008A5900002F0000-000000067F0000400200008A5900002F4000__0000001C760FA190\n000000067F0000400200008A5900002F0000-000000067F0000400200008A5900002F4000__00000038E67ABFA0\n000000067F0000400200008A5900002F0000-000000067F0000400200008A5900002F4000__0000003903F1CFE8\n000000067F0000400200008A5900002F0000-000000067F0000400200008A5900002F4000__0000003B99F7F8A0\n000000067F0000400200008A5900002F0000-000000067F0000400200008A5900002F4000__0000005D2FFFFB38\n000000067F0000400200008A5900002F0000-000000067F0000400200008A5900002F4000__00000073AD3FE6B8\n000000067F0000400200008A5900002F0000-000000067F0000400200008A5900002F4000__000000914E3F38F0\n000000067F0000400200008A5900002F0000-000000067F0000400200008A5900002F4000__000000931B33AE68\n000000067F0000400200008A5900002F0000-000000067F0000400200008A5900002F4000__000000931B9AFDF8\n000000067F0000400200008A
5900002F4000-000000067F0000400200008A5900002F8000__0000001C725A2400\n000000067F0000400200008A5900002F4000-000000067F0000400200008A5900002F8000__0000001C760FA190\n000000067F0000400200008A5900002F4000-000000067F0000400200008A5900002F8000__00000038E67ABFA0\n000000067F0000400200008A5900002F4000-000000067F0000400200008A5900002F8000__0000003903F1CFE8\n000000067F0000400200008A5900002F4000-000000067F0000400200008A5900002F8000__0000003B99F7F8A0\n000000067F0000400200008A5900002F4000-000000067F0000400200008A5900002F8000__0000005D2FFFFB38\n000000067F0000400200008A5900002F4000-000000067F0000400200008A5900002F8000__00000073AD3FE6B8\n000000067F0000400200008A5900002F4000-000000067F0000400200008A5900002F8000__000000914E3F38F0\n000000067F0000400200008A5900002F4000-000000067F0000400200008A5900002F8000__000000931B33AE68\n000000067F0000400200008A5900002F4000-000000067F0000400200008A5900002F8000__000000931B9AFDF8\n000000067F0000400200008A5900002F722B-000000067F0000400200008A5900002FFBF0__00000004FED1E2E1-000000059E81EB61\n000000067F0000400200008A5900002F8000-000000067F0000400200008A5900002FC000__0000001C725A2400\n000000067F0000400200008A5900002F8000-000000067F0000400200008A5900002FC000__0000001C760FA190\n000000067F0000400200008A5900002F8000-000000067F0000400200008A5900002FC000__00000038E67ABFA0\n000000067F0000400200008A5900002F8000-000000067F0000400200008A5900002FC000__0000003903F1CFE8\n000000067F0000400200008A5900002F8000-000000067F0000400200008A5900002FC000__0000003B99F7F8A0\n000000067F0000400200008A5900002F8000-000000067F0000400200008A5900002FC000__0000005D2FFFFB38\n000000067F0000400200008A5900002F8000-000000067F0000400200008A5900002FC000__00000073AD3FE6B8\n000000067F0000400200008A5900002F8000-000000067F0000400200008A5900002FC000__000000914E3F38F0\n000000067F0000400200008A5900002F8000-000000067F0000400200008A5900002FC000__000000931B33AE68\n000000067F0000400200008A5900002F8000-000000067F0000400200008A5900002FC000__000000931B9AFDF8\n000000067F0000400200008A5900002FC000-000000067F0000400
200008A590000300000__0000001C725A2400\n000000067F0000400200008A5900002FC000-000000067F0000400200008A590000300000__0000001C760FA190\n000000067F0000400200008A5900002FC000-000000067F0000400200008A590000300000__00000038E67ABFA0\n000000067F0000400200008A5900002FC000-000000067F0000400200008A590000300000__0000003903F1CFE8\n000000067F0000400200008A5900002FC000-000000067F0000400200008A590000300000__0000003B99F7F8A0\n000000067F0000400200008A5900002FC000-000000067F0000400200008A590000300000__0000005D2FFFFB38\n000000067F0000400200008A5900002FC000-000000067F0000400200008A590000300000__00000073AD3FE6B8\n000000067F0000400200008A5900002FC000-000000067F0000400200008A590000300000__000000914E3F38F0\n000000067F0000400200008A5900002FC000-000000067F0000400200008A590000300000__000000931B33AE68\n000000067F0000400200008A5900002FC000-000000067F0000400200008A590000300000__000000931B9AFDF8\n000000067F0000400200008A5900002FFBF0-000000067F0000400200008A5900003085CB__00000004FED1E2E1-000000059E81EB61\n000000067F0000400200008A590000300000-000000067F0000400200008A590000304000__0000001C725A2400\n000000067F0000400200008A590000300000-000000067F0000400200008A590000304000__0000001C760FA190\n000000067F0000400200008A590000300000-000000067F0000400200008A590000304000__00000038E67ABFA0\n000000067F0000400200008A590000300000-000000067F0000400200008A590000304000__0000003903F1CFE8\n000000067F0000400200008A590000300000-000000067F0000400200008A590000304000__0000003B99F7F8A0\n000000067F0000400200008A590000300000-000000067F0000400200008A590000304000__0000005D2FFFFB38\n000000067F0000400200008A590000300000-000000067F0000400200008A590000304000__00000073AD3FE6B8\n000000067F0000400200008A590000300000-000000067F0000400200008A590000304000__000000914E3F38F0\n000000067F0000400200008A590000300000-000000067F0000400200008A590000304000__000000931B33AE68\n000000067F0000400200008A590000300000-000000067F0000400200008A590000304000__000000931B9AFDF8\n000000067F0000400200008A590000304000-000000067F0000400200008A590000308000__0000001C7
25A2400\n000000067F0000400200008A590000304000-000000067F0000400200008A590000308000__0000001C760FA190\n000000067F0000400200008A590000304000-000000067F0000400200008A590000308000__00000038E67ABFA0\n000000067F0000400200008A590000304000-000000067F0000400200008A590000308000__0000003903F1CFE8\n000000067F0000400200008A590000304000-000000067F0000400200008A590000308000__0000003B99F7F8A0\n000000067F0000400200008A590000304000-000000067F0000400200008A590000308000__0000005D2FFFFB38\n000000067F0000400200008A590000304000-000000067F0000400200008A590000308000__00000073AD3FE6B8\n000000067F0000400200008A590000304000-000000067F0000400200008A590000308000__000000914E3F38F0\n000000067F0000400200008A590000304000-000000067F0000400200008A590000308000__000000931B33AE68\n000000067F0000400200008A590000304000-000000067F0000400200008A590000308000__000000931B9AFDF8\n000000067F0000400200008A590000308000-000000067F0000400200008A59000030C000__000000067DFFFF90\n000000067F0000400200008A590000308000-000000067F0000400200008A59000030C000__0000001C760FA190\n000000067F0000400200008A590000308000-000000067F0000400200008A59000030C000__00000038E67ABFA0\n000000067F0000400200008A590000308000-000000067F0000400200008A59000030C000__0000003903F1CFE8\n000000067F0000400200008A590000308000-000000067F0000400200008A59000030C000__0000003B99F7F8A0\n000000067F0000400200008A590000308000-000000067F0000400200008A59000030C000__0000005D2FFFFB38\n000000067F0000400200008A590000308000-000000067F0000400200008A59000030C000__00000073AD3FE6B8\n000000067F0000400200008A590000308000-000000067F0000400200008A59000030C000__000000914E3F38F0\n000000067F0000400200008A590000308000-000000067F0000400200008A59000030C000__000000931B33AE68\n000000067F0000400200008A590000308000-000000067F0000400200008A59000030C000__000000931B9AFDF8\n000000067F0000400200008A5900003085CB-000000067F0000400200008A590100000000__00000004FED1E2E1-000000059E81EB61\n000000067F0000400200008A590000308891-000000067F0000400200008A59000031126B__000000059E81EB61-000000064E25E851\n0000
00067F0000400200008A59000030C000-000000067F0000400200008A590000310000__000000067DFFFF90\n000000067F0000400200008A59000030C000-000000067F0000400200008A590000310000__0000001C760FA190\n000000067F0000400200008A59000030C000-000000067F0000400200008A590000310000__00000038E67ABFA0\n000000067F0000400200008A59000030C000-000000067F0000400200008A590000310000__0000003903F1CFE8\n000000067F0000400200008A59000030C000-000000067F0000400200008A590000310000__0000003B99F7F8A0\n000000067F0000400200008A59000030C000-000000067F0000400200008A590000310000__0000005D2FFFFB38\n000000067F0000400200008A59000030C000-000000067F0000400200008A590000310000__00000073AD3FE6B8\n000000067F0000400200008A59000030C000-000000067F0000400200008A590000310000__000000914E3F38F0\n000000067F0000400200008A59000030C000-000000067F0000400200008A590000310000__000000931B33AE68\n000000067F0000400200008A59000030C000-000000067F0000400200008A590000310000__000000931B9AFDF8\n000000067F0000400200008A590000310000-000000067F0000400200008A590000314000__000000067DFFFF90\n000000067F0000400200008A590000310000-000000067F0000400200008A590000314000__0000001C760FA190\n000000067F0000400200008A590000310000-000000067F0000400200008A590000314000__00000038E67ABFA0\n000000067F0000400200008A590000310000-000000067F0000400200008A590000314000__0000003903F1CFE8\n000000067F0000400200008A590000310000-000000067F0000400200008A590000314000__0000003B99F7F8A0\n000000067F0000400200008A590000310000-000000067F0000400200008A590000314000__0000005D2FFFFB38\n000000067F0000400200008A590000310000-000000067F0000400200008A590000314000__00000073AD3FE6B8\n000000067F0000400200008A590000310000-000000067F0000400200008A590000314000__000000914E3F38F0\n000000067F0000400200008A590000310000-000000067F0000400200008A590000314000__000000931B33AE68\n000000067F0000400200008A590000310000-000000067F0000400200008A590000314000__000000931B9AFDF8\n000000067F0000400200008A59000031126B-000000067F0000400200008A590000319C61__000000059E81EB61-000000064E25E851\n000000067F0000400200008A5900003140
00-000000067F0000400200008A590000318000__000000067DFFFF90\n000000067F0000400200008A590000314000-000000067F0000400200008A590000318000__0000001C760FA190\n000000067F0000400200008A590000314000-000000067F0000400200008A590000318000__00000038E67ABFA0\n000000067F0000400200008A590000314000-000000067F0000400200008A590000318000__0000003903F1CFE8\n000000067F0000400200008A590000314000-000000067F0000400200008A590000318000__0000003B99F7F8A0\n000000067F0000400200008A590000314000-000000067F0000400200008A590000318000__0000005D2FFFFB38\n000000067F0000400200008A590000314000-000000067F0000400200008A590000318000__00000073AD3FE6B8\n000000067F0000400200008A590000314000-000000067F0000400200008A590000318000__000000914E3F38F0\n000000067F0000400200008A590000314000-000000067F0000400200008A590000318000__000000931B33AE68\n000000067F0000400200008A590000314000-000000067F0000400200008A590000318000__000000931B9AFDF8\n000000067F0000400200008A590000318000-000000067F0000400200008A59000031C000__000000067DFFFF90\n000000067F0000400200008A590000318000-000000067F0000400200008A59000031C000__0000001C760FA190\n000000067F0000400200008A590000318000-000000067F0000400200008A59000031C000__00000038E67ABFA0\n000000067F0000400200008A590000318000-000000067F0000400200008A59000031C000__0000003903F1CFE8\n000000067F0000400200008A590000318000-000000067F0000400200008A59000031C000__0000003B99F7F8A0\n000000067F0000400200008A590000318000-000000067F0000400200008A59000031C000__0000005D2FFFFB38\n000000067F0000400200008A590000318000-000000067F0000400200008A59000031C000__00000073AD3FE6B8\n000000067F0000400200008A590000318000-000000067F0000400200008A59000031C000__000000914E3F38F0\n000000067F0000400200008A590000318000-000000067F0000400200008A59000031C000__000000931B33AE68\n000000067F0000400200008A590000318000-000000067F0000400200008A59000031C000__000000931B9AFDF8\n000000067F0000400200008A590000319C61-000000067F0000400200008A590000322645__000000059E81EB61-000000064E25E851\n000000067F0000400200008A59000031C000-000000067F0000400200008A590
000320000__000000067DFFFF90\n000000067F0000400200008A59000031C000-000000067F0000400200008A590000320000__0000001C760FA190\n000000067F0000400200008A59000031C000-000000067F0000400200008A590000320000__00000038E67ABFA0\n000000067F0000400200008A59000031C000-000000067F0000400200008A590000320000__0000003903F1CFE8\n000000067F0000400200008A59000031C000-000000067F0000400200008A590000320000__0000003B99F7F8A0\n000000067F0000400200008A59000031C000-000000067F0000400200008A590000320000__0000005D2FFFFB38\n000000067F0000400200008A59000031C000-000000067F0000400200008A590000320000__00000073AD3FE6B8\n000000067F0000400200008A59000031C000-000000067F0000400200008A590000320000__000000914E3F38F0\n000000067F0000400200008A59000031C000-000000067F0000400200008A590000320000__000000931B33AE68\n000000067F0000400200008A59000031C000-000000067F0000400200008A590000320000__000000931B9AFDF8\n000000067F0000400200008A590000320000-000000067F0000400200008A590000324000__000000067DFFFF90\n000000067F0000400200008A590000320000-000000067F0000400200008A590000324000__0000001C760FA190\n000000067F0000400200008A590000320000-000000067F0000400200008A590000324000__00000038E67ABFA0\n000000067F0000400200008A590000320000-000000067F0000400200008A590000324000__0000003903F1CFE8\n000000067F0000400200008A590000320000-000000067F0000400200008A590000324000__0000003B99F7F8A0\n000000067F0000400200008A590000320000-000000067F0000400200008A590000324000__0000005D2FFFFB38\n000000067F0000400200008A590000320000-000000067F0000400200008A590000324000__00000073AD3FE6B8\n000000067F0000400200008A590000320000-000000067F0000400200008A590000324000__000000914E3F38F0\n000000067F0000400200008A590000320000-000000067F0000400200008A590000324000__000000931B33AE68\n000000067F0000400200008A590000320000-000000067F0000400200008A590000324000__000000931B9AFDF8\n000000067F0000400200008A590000322645-000000067F0000400200008A59000032B01B__000000059E81EB61-000000064E25E851\n000000067F0000400200008A590000324000-000000067F0000400200008A590000328000__000000067DFFFF90\n0
00000067F0000400200008A590000324000-000000067F0000400200008A590000328000__0000001C760FA190\n000000067F0000400200008A590000324000-000000067F0000400200008A590000328000__00000038E67ABFA0\n000000067F0000400200008A590000324000-000000067F0000400200008A590000328000__0000003903F1CFE8\n000000067F0000400200008A590000324000-000000067F0000400200008A590000328000__0000003B99F7F8A0\n000000067F0000400200008A590000324000-000000067F0000400200008A590000328000__0000005D2FFFFB38\n000000067F0000400200008A590000324000-000000067F0000400200008A590000328000__00000073AD3FE6B8\n000000067F0000400200008A590000324000-000000067F0000400200008A590000328000__000000914E3F38F0\n000000067F0000400200008A590000324000-000000067F0000400200008A590000328000__000000931B33AE68\n000000067F0000400200008A590000324000-000000067F0000400200008A590000328000__000000931B9AFDF8\n000000067F0000400200008A590000328000-000000067F0000400200008A59000032C000__000000067DFFFF90\n000000067F0000400200008A590000328000-000000067F0000400200008A59000032C000__0000001C760FA190\n000000067F0000400200008A590000328000-000000067F0000400200008A59000032C000__00000038E67ABFA0\n000000067F0000400200008A590000328000-000000067F0000400200008A59000032C000__0000003903F1CFE8\n000000067F0000400200008A590000328000-000000067F0000400200008A59000032C000__0000003B99F7F8A0\n000000067F0000400200008A590000328000-000000067F0000400200008A59000032C000__0000005D2FFFFB38\n000000067F0000400200008A590000328000-000000067F0000400200008A59000032C000__00000073AD3FE6B8\n000000067F0000400200008A590000328000-000000067F0000400200008A59000032C000__000000914E3F38F0\n000000067F0000400200008A590000328000-000000067F0000400200008A59000032C000__000000931B33AE68\n000000067F0000400200008A590000328000-000000067F0000400200008A59000032C000__000000931B9AFDF8\n000000067F0000400200008A59000032B01B-000000067F0000400200008A5900003339E7__000000059E81EB61-000000064E25E851\n000000067F0000400200008A59000032C000-000000067F0000400200008A590000330000__000000067DFFFF90\n000000067F0000400200008A5900003
2C000-000000067F0000400200008A590000330000__0000001C760FA190\n000000067F0000400200008A59000032C000-000000067F0000400200008A590000330000__00000038E67ABFA0\n000000067F0000400200008A59000032C000-000000067F0000400200008A590000330000__0000003903F1CFE8\n000000067F0000400200008A59000032C000-000000067F0000400200008A590000330000__0000003B99F7F8A0\n000000067F0000400200008A59000032C000-000000067F0000400200008A590000330000__0000005D2FFFFB38\n000000067F0000400200008A59000032C000-000000067F0000400200008A590000330000__00000073AD3FE6B8\n000000067F0000400200008A59000032C000-000000067F0000400200008A590000330000__000000914E3F38F0\n000000067F0000400200008A59000032C000-000000067F0000400200008A590000330000__000000931B33AE68\n000000067F0000400200008A59000032C000-000000067F0000400200008A590000330000__000000931B9AFDF8\n000000067F0000400200008A590000330000-000000067F0000400200008A590000334000__000000067DFFFF90\n000000067F0000400200008A590000330000-000000067F0000400200008A590000334000__0000001C760FA190\n000000067F0000400200008A590000330000-000000067F0000400200008A590000334000__00000038E67ABFA0\n000000067F0000400200008A590000330000-000000067F0000400200008A590000334000__0000003903F1CFE8\n000000067F0000400200008A590000330000-000000067F0000400200008A590000334000__0000003B99F7F8A0\n000000067F0000400200008A590000330000-000000067F0000400200008A590000334000__0000005D2FFFFB38\n000000067F0000400200008A590000330000-000000067F0000400200008A590000334000__00000073AD3FE6B8\n000000067F0000400200008A590000330000-000000067F0000400200008A590000334000__000000914E3F38F0\n000000067F0000400200008A590000330000-000000067F0000400200008A590000334000__000000931B33AE68\n000000067F0000400200008A590000330000-000000067F0000400200008A590000334000__000000931B9AFDF8\n000000067F0000400200008A5900003339E7-000000067F0000400200008A59000033C3C0__000000059E81EB61-000000064E25E851\n000000067F0000400200008A590000334000-000000067F0000400200008A590000338000__000000067DFFFF90\n000000067F0000400200008A590000334000-000000067F0000400200008A
590000338000__0000001C760FA190\n000000067F0000400200008A590000334000-000000067F0000400200008A590000338000__00000038E67ABFA0\n000000067F0000400200008A590000334000-000000067F0000400200008A590000338000__0000003903F1CFE8\n000000067F0000400200008A590000334000-000000067F0000400200008A590000338000__0000003B99F7F8A0\n000000067F0000400200008A590000334000-000000067F0000400200008A590000338000__0000005D2FFFFB38\n000000067F0000400200008A590000334000-000000067F0000400200008A590000338000__00000073AD3FE6B8\n000000067F0000400200008A590000334000-000000067F0000400200008A590000338000__000000914E3F38F0\n000000067F0000400200008A590000334000-000000067F0000400200008A590000338000__000000931B33AE68\n000000067F0000400200008A590000334000-000000067F0000400200008A590000338000__000000931B9AFDF8\n000000067F0000400200008A590000338000-000000067F0000400200008A59000033C000__000000067DFFFF90\n000000067F0000400200008A590000338000-000000067F0000400200008A59000033C000__0000001C760FA190\n000000067F0000400200008A590000338000-000000067F0000400200008A59000033C000__00000038E67ABFA0\n000000067F0000400200008A590000338000-000000067F0000400200008A59000033C000__0000003903F1CFE8\n000000067F0000400200008A590000338000-000000067F0000400200008A59000033C000__0000003B99F7F8A0\n000000067F0000400200008A590000338000-000000067F0000400200008A59000033C000__0000005D2FFFFB38\n000000067F0000400200008A590000338000-000000067F0000400200008A59000033C000__00000073AD3FE6B8\n000000067F0000400200008A590000338000-000000067F0000400200008A59000033C000__000000914E3F38F0\n000000067F0000400200008A590000338000-000000067F0000400200008A59000033C000__000000931B33AE68\n000000067F0000400200008A590000338000-000000067F0000400200008A59000033C000__000000931B9AFDF8\n000000067F0000400200008A59000033C000-000000067F0000400200008A590000340000__000000067DFFFF90\n000000067F0000400200008A59000033C000-000000067F0000400200008A590000340000__0000001C760FA190\n000000067F0000400200008A59000033C000-000000067F0000400200008A590000340000__00000038E67ABFA0\n000000067F00004
00200008A59000033C000-000000067F0000400200008A590000340000__0000003903F1CFE8\n000000067F0000400200008A59000033C000-000000067F0000400200008A590000340000__0000003B99F7F8A0\n000000067F0000400200008A59000033C000-000000067F0000400200008A590000340000__0000005D2FFFFB38\n000000067F0000400200008A59000033C000-000000067F0000400200008A590000340000__00000073AD3FE6B8\n000000067F0000400200008A59000033C000-000000067F0000400200008A590000340000__000000914E3F38F0\n000000067F0000400200008A59000033C000-000000067F0000400200008A590000340000__000000931B33AE68\n000000067F0000400200008A59000033C000-000000067F0000400200008A590000340000__000000931B9AFDF8\n000000067F0000400200008A59000033C3C0-000000067F0000400200008A590000344D8E__000000059E81EB61-000000064E25E851\n000000067F0000400200008A590000340000-000000067F0000400200008A590000344000__000000067DFFFF90\n000000067F0000400200008A590000340000-000000067F0000400200008A590000344000__0000001C760FA190\n000000067F0000400200008A590000340000-000000067F0000400200008A590000344000__00000038E67ABFA0\n000000067F0000400200008A590000340000-000000067F0000400200008A590000344000__0000003903F1CFE8\n000000067F0000400200008A590000340000-000000067F0000400200008A590000344000__0000003B99F7F8A0\n000000067F0000400200008A590000340000-000000067F0000400200008A590000344000__0000005D2FFFFB38\n000000067F0000400200008A590000340000-000000067F0000400200008A590000344000__00000073AD3FE6B8\n000000067F0000400200008A590000340000-000000067F0000400200008A590000344000__000000914E3F38F0\n000000067F0000400200008A590000340000-000000067F0000400200008A590000344000__000000931B33AE68\n000000067F0000400200008A590000340000-000000067F0000400200008A590000344000__000000931B9AFDF8\n000000067F0000400200008A590000344000-000000067F0000400200008A590000348000__000000067DFFFF90\n000000067F0000400200008A590000344000-000000067F0000400200008A590000348000__0000001C760FA190\n000000067F0000400200008A590000344000-000000067F0000400200008A590000348000__00000038E67ABFA0\n000000067F0000400200008A590000344000-00000006
7F0000400200008A590000348000__0000003903F1CFE8\n000000067F0000400200008A590000344000-000000067F0000400200008A590000348000__0000003B99F7F8A0\n000000067F0000400200008A590000344000-000000067F0000400200008A590000348000__0000005D2FFFFB38\n000000067F0000400200008A590000344000-000000067F0000400200008A590000348000__00000073AD3FE6B8\n000000067F0000400200008A590000344000-000000067F0000400200008A590000348000__000000914E3F38F0\n000000067F0000400200008A590000344000-000000067F0000400200008A590000348000__000000931B33AE68\n000000067F0000400200008A590000344000-000000067F0000400200008A590000348000__000000931B9AFDF8\n000000067F0000400200008A590000344D8E-000000067F0000400200008A59000034D773__000000059E81EB61-000000064E25E851\n000000067F0000400200008A590000348000-000000067F0000400200008A59000034C000__000000067DFFFF90\n000000067F0000400200008A590000348000-000000067F0000400200008A59000034C000__0000001C760FA190\n000000067F0000400200008A590000348000-000000067F0000400200008A59000034C000__00000038E67ABFA0\n000000067F0000400200008A590000348000-000000067F0000400200008A59000034C000__0000003903F1CFE8\n000000067F0000400200008A590000348000-000000067F0000400200008A59000034C000__0000003B99F7F8A0\n000000067F0000400200008A590000348000-000000067F0000400200008A59000034C000__0000005D2FFFFB38\n000000067F0000400200008A590000348000-000000067F0000400200008A59000034C000__00000073AD3FE6B8\n000000067F0000400200008A590000348000-000000067F0000400200008A59000034C000__000000914E3F38F0\n000000067F0000400200008A590000348000-000000067F0000400200008A59000034C000__000000931B33AE68\n000000067F0000400200008A590000348000-000000067F0000400200008A59000034C000__000000931B9AFDF8\n000000067F0000400200008A59000034C000-000000067F0000400200008A590000350000__000000067DFFFF90\n000000067F0000400200008A59000034C000-000000067F0000400200008A590000350000__0000001C760FA190\n000000067F0000400200008A59000034C000-000000067F0000400200008A590000350000__00000038E67ABFA0\n000000067F0000400200008A59000034C000-000000067F0000400200008A590000350000__
0000003903F1CFE8\n000000067F0000400200008A59000034C000-000000067F0000400200008A590000350000__0000003B99F7F8A0\n000000067F0000400200008A59000034C000-000000067F0000400200008A590000350000__0000005D2FFFFB38\n000000067F0000400200008A59000034C000-000000067F0000400200008A590000350000__00000073AD3FE6B8\n000000067F0000400200008A59000034C000-000000067F0000400200008A590000350000__000000914E3F38F0\n000000067F0000400200008A59000034C000-000000067F0000400200008A590000350000__000000931B33AE68\n000000067F0000400200008A59000034C000-000000067F0000400200008A590000350000__000000931B9AFDF8\n000000067F0000400200008A59000034D773-000000067F0000400200008A590000356163__000000059E81EB61-000000064E25E851\n000000067F0000400200008A590000350000-000000067F0000400200008A590000354000__000000067DFFFF90\n000000067F0000400200008A590000350000-000000067F0000400200008A590000354000__0000001C760FA190\n000000067F0000400200008A590000350000-000000067F0000400200008A590000354000__00000038E67ABFA0\n000000067F0000400200008A590000350000-000000067F0000400200008A590000354000__0000003903F1CFE8\n000000067F0000400200008A590000350000-000000067F0000400200008A590000354000__0000003B99F7F8A0\n000000067F0000400200008A590000350000-000000067F0000400200008A590000354000__0000005D2FFFFB38\n000000067F0000400200008A590000350000-000000067F0000400200008A590000354000__00000073AD3FE6B8\n000000067F0000400200008A590000350000-000000067F0000400200008A590000354000__000000914E3F38F0\n000000067F0000400200008A590000350000-000000067F0000400200008A590000354000__000000931B33AE68\n000000067F0000400200008A590000350000-000000067F0000400200008A590000354000__000000931B9AFDF8\n000000067F0000400200008A590000354000-000000067F0000400200008A590000358000__000000067DFFFF90\n000000067F0000400200008A590000354000-000000067F0000400200008A590000358000__0000001C760FA190\n000000067F0000400200008A590000354000-000000067F0000400200008A590000358000__00000038E67ABFA0\n000000067F0000400200008A590000354000-000000067F0000400200008A590000358000__0000003903F1CFE8\n000000067F00
00400200008A590000354000-000000067F0000400200008A590000358000__0000003B99F7F8A0\n000000067F0000400200008A590000354000-000000067F0000400200008A590000358000__0000005D2FFFFB38\n000000067F0000400200008A590000354000-000000067F0000400200008A590000358000__00000073AD3FE6B8\n000000067F0000400200008A590000354000-000000067F0000400200008A590000358000__000000914E3F38F0\n000000067F0000400200008A590000354000-000000067F0000400200008A590000358000__000000931B33AE68\n000000067F0000400200008A590000354000-000000067F0000400200008A590000358000__000000931B9AFDF8\n000000067F0000400200008A590000356163-000000067F0000400200008A59000035EB54__000000059E81EB61-000000064E25E851\n000000067F0000400200008A590000358000-000000067F0000400200008A59000035C000__000000067DFFFF90\n000000067F0000400200008A590000358000-000000067F0000400200008A59000035C000__0000001C760FA190\n000000067F0000400200008A590000358000-000000067F0000400200008A59000035C000__00000038E67ABFA0\n000000067F0000400200008A590000358000-000000067F0000400200008A59000035C000__0000003903F1CFE8\n000000067F0000400200008A590000358000-000000067F0000400200008A59000035C000__0000003B99F7F8A0\n000000067F0000400200008A590000358000-000000067F0000400200008A59000035C000__0000005D2FFFFB38\n000000067F0000400200008A590000358000-000000067F0000400200008A59000035C000__00000073AD3FE6B8\n000000067F0000400200008A590000358000-000000067F0000400200008A59000035C000__000000914E3F38F0\n000000067F0000400200008A590000358000-000000067F0000400200008A59000035C000__000000931B33AE68\n000000067F0000400200008A590000358000-000000067F0000400200008A59000035C000__000000931B9AFDF8\n000000067F0000400200008A59000035C000-000000067F0000400200008A590000360000__000000067DFFFF90\n000000067F0000400200008A59000035C000-000000067F0000400200008A590000360000__0000001C760FA190\n000000067F0000400200008A59000035C000-000000067F0000400200008A590000360000__00000038E67ABFA0\n000000067F0000400200008A59000035C000-000000067F0000400200008A590000360000__0000003903F1CFE8\n000000067F0000400200008A59000035C000-00000
0067F0000400200008A590000360000__0000003B99F7F8A0\n000000067F0000400200008A59000035C000-000000067F0000400200008A590000360000__0000005D2FFFFB38\n000000067F0000400200008A59000035C000-000000067F0000400200008A590000360000__00000073AD3FE6B8\n000000067F0000400200008A59000035C000-000000067F0000400200008A590000360000__000000914E3F38F0\n000000067F0000400200008A59000035C000-000000067F0000400200008A590000360000__000000931B33AE68\n000000067F0000400200008A59000035C000-000000067F0000400200008A590000360000__000000931B9AFDF8\n000000067F0000400200008A59000035EB54-000000067F0000400200008A59000036753C__000000059E81EB61-000000064E25E851\n000000067F0000400200008A590000360000-000000067F0000400200008A590000364000__000000067DFFFF90\n000000067F0000400200008A590000360000-000000067F0000400200008A590000364000__0000001C760FA190\n000000067F0000400200008A590000360000-000000067F0000400200008A590000364000__00000038E67ABFA0\n000000067F0000400200008A590000360000-000000067F0000400200008A590000364000__0000003903F1CFE8\n000000067F0000400200008A590000360000-000000067F0000400200008A590000364000__0000003B99F7F8A0\n000000067F0000400200008A590000360000-000000067F0000400200008A590000364000__0000005D2FFFFB38\n000000067F0000400200008A590000360000-000000067F0000400200008A590000364000__00000073AD3FE6B8\n000000067F0000400200008A590000360000-000000067F0000400200008A590000364000__000000914E3F38F0\n000000067F0000400200008A590000360000-000000067F0000400200008A590000364000__000000931B33AE68\n000000067F0000400200008A590000360000-000000067F0000400200008A590000364000__000000931B9AFDF8\n000000067F0000400200008A590000364000-000000067F0000400200008A590000368000__000000067DFFFF90\n000000067F0000400200008A590000364000-000000067F0000400200008A590000368000__0000001C760FA190\n000000067F0000400200008A590000364000-000000067F0000400200008A590000368000__00000038E67ABFA0\n000000067F0000400200008A590000364000-000000067F0000400200008A590000368000__0000003903F1CFE8\n000000067F0000400200008A590000364000-000000067F0000400200008A59000036800
0__0000003B99F7F8A0\n000000067F0000400200008A590000364000-000000067F0000400200008A590000368000__0000005D2FFFFB38\n000000067F0000400200008A590000364000-000000067F0000400200008A590000368000__00000073AD3FE6B8\n000000067F0000400200008A590000364000-000000067F0000400200008A590000368000__000000914E3F38F0\n000000067F0000400200008A590000364000-000000067F0000400200008A590000368000__000000931B33AE68\n000000067F0000400200008A590000364000-000000067F0000400200008A590000368000__000000931B9AFDF8\n000000067F0000400200008A59000036753C-000000067F0000400200008A590100000000__000000059E81EB61-000000064E25E851\n000000067F0000400200008A59000036783E-000000067F0000400200008A590000370211__000000064E25E851-00000006FDCDDAF1\n000000067F0000400200008A590000368000-000000067F0000400200008A59000036C000__000000067DFFFF90\n000000067F0000400200008A590000368000-000000067F0000400200008A59000036C000__0000001C760FA190\n000000067F0000400200008A590000368000-000000067F0000400200008A59000036C000__00000038E67ABFA0\n000000067F0000400200008A590000368000-000000067F0000400200008A59000036C000__0000003903F1CFE8\n000000067F0000400200008A590000368000-000000067F0000400200008A59000036C000__0000003B99F7F8A0\n000000067F0000400200008A590000368000-000000067F0000400200008A59000036C000__0000005D2FFFFB38\n000000067F0000400200008A590000368000-000000067F0000400200008A59000036C000__00000073AD3FE6B8\n000000067F0000400200008A590000368000-000000067F0000400200008A59000036C000__000000914E3F38F0\n000000067F0000400200008A590000368000-000000067F0000400200008A59000036C000__000000931B33AE68\n000000067F0000400200008A590000368000-000000067F0000400200008A59000036C000__000000931B9AFDF8\n000000067F0000400200008A59000036C000-000000067F0000400200008A590000370000__000000067DFFFF90\n000000067F0000400200008A59000036C000-000000067F0000400200008A590000370000__0000001C760FA190\n000000067F0000400200008A59000036C000-000000067F0000400200008A590000370000__00000038E67ABFA0\n000000067F0000400200008A59000036C000-000000067F0000400200008A590000370000__0000003903
F1CFE8\n000000067F0000400200008A59000036C000-000000067F0000400200008A590000370000__0000003B99F7F8A0\n000000067F0000400200008A59000036C000-000000067F0000400200008A590000370000__0000005D2FFFFB38\n000000067F0000400200008A59000036C000-000000067F0000400200008A590000370000__00000073AD3FE6B8\n000000067F0000400200008A59000036C000-000000067F0000400200008A590000370000__000000914E3F38F0\n000000067F0000400200008A59000036C000-000000067F0000400200008A590000370000__000000931B33AE68\n000000067F0000400200008A59000036C000-000000067F0000400200008A590000370000__000000931B9AFDF8\n000000067F0000400200008A590000370000-000000067F0000400200008A590000374000__000000067DFFFF90\n000000067F0000400200008A590000370000-000000067F0000400200008A590000374000__0000001C760FA190\n000000067F0000400200008A590000370000-000000067F0000400200008A590000374000__00000038E67ABFA0\n000000067F0000400200008A590000370000-000000067F0000400200008A590000374000__0000003903F1CFE8\n000000067F0000400200008A590000370000-000000067F0000400200008A590000374000__0000003B99F7F8A0\n000000067F0000400200008A590000370000-000000067F0000400200008A590000374000__0000005D2FFFFB38\n000000067F0000400200008A590000370000-000000067F0000400200008A590000374000__00000073AD3FE6B8\n000000067F0000400200008A590000370000-000000067F0000400200008A590000374000__000000914E3F38F0\n000000067F0000400200008A590000370000-000000067F0000400200008A590000374000__000000931B33AE68\n000000067F0000400200008A590000370000-000000067F0000400200008A590000374000__000000931B9AFDF8\n000000067F0000400200008A590000370211-000000067F0000400200008A590000378BCB__000000064E25E851-00000006FDCDDAF1\n000000067F0000400200008A590000374000-000000067F0000400200008A590000378000__000000067DFFFF90\n000000067F0000400200008A590000374000-000000067F0000400200008A590000378000__0000001C760FA190\n000000067F0000400200008A590000374000-000000067F0000400200008A590000378000__00000038E67ABFA0\n000000067F0000400200008A590000374000-000000067F0000400200008A590000378000__0000003903F1CFE8\n000000067F000040020000
8A590000374000-000000067F0000400200008A590000378000__0000003B99F7F8A0\n000000067F0000400200008A590000374000-000000067F0000400200008A590000378000__0000005D2FFFFB38\n000000067F0000400200008A590000374000-000000067F0000400200008A590000378000__00000073AD3FE6B8\n000000067F0000400200008A590000374000-000000067F0000400200008A590000378000__000000914E3F38F0\n000000067F0000400200008A590000374000-000000067F0000400200008A590000378000__000000931B33AE68\n000000067F0000400200008A590000374000-000000067F0000400200008A590000378000__000000931B9AFDF8\n000000067F0000400200008A590000378000-000000067F0000400200008A59000037C000__000000067DFFFF90\n000000067F0000400200008A590000378000-000000067F0000400200008A59000037C000__0000001C760FA190\n000000067F0000400200008A590000378000-000000067F0000400200008A59000037C000__00000038E67ABFA0\n000000067F0000400200008A590000378000-000000067F0000400200008A59000037C000__0000003903F1CFE8\n000000067F0000400200008A590000378000-000000067F0000400200008A59000037C000__0000003B99F7F8A0\n000000067F0000400200008A590000378000-000000067F0000400200008A59000037C000__0000005D2FFFFB38\n000000067F0000400200008A590000378000-000000067F0000400200008A59000037C000__00000073AD3FE6B8\n000000067F0000400200008A590000378000-000000067F0000400200008A59000037C000__000000914E3F38F0\n000000067F0000400200008A590000378000-000000067F0000400200008A59000037C000__000000931B33AE68\n000000067F0000400200008A590000378000-000000067F0000400200008A59000037C000__000000931B9AFDF8\n000000067F0000400200008A590000378BCB-000000067F0000400200008A590000381599__000000064E25E851-00000006FDCDDAF1\n000000067F0000400200008A59000037C000-000000067F0000400200008A590000380000__000000067DFFFF90\n000000067F0000400200008A59000037C000-000000067F0000400200008A590000380000__0000001C760FA190\n000000067F0000400200008A59000037C000-000000067F0000400200008A590000380000__00000038E67ABFA0\n000000067F0000400200008A59000037C000-000000067F0000400200008A590000380000__0000003903F1CFE8\n000000067F0000400200008A59000037C000-000000067F00004
00200008A590000380000__0000003B99F7F8A0\n000000067F0000400200008A59000037C000-000000067F0000400200008A590000380000__0000005D2FFFFB38\n000000067F0000400200008A59000037C000-000000067F0000400200008A590000380000__00000073AD3FE6B8\n000000067F0000400200008A59000037C000-000000067F0000400200008A590000380000__000000914E3F38F0\n000000067F0000400200008A59000037C000-000000067F0000400200008A590000380000__000000931B33AE68\n000000067F0000400200008A59000037C000-000000067F0000400200008A590000380000__000000931B9AFDF8\n000000067F0000400200008A590000380000-000000067F0000400200008A590000384000__0000001C760FA190\n000000067F0000400200008A590000380000-000000067F0000400200008A590000384000__00000038E67ABFA0\n000000067F0000400200008A590000380000-000000067F0000400200008A590000384000__0000003903F1CFE8\n000000067F0000400200008A590000380000-000000067F0000400200008A590000384000__0000003B99F7F8A0\n000000067F0000400200008A590000380000-000000067F0000400200008A590000384000__0000005D2FFFFB38\n000000067F0000400200008A590000380000-000000067F0000400200008A590000384000__00000073AD3FE6B8\n000000067F0000400200008A590000380000-000000067F0000400200008A590000384000__000000914E3F38F0\n000000067F0000400200008A590000380000-000000067F0000400200008A590000384000__000000931B33AE68\n000000067F0000400200008A590000380000-000000067F0000400200008A590000384000__000000931B9AFDF8\n000000067F0000400200008A590000380000-030000000000000000000000000000000002__000000067DFFFF90\n000000067F0000400200008A590000381599-000000067F0000400200008A590000389F86__000000064E25E851-00000006FDCDDAF1\n000000067F0000400200008A590000384000-000000067F0000400200008A590000388000__0000001C760FA190\n000000067F0000400200008A590000384000-000000067F0000400200008A590000388000__00000038E67ABFA0\n000000067F0000400200008A590000384000-000000067F0000400200008A590000388000__0000003903F1CFE8\n000000067F0000400200008A590000384000-000000067F0000400200008A590000388000__0000003B99F7F8A0\n000000067F0000400200008A590000384000-000000067F0000400200008A590000388000__0000005
D2FFFFB38\n000000067F0000400200008A590000384000-000000067F0000400200008A590000388000__00000073AD3FE6B8\n000000067F0000400200008A590000384000-000000067F0000400200008A590000388000__000000914E3F38F0\n000000067F0000400200008A590000384000-000000067F0000400200008A590000388000__000000931B33AE68\n000000067F0000400200008A590000384000-000000067F0000400200008A590000388000__000000931B9AFDF8\n000000067F0000400200008A590000388000-000000067F0000400200008A59000038C000__0000001C760FA190\n000000067F0000400200008A590000388000-000000067F0000400200008A59000038C000__00000038E67ABFA0\n000000067F0000400200008A590000388000-000000067F0000400200008A59000038C000__0000003903F1CFE8\n000000067F0000400200008A590000388000-000000067F0000400200008A59000038C000__0000003B99F7F8A0\n000000067F0000400200008A590000388000-000000067F0000400200008A59000038C000__0000005D2FFFFB38\n000000067F0000400200008A590000388000-000000067F0000400200008A59000038C000__00000073AD3FE6B8\n000000067F0000400200008A590000388000-000000067F0000400200008A59000038C000__000000914E3F38F0\n000000067F0000400200008A590000388000-000000067F0000400200008A59000038C000__000000931B33AE68\n000000067F0000400200008A590000388000-000000067F0000400200008A59000038C000__000000931B9AFDF8\n000000067F0000400200008A590000389F86-000000067F0000400200008A590000392976__000000064E25E851-00000006FDCDDAF1\n000000067F0000400200008A59000038C000-000000067F0000400200008A590000390000__0000001C760FA190\n000000067F0000400200008A59000038C000-000000067F0000400200008A590000390000__00000038E67ABFA0\n000000067F0000400200008A59000038C000-000000067F0000400200008A590000390000__0000003903F1CFE8\n000000067F0000400200008A59000038C000-000000067F0000400200008A590000390000__0000003B99F7F8A0\n000000067F0000400200008A59000038C000-000000067F0000400200008A590000390000__0000005D2FFFFB38\n000000067F0000400200008A59000038C000-000000067F0000400200008A590000390000__00000073AD3FE6B8\n000000067F0000400200008A59000038C000-000000067F0000400200008A590000390000__000000914E3F38F0\n000000067F000040020
0008A59000038C000-000000067F0000400200008A590000390000__000000931B33AE68\n000000067F0000400200008A59000038C000-000000067F0000400200008A590000390000__000000931B9AFDF8\n000000067F0000400200008A590000390000-000000067F0000400200008A590000394000__0000001C760FA190\n000000067F0000400200008A590000390000-000000067F0000400200008A590000394000__00000038E67ABFA0\n000000067F0000400200008A590000390000-000000067F0000400200008A590000394000__0000003903F1CFE8\n000000067F0000400200008A590000390000-000000067F0000400200008A590000394000__0000003B99F7F8A0\n000000067F0000400200008A590000390000-000000067F0000400200008A590000394000__0000005D2FFFFB38\n000000067F0000400200008A590000390000-000000067F0000400200008A590000394000__00000073AD3FE6B8\n000000067F0000400200008A590000390000-000000067F0000400200008A590000394000__000000914E3F38F0\n000000067F0000400200008A590000390000-000000067F0000400200008A590000394000__000000931B33AE68\n000000067F0000400200008A590000390000-000000067F0000400200008A590000394000__000000931B9AFDF8\n000000067F0000400200008A590000392976-000000067F0000400200008A59000039B366__000000064E25E851-00000006FDCDDAF1\n000000067F0000400200008A590000394000-000000067F0000400200008A590000398000__0000001C760FA190\n000000067F0000400200008A590000394000-000000067F0000400200008A590000398000__00000038E67ABFA0\n000000067F0000400200008A590000394000-000000067F0000400200008A590000398000__0000003903F1CFE8\n000000067F0000400200008A590000394000-000000067F0000400200008A590000398000__0000003B99F7F8A0\n000000067F0000400200008A590000394000-000000067F0000400200008A590000398000__0000005D2FFFFB38\n000000067F0000400200008A590000394000-000000067F0000400200008A590000398000__00000073AD3FE6B8\n000000067F0000400200008A590000394000-000000067F0000400200008A590000398000__000000914E3F38F0\n000000067F0000400200008A590000394000-000000067F0000400200008A590000398000__000000931B33AE68\n000000067F0000400200008A590000394000-000000067F0000400200008A590000398000__000000931B9AFDF8\n000000067F0000400200008A590000398000-000000067F00
00400200008A59000039C000__0000001C760FA190\n000000067F0000400200008A590000398000-000000067F0000400200008A59000039C000__00000038E67ABFA0\n000000067F0000400200008A590000398000-000000067F0000400200008A59000039C000__0000003903F1CFE8\n000000067F0000400200008A590000398000-000000067F0000400200008A59000039C000__0000003B99F7F8A0\n000000067F0000400200008A590000398000-000000067F0000400200008A59000039C000__0000005D2FFFFB38\n000000067F0000400200008A590000398000-000000067F0000400200008A59000039C000__00000073AD3FE6B8\n000000067F0000400200008A590000398000-000000067F0000400200008A59000039C000__000000914E3F38F0\n000000067F0000400200008A590000398000-000000067F0000400200008A59000039C000__000000931B33AE68\n000000067F0000400200008A590000398000-000000067F0000400200008A59000039C000__000000931B9AFDF8\n000000067F0000400200008A59000039B366-000000067F0000400200008A5900003A3D42__000000064E25E851-00000006FDCDDAF1\n000000067F0000400200008A59000039C000-000000067F0000400200008A5900003A0000__0000001C760FA190\n000000067F0000400200008A59000039C000-000000067F0000400200008A5900003A0000__00000038E67ABFA0\n000000067F0000400200008A59000039C000-000000067F0000400200008A5900003A0000__0000003903F1CFE8\n000000067F0000400200008A59000039C000-000000067F0000400200008A5900003A0000__0000003B99F7F8A0\n000000067F0000400200008A59000039C000-000000067F0000400200008A5900003A0000__0000005D2FFFFB38\n000000067F0000400200008A59000039C000-000000067F0000400200008A5900003A0000__00000073AD3FE6B8\n000000067F0000400200008A59000039C000-000000067F0000400200008A5900003A0000__000000914E3F38F0\n000000067F0000400200008A59000039C000-000000067F0000400200008A5900003A0000__000000931B33AE68\n000000067F0000400200008A59000039C000-000000067F0000400200008A5900003A0000__000000931B9AFDF8\n000000067F0000400200008A5900003A0000-000000067F0000400200008A5900003A4000__0000001C760FA190\n000000067F0000400200008A5900003A0000-000000067F0000400200008A5900003A4000__00000038E67ABFA0\n000000067F0000400200008A5900003A0000-000000067F0000400200008A5900003A4000__0000
003903F1CFE8\n000000067F0000400200008A5900003A0000-000000067F0000400200008A5900003A4000__0000003B99F7F8A0\n000000067F0000400200008A5900003A0000-000000067F0000400200008A5900003A4000__0000005D2FFFFB38\n000000067F0000400200008A5900003A0000-000000067F0000400200008A5900003A4000__00000073AD3FE6B8\n000000067F0000400200008A5900003A0000-000000067F0000400200008A5900003A4000__000000914E3F38F0\n000000067F0000400200008A5900003A0000-000000067F0000400200008A5900003A4000__000000931B33AE68\n000000067F0000400200008A5900003A0000-000000067F0000400200008A5900003A4000__000000931B9AFDF8\n000000067F0000400200008A5900003A3D42-000000067F0000400200008A5900003AC710__000000064E25E851-00000006FDCDDAF1\n000000067F0000400200008A5900003A4000-000000067F0000400200008A5900003A8000__0000001C760FA190\n000000067F0000400200008A5900003A4000-000000067F0000400200008A5900003A8000__00000038E67ABFA0\n000000067F0000400200008A5900003A4000-000000067F0000400200008A5900003A8000__0000003903F1CFE8\n000000067F0000400200008A5900003A4000-000000067F0000400200008A5900003A8000__0000003B99F7F8A0\n000000067F0000400200008A5900003A4000-000000067F0000400200008A5900003A8000__0000005D2FFFFB38\n000000067F0000400200008A5900003A4000-000000067F0000400200008A5900003A8000__00000073AD3FE6B8\n000000067F0000400200008A5900003A4000-000000067F0000400200008A5900003A8000__000000914E3F38F0\n000000067F0000400200008A5900003A4000-000000067F0000400200008A5900003A8000__000000931B33AE68\n000000067F0000400200008A5900003A4000-000000067F0000400200008A5900003A8000__000000931B9AFDF8\n000000067F0000400200008A5900003A8000-000000067F0000400200008A5900003AC000__0000001C760FA190\n000000067F0000400200008A5900003A8000-000000067F0000400200008A5900003AC000__00000038E67ABFA0\n000000067F0000400200008A5900003A8000-000000067F0000400200008A5900003AC000__0000003903F1CFE8\n000000067F0000400200008A5900003A8000-000000067F0000400200008A5900003AC000__0000003B99F7F8A0\n000000067F0000400200008A5900003A8000-000000067F0000400200008A5900003AC000__0000005D2FFFFB38\n000000067F000040
0200008A5900003A8000-000000067F0000400200008A5900003AC000__00000073AD3FE6B8\n000000067F0000400200008A5900003A8000-000000067F0000400200008A5900003AC000__000000914E3F38F0\n000000067F0000400200008A5900003A8000-000000067F0000400200008A5900003AC000__000000931B33AE68\n000000067F0000400200008A5900003A8000-000000067F0000400200008A5900003AC000__000000931B9AFDF8\n000000067F0000400200008A5900003AC000-000000067F0000400200008A5900003B0000__0000001C760FA190\n000000067F0000400200008A5900003AC000-000000067F0000400200008A5900003B0000__00000038E67ABFA0\n000000067F0000400200008A5900003AC000-000000067F0000400200008A5900003B0000__0000003903F1CFE8\n000000067F0000400200008A5900003AC000-000000067F0000400200008A5900003B0000__0000003B99F7F8A0\n000000067F0000400200008A5900003AC000-000000067F0000400200008A5900003B0000__0000005D2FFFFB38\n000000067F0000400200008A5900003AC000-000000067F0000400200008A5900003B0000__00000073AD3FE6B8\n000000067F0000400200008A5900003AC000-000000067F0000400200008A5900003B0000__000000914E3F38F0\n000000067F0000400200008A5900003AC000-000000067F0000400200008A5900003B0000__000000931B33AE68\n000000067F0000400200008A5900003AC000-000000067F0000400200008A5900003B0000__000000931B9AFDF8\n000000067F0000400200008A5900003AC710-000000067F0000400200008A5900003B50C6__000000064E25E851-00000006FDCDDAF1\n000000067F0000400200008A5900003B0000-000000067F0000400200008A5900003B4000__0000001C760FA190\n000000067F0000400200008A5900003B0000-000000067F0000400200008A5900003B4000__00000038E67ABFA0\n000000067F0000400200008A5900003B0000-000000067F0000400200008A5900003B4000__0000003903F1CFE8\n000000067F0000400200008A5900003B0000-000000067F0000400200008A5900003B4000__0000003B99F7F8A0\n000000067F0000400200008A5900003B0000-000000067F0000400200008A5900003B4000__0000005D2FFFFB38\n000000067F0000400200008A5900003B0000-000000067F0000400200008A5900003B4000__00000073AD3FE6B8\n000000067F0000400200008A5900003B0000-000000067F0000400200008A5900003B4000__000000914E3F38F0\n000000067F0000400200008A5900003B0000-000000067
F0000400200008A5900003B4000__000000931B33AE68\n000000067F0000400200008A5900003B0000-000000067F0000400200008A5900003B4000__000000931B9AFDF8\n000000067F0000400200008A5900003B4000-000000067F0000400200008A5900003B8000__0000001C760FA190\n000000067F0000400200008A5900003B4000-000000067F0000400200008A5900003B8000__00000038E67ABFA0\n000000067F0000400200008A5900003B4000-000000067F0000400200008A5900003B8000__0000003903F1CFE8\n000000067F0000400200008A5900003B4000-000000067F0000400200008A5900003B8000__0000003B99F7F8A0\n000000067F0000400200008A5900003B4000-000000067F0000400200008A5900003B8000__0000005D2FFFFB38\n000000067F0000400200008A5900003B4000-000000067F0000400200008A5900003B8000__00000073AD3FE6B8\n000000067F0000400200008A5900003B4000-000000067F0000400200008A5900003B8000__000000914E3F38F0\n000000067F0000400200008A5900003B4000-000000067F0000400200008A5900003B8000__000000931B33AE68\n000000067F0000400200008A5900003B4000-000000067F0000400200008A5900003B8000__000000931B9AFDF8\n000000067F0000400200008A5900003B50C6-000000067F0000400200008A5900003BDA8D__000000064E25E851-00000006FDCDDAF1\n000000067F0000400200008A5900003B8000-000000067F0000400200008A5900003BC000__0000001C760FA190\n000000067F0000400200008A5900003B8000-000000067F0000400200008A5900003BC000__00000038E67ABFA0\n000000067F0000400200008A5900003B8000-000000067F0000400200008A5900003BC000__0000003903F1CFE8\n000000067F0000400200008A5900003B8000-000000067F0000400200008A5900003BC000__0000003B99F7F8A0\n000000067F0000400200008A5900003B8000-000000067F0000400200008A5900003BC000__0000005D2FFFFB38\n000000067F0000400200008A5900003B8000-000000067F0000400200008A5900003BC000__00000073AD3FE6B8\n000000067F0000400200008A5900003B8000-000000067F0000400200008A5900003BC000__000000914E3F38F0\n000000067F0000400200008A5900003B8000-000000067F0000400200008A5900003BC000__000000931B33AE68\n000000067F0000400200008A5900003B8000-000000067F0000400200008A5900003BC000__000000931B9AFDF8\n000000067F0000400200008A5900003BC000-000000067F0000400200008A5900003C0000__0
000001C760FA190\n000000067F0000400200008A5900003BC000-000000067F0000400200008A5900003C0000__00000038E67ABFA0\n000000067F0000400200008A5900003BC000-000000067F0000400200008A5900003C0000__0000003903F1CFE8\n000000067F0000400200008A5900003BC000-000000067F0000400200008A5900003C0000__0000003B99F7F8A0\n000000067F0000400200008A5900003BC000-000000067F0000400200008A5900003C0000__0000005D2FFFFB38\n000000067F0000400200008A5900003BC000-000000067F0000400200008A5900003C0000__00000073AD3FE6B8\n000000067F0000400200008A5900003BC000-000000067F0000400200008A5900003C0000__000000914E3F38F0\n000000067F0000400200008A5900003BC000-000000067F0000400200008A5900003C0000__000000931B33AE68\n000000067F0000400200008A5900003BC000-000000067F0000400200008A5900003C0000__000000931B9AFDF8\n000000067F0000400200008A5900003BDA8D-000000067F0000400200008A5900003C648A__000000064E25E851-00000006FDCDDAF1\n000000067F0000400200008A5900003C0000-000000067F0000400200008A5900003C4000__0000001C760FA190\n000000067F0000400200008A5900003C0000-000000067F0000400200008A5900003C4000__00000038E67ABFA0\n000000067F0000400200008A5900003C0000-000000067F0000400200008A5900003C4000__0000003903F1CFE8\n000000067F0000400200008A5900003C0000-000000067F0000400200008A5900003C4000__0000003B99F7F8A0\n000000067F0000400200008A5900003C0000-000000067F0000400200008A5900003C4000__0000005D2FFFFB38\n000000067F0000400200008A5900003C0000-000000067F0000400200008A5900003C4000__00000073AD3FE6B8\n000000067F0000400200008A5900003C0000-000000067F0000400200008A5900003C4000__000000914E3F38F0\n000000067F0000400200008A5900003C0000-000000067F0000400200008A5900003C4000__000000931B33AE68\n000000067F0000400200008A5900003C0000-000000067F0000400200008A5900003C4000__000000931B9AFDF8\n000000067F0000400200008A5900003C4000-000000067F0000400200008A5900003C8000__0000001C725A2400\n000000067F0000400200008A5900003C4000-000000067F0000400200008A5900003C8000__0000001C760FA190\n000000067F0000400200008A5900003C4000-000000067F0000400200008A5900003C8000__00000038E67ABFA0\n000000067F000
0400200008A5900003C4000-000000067F0000400200008A5900003C8000__0000003903F1CFE8\n000000067F0000400200008A5900003C4000-000000067F0000400200008A5900003C8000__0000003B99F7F8A0\n000000067F0000400200008A5900003C4000-000000067F0000400200008A5900003C8000__0000005D2FFFFB38\n000000067F0000400200008A5900003C4000-000000067F0000400200008A5900003C8000__00000073AD3FE6B8\n000000067F0000400200008A5900003C4000-000000067F0000400200008A5900003C8000__000000914E3F38F0\n000000067F0000400200008A5900003C4000-000000067F0000400200008A5900003C8000__000000931B33AE68\n000000067F0000400200008A5900003C4000-000000067F0000400200008A5900003C8000__000000931B9AFDF8\n000000067F0000400200008A5900003C648A-000000067F0000400200008A590100000000__000000064E25E851-00000006FDCDDAF1\n000000067F0000400200008A5900003C67B6-000000067F0000400200008A5900003CF1B7__00000006FDCDDAF1-00000007AD75F249\n000000067F0000400200008A5900003C8000-000000067F0000400200008A5900003CC000__0000001C725A2400\n000000067F0000400200008A5900003C8000-000000067F0000400200008A5900003CC000__0000001C760FA190\n000000067F0000400200008A5900003C8000-000000067F0000400200008A5900003CC000__00000038E67ABFA0\n000000067F0000400200008A5900003C8000-000000067F0000400200008A5900003CC000__0000003903F1CFE8\n000000067F0000400200008A5900003C8000-000000067F0000400200008A5900003CC000__0000003B99F7F8A0\n000000067F0000400200008A5900003C8000-000000067F0000400200008A5900003CC000__0000005D2FFFFB38\n000000067F0000400200008A5900003C8000-000000067F0000400200008A5900003CC000__00000073AD3FE6B8\n000000067F0000400200008A5900003C8000-000000067F0000400200008A5900003CC000__000000914E3F38F0\n000000067F0000400200008A5900003C8000-000000067F0000400200008A5900003CC000__000000931B33AE68\n000000067F0000400200008A5900003C8000-000000067F0000400200008A5900003CC000__000000931B9AFDF8\n000000067F0000400200008A5900003CC000-000000067F0000400200008A5900003D0000__0000001C725A2400\n000000067F0000400200008A5900003CC000-000000067F0000400200008A5900003D0000__0000001C760FA190\n000000067F0000400200008A59
00003CC000-000000067F0000400200008A5900003D0000__00000038E67ABFA0\n000000067F0000400200008A5900003CC000-000000067F0000400200008A5900003D0000__0000003903F1CFE8\n000000067F0000400200008A5900003CC000-000000067F0000400200008A5900003D0000__0000003B99F7F8A0\n000000067F0000400200008A5900003CC000-000000067F0000400200008A5900003D0000__0000005D2FFFFB38\n000000067F0000400200008A5900003CC000-000000067F0000400200008A5900003D0000__00000073AD3FE6B8\n000000067F0000400200008A5900003CC000-000000067F0000400200008A5900003D0000__000000914E3F38F0\n000000067F0000400200008A5900003CC000-000000067F0000400200008A5900003D0000__000000931B33AE68\n000000067F0000400200008A5900003CC000-000000067F0000400200008A5900003D0000__000000931B9AFDF8\n000000067F0000400200008A5900003CF1B7-000000067F0000400200008A5900003D7BAC__00000006FDCDDAF1-00000007AD75F249\n000000067F0000400200008A5900003D0000-000000067F0000400200008A5900003D4000__0000001C725A2400\n000000067F0000400200008A5900003D0000-000000067F0000400200008A5900003D4000__0000001C760FA190\n000000067F0000400200008A5900003D0000-000000067F0000400200008A5900003D4000__00000038E67ABFA0\n000000067F0000400200008A5900003D0000-000000067F0000400200008A5900003D4000__0000003903F1CFE8\n000000067F0000400200008A5900003D0000-000000067F0000400200008A5900003D4000__0000003B99F7F8A0\n000000067F0000400200008A5900003D0000-000000067F0000400200008A5900003D4000__0000005D2FFFFB38\n000000067F0000400200008A5900003D0000-000000067F0000400200008A5900003D4000__00000073AD3FE6B8\n000000067F0000400200008A5900003D0000-000000067F0000400200008A5900003D4000__000000914E3F38F0\n000000067F0000400200008A5900003D0000-000000067F0000400200008A5900003D4000__000000931B33AE68\n000000067F0000400200008A5900003D0000-000000067F0000400200008A5900003D4000__000000931B9AFDF8\n000000067F0000400200008A5900003D4000-000000067F0000400200008A5900003D8000__0000001C725A2400\n000000067F0000400200008A5900003D4000-000000067F0000400200008A5900003D8000__0000001C760FA190\n000000067F0000400200008A5900003D4000-000000067F000040020
0008A5900003D8000__00000038E67ABFA0\n000000067F0000400200008A5900003D4000-000000067F0000400200008A5900003D8000__0000003903F1CFE8\n000000067F0000400200008A5900003D4000-000000067F0000400200008A5900003D8000__0000003B99F7F8A0\n000000067F0000400200008A5900003D4000-000000067F0000400200008A5900003D8000__0000005D2FFFFB38\n000000067F0000400200008A5900003D4000-000000067F0000400200008A5900003D8000__00000073AD3FE6B8\n000000067F0000400200008A5900003D4000-000000067F0000400200008A5900003D8000__000000914E3F38F0\n000000067F0000400200008A5900003D4000-000000067F0000400200008A5900003D8000__000000931B33AE68\n000000067F0000400200008A5900003D4000-000000067F0000400200008A5900003D8000__000000931B9AFDF8\n000000067F0000400200008A5900003D7BAC-000000067F0000400200008A5900003E0586__00000006FDCDDAF1-00000007AD75F249\n000000067F0000400200008A5900003D8000-000000067F0000400200008A5900003DC000__0000001C725A2400\n000000067F0000400200008A5900003D8000-000000067F0000400200008A5900003DC000__0000001C760FA190\n000000067F0000400200008A5900003D8000-000000067F0000400200008A5900003DC000__00000038E67ABFA0\n000000067F0000400200008A5900003D8000-000000067F0000400200008A5900003DC000__0000003903F1CFE8\n000000067F0000400200008A5900003D8000-000000067F0000400200008A5900003DC000__0000003B99F7F8A0\n000000067F0000400200008A5900003D8000-000000067F0000400200008A5900003DC000__0000005D2FFFFB38\n000000067F0000400200008A5900003D8000-000000067F0000400200008A5900003DC000__00000073AD3FE6B8\n000000067F0000400200008A5900003D8000-000000067F0000400200008A5900003DC000__000000914E3F38F0\n000000067F0000400200008A5900003D8000-000000067F0000400200008A5900003DC000__000000931B33AE68\n000000067F0000400200008A5900003D8000-000000067F0000400200008A5900003DC000__000000931B9AFDF8\n000000067F0000400200008A5900003DC000-000000067F0000400200008A5900003E0000__0000001C725A2400\n000000067F0000400200008A5900003DC000-000000067F0000400200008A5900003E0000__0000001C760FA190\n000000067F0000400200008A5900003DC000-000000067F0000400200008A5900003E0000__00000038E67
ABFA0\n000000067F0000400200008A5900003DC000-000000067F0000400200008A5900003E0000__0000003903F1CFE8\n000000067F0000400200008A5900003DC000-000000067F0000400200008A5900003E0000__0000003B99F7F8A0\n000000067F0000400200008A5900003DC000-000000067F0000400200008A5900003E0000__0000005D2FFFFB38\n000000067F0000400200008A5900003DC000-000000067F0000400200008A5900003E0000__00000073AD3FE6B8\n000000067F0000400200008A5900003DC000-000000067F0000400200008A5900003E0000__000000914E3F38F0\n000000067F0000400200008A5900003DC000-000000067F0000400200008A5900003E0000__000000931B33AE68\n000000067F0000400200008A5900003DC000-000000067F0000400200008A5900003E0000__000000931B9AFDF8\n000000067F0000400200008A5900003E0000-000000067F0000400200008A5900003E4000__0000001C725A2400\n000000067F0000400200008A5900003E0000-000000067F0000400200008A5900003E4000__0000001C760FA190\n000000067F0000400200008A5900003E0000-000000067F0000400200008A5900003E4000__00000038E67ABFA0\n000000067F0000400200008A5900003E0000-000000067F0000400200008A5900003E4000__0000003903F1CFE8\n000000067F0000400200008A5900003E0000-000000067F0000400200008A5900003E4000__0000003B99F7F8A0\n000000067F0000400200008A5900003E0000-000000067F0000400200008A5900003E4000__0000005D2FFFFB38\n000000067F0000400200008A5900003E0000-000000067F0000400200008A5900003E4000__00000073AD3FE6B8\n000000067F0000400200008A5900003E0000-000000067F0000400200008A5900003E4000__000000914E3F38F0\n000000067F0000400200008A5900003E0000-000000067F0000400200008A5900003E4000__000000931B33AE68\n000000067F0000400200008A5900003E0000-000000067F0000400200008A5900003E4000__000000931B9AFDF8\n000000067F0000400200008A5900003E0586-000000067F0000400200008A5900003E8F57__00000006FDCDDAF1-00000007AD75F249\n000000067F0000400200008A5900003E4000-000000067F0000400200008A5900003E8000__0000001C725A2400\n000000067F0000400200008A5900003E4000-000000067F0000400200008A5900003E8000__0000001C760FA190\n000000067F0000400200008A5900003E4000-000000067F0000400200008A5900003E8000__00000038E67ABFA0\n000000067F0000400200008
A5900003E4000-000000067F0000400200008A5900003E8000__0000003903F1CFE8\n000000067F0000400200008A5900003E4000-000000067F0000400200008A5900003E8000__0000003B99F7F8A0\n000000067F0000400200008A5900003E4000-000000067F0000400200008A5900003E8000__0000005D2FFFFB38\n000000067F0000400200008A5900003E4000-000000067F0000400200008A5900003E8000__00000073AD3FE6B8\n000000067F0000400200008A5900003E4000-000000067F0000400200008A5900003E8000__000000914E3F38F0\n000000067F0000400200008A5900003E4000-000000067F0000400200008A5900003E8000__000000931B33AE68\n000000067F0000400200008A5900003E4000-000000067F0000400200008A5900003E8000__000000931B9AFDF8\n000000067F0000400200008A5900003E8000-000000067F0000400200008A5900003EC000__0000001C725A2400\n000000067F0000400200008A5900003E8000-000000067F0000400200008A5900003EC000__0000001C760FA190\n000000067F0000400200008A5900003E8000-000000067F0000400200008A5900003EC000__00000038E67ABFA0\n000000067F0000400200008A5900003E8000-000000067F0000400200008A5900003EC000__0000003903F1CFE8\n000000067F0000400200008A5900003E8000-000000067F0000400200008A5900003EC000__0000003B99F7F8A0\n000000067F0000400200008A5900003E8000-000000067F0000400200008A5900003EC000__0000005D2FFFFB38\n000000067F0000400200008A5900003E8000-000000067F0000400200008A5900003EC000__00000073AD3FE6B8\n000000067F0000400200008A5900003E8000-000000067F0000400200008A5900003EC000__000000914E3F38F0\n000000067F0000400200008A5900003E8000-000000067F0000400200008A5900003EC000__000000931B33AE68\n000000067F0000400200008A5900003E8000-000000067F0000400200008A5900003EC000__000000931B9AFDF8\n000000067F0000400200008A5900003E8F57-000000067F0000400200008A5900003F1912__00000006FDCDDAF1-00000007AD75F249\n000000067F0000400200008A5900003EC000-000000067F0000400200008A5900003F0000__0000001C725A2400\n000000067F0000400200008A5900003EC000-000000067F0000400200008A5900003F0000__0000001C760FA190\n000000067F0000400200008A5900003EC000-000000067F0000400200008A5900003F0000__00000038E67ABFA0\n000000067F0000400200008A5900003EC000-000000067F000040
0200008A5900003F0000__0000003903F1CFE8\n000000067F0000400200008A5900003EC000-000000067F0000400200008A5900003F0000__0000003B99F7F8A0\n000000067F0000400200008A5900003EC000-000000067F0000400200008A5900003F0000__0000005D2FFFFB38\n000000067F0000400200008A5900003EC000-000000067F0000400200008A5900003F0000__00000073AD3FE6B8\n000000067F0000400200008A5900003EC000-000000067F0000400200008A5900003F0000__000000914E3F38F0\n000000067F0000400200008A5900003EC000-000000067F0000400200008A5900003F0000__000000931B33AE68\n000000067F0000400200008A5900003EC000-000000067F0000400200008A5900003F0000__000000931B9AFDF8\n000000067F0000400200008A5900003F0000-000000067F0000400200008A5900003F4000__0000001C725A2400\n000000067F0000400200008A5900003F0000-000000067F0000400200008A5900003F4000__0000001C760FA190\n000000067F0000400200008A5900003F0000-000000067F0000400200008A5900003F4000__00000038E67ABFA0\n000000067F0000400200008A5900003F0000-000000067F0000400200008A5900003F4000__0000003903F1CFE8\n000000067F0000400200008A5900003F0000-000000067F0000400200008A5900003F4000__0000003B99F7F8A0\n000000067F0000400200008A5900003F0000-000000067F0000400200008A5900003F4000__0000005D2FFFFB38\n000000067F0000400200008A5900003F0000-000000067F0000400200008A5900003F4000__00000073AD3FE6B8\n000000067F0000400200008A5900003F0000-000000067F0000400200008A5900003F4000__000000914E3F38F0\n000000067F0000400200008A5900003F0000-000000067F0000400200008A5900003F4000__000000931B33AE68\n000000067F0000400200008A5900003F0000-000000067F0000400200008A5900003F4000__000000931B9AFDF8\n000000067F0000400200008A5900003F1912-000000067F0000400200008A5900003FA2D9__00000006FDCDDAF1-00000007AD75F249\n000000067F0000400200008A5900003F4000-000000067F0000400200008A5900003F8000__0000001C725A2400\n000000067F0000400200008A5900003F4000-000000067F0000400200008A5900003F8000__0000001C760FA190\n000000067F0000400200008A5900003F4000-000000067F0000400200008A5900003F8000__00000038E67ABFA0\n000000067F0000400200008A5900003F4000-000000067F0000400200008A5900003F8000__00000039
03F1CFE8\n000000067F0000400200008A5900003F4000-000000067F0000400200008A5900003F8000__0000003B99F7F8A0\n000000067F0000400200008A5900003F4000-000000067F0000400200008A5900003F8000__0000005D2FFFFB38\n000000067F0000400200008A5900003F4000-000000067F0000400200008A5900003F8000__00000073AD3FE6B8\n000000067F0000400200008A5900003F4000-000000067F0000400200008A5900003F8000__000000914E3F38F0\n000000067F0000400200008A5900003F4000-000000067F0000400200008A5900003F8000__000000931B33AE68\n000000067F0000400200008A5900003F4000-000000067F0000400200008A5900003F8000__000000931B9AFDF8\n000000067F0000400200008A5900003F8000-000000067F0000400200008A5900003FC000__0000001C725A2400\n000000067F0000400200008A5900003F8000-000000067F0000400200008A5900003FC000__0000001C760FA190\n000000067F0000400200008A5900003F8000-000000067F0000400200008A5900003FC000__00000038E67ABFA0\n000000067F0000400200008A5900003F8000-000000067F0000400200008A5900003FC000__0000003903F1CFE8\n000000067F0000400200008A5900003F8000-000000067F0000400200008A5900003FC000__0000003B99F7F8A0\n000000067F0000400200008A5900003F8000-000000067F0000400200008A5900003FC000__0000005D2FFFFB38\n000000067F0000400200008A5900003F8000-000000067F0000400200008A5900003FC000__00000073AD3FE6B8\n000000067F0000400200008A5900003F8000-000000067F0000400200008A5900003FC000__000000914E3F38F0\n000000067F0000400200008A5900003F8000-000000067F0000400200008A5900003FC000__000000931B33AE68\n000000067F0000400200008A5900003F8000-000000067F0000400200008A5900003FC000__000000931B9AFDF8\n000000067F0000400200008A5900003FA2D9-000000067F0000400200008A590000402CDA__00000006FDCDDAF1-00000007AD75F249\n000000067F0000400200008A5900003FC000-000000067F0000400200008A590000400000__0000001C725A2400\n000000067F0000400200008A5900003FC000-000000067F0000400200008A590000400000__0000001C760FA190\n000000067F0000400200008A5900003FC000-000000067F0000400200008A590000400000__00000038E67ABFA0\n000000067F0000400200008A5900003FC000-000000067F0000400200008A590000400000__0000003903F1CFE8\n000000067F0000400200
008A5900003FC000-000000067F0000400200008A590000400000__0000003B99F7F8A0\n000000067F0000400200008A5900003FC000-000000067F0000400200008A590000400000__0000005D2FFFFB38\n000000067F0000400200008A5900003FC000-000000067F0000400200008A590000400000__00000073AD3FE6B8\n000000067F0000400200008A5900003FC000-000000067F0000400200008A590000400000__000000914E3F38F0\n000000067F0000400200008A5900003FC000-000000067F0000400200008A590000400000__000000931B33AE68\n000000067F0000400200008A5900003FC000-000000067F0000400200008A590000400000__000000931B9AFDF8\n000000067F0000400200008A590000400000-000000067F0000400200008A590000404000__0000001C725A2400\n000000067F0000400200008A590000400000-000000067F0000400200008A590000404000__0000001C760FA190\n000000067F0000400200008A590000400000-000000067F0000400200008A590000404000__00000038E67ABFA0\n000000067F0000400200008A590000400000-000000067F0000400200008A590000404000__0000003903F1CFE8\n000000067F0000400200008A590000400000-000000067F0000400200008A590000404000__0000003B99F7F8A0\n000000067F0000400200008A590000400000-000000067F0000400200008A590000404000__0000005D2FFFFB38\n000000067F0000400200008A590000400000-000000067F0000400200008A590000404000__00000073AD3FE6B8\n000000067F0000400200008A590000400000-000000067F0000400200008A590000404000__000000914E3F38F0\n000000067F0000400200008A590000400000-000000067F0000400200008A590000404000__000000931B33AE68\n000000067F0000400200008A590000400000-000000067F0000400200008A590000404000__000000931B9AFDF8\n000000067F0000400200008A590000402CDA-000000067F0000400200008A59000040B6DB__00000006FDCDDAF1-00000007AD75F249\n000000067F0000400200008A590000404000-000000067F0000400200008A590000408000__0000001C725A2400\n000000067F0000400200008A590000404000-000000067F0000400200008A590000408000__0000001C760FA190\n000000067F0000400200008A590000404000-000000067F0000400200008A590000408000__00000038E67ABFA0\n000000067F0000400200008A590000404000-000000067F0000400200008A590000408000__0000003903F1CFE8\n000000067F0000400200008A590000404000-000000067F000
0400200008A590000408000__0000003B99F7F8A0\n000000067F0000400200008A590000404000-000000067F0000400200008A590000408000__0000005D2FFFFB38\n000000067F0000400200008A590000404000-000000067F0000400200008A590000408000__00000073AD3FE6B8\n000000067F0000400200008A590000404000-000000067F0000400200008A590000408000__000000914E3F38F0\n000000067F0000400200008A590000404000-000000067F0000400200008A590000408000__000000931B33AE68\n000000067F0000400200008A590000404000-000000067F0000400200008A590000408000__000000931B9AFDF8\n000000067F0000400200008A590000408000-000000067F0000400200008A59000040C000__0000001C725A2400\n000000067F0000400200008A590000408000-000000067F0000400200008A59000040C000__0000001C760FA190\n000000067F0000400200008A590000408000-000000067F0000400200008A59000040C000__00000038E67ABFA0\n000000067F0000400200008A590000408000-000000067F0000400200008A59000040C000__0000003903F1CFE8\n000000067F0000400200008A590000408000-000000067F0000400200008A59000040C000__0000003B99F7F8A0\n000000067F0000400200008A590000408000-000000067F0000400200008A59000040C000__0000005D2FFFFB38\n000000067F0000400200008A590000408000-000000067F0000400200008A59000040C000__00000073AD3FE6B8\n000000067F0000400200008A590000408000-000000067F0000400200008A59000040C000__000000914E3F38F0\n000000067F0000400200008A590000408000-000000067F0000400200008A59000040C000__000000931B33AE68\n000000067F0000400200008A590000408000-000000067F0000400200008A59000040C000__000000931B9AFDF8\n000000067F0000400200008A59000040B6DB-000000067F0000400200008A5900004140CA__00000006FDCDDAF1-00000007AD75F249\n000000067F0000400200008A59000040C000-000000067F0000400200008A590000410000__0000001C725A2400\n000000067F0000400200008A59000040C000-000000067F0000400200008A590000410000__0000001C760FA190\n000000067F0000400200008A59000040C000-000000067F0000400200008A590000410000__00000038E67ABFA0\n000000067F0000400200008A59000040C000-000000067F0000400200008A590000410000__0000003903F1CFE8\n000000067F0000400200008A59000040C000-000000067F0000400200008A590000410000__00000
03B99F7F8A0\n000000067F0000400200008A59000040C000-000000067F0000400200008A590000410000__0000005D2FFFFB38\n000000067F0000400200008A59000040C000-000000067F0000400200008A590000410000__00000073AD3FE6B8\n000000067F0000400200008A59000040C000-000000067F0000400200008A590000410000__000000914E3F38F0\n000000067F0000400200008A59000040C000-000000067F0000400200008A590000410000__000000931B33AE68\n000000067F0000400200008A59000040C000-000000067F0000400200008A590000410000__000000931B9AFDF8\n000000067F0000400200008A590000410000-000000067F0000400200008A590000414000__0000001C725A2400\n000000067F0000400200008A590000410000-000000067F0000400200008A590000414000__0000001C760FA190\n000000067F0000400200008A590000410000-000000067F0000400200008A590000414000__00000038E67ABFA0\n000000067F0000400200008A590000410000-000000067F0000400200008A590000414000__0000003903F1CFE8\n000000067F0000400200008A590000410000-000000067F0000400200008A590000414000__0000003B99F7F8A0\n000000067F0000400200008A590000410000-000000067F0000400200008A590000414000__0000005D2FFFFB38\n000000067F0000400200008A590000410000-000000067F0000400200008A590000414000__00000073AD3FE6B8\n000000067F0000400200008A590000410000-000000067F0000400200008A590000414000__000000914E3F38F0\n000000067F0000400200008A590000410000-000000067F0000400200008A590000414000__000000931B33AE68\n000000067F0000400200008A590000410000-000000067F0000400200008A590000414000__000000931B9AFDF8\n000000067F0000400200008A590000414000-000000067F0000400200008A590000418000__0000001C725A2400\n000000067F0000400200008A590000414000-000000067F0000400200008A590000418000__0000001C760FA190\n000000067F0000400200008A590000414000-000000067F0000400200008A590000418000__00000038E67ABFA0\n000000067F0000400200008A590000414000-000000067F0000400200008A590000418000__0000003903F1CFE8\n000000067F0000400200008A590000414000-000000067F0000400200008A590000418000__0000003B99F7F8A0\n000000067F0000400200008A590000414000-000000067F0000400200008A590000418000__0000005D2FFFFB38\n000000067F0000400200008A5900004140
00-000000067F0000400200008A590000418000__00000073AD3FE6B8\n000000067F0000400200008A590000414000-000000067F0000400200008A590000418000__000000914E3F38F0\n000000067F0000400200008A590000414000-000000067F0000400200008A590000418000__000000931B33AE68\n000000067F0000400200008A590000414000-000000067F0000400200008A590000418000__000000931B9AFDF8\n000000067F0000400200008A5900004140CA-000000067F0000400200008A59000041CAA5__00000006FDCDDAF1-00000007AD75F249\n000000067F0000400200008A590000418000-000000067F0000400200008A59000041C000__0000001C725A2400\n000000067F0000400200008A590000418000-000000067F0000400200008A59000041C000__0000001C760FA190\n000000067F0000400200008A590000418000-000000067F0000400200008A59000041C000__00000038E67ABFA0\n000000067F0000400200008A590000418000-000000067F0000400200008A59000041C000__0000003903F1CFE8\n000000067F0000400200008A590000418000-000000067F0000400200008A59000041C000__0000003B99F7F8A0\n000000067F0000400200008A590000418000-000000067F0000400200008A59000041C000__0000005D2FFFFB38\n000000067F0000400200008A590000418000-000000067F0000400200008A59000041C000__00000073AD3FE6B8\n000000067F0000400200008A590000418000-000000067F0000400200008A59000041C000__000000914E3F38F0\n000000067F0000400200008A590000418000-000000067F0000400200008A59000041C000__000000931B33AE68\n000000067F0000400200008A590000418000-000000067F0000400200008A59000041C000__000000931B9AFDF8\n000000067F0000400200008A59000041C000-000000067F0000400200008A590000420000__0000001C725A2400\n000000067F0000400200008A59000041C000-000000067F0000400200008A590000420000__0000001C760FA190\n000000067F0000400200008A59000041C000-000000067F0000400200008A590000420000__00000038E67ABFA0\n000000067F0000400200008A59000041C000-000000067F0000400200008A590000420000__0000003903F1CFE8\n000000067F0000400200008A59000041C000-000000067F0000400200008A590000420000__0000003B99F7F8A0\n000000067F0000400200008A59000041C000-000000067F0000400200008A590000420000__0000005D2FFFFB38\n000000067F0000400200008A59000041C000-000000067F0000400200008A590
000420000__00000073AD3FE6B8\n000000067F0000400200008A59000041C000-000000067F0000400200008A590000420000__000000914E3F38F0\n000000067F0000400200008A59000041C000-000000067F0000400200008A590000420000__000000931B33AE68\n000000067F0000400200008A59000041C000-000000067F0000400200008A590000420000__000000931B9AFDF8\n000000067F0000400200008A59000041CAA5-000000067F0000400200008A59000042546F__00000006FDCDDAF1-00000007AD75F249\n000000067F0000400200008A590000420000-000000067F0000400200008A590000424000__0000001C725A2400\n000000067F0000400200008A590000420000-000000067F0000400200008A590000424000__0000001C760FA190\n000000067F0000400200008A590000420000-000000067F0000400200008A590000424000__00000038E67ABFA0\n000000067F0000400200008A590000420000-000000067F0000400200008A590000424000__0000003903F1CFE8\n000000067F0000400200008A590000420000-000000067F0000400200008A590000424000__0000003B99F7F8A0\n000000067F0000400200008A590000420000-000000067F0000400200008A590000424000__0000005D2FFFFB38\n000000067F0000400200008A590000420000-000000067F0000400200008A590000424000__00000073AD3FE6B8\n000000067F0000400200008A590000420000-000000067F0000400200008A590000424000__000000914E3F38F0\n000000067F0000400200008A590000420000-000000067F0000400200008A590000424000__000000931B33AE68\n000000067F0000400200008A590000420000-000000067F0000400200008A590000424000__000000931B9AFDF8\n000000067F0000400200008A590000424000-000000067F0000400200008A590000428000__000000088D7FE420\n000000067F0000400200008A590000424000-000000067F0000400200008A590000428000__0000001C760FA190\n000000067F0000400200008A590000424000-000000067F0000400200008A590000428000__00000038E67ABFA0\n000000067F0000400200008A590000424000-000000067F0000400200008A590000428000__0000003903F1CFE8\n000000067F0000400200008A590000424000-000000067F0000400200008A590000428000__0000003B99F7F8A0\n000000067F0000400200008A590000424000-000000067F0000400200008A590000428000__0000005D2FFFFB38\n000000067F0000400200008A590000424000-000000067F0000400200008A590000428000__00000073AD3FE6B8\n0
00000067F0000400200008A590000424000-000000067F0000400200008A590000428000__000000914E3F38F0\n000000067F0000400200008A590000424000-000000067F0000400200008A590000428000__000000931B33AE68\n000000067F0000400200008A590000424000-000000067F0000400200008A590000428000__000000931B9AFDF8\n000000067F0000400200008A59000042546F-000000067F0000400200008A590100000000__00000006FDCDDAF1-00000007AD75F249\n000000067F0000400200008A59000042576F-000000067F0000400200008A59000042E12F__00000007AD75F249-000000085D1DF561\n000000067F0000400200008A590000428000-000000067F0000400200008A59000042C000__000000088D7FE420\n000000067F0000400200008A590000428000-000000067F0000400200008A59000042C000__0000001C760FA190\n000000067F0000400200008A590000428000-000000067F0000400200008A59000042C000__00000038E67ABFA0\n000000067F0000400200008A590000428000-000000067F0000400200008A59000042C000__0000003903F1CFE8\n000000067F0000400200008A590000428000-000000067F0000400200008A59000042C000__0000003B99F7F8A0\n000000067F0000400200008A590000428000-000000067F0000400200008A59000042C000__0000005D2FFFFB38\n000000067F0000400200008A590000428000-000000067F0000400200008A59000042C000__00000073AD3FE6B8\n000000067F0000400200008A590000428000-000000067F0000400200008A59000042C000__000000914E3F38F0\n000000067F0000400200008A590000428000-000000067F0000400200008A59000042C000__000000931B33AE68\n000000067F0000400200008A590000428000-000000067F0000400200008A59000042C000__000000931B9AFDF8\n000000067F0000400200008A59000042C000-000000067F0000400200008A590000430000__000000088D7FE420\n000000067F0000400200008A59000042C000-000000067F0000400200008A590000430000__0000001C760FA190\n000000067F0000400200008A59000042C000-000000067F0000400200008A590000430000__00000038E67ABFA0\n000000067F0000400200008A59000042C000-000000067F0000400200008A590000430000__0000003903F1CFE8\n000000067F0000400200008A59000042C000-000000067F0000400200008A590000430000__0000003B99F7F8A0\n000000067F0000400200008A59000042C000-000000067F0000400200008A590000430000__0000005D2FFFFB38\n000000067F0000
400200008A59000042C000-000000067F0000400200008A590000430000__00000073AD3FE6B8\n000000067F0000400200008A59000042C000-000000067F0000400200008A590000430000__000000914E3F38F0\n000000067F0000400200008A59000042C000-000000067F0000400200008A590000430000__000000931B33AE68\n000000067F0000400200008A59000042C000-000000067F0000400200008A590000430000__000000931B9AFDF8\n000000067F0000400200008A59000042E12F-000000067F0000400200008A590000436B05__00000007AD75F249-000000085D1DF561\n000000067F0000400200008A590000430000-000000067F0000400200008A590000434000__000000088D7FE420\n000000067F0000400200008A590000430000-000000067F0000400200008A590000434000__0000001C760FA190\n000000067F0000400200008A590000430000-000000067F0000400200008A590000434000__00000038E67ABFA0\n000000067F0000400200008A590000430000-000000067F0000400200008A590000434000__0000003903F1CFE8\n000000067F0000400200008A590000430000-000000067F0000400200008A590000434000__0000003B99F7F8A0\n000000067F0000400200008A590000430000-000000067F0000400200008A590000434000__0000005D2FFFFB38\n000000067F0000400200008A590000430000-000000067F0000400200008A590000434000__00000073AD3FE6B8\n000000067F0000400200008A590000430000-000000067F0000400200008A590000434000__000000914E3F38F0\n000000067F0000400200008A590000430000-000000067F0000400200008A590000434000__000000931B33AE68\n000000067F0000400200008A590000430000-000000067F0000400200008A590000434000__000000931B9AFDF8\n000000067F0000400200008A590000434000-000000067F0000400200008A590000438000__000000088D7FE420\n000000067F0000400200008A590000434000-000000067F0000400200008A590000438000__0000001C760FA190\n000000067F0000400200008A590000434000-000000067F0000400200008A590000438000__00000038E67ABFA0\n000000067F0000400200008A590000434000-000000067F0000400200008A590000438000__0000003903F1CFE8\n000000067F0000400200008A590000434000-000000067F0000400200008A590000438000__0000003B99F7F8A0\n000000067F0000400200008A590000434000-000000067F0000400200008A590000438000__0000005D2FFFFB38\n000000067F0000400200008A590000434000-0000000
67F0000400200008A590000438000__00000073AD3FE6B8\n000000067F0000400200008A590000434000-000000067F0000400200008A590000438000__000000914E3F38F0\n000000067F0000400200008A590000434000-000000067F0000400200008A590000438000__000000931B33AE68\n000000067F0000400200008A590000434000-000000067F0000400200008A590000438000__000000931B9AFDF8\n000000067F0000400200008A590000436B05-000000067F0000400200008A59000043F4F4__00000007AD75F249-000000085D1DF561\n000000067F0000400200008A590000438000-000000067F0000400200008A59000043C000__000000088D7FE420\n000000067F0000400200008A590000438000-000000067F0000400200008A59000043C000__0000001C760FA190\n000000067F0000400200008A590000438000-000000067F0000400200008A59000043C000__00000038E67ABFA0\n000000067F0000400200008A590000438000-000000067F0000400200008A59000043C000__0000003903F1CFE8\n000000067F0000400200008A590000438000-000000067F0000400200008A59000043C000__0000003B99F7F8A0\n000000067F0000400200008A590000438000-000000067F0000400200008A59000043C000__0000005D2FFFFB38\n000000067F0000400200008A590000438000-000000067F0000400200008A59000043C000__00000073AD3FE6B8\n000000067F0000400200008A590000438000-000000067F0000400200008A59000043C000__000000914E3F38F0\n000000067F0000400200008A590000438000-000000067F0000400200008A59000043C000__000000931B33AE68\n000000067F0000400200008A590000438000-000000067F0000400200008A59000043C000__000000931B9AFDF8\n000000067F0000400200008A59000043C000-000000067F0000400200008A590000440000__000000088D7FE420\n000000067F0000400200008A59000043C000-000000067F0000400200008A590000440000__0000001C760FA190\n000000067F0000400200008A59000043C000-000000067F0000400200008A590000440000__00000038E67ABFA0\n000000067F0000400200008A59000043C000-000000067F0000400200008A590000440000__0000003903F1CFE8\n000000067F0000400200008A59000043C000-000000067F0000400200008A590000440000__0000003B99F7F8A0\n000000067F0000400200008A59000043C000-000000067F0000400200008A590000440000__0000005D2FFFFB38\n000000067F0000400200008A59000043C000-000000067F0000400200008A590000440000_
_00000073AD3FE6B8\n000000067F0000400200008A59000043C000-000000067F0000400200008A590000440000__000000914E3F38F0\n000000067F0000400200008A59000043C000-000000067F0000400200008A590000440000__000000931B33AE68\n000000067F0000400200008A59000043C000-000000067F0000400200008A590000440000__000000931B9AFDF8\n000000067F0000400200008A59000043F4F4-000000067F0000400200008A590000447EE3__00000007AD75F249-000000085D1DF561\n000000067F0000400200008A590000440000-000000067F0000400200008A590000444000__000000088D7FE420\n000000067F0000400200008A590000440000-000000067F0000400200008A590000444000__0000001C760FA190\n000000067F0000400200008A590000440000-000000067F0000400200008A590000444000__00000038E67ABFA0\n000000067F0000400200008A590000440000-000000067F0000400200008A590000444000__0000003903F1CFE8\n000000067F0000400200008A590000440000-000000067F0000400200008A590000444000__0000003B99F7F8A0\n000000067F0000400200008A590000440000-000000067F0000400200008A590000444000__0000005D2FFFFB38\n000000067F0000400200008A590000440000-000000067F0000400200008A590000444000__00000073AD3FE6B8\n000000067F0000400200008A590000440000-000000067F0000400200008A590000444000__000000914E3F38F0\n000000067F0000400200008A590000440000-000000067F0000400200008A590000444000__000000931B33AE68\n000000067F0000400200008A590000440000-000000067F0000400200008A590000444000__000000931B9AFDF8\n000000067F0000400200008A590000444000-000000067F0000400200008A590000448000__000000088D7FE420\n000000067F0000400200008A590000444000-000000067F0000400200008A590000448000__0000001C760FA190\n000000067F0000400200008A590000444000-000000067F0000400200008A590000448000__00000038E67ABFA0\n000000067F0000400200008A590000444000-000000067F0000400200008A590000448000__0000003903F1CFE8\n000000067F0000400200008A590000444000-000000067F0000400200008A590000448000__0000003B99F7F8A0\n000000067F0000400200008A590000444000-000000067F0000400200008A590000448000__0000005D2FFFFB38\n000000067F0000400200008A590000444000-000000067F0000400200008A590000448000__00000073AD3FE6B8\n000000067F0
000400200008A590000444000-000000067F0000400200008A590000448000__000000914E3F38F0\n000000067F0000400200008A590000444000-000000067F0000400200008A590000448000__000000931B33AE68\n000000067F0000400200008A590000444000-000000067F0000400200008A590000448000__000000931B9AFDF8\n000000067F0000400200008A590000447EE3-000000067F0000400200008A5900004508CC__00000007AD75F249-000000085D1DF561\n000000067F0000400200008A590000448000-000000067F0000400200008A59000044C000__000000088D7FE420\n000000067F0000400200008A590000448000-000000067F0000400200008A59000044C000__0000001C760FA190\n000000067F0000400200008A590000448000-000000067F0000400200008A59000044C000__00000038E67ABFA0\n000000067F0000400200008A590000448000-000000067F0000400200008A59000044C000__0000003903F1CFE8\n000000067F0000400200008A590000448000-000000067F0000400200008A59000044C000__0000003B99F7F8A0\n000000067F0000400200008A590000448000-000000067F0000400200008A59000044C000__0000005D2FFFFB38\n000000067F0000400200008A590000448000-000000067F0000400200008A59000044C000__00000073AD3FE6B8\n000000067F0000400200008A590000448000-000000067F0000400200008A59000044C000__000000914E3F38F0\n000000067F0000400200008A590000448000-000000067F0000400200008A59000044C000__000000931B33AE68\n000000067F0000400200008A590000448000-000000067F0000400200008A59000044C000__000000931B9AFDF8\n000000067F0000400200008A59000044C000-000000067F0000400200008A590000450000__000000088D7FE420\n000000067F0000400200008A59000044C000-000000067F0000400200008A590000450000__0000001C760FA190\n000000067F0000400200008A59000044C000-000000067F0000400200008A590000450000__00000038E67ABFA0\n000000067F0000400200008A59000044C000-000000067F0000400200008A590000450000__0000003903F1CFE8\n000000067F0000400200008A59000044C000-000000067F0000400200008A590000450000__0000003B99F7F8A0\n000000067F0000400200008A59000044C000-000000067F0000400200008A590000450000__0000005D2FFFFB38\n000000067F0000400200008A59000044C000-000000067F0000400200008A590000450000__00000073AD3FE6B8\n000000067F0000400200008A59000044C000-0000
00067F0000400200008A590000450000__000000914E3F38F0\n000000067F0000400200008A59000044C000-000000067F0000400200008A590000450000__000000931B33AE68\n000000067F0000400200008A59000044C000-000000067F0000400200008A590000450000__000000931B9AFDF8\n000000067F0000400200008A590000450000-000000067F0000400200008A590000454000__000000088D7FE420\n000000067F0000400200008A590000450000-000000067F0000400200008A590000454000__0000001C760FA190\n000000067F0000400200008A590000450000-000000067F0000400200008A590000454000__00000038E67ABFA0\n000000067F0000400200008A590000450000-000000067F0000400200008A590000454000__0000003903F1CFE8\n000000067F0000400200008A590000450000-000000067F0000400200008A590000454000__0000003B99F7F8A0\n000000067F0000400200008A590000450000-000000067F0000400200008A590000454000__0000005D2FFFFB38\n000000067F0000400200008A590000450000-000000067F0000400200008A590000454000__00000073AD3FE6B8\n000000067F0000400200008A590000450000-000000067F0000400200008A590000454000__000000914E3F38F0\n000000067F0000400200008A590000450000-000000067F0000400200008A590000454000__000000931B33AE68\n000000067F0000400200008A590000450000-000000067F0000400200008A590000454000__000000931B9AFDF8\n000000067F0000400200008A5900004508CC-000000067F0000400200008A5900004592AC__00000007AD75F249-000000085D1DF561\n000000067F0000400200008A590000454000-000000067F0000400200008A590000458000__000000088D7FE420\n000000067F0000400200008A590000454000-000000067F0000400200008A590000458000__0000001C760FA190\n000000067F0000400200008A590000454000-000000067F0000400200008A590000458000__00000038E67ABFA0\n000000067F0000400200008A590000454000-000000067F0000400200008A590000458000__0000003903F1CFE8\n000000067F0000400200008A590000454000-000000067F0000400200008A590000458000__0000003B99F7F8A0\n000000067F0000400200008A590000454000-000000067F0000400200008A590000458000__0000005D2FFFFB38\n000000067F0000400200008A590000454000-000000067F0000400200008A590000458000__00000073AD3FE6B8\n000000067F0000400200008A590000454000-000000067F0000400200008A5900004580
00__000000914E3F38F0\n000000067F0000400200008A590000454000-000000067F0000400200008A590000458000__000000931B33AE68\n000000067F0000400200008A590000454000-000000067F0000400200008A590000458000__000000931B9AFDF8\n000000067F0000400200008A590000458000-000000067F0000400200008A59000045C000__000000088D7FE420\n000000067F0000400200008A590000458000-000000067F0000400200008A59000045C000__0000001C760FA190\n000000067F0000400200008A590000458000-000000067F0000400200008A59000045C000__00000038E67ABFA0\n000000067F0000400200008A590000458000-000000067F0000400200008A59000045C000__0000003903F1CFE8\n000000067F0000400200008A590000458000-000000067F0000400200008A59000045C000__0000003B99F7F8A0\n000000067F0000400200008A590000458000-000000067F0000400200008A59000045C000__0000005D2FFFFB38\n000000067F0000400200008A590000458000-000000067F0000400200008A59000045C000__00000073AD3FE6B8\n000000067F0000400200008A590000458000-000000067F0000400200008A59000045C000__000000914E3F38F0\n000000067F0000400200008A590000458000-000000067F0000400200008A59000045C000__000000931B33AE68\n000000067F0000400200008A590000458000-000000067F0000400200008A59000045C000__000000931B9AFDF8\n000000067F0000400200008A5900004592AC-000000067F0000400200008A590000461C6A__00000007AD75F249-000000085D1DF561\n000000067F0000400200008A59000045C000-000000067F0000400200008A590000460000__000000088D7FE420\n000000067F0000400200008A59000045C000-000000067F0000400200008A590000460000__0000001C760FA190\n000000067F0000400200008A59000045C000-000000067F0000400200008A590000460000__00000038E67ABFA0\n000000067F0000400200008A59000045C000-000000067F0000400200008A590000460000__0000003903F1CFE8\n000000067F0000400200008A59000045C000-000000067F0000400200008A590000460000__0000003B99F7F8A0\n000000067F0000400200008A59000045C000-000000067F0000400200008A590000460000__0000005D2FFFFB38\n000000067F0000400200008A59000045C000-000000067F0000400200008A590000460000__00000073AD3FE6B8\n000000067F0000400200008A59000045C000-000000067F0000400200008A590000460000__000000914E3F38F0\n00000006
7F0000400200008A59000045C000-000000067F0000400200008A590000460000__000000931B33AE68\n000000067F0000400200008A59000045C000-000000067F0000400200008A590000460000__000000931B9AFDF8\n000000067F0000400200008A590000460000-000000067F0000400200008A590000464000__000000088D7FE420\n000000067F0000400200008A590000460000-000000067F0000400200008A590000464000__0000001C760FA190\n000000067F0000400200008A590000460000-000000067F0000400200008A590000464000__00000038E67ABFA0\n000000067F0000400200008A590000460000-000000067F0000400200008A590000464000__0000003903F1CFE8\n000000067F0000400200008A590000460000-000000067F0000400200008A590000464000__0000003B99F7F8A0\n000000067F0000400200008A590000460000-000000067F0000400200008A590000464000__0000005D2FFFFB38\n000000067F0000400200008A590000460000-000000067F0000400200008A590000464000__00000073AD3FE6B8\n000000067F0000400200008A590000460000-000000067F0000400200008A590000464000__000000914E3F38F0\n000000067F0000400200008A590000460000-000000067F0000400200008A590000464000__000000931B33AE68\n000000067F0000400200008A590000460000-000000067F0000400200008A590000464000__000000931B9AFDF8\n000000067F0000400200008A590000461C6A-000000067F0000400200008A59000046A62B__00000007AD75F249-000000085D1DF561\n000000067F0000400200008A590000464000-000000067F0000400200008A590000468000__000000088D7FE420\n000000067F0000400200008A590000464000-000000067F0000400200008A590000468000__0000001C760FA190\n000000067F0000400200008A590000464000-000000067F0000400200008A590000468000__00000038E67ABFA0\n000000067F0000400200008A590000464000-000000067F0000400200008A590000468000__0000003903F1CFE8\n000000067F0000400200008A590000464000-000000067F0000400200008A590000468000__0000003B99F7F8A0\n000000067F0000400200008A590000464000-000000067F0000400200008A590000468000__0000005D2FFFFB38\n000000067F0000400200008A590000464000-000000067F0000400200008A590000468000__00000073AD3FE6B8\n000000067F0000400200008A590000464000-000000067F0000400200008A590000468000__000000914E3F38F0\n000000067F0000400200008A590000464000-0
00000067F0000400200008A590000468000__000000931B33AE68\n000000067F0000400200008A590000464000-000000067F0000400200008A590000468000__000000931B9AFDF8\n000000067F0000400200008A590000468000-000000067F0000400200008A59000046C000__000000088D7FE420\n000000067F0000400200008A590000468000-000000067F0000400200008A59000046C000__0000001C760FA190\n000000067F0000400200008A590000468000-000000067F0000400200008A59000046C000__00000038E67ABFA0\n000000067F0000400200008A590000468000-000000067F0000400200008A59000046C000__0000003903F1CFE8\n000000067F0000400200008A590000468000-000000067F0000400200008A59000046C000__0000003B99F7F8A0\n000000067F0000400200008A590000468000-000000067F0000400200008A59000046C000__0000005D2FFFFB38\n000000067F0000400200008A590000468000-000000067F0000400200008A59000046C000__00000073AD3FE6B8\n000000067F0000400200008A590000468000-000000067F0000400200008A59000046C000__000000914E3F38F0\n000000067F0000400200008A590000468000-000000067F0000400200008A59000046C000__000000931B33AE68\n000000067F0000400200008A590000468000-000000067F0000400200008A59000046C000__000000931B9AFDF8\n000000067F0000400200008A59000046A62B-000000067F0000400200008A590000473003__00000007AD75F249-000000085D1DF561\n000000067F0000400200008A59000046C000-000000067F0000400200008A590000470000__000000088D7FE420\n000000067F0000400200008A59000046C000-000000067F0000400200008A590000470000__0000001C760FA190\n000000067F0000400200008A59000046C000-000000067F0000400200008A590000470000__00000038E67ABFA0\n000000067F0000400200008A59000046C000-000000067F0000400200008A590000470000__0000003903F1CFE8\n000000067F0000400200008A59000046C000-000000067F0000400200008A590000470000__0000003B99F7F8A0\n000000067F0000400200008A59000046C000-000000067F0000400200008A590000470000__0000005D2FFFFB38\n000000067F0000400200008A59000046C000-000000067F0000400200008A590000470000__00000073AD3FE6B8\n000000067F0000400200008A59000046C000-000000067F0000400200008A590000470000__000000914E3F38F0\n000000067F0000400200008A59000046C000-000000067F0000400200008A5900004
70000__000000931B33AE68\n000000067F0000400200008A59000046C000-000000067F0000400200008A590000470000__000000931B9AFDF8\n000000067F0000400200008A590000470000-000000067F0000400200008A590000474000__000000088D7FE420\n000000067F0000400200008A590000470000-000000067F0000400200008A590000474000__0000001C760FA190\n000000067F0000400200008A590000470000-000000067F0000400200008A590000474000__00000038E67ABFA0\n000000067F0000400200008A590000470000-000000067F0000400200008A590000474000__0000003903F1CFE8\n000000067F0000400200008A590000470000-000000067F0000400200008A590000474000__0000003B99F7F8A0\n000000067F0000400200008A590000470000-000000067F0000400200008A590000474000__0000005D2FFFFB38\n000000067F0000400200008A590000470000-000000067F0000400200008A590000474000__00000073AD3FE6B8\n000000067F0000400200008A590000470000-000000067F0000400200008A590000474000__000000914E3F38F0\n000000067F0000400200008A590000470000-000000067F0000400200008A590000474000__000000931B33AE68\n000000067F0000400200008A590000470000-000000067F0000400200008A590000474000__000000931B9AFDF8\n000000067F0000400200008A590000473003-000000067F0000400200008A59000047B9EA__00000007AD75F249-000000085D1DF561\n000000067F0000400200008A590000474000-000000067F0000400200008A590000478000__000000088D7FE420\n000000067F0000400200008A590000474000-000000067F0000400200008A590000478000__0000001C760FA190\n000000067F0000400200008A590000474000-000000067F0000400200008A590000478000__00000038E67ABFA0\n000000067F0000400200008A590000474000-000000067F0000400200008A590000478000__0000003903F1CFE8\n000000067F0000400200008A590000474000-000000067F0000400200008A590000478000__0000003B99F7F8A0\n000000067F0000400200008A590000474000-000000067F0000400200008A590000478000__0000005D2FFFFB38\n000000067F0000400200008A590000474000-000000067F0000400200008A590000478000__00000073AD3FE6B8\n000000067F0000400200008A590000474000-000000067F0000400200008A590000478000__000000914E3F38F0\n000000067F0000400200008A590000474000-000000067F0000400200008A590000478000__000000931B33AE68\n00000
0067F0000400200008A590000474000-000000067F0000400200008A590000478000__000000931B9AFDF8\n000000067F0000400200008A590000478000-000000067F0000400200008A59000047C000__000000088D7FE420\n000000067F0000400200008A590000478000-000000067F0000400200008A59000047C000__0000001C760FA190\n000000067F0000400200008A590000478000-000000067F0000400200008A59000047C000__00000038E67ABFA0\n000000067F0000400200008A590000478000-000000067F0000400200008A59000047C000__0000003903F1CFE8\n000000067F0000400200008A590000478000-000000067F0000400200008A59000047C000__0000003B99F7F8A0\n000000067F0000400200008A590000478000-000000067F0000400200008A59000047C000__0000005D2FFFFB38\n000000067F0000400200008A590000478000-000000067F0000400200008A59000047C000__00000073AD3FE6B8\n000000067F0000400200008A590000478000-000000067F0000400200008A59000047C000__000000914E3F38F0\n000000067F0000400200008A590000478000-000000067F0000400200008A59000047C000__000000931B33AE68\n000000067F0000400200008A590000478000-000000067F0000400200008A59000047C000__000000931B9AFDF8\n000000067F0000400200008A59000047B9EA-000000067F0000400200008A5900004843E5__00000007AD75F249-000000085D1DF561\n000000067F0000400200008A59000047C000-000000067F0000400200008A590000480000__000000088D7FE420\n000000067F0000400200008A59000047C000-000000067F0000400200008A590000480000__0000001C760FA190\n000000067F0000400200008A59000047C000-000000067F0000400200008A590000480000__00000038E67ABFA0\n000000067F0000400200008A59000047C000-000000067F0000400200008A590000480000__0000003903F1CFE8\n000000067F0000400200008A59000047C000-000000067F0000400200008A590000480000__0000003B99F7F8A0\n000000067F0000400200008A59000047C000-000000067F0000400200008A590000480000__0000005D2FFFFB38\n000000067F0000400200008A59000047C000-000000067F0000400200008A590000480000__00000073AD3FE6B8\n000000067F0000400200008A59000047C000-000000067F0000400200008A590000480000__000000914E3F38F0\n000000067F0000400200008A59000047C000-000000067F0000400200008A590000480000__000000931B33AE68\n000000067F0000400200008A59000047C00
0-000000067F0000400200008A590000480000__000000931B9AFDF8\n000000067F0000400200008A590000480000-000000067F0000400200008A590000484000__000000088D7FE420\n000000067F0000400200008A590000480000-000000067F0000400200008A590000484000__0000001C760FA190\n000000067F0000400200008A590000480000-000000067F0000400200008A590000484000__00000038E67ABFA0\n000000067F0000400200008A590000480000-000000067F0000400200008A590000484000__0000003903F1CFE8\n000000067F0000400200008A590000480000-000000067F0000400200008A590000484000__0000003B99F7F8A0\n000000067F0000400200008A590000480000-000000067F0000400200008A590000484000__0000005D2FFFFB38\n000000067F0000400200008A590000480000-000000067F0000400200008A590000484000__00000073AD3FE6B8\n000000067F0000400200008A590000480000-000000067F0000400200008A590000484000__000000914E3F38F0\n000000067F0000400200008A590000480000-000000067F0000400200008A590000484000__000000931B33AE68\n000000067F0000400200008A590000480000-000000067F0000400200008A590000484000__000000931B9AFDF8\n000000067F0000400200008A590000484000-000000067F0000400200008A590000488000__000000088D7FE420\n000000067F0000400200008A590000484000-000000067F0000400200008A590000488000__0000001C760FA190\n000000067F0000400200008A590000484000-000000067F0000400200008A590000488000__00000038E67ABFA0\n000000067F0000400200008A590000484000-000000067F0000400200008A590000488000__0000003903F1CFE8\n000000067F0000400200008A590000484000-000000067F0000400200008A590000488000__0000003B99F7F8A0\n000000067F0000400200008A590000484000-000000067F0000400200008A590000488000__0000005D2FFFFB38\n000000067F0000400200008A590000484000-000000067F0000400200008A590000488000__00000073AD3FE6B8\n000000067F0000400200008A590000484000-000000067F0000400200008A590000488000__000000914E3F38F0\n000000067F0000400200008A590000484000-000000067F0000400200008A590000488000__000000931B33AE68\n000000067F0000400200008A590000484000-000000067F0000400200008A590000488000__000000931B9AFDF8\n000000067F0000400200008A5900004843E5-000000067F0000400200008A590100000000__0000000
7AD75F249-000000085D1DF561\n000000067F0000400200008A590000484710-000000067F0000400200008A59000048D0ED__000000085D1DF561-000000090CC5DF81\n000000067F0000400200008A590000488000-000000067F0000400200008A59000048C000__000000088D7FE420\n000000067F0000400200008A590000488000-000000067F0000400200008A59000048C000__0000001C760FA190\n000000067F0000400200008A590000488000-000000067F0000400200008A59000048C000__00000038E67ABFA0\n000000067F0000400200008A590000488000-000000067F0000400200008A59000048C000__0000003903F1CFE8\n000000067F0000400200008A590000488000-000000067F0000400200008A59000048C000__0000003B99F7F8A0\n000000067F0000400200008A590000488000-000000067F0000400200008A59000048C000__0000005D2FFFFB38\n000000067F0000400200008A590000488000-000000067F0000400200008A59000048C000__00000073AD3FE6B8\n000000067F0000400200008A590000488000-000000067F0000400200008A59000048C000__000000914E3F38F0\n000000067F0000400200008A590000488000-000000067F0000400200008A59000048C000__000000931B33AE68\n000000067F0000400200008A590000488000-000000067F0000400200008A59000048C000__000000931B9AFDF8\n000000067F0000400200008A59000048C000-000000067F0000400200008A590000490000__000000088D7FE420\n000000067F0000400200008A59000048C000-000000067F0000400200008A590000490000__0000001C760FA190\n000000067F0000400200008A59000048C000-000000067F0000400200008A590000490000__00000038E67ABFA0\n000000067F0000400200008A59000048C000-000000067F0000400200008A590000490000__0000003903F1CFE8\n000000067F0000400200008A59000048C000-000000067F0000400200008A590000490000__0000003B99F7F8A0\n000000067F0000400200008A59000048C000-000000067F0000400200008A590000490000__0000005D2FFFFB38\n000000067F0000400200008A59000048C000-000000067F0000400200008A590000490000__00000073AD3FE6B8\n000000067F0000400200008A59000048C000-000000067F0000400200008A590000490000__000000914E3F38F0\n000000067F0000400200008A59000048C000-000000067F0000400200008A590000490000__000000931B33AE68\n000000067F0000400200008A59000048C000-000000067F0000400200008A590000490000__000000931B9AFDF8\n00
0000067F0000400200008A59000048D0ED-000000067F0000400200008A590000495ACD__000000085D1DF561-000000090CC5DF81\n000000067F0000400200008A590000490000-000000067F0000400200008A590000494000__000000088D7FE420\n000000067F0000400200008A590000490000-000000067F0000400200008A590000494000__0000001C760FA190\n000000067F0000400200008A590000490000-000000067F0000400200008A590000494000__00000038E67ABFA0\n000000067F0000400200008A590000490000-000000067F0000400200008A590000494000__0000003903F1CFE8\n000000067F0000400200008A590000490000-000000067F0000400200008A590000494000__0000003B99F7F8A0\n000000067F0000400200008A590000490000-000000067F0000400200008A590000494000__0000005D2FFFFB38\n000000067F0000400200008A590000490000-000000067F0000400200008A590000494000__00000073AD3FE6B8\n000000067F0000400200008A590000490000-000000067F0000400200008A590000494000__000000914E3F38F0\n000000067F0000400200008A590000490000-000000067F0000400200008A590000494000__000000931B33AE68\n000000067F0000400200008A590000490000-000000067F0000400200008A590000494000__000000931B9AFDF8\n000000067F0000400200008A590000494000-000000067F0000400200008A590000498000__000000088D7FE420\n000000067F0000400200008A590000494000-000000067F0000400200008A590000498000__0000001C760FA190\n000000067F0000400200008A590000494000-000000067F0000400200008A590000498000__00000038E67ABFA0\n000000067F0000400200008A590000494000-000000067F0000400200008A590000498000__0000003903F1CFE8\n000000067F0000400200008A590000494000-000000067F0000400200008A590000498000__0000003B99F7F8A0\n000000067F0000400200008A590000494000-000000067F0000400200008A590000498000__0000005D2FFFFB38\n000000067F0000400200008A590000494000-000000067F0000400200008A590000498000__00000073AD3FE6B8\n000000067F0000400200008A590000494000-000000067F0000400200008A590000498000__000000914E3F38F0\n000000067F0000400200008A590000494000-000000067F0000400200008A590000498000__000000931B33AE68\n000000067F0000400200008A590000494000-000000067F0000400200008A590000498000__000000931B9AFDF8\n000000067F0000400200008A59000049
5ACD-000000067F0000400200008A59000049E49A__000000085D1DF561-000000090CC5DF81\n000000067F0000400200008A590000498000-000000067F0000400200008A59000049C000__000000088D7FE420\n000000067F0000400200008A590000498000-000000067F0000400200008A59000049C000__0000001C760FA190\n000000067F0000400200008A590000498000-000000067F0000400200008A59000049C000__00000038E67ABFA0\n000000067F0000400200008A590000498000-000000067F0000400200008A59000049C000__0000003903F1CFE8\n000000067F0000400200008A590000498000-000000067F0000400200008A59000049C000__0000003B99F7F8A0\n000000067F0000400200008A590000498000-000000067F0000400200008A59000049C000__0000005D2FFFFB38\n000000067F0000400200008A590000498000-000000067F0000400200008A59000049C000__00000073AD3FE6B8\n000000067F0000400200008A590000498000-000000067F0000400200008A59000049C000__000000914E3F38F0\n000000067F0000400200008A590000498000-000000067F0000400200008A59000049C000__000000931B33AE68\n000000067F0000400200008A590000498000-000000067F0000400200008A59000049C000__000000931B9AFDF8\n000000067F0000400200008A59000049C000-000000067F0000400200008A5900004A0000__0000001C760FA190\n000000067F0000400200008A59000049C000-000000067F0000400200008A5900004A0000__00000038E67ABFA0\n000000067F0000400200008A59000049C000-000000067F0000400200008A5900004A0000__0000003903F1CFE8\n000000067F0000400200008A59000049C000-000000067F0000400200008A5900004A0000__0000003B99F7F8A0\n000000067F0000400200008A59000049C000-000000067F0000400200008A5900004A0000__0000005D2FFFFB38\n000000067F0000400200008A59000049C000-000000067F0000400200008A5900004A0000__00000073AD3FE6B8\n000000067F0000400200008A59000049C000-000000067F0000400200008A5900004A0000__000000914E3F38F0\n000000067F0000400200008A59000049C000-000000067F0000400200008A5900004A0000__000000931B33AE68\n000000067F0000400200008A59000049C000-000000067F0000400200008A5900004A0000__000000931B9AFDF8\n000000067F0000400200008A59000049C000-030000000000000000000000000000000002__000000088D7FE420\n000000067F0000400200008A59000049E49A-000000067F0000400200008A5
900004A6E62__000000085D1DF561-000000090CC5DF81\n000000067F0000400200008A5900004A0000-000000067F0000400200008A5900004A4000__0000001C760FA190\n000000067F0000400200008A5900004A0000-000000067F0000400200008A5900004A4000__00000038E67ABFA0\n000000067F0000400200008A5900004A0000-000000067F0000400200008A5900004A4000__0000003903F1CFE8\n000000067F0000400200008A5900004A0000-000000067F0000400200008A5900004A4000__0000003B99F7F8A0\n000000067F0000400200008A5900004A0000-000000067F0000400200008A5900004A4000__0000005D2FFFFB38\n000000067F0000400200008A5900004A0000-000000067F0000400200008A5900004A4000__00000073AD3FE6B8\n000000067F0000400200008A5900004A0000-000000067F0000400200008A5900004A4000__000000914E3F38F0\n000000067F0000400200008A5900004A0000-000000067F0000400200008A5900004A4000__000000931B33AE68\n000000067F0000400200008A5900004A0000-000000067F0000400200008A5900004A4000__000000931B9AFDF8\n000000067F0000400200008A5900004A4000-000000067F0000400200008A5900004A8000__0000001C760FA190\n000000067F0000400200008A5900004A4000-000000067F0000400200008A5900004A8000__00000038E67ABFA0\n000000067F0000400200008A5900004A4000-000000067F0000400200008A5900004A8000__0000003903F1CFE8\n000000067F0000400200008A5900004A4000-000000067F0000400200008A5900004A8000__0000003B99F7F8A0\n000000067F0000400200008A5900004A4000-000000067F0000400200008A5900004A8000__0000005D2FFFFB38\n000000067F0000400200008A5900004A4000-000000067F0000400200008A5900004A8000__00000073AD3FE6B8\n000000067F0000400200008A5900004A4000-000000067F0000400200008A5900004A8000__000000914E3F38F0\n000000067F0000400200008A5900004A4000-000000067F0000400200008A5900004A8000__000000931B33AE68\n000000067F0000400200008A5900004A4000-000000067F0000400200008A5900004A8000__000000931B9AFDF8\n000000067F0000400200008A5900004A6E62-000000067F0000400200008A5900004AF849__000000085D1DF561-000000090CC5DF81\n000000067F0000400200008A5900004A8000-000000067F0000400200008A5900004AC000__0000001C760FA190\n000000067F0000400200008A5900004A8000-000000067F0000400200008A5900004AC000__
00000038E67ABFA0\n000000067F0000400200008A5900004A8000-000000067F0000400200008A5900004AC000__0000003903F1CFE8\n000000067F0000400200008A5900004A8000-000000067F0000400200008A5900004AC000__0000003B99F7F8A0\n000000067F0000400200008A5900004A8000-000000067F0000400200008A5900004AC000__0000005D2FFFFB38\n000000067F0000400200008A5900004A8000-000000067F0000400200008A5900004AC000__00000073AD3FE6B8\n000000067F0000400200008A5900004A8000-000000067F0000400200008A5900004AC000__000000914E3F38F0\n000000067F0000400200008A5900004A8000-000000067F0000400200008A5900004AC000__000000931B33AE68\n000000067F0000400200008A5900004A8000-000000067F0000400200008A5900004AC000__000000931B9AFDF8\n000000067F0000400200008A5900004AC000-000000067F0000400200008A5900004B0000__0000001C760FA190\n000000067F0000400200008A5900004AC000-000000067F0000400200008A5900004B0000__00000038E67ABFA0\n000000067F0000400200008A5900004AC000-000000067F0000400200008A5900004B0000__0000003903F1CFE8\n000000067F0000400200008A5900004AC000-000000067F0000400200008A5900004B0000__0000003B99F7F8A0\n000000067F0000400200008A5900004AC000-000000067F0000400200008A5900004B0000__0000005D2FFFFB38\n000000067F0000400200008A5900004AC000-000000067F0000400200008A5900004B0000__00000073AD3FE6B8\n000000067F0000400200008A5900004AC000-000000067F0000400200008A5900004B0000__000000914E3F38F0\n000000067F0000400200008A5900004AC000-000000067F0000400200008A5900004B0000__000000931B33AE68\n000000067F0000400200008A5900004AC000-000000067F0000400200008A5900004B0000__000000931B9AFDF8\n000000067F0000400200008A5900004AF849-000000067F0000400200008A5900004B823E__000000085D1DF561-000000090CC5DF81\n000000067F0000400200008A5900004B0000-000000067F0000400200008A5900004B4000__0000001C760FA190\n000000067F0000400200008A5900004B0000-000000067F0000400200008A5900004B4000__00000038E67ABFA0\n000000067F0000400200008A5900004B0000-000000067F0000400200008A5900004B4000__0000003903F1CFE8\n000000067F0000400200008A5900004B0000-000000067F0000400200008A5900004B4000__0000003B99F7F8A0\n000000067F00
00400200008A5900004B0000-000000067F0000400200008A5900004B4000__0000005D2FFFFB38\n000000067F0000400200008A5900004B0000-000000067F0000400200008A5900004B4000__00000073AD3FE6B8\n000000067F0000400200008A5900004B0000-000000067F0000400200008A5900004B4000__000000914E3F38F0\n000000067F0000400200008A5900004B0000-000000067F0000400200008A5900004B4000__000000931B33AE68\n000000067F0000400200008A5900004B0000-000000067F0000400200008A5900004B4000__000000931B9AFDF8\n000000067F0000400200008A5900004B4000-000000067F0000400200008A5900004B8000__0000001C760FA190\n000000067F0000400200008A5900004B4000-000000067F0000400200008A5900004B8000__00000038E67ABFA0\n000000067F0000400200008A5900004B4000-000000067F0000400200008A5900004B8000__0000003903F1CFE8\n000000067F0000400200008A5900004B4000-000000067F0000400200008A5900004B8000__0000003B99F7F8A0\n000000067F0000400200008A5900004B4000-000000067F0000400200008A5900004B8000__0000005D2FFFFB38\n000000067F0000400200008A5900004B4000-000000067F0000400200008A5900004B8000__00000073AD3FE6B8\n000000067F0000400200008A5900004B4000-000000067F0000400200008A5900004B8000__000000914E3F38F0\n000000067F0000400200008A5900004B4000-000000067F0000400200008A5900004B8000__000000931B33AE68\n000000067F0000400200008A5900004B4000-000000067F0000400200008A5900004B8000__000000931B9AFDF8\n000000067F0000400200008A5900004B8000-000000067F0000400200008A5900004BC000__0000001C760FA190\n000000067F0000400200008A5900004B8000-000000067F0000400200008A5900004BC000__00000038E67ABFA0\n000000067F0000400200008A5900004B8000-000000067F0000400200008A5900004BC000__0000003903F1CFE8\n000000067F0000400200008A5900004B8000-000000067F0000400200008A5900004BC000__0000003B99F7F8A0\n000000067F0000400200008A5900004B8000-000000067F0000400200008A5900004BC000__0000005D2FFFFB38\n000000067F0000400200008A5900004B8000-000000067F0000400200008A5900004BC000__00000073AD3FE6B8\n000000067F0000400200008A5900004B8000-000000067F0000400200008A5900004BC000__000000914E3F38F0\n000000067F0000400200008A5900004B8000-000000067F000040020000
8A5900004BC000__000000931B33AE68\n000000067F0000400200008A5900004B8000-000000067F0000400200008A5900004BC000__000000931B9AFDF8\n000000067F0000400200008A5900004B823E-000000067F0000400200008A5900004C0C22__000000085D1DF561-000000090CC5DF81\n000000067F0000400200008A5900004BC000-000000067F0000400200008A5900004C0000__0000001C760FA190\n000000067F0000400200008A5900004BC000-000000067F0000400200008A5900004C0000__00000038E67ABFA0\n000000067F0000400200008A5900004BC000-000000067F0000400200008A5900004C0000__0000003903F1CFE8\n000000067F0000400200008A5900004BC000-000000067F0000400200008A5900004C0000__0000003B99F7F8A0\n000000067F0000400200008A5900004BC000-000000067F0000400200008A5900004C0000__0000005D2FFFFB38\n000000067F0000400200008A5900004BC000-000000067F0000400200008A5900004C0000__00000073AD3FE6B8\n000000067F0000400200008A5900004BC000-000000067F0000400200008A5900004C0000__000000914E3F38F0\n000000067F0000400200008A5900004BC000-000000067F0000400200008A5900004C0000__000000931B33AE68\n000000067F0000400200008A5900004BC000-000000067F0000400200008A5900004C0000__000000931B9AFDF8\n000000067F0000400200008A5900004C0000-000000067F0000400200008A5900004C4000__0000001C760FA190\n000000067F0000400200008A5900004C0000-000000067F0000400200008A5900004C4000__00000038E67ABFA0\n000000067F0000400200008A5900004C0000-000000067F0000400200008A5900004C4000__0000003903F1CFE8\n000000067F0000400200008A5900004C0000-000000067F0000400200008A5900004C4000__0000003B99F7F8A0\n000000067F0000400200008A5900004C0000-000000067F0000400200008A5900004C4000__0000005D2FFFFB38\n000000067F0000400200008A5900004C0000-000000067F0000400200008A5900004C4000__00000073AD3FE6B8\n000000067F0000400200008A5900004C0000-000000067F0000400200008A5900004C4000__000000914E3F38F0\n000000067F0000400200008A5900004C0000-000000067F0000400200008A5900004C4000__000000931B33AE68\n000000067F0000400200008A5900004C0000-000000067F0000400200008A5900004C4000__000000931B9AFDF8\n000000067F0000400200008A5900004C0C22-000000067F0000400200008A5900004C9601__000000085D1DF5
61-000000090CC5DF81\n000000067F0000400200008A5900004C4000-000000067F0000400200008A5900004C8000__0000001C760FA190\n000000067F0000400200008A5900004C4000-000000067F0000400200008A5900004C8000__00000038E67ABFA0\n000000067F0000400200008A5900004C4000-000000067F0000400200008A5900004C8000__0000003903F1CFE8\n000000067F0000400200008A5900004C4000-000000067F0000400200008A5900004C8000__0000003B99F7F8A0\n000000067F0000400200008A5900004C4000-000000067F0000400200008A5900004C8000__0000005D2FFFFB38\n000000067F0000400200008A5900004C4000-000000067F0000400200008A5900004C8000__00000073AD3FE6B8\n000000067F0000400200008A5900004C4000-000000067F0000400200008A5900004C8000__000000914E3F38F0\n000000067F0000400200008A5900004C4000-000000067F0000400200008A5900004C8000__000000931B33AE68\n000000067F0000400200008A5900004C4000-000000067F0000400200008A5900004C8000__000000931B9AFDF8\n000000067F0000400200008A5900004C8000-000000067F0000400200008A5900004CC000__0000001C760FA190\n000000067F0000400200008A5900004C8000-000000067F0000400200008A5900004CC000__00000038E67ABFA0\n000000067F0000400200008A5900004C8000-000000067F0000400200008A5900004CC000__0000003903F1CFE8\n000000067F0000400200008A5900004C8000-000000067F0000400200008A5900004CC000__0000003B99F7F8A0\n000000067F0000400200008A5900004C8000-000000067F0000400200008A5900004CC000__0000005D2FFFFB38\n000000067F0000400200008A5900004C8000-000000067F0000400200008A5900004CC000__00000073AD3FE6B8\n000000067F0000400200008A5900004C8000-000000067F0000400200008A5900004CC000__000000914E3F38F0\n000000067F0000400200008A5900004C8000-000000067F0000400200008A5900004CC000__000000931B33AE68\n000000067F0000400200008A5900004C8000-000000067F0000400200008A5900004CC000__000000931B9AFDF8\n000000067F0000400200008A5900004C9601-000000067F0000400200008A5900004D1FCD__000000085D1DF561-000000090CC5DF81\n000000067F0000400200008A5900004CC000-000000067F0000400200008A5900004D0000__0000001C760FA190\n000000067F0000400200008A5900004CC000-000000067F0000400200008A5900004D0000__00000038E67ABFA0\n000000067
F0000400200008A5900004CC000-000000067F0000400200008A5900004D0000__0000003903F1CFE8\n000000067F0000400200008A5900004CC000-000000067F0000400200008A5900004D0000__0000003B99F7F8A0\n000000067F0000400200008A5900004CC000-000000067F0000400200008A5900004D0000__0000005D2FFFFB38\n000000067F0000400200008A5900004CC000-000000067F0000400200008A5900004D0000__00000073AD3FE6B8\n000000067F0000400200008A5900004CC000-000000067F0000400200008A5900004D0000__000000914E3F38F0\n000000067F0000400200008A5900004CC000-000000067F0000400200008A5900004D0000__000000931B33AE68\n000000067F0000400200008A5900004CC000-000000067F0000400200008A5900004D0000__000000931B9AFDF8\n000000067F0000400200008A5900004D0000-000000067F0000400200008A5900004D4000__0000001C760FA190\n000000067F0000400200008A5900004D0000-000000067F0000400200008A5900004D4000__00000038E67ABFA0\n000000067F0000400200008A5900004D0000-000000067F0000400200008A5900004D4000__0000003903F1CFE8\n000000067F0000400200008A5900004D0000-000000067F0000400200008A5900004D4000__0000003B99F7F8A0\n000000067F0000400200008A5900004D0000-000000067F0000400200008A5900004D4000__0000005D2FFFFB38\n000000067F0000400200008A5900004D0000-000000067F0000400200008A5900004D4000__00000073AD3FE6B8\n000000067F0000400200008A5900004D0000-000000067F0000400200008A5900004D4000__000000914E3F38F0\n000000067F0000400200008A5900004D0000-000000067F0000400200008A5900004D4000__000000931B33AE68\n000000067F0000400200008A5900004D0000-000000067F0000400200008A5900004D4000__000000931B9AFDF8\n000000067F0000400200008A5900004D1FCD-000000067F0000400200008A5900004DA99B__000000085D1DF561-000000090CC5DF81\n000000067F0000400200008A5900004D4000-000000067F0000400200008A5900004D8000__0000001C760FA190\n000000067F0000400200008A5900004D4000-000000067F0000400200008A5900004D8000__00000038E67ABFA0\n000000067F0000400200008A5900004D4000-000000067F0000400200008A5900004D8000__0000003903F1CFE8\n000000067F0000400200008A5900004D4000-000000067F0000400200008A5900004D8000__0000003B99F7F8A0\n000000067F0000400200008A5900004D4000-00
0000067F0000400200008A5900004D8000__0000005D2FFFFB38\n000000067F0000400200008A5900004D4000-000000067F0000400200008A5900004D8000__00000073AD3FE6B8\n000000067F0000400200008A5900004D4000-000000067F0000400200008A5900004D8000__000000914E3F38F0\n000000067F0000400200008A5900004D4000-000000067F0000400200008A5900004D8000__000000931B33AE68\n000000067F0000400200008A5900004D4000-000000067F0000400200008A5900004D8000__000000931B9AFDF8\n000000067F0000400200008A5900004D8000-000000067F0000400200008A5900004DC000__0000001C760FA190\n000000067F0000400200008A5900004D8000-000000067F0000400200008A5900004DC000__00000038E67ABFA0\n000000067F0000400200008A5900004D8000-000000067F0000400200008A5900004DC000__0000003903F1CFE8\n000000067F0000400200008A5900004D8000-000000067F0000400200008A5900004DC000__0000003B99F7F8A0\n000000067F0000400200008A5900004D8000-000000067F0000400200008A5900004DC000__0000005D2FFFFB38\n000000067F0000400200008A5900004D8000-000000067F0000400200008A5900004DC000__00000073AD3FE6B8\n000000067F0000400200008A5900004D8000-000000067F0000400200008A5900004DC000__000000914E3F38F0\n000000067F0000400200008A5900004D8000-000000067F0000400200008A5900004DC000__000000931B33AE68\n000000067F0000400200008A5900004D8000-000000067F0000400200008A5900004DC000__000000931B9AFDF8\n000000067F0000400200008A5900004DA99B-000000067F0000400200008A5900004E3359__000000085D1DF561-000000090CC5DF81\n000000067F0000400200008A5900004DC000-000000067F0000400200008A5900004E0000__0000001C760FA190\n000000067F0000400200008A5900004DC000-000000067F0000400200008A5900004E0000__00000038E67ABFA0\n000000067F0000400200008A5900004DC000-000000067F0000400200008A5900004E0000__0000003903F1CFE8\n000000067F0000400200008A5900004DC000-000000067F0000400200008A5900004E0000__0000003B99F7F8A0\n000000067F0000400200008A5900004DC000-000000067F0000400200008A5900004E0000__0000005D2FFFFB38\n000000067F0000400200008A5900004DC000-000000067F0000400200008A5900004E0000__00000073AD3FE6B8\n000000067F0000400200008A5900004DC000-000000067F0000400200008A5900004E
0000__000000914E3F38F0\n000000067F0000400200008A5900004DC000-000000067F0000400200008A5900004E0000__000000931B33AE68\n000000067F0000400200008A5900004DC000-000000067F0000400200008A5900004E0000__000000931B9AFDF8\n000000067F0000400200008A5900004E0000-000000067F0000400200008A5900004E4000__0000001C725A2400\n000000067F0000400200008A5900004E0000-000000067F0000400200008A5900004E4000__0000001C760FA190\n000000067F0000400200008A5900004E0000-000000067F0000400200008A5900004E4000__00000038E67ABFA0\n000000067F0000400200008A5900004E0000-000000067F0000400200008A5900004E4000__0000003903F1CFE8\n000000067F0000400200008A5900004E0000-000000067F0000400200008A5900004E4000__0000003B99F7F8A0\n000000067F0000400200008A5900004E0000-000000067F0000400200008A5900004E4000__0000005D2FFFFB38\n000000067F0000400200008A5900004E0000-000000067F0000400200008A5900004E4000__00000073AD3FE6B8\n000000067F0000400200008A5900004E0000-000000067F0000400200008A5900004E4000__000000914E3F38F0\n000000067F0000400200008A5900004E0000-000000067F0000400200008A5900004E4000__000000931B33AE68\n000000067F0000400200008A5900004E0000-000000067F0000400200008A5900004E4000__000000931B9AFDF8\n000000067F0000400200008A5900004E3359-000000067F0000400200008A590100000000__000000085D1DF561-000000090CC5DF81\n000000067F0000400200008A5900004E367E-000000067F0000400200008A5900004EC06D__000000090CC5DF81-00000009AC75E659\n000000067F0000400200008A5900004E4000-000000067F0000400200008A5900004E8000__0000001C725A2400\n000000067F0000400200008A5900004E4000-000000067F0000400200008A5900004E8000__0000001C760FA190\n000000067F0000400200008A5900004E4000-000000067F0000400200008A5900004E8000__00000038E67ABFA0\n000000067F0000400200008A5900004E4000-000000067F0000400200008A5900004E8000__0000003903F1CFE8\n000000067F0000400200008A5900004E4000-000000067F0000400200008A5900004E8000__0000003B99F7F8A0\n000000067F0000400200008A5900004E4000-000000067F0000400200008A5900004E8000__0000005D2FFFFB38\n000000067F0000400200008A5900004E4000-000000067F0000400200008A5900004E8000__0000007
3AD3FE6B8\n000000067F0000400200008A5900004E4000-000000067F0000400200008A5900004E8000__000000914E3F38F0\n000000067F0000400200008A5900004E4000-000000067F0000400200008A5900004E8000__000000931B33AE68\n000000067F0000400200008A5900004E4000-000000067F0000400200008A5900004E8000__000000931B9AFDF8\n000000067F0000400200008A5900004E8000-000000067F0000400200008A5900004EC000__0000001C725A2400\n000000067F0000400200008A5900004E8000-000000067F0000400200008A5900004EC000__0000001C760FA190\n000000067F0000400200008A5900004E8000-000000067F0000400200008A5900004EC000__00000038E67ABFA0\n000000067F0000400200008A5900004E8000-000000067F0000400200008A5900004EC000__0000003903F1CFE8\n000000067F0000400200008A5900004E8000-000000067F0000400200008A5900004EC000__0000003B99F7F8A0\n000000067F0000400200008A5900004E8000-000000067F0000400200008A5900004EC000__0000005D2FFFFB38\n000000067F0000400200008A5900004E8000-000000067F0000400200008A5900004EC000__00000073AD3FE6B8\n000000067F0000400200008A5900004E8000-000000067F0000400200008A5900004EC000__000000914E3F38F0\n000000067F0000400200008A5900004E8000-000000067F0000400200008A5900004EC000__000000931B33AE68\n000000067F0000400200008A5900004E8000-000000067F0000400200008A5900004EC000__000000931B9AFDF8\n000000067F0000400200008A5900004EC000-000000067F0000400200008A5900004F0000__0000001C725A2400\n000000067F0000400200008A5900004EC000-000000067F0000400200008A5900004F0000__0000001C760FA190\n000000067F0000400200008A5900004EC000-000000067F0000400200008A5900004F0000__00000038E67ABFA0\n000000067F0000400200008A5900004EC000-000000067F0000400200008A5900004F0000__0000003903F1CFE8\n000000067F0000400200008A5900004EC000-000000067F0000400200008A5900004F0000__0000003B99F7F8A0\n000000067F0000400200008A5900004EC000-000000067F0000400200008A5900004F0000__0000005D2FFFFB38\n000000067F0000400200008A5900004EC000-000000067F0000400200008A5900004F0000__00000073AD3FE6B8\n000000067F0000400200008A5900004EC000-000000067F0000400200008A5900004F0000__000000914E3F38F0\n000000067F0000400200008A5900004EC000
-000000067F0000400200008A5900004F0000__000000931B33AE68\n000000067F0000400200008A5900004EC000-000000067F0000400200008A5900004F0000__000000931B9AFDF8\n000000067F0000400200008A5900004EC06D-000000067F0000400200008A5900004F4A59__000000090CC5DF81-00000009AC75E659\n000000067F0000400200008A5900004F0000-000000067F0000400200008A5900004F4000__0000001C725A2400\n000000067F0000400200008A5900004F0000-000000067F0000400200008A5900004F4000__0000001C760FA190\n000000067F0000400200008A5900004F0000-000000067F0000400200008A5900004F4000__00000038E67ABFA0\n000000067F0000400200008A5900004F0000-000000067F0000400200008A5900004F4000__0000003903F1CFE8\n000000067F0000400200008A5900004F0000-000000067F0000400200008A5900004F4000__0000003B99F7F8A0\n000000067F0000400200008A5900004F0000-000000067F0000400200008A5900004F4000__0000005D2FFFFB38\n000000067F0000400200008A5900004F0000-000000067F0000400200008A5900004F4000__00000073AD3FE6B8\n000000067F0000400200008A5900004F0000-000000067F0000400200008A5900004F4000__000000914E3F38F0\n000000067F0000400200008A5900004F0000-000000067F0000400200008A5900004F4000__000000931B33AE68\n000000067F0000400200008A5900004F0000-000000067F0000400200008A5900004F4000__000000931B9AFDF8\n000000067F0000400200008A5900004F4000-000000067F0000400200008A5900004F8000__0000001C725A2400\n000000067F0000400200008A5900004F4000-000000067F0000400200008A5900004F8000__0000001C760FA190\n000000067F0000400200008A5900004F4000-000000067F0000400200008A5900004F8000__00000038E67ABFA0\n000000067F0000400200008A5900004F4000-000000067F0000400200008A5900004F8000__0000003903F1CFE8\n000000067F0000400200008A5900004F4000-000000067F0000400200008A5900004F8000__0000003B99F7F8A0\n000000067F0000400200008A5900004F4000-000000067F0000400200008A5900004F8000__0000005D2FFFFB38\n000000067F0000400200008A5900004F4000-000000067F0000400200008A5900004F8000__00000073AD3FE6B8\n000000067F0000400200008A5900004F4000-000000067F0000400200008A5900004F8000__000000914E3F38F0\n000000067F0000400200008A5900004F4000-000000067F0000400200008A59000
04F8000__000000931B33AE68\n000000067F0000400200008A5900004F4000-000000067F0000400200008A5900004F8000__000000931B9AFDF8\n000000067F0000400200008A5900004F4A59-000000067F0000400200008A5900004FD445__000000090CC5DF81-00000009AC75E659\n000000067F0000400200008A5900004F8000-000000067F0000400200008A5900004FC000__0000001C725A2400\n000000067F0000400200008A5900004F8000-000000067F0000400200008A5900004FC000__0000001C760FA190\n000000067F0000400200008A5900004F8000-000000067F0000400200008A5900004FC000__00000038E67ABFA0\n000000067F0000400200008A5900004F8000-000000067F0000400200008A5900004FC000__0000003903F1CFE8\n000000067F0000400200008A5900004F8000-000000067F0000400200008A5900004FC000__0000003B99F7F8A0\n000000067F0000400200008A5900004F8000-000000067F0000400200008A5900004FC000__0000005D2FFFFB38\n000000067F0000400200008A5900004F8000-000000067F0000400200008A5900004FC000__00000073AD3FE6B8\n000000067F0000400200008A5900004F8000-000000067F0000400200008A5900004FC000__000000914E3F38F0\n000000067F0000400200008A5900004F8000-000000067F0000400200008A5900004FC000__000000931B33AE68\n000000067F0000400200008A5900004F8000-000000067F0000400200008A5900004FC000__000000931B9AFDF8\n000000067F0000400200008A5900004FC000-000000067F0000400200008A590000500000__0000001C725A2400\n000000067F0000400200008A5900004FC000-000000067F0000400200008A590000500000__0000001C760FA190\n000000067F0000400200008A5900004FC000-000000067F0000400200008A590000500000__00000038E67ABFA0\n000000067F0000400200008A5900004FC000-000000067F0000400200008A590000500000__0000003903F1CFE8\n000000067F0000400200008A5900004FC000-000000067F0000400200008A590000500000__0000003B99F7F8A0\n000000067F0000400200008A5900004FC000-000000067F0000400200008A590000500000__0000005D2FFFFB38\n000000067F0000400200008A5900004FC000-000000067F0000400200008A590000500000__00000073AD3FE6B8\n000000067F0000400200008A5900004FC000-000000067F0000400200008A590000500000__000000914E3F38F0\n000000067F0000400200008A5900004FC000-000000067F0000400200008A590000500000__000000931B33AE68\n000
000067F0000400200008A5900004FC000-000000067F0000400200008A590000500000__000000931B9AFDF8\n000000067F0000400200008A5900004FD445-000000067F0000400200008A590000505E1F__000000090CC5DF81-00000009AC75E659\n000000067F0000400200008A590000500000-000000067F0000400200008A590000504000__0000001C725A2400\n000000067F0000400200008A590000500000-000000067F0000400200008A590000504000__0000001C760FA190\n000000067F0000400200008A590000500000-000000067F0000400200008A590000504000__00000038E67ABFA0\n000000067F0000400200008A590000500000-000000067F0000400200008A590000504000__0000003903F1CFE8\n000000067F0000400200008A590000500000-000000067F0000400200008A590000504000__0000003B99F7F8A0\n000000067F0000400200008A590000500000-000000067F0000400200008A590000504000__0000005D2FFFFB38\n000000067F0000400200008A590000500000-000000067F0000400200008A590000504000__00000073AD3FE6B8\n000000067F0000400200008A590000500000-000000067F0000400200008A590000504000__000000914E3F38F0\n000000067F0000400200008A590000500000-000000067F0000400200008A590000504000__000000931B33AE68\n000000067F0000400200008A590000500000-000000067F0000400200008A590000504000__000000931B9AFDF8\n000000067F0000400200008A590000504000-000000067F0000400200008A590000508000__0000001C725A2400\n000000067F0000400200008A590000504000-000000067F0000400200008A590000508000__0000001C760FA190\n000000067F0000400200008A590000504000-000000067F0000400200008A590000508000__00000038E67ABFA0\n000000067F0000400200008A590000504000-000000067F0000400200008A590000508000__0000003903F1CFE8\n000000067F0000400200008A590000504000-000000067F0000400200008A590000508000__0000003B99F7F8A0\n000000067F0000400200008A590000504000-000000067F0000400200008A590000508000__0000005D2FFFFB38\n000000067F0000400200008A590000504000-000000067F0000400200008A590000508000__00000073AD3FE6B8\n000000067F0000400200008A590000504000-000000067F0000400200008A590000508000__000000914E3F38F0\n000000067F0000400200008A590000504000-000000067F0000400200008A590000508000__000000931B33AE68\n000000067F0000400200008A590000504
000-000000067F0000400200008A590000508000__000000931B9AFDF8\n000000067F0000400200008A590000505E1F-000000067F0000400200008A59000050E7F0__000000090CC5DF81-00000009AC75E659\n000000067F0000400200008A590000508000-000000067F0000400200008A59000050C000__0000001C725A2400\n000000067F0000400200008A590000508000-000000067F0000400200008A59000050C000__0000001C760FA190\n000000067F0000400200008A590000508000-000000067F0000400200008A59000050C000__00000038E67ABFA0\n000000067F0000400200008A590000508000-000000067F0000400200008A59000050C000__0000003903F1CFE8\n000000067F0000400200008A590000508000-000000067F0000400200008A59000050C000__0000003B99F7F8A0\n000000067F0000400200008A590000508000-000000067F0000400200008A59000050C000__0000005D2FFFFB38\n000000067F0000400200008A590000508000-000000067F0000400200008A59000050C000__00000073AD3FE6B8\n000000067F0000400200008A590000508000-000000067F0000400200008A59000050C000__000000914E3F38F0\n000000067F0000400200008A590000508000-000000067F0000400200008A59000050C000__000000931B33AE68\n000000067F0000400200008A590000508000-000000067F0000400200008A59000050C000__000000931B9AFDF8\n000000067F0000400200008A59000050C000-000000067F0000400200008A590000510000__0000001C725A2400\n000000067F0000400200008A59000050C000-000000067F0000400200008A590000510000__0000001C760FA190\n000000067F0000400200008A59000050C000-000000067F0000400200008A590000510000__00000038E67ABFA0\n000000067F0000400200008A59000050C000-000000067F0000400200008A590000510000__0000003903F1CFE8\n000000067F0000400200008A59000050C000-000000067F0000400200008A590000510000__0000003B99F7F8A0\n000000067F0000400200008A59000050C000-000000067F0000400200008A590000510000__0000005D2FFFFB38\n000000067F0000400200008A59000050C000-000000067F0000400200008A590000510000__00000073AD3FE6B8\n000000067F0000400200008A59000050C000-000000067F0000400200008A590000510000__000000914E3F38F0\n000000067F0000400200008A59000050C000-000000067F0000400200008A590000510000__000000931B33AE68\n000000067F0000400200008A59000050C000-000000067F0000400200008A59
0000510000__000000931B9AFDF8\n000000067F0000400200008A59000050E7F0-000000067F0000400200008A5900005171C0__000000090CC5DF81-00000009AC75E659\n000000067F0000400200008A590000510000-000000067F0000400200008A590000514000__0000001C725A2400\n000000067F0000400200008A590000510000-000000067F0000400200008A590000514000__0000001C760FA190\n000000067F0000400200008A590000510000-000000067F0000400200008A590000514000__00000038E67ABFA0\n000000067F0000400200008A590000510000-000000067F0000400200008A590000514000__0000003903F1CFE8\n000000067F0000400200008A590000510000-000000067F0000400200008A590000514000__0000003B99F7F8A0\n000000067F0000400200008A590000510000-000000067F0000400200008A590000514000__0000005D2FFFFB38\n000000067F0000400200008A590000510000-000000067F0000400200008A590000514000__00000073AD3FE6B8\n000000067F0000400200008A590000510000-000000067F0000400200008A590000514000__000000914E3F38F0\n000000067F0000400200008A590000510000-000000067F0000400200008A590000514000__000000931B33AE68\n000000067F0000400200008A590000510000-000000067F0000400200008A590000514000__000000931B9AFDF8\n000000067F0000400200008A590000514000-000000067F0000400200008A590000518000__0000001C725A2400\n000000067F0000400200008A590000514000-000000067F0000400200008A590000518000__0000001C760FA190\n000000067F0000400200008A590000514000-000000067F0000400200008A590000518000__00000038E67ABFA0\n000000067F0000400200008A590000514000-000000067F0000400200008A590000518000__0000003903F1CFE8\n000000067F0000400200008A590000514000-000000067F0000400200008A590000518000__0000003B99F7F8A0\n000000067F0000400200008A590000514000-000000067F0000400200008A590000518000__0000005D2FFFFB38\n000000067F0000400200008A590000514000-000000067F0000400200008A590000518000__00000073AD3FE6B8\n000000067F0000400200008A590000514000-000000067F0000400200008A590000518000__000000914E3F38F0\n000000067F0000400200008A590000514000-000000067F0000400200008A590000518000__000000931B33AE68\n000000067F0000400200008A590000514000-000000067F0000400200008A590000518000__000000931B9AFDF8\n
000000067F0000400200008A5900005171C0-000000067F0000400200008A59000051FB89__000000090CC5DF81-00000009AC75E659\n000000067F0000400200008A590000518000-000000067F0000400200008A59000051C000__0000001C725A2400\n000000067F0000400200008A590000518000-000000067F0000400200008A59000051C000__0000001C760FA190\n000000067F0000400200008A590000518000-000000067F0000400200008A59000051C000__00000038E67ABFA0\n000000067F0000400200008A590000518000-000000067F0000400200008A59000051C000__0000003903F1CFE8\n000000067F0000400200008A590000518000-000000067F0000400200008A59000051C000__0000003B99F7F8A0\n000000067F0000400200008A590000518000-000000067F0000400200008A59000051C000__0000005D2FFFFB38\n000000067F0000400200008A590000518000-000000067F0000400200008A59000051C000__00000073AD3FE6B8\n000000067F0000400200008A590000518000-000000067F0000400200008A59000051C000__000000914E3F38F0\n000000067F0000400200008A590000518000-000000067F0000400200008A59000051C000__000000931B33AE68\n000000067F0000400200008A590000518000-000000067F0000400200008A59000051C000__000000931B9AFDF8\n000000067F0000400200008A59000051C000-000000067F0000400200008A590000520000__0000001C725A2400\n000000067F0000400200008A59000051C000-000000067F0000400200008A590000520000__0000001C760FA190\n000000067F0000400200008A59000051C000-000000067F0000400200008A590000520000__00000038E67ABFA0\n000000067F0000400200008A59000051C000-000000067F0000400200008A590000520000__0000003903F1CFE8\n000000067F0000400200008A59000051C000-000000067F0000400200008A590000520000__0000003B99F7F8A0\n000000067F0000400200008A59000051C000-000000067F0000400200008A590000520000__0000005D2FFFFB38\n000000067F0000400200008A59000051C000-000000067F0000400200008A590000520000__00000073AD3FE6B8\n000000067F0000400200008A59000051C000-000000067F0000400200008A590000520000__000000914E3F38F0\n000000067F0000400200008A59000051C000-000000067F0000400200008A590000520000__000000931B33AE68\n000000067F0000400200008A59000051C000-000000067F0000400200008A590000520000__000000931B9AFDF8\n000000067F0000400200008A590000
51FB89-000000067F0000400200008A590000528577__000000090CC5DF81-00000009AC75E659\n000000067F0000400200008A590000520000-000000067F0000400200008A590000524000__0000001C725A2400\n000000067F0000400200008A590000520000-000000067F0000400200008A590000524000__0000001C760FA190\n000000067F0000400200008A590000520000-000000067F0000400200008A590000524000__00000038E67ABFA0\n000000067F0000400200008A590000520000-000000067F0000400200008A590000524000__0000003903F1CFE8\n000000067F0000400200008A590000520000-000000067F0000400200008A590000524000__0000003B99F7F8A0\n000000067F0000400200008A590000520000-000000067F0000400200008A590000524000__0000005D2FFFFB38\n000000067F0000400200008A590000520000-000000067F0000400200008A590000524000__00000073AD3FE6B8\n000000067F0000400200008A590000520000-000000067F0000400200008A590000524000__000000914E3F38F0\n000000067F0000400200008A590000520000-000000067F0000400200008A590000524000__000000931B33AE68\n000000067F0000400200008A590000520000-000000067F0000400200008A590000524000__000000931B9AFDF8\n000000067F0000400200008A590000524000-000000067F0000400200008A590000528000__0000001C725A2400\n000000067F0000400200008A590000524000-000000067F0000400200008A590000528000__0000001C760FA190\n000000067F0000400200008A590000524000-000000067F0000400200008A590000528000__00000038E67ABFA0\n000000067F0000400200008A590000524000-000000067F0000400200008A590000528000__0000003903F1CFE8\n000000067F0000400200008A590000524000-000000067F0000400200008A590000528000__0000003B99F7F8A0\n000000067F0000400200008A590000524000-000000067F0000400200008A590000528000__0000005D2FFFFB38\n000000067F0000400200008A590000524000-000000067F0000400200008A590000528000__00000073AD3FE6B8\n000000067F0000400200008A590000524000-000000067F0000400200008A590000528000__000000914E3F38F0\n000000067F0000400200008A590000524000-000000067F0000400200008A590000528000__000000931B33AE68\n000000067F0000400200008A590000524000-000000067F0000400200008A590000528000__000000931B9AFDF8\n000000067F0000400200008A590000528000-000000067F0000400200008
A59000052C000__0000001C725A2400\n000000067F0000400200008A590000528000-000000067F0000400200008A59000052C000__0000001C760FA190\n000000067F0000400200008A590000528000-000000067F0000400200008A59000052C000__00000038E67ABFA0\n000000067F0000400200008A590000528000-000000067F0000400200008A59000052C000__0000003903F1CFE8\n000000067F0000400200008A590000528000-000000067F0000400200008A59000052C000__0000003B99F7F8A0\n000000067F0000400200008A590000528000-000000067F0000400200008A59000052C000__0000005D2FFFFB38\n000000067F0000400200008A590000528000-000000067F0000400200008A59000052C000__00000073AD3FE6B8\n000000067F0000400200008A590000528000-000000067F0000400200008A59000052C000__000000914E3F38F0\n000000067F0000400200008A590000528000-000000067F0000400200008A59000052C000__000000931B33AE68\n000000067F0000400200008A590000528000-000000067F0000400200008A59000052C000__000000931B9AFDF8\n000000067F0000400200008A590000528577-000000067F0000400200008A590000530F67__000000090CC5DF81-00000009AC75E659\n000000067F0000400200008A59000052C000-000000067F0000400200008A590000530000__0000001C725A2400\n000000067F0000400200008A59000052C000-000000067F0000400200008A590000530000__0000001C760FA190\n000000067F0000400200008A59000052C000-000000067F0000400200008A590000530000__00000038E67ABFA0\n000000067F0000400200008A59000052C000-000000067F0000400200008A590000530000__0000003903F1CFE8\n000000067F0000400200008A59000052C000-000000067F0000400200008A590000530000__0000003B99F7F8A0\n000000067F0000400200008A59000052C000-000000067F0000400200008A590000530000__0000005D2FFFFB38\n000000067F0000400200008A59000052C000-000000067F0000400200008A590000530000__00000073AD3FE6B8\n000000067F0000400200008A59000052C000-000000067F0000400200008A590000530000__000000914E3F38F0\n000000067F0000400200008A59000052C000-000000067F0000400200008A590000530000__000000931B33AE68\n000000067F0000400200008A59000052C000-000000067F0000400200008A590000530000__000000931B9AFDF8\n000000067F0000400200008A590000530000-000000067F0000400200008A590000534000__0000001C725A240
0\n000000067F0000400200008A590000530000-000000067F0000400200008A590000534000__0000001C760FA190\n000000067F0000400200008A590000530000-000000067F0000400200008A590000534000__00000038E67ABFA0\n000000067F0000400200008A590000530000-000000067F0000400200008A590000534000__0000003903F1CFE8\n000000067F0000400200008A590000530000-000000067F0000400200008A590000534000__0000003B99F7F8A0\n000000067F0000400200008A590000530000-000000067F0000400200008A590000534000__0000005D2FFFFB38\n000000067F0000400200008A590000530000-000000067F0000400200008A590000534000__00000073AD3FE6B8\n000000067F0000400200008A590000530000-000000067F0000400200008A590000534000__000000914E3F38F0\n000000067F0000400200008A590000530000-000000067F0000400200008A590000534000__000000931B33AE68\n000000067F0000400200008A590000530000-000000067F0000400200008A590000534000__000000931B9AFDF8\n000000067F0000400200008A590000530F67-000000067F0000400200008A590000539959__000000090CC5DF81-00000009AC75E659\n000000067F0000400200008A590000534000-000000067F0000400200008A590000538000__0000001C725A2400\n000000067F0000400200008A590000534000-000000067F0000400200008A590000538000__0000001C760FA190\n000000067F0000400200008A590000534000-000000067F0000400200008A590000538000__00000038E67ABFA0\n000000067F0000400200008A590000534000-000000067F0000400200008A590000538000__0000003903F1CFE8\n000000067F0000400200008A590000534000-000000067F0000400200008A590000538000__0000003B99F7F8A0\n000000067F0000400200008A590000534000-000000067F0000400200008A590000538000__0000005D2FFFFB38\n000000067F0000400200008A590000534000-000000067F0000400200008A590000538000__00000073AD3FE6B8\n000000067F0000400200008A590000534000-000000067F0000400200008A590000538000__000000914E3F38F0\n000000067F0000400200008A590000534000-000000067F0000400200008A590000538000__000000931B33AE68\n000000067F0000400200008A590000534000-000000067F0000400200008A590000538000__000000931B9AFDF8\n000000067F0000400200008A590000538000-000000067F0000400200008A59000053C000__0000000A7B3FF158\n000000067F0000400200008A590
000538000-000000067F0000400200008A59000053C000__0000001C760FA190\n000000067F0000400200008A590000538000-000000067F0000400200008A59000053C000__00000038E67ABFA0\n000000067F0000400200008A590000538000-000000067F0000400200008A59000053C000__0000003903F1CFE8\n000000067F0000400200008A590000538000-000000067F0000400200008A59000053C000__0000003B99F7F8A0\n000000067F0000400200008A590000538000-000000067F0000400200008A59000053C000__0000005D2FFFFB38\n000000067F0000400200008A590000538000-000000067F0000400200008A59000053C000__00000073AD3FE6B8\n000000067F0000400200008A590000538000-000000067F0000400200008A59000053C000__000000914E3F38F0\n000000067F0000400200008A590000538000-000000067F0000400200008A59000053C000__000000931B33AE68\n000000067F0000400200008A590000538000-000000067F0000400200008A59000053C000__000000931B9AFDF8\n000000067F0000400200008A590000539959-000000067F0000400200008A590100000000__000000090CC5DF81-00000009AC75E659\n000000067F0000400200008A590000539C35-000000067F0000400200008A590000542603__00000009AC75E659-0000000A4C25FC21\n000000067F0000400200008A59000053C000-000000067F0000400200008A590000540000__0000000A7B3FF158\n000000067F0000400200008A59000053C000-000000067F0000400200008A590000540000__0000001C760FA190\n000000067F0000400200008A59000053C000-000000067F0000400200008A590000540000__00000038E67ABFA0\n000000067F0000400200008A59000053C000-000000067F0000400200008A590000540000__0000003903F1CFE8\n000000067F0000400200008A59000053C000-000000067F0000400200008A590000540000__0000003B99F7F8A0\n000000067F0000400200008A59000053C000-000000067F0000400200008A590000540000__0000005D2FFFFB38\n000000067F0000400200008A59000053C000-000000067F0000400200008A590000540000__00000073AD3FE6B8\n000000067F0000400200008A59000053C000-000000067F0000400200008A590000540000__000000914E3F38F0\n000000067F0000400200008A59000053C000-000000067F0000400200008A590000540000__000000931B33AE68\n000000067F0000400200008A59000053C000-000000067F0000400200008A590000540000__000000931B9AFDF8\n000000067F0000400200008A590000540000-000
000067F0000400200008A590000544000__0000000A7B3FF158\n000000067F0000400200008A590000540000-000000067F0000400200008A590000544000__0000001C760FA190\n000000067F0000400200008A590000540000-000000067F0000400200008A590000544000__00000038E67ABFA0\n000000067F0000400200008A590000540000-000000067F0000400200008A590000544000__0000003903F1CFE8\n000000067F0000400200008A590000540000-000000067F0000400200008A590000544000__0000003B99F7F8A0\n000000067F0000400200008A590000540000-000000067F0000400200008A590000544000__0000005D2FFFFB38\n000000067F0000400200008A590000540000-000000067F0000400200008A590000544000__00000073AD3FE6B8\n000000067F0000400200008A590000540000-000000067F0000400200008A590000544000__000000914E3F38F0\n000000067F0000400200008A590000540000-000000067F0000400200008A590000544000__000000931B33AE68\n000000067F0000400200008A590000540000-000000067F0000400200008A590000544000__000000931B9AFDF8\n000000067F0000400200008A590000542603-000000067F0000400200008A59000054AFD4__00000009AC75E659-0000000A4C25FC21\n000000067F0000400200008A590000544000-000000067F0000400200008A590000548000__0000000A7B3FF158\n000000067F0000400200008A590000544000-000000067F0000400200008A590000548000__0000001C760FA190\n000000067F0000400200008A590000544000-000000067F0000400200008A590000548000__00000038E67ABFA0\n000000067F0000400200008A590000544000-000000067F0000400200008A590000548000__0000003903F1CFE8\n000000067F0000400200008A590000544000-000000067F0000400200008A590000548000__0000003B99F7F8A0\n000000067F0000400200008A590000544000-000000067F0000400200008A590000548000__0000005D2FFFFB38\n000000067F0000400200008A590000544000-000000067F0000400200008A590000548000__00000073AD3FE6B8\n000000067F0000400200008A590000544000-000000067F0000400200008A590000548000__000000914E3F38F0\n000000067F0000400200008A590000544000-000000067F0000400200008A590000548000__000000931B33AE68\n000000067F0000400200008A590000544000-000000067F0000400200008A590000548000__000000931B9AFDF8\n000000067F0000400200008A590000548000-000000067F0000400200008A59000054C
000__0000000A7B3FF158\n000000067F0000400200008A590000548000-000000067F0000400200008A59000054C000__0000001C760FA190\n000000067F0000400200008A590000548000-000000067F0000400200008A59000054C000__00000038E67ABFA0\n000000067F0000400200008A590000548000-000000067F0000400200008A59000054C000__0000003903F1CFE8\n000000067F0000400200008A590000548000-000000067F0000400200008A59000054C000__0000003B99F7F8A0\n000000067F0000400200008A590000548000-000000067F0000400200008A59000054C000__0000005D2FFFFB38\n000000067F0000400200008A590000548000-000000067F0000400200008A59000054C000__00000073AD3FE6B8\n000000067F0000400200008A590000548000-000000067F0000400200008A59000054C000__000000914E3F38F0\n000000067F0000400200008A590000548000-000000067F0000400200008A59000054C000__000000931B33AE68\n000000067F0000400200008A590000548000-000000067F0000400200008A59000054C000__000000931B9AFDF8\n000000067F0000400200008A59000054AFD4-000000067F0000400200008A59000055399F__00000009AC75E659-0000000A4C25FC21\n000000067F0000400200008A59000054C000-000000067F0000400200008A590000550000__0000000A7B3FF158\n000000067F0000400200008A59000054C000-000000067F0000400200008A590000550000__0000001C760FA190\n000000067F0000400200008A59000054C000-000000067F0000400200008A590000550000__00000038E67ABFA0\n000000067F0000400200008A59000054C000-000000067F0000400200008A590000550000__0000003903F1CFE8\n000000067F0000400200008A59000054C000-000000067F0000400200008A590000550000__0000003B99F7F8A0\n000000067F0000400200008A59000054C000-000000067F0000400200008A590000550000__0000005D2FFFFB38\n000000067F0000400200008A59000054C000-000000067F0000400200008A590000550000__00000073AD3FE6B8\n000000067F0000400200008A59000054C000-000000067F0000400200008A590000550000__000000914E3F38F0\n000000067F0000400200008A59000054C000-000000067F0000400200008A590000550000__000000931B33AE68\n000000067F0000400200008A59000054C000-000000067F0000400200008A590000550000__000000931B9AFDF8\n000000067F0000400200008A590000550000-000000067F0000400200008A590000554000__0000000A7B3FF158\n0000000
67F0000400200008A590000550000-000000067F0000400200008A590000554000__0000001C760FA190\n000000067F0000400200008A590000550000-000000067F0000400200008A590000554000__00000038E67ABFA0\n000000067F0000400200008A590000550000-000000067F0000400200008A590000554000__0000003903F1CFE8\n000000067F0000400200008A590000550000-000000067F0000400200008A590000554000__0000003B99F7F8A0\n000000067F0000400200008A590000550000-000000067F0000400200008A590000554000__0000005D2FFFFB38\n000000067F0000400200008A590000550000-000000067F0000400200008A590000554000__00000073AD3FE6B8\n000000067F0000400200008A590000550000-000000067F0000400200008A590000554000__000000914E3F38F0\n000000067F0000400200008A590000550000-000000067F0000400200008A590000554000__000000931B33AE68\n000000067F0000400200008A590000550000-000000067F0000400200008A590000554000__000000931B9AFDF8\n000000067F0000400200008A59000055399F-000000067F0000400200008A59000055C370__00000009AC75E659-0000000A4C25FC21\n000000067F0000400200008A590000554000-000000067F0000400200008A590000558000__0000000A7B3FF158\n000000067F0000400200008A590000554000-000000067F0000400200008A590000558000__0000001C760FA190\n000000067F0000400200008A590000554000-000000067F0000400200008A590000558000__00000038E67ABFA0\n000000067F0000400200008A590000554000-000000067F0000400200008A590000558000__0000003903F1CFE8\n000000067F0000400200008A590000554000-000000067F0000400200008A590000558000__0000003B99F7F8A0\n000000067F0000400200008A590000554000-000000067F0000400200008A590000558000__0000005D2FFFFB38\n000000067F0000400200008A590000554000-000000067F0000400200008A590000558000__00000073AD3FE6B8\n000000067F0000400200008A590000554000-000000067F0000400200008A590000558000__000000914E3F38F0\n000000067F0000400200008A590000554000-000000067F0000400200008A590000558000__000000931B33AE68\n000000067F0000400200008A590000554000-000000067F0000400200008A590000558000__000000931B9AFDF8\n000000067F0000400200008A590000558000-000000067F0000400200008A59000055C000__0000000A7B3FF158\n000000067F0000400200008A590000558000-
000000067F0000400200008A59000055C000__0000001C760FA190\n000000067F0000400200008A590000558000-000000067F0000400200008A59000055C000__00000038E67ABFA0\n000000067F0000400200008A590000558000-000000067F0000400200008A59000055C000__0000003903F1CFE8\n000000067F0000400200008A590000558000-000000067F0000400200008A59000055C000__0000003B99F7F8A0\n000000067F0000400200008A590000558000-000000067F0000400200008A59000055C000__0000005D2FFFFB38\n000000067F0000400200008A590000558000-000000067F0000400200008A59000055C000__00000073AD3FE6B8\n000000067F0000400200008A590000558000-000000067F0000400200008A59000055C000__000000914E3F38F0\n000000067F0000400200008A590000558000-000000067F0000400200008A59000055C000__000000931B33AE68\n000000067F0000400200008A590000558000-000000067F0000400200008A59000055C000__000000931B9AFDF8\n000000067F0000400200008A59000055C000-000000067F0000400200008A590000560000__0000000A7B3FF158\n000000067F0000400200008A59000055C000-000000067F0000400200008A590000560000__0000001C760FA190\n000000067F0000400200008A59000055C000-000000067F0000400200008A590000560000__00000038E67ABFA0\n000000067F0000400200008A59000055C000-000000067F0000400200008A590000560000__0000003903F1CFE8\n000000067F0000400200008A59000055C000-000000067F0000400200008A590000560000__0000003B99F7F8A0\n000000067F0000400200008A59000055C000-000000067F0000400200008A590000560000__0000005D2FFFFB38\n000000067F0000400200008A59000055C000-000000067F0000400200008A590000560000__00000073AD3FE6B8\n000000067F0000400200008A59000055C000-000000067F0000400200008A590000560000__000000914E3F38F0\n000000067F0000400200008A59000055C000-000000067F0000400200008A590000560000__000000931B33AE68\n000000067F0000400200008A59000055C000-000000067F0000400200008A590000560000__000000931B9AFDF8\n000000067F0000400200008A59000055C370-000000067F0000400200008A590000564D5E__00000009AC75E659-0000000A4C25FC21\n000000067F0000400200008A590000560000-000000067F0000400200008A590000564000__0000000A7B3FF158\n000000067F0000400200008A590000560000-000000067F0000400200008A590000
564000__0000001C760FA190\n000000067F0000400200008A590000560000-000000067F0000400200008A590000564000__00000038E67ABFA0\n000000067F0000400200008A590000560000-000000067F0000400200008A590000564000__0000003903F1CFE8\n000000067F0000400200008A590000560000-000000067F0000400200008A590000564000__0000003B99F7F8A0\n000000067F0000400200008A590000560000-000000067F0000400200008A590000564000__0000005D2FFFFB38\n000000067F0000400200008A590000560000-000000067F0000400200008A590000564000__00000073AD3FE6B8\n000000067F0000400200008A590000560000-000000067F0000400200008A590000564000__000000914E3F38F0\n000000067F0000400200008A590000560000-000000067F0000400200008A590000564000__000000931B33AE68\n000000067F0000400200008A590000560000-000000067F0000400200008A590000564000__000000931B9AFDF8\n000000067F0000400200008A590000564000-000000067F0000400200008A590000568000__0000000A7B3FF158\n000000067F0000400200008A590000564000-000000067F0000400200008A590000568000__0000001C760FA190\n000000067F0000400200008A590000564000-000000067F0000400200008A590000568000__00000038E67ABFA0\n000000067F0000400200008A590000564000-000000067F0000400200008A590000568000__0000003903F1CFE8\n000000067F0000400200008A590000564000-000000067F0000400200008A590000568000__0000003B99F7F8A0\n000000067F0000400200008A590000564000-000000067F0000400200008A590000568000__0000005D2FFFFB38\n000000067F0000400200008A590000564000-000000067F0000400200008A590000568000__00000073AD3FE6B8\n000000067F0000400200008A590000564000-000000067F0000400200008A590000568000__000000914E3F38F0\n000000067F0000400200008A590000564000-000000067F0000400200008A590000568000__000000931B33AE68\n000000067F0000400200008A590000564000-000000067F0000400200008A590000568000__000000931B9AFDF8\n000000067F0000400200008A590000564D5E-000000067F0000400200008A59000056D74C__00000009AC75E659-0000000A4C25FC21\n000000067F0000400200008A590000568000-000000067F0000400200008A59000056C000__0000000A7B3FF158\n000000067F0000400200008A590000568000-000000067F0000400200008A59000056C000__0000001C760FA190\n0000
00067F0000400200008A590000568000-000000067F0000400200008A59000056C000__00000038E67ABFA0\n000000067F0000400200008A590000568000-000000067F0000400200008A59000056C000__0000003903F1CFE8\n000000067F0000400200008A590000568000-000000067F0000400200008A59000056C000__0000003B99F7F8A0\n000000067F0000400200008A590000568000-000000067F0000400200008A59000056C000__0000005D2FFFFB38\n000000067F0000400200008A590000568000-000000067F0000400200008A59000056C000__00000073AD3FE6B8\n000000067F0000400200008A590000568000-000000067F0000400200008A59000056C000__000000914E3F38F0\n000000067F0000400200008A590000568000-000000067F0000400200008A59000056C000__000000931B33AE68\n000000067F0000400200008A590000568000-000000067F0000400200008A59000056C000__000000931B9AFDF8\n000000067F0000400200008A59000056C000-000000067F0000400200008A590000570000__0000000A7B3FF158\n000000067F0000400200008A59000056C000-000000067F0000400200008A590000570000__0000001C760FA190\n000000067F0000400200008A59000056C000-000000067F0000400200008A590000570000__00000038E67ABFA0\n000000067F0000400200008A59000056C000-000000067F0000400200008A590000570000__0000003903F1CFE8\n000000067F0000400200008A59000056C000-000000067F0000400200008A590000570000__0000003B99F7F8A0\n000000067F0000400200008A59000056C000-000000067F0000400200008A590000570000__0000005D2FFFFB38\n000000067F0000400200008A59000056C000-000000067F0000400200008A590000570000__00000073AD3FE6B8\n000000067F0000400200008A59000056C000-000000067F0000400200008A590000570000__000000914E3F38F0\n000000067F0000400200008A59000056C000-000000067F0000400200008A590000570000__000000931B33AE68\n000000067F0000400200008A59000056C000-000000067F0000400200008A590000570000__000000931B9AFDF8\n000000067F0000400200008A59000056D74C-000000067F0000400200008A590000576130__00000009AC75E659-0000000A4C25FC21\n000000067F0000400200008A590000570000-000000067F0000400200008A590000574000__0000000A7B3FF158\n000000067F0000400200008A590000570000-000000067F0000400200008A590000574000__0000001C760FA190\n000000067F0000400200008A5900005700
00-000000067F0000400200008A590000574000__00000038E67ABFA0\n000000067F0000400200008A590000570000-000000067F0000400200008A590000574000__0000003903F1CFE8\n000000067F0000400200008A590000570000-000000067F0000400200008A590000574000__0000003B99F7F8A0\n000000067F0000400200008A590000570000-000000067F0000400200008A590000574000__0000005D2FFFFB38\n000000067F0000400200008A590000570000-000000067F0000400200008A590000574000__00000073AD3FE6B8\n000000067F0000400200008A590000570000-000000067F0000400200008A590000574000__000000914E3F38F0\n000000067F0000400200008A590000570000-000000067F0000400200008A590000574000__000000931B33AE68\n000000067F0000400200008A590000570000-000000067F0000400200008A590000574000__000000931B9AFDF8\n000000067F0000400200008A590000574000-000000067F0000400200008A590000578000__0000000A7B3FF158\n000000067F0000400200008A590000574000-000000067F0000400200008A590000578000__0000001C760FA190\n000000067F0000400200008A590000574000-000000067F0000400200008A590000578000__00000038E67ABFA0\n000000067F0000400200008A590000574000-000000067F0000400200008A590000578000__0000003903F1CFE8\n000000067F0000400200008A590000574000-000000067F0000400200008A590000578000__0000003B99F7F8A0\n000000067F0000400200008A590000574000-000000067F0000400200008A590000578000__0000005D2FFFFB38\n000000067F0000400200008A590000574000-000000067F0000400200008A590000578000__00000073AD3FE6B8\n000000067F0000400200008A590000574000-000000067F0000400200008A590000578000__000000914E3F38F0\n000000067F0000400200008A590000574000-000000067F0000400200008A590000578000__000000931B33AE68\n000000067F0000400200008A590000574000-000000067F0000400200008A590000578000__000000931B9AFDF8\n000000067F0000400200008A590000576130-000000067F0000400200008A59000057EAFE__00000009AC75E659-0000000A4C25FC21\n000000067F0000400200008A590000578000-000000067F0000400200008A59000057C000__0000000A7B3FF158\n000000067F0000400200008A590000578000-000000067F0000400200008A59000057C000__0000001C760FA190\n000000067F0000400200008A590000578000-000000067F0000400200008A590
00057C000__00000038E67ABFA0\n000000067F0000400200008A590000578000-000000067F0000400200008A59000057C000__0000003903F1CFE8\n000000067F0000400200008A590000578000-000000067F0000400200008A59000057C000__0000003B99F7F8A0\n000000067F0000400200008A590000578000-000000067F0000400200008A59000057C000__0000005D2FFFFB38\n000000067F0000400200008A590000578000-000000067F0000400200008A59000057C000__00000073AD3FE6B8\n000000067F0000400200008A590000578000-000000067F0000400200008A59000057C000__000000914E3F38F0\n000000067F0000400200008A590000578000-000000067F0000400200008A59000057C000__000000931B33AE68\n000000067F0000400200008A590000578000-000000067F0000400200008A59000057C000__000000931B9AFDF8\n000000067F0000400200008A59000057C000-000000067F0000400200008A590000580000__0000000A7B3FF158\n000000067F0000400200008A59000057C000-000000067F0000400200008A590000580000__0000001C760FA190\n000000067F0000400200008A59000057C000-000000067F0000400200008A590000580000__00000038E67ABFA0\n000000067F0000400200008A59000057C000-000000067F0000400200008A590000580000__0000003903F1CFE8\n000000067F0000400200008A59000057C000-000000067F0000400200008A590000580000__0000003B99F7F8A0\n000000067F0000400200008A59000057C000-000000067F0000400200008A590000580000__0000005D2FFFFB38\n000000067F0000400200008A59000057C000-000000067F0000400200008A590000580000__00000073AD3FE6B8\n000000067F0000400200008A59000057C000-000000067F0000400200008A590000580000__000000914E3F38F0\n000000067F0000400200008A59000057C000-000000067F0000400200008A590000580000__000000931B33AE68\n000000067F0000400200008A59000057C000-000000067F0000400200008A590000580000__000000931B9AFDF8\n000000067F0000400200008A59000057EAFE-000000067F0000400200008A5900005874D9__00000009AC75E659-0000000A4C25FC21\n000000067F0000400200008A590000580000-000000067F0000400200008A590000584000__0000000A7B3FF158\n000000067F0000400200008A590000580000-000000067F0000400200008A590000584000__0000001C760FA190\n000000067F0000400200008A590000580000-000000067F0000400200008A590000584000__00000038E67ABFA0\n0
00000067F0000400200008A590000580000-000000067F0000400200008A590000584000__0000003903F1CFE8\n000000067F0000400200008A590000580000-000000067F0000400200008A590000584000__0000003B99F7F8A0\n000000067F0000400200008A590000580000-000000067F0000400200008A590000584000__0000005D2FFFFB38\n000000067F0000400200008A590000580000-000000067F0000400200008A590000584000__00000073AD3FE6B8\n000000067F0000400200008A590000580000-000000067F0000400200008A590000584000__000000914E3F38F0\n000000067F0000400200008A590000580000-000000067F0000400200008A590000584000__000000931B33AE68\n000000067F0000400200008A590000580000-000000067F0000400200008A590000584000__000000931B9AFDF8\n000000067F0000400200008A590000584000-000000067F0000400200008A590000588000__0000000A7B3FF158\n000000067F0000400200008A590000584000-000000067F0000400200008A590000588000__0000001C760FA190\n000000067F0000400200008A590000584000-000000067F0000400200008A590000588000__00000038E67ABFA0\n000000067F0000400200008A590000584000-000000067F0000400200008A590000588000__0000003903F1CFE8\n000000067F0000400200008A590000584000-000000067F0000400200008A590000588000__0000003B99F7F8A0\n000000067F0000400200008A590000584000-000000067F0000400200008A590000588000__0000005D2FFFFB38\n000000067F0000400200008A590000584000-000000067F0000400200008A590000588000__00000073AD3FE6B8\n000000067F0000400200008A590000584000-000000067F0000400200008A590000588000__000000914E3F38F0\n000000067F0000400200008A590000584000-000000067F0000400200008A590000588000__000000931B33AE68\n000000067F0000400200008A590000584000-000000067F0000400200008A590000588000__000000931B9AFDF8\n000000067F0000400200008A5900005874D9-000000067F0000400200008A59000058FEA7__00000009AC75E659-0000000A4C25FC21\n000000067F0000400200008A590000588000-000000067F0000400200008A59000058C000__0000000A7B3FF158\n000000067F0000400200008A590000588000-000000067F0000400200008A59000058C000__0000001C760FA190\n000000067F0000400200008A590000588000-000000067F0000400200008A59000058C000__00000038E67ABFA0\n000000067F0000400200008A5900005
88000-000000067F0000400200008A59000058C000__0000003903F1CFE8\n000000067F0000400200008A590000588000-000000067F0000400200008A59000058C000__0000003B99F7F8A0\n000000067F0000400200008A590000588000-000000067F0000400200008A59000058C000__0000005D2FFFFB38\n000000067F0000400200008A590000588000-000000067F0000400200008A59000058C000__00000073AD3FE6B8\n000000067F0000400200008A590000588000-000000067F0000400200008A59000058C000__000000914E3F38F0\n000000067F0000400200008A590000588000-000000067F0000400200008A59000058C000__000000931B33AE68\n000000067F0000400200008A590000588000-000000067F0000400200008A59000058C000__000000931B9AFDF8\n000000067F0000400200008A59000058C000-000000067F0000400200008A590000590000__0000000A7B3FF158\n000000067F0000400200008A59000058C000-000000067F0000400200008A590000590000__0000001C760FA190\n000000067F0000400200008A59000058C000-000000067F0000400200008A590000590000__00000038E67ABFA0\n000000067F0000400200008A59000058C000-000000067F0000400200008A590000590000__0000003903F1CFE8\n000000067F0000400200008A59000058C000-000000067F0000400200008A590000590000__0000003B99F7F8A0\n000000067F0000400200008A59000058C000-000000067F0000400200008A590000590000__0000005D2FFFFB38\n000000067F0000400200008A59000058C000-000000067F0000400200008A590000590000__00000073AD3FE6B8\n000000067F0000400200008A59000058C000-000000067F0000400200008A590000590000__000000914E3F38F0\n000000067F0000400200008A59000058C000-000000067F0000400200008A590000590000__000000931B33AE68\n000000067F0000400200008A59000058C000-000000067F0000400200008A590000590000__000000931B9AFDF8\n000000067F0000400200008A59000058FEA7-000000067F0000400200008A590100000000__00000009AC75E659-0000000A4C25FC21\n000000067F0000400200008A590000590000-000000067F0000400200008A590000594000__0000000A7B3FF158\n000000067F0000400200008A590000590000-000000067F0000400200008A590000594000__0000001C760FA190\n000000067F0000400200008A590000590000-000000067F0000400200008A590000594000__00000038E67ABFA0\n000000067F0000400200008A590000590000-000000067F0000400200008A
590000594000__0000003903F1CFE8\n000000067F0000400200008A590000590000-000000067F0000400200008A590000594000__0000003B99F7F8A0\n000000067F0000400200008A590000590000-000000067F0000400200008A590000594000__0000005D2FFFFB38\n000000067F0000400200008A590000590000-000000067F0000400200008A590000594000__00000073AD3FE6B8\n000000067F0000400200008A590000590000-000000067F0000400200008A590000594000__000000914E3F38F0\n000000067F0000400200008A590000590000-000000067F0000400200008A590000594000__000000931B33AE68\n000000067F0000400200008A590000590000-000000067F0000400200008A590000594000__000000931B9AFDF8\n000000067F0000400200008A590000590185-000000067F0000400200008A590000598B56__0000000A4C25FC21-0000000AEBD5F889\n000000067F0000400200008A590000594000-000000067F0000400200008A590000598000__0000000A7B3FF158\n000000067F0000400200008A590000594000-000000067F0000400200008A590000598000__0000001C760FA190\n000000067F0000400200008A590000594000-000000067F0000400200008A590000598000__00000038E67ABFA0\n000000067F0000400200008A590000594000-000000067F0000400200008A590000598000__0000003903F1CFE8\n000000067F0000400200008A590000594000-000000067F0000400200008A590000598000__0000003B99F7F8A0\n000000067F0000400200008A590000594000-000000067F0000400200008A590000598000__0000005D2FFFFB38\n000000067F0000400200008A590000594000-000000067F0000400200008A590000598000__00000073AD3FE6B8\n000000067F0000400200008A590000594000-000000067F0000400200008A590000598000__000000914E3F38F0\n000000067F0000400200008A590000594000-000000067F0000400200008A590000598000__000000931B33AE68\n000000067F0000400200008A590000594000-000000067F0000400200008A590000598000__000000931B9AFDF8\n000000067F0000400200008A590000598000-000000067F0000400200008A59000059C000__0000000A7B3FF158\n000000067F0000400200008A590000598000-000000067F0000400200008A59000059C000__0000001C760FA190\n000000067F0000400200008A590000598000-000000067F0000400200008A59000059C000__00000038E67ABFA0\n000000067F0000400200008A590000598000-000000067F0000400200008A59000059C000__0000003903F1CFE8
\n000000067F0000400200008A590000598000-000000067F0000400200008A59000059C000__0000003B99F7F8A0\n000000067F0000400200008A590000598000-000000067F0000400200008A59000059C000__0000005D2FFFFB38\n000000067F0000400200008A590000598000-000000067F0000400200008A59000059C000__00000073AD3FE6B8\n000000067F0000400200008A590000598000-000000067F0000400200008A59000059C000__000000914E3F38F0\n000000067F0000400200008A590000598000-000000067F0000400200008A59000059C000__000000931B33AE68\n000000067F0000400200008A590000598000-000000067F0000400200008A59000059C000__000000931B9AFDF8\n000000067F0000400200008A590000598B56-000000067F0000400200008A5900005A153E__0000000A4C25FC21-0000000AEBD5F889\n000000067F0000400200008A59000059C000-000000067F0000400200008A5900005A0000__0000000A7B3FF158\n000000067F0000400200008A59000059C000-000000067F0000400200008A5900005A0000__0000001C760FA190\n000000067F0000400200008A59000059C000-000000067F0000400200008A5900005A0000__00000038E67ABFA0\n000000067F0000400200008A59000059C000-000000067F0000400200008A5900005A0000__0000003903F1CFE8\n000000067F0000400200008A59000059C000-000000067F0000400200008A5900005A0000__0000003B99F7F8A0\n000000067F0000400200008A59000059C000-000000067F0000400200008A5900005A0000__0000005D2FFFFB38\n000000067F0000400200008A59000059C000-000000067F0000400200008A5900005A0000__00000073AD3FE6B8\n000000067F0000400200008A59000059C000-000000067F0000400200008A5900005A0000__000000914E3F38F0\n000000067F0000400200008A59000059C000-000000067F0000400200008A5900005A0000__000000931B33AE68\n000000067F0000400200008A59000059C000-000000067F0000400200008A5900005A0000__000000931B9AFDF8\n000000067F0000400200008A5900005A0000-000000067F0000400200008A5900005A4000__0000000A7B3FF158\n000000067F0000400200008A5900005A0000-000000067F0000400200008A5900005A4000__0000001C760FA190\n000000067F0000400200008A5900005A0000-000000067F0000400200008A5900005A4000__00000038E67ABFA0\n000000067F0000400200008A5900005A0000-000000067F0000400200008A5900005A4000__0000003903F1CFE8\n000000067F0000400200008A5900
005A0000-000000067F0000400200008A5900005A4000__0000003B99F7F8A0\n000000067F0000400200008A5900005A0000-000000067F0000400200008A5900005A4000__0000005D2FFFFB38\n000000067F0000400200008A5900005A0000-000000067F0000400200008A5900005A4000__00000073AD3FE6B8\n000000067F0000400200008A5900005A0000-000000067F0000400200008A5900005A4000__000000914E3F38F0\n000000067F0000400200008A5900005A0000-000000067F0000400200008A5900005A4000__000000931B33AE68\n000000067F0000400200008A5900005A0000-000000067F0000400200008A5900005A4000__000000931B9AFDF8\n000000067F0000400200008A5900005A153E-000000067F0000400200008A5900005A9F2C__0000000A4C25FC21-0000000AEBD5F889\n000000067F0000400200008A5900005A4000-000000067F0000400200008A5900005A8000__0000000A7B3FF158\n000000067F0000400200008A5900005A4000-000000067F0000400200008A5900005A8000__0000001C760FA190\n000000067F0000400200008A5900005A4000-000000067F0000400200008A5900005A8000__00000038E67ABFA0\n000000067F0000400200008A5900005A4000-000000067F0000400200008A5900005A8000__0000003903F1CFE8\n000000067F0000400200008A5900005A4000-000000067F0000400200008A5900005A8000__0000003B99F7F8A0\n000000067F0000400200008A5900005A4000-000000067F0000400200008A5900005A8000__0000005D2FFFFB38\n000000067F0000400200008A5900005A4000-000000067F0000400200008A5900005A8000__00000073AD3FE6B8\n000000067F0000400200008A5900005A4000-000000067F0000400200008A5900005A8000__000000914E3F38F0\n000000067F0000400200008A5900005A4000-000000067F0000400200008A5900005A8000__000000931B33AE68\n000000067F0000400200008A5900005A4000-000000067F0000400200008A5900005A8000__000000931B9AFDF8\n000000067F0000400200008A5900005A8000-000000067F0000400200008A5900005AC000__0000001C760FA190\n000000067F0000400200008A5900005A8000-000000067F0000400200008A5900005AC000__00000038E67ABFA0\n000000067F0000400200008A5900005A8000-000000067F0000400200008A5900005AC000__0000003903F1CFE8\n000000067F0000400200008A5900005A8000-000000067F0000400200008A5900005AC000__0000003B99F7F8A0\n000000067F0000400200008A5900005A8000-000000067F00004002000
08A5900005AC000__0000005D2FFFFB38\n000000067F0000400200008A5900005A8000-000000067F0000400200008A5900005AC000__00000073AD3FE6B8\n000000067F0000400200008A5900005A8000-000000067F0000400200008A5900005AC000__000000914E3F38F0\n000000067F0000400200008A5900005A8000-000000067F0000400200008A5900005AC000__000000931B33AE68\n000000067F0000400200008A5900005A8000-000000067F0000400200008A5900005AC000__000000931B9AFDF8\n000000067F0000400200008A5900005A8000-030000000000000000000000000000000002__0000000A7B3FF158\n000000067F0000400200008A5900005A9F2C-000000067F0000400200008A5900005B290F__0000000A4C25FC21-0000000AEBD5F889\n000000067F0000400200008A5900005AC000-000000067F0000400200008A5900005B0000__0000001C760FA190\n000000067F0000400200008A5900005AC000-000000067F0000400200008A5900005B0000__00000038E67ABFA0\n000000067F0000400200008A5900005AC000-000000067F0000400200008A5900005B0000__0000003903F1CFE8\n000000067F0000400200008A5900005AC000-000000067F0000400200008A5900005B0000__0000003B99F7F8A0\n000000067F0000400200008A5900005AC000-000000067F0000400200008A5900005B0000__0000005D2FFFFB38\n000000067F0000400200008A5900005AC000-000000067F0000400200008A5900005B0000__00000073AD3FE6B8\n000000067F0000400200008A5900005AC000-000000067F0000400200008A5900005B0000__000000914E3F38F0\n000000067F0000400200008A5900005AC000-000000067F0000400200008A5900005B0000__000000931B33AE68\n000000067F0000400200008A5900005AC000-000000067F0000400200008A5900005B0000__000000931B9AFDF8\n000000067F0000400200008A5900005B0000-000000067F0000400200008A5900005B4000__0000001C760FA190\n000000067F0000400200008A5900005B0000-000000067F0000400200008A5900005B4000__00000038E67ABFA0\n000000067F0000400200008A5900005B0000-000000067F0000400200008A5900005B4000__0000003903F1CFE8\n000000067F0000400200008A5900005B0000-000000067F0000400200008A5900005B4000__0000003B99F7F8A0\n000000067F0000400200008A5900005B0000-000000067F0000400200008A5900005B4000__0000005D2FFFFB38\n000000067F0000400200008A5900005B0000-000000067F0000400200008A5900005B4000__00000073AD3FE
6B8\n000000067F0000400200008A5900005B0000-000000067F0000400200008A5900005B4000__000000914E3F38F0\n000000067F0000400200008A5900005B0000-000000067F0000400200008A5900005B4000__000000931B33AE68\n000000067F0000400200008A5900005B0000-000000067F0000400200008A5900005B4000__000000931B9AFDF8\n000000067F0000400200008A5900005B290F-000000067F0000400200008A5900005BB2DB__0000000A4C25FC21-0000000AEBD5F889\n000000067F0000400200008A5900005B4000-000000067F0000400200008A5900005B8000__0000001C760FA190\n000000067F0000400200008A5900005B4000-000000067F0000400200008A5900005B8000__00000038E67ABFA0\n000000067F0000400200008A5900005B4000-000000067F0000400200008A5900005B8000__0000003903F1CFE8\n000000067F0000400200008A5900005B4000-000000067F0000400200008A5900005B8000__0000003B99F7F8A0\n000000067F0000400200008A5900005B4000-000000067F0000400200008A5900005B8000__0000005D2FFFFB38\n000000067F0000400200008A5900005B4000-000000067F0000400200008A5900005B8000__00000073AD3FE6B8\n000000067F0000400200008A5900005B4000-000000067F0000400200008A5900005B8000__000000914E3F38F0\n000000067F0000400200008A5900005B4000-000000067F0000400200008A5900005B8000__000000931B33AE68\n000000067F0000400200008A5900005B4000-000000067F0000400200008A5900005B8000__000000931B9AFDF8\n000000067F0000400200008A5900005B8000-000000067F0000400200008A5900005BC000__0000001C760FA190\n000000067F0000400200008A5900005B8000-000000067F0000400200008A5900005BC000__00000038E67ABFA0\n000000067F0000400200008A5900005B8000-000000067F0000400200008A5900005BC000__0000003903F1CFE8\n000000067F0000400200008A5900005B8000-000000067F0000400200008A5900005BC000__0000003B99F7F8A0\n000000067F0000400200008A5900005B8000-000000067F0000400200008A5900005BC000__0000005D2FFFFB38\n000000067F0000400200008A5900005B8000-000000067F0000400200008A5900005BC000__00000073AD3FE6B8\n000000067F0000400200008A5900005B8000-000000067F0000400200008A5900005BC000__000000914E3F38F0\n000000067F0000400200008A5900005B8000-000000067F0000400200008A5900005BC000__000000931B33AE68\n000000067F0000400200008A5
900005B8000-000000067F0000400200008A5900005BC000__000000931B9AFDF8\n000000067F0000400200008A5900005BB2DB-000000067F0000400200008A5900005C3CB1__0000000A4C25FC21-0000000AEBD5F889\n000000067F0000400200008A5900005BC000-000000067F0000400200008A5900005C0000__0000001C760FA190\n000000067F0000400200008A5900005BC000-000000067F0000400200008A5900005C0000__00000038E67ABFA0\n000000067F0000400200008A5900005BC000-000000067F0000400200008A5900005C0000__0000003903F1CFE8\n000000067F0000400200008A5900005BC000-000000067F0000400200008A5900005C0000__0000003B99F7F8A0\n000000067F0000400200008A5900005BC000-000000067F0000400200008A5900005C0000__0000005D2FFFFB38\n000000067F0000400200008A5900005BC000-000000067F0000400200008A5900005C0000__00000073AD3FE6B8\n000000067F0000400200008A5900005BC000-000000067F0000400200008A5900005C0000__000000914E3F38F0\n000000067F0000400200008A5900005BC000-000000067F0000400200008A5900005C0000__000000931B33AE68\n000000067F0000400200008A5900005BC000-000000067F0000400200008A5900005C0000__000000931B9AFDF8\n000000067F0000400200008A5900005C0000-000000067F0000400200008A5900005C4000__0000001C760FA190\n000000067F0000400200008A5900005C0000-000000067F0000400200008A5900005C4000__00000038E67ABFA0\n000000067F0000400200008A5900005C0000-000000067F0000400200008A5900005C4000__0000003903F1CFE8\n000000067F0000400200008A5900005C0000-000000067F0000400200008A5900005C4000__0000003B99F7F8A0\n000000067F0000400200008A5900005C0000-000000067F0000400200008A5900005C4000__0000005D2FFFFB38\n000000067F0000400200008A5900005C0000-000000067F0000400200008A5900005C4000__00000073AD3FE6B8\n000000067F0000400200008A5900005C0000-000000067F0000400200008A5900005C4000__000000914E3F38F0\n000000067F0000400200008A5900005C0000-000000067F0000400200008A5900005C4000__000000931B33AE68\n000000067F0000400200008A5900005C0000-000000067F0000400200008A5900005C4000__000000931B9AFDF8\n000000067F0000400200008A5900005C3CB1-000000067F0000400200008A5900005CC678__0000000A4C25FC21-0000000AEBD5F889\n000000067F0000400200008A5900005C4000-0
00000067F0000400200008A5900005C8000__0000001C760FA190\n000000067F0000400200008A5900005C4000-000000067F0000400200008A5900005C8000__00000038E67ABFA0\n000000067F0000400200008A5900005C4000-000000067F0000400200008A5900005C8000__0000003903F1CFE8\n000000067F0000400200008A5900005C4000-000000067F0000400200008A5900005C8000__0000003B99F7F8A0\n000000067F0000400200008A5900005C4000-000000067F0000400200008A5900005C8000__0000005D2FFFFB38\n000000067F0000400200008A5900005C4000-000000067F0000400200008A5900005C8000__00000073AD3FE6B8\n000000067F0000400200008A5900005C4000-000000067F0000400200008A5900005C8000__000000914E3F38F0\n000000067F0000400200008A5900005C4000-000000067F0000400200008A5900005C8000__000000931B33AE68\n000000067F0000400200008A5900005C4000-000000067F0000400200008A5900005C8000__000000931B9AFDF8\n000000067F0000400200008A5900005C8000-000000067F0000400200008A5900005CC000__0000001C760FA190\n000000067F0000400200008A5900005C8000-000000067F0000400200008A5900005CC000__00000038E67ABFA0\n000000067F0000400200008A5900005C8000-000000067F0000400200008A5900005CC000__0000003903F1CFE8\n000000067F0000400200008A5900005C8000-000000067F0000400200008A5900005CC000__0000003B99F7F8A0\n000000067F0000400200008A5900005C8000-000000067F0000400200008A5900005CC000__0000005D2FFFFB38\n000000067F0000400200008A5900005C8000-000000067F0000400200008A5900005CC000__00000073AD3FE6B8\n000000067F0000400200008A5900005C8000-000000067F0000400200008A5900005CC000__000000914E3F38F0\n000000067F0000400200008A5900005C8000-000000067F0000400200008A5900005CC000__000000931B33AE68\n000000067F0000400200008A5900005C8000-000000067F0000400200008A5900005CC000__000000931B9AFDF8\n000000067F0000400200008A5900005CC000-000000067F0000400200008A5900005D0000__0000001C760FA190\n000000067F0000400200008A5900005CC000-000000067F0000400200008A5900005D0000__00000038E67ABFA0\n000000067F0000400200008A5900005CC000-000000067F0000400200008A5900005D0000__0000003903F1CFE8\n000000067F0000400200008A5900005CC000-000000067F0000400200008A5900005D0000__0000003B99
F7F8A0\n000000067F0000400200008A5900005CC000-000000067F0000400200008A5900005D0000__0000005D2FFFFB38\n000000067F0000400200008A5900005CC000-000000067F0000400200008A5900005D0000__00000073AD3FE6B8\n000000067F0000400200008A5900005CC000-000000067F0000400200008A5900005D0000__000000914E3F38F0\n000000067F0000400200008A5900005CC000-000000067F0000400200008A5900005D0000__000000931B33AE68\n000000067F0000400200008A5900005CC000-000000067F0000400200008A5900005D0000__000000931B9AFDF8\n000000067F0000400200008A5900005CC678-000000067F0000400200008A5900005D5052__0000000A4C25FC21-0000000AEBD5F889\n000000067F0000400200008A5900005D0000-000000067F0000400200008A5900005D4000__0000001C760FA190\n000000067F0000400200008A5900005D0000-000000067F0000400200008A5900005D4000__00000038E67ABFA0\n000000067F0000400200008A5900005D0000-000000067F0000400200008A5900005D4000__0000003903F1CFE8\n000000067F0000400200008A5900005D0000-000000067F0000400200008A5900005D4000__0000003B99F7F8A0\n000000067F0000400200008A5900005D0000-000000067F0000400200008A5900005D4000__0000005D2FFFFB38\n000000067F0000400200008A5900005D0000-000000067F0000400200008A5900005D4000__00000073AD3FE6B8\n000000067F0000400200008A5900005D0000-000000067F0000400200008A5900005D4000__000000914E3F38F0\n000000067F0000400200008A5900005D0000-000000067F0000400200008A5900005D4000__000000931B33AE68\n000000067F0000400200008A5900005D0000-000000067F0000400200008A5900005D4000__000000931B9AFDF8\n000000067F0000400200008A5900005D4000-000000067F0000400200008A5900005D8000__0000001C760FA190\n000000067F0000400200008A5900005D4000-000000067F0000400200008A5900005D8000__00000038E67ABFA0\n000000067F0000400200008A5900005D4000-000000067F0000400200008A5900005D8000__0000003903F1CFE8\n000000067F0000400200008A5900005D4000-000000067F0000400200008A5900005D8000__0000003B99F7F8A0\n000000067F0000400200008A5900005D4000-000000067F0000400200008A5900005D8000__0000005D2FFFFB38\n000000067F0000400200008A5900005D4000-000000067F0000400200008A5900005D8000__00000073AD3FE6B8\n000000067F000040020000
8A5900005D4000-000000067F0000400200008A5900005D8000__000000914E3F38F0\n000000067F0000400200008A5900005D4000-000000067F0000400200008A5900005D8000__000000931B33AE68\n000000067F0000400200008A5900005D4000-000000067F0000400200008A5900005D8000__000000931B9AFDF8\n000000067F0000400200008A5900005D5052-000000067F0000400200008A5900005DDA38__0000000A4C25FC21-0000000AEBD5F889\n000000067F0000400200008A5900005D8000-000000067F0000400200008A5900005DC000__0000001C760FA190\n000000067F0000400200008A5900005D8000-000000067F0000400200008A5900005DC000__00000038E67ABFA0\n000000067F0000400200008A5900005D8000-000000067F0000400200008A5900005DC000__0000003903F1CFE8\n000000067F0000400200008A5900005D8000-000000067F0000400200008A5900005DC000__0000003B99F7F8A0\n000000067F0000400200008A5900005D8000-000000067F0000400200008A5900005DC000__0000005D2FFFFB38\n000000067F0000400200008A5900005D8000-000000067F0000400200008A5900005DC000__00000073AD3FE6B8\n000000067F0000400200008A5900005D8000-000000067F0000400200008A5900005DC000__000000914E3F38F0\n000000067F0000400200008A5900005D8000-000000067F0000400200008A5900005DC000__000000931B33AE68\n000000067F0000400200008A5900005D8000-000000067F0000400200008A5900005DC000__000000931B9AFDF8\n000000067F0000400200008A5900005DC000-000000067F0000400200008A5900005E0000__0000001C760FA190\n000000067F0000400200008A5900005DC000-000000067F0000400200008A5900005E0000__00000038E67ABFA0\n000000067F0000400200008A5900005DC000-000000067F0000400200008A5900005E0000__0000003903F1CFE8\n000000067F0000400200008A5900005DC000-000000067F0000400200008A5900005E0000__0000003B99F7F8A0\n000000067F0000400200008A5900005DC000-000000067F0000400200008A5900005E0000__0000005D2FFFFB38\n000000067F0000400200008A5900005DC000-000000067F0000400200008A5900005E0000__00000073AD3FE6B8\n000000067F0000400200008A5900005DC000-000000067F0000400200008A5900005E0000__000000914E3F38F0\n000000067F0000400200008A5900005DC000-000000067F0000400200008A5900005E0000__000000931B33AE68\n000000067F0000400200008A5900005DC000-000000067F00004
00200008A5900005E0000__000000931B9AFDF8\n000000067F0000400200008A5900005DDA38-000000067F0000400200008A5900005E6422__0000000A4C25FC21-0000000AEBD5F889\n000000067F0000400200008A5900005E0000-000000067F0000400200008A5900005E4000__0000001C760FA190\n000000067F0000400200008A5900005E0000-000000067F0000400200008A5900005E4000__00000038E67ABFA0\n000000067F0000400200008A5900005E0000-000000067F0000400200008A5900005E4000__0000003903F1CFE8\n000000067F0000400200008A5900005E0000-000000067F0000400200008A5900005E4000__0000003B99F7F8A0\n000000067F0000400200008A5900005E0000-000000067F0000400200008A5900005E4000__0000005D2FFFFB38\n000000067F0000400200008A5900005E0000-000000067F0000400200008A5900005E4000__00000073AD3FE6B8\n000000067F0000400200008A5900005E0000-000000067F0000400200008A5900005E4000__000000914E3F38F0\n000000067F0000400200008A5900005E0000-000000067F0000400200008A5900005E4000__000000931B33AE68\n000000067F0000400200008A5900005E0000-000000067F0000400200008A5900005E4000__000000931B9AFDF8\n000000067F0000400200008A5900005E4000-000000067F0000400200008A5900005E8000__0000001C725A2400\n000000067F0000400200008A5900005E4000-000000067F0000400200008A5900005E8000__0000001C760FA190\n000000067F0000400200008A5900005E4000-000000067F0000400200008A5900005E8000__00000038E67ABFA0\n000000067F0000400200008A5900005E4000-000000067F0000400200008A5900005E8000__0000003903F1CFE8\n000000067F0000400200008A5900005E4000-000000067F0000400200008A5900005E8000__0000003B99F7F8A0\n000000067F0000400200008A5900005E4000-000000067F0000400200008A5900005E8000__0000005D2FFFFB38\n000000067F0000400200008A5900005E4000-000000067F0000400200008A5900005E8000__00000073AD3FE6B8\n000000067F0000400200008A5900005E4000-000000067F0000400200008A5900005E8000__000000914E3F38F0\n000000067F0000400200008A5900005E4000-000000067F0000400200008A5900005E8000__000000931B33AE68\n000000067F0000400200008A5900005E4000-000000067F0000400200008A5900005E8000__000000931B9AFDF8\n000000067F0000400200008A5900005E6422-000000067F0000400200008A590100000000__0000000
A4C25FC21-0000000AEBD5F889\n000000067F0000400200008A5900005E670E-000000067F0000400200008A5900005EF0E7__0000000AEBD5F889-0000000B8B85DC91\n000000067F0000400200008A5900005E8000-000000067F0000400200008A5900005EC000__0000001C725A2400\n000000067F0000400200008A5900005E8000-000000067F0000400200008A5900005EC000__0000001C760FA190\n000000067F0000400200008A5900005E8000-000000067F0000400200008A5900005EC000__00000038E67ABFA0\n000000067F0000400200008A5900005E8000-000000067F0000400200008A5900005EC000__0000003903F1CFE8\n000000067F0000400200008A5900005E8000-000000067F0000400200008A5900005EC000__0000003B99F7F8A0\n000000067F0000400200008A5900005E8000-000000067F0000400200008A5900005EC000__0000005D2FFFFB38\n000000067F0000400200008A5900005E8000-000000067F0000400200008A5900005EC000__00000073AD3FE6B8\n000000067F0000400200008A5900005E8000-000000067F0000400200008A5900005EC000__000000914E3F38F0\n000000067F0000400200008A5900005E8000-000000067F0000400200008A5900005EC000__000000931B33AE68\n000000067F0000400200008A5900005E8000-000000067F0000400200008A5900005EC000__000000931B9AFDF8\n000000067F0000400200008A5900005EC000-000000067F0000400200008A5900005F0000__0000001C725A2400\n000000067F0000400200008A5900005EC000-000000067F0000400200008A5900005F0000__0000001C760FA190\n000000067F0000400200008A5900005EC000-000000067F0000400200008A5900005F0000__00000038E67ABFA0\n000000067F0000400200008A5900005EC000-000000067F0000400200008A5900005F0000__0000003903F1CFE8\n000000067F0000400200008A5900005EC000-000000067F0000400200008A5900005F0000__0000003B99F7F8A0\n000000067F0000400200008A5900005EC000-000000067F0000400200008A5900005F0000__0000005D2FFFFB38\n000000067F0000400200008A5900005EC000-000000067F0000400200008A5900005F0000__00000073AD3FE6B8\n000000067F0000400200008A5900005EC000-000000067F0000400200008A5900005F0000__000000914E3F38F0\n000000067F0000400200008A5900005EC000-000000067F0000400200008A5900005F0000__000000931B33AE68\n000000067F0000400200008A5900005EC000-000000067F0000400200008A5900005F0000__000000931B9AFDF8\n00
0000067F0000400200008A5900005EF0E7-000000067F0000400200008A5900005F7AC2__0000000AEBD5F889-0000000B8B85DC91\n000000067F0000400200008A5900005F0000-000000067F0000400200008A5900005F4000__0000001C725A2400\n000000067F0000400200008A5900005F0000-000000067F0000400200008A5900005F4000__0000001C760FA190\n000000067F0000400200008A5900005F0000-000000067F0000400200008A5900005F4000__00000038E67ABFA0\n000000067F0000400200008A5900005F0000-000000067F0000400200008A5900005F4000__0000003903F1CFE8\n000000067F0000400200008A5900005F0000-000000067F0000400200008A5900005F4000__0000003B99F7F8A0\n000000067F0000400200008A5900005F0000-000000067F0000400200008A5900005F4000__0000005D2FFFFB38\n000000067F0000400200008A5900005F0000-000000067F0000400200008A5900005F4000__00000073AD3FE6B8\n000000067F0000400200008A5900005F0000-000000067F0000400200008A5900005F4000__000000914E3F38F0\n000000067F0000400200008A5900005F0000-000000067F0000400200008A5900005F4000__000000931B33AE68\n000000067F0000400200008A5900005F0000-000000067F0000400200008A5900005F4000__000000931B9AFDF8\n000000067F0000400200008A5900005F4000-000000067F0000400200008A5900005F8000__0000001C725A2400\n000000067F0000400200008A5900005F4000-000000067F0000400200008A5900005F8000__0000001C760FA190\n000000067F0000400200008A5900005F4000-000000067F0000400200008A5900005F8000__00000038E67ABFA0\n000000067F0000400200008A5900005F4000-000000067F0000400200008A5900005F8000__0000003903F1CFE8\n000000067F0000400200008A5900005F4000-000000067F0000400200008A5900005F8000__0000003B99F7F8A0\n000000067F0000400200008A5900005F4000-000000067F0000400200008A5900005F8000__0000005D2FFFFB38\n000000067F0000400200008A5900005F4000-000000067F0000400200008A5900005F8000__00000073AD3FE6B8\n000000067F0000400200008A5900005F4000-000000067F0000400200008A5900005F8000__000000914E3F38F0\n000000067F0000400200008A5900005F4000-000000067F0000400200008A5900005F8000__000000931B33AE68\n000000067F0000400200008A5900005F4000-000000067F0000400200008A5900005F8000__000000931B9AFDF8\n000000067F0000400200008A5900005F
7AC2-000000067F0000400200008A590000600494__0000000AEBD5F889-0000000B8B85DC91\n000000067F0000400200008A5900005F8000-000000067F0000400200008A5900005FC000__0000001C725A2400\n000000067F0000400200008A5900005F8000-000000067F0000400200008A5900005FC000__0000001C760FA190\n000000067F0000400200008A5900005F8000-000000067F0000400200008A5900005FC000__00000038E67ABFA0\n000000067F0000400200008A5900005F8000-000000067F0000400200008A5900005FC000__0000003903F1CFE8\n000000067F0000400200008A5900005F8000-000000067F0000400200008A5900005FC000__0000003B99F7F8A0\n000000067F0000400200008A5900005F8000-000000067F0000400200008A5900005FC000__0000005D2FFFFB38\n000000067F0000400200008A5900005F8000-000000067F0000400200008A5900005FC000__00000073AD3FE6B8\n000000067F0000400200008A5900005F8000-000000067F0000400200008A5900005FC000__000000914E3F38F0\n000000067F0000400200008A5900005F8000-000000067F0000400200008A5900005FC000__000000931B33AE68\n000000067F0000400200008A5900005F8000-000000067F0000400200008A5900005FC000__000000931B9AFDF8\n000000067F0000400200008A5900005FC000-000000067F0000400200008A590000600000__0000001C725A2400\n000000067F0000400200008A5900005FC000-000000067F0000400200008A590000600000__0000001C760FA190\n000000067F0000400200008A5900005FC000-000000067F0000400200008A590000600000__00000038E67ABFA0\n000000067F0000400200008A5900005FC000-000000067F0000400200008A590000600000__0000003903F1CFE8\n000000067F0000400200008A5900005FC000-000000067F0000400200008A590000600000__0000003B99F7F8A0\n000000067F0000400200008A5900005FC000-000000067F0000400200008A590000600000__0000005D2FFFFB38\n000000067F0000400200008A5900005FC000-000000067F0000400200008A590000600000__00000073AD3FE6B8\n000000067F0000400200008A5900005FC000-000000067F0000400200008A590000600000__000000914E3F38F0\n000000067F0000400200008A5900005FC000-000000067F0000400200008A590000600000__000000931B33AE68\n000000067F0000400200008A5900005FC000-000000067F0000400200008A590000600000__000000931B9AFDF8\n000000067F0000400200008A590000600000-000000067F0000400200008A5
90000604000__0000001C725A2400\n000000067F0000400200008A590000600000-000000067F0000400200008A590000604000__0000001C760FA190\n000000067F0000400200008A590000600000-000000067F0000400200008A590000604000__00000038E67ABFA0\n000000067F0000400200008A590000600000-000000067F0000400200008A590000604000__0000003903F1CFE8\n000000067F0000400200008A590000600000-000000067F0000400200008A590000604000__0000003B99F7F8A0\n000000067F0000400200008A590000600000-000000067F0000400200008A590000604000__0000005D2FFFFB38\n000000067F0000400200008A590000600000-000000067F0000400200008A590000604000__00000073AD3FE6B8\n000000067F0000400200008A590000600000-000000067F0000400200008A590000604000__000000914E3F38F0\n000000067F0000400200008A590000600000-000000067F0000400200008A590000604000__000000931B33AE68\n000000067F0000400200008A590000600000-000000067F0000400200008A590000604000__000000931B9AFDF8\n000000067F0000400200008A590000600494-000000067F0000400200008A590000608E5C__0000000AEBD5F889-0000000B8B85DC91\n000000067F0000400200008A590000604000-000000067F0000400200008A590000608000__0000001C725A2400\n000000067F0000400200008A590000604000-000000067F0000400200008A590000608000__0000001C760FA190\n000000067F0000400200008A590000604000-000000067F0000400200008A590000608000__00000038E67ABFA0\n000000067F0000400200008A590000604000-000000067F0000400200008A590000608000__0000003903F1CFE8\n000000067F0000400200008A590000604000-000000067F0000400200008A590000608000__0000003B99F7F8A0\n000000067F0000400200008A590000604000-000000067F0000400200008A590000608000__0000005D2FFFFB38\n000000067F0000400200008A590000604000-000000067F0000400200008A590000608000__00000073AD3FE6B8\n000000067F0000400200008A590000604000-000000067F0000400200008A590000608000__000000914E3F38F0\n000000067F0000400200008A590000604000-000000067F0000400200008A590000608000__000000931B33AE68\n000000067F0000400200008A590000604000-000000067F0000400200008A590000608000__000000931B9AFDF8\n000000067F0000400200008A590000608000-000000067F0000400200008A59000060C000__0000001C725A2400\
n000000067F0000400200008A590000608000-000000067F0000400200008A59000060C000__0000001C760FA190\n000000067F0000400200008A590000608000-000000067F0000400200008A59000060C000__00000038E67ABFA0\n000000067F0000400200008A590000608000-000000067F0000400200008A59000060C000__0000003903F1CFE8\n000000067F0000400200008A590000608000-000000067F0000400200008A59000060C000__0000003B99F7F8A0\n000000067F0000400200008A590000608000-000000067F0000400200008A59000060C000__0000005D2FFFFB38\n000000067F0000400200008A590000608000-000000067F0000400200008A59000060C000__00000073AD3FE6B8\n000000067F0000400200008A590000608000-000000067F0000400200008A59000060C000__000000914E3F38F0\n000000067F0000400200008A590000608000-000000067F0000400200008A59000060C000__000000931B33AE68\n000000067F0000400200008A590000608000-000000067F0000400200008A59000060C000__000000931B9AFDF8\n000000067F0000400200008A590000608E5C-000000067F0000400200008A590000611840__0000000AEBD5F889-0000000B8B85DC91\n000000067F0000400200008A59000060C000-000000067F0000400200008A590000610000__0000001C725A2400\n000000067F0000400200008A59000060C000-000000067F0000400200008A590000610000__0000001C760FA190\n000000067F0000400200008A59000060C000-000000067F0000400200008A590000610000__00000038E67ABFA0\n000000067F0000400200008A59000060C000-000000067F0000400200008A590000610000__0000003903F1CFE8\n000000067F0000400200008A59000060C000-000000067F0000400200008A590000610000__0000003B99F7F8A0\n000000067F0000400200008A59000060C000-000000067F0000400200008A590000610000__0000005D2FFFFB38\n000000067F0000400200008A59000060C000-000000067F0000400200008A590000610000__00000073AD3FE6B8\n000000067F0000400200008A59000060C000-000000067F0000400200008A590000610000__000000914E3F38F0\n000000067F0000400200008A59000060C000-000000067F0000400200008A590000610000__000000931B33AE68\n000000067F0000400200008A59000060C000-000000067F0000400200008A590000610000__000000931B9AFDF8\n000000067F0000400200008A590000610000-000000067F0000400200008A590000614000__0000001C725A2400\n000000067F0000400200008A59000
0610000-000000067F0000400200008A590000614000__0000001C760FA190\n000000067F0000400200008A590000610000-000000067F0000400200008A590000614000__00000038E67ABFA0\n000000067F0000400200008A590000610000-000000067F0000400200008A590000614000__0000003903F1CFE8\n000000067F0000400200008A590000610000-000000067F0000400200008A590000614000__0000003B99F7F8A0\n000000067F0000400200008A590000610000-000000067F0000400200008A590000614000__0000005D2FFFFB38\n000000067F0000400200008A590000610000-000000067F0000400200008A590000614000__00000073AD3FE6B8\n000000067F0000400200008A590000610000-000000067F0000400200008A590000614000__000000914E3F38F0\n000000067F0000400200008A590000610000-000000067F0000400200008A590000614000__000000931B33AE68\n000000067F0000400200008A590000610000-000000067F0000400200008A590000614000__000000931B9AFDF8\n000000067F0000400200008A590000611840-000000067F0000400200008A59000061A226__0000000AEBD5F889-0000000B8B85DC91\n000000067F0000400200008A590000614000-000000067F0000400200008A590000618000__0000001C725A2400\n000000067F0000400200008A590000614000-000000067F0000400200008A590000618000__0000001C760FA190\n000000067F0000400200008A590000614000-000000067F0000400200008A590000618000__00000038E67ABFA0\n000000067F0000400200008A590000614000-000000067F0000400200008A590000618000__0000003903F1CFE8\n000000067F0000400200008A590000614000-000000067F0000400200008A590000618000__0000003B99F7F8A0\n000000067F0000400200008A590000614000-000000067F0000400200008A590000618000__0000005D2FFFFB38\n000000067F0000400200008A590000614000-000000067F0000400200008A590000618000__00000073AD3FE6B8\n000000067F0000400200008A590000614000-000000067F0000400200008A590000618000__000000914E3F38F0\n000000067F0000400200008A590000614000-000000067F0000400200008A590000618000__000000931B33AE68\n000000067F0000400200008A590000614000-000000067F0000400200008A590000618000__000000931B9AFDF8\n000000067F0000400200008A590000618000-000000067F0000400200008A59000061C000__0000001C725A2400\n000000067F0000400200008A590000618000-000000067F000040020000
8A59000061C000__0000001C760FA190\n000000067F0000400200008A590000618000-000000067F0000400200008A59000061C000__00000038E67ABFA0\n000000067F0000400200008A590000618000-000000067F0000400200008A59000061C000__0000003903F1CFE8\n000000067F0000400200008A590000618000-000000067F0000400200008A59000061C000__0000003B99F7F8A0\n000000067F0000400200008A590000618000-000000067F0000400200008A59000061C000__0000005D2FFFFB38\n000000067F0000400200008A590000618000-000000067F0000400200008A59000061C000__00000073AD3FE6B8\n000000067F0000400200008A590000618000-000000067F0000400200008A59000061C000__000000914E3F38F0\n000000067F0000400200008A590000618000-000000067F0000400200008A59000061C000__000000931B33AE68\n000000067F0000400200008A590000618000-000000067F0000400200008A59000061C000__000000931B9AFDF8\n000000067F0000400200008A59000061A226-000000067F0000400200008A590000622C03__0000000AEBD5F889-0000000B8B85DC91\n000000067F0000400200008A59000061C000-000000067F0000400200008A590000620000__0000001C725A2400\n000000067F0000400200008A59000061C000-000000067F0000400200008A590000620000__0000001C760FA190\n000000067F0000400200008A59000061C000-000000067F0000400200008A590000620000__00000038E67ABFA0\n000000067F0000400200008A59000061C000-000000067F0000400200008A590000620000__0000003903F1CFE8\n000000067F0000400200008A59000061C000-000000067F0000400200008A590000620000__0000003B99F7F8A0\n000000067F0000400200008A59000061C000-000000067F0000400200008A590000620000__0000005D2FFFFB38\n000000067F0000400200008A59000061C000-000000067F0000400200008A590000620000__00000073AD3FE6B8\n000000067F0000400200008A59000061C000-000000067F0000400200008A590000620000__000000914E3F38F0\n000000067F0000400200008A59000061C000-000000067F0000400200008A590000620000__000000931B33AE68\n000000067F0000400200008A59000061C000-000000067F0000400200008A590000620000__000000931B9AFDF8\n000000067F0000400200008A590000620000-000000067F0000400200008A590000624000__0000001C725A2400\n000000067F0000400200008A590000620000-000000067F0000400200008A590000624000__0000001C760FA1
90\n000000067F0000400200008A590000620000-000000067F0000400200008A590000624000__00000038E67ABFA0\n000000067F0000400200008A590000620000-000000067F0000400200008A590000624000__0000003903F1CFE8\n000000067F0000400200008A590000620000-000000067F0000400200008A590000624000__0000003B99F7F8A0\n000000067F0000400200008A590000620000-000000067F0000400200008A590000624000__0000005D2FFFFB38\n000000067F0000400200008A590000620000-000000067F0000400200008A590000624000__00000073AD3FE6B8\n000000067F0000400200008A590000620000-000000067F0000400200008A590000624000__000000914E3F38F0\n000000067F0000400200008A590000620000-000000067F0000400200008A590000624000__000000931B33AE68\n000000067F0000400200008A590000620000-000000067F0000400200008A590000624000__000000931B9AFDF8\n000000067F0000400200008A590000622C03-000000067F0000400200008A59000062B5D9__0000000AEBD5F889-0000000B8B85DC91\n000000067F0000400200008A590000624000-000000067F0000400200008A590000628000__0000001C725A2400\n000000067F0000400200008A590000624000-000000067F0000400200008A590000628000__0000001C760FA190\n000000067F0000400200008A590000624000-000000067F0000400200008A590000628000__00000038E67ABFA0\n000000067F0000400200008A590000624000-000000067F0000400200008A590000628000__0000003903F1CFE8\n000000067F0000400200008A590000624000-000000067F0000400200008A590000628000__0000003B99F7F8A0\n000000067F0000400200008A590000624000-000000067F0000400200008A590000628000__0000005D2FFFFB38\n000000067F0000400200008A590000624000-000000067F0000400200008A590000628000__00000073AD3FE6B8\n000000067F0000400200008A590000624000-000000067F0000400200008A590000628000__000000914E3F38F0\n000000067F0000400200008A590000624000-000000067F0000400200008A590000628000__000000931B33AE68\n000000067F0000400200008A590000624000-000000067F0000400200008A590000628000__000000931B9AFDF8\n000000067F0000400200008A590000628000-000000067F0000400200008A59000062C000__0000001C725A2400\n000000067F0000400200008A590000628000-000000067F0000400200008A59000062C000__0000001C760FA190\n000000067F0000400200008A59
0000628000-000000067F0000400200008A59000062C000__00000038E67ABFA0\n000000067F0000400200008A590000628000-000000067F0000400200008A59000062C000__0000003903F1CFE8\n000000067F0000400200008A590000628000-000000067F0000400200008A59000062C000__0000003B99F7F8A0\n000000067F0000400200008A590000628000-000000067F0000400200008A59000062C000__0000005D2FFFFB38\n000000067F0000400200008A590000628000-000000067F0000400200008A59000062C000__00000073AD3FE6B8\n000000067F0000400200008A590000628000-000000067F0000400200008A59000062C000__000000914E3F38F0\n000000067F0000400200008A590000628000-000000067F0000400200008A59000062C000__000000931B33AE68\n000000067F0000400200008A590000628000-000000067F0000400200008A59000062C000__000000931B9AFDF8\n000000067F0000400200008A59000062B5D9-000000067F0000400200008A590000633FB7__0000000AEBD5F889-0000000B8B85DC91\n000000067F0000400200008A59000062C000-000000067F0000400200008A590000630000__0000001C725A2400\n000000067F0000400200008A59000062C000-000000067F0000400200008A590000630000__0000001C760FA190\n000000067F0000400200008A59000062C000-000000067F0000400200008A590000630000__00000038E67ABFA0\n000000067F0000400200008A59000062C000-000000067F0000400200008A590000630000__0000003903F1CFE8\n000000067F0000400200008A59000062C000-000000067F0000400200008A590000630000__0000003B99F7F8A0\n000000067F0000400200008A59000062C000-000000067F0000400200008A590000630000__0000005D2FFFFB38\n000000067F0000400200008A59000062C000-000000067F0000400200008A590000630000__00000073AD3FE6B8\n000000067F0000400200008A59000062C000-000000067F0000400200008A590000630000__000000914E3F38F0\n000000067F0000400200008A59000062C000-000000067F0000400200008A590000630000__000000931B33AE68\n000000067F0000400200008A59000062C000-000000067F0000400200008A590000630000__000000931B9AFDF8\n000000067F0000400200008A590000630000-000000067F0000400200008A590000634000__0000001C725A2400\n000000067F0000400200008A590000630000-000000067F0000400200008A590000634000__0000001C760FA190\n000000067F0000400200008A590000630000-000000067F000040020
0008A590000634000__00000038E67ABFA0\n000000067F0000400200008A590000630000-000000067F0000400200008A590000634000__0000003903F1CFE8\n000000067F0000400200008A590000630000-000000067F0000400200008A590000634000__0000003B99F7F8A0\n000000067F0000400200008A590000630000-000000067F0000400200008A590000634000__0000005D2FFFFB38\n000000067F0000400200008A590000630000-000000067F0000400200008A590000634000__00000073AD3FE6B8\n000000067F0000400200008A590000630000-000000067F0000400200008A590000634000__000000914E3F38F0\n000000067F0000400200008A590000630000-000000067F0000400200008A590000634000__000000931B33AE68\n000000067F0000400200008A590000630000-000000067F0000400200008A590000634000__000000931B9AFDF8\n000000067F0000400200008A590000633FB7-000000067F0000400200008A59000063C989__0000000AEBD5F889-0000000B8B85DC91\n000000067F0000400200008A590000634000-000000067F0000400200008A590000638000__0000001C725A2400\n000000067F0000400200008A590000634000-000000067F0000400200008A590000638000__0000001C760FA190\n000000067F0000400200008A590000634000-000000067F0000400200008A590000638000__00000038E67ABFA0\n000000067F0000400200008A590000634000-000000067F0000400200008A590000638000__0000003903F1CFE8\n000000067F0000400200008A590000634000-000000067F0000400200008A590000638000__0000003B99F7F8A0\n000000067F0000400200008A590000634000-000000067F0000400200008A590000638000__0000005D2FFFFB38\n000000067F0000400200008A590000634000-000000067F0000400200008A590000638000__00000073AD3FE6B8\n000000067F0000400200008A590000634000-000000067F0000400200008A590000638000__000000914E3F38F0\n000000067F0000400200008A590000634000-000000067F0000400200008A590000638000__000000931B33AE68\n000000067F0000400200008A590000634000-000000067F0000400200008A590000638000__000000931B9AFDF8\n000000067F0000400200008A590000638000-000000067F0000400200008A59000063C000__0000001C725A2400\n000000067F0000400200008A590000638000-000000067F0000400200008A59000063C000__0000001C760FA190\n000000067F0000400200008A590000638000-000000067F0000400200008A59000063C000__00000038E67
ABFA0\n000000067F0000400200008A590000638000-000000067F0000400200008A59000063C000__0000003903F1CFE8\n000000067F0000400200008A590000638000-000000067F0000400200008A59000063C000__0000003B99F7F8A0\n000000067F0000400200008A590000638000-000000067F0000400200008A59000063C000__0000005D2FFFFB38\n000000067F0000400200008A590000638000-000000067F0000400200008A59000063C000__00000073AD3FE6B8\n000000067F0000400200008A590000638000-000000067F0000400200008A59000063C000__000000914E3F38F0\n000000067F0000400200008A590000638000-000000067F0000400200008A59000063C000__000000931B33AE68\n000000067F0000400200008A590000638000-000000067F0000400200008A59000063C000__000000931B9AFDF8\n000000067F0000400200008A59000063C000-000000067F0000400200008A590000640000__0000000C539FF890\n000000067F0000400200008A59000063C000-000000067F0000400200008A590000640000__0000001C760FA190\n000000067F0000400200008A59000063C000-000000067F0000400200008A590000640000__00000038E67ABFA0\n000000067F0000400200008A59000063C000-000000067F0000400200008A590000640000__0000003903F1CFE8\n000000067F0000400200008A59000063C000-000000067F0000400200008A590000640000__0000003B99F7F8A0\n000000067F0000400200008A59000063C000-000000067F0000400200008A590000640000__0000005D2FFFFB38\n000000067F0000400200008A59000063C000-000000067F0000400200008A590000640000__00000073AD3FE6B8\n000000067F0000400200008A59000063C000-000000067F0000400200008A590000640000__000000914E3F38F0\n000000067F0000400200008A59000063C000-000000067F0000400200008A590000640000__000000931B33AE68\n000000067F0000400200008A59000063C000-000000067F0000400200008A590000640000__000000931B9AFDF8\n000000067F0000400200008A59000063C989-000000067F0000400200008A590100000000__0000000AEBD5F889-0000000B8B85DC91\n000000067F0000400200008A59000063CC6C-000000067F0000400200008A590000645631__0000000B8B85DC91-0000000C3B2DF409\n000000067F0000400200008A590000640000-000000067F0000400200008A590000644000__0000000C539FF890\n000000067F0000400200008A590000640000-000000067F0000400200008A590000644000__0000001C760FA190\n000000
067F0000400200008A590000640000-000000067F0000400200008A590000644000__00000038E67ABFA0\n000000067F0000400200008A590000640000-000000067F0000400200008A590000644000__0000003903F1CFE8\n000000067F0000400200008A590000640000-000000067F0000400200008A590000644000__0000003B99F7F8A0\n000000067F0000400200008A590000640000-000000067F0000400200008A590000644000__0000005D2FFFFB38\n000000067F0000400200008A590000640000-000000067F0000400200008A590000644000__00000073AD3FE6B8\n000000067F0000400200008A590000640000-000000067F0000400200008A590000644000__000000914E3F38F0\n000000067F0000400200008A590000640000-000000067F0000400200008A590000644000__000000931B33AE68\n000000067F0000400200008A590000640000-000000067F0000400200008A590000644000__000000931B9AFDF8\n000000067F0000400200008A590000644000-000000067F0000400200008A590000648000__0000000C539FF890\n000000067F0000400200008A590000644000-000000067F0000400200008A590000648000__0000001C760FA190\n000000067F0000400200008A590000644000-000000067F0000400200008A590000648000__00000038E67ABFA0\n000000067F0000400200008A590000644000-000000067F0000400200008A590000648000__0000003903F1CFE8\n000000067F0000400200008A590000644000-000000067F0000400200008A590000648000__0000003B99F7F8A0\n000000067F0000400200008A590000644000-000000067F0000400200008A590000648000__0000005D2FFFFB38\n000000067F0000400200008A590000644000-000000067F0000400200008A590000648000__00000073AD3FE6B8\n000000067F0000400200008A590000644000-000000067F0000400200008A590000648000__000000914E3F38F0\n000000067F0000400200008A590000644000-000000067F0000400200008A590000648000__000000931B33AE68\n000000067F0000400200008A590000644000-000000067F0000400200008A590000648000__000000931B9AFDF8\n000000067F0000400200008A590000645631-000000067F0000400200008A59000064E015__0000000B8B85DC91-0000000C3B2DF409\n000000067F0000400200008A590000648000-000000067F0000400200008A59000064C000__0000000C539FF890\n000000067F0000400200008A590000648000-000000067F0000400200008A59000064C000__0000001C760FA190\n000000067F0000400200008A590000648000
-000000067F0000400200008A59000064C000__00000038E67ABFA0\n000000067F0000400200008A590000648000-000000067F0000400200008A59000064C000__0000003903F1CFE8\n000000067F0000400200008A590000648000-000000067F0000400200008A59000064C000__0000003B99F7F8A0\n000000067F0000400200008A590000648000-000000067F0000400200008A59000064C000__0000005D2FFFFB38\n000000067F0000400200008A590000648000-000000067F0000400200008A59000064C000__00000073AD3FE6B8\n000000067F0000400200008A590000648000-000000067F0000400200008A59000064C000__000000914E3F38F0\n000000067F0000400200008A590000648000-000000067F0000400200008A59000064C000__000000931B33AE68\n000000067F0000400200008A590000648000-000000067F0000400200008A59000064C000__000000931B9AFDF8\n000000067F0000400200008A59000064C000-000000067F0000400200008A590000650000__0000000C539FF890\n000000067F0000400200008A59000064C000-000000067F0000400200008A590000650000__0000001C760FA190\n000000067F0000400200008A59000064C000-000000067F0000400200008A590000650000__00000038E67ABFA0\n000000067F0000400200008A59000064C000-000000067F0000400200008A590000650000__0000003903F1CFE8\n000000067F0000400200008A59000064C000-000000067F0000400200008A590000650000__0000003B99F7F8A0\n000000067F0000400200008A59000064C000-000000067F0000400200008A590000650000__0000005D2FFFFB38\n000000067F0000400200008A59000064C000-000000067F0000400200008A590000650000__00000073AD3FE6B8\n000000067F0000400200008A59000064C000-000000067F0000400200008A590000650000__000000914E3F38F0\n000000067F0000400200008A59000064C000-000000067F0000400200008A590000650000__000000931B33AE68\n000000067F0000400200008A59000064C000-000000067F0000400200008A590000650000__000000931B9AFDF8\n000000067F0000400200008A59000064E015-000000067F0000400200008A5900006569FE__0000000B8B85DC91-0000000C3B2DF409\n000000067F0000400200008A590000650000-000000067F0000400200008A590000654000__0000000C539FF890\n000000067F0000400200008A590000650000-000000067F0000400200008A590000654000__0000001C760FA190\n000000067F0000400200008A590000650000-000000067F0000400200008A59000
0654000__00000038E67ABFA0\n000000067F0000400200008A590000650000-000000067F0000400200008A590000654000__0000003903F1CFE8\n000000067F0000400200008A590000650000-000000067F0000400200008A590000654000__0000003B99F7F8A0\n000000067F0000400200008A590000650000-000000067F0000400200008A590000654000__0000005D2FFFFB38\n000000067F0000400200008A590000650000-000000067F0000400200008A590000654000__00000073AD3FE6B8\n000000067F0000400200008A590000650000-000000067F0000400200008A590000654000__000000914E3F38F0\n000000067F0000400200008A590000650000-000000067F0000400200008A590000654000__000000931B33AE68\n000000067F0000400200008A590000650000-000000067F0000400200008A590000654000__000000931B9AFDF8\n000000067F0000400200008A590000654000-000000067F0000400200008A590000658000__0000000C539FF890\n000000067F0000400200008A590000654000-000000067F0000400200008A590000658000__0000001C760FA190\n000000067F0000400200008A590000654000-000000067F0000400200008A590000658000__00000038E67ABFA0\n000000067F0000400200008A590000654000-000000067F0000400200008A590000658000__0000003903F1CFE8\n000000067F0000400200008A590000654000-000000067F0000400200008A590000658000__0000003B99F7F8A0\n000000067F0000400200008A590000654000-000000067F0000400200008A590000658000__0000005D2FFFFB38\n000000067F0000400200008A590000654000-000000067F0000400200008A590000658000__00000073AD3FE6B8\n000000067F0000400200008A590000654000-000000067F0000400200008A590000658000__000000914E3F38F0\n000000067F0000400200008A590000654000-000000067F0000400200008A590000658000__000000931B33AE68\n000000067F0000400200008A590000654000-000000067F0000400200008A590000658000__000000931B9AFDF8\n000000067F0000400200008A5900006569FE-000000067F0000400200008A59000065F3ED__0000000B8B85DC91-0000000C3B2DF409\n000000067F0000400200008A590000658000-000000067F0000400200008A59000065C000__0000000C539FF890\n000000067F0000400200008A590000658000-000000067F0000400200008A59000065C000__0000001C760FA190\n000000067F0000400200008A590000658000-000000067F0000400200008A59000065C000__00000038E67ABFA0\n000
000067F0000400200008A590000658000-000000067F0000400200008A59000065C000__0000003903F1CFE8\n000000067F0000400200008A590000658000-000000067F0000400200008A59000065C000__0000003B99F7F8A0\n000000067F0000400200008A590000658000-000000067F0000400200008A59000065C000__0000005D2FFFFB38\n000000067F0000400200008A590000658000-000000067F0000400200008A59000065C000__00000073AD3FE6B8\n000000067F0000400200008A590000658000-000000067F0000400200008A59000065C000__000000914E3F38F0\n000000067F0000400200008A590000658000-000000067F0000400200008A59000065C000__000000931B33AE68\n000000067F0000400200008A590000658000-000000067F0000400200008A59000065C000__000000931B9AFDF8\n000000067F0000400200008A59000065C000-000000067F0000400200008A590000660000__0000000C539FF890\n000000067F0000400200008A59000065C000-000000067F0000400200008A590000660000__0000001C760FA190\n000000067F0000400200008A59000065C000-000000067F0000400200008A590000660000__00000038E67ABFA0\n000000067F0000400200008A59000065C000-000000067F0000400200008A590000660000__0000003903F1CFE8\n000000067F0000400200008A59000065C000-000000067F0000400200008A590000660000__0000003B99F7F8A0\n000000067F0000400200008A59000065C000-000000067F0000400200008A590000660000__0000005D2FFFFB38\n000000067F0000400200008A59000065C000-000000067F0000400200008A590000660000__00000073AD3FE6B8\n000000067F0000400200008A59000065C000-000000067F0000400200008A590000660000__000000914E3F38F0\n000000067F0000400200008A59000065C000-000000067F0000400200008A590000660000__000000931B33AE68\n000000067F0000400200008A59000065C000-000000067F0000400200008A590000660000__000000931B9AFDF8\n000000067F0000400200008A59000065F3ED-000000067F0000400200008A590000667DD3__0000000B8B85DC91-0000000C3B2DF409\n000000067F0000400200008A590000660000-000000067F0000400200008A590000664000__0000000C539FF890\n000000067F0000400200008A590000660000-000000067F0000400200008A590000664000__0000001C760FA190\n000000067F0000400200008A590000660000-000000067F0000400200008A590000664000__00000038E67ABFA0\n000000067F0000400200008A590000660
000-000000067F0000400200008A590000664000__0000003903F1CFE8\n000000067F0000400200008A590000660000-000000067F0000400200008A590000664000__0000003B99F7F8A0\n000000067F0000400200008A590000660000-000000067F0000400200008A590000664000__0000005D2FFFFB38\n000000067F0000400200008A590000660000-000000067F0000400200008A590000664000__00000073AD3FE6B8\n000000067F0000400200008A590000660000-000000067F0000400200008A590000664000__000000914E3F38F0\n000000067F0000400200008A590000660000-000000067F0000400200008A590000664000__000000931B33AE68\n000000067F0000400200008A590000660000-000000067F0000400200008A590000664000__000000931B9AFDF8\n000000067F0000400200008A590000664000-000000067F0000400200008A590000668000__0000000C539FF890\n000000067F0000400200008A590000664000-000000067F0000400200008A590000668000__0000001C760FA190\n000000067F0000400200008A590000664000-000000067F0000400200008A590000668000__00000038E67ABFA0\n000000067F0000400200008A590000664000-000000067F0000400200008A590000668000__0000003903F1CFE8\n000000067F0000400200008A590000664000-000000067F0000400200008A590000668000__0000003B99F7F8A0\n000000067F0000400200008A590000664000-000000067F0000400200008A590000668000__0000005D2FFFFB38\n000000067F0000400200008A590000664000-000000067F0000400200008A590000668000__00000073AD3FE6B8\n000000067F0000400200008A590000664000-000000067F0000400200008A590000668000__000000914E3F38F0\n000000067F0000400200008A590000664000-000000067F0000400200008A590000668000__000000931B33AE68\n000000067F0000400200008A590000664000-000000067F0000400200008A590000668000__000000931B9AFDF8\n000000067F0000400200008A590000667DD3-000000067F0000400200008A5900006707A7__0000000B8B85DC91-0000000C3B2DF409\n000000067F0000400200008A590000668000-000000067F0000400200008A59000066C000__0000000C539FF890\n000000067F0000400200008A590000668000-000000067F0000400200008A59000066C000__0000001C760FA190\n000000067F0000400200008A590000668000-000000067F0000400200008A59000066C000__00000038E67ABFA0\n000000067F0000400200008A590000668000-000000067F0000400200008A59
000066C000__0000003903F1CFE8\n000000067F0000400200008A590000668000-000000067F0000400200008A59000066C000__0000003B99F7F8A0\n000000067F0000400200008A590000668000-000000067F0000400200008A59000066C000__0000005D2FFFFB38\n000000067F0000400200008A590000668000-000000067F0000400200008A59000066C000__00000073AD3FE6B8\n000000067F0000400200008A590000668000-000000067F0000400200008A59000066C000__000000914E3F38F0\n000000067F0000400200008A590000668000-000000067F0000400200008A59000066C000__000000931B33AE68\n000000067F0000400200008A590000668000-000000067F0000400200008A59000066C000__000000931B9AFDF8\n000000067F0000400200008A59000066C000-000000067F0000400200008A590000670000__0000000C539FF890\n000000067F0000400200008A59000066C000-000000067F0000400200008A590000670000__0000001C760FA190\n000000067F0000400200008A59000066C000-000000067F0000400200008A590000670000__00000038E67ABFA0\n000000067F0000400200008A59000066C000-000000067F0000400200008A590000670000__0000003903F1CFE8\n000000067F0000400200008A59000066C000-000000067F0000400200008A590000670000__0000003B99F7F8A0\n000000067F0000400200008A59000066C000-000000067F0000400200008A590000670000__0000005D2FFFFB38\n000000067F0000400200008A59000066C000-000000067F0000400200008A590000670000__00000073AD3FE6B8\n000000067F0000400200008A59000066C000-000000067F0000400200008A590000670000__000000914E3F38F0\n000000067F0000400200008A59000066C000-000000067F0000400200008A590000670000__000000931B33AE68\n000000067F0000400200008A59000066C000-000000067F0000400200008A590000670000__000000931B9AFDF8\n000000067F0000400200008A590000670000-000000067F0000400200008A590000674000__0000000C539FF890\n000000067F0000400200008A590000670000-000000067F0000400200008A590000674000__0000001C760FA190\n000000067F0000400200008A590000670000-000000067F0000400200008A590000674000__00000038E67ABFA0\n000000067F0000400200008A590000670000-000000067F0000400200008A590000674000__0000003903F1CFE8\n000000067F0000400200008A590000670000-000000067F0000400200008A590000674000__0000003B99F7F8A0\n000000067F0000400
200008A590000670000-000000067F0000400200008A590000674000__0000005D2FFFFB38\n000000067F0000400200008A590000670000-000000067F0000400200008A590000674000__00000073AD3FE6B8\n000000067F0000400200008A590000670000-000000067F0000400200008A590000674000__000000914E3F38F0\n000000067F0000400200008A590000670000-000000067F0000400200008A590000674000__000000931B33AE68\n000000067F0000400200008A590000670000-000000067F0000400200008A590000674000__000000931B9AFDF8\n000000067F0000400200008A5900006707A7-000000067F0000400200008A59000067917A__0000000B8B85DC91-0000000C3B2DF409\n000000067F0000400200008A590000674000-000000067F0000400200008A590000678000__0000000C539FF890\n000000067F0000400200008A590000674000-000000067F0000400200008A590000678000__0000001C760FA190\n000000067F0000400200008A590000674000-000000067F0000400200008A590000678000__00000038E67ABFA0\n000000067F0000400200008A590000674000-000000067F0000400200008A590000678000__0000003903F1CFE8\n000000067F0000400200008A590000674000-000000067F0000400200008A590000678000__0000003B99F7F8A0\n000000067F0000400200008A590000674000-000000067F0000400200008A590000678000__0000005D2FFFFB38\n000000067F0000400200008A590000674000-000000067F0000400200008A590000678000__00000073AD3FE6B8\n000000067F0000400200008A590000674000-000000067F0000400200008A590000678000__000000914E3F38F0\n000000067F0000400200008A590000674000-000000067F0000400200008A590000678000__000000931B33AE68\n000000067F0000400200008A590000674000-000000067F0000400200008A590000678000__000000931B9AFDF8\n000000067F0000400200008A590000678000-000000067F0000400200008A59000067C000__0000000C539FF890\n000000067F0000400200008A590000678000-000000067F0000400200008A59000067C000__0000001C760FA190\n000000067F0000400200008A590000678000-000000067F0000400200008A59000067C000__00000038E67ABFA0\n000000067F0000400200008A590000678000-000000067F0000400200008A59000067C000__0000003903F1CFE8\n000000067F0000400200008A590000678000-000000067F0000400200008A59000067C000__0000003B99F7F8A0\n000000067F0000400200008A590000678000-000000067F
0000400200008A59000067C000__0000005D2FFFFB38\n000000067F0000400200008A590000678000-000000067F0000400200008A59000067C000__00000073AD3FE6B8\n000000067F0000400200008A590000678000-000000067F0000400200008A59000067C000__000000914E3F38F0\n000000067F0000400200008A590000678000-000000067F0000400200008A59000067C000__000000931B33AE68\n000000067F0000400200008A590000678000-000000067F0000400200008A59000067C000__000000931B9AFDF8\n000000067F0000400200008A59000067917A-000000067F0000400200008A590000681B34__0000000B8B85DC91-0000000C3B2DF409\n000000067F0000400200008A59000067C000-000000067F0000400200008A590000680000__0000000C539FF890\n000000067F0000400200008A59000067C000-000000067F0000400200008A590000680000__0000001C760FA190\n000000067F0000400200008A59000067C000-000000067F0000400200008A590000680000__00000038E67ABFA0\n000000067F0000400200008A59000067C000-000000067F0000400200008A590000680000__0000003903F1CFE8\n000000067F0000400200008A59000067C000-000000067F0000400200008A590000680000__0000003B99F7F8A0\n000000067F0000400200008A59000067C000-000000067F0000400200008A590000680000__0000005D2FFFFB38\n000000067F0000400200008A59000067C000-000000067F0000400200008A590000680000__00000073AD3FE6B8\n000000067F0000400200008A59000067C000-000000067F0000400200008A590000680000__000000914E3F38F0\n000000067F0000400200008A59000067C000-000000067F0000400200008A590000680000__000000931B33AE68\n000000067F0000400200008A59000067C000-000000067F0000400200008A590000680000__000000931B9AFDF8\n000000067F0000400200008A590000680000-000000067F0000400200008A590000684000__0000000C539FF890\n000000067F0000400200008A590000680000-000000067F0000400200008A590000684000__0000001C760FA190\n000000067F0000400200008A590000680000-000000067F0000400200008A590000684000__00000038E67ABFA0\n000000067F0000400200008A590000680000-000000067F0000400200008A590000684000__0000003903F1CFE8\n000000067F0000400200008A590000680000-000000067F0000400200008A590000684000__0000003B99F7F8A0\n000000067F0000400200008A590000680000-000000067F0000400200008A590000684000__00
00005D2FFFFB38\n000000067F0000400200008A590000680000-000000067F0000400200008A590000684000__00000073AD3FE6B8\n000000067F0000400200008A590000680000-000000067F0000400200008A590000684000__000000914E3F38F0\n000000067F0000400200008A590000680000-000000067F0000400200008A590000684000__000000931B33AE68\n000000067F0000400200008A590000680000-000000067F0000400200008A590000684000__000000931B9AFDF8\n000000067F0000400200008A590000681B34-000000067F0000400200008A59000068A51E__0000000B8B85DC91-0000000C3B2DF409\n000000067F0000400200008A590000684000-000000067F0000400200008A590000688000__0000000C539FF890\n000000067F0000400200008A590000684000-000000067F0000400200008A590000688000__0000001C760FA190\n000000067F0000400200008A590000684000-000000067F0000400200008A590000688000__00000038E67ABFA0\n000000067F0000400200008A590000684000-000000067F0000400200008A590000688000__0000003903F1CFE8\n000000067F0000400200008A590000684000-000000067F0000400200008A590000688000__0000003B99F7F8A0\n000000067F0000400200008A590000684000-000000067F0000400200008A590000688000__0000005D2FFFFB38\n000000067F0000400200008A590000684000-000000067F0000400200008A590000688000__00000073AD3FE6B8\n000000067F0000400200008A590000684000-000000067F0000400200008A590000688000__000000914E3F38F0\n000000067F0000400200008A590000684000-000000067F0000400200008A590000688000__000000931B33AE68\n000000067F0000400200008A590000684000-000000067F0000400200008A590000688000__000000931B9AFDF8\n000000067F0000400200008A590000688000-000000067F0000400200008A59000068C000__0000000C539FF890\n000000067F0000400200008A590000688000-000000067F0000400200008A59000068C000__0000001C760FA190\n000000067F0000400200008A590000688000-000000067F0000400200008A59000068C000__00000038E67ABFA0\n000000067F0000400200008A590000688000-000000067F0000400200008A59000068C000__0000003903F1CFE8\n000000067F0000400200008A590000688000-000000067F0000400200008A59000068C000__0000003B99F7F8A0\n000000067F0000400200008A590000688000-000000067F0000400200008A59000068C000__0000005D2FFFFB38\n000000067F0000
400200008A590000688000-000000067F0000400200008A59000068C000__00000073AD3FE6B8\n000000067F0000400200008A590000688000-000000067F0000400200008A59000068C000__000000914E3F38F0\n000000067F0000400200008A590000688000-000000067F0000400200008A59000068C000__000000931B33AE68\n000000067F0000400200008A590000688000-000000067F0000400200008A59000068C000__000000931B9AFDF8\n000000067F0000400200008A59000068A51E-000000067F0000400200008A590000692F04__0000000B8B85DC91-0000000C3B2DF409\n000000067F0000400200008A59000068C000-000000067F0000400200008A590000690000__0000000C539FF890\n000000067F0000400200008A59000068C000-000000067F0000400200008A590000690000__0000001C760FA190\n000000067F0000400200008A59000068C000-000000067F0000400200008A590000690000__00000038E67ABFA0\n000000067F0000400200008A59000068C000-000000067F0000400200008A590000690000__0000003903F1CFE8\n000000067F0000400200008A59000068C000-000000067F0000400200008A590000690000__0000003B99F7F8A0\n000000067F0000400200008A59000068C000-000000067F0000400200008A590000690000__0000005D2FFFFB38\n000000067F0000400200008A59000068C000-000000067F0000400200008A590000690000__00000073AD3FE6B8\n000000067F0000400200008A59000068C000-000000067F0000400200008A590000690000__000000914E3F38F0\n000000067F0000400200008A59000068C000-000000067F0000400200008A590000690000__000000931B33AE68\n000000067F0000400200008A59000068C000-000000067F0000400200008A590000690000__000000931B9AFDF8\n000000067F0000400200008A590000690000-000000067F0000400200008A590000694000__0000000C539FF890\n000000067F0000400200008A590000690000-000000067F0000400200008A590000694000__0000001C760FA190\n000000067F0000400200008A590000690000-000000067F0000400200008A590000694000__00000038E67ABFA0\n000000067F0000400200008A590000690000-000000067F0000400200008A590000694000__0000003903F1CFE8\n000000067F0000400200008A590000690000-000000067F0000400200008A590000694000__0000003B99F7F8A0\n000000067F0000400200008A590000690000-000000067F0000400200008A590000694000__0000005D2FFFFB38\n000000067F0000400200008A590000690000-0000000
67F0000400200008A590000694000__00000073AD3FE6B8\n000000067F0000400200008A590000690000-000000067F0000400200008A590000694000__000000914E3F38F0\n000000067F0000400200008A590000690000-000000067F0000400200008A590000694000__000000931B33AE68\n000000067F0000400200008A590000690000-000000067F0000400200008A590000694000__000000931B9AFDF8\n000000067F0000400200008A590000692F04-000000067F0000400200008A59000069B8DE__0000000B8B85DC91-0000000C3B2DF409\n000000067F0000400200008A590000694000-000000067F0000400200008A590000698000__0000000C539FF890\n000000067F0000400200008A590000694000-000000067F0000400200008A590000698000__0000001C760FA190\n000000067F0000400200008A590000694000-000000067F0000400200008A590000698000__00000038E67ABFA0\n000000067F0000400200008A590000694000-000000067F0000400200008A590000698000__0000003903F1CFE8\n000000067F0000400200008A590000694000-000000067F0000400200008A590000698000__0000003B99F7F8A0\n000000067F0000400200008A590000694000-000000067F0000400200008A590000698000__0000005D2FFFFB38\n000000067F0000400200008A590000694000-000000067F0000400200008A590000698000__00000073AD3FE6B8\n000000067F0000400200008A590000694000-000000067F0000400200008A590000698000__000000914E3F38F0\n000000067F0000400200008A590000694000-000000067F0000400200008A590000698000__000000931B33AE68\n000000067F0000400200008A590000694000-000000067F0000400200008A590000698000__000000931B9AFDF8\n000000067F0000400200008A590000698000-000000067F0000400200008A59000069C000__0000000C539FF890\n000000067F0000400200008A590000698000-000000067F0000400200008A59000069C000__0000001C760FA190\n000000067F0000400200008A590000698000-000000067F0000400200008A59000069C000__00000038E67ABFA0\n000000067F0000400200008A590000698000-000000067F0000400200008A59000069C000__0000003903F1CFE8\n000000067F0000400200008A590000698000-000000067F0000400200008A59000069C000__0000003B99F7F8A0\n000000067F0000400200008A590000698000-000000067F0000400200008A59000069C000__0000005D2FFFFB38\n000000067F0000400200008A590000698000-000000067F0000400200008A59000069C000_
_00000073AD3FE6B8\n000000067F0000400200008A590000698000-000000067F0000400200008A59000069C000__000000914E3F38F0\n000000067F0000400200008A590000698000-000000067F0000400200008A59000069C000__000000931B33AE68\n000000067F0000400200008A590000698000-000000067F0000400200008A59000069C000__000000931B9AFDF8\n000000067F0000400200008A59000069B8DE-000000067F0000400200008A590100000000__0000000B8B85DC91-0000000C3B2DF409\n000000067F0000400200008A59000069BC09-000000067F0000400200008A5900006A45D6__0000000C3B2DF409-0000000CDADDDFC9\n000000067F0000400200008A59000069C000-000000067F0000400200008A5900006A0000__0000000C539FF890\n000000067F0000400200008A59000069C000-000000067F0000400200008A5900006A0000__0000001C760FA190\n000000067F0000400200008A59000069C000-000000067F0000400200008A5900006A0000__00000038E67ABFA0\n000000067F0000400200008A59000069C000-000000067F0000400200008A5900006A0000__0000003903F1CFE8\n000000067F0000400200008A59000069C000-000000067F0000400200008A5900006A0000__0000003B99F7F8A0\n000000067F0000400200008A59000069C000-000000067F0000400200008A5900006A0000__0000005D2FFFFB38\n000000067F0000400200008A59000069C000-000000067F0000400200008A5900006A0000__00000073AD3FE6B8\n000000067F0000400200008A59000069C000-000000067F0000400200008A5900006A0000__000000914E3F38F0\n000000067F0000400200008A59000069C000-000000067F0000400200008A5900006A0000__000000931B33AE68\n000000067F0000400200008A59000069C000-000000067F0000400200008A5900006A0000__000000931B9AFDF8\n000000067F0000400200008A5900006A0000-000000067F0000400200008A5900006A4000__0000000C539FF890\n000000067F0000400200008A5900006A0000-000000067F0000400200008A5900006A4000__0000001C760FA190\n000000067F0000400200008A5900006A0000-000000067F0000400200008A5900006A4000__00000038E67ABFA0\n000000067F0000400200008A5900006A0000-000000067F0000400200008A5900006A4000__0000003903F1CFE8\n000000067F0000400200008A5900006A0000-000000067F0000400200008A5900006A4000__0000003B99F7F8A0\n000000067F0000400200008A5900006A0000-000000067F0000400200008A5900006A4000__0000005D2FFF
FB38\n000000067F0000400200008A5900006A0000-000000067F0000400200008A5900006A4000__00000073AD3FE6B8\n000000067F0000400200008A5900006A0000-000000067F0000400200008A5900006A4000__000000914E3F38F0\n000000067F0000400200008A5900006A0000-000000067F0000400200008A5900006A4000__000000931B33AE68\n000000067F0000400200008A5900006A0000-000000067F0000400200008A5900006A4000__000000931B9AFDF8\n000000067F0000400200008A5900006A4000-000000067F0000400200008A5900006A8000__0000000C539FF890\n000000067F0000400200008A5900006A4000-000000067F0000400200008A5900006A8000__0000001C760FA190\n000000067F0000400200008A5900006A4000-000000067F0000400200008A5900006A8000__00000038E67ABFA0\n000000067F0000400200008A5900006A4000-000000067F0000400200008A5900006A8000__0000003903F1CFE8\n000000067F0000400200008A5900006A4000-000000067F0000400200008A5900006A8000__0000003B99F7F8A0\n000000067F0000400200008A5900006A4000-000000067F0000400200008A5900006A8000__0000005D2FFFFB38\n000000067F0000400200008A5900006A4000-000000067F0000400200008A5900006A8000__00000073AD3FE6B8\n000000067F0000400200008A5900006A4000-000000067F0000400200008A5900006A8000__000000914E3F38F0\n000000067F0000400200008A5900006A4000-000000067F0000400200008A5900006A8000__000000931B33AE68\n000000067F0000400200008A5900006A4000-000000067F0000400200008A5900006A8000__000000931B9AFDF8\n000000067F0000400200008A5900006A45D6-000000067F0000400200008A5900006ACFB4__0000000C3B2DF409-0000000CDADDDFC9\n000000067F0000400200008A5900006A8000-000000067F0000400200008A5900006AC000__0000001C760FA190\n000000067F0000400200008A5900006A8000-000000067F0000400200008A5900006AC000__00000038E67ABFA0\n000000067F0000400200008A5900006A8000-000000067F0000400200008A5900006AC000__0000003903F1CFE8\n000000067F0000400200008A5900006A8000-000000067F0000400200008A5900006AC000__0000003B99F7F8A0\n000000067F0000400200008A5900006A8000-000000067F0000400200008A5900006AC000__0000005D2FFFFB38\n000000067F0000400200008A5900006A8000-000000067F0000400200008A5900006AC000__00000073AD3FE6B8\n000000067F0000400200008A
5900006A8000-000000067F0000400200008A5900006AC000__000000914E3F38F0\n000000067F0000400200008A5900006A8000-000000067F0000400200008A5900006AC000__000000931B33AE68\n000000067F0000400200008A5900006A8000-000000067F0000400200008A5900006AC000__000000931B9AFDF8\n000000067F0000400200008A5900006A8000-030000000000000000000000000000000002__0000000C539FF890\n000000067F0000400200008A5900006AC000-000000067F0000400200008A5900006B0000__0000001C760FA190\n000000067F0000400200008A5900006AC000-000000067F0000400200008A5900006B0000__00000038E67ABFA0\n000000067F0000400200008A5900006AC000-000000067F0000400200008A5900006B0000__0000003903F1CFE8\n000000067F0000400200008A5900006AC000-000000067F0000400200008A5900006B0000__0000003B99F7F8A0\n000000067F0000400200008A5900006AC000-000000067F0000400200008A5900006B0000__0000005D2FFFFB38\n000000067F0000400200008A5900006AC000-000000067F0000400200008A5900006B0000__00000073AD3FE6B8\n000000067F0000400200008A5900006AC000-000000067F0000400200008A5900006B0000__000000914E3F38F0\n000000067F0000400200008A5900006AC000-000000067F0000400200008A5900006B0000__000000931B33AE68\n000000067F0000400200008A5900006AC000-000000067F0000400200008A5900006B0000__000000931B9AFDF8\n000000067F0000400200008A5900006ACFB4-000000067F0000400200008A5900006B598B__0000000C3B2DF409-0000000CDADDDFC9\n000000067F0000400200008A5900006B0000-000000067F0000400200008A5900006B4000__0000001C760FA190\n000000067F0000400200008A5900006B0000-000000067F0000400200008A5900006B4000__00000038E67ABFA0\n000000067F0000400200008A5900006B0000-000000067F0000400200008A5900006B4000__0000003903F1CFE8\n000000067F0000400200008A5900006B0000-000000067F0000400200008A5900006B4000__0000003B99F7F8A0\n000000067F0000400200008A5900006B0000-000000067F0000400200008A5900006B4000__0000005D2FFFFB38\n000000067F0000400200008A5900006B0000-000000067F0000400200008A5900006B4000__00000073AD3FE6B8\n000000067F0000400200008A5900006B0000-000000067F0000400200008A5900006B4000__000000914E3F38F0\n000000067F0000400200008A5900006B0000-000000067F0000400
200008A5900006B4000__000000931B33AE68\n000000067F0000400200008A5900006B0000-000000067F0000400200008A5900006B4000__000000931B9AFDF8\n000000067F0000400200008A5900006B4000-000000067F0000400200008A5900006B8000__0000001C760FA190\n000000067F0000400200008A5900006B4000-000000067F0000400200008A5900006B8000__00000038E67ABFA0\n000000067F0000400200008A5900006B4000-000000067F0000400200008A5900006B8000__0000003903F1CFE8\n000000067F0000400200008A5900006B4000-000000067F0000400200008A5900006B8000__0000003B99F7F8A0\n000000067F0000400200008A5900006B4000-000000067F0000400200008A5900006B8000__0000005D2FFFFB38\n000000067F0000400200008A5900006B4000-000000067F0000400200008A5900006B8000__00000073AD3FE6B8\n000000067F0000400200008A5900006B4000-000000067F0000400200008A5900006B8000__000000914E3F38F0\n000000067F0000400200008A5900006B4000-000000067F0000400200008A5900006B8000__000000931B33AE68\n000000067F0000400200008A5900006B4000-000000067F0000400200008A5900006B8000__000000931B9AFDF8\n000000067F0000400200008A5900006B598B-000000067F0000400200008A5900006BE35A__0000000C3B2DF409-0000000CDADDDFC9\n000000067F0000400200008A5900006B8000-000000067F0000400200008A5900006BC000__0000001C760FA190\n000000067F0000400200008A5900006B8000-000000067F0000400200008A5900006BC000__00000038E67ABFA0\n000000067F0000400200008A5900006B8000-000000067F0000400200008A5900006BC000__0000003903F1CFE8\n000000067F0000400200008A5900006B8000-000000067F0000400200008A5900006BC000__0000003B99F7F8A0\n000000067F0000400200008A5900006B8000-000000067F0000400200008A5900006BC000__0000005D2FFFFB38\n000000067F0000400200008A5900006B8000-000000067F0000400200008A5900006BC000__00000073AD3FE6B8\n000000067F0000400200008A5900006B8000-000000067F0000400200008A5900006BC000__000000914E3F38F0\n000000067F0000400200008A5900006B8000-000000067F0000400200008A5900006BC000__000000931B33AE68\n000000067F0000400200008A5900006B8000-000000067F0000400200008A5900006BC000__000000931B9AFDF8\n000000067F0000400200008A5900006BC000-000000067F0000400200008A5900006C0000__0000001C7
60FA190\n000000067F0000400200008A5900006BC000-000000067F0000400200008A5900006C0000__00000038E67ABFA0\n000000067F0000400200008A5900006BC000-000000067F0000400200008A5900006C0000__0000003903F1CFE8\n000000067F0000400200008A5900006BC000-000000067F0000400200008A5900006C0000__0000003B99F7F8A0\n000000067F0000400200008A5900006BC000-000000067F0000400200008A5900006C0000__0000005D2FFFFB38\n000000067F0000400200008A5900006BC000-000000067F0000400200008A5900006C0000__00000073AD3FE6B8\n000000067F0000400200008A5900006BC000-000000067F0000400200008A5900006C0000__000000914E3F38F0\n000000067F0000400200008A5900006BC000-000000067F0000400200008A5900006C0000__000000931B33AE68\n000000067F0000400200008A5900006BC000-000000067F0000400200008A5900006C0000__000000931B9AFDF8\n000000067F0000400200008A5900006BE35A-000000067F0000400200008A5900006C6D3C__0000000C3B2DF409-0000000CDADDDFC9\n000000067F0000400200008A5900006C0000-000000067F0000400200008A5900006C4000__0000001C760FA190\n000000067F0000400200008A5900006C0000-000000067F0000400200008A5900006C4000__00000038E67ABFA0\n000000067F0000400200008A5900006C0000-000000067F0000400200008A5900006C4000__0000003903F1CFE8\n000000067F0000400200008A5900006C0000-000000067F0000400200008A5900006C4000__0000003B99F7F8A0\n000000067F0000400200008A5900006C0000-000000067F0000400200008A5900006C4000__0000005D2FFFFB38\n000000067F0000400200008A5900006C0000-000000067F0000400200008A5900006C4000__00000073AD3FE6B8\n000000067F0000400200008A5900006C0000-000000067F0000400200008A5900006C4000__000000914E3F38F0\n000000067F0000400200008A5900006C0000-000000067F0000400200008A5900006C4000__000000931B33AE68\n000000067F0000400200008A5900006C0000-000000067F0000400200008A5900006C4000__000000931B9AFDF8\n000000067F0000400200008A5900006C4000-000000067F0000400200008A5900006C8000__0000001C760FA190\n000000067F0000400200008A5900006C4000-000000067F0000400200008A5900006C8000__00000038E67ABFA0\n000000067F0000400200008A5900006C4000-000000067F0000400200008A5900006C8000__0000003903F1CFE8\n000000067F00004002000
08A5900006C4000-000000067F0000400200008A5900006C8000__0000003B99F7F8A0\n000000067F0000400200008A5900006C4000-000000067F0000400200008A5900006C8000__0000005D2FFFFB38\n000000067F0000400200008A5900006C4000-000000067F0000400200008A5900006C8000__00000073AD3FE6B8\n000000067F0000400200008A5900006C4000-000000067F0000400200008A5900006C8000__000000914E3F38F0\n000000067F0000400200008A5900006C4000-000000067F0000400200008A5900006C8000__000000931B33AE68\n000000067F0000400200008A5900006C4000-000000067F0000400200008A5900006C8000__000000931B9AFDF8\n000000067F0000400200008A5900006C6D3C-000000067F0000400200008A5900006CF724__0000000C3B2DF409-0000000CDADDDFC9\n000000067F0000400200008A5900006C8000-000000067F0000400200008A5900006CC000__0000001C760FA190\n000000067F0000400200008A5900006C8000-000000067F0000400200008A5900006CC000__00000038E67ABFA0\n000000067F0000400200008A5900006C8000-000000067F0000400200008A5900006CC000__0000003903F1CFE8\n000000067F0000400200008A5900006C8000-000000067F0000400200008A5900006CC000__0000003B99F7F8A0\n000000067F0000400200008A5900006C8000-000000067F0000400200008A5900006CC000__0000005D2FFFFB38\n000000067F0000400200008A5900006C8000-000000067F0000400200008A5900006CC000__00000073AD3FE6B8\n000000067F0000400200008A5900006C8000-000000067F0000400200008A5900006CC000__000000914E3F38F0\n000000067F0000400200008A5900006C8000-000000067F0000400200008A5900006CC000__000000931B33AE68\n000000067F0000400200008A5900006C8000-000000067F0000400200008A5900006CC000__000000931B9AFDF8\n000000067F0000400200008A5900006CC000-000000067F0000400200008A5900006D0000__0000001C760FA190\n000000067F0000400200008A5900006CC000-000000067F0000400200008A5900006D0000__00000038E67ABFA0\n000000067F0000400200008A5900006CC000-000000067F0000400200008A5900006D0000__0000003903F1CFE8\n000000067F0000400200008A5900006CC000-000000067F0000400200008A5900006D0000__0000003B99F7F8A0\n000000067F0000400200008A5900006CC000-000000067F0000400200008A5900006D0000__0000005D2FFFFB38\n000000067F0000400200008A5900006CC000-000000067F0000
400200008A5900006D0000__00000073AD3FE6B8\n000000067F0000400200008A5900006CC000-000000067F0000400200008A5900006D0000__000000914E3F38F0\n000000067F0000400200008A5900006CC000-000000067F0000400200008A5900006D0000__000000931B33AE68\n000000067F0000400200008A5900006CC000-000000067F0000400200008A5900006D0000__000000931B9AFDF8\n000000067F0000400200008A5900006CF724-000000067F0000400200008A5900006D80FB__0000000C3B2DF409-0000000CDADDDFC9\n000000067F0000400200008A5900006D0000-000000067F0000400200008A5900006D4000__0000001C760FA190\n000000067F0000400200008A5900006D0000-000000067F0000400200008A5900006D4000__00000038E67ABFA0\n000000067F0000400200008A5900006D0000-000000067F0000400200008A5900006D4000__0000003903F1CFE8\n000000067F0000400200008A5900006D0000-000000067F0000400200008A5900006D4000__0000003B99F7F8A0\n000000067F0000400200008A5900006D0000-000000067F0000400200008A5900006D4000__0000005D2FFFFB38\n000000067F0000400200008A5900006D0000-000000067F0000400200008A5900006D4000__00000073AD3FE6B8\n000000067F0000400200008A5900006D0000-000000067F0000400200008A5900006D4000__000000914E3F38F0\n000000067F0000400200008A5900006D0000-000000067F0000400200008A5900006D4000__000000931B33AE68\n000000067F0000400200008A5900006D0000-000000067F0000400200008A5900006D4000__000000931B9AFDF8\n000000067F0000400200008A5900006D4000-000000067F0000400200008A5900006D8000__0000001C760FA190\n000000067F0000400200008A5900006D4000-000000067F0000400200008A5900006D8000__00000038E67ABFA0\n000000067F0000400200008A5900006D4000-000000067F0000400200008A5900006D8000__0000003903F1CFE8\n000000067F0000400200008A5900006D4000-000000067F0000400200008A5900006D8000__0000003B99F7F8A0\n000000067F0000400200008A5900006D4000-000000067F0000400200008A5900006D8000__0000005D2FFFFB38\n000000067F0000400200008A5900006D4000-000000067F0000400200008A5900006D8000__00000073AD3FE6B8\n000000067F0000400200008A5900006D4000-000000067F0000400200008A5900006D8000__000000914E3F38F0\n000000067F0000400200008A5900006D4000-000000067F0000400200008A5900006D8000__000000
931B33AE68\n000000067F0000400200008A5900006D4000-000000067F0000400200008A5900006D8000__000000931B9AFDF8\n000000067F0000400200008A5900006D8000-000000067F0000400200008A5900006DC000__0000001C760FA190\n000000067F0000400200008A5900006D8000-000000067F0000400200008A5900006DC000__00000038E67ABFA0\n000000067F0000400200008A5900006D8000-000000067F0000400200008A5900006DC000__0000003903F1CFE8\n000000067F0000400200008A5900006D8000-000000067F0000400200008A5900006DC000__0000003B99F7F8A0\n000000067F0000400200008A5900006D8000-000000067F0000400200008A5900006DC000__0000005D2FFFFB38\n000000067F0000400200008A5900006D8000-000000067F0000400200008A5900006DC000__00000073AD3FE6B8\n000000067F0000400200008A5900006D8000-000000067F0000400200008A5900006DC000__000000914E3F38F0\n000000067F0000400200008A5900006D8000-000000067F0000400200008A5900006DC000__000000931B33AE68\n000000067F0000400200008A5900006D8000-000000067F0000400200008A5900006DC000__000000931B9AFDF8\n000000067F0000400200008A5900006D80FB-000000067F0000400200008A5900006E0AD6__0000000C3B2DF409-0000000CDADDDFC9\n000000067F0000400200008A5900006DC000-000000067F0000400200008A5900006E0000__0000001C760FA190\n000000067F0000400200008A5900006DC000-000000067F0000400200008A5900006E0000__00000038E67ABFA0\n000000067F0000400200008A5900006DC000-000000067F0000400200008A5900006E0000__0000003903F1CFE8\n000000067F0000400200008A5900006DC000-000000067F0000400200008A5900006E0000__0000003B99F7F8A0\n000000067F0000400200008A5900006DC000-000000067F0000400200008A5900006E0000__0000005D2FFFFB38\n000000067F0000400200008A5900006DC000-000000067F0000400200008A5900006E0000__00000073AD3FE6B8\n000000067F0000400200008A5900006DC000-000000067F0000400200008A5900006E0000__000000914E3F38F0\n000000067F0000400200008A5900006DC000-000000067F0000400200008A5900006E0000__000000931B33AE68\n000000067F0000400200008A5900006DC000-000000067F0000400200008A5900006E0000__000000931B9AFDF8\n000000067F0000400200008A5900006E0000-000000067F0000400200008A5900006E4000__0000001C760FA190\n000000067F00004002
00008A5900006E0000-000000067F0000400200008A5900006E4000__00000038E67ABFA0\n000000067F0000400200008A5900006E0000-000000067F0000400200008A5900006E4000__0000003903F1CFE8\n000000067F0000400200008A5900006E0000-000000067F0000400200008A5900006E4000__0000003B99F7F8A0\n000000067F0000400200008A5900006E0000-000000067F0000400200008A5900006E4000__0000005D2FFFFB38\n000000067F0000400200008A5900006E0000-000000067F0000400200008A5900006E4000__00000073AD3FE6B8\n000000067F0000400200008A5900006E0000-000000067F0000400200008A5900006E4000__000000914E3F38F0\n000000067F0000400200008A5900006E0000-000000067F0000400200008A5900006E4000__000000931B33AE68\n000000067F0000400200008A5900006E0000-000000067F0000400200008A5900006E4000__000000931B9AFDF8\n000000067F0000400200008A5900006E0AD6-000000067F0000400200008A5900006E94BB__0000000C3B2DF409-0000000CDADDDFC9\n000000067F0000400200008A5900006E4000-000000067F0000400200008A5900006E8000__0000001C760FA190\n000000067F0000400200008A5900006E4000-000000067F0000400200008A5900006E8000__00000038E67ABFA0\n000000067F0000400200008A5900006E4000-000000067F0000400200008A5900006E8000__0000003903F1CFE8\n000000067F0000400200008A5900006E4000-000000067F0000400200008A5900006E8000__0000003B99F7F8A0\n000000067F0000400200008A5900006E4000-000000067F0000400200008A5900006E8000__0000005D2FFFFB38\n000000067F0000400200008A5900006E4000-000000067F0000400200008A5900006E8000__00000073AD3FE6B8\n000000067F0000400200008A5900006E4000-000000067F0000400200008A5900006E8000__000000914E3F38F0\n000000067F0000400200008A5900006E4000-000000067F0000400200008A5900006E8000__000000931B33AE68\n000000067F0000400200008A5900006E4000-000000067F0000400200008A5900006E8000__000000931B9AFDF8\n000000067F0000400200008A5900006E8000-000000067F0000400200008A5900006EC000__0000001C760FA190\n000000067F0000400200008A5900006E8000-000000067F0000400200008A5900006EC000__00000038E67ABFA0\n000000067F0000400200008A5900006E8000-000000067F0000400200008A5900006EC000__0000003903F1CFE8\n000000067F0000400200008A5900006E8000-000000067F0
000400200008A5900006EC000__0000003B99F7F8A0\n000000067F0000400200008A5900006E8000-000000067F0000400200008A5900006EC000__0000005D2FFFFB38\n000000067F0000400200008A5900006E8000-000000067F0000400200008A5900006EC000__00000073AD3FE6B8\n000000067F0000400200008A5900006E8000-000000067F0000400200008A5900006EC000__000000914E3F38F0\n000000067F0000400200008A5900006E8000-000000067F0000400200008A5900006EC000__000000931B33AE68\n000000067F0000400200008A5900006E8000-000000067F0000400200008A5900006EC000__000000931B9AFDF8\n000000067F0000400200008A5900006E94BB-000000067F0000400200008A5900006F1E92__0000000C3B2DF409-0000000CDADDDFC9\n000000067F0000400200008A5900006EC000-000000067F0000400200008A5900006F0000__0000001C760FA190\n000000067F0000400200008A5900006EC000-000000067F0000400200008A5900006F0000__00000038E67ABFA0\n000000067F0000400200008A5900006EC000-000000067F0000400200008A5900006F0000__0000003903F1CFE8\n000000067F0000400200008A5900006EC000-000000067F0000400200008A5900006F0000__0000003B99F7F8A0\n000000067F0000400200008A5900006EC000-000000067F0000400200008A5900006F0000__0000005D2FFFFB38\n000000067F0000400200008A5900006EC000-000000067F0000400200008A5900006F0000__00000073AD3FE6B8\n000000067F0000400200008A5900006EC000-000000067F0000400200008A5900006F0000__000000914E3F38F0\n000000067F0000400200008A5900006EC000-000000067F0000400200008A5900006F0000__000000931B33AE68\n000000067F0000400200008A5900006EC000-000000067F0000400200008A5900006F0000__000000931B9AFDF8\n000000067F0000400200008A5900006F0000-000000067F0000400200008A5900006F4000__0000001C725A2400\n000000067F0000400200008A5900006F0000-000000067F0000400200008A5900006F4000__0000001C760FA190\n000000067F0000400200008A5900006F0000-000000067F0000400200008A5900006F4000__00000038E67ABFA0\n000000067F0000400200008A5900006F0000-000000067F0000400200008A5900006F4000__0000003903F1CFE8\n000000067F0000400200008A5900006F0000-000000067F0000400200008A5900006F4000__0000003B99F7F8A0\n000000067F0000400200008A5900006F0000-000000067F0000400200008A5900006F4000__000
0005D2FFFFB38\n000000067F0000400200008A5900006F0000-000000067F0000400200008A5900006F4000__00000073AD3FE6B8\n000000067F0000400200008A5900006F0000-000000067F0000400200008A5900006F4000__000000914E3F38F0\n000000067F0000400200008A5900006F0000-000000067F0000400200008A5900006F4000__000000931B33AE68\n000000067F0000400200008A5900006F0000-000000067F0000400200008A5900006F4000__000000931B9AFDF8\n000000067F0000400200008A5900006F1E92-000000067F0000400200008A590100000000__0000000C3B2DF409-0000000CDADDDFC9\n000000067F0000400200008A5900006F215C-000000067F0000400200008A5900006FAB35__0000000CDADDDFC9-0000000D8A85D199\n000000067F0000400200008A5900006F4000-000000067F0000400200008A5900006F8000__0000001C725A2400\n000000067F0000400200008A5900006F4000-000000067F0000400200008A5900006F8000__0000001C760FA190\n000000067F0000400200008A5900006F4000-000000067F0000400200008A5900006F8000__00000038E67ABFA0\n000000067F0000400200008A5900006F4000-000000067F0000400200008A5900006F8000__0000003903F1CFE8\n000000067F0000400200008A5900006F4000-000000067F0000400200008A5900006F8000__0000003B99F7F8A0\n000000067F0000400200008A5900006F4000-000000067F0000400200008A5900006F8000__0000005D2FFFFB38\n000000067F0000400200008A5900006F4000-000000067F0000400200008A5900006F8000__00000073AD3FE6B8\n000000067F0000400200008A5900006F4000-000000067F0000400200008A5900006F8000__000000914E3F38F0\n000000067F0000400200008A5900006F4000-000000067F0000400200008A5900006F8000__000000931B33AE68\n000000067F0000400200008A5900006F4000-000000067F0000400200008A5900006F8000__000000931B9AFDF8\n000000067F0000400200008A5900006F8000-000000067F0000400200008A5900006FC000__0000001C725A2400\n000000067F0000400200008A5900006F8000-000000067F0000400200008A5900006FC000__0000001C760FA190\n000000067F0000400200008A5900006F8000-000000067F0000400200008A5900006FC000__00000038E67ABFA0\n000000067F0000400200008A5900006F8000-000000067F0000400200008A5900006FC000__0000003903F1CFE8\n000000067F0000400200008A5900006F8000-000000067F0000400200008A5900006FC000__0000003B99F7F8A0
\n000000067F0000400200008A5900006F8000-000000067F0000400200008A5900006FC000__0000005D2FFFFB38\n000000067F0000400200008A5900006F8000-000000067F0000400200008A5900006FC000__00000073AD3FE6B8\n000000067F0000400200008A5900006F8000-000000067F0000400200008A5900006FC000__000000914E3F38F0\n000000067F0000400200008A5900006F8000-000000067F0000400200008A5900006FC000__000000931B33AE68\n000000067F0000400200008A5900006F8000-000000067F0000400200008A5900006FC000__000000931B9AFDF8\n000000067F0000400200008A5900006FAB35-000000067F0000400200008A590000703515__0000000CDADDDFC9-0000000D8A85D199\n000000067F0000400200008A5900006FC000-000000067F0000400200008A590000700000__0000001C725A2400\n000000067F0000400200008A5900006FC000-000000067F0000400200008A590000700000__0000001C760FA190\n000000067F0000400200008A5900006FC000-000000067F0000400200008A590000700000__00000038E67ABFA0\n000000067F0000400200008A5900006FC000-000000067F0000400200008A590000700000__0000003903F1CFE8\n000000067F0000400200008A5900006FC000-000000067F0000400200008A590000700000__0000003B99F7F8A0\n000000067F0000400200008A5900006FC000-000000067F0000400200008A590000700000__0000005D2FFFFB38\n000000067F0000400200008A5900006FC000-000000067F0000400200008A590000700000__00000073AD3FE6B8\n000000067F0000400200008A5900006FC000-000000067F0000400200008A590000700000__000000914E3F38F0\n000000067F0000400200008A5900006FC000-000000067F0000400200008A590000700000__000000931B33AE68\n000000067F0000400200008A5900006FC000-000000067F0000400200008A590000700000__000000931B9AFDF8\n000000067F0000400200008A590000700000-000000067F0000400200008A590000704000__0000001C725A2400\n000000067F0000400200008A590000700000-000000067F0000400200008A590000704000__0000001C760FA190\n000000067F0000400200008A590000700000-000000067F0000400200008A590000704000__00000038E67ABFA0\n000000067F0000400200008A590000700000-000000067F0000400200008A590000704000__0000003903F1CFE8\n000000067F0000400200008A590000700000-000000067F0000400200008A590000704000__0000003B99F7F8A0\n000000067F0000400200008A5900
00700000-000000067F0000400200008A590000704000__0000005D2FFFFB38\n000000067F0000400200008A590000700000-000000067F0000400200008A590000704000__00000073AD3FE6B8\n000000067F0000400200008A590000700000-000000067F0000400200008A590000704000__000000914E3F38F0\n000000067F0000400200008A590000700000-000000067F0000400200008A590000704000__000000931B33AE68\n000000067F0000400200008A590000700000-000000067F0000400200008A590000704000__000000931B9AFDF8\n000000067F0000400200008A590000703515-000000067F0000400200008A59000070BEF5__0000000CDADDDFC9-0000000D8A85D199\n000000067F0000400200008A590000704000-000000067F0000400200008A590000708000__0000001C725A2400\n000000067F0000400200008A590000704000-000000067F0000400200008A590000708000__0000001C760FA190\n000000067F0000400200008A590000704000-000000067F0000400200008A590000708000__00000038E67ABFA0\n000000067F0000400200008A590000704000-000000067F0000400200008A590000708000__0000003903F1CFE8\n000000067F0000400200008A590000704000-000000067F0000400200008A590000708000__0000003B99F7F8A0\n000000067F0000400200008A590000704000-000000067F0000400200008A590000708000__0000005D2FFFFB38\n000000067F0000400200008A590000704000-000000067F0000400200008A590000708000__00000073AD3FE6B8\n000000067F0000400200008A590000704000-000000067F0000400200008A590000708000__000000914E3F38F0\n000000067F0000400200008A590000704000-000000067F0000400200008A590000708000__000000931B33AE68\n000000067F0000400200008A590000704000-000000067F0000400200008A590000708000__000000931B9AFDF8\n000000067F0000400200008A590000708000-000000067F0000400200008A59000070C000__0000001C725A2400\n000000067F0000400200008A590000708000-000000067F0000400200008A59000070C000__0000001C760FA190\n000000067F0000400200008A590000708000-000000067F0000400200008A59000070C000__00000038E67ABFA0\n000000067F0000400200008A590000708000-000000067F0000400200008A59000070C000__0000003903F1CFE8\n000000067F0000400200008A590000708000-000000067F0000400200008A59000070C000__0000003B99F7F8A0\n000000067F0000400200008A590000708000-000000067F00004002000
08A59000070C000__0000005D2FFFFB38\n000000067F0000400200008A590000708000-000000067F0000400200008A59000070C000__00000073AD3FE6B8\n000000067F0000400200008A590000708000-000000067F0000400200008A59000070C000__000000914E3F38F0\n000000067F0000400200008A590000708000-000000067F0000400200008A59000070C000__000000931B33AE68\n000000067F0000400200008A590000708000-000000067F0000400200008A59000070C000__000000931B9AFDF8\n000000067F0000400200008A59000070BEF5-000000067F0000400200008A5900007148DC__0000000CDADDDFC9-0000000D8A85D199\n000000067F0000400200008A59000070C000-000000067F0000400200008A590000710000__0000001C725A2400\n000000067F0000400200008A59000070C000-000000067F0000400200008A590000710000__0000001C760FA190\n000000067F0000400200008A59000070C000-000000067F0000400200008A590000710000__00000038E67ABFA0\n000000067F0000400200008A59000070C000-000000067F0000400200008A590000710000__0000003903F1CFE8\n000000067F0000400200008A59000070C000-000000067F0000400200008A590000710000__0000003B99F7F8A0\n000000067F0000400200008A59000070C000-000000067F0000400200008A590000710000__0000005D2FFFFB38\n000000067F0000400200008A59000070C000-000000067F0000400200008A590000710000__00000073AD3FE6B8\n000000067F0000400200008A59000070C000-000000067F0000400200008A590000710000__000000914E3F38F0\n000000067F0000400200008A59000070C000-000000067F0000400200008A590000710000__000000931B33AE68\n000000067F0000400200008A59000070C000-000000067F0000400200008A590000710000__000000931B9AFDF8\n000000067F0000400200008A590000710000-000000067F0000400200008A590000714000__0000001C725A2400\n000000067F0000400200008A590000710000-000000067F0000400200008A590000714000__0000001C760FA190\n000000067F0000400200008A590000710000-000000067F0000400200008A590000714000__00000038E67ABFA0\n000000067F0000400200008A590000710000-000000067F0000400200008A590000714000__0000003903F1CFE8\n000000067F0000400200008A590000710000-000000067F0000400200008A590000714000__0000003B99F7F8A0\n000000067F0000400200008A590000710000-000000067F0000400200008A590000714000__0000005D2FFFF
B38\n000000067F0000400200008A590000710000-000000067F0000400200008A590000714000__00000073AD3FE6B8\n000000067F0000400200008A590000710000-000000067F0000400200008A590000714000__000000914E3F38F0\n000000067F0000400200008A590000710000-000000067F0000400200008A590000714000__000000931B33AE68\n000000067F0000400200008A590000710000-000000067F0000400200008A590000714000__000000931B9AFDF8\n000000067F0000400200008A590000714000-000000067F0000400200008A590000718000__0000001C725A2400\n000000067F0000400200008A590000714000-000000067F0000400200008A590000718000__0000001C760FA190\n000000067F0000400200008A590000714000-000000067F0000400200008A590000718000__00000038E67ABFA0\n000000067F0000400200008A590000714000-000000067F0000400200008A590000718000__0000003903F1CFE8\n000000067F0000400200008A590000714000-000000067F0000400200008A590000718000__0000003B99F7F8A0\n000000067F0000400200008A590000714000-000000067F0000400200008A590000718000__0000005D2FFFFB38\n000000067F0000400200008A590000714000-000000067F0000400200008A590000718000__00000073AD3FE6B8\n000000067F0000400200008A590000714000-000000067F0000400200008A590000718000__000000914E3F38F0\n000000067F0000400200008A590000714000-000000067F0000400200008A590000718000__000000931B33AE68\n000000067F0000400200008A590000714000-000000067F0000400200008A590000718000__000000931B9AFDF8\n000000067F0000400200008A5900007148DC-000000067F0000400200008A59000071D2BF__0000000CDADDDFC9-0000000D8A85D199\n000000067F0000400200008A590000718000-000000067F0000400200008A59000071C000__0000001C725A2400\n000000067F0000400200008A590000718000-000000067F0000400200008A59000071C000__0000001C760FA190\n000000067F0000400200008A590000718000-000000067F0000400200008A59000071C000__00000038E67ABFA0\n000000067F0000400200008A590000718000-000000067F0000400200008A59000071C000__0000003903F1CFE8\n000000067F0000400200008A590000718000-000000067F0000400200008A59000071C000__0000003B99F7F8A0\n000000067F0000400200008A590000718000-000000067F0000400200008A59000071C000__0000005D2FFFFB38\n000000067F0000400200008A5
90000718000-000000067F0000400200008A59000071C000__00000073AD3FE6B8\n000000067F0000400200008A590000718000-000000067F0000400200008A59000071C000__000000914E3F38F0\n000000067F0000400200008A590000718000-000000067F0000400200008A59000071C000__000000931B33AE68\n000000067F0000400200008A590000718000-000000067F0000400200008A59000071C000__000000931B9AFDF8\n000000067F0000400200008A59000071C000-000000067F0000400200008A590000720000__0000001C725A2400\n000000067F0000400200008A59000071C000-000000067F0000400200008A590000720000__0000001C760FA190\n000000067F0000400200008A59000071C000-000000067F0000400200008A590000720000__00000038E67ABFA0\n000000067F0000400200008A59000071C000-000000067F0000400200008A590000720000__0000003903F1CFE8\n000000067F0000400200008A59000071C000-000000067F0000400200008A590000720000__0000003B99F7F8A0\n000000067F0000400200008A59000071C000-000000067F0000400200008A590000720000__0000005D2FFFFB38\n000000067F0000400200008A59000071C000-000000067F0000400200008A590000720000__00000073AD3FE6B8\n000000067F0000400200008A59000071C000-000000067F0000400200008A590000720000__000000914E3F38F0\n000000067F0000400200008A59000071C000-000000067F0000400200008A590000720000__000000931B33AE68\n000000067F0000400200008A59000071C000-000000067F0000400200008A590000720000__000000931B9AFDF8\n000000067F0000400200008A59000071D2BF-000000067F0000400200008A590000725C94__0000000CDADDDFC9-0000000D8A85D199\n000000067F0000400200008A590000720000-000000067F0000400200008A590000724000__0000001C725A2400\n000000067F0000400200008A590000720000-000000067F0000400200008A590000724000__0000001C760FA190\n000000067F0000400200008A590000720000-000000067F0000400200008A590000724000__00000038E67ABFA0\n000000067F0000400200008A590000720000-000000067F0000400200008A590000724000__0000003903F1CFE8\n000000067F0000400200008A590000720000-000000067F0000400200008A590000724000__0000003B99F7F8A0\n000000067F0000400200008A590000720000-000000067F0000400200008A590000724000__0000005D2FFFFB38\n000000067F0000400200008A590000720000-000000067F00004002
00008A590000724000__00000073AD3FE6B8\n000000067F0000400200008A590000720000-000000067F0000400200008A590000724000__000000914E3F38F0\n000000067F0000400200008A590000720000-000000067F0000400200008A590000724000__000000931B33AE68\n000000067F0000400200008A590000720000-000000067F0000400200008A590000724000__000000931B9AFDF8\n000000067F0000400200008A590000724000-000000067F0000400200008A590000728000__0000001C725A2400\n000000067F0000400200008A590000724000-000000067F0000400200008A590000728000__0000001C760FA190\n000000067F0000400200008A590000724000-000000067F0000400200008A590000728000__00000038E67ABFA0\n000000067F0000400200008A590000724000-000000067F0000400200008A590000728000__0000003903F1CFE8\n000000067F0000400200008A590000724000-000000067F0000400200008A590000728000__0000003B99F7F8A0\n000000067F0000400200008A590000724000-000000067F0000400200008A590000728000__0000005D2FFFFB38\n000000067F0000400200008A590000724000-000000067F0000400200008A590000728000__00000073AD3FE6B8\n000000067F0000400200008A590000724000-000000067F0000400200008A590000728000__000000914E3F38F0\n000000067F0000400200008A590000724000-000000067F0000400200008A590000728000__000000931B33AE68\n000000067F0000400200008A590000724000-000000067F0000400200008A590000728000__000000931B9AFDF8\n000000067F0000400200008A590000725C94-000000067F0000400200008A59000072E65C__0000000CDADDDFC9-0000000D8A85D199\n000000067F0000400200008A590000728000-000000067F0000400200008A59000072C000__0000001C725A2400\n000000067F0000400200008A590000728000-000000067F0000400200008A59000072C000__0000001C760FA190\n000000067F0000400200008A590000728000-000000067F0000400200008A59000072C000__00000038E67ABFA0\n000000067F0000400200008A590000728000-000000067F0000400200008A59000072C000__0000003903F1CFE8\n000000067F0000400200008A590000728000-000000067F0000400200008A59000072C000__0000003B99F7F8A0\n000000067F0000400200008A590000728000-000000067F0000400200008A59000072C000__0000005D2FFFFB38\n000000067F0000400200008A590000728000-000000067F0000400200008A59000072C000__00000073AD
3FE6B8\n000000067F0000400200008A590000728000-000000067F0000400200008A59000072C000__000000914E3F38F0\n000000067F0000400200008A590000728000-000000067F0000400200008A59000072C000__000000931B33AE68\n000000067F0000400200008A590000728000-000000067F0000400200008A59000072C000__000000931B9AFDF8\n000000067F0000400200008A59000072C000-000000067F0000400200008A590000730000__0000001C725A2400\n000000067F0000400200008A59000072C000-000000067F0000400200008A590000730000__0000001C760FA190\n000000067F0000400200008A59000072C000-000000067F0000400200008A590000730000__00000038E67ABFA0\n000000067F0000400200008A59000072C000-000000067F0000400200008A590000730000__0000003903F1CFE8\n000000067F0000400200008A59000072C000-000000067F0000400200008A590000730000__0000003B99F7F8A0\n000000067F0000400200008A59000072C000-000000067F0000400200008A590000730000__0000005D2FFFFB38\n000000067F0000400200008A59000072C000-000000067F0000400200008A590000730000__00000073AD3FE6B8\n000000067F0000400200008A59000072C000-000000067F0000400200008A590000730000__000000914E3F38F0\n000000067F0000400200008A59000072C000-000000067F0000400200008A590000730000__000000931B33AE68\n000000067F0000400200008A59000072C000-000000067F0000400200008A590000730000__000000931B9AFDF8\n000000067F0000400200008A59000072E65C-000000067F0000400200008A590000737034__0000000CDADDDFC9-0000000D8A85D199\n000000067F0000400200008A590000730000-000000067F0000400200008A590000734000__0000001C725A2400\n000000067F0000400200008A590000730000-000000067F0000400200008A590000734000__0000001C760FA190\n000000067F0000400200008A590000730000-000000067F0000400200008A590000734000__00000038E67ABFA0\n000000067F0000400200008A590000730000-000000067F0000400200008A590000734000__0000003903F1CFE8\n000000067F0000400200008A590000730000-000000067F0000400200008A590000734000__0000003B99F7F8A0\n000000067F0000400200008A590000730000-000000067F0000400200008A590000734000__0000005D2FFFFB38\n000000067F0000400200008A590000730000-000000067F0000400200008A590000734000__00000073AD3FE6B8\n000000067F000040020000
8A590000730000-000000067F0000400200008A590000734000__000000914E3F38F0\n000000067F0000400200008A590000730000-000000067F0000400200008A590000734000__000000931B33AE68\n000000067F0000400200008A590000730000-000000067F0000400200008A590000734000__000000931B9AFDF8\n000000067F0000400200008A590000734000-000000067F0000400200008A590000738000__0000001C725A2400\n000000067F0000400200008A590000734000-000000067F0000400200008A590000738000__0000001C760FA190\n000000067F0000400200008A590000734000-000000067F0000400200008A590000738000__00000038E67ABFA0\n000000067F0000400200008A590000734000-000000067F0000400200008A590000738000__0000003903F1CFE8\n000000067F0000400200008A590000734000-000000067F0000400200008A590000738000__0000003B99F7F8A0\n000000067F0000400200008A590000734000-000000067F0000400200008A590000738000__0000005D2FFFFB38\n000000067F0000400200008A590000734000-000000067F0000400200008A590000738000__00000073AD3FE6B8\n000000067F0000400200008A590000734000-000000067F0000400200008A590000738000__000000914E3F38F0\n000000067F0000400200008A590000734000-000000067F0000400200008A590000738000__000000931B33AE68\n000000067F0000400200008A590000734000-000000067F0000400200008A590000738000__000000931B9AFDF8\n000000067F0000400200008A590000737034-000000067F0000400200008A59000073FA16__0000000CDADDDFC9-0000000D8A85D199\n000000067F0000400200008A590000738000-000000067F0000400200008A59000073C000__0000001C725A2400\n000000067F0000400200008A590000738000-000000067F0000400200008A59000073C000__0000001C760FA190\n000000067F0000400200008A590000738000-000000067F0000400200008A59000073C000__00000038E67ABFA0\n000000067F0000400200008A590000738000-000000067F0000400200008A59000073C000__0000003903F1CFE8\n000000067F0000400200008A590000738000-000000067F0000400200008A59000073C000__0000003B99F7F8A0\n000000067F0000400200008A590000738000-000000067F0000400200008A59000073C000__0000005D2FFFFB38\n000000067F0000400200008A590000738000-000000067F0000400200008A59000073C000__00000073AD3FE6B8\n000000067F0000400200008A590000738000-000000067F00004
00200008A59000073C000__000000914E3F38F0\n000000067F0000400200008A590000738000-000000067F0000400200008A59000073C000__000000931B33AE68\n000000067F0000400200008A590000738000-000000067F0000400200008A59000073C000__000000931B9AFDF8\n000000067F0000400200008A59000073C000-000000067F0000400200008A590000740000__0000001C725A2400\n000000067F0000400200008A59000073C000-000000067F0000400200008A590000740000__0000001C760FA190\n000000067F0000400200008A59000073C000-000000067F0000400200008A590000740000__00000038E67ABFA0\n000000067F0000400200008A59000073C000-000000067F0000400200008A590000740000__0000003903F1CFE8\n000000067F0000400200008A59000073C000-000000067F0000400200008A590000740000__0000003B99F7F8A0\n000000067F0000400200008A59000073C000-000000067F0000400200008A590000740000__0000005D2FFFFB38\n000000067F0000400200008A59000073C000-000000067F0000400200008A590000740000__00000073AD3FE6B8\n000000067F0000400200008A59000073C000-000000067F0000400200008A590000740000__000000914E3F38F0\n000000067F0000400200008A59000073C000-000000067F0000400200008A590000740000__000000931B33AE68\n000000067F0000400200008A59000073C000-000000067F0000400200008A590000740000__000000931B9AFDF8\n000000067F0000400200008A59000073FA16-000000067F0000400200008A5900007483EF__0000000CDADDDFC9-0000000D8A85D199\n000000067F0000400200008A590000740000-000000067F0000400200008A590000744000__0000001C725A2400\n000000067F0000400200008A590000740000-000000067F0000400200008A590000744000__0000001C760FA190\n000000067F0000400200008A590000740000-000000067F0000400200008A590000744000__00000038E67ABFA0\n000000067F0000400200008A590000740000-000000067F0000400200008A590000744000__0000003903F1CFE8\n000000067F0000400200008A590000740000-000000067F0000400200008A590000744000__0000003B99F7F8A0\n000000067F0000400200008A590000740000-000000067F0000400200008A590000744000__0000005D2FFFFB38\n000000067F0000400200008A590000740000-000000067F0000400200008A590000744000__00000073AD3FE6B8\n000000067F0000400200008A590000740000-000000067F0000400200008A590000744000__0000009
14E3F38F0\n000000067F0000400200008A590000740000-000000067F0000400200008A590000744000__000000931B33AE68\n000000067F0000400200008A590000740000-000000067F0000400200008A590000744000__000000931B9AFDF8\n000000067F0000400200008A590000744000-000000067F0000400200008A590000748000__0000001C725A2400\n000000067F0000400200008A590000744000-000000067F0000400200008A590000748000__0000001C760FA190\n000000067F0000400200008A590000744000-000000067F0000400200008A590000748000__00000038E67ABFA0\n000000067F0000400200008A590000744000-000000067F0000400200008A590000748000__0000003903F1CFE8\n000000067F0000400200008A590000744000-000000067F0000400200008A590000748000__0000003B99F7F8A0\n000000067F0000400200008A590000744000-000000067F0000400200008A590000748000__0000005D2FFFFB38\n000000067F0000400200008A590000744000-000000067F0000400200008A590000748000__00000073AD3FE6B8\n000000067F0000400200008A590000744000-000000067F0000400200008A590000748000__000000914E3F38F0\n000000067F0000400200008A590000744000-000000067F0000400200008A590000748000__000000931B33AE68\n000000067F0000400200008A590000744000-000000067F0000400200008A590000748000__000000931B9AFDF8\n000000067F0000400200008A590000748000-000000067F0000400200008A59000074C000__0000001C725A2400\n000000067F0000400200008A590000748000-000000067F0000400200008A59000074C000__0000001C760FA190\n000000067F0000400200008A590000748000-000000067F0000400200008A59000074C000__00000038E67ABFA0\n000000067F0000400200008A590000748000-000000067F0000400200008A59000074C000__0000003903F1CFE8\n000000067F0000400200008A590000748000-000000067F0000400200008A59000074C000__0000003B99F7F8A0\n000000067F0000400200008A590000748000-000000067F0000400200008A59000074C000__0000005D2FFFFB38\n000000067F0000400200008A590000748000-000000067F0000400200008A59000074C000__00000073AD3FE6B8\n000000067F0000400200008A590000748000-000000067F0000400200008A59000074C000__000000914E3F38F0\n000000067F0000400200008A590000748000-000000067F0000400200008A59000074C000__000000931B33AE68\n000000067F0000400200008A590000748000
-000000067F0000400200008A59000074C000__000000931B9AFDF8\n000000067F0000400200008A5900007483EF-000000067F0000400200008A590000750DD5__0000000CDADDDFC9-0000000D8A85D199\n000000067F0000400200008A59000074C000-000000067F0000400200008A590000750000__0000001C725A2400\n000000067F0000400200008A59000074C000-000000067F0000400200008A590000750000__0000001C760FA190\n000000067F0000400200008A59000074C000-000000067F0000400200008A590000750000__00000038E67ABFA0\n000000067F0000400200008A59000074C000-000000067F0000400200008A590000750000__0000003903F1CFE8\n000000067F0000400200008A59000074C000-000000067F0000400200008A590000750000__0000003B99F7F8A0\n000000067F0000400200008A59000074C000-000000067F0000400200008A590000750000__0000005D2FFFFB38\n000000067F0000400200008A59000074C000-000000067F0000400200008A590000750000__00000073AD3FE6B8\n000000067F0000400200008A59000074C000-000000067F0000400200008A590000750000__000000914E3F38F0\n000000067F0000400200008A59000074C000-000000067F0000400200008A590000750000__000000931B33AE68\n000000067F0000400200008A59000074C000-000000067F0000400200008A590000750000__000000931B9AFDF8\n000000067F0000400200008A590000750000-000000067F0000400200008A590000754000__0000000E54FFE720\n000000067F0000400200008A590000750000-000000067F0000400200008A590000754000__0000001C760FA190\n000000067F0000400200008A590000750000-000000067F0000400200008A590000754000__00000038E67ABFA0\n000000067F0000400200008A590000750000-000000067F0000400200008A590000754000__0000003903F1CFE8\n000000067F0000400200008A590000750000-000000067F0000400200008A590000754000__0000003B99F7F8A0\n000000067F0000400200008A590000750000-000000067F0000400200008A590000754000__0000005D2FFFFB38\n000000067F0000400200008A590000750000-000000067F0000400200008A590000754000__00000073AD3FE6B8\n000000067F0000400200008A590000750000-000000067F0000400200008A590000754000__000000914E3F38F0\n000000067F0000400200008A590000750000-000000067F0000400200008A590000754000__000000931B33AE68\n000000067F0000400200008A590000750000-000000067F0000400200008A59000
0754000__000000931B9AFDF8\n000000067F0000400200008A590000750DD5-000000067F0000400200008A590100000000__0000000CDADDDFC9-0000000D8A85D199\n000000067F0000400200008A5900007510F5-000000067F0000400200008A590000759AD2__0000000D8A85D199-0000000E2A359AC1\n000000067F0000400200008A590000754000-000000067F0000400200008A590000758000__0000000E54FFE720\n000000067F0000400200008A590000754000-000000067F0000400200008A590000758000__0000001C760FA190\n000000067F0000400200008A590000754000-000000067F0000400200008A590000758000__00000038E67ABFA0\n000000067F0000400200008A590000754000-000000067F0000400200008A590000758000__0000003903F1CFE8\n000000067F0000400200008A590000754000-000000067F0000400200008A590000758000__0000003B99F7F8A0\n000000067F0000400200008A590000754000-000000067F0000400200008A590000758000__0000005D2FFFFB38\n000000067F0000400200008A590000754000-000000067F0000400200008A590000758000__00000073AD3FE6B8\n000000067F0000400200008A590000754000-000000067F0000400200008A590000758000__000000914E3F38F0\n000000067F0000400200008A590000754000-000000067F0000400200008A590000758000__000000931B33AE68\n000000067F0000400200008A590000754000-000000067F0000400200008A590000758000__000000931B9AFDF8\n000000067F0000400200008A590000758000-000000067F0000400200008A59000075C000__0000000E54FFE720\n000000067F0000400200008A590000758000-000000067F0000400200008A59000075C000__0000001C760FA190\n000000067F0000400200008A590000758000-000000067F0000400200008A59000075C000__00000038E67ABFA0\n000000067F0000400200008A590000758000-000000067F0000400200008A59000075C000__0000003903F1CFE8\n000000067F0000400200008A590000758000-000000067F0000400200008A59000075C000__0000003B99F7F8A0\n000000067F0000400200008A590000758000-000000067F0000400200008A59000075C000__0000005D2FFFFB38\n000000067F0000400200008A590000758000-000000067F0000400200008A59000075C000__00000073AD3FE6B8\n000000067F0000400200008A590000758000-000000067F0000400200008A59000075C000__000000914E3F38F0\n000000067F0000400200008A590000758000-000000067F0000400200008A59000075C000__0000
00931B33AE68\n000000067F0000400200008A590000758000-000000067F0000400200008A59000075C000__000000931B9AFDF8\n000000067F0000400200008A590000759AD2-000000067F0000400200008A5900007624AB__0000000D8A85D199-0000000E2A359AC1\n000000067F0000400200008A59000075C000-000000067F0000400200008A590000760000__0000000E54FFE720\n000000067F0000400200008A59000075C000-000000067F0000400200008A590000760000__0000001C760FA190\n000000067F0000400200008A59000075C000-000000067F0000400200008A590000760000__00000038E67ABFA0\n000000067F0000400200008A59000075C000-000000067F0000400200008A590000760000__0000003903F1CFE8\n000000067F0000400200008A59000075C000-000000067F0000400200008A590000760000__0000003B99F7F8A0\n000000067F0000400200008A59000075C000-000000067F0000400200008A590000760000__0000005D2FFFFB38\n000000067F0000400200008A59000075C000-000000067F0000400200008A590000760000__00000073AD3FE6B8\n000000067F0000400200008A59000075C000-000000067F0000400200008A590000760000__000000914E3F38F0\n000000067F0000400200008A59000075C000-000000067F0000400200008A590000760000__000000931B33AE68\n000000067F0000400200008A59000075C000-000000067F0000400200008A590000760000__000000931B9AFDF8\n000000067F0000400200008A590000760000-000000067F0000400200008A590000764000__0000000E54FFE720\n000000067F0000400200008A590000760000-000000067F0000400200008A590000764000__0000001C760FA190\n000000067F0000400200008A590000760000-000000067F0000400200008A590000764000__00000038E67ABFA0\n000000067F0000400200008A590000760000-000000067F0000400200008A590000764000__0000003903F1CFE8\n000000067F0000400200008A590000760000-000000067F0000400200008A590000764000__0000003B99F7F8A0\n000000067F0000400200008A590000760000-000000067F0000400200008A590000764000__0000005D2FFFFB38\n000000067F0000400200008A590000760000-000000067F0000400200008A590000764000__00000073AD3FE6B8\n000000067F0000400200008A590000760000-000000067F0000400200008A590000764000__000000914E3F38F0\n000000067F0000400200008A590000760000-000000067F0000400200008A590000764000__000000931B33AE68\n000000067F000040
0200008A590000760000-000000067F0000400200008A590000764000__000000931B9AFDF8\n000000067F0000400200008A5900007624AB-000000067F0000400200008A59000076AE86__0000000D8A85D199-0000000E2A359AC1\n000000067F0000400200008A590000764000-000000067F0000400200008A590000768000__0000000E54FFE720\n000000067F0000400200008A590000764000-000000067F0000400200008A590000768000__0000001C760FA190\n000000067F0000400200008A590000764000-000000067F0000400200008A590000768000__00000038E67ABFA0\n000000067F0000400200008A590000764000-000000067F0000400200008A590000768000__0000003903F1CFE8\n000000067F0000400200008A590000764000-000000067F0000400200008A590000768000__0000003B99F7F8A0\n000000067F0000400200008A590000764000-000000067F0000400200008A590000768000__0000005D2FFFFB38\n000000067F0000400200008A590000764000-000000067F0000400200008A590000768000__00000073AD3FE6B8\n000000067F0000400200008A590000764000-000000067F0000400200008A590000768000__000000914E3F38F0\n000000067F0000400200008A590000764000-000000067F0000400200008A590000768000__000000931B33AE68\n000000067F0000400200008A590000764000-000000067F0000400200008A590000768000__000000931B9AFDF8\n000000067F0000400200008A590000768000-000000067F0000400200008A59000076C000__0000000E54FFE720\n000000067F0000400200008A590000768000-000000067F0000400200008A59000076C000__0000001C760FA190\n000000067F0000400200008A590000768000-000000067F0000400200008A59000076C000__00000038E67ABFA0\n000000067F0000400200008A590000768000-000000067F0000400200008A59000076C000__0000003903F1CFE8\n000000067F0000400200008A590000768000-000000067F0000400200008A59000076C000__0000003B99F7F8A0\n000000067F0000400200008A590000768000-000000067F0000400200008A59000076C000__0000005D2FFFFB38\n000000067F0000400200008A590000768000-000000067F0000400200008A59000076C000__00000073AD3FE6B8\n000000067F0000400200008A590000768000-000000067F0000400200008A59000076C000__000000914E3F38F0\n000000067F0000400200008A590000768000-000000067F0000400200008A59000076C000__000000931B33AE68\n000000067F0000400200008A590000768000-000000067
F0000400200008A59000076C000__000000931B9AFDF8\n000000067F0000400200008A59000076AE86-000000067F0000400200008A590000773859__0000000D8A85D199-0000000E2A359AC1\n000000067F0000400200008A59000076C000-000000067F0000400200008A590000770000__0000000E54FFE720\n000000067F0000400200008A59000076C000-000000067F0000400200008A590000770000__0000001C760FA190\n000000067F0000400200008A59000076C000-000000067F0000400200008A590000770000__00000038E67ABFA0\n000000067F0000400200008A59000076C000-000000067F0000400200008A590000770000__0000003903F1CFE8\n000000067F0000400200008A59000076C000-000000067F0000400200008A590000770000__0000003B99F7F8A0\n000000067F0000400200008A59000076C000-000000067F0000400200008A590000770000__0000005D2FFFFB38\n000000067F0000400200008A59000076C000-000000067F0000400200008A590000770000__00000073AD3FE6B8\n000000067F0000400200008A59000076C000-000000067F0000400200008A590000770000__000000914E3F38F0\n000000067F0000400200008A59000076C000-000000067F0000400200008A590000770000__000000931B33AE68\n000000067F0000400200008A59000076C000-000000067F0000400200008A590000770000__000000931B9AFDF8\n000000067F0000400200008A590000770000-000000067F0000400200008A590000774000__0000000E54FFE720\n000000067F0000400200008A590000770000-000000067F0000400200008A590000774000__0000001C760FA190\n000000067F0000400200008A590000770000-000000067F0000400200008A590000774000__00000038E67ABFA0\n000000067F0000400200008A590000770000-000000067F0000400200008A590000774000__0000003903F1CFE8\n000000067F0000400200008A590000770000-000000067F0000400200008A590000774000__0000003B99F7F8A0\n000000067F0000400200008A590000770000-000000067F0000400200008A590000774000__0000005D2FFFFB38\n000000067F0000400200008A590000770000-000000067F0000400200008A590000774000__00000073AD3FE6B8\n000000067F0000400200008A590000770000-000000067F0000400200008A590000774000__000000914E3F38F0\n000000067F0000400200008A590000770000-000000067F0000400200008A590000774000__000000931B33AE68\n000000067F0000400200008A590000770000-000000067F0000400200008A590000774000__0
00000931B9AFDF8\n000000067F0000400200008A590000773859-000000067F0000400200008A59000077C231__0000000D8A85D199-0000000E2A359AC1\n000000067F0000400200008A590000774000-000000067F0000400200008A590000778000__0000000E54FFE720\n000000067F0000400200008A590000774000-000000067F0000400200008A590000778000__0000001C760FA190\n000000067F0000400200008A590000774000-000000067F0000400200008A590000778000__00000038E67ABFA0\n000000067F0000400200008A590000774000-000000067F0000400200008A590000778000__0000003903F1CFE8\n000000067F0000400200008A590000774000-000000067F0000400200008A590000778000__0000003B99F7F8A0\n000000067F0000400200008A590000774000-000000067F0000400200008A590000778000__0000005D2FFFFB38\n000000067F0000400200008A590000774000-000000067F0000400200008A590000778000__00000073AD3FE6B8\n000000067F0000400200008A590000774000-000000067F0000400200008A590000778000__000000914E3F38F0\n000000067F0000400200008A590000774000-000000067F0000400200008A590000778000__000000931B33AE68\n000000067F0000400200008A590000774000-000000067F0000400200008A590000778000__000000931B9AFDF8\n000000067F0000400200008A590000778000-000000067F0000400200008A59000077C000__0000000E54FFE720\n000000067F0000400200008A590000778000-000000067F0000400200008A59000077C000__0000001C760FA190\n000000067F0000400200008A590000778000-000000067F0000400200008A59000077C000__00000038E67ABFA0\n000000067F0000400200008A590000778000-000000067F0000400200008A59000077C000__0000003903F1CFE8\n000000067F0000400200008A590000778000-000000067F0000400200008A59000077C000__0000003B99F7F8A0\n000000067F0000400200008A590000778000-000000067F0000400200008A59000077C000__0000005D2FFFFB38\n000000067F0000400200008A590000778000-000000067F0000400200008A59000077C000__00000073AD3FE6B8\n000000067F0000400200008A590000778000-000000067F0000400200008A59000077C000__000000914E3F38F0\n000000067F0000400200008A590000778000-000000067F0000400200008A59000077C000__000000931B33AE68\n000000067F0000400200008A590000778000-000000067F0000400200008A59000077C000__000000931B9AFDF8\n000000067F000
0400200008A59000077C000-000000067F0000400200008A590000780000__0000000E54FFE720\n000000067F0000400200008A59000077C000-000000067F0000400200008A590000780000__0000001C760FA190\n000000067F0000400200008A59000077C000-000000067F0000400200008A590000780000__00000038E67ABFA0\n000000067F0000400200008A59000077C000-000000067F0000400200008A590000780000__0000003903F1CFE8\n000000067F0000400200008A59000077C000-000000067F0000400200008A590000780000__0000003B99F7F8A0\n000000067F0000400200008A59000077C000-000000067F0000400200008A590000780000__0000005D2FFFFB38\n000000067F0000400200008A59000077C000-000000067F0000400200008A590000780000__00000073AD3FE6B8\n000000067F0000400200008A59000077C000-000000067F0000400200008A590000780000__000000914E3F38F0\n000000067F0000400200008A59000077C000-000000067F0000400200008A590000780000__000000931B33AE68\n000000067F0000400200008A59000077C000-000000067F0000400200008A590000780000__000000931B9AFDF8\n000000067F0000400200008A59000077C231-000000067F0000400200008A590000784C0A__0000000D8A85D199-0000000E2A359AC1\n000000067F0000400200008A590000780000-000000067F0000400200008A590000784000__0000000E54FFE720\n000000067F0000400200008A590000780000-000000067F0000400200008A590000784000__0000001C760FA190\n000000067F0000400200008A590000780000-000000067F0000400200008A590000784000__00000038E67ABFA0\n000000067F0000400200008A590000780000-000000067F0000400200008A590000784000__0000003903F1CFE8\n000000067F0000400200008A590000780000-000000067F0000400200008A590000784000__0000003B99F7F8A0\n000000067F0000400200008A590000780000-000000067F0000400200008A590000784000__0000005D2FFFFB38\n000000067F0000400200008A590000780000-000000067F0000400200008A590000784000__00000073AD3FE6B8\n000000067F0000400200008A590000780000-000000067F0000400200008A590000784000__000000914E3F38F0\n000000067F0000400200008A590000780000-000000067F0000400200008A590000784000__000000931B33AE68\n000000067F0000400200008A590000780000-000000067F0000400200008A590000784000__000000931B9AFDF8\n000000067F0000400200008A590000784000-000000
067F0000400200008A590000788000__0000000E54FFE720\n000000067F0000400200008A590000784000-000000067F0000400200008A590000788000__0000001C760FA190\n000000067F0000400200008A590000784000-000000067F0000400200008A590000788000__00000038E67ABFA0\n000000067F0000400200008A590000784000-000000067F0000400200008A590000788000__0000003903F1CFE8\n000000067F0000400200008A590000784000-000000067F0000400200008A590000788000__0000003B99F7F8A0\n000000067F0000400200008A590000784000-000000067F0000400200008A590000788000__0000005D2FFFFB38\n000000067F0000400200008A590000784000-000000067F0000400200008A590000788000__00000073AD3FE6B8\n000000067F0000400200008A590000784000-000000067F0000400200008A590000788000__000000914E3F38F0\n000000067F0000400200008A590000784000-000000067F0000400200008A590000788000__000000931B33AE68\n000000067F0000400200008A590000784000-000000067F0000400200008A590000788000__000000931B9AFDF8\n000000067F0000400200008A590000784C0A-000000067F0000400200008A59000078D5DB__0000000D8A85D199-0000000E2A359AC1\n000000067F0000400200008A590000788000-000000067F0000400200008A59000078C000__0000000E54FFE720\n000000067F0000400200008A590000788000-000000067F0000400200008A59000078C000__0000001C760FA190\n000000067F0000400200008A590000788000-000000067F0000400200008A59000078C000__00000038E67ABFA0\n000000067F0000400200008A590000788000-000000067F0000400200008A59000078C000__0000003903F1CFE8\n000000067F0000400200008A590000788000-000000067F0000400200008A59000078C000__0000003B99F7F8A0\n000000067F0000400200008A590000788000-000000067F0000400200008A59000078C000__0000005D2FFFFB38\n000000067F0000400200008A590000788000-000000067F0000400200008A59000078C000__00000073AD3FE6B8\n000000067F0000400200008A590000788000-000000067F0000400200008A59000078C000__000000914E3F38F0\n000000067F0000400200008A590000788000-000000067F0000400200008A59000078C000__000000931B33AE68\n000000067F0000400200008A590000788000-000000067F0000400200008A59000078C000__000000931B9AFDF8\n000000067F0000400200008A59000078C000-000000067F0000400200008A590000790000
__0000000E54FFE720\n000000067F0000400200008A59000078C000-000000067F0000400200008A590000790000__0000001C760FA190\n000000067F0000400200008A59000078C000-000000067F0000400200008A590000790000__00000038E67ABFA0\n000000067F0000400200008A59000078C000-000000067F0000400200008A590000790000__0000003903F1CFE8\n000000067F0000400200008A59000078C000-000000067F0000400200008A590000790000__0000003B99F7F8A0\n000000067F0000400200008A59000078C000-000000067F0000400200008A590000790000__0000005D2FFFFB38\n000000067F0000400200008A59000078C000-000000067F0000400200008A590000790000__00000073AD3FE6B8\n000000067F0000400200008A59000078C000-000000067F0000400200008A590000790000__000000914E3F38F0\n000000067F0000400200008A59000078C000-000000067F0000400200008A590000790000__000000931B33AE68\n000000067F0000400200008A59000078C000-000000067F0000400200008A590000790000__000000931B9AFDF8\n000000067F0000400200008A59000078D5DB-000000067F0000400200008A590000795FB7__0000000D8A85D199-0000000E2A359AC1\n000000067F0000400200008A590000790000-000000067F0000400200008A590000794000__0000000E54FFE720\n000000067F0000400200008A590000790000-000000067F0000400200008A590000794000__0000001C760FA190\n000000067F0000400200008A590000790000-000000067F0000400200008A590000794000__00000038E67ABFA0\n000000067F0000400200008A590000790000-000000067F0000400200008A590000794000__0000003903F1CFE8\n000000067F0000400200008A590000790000-000000067F0000400200008A590000794000__0000003B99F7F8A0\n000000067F0000400200008A590000790000-000000067F0000400200008A590000794000__0000005D2FFFFB38\n000000067F0000400200008A590000790000-000000067F0000400200008A590000794000__00000073AD3FE6B8\n000000067F0000400200008A590000790000-000000067F0000400200008A590000794000__000000914E3F38F0\n000000067F0000400200008A590000790000-000000067F0000400200008A590000794000__000000931B33AE68\n000000067F0000400200008A590000790000-000000067F0000400200008A590000794000__000000931B9AFDF8\n000000067F0000400200008A590000794000-000000067F0000400200008A590000798000__0000000E54FFE720\n000000067F
0000400200008A590000794000-000000067F0000400200008A590000798000__0000001C760FA190\n000000067F0000400200008A590000794000-000000067F0000400200008A590000798000__00000038E67ABFA0\n000000067F0000400200008A590000794000-000000067F0000400200008A590000798000__0000003903F1CFE8\n000000067F0000400200008A590000794000-000000067F0000400200008A590000798000__0000003B99F7F8A0\n000000067F0000400200008A590000794000-000000067F0000400200008A590000798000__0000005D2FFFFB38\n000000067F0000400200008A590000794000-000000067F0000400200008A590000798000__00000073AD3FE6B8\n000000067F0000400200008A590000794000-000000067F0000400200008A590000798000__000000914E3F38F0\n000000067F0000400200008A590000794000-000000067F0000400200008A590000798000__000000931B33AE68\n000000067F0000400200008A590000794000-000000067F0000400200008A590000798000__000000931B9AFDF8\n000000067F0000400200008A590000795FB7-000000067F0000400200008A59000079E99E__0000000D8A85D199-0000000E2A359AC1\n000000067F0000400200008A590000798000-000000067F0000400200008A59000079C000__0000000E54FFE720\n000000067F0000400200008A590000798000-000000067F0000400200008A59000079C000__0000001C760FA190\n000000067F0000400200008A590000798000-000000067F0000400200008A59000079C000__00000038E67ABFA0\n000000067F0000400200008A590000798000-000000067F0000400200008A59000079C000__0000003903F1CFE8\n000000067F0000400200008A590000798000-000000067F0000400200008A59000079C000__0000003B99F7F8A0\n000000067F0000400200008A590000798000-000000067F0000400200008A59000079C000__0000005D2FFFFB38\n000000067F0000400200008A590000798000-000000067F0000400200008A59000079C000__00000073AD3FE6B8\n000000067F0000400200008A590000798000-000000067F0000400200008A59000079C000__000000914E3F38F0\n000000067F0000400200008A590000798000-000000067F0000400200008A59000079C000__000000931B33AE68\n000000067F0000400200008A590000798000-000000067F0000400200008A59000079C000__000000931B9AFDF8\n000000067F0000400200008A59000079C000-000000067F0000400200008A5900007A0000__0000000E54FFE720\n000000067F0000400200008A59000079C000-000
000067F0000400200008A5900007A0000__0000001C760FA190\n000000067F0000400200008A59000079C000-000000067F0000400200008A5900007A0000__00000038E67ABFA0\n000000067F0000400200008A59000079C000-000000067F0000400200008A5900007A0000__0000003903F1CFE8\n000000067F0000400200008A59000079C000-000000067F0000400200008A5900007A0000__0000003B99F7F8A0\n000000067F0000400200008A59000079C000-000000067F0000400200008A5900007A0000__0000005D2FFFFB38\n000000067F0000400200008A59000079C000-000000067F0000400200008A5900007A0000__00000073AD3FE6B8\n000000067F0000400200008A59000079C000-000000067F0000400200008A5900007A0000__000000914E3F38F0\n000000067F0000400200008A59000079C000-000000067F0000400200008A5900007A0000__000000931B33AE68\n000000067F0000400200008A59000079C000-000000067F0000400200008A5900007A0000__000000931B9AFDF8\n000000067F0000400200008A59000079E99E-000000067F0000400200008A5900007A7383__0000000D8A85D199-0000000E2A359AC1\n000000067F0000400200008A5900007A0000-000000067F0000400200008A5900007A4000__0000000E54FFE720\n000000067F0000400200008A5900007A0000-000000067F0000400200008A5900007A4000__0000001C760FA190\n000000067F0000400200008A5900007A0000-000000067F0000400200008A5900007A4000__00000038E67ABFA0\n000000067F0000400200008A5900007A0000-000000067F0000400200008A5900007A4000__0000003903F1CFE8\n000000067F0000400200008A5900007A0000-000000067F0000400200008A5900007A4000__0000003B99F7F8A0\n000000067F0000400200008A5900007A0000-000000067F0000400200008A5900007A4000__0000005D2FFFFB38\n000000067F0000400200008A5900007A0000-000000067F0000400200008A5900007A4000__00000073AD3FE6B8\n000000067F0000400200008A5900007A0000-000000067F0000400200008A5900007A4000__000000914E3F38F0\n000000067F0000400200008A5900007A0000-000000067F0000400200008A5900007A4000__000000931B33AE68\n000000067F0000400200008A5900007A0000-000000067F0000400200008A5900007A4000__000000931B9AFDF8\n000000067F0000400200008A5900007A4000-000000067F0000400200008A5900007A8000__0000000E54FFE720\n000000067F0000400200008A5900007A4000-000000067F0000400200008A5900007A8
000__0000001C760FA190\n000000067F0000400200008A5900007A4000-000000067F0000400200008A5900007A8000__00000038E67ABFA0\n000000067F0000400200008A5900007A4000-000000067F0000400200008A5900007A8000__0000003903F1CFE8\n000000067F0000400200008A5900007A4000-000000067F0000400200008A5900007A8000__0000003B99F7F8A0\n000000067F0000400200008A5900007A4000-000000067F0000400200008A5900007A8000__0000005D2FFFFB38\n000000067F0000400200008A5900007A4000-000000067F0000400200008A5900007A8000__00000073AD3FE6B8\n000000067F0000400200008A5900007A4000-000000067F0000400200008A5900007A8000__000000914E3F38F0\n000000067F0000400200008A5900007A4000-000000067F0000400200008A5900007A8000__000000931B33AE68\n000000067F0000400200008A5900007A4000-000000067F0000400200008A5900007A8000__000000931B9AFDF8\n000000067F0000400200008A5900007A7383-000000067F0000400200008A590100000000__0000000D8A85D199-0000000E2A359AC1\n000000067F0000400200008A5900007A7649-000000067F0000400200008A5900007B001B__0000000E2A359AC1-0000000ED9DDF211\n000000067F0000400200008A5900007A8000-000000067F0000400200008A5900007AC000__0000000E54FFE720\n000000067F0000400200008A5900007A8000-000000067F0000400200008A5900007AC000__0000001C760FA190\n000000067F0000400200008A5900007A8000-000000067F0000400200008A5900007AC000__00000038E67ABFA0\n000000067F0000400200008A5900007A8000-000000067F0000400200008A5900007AC000__0000003903F1CFE8\n000000067F0000400200008A5900007A8000-000000067F0000400200008A5900007AC000__0000003B99F7F8A0\n000000067F0000400200008A5900007A8000-000000067F0000400200008A5900007AC000__0000005D2FFFFB38\n000000067F0000400200008A5900007A8000-000000067F0000400200008A5900007AC000__00000073AD3FE6B8\n000000067F0000400200008A5900007A8000-000000067F0000400200008A5900007AC000__000000914E3F38F0\n000000067F0000400200008A5900007A8000-000000067F0000400200008A5900007AC000__000000931B33AE68\n000000067F0000400200008A5900007A8000-000000067F0000400200008A5900007AC000__000000931B9AFDF8\n000000067F0000400200008A5900007AC000-000000067F0000400200008A5900007B0000__0000000E
54FFE720\n000000067F0000400200008A5900007AC000-000000067F0000400200008A5900007B0000__0000001C760FA190\n000000067F0000400200008A5900007AC000-000000067F0000400200008A5900007B0000__00000038E67ABFA0\n000000067F0000400200008A5900007AC000-000000067F0000400200008A5900007B0000__0000003903F1CFE8\n000000067F0000400200008A5900007AC000-000000067F0000400200008A5900007B0000__0000003B99F7F8A0\n000000067F0000400200008A5900007AC000-000000067F0000400200008A5900007B0000__0000005D2FFFFB38\n000000067F0000400200008A5900007AC000-000000067F0000400200008A5900007B0000__00000073AD3FE6B8\n000000067F0000400200008A5900007AC000-000000067F0000400200008A5900007B0000__000000914E3F38F0\n000000067F0000400200008A5900007AC000-000000067F0000400200008A5900007B0000__000000931B33AE68\n000000067F0000400200008A5900007AC000-000000067F0000400200008A5900007B0000__000000931B9AFDF8\n000000067F0000400200008A5900007B0000-000000067F0000400200008A5900007B4000__0000000E54FFE720\n000000067F0000400200008A5900007B0000-000000067F0000400200008A5900007B4000__0000001C760FA190\n000000067F0000400200008A5900007B0000-000000067F0000400200008A5900007B4000__00000038E67ABFA0\n000000067F0000400200008A5900007B0000-000000067F0000400200008A5900007B4000__0000003903F1CFE8\n000000067F0000400200008A5900007B0000-000000067F0000400200008A5900007B4000__0000003B99F7F8A0\n000000067F0000400200008A5900007B0000-000000067F0000400200008A5900007B4000__0000005D2FFFFB38\n000000067F0000400200008A5900007B0000-000000067F0000400200008A5900007B4000__00000073AD3FE6B8\n000000067F0000400200008A5900007B0000-000000067F0000400200008A5900007B4000__000000914E3F38F0\n000000067F0000400200008A5900007B0000-000000067F0000400200008A5900007B4000__000000931B33AE68\n000000067F0000400200008A5900007B0000-000000067F0000400200008A5900007B4000__000000931B9AFDF8\n000000067F0000400200008A5900007B001B-000000067F0000400200008A5900007B89F9__0000000E2A359AC1-0000000ED9DDF211\n000000067F0000400200008A5900007B4000-000000067F0000400200008A5900007B8000__0000000E54FFE720\n000000067F0000400200
008A5900007B4000-000000067F0000400200008A5900007B8000__0000001C760FA190\n000000067F0000400200008A5900007B4000-000000067F0000400200008A5900007B8000__00000038E67ABFA0\n000000067F0000400200008A5900007B4000-000000067F0000400200008A5900007B8000__0000003903F1CFE8\n000000067F0000400200008A5900007B4000-000000067F0000400200008A5900007B8000__0000003B99F7F8A0\n000000067F0000400200008A5900007B4000-000000067F0000400200008A5900007B8000__0000005D2FFFFB38\n000000067F0000400200008A5900007B4000-000000067F0000400200008A5900007B8000__00000073AD3FE6B8\n000000067F0000400200008A5900007B4000-000000067F0000400200008A5900007B8000__000000914E3F38F0\n000000067F0000400200008A5900007B4000-000000067F0000400200008A5900007B8000__000000931B33AE68\n000000067F0000400200008A5900007B4000-000000067F0000400200008A5900007B8000__000000931B9AFDF8\n000000067F0000400200008A5900007B8000-000000067F0000400200008A5900007BC000__0000000E54FFE720\n000000067F0000400200008A5900007B8000-000000067F0000400200008A5900007BC000__0000001C760FA190\n000000067F0000400200008A5900007B8000-000000067F0000400200008A5900007BC000__00000038E67ABFA0\n000000067F0000400200008A5900007B8000-000000067F0000400200008A5900007BC000__0000003903F1CFE8\n000000067F0000400200008A5900007B8000-000000067F0000400200008A5900007BC000__0000003B99F7F8A0\n000000067F0000400200008A5900007B8000-000000067F0000400200008A5900007BC000__0000005D2FFFFB38\n000000067F0000400200008A5900007B8000-000000067F0000400200008A5900007BC000__00000073AD3FE6B8\n000000067F0000400200008A5900007B8000-000000067F0000400200008A5900007BC000__000000914E3F38F0\n000000067F0000400200008A5900007B8000-000000067F0000400200008A5900007BC000__000000931B33AE68\n000000067F0000400200008A5900007B8000-000000067F0000400200008A5900007BC000__000000931B9AFDF8\n000000067F0000400200008A5900007B89F9-000000067F0000400200008A5900007C13E0__0000000E2A359AC1-0000000ED9DDF211\n000000067F0000400200008A5900007BC000-000000067F0000400200008A5900007C0000__0000001C760FA190\n000000067F0000400200008A5900007BC000-000000067F000
0400200008A5900007C0000__00000038E67ABFA0\n000000067F0000400200008A5900007BC000-000000067F0000400200008A5900007C0000__0000003903F1CFE8\n000000067F0000400200008A5900007BC000-000000067F0000400200008A5900007C0000__0000003B99F7F8A0\n000000067F0000400200008A5900007BC000-000000067F0000400200008A5900007C0000__0000005D2FFFFB38\n000000067F0000400200008A5900007BC000-000000067F0000400200008A5900007C0000__00000073AD3FE6B8\n000000067F0000400200008A5900007BC000-000000067F0000400200008A5900007C0000__000000914E3F38F0\n000000067F0000400200008A5900007BC000-000000067F0000400200008A5900007C0000__000000931B33AE68\n000000067F0000400200008A5900007BC000-000000067F0000400200008A5900007C0000__000000931B9AFDF8\n000000067F0000400200008A5900007BC000-030000000000000000000000000000000002__0000000E54FFE720\n000000067F0000400200008A5900007C0000-000000067F0000400200008A5900007C4000__0000001C760FA190\n000000067F0000400200008A5900007C0000-000000067F0000400200008A5900007C4000__00000038E67ABFA0\n000000067F0000400200008A5900007C0000-000000067F0000400200008A5900007C4000__0000003903F1CFE8\n000000067F0000400200008A5900007C0000-000000067F0000400200008A5900007C4000__0000003B99F7F8A0\n000000067F0000400200008A5900007C0000-000000067F0000400200008A5900007C4000__0000005D2FFFFB38\n000000067F0000400200008A5900007C0000-000000067F0000400200008A5900007C4000__00000073AD3FE6B8\n000000067F0000400200008A5900007C0000-000000067F0000400200008A5900007C4000__000000914E3F38F0\n000000067F0000400200008A5900007C0000-000000067F0000400200008A5900007C4000__000000931B33AE68\n000000067F0000400200008A5900007C0000-000000067F0000400200008A5900007C4000__000000931B9AFDF8\n000000067F0000400200008A5900007C13E0-000000067F0000400200008A5900007C9DC3__0000000E2A359AC1-0000000ED9DDF211\n000000067F0000400200008A5900007C4000-000000067F0000400200008A5900007C8000__0000001C760FA190\n000000067F0000400200008A5900007C4000-000000067F0000400200008A5900007C8000__00000038E67ABFA0\n000000067F0000400200008A5900007C4000-000000067F0000400200008A5900007C8000__00000
03903F1CFE8\n000000067F0000400200008A5900007C4000-000000067F0000400200008A5900007C8000__0000003B99F7F8A0\n000000067F0000400200008A5900007C4000-000000067F0000400200008A5900007C8000__0000005D2FFFFB38\n000000067F0000400200008A5900007C4000-000000067F0000400200008A5900007C8000__00000073AD3FE6B8\n000000067F0000400200008A5900007C4000-000000067F0000400200008A5900007C8000__000000914E3F38F0\n000000067F0000400200008A5900007C4000-000000067F0000400200008A5900007C8000__000000931B33AE68\n000000067F0000400200008A5900007C4000-000000067F0000400200008A5900007C8000__000000931B9AFDF8\n000000067F0000400200008A5900007C8000-000000067F0000400200008A5900007CC000__0000001C760FA190\n000000067F0000400200008A5900007C8000-000000067F0000400200008A5900007CC000__00000038E67ABFA0\n000000067F0000400200008A5900007C8000-000000067F0000400200008A5900007CC000__0000003903F1CFE8\n000000067F0000400200008A5900007C8000-000000067F0000400200008A5900007CC000__0000003B99F7F8A0\n000000067F0000400200008A5900007C8000-000000067F0000400200008A5900007CC000__0000005D2FFFFB38\n000000067F0000400200008A5900007C8000-000000067F0000400200008A5900007CC000__00000073AD3FE6B8\n000000067F0000400200008A5900007C8000-000000067F0000400200008A5900007CC000__000000914E3F38F0\n000000067F0000400200008A5900007C8000-000000067F0000400200008A5900007CC000__000000931B33AE68\n000000067F0000400200008A5900007C8000-000000067F0000400200008A5900007CC000__000000931B9AFDF8\n000000067F0000400200008A5900007C9DC3-000000067F0000400200008A5900007D2796__0000000E2A359AC1-0000000ED9DDF211\n000000067F0000400200008A5900007CC000-000000067F0000400200008A5900007D0000__0000001C760FA190\n000000067F0000400200008A5900007CC000-000000067F0000400200008A5900007D0000__00000038E67ABFA0\n000000067F0000400200008A5900007CC000-000000067F0000400200008A5900007D0000__0000003903F1CFE8\n000000067F0000400200008A5900007CC000-000000067F0000400200008A5900007D0000__0000003B99F7F8A0\n000000067F0000400200008A5900007CC000-000000067F0000400200008A5900007D0000__0000005D2FFFFB38\n000000067F0000400
200008A5900007CC000-000000067F0000400200008A5900007D0000__00000073AD3FE6B8\n000000067F0000400200008A5900007CC000-000000067F0000400200008A5900007D0000__000000914E3F38F0\n000000067F0000400200008A5900007CC000-000000067F0000400200008A5900007D0000__000000931B33AE68\n000000067F0000400200008A5900007CC000-000000067F0000400200008A5900007D0000__000000931B9AFDF8\n000000067F0000400200008A5900007D0000-000000067F0000400200008A5900007D4000__0000001C760FA190\n000000067F0000400200008A5900007D0000-000000067F0000400200008A5900007D4000__00000038E67ABFA0\n000000067F0000400200008A5900007D0000-000000067F0000400200008A5900007D4000__0000003903F1CFE8\n000000067F0000400200008A5900007D0000-000000067F0000400200008A5900007D4000__0000003B99F7F8A0\n000000067F0000400200008A5900007D0000-000000067F0000400200008A5900007D4000__0000005D2FFFFB38\n000000067F0000400200008A5900007D0000-000000067F0000400200008A5900007D4000__00000073AD3FE6B8\n000000067F0000400200008A5900007D0000-000000067F0000400200008A5900007D4000__000000914E3F38F0\n000000067F0000400200008A5900007D0000-000000067F0000400200008A5900007D4000__000000931B33AE68\n000000067F0000400200008A5900007D0000-000000067F0000400200008A5900007D4000__000000931B9AFDF8\n000000067F0000400200008A5900007D2796-000000067F0000400200008A5900007DB171__0000000E2A359AC1-0000000ED9DDF211\n000000067F0000400200008A5900007D4000-000000067F0000400200008A5900007D8000__0000001C760FA190\n000000067F0000400200008A5900007D4000-000000067F0000400200008A5900007D8000__00000038E67ABFA0\n000000067F0000400200008A5900007D4000-000000067F0000400200008A5900007D8000__0000003903F1CFE8\n000000067F0000400200008A5900007D4000-000000067F0000400200008A5900007D8000__0000003B99F7F8A0\n000000067F0000400200008A5900007D4000-000000067F0000400200008A5900007D8000__0000005D2FFFFB38\n000000067F0000400200008A5900007D4000-000000067F0000400200008A5900007D8000__00000073AD3FE6B8\n000000067F0000400200008A5900007D4000-000000067F0000400200008A5900007D8000__000000914E3F38F0\n000000067F0000400200008A5900007D4000-000000067F
0000400200008A5900007D8000__000000931B33AE68\n000000067F0000400200008A5900007D4000-000000067F0000400200008A5900007D8000__000000931B9AFDF8\n000000067F0000400200008A5900007D8000-000000067F0000400200008A5900007DC000__0000001C760FA190\n000000067F0000400200008A5900007D8000-000000067F0000400200008A5900007DC000__00000038E67ABFA0\n000000067F0000400200008A5900007D8000-000000067F0000400200008A5900007DC000__0000003903F1CFE8\n000000067F0000400200008A5900007D8000-000000067F0000400200008A5900007DC000__0000003B99F7F8A0\n000000067F0000400200008A5900007D8000-000000067F0000400200008A5900007DC000__0000005D2FFFFB38\n000000067F0000400200008A5900007D8000-000000067F0000400200008A5900007DC000__00000073AD3FE6B8\n000000067F0000400200008A5900007D8000-000000067F0000400200008A5900007DC000__000000914E3F38F0\n000000067F0000400200008A5900007D8000-000000067F0000400200008A5900007DC000__000000931B33AE68\n000000067F0000400200008A5900007D8000-000000067F0000400200008A5900007DC000__000000931B9AFDF8\n000000067F0000400200008A5900007DB171-000000067F0000400200008A5900007E3B60__0000000E2A359AC1-0000000ED9DDF211\n000000067F0000400200008A5900007DC000-000000067F0000400200008A5900007E0000__0000001C760FA190\n000000067F0000400200008A5900007DC000-000000067F0000400200008A5900007E0000__00000038E67ABFA0\n000000067F0000400200008A5900007DC000-000000067F0000400200008A5900007E0000__0000003903F1CFE8\n000000067F0000400200008A5900007DC000-000000067F0000400200008A5900007E0000__0000003B99F7F8A0\n000000067F0000400200008A5900007DC000-000000067F0000400200008A5900007E0000__0000005D2FFFFB38\n000000067F0000400200008A5900007DC000-000000067F0000400200008A5900007E0000__00000073AD3FE6B8\n000000067F0000400200008A5900007DC000-000000067F0000400200008A5900007E0000__000000914E3F38F0\n000000067F0000400200008A5900007DC000-000000067F0000400200008A5900007E0000__000000931B33AE68\n000000067F0000400200008A5900007DC000-000000067F0000400200008A5900007E0000__000000931B9AFDF8\n000000067F0000400200008A5900007E0000-000000067F0000400200008A5900007E4000__00
00001C760FA190\n000000067F0000400200008A5900007E0000-000000067F0000400200008A5900007E4000__00000038E67ABFA0\n000000067F0000400200008A5900007E0000-000000067F0000400200008A5900007E4000__0000003903F1CFE8\n000000067F0000400200008A5900007E0000-000000067F0000400200008A5900007E4000__0000003B99F7F8A0\n000000067F0000400200008A5900007E0000-000000067F0000400200008A5900007E4000__0000005D2FFFFB38\n000000067F0000400200008A5900007E0000-000000067F0000400200008A5900007E4000__00000073AD3FE6B8\n000000067F0000400200008A5900007E0000-000000067F0000400200008A5900007E4000__000000914E3F38F0\n000000067F0000400200008A5900007E0000-000000067F0000400200008A5900007E4000__000000931B33AE68\n000000067F0000400200008A5900007E0000-000000067F0000400200008A5900007E4000__000000931B9AFDF8\n000000067F0000400200008A5900007E3B60-000000067F0000400200008A5900007EC53A__0000000E2A359AC1-0000000ED9DDF211\n000000067F0000400200008A5900007E4000-000000067F0000400200008A5900007E8000__0000001C760FA190\n000000067F0000400200008A5900007E4000-000000067F0000400200008A5900007E8000__00000038E67ABFA0\n000000067F0000400200008A5900007E4000-000000067F0000400200008A5900007E8000__0000003903F1CFE8\n000000067F0000400200008A5900007E4000-000000067F0000400200008A5900007E8000__0000003B99F7F8A0\n000000067F0000400200008A5900007E4000-000000067F0000400200008A5900007E8000__0000005D2FFFFB38\n000000067F0000400200008A5900007E4000-000000067F0000400200008A5900007E8000__00000073AD3FE6B8\n000000067F0000400200008A5900007E4000-000000067F0000400200008A5900007E8000__000000914E3F38F0\n000000067F0000400200008A5900007E4000-000000067F0000400200008A5900007E8000__000000931B33AE68\n000000067F0000400200008A5900007E4000-000000067F0000400200008A5900007E8000__000000931B9AFDF8\n000000067F0000400200008A5900007E8000-000000067F0000400200008A5900007EC000__0000001C760FA190\n000000067F0000400200008A5900007E8000-000000067F0000400200008A5900007EC000__00000038E67ABFA0\n000000067F0000400200008A5900007E8000-000000067F0000400200008A5900007EC000__0000003903F1CFE8\n000000067F0000
400200008A5900007E8000-000000067F0000400200008A5900007EC000__0000003B99F7F8A0\n000000067F0000400200008A5900007E8000-000000067F0000400200008A5900007EC000__0000005D2FFFFB38\n000000067F0000400200008A5900007E8000-000000067F0000400200008A5900007EC000__00000073AD3FE6B8\n000000067F0000400200008A5900007E8000-000000067F0000400200008A5900007EC000__000000914E3F38F0\n000000067F0000400200008A5900007E8000-000000067F0000400200008A5900007EC000__000000931B33AE68\n000000067F0000400200008A5900007E8000-000000067F0000400200008A5900007EC000__000000931B9AFDF8\n000000067F0000400200008A5900007EC000-000000067F0000400200008A5900007F0000__0000001C760FA190\n000000067F0000400200008A5900007EC000-000000067F0000400200008A5900007F0000__00000038E67ABFA0\n000000067F0000400200008A5900007EC000-000000067F0000400200008A5900007F0000__0000003903F1CFE8\n000000067F0000400200008A5900007EC000-000000067F0000400200008A5900007F0000__0000003B99F7F8A0\n000000067F0000400200008A5900007EC000-000000067F0000400200008A5900007F0000__0000005D2FFFFB38\n000000067F0000400200008A5900007EC000-000000067F0000400200008A5900007F0000__00000073AD3FE6B8\n000000067F0000400200008A5900007EC000-000000067F0000400200008A5900007F0000__000000914E3F38F0\n000000067F0000400200008A5900007EC000-000000067F0000400200008A5900007F0000__000000931B33AE68\n000000067F0000400200008A5900007EC000-000000067F0000400200008A5900007F0000__000000931B9AFDF8\n000000067F0000400200008A5900007EC53A-000000067F0000400200008A5900007F4F2A__0000000E2A359AC1-0000000ED9DDF211\n000000067F0000400200008A5900007F0000-000000067F0000400200008A5900007F4000__0000001C760FA190\n000000067F0000400200008A5900007F0000-000000067F0000400200008A5900007F4000__00000038E67ABFA0\n000000067F0000400200008A5900007F0000-000000067F0000400200008A5900007F4000__0000003903F1CFE8\n000000067F0000400200008A5900007F0000-000000067F0000400200008A5900007F4000__0000003B99F7F8A0\n000000067F0000400200008A5900007F0000-000000067F0000400200008A5900007F4000__0000005D2FFFFB38\n000000067F0000400200008A5900007F0000-0000000
67F0000400200008A5900007F4000__00000073AD3FE6B8\n000000067F0000400200008A5900007F0000-000000067F0000400200008A5900007F4000__000000914E3F38F0\n000000067F0000400200008A5900007F0000-000000067F0000400200008A5900007F4000__000000931B33AE68\n000000067F0000400200008A5900007F0000-000000067F0000400200008A5900007F4000__000000931B9AFDF8\n000000067F0000400200008A5900007F4000-000000067F0000400200008A5900007F8000__0000001C760FA190\n000000067F0000400200008A5900007F4000-000000067F0000400200008A5900007F8000__00000038E67ABFA0\n000000067F0000400200008A5900007F4000-000000067F0000400200008A5900007F8000__0000003903F1CFE8\n000000067F0000400200008A5900007F4000-000000067F0000400200008A5900007F8000__0000003B99F7F8A0\n000000067F0000400200008A5900007F4000-000000067F0000400200008A5900007F8000__0000005D2FFFFB38\n000000067F0000400200008A5900007F4000-000000067F0000400200008A5900007F8000__00000073AD3FE6B8\n000000067F0000400200008A5900007F4000-000000067F0000400200008A5900007F8000__000000914E3F38F0\n000000067F0000400200008A5900007F4000-000000067F0000400200008A5900007F8000__000000931B33AE68\n000000067F0000400200008A5900007F4000-000000067F0000400200008A5900007F8000__000000931B9AFDF8\n000000067F0000400200008A5900007F4F2A-000000067F0000400200008A5900007FD903__0000000E2A359AC1-0000000ED9DDF211\n000000067F0000400200008A5900007F8000-000000067F0000400200008A5900007FC000__0000001C760FA190\n000000067F0000400200008A5900007F8000-000000067F0000400200008A5900007FC000__00000038E67ABFA0\n000000067F0000400200008A5900007F8000-000000067F0000400200008A5900007FC000__0000003903F1CFE8\n000000067F0000400200008A5900007F8000-000000067F0000400200008A5900007FC000__0000003B99F7F8A0\n000000067F0000400200008A5900007F8000-000000067F0000400200008A5900007FC000__0000005D2FFFFB38\n000000067F0000400200008A5900007F8000-000000067F0000400200008A5900007FC000__00000073AD3FE6B8\n000000067F0000400200008A5900007F8000-000000067F0000400200008A5900007FC000__000000914E3F38F0\n000000067F0000400200008A5900007F8000-000000067F0000400200008A5900007FC000_
_000000931B33AE68\n000000067F0000400200008A5900007F8000-000000067F0000400200008A5900007FC000__000000931B9AFDF8\n000000067F0000400200008A5900007FC000-000000067F0000400200008A590000800000__0000001C760FA190\n000000067F0000400200008A5900007FC000-000000067F0000400200008A590000800000__00000038E67ABFA0\n000000067F0000400200008A5900007FC000-000000067F0000400200008A590000800000__0000003903F1CFE8\n000000067F0000400200008A5900007FC000-000000067F0000400200008A590000800000__0000003B99F7F8A0\n000000067F0000400200008A5900007FC000-000000067F0000400200008A590000800000__0000005D2FFFFB38\n000000067F0000400200008A5900007FC000-000000067F0000400200008A590000800000__00000073AD3FE6B8\n000000067F0000400200008A5900007FC000-000000067F0000400200008A590000800000__000000914E3F38F0\n000000067F0000400200008A5900007FC000-000000067F0000400200008A590000800000__000000931B33AE68\n000000067F0000400200008A5900007FC000-000000067F0000400200008A590000800000__000000931B9AFDF8\n000000067F0000400200008A5900007FD903-000000067F0000400200008A5900008062D2__0000000E2A359AC1-0000000ED9DDF211\n000000067F0000400200008A590000800000-000000067F0000400200008A590000804000__0000001C760FA190\n000000067F0000400200008A590000800000-000000067F0000400200008A590000804000__00000038E67ABFA0\n000000067F0000400200008A590000800000-000000067F0000400200008A590000804000__0000003903F1CFE8\n000000067F0000400200008A590000800000-000000067F0000400200008A590000804000__0000003B99F7F8A0\n000000067F0000400200008A590000800000-000000067F0000400200008A590000804000__0000005D2FFFFB38\n000000067F0000400200008A590000800000-000000067F0000400200008A590000804000__00000073AD3FE6B8\n000000067F0000400200008A590000800000-000000067F0000400200008A590000804000__000000914E3F38F0\n000000067F0000400200008A590000800000-000000067F0000400200008A590000804000__000000931B33AE68\n000000067F0000400200008A590000800000-000000067F0000400200008A590000804000__000000931B9AFDF8\n000000067F0000400200008A590000804000-000000067F0000400200008A590000808000__0000001C725A2400\n000000067F0
000400200008A590000804000-000000067F0000400200008A590000808000__0000001C760FA190\n000000067F0000400200008A590000804000-000000067F0000400200008A590000808000__00000038E67ABFA0\n000000067F0000400200008A590000804000-000000067F0000400200008A590000808000__0000003903F1CFE8\n000000067F0000400200008A590000804000-000000067F0000400200008A590000808000__0000003B99F7F8A0\n000000067F0000400200008A590000804000-000000067F0000400200008A590000808000__0000005D2FFFFB38\n000000067F0000400200008A590000804000-000000067F0000400200008A590000808000__00000073AD3FE6B8\n000000067F0000400200008A590000804000-000000067F0000400200008A590000808000__000000914E3F38F0\n000000067F0000400200008A590000804000-000000067F0000400200008A590000808000__000000931B33AE68\n000000067F0000400200008A590000804000-000000067F0000400200008A590000808000__000000931B9AFDF8\n000000067F0000400200008A5900008062D2-000000067F0000400200008A590100000000__0000000E2A359AC1-0000000ED9DDF211\n000000067F0000400200008A5900008065DC-000000067F0000400200008A59000080EFB7__0000000ED9DDF211-0000000F8985D279\n000000067F0000400200008A590000808000-000000067F0000400200008A59000080C000__0000001C725A2400\n000000067F0000400200008A590000808000-000000067F0000400200008A59000080C000__0000001C760FA190\n000000067F0000400200008A590000808000-000000067F0000400200008A59000080C000__00000038E67ABFA0\n000000067F0000400200008A590000808000-000000067F0000400200008A59000080C000__0000003903F1CFE8\n000000067F0000400200008A590000808000-000000067F0000400200008A59000080C000__0000003B99F7F8A0\n000000067F0000400200008A590000808000-000000067F0000400200008A59000080C000__0000005D2FFFFB38\n000000067F0000400200008A590000808000-000000067F0000400200008A59000080C000__00000073AD3FE6B8\n000000067F0000400200008A590000808000-000000067F0000400200008A59000080C000__000000914E3F38F0\n000000067F0000400200008A590000808000-000000067F0000400200008A59000080C000__000000931B33AE68\n000000067F0000400200008A590000808000-000000067F0000400200008A59000080C000__000000931B9AFDF8\n000000067F0000400200008A
59000080C000-000000067F0000400200008A590000810000__0000001C725A2400\n000000067F0000400200008A59000080C000-000000067F0000400200008A590000810000__0000001C760FA190\n000000067F0000400200008A59000080C000-000000067F0000400200008A590000810000__00000038E67ABFA0\n000000067F0000400200008A59000080C000-000000067F0000400200008A590000810000__0000003903F1CFE8\n000000067F0000400200008A59000080C000-000000067F0000400200008A590000810000__0000003B99F7F8A0\n000000067F0000400200008A59000080C000-000000067F0000400200008A590000810000__0000005D2FFFFB38\n000000067F0000400200008A59000080C000-000000067F0000400200008A590000810000__00000073AD3FE6B8\n000000067F0000400200008A59000080C000-000000067F0000400200008A590000810000__000000914E3F38F0\n000000067F0000400200008A59000080C000-000000067F0000400200008A590000810000__000000931B33AE68\n000000067F0000400200008A59000080C000-000000067F0000400200008A590000810000__000000931B9AFDF8\n000000067F0000400200008A59000080EFB7-000000067F0000400200008A590000817999__0000000ED9DDF211-0000000F8985D279\n000000067F0000400200008A590000810000-000000067F0000400200008A590000814000__0000001C725A2400\n000000067F0000400200008A590000810000-000000067F0000400200008A590000814000__0000001C760FA190\n000000067F0000400200008A590000810000-000000067F0000400200008A590000814000__00000038E67ABFA0\n000000067F0000400200008A590000810000-000000067F0000400200008A590000814000__0000003903F1CFE8\n000000067F0000400200008A590000810000-000000067F0000400200008A590000814000__0000003B99F7F8A0\n000000067F0000400200008A590000810000-000000067F0000400200008A590000814000__0000005D2FFFFB38\n000000067F0000400200008A590000810000-000000067F0000400200008A590000814000__00000073AD3FE6B8\n000000067F0000400200008A590000810000-000000067F0000400200008A590000814000__000000914E3F38F0\n000000067F0000400200008A590000810000-000000067F0000400200008A590000814000__000000931B33AE68\n000000067F0000400200008A590000810000-000000067F0000400200008A590000814000__000000931B9AFDF8\n000000067F0000400200008A590000814000-000000067F0000400
200008A590000818000__0000001C725A2400\n000000067F0000400200008A590000814000-000000067F0000400200008A590000818000__0000001C760FA190\n000000067F0000400200008A590000814000-000000067F0000400200008A590000818000__00000038E67ABFA0\n000000067F0000400200008A590000814000-000000067F0000400200008A590000818000__0000003903F1CFE8\n000000067F0000400200008A590000814000-000000067F0000400200008A590000818000__0000003B99F7F8A0\n000000067F0000400200008A590000814000-000000067F0000400200008A590000818000__0000005D2FFFFB38\n000000067F0000400200008A590000814000-000000067F0000400200008A590000818000__00000073AD3FE6B8\n000000067F0000400200008A590000814000-000000067F0000400200008A590000818000__000000914E3F38F0\n000000067F0000400200008A590000814000-000000067F0000400200008A590000818000__000000931B33AE68\n000000067F0000400200008A590000814000-000000067F0000400200008A590000818000__000000931B9AFDF8\n000000067F0000400200008A590000817999-000000067F0000400200008A59000082037D__0000000ED9DDF211-0000000F8985D279\n000000067F0000400200008A590000818000-000000067F0000400200008A59000081C000__0000001C725A2400\n000000067F0000400200008A590000818000-000000067F0000400200008A59000081C000__0000001C760FA190\n000000067F0000400200008A590000818000-000000067F0000400200008A59000081C000__00000038E67ABFA0\n000000067F0000400200008A590000818000-000000067F0000400200008A59000081C000__0000003903F1CFE8\n000000067F0000400200008A590000818000-000000067F0000400200008A59000081C000__0000003B99F7F8A0\n000000067F0000400200008A590000818000-000000067F0000400200008A59000081C000__0000005D2FFFFB38\n000000067F0000400200008A590000818000-000000067F0000400200008A59000081C000__00000073AD3FE6B8\n000000067F0000400200008A590000818000-000000067F0000400200008A59000081C000__000000914E3F38F0\n000000067F0000400200008A590000818000-000000067F0000400200008A59000081C000__000000931B33AE68\n000000067F0000400200008A590000818000-000000067F0000400200008A59000081C000__000000931B9AFDF8\n000000067F0000400200008A59000081C000-000000067F0000400200008A590000820000__0000001C7
25A2400\n000000067F0000400200008A59000081C000-000000067F0000400200008A590000820000__0000001C760FA190\n000000067F0000400200008A59000081C000-000000067F0000400200008A590000820000__00000038E67ABFA0\n000000067F0000400200008A59000081C000-000000067F0000400200008A590000820000__0000003903F1CFE8\n000000067F0000400200008A59000081C000-000000067F0000400200008A590000820000__0000003B99F7F8A0\n000000067F0000400200008A59000081C000-000000067F0000400200008A590000820000__0000005D2FFFFB38\n000000067F0000400200008A59000081C000-000000067F0000400200008A590000820000__00000073AD3FE6B8\n000000067F0000400200008A59000081C000-000000067F0000400200008A590000820000__000000914E3F38F0\n000000067F0000400200008A59000081C000-000000067F0000400200008A590000820000__000000931B33AE68\n000000067F0000400200008A59000081C000-000000067F0000400200008A590000820000__000000931B9AFDF8\n000000067F0000400200008A590000820000-000000067F0000400200008A590000824000__0000001C725A2400\n000000067F0000400200008A590000820000-000000067F0000400200008A590000824000__0000001C760FA190\n000000067F0000400200008A590000820000-000000067F0000400200008A590000824000__00000038E67ABFA0\n000000067F0000400200008A590000820000-000000067F0000400200008A590000824000__0000003903F1CFE8\n000000067F0000400200008A590000820000-000000067F0000400200008A590000824000__0000003B99F7F8A0\n000000067F0000400200008A590000820000-000000067F0000400200008A590000824000__0000005D2FFFFB38\n000000067F0000400200008A590000820000-000000067F0000400200008A590000824000__00000073AD3FE6B8\n000000067F0000400200008A590000820000-000000067F0000400200008A590000824000__000000914E3F38F0\n000000067F0000400200008A590000820000-000000067F0000400200008A590000824000__000000931B33AE68\n000000067F0000400200008A590000820000-000000067F0000400200008A590000824000__000000931B9AFDF8\n000000067F0000400200008A59000082037D-000000067F0000400200008A590000828D52__0000000ED9DDF211-0000000F8985D279\n000000067F0000400200008A590000824000-000000067F0000400200008A590000828000__0000001C725A2400\n000000067F00004002000
08A590000824000-000000067F0000400200008A590000828000__0000001C760FA190\n000000067F0000400200008A590000824000-000000067F0000400200008A590000828000__00000038E67ABFA0\n000000067F0000400200008A590000824000-000000067F0000400200008A590000828000__0000003903F1CFE8\n000000067F0000400200008A590000824000-000000067F0000400200008A590000828000__0000003B99F7F8A0\n000000067F0000400200008A590000824000-000000067F0000400200008A590000828000__0000005D2FFFFB38\n000000067F0000400200008A590000824000-000000067F0000400200008A590000828000__00000073AD3FE6B8\n000000067F0000400200008A590000824000-000000067F0000400200008A590000828000__000000914E3F38F0\n000000067F0000400200008A590000824000-000000067F0000400200008A590000828000__000000931B33AE68\n000000067F0000400200008A590000824000-000000067F0000400200008A590000828000__000000931B9AFDF8\n000000067F0000400200008A590000828000-000000067F0000400200008A59000082C000__0000001C725A2400\n000000067F0000400200008A590000828000-000000067F0000400200008A59000082C000__0000001C760FA190\n000000067F0000400200008A590000828000-000000067F0000400200008A59000082C000__00000038E67ABFA0\n000000067F0000400200008A590000828000-000000067F0000400200008A59000082C000__0000003903F1CFE8\n000000067F0000400200008A590000828000-000000067F0000400200008A59000082C000__0000003B99F7F8A0\n000000067F0000400200008A590000828000-000000067F0000400200008A59000082C000__0000005D2FFFFB38\n000000067F0000400200008A590000828000-000000067F0000400200008A59000082C000__00000073AD3FE6B8\n000000067F0000400200008A590000828000-000000067F0000400200008A59000082C000__000000914E3F38F0\n000000067F0000400200008A590000828000-000000067F0000400200008A59000082C000__000000931B33AE68\n000000067F0000400200008A590000828000-000000067F0000400200008A59000082C000__000000931B9AFDF8\n000000067F0000400200008A590000828D52-000000067F0000400200008A590000831734__0000000ED9DDF211-0000000F8985D279\n000000067F0000400200008A59000082C000-000000067F0000400200008A590000830000__0000001C725A2400\n000000067F0000400200008A59000082C000-000000067F0000
400200008A590000830000__0000001C760FA190\n000000067F0000400200008A59000082C000-000000067F0000400200008A590000830000__00000038E67ABFA0\n000000067F0000400200008A59000082C000-000000067F0000400200008A590000830000__0000003903F1CFE8\n000000067F0000400200008A59000082C000-000000067F0000400200008A590000830000__0000003B99F7F8A0\n000000067F0000400200008A59000082C000-000000067F0000400200008A590000830000__0000005D2FFFFB38\n000000067F0000400200008A59000082C000-000000067F0000400200008A590000830000__00000073AD3FE6B8\n000000067F0000400200008A59000082C000-000000067F0000400200008A590000830000__000000914E3F38F0\n000000067F0000400200008A59000082C000-000000067F0000400200008A590000830000__000000931B33AE68\n000000067F0000400200008A59000082C000-000000067F0000400200008A590000830000__000000931B9AFDF8\n000000067F0000400200008A590000830000-000000067F0000400200008A590000834000__0000001C725A2400\n000000067F0000400200008A590000830000-000000067F0000400200008A590000834000__0000001C760FA190\n000000067F0000400200008A590000830000-000000067F0000400200008A590000834000__00000038E67ABFA0\n000000067F0000400200008A590000830000-000000067F0000400200008A590000834000__0000003903F1CFE8\n000000067F0000400200008A590000830000-000000067F0000400200008A590000834000__0000003B99F7F8A0\n000000067F0000400200008A590000830000-000000067F0000400200008A590000834000__0000005D2FFFFB38\n000000067F0000400200008A590000830000-000000067F0000400200008A590000834000__00000073AD3FE6B8\n000000067F0000400200008A590000830000-000000067F0000400200008A590000834000__000000914E3F38F0\n000000067F0000400200008A590000830000-000000067F0000400200008A590000834000__000000931B33AE68\n000000067F0000400200008A590000830000-000000067F0000400200008A590000834000__000000931B9AFDF8\n000000067F0000400200008A590000831734-000000067F0000400200008A59000083A114__0000000ED9DDF211-0000000F8985D279\n000000067F0000400200008A590000834000-000000067F0000400200008A590000838000__0000001C725A2400\n000000067F0000400200008A590000834000-000000067F0000400200008A590000838000__000000
1C760FA190\n000000067F0000400200008A590000834000-000000067F0000400200008A590000838000__00000038E67ABFA0\n000000067F0000400200008A590000834000-000000067F0000400200008A590000838000__0000003903F1CFE8\n000000067F0000400200008A590000834000-000000067F0000400200008A590000838000__0000003B99F7F8A0\n000000067F0000400200008A590000834000-000000067F0000400200008A590000838000__0000005D2FFFFB38\n000000067F0000400200008A590000834000-000000067F0000400200008A590000838000__00000073AD3FE6B8\n000000067F0000400200008A590000834000-000000067F0000400200008A590000838000__000000914E3F38F0\n000000067F0000400200008A590000834000-000000067F0000400200008A590000838000__000000931B33AE68\n000000067F0000400200008A590000834000-000000067F0000400200008A590000838000__000000931B9AFDF8\n000000067F0000400200008A590000838000-000000067F0000400200008A59000083C000__0000001C725A2400\n000000067F0000400200008A590000838000-000000067F0000400200008A59000083C000__0000001C760FA190\n000000067F0000400200008A590000838000-000000067F0000400200008A59000083C000__00000038E67ABFA0\n000000067F0000400200008A590000838000-000000067F0000400200008A59000083C000__0000003903F1CFE8\n000000067F0000400200008A590000838000-000000067F0000400200008A59000083C000__0000003B99F7F8A0\n000000067F0000400200008A590000838000-000000067F0000400200008A59000083C000__0000005D2FFFFB38\n000000067F0000400200008A590000838000-000000067F0000400200008A59000083C000__00000073AD3FE6B8\n000000067F0000400200008A590000838000-000000067F0000400200008A59000083C000__000000914E3F38F0\n000000067F0000400200008A590000838000-000000067F0000400200008A59000083C000__000000931B33AE68\n000000067F0000400200008A590000838000-000000067F0000400200008A59000083C000__000000931B9AFDF8\n000000067F0000400200008A59000083A114-000000067F0000400200008A590000842AE0__0000000ED9DDF211-0000000F8985D279\n000000067F0000400200008A59000083C000-000000067F0000400200008A590000840000__0000001C725A2400\n000000067F0000400200008A59000083C000-000000067F0000400200008A590000840000__0000001C760FA190\n000000067F00004002
00008A59000083C000-000000067F0000400200008A590000840000__00000038E67ABFA0\n000000067F0000400200008A59000083C000-000000067F0000400200008A590000840000__0000003903F1CFE8\n000000067F0000400200008A59000083C000-000000067F0000400200008A590000840000__0000003B99F7F8A0\n000000067F0000400200008A59000083C000-000000067F0000400200008A590000840000__0000005D2FFFFB38\n000000067F0000400200008A59000083C000-000000067F0000400200008A590000840000__00000073AD3FE6B8\n000000067F0000400200008A59000083C000-000000067F0000400200008A590000840000__000000914E3F38F0\n000000067F0000400200008A59000083C000-000000067F0000400200008A590000840000__000000931B33AE68\n000000067F0000400200008A59000083C000-000000067F0000400200008A590000840000__000000931B9AFDF8\n000000067F0000400200008A590000840000-000000067F0000400200008A590000844000__0000001C725A2400\n000000067F0000400200008A590000840000-000000067F0000400200008A590000844000__0000001C760FA190\n000000067F0000400200008A590000840000-000000067F0000400200008A590000844000__00000038E67ABFA0\n000000067F0000400200008A590000840000-000000067F0000400200008A590000844000__0000003903F1CFE8\n000000067F0000400200008A590000840000-000000067F0000400200008A590000844000__0000003B99F7F8A0\n000000067F0000400200008A590000840000-000000067F0000400200008A590000844000__0000005D2FFFFB38\n000000067F0000400200008A590000840000-000000067F0000400200008A590000844000__00000073AD3FE6B8\n000000067F0000400200008A590000840000-000000067F0000400200008A590000844000__000000914E3F38F0\n000000067F0000400200008A590000840000-000000067F0000400200008A590000844000__000000931B33AE68\n000000067F0000400200008A590000840000-000000067F0000400200008A590000844000__000000931B9AFDF8\n000000067F0000400200008A590000842AE0-000000067F0000400200008A59000084B4C5__0000000ED9DDF211-0000000F8985D279\n000000067F0000400200008A590000844000-000000067F0000400200008A590000848000__0000001C725A2400\n000000067F0000400200008A590000844000-000000067F0000400200008A590000848000__0000001C760FA190\n000000067F0000400200008A590000844000-000000067F0
000400200008A590000848000__00000038E67ABFA0\n000000067F0000400200008A590000844000-000000067F0000400200008A590000848000__0000003903F1CFE8\n000000067F0000400200008A590000844000-000000067F0000400200008A590000848000__0000003B99F7F8A0\n000000067F0000400200008A590000844000-000000067F0000400200008A590000848000__0000005D2FFFFB38\n000000067F0000400200008A590000844000-000000067F0000400200008A590000848000__00000073AD3FE6B8\n000000067F0000400200008A590000844000-000000067F0000400200008A590000848000__000000914E3F38F0\n000000067F0000400200008A590000844000-000000067F0000400200008A590000848000__000000931B33AE68\n000000067F0000400200008A590000844000-000000067F0000400200008A590000848000__000000931B9AFDF8\n000000067F0000400200008A590000848000-000000067F0000400200008A59000084C000__0000001C725A2400\n000000067F0000400200008A590000848000-000000067F0000400200008A59000084C000__0000001C760FA190\n000000067F0000400200008A590000848000-000000067F0000400200008A59000084C000__00000038E67ABFA0\n000000067F0000400200008A590000848000-000000067F0000400200008A59000084C000__0000003903F1CFE8\n000000067F0000400200008A590000848000-000000067F0000400200008A59000084C000__0000003B99F7F8A0\n000000067F0000400200008A590000848000-000000067F0000400200008A59000084C000__0000005D2FFFFB38\n000000067F0000400200008A590000848000-000000067F0000400200008A59000084C000__00000073AD3FE6B8\n000000067F0000400200008A590000848000-000000067F0000400200008A59000084C000__000000914E3F38F0\n000000067F0000400200008A590000848000-000000067F0000400200008A59000084C000__000000931B33AE68\n000000067F0000400200008A590000848000-000000067F0000400200008A59000084C000__000000931B9AFDF8\n000000067F0000400200008A59000084B4C5-000000067F0000400200008A590000853EA6__0000000ED9DDF211-0000000F8985D279\n000000067F0000400200008A59000084C000-000000067F0000400200008A590000850000__0000001C725A2400\n000000067F0000400200008A59000084C000-000000067F0000400200008A590000850000__0000001C760FA190\n000000067F0000400200008A59000084C000-000000067F0000400200008A590000850000__000
00038E67ABFA0\n000000067F0000400200008A59000084C000-000000067F0000400200008A590000850000__0000003903F1CFE8\n000000067F0000400200008A59000084C000-000000067F0000400200008A590000850000__0000003B99F7F8A0\n000000067F0000400200008A59000084C000-000000067F0000400200008A590000850000__0000005D2FFFFB38\n000000067F0000400200008A59000084C000-000000067F0000400200008A590000850000__00000073AD3FE6B8\n000000067F0000400200008A59000084C000-000000067F0000400200008A590000850000__000000914E3F38F0\n000000067F0000400200008A59000084C000-000000067F0000400200008A590000850000__000000931B33AE68\n000000067F0000400200008A59000084C000-000000067F0000400200008A590000850000__000000931B9AFDF8\n000000067F0000400200008A590000850000-000000067F0000400200008A590000854000__0000001C725A2400\n000000067F0000400200008A590000850000-000000067F0000400200008A590000854000__0000001C760FA190\n000000067F0000400200008A590000850000-000000067F0000400200008A590000854000__00000038E67ABFA0\n000000067F0000400200008A590000850000-000000067F0000400200008A590000854000__0000003903F1CFE8\n000000067F0000400200008A590000850000-000000067F0000400200008A590000854000__0000003B99F7F8A0\n000000067F0000400200008A590000850000-000000067F0000400200008A590000854000__0000005D2FFFFB38\n000000067F0000400200008A590000850000-000000067F0000400200008A590000854000__00000073AD3FE6B8\n000000067F0000400200008A590000850000-000000067F0000400200008A590000854000__000000914E3F38F0\n000000067F0000400200008A590000850000-000000067F0000400200008A590000854000__000000931B33AE68\n000000067F0000400200008A590000850000-000000067F0000400200008A590000854000__000000931B9AFDF8\n000000067F0000400200008A590000853EA6-000000067F0000400200008A59000085C894__0000000ED9DDF211-0000000F8985D279\n000000067F0000400200008A590000854000-000000067F0000400200008A590000858000__0000001C725A2400\n000000067F0000400200008A590000854000-000000067F0000400200008A590000858000__0000001C760FA190\n000000067F0000400200008A590000854000-000000067F0000400200008A590000858000__00000038E67ABFA0\n000000067F00004
00200008A590000854000-000000067F0000400200008A590000858000__0000003903F1CFE8\n000000067F0000400200008A590000854000-000000067F0000400200008A590000858000__0000003B99F7F8A0\n000000067F0000400200008A590000854000-000000067F0000400200008A590000858000__0000005D2FFFFB38\n000000067F0000400200008A590000854000-000000067F0000400200008A590000858000__00000073AD3FE6B8\n000000067F0000400200008A590000854000-000000067F0000400200008A590000858000__000000914E3F38F0\n000000067F0000400200008A590000854000-000000067F0000400200008A590000858000__000000931B33AE68\n000000067F0000400200008A590000854000-000000067F0000400200008A590000858000__000000931B9AFDF8\n000000067F0000400200008A590000858000-000000067F0000400200008A59000085C000__0000001C725A2400\n000000067F0000400200008A590000858000-000000067F0000400200008A59000085C000__0000001C760FA190\n000000067F0000400200008A590000858000-000000067F0000400200008A59000085C000__00000038E67ABFA0\n000000067F0000400200008A590000858000-000000067F0000400200008A59000085C000__0000003903F1CFE8\n000000067F0000400200008A590000858000-000000067F0000400200008A59000085C000__0000003B99F7F8A0\n000000067F0000400200008A590000858000-000000067F0000400200008A59000085C000__0000005D2FFFFB38\n000000067F0000400200008A590000858000-000000067F0000400200008A59000085C000__00000073AD3FE6B8\n000000067F0000400200008A590000858000-000000067F0000400200008A59000085C000__000000914E3F38F0\n000000067F0000400200008A590000858000-000000067F0000400200008A59000085C000__000000931B33AE68\n000000067F0000400200008A590000858000-000000067F0000400200008A59000085C000__000000931B9AFDF8\n000000067F0000400200008A59000085C000-000000067F0000400200008A590000860000__0000001C725A2400\n000000067F0000400200008A59000085C000-000000067F0000400200008A590000860000__0000001C760FA190\n000000067F0000400200008A59000085C000-000000067F0000400200008A590000860000__00000038E67ABFA0\n000000067F0000400200008A59000085C000-000000067F0000400200008A590000860000__0000003903F1CFE8\n000000067F0000400200008A59000085C000-000000067F0000400200008A5
90000860000__0000003B99F7F8A0\n000000067F0000400200008A59000085C000-000000067F0000400200008A590000860000__0000005D2FFFFB38\n000000067F0000400200008A59000085C000-000000067F0000400200008A590000860000__00000073AD3FE6B8\n000000067F0000400200008A59000085C000-000000067F0000400200008A590000860000__000000914E3F38F0\n000000067F0000400200008A59000085C000-000000067F0000400200008A590000860000__000000931B33AE68\n000000067F0000400200008A59000085C000-000000067F0000400200008A590000860000__000000931B9AFDF8\n000000067F0000400200008A59000085C894-000000067F0000400200008A590000865277__0000000ED9DDF211-0000000F8985D279\n000000067F0000400200008A590000860000-000000067F0000400200008A590000864000__0000001C725A2400\n000000067F0000400200008A590000860000-000000067F0000400200008A590000864000__0000001C760FA190\n000000067F0000400200008A590000860000-000000067F0000400200008A590000864000__00000038E67ABFA0\n000000067F0000400200008A590000860000-000000067F0000400200008A590000864000__0000003903F1CFE8\n000000067F0000400200008A590000860000-000000067F0000400200008A590000864000__0000003B99F7F8A0\n000000067F0000400200008A590000860000-000000067F0000400200008A590000864000__0000005D2FFFFB38\n000000067F0000400200008A590000860000-000000067F0000400200008A590000864000__00000073AD3FE6B8\n000000067F0000400200008A590000860000-000000067F0000400200008A590000864000__000000914E3F38F0\n000000067F0000400200008A590000860000-000000067F0000400200008A590000864000__000000931B33AE68\n000000067F0000400200008A590000860000-000000067F0000400200008A590000864000__000000931B9AFDF8\n000000067F0000400200008A590000864000-000000067F0000400200008A590000868000__000000106915EC38\n000000067F0000400200008A590000864000-000000067F0000400200008A590000868000__0000001C760FA190\n000000067F0000400200008A590000864000-000000067F0000400200008A590000868000__00000038E67ABFA0\n000000067F0000400200008A590000864000-000000067F0000400200008A590000868000__0000003903F1CFE8\n000000067F0000400200008A590000864000-000000067F0000400200008A590000868000__0000003B99F7F8A0\
n000000067F0000400200008A590000864000-000000067F0000400200008A590000868000__0000005D2FFFFB38\n000000067F0000400200008A590000864000-000000067F0000400200008A590000868000__00000073AD3FE6B8\n000000067F0000400200008A590000864000-000000067F0000400200008A590000868000__000000914E3F38F0\n000000067F0000400200008A590000864000-000000067F0000400200008A590000868000__000000931B33AE68\n000000067F0000400200008A590000864000-000000067F0000400200008A590000868000__000000931B9AFDF8\n000000067F0000400200008A590000865277-000000067F0000400200008A590100000000__0000000ED9DDF211-0000000F8985D279\n000000067F0000400200008A59000086558B-000000067F0000400200008A59000086DF61__0000000F8985D279-00000010392DE3B9\n000000067F0000400200008A590000868000-000000067F0000400200008A59000086C000__000000106915EC38\n000000067F0000400200008A590000868000-000000067F0000400200008A59000086C000__0000001C760FA190\n000000067F0000400200008A590000868000-000000067F0000400200008A59000086C000__00000038E67ABFA0\n000000067F0000400200008A590000868000-000000067F0000400200008A59000086C000__0000003903F1CFE8\n000000067F0000400200008A590000868000-000000067F0000400200008A59000086C000__0000003B99F7F8A0\n000000067F0000400200008A590000868000-000000067F0000400200008A59000086C000__0000005D2FFFFB38\n000000067F0000400200008A590000868000-000000067F0000400200008A59000086C000__00000073AD3FE6B8\n000000067F0000400200008A590000868000-000000067F0000400200008A59000086C000__000000914E3F38F0\n000000067F0000400200008A590000868000-000000067F0000400200008A59000086C000__000000931B33AE68\n000000067F0000400200008A590000868000-000000067F0000400200008A59000086C000__000000931B9AFDF8\n000000067F0000400200008A59000086C000-000000067F0000400200008A590000870000__000000106915EC38\n000000067F0000400200008A59000086C000-000000067F0000400200008A590000870000__0000001C760FA190\n000000067F0000400200008A59000086C000-000000067F0000400200008A590000870000__00000038E67ABFA0\n000000067F0000400200008A59000086C000-000000067F0000400200008A590000870000__0000003903F1CFE8\n000000067F00
00400200008A59000086C000-000000067F0000400200008A590000870000__0000003B99F7F8A0\n000000067F0000400200008A59000086C000-000000067F0000400200008A590000870000__0000005D2FFFFB38\n000000067F0000400200008A59000086C000-000000067F0000400200008A590000870000__00000073AD3FE6B8\n000000067F0000400200008A59000086C000-000000067F0000400200008A590000870000__000000914E3F38F0\n000000067F0000400200008A59000086C000-000000067F0000400200008A590000870000__000000931B33AE68\n000000067F0000400200008A59000086C000-000000067F0000400200008A590000870000__000000931B9AFDF8\n000000067F0000400200008A59000086DF61-000000067F0000400200008A59000087693A__0000000F8985D279-00000010392DE3B9\n000000067F0000400200008A590000870000-000000067F0000400200008A590000874000__000000106915EC38\n000000067F0000400200008A590000870000-000000067F0000400200008A590000874000__0000001C760FA190\n000000067F0000400200008A590000870000-000000067F0000400200008A590000874000__00000038E67ABFA0\n000000067F0000400200008A590000870000-000000067F0000400200008A590000874000__0000003903F1CFE8\n000000067F0000400200008A590000870000-000000067F0000400200008A590000874000__0000003B99F7F8A0\n000000067F0000400200008A590000870000-000000067F0000400200008A590000874000__0000005D2FFFFB38\n000000067F0000400200008A590000870000-000000067F0000400200008A590000874000__00000073AD3FE6B8\n000000067F0000400200008A590000870000-000000067F0000400200008A590000874000__000000914E3F38F0\n000000067F0000400200008A590000870000-000000067F0000400200008A590000874000__000000931B33AE68\n000000067F0000400200008A590000870000-000000067F0000400200008A590000874000__000000931B9AFDF8\n000000067F0000400200008A590000874000-000000067F0000400200008A590000878000__000000106915EC38\n000000067F0000400200008A590000874000-000000067F0000400200008A590000878000__0000001C760FA190\n000000067F0000400200008A590000874000-000000067F0000400200008A590000878000__00000038E67ABFA0\n000000067F0000400200008A590000874000-000000067F0000400200008A590000878000__0000003903F1CFE8\n000000067F0000400200008A590000874000-00000
0067F0000400200008A590000878000__0000003B99F7F8A0\n000000067F0000400200008A590000874000-000000067F0000400200008A590000878000__0000005D2FFFFB38\n000000067F0000400200008A590000874000-000000067F0000400200008A590000878000__00000073AD3FE6B8\n000000067F0000400200008A590000874000-000000067F0000400200008A590000878000__000000914E3F38F0\n000000067F0000400200008A590000874000-000000067F0000400200008A590000878000__000000931B33AE68\n000000067F0000400200008A590000874000-000000067F0000400200008A590000878000__000000931B9AFDF8\n000000067F0000400200008A59000087693A-000000067F0000400200008A59000087F311__0000000F8985D279-00000010392DE3B9\n000000067F0000400200008A590000878000-000000067F0000400200008A59000087C000__000000106915EC38\n000000067F0000400200008A590000878000-000000067F0000400200008A59000087C000__0000001C760FA190\n000000067F0000400200008A590000878000-000000067F0000400200008A59000087C000__00000038E67ABFA0\n000000067F0000400200008A590000878000-000000067F0000400200008A59000087C000__0000003903F1CFE8\n000000067F0000400200008A590000878000-000000067F0000400200008A59000087C000__0000003B99F7F8A0\n000000067F0000400200008A590000878000-000000067F0000400200008A59000087C000__0000005D2FFFFB38\n000000067F0000400200008A590000878000-000000067F0000400200008A59000087C000__00000073AD3FE6B8\n000000067F0000400200008A590000878000-000000067F0000400200008A59000087C000__000000914E3F38F0\n000000067F0000400200008A590000878000-000000067F0000400200008A59000087C000__000000931B33AE68\n000000067F0000400200008A590000878000-000000067F0000400200008A59000087C000__000000931B9AFDF8\n000000067F0000400200008A59000087C000-000000067F0000400200008A590000880000__000000106915EC38\n000000067F0000400200008A59000087C000-000000067F0000400200008A590000880000__0000001C760FA190\n000000067F0000400200008A59000087C000-000000067F0000400200008A590000880000__00000038E67ABFA0\n000000067F0000400200008A59000087C000-000000067F0000400200008A590000880000__0000003903F1CFE8\n000000067F0000400200008A59000087C000-000000067F0000400200008A59000088000
0__0000003B99F7F8A0\n000000067F0000400200008A59000087C000-000000067F0000400200008A590000880000__0000005D2FFFFB38\n000000067F0000400200008A59000087C000-000000067F0000400200008A590000880000__00000073AD3FE6B8\n000000067F0000400200008A59000087C000-000000067F0000400200008A590000880000__000000914E3F38F0\n000000067F0000400200008A59000087C000-000000067F0000400200008A590000880000__000000931B33AE68\n000000067F0000400200008A59000087C000-000000067F0000400200008A590000880000__000000931B9AFDF8\n000000067F0000400200008A59000087F311-000000067F0000400200008A590000887CE5__0000000F8985D279-00000010392DE3B9\n000000067F0000400200008A590000880000-000000067F0000400200008A590000884000__000000106915EC38\n000000067F0000400200008A590000880000-000000067F0000400200008A590000884000__0000001C760FA190\n000000067F0000400200008A590000880000-000000067F0000400200008A590000884000__00000038E67ABFA0\n000000067F0000400200008A590000880000-000000067F0000400200008A590000884000__0000003903F1CFE8\n000000067F0000400200008A590000880000-000000067F0000400200008A590000884000__0000003B99F7F8A0\n000000067F0000400200008A590000880000-000000067F0000400200008A590000884000__0000005D2FFFFB38\n000000067F0000400200008A590000880000-000000067F0000400200008A590000884000__00000073AD3FE6B8\n000000067F0000400200008A590000880000-000000067F0000400200008A590000884000__000000914E3F38F0\n000000067F0000400200008A590000880000-000000067F0000400200008A590000884000__000000931B33AE68\n000000067F0000400200008A590000880000-000000067F0000400200008A590000884000__000000931B9AFDF8\n000000067F0000400200008A590000884000-000000067F0000400200008A590000888000__000000106915EC38\n000000067F0000400200008A590000884000-000000067F0000400200008A590000888000__0000001C760FA190\n000000067F0000400200008A590000884000-000000067F0000400200008A590000888000__00000038E67ABFA0\n000000067F0000400200008A590000884000-000000067F0000400200008A590000888000__0000003903F1CFE8\n000000067F0000400200008A590000884000-000000067F0000400200008A590000888000__0000003B99F7F8A0\n000000067
F0000400200008A590000884000-000000067F0000400200008A590000888000__0000005D2FFFFB38\n000000067F0000400200008A590000884000-000000067F0000400200008A590000888000__00000073AD3FE6B8\n000000067F0000400200008A590000884000-000000067F0000400200008A590000888000__000000914E3F38F0\n000000067F0000400200008A590000884000-000000067F0000400200008A590000888000__000000931B33AE68\n000000067F0000400200008A590000884000-000000067F0000400200008A590000888000__000000931B9AFDF8\n000000067F0000400200008A590000887CE5-000000067F0000400200008A5900008906C5__0000000F8985D279-00000010392DE3B9\n000000067F0000400200008A590000888000-000000067F0000400200008A59000088C000__000000106915EC38\n000000067F0000400200008A590000888000-000000067F0000400200008A59000088C000__0000001C760FA190\n000000067F0000400200008A590000888000-000000067F0000400200008A59000088C000__00000038E67ABFA0\n000000067F0000400200008A590000888000-000000067F0000400200008A59000088C000__0000003903F1CFE8\n000000067F0000400200008A590000888000-000000067F0000400200008A59000088C000__0000003B99F7F8A0\n000000067F0000400200008A590000888000-000000067F0000400200008A59000088C000__0000005D2FFFFB38\n000000067F0000400200008A590000888000-000000067F0000400200008A59000088C000__00000073AD3FE6B8\n000000067F0000400200008A590000888000-000000067F0000400200008A59000088C000__000000914E3F38F0\n000000067F0000400200008A590000888000-000000067F0000400200008A59000088C000__000000931B33AE68\n000000067F0000400200008A590000888000-000000067F0000400200008A59000088C000__000000931B9AFDF8\n000000067F0000400200008A59000088C000-000000067F0000400200008A590000890000__000000106915EC38\n000000067F0000400200008A59000088C000-000000067F0000400200008A590000890000__0000001C760FA190\n000000067F0000400200008A59000088C000-000000067F0000400200008A590000890000__00000038E67ABFA0\n000000067F0000400200008A59000088C000-000000067F0000400200008A590000890000__0000003903F1CFE8\n000000067F0000400200008A59000088C000-000000067F0000400200008A590000890000__0000003B99F7F8A0\n000000067F0000400200008A59000088C000-00
0000067F0000400200008A590000890000__0000005D2FFFFB38\n000000067F0000400200008A59000088C000-000000067F0000400200008A590000890000__00000073AD3FE6B8\n000000067F0000400200008A59000088C000-000000067F0000400200008A590000890000__000000914E3F38F0\n000000067F0000400200008A59000088C000-000000067F0000400200008A590000890000__000000931B33AE68\n000000067F0000400200008A59000088C000-000000067F0000400200008A590000890000__000000931B9AFDF8\n000000067F0000400200008A590000890000-000000067F0000400200008A590000894000__000000106915EC38\n000000067F0000400200008A590000890000-000000067F0000400200008A590000894000__0000001C760FA190\n000000067F0000400200008A590000890000-000000067F0000400200008A590000894000__00000038E67ABFA0\n000000067F0000400200008A590000890000-000000067F0000400200008A590000894000__0000003903F1CFE8\n000000067F0000400200008A590000890000-000000067F0000400200008A590000894000__0000003B99F7F8A0\n000000067F0000400200008A590000890000-000000067F0000400200008A590000894000__0000005D2FFFFB38\n000000067F0000400200008A590000890000-000000067F0000400200008A590000894000__00000073AD3FE6B8\n000000067F0000400200008A590000890000-000000067F0000400200008A590000894000__000000914E3F38F0\n000000067F0000400200008A590000890000-000000067F0000400200008A590000894000__000000931B33AE68\n000000067F0000400200008A590000890000-000000067F0000400200008A590000894000__000000931B9AFDF8\n000000067F0000400200008A5900008906C5-000000067F0000400200008A5900008990AC__0000000F8985D279-00000010392DE3B9\n000000067F0000400200008A590000894000-000000067F0000400200008A590000898000__000000106915EC38\n000000067F0000400200008A590000894000-000000067F0000400200008A590000898000__0000001C760FA190\n000000067F0000400200008A590000894000-000000067F0000400200008A590000898000__00000038E67ABFA0\n000000067F0000400200008A590000894000-000000067F0000400200008A590000898000__0000003903F1CFE8\n000000067F0000400200008A590000894000-000000067F0000400200008A590000898000__0000003B99F7F8A0\n000000067F0000400200008A590000894000-000000067F0000400200008A59000089
8000__0000005D2FFFFB38\n000000067F0000400200008A590000894000-000000067F0000400200008A590000898000__00000073AD3FE6B8\n000000067F0000400200008A590000894000-000000067F0000400200008A590000898000__000000914E3F38F0\n000000067F0000400200008A590000894000-000000067F0000400200008A590000898000__000000931B33AE68\n000000067F0000400200008A590000894000-000000067F0000400200008A590000898000__000000931B9AFDF8\n000000067F0000400200008A590000898000-000000067F0000400200008A59000089C000__000000106915EC38\n000000067F0000400200008A590000898000-000000067F0000400200008A59000089C000__0000001C760FA190\n000000067F0000400200008A590000898000-000000067F0000400200008A59000089C000__00000038E67ABFA0\n000000067F0000400200008A590000898000-000000067F0000400200008A59000089C000__0000003903F1CFE8\n000000067F0000400200008A590000898000-000000067F0000400200008A59000089C000__0000003B99F7F8A0\n000000067F0000400200008A590000898000-000000067F0000400200008A59000089C000__0000005D2FFFFB38\n000000067F0000400200008A590000898000-000000067F0000400200008A59000089C000__00000073AD3FE6B8\n000000067F0000400200008A590000898000-000000067F0000400200008A59000089C000__000000914E3F38F0\n000000067F0000400200008A590000898000-000000067F0000400200008A59000089C000__000000931B33AE68\n000000067F0000400200008A590000898000-000000067F0000400200008A59000089C000__000000931B9AFDF8\n000000067F0000400200008A5900008990AC-000000067F0000400200008A5900008A1AA3__0000000F8985D279-00000010392DE3B9\n000000067F0000400200008A59000089C000-000000067F0000400200008A5900008A0000__000000106915EC38\n000000067F0000400200008A59000089C000-000000067F0000400200008A5900008A0000__0000001C760FA190\n000000067F0000400200008A59000089C000-000000067F0000400200008A5900008A0000__00000038E67ABFA0\n000000067F0000400200008A59000089C000-000000067F0000400200008A5900008A0000__0000003903F1CFE8\n000000067F0000400200008A59000089C000-000000067F0000400200008A5900008A0000__0000003B99F7F8A0\n000000067F0000400200008A59000089C000-000000067F0000400200008A5900008A0000__0000005D2FFFFB38\n000000
067F0000400200008A59000089C000-000000067F0000400200008A5900008A0000__00000073AD3FE6B8\n000000067F0000400200008A59000089C000-000000067F0000400200008A5900008A0000__000000914E3F38F0\n000000067F0000400200008A59000089C000-000000067F0000400200008A5900008A0000__000000931B33AE68\n000000067F0000400200008A59000089C000-000000067F0000400200008A5900008A0000__000000931B9AFDF8\n000000067F0000400200008A5900008A0000-000000067F0000400200008A5900008A4000__000000106915EC38\n000000067F0000400200008A5900008A0000-000000067F0000400200008A5900008A4000__0000001C760FA190\n000000067F0000400200008A5900008A0000-000000067F0000400200008A5900008A4000__00000038E67ABFA0\n000000067F0000400200008A5900008A0000-000000067F0000400200008A5900008A4000__0000003903F1CFE8\n000000067F0000400200008A5900008A0000-000000067F0000400200008A5900008A4000__0000003B99F7F8A0\n000000067F0000400200008A5900008A0000-000000067F0000400200008A5900008A4000__0000005D2FFFFB38\n000000067F0000400200008A5900008A0000-000000067F0000400200008A5900008A4000__00000073AD3FE6B8\n000000067F0000400200008A5900008A0000-000000067F0000400200008A5900008A4000__000000914E3F38F0\n000000067F0000400200008A5900008A0000-000000067F0000400200008A5900008A4000__000000931B33AE68\n000000067F0000400200008A5900008A0000-000000067F0000400200008A5900008A4000__000000931B9AFDF8\n000000067F0000400200008A5900008A1AA3-000000067F0000400200008A5900008AA478__0000000F8985D279-00000010392DE3B9\n000000067F0000400200008A5900008A4000-000000067F0000400200008A5900008A8000__000000106915EC38\n000000067F0000400200008A5900008A4000-000000067F0000400200008A5900008A8000__0000001C760FA190\n000000067F0000400200008A5900008A4000-000000067F0000400200008A5900008A8000__00000038E67ABFA0\n000000067F0000400200008A5900008A4000-000000067F0000400200008A5900008A8000__0000003903F1CFE8\n000000067F0000400200008A5900008A4000-000000067F0000400200008A5900008A8000__0000003B99F7F8A0\n000000067F0000400200008A5900008A4000-000000067F0000400200008A5900008A8000__0000005D2FFFFB38\n000000067F0000400200008A5900008A4000
-000000067F0000400200008A5900008A8000__00000073AD3FE6B8\n000000067F0000400200008A5900008A4000-000000067F0000400200008A5900008A8000__000000914E3F38F0\n000000067F0000400200008A5900008A4000-000000067F0000400200008A5900008A8000__000000931B33AE68\n000000067F0000400200008A5900008A4000-000000067F0000400200008A5900008A8000__000000931B9AFDF8\n000000067F0000400200008A5900008A8000-000000067F0000400200008A5900008AC000__000000106915EC38\n000000067F0000400200008A5900008A8000-000000067F0000400200008A5900008AC000__0000001C760FA190\n000000067F0000400200008A5900008A8000-000000067F0000400200008A5900008AC000__00000038E67ABFA0\n000000067F0000400200008A5900008A8000-000000067F0000400200008A5900008AC000__0000003903F1CFE8\n000000067F0000400200008A5900008A8000-000000067F0000400200008A5900008AC000__0000003B99F7F8A0\n000000067F0000400200008A5900008A8000-000000067F0000400200008A5900008AC000__0000005D2FFFFB38\n000000067F0000400200008A5900008A8000-000000067F0000400200008A5900008AC000__00000073AD3FE6B8\n000000067F0000400200008A5900008A8000-000000067F0000400200008A5900008AC000__000000914E3F38F0\n000000067F0000400200008A5900008A8000-000000067F0000400200008A5900008AC000__000000931B33AE68\n000000067F0000400200008A5900008A8000-000000067F0000400200008A5900008AC000__000000931B9AFDF8\n000000067F0000400200008A5900008AA478-000000067F0000400200008A5900008B2E53__0000000F8985D279-00000010392DE3B9\n000000067F0000400200008A5900008AC000-000000067F0000400200008A5900008B0000__000000106915EC38\n000000067F0000400200008A5900008AC000-000000067F0000400200008A5900008B0000__0000001C760FA190\n000000067F0000400200008A5900008AC000-000000067F0000400200008A5900008B0000__00000038E67ABFA0\n000000067F0000400200008A5900008AC000-000000067F0000400200008A5900008B0000__0000003903F1CFE8\n000000067F0000400200008A5900008AC000-000000067F0000400200008A5900008B0000__0000003B99F7F8A0\n000000067F0000400200008A5900008AC000-000000067F0000400200008A5900008B0000__0000005D2FFFFB38\n000000067F0000400200008A5900008AC000-000000067F0000400200008A59000
08B0000__00000073AD3FE6B8\n000000067F0000400200008A5900008AC000-000000067F0000400200008A5900008B0000__000000914E3F38F0\n000000067F0000400200008A5900008AC000-000000067F0000400200008A5900008B0000__000000931B33AE68\n000000067F0000400200008A5900008AC000-000000067F0000400200008A5900008B0000__000000931B9AFDF8\n000000067F0000400200008A5900008B0000-000000067F0000400200008A5900008B4000__000000106915EC38\n000000067F0000400200008A5900008B0000-000000067F0000400200008A5900008B4000__0000001C760FA190\n000000067F0000400200008A5900008B0000-000000067F0000400200008A5900008B4000__00000038E67ABFA0\n000000067F0000400200008A5900008B0000-000000067F0000400200008A5900008B4000__0000003903F1CFE8\n000000067F0000400200008A5900008B0000-000000067F0000400200008A5900008B4000__0000003B99F7F8A0\n000000067F0000400200008A5900008B0000-000000067F0000400200008A5900008B4000__0000005D2FFFFB38\n000000067F0000400200008A5900008B0000-000000067F0000400200008A5900008B4000__00000073AD3FE6B8\n000000067F0000400200008A5900008B0000-000000067F0000400200008A5900008B4000__000000914E3F38F0\n000000067F0000400200008A5900008B0000-000000067F0000400200008A5900008B4000__000000931B33AE68\n000000067F0000400200008A5900008B0000-000000067F0000400200008A5900008B4000__000000931B9AFDF8\n000000067F0000400200008A5900008B2E53-000000067F0000400200008A5900008BB832__0000000F8985D279-00000010392DE3B9\n000000067F0000400200008A5900008B4000-000000067F0000400200008A5900008B8000__000000106915EC38\n000000067F0000400200008A5900008B4000-000000067F0000400200008A5900008B8000__0000001C760FA190\n000000067F0000400200008A5900008B4000-000000067F0000400200008A5900008B8000__00000038E67ABFA0\n000000067F0000400200008A5900008B4000-000000067F0000400200008A5900008B8000__0000003903F1CFE8\n000000067F0000400200008A5900008B4000-000000067F0000400200008A5900008B8000__0000003B99F7F8A0\n000000067F0000400200008A5900008B4000-000000067F0000400200008A5900008B8000__0000005D2FFFFB38\n000000067F0000400200008A5900008B4000-000000067F0000400200008A5900008B8000__00000073AD3FE6B8\n000
000067F0000400200008A5900008B4000-000000067F0000400200008A5900008B8000__000000914E3F38F0\n000000067F0000400200008A5900008B4000-000000067F0000400200008A5900008B8000__000000931B33AE68\n000000067F0000400200008A5900008B4000-000000067F0000400200008A5900008B8000__000000931B9AFDF8\n000000067F0000400200008A5900008B8000-000000067F0000400200008A5900008BC000__000000106915EC38\n000000067F0000400200008A5900008B8000-000000067F0000400200008A5900008BC000__0000001C760FA190\n000000067F0000400200008A5900008B8000-000000067F0000400200008A5900008BC000__00000038E67ABFA0\n000000067F0000400200008A5900008B8000-000000067F0000400200008A5900008BC000__0000003903F1CFE8\n000000067F0000400200008A5900008B8000-000000067F0000400200008A5900008BC000__0000003B99F7F8A0\n000000067F0000400200008A5900008B8000-000000067F0000400200008A5900008BC000__0000005D2FFFFB38\n000000067F0000400200008A5900008B8000-000000067F0000400200008A5900008BC000__00000073AD3FE6B8\n000000067F0000400200008A5900008B8000-000000067F0000400200008A5900008BC000__000000914E3F38F0\n000000067F0000400200008A5900008B8000-000000067F0000400200008A5900008BC000__000000931B33AE68\n000000067F0000400200008A5900008B8000-000000067F0000400200008A5900008BC000__000000931B9AFDF8\n000000067F0000400200008A5900008BB832-000000067F0000400200008A5900008C4202__0000000F8985D279-00000010392DE3B9\n000000067F0000400200008A5900008BC000-000000067F0000400200008A5900008C0000__000000106915EC38\n000000067F0000400200008A5900008BC000-000000067F0000400200008A5900008C0000__0000001C760FA190\n000000067F0000400200008A5900008BC000-000000067F0000400200008A5900008C0000__00000038E67ABFA0\n000000067F0000400200008A5900008BC000-000000067F0000400200008A5900008C0000__0000003903F1CFE8\n000000067F0000400200008A5900008BC000-000000067F0000400200008A5900008C0000__0000003B99F7F8A0\n000000067F0000400200008A5900008BC000-000000067F0000400200008A5900008C0000__0000005D2FFFFB38\n000000067F0000400200008A5900008BC000-000000067F0000400200008A5900008C0000__00000073AD3FE6B8\n000000067F0000400200008A5900008BC
000-000000067F0000400200008A5900008C0000__000000914E3F38F0\n000000067F0000400200008A5900008BC000-000000067F0000400200008A5900008C0000__000000931B33AE68\n000000067F0000400200008A5900008BC000-000000067F0000400200008A5900008C0000__000000931B9AFDF8\n000000067F0000400200008A5900008C0000-000000067F0000400200008A5900008C4000__000000106915EC38\n000000067F0000400200008A5900008C0000-000000067F0000400200008A5900008C4000__0000001C760FA190\n000000067F0000400200008A5900008C0000-000000067F0000400200008A5900008C4000__00000038E67ABFA0\n000000067F0000400200008A5900008C0000-000000067F0000400200008A5900008C4000__0000003903F1CFE8\n000000067F0000400200008A5900008C0000-000000067F0000400200008A5900008C4000__0000003B99F7F8A0\n000000067F0000400200008A5900008C0000-000000067F0000400200008A5900008C4000__0000005D2FFFFB38\n000000067F0000400200008A5900008C0000-000000067F0000400200008A5900008C4000__00000073AD3FE6B8\n000000067F0000400200008A5900008C0000-000000067F0000400200008A5900008C4000__000000914E3F38F0\n000000067F0000400200008A5900008C0000-000000067F0000400200008A5900008C4000__000000931B33AE68\n000000067F0000400200008A5900008C0000-000000067F0000400200008A5900008C4000__000000931B9AFDF8\n000000067F0000400200008A5900008C4000-000000067F0000400200008A5900008C8000__000000106915EC38\n000000067F0000400200008A5900008C4000-000000067F0000400200008A5900008C8000__0000001C760FA190\n000000067F0000400200008A5900008C4000-000000067F0000400200008A5900008C8000__00000038E67ABFA0\n000000067F0000400200008A5900008C4000-000000067F0000400200008A5900008C8000__0000003903F1CFE8\n000000067F0000400200008A5900008C4000-000000067F0000400200008A5900008C8000__0000003B99F7F8A0\n000000067F0000400200008A5900008C4000-000000067F0000400200008A5900008C8000__0000005D2FFFFB38\n000000067F0000400200008A5900008C4000-000000067F0000400200008A5900008C8000__00000073AD3FE6B8\n000000067F0000400200008A5900008C4000-000000067F0000400200008A5900008C8000__000000914E3F38F0\n000000067F0000400200008A5900008C4000-000000067F0000400200008A5900008C8000__00000
0931B33AE68\n000000067F0000400200008A5900008C4000-000000067F0000400200008A5900008C8000__000000931B9AFDF8\n000000067F0000400200008A5900008C4202-000000067F0000400200008A590100000000__0000000F8985D279-00000010392DE3B9\n000000067F0000400200008A5900008C4513-000000067F0000400200008A5900008CCEF0__00000010392DE3B9-00000010E8D5E0A1\n000000067F0000400200008A5900008C8000-000000067F0000400200008A5900008CC000__000000106915EC38\n000000067F0000400200008A5900008C8000-000000067F0000400200008A5900008CC000__0000001C760FA190\n000000067F0000400200008A5900008C8000-000000067F0000400200008A5900008CC000__00000038E67ABFA0\n000000067F0000400200008A5900008C8000-000000067F0000400200008A5900008CC000__0000003903F1CFE8\n000000067F0000400200008A5900008C8000-000000067F0000400200008A5900008CC000__0000003B99F7F8A0\n000000067F0000400200008A5900008C8000-000000067F0000400200008A5900008CC000__0000005D2FFFFB38\n000000067F0000400200008A5900008C8000-000000067F0000400200008A5900008CC000__00000073AD3FE6B8\n000000067F0000400200008A5900008C8000-000000067F0000400200008A5900008CC000__000000914E3F38F0\n000000067F0000400200008A5900008C8000-000000067F0000400200008A5900008CC000__000000931B33AE68\n000000067F0000400200008A5900008C8000-000000067F0000400200008A5900008CC000__000000931B9AFDF8\n000000067F0000400200008A5900008CC000-000000067F0000400200008A5900008D0000__000000106915EC38\n000000067F0000400200008A5900008CC000-000000067F0000400200008A5900008D0000__0000001C760FA190\n000000067F0000400200008A5900008CC000-000000067F0000400200008A5900008D0000__00000038E67ABFA0\n000000067F0000400200008A5900008CC000-000000067F0000400200008A5900008D0000__0000003903F1CFE8\n000000067F0000400200008A5900008CC000-000000067F0000400200008A5900008D0000__0000003B99F7F8A0\n000000067F0000400200008A5900008CC000-000000067F0000400200008A5900008D0000__0000005D2FFFFB38\n000000067F0000400200008A5900008CC000-000000067F0000400200008A5900008D0000__00000073AD3FE6B8\n000000067F0000400200008A5900008CC000-000000067F0000400200008A5900008D0000__000000914E3F38F0\n
000000067F0000400200008A5900008CC000-000000067F0000400200008A5900008D0000__000000931B33AE68\n000000067F0000400200008A5900008CC000-000000067F0000400200008A5900008D0000__000000931B9AFDF8\n000000067F0000400200008A5900008CCEF0-000000067F0000400200008A5900008D58DC__00000010392DE3B9-00000010E8D5E0A1\n000000067F0000400200008A5900008D0000-000000067F0000400200008A5900008D4000__000000106915EC38\n000000067F0000400200008A5900008D0000-000000067F0000400200008A5900008D4000__0000001C760FA190\n000000067F0000400200008A5900008D0000-000000067F0000400200008A5900008D4000__00000038E67ABFA0\n000000067F0000400200008A5900008D0000-000000067F0000400200008A5900008D4000__0000003903F1CFE8\n000000067F0000400200008A5900008D0000-000000067F0000400200008A5900008D4000__0000003B99F7F8A0\n000000067F0000400200008A5900008D0000-000000067F0000400200008A5900008D4000__0000005D2FFFFB38\n000000067F0000400200008A5900008D0000-000000067F0000400200008A5900008D4000__00000073AD3FE6B8\n000000067F0000400200008A5900008D0000-000000067F0000400200008A5900008D4000__000000914E3F38F0\n000000067F0000400200008A5900008D0000-000000067F0000400200008A5900008D4000__000000931B33AE68\n000000067F0000400200008A5900008D0000-000000067F0000400200008A5900008D4000__000000931B9AFDF8\n000000067F0000400200008A5900008D4000-000000067F0000400200008A5900008D8000__000000106915EC38\n000000067F0000400200008A5900008D4000-000000067F0000400200008A5900008D8000__0000001C760FA190\n000000067F0000400200008A5900008D4000-000000067F0000400200008A5900008D8000__00000038E67ABFA0\n000000067F0000400200008A5900008D4000-000000067F0000400200008A5900008D8000__0000003903F1CFE8\n000000067F0000400200008A5900008D4000-000000067F0000400200008A5900008D8000__0000003B99F7F8A0\n000000067F0000400200008A5900008D4000-000000067F0000400200008A5900008D8000__0000005D2FFFFB38\n000000067F0000400200008A5900008D4000-000000067F0000400200008A5900008D8000__00000073AD3FE6B8\n000000067F0000400200008A5900008D4000-000000067F0000400200008A5900008D8000__000000914E3F38F0\n000000067F0000400200008A590000
8D4000-000000067F0000400200008A5900008D8000__000000931B33AE68\n000000067F0000400200008A5900008D4000-000000067F0000400200008A5900008D8000__000000931B9AFDF8\n000000067F0000400200008A5900008D58DC-000000067F0000400200008A5900008DE2CB__00000010392DE3B9-00000010E8D5E0A1\n000000067F0000400200008A5900008D8000-000000067F0000400200008A5900008DC000__000000106915EC38\n000000067F0000400200008A5900008D8000-000000067F0000400200008A5900008DC000__0000001C760FA190\n000000067F0000400200008A5900008D8000-000000067F0000400200008A5900008DC000__00000038E67ABFA0\n000000067F0000400200008A5900008D8000-000000067F0000400200008A5900008DC000__0000003903F1CFE8\n000000067F0000400200008A5900008D8000-000000067F0000400200008A5900008DC000__0000003B99F7F8A0\n000000067F0000400200008A5900008D8000-000000067F0000400200008A5900008DC000__0000005D2FFFFB38\n000000067F0000400200008A5900008D8000-000000067F0000400200008A5900008DC000__00000073AD3FE6B8\n000000067F0000400200008A5900008D8000-000000067F0000400200008A5900008DC000__000000914E3F38F0\n000000067F0000400200008A5900008D8000-000000067F0000400200008A5900008DC000__000000931B33AE68\n000000067F0000400200008A5900008D8000-000000067F0000400200008A5900008DC000__000000931B9AFDF8\n000000067F0000400200008A5900008DC000-000000067F0000400200008A5900008E0000__0000001C760FA190\n000000067F0000400200008A5900008DC000-000000067F0000400200008A5900008E0000__00000038E67ABFA0\n000000067F0000400200008A5900008DC000-000000067F0000400200008A5900008E0000__0000003903F1CFE8\n000000067F0000400200008A5900008DC000-000000067F0000400200008A5900008E0000__0000003B99F7F8A0\n000000067F0000400200008A5900008DC000-000000067F0000400200008A5900008E0000__0000005D2FFFFB38\n000000067F0000400200008A5900008DC000-000000067F0000400200008A5900008E0000__00000073AD3FE6B8\n000000067F0000400200008A5900008DC000-000000067F0000400200008A5900008E0000__000000914E3F38F0\n000000067F0000400200008A5900008DC000-000000067F0000400200008A5900008E0000__000000931B33AE68\n000000067F0000400200008A5900008DC000-000000067F0000400200008
A5900008E0000__000000931B9AFDF8\n000000067F0000400200008A5900008DC000-030000000000000000000000000000000002__000000106915EC38\n000000067F0000400200008A5900008DE2CB-000000067F0000400200008A5900008E6C9E__00000010392DE3B9-00000010E8D5E0A1\n000000067F0000400200008A5900008E0000-000000067F0000400200008A5900008E4000__0000001C760FA190\n000000067F0000400200008A5900008E0000-000000067F0000400200008A5900008E4000__00000038E67ABFA0\n000000067F0000400200008A5900008E0000-000000067F0000400200008A5900008E4000__0000003903F1CFE8\n000000067F0000400200008A5900008E0000-000000067F0000400200008A5900008E4000__0000003B99F7F8A0\n000000067F0000400200008A5900008E0000-000000067F0000400200008A5900008E4000__0000005D2FFFFB38\n000000067F0000400200008A5900008E0000-000000067F0000400200008A5900008E4000__00000073AD3FE6B8\n000000067F0000400200008A5900008E0000-000000067F0000400200008A5900008E4000__000000914E3F38F0\n000000067F0000400200008A5900008E0000-000000067F0000400200008A5900008E4000__000000931B33AE68\n000000067F0000400200008A5900008E0000-000000067F0000400200008A5900008E4000__000000931B9AFDF8\n000000067F0000400200008A5900008E4000-000000067F0000400200008A5900008E8000__0000001C760FA190\n000000067F0000400200008A5900008E4000-000000067F0000400200008A5900008E8000__00000038E67ABFA0\n000000067F0000400200008A5900008E4000-000000067F0000400200008A5900008E8000__0000003903F1CFE8\n000000067F0000400200008A5900008E4000-000000067F0000400200008A5900008E8000__0000003B99F7F8A0\n000000067F0000400200008A5900008E4000-000000067F0000400200008A5900008E8000__0000005D2FFFFB38\n000000067F0000400200008A5900008E4000-000000067F0000400200008A5900008E8000__00000073AD3FE6B8\n000000067F0000400200008A5900008E4000-000000067F0000400200008A5900008E8000__000000914E3F38F0\n000000067F0000400200008A5900008E4000-000000067F0000400200008A5900008E8000__000000931B33AE68\n000000067F0000400200008A5900008E4000-000000067F0000400200008A5900008E8000__000000931B9AFDF8\n000000067F0000400200008A5900008E6C9E-000000067F0000400200008A5900008EF67E__00000010392DE3B
9-00000010E8D5E0A1\n000000067F0000400200008A5900008E8000-000000067F0000400200008A5900008EC000__0000001C760FA190\n000000067F0000400200008A5900008E8000-000000067F0000400200008A5900008EC000__00000038E67ABFA0\n000000067F0000400200008A5900008E8000-000000067F0000400200008A5900008EC000__0000003903F1CFE8\n000000067F0000400200008A5900008E8000-000000067F0000400200008A5900008EC000__0000003B99F7F8A0\n000000067F0000400200008A5900008E8000-000000067F0000400200008A5900008EC000__0000005D2FFFFB38\n000000067F0000400200008A5900008E8000-000000067F0000400200008A5900008EC000__00000073AD3FE6B8\n000000067F0000400200008A5900008E8000-000000067F0000400200008A5900008EC000__000000914E3F38F0\n000000067F0000400200008A5900008E8000-000000067F0000400200008A5900008EC000__000000931B33AE68\n000000067F0000400200008A5900008E8000-000000067F0000400200008A5900008EC000__000000931B9AFDF8\n000000067F0000400200008A5900008EC000-000000067F0000400200008A5900008F0000__0000001C760FA190\n000000067F0000400200008A5900008EC000-000000067F0000400200008A5900008F0000__00000038E67ABFA0\n000000067F0000400200008A5900008EC000-000000067F0000400200008A5900008F0000__0000003903F1CFE8\n000000067F0000400200008A5900008EC000-000000067F0000400200008A5900008F0000__0000003B99F7F8A0\n000000067F0000400200008A5900008EC000-000000067F0000400200008A5900008F0000__0000005D2FFFFB38\n000000067F0000400200008A5900008EC000-000000067F0000400200008A5900008F0000__00000073AD3FE6B8\n000000067F0000400200008A5900008EC000-000000067F0000400200008A5900008F0000__000000914E3F38F0\n000000067F0000400200008A5900008EC000-000000067F0000400200008A5900008F0000__000000931B33AE68\n000000067F0000400200008A5900008EC000-000000067F0000400200008A5900008F0000__000000931B9AFDF8\n000000067F0000400200008A5900008EF67E-000000067F0000400200008A5900008F805B__00000010392DE3B9-00000010E8D5E0A1\n000000067F0000400200008A5900008F0000-000000067F0000400200008A5900008F4000__0000001C760FA190\n000000067F0000400200008A5900008F0000-000000067F0000400200008A5900008F4000__00000038E67ABFA0\n000000067F
0000400200008A5900008F0000-000000067F0000400200008A5900008F4000__0000003903F1CFE8\n000000067F0000400200008A5900008F0000-000000067F0000400200008A5900008F4000__0000003B99F7F8A0\n000000067F0000400200008A5900008F0000-000000067F0000400200008A5900008F4000__0000005D2FFFFB38\n000000067F0000400200008A5900008F0000-000000067F0000400200008A5900008F4000__00000073AD3FE6B8\n000000067F0000400200008A5900008F0000-000000067F0000400200008A5900008F4000__000000914E3F38F0\n000000067F0000400200008A5900008F0000-000000067F0000400200008A5900008F4000__000000931B33AE68\n000000067F0000400200008A5900008F0000-000000067F0000400200008A5900008F4000__000000931B9AFDF8\n000000067F0000400200008A5900008F4000-000000067F0000400200008A5900008F8000__0000001C760FA190\n000000067F0000400200008A5900008F4000-000000067F0000400200008A5900008F8000__00000038E67ABFA0\n000000067F0000400200008A5900008F4000-000000067F0000400200008A5900008F8000__0000003903F1CFE8\n000000067F0000400200008A5900008F4000-000000067F0000400200008A5900008F8000__0000003B99F7F8A0\n000000067F0000400200008A5900008F4000-000000067F0000400200008A5900008F8000__0000005D2FFFFB38\n000000067F0000400200008A5900008F4000-000000067F0000400200008A5900008F8000__00000073AD3FE6B8\n000000067F0000400200008A5900008F4000-000000067F0000400200008A5900008F8000__000000914E3F38F0\n000000067F0000400200008A5900008F4000-000000067F0000400200008A5900008F8000__000000931B33AE68\n000000067F0000400200008A5900008F4000-000000067F0000400200008A5900008F8000__000000931B9AFDF8\n000000067F0000400200008A5900008F8000-000000067F0000400200008A5900008FC000__0000001C760FA190\n000000067F0000400200008A5900008F8000-000000067F0000400200008A5900008FC000__00000038E67ABFA0\n000000067F0000400200008A5900008F8000-000000067F0000400200008A5900008FC000__0000003903F1CFE8\n000000067F0000400200008A5900008F8000-000000067F0000400200008A5900008FC000__0000003B99F7F8A0\n000000067F0000400200008A5900008F8000-000000067F0000400200008A5900008FC000__0000005D2FFFFB38\n000000067F0000400200008A5900008F8000-000000067F0000400200
008A5900008FC000__00000073AD3FE6B8\n000000067F0000400200008A5900008F8000-000000067F0000400200008A5900008FC000__000000914E3F38F0\n000000067F0000400200008A5900008F8000-000000067F0000400200008A5900008FC000__000000931B33AE68\n000000067F0000400200008A5900008F8000-000000067F0000400200008A5900008FC000__000000931B9AFDF8\n000000067F0000400200008A5900008F805B-000000067F0000400200008A590000900A29__00000010392DE3B9-00000010E8D5E0A1\n000000067F0000400200008A5900008FC000-000000067F0000400200008A590000900000__0000001C760FA190\n000000067F0000400200008A5900008FC000-000000067F0000400200008A590000900000__00000038E67ABFA0\n000000067F0000400200008A5900008FC000-000000067F0000400200008A590000900000__0000003903F1CFE8\n000000067F0000400200008A5900008FC000-000000067F0000400200008A590000900000__0000003B99F7F8A0\n000000067F0000400200008A5900008FC000-000000067F0000400200008A590000900000__0000005D2FFFFB38\n000000067F0000400200008A5900008FC000-000000067F0000400200008A590000900000__00000073AD3FE6B8\n000000067F0000400200008A5900008FC000-000000067F0000400200008A590000900000__000000914E3F38F0\n000000067F0000400200008A5900008FC000-000000067F0000400200008A590000900000__000000931B33AE68\n000000067F0000400200008A5900008FC000-000000067F0000400200008A590000900000__000000931B9AFDF8\n000000067F0000400200008A590000900000-000000067F0000400200008A590000904000__0000001C760FA190\n000000067F0000400200008A590000900000-000000067F0000400200008A590000904000__00000038E67ABFA0\n000000067F0000400200008A590000900000-000000067F0000400200008A590000904000__0000003903F1CFE8\n000000067F0000400200008A590000900000-000000067F0000400200008A590000904000__0000003B99F7F8A0\n000000067F0000400200008A590000900000-000000067F0000400200008A590000904000__0000005D2FFFFB38\n000000067F0000400200008A590000900000-000000067F0000400200008A590000904000__00000073AD3FE6B8\n000000067F0000400200008A590000900000-000000067F0000400200008A590000904000__000000914E3F38F0\n000000067F0000400200008A590000900000-000000067F0000400200008A590000904000__000000931B33
AE68\n000000067F0000400200008A590000900000-000000067F0000400200008A590000904000__000000931B9AFDF8\n000000067F0000400200008A590000900A29-000000067F0000400200008A59000090940E__00000010392DE3B9-00000010E8D5E0A1\n000000067F0000400200008A590000904000-000000067F0000400200008A590000908000__0000001C760FA190\n000000067F0000400200008A590000904000-000000067F0000400200008A590000908000__00000038E67ABFA0\n000000067F0000400200008A590000904000-000000067F0000400200008A590000908000__0000003903F1CFE8\n000000067F0000400200008A590000904000-000000067F0000400200008A590000908000__0000003B99F7F8A0\n000000067F0000400200008A590000904000-000000067F0000400200008A590000908000__0000005D2FFFFB38\n000000067F0000400200008A590000904000-000000067F0000400200008A590000908000__00000073AD3FE6B8\n000000067F0000400200008A590000904000-000000067F0000400200008A590000908000__000000914E3F38F0\n000000067F0000400200008A590000904000-000000067F0000400200008A590000908000__000000931B33AE68\n000000067F0000400200008A590000904000-000000067F0000400200008A590000908000__000000931B9AFDF8\n000000067F0000400200008A590000908000-000000067F0000400200008A59000090C000__0000001C760FA190\n000000067F0000400200008A590000908000-000000067F0000400200008A59000090C000__00000038E67ABFA0\n000000067F0000400200008A590000908000-000000067F0000400200008A59000090C000__0000003903F1CFE8\n000000067F0000400200008A590000908000-000000067F0000400200008A59000090C000__0000003B99F7F8A0\n000000067F0000400200008A590000908000-000000067F0000400200008A59000090C000__0000005D2FFFFB38\n000000067F0000400200008A590000908000-000000067F0000400200008A59000090C000__00000073AD3FE6B8\n000000067F0000400200008A590000908000-000000067F0000400200008A59000090C000__000000914E3F38F0\n000000067F0000400200008A590000908000-000000067F0000400200008A59000090C000__000000931B33AE68\n000000067F0000400200008A590000908000-000000067F0000400200008A59000090C000__000000931B9AFDF8\n000000067F0000400200008A59000090940E-000000067F0000400200008A590000911DE5__00000010392DE3B9-00000010E8D5E0A1\n0000000
67F0000400200008A59000090C000-000000067F0000400200008A590000910000__0000001C760FA190\n000000067F0000400200008A59000090C000-000000067F0000400200008A590000910000__00000038E67ABFA0\n000000067F0000400200008A59000090C000-000000067F0000400200008A590000910000__0000003903F1CFE8\n000000067F0000400200008A59000090C000-000000067F0000400200008A590000910000__0000003B99F7F8A0\n000000067F0000400200008A59000090C000-000000067F0000400200008A590000910000__0000005D2FFFFB38\n000000067F0000400200008A59000090C000-000000067F0000400200008A590000910000__00000073AD3FE6B8\n000000067F0000400200008A59000090C000-000000067F0000400200008A590000910000__000000914E3F38F0\n000000067F0000400200008A59000090C000-000000067F0000400200008A590000910000__000000931B33AE68\n000000067F0000400200008A59000090C000-000000067F0000400200008A590000910000__000000931B9AFDF8\n000000067F0000400200008A590000910000-000000067F0000400200008A590000914000__0000001C760FA190\n000000067F0000400200008A590000910000-000000067F0000400200008A590000914000__00000038E67ABFA0\n000000067F0000400200008A590000910000-000000067F0000400200008A590000914000__0000003903F1CFE8\n000000067F0000400200008A590000910000-000000067F0000400200008A590000914000__0000003B99F7F8A0\n000000067F0000400200008A590000910000-000000067F0000400200008A590000914000__0000005D2FFFFB38\n000000067F0000400200008A590000910000-000000067F0000400200008A590000914000__00000073AD3FE6B8\n000000067F0000400200008A590000910000-000000067F0000400200008A590000914000__000000914E3F38F0\n000000067F0000400200008A590000910000-000000067F0000400200008A590000914000__000000931B33AE68\n000000067F0000400200008A590000910000-000000067F0000400200008A590000914000__000000931B9AFDF8\n000000067F0000400200008A590000911DE5-000000067F0000400200008A59000091A7C5__00000010392DE3B9-00000010E8D5E0A1\n000000067F0000400200008A590000914000-000000067F0000400200008A590000918000__0000001C760FA190\n000000067F0000400200008A590000914000-000000067F0000400200008A590000918000__00000038E67ABFA0\n000000067F0000400200008A590000914000-
000000067F0000400200008A590000918000__0000003903F1CFE8\n000000067F0000400200008A590000914000-000000067F0000400200008A590000918000__0000003B99F7F8A0\n000000067F0000400200008A590000914000-000000067F0000400200008A590000918000__0000005D2FFFFB38\n000000067F0000400200008A590000914000-000000067F0000400200008A590000918000__00000073AD3FE6B8\n000000067F0000400200008A590000914000-000000067F0000400200008A590000918000__000000914E3F38F0\n000000067F0000400200008A590000914000-000000067F0000400200008A590000918000__000000931B33AE68\n000000067F0000400200008A590000914000-000000067F0000400200008A590000918000__000000931B9AFDF8\n000000067F0000400200008A590000918000-000000067F0000400200008A59000091C000__0000001C760FA190\n000000067F0000400200008A590000918000-000000067F0000400200008A59000091C000__00000038E67ABFA0\n000000067F0000400200008A590000918000-000000067F0000400200008A59000091C000__0000003903F1CFE8\n000000067F0000400200008A590000918000-000000067F0000400200008A59000091C000__0000003B99F7F8A0\n000000067F0000400200008A590000918000-000000067F0000400200008A59000091C000__0000005D2FFFFB38\n000000067F0000400200008A590000918000-000000067F0000400200008A59000091C000__00000073AD3FE6B8\n000000067F0000400200008A590000918000-000000067F0000400200008A59000091C000__000000914E3F38F0\n000000067F0000400200008A590000918000-000000067F0000400200008A59000091C000__000000931B33AE68\n000000067F0000400200008A590000918000-000000067F0000400200008A59000091C000__000000931B9AFDF8\n000000067F0000400200008A59000091A7C5-000000067F0000400200008A590000923196__00000010392DE3B9-00000010E8D5E0A1\n000000067F0000400200008A59000091C000-000000067F0000400200008A590000920000__0000001C760FA190\n000000067F0000400200008A59000091C000-000000067F0000400200008A590000920000__00000038E67ABFA0\n000000067F0000400200008A59000091C000-000000067F0000400200008A590000920000__0000003903F1CFE8\n000000067F0000400200008A59000091C000-000000067F0000400200008A590000920000__0000003B99F7F8A0\n000000067F0000400200008A59000091C000-000000067F0000400200008A590000
920000__0000005D2FFFFB38\n000000067F0000400200008A59000091C000-000000067F0000400200008A590000920000__00000073AD3FE6B8\n000000067F0000400200008A59000091C000-000000067F0000400200008A590000920000__000000914E3F38F0\n000000067F0000400200008A59000091C000-000000067F0000400200008A590000920000__000000931B33AE68\n000000067F0000400200008A59000091C000-000000067F0000400200008A590000920000__000000931B9AFDF8\n000000067F0000400200008A590000920000-000000067F0000400200008A590000924000__0000001C725A2400\n000000067F0000400200008A590000920000-000000067F0000400200008A590000924000__0000001C760FA190\n000000067F0000400200008A590000920000-000000067F0000400200008A590000924000__00000038E67ABFA0\n000000067F0000400200008A590000920000-000000067F0000400200008A590000924000__0000003903F1CFE8\n000000067F0000400200008A590000920000-000000067F0000400200008A590000924000__0000003B99F7F8A0\n000000067F0000400200008A590000920000-000000067F0000400200008A590000924000__0000005D2FFFFB38\n000000067F0000400200008A590000920000-000000067F0000400200008A590000924000__00000073AD3FE6B8\n000000067F0000400200008A590000920000-000000067F0000400200008A590000924000__000000914E3F38F0\n000000067F0000400200008A590000920000-000000067F0000400200008A590000924000__000000931B33AE68\n000000067F0000400200008A590000920000-000000067F0000400200008A590000924000__000000931B9AFDF8\n000000067F0000400200008A590000923196-000000067F0000400200008A590100000000__00000010392DE3B9-00000010E8D5E0A1\n000000067F0000400200008A5900009234AC-000000067F0000400200008A59000092BE93__00000010E8D5E0A1-00000011987BE139\n000000067F0000400200008A590000924000-000000067F0000400200008A590000928000__0000001C725A2400\n000000067F0000400200008A590000924000-000000067F0000400200008A590000928000__0000001C760FA190\n000000067F0000400200008A590000924000-000000067F0000400200008A590000928000__00000038E67ABFA0\n000000067F0000400200008A590000924000-000000067F0000400200008A590000928000__0000003903F1CFE8\n000000067F0000400200008A590000924000-000000067F0000400200008A590000928000__00000
03B99F7F8A0\n000000067F0000400200008A590000924000-000000067F0000400200008A590000928000__0000005D2FFFFB38\n000000067F0000400200008A590000924000-000000067F0000400200008A590000928000__00000073AD3FE6B8\n000000067F0000400200008A590000924000-000000067F0000400200008A590000928000__000000914E3F38F0\n000000067F0000400200008A590000924000-000000067F0000400200008A590000928000__000000931B33AE68\n000000067F0000400200008A590000924000-000000067F0000400200008A590000928000__000000931B9AFDF8\n000000067F0000400200008A590000928000-000000067F0000400200008A59000092C000__0000001C725A2400\n000000067F0000400200008A590000928000-000000067F0000400200008A59000092C000__0000001C760FA190\n000000067F0000400200008A590000928000-000000067F0000400200008A59000092C000__00000038E67ABFA0\n000000067F0000400200008A590000928000-000000067F0000400200008A59000092C000__0000003903F1CFE8\n000000067F0000400200008A590000928000-000000067F0000400200008A59000092C000__0000003B99F7F8A0\n000000067F0000400200008A590000928000-000000067F0000400200008A59000092C000__0000005D2FFFFB38\n000000067F0000400200008A590000928000-000000067F0000400200008A59000092C000__00000073AD3FE6B8\n000000067F0000400200008A590000928000-000000067F0000400200008A59000092C000__000000914E3F38F0\n000000067F0000400200008A590000928000-000000067F0000400200008A59000092C000__000000931B33AE68\n000000067F0000400200008A590000928000-000000067F0000400200008A59000092C000__000000931B9AFDF8\n000000067F0000400200008A59000092BE93-000000067F0000400200008A590000934873__00000010E8D5E0A1-00000011987BE139\n000000067F0000400200008A59000092C000-000000067F0000400200008A590000930000__0000001C725A2400\n000000067F0000400200008A59000092C000-000000067F0000400200008A590000930000__0000001C760FA190\n000000067F0000400200008A59000092C000-000000067F0000400200008A590000930000__00000038E67ABFA0\n000000067F0000400200008A59000092C000-000000067F0000400200008A590000930000__0000003903F1CFE8\n000000067F0000400200008A59000092C000-000000067F0000400200008A590000930000__0000003B99F7F8A0\n000000067F0000400
200008A59000092C000-000000067F0000400200008A590000930000__0000005D2FFFFB38\n000000067F0000400200008A59000092C000-000000067F0000400200008A590000930000__00000073AD3FE6B8\n000000067F0000400200008A59000092C000-000000067F0000400200008A590000930000__000000914E3F38F0\n000000067F0000400200008A59000092C000-000000067F0000400200008A590000930000__000000931B33AE68\n000000067F0000400200008A59000092C000-000000067F0000400200008A590000930000__000000931B9AFDF8\n000000067F0000400200008A590000930000-000000067F0000400200008A590000934000__0000001C725A2400\n000000067F0000400200008A590000930000-000000067F0000400200008A590000934000__0000001C760FA190\n000000067F0000400200008A590000930000-000000067F0000400200008A590000934000__00000038E67ABFA0\n000000067F0000400200008A590000930000-000000067F0000400200008A590000934000__0000003903F1CFE8\n000000067F0000400200008A590000930000-000000067F0000400200008A590000934000__0000003B99F7F8A0\n000000067F0000400200008A590000930000-000000067F0000400200008A590000934000__0000005D2FFFFB38\n000000067F0000400200008A590000930000-000000067F0000400200008A590000934000__00000073AD3FE6B8\n000000067F0000400200008A590000930000-000000067F0000400200008A590000934000__000000914E3F38F0\n000000067F0000400200008A590000930000-000000067F0000400200008A590000934000__000000931B33AE68\n000000067F0000400200008A590000930000-000000067F0000400200008A590000934000__000000931B9AFDF8\n000000067F0000400200008A590000934000-000000067F0000400200008A590000938000__0000001C725A2400\n000000067F0000400200008A590000934000-000000067F0000400200008A590000938000__0000001C760FA190\n000000067F0000400200008A590000934000-000000067F0000400200008A590000938000__00000038E67ABFA0\n000000067F0000400200008A590000934000-000000067F0000400200008A590000938000__0000003903F1CFE8\n000000067F0000400200008A590000934000-000000067F0000400200008A590000938000__0000003B99F7F8A0\n000000067F0000400200008A590000934000-000000067F0000400200008A590000938000__0000005D2FFFFB38\n000000067F0000400200008A590000934000-000000067F0000400200008A590
000938000__00000073AD3FE6B8\n000000067F0000400200008A590000934000-000000067F0000400200008A590000938000__000000914E3F38F0\n000000067F0000400200008A590000934000-000000067F0000400200008A590000938000__000000931B33AE68\n000000067F0000400200008A590000934000-000000067F0000400200008A590000938000__000000931B9AFDF8\n000000067F0000400200008A590000934873-000000067F0000400200008A59000093D24E__00000010E8D5E0A1-00000011987BE139\n000000067F0000400200008A590000938000-000000067F0000400200008A59000093C000__0000001C725A2400\n000000067F0000400200008A590000938000-000000067F0000400200008A59000093C000__0000001C760FA190\n000000067F0000400200008A590000938000-000000067F0000400200008A59000093C000__00000038E67ABFA0\n000000067F0000400200008A590000938000-000000067F0000400200008A59000093C000__0000003903F1CFE8\n000000067F0000400200008A590000938000-000000067F0000400200008A59000093C000__0000003B99F7F8A0\n000000067F0000400200008A590000938000-000000067F0000400200008A59000093C000__0000005D2FFFFB38\n000000067F0000400200008A590000938000-000000067F0000400200008A59000093C000__00000073AD3FE6B8\n000000067F0000400200008A590000938000-000000067F0000400200008A59000093C000__000000914E3F38F0\n000000067F0000400200008A590000938000-000000067F0000400200008A59000093C000__000000931B33AE68\n000000067F0000400200008A590000938000-000000067F0000400200008A59000093C000__000000931B9AFDF8\n000000067F0000400200008A59000093C000-000000067F0000400200008A590000940000__0000001C725A2400\n000000067F0000400200008A59000093C000-000000067F0000400200008A590000940000__0000001C760FA190\n000000067F0000400200008A59000093C000-000000067F0000400200008A590000940000__00000038E67ABFA0\n000000067F0000400200008A59000093C000-000000067F0000400200008A590000940000__0000003903F1CFE8\n000000067F0000400200008A59000093C000-000000067F0000400200008A590000940000__0000003B99F7F8A0\n000000067F0000400200008A59000093C000-000000067F0000400200008A590000940000__0000005D2FFFFB38\n000000067F0000400200008A59000093C000-000000067F0000400200008A590000940000__00000073AD3FE6B8\n0
00000067F0000400200008A59000093C000-000000067F0000400200008A590000940000__000000914E3F38F0\n000000067F0000400200008A59000093C000-000000067F0000400200008A590000940000__000000931B33AE68\n000000067F0000400200008A59000093C000-000000067F0000400200008A590000940000__000000931B9AFDF8\n000000067F0000400200008A59000093D24E-000000067F0000400200008A590000945C33__00000010E8D5E0A1-00000011987BE139\n000000067F0000400200008A590000940000-000000067F0000400200008A590000944000__0000001C725A2400\n000000067F0000400200008A590000940000-000000067F0000400200008A590000944000__0000001C760FA190\n000000067F0000400200008A590000940000-000000067F0000400200008A590000944000__00000038E67ABFA0\n000000067F0000400200008A590000940000-000000067F0000400200008A590000944000__0000003903F1CFE8\n000000067F0000400200008A590000940000-000000067F0000400200008A590000944000__0000003B99F7F8A0\n000000067F0000400200008A590000940000-000000067F0000400200008A590000944000__0000005D2FFFFB38\n000000067F0000400200008A590000940000-000000067F0000400200008A590000944000__00000073AD3FE6B8\n000000067F0000400200008A590000940000-000000067F0000400200008A590000944000__000000914E3F38F0\n000000067F0000400200008A590000940000-000000067F0000400200008A590000944000__000000931B33AE68\n000000067F0000400200008A590000940000-000000067F0000400200008A590000944000__000000931B9AFDF8\n000000067F0000400200008A590000944000-000000067F0000400200008A590000948000__0000001C725A2400\n000000067F0000400200008A590000944000-000000067F0000400200008A590000948000__0000001C760FA190\n000000067F0000400200008A590000944000-000000067F0000400200008A590000948000__00000038E67ABFA0\n000000067F0000400200008A590000944000-000000067F0000400200008A590000948000__0000003903F1CFE8\n000000067F0000400200008A590000944000-000000067F0000400200008A590000948000__0000003B99F7F8A0\n000000067F0000400200008A590000944000-000000067F0000400200008A590000948000__0000005D2FFFFB38\n000000067F0000400200008A590000944000-000000067F0000400200008A590000948000__00000073AD3FE6B8\n000000067F0000400200008A5900009
44000-000000067F0000400200008A590000948000__000000914E3F38F0\n000000067F0000400200008A590000944000-000000067F0000400200008A590000948000__000000931B33AE68\n000000067F0000400200008A590000944000-000000067F0000400200008A590000948000__000000931B9AFDF8\n000000067F0000400200008A590000945C33-000000067F0000400200008A59000094E60F__00000010E8D5E0A1-00000011987BE139\n000000067F0000400200008A590000948000-000000067F0000400200008A59000094C000__0000001C725A2400\n000000067F0000400200008A590000948000-000000067F0000400200008A59000094C000__0000001C760FA190\n000000067F0000400200008A590000948000-000000067F0000400200008A59000094C000__00000038E67ABFA0\n000000067F0000400200008A590000948000-000000067F0000400200008A59000094C000__0000003903F1CFE8\n000000067F0000400200008A590000948000-000000067F0000400200008A59000094C000__0000003B99F7F8A0\n000000067F0000400200008A590000948000-000000067F0000400200008A59000094C000__0000005D2FFFFB38\n000000067F0000400200008A590000948000-000000067F0000400200008A59000094C000__00000073AD3FE6B8\n000000067F0000400200008A590000948000-000000067F0000400200008A59000094C000__000000914E3F38F0\n000000067F0000400200008A590000948000-000000067F0000400200008A59000094C000__000000931B33AE68\n000000067F0000400200008A590000948000-000000067F0000400200008A59000094C000__000000931B9AFDF8\n000000067F0000400200008A59000094C000-000000067F0000400200008A590000950000__0000001C725A2400\n000000067F0000400200008A59000094C000-000000067F0000400200008A590000950000__0000001C760FA190\n000000067F0000400200008A59000094C000-000000067F0000400200008A590000950000__00000038E67ABFA0\n000000067F0000400200008A59000094C000-000000067F0000400200008A590000950000__0000003903F1CFE8\n000000067F0000400200008A59000094C000-000000067F0000400200008A590000950000__0000003B99F7F8A0\n000000067F0000400200008A59000094C000-000000067F0000400200008A590000950000__0000005D2FFFFB38\n000000067F0000400200008A59000094C000-000000067F0000400200008A590000950000__00000073AD3FE6B8\n000000067F0000400200008A59000094C000-000000067F0000400200008A
590000950000__000000914E3F38F0\n000000067F0000400200008A59000094C000-000000067F0000400200008A590000950000__000000931B33AE68\n000000067F0000400200008A59000094C000-000000067F0000400200008A590000950000__000000931B9AFDF8\n000000067F0000400200008A59000094E60F-000000067F0000400200008A590000956FE7__00000010E8D5E0A1-00000011987BE139\n000000067F0000400200008A590000950000-000000067F0000400200008A590000954000__0000001C725A2400\n000000067F0000400200008A590000950000-000000067F0000400200008A590000954000__0000001C760FA190\n000000067F0000400200008A590000950000-000000067F0000400200008A590000954000__00000038E67ABFA0\n000000067F0000400200008A590000950000-000000067F0000400200008A590000954000__0000003903F1CFE8\n000000067F0000400200008A590000950000-000000067F0000400200008A590000954000__0000003B99F7F8A0\n000000067F0000400200008A590000950000-000000067F0000400200008A590000954000__0000005D2FFFFB38\n000000067F0000400200008A590000950000-000000067F0000400200008A590000954000__00000073AD3FE6B8\n000000067F0000400200008A590000950000-000000067F0000400200008A590000954000__000000914E3F38F0\n000000067F0000400200008A590000950000-000000067F0000400200008A590000954000__000000931B33AE68\n000000067F0000400200008A590000950000-000000067F0000400200008A590000954000__000000931B9AFDF8\n000000067F0000400200008A590000954000-000000067F0000400200008A590000958000__0000001C725A2400\n000000067F0000400200008A590000954000-000000067F0000400200008A590000958000__0000001C760FA190\n000000067F0000400200008A590000954000-000000067F0000400200008A590000958000__00000038E67ABFA0\n000000067F0000400200008A590000954000-000000067F0000400200008A590000958000__0000003903F1CFE8\n000000067F0000400200008A590000954000-000000067F0000400200008A590000958000__0000003B99F7F8A0\n000000067F0000400200008A590000954000-000000067F0000400200008A590000958000__0000005D2FFFFB38\n000000067F0000400200008A590000954000-000000067F0000400200008A590000958000__00000073AD3FE6B8\n000000067F0000400200008A590000954000-000000067F0000400200008A590000958000__000000914E3F38F0
\n000000067F0000400200008A590000954000-000000067F0000400200008A590000958000__000000931B33AE68\n000000067F0000400200008A590000954000-000000067F0000400200008A590000958000__000000931B9AFDF8\n000000067F0000400200008A590000956FE7-000000067F0000400200008A59000095F9C6__00000010E8D5E0A1-00000011987BE139\n000000067F0000400200008A590000958000-000000067F0000400200008A59000095C000__0000001C725A2400\n000000067F0000400200008A590000958000-000000067F0000400200008A59000095C000__0000001C760FA190\n000000067F0000400200008A590000958000-000000067F0000400200008A59000095C000__00000038E67ABFA0\n000000067F0000400200008A590000958000-000000067F0000400200008A59000095C000__0000003903F1CFE8\n000000067F0000400200008A590000958000-000000067F0000400200008A59000095C000__0000003B99F7F8A0\n000000067F0000400200008A590000958000-000000067F0000400200008A59000095C000__0000005D2FFFFB38\n000000067F0000400200008A590000958000-000000067F0000400200008A59000095C000__00000073AD3FE6B8\n000000067F0000400200008A590000958000-000000067F0000400200008A59000095C000__000000914E3F38F0\n000000067F0000400200008A590000958000-000000067F0000400200008A59000095C000__000000931B33AE68\n000000067F0000400200008A590000958000-000000067F0000400200008A59000095C000__000000931B9AFDF8\n000000067F0000400200008A59000095C000-000000067F0000400200008A590000960000__0000001C725A2400\n000000067F0000400200008A59000095C000-000000067F0000400200008A590000960000__0000001C760FA190\n000000067F0000400200008A59000095C000-000000067F0000400200008A590000960000__00000038E67ABFA0\n000000067F0000400200008A59000095C000-000000067F0000400200008A590000960000__0000003903F1CFE8\n000000067F0000400200008A59000095C000-000000067F0000400200008A590000960000__0000003B99F7F8A0\n000000067F0000400200008A59000095C000-000000067F0000400200008A590000960000__0000005D2FFFFB38\n000000067F0000400200008A59000095C000-000000067F0000400200008A590000960000__00000073AD3FE6B8\n000000067F0000400200008A59000095C000-000000067F0000400200008A590000960000__000000914E3F38F0\n000000067F0000400200008A5900
0095C000-000000067F0000400200008A590000960000__000000931B33AE68\n000000067F0000400200008A59000095C000-000000067F0000400200008A590000960000__000000931B9AFDF8\n000000067F0000400200008A59000095F9C6-000000067F0000400200008A5900009683A0__00000010E8D5E0A1-00000011987BE139\n000000067F0000400200008A590000960000-000000067F0000400200008A590000964000__0000001C725A2400\n000000067F0000400200008A590000960000-000000067F0000400200008A590000964000__0000001C760FA190\n000000067F0000400200008A590000960000-000000067F0000400200008A590000964000__00000038E67ABFA0\n000000067F0000400200008A590000960000-000000067F0000400200008A590000964000__0000003903F1CFE8\n000000067F0000400200008A590000960000-000000067F0000400200008A590000964000__0000003B99F7F8A0\n000000067F0000400200008A590000960000-000000067F0000400200008A590000964000__0000005D2FFFFB38\n000000067F0000400200008A590000960000-000000067F0000400200008A590000964000__00000073AD3FE6B8\n000000067F0000400200008A590000960000-000000067F0000400200008A590000964000__000000914E3F38F0\n000000067F0000400200008A590000960000-000000067F0000400200008A590000964000__000000931B33AE68\n000000067F0000400200008A590000960000-000000067F0000400200008A590000964000__000000931B9AFDF8\n000000067F0000400200008A590000964000-000000067F0000400200008A590000968000__0000001C725A2400\n000000067F0000400200008A590000964000-000000067F0000400200008A590000968000__0000001C760FA190\n000000067F0000400200008A590000964000-000000067F0000400200008A590000968000__00000038E67ABFA0\n000000067F0000400200008A590000964000-000000067F0000400200008A590000968000__0000003903F1CFE8\n000000067F0000400200008A590000964000-000000067F0000400200008A590000968000__0000003B99F7F8A0\n000000067F0000400200008A590000964000-000000067F0000400200008A590000968000__0000005D2FFFFB38\n000000067F0000400200008A590000964000-000000067F0000400200008A590000968000__00000073AD3FE6B8\n000000067F0000400200008A590000964000-000000067F0000400200008A590000968000__000000914E3F38F0\n000000067F0000400200008A590000964000-000000067F00004002000
08A590000968000__000000931B33AE68\n000000067F0000400200008A590000964000-000000067F0000400200008A590000968000__000000931B9AFDF8\n000000067F0000400200008A590000968000-000000067F0000400200008A59000096C000__0000001C725A2400\n000000067F0000400200008A590000968000-000000067F0000400200008A59000096C000__0000001C760FA190\n000000067F0000400200008A590000968000-000000067F0000400200008A59000096C000__00000038E67ABFA0\n000000067F0000400200008A590000968000-000000067F0000400200008A59000096C000__0000003903F1CFE8\n000000067F0000400200008A590000968000-000000067F0000400200008A59000096C000__0000003B99F7F8A0\n000000067F0000400200008A590000968000-000000067F0000400200008A59000096C000__0000005D2FFFFB38\n000000067F0000400200008A590000968000-000000067F0000400200008A59000096C000__00000073AD3FE6B8\n000000067F0000400200008A590000968000-000000067F0000400200008A59000096C000__000000914E3F38F0\n000000067F0000400200008A590000968000-000000067F0000400200008A59000096C000__000000931B33AE68\n000000067F0000400200008A590000968000-000000067F0000400200008A59000096C000__000000931B9AFDF8\n000000067F0000400200008A5900009683A0-000000067F0000400200008A590000970D7B__00000010E8D5E0A1-00000011987BE139\n000000067F0000400200008A59000096C000-000000067F0000400200008A590000970000__0000001C725A2400\n000000067F0000400200008A59000096C000-000000067F0000400200008A590000970000__0000001C760FA190\n000000067F0000400200008A59000096C000-000000067F0000400200008A590000970000__00000038E67ABFA0\n000000067F0000400200008A59000096C000-000000067F0000400200008A590000970000__0000003903F1CFE8\n000000067F0000400200008A59000096C000-000000067F0000400200008A590000970000__0000003B99F7F8A0\n000000067F0000400200008A59000096C000-000000067F0000400200008A590000970000__0000005D2FFFFB38\n000000067F0000400200008A59000096C000-000000067F0000400200008A590000970000__00000073AD3FE6B8\n000000067F0000400200008A59000096C000-000000067F0000400200008A590000970000__000000914E3F38F0\n000000067F0000400200008A59000096C000-000000067F0000400200008A590000970000__000000931B33A
E68\n000000067F0000400200008A59000096C000-000000067F0000400200008A590000970000__000000931B9AFDF8\n000000067F0000400200008A590000970000-000000067F0000400200008A590000974000__0000001C725A2400\n000000067F0000400200008A590000970000-000000067F0000400200008A590000974000__0000001C760FA190\n000000067F0000400200008A590000970000-000000067F0000400200008A590000974000__00000038E67ABFA0\n000000067F0000400200008A590000970000-000000067F0000400200008A590000974000__0000003903F1CFE8\n000000067F0000400200008A590000970000-000000067F0000400200008A590000974000__0000003B99F7F8A0\n000000067F0000400200008A590000970000-000000067F0000400200008A590000974000__0000005D2FFFFB38\n000000067F0000400200008A590000970000-000000067F0000400200008A590000974000__00000073AD3FE6B8\n000000067F0000400200008A590000970000-000000067F0000400200008A590000974000__000000914E3F38F0\n000000067F0000400200008A590000970000-000000067F0000400200008A590000974000__000000931B33AE68\n000000067F0000400200008A590000970000-000000067F0000400200008A590000974000__000000931B9AFDF8\n000000067F0000400200008A590000970D7B-000000067F0000400200008A590000979751__00000010E8D5E0A1-00000011987BE139\n000000067F0000400200008A590000974000-000000067F0000400200008A590000978000__0000001C725A2400\n000000067F0000400200008A590000974000-000000067F0000400200008A590000978000__0000001C760FA190\n000000067F0000400200008A590000974000-000000067F0000400200008A590000978000__00000038E67ABFA0\n000000067F0000400200008A590000974000-000000067F0000400200008A590000978000__0000003903F1CFE8\n000000067F0000400200008A590000974000-000000067F0000400200008A590000978000__0000003B99F7F8A0\n000000067F0000400200008A590000974000-000000067F0000400200008A590000978000__0000005D2FFFFB38\n000000067F0000400200008A590000974000-000000067F0000400200008A590000978000__00000073AD3FE6B8\n000000067F0000400200008A590000974000-000000067F0000400200008A590000978000__000000914E3F38F0\n000000067F0000400200008A590000974000-000000067F0000400200008A590000978000__000000931B33AE68\n000000067F0000400200008A5
90000974000-000000067F0000400200008A590000978000__000000931B9AFDF8\n000000067F0000400200008A590000978000-000000067F0000400200008A59000097C000__0000001C725A2400\n000000067F0000400200008A590000978000-000000067F0000400200008A59000097C000__0000001C760FA190\n000000067F0000400200008A590000978000-000000067F0000400200008A59000097C000__00000038E67ABFA0\n000000067F0000400200008A590000978000-000000067F0000400200008A59000097C000__0000003903F1CFE8\n000000067F0000400200008A590000978000-000000067F0000400200008A59000097C000__0000003B99F7F8A0\n000000067F0000400200008A590000978000-000000067F0000400200008A59000097C000__0000005D2FFFFB38\n000000067F0000400200008A590000978000-000000067F0000400200008A59000097C000__00000073AD3FE6B8\n000000067F0000400200008A590000978000-000000067F0000400200008A59000097C000__000000914E3F38F0\n000000067F0000400200008A590000978000-000000067F0000400200008A59000097C000__000000931B33AE68\n000000067F0000400200008A590000978000-000000067F0000400200008A59000097C000__000000931B9AFDF8\n000000067F0000400200008A590000979751-000000067F0000400200008A590000982136__00000010E8D5E0A1-00000011987BE139\n000000067F0000400200008A59000097C000-000000067F0000400200008A590000980000__0000001C725A2400\n000000067F0000400200008A59000097C000-000000067F0000400200008A590000980000__0000001C760FA190\n000000067F0000400200008A59000097C000-000000067F0000400200008A590000980000__00000038E67ABFA0\n000000067F0000400200008A59000097C000-000000067F0000400200008A590000980000__0000003903F1CFE8\n000000067F0000400200008A59000097C000-000000067F0000400200008A590000980000__0000003B99F7F8A0\n000000067F0000400200008A59000097C000-000000067F0000400200008A590000980000__0000005D2FFFFB38\n000000067F0000400200008A59000097C000-000000067F0000400200008A590000980000__00000073AD3FE6B8\n000000067F0000400200008A59000097C000-000000067F0000400200008A590000980000__000000914E3F38F0\n000000067F0000400200008A59000097C000-000000067F0000400200008A590000980000__000000931B33AE68\n000000067F0000400200008A59000097C000-000000067F00004002
00008A590000980000__000000931B9AFDF8\n000000067F0000400200008A590000980000-000000067F0000400200008A590000984000__000000127811CCF0\n000000067F0000400200008A590000980000-000000067F0000400200008A590000984000__0000001C760FA190\n000000067F0000400200008A590000980000-000000067F0000400200008A590000984000__00000038E67ABFA0\n000000067F0000400200008A590000980000-000000067F0000400200008A590000984000__0000003903F1CFE8\n000000067F0000400200008A590000980000-000000067F0000400200008A590000984000__0000003B99F7F8A0\n000000067F0000400200008A590000980000-000000067F0000400200008A590000984000__0000005D2FFFFB38\n000000067F0000400200008A590000980000-000000067F0000400200008A590000984000__00000073AD3FE6B8\n000000067F0000400200008A590000980000-000000067F0000400200008A590000984000__000000914E3F38F0\n000000067F0000400200008A590000980000-000000067F0000400200008A590000984000__000000931B33AE68\n000000067F0000400200008A590000980000-000000067F0000400200008A590000984000__000000931B9AFDF8\n000000067F0000400200008A590000982136-000000067F0000400200008A590100000000__00000010E8D5E0A1-00000011987BE139\n000000067F0000400200008A590000982445-000000067F0000400200008A59000098AE24__00000011987BE139-000000124823FE31\n000000067F0000400200008A590000984000-000000067F0000400200008A590000988000__000000127811CCF0\n000000067F0000400200008A590000984000-000000067F0000400200008A590000988000__0000001C760FA190\n000000067F0000400200008A590000984000-000000067F0000400200008A590000988000__00000038E67ABFA0\n000000067F0000400200008A590000984000-000000067F0000400200008A590000988000__0000003903F1CFE8\n000000067F0000400200008A590000984000-000000067F0000400200008A590000988000__0000003B99F7F8A0\n000000067F0000400200008A590000984000-000000067F0000400200008A590000988000__0000005D2FFFFB38\n000000067F0000400200008A590000984000-000000067F0000400200008A590000988000__00000073AD3FE6B8\n000000067F0000400200008A590000984000-000000067F0000400200008A590000988000__000000914E3F38F0\n000000067F0000400200008A590000984000-000000067F0000400200008A5900009
88000__000000931B33AE68\n000000067F0000400200008A590000984000-000000067F0000400200008A590000988000__000000931B9AFDF8\n000000067F0000400200008A590000988000-000000067F0000400200008A59000098C000__000000127811CCF0\n000000067F0000400200008A590000988000-000000067F0000400200008A59000098C000__0000001C760FA190\n000000067F0000400200008A590000988000-000000067F0000400200008A59000098C000__00000038E67ABFA0\n000000067F0000400200008A590000988000-000000067F0000400200008A59000098C000__0000003903F1CFE8\n000000067F0000400200008A590000988000-000000067F0000400200008A59000098C000__0000003B99F7F8A0\n000000067F0000400200008A590000988000-000000067F0000400200008A59000098C000__0000005D2FFFFB38\n000000067F0000400200008A590000988000-000000067F0000400200008A59000098C000__00000073AD3FE6B8\n000000067F0000400200008A590000988000-000000067F0000400200008A59000098C000__000000914E3F38F0\n000000067F0000400200008A590000988000-000000067F0000400200008A59000098C000__000000931B33AE68\n000000067F0000400200008A590000988000-000000067F0000400200008A59000098C000__000000931B9AFDF8\n000000067F0000400200008A59000098AE24-000000067F0000400200008A5900009937FD__00000011987BE139-000000124823FE31\n000000067F0000400200008A59000098C000-000000067F0000400200008A590000990000__000000127811CCF0\n000000067F0000400200008A59000098C000-000000067F0000400200008A590000990000__0000001C760FA190\n000000067F0000400200008A59000098C000-000000067F0000400200008A590000990000__00000038E67ABFA0\n000000067F0000400200008A59000098C000-000000067F0000400200008A590000990000__0000003903F1CFE8\n000000067F0000400200008A59000098C000-000000067F0000400200008A590000990000__0000003B99F7F8A0\n000000067F0000400200008A59000098C000-000000067F0000400200008A590000990000__0000005D2FFFFB38\n000000067F0000400200008A59000098C000-000000067F0000400200008A590000990000__00000073AD3FE6B8\n000000067F0000400200008A59000098C000-000000067F0000400200008A590000990000__000000914E3F38F0\n000000067F0000400200008A59000098C000-000000067F0000400200008A590000990000__000000931B33AE68\n00000
0067F0000400200008A59000098C000-000000067F0000400200008A590000990000__000000931B9AFDF8\n000000067F0000400200008A590000990000-000000067F0000400200008A590000994000__000000127811CCF0\n000000067F0000400200008A590000990000-000000067F0000400200008A590000994000__0000001C760FA190\n000000067F0000400200008A590000990000-000000067F0000400200008A590000994000__00000038E67ABFA0\n000000067F0000400200008A590000990000-000000067F0000400200008A590000994000__0000003903F1CFE8\n000000067F0000400200008A590000990000-000000067F0000400200008A590000994000__0000003B99F7F8A0\n000000067F0000400200008A590000990000-000000067F0000400200008A590000994000__0000005D2FFFFB38\n000000067F0000400200008A590000990000-000000067F0000400200008A590000994000__00000073AD3FE6B8\n000000067F0000400200008A590000990000-000000067F0000400200008A590000994000__000000914E3F38F0\n000000067F0000400200008A590000990000-000000067F0000400200008A590000994000__000000931B33AE68\n000000067F0000400200008A590000990000-000000067F0000400200008A590000994000__000000931B9AFDF8\n000000067F0000400200008A5900009937FD-000000067F0000400200008A59000099C1DF__00000011987BE139-000000124823FE31\n000000067F0000400200008A590000994000-000000067F0000400200008A590000998000__000000127811CCF0\n000000067F0000400200008A590000994000-000000067F0000400200008A590000998000__0000001C760FA190\n000000067F0000400200008A590000994000-000000067F0000400200008A590000998000__00000038E67ABFA0\n000000067F0000400200008A590000994000-000000067F0000400200008A590000998000__0000003903F1CFE8\n000000067F0000400200008A590000994000-000000067F0000400200008A590000998000__0000003B99F7F8A0\n000000067F0000400200008A590000994000-000000067F0000400200008A590000998000__0000005D2FFFFB38\n000000067F0000400200008A590000994000-000000067F0000400200008A590000998000__00000073AD3FE6B8\n000000067F0000400200008A590000994000-000000067F0000400200008A590000998000__000000914E3F38F0\n000000067F0000400200008A590000994000-000000067F0000400200008A590000998000__000000931B33AE68\n000000067F0000400200008A59000099400
0-000000067F0000400200008A590000998000__000000931B9AFDF8\n000000067F0000400200008A590000998000-000000067F0000400200008A59000099C000__000000127811CCF0\n000000067F0000400200008A590000998000-000000067F0000400200008A59000099C000__0000001C760FA190\n000000067F0000400200008A590000998000-000000067F0000400200008A59000099C000__00000038E67ABFA0\n000000067F0000400200008A590000998000-000000067F0000400200008A59000099C000__0000003903F1CFE8\n000000067F0000400200008A590000998000-000000067F0000400200008A59000099C000__0000003B99F7F8A0\n000000067F0000400200008A590000998000-000000067F0000400200008A59000099C000__0000005D2FFFFB38\n000000067F0000400200008A590000998000-000000067F0000400200008A59000099C000__00000073AD3FE6B8\n000000067F0000400200008A590000998000-000000067F0000400200008A59000099C000__000000914E3F38F0\n000000067F0000400200008A590000998000-000000067F0000400200008A59000099C000__000000931B33AE68\n000000067F0000400200008A590000998000-000000067F0000400200008A59000099C000__000000931B9AFDF8\n000000067F0000400200008A59000099C000-000000067F0000400200008A5900009A0000__000000127811CCF0\n000000067F0000400200008A59000099C000-000000067F0000400200008A5900009A0000__0000001C760FA190\n000000067F0000400200008A59000099C000-000000067F0000400200008A5900009A0000__00000038E67ABFA0\n000000067F0000400200008A59000099C000-000000067F0000400200008A5900009A0000__0000003903F1CFE8\n000000067F0000400200008A59000099C000-000000067F0000400200008A5900009A0000__0000003B99F7F8A0\n000000067F0000400200008A59000099C000-000000067F0000400200008A5900009A0000__0000005D2FFFFB38\n000000067F0000400200008A59000099C000-000000067F0000400200008A5900009A0000__00000073AD3FE6B8\n000000067F0000400200008A59000099C000-000000067F0000400200008A5900009A0000__000000914E3F38F0\n000000067F0000400200008A59000099C000-000000067F0000400200008A5900009A0000__000000931B33AE68\n000000067F0000400200008A59000099C000-000000067F0000400200008A5900009A0000__000000931B9AFDF8\n000000067F0000400200008A59000099C1DF-000000067F0000400200008A5900009A4BBB__0000001
1987BE139-000000124823FE31\n000000067F0000400200008A5900009A0000-000000067F0000400200008A5900009A4000__000000127811CCF0\n000000067F0000400200008A5900009A0000-000000067F0000400200008A5900009A4000__0000001C760FA190\n000000067F0000400200008A5900009A0000-000000067F0000400200008A5900009A4000__00000038E67ABFA0\n000000067F0000400200008A5900009A0000-000000067F0000400200008A5900009A4000__0000003903F1CFE8\n000000067F0000400200008A5900009A0000-000000067F0000400200008A5900009A4000__0000003B99F7F8A0\n000000067F0000400200008A5900009A0000-000000067F0000400200008A5900009A4000__0000005D2FFFFB38\n000000067F0000400200008A5900009A0000-000000067F0000400200008A5900009A4000__00000073AD3FE6B8\n000000067F0000400200008A5900009A0000-000000067F0000400200008A5900009A4000__000000914E3F38F0\n000000067F0000400200008A5900009A0000-000000067F0000400200008A5900009A4000__000000931B33AE68\n000000067F0000400200008A5900009A0000-000000067F0000400200008A5900009A4000__000000931B9AFDF8\n000000067F0000400200008A5900009A4000-000000067F0000400200008A5900009A8000__000000127811CCF0\n000000067F0000400200008A5900009A4000-000000067F0000400200008A5900009A8000__0000001C760FA190\n000000067F0000400200008A5900009A4000-000000067F0000400200008A5900009A8000__00000038E67ABFA0\n000000067F0000400200008A5900009A4000-000000067F0000400200008A5900009A8000__0000003903F1CFE8\n000000067F0000400200008A5900009A4000-000000067F0000400200008A5900009A8000__0000003B99F7F8A0\n000000067F0000400200008A5900009A4000-000000067F0000400200008A5900009A8000__0000005D2FFFFB38\n000000067F0000400200008A5900009A4000-000000067F0000400200008A5900009A8000__00000073AD3FE6B8\n000000067F0000400200008A5900009A4000-000000067F0000400200008A5900009A8000__000000914E3F38F0\n000000067F0000400200008A5900009A4000-000000067F0000400200008A5900009A8000__000000931B33AE68\n000000067F0000400200008A5900009A4000-000000067F0000400200008A5900009A8000__000000931B9AFDF8\n000000067F0000400200008A5900009A4BBB-000000067F0000400200008A5900009AD590__00000011987BE139-000000124823FE31\n00
0000067F0000400200008A5900009A8000-000000067F0000400200008A5900009AC000__000000127811CCF0\n000000067F0000400200008A5900009A8000-000000067F0000400200008A5900009AC000__0000001C760FA190\n000000067F0000400200008A5900009A8000-000000067F0000400200008A5900009AC000__00000038E67ABFA0\n000000067F0000400200008A5900009A8000-000000067F0000400200008A5900009AC000__0000003903F1CFE8\n000000067F0000400200008A5900009A8000-000000067F0000400200008A5900009AC000__0000003B99F7F8A0\n000000067F0000400200008A5900009A8000-000000067F0000400200008A5900009AC000__0000005D2FFFFB38\n000000067F0000400200008A5900009A8000-000000067F0000400200008A5900009AC000__00000073AD3FE6B8\n000000067F0000400200008A5900009A8000-000000067F0000400200008A5900009AC000__000000914E3F38F0\n000000067F0000400200008A5900009A8000-000000067F0000400200008A5900009AC000__000000931B33AE68\n000000067F0000400200008A5900009A8000-000000067F0000400200008A5900009AC000__000000931B9AFDF8\n000000067F0000400200008A5900009AC000-000000067F0000400200008A5900009B0000__000000127811CCF0\n000000067F0000400200008A5900009AC000-000000067F0000400200008A5900009B0000__0000001C760FA190\n000000067F0000400200008A5900009AC000-000000067F0000400200008A5900009B0000__00000038E67ABFA0\n000000067F0000400200008A5900009AC000-000000067F0000400200008A5900009B0000__0000003903F1CFE8\n000000067F0000400200008A5900009AC000-000000067F0000400200008A5900009B0000__0000003B99F7F8A0\n000000067F0000400200008A5900009AC000-000000067F0000400200008A5900009B0000__0000005D2FFFFB38\n000000067F0000400200008A5900009AC000-000000067F0000400200008A5900009B0000__00000073AD3FE6B8\n000000067F0000400200008A5900009AC000-000000067F0000400200008A5900009B0000__000000914E3F38F0\n000000067F0000400200008A5900009AC000-000000067F0000400200008A5900009B0000__000000931B33AE68\n000000067F0000400200008A5900009AC000-000000067F0000400200008A5900009B0000__000000931B9AFDF8\n000000067F0000400200008A5900009AD590-000000067F0000400200008A5900009B5F72__00000011987BE139-000000124823FE31\n000000067F0000400200008A5900009B
0000-000000067F0000400200008A5900009B4000__000000127811CCF0\n000000067F0000400200008A5900009B0000-000000067F0000400200008A5900009B4000__0000001C760FA190\n000000067F0000400200008A5900009B0000-000000067F0000400200008A5900009B4000__00000038E67ABFA0\n000000067F0000400200008A5900009B0000-000000067F0000400200008A5900009B4000__0000003903F1CFE8\n000000067F0000400200008A5900009B0000-000000067F0000400200008A5900009B4000__0000003B99F7F8A0\n000000067F0000400200008A5900009B0000-000000067F0000400200008A5900009B4000__0000005D2FFFFB38\n000000067F0000400200008A5900009B0000-000000067F0000400200008A5900009B4000__00000073AD3FE6B8\n000000067F0000400200008A5900009B0000-000000067F0000400200008A5900009B4000__000000914E3F38F0\n000000067F0000400200008A5900009B0000-000000067F0000400200008A5900009B4000__000000931B33AE68\n000000067F0000400200008A5900009B0000-000000067F0000400200008A5900009B4000__000000931B9AFDF8\n000000067F0000400200008A5900009B4000-000000067F0000400200008A5900009B8000__000000127811CCF0\n000000067F0000400200008A5900009B4000-000000067F0000400200008A5900009B8000__0000001C760FA190\n000000067F0000400200008A5900009B4000-000000067F0000400200008A5900009B8000__00000038E67ABFA0\n000000067F0000400200008A5900009B4000-000000067F0000400200008A5900009B8000__0000003903F1CFE8\n000000067F0000400200008A5900009B4000-000000067F0000400200008A5900009B8000__0000003B99F7F8A0\n000000067F0000400200008A5900009B4000-000000067F0000400200008A5900009B8000__0000005D2FFFFB38\n000000067F0000400200008A5900009B4000-000000067F0000400200008A5900009B8000__00000073AD3FE6B8\n000000067F0000400200008A5900009B4000-000000067F0000400200008A5900009B8000__000000914E3F38F0\n000000067F0000400200008A5900009B4000-000000067F0000400200008A5900009B8000__000000931B33AE68\n000000067F0000400200008A5900009B4000-000000067F0000400200008A5900009B8000__000000931B9AFDF8\n000000067F0000400200008A5900009B5F72-000000067F0000400200008A5900009BE956__00000011987BE139-000000124823FE31\n000000067F0000400200008A5900009B8000-000000067F0000400200008A5
900009BC000__000000127811CCF0\n000000067F0000400200008A5900009B8000-000000067F0000400200008A5900009BC000__0000001C760FA190\n000000067F0000400200008A5900009B8000-000000067F0000400200008A5900009BC000__00000038E67ABFA0\n000000067F0000400200008A5900009B8000-000000067F0000400200008A5900009BC000__0000003903F1CFE8\n000000067F0000400200008A5900009B8000-000000067F0000400200008A5900009BC000__0000003B99F7F8A0\n000000067F0000400200008A5900009B8000-000000067F0000400200008A5900009BC000__0000005D2FFFFB38\n000000067F0000400200008A5900009B8000-000000067F0000400200008A5900009BC000__00000073AD3FE6B8\n000000067F0000400200008A5900009B8000-000000067F0000400200008A5900009BC000__000000914E3F38F0\n000000067F0000400200008A5900009B8000-000000067F0000400200008A5900009BC000__000000931B33AE68\n000000067F0000400200008A5900009B8000-000000067F0000400200008A5900009BC000__000000931B9AFDF8\n000000067F0000400200008A5900009BC000-000000067F0000400200008A5900009C0000__000000127811CCF0\n000000067F0000400200008A5900009BC000-000000067F0000400200008A5900009C0000__0000001C760FA190\n000000067F0000400200008A5900009BC000-000000067F0000400200008A5900009C0000__00000038E67ABFA0\n000000067F0000400200008A5900009BC000-000000067F0000400200008A5900009C0000__0000003903F1CFE8\n000000067F0000400200008A5900009BC000-000000067F0000400200008A5900009C0000__0000003B99F7F8A0\n000000067F0000400200008A5900009BC000-000000067F0000400200008A5900009C0000__0000005D2FFFFB38\n000000067F0000400200008A5900009BC000-000000067F0000400200008A5900009C0000__00000073AD3FE6B8\n000000067F0000400200008A5900009BC000-000000067F0000400200008A5900009C0000__000000914E3F38F0\n000000067F0000400200008A5900009BC000-000000067F0000400200008A5900009C0000__000000931B33AE68\n000000067F0000400200008A5900009BC000-000000067F0000400200008A5900009C0000__000000931B9AFDF8\n000000067F0000400200008A5900009BE956-000000067F0000400200008A5900009C7338__00000011987BE139-000000124823FE31\n000000067F0000400200008A5900009C0000-000000067F0000400200008A5900009C4000__000000127811CCF0\
n000000067F0000400200008A5900009C0000-000000067F0000400200008A5900009C4000__0000001C760FA190\n000000067F0000400200008A5900009C0000-000000067F0000400200008A5900009C4000__00000038E67ABFA0\n000000067F0000400200008A5900009C0000-000000067F0000400200008A5900009C4000__0000003903F1CFE8\n000000067F0000400200008A5900009C0000-000000067F0000400200008A5900009C4000__0000003B99F7F8A0\n000000067F0000400200008A5900009C0000-000000067F0000400200008A5900009C4000__0000005D2FFFFB38\n000000067F0000400200008A5900009C0000-000000067F0000400200008A5900009C4000__00000073AD3FE6B8\n000000067F0000400200008A5900009C0000-000000067F0000400200008A5900009C4000__000000914E3F38F0\n000000067F0000400200008A5900009C0000-000000067F0000400200008A5900009C4000__000000931B33AE68\n000000067F0000400200008A5900009C0000-000000067F0000400200008A5900009C4000__000000931B9AFDF8\n000000067F0000400200008A5900009C4000-000000067F0000400200008A5900009C8000__000000127811CCF0\n000000067F0000400200008A5900009C4000-000000067F0000400200008A5900009C8000__0000001C760FA190\n000000067F0000400200008A5900009C4000-000000067F0000400200008A5900009C8000__00000038E67ABFA0\n000000067F0000400200008A5900009C4000-000000067F0000400200008A5900009C8000__0000003903F1CFE8\n000000067F0000400200008A5900009C4000-000000067F0000400200008A5900009C8000__0000003B99F7F8A0\n000000067F0000400200008A5900009C4000-000000067F0000400200008A5900009C8000__0000005D2FFFFB38\n000000067F0000400200008A5900009C4000-000000067F0000400200008A5900009C8000__00000073AD3FE6B8\n000000067F0000400200008A5900009C4000-000000067F0000400200008A5900009C8000__000000914E3F38F0\n000000067F0000400200008A5900009C4000-000000067F0000400200008A5900009C8000__000000931B33AE68\n000000067F0000400200008A5900009C4000-000000067F0000400200008A5900009C8000__000000931B9AFDF8\n000000067F0000400200008A5900009C7338-000000067F0000400200008A5900009CFD0C__00000011987BE139-000000124823FE31\n000000067F0000400200008A5900009C8000-000000067F0000400200008A5900009CC000__000000127811CCF0\n000000067F0000400200008A59000
09C8000-000000067F0000400200008A5900009CC000__0000001C760FA190\n000000067F0000400200008A5900009C8000-000000067F0000400200008A5900009CC000__00000038E67ABFA0\n000000067F0000400200008A5900009C8000-000000067F0000400200008A5900009CC000__0000003903F1CFE8\n000000067F0000400200008A5900009C8000-000000067F0000400200008A5900009CC000__0000003B99F7F8A0\n000000067F0000400200008A5900009C8000-000000067F0000400200008A5900009CC000__0000005D2FFFFB38\n000000067F0000400200008A5900009C8000-000000067F0000400200008A5900009CC000__00000073AD3FE6B8\n000000067F0000400200008A5900009C8000-000000067F0000400200008A5900009CC000__000000914E3F38F0\n000000067F0000400200008A5900009C8000-000000067F0000400200008A5900009CC000__000000931B33AE68\n000000067F0000400200008A5900009C8000-000000067F0000400200008A5900009CC000__000000931B9AFDF8\n000000067F0000400200008A5900009CC000-000000067F0000400200008A5900009D0000__000000127811CCF0\n000000067F0000400200008A5900009CC000-000000067F0000400200008A5900009D0000__0000001C760FA190\n000000067F0000400200008A5900009CC000-000000067F0000400200008A5900009D0000__00000038E67ABFA0\n000000067F0000400200008A5900009CC000-000000067F0000400200008A5900009D0000__0000003903F1CFE8\n000000067F0000400200008A5900009CC000-000000067F0000400200008A5900009D0000__0000003B99F7F8A0\n000000067F0000400200008A5900009CC000-000000067F0000400200008A5900009D0000__0000005D2FFFFB38\n000000067F0000400200008A5900009CC000-000000067F0000400200008A5900009D0000__00000073AD3FE6B8\n000000067F0000400200008A5900009CC000-000000067F0000400200008A5900009D0000__000000914E3F38F0\n000000067F0000400200008A5900009CC000-000000067F0000400200008A5900009D0000__000000931B33AE68\n000000067F0000400200008A5900009CC000-000000067F0000400200008A5900009D0000__000000931B9AFDF8\n000000067F0000400200008A5900009CFD0C-000000067F0000400200008A5900009D86E1__00000011987BE139-000000124823FE31\n000000067F0000400200008A5900009D0000-000000067F0000400200008A5900009D4000__000000127811CCF0\n000000067F0000400200008A5900009D0000-000000067F000040020000
8A5900009D4000__0000001C760FA190\n000000067F0000400200008A5900009D0000-000000067F0000400200008A5900009D4000__00000038E67ABFA0\n000000067F0000400200008A5900009D0000-000000067F0000400200008A5900009D4000__0000003903F1CFE8\n000000067F0000400200008A5900009D0000-000000067F0000400200008A5900009D4000__0000003B99F7F8A0\n000000067F0000400200008A5900009D0000-000000067F0000400200008A5900009D4000__0000005D2FFFFB38\n000000067F0000400200008A5900009D0000-000000067F0000400200008A5900009D4000__00000073AD3FE6B8\n000000067F0000400200008A5900009D0000-000000067F0000400200008A5900009D4000__000000914E3F38F0\n000000067F0000400200008A5900009D0000-000000067F0000400200008A5900009D4000__000000931B33AE68\n000000067F0000400200008A5900009D0000-000000067F0000400200008A5900009D4000__000000931B9AFDF8\n000000067F0000400200008A5900009D4000-000000067F0000400200008A5900009D8000__000000127811CCF0\n000000067F0000400200008A5900009D4000-000000067F0000400200008A5900009D8000__0000001C760FA190\n000000067F0000400200008A5900009D4000-000000067F0000400200008A5900009D8000__00000038E67ABFA0\n000000067F0000400200008A5900009D4000-000000067F0000400200008A5900009D8000__0000003903F1CFE8\n000000067F0000400200008A5900009D4000-000000067F0000400200008A5900009D8000__0000003B99F7F8A0\n000000067F0000400200008A5900009D4000-000000067F0000400200008A5900009D8000__0000005D2FFFFB38\n000000067F0000400200008A5900009D4000-000000067F0000400200008A5900009D8000__00000073AD3FE6B8\n000000067F0000400200008A5900009D4000-000000067F0000400200008A5900009D8000__000000914E3F38F0\n000000067F0000400200008A5900009D4000-000000067F0000400200008A5900009D8000__000000931B33AE68\n000000067F0000400200008A5900009D4000-000000067F0000400200008A5900009D8000__000000931B9AFDF8\n000000067F0000400200008A5900009D8000-000000067F0000400200008A5900009DC000__000000127811CCF0\n000000067F0000400200008A5900009D8000-000000067F0000400200008A5900009DC000__0000001C760FA190\n000000067F0000400200008A5900009D8000-000000067F0000400200008A5900009DC000__00000038E67ABFA0\n000000067F000
0400200008A5900009D8000-000000067F0000400200008A5900009DC000__0000003903F1CFE8\n000000067F0000400200008A5900009D8000-000000067F0000400200008A5900009DC000__0000003B99F7F8A0\n000000067F0000400200008A5900009D8000-000000067F0000400200008A5900009DC000__0000005D2FFFFB38\n000000067F0000400200008A5900009D8000-000000067F0000400200008A5900009DC000__00000073AD3FE6B8\n000000067F0000400200008A5900009D8000-000000067F0000400200008A5900009DC000__000000914E3F38F0\n000000067F0000400200008A5900009D8000-000000067F0000400200008A5900009DC000__000000931B33AE68\n000000067F0000400200008A5900009D8000-000000067F0000400200008A5900009DC000__000000931B9AFDF8\n000000067F0000400200008A5900009D86E1-000000067F0000400200008A5900009E10BE__00000011987BE139-000000124823FE31\n000000067F0000400200008A5900009DC000-000000067F0000400200008A5900009E0000__000000127811CCF0\n000000067F0000400200008A5900009DC000-000000067F0000400200008A5900009E0000__0000001C760FA190\n000000067F0000400200008A5900009DC000-000000067F0000400200008A5900009E0000__00000038E67ABFA0\n000000067F0000400200008A5900009DC000-000000067F0000400200008A5900009E0000__0000003903F1CFE8\n000000067F0000400200008A5900009DC000-000000067F0000400200008A5900009E0000__0000003B99F7F8A0\n000000067F0000400200008A5900009DC000-000000067F0000400200008A5900009E0000__0000005D2FFFFB38\n000000067F0000400200008A5900009DC000-000000067F0000400200008A5900009E0000__00000073AD3FE6B8\n000000067F0000400200008A5900009DC000-000000067F0000400200008A5900009E0000__000000914E3F38F0\n000000067F0000400200008A5900009DC000-000000067F0000400200008A5900009E0000__000000931B33AE68\n000000067F0000400200008A5900009DC000-000000067F0000400200008A5900009E0000__000000931B9AFDF8\n000000067F0000400200008A5900009E0000-000000067F0000400200008A5900009E4000__000000127811CCF0\n000000067F0000400200008A5900009E0000-000000067F0000400200008A5900009E4000__0000001C760FA190\n000000067F0000400200008A5900009E0000-000000067F0000400200008A5900009E4000__00000038E67ABFA0\n000000067F0000400200008A5900009E0000-000000
067F0000400200008A5900009E4000__0000003903F1CFE8\n000000067F0000400200008A5900009E0000-000000067F0000400200008A5900009E4000__0000003B99F7F8A0\n000000067F0000400200008A5900009E0000-000000067F0000400200008A5900009E4000__0000005D2FFFFB38\n000000067F0000400200008A5900009E0000-000000067F0000400200008A5900009E4000__00000073AD3FE6B8\n000000067F0000400200008A5900009E0000-000000067F0000400200008A5900009E4000__000000914E3F38F0\n000000067F0000400200008A5900009E0000-000000067F0000400200008A5900009E4000__000000931B33AE68\n000000067F0000400200008A5900009E0000-000000067F0000400200008A5900009E4000__000000931B9AFDF8\n000000067F0000400200008A5900009E10BE-000000067F0000400200008A590100000000__00000011987BE139-000000124823FE31\n000000067F0000400200008A5900009E13DD-000000067F0000400200008A5900009E9DC2__000000124823FE31-00000012F7CBDEA1\n000000067F0000400200008A5900009E4000-000000067F0000400200008A5900009E8000__000000127811CCF0\n000000067F0000400200008A5900009E4000-000000067F0000400200008A5900009E8000__0000001C760FA190\n000000067F0000400200008A5900009E4000-000000067F0000400200008A5900009E8000__00000038E67ABFA0\n000000067F0000400200008A5900009E4000-000000067F0000400200008A5900009E8000__0000003903F1CFE8\n000000067F0000400200008A5900009E4000-000000067F0000400200008A5900009E8000__0000003B99F7F8A0\n000000067F0000400200008A5900009E4000-000000067F0000400200008A5900009E8000__0000005D2FFFFB38\n000000067F0000400200008A5900009E4000-000000067F0000400200008A5900009E8000__00000073AD3FE6B8\n000000067F0000400200008A5900009E4000-000000067F0000400200008A5900009E8000__000000914E3F38F0\n000000067F0000400200008A5900009E4000-000000067F0000400200008A5900009E8000__000000931B33AE68\n000000067F0000400200008A5900009E4000-000000067F0000400200008A5900009E8000__000000931B9AFDF8\n000000067F0000400200008A5900009E8000-000000067F0000400200008A5900009EC000__000000127811CCF0\n000000067F0000400200008A5900009E8000-000000067F0000400200008A5900009EC000__0000001C760FA190\n000000067F0000400200008A5900009E8000-000000067F000040020
0008A5900009EC000__00000038E67ABFA0\n000000067F0000400200008A5900009E8000-000000067F0000400200008A5900009EC000__0000003903F1CFE8\n000000067F0000400200008A5900009E8000-000000067F0000400200008A5900009EC000__0000003B99F7F8A0\n000000067F0000400200008A5900009E8000-000000067F0000400200008A5900009EC000__0000005D2FFFFB38\n000000067F0000400200008A5900009E8000-000000067F0000400200008A5900009EC000__00000073AD3FE6B8\n000000067F0000400200008A5900009E8000-000000067F0000400200008A5900009EC000__000000914E3F38F0\n000000067F0000400200008A5900009E8000-000000067F0000400200008A5900009EC000__000000931B33AE68\n000000067F0000400200008A5900009E8000-000000067F0000400200008A5900009EC000__000000931B9AFDF8\n000000067F0000400200008A5900009E9DC2-000000067F0000400200008A5900009F27A6__000000124823FE31-00000012F7CBDEA1\n000000067F0000400200008A5900009EC000-000000067F0000400200008A5900009F0000__000000127811CCF0\n000000067F0000400200008A5900009EC000-000000067F0000400200008A5900009F0000__0000001C760FA190\n000000067F0000400200008A5900009EC000-000000067F0000400200008A5900009F0000__00000038E67ABFA0\n000000067F0000400200008A5900009EC000-000000067F0000400200008A5900009F0000__0000003903F1CFE8\n000000067F0000400200008A5900009EC000-000000067F0000400200008A5900009F0000__0000003B99F7F8A0\n000000067F0000400200008A5900009EC000-000000067F0000400200008A5900009F0000__0000005D2FFFFB38\n000000067F0000400200008A5900009EC000-000000067F0000400200008A5900009F0000__00000073AD3FE6B8\n000000067F0000400200008A5900009EC000-000000067F0000400200008A5900009F0000__000000914E3F38F0\n000000067F0000400200008A5900009EC000-000000067F0000400200008A5900009F0000__000000931B33AE68\n000000067F0000400200008A5900009EC000-000000067F0000400200008A5900009F0000__000000931B9AFDF8\n000000067F0000400200008A5900009F0000-000000067F0000400200008A5900009F4000__000000127811CCF0\n000000067F0000400200008A5900009F0000-000000067F0000400200008A5900009F4000__0000001C760FA190\n000000067F0000400200008A5900009F0000-000000067F0000400200008A5900009F4000__00000038E67
ABFA0\n000000067F0000400200008A5900009F0000-000000067F0000400200008A5900009F4000__0000003903F1CFE8\n000000067F0000400200008A5900009F0000-000000067F0000400200008A5900009F4000__0000003B99F7F8A0\n000000067F0000400200008A5900009F0000-000000067F0000400200008A5900009F4000__0000005D2FFFFB38\n000000067F0000400200008A5900009F0000-000000067F0000400200008A5900009F4000__00000073AD3FE6B8\n000000067F0000400200008A5900009F0000-000000067F0000400200008A5900009F4000__000000914E3F38F0\n000000067F0000400200008A5900009F0000-000000067F0000400200008A5900009F4000__000000931B33AE68\n000000067F0000400200008A5900009F0000-000000067F0000400200008A5900009F4000__000000931B9AFDF8\n000000067F0000400200008A5900009F27A6-000000067F0000400200008A5900009FB188__000000124823FE31-00000012F7CBDEA1\n000000067F0000400200008A5900009F4000-000000067F0000400200008A5900009F8000__000000127811CCF0\n000000067F0000400200008A5900009F4000-000000067F0000400200008A5900009F8000__0000001C760FA190\n000000067F0000400200008A5900009F4000-000000067F0000400200008A5900009F8000__00000038E67ABFA0\n000000067F0000400200008A5900009F4000-000000067F0000400200008A5900009F8000__0000003903F1CFE8\n000000067F0000400200008A5900009F4000-000000067F0000400200008A5900009F8000__0000003B99F7F8A0\n000000067F0000400200008A5900009F4000-000000067F0000400200008A5900009F8000__0000005D2FFFFB38\n000000067F0000400200008A5900009F4000-000000067F0000400200008A5900009F8000__00000073AD3FE6B8\n000000067F0000400200008A5900009F4000-000000067F0000400200008A5900009F8000__000000914E3F38F0\n000000067F0000400200008A5900009F4000-000000067F0000400200008A5900009F8000__000000931B33AE68\n000000067F0000400200008A5900009F4000-000000067F0000400200008A5900009F8000__000000931B9AFDF8\n000000067F0000400200008A5900009F8000-000000067F0000400200008A5900009FC000__0000001C760FA190\n000000067F0000400200008A5900009F8000-000000067F0000400200008A5900009FC000__00000038E67ABFA0\n000000067F0000400200008A5900009F8000-000000067F0000400200008A5900009FC000__0000003903F1CFE8\n000000067F0000400200008
A5900009F8000-000000067F0000400200008A5900009FC000__0000003B99F7F8A0\n000000067F0000400200008A5900009F8000-000000067F0000400200008A5900009FC000__0000005D2FFFFB38\n000000067F0000400200008A5900009F8000-000000067F0000400200008A5900009FC000__00000073AD3FE6B8\n000000067F0000400200008A5900009F8000-000000067F0000400200008A5900009FC000__000000914E3F38F0\n000000067F0000400200008A5900009F8000-000000067F0000400200008A5900009FC000__000000931B33AE68\n000000067F0000400200008A5900009F8000-000000067F0000400200008A5900009FC000__000000931B9AFDF8\n000000067F0000400200008A5900009F8000-030000000000000000000000000000000002__000000127811CCF0\n000000067F0000400200008A5900009FB188-000000067F0000400200008A590000A03B76__000000124823FE31-00000012F7CBDEA1\n000000067F0000400200008A5900009FC000-000000067F0000400200008A590000A00000__0000001C760FA190\n000000067F0000400200008A5900009FC000-000000067F0000400200008A590000A00000__00000038E67ABFA0\n000000067F0000400200008A5900009FC000-000000067F0000400200008A590000A00000__0000003903F1CFE8\n000000067F0000400200008A5900009FC000-000000067F0000400200008A590000A00000__0000003B99F7F8A0\n000000067F0000400200008A5900009FC000-000000067F0000400200008A590000A00000__0000005D2FFFFB38\n000000067F0000400200008A5900009FC000-000000067F0000400200008A590000A00000__00000073AD3FE6B8\n000000067F0000400200008A5900009FC000-000000067F0000400200008A590000A00000__000000914E3F38F0\n000000067F0000400200008A5900009FC000-000000067F0000400200008A590000A00000__000000931B33AE68\n000000067F0000400200008A5900009FC000-000000067F0000400200008A590000A00000__000000931B9AFDF8\n000000067F0000400200008A590000A00000-000000067F0000400200008A590000A04000__0000001C760FA190\n000000067F0000400200008A590000A00000-000000067F0000400200008A590000A04000__00000038E67ABFA0\n000000067F0000400200008A590000A00000-000000067F0000400200008A590000A04000__0000003903F1CFE8\n000000067F0000400200008A590000A00000-000000067F0000400200008A590000A04000__0000003B99F7F8A0\n000000067F0000400200008A590000A00000-000000067F000040
0200008A590000A04000__0000005D2FFFFB38\n000000067F0000400200008A590000A00000-000000067F0000400200008A590000A04000__00000073AD3FE6B8\n000000067F0000400200008A590000A00000-000000067F0000400200008A590000A04000__000000914E3F38F0\n000000067F0000400200008A590000A00000-000000067F0000400200008A590000A04000__000000931B33AE68\n000000067F0000400200008A590000A00000-000000067F0000400200008A590000A04000__000000931B9AFDF8\n000000067F0000400200008A590000A03B76-000000067F0000400200008A590000A0C550__000000124823FE31-00000012F7CBDEA1\n000000067F0000400200008A590000A04000-000000067F0000400200008A590000A08000__0000001C760FA190\n000000067F0000400200008A590000A04000-000000067F0000400200008A590000A08000__00000038E67ABFA0\n000000067F0000400200008A590000A04000-000000067F0000400200008A590000A08000__0000003903F1CFE8\n000000067F0000400200008A590000A04000-000000067F0000400200008A590000A08000__0000003B99F7F8A0\n000000067F0000400200008A590000A04000-000000067F0000400200008A590000A08000__0000005D2FFFFB38\n000000067F0000400200008A590000A04000-000000067F0000400200008A590000A08000__00000073AD3FE6B8\n000000067F0000400200008A590000A04000-000000067F0000400200008A590000A08000__000000914E3F38F0\n000000067F0000400200008A590000A04000-000000067F0000400200008A590000A08000__000000931B33AE68\n000000067F0000400200008A590000A04000-000000067F0000400200008A590000A08000__000000931B9AFDF8\n000000067F0000400200008A590000A08000-000000067F0000400200008A590000A0C000__0000001C760FA190\n000000067F0000400200008A590000A08000-000000067F0000400200008A590000A0C000__00000038E67ABFA0\n000000067F0000400200008A590000A08000-000000067F0000400200008A590000A0C000__0000003903F1CFE8\n000000067F0000400200008A590000A08000-000000067F0000400200008A590000A0C000__0000003B99F7F8A0\n000000067F0000400200008A590000A08000-000000067F0000400200008A590000A0C000__0000005D2FFFFB38\n000000067F0000400200008A590000A08000-000000067F0000400200008A590000A0C000__00000073AD3FE6B8\n000000067F0000400200008A590000A08000-000000067F0000400200008A590000A0C000__00000091
4E3F38F0\n000000067F0000400200008A590000A08000-000000067F0000400200008A590000A0C000__000000931B33AE68\n000000067F0000400200008A590000A08000-000000067F0000400200008A590000A0C000__000000931B9AFDF8\n000000067F0000400200008A590000A0C000-000000067F0000400200008A590000A10000__0000001C760FA190\n000000067F0000400200008A590000A0C000-000000067F0000400200008A590000A10000__00000038E67ABFA0\n000000067F0000400200008A590000A0C000-000000067F0000400200008A590000A10000__0000003903F1CFE8\n000000067F0000400200008A590000A0C000-000000067F0000400200008A590000A10000__0000003B99F7F8A0\n000000067F0000400200008A590000A0C000-000000067F0000400200008A590000A10000__0000005D2FFFFB38\n000000067F0000400200008A590000A0C000-000000067F0000400200008A590000A10000__00000073AD3FE6B8\n000000067F0000400200008A590000A0C000-000000067F0000400200008A590000A10000__000000914E3F38F0\n000000067F0000400200008A590000A0C000-000000067F0000400200008A590000A10000__000000931B33AE68\n000000067F0000400200008A590000A0C000-000000067F0000400200008A590000A10000__000000931B9AFDF8\n000000067F0000400200008A590000A0C550-000000067F0000400200008A590000A14F25__000000124823FE31-00000012F7CBDEA1\n000000067F0000400200008A590000A10000-000000067F0000400200008A590000A14000__0000001C760FA190\n000000067F0000400200008A590000A10000-000000067F0000400200008A590000A14000__00000038E67ABFA0\n000000067F0000400200008A590000A10000-000000067F0000400200008A590000A14000__0000003903F1CFE8\n000000067F0000400200008A590000A10000-000000067F0000400200008A590000A14000__0000003B99F7F8A0\n000000067F0000400200008A590000A10000-000000067F0000400200008A590000A14000__0000005D2FFFFB38\n000000067F0000400200008A590000A10000-000000067F0000400200008A590000A14000__00000073AD3FE6B8\n000000067F0000400200008A590000A10000-000000067F0000400200008A590000A14000__000000914E3F38F0\n000000067F0000400200008A590000A10000-000000067F0000400200008A590000A14000__000000931B33AE68\n000000067F0000400200008A590000A10000-000000067F0000400200008A590000A14000__000000931B9AFDF8\n000000067F0000400200
008A590000A14000-000000067F0000400200008A590000A18000__0000001C760FA190\n000000067F0000400200008A590000A14000-000000067F0000400200008A590000A18000__00000038E67ABFA0\n000000067F0000400200008A590000A14000-000000067F0000400200008A590000A18000__0000003903F1CFE8\n000000067F0000400200008A590000A14000-000000067F0000400200008A590000A18000__0000003B99F7F8A0\n000000067F0000400200008A590000A14000-000000067F0000400200008A590000A18000__0000005D2FFFFB38\n000000067F0000400200008A590000A14000-000000067F0000400200008A590000A18000__00000073AD3FE6B8\n000000067F0000400200008A590000A14000-000000067F0000400200008A590000A18000__000000914E3F38F0\n000000067F0000400200008A590000A14000-000000067F0000400200008A590000A18000__000000931B33AE68\n000000067F0000400200008A590000A14000-000000067F0000400200008A590000A18000__000000931B9AFDF8\n000000067F0000400200008A590000A14F25-000000067F0000400200008A590000A1D8F5__000000124823FE31-00000012F7CBDEA1\n000000067F0000400200008A590000A18000-000000067F0000400200008A590000A1C000__0000001C760FA190\n000000067F0000400200008A590000A18000-000000067F0000400200008A590000A1C000__00000038E67ABFA0\n000000067F0000400200008A590000A18000-000000067F0000400200008A590000A1C000__0000003903F1CFE8\n000000067F0000400200008A590000A18000-000000067F0000400200008A590000A1C000__0000003B99F7F8A0\n000000067F0000400200008A590000A18000-000000067F0000400200008A590000A1C000__0000005D2FFFFB38\n000000067F0000400200008A590000A18000-000000067F0000400200008A590000A1C000__00000073AD3FE6B8\n000000067F0000400200008A590000A18000-000000067F0000400200008A590000A1C000__000000914E3F38F0\n000000067F0000400200008A590000A18000-000000067F0000400200008A590000A1C000__000000931B33AE68\n000000067F0000400200008A590000A18000-000000067F0000400200008A590000A1C000__000000931B9AFDF8\n000000067F0000400200008A590000A1C000-000000067F0000400200008A590000A20000__0000001C760FA190\n000000067F0000400200008A590000A1C000-000000067F0000400200008A590000A20000__00000038E67ABFA0\n000000067F0000400200008A590000A1C000-000000067F000
0400200008A590000A20000__0000003903F1CFE8\n000000067F0000400200008A590000A1C000-000000067F0000400200008A590000A20000__0000003B99F7F8A0\n000000067F0000400200008A590000A1C000-000000067F0000400200008A590000A20000__0000005D2FFFFB38\n000000067F0000400200008A590000A1C000-000000067F0000400200008A590000A20000__00000073AD3FE6B8\n000000067F0000400200008A590000A1C000-000000067F0000400200008A590000A20000__000000914E3F38F0\n000000067F0000400200008A590000A1C000-000000067F0000400200008A590000A20000__000000931B33AE68\n000000067F0000400200008A590000A1C000-000000067F0000400200008A590000A20000__000000931B9AFDF8\n000000067F0000400200008A590000A1D8F5-000000067F0000400200008A590000A262D4__000000124823FE31-00000012F7CBDEA1\n000000067F0000400200008A590000A20000-000000067F0000400200008A590000A24000__0000001C760FA190\n000000067F0000400200008A590000A20000-000000067F0000400200008A590000A24000__00000038E67ABFA0\n000000067F0000400200008A590000A20000-000000067F0000400200008A590000A24000__0000003903F1CFE8\n000000067F0000400200008A590000A20000-000000067F0000400200008A590000A24000__0000003B99F7F8A0\n000000067F0000400200008A590000A20000-000000067F0000400200008A590000A24000__0000005D2FFFFB38\n000000067F0000400200008A590000A20000-000000067F0000400200008A590000A24000__00000073AD3FE6B8\n000000067F0000400200008A590000A20000-000000067F0000400200008A590000A24000__000000914E3F38F0\n000000067F0000400200008A590000A20000-000000067F0000400200008A590000A24000__000000931B33AE68\n000000067F0000400200008A590000A20000-000000067F0000400200008A590000A24000__000000931B9AFDF8\n000000067F0000400200008A590000A24000-000000067F0000400200008A590000A28000__0000001C760FA190\n000000067F0000400200008A590000A24000-000000067F0000400200008A590000A28000__00000038E67ABFA0\n000000067F0000400200008A590000A24000-000000067F0000400200008A590000A28000__0000003903F1CFE8\n000000067F0000400200008A590000A24000-000000067F0000400200008A590000A28000__0000003B99F7F8A0\n000000067F0000400200008A590000A24000-000000067F0000400200008A590000A28000__00000
05D2FFFFB38\n000000067F0000400200008A590000A24000-000000067F0000400200008A590000A28000__00000073AD3FE6B8\n000000067F0000400200008A590000A24000-000000067F0000400200008A590000A28000__000000914E3F38F0\n000000067F0000400200008A590000A24000-000000067F0000400200008A590000A28000__000000931B33AE68\n000000067F0000400200008A590000A24000-000000067F0000400200008A590000A28000__000000931B9AFDF8\n000000067F0000400200008A590000A262D4-000000067F0000400200008A590000A2ECBA__000000124823FE31-00000012F7CBDEA1\n000000067F0000400200008A590000A28000-000000067F0000400200008A590000A2C000__0000001C760FA190\n000000067F0000400200008A590000A28000-000000067F0000400200008A590000A2C000__00000038E67ABFA0\n000000067F0000400200008A590000A28000-000000067F0000400200008A590000A2C000__0000003903F1CFE8\n000000067F0000400200008A590000A28000-000000067F0000400200008A590000A2C000__0000003B99F7F8A0\n000000067F0000400200008A590000A28000-000000067F0000400200008A590000A2C000__0000005D2FFFFB38\n000000067F0000400200008A590000A28000-000000067F0000400200008A590000A2C000__00000073AD3FE6B8\n000000067F0000400200008A590000A28000-000000067F0000400200008A590000A2C000__000000914E3F38F0\n000000067F0000400200008A590000A28000-000000067F0000400200008A590000A2C000__000000931B33AE68\n000000067F0000400200008A590000A28000-000000067F0000400200008A590000A2C000__000000931B9AFDF8\n000000067F0000400200008A590000A2C000-000000067F0000400200008A590000A30000__0000001C760FA190\n000000067F0000400200008A590000A2C000-000000067F0000400200008A590000A30000__00000038E67ABFA0\n000000067F0000400200008A590000A2C000-000000067F0000400200008A590000A30000__0000003903F1CFE8\n000000067F0000400200008A590000A2C000-000000067F0000400200008A590000A30000__0000003B99F7F8A0\n000000067F0000400200008A590000A2C000-000000067F0000400200008A590000A30000__0000005D2FFFFB38\n000000067F0000400200008A590000A2C000-000000067F0000400200008A590000A30000__00000073AD3FE6B8\n000000067F0000400200008A590000A2C000-000000067F0000400200008A590000A30000__000000914E3F38F0\n000000067F0000400
200008A590000A2C000-000000067F0000400200008A590000A30000__000000931B33AE68\n000000067F0000400200008A590000A2C000-000000067F0000400200008A590000A30000__000000931B9AFDF8\n000000067F0000400200008A590000A2ECBA-000000067F0000400200008A590000A3769E__000000124823FE31-00000012F7CBDEA1\n000000067F0000400200008A590000A30000-000000067F0000400200008A590000A34000__0000001C760FA190\n000000067F0000400200008A590000A30000-000000067F0000400200008A590000A34000__00000038E67ABFA0\n000000067F0000400200008A590000A30000-000000067F0000400200008A590000A34000__0000003903F1CFE8\n000000067F0000400200008A590000A30000-000000067F0000400200008A590000A34000__0000003B99F7F8A0\n000000067F0000400200008A590000A30000-000000067F0000400200008A590000A34000__0000005D2FFFFB38\n000000067F0000400200008A590000A30000-000000067F0000400200008A590000A34000__00000073AD3FE6B8\n000000067F0000400200008A590000A30000-000000067F0000400200008A590000A34000__000000914E3F38F0\n000000067F0000400200008A590000A30000-000000067F0000400200008A590000A34000__000000931B33AE68\n000000067F0000400200008A590000A30000-000000067F0000400200008A590000A34000__000000931B9AFDF8\n000000067F0000400200008A590000A34000-000000067F0000400200008A590000A38000__0000001C760FA190\n000000067F0000400200008A590000A34000-000000067F0000400200008A590000A38000__00000038E67ABFA0\n000000067F0000400200008A590000A34000-000000067F0000400200008A590000A38000__0000003903F1CFE8\n000000067F0000400200008A590000A34000-000000067F0000400200008A590000A38000__0000003B99F7F8A0\n000000067F0000400200008A590000A34000-000000067F0000400200008A590000A38000__0000005D2FFFFB38\n000000067F0000400200008A590000A34000-000000067F0000400200008A590000A38000__00000073AD3FE6B8\n000000067F0000400200008A590000A34000-000000067F0000400200008A590000A38000__000000914E3F38F0\n000000067F0000400200008A590000A34000-000000067F0000400200008A590000A38000__000000931B33AE68\n000000067F0000400200008A590000A34000-000000067F0000400200008A590000A38000__000000931B9AFDF8\n000000067F0000400200008A590000A3769E-000000067F
0000400200008A590000A40089__000000124823FE31-00000012F7CBDEA1\n000000067F0000400200008A590000A38000-000000067F0000400200008A590000A3C000__0000001C760FA190\n000000067F0000400200008A590000A38000-000000067F0000400200008A590000A3C000__00000038E67ABFA0\n000000067F0000400200008A590000A38000-000000067F0000400200008A590000A3C000__0000003903F1CFE8\n000000067F0000400200008A590000A38000-000000067F0000400200008A590000A3C000__0000003B99F7F8A0\n000000067F0000400200008A590000A38000-000000067F0000400200008A590000A3C000__0000005D2FFFFB38\n000000067F0000400200008A590000A38000-000000067F0000400200008A590000A3C000__00000073AD3FE6B8\n000000067F0000400200008A590000A38000-000000067F0000400200008A590000A3C000__000000914E3F38F0\n000000067F0000400200008A590000A38000-000000067F0000400200008A590000A3C000__000000931B33AE68\n000000067F0000400200008A590000A38000-000000067F0000400200008A590000A3C000__000000931B9AFDF8\n000000067F0000400200008A590000A3C000-000000067F0000400200008A590000A40000__0000001C760FA190\n000000067F0000400200008A590000A3C000-000000067F0000400200008A590000A40000__00000038E67ABFA0\n000000067F0000400200008A590000A3C000-000000067F0000400200008A590000A40000__0000003903F1CFE8\n000000067F0000400200008A590000A3C000-000000067F0000400200008A590000A40000__0000003B99F7F8A0\n000000067F0000400200008A590000A3C000-000000067F0000400200008A590000A40000__0000005D2FFFFB38\n000000067F0000400200008A590000A3C000-000000067F0000400200008A590000A40000__00000073AD3FE6B8\n000000067F0000400200008A590000A3C000-000000067F0000400200008A590000A40000__000000914E3F38F0\n000000067F0000400200008A590000A3C000-000000067F0000400200008A590000A40000__000000931B33AE68\n000000067F0000400200008A590000A3C000-000000067F0000400200008A590000A40000__000000931B9AFDF8\n000000067F0000400200008A590000A40000-000000067F0000400200008A590000A44000__0000001C725A2400\n000000067F0000400200008A590000A40000-000000067F0000400200008A590000A44000__0000001C760FA190\n000000067F0000400200008A590000A40000-000000067F0000400200008A590000A44000__00
000038E67ABFA0\n000000067F0000400200008A590000A40000-000000067F0000400200008A590000A44000__0000003903F1CFE8\n000000067F0000400200008A590000A40000-000000067F0000400200008A590000A44000__0000003B99F7F8A0\n000000067F0000400200008A590000A40000-000000067F0000400200008A590000A44000__0000005D2FFFFB38\n000000067F0000400200008A590000A40000-000000067F0000400200008A590000A44000__00000073AD3FE6B8\n000000067F0000400200008A590000A40000-000000067F0000400200008A590000A44000__000000914E3F38F0\n000000067F0000400200008A590000A40000-000000067F0000400200008A590000A44000__000000931B33AE68\n000000067F0000400200008A590000A40000-000000067F0000400200008A590000A44000__000000931B9AFDF8\n000000067F0000400200008A590000A40089-000000067F0000400200008A590100000000__000000124823FE31-00000012F7CBDEA1\n000000067F0000400200008A590000A4038F-000000067F0000400200008A590000A48D60__00000012F7CBDEA1-00000013977BD5E1\n000000067F0000400200008A590000A44000-000000067F0000400200008A590000A48000__0000001C725A2400\n000000067F0000400200008A590000A44000-000000067F0000400200008A590000A48000__0000001C760FA190\n000000067F0000400200008A590000A44000-000000067F0000400200008A590000A48000__00000038E67ABFA0\n000000067F0000400200008A590000A44000-000000067F0000400200008A590000A48000__0000003903F1CFE8\n000000067F0000400200008A590000A44000-000000067F0000400200008A590000A48000__0000003B99F7F8A0\n000000067F0000400200008A590000A44000-000000067F0000400200008A590000A48000__0000005D2FFFFB38\n000000067F0000400200008A590000A44000-000000067F0000400200008A590000A48000__00000073AD3FE6B8\n000000067F0000400200008A590000A44000-000000067F0000400200008A590000A48000__000000914E3F38F0\n000000067F0000400200008A590000A44000-000000067F0000400200008A590000A48000__000000931B33AE68\n000000067F0000400200008A590000A44000-000000067F0000400200008A590000A48000__000000931B9AFDF8\n000000067F0000400200008A590000A48000-000000067F0000400200008A590000A4C000__0000001C725A2400\n000000067F0000400200008A590000A48000-000000067F0000400200008A590000A4C000__0000001C760FA19
0\n000000067F0000400200008A590000A48000-000000067F0000400200008A590000A4C000__00000038E67ABFA0\n000000067F0000400200008A590000A48000-000000067F0000400200008A590000A4C000__0000003903F1CFE8\n000000067F0000400200008A590000A48000-000000067F0000400200008A590000A4C000__0000003B99F7F8A0\n000000067F0000400200008A590000A48000-000000067F0000400200008A590000A4C000__0000005D2FFFFB38\n000000067F0000400200008A590000A48000-000000067F0000400200008A590000A4C000__00000073AD3FE6B8\n000000067F0000400200008A590000A48000-000000067F0000400200008A590000A4C000__000000914E3F38F0\n000000067F0000400200008A590000A48000-000000067F0000400200008A590000A4C000__000000931B33AE68\n000000067F0000400200008A590000A48000-000000067F0000400200008A590000A4C000__000000931B9AFDF8\n000000067F0000400200008A590000A48D60-000000067F0000400200008A590000A51735__00000012F7CBDEA1-00000013977BD5E1\n000000067F0000400200008A590000A4C000-000000067F0000400200008A590000A50000__0000001C725A2400\n000000067F0000400200008A590000A4C000-000000067F0000400200008A590000A50000__0000001C760FA190\n000000067F0000400200008A590000A4C000-000000067F0000400200008A590000A50000__00000038E67ABFA0\n000000067F0000400200008A590000A4C000-000000067F0000400200008A590000A50000__0000003903F1CFE8\n000000067F0000400200008A590000A4C000-000000067F0000400200008A590000A50000__0000003B99F7F8A0\n000000067F0000400200008A590000A4C000-000000067F0000400200008A590000A50000__0000005D2FFFFB38\n000000067F0000400200008A590000A4C000-000000067F0000400200008A590000A50000__00000073AD3FE6B8\n000000067F0000400200008A590000A4C000-000000067F0000400200008A590000A50000__000000914E3F38F0\n000000067F0000400200008A590000A4C000-000000067F0000400200008A590000A50000__000000931B33AE68\n000000067F0000400200008A590000A4C000-000000067F0000400200008A590000A50000__000000931B9AFDF8\n000000067F0000400200008A590000A50000-000000067F0000400200008A590000A54000__0000001C725A2400\n000000067F0000400200008A590000A50000-000000067F0000400200008A590000A54000__0000001C760FA190\n000000067F0000400200008A590
000A50000-000000067F0000400200008A590000A54000__00000038E67ABFA0\n000000067F0000400200008A590000A50000-000000067F0000400200008A590000A54000__0000003903F1CFE8\n000000067F0000400200008A590000A50000-000000067F0000400200008A590000A54000__0000003B99F7F8A0\n000000067F0000400200008A590000A50000-000000067F0000400200008A590000A54000__0000005D2FFFFB38\n000000067F0000400200008A590000A50000-000000067F0000400200008A590000A54000__00000073AD3FE6B8\n000000067F0000400200008A590000A50000-000000067F0000400200008A590000A54000__000000914E3F38F0\n000000067F0000400200008A590000A50000-000000067F0000400200008A590000A54000__000000931B33AE68\n000000067F0000400200008A590000A50000-000000067F0000400200008A590000A54000__000000931B9AFDF8\n000000067F0000400200008A590000A51735-000000067F0000400200008A590000A5A101__00000012F7CBDEA1-00000013977BD5E1\n000000067F0000400200008A590000A54000-000000067F0000400200008A590000A58000__0000001C725A2400\n000000067F0000400200008A590000A54000-000000067F0000400200008A590000A58000__0000001C760FA190\n000000067F0000400200008A590000A54000-000000067F0000400200008A590000A58000__00000038E67ABFA0\n000000067F0000400200008A590000A54000-000000067F0000400200008A590000A58000__0000003903F1CFE8\n000000067F0000400200008A590000A54000-000000067F0000400200008A590000A58000__0000003B99F7F8A0\n000000067F0000400200008A590000A54000-000000067F0000400200008A590000A58000__0000005D2FFFFB38\n000000067F0000400200008A590000A54000-000000067F0000400200008A590000A58000__00000073AD3FE6B8\n000000067F0000400200008A590000A54000-000000067F0000400200008A590000A58000__000000914E3F38F0\n000000067F0000400200008A590000A54000-000000067F0000400200008A590000A58000__000000931B33AE68\n000000067F0000400200008A590000A54000-000000067F0000400200008A590000A58000__000000931B9AFDF8\n000000067F0000400200008A590000A58000-000000067F0000400200008A590000A5C000__0000001C725A2400\n000000067F0000400200008A590000A58000-000000067F0000400200008A590000A5C000__0000001C760FA190\n000000067F0000400200008A590000A58000-000000067F0000400200
008A590000A5C000__00000038E67ABFA0\n000000067F0000400200008A590000A58000-000000067F0000400200008A590000A5C000__0000003903F1CFE8\n000000067F0000400200008A590000A58000-000000067F0000400200008A590000A5C000__0000003B99F7F8A0\n000000067F0000400200008A590000A58000-000000067F0000400200008A590000A5C000__0000005D2FFFFB38\n000000067F0000400200008A590000A58000-000000067F0000400200008A590000A5C000__00000073AD3FE6B8\n000000067F0000400200008A590000A58000-000000067F0000400200008A590000A5C000__000000914E3F38F0\n000000067F0000400200008A590000A58000-000000067F0000400200008A590000A5C000__000000931B33AE68\n000000067F0000400200008A590000A58000-000000067F0000400200008A590000A5C000__000000931B9AFDF8\n000000067F0000400200008A590000A5A101-000000067F0000400200008A590000A62AD2__00000012F7CBDEA1-00000013977BD5E1\n000000067F0000400200008A590000A5C000-000000067F0000400200008A590000A60000__0000001C725A2400\n000000067F0000400200008A590000A5C000-000000067F0000400200008A590000A60000__0000001C760FA190\n000000067F0000400200008A590000A5C000-000000067F0000400200008A590000A60000__00000038E67ABFA0\n000000067F0000400200008A590000A5C000-000000067F0000400200008A590000A60000__0000003903F1CFE8\n000000067F0000400200008A590000A5C000-000000067F0000400200008A590000A60000__0000003B99F7F8A0\n000000067F0000400200008A590000A5C000-000000067F0000400200008A590000A60000__0000005D2FFFFB38\n000000067F0000400200008A590000A5C000-000000067F0000400200008A590000A60000__00000073AD3FE6B8\n000000067F0000400200008A590000A5C000-000000067F0000400200008A590000A60000__000000914E3F38F0\n000000067F0000400200008A590000A5C000-000000067F0000400200008A590000A60000__000000931B33AE68\n000000067F0000400200008A590000A5C000-000000067F0000400200008A590000A60000__000000931B9AFDF8\n000000067F0000400200008A590000A60000-000000067F0000400200008A590000A64000__0000001C725A2400\n000000067F0000400200008A590000A60000-000000067F0000400200008A590000A64000__0000001C760FA190\n000000067F0000400200008A590000A60000-000000067F0000400200008A590000A64000__00000038E67A
BFA0\n000000067F0000400200008A590000A60000-000000067F0000400200008A590000A64000__0000003903F1CFE8\n000000067F0000400200008A590000A60000-000000067F0000400200008A590000A64000__0000003B99F7F8A0\n000000067F0000400200008A590000A60000-000000067F0000400200008A590000A64000__0000005D2FFFFB38\n000000067F0000400200008A590000A60000-000000067F0000400200008A590000A64000__00000073AD3FE6B8\n000000067F0000400200008A590000A60000-000000067F0000400200008A590000A64000__000000914E3F38F0\n000000067F0000400200008A590000A60000-000000067F0000400200008A590000A64000__000000931B33AE68\n000000067F0000400200008A590000A60000-000000067F0000400200008A590000A64000__000000931B9AFDF8\n000000067F0000400200008A590000A62AD2-000000067F0000400200008A590000A6B4C1__00000012F7CBDEA1-00000013977BD5E1\n000000067F0000400200008A590000A64000-000000067F0000400200008A590000A68000__0000001C725A2400\n000000067F0000400200008A590000A64000-000000067F0000400200008A590000A68000__0000001C760FA190\n000000067F0000400200008A590000A64000-000000067F0000400200008A590000A68000__00000038E67ABFA0\n000000067F0000400200008A590000A64000-000000067F0000400200008A590000A68000__0000003903F1CFE8\n000000067F0000400200008A590000A64000-000000067F0000400200008A590000A68000__0000003B99F7F8A0\n000000067F0000400200008A590000A64000-000000067F0000400200008A590000A68000__0000005D2FFFFB38\n000000067F0000400200008A590000A64000-000000067F0000400200008A590000A68000__00000073AD3FE6B8\n000000067F0000400200008A590000A64000-000000067F0000400200008A590000A68000__000000914E3F38F0\n000000067F0000400200008A590000A64000-000000067F0000400200008A590000A68000__000000931B33AE68\n000000067F0000400200008A590000A64000-000000067F0000400200008A590000A68000__000000931B9AFDF8\n000000067F0000400200008A590000A68000-000000067F0000400200008A590000A6C000__0000001C725A2400\n000000067F0000400200008A590000A68000-000000067F0000400200008A590000A6C000__0000001C760FA190\n000000067F0000400200008A590000A68000-000000067F0000400200008A590000A6C000__00000038E67ABFA0\n000000067F0000400200008A
590000A68000-000000067F0000400200008A590000A6C000__0000003903F1CFE8\n000000067F0000400200008A590000A68000-000000067F0000400200008A590000A6C000__0000003B99F7F8A0\n000000067F0000400200008A590000A68000-000000067F0000400200008A590000A6C000__0000005D2FFFFB38\n000000067F0000400200008A590000A68000-000000067F0000400200008A590000A6C000__00000073AD3FE6B8\n000000067F0000400200008A590000A68000-000000067F0000400200008A590000A6C000__000000914E3F38F0\n000000067F0000400200008A590000A68000-000000067F0000400200008A590000A6C000__000000931B33AE68\n000000067F0000400200008A590000A68000-000000067F0000400200008A590000A6C000__000000931B9AFDF8\n000000067F0000400200008A590000A6B4C1-000000067F0000400200008A590000A73EAD__00000012F7CBDEA1-00000013977BD5E1\n000000067F0000400200008A590000A6C000-000000067F0000400200008A590000A70000__0000001C725A2400\n000000067F0000400200008A590000A6C000-000000067F0000400200008A590000A70000__0000001C760FA190\n000000067F0000400200008A590000A6C000-000000067F0000400200008A590000A70000__00000038E67ABFA0\n000000067F0000400200008A590000A6C000-000000067F0000400200008A590000A70000__0000003903F1CFE8\n000000067F0000400200008A590000A6C000-000000067F0000400200008A590000A70000__0000003B99F7F8A0\n000000067F0000400200008A590000A6C000-000000067F0000400200008A590000A70000__0000005D2FFFFB38\n000000067F0000400200008A590000A6C000-000000067F0000400200008A590000A70000__00000073AD3FE6B8\n000000067F0000400200008A590000A6C000-000000067F0000400200008A590000A70000__000000914E3F38F0\n000000067F0000400200008A590000A6C000-000000067F0000400200008A590000A70000__000000931B33AE68\n000000067F0000400200008A590000A6C000-000000067F0000400200008A590000A70000__000000931B9AFDF8\n000000067F0000400200008A590000A70000-000000067F0000400200008A590000A74000__0000001C725A2400\n000000067F0000400200008A590000A70000-000000067F0000400200008A590000A74000__0000001C760FA190\n000000067F0000400200008A590000A70000-000000067F0000400200008A590000A74000__00000038E67ABFA0\n000000067F0000400200008A590000A70000-000000067F0000400
200008A590000A74000__0000003903F1CFE8\n000000067F0000400200008A590000A70000-000000067F0000400200008A590000A74000__0000003B99F7F8A0\n000000067F0000400200008A590000A70000-000000067F0000400200008A590000A74000__0000005D2FFFFB38\n000000067F0000400200008A590000A70000-000000067F0000400200008A590000A74000__00000073AD3FE6B8\n000000067F0000400200008A590000A70000-000000067F0000400200008A590000A74000__000000914E3F38F0\n000000067F0000400200008A590000A70000-000000067F0000400200008A590000A74000__000000931B33AE68\n000000067F0000400200008A590000A70000-000000067F0000400200008A590000A74000__000000931B9AFDF8\n000000067F0000400200008A590000A73EAD-000000067F0000400200008A590000A7C891__00000012F7CBDEA1-00000013977BD5E1\n000000067F0000400200008A590000A74000-000000067F0000400200008A590000A78000__0000001C725A2400\n000000067F0000400200008A590000A74000-000000067F0000400200008A590000A78000__0000001C760FA190\n000000067F0000400200008A590000A74000-000000067F0000400200008A590000A78000__00000038E67ABFA0\n000000067F0000400200008A590000A74000-000000067F0000400200008A590000A78000__0000003903F1CFE8\n000000067F0000400200008A590000A74000-000000067F0000400200008A590000A78000__0000003B99F7F8A0\n000000067F0000400200008A590000A74000-000000067F0000400200008A590000A78000__0000005D2FFFFB38\n000000067F0000400200008A590000A74000-000000067F0000400200008A590000A78000__00000073AD3FE6B8\n000000067F0000400200008A590000A74000-000000067F0000400200008A590000A78000__000000914E3F38F0\n000000067F0000400200008A590000A74000-000000067F0000400200008A590000A78000__000000931B33AE68\n000000067F0000400200008A590000A74000-000000067F0000400200008A590000A78000__000000931B9AFDF8\n000000067F0000400200008A590000A78000-000000067F0000400200008A590000A7C000__0000001C725A2400\n000000067F0000400200008A590000A78000-000000067F0000400200008A590000A7C000__0000001C760FA190\n000000067F0000400200008A590000A78000-000000067F0000400200008A590000A7C000__00000038E67ABFA0\n000000067F0000400200008A590000A78000-000000067F0000400200008A590000A7C000__000000390
3F1CFE8\n000000067F0000400200008A590000A78000-000000067F0000400200008A590000A7C000__0000003B99F7F8A0\n000000067F0000400200008A590000A78000-000000067F0000400200008A590000A7C000__0000005D2FFFFB38\n000000067F0000400200008A590000A78000-000000067F0000400200008A590000A7C000__00000073AD3FE6B8\n000000067F0000400200008A590000A78000-000000067F0000400200008A590000A7C000__000000914E3F38F0\n000000067F0000400200008A590000A78000-000000067F0000400200008A590000A7C000__000000931B33AE68\n000000067F0000400200008A590000A78000-000000067F0000400200008A590000A7C000__000000931B9AFDF8\n000000067F0000400200008A590000A7C000-000000067F0000400200008A590000A80000__0000001C725A2400\n000000067F0000400200008A590000A7C000-000000067F0000400200008A590000A80000__0000001C760FA190\n000000067F0000400200008A590000A7C000-000000067F0000400200008A590000A80000__00000038E67ABFA0\n000000067F0000400200008A590000A7C000-000000067F0000400200008A590000A80000__0000003903F1CFE8\n000000067F0000400200008A590000A7C000-000000067F0000400200008A590000A80000__0000003B99F7F8A0\n000000067F0000400200008A590000A7C000-000000067F0000400200008A590000A80000__0000005D2FFFFB38\n000000067F0000400200008A590000A7C000-000000067F0000400200008A590000A80000__00000073AD3FE6B8\n000000067F0000400200008A590000A7C000-000000067F0000400200008A590000A80000__000000914E3F38F0\n000000067F0000400200008A590000A7C000-000000067F0000400200008A590000A80000__000000931B33AE68\n000000067F0000400200008A590000A7C000-000000067F0000400200008A590000A80000__000000931B9AFDF8\n000000067F0000400200008A590000A7C891-000000067F0000400200008A590000A85266__00000012F7CBDEA1-00000013977BD5E1\n000000067F0000400200008A590000A80000-000000067F0000400200008A590000A84000__0000001C725A2400\n000000067F0000400200008A590000A80000-000000067F0000400200008A590000A84000__0000001C760FA190\n000000067F0000400200008A590000A80000-000000067F0000400200008A590000A84000__00000038E67ABFA0\n000000067F0000400200008A590000A80000-000000067F0000400200008A590000A84000__0000003903F1CFE8\n000000067F00004002000
08A590000A80000-000000067F0000400200008A590000A84000__0000003B99F7F8A0\n000000067F0000400200008A590000A80000-000000067F0000400200008A590000A84000__0000005D2FFFFB38\n000000067F0000400200008A590000A80000-000000067F0000400200008A590000A84000__00000073AD3FE6B8\n000000067F0000400200008A590000A80000-000000067F0000400200008A590000A84000__000000914E3F38F0\n000000067F0000400200008A590000A80000-000000067F0000400200008A590000A84000__000000931B33AE68\n000000067F0000400200008A590000A80000-000000067F0000400200008A590000A84000__000000931B9AFDF8\n000000067F0000400200008A590000A84000-000000067F0000400200008A590000A88000__0000001C725A2400\n000000067F0000400200008A590000A84000-000000067F0000400200008A590000A88000__0000001C760FA190\n000000067F0000400200008A590000A84000-000000067F0000400200008A590000A88000__00000038E67ABFA0\n000000067F0000400200008A590000A84000-000000067F0000400200008A590000A88000__0000003903F1CFE8\n000000067F0000400200008A590000A84000-000000067F0000400200008A590000A88000__0000003B99F7F8A0\n000000067F0000400200008A590000A84000-000000067F0000400200008A590000A88000__0000005D2FFFFB38\n000000067F0000400200008A590000A84000-000000067F0000400200008A590000A88000__00000073AD3FE6B8\n000000067F0000400200008A590000A84000-000000067F0000400200008A590000A88000__000000914E3F38F0\n000000067F0000400200008A590000A84000-000000067F0000400200008A590000A88000__000000931B33AE68\n000000067F0000400200008A590000A84000-000000067F0000400200008A590000A88000__000000931B9AFDF8\n000000067F0000400200008A590000A85266-000000067F0000400200008A590000A8DC37__00000012F7CBDEA1-00000013977BD5E1\n000000067F0000400200008A590000A88000-000000067F0000400200008A590000A8C000__0000001C725A2400\n000000067F0000400200008A590000A88000-000000067F0000400200008A590000A8C000__0000001C760FA190\n000000067F0000400200008A590000A88000-000000067F0000400200008A590000A8C000__00000038E67ABFA0\n000000067F0000400200008A590000A88000-000000067F0000400200008A590000A8C000__0000003903F1CFE8\n000000067F0000400200008A590000A88000-000000067F0000
400200008A590000A8C000__0000003B99F7F8A0\n000000067F0000400200008A590000A88000-000000067F0000400200008A590000A8C000__0000005D2FFFFB38\n000000067F0000400200008A590000A88000-000000067F0000400200008A590000A8C000__00000073AD3FE6B8\n000000067F0000400200008A590000A88000-000000067F0000400200008A590000A8C000__000000914E3F38F0\n000000067F0000400200008A590000A88000-000000067F0000400200008A590000A8C000__000000931B33AE68\n000000067F0000400200008A590000A88000-000000067F0000400200008A590000A8C000__000000931B9AFDF8\n000000067F0000400200008A590000A8C000-000000067F0000400200008A590000A90000__0000001C725A2400\n000000067F0000400200008A590000A8C000-000000067F0000400200008A590000A90000__0000001C760FA190\n000000067F0000400200008A590000A8C000-000000067F0000400200008A590000A90000__00000038E67ABFA0\n000000067F0000400200008A590000A8C000-000000067F0000400200008A590000A90000__0000003903F1CFE8\n000000067F0000400200008A590000A8C000-000000067F0000400200008A590000A90000__0000003B99F7F8A0\n000000067F0000400200008A590000A8C000-000000067F0000400200008A590000A90000__0000005D2FFFFB38\n000000067F0000400200008A590000A8C000-000000067F0000400200008A590000A90000__00000073AD3FE6B8\n000000067F0000400200008A590000A8C000-000000067F0000400200008A590000A90000__000000914E3F38F0\n000000067F0000400200008A590000A8C000-000000067F0000400200008A590000A90000__000000931B33AE68\n000000067F0000400200008A590000A8C000-000000067F0000400200008A590000A90000__000000931B9AFDF8\n000000067F0000400200008A590000A8DC37-000000067F0000400200008A590000A9660D__00000012F7CBDEA1-00000013977BD5E1\n000000067F0000400200008A590000A90000-000000067F0000400200008A590000A94000__0000001C725A2400\n000000067F0000400200008A590000A90000-000000067F0000400200008A590000A94000__0000001C760FA190\n000000067F0000400200008A590000A90000-000000067F0000400200008A590000A94000__00000038E67ABFA0\n000000067F0000400200008A590000A90000-000000067F0000400200008A590000A94000__0000003903F1CFE8\n000000067F0000400200008A590000A90000-000000067F0000400200008A590000A94000__000000
3B99F7F8A0\n000000067F0000400200008A590000A90000-000000067F0000400200008A590000A94000__0000005D2FFFFB38\n000000067F0000400200008A590000A90000-000000067F0000400200008A590000A94000__00000073AD3FE6B8\n000000067F0000400200008A590000A90000-000000067F0000400200008A590000A94000__000000914E3F38F0\n000000067F0000400200008A590000A90000-000000067F0000400200008A590000A94000__000000931B33AE68\n000000067F0000400200008A590000A90000-000000067F0000400200008A590000A94000__000000931B9AFDF8\n000000067F0000400200008A590000A94000-000000067F0000400200008A590000A98000__000000146DBFF3C0\n000000067F0000400200008A590000A94000-000000067F0000400200008A590000A98000__0000001C760FA190\n000000067F0000400200008A590000A94000-000000067F0000400200008A590000A98000__00000038E67ABFA0\n000000067F0000400200008A590000A94000-000000067F0000400200008A590000A98000__0000003903F1CFE8\n000000067F0000400200008A590000A94000-000000067F0000400200008A590000A98000__0000003B99F7F8A0\n000000067F0000400200008A590000A94000-000000067F0000400200008A590000A98000__0000005D2FFFFB38\n000000067F0000400200008A590000A94000-000000067F0000400200008A590000A98000__00000073AD3FE6B8\n000000067F0000400200008A590000A94000-000000067F0000400200008A590000A98000__000000914E3F38F0\n000000067F0000400200008A590000A94000-000000067F0000400200008A590000A98000__000000931B33AE68\n000000067F0000400200008A590000A94000-000000067F0000400200008A590000A98000__000000931B9AFDF8\n000000067F0000400200008A590000A9660D-000000067F0000400200008A590100000000__00000012F7CBDEA1-00000013977BD5E1\n000000067F0000400200008A590000A968EA-000000067F0000400200008A590000A9F2CA__00000013977BD5E1-000000144723F489\n000000067F0000400200008A590000A98000-000000067F0000400200008A590000A9C000__000000146DBFF3C0\n000000067F0000400200008A590000A98000-000000067F0000400200008A590000A9C000__0000001C760FA190\n000000067F0000400200008A590000A98000-000000067F0000400200008A590000A9C000__00000038E67ABFA0\n000000067F0000400200008A590000A98000-000000067F0000400200008A590000A9C000__0000003903F1CFE8\n0
00000067F0000400200008A590000A98000-000000067F0000400200008A590000A9C000__0000003B99F7F8A0\n000000067F0000400200008A590000A98000-000000067F0000400200008A590000A9C000__0000005D2FFFFB38\n000000067F0000400200008A590000A98000-000000067F0000400200008A590000A9C000__00000073AD3FE6B8\n000000067F0000400200008A590000A98000-000000067F0000400200008A590000A9C000__000000914E3F38F0\n000000067F0000400200008A590000A98000-000000067F0000400200008A590000A9C000__000000931B33AE68\n000000067F0000400200008A590000A98000-000000067F0000400200008A590000A9C000__000000931B9AFDF8\n000000067F0000400200008A590000A9C000-000000067F0000400200008A590000AA0000__000000146DBFF3C0\n000000067F0000400200008A590000A9C000-000000067F0000400200008A590000AA0000__0000001C760FA190\n000000067F0000400200008A590000A9C000-000000067F0000400200008A590000AA0000__00000038E67ABFA0\n000000067F0000400200008A590000A9C000-000000067F0000400200008A590000AA0000__0000003903F1CFE8\n000000067F0000400200008A590000A9C000-000000067F0000400200008A590000AA0000__0000003B99F7F8A0\n000000067F0000400200008A590000A9C000-000000067F0000400200008A590000AA0000__0000005D2FFFFB38\n000000067F0000400200008A590000A9C000-000000067F0000400200008A590000AA0000__00000073AD3FE6B8\n000000067F0000400200008A590000A9C000-000000067F0000400200008A590000AA0000__000000914E3F38F0\n000000067F0000400200008A590000A9C000-000000067F0000400200008A590000AA0000__000000931B33AE68\n000000067F0000400200008A590000A9C000-000000067F0000400200008A590000AA0000__000000931B9AFDF8\n000000067F0000400200008A590000A9F2CA-000000067F0000400200008A590000AA7CAE__00000013977BD5E1-000000144723F489\n000000067F0000400200008A590000AA0000-000000067F0000400200008A590000AA4000__000000146DBFF3C0\n000000067F0000400200008A590000AA0000-000000067F0000400200008A590000AA4000__0000001C760FA190\n000000067F0000400200008A590000AA0000-000000067F0000400200008A590000AA4000__00000038E67ABFA0\n000000067F0000400200008A590000AA0000-000000067F0000400200008A590000AA4000__0000003903F1CFE8\n000000067F0000400200008A590000A
A0000-000000067F0000400200008A590000AA4000__0000003B99F7F8A0\n000000067F0000400200008A590000AA0000-000000067F0000400200008A590000AA4000__0000005D2FFFFB38\n000000067F0000400200008A590000AA0000-000000067F0000400200008A590000AA4000__00000073AD3FE6B8\n000000067F0000400200008A590000AA0000-000000067F0000400200008A590000AA4000__000000914E3F38F0\n000000067F0000400200008A590000AA0000-000000067F0000400200008A590000AA4000__000000931B33AE68\n000000067F0000400200008A590000AA0000-000000067F0000400200008A590000AA4000__000000931B9AFDF8\n000000067F0000400200008A590000AA4000-000000067F0000400200008A590000AA8000__000000146DBFF3C0\n000000067F0000400200008A590000AA4000-000000067F0000400200008A590000AA8000__0000001C760FA190\n000000067F0000400200008A590000AA4000-000000067F0000400200008A590000AA8000__00000038E67ABFA0\n000000067F0000400200008A590000AA4000-000000067F0000400200008A590000AA8000__0000003903F1CFE8\n000000067F0000400200008A590000AA4000-000000067F0000400200008A590000AA8000__0000003B99F7F8A0\n000000067F0000400200008A590000AA4000-000000067F0000400200008A590000AA8000__0000005D2FFFFB38\n000000067F0000400200008A590000AA4000-000000067F0000400200008A590000AA8000__00000073AD3FE6B8\n000000067F0000400200008A590000AA4000-000000067F0000400200008A590000AA8000__000000914E3F38F0\n000000067F0000400200008A590000AA4000-000000067F0000400200008A590000AA8000__000000931B33AE68\n000000067F0000400200008A590000AA4000-000000067F0000400200008A590000AA8000__000000931B9AFDF8\n000000067F0000400200008A590000AA7CAE-000000067F0000400200008A590000AB0693__00000013977BD5E1-000000144723F489\n000000067F0000400200008A590000AA8000-000000067F0000400200008A590000AAC000__000000146DBFF3C0\n000000067F0000400200008A590000AA8000-000000067F0000400200008A590000AAC000__0000001C760FA190\n000000067F0000400200008A590000AA8000-000000067F0000400200008A590000AAC000__00000038E67ABFA0\n000000067F0000400200008A590000AA8000-000000067F0000400200008A590000AAC000__0000003903F1CFE8\n000000067F0000400200008A590000AA8000-000000067F0000400200008A
590000AAC000__0000003B99F7F8A0\n000000067F0000400200008A590000AA8000-000000067F0000400200008A590000AAC000__0000005D2FFFFB38\n000000067F0000400200008A590000AA8000-000000067F0000400200008A590000AAC000__00000073AD3FE6B8\n000000067F0000400200008A590000AA8000-000000067F0000400200008A590000AAC000__000000914E3F38F0\n000000067F0000400200008A590000AA8000-000000067F0000400200008A590000AAC000__000000931B33AE68\n000000067F0000400200008A590000AA8000-000000067F0000400200008A590000AAC000__000000931B9AFDF8\n000000067F0000400200008A590000AAC000-000000067F0000400200008A590000AB0000__000000146DBFF3C0\n000000067F0000400200008A590000AAC000-000000067F0000400200008A590000AB0000__0000001C760FA190\n000000067F0000400200008A590000AAC000-000000067F0000400200008A590000AB0000__00000038E67ABFA0\n000000067F0000400200008A590000AAC000-000000067F0000400200008A590000AB0000__0000003903F1CFE8\n000000067F0000400200008A590000AAC000-000000067F0000400200008A590000AB0000__0000003B99F7F8A0\n000000067F0000400200008A590000AAC000-000000067F0000400200008A590000AB0000__0000005D2FFFFB38\n000000067F0000400200008A590000AAC000-000000067F0000400200008A590000AB0000__00000073AD3FE6B8\n000000067F0000400200008A590000AAC000-000000067F0000400200008A590000AB0000__000000914E3F38F0\n000000067F0000400200008A590000AAC000-000000067F0000400200008A590000AB0000__000000931B33AE68\n000000067F0000400200008A590000AAC000-000000067F0000400200008A590000AB0000__000000931B9AFDF8\n000000067F0000400200008A590000AB0000-000000067F0000400200008A590000AB4000__000000146DBFF3C0\n000000067F0000400200008A590000AB0000-000000067F0000400200008A590000AB4000__0000001C760FA190\n000000067F0000400200008A590000AB0000-000000067F0000400200008A590000AB4000__00000038E67ABFA0\n000000067F0000400200008A590000AB0000-000000067F0000400200008A590000AB4000__0000003903F1CFE8\n000000067F0000400200008A590000AB0000-000000067F0000400200008A590000AB4000__0000003B99F7F8A0\n000000067F0000400200008A590000AB0000-000000067F0000400200008A590000AB4000__0000005D2FFFFB38\n000000067F00004
00200008A590000AB0000-000000067F0000400200008A590000AB4000__00000073AD3FE6B8\n000000067F0000400200008A590000AB0000-000000067F0000400200008A590000AB4000__000000914E3F38F0\n000000067F0000400200008A590000AB0000-000000067F0000400200008A590000AB4000__000000931B33AE68\n000000067F0000400200008A590000AB0000-000000067F0000400200008A590000AB4000__000000931B9AFDF8\n000000067F0000400200008A590000AB0693-000000067F0000400200008A590000AB9074__00000013977BD5E1-000000144723F489\n000000067F0000400200008A590000AB4000-000000067F0000400200008A590000AB8000__000000146DBFF3C0\n000000067F0000400200008A590000AB4000-000000067F0000400200008A590000AB8000__0000001C760FA190\n000000067F0000400200008A590000AB4000-000000067F0000400200008A590000AB8000__00000038E67ABFA0\n000000067F0000400200008A590000AB4000-000000067F0000400200008A590000AB8000__0000003903F1CFE8\n000000067F0000400200008A590000AB4000-000000067F0000400200008A590000AB8000__0000003B99F7F8A0\n000000067F0000400200008A590000AB4000-000000067F0000400200008A590000AB8000__0000005D2FFFFB38\n000000067F0000400200008A590000AB4000-000000067F0000400200008A590000AB8000__00000073AD3FE6B8\n000000067F0000400200008A590000AB4000-000000067F0000400200008A590000AB8000__000000914E3F38F0\n000000067F0000400200008A590000AB4000-000000067F0000400200008A590000AB8000__000000931B33AE68\n000000067F0000400200008A590000AB4000-000000067F0000400200008A590000AB8000__000000931B9AFDF8\n000000067F0000400200008A590000AB8000-000000067F0000400200008A590000ABC000__000000146DBFF3C0\n000000067F0000400200008A590000AB8000-000000067F0000400200008A590000ABC000__0000001C760FA190\n000000067F0000400200008A590000AB8000-000000067F0000400200008A590000ABC000__00000038E67ABFA0\n000000067F0000400200008A590000AB8000-000000067F0000400200008A590000ABC000__0000003903F1CFE8\n000000067F0000400200008A590000AB8000-000000067F0000400200008A590000ABC000__0000003B99F7F8A0\n000000067F0000400200008A590000AB8000-000000067F0000400200008A590000ABC000__0000005D2FFFFB38\n000000067F0000400200008A590000AB8000-00000006
7F0000400200008A590000ABC000__00000073AD3FE6B8\n000000067F0000400200008A590000AB8000-000000067F0000400200008A590000ABC000__000000914E3F38F0\n000000067F0000400200008A590000AB8000-000000067F0000400200008A590000ABC000__000000931B33AE68\n000000067F0000400200008A590000AB8000-000000067F0000400200008A590000ABC000__000000931B9AFDF8\n000000067F0000400200008A590000AB9074-000000067F0000400200008A590000AC1A4D__00000013977BD5E1-000000144723F489\n000000067F0000400200008A590000ABC000-000000067F0000400200008A590000AC0000__000000146DBFF3C0\n000000067F0000400200008A590000ABC000-000000067F0000400200008A590000AC0000__0000001C760FA190\n000000067F0000400200008A590000ABC000-000000067F0000400200008A590000AC0000__00000038E67ABFA0\n000000067F0000400200008A590000ABC000-000000067F0000400200008A590000AC0000__0000003903F1CFE8\n000000067F0000400200008A590000ABC000-000000067F0000400200008A590000AC0000__0000003B99F7F8A0\n000000067F0000400200008A590000ABC000-000000067F0000400200008A590000AC0000__0000005D2FFFFB38\n000000067F0000400200008A590000ABC000-000000067F0000400200008A590000AC0000__00000073AD3FE6B8\n000000067F0000400200008A590000ABC000-000000067F0000400200008A590000AC0000__000000914E3F38F0\n000000067F0000400200008A590000ABC000-000000067F0000400200008A590000AC0000__000000931B33AE68\n000000067F0000400200008A590000ABC000-000000067F0000400200008A590000AC0000__000000931B9AFDF8\n000000067F0000400200008A590000AC0000-000000067F0000400200008A590000AC4000__000000146DBFF3C0\n000000067F0000400200008A590000AC0000-000000067F0000400200008A590000AC4000__0000001C760FA190\n000000067F0000400200008A590000AC0000-000000067F0000400200008A590000AC4000__00000038E67ABFA0\n000000067F0000400200008A590000AC0000-000000067F0000400200008A590000AC4000__0000003903F1CFE8\n000000067F0000400200008A590000AC0000-000000067F0000400200008A590000AC4000__0000003B99F7F8A0\n000000067F0000400200008A590000AC0000-000000067F0000400200008A590000AC4000__0000005D2FFFFB38\n000000067F0000400200008A590000AC0000-000000067F0000400200008A590000AC4000__
00000073AD3FE6B8\n000000067F0000400200008A590000AC0000-000000067F0000400200008A590000AC4000__000000914E3F38F0\n000000067F0000400200008A590000AC0000-000000067F0000400200008A590000AC4000__000000931B33AE68\n000000067F0000400200008A590000AC0000-000000067F0000400200008A590000AC4000__000000931B9AFDF8\n000000067F0000400200008A590000AC1A4D-000000067F0000400200008A590000ACA420__00000013977BD5E1-000000144723F489\n000000067F0000400200008A590000AC4000-000000067F0000400200008A590000AC8000__000000146DBFF3C0\n000000067F0000400200008A590000AC4000-000000067F0000400200008A590000AC8000__0000001C760FA190\n000000067F0000400200008A590000AC4000-000000067F0000400200008A590000AC8000__00000038E67ABFA0\n000000067F0000400200008A590000AC4000-000000067F0000400200008A590000AC8000__0000003903F1CFE8\n000000067F0000400200008A590000AC4000-000000067F0000400200008A590000AC8000__0000003B99F7F8A0\n000000067F0000400200008A590000AC4000-000000067F0000400200008A590000AC8000__0000005D2FFFFB38\n000000067F0000400200008A590000AC4000-000000067F0000400200008A590000AC8000__00000073AD3FE6B8\n000000067F0000400200008A590000AC4000-000000067F0000400200008A590000AC8000__000000914E3F38F0\n000000067F0000400200008A590000AC4000-000000067F0000400200008A590000AC8000__000000931B33AE68\n000000067F0000400200008A590000AC4000-000000067F0000400200008A590000AC8000__000000931B9AFDF8\n000000067F0000400200008A590000AC8000-000000067F0000400200008A590000ACC000__000000146DBFF3C0\n000000067F0000400200008A590000AC8000-000000067F0000400200008A590000ACC000__0000001C760FA190\n000000067F0000400200008A590000AC8000-000000067F0000400200008A590000ACC000__00000038E67ABFA0\n000000067F0000400200008A590000AC8000-000000067F0000400200008A590000ACC000__0000003903F1CFE8\n000000067F0000400200008A590000AC8000-000000067F0000400200008A590000ACC000__0000003B99F7F8A0\n000000067F0000400200008A590000AC8000-000000067F0000400200008A590000ACC000__0000005D2FFFFB38\n000000067F0000400200008A590000AC8000-000000067F0000400200008A590000ACC000__00000073AD3FE6B8\n000000067F00
00400200008A590000AC8000-000000067F0000400200008A590000ACC000__000000914E3F38F0\n000000067F0000400200008A590000AC8000-000000067F0000400200008A590000ACC000__000000931B33AE68\n000000067F0000400200008A590000AC8000-000000067F0000400200008A590000ACC000__000000931B9AFDF8\n000000067F0000400200008A590000ACA420-000000067F0000400200008A590000AD2DFB__00000013977BD5E1-000000144723F489\n000000067F0000400200008A590000ACC000-000000067F0000400200008A590000AD0000__000000146DBFF3C0\n000000067F0000400200008A590000ACC000-000000067F0000400200008A590000AD0000__0000001C760FA190\n000000067F0000400200008A590000ACC000-000000067F0000400200008A590000AD0000__00000038E67ABFA0\n000000067F0000400200008A590000ACC000-000000067F0000400200008A590000AD0000__0000003903F1CFE8\n000000067F0000400200008A590000ACC000-000000067F0000400200008A590000AD0000__0000003B99F7F8A0\n000000067F0000400200008A590000ACC000-000000067F0000400200008A590000AD0000__0000005D2FFFFB38\n000000067F0000400200008A590000ACC000-000000067F0000400200008A590000AD0000__00000073AD3FE6B8\n000000067F0000400200008A590000ACC000-000000067F0000400200008A590000AD0000__000000914E3F38F0\n000000067F0000400200008A590000ACC000-000000067F0000400200008A590000AD0000__000000931B33AE68\n000000067F0000400200008A590000ACC000-000000067F0000400200008A590000AD0000__000000931B9AFDF8\n000000067F0000400200008A590000AD0000-000000067F0000400200008A590000AD4000__000000146DBFF3C0\n000000067F0000400200008A590000AD0000-000000067F0000400200008A590000AD4000__0000001C760FA190\n000000067F0000400200008A590000AD0000-000000067F0000400200008A590000AD4000__00000038E67ABFA0\n000000067F0000400200008A590000AD0000-000000067F0000400200008A590000AD4000__0000003903F1CFE8\n000000067F0000400200008A590000AD0000-000000067F0000400200008A590000AD4000__0000003B99F7F8A0\n000000067F0000400200008A590000AD0000-000000067F0000400200008A590000AD4000__0000005D2FFFFB38\n000000067F0000400200008A590000AD0000-000000067F0000400200008A590000AD4000__00000073AD3FE6B8\n000000067F0000400200008A590000AD0000-00000
0067F0000400200008A590000AD4000__000000914E3F38F0\n000000067F0000400200008A590000AD0000-000000067F0000400200008A590000AD4000__000000931B33AE68\n000000067F0000400200008A590000AD0000-000000067F0000400200008A590000AD4000__000000931B9AFDF8\n000000067F0000400200008A590000AD2DFB-000000067F0000400200008A590000ADB7D7__00000013977BD5E1-000000144723F489\n000000067F0000400200008A590000AD4000-000000067F0000400200008A590000AD8000__000000146DBFF3C0\n000000067F0000400200008A590000AD4000-000000067F0000400200008A590000AD8000__0000001C760FA190\n000000067F0000400200008A590000AD4000-000000067F0000400200008A590000AD8000__00000038E67ABFA0\n000000067F0000400200008A590000AD4000-000000067F0000400200008A590000AD8000__0000003903F1CFE8\n000000067F0000400200008A590000AD4000-000000067F0000400200008A590000AD8000__0000003B99F7F8A0\n000000067F0000400200008A590000AD4000-000000067F0000400200008A590000AD8000__0000005D2FFFFB38\n000000067F0000400200008A590000AD4000-000000067F0000400200008A590000AD8000__00000073AD3FE6B8\n000000067F0000400200008A590000AD4000-000000067F0000400200008A590000AD8000__000000914E3F38F0\n000000067F0000400200008A590000AD4000-000000067F0000400200008A590000AD8000__000000931B33AE68\n000000067F0000400200008A590000AD4000-000000067F0000400200008A590000AD8000__000000931B9AFDF8\n000000067F0000400200008A590000AD8000-000000067F0000400200008A590000ADC000__000000146DBFF3C0\n000000067F0000400200008A590000AD8000-000000067F0000400200008A590000ADC000__0000001C760FA190\n000000067F0000400200008A590000AD8000-000000067F0000400200008A590000ADC000__00000038E67ABFA0\n000000067F0000400200008A590000AD8000-000000067F0000400200008A590000ADC000__0000003903F1CFE8\n000000067F0000400200008A590000AD8000-000000067F0000400200008A590000ADC000__0000003B99F7F8A0\n000000067F0000400200008A590000AD8000-000000067F0000400200008A590000ADC000__0000005D2FFFFB38\n000000067F0000400200008A590000AD8000-000000067F0000400200008A590000ADC000__00000073AD3FE6B8\n000000067F0000400200008A590000AD8000-000000067F0000400200008A590000ADC00
0__000000914E3F38F0\n000000067F0000400200008A590000AD8000-000000067F0000400200008A590000ADC000__000000931B33AE68\n000000067F0000400200008A590000AD8000-000000067F0000400200008A590000ADC000__000000931B9AFDF8\n000000067F0000400200008A590000ADB7D7-000000067F0000400200008A590000AE41BC__00000013977BD5E1-000000144723F489\n000000067F0000400200008A590000ADC000-000000067F0000400200008A590000AE0000__000000146DBFF3C0\n000000067F0000400200008A590000ADC000-000000067F0000400200008A590000AE0000__0000001C760FA190\n000000067F0000400200008A590000ADC000-000000067F0000400200008A590000AE0000__00000038E67ABFA0\n000000067F0000400200008A590000ADC000-000000067F0000400200008A590000AE0000__0000003903F1CFE8\n000000067F0000400200008A590000ADC000-000000067F0000400200008A590000AE0000__0000003B99F7F8A0\n000000067F0000400200008A590000ADC000-000000067F0000400200008A590000AE0000__0000005D2FFFFB38\n000000067F0000400200008A590000ADC000-000000067F0000400200008A590000AE0000__00000073AD3FE6B8\n000000067F0000400200008A590000ADC000-000000067F0000400200008A590000AE0000__000000914E3F38F0\n000000067F0000400200008A590000ADC000-000000067F0000400200008A590000AE0000__000000931B33AE68\n000000067F0000400200008A590000ADC000-000000067F0000400200008A590000AE0000__000000931B9AFDF8\n000000067F0000400200008A590000AE0000-000000067F0000400200008A590000AE4000__000000146DBFF3C0\n000000067F0000400200008A590000AE0000-000000067F0000400200008A590000AE4000__0000001C760FA190\n000000067F0000400200008A590000AE0000-000000067F0000400200008A590000AE4000__00000038E67ABFA0\n000000067F0000400200008A590000AE0000-000000067F0000400200008A590000AE4000__0000003903F1CFE8\n000000067F0000400200008A590000AE0000-000000067F0000400200008A590000AE4000__0000003B99F7F8A0\n000000067F0000400200008A590000AE0000-000000067F0000400200008A590000AE4000__0000005D2FFFFB38\n000000067F0000400200008A590000AE0000-000000067F0000400200008A590000AE4000__00000073AD3FE6B8\n000000067F0000400200008A590000AE0000-000000067F0000400200008A590000AE4000__000000914E3F38F0\n000000067
F0000400200008A590000AE0000-000000067F0000400200008A590000AE4000__000000931B33AE68\n000000067F0000400200008A590000AE0000-000000067F0000400200008A590000AE4000__000000931B9AFDF8\n000000067F0000400200008A590000AE4000-000000067F0000400200008A590000AE8000__000000146DBFF3C0\n000000067F0000400200008A590000AE4000-000000067F0000400200008A590000AE8000__0000001C760FA190\n000000067F0000400200008A590000AE4000-000000067F0000400200008A590000AE8000__00000038E67ABFA0\n000000067F0000400200008A590000AE4000-000000067F0000400200008A590000AE8000__0000003903F1CFE8\n000000067F0000400200008A590000AE4000-000000067F0000400200008A590000AE8000__0000003B99F7F8A0\n000000067F0000400200008A590000AE4000-000000067F0000400200008A590000AE8000__0000005D2FFFFB38\n000000067F0000400200008A590000AE4000-000000067F0000400200008A590000AE8000__00000073AD3FE6B8\n000000067F0000400200008A590000AE4000-000000067F0000400200008A590000AE8000__000000914E3F38F0\n000000067F0000400200008A590000AE4000-000000067F0000400200008A590000AE8000__000000931B33AE68\n000000067F0000400200008A590000AE4000-000000067F0000400200008A590000AE8000__000000931B9AFDF8\n000000067F0000400200008A590000AE41BC-000000067F0000400200008A590000AECBAC__00000013977BD5E1-000000144723F489\n000000067F0000400200008A590000AE8000-000000067F0000400200008A590000AEC000__000000146DBFF3C0\n000000067F0000400200008A590000AE8000-000000067F0000400200008A590000AEC000__0000001C760FA190\n000000067F0000400200008A590000AE8000-000000067F0000400200008A590000AEC000__00000038E67ABFA0\n000000067F0000400200008A590000AE8000-000000067F0000400200008A590000AEC000__0000003903F1CFE8\n000000067F0000400200008A590000AE8000-000000067F0000400200008A590000AEC000__0000003B99F7F8A0\n000000067F0000400200008A590000AE8000-000000067F0000400200008A590000AEC000__0000005D2FFFFB38\n000000067F0000400200008A590000AE8000-000000067F0000400200008A590000AEC000__00000073AD3FE6B8\n000000067F0000400200008A590000AE8000-000000067F0000400200008A590000AEC000__000000914E3F38F0\n000000067F0000400200008A590000AE8000-00
0000067F0000400200008A590000AEC000__000000931B33AE68\n000000067F0000400200008A590000AE8000-000000067F0000400200008A590000AEC000__000000931B9AFDF8\n000000067F0000400200008A590000AEC000-000000067F0000400200008A590000AF0000__000000146DBFF3C0\n000000067F0000400200008A590000AEC000-000000067F0000400200008A590000AF0000__0000001C760FA190\n000000067F0000400200008A590000AEC000-000000067F0000400200008A590000AF0000__00000038E67ABFA0\n000000067F0000400200008A590000AEC000-000000067F0000400200008A590000AF0000__0000003903F1CFE8\n000000067F0000400200008A590000AEC000-000000067F0000400200008A590000AF0000__0000003B99F7F8A0\n000000067F0000400200008A590000AEC000-000000067F0000400200008A590000AF0000__0000005D2FFFFB38\n000000067F0000400200008A590000AEC000-000000067F0000400200008A590000AF0000__00000073AD3FE6B8\n000000067F0000400200008A590000AEC000-000000067F0000400200008A590000AF0000__000000914E3F38F0\n000000067F0000400200008A590000AEC000-000000067F0000400200008A590000AF0000__000000931B33AE68\n000000067F0000400200008A590000AEC000-000000067F0000400200008A590000AF0000__000000931B9AFDF8\n000000067F0000400200008A590000AECBAC-000000067F0000400200008A590000AF558D__00000013977BD5E1-000000144723F489\n000000067F0000400200008A590000AF0000-000000067F0000400200008A590000AF4000__000000146DBFF3C0\n000000067F0000400200008A590000AF0000-000000067F0000400200008A590000AF4000__0000001C760FA190\n000000067F0000400200008A590000AF0000-000000067F0000400200008A590000AF4000__00000038E67ABFA0\n000000067F0000400200008A590000AF0000-000000067F0000400200008A590000AF4000__0000003903F1CFE8\n000000067F0000400200008A590000AF0000-000000067F0000400200008A590000AF4000__0000003B99F7F8A0\n000000067F0000400200008A590000AF0000-000000067F0000400200008A590000AF4000__0000005D2FFFFB38\n000000067F0000400200008A590000AF0000-000000067F0000400200008A590000AF4000__00000073AD3FE6B8\n000000067F0000400200008A590000AF0000-000000067F0000400200008A590000AF4000__000000914E3F38F0\n000000067F0000400200008A590000AF0000-000000067F0000400200008A590000AF
4000__000000931B33AE68\n000000067F0000400200008A590000AF0000-000000067F0000400200008A590000AF4000__000000931B9AFDF8\n000000067F0000400200008A590000AF4000-000000067F0000400200008A590000AF8000__000000146DBFF3C0\n000000067F0000400200008A590000AF4000-000000067F0000400200008A590000AF8000__0000001C760FA190\n000000067F0000400200008A590000AF4000-000000067F0000400200008A590000AF8000__00000038E67ABFA0\n000000067F0000400200008A590000AF4000-000000067F0000400200008A590000AF8000__0000003903F1CFE8\n000000067F0000400200008A590000AF4000-000000067F0000400200008A590000AF8000__0000003B99F7F8A0\n000000067F0000400200008A590000AF4000-000000067F0000400200008A590000AF8000__0000005D2FFFFB38\n000000067F0000400200008A590000AF4000-000000067F0000400200008A590000AF8000__00000073AD3FE6B8\n000000067F0000400200008A590000AF4000-000000067F0000400200008A590000AF8000__000000914E3F38F0\n000000067F0000400200008A590000AF4000-000000067F0000400200008A590000AF8000__000000931B33AE68\n000000067F0000400200008A590000AF4000-000000067F0000400200008A590000AF8000__000000931B9AFDF8\n000000067F0000400200008A590000AF558D-000000067F0000400200008A590100000000__00000013977BD5E1-000000144723F489\n000000067F0000400200008A590000AF5892-000000067F0000400200008A590000AFE262__000000144723F489-00000014E6D3F501\n000000067F0000400200008A590000AF8000-000000067F0000400200008A590000AFC000__000000146DBFF3C0\n000000067F0000400200008A590000AF8000-000000067F0000400200008A590000AFC000__0000001C760FA190\n000000067F0000400200008A590000AF8000-000000067F0000400200008A590000AFC000__00000038E67ABFA0\n000000067F0000400200008A590000AF8000-000000067F0000400200008A590000AFC000__0000003903F1CFE8\n000000067F0000400200008A590000AF8000-000000067F0000400200008A590000AFC000__0000003B99F7F8A0\n000000067F0000400200008A590000AF8000-000000067F0000400200008A590000AFC000__0000005D2FFFFB38\n000000067F0000400200008A590000AF8000-000000067F0000400200008A590000AFC000__00000073AD3FE6B8\n000000067F0000400200008A590000AF8000-000000067F0000400200008A590000AFC000__0000009
14E3F38F0\n000000067F0000400200008A590000AF8000-000000067F0000400200008A590000AFC000__000000931B33AE68\n000000067F0000400200008A590000AF8000-000000067F0000400200008A590000AFC000__000000931B9AFDF8\n000000067F0000400200008A590000AFC000-000000067F0000400200008A590000B00000__000000146DBFF3C0\n000000067F0000400200008A590000AFC000-000000067F0000400200008A590000B00000__0000001C760FA190\n000000067F0000400200008A590000AFC000-000000067F0000400200008A590000B00000__00000038E67ABFA0\n000000067F0000400200008A590000AFC000-000000067F0000400200008A590000B00000__0000003903F1CFE8\n000000067F0000400200008A590000AFC000-000000067F0000400200008A590000B00000__0000003B99F7F8A0\n000000067F0000400200008A590000AFC000-000000067F0000400200008A590000B00000__0000005D2FFFFB38\n000000067F0000400200008A590000AFC000-000000067F0000400200008A590000B00000__00000073AD3FE6B8\n000000067F0000400200008A590000AFC000-000000067F0000400200008A590000B00000__000000914E3F38F0\n000000067F0000400200008A590000AFC000-000000067F0000400200008A590000B00000__000000931B33AE68\n000000067F0000400200008A590000AFC000-000000067F0000400200008A590000B00000__000000931B9AFDF8\n000000067F0000400200008A590000AFE262-000000067F0000400200008A590000B06C3B__000000144723F489-00000014E6D3F501\n000000067F0000400200008A590000B00000-000000067F0000400200008A590000B04000__000000146DBFF3C0\n000000067F0000400200008A590000B00000-000000067F0000400200008A590000B04000__0000001C760FA190\n000000067F0000400200008A590000B00000-000000067F0000400200008A590000B04000__00000038E67ABFA0\n000000067F0000400200008A590000B00000-000000067F0000400200008A590000B04000__0000003903F1CFE8\n000000067F0000400200008A590000B00000-000000067F0000400200008A590000B04000__0000003B99F7F8A0\n000000067F0000400200008A590000B00000-000000067F0000400200008A590000B04000__0000005D2FFFFB38\n000000067F0000400200008A590000B00000-000000067F0000400200008A590000B04000__00000073AD3FE6B8\n000000067F0000400200008A590000B00000-000000067F0000400200008A590000B04000__000000914E3F38F0\n000000067F000040020
0008A590000B00000-000000067F0000400200008A590000B04000__000000931B33AE68\n000000067F0000400200008A590000B00000-000000067F0000400200008A590000B04000__000000931B9AFDF8\n000000067F0000400200008A590000B04000-000000067F0000400200008A590000B08000__000000146DBFF3C0\n000000067F0000400200008A590000B04000-000000067F0000400200008A590000B08000__0000001C760FA190\n000000067F0000400200008A590000B04000-000000067F0000400200008A590000B08000__00000038E67ABFA0\n000000067F0000400200008A590000B04000-000000067F0000400200008A590000B08000__0000003903F1CFE8\n000000067F0000400200008A590000B04000-000000067F0000400200008A590000B08000__0000003B99F7F8A0\n000000067F0000400200008A590000B04000-000000067F0000400200008A590000B08000__0000005D2FFFFB38\n000000067F0000400200008A590000B04000-000000067F0000400200008A590000B08000__00000073AD3FE6B8\n000000067F0000400200008A590000B04000-000000067F0000400200008A590000B08000__000000914E3F38F0\n000000067F0000400200008A590000B04000-000000067F0000400200008A590000B08000__000000931B33AE68\n000000067F0000400200008A590000B04000-000000067F0000400200008A590000B08000__000000931B9AFDF8\n000000067F0000400200008A590000B06C3B-000000067F0000400200008A590000B0F60D__000000144723F489-00000014E6D3F501\n000000067F0000400200008A590000B08000-000000067F0000400200008A590000B0C000__0000001C760FA190\n000000067F0000400200008A590000B08000-000000067F0000400200008A590000B0C000__00000038E67ABFA0\n000000067F0000400200008A590000B08000-000000067F0000400200008A590000B0C000__0000003903F1CFE8\n000000067F0000400200008A590000B08000-000000067F0000400200008A590000B0C000__0000003B99F7F8A0\n000000067F0000400200008A590000B08000-000000067F0000400200008A590000B0C000__0000005D2FFFFB38\n000000067F0000400200008A590000B08000-000000067F0000400200008A590000B0C000__00000073AD3FE6B8\n000000067F0000400200008A590000B08000-000000067F0000400200008A590000B0C000__000000914E3F38F0\n000000067F0000400200008A590000B08000-000000067F0000400200008A590000B0C000__000000931B33AE68\n000000067F0000400200008A590000B08000-000000067F00
00400200008A590000B0C000__000000931B9AFDF8\n000000067F0000400200008A590000B08000-030000000000000000000000000000000002__000000146DBFF3C0\n000000067F0000400200008A590000B0C000-000000067F0000400200008A590000B10000__0000001C760FA190\n000000067F0000400200008A590000B0C000-000000067F0000400200008A590000B10000__00000038E67ABFA0\n000000067F0000400200008A590000B0C000-000000067F0000400200008A590000B10000__0000003903F1CFE8\n000000067F0000400200008A590000B0C000-000000067F0000400200008A590000B10000__0000003B99F7F8A0\n000000067F0000400200008A590000B0C000-000000067F0000400200008A590000B10000__0000005D2FFFFB38\n000000067F0000400200008A590000B0C000-000000067F0000400200008A590000B10000__00000073AD3FE6B8\n000000067F0000400200008A590000B0C000-000000067F0000400200008A590000B10000__000000914E3F38F0\n000000067F0000400200008A590000B0C000-000000067F0000400200008A590000B10000__000000931B33AE68\n000000067F0000400200008A590000B0C000-000000067F0000400200008A590000B10000__000000931B9AFDF8\n000000067F0000400200008A590000B0F60D-000000067F0000400200008A590000B17FE6__000000144723F489-00000014E6D3F501\n000000067F0000400200008A590000B10000-000000067F0000400200008A590000B14000__0000001C760FA190\n000000067F0000400200008A590000B10000-000000067F0000400200008A590000B14000__00000038E67ABFA0\n000000067F0000400200008A590000B10000-000000067F0000400200008A590000B14000__0000003903F1CFE8\n000000067F0000400200008A590000B10000-000000067F0000400200008A590000B14000__0000003B99F7F8A0\n000000067F0000400200008A590000B10000-000000067F0000400200008A590000B14000__0000005D2FFFFB38\n000000067F0000400200008A590000B10000-000000067F0000400200008A590000B14000__00000073AD3FE6B8\n000000067F0000400200008A590000B10000-000000067F0000400200008A590000B14000__000000914E3F38F0\n000000067F0000400200008A590000B10000-000000067F0000400200008A590000B14000__000000931B33AE68\n000000067F0000400200008A590000B10000-000000067F0000400200008A590000B14000__000000931B9AFDF8\n000000067F0000400200008A590000B14000-000000067F0000400200008A590000B18000__0000
001C760FA190\n000000067F0000400200008A590000B14000-000000067F0000400200008A590000B18000__00000038E67ABFA0\n000000067F0000400200008A590000B14000-000000067F0000400200008A590000B18000__0000003903F1CFE8\n000000067F0000400200008A590000B14000-000000067F0000400200008A590000B18000__0000003B99F7F8A0\n000000067F0000400200008A590000B14000-000000067F0000400200008A590000B18000__0000005D2FFFFB38\n000000067F0000400200008A590000B14000-000000067F0000400200008A590000B18000__00000073AD3FE6B8\n000000067F0000400200008A590000B14000-000000067F0000400200008A590000B18000__000000914E3F38F0\n000000067F0000400200008A590000B14000-000000067F0000400200008A590000B18000__000000931B33AE68\n000000067F0000400200008A590000B14000-000000067F0000400200008A590000B18000__000000931B9AFDF8\n000000067F0000400200008A590000B17FE6-000000067F0000400200008A590000B209C7__000000144723F489-00000014E6D3F501\n000000067F0000400200008A590000B18000-000000067F0000400200008A590000B1C000__0000001C760FA190\n000000067F0000400200008A590000B18000-000000067F0000400200008A590000B1C000__00000038E67ABFA0\n000000067F0000400200008A590000B18000-000000067F0000400200008A590000B1C000__0000003903F1CFE8\n000000067F0000400200008A590000B18000-000000067F0000400200008A590000B1C000__0000003B99F7F8A0\n000000067F0000400200008A590000B18000-000000067F0000400200008A590000B1C000__0000005D2FFFFB38\n000000067F0000400200008A590000B18000-000000067F0000400200008A590000B1C000__00000073AD3FE6B8\n000000067F0000400200008A590000B18000-000000067F0000400200008A590000B1C000__000000914E3F38F0\n000000067F0000400200008A590000B18000-000000067F0000400200008A590000B1C000__000000931B33AE68\n000000067F0000400200008A590000B18000-000000067F0000400200008A590000B1C000__000000931B9AFDF8\n000000067F0000400200008A590000B1C000-000000067F0000400200008A590000B20000__0000001C760FA190\n000000067F0000400200008A590000B1C000-000000067F0000400200008A590000B20000__00000038E67ABFA0\n000000067F0000400200008A590000B1C000-000000067F0000400200008A590000B20000__0000003903F1CFE8\n000000067F000040
0200008A590000B1C000-000000067F0000400200008A590000B20000__0000003B99F7F8A0\n000000067F0000400200008A590000B1C000-000000067F0000400200008A590000B20000__0000005D2FFFFB38\n000000067F0000400200008A590000B1C000-000000067F0000400200008A590000B20000__00000073AD3FE6B8\n000000067F0000400200008A590000B1C000-000000067F0000400200008A590000B20000__000000914E3F38F0\n000000067F0000400200008A590000B1C000-000000067F0000400200008A590000B20000__000000931B33AE68\n000000067F0000400200008A590000B1C000-000000067F0000400200008A590000B20000__000000931B9AFDF8\n000000067F0000400200008A590000B20000-000000067F0000400200008A590000B24000__0000001C760FA190\n000000067F0000400200008A590000B20000-000000067F0000400200008A590000B24000__00000038E67ABFA0\n000000067F0000400200008A590000B20000-000000067F0000400200008A590000B24000__0000003903F1CFE8\n000000067F0000400200008A590000B20000-000000067F0000400200008A590000B24000__0000003B99F7F8A0\n000000067F0000400200008A590000B20000-000000067F0000400200008A590000B24000__0000005D2FFFFB38\n000000067F0000400200008A590000B20000-000000067F0000400200008A590000B24000__00000073AD3FE6B8\n000000067F0000400200008A590000B20000-000000067F0000400200008A590000B24000__000000914E3F38F0\n000000067F0000400200008A590000B20000-000000067F0000400200008A590000B24000__000000931B33AE68\n000000067F0000400200008A590000B20000-000000067F0000400200008A590000B24000__000000931B9AFDF8\n000000067F0000400200008A590000B209C7-000000067F0000400200008A590000B293BF__000000144723F489-00000014E6D3F501\n000000067F0000400200008A590000B24000-000000067F0000400200008A590000B28000__0000001C760FA190\n000000067F0000400200008A590000B24000-000000067F0000400200008A590000B28000__00000038E67ABFA0\n000000067F0000400200008A590000B24000-000000067F0000400200008A590000B28000__0000003903F1CFE8\n000000067F0000400200008A590000B24000-000000067F0000400200008A590000B28000__0000003B99F7F8A0\n000000067F0000400200008A590000B24000-000000067F0000400200008A590000B28000__0000005D2FFFFB38\n000000067F0000400200008A590000B24000-000000067
F0000400200008A590000B28000__00000073AD3FE6B8\n000000067F0000400200008A590000B24000-000000067F0000400200008A590000B28000__000000914E3F38F0\n000000067F0000400200008A590000B24000-000000067F0000400200008A590000B28000__000000931B33AE68\n000000067F0000400200008A590000B24000-000000067F0000400200008A590000B28000__000000931B9AFDF8\n000000067F0000400200008A590000B28000-000000067F0000400200008A590000B2C000__0000001C760FA190\n000000067F0000400200008A590000B28000-000000067F0000400200008A590000B2C000__00000038E67ABFA0\n000000067F0000400200008A590000B28000-000000067F0000400200008A590000B2C000__0000003903F1CFE8\n000000067F0000400200008A590000B28000-000000067F0000400200008A590000B2C000__0000003B99F7F8A0\n000000067F0000400200008A590000B28000-000000067F0000400200008A590000B2C000__0000005D2FFFFB38\n000000067F0000400200008A590000B28000-000000067F0000400200008A590000B2C000__00000073AD3FE6B8\n000000067F0000400200008A590000B28000-000000067F0000400200008A590000B2C000__000000914E3F38F0\n000000067F0000400200008A590000B28000-000000067F0000400200008A590000B2C000__000000931B33AE68\n000000067F0000400200008A590000B28000-000000067F0000400200008A590000B2C000__000000931B9AFDF8\n000000067F0000400200008A590000B293BF-000000067F0000400200008A590000B31D9F__000000144723F489-00000014E6D3F501\n000000067F0000400200008A590000B2C000-000000067F0000400200008A590000B30000__0000001C760FA190\n000000067F0000400200008A590000B2C000-000000067F0000400200008A590000B30000__00000038E67ABFA0\n000000067F0000400200008A590000B2C000-000000067F0000400200008A590000B30000__0000003903F1CFE8\n000000067F0000400200008A590000B2C000-000000067F0000400200008A590000B30000__0000003B99F7F8A0\n000000067F0000400200008A590000B2C000-000000067F0000400200008A590000B30000__0000005D2FFFFB38\n000000067F0000400200008A590000B2C000-000000067F0000400200008A590000B30000__00000073AD3FE6B8\n000000067F0000400200008A590000B2C000-000000067F0000400200008A590000B30000__000000914E3F38F0\n000000067F0000400200008A590000B2C000-000000067F0000400200008A590000B30000__0
00000931B33AE68\n000000067F0000400200008A590000B2C000-000000067F0000400200008A590000B30000__000000931B9AFDF8\n000000067F0000400200008A590000B30000-000000067F0000400200008A590000B34000__0000001C760FA190\n000000067F0000400200008A590000B30000-000000067F0000400200008A590000B34000__00000038E67ABFA0\n000000067F0000400200008A590000B30000-000000067F0000400200008A590000B34000__0000003903F1CFE8\n000000067F0000400200008A590000B30000-000000067F0000400200008A590000B34000__0000003B99F7F8A0\n000000067F0000400200008A590000B30000-000000067F0000400200008A590000B34000__0000005D2FFFFB38\n000000067F0000400200008A590000B30000-000000067F0000400200008A590000B34000__00000073AD3FE6B8\n000000067F0000400200008A590000B30000-000000067F0000400200008A590000B34000__000000914E3F38F0\n000000067F0000400200008A590000B30000-000000067F0000400200008A590000B34000__000000931B33AE68\n000000067F0000400200008A590000B30000-000000067F0000400200008A590000B34000__000000931B9AFDF8\n000000067F0000400200008A590000B31D9F-000000067F0000400200008A590000B3A77A__000000144723F489-00000014E6D3F501\n000000067F0000400200008A590000B34000-000000067F0000400200008A590000B38000__0000001C760FA190\n000000067F0000400200008A590000B34000-000000067F0000400200008A590000B38000__00000038E67ABFA0\n000000067F0000400200008A590000B34000-000000067F0000400200008A590000B38000__0000003903F1CFE8\n000000067F0000400200008A590000B34000-000000067F0000400200008A590000B38000__0000003B99F7F8A0\n000000067F0000400200008A590000B34000-000000067F0000400200008A590000B38000__0000005D2FFFFB38\n000000067F0000400200008A590000B34000-000000067F0000400200008A590000B38000__00000073AD3FE6B8\n000000067F0000400200008A590000B34000-000000067F0000400200008A590000B38000__000000914E3F38F0\n000000067F0000400200008A590000B34000-000000067F0000400200008A590000B38000__000000931B33AE68\n000000067F0000400200008A590000B34000-000000067F0000400200008A590000B38000__000000931B9AFDF8\n000000067F0000400200008A590000B38000-000000067F0000400200008A590000B3C000__0000001C760FA190\n000000067F000
0400200008A590000B38000-000000067F0000400200008A590000B3C000__00000038E67ABFA0\n000000067F0000400200008A590000B38000-000000067F0000400200008A590000B3C000__0000003903F1CFE8\n000000067F0000400200008A590000B38000-000000067F0000400200008A590000B3C000__0000003B99F7F8A0\n000000067F0000400200008A590000B38000-000000067F0000400200008A590000B3C000__0000005D2FFFFB38\n000000067F0000400200008A590000B38000-000000067F0000400200008A590000B3C000__00000073AD3FE6B8\n000000067F0000400200008A590000B38000-000000067F0000400200008A590000B3C000__000000914E3F38F0\n000000067F0000400200008A590000B38000-000000067F0000400200008A590000B3C000__000000931B33AE68\n000000067F0000400200008A590000B38000-000000067F0000400200008A590000B3C000__000000931B9AFDF8\n000000067F0000400200008A590000B3A77A-000000067F0000400200008A590000B4315B__000000144723F489-00000014E6D3F501\n000000067F0000400200008A590000B3C000-000000067F0000400200008A590000B40000__0000001C760FA190\n000000067F0000400200008A590000B3C000-000000067F0000400200008A590000B40000__00000038E67ABFA0\n000000067F0000400200008A590000B3C000-000000067F0000400200008A590000B40000__0000003903F1CFE8\n000000067F0000400200008A590000B3C000-000000067F0000400200008A590000B40000__0000003B99F7F8A0\n000000067F0000400200008A590000B3C000-000000067F0000400200008A590000B40000__0000005D2FFFFB38\n000000067F0000400200008A590000B3C000-000000067F0000400200008A590000B40000__00000073AD3FE6B8\n000000067F0000400200008A590000B3C000-000000067F0000400200008A590000B40000__000000914E3F38F0\n000000067F0000400200008A590000B3C000-000000067F0000400200008A590000B40000__000000931B33AE68\n000000067F0000400200008A590000B3C000-000000067F0000400200008A590000B40000__000000931B9AFDF8\n000000067F0000400200008A590000B40000-000000067F0000400200008A590000B44000__0000001C760FA190\n000000067F0000400200008A590000B40000-000000067F0000400200008A590000B44000__00000038E67ABFA0\n000000067F0000400200008A590000B40000-000000067F0000400200008A590000B44000__0000003903F1CFE8\n000000067F0000400200008A590000B40000-000000
067F0000400200008A590000B44000__0000003B99F7F8A0\n000000067F0000400200008A590000B40000-000000067F0000400200008A590000B44000__0000005D2FFFFB38\n000000067F0000400200008A590000B40000-000000067F0000400200008A590000B44000__00000073AD3FE6B8\n000000067F0000400200008A590000B40000-000000067F0000400200008A590000B44000__000000914E3F38F0\n000000067F0000400200008A590000B40000-000000067F0000400200008A590000B44000__000000931B33AE68\n000000067F0000400200008A590000B40000-000000067F0000400200008A590000B44000__000000931B9AFDF8\n000000067F0000400200008A590000B4315B-000000067F0000400200008A590000B4BB2C__000000144723F489-00000014E6D3F501\n000000067F0000400200008A590000B44000-000000067F0000400200008A590000B48000__0000001C760FA190\n000000067F0000400200008A590000B44000-000000067F0000400200008A590000B48000__00000038E67ABFA0\n000000067F0000400200008A590000B44000-000000067F0000400200008A590000B48000__0000003903F1CFE8\n000000067F0000400200008A590000B44000-000000067F0000400200008A590000B48000__0000003B99F7F8A0\n000000067F0000400200008A590000B44000-000000067F0000400200008A590000B48000__0000005D2FFFFB38\n000000067F0000400200008A590000B44000-000000067F0000400200008A590000B48000__00000073AD3FE6B8\n000000067F0000400200008A590000B44000-000000067F0000400200008A590000B48000__000000914E3F38F0\n000000067F0000400200008A590000B44000-000000067F0000400200008A590000B48000__000000931B33AE68\n000000067F0000400200008A590000B44000-000000067F0000400200008A590000B48000__000000931B9AFDF8\n000000067F0000400200008A590000B48000-000000067F0000400200008A590000B4C000__0000001C725A2400\n000000067F0000400200008A590000B48000-000000067F0000400200008A590000B4C000__0000001C760FA190\n000000067F0000400200008A590000B48000-000000067F0000400200008A590000B4C000__00000038E67ABFA0\n000000067F0000400200008A590000B48000-000000067F0000400200008A590000B4C000__0000003903F1CFE8\n000000067F0000400200008A590000B48000-000000067F0000400200008A590000B4C000__0000003B99F7F8A0\n000000067F0000400200008A590000B48000-000000067F0000400200008A590000B4C000
__0000005D2FFFFB38\n000000067F0000400200008A590000B48000-000000067F0000400200008A590000B4C000__00000073AD3FE6B8\n000000067F0000400200008A590000B48000-000000067F0000400200008A590000B4C000__000000914E3F38F0\n000000067F0000400200008A590000B48000-000000067F0000400200008A590000B4C000__000000931B33AE68\n000000067F0000400200008A590000B48000-000000067F0000400200008A590000B4C000__000000931B9AFDF8\n000000067F0000400200008A590000B4BB2C-000000067F0000400200008A590100000000__000000144723F489-00000014E6D3F501\n000000067F0000400200008A590000B4BDF1-000000067F0000400200008A590000B547DD__00000014E6D3F501-00000015967BE3A1\n000000067F0000400200008A590000B4C000-000000067F0000400200008A590000B50000__0000001C725A2400\n000000067F0000400200008A590000B4C000-000000067F0000400200008A590000B50000__0000001C760FA190\n000000067F0000400200008A590000B4C000-000000067F0000400200008A590000B50000__00000038E67ABFA0\n000000067F0000400200008A590000B4C000-000000067F0000400200008A590000B50000__0000003903F1CFE8\n000000067F0000400200008A590000B4C000-000000067F0000400200008A590000B50000__0000003B99F7F8A0\n000000067F0000400200008A590000B4C000-000000067F0000400200008A590000B50000__0000005D2FFFFB38\n000000067F0000400200008A590000B4C000-000000067F0000400200008A590000B50000__00000073AD3FE6B8\n000000067F0000400200008A590000B4C000-000000067F0000400200008A590000B50000__000000914E3F38F0\n000000067F0000400200008A590000B4C000-000000067F0000400200008A590000B50000__000000931B33AE68\n000000067F0000400200008A590000B4C000-000000067F0000400200008A590000B50000__000000931B9AFDF8\n000000067F0000400200008A590000B50000-000000067F0000400200008A590000B54000__0000001C725A2400\n000000067F0000400200008A590000B50000-000000067F0000400200008A590000B54000__0000001C760FA190\n000000067F0000400200008A590000B50000-000000067F0000400200008A590000B54000__00000038E67ABFA0\n000000067F0000400200008A590000B50000-000000067F0000400200008A590000B54000__0000003903F1CFE8\n000000067F0000400200008A590000B50000-000000067F0000400200008A590000B54000__0000003B99F
7F8A0\n000000067F0000400200008A590000B50000-000000067F0000400200008A590000B54000__0000005D2FFFFB38\n000000067F0000400200008A590000B50000-000000067F0000400200008A590000B54000__00000073AD3FE6B8\n000000067F0000400200008A590000B50000-000000067F0000400200008A590000B54000__000000914E3F38F0\n000000067F0000400200008A590000B50000-000000067F0000400200008A590000B54000__000000931B33AE68\n000000067F0000400200008A590000B50000-000000067F0000400200008A590000B54000__000000931B9AFDF8\n000000067F0000400200008A590000B54000-000000067F0000400200008A590000B58000__0000001C725A2400\n000000067F0000400200008A590000B54000-000000067F0000400200008A590000B58000__0000001C760FA190\n000000067F0000400200008A590000B54000-000000067F0000400200008A590000B58000__00000038E67ABFA0\n000000067F0000400200008A590000B54000-000000067F0000400200008A590000B58000__0000003903F1CFE8\n000000067F0000400200008A590000B54000-000000067F0000400200008A590000B58000__0000003B99F7F8A0\n000000067F0000400200008A590000B54000-000000067F0000400200008A590000B58000__0000005D2FFFFB38\n000000067F0000400200008A590000B54000-000000067F0000400200008A590000B58000__00000073AD3FE6B8\n000000067F0000400200008A590000B54000-000000067F0000400200008A590000B58000__000000914E3F38F0\n000000067F0000400200008A590000B54000-000000067F0000400200008A590000B58000__000000931B33AE68\n000000067F0000400200008A590000B54000-000000067F0000400200008A590000B58000__000000931B9AFDF8\n000000067F0000400200008A590000B547DD-000000067F0000400200008A590000B5D1BB__00000014E6D3F501-00000015967BE3A1\n000000067F0000400200008A590000B58000-000000067F0000400200008A590000B5C000__0000001C725A2400\n000000067F0000400200008A590000B58000-000000067F0000400200008A590000B5C000__0000001C760FA190\n000000067F0000400200008A590000B58000-000000067F0000400200008A590000B5C000__00000038E67ABFA0\n000000067F0000400200008A590000B58000-000000067F0000400200008A590000B5C000__0000003903F1CFE8\n000000067F0000400200008A590000B58000-000000067F0000400200008A590000B5C000__0000003B99F7F8A0\n000000067F0000400200008
A590000B58000-000000067F0000400200008A590000B5C000__0000005D2FFFFB38\n000000067F0000400200008A590000B58000-000000067F0000400200008A590000B5C000__00000073AD3FE6B8\n000000067F0000400200008A590000B58000-000000067F0000400200008A590000B5C000__000000914E3F38F0\n000000067F0000400200008A590000B58000-000000067F0000400200008A590000B5C000__000000931B33AE68\n000000067F0000400200008A590000B58000-000000067F0000400200008A590000B5C000__000000931B9AFDF8\n000000067F0000400200008A590000B5C000-000000067F0000400200008A590000B60000__0000001C725A2400\n000000067F0000400200008A590000B5C000-000000067F0000400200008A590000B60000__0000001C760FA190\n000000067F0000400200008A590000B5C000-000000067F0000400200008A590000B60000__00000038E67ABFA0\n000000067F0000400200008A590000B5C000-000000067F0000400200008A590000B60000__0000003903F1CFE8\n000000067F0000400200008A590000B5C000-000000067F0000400200008A590000B60000__0000003B99F7F8A0\n000000067F0000400200008A590000B5C000-000000067F0000400200008A590000B60000__0000005D2FFFFB38\n000000067F0000400200008A590000B5C000-000000067F0000400200008A590000B60000__00000073AD3FE6B8\n000000067F0000400200008A590000B5C000-000000067F0000400200008A590000B60000__000000914E3F38F0\n000000067F0000400200008A590000B5C000-000000067F0000400200008A590000B60000__000000931B33AE68\n000000067F0000400200008A590000B5C000-000000067F0000400200008A590000B60000__000000931B9AFDF8\n000000067F0000400200008A590000B5D1BB-000000067F0000400200008A590000B65BA4__00000014E6D3F501-00000015967BE3A1\n000000067F0000400200008A590000B60000-000000067F0000400200008A590000B64000__0000001C725A2400\n000000067F0000400200008A590000B60000-000000067F0000400200008A590000B64000__0000001C760FA190\n000000067F0000400200008A590000B60000-000000067F0000400200008A590000B64000__00000038E67ABFA0\n000000067F0000400200008A590000B60000-000000067F0000400200008A590000B64000__0000003903F1CFE8\n000000067F0000400200008A590000B60000-000000067F0000400200008A590000B64000__0000003B99F7F8A0\n000000067F0000400200008A590000B60000-000000067F000040
0200008A590000B64000__0000005D2FFFFB38\n000000067F0000400200008A590000B60000-000000067F0000400200008A590000B64000__00000073AD3FE6B8\n000000067F0000400200008A590000B60000-000000067F0000400200008A590000B64000__000000914E3F38F0\n000000067F0000400200008A590000B60000-000000067F0000400200008A590000B64000__000000931B33AE68\n000000067F0000400200008A590000B60000-000000067F0000400200008A590000B64000__000000931B9AFDF8\n000000067F0000400200008A590000B64000-000000067F0000400200008A590000B68000__0000001C725A2400\n000000067F0000400200008A590000B64000-000000067F0000400200008A590000B68000__0000001C760FA190\n000000067F0000400200008A590000B64000-000000067F0000400200008A590000B68000__00000038E67ABFA0\n000000067F0000400200008A590000B64000-000000067F0000400200008A590000B68000__0000003903F1CFE8\n000000067F0000400200008A590000B64000-000000067F0000400200008A590000B68000__0000003B99F7F8A0\n000000067F0000400200008A590000B64000-000000067F0000400200008A590000B68000__0000005D2FFFFB38\n000000067F0000400200008A590000B64000-000000067F0000400200008A590000B68000__00000073AD3FE6B8\n000000067F0000400200008A590000B64000-000000067F0000400200008A590000B68000__000000914E3F38F0\n000000067F0000400200008A590000B64000-000000067F0000400200008A590000B68000__000000931B33AE68\n000000067F0000400200008A590000B64000-000000067F0000400200008A590000B68000__000000931B9AFDF8\n000000067F0000400200008A590000B65BA4-000000067F0000400200008A590000B6E588__00000014E6D3F501-00000015967BE3A1\n000000067F0000400200008A590000B68000-000000067F0000400200008A590000B6C000__0000001C725A2400\n000000067F0000400200008A590000B68000-000000067F0000400200008A590000B6C000__0000001C760FA190\n000000067F0000400200008A590000B68000-000000067F0000400200008A590000B6C000__00000038E67ABFA0\n000000067F0000400200008A590000B68000-000000067F0000400200008A590000B6C000__0000003903F1CFE8\n000000067F0000400200008A590000B68000-000000067F0000400200008A590000B6C000__0000003B99F7F8A0\n000000067F0000400200008A590000B68000-000000067F0000400200008A590000B6C000__0000005D
2FFFFB38\n000000067F0000400200008A590000B68000-000000067F0000400200008A590000B6C000__00000073AD3FE6B8\n000000067F0000400200008A590000B68000-000000067F0000400200008A590000B6C000__000000914E3F38F0\n000000067F0000400200008A590000B68000-000000067F0000400200008A590000B6C000__000000931B33AE68\n000000067F0000400200008A590000B68000-000000067F0000400200008A590000B6C000__000000931B9AFDF8\n000000067F0000400200008A590000B6C000-000000067F0000400200008A590000B70000__0000001C725A2400\n000000067F0000400200008A590000B6C000-000000067F0000400200008A590000B70000__0000001C760FA190\n000000067F0000400200008A590000B6C000-000000067F0000400200008A590000B70000__00000038E67ABFA0\n000000067F0000400200008A590000B6C000-000000067F0000400200008A590000B70000__0000003903F1CFE8\n000000067F0000400200008A590000B6C000-000000067F0000400200008A590000B70000__0000003B99F7F8A0\n000000067F0000400200008A590000B6C000-000000067F0000400200008A590000B70000__0000005D2FFFFB38\n000000067F0000400200008A590000B6C000-000000067F0000400200008A590000B70000__00000073AD3FE6B8\n000000067F0000400200008A590000B6C000-000000067F0000400200008A590000B70000__000000914E3F38F0\n000000067F0000400200008A590000B6C000-000000067F0000400200008A590000B70000__000000931B33AE68\n000000067F0000400200008A590000B6C000-000000067F0000400200008A590000B70000__000000931B9AFDF8\n000000067F0000400200008A590000B6E588-000000067F0000400200008A590000B76F5E__00000014E6D3F501-00000015967BE3A1\n000000067F0000400200008A590000B70000-000000067F0000400200008A590000B74000__0000001C725A2400\n000000067F0000400200008A590000B70000-000000067F0000400200008A590000B74000__0000001C760FA190\n000000067F0000400200008A590000B70000-000000067F0000400200008A590000B74000__00000038E67ABFA0\n000000067F0000400200008A590000B70000-000000067F0000400200008A590000B74000__0000003903F1CFE8\n000000067F0000400200008A590000B70000-000000067F0000400200008A590000B74000__0000003B99F7F8A0\n000000067F0000400200008A590000B70000-000000067F0000400200008A590000B74000__0000005D2FFFFB38\n000000067F0000400200
008A590000B70000-000000067F0000400200008A590000B74000__00000073AD3FE6B8\n000000067F0000400200008A590000B70000-000000067F0000400200008A590000B74000__000000914E3F38F0\n000000067F0000400200008A590000B70000-000000067F0000400200008A590000B74000__000000931B33AE68\n000000067F0000400200008A590000B70000-000000067F0000400200008A590000B74000__000000931B9AFDF8\n000000067F0000400200008A590000B74000-000000067F0000400200008A590000B78000__0000001C725A2400\n000000067F0000400200008A590000B74000-000000067F0000400200008A590000B78000__0000001C760FA190\n000000067F0000400200008A590000B74000-000000067F0000400200008A590000B78000__00000038E67ABFA0\n000000067F0000400200008A590000B74000-000000067F0000400200008A590000B78000__0000003903F1CFE8\n000000067F0000400200008A590000B74000-000000067F0000400200008A590000B78000__0000003B99F7F8A0\n000000067F0000400200008A590000B74000-000000067F0000400200008A590000B78000__0000005D2FFFFB38\n000000067F0000400200008A590000B74000-000000067F0000400200008A590000B78000__00000073AD3FE6B8\n000000067F0000400200008A590000B74000-000000067F0000400200008A590000B78000__000000914E3F38F0\n000000067F0000400200008A590000B74000-000000067F0000400200008A590000B78000__000000931B33AE68\n000000067F0000400200008A590000B74000-000000067F0000400200008A590000B78000__000000931B9AFDF8\n000000067F0000400200008A590000B76F5E-000000067F0000400200008A590000B7F935__00000014E6D3F501-00000015967BE3A1\n000000067F0000400200008A590000B78000-000000067F0000400200008A590000B7C000__0000001C725A2400\n000000067F0000400200008A590000B78000-000000067F0000400200008A590000B7C000__0000001C760FA190\n000000067F0000400200008A590000B78000-000000067F0000400200008A590000B7C000__00000038E67ABFA0\n000000067F0000400200008A590000B78000-000000067F0000400200008A590000B7C000__0000003903F1CFE8\n000000067F0000400200008A590000B78000-000000067F0000400200008A590000B7C000__0000003B99F7F8A0\n000000067F0000400200008A590000B78000-000000067F0000400200008A590000B7C000__0000005D2FFFFB38\n000000067F0000400200008A590000B78000-000000067F000
0400200008A590000B7C000__00000073AD3FE6B8\n000000067F0000400200008A590000B78000-000000067F0000400200008A590000B7C000__000000914E3F38F0\n000000067F0000400200008A590000B78000-000000067F0000400200008A590000B7C000__000000931B33AE68\n000000067F0000400200008A590000B78000-000000067F0000400200008A590000B7C000__000000931B9AFDF8\n000000067F0000400200008A590000B7C000-000000067F0000400200008A590000B80000__0000001C725A2400\n000000067F0000400200008A590000B7C000-000000067F0000400200008A590000B80000__0000001C760FA190\n000000067F0000400200008A590000B7C000-000000067F0000400200008A590000B80000__00000038E67ABFA0\n000000067F0000400200008A590000B7C000-000000067F0000400200008A590000B80000__0000003903F1CFE8\n000000067F0000400200008A590000B7C000-000000067F0000400200008A590000B80000__0000003B99F7F8A0\n000000067F0000400200008A590000B7C000-000000067F0000400200008A590000B80000__0000005D2FFFFB38\n000000067F0000400200008A590000B7C000-000000067F0000400200008A590000B80000__00000073AD3FE6B8\n000000067F0000400200008A590000B7C000-000000067F0000400200008A590000B80000__000000914E3F38F0\n000000067F0000400200008A590000B7C000-000000067F0000400200008A590000B80000__000000931B33AE68\n000000067F0000400200008A590000B7C000-000000067F0000400200008A590000B80000__000000931B9AFDF8\n000000067F0000400200008A590000B7F935-000000067F0000400200008A590000B8830D__00000014E6D3F501-00000015967BE3A1\n000000067F0000400200008A590000B80000-000000067F0000400200008A590000B84000__0000001C725A2400\n000000067F0000400200008A590000B80000-000000067F0000400200008A590000B84000__0000001C760FA190\n000000067F0000400200008A590000B80000-000000067F0000400200008A590000B84000__00000038E67ABFA0\n000000067F0000400200008A590000B80000-000000067F0000400200008A590000B84000__0000003903F1CFE8\n000000067F0000400200008A590000B80000-000000067F0000400200008A590000B84000__0000003B99F7F8A0\n000000067F0000400200008A590000B80000-000000067F0000400200008A590000B84000__0000005D2FFFFB38\n000000067F0000400200008A590000B80000-000000067F0000400200008A590000B84000__00000
073AD3FE6B8\n000000067F0000400200008A590000B80000-000000067F0000400200008A590000B84000__000000914E3F38F0\n000000067F0000400200008A590000B80000-000000067F0000400200008A590000B84000__000000931B33AE68\n000000067F0000400200008A590000B80000-000000067F0000400200008A590000B84000__000000931B9AFDF8\n000000067F0000400200008A590000B84000-000000067F0000400200008A590000B88000__0000001C725A2400\n000000067F0000400200008A590000B84000-000000067F0000400200008A590000B88000__0000001C760FA190\n000000067F0000400200008A590000B84000-000000067F0000400200008A590000B88000__00000038E67ABFA0\n000000067F0000400200008A590000B84000-000000067F0000400200008A590000B88000__0000003903F1CFE8\n000000067F0000400200008A590000B84000-000000067F0000400200008A590000B88000__0000003B99F7F8A0\n000000067F0000400200008A590000B84000-000000067F0000400200008A590000B88000__0000005D2FFFFB38\n000000067F0000400200008A590000B84000-000000067F0000400200008A590000B88000__00000073AD3FE6B8\n000000067F0000400200008A590000B84000-000000067F0000400200008A590000B88000__000000914E3F38F0\n000000067F0000400200008A590000B84000-000000067F0000400200008A590000B88000__000000931B33AE68\n000000067F0000400200008A590000B84000-000000067F0000400200008A590000B88000__000000931B9AFDF8\n000000067F0000400200008A590000B88000-000000067F0000400200008A590000B8C000__0000001C725A2400\n000000067F0000400200008A590000B88000-000000067F0000400200008A590000B8C000__0000001C760FA190\n000000067F0000400200008A590000B88000-000000067F0000400200008A590000B8C000__00000038E67ABFA0\n000000067F0000400200008A590000B88000-000000067F0000400200008A590000B8C000__0000003903F1CFE8\n000000067F0000400200008A590000B88000-000000067F0000400200008A590000B8C000__0000003B99F7F8A0\n000000067F0000400200008A590000B88000-000000067F0000400200008A590000B8C000__0000005D2FFFFB38\n000000067F0000400200008A590000B88000-000000067F0000400200008A590000B8C000__00000073AD3FE6B8\n000000067F0000400200008A590000B88000-000000067F0000400200008A590000B8C000__000000914E3F38F0\n000000067F0000400200008A590000B880
00-000000067F0000400200008A590000B8C000__000000931B33AE68\n000000067F0000400200008A590000B88000-000000067F0000400200008A590000B8C000__000000931B9AFDF8\n000000067F0000400200008A590000B8830D-000000067F0000400200008A590000B90CE8__00000014E6D3F501-00000015967BE3A1\n000000067F0000400200008A590000B8C000-000000067F0000400200008A590000B90000__0000001C725A2400\n000000067F0000400200008A590000B8C000-000000067F0000400200008A590000B90000__0000001C760FA190\n000000067F0000400200008A590000B8C000-000000067F0000400200008A590000B90000__00000038E67ABFA0\n000000067F0000400200008A590000B8C000-000000067F0000400200008A590000B90000__0000003903F1CFE8\n000000067F0000400200008A590000B8C000-000000067F0000400200008A590000B90000__0000003B99F7F8A0\n000000067F0000400200008A590000B8C000-000000067F0000400200008A590000B90000__0000005D2FFFFB38\n000000067F0000400200008A590000B8C000-000000067F0000400200008A590000B90000__00000073AD3FE6B8\n000000067F0000400200008A590000B8C000-000000067F0000400200008A590000B90000__000000914E3F38F0\n000000067F0000400200008A590000B8C000-000000067F0000400200008A590000B90000__000000931B33AE68\n000000067F0000400200008A590000B8C000-000000067F0000400200008A590000B90000__000000931B9AFDF8\n000000067F0000400200008A590000B90000-000000067F0000400200008A590000B94000__0000001C725A2400\n000000067F0000400200008A590000B90000-000000067F0000400200008A590000B94000__0000001C760FA190\n000000067F0000400200008A590000B90000-000000067F0000400200008A590000B94000__00000038E67ABFA0\n000000067F0000400200008A590000B90000-000000067F0000400200008A590000B94000__0000003903F1CFE8\n000000067F0000400200008A590000B90000-000000067F0000400200008A590000B94000__0000003B99F7F8A0\n000000067F0000400200008A590000B90000-000000067F0000400200008A590000B94000__0000005D2FFFFB38\n000000067F0000400200008A590000B90000-000000067F0000400200008A590000B94000__00000073AD3FE6B8\n000000067F0000400200008A590000B90000-000000067F0000400200008A590000B94000__000000914E3F38F0\n000000067F0000400200008A590000B90000-000000067F0000400200008A590
000B94000__000000931B33AE68\n000000067F0000400200008A590000B90000-000000067F0000400200008A590000B94000__000000931B9AFDF8\n000000067F0000400200008A590000B90CE8-000000067F0000400200008A590000B996CA__00000014E6D3F501-00000015967BE3A1\n000000067F0000400200008A590000B94000-000000067F0000400200008A590000B98000__0000001C725A2400\n000000067F0000400200008A590000B94000-000000067F0000400200008A590000B98000__0000001C760FA190\n000000067F0000400200008A590000B94000-000000067F0000400200008A590000B98000__00000038E67ABFA0\n000000067F0000400200008A590000B94000-000000067F0000400200008A590000B98000__0000003903F1CFE8\n000000067F0000400200008A590000B94000-000000067F0000400200008A590000B98000__0000003B99F7F8A0\n000000067F0000400200008A590000B94000-000000067F0000400200008A590000B98000__0000005D2FFFFB38\n000000067F0000400200008A590000B94000-000000067F0000400200008A590000B98000__00000073AD3FE6B8\n000000067F0000400200008A590000B94000-000000067F0000400200008A590000B98000__000000914E3F38F0\n000000067F0000400200008A590000B94000-000000067F0000400200008A590000B98000__000000931B33AE68\n000000067F0000400200008A590000B94000-000000067F0000400200008A590000B98000__000000931B9AFDF8\n000000067F0000400200008A590000B98000-000000067F0000400200008A590000B9C000__0000001C725A2400\n000000067F0000400200008A590000B98000-000000067F0000400200008A590000B9C000__0000001C760FA190\n000000067F0000400200008A590000B98000-000000067F0000400200008A590000B9C000__00000038E67ABFA0\n000000067F0000400200008A590000B98000-000000067F0000400200008A590000B9C000__0000003903F1CFE8\n000000067F0000400200008A590000B98000-000000067F0000400200008A590000B9C000__0000003B99F7F8A0\n000000067F0000400200008A590000B98000-000000067F0000400200008A590000B9C000__0000005D2FFFFB38\n000000067F0000400200008A590000B98000-000000067F0000400200008A590000B9C000__00000073AD3FE6B8\n000000067F0000400200008A590000B98000-000000067F0000400200008A590000B9C000__000000914E3F38F0\n000000067F0000400200008A590000B98000-000000067F0000400200008A590000B9C000__000000931B33AE68\n0
00000067F0000400200008A590000B98000-000000067F0000400200008A590000B9C000__000000931B9AFDF8\n000000067F0000400200008A590000B996CA-000000067F0000400200008A590000BA20AB__00000014E6D3F501-00000015967BE3A1\n000000067F0000400200008A590000B9C000-000000067F0000400200008A590000BA0000__0000001C725A2400\n000000067F0000400200008A590000B9C000-000000067F0000400200008A590000BA0000__0000001C760FA190\n000000067F0000400200008A590000B9C000-000000067F0000400200008A590000BA0000__00000038E67ABFA0\n000000067F0000400200008A590000B9C000-000000067F0000400200008A590000BA0000__0000003903F1CFE8\n000000067F0000400200008A590000B9C000-000000067F0000400200008A590000BA0000__0000003B99F7F8A0\n000000067F0000400200008A590000B9C000-000000067F0000400200008A590000BA0000__0000005D2FFFFB38\n000000067F0000400200008A590000B9C000-000000067F0000400200008A590000BA0000__00000073AD3FE6B8\n000000067F0000400200008A590000B9C000-000000067F0000400200008A590000BA0000__000000914E3F38F0\n000000067F0000400200008A590000B9C000-000000067F0000400200008A590000BA0000__000000931B33AE68\n000000067F0000400200008A590000B9C000-000000067F0000400200008A590000BA0000__000000931B9AFDF8\n000000067F0000400200008A590000BA0000-000000067F0000400200008A590000BA4000__0000001C725A2400\n000000067F0000400200008A590000BA0000-000000067F0000400200008A590000BA4000__0000001C760FA190\n000000067F0000400200008A590000BA0000-000000067F0000400200008A590000BA4000__00000038E67ABFA0\n000000067F0000400200008A590000BA0000-000000067F0000400200008A590000BA4000__0000003903F1CFE8\n000000067F0000400200008A590000BA0000-000000067F0000400200008A590000BA4000__0000003B99F7F8A0\n000000067F0000400200008A590000BA0000-000000067F0000400200008A590000BA4000__0000005D2FFFFB38\n000000067F0000400200008A590000BA0000-000000067F0000400200008A590000BA4000__00000073AD3FE6B8\n000000067F0000400200008A590000BA0000-000000067F0000400200008A590000BA4000__000000914E3F38F0\n000000067F0000400200008A590000BA0000-000000067F0000400200008A590000BA4000__000000931B33AE68\n000000067F0000400200008A590000B
A0000-000000067F0000400200008A590000BA4000__000000931B9AFDF8\n000000067F0000400200008A590000BA20AB-000000067F0000400200008A590000BAAAA5__00000014E6D3F501-00000015967BE3A1\n000000067F0000400200008A590000BA4000-000000067F0000400200008A590000BA8000__0000001C725A2400\n000000067F0000400200008A590000BA4000-000000067F0000400200008A590000BA8000__0000001C760FA190\n000000067F0000400200008A590000BA4000-000000067F0000400200008A590000BA8000__00000038E67ABFA0\n000000067F0000400200008A590000BA4000-000000067F0000400200008A590000BA8000__0000003903F1CFE8\n000000067F0000400200008A590000BA4000-000000067F0000400200008A590000BA8000__0000003B99F7F8A0\n000000067F0000400200008A590000BA4000-000000067F0000400200008A590000BA8000__0000005D2FFFFB38\n000000067F0000400200008A590000BA4000-000000067F0000400200008A590000BA8000__00000073AD3FE6B8\n000000067F0000400200008A590000BA4000-000000067F0000400200008A590000BA8000__000000914E3F38F0\n000000067F0000400200008A590000BA4000-000000067F0000400200008A590000BA8000__000000931B33AE68\n000000067F0000400200008A590000BA4000-000000067F0000400200008A590000BA8000__000000931B9AFDF8\n000000067F0000400200008A590000BA8000-000000067F0000400200008A590000BAC000__00000016661DE360\n000000067F0000400200008A590000BA8000-000000067F0000400200008A590000BAC000__0000001C760FA190\n000000067F0000400200008A590000BA8000-000000067F0000400200008A590000BAC000__00000038E67ABFA0\n000000067F0000400200008A590000BA8000-000000067F0000400200008A590000BAC000__0000003903F1CFE8\n000000067F0000400200008A590000BA8000-000000067F0000400200008A590000BAC000__0000003B99F7F8A0\n000000067F0000400200008A590000BA8000-000000067F0000400200008A590000BAC000__0000005D2FFFFB38\n000000067F0000400200008A590000BA8000-000000067F0000400200008A590000BAC000__00000073AD3FE6B8\n000000067F0000400200008A590000BA8000-000000067F0000400200008A590000BAC000__000000914E3F38F0\n000000067F0000400200008A590000BA8000-000000067F0000400200008A590000BAC000__000000931B33AE68\n000000067F0000400200008A590000BA8000-000000067F0000400200008A
590000BAC000__000000931B9AFDF8\n000000067F0000400200008A590000BAAAA5-000000067F0000400200008A590100000000__00000014E6D3F501-00000015967BE3A1\n000000067F0000400200008A590000BAAD99-000000067F0000400200008A590000BB3774__00000015967BE3A1-00000016362BE8F9\n000000067F0000400200008A590000BAC000-000000067F0000400200008A590000BB0000__00000016661DE360\n000000067F0000400200008A590000BAC000-000000067F0000400200008A590000BB0000__0000001C760FA190\n000000067F0000400200008A590000BAC000-000000067F0000400200008A590000BB0000__00000038E67ABFA0\n000000067F0000400200008A590000BAC000-000000067F0000400200008A590000BB0000__0000003903F1CFE8\n000000067F0000400200008A590000BAC000-000000067F0000400200008A590000BB0000__0000003B99F7F8A0\n000000067F0000400200008A590000BAC000-000000067F0000400200008A590000BB0000__0000005D2FFFFB38\n000000067F0000400200008A590000BAC000-000000067F0000400200008A590000BB0000__00000073AD3FE6B8\n000000067F0000400200008A590000BAC000-000000067F0000400200008A590000BB0000__000000914E3F38F0\n000000067F0000400200008A590000BAC000-000000067F0000400200008A590000BB0000__000000931B33AE68\n000000067F0000400200008A590000BAC000-000000067F0000400200008A590000BB0000__000000931B9AFDF8\n000000067F0000400200008A590000BB0000-000000067F0000400200008A590000BB4000__00000016661DE360\n000000067F0000400200008A590000BB0000-000000067F0000400200008A590000BB4000__0000001C760FA190\n000000067F0000400200008A590000BB0000-000000067F0000400200008A590000BB4000__00000038E67ABFA0\n000000067F0000400200008A590000BB0000-000000067F0000400200008A590000BB4000__0000003903F1CFE8\n000000067F0000400200008A590000BB0000-000000067F0000400200008A590000BB4000__0000003B99F7F8A0\n000000067F0000400200008A590000BB0000-000000067F0000400200008A590000BB4000__0000005D2FFFFB38\n000000067F0000400200008A590000BB0000-000000067F0000400200008A590000BB4000__00000073AD3FE6B8\n000000067F0000400200008A590000BB0000-000000067F0000400200008A590000BB4000__000000914E3F38F0\n000000067F0000400200008A590000BB0000-000000067F0000400200008A590000BB4000_
_000000931B33AE68\n000000067F0000400200008A590000BB0000-000000067F0000400200008A590000BB4000__000000931B9AFDF8\n000000067F0000400200008A590000BB3774-000000067F0000400200008A590000BBC149__00000015967BE3A1-00000016362BE8F9\n000000067F0000400200008A590000BB4000-000000067F0000400200008A590000BB8000__00000016661DE360\n000000067F0000400200008A590000BB4000-000000067F0000400200008A590000BB8000__0000001C760FA190\n000000067F0000400200008A590000BB4000-000000067F0000400200008A590000BB8000__00000038E67ABFA0\n000000067F0000400200008A590000BB4000-000000067F0000400200008A590000BB8000__0000003903F1CFE8\n000000067F0000400200008A590000BB4000-000000067F0000400200008A590000BB8000__0000003B99F7F8A0\n000000067F0000400200008A590000BB4000-000000067F0000400200008A590000BB8000__0000005D2FFFFB38\n000000067F0000400200008A590000BB4000-000000067F0000400200008A590000BB8000__00000073AD3FE6B8\n000000067F0000400200008A590000BB4000-000000067F0000400200008A590000BB8000__000000914E3F38F0\n000000067F0000400200008A590000BB4000-000000067F0000400200008A590000BB8000__000000931B33AE68\n000000067F0000400200008A590000BB4000-000000067F0000400200008A590000BB8000__000000931B9AFDF8\n000000067F0000400200008A590000BB8000-000000067F0000400200008A590000BBC000__00000016661DE360\n000000067F0000400200008A590000BB8000-000000067F0000400200008A590000BBC000__0000001C760FA190\n000000067F0000400200008A590000BB8000-000000067F0000400200008A590000BBC000__00000038E67ABFA0\n000000067F0000400200008A590000BB8000-000000067F0000400200008A590000BBC000__0000003903F1CFE8\n000000067F0000400200008A590000BB8000-000000067F0000400200008A590000BBC000__0000003B99F7F8A0\n000000067F0000400200008A590000BB8000-000000067F0000400200008A590000BBC000__0000005D2FFFFB38\n000000067F0000400200008A590000BB8000-000000067F0000400200008A590000BBC000__00000073AD3FE6B8\n000000067F0000400200008A590000BB8000-000000067F0000400200008A590000BBC000__000000914E3F38F0\n000000067F0000400200008A590000BB8000-000000067F0000400200008A590000BBC000__000000931B33AE68\n000000067F0
000400200008A590000BB8000-000000067F0000400200008A590000BBC000__000000931B9AFDF8\n000000067F0000400200008A590000BBC000-000000067F0000400200008A590000BC0000__00000016661DE360\n000000067F0000400200008A590000BBC000-000000067F0000400200008A590000BC0000__0000001C760FA190\n000000067F0000400200008A590000BBC000-000000067F0000400200008A590000BC0000__00000038E67ABFA0\n000000067F0000400200008A590000BBC000-000000067F0000400200008A590000BC0000__0000003903F1CFE8\n000000067F0000400200008A590000BBC000-000000067F0000400200008A590000BC0000__0000003B99F7F8A0\n000000067F0000400200008A590000BBC000-000000067F0000400200008A590000BC0000__0000005D2FFFFB38\n000000067F0000400200008A590000BBC000-000000067F0000400200008A590000BC0000__00000073AD3FE6B8\n000000067F0000400200008A590000BBC000-000000067F0000400200008A590000BC0000__000000914E3F38F0\n000000067F0000400200008A590000BBC000-000000067F0000400200008A590000BC0000__000000931B33AE68\n000000067F0000400200008A590000BBC000-000000067F0000400200008A590000BC0000__000000931B9AFDF8\n000000067F0000400200008A590000BBC149-000000067F0000400200008A590000BC4B1C__00000015967BE3A1-00000016362BE8F9\n000000067F0000400200008A590000BC0000-000000067F0000400200008A590000BC4000__00000016661DE360\n000000067F0000400200008A590000BC0000-000000067F0000400200008A590000BC4000__0000001C760FA190\n000000067F0000400200008A590000BC0000-000000067F0000400200008A590000BC4000__00000038E67ABFA0\n000000067F0000400200008A590000BC0000-000000067F0000400200008A590000BC4000__0000003903F1CFE8\n000000067F0000400200008A590000BC0000-000000067F0000400200008A590000BC4000__0000003B99F7F8A0\n000000067F0000400200008A590000BC0000-000000067F0000400200008A590000BC4000__0000005D2FFFFB38\n000000067F0000400200008A590000BC0000-000000067F0000400200008A590000BC4000__00000073AD3FE6B8\n000000067F0000400200008A590000BC0000-000000067F0000400200008A590000BC4000__000000914E3F38F0\n000000067F0000400200008A590000BC0000-000000067F0000400200008A590000BC4000__000000931B33AE68\n000000067F0000400200008A590000BC0000-0000
00067F0000400200008A590000BC4000__000000931B9AFDF8\n000000067F0000400200008A590000BC4000-000000067F0000400200008A590000BC8000__00000016661DE360\n000000067F0000400200008A590000BC4000-000000067F0000400200008A590000BC8000__0000001C760FA190\n000000067F0000400200008A590000BC4000-000000067F0000400200008A590000BC8000__00000038E67ABFA0\n000000067F0000400200008A590000BC4000-000000067F0000400200008A590000BC8000__0000003903F1CFE8\n000000067F0000400200008A590000BC4000-000000067F0000400200008A590000BC8000__0000003B99F7F8A0\n000000067F0000400200008A590000BC4000-000000067F0000400200008A590000BC8000__0000005D2FFFFB38\n000000067F0000400200008A590000BC4000-000000067F0000400200008A590000BC8000__00000073AD3FE6B8\n000000067F0000400200008A590000BC4000-000000067F0000400200008A590000BC8000__000000914E3F38F0\n000000067F0000400200008A590000BC4000-000000067F0000400200008A590000BC8000__000000931B33AE68\n000000067F0000400200008A590000BC4000-000000067F0000400200008A590000BC8000__000000931B9AFDF8\n000000067F0000400200008A590000BC4B1C-000000067F0000400200008A590000BCD502__00000015967BE3A1-00000016362BE8F9\n000000067F0000400200008A590000BC8000-000000067F0000400200008A590000BCC000__00000016661DE360\n000000067F0000400200008A590000BC8000-000000067F0000400200008A590000BCC000__0000001C760FA190\n000000067F0000400200008A590000BC8000-000000067F0000400200008A590000BCC000__00000038E67ABFA0\n000000067F0000400200008A590000BC8000-000000067F0000400200008A590000BCC000__0000003903F1CFE8\n000000067F0000400200008A590000BC8000-000000067F0000400200008A590000BCC000__0000003B99F7F8A0\n000000067F0000400200008A590000BC8000-000000067F0000400200008A590000BCC000__0000005D2FFFFB38\n000000067F0000400200008A590000BC8000-000000067F0000400200008A590000BCC000__00000073AD3FE6B8\n000000067F0000400200008A590000BC8000-000000067F0000400200008A590000BCC000__000000914E3F38F0\n000000067F0000400200008A590000BC8000-000000067F0000400200008A590000BCC000__000000931B33AE68\n000000067F0000400200008A590000BC8000-000000067F0000400200008A590000BCC0
00__000000931B9AFDF8\n000000067F0000400200008A590000BCC000-000000067F0000400200008A590000BD0000__00000016661DE360\n000000067F0000400200008A590000BCC000-000000067F0000400200008A590000BD0000__0000001C760FA190\n000000067F0000400200008A590000BCC000-000000067F0000400200008A590000BD0000__00000038E67ABFA0\n000000067F0000400200008A590000BCC000-000000067F0000400200008A590000BD0000__0000003903F1CFE8\n000000067F0000400200008A590000BCC000-000000067F0000400200008A590000BD0000__0000003B99F7F8A0\n000000067F0000400200008A590000BCC000-000000067F0000400200008A590000BD0000__0000005D2FFFFB38\n000000067F0000400200008A590000BCC000-000000067F0000400200008A590000BD0000__00000073AD3FE6B8\n000000067F0000400200008A590000BCC000-000000067F0000400200008A590000BD0000__000000914E3F38F0\n000000067F0000400200008A590000BCC000-000000067F0000400200008A590000BD0000__000000931B33AE68\n000000067F0000400200008A590000BCC000-000000067F0000400200008A590000BD0000__000000931B9AFDF8\n000000067F0000400200008A590000BCD502-000000067F0000400200008A590000BD5ED4__00000015967BE3A1-00000016362BE8F9\n000000067F0000400200008A590000BD0000-000000067F0000400200008A590000BD4000__00000016661DE360\n000000067F0000400200008A590000BD0000-000000067F0000400200008A590000BD4000__0000001C760FA190\n000000067F0000400200008A590000BD0000-000000067F0000400200008A590000BD4000__00000038E67ABFA0\n000000067F0000400200008A590000BD0000-000000067F0000400200008A590000BD4000__0000003903F1CFE8\n000000067F0000400200008A590000BD0000-000000067F0000400200008A590000BD4000__0000003B99F7F8A0\n000000067F0000400200008A590000BD0000-000000067F0000400200008A590000BD4000__0000005D2FFFFB38\n000000067F0000400200008A590000BD0000-000000067F0000400200008A590000BD4000__00000073AD3FE6B8\n000000067F0000400200008A590000BD0000-000000067F0000400200008A590000BD4000__000000914E3F38F0\n000000067F0000400200008A590000BD0000-000000067F0000400200008A590000BD4000__000000931B33AE68\n000000067F0000400200008A590000BD0000-000000067F0000400200008A590000BD4000__000000931B9AFDF8\n00000006
7F0000400200008A590000BD4000-000000067F0000400200008A590000BD8000__00000016661DE360\n000000067F0000400200008A590000BD4000-000000067F0000400200008A590000BD8000__0000001C760FA190\n000000067F0000400200008A590000BD4000-000000067F0000400200008A590000BD8000__00000038E67ABFA0\n000000067F0000400200008A590000BD4000-000000067F0000400200008A590000BD8000__0000003903F1CFE8\n000000067F0000400200008A590000BD4000-000000067F0000400200008A590000BD8000__0000003B99F7F8A0\n000000067F0000400200008A590000BD4000-000000067F0000400200008A590000BD8000__0000005D2FFFFB38\n000000067F0000400200008A590000BD4000-000000067F0000400200008A590000BD8000__00000073AD3FE6B8\n000000067F0000400200008A590000BD4000-000000067F0000400200008A590000BD8000__000000914E3F38F0\n000000067F0000400200008A590000BD4000-000000067F0000400200008A590000BD8000__000000931B33AE68\n000000067F0000400200008A590000BD4000-000000067F0000400200008A590000BD8000__000000931B9AFDF8\n000000067F0000400200008A590000BD5ED4-000000067F0000400200008A590000BDE8AA__00000015967BE3A1-00000016362BE8F9\n000000067F0000400200008A590000BD8000-000000067F0000400200008A590000BDC000__00000016661DE360\n000000067F0000400200008A590000BD8000-000000067F0000400200008A590000BDC000__0000001C760FA190\n000000067F0000400200008A590000BD8000-000000067F0000400200008A590000BDC000__00000038E67ABFA0\n000000067F0000400200008A590000BD8000-000000067F0000400200008A590000BDC000__0000003903F1CFE8\n000000067F0000400200008A590000BD8000-000000067F0000400200008A590000BDC000__0000003B99F7F8A0\n000000067F0000400200008A590000BD8000-000000067F0000400200008A590000BDC000__0000005D2FFFFB38\n000000067F0000400200008A590000BD8000-000000067F0000400200008A590000BDC000__00000073AD3FE6B8\n000000067F0000400200008A590000BD8000-000000067F0000400200008A590000BDC000__000000914E3F38F0\n000000067F0000400200008A590000BD8000-000000067F0000400200008A590000BDC000__000000931B33AE68\n000000067F0000400200008A590000BD8000-000000067F0000400200008A590000BDC000__000000931B9AFDF8\n000000067F0000400200008A590000BDC000-0
00000067F0000400200008A590000BE0000__00000016661DE360\n000000067F0000400200008A590000BDC000-000000067F0000400200008A590000BE0000__0000001C760FA190\n000000067F0000400200008A590000BDC000-000000067F0000400200008A590000BE0000__00000038E67ABFA0\n000000067F0000400200008A590000BDC000-000000067F0000400200008A590000BE0000__0000003903F1CFE8\n000000067F0000400200008A590000BDC000-000000067F0000400200008A590000BE0000__0000003B99F7F8A0\n000000067F0000400200008A590000BDC000-000000067F0000400200008A590000BE0000__0000005D2FFFFB38\n000000067F0000400200008A590000BDC000-000000067F0000400200008A590000BE0000__00000073AD3FE6B8\n000000067F0000400200008A590000BDC000-000000067F0000400200008A590000BE0000__000000914E3F38F0\n000000067F0000400200008A590000BDC000-000000067F0000400200008A590000BE0000__000000931B33AE68\n000000067F0000400200008A590000BDC000-000000067F0000400200008A590000BE0000__000000931B9AFDF8\n000000067F0000400200008A590000BDE8AA-000000067F0000400200008A590000BE7291__00000015967BE3A1-00000016362BE8F9\n000000067F0000400200008A590000BE0000-000000067F0000400200008A590000BE4000__00000016661DE360\n000000067F0000400200008A590000BE0000-000000067F0000400200008A590000BE4000__0000001C760FA190\n000000067F0000400200008A590000BE0000-000000067F0000400200008A590000BE4000__00000038E67ABFA0\n000000067F0000400200008A590000BE0000-000000067F0000400200008A590000BE4000__0000003903F1CFE8\n000000067F0000400200008A590000BE0000-000000067F0000400200008A590000BE4000__0000003B99F7F8A0\n000000067F0000400200008A590000BE0000-000000067F0000400200008A590000BE4000__0000005D2FFFFB38\n000000067F0000400200008A590000BE0000-000000067F0000400200008A590000BE4000__00000073AD3FE6B8\n000000067F0000400200008A590000BE0000-000000067F0000400200008A590000BE4000__000000914E3F38F0\n000000067F0000400200008A590000BE0000-000000067F0000400200008A590000BE4000__000000931B33AE68\n000000067F0000400200008A590000BE0000-000000067F0000400200008A590000BE4000__000000931B9AFDF8\n000000067F0000400200008A590000BE4000-000000067F0000400200008A590000B
E8000__00000016661DE360\n000000067F0000400200008A590000BE4000-000000067F0000400200008A590000BE8000__0000001C760FA190\n000000067F0000400200008A590000BE4000-000000067F0000400200008A590000BE8000__00000038E67ABFA0\n000000067F0000400200008A590000BE4000-000000067F0000400200008A590000BE8000__0000003903F1CFE8\n000000067F0000400200008A590000BE4000-000000067F0000400200008A590000BE8000__0000003B99F7F8A0\n000000067F0000400200008A590000BE4000-000000067F0000400200008A590000BE8000__0000005D2FFFFB38\n000000067F0000400200008A590000BE4000-000000067F0000400200008A590000BE8000__00000073AD3FE6B8\n000000067F0000400200008A590000BE4000-000000067F0000400200008A590000BE8000__000000914E3F38F0\n000000067F0000400200008A590000BE4000-000000067F0000400200008A590000BE8000__000000931B33AE68\n000000067F0000400200008A590000BE4000-000000067F0000400200008A590000BE8000__000000931B9AFDF8\n000000067F0000400200008A590000BE7291-000000067F0000400200008A590000BEFC6C__00000015967BE3A1-00000016362BE8F9\n000000067F0000400200008A590000BE8000-000000067F0000400200008A590000BEC000__00000016661DE360\n000000067F0000400200008A590000BE8000-000000067F0000400200008A590000BEC000__0000001C760FA190\n000000067F0000400200008A590000BE8000-000000067F0000400200008A590000BEC000__00000038E67ABFA0\n000000067F0000400200008A590000BE8000-000000067F0000400200008A590000BEC000__0000003903F1CFE8\n000000067F0000400200008A590000BE8000-000000067F0000400200008A590000BEC000__0000003B99F7F8A0\n000000067F0000400200008A590000BE8000-000000067F0000400200008A590000BEC000__0000005D2FFFFB38\n000000067F0000400200008A590000BE8000-000000067F0000400200008A590000BEC000__00000073AD3FE6B8\n000000067F0000400200008A590000BE8000-000000067F0000400200008A590000BEC000__000000914E3F38F0\n000000067F0000400200008A590000BE8000-000000067F0000400200008A590000BEC000__000000931B33AE68\n000000067F0000400200008A590000BE8000-000000067F0000400200008A590000BEC000__000000931B9AFDF8\n000000067F0000400200008A590000BEC000-000000067F0000400200008A590000BF0000__00000016661DE360\n00000
0067F0000400200008A590000BEC000-000000067F0000400200008A590000BF0000__0000001C760FA190\n000000067F0000400200008A590000BEC000-000000067F0000400200008A590000BF0000__00000038E67ABFA0\n000000067F0000400200008A590000BEC000-000000067F0000400200008A590000BF0000__0000003903F1CFE8\n000000067F0000400200008A590000BEC000-000000067F0000400200008A590000BF0000__0000003B99F7F8A0\n000000067F0000400200008A590000BEC000-000000067F0000400200008A590000BF0000__0000005D2FFFFB38\n000000067F0000400200008A590000BEC000-000000067F0000400200008A590000BF0000__00000073AD3FE6B8\n000000067F0000400200008A590000BEC000-000000067F0000400200008A590000BF0000__000000914E3F38F0\n000000067F0000400200008A590000BEC000-000000067F0000400200008A590000BF0000__000000931B33AE68\n000000067F0000400200008A590000BEC000-000000067F0000400200008A590000BF0000__000000931B9AFDF8\n000000067F0000400200008A590000BEFC6C-000000067F0000400200008A590000BF8634__00000015967BE3A1-00000016362BE8F9\n000000067F0000400200008A590000BF0000-000000067F0000400200008A590000BF4000__00000016661DE360\n000000067F0000400200008A590000BF0000-000000067F0000400200008A590000BF4000__0000001C760FA190\n000000067F0000400200008A590000BF0000-000000067F0000400200008A590000BF4000__00000038E67ABFA0\n000000067F0000400200008A590000BF0000-000000067F0000400200008A590000BF4000__0000003903F1CFE8\n000000067F0000400200008A590000BF0000-000000067F0000400200008A590000BF4000__0000003B99F7F8A0\n000000067F0000400200008A590000BF0000-000000067F0000400200008A590000BF4000__0000005D2FFFFB38\n000000067F0000400200008A590000BF0000-000000067F0000400200008A590000BF4000__00000073AD3FE6B8\n000000067F0000400200008A590000BF0000-000000067F0000400200008A590000BF4000__000000914E3F38F0\n000000067F0000400200008A590000BF0000-000000067F0000400200008A590000BF4000__000000931B33AE68\n000000067F0000400200008A590000BF0000-000000067F0000400200008A590000BF4000__000000931B9AFDF8\n000000067F0000400200008A590000BF4000-000000067F0000400200008A590000BF8000__00000016661DE360\n000000067F0000400200008A590000BF400
0-000000067F0000400200008A590000BF8000__0000001C760FA190\n000000067F0000400200008A590000BF4000-000000067F0000400200008A590000BF8000__00000038E67ABFA0\n000000067F0000400200008A590000BF4000-000000067F0000400200008A590000BF8000__0000003903F1CFE8\n000000067F0000400200008A590000BF4000-000000067F0000400200008A590000BF8000__0000003B99F7F8A0\n000000067F0000400200008A590000BF4000-000000067F0000400200008A590000BF8000__0000005D2FFFFB38\n000000067F0000400200008A590000BF4000-000000067F0000400200008A590000BF8000__00000073AD3FE6B8\n000000067F0000400200008A590000BF4000-000000067F0000400200008A590000BF8000__000000914E3F38F0\n000000067F0000400200008A590000BF4000-000000067F0000400200008A590000BF8000__000000931B33AE68\n000000067F0000400200008A590000BF4000-000000067F0000400200008A590000BF8000__000000931B9AFDF8\n000000067F0000400200008A590000BF8000-000000067F0000400200008A590000BFC000__00000016661DE360\n000000067F0000400200008A590000BF8000-000000067F0000400200008A590000BFC000__0000001C760FA190\n000000067F0000400200008A590000BF8000-000000067F0000400200008A590000BFC000__00000038E67ABFA0\n000000067F0000400200008A590000BF8000-000000067F0000400200008A590000BFC000__0000003903F1CFE8\n000000067F0000400200008A590000BF8000-000000067F0000400200008A590000BFC000__0000003B99F7F8A0\n000000067F0000400200008A590000BF8000-000000067F0000400200008A590000BFC000__0000005D2FFFFB38\n000000067F0000400200008A590000BF8000-000000067F0000400200008A590000BFC000__00000073AD3FE6B8\n000000067F0000400200008A590000BF8000-000000067F0000400200008A590000BFC000__000000914E3F38F0\n000000067F0000400200008A590000BF8000-000000067F0000400200008A590000BFC000__000000931B33AE68\n000000067F0000400200008A590000BF8000-000000067F0000400200008A590000BFC000__000000931B9AFDF8\n000000067F0000400200008A590000BF8634-000000067F0000400200008A590000C01008__00000015967BE3A1-00000016362BE8F9\n000000067F0000400200008A590000BFC000-000000067F0000400200008A590000C00000__00000016661DE360\n000000067F0000400200008A590000BFC000-000000067F0000400200008A5900
00C00000__0000001C760FA190\n000000067F0000400200008A590000BFC000-000000067F0000400200008A590000C00000__00000038E67ABFA0\n000000067F0000400200008A590000BFC000-000000067F0000400200008A590000C00000__0000003903F1CFE8\n000000067F0000400200008A590000BFC000-000000067F0000400200008A590000C00000__0000003B99F7F8A0\n000000067F0000400200008A590000BFC000-000000067F0000400200008A590000C00000__0000005D2FFFFB38\n000000067F0000400200008A590000BFC000-000000067F0000400200008A590000C00000__00000073AD3FE6B8\n000000067F0000400200008A590000BFC000-000000067F0000400200008A590000C00000__000000914E3F38F0\n000000067F0000400200008A590000BFC000-000000067F0000400200008A590000C00000__000000931B33AE68\n000000067F0000400200008A590000BFC000-000000067F0000400200008A590000C00000__000000931B9AFDF8\n000000067F0000400200008A590000C00000-000000067F0000400200008A590000C04000__00000016661DE360\n000000067F0000400200008A590000C00000-000000067F0000400200008A590000C04000__0000001C760FA190\n000000067F0000400200008A590000C00000-000000067F0000400200008A590000C04000__00000038E67ABFA0\n000000067F0000400200008A590000C00000-000000067F0000400200008A590000C04000__0000003903F1CFE8\n000000067F0000400200008A590000C00000-000000067F0000400200008A590000C04000__0000003B99F7F8A0\n000000067F0000400200008A590000C00000-000000067F0000400200008A590000C04000__0000005D2FFFFB38\n000000067F0000400200008A590000C00000-000000067F0000400200008A590000C04000__00000073AD3FE6B8\n000000067F0000400200008A590000C00000-000000067F0000400200008A590000C04000__000000914E3F38F0\n000000067F0000400200008A590000C00000-000000067F0000400200008A590000C04000__000000931B33AE68\n000000067F0000400200008A590000C00000-000000067F0000400200008A590000C04000__000000931B9AFDF8\n000000067F0000400200008A590000C01008-000000067F0000400200008A590100000000__00000015967BE3A1-00000016362BE8F9\n000000067F0000400200008A590000C012F5-000000067F0000400200008A590000C09CEB__00000016362BE8F9-00000016E5D3F7B9\n000000067F0000400200008A590000C04000-000000067F0000400200008A590000C08000__000
00016661DE360\n000000067F0000400200008A590000C04000-000000067F0000400200008A590000C08000__0000001C760FA190\n000000067F0000400200008A590000C04000-000000067F0000400200008A590000C08000__00000038E67ABFA0\n000000067F0000400200008A590000C04000-000000067F0000400200008A590000C08000__0000003903F1CFE8\n000000067F0000400200008A590000C04000-000000067F0000400200008A590000C08000__0000003B99F7F8A0\n000000067F0000400200008A590000C04000-000000067F0000400200008A590000C08000__0000005D2FFFFB38\n000000067F0000400200008A590000C04000-000000067F0000400200008A590000C08000__00000073AD3FE6B8\n000000067F0000400200008A590000C04000-000000067F0000400200008A590000C08000__000000914E3F38F0\n000000067F0000400200008A590000C04000-000000067F0000400200008A590000C08000__000000931B33AE68\n000000067F0000400200008A590000C04000-000000067F0000400200008A590000C08000__000000931B9AFDF8\n000000067F0000400200008A590000C08000-000000067F0000400200008A590000C0C000__00000016661DE360\n000000067F0000400200008A590000C08000-000000067F0000400200008A590000C0C000__0000001C760FA190\n000000067F0000400200008A590000C08000-000000067F0000400200008A590000C0C000__00000038E67ABFA0\n000000067F0000400200008A590000C08000-000000067F0000400200008A590000C0C000__0000003903F1CFE8\n000000067F0000400200008A590000C08000-000000067F0000400200008A590000C0C000__0000003B99F7F8A0\n000000067F0000400200008A590000C08000-000000067F0000400200008A590000C0C000__0000005D2FFFFB38\n000000067F0000400200008A590000C08000-000000067F0000400200008A590000C0C000__00000073AD3FE6B8\n000000067F0000400200008A590000C08000-000000067F0000400200008A590000C0C000__000000914E3F38F0\n000000067F0000400200008A590000C08000-000000067F0000400200008A590000C0C000__000000931B33AE68\n000000067F0000400200008A590000C08000-000000067F0000400200008A590000C0C000__000000931B9AFDF8\n000000067F0000400200008A590000C09CEB-000000067F0000400200008A590000C126CC__00000016362BE8F9-00000016E5D3F7B9\n000000067F0000400200008A590000C0C000-000000067F0000400200008A590000C10000__00000016661DE360\n000000067F00004
00200008A590000C0C000-000000067F0000400200008A590000C10000__0000001C760FA190\n000000067F0000400200008A590000C0C000-000000067F0000400200008A590000C10000__00000038E67ABFA0\n000000067F0000400200008A590000C0C000-000000067F0000400200008A590000C10000__0000003903F1CFE8\n000000067F0000400200008A590000C0C000-000000067F0000400200008A590000C10000__0000003B99F7F8A0\n000000067F0000400200008A590000C0C000-000000067F0000400200008A590000C10000__0000005D2FFFFB38\n000000067F0000400200008A590000C0C000-000000067F0000400200008A590000C10000__00000073AD3FE6B8\n000000067F0000400200008A590000C0C000-000000067F0000400200008A590000C10000__000000914E3F38F0\n000000067F0000400200008A590000C0C000-000000067F0000400200008A590000C10000__000000931B33AE68\n000000067F0000400200008A590000C0C000-000000067F0000400200008A590000C10000__000000931B9AFDF8\n000000067F0000400200008A590000C10000-000000067F0000400200008A590000C14000__00000016661DE360\n000000067F0000400200008A590000C10000-000000067F0000400200008A590000C14000__0000001C760FA190\n000000067F0000400200008A590000C10000-000000067F0000400200008A590000C14000__00000038E67ABFA0\n000000067F0000400200008A590000C10000-000000067F0000400200008A590000C14000__0000003903F1CFE8\n000000067F0000400200008A590000C10000-000000067F0000400200008A590000C14000__0000003B99F7F8A0\n000000067F0000400200008A590000C10000-000000067F0000400200008A590000C14000__0000005D2FFFFB38\n000000067F0000400200008A590000C10000-000000067F0000400200008A590000C14000__00000073AD3FE6B8\n000000067F0000400200008A590000C10000-000000067F0000400200008A590000C14000__000000914E3F38F0\n000000067F0000400200008A590000C10000-000000067F0000400200008A590000C14000__000000931B33AE68\n000000067F0000400200008A590000C10000-000000067F0000400200008A590000C14000__000000931B9AFDF8\n000000067F0000400200008A590000C126CC-000000067F0000400200008A590000C1B0AB__00000016362BE8F9-00000016E5D3F7B9\n000000067F0000400200008A590000C14000-000000067F0000400200008A590000C18000__00000016661DE360\n000000067F0000400200008A590000C14000-00000006
7F0000400200008A590000C18000__0000001C760FA190\n000000067F0000400200008A590000C14000-000000067F0000400200008A590000C18000__00000038E67ABFA0\n000000067F0000400200008A590000C14000-000000067F0000400200008A590000C18000__0000003903F1CFE8\n000000067F0000400200008A590000C14000-000000067F0000400200008A590000C18000__0000003B99F7F8A0\n000000067F0000400200008A590000C14000-000000067F0000400200008A590000C18000__0000005D2FFFFB38\n000000067F0000400200008A590000C14000-000000067F0000400200008A590000C18000__00000073AD3FE6B8\n000000067F0000400200008A590000C14000-000000067F0000400200008A590000C18000__000000914E3F38F0\n000000067F0000400200008A590000C14000-000000067F0000400200008A590000C18000__000000931B33AE68\n000000067F0000400200008A590000C14000-000000067F0000400200008A590000C18000__000000931B9AFDF8\n000000067F0000400200008A590000C18000-000000067F0000400200008A590000C1C000__0000001C760FA190\n000000067F0000400200008A590000C18000-000000067F0000400200008A590000C1C000__00000038E67ABFA0\n000000067F0000400200008A590000C18000-000000067F0000400200008A590000C1C000__0000003903F1CFE8\n000000067F0000400200008A590000C18000-000000067F0000400200008A590000C1C000__0000003B99F7F8A0\n000000067F0000400200008A590000C18000-000000067F0000400200008A590000C1C000__0000005D2FFFFB38\n000000067F0000400200008A590000C18000-000000067F0000400200008A590000C1C000__00000073AD3FE6B8\n000000067F0000400200008A590000C18000-000000067F0000400200008A590000C1C000__000000914E3F38F0\n000000067F0000400200008A590000C18000-000000067F0000400200008A590000C1C000__000000931B33AE68\n000000067F0000400200008A590000C18000-000000067F0000400200008A590000C1C000__000000931B9AFDF8\n000000067F0000400200008A590000C18000-030000000000000000000000000000000002__00000016661DE360\n000000067F0000400200008A590000C1B0AB-000000067F0000400200008A590000C23A86__00000016362BE8F9-00000016E5D3F7B9\n000000067F0000400200008A590000C1C000-000000067F0000400200008A590000C20000__0000001C760FA190\n000000067F0000400200008A590000C1C000-000000067F0000400200008A590000C20000__
00000038E67ABFA0\n000000067F0000400200008A590000C1C000-000000067F0000400200008A590000C20000__0000003903F1CFE8\n000000067F0000400200008A590000C1C000-000000067F0000400200008A590000C20000__0000003B99F7F8A0\n000000067F0000400200008A590000C1C000-000000067F0000400200008A590000C20000__0000005D2FFFFB38\n000000067F0000400200008A590000C1C000-000000067F0000400200008A590000C20000__00000073AD3FE6B8\n000000067F0000400200008A590000C1C000-000000067F0000400200008A590000C20000__000000914E3F38F0\n000000067F0000400200008A590000C1C000-000000067F0000400200008A590000C20000__000000931B33AE68\n000000067F0000400200008A590000C1C000-000000067F0000400200008A590000C20000__000000931B9AFDF8\n000000067F0000400200008A590000C20000-000000067F0000400200008A590000C24000__0000001C760FA190\n000000067F0000400200008A590000C20000-000000067F0000400200008A590000C24000__00000038E67ABFA0\n000000067F0000400200008A590000C20000-000000067F0000400200008A590000C24000__0000003903F1CFE8\n000000067F0000400200008A590000C20000-000000067F0000400200008A590000C24000__0000003B99F7F8A0\n000000067F0000400200008A590000C20000-000000067F0000400200008A590000C24000__0000005D2FFFFB38\n000000067F0000400200008A590000C20000-000000067F0000400200008A590000C24000__00000073AD3FE6B8\n000000067F0000400200008A590000C20000-000000067F0000400200008A590000C24000__000000914E3F38F0\n000000067F0000400200008A590000C20000-000000067F0000400200008A590000C24000__000000931B33AE68\n000000067F0000400200008A590000C20000-000000067F0000400200008A590000C24000__000000931B9AFDF8\n000000067F0000400200008A590000C23A86-000000067F0000400200008A590000C2C466__00000016362BE8F9-00000016E5D3F7B9\n000000067F0000400200008A590000C24000-000000067F0000400200008A590000C28000__0000001C760FA190\n000000067F0000400200008A590000C24000-000000067F0000400200008A590000C28000__00000038E67ABFA0\n000000067F0000400200008A590000C24000-000000067F0000400200008A590000C28000__0000003903F1CFE8\n000000067F0000400200008A590000C24000-000000067F0000400200008A590000C28000__0000003B99F7F8A0\n000000067F00
00400200008A590000C24000-000000067F0000400200008A590000C28000__0000005D2FFFFB38\n000000067F0000400200008A590000C24000-000000067F0000400200008A590000C28000__00000073AD3FE6B8\n000000067F0000400200008A590000C24000-000000067F0000400200008A590000C28000__000000914E3F38F0\n000000067F0000400200008A590000C24000-000000067F0000400200008A590000C28000__000000931B33AE68\n000000067F0000400200008A590000C24000-000000067F0000400200008A590000C28000__000000931B9AFDF8\n000000067F0000400200008A590000C28000-000000067F0000400200008A590000C2C000__0000001C760FA190\n000000067F0000400200008A590000C28000-000000067F0000400200008A590000C2C000__00000038E67ABFA0\n000000067F0000400200008A590000C28000-000000067F0000400200008A590000C2C000__0000003903F1CFE8\n000000067F0000400200008A590000C28000-000000067F0000400200008A590000C2C000__0000003B99F7F8A0\n000000067F0000400200008A590000C28000-000000067F0000400200008A590000C2C000__0000005D2FFFFB38\n000000067F0000400200008A590000C28000-000000067F0000400200008A590000C2C000__00000073AD3FE6B8\n000000067F0000400200008A590000C28000-000000067F0000400200008A590000C2C000__000000914E3F38F0\n000000067F0000400200008A590000C28000-000000067F0000400200008A590000C2C000__000000931B33AE68\n000000067F0000400200008A590000C28000-000000067F0000400200008A590000C2C000__000000931B9AFDF8\n000000067F0000400200008A590000C2C000-000000067F0000400200008A590000C30000__0000001C760FA190\n000000067F0000400200008A590000C2C000-000000067F0000400200008A590000C30000__00000038E67ABFA0\n000000067F0000400200008A590000C2C000-000000067F0000400200008A590000C30000__0000003903F1CFE8\n000000067F0000400200008A590000C2C000-000000067F0000400200008A590000C30000__0000003B99F7F8A0\n000000067F0000400200008A590000C2C000-000000067F0000400200008A590000C30000__0000005D2FFFFB38\n000000067F0000400200008A590000C2C000-000000067F0000400200008A590000C30000__00000073AD3FE6B8\n000000067F0000400200008A590000C2C000-000000067F0000400200008A590000C30000__000000914E3F38F0\n000000067F0000400200008A590000C2C000-000000067F000040020000
8A590000C30000__000000931B33AE68\n000000067F0000400200008A590000C2C000-000000067F0000400200008A590000C30000__000000931B9AFDF8\n000000067F0000400200008A590000C2C466-000000067F0000400200008A590000C34E3E__00000016362BE8F9-00000016E5D3F7B9\n000000067F0000400200008A590000C30000-000000067F0000400200008A590000C34000__0000001C760FA190\n000000067F0000400200008A590000C30000-000000067F0000400200008A590000C34000__00000038E67ABFA0\n000000067F0000400200008A590000C30000-000000067F0000400200008A590000C34000__0000003903F1CFE8\n000000067F0000400200008A590000C30000-000000067F0000400200008A590000C34000__0000003B99F7F8A0\n000000067F0000400200008A590000C30000-000000067F0000400200008A590000C34000__0000005D2FFFFB38\n000000067F0000400200008A590000C30000-000000067F0000400200008A590000C34000__00000073AD3FE6B8\n000000067F0000400200008A590000C30000-000000067F0000400200008A590000C34000__000000914E3F38F0\n000000067F0000400200008A590000C30000-000000067F0000400200008A590000C34000__000000931B33AE68\n000000067F0000400200008A590000C30000-000000067F0000400200008A590000C34000__000000931B9AFDF8\n000000067F0000400200008A590000C34000-000000067F0000400200008A590000C38000__0000001C760FA190\n000000067F0000400200008A590000C34000-000000067F0000400200008A590000C38000__00000038E67ABFA0\n000000067F0000400200008A590000C34000-000000067F0000400200008A590000C38000__0000003903F1CFE8\n000000067F0000400200008A590000C34000-000000067F0000400200008A590000C38000__0000003B99F7F8A0\n000000067F0000400200008A590000C34000-000000067F0000400200008A590000C38000__0000005D2FFFFB38\n000000067F0000400200008A590000C34000-000000067F0000400200008A590000C38000__00000073AD3FE6B8\n000000067F0000400200008A590000C34000-000000067F0000400200008A590000C38000__000000914E3F38F0\n000000067F0000400200008A590000C34000-000000067F0000400200008A590000C38000__000000931B33AE68\n000000067F0000400200008A590000C34000-000000067F0000400200008A590000C38000__000000931B9AFDF8\n000000067F0000400200008A590000C34E3E-000000067F0000400200008A590000C3D814__00000016362BE8
F9-00000016E5D3F7B9\n000000067F0000400200008A590000C38000-000000067F0000400200008A590000C3C000__0000001C760FA190\n000000067F0000400200008A590000C38000-000000067F0000400200008A590000C3C000__00000038E67ABFA0\n000000067F0000400200008A590000C38000-000000067F0000400200008A590000C3C000__0000003903F1CFE8\n000000067F0000400200008A590000C38000-000000067F0000400200008A590000C3C000__0000003B99F7F8A0\n000000067F0000400200008A590000C38000-000000067F0000400200008A590000C3C000__0000005D2FFFFB38\n000000067F0000400200008A590000C38000-000000067F0000400200008A590000C3C000__00000073AD3FE6B8\n000000067F0000400200008A590000C38000-000000067F0000400200008A590000C3C000__000000914E3F38F0\n000000067F0000400200008A590000C38000-000000067F0000400200008A590000C3C000__000000931B33AE68\n000000067F0000400200008A590000C38000-000000067F0000400200008A590000C3C000__000000931B9AFDF8\n000000067F0000400200008A590000C3C000-000000067F0000400200008A590000C40000__0000001C760FA190\n000000067F0000400200008A590000C3C000-000000067F0000400200008A590000C40000__00000038E67ABFA0\n000000067F0000400200008A590000C3C000-000000067F0000400200008A590000C40000__0000003903F1CFE8\n000000067F0000400200008A590000C3C000-000000067F0000400200008A590000C40000__0000003B99F7F8A0\n000000067F0000400200008A590000C3C000-000000067F0000400200008A590000C40000__0000005D2FFFFB38\n000000067F0000400200008A590000C3C000-000000067F0000400200008A590000C40000__00000073AD3FE6B8\n000000067F0000400200008A590000C3C000-000000067F0000400200008A590000C40000__000000914E3F38F0\n000000067F0000400200008A590000C3C000-000000067F0000400200008A590000C40000__000000931B33AE68\n000000067F0000400200008A590000C3C000-000000067F0000400200008A590000C40000__000000931B9AFDF8\n000000067F0000400200008A590000C3D814-000000067F0000400200008A590000C461F2__00000016362BE8F9-00000016E5D3F7B9\n000000067F0000400200008A590000C40000-000000067F0000400200008A590000C44000__0000001C760FA190\n000000067F0000400200008A590000C40000-000000067F0000400200008A590000C44000__00000038E67ABFA0\n000000067
F0000400200008A590000C40000-000000067F0000400200008A590000C44000__0000003903F1CFE8\n000000067F0000400200008A590000C40000-000000067F0000400200008A590000C44000__0000003B99F7F8A0\n000000067F0000400200008A590000C40000-000000067F0000400200008A590000C44000__0000005D2FFFFB38\n000000067F0000400200008A590000C40000-000000067F0000400200008A590000C44000__00000073AD3FE6B8\n000000067F0000400200008A590000C40000-000000067F0000400200008A590000C44000__000000914E3F38F0\n000000067F0000400200008A590000C40000-000000067F0000400200008A590000C44000__000000931B33AE68\n000000067F0000400200008A590000C40000-000000067F0000400200008A590000C44000__000000931B9AFDF8\n000000067F0000400200008A590000C44000-000000067F0000400200008A590000C48000__0000001C760FA190\n000000067F0000400200008A590000C44000-000000067F0000400200008A590000C48000__00000038E67ABFA0\n000000067F0000400200008A590000C44000-000000067F0000400200008A590000C48000__0000003903F1CFE8\n000000067F0000400200008A590000C44000-000000067F0000400200008A590000C48000__0000003B99F7F8A0\n000000067F0000400200008A590000C44000-000000067F0000400200008A590000C48000__0000005D2FFFFB38\n000000067F0000400200008A590000C44000-000000067F0000400200008A590000C48000__00000073AD3FE6B8\n000000067F0000400200008A590000C44000-000000067F0000400200008A590000C48000__000000914E3F38F0\n000000067F0000400200008A590000C44000-000000067F0000400200008A590000C48000__000000931B33AE68\n000000067F0000400200008A590000C44000-000000067F0000400200008A590000C48000__000000931B9AFDF8\n000000067F0000400200008A590000C461F2-000000067F0000400200008A590000C4EBD4__00000016362BE8F9-00000016E5D3F7B9\n000000067F0000400200008A590000C48000-000000067F0000400200008A590000C4C000__0000001C760FA190\n000000067F0000400200008A590000C48000-000000067F0000400200008A590000C4C000__00000038E67ABFA0\n000000067F0000400200008A590000C48000-000000067F0000400200008A590000C4C000__0000003903F1CFE8\n000000067F0000400200008A590000C48000-000000067F0000400200008A590000C4C000__0000003B99F7F8A0\n000000067F0000400200008A590000C48000-00
0000067F0000400200008A590000C4C000__0000005D2FFFFB38\n000000067F0000400200008A590000C48000-000000067F0000400200008A590000C4C000__00000073AD3FE6B8\n000000067F0000400200008A590000C48000-000000067F0000400200008A590000C4C000__000000914E3F38F0\n000000067F0000400200008A590000C48000-000000067F0000400200008A590000C4C000__000000931B33AE68\n000000067F0000400200008A590000C48000-000000067F0000400200008A590000C4C000__000000931B9AFDF8\n000000067F0000400200008A590000C4C000-000000067F0000400200008A590000C50000__0000001C760FA190\n000000067F0000400200008A590000C4C000-000000067F0000400200008A590000C50000__00000038E67ABFA0\n000000067F0000400200008A590000C4C000-000000067F0000400200008A590000C50000__0000003903F1CFE8\n000000067F0000400200008A590000C4C000-000000067F0000400200008A590000C50000__0000003B99F7F8A0\n000000067F0000400200008A590000C4C000-000000067F0000400200008A590000C50000__0000005D2FFFFB38\n000000067F0000400200008A590000C4C000-000000067F0000400200008A590000C50000__00000073AD3FE6B8\n000000067F0000400200008A590000C4C000-000000067F0000400200008A590000C50000__000000914E3F38F0\n000000067F0000400200008A590000C4C000-000000067F0000400200008A590000C50000__000000931B9A2710\n000000067F0000400200008A590000C4EBD4-000000067F0000400200008A590000C575B6__00000016362BE8F9-00000016E5D3F7B9\n000000067F0000400200008A590000C50000-000000067F0000400200008A590000C54000__0000001C760FA190\n000000067F0000400200008A590000C50000-000000067F0000400200008A590000C54000__00000038E67ABFA0\n000000067F0000400200008A590000C50000-000000067F0000400200008A590000C54000__0000003903F1CFE8\n000000067F0000400200008A590000C50000-000000067F0000400200008A590000C54000__0000003B99F7F8A0\n000000067F0000400200008A590000C50000-000000067F0000400200008A590000C54000__0000005D2FFFFB38\n000000067F0000400200008A590000C50000-000000067F0000400200008A590000C54000__00000073AD3FE6B8\n000000067F0000400200008A590000C50000-000000067F0000400200008A590000C54000__000000914E3F38F0\n000000067F0000400200008A590000C50000-000000067F0000400200008A590000C5
4000__000000931B9A2710\n000000067F0000400200008A590000C54000-000000067F0000400200008A590000C58000__0000001C760FA190\n000000067F0000400200008A590000C54000-000000067F0000400200008A590000C58000__00000038E67ABFA0\n000000067F0000400200008A590000C54000-000000067F0000400200008A590000C58000__0000003903F1CFE8\n000000067F0000400200008A590000C54000-000000067F0000400200008A590000C58000__0000003B99F7F8A0\n000000067F0000400200008A590000C54000-000000067F0000400200008A590000C58000__0000005D2FFFFB38\n000000067F0000400200008A590000C54000-000000067F0000400200008A590000C58000__00000073AD3FE6B8\n000000067F0000400200008A590000C54000-000000067F0000400200008A590000C58000__000000914E3F38F0\n000000067F0000400200008A590000C54000-000000067F0000400200008A590000C58000__000000931B9A2710\n000000067F0000400200008A590000C575B6-000000067F0000400200008A590000C5FF90__00000016362BE8F9-00000016E5D3F7B9\n000000067F0000400200008A590000C58000-000000067F0000400200008A590000C5C000__0000001C760FA190\n000000067F0000400200008A590000C58000-000000067F0000400200008A590000C5C000__00000038E67ABFA0\n000000067F0000400200008A590000C58000-000000067F0000400200008A590000C5C000__0000003903F1CFE8\n000000067F0000400200008A590000C58000-000000067F0000400200008A590000C5C000__0000003B99F7F8A0\n000000067F0000400200008A590000C58000-000000067F0000400200008A590000C5C000__0000005D2FFFFB38\n000000067F0000400200008A590000C58000-000000067F0000400200008A590000C5C000__00000073AD3FE6B8\n000000067F0000400200008A590000C58000-000000067F0000400200008A590000C5C000__000000914E3F38F0\n000000067F0000400200008A590000C58000-000000067F0000400200008A590000C5C000__000000931B9A2710\n000000067F0000400200008A590000C5C000-000000067F0000400200008A590000C60000__0000001C760FA190\n000000067F0000400200008A590000C5C000-000000067F0000400200008A590000C60000__00000038E67ABFA0\n000000067F0000400200008A590000C5C000-000000067F0000400200008A590000C60000__0000003903F1CFE8\n000000067F0000400200008A590000C5C000-000000067F0000400200008A590000C60000__0000003B99F7F8A0\n000000
067F0000400200008A590000C5C000-000000067F0000400200008A590000C60000__0000005D2FFFFB38\n000000067F0000400200008A590000C5C000-000000067F0000400200008A590000C60000__00000073AD3FE6B8\n000000067F0000400200008A590000C5C000-000000067F0000400200008A590000C60000__000000914E3F38F0\n000000067F0000400200008A590000C5C000-000000067F0000400200008A590000C60000__000000931B9A2710\n000000067F0000400200008A590000C5FF90-000000067F0000400200008A590100000000__00000016362BE8F9-00000016E5D3F7B9\n000000067F0000400200008A590000C60000-000000067F0000400200008A590000C64000__0000001C725A2400\n000000067F0000400200008A590000C60000-000000067F0000400200008A590000C64000__0000001C760FA190\n000000067F0000400200008A590000C60000-000000067F0000400200008A590000C64000__00000038E67ABFA0\n000000067F0000400200008A590000C60000-000000067F0000400200008A590000C64000__0000003903F1CFE8\n000000067F0000400200008A590000C60000-000000067F0000400200008A590000C64000__0000003B99F7F8A0\n000000067F0000400200008A590000C60000-000000067F0000400200008A590000C64000__0000005D2FFFFB38\n000000067F0000400200008A590000C60000-000000067F0000400200008A590000C64000__00000073AD3FE6B8\n000000067F0000400200008A590000C60000-000000067F0000400200008A590000C64000__000000914E3F38F0\n000000067F0000400200008A590000C60000-000000067F0000400200008A590000C64000__000000931B9A2710\n000000067F0000400200008A590000C60295-000000067F0000400200008A590000C68C70__00000016E5D3F7B9-000000178583EBE1\n000000067F0000400200008A590000C64000-000000067F0000400200008A590000C68000__0000001C725A2400\n000000067F0000400200008A590000C64000-000000067F0000400200008A590000C68000__0000001C760FA190\n000000067F0000400200008A590000C64000-000000067F0000400200008A590000C68000__00000038E67ABFA0\n000000067F0000400200008A590000C64000-000000067F0000400200008A590000C68000__0000003903F1CFE8\n000000067F0000400200008A590000C64000-000000067F0000400200008A590000C68000__0000003B99F7F8A0\n000000067F0000400200008A590000C64000-000000067F0000400200008A590000C68000__0000005D2FFFFB38\n000000067F000040020
0008A590000C64000-000000067F0000400200008A590000C68000__00000073AD3FE6B8\n000000067F0000400200008A590000C64000-000000067F0000400200008A590000C68000__000000914E3F38F0\n000000067F0000400200008A590000C64000-000000067F0000400200008A590000C68000__000000931B9A2710\n000000067F0000400200008A590000C68000-000000067F0000400200008A590000C6C000__0000001C725A2400\n000000067F0000400200008A590000C68000-000000067F0000400200008A590000C6C000__0000001C760FA190\n000000067F0000400200008A590000C68000-000000067F0000400200008A590000C6C000__00000038E67ABFA0\n000000067F0000400200008A590000C68000-000000067F0000400200008A590000C6C000__0000003903F1CFE8\n000000067F0000400200008A590000C68000-000000067F0000400200008A590000C6C000__0000003B99F7F8A0\n000000067F0000400200008A590000C68000-000000067F0000400200008A590000C6C000__0000005D2FFFFB38\n000000067F0000400200008A590000C68000-000000067F0000400200008A590000C6C000__00000073AD3FE6B8\n000000067F0000400200008A590000C68000-000000067F0000400200008A590000C6C000__000000914E3F38F0\n000000067F0000400200008A590000C68000-000000067F0000400200008A590000C6C000__000000931B9A2710\n000000067F0000400200008A590000C68C70-000000067F0000400200008A590000C7164A__00000016E5D3F7B9-000000178583EBE1\n000000067F0000400200008A590000C6C000-000000067F0000400200008A590000C70000__0000001C725A2400\n000000067F0000400200008A590000C6C000-000000067F0000400200008A590000C70000__0000001C760FA190\n000000067F0000400200008A590000C6C000-000000067F0000400200008A590000C70000__00000038E67ABFA0\n000000067F0000400200008A590000C6C000-000000067F0000400200008A590000C70000__0000003903F1CFE8\n000000067F0000400200008A590000C6C000-000000067F0000400200008A590000C70000__0000003B99F7F8A0\n000000067F0000400200008A590000C6C000-000000067F0000400200008A590000C70000__0000005D2FFFFB38\n000000067F0000400200008A590000C6C000-000000067F0000400200008A590000C70000__00000073AD3FE6B8\n000000067F0000400200008A590000C6C000-000000067F0000400200008A590000C70000__000000914E3F38F0\n000000067F0000400200008A590000C6C000-000000067F00
00400200008A590000C70000__000000931B9A2710\n000000067F0000400200008A590000C70000-000000067F0000400200008A590000C74000__0000001C725A2400\n000000067F0000400200008A590000C70000-000000067F0000400200008A590000C74000__0000001C760FA190\n000000067F0000400200008A590000C70000-000000067F0000400200008A590000C74000__00000038E67ABFA0\n000000067F0000400200008A590000C70000-000000067F0000400200008A590000C74000__0000003903F1CFE8\n000000067F0000400200008A590000C70000-000000067F0000400200008A590000C74000__0000003B99F7F8A0\n000000067F0000400200008A590000C70000-000000067F0000400200008A590000C74000__0000005D2FFFFB38\n000000067F0000400200008A590000C70000-000000067F0000400200008A590000C74000__00000073AD3FE6B8\n000000067F0000400200008A590000C70000-000000067F0000400200008A590000C74000__000000914E3F38F0\n000000067F0000400200008A590000C70000-000000067F0000400200008A590000C74000__000000931B9A2710\n000000067F0000400200008A590000C7164A-000000067F0000400200008A590000C7A01A__00000016E5D3F7B9-000000178583EBE1\n000000067F0000400200008A590000C74000-000000067F0000400200008A590000C78000__0000001C725A2400\n000000067F0000400200008A590000C74000-000000067F0000400200008A590000C78000__0000001C760FA190\n000000067F0000400200008A590000C74000-000000067F0000400200008A590000C78000__00000038E67ABFA0\n000000067F0000400200008A590000C74000-000000067F0000400200008A590000C78000__0000003903F1CFE8\n000000067F0000400200008A590000C74000-000000067F0000400200008A590000C78000__0000003B99F7F8A0\n000000067F0000400200008A590000C74000-000000067F0000400200008A590000C78000__0000005D2FFFFB38\n000000067F0000400200008A590000C74000-000000067F0000400200008A590000C78000__00000073AD3FE6B8\n000000067F0000400200008A590000C74000-000000067F0000400200008A590000C78000__000000914E3F38F0\n000000067F0000400200008A590000C74000-000000067F0000400200008A590000C78000__000000931B9A2710\n000000067F0000400200008A590000C78000-000000067F0000400200008A590000C7C000__0000001C725A2400\n000000067F0000400200008A590000C78000-000000067F0000400200008A590000C7C000__0000
001C760FA190\n000000067F0000400200008A590000C78000-000000067F0000400200008A590000C7C000__00000038E67ABFA0\n000000067F0000400200008A590000C78000-000000067F0000400200008A590000C7C000__0000003903F1CFE8\n000000067F0000400200008A590000C78000-000000067F0000400200008A590000C7C000__0000003B99F7F8A0\n000000067F0000400200008A590000C78000-000000067F0000400200008A590000C7C000__0000005D2FFFFB38\n000000067F0000400200008A590000C78000-000000067F0000400200008A590000C7C000__00000073AD3FE6B8\n000000067F0000400200008A590000C78000-000000067F0000400200008A590000C7C000__000000914E3F38F0\n000000067F0000400200008A590000C78000-000000067F0000400200008A590000C7C000__000000931B9A2710\n000000067F0000400200008A590000C7A01A-000000067F0000400200008A590000C829F4__00000016E5D3F7B9-000000178583EBE1\n000000067F0000400200008A590000C7C000-000000067F0000400200008A590000C80000__0000001C725A2400\n000000067F0000400200008A590000C7C000-000000067F0000400200008A590000C80000__0000001C760FA190\n000000067F0000400200008A590000C7C000-000000067F0000400200008A590000C80000__00000038E67ABFA0\n000000067F0000400200008A590000C7C000-000000067F0000400200008A590000C80000__0000003903F1CFE8\n000000067F0000400200008A590000C7C000-000000067F0000400200008A590000C80000__0000003B99F7F8A0\n000000067F0000400200008A590000C7C000-000000067F0000400200008A590000C80000__0000005D2FFFFB38\n000000067F0000400200008A590000C7C000-000000067F0000400200008A590000C80000__00000073AD3FE6B8\n000000067F0000400200008A590000C7C000-000000067F0000400200008A590000C80000__000000914E3F38F0\n000000067F0000400200008A590000C7C000-000000067F0000400200008A590000C80000__000000931B9A2710\n000000067F0000400200008A590000C80000-000000067F0000400200008A590000C84000__0000001C725A2400\n000000067F0000400200008A590000C80000-000000067F0000400200008A590000C84000__0000001C760FA190\n000000067F0000400200008A590000C80000-000000067F0000400200008A590000C84000__00000038E67ABFA0\n000000067F0000400200008A590000C80000-000000067F0000400200008A590000C84000__0000003903F1CFE8\n000000067F000040
0200008A590000C80000-000000067F0000400200008A590000C84000__0000003B99F7F8A0\n000000067F0000400200008A590000C80000-000000067F0000400200008A590000C84000__0000005D2FFFFB38\n000000067F0000400200008A590000C80000-000000067F0000400200008A590000C84000__00000073AD3FE6B8\n000000067F0000400200008A590000C80000-000000067F0000400200008A590000C84000__000000914E3F38F0\n000000067F0000400200008A590000C80000-000000067F0000400200008A590000C84000__000000931B9A2710\n000000067F0000400200008A590000C829F4-000000067F0000400200008A590000C8B3D9__00000016E5D3F7B9-000000178583EBE1\n000000067F0000400200008A590000C84000-000000067F0000400200008A590000C88000__0000001C725A2400\n000000067F0000400200008A590000C84000-000000067F0000400200008A590000C88000__0000001C760FA190\n000000067F0000400200008A590000C84000-000000067F0000400200008A590000C88000__00000038E67ABFA0\n000000067F0000400200008A590000C84000-000000067F0000400200008A590000C88000__0000003903F1CFE8\n000000067F0000400200008A590000C84000-000000067F0000400200008A590000C88000__0000003B99F7F8A0\n000000067F0000400200008A590000C84000-000000067F0000400200008A590000C88000__0000005D2FFFFB38\n000000067F0000400200008A590000C84000-000000067F0000400200008A590000C88000__00000073AD3FE6B8\n000000067F0000400200008A590000C84000-000000067F0000400200008A590000C88000__000000914E3F38F0\n000000067F0000400200008A590000C84000-000000067F0000400200008A590000C88000__000000931B9A2710\n000000067F0000400200008A590000C88000-000000067F0000400200008A590000C8C000__0000001C725A2400\n000000067F0000400200008A590000C88000-000000067F0000400200008A590000C8C000__0000001C760FA190\n000000067F0000400200008A590000C88000-000000067F0000400200008A590000C8C000__00000038E67ABFA0\n000000067F0000400200008A590000C88000-000000067F0000400200008A590000C8C000__0000003903F1CFE8\n000000067F0000400200008A590000C88000-000000067F0000400200008A590000C8C000__0000003B99F7F8A0\n000000067F0000400200008A590000C88000-000000067F0000400200008A590000C8C000__0000005D2FFFFB38\n000000067F0000400200008A590000C88000-000000067
F0000400200008A590000C8C000__00000073AD3FE6B8\n000000067F0000400200008A590000C88000-000000067F0000400200008A590000C8C000__000000914E3F38F0\n000000067F0000400200008A590000C88000-000000067F0000400200008A590000C8C000__000000931B9A2710\n000000067F0000400200008A590000C8B3D9-000000067F0000400200008A590000C93DC1__00000016E5D3F7B9-000000178583EBE1\n000000067F0000400200008A590000C8C000-000000067F0000400200008A590000C90000__0000001C725A2400\n000000067F0000400200008A590000C8C000-000000067F0000400200008A590000C90000__0000001C760FA190\n000000067F0000400200008A590000C8C000-000000067F0000400200008A590000C90000__00000038E67ABFA0\n000000067F0000400200008A590000C8C000-000000067F0000400200008A590000C90000__0000003903F1CFE8\n000000067F0000400200008A590000C8C000-000000067F0000400200008A590000C90000__0000003B99F7F8A0\n000000067F0000400200008A590000C8C000-000000067F0000400200008A590000C90000__0000005D2FFFFB38\n000000067F0000400200008A590000C8C000-000000067F0000400200008A590000C90000__00000073AD3FE6B8\n000000067F0000400200008A590000C8C000-000000067F0000400200008A590000C90000__000000914E3F38F0\n000000067F0000400200008A590000C8C000-000000067F0000400200008A590000C90000__000000931B9A2710\n000000067F0000400200008A590000C90000-000000067F0000400200008A590000C94000__0000001C725A2400\n000000067F0000400200008A590000C90000-000000067F0000400200008A590000C94000__0000001C760FA190\n000000067F0000400200008A590000C90000-000000067F0000400200008A590000C94000__00000038E67ABFA0\n000000067F0000400200008A590000C90000-000000067F0000400200008A590000C94000__0000003903F1CFE8\n000000067F0000400200008A590000C90000-000000067F0000400200008A590000C94000__0000003B99F7F8A0\n000000067F0000400200008A590000C90000-000000067F0000400200008A590000C94000__0000005D2FFFFB38\n000000067F0000400200008A590000C90000-000000067F0000400200008A590000C94000__00000073AD3FE6B8\n000000067F0000400200008A590000C90000-000000067F0000400200008A590000C94000__000000914E3F38F0\n000000067F0000400200008A590000C90000-000000067F0000400200008A590000C94000__0
00000931B9A2710\n000000067F0000400200008A590000C93DC1-000000067F0000400200008A590000C9C79F__00000016E5D3F7B9-000000178583EBE1\n000000067F0000400200008A590000C94000-000000067F0000400200008A590000C98000__0000001C725A2400\n000000067F0000400200008A590000C94000-000000067F0000400200008A590000C98000__0000001C760FA190\n000000067F0000400200008A590000C94000-000000067F0000400200008A590000C98000__00000038E67ABFA0\n000000067F0000400200008A590000C94000-000000067F0000400200008A590000C98000__0000003903F1CFE8\n000000067F0000400200008A590000C94000-000000067F0000400200008A590000C98000__0000003B99F7F8A0\n000000067F0000400200008A590000C94000-000000067F0000400200008A590000C98000__0000005D2FFFFB38\n000000067F0000400200008A590000C94000-000000067F0000400200008A590000C98000__00000073AD3FE6B8\n000000067F0000400200008A590000C94000-000000067F0000400200008A590000C98000__000000914E3F38F0\n000000067F0000400200008A590000C94000-000000067F0000400200008A590000C98000__000000931B9A2710\n000000067F0000400200008A590000C98000-000000067F0000400200008A590000C9C000__0000001C725A2400\n000000067F0000400200008A590000C98000-000000067F0000400200008A590000C9C000__0000001C760FA190\n000000067F0000400200008A590000C98000-000000067F0000400200008A590000C9C000__00000038E67ABFA0\n000000067F0000400200008A590000C98000-000000067F0000400200008A590000C9C000__0000003903F1CFE8\n000000067F0000400200008A590000C98000-000000067F0000400200008A590000C9C000__0000003B99F7F8A0\n000000067F0000400200008A590000C98000-000000067F0000400200008A590000C9C000__0000005D2FFFFB38\n000000067F0000400200008A590000C98000-000000067F0000400200008A590000C9C000__00000073AD3FE6B8\n000000067F0000400200008A590000C98000-000000067F0000400200008A590000C9C000__000000914E3F38F0\n000000067F0000400200008A590000C98000-000000067F0000400200008A590000C9C000__000000931B9A2710\n000000067F0000400200008A590000C9C000-000000067F0000400200008A590000CA0000__0000001C725A2400\n000000067F0000400200008A590000C9C000-000000067F0000400200008A590000CA0000__0000001C760FA190\n000000067F000
0400200008A590000C9C000-000000067F0000400200008A590000CA0000__00000038E67ABFA0\n000000067F0000400200008A590000C9C000-000000067F0000400200008A590000CA0000__0000003903F1CFE8\n000000067F0000400200008A590000C9C000-000000067F0000400200008A590000CA0000__0000003B99F7F8A0\n000000067F0000400200008A590000C9C000-000000067F0000400200008A590000CA0000__0000005D2FFFFB38\n000000067F0000400200008A590000C9C000-000000067F0000400200008A590000CA0000__00000073AD3FE6B8\n000000067F0000400200008A590000C9C000-000000067F0000400200008A590000CA0000__000000914E3F38F0\n000000067F0000400200008A590000C9C000-000000067F0000400200008A590000CA0000__000000931B9A2710\n000000067F0000400200008A590000C9C79F-000000067F0000400200008A590000CA5172__00000016E5D3F7B9-000000178583EBE1\n000000067F0000400200008A590000CA0000-000000067F0000400200008A590000CA4000__0000001C725A2400\n000000067F0000400200008A590000CA0000-000000067F0000400200008A590000CA4000__0000001C760FA190\n000000067F0000400200008A590000CA0000-000000067F0000400200008A590000CA4000__00000038E67ABFA0\n000000067F0000400200008A590000CA0000-000000067F0000400200008A590000CA4000__0000003903F1CFE8\n000000067F0000400200008A590000CA0000-000000067F0000400200008A590000CA4000__0000003B99F7F8A0\n000000067F0000400200008A590000CA0000-000000067F0000400200008A590000CA4000__0000005D2FFFFB38\n000000067F0000400200008A590000CA0000-000000067F0000400200008A590000CA4000__00000073AD3FE6B8\n000000067F0000400200008A590000CA0000-000000067F0000400200008A590000CA4000__000000914E3F38F0\n000000067F0000400200008A590000CA0000-000000067F0000400200008A590000CA4000__000000931B9A2710\n000000067F0000400200008A590000CA4000-000000067F0000400200008A590000CA8000__0000001C725A2400\n000000067F0000400200008A590000CA4000-000000067F0000400200008A590000CA8000__0000001C760FA190\n000000067F0000400200008A590000CA4000-000000067F0000400200008A590000CA8000__00000038E67ABFA0\n000000067F0000400200008A590000CA4000-000000067F0000400200008A590000CA8000__0000003903F1CFE8\n000000067F0000400200008A590000CA4000-000000
067F0000400200008A590000CA8000__0000003B99F7F8A0\n000000067F0000400200008A590000CA4000-000000067F0000400200008A590000CA8000__0000005D2FFFFB38\n000000067F0000400200008A590000CA4000-000000067F0000400200008A590000CA8000__00000073AD3FE6B8\n000000067F0000400200008A590000CA4000-000000067F0000400200008A590000CA8000__000000914E3F38F0\n000000067F0000400200008A590000CA4000-000000067F0000400200008A590000CA8000__000000931B9A2710\n000000067F0000400200008A590000CA5172-000000067F0000400200008A590000CADB56__00000016E5D3F7B9-000000178583EBE1\n000000067F0000400200008A590000CA8000-000000067F0000400200008A590000CAC000__0000001C725A2400\n000000067F0000400200008A590000CA8000-000000067F0000400200008A590000CAC000__0000001C760FA190\n000000067F0000400200008A590000CA8000-000000067F0000400200008A590000CAC000__00000038E67ABFA0\n000000067F0000400200008A590000CA8000-000000067F0000400200008A590000CAC000__0000003903F1CFE8\n000000067F0000400200008A590000CA8000-000000067F0000400200008A590000CAC000__0000003B99F7F8A0\n000000067F0000400200008A590000CA8000-000000067F0000400200008A590000CAC000__0000005D2FFFFB38\n000000067F0000400200008A590000CA8000-000000067F0000400200008A590000CAC000__00000073AD3FE6B8\n000000067F0000400200008A590000CA8000-000000067F0000400200008A590000CAC000__000000914E3F38F0\n000000067F0000400200008A590000CA8000-000000067F0000400200008A590000CAC000__000000931B9A2710\n000000067F0000400200008A590000CAC000-000000067F0000400200008A590000CB0000__0000001C725A2400\n000000067F0000400200008A590000CAC000-000000067F0000400200008A590000CB0000__0000001C760FA190\n000000067F0000400200008A590000CAC000-000000067F0000400200008A590000CB0000__00000038E67ABFA0\n000000067F0000400200008A590000CAC000-000000067F0000400200008A590000CB0000__0000003903F1CFE8\n000000067F0000400200008A590000CAC000-000000067F0000400200008A590000CB0000__0000003B99F7F8A0\n000000067F0000400200008A590000CAC000-000000067F0000400200008A590000CB0000__0000005D2FFFFB38\n000000067F0000400200008A590000CAC000-000000067F0000400200008A590000CB0000
__00000073AD3FE6B8\n000000067F0000400200008A590000CAC000-000000067F0000400200008A590000CB0000__000000914E3F38F0\n000000067F0000400200008A590000CAC000-000000067F0000400200008A590000CB0000__000000931B9A2710\n000000067F0000400200008A590000CADB56-000000067F0000400200008A590000CB652D__00000016E5D3F7B9-000000178583EBE1\n000000067F0000400200008A590000CB0000-000000067F0000400200008A590000CB4000__0000001C725A2400\n000000067F0000400200008A590000CB0000-000000067F0000400200008A590000CB4000__0000001C760FA190\n000000067F0000400200008A590000CB0000-000000067F0000400200008A590000CB4000__00000038E67ABFA0\n000000067F0000400200008A590000CB0000-000000067F0000400200008A590000CB4000__0000003903F1CFE8\n000000067F0000400200008A590000CB0000-000000067F0000400200008A590000CB4000__0000003B99F7F8A0\n000000067F0000400200008A590000CB0000-000000067F0000400200008A590000CB4000__0000005D2FFFFB38\n000000067F0000400200008A590000CB0000-000000067F0000400200008A590000CB4000__00000073AD3FE6B8\n000000067F0000400200008A590000CB0000-000000067F0000400200008A590000CB4000__000000914E3F38F0\n000000067F0000400200008A590000CB0000-000000067F0000400200008A590000CB4000__000000931B9A2710\n000000067F0000400200008A590000CB4000-000000067F0000400200008A590000CB8000__000000184D31F520\n000000067F0000400200008A590000CB4000-000000067F0000400200008A590000CB8000__0000001C760FA190\n000000067F0000400200008A590000CB4000-000000067F0000400200008A590000CB8000__00000038E67ABFA0\n000000067F0000400200008A590000CB4000-000000067F0000400200008A590000CB8000__0000003903F1CFE8\n000000067F0000400200008A590000CB4000-000000067F0000400200008A590000CB8000__0000003B99F7F8A0\n000000067F0000400200008A590000CB4000-000000067F0000400200008A590000CB8000__0000005D2FFFFB38\n000000067F0000400200008A590000CB4000-000000067F0000400200008A590000CB8000__00000073AD3FE6B8\n000000067F0000400200008A590000CB4000-000000067F0000400200008A590000CB8000__000000914E3F38F0\n000000067F0000400200008A590000CB4000-000000067F0000400200008A590000CB8000__000000931B9A2710\n000000067F
0000400200008A590000CB652D-000000067F0000400200008A590100000000__00000016E5D3F7B9-000000178583EBE1\n000000067F0000400200008A590000CB67FC-000000067F0000400200008A590000CBF1E3__000000178583EBE1-000000182533E779\n000000067F0000400200008A590000CB8000-000000067F0000400200008A590000CBC000__000000184D31F520\n000000067F0000400200008A590000CB8000-000000067F0000400200008A590000CBC000__0000001C760FA190\n000000067F0000400200008A590000CB8000-000000067F0000400200008A590000CBC000__00000038E67ABFA0\n000000067F0000400200008A590000CB8000-000000067F0000400200008A590000CBC000__0000003903F1CFE8\n000000067F0000400200008A590000CB8000-000000067F0000400200008A590000CBC000__0000003B99F7F8A0\n000000067F0000400200008A590000CB8000-000000067F0000400200008A590000CBC000__0000005D2FFFFB38\n000000067F0000400200008A590000CB8000-000000067F0000400200008A590000CBC000__00000073AD3FE6B8\n000000067F0000400200008A590000CB8000-000000067F0000400200008A590000CBC000__000000914E3F38F0\n000000067F0000400200008A590000CB8000-000000067F0000400200008A590000CBC000__000000931B9A2710\n000000067F0000400200008A590000CBC000-000000067F0000400200008A590000CC0000__000000184D31F520\n000000067F0000400200008A590000CBC000-000000067F0000400200008A590000CC0000__0000001C760FA190\n000000067F0000400200008A590000CBC000-000000067F0000400200008A590000CC0000__00000038E67ABFA0\n000000067F0000400200008A590000CBC000-000000067F0000400200008A590000CC0000__0000003903F1CFE8\n000000067F0000400200008A590000CBC000-000000067F0000400200008A590000CC0000__0000003B99F7F8A0\n000000067F0000400200008A590000CBC000-000000067F0000400200008A590000CC0000__0000005D2FFFFB38\n000000067F0000400200008A590000CBC000-000000067F0000400200008A590000CC0000__00000073AD3FE6B8\n000000067F0000400200008A590000CBC000-000000067F0000400200008A590000CC0000__000000914E3F38F0\n000000067F0000400200008A590000CBC000-000000067F0000400200008A590000CC0000__000000931B9A2710\n000000067F0000400200008A590000CBF1E3-000000067F0000400200008A590000CC7BC5__000000178583EBE1-000000182533E779\n000000
067F0000400200008A590000CC0000-000000067F0000400200008A590000CC4000__000000184D31F520\n000000067F0000400200008A590000CC0000-000000067F0000400200008A590000CC4000__0000001C760FA190\n000000067F0000400200008A590000CC0000-000000067F0000400200008A590000CC4000__00000038E67ABFA0\n000000067F0000400200008A590000CC0000-000000067F0000400200008A590000CC4000__0000003903F1CFE8\n000000067F0000400200008A590000CC0000-000000067F0000400200008A590000CC4000__0000003B99F7F8A0\n000000067F0000400200008A590000CC0000-000000067F0000400200008A590000CC4000__0000005D2FFFFB38\n000000067F0000400200008A590000CC0000-000000067F0000400200008A590000CC4000__00000073AD3FE6B8\n000000067F0000400200008A590000CC0000-000000067F0000400200008A590000CC4000__000000914E3F38F0\n000000067F0000400200008A590000CC0000-000000067F0000400200008A590000CC4000__000000931B9A2710\n000000067F0000400200008A590000CC4000-000000067F0000400200008A590000CC8000__000000184D31F520\n000000067F0000400200008A590000CC4000-000000067F0000400200008A590000CC8000__0000001C760FA190\n000000067F0000400200008A590000CC4000-000000067F0000400200008A590000CC8000__00000038E67ABFA0\n000000067F0000400200008A590000CC4000-000000067F0000400200008A590000CC8000__0000003903F1CFE8\n000000067F0000400200008A590000CC4000-000000067F0000400200008A590000CC8000__0000003B99F7F8A0\n000000067F0000400200008A590000CC4000-000000067F0000400200008A590000CC8000__0000005D2FFFFB38\n000000067F0000400200008A590000CC4000-000000067F0000400200008A590000CC8000__00000073AD3FE6B8\n000000067F0000400200008A590000CC4000-000000067F0000400200008A590000CC8000__000000914E3F38F0\n000000067F0000400200008A590000CC4000-000000067F0000400200008A590000CC8000__000000931B9A2710\n000000067F0000400200008A590000CC7BC5-000000067F0000400200008A590000CD05AA__000000178583EBE1-000000182533E779\n000000067F0000400200008A590000CC8000-000000067F0000400200008A590000CCC000__000000184D31F520\n000000067F0000400200008A590000CC8000-000000067F0000400200008A590000CCC000__0000001C760FA190\n000000067F0000400200008A590000CC8000
-000000067F0000400200008A590000CCC000__00000038E67ABFA0\n000000067F0000400200008A590000CC8000-000000067F0000400200008A590000CCC000__0000003903F1CFE8\n000000067F0000400200008A590000CC8000-000000067F0000400200008A590000CCC000__0000003B99F7F8A0\n000000067F0000400200008A590000CC8000-000000067F0000400200008A590000CCC000__0000005D2FFFFB38\n000000067F0000400200008A590000CC8000-000000067F0000400200008A590000CCC000__00000073AD3FE6B8\n000000067F0000400200008A590000CC8000-000000067F0000400200008A590000CCC000__000000914E3F38F0\n000000067F0000400200008A590000CC8000-000000067F0000400200008A590000CCC000__000000931B9A2710\n000000067F0000400200008A590000CCC000-000000067F0000400200008A590000CD0000__000000184D31F520\n000000067F0000400200008A590000CCC000-000000067F0000400200008A590000CD0000__0000001C760FA190\n000000067F0000400200008A590000CCC000-000000067F0000400200008A590000CD0000__00000038E67ABFA0\n000000067F0000400200008A590000CCC000-000000067F0000400200008A590000CD0000__0000003903F1CFE8\n000000067F0000400200008A590000CCC000-000000067F0000400200008A590000CD0000__0000003B99F7F8A0\n000000067F0000400200008A590000CCC000-000000067F0000400200008A590000CD0000__0000005D2FFFFB38\n000000067F0000400200008A590000CCC000-000000067F0000400200008A590000CD0000__00000073AD3FE6B8\n000000067F0000400200008A590000CCC000-000000067F0000400200008A590000CD0000__000000914E3F38F0\n000000067F0000400200008A590000CCC000-000000067F0000400200008A590000CD0000__000000931B9A2710\n000000067F0000400200008A590000CD0000-000000067F0000400200008A590000CD4000__000000184D31F520\n000000067F0000400200008A590000CD0000-000000067F0000400200008A590000CD4000__0000001C760FA190\n000000067F0000400200008A590000CD0000-000000067F0000400200008A590000CD4000__00000038E67ABFA0\n000000067F0000400200008A590000CD0000-000000067F0000400200008A590000CD4000__0000003903F1CFE8\n000000067F0000400200008A590000CD0000-000000067F0000400200008A590000CD4000__0000003B99F7F8A0\n000000067F0000400200008A590000CD0000-000000067F0000400200008A590000CD4000__0000005D
2FFFFB38\n000000067F0000400200008A590000CD0000-000000067F0000400200008A590000CD4000__00000073AD3FE6B8\n000000067F0000400200008A590000CD0000-000000067F0000400200008A590000CD4000__000000914E3F38F0\n000000067F0000400200008A590000CD0000-000000067F0000400200008A590000CD4000__000000931B9A2710\n000000067F0000400200008A590000CD05AA-000000067F0000400200008A590000CD8F85__000000178583EBE1-000000182533E779\n000000067F0000400200008A590000CD4000-000000067F0000400200008A590000CD8000__000000184D31F520\n000000067F0000400200008A590000CD4000-000000067F0000400200008A590000CD8000__0000001C760FA190\n000000067F0000400200008A590000CD4000-000000067F0000400200008A590000CD8000__00000038E67ABFA0\n000000067F0000400200008A590000CD4000-000000067F0000400200008A590000CD8000__0000003903F1CFE8\n000000067F0000400200008A590000CD4000-000000067F0000400200008A590000CD8000__0000003B99F7F8A0\n000000067F0000400200008A590000CD4000-000000067F0000400200008A590000CD8000__0000005D2FFFFB38\n000000067F0000400200008A590000CD4000-000000067F0000400200008A590000CD8000__00000073AD3FE6B8\n000000067F0000400200008A590000CD4000-000000067F0000400200008A590000CD8000__000000914E3F38F0\n000000067F0000400200008A590000CD4000-000000067F0000400200008A590000CD8000__000000931B9A2710\n000000067F0000400200008A590000CD8000-000000067F0000400200008A590000CDC000__000000184D31F520\n000000067F0000400200008A590000CD8000-000000067F0000400200008A590000CDC000__0000001C760FA190\n000000067F0000400200008A590000CD8000-000000067F0000400200008A590000CDC000__00000038E67ABFA0\n000000067F0000400200008A590000CD8000-000000067F0000400200008A590000CDC000__0000003903F1CFE8\n000000067F0000400200008A590000CD8000-000000067F0000400200008A590000CDC000__0000003B99F7F8A0\n000000067F0000400200008A590000CD8000-000000067F0000400200008A590000CDC000__0000005D2FFFFB38\n000000067F0000400200008A590000CD8000-000000067F0000400200008A590000CDC000__00000073AD3FE6B8\n000000067F0000400200008A590000CD8000-000000067F0000400200008A590000CDC000__000000914E3F38F0\n000000067F0000400200
008A590000CD8000-000000067F0000400200008A590000CDC000__000000931B9A2710\n000000067F0000400200008A590000CD8F85-000000067F0000400200008A590000CE195A__000000178583EBE1-000000182533E779\n000000067F0000400200008A590000CDC000-000000067F0000400200008A590000CE0000__000000184D31F520\n000000067F0000400200008A590000CDC000-000000067F0000400200008A590000CE0000__0000001C760FA190\n000000067F0000400200008A590000CDC000-000000067F0000400200008A590000CE0000__00000038E67ABFA0\n000000067F0000400200008A590000CDC000-000000067F0000400200008A590000CE0000__0000003903F1CFE8\n000000067F0000400200008A590000CDC000-000000067F0000400200008A590000CE0000__0000003B99F7F8A0\n000000067F0000400200008A590000CDC000-000000067F0000400200008A590000CE0000__0000005D2FFFFB38\n000000067F0000400200008A590000CDC000-000000067F0000400200008A590000CE0000__00000073AD3FE6B8\n000000067F0000400200008A590000CDC000-000000067F0000400200008A590000CE0000__000000914E3F38F0\n000000067F0000400200008A590000CDC000-000000067F0000400200008A590000CE0000__000000931B9A2710\n000000067F0000400200008A590000CE0000-000000067F0000400200008A590000CE4000__000000184D31F520\n000000067F0000400200008A590000CE0000-000000067F0000400200008A590000CE4000__0000001C760FA190\n000000067F0000400200008A590000CE0000-000000067F0000400200008A590000CE4000__00000038E67ABFA0\n000000067F0000400200008A590000CE0000-000000067F0000400200008A590000CE4000__0000003903F1CFE8\n000000067F0000400200008A590000CE0000-000000067F0000400200008A590000CE4000__0000003B99F7F8A0\n000000067F0000400200008A590000CE0000-000000067F0000400200008A590000CE4000__0000005D2FFFFB38\n000000067F0000400200008A590000CE0000-000000067F0000400200008A590000CE4000__00000073AD3FE6B8\n000000067F0000400200008A590000CE0000-000000067F0000400200008A590000CE4000__000000914E3F38F0\n000000067F0000400200008A590000CE0000-000000067F0000400200008A590000CE4000__000000931B9A2710\n000000067F0000400200008A590000CE195A-000000067F0000400200008A590000CEA33F__000000178583EBE1-000000182533E779\n000000067F0000400200008A590000CE4
000-000000067F0000400200008A590000CE8000__000000184D31F520\n000000067F0000400200008A590000CE4000-000000067F0000400200008A590000CE8000__0000001C760FA190\n000000067F0000400200008A590000CE4000-000000067F0000400200008A590000CE8000__00000038E67ABFA0\n000000067F0000400200008A590000CE4000-000000067F0000400200008A590000CE8000__0000003903F1CFE8\n000000067F0000400200008A590000CE4000-000000067F0000400200008A590000CE8000__0000003B99F7F8A0\n000000067F0000400200008A590000CE4000-000000067F0000400200008A590000CE8000__0000005D2FFFFB38\n000000067F0000400200008A590000CE4000-000000067F0000400200008A590000CE8000__00000073AD3FE6B8\n000000067F0000400200008A590000CE4000-000000067F0000400200008A590000CE8000__000000914E3F38F0\n000000067F0000400200008A590000CE4000-000000067F0000400200008A590000CE8000__000000931B9A2710\n000000067F0000400200008A590000CE8000-000000067F0000400200008A590000CEC000__000000184D31F520\n000000067F0000400200008A590000CE8000-000000067F0000400200008A590000CEC000__0000001C760FA190\n000000067F0000400200008A590000CE8000-000000067F0000400200008A590000CEC000__00000038E67ABFA0\n000000067F0000400200008A590000CE8000-000000067F0000400200008A590000CEC000__0000003903F1CFE8\n000000067F0000400200008A590000CE8000-000000067F0000400200008A590000CEC000__0000003B99F7F8A0\n000000067F0000400200008A590000CE8000-000000067F0000400200008A590000CEC000__0000005D2FFFFB38\n000000067F0000400200008A590000CE8000-000000067F0000400200008A590000CEC000__00000073AD3FE6B8\n000000067F0000400200008A590000CE8000-000000067F0000400200008A590000CEC000__000000914E3F38F0\n000000067F0000400200008A590000CE8000-000000067F0000400200008A590000CEC000__000000931B9A2710\n000000067F0000400200008A590000CEA33F-000000067F0000400200008A590000CF2D12__000000178583EBE1-000000182533E779\n000000067F0000400200008A590000CEC000-000000067F0000400200008A590000CF0000__000000184D31F520\n000000067F0000400200008A590000CEC000-000000067F0000400200008A590000CF0000__0000001C760FA190\n000000067F0000400200008A590000CEC000-000000067F0000400200008A59
0000CF0000__00000038E67ABFA0\n000000067F0000400200008A590000CEC000-000000067F0000400200008A590000CF0000__0000003903F1CFE8\n000000067F0000400200008A590000CEC000-000000067F0000400200008A590000CF0000__0000003B99F7F8A0\n000000067F0000400200008A590000CEC000-000000067F0000400200008A590000CF0000__0000005D2FFFFB38\n000000067F0000400200008A590000CEC000-000000067F0000400200008A590000CF0000__00000073AD3FE6B8\n000000067F0000400200008A590000CEC000-000000067F0000400200008A590000CF0000__000000914E3F38F0\n000000067F0000400200008A590000CEC000-000000067F0000400200008A590000CF0000__000000931B9A2710\n000000067F0000400200008A590000CF0000-000000067F0000400200008A590000CF4000__000000184D31F520\n000000067F0000400200008A590000CF0000-000000067F0000400200008A590000CF4000__0000001C760FA190\n000000067F0000400200008A590000CF0000-000000067F0000400200008A590000CF4000__00000038E67ABFA0\n000000067F0000400200008A590000CF0000-000000067F0000400200008A590000CF4000__0000003903F1CFE8\n000000067F0000400200008A590000CF0000-000000067F0000400200008A590000CF4000__0000003B99F7F8A0\n000000067F0000400200008A590000CF0000-000000067F0000400200008A590000CF4000__0000005D2FFFFB38\n000000067F0000400200008A590000CF0000-000000067F0000400200008A590000CF4000__00000073AD3FE6B8\n000000067F0000400200008A590000CF0000-000000067F0000400200008A590000CF4000__000000914E3F38F0\n000000067F0000400200008A590000CF0000-000000067F0000400200008A590000CF4000__000000931B9A2710\n000000067F0000400200008A590000CF2D12-000000067F0000400200008A590000CFB6EA__000000178583EBE1-000000182533E779\n000000067F0000400200008A590000CF4000-000000067F0000400200008A590000CF8000__000000184D31F520\n000000067F0000400200008A590000CF4000-000000067F0000400200008A590000CF8000__0000001C760FA190\n000000067F0000400200008A590000CF4000-000000067F0000400200008A590000CF8000__00000038E67ABFA0\n000000067F0000400200008A590000CF4000-000000067F0000400200008A590000CF8000__0000003903F1CFE8\n000000067F0000400200008A590000CF4000-000000067F0000400200008A590000CF8000__0000003B99F7F8A0\n
000000067F0000400200008A590000CF4000-000000067F0000400200008A590000CF8000__0000005D2FFFFB38\n000000067F0000400200008A590000CF4000-000000067F0000400200008A590000CF8000__00000073AD3FE6B8\n000000067F0000400200008A590000CF4000-000000067F0000400200008A590000CF8000__000000914E3F38F0\n000000067F0000400200008A590000CF4000-000000067F0000400200008A590000CF8000__000000931B9A2710\n000000067F0000400200008A590000CF8000-000000067F0000400200008A590000CFC000__000000184D31F520\n000000067F0000400200008A590000CF8000-000000067F0000400200008A590000CFC000__0000001C760FA190\n000000067F0000400200008A590000CF8000-000000067F0000400200008A590000CFC000__00000038E67ABFA0\n000000067F0000400200008A590000CF8000-000000067F0000400200008A590000CFC000__0000003903F1CFE8\n000000067F0000400200008A590000CF8000-000000067F0000400200008A590000CFC000__0000003B99F7F8A0\n000000067F0000400200008A590000CF8000-000000067F0000400200008A590000CFC000__0000005D2FFFFB38\n000000067F0000400200008A590000CF8000-000000067F0000400200008A590000CFC000__00000073AD3FE6B8\n000000067F0000400200008A590000CF8000-000000067F0000400200008A590000CFC000__000000914E3F38F0\n000000067F0000400200008A590000CF8000-000000067F0000400200008A590000CFC000__000000931B9A2710\n000000067F0000400200008A590000CFB6EA-000000067F0000400200008A590000D040CD__000000178583EBE1-000000182533E779\n000000067F0000400200008A590000CFC000-000000067F0000400200008A590000D00000__000000184D31F520\n000000067F0000400200008A590000CFC000-000000067F0000400200008A590000D00000__0000001C760FA190\n000000067F0000400200008A590000CFC000-000000067F0000400200008A590000D00000__00000038E67ABFA0\n000000067F0000400200008A590000CFC000-000000067F0000400200008A590000D00000__0000003903F1CFE8\n000000067F0000400200008A590000CFC000-000000067F0000400200008A590000D00000__0000003B99F7F8A0\n000000067F0000400200008A590000CFC000-000000067F0000400200008A590000D00000__0000005D2FFFFB38\n000000067F0000400200008A590000CFC000-000000067F0000400200008A590000D00000__00000073AD3FE6B8\n000000067F0000400200008A590000
CFC000-000000067F0000400200008A590000D00000__000000914E3F38F0\n000000067F0000400200008A590000CFC000-000000067F0000400200008A590000D00000__000000931B9A2710\n000000067F0000400200008A590000D00000-000000067F0000400200008A590000D04000__000000184D31F520\n000000067F0000400200008A590000D00000-000000067F0000400200008A590000D04000__0000001C760FA190\n000000067F0000400200008A590000D00000-000000067F0000400200008A590000D04000__00000038E67ABFA0\n000000067F0000400200008A590000D00000-000000067F0000400200008A590000D04000__0000003903F1CFE8\n000000067F0000400200008A590000D00000-000000067F0000400200008A590000D04000__0000003B99F7F8A0\n000000067F0000400200008A590000D00000-000000067F0000400200008A590000D04000__0000005D2FFFFB38\n000000067F0000400200008A590000D00000-000000067F0000400200008A590000D04000__00000073AD3FE6B8\n000000067F0000400200008A590000D00000-000000067F0000400200008A590000D04000__000000914E3F38F0\n000000067F0000400200008A590000D00000-000000067F0000400200008A590000D04000__000000931B9A2710\n000000067F0000400200008A590000D04000-000000067F0000400200008A590000D08000__000000184D31F520\n000000067F0000400200008A590000D04000-000000067F0000400200008A590000D08000__0000001C760FA190\n000000067F0000400200008A590000D04000-000000067F0000400200008A590000D08000__00000038E67ABFA0\n000000067F0000400200008A590000D04000-000000067F0000400200008A590000D08000__0000003903F1CFE8\n000000067F0000400200008A590000D04000-000000067F0000400200008A590000D08000__0000003B99F7F8A0\n000000067F0000400200008A590000D04000-000000067F0000400200008A590000D08000__0000005D2FFFFB38\n000000067F0000400200008A590000D04000-000000067F0000400200008A590000D08000__00000073AD3FE6B8\n000000067F0000400200008A590000D04000-000000067F0000400200008A590000D08000__000000914E3F38F0\n000000067F0000400200008A590000D04000-000000067F0000400200008A590000D08000__000000931B9A2710\n000000067F0000400200008A590000D040CD-000000067F0000400200008A590000D0CAAD__000000178583EBE1-000000182533E779\n000000067F0000400200008A590000D08000-000000067F0000400200008
A590000D0C000__000000184D31F520\n000000067F0000400200008A590000D08000-000000067F0000400200008A590000D0C000__0000001C760FA190\n000000067F0000400200008A590000D08000-000000067F0000400200008A590000D0C000__00000038E67ABFA0\n000000067F0000400200008A590000D08000-000000067F0000400200008A590000D0C000__0000003903F1CFE8\n000000067F0000400200008A590000D08000-000000067F0000400200008A590000D0C000__0000003B99F7F8A0\n000000067F0000400200008A590000D08000-000000067F0000400200008A590000D0C000__0000005D2FFFFB38\n000000067F0000400200008A590000D08000-000000067F0000400200008A590000D0C000__00000073AD3FE6B8\n000000067F0000400200008A590000D08000-000000067F0000400200008A590000D0C000__000000914E3F38F0\n000000067F0000400200008A590000D08000-000000067F0000400200008A590000D0C000__000000931B9A2710\n000000067F0000400200008A590000D0C000-000000067F0000400200008A590000D10000__000000184D31F520\n000000067F0000400200008A590000D0C000-000000067F0000400200008A590000D10000__0000001C760FA190\n000000067F0000400200008A590000D0C000-000000067F0000400200008A590000D10000__00000038E67ABFA0\n000000067F0000400200008A590000D0C000-000000067F0000400200008A590000D10000__0000003903F1CFE8\n000000067F0000400200008A590000D0C000-000000067F0000400200008A590000D10000__0000003B99F7F8A0\n000000067F0000400200008A590000D0C000-000000067F0000400200008A590000D10000__0000005D2FFFFB38\n000000067F0000400200008A590000D0C000-000000067F0000400200008A590000D10000__00000073AD3FE6B8\n000000067F0000400200008A590000D0C000-000000067F0000400200008A590000D10000__000000914E3F38F0\n000000067F0000400200008A590000D0C000-000000067F0000400200008A590000D10000__000000931B9A2710\n000000067F0000400200008A590000D0CAAD-000000067F0000400200008A590100000000__000000178583EBE1-000000182533E779\n000000067F0000400200008A590000D0CD6E-000000067F0000400200008A590000D1574D__000000182533E779-00000018C4E3E6C1\n000000067F0000400200008A590000D10000-000000067F0000400200008A590000D14000__000000184D31F520\n000000067F0000400200008A590000D10000-000000067F0000400200008A590000D14000
__0000001C760FA190\n000000067F0000400200008A590000D10000-000000067F0000400200008A590000D14000__00000038E67ABFA0\n000000067F0000400200008A590000D10000-000000067F0000400200008A590000D14000__0000003903F1CFE8\n000000067F0000400200008A590000D10000-000000067F0000400200008A590000D14000__0000003B99F7F8A0\n000000067F0000400200008A590000D10000-000000067F0000400200008A590000D14000__0000005D2FFFFB38\n000000067F0000400200008A590000D10000-000000067F0000400200008A590000D14000__00000073AD3FE6B8\n000000067F0000400200008A590000D10000-000000067F0000400200008A590000D14000__000000914E3F38F0\n000000067F0000400200008A590000D10000-000000067F0000400200008A590000D14000__000000931B9A2710\n000000067F0000400200008A590000D14000-000000067F0000400200008A590000D18000__000000184D31F520\n000000067F0000400200008A590000D14000-000000067F0000400200008A590000D18000__0000001C760FA190\n000000067F0000400200008A590000D14000-000000067F0000400200008A590000D18000__00000038E67ABFA0\n000000067F0000400200008A590000D14000-000000067F0000400200008A590000D18000__0000003903F1CFE8\n000000067F0000400200008A590000D14000-000000067F0000400200008A590000D18000__0000003B99F7F8A0\n000000067F0000400200008A590000D14000-000000067F0000400200008A590000D18000__0000005D2FFFFB38\n000000067F0000400200008A590000D14000-000000067F0000400200008A590000D18000__00000073AD3FE6B8\n000000067F0000400200008A590000D14000-000000067F0000400200008A590000D18000__000000914E3F38F0\n000000067F0000400200008A590000D14000-000000067F0000400200008A590000D18000__000000931B9A2710\n000000067F0000400200008A590000D1574D-000000067F0000400200008A590000D1E120__000000182533E779-00000018C4E3E6C1\n000000067F0000400200008A590000D18000-000000067F0000400200008A590000D1C000__000000184D31F520\n000000067F0000400200008A590000D18000-000000067F0000400200008A590000D1C000__0000001C760FA190\n000000067F0000400200008A590000D18000-000000067F0000400200008A590000D1C000__00000038E67ABFA0\n000000067F0000400200008A590000D18000-000000067F0000400200008A590000D1C000__0000003903F1CFE8\n000000067F
0000400200008A590000D18000-000000067F0000400200008A590000D1C000__0000003B99F7F8A0\n000000067F0000400200008A590000D18000-000000067F0000400200008A590000D1C000__0000005D2FFFFB38\n000000067F0000400200008A590000D18000-000000067F0000400200008A590000D1C000__00000073AD3FE6B8\n000000067F0000400200008A590000D18000-000000067F0000400200008A590000D1C000__000000914E3F38F0\n000000067F0000400200008A590000D18000-000000067F0000400200008A590000D1C000__000000931B9A2710\n000000067F0000400200008A590000D1C000-000000067F0000400200008A590000D20000__000000184D31F520\n000000067F0000400200008A590000D1C000-000000067F0000400200008A590000D20000__0000001C760FA190\n000000067F0000400200008A590000D1C000-000000067F0000400200008A590000D20000__00000038E67ABFA0\n000000067F0000400200008A590000D1C000-000000067F0000400200008A590000D20000__0000003903F1CFE8\n000000067F0000400200008A590000D1C000-000000067F0000400200008A590000D20000__0000003B99F7F8A0\n000000067F0000400200008A590000D1C000-000000067F0000400200008A590000D20000__0000005D2FFFFB38\n000000067F0000400200008A590000D1C000-000000067F0000400200008A590000D20000__00000073AD3FE6B8\n000000067F0000400200008A590000D1C000-000000067F0000400200008A590000D20000__000000914E3F38F0\n000000067F0000400200008A590000D1C000-000000067F0000400200008A590000D20000__000000931B9A2710\n000000067F0000400200008A590000D1E120-000000067F0000400200008A590000D26AF1__000000182533E779-00000018C4E3E6C1\n000000067F0000400200008A590000D20000-000000067F0000400200008A590000D24000__0000001C760FA190\n000000067F0000400200008A590000D20000-000000067F0000400200008A590000D24000__00000038E67ABFA0\n000000067F0000400200008A590000D20000-000000067F0000400200008A590000D24000__0000003903F1CFE8\n000000067F0000400200008A590000D20000-000000067F0000400200008A590000D24000__0000003B99F7F8A0\n000000067F0000400200008A590000D20000-000000067F0000400200008A590000D24000__0000005D2FFFFB38\n000000067F0000400200008A590000D20000-000000067F0000400200008A590000D24000__00000073AD3FE6B8\n000000067F0000400200008A590000D20000-000
000067F0000400200008A590000D24000__000000914E3F38F0\n000000067F0000400200008A590000D20000-000000067F0000400200008A590000D24000__000000931B9A2710\n000000067F0000400200008A590000D20000-030000000000000000000000000000000002__000000184D31F520\n000000067F0000400200008A590000D24000-000000067F0000400200008A590000D28000__0000001C760FA190\n000000067F0000400200008A590000D24000-000000067F0000400200008A590000D28000__00000038E67ABFA0\n000000067F0000400200008A590000D24000-000000067F0000400200008A590000D28000__0000003903F1CFE8\n000000067F0000400200008A590000D24000-000000067F0000400200008A590000D28000__0000003B99F7F8A0\n000000067F0000400200008A590000D24000-000000067F0000400200008A590000D28000__0000005D2FFFFB38\n000000067F0000400200008A590000D24000-000000067F0000400200008A590000D28000__00000073AD3FE6B8\n000000067F0000400200008A590000D24000-000000067F0000400200008A590000D28000__000000914E3F38F0\n000000067F0000400200008A590000D24000-000000067F0000400200008A590000D28000__000000931B9A2710\n000000067F0000400200008A590000D26AF1-000000067F0000400200008A590000D2F4D7__000000182533E779-00000018C4E3E6C1\n000000067F0000400200008A590000D28000-000000067F0000400200008A590000D2C000__0000001C760FA190\n000000067F0000400200008A590000D28000-000000067F0000400200008A590000D2C000__00000038E67ABFA0\n000000067F0000400200008A590000D28000-000000067F0000400200008A590000D2C000__0000003903F1CFE8\n000000067F0000400200008A590000D28000-000000067F0000400200008A590000D2C000__0000003B99F7F8A0\n000000067F0000400200008A590000D28000-000000067F0000400200008A590000D2C000__0000005D2FFFFB38\n000000067F0000400200008A590000D28000-000000067F0000400200008A590000D2C000__00000073AD3FE6B8\n000000067F0000400200008A590000D28000-000000067F0000400200008A590000D2C000__000000914E3F38F0\n000000067F0000400200008A590000D28000-000000067F0000400200008A590000D2C000__000000931B9A2710\n000000067F0000400200008A590000D2C000-000000067F0000400200008A590000D30000__0000001C760FA190\n000000067F0000400200008A590000D2C000-000000067F0000400200008A590000D30
000__00000038E67ABFA0\n000000067F0000400200008A590000D2C000-000000067F0000400200008A590000D30000__0000003903F1CFE8\n000000067F0000400200008A590000D2C000-000000067F0000400200008A590000D30000__0000003B99F7F8A0\n000000067F0000400200008A590000D2C000-000000067F0000400200008A590000D30000__0000005D2FFFFB38\n000000067F0000400200008A590000D2C000-000000067F0000400200008A590000D30000__00000073AD3FE6B8\n000000067F0000400200008A590000D2C000-000000067F0000400200008A590000D30000__000000914E3F38F0\n000000067F0000400200008A590000D2C000-000000067F0000400200008A590000D30000__000000931B9A2710\n000000067F0000400200008A590000D2F4D7-000000067F0000400200008A590000D37EB1__000000182533E779-00000018C4E3E6C1\n000000067F0000400200008A590000D30000-000000067F0000400200008A590000D34000__0000001C760FA190\n000000067F0000400200008A590000D30000-000000067F0000400200008A590000D34000__00000038E67ABFA0\n000000067F0000400200008A590000D30000-000000067F0000400200008A590000D34000__0000003903F1CFE8\n000000067F0000400200008A590000D30000-000000067F0000400200008A590000D34000__0000003B99F7F8A0\n000000067F0000400200008A590000D30000-000000067F0000400200008A590000D34000__0000005D2FFFFB38\n000000067F0000400200008A590000D30000-000000067F0000400200008A590000D34000__00000073AD3FE6B8\n000000067F0000400200008A590000D30000-000000067F0000400200008A590000D34000__000000914E3F38F0\n000000067F0000400200008A590000D30000-000000067F0000400200008A590000D34000__000000931B9A2710\n000000067F0000400200008A590000D34000-000000067F0000400200008A590000D38000__0000001C760FA190\n000000067F0000400200008A590000D34000-000000067F0000400200008A590000D38000__00000038E67ABFA0\n000000067F0000400200008A590000D34000-000000067F0000400200008A590000D38000__0000003903F1CFE8\n000000067F0000400200008A590000D34000-000000067F0000400200008A590000D38000__0000003B99F7F8A0\n000000067F0000400200008A590000D34000-000000067F0000400200008A590000D38000__0000005D2FFFFB38\n000000067F0000400200008A590000D34000-000000067F0000400200008A590000D38000__00000073AD3FE6B8\n0000000
67F0000400200008A590000D34000-000000067F0000400200008A590000D38000__000000914E3F38F0\n000000067F0000400200008A590000D34000-000000067F0000400200008A590000D38000__000000931B9A2710\n000000067F0000400200008A590000D37EB1-000000067F0000400200008A590000D40891__000000182533E779-00000018C4E3E6C1\n000000067F0000400200008A590000D38000-000000067F0000400200008A590000D3C000__0000001C760FA190\n000000067F0000400200008A590000D38000-000000067F0000400200008A590000D3C000__00000038E67ABFA0\n000000067F0000400200008A590000D38000-000000067F0000400200008A590000D3C000__0000003903F1CFE8\n000000067F0000400200008A590000D38000-000000067F0000400200008A590000D3C000__0000003B99F7F8A0\n000000067F0000400200008A590000D38000-000000067F0000400200008A590000D3C000__0000005D2FFFFB38\n000000067F0000400200008A590000D38000-000000067F0000400200008A590000D3C000__00000073AD3FE6B8\n000000067F0000400200008A590000D38000-000000067F0000400200008A590000D3C000__000000914E3F38F0\n000000067F0000400200008A590000D38000-000000067F0000400200008A590000D3C000__000000931B9A2710\n000000067F0000400200008A590000D3C000-000000067F0000400200008A590000D40000__0000001C760FA190\n000000067F0000400200008A590000D3C000-000000067F0000400200008A590000D40000__00000038E67ABFA0\n000000067F0000400200008A590000D3C000-000000067F0000400200008A590000D40000__0000003903F1CFE8\n000000067F0000400200008A590000D3C000-000000067F0000400200008A590000D40000__0000003B99F7F8A0\n000000067F0000400200008A590000D3C000-000000067F0000400200008A590000D40000__0000005D2FFFFB38\n000000067F0000400200008A590000D3C000-000000067F0000400200008A590000D40000__00000073AD3FE6B8\n000000067F0000400200008A590000D3C000-000000067F0000400200008A590000D40000__000000914E3F38F0\n000000067F0000400200008A590000D3C000-000000067F0000400200008A590000D40000__000000931B9A2710\n000000067F0000400200008A590000D40000-000000067F0000400200008A590000D44000__0000001C760FA190\n000000067F0000400200008A590000D40000-000000067F0000400200008A590000D44000__00000038E67ABFA0\n000000067F0000400200008A590000D40000-
000000067F0000400200008A590000D44000__0000003903F1CFE8\n000000067F0000400200008A590000D40000-000000067F0000400200008A590000D44000__0000003B99F7F8A0\n000000067F0000400200008A590000D40000-000000067F0000400200008A590000D44000__0000005D2FFFFB38\n000000067F0000400200008A590000D40000-000000067F0000400200008A590000D44000__00000073AD3FE6B8\n000000067F0000400200008A590000D40000-000000067F0000400200008A590000D44000__000000914E3F38F0\n000000067F0000400200008A590000D40000-000000067F0000400200008A590000D44000__000000931B9A2710\n000000067F0000400200008A590000D40891-000000067F0000400200008A590000D4926B__000000182533E779-00000018C4E3E6C1\n000000067F0000400200008A590000D44000-000000067F0000400200008A590000D48000__0000001C760FA190\n000000067F0000400200008A590000D44000-000000067F0000400200008A590000D48000__00000038E67ABFA0\n000000067F0000400200008A590000D44000-000000067F0000400200008A590000D48000__0000003903F1CFE8\n000000067F0000400200008A590000D44000-000000067F0000400200008A590000D48000__0000003B99F7F8A0\n000000067F0000400200008A590000D44000-000000067F0000400200008A590000D48000__0000005D2FFFFB38\n000000067F0000400200008A590000D44000-000000067F0000400200008A590000D48000__00000073AD3FE6B8\n000000067F0000400200008A590000D44000-000000067F0000400200008A590000D48000__000000914E3F38F0\n000000067F0000400200008A590000D44000-000000067F0000400200008A590000D48000__000000931B9A2710\n000000067F0000400200008A590000D48000-000000067F0000400200008A590000D4C000__0000001C760FA190\n000000067F0000400200008A590000D48000-000000067F0000400200008A590000D4C000__00000038E67ABFA0\n000000067F0000400200008A590000D48000-000000067F0000400200008A590000D4C000__0000003903F1CFE8\n000000067F0000400200008A590000D48000-000000067F0000400200008A590000D4C000__0000003B99F7F8A0\n000000067F0000400200008A590000D48000-000000067F0000400200008A590000D4C000__0000005D2FFFFB38\n000000067F0000400200008A590000D48000-000000067F0000400200008A590000D4C000__00000073AD3FE6B8\n000000067F0000400200008A590000D48000-000000067F0000400200008A590000
D4C000__000000914E3F38F0\n000000067F0000400200008A590000D48000-000000067F0000400200008A590000D4C000__000000931B9A2710\n000000067F0000400200008A590000D4926B-000000067F0000400200008A590000D51C42__000000182533E779-00000018C4E3E6C1\n000000067F0000400200008A590000D4C000-000000067F0000400200008A590000D50000__0000001C760FA190\n000000067F0000400200008A590000D4C000-000000067F0000400200008A590000D50000__00000038E67ABFA0\n000000067F0000400200008A590000D4C000-000000067F0000400200008A590000D50000__0000003903F1CFE8\n000000067F0000400200008A590000D4C000-000000067F0000400200008A590000D50000__0000003B99F7F8A0\n000000067F0000400200008A590000D4C000-000000067F0000400200008A590000D50000__0000005D2FFFFB38\n000000067F0000400200008A590000D4C000-000000067F0000400200008A590000D50000__00000073AD3FE6B8\n000000067F0000400200008A590000D4C000-000000067F0000400200008A590000D50000__000000914E3F38F0\n000000067F0000400200008A590000D4C000-000000067F0000400200008A590000D50000__000000931B9A2710\n000000067F0000400200008A590000D50000-000000067F0000400200008A590000D54000__0000001C760FA190\n000000067F0000400200008A590000D50000-000000067F0000400200008A590000D54000__00000038E67ABFA0\n000000067F0000400200008A590000D50000-000000067F0000400200008A590000D54000__0000003903F1CFE8\n000000067F0000400200008A590000D50000-000000067F0000400200008A590000D54000__0000003B99F7F8A0\n000000067F0000400200008A590000D50000-000000067F0000400200008A590000D54000__0000005D2FFFFB38\n000000067F0000400200008A590000D50000-000000067F0000400200008A590000D54000__00000073AD3FE6B8\n000000067F0000400200008A590000D50000-000000067F0000400200008A590000D54000__000000914E3F38F0\n000000067F0000400200008A590000D50000-000000067F0000400200008A590000D54000__000000931B9A2710\n000000067F0000400200008A590000D51C42-000000067F0000400200008A590000D5A61A__000000182533E779-00000018C4E3E6C1\n000000067F0000400200008A590000D54000-000000067F0000400200008A590000D58000__0000001C760FA190\n000000067F0000400200008A590000D54000-000000067F0000400200008A590000D58000__00000
038E67ABFA0\n000000067F0000400200008A590000D54000-000000067F0000400200008A590000D58000__0000003903F1CFE8\n000000067F0000400200008A590000D54000-000000067F0000400200008A590000D58000__0000003B99F7F8A0\n000000067F0000400200008A590000D54000-000000067F0000400200008A590000D58000__0000005D2FFFFB38\n000000067F0000400200008A590000D54000-000000067F0000400200008A590000D58000__00000073AD3FE6B8\n000000067F0000400200008A590000D54000-000000067F0000400200008A590000D58000__000000914E3F38F0\n000000067F0000400200008A590000D54000-000000067F0000400200008A590000D58000__000000931B9A2710\n000000067F0000400200008A590000D58000-000000067F0000400200008A590000D5C000__0000001C760FA190\n000000067F0000400200008A590000D58000-000000067F0000400200008A590000D5C000__00000038E67ABFA0\n000000067F0000400200008A590000D58000-000000067F0000400200008A590000D5C000__0000003903F1CFE8\n000000067F0000400200008A590000D58000-000000067F0000400200008A590000D5C000__0000003B99F7F8A0\n000000067F0000400200008A590000D58000-000000067F0000400200008A590000D5C000__0000005D2FFFFB38\n000000067F0000400200008A590000D58000-000000067F0000400200008A590000D5C000__00000073AD3FE6B8\n000000067F0000400200008A590000D58000-000000067F0000400200008A590000D5C000__000000914E3F38F0\n000000067F0000400200008A590000D58000-000000067F0000400200008A590000D5C000__000000931B9A2710\n000000067F0000400200008A590000D5A61A-000000067F0000400200008A590000D62FED__000000182533E779-00000018C4E3E6C1\n000000067F0000400200008A590000D5C000-000000067F0000400200008A590000D60000__0000001C760FA190\n000000067F0000400200008A590000D5C000-000000067F0000400200008A590000D60000__00000038E67ABFA0\n000000067F0000400200008A590000D5C000-000000067F0000400200008A590000D60000__0000003903F1CFE8\n000000067F0000400200008A590000D5C000-000000067F0000400200008A590000D60000__0000003B99F7F8A0\n000000067F0000400200008A590000D5C000-000000067F0000400200008A590000D60000__0000005D2FFFFB38\n000000067F0000400200008A590000D5C000-000000067F0000400200008A590000D60000__00000073AD3FE6B8\n000000067F0000400
200008A590000D5C000-000000067F0000400200008A590000D60000__000000914E3F38F0\n000000067F0000400200008A590000D5C000-000000067F0000400200008A590000D60000__000000931B9A2710\n000000067F0000400200008A590000D60000-000000067F0000400200008A590000D64000__0000001C725A2400\n000000067F0000400200008A590000D60000-000000067F0000400200008A590000D64000__0000001C760FA190\n000000067F0000400200008A590000D60000-000000067F0000400200008A590000D64000__00000038E67ABFA0\n000000067F0000400200008A590000D60000-000000067F0000400200008A590000D64000__0000003903F1CFE8\n000000067F0000400200008A590000D60000-000000067F0000400200008A590000D64000__0000003B99F7F8A0\n000000067F0000400200008A590000D60000-000000067F0000400200008A590000D64000__0000005D2FFFFB38\n000000067F0000400200008A590000D60000-000000067F0000400200008A590000D64000__00000073AD3FE6B8\n000000067F0000400200008A590000D60000-000000067F0000400200008A590000D64000__000000914E3F38F0\n000000067F0000400200008A590000D60000-000000067F0000400200008A590000D64000__000000931B9A2710\n000000067F0000400200008A590000D62FED-000000067F0000400200008A590100000000__000000182533E779-00000018C4E3E6C1\n000000067F0000400200008A590000D632CE-000000067F0000400200008A590000D6BCB1__00000018C4E3E6C1-000000196493E2E1\n000000067F0000400200008A590000D64000-000000067F0000400200008A590000D68000__0000001C725A2400\n000000067F0000400200008A590000D64000-000000067F0000400200008A590000D68000__0000001C760FA190\n000000067F0000400200008A590000D64000-000000067F0000400200008A590000D68000__00000038E67ABFA0\n000000067F0000400200008A590000D64000-000000067F0000400200008A590000D68000__0000003903F1CFE8\n000000067F0000400200008A590000D64000-000000067F0000400200008A590000D68000__0000003B99F7F8A0\n000000067F0000400200008A590000D64000-000000067F0000400200008A590000D68000__0000005D2FFFFB38\n000000067F0000400200008A590000D64000-000000067F0000400200008A590000D68000__00000073AD3FE6B8\n000000067F0000400200008A590000D64000-000000067F0000400200008A590000D68000__000000914E3F38F0\n000000067F0000400200008A590000
D64000-000000067F0000400200008A590000D68000__000000931B9A2710\n000000067F0000400200008A590000D68000-000000067F0000400200008A590000D6C000__0000001C725A2400\n000000067F0000400200008A590000D68000-000000067F0000400200008A590000D6C000__0000001C760FA190\n000000067F0000400200008A590000D68000-000000067F0000400200008A590000D6C000__00000038E67ABFA0\n000000067F0000400200008A590000D68000-000000067F0000400200008A590000D6C000__0000003903F1CFE8\n000000067F0000400200008A590000D68000-000000067F0000400200008A590000D6C000__0000003B99F7F8A0\n000000067F0000400200008A590000D68000-000000067F0000400200008A590000D6C000__0000005D2FFFFB38\n000000067F0000400200008A590000D68000-000000067F0000400200008A590000D6C000__00000073AD3FE6B8\n000000067F0000400200008A590000D68000-000000067F0000400200008A590000D6C000__000000914E3F38F0\n000000067F0000400200008A590000D68000-000000067F0000400200008A590000D6C000__000000931B9A2710\n000000067F0000400200008A590000D6BCB1-000000067F0000400200008A590000D746AB__00000018C4E3E6C1-000000196493E2E1\n000000067F0000400200008A590000D6C000-000000067F0000400200008A590000D70000__0000001C725A2400\n000000067F0000400200008A590000D6C000-000000067F0000400200008A590000D70000__0000001C760FA190\n000000067F0000400200008A590000D6C000-000000067F0000400200008A590000D70000__00000038E67ABFA0\n000000067F0000400200008A590000D6C000-000000067F0000400200008A590000D70000__0000003903F1CFE8\n000000067F0000400200008A590000D6C000-000000067F0000400200008A590000D70000__0000003B99F7F8A0\n000000067F0000400200008A590000D6C000-000000067F0000400200008A590000D70000__0000005D2FFFFB38\n000000067F0000400200008A590000D6C000-000000067F0000400200008A590000D70000__00000073AD3FE6B8\n000000067F0000400200008A590000D6C000-000000067F0000400200008A590000D70000__000000914E3F38F0\n000000067F0000400200008A590000D6C000-000000067F0000400200008A590000D70000__000000931B9A2710\n000000067F0000400200008A590000D70000-000000067F0000400200008A590000D74000__0000001C725A2400\n000000067F0000400200008A590000D70000-000000067F0000400200008
A590000D74000__0000001C760FA190\n000000067F0000400200008A590000D70000-000000067F0000400200008A590000D74000__00000038E67ABFA0\n000000067F0000400200008A590000D70000-000000067F0000400200008A590000D74000__0000003903F1CFE8\n000000067F0000400200008A590000D70000-000000067F0000400200008A590000D74000__0000003B99F7F8A0\n000000067F0000400200008A590000D70000-000000067F0000400200008A590000D74000__0000005D2FFFFB38\n000000067F0000400200008A590000D70000-000000067F0000400200008A590000D74000__00000073AD3FE6B8\n000000067F0000400200008A590000D70000-000000067F0000400200008A590000D74000__000000914E3F38F0\n000000067F0000400200008A590000D70000-000000067F0000400200008A590000D74000__000000931B9A2710\n000000067F0000400200008A590000D74000-000000067F0000400200008A590000D78000__0000001C725A2400\n000000067F0000400200008A590000D74000-000000067F0000400200008A590000D78000__0000001C760FA190\n000000067F0000400200008A590000D74000-000000067F0000400200008A590000D78000__00000038E67ABFA0\n000000067F0000400200008A590000D74000-000000067F0000400200008A590000D78000__0000003903F1CFE8\n000000067F0000400200008A590000D74000-000000067F0000400200008A590000D78000__0000003B99F7F8A0\n000000067F0000400200008A590000D74000-000000067F0000400200008A590000D78000__0000005D2FFFFB38\n000000067F0000400200008A590000D74000-000000067F0000400200008A590000D78000__00000073AD3FE6B8\n000000067F0000400200008A590000D74000-000000067F0000400200008A590000D78000__000000914E3F38F0\n000000067F0000400200008A590000D74000-000000067F0000400200008A590000D78000__000000931B9A2710\n000000067F0000400200008A590000D746AB-000000067F0000400200008A590000D7D090__00000018C4E3E6C1-000000196493E2E1\n000000067F0000400200008A590000D78000-000000067F0000400200008A590000D7C000__0000001C725A2400\n000000067F0000400200008A590000D78000-000000067F0000400200008A590000D7C000__0000001C760FA190\n000000067F0000400200008A590000D78000-000000067F0000400200008A590000D7C000__00000038E67ABFA0\n000000067F0000400200008A590000D78000-000000067F0000400200008A590000D7C000__0000003903F1CFE
8\n000000067F0000400200008A590000D78000-000000067F0000400200008A590000D7C000__0000003B99F7F8A0\n000000067F0000400200008A590000D78000-000000067F0000400200008A590000D7C000__0000005D2FFFFB38\n000000067F0000400200008A590000D78000-000000067F0000400200008A590000D7C000__00000073AD3FE6B8\n000000067F0000400200008A590000D78000-000000067F0000400200008A590000D7C000__000000914E3F38F0\n000000067F0000400200008A590000D78000-000000067F0000400200008A590000D7C000__000000931B9A2710\n000000067F0000400200008A590000D7C000-000000067F0000400200008A590000D80000__0000001C725A2400\n000000067F0000400200008A590000D7C000-000000067F0000400200008A590000D80000__0000001C760FA190\n000000067F0000400200008A590000D7C000-000000067F0000400200008A590000D80000__00000038E67ABFA0\n000000067F0000400200008A590000D7C000-000000067F0000400200008A590000D80000__0000003903F1CFE8\n000000067F0000400200008A590000D7C000-000000067F0000400200008A590000D80000__0000003B99F7F8A0\n000000067F0000400200008A590000D7C000-000000067F0000400200008A590000D80000__0000005D2FFFFB38\n000000067F0000400200008A590000D7C000-000000067F0000400200008A590000D80000__00000073AD3FE6B8\n000000067F0000400200008A590000D7C000-000000067F0000400200008A590000D80000__000000914E3F38F0\n000000067F0000400200008A590000D7C000-000000067F0000400200008A590000D80000__000000931B9A2710\n000000067F0000400200008A590000D7D090-000000067F0000400200008A590000D85A63__00000018C4E3E6C1-000000196493E2E1\n000000067F0000400200008A590000D80000-000000067F0000400200008A590000D84000__0000001C725A2400\n000000067F0000400200008A590000D80000-000000067F0000400200008A590000D84000__0000001C760FA190\n000000067F0000400200008A590000D80000-000000067F0000400200008A590000D84000__00000038E67ABFA0\n000000067F0000400200008A590000D80000-000000067F0000400200008A590000D84000__0000003903F1CFE8\n000000067F0000400200008A590000D80000-000000067F0000400200008A590000D84000__0000003B99F7F8A0\n000000067F0000400200008A590000D80000-000000067F0000400200008A590000D84000__0000005D2FFFFB38\n000000067F0000400200008A590
000D80000-000000067F0000400200008A590000D84000__00000073AD3FE6B8\n000000067F0000400200008A590000D80000-000000067F0000400200008A590000D84000__000000914E3F38F0\n000000067F0000400200008A590000D80000-000000067F0000400200008A590000D84000__000000931B9A2710\n000000067F0000400200008A590000D84000-000000067F0000400200008A590000D88000__0000001C725A2400\n000000067F0000400200008A590000D84000-000000067F0000400200008A590000D88000__0000001C760FA190\n000000067F0000400200008A590000D84000-000000067F0000400200008A590000D88000__00000038E67ABFA0\n000000067F0000400200008A590000D84000-000000067F0000400200008A590000D88000__0000003903F1CFE8\n000000067F0000400200008A590000D84000-000000067F0000400200008A590000D88000__0000003B99F7F8A0\n000000067F0000400200008A590000D84000-000000067F0000400200008A590000D88000__0000005D2FFFFB38\n000000067F0000400200008A590000D84000-000000067F0000400200008A590000D88000__00000073AD3FE6B8\n000000067F0000400200008A590000D84000-000000067F0000400200008A590000D88000__000000914E3F38F0\n000000067F0000400200008A590000D84000-000000067F0000400200008A590000D88000__000000931B9A2710\n000000067F0000400200008A590000D85A63-000000067F0000400200008A590000D8E43F__00000018C4E3E6C1-000000196493E2E1\n000000067F0000400200008A590000D88000-000000067F0000400200008A590000D8C000__0000001C725A2400\n000000067F0000400200008A590000D88000-000000067F0000400200008A590000D8C000__0000001C760FA190\n000000067F0000400200008A590000D88000-000000067F0000400200008A590000D8C000__00000038E67ABFA0\n000000067F0000400200008A590000D88000-000000067F0000400200008A590000D8C000__0000003903F1CFE8\n000000067F0000400200008A590000D88000-000000067F0000400200008A590000D8C000__0000003B99F7F8A0\n000000067F0000400200008A590000D88000-000000067F0000400200008A590000D8C000__0000005D2FFFFB38\n000000067F0000400200008A590000D88000-000000067F0000400200008A590000D8C000__00000073AD3FE6B8\n000000067F0000400200008A590000D88000-000000067F0000400200008A590000D8C000__000000914E3F38F0\n000000067F0000400200008A590000D88000-000000067F0000400200
008A590000D8C000__000000931B9A2710\n000000067F0000400200008A590000D8C000-000000067F0000400200008A590000D90000__0000001C725A2400\n000000067F0000400200008A590000D8C000-000000067F0000400200008A590000D90000__0000001C760FA190\n000000067F0000400200008A590000D8C000-000000067F0000400200008A590000D90000__00000038E67ABFA0\n000000067F0000400200008A590000D8C000-000000067F0000400200008A590000D90000__0000003903F1CFE8\n000000067F0000400200008A590000D8C000-000000067F0000400200008A590000D90000__0000003B99F7F8A0\n000000067F0000400200008A590000D8C000-000000067F0000400200008A590000D90000__0000005D2FFFFB38\n000000067F0000400200008A590000D8C000-000000067F0000400200008A590000D90000__00000073AD3FE6B8\n000000067F0000400200008A590000D8C000-000000067F0000400200008A590000D90000__000000914E3F38F0\n000000067F0000400200008A590000D8C000-000000067F0000400200008A590000D90000__000000931B9A2710\n000000067F0000400200008A590000D8E43F-000000067F0000400200008A590000D96E19__00000018C4E3E6C1-000000196493E2E1\n000000067F0000400200008A590000D90000-000000067F0000400200008A590000D94000__0000001C725A2400\n000000067F0000400200008A590000D90000-000000067F0000400200008A590000D94000__0000001C760FA190\n000000067F0000400200008A590000D90000-000000067F0000400200008A590000D94000__00000038E67ABFA0\n000000067F0000400200008A590000D90000-000000067F0000400200008A590000D94000__0000003903F1CFE8\n000000067F0000400200008A590000D90000-000000067F0000400200008A590000D94000__0000003B99F7F8A0\n000000067F0000400200008A590000D90000-000000067F0000400200008A590000D94000__0000005D2FFFFB38\n000000067F0000400200008A590000D90000-000000067F0000400200008A590000D94000__00000073AD3FE6B8\n000000067F0000400200008A590000D90000-000000067F0000400200008A590000D94000__000000914E3F38F0\n000000067F0000400200008A590000D90000-000000067F0000400200008A590000D94000__000000931B9A2710\n000000067F0000400200008A590000D94000-000000067F0000400200008A590000D98000__0000001C725A2400\n000000067F0000400200008A590000D94000-000000067F0000400200008A590000D98000__0000001C760F
A190\n000000067F0000400200008A590000D94000-000000067F0000400200008A590000D98000__00000038E67ABFA0\n000000067F0000400200008A590000D94000-000000067F0000400200008A590000D98000__0000003903F1CFE8\n000000067F0000400200008A590000D94000-000000067F0000400200008A590000D98000__0000003B99F7F8A0\n000000067F0000400200008A590000D94000-000000067F0000400200008A590000D98000__0000005D2FFFFB38\n000000067F0000400200008A590000D94000-000000067F0000400200008A590000D98000__00000073AD3FE6B8\n000000067F0000400200008A590000D94000-000000067F0000400200008A590000D98000__000000914E3F38F0\n000000067F0000400200008A590000D94000-000000067F0000400200008A590000D98000__000000931B9A2710\n000000067F0000400200008A590000D96E19-000000067F0000400200008A590000D9F7E0__00000018C4E3E6C1-000000196493E2E1\n000000067F0000400200008A590000D98000-000000067F0000400200008A590000D9C000__0000001C725A2400\n000000067F0000400200008A590000D98000-000000067F0000400200008A590000D9C000__0000001C760FA190\n000000067F0000400200008A590000D98000-000000067F0000400200008A590000D9C000__00000038E67ABFA0\n000000067F0000400200008A590000D98000-000000067F0000400200008A590000D9C000__0000003903F1CFE8\n000000067F0000400200008A590000D98000-000000067F0000400200008A590000D9C000__0000003B99F7F8A0\n000000067F0000400200008A590000D98000-000000067F0000400200008A590000D9C000__0000005D2FFFFB38\n000000067F0000400200008A590000D98000-000000067F0000400200008A590000D9C000__00000073AD3FE6B8\n000000067F0000400200008A590000D98000-000000067F0000400200008A590000D9C000__000000914E3F38F0\n000000067F0000400200008A590000D98000-000000067F0000400200008A590000D9C000__000000931B9A2710\n000000067F0000400200008A590000D9C000-000000067F0000400200008A590000DA0000__0000001C725A2400\n000000067F0000400200008A590000D9C000-000000067F0000400200008A590000DA0000__0000001C760FA190\n000000067F0000400200008A590000D9C000-000000067F0000400200008A590000DA0000__00000038E67ABFA0\n000000067F0000400200008A590000D9C000-000000067F0000400200008A590000DA0000__0000003903F1CFE8\n000000067F0000400200008A
590000D9C000-000000067F0000400200008A590000DA0000__0000003B99F7F8A0\n000000067F0000400200008A590000D9C000-000000067F0000400200008A590000DA0000__0000005D2FFFFB38\n000000067F0000400200008A590000D9C000-000000067F0000400200008A590000DA0000__00000073AD3FE6B8\n000000067F0000400200008A590000D9C000-000000067F0000400200008A590000DA0000__000000914E3F38F0\n000000067F0000400200008A590000D9C000-000000067F0000400200008A590000DA0000__000000931B9A2710\n000000067F0000400200008A590000D9F7E0-000000067F0000400200008A590000DA81C4__00000018C4E3E6C1-000000196493E2E1\n000000067F0000400200008A590000DA0000-000000067F0000400200008A590000DA4000__0000001C725A2400\n000000067F0000400200008A590000DA0000-000000067F0000400200008A590000DA4000__0000001C760FA190\n000000067F0000400200008A590000DA0000-000000067F0000400200008A590000DA4000__00000038E67ABFA0\n000000067F0000400200008A590000DA0000-000000067F0000400200008A590000DA4000__0000003903F1CFE8\n000000067F0000400200008A590000DA0000-000000067F0000400200008A590000DA4000__0000003B99F7F8A0\n000000067F0000400200008A590000DA0000-000000067F0000400200008A590000DA4000__0000005D2FFFFB38\n000000067F0000400200008A590000DA0000-000000067F0000400200008A590000DA4000__00000073AD3FE6B8\n000000067F0000400200008A590000DA0000-000000067F0000400200008A590000DA4000__000000914E3F38F0\n000000067F0000400200008A590000DA0000-000000067F0000400200008A590000DA4000__000000931B9A2710\n000000067F0000400200008A590000DA4000-000000067F0000400200008A590000DA8000__0000001C725A2400\n000000067F0000400200008A590000DA4000-000000067F0000400200008A590000DA8000__0000001C760FA190\n000000067F0000400200008A590000DA4000-000000067F0000400200008A590000DA8000__00000038E67ABFA0\n000000067F0000400200008A590000DA4000-000000067F0000400200008A590000DA8000__0000003903F1CFE8\n000000067F0000400200008A590000DA4000-000000067F0000400200008A590000DA8000__0000003B99F7F8A0\n000000067F0000400200008A590000DA4000-000000067F0000400200008A590000DA8000__0000005D2FFFFB38\n000000067F0000400200008A590000DA4000-000000067F0000400
200008A590000DA8000__00000073AD3FE6B8\n000000067F0000400200008A590000DA4000-000000067F0000400200008A590000DA8000__000000914E3F38F0\n000000067F0000400200008A590000DA4000-000000067F0000400200008A590000DA8000__000000931B9A2710\n000000067F0000400200008A590000DA8000-000000067F0000400200008A590000DAC000__0000001C725A2400\n000000067F0000400200008A590000DA8000-000000067F0000400200008A590000DAC000__0000001C760FA190\n000000067F0000400200008A590000DA8000-000000067F0000400200008A590000DAC000__00000038E67ABFA0\n000000067F0000400200008A590000DA8000-000000067F0000400200008A590000DAC000__0000003903F1CFE8\n000000067F0000400200008A590000DA8000-000000067F0000400200008A590000DAC000__0000003B99F7F8A0\n000000067F0000400200008A590000DA8000-000000067F0000400200008A590000DAC000__0000005D2FFFFB38\n000000067F0000400200008A590000DA8000-000000067F0000400200008A590000DAC000__00000073AD3FE6B8\n000000067F0000400200008A590000DA8000-000000067F0000400200008A590000DAC000__000000914E3F38F0\n000000067F0000400200008A590000DA8000-000000067F0000400200008A590000DAC000__000000931B9A2710\n000000067F0000400200008A590000DA81C4-000000067F0000400200008A590000DB0BA9__00000018C4E3E6C1-000000196493E2E1\n000000067F0000400200008A590000DAC000-000000067F0000400200008A590000DB0000__0000001C725A2400\n000000067F0000400200008A590000DAC000-000000067F0000400200008A590000DB0000__0000001C760FA190\n000000067F0000400200008A590000DAC000-000000067F0000400200008A590000DB0000__00000038E67ABFA0\n000000067F0000400200008A590000DAC000-000000067F0000400200008A590000DB0000__0000003903F1CFE8\n000000067F0000400200008A590000DAC000-000000067F0000400200008A590000DB0000__0000003B99F7F8A0\n000000067F0000400200008A590000DAC000-000000067F0000400200008A590000DB0000__0000005D2FFFFB38\n000000067F0000400200008A590000DAC000-000000067F0000400200008A590000DB0000__00000073AD3FE6B8\n000000067F0000400200008A590000DAC000-000000067F0000400200008A590000DB0000__000000914E3F38F0\n000000067F0000400200008A590000DAC000-000000067F0000400200008A590000DB0000__000000931
B9A2710\n000000067F0000400200008A590000DB0000-000000067F0000400200008A590000DB4000__0000001C725A2400\n000000067F0000400200008A590000DB0000-000000067F0000400200008A590000DB4000__0000001C760FA190\n000000067F0000400200008A590000DB0000-000000067F0000400200008A590000DB4000__00000038E67ABFA0\n000000067F0000400200008A590000DB0000-000000067F0000400200008A590000DB4000__0000003903F1CFE8\n000000067F0000400200008A590000DB0000-000000067F0000400200008A590000DB4000__0000003B99F7F8A0\n000000067F0000400200008A590000DB0000-000000067F0000400200008A590000DB4000__0000005D2FFFFB38\n000000067F0000400200008A590000DB0000-000000067F0000400200008A590000DB4000__00000073AD3FE6B8\n000000067F0000400200008A590000DB0000-000000067F0000400200008A590000DB4000__000000914E3F38F0\n000000067F0000400200008A590000DB0000-000000067F0000400200008A590000DB4000__000000931B9A2710\n000000067F0000400200008A590000DB0BA9-000000067F0000400200008A590000DB9590__00000018C4E3E6C1-000000196493E2E1\n000000067F0000400200008A590000DB4000-000000067F0000400200008A590000DB8000__0000001C725A2400\n000000067F0000400200008A590000DB4000-000000067F0000400200008A590000DB8000__0000001C760FA190\n000000067F0000400200008A590000DB4000-000000067F0000400200008A590000DB8000__00000038E67ABFA0\n000000067F0000400200008A590000DB4000-000000067F0000400200008A590000DB8000__0000003903F1CFE8\n000000067F0000400200008A590000DB4000-000000067F0000400200008A590000DB8000__0000003B99F7F8A0\n000000067F0000400200008A590000DB4000-000000067F0000400200008A590000DB8000__0000005D2FFFFB38\n000000067F0000400200008A590000DB4000-000000067F0000400200008A590000DB8000__00000073AD3FE6B8\n000000067F0000400200008A590000DB4000-000000067F0000400200008A590000DB8000__000000914E3F38F0\n000000067F0000400200008A590000DB4000-000000067F0000400200008A590000DB8000__000000931B9A2710\n000000067F0000400200008A590000DB8000-000000067F0000400200008A590000DBC000__0000001A2433F0F8\n000000067F0000400200008A590000DB8000-000000067F0000400200008A590000DBC000__0000001C760FA190\n000000067F00004002000
08A590000DB8000-000000067F0000400200008A590000DBC000__00000038E67ABFA0\n000000067F0000400200008A590000DB8000-000000067F0000400200008A590000DBC000__0000003903F1CFE8\n000000067F0000400200008A590000DB8000-000000067F0000400200008A590000DBC000__0000003B99F7F8A0\n000000067F0000400200008A590000DB8000-000000067F0000400200008A590000DBC000__0000005D2FFFFB38\n000000067F0000400200008A590000DB8000-000000067F0000400200008A590000DBC000__00000073AD3FE6B8\n000000067F0000400200008A590000DB8000-000000067F0000400200008A590000DBC000__000000914E3F38F0\n000000067F0000400200008A590000DB8000-000000067F0000400200008A590000DBC000__000000931B9A2710\n000000067F0000400200008A590000DB9590-000000067F0000400200008A590100000000__00000018C4E3E6C1-000000196493E2E1\n000000067F0000400200008A590000DB984D-000000067F0000400200008A590000DC221C__000000196493E2E1-0000001A0443DCD9\n000000067F0000400200008A590000DBC000-000000067F0000400200008A590000DC0000__0000001A2433F0F8\n000000067F0000400200008A590000DBC000-000000067F0000400200008A590000DC0000__0000001C760FA190\n000000067F0000400200008A590000DBC000-000000067F0000400200008A590000DC0000__00000038E67ABFA0\n000000067F0000400200008A590000DBC000-000000067F0000400200008A590000DC0000__0000003903F1CFE8\n000000067F0000400200008A590000DBC000-000000067F0000400200008A590000DC0000__0000003B99F7F8A0\n000000067F0000400200008A590000DBC000-000000067F0000400200008A590000DC0000__0000005D2FFFFB38\n000000067F0000400200008A590000DBC000-000000067F0000400200008A590000DC0000__00000073AD3FE6B8\n000000067F0000400200008A590000DBC000-000000067F0000400200008A590000DC0000__000000914E3F38F0\n000000067F0000400200008A590000DBC000-000000067F0000400200008A590000DC0000__000000931B9A2710\n000000067F0000400200008A590000DC0000-000000067F0000400200008A590000DC4000__0000001A2433F0F8\n000000067F0000400200008A590000DC0000-000000067F0000400200008A590000DC4000__0000001C760FA190\n000000067F0000400200008A590000DC0000-000000067F0000400200008A590000DC4000__00000038E67ABFA0\n000000067F0000400200008A590000DC00
00-000000067F0000400200008A590000DC4000__0000003903F1CFE8\n000000067F0000400200008A590000DC0000-000000067F0000400200008A590000DC4000__0000003B99F7F8A0\n000000067F0000400200008A590000DC0000-000000067F0000400200008A590000DC4000__0000005D2FFFFB38\n000000067F0000400200008A590000DC0000-000000067F0000400200008A590000DC4000__00000073AD3FE6B8\n000000067F0000400200008A590000DC0000-000000067F0000400200008A590000DC4000__000000914E3F38F0\n000000067F0000400200008A590000DC0000-000000067F0000400200008A590000DC4000__000000931B9A2710\n000000067F0000400200008A590000DC221C-000000067F0000400200008A590000DCABF9__000000196493E2E1-0000001A0443DCD9\n000000067F0000400200008A590000DC4000-000000067F0000400200008A590000DC8000__0000001A2433F0F8\n000000067F0000400200008A590000DC4000-000000067F0000400200008A590000DC8000__0000001C760FA190\n000000067F0000400200008A590000DC4000-000000067F0000400200008A590000DC8000__00000038E67ABFA0\n000000067F0000400200008A590000DC4000-000000067F0000400200008A590000DC8000__0000003903F1CFE8\n000000067F0000400200008A590000DC4000-000000067F0000400200008A590000DC8000__0000003B99F7F8A0\n000000067F0000400200008A590000DC4000-000000067F0000400200008A590000DC8000__0000005D2FFFFB38\n000000067F0000400200008A590000DC4000-000000067F0000400200008A590000DC8000__00000073AD3FE6B8\n000000067F0000400200008A590000DC4000-000000067F0000400200008A590000DC8000__000000914E3F38F0\n000000067F0000400200008A590000DC4000-000000067F0000400200008A590000DC8000__000000931B9A2710\n000000067F0000400200008A590000DC8000-000000067F0000400200008A590000DCC000__0000001A2433F0F8\n000000067F0000400200008A590000DC8000-000000067F0000400200008A590000DCC000__0000001C760FA190\n000000067F0000400200008A590000DC8000-000000067F0000400200008A590000DCC000__00000038E67ABFA0\n000000067F0000400200008A590000DC8000-000000067F0000400200008A590000DCC000__0000003903F1CFE8\n000000067F0000400200008A590000DC8000-000000067F0000400200008A590000DCC000__0000003B99F7F8A0\n000000067F0000400200008A590000DC8000-000000067F0000400200008A590
000DCC000__0000005D2FFFFB38\n000000067F0000400200008A590000DC8000-000000067F0000400200008A590000DCC000__00000073AD3FE6B8\n000000067F0000400200008A590000DC8000-000000067F0000400200008A590000DCC000__000000914E3F38F0\n000000067F0000400200008A590000DC8000-000000067F0000400200008A590000DCC000__000000931B9A2710\n000000067F0000400200008A590000DCABF9-000000067F0000400200008A590000DD35DF__000000196493E2E1-0000001A0443DCD9\n000000067F0000400200008A590000DCC000-000000067F0000400200008A590000DD0000__0000001A2433F0F8\n000000067F0000400200008A590000DCC000-000000067F0000400200008A590000DD0000__0000001C760FA190\n000000067F0000400200008A590000DCC000-000000067F0000400200008A590000DD0000__00000038E67ABFA0\n000000067F0000400200008A590000DCC000-000000067F0000400200008A590000DD0000__0000003903F1CFE8\n000000067F0000400200008A590000DCC000-000000067F0000400200008A590000DD0000__0000003B99F7F8A0\n000000067F0000400200008A590000DCC000-000000067F0000400200008A590000DD0000__0000005D2FFFFB38\n000000067F0000400200008A590000DCC000-000000067F0000400200008A590000DD0000__00000073AD3FE6B8\n000000067F0000400200008A590000DCC000-000000067F0000400200008A590000DD0000__000000914E3F38F0\n000000067F0000400200008A590000DCC000-000000067F0000400200008A590000DD0000__000000931B9A2710\n000000067F0000400200008A590000DD0000-000000067F0000400200008A590000DD4000__0000001A2433F0F8\n000000067F0000400200008A590000DD0000-000000067F0000400200008A590000DD4000__0000001C760FA190\n000000067F0000400200008A590000DD0000-000000067F0000400200008A590000DD4000__00000038E67ABFA0\n000000067F0000400200008A590000DD0000-000000067F0000400200008A590000DD4000__0000003903F1CFE8\n000000067F0000400200008A590000DD0000-000000067F0000400200008A590000DD4000__0000003B99F7F8A0\n000000067F0000400200008A590000DD0000-000000067F0000400200008A590000DD4000__0000005D2FFFFB38\n000000067F0000400200008A590000DD0000-000000067F0000400200008A590000DD4000__00000073AD3FE6B8\n000000067F0000400200008A590000DD0000-000000067F0000400200008A590000DD4000__000000914E3F38F0\n0
00000067F0000400200008A590000DD0000-000000067F0000400200008A590000DD4000__000000931B9A2710\n000000067F0000400200008A590000DD35DF-000000067F0000400200008A590000DDBFBF__000000196493E2E1-0000001A0443DCD9\n000000067F0000400200008A590000DD4000-000000067F0000400200008A590000DD8000__0000001A2433F0F8\n000000067F0000400200008A590000DD4000-000000067F0000400200008A590000DD8000__0000001C760FA190\n000000067F0000400200008A590000DD4000-000000067F0000400200008A590000DD8000__00000038E67ABFA0\n000000067F0000400200008A590000DD4000-000000067F0000400200008A590000DD8000__0000003903F1CFE8\n000000067F0000400200008A590000DD4000-000000067F0000400200008A590000DD8000__0000003B99F7F8A0\n000000067F0000400200008A590000DD4000-000000067F0000400200008A590000DD8000__0000005D2FFFFB38\n000000067F0000400200008A590000DD4000-000000067F0000400200008A590000DD8000__00000073AD3FE6B8\n000000067F0000400200008A590000DD4000-000000067F0000400200008A590000DD8000__000000914E3F38F0\n000000067F0000400200008A590000DD4000-000000067F0000400200008A590000DD8000__000000931B9A2710\n000000067F0000400200008A590000DD8000-000000067F0000400200008A590000DDC000__0000001A2433F0F8\n000000067F0000400200008A590000DD8000-000000067F0000400200008A590000DDC000__0000001C760FA190\n000000067F0000400200008A590000DD8000-000000067F0000400200008A590000DDC000__00000038E67ABFA0\n000000067F0000400200008A590000DD8000-000000067F0000400200008A590000DDC000__0000003903F1CFE8\n000000067F0000400200008A590000DD8000-000000067F0000400200008A590000DDC000__0000003B99F7F8A0\n000000067F0000400200008A590000DD8000-000000067F0000400200008A590000DDC000__0000005D2FFFFB38\n000000067F0000400200008A590000DD8000-000000067F0000400200008A590000DDC000__00000073AD3FE6B8\n000000067F0000400200008A590000DD8000-000000067F0000400200008A590000DDC000__000000914E3F38F0\n000000067F0000400200008A590000DD8000-000000067F0000400200008A590000DDC000__000000931B9A2710\n000000067F0000400200008A590000DDBFBF-000000067F0000400200008A590000DE49A7__000000196493E2E1-0000001A0443DCD9\n000000067F0000
400200008A590000DDC000-000000067F0000400200008A590000DE0000__0000001A2433F0F8\n000000067F0000400200008A590000DDC000-000000067F0000400200008A590000DE0000__0000001C760FA190\n000000067F0000400200008A590000DDC000-000000067F0000400200008A590000DE0000__00000038E67ABFA0\n000000067F0000400200008A590000DDC000-000000067F0000400200008A590000DE0000__0000003903F1CFE8\n000000067F0000400200008A590000DDC000-000000067F0000400200008A590000DE0000__0000003B99F7F8A0\n000000067F0000400200008A590000DDC000-000000067F0000400200008A590000DE0000__0000005D2FFFFB38\n000000067F0000400200008A590000DDC000-000000067F0000400200008A590000DE0000__00000073AD3FE6B8\n000000067F0000400200008A590000DDC000-000000067F0000400200008A590000DE0000__000000914E3F38F0\n000000067F0000400200008A590000DDC000-000000067F0000400200008A590000DE0000__000000931B9A2710\n000000067F0000400200008A590000DE0000-000000067F0000400200008A590000DE4000__0000001A2433F0F8\n000000067F0000400200008A590000DE0000-000000067F0000400200008A590000DE4000__0000001C760FA190\n000000067F0000400200008A590000DE0000-000000067F0000400200008A590000DE4000__00000038E67ABFA0\n000000067F0000400200008A590000DE0000-000000067F0000400200008A590000DE4000__0000003903F1CFE8\n000000067F0000400200008A590000DE0000-000000067F0000400200008A590000DE4000__0000003B99F7F8A0\n000000067F0000400200008A590000DE0000-000000067F0000400200008A590000DE4000__0000005D2FFFFB38\n000000067F0000400200008A590000DE0000-000000067F0000400200008A590000DE4000__00000073AD3FE6B8\n000000067F0000400200008A590000DE0000-000000067F0000400200008A590000DE4000__000000914E3F38F0\n000000067F0000400200008A590000DE0000-000000067F0000400200008A590000DE4000__000000931B9A2710\n000000067F0000400200008A590000DE4000-000000067F0000400200008A590000DE8000__0000001A2433F0F8\n000000067F0000400200008A590000DE4000-000000067F0000400200008A590000DE8000__0000001C760FA190\n000000067F0000400200008A590000DE4000-000000067F0000400200008A590000DE8000__00000038E67ABFA0\n000000067F0000400200008A590000DE4000-000000067F0000400200008A
590000DE8000__0000003903F1CFE8\n000000067F0000400200008A590000DE4000-000000067F0000400200008A590000DE8000__0000003B99F7F8A0\n000000067F0000400200008A590000DE4000-000000067F0000400200008A590000DE8000__0000005D2FFFFB38\n000000067F0000400200008A590000DE4000-000000067F0000400200008A590000DE8000__00000073AD3FE6B8\n000000067F0000400200008A590000DE4000-000000067F0000400200008A590000DE8000__000000914E3F38F0\n000000067F0000400200008A590000DE4000-000000067F0000400200008A590000DE8000__000000931B9A2710\n000000067F0000400200008A590000DE49A7-000000067F0000400200008A590000DED38D__000000196493E2E1-0000001A0443DCD9\n000000067F0000400200008A590000DE8000-000000067F0000400200008A590000DEC000__0000001A2433F0F8\n000000067F0000400200008A590000DE8000-000000067F0000400200008A590000DEC000__0000001C760FA190\n000000067F0000400200008A590000DE8000-000000067F0000400200008A590000DEC000__00000038E67ABFA0\n000000067F0000400200008A590000DE8000-000000067F0000400200008A590000DEC000__0000003903F1CFE8\n000000067F0000400200008A590000DE8000-000000067F0000400200008A590000DEC000__0000003B99F7F8A0\n000000067F0000400200008A590000DE8000-000000067F0000400200008A590000DEC000__0000005D2FFFFB38\n000000067F0000400200008A590000DE8000-000000067F0000400200008A590000DEC000__00000073AD3FE6B8\n000000067F0000400200008A590000DE8000-000000067F0000400200008A590000DEC000__000000914E3F38F0\n000000067F0000400200008A590000DE8000-000000067F0000400200008A590000DEC000__000000931B9A2710\n000000067F0000400200008A590000DEC000-000000067F0000400200008A590000DF0000__0000001A2433F0F8\n000000067F0000400200008A590000DEC000-000000067F0000400200008A590000DF0000__0000001C760FA190\n000000067F0000400200008A590000DEC000-000000067F0000400200008A590000DF0000__00000038E67ABFA0\n000000067F0000400200008A590000DEC000-000000067F0000400200008A590000DF0000__0000003903F1CFE8\n000000067F0000400200008A590000DEC000-000000067F0000400200008A590000DF0000__0000003B99F7F8A0\n000000067F0000400200008A590000DEC000-000000067F0000400200008A590000DF0000__0000005D2FFFFB38
\n000000067F0000400200008A590000DEC000-000000067F0000400200008A590000DF0000__00000073AD3FE6B8\n000000067F0000400200008A590000DEC000-000000067F0000400200008A590000DF0000__000000914E3F38F0\n000000067F0000400200008A590000DEC000-000000067F0000400200008A590000DF0000__000000931B9A2710\n000000067F0000400200008A590000DED38D-000000067F0000400200008A590000DF5D68__000000196493E2E1-0000001A0443DCD9\n000000067F0000400200008A590000DF0000-000000067F0000400200008A590000DF4000__0000001A2433F0F8\n000000067F0000400200008A590000DF0000-000000067F0000400200008A590000DF4000__0000001C760FA190\n000000067F0000400200008A590000DF0000-000000067F0000400200008A590000DF4000__00000038E67ABFA0\n000000067F0000400200008A590000DF0000-000000067F0000400200008A590000DF4000__0000003903F1CFE8\n000000067F0000400200008A590000DF0000-000000067F0000400200008A590000DF4000__0000003B99F7F8A0\n000000067F0000400200008A590000DF0000-000000067F0000400200008A590000DF4000__0000005D2FFFFB38\n000000067F0000400200008A590000DF0000-000000067F0000400200008A590000DF4000__00000073AD3FE6B8\n000000067F0000400200008A590000DF0000-000000067F0000400200008A590000DF4000__000000914E3F38F0\n000000067F0000400200008A590000DF0000-000000067F0000400200008A590000DF4000__000000931B9A2710\n000000067F0000400200008A590000DF4000-000000067F0000400200008A590000DF8000__0000001A2433F0F8\n000000067F0000400200008A590000DF4000-000000067F0000400200008A590000DF8000__0000001C760FA190\n000000067F0000400200008A590000DF4000-000000067F0000400200008A590000DF8000__00000038E67ABFA0\n000000067F0000400200008A590000DF4000-000000067F0000400200008A590000DF8000__0000003903F1CFE8\n000000067F0000400200008A590000DF4000-000000067F0000400200008A590000DF8000__0000003B99F7F8A0\n000000067F0000400200008A590000DF4000-000000067F0000400200008A590000DF8000__0000005D2FFFFB38\n000000067F0000400200008A590000DF4000-000000067F0000400200008A590000DF8000__00000073AD3FE6B8\n000000067F0000400200008A590000DF4000-000000067F0000400200008A590000DF8000__000000914E3F38F0\n000000067F0000400200008A5900
00DF4000-000000067F0000400200008A590000DF8000__000000931B9A2710\n000000067F0000400200008A590000DF5D68-000000067F0000400200008A590000DFE74A__000000196493E2E1-0000001A0443DCD9\n000000067F0000400200008A590000DF8000-000000067F0000400200008A590000DFC000__0000001A2433F0F8\n000000067F0000400200008A590000DF8000-000000067F0000400200008A590000DFC000__0000001C760FA190\n000000067F0000400200008A590000DF8000-000000067F0000400200008A590000DFC000__00000038E67ABFA0\n000000067F0000400200008A590000DF8000-000000067F0000400200008A590000DFC000__0000003903F1CFE8\n000000067F0000400200008A590000DF8000-000000067F0000400200008A590000DFC000__0000003B99F7F8A0\n000000067F0000400200008A590000DF8000-000000067F0000400200008A590000DFC000__0000005D2FFFFB38\n000000067F0000400200008A590000DF8000-000000067F0000400200008A590000DFC000__00000073AD3FE6B8\n000000067F0000400200008A590000DF8000-000000067F0000400200008A590000DFC000__000000914E3F38F0\n000000067F0000400200008A590000DF8000-000000067F0000400200008A590000DFC000__000000931B9A2710\n000000067F0000400200008A590000DFC000-000000067F0000400200008A590000E00000__0000001A2433F0F8\n000000067F0000400200008A590000DFC000-000000067F0000400200008A590000E00000__0000001C760FA190\n000000067F0000400200008A590000DFC000-000000067F0000400200008A590000E00000__00000038E67ABFA0\n000000067F0000400200008A590000DFC000-000000067F0000400200008A590000E00000__0000003903F1CFE8\n000000067F0000400200008A590000DFC000-000000067F0000400200008A590000E00000__0000003B99F7F8A0\n000000067F0000400200008A590000DFC000-000000067F0000400200008A590000E00000__0000005D2FFFFB38\n000000067F0000400200008A590000DFC000-000000067F0000400200008A590000E00000__00000073AD3FE6B8\n000000067F0000400200008A590000DFC000-000000067F0000400200008A590000E00000__000000914E3F38F0\n000000067F0000400200008A590000DFC000-000000067F0000400200008A590000E00000__000000931B9A2710\n000000067F0000400200008A590000DFE74A-000000067F0000400200008A590000E0711A__000000196493E2E1-0000001A0443DCD9\n000000067F0000400200008A590000E00000-0000
00067F0000400200008A590000E04000__0000001A2433F0F8\n000000067F0000400200008A590000E00000-000000067F0000400200008A590000E04000__0000001C760FA190\n000000067F0000400200008A590000E00000-000000067F0000400200008A590000E04000__00000038E67ABFA0\n000000067F0000400200008A590000E00000-000000067F0000400200008A590000E04000__0000003903F1CFE8\n000000067F0000400200008A590000E00000-000000067F0000400200008A590000E04000__0000003B99F7F8A0\n000000067F0000400200008A590000E00000-000000067F0000400200008A590000E04000__0000005D2FFFFB38\n000000067F0000400200008A590000E00000-000000067F0000400200008A590000E04000__00000073AD3FE6B8\n000000067F0000400200008A590000E00000-000000067F0000400200008A590000E04000__000000914E3F38F0\n000000067F0000400200008A590000E00000-000000067F0000400200008A590000E04000__000000931B9A2710\n000000067F0000400200008A590000E04000-000000067F0000400200008A590000E08000__0000001A2433F0F8\n000000067F0000400200008A590000E04000-000000067F0000400200008A590000E08000__0000001C760FA190\n000000067F0000400200008A590000E04000-000000067F0000400200008A590000E08000__00000038E67ABFA0\n000000067F0000400200008A590000E04000-000000067F0000400200008A590000E08000__0000003903F1CFE8\n000000067F0000400200008A590000E04000-000000067F0000400200008A590000E08000__0000003B99F7F8A0\n000000067F0000400200008A590000E04000-000000067F0000400200008A590000E08000__0000005D2FFFFB38\n000000067F0000400200008A590000E04000-000000067F0000400200008A590000E08000__00000073AD3FE6B8\n000000067F0000400200008A590000E04000-000000067F0000400200008A590000E08000__000000914E3F38F0\n000000067F0000400200008A590000E04000-000000067F0000400200008A590000E08000__000000931B9A2710\n000000067F0000400200008A590000E0711A-000000067F0000400200008A590000E0FAEF__000000196493E2E1-0000001A0443DCD9\n000000067F0000400200008A590000E08000-000000067F0000400200008A590000E0C000__0000001A2433F0F8\n000000067F0000400200008A590000E08000-000000067F0000400200008A590000E0C000__0000001C760FA190\n000000067F0000400200008A590000E08000-000000067F0000400200008A590000E0C0
00__00000038E67ABFA0\n000000067F0000400200008A590000E08000-000000067F0000400200008A590000E0C000__0000003903F1CFE8\n000000067F0000400200008A590000E08000-000000067F0000400200008A590000E0C000__0000003B99F7F8A0\n000000067F0000400200008A590000E08000-000000067F0000400200008A590000E0C000__0000005D2FFFFB38\n000000067F0000400200008A590000E08000-000000067F0000400200008A590000E0C000__00000073AD3FE6B8\n000000067F0000400200008A590000E08000-000000067F0000400200008A590000E0C000__000000914E3F38F0\n000000067F0000400200008A590000E08000-000000067F0000400200008A590000E0C000__000000931B9A2710\n000000067F0000400200008A590000E0C000-000000067F0000400200008A590000E10000__0000001A2433F0F8\n000000067F0000400200008A590000E0C000-000000067F0000400200008A590000E10000__0000001C760FA190\n000000067F0000400200008A590000E0C000-000000067F0000400200008A590000E10000__00000038E67ABFA0\n000000067F0000400200008A590000E0C000-000000067F0000400200008A590000E10000__0000003903F1CFE8\n000000067F0000400200008A590000E0C000-000000067F0000400200008A590000E10000__0000003B99F7F8A0\n000000067F0000400200008A590000E0C000-000000067F0000400200008A590000E10000__0000005D2FFFFB38\n000000067F0000400200008A590000E0C000-000000067F0000400200008A590000E10000__00000073AD3FE6B8\n000000067F0000400200008A590000E0C000-000000067F0000400200008A590000E10000__000000914E3F38F0\n000000067F0000400200008A590000E0C000-000000067F0000400200008A590000E10000__000000931B9A2710\n000000067F0000400200008A590000E0FAEF-000000067F0000400200008A590100000000__000000196493E2E1-0000001A0443DCD9\n000000067F0000400200008A590000E0FDBF-000000067F0000400200008A590000E1879A__0000001A0443DCD9-0000001AA3F3E569\n000000067F0000400200008A590000E10000-000000067F0000400200008A590000E14000__0000001A2433F0F8\n000000067F0000400200008A590000E10000-000000067F0000400200008A590000E14000__0000001C760FA190\n000000067F0000400200008A590000E10000-000000067F0000400200008A590000E14000__00000038E67ABFA0\n000000067F0000400200008A590000E10000-000000067F0000400200008A590000E14000__000000390
3F1CFE8\n000000067F0000400200008A590000E10000-000000067F0000400200008A590000E14000__0000003B99F7F8A0\n000000067F0000400200008A590000E10000-000000067F0000400200008A590000E14000__0000005D2FFFFB38\n000000067F0000400200008A590000E10000-000000067F0000400200008A590000E14000__00000073AD3FE6B8\n000000067F0000400200008A590000E10000-000000067F0000400200008A590000E14000__000000914E3F38F0\n000000067F0000400200008A590000E10000-000000067F0000400200008A590000E14000__000000931B9A2710\n000000067F0000400200008A590000E14000-000000067F0000400200008A590000E18000__0000001A2433F0F8\n000000067F0000400200008A590000E14000-000000067F0000400200008A590000E18000__0000001C760FA190\n000000067F0000400200008A590000E14000-000000067F0000400200008A590000E18000__00000038E67ABFA0\n000000067F0000400200008A590000E14000-000000067F0000400200008A590000E18000__0000003903F1CFE8\n000000067F0000400200008A590000E14000-000000067F0000400200008A590000E18000__0000003B99F7F8A0\n000000067F0000400200008A590000E14000-000000067F0000400200008A590000E18000__0000005D2FFFFB38\n000000067F0000400200008A590000E14000-000000067F0000400200008A590000E18000__00000073AD3FE6B8\n000000067F0000400200008A590000E14000-000000067F0000400200008A590000E18000__000000914E3F38F0\n000000067F0000400200008A590000E14000-000000067F0000400200008A590000E18000__000000931B9A2710\n000000067F0000400200008A590000E18000-000000067F0000400200008A590000E1C000__0000001A2433F0F8\n000000067F0000400200008A590000E18000-000000067F0000400200008A590000E1C000__0000001C760FA190\n000000067F0000400200008A590000E18000-000000067F0000400200008A590000E1C000__00000038E67ABFA0\n000000067F0000400200008A590000E18000-000000067F0000400200008A590000E1C000__0000003903F1CFE8\n000000067F0000400200008A590000E18000-000000067F0000400200008A590000E1C000__0000003B99F7F8A0\n000000067F0000400200008A590000E18000-000000067F0000400200008A590000E1C000__0000005D2FFFFB38\n000000067F0000400200008A590000E18000-000000067F0000400200008A590000E1C000__00000073AD3FE6B8\n000000067F0000400200008A590000E18000-0
00000067F0000400200008A590000E1C000__000000914E3F38F0\n000000067F0000400200008A590000E18000-000000067F0000400200008A590000E1C000__000000931B9A2710\n000000067F0000400200008A590000E1879A-000000067F0000400200008A590000E2117A__0000001A0443DCD9-0000001AA3F3E569\n000000067F0000400200008A590000E1C000-000000067F0000400200008A590000E20000__0000001A2433F0F8\n000000067F0000400200008A590000E1C000-000000067F0000400200008A590000E20000__0000001C760FA190\n000000067F0000400200008A590000E1C000-000000067F0000400200008A590000E20000__00000038E67ABFA0\n000000067F0000400200008A590000E1C000-000000067F0000400200008A590000E20000__0000003903F1CFE8\n000000067F0000400200008A590000E1C000-000000067F0000400200008A590000E20000__0000003B99F7F8A0\n000000067F0000400200008A590000E1C000-000000067F0000400200008A590000E20000__0000005D2FFFFB38\n000000067F0000400200008A590000E1C000-000000067F0000400200008A590000E20000__00000073AD3FE6B8\n000000067F0000400200008A590000E1C000-000000067F0000400200008A590000E20000__000000914E3F38F0\n000000067F0000400200008A590000E1C000-000000067F0000400200008A590000E20000__000000931B9A2710\n000000067F0000400200008A590000E20000-000000067F0000400200008A590000E24000__0000001C760FA190\n000000067F0000400200008A590000E20000-000000067F0000400200008A590000E24000__00000038E67ABFA0\n000000067F0000400200008A590000E20000-000000067F0000400200008A590000E24000__0000003903F1CFE8\n000000067F0000400200008A590000E20000-000000067F0000400200008A590000E24000__0000003B99F7F8A0\n000000067F0000400200008A590000E20000-000000067F0000400200008A590000E24000__0000005D2FFFFB38\n000000067F0000400200008A590000E20000-000000067F0000400200008A590000E24000__00000073AD3FE6B8\n000000067F0000400200008A590000E20000-000000067F0000400200008A590000E24000__000000914E3F38F0\n000000067F0000400200008A590000E20000-000000067F0000400200008A590000E24000__000000931B9A2710\n000000067F0000400200008A590000E20000-030000000000000000000000000000000002__0000001A2433F0F8\n000000067F0000400200008A590000E2117A-000000067F0000400200008A590000E
29B5F__0000001A0443DCD9-0000001AA3F3E569\n000000067F0000400200008A590000E24000-000000067F0000400200008A590000E28000__0000001C760FA190\n000000067F0000400200008A590000E24000-000000067F0000400200008A590000E28000__00000038E67ABFA0\n000000067F0000400200008A590000E24000-000000067F0000400200008A590000E28000__0000003903F1CFE8\n000000067F0000400200008A590000E24000-000000067F0000400200008A590000E28000__0000003B99F7F8A0\n000000067F0000400200008A590000E24000-000000067F0000400200008A590000E28000__0000005D2FFFFB38\n000000067F0000400200008A590000E24000-000000067F0000400200008A590000E28000__00000073AD3FE6B8\n000000067F0000400200008A590000E24000-000000067F0000400200008A590000E28000__000000914E3F38F0\n000000067F0000400200008A590000E24000-000000067F0000400200008A590000E28000__000000931B9A2710\n000000067F0000400200008A590000E28000-000000067F0000400200008A590000E2C000__0000001C760FA190\n000000067F0000400200008A590000E28000-000000067F0000400200008A590000E2C000__00000038E67ABFA0\n000000067F0000400200008A590000E28000-000000067F0000400200008A590000E2C000__0000003903F1CFE8\n000000067F0000400200008A590000E28000-000000067F0000400200008A590000E2C000__0000003B99F7F8A0\n000000067F0000400200008A590000E28000-000000067F0000400200008A590000E2C000__0000005D2FFFFB38\n000000067F0000400200008A590000E28000-000000067F0000400200008A590000E2C000__00000073AD3FE6B8\n000000067F0000400200008A590000E28000-000000067F0000400200008A590000E2C000__000000914E3F38F0\n000000067F0000400200008A590000E28000-000000067F0000400200008A590000E2C000__000000931B9A2710\n000000067F0000400200008A590000E29B5F-000000067F0000400200008A590000E32531__0000001A0443DCD9-0000001AA3F3E569\n000000067F0000400200008A590000E2C000-000000067F0000400200008A590000E30000__0000001C760FA190\n000000067F0000400200008A590000E2C000-000000067F0000400200008A590000E30000__00000038E67ABFA0\n000000067F0000400200008A590000E2C000-000000067F0000400200008A590000E30000__0000003903F1CFE8\n000000067F0000400200008A590000E2C000-000000067F0000400200008A590000E30000__000000
3B99F7F8A0\n000000067F0000400200008A590000E2C000-000000067F0000400200008A590000E30000__0000005D2FFFFB38\n000000067F0000400200008A590000E2C000-000000067F0000400200008A590000E30000__00000073AD3FE6B8\n000000067F0000400200008A590000E2C000-000000067F0000400200008A590000E30000__000000914E3F38F0\n000000067F0000400200008A590000E2C000-000000067F0000400200008A590000E30000__000000931B9A2710\n000000067F0000400200008A590000E30000-000000067F0000400200008A590000E34000__0000001C760FA190\n000000067F0000400200008A590000E30000-000000067F0000400200008A590000E34000__00000038E67ABFA0\n000000067F0000400200008A590000E30000-000000067F0000400200008A590000E34000__0000003903F1CFE8\n000000067F0000400200008A590000E30000-000000067F0000400200008A590000E34000__0000003B99F7F8A0\n000000067F0000400200008A590000E30000-000000067F0000400200008A590000E34000__0000005D2FFFFB38\n000000067F0000400200008A590000E30000-000000067F0000400200008A590000E34000__00000073AD3FE6B8\n000000067F0000400200008A590000E30000-000000067F0000400200008A590000E34000__000000914E3F38F0\n000000067F0000400200008A590000E30000-000000067F0000400200008A590000E34000__000000931B9A2710\n000000067F0000400200008A590000E32531-000000067F0000400200008A590000E3AF0F__0000001A0443DCD9-0000001AA3F3E569\n000000067F0000400200008A590000E34000-000000067F0000400200008A590000E38000__0000001C760FA190\n000000067F0000400200008A590000E34000-000000067F0000400200008A590000E38000__00000038E67ABFA0\n000000067F0000400200008A590000E34000-000000067F0000400200008A590000E38000__0000003903F1CFE8\n000000067F0000400200008A590000E34000-000000067F0000400200008A590000E38000__0000003B99F7F8A0\n000000067F0000400200008A590000E34000-000000067F0000400200008A590000E38000__0000005D2FFFFB38\n000000067F0000400200008A590000E34000-000000067F0000400200008A590000E38000__00000073AD3FE6B8\n000000067F0000400200008A590000E34000-000000067F0000400200008A590000E38000__000000914E3F38F0\n000000067F0000400200008A590000E34000-000000067F0000400200008A590000E38000__000000931B9A2710\n000000067F00004002
00008A590000E38000-000000067F0000400200008A590000E3C000__0000001C760FA190\n000000067F0000400200008A590000E38000-000000067F0000400200008A590000E3C000__00000038E67ABFA0\n000000067F0000400200008A590000E38000-000000067F0000400200008A590000E3C000__0000003903F1CFE8\n000000067F0000400200008A590000E38000-000000067F0000400200008A590000E3C000__0000003B99F7F8A0\n000000067F0000400200008A590000E38000-000000067F0000400200008A590000E3C000__0000005D2FFFFB38\n000000067F0000400200008A590000E38000-000000067F0000400200008A590000E3C000__00000073AD3FE6B8\n000000067F0000400200008A590000E38000-000000067F0000400200008A590000E3C000__000000914E3F38F0\n000000067F0000400200008A590000E38000-000000067F0000400200008A590000E3C000__000000931B9A2710\n000000067F0000400200008A590000E3AF0F-000000067F0000400200008A590000E438DB__0000001A0443DCD9-0000001AA3F3E569\n000000067F0000400200008A590000E3C000-000000067F0000400200008A590000E40000__0000001C760FA190\n000000067F0000400200008A590000E3C000-000000067F0000400200008A590000E40000__00000038E67ABFA0\n000000067F0000400200008A590000E3C000-000000067F0000400200008A590000E40000__0000003903F1CFE8\n000000067F0000400200008A590000E3C000-000000067F0000400200008A590000E40000__0000003B99F7F8A0\n000000067F0000400200008A590000E3C000-000000067F0000400200008A590000E40000__0000005D2FFFFB38\n000000067F0000400200008A590000E3C000-000000067F0000400200008A590000E40000__00000073AD3FE6B8\n000000067F0000400200008A590000E3C000-000000067F0000400200008A590000E40000__000000914E3F38F0\n000000067F0000400200008A590000E3C000-000000067F0000400200008A590000E40000__000000931B9A2710\n000000067F0000400200008A590000E40000-000000067F0000400200008A590000E44000__0000001C760FA190\n000000067F0000400200008A590000E40000-000000067F0000400200008A590000E44000__00000038E67ABFA0\n000000067F0000400200008A590000E40000-000000067F0000400200008A590000E44000__0000003903F1CFE8\n000000067F0000400200008A590000E40000-000000067F0000400200008A590000E44000__0000003B99F7F8A0\n000000067F0000400200008A590000E40000-000000067F0
000400200008A590000E44000__0000005D2FFFFB38\n000000067F0000400200008A590000E40000-000000067F0000400200008A590000E44000__00000073AD3FE6B8\n000000067F0000400200008A590000E40000-000000067F0000400200008A590000E44000__000000914E3F38F0\n000000067F0000400200008A590000E40000-000000067F0000400200008A590000E44000__000000931B9A2710\n000000067F0000400200008A590000E438DB-000000067F0000400200008A590000E4C2B3__0000001A0443DCD9-0000001AA3F3E569\n000000067F0000400200008A590000E44000-000000067F0000400200008A590000E48000__0000001C760FA190\n000000067F0000400200008A590000E44000-000000067F0000400200008A590000E48000__00000038E67ABFA0\n000000067F0000400200008A590000E44000-000000067F0000400200008A590000E48000__0000003903F1CFE8\n000000067F0000400200008A590000E44000-000000067F0000400200008A590000E48000__0000003B99F7F8A0\n000000067F0000400200008A590000E44000-000000067F0000400200008A590000E48000__0000005D2FFFFB38\n000000067F0000400200008A590000E44000-000000067F0000400200008A590000E48000__00000073AD3FE6B8\n000000067F0000400200008A590000E44000-000000067F0000400200008A590000E48000__000000914E3F38F0\n000000067F0000400200008A590000E44000-000000067F0000400200008A590000E48000__000000931B9A2710\n000000067F0000400200008A590000E48000-000000067F0000400200008A590000E4C000__0000001C760FA190\n000000067F0000400200008A590000E48000-000000067F0000400200008A590000E4C000__00000038E67ABFA0\n000000067F0000400200008A590000E48000-000000067F0000400200008A590000E4C000__0000003903F1CFE8\n000000067F0000400200008A590000E48000-000000067F0000400200008A590000E4C000__0000003B99F7F8A0\n000000067F0000400200008A590000E48000-000000067F0000400200008A590000E4C000__0000005D2FFFFB38\n000000067F0000400200008A590000E48000-000000067F0000400200008A590000E4C000__00000073AD3FE6B8\n000000067F0000400200008A590000E48000-000000067F0000400200008A590000E4C000__000000914E3F38F0\n000000067F0000400200008A590000E48000-000000067F0000400200008A590000E4C000__000000931B9A2710\n000000067F0000400200008A590000E4C000-000000067F0000400200008A590000E50000__000
0001C760FA190\n000000067F0000400200008A590000E4C000-000000067F0000400200008A590000E50000__00000038E67ABFA0\n000000067F0000400200008A590000E4C000-000000067F0000400200008A590000E50000__0000003903F1CFE8\n000000067F0000400200008A590000E4C000-000000067F0000400200008A590000E50000__0000003B99F7F8A0\n000000067F0000400200008A590000E4C000-000000067F0000400200008A590000E50000__0000005D2FFFFB38\n000000067F0000400200008A590000E4C000-000000067F0000400200008A590000E50000__00000073AD3FE6B8\n000000067F0000400200008A590000E4C000-000000067F0000400200008A590000E50000__000000914E3F38F0\n000000067F0000400200008A590000E4C000-000000067F0000400200008A590000E50000__000000931B9A2710\n000000067F0000400200008A590000E4C2B3-000000067F0000400200008A590000E54C98__0000001A0443DCD9-0000001AA3F3E569\n000000067F0000400200008A590000E50000-000000067F0000400200008A590000E54000__0000001C760FA190\n000000067F0000400200008A590000E50000-000000067F0000400200008A590000E54000__00000038E67ABFA0\n000000067F0000400200008A590000E50000-000000067F0000400200008A590000E54000__0000003903F1CFE8\n000000067F0000400200008A590000E50000-000000067F0000400200008A590000E54000__0000003B99F7F8A0\n000000067F0000400200008A590000E50000-000000067F0000400200008A590000E54000__0000005D2FFFFB38\n000000067F0000400200008A590000E50000-000000067F0000400200008A590000E54000__00000073AD3FE6B8\n000000067F0000400200008A590000E50000-000000067F0000400200008A590000E54000__000000914E3F38F0\n000000067F0000400200008A590000E50000-000000067F0000400200008A590000E54000__000000931B9A2710\n000000067F0000400200008A590000E54000-000000067F0000400200008A590000E58000__0000001C760FA190\n000000067F0000400200008A590000E54000-000000067F0000400200008A590000E58000__00000038E67ABFA0\n000000067F0000400200008A590000E54000-000000067F0000400200008A590000E58000__0000003903F1CFE8\n000000067F0000400200008A590000E54000-000000067F0000400200008A590000E58000__0000003B99F7F8A0\n000000067F0000400200008A590000E54000-000000067F0000400200008A590000E58000__0000005D2FFFFB38\n000000067F00004
00200008A590000E54000-000000067F0000400200008A590000E58000__00000073AD3FE6B8\n000000067F0000400200008A590000E54000-000000067F0000400200008A590000E58000__000000914E3F38F0\n000000067F0000400200008A590000E54000-000000067F0000400200008A590000E58000__000000931B9A2710\n000000067F0000400200008A590000E54C98-000000067F0000400200008A590000E5D67C__0000001A0443DCD9-0000001AA3F3E569\n000000067F0000400200008A590000E58000-000000067F0000400200008A590000E5C000__0000001C760FA190\n000000067F0000400200008A590000E58000-000000067F0000400200008A590000E5C000__00000038E67ABFA0\n000000067F0000400200008A590000E58000-000000067F0000400200008A590000E5C000__0000003903F1CFE8\n000000067F0000400200008A590000E58000-000000067F0000400200008A590000E5C000__0000003B99F7F8A0\n000000067F0000400200008A590000E58000-000000067F0000400200008A590000E5C000__0000005D2FFFFB38\n000000067F0000400200008A590000E58000-000000067F0000400200008A590000E5C000__00000073AD3FE6B8\n000000067F0000400200008A590000E58000-000000067F0000400200008A590000E5C000__000000914E3F38F0\n000000067F0000400200008A590000E58000-000000067F0000400200008A590000E5C000__000000931B9A2710\n000000067F0000400200008A590000E5C000-000000067F0000400200008A590000E60000__0000001C760FA190\n000000067F0000400200008A590000E5C000-000000067F0000400200008A590000E60000__00000038E67ABFA0\n000000067F0000400200008A590000E5C000-000000067F0000400200008A590000E60000__0000003903F1CFE8\n000000067F0000400200008A590000E5C000-000000067F0000400200008A590000E60000__0000003B99F7F8A0\n000000067F0000400200008A590000E5C000-000000067F0000400200008A590000E60000__0000005D2FFFFB38\n000000067F0000400200008A590000E5C000-000000067F0000400200008A590000E60000__00000073AD3FE6B8\n000000067F0000400200008A590000E5C000-000000067F0000400200008A590000E60000__000000914E3F38F0\n000000067F0000400200008A590000E5C000-000000067F0000400200008A590000E60000__000000931B9A2710\n000000067F0000400200008A590000E5D67C-000000067F0000400200008A590000E66056__0000001A0443DCD9-0000001AA3F3E569\n000000067F0000400200008A5900
00E60000-000000067F0000400200008A590000E64000__0000001C760FA190\n000000067F0000400200008A590000E60000-000000067F0000400200008A590000E64000__00000038E67ABFA0\n000000067F0000400200008A590000E60000-000000067F0000400200008A590000E64000__0000003903F1CFE8\n000000067F0000400200008A590000E60000-000000067F0000400200008A590000E64000__0000003B99F7F8A0\n000000067F0000400200008A590000E60000-000000067F0000400200008A590000E64000__0000005D2FFFFB38\n000000067F0000400200008A590000E60000-000000067F0000400200008A590000E64000__00000073AD3FE6B8\n000000067F0000400200008A590000E60000-000000067F0000400200008A590000E64000__000000914E3F38F0\n000000067F0000400200008A590000E60000-000000067F0000400200008A590000E64000__000000931B9A2710\n000000067F0000400200008A590000E64000-000000067F0000400200008A590000E68000__0000001C725A2400\n000000067F0000400200008A590000E64000-000000067F0000400200008A590000E68000__0000001C760FA190\n000000067F0000400200008A590000E64000-000000067F0000400200008A590000E68000__00000038E67ABFA0\n000000067F0000400200008A590000E64000-000000067F0000400200008A590000E68000__0000003903F1CFE8\n000000067F0000400200008A590000E64000-000000067F0000400200008A590000E68000__0000003B99F7F8A0\n000000067F0000400200008A590000E64000-000000067F0000400200008A590000E68000__0000005D2FFFFB38\n000000067F0000400200008A590000E64000-000000067F0000400200008A590000E68000__00000073AD3FE6B8\n000000067F0000400200008A590000E64000-000000067F0000400200008A590000E68000__000000914E3F38F0\n000000067F0000400200008A590000E64000-000000067F0000400200008A590000E68000__000000931B9A2710\n000000067F0000400200008A590000E66056-000000067F0000400200008A590100000000__0000001A0443DCD9-0000001AA3F3E569\n000000067F0000400200008A590000E6632E-000000067F0000400200008A590000E6ED05__0000001AA3F3E569-0000001B43A3F241\n000000067F0000400200008A590000E68000-000000067F0000400200008A590000E6C000__0000001C725A2400\n000000067F0000400200008A590000E68000-000000067F0000400200008A590000E6C000__0000001C760FA190\n000000067F0000400200008A590000E68000-0000
00067F0000400200008A590000E6C000__00000038E67ABFA0\n000000067F0000400200008A590000E68000-000000067F0000400200008A590000E6C000__0000003903F1CFE8\n000000067F0000400200008A590000E68000-000000067F0000400200008A590000E6C000__0000003B99F7F8A0\n000000067F0000400200008A590000E68000-000000067F0000400200008A590000E6C000__0000005D2FFFFB38\n000000067F0000400200008A590000E68000-000000067F0000400200008A590000E6C000__00000073AD3FE6B8\n000000067F0000400200008A590000E68000-000000067F0000400200008A590000E6C000__000000914E3F38F0\n000000067F0000400200008A590000E68000-000000067F0000400200008A590000E6C000__000000931B9A2710\n000000067F0000400200008A590000E6C000-000000067F0000400200008A590000E70000__0000001C725A2400\n000000067F0000400200008A590000E6C000-000000067F0000400200008A590000E70000__0000001C760FA190\n000000067F0000400200008A590000E6C000-000000067F0000400200008A590000E70000__00000038E67ABFA0\n000000067F0000400200008A590000E6C000-000000067F0000400200008A590000E70000__0000003903F1CFE8\n000000067F0000400200008A590000E6C000-000000067F0000400200008A590000E70000__0000003B99F7F8A0\n000000067F0000400200008A590000E6C000-000000067F0000400200008A590000E70000__0000005D2FFFFB38\n000000067F0000400200008A590000E6C000-000000067F0000400200008A590000E70000__00000073AD3FE6B8\n000000067F0000400200008A590000E6C000-000000067F0000400200008A590000E70000__000000914E3F38F0\n000000067F0000400200008A590000E6C000-000000067F0000400200008A590000E70000__000000931B9A2710\n000000067F0000400200008A590000E6ED05-000000067F0000400200008A590000E776E1__0000001AA3F3E569-0000001B43A3F241\n000000067F0000400200008A590000E70000-000000067F0000400200008A590000E74000__0000001C725A2400\n000000067F0000400200008A590000E70000-000000067F0000400200008A590000E74000__0000001C760FA190\n000000067F0000400200008A590000E70000-000000067F0000400200008A590000E74000__00000038E67ABFA0\n000000067F0000400200008A590000E70000-000000067F0000400200008A590000E74000__0000003903F1CFE8\n000000067F0000400200008A590000E70000-000000067F0000400200008A590000E740
00__0000003B99F7F8A0\n000000067F0000400200008A590000E70000-000000067F0000400200008A590000E74000__0000005D2FFFFB38\n000000067F0000400200008A590000E70000-000000067F0000400200008A590000E74000__00000073AD3FE6B8\n000000067F0000400200008A590000E70000-000000067F0000400200008A590000E74000__000000914E3F38F0\n000000067F0000400200008A590000E70000-000000067F0000400200008A590000E74000__000000931B9A2710\n000000067F0000400200008A590000E74000-000000067F0000400200008A590000E78000__0000001C725A2400\n000000067F0000400200008A590000E74000-000000067F0000400200008A590000E78000__0000001C760FA190\n000000067F0000400200008A590000E74000-000000067F0000400200008A590000E78000__00000038E67ABFA0\n000000067F0000400200008A590000E74000-000000067F0000400200008A590000E78000__0000003903F1CFE8\n000000067F0000400200008A590000E74000-000000067F0000400200008A590000E78000__0000003B99F7F8A0\n000000067F0000400200008A590000E74000-000000067F0000400200008A590000E78000__0000005D2FFFFB38\n000000067F0000400200008A590000E74000-000000067F0000400200008A590000E78000__00000073AD3FE6B8\n000000067F0000400200008A590000E74000-000000067F0000400200008A590000E78000__000000914E3F38F0\n000000067F0000400200008A590000E74000-000000067F0000400200008A590000E78000__000000931B9A2710\n000000067F0000400200008A590000E776E1-000000067F0000400200008A590000E800BC__0000001AA3F3E569-0000001B43A3F241\n000000067F0000400200008A590000E78000-000000067F0000400200008A590000E7C000__0000001C725A2400\n000000067F0000400200008A590000E78000-000000067F0000400200008A590000E7C000__0000001C760FA190\n000000067F0000400200008A590000E78000-000000067F0000400200008A590000E7C000__00000038E67ABFA0\n000000067F0000400200008A590000E78000-000000067F0000400200008A590000E7C000__0000003903F1CFE8\n000000067F0000400200008A590000E78000-000000067F0000400200008A590000E7C000__0000003B99F7F8A0\n000000067F0000400200008A590000E78000-000000067F0000400200008A590000E7C000__0000005D2FFFFB38\n000000067F0000400200008A590000E78000-000000067F0000400200008A590000E7C000__00000073AD3FE6B8\n00000006
7F0000400200008A590000E78000-000000067F0000400200008A590000E7C000__000000914E3F38F0\n000000067F0000400200008A590000E78000-000000067F0000400200008A590000E7C000__000000931B9A2710\n000000067F0000400200008A590000E7C000-000000067F0000400200008A590000E80000__0000001C725A2400\n000000067F0000400200008A590000E7C000-000000067F0000400200008A590000E80000__0000001C760FA190\n000000067F0000400200008A590000E7C000-000000067F0000400200008A590000E80000__00000038E67ABFA0\n000000067F0000400200008A590000E7C000-000000067F0000400200008A590000E80000__0000003903F1CFE8\n000000067F0000400200008A590000E7C000-000000067F0000400200008A590000E80000__0000003B99F7F8A0\n000000067F0000400200008A590000E7C000-000000067F0000400200008A590000E80000__0000005D2FFFFB38\n000000067F0000400200008A590000E7C000-000000067F0000400200008A590000E80000__00000073AD3FE6B8\n000000067F0000400200008A590000E7C000-000000067F0000400200008A590000E80000__000000914E3F38F0\n000000067F0000400200008A590000E7C000-000000067F0000400200008A590000E80000__000000931B9A2710\n000000067F0000400200008A590000E80000-000000067F0000400200008A590000E84000__0000001C725A2400\n000000067F0000400200008A590000E80000-000000067F0000400200008A590000E84000__0000001C760FA190\n000000067F0000400200008A590000E80000-000000067F0000400200008A590000E84000__00000038E67ABFA0\n000000067F0000400200008A590000E80000-000000067F0000400200008A590000E84000__0000003903F1CFE8\n000000067F0000400200008A590000E80000-000000067F0000400200008A590000E84000__0000003B99F7F8A0\n000000067F0000400200008A590000E80000-000000067F0000400200008A590000E84000__0000005D2FFFFB38\n000000067F0000400200008A590000E80000-000000067F0000400200008A590000E84000__00000073AD3FE6B8\n000000067F0000400200008A590000E80000-000000067F0000400200008A590000E84000__000000914E3F38F0\n000000067F0000400200008A590000E80000-000000067F0000400200008A590000E84000__000000931B9A2710\n000000067F0000400200008A590000E800BC-000000067F0000400200008A590000E88A9D__0000001AA3F3E569-0000001B43A3F241\n000000067F0000400200008A590000E84000-0
00000067F0000400200008A590000E88000__0000001C725A2400\n000000067F0000400200008A590000E84000-000000067F0000400200008A590000E88000__0000001C760FA190\n000000067F0000400200008A590000E84000-000000067F0000400200008A590000E88000__00000038E67ABFA0\n000000067F0000400200008A590000E84000-000000067F0000400200008A590000E88000__0000003903F1CFE8\n000000067F0000400200008A590000E84000-000000067F0000400200008A590000E88000__0000003B99F7F8A0\n000000067F0000400200008A590000E84000-000000067F0000400200008A590000E88000__0000005D2FFFFB38\n000000067F0000400200008A590000E84000-000000067F0000400200008A590000E88000__00000073AD3FE6B8\n000000067F0000400200008A590000E84000-000000067F0000400200008A590000E88000__000000914E3F38F0\n000000067F0000400200008A590000E84000-000000067F0000400200008A590000E88000__000000931B9A2710\n000000067F0000400200008A590000E88000-000000067F0000400200008A590000E8C000__0000001C725A2400\n000000067F0000400200008A590000E88000-000000067F0000400200008A590000E8C000__0000001C760FA190\n000000067F0000400200008A590000E88000-000000067F0000400200008A590000E8C000__00000038E67ABFA0\n000000067F0000400200008A590000E88000-000000067F0000400200008A590000E8C000__0000003903F1CFE8\n000000067F0000400200008A590000E88000-000000067F0000400200008A590000E8C000__0000003B99F7F8A0\n000000067F0000400200008A590000E88000-000000067F0000400200008A590000E8C000__0000005D2FFFFB38\n000000067F0000400200008A590000E88000-000000067F0000400200008A590000E8C000__00000073AD3FE6B8\n000000067F0000400200008A590000E88000-000000067F0000400200008A590000E8C000__000000914E3F38F0\n000000067F0000400200008A590000E88000-000000067F0000400200008A590000E8C000__000000931B9A2710\n000000067F0000400200008A590000E88A9D-000000067F0000400200008A590000E91484__0000001AA3F3E569-0000001B43A3F241\n000000067F0000400200008A590000E8C000-000000067F0000400200008A590000E90000__0000001C725A2400\n000000067F0000400200008A590000E8C000-000000067F0000400200008A590000E90000__0000001C760FA190\n000000067F0000400200008A590000E8C000-000000067F0000400200008A590000E
90000__00000038E67ABFA0\n000000067F0000400200008A590000E8C000-000000067F0000400200008A590000E90000__0000003903F1CFE8\n000000067F0000400200008A590000E8C000-000000067F0000400200008A590000E90000__0000003B99F7F8A0\n000000067F0000400200008A590000E8C000-000000067F0000400200008A590000E90000__0000005D2FFFFB38\n000000067F0000400200008A590000E8C000-000000067F0000400200008A590000E90000__00000073AD3FE6B8\n000000067F0000400200008A590000E8C000-000000067F0000400200008A590000E90000__000000914E3F38F0\n000000067F0000400200008A590000E8C000-000000067F0000400200008A590000E90000__000000931B9A2710\n000000067F0000400200008A590000E90000-000000067F0000400200008A590000E94000__0000001C725A2400\n000000067F0000400200008A590000E90000-000000067F0000400200008A590000E94000__0000001C760FA190\n000000067F0000400200008A590000E90000-000000067F0000400200008A590000E94000__00000038E67ABFA0\n000000067F0000400200008A590000E90000-000000067F0000400200008A590000E94000__0000003903F1CFE8\n000000067F0000400200008A590000E90000-000000067F0000400200008A590000E94000__0000003B99F7F8A0\n000000067F0000400200008A590000E90000-000000067F0000400200008A590000E94000__0000005D2FFFFB38\n000000067F0000400200008A590000E90000-000000067F0000400200008A590000E94000__00000073AD3FE6B8\n000000067F0000400200008A590000E90000-000000067F0000400200008A590000E94000__000000914E3F38F0\n000000067F0000400200008A590000E90000-000000067F0000400200008A590000E94000__000000931B9A2710\n000000067F0000400200008A590000E91484-000000067F0000400200008A590000E99E65__0000001AA3F3E569-0000001B43A3F241\n000000067F0000400200008A590000E94000-000000067F0000400200008A590000E98000__0000001C725A2400\n000000067F0000400200008A590000E94000-000000067F0000400200008A590000E98000__0000001C760FA190\n000000067F0000400200008A590000E94000-000000067F0000400200008A590000E98000__00000038E67ABFA0\n000000067F0000400200008A590000E94000-000000067F0000400200008A590000E98000__0000003903F1CFE8\n000000067F0000400200008A590000E94000-000000067F0000400200008A590000E98000__0000003B99F7F8A0\n00000
0067F0000400200008A590000E94000-000000067F0000400200008A590000E98000__0000005D2FFFFB38\n000000067F0000400200008A590000E94000-000000067F0000400200008A590000E98000__00000073AD3FE6B8\n000000067F0000400200008A590000E94000-000000067F0000400200008A590000E98000__000000914E3F38F0\n000000067F0000400200008A590000E94000-000000067F0000400200008A590000E98000__000000931B9A2710\n000000067F0000400200008A590000E98000-000000067F0000400200008A590000E9C000__0000001C725A2400\n000000067F0000400200008A590000E98000-000000067F0000400200008A590000E9C000__0000001C760FA190\n000000067F0000400200008A590000E98000-000000067F0000400200008A590000E9C000__00000038E67ABFA0\n000000067F0000400200008A590000E98000-000000067F0000400200008A590000E9C000__0000003903F1CFE8\n000000067F0000400200008A590000E98000-000000067F0000400200008A590000E9C000__0000003B99F7F8A0\n000000067F0000400200008A590000E98000-000000067F0000400200008A590000E9C000__0000005D2FFFFB38\n000000067F0000400200008A590000E98000-000000067F0000400200008A590000E9C000__00000073AD3FE6B8\n000000067F0000400200008A590000E98000-000000067F0000400200008A590000E9C000__000000914E3F38F0\n000000067F0000400200008A590000E98000-000000067F0000400200008A590000E9C000__000000931B9A2710\n000000067F0000400200008A590000E99E65-000000067F0000400200008A590000EA2841__0000001AA3F3E569-0000001B43A3F241\n000000067F0000400200008A590000E9C000-000000067F0000400200008A590000EA0000__0000001C725A2400\n000000067F0000400200008A590000E9C000-000000067F0000400200008A590000EA0000__0000001C760FA190\n000000067F0000400200008A590000E9C000-000000067F0000400200008A590000EA0000__00000038E67ABFA0\n000000067F0000400200008A590000E9C000-000000067F0000400200008A590000EA0000__0000003903F1CFE8\n000000067F0000400200008A590000E9C000-000000067F0000400200008A590000EA0000__0000003B99F7F8A0\n000000067F0000400200008A590000E9C000-000000067F0000400200008A590000EA0000__0000005D2FFFFB38\n000000067F0000400200008A590000E9C000-000000067F0000400200008A590000EA0000__00000073AD3FE6B8\n000000067F0000400200008A590000E9C00
0-000000067F0000400200008A590000EA0000__000000914E3F38F0\n000000067F0000400200008A590000E9C000-000000067F0000400200008A590000EA0000__000000931B9A2710\n000000067F0000400200008A590000EA0000-000000067F0000400200008A590000EA4000__0000001C725A2400\n000000067F0000400200008A590000EA0000-000000067F0000400200008A590000EA4000__0000001C760FA190\n000000067F0000400200008A590000EA0000-000000067F0000400200008A590000EA4000__00000038E67ABFA0\n000000067F0000400200008A590000EA0000-000000067F0000400200008A590000EA4000__0000003903F1CFE8\n000000067F0000400200008A590000EA0000-000000067F0000400200008A590000EA4000__0000003B99F7F8A0\n000000067F0000400200008A590000EA0000-000000067F0000400200008A590000EA4000__0000005D2FFFFB38\n000000067F0000400200008A590000EA0000-000000067F0000400200008A590000EA4000__00000073AD3FE6B8\n000000067F0000400200008A590000EA0000-000000067F0000400200008A590000EA4000__000000914E3F38F0\n000000067F0000400200008A590000EA0000-000000067F0000400200008A590000EA4000__000000931B9A2710\n000000067F0000400200008A590000EA2841-000000067F0000400200008A590000EAB20E__0000001AA3F3E569-0000001B43A3F241\n000000067F0000400200008A590000EA4000-000000067F0000400200008A590000EA8000__0000001C725A2400\n000000067F0000400200008A590000EA4000-000000067F0000400200008A590000EA8000__0000001C760FA190\n000000067F0000400200008A590000EA4000-000000067F0000400200008A590000EA8000__00000038E67ABFA0\n000000067F0000400200008A590000EA4000-000000067F0000400200008A590000EA8000__0000003903F1CFE8\n000000067F0000400200008A590000EA4000-000000067F0000400200008A590000EA8000__0000003B99F7F8A0\n000000067F0000400200008A590000EA4000-000000067F0000400200008A590000EA8000__0000005D2FFFFB38\n000000067F0000400200008A590000EA4000-000000067F0000400200008A590000EA8000__00000073AD3FE6B8\n000000067F0000400200008A590000EA4000-000000067F0000400200008A590000EA8000__000000914E3F38F0\n000000067F0000400200008A590000EA4000-000000067F0000400200008A590000EA8000__000000931B9A2710\n000000067F0000400200008A590000EA8000-000000067F0000400200008A5900
00EAC000__0000001C725A2400\n000000067F0000400200008A590000EA8000-000000067F0000400200008A590000EAC000__0000001C760FA190\n000000067F0000400200008A590000EA8000-000000067F0000400200008A590000EAC000__00000038E67ABFA0\n000000067F0000400200008A590000EA8000-000000067F0000400200008A590000EAC000__0000003903F1CFE8\n000000067F0000400200008A590000EA8000-000000067F0000400200008A590000EAC000__0000003B99F7F8A0\n000000067F0000400200008A590000EA8000-000000067F0000400200008A590000EAC000__0000005D2FFFFB38\n000000067F0000400200008A590000EA8000-000000067F0000400200008A590000EAC000__00000073AD3FE6B8\n000000067F0000400200008A590000EA8000-000000067F0000400200008A590000EAC000__000000914E3F38F0\n000000067F0000400200008A590000EA8000-000000067F0000400200008A590000EAC000__000000931B9A2710\n000000067F0000400200008A590000EAB20E-000000067F0000400200008A590000EB3BEC__0000001AA3F3E569-0000001B43A3F241\n000000067F0000400200008A590000EAC000-000000067F0000400200008A590000EB0000__0000001C725A2400\n000000067F0000400200008A590000EAC000-000000067F0000400200008A590000EB0000__0000001C760FA190\n000000067F0000400200008A590000EAC000-000000067F0000400200008A590000EB0000__00000038E67ABFA0\n000000067F0000400200008A590000EAC000-000000067F0000400200008A590000EB0000__0000003903F1CFE8\n000000067F0000400200008A590000EAC000-000000067F0000400200008A590000EB0000__0000003B99F7F8A0\n000000067F0000400200008A590000EAC000-000000067F0000400200008A590000EB0000__0000005D2FFFFB38\n000000067F0000400200008A590000EAC000-000000067F0000400200008A590000EB0000__00000073AD3FE6B8\n000000067F0000400200008A590000EAC000-000000067F0000400200008A590000EB0000__000000914E3F38F0\n000000067F0000400200008A590000EAC000-000000067F0000400200008A590000EB0000__000000931B9A2710\n000000067F0000400200008A590000EB0000-000000067F0000400200008A590000EB4000__0000001C725A2400\n000000067F0000400200008A590000EB0000-000000067F0000400200008A590000EB4000__0000001C760FA190\n000000067F0000400200008A590000EB0000-000000067F0000400200008A590000EB4000__00000038E67ABFA0\n00
0000067F0000400200008A590000EB0000-000000067F0000400200008A590000EB4000__0000003903F1CFE8\n000000067F0000400200008A590000EB0000-000000067F0000400200008A590000EB4000__0000003B99F7F8A0\n000000067F0000400200008A590000EB0000-000000067F0000400200008A590000EB4000__0000005D2FFFFB38\n000000067F0000400200008A590000EB0000-000000067F0000400200008A590000EB4000__00000073AD3FE6B8\n000000067F0000400200008A590000EB0000-000000067F0000400200008A590000EB4000__000000914E3F38F0\n000000067F0000400200008A590000EB0000-000000067F0000400200008A590000EB4000__000000931B9A2710\n000000067F0000400200008A590000EB3BEC-000000067F0000400200008A590000EBC5C4__0000001AA3F3E569-0000001B43A3F241\n000000067F0000400200008A590000EB4000-000000067F0000400200008A590000EB8000__0000001C725A2400\n000000067F0000400200008A590000EB4000-000000067F0000400200008A590000EB8000__0000001C760FA190\n000000067F0000400200008A590000EB4000-000000067F0000400200008A590000EB8000__00000038E67ABFA0\n000000067F0000400200008A590000EB4000-000000067F0000400200008A590000EB8000__0000003903F1CFE8\n000000067F0000400200008A590000EB4000-000000067F0000400200008A590000EB8000__0000003B99F7F8A0\n000000067F0000400200008A590000EB4000-000000067F0000400200008A590000EB8000__0000005D2FFFFB38\n000000067F0000400200008A590000EB4000-000000067F0000400200008A590000EB8000__00000073AD3FE6B8\n000000067F0000400200008A590000EB4000-000000067F0000400200008A590000EB8000__000000914E3F38F0\n000000067F0000400200008A590000EB4000-000000067F0000400200008A590000EB8000__000000931B9A2710\n000000067F0000400200008A590000EB8000-000000067F0000400200008A590000EBC000__0000001C725A2400\n000000067F0000400200008A590000EB8000-000000067F0000400200008A590000EBC000__0000001C760FA190\n000000067F0000400200008A590000EB8000-000000067F0000400200008A590000EBC000__00000038E67ABFA0\n000000067F0000400200008A590000EB8000-000000067F0000400200008A590000EBC000__0000003903F1CFE8\n000000067F0000400200008A590000EB8000-000000067F0000400200008A590000EBC000__0000003B99F7F8A0\n000000067F0000400200008A590000EB
8000-000000067F0000400200008A590000EBC000__0000005D2FFFFB38\n000000067F0000400200008A590000EB8000-000000067F0000400200008A590000EBC000__00000073AD3FE6B8\n000000067F0000400200008A590000EB8000-000000067F0000400200008A590000EBC000__000000914E3F38F0\n000000067F0000400200008A590000EB8000-000000067F0000400200008A590000EBC000__000000931B9A2710\n000000067F0000400200008A590000EBC000-000000067F0000400200008A590000EC0000__0000001C046BD098\n000000067F0000400200008A590000EBC000-000000067F0000400200008A590000EC0000__0000001C760FA190\n000000067F0000400200008A590000EBC000-000000067F0000400200008A590000EC0000__00000038E67ABFA0\n000000067F0000400200008A590000EBC000-000000067F0000400200008A590000EC0000__0000003903F1CFE8\n000000067F0000400200008A590000EBC000-000000067F0000400200008A590000EC0000__0000003B99F7F8A0\n000000067F0000400200008A590000EBC000-000000067F0000400200008A590000EC0000__0000005D2FFFFB38\n000000067F0000400200008A590000EBC000-000000067F0000400200008A590000EC0000__00000073AD3FE6B8\n000000067F0000400200008A590000EBC000-000000067F0000400200008A590000EC0000__000000914E3F38F0\n000000067F0000400200008A590000EBC000-000000067F0000400200008A590000EC0000__000000931B9A2710\n000000067F0000400200008A590000EBC5C4-000000067F0000400200008A590100000000__0000001AA3F3E569-0000001B43A3F241\n000000067F0000400200008A590000EBC8A4-000000067F0000400200008A590000EC527C__0000001B43A3F241-0000001BE353E181\n000000067F0000400200008A590000EC0000-000000067F0000400200008A590000EC4000__0000001C046BD098\n000000067F0000400200008A590000EC0000-000000067F0000400200008A590000EC4000__0000001C760FA190\n000000067F0000400200008A590000EC0000-000000067F0000400200008A590000EC4000__00000038E67ABFA0\n000000067F0000400200008A590000EC0000-000000067F0000400200008A590000EC4000__0000003903F1CFE8\n000000067F0000400200008A590000EC0000-000000067F0000400200008A590000EC4000__0000003B99F7F8A0\n000000067F0000400200008A590000EC0000-000000067F0000400200008A590000EC4000__0000005D2FFFFB38\n000000067F0000400200008A590000EC0000-00000006
7F0000400200008A590000EC4000__00000073AD3FE6B8\n000000067F0000400200008A590000EC0000-000000067F0000400200008A590000EC4000__000000914E3F38F0\n000000067F0000400200008A590000EC0000-000000067F0000400200008A590000EC4000__000000931B9A2710\n000000067F0000400200008A590000EC4000-000000067F0000400200008A590000EC8000__0000001C046BD098\n000000067F0000400200008A590000EC4000-000000067F0000400200008A590000EC8000__0000001C760FA190\n000000067F0000400200008A590000EC4000-000000067F0000400200008A590000EC8000__00000038E67ABFA0\n000000067F0000400200008A590000EC4000-000000067F0000400200008A590000EC8000__0000003903F1CFE8\n000000067F0000400200008A590000EC4000-000000067F0000400200008A590000EC8000__0000003B99F7F8A0\n000000067F0000400200008A590000EC4000-000000067F0000400200008A590000EC8000__0000005D2FFFFB38\n000000067F0000400200008A590000EC4000-000000067F0000400200008A590000EC8000__00000073AD3FE6B8\n000000067F0000400200008A590000EC4000-000000067F0000400200008A590000EC8000__000000914E3F38F0\n000000067F0000400200008A590000EC4000-000000067F0000400200008A590000EC8000__000000931B9A2710\n000000067F0000400200008A590000EC527C-000000067F0000400200008A590000ECDC5F__0000001B43A3F241-0000001BE353E181\n000000067F0000400200008A590000EC8000-000000067F0000400200008A590000ECC000__0000001C046BD098\n000000067F0000400200008A590000EC8000-000000067F0000400200008A590000ECC000__0000001C760FA190\n000000067F0000400200008A590000EC8000-000000067F0000400200008A590000ECC000__00000038E67ABFA0\n000000067F0000400200008A590000EC8000-000000067F0000400200008A590000ECC000__0000003903F1CFE8\n000000067F0000400200008A590000EC8000-000000067F0000400200008A590000ECC000__0000003B99F7F8A0\n000000067F0000400200008A590000EC8000-000000067F0000400200008A590000ECC000__0000005D2FFFFB38\n000000067F0000400200008A590000EC8000-000000067F0000400200008A590000ECC000__00000073AD3FE6B8\n000000067F0000400200008A590000EC8000-000000067F0000400200008A590000ECC000__000000914E3F38F0\n000000067F0000400200008A590000EC8000-000000067F0000400200008A590000ECC000__
000000931B9A2710\n000000067F0000400200008A590000ECC000-000000067F0000400200008A590000ED0000__0000001C046BD098\n000000067F0000400200008A590000ECC000-000000067F0000400200008A590000ED0000__0000001C760FA190\n000000067F0000400200008A590000ECC000-000000067F0000400200008A590000ED0000__00000038E67ABFA0\n000000067F0000400200008A590000ECC000-000000067F0000400200008A590000ED0000__0000003903F1CFE8\n000000067F0000400200008A590000ECC000-000000067F0000400200008A590000ED0000__0000003B99F7F8A0\n000000067F0000400200008A590000ECC000-000000067F0000400200008A590000ED0000__0000005D2FFFFB38\n000000067F0000400200008A590000ECC000-000000067F0000400200008A590000ED0000__00000073AD3FE6B8\n000000067F0000400200008A590000ECC000-000000067F0000400200008A590000ED0000__000000914E3F38F0\n000000067F0000400200008A590000ECC000-000000067F0000400200008A590000ED0000__000000931B9A2710\n000000067F0000400200008A590000ECDC5F-000000067F0000400200008A590000ED663C__0000001B43A3F241-0000001BE353E181\n000000067F0000400200008A590000ED0000-000000067F0000400200008A590000ED4000__0000001C046BD098\n000000067F0000400200008A590000ED0000-000000067F0000400200008A590000ED4000__0000001C760FA190\n000000067F0000400200008A590000ED0000-000000067F0000400200008A590000ED4000__00000038E67ABFA0\n000000067F0000400200008A590000ED0000-000000067F0000400200008A590000ED4000__0000003903F1CFE8\n000000067F0000400200008A590000ED0000-000000067F0000400200008A590000ED4000__0000003B99F7F8A0\n000000067F0000400200008A590000ED0000-000000067F0000400200008A590000ED4000__0000005D2FFFFB38\n000000067F0000400200008A590000ED0000-000000067F0000400200008A590000ED4000__00000073AD3FE6B8\n000000067F0000400200008A590000ED0000-000000067F0000400200008A590000ED4000__000000914E3F38F0\n000000067F0000400200008A590000ED0000-000000067F0000400200008A590000ED4000__000000931B9A2710\n000000067F0000400200008A590000ED4000-000000067F0000400200008A590000ED8000__0000001C046BD098\n000000067F0000400200008A590000ED4000-000000067F0000400200008A590000ED8000__0000001C760FA190\n000000067F00
00400200008A590000ED4000-000000067F0000400200008A590000ED8000__00000038E67ABFA0\n000000067F0000400200008A590000ED4000-000000067F0000400200008A590000ED8000__0000003903F1CFE8\n000000067F0000400200008A590000ED4000-000000067F0000400200008A590000ED8000__0000003B99F7F8A0\n000000067F0000400200008A590000ED4000-000000067F0000400200008A590000ED8000__0000005D2FFFFB38\n000000067F0000400200008A590000ED4000-000000067F0000400200008A590000ED8000__00000073AD3FE6B8\n000000067F0000400200008A590000ED4000-000000067F0000400200008A590000ED8000__000000914E3F38F0\n000000067F0000400200008A590000ED4000-000000067F0000400200008A590000ED8000__000000931B9A2710\n000000067F0000400200008A590000ED663C-000000067F0000400200008A590000EDF017__0000001B43A3F241-0000001BE353E181\n000000067F0000400200008A590000ED8000-000000067F0000400200008A590000EDC000__0000001C046BD098\n000000067F0000400200008A590000ED8000-000000067F0000400200008A590000EDC000__0000001C760FA190\n000000067F0000400200008A590000ED8000-000000067F0000400200008A590000EDC000__00000038E67ABFA0\n000000067F0000400200008A590000ED8000-000000067F0000400200008A590000EDC000__0000003903F1CFE8\n000000067F0000400200008A590000ED8000-000000067F0000400200008A590000EDC000__0000003B99F7F8A0\n000000067F0000400200008A590000ED8000-000000067F0000400200008A590000EDC000__0000005D2FFFFB38\n000000067F0000400200008A590000ED8000-000000067F0000400200008A590000EDC000__00000073AD3FE6B8\n000000067F0000400200008A590000ED8000-000000067F0000400200008A590000EDC000__000000914E3F38F0\n000000067F0000400200008A590000ED8000-000000067F0000400200008A590000EDC000__000000931B9A2710\n000000067F0000400200008A590000EDC000-000000067F0000400200008A590000EE0000__0000001C046BD098\n000000067F0000400200008A590000EDC000-000000067F0000400200008A590000EE0000__0000001C760FA190\n000000067F0000400200008A590000EDC000-000000067F0000400200008A590000EE0000__00000038E67ABFA0\n000000067F0000400200008A590000EDC000-000000067F0000400200008A590000EE0000__0000003903F1CFE8\n000000067F0000400200008A590000EDC000-00000
0067F0000400200008A590000EE0000__0000003B99F7F8A0\n000000067F0000400200008A590000EDC000-000000067F0000400200008A590000EE0000__0000005D2FFFFB38\n000000067F0000400200008A590000EDC000-000000067F0000400200008A590000EE0000__00000073AD3FE6B8\n000000067F0000400200008A590000EDC000-000000067F0000400200008A590000EE0000__000000914E3F38F0\n000000067F0000400200008A590000EDC000-000000067F0000400200008A590000EE0000__000000931B9A2710\n000000067F0000400200008A590000EDF017-000000067F0000400200008A590000EE79E6__0000001B43A3F241-0000001BE353E181\n000000067F0000400200008A590000EE0000-000000067F0000400200008A590000EE4000__0000001C046BD098\n000000067F0000400200008A590000EE0000-000000067F0000400200008A590000EE4000__0000001C760FA190\n000000067F0000400200008A590000EE0000-000000067F0000400200008A590000EE4000__00000038E67ABFA0\n000000067F0000400200008A590000EE0000-000000067F0000400200008A590000EE4000__0000003903F1CFE8\n000000067F0000400200008A590000EE0000-000000067F0000400200008A590000EE4000__0000003B99F7F8A0\n000000067F0000400200008A590000EE0000-000000067F0000400200008A590000EE4000__0000005D2FFFFB38\n000000067F0000400200008A590000EE0000-000000067F0000400200008A590000EE4000__00000073AD3FE6B8\n000000067F0000400200008A590000EE0000-000000067F0000400200008A590000EE4000__000000914E3F38F0\n000000067F0000400200008A590000EE0000-000000067F0000400200008A590000EE4000__000000931B9A2710\n000000067F0000400200008A590000EE4000-000000067F0000400200008A590000EE8000__0000001C046BD098\n000000067F0000400200008A590000EE4000-000000067F0000400200008A590000EE8000__0000001C760FA190\n000000067F0000400200008A590000EE4000-000000067F0000400200008A590000EE8000__00000038E67ABFA0\n000000067F0000400200008A590000EE4000-000000067F0000400200008A590000EE8000__0000003903F1CFE8\n000000067F0000400200008A590000EE4000-000000067F0000400200008A590000EE8000__0000003B99F7F8A0\n000000067F0000400200008A590000EE4000-000000067F0000400200008A590000EE8000__0000005D2FFFFB38\n000000067F0000400200008A590000EE4000-000000067F0000400200008A590000EE800
0__00000073AD3FE6B8\n000000067F0000400200008A590000EE4000-000000067F0000400200008A590000EE8000__000000914E3F38F0\n000000067F0000400200008A590000EE4000-000000067F0000400200008A590000EE8000__000000931B9A2710\n000000067F0000400200008A590000EE79E6-000000067F0000400200008A590000EF03CB__0000001B43A3F241-0000001BE353E181\n000000067F0000400200008A590000EE8000-000000067F0000400200008A590000EEC000__0000001C046BD098\n000000067F0000400200008A590000EE8000-000000067F0000400200008A590000EEC000__0000001C760FA190\n000000067F0000400200008A590000EE8000-000000067F0000400200008A590000EEC000__00000038E67ABFA0\n000000067F0000400200008A590000EE8000-000000067F0000400200008A590000EEC000__0000003903F1CFE8\n000000067F0000400200008A590000EE8000-000000067F0000400200008A590000EEC000__0000003B99F7F8A0\n000000067F0000400200008A590000EE8000-000000067F0000400200008A590000EEC000__0000005D2FFFFB38\n000000067F0000400200008A590000EE8000-000000067F0000400200008A590000EEC000__00000073AD3FE6B8\n000000067F0000400200008A590000EE8000-000000067F0000400200008A590000EEC000__000000914E3F38F0\n000000067F0000400200008A590000EE8000-000000067F0000400200008A590000EEC000__000000931B9A2710\n000000067F0000400200008A590000EEC000-000000067F0000400200008A590000EF0000__0000001C046BD098\n000000067F0000400200008A590000EEC000-000000067F0000400200008A590000EF0000__0000001C760FA190\n000000067F0000400200008A590000EEC000-000000067F0000400200008A590000EF0000__00000038E67ABFA0\n000000067F0000400200008A590000EEC000-000000067F0000400200008A590000EF0000__0000003903F1CFE8\n000000067F0000400200008A590000EEC000-000000067F0000400200008A590000EF0000__0000003B99F7F8A0\n000000067F0000400200008A590000EEC000-000000067F0000400200008A590000EF0000__0000005D2FFFFB38\n000000067F0000400200008A590000EEC000-000000067F0000400200008A590000EF0000__00000073AD3FE6B8\n000000067F0000400200008A590000EEC000-000000067F0000400200008A590000EF0000__000000914E3F38F0\n000000067F0000400200008A590000EEC000-000000067F0000400200008A590000EF0000__000000931B9A2710\n000000067
F0000400200008A590000EF0000-000000067F0000400200008A590000EF4000__0000001C046BD098\n000000067F0000400200008A590000EF0000-000000067F0000400200008A590000EF4000__0000001C760FA190\n000000067F0000400200008A590000EF0000-000000067F0000400200008A590000EF4000__00000038E67ABFA0\n000000067F0000400200008A590000EF0000-000000067F0000400200008A590000EF4000__0000003903F1CFE8\n000000067F0000400200008A590000EF0000-000000067F0000400200008A590000EF4000__0000003B99F7F8A0\n000000067F0000400200008A590000EF0000-000000067F0000400200008A590000EF4000__0000005D2FFFFB38\n000000067F0000400200008A590000EF0000-000000067F0000400200008A590000EF4000__00000073AD3FE6B8\n000000067F0000400200008A590000EF0000-000000067F0000400200008A590000EF4000__000000914E3F38F0\n000000067F0000400200008A590000EF0000-000000067F0000400200008A590000EF4000__000000931B9A2710\n000000067F0000400200008A590000EF03CB-000000067F0000400200008A590000EF8DAC__0000001B43A3F241-0000001BE353E181\n000000067F0000400200008A590000EF4000-000000067F0000400200008A590000EF8000__0000001C046BD098\n000000067F0000400200008A590000EF4000-000000067F0000400200008A590000EF8000__0000001C760FA190\n000000067F0000400200008A590000EF4000-000000067F0000400200008A590000EF8000__00000038E67ABFA0\n000000067F0000400200008A590000EF4000-000000067F0000400200008A590000EF8000__0000003903F1CFE8\n000000067F0000400200008A590000EF4000-000000067F0000400200008A590000EF8000__0000003B99F7F8A0\n000000067F0000400200008A590000EF4000-000000067F0000400200008A590000EF8000__0000005D2FFFFB38\n000000067F0000400200008A590000EF4000-000000067F0000400200008A590000EF8000__00000073AD3FE6B8\n000000067F0000400200008A590000EF4000-000000067F0000400200008A590000EF8000__000000914E3F38F0\n000000067F0000400200008A590000EF4000-000000067F0000400200008A590000EF8000__000000931B9A2710\n000000067F0000400200008A590000EF8000-000000067F0000400200008A590000EFC000__0000001C046BD098\n000000067F0000400200008A590000EF8000-000000067F0000400200008A590000EFC000__0000001C760FA190\n000000067F0000400200008A590000EF8000-00
0000067F0000400200008A590000EFC000__00000038E67ABFA0\n000000067F0000400200008A590000EF8000-000000067F0000400200008A590000EFC000__0000003903F1CFE8\n000000067F0000400200008A590000EF8000-000000067F0000400200008A590000EFC000__0000003B99F7F8A0\n000000067F0000400200008A590000EF8000-000000067F0000400200008A590000EFC000__0000005D2FFFFB38\n000000067F0000400200008A590000EF8000-000000067F0000400200008A590000EFC000__00000073AD3FE6B8\n000000067F0000400200008A590000EF8000-000000067F0000400200008A590000EFC000__000000914E3F38F0\n000000067F0000400200008A590000EF8000-000000067F0000400200008A590000EFC000__000000931B9A2710\n000000067F0000400200008A590000EF8DAC-000000067F0000400200008A590000F01798__0000001B43A3F241-0000001BE353E181\n000000067F0000400200008A590000EFC000-000000067F0000400200008A590000F00000__0000001C046BD098\n000000067F0000400200008A590000EFC000-000000067F0000400200008A590000F00000__0000001C760FA190\n000000067F0000400200008A590000EFC000-000000067F0000400200008A590000F00000__00000038E67ABFA0\n000000067F0000400200008A590000EFC000-000000067F0000400200008A590000F00000__0000003903F1CFE8\n000000067F0000400200008A590000EFC000-000000067F0000400200008A590000F00000__0000003B99F7F8A0\n000000067F0000400200008A590000EFC000-000000067F0000400200008A590000F00000__0000005D2FFFFB38\n000000067F0000400200008A590000EFC000-000000067F0000400200008A590000F00000__00000073AD3FE6B8\n000000067F0000400200008A590000EFC000-000000067F0000400200008A590000F00000__000000914E3F38F0\n000000067F0000400200008A590000EFC000-000000067F0000400200008A590000F00000__000000931B9A2710\n000000067F0000400200008A590000F00000-000000067F0000400200008A590000F04000__0000001C046BD098\n000000067F0000400200008A590000F00000-000000067F0000400200008A590000F04000__0000001C760FA190\n000000067F0000400200008A590000F00000-000000067F0000400200008A590000F04000__00000038E67ABFA0\n000000067F0000400200008A590000F00000-000000067F0000400200008A590000F04000__0000003903F1CFE8\n000000067F0000400200008A590000F00000-000000067F0000400200008A590000F0
4000__0000003B99F7F8A0\n000000067F0000400200008A590000F00000-000000067F0000400200008A590000F04000__0000005D2FFFFB38\n000000067F0000400200008A590000F00000-000000067F0000400200008A590000F04000__00000073AD3FE6B8\n000000067F0000400200008A590000F00000-000000067F0000400200008A590000F04000__000000914E3F38F0\n000000067F0000400200008A590000F00000-000000067F0000400200008A590000F04000__000000931B9A2710\n000000067F0000400200008A590000F01798-000000067F0000400200008A590000F0A18F__0000001B43A3F241-0000001BE353E181\n000000067F0000400200008A590000F04000-000000067F0000400200008A590000F08000__0000001C046BD098\n000000067F0000400200008A590000F04000-000000067F0000400200008A590000F08000__0000001C760FA190\n000000067F0000400200008A590000F04000-000000067F0000400200008A590000F08000__00000038E67ABFA0\n000000067F0000400200008A590000F04000-000000067F0000400200008A590000F08000__0000003903F1CFE8\n000000067F0000400200008A590000F04000-000000067F0000400200008A590000F08000__0000003B99F7F8A0\n000000067F0000400200008A590000F04000-000000067F0000400200008A590000F08000__0000005D2FFFFB38\n000000067F0000400200008A590000F04000-000000067F0000400200008A590000F08000__00000073AD3FE6B8\n000000067F0000400200008A590000F04000-000000067F0000400200008A590000F08000__000000914E3F38F0\n000000067F0000400200008A590000F04000-000000067F0000400200008A590000F08000__000000931B9A2710\n000000067F0000400200008A590000F08000-000000067F0000400200008A590000F0C000__0000001C046BD098\n000000067F0000400200008A590000F08000-000000067F0000400200008A590000F0C000__0000001C760FA190\n000000067F0000400200008A590000F08000-000000067F0000400200008A590000F0C000__00000038E67ABFA0\n000000067F0000400200008A590000F08000-000000067F0000400200008A590000F0C000__0000003903F1CFE8\n000000067F0000400200008A590000F08000-000000067F0000400200008A590000F0C000__0000003B99F7F8A0\n000000067F0000400200008A590000F08000-000000067F0000400200008A590000F0C000__0000005D2FFFFB38\n000000067F0000400200008A590000F08000-000000067F0000400200008A590000F0C000__00000073AD3FE6B8\n000000
067F0000400200008A590000F08000-000000067F0000400200008A590000F0C000__000000914E3F38F0\n000000067F0000400200008A590000F08000-000000067F0000400200008A590000F0C000__000000931B9A2710\n000000067F0000400200008A590000F0A18F-000000067F0000400200008A590000F12B69__0000001B43A3F241-0000001BE353E181\n000000067F0000400200008A590000F0C000-000000067F0000400200008A590000F10000__0000001C046BD098\n000000067F0000400200008A590000F0C000-000000067F0000400200008A590000F10000__0000001C760FA190\n000000067F0000400200008A590000F0C000-000000067F0000400200008A590000F10000__00000038E67ABFA0\n000000067F0000400200008A590000F0C000-000000067F0000400200008A590000F10000__0000003903F1CFE8\n000000067F0000400200008A590000F0C000-000000067F0000400200008A590000F10000__0000003B99F7F8A0\n000000067F0000400200008A590000F0C000-000000067F0000400200008A590000F10000__0000005D2FFFFB38\n000000067F0000400200008A590000F0C000-000000067F0000400200008A590000F10000__00000073AD3FE6B8\n000000067F0000400200008A590000F0C000-000000067F0000400200008A590000F10000__000000914E3F38F0\n000000067F0000400200008A590000F0C000-000000067F0000400200008A590000F10000__000000931B9A2710\n000000067F0000400200008A590000F10000-000000067F0000400200008A590000F14000__0000001C046BD098\n000000067F0000400200008A590000F10000-000000067F0000400200008A590000F14000__0000001C760FA190\n000000067F0000400200008A590000F10000-000000067F0000400200008A590000F14000__00000038E67ABFA0\n000000067F0000400200008A590000F10000-000000067F0000400200008A590000F14000__0000003903F1CFE8\n000000067F0000400200008A590000F10000-000000067F0000400200008A590000F14000__0000003B99F7F8A0\n000000067F0000400200008A590000F10000-000000067F0000400200008A590000F14000__0000005D2FFFFB38\n000000067F0000400200008A590000F10000-000000067F0000400200008A590000F14000__00000073AD3FE6B8\n000000067F0000400200008A590000F10000-000000067F0000400200008A590000F14000__000000914E3F38F0\n000000067F0000400200008A590000F10000-000000067F0000400200008A590000F14000__000000931B9A2710\n000000067F0000400200008A590000F12B69
-000000067F0000400200008A590100000000__0000001B43A3F241-0000001BE353E181\n000000067F0000400200008A590000F14000-000000067F0000400200008A590000F18000__0000001C046BD098\n000000067F0000400200008A590000F14000-000000067F0000400200008A590000F18000__0000001C760FA190\n000000067F0000400200008A590000F14000-000000067F0000400200008A590000F18000__00000038E67ABFA0\n000000067F0000400200008A590000F14000-000000067F0000400200008A590000F18000__0000003903F1CFE8\n000000067F0000400200008A590000F14000-000000067F0000400200008A590000F18000__0000003B99F7F8A0\n000000067F0000400200008A590000F14000-000000067F0000400200008A590000F18000__0000005D2FFFFB38\n000000067F0000400200008A590000F14000-000000067F0000400200008A590000F18000__00000073AD3FE6B8\n000000067F0000400200008A590000F14000-000000067F0000400200008A590000F18000__000000914E3F38F0\n000000067F0000400200008A590000F14000-000000067F0000400200008A590000F18000__000000931B9A2710\n000000067F0000400200008A590000F18000-000000067F0000400200008A590000F1C000__0000001C046BD098\n000000067F0000400200008A590000F18000-000000067F0000400200008A590000F1C000__0000001C760FA190\n000000067F0000400200008A590000F18000-000000067F0000400200008A590000F1C000__00000038E67ABFA0\n000000067F0000400200008A590000F18000-000000067F0000400200008A590000F1C000__0000003903F1CFE8\n000000067F0000400200008A590000F18000-000000067F0000400200008A590000F1C000__0000003B99F7F8A0\n000000067F0000400200008A590000F18000-000000067F0000400200008A590000F1C000__0000005D2FFFFB38\n000000067F0000400200008A590000F18000-000000067F0000400200008A590000F1C000__00000073AD3FE6B8\n000000067F0000400200008A590000F18000-000000067F0000400200008A590000F1C000__000000914E3F38F0\n000000067F0000400200008A590000F18000-000000067F0000400200008A590000F1C000__000000931B9A2710\n000000067F0000400200008A590000F1B7DD-000000067F0000400200008A590000F241C2__0000001BE353E181-0000001C725A5929\n000000067F0000400200008A590000F1C000-000000067F0000400200008A590000F20000__0000001C046BD098\n000000067F0000400200008A590000F1C000-000000067F00
00400200008A590000F20000__0000001C760FA190\n000000067F0000400200008A590000F1C000-000000067F0000400200008A590000F20000__00000038E67ABFA0\n000000067F0000400200008A590000F1C000-000000067F0000400200008A590000F20000__0000003903F1CFE8\n000000067F0000400200008A590000F1C000-000000067F0000400200008A590000F20000__0000003B99F7F8A0\n000000067F0000400200008A590000F1C000-000000067F0000400200008A590000F20000__0000005D2FFFFB38\n000000067F0000400200008A590000F1C000-000000067F0000400200008A590000F20000__00000073AD3FE6B8\n000000067F0000400200008A590000F1C000-000000067F0000400200008A590000F20000__000000914E3F38F0\n000000067F0000400200008A590000F1C000-000000067F0000400200008A590000F20000__000000931B9A2710\n000000067F0000400200008A590000F20000-000000067F0000400200008A590000F24000__0000001C046BD098\n000000067F0000400200008A590000F20000-000000067F0000400200008A590000F24000__0000001C760FA190\n000000067F0000400200008A590000F20000-000000067F0000400200008A590000F24000__00000038E67ABFA0\n000000067F0000400200008A590000F20000-000000067F0000400200008A590000F24000__0000003903F1CFE8\n000000067F0000400200008A590000F20000-000000067F0000400200008A590000F24000__0000003B99F7F8A0\n000000067F0000400200008A590000F20000-000000067F0000400200008A590000F24000__0000005D2FFFFB38\n000000067F0000400200008A590000F20000-000000067F0000400200008A590000F24000__00000073AD3FE6B8\n000000067F0000400200008A590000F20000-000000067F0000400200008A590000F24000__000000914E3F38F0\n000000067F0000400200008A590000F20000-000000067F0000400200008A590000F24000__000000931B9A2710\n000000067F0000400200008A590000F24000-000000067F0000400200008A590000F28000__0000001C760FA190\n000000067F0000400200008A590000F24000-000000067F0000400200008A590000F28000__00000038E67ABFA0\n000000067F0000400200008A590000F24000-000000067F0000400200008A590000F28000__0000003903F1CFE8\n000000067F0000400200008A590000F24000-000000067F0000400200008A590000F28000__0000003B99F7F8A0\n000000067F0000400200008A590000F24000-000000067F0000400200008A590000F28000__0000005D2FFFFB38\n000
000067F0000400200008A590000F24000-000000067F0000400200008A590000F28000__00000073AD3FE6B8\n000000067F0000400200008A590000F24000-000000067F0000400200008A590000F28000__000000914E3F38F0\n000000067F0000400200008A590000F24000-000000067F0000400200008A590000F28000__000000931B9A2710\n000000067F0000400200008A590000F24000-030000000000000000000000000000000002__0000001C046BD098\n000000067F0000400200008A590000F241C2-000000067F0000400200008A590000F2CBA0__0000001BE353E181-0000001C725A5929\n000000067F0000400200008A590000F28000-000000067F0000400200008A590000F2C000__0000001C760FA190\n000000067F0000400200008A590000F28000-000000067F0000400200008A590000F2C000__00000038E67ABFA0\n000000067F0000400200008A590000F28000-000000067F0000400200008A590000F2C000__0000003903F1CFE8\n000000067F0000400200008A590000F28000-000000067F0000400200008A590000F2C000__0000003B99F7F8A0\n000000067F0000400200008A590000F28000-000000067F0000400200008A590000F2C000__0000005D2FFFFB38\n000000067F0000400200008A590000F28000-000000067F0000400200008A590000F2C000__00000073AD3FE6B8\n000000067F0000400200008A590000F28000-000000067F0000400200008A590000F2C000__000000914E3F38F0\n000000067F0000400200008A590000F28000-000000067F0000400200008A590000F2C000__000000931B9A2710\n000000067F0000400200008A590000F2C000-000000067F0000400200008A590000F30000__0000001C760FA190\n000000067F0000400200008A590000F2C000-000000067F0000400200008A590000F30000__00000038E67ABFA0\n000000067F0000400200008A590000F2C000-000000067F0000400200008A590000F30000__0000003903F1CFE8\n000000067F0000400200008A590000F2C000-000000067F0000400200008A590000F30000__0000003B99F7F8A0\n000000067F0000400200008A590000F2C000-000000067F0000400200008A590000F30000__0000005D2FFFFB38\n000000067F0000400200008A590000F2C000-000000067F0000400200008A590000F30000__00000073AD3FE6B8\n000000067F0000400200008A590000F2C000-000000067F0000400200008A590000F30000__000000914E3F38F0\n000000067F0000400200008A590000F2C000-000000067F0000400200008A590000F30000__000000931B9A2710\n000000067F0000400200008A590000F2C
BA0-000000067F0000400200008A590000F35584__0000001BE353E181-0000001C725A5929\n000000067F0000400200008A590000F30000-000000067F0000400200008A590000F34000__0000001C760FA190\n000000067F0000400200008A590000F30000-000000067F0000400200008A590000F34000__00000038E67ABFA0\n000000067F0000400200008A590000F30000-000000067F0000400200008A590000F34000__0000003903F1CFE8\n000000067F0000400200008A590000F30000-000000067F0000400200008A590000F34000__0000003B99F7F8A0\n000000067F0000400200008A590000F30000-000000067F0000400200008A590000F34000__0000005D2FFFFB38\n000000067F0000400200008A590000F30000-000000067F0000400200008A590000F34000__00000073AD3FE6B8\n000000067F0000400200008A590000F30000-000000067F0000400200008A590000F34000__000000914E3F38F0\n000000067F0000400200008A590000F30000-000000067F0000400200008A590000F34000__000000931B9A2710\n000000067F0000400200008A590000F34000-000000067F0000400200008A590000F38000__0000001C760FA190\n000000067F0000400200008A590000F34000-000000067F0000400200008A590000F38000__00000038E67ABFA0\n000000067F0000400200008A590000F34000-000000067F0000400200008A590000F38000__0000003903F1CFE8\n000000067F0000400200008A590000F34000-000000067F0000400200008A590000F38000__0000003B99F7F8A0\n000000067F0000400200008A590000F34000-000000067F0000400200008A590000F38000__0000005D2FFFFB38\n000000067F0000400200008A590000F34000-000000067F0000400200008A590000F38000__00000073AD3FE6B8\n000000067F0000400200008A590000F34000-000000067F0000400200008A590000F38000__000000914E3F38F0\n000000067F0000400200008A590000F34000-000000067F0000400200008A590000F38000__000000931B9A2710\n000000067F0000400200008A590000F35584-000000067F0000400200008A590000F3DF5E__0000001BE353E181-0000001C725A5929\n000000067F0000400200008A590000F38000-000000067F0000400200008A590000F3C000__0000001C760FA190\n000000067F0000400200008A590000F38000-000000067F0000400200008A590000F3C000__00000038E67ABFA0\n000000067F0000400200008A590000F38000-000000067F0000400200008A590000F3C000__0000003903F1CFE8\n000000067F0000400200008A590000F38000-000000067
F0000400200008A590000F3C000__0000003B99F7F8A0\n000000067F0000400200008A590000F38000-000000067F0000400200008A590000F3C000__0000005D2FFFFB38\n000000067F0000400200008A590000F38000-000000067F0000400200008A590000F3C000__00000073AD3FE6B8\n000000067F0000400200008A590000F38000-000000067F0000400200008A590000F3C000__000000914E3F38F0\n000000067F0000400200008A590000F38000-000000067F0000400200008A590000F3C000__000000931B9A2710\n000000067F0000400200008A590000F3C000-000000067F0000400200008A590000F40000__0000001C760FA190\n000000067F0000400200008A590000F3C000-000000067F0000400200008A590000F40000__00000038E67ABFA0\n000000067F0000400200008A590000F3C000-000000067F0000400200008A590000F40000__0000003903F1CFE8\n000000067F0000400200008A590000F3C000-000000067F0000400200008A590000F40000__0000003B99F7F8A0\n000000067F0000400200008A590000F3C000-000000067F0000400200008A590000F40000__0000005D2FFFFB38\n000000067F0000400200008A590000F3C000-000000067F0000400200008A590000F40000__00000073AD3FE6B8\n000000067F0000400200008A590000F3C000-000000067F0000400200008A590000F40000__000000914E3F38F0\n000000067F0000400200008A590000F3C000-000000067F0000400200008A590000F40000__000000931B9A2710\n000000067F0000400200008A590000F3DF5E-000000067F0000400200008A590000F46935__0000001BE353E181-0000001C725A5929\n000000067F0000400200008A590000F40000-000000067F0000400200008A590000F44000__0000001C760FA190\n000000067F0000400200008A590000F40000-000000067F0000400200008A590000F44000__00000038E67ABFA0\n000000067F0000400200008A590000F40000-000000067F0000400200008A590000F44000__0000003903F1CFE8\n000000067F0000400200008A590000F40000-000000067F0000400200008A590000F44000__0000003B99F7F8A0\n000000067F0000400200008A590000F40000-000000067F0000400200008A590000F44000__0000005D2FFFFB38\n000000067F0000400200008A590000F40000-000000067F0000400200008A590000F44000__00000073AD3FE6B8\n000000067F0000400200008A590000F40000-000000067F0000400200008A590000F44000__000000914E3F38F0\n000000067F0000400200008A590000F40000-000000067F0000400200008A590000F44000__0
00000931B9A2710\n000000067F0000400200008A590000F44000-000000067F0000400200008A590000F48000__0000001C760FA190\n000000067F0000400200008A590000F44000-000000067F0000400200008A590000F48000__00000038E67ABFA0\n000000067F0000400200008A590000F44000-000000067F0000400200008A590000F48000__0000003903F1CFE8\n000000067F0000400200008A590000F44000-000000067F0000400200008A590000F48000__0000003B99F7F8A0\n000000067F0000400200008A590000F44000-000000067F0000400200008A590000F48000__0000005D2FFFFB38\n000000067F0000400200008A590000F44000-000000067F0000400200008A590000F48000__00000073AD3FE6B8\n000000067F0000400200008A590000F44000-000000067F0000400200008A590000F48000__000000914E3F38F0\n000000067F0000400200008A590000F44000-000000067F0000400200008A590000F48000__000000931B9A2710\n000000067F0000400200008A590000F46935-000000067F0000400200008A590000F4F30D__0000001BE353E181-0000001C725A5929\n000000067F0000400200008A590000F48000-000000067F0000400200008A590000F4C000__0000001C760FA190\n000000067F0000400200008A590000F48000-000000067F0000400200008A590000F4C000__00000038E67ABFA0\n000000067F0000400200008A590000F48000-000000067F0000400200008A590000F4C000__0000003903F1CFE8\n000000067F0000400200008A590000F48000-000000067F0000400200008A590000F4C000__0000003B99F7F8A0\n000000067F0000400200008A590000F48000-000000067F0000400200008A590000F4C000__0000005D2FFFFB38\n000000067F0000400200008A590000F48000-000000067F0000400200008A590000F4C000__00000073AD3FE6B8\n000000067F0000400200008A590000F48000-000000067F0000400200008A590000F4C000__000000914E3F38F0\n000000067F0000400200008A590000F48000-000000067F0000400200008A590000F4C000__000000931B9A2710\n000000067F0000400200008A590000F4C000-000000067F0000400200008A590000F50000__0000001C760FA190\n000000067F0000400200008A590000F4C000-000000067F0000400200008A590000F50000__00000038E67ABFA0\n000000067F0000400200008A590000F4C000-000000067F0000400200008A590000F50000__0000003903F1CFE8\n000000067F0000400200008A590000F4C000-000000067F0000400200008A590000F50000__0000003B99F7F8A0\n000000067F000
0400200008A590000F4C000-000000067F0000400200008A590000F50000__0000005D2FFFFB38\n000000067F0000400200008A590000F4C000-000000067F0000400200008A590000F50000__00000073AD3FE6B8\n000000067F0000400200008A590000F4C000-000000067F0000400200008A590000F50000__000000914E3F38F0\n000000067F0000400200008A590000F4C000-000000067F0000400200008A590000F50000__000000931B9A2710\n000000067F0000400200008A590000F4F30D-000000067F0000400200008A590000F57CE5__0000001BE353E181-0000001C725A5929\n000000067F0000400200008A590000F50000-000000067F0000400200008A590000F54000__0000001C760FA190\n000000067F0000400200008A590000F50000-000000067F0000400200008A590000F54000__00000038E67ABFA0\n000000067F0000400200008A590000F50000-000000067F0000400200008A590000F54000__0000003903F1CFE8\n000000067F0000400200008A590000F50000-000000067F0000400200008A590000F54000__0000003B99F7F8A0\n000000067F0000400200008A590000F50000-000000067F0000400200008A590000F54000__0000005D2FFFFB38\n000000067F0000400200008A590000F50000-000000067F0000400200008A590000F54000__00000073AD3FE6B8\n000000067F0000400200008A590000F50000-000000067F0000400200008A590000F54000__000000914E3F38F0\n000000067F0000400200008A590000F50000-000000067F0000400200008A590000F54000__000000931B9A2710\n000000067F0000400200008A590000F54000-000000067F0000400200008A590000F58000__0000001C760FA190\n000000067F0000400200008A590000F54000-000000067F0000400200008A590000F58000__00000038E67ABFA0\n000000067F0000400200008A590000F54000-000000067F0000400200008A590000F58000__0000003903F1CFE8\n000000067F0000400200008A590000F54000-000000067F0000400200008A590000F58000__0000003B99F7F8A0\n000000067F0000400200008A590000F54000-000000067F0000400200008A590000F58000__0000005D2FFFFB38\n000000067F0000400200008A590000F54000-000000067F0000400200008A590000F58000__00000073AD3FE6B8\n000000067F0000400200008A590000F54000-000000067F0000400200008A590000F58000__000000914E3F38F0\n000000067F0000400200008A590000F54000-000000067F0000400200008A590000F58000__000000931B9A2710\n000000067F0000400200008A590000F57CE5-000000
067F0000400200008A590000F60351__0000001BE353E181-0000001C725A5929\n000000067F0000400200008A590000F58000-000000067F0000400200008A590000F5C000__0000001C760FA190\n000000067F0000400200008A590000F58000-000000067F0000400200008A590000F5C000__00000038E67ABFA0\n000000067F0000400200008A590000F58000-000000067F0000400200008A590000F5C000__0000003903F1CFE8\n000000067F0000400200008A590000F58000-000000067F0000400200008A590000F5C000__0000003B99F7F8A0\n000000067F0000400200008A590000F58000-000000067F0000400200008A590000F5C000__0000005D2FFFFB38\n000000067F0000400200008A590000F58000-000000067F0000400200008A590000F5C000__00000073AD3FE6B8\n000000067F0000400200008A590000F58000-000000067F0000400200008A590000F5C000__000000914E3F38F0\n000000067F0000400200008A590000F58000-000000067F0000400200008A590000F5C000__000000931B9A2710\n000000067F0000400200008A590000F5C000-000000067F0000400200008A590000F60000__0000001C760FA190\n000000067F0000400200008A590000F5C000-000000067F0000400200008A590000F60000__00000038E67ABFA0\n000000067F0000400200008A590000F5C000-000000067F0000400200008A590000F60000__0000003903F1CFE8\n000000067F0000400200008A590000F5C000-000000067F0000400200008A590000F60000__0000003B99F7F8A0\n000000067F0000400200008A590000F5C000-000000067F0000400200008A590000F60000__0000005D2FFFFB38\n000000067F0000400200008A590000F5C000-000000067F0000400200008A590000F60000__00000073AD3FE6B8\n000000067F0000400200008A590000F5C000-000000067F0000400200008A590000F60000__000000914E3F38F0\n000000067F0000400200008A590000F5C000-000000067F0000400200008A590000F60000__000000931B9A2710\n000000067F0000400200008A590000F60000-000000067F0000400200008A5E0100000000__00000038E67ABFA0\n000000067F0000400200008A590000F60000-000000067F0000400200008A5E0100000000__0000003903F1CFE8\n000000067F0000400200008A590000F60000-000000067F0000400200008A5E0100000000__0000003B99F7F8A0\n000000067F0000400200008A590000F60000-000000067F0000400200008A5E0100000000__0000005D2FFFFB38\n000000067F0000400200008A590000F60000-000000067F000040020000A0080100000000
__00000073AD3FE6B8\n000000067F0000400200008A590000F60000-000000067F000040020000A0080100000000__000000914E3F38F0\n000000067F0000400200008A590000F60000-000000067F000040020000A0080100000000__000000931B9A2710\n000000067F0000400200008A590000F60000-030000000000000000000000000000000002__0000001C760FA190\n000000067F0000400200008A5900FFFFFFFF-000000067F0000400200008A5E0100000000__0000001BE353E181-0000001C725A5929\n000000067F000040020000A0000000000000-000000067F000040020000A0000000004000__00000038E67ABFA0\n000000067F000040020000A0000000000000-000000067F000040020000A0000000004000__0000003903F1CFE8\n000000067F000040020000A0000000000000-000000067F000040020000A0000000004000__0000003B99F7F8A0\n000000067F000040020000A0000000000000-000000067F000040020000A0000000004000__0000005D2FFFFB38\n000000067F000040020000A0000000004000-000000067F000040020000A0000000008000__00000038E67ABFA0\n000000067F000040020000A0000000004000-000000067F000040020000A0000000008000__0000003903F1CFE8\n000000067F000040020000A0000000004000-000000067F000040020000A0000000008000__0000003B99F7F8A0\n000000067F000040020000A0000000004000-000000067F000040020000A0000000008000__0000005D2FFFFB38\n000000067F000040020000A0000000008000-000000067F000040020000A000000000C000__00000038E67ABFA0\n000000067F000040020000A0000000008000-000000067F000040020000A000000000C000__0000003903F1CFE8\n000000067F000040020000A0000000008000-000000067F000040020000A000000000C000__0000003B99F7F8A0\n000000067F000040020000A0000000008000-000000067F000040020000A000000000C000__0000005D2FFFFB38\n000000067F000040020000A0000000008989-000000067F000040020000A0000000011373__0000001C725D0191-0000002070591C61\n000000067F000040020000A000000000C000-000000067F000040020000A0000000010000__00000038E67ABFA0\n000000067F000040020000A000000000C000-000000067F000040020000A0000000010000__0000003903F1CFE8\n000000067F000040020000A000000000C000-000000067F000040020000A0000000010000__0000003B99F7F8A0\n000000067F000040020000A000000000C000-000000067F000040020000A0000000010000__0000005D2FF
FFB38\n000000067F000040020000A0000000010000-000000067F000040020000A0000000014000__00000038E67ABFA0\n000000067F000040020000A0000000010000-000000067F000040020000A0000000014000__0000003903F1CFE8\n000000067F000040020000A0000000010000-000000067F000040020000A0000000014000__0000003B99F7F8A0\n000000067F000040020000A0000000010000-000000067F000040020000A0000000014000__0000005D2FFFFB38\n000000067F000040020000A0000000011373-000000067F000040020000A0000000019D77__0000001C725D0191-0000002070591C61\n000000067F000040020000A0000000014000-000000067F000040020000A0000000018000__00000038E67ABFA0\n000000067F000040020000A0000000014000-000000067F000040020000A0000000018000__0000003903F1CFE8\n000000067F000040020000A0000000014000-000000067F000040020000A0000000018000__0000003B99F7F8A0\n000000067F000040020000A0000000014000-000000067F000040020000A0000000018000__0000005D2FFFFB38\n000000067F000040020000A0000000018000-000000067F000040020000A000000001C000__00000038E67ABFA0\n000000067F000040020000A0000000018000-000000067F000040020000A000000001C000__0000003903F1CFE8\n000000067F000040020000A0000000018000-000000067F000040020000A000000001C000__0000003B99F7F8A0\n000000067F000040020000A0000000018000-000000067F000040020000A000000001C000__0000005D2FFFFB38\n000000067F000040020000A0000000019D77-000000067F000040020000A000000002276E__0000001C725D0191-0000002070591C61\n000000067F000040020000A000000001C000-000000067F000040020000A0000000020000__00000038E67ABFA0\n000000067F000040020000A000000001C000-000000067F000040020000A0000000020000__0000003903F1CFE8\n000000067F000040020000A000000001C000-000000067F000040020000A0000000020000__0000003B99F7F8A0\n000000067F000040020000A000000001C000-000000067F000040020000A0000000020000__0000005D2FFFFB38\n000000067F000040020000A0000000020000-000000067F000040020000A0000000024000__00000038E67ABFA0\n000000067F000040020000A0000000020000-000000067F000040020000A0000000024000__0000003903F1CFE8\n000000067F000040020000A0000000020000-000000067F000040020000A0000000024000__0000003B99F7F8A0\n000000
067F000040020000A0000000020000-000000067F000040020000A0000000024000__0000005D2FFFFB38\n000000067F000040020000A000000002276E-000000067F000040020000A000000002B152__0000001C725D0191-0000002070591C61\n000000067F000040020000A0000000024000-000000067F000040020000A0000000028000__00000038E67ABFA0\n000000067F000040020000A0000000024000-000000067F000040020000A0000000028000__0000003903F1CFE8\n000000067F000040020000A0000000024000-000000067F000040020000A0000000028000__0000003B99F7F8A0\n000000067F000040020000A0000000024000-000000067F000040020000A0000000028000__0000005D2FFFFB38\n000000067F000040020000A0000000028000-000000067F000040020000A000000002C000__00000038E67ABFA0\n000000067F000040020000A0000000028000-000000067F000040020000A000000002C000__0000003903F1CFE8\n000000067F000040020000A0000000028000-000000067F000040020000A000000002C000__0000003B99F7F8A0\n000000067F000040020000A0000000028000-000000067F000040020000A000000002C000__0000005D2FFFFB38\n000000067F000040020000A000000002B152-000000067F000040020000A0000000033B1C__0000001C725D0191-0000002070591C61\n000000067F000040020000A000000002C000-000000067F000040020000A0000000030000__00000038E67ABFA0\n000000067F000040020000A000000002C000-000000067F000040020000A0000000030000__0000003903F1CFE8\n000000067F000040020000A000000002C000-000000067F000040020000A0000000030000__0000003B99F7F8A0\n000000067F000040020000A000000002C000-000000067F000040020000A0000000030000__0000005D2FFFFB38\n000000067F000040020000A0000000030000-000000067F000040020000A0000000034000__00000038E67ABFA0\n000000067F000040020000A0000000030000-000000067F000040020000A0000000034000__0000003903F1CFE8\n000000067F000040020000A0000000030000-000000067F000040020000A0000000034000__0000003B99F7F8A0\n000000067F000040020000A0000000030000-000000067F000040020000A0000000034000__0000005D2FFFFB38\n000000067F000040020000A0000000033B1C-000000067F000040020000A000000003C4CA__0000001C725D0191-0000002070591C61\n000000067F000040020000A0000000034000-000000067F000040020000A0000000038000__00000038E67ABFA0\n00
0000067F000040020000A0000000034000-000000067F000040020000A0000000038000__0000003903F1CFE8\n000000067F000040020000A0000000034000-000000067F000040020000A0000000038000__0000003B99F7F8A0\n000000067F000040020000A0000000034000-000000067F000040020000A0000000038000__0000005D2FFFFB38\n000000067F000040020000A0000000038000-000000067F000040020000A000000003C000__00000038E67ABFA0\n000000067F000040020000A0000000038000-000000067F000040020000A000000003C000__0000003903F1CFE8\n000000067F000040020000A0000000038000-000000067F000040020000A000000003C000__0000003B99F7F8A0\n000000067F000040020000A0000000038000-000000067F000040020000A000000003C000__0000005D2FFFFB38\n000000067F000040020000A000000003C000-000000067F000040020000A0000000040000__00000038E67ABFA0\n000000067F000040020000A000000003C000-000000067F000040020000A0000000040000__0000003903F1CFE8\n000000067F000040020000A000000003C000-000000067F000040020000A0000000040000__0000003B99F7F8A0\n000000067F000040020000A000000003C000-000000067F000040020000A0000000040000__0000005D2FFFFB38\n000000067F000040020000A000000003C4CA-000000067F000040020000A0000000044E8B__0000001C725D0191-0000002070591C61\n000000067F000040020000A0000000040000-000000067F000040020000A0000000044000__00000038E67ABFA0\n000000067F000040020000A0000000040000-000000067F000040020000A0000000044000__0000003903F1CFE8\n000000067F000040020000A0000000040000-000000067F000040020000A0000000044000__0000003B99F7F8A0\n000000067F000040020000A0000000040000-000000067F000040020000A0000000044000__0000005D2FFFFB38\n000000067F000040020000A0000000044000-000000067F000040020000A0000000048000__00000038E67ABFA0\n000000067F000040020000A0000000044000-000000067F000040020000A0000000048000__0000003903F1CFE8\n000000067F000040020000A0000000044000-000000067F000040020000A0000000048000__0000003B99F7F8A0\n000000067F000040020000A0000000044000-000000067F000040020000A0000000048000__0000005D2FFFFB38\n000000067F000040020000A0000000044E8B-000000067F000040020000A000000004D882__0000001C725D0191-0000002070591C61\n000000067F00004
0020000A0000000048000-000000067F000040020000A000000004C000__00000038E67ABFA0\n000000067F000040020000A0000000048000-000000067F000040020000A000000004C000__0000003903F1CFE8\n000000067F000040020000A0000000048000-000000067F000040020000A000000004C000__0000003B99F7F8A0\n000000067F000040020000A0000000048000-000000067F000040020000A000000004C000__0000005D2FFFFB38\n000000067F000040020000A000000004C000-000000067F000040020000A0000000050000__00000038E67ABFA0\n000000067F000040020000A000000004C000-000000067F000040020000A0000000050000__0000003903F1CFE8\n000000067F000040020000A000000004C000-000000067F000040020000A0000000050000__0000003B99F7F8A0\n000000067F000040020000A000000004C000-000000067F000040020000A0000000050000__0000005D2FFFFB38\n000000067F000040020000A000000004D882-000000067F000040020000A0000000056278__0000001C725D0191-0000002070591C61\n000000067F000040020000A0000000050000-000000067F000040020000A0000000054000__00000038E67ABFA0\n000000067F000040020000A0000000050000-000000067F000040020000A0000000054000__0000003903F1CFE8\n000000067F000040020000A0000000050000-000000067F000040020000A0000000054000__0000003B99F7F8A0\n000000067F000040020000A0000000050000-000000067F000040020000A0000000054000__0000005D2FFFFB38\n000000067F000040020000A0000000054000-000000067F000040020000A0000000058000__00000038E67ABFA0\n000000067F000040020000A0000000054000-000000067F000040020000A0000000058000__0000003903F1CFE8\n000000067F000040020000A0000000054000-000000067F000040020000A0000000058000__0000003B99F7F8A0\n000000067F000040020000A0000000054000-000000067F000040020000A0000000058000__0000005D2FFFFB38\n000000067F000040020000A0000000056278-000000067F000040020000A000000005EC6B__0000001C725D0191-0000002070591C61\n000000067F000040020000A0000000058000-000000067F000040020000A000000005C000__00000038E67ABFA0\n000000067F000040020000A0000000058000-000000067F000040020000A000000005C000__0000003903F1CFE8\n000000067F000040020000A0000000058000-000000067F000040020000A000000005C000__0000003B99F7F8A0\n000000067F000040020000A00000
00058000-000000067F000040020000A000000005C000__0000005D2FFFFB38\n000000067F000040020000A000000005C000-000000067F000040020000A0000000060000__00000038E67ABFA0\n000000067F000040020000A000000005C000-000000067F000040020000A0000000060000__0000003903F1CFE8\n000000067F000040020000A000000005C000-000000067F000040020000A0000000060000__0000003B99F7F8A0\n000000067F000040020000A000000005C000-000000067F000040020000A0000000060000__0000005D2FFFFB38\n000000067F000040020000A000000005EC6B-000000067F000040020000A0000000067651__0000001C725D0191-0000002070591C61\n000000067F000040020000A0000000060000-000000067F000040020000A0000000064000__00000038E67ABFA0\n000000067F000040020000A0000000060000-000000067F000040020000A0000000064000__0000003903F1CFE8\n000000067F000040020000A0000000060000-000000067F000040020000A0000000064000__0000003B99F7F8A0\n000000067F000040020000A0000000060000-000000067F000040020000A0000000064000__0000005D2FFFFB38\n000000067F000040020000A0000000064000-000000067F000040020000A0000000068000__00000038E67ABFA0\n000000067F000040020000A0000000064000-000000067F000040020000A0000000068000__0000003903F1CFE8\n000000067F000040020000A0000000064000-000000067F000040020000A0000000068000__0000003B99F7F8A0\n000000067F000040020000A0000000064000-000000067F000040020000A0000000068000__0000005D2FFFFB38\n000000067F000040020000A0000000067651-000000067F000040020000A000000007002B__0000001C725D0191-0000002070591C61\n000000067F000040020000A0000000068000-000000067F000040020000A000000006C000__00000038E67ABFA0\n000000067F000040020000A0000000068000-000000067F000040020000A000000006C000__0000003903F1CFE8\n000000067F000040020000A0000000068000-000000067F000040020000A000000006C000__0000003B99F7F8A0\n000000067F000040020000A0000000068000-000000067F000040020000A000000006C000__0000005D2FFFFB38\n000000067F000040020000A000000006C000-000000067F000040020000A0000000070000__00000038E67ABFA0\n000000067F000040020000A000000006C000-000000067F000040020000A0000000070000__0000003903F1CFE8\n000000067F000040020000A000000006C000-0000
00067F000040020000A0000000070000__0000003B99F7F8A0\n000000067F000040020000A000000006C000-000000067F000040020000A0000000070000__0000005D2FFFFB38\n000000067F000040020000A0000000070000-000000067F000040020000A0000000074000__00000038E67ABFA0\n000000067F000040020000A0000000070000-000000067F000040020000A0000000074000__0000003903F1CFE8\n000000067F000040020000A0000000070000-000000067F000040020000A0000000074000__0000003B99F7F8A0\n000000067F000040020000A0000000070000-000000067F000040020000A0000000074000__0000005D2FFFFB38\n000000067F000040020000A000000007002B-000000067F000040020000A00000000789E3__0000001C725D0191-0000002070591C61\n000000067F000040020000A0000000074000-000000067F000040020000A0000000078000__00000038E67ABFA0\n000000067F000040020000A0000000074000-000000067F000040020000A0000000078000__0000003903F1CFE8\n000000067F000040020000A0000000074000-000000067F000040020000A0000000078000__0000003B99F7F8A0\n000000067F000040020000A0000000074000-000000067F000040020000A0000000078000__0000005D2FFFFB38\n000000067F000040020000A0000000078000-000000067F000040020000A000000007C000__00000038E67ABFA0\n000000067F000040020000A0000000078000-000000067F000040020000A000000007C000__0000003903F1CFE8\n000000067F000040020000A0000000078000-000000067F000040020000A000000007C000__0000003B99F7F8A0\n000000067F000040020000A0000000078000-000000067F000040020000A000000007C000__0000005D2FFFFB38\n000000067F000040020000A00000000789E3-000000067F000040020000A00000000813A7__0000001C725D0191-0000002070591C61\n000000067F000040020000A000000007C000-000000067F000040020000A0000000080000__00000038E67ABFA0\n000000067F000040020000A000000007C000-000000067F000040020000A0000000080000__0000003903F1CFE8\n000000067F000040020000A000000007C000-000000067F000040020000A0000000080000__0000003B99F7F8A0\n000000067F000040020000A000000007C000-000000067F000040020000A0000000080000__0000005D2FFFFB38\n000000067F000040020000A0000000080000-000000067F000040020000A0000000084000__00000038E67ABFA0\n000000067F000040020000A0000000080000-000000067F0000400
20000A0000000084000__0000003903F1CFE8\n000000067F000040020000A0000000080000-000000067F000040020000A0000000084000__0000003B99F7F8A0\n000000067F000040020000A0000000080000-000000067F000040020000A0000000084000__0000005D2FFFFB38\n000000067F000040020000A00000000813A7-000000067F000040020000A0000000089D92__0000001C725D0191-0000002070591C61\n000000067F000040020000A0000000084000-000000067F000040020000A0000000088000__00000038E67ABFA0\n000000067F000040020000A0000000084000-000000067F000040020000A0000000088000__0000003903F1CFE8\n000000067F000040020000A0000000084000-000000067F000040020000A0000000088000__0000003B99F7F8A0\n000000067F000040020000A0000000084000-000000067F000040020000A0000000088000__0000005D2FFFFB38\n000000067F000040020000A0000000088000-000000067F000040020000A000000008C000__00000038E67ABFA0\n000000067F000040020000A0000000088000-000000067F000040020000A000000008C000__0000003903F1CFE8\n000000067F000040020000A0000000088000-000000067F000040020000A000000008C000__0000003B99F7F8A0\n000000067F000040020000A0000000088000-000000067F000040020000A000000008C000__0000005D2FFFFB38\n000000067F000040020000A0000000089D92-000000067F000040020000A000000009278A__0000001C725D0191-0000002070591C61\n000000067F000040020000A000000008C000-000000067F000040020000A0000000090000__00000038E67ABFA0\n000000067F000040020000A000000008C000-000000067F000040020000A0000000090000__0000003903F1CFE8\n000000067F000040020000A000000008C000-000000067F000040020000A0000000090000__0000003B99F7F8A0\n000000067F000040020000A000000008C000-000000067F000040020000A0000000090000__0000005D2FFFFB38\n000000067F000040020000A0000000090000-000000067F000040020000A0000000094000__00000038E67ABFA0\n000000067F000040020000A0000000090000-000000067F000040020000A0000000094000__0000003903F1CFE8\n000000067F000040020000A0000000090000-000000067F000040020000A0000000094000__0000003B99F7F8A0\n000000067F000040020000A0000000090000-000000067F000040020000A0000000094000__0000005D2FFFFB38\n000000067F000040020000A000000009278A-000000067F000040020000A0000000
09B17C__0000001C725D0191-0000002070591C61\n000000067F000040020000A0000000094000-000000067F000040020000A0000000098000__00000038E67ABFA0\n000000067F000040020000A0000000094000-000000067F000040020000A0000000098000__0000003903F1CFE8\n000000067F000040020000A0000000094000-000000067F000040020000A0000000098000__0000003B99F7F8A0\n000000067F000040020000A0000000094000-000000067F000040020000A0000000098000__0000005D2FFFFB38\n000000067F000040020000A0000000098000-000000067F000040020000A000000009C000__00000038E67ABFA0\n000000067F000040020000A0000000098000-000000067F000040020000A000000009C000__0000003903F1CFE8\n000000067F000040020000A0000000098000-000000067F000040020000A000000009C000__0000003B99F7F8A0\n000000067F000040020000A0000000098000-000000067F000040020000A000000009C000__0000005D2FFFFB38\n000000067F000040020000A000000009B17C-000000067F000040020000A00000000A3B54__0000001C725D0191-0000002070591C61\n000000067F000040020000A000000009C000-000000067F000040020000A00000000A0000__00000038E67ABFA0\n000000067F000040020000A000000009C000-000000067F000040020000A00000000A0000__0000003903F1CFE8\n000000067F000040020000A000000009C000-000000067F000040020000A00000000A0000__0000003B99F7F8A0\n000000067F000040020000A000000009C000-000000067F000040020000A00000000A0000__0000005D2FFFFB38\n000000067F000040020000A00000000A0000-000000067F000040020000A00000000A4000__00000038E67ABFA0\n000000067F000040020000A00000000A0000-000000067F000040020000A00000000A4000__0000003903F1CFE8\n000000067F000040020000A00000000A0000-000000067F000040020000A00000000A4000__0000003B99F7F8A0\n000000067F000040020000A00000000A0000-000000067F000040020000A00000000A4000__0000005D2FFFFB38\n000000067F000040020000A00000000A3B54-000000067F000040020000A00000000AC52A__0000001C725D0191-0000002070591C61\n000000067F000040020000A00000000A4000-000000067F000040020000A00000000A8000__00000038E67ABFA0\n000000067F000040020000A00000000A4000-000000067F000040020000A00000000A8000__0000003903F1CFE8\n000000067F000040020000A00000000A4000-000000067F000040020000A000
00000A8000__0000003B99F7F8A0\n000000067F000040020000A00000000A4000-000000067F000040020000A00000000A8000__0000005D2FFFFB38\n000000067F000040020000A00000000A8000-000000067F000040020000A00000000AC000__00000038E67ABFA0\n000000067F000040020000A00000000A8000-000000067F000040020000A00000000AC000__0000003903F1CFE8\n000000067F000040020000A00000000A8000-000000067F000040020000A00000000AC000__0000003B99F7F8A0\n000000067F000040020000A00000000A8000-000000067F000040020000A00000000AC000__0000005D2FFFFB38\n000000067F000040020000A00000000AC000-000000067F000040020000A00000000B0000__00000038E67ABFA0\n000000067F000040020000A00000000AC000-000000067F000040020000A00000000B0000__0000003903F1CFE8\n000000067F000040020000A00000000AC000-000000067F000040020000A00000000B0000__0000003B99F7F8A0\n000000067F000040020000A00000000AC000-000000067F000040020000A00000000B0000__0000005D2FFFFB38\n000000067F000040020000A00000000AC52A-000000067F000040020000A00000000B4ED6__0000001C725D0191-0000002070591C61\n000000067F000040020000A00000000B0000-000000067F000040020000A00000000B4000__00000038E67ABFA0\n000000067F000040020000A00000000B0000-000000067F000040020000A00000000B4000__0000003903F1CFE8\n000000067F000040020000A00000000B0000-000000067F000040020000A00000000B4000__0000003B99F7F8A0\n000000067F000040020000A00000000B0000-000000067F000040020000A00000000B4000__0000005D2FFFFB38\n000000067F000040020000A00000000B4000-000000067F000040020000A00000000B8000__00000038E67ABFA0\n000000067F000040020000A00000000B4000-000000067F000040020000A00000000B8000__0000003903F1CFE8\n000000067F000040020000A00000000B4000-000000067F000040020000A00000000B8000__0000003B99F7F8A0\n000000067F000040020000A00000000B4000-000000067F000040020000A00000000B8000__0000005D2FFFFB38\n000000067F000040020000A00000000B4ED6-000000067F000040020000A00000000BD8A4__0000001C725D0191-0000002070591C61\n000000067F000040020000A00000000B8000-000000067F000040020000A00000000BC000__00000038E67ABFA0\n000000067F000040020000A00000000B8000-000000067F000040020000A00000000BC000__0
000003903F1CFE8\n000000067F000040020000A00000000B8000-000000067F000040020000A00000000BC000__0000003B99F7F8A0\n000000067F000040020000A00000000B8000-000000067F000040020000A00000000BC000__0000005D2FFFFB38\n000000067F000040020000A00000000BC000-000000067F000040020000A00000000C0000__00000038E67ABFA0\n000000067F000040020000A00000000BC000-000000067F000040020000A00000000C0000__0000003903F1CFE8\n000000067F000040020000A00000000BC000-000000067F000040020000A00000000C0000__0000003B99F7F8A0\n000000067F000040020000A00000000BC000-000000067F000040020000A00000000C0000__0000005D2FFFFB38\n000000067F000040020000A00000000BD8A4-000000067F000040020000A00000000C629B__0000001C725D0191-0000002070591C61\n000000067F000040020000A00000000C0000-000000067F000040020000A00000000C4000__00000038E67ABFA0\n000000067F000040020000A00000000C0000-000000067F000040020000A00000000C4000__0000003903F1CFE8\n000000067F000040020000A00000000C0000-000000067F000040020000A00000000C4000__0000003B99F7F8A0\n000000067F000040020000A00000000C0000-000000067F000040020000A00000000C4000__0000005D2FFFFB38\n000000067F000040020000A00000000C4000-000000067F000040020000A00000000C8000__00000038E67ABFA0\n000000067F000040020000A00000000C4000-000000067F000040020000A00000000C8000__0000003903F1CFE8\n000000067F000040020000A00000000C4000-000000067F000040020000A00000000C8000__0000003B99F7F8A0\n000000067F000040020000A00000000C4000-000000067F000040020000A00000000C8000__0000005D2FFFFB38\n000000067F000040020000A00000000C629B-000000067F000040020000A00000000CEC94__0000001C725D0191-0000002070591C61\n000000067F000040020000A00000000C8000-000000067F000040020000A00000000CC000__00000038E67ABFA0\n000000067F000040020000A00000000C8000-000000067F000040020000A00000000CC000__0000003903F1CFE8\n000000067F000040020000A00000000C8000-000000067F000040020000A00000000CC000__0000003B99F7F8A0\n000000067F000040020000A00000000C8000-000000067F000040020000A00000000CC000__0000005D2FFFFB38\n000000067F000040020000A00000000CC000-000000067F000040020000A00000000D0000__00000038E67ABF
A0\n000000067F000040020000A00000000CC000-000000067F000040020000A00000000D0000__0000003903F1CFE8\n000000067F000040020000A00000000CC000-000000067F000040020000A00000000D0000__0000003B99F7F8A0\n000000067F000040020000A00000000CC000-000000067F000040020000A00000000D0000__0000005D2FFFFB38\n000000067F000040020000A00000000CEC94-000000067F000040020000A00000000D7688__0000001C725D0191-0000002070591C61\n000000067F000040020000A00000000D0000-000000067F000040020000A00000000D4000__00000038E67ABFA0\n000000067F000040020000A00000000D0000-000000067F000040020000A00000000D4000__0000003903F1CFE8\n000000067F000040020000A00000000D0000-000000067F000040020000A00000000D4000__0000003B99F7F8A0\n000000067F000040020000A00000000D0000-000000067F000040020000A00000000D4000__0000005D2FFFFB38\n000000067F000040020000A00000000D4000-000000067F000040020000A00000000D8000__00000038E67ABFA0\n000000067F000040020000A00000000D4000-000000067F000040020000A00000000D8000__0000003903F1CFE8\n000000067F000040020000A00000000D4000-000000067F000040020000A00000000D8000__0000003B99F7F8A0\n000000067F000040020000A00000000D4000-000000067F000040020000A00000000D8000__0000005D2FFFFB38\n000000067F000040020000A00000000D7688-000000067F000040020000A00000000E0068__0000001C725D0191-0000002070591C61\n000000067F000040020000A00000000D8000-000000067F000040020000A00000000DC000__00000038E67ABFA0\n000000067F000040020000A00000000D8000-000000067F000040020000A00000000DC000__0000003903F1CFE8\n000000067F000040020000A00000000D8000-000000067F000040020000A00000000DC000__0000003B99F7F8A0\n000000067F000040020000A00000000D8000-000000067F000040020000A00000000DC000__0000005D2FFFFB38\n000000067F000040020000A00000000DC000-000000067F000040020000A00000000E0000__00000038E67ABFA0\n000000067F000040020000A00000000DC000-000000067F000040020000A00000000E0000__0000003903F1CFE8\n000000067F000040020000A00000000DC000-000000067F000040020000A00000000E0000__0000003B99F7F8A0\n000000067F000040020000A00000000DC000-000000067F000040020000A00000000E0000__0000005D2FFFFB38\n000000067
F000040020000A00000000E0000-000000067F000040020000A00000000E4000__00000038E67ABFA0\n000000067F000040020000A00000000E0000-000000067F000040020000A00000000E4000__0000003903F1CFE8\n000000067F000040020000A00000000E0000-000000067F000040020000A00000000E4000__0000003B99F7F8A0\n000000067F000040020000A00000000E0000-000000067F000040020000A00000000E4000__0000005D2FFFFB38\n000000067F000040020000A00000000E0068-000000067F000040020000A00000000E8A2D__0000001C725D0191-0000002070591C61\n000000067F000040020000A00000000E4000-000000067F000040020000A00000000E8000__00000038E67ABFA0\n000000067F000040020000A00000000E4000-000000067F000040020000A00000000E8000__0000003903F1CFE8\n000000067F000040020000A00000000E4000-000000067F000040020000A00000000E8000__0000003B99F7F8A0\n000000067F000040020000A00000000E4000-000000067F000040020000A00000000E8000__0000005D2FFFFB38\n000000067F000040020000A00000000E8000-000000067F000040020000A00000000EC000__00000038E67ABFA0\n000000067F000040020000A00000000E8000-000000067F000040020000A00000000EC000__0000003903F1CFE8\n000000067F000040020000A00000000E8000-000000067F000040020000A00000000EC000__0000003B99F7F8A0\n000000067F000040020000A00000000E8000-000000067F000040020000A00000000EC000__0000005D2FFFFB38\n000000067F000040020000A00000000E8A2D-000000067F000040020000A00000000F13E7__0000001C725D0191-0000002070591C61\n000000067F000040020000A00000000EC000-000000067F000040020000A00000000F0000__00000038E67ABFA0\n000000067F000040020000A00000000EC000-000000067F000040020000A00000000F0000__0000003903F1CFE8\n000000067F000040020000A00000000EC000-000000067F000040020000A00000000F0000__0000003B99F7F8A0\n000000067F000040020000A00000000EC000-000000067F000040020000A00000000F0000__0000005D2FFFFB38\n000000067F000040020000A00000000F0000-000000067F000040020000A00000000F4000__00000038E67ABFA0\n000000067F000040020000A00000000F0000-000000067F000040020000A00000000F4000__0000003903F1CFE8\n000000067F000040020000A00000000F0000-000000067F000040020000A00000000F4000__0000003B99F7F8A0\n000000067F000040020000
A00000000F0000-000000067F000040020000A00000000F4000__0000005D2FFFFB38\n000000067F000040020000A00000000F13E7-000000067F000040020000A00000000F9DC4__0000001C725D0191-0000002070591C61\n000000067F000040020000A00000000F4000-000000067F000040020000A00000000F8000__00000038E67ABFA0\n000000067F000040020000A00000000F4000-000000067F000040020000A00000000F8000__0000003903F1CFE8\n000000067F000040020000A00000000F4000-000000067F000040020000A00000000F8000__0000003B99F7F8A0\n000000067F000040020000A00000000F4000-000000067F000040020000A00000000F8000__0000005D2FFFFB38\n000000067F000040020000A00000000F8000-000000067F000040020000A00000000FC000__00000038E67ABFA0\n000000067F000040020000A00000000F8000-000000067F000040020000A00000000FC000__0000003903F1CFE8\n000000067F000040020000A00000000F8000-000000067F000040020000A00000000FC000__0000003B99F7F8A0\n000000067F000040020000A00000000F8000-000000067F000040020000A00000000FC000__0000005D2FFFFB38\n000000067F000040020000A00000000F9DC4-000000067F000040020000A00000001027C3__0000001C725D0191-0000002070591C61\n000000067F000040020000A00000000FC000-000000067F000040020000A0000000100000__00000038E67ABFA0\n000000067F000040020000A00000000FC000-000000067F000040020000A0000000100000__0000003903F1CFE8\n000000067F000040020000A00000000FC000-000000067F000040020000A0000000100000__0000003B99F7F8A0\n000000067F000040020000A00000000FC000-000000067F000040020000A0000000100000__0000005D2FFFFB38\n000000067F000040020000A0000000100000-000000067F000040020000A0000000104000__00000038E67ABFA0\n000000067F000040020000A0000000100000-000000067F000040020000A0000000104000__0000003903F1CFE8\n000000067F000040020000A0000000100000-000000067F000040020000A0000000104000__0000003B99F7F8A0\n000000067F000040020000A0000000100000-000000067F000040020000A0000000104000__0000005D2FFFFB38\n000000067F000040020000A00000001027C3-000000067F000040020000A000000010B1C2__0000001C725D0191-0000002070591C61\n000000067F000040020000A0000000104000-000000067F000040020000A0000000108000__00000038E67ABFA0\n000000067F00004002
0000A0000000104000-000000067F000040020000A0000000108000__0000003903F1CFE8\n000000067F000040020000A0000000104000-000000067F000040020000A0000000108000__0000003B99F7F8A0\n000000067F000040020000A0000000104000-000000067F000040020000A0000000108000__0000005D2FFFFB38\n000000067F000040020000A0000000108000-000000067F000040020000A000000010C000__00000038E67ABFA0\n000000067F000040020000A0000000108000-000000067F000040020000A000000010C000__0000003903F1CFE8\n000000067F000040020000A0000000108000-000000067F000040020000A000000010C000__0000003B99F7F8A0\n000000067F000040020000A0000000108000-000000067F000040020000A000000010C000__0000005D2FFFFB38\n000000067F000040020000A000000010B1C2-000000067F000040020000A0000000113BB3__0000001C725D0191-0000002070591C61\n000000067F000040020000A000000010C000-000000067F000040020000A0000000110000__00000038E67ABFA0\n000000067F000040020000A000000010C000-000000067F000040020000A0000000110000__0000003903F1CFE8\n000000067F000040020000A000000010C000-000000067F000040020000A0000000110000__0000003B99F7F8A0\n000000067F000040020000A000000010C000-000000067F000040020000A0000000110000__0000005D2FFFFB38\n000000067F000040020000A0000000110000-000000067F000040020000A0000000114000__00000038E67ABFA0\n000000067F000040020000A0000000110000-000000067F000040020000A0000000114000__0000003903F1CFE8\n000000067F000040020000A0000000110000-000000067F000040020000A0000000114000__0000003B99F7F8A0\n000000067F000040020000A0000000110000-000000067F000040020000A0000000114000__0000005D2FFFFB38\n000000067F000040020000A0000000113BB3-000000067F000040020000A000000011C591__0000001C725D0191-0000002070591C61\n000000067F000040020000A0000000114000-000000067F000040020000A0000000118000__00000038E67ABFA0\n000000067F000040020000A0000000114000-000000067F000040020000A0000000118000__0000003903F1CFE8\n000000067F000040020000A0000000114000-000000067F000040020000A0000000118000__0000003B99F7F8A0\n000000067F000040020000A0000000114000-000000067F000040020000A0000000118000__0000005D2FFFFB38\n000000067F000040020000A00000001
18000-000000067F000040020000A000000011C000__00000038E67ABFA0\n000000067F000040020000A0000000118000-000000067F000040020000A000000011C000__0000003903F1CFE8\n000000067F000040020000A0000000118000-000000067F000040020000A000000011C000__0000003B99F7F8A0\n000000067F000040020000A0000000118000-000000067F000040020000A000000011C000__0000005D2FFFFB38\n000000067F000040020000A000000011C000-000000067F000040020000A0000000120000__00000038E67ABFA0\n000000067F000040020000A000000011C000-000000067F000040020000A0000000120000__0000003903F1CFE8\n000000067F000040020000A000000011C000-000000067F000040020000A0000000120000__0000003B99F7F8A0\n000000067F000040020000A000000011C000-000000067F000040020000A0000000120000__0000005D2FFFFB38\n000000067F000040020000A000000011C591-000000067F000040020000A0000000124F48__0000001C725D0191-0000002070591C61\n000000067F000040020000A0000000120000-000000067F000040020000A0000000124000__00000038E67ABFA0\n000000067F000040020000A0000000120000-000000067F000040020000A0000000124000__0000003903F1CFE8\n000000067F000040020000A0000000120000-000000067F000040020000A0000000124000__0000003B99F7F8A0\n000000067F000040020000A0000000120000-000000067F000040020000A0000000124000__0000005D2FFFFB38\n000000067F000040020000A0000000124000-000000067F000040020000A0000000128000__00000038E67ABFA0\n000000067F000040020000A0000000124000-000000067F000040020000A0000000128000__0000003903F1CFE8\n000000067F000040020000A0000000124000-000000067F000040020000A0000000128000__0000003B99F7F8A0\n000000067F000040020000A0000000124000-000000067F000040020000A0000000128000__0000005D2FFFFB38\n000000067F000040020000A0000000124F48-000000067F000040020000A000000012D900__0000001C725D0191-0000002070591C61\n000000067F000040020000A0000000128000-000000067F000040020000A000000012C000__00000038E67ABFA0\n000000067F000040020000A0000000128000-000000067F000040020000A000000012C000__0000003903F1CFE8\n000000067F000040020000A0000000128000-000000067F000040020000A000000012C000__0000003B99F7F8A0\n000000067F000040020000A0000000128000-0000000
67F000040020000A000000012C000__0000005D2FFFFB38\n000000067F000040020000A000000012C000-000000067F000040020000A0000000130000__00000038E67ABFA0\n000000067F000040020000A000000012C000-000000067F000040020000A0000000130000__0000003903F1CFE8\n000000067F000040020000A000000012C000-000000067F000040020000A0000000130000__0000003B99F7F8A0\n000000067F000040020000A000000012C000-000000067F000040020000A0000000130000__0000005D2FFFFB38\n000000067F000040020000A000000012D900-000000067F000040020000A00000001362D3__0000001C725D0191-0000002070591C61\n000000067F000040020000A0000000130000-000000067F000040020000A0000000134000__00000038E67ABFA0\n000000067F000040020000A0000000130000-000000067F000040020000A0000000134000__0000003903F1CFE8\n000000067F000040020000A0000000130000-000000067F000040020000A0000000134000__0000003B99F7F8A0\n000000067F000040020000A0000000130000-000000067F000040020000A0000000134000__0000005D2FFFFB38\n000000067F000040020000A0000000134000-000000067F000040020000A0000000138000__00000038E67ABFA0\n000000067F000040020000A0000000134000-000000067F000040020000A0000000138000__0000003903F1CFE8\n000000067F000040020000A0000000134000-000000067F000040020000A0000000138000__0000003B99F7F8A0\n000000067F000040020000A0000000134000-000000067F000040020000A0000000138000__0000005D2FFFFB38\n000000067F000040020000A00000001362D3-000000067F000040020000A000000013ECD2__0000001C725D0191-0000002070591C61\n000000067F000040020000A0000000138000-000000067F000040020000A000000013C000__00000038E67ABFA0\n000000067F000040020000A0000000138000-000000067F000040020000A000000013C000__0000003903F1CFE8\n000000067F000040020000A0000000138000-000000067F000040020000A000000013C000__0000003B99F7F8A0\n000000067F000040020000A0000000138000-000000067F000040020000A000000013C000__0000005D2FFFFB38\n000000067F000040020000A000000013C000-000000067F000040020000A0000000140000__00000038E67ABFA0\n000000067F000040020000A000000013C000-000000067F000040020000A0000000140000__0000003903F1CFE8\n000000067F000040020000A000000013C000-000000067F0000400200
00A0000000140000__0000003B99F7F8A0\n000000067F000040020000A000000013C000-000000067F000040020000A0000000140000__0000005D2FFFFB38\n000000067F000040020000A000000013ECD2-000000067F000040020000A00000001476C8__0000001C725D0191-0000002070591C61\n000000067F000040020000A0000000140000-000000067F000040020000A0000000144000__00000038E67ABFA0\n000000067F000040020000A0000000140000-000000067F000040020000A0000000144000__0000003903F1CFE8\n000000067F000040020000A0000000140000-000000067F000040020000A0000000144000__0000003B99F7F8A0\n000000067F000040020000A0000000140000-000000067F000040020000A0000000144000__0000005D2FFFFB38\n000000067F000040020000A0000000144000-000000067F000040020000A0000000148000__00000038E67ABFA0\n000000067F000040020000A0000000144000-000000067F000040020000A0000000148000__0000003903F1CFE8\n000000067F000040020000A0000000144000-000000067F000040020000A0000000148000__0000003B99F7F8A0\n000000067F000040020000A0000000144000-000000067F000040020000A0000000148000__0000005D2FFFFB38\n000000067F000040020000A00000001476C8-000000067F000040020000A00000001500B9__0000001C725D0191-0000002070591C61\n000000067F000040020000A0000000148000-000000067F000040020000A000000014C000__00000038E67ABFA0\n000000067F000040020000A0000000148000-000000067F000040020000A000000014C000__0000003903F1CFE8\n000000067F000040020000A0000000148000-000000067F000040020000A000000014C000__0000003B99F7F8A0\n000000067F000040020000A0000000148000-000000067F000040020000A000000014C000__0000005D2FFFFB38\n000000067F000040020000A000000014C000-000000067F000040020000A0000000150000__00000038E67ABFA0\n000000067F000040020000A000000014C000-000000067F000040020000A0000000150000__0000003903F1CFE8\n000000067F000040020000A000000014C000-000000067F000040020000A0000000150000__0000003B99F7F8A0\n000000067F000040020000A000000014C000-000000067F000040020000A0000000150000__0000005D2FFFFB38\n000000067F000040020000A0000000150000-000000067F000040020000A0000000154000__00000038E67ABFA0\n000000067F000040020000A0000000150000-000000067F000040020000A0000000154
000__0000003903F1CFE8\n000000067F000040020000A0000000150000-000000067F000040020000A0000000154000__0000003B99F7F8A0\n000000067F000040020000A0000000150000-000000067F000040020000A0000000154000__0000005D2FFFFB38\n000000067F000040020000A00000001500B9-000000067F000040020000A0000000158A91__0000001C725D0191-0000002070591C61\n000000067F000040020000A0000000154000-000000067F000040020000A0000000158000__00000038E67ABFA0\n000000067F000040020000A0000000154000-000000067F000040020000A0000000158000__0000003903F1CFE8\n000000067F000040020000A0000000154000-000000067F000040020000A0000000158000__0000003B99F7F8A0\n000000067F000040020000A0000000154000-000000067F000040020000A0000000158000__0000005D2FFFFB38\n000000067F000040020000A0000000158000-000000067F000040020000A000000015C000__00000038E67ABFA0\n000000067F000040020000A0000000158000-000000067F000040020000A000000015C000__0000003903F1CFE8\n000000067F000040020000A0000000158000-000000067F000040020000A000000015C000__0000003B99F7F8A0\n000000067F000040020000A0000000158000-000000067F000040020000A000000015C000__0000005D2FFFFB38\n000000067F000040020000A0000000158A91-000000067F000040020000A0000000161450__0000001C725D0191-0000002070591C61\n000000067F000040020000A000000015C000-000000067F000040020000A0000000160000__00000038E67ABFA0\n000000067F000040020000A000000015C000-000000067F000040020000A0000000160000__0000003903F1CFE8\n000000067F000040020000A000000015C000-000000067F000040020000A0000000160000__0000003B99F7F8A0\n000000067F000040020000A000000015C000-000000067F000040020000A0000000160000__0000005D2FFFFB38\n000000067F000040020000A0000000160000-000000067F000040020000A0000000164000__00000038E67ABFA0\n000000067F000040020000A0000000160000-000000067F000040020000A0000000164000__0000003903F1CFE8\n000000067F000040020000A0000000160000-000000067F000040020000A0000000164000__0000003B99F7F8A0\n000000067F000040020000A0000000160000-000000067F000040020000A0000000164000__0000005D2FFFFB38\n000000067F000040020000A0000000161450-000000067F000040020000A0000000169E01__0000001C
725D0191-0000002070591C61\n000000067F000040020000A0000000164000-000000067F000040020000A0000000168000__00000038E67ABFA0\n000000067F000040020000A0000000164000-000000067F000040020000A0000000168000__0000003903F1CFE8\n000000067F000040020000A0000000164000-000000067F000040020000A0000000168000__0000003B99F7F8A0\n000000067F000040020000A0000000164000-000000067F000040020000A0000000168000__0000005D2FFFFB38\n000000067F000040020000A0000000168000-000000067F000040020000A000000016C000__00000038E67ABFA0\n000000067F000040020000A0000000168000-000000067F000040020000A000000016C000__0000003903F1CFE8\n000000067F000040020000A0000000168000-000000067F000040020000A000000016C000__0000003B99F7F8A0\n000000067F000040020000A0000000168000-000000067F000040020000A000000016C000__0000005D2FFFFB38\n000000067F000040020000A0000000169E01-000000067F000040020000A00000001727DF__0000001C725D0191-0000002070591C61\n000000067F000040020000A000000016C000-000000067F000040020000A0000000170000__00000038E67ABFA0\n000000067F000040020000A000000016C000-000000067F000040020000A0000000170000__0000003903F1CFE8\n000000067F000040020000A000000016C000-000000067F000040020000A0000000170000__0000003B99F7F8A0\n000000067F000040020000A000000016C000-000000067F000040020000A0000000170000__0000005D2FFFFB38\n000000067F000040020000A0000000170000-000000067F000040020000A0000000174000__00000038E67ABFA0\n000000067F000040020000A0000000170000-000000067F000040020000A0000000174000__0000003903F1CFE8\n000000067F000040020000A0000000170000-000000067F000040020000A0000000174000__0000003B99F7F8A0\n000000067F000040020000A0000000170000-000000067F000040020000A0000000174000__0000005D2FFFFB38\n000000067F000040020000A00000001727DF-000000067F000040020000A000000017B1E4__0000001C725D0191-0000002070591C61\n000000067F000040020000A0000000174000-000000067F000040020000A0000000178000__00000038E67ABFA0\n000000067F000040020000A0000000174000-000000067F000040020000A0000000178000__0000003903F1CFE8\n000000067F000040020000A0000000174000-000000067F000040020000A0000000178000__0000
003B99F7F8A0\n000000067F000040020000A0000000174000-000000067F000040020000A0000000178000__0000005D2FFFFB38\n000000067F000040020000A0000000178000-000000067F000040020000A000000017C000__00000038E67ABFA0\n000000067F000040020000A0000000178000-000000067F000040020000A000000017C000__0000003903F1CFE8\n000000067F000040020000A0000000178000-000000067F000040020000A000000017C000__0000003B99F7F8A0\n000000067F000040020000A0000000178000-000000067F000040020000A000000017C000__0000005D2FFFFB38\n000000067F000040020000A000000017B1E4-000000067F000040020000A0000000183BE2__0000001C725D0191-0000002070591C61\n000000067F000040020000A000000017C000-000000067F000040020000A0000000180000__00000038E67ABFA0\n000000067F000040020000A000000017C000-000000067F000040020000A0000000180000__0000003903F1CFE8\n000000067F000040020000A000000017C000-000000067F000040020000A0000000180000__0000003B99F7F8A0\n000000067F000040020000A000000017C000-000000067F000040020000A0000000180000__0000005D2FFFFB38\n000000067F000040020000A0000000180000-000000067F000040020000A0000000184000__00000038E67ABFA0\n000000067F000040020000A0000000180000-000000067F000040020000A0000000184000__0000003903F1CFE8\n000000067F000040020000A0000000180000-000000067F000040020000A0000000184000__0000003B99F7F8A0\n000000067F000040020000A0000000180000-000000067F000040020000A0000000184000__0000005D2FFFFB38\n000000067F000040020000A0000000183BE2-000000067F000040020000A000000018C5D6__0000001C725D0191-0000002070591C61\n000000067F000040020000A0000000184000-000000067F000040020000A0000000188000__00000038E67ABFA0\n000000067F000040020000A0000000184000-000000067F000040020000A0000000188000__0000003903F1CFE8\n000000067F000040020000A0000000184000-000000067F000040020000A0000000188000__0000003B99F7F8A0\n000000067F000040020000A0000000184000-000000067F000040020000A0000000188000__0000005D2FFFFB38\n000000067F000040020000A0000000188000-000000067F000040020000A000000018C000__00000038E67ABFA0\n000000067F000040020000A0000000188000-000000067F000040020000A000000018C000__0000003903F1CFE8\
n000000067F000040020000A0000000188000-000000067F000040020000A000000018C000__0000003B99F7F8A0\n000000067F000040020000A0000000188000-000000067F000040020000A000000018C000__0000005D2FFFFB38\n000000067F000040020000A000000018C000-000000067F000040020000A0000000190000__00000038E67ABFA0\n000000067F000040020000A000000018C000-000000067F000040020000A0000000190000__0000003903F1CFE8\n000000067F000040020000A000000018C000-000000067F000040020000A0000000190000__0000003B99F7F8A0\n000000067F000040020000A000000018C000-000000067F000040020000A0000000190000__0000005D2FFFFB38\n000000067F000040020000A000000018C5D6-000000067F000040020000A0000000194FB6__0000001C725D0191-0000002070591C61\n000000067F000040020000A0000000190000-000000067F000040020000A0000000194000__00000038E67ABFA0\n000000067F000040020000A0000000190000-000000067F000040020000A0000000194000__0000003903F1CFE8\n000000067F000040020000A0000000190000-000000067F000040020000A0000000194000__0000003B99F7F8A0\n000000067F000040020000A0000000190000-000000067F000040020000A0000000194000__0000005D2FFFFB38\n000000067F000040020000A0000000194000-000000067F000040020000A0000000198000__00000038E67ABFA0\n000000067F000040020000A0000000194000-000000067F000040020000A0000000198000__0000003903F1CFE8\n000000067F000040020000A0000000194000-000000067F000040020000A0000000198000__0000003B99F7F8A0\n000000067F000040020000A0000000194000-000000067F000040020000A0000000198000__0000005D2FFFFB38\n000000067F000040020000A0000000194FB6-000000067F000040020000A000000019D971__0000001C725D0191-0000002070591C61\n000000067F000040020000A0000000198000-000000067F000040020000A000000019C000__00000038E67ABFA0\n000000067F000040020000A0000000198000-000000067F000040020000A000000019C000__0000003903F1CFE8\n000000067F000040020000A0000000198000-000000067F000040020000A000000019C000__0000003B99F7F8A0\n000000067F000040020000A0000000198000-000000067F000040020000A000000019C000__0000005D2FFFFB38\n000000067F000040020000A000000019C000-000000067F000040020000A00000001A0000__00000038E67ABFA0\n000000067F00
0040020000A000000019C000-000000067F000040020000A00000001A0000__0000003903F1CFE8\n000000067F000040020000A000000019C000-000000067F000040020000A00000001A0000__0000003B99F7F8A0\n000000067F000040020000A000000019C000-000000067F000040020000A00000001A0000__0000005D2FFFFB38\n000000067F000040020000A000000019D971-000000067F000040020000A00000001A6321__0000001C725D0191-0000002070591C61\n000000067F000040020000A00000001A0000-000000067F000040020000A00000001A4000__00000038E67ABFA0\n000000067F000040020000A00000001A0000-000000067F000040020000A00000001A4000__0000003903F1CFE8\n000000067F000040020000A00000001A0000-000000067F000040020000A00000001A4000__0000003B99F7F8A0\n000000067F000040020000A00000001A0000-000000067F000040020000A00000001A4000__0000005D2FFFFB38\n000000067F000040020000A00000001A4000-000000067F000040020000A00000001A8000__00000038E67ABFA0\n000000067F000040020000A00000001A4000-000000067F000040020000A00000001A8000__0000003903F1CFE8\n000000067F000040020000A00000001A4000-000000067F000040020000A00000001A8000__0000003B99F7F8A0\n000000067F000040020000A00000001A4000-000000067F000040020000A00000001A8000__0000005D2FFFFB38\n000000067F000040020000A00000001A6321-000000067F000040020000A00000001AECFE__0000001C725D0191-0000002070591C61\n000000067F000040020000A00000001A8000-000000067F000040020000A00000001AC000__00000038E67ABFA0\n000000067F000040020000A00000001A8000-000000067F000040020000A00000001AC000__0000003903F1CFE8\n000000067F000040020000A00000001A8000-000000067F000040020000A00000001AC000__0000003B99F7F8A0\n000000067F000040020000A00000001A8000-000000067F000040020000A00000001AC000__0000005D2FFFFB38\n000000067F000040020000A00000001AC000-000000067F000040020000A00000001B0000__00000038E67ABFA0\n000000067F000040020000A00000001AC000-000000067F000040020000A00000001B0000__0000003903F1CFE8\n000000067F000040020000A00000001AC000-000000067F000040020000A00000001B0000__0000003B99F7F8A0\n000000067F000040020000A00000001AC000-000000067F000040020000A00000001B0000__0000005D2FFFFB38\n000000067F000040020000A00
000001AECFE-000000067F000040020000A00000001B76FB__0000001C725D0191-0000002070591C61\n000000067F000040020000A00000001B0000-000000067F000040020000A00000001B4000__00000038E67ABFA0\n000000067F000040020000A00000001B0000-000000067F000040020000A00000001B4000__0000003903F1CFE8\n000000067F000040020000A00000001B0000-000000067F000040020000A00000001B4000__0000003B99F7F8A0\n000000067F000040020000A00000001B0000-000000067F000040020000A00000001B4000__0000005D2FFFFB38\n000000067F000040020000A00000001B4000-000000067F000040020000A00000001B8000__00000038E67ABFA0\n000000067F000040020000A00000001B4000-000000067F000040020000A00000001B8000__0000003903F1CFE8\n000000067F000040020000A00000001B4000-000000067F000040020000A00000001B8000__0000003B99F7F8A0\n000000067F000040020000A00000001B4000-000000067F000040020000A00000001B8000__0000005D2FFFFB38\n000000067F000040020000A00000001B76FB-000000067F000040020000A00000001C00F5__0000001C725D0191-0000002070591C61\n000000067F000040020000A00000001B8000-000000067F000040020000A00000001BC000__00000038E67ABFA0\n000000067F000040020000A00000001B8000-000000067F000040020000A00000001BC000__0000003903F1CFE8\n000000067F000040020000A00000001B8000-000000067F000040020000A00000001BC000__0000003B99F7F8A0\n000000067F000040020000A00000001B8000-000000067F000040020000A00000001BC000__0000005D2FFFFB38\n000000067F000040020000A00000001BC000-000000067F000040020000A00000001C0000__00000038E67ABFA0\n000000067F000040020000A00000001BC000-000000067F000040020000A00000001C0000__0000003903F1CFE8\n000000067F000040020000A00000001BC000-000000067F000040020000A00000001C0000__0000003B99F7F8A0\n000000067F000040020000A00000001BC000-000000067F000040020000A00000001C0000__0000005D2FFFFB38\n000000067F000040020000A00000001C0000-000000067F000040020000A00000001C4000__00000038E67ABFA0\n000000067F000040020000A00000001C0000-000000067F000040020000A00000001C4000__0000003903F1CFE8\n000000067F000040020000A00000001C0000-000000067F000040020000A00000001C4000__0000003B99F7F8A0\n000000067F000040020000A00000001C0000-0
00000067F000040020000A00000001C4000__0000005D2FFFFB38\n000000067F000040020000A00000001C00F5-000000067F000040020000A00000001C8AE1__0000001C725D0191-0000002070591C61\n000000067F000040020000A00000001C4000-000000067F000040020000A00000001C8000__00000038E67ABFA0\n000000067F000040020000A00000001C4000-000000067F000040020000A00000001C8000__0000003903F1CFE8\n000000067F000040020000A00000001C4000-000000067F000040020000A00000001C8000__0000003B99F7F8A0\n000000067F000040020000A00000001C4000-000000067F000040020000A00000001C8000__0000005D2FFFFB38\n000000067F000040020000A00000001C8000-000000067F000040020000A00000001CC000__00000038E67ABFA0\n000000067F000040020000A00000001C8000-000000067F000040020000A00000001CC000__0000003903F1CFE8\n000000067F000040020000A00000001C8000-000000067F000040020000A00000001CC000__0000003B99F7F8A0\n000000067F000040020000A00000001C8000-000000067F000040020000A00000001CC000__0000005D2FFFFB38\n000000067F000040020000A00000001C8AE1-000000067F000040020000A00000001D14C2__0000001C725D0191-0000002070591C61\n000000067F000040020000A00000001CC000-000000067F000040020000A00000001D0000__00000038E67ABFA0\n000000067F000040020000A00000001CC000-000000067F000040020000A00000001D0000__0000003903F1CFE8\n000000067F000040020000A00000001CC000-000000067F000040020000A00000001D0000__0000003B99F7F8A0\n000000067F000040020000A00000001CC000-000000067F000040020000A00000001D0000__0000005D2FFFFB38\n000000067F000040020000A00000001D0000-000000067F000040020000A00000001D4000__00000038E67ABFA0\n000000067F000040020000A00000001D0000-000000067F000040020000A00000001D4000__0000003903F1CFE8\n000000067F000040020000A00000001D0000-000000067F000040020000A00000001D4000__0000003B99F7F8A0\n000000067F000040020000A00000001D0000-000000067F000040020000A00000001D4000__0000005D2FFFFB38\n000000067F000040020000A00000001D14C2-000000067F000040020000A00000001D9E7E__0000001C725D0191-0000002070591C61\n000000067F000040020000A00000001D4000-000000067F000040020000A00000001D8000__00000038E67ABFA0\n000000067F000040020000A00000001D40
00-000000067F000040020000A00000001D8000__0000003903F1CFE8\n000000067F000040020000A00000001D4000-000000067F000040020000A00000001D8000__0000003B99F7F8A0\n000000067F000040020000A00000001D4000-000000067F000040020000A00000001D8000__0000005D2FFFFB38\n000000067F000040020000A00000001D8000-000000067F000040020000A00000001DC000__00000038E67ABFA0\n000000067F000040020000A00000001D8000-000000067F000040020000A00000001DC000__0000003903F1CFE8\n000000067F000040020000A00000001D8000-000000067F000040020000A00000001DC000__0000003B99F7F8A0\n000000067F000040020000A00000001D8000-000000067F000040020000A00000001DC000__0000005D2FFFFB38\n000000067F000040020000A00000001D9E7E-000000067F000040020000A00000001E282E__0000001C725D0191-0000002070591C61\n000000067F000040020000A00000001DC000-000000067F000040020000A00000001E0000__00000038E67ABFA0\n000000067F000040020000A00000001DC000-000000067F000040020000A00000001E0000__0000003903F1CFE8\n000000067F000040020000A00000001DC000-000000067F000040020000A00000001E0000__0000003B99F7F8A0\n000000067F000040020000A00000001DC000-000000067F000040020000A00000001E0000__0000005D2FFFFB38\n000000067F000040020000A00000001E0000-000000067F000040020000A00000001E4000__00000038E67ABFA0\n000000067F000040020000A00000001E0000-000000067F000040020000A00000001E4000__0000003903F1CFE8\n000000067F000040020000A00000001E0000-000000067F000040020000A00000001E4000__0000003B99F7F8A0\n000000067F000040020000A00000001E0000-000000067F000040020000A00000001E4000__0000005D2FFFFB38\n000000067F000040020000A00000001E282E-000000067F000040020000A00000001EB21C__0000001C725D0191-0000002070591C61\n000000067F000040020000A00000001E4000-000000067F000040020000A00000001E8000__00000038E67ABFA0\n000000067F000040020000A00000001E4000-000000067F000040020000A00000001E8000__0000003903F1CFE8\n000000067F000040020000A00000001E4000-000000067F000040020000A00000001E8000__0000003B99F7F8A0\n000000067F000040020000A00000001E4000-000000067F000040020000A00000001E8000__0000005D2FFFFB38\n000000067F000040020000A00000001E8000-000000067F
000040020000A00000001EC000__00000038E67ABFA0\n000000067F000040020000A00000001E8000-000000067F000040020000A00000001EC000__0000003903F1CFE8\n000000067F000040020000A00000001E8000-000000067F000040020000A00000001EC000__0000003B99F7F8A0\n000000067F000040020000A00000001E8000-000000067F000040020000A00000001EC000__0000005D2FFFFB38\n000000067F000040020000A00000001EB21C-000000067F000040020000A00000001F3C10__0000001C725D0191-0000002070591C61\n000000067F000040020000A00000001EC000-000000067F000040020000A00000001F0000__00000038E67ABFA0\n000000067F000040020000A00000001EC000-000000067F000040020000A00000001F0000__0000003903F1CFE8\n000000067F000040020000A00000001EC000-000000067F000040020000A00000001F0000__0000003B99F7F8A0\n000000067F000040020000A00000001EC000-000000067F000040020000A00000001F0000__0000005D2FFFFB38\n000000067F000040020000A00000001F0000-000000067F000040020000A00000001F4000__00000038E67ABFA0\n000000067F000040020000A00000001F0000-000000067F000040020000A00000001F4000__0000003903F1CFE8\n000000067F000040020000A00000001F0000-000000067F000040020000A00000001F4000__0000003B99F7F8A0\n000000067F000040020000A00000001F0000-000000067F000040020000A00000001F4000__0000005D2FFFFB38\n000000067F000040020000A00000001F3C10-000000067F000040020000A00000001FC601__0000001C725D0191-0000002070591C61\n000000067F000040020000A00000001F4000-000000067F000040020000A00000001F8000__00000038E67ABFA0\n000000067F000040020000A00000001F4000-000000067F000040020000A00000001F8000__0000003903F1CFE8\n000000067F000040020000A00000001F4000-000000067F000040020000A00000001F8000__0000003B99F7F8A0\n000000067F000040020000A00000001F4000-000000067F000040020000A00000001F8000__0000005D2FFFFB38\n000000067F000040020000A00000001F8000-000000067F000040020000A00000001FC000__00000038E67ABFA0\n000000067F000040020000A00000001F8000-000000067F000040020000A00000001FC000__0000003903F1CFE8\n000000067F000040020000A00000001F8000-000000067F000040020000A00000001FC000__0000003B99F7F8A0\n000000067F000040020000A00000001F8000-000000067F000040020000A
00000001FC000__0000005D2FFFFB38\n000000067F000040020000A00000001FC000-000000067F000040020000A0000000200000__00000038E67ABFA0\n000000067F000040020000A00000001FC000-000000067F000040020000A0000000200000__0000003903F1CFE8\n000000067F000040020000A00000001FC000-000000067F000040020000A0000000200000__0000003B99F7F8A0\n000000067F000040020000A00000001FC000-000000067F000040020000A0000000200000__0000005D2FFFFB38\n000000067F000040020000A00000001FC601-000000067F000040020000A0000000204FDD__0000001C725D0191-0000002070591C61\n000000067F000040020000A0000000200000-000000067F000040020000A0000000204000__00000038E67ABFA0\n000000067F000040020000A0000000200000-000000067F000040020000A0000000204000__0000003903F1CFE8\n000000067F000040020000A0000000200000-000000067F000040020000A0000000204000__0000003B99F7F8A0\n000000067F000040020000A0000000200000-000000067F000040020000A0000000204000__0000005D2FFFFB38\n000000067F000040020000A0000000204000-000000067F000040020000A0000000208000__00000038E67ABFA0\n000000067F000040020000A0000000204000-000000067F000040020000A0000000208000__0000003903F1CFE8\n000000067F000040020000A0000000204000-000000067F000040020000A0000000208000__0000003B99F7F8A0\n000000067F000040020000A0000000204000-000000067F000040020000A0000000208000__0000005D2FFFFB38\n000000067F000040020000A0000000204FDD-000000067F000040020000A000000020D9BD__0000001C725D0191-0000002070591C61\n000000067F000040020000A0000000208000-000000067F000040020000A000000020C000__00000038E67ABFA0\n000000067F000040020000A0000000208000-000000067F000040020000A000000020C000__0000003903F1CFE8\n000000067F000040020000A0000000208000-000000067F000040020000A000000020C000__0000003B99F7F8A0\n000000067F000040020000A0000000208000-000000067F000040020000A000000020C000__0000005D2FFFFB38\n000000067F000040020000A000000020C000-000000067F000040020000A0000000210000__00000038E67ABFA0\n000000067F000040020000A000000020C000-000000067F000040020000A0000000210000__0000003903F1CFE8\n000000067F000040020000A000000020C000-000000067F000040020000A0000000210000
__0000003B99F7F8A0\n000000067F000040020000A000000020C000-000000067F000040020000A0000000210000__0000005D2FFFFB38\n000000067F000040020000A000000020D9BD-000000067F000040020000A000000021637A__0000001C725D0191-0000002070591C61\n000000067F000040020000A0000000210000-000000067F000040020000A0000000214000__00000038E67ABFA0\n000000067F000040020000A0000000210000-000000067F000040020000A0000000214000__0000003903F1CFE8\n000000067F000040020000A0000000210000-000000067F000040020000A0000000214000__0000003B99F7F8A0\n000000067F000040020000A0000000210000-000000067F000040020000A0000000214000__0000005D2FFFFB38\n000000067F000040020000A0000000214000-000000067F000040020000A0000000218000__00000038E67ABFA0\n000000067F000040020000A0000000214000-000000067F000040020000A0000000218000__0000003903F1CFE8\n000000067F000040020000A0000000214000-000000067F000040020000A0000000218000__0000003B99F7F8A0\n000000067F000040020000A0000000214000-000000067F000040020000A0000000218000__0000005D2FFFFB38\n000000067F000040020000A000000021637A-000000067F000040020000A000000021ED3A__0000001C725D0191-0000002070591C61\n000000067F000040020000A0000000218000-000000067F000040020000A000000021C000__00000038E67ABFA0\n000000067F000040020000A0000000218000-000000067F000040020000A000000021C000__0000003903F1CFE8\n000000067F000040020000A0000000218000-000000067F000040020000A000000021C000__0000003B99F7F8A0\n000000067F000040020000A0000000218000-000000067F000040020000A000000021C000__0000005D2FFFFB38\n000000067F000040020000A000000021C000-000000067F000040020000A0000000220000__00000038E67ABFA0\n000000067F000040020000A000000021C000-000000067F000040020000A0000000220000__0000003903F1CFE8\n000000067F000040020000A000000021C000-000000067F000040020000A0000000220000__0000003B99F7F8A0\n000000067F000040020000A000000021C000-000000067F000040020000A0000000220000__0000005D2FFFFB38\n000000067F000040020000A000000021ED3A-000000067F000040020000A000000022772C__0000001C725D0191-0000002070591C61\n000000067F000040020000A0000000220000-000000067F000040020000A000000022
4000__00000038E67ABFA0\n000000067F000040020000A0000000220000-000000067F000040020000A0000000224000__0000003903F1CFE8\n000000067F000040020000A0000000220000-000000067F000040020000A0000000224000__0000003B99F7F8A0\n000000067F000040020000A0000000220000-000000067F000040020000A0000000224000__0000005D2FFFFB38\n000000067F000040020000A0000000224000-000000067F000040020000A0000000228000__00000038E67ABFA0\n000000067F000040020000A0000000224000-000000067F000040020000A0000000228000__0000003903F1CFE8\n000000067F000040020000A0000000224000-000000067F000040020000A0000000228000__0000003B99F7F8A0\n000000067F000040020000A0000000224000-000000067F000040020000A0000000228000__0000005D2FFFFB38\n000000067F000040020000A000000022772C-030000000000000000000000000000000002__0000001C725D0191-0000002070591C61\n000000067F000040020000A0000000228000-000000067F000040020000A000000022C000__00000038E1ABFE28\n000000067F000040020000A0000000228000-000000067F000040020000A000000022C000__00000038E9AF7F00\n000000067F000040020000A0000000228000-000000067F000040020000A000000022C000__0000003903F1CFE8\n000000067F000040020000A0000000228000-000000067F000040020000A000000022C000__0000003B99F7F8A0\n000000067F000040020000A0000000228000-000000067F000040020000A000000022C000__0000005D2FFFFB38\n000000067F000040020000A000000022855D-000000067F000040020000A0000000230F51__0000002070591C61-000000211009E359\n000000067F000040020000A000000022C000-000000067F000040020000A0000000230000__00000038E1ABFE28\n000000067F000040020000A000000022C000-000000067F000040020000A0000000230000__00000038E9AF7F00\n000000067F000040020000A000000022C000-000000067F000040020000A0000000230000__0000003903F1CFE8\n000000067F000040020000A000000022C000-000000067F000040020000A0000000230000__0000003B99F7F8A0\n000000067F000040020000A000000022C000-000000067F000040020000A0000000230000__0000005D2FFFFB38\n000000067F000040020000A0000000230000-000000067F000040020000A0000000234000__00000038E1ABFE28\n000000067F000040020000A0000000230000-000000067F000040020000A0000000234000__0000003
8E9AF7F00\n000000067F000040020000A0000000230000-000000067F000040020000A0000000234000__0000003903F1CFE8\n000000067F000040020000A0000000230000-000000067F000040020000A0000000234000__0000003B99F7F8A0\n000000067F000040020000A0000000230000-000000067F000040020000A0000000234000__0000005D2FFFFB38\n000000067F000040020000A0000000230F51-000000067F000040020000A0000000239942__0000002070591C61-000000211009E359\n000000067F000040020000A0000000234000-000000067F000040020000A0000000238000__00000038E1ABFE28\n000000067F000040020000A0000000234000-000000067F000040020000A0000000238000__00000038E9AF7F00\n000000067F000040020000A0000000234000-000000067F000040020000A0000000238000__0000003903F1CFE8\n000000067F000040020000A0000000234000-000000067F000040020000A0000000238000__0000003B99F7F8A0\n000000067F000040020000A0000000234000-000000067F000040020000A0000000238000__0000005D2FFFFB38\n000000067F000040020000A0000000238000-000000067F000040020000A000000023C000__00000038E1ABFE28\n000000067F000040020000A0000000238000-000000067F000040020000A000000023C000__00000038E9AF7F00\n000000067F000040020000A0000000238000-000000067F000040020000A000000023C000__0000003903F1CFE8\n000000067F000040020000A0000000238000-000000067F000040020000A000000023C000__0000003B99F7F8A0\n000000067F000040020000A0000000238000-000000067F000040020000A000000023C000__0000005D2FFFFB38\n000000067F000040020000A0000000239942-000000067F000040020000A0000000242314__0000002070591C61-000000211009E359\n000000067F000040020000A000000023C000-000000067F000040020000A0000000240000__00000038E1ABFE28\n000000067F000040020000A000000023C000-000000067F000040020000A0000000240000__00000038E9AF7F00\n000000067F000040020000A000000023C000-000000067F000040020000A0000000240000__0000003903F1CFE8\n000000067F000040020000A000000023C000-000000067F000040020000A0000000240000__0000003B99F7F8A0\n000000067F000040020000A000000023C000-000000067F000040020000A0000000240000__0000005D2FFFFB38\n000000067F000040020000A0000000240000-000000067F000040020000A0000000244000__00000038E1ABFE28\n00
0000067F000040020000A0000000240000-000000067F000040020000A0000000244000__00000038E9AF7F00\n000000067F000040020000A0000000240000-000000067F000040020000A0000000244000__0000003903F1CFE8\n000000067F000040020000A0000000240000-000000067F000040020000A0000000244000__0000003B99F7F8A0\n000000067F000040020000A0000000240000-000000067F000040020000A0000000244000__0000005D2FFFFB38\n000000067F000040020000A0000000242314-000000067F000040020000A000000024ACDD__0000002070591C61-000000211009E359\n000000067F000040020000A0000000244000-000000067F000040020000A0000000248000__00000038E1ABFE28\n000000067F000040020000A0000000244000-000000067F000040020000A0000000248000__00000038E9AF7F00\n000000067F000040020000A0000000244000-000000067F000040020000A0000000248000__0000003903F1CFE8\n000000067F000040020000A0000000244000-000000067F000040020000A0000000248000__0000003B99F7F8A0\n000000067F000040020000A0000000244000-000000067F000040020000A0000000248000__0000005D2FFFFB38\n000000067F000040020000A0000000248000-000000067F000040020000A000000024C000__00000038E1ABFE28\n000000067F000040020000A0000000248000-000000067F000040020000A000000024C000__00000038E9AF7F00\n000000067F000040020000A0000000248000-000000067F000040020000A000000024C000__0000003903F1CFE8\n000000067F000040020000A0000000248000-000000067F000040020000A000000024C000__0000003B99F7F8A0\n000000067F000040020000A0000000248000-000000067F000040020000A000000024C000__0000005D2FFFFB38\n000000067F000040020000A000000024ACDD-000000067F000040020000A0000000253697__0000002070591C61-000000211009E359\n000000067F000040020000A000000024C000-000000067F000040020000A0000000250000__00000038E1ABFE28\n000000067F000040020000A000000024C000-000000067F000040020000A0000000250000__00000038E9AF7F00\n000000067F000040020000A000000024C000-000000067F000040020000A0000000250000__0000003903F1CFE8\n000000067F000040020000A000000024C000-000000067F000040020000A0000000250000__0000003B99F7F8A0\n000000067F000040020000A000000024C000-000000067F000040020000A0000000250000__0000005D2FFFFB38\n000000067F00004
0020000A0000000250000-000000067F000040020000A0000000254000__00000038E1ABFE28\n000000067F000040020000A0000000250000-000000067F000040020000A0000000254000__00000038E9AF7F00\n000000067F000040020000A0000000250000-000000067F000040020000A0000000254000__0000003903F1CFE8\n000000067F000040020000A0000000250000-000000067F000040020000A0000000254000__0000003B99F7F8A0\n000000067F000040020000A0000000250000-000000067F000040020000A0000000254000__0000005D2FFFFB38\n000000067F000040020000A0000000253697-000000067F000040020000A000000025C068__0000002070591C61-000000211009E359\n000000067F000040020000A0000000254000-000000067F000040020000A0000000258000__00000038E1ABFE28\n000000067F000040020000A0000000254000-000000067F000040020000A0000000258000__00000038E9AF7F00\n000000067F000040020000A0000000254000-000000067F000040020000A0000000258000__0000003903F1CFE8\n000000067F000040020000A0000000254000-000000067F000040020000A0000000258000__0000003B99F7F8A0\n000000067F000040020000A0000000254000-000000067F000040020000A0000000258000__0000005D2FFFFB38\n000000067F000040020000A0000000258000-000000067F000040020000A000000025C000__00000038E1ABFE28\n000000067F000040020000A0000000258000-000000067F000040020000A000000025C000__00000038E9AF7F00\n000000067F000040020000A0000000258000-000000067F000040020000A000000025C000__0000003903F1CFE8\n000000067F000040020000A0000000258000-000000067F000040020000A000000025C000__0000003B99F7F8A0\n000000067F000040020000A0000000258000-000000067F000040020000A000000025C000__0000005D2FFFFB38\n000000067F000040020000A000000025C000-000000067F000040020000A0000000260000__00000038E1ABFE28\n000000067F000040020000A000000025C000-000000067F000040020000A0000000260000__00000038E9AF7F00\n000000067F000040020000A000000025C000-000000067F000040020000A0000000260000__0000003903F1CFE8\n000000067F000040020000A000000025C000-000000067F000040020000A0000000260000__0000003B99F7F8A0\n000000067F000040020000A000000025C000-000000067F000040020000A0000000260000__0000005D2FFFFB38\n000000067F000040020000A000000025C068-00000006
7F000040020000A0000000264A5C__0000002070591C61-000000211009E359\n000000067F000040020000A0000000260000-000000067F000040020000A0000000264000__00000038E1ABFE28\n000000067F000040020000A0000000260000-000000067F000040020000A0000000264000__00000038E9AF7F00\n000000067F000040020000A0000000260000-000000067F000040020000A0000000264000__0000003903F1CFE8\n000000067F000040020000A0000000260000-000000067F000040020000A0000000264000__0000003B99F7F8A0\n000000067F000040020000A0000000260000-000000067F000040020000A0000000264000__0000005D2FFFFB38\n000000067F000040020000A0000000264000-000000067F000040020000A0000000268000__00000038E1ABFE28\n000000067F000040020000A0000000264000-000000067F000040020000A0000000268000__00000038E9AF7F00\n000000067F000040020000A0000000264000-000000067F000040020000A0000000268000__0000003903F1CFE8\n000000067F000040020000A0000000264000-000000067F000040020000A0000000268000__0000003B99F7F8A0\n000000067F000040020000A0000000264000-000000067F000040020000A0000000268000__0000005D2FFFFB38\n000000067F000040020000A0000000264A5C-000000067F000040020000A000000026D448__0000002070591C61-000000211009E359\n000000067F000040020000A0000000268000-000000067F000040020000A000000026C000__00000038E1ABFE28\n000000067F000040020000A0000000268000-000000067F000040020000A000000026C000__00000038E9AF7F00\n000000067F000040020000A0000000268000-000000067F000040020000A000000026C000__0000003903F1CFE8\n000000067F000040020000A0000000268000-000000067F000040020000A000000026C000__0000003B99F7F8A0\n000000067F000040020000A0000000268000-000000067F000040020000A000000026C000__0000005D2FFFFB38\n000000067F000040020000A000000026C000-000000067F000040020000A0000000270000__00000038E1ABFE28\n000000067F000040020000A000000026C000-000000067F000040020000A0000000270000__00000038E9AF7F00\n000000067F000040020000A000000026C000-000000067F000040020000A0000000270000__0000003903F1CFE8\n000000067F000040020000A000000026C000-000000067F000040020000A0000000270000__0000003B99F7F8A0\n000000067F000040020000A000000026C000-000000067F00004002000
0A0000000270000__0000005D2FFFFB38\n000000067F000040020000A000000026D448-000000067F000040020000A0000000275E35__0000002070591C61-000000211009E359\n000000067F000040020000A0000000270000-000000067F000040020000A0000000274000__00000038E1ABFE28\n000000067F000040020000A0000000270000-000000067F000040020000A0000000274000__00000038E9AF7F00\n000000067F000040020000A0000000270000-000000067F000040020000A0000000274000__0000003903F1CFE8\n000000067F000040020000A0000000270000-000000067F000040020000A0000000274000__0000003B99F7F8A0\n000000067F000040020000A0000000270000-000000067F000040020000A0000000274000__0000005D2FFFFB38\n000000067F000040020000A0000000274000-000000067F000040020000A0000000278000__00000038E1ABFE28\n000000067F000040020000A0000000274000-000000067F000040020000A0000000278000__00000038E9AF7F00\n000000067F000040020000A0000000274000-000000067F000040020000A0000000278000__0000003903F1CFE8\n000000067F000040020000A0000000274000-000000067F000040020000A0000000278000__0000003B99F7F8A0\n000000067F000040020000A0000000274000-000000067F000040020000A0000000278000__0000005D2FFFFB38\n000000067F000040020000A0000000275E35-000000067F000040020000A000000027E807__0000002070591C61-000000211009E359\n000000067F000040020000A0000000278000-000000067F000040020000A000000027C000__00000038E1ABFE28\n000000067F000040020000A0000000278000-000000067F000040020000A000000027C000__00000038E9AF7F00\n000000067F000040020000A0000000278000-000000067F000040020000A000000027C000__0000003903F1CFE8\n000000067F000040020000A0000000278000-000000067F000040020000A000000027C000__0000003B99F7F8A0\n000000067F000040020000A0000000278000-000000067F000040020000A000000027C000__0000005D2FFFFB38\n000000067F000040020000A000000027C000-000000067F000040020000A0000000280000__00000021DAB8B3D0\n000000067F000040020000A000000027C000-000000067F000040020000A0000000280000__00000038E9AF7F00\n000000067F000040020000A000000027C000-000000067F000040020000A0000000280000__0000003903F1CFE8\n000000067F000040020000A000000027C000-000000067F000040020000A00000002800
00__0000003B99F7F8A0\n000000067F000040020000A000000027C000-000000067F000040020000A0000000280000__0000005D2FFFFB38\n000000067F000040020000A000000027E807-000000067F000040020000A0000200000000__0000002070591C61-000000211009E359\n000000067F000040020000A000000027E9D5-000000067F000040020000A00000002873AE__000000211009E359-00000021AFB9E1E9\n000000067F000040020000A0000000280000-000000067F000040020000A0000000284000__00000021DAB8B3D0\n000000067F000040020000A0000000280000-000000067F000040020000A0000000284000__00000038E9AF7F00\n000000067F000040020000A0000000280000-000000067F000040020000A0000000284000__0000003903F1CFE8\n000000067F000040020000A0000000280000-000000067F000040020000A0000000284000__0000003B99F7F8A0\n000000067F000040020000A0000000280000-000000067F000040020000A0000000284000__0000005D2FFFFB38\n000000067F000040020000A0000000284000-000000067F000040020000A0000000288000__00000021DAB8B3D0\n000000067F000040020000A0000000284000-000000067F000040020000A0000000288000__00000038E9AF7F00\n000000067F000040020000A0000000284000-000000067F000040020000A0000000288000__0000003903F1CFE8\n000000067F000040020000A0000000284000-000000067F000040020000A0000000288000__0000003B99F7F8A0\n000000067F000040020000A0000000284000-000000067F000040020000A0000000288000__0000005D2FFFFB38\n000000067F000040020000A00000002873AE-000000067F000040020000A000000028FD67__000000211009E359-00000021AFB9E1E9\n000000067F000040020000A0000000288000-000000067F000040020000A000000028C000__00000021DAB8B3D0\n000000067F000040020000A0000000288000-000000067F000040020000A000000028C000__00000038E9AF7F00\n000000067F000040020000A0000000288000-000000067F000040020000A000000028C000__0000003903F1CFE8\n000000067F000040020000A0000000288000-000000067F000040020000A000000028C000__0000003B99F7F8A0\n000000067F000040020000A0000000288000-000000067F000040020000A000000028C000__0000005D2FFFFB38\n000000067F000040020000A000000028C000-000000067F000040020000A0000000290000__00000021DAB8B3D0\n000000067F000040020000A000000028C000-000000067F000040020000A0000000
290000__00000038E9AF7F00\n000000067F000040020000A000000028C000-000000067F000040020000A0000000290000__0000003903F1CFE8\n000000067F000040020000A000000028C000-000000067F000040020000A0000000290000__0000003B99F7F8A0\n000000067F000040020000A000000028C000-000000067F000040020000A0000000290000__0000005D2FFFFB38\n000000067F000040020000A000000028FD67-000000067F000040020000A0000000298739__000000211009E359-00000021AFB9E1E9\n000000067F000040020000A0000000290000-000000067F000040020000A0000000294000__00000021DAB8B3D0\n000000067F000040020000A0000000290000-000000067F000040020000A0000000294000__00000038E9AF7F00\n000000067F000040020000A0000000290000-000000067F000040020000A0000000294000__0000003903F1CFE8\n000000067F000040020000A0000000290000-000000067F000040020000A0000000294000__0000003B99F7F8A0\n000000067F000040020000A0000000290000-000000067F000040020000A0000000294000__0000005D2FFFFB38\n000000067F000040020000A0000000294000-000000067F000040020000A0000000298000__00000021DAB8B3D0\n000000067F000040020000A0000000294000-000000067F000040020000A0000000298000__00000038E9AF7F00\n000000067F000040020000A0000000294000-000000067F000040020000A0000000298000__0000003903F1CFE8\n000000067F000040020000A0000000294000-000000067F000040020000A0000000298000__0000003B99F7F8A0\n000000067F000040020000A0000000294000-000000067F000040020000A0000000298000__0000005D2FFFFB38\n000000067F000040020000A0000000298000-000000067F000040020000A000000029C000__00000021DAB8B3D0\n000000067F000040020000A0000000298000-000000067F000040020000A000000029C000__00000038E9AF7F00\n000000067F000040020000A0000000298000-000000067F000040020000A000000029C000__0000003903F1CFE8\n000000067F000040020000A0000000298000-000000067F000040020000A000000029C000__0000003B99F7F8A0\n000000067F000040020000A0000000298000-000000067F000040020000A000000029C000__0000005D2FFFFB38\n000000067F000040020000A0000000298739-000000067F000040020000A00000002A1125__000000211009E359-00000021AFB9E1E9\n000000067F000040020000A000000029C000-000000067F000040020000A00000002A0000__00000
021DAB8B3D0\n000000067F000040020000A000000029C000-000000067F000040020000A00000002A0000__00000038E9AF7F00\n000000067F000040020000A000000029C000-000000067F000040020000A00000002A0000__0000003903F1CFE8\n000000067F000040020000A000000029C000-000000067F000040020000A00000002A0000__0000003B99F7F8A0\n000000067F000040020000A000000029C000-000000067F000040020000A00000002A0000__0000005D2FFFFB38\n000000067F000040020000A00000002A0000-000000067F000040020000A00000002A4000__00000021DAB8B3D0\n000000067F000040020000A00000002A0000-000000067F000040020000A00000002A4000__00000038E9AF7F00\n000000067F000040020000A00000002A0000-000000067F000040020000A00000002A4000__0000003903F1CFE8\n000000067F000040020000A00000002A0000-000000067F000040020000A00000002A4000__0000003B99F7F8A0\n000000067F000040020000A00000002A0000-000000067F000040020000A00000002A4000__0000005D2FFFFB38\n000000067F000040020000A00000002A1125-000000067F000040020000A00000002A9B12__000000211009E359-00000021AFB9E1E9\n000000067F000040020000A00000002A4000-000000067F000040020000A00000002A8000__00000021DAB8B3D0\n000000067F000040020000A00000002A4000-000000067F000040020000A00000002A8000__00000038E9AF7F00\n000000067F000040020000A00000002A4000-000000067F000040020000A00000002A8000__0000003903F1CFE8\n000000067F000040020000A00000002A4000-000000067F000040020000A00000002A8000__0000003B99F7F8A0\n000000067F000040020000A00000002A4000-000000067F000040020000A00000002A8000__0000005D2FFFFB38\n000000067F000040020000A00000002A8000-000000067F000040020000A00000002AC000__00000021DAB8B3D0\n000000067F000040020000A00000002A8000-000000067F000040020000A00000002AC000__00000038E9AF7F00\n000000067F000040020000A00000002A8000-000000067F000040020000A00000002AC000__0000003903F1CFE8\n000000067F000040020000A00000002A8000-000000067F000040020000A00000002AC000__0000003B99F7F8A0\n000000067F000040020000A00000002A8000-000000067F000040020000A00000002AC000__0000005D2FFFFB38\n000000067F000040020000A00000002A9B12-000000067F000040020000A00000002B24F9__000000211009E359-00000021AFB9E1E9\n
000000067F000040020000A00000002AC000-000000067F000040020000A00000002B0000__00000021DAB8B3D0\n000000067F000040020000A00000002AC000-000000067F000040020000A00000002B0000__00000038E9AF7F00\n000000067F000040020000A00000002AC000-000000067F000040020000A00000002B0000__0000003903F1CFE8\n000000067F000040020000A00000002AC000-000000067F000040020000A00000002B0000__0000003B99F7F8A0\n000000067F000040020000A00000002AC000-000000067F000040020000A00000002B0000__0000005D2FFFFB38\n000000067F000040020000A00000002B0000-000000067F000040020000A00000002B4000__00000021DAB8B3D0\n000000067F000040020000A00000002B0000-000000067F000040020000A00000002B4000__00000038E9AF7F00\n000000067F000040020000A00000002B0000-000000067F000040020000A00000002B4000__0000003903F1CFE8\n000000067F000040020000A00000002B0000-000000067F000040020000A00000002B4000__0000003B99F7F8A0\n000000067F000040020000A00000002B0000-000000067F000040020000A00000002B4000__0000005D2FFFFB38\n000000067F000040020000A00000002B24F9-000000067F000040020000A00000002BAED2__000000211009E359-00000021AFB9E1E9\n000000067F000040020000A00000002B4000-000000067F000040020000A00000002B8000__00000021DAB8B3D0\n000000067F000040020000A00000002B4000-000000067F000040020000A00000002B8000__00000038E9AF7F00\n000000067F000040020000A00000002B4000-000000067F000040020000A00000002B8000__0000003903F1CFE8\n000000067F000040020000A00000002B4000-000000067F000040020000A00000002B8000__0000003B99F7F8A0\n000000067F000040020000A00000002B4000-000000067F000040020000A00000002B8000__0000005D2FFFFB38\n000000067F000040020000A00000002B8000-000000067F000040020000A00000002BC000__00000021DAB8B3D0\n000000067F000040020000A00000002B8000-000000067F000040020000A00000002BC000__00000038E9AF7F00\n000000067F000040020000A00000002B8000-000000067F000040020000A00000002BC000__0000003903F1CFE8\n000000067F000040020000A00000002B8000-000000067F000040020000A00000002BC000__0000003B99F7F8A0\n000000067F000040020000A00000002B8000-000000067F000040020000A00000002BC000__0000005D2FFFFB38\n000000067F000040020000A0000000
2BAED2-000000067F000040020000A00000002C3898__000000211009E359-00000021AFB9E1E9\n000000067F000040020000A00000002BC000-000000067F000040020000A00000002C0000__00000021DAB8B3D0\n000000067F000040020000A00000002BC000-000000067F000040020000A00000002C0000__00000038E9AF7F00\n000000067F000040020000A00000002BC000-000000067F000040020000A00000002C0000__0000003903F1CFE8\n000000067F000040020000A00000002BC000-000000067F000040020000A00000002C0000__0000003B99F7F8A0\n000000067F000040020000A00000002BC000-000000067F000040020000A00000002C0000__0000005D2FFFFB38\n000000067F000040020000A00000002C0000-000000067F000040020000A00000002C4000__00000021DAB8B3D0\n000000067F000040020000A00000002C0000-000000067F000040020000A00000002C4000__00000038E9AF7F00\n000000067F000040020000A00000002C0000-000000067F000040020000A00000002C4000__0000003903F1CFE8\n000000067F000040020000A00000002C0000-000000067F000040020000A00000002C4000__0000003B99F7F8A0\n000000067F000040020000A00000002C0000-000000067F000040020000A00000002C4000__0000005D2FFFFB38\n000000067F000040020000A00000002C3898-000000067F000040020000A00000002CC255__000000211009E359-00000021AFB9E1E9\n000000067F000040020000A00000002C4000-000000067F000040020000A00000002C8000__00000021DAB8B3D0\n000000067F000040020000A00000002C4000-000000067F000040020000A00000002C8000__00000038E9AF7F00\n000000067F000040020000A00000002C4000-000000067F000040020000A00000002C8000__0000003903F1CFE8\n000000067F000040020000A00000002C4000-000000067F000040020000A00000002C8000__0000003B99F7F8A0\n000000067F000040020000A00000002C4000-000000067F000040020000A00000002C8000__0000005D2FFFFB38\n000000067F000040020000A00000002C8000-000000067F000040020000A00000002CC000__00000021DAB8B3D0\n000000067F000040020000A00000002C8000-000000067F000040020000A00000002CC000__00000038E9AF7F00\n000000067F000040020000A00000002C8000-000000067F000040020000A00000002CC000__0000003903F1CFE8\n000000067F000040020000A00000002C8000-000000067F000040020000A00000002CC000__0000003B99F7F8A0\n000000067F000040020000A00000002C8000-000000
067F000040020000A00000002CC000__0000005D2FFFFB38\n000000067F000040020000A00000002CC000-000000067F000040020000A00000002D0000__00000021DAB8B3D0\n000000067F000040020000A00000002CC000-000000067F000040020000A00000002D0000__00000038E9AF7F00\n000000067F000040020000A00000002CC000-000000067F000040020000A00000002D0000__0000003903F1CFE8\n000000067F000040020000A00000002CC000-000000067F000040020000A00000002D0000__0000003B99F7F8A0\n000000067F000040020000A00000002CC000-000000067F000040020000A00000002D0000__0000005D2FFFFB38\n000000067F000040020000A00000002CC255-000000067F000040020000A00000002D4C30__000000211009E359-00000021AFB9E1E9\n000000067F000040020000A00000002D0000-000000067F000040020000A00000002D4000__00000021DAB8B3D0\n000000067F000040020000A00000002D0000-000000067F000040020000A00000002D4000__00000038E9AF7F00\n000000067F000040020000A00000002D0000-000000067F000040020000A00000002D4000__0000003903F1CFE8\n000000067F000040020000A00000002D0000-000000067F000040020000A00000002D4000__0000003B99F7F8A0\n000000067F000040020000A00000002D0000-000000067F000040020000A00000002D4000__0000005D2FFFFB38\n000000067F000040020000A00000002D4000-000000067F000040020000A00000002D8000__00000021DAB8B3D0\n000000067F000040020000A00000002D4000-000000067F000040020000A00000002D8000__00000038E67ABFA0\n000000067F000040020000A00000002D4000-000000067F000040020000A00000002D8000__0000003903F1CFE8\n000000067F000040020000A00000002D4000-000000067F000040020000A00000002D8000__0000003B99F7F8A0\n000000067F000040020000A00000002D4000-000000067F000040020000A00000002D8000__0000005D2FFFFB38\n000000067F000040020000A00000002D4C30-000000067F000040020000A0000200000000__000000211009E359-00000021AFB9E1E9\n000000067F000040020000A00000002D4E9B-000000067F000040020000A00000002DD894__00000021AFB9E1E9-000000225F61DD41\n000000067F000040020000A00000002D8000-000000067F000040020000A00000002DC000__00000021DAB8B3D0\n000000067F000040020000A00000002D8000-000000067F000040020000A00000002DC000__00000038E67ABFA0\n000000067F000040020000A00000002D8000-00
0000067F000040020000A00000002DC000__0000003903F1CFE8\n000000067F000040020000A00000002D8000-000000067F000040020000A00000002DC000__0000003B99F7F8A0\n000000067F000040020000A00000002D8000-000000067F000040020000A00000002DC000__0000005D2FFFFB38\n000000067F000040020000A00000002DC000-000000067F000040020000A00000002E0000__00000021DAB8B3D0\n000000067F000040020000A00000002DC000-000000067F000040020000A00000002E0000__00000038E67ABFA0\n000000067F000040020000A00000002DC000-000000067F000040020000A00000002E0000__0000003903F1CFE8\n000000067F000040020000A00000002DC000-000000067F000040020000A00000002E0000__0000003B99F7F8A0\n000000067F000040020000A00000002DC000-000000067F000040020000A00000002E0000__0000005D2FFFFB38\n000000067F000040020000A00000002DD894-000000067F000040020000A00000002E6287__00000021AFB9E1E9-000000225F61DD41\n000000067F000040020000A00000002E0000-000000067F000040020000A00000002E4000__00000021DAB8B3D0\n000000067F000040020000A00000002E0000-000000067F000040020000A00000002E4000__00000038E67ABFA0\n000000067F000040020000A00000002E0000-000000067F000040020000A00000002E4000__0000003903F1CFE8\n000000067F000040020000A00000002E0000-000000067F000040020000A00000002E4000__0000003B99F7F8A0\n000000067F000040020000A00000002E0000-000000067F000040020000A00000002E4000__0000005D2FFFFB38\n000000067F000040020000A00000002E4000-000000067F000040020000A00000002E8000__00000021DAB8B3D0\n000000067F000040020000A00000002E4000-000000067F000040020000A00000002E8000__00000038E67ABFA0\n000000067F000040020000A00000002E4000-000000067F000040020000A00000002E8000__0000003903F1CFE8\n000000067F000040020000A00000002E4000-000000067F000040020000A00000002E8000__0000003B99F7F8A0\n000000067F000040020000A00000002E4000-000000067F000040020000A00000002E8000__0000005D2FFFFB38\n000000067F000040020000A00000002E6287-000000067F000040020000A00000002EEC65__00000021AFB9E1E9-000000225F61DD41\n000000067F000040020000A00000002E8000-000000067F000040020000A00000002EC000__00000021DAB8B3D0\n000000067F000040020000A00000002E8000-000000067F00004
0020000A00000002EC000__00000038E67ABFA0\n000000067F000040020000A00000002E8000-000000067F000040020000A00000002EC000__0000003903F1CFE8\n000000067F000040020000A00000002E8000-000000067F000040020000A00000002EC000__0000003B99F7F8A0\n000000067F000040020000A00000002E8000-000000067F000040020000A00000002EC000__0000005D2FFFFB38\n000000067F000040020000A00000002EC000-000000067F000040020000A00000002F0000__00000038E67ABFA0\n000000067F000040020000A00000002EC000-000000067F000040020000A00000002F0000__0000003903F1CFE8\n000000067F000040020000A00000002EC000-000000067F000040020000A00000002F0000__0000003B99F7F8A0\n000000067F000040020000A00000002EC000-000000067F000040020000A00000002F0000__0000005D2FFFFB38\n000000067F000040020000A00000002EC000-030000000000000000000000000000000002__00000021DAB8B3D0\n000000067F000040020000A00000002EEC65-000000067F000040020000A00000002F7636__00000021AFB9E1E9-000000225F61DD41\n000000067F000040020000A00000002F0000-000000067F000040020000A00000002F4000__00000038E67ABFA0\n000000067F000040020000A00000002F0000-000000067F000040020000A00000002F4000__0000003903F1CFE8\n000000067F000040020000A00000002F0000-000000067F000040020000A00000002F4000__0000003B99F7F8A0\n000000067F000040020000A00000002F0000-000000067F000040020000A00000002F4000__0000005D2FFFFB38\n000000067F000040020000A00000002F4000-000000067F000040020000A00000002F8000__00000038E67ABFA0\n000000067F000040020000A00000002F4000-000000067F000040020000A00000002F8000__0000003903F1CFE8\n000000067F000040020000A00000002F4000-000000067F000040020000A00000002F8000__0000003B99F7F8A0\n000000067F000040020000A00000002F4000-000000067F000040020000A00000002F8000__0000005D2FFFFB38\n000000067F000040020000A00000002F7636-000000067F000040020000A00000002FFFF6__00000021AFB9E1E9-000000225F61DD41\n000000067F000040020000A00000002F8000-000000067F000040020000A00000002FC000__00000038E67ABFA0\n000000067F000040020000A00000002F8000-000000067F000040020000A00000002FC000__0000003903F1CFE8\n000000067F000040020000A00000002F8000-000000067F000040020000A00000
002FC000__0000003B99F7F8A0\n000000067F000040020000A00000002F8000-000000067F000040020000A00000002FC000__0000005D2FFFFB38\n000000067F000040020000A00000002FC000-000000067F000040020000A0000000300000__00000038E67ABFA0\n000000067F000040020000A00000002FC000-000000067F000040020000A0000000300000__0000003903F1CFE8\n000000067F000040020000A00000002FC000-000000067F000040020000A0000000300000__0000003B99F7F8A0\n000000067F000040020000A00000002FC000-000000067F000040020000A0000000300000__0000005D2FFFFB38\n000000067F000040020000A00000002FFFF6-000000067F000040020000A00000003089B9__00000021AFB9E1E9-000000225F61DD41\n000000067F000040020000A0000000300000-000000067F000040020000A0000000304000__00000038E67ABFA0\n000000067F000040020000A0000000300000-000000067F000040020000A0000000304000__0000003903F1CFE8\n000000067F000040020000A0000000300000-000000067F000040020000A0000000304000__0000003B99F7F8A0\n000000067F000040020000A0000000300000-000000067F000040020000A0000000304000__0000005D2FFFFB38\n000000067F000040020000A0000000304000-000000067F000040020000A0000000308000__00000038E67ABFA0\n000000067F000040020000A0000000304000-000000067F000040020000A0000000308000__0000003903F1CFE8\n000000067F000040020000A0000000304000-000000067F000040020000A0000000308000__0000003B99F7F8A0\n000000067F000040020000A0000000304000-000000067F000040020000A0000000308000__0000005D2FFFFB38\n000000067F000040020000A0000000308000-000000067F000040020000A000000030C000__00000038E67ABFA0\n000000067F000040020000A0000000308000-000000067F000040020000A000000030C000__0000003903F1CFE8\n000000067F000040020000A0000000308000-000000067F000040020000A000000030C000__0000003B99F7F8A0\n000000067F000040020000A0000000308000-000000067F000040020000A000000030C000__0000005D2FFFFB38\n000000067F000040020000A00000003089B9-000000067F000040020000A00000003113A3__00000021AFB9E1E9-000000225F61DD41\n000000067F000040020000A000000030C000-000000067F000040020000A0000000310000__00000038E67ABFA0\n000000067F000040020000A000000030C000-000000067F000040020000A0000000310000__000
0003903F1CFE8\n000000067F000040020000A000000030C000-000000067F000040020000A0000000310000__0000003B99F7F8A0\n000000067F000040020000A000000030C000-000000067F000040020000A0000000310000__0000005D2FFFFB38\n000000067F000040020000A0000000310000-000000067F000040020000A0000000314000__00000038E67ABFA0\n000000067F000040020000A0000000310000-000000067F000040020000A0000000314000__0000003903F1CFE8\n000000067F000040020000A0000000310000-000000067F000040020000A0000000314000__0000003B99F7F8A0\n000000067F000040020000A0000000310000-000000067F000040020000A0000000314000__0000005D2FFFFB38\n000000067F000040020000A00000003113A3-000000067F000040020000A0000000319D9B__00000021AFB9E1E9-000000225F61DD41\n000000067F000040020000A0000000314000-000000067F000040020000A0000000318000__00000038E67ABFA0\n000000067F000040020000A0000000314000-000000067F000040020000A0000000318000__0000003903F1CFE8\n000000067F000040020000A0000000314000-000000067F000040020000A0000000318000__0000003B99F7F8A0\n000000067F000040020000A0000000314000-000000067F000040020000A0000000318000__0000005D2FFFFB38\n000000067F000040020000A0000000318000-000000067F000040020000A000000031C000__00000038E67ABFA0\n000000067F000040020000A0000000318000-000000067F000040020000A000000031C000__0000003903F1CFE8\n000000067F000040020000A0000000318000-000000067F000040020000A000000031C000__0000003B99F7F8A0\n000000067F000040020000A0000000318000-000000067F000040020000A000000031C000__0000005D2FFFFB38\n000000067F000040020000A0000000319D9B-000000067F000040020000A0000000322787__00000021AFB9E1E9-000000225F61DD41\n000000067F000040020000A000000031C000-000000067F000040020000A0000000320000__00000038E67ABFA0\n000000067F000040020000A000000031C000-000000067F000040020000A0000000320000__0000003903F1CFE8\n000000067F000040020000A000000031C000-000000067F000040020000A0000000320000__0000003B99F7F8A0\n000000067F000040020000A000000031C000-000000067F000040020000A0000000320000__0000005D2FFFFB38\n000000067F000040020000A0000000320000-000000067F000040020000A0000000324000__00000038E67ABFA0
\n000000067F000040020000A0000000320000-000000067F000040020000A0000000324000__0000003903F1CFE8\n000000067F000040020000A0000000320000-000000067F000040020000A0000000324000__0000003B99F7F8A0\n000000067F000040020000A0000000320000-000000067F000040020000A0000000324000__0000005D2FFFFB38\n000000067F000040020000A0000000322787-000000067F000040020000A000000032B167__00000021AFB9E1E9-000000225F61DD41\n000000067F000040020000A0000000324000-000000067F000040020000A0000000328000__00000038E67ABFA0\n000000067F000040020000A0000000324000-000000067F000040020000A0000000328000__0000003903F1CFE8\n000000067F000040020000A0000000324000-000000067F000040020000A0000000328000__0000003B99F7F8A0\n000000067F000040020000A0000000324000-000000067F000040020000A0000000328000__0000005D2FFFFB38\n000000067F000040020000A0000000328000-000000067F000040020000A000000032C000__00000038E67ABFA0\n000000067F000040020000A0000000328000-000000067F000040020000A000000032C000__0000003903F1CFE8\n000000067F000040020000A0000000328000-000000067F000040020000A000000032C000__0000003B99F7F8A0\n000000067F000040020000A0000000328000-000000067F000040020000A000000032C000__0000005D2FFFFB38\n000000067F000040020000A000000032B167-000000067F000040020000A0000000333B49__00000021AFB9E1E9-000000225F61DD41\n000000067F000040020000A000000032C000-000000067F000040020000A0000000330000__00000038E67ABFA0\n000000067F000040020000A000000032C000-000000067F000040020000A0000000330000__0000003903F1CFE8\n000000067F000040020000A000000032C000-000000067F000040020000A0000000330000__0000003B99F7F8A0\n000000067F000040020000A000000032C000-000000067F000040020000A0000000330000__0000005D2FFFFB38\n000000067F000040020000A0000000330000-000000067F000040020000A0000000334000__00000038E1ABFE28\n000000067F000040020000A0000000330000-000000067F000040020000A0000000334000__00000038E9AF7F00\n000000067F000040020000A0000000330000-000000067F000040020000A0000000334000__0000003903F1CFE8\n000000067F000040020000A0000000330000-000000067F000040020000A0000000334000__0000003B99F7F8A0\n000000067F0
00040020000A0000000330000-000000067F000040020000A0000000334000__0000005D2FFFFB38\n000000067F000040020000A0000000333B49-000000067F000040020000A0000200000000__00000021AFB9E1E9-000000225F61DD41\n000000067F000040020000A0000000333D2A-000000067F000040020000A000000033C6E5__000000225F61DD41-000000230F09F3F1\n000000067F000040020000A0000000334000-000000067F000040020000A0000000338000__00000038E1ABFE28\n000000067F000040020000A0000000334000-000000067F000040020000A0000000338000__00000038E9AF7F00\n000000067F000040020000A0000000334000-000000067F000040020000A0000000338000__0000003903F1CFE8\n000000067F000040020000A0000000334000-000000067F000040020000A0000000338000__0000003B99F7F8A0\n000000067F000040020000A0000000334000-000000067F000040020000A0000000338000__0000005D2FFFFB38\n000000067F000040020000A0000000338000-000000067F000040020000A000000033C000__00000038E1ABFE28\n000000067F000040020000A0000000338000-000000067F000040020000A000000033C000__00000038E9AF7F00\n000000067F000040020000A0000000338000-000000067F000040020000A000000033C000__0000003903F1CFE8\n000000067F000040020000A0000000338000-000000067F000040020000A000000033C000__0000003B99F7F8A0\n000000067F000040020000A0000000338000-000000067F000040020000A000000033C000__0000005D2FFFFB38\n000000067F000040020000A000000033C000-000000067F000040020000A0000000340000__00000038E1ABFE28\n000000067F000040020000A000000033C000-000000067F000040020000A0000000340000__00000038E9AF7F00\n000000067F000040020000A000000033C000-000000067F000040020000A0000000340000__0000003903F1CFE8\n000000067F000040020000A000000033C000-000000067F000040020000A0000000340000__0000003B99F7F8A0\n000000067F000040020000A000000033C000-000000067F000040020000A0000000340000__0000005D2FFFFB38\n000000067F000040020000A000000033C6E5-000000067F000040020000A00000003450AA__000000225F61DD41-000000230F09F3F1\n000000067F000040020000A0000000340000-000000067F000040020000A0000000344000__00000038E1ABFE28\n000000067F000040020000A0000000340000-000000067F000040020000A0000000344000__00000038E9AF7F00\n0000000
67F000040020000A0000000340000-000000067F000040020000A0000000344000__0000003903F1CFE8\n000000067F000040020000A0000000340000-000000067F000040020000A0000000344000__0000003B99F7F8A0\n000000067F000040020000A0000000340000-000000067F000040020000A0000000344000__0000005D2FFFFB38\n000000067F000040020000A0000000344000-000000067F000040020000A0000000348000__00000038E1ABFE28\n000000067F000040020000A0000000344000-000000067F000040020000A0000000348000__00000038E9AF7F00\n000000067F000040020000A0000000344000-000000067F000040020000A0000000348000__0000003903F1CFE8\n000000067F000040020000A0000000344000-000000067F000040020000A0000000348000__0000003B99F7F8A0\n000000067F000040020000A0000000344000-000000067F000040020000A0000000348000__0000005D2FFFFB38\n000000067F000040020000A00000003450AA-000000067F000040020000A000000034DAA2__000000225F61DD41-000000230F09F3F1\n000000067F000040020000A0000000348000-000000067F000040020000A000000034C000__00000038E1ABFE28\n000000067F000040020000A0000000348000-000000067F000040020000A000000034C000__00000038E9AF7F00\n000000067F000040020000A0000000348000-000000067F000040020000A000000034C000__0000003903F1CFE8\n000000067F000040020000A0000000348000-000000067F000040020000A000000034C000__0000003B99F7F8A0\n000000067F000040020000A0000000348000-000000067F000040020000A000000034C000__0000005D2FFFFB38\n000000067F000040020000A000000034C000-000000067F000040020000A0000000350000__00000038E1ABFE28\n000000067F000040020000A000000034C000-000000067F000040020000A0000000350000__00000038E9AF7F00\n000000067F000040020000A000000034C000-000000067F000040020000A0000000350000__0000003903F1CFE8\n000000067F000040020000A000000034C000-000000067F000040020000A0000000350000__0000003B99F7F8A0\n000000067F000040020000A000000034C000-000000067F000040020000A0000000350000__0000005D2FFFFB38\n000000067F000040020000A000000034DAA2-000000067F000040020000A000000035649B__000000225F61DD41-000000230F09F3F1\n000000067F000040020000A0000000350000-000000067F000040020000A0000000354000__00000038E1ABFE28\n000000067F0000400200
00A0000000350000-000000067F000040020000A0000000354000__00000038E9AF7F00\n000000067F000040020000A0000000350000-000000067F000040020000A0000000354000__0000003903F1CFE8\n000000067F000040020000A0000000350000-000000067F000040020000A0000000354000__0000003B99F7F8A0\n000000067F000040020000A0000000350000-000000067F000040020000A0000000354000__0000005D2FFFFB38\n000000067F000040020000A0000000354000-000000067F000040020000A0000000358000__00000038E1ABFE28\n000000067F000040020000A0000000354000-000000067F000040020000A0000000358000__00000038E9AF7F00\n000000067F000040020000A0000000354000-000000067F000040020000A0000000358000__0000003903F1CFE8\n000000067F000040020000A0000000354000-000000067F000040020000A0000000358000__0000003B99F7F8A0\n000000067F000040020000A0000000354000-000000067F000040020000A0000000358000__0000005D2FFFFB38\n000000067F000040020000A000000035649B-000000067F000040020000A000000035EE91__000000225F61DD41-000000230F09F3F1\n000000067F000040020000A0000000358000-000000067F000040020000A000000035C000__00000038E1ABFE28\n000000067F000040020000A0000000358000-000000067F000040020000A000000035C000__00000038E9AF7F00\n000000067F000040020000A0000000358000-000000067F000040020000A000000035C000__0000003903F1CFE8\n000000067F000040020000A0000000358000-000000067F000040020000A000000035C000__0000003B99F7F8A0\n000000067F000040020000A0000000358000-000000067F000040020000A000000035C000__0000005D2FFFFB38\n000000067F000040020000A000000035C000-000000067F000040020000A0000000360000__00000038E1ABFE28\n000000067F000040020000A000000035C000-000000067F000040020000A0000000360000__00000038E9AF7F00\n000000067F000040020000A000000035C000-000000067F000040020000A0000000360000__0000003903F1CFE8\n000000067F000040020000A000000035C000-000000067F000040020000A0000000360000__0000003B99F7F8A0\n000000067F000040020000A000000035C000-000000067F000040020000A0000000360000__0000005D2FFFFB38\n000000067F000040020000A000000035EE91-000000067F000040020000A0000000367875__000000225F61DD41-000000230F09F3F1\n000000067F000040020000A0000000360
000-000000067F000040020000A0000000364000__00000038E1ABFE28\n000000067F000040020000A0000000360000-000000067F000040020000A0000000364000__00000038E9AF7F00\n000000067F000040020000A0000000360000-000000067F000040020000A0000000364000__0000003903F1CFE8\n000000067F000040020000A0000000360000-000000067F000040020000A0000000364000__0000003B99F7F8A0\n000000067F000040020000A0000000360000-000000067F000040020000A0000000364000__0000005D2FFFFB38\n000000067F000040020000A0000000364000-000000067F000040020000A0000000368000__00000038E1ABFE28\n000000067F000040020000A0000000364000-000000067F000040020000A0000000368000__00000038E9AF7F00\n000000067F000040020000A0000000364000-000000067F000040020000A0000000368000__0000003903F1CFE8\n000000067F000040020000A0000000364000-000000067F000040020000A0000000368000__0000003B99F7F8A0\n000000067F000040020000A0000000364000-000000067F000040020000A0000000368000__0000005D2FFFFB38\n000000067F000040020000A0000000367875-000000067F000040020000A0000000370246__000000225F61DD41-000000230F09F3F1\n000000067F000040020000A0000000368000-000000067F000040020000A000000036C000__00000038E1ABFE28\n000000067F000040020000A0000000368000-000000067F000040020000A000000036C000__00000038E9AF7F00\n000000067F000040020000A0000000368000-000000067F000040020000A000000036C000__0000003903F1CFE8\n000000067F000040020000A0000000368000-000000067F000040020000A000000036C000__0000003B99F7F8A0\n000000067F000040020000A0000000368000-000000067F000040020000A000000036C000__0000005D2FFFFB38\n000000067F000040020000A000000036C000-000000067F000040020000A0000000370000__00000038E1ABFE28\n000000067F000040020000A000000036C000-000000067F000040020000A0000000370000__00000038E9AF7F00\n000000067F000040020000A000000036C000-000000067F000040020000A0000000370000__0000003903F1CFE8\n000000067F000040020000A000000036C000-000000067F000040020000A0000000370000__0000003B99F7F8A0\n000000067F000040020000A000000036C000-000000067F000040020000A0000000370000__0000005D2FFFFB38\n000000067F000040020000A0000000370000-000000067F000040020000A000
0000374000__00000038E1ABFE28\n000000067F000040020000A0000000370000-000000067F000040020000A0000000374000__00000038E9AF7F00\n000000067F000040020000A0000000370000-000000067F000040020000A0000000374000__0000003903F1CFE8\n000000067F000040020000A0000000370000-000000067F000040020000A0000000374000__0000003B99F7F8A0\n000000067F000040020000A0000000370000-000000067F000040020000A0000000374000__0000005D2FFFFB38\n000000067F000040020000A0000000370246-000000067F000040020000A0000000378BFE__000000225F61DD41-000000230F09F3F1\n000000067F000040020000A0000000374000-000000067F000040020000A0000000378000__00000038E1ABFE28\n000000067F000040020000A0000000374000-000000067F000040020000A0000000378000__00000038E9AF7F00\n000000067F000040020000A0000000374000-000000067F000040020000A0000000378000__0000003903F1CFE8\n000000067F000040020000A0000000374000-000000067F000040020000A0000000378000__0000003B99F7F8A0\n000000067F000040020000A0000000374000-000000067F000040020000A0000000378000__0000005D2FFFFB38\n000000067F000040020000A0000000378000-000000067F000040020000A000000037C000__00000038E1ABFE28\n000000067F000040020000A0000000378000-000000067F000040020000A000000037C000__00000038E9AF7F00\n000000067F000040020000A0000000378000-000000067F000040020000A000000037C000__0000003903F1CFE8\n000000067F000040020000A0000000378000-000000067F000040020000A000000037C000__0000003B99F7F8A0\n000000067F000040020000A0000000378000-000000067F000040020000A000000037C000__0000005D2FFFFB38\n000000067F000040020000A0000000378BFE-000000067F000040020000A00000003815CC__000000225F61DD41-000000230F09F3F1\n000000067F000040020000A000000037C000-000000067F000040020000A0000000380000__00000038E1ABFE28\n000000067F000040020000A000000037C000-000000067F000040020000A0000000380000__00000038E9AF7F00\n000000067F000040020000A000000037C000-000000067F000040020000A0000000380000__0000003903F1CFE8\n000000067F000040020000A000000037C000-000000067F000040020000A0000000380000__0000003B99F7F8A0\n000000067F000040020000A000000037C000-000000067F000040020000A0000000380000__0
000005D2FFFFB38\n000000067F000040020000A0000000380000-000000067F000040020000A0000000384000__00000038E1ABFE28\n000000067F000040020000A0000000380000-000000067F000040020000A0000000384000__00000038E9AF7F00\n000000067F000040020000A0000000380000-000000067F000040020000A0000000384000__0000003903F1CFE8\n000000067F000040020000A0000000380000-000000067F000040020000A0000000384000__0000003B99F7F8A0\n000000067F000040020000A0000000380000-000000067F000040020000A0000000384000__0000005D2FFFFB38\n000000067F000040020000A00000003815CC-000000067F000040020000A0000000389FCA__000000225F61DD41-000000230F09F3F1\n000000067F000040020000A0000000384000-000000067F000040020000A0000000388000__00000038E1ABFE28\n000000067F000040020000A0000000384000-000000067F000040020000A0000000388000__00000038E9AF7F00\n000000067F000040020000A0000000384000-000000067F000040020000A0000000388000__0000003903F1CFE8\n000000067F000040020000A0000000384000-000000067F000040020000A0000000388000__0000003B99F7F8A0\n000000067F000040020000A0000000384000-000000067F000040020000A0000000388000__0000005D2FFFFB38\n000000067F000040020000A0000000388000-000000067F000040020000A000000038C000__00000038E1ABFE28\n000000067F000040020000A0000000388000-000000067F000040020000A000000038C000__00000038E9AF7F00\n000000067F000040020000A0000000388000-000000067F000040020000A000000038C000__0000003903F1CFE8\n000000067F000040020000A0000000388000-000000067F000040020000A000000038C000__0000003B99F7F8A0\n000000067F000040020000A0000000388000-000000067F000040020000A000000038C000__0000005D2FFFFB38\n000000067F000040020000A0000000389FCA-000000067F000040020000A00000003929C4__000000225F61DD41-000000230F09F3F1\n000000067F000040020000A000000038C000-000000067F000040020000A0000000390000__00000038E1ABFE28\n000000067F000040020000A000000038C000-000000067F000040020000A0000000390000__00000038E9AF7F00\n000000067F000040020000A000000038C000-000000067F000040020000A0000000390000__0000003903F1CFE8\n000000067F000040020000A000000038C000-000000067F000040020000A0000000390000__0000003B99F7F8
A0\n000000067F000040020000A000000038C000-000000067F000040020000A0000000390000__0000005D2FFFFB38\n000000067F000040020000A0000000390000-000000067F000040020000A0000000394000__00000023DF7FF060\n000000067F000040020000A0000000390000-000000067F000040020000A0000000394000__00000038E9AF7F00\n000000067F000040020000A0000000390000-000000067F000040020000A0000000394000__0000003903F1CFE8\n000000067F000040020000A0000000390000-000000067F000040020000A0000000394000__0000003B99F7F8A0\n000000067F000040020000A0000000390000-000000067F000040020000A0000000394000__0000005D2FFFFB38\n000000067F000040020000A00000003929C4-000000067F000040020000A0000200000000__000000225F61DD41-000000230F09F3F1\n000000067F000040020000A0000000392C61-000000067F000040020000A000000039B644__000000230F09F3F1-00000023AEB9F2B9\n000000067F000040020000A0000000394000-000000067F000040020000A0000000398000__00000023DF7FF060\n000000067F000040020000A0000000394000-000000067F000040020000A0000000398000__00000038E9AF7F00\n000000067F000040020000A0000000394000-000000067F000040020000A0000000398000__0000003903F1CFE8\n000000067F000040020000A0000000394000-000000067F000040020000A0000000398000__0000003B99F7F8A0\n000000067F000040020000A0000000394000-000000067F000040020000A0000000398000__0000005D2FFFFB38\n000000067F000040020000A0000000398000-000000067F000040020000A000000039C000__00000023DF7FF060\n000000067F000040020000A0000000398000-000000067F000040020000A000000039C000__00000038E9AF7F00\n000000067F000040020000A0000000398000-000000067F000040020000A000000039C000__0000003903F1CFE8\n000000067F000040020000A0000000398000-000000067F000040020000A000000039C000__0000003B99F7F8A0\n000000067F000040020000A0000000398000-000000067F000040020000A000000039C000__0000005D2FFFFB38\n000000067F000040020000A000000039B644-000000067F000040020000A00000003A4019__000000230F09F3F1-00000023AEB9F2B9\n000000067F000040020000A000000039C000-000000067F000040020000A00000003A0000__00000023DF7FF060\n000000067F000040020000A000000039C000-000000067F000040020000A00000003A0000__00000038E9
AF7F00\n000000067F000040020000A000000039C000-000000067F000040020000A00000003A0000__0000003903F1CFE8\n000000067F000040020000A000000039C000-000000067F000040020000A00000003A0000__0000003B99F7F8A0\n000000067F000040020000A000000039C000-000000067F000040020000A00000003A0000__0000005D2FFFFB38\n000000067F000040020000A00000003A0000-000000067F000040020000A00000003A4000__00000023DF7FF060\n000000067F000040020000A00000003A0000-000000067F000040020000A00000003A4000__00000038E9AF7F00\n000000067F000040020000A00000003A0000-000000067F000040020000A00000003A4000__0000003903F1CFE8\n000000067F000040020000A00000003A0000-000000067F000040020000A00000003A4000__0000003B99F7F8A0\n000000067F000040020000A00000003A0000-000000067F000040020000A00000003A4000__0000005D2FFFFB38\n000000067F000040020000A00000003A4000-000000067F000040020000A00000003A8000__00000023DF7FF060\n000000067F000040020000A00000003A4000-000000067F000040020000A00000003A8000__00000038E9AF7F00\n000000067F000040020000A00000003A4000-000000067F000040020000A00000003A8000__0000003903F1CFE8\n000000067F000040020000A00000003A4000-000000067F000040020000A00000003A8000__0000003B99F7F8A0\n000000067F000040020000A00000003A4000-000000067F000040020000A00000003A8000__0000005D2FFFFB38\n000000067F000040020000A00000003A4019-000000067F000040020000A00000003AC9D6__000000230F09F3F1-00000023AEB9F2B9\n000000067F000040020000A00000003A8000-000000067F000040020000A00000003AC000__00000023DF7FF060\n000000067F000040020000A00000003A8000-000000067F000040020000A00000003AC000__00000038E9AF7F00\n000000067F000040020000A00000003A8000-000000067F000040020000A00000003AC000__0000003903F1CFE8\n000000067F000040020000A00000003A8000-000000067F000040020000A00000003AC000__0000003B99F7F8A0\n000000067F000040020000A00000003A8000-000000067F000040020000A00000003AC000__0000005D2FFFFB38\n000000067F000040020000A00000003AC000-000000067F000040020000A00000003B0000__00000023DF7FF060\n000000067F000040020000A00000003AC000-000000067F000040020000A00000003B0000__00000038E9AF7F00\n000000067F000040020000
A00000003AC000-000000067F000040020000A00000003B0000__0000003903F1CFE8\n000000067F000040020000A00000003AC000-000000067F000040020000A00000003B0000__0000003B99F7F8A0\n000000067F000040020000A00000003AC000-000000067F000040020000A00000003B0000__0000005D2FFFFB38\n000000067F000040020000A00000003AC9D6-000000067F000040020000A00000003B5396__000000230F09F3F1-00000023AEB9F2B9\n000000067F000040020000A00000003B0000-000000067F000040020000A00000003B4000__00000023DF7FF060\n000000067F000040020000A00000003B0000-000000067F000040020000A00000003B4000__00000038E9AF7F00\n000000067F000040020000A00000003B0000-000000067F000040020000A00000003B4000__0000003903F1CFE8\n000000067F000040020000A00000003B0000-000000067F000040020000A00000003B4000__0000003B99F7F8A0\n000000067F000040020000A00000003B0000-000000067F000040020000A00000003B4000__0000005D2FFFFB38\n000000067F000040020000A00000003B4000-000000067F000040020000A00000003B8000__00000023DF7FF060\n000000067F000040020000A00000003B4000-000000067F000040020000A00000003B8000__00000038E9AF7F00\n000000067F000040020000A00000003B4000-000000067F000040020000A00000003B8000__0000003903F1CFE8\n000000067F000040020000A00000003B4000-000000067F000040020000A00000003B8000__0000003B99F7F8A0\n000000067F000040020000A00000003B4000-000000067F000040020000A00000003B8000__0000005D2FFFFB38\n000000067F000040020000A00000003B5396-000000067F000040020000A00000003BDD5F__000000230F09F3F1-00000023AEB9F2B9\n000000067F000040020000A00000003B8000-000000067F000040020000A00000003BC000__00000023DF7FF060\n000000067F000040020000A00000003B8000-000000067F000040020000A00000003BC000__00000038E9AF7F00\n000000067F000040020000A00000003B8000-000000067F000040020000A00000003BC000__0000003903F1CFE8\n000000067F000040020000A00000003B8000-000000067F000040020000A00000003BC000__0000003B99F7F8A0\n000000067F000040020000A00000003B8000-000000067F000040020000A00000003BC000__0000005D2FFFFB38\n000000067F000040020000A00000003BC000-000000067F000040020000A00000003C0000__00000023DF7FF060\n000000067F000040020000A00000003BC00
0-000000067F000040020000A00000003C0000__00000038E9AF7F00\n000000067F000040020000A00000003BC000-000000067F000040020000A00000003C0000__0000003903F1CFE8\n000000067F000040020000A00000003BC000-000000067F000040020000A00000003C0000__0000003B99F7F8A0\n000000067F000040020000A00000003BC000-000000067F000040020000A00000003C0000__0000005D2FFFFB38\n000000067F000040020000A00000003BDD5F-000000067F000040020000A00000003C6752__000000230F09F3F1-00000023AEB9F2B9\n000000067F000040020000A00000003C0000-000000067F000040020000A00000003C4000__00000023DF7FF060\n000000067F000040020000A00000003C0000-000000067F000040020000A00000003C4000__00000038E9AF7F00\n000000067F000040020000A00000003C0000-000000067F000040020000A00000003C4000__0000003903F1CFE8\n000000067F000040020000A00000003C0000-000000067F000040020000A00000003C4000__0000003B99F7F8A0\n000000067F000040020000A00000003C0000-000000067F000040020000A00000003C4000__0000005D2FFFFB38\n000000067F000040020000A00000003C4000-000000067F000040020000A00000003C8000__00000023DF7FF060\n000000067F000040020000A00000003C4000-000000067F000040020000A00000003C8000__00000038E9AF7F00\n000000067F000040020000A00000003C4000-000000067F000040020000A00000003C8000__0000003903F1CFE8\n000000067F000040020000A00000003C4000-000000067F000040020000A00000003C8000__0000003B99F7F8A0\n000000067F000040020000A00000003C4000-000000067F000040020000A00000003C8000__0000005D2FFFFB38\n000000067F000040020000A00000003C6752-000000067F000040020000A00000003CF144__000000230F09F3F1-00000023AEB9F2B9\n000000067F000040020000A00000003C8000-000000067F000040020000A00000003CC000__00000023DF7FF060\n000000067F000040020000A00000003C8000-000000067F000040020000A00000003CC000__00000038E9AF7F00\n000000067F000040020000A00000003C8000-000000067F000040020000A00000003CC000__0000003903F1CFE8\n000000067F000040020000A00000003C8000-000000067F000040020000A00000003CC000__0000003B99F7F8A0\n000000067F000040020000A00000003C8000-000000067F000040020000A00000003CC000__0000005D2FFFFB38\n000000067F000040020000A00000003CC000-000000067F0
00040020000A00000003D0000__00000023DF7FF060\n000000067F000040020000A00000003CC000-000000067F000040020000A00000003D0000__00000038E9AF7F00\n000000067F000040020000A00000003CC000-000000067F000040020000A00000003D0000__0000003903F1CFE8\n000000067F000040020000A00000003CC000-000000067F000040020000A00000003D0000__0000003B99F7F8A0\n000000067F000040020000A00000003CC000-000000067F000040020000A00000003D0000__0000005D2FFFFB38\n000000067F000040020000A00000003CF144-000000067F000040020000A00000003D7B34__000000230F09F3F1-00000023AEB9F2B9\n000000067F000040020000A00000003D0000-000000067F000040020000A00000003D4000__00000023DF7FF060\n000000067F000040020000A00000003D0000-000000067F000040020000A00000003D4000__00000038E9AF7F00\n000000067F000040020000A00000003D0000-000000067F000040020000A00000003D4000__0000003903F1CFE8\n000000067F000040020000A00000003D0000-000000067F000040020000A00000003D4000__0000003B99F7F8A0\n000000067F000040020000A00000003D0000-000000067F000040020000A00000003D4000__0000005D2FFFFB38\n000000067F000040020000A00000003D4000-000000067F000040020000A00000003D8000__00000023DF7FF060\n000000067F000040020000A00000003D4000-000000067F000040020000A00000003D8000__00000038E9AF7F00\n000000067F000040020000A00000003D4000-000000067F000040020000A00000003D8000__0000003903F1CFE8\n000000067F000040020000A00000003D4000-000000067F000040020000A00000003D8000__0000003B99F7F8A0\n000000067F000040020000A00000003D4000-000000067F000040020000A00000003D8000__0000005D2FFFFB38\n000000067F000040020000A00000003D7B34-000000067F000040020000A00000003E0508__000000230F09F3F1-00000023AEB9F2B9\n000000067F000040020000A00000003D8000-000000067F000040020000A00000003DC000__00000023DF7FF060\n000000067F000040020000A00000003D8000-000000067F000040020000A00000003DC000__00000038E9AF7F00\n000000067F000040020000A00000003D8000-000000067F000040020000A00000003DC000__0000003903F1CFE8\n000000067F000040020000A00000003D8000-000000067F000040020000A00000003DC000__0000003B99F7F8A0\n000000067F000040020000A00000003D8000-000000067F000040020000A0
0000003DC000__0000005D2FFFFB38\n000000067F000040020000A00000003DC000-000000067F000040020000A00000003E0000__00000023DF7FF060\n000000067F000040020000A00000003DC000-000000067F000040020000A00000003E0000__00000038E9AF7F00\n000000067F000040020000A00000003DC000-000000067F000040020000A00000003E0000__0000003903F1CFE8\n000000067F000040020000A00000003DC000-000000067F000040020000A00000003E0000__0000003B99F7F8A0\n000000067F000040020000A00000003DC000-000000067F000040020000A00000003E0000__0000005D2FFFFB38\n000000067F000040020000A00000003E0000-000000067F000040020000A00000003E4000__00000023DF7FF060\n000000067F000040020000A00000003E0000-000000067F000040020000A00000003E4000__00000038E9AF7F00\n000000067F000040020000A00000003E0000-000000067F000040020000A00000003E4000__0000003903F1CFE8\n000000067F000040020000A00000003E0000-000000067F000040020000A00000003E4000__0000003B99F7F8A0\n000000067F000040020000A00000003E0000-000000067F000040020000A00000003E4000__0000005D2FFFFB38\n000000067F000040020000A00000003E0508-000000067F000040020000A00000003E8EC9__000000230F09F3F1-00000023AEB9F2B9\n000000067F000040020000A00000003E4000-000000067F000040020000A00000003E8000__00000023DF7FF060\n000000067F000040020000A00000003E4000-000000067F000040020000A00000003E8000__00000038E9AF7F00\n000000067F000040020000A00000003E4000-000000067F000040020000A00000003E8000__0000003903F1CFE8\n000000067F000040020000A00000003E4000-000000067F000040020000A00000003E8000__0000003B99F7F8A0\n000000067F000040020000A00000003E4000-000000067F000040020000A00000003E8000__0000005D2FFFFB38\n000000067F000040020000A00000003E8000-000000067F000040020000A00000003EC000__00000023DF7FF060\n000000067F000040020000A00000003E8000-000000067F000040020000A00000003EC000__00000038E67ABFA0\n000000067F000040020000A00000003E8000-000000067F000040020000A00000003EC000__0000003903F1CFE8\n000000067F000040020000A00000003E8000-000000067F000040020000A00000003EC000__0000003B99F7F8A0\n000000067F000040020000A00000003E8000-000000067F000040020000A00000003EC000__0000005D2FFFFB38
\n000000067F000040020000A00000003E8EC9-000000067F000040020000A0000200000000__000000230F09F3F1-00000023AEB9F2B9\n000000067F000040020000A00000003E9093-000000067F000040020000A00000003F1A44__00000023AEB9F2B9-000000244E69F8E9\n000000067F000040020000A00000003EC000-000000067F000040020000A00000003F0000__00000023DF7FF060\n000000067F000040020000A00000003EC000-000000067F000040020000A00000003F0000__00000038E67ABFA0\n000000067F000040020000A00000003EC000-000000067F000040020000A00000003F0000__0000003903F1CFE8\n000000067F000040020000A00000003EC000-000000067F000040020000A00000003F0000__0000003B99F7F8A0\n000000067F000040020000A00000003EC000-000000067F000040020000A00000003F0000__0000005D2FFFFB38\n000000067F000040020000A00000003F0000-000000067F000040020000A00000003F4000__00000023DF7FF060\n000000067F000040020000A00000003F0000-000000067F000040020000A00000003F4000__00000038E67ABFA0\n000000067F000040020000A00000003F0000-000000067F000040020000A00000003F4000__0000003903F1CFE8\n000000067F000040020000A00000003F0000-000000067F000040020000A00000003F4000__0000003B99F7F8A0\n000000067F000040020000A00000003F0000-000000067F000040020000A00000003F4000__0000005D2FFFFB38\n000000067F000040020000A00000003F1A44-000000067F000040020000A00000003FA41E__00000023AEB9F2B9-000000244E69F8E9\n000000067F000040020000A00000003F4000-000000067F000040020000A00000003F8000__00000023DF7FF060\n000000067F000040020000A00000003F4000-000000067F000040020000A00000003F8000__00000038E67ABFA0\n000000067F000040020000A00000003F4000-000000067F000040020000A00000003F8000__0000003903F1CFE8\n000000067F000040020000A00000003F4000-000000067F000040020000A00000003F8000__0000003B99F7F8A0\n000000067F000040020000A00000003F4000-000000067F000040020000A00000003F8000__0000005D2FFFFB38\n000000067F000040020000A00000003F8000-000000067F000040020000A00000003FC000__00000023DF7FF060\n000000067F000040020000A00000003F8000-000000067F000040020000A00000003FC000__00000038E67ABFA0\n000000067F000040020000A00000003F8000-000000067F000040020000A00000003FC000__0000003903F1
CFE8\n000000067F000040020000A00000003F8000-000000067F000040020000A00000003FC000__0000003B99F7F8A0\n000000067F000040020000A00000003F8000-000000067F000040020000A00000003FC000__0000005D2FFFFB38\n000000067F000040020000A00000003FA41E-000000067F000040020000A0000000402E14__00000023AEB9F2B9-000000244E69F8E9\n000000067F000040020000A00000003FC000-000000067F000040020000A0000000400000__00000023DF7FF060\n000000067F000040020000A00000003FC000-000000067F000040020000A0000000400000__00000038E67ABFA0\n000000067F000040020000A00000003FC000-000000067F000040020000A0000000400000__0000003903F1CFE8\n000000067F000040020000A00000003FC000-000000067F000040020000A0000000400000__0000003B99F7F8A0\n000000067F000040020000A00000003FC000-000000067F000040020000A0000000400000__0000005D2FFFFB38\n000000067F000040020000A0000000400000-000000067F000040020000A0000000404000__00000038E67ABFA0\n000000067F000040020000A0000000400000-000000067F000040020000A0000000404000__0000003903F1CFE8\n000000067F000040020000A0000000400000-000000067F000040020000A0000000404000__0000003B99F7F8A0\n000000067F000040020000A0000000400000-000000067F000040020000A0000000404000__0000005D2FFFFB38\n000000067F000040020000A0000000400000-030000000000000000000000000000000002__00000023DF7FF060\n000000067F000040020000A0000000402E14-000000067F000040020000A000000040B7FC__00000023AEB9F2B9-000000244E69F8E9\n000000067F000040020000A0000000404000-000000067F000040020000A0000000408000__00000038E67ABFA0\n000000067F000040020000A0000000404000-000000067F000040020000A0000000408000__0000003903F1CFE8\n000000067F000040020000A0000000404000-000000067F000040020000A0000000408000__0000003B99F7F8A0\n000000067F000040020000A0000000404000-000000067F000040020000A0000000408000__0000005D2FFFFB38\n000000067F000040020000A0000000408000-000000067F000040020000A000000040C000__00000038E67ABFA0\n000000067F000040020000A0000000408000-000000067F000040020000A000000040C000__0000003903F1CFE8\n000000067F000040020000A0000000408000-000000067F000040020000A000000040C000__0000003B99F7F8A0\n0000000
67F000040020000A0000000408000-000000067F000040020000A000000040C000__0000005D2FFFFB38\n000000067F000040020000A000000040B7FC-000000067F000040020000A00000004141F2__00000023AEB9F2B9-000000244E69F8E9\n000000067F000040020000A000000040C000-000000067F000040020000A0000000410000__00000038E67ABFA0\n000000067F000040020000A000000040C000-000000067F000040020000A0000000410000__0000003903F1CFE8\n000000067F000040020000A000000040C000-000000067F000040020000A0000000410000__0000003B99F7F8A0\n000000067F000040020000A000000040C000-000000067F000040020000A0000000410000__0000005D2FFFFB38\n000000067F000040020000A0000000410000-000000067F000040020000A0000000414000__00000038E67ABFA0\n000000067F000040020000A0000000410000-000000067F000040020000A0000000414000__0000003903F1CFE8\n000000067F000040020000A0000000410000-000000067F000040020000A0000000414000__0000003B99F7F8A0\n000000067F000040020000A0000000410000-000000067F000040020000A0000000414000__0000005D2FFFFB38\n000000067F000040020000A0000000414000-000000067F000040020000A0000000418000__00000038E67ABFA0\n000000067F000040020000A0000000414000-000000067F000040020000A0000000418000__0000003903F1CFE8\n000000067F000040020000A0000000414000-000000067F000040020000A0000000418000__0000003B99F7F8A0\n000000067F000040020000A0000000414000-000000067F000040020000A0000000418000__0000005D2FFFFB38\n000000067F000040020000A00000004141F2-000000067F000040020000A000000041CBDA__00000023AEB9F2B9-000000244E69F8E9\n000000067F000040020000A0000000418000-000000067F000040020000A000000041C000__00000038E67ABFA0\n000000067F000040020000A0000000418000-000000067F000040020000A000000041C000__0000003903F1CFE8\n000000067F000040020000A0000000418000-000000067F000040020000A000000041C000__0000003B99F7F8A0\n000000067F000040020000A0000000418000-000000067F000040020000A000000041C000__0000005D2FFFFB38\n000000067F000040020000A000000041C000-000000067F000040020000A0000000420000__00000038E67ABFA0\n000000067F000040020000A000000041C000-000000067F000040020000A0000000420000__0000003903F1CFE8\n000000067F0000400200
00A000000041C000-000000067F000040020000A0000000420000__0000003B99F7F8A0\n000000067F000040020000A000000041C000-000000067F000040020000A0000000420000__0000005D2FFFFB38\n000000067F000040020000A000000041CBDA-000000067F000040020000A00000004255AE__00000023AEB9F2B9-000000244E69F8E9\n000000067F000040020000A0000000420000-000000067F000040020000A0000000424000__00000038E67ABFA0\n000000067F000040020000A0000000420000-000000067F000040020000A0000000424000__0000003903F1CFE8\n000000067F000040020000A0000000420000-000000067F000040020000A0000000424000__0000003B99F7F8A0\n000000067F000040020000A0000000420000-000000067F000040020000A0000000424000__0000005D2FFFFB38\n000000067F000040020000A0000000424000-000000067F000040020000A0000000428000__00000038E67ABFA0\n000000067F000040020000A0000000424000-000000067F000040020000A0000000428000__0000003903F1CFE8\n000000067F000040020000A0000000424000-000000067F000040020000A0000000428000__0000003B99F7F8A0\n000000067F000040020000A0000000424000-000000067F000040020000A0000000428000__0000005D2FFFFB38\n000000067F000040020000A00000004255AE-000000067F000040020000A000000042DF69__00000023AEB9F2B9-000000244E69F8E9\n000000067F000040020000A0000000428000-000000067F000040020000A000000042C000__00000038E67ABFA0\n000000067F000040020000A0000000428000-000000067F000040020000A000000042C000__0000003903F1CFE8\n000000067F000040020000A0000000428000-000000067F000040020000A000000042C000__0000003B99F7F8A0\n000000067F000040020000A0000000428000-000000067F000040020000A000000042C000__0000005D2FFFFB38\n000000067F000040020000A000000042C000-000000067F000040020000A0000000430000__00000038E67ABFA0\n000000067F000040020000A000000042C000-000000067F000040020000A0000000430000__0000003903F1CFE8\n000000067F000040020000A000000042C000-000000067F000040020000A0000000430000__0000003B99F7F8A0\n000000067F000040020000A000000042C000-000000067F000040020000A0000000430000__0000005D2FFFFB38\n000000067F000040020000A000000042DF69-000000067F000040020000A0000000436935__00000023AEB9F2B9-000000244E69F8E9\n000000067F000040
020000A0000000430000-000000067F000040020000A0000000434000__00000038E67ABFA0\n000000067F000040020000A0000000430000-000000067F000040020000A0000000434000__0000003903F1CFE8\n000000067F000040020000A0000000430000-000000067F000040020000A0000000434000__0000003B99F7F8A0\n000000067F000040020000A0000000430000-000000067F000040020000A0000000434000__0000005D2FFFFB38\n000000067F000040020000A0000000434000-000000067F000040020000A0000000438000__00000038E67ABFA0\n000000067F000040020000A0000000434000-000000067F000040020000A0000000438000__0000003903F1CFE8\n000000067F000040020000A0000000434000-000000067F000040020000A0000000438000__0000003B99F7F8A0\n000000067F000040020000A0000000434000-000000067F000040020000A0000000438000__0000005D2FFFFB38\n000000067F000040020000A0000000436935-000000067F000040020000A000000043F31C__00000023AEB9F2B9-000000244E69F8E9\n000000067F000040020000A0000000438000-000000067F000040020000A000000043C000__00000038E67ABFA0\n000000067F000040020000A0000000438000-000000067F000040020000A000000043C000__0000003903F1CFE8\n000000067F000040020000A0000000438000-000000067F000040020000A000000043C000__0000003B99F7F8A0\n000000067F000040020000A0000000438000-000000067F000040020000A000000043C000__0000005D2FFFFB38\n000000067F000040020000A000000043C000-000000067F000040020000A0000000440000__00000038E1ABFE28\n000000067F000040020000A000000043C000-000000067F000040020000A0000000440000__00000038E9AF7F00\n000000067F000040020000A000000043C000-000000067F000040020000A0000000440000__0000003903F1CFE8\n000000067F000040020000A000000043C000-000000067F000040020000A0000000440000__0000003B99F7F8A0\n000000067F000040020000A000000043C000-000000067F000040020000A0000000440000__0000005D2FFFFB38\n000000067F000040020000A000000043F31C-000000067F000040020000A0000200000000__00000023AEB9F2B9-000000244E69F8E9\n000000067F000040020000A000000043F581-000000067F000040020000A0000000447F7E__000000244E69F8E9-00000024EE19EEF9\n000000067F000040020000A0000000440000-000000067F000040020000A0000000444000__00000038E1ABFE28\n000000067F00
0040020000A0000000440000-000000067F000040020000A0000000444000__00000038E9AF7F00\n000000067F000040020000A0000000440000-000000067F000040020000A0000000444000__0000003903F1CFE8\n000000067F000040020000A0000000440000-000000067F000040020000A0000000444000__0000003B99F7F8A0\n000000067F000040020000A0000000440000-000000067F000040020000A0000000444000__0000005D2FFFFB38\n000000067F000040020000A0000000444000-000000067F000040020000A0000000448000__00000038E1ABFE28\n000000067F000040020000A0000000444000-000000067F000040020000A0000000448000__00000038E9AF7F00\n000000067F000040020000A0000000444000-000000067F000040020000A0000000448000__0000003903F1CFE8\n000000067F000040020000A0000000444000-000000067F000040020000A0000000448000__0000003B99F7F8A0\n000000067F000040020000A0000000444000-000000067F000040020000A0000000448000__0000005D2FFFFB38\n000000067F000040020000A0000000447F7E-000000067F000040020000A000000045096D__000000244E69F8E9-00000024EE19EEF9\n000000067F000040020000A0000000448000-000000067F000040020000A000000044C000__00000038E1ABFE28\n000000067F000040020000A0000000448000-000000067F000040020000A000000044C000__00000038E9AF7F00\n000000067F000040020000A0000000448000-000000067F000040020000A000000044C000__0000003903F1CFE8\n000000067F000040020000A0000000448000-000000067F000040020000A000000044C000__0000003B99F7F8A0\n000000067F000040020000A0000000448000-000000067F000040020000A000000044C000__0000005D2FFFFB38\n000000067F000040020000A000000044C000-000000067F000040020000A0000000450000__00000038E1ABFE28\n000000067F000040020000A000000044C000-000000067F000040020000A0000000450000__00000038E9AF7F00\n000000067F000040020000A000000044C000-000000067F000040020000A0000000450000__0000003903F1CFE8\n000000067F000040020000A000000044C000-000000067F000040020000A0000000450000__0000003B99F7F8A0\n000000067F000040020000A000000044C000-000000067F000040020000A0000000450000__0000005D2FFFFB38\n000000067F000040020000A0000000450000-000000067F000040020000A0000000454000__00000038E1ABFE28\n000000067F000040020000A0000000450000-00000
0067F000040020000A0000000454000__00000038E9AF7F00\n000000067F000040020000A0000000450000-000000067F000040020000A0000000454000__0000003903F1CFE8\n000000067F000040020000A0000000450000-000000067F000040020000A0000000454000__0000003B99F7F8A0\n000000067F000040020000A0000000450000-000000067F000040020000A0000000454000__0000005D2FFFFB38\n000000067F000040020000A000000045096D-000000067F000040020000A000000045934B__000000244E69F8E9-00000024EE19EEF9\n000000067F000040020000A0000000454000-000000067F000040020000A0000000458000__00000038E1ABFE28\n000000067F000040020000A0000000454000-000000067F000040020000A0000000458000__00000038E9AF7F00\n000000067F000040020000A0000000454000-000000067F000040020000A0000000458000__0000003903F1CFE8\n000000067F000040020000A0000000454000-000000067F000040020000A0000000458000__0000003B99F7F8A0\n000000067F000040020000A0000000454000-000000067F000040020000A0000000458000__0000005D2FFFFB38\n000000067F000040020000A0000000458000-000000067F000040020000A000000045C000__00000038E1ABFE28\n000000067F000040020000A0000000458000-000000067F000040020000A000000045C000__00000038E9AF7F00\n000000067F000040020000A0000000458000-000000067F000040020000A000000045C000__0000003903F1CFE8\n000000067F000040020000A0000000458000-000000067F000040020000A000000045C000__0000003B99F7F8A0\n000000067F000040020000A0000000458000-000000067F000040020000A000000045C000__0000005D2FFFFB38\n000000067F000040020000A000000045934B-000000067F000040020000A0000000461D13__000000244E69F8E9-00000024EE19EEF9\n000000067F000040020000A000000045C000-000000067F000040020000A0000000460000__00000038E1ABFE28\n000000067F000040020000A000000045C000-000000067F000040020000A0000000460000__00000038E9AF7F00\n000000067F000040020000A000000045C000-000000067F000040020000A0000000460000__0000003903F1CFE8\n000000067F000040020000A000000045C000-000000067F000040020000A0000000460000__0000003B99F7F8A0\n000000067F000040020000A000000045C000-000000067F000040020000A0000000460000__0000005D2FFFFB38\n000000067F000040020000A0000000460000-000000067F00004002
0000A0000000464000__00000038E1ABFE28\n000000067F000040020000A0000000460000-000000067F000040020000A0000000464000__00000038E9AF7F00\n000000067F000040020000A0000000460000-000000067F000040020000A0000000464000__0000003903F1CFE8\n000000067F000040020000A0000000460000-000000067F000040020000A0000000464000__0000003B99F7F8A0\n000000067F000040020000A0000000460000-000000067F000040020000A0000000464000__0000005D2FFFFB38\n000000067F000040020000A0000000461D13-000000067F000040020000A000000046A6C6__000000244E69F8E9-00000024EE19EEF9\n000000067F000040020000A0000000464000-000000067F000040020000A0000000468000__00000038E1ABFE28\n000000067F000040020000A0000000464000-000000067F000040020000A0000000468000__00000038E9AF7F00\n000000067F000040020000A0000000464000-000000067F000040020000A0000000468000__0000003903F1CFE8\n000000067F000040020000A0000000464000-000000067F000040020000A0000000468000__0000003B99F7F8A0\n000000067F000040020000A0000000464000-000000067F000040020000A0000000468000__0000005D2FFFFB38\n000000067F000040020000A0000000468000-000000067F000040020000A000000046C000__00000038E1ABFE28\n000000067F000040020000A0000000468000-000000067F000040020000A000000046C000__00000038E9AF7F00\n000000067F000040020000A0000000468000-000000067F000040020000A000000046C000__0000003903F1CFE8\n000000067F000040020000A0000000468000-000000067F000040020000A000000046C000__0000003B99F7F8A0\n000000067F000040020000A0000000468000-000000067F000040020000A000000046C000__0000005D2FFFFB38\n000000067F000040020000A000000046A6C6-000000067F000040020000A00000004730A6__000000244E69F8E9-00000024EE19EEF9\n000000067F000040020000A000000046C000-000000067F000040020000A0000000470000__00000038E1ABFE28\n000000067F000040020000A000000046C000-000000067F000040020000A0000000470000__00000038E9AF7F00\n000000067F000040020000A000000046C000-000000067F000040020000A0000000470000__0000003903F1CFE8\n000000067F000040020000A000000046C000-000000067F000040020000A0000000470000__0000003B99F7F8A0\n000000067F000040020000A000000046C000-000000067F000040020000A00000004
70000__0000005D2FFFFB38\n000000067F000040020000A0000000470000-000000067F000040020000A0000000474000__00000038E1ABFE28\n000000067F000040020000A0000000470000-000000067F000040020000A0000000474000__00000038E9AF7F00\n000000067F000040020000A0000000470000-000000067F000040020000A0000000474000__0000003903F1CFE8\n000000067F000040020000A0000000470000-000000067F000040020000A0000000474000__0000003B99F7F8A0\n000000067F000040020000A0000000470000-000000067F000040020000A0000000474000__0000005D2FFFFB38\n000000067F000040020000A00000004730A6-000000067F000040020000A000000047BA93__000000244E69F8E9-00000024EE19EEF9\n000000067F000040020000A0000000474000-000000067F000040020000A0000000478000__00000038E1ABFE28\n000000067F000040020000A0000000474000-000000067F000040020000A0000000478000__00000038E9AF7F00\n000000067F000040020000A0000000474000-000000067F000040020000A0000000478000__0000003903F1CFE8\n000000067F000040020000A0000000474000-000000067F000040020000A0000000478000__0000003B99F7F8A0\n000000067F000040020000A0000000474000-000000067F000040020000A0000000478000__0000005D2FFFFB38\n000000067F000040020000A0000000478000-000000067F000040020000A000000047C000__00000038E1ABFE28\n000000067F000040020000A0000000478000-000000067F000040020000A000000047C000__00000038E9AF7F00\n000000067F000040020000A0000000478000-000000067F000040020000A000000047C000__0000003903F1CFE8\n000000067F000040020000A0000000478000-000000067F000040020000A000000047C000__0000003B99F7F8A0\n000000067F000040020000A0000000478000-000000067F000040020000A000000047C000__0000005D2FFFFB38\n000000067F000040020000A000000047BA93-000000067F000040020000A0000000484484__000000244E69F8E9-00000024EE19EEF9\n000000067F000040020000A000000047C000-000000067F000040020000A0000000480000__00000038E1ABFE28\n000000067F000040020000A000000047C000-000000067F000040020000A0000000480000__00000038E9AF7F00\n000000067F000040020000A000000047C000-000000067F000040020000A0000000480000__0000003903F1CFE8\n000000067F000040020000A000000047C000-000000067F000040020000A0000000480000__000000
3B99F7F8A0\n000000067F000040020000A000000047C000-000000067F000040020000A0000000480000__0000005D2FFFFB38\n000000067F000040020000A0000000480000-000000067F000040020000A0000000484000__00000038E1ABFE28\n000000067F000040020000A0000000480000-000000067F000040020000A0000000484000__00000038E9AF7F00\n000000067F000040020000A0000000480000-000000067F000040020000A0000000484000__0000003903F1CFE8\n000000067F000040020000A0000000480000-000000067F000040020000A0000000484000__0000003B99F7F8A0\n000000067F000040020000A0000000480000-000000067F000040020000A0000000484000__0000005D2FFFFB38\n000000067F000040020000A0000000484000-000000067F000040020000A0000000488000__00000038E1ABFE28\n000000067F000040020000A0000000484000-000000067F000040020000A0000000488000__00000038E9AF7F00\n000000067F000040020000A0000000484000-000000067F000040020000A0000000488000__0000003903F1CFE8\n000000067F000040020000A0000000484000-000000067F000040020000A0000000488000__0000003B99F7F8A0\n000000067F000040020000A0000000484000-000000067F000040020000A0000000488000__0000005D2FFFFB38\n000000067F000040020000A0000000484484-000000067F000040020000A000000048CE6F__000000244E69F8E9-00000024EE19EEF9\n000000067F000040020000A0000000488000-000000067F000040020000A000000048C000__00000038E1ABFE28\n000000067F000040020000A0000000488000-000000067F000040020000A000000048C000__00000038E9AF7F00\n000000067F000040020000A0000000488000-000000067F000040020000A000000048C000__0000003903F1CFE8\n000000067F000040020000A0000000488000-000000067F000040020000A000000048C000__0000003B99F7F8A0\n000000067F000040020000A0000000488000-000000067F000040020000A000000048C000__0000005D2FFFFB38\n000000067F000040020000A000000048C000-000000067F000040020000A0000000490000__00000038E1ABFE28\n000000067F000040020000A000000048C000-000000067F000040020000A0000000490000__00000038E9AF7F00\n000000067F000040020000A000000048C000-000000067F000040020000A0000000490000__0000003903F1CFE8\n000000067F000040020000A000000048C000-000000067F000040020000A0000000490000__0000003B99F7F8A0\n000000067F00004002
0000A000000048C000-000000067F000040020000A0000000490000__0000005D2FFFFB38\n000000067F000040020000A000000048CE6F-000000067F000040020000A0000000495855__000000244E69F8E9-00000024EE19EEF9\n000000067F000040020000A0000000490000-000000067F000040020000A0000000494000__00000038E1ABFE28\n000000067F000040020000A0000000490000-000000067F000040020000A0000000494000__00000038E9AF7F00\n000000067F000040020000A0000000490000-000000067F000040020000A0000000494000__0000003903F1CFE8\n000000067F000040020000A0000000490000-000000067F000040020000A0000000494000__0000003B99F7F8A0\n000000067F000040020000A0000000490000-000000067F000040020000A0000000494000__0000005D2FFFFB38\n000000067F000040020000A0000000494000-000000067F000040020000A0000000498000__00000025CABFE5B8\n000000067F000040020000A0000000494000-000000067F000040020000A0000000498000__00000038E9AF7F00\n000000067F000040020000A0000000494000-000000067F000040020000A0000000498000__0000003903F1CFE8\n000000067F000040020000A0000000494000-000000067F000040020000A0000000498000__0000003B99F7F8A0\n000000067F000040020000A0000000494000-000000067F000040020000A0000000498000__0000005D2FFFFB38\n000000067F000040020000A0000000495855-000000067F000040020000A0000200000000__000000244E69F8E9-00000024EE19EEF9\n000000067F000040020000A00000004959F7-000000067F000040020000A000000049E3C6__00000024EE19EEF9-000000259DC1F899\n000000067F000040020000A0000000498000-000000067F000040020000A000000049C000__00000025CABFE5B8\n000000067F000040020000A0000000498000-000000067F000040020000A000000049C000__00000038E9AF7F00\n000000067F000040020000A0000000498000-000000067F000040020000A000000049C000__0000003903F1CFE8\n000000067F000040020000A0000000498000-000000067F000040020000A000000049C000__0000003B99F7F8A0\n000000067F000040020000A0000000498000-000000067F000040020000A000000049C000__0000005D2FFFFB38\n000000067F000040020000A000000049C000-000000067F000040020000A00000004A0000__00000025CABFE5B8\n000000067F000040020000A000000049C000-000000067F000040020000A00000004A0000__00000038E9AF7F00\n000000067F0000
40020000A000000049C000-000000067F000040020000A00000004A0000__0000003903F1CFE8\n000000067F000040020000A000000049C000-000000067F000040020000A00000004A0000__0000003B99F7F8A0\n000000067F000040020000A000000049C000-000000067F000040020000A00000004A0000__0000005D2FFFFB38\n000000067F000040020000A000000049E3C6-000000067F000040020000A00000004A6D8C__00000024EE19EEF9-000000259DC1F899\n000000067F000040020000A00000004A0000-000000067F000040020000A00000004A4000__00000025CABFE5B8\n000000067F000040020000A00000004A0000-000000067F000040020000A00000004A4000__00000038E9AF7F00\n000000067F000040020000A00000004A0000-000000067F000040020000A00000004A4000__0000003903F1CFE8\n000000067F000040020000A00000004A0000-000000067F000040020000A00000004A4000__0000003B99F7F8A0\n000000067F000040020000A00000004A0000-000000067F000040020000A00000004A4000__0000005D2FFFFB38\n000000067F000040020000A00000004A4000-000000067F000040020000A00000004A8000__00000025CABFE5B8\n000000067F000040020000A00000004A4000-000000067F000040020000A00000004A8000__00000038E9AF7F00\n000000067F000040020000A00000004A4000-000000067F000040020000A00000004A8000__0000003903F1CFE8\n000000067F000040020000A00000004A4000-000000067F000040020000A00000004A8000__0000003B99F7F8A0\n000000067F000040020000A00000004A4000-000000067F000040020000A00000004A8000__0000005D2FFFFB38\n000000067F000040020000A00000004A6D8C-000000067F000040020000A00000004AF769__00000024EE19EEF9-000000259DC1F899\n000000067F000040020000A00000004A8000-000000067F000040020000A00000004AC000__00000025CABFE5B8\n000000067F000040020000A00000004A8000-000000067F000040020000A00000004AC000__00000038E9AF7F00\n000000067F000040020000A00000004A8000-000000067F000040020000A00000004AC000__0000003903F1CFE8\n000000067F000040020000A00000004A8000-000000067F000040020000A00000004AC000__0000003B99F7F8A0\n000000067F000040020000A00000004A8000-000000067F000040020000A00000004AC000__0000005D2FFFFB38\n000000067F000040020000A00000004AC000-000000067F000040020000A00000004B0000__00000025CABFE5B8\n000000067F000040020000A0000
0004AC000-000000067F000040020000A00000004B0000__00000038E9AF7F00\n000000067F000040020000A00000004AC000-000000067F000040020000A00000004B0000__0000003903F1CFE8\n000000067F000040020000A00000004AC000-000000067F000040020000A00000004B0000__0000003B99F7F8A0\n000000067F000040020000A00000004AC000-000000067F000040020000A00000004B0000__0000005D2FFFFB38\n000000067F000040020000A00000004AF769-000000067F000040020000A00000004B8152__00000024EE19EEF9-000000259DC1F899\n000000067F000040020000A00000004B0000-000000067F000040020000A00000004B4000__00000025CABFE5B8\n000000067F000040020000A00000004B0000-000000067F000040020000A00000004B4000__00000038E9AF7F00\n000000067F000040020000A00000004B0000-000000067F000040020000A00000004B4000__0000003903F1CFE8\n000000067F000040020000A00000004B0000-000000067F000040020000A00000004B4000__0000003B99F7F8A0\n000000067F000040020000A00000004B0000-000000067F000040020000A00000004B4000__0000005D2FFFFB38\n000000067F000040020000A00000004B4000-000000067F000040020000A00000004B8000__00000025CABFE5B8\n000000067F000040020000A00000004B4000-000000067F000040020000A00000004B8000__00000038E9AF7F00\n000000067F000040020000A00000004B4000-000000067F000040020000A00000004B8000__0000003903F1CFE8\n000000067F000040020000A00000004B4000-000000067F000040020000A00000004B8000__0000003B99F7F8A0\n000000067F000040020000A00000004B4000-000000067F000040020000A00000004B8000__0000005D2FFFFB38\n000000067F000040020000A00000004B8000-000000067F000040020000A00000004BC000__00000025CABFE5B8\n000000067F000040020000A00000004B8000-000000067F000040020000A00000004BC000__00000038E9AF7F00\n000000067F000040020000A00000004B8000-000000067F000040020000A00000004BC000__0000003903F1CFE8\n000000067F000040020000A00000004B8000-000000067F000040020000A00000004BC000__0000003B99F7F8A0\n000000067F000040020000A00000004B8000-000000067F000040020000A00000004BC000__0000005D2FFFFB38\n000000067F000040020000A00000004B8152-000000067F000040020000A00000004C0B3C__00000024EE19EEF9-000000259DC1F899\n000000067F000040020000A00000004BC000-000
000067F000040020000A00000004C0000__00000025CABFE5B8\n000000067F000040020000A00000004BC000-000000067F000040020000A00000004C0000__00000038E9AF7F00\n000000067F000040020000A00000004BC000-000000067F000040020000A00000004C0000__0000003903F1CFE8\n000000067F000040020000A00000004BC000-000000067F000040020000A00000004C0000__0000003B99F7F8A0\n000000067F000040020000A00000004BC000-000000067F000040020000A00000004C0000__0000005D2FFFFB38\n000000067F000040020000A00000004C0000-000000067F000040020000A00000004C4000__00000025CABFE5B8\n000000067F000040020000A00000004C0000-000000067F000040020000A00000004C4000__00000038E9AF7F00\n000000067F000040020000A00000004C0000-000000067F000040020000A00000004C4000__0000003903F1CFE8\n000000067F000040020000A00000004C0000-000000067F000040020000A00000004C4000__0000003B99F7F8A0\n000000067F000040020000A00000004C0000-000000067F000040020000A00000004C4000__0000005D2FFFFB38\n000000067F000040020000A00000004C0B3C-000000067F000040020000A00000004C9523__00000024EE19EEF9-000000259DC1F899\n000000067F000040020000A00000004C4000-000000067F000040020000A00000004C8000__00000025CABFE5B8\n000000067F000040020000A00000004C4000-000000067F000040020000A00000004C8000__00000038E9AF7F00\n000000067F000040020000A00000004C4000-000000067F000040020000A00000004C8000__0000003903F1CFE8\n000000067F000040020000A00000004C4000-000000067F000040020000A00000004C8000__0000003B99F7F8A0\n000000067F000040020000A00000004C4000-000000067F000040020000A00000004C8000__0000005D2FFFFB38\n000000067F000040020000A00000004C8000-000000067F000040020000A00000004CC000__00000025CABFE5B8\n000000067F000040020000A00000004C8000-000000067F000040020000A00000004CC000__00000038E9AF7F00\n000000067F000040020000A00000004C8000-000000067F000040020000A00000004CC000__0000003903F1CFE8\n000000067F000040020000A00000004C8000-000000067F000040020000A00000004CC000__0000003B99F7F8A0\n000000067F000040020000A00000004C8000-000000067F000040020000A00000004CC000__0000005D2FFFFB38\n000000067F000040020000A00000004C9523-000000067F000040020000A00000004D1
F01__00000024EE19EEF9-000000259DC1F899\n000000067F000040020000A00000004CC000-000000067F000040020000A00000004D0000__00000025CABFE5B8\n000000067F000040020000A00000004CC000-000000067F000040020000A00000004D0000__00000038E9AF7F00\n000000067F000040020000A00000004CC000-000000067F000040020000A00000004D0000__0000003903F1CFE8\n000000067F000040020000A00000004CC000-000000067F000040020000A00000004D0000__0000003B99F7F8A0\n000000067F000040020000A00000004CC000-000000067F000040020000A00000004D0000__0000005D2FFFFB38\n000000067F000040020000A00000004D0000-000000067F000040020000A00000004D4000__00000025CABFE5B8\n000000067F000040020000A00000004D0000-000000067F000040020000A00000004D4000__00000038E9AF7F00\n000000067F000040020000A00000004D0000-000000067F000040020000A00000004D4000__0000003903F1CFE8\n000000067F000040020000A00000004D0000-000000067F000040020000A00000004D4000__0000003B99F7F8A0\n000000067F000040020000A00000004D0000-000000067F000040020000A00000004D4000__0000005D2FFFFB38\n000000067F000040020000A00000004D1F01-000000067F000040020000A00000004DA8BF__00000024EE19EEF9-000000259DC1F899\n000000067F000040020000A00000004D4000-000000067F000040020000A00000004D8000__00000025CABFE5B8\n000000067F000040020000A00000004D4000-000000067F000040020000A00000004D8000__00000038E9AF7F00\n000000067F000040020000A00000004D4000-000000067F000040020000A00000004D8000__0000003903F1CFE8\n000000067F000040020000A00000004D4000-000000067F000040020000A00000004D8000__0000003B99F7F8A0\n000000067F000040020000A00000004D4000-000000067F000040020000A00000004D8000__0000005D2FFFFB38\n000000067F000040020000A00000004D8000-000000067F000040020000A00000004DC000__00000025CABFE5B8\n000000067F000040020000A00000004D8000-000000067F000040020000A00000004DC000__00000038E9AF7F00\n000000067F000040020000A00000004D8000-000000067F000040020000A00000004DC000__0000003903F1CFE8\n000000067F000040020000A00000004D8000-000000067F000040020000A00000004DC000__0000003B99F7F8A0\n000000067F000040020000A00000004D8000-000000067F000040020000A00000004DC000__0000005D
2FFFFB38\n000000067F000040020000A00000004DA8BF-000000067F000040020000A00000004E327F__00000024EE19EEF9-000000259DC1F899\n000000067F000040020000A00000004DC000-000000067F000040020000A00000004E0000__00000025CABFE5B8\n000000067F000040020000A00000004DC000-000000067F000040020000A00000004E0000__00000038E9AF7F00\n000000067F000040020000A00000004DC000-000000067F000040020000A00000004E0000__0000003903F1CFE8\n000000067F000040020000A00000004DC000-000000067F000040020000A00000004E0000__0000003B99F7F8A0\n000000067F000040020000A00000004DC000-000000067F000040020000A00000004E0000__0000005D2FFFFB38\n000000067F000040020000A00000004E0000-000000067F000040020000A00000004E4000__00000025CABFE5B8\n000000067F000040020000A00000004E0000-000000067F000040020000A00000004E4000__00000038E9AF7F00\n000000067F000040020000A00000004E0000-000000067F000040020000A00000004E4000__0000003903F1CFE8\n000000067F000040020000A00000004E0000-000000067F000040020000A00000004E4000__0000003B99F7F8A0\n000000067F000040020000A00000004E0000-000000067F000040020000A00000004E4000__0000005D2FFFFB38\n000000067F000040020000A00000004E327F-000000067F000040020000A00000004EBC62__00000024EE19EEF9-000000259DC1F899\n000000067F000040020000A00000004E4000-000000067F000040020000A00000004E8000__00000025CABFE5B8\n000000067F000040020000A00000004E4000-000000067F000040020000A00000004E8000__00000038E9AF7F00\n000000067F000040020000A00000004E4000-000000067F000040020000A00000004E8000__0000003903F1CFE8\n000000067F000040020000A00000004E4000-000000067F000040020000A00000004E8000__0000003B99F7F8A0\n000000067F000040020000A00000004E4000-000000067F000040020000A00000004E8000__0000005D2FFFFB38\n000000067F000040020000A00000004E8000-000000067F000040020000A00000004EC000__00000025CABFE5B8\n000000067F000040020000A00000004E8000-000000067F000040020000A00000004EC000__00000038E9AF7F00\n000000067F000040020000A00000004E8000-000000067F000040020000A00000004EC000__0000003903F1CFE8\n000000067F000040020000A00000004E8000-000000067F000040020000A00000004EC000__0000003B99F7F8A0\n000
000067F000040020000A00000004E8000-000000067F000040020000A00000004EC000__0000005D2FFFFB38\n000000067F000040020000A00000004EBC62-000000067F000040020000A00000004F4640__00000024EE19EEF9-000000259DC1F899\n000000067F000040020000A00000004EC000-000000067F000040020000A00000004F0000__00000025CABFE5B8\n000000067F000040020000A00000004EC000-000000067F000040020000A00000004F0000__00000038E9AF7F00\n000000067F000040020000A00000004EC000-000000067F000040020000A00000004F0000__0000003903F1CFE8\n000000067F000040020000A00000004EC000-000000067F000040020000A00000004F0000__0000003B99F7F8A0\n000000067F000040020000A00000004EC000-000000067F000040020000A00000004F0000__0000005D2FFFFB38\n000000067F000040020000A00000004F0000-000000067F000040020000A00000004F4000__00000025CABFE5B8\n000000067F000040020000A00000004F0000-000000067F000040020000A00000004F4000__00000038E9AF7F00\n000000067F000040020000A00000004F0000-000000067F000040020000A00000004F4000__0000003903F1CFE8\n000000067F000040020000A00000004F0000-000000067F000040020000A00000004F4000__0000003B99F7F8A0\n000000067F000040020000A00000004F0000-000000067F000040020000A00000004F4000__0000005D2FFFFB38\n000000067F000040020000A00000004F4000-000000067F000040020000A00000004F8000__00000025CABFE5B8\n000000067F000040020000A00000004F4000-000000067F000040020000A00000004F8000__00000038E67ABFA0\n000000067F000040020000A00000004F4000-000000067F000040020000A00000004F8000__0000003903F1CFE8\n000000067F000040020000A00000004F4000-000000067F000040020000A00000004F8000__0000003B99F7F8A0\n000000067F000040020000A00000004F4000-000000067F000040020000A00000004F8000__0000005D2FFFFB38\n000000067F000040020000A00000004F4640-000000067F000040020000A0000200000000__00000024EE19EEF9-000000259DC1F899\n000000067F000040020000A00000004F48EC-000000067F000040020000A00000004FD2E1__000000259DC1F899-000000263D71E6D9\n000000067F000040020000A00000004F8000-000000067F000040020000A00000004FC000__00000025CABFE5B8\n000000067F000040020000A00000004F8000-000000067F000040020000A00000004FC000__00000038E67ABFA0\
n000000067F000040020000A00000004F8000-000000067F000040020000A00000004FC000__0000003903F1CFE8\n000000067F000040020000A00000004F8000-000000067F000040020000A00000004FC000__0000003B99F7F8A0\n000000067F000040020000A00000004F8000-000000067F000040020000A00000004FC000__0000005D2FFFFB38\n000000067F000040020000A00000004FC000-000000067F000040020000A0000000500000__00000025CABFE5B8\n000000067F000040020000A00000004FC000-000000067F000040020000A0000000500000__00000038E67ABFA0\n000000067F000040020000A00000004FC000-000000067F000040020000A0000000500000__0000003903F1CFE8\n000000067F000040020000A00000004FC000-000000067F000040020000A0000000500000__0000003B99F7F8A0\n000000067F000040020000A00000004FC000-000000067F000040020000A0000000500000__0000005D2FFFFB38\n000000067F000040020000A00000004FD2E1-000000067F000040020000A0000000505CD7__000000259DC1F899-000000263D71E6D9\n000000067F000040020000A0000000500000-000000067F000040020000A0000000504000__00000025CABFE5B8\n000000067F000040020000A0000000500000-000000067F000040020000A0000000504000__00000038E67ABFA0\n000000067F000040020000A0000000500000-000000067F000040020000A0000000504000__0000003903F1CFE8\n000000067F000040020000A0000000500000-000000067F000040020000A0000000504000__0000003B99F7F8A0\n000000067F000040020000A0000000500000-000000067F000040020000A0000000504000__0000005D2FFFFB38\n000000067F000040020000A0000000504000-000000067F000040020000A0000000508000__00000025CABFE5B8\n000000067F000040020000A0000000504000-000000067F000040020000A0000000508000__00000038E67ABFA0\n000000067F000040020000A0000000504000-000000067F000040020000A0000000508000__0000003903F1CFE8\n000000067F000040020000A0000000504000-000000067F000040020000A0000000508000__0000003B99F7F8A0\n000000067F000040020000A0000000504000-000000067F000040020000A0000000508000__0000005D2FFFFB38\n000000067F000040020000A0000000505CD7-000000067F000040020000A000000050E6C4__000000259DC1F899-000000263D71E6D9\n000000067F000040020000A0000000508000-000000067F000040020000A000000050C000__00000025CABFE5B8\n000000067F00
0040020000A0000000508000-000000067F000040020000A000000050C000__00000038E67ABFA0\n000000067F000040020000A0000000508000-000000067F000040020000A000000050C000__0000003903F1CFE8\n000000067F000040020000A0000000508000-000000067F000040020000A000000050C000__0000003B99F7F8A0\n000000067F000040020000A0000000508000-000000067F000040020000A000000050C000__0000005D2FFFFB38\n000000067F000040020000A000000050C000-000000067F000040020000A0000000510000__00000038E67ABFA0\n000000067F000040020000A000000050C000-000000067F000040020000A0000000510000__0000003903F1CFE8\n000000067F000040020000A000000050C000-000000067F000040020000A0000000510000__0000003B99F7F8A0\n000000067F000040020000A000000050C000-000000067F000040020000A0000000510000__0000005D2FFFFB38\n000000067F000040020000A000000050C000-030000000000000000000000000000000002__00000025CABFE5B8\n000000067F000040020000A000000050E6C4-000000067F000040020000A000000051708F__000000259DC1F899-000000263D71E6D9\n000000067F000040020000A0000000510000-000000067F000040020000A0000000514000__00000038E67ABFA0\n000000067F000040020000A0000000510000-000000067F000040020000A0000000514000__0000003903F1CFE8\n000000067F000040020000A0000000510000-000000067F000040020000A0000000514000__0000003B99F7F8A0\n000000067F000040020000A0000000510000-000000067F000040020000A0000000514000__0000005D2FFFFB38\n000000067F000040020000A0000000514000-000000067F000040020000A0000000518000__00000038E67ABFA0\n000000067F000040020000A0000000514000-000000067F000040020000A0000000518000__0000003903F1CFE8\n000000067F000040020000A0000000514000-000000067F000040020000A0000000518000__0000003B99F7F8A0\n000000067F000040020000A0000000514000-000000067F000040020000A0000000518000__0000005D2FFFFB38\n000000067F000040020000A000000051708F-000000067F000040020000A000000051FA56__000000259DC1F899-000000263D71E6D9\n000000067F000040020000A0000000518000-000000067F000040020000A000000051C000__00000038E67ABFA0\n000000067F000040020000A0000000518000-000000067F000040020000A000000051C000__0000003903F1CFE8\n000000067F000040020000A00
00000518000-000000067F000040020000A000000051C000__0000003B99F7F8A0\n000000067F000040020000A0000000518000-000000067F000040020000A000000051C000__0000005D2FFFFB38\n000000067F000040020000A000000051C000-000000067F000040020000A0000000520000__00000038E67ABFA0\n000000067F000040020000A000000051C000-000000067F000040020000A0000000520000__0000003903F1CFE8\n000000067F000040020000A000000051C000-000000067F000040020000A0000000520000__0000003B99F7F8A0\n000000067F000040020000A000000051C000-000000067F000040020000A0000000520000__0000005D2FFFFB38\n000000067F000040020000A000000051FA56-000000067F000040020000A0000000528431__000000259DC1F899-000000263D71E6D9\n000000067F000040020000A0000000520000-000000067F000040020000A0000000524000__00000038E67ABFA0\n000000067F000040020000A0000000520000-000000067F000040020000A0000000524000__0000003903F1CFE8\n000000067F000040020000A0000000520000-000000067F000040020000A0000000524000__0000003B99F7F8A0\n000000067F000040020000A0000000520000-000000067F000040020000A0000000524000__0000005D2FFFFB38\n000000067F000040020000A0000000524000-000000067F000040020000A0000000528000__00000038E67ABFA0\n000000067F000040020000A0000000524000-000000067F000040020000A0000000528000__0000003903F1CFE8\n000000067F000040020000A0000000524000-000000067F000040020000A0000000528000__0000003B99F7F8A0\n000000067F000040020000A0000000524000-000000067F000040020000A0000000528000__0000005D2FFFFB38\n000000067F000040020000A0000000528000-000000067F000040020000A000000052C000__00000038E67ABFA0\n000000067F000040020000A0000000528000-000000067F000040020000A000000052C000__0000003903F1CFE8\n000000067F000040020000A0000000528000-000000067F000040020000A000000052C000__0000003B99F7F8A0\n000000067F000040020000A0000000528000-000000067F000040020000A000000052C000__0000005D2FFFFB38\n000000067F000040020000A0000000528431-000000067F000040020000A0000000530E07__000000259DC1F899-000000263D71E6D9\n000000067F000040020000A000000052C000-000000067F000040020000A0000000530000__00000038E67ABFA0\n000000067F000040020000A000000052C000-0
00000067F000040020000A0000000530000__0000003903F1CFE8\n000000067F000040020000A000000052C000-000000067F000040020000A0000000530000__0000003B99F7F8A0\n000000067F000040020000A000000052C000-000000067F000040020000A0000000530000__0000005D2FFFFB38\n000000067F000040020000A0000000530000-000000067F000040020000A0000000534000__00000038E67ABFA0\n000000067F000040020000A0000000530000-000000067F000040020000A0000000534000__0000003903F1CFE8\n000000067F000040020000A0000000530000-000000067F000040020000A0000000534000__0000003B99F7F8A0\n000000067F000040020000A0000000530000-000000067F000040020000A0000000534000__0000005D2FFFFB38\n000000067F000040020000A0000000530E07-000000067F000040020000A00000005397EE__000000259DC1F899-000000263D71E6D9\n000000067F000040020000A0000000534000-000000067F000040020000A0000000538000__00000038E67ABFA0\n000000067F000040020000A0000000534000-000000067F000040020000A0000000538000__0000003903F1CFE8\n000000067F000040020000A0000000534000-000000067F000040020000A0000000538000__0000003B99F7F8A0\n000000067F000040020000A0000000534000-000000067F000040020000A0000000538000__0000005D2FFFFB38\n000000067F000040020000A0000000538000-000000067F000040020000A000000053C000__00000038E67ABFA0\n000000067F000040020000A0000000538000-000000067F000040020000A000000053C000__0000003903F1CFE8\n000000067F000040020000A0000000538000-000000067F000040020000A000000053C000__0000003B99F7F8A0\n000000067F000040020000A0000000538000-000000067F000040020000A000000053C000__0000005D2FFFFB38\n000000067F000040020000A00000005397EE-000000067F000040020000A00000005421E0__000000259DC1F899-000000263D71E6D9\n000000067F000040020000A000000053C000-000000067F000040020000A0000000540000__00000038E67ABFA0\n000000067F000040020000A000000053C000-000000067F000040020000A0000000540000__0000003903F1CFE8\n000000067F000040020000A000000053C000-000000067F000040020000A0000000540000__0000003B99F7F8A0\n000000067F000040020000A000000053C000-000000067F000040020000A0000000540000__0000005D2FFFFB38\n000000067F000040020000A0000000540000-000000067F0000
40020000A0000000544000__00000038E67ABFA0\n000000067F000040020000A0000000540000-000000067F000040020000A0000000544000__0000003903F1CFE8\n000000067F000040020000A0000000540000-000000067F000040020000A0000000544000__0000003B99F7F8A0\n000000067F000040020000A0000000540000-000000067F000040020000A0000000544000__0000005D2FFFFB38\n000000067F000040020000A00000005421E0-000000067F000040020000A000000054ABC7__000000259DC1F899-000000263D71E6D9\n000000067F000040020000A0000000544000-000000067F000040020000A0000000548000__00000038E67ABFA0\n000000067F000040020000A0000000544000-000000067F000040020000A0000000548000__0000003903F1CFE8\n000000067F000040020000A0000000544000-000000067F000040020000A0000000548000__0000003B99F7F8A0\n000000067F000040020000A0000000544000-000000067F000040020000A0000000548000__0000005D2FFFFB38\n000000067F000040020000A0000000548000-000000067F000040020000A000000054C000__00000038E1ABFE28\n000000067F000040020000A0000000548000-000000067F000040020000A000000054C000__00000038E9AF7F00\n000000067F000040020000A0000000548000-000000067F000040020000A000000054C000__0000003903F1CFE8\n000000067F000040020000A0000000548000-000000067F000040020000A000000054C000__0000003B99F7F8A0\n000000067F000040020000A0000000548000-000000067F000040020000A000000054C000__0000005D2FFFFB38\n000000067F000040020000A000000054ABC7-000000067F000040020000A0000200000000__000000259DC1F899-000000263D71E6D9\n000000067F000040020000A000000054AD5E-000000067F000040020000A000000055371D__000000263D71E6D9-00000026ED17F009\n000000067F000040020000A000000054C000-000000067F000040020000A0000000550000__00000038E1ABFE28\n000000067F000040020000A000000054C000-000000067F000040020000A0000000550000__00000038E9AF7F00\n000000067F000040020000A000000054C000-000000067F000040020000A0000000550000__0000003903F1CFE8\n000000067F000040020000A000000054C000-000000067F000040020000A0000000550000__0000003B99F7F8A0\n000000067F000040020000A000000054C000-000000067F000040020000A0000000550000__0000005D2FFFFB38\n000000067F000040020000A0000000550000-000000067F
000040020000A0000000554000__00000038E1ABFE28\n000000067F000040020000A0000000550000-000000067F000040020000A0000000554000__00000038E9AF7F00\n000000067F000040020000A0000000550000-000000067F000040020000A0000000554000__0000003903F1CFE8\n000000067F000040020000A0000000550000-000000067F000040020000A0000000554000__0000003B99F7F8A0\n000000067F000040020000A0000000550000-000000067F000040020000A0000000554000__0000005D2FFFFB38\n000000067F000040020000A000000055371D-000000067F000040020000A000000055C0DF__000000263D71E6D9-00000026ED17F009\n000000067F000040020000A0000000554000-000000067F000040020000A0000000558000__00000038E1ABFE28\n000000067F000040020000A0000000554000-000000067F000040020000A0000000558000__00000038E9AF7F00\n000000067F000040020000A0000000554000-000000067F000040020000A0000000558000__0000003903F1CFE8\n000000067F000040020000A0000000554000-000000067F000040020000A0000000558000__0000003B99F7F8A0\n000000067F000040020000A0000000554000-000000067F000040020000A0000000558000__0000005D2FFFFB38\n000000067F000040020000A0000000558000-000000067F000040020000A000000055C000__00000038E1ABFE28\n000000067F000040020000A0000000558000-000000067F000040020000A000000055C000__00000038E9AF7F00\n000000067F000040020000A0000000558000-000000067F000040020000A000000055C000__0000003903F1CFE8\n000000067F000040020000A0000000558000-000000067F000040020000A000000055C000__0000003B99F7F8A0\n000000067F000040020000A0000000558000-000000067F000040020000A000000055C000__0000005D2FFFFB38\n000000067F000040020000A000000055C000-000000067F000040020000A0000000560000__00000038E1ABFE28\n000000067F000040020000A000000055C000-000000067F000040020000A0000000560000__00000038E9AF7F00\n000000067F000040020000A000000055C000-000000067F000040020000A0000000560000__0000003903F1CFE8\n000000067F000040020000A000000055C000-000000067F000040020000A0000000560000__0000003B99F7F8A0\n000000067F000040020000A000000055C000-000000067F000040020000A0000000560000__0000005D2FFFFB38\n000000067F000040020000A000000055C0DF-000000067F000040020000A0000000564AC7__00
0000263D71E6D9-00000026ED17F009\n000000067F000040020000A0000000560000-000000067F000040020000A0000000564000__00000038E1ABFE28\n000000067F000040020000A0000000560000-000000067F000040020000A0000000564000__00000038E9AF7F00\n000000067F000040020000A0000000560000-000000067F000040020000A0000000564000__0000003903F1CFE8\n000000067F000040020000A0000000560000-000000067F000040020000A0000000564000__0000003B99F7F8A0\n000000067F000040020000A0000000560000-000000067F000040020000A0000000564000__0000005D2FFFFB38\n000000067F000040020000A0000000564000-000000067F000040020000A0000000568000__00000038E1ABFE28\n000000067F000040020000A0000000564000-000000067F000040020000A0000000568000__00000038E9AF7F00\n000000067F000040020000A0000000564000-000000067F000040020000A0000000568000__0000003903F1CFE8\n000000067F000040020000A0000000564000-000000067F000040020000A0000000568000__0000003B99F7F8A0\n000000067F000040020000A0000000564000-000000067F000040020000A0000000568000__0000005D2FFFFB38\n000000067F000040020000A0000000564AC7-000000067F000040020000A000000056D4B3__000000263D71E6D9-00000026ED17F009\n000000067F000040020000A0000000568000-000000067F000040020000A000000056C000__00000038E1ABFE28\n000000067F000040020000A0000000568000-000000067F000040020000A000000056C000__00000038E9AF7F00\n000000067F000040020000A0000000568000-000000067F000040020000A000000056C000__0000003903F1CFE8\n000000067F000040020000A0000000568000-000000067F000040020000A000000056C000__0000003B99F7F8A0\n000000067F000040020000A0000000568000-000000067F000040020000A000000056C000__0000005D2FFFFB38\n000000067F000040020000A000000056C000-000000067F000040020000A0000000570000__00000038E1ABFE28\n000000067F000040020000A000000056C000-000000067F000040020000A0000000570000__00000038E9AF7F00\n000000067F000040020000A000000056C000-000000067F000040020000A0000000570000__0000003903F1CFE8\n000000067F000040020000A000000056C000-000000067F000040020000A0000000570000__0000003B99F7F8A0\n000000067F000040020000A000000056C000-000000067F000040020000A0000000570000__0000005D2FFFFB3
8\n000000067F000040020000A000000056D4B3-000000067F000040020000A0000000575EA8__000000263D71E6D9-00000026ED17F009\n000000067F000040020000A0000000570000-000000067F000040020000A0000000574000__00000038E1ABFE28\n000000067F000040020000A0000000570000-000000067F000040020000A0000000574000__00000038E9AF7F00\n000000067F000040020000A0000000570000-000000067F000040020000A0000000574000__0000003903F1CFE8\n000000067F000040020000A0000000570000-000000067F000040020000A0000000574000__0000003B99F7F8A0\n000000067F000040020000A0000000570000-000000067F000040020000A0000000574000__0000005D2FFFFB38\n000000067F000040020000A0000000574000-000000067F000040020000A0000000578000__00000038E1ABFE28\n000000067F000040020000A0000000574000-000000067F000040020000A0000000578000__00000038E9AF7F00\n000000067F000040020000A0000000574000-000000067F000040020000A0000000578000__0000003903F1CFE8\n000000067F000040020000A0000000574000-000000067F000040020000A0000000578000__0000003B99F7F8A0\n000000067F000040020000A0000000574000-000000067F000040020000A0000000578000__0000005D2FFFFB38\n000000067F000040020000A0000000575EA8-000000067F000040020000A000000057E892__000000263D71E6D9-00000026ED17F009\n000000067F000040020000A0000000578000-000000067F000040020000A000000057C000__00000038E1ABFE28\n000000067F000040020000A0000000578000-000000067F000040020000A000000057C000__00000038E9AF7F00\n000000067F000040020000A0000000578000-000000067F000040020000A000000057C000__0000003903F1CFE8\n000000067F000040020000A0000000578000-000000067F000040020000A000000057C000__0000003B99F7F8A0\n000000067F000040020000A0000000578000-000000067F000040020000A000000057C000__0000005D2FFFFB38\n000000067F000040020000A000000057C000-000000067F000040020000A0000000580000__00000038E1ABFE28\n000000067F000040020000A000000057C000-000000067F000040020000A0000000580000__00000038E9AF7F00\n000000067F000040020000A000000057C000-000000067F000040020000A0000000580000__0000003903F1CFE8\n000000067F000040020000A000000057C000-000000067F000040020000A0000000580000__0000003B99F7F8A0\n000000067F
000040020000A000000057C000-000000067F000040020000A0000000580000__0000005D2FFFFB38\n000000067F000040020000A000000057E892-000000067F000040020000A000000058726C__000000263D71E6D9-00000026ED17F009\n000000067F000040020000A0000000580000-000000067F000040020000A0000000584000__00000038E1ABFE28\n000000067F000040020000A0000000580000-000000067F000040020000A0000000584000__00000038E9AF7F00\n000000067F000040020000A0000000580000-000000067F000040020000A0000000584000__0000003903F1CFE8\n000000067F000040020000A0000000580000-000000067F000040020000A0000000584000__0000003B99F7F8A0\n000000067F000040020000A0000000580000-000000067F000040020000A0000000584000__0000005D2FFFFB38\n000000067F000040020000A0000000584000-000000067F000040020000A0000000588000__00000038E1ABFE28\n000000067F000040020000A0000000584000-000000067F000040020000A0000000588000__00000038E9AF7F00\n000000067F000040020000A0000000584000-000000067F000040020000A0000000588000__0000003903F1CFE8\n000000067F000040020000A0000000584000-000000067F000040020000A0000000588000__0000003B99F7F8A0\n000000067F000040020000A0000000584000-000000067F000040020000A0000000588000__0000005D2FFFFB38\n000000067F000040020000A000000058726C-000000067F000040020000A000000058FC31__000000263D71E6D9-00000026ED17F009\n000000067F000040020000A0000000588000-000000067F000040020000A000000058C000__00000038E1ABFE28\n000000067F000040020000A0000000588000-000000067F000040020000A000000058C000__00000038E9AF7F00\n000000067F000040020000A0000000588000-000000067F000040020000A000000058C000__0000003903F1CFE8\n000000067F000040020000A0000000588000-000000067F000040020000A000000058C000__0000003B99F7F8A0\n000000067F000040020000A0000000588000-000000067F000040020000A000000058C000__0000005D2FFFFB38\n000000067F000040020000A000000058C000-000000067F000040020000A0000000590000__00000038E1ABFE28\n000000067F000040020000A000000058C000-000000067F000040020000A0000000590000__00000038E9AF7F00\n000000067F000040020000A000000058C000-000000067F000040020000A0000000590000__0000003903F1CFE8\n000000067F000040020000A
000000058C000-000000067F000040020000A0000000590000__0000003B99F7F8A0\n000000067F000040020000A000000058C000-000000067F000040020000A0000000590000__0000005D2FFFFB38\n000000067F000040020000A000000058FC31-000000067F000040020000A00000005985F9__000000263D71E6D9-00000026ED17F009\n000000067F000040020000A0000000590000-000000067F000040020000A0000000594000__00000038E1ABFE28\n000000067F000040020000A0000000590000-000000067F000040020000A0000000594000__00000038E9AF7F00\n000000067F000040020000A0000000590000-000000067F000040020000A0000000594000__0000003903F1CFE8\n000000067F000040020000A0000000590000-000000067F000040020000A0000000594000__0000003B99F7F8A0\n000000067F000040020000A0000000590000-000000067F000040020000A0000000594000__0000005D2FFFFB38\n000000067F000040020000A0000000594000-000000067F000040020000A0000000598000__00000038E1ABFE28\n000000067F000040020000A0000000594000-000000067F000040020000A0000000598000__00000038E9AF7F00\n000000067F000040020000A0000000594000-000000067F000040020000A0000000598000__0000003903F1CFE8\n000000067F000040020000A0000000594000-000000067F000040020000A0000000598000__0000003B99F7F8A0\n000000067F000040020000A0000000594000-000000067F000040020000A0000000598000__0000005D2FFFFB38\n000000067F000040020000A0000000598000-000000067F000040020000A000000059C000__00000038E1ABFE28\n000000067F000040020000A0000000598000-000000067F000040020000A000000059C000__00000038E9AF7F00\n000000067F000040020000A0000000598000-000000067F000040020000A000000059C000__0000003903F1CFE8\n000000067F000040020000A0000000598000-000000067F000040020000A000000059C000__0000003B99F7F8A0\n000000067F000040020000A0000000598000-000000067F000040020000A000000059C000__0000005D2FFFFB38\n000000067F000040020000A00000005985F9-000000067F000040020000A00000005A0FE9__000000263D71E6D9-00000026ED17F009\n000000067F000040020000A000000059C000-000000067F000040020000A00000005A0000__00000038E1ABFE28\n000000067F000040020000A000000059C000-000000067F000040020000A00000005A0000__00000038E9AF7F00\n000000067F000040020000A000000059C000
-000000067F000040020000A00000005A0000__0000003903F1CFE8\n000000067F000040020000A000000059C000-000000067F000040020000A00000005A0000__0000003B99F7F8A0\n000000067F000040020000A000000059C000-000000067F000040020000A00000005A0000__0000005D2FFFFB38\n000000067F000040020000A00000005A0000-000000067F000040020000A00000005A4000__00000038E1ABFE28\n000000067F000040020000A00000005A0000-000000067F000040020000A00000005A4000__00000038E9AF7F00\n000000067F000040020000A00000005A0000-000000067F000040020000A00000005A4000__0000003903F1CFE8\n000000067F000040020000A00000005A0000-000000067F000040020000A00000005A4000__0000003B99F7F8A0\n000000067F000040020000A00000005A0000-000000067F000040020000A00000005A4000__0000005D2FFFFB38\n000000067F000040020000A00000005A0FE9-000000067F000040020000A00000005A99D4__000000263D71E6D9-00000026ED17F009\n000000067F000040020000A00000005A4000-000000067F000040020000A00000005A8000__00000038E1ABFE28\n000000067F000040020000A00000005A4000-000000067F000040020000A00000005A8000__00000038E9AF7F00\n000000067F000040020000A00000005A4000-000000067F000040020000A00000005A8000__0000003903F1CFE8\n000000067F000040020000A00000005A4000-000000067F000040020000A00000005A8000__0000003B99F7F8A0\n000000067F000040020000A00000005A4000-000000067F000040020000A00000005A8000__0000005D2FFFFB38\n000000067F000040020000A00000005A8000-000000067F000040020000A00000005AC000__00000027BCAFED20\n000000067F000040020000A00000005A8000-000000067F000040020000A00000005AC000__00000038E9AF7F00\n000000067F000040020000A00000005A8000-000000067F000040020000A00000005AC000__0000003903F1CFE8\n000000067F000040020000A00000005A8000-000000067F000040020000A00000005AC000__0000003B99F7F8A0\n000000067F000040020000A00000005A8000-000000067F000040020000A00000005AC000__0000005D2FFFFB38\n000000067F000040020000A00000005A99D4-000000067F000040020000A0000200000000__000000263D71E6D9-00000026ED17F009\n000000067F000040020000A00000005A9C62-000000067F000040020000A00000005B2656__00000026ED17F009-000000278CC7EF29\n000000067F000040020000A00000005A
C000-000000067F000040020000A00000005B0000__00000027BCAFED20\n000000067F000040020000A00000005AC000-000000067F000040020000A00000005B0000__00000038E9AF7F00\n000000067F000040020000A00000005AC000-000000067F000040020000A00000005B0000__0000003903F1CFE8\n000000067F000040020000A00000005AC000-000000067F000040020000A00000005B0000__0000003B99F7F8A0\n000000067F000040020000A00000005AC000-000000067F000040020000A00000005B0000__0000005D2FFFFB38\n000000067F000040020000A00000005B0000-000000067F000040020000A00000005B4000__00000027BCAFED20\n000000067F000040020000A00000005B0000-000000067F000040020000A00000005B4000__00000038E9AF7F00\n000000067F000040020000A00000005B0000-000000067F000040020000A00000005B4000__0000003903F1CFE8\n000000067F000040020000A00000005B0000-000000067F000040020000A00000005B4000__0000003B99F7F8A0\n000000067F000040020000A00000005B0000-000000067F000040020000A00000005B4000__0000005D2FFFFB38\n000000067F000040020000A00000005B2656-000000067F000040020000A00000005BB03A__00000026ED17F009-000000278CC7EF29\n000000067F000040020000A00000005B4000-000000067F000040020000A00000005B8000__00000027BCAFED20\n000000067F000040020000A00000005B4000-000000067F000040020000A00000005B8000__00000038E9AF7F00\n000000067F000040020000A00000005B4000-000000067F000040020000A00000005B8000__0000003903F1CFE8\n000000067F000040020000A00000005B4000-000000067F000040020000A00000005B8000__0000003B99F7F8A0\n000000067F000040020000A00000005B4000-000000067F000040020000A00000005B8000__0000005D2FFFFB38\n000000067F000040020000A00000005B8000-000000067F000040020000A00000005BC000__00000027BCAFED20\n000000067F000040020000A00000005B8000-000000067F000040020000A00000005BC000__00000038E9AF7F00\n000000067F000040020000A00000005B8000-000000067F000040020000A00000005BC000__0000003903F1CFE8\n000000067F000040020000A00000005B8000-000000067F000040020000A00000005BC000__0000003B99F7F8A0\n000000067F000040020000A00000005B8000-000000067F000040020000A00000005BC000__0000005D2FFFFB38\n000000067F000040020000A00000005BB03A-000000067F000040020000A00
000005C3A02__00000026ED17F009-000000278CC7EF29\n000000067F000040020000A00000005BC000-000000067F000040020000A00000005C0000__00000027BCAFED20\n000000067F000040020000A00000005BC000-000000067F000040020000A00000005C0000__00000038E9AF7F00\n000000067F000040020000A00000005BC000-000000067F000040020000A00000005C0000__0000003903F1CFE8\n000000067F000040020000A00000005BC000-000000067F000040020000A00000005C0000__0000003B99F7F8A0\n000000067F000040020000A00000005BC000-000000067F000040020000A00000005C0000__0000005D2FFFFB38\n000000067F000040020000A00000005C0000-000000067F000040020000A00000005C4000__00000027BCAFED20\n000000067F000040020000A00000005C0000-000000067F000040020000A00000005C4000__00000038E9AF7F00\n000000067F000040020000A00000005C0000-000000067F000040020000A00000005C4000__0000003903F1CFE8\n000000067F000040020000A00000005C0000-000000067F000040020000A00000005C4000__0000003B99F7F8A0\n000000067F000040020000A00000005C0000-000000067F000040020000A00000005C4000__0000005D2FFFFB38\n000000067F000040020000A00000005C3A02-000000067F000040020000A00000005CC3B7__00000026ED17F009-000000278CC7EF29\n000000067F000040020000A00000005C4000-000000067F000040020000A00000005C8000__00000027BCAFED20\n000000067F000040020000A00000005C4000-000000067F000040020000A00000005C8000__00000038E9AF7F00\n000000067F000040020000A00000005C4000-000000067F000040020000A00000005C8000__0000003903F1CFE8\n000000067F000040020000A00000005C4000-000000067F000040020000A00000005C8000__0000003B99F7F8A0\n000000067F000040020000A00000005C4000-000000067F000040020000A00000005C8000__0000005D2FFFFB38\n000000067F000040020000A00000005C8000-000000067F000040020000A00000005CC000__00000027BCAFED20\n000000067F000040020000A00000005C8000-000000067F000040020000A00000005CC000__00000038E9AF7F00\n000000067F000040020000A00000005C8000-000000067F000040020000A00000005CC000__0000003903F1CFE8\n000000067F000040020000A00000005C8000-000000067F000040020000A00000005CC000__0000003B99F7F8A0\n000000067F000040020000A00000005C8000-000000067F000040020000A00000005CC000__
0000005D2FFFFB38\n000000067F000040020000A00000005CC000-000000067F000040020000A00000005D0000__00000027BCAFED20\n000000067F000040020000A00000005CC000-000000067F000040020000A00000005D0000__00000038E9AF7F00\n000000067F000040020000A00000005CC000-000000067F000040020000A00000005D0000__0000003903F1CFE8\n000000067F000040020000A00000005CC000-000000067F000040020000A00000005D0000__0000003B99F7F8A0\n000000067F000040020000A00000005CC000-000000067F000040020000A00000005D0000__0000005D2FFFFB38\n000000067F000040020000A00000005CC3B7-000000067F000040020000A00000005D4D88__00000026ED17F009-000000278CC7EF29\n000000067F000040020000A00000005D0000-000000067F000040020000A00000005D4000__00000027BCAFED20\n000000067F000040020000A00000005D0000-000000067F000040020000A00000005D4000__00000038E9AF7F00\n000000067F000040020000A00000005D0000-000000067F000040020000A00000005D4000__0000003903F1CFE8\n000000067F000040020000A00000005D0000-000000067F000040020000A00000005D4000__0000003B99F7F8A0\n000000067F000040020000A00000005D0000-000000067F000040020000A00000005D4000__0000005D2FFFFB38\n000000067F000040020000A00000005D4000-000000067F000040020000A00000005D8000__00000027BCAFED20\n000000067F000040020000A00000005D4000-000000067F000040020000A00000005D8000__00000038E9AF7F00\n000000067F000040020000A00000005D4000-000000067F000040020000A00000005D8000__0000003903F1CFE8\n000000067F000040020000A00000005D4000-000000067F000040020000A00000005D8000__0000003B99F7F8A0\n000000067F000040020000A00000005D4000-000000067F000040020000A00000005D8000__0000005D2FFFFB38\n000000067F000040020000A00000005D4D88-000000067F000040020000A00000005DD76C__00000026ED17F009-000000278CC7EF29\n000000067F000040020000A00000005D8000-000000067F000040020000A00000005DC000__00000027BCAFED20\n000000067F000040020000A00000005D8000-000000067F000040020000A00000005DC000__00000038E9AF7F00\n000000067F000040020000A00000005D8000-000000067F000040020000A00000005DC000__0000003903F1CFE8\n000000067F000040020000A00000005D8000-000000067F000040020000A00000005DC000__0000003B99F7F
8A0\n000000067F000040020000A00000005D8000-000000067F000040020000A00000005DC000__0000005D2FFFFB38\n000000067F000040020000A00000005DC000-000000067F000040020000A00000005E0000__00000027BCAFED20\n000000067F000040020000A00000005DC000-000000067F000040020000A00000005E0000__00000038E9AF7F00\n000000067F000040020000A00000005DC000-000000067F000040020000A00000005E0000__0000003903F1CFE8\n000000067F000040020000A00000005DC000-000000067F000040020000A00000005E0000__0000003B99F7F8A0\n000000067F000040020000A00000005DC000-000000067F000040020000A00000005E0000__0000005D2FFFFB38\n000000067F000040020000A00000005DD76C-000000067F000040020000A00000005E6155__00000026ED17F009-000000278CC7EF29\n000000067F000040020000A00000005E0000-000000067F000040020000A00000005E4000__00000027BCAFED20\n000000067F000040020000A00000005E0000-000000067F000040020000A00000005E4000__00000038E9AF7F00\n000000067F000040020000A00000005E0000-000000067F000040020000A00000005E4000__0000003903F1CFE8\n000000067F000040020000A00000005E0000-000000067F000040020000A00000005E4000__0000003B99F7F8A0\n000000067F000040020000A00000005E0000-000000067F000040020000A00000005E4000__0000005D2FFFFB38\n000000067F000040020000A00000005E4000-000000067F000040020000A00000005E8000__00000027BCAFED20\n000000067F000040020000A00000005E4000-000000067F000040020000A00000005E8000__00000038E9AF7F00\n000000067F000040020000A00000005E4000-000000067F000040020000A00000005E8000__0000003903F1CFE8\n000000067F000040020000A00000005E4000-000000067F000040020000A00000005E8000__0000003B99F7F8A0\n000000067F000040020000A00000005E4000-000000067F000040020000A00000005E8000__0000005D2FFFFB38\n000000067F000040020000A00000005E6155-000000067F000040020000A00000005EEB42__00000026ED17F009-000000278CC7EF29\n000000067F000040020000A00000005E8000-000000067F000040020000A00000005EC000__00000027BCAFED20\n000000067F000040020000A00000005E8000-000000067F000040020000A00000005EC000__00000038E9AF7F00\n000000067F000040020000A00000005E8000-000000067F000040020000A00000005EC000__0000003903F1CFE8\n00000006
7F000040020000A00000005E8000-000000067F000040020000A00000005EC000__0000003B99F7F8A0\n000000067F000040020000A00000005E8000-000000067F000040020000A00000005EC000__0000005D2FFFFB38\n000000067F000040020000A00000005EC000-000000067F000040020000A00000005F0000__00000027BCAFED20\n000000067F000040020000A00000005EC000-000000067F000040020000A00000005F0000__00000038E9AF7F00\n000000067F000040020000A00000005EC000-000000067F000040020000A00000005F0000__0000003903F1CFE8\n000000067F000040020000A00000005EC000-000000067F000040020000A00000005F0000__0000003B99F7F8A0\n000000067F000040020000A00000005EC000-000000067F000040020000A00000005F0000__0000005D2FFFFB38\n000000067F000040020000A00000005EEB42-000000067F000040020000A00000005F7523__00000026ED17F009-000000278CC7EF29\n000000067F000040020000A00000005F0000-000000067F000040020000A00000005F4000__00000027BCAFED20\n000000067F000040020000A00000005F0000-000000067F000040020000A00000005F4000__00000038E9AF7F00\n000000067F000040020000A00000005F0000-000000067F000040020000A00000005F4000__0000003903F1CFE8\n000000067F000040020000A00000005F0000-000000067F000040020000A00000005F4000__0000003B99F7F8A0\n000000067F000040020000A00000005F0000-000000067F000040020000A00000005F4000__0000005D2FFFFB38\n000000067F000040020000A00000005F4000-000000067F000040020000A00000005F8000__00000027BCAFED20\n000000067F000040020000A00000005F4000-000000067F000040020000A00000005F8000__00000038E9AF7F00\n000000067F000040020000A00000005F4000-000000067F000040020000A00000005F8000__0000003903F1CFE8\n000000067F000040020000A00000005F4000-000000067F000040020000A00000005F8000__0000003B99F7F8A0\n000000067F000040020000A00000005F4000-000000067F000040020000A00000005F8000__0000005D2FFFFB38\n000000067F000040020000A00000005F7523-000000067F000040020000A00000005FFEE5__00000026ED17F009-000000278CC7EF29\n000000067F000040020000A00000005F8000-000000067F000040020000A00000005FC000__00000027BCAFED20\n000000067F000040020000A00000005F8000-000000067F000040020000A00000005FC000__00000038E9AF7F00\n000000067F00004002000
0A00000005F8000-000000067F000040020000A00000005FC000__0000003903F1CFE8\n000000067F000040020000A00000005F8000-000000067F000040020000A00000005FC000__0000003B99F7F8A0\n000000067F000040020000A00000005F8000-000000067F000040020000A00000005FC000__0000005D2FFFFB38\n000000067F000040020000A00000005FC000-000000067F000040020000A0000000600000__00000027BCAFED20\n000000067F000040020000A00000005FC000-000000067F000040020000A0000000600000__00000038E9AF7F00\n000000067F000040020000A00000005FC000-000000067F000040020000A0000000600000__0000003903F1CFE8\n000000067F000040020000A00000005FC000-000000067F000040020000A0000000600000__0000003B99F7F8A0\n000000067F000040020000A00000005FC000-000000067F000040020000A0000000600000__0000005D2FFFFB38\n000000067F000040020000A00000005FFEE5-000000067F000040020000A0000200000000__00000026ED17F009-000000278CC7EF29\n000000067F000040020000A0000000600000-000000067F000040020000A0000000604000__00000027BCAFED20\n000000067F000040020000A0000000600000-000000067F000040020000A0000000604000__00000038E67ABFA0\n000000067F000040020000A0000000600000-000000067F000040020000A0000000604000__0000003903F1CFE8\n000000067F000040020000A0000000600000-000000067F000040020000A0000000604000__0000003B99F7F8A0\n000000067F000040020000A0000000600000-000000067F000040020000A0000000604000__0000005D2FFFFB38\n000000067F000040020000A00000006000B1-000000067F000040020000A0000000608A6E__000000278CC7EF29-000000283C6FE2E9\n000000067F000040020000A0000000604000-000000067F000040020000A0000000608000__00000027BCAFED20\n000000067F000040020000A0000000604000-000000067F000040020000A0000000608000__00000038E67ABFA0\n000000067F000040020000A0000000604000-000000067F000040020000A0000000608000__0000003903F1CFE8\n000000067F000040020000A0000000604000-000000067F000040020000A0000000608000__0000003B99F7F8A0\n000000067F000040020000A0000000604000-000000067F000040020000A0000000608000__0000005D2FFFFB38\n000000067F000040020000A0000000608000-000000067F000040020000A000000060C000__00000027BCAFED20\n000000067F000040020000A00000006080
00-000000067F000040020000A000000060C000__00000038E67ABFA0\n000000067F000040020000A0000000608000-000000067F000040020000A000000060C000__0000003903F1CFE8\n000000067F000040020000A0000000608000-000000067F000040020000A000000060C000__0000003B99F7F8A0\n000000067F000040020000A0000000608000-000000067F000040020000A000000060C000__0000005D2FFFFB38\n000000067F000040020000A0000000608A6E-000000067F000040020000A000000061143E__000000278CC7EF29-000000283C6FE2E9\n000000067F000040020000A000000060C000-000000067F000040020000A0000000610000__00000027BCAFED20\n000000067F000040020000A000000060C000-000000067F000040020000A0000000610000__00000038E67ABFA0\n000000067F000040020000A000000060C000-000000067F000040020000A0000000610000__0000003903F1CFE8\n000000067F000040020000A000000060C000-000000067F000040020000A0000000610000__0000003B99F7F8A0\n000000067F000040020000A000000060C000-000000067F000040020000A0000000610000__0000005D2FFFFB38\n000000067F000040020000A0000000610000-000000067F000040020000A0000000614000__00000027BCAFED20\n000000067F000040020000A0000000610000-000000067F000040020000A0000000614000__00000038E67ABFA0\n000000067F000040020000A0000000610000-000000067F000040020000A0000000614000__0000003903F1CFE8\n000000067F000040020000A0000000610000-000000067F000040020000A0000000614000__0000003B99F7F8A0\n000000067F000040020000A0000000610000-000000067F000040020000A0000000614000__0000005D2FFFFB38\n000000067F000040020000A000000061143E-000000067F000040020000A0000000619E1E__000000278CC7EF29-000000283C6FE2E9\n000000067F000040020000A0000000614000-000000067F000040020000A0000000618000__00000027BCAFED20\n000000067F000040020000A0000000614000-000000067F000040020000A0000000618000__00000038E67ABFA0\n000000067F000040020000A0000000614000-000000067F000040020000A0000000618000__0000003903F1CFE8\n000000067F000040020000A0000000614000-000000067F000040020000A0000000618000__0000003B99F7F8A0\n000000067F000040020000A0000000614000-000000067F000040020000A0000000618000__0000005D2FFFFB38\n000000067F000040020000A0000000618000-000000067F
000040020000A000000061C000__00000038E67ABFA0\n000000067F000040020000A0000000618000-000000067F000040020000A000000061C000__0000003903F1CFE8\n000000067F000040020000A0000000618000-000000067F000040020000A000000061C000__0000003B99F7F8A0\n000000067F000040020000A0000000618000-000000067F000040020000A000000061C000__0000005D2FFFFB38\n000000067F000040020000A0000000618000-030000000000000000000000000000000002__00000027BCAFED20\n000000067F000040020000A0000000619E1E-000000067F000040020000A0000000622808__000000278CC7EF29-000000283C6FE2E9\n000000067F000040020000A000000061C000-000000067F000040020000A0000000620000__00000038E67ABFA0\n000000067F000040020000A000000061C000-000000067F000040020000A0000000620000__0000003903F1CFE8\n000000067F000040020000A000000061C000-000000067F000040020000A0000000620000__0000003B99F7F8A0\n000000067F000040020000A000000061C000-000000067F000040020000A0000000620000__0000005D2FFFFB38\n000000067F000040020000A0000000620000-000000067F000040020000A0000000624000__00000038E67ABFA0\n000000067F000040020000A0000000620000-000000067F000040020000A0000000624000__0000003903F1CFE8\n000000067F000040020000A0000000620000-000000067F000040020000A0000000624000__0000003B99F7F8A0\n000000067F000040020000A0000000620000-000000067F000040020000A0000000624000__0000005D2FFFFB38\n000000067F000040020000A0000000622808-000000067F000040020000A000000062B1F2__000000278CC7EF29-000000283C6FE2E9\n000000067F000040020000A0000000624000-000000067F000040020000A0000000628000__00000038E67ABFA0\n000000067F000040020000A0000000624000-000000067F000040020000A0000000628000__0000003903F1CFE8\n000000067F000040020000A0000000624000-000000067F000040020000A0000000628000__0000003B99F7F8A0\n000000067F000040020000A0000000624000-000000067F000040020000A0000000628000__0000005D2FFFFB38\n000000067F000040020000A0000000628000-000000067F000040020000A000000062C000__00000038E67ABFA0\n000000067F000040020000A0000000628000-000000067F000040020000A000000062C000__0000003903F1CFE8\n000000067F000040020000A0000000628000-000000067F000040020000A
000000062C000__0000003B99F7F8A0\n000000067F000040020000A0000000628000-000000067F000040020000A000000062C000__0000005D2FFFFB38\n000000067F000040020000A000000062B1F2-000000067F000040020000A0000000633BDF__000000278CC7EF29-000000283C6FE2E9\n000000067F000040020000A000000062C000-000000067F000040020000A0000000630000__00000038E67ABFA0\n000000067F000040020000A000000062C000-000000067F000040020000A0000000630000__0000003903F1CFE8\n000000067F000040020000A000000062C000-000000067F000040020000A0000000630000__0000003B99F7F8A0\n000000067F000040020000A000000062C000-000000067F000040020000A0000000630000__0000005D2FFFFB38\n000000067F000040020000A0000000630000-000000067F000040020000A0000000634000__00000038E67ABFA0\n000000067F000040020000A0000000630000-000000067F000040020000A0000000634000__0000003903F1CFE8\n000000067F000040020000A0000000630000-000000067F000040020000A0000000634000__0000003B99F7F8A0\n000000067F000040020000A0000000630000-000000067F000040020000A0000000634000__0000005D2FFFFB38\n000000067F000040020000A0000000633BDF-000000067F000040020000A000000063C5BF__000000278CC7EF29-000000283C6FE2E9\n000000067F000040020000A0000000634000-000000067F000040020000A0000000638000__00000038E67ABFA0\n000000067F000040020000A0000000634000-000000067F000040020000A0000000638000__0000003903F1CFE8\n000000067F000040020000A0000000634000-000000067F000040020000A0000000638000__0000003B99F7F8A0\n000000067F000040020000A0000000634000-000000067F000040020000A0000000638000__0000005D2FFFFB38\n000000067F000040020000A0000000638000-000000067F000040020000A000000063C000__00000038E67ABFA0\n000000067F000040020000A0000000638000-000000067F000040020000A000000063C000__0000003903F1CFE8\n000000067F000040020000A0000000638000-000000067F000040020000A000000063C000__0000003B99F7F8A0\n000000067F000040020000A0000000638000-000000067F000040020000A000000063C000__0000005D2FFFFB38\n000000067F000040020000A000000063C000-000000067F000040020000A0000000640000__00000038E67ABFA0\n000000067F000040020000A000000063C000-000000067F000040020000A0000000640000
__0000003903F1CFE8\n000000067F000040020000A000000063C000-000000067F000040020000A0000000640000__0000003B99F7F8A0\n000000067F000040020000A000000063C000-000000067F000040020000A0000000640000__0000005D2FFFFB38\n000000067F000040020000A000000063C5BF-000000067F000040020000A0000000644F80__000000278CC7EF29-000000283C6FE2E9\n000000067F000040020000A0000000640000-000000067F000040020000A0000000644000__00000038E67ABFA0\n000000067F000040020000A0000000640000-000000067F000040020000A0000000644000__0000003903F1CFE8\n000000067F000040020000A0000000640000-000000067F000040020000A0000000644000__0000003B99F7F8A0\n000000067F000040020000A0000000640000-000000067F000040020000A0000000644000__0000005D2FFFFB38\n000000067F000040020000A0000000644000-000000067F000040020000A0000000648000__00000038E67ABFA0\n000000067F000040020000A0000000644000-000000067F000040020000A0000000648000__0000003903F1CFE8\n000000067F000040020000A0000000644000-000000067F000040020000A0000000648000__0000003B99F7F8A0\n000000067F000040020000A0000000644000-000000067F000040020000A0000000648000__0000005D2FFFFB38\n000000067F000040020000A0000000644F80-000000067F000040020000A000000064D959__000000278CC7EF29-000000283C6FE2E9\n000000067F000040020000A0000000648000-000000067F000040020000A000000064C000__00000038E67ABFA0\n000000067F000040020000A0000000648000-000000067F000040020000A000000064C000__0000003903F1CFE8\n000000067F000040020000A0000000648000-000000067F000040020000A000000064C000__0000003B99F7F8A0\n000000067F000040020000A0000000648000-000000067F000040020000A000000064C000__0000005D2FFFFB38\n000000067F000040020000A000000064C000-000000067F000040020000A0000000650000__00000038E67ABFA0\n000000067F000040020000A000000064C000-000000067F000040020000A0000000650000__0000003903F1CFE8\n000000067F000040020000A000000064C000-000000067F000040020000A0000000650000__0000003B99F7F8A0\n000000067F000040020000A000000064C000-000000067F000040020000A0000000650000__0000005D2FFFFB38\n000000067F000040020000A000000064D959-000000067F000040020000A0000000656342__000000278CC
7EF29-000000283C6FE2E9\n000000067F000040020000A0000000650000-000000067F000040020000A0000000654000__00000038E67ABFA0\n000000067F000040020000A0000000650000-000000067F000040020000A0000000654000__0000003903F1CFE8\n000000067F000040020000A0000000650000-000000067F000040020000A0000000654000__0000003B99F7F8A0\n000000067F000040020000A0000000650000-000000067F000040020000A0000000654000__0000005D2FFFFB38\n000000067F000040020000A0000000654000-000000067F000040020000A0000000658000__00000038E67ABFA0\n000000067F000040020000A0000000654000-000000067F000040020000A0000000658000__0000003903F1CFE8\n000000067F000040020000A0000000654000-000000067F000040020000A0000000658000__0000003B99F7F8A0\n000000067F000040020000A0000000654000-000000067F000040020000A0000000658000__0000005D2FFFFB38\n000000067F000040020000A0000000656342-000000067F000040020000A000000065ED2B__000000278CC7EF29-000000283C6FE2E9\n000000067F000040020000A0000000658000-000000067F000040020000A000000065C000__00000038E67ABFA0\n000000067F000040020000A0000000658000-000000067F000040020000A000000065C000__0000003903F1CFE8\n000000067F000040020000A0000000658000-000000067F000040020000A000000065C000__0000003B99F7F8A0\n000000067F000040020000A0000000658000-000000067F000040020000A000000065C000__0000005D2FFFFB38\n000000067F000040020000A000000065C000-000000067F000040020000A0000000660000__00000038E1ABFE28\n000000067F000040020000A000000065C000-000000067F000040020000A0000000660000__00000038E9AF7F00\n000000067F000040020000A000000065C000-000000067F000040020000A0000000660000__0000003903F1CFE8\n000000067F000040020000A000000065C000-000000067F000040020000A0000000660000__0000003B99F7F8A0\n000000067F000040020000A000000065C000-000000067F000040020000A0000000660000__0000005D2FFFFB38\n000000067F000040020000A000000065ED2B-000000067F000040020000A0000200000000__000000278CC7EF29-000000283C6FE2E9\n000000067F000040020000A000000065EFE8-000000067F000040020000A00000006679DA__000000283C6FE2E9-00000028DC1FE6F1\n000000067F000040020000A0000000660000-000000067F000040020000A00000
00664000__00000038E1ABFE28\n000000067F000040020000A0000000660000-000000067F000040020000A0000000664000__00000038E9AF7F00\n000000067F000040020000A0000000660000-000000067F000040020000A0000000664000__0000003903F1CFE8\n000000067F000040020000A0000000660000-000000067F000040020000A0000000664000__0000003B99F7F8A0\n000000067F000040020000A0000000660000-000000067F000040020000A0000000664000__0000005D2FFFFB38\n000000067F000040020000A0000000664000-000000067F000040020000A0000000668000__00000038E1ABFE28\n000000067F000040020000A0000000664000-000000067F000040020000A0000000668000__00000038E9AF7F00\n000000067F000040020000A0000000664000-000000067F000040020000A0000000668000__0000003903F1CFE8\n000000067F000040020000A0000000664000-000000067F000040020000A0000000668000__0000003B99F7F8A0\n000000067F000040020000A0000000664000-000000067F000040020000A0000000668000__0000005D2FFFFB38\n000000067F000040020000A00000006679DA-000000067F000040020000A00000006703C5__000000283C6FE2E9-00000028DC1FE6F1\n000000067F000040020000A0000000668000-000000067F000040020000A000000066C000__00000038E1ABFE28\n000000067F000040020000A0000000668000-000000067F000040020000A000000066C000__00000038E9AF7F00\n000000067F000040020000A0000000668000-000000067F000040020000A000000066C000__0000003903F1CFE8\n000000067F000040020000A0000000668000-000000067F000040020000A000000066C000__0000003B99F7F8A0\n000000067F000040020000A0000000668000-000000067F000040020000A000000066C000__0000005D2FFFFB38\n000000067F000040020000A000000066C000-000000067F000040020000A0000000670000__00000038E1ABFE28\n000000067F000040020000A000000066C000-000000067F000040020000A0000000670000__00000038E9AF7F00\n000000067F000040020000A000000066C000-000000067F000040020000A0000000670000__0000003903F1CFE8\n000000067F000040020000A000000066C000-000000067F000040020000A0000000670000__0000003B99F7F8A0\n000000067F000040020000A000000066C000-000000067F000040020000A0000000670000__0000005D2FFFFB38\n000000067F000040020000A0000000670000-000000067F000040020000A0000000674000__00000038E1ABFE28\n00
0000067F000040020000A0000000670000-000000067F000040020000A0000000674000__00000038E9AF7F00\n000000067F000040020000A0000000670000-000000067F000040020000A0000000674000__0000003903F1CFE8\n000000067F000040020000A0000000670000-000000067F000040020000A0000000674000__0000003B99F7F8A0\n000000067F000040020000A0000000670000-000000067F000040020000A0000000674000__0000005D2FFFFB38\n000000067F000040020000A00000006703C5-000000067F000040020000A0000000678D98__000000283C6FE2E9-00000028DC1FE6F1\n000000067F000040020000A0000000674000-000000067F000040020000A0000000678000__00000038E1ABFE28\n000000067F000040020000A0000000674000-000000067F000040020000A0000000678000__00000038E9AF7F00\n000000067F000040020000A0000000674000-000000067F000040020000A0000000678000__0000003903F1CFE8\n000000067F000040020000A0000000674000-000000067F000040020000A0000000678000__0000003B99F7F8A0\n000000067F000040020000A0000000674000-000000067F000040020000A0000000678000__0000005D2FFFFB38\n000000067F000040020000A0000000678000-000000067F000040020000A000000067C000__00000038E1ABFE28\n000000067F000040020000A0000000678000-000000067F000040020000A000000067C000__00000038E9AF7F00\n000000067F000040020000A0000000678000-000000067F000040020000A000000067C000__0000003903F1CFE8\n000000067F000040020000A0000000678000-000000067F000040020000A000000067C000__0000003B99F7F8A0\n000000067F000040020000A0000000678000-000000067F000040020000A000000067C000__0000005D2FFFFB38\n000000067F000040020000A0000000678D98-000000067F000040020000A000000068175E__000000283C6FE2E9-00000028DC1FE6F1\n000000067F000040020000A000000067C000-000000067F000040020000A0000000680000__00000038E1ABFE28\n000000067F000040020000A000000067C000-000000067F000040020000A0000000680000__00000038E9AF7F00\n000000067F000040020000A000000067C000-000000067F000040020000A0000000680000__0000003903F1CFE8\n000000067F000040020000A000000067C000-000000067F000040020000A0000000680000__0000003B99F7F8A0\n000000067F000040020000A000000067C000-000000067F000040020000A0000000680000__0000005D2FFFFB38\n000000067F00004
0020000A0000000680000-000000067F000040020000A0000000684000__00000038E1ABFE28\n000000067F000040020000A0000000680000-000000067F000040020000A0000000684000__00000038E9AF7F00\n000000067F000040020000A0000000680000-000000067F000040020000A0000000684000__0000003903F1CFE8\n000000067F000040020000A0000000680000-000000067F000040020000A0000000684000__0000003B99F7F8A0\n000000067F000040020000A0000000680000-000000067F000040020000A0000000684000__0000005D2FFFFB38\n000000067F000040020000A000000068175E-000000067F000040020000A000000068A135__000000283C6FE2E9-00000028DC1FE6F1\n000000067F000040020000A0000000684000-000000067F000040020000A0000000688000__00000038E1ABFE28\n000000067F000040020000A0000000684000-000000067F000040020000A0000000688000__00000038E9AF7F00\n000000067F000040020000A0000000684000-000000067F000040020000A0000000688000__0000003903F1CFE8\n000000067F000040020000A0000000684000-000000067F000040020000A0000000688000__0000003B99F7F8A0\n000000067F000040020000A0000000684000-000000067F000040020000A0000000688000__0000005D2FFFFB38\n000000067F000040020000A0000000688000-000000067F000040020000A000000068C000__00000038E1ABFE28\n000000067F000040020000A0000000688000-000000067F000040020000A000000068C000__00000038E9AF7F00\n000000067F000040020000A0000000688000-000000067F000040020000A000000068C000__0000003903F1CFE8\n000000067F000040020000A0000000688000-000000067F000040020000A000000068C000__0000003B99F7F8A0\n000000067F000040020000A0000000688000-000000067F000040020000A000000068C000__0000005D2FFFFB38\n000000067F000040020000A000000068A135-000000067F000040020000A0000000692B17__000000283C6FE2E9-00000028DC1FE6F1\n000000067F000040020000A000000068C000-000000067F000040020000A0000000690000__00000038E1ABFE28\n000000067F000040020000A000000068C000-000000067F000040020000A0000000690000__00000038E9AF7F00\n000000067F000040020000A000000068C000-000000067F000040020000A0000000690000__0000003903F1CFE8\n000000067F000040020000A000000068C000-000000067F000040020000A0000000690000__0000003B99F7F8A0\n000000067F000040020000A00000
0068C000-000000067F000040020000A0000000690000__0000005D2FFFFB38\n000000067F000040020000A0000000690000-000000067F000040020000A0000000694000__00000038E1ABFE28\n000000067F000040020000A0000000690000-000000067F000040020000A0000000694000__00000038E9AF7F00\n000000067F000040020000A0000000690000-000000067F000040020000A0000000694000__0000003903F1CFE8\n000000067F000040020000A0000000690000-000000067F000040020000A0000000694000__0000003B99F7F8A0\n000000067F000040020000A0000000690000-000000067F000040020000A0000000694000__0000005D2FFFFB38\n000000067F000040020000A0000000692B17-000000067F000040020000A000000069B4FC__000000283C6FE2E9-00000028DC1FE6F1\n000000067F000040020000A0000000694000-000000067F000040020000A0000000698000__00000038E1ABFE28\n000000067F000040020000A0000000694000-000000067F000040020000A0000000698000__00000038E9AF7F00\n000000067F000040020000A0000000694000-000000067F000040020000A0000000698000__0000003903F1CFE8\n000000067F000040020000A0000000694000-000000067F000040020000A0000000698000__0000003B99F7F8A0\n000000067F000040020000A0000000694000-000000067F000040020000A0000000698000__0000005D2FFFFB38\n000000067F000040020000A0000000698000-000000067F000040020000A000000069C000__00000038E1ABFE28\n000000067F000040020000A0000000698000-000000067F000040020000A000000069C000__00000038E9AF7F00\n000000067F000040020000A0000000698000-000000067F000040020000A000000069C000__0000003903F1CFE8\n000000067F000040020000A0000000698000-000000067F000040020000A000000069C000__0000003B99F7F8A0\n000000067F000040020000A0000000698000-000000067F000040020000A000000069C000__0000005D2FFFFB38\n000000067F000040020000A000000069B4FC-000000067F000040020000A00000006A3EF3__000000283C6FE2E9-00000028DC1FE6F1\n000000067F000040020000A000000069C000-000000067F000040020000A00000006A0000__00000038E1ABFE28\n000000067F000040020000A000000069C000-000000067F000040020000A00000006A0000__00000038E9AF7F00\n000000067F000040020000A000000069C000-000000067F000040020000A00000006A0000__0000003903F1CFE8\n000000067F000040020000A000000069C000-0000
00067F000040020000A00000006A0000__0000003B99F7F8A0\n000000067F000040020000A000000069C000-000000067F000040020000A00000006A0000__0000005D2FFFFB38\n000000067F000040020000A00000006A0000-000000067F000040020000A00000006A4000__00000038E1ABFE28\n000000067F000040020000A00000006A0000-000000067F000040020000A00000006A4000__00000038E9AF7F00\n000000067F000040020000A00000006A0000-000000067F000040020000A00000006A4000__0000003903F1CFE8\n000000067F000040020000A00000006A0000-000000067F000040020000A00000006A4000__0000003B99F7F8A0\n000000067F000040020000A00000006A0000-000000067F000040020000A00000006A4000__0000005D2FFFFB38\n000000067F000040020000A00000006A3EF3-000000067F000040020000A00000006AC8DC__000000283C6FE2E9-00000028DC1FE6F1\n000000067F000040020000A00000006A4000-000000067F000040020000A00000006A8000__00000038E1ABFE28\n000000067F000040020000A00000006A4000-000000067F000040020000A00000006A8000__00000038E9AF7F00\n000000067F000040020000A00000006A4000-000000067F000040020000A00000006A8000__0000003903F1CFE8\n000000067F000040020000A00000006A4000-000000067F000040020000A00000006A8000__0000003B99F7F8A0\n000000067F000040020000A00000006A4000-000000067F000040020000A00000006A8000__0000005D2FFFFB38\n000000067F000040020000A00000006A8000-000000067F000040020000A00000006AC000__00000038E1ABFE28\n000000067F000040020000A00000006A8000-000000067F000040020000A00000006AC000__00000038E9AF7F00\n000000067F000040020000A00000006A8000-000000067F000040020000A00000006AC000__0000003903F1CFE8\n000000067F000040020000A00000006A8000-000000067F000040020000A00000006AC000__0000003B99F7F8A0\n000000067F000040020000A00000006A8000-000000067F000040020000A00000006AC000__0000005D2FFFFB38\n000000067F000040020000A00000006AC000-000000067F000040020000A00000006B0000__00000038E1ABFE28\n000000067F000040020000A00000006AC000-000000067F000040020000A00000006B0000__00000038E9AF7F00\n000000067F000040020000A00000006AC000-000000067F000040020000A00000006B0000__0000003903F1CFE8\n000000067F000040020000A00000006AC000-000000067F000040020000A00000006B00
00__0000003B99F7F8A0\n000000067F000040020000A00000006AC000-000000067F000040020000A00000006B0000__0000005D2FFFFB38\n000000067F000040020000A00000006AC8DC-000000067F000040020000A00000006B52A7__000000283C6FE2E9-00000028DC1FE6F1\n000000067F000040020000A00000006B0000-000000067F000040020000A00000006B4000__00000038E1ABFE28\n000000067F000040020000A00000006B0000-000000067F000040020000A00000006B4000__00000038E9AF7F00\n000000067F000040020000A00000006B0000-000000067F000040020000A00000006B4000__0000003903F1CFE8\n000000067F000040020000A00000006B0000-000000067F000040020000A00000006B4000__0000003B99F7F8A0\n000000067F000040020000A00000006B0000-000000067F000040020000A00000006B4000__0000005D2FFFFB38\n000000067F000040020000A00000006B4000-000000067F000040020000A00000006B8000__00000029BBAFEDD8\n000000067F000040020000A00000006B4000-000000067F000040020000A00000006B8000__00000038E9AF7F00\n000000067F000040020000A00000006B4000-000000067F000040020000A00000006B8000__0000003903F1CFE8\n000000067F000040020000A00000006B4000-000000067F000040020000A00000006B8000__0000003B99F7F8A0\n000000067F000040020000A00000006B4000-000000067F000040020000A00000006B8000__0000005D2FFFFB38\n000000067F000040020000A00000006B52A7-000000067F000040020000A0000200000000__000000283C6FE2E9-00000028DC1FE6F1\n000000067F000040020000A00000006B543B-000000067F000040020000A00000006BDDFA__00000028DC1FE6F1-000000298BC7EAE1\n000000067F000040020000A00000006B8000-000000067F000040020000A00000006BC000__00000029BBAFEDD8\n000000067F000040020000A00000006B8000-000000067F000040020000A00000006BC000__00000038E9AF7F00\n000000067F000040020000A00000006B8000-000000067F000040020000A00000006BC000__0000003903F1CFE8\n000000067F000040020000A00000006B8000-000000067F000040020000A00000006BC000__0000003B99F7F8A0\n000000067F000040020000A00000006B8000-000000067F000040020000A00000006BC000__0000005D2FFFFB38\n000000067F000040020000A00000006BC000-000000067F000040020000A00000006C0000__00000029BBAFEDD8\n000000067F000040020000A00000006BC000-000000067F000040020000A0000000
6C0000__00000038E9AF7F00\n000000067F000040020000A00000006BC000-000000067F000040020000A00000006C0000__0000003903F1CFE8\n000000067F000040020000A00000006BC000-000000067F000040020000A00000006C0000__0000003B99F7F8A0\n000000067F000040020000A00000006BC000-000000067F000040020000A00000006C0000__0000005D2FFFFB38\n000000067F000040020000A00000006BDDFA-000000067F000040020000A00000006C67E9__00000028DC1FE6F1-000000298BC7EAE1\n000000067F000040020000A00000006C0000-000000067F000040020000A00000006C4000__00000029BBAFEDD8\n000000067F000040020000A00000006C0000-000000067F000040020000A00000006C4000__00000038E9AF7F00\n000000067F000040020000A00000006C0000-000000067F000040020000A00000006C4000__0000003903F1CFE8\n000000067F000040020000A00000006C0000-000000067F000040020000A00000006C4000__0000003B99F7F8A0\n000000067F000040020000A00000006C0000-000000067F000040020000A00000006C4000__0000005D2FFFFB38\n000000067F000040020000A00000006C4000-000000067F000040020000A00000006C8000__00000029BBAFEDD8\n000000067F000040020000A00000006C4000-000000067F000040020000A00000006C8000__00000038E9AF7F00\n000000067F000040020000A00000006C4000-000000067F000040020000A00000006C8000__0000003903F1CFE8\n000000067F000040020000A00000006C4000-000000067F000040020000A00000006C8000__0000003B99F7F8A0\n000000067F000040020000A00000006C4000-000000067F000040020000A00000006C8000__0000005D2FFFFB38\n000000067F000040020000A00000006C67E9-000000067F000040020000A00000006CF1D5__00000028DC1FE6F1-000000298BC7EAE1\n000000067F000040020000A00000006C8000-000000067F000040020000A00000006CC000__00000029BBAFEDD8\n000000067F000040020000A00000006C8000-000000067F000040020000A00000006CC000__00000038E9AF7F00\n000000067F000040020000A00000006C8000-000000067F000040020000A00000006CC000__0000003903F1CFE8\n000000067F000040020000A00000006C8000-000000067F000040020000A00000006CC000__0000003B99F7F8A0\n000000067F000040020000A00000006C8000-000000067F000040020000A00000006CC000__0000005D2FFFFB38\n000000067F000040020000A00000006CC000-000000067F000040020000A00000006D0000__00000
029BBAFEDD8\n000000067F000040020000A00000006CC000-000000067F000040020000A00000006D0000__00000038E9AF7F00\n000000067F000040020000A00000006CC000-000000067F000040020000A00000006D0000__0000003903F1CFE8\n000000067F000040020000A00000006CC000-000000067F000040020000A00000006D0000__0000003B99F7F8A0\n000000067F000040020000A00000006CC000-000000067F000040020000A00000006D0000__0000005D2FFFFB38\n000000067F000040020000A00000006CF1D5-000000067F000040020000A00000006D7BC5__00000028DC1FE6F1-000000298BC7EAE1\n000000067F000040020000A00000006D0000-000000067F000040020000A00000006D4000__00000029BBAFEDD8\n000000067F000040020000A00000006D0000-000000067F000040020000A00000006D4000__00000038E9AF7F00\n000000067F000040020000A00000006D0000-000000067F000040020000A00000006D4000__0000003903F1CFE8\n000000067F000040020000A00000006D0000-000000067F000040020000A00000006D4000__0000003B99F7F8A0\n000000067F000040020000A00000006D0000-000000067F000040020000A00000006D4000__0000005D2FFFFB38\n000000067F000040020000A00000006D4000-000000067F000040020000A00000006D8000__00000029BBAFEDD8\n000000067F000040020000A00000006D4000-000000067F000040020000A00000006D8000__00000038E9AF7F00\n000000067F000040020000A00000006D4000-000000067F000040020000A00000006D8000__0000003903F1CFE8\n000000067F000040020000A00000006D4000-000000067F000040020000A00000006D8000__0000003B99F7F8A0\n000000067F000040020000A00000006D4000-000000067F000040020000A00000006D8000__0000005D2FFFFB38\n000000067F000040020000A00000006D7BC5-000000067F000040020000A00000006E05B2__00000028DC1FE6F1-000000298BC7EAE1\n000000067F000040020000A00000006D8000-000000067F000040020000A00000006DC000__00000029BBAFEDD8\n000000067F000040020000A00000006D8000-000000067F000040020000A00000006DC000__00000038E9AF7F00\n000000067F000040020000A00000006D8000-000000067F000040020000A00000006DC000__0000003903F1CFE8\n000000067F000040020000A00000006D8000-000000067F000040020000A00000006DC000__0000003B99F7F8A0\n000000067F000040020000A00000006D8000-000000067F000040020000A00000006DC000__0000005D2FFFFB38\n
000000067F000040020000A00000006DC000-000000067F000040020000A00000006E0000__00000029BBAFEDD8\n000000067F000040020000A00000006DC000-000000067F000040020000A00000006E0000__00000038E9AF7F00\n000000067F000040020000A00000006DC000-000000067F000040020000A00000006E0000__0000003903F1CFE8\n000000067F000040020000A00000006DC000-000000067F000040020000A00000006E0000__0000003B99F7F8A0\n000000067F000040020000A00000006DC000-000000067F000040020000A00000006E0000__0000005D2FFFFB38\n000000067F000040020000A00000006E0000-000000067F000040020000A00000006E4000__00000029BBAFEDD8\n000000067F000040020000A00000006E0000-000000067F000040020000A00000006E4000__00000038E9AF7F00\n000000067F000040020000A00000006E0000-000000067F000040020000A00000006E4000__0000003903F1CFE8\n000000067F000040020000A00000006E0000-000000067F000040020000A00000006E4000__0000003B99F7F8A0\n000000067F000040020000A00000006E0000-000000067F000040020000A00000006E4000__0000005D2FFFFB38\n000000067F000040020000A00000006E05B2-000000067F000040020000A00000006E8F91__00000028DC1FE6F1-000000298BC7EAE1\n000000067F000040020000A00000006E4000-000000067F000040020000A00000006E8000__00000029BBAFEDD8\n000000067F000040020000A00000006E4000-000000067F000040020000A00000006E8000__00000038E9AF7F00\n000000067F000040020000A00000006E4000-000000067F000040020000A00000006E8000__0000003903F1CFE8\n000000067F000040020000A00000006E4000-000000067F000040020000A00000006E8000__0000003B99F7F8A0\n000000067F000040020000A00000006E4000-000000067F000040020000A00000006E8000__0000005D2FFFFB38\n000000067F000040020000A00000006E8000-000000067F000040020000A00000006EC000__00000029BBAFEDD8\n000000067F000040020000A00000006E8000-000000067F000040020000A00000006EC000__00000038E9AF7F00\n000000067F000040020000A00000006E8000-000000067F000040020000A00000006EC000__0000003903F1CFE8\n000000067F000040020000A00000006E8000-000000067F000040020000A00000006EC000__0000003B99F7F8A0\n000000067F000040020000A00000006E8000-000000067F000040020000A00000006EC000__0000005D2FFFFB38\n000000067F000040020000A0000000
6E8F91-000000067F000040020000A00000006F195B__00000028DC1FE6F1-000000298BC7EAE1\n000000067F000040020000A00000006EC000-000000067F000040020000A00000006F0000__00000029BBAFEDD8\n000000067F000040020000A00000006EC000-000000067F000040020000A00000006F0000__00000038E9AF7F00\n000000067F000040020000A00000006EC000-000000067F000040020000A00000006F0000__0000003903F1CFE8\n000000067F000040020000A00000006EC000-000000067F000040020000A00000006F0000__0000003B99F7F8A0\n000000067F000040020000A00000006EC000-000000067F000040020000A00000006F0000__0000005D2FFFFB38\n000000067F000040020000A00000006F0000-000000067F000040020000A00000006F4000__00000029BBAFEDD8\n000000067F000040020000A00000006F0000-000000067F000040020000A00000006F4000__00000038E9AF7F00\n000000067F000040020000A00000006F0000-000000067F000040020000A00000006F4000__0000003903F1CFE8\n000000067F000040020000A00000006F0000-000000067F000040020000A00000006F4000__0000003B99F7F8A0\n000000067F000040020000A00000006F0000-000000067F000040020000A00000006F4000__0000005D2FFFFB38\n000000067F000040020000A00000006F195B-000000067F000040020000A00000006FA318__00000028DC1FE6F1-000000298BC7EAE1\n000000067F000040020000A00000006F4000-000000067F000040020000A00000006F8000__00000029BBAFEDD8\n000000067F000040020000A00000006F4000-000000067F000040020000A00000006F8000__00000038E9AF7F00\n000000067F000040020000A00000006F4000-000000067F000040020000A00000006F8000__0000003903F1CFE8\n000000067F000040020000A00000006F4000-000000067F000040020000A00000006F8000__0000003B99F7F8A0\n000000067F000040020000A00000006F4000-000000067F000040020000A00000006F8000__0000005D2FFFFB38\n000000067F000040020000A00000006F8000-000000067F000040020000A00000006FC000__00000029BBAFEDD8\n000000067F000040020000A00000006F8000-000000067F000040020000A00000006FC000__00000038E9AF7F00\n000000067F000040020000A00000006F8000-000000067F000040020000A00000006FC000__0000003903F1CFE8\n000000067F000040020000A00000006F8000-000000067F000040020000A00000006FC000__0000003B99F7F8A0\n000000067F000040020000A00000006F8000-000000
067F000040020000A00000006FC000__0000005D2FFFFB38\n000000067F000040020000A00000006FA318-000000067F000040020000A0000000702D03__00000028DC1FE6F1-000000298BC7EAE1\n000000067F000040020000A00000006FC000-000000067F000040020000A0000000700000__00000029BBAFEDD8\n000000067F000040020000A00000006FC000-000000067F000040020000A0000000700000__00000038E9AF7F00\n000000067F000040020000A00000006FC000-000000067F000040020000A0000000700000__0000003903F1CFE8\n000000067F000040020000A00000006FC000-000000067F000040020000A0000000700000__0000003B99F7F8A0\n000000067F000040020000A00000006FC000-000000067F000040020000A0000000700000__0000005D2FFFFB38\n000000067F000040020000A0000000700000-000000067F000040020000A0000000704000__00000029BBAFEDD8\n000000067F000040020000A0000000700000-000000067F000040020000A0000000704000__00000038E9AF7F00\n000000067F000040020000A0000000700000-000000067F000040020000A0000000704000__0000003903F1CFE8\n000000067F000040020000A0000000700000-000000067F000040020000A0000000704000__0000003B99F7F8A0\n000000067F000040020000A0000000700000-000000067F000040020000A0000000704000__0000005D2FFFFB38\n000000067F000040020000A0000000702D03-000000067F000040020000A000000070B6E2__00000028DC1FE6F1-000000298BC7EAE1\n000000067F000040020000A0000000704000-000000067F000040020000A0000000708000__00000029BBAFEDD8\n000000067F000040020000A0000000704000-000000067F000040020000A0000000708000__00000038E9AF7F00\n000000067F000040020000A0000000704000-000000067F000040020000A0000000708000__0000003903F1CFE8\n000000067F000040020000A0000000704000-000000067F000040020000A0000000708000__0000003B99F7F8A0\n000000067F000040020000A0000000704000-000000067F000040020000A0000000708000__0000005D2FFFFB38\n000000067F000040020000A0000000708000-000000067F000040020000A000000070C000__00000029BBAFEDD8\n000000067F000040020000A0000000708000-000000067F000040020000A000000070C000__00000038E9AF7F00\n000000067F000040020000A0000000708000-000000067F000040020000A000000070C000__0000003903F1CFE8\n000000067F000040020000A0000000708000-000000067F000040020
000A000000070C000__0000003B99F7F8A0\n000000067F000040020000A0000000708000-000000067F000040020000A000000070C000__0000005D2FFFFB38\n000000067F000040020000A000000070B6E2-000000067F000040020000A00000007140D2__00000028DC1FE6F1-000000298BC7EAE1\n000000067F000040020000A000000070C000-000000067F000040020000A0000000710000__00000029BBAFEDD8\n000000067F000040020000A000000070C000-000000067F000040020000A0000000710000__00000038E9AF7F00\n000000067F000040020000A000000070C000-000000067F000040020000A0000000710000__0000003903F1CFE8\n000000067F000040020000A000000070C000-000000067F000040020000A0000000710000__0000003B99F7F8A0\n000000067F000040020000A000000070C000-000000067F000040020000A0000000710000__0000005D2FFFFB38\n000000067F000040020000A0000000710000-000000067F000040020000A0000000714000__00000029BBAFEDD8\n000000067F000040020000A0000000710000-000000067F000040020000A0000000714000__00000038E9AF7F00\n000000067F000040020000A0000000710000-000000067F000040020000A0000000714000__0000003903F1CFE8\n000000067F000040020000A0000000710000-000000067F000040020000A0000000714000__0000003B99F7F8A0\n000000067F000040020000A0000000710000-000000067F000040020000A0000000714000__0000005D2FFFFB38\n000000067F000040020000A0000000714000-000000067F000040020000A0000000718000__00000029BBAFEDD8\n000000067F000040020000A0000000714000-000000067F000040020000A0000000718000__00000038E67ABFA0\n000000067F000040020000A0000000714000-000000067F000040020000A0000000718000__0000003903F1CFE8\n000000067F000040020000A0000000714000-000000067F000040020000A0000000718000__0000003B99F7F8A0\n000000067F000040020000A0000000714000-000000067F000040020000A0000000718000__0000005D2FFFFB38\n000000067F000040020000A00000007140D2-000000067F000040020000A0000200000000__00000028DC1FE6F1-000000298BC7EAE1\n000000067F000040020000A0000000714378-000000067F000040020000A000000071CD56__000000298BC7EAE1-0000002A3B6FD871\n000000067F000040020000A0000000718000-000000067F000040020000A000000071C000__00000029BBAFEDD8\n000000067F000040020000A0000000718000-000000067F00004
0020000A000000071C000__00000038E67ABFA0\n000000067F000040020000A0000000718000-000000067F000040020000A000000071C000__0000003903F1CFE8\n000000067F000040020000A0000000718000-000000067F000040020000A000000071C000__0000003B99F7F8A0\n000000067F000040020000A0000000718000-000000067F000040020000A000000071C000__0000005D2FFFFB38\n000000067F000040020000A000000071C000-000000067F000040020000A0000000720000__00000029BBAFEDD8\n000000067F000040020000A000000071C000-000000067F000040020000A0000000720000__00000038E67ABFA0\n000000067F000040020000A000000071C000-000000067F000040020000A0000000720000__0000003903F1CFE8\n000000067F000040020000A000000071C000-000000067F000040020000A0000000720000__0000003B99F7F8A0\n000000067F000040020000A000000071C000-000000067F000040020000A0000000720000__0000005D2FFFFB38\n000000067F000040020000A000000071CD56-000000067F000040020000A0000000725723__000000298BC7EAE1-0000002A3B6FD871\n000000067F000040020000A0000000720000-000000067F000040020000A0000000724000__00000029BBAFEDD8\n000000067F000040020000A0000000720000-000000067F000040020000A0000000724000__00000038E67ABFA0\n000000067F000040020000A0000000720000-000000067F000040020000A0000000724000__0000003903F1CFE8\n000000067F000040020000A0000000720000-000000067F000040020000A0000000724000__0000003B99F7F8A0\n000000067F000040020000A0000000720000-000000067F000040020000A0000000724000__0000005D2FFFFB38\n000000067F000040020000A0000000724000-000000067F000040020000A0000000728000__00000029BBAFEDD8\n000000067F000040020000A0000000724000-000000067F000040020000A0000000728000__00000038E67ABFA0\n000000067F000040020000A0000000724000-000000067F000040020000A0000000728000__0000003903F1CFE8\n000000067F000040020000A0000000724000-000000067F000040020000A0000000728000__0000003B99F7F8A0\n000000067F000040020000A0000000724000-000000067F000040020000A0000000728000__0000005D2FFFFB38\n000000067F000040020000A0000000725723-000000067F000040020000A000000072E0E0__000000298BC7EAE1-0000002A3B6FD871\n000000067F000040020000A0000000728000-000000067F000040020000A00000
0072C000__00000029BBAFEDD8\n000000067F000040020000A0000000728000-000000067F000040020000A000000072C000__00000038E67ABFA0\n000000067F000040020000A0000000728000-000000067F000040020000A000000072C000__0000003903F1CFE8\n000000067F000040020000A0000000728000-000000067F000040020000A000000072C000__0000003B99F7F8A0\n000000067F000040020000A0000000728000-000000067F000040020000A000000072C000__0000005D2FFFFB38\n000000067F000040020000A000000072C000-000000067F000040020000A0000000730000__00000038E67ABFA0\n000000067F000040020000A000000072C000-000000067F000040020000A0000000730000__0000003903F1CFE8\n000000067F000040020000A000000072C000-000000067F000040020000A0000000730000__0000003B99F7F8A0\n000000067F000040020000A000000072C000-000000067F000040020000A0000000730000__0000005D2FFFFB38\n000000067F000040020000A000000072C000-030000000000000000000000000000000002__00000029BBAFEDD8\n000000067F000040020000A000000072E0E0-000000067F000040020000A0000000736AB6__000000298BC7EAE1-0000002A3B6FD871\n000000067F000040020000A0000000730000-000000067F000040020000A0000000734000__00000038E67ABFA0\n000000067F000040020000A0000000730000-000000067F000040020000A0000000734000__0000003903F1CFE8\n000000067F000040020000A0000000730000-000000067F000040020000A0000000734000__0000003B99F7F8A0\n000000067F000040020000A0000000730000-000000067F000040020000A0000000734000__0000005D2FFFFB38\n000000067F000040020000A0000000734000-000000067F000040020000A0000000738000__00000038E67ABFA0\n000000067F000040020000A0000000734000-000000067F000040020000A0000000738000__0000003903F1CFE8\n000000067F000040020000A0000000734000-000000067F000040020000A0000000738000__0000003B99F7F8A0\n000000067F000040020000A0000000734000-000000067F000040020000A0000000738000__0000005D2FFFFB38\n000000067F000040020000A0000000736AB6-000000067F000040020000A000000073F4A6__000000298BC7EAE1-0000002A3B6FD871\n000000067F000040020000A0000000738000-000000067F000040020000A000000073C000__00000038E67ABFA0\n000000067F000040020000A0000000738000-000000067F000040020000A000000073C000__000
0003903F1CFE8\n000000067F000040020000A0000000738000-000000067F000040020000A000000073C000__0000003B99F7F8A0\n000000067F000040020000A0000000738000-000000067F000040020000A000000073C000__0000005D2FFFFB38\n000000067F000040020000A000000073C000-000000067F000040020000A0000000740000__00000038E67ABFA0\n000000067F000040020000A000000073C000-000000067F000040020000A0000000740000__0000003903F1CFE8\n000000067F000040020000A000000073C000-000000067F000040020000A0000000740000__0000003B99F7F8A0\n000000067F000040020000A000000073C000-000000067F000040020000A0000000740000__0000005D2FFFFB38\n000000067F000040020000A000000073F4A6-000000067F000040020000A0000000747E87__000000298BC7EAE1-0000002A3B6FD871\n000000067F000040020000A0000000740000-000000067F000040020000A0000000744000__00000038E67ABFA0\n000000067F000040020000A0000000740000-000000067F000040020000A0000000744000__0000003903F1CFE8\n000000067F000040020000A0000000740000-000000067F000040020000A0000000744000__0000003B99F7F8A0\n000000067F000040020000A0000000740000-000000067F000040020000A0000000744000__0000005D2FFFFB38\n000000067F000040020000A0000000744000-000000067F000040020000A0000000748000__00000038E67ABFA0\n000000067F000040020000A0000000744000-000000067F000040020000A0000000748000__0000003903F1CFE8\n000000067F000040020000A0000000744000-000000067F000040020000A0000000748000__0000003B99F7F8A0\n000000067F000040020000A0000000744000-000000067F000040020000A0000000748000__0000005D2FFFFB38\n000000067F000040020000A0000000747E87-000000067F000040020000A0000000750874__000000298BC7EAE1-0000002A3B6FD871\n000000067F000040020000A0000000748000-000000067F000040020000A000000074C000__00000038E67ABFA0\n000000067F000040020000A0000000748000-000000067F000040020000A000000074C000__0000003903F1CFE8\n000000067F000040020000A0000000748000-000000067F000040020000A000000074C000__0000003B99F7F8A0\n000000067F000040020000A0000000748000-000000067F000040020000A000000074C000__0000005D2FFFFB38\n000000067F000040020000A000000074C000-000000067F000040020000A0000000750000__00000038E67ABFA0
\n000000067F000040020000A000000074C000-000000067F000040020000A0000000750000__0000003903F1CFE8\n000000067F000040020000A000000074C000-000000067F000040020000A0000000750000__0000003B99F7F8A0\n000000067F000040020000A000000074C000-000000067F000040020000A0000000750000__0000005D2FFFFB38\n000000067F000040020000A0000000750000-000000067F000040020000A0000000754000__00000038E67ABFA0\n000000067F000040020000A0000000750000-000000067F000040020000A0000000754000__0000003903F1CFE8\n000000067F000040020000A0000000750000-000000067F000040020000A0000000754000__0000003B99F7F8A0\n000000067F000040020000A0000000750000-000000067F000040020000A0000000754000__0000005D2FFFFB38\n000000067F000040020000A0000000750874-000000067F000040020000A0000000759257__000000298BC7EAE1-0000002A3B6FD871\n000000067F000040020000A0000000754000-000000067F000040020000A0000000758000__00000038E67ABFA0\n000000067F000040020000A0000000754000-000000067F000040020000A0000000758000__0000003903F1CFE8\n000000067F000040020000A0000000754000-000000067F000040020000A0000000758000__0000003B99F7F8A0\n000000067F000040020000A0000000754000-000000067F000040020000A0000000758000__0000005D2FFFFB38\n000000067F000040020000A0000000758000-000000067F000040020000A000000075C000__00000038E67ABFA0\n000000067F000040020000A0000000758000-000000067F000040020000A000000075C000__0000003903F1CFE8\n000000067F000040020000A0000000758000-000000067F000040020000A000000075C000__0000003B99F7F8A0\n000000067F000040020000A0000000758000-000000067F000040020000A000000075C000__0000005D2FFFFB38\n000000067F000040020000A0000000759257-000000067F000040020000A0000000761C22__000000298BC7EAE1-0000002A3B6FD871\n000000067F000040020000A000000075C000-000000067F000040020000A0000000760000__00000038E67ABFA0\n000000067F000040020000A000000075C000-000000067F000040020000A0000000760000__0000003903F1CFE8\n000000067F000040020000A000000075C000-000000067F000040020000A0000000760000__0000003B99F7F8A0\n000000067F000040020000A000000075C000-000000067F000040020000A0000000760000__0000005D2FFFFB38\n000000067F0
00040020000A0000000760000-000000067F000040020000A0000000764000__00000038E67ABFA0\n000000067F000040020000A0000000760000-000000067F000040020000A0000000764000__0000003903F1CFE8\n000000067F000040020000A0000000760000-000000067F000040020000A0000000764000__0000003B99F7F8A0\n000000067F000040020000A0000000760000-000000067F000040020000A0000000764000__0000005D2FFFFB38\n000000067F000040020000A0000000761C22-000000067F000040020000A000000076A5F3__000000298BC7EAE1-0000002A3B6FD871\n000000067F000040020000A0000000764000-000000067F000040020000A0000000768000__00000038E67ABFA0\n000000067F000040020000A0000000764000-000000067F000040020000A0000000768000__0000003903F1CFE8\n000000067F000040020000A0000000764000-000000067F000040020000A0000000768000__0000003B99F7F8A0\n000000067F000040020000A0000000764000-000000067F000040020000A0000000768000__0000005D2FFFFB38\n000000067F000040020000A0000000768000-000000067F000040020000A000000076C000__00000038E67ABFA0\n000000067F000040020000A0000000768000-000000067F000040020000A000000076C000__0000003903F1CFE8\n000000067F000040020000A0000000768000-000000067F000040020000A000000076C000__0000003B99F7F8A0\n000000067F000040020000A0000000768000-000000067F000040020000A000000076C000__0000005D2FFFFB38\n000000067F000040020000A000000076A5F3-000000067F000040020000A0000000772FD6__000000298BC7EAE1-0000002A3B6FD871\n000000067F000040020000A000000076C000-000000067F000040020000A0000000770000__00000038E67ABFA0\n000000067F000040020000A000000076C000-000000067F000040020000A0000000770000__0000003903F1CFE8\n000000067F000040020000A000000076C000-000000067F000040020000A0000000770000__0000003B99F7F8A0\n000000067F000040020000A000000076C000-000000067F000040020000A0000000770000__0000005D2FFFFB38\n000000067F000040020000A0000000770000-000000067F000040020000A0000000774000__00000038E1ABFE28\n000000067F000040020000A0000000770000-000000067F000040020000A0000000774000__00000038E9AF7F00\n000000067F000040020000A0000000770000-000000067F000040020000A0000000774000__0000003903F1CFE8\n000000067F000040020000A0
000000770000-000000067F000040020000A0000000774000__0000003B99F7F8A0\n000000067F000040020000A0000000770000-000000067F000040020000A0000000774000__0000005D2FFFFB38\n000000067F000040020000A0000000772FD6-000000067F000040020000A0000200000000__000000298BC7EAE1-0000002A3B6FD871\n000000067F000040020000A00000007731C3-000000067F000040020000A000000077BBA6__0000002A3B6FD871-0000002ADB1FF0A9\n000000067F000040020000A0000000774000-000000067F000040020000A0000000778000__00000038E1ABFE28\n000000067F000040020000A0000000774000-000000067F000040020000A0000000778000__00000038E9AF7F00\n000000067F000040020000A0000000774000-000000067F000040020000A0000000778000__0000003903F1CFE8\n000000067F000040020000A0000000774000-000000067F000040020000A0000000778000__0000003B99F7F8A0\n000000067F000040020000A0000000774000-000000067F000040020000A0000000778000__0000005D2FFFFB38\n000000067F000040020000A0000000778000-000000067F000040020000A000000077C000__00000038E1ABFE28\n000000067F000040020000A0000000778000-000000067F000040020000A000000077C000__00000038E9AF7F00\n000000067F000040020000A0000000778000-000000067F000040020000A000000077C000__0000003903F1CFE8\n000000067F000040020000A0000000778000-000000067F000040020000A000000077C000__0000003B99F7F8A0\n000000067F000040020000A0000000778000-000000067F000040020000A000000077C000__0000005D2FFFFB38\n000000067F000040020000A000000077BBA6-000000067F000040020000A0000000784582__0000002A3B6FD871-0000002ADB1FF0A9\n000000067F000040020000A000000077C000-000000067F000040020000A0000000780000__00000038E1ABFE28\n000000067F000040020000A000000077C000-000000067F000040020000A0000000780000__00000038E9AF7F00\n000000067F000040020000A000000077C000-000000067F000040020000A0000000780000__0000003903F1CFE8\n000000067F000040020000A000000077C000-000000067F000040020000A0000000780000__0000003B99F7F8A0\n000000067F000040020000A000000077C000-000000067F000040020000A0000000780000__0000005D2FFFFB38\n000000067F000040020000A0000000780000-000000067F000040020000A0000000784000__00000038E1ABFE28\n000000067F0000400200
00A0000000780000-000000067F000040020000A0000000784000__00000038E9AF7F00\n000000067F000040020000A0000000780000-000000067F000040020000A0000000784000__0000003903F1CFE8\n000000067F000040020000A0000000780000-000000067F000040020000A0000000784000__0000003B99F7F8A0\n000000067F000040020000A0000000780000-000000067F000040020000A0000000784000__0000005D2FFFFB38\n000000067F000040020000A0000000784000-000000067F000040020000A0000000788000__00000038E1ABFE28\n000000067F000040020000A0000000784000-000000067F000040020000A0000000788000__00000038E9AF7F00\n000000067F000040020000A0000000784000-000000067F000040020000A0000000788000__0000003903F1CFE8\n000000067F000040020000A0000000784000-000000067F000040020000A0000000788000__0000003B99F7F8A0\n000000067F000040020000A0000000784000-000000067F000040020000A0000000788000__0000005D2FFFFB38\n000000067F000040020000A0000000784582-000000067F000040020000A000000078CF68__0000002A3B6FD871-0000002ADB1FF0A9\n000000067F000040020000A0000000788000-000000067F000040020000A000000078C000__00000038E1ABFE28\n000000067F000040020000A0000000788000-000000067F000040020000A000000078C000__00000038E9AF7F00\n000000067F000040020000A0000000788000-000000067F000040020000A000000078C000__0000003903F1CFE8\n000000067F000040020000A0000000788000-000000067F000040020000A000000078C000__0000003B99F7F8A0\n000000067F000040020000A0000000788000-000000067F000040020000A000000078C000__0000005D2FFFFB38\n000000067F000040020000A000000078C000-000000067F000040020000A0000000790000__00000038E1ABFE28\n000000067F000040020000A000000078C000-000000067F000040020000A0000000790000__00000038E9AF7F00\n000000067F000040020000A000000078C000-000000067F000040020000A0000000790000__0000003903F1CFE8\n000000067F000040020000A000000078C000-000000067F000040020000A0000000790000__0000003B99F7F8A0\n000000067F000040020000A000000078C000-000000067F000040020000A0000000790000__0000005D2FFFFB38\n000000067F000040020000A000000078CF68-000000067F000040020000A0000000795940__0000002A3B6FD871-0000002ADB1FF0A9\n000000067F000040020000A0000000790
000-000000067F000040020000A0000000794000__00000038E1ABFE28\n000000067F000040020000A0000000790000-000000067F000040020000A0000000794000__00000038E9AF7F00\n000000067F000040020000A0000000790000-000000067F000040020000A0000000794000__0000003903F1CFE8\n000000067F000040020000A0000000790000-000000067F000040020000A0000000794000__0000003B99F7F8A0\n000000067F000040020000A0000000790000-000000067F000040020000A0000000794000__0000005D2FFFFB38\n000000067F000040020000A0000000794000-000000067F000040020000A0000000798000__00000038E1ABFE28\n000000067F000040020000A0000000794000-000000067F000040020000A0000000798000__00000038E9AF7F00\n000000067F000040020000A0000000794000-000000067F000040020000A0000000798000__0000003903F1CFE8\n000000067F000040020000A0000000794000-000000067F000040020000A0000000798000__0000003B99F7F8A0\n000000067F000040020000A0000000794000-000000067F000040020000A0000000798000__0000005D2FFFFB38\n000000067F000040020000A0000000795940-000000067F000040020000A000000079E314__0000002A3B6FD871-0000002ADB1FF0A9\n000000067F000040020000A0000000798000-000000067F000040020000A000000079C000__00000038E1ABFE28\n000000067F000040020000A0000000798000-000000067F000040020000A000000079C000__00000038E9AF7F00\n000000067F000040020000A0000000798000-000000067F000040020000A000000079C000__0000003903F1CFE8\n000000067F000040020000A0000000798000-000000067F000040020000A000000079C000__0000003B99F7F8A0\n000000067F000040020000A0000000798000-000000067F000040020000A000000079C000__0000005D2FFFFB38\n000000067F000040020000A000000079C000-000000067F000040020000A00000007A0000__00000038E1ABFE28\n000000067F000040020000A000000079C000-000000067F000040020000A00000007A0000__00000038E9AF7F00\n000000067F000040020000A000000079C000-000000067F000040020000A00000007A0000__0000003903F1CFE8\n000000067F000040020000A000000079C000-000000067F000040020000A00000007A0000__0000003B99F7F8A0\n000000067F000040020000A000000079C000-000000067F000040020000A00000007A0000__0000005D2FFFFB38\n000000067F000040020000A000000079E314-000000067F000040020000A000
00007A6CDE__0000002A3B6FD871-0000002ADB1FF0A9\n000000067F000040020000A00000007A0000-000000067F000040020000A00000007A4000__00000038E1ABFE28\n000000067F000040020000A00000007A0000-000000067F000040020000A00000007A4000__00000038E9AF7F00\n000000067F000040020000A00000007A0000-000000067F000040020000A00000007A4000__0000003903F1CFE8\n000000067F000040020000A00000007A0000-000000067F000040020000A00000007A4000__0000003B99F7F8A0\n000000067F000040020000A00000007A0000-000000067F000040020000A00000007A4000__0000005D2FFFFB38\n000000067F000040020000A00000007A4000-000000067F000040020000A00000007A8000__00000038E1ABFE28\n000000067F000040020000A00000007A4000-000000067F000040020000A00000007A8000__00000038E9AF7F00\n000000067F000040020000A00000007A4000-000000067F000040020000A00000007A8000__0000003903F1CFE8\n000000067F000040020000A00000007A4000-000000067F000040020000A00000007A8000__0000003B99F7F8A0\n000000067F000040020000A00000007A4000-000000067F000040020000A00000007A8000__0000005D2FFFFB38\n000000067F000040020000A00000007A6CDE-000000067F000040020000A00000007AF6C2__0000002A3B6FD871-0000002ADB1FF0A9\n000000067F000040020000A00000007A8000-000000067F000040020000A00000007AC000__00000038E1ABFE28\n000000067F000040020000A00000007A8000-000000067F000040020000A00000007AC000__00000038E9AF7F00\n000000067F000040020000A00000007A8000-000000067F000040020000A00000007AC000__0000003903F1CFE8\n000000067F000040020000A00000007A8000-000000067F000040020000A00000007AC000__0000003B99F7F8A0\n000000067F000040020000A00000007A8000-000000067F000040020000A00000007AC000__0000005D2FFFFB38\n000000067F000040020000A00000007AC000-000000067F000040020000A00000007B0000__00000038E1ABFE28\n000000067F000040020000A00000007AC000-000000067F000040020000A00000007B0000__00000038E9AF7F00\n000000067F000040020000A00000007AC000-000000067F000040020000A00000007B0000__0000003903F1CFE8\n000000067F000040020000A00000007AC000-000000067F000040020000A00000007B0000__0000003B99F7F8A0\n000000067F000040020000A00000007AC000-000000067F000040020000A00000007B0000__0
000005D2FFFFB38\n000000067F000040020000A00000007AF6C2-000000067F000040020000A00000007B8090__0000002A3B6FD871-0000002ADB1FF0A9\n000000067F000040020000A00000007B0000-000000067F000040020000A00000007B4000__00000038E1ABFE28\n000000067F000040020000A00000007B0000-000000067F000040020000A00000007B4000__00000038E9AF7F00\n000000067F000040020000A00000007B0000-000000067F000040020000A00000007B4000__0000003903F1CFE8\n000000067F000040020000A00000007B0000-000000067F000040020000A00000007B4000__0000003B99F7F8A0\n000000067F000040020000A00000007B0000-000000067F000040020000A00000007B4000__0000005D2FFFFB38\n000000067F000040020000A00000007B4000-000000067F000040020000A00000007B8000__00000038E1ABFE28\n000000067F000040020000A00000007B4000-000000067F000040020000A00000007B8000__00000038E9AF7F00\n000000067F000040020000A00000007B4000-000000067F000040020000A00000007B8000__0000003903F1CFE8\n000000067F000040020000A00000007B4000-000000067F000040020000A00000007B8000__0000003B99F7F8A0\n000000067F000040020000A00000007B4000-000000067F000040020000A00000007B8000__0000005D2FFFFB38\n000000067F000040020000A00000007B8000-000000067F000040020000A00000007BC000__00000038E1ABFE28\n000000067F000040020000A00000007B8000-000000067F000040020000A00000007BC000__00000038E9AF7F00\n000000067F000040020000A00000007B8000-000000067F000040020000A00000007BC000__0000003903F1CFE8\n000000067F000040020000A00000007B8000-000000067F000040020000A00000007BC000__0000003B99F7F8A0\n000000067F000040020000A00000007B8000-000000067F000040020000A00000007BC000__0000005D2FFFFB38\n000000067F000040020000A00000007B8090-000000067F000040020000A00000007C0A77__0000002A3B6FD871-0000002ADB1FF0A9\n000000067F000040020000A00000007BC000-000000067F000040020000A00000007C0000__00000038E1ABFE28\n000000067F000040020000A00000007BC000-000000067F000040020000A00000007C0000__00000038E9AF7F00\n000000067F000040020000A00000007BC000-000000067F000040020000A00000007C0000__0000003903F1CFE8\n000000067F000040020000A00000007BC000-000000067F000040020000A00000007C0000__0000003B99F7F8
A0\n000000067F000040020000A00000007BC000-000000067F000040020000A00000007C0000__0000005D2FFFFB38\n000000067F000040020000A00000007C0000-000000067F000040020000A00000007C4000__00000038E1ABFE28\n000000067F000040020000A00000007C0000-000000067F000040020000A00000007C4000__00000038E9AF7F00\n000000067F000040020000A00000007C0000-000000067F000040020000A00000007C4000__0000003903F1CFE8\n000000067F000040020000A00000007C0000-000000067F000040020000A00000007C4000__0000003B99F7F8A0\n000000067F000040020000A00000007C0000-000000067F000040020000A00000007C4000__0000005D2FFFFB38\n000000067F000040020000A00000007C0A77-000000067F000040020000A00000007C945A__0000002A3B6FD871-0000002ADB1FF0A9\n000000067F000040020000A00000007C4000-000000067F000040020000A00000007C8000__00000038E1ABFE28\n000000067F000040020000A00000007C4000-000000067F000040020000A00000007C8000__00000038E9AF7F00\n000000067F000040020000A00000007C4000-000000067F000040020000A00000007C8000__0000003903F1CFE8\n000000067F000040020000A00000007C4000-000000067F000040020000A00000007C8000__0000003B99F7F8A0\n000000067F000040020000A00000007C4000-000000067F000040020000A00000007C8000__0000005D2FFFFB38\n000000067F000040020000A00000007C8000-000000067F000040020000A00000007CC000__0000002BAAB7E320\n000000067F000040020000A00000007C8000-000000067F000040020000A00000007CC000__00000038E9AF7F00\n000000067F000040020000A00000007C8000-000000067F000040020000A00000007CC000__0000003903F1CFE8\n000000067F000040020000A00000007C8000-000000067F000040020000A00000007CC000__0000003B99F7F8A0\n000000067F000040020000A00000007C8000-000000067F000040020000A00000007CC000__0000005D2FFFFB38\n000000067F000040020000A00000007C945A-000000067F000040020000A0000200000000__0000002A3B6FD871-0000002ADB1FF0A9\n000000067F000040020000A00000007C96D4-000000067F000040020000A00000007D20BA__0000002ADB1FF0A9-0000002B7ACFE3E1\n000000067F000040020000A00000007CC000-000000067F000040020000A00000007D0000__0000002BAAB7E320\n000000067F000040020000A00000007CC000-000000067F000040020000A00000007D0000__00000038E9
AF7F00\n000000067F000040020000A00000007CC000-000000067F000040020000A00000007D0000__0000003903F1CFE8\n000000067F000040020000A00000007CC000-000000067F000040020000A00000007D0000__0000003B99F7F8A0\n000000067F000040020000A00000007CC000-000000067F000040020000A00000007D0000__0000005D2FFFFB38\n000000067F000040020000A00000007D0000-000000067F000040020000A00000007D4000__0000002BAAB7E320\n000000067F000040020000A00000007D0000-000000067F000040020000A00000007D4000__00000038E9AF7F00\n000000067F000040020000A00000007D0000-000000067F000040020000A00000007D4000__0000003903F1CFE8\n000000067F000040020000A00000007D0000-000000067F000040020000A00000007D4000__0000003B99F7F8A0\n000000067F000040020000A00000007D0000-000000067F000040020000A00000007D4000__0000005D2FFFFB38\n000000067F000040020000A00000007D20BA-000000067F000040020000A00000007DAA9B__0000002ADB1FF0A9-0000002B7ACFE3E1\n000000067F000040020000A00000007D4000-000000067F000040020000A00000007D8000__0000002BAAB7E320\n000000067F000040020000A00000007D4000-000000067F000040020000A00000007D8000__00000038E9AF7F00\n000000067F000040020000A00000007D4000-000000067F000040020000A00000007D8000__0000003903F1CFE8\n000000067F000040020000A00000007D4000-000000067F000040020000A00000007D8000__0000003B99F7F8A0\n000000067F000040020000A00000007D4000-000000067F000040020000A00000007D8000__0000005D2FFFFB38\n000000067F000040020000A00000007D8000-000000067F000040020000A00000007DC000__0000002BAAB7E320\n000000067F000040020000A00000007D8000-000000067F000040020000A00000007DC000__00000038E9AF7F00\n000000067F000040020000A00000007D8000-000000067F000040020000A00000007DC000__0000003903F1CFE8\n000000067F000040020000A00000007D8000-000000067F000040020000A00000007DC000__0000003B99F7F8A0\n000000067F000040020000A00000007D8000-000000067F000040020000A00000007DC000__0000005D2FFFFB38\n000000067F000040020000A00000007DAA9B-000000067F000040020000A00000007E3486__0000002ADB1FF0A9-0000002B7ACFE3E1\n000000067F000040020000A00000007DC000-000000067F000040020000A00000007E0000__0000002BAAB7E320\n00000
0067F000040020000A00000007DC000-000000067F000040020000A00000007E0000__00000038E9AF7F00\n000000067F000040020000A00000007DC000-000000067F000040020000A00000007E0000__0000003903F1CFE8\n000000067F000040020000A00000007DC000-000000067F000040020000A00000007E0000__0000003B99F7F8A0\n000000067F000040020000A00000007DC000-000000067F000040020000A00000007E0000__0000005D2FFFFB38\n000000067F000040020000A00000007E0000-000000067F000040020000A00000007E4000__0000002BAAB7E320\n000000067F000040020000A00000007E0000-000000067F000040020000A00000007E4000__00000038E9AF7F00\n000000067F000040020000A00000007E0000-000000067F000040020000A00000007E4000__0000003903F1CFE8\n000000067F000040020000A00000007E0000-000000067F000040020000A00000007E4000__0000003B99F7F8A0\n000000067F000040020000A00000007E0000-000000067F000040020000A00000007E4000__0000005D2FFFFB38\n000000067F000040020000A00000007E3486-000000067F000040020000A00000007EBE5F__0000002ADB1FF0A9-0000002B7ACFE3E1\n000000067F000040020000A00000007E4000-000000067F000040020000A00000007E8000__0000002BAAB7E320\n000000067F000040020000A00000007E4000-000000067F000040020000A00000007E8000__00000038E9AF7F00\n000000067F000040020000A00000007E4000-000000067F000040020000A00000007E8000__0000003903F1CFE8\n000000067F000040020000A00000007E4000-000000067F000040020000A00000007E8000__0000003B99F7F8A0\n000000067F000040020000A00000007E4000-000000067F000040020000A00000007E8000__0000005D2FFFFB38\n000000067F000040020000A00000007E8000-000000067F000040020000A00000007EC000__0000002BAAB7E320\n000000067F000040020000A00000007E8000-000000067F000040020000A00000007EC000__00000038E9AF7F00\n000000067F000040020000A00000007E8000-000000067F000040020000A00000007EC000__0000003903F1CFE8\n000000067F000040020000A00000007E8000-000000067F000040020000A00000007EC000__0000003B99F7F8A0\n000000067F000040020000A00000007E8000-000000067F000040020000A00000007EC000__0000005D2FFFFB38\n000000067F000040020000A00000007EBE5F-000000067F000040020000A00000007F4836__0000002ADB1FF0A9-0000002B7ACFE3E1\n000000067F00004002
0000A00000007EC000-000000067F000040020000A00000007F0000__0000002BAAB7E320\n000000067F000040020000A00000007EC000-000000067F000040020000A00000007F0000__00000038E9AF7F00\n000000067F000040020000A00000007EC000-000000067F000040020000A00000007F0000__0000003903F1CFE8\n000000067F000040020000A00000007EC000-000000067F000040020000A00000007F0000__0000003B99F7F8A0\n000000067F000040020000A00000007EC000-000000067F000040020000A00000007F0000__0000005D2FFFFB38\n000000067F000040020000A00000007F0000-000000067F000040020000A00000007F4000__0000002BAAB7E320\n000000067F000040020000A00000007F0000-000000067F000040020000A00000007F4000__00000038E9AF7F00\n000000067F000040020000A00000007F0000-000000067F000040020000A00000007F4000__0000003903F1CFE8\n000000067F000040020000A00000007F0000-000000067F000040020000A00000007F4000__0000003B99F7F8A0\n000000067F000040020000A00000007F0000-000000067F000040020000A00000007F4000__0000005D2FFFFB38\n000000067F000040020000A00000007F4000-000000067F000040020000A00000007F8000__0000002BAAB7E320\n000000067F000040020000A00000007F4000-000000067F000040020000A00000007F8000__00000038E9AF7F00\n000000067F000040020000A00000007F4000-000000067F000040020000A00000007F8000__0000003903F1CFE8\n000000067F000040020000A00000007F4000-000000067F000040020000A00000007F8000__0000003B99F7F8A0\n000000067F000040020000A00000007F4000-000000067F000040020000A00000007F8000__0000005D2FFFFB38\n000000067F000040020000A00000007F4836-000000067F000040020000A00000007FD216__0000002ADB1FF0A9-0000002B7ACFE3E1\n000000067F000040020000A00000007F8000-000000067F000040020000A00000007FC000__0000002BAAB7E320\n000000067F000040020000A00000007F8000-000000067F000040020000A00000007FC000__00000038E9AF7F00\n000000067F000040020000A00000007F8000-000000067F000040020000A00000007FC000__0000003903F1CFE8\n000000067F000040020000A00000007F8000-000000067F000040020000A00000007FC000__0000003B99F7F8A0\n000000067F000040020000A00000007F8000-000000067F000040020000A00000007FC000__0000005D2FFFFB38\n000000067F000040020000A00000007FC000-000000067F0
00040020000A0000000800000__0000002BAAB7E320\n000000067F000040020000A00000007FC000-000000067F000040020000A0000000800000__00000038E9AF7F00\n000000067F000040020000A00000007FC000-000000067F000040020000A0000000800000__0000003903F1CFE8\n000000067F000040020000A00000007FC000-000000067F000040020000A0000000800000__0000003B99F7F8A0\n000000067F000040020000A00000007FC000-000000067F000040020000A0000000800000__0000005D2FFFFB38\n000000067F000040020000A00000007FD216-000000067F000040020000A0000000805BEF__0000002ADB1FF0A9-0000002B7ACFE3E1\n000000067F000040020000A0000000800000-000000067F000040020000A0000000804000__0000002BAAB7E320\n000000067F000040020000A0000000800000-000000067F000040020000A0000000804000__00000038E9AF7F00\n000000067F000040020000A0000000800000-000000067F000040020000A0000000804000__0000003903F1CFE8\n000000067F000040020000A0000000800000-000000067F000040020000A0000000804000__0000003B99F7F8A0\n000000067F000040020000A0000000800000-000000067F000040020000A0000000804000__0000005D2FFFFB38\n000000067F000040020000A0000000804000-000000067F000040020000A0000000808000__0000002BAAB7E320\n000000067F000040020000A0000000804000-000000067F000040020000A0000000808000__00000038E9AF7F00\n000000067F000040020000A0000000804000-000000067F000040020000A0000000808000__0000003903F1CFE8\n000000067F000040020000A0000000804000-000000067F000040020000A0000000808000__0000003B99F7F8A0\n000000067F000040020000A0000000804000-000000067F000040020000A0000000808000__0000005D2FFFFB38\n000000067F000040020000A0000000805BEF-000000067F000040020000A000000080E5CA__0000002ADB1FF0A9-0000002B7ACFE3E1\n000000067F000040020000A0000000808000-000000067F000040020000A000000080C000__0000002BAAB7E320\n000000067F000040020000A0000000808000-000000067F000040020000A000000080C000__00000038E9AF7F00\n000000067F000040020000A0000000808000-000000067F000040020000A000000080C000__0000003903F1CFE8\n000000067F000040020000A0000000808000-000000067F000040020000A000000080C000__0000003B99F7F8A0\n000000067F000040020000A0000000808000-000000067F000040020000A0
00000080C000__0000005D2FFFFB38\n000000067F000040020000A000000080C000-000000067F000040020000A0000000810000__0000002BAAB7E320\n000000067F000040020000A000000080C000-000000067F000040020000A0000000810000__00000038E9AF7F00\n000000067F000040020000A000000080C000-000000067F000040020000A0000000810000__0000003903F1CFE8\n000000067F000040020000A000000080C000-000000067F000040020000A0000000810000__0000003B99F7F8A0\n000000067F000040020000A000000080C000-000000067F000040020000A0000000810000__0000005D2FFFFB38\n000000067F000040020000A000000080E5CA-000000067F000040020000A0000000816FB0__0000002ADB1FF0A9-0000002B7ACFE3E1\n000000067F000040020000A0000000810000-000000067F000040020000A0000000814000__0000002BAAB7E320\n000000067F000040020000A0000000810000-000000067F000040020000A0000000814000__00000038E9AF7F00\n000000067F000040020000A0000000810000-000000067F000040020000A0000000814000__0000003903F1CFE8\n000000067F000040020000A0000000810000-000000067F000040020000A0000000814000__0000003B99F7F8A0\n000000067F000040020000A0000000810000-000000067F000040020000A0000000814000__0000005D2FFFFB38\n000000067F000040020000A0000000814000-000000067F000040020000A0000000818000__0000002BAAB7E320\n000000067F000040020000A0000000814000-000000067F000040020000A0000000818000__00000038E9AF7F00\n000000067F000040020000A0000000814000-000000067F000040020000A0000000818000__0000003903F1CFE8\n000000067F000040020000A0000000814000-000000067F000040020000A0000000818000__0000003B99F7F8A0\n000000067F000040020000A0000000814000-000000067F000040020000A0000000818000__0000005D2FFFFB38\n000000067F000040020000A0000000816FB0-000000067F000040020000A000000081F994__0000002ADB1FF0A9-0000002B7ACFE3E1\n000000067F000040020000A0000000818000-000000067F000040020000A000000081C000__0000002BAAB7E320\n000000067F000040020000A0000000818000-000000067F000040020000A000000081C000__00000038E9AF7F00\n000000067F000040020000A0000000818000-000000067F000040020000A000000081C000__0000003903F1CFE8\n000000067F000040020000A0000000818000-000000067F000040020000A000000081C000_
_0000003B99F7F8A0\n000000067F000040020000A0000000818000-000000067F000040020000A000000081C000__0000005D2FFFFB38\n000000067F000040020000A000000081C000-000000067F000040020000A0000000820000__0000002BAAB7E320\n000000067F000040020000A000000081C000-000000067F000040020000A0000000820000__00000038E67ABFA0\n000000067F000040020000A000000081C000-000000067F000040020000A0000000820000__0000003903F1CFE8\n000000067F000040020000A000000081C000-000000067F000040020000A0000000820000__0000003B99F7F8A0\n000000067F000040020000A000000081C000-000000067F000040020000A0000000820000__0000005D2FFFFB38\n000000067F000040020000A000000081F994-000000067F000040020000A0000200000000__0000002ADB1FF0A9-0000002B7ACFE3E1\n000000067F000040020000A000000081FB32-000000067F000040020000A0000000828506__0000002B7ACFE3E1-0000002C1A7DEAD1\n000000067F000040020000A0000000820000-000000067F000040020000A0000000824000__0000002BAAB7E320\n000000067F000040020000A0000000820000-000000067F000040020000A0000000824000__00000038E67ABFA0\n000000067F000040020000A0000000820000-000000067F000040020000A0000000824000__0000003903F1CFE8\n000000067F000040020000A0000000820000-000000067F000040020000A0000000824000__0000003B99F7F8A0\n000000067F000040020000A0000000820000-000000067F000040020000A0000000824000__0000005D2FFFFB38\n000000067F000040020000A0000000824000-000000067F000040020000A0000000828000__0000002BAAB7E320\n000000067F000040020000A0000000824000-000000067F000040020000A0000000828000__00000038E67ABFA0\n000000067F000040020000A0000000824000-000000067F000040020000A0000000828000__0000003903F1CFE8\n000000067F000040020000A0000000824000-000000067F000040020000A0000000828000__0000003B99F7F8A0\n000000067F000040020000A0000000824000-000000067F000040020000A0000000828000__0000005D2FFFFB38\n000000067F000040020000A0000000828000-000000067F000040020000A000000082C000__0000002BAAB7E320\n000000067F000040020000A0000000828000-000000067F000040020000A000000082C000__00000038E67ABFA0\n000000067F000040020000A0000000828000-000000067F000040020000A000000082C000__0000003903F1
CFE8\n000000067F000040020000A0000000828000-000000067F000040020000A000000082C000__0000003B99F7F8A0\n000000067F000040020000A0000000828000-000000067F000040020000A000000082C000__0000005D2FFFFB38\n000000067F000040020000A0000000828506-000000067F000040020000A0000000830EDA__0000002B7ACFE3E1-0000002C1A7DEAD1\n000000067F000040020000A000000082C000-000000067F000040020000A0000000830000__0000002BAAB7E320\n000000067F000040020000A000000082C000-000000067F000040020000A0000000830000__00000038E67ABFA0\n000000067F000040020000A000000082C000-000000067F000040020000A0000000830000__0000003903F1CFE8\n000000067F000040020000A000000082C000-000000067F000040020000A0000000830000__0000003B99F7F8A0\n000000067F000040020000A000000082C000-000000067F000040020000A0000000830000__0000005D2FFFFB38\n000000067F000040020000A0000000830000-000000067F000040020000A0000000834000__0000002BAAB7E320\n000000067F000040020000A0000000830000-000000067F000040020000A0000000834000__00000038E67ABFA0\n000000067F000040020000A0000000830000-000000067F000040020000A0000000834000__0000003903F1CFE8\n000000067F000040020000A0000000830000-000000067F000040020000A0000000834000__0000003B99F7F8A0\n000000067F000040020000A0000000830000-000000067F000040020000A0000000834000__0000005D2FFFFB38\n000000067F000040020000A0000000830EDA-000000067F000040020000A00000008398DB__0000002B7ACFE3E1-0000002C1A7DEAD1\n000000067F000040020000A0000000834000-000000067F000040020000A0000000838000__0000002BAAB7E320\n000000067F000040020000A0000000834000-000000067F000040020000A0000000838000__00000038E67ABFA0\n000000067F000040020000A0000000834000-000000067F000040020000A0000000838000__0000003903F1CFE8\n000000067F000040020000A0000000834000-000000067F000040020000A0000000838000__0000003B99F7F8A0\n000000067F000040020000A0000000834000-000000067F000040020000A0000000838000__0000005D2FFFFB38\n000000067F000040020000A0000000838000-000000067F000040020000A000000083C000__00000038E67ABFA0\n000000067F000040020000A0000000838000-000000067F000040020000A000000083C000__0000003903F1CFE8\n0000000
67F000040020000A0000000838000-000000067F000040020000A000000083C000__0000003B99F7F8A0\n000000067F000040020000A0000000838000-000000067F000040020000A000000083C000__0000005D2FFFFB38\n000000067F000040020000A0000000838000-030000000000000000000000000000000002__0000002BAAB7E320\n000000067F000040020000A00000008398DB-000000067F000040020000A00000008422C1__0000002B7ACFE3E1-0000002C1A7DEAD1\n000000067F000040020000A000000083C000-000000067F000040020000A0000000840000__00000038E67ABFA0\n000000067F000040020000A000000083C000-000000067F000040020000A0000000840000__0000003903F1CFE8\n000000067F000040020000A000000083C000-000000067F000040020000A0000000840000__0000003B99F7F8A0\n000000067F000040020000A000000083C000-000000067F000040020000A0000000840000__0000005D2FFFFB38\n000000067F000040020000A0000000840000-000000067F000040020000A0000000844000__00000038E67ABFA0\n000000067F000040020000A0000000840000-000000067F000040020000A0000000844000__0000003903F1CFE8\n000000067F000040020000A0000000840000-000000067F000040020000A0000000844000__0000003B99F7F8A0\n000000067F000040020000A0000000840000-000000067F000040020000A0000000844000__0000005D2FFFFB38\n000000067F000040020000A00000008422C1-000000067F000040020000A000000084AC98__0000002B7ACFE3E1-0000002C1A7DEAD1\n000000067F000040020000A0000000844000-000000067F000040020000A0000000848000__00000038E67ABFA0\n000000067F000040020000A0000000844000-000000067F000040020000A0000000848000__0000003903F1CFE8\n000000067F000040020000A0000000844000-000000067F000040020000A0000000848000__0000003B99F7F8A0\n000000067F000040020000A0000000844000-000000067F000040020000A0000000848000__0000005D2FFFFB38\n000000067F000040020000A0000000848000-000000067F000040020000A000000084C000__00000038E67ABFA0\n000000067F000040020000A0000000848000-000000067F000040020000A000000084C000__0000003903F1CFE8\n000000067F000040020000A0000000848000-000000067F000040020000A000000084C000__0000003B99F7F8A0\n000000067F000040020000A0000000848000-000000067F000040020000A000000084C000__0000005D2FFFFB38\n000000067F0000400200
00A000000084AC98-000000067F000040020000A000000085367F__0000002B7ACFE3E1-0000002C1A7DEAD1\n000000067F000040020000A000000084C000-000000067F000040020000A0000000850000__00000038E67ABFA0\n000000067F000040020000A000000084C000-000000067F000040020000A0000000850000__0000003903F1CFE8\n000000067F000040020000A000000084C000-000000067F000040020000A0000000850000__0000003B99F7F8A0\n000000067F000040020000A000000084C000-000000067F000040020000A0000000850000__0000005D2FFFFB38\n000000067F000040020000A0000000850000-000000067F000040020000A0000000854000__00000038E67ABFA0\n000000067F000040020000A0000000850000-000000067F000040020000A0000000854000__0000003903F1CFE8\n000000067F000040020000A0000000850000-000000067F000040020000A0000000854000__0000003B99F7F8A0\n000000067F000040020000A0000000850000-000000067F000040020000A0000000854000__0000005D2FFFFB38\n000000067F000040020000A000000085367F-000000067F000040020000A000000085C059__0000002B7ACFE3E1-0000002C1A7DEAD1\n000000067F000040020000A0000000854000-000000067F000040020000A0000000858000__00000038E67ABFA0\n000000067F000040020000A0000000854000-000000067F000040020000A0000000858000__0000003903F1CFE8\n000000067F000040020000A0000000854000-000000067F000040020000A0000000858000__0000003B99F7F8A0\n000000067F000040020000A0000000854000-000000067F000040020000A0000000858000__0000005D2FFFFB38\n000000067F000040020000A0000000858000-000000067F000040020000A000000085C000__00000038E67ABFA0\n000000067F000040020000A0000000858000-000000067F000040020000A000000085C000__0000003903F1CFE8\n000000067F000040020000A0000000858000-000000067F000040020000A000000085C000__0000003B99F7F8A0\n000000067F000040020000A0000000858000-000000067F000040020000A000000085C000__0000005D2FFFFB38\n000000067F000040020000A000000085C000-000000067F000040020000A0000000860000__00000038E67ABFA0\n000000067F000040020000A000000085C000-000000067F000040020000A0000000860000__0000003903F1CFE8\n000000067F000040020000A000000085C000-000000067F000040020000A0000000860000__0000003B99F7F8A0\n000000067F000040020000A000000085C
000-000000067F000040020000A0000000860000__0000005D2FFFFB38\n000000067F000040020000A000000085C059-000000067F000040020000A0000000864A25__0000002B7ACFE3E1-0000002C1A7DEAD1\n000000067F000040020000A0000000860000-000000067F000040020000A0000000864000__00000038E67ABFA0\n000000067F000040020000A0000000860000-000000067F000040020000A0000000864000__0000003903F1CFE8\n000000067F000040020000A0000000860000-000000067F000040020000A0000000864000__0000003B99F7F8A0\n000000067F000040020000A0000000860000-000000067F000040020000A0000000864000__0000005D2FFFFB38\n000000067F000040020000A0000000864000-000000067F000040020000A0000000868000__00000038E67ABFA0\n000000067F000040020000A0000000864000-000000067F000040020000A0000000868000__0000003903F1CFE8\n000000067F000040020000A0000000864000-000000067F000040020000A0000000868000__0000003B99F7F8A0\n000000067F000040020000A0000000864000-000000067F000040020000A0000000868000__0000005D2FFFFB38\n000000067F000040020000A0000000864A25-000000067F000040020000A000000086D403__0000002B7ACFE3E1-0000002C1A7DEAD1\n000000067F000040020000A0000000868000-000000067F000040020000A000000086C000__00000038E67ABFA0\n000000067F000040020000A0000000868000-000000067F000040020000A000000086C000__0000003903F1CFE8\n000000067F000040020000A0000000868000-000000067F000040020000A000000086C000__0000003B99F7F8A0\n000000067F000040020000A0000000868000-000000067F000040020000A000000086C000__0000005D2FFFFB38\n000000067F000040020000A000000086C000-000000067F000040020000A0000000870000__00000038E67ABFA0\n000000067F000040020000A000000086C000-000000067F000040020000A0000000870000__0000003903F1CFE8\n000000067F000040020000A000000086C000-000000067F000040020000A0000000870000__0000003B99F7F8A0\n000000067F000040020000A000000086C000-000000067F000040020000A0000000870000__0000005D2FFFFB38\n000000067F000040020000A000000086D403-000000067F000040020000A0000000875DE0__0000002B7ACFE3E1-0000002C1A7DEAD1\n000000067F000040020000A0000000870000-000000067F000040020000A0000000874000__00000038E67ABFA0\n000000067F000040020000A000000
0870000-000000067F000040020000A0000000874000__0000003903F1CFE8\n000000067F000040020000A0000000870000-000000067F000040020000A0000000874000__0000003B99F7F8A0\n000000067F000040020000A0000000870000-000000067F000040020000A0000000874000__0000005D2FFFFB38\n000000067F000040020000A0000000874000-000000067F000040020000A0000000878000__00000038E1ABFE28\n000000067F000040020000A0000000874000-000000067F000040020000A0000000878000__00000038E9AF7F00\n000000067F000040020000A0000000874000-000000067F000040020000A0000000878000__0000003903F1CFE8\n000000067F000040020000A0000000874000-000000067F000040020000A0000000878000__0000003B99F7F8A0\n000000067F000040020000A0000000874000-000000067F000040020000A0000000878000__0000005D2FFFFB38\n000000067F000040020000A0000000875DE0-000000067F000040020000A0000200000000__0000002B7ACFE3E1-0000002C1A7DEAD1\n000000067F000040020000A0000000876030-000000067F000040020000A000000087EA03__0000002C1A7DEAD1-0000002CBA2DFCE9\n000000067F000040020000A0000000878000-000000067F000040020000A000000087C000__00000038E1ABFE28\n000000067F000040020000A0000000878000-000000067F000040020000A000000087C000__00000038E9AF7F00\n000000067F000040020000A0000000878000-000000067F000040020000A000000087C000__0000003903F1CFE8\n000000067F000040020000A0000000878000-000000067F000040020000A000000087C000__0000003B99F7F8A0\n000000067F000040020000A0000000878000-000000067F000040020000A000000087C000__0000005D2FFFFB38\n000000067F000040020000A000000087C000-000000067F000040020000A0000000880000__00000038E1ABFE28\n000000067F000040020000A000000087C000-000000067F000040020000A0000000880000__00000038E9AF7F00\n000000067F000040020000A000000087C000-000000067F000040020000A0000000880000__0000003903F1CFE8\n000000067F000040020000A000000087C000-000000067F000040020000A0000000880000__0000003B99F7F8A0\n000000067F000040020000A000000087C000-000000067F000040020000A0000000880000__0000005D2FFFFB38\n000000067F000040020000A000000087EA03-000000067F000040020000A00000008873D2__0000002C1A7DEAD1-0000002CBA2DFCE9\n000000067F000040020000A00
00000880000-000000067F000040020000A0000000884000__00000038E1ABFE28\n000000067F000040020000A0000000880000-000000067F000040020000A0000000884000__00000038E9AF7F00\n000000067F000040020000A0000000880000-000000067F000040020000A0000000884000__0000003903F1CFE8\n000000067F000040020000A0000000880000-000000067F000040020000A0000000884000__0000003B99F7F8A0\n000000067F000040020000A0000000880000-000000067F000040020000A0000000884000__0000005D2FFFFB38\n000000067F000040020000A0000000884000-000000067F000040020000A0000000888000__00000038E1ABFE28\n000000067F000040020000A0000000884000-000000067F000040020000A0000000888000__00000038E9AF7F00\n000000067F000040020000A0000000884000-000000067F000040020000A0000000888000__0000003903F1CFE8\n000000067F000040020000A0000000884000-000000067F000040020000A0000000888000__0000003B99F7F8A0\n000000067F000040020000A0000000884000-000000067F000040020000A0000000888000__0000005D2FFFFB38\n000000067F000040020000A00000008873D2-000000067F000040020000A000000088FDC5__0000002C1A7DEAD1-0000002CBA2DFCE9\n000000067F000040020000A0000000888000-000000067F000040020000A000000088C000__00000038E1ABFE28\n000000067F000040020000A0000000888000-000000067F000040020000A000000088C000__00000038E9AF7F00\n000000067F000040020000A0000000888000-000000067F000040020000A000000088C000__0000003903F1CFE8\n000000067F000040020000A0000000888000-000000067F000040020000A000000088C000__0000003B99F7F8A0\n000000067F000040020000A0000000888000-000000067F000040020000A000000088C000__0000005D2FFFFB38\n000000067F000040020000A000000088C000-000000067F000040020000A0000000890000__00000038E1ABFE28\n000000067F000040020000A000000088C000-000000067F000040020000A0000000890000__00000038E9AF7F00\n000000067F000040020000A000000088C000-000000067F000040020000A0000000890000__0000003903F1CFE8\n000000067F000040020000A000000088C000-000000067F000040020000A0000000890000__0000003B99F7F8A0\n000000067F000040020000A000000088C000-000000067F000040020000A0000000890000__0000005D2FFFFB38\n000000067F000040020000A000000088FDC5-000000067F00004002
0000A00000008987A7__0000002C1A7DEAD1-0000002CBA2DFCE9\n000000067F000040020000A0000000890000-000000067F000040020000A0000000894000__00000038E1ABFE28\n000000067F000040020000A0000000890000-000000067F000040020000A0000000894000__00000038E9AF7F00\n000000067F000040020000A0000000890000-000000067F000040020000A0000000894000__0000003903F1CFE8\n000000067F000040020000A0000000890000-000000067F000040020000A0000000894000__0000003B99F7F8A0\n000000067F000040020000A0000000890000-000000067F000040020000A0000000894000__0000005D2FFFFB38\n000000067F000040020000A0000000894000-000000067F000040020000A0000000898000__00000038E1ABFE28\n000000067F000040020000A0000000894000-000000067F000040020000A0000000898000__00000038E9AF7F00\n000000067F000040020000A0000000894000-000000067F000040020000A0000000898000__0000003903F1CFE8\n000000067F000040020000A0000000894000-000000067F000040020000A0000000898000__0000003B99F7F8A0\n000000067F000040020000A0000000894000-000000067F000040020000A0000000898000__0000005D2FFFFB38\n000000067F000040020000A0000000898000-000000067F000040020000A000000089C000__00000038E1ABFE28\n000000067F000040020000A0000000898000-000000067F000040020000A000000089C000__00000038E9AF7F00\n000000067F000040020000A0000000898000-000000067F000040020000A000000089C000__0000003903F1CFE8\n000000067F000040020000A0000000898000-000000067F000040020000A000000089C000__0000003B99F7F8A0\n000000067F000040020000A0000000898000-000000067F000040020000A000000089C000__0000005D2FFFFB38\n000000067F000040020000A00000008987A7-000000067F000040020000A00000008A117E__0000002C1A7DEAD1-0000002CBA2DFCE9\n000000067F000040020000A000000089C000-000000067F000040020000A00000008A0000__00000038E1ABFE28\n000000067F000040020000A000000089C000-000000067F000040020000A00000008A0000__00000038E9AF7F00\n000000067F000040020000A000000089C000-000000067F000040020000A00000008A0000__0000003903F1CFE8\n000000067F000040020000A000000089C000-000000067F000040020000A00000008A0000__0000003B99F7F8A0\n000000067F000040020000A000000089C000-000000067F000040020000A00000008
A0000__0000005D2FFFFB38\n000000067F000040020000A00000008A0000-000000067F000040020000A00000008A4000__00000038E1ABFE28\n000000067F000040020000A00000008A0000-000000067F000040020000A00000008A4000__00000038E9AF7F00\n000000067F000040020000A00000008A0000-000000067F000040020000A00000008A4000__0000003903F1CFE8\n000000067F000040020000A00000008A0000-000000067F000040020000A00000008A4000__0000003B99F7F8A0\n000000067F000040020000A00000008A0000-000000067F000040020000A00000008A4000__0000005D2FFFFB38\n000000067F000040020000A00000008A117E-000000067F000040020000A00000008A9B5D__0000002C1A7DEAD1-0000002CBA2DFCE9\n000000067F000040020000A00000008A4000-000000067F000040020000A00000008A8000__00000038E1ABFE28\n000000067F000040020000A00000008A4000-000000067F000040020000A00000008A8000__00000038E9AF7F00\n000000067F000040020000A00000008A4000-000000067F000040020000A00000008A8000__0000003903F1CFE8\n000000067F000040020000A00000008A4000-000000067F000040020000A00000008A8000__0000003B99F7F8A0\n000000067F000040020000A00000008A4000-000000067F000040020000A00000008A8000__0000005D2FFFFB38\n000000067F000040020000A00000008A8000-000000067F000040020000A00000008AC000__00000038E1ABFE28\n000000067F000040020000A00000008A8000-000000067F000040020000A00000008AC000__00000038E9AF7F00\n000000067F000040020000A00000008A8000-000000067F000040020000A00000008AC000__0000003903F1CFE8\n000000067F000040020000A00000008A8000-000000067F000040020000A00000008AC000__0000003B99F7F8A0\n000000067F000040020000A00000008A8000-000000067F000040020000A00000008AC000__0000005D2FFFFB38\n000000067F000040020000A00000008A9B5D-000000067F000040020000A00000008B253E__0000002C1A7DEAD1-0000002CBA2DFCE9\n000000067F000040020000A00000008AC000-000000067F000040020000A00000008B0000__00000038E1ABFE28\n000000067F000040020000A00000008AC000-000000067F000040020000A00000008B0000__00000038E9AF7F00\n000000067F000040020000A00000008AC000-000000067F000040020000A00000008B0000__0000003903F1CFE8\n000000067F000040020000A00000008AC000-000000067F000040020000A00000008B0000__000000
3B99F7F8A0\n000000067F000040020000A00000008AC000-000000067F000040020000A00000008B0000__0000005D2FFFFB38\n000000067F000040020000A00000008B0000-000000067F000040020000A00000008B4000__00000038E1ABFE28\n000000067F000040020000A00000008B0000-000000067F000040020000A00000008B4000__00000038E9AF7F00\n000000067F000040020000A00000008B0000-000000067F000040020000A00000008B4000__0000003903F1CFE8\n000000067F000040020000A00000008B0000-000000067F000040020000A00000008B4000__0000003B99F7F8A0\n000000067F000040020000A00000008B0000-000000067F000040020000A00000008B4000__0000005D2FFFFB38\n000000067F000040020000A00000008B253E-000000067F000040020000A00000008BAF04__0000002C1A7DEAD1-0000002CBA2DFCE9\n000000067F000040020000A00000008B4000-000000067F000040020000A00000008B8000__00000038E1ABFE28\n000000067F000040020000A00000008B4000-000000067F000040020000A00000008B8000__00000038E9AF7F00\n000000067F000040020000A00000008B4000-000000067F000040020000A00000008B8000__0000003903F1CFE8\n000000067F000040020000A00000008B4000-000000067F000040020000A00000008B8000__0000003B99F7F8A0\n000000067F000040020000A00000008B4000-000000067F000040020000A00000008B8000__0000005D2FFFFB38\n000000067F000040020000A00000008B8000-000000067F000040020000A00000008BC000__00000038E1ABFE28\n000000067F000040020000A00000008B8000-000000067F000040020000A00000008BC000__00000038E9AF7F00\n000000067F000040020000A00000008B8000-000000067F000040020000A00000008BC000__0000003903F1CFE8\n000000067F000040020000A00000008B8000-000000067F000040020000A00000008BC000__0000003B99F7F8A0\n000000067F000040020000A00000008B8000-000000067F000040020000A00000008BC000__0000005D2FFFFB38\n000000067F000040020000A00000008BAF04-000000067F000040020000A00000008C38D2__0000002C1A7DEAD1-0000002CBA2DFCE9\n000000067F000040020000A00000008BC000-000000067F000040020000A00000008C0000__00000038E1ABFE28\n000000067F000040020000A00000008BC000-000000067F000040020000A00000008C0000__00000038E9AF7F00\n000000067F000040020000A00000008BC000-000000067F000040020000A00000008C0000__0000003903F1CFE8\n0
00000067F000040020000A00000008BC000-000000067F000040020000A00000008C0000__0000003B99F7F8A0\n000000067F000040020000A00000008BC000-000000067F000040020000A00000008C0000__0000005D2FFFFB38\n000000067F000040020000A00000008C0000-000000067F000040020000A00000008C4000__00000038E1ABFE28\n000000067F000040020000A00000008C0000-000000067F000040020000A00000008C4000__00000038E9AF7F00\n000000067F000040020000A00000008C0000-000000067F000040020000A00000008C4000__0000003903F1CFE8\n000000067F000040020000A00000008C0000-000000067F000040020000A00000008C4000__0000003B99F7F8A0\n000000067F000040020000A00000008C0000-000000067F000040020000A00000008C4000__0000005D2FFFFB38\n000000067F000040020000A00000008C38D2-000000067F000040020000A00000008CC2C1__0000002C1A7DEAD1-0000002CBA2DFCE9\n000000067F000040020000A00000008C4000-000000067F000040020000A00000008C8000__00000038E1ABFE28\n000000067F000040020000A00000008C4000-000000067F000040020000A00000008C8000__00000038E9AF7F00\n000000067F000040020000A00000008C4000-000000067F000040020000A00000008C8000__0000003903F1CFE8\n000000067F000040020000A00000008C4000-000000067F000040020000A00000008C8000__0000003B99F7F8A0\n000000067F000040020000A00000008C4000-000000067F000040020000A00000008C8000__0000005D2FFFFB38\n000000067F000040020000A00000008C8000-000000067F000040020000A00000008CC000__00000038E1ABFE28\n000000067F000040020000A00000008C8000-000000067F000040020000A00000008CC000__00000038E9AF7F00\n000000067F000040020000A00000008C8000-000000067F000040020000A00000008CC000__0000003903F1CFE8\n000000067F000040020000A00000008C8000-000000067F000040020000A00000008CC000__0000003B99F7F8A0\n000000067F000040020000A00000008C8000-000000067F000040020000A00000008CC000__0000005D2FFFFB38\n000000067F000040020000A00000008CC000-000000067F000040020000A00000008D0000__0000002D89C52B28\n000000067F000040020000A00000008CC000-000000067F000040020000A00000008D0000__00000038E9AF7F00\n000000067F000040020000A00000008CC000-000000067F000040020000A00000008D0000__0000003903F1CFE8\n000000067F000040020000A00000008
CC000-000000067F000040020000A00000008D0000__0000003B99F7F8A0\n000000067F000040020000A00000008CC000-000000067F000040020000A00000008D0000__0000005D2FFFFB38\n000000067F000040020000A00000008CC2C1-000000067F000040020000A0000200000000__0000002C1A7DEAD1-0000002CBA2DFCE9\n000000067F000040020000A00000008CC47E-000000067F000040020000A00000008D4E54__0000002CBA2DFCE9-0000002D59DDCFE9\n000000067F000040020000A00000008D0000-000000067F000040020000A00000008D4000__0000002D89C52B28\n000000067F000040020000A00000008D0000-000000067F000040020000A00000008D4000__00000038E9AF7F00\n000000067F000040020000A00000008D0000-000000067F000040020000A00000008D4000__0000003903F1CFE8\n000000067F000040020000A00000008D0000-000000067F000040020000A00000008D4000__0000003B99F7F8A0\n000000067F000040020000A00000008D0000-000000067F000040020000A00000008D4000__0000005D2FFFFB38\n000000067F000040020000A00000008D4000-000000067F000040020000A00000008D8000__0000002D89C52B28\n000000067F000040020000A00000008D4000-000000067F000040020000A00000008D8000__00000038E9AF7F00\n000000067F000040020000A00000008D4000-000000067F000040020000A00000008D8000__0000003903F1CFE8\n000000067F000040020000A00000008D4000-000000067F000040020000A00000008D8000__0000003B99F7F8A0\n000000067F000040020000A00000008D4000-000000067F000040020000A00000008D8000__0000005D2FFFFB38\n000000067F000040020000A00000008D4E54-000000067F000040020000A00000008DD830__0000002CBA2DFCE9-0000002D59DDCFE9\n000000067F000040020000A00000008D8000-000000067F000040020000A00000008DC000__0000002D89C52B28\n000000067F000040020000A00000008D8000-000000067F000040020000A00000008DC000__00000038E9AF7F00\n000000067F000040020000A00000008D8000-000000067F000040020000A00000008DC000__0000003903F1CFE8\n000000067F000040020000A00000008D8000-000000067F000040020000A00000008DC000__0000003B99F7F8A0\n000000067F000040020000A00000008D8000-000000067F000040020000A00000008DC000__0000005D2FFFFB38\n000000067F000040020000A00000008DC000-000000067F000040020000A00000008E0000__0000002D89C52B28\n000000067F000040020000A0000
0008DC000-000000067F000040020000A00000008E0000__00000038E9AF7F00\n000000067F000040020000A00000008DC000-000000067F000040020000A00000008E0000__0000003903F1CFE8\n000000067F000040020000A00000008DC000-000000067F000040020000A00000008E0000__0000003B99F7F8A0\n000000067F000040020000A00000008DC000-000000067F000040020000A00000008E0000__0000005D2FFFFB38\n000000067F000040020000A00000008DD830-000000067F000040020000A00000008E6201__0000002CBA2DFCE9-0000002D59DDCFE9\n000000067F000040020000A00000008E0000-000000067F000040020000A00000008E4000__0000002D89C52B28\n000000067F000040020000A00000008E0000-000000067F000040020000A00000008E4000__00000038E9AF7F00\n000000067F000040020000A00000008E0000-000000067F000040020000A00000008E4000__0000003903F1CFE8\n000000067F000040020000A00000008E0000-000000067F000040020000A00000008E4000__0000003B99F7F8A0\n000000067F000040020000A00000008E0000-000000067F000040020000A00000008E4000__0000005D2FFFFB38\n000000067F000040020000A00000008E4000-000000067F000040020000A00000008E8000__0000002D89C52B28\n000000067F000040020000A00000008E4000-000000067F000040020000A00000008E8000__00000038E9AF7F00\n000000067F000040020000A00000008E4000-000000067F000040020000A00000008E8000__0000003903F1CFE8\n000000067F000040020000A00000008E4000-000000067F000040020000A00000008E8000__0000003B99F7F8A0\n000000067F000040020000A00000008E4000-000000067F000040020000A00000008E8000__0000005D2FFFFB38\n000000067F000040020000A00000008E6201-000000067F000040020000A00000008EEBDC__0000002CBA2DFCE9-0000002D59DDCFE9\n000000067F000040020000A00000008E8000-000000067F000040020000A00000008EC000__0000002D89C52B28\n000000067F000040020000A00000008E8000-000000067F000040020000A00000008EC000__00000038E9AF7F00\n000000067F000040020000A00000008E8000-000000067F000040020000A00000008EC000__0000003903F1CFE8\n000000067F000040020000A00000008E8000-000000067F000040020000A00000008EC000__0000003B99F7F8A0\n000000067F000040020000A00000008E8000-000000067F000040020000A00000008EC000__0000005D2FFFFB38\n000000067F000040020000A00000008EC000-000
000067F000040020000A00000008F0000__0000002D89C52B28\n000000067F000040020000A00000008EC000-000000067F000040020000A00000008F0000__00000038E9AF7F00\n000000067F000040020000A00000008EC000-000000067F000040020000A00000008F0000__0000003903F1CFE8\n000000067F000040020000A00000008EC000-000000067F000040020000A00000008F0000__0000003B99F7F8A0\n000000067F000040020000A00000008EC000-000000067F000040020000A00000008F0000__0000005D2FFFFB38\n000000067F000040020000A00000008EEBDC-000000067F000040020000A00000008F75B3__0000002CBA2DFCE9-0000002D59DDCFE9\n000000067F000040020000A00000008F0000-000000067F000040020000A00000008F4000__0000002D89C52B28\n000000067F000040020000A00000008F0000-000000067F000040020000A00000008F4000__00000038E9AF7F00\n000000067F000040020000A00000008F0000-000000067F000040020000A00000008F4000__0000003903F1CFE8\n000000067F000040020000A00000008F0000-000000067F000040020000A00000008F4000__0000003B99F7F8A0\n000000067F000040020000A00000008F0000-000000067F000040020000A00000008F4000__0000005D2FFFFB38\n000000067F000040020000A00000008F4000-000000067F000040020000A00000008F8000__0000002D89C52B28\n000000067F000040020000A00000008F4000-000000067F000040020000A00000008F8000__00000038E9AF7F00\n000000067F000040020000A00000008F4000-000000067F000040020000A00000008F8000__0000003903F1CFE8\n000000067F000040020000A00000008F4000-000000067F000040020000A00000008F8000__0000003B99F7F8A0\n000000067F000040020000A00000008F4000-000000067F000040020000A00000008F8000__0000005D2FFFFB38\n000000067F000040020000A00000008F75B3-000000067F000040020000A00000008FFF8B__0000002CBA2DFCE9-0000002D59DDCFE9\n000000067F000040020000A00000008F8000-000000067F000040020000A00000008FC000__0000002D89C52B28\n000000067F000040020000A00000008F8000-000000067F000040020000A00000008FC000__00000038E9AF7F00\n000000067F000040020000A00000008F8000-000000067F000040020000A00000008FC000__0000003903F1CFE8\n000000067F000040020000A00000008F8000-000000067F000040020000A00000008FC000__0000003B99F7F8A0\n000000067F000040020000A00000008F8000-000000067F000040
020000A00000008FC000__0000005D2FFFFB38\n000000067F000040020000A00000008FC000-000000067F000040020000A0000000900000__0000002D89C52B28\n000000067F000040020000A00000008FC000-000000067F000040020000A0000000900000__00000038E9AF7F00\n000000067F000040020000A00000008FC000-000000067F000040020000A0000000900000__0000003903F1CFE8\n000000067F000040020000A00000008FC000-000000067F000040020000A0000000900000__0000003B99F7F8A0\n000000067F000040020000A00000008FC000-000000067F000040020000A0000000900000__0000005D2FFFFB38\n000000067F000040020000A00000008FFF8B-000000067F000040020000A000000090896E__0000002CBA2DFCE9-0000002D59DDCFE9\n000000067F000040020000A0000000900000-000000067F000040020000A0000000904000__0000002D89C52B28\n000000067F000040020000A0000000900000-000000067F000040020000A0000000904000__00000038E9AF7F00\n000000067F000040020000A0000000900000-000000067F000040020000A0000000904000__0000003903F1CFE8\n000000067F000040020000A0000000900000-000000067F000040020000A0000000904000__0000003B99F7F8A0\n000000067F000040020000A0000000900000-000000067F000040020000A0000000904000__0000005D2FFFFB38\n000000067F000040020000A0000000904000-000000067F000040020000A0000000908000__0000002D89C52B28\n000000067F000040020000A0000000904000-000000067F000040020000A0000000908000__00000038E9AF7F00\n000000067F000040020000A0000000904000-000000067F000040020000A0000000908000__0000003903F1CFE8\n000000067F000040020000A0000000904000-000000067F000040020000A0000000908000__0000003B99F7F8A0\n000000067F000040020000A0000000904000-000000067F000040020000A0000000908000__0000005D2FFFFB38\n000000067F000040020000A0000000908000-000000067F000040020000A000000090C000__0000002D89C52B28\n000000067F000040020000A0000000908000-000000067F000040020000A000000090C000__00000038E9AF7F00\n000000067F000040020000A0000000908000-000000067F000040020000A000000090C000__0000003903F1CFE8\n000000067F000040020000A0000000908000-000000067F000040020000A000000090C000__0000003B99F7F8A0\n000000067F000040020000A0000000908000-000000067F000040020000A000000090C000__0000005D
2FFFFB38\n000000067F000040020000A000000090896E-000000067F000040020000A000000091134B__0000002CBA2DFCE9-0000002D59DDCFE9\n000000067F000040020000A000000090C000-000000067F000040020000A0000000910000__0000002D89C52B28\n000000067F000040020000A000000090C000-000000067F000040020000A0000000910000__00000038E9AF7F00\n000000067F000040020000A000000090C000-000000067F000040020000A0000000910000__0000003903F1CFE8\n000000067F000040020000A000000090C000-000000067F000040020000A0000000910000__0000003B99F7F8A0\n000000067F000040020000A000000090C000-000000067F000040020000A0000000910000__0000005D2FFFFB38\n000000067F000040020000A0000000910000-000000067F000040020000A0000000914000__0000002D89C52B28\n000000067F000040020000A0000000910000-000000067F000040020000A0000000914000__00000038E9AF7F00\n000000067F000040020000A0000000910000-000000067F000040020000A0000000914000__0000003903F1CFE8\n000000067F000040020000A0000000910000-000000067F000040020000A0000000914000__0000003B99F7F8A0\n000000067F000040020000A0000000910000-000000067F000040020000A0000000914000__0000005D2FFFFB38\n000000067F000040020000A000000091134B-000000067F000040020000A0000000919D16__0000002CBA2DFCE9-0000002D59DDCFE9\n000000067F000040020000A0000000914000-000000067F000040020000A0000000918000__0000002D89C52B28\n000000067F000040020000A0000000914000-000000067F000040020000A0000000918000__00000038E9AF7F00\n000000067F000040020000A0000000914000-000000067F000040020000A0000000918000__0000003903F1CFE8\n000000067F000040020000A0000000914000-000000067F000040020000A0000000918000__0000003B99F7F8A0\n000000067F000040020000A0000000914000-000000067F000040020000A0000000918000__0000005D2FFFFB38\n000000067F000040020000A0000000918000-000000067F000040020000A000000091C000__0000002D89C52B28\n000000067F000040020000A0000000918000-000000067F000040020000A000000091C000__00000038E9AF7F00\n000000067F000040020000A0000000918000-000000067F000040020000A000000091C000__0000003903F1CFE8\n000000067F000040020000A0000000918000-000000067F000040020000A000000091C000__0000003B99F7F8A0\n000
000067F000040020000A0000000918000-000000067F000040020000A000000091C000__0000005D2FFFFB38\n000000067F000040020000A0000000919D16-000000067F000040020000A00000009226E9__0000002CBA2DFCE9-0000002D59DDCFE9\n000000067F000040020000A000000091C000-000000067F000040020000A0000000920000__0000002D89C52B28\n000000067F000040020000A000000091C000-000000067F000040020000A0000000920000__00000038E9AF7F00\n000000067F000040020000A000000091C000-000000067F000040020000A0000000920000__0000003903F1CFE8\n000000067F000040020000A000000091C000-000000067F000040020000A0000000920000__0000003B99F7F8A0\n000000067F000040020000A000000091C000-000000067F000040020000A0000000920000__0000005D2FFFFB38\n000000067F000040020000A0000000920000-000000067F000040020000A0000000924000__0000002D89C52B28\n000000067F000040020000A0000000920000-000000067F000040020000A0000000924000__00000038E67ABFA0\n000000067F000040020000A0000000920000-000000067F000040020000A0000000924000__0000003903F1CFE8\n000000067F000040020000A0000000920000-000000067F000040020000A0000000924000__0000003B99F7F8A0\n000000067F000040020000A0000000920000-000000067F000040020000A0000000924000__0000005D2FFFFB38\n000000067F000040020000A00000009226E9-000000067F000040020000A0000200000000__0000002CBA2DFCE9-0000002D59DDCFE9\n000000067F000040020000A0000000922977-000000067F000040020000A000000092B35C__0000002D59DDCFE9-0000002E0985D9D9\n000000067F000040020000A0000000924000-000000067F000040020000A0000000928000__0000002D89C52B28\n000000067F000040020000A0000000924000-000000067F000040020000A0000000928000__00000038E67ABFA0\n000000067F000040020000A0000000924000-000000067F000040020000A0000000928000__0000003903F1CFE8\n000000067F000040020000A0000000924000-000000067F000040020000A0000000928000__0000003B99F7F8A0\n000000067F000040020000A0000000924000-000000067F000040020000A0000000928000__0000005D2FFFFB38\n000000067F000040020000A0000000928000-000000067F000040020000A000000092C000__0000002D89C52B28\n000000067F000040020000A0000000928000-000000067F000040020000A000000092C000__00000038E67ABFA0\
n000000067F000040020000A0000000928000-000000067F000040020000A000000092C000__0000003903F1CFE8\n000000067F000040020000A0000000928000-000000067F000040020000A000000092C000__0000003B99F7F8A0\n000000067F000040020000A0000000928000-000000067F000040020000A000000092C000__0000005D2FFFFB38\n000000067F000040020000A000000092B35C-000000067F000040020000A0000000933D30__0000002D59DDCFE9-0000002E0985D9D9\n000000067F000040020000A000000092C000-000000067F000040020000A0000000930000__0000002D89C52B28\n000000067F000040020000A000000092C000-000000067F000040020000A0000000930000__00000038E67ABFA0\n000000067F000040020000A000000092C000-000000067F000040020000A0000000930000__0000003903F1CFE8\n000000067F000040020000A000000092C000-000000067F000040020000A0000000930000__0000003B99F7F8A0\n000000067F000040020000A000000092C000-000000067F000040020000A0000000930000__0000005D2FFFFB38\n000000067F000040020000A0000000930000-000000067F000040020000A0000000934000__0000002D89C52B28\n000000067F000040020000A0000000930000-000000067F000040020000A0000000934000__00000038E67ABFA0\n000000067F000040020000A0000000930000-000000067F000040020000A0000000934000__0000003903F1CFE8\n000000067F000040020000A0000000930000-000000067F000040020000A0000000934000__0000003B99F7F8A0\n000000067F000040020000A0000000930000-000000067F000040020000A0000000934000__0000005D2FFFFB38\n000000067F000040020000A0000000933D30-000000067F000040020000A000000093C701__0000002D59DDCFE9-0000002E0985D9D9\n000000067F000040020000A0000000934000-000000067F000040020000A0000000938000__0000002D89C52B28\n000000067F000040020000A0000000934000-000000067F000040020000A0000000938000__00000038E67ABFA0\n000000067F000040020000A0000000934000-000000067F000040020000A0000000938000__0000003903F1CFE8\n000000067F000040020000A0000000934000-000000067F000040020000A0000000938000__0000003B99F7F8A0\n000000067F000040020000A0000000934000-000000067F000040020000A0000000938000__0000005D2FFFFB38\n000000067F000040020000A0000000938000-000000067F000040020000A000000093C000__0000002D89C52B28\n000000067F00
0040020000A0000000938000-000000067F000040020000A000000093C000__00000038E67ABFA0\n000000067F000040020000A0000000938000-000000067F000040020000A000000093C000__0000003903F1CFE8\n000000067F000040020000A0000000938000-000000067F000040020000A000000093C000__0000003B99F7F8A0\n000000067F000040020000A0000000938000-000000067F000040020000A000000093C000__0000005D2FFFFB38\n000000067F000040020000A000000093C000-000000067F000040020000A0000000940000__00000038E67ABFA0\n000000067F000040020000A000000093C000-000000067F000040020000A0000000940000__0000003903F1CFE8\n000000067F000040020000A000000093C000-000000067F000040020000A0000000940000__0000003B99F7F8A0\n000000067F000040020000A000000093C000-000000067F000040020000A0000000940000__0000005D2FFFFB38\n000000067F000040020000A000000093C000-030000000000000000000000000000000002__0000002D89C52B28\n000000067F000040020000A000000093C701-000000067F000040020000A00000009450E3__0000002D59DDCFE9-0000002E0985D9D9\n000000067F000040020000A0000000940000-000000067F000040020000A0000000944000__00000038E67ABFA0\n000000067F000040020000A0000000940000-000000067F000040020000A0000000944000__0000003903F1CFE8\n000000067F000040020000A0000000940000-000000067F000040020000A0000000944000__0000003B99F7F8A0\n000000067F000040020000A0000000940000-000000067F000040020000A0000000944000__0000005D2FFFFB38\n000000067F000040020000A0000000944000-000000067F000040020000A0000000948000__00000038E67ABFA0\n000000067F000040020000A0000000944000-000000067F000040020000A0000000948000__0000003903F1CFE8\n000000067F000040020000A0000000944000-000000067F000040020000A0000000948000__0000003B99F7F8A0\n000000067F000040020000A0000000944000-000000067F000040020000A0000000948000__0000005D2FFFFB38\n000000067F000040020000A00000009450E3-000000067F000040020000A000000094DAC0__0000002D59DDCFE9-0000002E0985D9D9\n000000067F000040020000A0000000948000-000000067F000040020000A000000094C000__00000038E67ABFA0\n000000067F000040020000A0000000948000-000000067F000040020000A000000094C000__0000003903F1CFE8\n000000067F000040020000A00
00000948000-000000067F000040020000A000000094C000__0000003B99F7F8A0\n000000067F000040020000A0000000948000-000000067F000040020000A000000094C000__0000005D2FFFFB38\n000000067F000040020000A000000094C000-000000067F000040020000A0000000950000__00000038E67ABFA0\n000000067F000040020000A000000094C000-000000067F000040020000A0000000950000__0000003903F1CFE8\n000000067F000040020000A000000094C000-000000067F000040020000A0000000950000__0000003B99F7F8A0\n000000067F000040020000A000000094C000-000000067F000040020000A0000000950000__0000005D2FFFFB38\n000000067F000040020000A000000094DAC0-000000067F000040020000A0000000956495__0000002D59DDCFE9-0000002E0985D9D9\n000000067F000040020000A0000000950000-000000067F000040020000A0000000954000__00000038E67ABFA0\n000000067F000040020000A0000000950000-000000067F000040020000A0000000954000__0000003903F1CFE8\n000000067F000040020000A0000000950000-000000067F000040020000A0000000954000__0000003B99F7F8A0\n000000067F000040020000A0000000950000-000000067F000040020000A0000000954000__0000005D2FFFFB38\n000000067F000040020000A0000000954000-000000067F000040020000A0000000958000__00000038E67ABFA0\n000000067F000040020000A0000000954000-000000067F000040020000A0000000958000__0000003903F1CFE8\n000000067F000040020000A0000000954000-000000067F000040020000A0000000958000__0000003B99F7F8A0\n000000067F000040020000A0000000954000-000000067F000040020000A0000000958000__0000005D2FFFFB38\n000000067F000040020000A0000000956495-000000067F000040020000A000000095EE79__0000002D59DDCFE9-0000002E0985D9D9\n000000067F000040020000A0000000958000-000000067F000040020000A000000095C000__00000038E67ABFA0\n000000067F000040020000A0000000958000-000000067F000040020000A000000095C000__0000003903F1CFE8\n000000067F000040020000A0000000958000-000000067F000040020000A000000095C000__0000003B99F7F8A0\n000000067F000040020000A0000000958000-000000067F000040020000A000000095C000__0000005D2FFFFB38\n000000067F000040020000A000000095C000-000000067F000040020000A0000000960000__00000038E67ABFA0\n000000067F000040020000A000000095C000-0
00000067F000040020000A0000000960000__0000003903F1CFE8\n000000067F000040020000A000000095C000-000000067F000040020000A0000000960000__0000003B99F7F8A0\n000000067F000040020000A000000095C000-000000067F000040020000A0000000960000__0000005D2FFFFB38\n000000067F000040020000A000000095EE79-000000067F000040020000A0000000967850__0000002D59DDCFE9-0000002E0985D9D9\n000000067F000040020000A0000000960000-000000067F000040020000A0000000964000__00000038E67ABFA0\n000000067F000040020000A0000000960000-000000067F000040020000A0000000964000__0000003903F1CFE8\n000000067F000040020000A0000000960000-000000067F000040020000A0000000964000__0000003B99F7F8A0\n000000067F000040020000A0000000960000-000000067F000040020000A0000000964000__0000005D2FFFFB38\n000000067F000040020000A0000000964000-000000067F000040020000A0000000968000__00000038E67ABFA0\n000000067F000040020000A0000000964000-000000067F000040020000A0000000968000__0000003903F1CFE8\n000000067F000040020000A0000000964000-000000067F000040020000A0000000968000__0000003B99F7F8A0\n000000067F000040020000A0000000964000-000000067F000040020000A0000000968000__0000005D2FFFFB38\n000000067F000040020000A0000000967850-000000067F000040020000A000000097022A__0000002D59DDCFE9-0000002E0985D9D9\n000000067F000040020000A0000000968000-000000067F000040020000A000000096C000__00000038E67ABFA0\n000000067F000040020000A0000000968000-000000067F000040020000A000000096C000__0000003903F1CFE8\n000000067F000040020000A0000000968000-000000067F000040020000A000000096C000__0000003B99F7F8A0\n000000067F000040020000A0000000968000-000000067F000040020000A000000096C000__0000005D2FFFFB38\n000000067F000040020000A000000096C000-000000067F000040020000A0000000970000__00000038E67ABFA0\n000000067F000040020000A000000096C000-000000067F000040020000A0000000970000__0000003903F1CFE8\n000000067F000040020000A000000096C000-000000067F000040020000A0000000970000__0000003B99F7F8A0\n000000067F000040020000A000000096C000-000000067F000040020000A0000000970000__0000005D2FFFFB38\n000000067F000040020000A0000000970000-000000067F0000
40020000A0000000974000__00000038E67ABFA0\n000000067F000040020000A0000000970000-000000067F000040020000A0000000974000__0000003903F1CFE8\n000000067F000040020000A0000000970000-000000067F000040020000A0000000974000__0000003B99F7F8A0\n000000067F000040020000A0000000970000-000000067F000040020000A0000000974000__0000005D2FFFFB38\n000000067F000040020000A000000097022A-000000067F000040020000A0000000978BFD__0000002D59DDCFE9-0000002E0985D9D9\n000000067F000040020000A0000000974000-000000067F000040020000A0000000978000__00000038E67ABFA0\n000000067F000040020000A0000000974000-000000067F000040020000A0000000978000__0000003903F1CFE8\n000000067F000040020000A0000000974000-000000067F000040020000A0000000978000__0000003B99F7F8A0\n000000067F000040020000A0000000974000-000000067F000040020000A0000000978000__0000005D2FFFFB38\n000000067F000040020000A0000000978000-000000067F000040020000A000000097C000__00000038E67ABFA0\n000000067F000040020000A0000000978000-000000067F000040020000A000000097C000__0000003903F1CFE8\n000000067F000040020000A0000000978000-000000067F000040020000A000000097C000__0000003B99F7F8A0\n000000067F000040020000A0000000978000-000000067F000040020000A000000097C000__0000005D2FFFFB38\n000000067F000040020000A0000000978BFD-000000067F000040020000A00000009815F8__0000002D59DDCFE9-0000002E0985D9D9\n000000067F000040020000A000000097C000-000000067F000040020000A0000000980000__00000038E67ABFA0\n000000067F000040020000A000000097C000-000000067F000040020000A0000000980000__0000003903F1CFE8\n000000067F000040020000A000000097C000-000000067F000040020000A0000000980000__0000003B99F7F8A0\n000000067F000040020000A000000097C000-000000067F000040020000A0000000980000__0000005D2FFFFB38\n000000067F000040020000A0000000980000-000000067F000040020000A0000000984000__00000038E1ABFE28\n000000067F000040020000A0000000980000-000000067F000040020000A0000000984000__00000038E9AF7F00\n000000067F000040020000A0000000980000-000000067F000040020000A0000000984000__0000003903F1CFE8\n000000067F000040020000A0000000980000-000000067F000040020000A0000
000984000__0000003B99F7F8A0\n000000067F000040020000A0000000980000-000000067F000040020000A0000000984000__0000005D2FFFFB38\n000000067F000040020000A00000009815F8-000000067F000040020000A0000200000000__0000002D59DDCFE9-0000002E0985D9D9\n000000067F000040020000A00000009817EC-000000067F000040020000A000000098A1CB__0000002E0985D9D9-0000002EB92DEAC1\n000000067F000040020000A0000000984000-000000067F000040020000A0000000988000__00000038E1ABFE28\n000000067F000040020000A0000000984000-000000067F000040020000A0000000988000__00000038E9AF7F00\n000000067F000040020000A0000000984000-000000067F000040020000A0000000988000__0000003903F1CFE8\n000000067F000040020000A0000000984000-000000067F000040020000A0000000988000__0000003B99F7F8A0\n000000067F000040020000A0000000984000-000000067F000040020000A0000000988000__0000005D2FFFFB38\n000000067F000040020000A0000000988000-000000067F000040020000A000000098C000__00000038E1ABFE28\n000000067F000040020000A0000000988000-000000067F000040020000A000000098C000__00000038E9AF7F00\n000000067F000040020000A0000000988000-000000067F000040020000A000000098C000__0000003903F1CFE8\n000000067F000040020000A0000000988000-000000067F000040020000A000000098C000__0000003B99F7F8A0\n000000067F000040020000A0000000988000-000000067F000040020000A000000098C000__0000005D2FFFFB38\n000000067F000040020000A000000098A1CB-000000067F000040020000A0000000992BA5__0000002E0985D9D9-0000002EB92DEAC1\n000000067F000040020000A000000098C000-000000067F000040020000A0000000990000__00000038E1ABFE28\n000000067F000040020000A000000098C000-000000067F000040020000A0000000990000__00000038E9AF7F00\n000000067F000040020000A000000098C000-000000067F000040020000A0000000990000__0000003903F1CFE8\n000000067F000040020000A000000098C000-000000067F000040020000A0000000990000__0000003B99F7F8A0\n000000067F000040020000A000000098C000-000000067F000040020000A0000000990000__0000005D2FFFFB38\n000000067F000040020000A0000000990000-000000067F000040020000A0000000994000__00000038E1ABFE28\n000000067F000040020000A0000000990000-000000067F000040020000A
0000000994000__00000038E9AF7F00\n000000067F000040020000A0000000990000-000000067F000040020000A0000000994000__0000003903F1CFE8\n000000067F000040020000A0000000990000-000000067F000040020000A0000000994000__0000003B99F7F8A0\n000000067F000040020000A0000000990000-000000067F000040020000A0000000994000__0000005D2FFFFB38\n000000067F000040020000A0000000992BA5-000000067F000040020000A000000099B589__0000002E0985D9D9-0000002EB92DEAC1\n000000067F000040020000A0000000994000-000000067F000040020000A0000000998000__00000038E1ABFE28\n000000067F000040020000A0000000994000-000000067F000040020000A0000000998000__00000038E9AF7F00\n000000067F000040020000A0000000994000-000000067F000040020000A0000000998000__0000003903F1CFE8\n000000067F000040020000A0000000994000-000000067F000040020000A0000000998000__0000003B99F7F8A0\n000000067F000040020000A0000000994000-000000067F000040020000A0000000998000__0000005D2FFFFB38\n000000067F000040020000A0000000998000-000000067F000040020000A000000099C000__00000038E1ABFE28\n000000067F000040020000A0000000998000-000000067F000040020000A000000099C000__00000038E9AF7F00\n000000067F000040020000A0000000998000-000000067F000040020000A000000099C000__0000003903F1CFE8\n000000067F000040020000A0000000998000-000000067F000040020000A000000099C000__0000003B99F7F8A0\n000000067F000040020000A0000000998000-000000067F000040020000A000000099C000__0000005D2FFFFB38\n000000067F000040020000A000000099B589-000000067F000040020000A00000009A3F65__0000002E0985D9D9-0000002EB92DEAC1\n000000067F000040020000A000000099C000-000000067F000040020000A00000009A0000__00000038E1ABFE28\n000000067F000040020000A000000099C000-000000067F000040020000A00000009A0000__00000038E9AF7F00\n000000067F000040020000A000000099C000-000000067F000040020000A00000009A0000__0000003903F1CFE8\n000000067F000040020000A000000099C000-000000067F000040020000A00000009A0000__0000003B99F7F8A0\n000000067F000040020000A000000099C000-000000067F000040020000A00000009A0000__0000005D2FFFFB38\n000000067F000040020000A00000009A0000-000000067F000040020000A00000009A4000
__00000038E1ABFE28\n000000067F000040020000A00000009A0000-000000067F000040020000A00000009A4000__00000038E9AF7F00\n000000067F000040020000A00000009A0000-000000067F000040020000A00000009A4000__0000003903F1CFE8\n000000067F000040020000A00000009A0000-000000067F000040020000A00000009A4000__0000003B99F7F8A0\n000000067F000040020000A00000009A0000-000000067F000040020000A00000009A4000__0000005D2FFFFB38\n000000067F000040020000A00000009A3F65-000000067F000040020000A00000009AC941__0000002E0985D9D9-0000002EB92DEAC1\n000000067F000040020000A00000009A4000-000000067F000040020000A00000009A8000__00000038E1ABFE28\n000000067F000040020000A00000009A4000-000000067F000040020000A00000009A8000__00000038E9AF7F00\n000000067F000040020000A00000009A4000-000000067F000040020000A00000009A8000__0000003903F1CFE8\n000000067F000040020000A00000009A4000-000000067F000040020000A00000009A8000__0000003B99F7F8A0\n000000067F000040020000A00000009A4000-000000067F000040020000A00000009A8000__0000005D2FFFFB38\n000000067F000040020000A00000009A8000-000000067F000040020000A00000009AC000__00000038E1ABFE28\n000000067F000040020000A00000009A8000-000000067F000040020000A00000009AC000__00000038E9AF7F00\n000000067F000040020000A00000009A8000-000000067F000040020000A00000009AC000__0000003903F1CFE8\n000000067F000040020000A00000009A8000-000000067F000040020000A00000009AC000__0000003B99F7F8A0\n000000067F000040020000A00000009A8000-000000067F000040020000A00000009AC000__0000005D2FFFFB38\n000000067F000040020000A00000009AC000-000000067F000040020000A00000009B0000__00000038E1ABFE28\n000000067F000040020000A00000009AC000-000000067F000040020000A00000009B0000__00000038E9AF7F00\n000000067F000040020000A00000009AC000-000000067F000040020000A00000009B0000__0000003903F1CFE8\n000000067F000040020000A00000009AC000-000000067F000040020000A00000009B0000__0000003B99F7F8A0\n000000067F000040020000A00000009AC000-000000067F000040020000A00000009B0000__0000005D2FFFFB38\n000000067F000040020000A00000009AC941-000000067F000040020000A00000009B531B__0000002E0985D9D9-0000002EB92
DEAC1\n000000067F000040020000A00000009B0000-000000067F000040020000A00000009B4000__00000038E1ABFE28\n000000067F000040020000A00000009B0000-000000067F000040020000A00000009B4000__00000038E9AF7F00\n000000067F000040020000A00000009B0000-000000067F000040020000A00000009B4000__0000003903F1CFE8\n000000067F000040020000A00000009B0000-000000067F000040020000A00000009B4000__0000003B99F7F8A0\n000000067F000040020000A00000009B0000-000000067F000040020000A00000009B4000__0000005D2FFFFB38\n000000067F000040020000A00000009B4000-000000067F000040020000A00000009B8000__00000038E1ABFE28\n000000067F000040020000A00000009B4000-000000067F000040020000A00000009B8000__00000038E9AF7F00\n000000067F000040020000A00000009B4000-000000067F000040020000A00000009B8000__0000003903F1CFE8\n000000067F000040020000A00000009B4000-000000067F000040020000A00000009B8000__0000003B99F7F8A0\n000000067F000040020000A00000009B4000-000000067F000040020000A00000009B8000__0000005D2FFFFB38\n000000067F000040020000A00000009B531B-000000067F000040020000A00000009BDCFC__0000002E0985D9D9-0000002EB92DEAC1\n000000067F000040020000A00000009B8000-000000067F000040020000A00000009BC000__00000038E1ABFE28\n000000067F000040020000A00000009B8000-000000067F000040020000A00000009BC000__00000038E9AF7F00\n000000067F000040020000A00000009B8000-000000067F000040020000A00000009BC000__0000003903F1CFE8\n000000067F000040020000A00000009B8000-000000067F000040020000A00000009BC000__0000003B99F7F8A0\n000000067F000040020000A00000009B8000-000000067F000040020000A00000009BC000__0000005D2FFFFB38\n000000067F000040020000A00000009BC000-000000067F000040020000A00000009C0000__00000038E1ABFE28\n000000067F000040020000A00000009BC000-000000067F000040020000A00000009C0000__00000038E9AF7F00\n000000067F000040020000A00000009BC000-000000067F000040020000A00000009C0000__0000003903F1CFE8\n000000067F000040020000A00000009BC000-000000067F000040020000A00000009C0000__0000003B99F7F8A0\n000000067F000040020000A00000009BC000-000000067F000040020000A00000009C0000__0000005D2FFFFB38\n000000067F000040020000A
00000009BDCFC-000000067F000040020000A00000009C66D1__0000002E0985D9D9-0000002EB92DEAC1\n000000067F000040020000A00000009C0000-000000067F000040020000A00000009C4000__00000038E1ABFE28\n000000067F000040020000A00000009C0000-000000067F000040020000A00000009C4000__00000038E9AF7F00\n000000067F000040020000A00000009C0000-000000067F000040020000A00000009C4000__0000003903F1CFE8\n000000067F000040020000A00000009C0000-000000067F000040020000A00000009C4000__0000003B99F7F8A0\n000000067F000040020000A00000009C0000-000000067F000040020000A00000009C4000__0000005D2FFFFB38\n000000067F000040020000A00000009C4000-000000067F000040020000A00000009C8000__00000038E1ABFE28\n000000067F000040020000A00000009C4000-000000067F000040020000A00000009C8000__00000038E9AF7F00\n000000067F000040020000A00000009C4000-000000067F000040020000A00000009C8000__0000003903F1CFE8\n000000067F000040020000A00000009C4000-000000067F000040020000A00000009C8000__0000003B99F7F8A0\n000000067F000040020000A00000009C4000-000000067F000040020000A00000009C8000__0000005D2FFFFB38\n000000067F000040020000A00000009C66D1-000000067F000040020000A00000009CF0AC__0000002E0985D9D9-0000002EB92DEAC1\n000000067F000040020000A00000009C8000-000000067F000040020000A00000009CC000__00000038E1ABFE28\n000000067F000040020000A00000009C8000-000000067F000040020000A00000009CC000__00000038E9AF7F00\n000000067F000040020000A00000009C8000-000000067F000040020000A00000009CC000__0000003903F1CFE8\n000000067F000040020000A00000009C8000-000000067F000040020000A00000009CC000__0000003B99F7F8A0\n000000067F000040020000A00000009C8000-000000067F000040020000A00000009CC000__0000005D2FFFFB38\n000000067F000040020000A00000009CC000-000000067F000040020000A00000009D0000__00000038E1ABFE28\n000000067F000040020000A00000009CC000-000000067F000040020000A00000009D0000__00000038E9AF7F00\n000000067F000040020000A00000009CC000-000000067F000040020000A00000009D0000__0000003903F1CFE8\n000000067F000040020000A00000009CC000-000000067F000040020000A00000009D0000__0000003B99F7F8A0\n000000067F000040020000A00000009CC000
-000000067F000040020000A00000009D0000__0000005D2FFFFB38\n000000067F000040020000A00000009CF0AC-000000067F000040020000A00000009D7A91__0000002E0985D9D9-0000002EB92DEAC1\n000000067F000040020000A00000009D0000-000000067F000040020000A00000009D4000__00000038E1ABFE28\n000000067F000040020000A00000009D0000-000000067F000040020000A00000009D4000__00000038E9AF7F00\n000000067F000040020000A00000009D0000-000000067F000040020000A00000009D4000__0000003903F1CFE8\n000000067F000040020000A00000009D0000-000000067F000040020000A00000009D4000__0000003B99F7F8A0\n000000067F000040020000A00000009D0000-000000067F000040020000A00000009D4000__0000005D2FFFFB38\n000000067F000040020000A00000009D4000-000000067F000040020000A00000009D8000__00000038E1ABFE28\n000000067F000040020000A00000009D4000-000000067F000040020000A00000009D8000__00000038E9AF7F00\n000000067F000040020000A00000009D4000-000000067F000040020000A00000009D8000__0000003903F1CFE8\n000000067F000040020000A00000009D4000-000000067F000040020000A00000009D8000__0000003B99F7F8A0\n000000067F000040020000A00000009D4000-000000067F000040020000A00000009D8000__0000005D2FFFFB38\n000000067F000040020000A00000009D7A91-000000067F000040020000A00000009E0464__0000002E0985D9D9-0000002EB92DEAC1\n000000067F000040020000A00000009D8000-000000067F000040020000A00000009DC000__00000038E1ABFE28\n000000067F000040020000A00000009D8000-000000067F000040020000A00000009DC000__00000038E9AF7F00\n000000067F000040020000A00000009D8000-000000067F000040020000A00000009DC000__0000003903F1CFE8\n000000067F000040020000A00000009D8000-000000067F000040020000A00000009DC000__0000003B99F7F8A0\n000000067F000040020000A00000009D8000-000000067F000040020000A00000009DC000__0000005D2FFFFB38\n000000067F000040020000A00000009DC000-000000067F000040020000A00000009E0000__00000038E1ABFE28\n000000067F000040020000A00000009DC000-000000067F000040020000A00000009E0000__00000038E9AF7F00\n000000067F000040020000A00000009DC000-000000067F000040020000A00000009E0000__0000003903F1CFE8\n000000067F000040020000A00000009DC000-000000067F00
0040020000A00000009E0000__0000003B99F7F8A0\n000000067F000040020000A00000009DC000-000000067F000040020000A00000009E0000__0000005D2FFFFB38\n000000067F000040020000A00000009E0000-000000067F000040020000A00000009E4000__0000002F83FFFE68\n000000067F000040020000A00000009E0000-000000067F000040020000A00000009E4000__00000038E9AF7F00\n000000067F000040020000A00000009E0000-000000067F000040020000A00000009E4000__0000003903F1CFE8\n000000067F000040020000A00000009E0000-000000067F000040020000A00000009E4000__0000003B99F7F8A0\n000000067F000040020000A00000009E0000-000000067F000040020000A00000009E4000__0000005D2FFFFB38\n000000067F000040020000A00000009E0464-000000067F000040020000A0000200000000__0000002E0985D9D9-0000002EB92DEAC1\n000000067F000040020000A00000009E0707-000000067F000040020000A00000009E90D7__0000002EB92DEAC1-0000002F58DE5511\n000000067F000040020000A00000009E4000-000000067F000040020000A00000009E8000__0000002F83FFFE68\n000000067F000040020000A00000009E4000-000000067F000040020000A00000009E8000__00000038E9AF7F00\n000000067F000040020000A00000009E4000-000000067F000040020000A00000009E8000__0000003903F1CFE8\n000000067F000040020000A00000009E4000-000000067F000040020000A00000009E8000__0000003B99F7F8A0\n000000067F000040020000A00000009E4000-000000067F000040020000A00000009E8000__0000005D2FFFFB38\n000000067F000040020000A00000009E8000-000000067F000040020000A00000009EC000__0000002F83FFFE68\n000000067F000040020000A00000009E8000-000000067F000040020000A00000009EC000__00000038E9AF7F00\n000000067F000040020000A00000009E8000-000000067F000040020000A00000009EC000__0000003903F1CFE8\n000000067F000040020000A00000009E8000-000000067F000040020000A00000009EC000__0000003B99F7F8A0\n000000067F000040020000A00000009E8000-000000067F000040020000A00000009EC000__0000005D2FFFFB38\n000000067F000040020000A00000009E90D7-000000067F000040020000A00000009F1AB6__0000002EB92DEAC1-0000002F58DE5511\n000000067F000040020000A00000009EC000-000000067F000040020000A00000009F0000__0000002F83FFFE68\n000000067F000040020000A00000009EC000-00000006
7F000040020000A00000009F0000__00000038E9AF7F00\n000000067F000040020000A00000009EC000-000000067F000040020000A00000009F0000__0000003903F1CFE8\n000000067F000040020000A00000009EC000-000000067F000040020000A00000009F0000__0000003B99F7F8A0\n000000067F000040020000A00000009EC000-000000067F000040020000A00000009F0000__0000005D2FFFFB38\n000000067F000040020000A00000009F0000-000000067F000040020000A00000009F4000__0000002F83FFFE68\n000000067F000040020000A00000009F0000-000000067F000040020000A00000009F4000__00000038E9AF7F00\n000000067F000040020000A00000009F0000-000000067F000040020000A00000009F4000__0000003903F1CFE8\n000000067F000040020000A00000009F0000-000000067F000040020000A00000009F4000__0000003B99F7F8A0\n000000067F000040020000A00000009F0000-000000067F000040020000A00000009F4000__0000005D2FFFFB38\n000000067F000040020000A00000009F1AB6-000000067F000040020000A00000009FA4A4__0000002EB92DEAC1-0000002F58DE5511\n000000067F000040020000A00000009F4000-000000067F000040020000A00000009F8000__0000002F83FFFE68\n000000067F000040020000A00000009F4000-000000067F000040020000A00000009F8000__00000038E9AF7F00\n000000067F000040020000A00000009F4000-000000067F000040020000A00000009F8000__0000003903F1CFE8\n000000067F000040020000A00000009F4000-000000067F000040020000A00000009F8000__0000003B99F7F8A0\n000000067F000040020000A00000009F4000-000000067F000040020000A00000009F8000__0000005D2FFFFB38\n000000067F000040020000A00000009F8000-000000067F000040020000A00000009FC000__0000002F83FFFE68\n000000067F000040020000A00000009F8000-000000067F000040020000A00000009FC000__00000038E9AF7F00\n000000067F000040020000A00000009F8000-000000067F000040020000A00000009FC000__0000003903F1CFE8\n000000067F000040020000A00000009F8000-000000067F000040020000A00000009FC000__0000003B99F7F8A0\n000000067F000040020000A00000009F8000-000000067F000040020000A00000009FC000__0000005D2FFFFB38\n000000067F000040020000A00000009FA4A4-000000067F000040020000A0000000A02E70__0000002EB92DEAC1-0000002F58DE5511\n000000067F000040020000A00000009FC000-000000067F00004002000
0A0000000A00000__0000002F83FFFE68\n000000067F000040020000A00000009FC000-000000067F000040020000A0000000A00000__00000038E9AF7F00\n000000067F000040020000A00000009FC000-000000067F000040020000A0000000A00000__0000003903F1CFE8\n000000067F000040020000A00000009FC000-000000067F000040020000A0000000A00000__0000003B99F7F8A0\n000000067F000040020000A00000009FC000-000000067F000040020000A0000000A00000__0000005D2FFFFB38\n000000067F000040020000A0000000A00000-000000067F000040020000A0000000A04000__0000002F83FFFE68\n000000067F000040020000A0000000A00000-000000067F000040020000A0000000A04000__00000038E9AF7F00\n000000067F000040020000A0000000A00000-000000067F000040020000A0000000A04000__0000003903F1CFE8\n000000067F000040020000A0000000A00000-000000067F000040020000A0000000A04000__0000003B99F7F8A0\n000000067F000040020000A0000000A00000-000000067F000040020000A0000000A04000__0000005D2FFFFB38\n000000067F000040020000A0000000A02E70-000000067F000040020000A0000000A0B844__0000002EB92DEAC1-0000002F58DE5511\n000000067F000040020000A0000000A04000-000000067F000040020000A0000000A08000__0000002F83FFFE68\n000000067F000040020000A0000000A04000-000000067F000040020000A0000000A08000__00000038E9AF7F00\n000000067F000040020000A0000000A04000-000000067F000040020000A0000000A08000__0000003903F1CFE8\n000000067F000040020000A0000000A04000-000000067F000040020000A0000000A08000__0000003B99F7F8A0\n000000067F000040020000A0000000A04000-000000067F000040020000A0000000A08000__0000005D2FFFFB38\n000000067F000040020000A0000000A08000-000000067F000040020000A0000000A0C000__0000002F83FFFE68\n000000067F000040020000A0000000A08000-000000067F000040020000A0000000A0C000__00000038E9AF7F00\n000000067F000040020000A0000000A08000-000000067F000040020000A0000000A0C000__0000003903F1CFE8\n000000067F000040020000A0000000A08000-000000067F000040020000A0000000A0C000__0000003B99F7F8A0\n000000067F000040020000A0000000A08000-000000067F000040020000A0000000A0C000__0000005D2FFFFB38\n000000067F000040020000A0000000A0B844-000000067F000040020000A0000000A14223__0000002EB92DE
AC1-0000002F58DE5511\n000000067F000040020000A0000000A0C000-000000067F000040020000A0000000A10000__0000002F83FFFE68\n000000067F000040020000A0000000A0C000-000000067F000040020000A0000000A10000__00000038E9AF7F00\n000000067F000040020000A0000000A0C000-000000067F000040020000A0000000A10000__0000003903F1CFE8\n000000067F000040020000A0000000A0C000-000000067F000040020000A0000000A10000__0000003B99F7F8A0\n000000067F000040020000A0000000A0C000-000000067F000040020000A0000000A10000__0000005D2FFFFB38\n000000067F000040020000A0000000A10000-000000067F000040020000A0000000A14000__0000002F83FFFE68\n000000067F000040020000A0000000A10000-000000067F000040020000A0000000A14000__00000038E9AF7F00\n000000067F000040020000A0000000A10000-000000067F000040020000A0000000A14000__0000003903F1CFE8\n000000067F000040020000A0000000A10000-000000067F000040020000A0000000A14000__0000003B99F7F8A0\n000000067F000040020000A0000000A10000-000000067F000040020000A0000000A14000__0000005D2FFFFB38\n000000067F000040020000A0000000A14000-000000067F000040020000A0000000A18000__0000002F83FFFE68\n000000067F000040020000A0000000A14000-000000067F000040020000A0000000A18000__00000038E9AF7F00\n000000067F000040020000A0000000A14000-000000067F000040020000A0000000A18000__0000003903F1CFE8\n000000067F000040020000A0000000A14000-000000067F000040020000A0000000A18000__0000003B99F7F8A0\n000000067F000040020000A0000000A14000-000000067F000040020000A0000000A18000__0000005D2FFFFB38\n000000067F000040020000A0000000A14223-000000067F000040020000A0000000A1CBFC__0000002EB92DEAC1-0000002F58DE5511\n000000067F000040020000A0000000A18000-000000067F000040020000A0000000A1C000__0000002F83FFFE68\n000000067F000040020000A0000000A18000-000000067F000040020000A0000000A1C000__00000038E9AF7F00\n000000067F000040020000A0000000A18000-000000067F000040020000A0000000A1C000__0000003903F1CFE8\n000000067F000040020000A0000000A18000-000000067F000040020000A0000000A1C000__0000003B99F7F8A0\n000000067F000040020000A0000000A18000-000000067F000040020000A0000000A1C000__0000005D2FFFFB38\n00000006
7F000040020000A0000000A1C000-000000067F000040020000A0000000A20000__0000002F83FFFE68\n000000067F000040020000A0000000A1C000-000000067F000040020000A0000000A20000__00000038E9AF7F00\n000000067F000040020000A0000000A1C000-000000067F000040020000A0000000A20000__0000003903F1CFE8\n000000067F000040020000A0000000A1C000-000000067F000040020000A0000000A20000__0000003B99F7F8A0\n000000067F000040020000A0000000A1C000-000000067F000040020000A0000000A20000__0000005D2FFFFB38\n000000067F000040020000A0000000A1CBFC-000000067F000040020000A0000000A255DB__0000002EB92DEAC1-0000002F58DE5511\n000000067F000040020000A0000000A20000-000000067F000040020000A0000000A24000__0000002F83FFFE68\n000000067F000040020000A0000000A20000-000000067F000040020000A0000000A24000__00000038E9AF7F00\n000000067F000040020000A0000000A20000-000000067F000040020000A0000000A24000__0000003903F1CFE8\n000000067F000040020000A0000000A20000-000000067F000040020000A0000000A24000__0000003B99F7F8A0\n000000067F000040020000A0000000A20000-000000067F000040020000A0000000A24000__0000005D2FFFFB38\n000000067F000040020000A0000000A24000-000000067F000040020000A0000000A28000__0000002F83FFFE68\n000000067F000040020000A0000000A24000-000000067F000040020000A0000000A28000__00000038E9AF7F00\n000000067F000040020000A0000000A24000-000000067F000040020000A0000000A28000__0000003903F1CFE8\n000000067F000040020000A0000000A24000-000000067F000040020000A0000000A28000__0000003B99F7F8A0\n000000067F000040020000A0000000A24000-000000067F000040020000A0000000A28000__0000005D2FFFFB38\n000000067F000040020000A0000000A255DB-000000067F000040020000A0000000A2DFCE__0000002EB92DEAC1-0000002F58DE5511\n000000067F000040020000A0000000A28000-000000067F000040020000A0000000A2C000__0000002F83FFFE68\n000000067F000040020000A0000000A28000-000000067F000040020000A0000000A2C000__00000038E9AF7F00\n000000067F000040020000A0000000A28000-000000067F000040020000A0000000A2C000__0000003903F1CFE8\n000000067F000040020000A0000000A28000-000000067F000040020000A0000000A2C000__0000003B99F7F8A0\n000000067F00004002000
0A0000000A28000-000000067F000040020000A0000000A2C000__0000005D2FFFFB38\n000000067F000040020000A0000000A2C000-000000067F000040020000A0000000A30000__0000002F83FFFE68\n000000067F000040020000A0000000A2C000-000000067F000040020000A0000000A30000__00000038E9AF7F00\n000000067F000040020000A0000000A2C000-000000067F000040020000A0000000A30000__0000003903F1CFE8\n000000067F000040020000A0000000A2C000-000000067F000040020000A0000000A30000__0000003B99F7F8A0\n000000067F000040020000A0000000A2C000-000000067F000040020000A0000000A30000__0000005D2FFFFB38\n000000067F000040020000A0000000A2DFCE-000000067F000040020000A0000000A369B3__0000002EB92DEAC1-0000002F58DE5511\n000000067F000040020000A0000000A30000-000000067F000040020000A0000000A34000__0000002F83FFFE68\n000000067F000040020000A0000000A30000-000000067F000040020000A0000000A34000__00000038E9AF7F00\n000000067F000040020000A0000000A30000-000000067F000040020000A0000000A34000__0000003903F1CFE8\n000000067F000040020000A0000000A30000-000000067F000040020000A0000000A34000__0000003B99F7F8A0\n000000067F000040020000A0000000A30000-000000067F000040020000A0000000A34000__0000005D2FFFFB38\n000000067F000040020000A0000000A34000-000000067F000040020000A0000000A38000__0000002F83FFFE68\n000000067F000040020000A0000000A34000-000000067F000040020000A0000000A38000__00000038E67ABFA0\n000000067F000040020000A0000000A34000-000000067F000040020000A0000000A38000__0000003903F1CFE8\n000000067F000040020000A0000000A34000-000000067F000040020000A0000000A38000__0000003B99F7F8A0\n000000067F000040020000A0000000A34000-000000067F000040020000A0000000A38000__0000005D2FFFFB38\n000000067F000040020000A0000000A369B3-000000067F000040020000A0000200000000__0000002EB92DEAC1-0000002F58DE5511\n000000067F000040020000A0000000A36B5B-000000067F000040020000A0000000A3F527__0000002F58DE5511-000000300885D069\n000000067F000040020000A0000000A38000-000000067F000040020000A0000000A3C000__0000002F83FFFE68\n000000067F000040020000A0000000A38000-000000067F000040020000A0000000A3C000__00000038E67ABFA0\n000000067F0000400
20000A0000000A38000-000000067F000040020000A0000000A3C000__0000003903F1CFE8\n000000067F000040020000A0000000A38000-000000067F000040020000A0000000A3C000__0000003B99F7F8A0\n000000067F000040020000A0000000A38000-000000067F000040020000A0000000A3C000__0000005D2FFFFB38\n000000067F000040020000A0000000A3C000-000000067F000040020000A0000000A40000__0000002F83FFFE68\n000000067F000040020000A0000000A3C000-000000067F000040020000A0000000A40000__00000038E67ABFA0\n000000067F000040020000A0000000A3C000-000000067F000040020000A0000000A40000__0000003903F1CFE8\n000000067F000040020000A0000000A3C000-000000067F000040020000A0000000A40000__0000003B99F7F8A0\n000000067F000040020000A0000000A3C000-000000067F000040020000A0000000A40000__0000005D2FFFFB38\n000000067F000040020000A0000000A3F527-000000067F000040020000A0000000A47EFA__0000002F58DE5511-000000300885D069\n000000067F000040020000A0000000A40000-000000067F000040020000A0000000A44000__0000002F83FFFE68\n000000067F000040020000A0000000A40000-000000067F000040020000A0000000A44000__00000038E67ABFA0\n000000067F000040020000A0000000A40000-000000067F000040020000A0000000A44000__0000003903F1CFE8\n000000067F000040020000A0000000A40000-000000067F000040020000A0000000A44000__0000003B99F7F8A0\n000000067F000040020000A0000000A40000-000000067F000040020000A0000000A44000__0000005D2FFFFB38\n000000067F000040020000A0000000A44000-000000067F000040020000A0000000A48000__0000002F83FFFE68\n000000067F000040020000A0000000A44000-000000067F000040020000A0000000A48000__00000038E67ABFA0\n000000067F000040020000A0000000A44000-000000067F000040020000A0000000A48000__0000003903F1CFE8\n000000067F000040020000A0000000A44000-000000067F000040020000A0000000A48000__0000003B99F7F8A0\n000000067F000040020000A0000000A44000-000000067F000040020000A0000000A48000__0000005D2FFFFB38\n000000067F000040020000A0000000A47EFA-000000067F000040020000A0000000A508E3__0000002F58DE5511-000000300885D069\n000000067F000040020000A0000000A48000-000000067F000040020000A0000000A4C000__0000002F83FFFE68\n000000067F000040020000A0000000
A48000-000000067F000040020000A0000000A4C000__00000038E67ABFA0\n000000067F000040020000A0000000A48000-000000067F000040020000A0000000A4C000__0000003903F1CFE8\n000000067F000040020000A0000000A48000-000000067F000040020000A0000000A4C000__0000003B99F7F8A0\n000000067F000040020000A0000000A48000-000000067F000040020000A0000000A4C000__0000005D2FFFFB38\n000000067F000040020000A0000000A4C000-000000067F000040020000A0000000A50000__00000038E67ABFA0\n000000067F000040020000A0000000A4C000-000000067F000040020000A0000000A50000__0000003903F1CFE8\n000000067F000040020000A0000000A4C000-000000067F000040020000A0000000A50000__0000003B99F7F8A0\n000000067F000040020000A0000000A4C000-000000067F000040020000A0000000A50000__0000005D2FFFFB38\n000000067F000040020000A0000000A4C000-030000000000000000000000000000000002__0000002F83FFFE68\n000000067F000040020000A0000000A50000-000000067F000040020000A0000000A54000__00000038E67ABFA0\n000000067F000040020000A0000000A50000-000000067F000040020000A0000000A54000__0000003903F1CFE8\n000000067F000040020000A0000000A50000-000000067F000040020000A0000000A54000__0000003B99F7F8A0\n000000067F000040020000A0000000A50000-000000067F000040020000A0000000A54000__0000005D2FFFFB38\n000000067F000040020000A0000000A508E3-000000067F000040020000A0000000A592C6__0000002F58DE5511-000000300885D069\n000000067F000040020000A0000000A54000-000000067F000040020000A0000000A58000__00000038E67ABFA0\n000000067F000040020000A0000000A54000-000000067F000040020000A0000000A58000__0000003903F1CFE8\n000000067F000040020000A0000000A54000-000000067F000040020000A0000000A58000__0000003B99F7F8A0\n000000067F000040020000A0000000A54000-000000067F000040020000A0000000A58000__0000005D2FFFFB38\n000000067F000040020000A0000000A58000-000000067F000040020000A0000000A5C000__00000038E67ABFA0\n000000067F000040020000A0000000A58000-000000067F000040020000A0000000A5C000__0000003903F1CFE8\n000000067F000040020000A0000000A58000-000000067F000040020000A0000000A5C000__0000003B99F7F8A0\n000000067F000040020000A0000000A58000-000000067F000040020000A
0000000A5C000__0000005D2FFFFB38\n000000067F000040020000A0000000A592C6-000000067F000040020000A0000000A61CA5__0000002F58DE5511-000000300885D069\n000000067F000040020000A0000000A5C000-000000067F000040020000A0000000A60000__00000038E67ABFA0\n000000067F000040020000A0000000A5C000-000000067F000040020000A0000000A60000__0000003903F1CFE8\n000000067F000040020000A0000000A5C000-000000067F000040020000A0000000A60000__0000003B99F7F8A0\n000000067F000040020000A0000000A5C000-000000067F000040020000A0000000A60000__0000005D2FFFFB38\n000000067F000040020000A0000000A60000-000000067F000040020000A0000000A64000__00000038E67ABFA0\n000000067F000040020000A0000000A60000-000000067F000040020000A0000000A64000__0000003903F1CFE8\n000000067F000040020000A0000000A60000-000000067F000040020000A0000000A64000__0000003B99F7F8A0\n000000067F000040020000A0000000A60000-000000067F000040020000A0000000A64000__0000005D2FFFFB38\n000000067F000040020000A0000000A61CA5-000000067F000040020000A0000000A6A68D__0000002F58DE5511-000000300885D069\n000000067F000040020000A0000000A64000-000000067F000040020000A0000000A68000__00000038E67ABFA0\n000000067F000040020000A0000000A64000-000000067F000040020000A0000000A68000__0000003903F1CFE8\n000000067F000040020000A0000000A64000-000000067F000040020000A0000000A68000__0000003B99F7F8A0\n000000067F000040020000A0000000A64000-000000067F000040020000A0000000A68000__0000005D2FFFFB38\n000000067F000040020000A0000000A68000-000000067F000040020000A0000000A6C000__00000038E67ABFA0\n000000067F000040020000A0000000A68000-000000067F000040020000A0000000A6C000__0000003903F1CFE8\n000000067F000040020000A0000000A68000-000000067F000040020000A0000000A6C000__0000003B99F7F8A0\n000000067F000040020000A0000000A68000-000000067F000040020000A0000000A6C000__0000005D2FFFFB38\n000000067F000040020000A0000000A6A68D-000000067F000040020000A0000000A73072__0000002F58DE5511-000000300885D069\n000000067F000040020000A0000000A6C000-000000067F000040020000A0000000A70000__00000038E67ABFA0\n000000067F000040020000A0000000A6C000-000000067F000040020
000A0000000A70000__0000003903F1CFE8\n000000067F000040020000A0000000A6C000-000000067F000040020000A0000000A70000__0000003B99F7F8A0\n000000067F000040020000A0000000A6C000-000000067F000040020000A0000000A70000__0000005D2FFFFB38\n000000067F000040020000A0000000A70000-000000067F000040020000A0000000A74000__00000038E67ABFA0\n000000067F000040020000A0000000A70000-000000067F000040020000A0000000A74000__0000003903F1CFE8\n000000067F000040020000A0000000A70000-000000067F000040020000A0000000A74000__0000003B99F7F8A0\n000000067F000040020000A0000000A70000-000000067F000040020000A0000000A74000__0000005D2FFFFB38\n000000067F000040020000A0000000A73072-000000067F000040020000A0000000A7BA4E__0000002F58DE5511-000000300885D069\n000000067F000040020000A0000000A74000-000000067F000040020000A0000000A78000__00000038E67ABFA0\n000000067F000040020000A0000000A74000-000000067F000040020000A0000000A78000__0000003903F1CFE8\n000000067F000040020000A0000000A74000-000000067F000040020000A0000000A78000__0000003B99F7F8A0\n000000067F000040020000A0000000A74000-000000067F000040020000A0000000A78000__0000005D2FFFFB38\n000000067F000040020000A0000000A78000-000000067F000040020000A0000000A7C000__00000038E67ABFA0\n000000067F000040020000A0000000A78000-000000067F000040020000A0000000A7C000__0000003903F1CFE8\n000000067F000040020000A0000000A78000-000000067F000040020000A0000000A7C000__0000003B99F7F8A0\n000000067F000040020000A0000000A78000-000000067F000040020000A0000000A7C000__0000005D2FFFFB38\n000000067F000040020000A0000000A7BA4E-000000067F000040020000A0000000A84426__0000002F58DE5511-000000300885D069\n000000067F000040020000A0000000A7C000-000000067F000040020000A0000000A80000__00000038E67ABFA0\n000000067F000040020000A0000000A7C000-000000067F000040020000A0000000A80000__0000003903F1CFE8\n000000067F000040020000A0000000A7C000-000000067F000040020000A0000000A80000__0000003B99F7F8A0\n000000067F000040020000A0000000A7C000-000000067F000040020000A0000000A80000__0000005D2FFFFB38\n000000067F000040020000A0000000A80000-000000067F000040020000A0000000A8
4000__00000038E67ABFA0\n000000067F000040020000A0000000A80000-000000067F000040020000A0000000A84000__0000003903F1CFE8\n000000067F000040020000A0000000A80000-000000067F000040020000A0000000A84000__0000003B99F7F8A0\n000000067F000040020000A0000000A80000-000000067F000040020000A0000000A84000__0000005D2FFFFB38\n000000067F000040020000A0000000A84000-000000067F000040020000A0000000A88000__00000038E67ABFA0\n000000067F000040020000A0000000A84000-000000067F000040020000A0000000A88000__0000003903F1CFE8\n000000067F000040020000A0000000A84000-000000067F000040020000A0000000A88000__0000003B99F7F8A0\n000000067F000040020000A0000000A84000-000000067F000040020000A0000000A88000__0000005D2FFFFB38\n000000067F000040020000A0000000A84426-000000067F000040020000A0000000A8CDF4__0000002F58DE5511-000000300885D069\n000000067F000040020000A0000000A88000-000000067F000040020000A0000000A8C000__00000038E67ABFA0\n000000067F000040020000A0000000A88000-000000067F000040020000A0000000A8C000__0000003903F1CFE8\n000000067F000040020000A0000000A88000-000000067F000040020000A0000000A8C000__0000003B99F7F8A0\n000000067F000040020000A0000000A88000-000000067F000040020000A0000000A8C000__0000005D2FFFFB38\n000000067F000040020000A0000000A8C000-000000067F000040020000A0000000A90000__00000038E67ABFA0\n000000067F000040020000A0000000A8C000-000000067F000040020000A0000000A90000__0000003903F1CFE8\n000000067F000040020000A0000000A8C000-000000067F000040020000A0000000A90000__0000003B99F7F8A0\n000000067F000040020000A0000000A8C000-000000067F000040020000A0000000A90000__0000005D2FFFFB38\n000000067F000040020000A0000000A8CDF4-000000067F000040020000A0000000A957D8__0000002F58DE5511-000000300885D069\n000000067F000040020000A0000000A90000-000000067F000040020000A0000000A94000__00000038E67ABFA0\n000000067F000040020000A0000000A90000-000000067F000040020000A0000000A94000__0000003903F1CFE8\n000000067F000040020000A0000000A90000-000000067F000040020000A0000000A94000__0000003B99F7F8A0\n000000067F000040020000A0000000A90000-000000067F000040020000A0000000A94000__0000005
D2FFFFB38\n000000067F000040020000A0000000A94000-000000067F000040020000A0000000A98000__00000038E1ABFE28\n000000067F000040020000A0000000A94000-000000067F000040020000A0000000A98000__00000038E9AF7F00\n000000067F000040020000A0000000A94000-000000067F000040020000A0000000A98000__0000003903F1CFE8\n000000067F000040020000A0000000A94000-000000067F000040020000A0000000A98000__0000003B99F7F8A0\n000000067F000040020000A0000000A94000-000000067F000040020000A0000000A98000__0000005D2FFFFB38\n000000067F000040020000A0000000A957D8-000000067F000040020000A0000200000000__0000002F58DE5511-000000300885D069\n000000067F000040020000A0000000A95A7F-000000067F000040020000A0000000A9E45C__000000300885D069-00000030B82DF289\n000000067F000040020000A0000000A98000-000000067F000040020000A0000000A9C000__00000038E1ABFE28\n000000067F000040020000A0000000A98000-000000067F000040020000A0000000A9C000__00000038E9AF7F00\n000000067F000040020000A0000000A98000-000000067F000040020000A0000000A9C000__0000003903F1CFE8\n000000067F000040020000A0000000A98000-000000067F000040020000A0000000A9C000__0000003B99F7F8A0\n000000067F000040020000A0000000A98000-000000067F000040020000A0000000A9C000__0000005D2FFFFB38\n000000067F000040020000A0000000A9C000-000000067F000040020000A0000000AA0000__00000038E1ABFE28\n000000067F000040020000A0000000A9C000-000000067F000040020000A0000000AA0000__00000038E9AF7F00\n000000067F000040020000A0000000A9C000-000000067F000040020000A0000000AA0000__0000003903F1CFE8\n000000067F000040020000A0000000A9C000-000000067F000040020000A0000000AA0000__0000003B99F7F8A0\n000000067F000040020000A0000000A9C000-000000067F000040020000A0000000AA0000__0000005D2FFFFB38\n000000067F000040020000A0000000A9E45C-000000067F000040020000A0000000AA6E3F__000000300885D069-00000030B82DF289\n000000067F000040020000A0000000AA0000-000000067F000040020000A0000000AA4000__00000038E1ABFE28\n000000067F000040020000A0000000AA0000-000000067F000040020000A0000000AA4000__00000038E9AF7F00\n000000067F000040020000A0000000AA0000-000000067F000040020000A0000000AA4000__000
0003903F1CFE8\n000000067F000040020000A0000000AA0000-000000067F000040020000A0000000AA4000__0000003B99F7F8A0\n000000067F000040020000A0000000AA0000-000000067F000040020000A0000000AA4000__0000005D2FFFFB38\n000000067F000040020000A0000000AA4000-000000067F000040020000A0000000AA8000__00000038E1ABFE28\n000000067F000040020000A0000000AA4000-000000067F000040020000A0000000AA8000__00000038E9AF7F00\n000000067F000040020000A0000000AA4000-000000067F000040020000A0000000AA8000__0000003903F1CFE8\n000000067F000040020000A0000000AA4000-000000067F000040020000A0000000AA8000__0000003B99F7F8A0\n000000067F000040020000A0000000AA4000-000000067F000040020000A0000000AA8000__0000005D2FFFFB38\n000000067F000040020000A0000000AA6E3F-000000067F000040020000A0000000AAF81B__000000300885D069-00000030B82DF289\n000000067F000040020000A0000000AA8000-000000067F000040020000A0000000AAC000__00000038E1ABFE28\n000000067F000040020000A0000000AA8000-000000067F000040020000A0000000AAC000__00000038E9AF7F00\n000000067F000040020000A0000000AA8000-000000067F000040020000A0000000AAC000__0000003903F1CFE8\n000000067F000040020000A0000000AA8000-000000067F000040020000A0000000AAC000__0000003B99F7F8A0\n000000067F000040020000A0000000AA8000-000000067F000040020000A0000000AAC000__0000005D2FFFFB38\n000000067F000040020000A0000000AAC000-000000067F000040020000A0000000AB0000__00000038E1ABFE28\n000000067F000040020000A0000000AAC000-000000067F000040020000A0000000AB0000__00000038E9AF7F00\n000000067F000040020000A0000000AAC000-000000067F000040020000A0000000AB0000__0000003903F1CFE8\n000000067F000040020000A0000000AAC000-000000067F000040020000A0000000AB0000__0000003B99F7F8A0\n000000067F000040020000A0000000AAC000-000000067F000040020000A0000000AB0000__0000005D2FFFFB38\n000000067F000040020000A0000000AAF81B-000000067F000040020000A0000000AB81F8__000000300885D069-00000030B82DF289\n000000067F000040020000A0000000AB0000-000000067F000040020000A0000000AB4000__00000038E1ABFE28\n000000067F000040020000A0000000AB0000-000000067F000040020000A0000000AB4000__00000038E9AF7F00
\n000000067F000040020000A0000000AB0000-000000067F000040020000A0000000AB4000__0000003903F1CFE8\n000000067F000040020000A0000000AB0000-000000067F000040020000A0000000AB4000__0000003B99F7F8A0\n000000067F000040020000A0000000AB0000-000000067F000040020000A0000000AB4000__0000005D2FFFFB38\n000000067F000040020000A0000000AB4000-000000067F000040020000A0000000AB8000__00000038E1ABFE28\n000000067F000040020000A0000000AB4000-000000067F000040020000A0000000AB8000__00000038E9AF7F00\n000000067F000040020000A0000000AB4000-000000067F000040020000A0000000AB8000__0000003903F1CFE8\n000000067F000040020000A0000000AB4000-000000067F000040020000A0000000AB8000__0000003B99F7F8A0\n000000067F000040020000A0000000AB4000-000000067F000040020000A0000000AB8000__0000005D2FFFFB38\n000000067F000040020000A0000000AB8000-000000067F000040020000A0000000ABC000__00000038E1ABFE28\n000000067F000040020000A0000000AB8000-000000067F000040020000A0000000ABC000__00000038E9AF7F00\n000000067F000040020000A0000000AB8000-000000067F000040020000A0000000ABC000__0000003903F1CFE8\n000000067F000040020000A0000000AB8000-000000067F000040020000A0000000ABC000__0000003B99F7F8A0\n000000067F000040020000A0000000AB8000-000000067F000040020000A0000000ABC000__0000005D2FFFFB38\n000000067F000040020000A0000000AB81F8-000000067F000040020000A0000000AC0BE2__000000300885D069-00000030B82DF289\n000000067F000040020000A0000000ABC000-000000067F000040020000A0000000AC0000__00000038E1ABFE28\n000000067F000040020000A0000000ABC000-000000067F000040020000A0000000AC0000__00000038E9AF7F00\n000000067F000040020000A0000000ABC000-000000067F000040020000A0000000AC0000__0000003903F1CFE8\n000000067F000040020000A0000000ABC000-000000067F000040020000A0000000AC0000__0000003B99F7F8A0\n000000067F000040020000A0000000ABC000-000000067F000040020000A0000000AC0000__0000005D2FFFFB38\n000000067F000040020000A0000000AC0000-000000067F000040020000A0000000AC4000__00000038E1ABFE28\n000000067F000040020000A0000000AC0000-000000067F000040020000A0000000AC4000__00000038E9AF7F00\n000000067F000040020000A00000
00AC0000-000000067F000040020000A0000000AC4000__0000003903F1CFE8\n000000067F000040020000A0000000AC0000-000000067F000040020000A0000000AC4000__0000003B99F7F8A0\n000000067F000040020000A0000000AC0000-000000067F000040020000A0000000AC4000__0000005D2FFFFB38\n000000067F000040020000A0000000AC0BE2-000000067F000040020000A0000000AC95C0__000000300885D069-00000030B82DF289\n000000067F000040020000A0000000AC4000-000000067F000040020000A0000000AC8000__00000038E1ABFE28\n000000067F000040020000A0000000AC4000-000000067F000040020000A0000000AC8000__00000038E9AF7F00\n000000067F000040020000A0000000AC4000-000000067F000040020000A0000000AC8000__0000003903F1CFE8\n000000067F000040020000A0000000AC4000-000000067F000040020000A0000000AC8000__0000003B99F7F8A0\n000000067F000040020000A0000000AC4000-000000067F000040020000A0000000AC8000__0000005D2FFFFB38\n000000067F000040020000A0000000AC8000-000000067F000040020000A0000000ACC000__00000038E1ABFE28\n000000067F000040020000A0000000AC8000-000000067F000040020000A0000000ACC000__00000038E9AF7F00\n000000067F000040020000A0000000AC8000-000000067F000040020000A0000000ACC000__0000003903F1CFE8\n000000067F000040020000A0000000AC8000-000000067F000040020000A0000000ACC000__0000003B99F7F8A0\n000000067F000040020000A0000000AC8000-000000067F000040020000A0000000ACC000__0000005D2FFFFB38\n000000067F000040020000A0000000AC95C0-000000067F000040020000A0000000AD1F9F__000000300885D069-00000030B82DF289\n000000067F000040020000A0000000ACC000-000000067F000040020000A0000000AD0000__00000038E1ABFE28\n000000067F000040020000A0000000ACC000-000000067F000040020000A0000000AD0000__00000038E9AF7F00\n000000067F000040020000A0000000ACC000-000000067F000040020000A0000000AD0000__0000003903F1CFE8\n000000067F000040020000A0000000ACC000-000000067F000040020000A0000000AD0000__0000003B99F7F8A0\n000000067F000040020000A0000000ACC000-000000067F000040020000A0000000AD0000__0000005D2FFFFB38\n000000067F000040020000A0000000AD0000-000000067F000040020000A0000000AD4000__00000038E1ABFE28\n000000067F000040020000A0000000AD0000-0000
00067F000040020000A0000000AD4000__00000038E9AF7F00\n000000067F000040020000A0000000AD0000-000000067F000040020000A0000000AD4000__0000003903F1CFE8\n000000067F000040020000A0000000AD0000-000000067F000040020000A0000000AD4000__0000003B99F7F8A0\n000000067F000040020000A0000000AD0000-000000067F000040020000A0000000AD4000__0000005D2FFFFB38\n000000067F000040020000A0000000AD1F9F-000000067F000040020000A0000000ADA983__000000300885D069-00000030B82DF289\n000000067F000040020000A0000000AD4000-000000067F000040020000A0000000AD8000__00000038E1ABFE28\n000000067F000040020000A0000000AD4000-000000067F000040020000A0000000AD8000__00000038E9AF7F00\n000000067F000040020000A0000000AD4000-000000067F000040020000A0000000AD8000__0000003903F1CFE8\n000000067F000040020000A0000000AD4000-000000067F000040020000A0000000AD8000__0000003B99F7F8A0\n000000067F000040020000A0000000AD4000-000000067F000040020000A0000000AD8000__0000005D2FFFFB38\n000000067F000040020000A0000000AD8000-000000067F000040020000A0000000ADC000__00000038E1ABFE28\n000000067F000040020000A0000000AD8000-000000067F000040020000A0000000ADC000__00000038E9AF7F00\n000000067F000040020000A0000000AD8000-000000067F000040020000A0000000ADC000__0000003903F1CFE8\n000000067F000040020000A0000000AD8000-000000067F000040020000A0000000ADC000__0000003B99F7F8A0\n000000067F000040020000A0000000AD8000-000000067F000040020000A0000000ADC000__0000005D2FFFFB38\n000000067F000040020000A0000000ADA983-000000067F000040020000A0000000AE3365__000000300885D069-00000030B82DF289\n000000067F000040020000A0000000ADC000-000000067F000040020000A0000000AE0000__00000038E1ABFE28\n000000067F000040020000A0000000ADC000-000000067F000040020000A0000000AE0000__00000038E9AF7F00\n000000067F000040020000A0000000ADC000-000000067F000040020000A0000000AE0000__0000003903F1CFE8\n000000067F000040020000A0000000ADC000-000000067F000040020000A0000000AE0000__0000003B99F7F8A0\n000000067F000040020000A0000000ADC000-000000067F000040020000A0000000AE0000__0000005D2FFFFB38\n000000067F000040020000A0000000AE0000-000000067F0000400
20000A0000000AE4000__00000038E1ABFE28\n000000067F000040020000A0000000AE0000-000000067F000040020000A0000000AE4000__00000038E9AF7F00\n000000067F000040020000A0000000AE0000-000000067F000040020000A0000000AE4000__0000003903F1CFE8\n000000067F000040020000A0000000AE0000-000000067F000040020000A0000000AE4000__0000003B99F7F8A0\n000000067F000040020000A0000000AE0000-000000067F000040020000A0000000AE4000__0000005D2FFFFB38\n000000067F000040020000A0000000AE3365-000000067F000040020000A0000000AEBD39__000000300885D069-00000030B82DF289\n000000067F000040020000A0000000AE4000-000000067F000040020000A0000000AE8000__00000038E1ABFE28\n000000067F000040020000A0000000AE4000-000000067F000040020000A0000000AE8000__00000038E9AF7F00\n000000067F000040020000A0000000AE4000-000000067F000040020000A0000000AE8000__0000003903F1CFE8\n000000067F000040020000A0000000AE4000-000000067F000040020000A0000000AE8000__0000003B99F7F8A0\n000000067F000040020000A0000000AE4000-000000067F000040020000A0000000AE8000__0000005D2FFFFB38\n000000067F000040020000A0000000AE8000-000000067F000040020000A0000000AEC000__00000038E1ABFE28\n000000067F000040020000A0000000AE8000-000000067F000040020000A0000000AEC000__00000038E9AF7F00\n000000067F000040020000A0000000AE8000-000000067F000040020000A0000000AEC000__0000003903F1CFE8\n000000067F000040020000A0000000AE8000-000000067F000040020000A0000000AEC000__0000003B99F7F8A0\n000000067F000040020000A0000000AE8000-000000067F000040020000A0000000AEC000__0000005D2FFFFB38\n000000067F000040020000A0000000AEBD39-000000067F000040020000A0000000AF4712__000000300885D069-00000030B82DF289\n000000067F000040020000A0000000AEC000-000000067F000040020000A0000000AF0000__00000038E1ABFE28\n000000067F000040020000A0000000AEC000-000000067F000040020000A0000000AF0000__00000038E9AF7F00\n000000067F000040020000A0000000AEC000-000000067F000040020000A0000000AF0000__0000003903F1CFE8\n000000067F000040020000A0000000AEC000-000000067F000040020000A0000000AF0000__0000003B99F7F8A0\n000000067F000040020000A0000000AEC000-000000067F000040020000A0000000
AF0000__0000005D2FFFFB38\n000000067F000040020000A0000000AF0000-000000067F000040020000A0000000AF4000__00000038E1ABFE28\n000000067F000040020000A0000000AF0000-000000067F000040020000A0000000AF4000__00000038E9AF7F00\n000000067F000040020000A0000000AF0000-000000067F000040020000A0000000AF4000__0000003903F1CFE8\n000000067F000040020000A0000000AF0000-000000067F000040020000A0000000AF4000__0000003B99F7F8A0\n000000067F000040020000A0000000AF0000-000000067F000040020000A0000000AF4000__0000005D2FFFFB38\n000000067F000040020000A0000000AF4000-000000067F000040020000A0000000AF8000__00000031853FEA98\n000000067F000040020000A0000000AF4000-000000067F000040020000A0000000AF8000__00000038E9AF7F00\n000000067F000040020000A0000000AF4000-000000067F000040020000A0000000AF8000__0000003903F1CFE8\n000000067F000040020000A0000000AF4000-000000067F000040020000A0000000AF8000__0000003B99F7F8A0\n000000067F000040020000A0000000AF4000-000000067F000040020000A0000000AF8000__0000005D2FFFFB38\n000000067F000040020000A0000000AF4712-000000067F000040020000A0000200000000__000000300885D069-00000030B82DF289\n000000067F000040020000A0000000AF4908-000000067F000040020000A0000000AFD2DF__00000030B82DF289-0000003157DDD551\n000000067F000040020000A0000000AF8000-000000067F000040020000A0000000AFC000__00000031853FEA98\n000000067F000040020000A0000000AF8000-000000067F000040020000A0000000AFC000__00000038E9AF7F00\n000000067F000040020000A0000000AF8000-000000067F000040020000A0000000AFC000__0000003903F1CFE8\n000000067F000040020000A0000000AF8000-000000067F000040020000A0000000AFC000__0000003B99F7F8A0\n000000067F000040020000A0000000AF8000-000000067F000040020000A0000000AFC000__0000005D2FFFFB38\n000000067F000040020000A0000000AFC000-000000067F000040020000A0000000B00000__00000031853FEA98\n000000067F000040020000A0000000AFC000-000000067F000040020000A0000000B00000__00000038E9AF7F00\n000000067F000040020000A0000000AFC000-000000067F000040020000A0000000B00000__0000003903F1CFE8\n000000067F000040020000A0000000AFC000-000000067F000040020000A0000000B00000__00000
03B99F7F8A0\n000000067F000040020000A0000000AFC000-000000067F000040020000A0000000B00000__0000005D2FFFFB38\n000000067F000040020000A0000000AFD2DF-000000067F000040020000A0000000B05CBB__00000030B82DF289-0000003157DDD551\n000000067F000040020000A0000000B00000-000000067F000040020000A0000000B04000__00000031853FEA98\n000000067F000040020000A0000000B00000-000000067F000040020000A0000000B04000__00000038E9AF7F00\n000000067F000040020000A0000000B00000-000000067F000040020000A0000000B04000__0000003903F1CFE8\n000000067F000040020000A0000000B00000-000000067F000040020000A0000000B04000__0000003B99F7F8A0\n000000067F000040020000A0000000B00000-000000067F000040020000A0000000B04000__0000005D2FFFFB38\n000000067F000040020000A0000000B04000-000000067F000040020000A0000000B08000__00000031853FEA98\n000000067F000040020000A0000000B04000-000000067F000040020000A0000000B08000__00000038E9AF7F00\n000000067F000040020000A0000000B04000-000000067F000040020000A0000000B08000__0000003903F1CFE8\n000000067F000040020000A0000000B04000-000000067F000040020000A0000000B08000__0000003B99F7F8A0\n000000067F000040020000A0000000B04000-000000067F000040020000A0000000B08000__0000005D2FFFFB38\n000000067F000040020000A0000000B05CBB-000000067F000040020000A0000000B0E6A0__00000030B82DF289-0000003157DDD551\n000000067F000040020000A0000000B08000-000000067F000040020000A0000000B0C000__00000031853FEA98\n000000067F000040020000A0000000B08000-000000067F000040020000A0000000B0C000__00000038E9AF7F00\n000000067F000040020000A0000000B08000-000000067F000040020000A0000000B0C000__0000003903F1CFE8\n000000067F000040020000A0000000B08000-000000067F000040020000A0000000B0C000__0000003B99F7F8A0\n000000067F000040020000A0000000B08000-000000067F000040020000A0000000B0C000__0000005D2FFFFB38\n000000067F000040020000A0000000B0C000-000000067F000040020000A0000000B10000__00000031853FEA98\n000000067F000040020000A0000000B0C000-000000067F000040020000A0000000B10000__00000038E9AF7F00\n000000067F000040020000A0000000B0C000-000000067F000040020000A0000000B10000__0000003903F1CFE8\n
000000067F000040020000A0000000B0C000-000000067F000040020000A0000000B10000__0000003B99F7F8A0\n000000067F000040020000A0000000B0C000-000000067F000040020000A0000000B10000__0000005D2FFFFB38\n000000067F000040020000A0000000B0E6A0-000000067F000040020000A0000000B1707D__00000030B82DF289-0000003157DDD551\n000000067F000040020000A0000000B10000-000000067F000040020000A0000000B14000__00000031853FEA98\n000000067F000040020000A0000000B10000-000000067F000040020000A0000000B14000__00000038E9AF7F00\n000000067F000040020000A0000000B10000-000000067F000040020000A0000000B14000__0000003903F1CFE8\n000000067F000040020000A0000000B10000-000000067F000040020000A0000000B14000__0000003B99F7F8A0\n000000067F000040020000A0000000B10000-000000067F000040020000A0000000B14000__0000005D2FFFFB38\n000000067F000040020000A0000000B14000-000000067F000040020000A0000000B18000__00000031853FEA98\n000000067F000040020000A0000000B14000-000000067F000040020000A0000000B18000__00000038E9AF7F00\n000000067F000040020000A0000000B14000-000000067F000040020000A0000000B18000__0000003903F1CFE8\n000000067F000040020000A0000000B14000-000000067F000040020000A0000000B18000__0000003B99F7F8A0\n000000067F000040020000A0000000B14000-000000067F000040020000A0000000B18000__0000005D2FFFFB38\n000000067F000040020000A0000000B1707D-000000067F000040020000A0000000B1FA5F__00000030B82DF289-0000003157DDD551\n000000067F000040020000A0000000B18000-000000067F000040020000A0000000B1C000__00000031853FEA98\n000000067F000040020000A0000000B18000-000000067F000040020000A0000000B1C000__00000038E9AF7F00\n000000067F000040020000A0000000B18000-000000067F000040020000A0000000B1C000__0000003903F1CFE8\n000000067F000040020000A0000000B18000-000000067F000040020000A0000000B1C000__0000003B99F7F8A0\n000000067F000040020000A0000000B18000-000000067F000040020000A0000000B1C000__0000005D2FFFFB38\n000000067F000040020000A0000000B1C000-000000067F000040020000A0000000B20000__00000031853FEA98\n000000067F000040020000A0000000B1C000-000000067F000040020000A0000000B20000__00000038E9AF7F00\n000000067F000
040020000A0000000B1C000-000000067F000040020000A0000000B20000__0000003903F1CFE8\n000000067F000040020000A0000000B1C000-000000067F000040020000A0000000B20000__0000003B99F7F8A0\n000000067F000040020000A0000000B1C000-000000067F000040020000A0000000B20000__0000005D2FFFFB38\n000000067F000040020000A0000000B1FA5F-000000067F000040020000A0000000B28438__00000030B82DF289-0000003157DDD551\n000000067F000040020000A0000000B20000-000000067F000040020000A0000000B24000__00000031853FEA98\n000000067F000040020000A0000000B20000-000000067F000040020000A0000000B24000__00000038E9AF7F00\n000000067F000040020000A0000000B20000-000000067F000040020000A0000000B24000__0000003903F1CFE8\n000000067F000040020000A0000000B20000-000000067F000040020000A0000000B24000__0000003B99F7F8A0\n000000067F000040020000A0000000B20000-000000067F000040020000A0000000B24000__0000005D2FFFFB38\n000000067F000040020000A0000000B24000-000000067F000040020000A0000000B28000__00000031853FEA98\n000000067F000040020000A0000000B24000-000000067F000040020000A0000000B28000__00000038E9AF7F00\n000000067F000040020000A0000000B24000-000000067F000040020000A0000000B28000__0000003903F1CFE8\n000000067F000040020000A0000000B24000-000000067F000040020000A0000000B28000__0000003B99F7F8A0\n000000067F000040020000A0000000B24000-000000067F000040020000A0000000B28000__0000005D2FFFFB38\n000000067F000040020000A0000000B28000-000000067F000040020000A0000000B2C000__00000031853FEA98\n000000067F000040020000A0000000B28000-000000067F000040020000A0000000B2C000__00000038E9AF7F00\n000000067F000040020000A0000000B28000-000000067F000040020000A0000000B2C000__0000003903F1CFE8\n000000067F000040020000A0000000B28000-000000067F000040020000A0000000B2C000__0000003B99F7F8A0\n000000067F000040020000A0000000B28000-000000067F000040020000A0000000B2C000__0000005D2FFFFB38\n000000067F000040020000A0000000B28438-000000067F000040020000A0000000B30E0A__00000030B82DF289-0000003157DDD551\n000000067F000040020000A0000000B2C000-000000067F000040020000A0000000B30000__00000031853FEA98\n000000067F000040020000A000
0000B2C000-000000067F000040020000A0000000B30000__00000038E9AF7F00\n000000067F000040020000A0000000B2C000-000000067F000040020000A0000000B30000__0000003903F1CFE8\n000000067F000040020000A0000000B2C000-000000067F000040020000A0000000B30000__0000003B99F7F8A0\n000000067F000040020000A0000000B2C000-000000067F000040020000A0000000B30000__0000005D2FFFFB38\n000000067F000040020000A0000000B30000-000000067F000040020000A0000000B34000__00000031853FEA98\n000000067F000040020000A0000000B30000-000000067F000040020000A0000000B34000__00000038E9AF7F00\n000000067F000040020000A0000000B30000-000000067F000040020000A0000000B34000__0000003903F1CFE8\n000000067F000040020000A0000000B30000-000000067F000040020000A0000000B34000__0000003B99F7F8A0\n000000067F000040020000A0000000B30000-000000067F000040020000A0000000B34000__0000005D2FFFFB38\n000000067F000040020000A0000000B30E0A-000000067F000040020000A0000000B397D4__00000030B82DF289-0000003157DDD551\n000000067F000040020000A0000000B34000-000000067F000040020000A0000000B38000__00000031853FEA98\n000000067F000040020000A0000000B34000-000000067F000040020000A0000000B38000__00000038E9AF7F00\n000000067F000040020000A0000000B34000-000000067F000040020000A0000000B38000__0000003903F1CFE8\n000000067F000040020000A0000000B34000-000000067F000040020000A0000000B38000__0000003B99F7F8A0\n000000067F000040020000A0000000B34000-000000067F000040020000A0000000B38000__0000005D2FFFFB38\n000000067F000040020000A0000000B38000-000000067F000040020000A0000000B3C000__00000031853FEA98\n000000067F000040020000A0000000B38000-000000067F000040020000A0000000B3C000__00000038E9AF7F00\n000000067F000040020000A0000000B38000-000000067F000040020000A0000000B3C000__0000003903F1CFE8\n000000067F000040020000A0000000B38000-000000067F000040020000A0000000B3C000__0000003B99F7F8A0\n000000067F000040020000A0000000B38000-000000067F000040020000A0000000B3C000__0000005D2FFFFB38\n000000067F000040020000A0000000B397D4-000000067F000040020000A0000000B421B1__00000030B82DF289-0000003157DDD551\n000000067F000040020000A0000000B3C000-00
0000067F000040020000A0000000B40000__00000031853FEA98\n000000067F000040020000A0000000B3C000-000000067F000040020000A0000000B40000__00000038E9AF7F00\n000000067F000040020000A0000000B3C000-000000067F000040020000A0000000B40000__0000003903F1CFE8\n000000067F000040020000A0000000B3C000-000000067F000040020000A0000000B40000__0000003B99F7F8A0\n000000067F000040020000A0000000B3C000-000000067F000040020000A0000000B40000__0000005D2FFFFB38\n000000067F000040020000A0000000B40000-000000067F000040020000A0000000B44000__00000031853FEA98\n000000067F000040020000A0000000B40000-000000067F000040020000A0000000B44000__00000038E9AF7F00\n000000067F000040020000A0000000B40000-000000067F000040020000A0000000B44000__0000003903F1CFE8\n000000067F000040020000A0000000B40000-000000067F000040020000A0000000B44000__0000003B99F7F8A0\n000000067F000040020000A0000000B40000-000000067F000040020000A0000000B44000__0000005D2FFFFB38\n000000067F000040020000A0000000B421B1-000000067F000040020000A0000000B4AB8F__00000030B82DF289-0000003157DDD551\n000000067F000040020000A0000000B44000-000000067F000040020000A0000000B48000__00000031853FEA98\n000000067F000040020000A0000000B44000-000000067F000040020000A0000000B48000__00000038E9AF7F00\n000000067F000040020000A0000000B44000-000000067F000040020000A0000000B48000__0000003903F1CFE8\n000000067F000040020000A0000000B44000-000000067F000040020000A0000000B48000__0000003B99F7F8A0\n000000067F000040020000A0000000B44000-000000067F000040020000A0000000B48000__0000005D2FFFFB38\n000000067F000040020000A0000000B48000-000000067F000040020000A0000000B4C000__00000031853FEA98\n000000067F000040020000A0000000B48000-000000067F000040020000A0000000B4C000__00000038E67ABFA0\n000000067F000040020000A0000000B48000-000000067F000040020000A0000000B4C000__0000003903F1CFE8\n000000067F000040020000A0000000B48000-000000067F000040020000A0000000B4C000__0000003B99F7F8A0\n000000067F000040020000A0000000B48000-000000067F000040020000A0000000B4C000__0000005D2FFFFB38\n000000067F000040020000A0000000B4AB8F-000000067F000040020000A000020000
0000__00000030B82DF289-0000003157DDD551\n000000067F000040020000A0000000B4AE08-000000067F000040020000A0000000B537DE__0000003157DDD551-00000031F78DF129\n000000067F000040020000A0000000B4C000-000000067F000040020000A0000000B50000__00000031853FEA98\n000000067F000040020000A0000000B4C000-000000067F000040020000A0000000B50000__00000038E67ABFA0\n000000067F000040020000A0000000B4C000-000000067F000040020000A0000000B50000__0000003903F1CFE8\n000000067F000040020000A0000000B4C000-000000067F000040020000A0000000B50000__0000003B99F7F8A0\n000000067F000040020000A0000000B4C000-000000067F000040020000A0000000B50000__0000005D2FFFFB38\n000000067F000040020000A0000000B50000-000000067F000040020000A0000000B54000__00000031853FEA98\n000000067F000040020000A0000000B50000-000000067F000040020000A0000000B54000__00000038E67ABFA0\n000000067F000040020000A0000000B50000-000000067F000040020000A0000000B54000__0000003903F1CFE8\n000000067F000040020000A0000000B50000-000000067F000040020000A0000000B54000__0000003B99F7F8A0\n000000067F000040020000A0000000B50000-000000067F000040020000A0000000B54000__0000005D2FFFFB38\n000000067F000040020000A0000000B537DE-000000067F000040020000A0000000B5C1C1__0000003157DDD551-00000031F78DF129\n000000067F000040020000A0000000B54000-000000067F000040020000A0000000B58000__00000031853FEA98\n000000067F000040020000A0000000B54000-000000067F000040020000A0000000B58000__00000038E67ABFA0\n000000067F000040020000A0000000B54000-000000067F000040020000A0000000B58000__0000003903F1CFE8\n000000067F000040020000A0000000B54000-000000067F000040020000A0000000B58000__0000003B99F7F8A0\n000000067F000040020000A0000000B54000-000000067F000040020000A0000000B58000__0000005D2FFFFB38\n000000067F000040020000A0000000B58000-000000067F000040020000A0000000B5C000__00000031853FEA98\n000000067F000040020000A0000000B58000-000000067F000040020000A0000000B5C000__00000038E67ABFA0\n000000067F000040020000A0000000B58000-000000067F000040020000A0000000B5C000__0000003903F1CFE8\n000000067F000040020000A0000000B58000-000000067F000040020000A00000
00B5C000__0000003B99F7F8A0\n000000067F000040020000A0000000B58000-000000067F000040020000A0000000B5C000__0000005D2FFFFB38\n000000067F000040020000A0000000B5C000-000000067F000040020000A0000000B60000__00000031853FEA98\n000000067F000040020000A0000000B5C000-000000067F000040020000A0000000B60000__00000038E67ABFA0\n000000067F000040020000A0000000B5C000-000000067F000040020000A0000000B60000__0000003903F1CFE8\n000000067F000040020000A0000000B5C000-000000067F000040020000A0000000B60000__0000003B99F7F8A0\n000000067F000040020000A0000000B5C000-000000067F000040020000A0000000B60000__0000005D2FFFFB38\n000000067F000040020000A0000000B5C1C1-000000067F000040020000A0000000B64BA0__0000003157DDD551-00000031F78DF129\n000000067F000040020000A0000000B60000-000000067F000040020000A0000000B635E2__00000031853FEA98\n000000067F000040020000A0000000B60000-000000067F000040020000A0000000B64000__00000031C7B24AB0\n000000067F000040020000A0000000B60000-000000067F000040020000A0000000B64000__00000038E67ABFA0\n000000067F000040020000A0000000B60000-000000067F000040020000A0000000B64000__0000003903F1CFE8\n000000067F000040020000A0000000B60000-000000067F000040020000A0000000B64000__0000003B99F7F8A0\n000000067F000040020000A0000000B60000-000000067F000040020000A0000000B64000__0000005D2FFFFB38\n000000067F000040020000A0000000B64000-000000067F000040020000A0000000B68000__00000031C7B24AB0\n000000067F000040020000A0000000B64000-000000067F000040020000A0000000B68000__00000038E67ABFA0\n000000067F000040020000A0000000B64000-000000067F000040020000A0000000B68000__0000003903F1CFE8\n000000067F000040020000A0000000B64000-000000067F000040020000A0000000B68000__0000003B99F7F8A0\n000000067F000040020000A0000000B64000-000000067F000040020000A0000000B68000__0000005D2FFFFB38\n000000067F000040020000A0000000B64BA0-000000067F000040020000A0000000B6D57C__0000003157DDD551-00000031F78DF129\n000000067F000040020000A0000000B68000-000000067F000040020000A0000000B6C000__00000031C7B24AB0\n000000067F000040020000A0000000B68000-000000067F000040020000A0000000B6C000__000
00038E67ABFA0\n000000067F000040020000A0000000B68000-000000067F000040020000A0000000B6C000__0000003903F1CFE8\n000000067F000040020000A0000000B68000-000000067F000040020000A0000000B6C000__0000003B99F7F8A0\n000000067F000040020000A0000000B68000-000000067F000040020000A0000000B6C000__0000005D2FFFFB38\n000000067F000040020000A0000000B6C000-000000067F000040020000A0000000B70000__00000031C7B24AB0\n000000067F000040020000A0000000B6C000-000000067F000040020000A0000000B70000__00000038E67ABFA0\n000000067F000040020000A0000000B6C000-000000067F000040020000A0000000B70000__0000003903F1CFE8\n000000067F000040020000A0000000B6C000-000000067F000040020000A0000000B70000__0000003B99F7F8A0\n000000067F000040020000A0000000B6C000-000000067F000040020000A0000000B70000__0000005D2FFFFB38\n000000067F000040020000A0000000B6D57C-000000067F000040020000A0000000B75F57__0000003157DDD551-00000031F78DF129\n000000067F000040020000A0000000B70000-000000067F000040020000A0000000B74000__00000031C7B24AB0\n000000067F000040020000A0000000B70000-000000067F000040020000A0000000B74000__00000038E67ABFA0\n000000067F000040020000A0000000B70000-000000067F000040020000A0000000B74000__0000003903F1CFE8\n000000067F000040020000A0000000B70000-000000067F000040020000A0000000B74000__0000003B99F7F8A0\n000000067F000040020000A0000000B70000-000000067F000040020000A0000000B74000__0000005D2FFFFB38\n000000067F000040020000A0000000B74000-000000067F000040020000A0000000B78000__00000031C7B24AB0\n000000067F000040020000A0000000B74000-000000067F000040020000A0000000B78000__00000038E67ABFA0\n000000067F000040020000A0000000B74000-000000067F000040020000A0000000B78000__0000003903F1CFE8\n000000067F000040020000A0000000B74000-000000067F000040020000A0000000B78000__0000003B99F7F8A0\n000000067F000040020000A0000000B74000-000000067F000040020000A0000000B78000__0000005D2FFFFB38\n000000067F000040020000A0000000B75F57-000000067F000040020000A0000000B7E928__0000003157DDD551-00000031F78DF129\n000000067F000040020000A0000000B78000-000000067F000040020000A0000000B7C000__00000031C7B24AB0
\n000000067F000040020000A0000000B78000-000000067F000040020000A0000000B7C000__00000038E67ABFA0\n000000067F000040020000A0000000B78000-000000067F000040020000A0000000B7C000__0000003903F1CFE8\n000000067F000040020000A0000000B78000-000000067F000040020000A0000000B7C000__0000003B99F7F8A0\n000000067F000040020000A0000000B78000-000000067F000040020000A0000000B7C000__0000005D2FFFFB38\n000000067F000040020000A0000000B7C000-000000067F000040020000A0000000B80000__00000031C7B24AB0\n000000067F000040020000A0000000B7C000-000000067F000040020000A0000000B80000__00000038E67ABFA0\n000000067F000040020000A0000000B7C000-000000067F000040020000A0000000B80000__0000003903F1CFE8\n000000067F000040020000A0000000B7C000-000000067F000040020000A0000000B80000__0000003B99F7F8A0\n000000067F000040020000A0000000B7C000-000000067F000040020000A0000000B80000__0000005D2FFFFB38\n000000067F000040020000A0000000B7E928-000000067F000040020000A0000000B872FE__0000003157DDD551-00000031F78DF129\n000000067F000040020000A0000000B80000-000000067F000040020000A0000000B84000__00000031C7B24AB0\n000000067F000040020000A0000000B80000-000000067F000040020000A0000000B84000__00000038E67ABFA0\n000000067F000040020000A0000000B80000-000000067F000040020000A0000000B84000__0000003903F1CFE8\n000000067F000040020000A0000000B80000-000000067F000040020000A0000000B84000__0000003B99F7F8A0\n000000067F000040020000A0000000B80000-000000067F000040020000A0000000B84000__0000005D2FFFFB38\n000000067F000040020000A0000000B84000-000000067F000040020000A0000000B87482__00000031C7B24AB0\n000000067F000040020000A0000000B84000-000000067F000040020000A0000000B88000__00000031EA7FFF60\n000000067F000040020000A0000000B84000-000000067F000040020000A0000000B88000__00000038E67ABFA0\n000000067F000040020000A0000000B84000-000000067F000040020000A0000000B88000__0000003903F1CFE8\n000000067F000040020000A0000000B84000-000000067F000040020000A0000000B88000__0000003B99F7F8A0\n000000067F000040020000A0000000B84000-000000067F000040020000A0000000B88000__0000005D2FFFFB38\n000000067F000040020000A00000
00B872FE-000000067F000040020000A0000000B8FCED__0000003157DDD551-00000031F78DF129\n000000067F000040020000A0000000B88000-000000067F000040020000A0000000B8C000__00000031EA7FFF60\n000000067F000040020000A0000000B88000-000000067F000040020000A0000000B8C000__00000038E67ABFA0\n000000067F000040020000A0000000B88000-000000067F000040020000A0000000B8C000__0000003903F1CFE8\n000000067F000040020000A0000000B88000-000000067F000040020000A0000000B8C000__0000003B99F7F8A0\n000000067F000040020000A0000000B88000-000000067F000040020000A0000000B8C000__0000005D2FFFFB38\n000000067F000040020000A0000000B8C000-000000067F000040020000A0000000B90000__00000031EA7FFF60\n000000067F000040020000A0000000B8C000-000000067F000040020000A0000000B90000__00000038E67ABFA0\n000000067F000040020000A0000000B8C000-000000067F000040020000A0000000B90000__0000003903F1CFE8\n000000067F000040020000A0000000B8C000-000000067F000040020000A0000000B90000__0000003B99F7F8A0\n000000067F000040020000A0000000B8C000-000000067F000040020000A0000000B90000__0000005D2FFFFB38\n000000067F000040020000A0000000B8FCED-000000067F000040020000A0000000B986CE__0000003157DDD551-00000031F78DF129\n000000067F000040020000A0000000B90000-000000067F000040020000A0000000B94000__00000031EA7FFF60\n000000067F000040020000A0000000B90000-000000067F000040020000A0000000B94000__00000038E67ABFA0\n000000067F000040020000A0000000B90000-000000067F000040020000A0000000B94000__0000003903F1CFE8\n000000067F000040020000A0000000B90000-000000067F000040020000A0000000B94000__0000003B99F7F8A0\n000000067F000040020000A0000000B90000-000000067F000040020000A0000000B94000__0000005D2FFFFB38\n000000067F000040020000A0000000B94000-000000067F000040020000A0000000B98000__00000031EA7FFF60\n000000067F000040020000A0000000B94000-000000067F000040020000A0000000B98000__00000038E67ABFA0\n000000067F000040020000A0000000B94000-000000067F000040020000A0000000B98000__0000003903F1CFE8\n000000067F000040020000A0000000B94000-000000067F000040020000A0000000B98000__0000003B99F7F8A0\n000000067F000040020000A0000000B94000-0000
00067F000040020000A0000000B98000__0000005D2FFFFB38\n000000067F000040020000A0000000B98000-000000067F000040020000A0000000B9C000__00000038E67ABFA0\n000000067F000040020000A0000000B98000-000000067F000040020000A0000000B9C000__0000003903F1CFE8\n000000067F000040020000A0000000B98000-000000067F000040020000A0000000B9C000__0000003B99F7F8A0\n000000067F000040020000A0000000B98000-000000067F000040020000A0000000B9C000__0000005D2FFFFB38\n000000067F000040020000A0000000B98000-030000000000000000000000000000000002__00000031EA7FFF60\n000000067F000040020000A0000000B986CE-000000067F000040020000A0000000BA10BA__0000003157DDD551-00000031F78DF129\n000000067F000040020000A0000000B9C000-000000067F000040020000A0000000BA0000__00000038E67ABFA0\n000000067F000040020000A0000000B9C000-000000067F000040020000A0000000BA0000__0000003903F1CFE8\n000000067F000040020000A0000000B9C000-000000067F000040020000A0000000BA0000__0000003B99F7F8A0\n000000067F000040020000A0000000B9C000-000000067F000040020000A0000000BA0000__0000005D2FFFFB38\n000000067F000040020000A0000000BA0000-000000067F000040020000A0000000BA4000__00000038E1ABFE28\n000000067F000040020000A0000000BA0000-000000067F000040020000A0000000BA4000__00000038E9AF7F00\n000000067F000040020000A0000000BA0000-000000067F000040020000A0000000BA4000__0000003903F1CFE8\n000000067F000040020000A0000000BA0000-000000067F000040020000A0000000BA4000__0000003B99F7F8A0\n000000067F000040020000A0000000BA0000-000000067F000040020000A0000000BA4000__0000005D2FFFFB38\n000000067F000040020000A0000000BA10BA-000000067F000040020000A0000200000000__0000003157DDD551-00000031F78DF129\n000000067F000040020000A0000000BA1288-000000067F000040020000A0000000BA9C74__00000031F78DF129-00000032973DEDD1\n000000067F000040020000A0000000BA4000-000000067F000040020000A0000000BA8000__00000038E1ABFE28\n000000067F000040020000A0000000BA4000-000000067F000040020000A0000000BA8000__00000038E9AF7F00\n000000067F000040020000A0000000BA4000-000000067F000040020000A0000000BA8000__0000003903F1CFE8\n000000067F000040020000A0000000BA4000-
000000067F000040020000A0000000BA8000__0000003B99F7F8A0\n000000067F000040020000A0000000BA4000-000000067F000040020000A0000000BA8000__0000005D2FFFFB38\n000000067F000040020000A0000000BA8000-000000067F000040020000A0000000BAC000__00000038E1ABFE28\n000000067F000040020000A0000000BA8000-000000067F000040020000A0000000BAC000__00000038E9AF7F00\n000000067F000040020000A0000000BA8000-000000067F000040020000A0000000BAC000__0000003903F1CFE8\n000000067F000040020000A0000000BA8000-000000067F000040020000A0000000BAC000__0000003B99F7F8A0\n000000067F000040020000A0000000BA8000-000000067F000040020000A0000000BAC000__0000005D2FFFFB38\n000000067F000040020000A0000000BA9C74-000000067F000040020000A0000000BB264F__00000031F78DF129-00000032973DEDD1\n000000067F000040020000A0000000BAC000-000000067F000040020000A0000000BB0000__00000038E1ABFE28\n000000067F000040020000A0000000BAC000-000000067F000040020000A0000000BB0000__00000038E9AF7F00\n000000067F000040020000A0000000BAC000-000000067F000040020000A0000000BB0000__0000003903F1CFE8\n000000067F000040020000A0000000BAC000-000000067F000040020000A0000000BB0000__0000003B99F7F8A0\n000000067F000040020000A0000000BAC000-000000067F000040020000A0000000BB0000__0000005D2FFFFB38\n000000067F000040020000A0000000BB0000-000000067F000040020000A0000000BB4000__00000038E1ABFE28\n000000067F000040020000A0000000BB0000-000000067F000040020000A0000000BB4000__00000038E9AF7F00\n000000067F000040020000A0000000BB0000-000000067F000040020000A0000000BB4000__0000003903F1CFE8\n000000067F000040020000A0000000BB0000-000000067F000040020000A0000000BB4000__0000003B99F7F8A0\n000000067F000040020000A0000000BB0000-000000067F000040020000A0000000BB4000__0000005D2FFFFB38\n000000067F000040020000A0000000BB264F-000000067F000040020000A0000000BBB01F__00000031F78DF129-00000032973DEDD1\n000000067F000040020000A0000000BB4000-000000067F000040020000A0000000BB8000__00000038E1ABFE28\n000000067F000040020000A0000000BB4000-000000067F000040020000A0000000BB8000__00000038E9AF7F00\n000000067F000040020000A0000000BB4000-000000067F000
040020000A0000000BB8000__0000003903F1CFE8\n000000067F000040020000A0000000BB4000-000000067F000040020000A0000000BB8000__0000003B99F7F8A0\n000000067F000040020000A0000000BB4000-000000067F000040020000A0000000BB8000__0000005D2FFFFB38\n000000067F000040020000A0000000BB8000-000000067F000040020000A0000000BBC000__00000038E1ABFE28\n000000067F000040020000A0000000BB8000-000000067F000040020000A0000000BBC000__00000038E9AF7F00\n000000067F000040020000A0000000BB8000-000000067F000040020000A0000000BBC000__0000003903F1CFE8\n000000067F000040020000A0000000BB8000-000000067F000040020000A0000000BBC000__0000003B99F7F8A0\n000000067F000040020000A0000000BB8000-000000067F000040020000A0000000BBC000__0000005D2FFFFB38\n000000067F000040020000A0000000BBB01F-000000067F000040020000A0000000BC39F4__00000031F78DF129-00000032973DEDD1\n000000067F000040020000A0000000BBC000-000000067F000040020000A0000000BC0000__00000038E1ABFE28\n000000067F000040020000A0000000BBC000-000000067F000040020000A0000000BC0000__00000038E9AF7F00\n000000067F000040020000A0000000BBC000-000000067F000040020000A0000000BC0000__0000003903F1CFE8\n000000067F000040020000A0000000BBC000-000000067F000040020000A0000000BC0000__0000003B99F7F8A0\n000000067F000040020000A0000000BBC000-000000067F000040020000A0000000BC0000__0000005D2FFFFB38\n000000067F000040020000A0000000BC0000-000000067F000040020000A0000000BC4000__00000038E1ABFE28\n000000067F000040020000A0000000BC0000-000000067F000040020000A0000000BC4000__00000038E9AF7F00\n000000067F000040020000A0000000BC0000-000000067F000040020000A0000000BC4000__0000003903F1CFE8\n000000067F000040020000A0000000BC0000-000000067F000040020000A0000000BC4000__0000003B99F7F8A0\n000000067F000040020000A0000000BC0000-000000067F000040020000A0000000BC4000__0000005D2FFFFB38\n000000067F000040020000A0000000BC39F4-000000067F000040020000A0000000BCC3D7__00000031F78DF129-00000032973DEDD1\n000000067F000040020000A0000000BC4000-000000067F000040020000A0000000BC8000__00000038E1ABFE28\n000000067F000040020000A0000000BC4000-000000067F000040020000A000
0000BC8000__00000038E9AF7F00\n000000067F000040020000A0000000BC4000-000000067F000040020000A0000000BC8000__0000003903F1CFE8\n000000067F000040020000A0000000BC4000-000000067F000040020000A0000000BC8000__0000003B99F7F8A0\n000000067F000040020000A0000000BC4000-000000067F000040020000A0000000BC8000__0000005D2FFFFB38\n000000067F000040020000A0000000BC8000-000000067F000040020000A0000000BCC000__00000038E1ABFE28\n000000067F000040020000A0000000BC8000-000000067F000040020000A0000000BCC000__00000038E9AF7F00\n000000067F000040020000A0000000BC8000-000000067F000040020000A0000000BCC000__0000003903F1CFE8\n000000067F000040020000A0000000BC8000-000000067F000040020000A0000000BCC000__0000003B99F7F8A0\n000000067F000040020000A0000000BC8000-000000067F000040020000A0000000BCC000__0000005D2FFFFB38\n000000067F000040020000A0000000BCC000-000000067F000040020000A0000000BD0000__00000038E1ABFE28\n000000067F000040020000A0000000BCC000-000000067F000040020000A0000000BD0000__00000038E9AF7F00\n000000067F000040020000A0000000BCC000-000000067F000040020000A0000000BD0000__0000003903F1CFE8\n000000067F000040020000A0000000BCC000-000000067F000040020000A0000000BD0000__0000003B99F7F8A0\n000000067F000040020000A0000000BCC000-000000067F000040020000A0000000BD0000__0000005D2FFFFB38\n000000067F000040020000A0000000BCC3D7-000000067F000040020000A0000000BD4DC4__00000031F78DF129-00000032973DEDD1\n000000067F000040020000A0000000BD0000-000000067F000040020000A0000000BD4000__00000038E1ABFE28\n000000067F000040020000A0000000BD0000-000000067F000040020000A0000000BD4000__00000038E9AF7F00\n000000067F000040020000A0000000BD0000-000000067F000040020000A0000000BD4000__0000003903F1CFE8\n000000067F000040020000A0000000BD0000-000000067F000040020000A0000000BD4000__0000003B99F7F8A0\n000000067F000040020000A0000000BD0000-000000067F000040020000A0000000BD4000__0000005D2FFFFB38\n000000067F000040020000A0000000BD4000-000000067F000040020000A0000000BD8000__00000038E1ABFE28\n000000067F000040020000A0000000BD4000-000000067F000040020000A0000000BD8000__00000038E9AF7F00\n
000000067F000040020000A0000000BD4000-000000067F000040020000A0000000BD8000__0000003903F1CFE8\n000000067F000040020000A0000000BD4000-000000067F000040020000A0000000BD8000__0000003B99F7F8A0\n000000067F000040020000A0000000BD4000-000000067F000040020000A0000000BD8000__0000005D2FFFFB38\n000000067F000040020000A0000000BD4DC4-000000067F000040020000A0000000BDD7AA__00000031F78DF129-00000032973DEDD1\n000000067F000040020000A0000000BD8000-000000067F000040020000A0000000BDC000__00000038E1ABFE28\n000000067F000040020000A0000000BD8000-000000067F000040020000A0000000BDC000__00000038E9AF7F00\n000000067F000040020000A0000000BD8000-000000067F000040020000A0000000BDC000__0000003903F1CFE8\n000000067F000040020000A0000000BD8000-000000067F000040020000A0000000BDC000__0000003B99F7F8A0\n000000067F000040020000A0000000BD8000-000000067F000040020000A0000000BDC000__0000005D2FFFFB38\n000000067F000040020000A0000000BDC000-000000067F000040020000A0000000BE0000__00000038E1ABFE28\n000000067F000040020000A0000000BDC000-000000067F000040020000A0000000BE0000__00000038E9AF7F00\n000000067F000040020000A0000000BDC000-000000067F000040020000A0000000BE0000__0000003903F1CFE8\n000000067F000040020000A0000000BDC000-000000067F000040020000A0000000BE0000__0000003B99F7F8A0\n000000067F000040020000A0000000BDC000-000000067F000040020000A0000000BE0000__0000005D2FFFFB38\n000000067F000040020000A0000000BDD7AA-000000067F000040020000A0000000BE6184__00000031F78DF129-00000032973DEDD1\n000000067F000040020000A0000000BE0000-000000067F000040020000A0000000BE4000__00000038E1ABFE28\n000000067F000040020000A0000000BE0000-000000067F000040020000A0000000BE4000__00000038E9AF7F00\n000000067F000040020000A0000000BE0000-000000067F000040020000A0000000BE4000__0000003903F1CFE8\n000000067F000040020000A0000000BE0000-000000067F000040020000A0000000BE4000__0000003B99F7F8A0\n000000067F000040020000A0000000BE0000-000000067F000040020000A0000000BE4000__0000005D2FFFFB38\n000000067F000040020000A0000000BE4000-000000067F000040020000A0000000BE8000__00000038E1ABFE28\n000000067F000
040020000A0000000BE4000-000000067F000040020000A0000000BE8000__00000038E9AF7F00\n000000067F000040020000A0000000BE4000-000000067F000040020000A0000000BE8000__0000003903F1CFE8\n000000067F000040020000A0000000BE4000-000000067F000040020000A0000000BE8000__0000003B99F7F8A0\n000000067F000040020000A0000000BE4000-000000067F000040020000A0000000BE8000__0000005D2FFFFB38\n000000067F000040020000A0000000BE6184-000000067F000040020000A0000000BEEB65__00000031F78DF129-00000032973DEDD1\n000000067F000040020000A0000000BE8000-000000067F000040020000A0000000BEC000__00000038E1ABFE28\n000000067F000040020000A0000000BE8000-000000067F000040020000A0000000BEC000__00000038E9AF7F00\n000000067F000040020000A0000000BE8000-000000067F000040020000A0000000BEC000__0000003903F1CFE8\n000000067F000040020000A0000000BE8000-000000067F000040020000A0000000BEC000__0000003B99F7F8A0\n000000067F000040020000A0000000BE8000-000000067F000040020000A0000000BEC000__0000005D2FFFFB38\n000000067F000040020000A0000000BEC000-000000067F000040020000A0000000BF0000__00000038E1ABFE28\n000000067F000040020000A0000000BEC000-000000067F000040020000A0000000BF0000__00000038E9AF7F00\n000000067F000040020000A0000000BEC000-000000067F000040020000A0000000BF0000__0000003903F1CFE8\n000000067F000040020000A0000000BEC000-000000067F000040020000A0000000BF0000__0000003B99F7F8A0\n000000067F000040020000A0000000BEC000-000000067F000040020000A0000000BF0000__0000005D2FFFFB38\n000000067F000040020000A0000000BEEB65-000000067F000040020000A0000000BF7534__00000031F78DF129-00000032973DEDD1\n000000067F000040020000A0000000BF0000-000000067F000040020000A0000000BF4000__00000038E1ABFE28\n000000067F000040020000A0000000BF0000-000000067F000040020000A0000000BF4000__00000038E9AF7F00\n000000067F000040020000A0000000BF0000-000000067F000040020000A0000000BF4000__0000003903F1CFE8\n000000067F000040020000A0000000BF0000-000000067F000040020000A0000000BF4000__0000003B99F7F8A0\n000000067F000040020000A0000000BF0000-000000067F000040020000A0000000BF4000__0000005D2FFFFB38\n000000067F000040020000A000
0000BF4000-000000067F000040020000A0000000BF8000__00000033605476A8\n000000067F000040020000A0000000BF4000-000000067F000040020000A0000000BF8000__00000038E9AF7F00\n000000067F000040020000A0000000BF4000-000000067F000040020000A0000000BF8000__0000003903F1CFE8\n000000067F000040020000A0000000BF4000-000000067F000040020000A0000000BF8000__0000003B99F7F8A0\n000000067F000040020000A0000000BF4000-000000067F000040020000A0000000BF8000__0000005D2FFFFB38\n000000067F000040020000A0000000BF7534-000000067F000040020000A0000200000000__00000031F78DF129-00000032973DEDD1\n000000067F000040020000A0000000BF778F-000000067F000040020000A0000000C00165__00000032973DEDD1-0000003336EBF989\n000000067F000040020000A0000000BF8000-000000067F000040020000A0000000BFC000__00000033605476A8\n000000067F000040020000A0000000BF8000-000000067F000040020000A0000000BFC000__00000038E9AF7F00\n000000067F000040020000A0000000BF8000-000000067F000040020000A0000000BFC000__0000003903F1CFE8\n000000067F000040020000A0000000BF8000-000000067F000040020000A0000000BFC000__0000003B99F7F8A0\n000000067F000040020000A0000000BF8000-000000067F000040020000A0000000BFC000__0000005D2FFFFB38\n000000067F000040020000A0000000BFC000-000000067F000040020000A0000000C00000__00000033605476A8\n000000067F000040020000A0000000BFC000-000000067F000040020000A0000000C00000__00000038E9AF7F00\n000000067F000040020000A0000000BFC000-000000067F000040020000A0000000C00000__0000003903F1CFE8\n000000067F000040020000A0000000BFC000-000000067F000040020000A0000000C00000__0000003B99F7F8A0\n000000067F000040020000A0000000BFC000-000000067F000040020000A0000000C00000__0000005D2FFFFB38\n000000067F000040020000A0000000C00000-000000067F000040020000A0000000C04000__00000033605476A8\n000000067F000040020000A0000000C00000-000000067F000040020000A0000000C04000__00000038E9AF7F00\n000000067F000040020000A0000000C00000-000000067F000040020000A0000000C04000__0000003903F1CFE8\n000000067F000040020000A0000000C00000-000000067F000040020000A0000000C04000__0000003B99F7F8A0\n000000067F000040020000A0000000C00000-00
0000067F000040020000A0000000C04000__0000005D2FFFFB38\n000000067F000040020000A0000000C00165-000000067F000040020000A0000000C08B3A__00000032973DEDD1-0000003336EBF989\n000000067F000040020000A0000000C04000-000000067F000040020000A0000000C08000__00000033605476A8\n000000067F000040020000A0000000C04000-000000067F000040020000A0000000C08000__00000038E9AF7F00\n000000067F000040020000A0000000C04000-000000067F000040020000A0000000C08000__0000003903F1CFE8\n000000067F000040020000A0000000C04000-000000067F000040020000A0000000C08000__0000003B99F7F8A0\n000000067F000040020000A0000000C04000-000000067F000040020000A0000000C08000__0000005D2FFFFB38\n000000067F000040020000A0000000C08000-000000067F000040020000A0000000C0C000__00000033605476A8\n000000067F000040020000A0000000C08000-000000067F000040020000A0000000C0C000__00000038E9AF7F00\n000000067F000040020000A0000000C08000-000000067F000040020000A0000000C0C000__0000003903F1CFE8\n000000067F000040020000A0000000C08000-000000067F000040020000A0000000C0C000__0000003B99F7F8A0\n000000067F000040020000A0000000C08000-000000067F000040020000A0000000C0C000__0000005D2FFFFB38\n000000067F000040020000A0000000C08B3A-000000067F000040020000A0000000C1151B__00000032973DEDD1-0000003336EBF989\n000000067F000040020000A0000000C0C000-000000067F000040020000A0000000C10000__00000033605476A8\n000000067F000040020000A0000000C0C000-000000067F000040020000A0000000C10000__00000038E9AF7F00\n000000067F000040020000A0000000C0C000-000000067F000040020000A0000000C10000__0000003903F1CFE8\n000000067F000040020000A0000000C0C000-000000067F000040020000A0000000C10000__0000003B99F7F8A0\n000000067F000040020000A0000000C0C000-000000067F000040020000A0000000C10000__0000005D2FFFFB38\n000000067F000040020000A0000000C10000-000000067F000040020000A0000000C14000__00000033605476A8\n000000067F000040020000A0000000C10000-000000067F000040020000A0000000C14000__00000038E9AF7F00\n000000067F000040020000A0000000C10000-000000067F000040020000A0000000C14000__0000003903F1CFE8\n000000067F000040020000A0000000C10000-000000067F00004
0020000A0000000C14000__0000003B99F7F8A0\n000000067F000040020000A0000000C10000-000000067F000040020000A0000000C14000__0000005D2FFFFB38\n000000067F000040020000A0000000C1151B-000000067F000040020000A0000000C19EF6__00000032973DEDD1-0000003336EBF989\n000000067F000040020000A0000000C14000-000000067F000040020000A0000000C18000__00000033605476A8\n000000067F000040020000A0000000C14000-000000067F000040020000A0000000C18000__00000038E9AF7F00\n000000067F000040020000A0000000C14000-000000067F000040020000A0000000C18000__0000003903F1CFE8\n000000067F000040020000A0000000C14000-000000067F000040020000A0000000C18000__0000003B99F7F8A0\n000000067F000040020000A0000000C14000-000000067F000040020000A0000000C18000__0000005D2FFFFB38\n000000067F000040020000A0000000C18000-000000067F000040020000A0000000C1C000__00000033605476A8\n000000067F000040020000A0000000C18000-000000067F000040020000A0000000C1C000__00000038E9AF7F00\n000000067F000040020000A0000000C18000-000000067F000040020000A0000000C1C000__0000003903F1CFE8\n000000067F000040020000A0000000C18000-000000067F000040020000A0000000C1C000__0000003B99F7F8A0\n000000067F000040020000A0000000C18000-000000067F000040020000A0000000C1C000__0000005D2FFFFB38\n000000067F000040020000A0000000C19EF6-000000067F000040020000A0000000C228E6__00000032973DEDD1-0000003336EBF989\n000000067F000040020000A0000000C1C000-000000067F000040020000A0000000C20000__00000033605476A8\n000000067F000040020000A0000000C1C000-000000067F000040020000A0000000C20000__00000038E9AF7F00\n000000067F000040020000A0000000C1C000-000000067F000040020000A0000000C20000__0000003903F1CFE8\n000000067F000040020000A0000000C1C000-000000067F000040020000A0000000C20000__0000003B99F7F8A0\n000000067F000040020000A0000000C1C000-000000067F000040020000A0000000C20000__0000005D2FFFFB38\n000000067F000040020000A0000000C20000-000000067F000040020000A0000000C24000__00000033605476A8\n000000067F000040020000A0000000C20000-000000067F000040020000A0000000C24000__00000038E9AF7F00\n000000067F000040020000A0000000C20000-000000067F000040020000A00000
00C24000__0000003903F1CFE8\n000000067F000040020000A0000000C20000-000000067F000040020000A0000000C24000__0000003B99F7F8A0\n000000067F000040020000A0000000C20000-000000067F000040020000A0000000C24000__0000005D2FFFFB38\n000000067F000040020000A0000000C228E6-000000067F000040020000A0000000C2B2C5__00000032973DEDD1-0000003336EBF989\n000000067F000040020000A0000000C24000-000000067F000040020000A0000000C28000__00000033605476A8\n000000067F000040020000A0000000C24000-000000067F000040020000A0000000C28000__00000038E9AF7F00\n000000067F000040020000A0000000C24000-000000067F000040020000A0000000C28000__0000003903F1CFE8\n000000067F000040020000A0000000C24000-000000067F000040020000A0000000C28000__0000003B99F7F8A0\n000000067F000040020000A0000000C24000-000000067F000040020000A0000000C28000__0000005D2FFFFB38\n000000067F000040020000A0000000C28000-000000067F000040020000A0000000C2C000__00000033605476A8\n000000067F000040020000A0000000C28000-000000067F000040020000A0000000C2C000__00000038E9AF7F00\n000000067F000040020000A0000000C28000-000000067F000040020000A0000000C2C000__0000003903F1CFE8\n000000067F000040020000A0000000C28000-000000067F000040020000A0000000C2C000__0000003B99F7F8A0\n000000067F000040020000A0000000C28000-000000067F000040020000A0000000C2C000__0000005D2FFFFB38\n000000067F000040020000A0000000C2B2C5-000000067F000040020000A0000000C33C9C__00000032973DEDD1-0000003336EBF989\n000000067F000040020000A0000000C2C000-000000067F000040020000A0000000C30000__00000033605476A8\n000000067F000040020000A0000000C2C000-000000067F000040020000A0000000C30000__00000038E9AF7F00\n000000067F000040020000A0000000C2C000-000000067F000040020000A0000000C30000__0000003903F1CFE8\n000000067F000040020000A0000000C2C000-000000067F000040020000A0000000C30000__0000003B99F7F8A0\n000000067F000040020000A0000000C2C000-000000067F000040020000A0000000C30000__0000005D2FFFFB38\n000000067F000040020000A0000000C30000-000000067F000040020000A0000000C34000__00000033605476A8\n000000067F000040020000A0000000C30000-000000067F000040020000A0000000C34000__000
00038E9AF7F00\n000000067F000040020000A0000000C30000-000000067F000040020000A0000000C34000__0000003903F1CFE8\n000000067F000040020000A0000000C30000-000000067F000040020000A0000000C34000__0000003B99F7F8A0\n000000067F000040020000A0000000C30000-000000067F000040020000A0000000C34000__0000005D2FFFFB38\n000000067F000040020000A0000000C33C9C-000000067F000040020000A0000000C3C66D__00000032973DEDD1-0000003336EBF989\n000000067F000040020000A0000000C34000-000000067F000040020000A0000000C38000__00000033605476A8\n000000067F000040020000A0000000C34000-000000067F000040020000A0000000C38000__00000038E9AF7F00\n000000067F000040020000A0000000C34000-000000067F000040020000A0000000C38000__0000003903F1CFE8\n000000067F000040020000A0000000C34000-000000067F000040020000A0000000C38000__0000003B99F7F8A0\n000000067F000040020000A0000000C34000-000000067F000040020000A0000000C38000__0000005D2FFFFB38\n000000067F000040020000A0000000C38000-000000067F000040020000A0000000C3C000__00000033605476A8\n000000067F000040020000A0000000C38000-000000067F000040020000A0000000C3C000__00000038E9AF7F00\n000000067F000040020000A0000000C38000-000000067F000040020000A0000000C3C000__0000003903F1CFE8\n000000067F000040020000A0000000C38000-000000067F000040020000A0000000C3C000__0000003B99F7F8A0\n000000067F000040020000A0000000C38000-000000067F000040020000A0000000C3C000__0000005D2FFFFB38\n000000067F000040020000A0000000C3C000-000000067F000040020000A0000000C40000__00000033605476A8\n000000067F000040020000A0000000C3C000-000000067F000040020000A0000000C40000__00000038E9AF7F00\n000000067F000040020000A0000000C3C000-000000067F000040020000A0000000C40000__0000003903F1CFE8\n000000067F000040020000A0000000C3C000-000000067F000040020000A0000000C40000__0000003B99F7F8A0\n000000067F000040020000A0000000C3C000-000000067F000040020000A0000000C40000__0000005D2FFFFB38\n000000067F000040020000A0000000C3C66D-000000067F000040020000A0000000C45033__00000032973DEDD1-0000003336EBF989\n000000067F000040020000A0000000C40000-000000067F000040020000A0000000C44000__00000033605476A8
\n000000067F000040020000A0000000C40000-000000067F000040020000A0000000C44000__00000038E9AF7F00\n000000067F000040020000A0000000C40000-000000067F000040020000A0000000C44000__0000003903F1CFE8\n000000067F000040020000A0000000C40000-000000067F000040020000A0000000C44000__0000003B99F7F8A0\n000000067F000040020000A0000000C40000-000000067F000040020000A0000000C44000__0000005D2FFFFB38\n000000067F000040020000A0000000C44000-000000067F000040020000A0000000C48000__00000033605476A8\n000000067F000040020000A0000000C44000-000000067F000040020000A0000000C48000__00000038E9AF7F00\n000000067F000040020000A0000000C44000-000000067F000040020000A0000000C48000__0000003903F1CFE8\n000000067F000040020000A0000000C44000-000000067F000040020000A0000000C48000__0000003B99F7F8A0\n000000067F000040020000A0000000C44000-000000067F000040020000A0000000C48000__0000005D2FFFFB38\n000000067F000040020000A0000000C45033-000000067F000040020000A0000000C4DA13__00000032973DEDD1-0000003336EBF989\n000000067F000040020000A0000000C48000-000000067F000040020000A0000000C4C000__00000033605476A8\n000000067F000040020000A0000000C48000-000000067F000040020000A0000000C4C000__00000038E9AF7F00\n000000067F000040020000A0000000C48000-000000067F000040020000A0000000C4C000__0000003903F1CFE8\n000000067F000040020000A0000000C48000-000000067F000040020000A0000000C4C000__0000003B99F7F8A0\n000000067F000040020000A0000000C48000-000000067F000040020000A0000000C4C000__0000005D2FFFFB38\n000000067F000040020000A0000000C4C000-000000067F000040020000A0000000C50000__00000033605476A8\n000000067F000040020000A0000000C4C000-000000067F000040020000A0000000C50000__00000038E67ABFA0\n000000067F000040020000A0000000C4C000-000000067F000040020000A0000000C50000__0000003903F1CFE8\n000000067F000040020000A0000000C4C000-000000067F000040020000A0000000C50000__0000003B99F7F8A0\n000000067F000040020000A0000000C4C000-000000067F000040020000A0000000C50000__0000005D2FFFFB38\n000000067F000040020000A0000000C4DA13-000000067F000040020000A0000200000000__00000032973DEDD1-0000003336EBF989\n000000067F0
00040020000A0000000C4DBC3-000000067F000040020000A0000000C565B4__0000003336EBF989-00000033D69BE889\n000000067F000040020000A0000000C50000-000000067F000040020000A0000000C54000__00000033605476A8\n000000067F000040020000A0000000C50000-000000067F000040020000A0000000C54000__00000038E67ABFA0\n000000067F000040020000A0000000C50000-000000067F000040020000A0000000C54000__0000003903F1CFE8\n000000067F000040020000A0000000C50000-000000067F000040020000A0000000C54000__0000003B99F7F8A0\n000000067F000040020000A0000000C50000-000000067F000040020000A0000000C54000__0000005D2FFFFB38\n000000067F000040020000A0000000C54000-000000067F000040020000A0000000C58000__00000033605476A8\n000000067F000040020000A0000000C54000-000000067F000040020000A0000000C58000__00000038E67ABFA0\n000000067F000040020000A0000000C54000-000000067F000040020000A0000000C58000__0000003903F1CFE8\n000000067F000040020000A0000000C54000-000000067F000040020000A0000000C58000__0000003B99F7F8A0\n000000067F000040020000A0000000C54000-000000067F000040020000A0000000C58000__0000005D2FFFFB38\n000000067F000040020000A0000000C565B4-000000067F000040020000A0000000C5EFA1__0000003336EBF989-00000033D69BE889\n000000067F000040020000A0000000C58000-000000067F000040020000A0000000C5C000__00000033605476A8\n000000067F000040020000A0000000C58000-000000067F000040020000A0000000C5C000__00000038E67ABFA0\n000000067F000040020000A0000000C58000-000000067F000040020000A0000000C5C000__0000003903F1CFE8\n000000067F000040020000A0000000C58000-000000067F000040020000A0000000C5C000__0000003B99F7F8A0\n000000067F000040020000A0000000C58000-000000067F000040020000A0000000C5C000__0000005D2FFFFB38\n000000067F000040020000A0000000C5C000-000000067F000040020000A0000000C60000__00000033605476A8\n000000067F000040020000A0000000C5C000-000000067F000040020000A0000000C60000__00000038E67ABFA0\n000000067F000040020000A0000000C5C000-000000067F000040020000A0000000C60000__0000003903F1CFE8\n000000067F000040020000A0000000C5C000-000000067F000040020000A0000000C60000__0000003B99F7F8A0\n000000067F000040020000A0
000000C5C000-000000067F000040020000A0000000C60000__0000005D2FFFFB38\n000000067F000040020000A0000000C5EFA1-000000067F000040020000A0000000C6797A__0000003336EBF989-00000033D69BE889\n000000067F000040020000A0000000C60000-000000067F000040020000A0000000C64000__00000033605476A8\n000000067F000040020000A0000000C60000-000000067F000040020000A0000000C64000__00000038E67ABFA0\n000000067F000040020000A0000000C60000-000000067F000040020000A0000000C64000__0000003903F1CFE8\n000000067F000040020000A0000000C60000-000000067F000040020000A0000000C64000__0000003B99F7F8A0\n000000067F000040020000A0000000C60000-000000067F000040020000A0000000C64000__0000005D2FFFFB38\n000000067F000040020000A0000000C64000-000000067F000040020000A0000000C68000__00000038E67ABFA0\n000000067F000040020000A0000000C64000-000000067F000040020000A0000000C68000__0000003903F1CFE8\n000000067F000040020000A0000000C64000-000000067F000040020000A0000000C68000__0000003B99F7F8A0\n000000067F000040020000A0000000C64000-000000067F000040020000A0000000C68000__0000005D2FFFFB38\n000000067F000040020000A0000000C64000-030000000000000000000000000000000002__00000033605476A8\n000000067F000040020000A0000000C6797A-000000067F000040020000A0000000C7034B__0000003336EBF989-00000033D69BE889\n000000067F000040020000A0000000C68000-000000067F000040020000A0000000C6C000__00000038E67ABFA0\n000000067F000040020000A0000000C68000-000000067F000040020000A0000000C6C000__0000003903F1CFE8\n000000067F000040020000A0000000C68000-000000067F000040020000A0000000C6C000__0000003B99F7F8A0\n000000067F000040020000A0000000C68000-000000067F000040020000A0000000C6C000__0000005D2FFFFB38\n000000067F000040020000A0000000C6C000-000000067F000040020000A0000000C70000__00000038E67ABFA0\n000000067F000040020000A0000000C6C000-000000067F000040020000A0000000C70000__0000003903F1CFE8\n000000067F000040020000A0000000C6C000-000000067F000040020000A0000000C70000__0000003B99F7F8A0\n000000067F000040020000A0000000C6C000-000000067F000040020000A0000000C70000__0000005D2FFFFB38\n000000067F000040020000A0000000C70000-
000000067F000040020000A0000000C74000__00000038E67ABFA0\n000000067F000040020000A0000000C70000-000000067F000040020000A0000000C74000__0000003903F1CFE8\n000000067F000040020000A0000000C70000-000000067F000040020000A0000000C74000__0000003B99F7F8A0\n000000067F000040020000A0000000C70000-000000067F000040020000A0000000C74000__0000005D2FFFFB38\n000000067F000040020000A0000000C7034B-000000067F000040020000A0000000C78D17__0000003336EBF989-00000033D69BE889\n000000067F000040020000A0000000C74000-000000067F000040020000A0000000C78000__00000038E67ABFA0\n000000067F000040020000A0000000C74000-000000067F000040020000A0000000C78000__0000003903F1CFE8\n000000067F000040020000A0000000C74000-000000067F000040020000A0000000C78000__0000003B99F7F8A0\n000000067F000040020000A0000000C74000-000000067F000040020000A0000000C78000__0000005D2FFFFB38\n000000067F000040020000A0000000C78000-000000067F000040020000A0000000C7C000__00000038E67ABFA0\n000000067F000040020000A0000000C78000-000000067F000040020000A0000000C7C000__0000003903F1CFE8\n000000067F000040020000A0000000C78000-000000067F000040020000A0000000C7C000__0000003B99F7F8A0\n000000067F000040020000A0000000C78000-000000067F000040020000A0000000C7C000__0000005D2FFFFB38\n000000067F000040020000A0000000C78D17-000000067F000040020000A0000000C816E2__0000003336EBF989-00000033D69BE889\n000000067F000040020000A0000000C7C000-000000067F000040020000A0000000C80000__00000038E67ABFA0\n000000067F000040020000A0000000C7C000-000000067F000040020000A0000000C80000__0000003903F1CFE8\n000000067F000040020000A0000000C7C000-000000067F000040020000A0000000C80000__0000003B99F7F8A0\n000000067F000040020000A0000000C7C000-000000067F000040020000A0000000C80000__0000005D2FFFFB38\n000000067F000040020000A0000000C80000-000000067F000040020000A0000000C84000__00000038E67ABFA0\n000000067F000040020000A0000000C80000-000000067F000040020000A0000000C84000__0000003903F1CFE8\n000000067F000040020000A0000000C80000-000000067F000040020000A0000000C84000__0000003B99F7F8A0\n000000067F000040020000A0000000C80000-000000067F000
040020000A0000000C84000__0000005D2FFFFB38\n000000067F000040020000A0000000C816E2-000000067F000040020000A0000000C8A0D8__0000003336EBF989-00000033D69BE889\n000000067F000040020000A0000000C84000-000000067F000040020000A0000000C88000__00000038E67ABFA0\n000000067F000040020000A0000000C84000-000000067F000040020000A0000000C88000__0000003903F1CFE8\n000000067F000040020000A0000000C84000-000000067F000040020000A0000000C88000__0000003B99F7F8A0\n000000067F000040020000A0000000C84000-000000067F000040020000A0000000C88000__0000005D2FFFFB38\n000000067F000040020000A0000000C88000-000000067F000040020000A0000000C8C000__00000038E67ABFA0\n000000067F000040020000A0000000C88000-000000067F000040020000A0000000C8C000__0000003903F1CFE8\n000000067F000040020000A0000000C88000-000000067F000040020000A0000000C8C000__0000003B99F7F8A0\n000000067F000040020000A0000000C88000-000000067F000040020000A0000000C8C000__0000005D2FFFFB38\n000000067F000040020000A0000000C8A0D8-000000067F000040020000A0000000C92AC4__0000003336EBF989-00000033D69BE889\n000000067F000040020000A0000000C8C000-000000067F000040020000A0000000C90000__00000038E67ABFA0\n000000067F000040020000A0000000C8C000-000000067F000040020000A0000000C90000__0000003903F1CFE8\n000000067F000040020000A0000000C8C000-000000067F000040020000A0000000C90000__0000003B99F7F8A0\n000000067F000040020000A0000000C8C000-000000067F000040020000A0000000C90000__0000005D2FFFFB38\n000000067F000040020000A0000000C90000-000000067F000040020000A0000000C94000__00000038E67ABFA0\n000000067F000040020000A0000000C90000-000000067F000040020000A0000000C94000__0000003903F1CFE8\n000000067F000040020000A0000000C90000-000000067F000040020000A0000000C94000__0000003B99F7F8A0\n000000067F000040020000A0000000C90000-000000067F000040020000A0000000C94000__0000005D2FFFFB38\n000000067F000040020000A0000000C92AC4-000000067F000040020000A0000000C9B4AF__0000003336EBF989-00000033D69BE889\n000000067F000040020000A0000000C94000-000000067F000040020000A0000000C98000__00000038E67ABFA0\n000000067F000040020000A0000000C94000-000000067
F000040020000A0000000C98000__0000003903F1CFE8\n000000067F000040020000A0000000C94000-000000067F000040020000A0000000C98000__0000003B99F7F8A0\n000000067F000040020000A0000000C94000-000000067F000040020000A0000000C98000__0000005D2FFFFB38\n000000067F000040020000A0000000C98000-000000067F000040020000A0000000C9C000__00000038E67ABFA0\n000000067F000040020000A0000000C98000-000000067F000040020000A0000000C9C000__0000003903F1CFE8\n000000067F000040020000A0000000C98000-000000067F000040020000A0000000C9C000__0000003B99F7F8A0\n000000067F000040020000A0000000C98000-000000067F000040020000A0000000C9C000__0000005D2FFFFB38\n000000067F000040020000A0000000C9B4AF-000000067F000040020000A0000000CA3E87__0000003336EBF989-00000033D69BE889\n000000067F000040020000A0000000C9C000-000000067F000040020000A0000000CA0000__00000038E67ABFA0\n000000067F000040020000A0000000C9C000-000000067F000040020000A0000000CA0000__0000003903F1CFE8\n000000067F000040020000A0000000C9C000-000000067F000040020000A0000000CA0000__0000003B99F7F8A0\n000000067F000040020000A0000000C9C000-000000067F000040020000A0000000CA0000__0000005D2FFFFB38\n000000067F000040020000A0000000CA0000-000000067F000040020000A0000000CA4000__00000038E67ABFA0\n000000067F000040020000A0000000CA0000-000000067F000040020000A0000000CA4000__0000003903F1CFE8\n000000067F000040020000A0000000CA0000-000000067F000040020000A0000000CA4000__0000003B99F7F8A0\n000000067F000040020000A0000000CA0000-000000067F000040020000A0000000CA4000__0000005D2FFFFB38\n000000067F000040020000A0000000CA3E87-000000067F000040020000A0000200000000__0000003336EBF989-00000033D69BE889\n000000067F000040020000A0000000CA4000-000000067F000040020000A0000000CA8000__00000038E1ABFE28\n000000067F000040020000A0000000CA4000-000000067F000040020000A0000000CA8000__00000038E9AF7F00\n000000067F000040020000A0000000CA4000-000000067F000040020000A0000000CA8000__0000003903F1CFE8\n000000067F000040020000A0000000CA4000-000000067F000040020000A0000000CA8000__0000003B99F7F8A0\n000000067F000040020000A0000000CA4000-000000067F000040020000
A0000000CA8000__0000005D2FFFFB38\n000000067F000040020000A0000000CA403E-000000067F000040020000A0000000CACA12__00000033D69BE889-00000034764BE349\n000000067F000040020000A0000000CA8000-000000067F000040020000A0000000CAC000__00000038E1ABFE28\n000000067F000040020000A0000000CA8000-000000067F000040020000A0000000CAC000__00000038E9AF7F00\n000000067F000040020000A0000000CA8000-000000067F000040020000A0000000CAC000__0000003903F1CFE8\n000000067F000040020000A0000000CA8000-000000067F000040020000A0000000CAC000__0000003B99F7F8A0\n000000067F000040020000A0000000CA8000-000000067F000040020000A0000000CAC000__0000005D2FFFFB38\n000000067F000040020000A0000000CAC000-000000067F000040020000A0000000CB0000__00000038E1ABFE28\n000000067F000040020000A0000000CAC000-000000067F000040020000A0000000CB0000__00000038E9AF7F00\n000000067F000040020000A0000000CAC000-000000067F000040020000A0000000CB0000__0000003903F1CFE8\n000000067F000040020000A0000000CAC000-000000067F000040020000A0000000CB0000__0000003B99F7F8A0\n000000067F000040020000A0000000CAC000-000000067F000040020000A0000000CB0000__0000005D2FFFFB38\n000000067F000040020000A0000000CACA12-000000067F000040020000A0000000CB53E3__00000033D69BE889-00000034764BE349\n000000067F000040020000A0000000CB0000-000000067F000040020000A0000000CB4000__00000038E1ABFE28\n000000067F000040020000A0000000CB0000-000000067F000040020000A0000000CB4000__00000038E9AF7F00\n000000067F000040020000A0000000CB0000-000000067F000040020000A0000000CB4000__0000003903F1CFE8\n000000067F000040020000A0000000CB0000-000000067F000040020000A0000000CB4000__0000003B99F7F8A0\n000000067F000040020000A0000000CB0000-000000067F000040020000A0000000CB4000__0000005D2FFFFB38\n000000067F000040020000A0000000CB4000-000000067F000040020000A0000000CB8000__00000038E1ABFE28\n000000067F000040020000A0000000CB4000-000000067F000040020000A0000000CB8000__00000038E9AF7F00\n000000067F000040020000A0000000CB4000-000000067F000040020000A0000000CB8000__0000003903F1CFE8\n000000067F000040020000A0000000CB4000-000000067F000040020000A0000000CB800
0__0000003B99F7F8A0\n000000067F000040020000A0000000CB4000-000000067F000040020000A0000000CB8000__0000005D2FFFFB38\n000000067F000040020000A0000000CB53E3-000000067F000040020000A0000000CBDDBA__00000033D69BE889-00000034764BE349\n000000067F000040020000A0000000CB8000-000000067F000040020000A0000000CBC000__00000038E1ABFE28\n000000067F000040020000A0000000CB8000-000000067F000040020000A0000000CBC000__00000038E9AF7F00\n000000067F000040020000A0000000CB8000-000000067F000040020000A0000000CBC000__0000003903F1CFE8\n000000067F000040020000A0000000CB8000-000000067F000040020000A0000000CBC000__0000003B99F7F8A0\n000000067F000040020000A0000000CB8000-000000067F000040020000A0000000CBC000__0000005D2FFFFB38\n000000067F000040020000A0000000CBC000-000000067F000040020000A0000000CC0000__00000038E1ABFE28\n000000067F000040020000A0000000CBC000-000000067F000040020000A0000000CC0000__00000038E9AF7F00\n000000067F000040020000A0000000CBC000-000000067F000040020000A0000000CC0000__0000003903F1CFE8\n000000067F000040020000A0000000CBC000-000000067F000040020000A0000000CC0000__0000003B99F7F8A0\n000000067F000040020000A0000000CBC000-000000067F000040020000A0000000CC0000__0000005D2FFFFB38\n000000067F000040020000A0000000CBDDBA-000000067F000040020000A0000000CC67A6__00000033D69BE889-00000034764BE349\n000000067F000040020000A0000000CC0000-000000067F000040020000A0000000CC4000__00000038E1ABFE28\n000000067F000040020000A0000000CC0000-000000067F000040020000A0000000CC4000__00000038E9AF7F00\n000000067F000040020000A0000000CC0000-000000067F000040020000A0000000CC4000__0000003903F1CFE8\n000000067F000040020000A0000000CC0000-000000067F000040020000A0000000CC4000__0000003B99F7F8A0\n000000067F000040020000A0000000CC0000-000000067F000040020000A0000000CC4000__0000005D2FFFFB38\n000000067F000040020000A0000000CC4000-000000067F000040020000A0000000CC8000__00000038E1ABFE28\n000000067F000040020000A0000000CC4000-000000067F000040020000A0000000CC8000__00000038E9AF7F00\n000000067F000040020000A0000000CC4000-000000067F000040020000A0000000CC8000__0000003903
F1CFE8\n000000067F000040020000A0000000CC4000-000000067F000040020000A0000000CC8000__0000003B99F7F8A0\n000000067F000040020000A0000000CC4000-000000067F000040020000A0000000CC8000__0000005D2FFFFB38\n000000067F000040020000A0000000CC67A6-000000067F000040020000A0000000CCF196__00000033D69BE889-00000034764BE349\n000000067F000040020000A0000000CC8000-000000067F000040020000A0000000CCC000__00000038E1ABFE28\n000000067F000040020000A0000000CC8000-000000067F000040020000A0000000CCC000__00000038E9AF7F00\n000000067F000040020000A0000000CC8000-000000067F000040020000A0000000CCC000__0000003903F1CFE8\n000000067F000040020000A0000000CC8000-000000067F000040020000A0000000CCC000__0000003B99F7F8A0\n000000067F000040020000A0000000CC8000-000000067F000040020000A0000000CCC000__0000005D2FFFFB38\n000000067F000040020000A0000000CCC000-000000067F000040020000A0000000CD0000__00000038E1ABFE28\n000000067F000040020000A0000000CCC000-000000067F000040020000A0000000CD0000__00000038E9AF7F00\n000000067F000040020000A0000000CCC000-000000067F000040020000A0000000CD0000__0000003903F1CFE8\n000000067F000040020000A0000000CCC000-000000067F000040020000A0000000CD0000__0000003B99F7F8A0\n000000067F000040020000A0000000CCC000-000000067F000040020000A0000000CD0000__0000005D2FFFFB38\n000000067F000040020000A0000000CCF196-000000067F000040020000A0000000CD7BA1__00000033D69BE889-00000034764BE349\n000000067F000040020000A0000000CD0000-000000067F000040020000A0000000CD4000__00000038E1ABFE28\n000000067F000040020000A0000000CD0000-000000067F000040020000A0000000CD4000__00000038E9AF7F00\n000000067F000040020000A0000000CD0000-000000067F000040020000A0000000CD4000__0000003903F1CFE8\n000000067F000040020000A0000000CD0000-000000067F000040020000A0000000CD4000__0000003B99F7F8A0\n000000067F000040020000A0000000CD0000-000000067F000040020000A0000000CD4000__0000005D2FFFFB38\n000000067F000040020000A0000000CD4000-000000067F000040020000A0000000CD8000__00000038E1ABFE28\n000000067F000040020000A0000000CD4000-000000067F000040020000A0000000CD8000__00000038E9AF7F00\n00000
0067F000040020000A0000000CD4000-000000067F000040020000A0000000CD8000__0000003903F1CFE8\n000000067F000040020000A0000000CD4000-000000067F000040020000A0000000CD8000__0000003B99F7F8A0\n000000067F000040020000A0000000CD4000-000000067F000040020000A0000000CD8000__0000005D2FFFFB38\n000000067F000040020000A0000000CD7BA1-000000067F000040020000A0000000CE0577__00000033D69BE889-00000034764BE349\n000000067F000040020000A0000000CD8000-000000067F000040020000A0000000CDC000__00000038E1ABFE28\n000000067F000040020000A0000000CD8000-000000067F000040020000A0000000CDC000__00000038E9AF7F00\n000000067F000040020000A0000000CD8000-000000067F000040020000A0000000CDC000__0000003903F1CFE8\n000000067F000040020000A0000000CD8000-000000067F000040020000A0000000CDC000__0000003B99F7F8A0\n000000067F000040020000A0000000CD8000-000000067F000040020000A0000000CDC000__0000005D2FFFFB38\n000000067F000040020000A0000000CDC000-000000067F000040020000A0000000CE0000__00000038E1ABFE28\n000000067F000040020000A0000000CDC000-000000067F000040020000A0000000CE0000__00000038E9AF7F00\n000000067F000040020000A0000000CDC000-000000067F000040020000A0000000CE0000__0000003903F1CFE8\n000000067F000040020000A0000000CDC000-000000067F000040020000A0000000CE0000__0000003B99F7F8A0\n000000067F000040020000A0000000CDC000-000000067F000040020000A0000000CE0000__0000005D2FFFFB38\n000000067F000040020000A0000000CE0000-000000067F000040020000A0000000CE4000__00000038E1ABFE28\n000000067F000040020000A0000000CE0000-000000067F000040020000A0000000CE4000__00000038E9AF7F00\n000000067F000040020000A0000000CE0000-000000067F000040020000A0000000CE4000__0000003903F1CFE8\n000000067F000040020000A0000000CE0000-000000067F000040020000A0000000CE4000__0000003B99F7F8A0\n000000067F000040020000A0000000CE0000-000000067F000040020000A0000000CE4000__0000005D2FFFFB38\n000000067F000040020000A0000000CE0577-000000067F000040020000A0000000CE8F57__00000033D69BE889-00000034764BE349\n000000067F000040020000A0000000CE4000-000000067F000040020000A0000000CE8000__00000038E1ABFE28\n000000067F00004002
0000A0000000CE4000-000000067F000040020000A0000000CE8000__00000038E9AF7F00\n000000067F000040020000A0000000CE4000-000000067F000040020000A0000000CE8000__0000003903F1CFE8\n000000067F000040020000A0000000CE4000-000000067F000040020000A0000000CE8000__0000003B99F7F8A0\n000000067F000040020000A0000000CE4000-000000067F000040020000A0000000CE8000__0000005D2FFFFB38\n000000067F000040020000A0000000CE8000-000000067F000040020000A0000000CEC000__00000038E1ABFE28\n000000067F000040020000A0000000CE8000-000000067F000040020000A0000000CEC000__00000038E9AF7F00\n000000067F000040020000A0000000CE8000-000000067F000040020000A0000000CEC000__0000003903F1CFE8\n000000067F000040020000A0000000CE8000-000000067F000040020000A0000000CEC000__0000003B99F7F8A0\n000000067F000040020000A0000000CE8000-000000067F000040020000A0000000CEC000__0000005D2FFFFB38\n000000067F000040020000A0000000CE8F57-000000067F000040020000A0000000CF1933__00000033D69BE889-00000034764BE349\n000000067F000040020000A0000000CEC000-000000067F000040020000A0000000CF0000__00000038E1ABFE28\n000000067F000040020000A0000000CEC000-000000067F000040020000A0000000CF0000__00000038E9AF7F00\n000000067F000040020000A0000000CEC000-000000067F000040020000A0000000CF0000__0000003903F1CFE8\n000000067F000040020000A0000000CEC000-000000067F000040020000A0000000CF0000__0000003B99F7F8A0\n000000067F000040020000A0000000CEC000-000000067F000040020000A0000000CF0000__0000005D2FFFFB38\n000000067F000040020000A0000000CF0000-000000067F000040020000A0000000CF4000__00000038E1ABFE28\n000000067F000040020000A0000000CF0000-000000067F000040020000A0000000CF4000__00000038E9AF7F00\n000000067F000040020000A0000000CF0000-000000067F000040020000A0000000CF4000__0000003903F1CFE8\n000000067F000040020000A0000000CF0000-000000067F000040020000A0000000CF4000__0000003B99F7F8A0\n000000067F000040020000A0000000CF0000-000000067F000040020000A0000000CF4000__0000005D2FFFFB38\n000000067F000040020000A0000000CF1933-000000067F000040020000A0000000CFA300__00000033D69BE889-00000034764BE349\n000000067F000040020000A0000000C
F4000-000000067F000040020000A0000000CF8000__00000038E1ABFE28\n000000067F000040020000A0000000CF4000-000000067F000040020000A0000000CF8000__00000038E9AF7F00\n000000067F000040020000A0000000CF4000-000000067F000040020000A0000000CF8000__0000003903F1CFE8\n000000067F000040020000A0000000CF4000-000000067F000040020000A0000000CF8000__0000003B99F7F8A0\n000000067F000040020000A0000000CF4000-000000067F000040020000A0000000CF8000__0000005D2FFFFB38\n000000067F000040020000A0000000CF8000-000000067F000040020000A0000000CFC000__0000003545E7DCF0\n000000067F000040020000A0000000CF8000-000000067F000040020000A0000000CFC000__00000038E9AF7F00\n000000067F000040020000A0000000CF8000-000000067F000040020000A0000000CFC000__0000003903F1CFE8\n000000067F000040020000A0000000CF8000-000000067F000040020000A0000000CFC000__0000003B99F7F8A0\n000000067F000040020000A0000000CF8000-000000067F000040020000A0000000CFC000__0000005D2FFFFB38\n000000067F000040020000A0000000CFA300-000000067F000040020000A0000200000000__00000033D69BE889-00000034764BE349\n000000067F000040020000A0000000CFA548-000000067F000040020000A0000000D02F25__00000034764BE349-0000003525F3D179\n000000067F000040020000A0000000CFC000-000000067F000040020000A0000000D00000__0000003545E7DCF0\n000000067F000040020000A0000000CFC000-000000067F000040020000A0000000D00000__00000038E9AF7F00\n000000067F000040020000A0000000CFC000-000000067F000040020000A0000000D00000__0000003903F1CFE8\n000000067F000040020000A0000000CFC000-000000067F000040020000A0000000D00000__0000003B99F7F8A0\n000000067F000040020000A0000000CFC000-000000067F000040020000A0000000D00000__0000005D2FFFFB38\n000000067F000040020000A0000000D00000-000000067F000040020000A0000000D04000__0000003545E7DCF0\n000000067F000040020000A0000000D00000-000000067F000040020000A0000000D04000__00000038E9AF7F00\n000000067F000040020000A0000000D00000-000000067F000040020000A0000000D04000__0000003903F1CFE8\n000000067F000040020000A0000000D00000-000000067F000040020000A0000000D04000__0000003B99F7F8A0\n000000067F000040020000A0000000D00000-0000000
67F000040020000A0000000D04000__0000005D2FFFFB38\n000000067F000040020000A0000000D02F25-000000067F000040020000A0000000D0B903__00000034764BE349-0000003525F3D179\n000000067F000040020000A0000000D04000-000000067F000040020000A0000000D08000__0000003545E7DCF0\n000000067F000040020000A0000000D04000-000000067F000040020000A0000000D08000__00000038E9AF7F00\n000000067F000040020000A0000000D04000-000000067F000040020000A0000000D08000__0000003903F1CFE8\n000000067F000040020000A0000000D04000-000000067F000040020000A0000000D08000__0000003B99F7F8A0\n000000067F000040020000A0000000D04000-000000067F000040020000A0000000D08000__0000005D2FFFFB38\n000000067F000040020000A0000000D08000-000000067F000040020000A0000000D0C000__0000003545E7DCF0\n000000067F000040020000A0000000D08000-000000067F000040020000A0000000D0C000__00000038E9AF7F00\n000000067F000040020000A0000000D08000-000000067F000040020000A0000000D0C000__0000003903F1CFE8\n000000067F000040020000A0000000D08000-000000067F000040020000A0000000D0C000__0000003B99F7F8A0\n000000067F000040020000A0000000D08000-000000067F000040020000A0000000D0C000__0000005D2FFFFB38\n000000067F000040020000A0000000D0B903-000000067F000040020000A0000000D142DA__00000034764BE349-0000003525F3D179\n000000067F000040020000A0000000D0C000-000000067F000040020000A0000000D10000__0000003545E7DCF0\n000000067F000040020000A0000000D0C000-000000067F000040020000A0000000D10000__00000038E9AF7F00\n000000067F000040020000A0000000D0C000-000000067F000040020000A0000000D10000__0000003903F1CFE8\n000000067F000040020000A0000000D0C000-000000067F000040020000A0000000D10000__0000003B99F7F8A0\n000000067F000040020000A0000000D0C000-000000067F000040020000A0000000D10000__0000005D2FFFFB38\n000000067F000040020000A0000000D10000-000000067F000040020000A0000000D14000__0000003545E7DCF0\n000000067F000040020000A0000000D10000-000000067F000040020000A0000000D14000__00000038E9AF7F00\n000000067F000040020000A0000000D10000-000000067F000040020000A0000000D14000__0000003903F1CFE8\n000000067F000040020000A0000000D10000-000000067F0000400200
00A0000000D14000__0000003B99F7F8A0\n000000067F000040020000A0000000D10000-000000067F000040020000A0000000D14000__0000005D2FFFFB38\n000000067F000040020000A0000000D14000-000000067F000040020000A0000000D18000__0000003545E7DCF0\n000000067F000040020000A0000000D14000-000000067F000040020000A0000000D18000__00000038E9AF7F00\n000000067F000040020000A0000000D14000-000000067F000040020000A0000000D18000__0000003903F1CFE8\n000000067F000040020000A0000000D14000-000000067F000040020000A0000000D18000__0000003B99F7F8A0\n000000067F000040020000A0000000D14000-000000067F000040020000A0000000D18000__0000005D2FFFFB38\n000000067F000040020000A0000000D142DA-000000067F000040020000A0000000D1CCBE__00000034764BE349-0000003525F3D179\n000000067F000040020000A0000000D18000-000000067F000040020000A0000000D1C000__0000003545E7DCF0\n000000067F000040020000A0000000D18000-000000067F000040020000A0000000D1C000__00000038E9AF7F00\n000000067F000040020000A0000000D18000-000000067F000040020000A0000000D1C000__0000003903F1CFE8\n000000067F000040020000A0000000D18000-000000067F000040020000A0000000D1C000__0000003B99F7F8A0\n000000067F000040020000A0000000D18000-000000067F000040020000A0000000D1C000__0000005D2FFFFB38\n000000067F000040020000A0000000D1C000-000000067F000040020000A0000000D20000__0000003545E7DCF0\n000000067F000040020000A0000000D1C000-000000067F000040020000A0000000D20000__00000038E9AF7F00\n000000067F000040020000A0000000D1C000-000000067F000040020000A0000000D20000__0000003903F1CFE8\n000000067F000040020000A0000000D1C000-000000067F000040020000A0000000D20000__0000003B99F7F8A0\n000000067F000040020000A0000000D1C000-000000067F000040020000A0000000D20000__0000005D2FFFFB38\n000000067F000040020000A0000000D1CCBE-000000067F000040020000A0000000D25694__00000034764BE349-0000003525F3D179\n000000067F000040020000A0000000D20000-000000067F000040020000A0000000D24000__0000003545E7DCF0\n000000067F000040020000A0000000D20000-000000067F000040020000A0000000D24000__00000038E9AF7F00\n000000067F000040020000A0000000D20000-000000067F000040020000A0000000D24
000__0000003903F1CFE8\n000000067F000040020000A0000000D20000-000000067F000040020000A0000000D24000__0000003B99F7F8A0\n000000067F000040020000A0000000D20000-000000067F000040020000A0000000D24000__0000005D2FFFFB38\n000000067F000040020000A0000000D24000-000000067F000040020000A0000000D28000__0000003545E7DCF0\n000000067F000040020000A0000000D24000-000000067F000040020000A0000000D28000__00000038E9AF7F00\n000000067F000040020000A0000000D24000-000000067F000040020000A0000000D28000__0000003903F1CFE8\n000000067F000040020000A0000000D24000-000000067F000040020000A0000000D28000__0000003B99F7F8A0\n000000067F000040020000A0000000D24000-000000067F000040020000A0000000D28000__0000005D2FFFFB38\n000000067F000040020000A0000000D25694-000000067F000040020000A0000000D2E06B__00000034764BE349-0000003525F3D179\n000000067F000040020000A0000000D28000-000000067F000040020000A0000000D2C000__0000003545E7DCF0\n000000067F000040020000A0000000D28000-000000067F000040020000A0000000D2C000__00000038E9AF7F00\n000000067F000040020000A0000000D28000-000000067F000040020000A0000000D2C000__0000003903F1CFE8\n000000067F000040020000A0000000D28000-000000067F000040020000A0000000D2C000__0000003B99F7F8A0\n000000067F000040020000A0000000D28000-000000067F000040020000A0000000D2C000__0000005D2FFFFB38\n000000067F000040020000A0000000D2C000-000000067F000040020000A0000000D30000__0000003545E7DCF0\n000000067F000040020000A0000000D2C000-000000067F000040020000A0000000D30000__00000038E9AF7F00\n000000067F000040020000A0000000D2C000-000000067F000040020000A0000000D30000__0000003903F1CFE8\n000000067F000040020000A0000000D2C000-000000067F000040020000A0000000D30000__0000003B99F7F8A0\n000000067F000040020000A0000000D2C000-000000067F000040020000A0000000D30000__0000005D2FFFFB38\n000000067F000040020000A0000000D2E06B-000000067F000040020000A0000000D36A3F__00000034764BE349-0000003525F3D179\n000000067F000040020000A0000000D30000-000000067F000040020000A0000000D34000__0000003545E7DCF0\n000000067F000040020000A0000000D30000-000000067F000040020000A0000000D34000__00000038
E9AF7F00\n000000067F000040020000A0000000D30000-000000067F000040020000A0000000D34000__0000003903F1CFE8\n000000067F000040020000A0000000D30000-000000067F000040020000A0000000D34000__0000003B99F7F8A0\n000000067F000040020000A0000000D30000-000000067F000040020000A0000000D34000__0000005D2FFFFB38\n000000067F000040020000A0000000D34000-000000067F000040020000A0000000D38000__0000003545E7DCF0\n000000067F000040020000A0000000D34000-000000067F000040020000A0000000D38000__00000038E9AF7F00\n000000067F000040020000A0000000D34000-000000067F000040020000A0000000D38000__0000003903F1CFE8\n000000067F000040020000A0000000D34000-000000067F000040020000A0000000D38000__0000003B99F7F8A0\n000000067F000040020000A0000000D34000-000000067F000040020000A0000000D38000__0000005D2FFFFB38\n000000067F000040020000A0000000D36A3F-000000067F000040020000A0000000D3F41D__00000034764BE349-0000003525F3D179\n000000067F000040020000A0000000D38000-000000067F000040020000A0000000D3C000__0000003545E7DCF0\n000000067F000040020000A0000000D38000-000000067F000040020000A0000000D3C000__00000038E9AF7F00\n000000067F000040020000A0000000D38000-000000067F000040020000A0000000D3C000__0000003903F1CFE8\n000000067F000040020000A0000000D38000-000000067F000040020000A0000000D3C000__0000003B99F7F8A0\n000000067F000040020000A0000000D38000-000000067F000040020000A0000000D3C000__0000005D2FFFFB38\n000000067F000040020000A0000000D3C000-000000067F000040020000A0000000D40000__0000003545E7DCF0\n000000067F000040020000A0000000D3C000-000000067F000040020000A0000000D40000__00000038E9AF7F00\n000000067F000040020000A0000000D3C000-000000067F000040020000A0000000D40000__0000003903F1CFE8\n000000067F000040020000A0000000D3C000-000000067F000040020000A0000000D40000__0000003B99F7F8A0\n000000067F000040020000A0000000D3C000-000000067F000040020000A0000000D40000__0000005D2FFFFB38\n000000067F000040020000A0000000D3F41D-000000067F000040020000A0000000D47DFC__00000034764BE349-0000003525F3D179\n000000067F000040020000A0000000D40000-000000067F000040020000A0000000D44000__0000003545E7DCF0\n000
000067F000040020000A0000000D40000-000000067F000040020000A0000000D44000__00000038E9AF7F00\n000000067F000040020000A0000000D40000-000000067F000040020000A0000000D44000__0000003903F1CFE8\n000000067F000040020000A0000000D40000-000000067F000040020000A0000000D44000__0000003B99F7F8A0\n000000067F000040020000A0000000D40000-000000067F000040020000A0000000D44000__0000005D2FFFFB38\n000000067F000040020000A0000000D44000-000000067F000040020000A0000000D48000__0000003545E7DCF0\n000000067F000040020000A0000000D44000-000000067F000040020000A0000000D48000__00000038E9AF7F00\n000000067F000040020000A0000000D44000-000000067F000040020000A0000000D48000__0000003903F1CFE8\n000000067F000040020000A0000000D44000-000000067F000040020000A0000000D48000__0000003B99F7F8A0\n000000067F000040020000A0000000D44000-000000067F000040020000A0000000D48000__0000005D2FFFFB38\n000000067F000040020000A0000000D47DFC-000000067F000040020000A0000000D507EE__00000034764BE349-0000003525F3D179\n000000067F000040020000A0000000D48000-000000067F000040020000A0000000D4C000__0000003545E7DCF0\n000000067F000040020000A0000000D48000-000000067F000040020000A0000000D4C000__00000038E9AF7F00\n000000067F000040020000A0000000D48000-000000067F000040020000A0000000D4C000__0000003903F1CFE8\n000000067F000040020000A0000000D48000-000000067F000040020000A0000000D4C000__0000003B99F7F8A0\n000000067F000040020000A0000000D48000-000000067F000040020000A0000000D4C000__0000005D2FFFFB38\n000000067F000040020000A0000000D4C000-000000067F000040020000A0000000D50000__0000003545E7DCF0\n000000067F000040020000A0000000D4C000-000000067F000040020000A0000000D50000__00000038E9AF7F00\n000000067F000040020000A0000000D4C000-000000067F000040020000A0000000D50000__0000003903F1CFE8\n000000067F000040020000A0000000D4C000-000000067F000040020000A0000000D50000__0000003B99F7F8A0\n000000067F000040020000A0000000D4C000-000000067F000040020000A0000000D50000__0000005D2FFFFB38\n000000067F000040020000A0000000D50000-000000067F000040020000A0000000D54000__0000003545E7DCF0\n000000067F000040020000A0000000D50
000-000000067F000040020000A0000000D54000__00000038E9AF7F00\n000000067F000040020000A0000000D50000-000000067F000040020000A0000000D54000__0000003903F1CFE8\n000000067F000040020000A0000000D50000-000000067F000040020000A0000000D54000__0000003B99F7F8A0\n000000067F000040020000A0000000D50000-000000067F000040020000A0000000D54000__0000005D2FFFFB38\n000000067F000040020000A0000000D507EE-000000067F000040020000A0000000D591D1__00000034764BE349-0000003525F3D179\n000000067F000040020000A0000000D54000-000000067F000040020000A0000000D58000__0000003545E7DCF0\n000000067F000040020000A0000000D54000-000000067F000040020000A0000000D58000__00000038E9AF7F00\n000000067F000040020000A0000000D54000-000000067F000040020000A0000000D58000__0000003903F1CFE8\n000000067F000040020000A0000000D54000-000000067F000040020000A0000000D58000__0000003B99F7F8A0\n000000067F000040020000A0000000D54000-000000067F000040020000A0000000D58000__0000005D2FFFFB38\n000000067F000040020000A0000000D58000-000000067F000040020000A0000000D5C000__0000003545E7DCF0\n000000067F000040020000A0000000D58000-000000067F000040020000A0000000D5C000__00000038E67ABFA0\n000000067F000040020000A0000000D58000-000000067F000040020000A0000000D5C000__0000003903F1CFE8\n000000067F000040020000A0000000D58000-000000067F000040020000A0000000D5C000__0000003B99F7F8A0\n000000067F000040020000A0000000D58000-000000067F000040020000A0000000D5C000__0000005D2FFFFB38\n000000067F000040020000A0000000D591D1-000000067F000040020000A0000200000000__00000034764BE349-0000003525F3D179\n000000067F000040020000A0000000D593E0-000000067F000040020000A0000000D61DB9__0000003525F3D179-00000035C5A3EE11\n000000067F000040020000A0000000D5C000-000000067F000040020000A0000000D60000__0000003545E7DCF0\n000000067F000040020000A0000000D5C000-000000067F000040020000A0000000D60000__00000038E67ABFA0\n000000067F000040020000A0000000D5C000-000000067F000040020000A0000000D60000__0000003903F1CFE8\n000000067F000040020000A0000000D5C000-000000067F000040020000A0000000D60000__0000003B99F7F8A0\n000000067F000040020000A000000
0D5C000-000000067F000040020000A0000000D60000__0000005D2FFFFB38\n000000067F000040020000A0000000D60000-000000067F000040020000A0000000D64000__0000003545E7DCF0\n000000067F000040020000A0000000D60000-000000067F000040020000A0000000D64000__00000038E67ABFA0\n000000067F000040020000A0000000D60000-000000067F000040020000A0000000D64000__0000003903F1CFE8\n000000067F000040020000A0000000D60000-000000067F000040020000A0000000D64000__0000003B99F7F8A0\n000000067F000040020000A0000000D60000-000000067F000040020000A0000000D64000__0000005D2FFFFB38\n000000067F000040020000A0000000D61DB9-000000067F000040020000A0000000D6A793__0000003525F3D179-00000035C5A3EE11\n000000067F000040020000A0000000D64000-000000067F000040020000A0000000D68000__0000003545E7DCF0\n000000067F000040020000A0000000D64000-000000067F000040020000A0000000D68000__00000038E67ABFA0\n000000067F000040020000A0000000D64000-000000067F000040020000A0000000D68000__0000003903F1CFE8\n000000067F000040020000A0000000D64000-000000067F000040020000A0000000D68000__0000003B99F7F8A0\n000000067F000040020000A0000000D64000-000000067F000040020000A0000000D68000__0000005D2FFFFB38\n000000067F000040020000A0000000D68000-000000067F000040020000A0000000D6C000__00000038E67ABFA0\n000000067F000040020000A0000000D68000-000000067F000040020000A0000000D6C000__0000003903F1CFE8\n000000067F000040020000A0000000D68000-000000067F000040020000A0000000D6C000__0000003B99F7F8A0\n000000067F000040020000A0000000D68000-000000067F000040020000A0000000D6C000__0000005D2FFFFB38\n000000067F000040020000A0000000D68000-030000000000000000000000000000000002__0000003545E7DCF0\n000000067F000040020000A0000000D6A793-000000067F000040020000A0000000D73179__0000003525F3D179-00000035C5A3EE11\n000000067F000040020000A0000000D6C000-000000067F000040020000A0000000D70000__00000038E67ABFA0\n000000067F000040020000A0000000D6C000-000000067F000040020000A0000000D70000__0000003903F1CFE8\n000000067F000040020000A0000000D6C000-000000067F000040020000A0000000D70000__0000003B99F7F8A0\n000000067F000040020000A0000000D6C000-00000
0067F000040020000A0000000D70000__0000005D2FFFFB38\n000000067F000040020000A0000000D70000-000000067F000040020000A0000000D74000__00000038E67ABFA0\n000000067F000040020000A0000000D70000-000000067F000040020000A0000000D74000__0000003903F1CFE8\n000000067F000040020000A0000000D70000-000000067F000040020000A0000000D74000__0000003B99F7F8A0\n000000067F000040020000A0000000D70000-000000067F000040020000A0000000D74000__0000005D2FFFFB38\n000000067F000040020000A0000000D73179-000000067F000040020000A0000000D7BB57__0000003525F3D179-00000035C5A3EE11\n000000067F000040020000A0000000D74000-000000067F000040020000A0000000D78000__00000038E67ABFA0\n000000067F000040020000A0000000D74000-000000067F000040020000A0000000D78000__0000003903F1CFE8\n000000067F000040020000A0000000D74000-000000067F000040020000A0000000D78000__0000003B99F7F8A0\n000000067F000040020000A0000000D74000-000000067F000040020000A0000000D78000__0000005D2FFFFB38\n000000067F000040020000A0000000D78000-000000067F000040020000A0000000D7C000__00000038E67ABFA0\n000000067F000040020000A0000000D78000-000000067F000040020000A0000000D7C000__0000003903F1CFE8\n000000067F000040020000A0000000D78000-000000067F000040020000A0000000D7C000__0000003B99F7F8A0\n000000067F000040020000A0000000D78000-000000067F000040020000A0000000D7C000__0000005D2FFFFB38\n000000067F000040020000A0000000D7BB57-000000067F000040020000A0000000D8453C__0000003525F3D179-00000035C5A3EE11\n000000067F000040020000A0000000D7C000-000000067F000040020000A0000000D80000__00000038E67ABFA0\n000000067F000040020000A0000000D7C000-000000067F000040020000A0000000D80000__0000003903F1CFE8\n000000067F000040020000A0000000D7C000-000000067F000040020000A0000000D80000__0000003B99F7F8A0\n000000067F000040020000A0000000D7C000-000000067F000040020000A0000000D80000__0000005D2FFFFB38\n000000067F000040020000A0000000D80000-000000067F000040020000A0000000D84000__00000038E67ABFA0\n000000067F000040020000A0000000D80000-000000067F000040020000A0000000D84000__0000003903F1CFE8\n000000067F000040020000A0000000D80000-000000067F00004002
0000A0000000D84000__0000003B99F7F8A0\n000000067F000040020000A0000000D80000-000000067F000040020000A0000000D84000__0000005D2FFFFB38\n000000067F000040020000A0000000D84000-000000067F000040020000A0000000D88000__00000038E67ABFA0\n000000067F000040020000A0000000D84000-000000067F000040020000A0000000D88000__0000003903F1CFE8\n000000067F000040020000A0000000D84000-000000067F000040020000A0000000D88000__0000003B99F7F8A0\n000000067F000040020000A0000000D84000-000000067F000040020000A0000000D88000__0000005D2FFFFB38\n000000067F000040020000A0000000D8453C-000000067F000040020000A0000000D8CF1B__0000003525F3D179-00000035C5A3EE11\n000000067F000040020000A0000000D88000-000000067F000040020000A0000000D8C000__00000038E67ABFA0\n000000067F000040020000A0000000D88000-000000067F000040020000A0000000D8C000__0000003903F1CFE8\n000000067F000040020000A0000000D88000-000000067F000040020000A0000000D8C000__0000003B99F7F8A0\n000000067F000040020000A0000000D88000-000000067F000040020000A0000000D8C000__0000005D2FFFFB38\n000000067F000040020000A0000000D8C000-000000067F000040020000A0000000D90000__00000038E67ABFA0\n000000067F000040020000A0000000D8C000-000000067F000040020000A0000000D90000__0000003903F1CFE8\n000000067F000040020000A0000000D8C000-000000067F000040020000A0000000D90000__0000003B99F7F8A0\n000000067F000040020000A0000000D8C000-000000067F000040020000A0000000D90000__0000005D2FFFFB38\n000000067F000040020000A0000000D8CF1B-000000067F000040020000A0000000D958EB__0000003525F3D179-00000035C5A3EE11\n000000067F000040020000A0000000D90000-000000067F000040020000A0000000D94000__00000038E67ABFA0\n000000067F000040020000A0000000D90000-000000067F000040020000A0000000D94000__0000003903F1CFE8\n000000067F000040020000A0000000D90000-000000067F000040020000A0000000D94000__0000003B99F7F8A0\n000000067F000040020000A0000000D90000-000000067F000040020000A0000000D94000__0000005D2FFFFB38\n000000067F000040020000A0000000D94000-000000067F000040020000A0000000D98000__00000038E67ABFA0\n000000067F000040020000A0000000D94000-000000067F000040020000A0000000D
98000__0000003903F1CFE8\n000000067F000040020000A0000000D94000-000000067F000040020000A0000000D98000__0000003B99F7F8A0\n000000067F000040020000A0000000D94000-000000067F000040020000A0000000D98000__0000005D2FFFFB38\n000000067F000040020000A0000000D958EB-000000067F000040020000A0000000D9E2CF__0000003525F3D179-00000035C5A3EE11\n000000067F000040020000A0000000D98000-000000067F000040020000A0000000D9C000__00000038E67ABFA0\n000000067F000040020000A0000000D98000-000000067F000040020000A0000000D9C000__0000003903F1CFE8\n000000067F000040020000A0000000D98000-000000067F000040020000A0000000D9C000__0000003B99F7F8A0\n000000067F000040020000A0000000D98000-000000067F000040020000A0000000D9C000__0000005D2FFFFB38\n000000067F000040020000A0000000D9C000-000000067F000040020000A0000000DA0000__00000038E67ABFA0\n000000067F000040020000A0000000D9C000-000000067F000040020000A0000000DA0000__0000003903F1CFE8\n000000067F000040020000A0000000D9C000-000000067F000040020000A0000000DA0000__0000003B99F7F8A0\n000000067F000040020000A0000000D9C000-000000067F000040020000A0000000DA0000__0000005D2FFFFB38\n000000067F000040020000A0000000D9E2CF-000000067F000040020000A0000000DA6CA5__0000003525F3D179-00000035C5A3EE11\n000000067F000040020000A0000000DA0000-000000067F000040020000A0000000DA4000__00000038E67ABFA0\n000000067F000040020000A0000000DA0000-000000067F000040020000A0000000DA4000__0000003903F1CFE8\n000000067F000040020000A0000000DA0000-000000067F000040020000A0000000DA4000__0000003B99F7F8A0\n000000067F000040020000A0000000DA0000-000000067F000040020000A0000000DA4000__0000005D2FFFFB38\n000000067F000040020000A0000000DA4000-000000067F000040020000A0000000DA8000__00000038E67ABFA0\n000000067F000040020000A0000000DA4000-000000067F000040020000A0000000DA8000__0000003903F1CFE8\n000000067F000040020000A0000000DA4000-000000067F000040020000A0000000DA8000__0000003B99F7F8A0\n000000067F000040020000A0000000DA4000-000000067F000040020000A0000000DA8000__0000005D2FFFFB38\n000000067F000040020000A0000000DA6CA5-000000067F000040020000A0000000DAF684__000000
3525F3D179-00000035C5A3EE11\n000000067F000040020000A0000000DA8000-000000067F000040020000A0000000DAC000__00000038E67ABFA0\n000000067F000040020000A0000000DA8000-000000067F000040020000A0000000DAC000__0000003903F1CFE8\n000000067F000040020000A0000000DA8000-000000067F000040020000A0000000DAC000__0000003B99F7F8A0\n000000067F000040020000A0000000DA8000-000000067F000040020000A0000000DAC000__0000005D2FFFFB38\n000000067F000040020000A0000000DAC000-000000067F000040020000A0000000DB0000__00000038E1ABFE28\n000000067F000040020000A0000000DAC000-000000067F000040020000A0000000DB0000__00000038E9AF7F00\n000000067F000040020000A0000000DAC000-000000067F000040020000A0000000DB0000__0000003903F1CFE8\n000000067F000040020000A0000000DAC000-000000067F000040020000A0000000DB0000__0000003B99F7F8A0\n000000067F000040020000A0000000DAC000-000000067F000040020000A0000000DB0000__0000005D2FFFFB38\n000000067F000040020000A0000000DAF684-000000067F000040020000A0000200000000__0000003525F3D179-00000035C5A3EE11\n000000067F000040020000A0000000DAF8EF-000000067F000040020000A0000000DB82C7__00000035C5A3EE11-000000366553DF11\n000000067F000040020000A0000000DB0000-000000067F000040020000A0000000DB4000__00000038E1ABFE28\n000000067F000040020000A0000000DB0000-000000067F000040020000A0000000DB4000__00000038E9AF7F00\n000000067F000040020000A0000000DB0000-000000067F000040020000A0000000DB4000__0000003903F1CFE8\n000000067F000040020000A0000000DB0000-000000067F000040020000A0000000DB4000__0000003B99F7F8A0\n000000067F000040020000A0000000DB0000-000000067F000040020000A0000000DB4000__0000005D2FFFFB38\n000000067F000040020000A0000000DB4000-000000067F000040020000A0000000DB8000__00000038E1ABFE28\n000000067F000040020000A0000000DB4000-000000067F000040020000A0000000DB8000__00000038E9AF7F00\n000000067F000040020000A0000000DB4000-000000067F000040020000A0000000DB8000__0000003903F1CFE8\n000000067F000040020000A0000000DB4000-000000067F000040020000A0000000DB8000__0000003B99F7F8A0\n000000067F000040020000A0000000DB4000-000000067F000040020000A0000000DB8000__00
00005D2FFFFB38\n000000067F000040020000A0000000DB8000-000000067F000040020000A0000000DBC000__00000038E1ABFE28\n000000067F000040020000A0000000DB8000-000000067F000040020000A0000000DBC000__00000038E9AF7F00\n000000067F000040020000A0000000DB8000-000000067F000040020000A0000000DBC000__0000003903F1CFE8\n000000067F000040020000A0000000DB8000-000000067F000040020000A0000000DBC000__0000003B99F7F8A0\n000000067F000040020000A0000000DB8000-000000067F000040020000A0000000DBC000__0000005D2FFFFB38\n000000067F000040020000A0000000DB82C7-000000067F000040020000A0000000DC0CB4__00000035C5A3EE11-000000366553DF11\n000000067F000040020000A0000000DBC000-000000067F000040020000A0000000DC0000__00000038E1ABFE28\n000000067F000040020000A0000000DBC000-000000067F000040020000A0000000DC0000__00000038E9AF7F00\n000000067F000040020000A0000000DBC000-000000067F000040020000A0000000DC0000__0000003903F1CFE8\n000000067F000040020000A0000000DBC000-000000067F000040020000A0000000DC0000__0000003B99F7F8A0\n000000067F000040020000A0000000DBC000-000000067F000040020000A0000000DC0000__0000005D2FFFFB38\n000000067F000040020000A0000000DC0000-000000067F000040020000A0000000DC4000__00000038E1ABFE28\n000000067F000040020000A0000000DC0000-000000067F000040020000A0000000DC4000__00000038E9AF7F00\n000000067F000040020000A0000000DC0000-000000067F000040020000A0000000DC4000__0000003903F1CFE8\n000000067F000040020000A0000000DC0000-000000067F000040020000A0000000DC4000__0000003B99F7F8A0\n000000067F000040020000A0000000DC0000-000000067F000040020000A0000000DC4000__0000005D2FFFFB38\n000000067F000040020000A0000000DC0CB4-000000067F000040020000A0000000DC9693__00000035C5A3EE11-000000366553DF11\n000000067F000040020000A0000000DC4000-000000067F000040020000A0000000DC8000__00000038E1ABFE28\n000000067F000040020000A0000000DC4000-000000067F000040020000A0000000DC8000__00000038E9AF7F00\n000000067F000040020000A0000000DC4000-000000067F000040020000A0000000DC8000__0000003903F1CFE8\n000000067F000040020000A0000000DC4000-000000067F000040020000A0000000DC8000__0000003B99F7F8A
0\n000000067F000040020000A0000000DC4000-000000067F000040020000A0000000DC8000__0000005D2FFFFB38\n000000067F000040020000A0000000DC8000-000000067F000040020000A0000000DCC000__00000038E1ABFE28\n000000067F000040020000A0000000DC8000-000000067F000040020000A0000000DCC000__00000038E9AF7F00\n000000067F000040020000A0000000DC8000-000000067F000040020000A0000000DCC000__0000003903F1CFE8\n000000067F000040020000A0000000DC8000-000000067F000040020000A0000000DCC000__0000003B99F7F8A0\n000000067F000040020000A0000000DC8000-000000067F000040020000A0000000DCC000__0000005D2FFFFB38\n000000067F000040020000A0000000DC9693-000000067F000040020000A0000000DD2070__00000035C5A3EE11-000000366553DF11\n000000067F000040020000A0000000DCC000-000000067F000040020000A0000000DD0000__00000038E1ABFE28\n000000067F000040020000A0000000DCC000-000000067F000040020000A0000000DD0000__00000038E9AF7F00\n000000067F000040020000A0000000DCC000-000000067F000040020000A0000000DD0000__0000003903F1CFE8\n000000067F000040020000A0000000DCC000-000000067F000040020000A0000000DD0000__0000003B99F7F8A0\n000000067F000040020000A0000000DCC000-000000067F000040020000A0000000DD0000__0000005D2FFFFB38\n000000067F000040020000A0000000DD0000-000000067F000040020000A0000000DD4000__00000038E1ABFE28\n000000067F000040020000A0000000DD0000-000000067F000040020000A0000000DD4000__00000038E9AF7F00\n000000067F000040020000A0000000DD0000-000000067F000040020000A0000000DD4000__0000003903F1CFE8\n000000067F000040020000A0000000DD0000-000000067F000040020000A0000000DD4000__0000003B99F7F8A0\n000000067F000040020000A0000000DD0000-000000067F000040020000A0000000DD4000__0000005D2FFFFB38\n000000067F000040020000A0000000DD2070-000000067F000040020000A0000000DDAA44__00000035C5A3EE11-000000366553DF11\n000000067F000040020000A0000000DD4000-000000067F000040020000A0000000DD8000__00000038E1ABFE28\n000000067F000040020000A0000000DD4000-000000067F000040020000A0000000DD8000__00000038E9AF7F00\n000000067F000040020000A0000000DD4000-000000067F000040020000A0000000DD8000__0000003903F1CFE8\n000000067F
000040020000A0000000DD4000-000000067F000040020000A0000000DD8000__0000003B99F7F8A0\n000000067F000040020000A0000000DD4000-000000067F000040020000A0000000DD8000__0000005D2FFFFB38\n000000067F000040020000A0000000DD8000-000000067F000040020000A0000000DDC000__00000038E1ABFE28\n000000067F000040020000A0000000DD8000-000000067F000040020000A0000000DDC000__00000038E9AF7F00\n000000067F000040020000A0000000DD8000-000000067F000040020000A0000000DDC000__0000003903F1CFE8\n000000067F000040020000A0000000DD8000-000000067F000040020000A0000000DDC000__0000003B99F7F8A0\n000000067F000040020000A0000000DD8000-000000067F000040020000A0000000DDC000__0000005D2FFFFB38\n000000067F000040020000A0000000DDAA44-000000067F000040020000A0000000DE341F__00000035C5A3EE11-000000366553DF11\n000000067F000040020000A0000000DDC000-000000067F000040020000A0000000DE0000__00000038E1ABFE28\n000000067F000040020000A0000000DDC000-000000067F000040020000A0000000DE0000__00000038E9AF7F00\n000000067F000040020000A0000000DDC000-000000067F000040020000A0000000DE0000__0000003903F1CFE8\n000000067F000040020000A0000000DDC000-000000067F000040020000A0000000DE0000__0000003B99F7F8A0\n000000067F000040020000A0000000DDC000-000000067F000040020000A0000000DE0000__0000005D2FFFFB38\n000000067F000040020000A0000000DE0000-000000067F000040020000A0000000DE4000__00000038E1ABFE28\n000000067F000040020000A0000000DE0000-000000067F000040020000A0000000DE4000__00000038E9AF7F00\n000000067F000040020000A0000000DE0000-000000067F000040020000A0000000DE4000__0000003903F1CFE8\n000000067F000040020000A0000000DE0000-000000067F000040020000A0000000DE4000__0000003B99F7F8A0\n000000067F000040020000A0000000DE0000-000000067F000040020000A0000000DE4000__0000005D2FFFFB38\n000000067F000040020000A0000000DE341F-000000067F000040020000A0000000DEBDF4__00000035C5A3EE11-000000366553DF11\n000000067F000040020000A0000000DE4000-000000067F000040020000A0000000DE8000__00000038E1ABFE28\n000000067F000040020000A0000000DE4000-000000067F000040020000A0000000DE8000__00000038E9AF7F00\n000000067F000040020000A
0000000DE4000-000000067F000040020000A0000000DE8000__0000003903F1CFE8\n000000067F000040020000A0000000DE4000-000000067F000040020000A0000000DE8000__0000003B99F7F8A0\n000000067F000040020000A0000000DE4000-000000067F000040020000A0000000DE8000__0000005D2FFFFB38\n000000067F000040020000A0000000DE8000-000000067F000040020000A0000000DEC000__00000038E1ABFE28\n000000067F000040020000A0000000DE8000-000000067F000040020000A0000000DEC000__00000038E9AF7F00\n000000067F000040020000A0000000DE8000-000000067F000040020000A0000000DEC000__0000003903F1CFE8\n000000067F000040020000A0000000DE8000-000000067F000040020000A0000000DEC000__0000003B99F7F8A0\n000000067F000040020000A0000000DE8000-000000067F000040020000A0000000DEC000__0000005D2FFFFB38\n000000067F000040020000A0000000DEBDF4-000000067F000040020000A0000000DF47DB__00000035C5A3EE11-000000366553DF11\n000000067F000040020000A0000000DEC000-000000067F000040020000A0000000DF0000__00000038E1ABFE28\n000000067F000040020000A0000000DEC000-000000067F000040020000A0000000DF0000__00000038E9AF7F00\n000000067F000040020000A0000000DEC000-000000067F000040020000A0000000DF0000__0000003903F1CFE8\n000000067F000040020000A0000000DEC000-000000067F000040020000A0000000DF0000__0000003B99F7F8A0\n000000067F000040020000A0000000DEC000-000000067F000040020000A0000000DF0000__0000005D2FFFFB38\n000000067F000040020000A0000000DF0000-000000067F000040020000A0000000DF4000__00000038E1ABFE28\n000000067F000040020000A0000000DF0000-000000067F000040020000A0000000DF4000__00000038E9AF7F00\n000000067F000040020000A0000000DF0000-000000067F000040020000A0000000DF4000__0000003903F1CFE8\n000000067F000040020000A0000000DF0000-000000067F000040020000A0000000DF4000__0000003B99F7F8A0\n000000067F000040020000A0000000DF0000-000000067F000040020000A0000000DF4000__0000005D2FFFFB38\n000000067F000040020000A0000000DF4000-000000067F000040020000A0000000DF8000__00000038E1ABFE28\n000000067F000040020000A0000000DF4000-000000067F000040020000A0000000DF8000__00000038E9AF7F00\n000000067F000040020000A0000000DF4000-000000067F000040
020000A0000000DF8000__0000003903F1CFE8\n000000067F000040020000A0000000DF4000-000000067F000040020000A0000000DF8000__0000003B99F7F8A0\n000000067F000040020000A0000000DF4000-000000067F000040020000A0000000DF8000__0000005D2FFFFB38\n000000067F000040020000A0000000DF47DB-000000067F000040020000A0000000DFD1C6__00000035C5A3EE11-000000366553DF11\n000000067F000040020000A0000000DF8000-000000067F000040020000A0000000DFC000__00000038E1ABFE28\n000000067F000040020000A0000000DF8000-000000067F000040020000A0000000DFC000__00000038E9AF7F00\n000000067F000040020000A0000000DF8000-000000067F000040020000A0000000DFC000__0000003903F1CFE8\n000000067F000040020000A0000000DF8000-000000067F000040020000A0000000DFC000__0000003B99F7F8A0\n000000067F000040020000A0000000DF8000-000000067F000040020000A0000000DFC000__0000005D2FFFFB38\n000000067F000040020000A0000000DFC000-000000067F000040020000A0000000E00000__00000038E1ABFE28\n000000067F000040020000A0000000DFC000-000000067F000040020000A0000000E00000__00000038E9AF7F00\n000000067F000040020000A0000000DFC000-000000067F000040020000A0000000E00000__0000003903F1CFE8\n000000067F000040020000A0000000DFC000-000000067F000040020000A0000000E00000__0000003B99F7F8A0\n000000067F000040020000A0000000DFC000-000000067F000040020000A0000000E00000__0000005D2FFFFB38\n000000067F000040020000A0000000DFD1C6-000000067F000040020000A0000000E05BAE__00000035C5A3EE11-000000366553DF11\n000000067F000040020000A0000000E00000-000000067F000040020000A0000000E04000__00000038E1ABFE28\n000000067F000040020000A0000000E00000-000000067F000040020000A0000000E04000__00000038E9AF7F00\n000000067F000040020000A0000000E00000-000000067F000040020000A0000000E04000__0000003903F1CFE8\n000000067F000040020000A0000000E00000-000000067F000040020000A0000000E04000__0000003B99F7F8A0\n000000067F000040020000A0000000E00000-000000067F000040020000A0000000E04000__0000005D2FFFFB38\n000000067F000040020000A0000000E04000-000000067F000040020000A0000000E08000__0000003734F16F18\n000000067F000040020000A0000000E04000-000000067F000040020000A000000
0E08000__00000038E9AF7F00\n000000067F000040020000A0000000E04000-000000067F000040020000A0000000E08000__0000003903F1CFE8\n000000067F000040020000A0000000E04000-000000067F000040020000A0000000E08000__0000003B99F7F8A0\n000000067F000040020000A0000000E04000-000000067F000040020000A0000000E08000__0000005D2FFFFB38\n000000067F000040020000A0000000E05BAE-000000067F000040020000A0000200000000__00000035C5A3EE11-000000366553DF11\n000000067F000040020000A0000000E05D58-000000067F000040020000A0000000E0E727__000000366553DF11-000000370503E969\n000000067F000040020000A0000000E08000-000000067F000040020000A0000000E0C000__0000003734F16F18\n000000067F000040020000A0000000E08000-000000067F000040020000A0000000E0C000__00000038E9AF7F00\n000000067F000040020000A0000000E08000-000000067F000040020000A0000000E0C000__0000003903F1CFE8\n000000067F000040020000A0000000E08000-000000067F000040020000A0000000E0C000__0000003B99F7F8A0\n000000067F000040020000A0000000E08000-000000067F000040020000A0000000E0C000__0000005D2FFFFB38\n000000067F000040020000A0000000E0C000-000000067F000040020000A0000000E10000__0000003734F16F18\n000000067F000040020000A0000000E0C000-000000067F000040020000A0000000E10000__00000038E9AF7F00\n000000067F000040020000A0000000E0C000-000000067F000040020000A0000000E10000__0000003903F1CFE8\n000000067F000040020000A0000000E0C000-000000067F000040020000A0000000E10000__0000003B99F7F8A0\n000000067F000040020000A0000000E0C000-000000067F000040020000A0000000E10000__0000005D2FFFFB38\n000000067F000040020000A0000000E0E727-000000067F000040020000A0000000E17100__000000366553DF11-000000370503E969\n000000067F000040020000A0000000E10000-000000067F000040020000A0000000E14000__0000003734F16F18\n000000067F000040020000A0000000E10000-000000067F000040020000A0000000E14000__00000038E9AF7F00\n000000067F000040020000A0000000E10000-000000067F000040020000A0000000E14000__0000003903F1CFE8\n000000067F000040020000A0000000E10000-000000067F000040020000A0000000E14000__0000003B99F7F8A0\n000000067F000040020000A0000000E10000-000000067F000040020000A00
00000E14000__0000005D2FFFFB38\n000000067F000040020000A0000000E14000-000000067F000040020000A0000000E18000__0000003734F16F18\n000000067F000040020000A0000000E14000-000000067F000040020000A0000000E18000__00000038E9AF7F00\n000000067F000040020000A0000000E14000-000000067F000040020000A0000000E18000__0000003903F1CFE8\n000000067F000040020000A0000000E14000-000000067F000040020000A0000000E18000__0000003B99F7F8A0\n000000067F000040020000A0000000E14000-000000067F000040020000A0000000E18000__0000005D2FFFFB38\n000000067F000040020000A0000000E17100-000000067F000040020000A0000000E1FAD2__000000366553DF11-000000370503E969\n000000067F000040020000A0000000E18000-000000067F000040020000A0000000E1C000__0000003734F16F18\n000000067F000040020000A0000000E18000-000000067F000040020000A0000000E1C000__00000038E9AF7F00\n000000067F000040020000A0000000E18000-000000067F000040020000A0000000E1C000__0000003903F1CFE8\n000000067F000040020000A0000000E18000-000000067F000040020000A0000000E1C000__0000003B99F7F8A0\n000000067F000040020000A0000000E18000-000000067F000040020000A0000000E1C000__0000005D2FFFFB38\n000000067F000040020000A0000000E1C000-000000067F000040020000A0000000E20000__0000003734F16F18\n000000067F000040020000A0000000E1C000-000000067F000040020000A0000000E20000__00000038E9AF7F00\n000000067F000040020000A0000000E1C000-000000067F000040020000A0000000E20000__0000003903F1CFE8\n000000067F000040020000A0000000E1C000-000000067F000040020000A0000000E20000__0000003B99F7F8A0\n000000067F000040020000A0000000E1C000-000000067F000040020000A0000000E20000__0000005D2FFFFB38\n000000067F000040020000A0000000E1FAD2-000000067F000040020000A0000000E284A9__000000366553DF11-000000370503E969\n000000067F000040020000A0000000E20000-000000067F000040020000A0000000E24000__0000003734F16F18\n000000067F000040020000A0000000E20000-000000067F000040020000A0000000E24000__00000038E9AF7F00\n000000067F000040020000A0000000E20000-000000067F000040020000A0000000E24000__0000003903F1CFE8\n000000067F000040020000A0000000E20000-000000067F000040020000A0000000E24000__
0000003B99F7F8A0\n000000067F000040020000A0000000E20000-000000067F000040020000A0000000E24000__0000005D2FFFFB38\n000000067F000040020000A0000000E24000-000000067F000040020000A0000000E28000__0000003734F16F18\n000000067F000040020000A0000000E24000-000000067F000040020000A0000000E28000__00000038E9AF7F00\n000000067F000040020000A0000000E24000-000000067F000040020000A0000000E28000__0000003903F1CFE8\n000000067F000040020000A0000000E24000-000000067F000040020000A0000000E28000__0000003B99F7F8A0\n000000067F000040020000A0000000E24000-000000067F000040020000A0000000E28000__0000005D2FFFFB38\n000000067F000040020000A0000000E28000-000000067F000040020000A0000000E2C000__0000003734F16F18\n000000067F000040020000A0000000E28000-000000067F000040020000A0000000E2C000__00000038E9AF7F00\n000000067F000040020000A0000000E28000-000000067F000040020000A0000000E2C000__0000003903F1CFE8\n000000067F000040020000A0000000E28000-000000067F000040020000A0000000E2C000__0000003B99F7F8A0\n000000067F000040020000A0000000E28000-000000067F000040020000A0000000E2C000__0000005D2FFFFB38\n000000067F000040020000A0000000E284A9-000000067F000040020000A0000000E30E94__000000366553DF11-000000370503E969\n000000067F000040020000A0000000E2C000-000000067F000040020000A0000000E30000__0000003734F16F18\n000000067F000040020000A0000000E2C000-000000067F000040020000A0000000E30000__00000038E9AF7F00\n000000067F000040020000A0000000E2C000-000000067F000040020000A0000000E30000__0000003903F1CFE8\n000000067F000040020000A0000000E2C000-000000067F000040020000A0000000E30000__0000003B99F7F8A0\n000000067F000040020000A0000000E2C000-000000067F000040020000A0000000E30000__0000005D2FFFFB38\n000000067F000040020000A0000000E30000-000000067F000040020000A0000000E34000__0000003734F16F18\n000000067F000040020000A0000000E30000-000000067F000040020000A0000000E34000__00000038E9AF7F00\n000000067F000040020000A0000000E30000-000000067F000040020000A0000000E34000__0000003903F1CFE8\n000000067F000040020000A0000000E30000-000000067F000040020000A0000000E34000__0000003B99F7F8A0\n000000067F00
0040020000A0000000E30000-000000067F000040020000A0000000E34000__0000005D2FFFFB38\n000000067F000040020000A0000000E30E94-000000067F000040020000A0000000E39878__000000366553DF11-000000370503E969\n000000067F000040020000A0000000E34000-000000067F000040020000A0000000E38000__0000003734F16F18\n000000067F000040020000A0000000E34000-000000067F000040020000A0000000E38000__00000038E9AF7F00\n000000067F000040020000A0000000E34000-000000067F000040020000A0000000E38000__0000003903F1CFE8\n000000067F000040020000A0000000E34000-000000067F000040020000A0000000E38000__0000003B99F7F8A0\n000000067F000040020000A0000000E34000-000000067F000040020000A0000000E38000__0000005D2FFFFB38\n000000067F000040020000A0000000E38000-000000067F000040020000A0000000E3C000__0000003734F16F18\n000000067F000040020000A0000000E38000-000000067F000040020000A0000000E3C000__00000038E9AF7F00\n000000067F000040020000A0000000E38000-000000067F000040020000A0000000E3C000__0000003903F1CFE8\n000000067F000040020000A0000000E38000-000000067F000040020000A0000000E3C000__0000003B99F7F8A0\n000000067F000040020000A0000000E38000-000000067F000040020000A0000000E3C000__0000005D2FFFFB38\n000000067F000040020000A0000000E39878-000000067F000040020000A0000000E42256__000000366553DF11-000000370503E969\n000000067F000040020000A0000000E3C000-000000067F000040020000A0000000E40000__0000003734F16F18\n000000067F000040020000A0000000E3C000-000000067F000040020000A0000000E40000__00000038E9AF7F00\n000000067F000040020000A0000000E3C000-000000067F000040020000A0000000E40000__0000003903F1CFE8\n000000067F000040020000A0000000E3C000-000000067F000040020000A0000000E40000__0000003B99F7F8A0\n000000067F000040020000A0000000E3C000-000000067F000040020000A0000000E40000__0000005D2FFFFB38\n000000067F000040020000A0000000E40000-000000067F000040020000A0000000E44000__0000003734F16F18\n000000067F000040020000A0000000E40000-000000067F000040020000A0000000E44000__00000038E9AF7F00\n000000067F000040020000A0000000E40000-000000067F000040020000A0000000E44000__0000003903F1CFE8\n000000067F000040020000A00
00000E40000-000000067F000040020000A0000000E44000__0000003B99F7F8A0\n000000067F000040020000A0000000E40000-000000067F000040020000A0000000E44000__0000005D2FFFFB38\n000000067F000040020000A0000000E42256-000000067F000040020000A0000000E4AC29__000000366553DF11-000000370503E969\n000000067F000040020000A0000000E44000-000000067F000040020000A0000000E48000__0000003734F16F18\n000000067F000040020000A0000000E44000-000000067F000040020000A0000000E48000__00000038E9AF7F00\n000000067F000040020000A0000000E44000-000000067F000040020000A0000000E48000__0000003903F1CFE8\n000000067F000040020000A0000000E44000-000000067F000040020000A0000000E48000__0000003B99F7F8A0\n000000067F000040020000A0000000E44000-000000067F000040020000A0000000E48000__0000005D2FFFFB38\n000000067F000040020000A0000000E48000-000000067F000040020000A0000000E4C000__0000003734F16F18\n000000067F000040020000A0000000E48000-000000067F000040020000A0000000E4C000__00000038E9AF7F00\n000000067F000040020000A0000000E48000-000000067F000040020000A0000000E4C000__0000003903F1CFE8\n000000067F000040020000A0000000E48000-000000067F000040020000A0000000E4C000__0000003B99F7F8A0\n000000067F000040020000A0000000E48000-000000067F000040020000A0000000E4C000__0000005D2FFFFB38\n000000067F000040020000A0000000E4AC29-000000067F000040020000A0000000E53600__000000366553DF11-000000370503E969\n000000067F000040020000A0000000E4C000-000000067F000040020000A0000000E50000__0000003734F16F18\n000000067F000040020000A0000000E4C000-000000067F000040020000A0000000E50000__00000038E9AF7F00\n000000067F000040020000A0000000E4C000-000000067F000040020000A0000000E50000__0000003903F1CFE8\n000000067F000040020000A0000000E4C000-000000067F000040020000A0000000E50000__0000003B99F7F8A0\n000000067F000040020000A0000000E4C000-000000067F000040020000A0000000E50000__0000005D2FFFFB38\n000000067F000040020000A0000000E50000-000000067F000040020000A0000000E54000__0000003734F16F18\n000000067F000040020000A0000000E50000-000000067F000040020000A0000000E54000__00000038E9AF7F00\n000000067F000040020000A0000000E50000-0
00000067F000040020000A0000000E54000__0000003903F1CFE8\n000000067F000040020000A0000000E50000-000000067F000040020000A0000000E54000__0000003B99F7F8A0\n000000067F000040020000A0000000E50000-000000067F000040020000A0000000E54000__0000005D2FFFFB38\n000000067F000040020000A0000000E53600-000000067F000040020000A0000000E5BFD2__000000366553DF11-000000370503E969\n000000067F000040020000A0000000E54000-000000067F000040020000A0000000E58000__0000003734F16F18\n000000067F000040020000A0000000E54000-000000067F000040020000A0000000E58000__00000038E9AF7F00\n000000067F000040020000A0000000E54000-000000067F000040020000A0000000E58000__0000003903F1CFE8\n000000067F000040020000A0000000E54000-000000067F000040020000A0000000E58000__0000003B99F7F8A0\n000000067F000040020000A0000000E54000-000000067F000040020000A0000000E58000__0000005D2FFFFB38\n000000067F000040020000A0000000E58000-000000067F000040020000A0000000E5C000__0000003734F16F18\n000000067F000040020000A0000000E58000-000000067F000040020000A0000000E5C000__00000038E9AF7F00\n000000067F000040020000A0000000E58000-000000067F000040020000A0000000E5C000__0000003903F1CFE8\n000000067F000040020000A0000000E58000-000000067F000040020000A0000000E5C000__0000003B99F7F8A0\n000000067F000040020000A0000000E58000-000000067F000040020000A0000000E5C000__0000005D2FFFFB38\n000000067F000040020000A0000000E5BFD2-000000067F000040020000A0000200000000__000000366553DF11-000000370503E969\n000000067F000040020000A0000000E5C000-000000067F000040020000A0000000E60000__0000003734F16F18\n000000067F000040020000A0000000E5C000-000000067F000040020000A0000000E60000__00000038E67ABFA0\n000000067F000040020000A0000000E5C000-000000067F000040020000A0000000E60000__0000003903F1CFE8\n000000067F000040020000A0000000E5C000-000000067F000040020000A0000000E60000__0000003B99F7F8A0\n000000067F000040020000A0000000E5C000-000000067F000040020000A0000000E60000__0000005D2FFFFB38\n000000067F000040020000A0000000E5C268-000000067F000040020000A0000000E64C37__000000370503E969-00000037A4B3E7B1\n000000067F000040020000A0000000E600
00-000000067F000040020000A0000000E64000__0000003734F16F18\n000000067F000040020000A0000000E60000-000000067F000040020000A0000000E64000__00000038E67ABFA0\n000000067F000040020000A0000000E60000-000000067F000040020000A0000000E64000__0000003903F1CFE8\n000000067F000040020000A0000000E60000-000000067F000040020000A0000000E64000__0000003B99F7F8A0\n000000067F000040020000A0000000E60000-000000067F000040020000A0000000E64000__0000005D2FFFFB38\n000000067F000040020000A0000000E64000-000000067F000040020000A0000000E68000__0000003734F16F18\n000000067F000040020000A0000000E64000-000000067F000040020000A0000000E68000__00000038E67ABFA0\n000000067F000040020000A0000000E64000-000000067F000040020000A0000000E68000__0000003903F1CFE8\n000000067F000040020000A0000000E64000-000000067F000040020000A0000000E68000__0000003B99F7F8A0\n000000067F000040020000A0000000E64000-000000067F000040020000A0000000E68000__0000005D2FFFFB38\n000000067F000040020000A0000000E64C37-000000067F000040020000A0000000E6D618__000000370503E969-00000037A4B3E7B1\n000000067F000040020000A0000000E68000-000000067F000040020000A0000000E6C000__0000003734F16F18\n000000067F000040020000A0000000E68000-000000067F000040020000A0000000E6C000__00000038E67ABFA0\n000000067F000040020000A0000000E68000-000000067F000040020000A0000000E6C000__0000003903F1CFE8\n000000067F000040020000A0000000E68000-000000067F000040020000A0000000E6C000__0000003B99F7F8A0\n000000067F000040020000A0000000E68000-000000067F000040020000A0000000E6C000__0000005D2FFFFB38\n000000067F000040020000A0000000E6C000-000000067F000040020000A0000000E70000__0000003734F16F18\n000000067F000040020000A0000000E6C000-000000067F000040020000A0000000E70000__00000038E67ABFA0\n000000067F000040020000A0000000E6C000-000000067F000040020000A0000000E70000__0000003903F1CFE8\n000000067F000040020000A0000000E6C000-000000067F000040020000A0000000E70000__0000003B99F7F8A0\n000000067F000040020000A0000000E6C000-000000067F000040020000A0000000E70000__0000005D2FFFFB38\n000000067F000040020000A0000000E6D618-000000067F000040020000A0000
000E75FEF__000000370503E969-00000037A4B3E7B1\n000000067F000040020000A0000000E70000-000000067F000040020000A0000000E74000__0000003734F16F18\n000000067F000040020000A0000000E70000-000000067F000040020000A0000000E74000__00000038E67ABFA0\n000000067F000040020000A0000000E70000-000000067F000040020000A0000000E74000__0000003903F1CFE8\n000000067F000040020000A0000000E70000-000000067F000040020000A0000000E74000__0000003B99F7F8A0\n000000067F000040020000A0000000E70000-000000067F000040020000A0000000E74000__0000005D2FFFFB38\n000000067F000040020000A0000000E74000-000000067F000040020000A0000000E78000__00000038E67ABFA0\n000000067F000040020000A0000000E74000-000000067F000040020000A0000000E78000__0000003903F1CFE8\n000000067F000040020000A0000000E74000-000000067F000040020000A0000000E78000__0000003B99F7F8A0\n000000067F000040020000A0000000E74000-000000067F000040020000A0000000E78000__0000005D2FFFFB38\n000000067F000040020000A0000000E74000-030000000000000000000000000000000002__0000003734F16F18\n000000067F000040020000A0000000E75FEF-000000067F000040020000A0000000E7E9D1__000000370503E969-00000037A4B3E7B1\n000000067F000040020000A0000000E78000-000000067F000040020000A0000000E7C000__00000038E67ABFA0\n000000067F000040020000A0000000E78000-000000067F000040020000A0000000E7C000__0000003903F1CFE8\n000000067F000040020000A0000000E78000-000000067F000040020000A0000000E7C000__0000003B99F7F8A0\n000000067F000040020000A0000000E78000-000000067F000040020000A0000000E7C000__0000005D2FFFFB38\n000000067F000040020000A0000000E7C000-000000067F000040020000A0000000E80000__00000038E67ABFA0\n000000067F000040020000A0000000E7C000-000000067F000040020000A0000000E80000__0000003903F1CFE8\n000000067F000040020000A0000000E7C000-000000067F000040020000A0000000E80000__0000003B99F7F8A0\n000000067F000040020000A0000000E7C000-000000067F000040020000A0000000E80000__0000005D2FFFFB38\n000000067F000040020000A0000000E7E9D1-000000067F000040020000A0000000E873AA__000000370503E969-00000037A4B3E7B1\n000000067F000040020000A0000000E80000-000000067F000040020000A
0000000E84000__00000038E67ABFA0\n000000067F000040020000A0000000E80000-000000067F000040020000A0000000E84000__0000003903F1CFE8\n000000067F000040020000A0000000E80000-000000067F000040020000A0000000E84000__0000003B99F7F8A0\n000000067F000040020000A0000000E80000-000000067F000040020000A0000000E84000__0000005D2FFFFB38\n000000067F000040020000A0000000E84000-000000067F000040020000A0000000E88000__00000038E67ABFA0\n000000067F000040020000A0000000E84000-000000067F000040020000A0000000E88000__0000003903F1CFE8\n000000067F000040020000A0000000E84000-000000067F000040020000A0000000E88000__0000003B99F7F8A0\n000000067F000040020000A0000000E84000-000000067F000040020000A0000000E88000__0000005D2FFFFB38\n000000067F000040020000A0000000E873AA-000000067F000040020000A0000000E8FD88__000000370503E969-00000037A4B3E7B1\n000000067F000040020000A0000000E88000-000000067F000040020000A0000000E8C000__00000038E67ABFA0\n000000067F000040020000A0000000E88000-000000067F000040020000A0000000E8C000__0000003903F1CFE8\n000000067F000040020000A0000000E88000-000000067F000040020000A0000000E8C000__0000003B99F7F8A0\n000000067F000040020000A0000000E88000-000000067F000040020000A0000000E8C000__0000005D2FFFFB38\n000000067F000040020000A0000000E8C000-000000067F000040020000A0000000E90000__00000038E67ABFA0\n000000067F000040020000A0000000E8C000-000000067F000040020000A0000000E90000__0000003903F1CFE8\n000000067F000040020000A0000000E8C000-000000067F000040020000A0000000E90000__0000003B99F7F8A0\n000000067F000040020000A0000000E8C000-000000067F000040020000A0000000E90000__0000005D2FFFFB38\n000000067F000040020000A0000000E8FD88-000000067F000040020000A0000000E98764__000000370503E969-00000037A4B3E7B1\n000000067F000040020000A0000000E90000-000000067F000040020000A0000000E94000__00000038E67ABFA0\n000000067F000040020000A0000000E90000-000000067F000040020000A0000000E94000__0000003903F1CFE8\n000000067F000040020000A0000000E90000-000000067F000040020000A0000000E94000__0000003B99F7F8A0\n000000067F000040020000A0000000E90000-000000067F000040020000A0000000E94000
__0000005D2FFFFB38\n000000067F000040020000A0000000E94000-000000067F000040020000A0000000E98000__00000038E67ABFA0\n000000067F000040020000A0000000E94000-000000067F000040020000A0000000E98000__0000003903F1CFE8\n000000067F000040020000A0000000E94000-000000067F000040020000A0000000E98000__0000003B99F7F8A0\n000000067F000040020000A0000000E94000-000000067F000040020000A0000000E98000__0000005D2FFFFB38\n000000067F000040020000A0000000E98000-000000067F000040020000A0000000E9C000__00000038E67ABFA0\n000000067F000040020000A0000000E98000-000000067F000040020000A0000000E9C000__0000003903F1CFE8\n000000067F000040020000A0000000E98000-000000067F000040020000A0000000E9C000__0000003B99F7F8A0\n000000067F000040020000A0000000E98000-000000067F000040020000A0000000E9C000__0000005D2FFFFB38\n000000067F000040020000A0000000E98764-000000067F000040020000A0000000EA1139__000000370503E969-00000037A4B3E7B1\n000000067F000040020000A0000000E9C000-000000067F000040020000A0000000EA0000__00000038E67ABFA0\n000000067F000040020000A0000000E9C000-000000067F000040020000A0000000EA0000__0000003903F1CFE8\n000000067F000040020000A0000000E9C000-000000067F000040020000A0000000EA0000__0000003B99F7F8A0\n000000067F000040020000A0000000E9C000-000000067F000040020000A0000000EA0000__0000005D2FFFFB38\n000000067F000040020000A0000000EA0000-000000067F000040020000A0000000EA4000__00000038E67ABFA0\n000000067F000040020000A0000000EA0000-000000067F000040020000A0000000EA4000__0000003903F1CFE8\n000000067F000040020000A0000000EA0000-000000067F000040020000A0000000EA4000__0000003B99F7F8A0\n000000067F000040020000A0000000EA0000-000000067F000040020000A0000000EA4000__0000005D2FFFFB38\n000000067F000040020000A0000000EA1139-000000067F000040020000A0000000EA9B11__000000370503E969-00000037A4B3E7B1\n000000067F000040020000A0000000EA4000-000000067F000040020000A0000000EA8000__00000038E67ABFA0\n000000067F000040020000A0000000EA4000-000000067F000040020000A0000000EA8000__0000003903F1CFE8\n000000067F000040020000A0000000EA4000-000000067F000040020000A0000000EA8000__0000003B99F
7F8A0\n000000067F000040020000A0000000EA4000-000000067F000040020000A0000000EA8000__0000005D2FFFFB38\n000000067F000040020000A0000000EA8000-000000067F000040020000A0000000EAC000__00000038E67ABFA0\n000000067F000040020000A0000000EA8000-000000067F000040020000A0000000EAC000__0000003903F1CFE8\n000000067F000040020000A0000000EA8000-000000067F000040020000A0000000EAC000__0000003B99F7F8A0\n000000067F000040020000A0000000EA8000-000000067F000040020000A0000000EAC000__0000005D2FFFFB38\n000000067F000040020000A0000000EA9B11-000000067F000040020000A0000000EB24E9__000000370503E969-00000037A4B3E7B1\n000000067F000040020000A0000000EAC000-000000067F000040020000A0000000EB0000__00000038E67ABFA0\n000000067F000040020000A0000000EAC000-000000067F000040020000A0000000EB0000__0000003903F1CFE8\n000000067F000040020000A0000000EAC000-000000067F000040020000A0000000EB0000__0000003B99F7F8A0\n000000067F000040020000A0000000EAC000-000000067F000040020000A0000000EB0000__0000005D2FFFFB38\n000000067F000040020000A0000000EB0000-000000067F000040020000A0000000EB4000__00000038E1ABFE28\n000000067F000040020000A0000000EB0000-000000067F000040020000A0000000EB4000__00000038E9AF7F00\n000000067F000040020000A0000000EB0000-000000067F000040020000A0000000EB4000__0000003903F1CFE8\n000000067F000040020000A0000000EB0000-000000067F000040020000A0000000EB4000__0000003B99F7F8A0\n000000067F000040020000A0000000EB0000-000000067F000040020000A0000000EB4000__0000005D2FFFFB38\n000000067F000040020000A0000000EB24E9-000000067F000040020000A0000200000000__000000370503E969-00000037A4B3E7B1\n000000067F000040020000A0000000EB26A9-000000067F000040020000A0000000EBB084__00000037A4B3E7B1-000000384463E2C1\n000000067F000040020000A0000000EB4000-000000067F000040020000A0000000EB8000__00000038E1ABFE28\n000000067F000040020000A0000000EB4000-000000067F000040020000A0000000EB8000__00000038E9AF7F00\n000000067F000040020000A0000000EB4000-000000067F000040020000A0000000EB8000__0000003903F1CFE8\n000000067F000040020000A0000000EB4000-000000067F000040020000A0000000EB8000__0000003
B99F7F8A0\n000000067F000040020000A0000000EB4000-000000067F000040020000A0000000EB8000__0000005D2FFFFB38\n000000067F000040020000A0000000EB8000-000000067F000040020000A0000000EBC000__00000038E1ABFE28\n000000067F000040020000A0000000EB8000-000000067F000040020000A0000000EBC000__00000038E9AF7F00\n000000067F000040020000A0000000EB8000-000000067F000040020000A0000000EBC000__0000003903F1CFE8\n000000067F000040020000A0000000EB8000-000000067F000040020000A0000000EBC000__0000003B99F7F8A0\n000000067F000040020000A0000000EB8000-000000067F000040020000A0000000EBC000__0000005D2FFFFB38\n000000067F000040020000A0000000EBB084-000000067F000040020000A0000000EC3A59__00000037A4B3E7B1-000000384463E2C1\n000000067F000040020000A0000000EBC000-000000067F000040020000A0000000EC0000__00000038E1ABFE28\n000000067F000040020000A0000000EBC000-000000067F000040020000A0000000EC0000__00000038E9AF7F00\n000000067F000040020000A0000000EBC000-000000067F000040020000A0000000EC0000__0000003903F1CFE8\n000000067F000040020000A0000000EBC000-000000067F000040020000A0000000EC0000__0000003B99F7F8A0\n000000067F000040020000A0000000EBC000-000000067F000040020000A0000000EC0000__0000005D2FFFFB38\n000000067F000040020000A0000000EC0000-000000067F000040020000A0000000EC4000__00000038E1ABFE28\n000000067F000040020000A0000000EC0000-000000067F000040020000A0000000EC4000__00000038E9AF7F00\n000000067F000040020000A0000000EC0000-000000067F000040020000A0000000EC4000__0000003903F1CFE8\n000000067F000040020000A0000000EC0000-000000067F000040020000A0000000EC4000__0000003B99F7F8A0\n000000067F000040020000A0000000EC0000-000000067F000040020000A0000000EC4000__0000005D2FFFFB38\n000000067F000040020000A0000000EC3A59-000000067F000040020000A0000000ECC43D__00000037A4B3E7B1-000000384463E2C1\n000000067F000040020000A0000000EC4000-000000067F000040020000A0000000EC8000__00000038E1ABFE28\n000000067F000040020000A0000000EC4000-000000067F000040020000A0000000EC8000__00000038E9AF7F00\n000000067F000040020000A0000000EC4000-000000067F000040020000A0000000EC8000__0000003903F1CFE8\n00
0000067F000040020000A0000000EC4000-000000067F000040020000A0000000EC8000__0000003B99F7F8A0\n000000067F000040020000A0000000EC4000-000000067F000040020000A0000000EC8000__0000005D2FFFFB38\n000000067F000040020000A0000000EC8000-000000067F000040020000A0000000ECC000__00000038E1ABFE28\n000000067F000040020000A0000000EC8000-000000067F000040020000A0000000ECC000__00000038E9AF7F00\n000000067F000040020000A0000000EC8000-000000067F000040020000A0000000ECC000__0000003903F1CFE8\n000000067F000040020000A0000000EC8000-000000067F000040020000A0000000ECC000__0000003B99F7F8A0\n000000067F000040020000A0000000EC8000-000000067F000040020000A0000000ECC000__0000005D2FFFFB38\n000000067F000040020000A0000000ECC000-000000067F000040020000A0000000ED0000__00000038E1ABFE28\n000000067F000040020000A0000000ECC000-000000067F000040020000A0000000ED0000__00000038E9AF7F00\n000000067F000040020000A0000000ECC000-000000067F000040020000A0000000ED0000__0000003903F1CFE8\n000000067F000040020000A0000000ECC000-000000067F000040020000A0000000ED0000__0000003B99F7F8A0\n000000067F000040020000A0000000ECC000-000000067F000040020000A0000000ED0000__0000005D2FFFFB38\n000000067F000040020000A0000000ECC43D-000000067F000040020000A0000000ED4E14__00000037A4B3E7B1-000000384463E2C1\n000000067F000040020000A0000000ED0000-000000067F000040020000A0000000ED4000__00000038E1ABFE28\n000000067F000040020000A0000000ED0000-000000067F000040020000A0000000ED4000__00000038E9AF7F00\n000000067F000040020000A0000000ED0000-000000067F000040020000A0000000ED4000__0000003903F1CFE8\n000000067F000040020000A0000000ED0000-000000067F000040020000A0000000ED4000__0000003B99F7F8A0\n000000067F000040020000A0000000ED0000-000000067F000040020000A0000000ED4000__0000005D2FFFFB38\n000000067F000040020000A0000000ED4000-000000067F000040020000A0000000ED8000__00000038E1ABFE28\n000000067F000040020000A0000000ED4000-000000067F000040020000A0000000ED8000__00000038E9AF7F00\n000000067F000040020000A0000000ED4000-000000067F000040020000A0000000ED8000__0000003903F1CFE8\n000000067F000040020000A0000000ED
4000-000000067F000040020000A0000000ED8000__0000003B99F7F8A0\n000000067F000040020000A0000000ED4000-000000067F000040020000A0000000ED8000__0000005D2FFFFB38\n000000067F000040020000A0000000ED4E14-000000067F000040020000A0000000EDD7F0__00000037A4B3E7B1-000000384463E2C1\n000000067F000040020000A0000000ED8000-000000067F000040020000A0000000EDC000__00000038E1ABFE28\n000000067F000040020000A0000000ED8000-000000067F000040020000A0000000EDC000__00000038E9AF7F00\n000000067F000040020000A0000000ED8000-000000067F000040020000A0000000EDC000__0000003903F1CFE8\n000000067F000040020000A0000000ED8000-000000067F000040020000A0000000EDC000__0000003B99F7F8A0\n000000067F000040020000A0000000ED8000-000000067F000040020000A0000000EDC000__0000005D2FFFFB38\n000000067F000040020000A0000000EDC000-000000067F000040020000A0000000EE0000__00000038E1ABFE28\n000000067F000040020000A0000000EDC000-000000067F000040020000A0000000EE0000__00000038E9AF7F00\n000000067F000040020000A0000000EDC000-000000067F000040020000A0000000EE0000__0000003903F1CFE8\n000000067F000040020000A0000000EDC000-000000067F000040020000A0000000EE0000__0000003B99F7F8A0\n000000067F000040020000A0000000EDC000-000000067F000040020000A0000000EE0000__0000005D2FFFFB38\n000000067F000040020000A0000000EDD7F0-000000067F000040020000A0000000EE61D2__00000037A4B3E7B1-000000384463E2C1\n000000067F000040020000A0000000EE0000-000000067F000040020000A0000000EE4000__00000038E1ABFE28\n000000067F000040020000A0000000EE0000-000000067F000040020000A0000000EE4000__00000038E9AF7F00\n000000067F000040020000A0000000EE0000-000000067F000040020000A0000000EE4000__0000003903F1CFE8\n000000067F000040020000A0000000EE0000-000000067F000040020000A0000000EE4000__0000003B99F7F8A0\n000000067F000040020000A0000000EE0000-000000067F000040020000A0000000EE4000__0000005D2FFFFB38\n000000067F000040020000A0000000EE4000-000000067F000040020000A0000000EE8000__00000038E1ABFE28\n000000067F000040020000A0000000EE4000-000000067F000040020000A0000000EE8000__00000038E9AF7F00\n000000067F000040020000A0000000EE4000-00000006
7F000040020000A0000000EE8000__0000003903F1CFE8\n000000067F000040020000A0000000EE4000-000000067F000040020000A0000000EE8000__0000003B99F7F8A0\n000000067F000040020000A0000000EE4000-000000067F000040020000A0000000EE8000__0000005D2FFFFB38\n000000067F000040020000A0000000EE61D2-000000067F000040020000A0000000EEEBB3__00000037A4B3E7B1-000000384463E2C1\n000000067F000040020000A0000000EE8000-000000067F000040020000A0000000EEC000__00000038E1ABFE28\n000000067F000040020000A0000000EE8000-000000067F000040020000A0000000EEC000__00000038E9AF7F00\n000000067F000040020000A0000000EE8000-000000067F000040020000A0000000EEC000__0000003903F1CFE8\n000000067F000040020000A0000000EE8000-000000067F000040020000A0000000EEC000__0000003B99F7F8A0\n000000067F000040020000A0000000EE8000-000000067F000040020000A0000000EEC000__0000005D2FFFFB38\n000000067F000040020000A0000000EEC000-000000067F000040020000A0000000EF0000__00000038E1ABFE28\n000000067F000040020000A0000000EEC000-000000067F000040020000A0000000EF0000__00000038E9AF7F00\n000000067F000040020000A0000000EEC000-000000067F000040020000A0000000EF0000__0000003903F1CFE8\n000000067F000040020000A0000000EEC000-000000067F000040020000A0000000EF0000__0000003B99F7F8A0\n000000067F000040020000A0000000EEC000-000000067F000040020000A0000000EF0000__0000005D2FFFFB38\n000000067F000040020000A0000000EEEBB3-000000067F000040020000A0000000EF759F__00000037A4B3E7B1-000000384463E2C1\n000000067F000040020000A0000000EF0000-000000067F000040020000A0000000EF4000__00000038E1ABFE28\n000000067F000040020000A0000000EF0000-000000067F000040020000A0000000EF4000__00000038E9AF7F00\n000000067F000040020000A0000000EF0000-000000067F000040020000A0000000EF4000__0000003903F1CFE8\n000000067F000040020000A0000000EF0000-000000067F000040020000A0000000EF4000__0000003B99F7F8A0\n000000067F000040020000A0000000EF0000-000000067F000040020000A0000000EF4000__0000005D2FFFFB38\n000000067F000040020000A0000000EF4000-000000067F000040020000A0000000EF8000__00000038E1ABFE28\n000000067F000040020000A0000000EF4000-000000067F00004002000
0A0000000EF8000__00000038E9AF7F00\n000000067F000040020000A0000000EF4000-000000067F000040020000A0000000EF8000__0000003903F1CFE8\n000000067F000040020000A0000000EF4000-000000067F000040020000A0000000EF8000__0000003B99F7F8A0\n000000067F000040020000A0000000EF4000-000000067F000040020000A0000000EF8000__0000005D2FFFFB38\n000000067F000040020000A0000000EF759F-000000067F000040020000A0000000EFFF76__00000037A4B3E7B1-000000384463E2C1\n000000067F000040020000A0000000EF8000-000000067F000040020000A0000000EFC000__00000038E1ABFE28\n000000067F000040020000A0000000EF8000-000000067F000040020000A0000000EFC000__00000038E9AF7F00\n000000067F000040020000A0000000EF8000-000000067F000040020000A0000000EFC000__0000003903F1CFE8\n000000067F000040020000A0000000EF8000-000000067F000040020000A0000000EFC000__0000003B99F7F8A0\n000000067F000040020000A0000000EF8000-000000067F000040020000A0000000EFC000__0000005D2FFFFB38\n000000067F000040020000A0000000EFC000-000000067F000040020000A0000000F00000__00000038E1ABFE28\n000000067F000040020000A0000000EFC000-000000067F000040020000A0000000F00000__00000038E9AF7F00\n000000067F000040020000A0000000EFC000-000000067F000040020000A0000000F00000__0000003903F1CFE8\n000000067F000040020000A0000000EFC000-000000067F000040020000A0000000F00000__0000003B99F7F8A0\n000000067F000040020000A0000000EFC000-000000067F000040020000A0000000F00000__0000005D2FFFFB38\n000000067F000040020000A0000000EFFF76-000000067F000040020000A0000000F08950__00000037A4B3E7B1-000000384463E2C1\n000000067F000040020000A0000000F00000-000000067F000040020000A0000000F04000__00000038E1ABFE28\n000000067F000040020000A0000000F00000-000000067F000040020000A0000000F04000__00000038E9AF7F00\n000000067F000040020000A0000000F00000-000000067F000040020000A0000000F04000__0000003903F1CFE8\n000000067F000040020000A0000000F00000-000000067F000040020000A0000000F04000__0000003B99F7F8A0\n000000067F000040020000A0000000F00000-000000067F000040020000A0000000F04000__0000005D2FFFFB38\n000000067F000040020000A0000000F04000-000000067F000040020000A0000000F080
00__00000038E1ABFE28\n000000067F000040020000A0000000F04000-000000067F000040020000A0000000F08000__00000038E9AF7F00\n000000067F000040020000A0000000F04000-000000067F000040020000A0000000F08000__0000003903F1CFE8\n000000067F000040020000A0000000F04000-000000067F000040020000A0000000F08000__0000003B99F7F8A0\n000000067F000040020000A0000000F04000-000000067F000040020000A0000000F08000__0000005D2FFFFB38\n000000067F000040020000A0000000F08000-000000067F000040020000A0000000F0C000__00000038E1ABFE28\n000000067F000040020000A0000000F08000-000000067F000040020000A0000000F0C000__00000038E9AF7F00\n000000067F000040020000A0000000F08000-000000067F000040020000A0000000F0C000__0000003903F1CFE8\n000000067F000040020000A0000000F08000-000000067F000040020000A0000000F0C000__0000003B99F7F8A0\n000000067F000040020000A0000000F08000-000000067F000040020000A0000000F0C000__0000005D2FFFFB38\n000000067F000040020000A0000000F08950-000000067F000040020000A0000200000000__00000037A4B3E7B1-000000384463E2C1\n000000067F000040020000A0000000F0C000-000000067F000040020000A0000000F10000__00000038E1ABFE28\n000000067F000040020000A0000000F0C000-000000067F000040020000A0000000F10000__00000038E9AF7F00\n000000067F000040020000A0000000F0C000-000000067F000040020000A0000000F10000__0000003903F1CFE8\n000000067F000040020000A0000000F0C000-000000067F000040020000A0000000F10000__0000003B99F7F8A0\n000000067F000040020000A0000000F0C000-000000067F000040020000A0000000F10000__0000005D2FFFFB38\n000000067F000040020000A0000000F10000-000000067F000040020000A0000000F14000__00000038E1ABFE28\n000000067F000040020000A0000000F10000-000000067F000040020000A0000000F14000__00000038E9AF7F00\n000000067F000040020000A0000000F10000-000000067F000040020000A0000000F14000__0000003903F1CFE8\n000000067F000040020000A0000000F10000-000000067F000040020000A0000000F14000__0000003B99F7F8A0\n000000067F000040020000A0000000F10000-000000067F000040020000A0000000F14000__0000005D2FFFFB38\n000000067F000040020000A0000000F11587-000000067F000040020000A0000000F19F63__000000384463E2C1-00000038E
1E2FE19\n000000067F000040020000A0000000F14000-000000067F000040020000A0000000F18000__00000038E1ABFE28\n000000067F000040020000A0000000F14000-000000067F000040020000A0000000F18000__00000038E9AF7F00\n000000067F000040020000A0000000F14000-000000067F000040020000A0000000F18000__0000003903F1CFE8\n000000067F000040020000A0000000F14000-000000067F000040020000A0000000F18000__0000003B99F7F8A0\n000000067F000040020000A0000000F14000-000000067F000040020000A0000000F18000__0000005D2FFFFB38\n000000067F000040020000A0000000F18000-000000067F000040020000A0000000F1C000__00000038E1ABFE28\n000000067F000040020000A0000000F18000-000000067F000040020000A0000000F1C000__00000038E9AF7F00\n000000067F000040020000A0000000F18000-000000067F000040020000A0000000F1C000__0000003903F1CFE8\n000000067F000040020000A0000000F18000-000000067F000040020000A0000000F1C000__0000003B99F7F8A0\n000000067F000040020000A0000000F18000-000000067F000040020000A0000000F1C000__0000005D2FFFFB38\n000000067F000040020000A0000000F19F63-000000067F000040020000A0000000F2293B__000000384463E2C1-00000038E1E2FE19\n000000067F000040020000A0000000F1C000-000000067F000040020000A0000000F20000__00000038E1ABFE28\n000000067F000040020000A0000000F1C000-000000067F000040020000A0000000F20000__00000038E9AF7F00\n000000067F000040020000A0000000F1C000-000000067F000040020000A0000000F20000__0000003903F1CFE8\n000000067F000040020000A0000000F1C000-000000067F000040020000A0000000F20000__0000003B99F7F8A0\n000000067F000040020000A0000000F1C000-000000067F000040020000A0000000F20000__0000005D2FFFFB38\n000000067F000040020000A0000000F20000-000000067F000040020000A0000000F24000__00000038E1ABFE28\n000000067F000040020000A0000000F20000-000000067F000040020000A0000000F24000__00000038E9AF7F00\n000000067F000040020000A0000000F20000-000000067F000040020000A0000000F24000__0000003903F1CFE8\n000000067F000040020000A0000000F20000-000000067F000040020000A0000000F24000__0000003B99F7F8A0\n000000067F000040020000A0000000F20000-000000067F000040020000A0000000F24000__0000005D2FFFFB38\n000000067F00004002000
0A0000000F2293B-000000067F000040020000A0000000F2B30B__000000384463E2C1-00000038E1E2FE19\n000000067F000040020000A0000000F24000-000000067F000040020000A0000000F28000__00000038E1ABFE28\n000000067F000040020000A0000000F24000-000000067F000040020000A0000000F28000__00000038E9AF7F00\n000000067F000040020000A0000000F24000-000000067F000040020000A0000000F28000__0000003903F1CFE8\n000000067F000040020000A0000000F24000-000000067F000040020000A0000000F28000__0000003B99F7F8A0\n000000067F000040020000A0000000F24000-000000067F000040020000A0000000F28000__0000005D2FFFFB38\n000000067F000040020000A0000000F28000-000000067F000040020000A0000000F2C000__00000038E1ABFE28\n000000067F000040020000A0000000F28000-000000067F000040020000A0000000F2C000__00000038E9AF7F00\n000000067F000040020000A0000000F28000-000000067F000040020000A0000000F2C000__0000003903F1CFE8\n000000067F000040020000A0000000F28000-000000067F000040020000A0000000F2C000__0000003B99F7F8A0\n000000067F000040020000A0000000F28000-000000067F000040020000A0000000F2C000__0000005D2FFFFB38\n000000067F000040020000A0000000F2B30B-000000067F000040020000A0000000F33CE3__000000384463E2C1-00000038E1E2FE19\n000000067F000040020000A0000000F2C000-000000067F000040020000A0000000F30000__00000038E1ABFE28\n000000067F000040020000A0000000F2C000-000000067F000040020000A0000000F30000__00000038E9AF7F00\n000000067F000040020000A0000000F2C000-000000067F000040020000A0000000F30000__0000003903F1CFE8\n000000067F000040020000A0000000F2C000-000000067F000040020000A0000000F30000__0000003B99F7F8A0\n000000067F000040020000A0000000F2C000-000000067F000040020000A0000000F30000__0000005D2FFFFB38\n000000067F000040020000A0000000F30000-000000067F000040020000A0000000F34000__00000038E1ABFE28\n000000067F000040020000A0000000F30000-000000067F000040020000A0000000F34000__00000038E9AF7F00\n000000067F000040020000A0000000F30000-000000067F000040020000A0000000F34000__0000003903F1CFE8\n000000067F000040020000A0000000F30000-000000067F000040020000A0000000F34000__0000003B99F7F8A0\n000000067F000040020000A0000000F300
00-000000067F000040020000A0000000F34000__0000005D2FFFFB38\n000000067F000040020000A0000000F33CE3-000000067F000040020000A0000000F3C6C9__000000384463E2C1-00000038E1E2FE19\n000000067F000040020000A0000000F34000-000000067F000040020000A0000000F38000__00000038E1ABFE28\n000000067F000040020000A0000000F34000-000000067F000040020000A0000000F38000__00000038E9AF7F00\n000000067F000040020000A0000000F34000-000000067F000040020000A0000000F38000__0000003903F1CFE8\n000000067F000040020000A0000000F34000-000000067F000040020000A0000000F38000__0000003B99F7F8A0\n000000067F000040020000A0000000F34000-000000067F000040020000A0000000F38000__0000005D2FFFFB38\n000000067F000040020000A0000000F38000-000000067F000040020000A0000000F3C000__00000038E1ABFE28\n000000067F000040020000A0000000F38000-000000067F000040020000A0000000F3C000__00000038E9AF7F00\n000000067F000040020000A0000000F38000-000000067F000040020000A0000000F3C000__0000003903F1CFE8\n000000067F000040020000A0000000F38000-000000067F000040020000A0000000F3C000__0000003B99F7F8A0\n000000067F000040020000A0000000F38000-000000067F000040020000A0000000F3C000__0000005D2FFFFB38\n000000067F000040020000A0000000F3C000-000000067F000040020000A0000000F40000__00000038E1ABFE28\n000000067F000040020000A0000000F3C000-000000067F000040020000A0000000F40000__00000038E9AF7F00\n000000067F000040020000A0000000F3C000-000000067F000040020000A0000000F40000__0000003903F1CFE8\n000000067F000040020000A0000000F3C000-000000067F000040020000A0000000F40000__0000003B99F7F8A0\n000000067F000040020000A0000000F3C000-000000067F000040020000A0000000F40000__0000005D2FFFFB38\n000000067F000040020000A0000000F3C6C9-000000067F000040020000A0000000F450AB__000000384463E2C1-00000038E1E2FE19\n000000067F000040020000A0000000F40000-000000067F000040020000A0000000F44000__00000038E1ABFE28\n000000067F000040020000A0000000F40000-000000067F000040020000A0000000F44000__00000038E9AF7F00\n000000067F000040020000A0000000F40000-000000067F000040020000A0000000F44000__0000003903F1CFE8\n000000067F000040020000A0000000F40000-000000067F
000040020000A0000000F44000__0000003B99F7F8A0\n000000067F000040020000A0000000F40000-000000067F000040020000A0000000F44000__0000005D2FFFFB38\n000000067F000040020000A0000000F44000-000000067F000040020000A0000000F48000__00000038E1ABFE28\n000000067F000040020000A0000000F44000-000000067F000040020000A0000000F48000__00000038E9AF7F00\n000000067F000040020000A0000000F44000-000000067F000040020000A0000000F48000__0000003903F1CFE8\n000000067F000040020000A0000000F44000-000000067F000040020000A0000000F48000__0000003B99F7F8A0\n000000067F000040020000A0000000F44000-000000067F000040020000A0000000F48000__0000005D2FFFFB38\n000000067F000040020000A0000000F450AB-000000067F000040020000A0000000F4DA85__000000384463E2C1-00000038E1E2FE19\n000000067F000040020000A0000000F48000-000000067F000040020000A0000000F4C000__00000038E1ABFE28\n000000067F000040020000A0000000F48000-000000067F000040020000A0000000F4C000__00000038E9AF7F00\n000000067F000040020000A0000000F48000-000000067F000040020000A0000000F4C000__0000003903F1CFE8\n000000067F000040020000A0000000F48000-000000067F000040020000A0000000F4C000__0000003B99F7F8A0\n000000067F000040020000A0000000F48000-000000067F000040020000A0000000F4C000__0000005D2FFFFB38\n000000067F000040020000A0000000F4C000-000000067F000040020000A0000000F50000__00000038E1ABFE28\n000000067F000040020000A0000000F4C000-000000067F000040020000A0000000F50000__00000038E9AF7F00\n000000067F000040020000A0000000F4C000-000000067F000040020000A0000000F50000__0000003903F1CFE8\n000000067F000040020000A0000000F4C000-000000067F000040020000A0000000F50000__0000003B99F7F8A0\n000000067F000040020000A0000000F4C000-000000067F000040020000A0000000F50000__0000005D2FFFFB38\n000000067F000040020000A0000000F4DA85-000000067F000040020000A0000000F56464__000000384463E2C1-00000038E1E2FE19\n000000067F000040020000A0000000F50000-000000067F000040020000A0000000F54000__00000038E1ABFE28\n000000067F000040020000A0000000F50000-000000067F000040020000A0000000F54000__00000038E9AF7F00\n000000067F000040020000A0000000F50000-000000067F000040020000A
0000000F54000__0000003903F1CFE8\n000000067F000040020000A0000000F50000-000000067F000040020000A0000000F54000__0000003B99F7F8A0\n000000067F000040020000A0000000F50000-000000067F000040020000A0000000F54000__0000005D2FFFFB38\n000000067F000040020000A0000000F54000-000000067F000040020000A0000000F58000__00000038E1ABFE28\n000000067F000040020000A0000000F54000-000000067F000040020000A0000000F58000__00000038E9AF7F00\n000000067F000040020000A0000000F54000-000000067F000040020000A0000000F58000__0000003903F1CFE8\n000000067F000040020000A0000000F54000-000000067F000040020000A0000000F58000__0000003B99F7F8A0\n000000067F000040020000A0000000F54000-000000067F000040020000A0000000F58000__0000005D2FFFFB38\n000000067F000040020000A0000000F56464-010000000000000001000000000000000001__000000384463E2C1-00000038E1E2FE19\n000000067F000040020000A0000000F58000-000000067F000040020000A0000000F5C000__00000038E1ABFE28\n000000067F000040020000A0000000F58000-000000067F000040020000A0000000F5C000__00000038E9AF7F00\n000000067F000040020000A0000000F58000-000000067F000040020000A0000000F5C000__0000003903F1CFE8\n000000067F000040020000A0000000F58000-000000067F000040020000A0000000F5C000__0000003B99F7F8A0\n000000067F000040020000A0000000F58000-000000067F000040020000A0000000F5C000__0000005D2FFFFB38\n000000067F000040020000A0000000F5C000-000000067F000040020000A0040100000000__00000038E9AF7F00\n000000067F000040020000A0000000F5C000-000000067F000040020000A0080100000000__0000003903F1CFE8\n000000067F000040020000A0000000F5C000-000000067F000040020000A0080100000000__0000003B99F7F8A0\n000000067F000040020000A0000000F5C000-000000067F000040020000A0080100000000__0000005D2FFFFB38\n000000067F000040020000A0000000F5C000-030000000000000000000000000000000002__00000038E1ABFE28\n000000067F000040020000A00000FFFFFFFF-030000000000000000000000000000000002__00000031853FEA98\n000000067F000040020000A0050000000000-000000067F000040020000A0050100000003__00000038E1E2FE19-00000038E3787F09\n000000067F000040020000A0050000000000-000000067F000040020000A0050200000000
__00000038E3787F09-00000038E5077EE1\n000000067F000040020000A0050000000000-030000000000000000000000000000000002__00000038E4DFC4C8\n000000067F000040020000A0050000000000-030000000000000000000000000000000002__00000038E815BE18\n000000067F000040020000A0060000000000-000000067F000040020000A0060100000003__00000038E68FBE49-00000038E813FFC9\n000000067F000040020000A0060000000000-000000067F000040020000A0060200000000__00000038E813FFC9-00000038E99BFDE9\n000000067F000040020000A0070000000000-000000067F000040020000A0070100000003__00000038EAFDDF91-00000038EBFD1ED1\n000000067F000040020000A0070000000000-000000067F000040020000A0070100000003__00000038EBFD1ED1-00000038ECF55FD9\n000000067F000040020000A0070000000000-030000000000000000000000000000000002__00000038ECE35F08\n000000067F000040020000C0000000000000-000000067F000040020000C0000000004000__0000003903F1CFE8\n000000067F000040020000C0000000000000-000000067F000040020000C0000000004000__0000003B99F7F8A0\n000000067F000040020000C0000000000000-000000067F000040020000C0000000004000__0000005D2FFFFB38\n000000067F000040020000C0000000004000-000000067F000040020000C0000000008000__0000003903F1CFE8\n000000067F000040020000C0000000004000-000000067F000040020000C0000000008000__0000003B99F7F8A0\n000000067F000040020000C0000000004000-000000067F000040020000C0000000008000__0000005D2FFFFB38\n000000067F000040020000C0000000007F72-000000067F000040020000C000000000FEF5__00000038ED8FA069-0000003ABA685F11\n000000067F000040020000C0000000008000-000000067F000040020000C000000000C000__0000003903F1CFE8\n000000067F000040020000C0000000008000-000000067F000040020000C000000000C000__0000003B99F7F8A0\n000000067F000040020000C0000000008000-000000067F000040020000C000000000C000__0000005D2FFFFB38\n000000067F000040020000C000000000C000-000000067F000040020000C0000000010000__0000003903F1CFE8\n000000067F000040020000C000000000C000-000000067F000040020000C0000000010000__0000003B99F7F8A0\n000000067F000040020000C000000000C000-000000067F000040020000C0000000010000__0000005D2FFFFB38\n000000067F00004002
0000C000000000FEF5-000000067F000040020000C0000000017E78__00000038ED8FA069-0000003ABA685F11\n000000067F000040020000C0000000010000-000000067F000040020000C0000000014000__0000003B99F7F8A0\n000000067F000040020000C0000000010000-000000067F000040020000C0000000014000__0000005D2FFFFB38\n000000067F000040020000C0000000010000-030000000000000000000000000000000002__0000003903F1CFE8\n000000067F000040020000C0000000014000-000000067F000040020000C0000000018000__0000003B99F7F8A0\n000000067F000040020000C0000000014000-000000067F000040020000C0000000018000__0000005D2FFFFB38\n000000067F000040020000C0000000017E78-000000067F000040020000C000000001FDFB__00000038ED8FA069-0000003ABA685F11\n000000067F000040020000C0000000018000-000000067F000040020000C000000001C000__0000003B99F7F8A0\n000000067F000040020000C0000000018000-000000067F000040020000C000000001C000__0000005D2FFFFB38\n000000067F000040020000C000000001C000-000000067F000040020000C0000000020000__0000003B99F7F8A0\n000000067F000040020000C000000001C000-000000067F000040020000C0000000020000__0000005D2FFFFB38\n000000067F000040020000C000000001FDFB-000000067F000040020000C0000000027D7E__00000038ED8FA069-0000003ABA685F11\n000000067F000040020000C0000000020000-000000067F000040020000C0000000024000__0000003B99F7F8A0\n000000067F000040020000C0000000020000-000000067F000040020000C0000000024000__0000005D2FFFFB38\n000000067F000040020000C0000000024000-000000067F000040020000C0000000028000__0000003B99F7F8A0\n000000067F000040020000C0000000024000-000000067F000040020000C0000000028000__0000005D2FFFFB38\n000000067F000040020000C0000000027D7E-000000067F000040020000C000000002FD01__00000038ED8FA069-0000003ABA685F11\n000000067F000040020000C0000000028000-000000067F000040020000C000000002C000__0000003B99F7F8A0\n000000067F000040020000C0000000028000-000000067F000040020000C000000002C000__0000005D2FFFFB38\n000000067F000040020000C000000002C000-000000067F000040020000C0000000030000__0000003B99F7F8A0\n000000067F000040020000C000000002C000-000000067F000040020000C0000000030000__0000005D2FFFFB3
8\n000000067F000040020000C000000002FD01-000000067F000040020000C0000000037C84__00000038ED8FA069-0000003ABA685F11\n000000067F000040020000C0000000030000-000000067F000040020000C0000000034000__0000003B99F7F8A0\n000000067F000040020000C0000000030000-000000067F000040020000C0000000034000__0000005D2FFFFB38\n000000067F000040020000C0000000034000-000000067F000040020000C0000000038000__0000003B99F7F8A0\n000000067F000040020000C0000000034000-000000067F000040020000C0000000038000__0000005D2FFFFB38\n000000067F000040020000C0000000037C84-000000067F000040020000C000000003FC07__00000038ED8FA069-0000003ABA685F11\n000000067F000040020000C0000000038000-000000067F000040020000C000000003C000__0000003B99F7F8A0\n000000067F000040020000C0000000038000-000000067F000040020000C000000003C000__0000005D2FFFFB38\n000000067F000040020000C000000003C000-000000067F000040020000C0000000040000__0000003B99F7F8A0\n000000067F000040020000C000000003C000-000000067F000040020000C0000000040000__0000005D2FFFFB38\n000000067F000040020000C000000003FC07-000000067F000040020000C0000000047B8A__00000038ED8FA069-0000003ABA685F11\n000000067F000040020000C0000000040000-000000067F000040020000C0000000044000__0000003B99F7F8A0\n000000067F000040020000C0000000040000-000000067F000040020000C0000000044000__0000005D2FFFFB38\n000000067F000040020000C0000000044000-000000067F000040020000C0000000048000__0000003B99F7F8A0\n000000067F000040020000C0000000044000-000000067F000040020000C0000000048000__0000005D2FFFFB38\n000000067F000040020000C0000000047B8A-000000067F000040020000C000000004FB0D__00000038ED8FA069-0000003ABA685F11\n000000067F000040020000C0000000048000-000000067F000040020000C000000004C000__0000003B99F7F8A0\n000000067F000040020000C0000000048000-000000067F000040020000C000000004C000__0000005D2FFFFB38\n000000067F000040020000C000000004C000-000000067F000040020000C0000000050000__0000003B99F7F8A0\n000000067F000040020000C000000004C000-000000067F000040020000C0000000050000__0000005D2FFFFB38\n000000067F000040020000C000000004FB0D-000000067F000040020000C000000005
7A90__00000038ED8FA069-0000003ABA685F11\n000000067F000040020000C0000000050000-000000067F000040020000C0000000054000__0000003B99F7F8A0\n000000067F000040020000C0000000050000-000000067F000040020000C0000000054000__0000005D2FFFFB38\n000000067F000040020000C0000000054000-000000067F000040020000C0000000058000__0000003B99F7F8A0\n000000067F000040020000C0000000054000-000000067F000040020000C0000000058000__0000005D2FFFFB38\n000000067F000040020000C0000000057A90-000000067F000040020000C000000005FA13__00000038ED8FA069-0000003ABA685F11\n000000067F000040020000C0000000058000-000000067F000040020000C000000005C000__0000003B99F7F8A0\n000000067F000040020000C0000000058000-000000067F000040020000C000000005C000__0000005D2FFFFB38\n000000067F000040020000C000000005C000-000000067F000040020000C0000000060000__0000003B99F7F8A0\n000000067F000040020000C000000005C000-000000067F000040020000C0000000060000__0000005D2FFFFB38\n000000067F000040020000C000000005FA13-000000067F000040020000C0000000067996__00000038ED8FA069-0000003ABA685F11\n000000067F000040020000C0000000060000-000000067F000040020000C0000000064000__0000003B99F7F8A0\n000000067F000040020000C0000000060000-000000067F000040020000C0000000064000__0000005D2FFFFB38\n000000067F000040020000C0000000064000-000000067F000040020000C0000000068000__0000003B99F7F8A0\n000000067F000040020000C0000000064000-000000067F000040020000C0000000068000__0000005D2FFFFB38\n000000067F000040020000C0000000067996-000000067F000040020000C000000006F919__00000038ED8FA069-0000003ABA685F11\n000000067F000040020000C0000000068000-000000067F000040020000C000000006C000__0000003B99F7F8A0\n000000067F000040020000C0000000068000-000000067F000040020000C000000006C000__0000005D2FFFFB38\n000000067F000040020000C000000006C000-000000067F000040020000C0000000070000__0000003B99F7F8A0\n000000067F000040020000C000000006C000-000000067F000040020000C0000000070000__0000005D2FFFFB38\n000000067F000040020000C000000006F919-000000067F000040020000C000000007789C__00000038ED8FA069-0000003ABA685F11\n000000067F000040020000C00000000
70000-000000067F000040020000C0000000074000__0000003B99F7F8A0\n000000067F000040020000C0000000070000-000000067F000040020000C0000000074000__0000005D2FFFFB38\n000000067F000040020000C0000000074000-000000067F000040020000C0000000078000__0000003B99F7F8A0\n000000067F000040020000C0000000074000-000000067F000040020000C0000000078000__0000005D2FFFFB38\n000000067F000040020000C000000007789C-000000067F000040020000C000000007F81F__00000038ED8FA069-0000003ABA685F11\n000000067F000040020000C0000000078000-000000067F000040020000C000000007C000__0000003B99F7F8A0\n000000067F000040020000C0000000078000-000000067F000040020000C000000007C000__0000005D2FFFFB38\n000000067F000040020000C000000007C000-000000067F000040020000C0000000080000__0000003B99F7F8A0\n000000067F000040020000C000000007C000-000000067F000040020000C0000000080000__0000005D2FFFFB38\n000000067F000040020000C000000007F81F-000000067F000040020000C00000000877A2__00000038ED8FA069-0000003ABA685F11\n000000067F000040020000C0000000080000-000000067F000040020000C0000000084000__0000003B99F7F8A0\n000000067F000040020000C0000000080000-000000067F000040020000C0000000084000__0000005D2FFFFB38\n000000067F000040020000C0000000084000-000000067F000040020000C0000000088000__0000003B99F7F8A0\n000000067F000040020000C0000000084000-000000067F000040020000C0000000088000__0000005D2FFFFB38\n000000067F000040020000C00000000877A2-000000067F000040020000C000000008F725__00000038ED8FA069-0000003ABA685F11\n000000067F000040020000C0000000088000-000000067F000040020000C000000008C000__0000003B99F7F8A0\n000000067F000040020000C0000000088000-000000067F000040020000C000000008C000__0000005D2FFFFB38\n000000067F000040020000C000000008C000-000000067F000040020000C0000000090000__0000003B99F7F8A0\n000000067F000040020000C000000008C000-000000067F000040020000C0000000090000__0000005D2FFFFB38\n000000067F000040020000C000000008F725-000000067F000040020000C00000000976A8__00000038ED8FA069-0000003ABA685F11\n000000067F000040020000C0000000090000-000000067F000040020000C0000000094000__0000003B99F7F8A0\n000000067F
000040020000C0000000090000-000000067F000040020000C0000000094000__0000005D2FFFFB38\n000000067F000040020000C0000000094000-000000067F000040020000C0000000098000__0000003B99F7F8A0\n000000067F000040020000C0000000094000-000000067F000040020000C0000000098000__0000005D2FFFFB38\n000000067F000040020000C00000000976A8-000000067F000040020000C000000009F62B__00000038ED8FA069-0000003ABA685F11\n000000067F000040020000C0000000098000-000000067F000040020000C000000009C000__0000003B99F7F8A0\n000000067F000040020000C0000000098000-000000067F000040020000C000000009C000__0000005D2FFFFB38\n000000067F000040020000C000000009C000-000000067F000040020000C00000000A0000__0000003B99F7F8A0\n000000067F000040020000C000000009C000-000000067F000040020000C00000000A0000__0000005D2FFFFB38\n000000067F000040020000C000000009F62B-000000067F000040020000C00000000A75AE__00000038ED8FA069-0000003ABA685F11\n000000067F000040020000C00000000A0000-000000067F000040020000C00000000A4000__0000003B99F7F8A0\n000000067F000040020000C00000000A0000-000000067F000040020000C00000000A4000__0000005D2FFFFB38\n000000067F000040020000C00000000A4000-000000067F000040020000C00000000A8000__0000003B99F7F8A0\n000000067F000040020000C00000000A4000-000000067F000040020000C00000000A8000__0000005D2FFFFB38\n000000067F000040020000C00000000A75AE-000000067F000040020000C00000000AF531__00000038ED8FA069-0000003ABA685F11\n000000067F000040020000C00000000A8000-000000067F000040020000C00000000AC000__0000003B99F7F8A0\n000000067F000040020000C00000000A8000-000000067F000040020000C00000000AC000__0000005D2FFFFB38\n000000067F000040020000C00000000AC000-000000067F000040020000C00000000B0000__0000003B99F7F8A0\n000000067F000040020000C00000000AC000-000000067F000040020000C00000000B0000__0000005D2FFFFB38\n000000067F000040020000C00000000AF531-000000067F000040020000C00000000B74B4__00000038ED8FA069-0000003ABA685F11\n000000067F000040020000C00000000B0000-000000067F000040020000C00000000B4000__0000003B99F7F8A0\n000000067F000040020000C00000000B0000-000000067F000040020000C00000000B4000__0000005
D2FFFFB38\n000000067F000040020000C00000000B4000-000000067F000040020000C00000000B8000__0000003B99F7F8A0\n000000067F000040020000C00000000B4000-000000067F000040020000C00000000B8000__0000005D2FFFFB38\n000000067F000040020000C00000000B74B4-000000067F000040020000C00000000BF437__00000038ED8FA069-0000003ABA685F11\n000000067F000040020000C00000000B8000-000000067F000040020000C00000000BC000__0000003B99F7F8A0\n000000067F000040020000C00000000B8000-000000067F000040020000C00000000BC000__0000005D2FFFFB38\n000000067F000040020000C00000000BC000-000000067F000040020000C00000000C0000__0000003B99F7F8A0\n000000067F000040020000C00000000BC000-000000067F000040020000C00000000C0000__0000005D2FFFFB38\n000000067F000040020000C00000000BF437-000000067F000040020000C00000000C73BA__00000038ED8FA069-0000003ABA685F11\n000000067F000040020000C00000000C0000-000000067F000040020000C00000000C4000__0000003B99F7F8A0\n000000067F000040020000C00000000C0000-000000067F000040020000C00000000C4000__0000005D2FFFFB38\n000000067F000040020000C00000000C4000-000000067F000040020000C00000000C8000__0000003B99F7F8A0\n000000067F000040020000C00000000C4000-000000067F000040020000C00000000C8000__0000005D2FFFFB38\n000000067F000040020000C00000000C73BA-000000067F000040020000C00000000CF33D__00000038ED8FA069-0000003ABA685F11\n000000067F000040020000C00000000C8000-000000067F000040020000C00000000CC000__0000003B99F7F8A0\n000000067F000040020000C00000000C8000-000000067F000040020000C00000000CC000__0000005D2FFFFB38\n000000067F000040020000C00000000CC000-000000067F000040020000C00000000D0000__0000003B99F7F8A0\n000000067F000040020000C00000000CC000-000000067F000040020000C00000000D0000__0000005D2FFFFB38\n000000067F000040020000C00000000CF33D-000000067F000040020000C00000000D72C0__00000038ED8FA069-0000003ABA685F11\n000000067F000040020000C00000000D0000-000000067F000040020000C00000000D4000__0000003B99F7F8A0\n000000067F000040020000C00000000D0000-000000067F000040020000C00000000D4000__0000005D2FFFFB38\n000000067F000040020000C00000000D4000-000000067F000040020000C0
0000000D8000__0000003B99F7F8A0\n000000067F000040020000C00000000D4000-000000067F000040020000C00000000D8000__0000005D2FFFFB38\n000000067F000040020000C00000000D72C0-000000067F000040020000C00000000DF243__00000038ED8FA069-0000003ABA685F11\n000000067F000040020000C00000000D8000-000000067F000040020000C00000000DC000__0000003B99F7F8A0\n000000067F000040020000C00000000D8000-000000067F000040020000C00000000DC000__0000005D2FFFFB38\n000000067F000040020000C00000000DC000-000000067F000040020000C00000000E0000__0000003B99F7F8A0\n000000067F000040020000C00000000DC000-000000067F000040020000C00000000E0000__0000005D2FFFFB38\n000000067F000040020000C00000000DF243-000000067F000040020000C00000000E71C6__00000038ED8FA069-0000003ABA685F11\n000000067F000040020000C00000000E0000-000000067F000040020000C00000000E4000__0000003B99F7F8A0\n000000067F000040020000C00000000E0000-000000067F000040020000C00000000E4000__0000005D2FFFFB38\n000000067F000040020000C00000000E4000-000000067F000040020000C00000000E8000__0000003B99F7F8A0\n000000067F000040020000C00000000E4000-000000067F000040020000C00000000E8000__0000005D2FFFFB38\n000000067F000040020000C00000000E71C6-000000067F000040020000C00000000EF149__00000038ED8FA069-0000003ABA685F11\n000000067F000040020000C00000000E8000-000000067F000040020000C00000000EC000__0000003B99F7F8A0\n000000067F000040020000C00000000E8000-000000067F000040020000C00000000EC000__0000005D2FFFFB38\n000000067F000040020000C00000000EC000-000000067F000040020000C00000000F0000__0000003B99F7F8A0\n000000067F000040020000C00000000EC000-000000067F000040020000C00000000F0000__0000005D2FFFFB38\n000000067F000040020000C00000000EF149-000000067F000040020000C00000000F70CC__00000038ED8FA069-0000003ABA685F11\n000000067F000040020000C00000000F0000-000000067F000040020000C00000000F4000__0000003B99F7F8A0\n000000067F000040020000C00000000F0000-000000067F000040020000C00000000F4000__0000005D2FFFFB38\n000000067F000040020000C00000000F4000-000000067F000040020000C00000000F8000__0000003B99F7F8A0\n000000067F000040020000C00000000F4000-000
000067F000040020000C00000000F8000__0000005D2FFFFB38\n000000067F000040020000C00000000F70CC-000000067F000040020000C00000000FF04F__00000038ED8FA069-0000003ABA685F11\n000000067F000040020000C00000000F8000-000000067F000040020000C00000000FC000__0000003B99F7F8A0\n000000067F000040020000C00000000F8000-000000067F000040020000C00000000FC000__0000005D2FFFFB38\n000000067F000040020000C00000000FC000-000000067F000040020000C0000000100000__0000003B99F7F8A0\n000000067F000040020000C00000000FC000-000000067F000040020000C0000000100000__0000005D2FFFFB38\n000000067F000040020000C00000000FF04F-000000067F000040020000C0000000106FD2__00000038ED8FA069-0000003ABA685F11\n000000067F000040020000C0000000100000-000000067F000040020000C0000000104000__0000003B99F7F8A0\n000000067F000040020000C0000000100000-000000067F000040020000C0000000104000__0000005D2FFFFB38\n000000067F000040020000C0000000104000-000000067F000040020000C0000000108000__0000003B99F7F8A0\n000000067F000040020000C0000000104000-000000067F000040020000C0000000108000__0000005D2FFFFB38\n000000067F000040020000C0000000106FD2-000000067F000040020000C000000010EF55__00000038ED8FA069-0000003ABA685F11\n000000067F000040020000C0000000108000-000000067F000040020000C000000010C000__0000003B99F7F8A0\n000000067F000040020000C0000000108000-000000067F000040020000C000000010C000__0000005D2FFFFB38\n000000067F000040020000C000000010C000-000000067F000040020000C0000000110000__0000003B99F7F8A0\n000000067F000040020000C000000010C000-000000067F000040020000C0000000110000__0000005D2FFFFB38\n000000067F000040020000C000000010EF55-000000067F000040020000C0000000116ED8__00000038ED8FA069-0000003ABA685F11\n000000067F000040020000C0000000110000-000000067F000040020000C0000000114000__0000003B99F7F8A0\n000000067F000040020000C0000000110000-000000067F000040020000C0000000114000__0000005D2FFFFB38\n000000067F000040020000C0000000114000-000000067F000040020000C0000000118000__0000003B99F7F8A0\n000000067F000040020000C0000000114000-000000067F000040020000C0000000118000__0000005D2FFFFB38\n000000067F000040020
000C0000000116ED8-000000067F000040020000C000000011EE5B__00000038ED8FA069-0000003ABA685F11\n000000067F000040020000C0000000118000-000000067F000040020000C000000011C000__0000003B99F7F8A0\n000000067F000040020000C0000000118000-000000067F000040020000C000000011C000__0000005D2FFFFB38\n000000067F000040020000C000000011C000-000000067F000040020000C0000000120000__0000003B99F7F8A0\n000000067F000040020000C000000011C000-000000067F000040020000C0000000120000__0000005D2FFFFB38\n000000067F000040020000C000000011EE5B-000000067F000040020000C0000000126DDE__00000038ED8FA069-0000003ABA685F11\n000000067F000040020000C0000000120000-000000067F000040020000C0000000124000__0000003B99F7F8A0\n000000067F000040020000C0000000120000-000000067F000040020000C0000000124000__0000005D2FFFFB38\n000000067F000040020000C0000000124000-000000067F000040020000C0000000128000__0000003B99F7F8A0\n000000067F000040020000C0000000124000-000000067F000040020000C0000000128000__0000005D2FFFFB38\n000000067F000040020000C0000000126DDE-000000067F000040020000C000000012ED61__00000038ED8FA069-0000003ABA685F11\n000000067F000040020000C0000000128000-000000067F000040020000C000000012C000__0000003B99F7F8A0\n000000067F000040020000C0000000128000-000000067F000040020000C000000012C000__0000005D2FFFFB38\n000000067F000040020000C000000012C000-000000067F000040020000C0000000130000__0000003B99F7F8A0\n000000067F000040020000C000000012C000-000000067F000040020000C0000000130000__0000005D2FFFFB38\n000000067F000040020000C000000012ED61-000000067F000040020000C0000000136CE4__00000038ED8FA069-0000003ABA685F11\n000000067F000040020000C0000000130000-000000067F000040020000C0000000134000__0000003B99F7F8A0\n000000067F000040020000C0000000130000-000000067F000040020000C0000000134000__0000005D2FFFFB38\n000000067F000040020000C0000000134000-000000067F000040020000C0000000138000__0000003B99F7F8A0\n000000067F000040020000C0000000134000-000000067F000040020000C0000000138000__0000005D2FFFFB38\n000000067F000040020000C0000000136CE4-000000067F000040020000C000000013EC67__00000038ED8FA069
-0000003ABA685F11\n000000067F000040020000C0000000138000-000000067F000040020000C000000013C000__0000003B99F7F8A0\n000000067F000040020000C0000000138000-000000067F000040020000C000000013C000__0000005D2FFFFB38\n000000067F000040020000C000000013C000-000000067F000040020000C0000000140000__0000003B99F7F8A0\n000000067F000040020000C000000013C000-000000067F000040020000C0000000140000__0000005D2FFFFB38\n000000067F000040020000C000000013EC67-000000067F000040020000C0000000146BEA__00000038ED8FA069-0000003ABA685F11\n000000067F000040020000C0000000140000-000000067F000040020000C0000000144000__0000003B99F7F8A0\n000000067F000040020000C0000000140000-000000067F000040020000C0000000144000__0000005D2FFFFB38\n000000067F000040020000C0000000144000-000000067F000040020000C0000000148000__0000003B99F7F8A0\n000000067F000040020000C0000000144000-000000067F000040020000C0000000148000__0000005D2FFFFB38\n000000067F000040020000C0000000146BEA-000000067F000040020000C000000014EB6D__00000038ED8FA069-0000003ABA685F11\n000000067F000040020000C0000000148000-000000067F000040020000C000000014C000__0000003B99F7F8A0\n000000067F000040020000C0000000148000-000000067F000040020000C000000014C000__0000005D2FFFFB38\n000000067F000040020000C000000014C000-000000067F000040020000C0000000150000__0000003B99F7F8A0\n000000067F000040020000C000000014C000-000000067F000040020000C0000000150000__0000005D2FFFFB38\n000000067F000040020000C000000014EB6D-000000067F000040020000C0000000156AF0__00000038ED8FA069-0000003ABA685F11\n000000067F000040020000C0000000150000-000000067F000040020000C0000000154000__0000003B99F7F8A0\n000000067F000040020000C0000000150000-000000067F000040020000C0000000154000__0000005D2FFFFB38\n000000067F000040020000C0000000154000-000000067F000040020000C0000000158000__0000003B99F7F8A0\n000000067F000040020000C0000000154000-000000067F000040020000C0000000158000__0000005D2FFFFB38\n000000067F000040020000C0000000156AF0-000000067F000040020000C000000015EA73__00000038ED8FA069-0000003ABA685F11\n000000067F000040020000C0000000158000-000000067F000040
020000C000000015C000__0000003B99F7F8A0\n000000067F000040020000C0000000158000-000000067F000040020000C000000015C000__0000005D2FFFFB38\n000000067F000040020000C000000015C000-000000067F000040020000C0000000160000__0000003B99F7F8A0\n000000067F000040020000C000000015C000-000000067F000040020000C0000000160000__0000005D2FFFFB38\n000000067F000040020000C000000015EA73-000000067F000040020000C00000001669F6__00000038ED8FA069-0000003ABA685F11\n000000067F000040020000C0000000160000-000000067F000040020000C0000000164000__0000003B99F7F8A0\n000000067F000040020000C0000000160000-000000067F000040020000C0000000164000__0000005D2FFFFB38\n000000067F000040020000C0000000164000-000000067F000040020000C0000000168000__0000003B99F7F8A0\n000000067F000040020000C0000000164000-000000067F000040020000C0000000168000__0000005D2FFFFB38\n000000067F000040020000C00000001669F6-000000067F000040020000C000000016E979__00000038ED8FA069-0000003ABA685F11\n000000067F000040020000C0000000168000-000000067F000040020000C000000016C000__0000003B99F7F8A0\n000000067F000040020000C0000000168000-000000067F000040020000C000000016C000__0000005D2FFFFB38\n000000067F000040020000C000000016C000-000000067F000040020000C0000000170000__0000003B99F7F8A0\n000000067F000040020000C000000016C000-000000067F000040020000C0000000170000__0000005D2FFFFB38\n000000067F000040020000C000000016E979-000000067F000040020000C00000001768FC__00000038ED8FA069-0000003ABA685F11\n000000067F000040020000C0000000170000-000000067F000040020000C0000000174000__0000003B99F7F8A0\n000000067F000040020000C0000000170000-000000067F000040020000C0000000174000__0000005D2FFFFB38\n000000067F000040020000C0000000174000-000000067F000040020000C0000000178000__0000003B99F7F8A0\n000000067F000040020000C0000000174000-000000067F000040020000C0000000178000__0000005D2FFFFB38\n000000067F000040020000C00000001768FC-000000067F000040020000C000000017E87F__00000038ED8FA069-0000003ABA685F11\n000000067F000040020000C0000000178000-000000067F000040020000C000000017C000__0000003B99F7F8A0\n000000067F000040020000C000000017
8000-000000067F000040020000C000000017C000__0000005D2FFFFB38\n000000067F000040020000C000000017C000-000000067F000040020000C0000000180000__0000003B99F7F8A0\n000000067F000040020000C000000017C000-000000067F000040020000C0000000180000__0000005D2FFFFB38\n000000067F000040020000C000000017E87F-030000000000000000000000000000000002__00000038ED8FA069-0000003ABA685F11\n000000067F000040020000C0000000180000-000000067F000040020000C0000100000000__0000003B99F7F8A0\n000000067F000040020000C0000000180000-000000067F000040020000C0000100000000__0000005D2FFFFB38\n000000067F000040020000E0000000000000-000000067F000040020000E0000000004000__0000003B99F7F8A0\n000000067F000040020000E0000000000000-000000067F000040020000E0000000004000__0000005D2FFFFB38\n000000067F000040020000E0000000000000-000000067F000040020000E0000000004000__00000073AD3FE6B8\n000000067F000040020000E0000000000000-000000067F000040020000E0000000004000__000000914E3F38F0\n000000067F000040020000E0000000000000-000000067F000040020000E0000000004000__000000931B9A2710\n000000067F000040020000E0000000004000-000000067F000040020000E0000000008000__0000003B99F7F8A0\n000000067F000040020000E0000000004000-000000067F000040020000E0000000008000__0000005D2FFFFB38\n000000067F000040020000E0000000004000-000000067F000040020000E0000000008000__00000073AD3FE6B8\n000000067F000040020000E0000000004000-000000067F000040020000E0000000008000__000000914E3F38F0\n000000067F000040020000E0000000004000-000000067F000040020000E0000000008000__000000931B9A2710\n000000067F000040020000E0000000008000-000000067F000040020000E000000000C000__0000003B99F7F8A0\n000000067F000040020000E0000000008000-000000067F000040020000E000000000C000__0000005D2FFFFB38\n000000067F000040020000E0000000008000-000000067F000040020000E000000000C000__00000073AD3FE6B8\n000000067F000040020000E0000000008000-000000067F000040020000E000000000C000__000000914E3F38F0\n000000067F000040020000E0000000008000-000000067F000040020000E000000000C000__000000931B9A2710\n000000067F000040020000E000000000899C-000000067F000040020000E00
0000001137C__0000003ABA698781-0000003B6A0FFB09\n000000067F000040020000E000000000C000-000000067F000040020000E0000000010000__0000003B99F7F8A0\n000000067F000040020000E000000000C000-000000067F000040020000E0000000010000__0000005D2FFFFB38\n000000067F000040020000E000000000C000-000000067F000040020000E0000000010000__00000073AD3FE6B8\n000000067F000040020000E000000000C000-000000067F000040020000E0000000010000__000000914E3F38F0\n000000067F000040020000E000000000C000-000000067F000040020000E0000000010000__000000931B9A2710\n000000067F000040020000E0000000010000-000000067F000040020000E0000000014000__0000003B99F7F8A0\n000000067F000040020000E0000000010000-000000067F000040020000E0000000014000__0000005D2FFFFB38\n000000067F000040020000E0000000010000-000000067F000040020000E0000000014000__00000073AD3FE6B8\n000000067F000040020000E0000000010000-000000067F000040020000E0000000014000__000000914E3F38F0\n000000067F000040020000E0000000010000-000000067F000040020000E0000000014000__000000931B9A2710\n000000067F000040020000E000000001137C-000000067F000040020000E0000000019D79__0000003ABA698781-0000003B6A0FFB09\n000000067F000040020000E0000000014000-000000067F000040020000E0000000018000__0000003B99F7F8A0\n000000067F000040020000E0000000014000-000000067F000040020000E0000000018000__0000005D2FFFFB38\n000000067F000040020000E0000000014000-000000067F000040020000E0000000018000__00000073AD3FE6B8\n000000067F000040020000E0000000014000-000000067F000040020000E0000000018000__000000914E3F38F0\n000000067F000040020000E0000000014000-000000067F000040020000E0000000018000__000000931B9A2710\n000000067F000040020000E0000000018000-000000067F000040020000E000000001C000__0000003B99F7F8A0\n000000067F000040020000E0000000018000-000000067F000040020000E000000001C000__0000005D2FFFFB38\n000000067F000040020000E0000000018000-000000067F000040020000E000000001C000__00000073AD3FE6B8\n000000067F000040020000E0000000018000-000000067F000040020000E000000001C000__000000914E3F38F0\n000000067F000040020000E0000000018000-000000067F000040020000E000000001C000__
000000931B9A2710\n000000067F000040020000E0000000019D79-000000067F000040020000E0000000022776__0000003ABA698781-0000003B6A0FFB09\n000000067F000040020000E000000001C000-000000067F000040020000E0000000020000__0000003B99F7F8A0\n000000067F000040020000E000000001C000-000000067F000040020000E0000000020000__0000005D2FFFFB38\n000000067F000040020000E000000001C000-000000067F000040020000E0000000020000__00000073AD3FE6B8\n000000067F000040020000E000000001C000-000000067F000040020000E0000000020000__000000914E3F38F0\n000000067F000040020000E000000001C000-000000067F000040020000E0000000020000__000000931B9A2710\n000000067F000040020000E0000000020000-000000067F000040020000E0000000024000__0000003B99F7F8A0\n000000067F000040020000E0000000020000-000000067F000040020000E0000000024000__0000005D2FFFFB38\n000000067F000040020000E0000000020000-000000067F000040020000E0000000024000__00000073AD3FE6B8\n000000067F000040020000E0000000020000-000000067F000040020000E0000000024000__000000914E3F38F0\n000000067F000040020000E0000000020000-000000067F000040020000E0000000024000__000000931B9A2710\n000000067F000040020000E0000000022776-000000067F000040020000E000000002B15B__0000003ABA698781-0000003B6A0FFB09\n000000067F000040020000E0000000024000-000000067F000040020000E0000000028000__0000003B99F7F8A0\n000000067F000040020000E0000000024000-000000067F000040020000E0000000028000__0000005D2FFFFB38\n000000067F000040020000E0000000024000-000000067F000040020000E0000000028000__00000073AD3FE6B8\n000000067F000040020000E0000000024000-000000067F000040020000E0000000028000__000000914E3F38F0\n000000067F000040020000E0000000024000-000000067F000040020000E0000000028000__000000931B9A2710\n000000067F000040020000E0000000028000-000000067F000040020000E000000002C000__0000003B99F7F8A0\n000000067F000040020000E0000000028000-000000067F000040020000E000000002C000__0000005D2FFFFB38\n000000067F000040020000E0000000028000-000000067F000040020000E000000002C000__00000073AD3FE6B8\n000000067F000040020000E0000000028000-000000067F000040020000E000000002C000__000000914E3F3
8F0\n000000067F000040020000E0000000028000-000000067F000040020000E000000002C000__000000931B9A2710\n000000067F000040020000E000000002B15B-000000067F000040020000E0000000033B2F__0000003ABA698781-0000003B6A0FFB09\n000000067F000040020000E000000002C000-000000067F000040020000E0000000030000__0000003B99F7F8A0\n000000067F000040020000E000000002C000-000000067F000040020000E0000000030000__0000005D2FFFFB38\n000000067F000040020000E000000002C000-000000067F000040020000E0000000030000__00000073AD3FE6B8\n000000067F000040020000E000000002C000-000000067F000040020000E0000000030000__000000914E3F38F0\n000000067F000040020000E000000002C000-000000067F000040020000E0000000030000__000000931B9A2710\n000000067F000040020000E0000000030000-000000067F000040020000E0000000034000__0000003B99F7F8A0\n000000067F000040020000E0000000030000-000000067F000040020000E0000000034000__0000005D2FFFFB38\n000000067F000040020000E0000000030000-000000067F000040020000E0000000034000__00000073AD3FE6B8\n000000067F000040020000E0000000030000-000000067F000040020000E0000000034000__000000914E3F38F0\n000000067F000040020000E0000000030000-000000067F000040020000E0000000034000__000000931B9A2710\n000000067F000040020000E0000000033B2F-000000067F000040020000E000000003C4EA__0000003ABA698781-0000003B6A0FFB09\n000000067F000040020000E0000000034000-000000067F000040020000E0000000038000__0000003B99F7F8A0\n000000067F000040020000E0000000034000-000000067F000040020000E0000000038000__0000005D2FFFFB38\n000000067F000040020000E0000000034000-000000067F000040020000E0000000038000__00000073AD3FE6B8\n000000067F000040020000E0000000034000-000000067F000040020000E0000000038000__000000914E3F38F0\n000000067F000040020000E0000000034000-000000067F000040020000E0000000038000__000000931B9A2710\n000000067F000040020000E0000000038000-000000067F000040020000E000000003C000__0000003B99F7F8A0\n000000067F000040020000E0000000038000-000000067F000040020000E000000003C000__0000005D2FFFFB38\n000000067F000040020000E0000000038000-000000067F000040020000E000000003C000__00000073AD3FE6B8\n00000006
7F000040020000E0000000038000-000000067F000040020000E000000003C000__000000914E3F38F0\n000000067F000040020000E0000000038000-000000067F000040020000E000000003C000__000000931B9A2710\n000000067F000040020000E000000003C000-000000067F000040020000E0000000040000__0000003B99F7F8A0\n000000067F000040020000E000000003C000-000000067F000040020000E0000000040000__0000005D2FFFFB38\n000000067F000040020000E000000003C000-000000067F000040020000E0000000040000__00000073AD3FE6B8\n000000067F000040020000E000000003C000-000000067F000040020000E0000000040000__000000914E3F38F0\n000000067F000040020000E000000003C000-000000067F000040020000E0000000040000__000000931B9A2710\n000000067F000040020000E000000003C4EA-000000067F000040020000E0000000044EA8__0000003ABA698781-0000003B6A0FFB09\n000000067F000040020000E0000000040000-000000067F000040020000E0000000044000__0000003B99F7F8A0\n000000067F000040020000E0000000040000-000000067F000040020000E0000000044000__0000005D2FFFFB38\n000000067F000040020000E0000000040000-000000067F000040020000E0000000044000__00000073AD3FE6B8\n000000067F000040020000E0000000040000-000000067F000040020000E0000000044000__000000914E3F38F0\n000000067F000040020000E0000000040000-000000067F000040020000E0000000044000__000000931B9A2710\n000000067F000040020000E0000000044000-000000067F000040020000E0000000048000__0000003B99F7F8A0\n000000067F000040020000E0000000044000-000000067F000040020000E0000000048000__0000005D2FFFFB38\n000000067F000040020000E0000000044000-000000067F000040020000E0000000048000__00000073AD3FE6B8\n000000067F000040020000E0000000044000-000000067F000040020000E0000000048000__000000914E3F38F0\n000000067F000040020000E0000000044000-000000067F000040020000E0000000048000__000000931B9A2710\n000000067F000040020000E0000000044EA8-000000067F000040020000E000000004D890__0000003ABA698781-0000003B6A0FFB09\n000000067F000040020000E0000000048000-000000067F000040020000E000000004C000__0000003B99F7F8A0\n000000067F000040020000E0000000048000-000000067F000040020000E000000004C000__0000005D2FFFFB38\n000000067F00004002000
0E0000000048000-000000067F000040020000E000000004C000__00000073AD3FE6B8\n000000067F000040020000E0000000048000-000000067F000040020000E000000004C000__000000914E3F38F0\n000000067F000040020000E0000000048000-000000067F000040020000E000000004C000__000000931B9A2710\n000000067F000040020000E000000004C000-000000067F000040020000E0000000050000__0000003B99F7F8A0\n000000067F000040020000E000000004C000-000000067F000040020000E0000000050000__0000005D2FFFFB38\n000000067F000040020000E000000004C000-000000067F000040020000E0000000050000__00000073AD3FE6B8\n000000067F000040020000E000000004C000-000000067F000040020000E0000000050000__000000914E3F38F0\n000000067F000040020000E000000004C000-000000067F000040020000E0000000050000__000000931B9A2710\n000000067F000040020000E000000004D890-000000067F000040020000E0000000056296__0000003ABA698781-0000003B6A0FFB09\n000000067F000040020000E0000000050000-000000067F000040020000E0000000054000__0000003B99F7F8A0\n000000067F000040020000E0000000050000-000000067F000040020000E0000000054000__0000005D2FFFFB38\n000000067F000040020000E0000000050000-000000067F000040020000E0000000054000__00000073AD3FE6B8\n000000067F000040020000E0000000050000-000000067F000040020000E0000000054000__000000914E3F38F0\n000000067F000040020000E0000000050000-000000067F000040020000E0000000054000__000000931B9A2710\n000000067F000040020000E0000000054000-000000067F000040020000E0000000058000__0000003B99F7F8A0\n000000067F000040020000E0000000054000-000000067F000040020000E0000000058000__0000005D2FFFFB38\n000000067F000040020000E0000000054000-000000067F000040020000E0000000058000__00000073AD3FE6B8\n000000067F000040020000E0000000054000-000000067F000040020000E0000000058000__000000914E3F38F0\n000000067F000040020000E0000000054000-000000067F000040020000E0000000058000__000000931B9A2710\n000000067F000040020000E0000000056296-000000067F000040020000E000000005EC8C__0000003ABA698781-0000003B6A0FFB09\n000000067F000040020000E0000000058000-000000067F000040020000E000000005C000__0000003B99F7F8A0\n000000067F000040020000E00000000580
00-000000067F000040020000E000000005C000__0000005D2FFFFB38\n000000067F000040020000E0000000058000-000000067F000040020000E000000005C000__00000073AD3FE6B8\n000000067F000040020000E0000000058000-000000067F000040020000E000000005C000__000000914E3F38F0\n000000067F000040020000E0000000058000-000000067F000040020000E000000005C000__000000931B9A2710\n000000067F000040020000E000000005C000-000000067F000040020000E0000000060000__0000003B99F7F8A0\n000000067F000040020000E000000005C000-000000067F000040020000E0000000060000__000000574B7FF240\n000000067F000040020000E000000005C000-000000067F000040020000E0000000060000__00000073AD3FE6B8\n000000067F000040020000E000000005C000-000000067F000040020000E0000000060000__000000914E3F38F0\n000000067F000040020000E000000005C000-000000067F000040020000E0000000060000__000000931B9A2710\n000000067F000040020000E000000005EC8C-030000000000000000000000000000000002__0000003ABA698781-0000003B6A0FFB09\n000000067F000040020000E000000005EF9E-000000067F000040020000E0000000067994__0000003B6A0FFB09-00000047441DEA39\n000000067F000040020000E0000000060000-000000067F000040020000E0000000064000__0000003B99F7F8A0\n000000067F000040020000E0000000060000-000000067F000040020000E0000000064000__000000574B7FF240\n000000067F000040020000E0000000060000-000000067F000040020000E0000000064000__00000073AD3FE6B8\n000000067F000040020000E0000000060000-000000067F000040020000E0000000064000__000000914E3F38F0\n000000067F000040020000E0000000060000-000000067F000040020000E0000000064000__000000931B9A2710\n000000067F000040020000E0000000064000-000000067F000040020000E0000000068000__0000003B99F7F8A0\n000000067F000040020000E0000000064000-000000067F000040020000E0000000068000__000000574B7FF240\n000000067F000040020000E0000000064000-000000067F000040020000E0000000068000__00000073AD3FE6B8\n000000067F000040020000E0000000064000-000000067F000040020000E0000000068000__000000914E3F38F0\n000000067F000040020000E0000000064000-000000067F000040020000E0000000068000__000000931B9A2710\n000000067F000040020000E0000000067994-000000067F
000040020000E0000000070359__0000003B6A0FFB09-00000047441DEA39\n000000067F000040020000E0000000068000-000000067F000040020000E000000006C000__0000003B99F7F8A0\n000000067F000040020000E0000000068000-000000067F000040020000E000000006C000__000000574B7FF240\n000000067F000040020000E0000000068000-000000067F000040020000E000000006C000__00000073AD3FE6B8\n000000067F000040020000E0000000068000-000000067F000040020000E000000006C000__000000914E3F38F0\n000000067F000040020000E0000000068000-000000067F000040020000E000000006C000__000000931B9A2710\n000000067F000040020000E000000006C000-000000067F000040020000E0000000070000__0000003B99F7F8A0\n000000067F000040020000E000000006C000-000000067F000040020000E0000000070000__000000574B7FF240\n000000067F000040020000E000000006C000-000000067F000040020000E0000000070000__00000073AD3FE6B8\n000000067F000040020000E000000006C000-000000067F000040020000E0000000070000__000000914E3F38F0\n000000067F000040020000E000000006C000-000000067F000040020000E0000000070000__000000931B9A2710\n000000067F000040020000E0000000070000-000000067F000040020000E0000000074000__0000003B99F7F8A0\n000000067F000040020000E0000000070000-000000067F000040020000E0000000074000__000000574B7FF240\n000000067F000040020000E0000000070000-000000067F000040020000E0000000074000__00000073AD3FE6B8\n000000067F000040020000E0000000070000-000000067F000040020000E0000000074000__000000914E3F38F0\n000000067F000040020000E0000000070000-000000067F000040020000E0000000074000__000000931B9A2710\n000000067F000040020000E0000000070359-000000067F000040020000E0000000078D16__0000003B6A0FFB09-00000047441DEA39\n000000067F000040020000E0000000074000-000000067F000040020000E0000000078000__0000003B99F7F8A0\n000000067F000040020000E0000000074000-000000067F000040020000E0000000078000__000000574B7FF240\n000000067F000040020000E0000000074000-000000067F000040020000E0000000078000__00000073AD3FE6B8\n000000067F000040020000E0000000074000-000000067F000040020000E0000000078000__000000914E3F38F0\n000000067F000040020000E0000000074000-000000067F000040020000E
0000000078000__000000931B9A2710\n000000067F000040020000E0000000078000-000000067F000040020000E000000007C000__000000574B7FF240\n000000067F000040020000E0000000078000-000000067F000040020000E000000007C000__00000073AD3FE6B8\n000000067F000040020000E0000000078000-000000067F000040020000E000000007C000__000000914E3F38F0\n000000067F000040020000E0000000078000-000000067F000040020000E000000007C000__000000931B9A2710\n000000067F000040020000E0000000078000-030000000000000000000000000000000002__0000003B99F7F8A0\n000000067F000040020000E0000000078D16-000000067F000040020000E00000000816CB__0000003B6A0FFB09-00000047441DEA39\n000000067F000040020000E000000007C000-000000067F000040020000E0000000080000__000000574B7FF240\n000000067F000040020000E000000007C000-000000067F000040020000E0000000080000__00000073AD3FE6B8\n000000067F000040020000E000000007C000-000000067F000040020000E0000000080000__000000914E3F38F0\n000000067F000040020000E000000007C000-000000067F000040020000E0000000080000__000000931B9A2710\n000000067F000040020000E0000000080000-000000067F000040020000E0000000084000__000000574B7FF240\n000000067F000040020000E0000000080000-000000067F000040020000E0000000084000__00000073AD3FE6B8\n000000067F000040020000E0000000080000-000000067F000040020000E0000000084000__000000914E3F38F0\n000000067F000040020000E0000000080000-000000067F000040020000E0000000084000__000000931B9A2710\n000000067F000040020000E00000000816CB-000000067F000040020000E000000008A0C4__0000003B6A0FFB09-00000047441DEA39\n000000067F000040020000E0000000084000-000000067F000040020000E0000000088000__000000574B7FF240\n000000067F000040020000E0000000084000-000000067F000040020000E0000000088000__00000073AD3FE6B8\n000000067F000040020000E0000000084000-000000067F000040020000E0000000088000__000000914E3F38F0\n000000067F000040020000E0000000084000-000000067F000040020000E0000000088000__000000931B9A2710\n000000067F000040020000E0000000088000-000000067F000040020000E000000008C000__000000574B7FF240\n000000067F000040020000E0000000088000-000000067F000040020000E000000008C000
__00000073AD3FE6B8\n000000067F000040020000E0000000088000-000000067F000040020000E000000008C000__000000914E3F38F0\n000000067F000040020000E0000000088000-000000067F000040020000E000000008C000__000000931B9A2710\n000000067F000040020000E000000008A0C4-000000067F000040020000E0000000092AC7__0000003B6A0FFB09-00000047441DEA39\n000000067F000040020000E000000008C000-000000067F000040020000E0000000090000__000000574B7FF240\n000000067F000040020000E000000008C000-000000067F000040020000E0000000090000__00000073AD3FE6B8\n000000067F000040020000E000000008C000-000000067F000040020000E0000000090000__000000914E3F38F0\n000000067F000040020000E000000008C000-000000067F000040020000E0000000090000__000000931B9A2710\n000000067F000040020000E0000000090000-000000067F000040020000E0000000094000__000000574B7FF240\n000000067F000040020000E0000000090000-000000067F000040020000E0000000094000__00000073AD3FE6B8\n000000067F000040020000E0000000090000-000000067F000040020000E0000000094000__000000914E3F38F0\n000000067F000040020000E0000000090000-000000067F000040020000E0000000094000__000000931B9A2710\n000000067F000040020000E0000000092AC7-000000067F000040020000E000000009B4BC__0000003B6A0FFB09-00000047441DEA39\n000000067F000040020000E0000000094000-000000067F000040020000E0000000098000__000000574B7FF240\n000000067F000040020000E0000000094000-000000067F000040020000E0000000098000__00000073AD3FE6B8\n000000067F000040020000E0000000094000-000000067F000040020000E0000000098000__000000914E3F38F0\n000000067F000040020000E0000000094000-000000067F000040020000E0000000098000__000000931B9A2710\n000000067F000040020000E0000000098000-000000067F000040020000E000000009C000__000000574B7FF240\n000000067F000040020000E0000000098000-000000067F000040020000E000000009C000__00000073AD3FE6B8\n000000067F000040020000E0000000098000-000000067F000040020000E000000009C000__000000914E3F38F0\n000000067F000040020000E0000000098000-000000067F000040020000E000000009C000__000000931B9A2710\n000000067F000040020000E000000009B4BC-000000067F000040020000E00000000A3EA3__0000003B6A0
FFB09-00000047441DEA39\n000000067F000040020000E000000009C000-000000067F000040020000E00000000A0000__000000574B7FF240\n000000067F000040020000E000000009C000-000000067F000040020000E00000000A0000__00000073AD3FE6B8\n000000067F000040020000E000000009C000-000000067F000040020000E00000000A0000__000000914E3F38F0\n000000067F000040020000E000000009C000-000000067F000040020000E00000000A0000__000000931B9A2710\n000000067F000040020000E00000000A0000-000000067F000040020000E00000000A4000__000000574B7FF240\n000000067F000040020000E00000000A0000-000000067F000040020000E00000000A4000__00000073AD3FE6B8\n000000067F000040020000E00000000A0000-000000067F000040020000E00000000A4000__000000914E3F38F0\n000000067F000040020000E00000000A0000-000000067F000040020000E00000000A4000__000000931B9A2710\n000000067F000040020000E00000000A3EA3-000000067F000040020000E00000000AC86A__0000003B6A0FFB09-00000047441DEA39\n000000067F000040020000E00000000A4000-000000067F000040020000E00000000A8000__000000574B7FF240\n000000067F000040020000E00000000A4000-000000067F000040020000E00000000A8000__00000073AD3FE6B8\n000000067F000040020000E00000000A4000-000000067F000040020000E00000000A8000__000000914E3F38F0\n000000067F000040020000E00000000A4000-000000067F000040020000E00000000A8000__000000931B9A2710\n000000067F000040020000E00000000A8000-000000067F000040020000E00000000AC000__000000574B7FF240\n000000067F000040020000E00000000A8000-000000067F000040020000E00000000AC000__00000073AD3FE6B8\n000000067F000040020000E00000000A8000-000000067F000040020000E00000000AC000__000000914E3F38F0\n000000067F000040020000E00000000A8000-000000067F000040020000E00000000AC000__000000931B9A2710\n000000067F000040020000E00000000AC000-000000067F000040020000E00000000B0000__000000574B7FF240\n000000067F000040020000E00000000AC000-000000067F000040020000E00000000B0000__00000073AD3FE6B8\n000000067F000040020000E00000000AC000-000000067F000040020000E00000000B0000__000000914E3F38F0\n000000067F000040020000E00000000AC000-000000067F000040020000E00000000B0000__000000931B9A2710\n000000
067F000040020000E00000000AC86A-000000067F000040020000E00000000B5227__0000003B6A0FFB09-00000047441DEA39\n000000067F000040020000E00000000B0000-000000067F000040020000E00000000B4000__000000574B7FF240\n000000067F000040020000E00000000B0000-000000067F000040020000E00000000B4000__00000073AD3FE6B8\n000000067F000040020000E00000000B0000-000000067F000040020000E00000000B4000__000000914E3F38F0\n000000067F000040020000E00000000B0000-000000067F000040020000E00000000B4000__000000931B9A2710\n000000067F000040020000E00000000B4000-000000067F000040020000E00000000B8000__000000574B7FF240\n000000067F000040020000E00000000B4000-000000067F000040020000E00000000B8000__00000073AD3FE6B8\n000000067F000040020000E00000000B4000-000000067F000040020000E00000000B8000__000000914E3F38F0\n000000067F000040020000E00000000B4000-000000067F000040020000E00000000B8000__000000931B9A2710\n000000067F000040020000E00000000B5227-000000067F000040020000E00000000BDBEB__0000003B6A0FFB09-00000047441DEA39\n000000067F000040020000E00000000B8000-000000067F000040020000E00000000BC000__000000574B7FF240\n000000067F000040020000E00000000B8000-000000067F000040020000E00000000BC000__00000073AD3FE6B8\n000000067F000040020000E00000000B8000-000000067F000040020000E00000000BC000__000000914E3F38F0\n000000067F000040020000E00000000B8000-000000067F000040020000E00000000BC000__000000931B9A2710\n000000067F000040020000E00000000BC000-000000067F000040020000E00000000C0000__000000574B7FF240\n000000067F000040020000E00000000BC000-000000067F000040020000E00000000C0000__00000073AD3FE6B8\n000000067F000040020000E00000000BC000-000000067F000040020000E00000000C0000__000000914E3F38F0\n000000067F000040020000E00000000BC000-000000067F000040020000E00000000C0000__000000931B9A2710\n000000067F000040020000E00000000BDBEB-000000067F000040020000E00000000C65F2__0000003B6A0FFB09-00000047441DEA39\n000000067F000040020000E00000000C0000-000000067F000040020000E00000000C4000__000000574B7FF240\n000000067F000040020000E00000000C0000-000000067F000040020000E00000000C4000__00000073AD3FE6B8\n00
0000067F000040020000E00000000C0000-000000067F000040020000E00000000C4000__000000914E3F38F0\n000000067F000040020000E00000000C0000-000000067F000040020000E00000000C4000__000000931B9A2710\n000000067F000040020000E00000000C4000-000000067F000040020000E00000000C8000__000000574B7FF240\n000000067F000040020000E00000000C4000-000000067F000040020000E00000000C8000__00000073AD3FE6B8\n000000067F000040020000E00000000C4000-000000067F000040020000E00000000C8000__000000914E3F38F0\n000000067F000040020000E00000000C4000-000000067F000040020000E00000000C8000__000000931B9A2710\n000000067F000040020000E00000000C65F2-000000067F000040020000E00000000CEFF3__0000003B6A0FFB09-00000047441DEA39\n000000067F000040020000E00000000C8000-000000067F000040020000E00000000CC000__000000574B7FF240\n000000067F000040020000E00000000C8000-000000067F000040020000E00000000CC000__00000073AD3FE6B8\n000000067F000040020000E00000000C8000-000000067F000040020000E00000000CC000__000000914E3F38F0\n000000067F000040020000E00000000C8000-000000067F000040020000E00000000CC000__000000931B9A2710\n000000067F000040020000E00000000CC000-000000067F000040020000E00000000D0000__000000574B7FF240\n000000067F000040020000E00000000CC000-000000067F000040020000E00000000D0000__00000073AD3FE6B8\n000000067F000040020000E00000000CC000-000000067F000040020000E00000000D0000__000000914E3F38F0\n000000067F000040020000E00000000CC000-000000067F000040020000E00000000D0000__000000931B9A2710\n000000067F000040020000E00000000CEFF3-000000067F000040020000E00000000D79E6__0000003B6A0FFB09-00000047441DEA39\n000000067F000040020000E00000000D0000-000000067F000040020000E00000000D4000__000000574B7FF240\n000000067F000040020000E00000000D0000-000000067F000040020000E00000000D4000__00000073AD3FE6B8\n000000067F000040020000E00000000D0000-000000067F000040020000E00000000D4000__000000914E3F38F0\n000000067F000040020000E00000000D0000-000000067F000040020000E00000000D4000__000000931B9A2710\n000000067F000040020000E00000000D4000-000000067F000040020000E00000000D8000__000000574B7FF240\n000000067F00004
0020000E00000000D4000-000000067F000040020000E00000000D8000__00000073AD3FE6B8\n000000067F000040020000E00000000D4000-000000067F000040020000E00000000D8000__000000914E3F38F0\n000000067F000040020000E00000000D4000-000000067F000040020000E00000000D8000__000000931B9A2710\n000000067F000040020000E00000000D79E6-000000067F000040020000E00000000E03C4__0000003B6A0FFB09-00000047441DEA39\n000000067F000040020000E00000000D8000-000000067F000040020000E00000000DC000__000000574B7FF240\n000000067F000040020000E00000000D8000-000000067F000040020000E00000000DC000__00000073AD3FE6B8\n000000067F000040020000E00000000D8000-000000067F000040020000E00000000DC000__000000914E3F38F0\n000000067F000040020000E00000000D8000-000000067F000040020000E00000000DC000__000000931B9A2710\n000000067F000040020000E00000000DC000-000000067F000040020000E00000000E0000__000000574B7FF240\n000000067F000040020000E00000000DC000-000000067F000040020000E00000000E0000__00000073AD3FE6B8\n000000067F000040020000E00000000DC000-000000067F000040020000E00000000E0000__000000914E3F38F0\n000000067F000040020000E00000000DC000-000000067F000040020000E00000000E0000__000000931B9A2710\n000000067F000040020000E00000000E0000-000000067F000040020000E00000000E4000__000000574B7FF240\n000000067F000040020000E00000000E0000-000000067F000040020000E00000000E4000__00000073AD3FE6B8\n000000067F000040020000E00000000E0000-000000067F000040020000E00000000E4000__000000914E3F38F0\n000000067F000040020000E00000000E0000-000000067F000040020000E00000000E4000__000000931B9A2710\n000000067F000040020000E00000000E03C4-000000067F000040020000E00000000E8D95__0000003B6A0FFB09-00000047441DEA39\n000000067F000040020000E00000000E4000-000000067F000040020000E00000000E8000__000000574B7FF240\n000000067F000040020000E00000000E4000-000000067F000040020000E00000000E8000__00000073AD3FE6B8\n000000067F000040020000E00000000E4000-000000067F000040020000E00000000E8000__000000914E3F38F0\n000000067F000040020000E00000000E4000-000000067F000040020000E00000000E8000__000000931B9A2710\n000000067F000040020000E00000
000E8000-000000067F000040020000E00000000EC000__000000574B7FF240\n000000067F000040020000E00000000E8000-000000067F000040020000E00000000EC000__00000073AD3FE6B8\n000000067F000040020000E00000000E8000-000000067F000040020000E00000000EC000__000000914E3F38F0\n000000067F000040020000E00000000E8000-000000067F000040020000E00000000EC000__000000931B9A2710\n000000067F000040020000E00000000E8D95-000000067F000040020000E00000000F175E__0000003B6A0FFB09-00000047441DEA39\n000000067F000040020000E00000000EC000-000000067F000040020000E00000000F0000__000000574B7FF240\n000000067F000040020000E00000000EC000-000000067F000040020000E00000000F0000__00000073AD3FE6B8\n000000067F000040020000E00000000EC000-000000067F000040020000E00000000F0000__000000914E3F38F0\n000000067F000040020000E00000000EC000-000000067F000040020000E00000000F0000__000000931B9A2710\n000000067F000040020000E00000000F0000-000000067F000040020000E00000000F4000__000000574B7FF240\n000000067F000040020000E00000000F0000-000000067F000040020000E00000000F4000__00000073AD3FE6B8\n000000067F000040020000E00000000F0000-000000067F000040020000E00000000F4000__000000914E3F38F0\n000000067F000040020000E00000000F0000-000000067F000040020000E00000000F4000__000000931B9A2710\n000000067F000040020000E00000000F175E-000000067F000040020000E00000000FA122__0000003B6A0FFB09-00000047441DEA39\n000000067F000040020000E00000000F4000-000000067F000040020000E00000000F8000__000000574B7FF240\n000000067F000040020000E00000000F4000-000000067F000040020000E00000000F8000__00000073AD3FE6B8\n000000067F000040020000E00000000F4000-000000067F000040020000E00000000F8000__000000914E3F38F0\n000000067F000040020000E00000000F4000-000000067F000040020000E00000000F8000__000000931B9A2710\n000000067F000040020000E00000000F8000-000000067F000040020000E00000000FC000__000000574B7FF240\n000000067F000040020000E00000000F8000-000000067F000040020000E00000000FC000__00000073AD3FE6B8\n000000067F000040020000E00000000F8000-000000067F000040020000E00000000FC000__000000914E3F38F0\n000000067F000040020000E00000000F8000-0000
00067F000040020000E00000000FC000__000000931B9A2710\n000000067F000040020000E00000000FA122-000000067F000040020000E0000000102B0A__0000003B6A0FFB09-00000047441DEA39\n000000067F000040020000E00000000FC000-000000067F000040020000E0000000100000__000000574B7FF240\n000000067F000040020000E00000000FC000-000000067F000040020000E0000000100000__00000073AD3FE6B8\n000000067F000040020000E00000000FC000-000000067F000040020000E0000000100000__000000914E3F38F0\n000000067F000040020000E00000000FC000-000000067F000040020000E0000000100000__000000931B9A2710\n000000067F000040020000E0000000100000-000000067F000040020000E0000000104000__000000574B7FF240\n000000067F000040020000E0000000100000-000000067F000040020000E0000000104000__00000073AD3FE6B8\n000000067F000040020000E0000000100000-000000067F000040020000E0000000104000__000000914E3F38F0\n000000067F000040020000E0000000100000-000000067F000040020000E0000000104000__000000931B9A2710\n000000067F000040020000E0000000102B0A-000000067F000040020000E000000010B4F8__0000003B6A0FFB09-00000047441DEA39\n000000067F000040020000E0000000104000-000000067F000040020000E0000000108000__000000574B7FF240\n000000067F000040020000E0000000104000-000000067F000040020000E0000000108000__00000073AD3FE6B8\n000000067F000040020000E0000000104000-000000067F000040020000E0000000108000__000000914E3F38F0\n000000067F000040020000E0000000104000-000000067F000040020000E0000000108000__000000931B9A2710\n000000067F000040020000E0000000108000-000000067F000040020000E000000010C000__000000574B7FF240\n000000067F000040020000E0000000108000-000000067F000040020000E000000010C000__00000073AD3FE6B8\n000000067F000040020000E0000000108000-000000067F000040020000E000000010C000__000000914E3F38F0\n000000067F000040020000E0000000108000-000000067F000040020000E000000010C000__000000931B9A2710\n000000067F000040020000E000000010B4F8-000000067F000040020000E0000000113EEA__0000003B6A0FFB09-00000047441DEA39\n000000067F000040020000E000000010C000-000000067F000040020000E0000000110000__000000574B7FF240\n000000067F000040020000E000000010C000-
000000067F000040020000E0000000110000__00000073AD3FE6B8\n000000067F000040020000E000000010C000-000000067F000040020000E0000000110000__000000914E3F38F0\n000000067F000040020000E000000010C000-000000067F000040020000E0000000110000__000000931B9A2710\n000000067F000040020000E0000000110000-000000067F000040020000E0000000114000__000000574B7FF240\n000000067F000040020000E0000000110000-000000067F000040020000E0000000114000__00000073AD3FE6B8\n000000067F000040020000E0000000110000-000000067F000040020000E0000000114000__000000914E3F38F0\n000000067F000040020000E0000000110000-000000067F000040020000E0000000114000__000000931B9A2710\n000000067F000040020000E0000000113EEA-000000067F000040020000E000000011C8D2__0000003B6A0FFB09-00000047441DEA39\n000000067F000040020000E0000000114000-000000067F000040020000E0000000118000__000000574B7FF240\n000000067F000040020000E0000000114000-000000067F000040020000E0000000118000__00000073AD3FE6B8\n000000067F000040020000E0000000114000-000000067F000040020000E0000000118000__000000914E3F38F0\n000000067F000040020000E0000000114000-000000067F000040020000E0000000118000__000000931B9A2710\n000000067F000040020000E0000000118000-000000067F000040020000E000000011C000__000000574B7FF240\n000000067F000040020000E0000000118000-000000067F000040020000E000000011C000__00000073AD3FE6B8\n000000067F000040020000E0000000118000-000000067F000040020000E000000011C000__000000914E3F38F0\n000000067F000040020000E0000000118000-000000067F000040020000E000000011C000__000000931B9A2710\n000000067F000040020000E000000011C000-000000067F000040020000E0000000120000__000000574B7FF240\n000000067F000040020000E000000011C000-000000067F000040020000E0000000120000__00000073AD3FE6B8\n000000067F000040020000E000000011C000-000000067F000040020000E0000000120000__000000914E3F38F0\n000000067F000040020000E000000011C000-000000067F000040020000E0000000120000__000000931B9A2710\n000000067F000040020000E000000011C8D2-000000067F000040020000E00000001252A2__0000003B6A0FFB09-00000047441DEA39\n000000067F000040020000E0000000120000-000000067F000
040020000E0000000124000__000000574B7FF240\n000000067F000040020000E0000000120000-000000067F000040020000E0000000124000__00000073AD3FE6B8\n000000067F000040020000E0000000120000-000000067F000040020000E0000000124000__000000914E3F38F0\n000000067F000040020000E0000000120000-000000067F000040020000E0000000124000__000000931B9A2710\n000000067F000040020000E0000000124000-000000067F000040020000E0000000128000__000000574B7FF240\n000000067F000040020000E0000000124000-000000067F000040020000E0000000128000__00000073AD3FE6B8\n000000067F000040020000E0000000124000-000000067F000040020000E0000000128000__000000914E3F38F0\n000000067F000040020000E0000000124000-000000067F000040020000E0000000128000__000000931B9A2710\n000000067F000040020000E00000001252A2-000000067F000040020000E000000012DC5E__0000003B6A0FFB09-00000047441DEA39\n000000067F000040020000E0000000128000-000000067F000040020000E000000012C000__000000574B7FF240\n000000067F000040020000E0000000128000-000000067F000040020000E000000012C000__00000073AD3FE6B8\n000000067F000040020000E0000000128000-000000067F000040020000E000000012C000__000000914E3F38F0\n000000067F000040020000E0000000128000-000000067F000040020000E000000012C000__000000931B9A2710\n000000067F000040020000E000000012C000-000000067F000040020000E0000000130000__000000574B7FF240\n000000067F000040020000E000000012C000-000000067F000040020000E0000000130000__00000073AD3FE6B8\n000000067F000040020000E000000012C000-000000067F000040020000E0000000130000__000000914E3F38F0\n000000067F000040020000E000000012C000-000000067F000040020000E0000000130000__000000931B9A2710\n000000067F000040020000E000000012DC5E-000000067F000040020000E0000000136629__0000003B6A0FFB09-00000047441DEA39\n000000067F000040020000E0000000130000-000000067F000040020000E0000000134000__000000574B7FF240\n000000067F000040020000E0000000130000-000000067F000040020000E0000000134000__00000073AD3FE6B8\n000000067F000040020000E0000000130000-000000067F000040020000E0000000134000__000000914E3F38F0\n000000067F000040020000E0000000130000-000000067F000040020000E000
0000134000__000000931B9A2710\n000000067F000040020000E0000000134000-000000067F000040020000E0000000138000__000000574B7FF240\n000000067F000040020000E0000000134000-000000067F000040020000E0000000138000__00000073AD3FE6B8\n000000067F000040020000E0000000134000-000000067F000040020000E0000000138000__000000914E3F38F0\n000000067F000040020000E0000000134000-000000067F000040020000E0000000138000__000000931B9A2710\n000000067F000040020000E0000000136629-000000067F000040020000E000000013F013__0000003B6A0FFB09-00000047441DEA39\n000000067F000040020000E0000000138000-000000067F000040020000E000000013C000__000000574B7FF240\n000000067F000040020000E0000000138000-000000067F000040020000E000000013C000__00000073AD3FE6B8\n000000067F000040020000E0000000138000-000000067F000040020000E000000013C000__000000914E3F38F0\n000000067F000040020000E0000000138000-000000067F000040020000E000000013C000__000000931B9A2710\n000000067F000040020000E000000013C000-000000067F000040020000E0000000140000__000000574B7FF240\n000000067F000040020000E000000013C000-000000067F000040020000E0000000140000__00000073AD3FE6B8\n000000067F000040020000E000000013C000-000000067F000040020000E0000000140000__000000914E3F38F0\n000000067F000040020000E000000013C000-000000067F000040020000E0000000140000__000000931B9A2710\n000000067F000040020000E000000013F013-000000067F000040020000E0000000147A01__0000003B6A0FFB09-00000047441DEA39\n000000067F000040020000E0000000140000-000000067F000040020000E0000000144000__000000574B7FF240\n000000067F000040020000E0000000140000-000000067F000040020000E0000000144000__00000073AD3FE6B8\n000000067F000040020000E0000000140000-000000067F000040020000E0000000144000__000000914E3F38F0\n000000067F000040020000E0000000140000-000000067F000040020000E0000000144000__000000931B9A2710\n000000067F000040020000E0000000144000-000000067F000040020000E0000000148000__000000574B7FF240\n000000067F000040020000E0000000144000-000000067F000040020000E0000000148000__00000073AD3FE6B8\n000000067F000040020000E0000000144000-000000067F000040020000E0000000148000__0
00000914E3F38F0\n000000067F000040020000E0000000144000-000000067F000040020000E0000000148000__000000931B9A2710\n000000067F000040020000E0000000147A01-000000067F000040020000E00000001503DC__0000003B6A0FFB09-00000047441DEA39\n000000067F000040020000E0000000148000-000000067F000040020000E000000014C000__000000574B7FF240\n000000067F000040020000E0000000148000-000000067F000040020000E000000014C000__00000073AD3FE6B8\n000000067F000040020000E0000000148000-000000067F000040020000E000000014C000__000000914E3F38F0\n000000067F000040020000E0000000148000-000000067F000040020000E000000014C000__000000931B9A2710\n000000067F000040020000E000000014C000-000000067F000040020000E0000000150000__000000574B7FF240\n000000067F000040020000E000000014C000-000000067F000040020000E0000000150000__00000073AD3FE6B8\n000000067F000040020000E000000014C000-000000067F000040020000E0000000150000__000000914E3F38F0\n000000067F000040020000E000000014C000-000000067F000040020000E0000000150000__000000931B9A2710\n000000067F000040020000E0000000150000-000000067F000040020000E0000000154000__000000574B7FF240\n000000067F000040020000E0000000150000-000000067F000040020000E0000000154000__00000073AD3FE6B8\n000000067F000040020000E0000000150000-000000067F000040020000E0000000154000__000000914E3F38F0\n000000067F000040020000E0000000150000-000000067F000040020000E0000000154000__000000931B9A2710\n000000067F000040020000E00000001503DC-000000067F000040020000E0000000158DC2__0000003B6A0FFB09-00000047441DEA39\n000000067F000040020000E0000000154000-000000067F000040020000E0000000158000__000000574B7FF240\n000000067F000040020000E0000000154000-000000067F000040020000E0000000158000__00000073AD3FE6B8\n000000067F000040020000E0000000154000-000000067F000040020000E0000000158000__000000914E3F38F0\n000000067F000040020000E0000000154000-000000067F000040020000E0000000158000__000000931B9A2710\n000000067F000040020000E0000000158000-000000067F000040020000E000000015C000__000000574B7FF240\n000000067F000040020000E0000000158000-000000067F000040020000E000000015C000__00000073AD3FE6
B8\n000000067F000040020000E0000000158000-000000067F000040020000E000000015C000__000000914E3F38F0\n000000067F000040020000E0000000158000-000000067F000040020000E000000015C000__000000931B9A2710\n000000067F000040020000E0000000158DC2-000000067F000040020000E000000016178D__0000003B6A0FFB09-00000047441DEA39\n000000067F000040020000E000000015C000-000000067F000040020000E0000000160000__000000574B7FF240\n000000067F000040020000E000000015C000-000000067F000040020000E0000000160000__00000073AD3FE6B8\n000000067F000040020000E000000015C000-000000067F000040020000E0000000160000__000000914E3F38F0\n000000067F000040020000E000000015C000-000000067F000040020000E0000000160000__000000931B9A2710\n000000067F000040020000E0000000160000-000000067F000040020000E0000000164000__000000574B7FF240\n000000067F000040020000E0000000160000-000000067F000040020000E0000000164000__00000073AD3FE6B8\n000000067F000040020000E0000000160000-000000067F000040020000E0000000164000__000000914E3F38F0\n000000067F000040020000E0000000160000-000000067F000040020000E0000000164000__000000931B9A2710\n000000067F000040020000E000000016178D-000000067F000040020000E000000016A148__0000003B6A0FFB09-00000047441DEA39\n000000067F000040020000E0000000164000-000000067F000040020000E0000000168000__000000574B7FF240\n000000067F000040020000E0000000164000-000000067F000040020000E0000000168000__00000073AD3FE6B8\n000000067F000040020000E0000000164000-000000067F000040020000E0000000168000__000000914E3F38F0\n000000067F000040020000E0000000164000-000000067F000040020000E0000000168000__000000931B9A2710\n000000067F000040020000E0000000168000-000000067F000040020000E000000016C000__000000574B7FF240\n000000067F000040020000E0000000168000-000000067F000040020000E000000016C000__00000073AD3FE6B8\n000000067F000040020000E0000000168000-000000067F000040020000E000000016C000__000000914E3F38F0\n000000067F000040020000E0000000168000-000000067F000040020000E000000016C000__000000931B9A2710\n000000067F000040020000E000000016A148-000000067F000040020000E0000000172B20__0000003B6A0FFB09-0000004744
1DEA39\n000000067F000040020000E000000016C000-000000067F000040020000E0000000170000__000000574B7FF240\n000000067F000040020000E000000016C000-000000067F000040020000E0000000170000__00000073AD3FE6B8\n000000067F000040020000E000000016C000-000000067F000040020000E0000000170000__000000914E3F38F0\n000000067F000040020000E000000016C000-000000067F000040020000E0000000170000__000000931B9A2710\n000000067F000040020000E0000000170000-000000067F000040020000E0000000174000__000000574B7FF240\n000000067F000040020000E0000000170000-000000067F000040020000E0000000174000__00000073AD3FE6B8\n000000067F000040020000E0000000170000-000000067F000040020000E0000000174000__000000914E3F38F0\n000000067F000040020000E0000000170000-000000067F000040020000E0000000174000__000000931B9A2710\n000000067F000040020000E0000000172B20-000000067F000040020000E000000017B50C__0000003B6A0FFB09-00000047441DEA39\n000000067F000040020000E0000000174000-000000067F000040020000E0000000178000__000000574B7FF240\n000000067F000040020000E0000000174000-000000067F000040020000E0000000178000__00000073AD3FE6B8\n000000067F000040020000E0000000174000-000000067F000040020000E0000000178000__000000914E3F38F0\n000000067F000040020000E0000000174000-000000067F000040020000E0000000178000__000000931B9A2710\n000000067F000040020000E0000000178000-000000067F000040020000E000000017C000__000000574B7FF240\n000000067F000040020000E0000000178000-000000067F000040020000E000000017C000__00000073AD3FE6B8\n000000067F000040020000E0000000178000-000000067F000040020000E000000017C000__000000914E3F38F0\n000000067F000040020000E0000000178000-000000067F000040020000E000000017C000__000000931B9A2710\n000000067F000040020000E000000017B50C-000000067F000040020000E0000000183EF9__0000003B6A0FFB09-00000047441DEA39\n000000067F000040020000E000000017C000-000000067F000040020000E0000000180000__000000574B7FF240\n000000067F000040020000E000000017C000-000000067F000040020000E0000000180000__00000073AD3FE6B8\n000000067F000040020000E000000017C000-000000067F000040020000E0000000180000__000000914E3F38F0\n00000
0067F000040020000E000000017C000-000000067F000040020000E0000000180000__000000931B9A2710\n000000067F000040020000E0000000180000-000000067F000040020000E0000000184000__000000574B7FF240\n000000067F000040020000E0000000180000-000000067F000040020000E0000000184000__00000073AD3FE6B8\n000000067F000040020000E0000000180000-000000067F000040020000E0000000184000__000000914E3F38F0\n000000067F000040020000E0000000180000-000000067F000040020000E0000000184000__000000931B9A2710\n000000067F000040020000E0000000183EF9-000000067F000040020000E000000018C8E8__0000003B6A0FFB09-00000047441DEA39\n000000067F000040020000E0000000184000-000000067F000040020000E0000000188000__000000574B7FF240\n000000067F000040020000E0000000184000-000000067F000040020000E0000000188000__00000073AD3FE6B8\n000000067F000040020000E0000000184000-000000067F000040020000E0000000188000__000000914E3F38F0\n000000067F000040020000E0000000184000-000000067F000040020000E0000000188000__000000931B9A2710\n000000067F000040020000E0000000188000-000000067F000040020000E000000018C000__000000574B7FF240\n000000067F000040020000E0000000188000-000000067F000040020000E000000018C000__00000073AD3FE6B8\n000000067F000040020000E0000000188000-000000067F000040020000E000000018C000__000000914E3F38F0\n000000067F000040020000E0000000188000-000000067F000040020000E000000018C000__000000931B9A2710\n000000067F000040020000E000000018C000-000000067F000040020000E0000000190000__000000574B7FF240\n000000067F000040020000E000000018C000-000000067F000040020000E0000000190000__00000073AD3FE6B8\n000000067F000040020000E000000018C000-000000067F000040020000E0000000190000__000000914E3F38F0\n000000067F000040020000E000000018C000-000000067F000040020000E0000000190000__000000931B9A2710\n000000067F000040020000E000000018C8E8-000000067F000040020000E00000001952CE__0000003B6A0FFB09-00000047441DEA39\n000000067F000040020000E0000000190000-000000067F000040020000E0000000194000__000000574B7FF240\n000000067F000040020000E0000000190000-000000067F000040020000E0000000194000__00000073AD3FE6B8\n000000067F00004002
0000E0000000190000-000000067F000040020000E0000000194000__000000914E3F38F0\n000000067F000040020000E0000000190000-000000067F000040020000E0000000194000__000000931B9A2710\n000000067F000040020000E0000000194000-000000067F000040020000E0000000198000__000000574B7FF240\n000000067F000040020000E0000000194000-000000067F000040020000E0000000198000__00000073AD3FE6B8\n000000067F000040020000E0000000194000-000000067F000040020000E0000000198000__000000914E3F38F0\n000000067F000040020000E0000000194000-000000067F000040020000E0000000198000__000000931B9A2710\n000000067F000040020000E00000001952CE-000000067F000040020000E000000019DC94__0000003B6A0FFB09-00000047441DEA39\n000000067F000040020000E0000000198000-000000067F000040020000E000000019C000__000000574B7FF240\n000000067F000040020000E0000000198000-000000067F000040020000E000000019C000__00000073AD3FE6B8\n000000067F000040020000E0000000198000-000000067F000040020000E000000019C000__000000914E3F38F0\n000000067F000040020000E0000000198000-000000067F000040020000E000000019C000__000000931B9A2710\n000000067F000040020000E000000019C000-000000067F000040020000E00000001A0000__000000574B7FF240\n000000067F000040020000E000000019C000-000000067F000040020000E00000001A0000__00000073AD3FE6B8\n000000067F000040020000E000000019C000-000000067F000040020000E00000001A0000__000000914E3F38F0\n000000067F000040020000E000000019C000-000000067F000040020000E00000001A0000__000000931B9A2710\n000000067F000040020000E000000019DC94-000000067F000040020000E00000001A6650__0000003B6A0FFB09-00000047441DEA39\n000000067F000040020000E00000001A0000-000000067F000040020000E00000001A4000__000000574B7FF240\n000000067F000040020000E00000001A0000-000000067F000040020000E00000001A4000__00000073AD3FE6B8\n000000067F000040020000E00000001A0000-000000067F000040020000E00000001A4000__000000914E3F38F0\n000000067F000040020000E00000001A0000-000000067F000040020000E00000001A4000__000000931B9A2710\n000000067F000040020000E00000001A4000-000000067F000040020000E00000001A8000__000000574B7FF240\n000000067F000040020000E00000001
A4000-000000067F000040020000E00000001A8000__00000073AD3FE6B8\n000000067F000040020000E00000001A4000-000000067F000040020000E00000001A8000__000000914E3F38F0\n000000067F000040020000E00000001A4000-000000067F000040020000E00000001A8000__000000931B9A2710\n000000067F000040020000E00000001A6650-000000067F000040020000E00000001AF031__0000003B6A0FFB09-00000047441DEA39\n000000067F000040020000E00000001A8000-000000067F000040020000E00000001AC000__000000574B7FF240\n000000067F000040020000E00000001A8000-000000067F000040020000E00000001AC000__00000073AD3FE6B8\n000000067F000040020000E00000001A8000-000000067F000040020000E00000001AC000__000000914E3F38F0\n000000067F000040020000E00000001A8000-000000067F000040020000E00000001AC000__000000931B9A2710\n000000067F000040020000E00000001AC000-000000067F000040020000E00000001B0000__000000574B7FF240\n000000067F000040020000E00000001AC000-000000067F000040020000E00000001B0000__00000073AD3FE6B8\n000000067F000040020000E00000001AC000-000000067F000040020000E00000001B0000__000000914E3F38F0\n000000067F000040020000E00000001AC000-000000067F000040020000E00000001B0000__000000931B9A2710\n000000067F000040020000E00000001AF031-000000067F000040020000E00000001B7A19__0000003B6A0FFB09-00000047441DEA39\n000000067F000040020000E00000001B0000-000000067F000040020000E00000001B4000__000000574B7FF240\n000000067F000040020000E00000001B0000-000000067F000040020000E00000001B4000__00000073AD3FE6B8\n000000067F000040020000E00000001B0000-000000067F000040020000E00000001B4000__000000914E3F38F0\n000000067F000040020000E00000001B0000-000000067F000040020000E00000001B4000__000000931B9A2710\n000000067F000040020000E00000001B4000-000000067F000040020000E00000001B8000__000000574B7FF240\n000000067F000040020000E00000001B4000-000000067F000040020000E00000001B8000__00000073AD3FE6B8\n000000067F000040020000E00000001B4000-000000067F000040020000E00000001B8000__000000914E3F38F0\n000000067F000040020000E00000001B4000-000000067F000040020000E00000001B8000__000000931B9A2710\n000000067F000040020000E00000001B7A19-0000000
67F000040020000E00000001C0402__0000003B6A0FFB09-00000047441DEA39\n000000067F000040020000E00000001B8000-000000067F000040020000E00000001BC000__000000574B7FF240\n000000067F000040020000E00000001B8000-000000067F000040020000E00000001BC000__00000073AD3FE6B8\n000000067F000040020000E00000001B8000-000000067F000040020000E00000001BC000__000000914E3F38F0\n000000067F000040020000E00000001B8000-000000067F000040020000E00000001BC000__000000931B9A2710\n000000067F000040020000E00000001BC000-000000067F000040020000E00000001C0000__000000574B7FF240\n000000067F000040020000E00000001BC000-000000067F000040020000E00000001C0000__00000073AD3FE6B8\n000000067F000040020000E00000001BC000-000000067F000040020000E00000001C0000__000000914E3F38F0\n000000067F000040020000E00000001BC000-000000067F000040020000E00000001C0000__000000931B9A2710\n000000067F000040020000E00000001C0000-000000067F000040020000E00000001C4000__000000574B7FF240\n000000067F000040020000E00000001C0000-000000067F000040020000E00000001C4000__00000073AD3FE6B8\n000000067F000040020000E00000001C0000-000000067F000040020000E00000001C4000__000000914E3F38F0\n000000067F000040020000E00000001C0000-000000067F000040020000E00000001C4000__000000931B9A2710\n000000067F000040020000E00000001C0402-000000067F000040020000E00000001C8DD6__0000003B6A0FFB09-00000047441DEA39\n000000067F000040020000E00000001C4000-000000067F000040020000E00000001C8000__000000574B7FF240\n000000067F000040020000E00000001C4000-000000067F000040020000E00000001C8000__00000073AD3FE6B8\n000000067F000040020000E00000001C4000-000000067F000040020000E00000001C8000__000000914E3F38F0\n000000067F000040020000E00000001C4000-000000067F000040020000E00000001C8000__000000931B9A2710\n000000067F000040020000E00000001C8000-000000067F000040020000E00000001CC000__000000574B7FF240\n000000067F000040020000E00000001C8000-000000067F000040020000E00000001CC000__00000073AD3FE6B8\n000000067F000040020000E00000001C8000-000000067F000040020000E00000001CC000__000000914E3F38F0\n000000067F000040020000E00000001C8000-000000067F0000400200
00E00000001CC000__000000931B9A2710\n000000067F000040020000E00000001C8DD6-000000067F000040020000E00000001D17B3__0000003B6A0FFB09-00000047441DEA39\n000000067F000040020000E00000001CC000-000000067F000040020000E00000001D0000__000000574B7FF240\n000000067F000040020000E00000001CC000-000000067F000040020000E00000001D0000__00000073AD3FE6B8\n000000067F000040020000E00000001CC000-000000067F000040020000E00000001D0000__000000914E3F38F0\n000000067F000040020000E00000001CC000-000000067F000040020000E00000001D0000__000000931B9A2710\n000000067F000040020000E00000001D0000-000000067F000040020000E00000001D4000__000000574B7FF240\n000000067F000040020000E00000001D0000-000000067F000040020000E00000001D4000__00000073AD3FE6B8\n000000067F000040020000E00000001D0000-000000067F000040020000E00000001D4000__000000914E3F38F0\n000000067F000040020000E00000001D0000-000000067F000040020000E00000001D4000__000000931B9A2710\n000000067F000040020000E00000001D17B3-000000067F000040020000E00000001DA183__0000003B6A0FFB09-00000047441DEA39\n000000067F000040020000E00000001D4000-000000067F000040020000E00000001D8000__000000574B7FF240\n000000067F000040020000E00000001D4000-000000067F000040020000E00000001D8000__00000073AD3FE6B8\n000000067F000040020000E00000001D4000-000000067F000040020000E00000001D8000__000000914E3F38F0\n000000067F000040020000E00000001D4000-000000067F000040020000E00000001D8000__000000931B9A2710\n000000067F000040020000E00000001D8000-000000067F000040020000E00000001DC000__000000574B7FF240\n000000067F000040020000E00000001D8000-000000067F000040020000E00000001DC000__00000073AD3FE6B8\n000000067F000040020000E00000001D8000-000000067F000040020000E00000001DC000__000000914E3F38F0\n000000067F000040020000E00000001D8000-000000067F000040020000E00000001DC000__000000931B9A2710\n000000067F000040020000E00000001DA183-000000067F000040020000E00000001E2B47__0000003B6A0FFB09-00000047441DEA39\n000000067F000040020000E00000001DC000-000000067F000040020000E00000001E0000__000000574B7FF240\n000000067F000040020000E00000001DC000-000000067F000040
020000E00000001E0000__00000073AD3FE6B8\n000000067F000040020000E00000001DC000-000000067F000040020000E00000001E0000__000000914E3F38F0\n000000067F000040020000E00000001DC000-000000067F000040020000E00000001E0000__000000931B9A2710\n000000067F000040020000E00000001E0000-000000067F000040020000E00000001E4000__000000574B7FF240\n000000067F000040020000E00000001E0000-000000067F000040020000E00000001E4000__00000073AD3FE6B8\n000000067F000040020000E00000001E0000-000000067F000040020000E00000001E4000__000000914E3F38F0\n000000067F000040020000E00000001E0000-000000067F000040020000E00000001E4000__000000931B9A2710\n000000067F000040020000E00000001E2B47-000000067F000040020000E00000001EB52B__0000003B6A0FFB09-00000047441DEA39\n000000067F000040020000E00000001E4000-000000067F000040020000E00000001E8000__000000574B7FF240\n000000067F000040020000E00000001E4000-000000067F000040020000E00000001E8000__00000073AD3FE6B8\n000000067F000040020000E00000001E4000-000000067F000040020000E00000001E8000__000000914E3F38F0\n000000067F000040020000E00000001E4000-000000067F000040020000E00000001E8000__000000931B9A2710\n000000067F000040020000E00000001E8000-000000067F000040020000E00000001EC000__000000574B7FF240\n000000067F000040020000E00000001E8000-000000067F000040020000E00000001EC000__00000073AD3FE6B8\n000000067F000040020000E00000001E8000-000000067F000040020000E00000001EC000__000000914E3F38F0\n000000067F000040020000E00000001E8000-000000067F000040020000E00000001EC000__000000931B9A2710\n000000067F000040020000E00000001EB52B-000000067F000040020000E00000001F3F12__0000003B6A0FFB09-00000047441DEA39\n000000067F000040020000E00000001EC000-000000067F000040020000E00000001F0000__000000574B7FF240\n000000067F000040020000E00000001EC000-000000067F000040020000E00000001F0000__00000073AD3FE6B8\n000000067F000040020000E00000001EC000-000000067F000040020000E00000001F0000__000000914E3F38F0\n000000067F000040020000E00000001EC000-000000067F000040020000E00000001F0000__000000931B9A2710\n000000067F000040020000E00000001F0000-000000067F000040020000E000000
01F4000__000000574B7FF240\n000000067F000040020000E00000001F0000-000000067F000040020000E00000001F4000__00000073AD3FE6B8\n000000067F000040020000E00000001F0000-000000067F000040020000E00000001F4000__000000914E3F38F0\n000000067F000040020000E00000001F0000-000000067F000040020000E00000001F4000__000000931B9A2710\n000000067F000040020000E00000001F3F12-000000067F000040020000E00000001FC902__0000003B6A0FFB09-00000047441DEA39\n000000067F000040020000E00000001F4000-000000067F000040020000E00000001F8000__000000574B7FF240\n000000067F000040020000E00000001F4000-000000067F000040020000E00000001F8000__00000073AD3FE6B8\n000000067F000040020000E00000001F4000-000000067F000040020000E00000001F8000__000000914E3F38F0\n000000067F000040020000E00000001F4000-000000067F000040020000E00000001F8000__000000931B9A2710\n000000067F000040020000E00000001F8000-000000067F000040020000E00000001FC000__000000574B7FF240\n000000067F000040020000E00000001F8000-000000067F000040020000E00000001FC000__00000073AD3FE6B8\n000000067F000040020000E00000001F8000-000000067F000040020000E00000001FC000__000000914E3F38F0\n000000067F000040020000E00000001F8000-000000067F000040020000E00000001FC000__000000931B9A2710\n000000067F000040020000E00000001FC000-000000067F000040020000E0000000200000__000000574B7FF240\n000000067F000040020000E00000001FC000-000000067F000040020000E0000000200000__00000073AD3FE6B8\n000000067F000040020000E00000001FC000-000000067F000040020000E0000000200000__000000914E3F38F0\n000000067F000040020000E00000001FC000-000000067F000040020000E0000000200000__000000931B9A2710\n000000067F000040020000E00000001FC902-000000067F000040020000E00000002052D8__0000003B6A0FFB09-00000047441DEA39\n000000067F000040020000E0000000200000-000000067F000040020000E0000000204000__000000574B7FF240\n000000067F000040020000E0000000200000-000000067F000040020000E0000000204000__00000073AD3FE6B8\n000000067F000040020000E0000000200000-000000067F000040020000E0000000204000__000000914E3F38F0\n000000067F000040020000E0000000200000-000000067F000040020000E0000000204000__0000
00931B9A2710\n000000067F000040020000E0000000204000-000000067F000040020000E0000000208000__000000574B7FF240\n000000067F000040020000E0000000204000-000000067F000040020000E0000000208000__00000073AD3FE6B8\n000000067F000040020000E0000000204000-000000067F000040020000E0000000208000__000000914E3F38F0\n000000067F000040020000E0000000204000-000000067F000040020000E0000000208000__000000931B9A2710\n000000067F000040020000E00000002052D8-000000067F000040020000E000000020DCB6__0000003B6A0FFB09-00000047441DEA39\n000000067F000040020000E0000000208000-000000067F000040020000E000000020C000__000000574B7FF240\n000000067F000040020000E0000000208000-000000067F000040020000E000000020C000__00000073AD3FE6B8\n000000067F000040020000E0000000208000-000000067F000040020000E000000020C000__000000914E3F38F0\n000000067F000040020000E0000000208000-000000067F000040020000E000000020C000__000000931B9A2710\n000000067F000040020000E000000020C000-000000067F000040020000E0000000210000__000000574B7FF240\n000000067F000040020000E000000020C000-000000067F000040020000E0000000210000__00000073AD3FE6B8\n000000067F000040020000E000000020C000-000000067F000040020000E0000000210000__000000914E3F38F0\n000000067F000040020000E000000020C000-000000067F000040020000E0000000210000__000000931B9A2710\n000000067F000040020000E000000020DCB6-000000067F000040020000E0000000216686__0000003B6A0FFB09-00000047441DEA39\n000000067F000040020000E0000000210000-000000067F000040020000E0000000214000__000000574B7FF240\n000000067F000040020000E0000000210000-000000067F000040020000E0000000214000__00000073AD3FE6B8\n000000067F000040020000E0000000210000-000000067F000040020000E0000000214000__000000914E3F38F0\n000000067F000040020000E0000000210000-000000067F000040020000E0000000214000__000000931B9A2710\n000000067F000040020000E0000000214000-000000067F000040020000E0000000218000__000000574B7FF240\n000000067F000040020000E0000000214000-000000067F000040020000E0000000218000__00000073AD3FE6B8\n000000067F000040020000E0000000214000-000000067F000040020000E0000000218000__000000914E3F38F0\
n000000067F000040020000E0000000214000-000000067F000040020000E0000000218000__000000931B9A2710\n000000067F000040020000E0000000216686-000000067F000040020000E000000021F04B__0000003B6A0FFB09-00000047441DEA39\n000000067F000040020000E0000000218000-000000067F000040020000E000000021C000__000000574B7FF240\n000000067F000040020000E0000000218000-000000067F000040020000E000000021C000__00000073AD3FE6B8\n000000067F000040020000E0000000218000-000000067F000040020000E000000021C000__000000914E3F38F0\n000000067F000040020000E0000000218000-000000067F000040020000E000000021C000__000000931B9A2710\n000000067F000040020000E000000021C000-000000067F000040020000E0000000220000__000000574B7FF240\n000000067F000040020000E000000021C000-000000067F000040020000E0000000220000__00000073AD3FE6B8\n000000067F000040020000E000000021C000-000000067F000040020000E0000000220000__000000914E3F38F0\n000000067F000040020000E000000021C000-000000067F000040020000E0000000220000__000000931B9A2710\n000000067F000040020000E000000021F04B-000000067F000040020000E0000000227A38__0000003B6A0FFB09-00000047441DEA39\n000000067F000040020000E0000000220000-000000067F000040020000E0000000224000__000000574B7FF240\n000000067F000040020000E0000000220000-000000067F000040020000E0000000224000__00000073AD3FE6B8\n000000067F000040020000E0000000220000-000000067F000040020000E0000000224000__000000914E3F38F0\n000000067F000040020000E0000000220000-000000067F000040020000E0000000224000__000000931B9A2710\n000000067F000040020000E0000000224000-000000067F000040020000E0000000228000__000000574B7FF240\n000000067F000040020000E0000000224000-000000067F000040020000E0000000228000__00000073AD3FE6B8\n000000067F000040020000E0000000224000-000000067F000040020000E0000000228000__000000914E3F38F0\n000000067F000040020000E0000000224000-000000067F000040020000E0000000228000__000000931B9A2710\n000000067F000040020000E0000000227A38-000000067F000040020000E0000000230422__0000003B6A0FFB09-00000047441DEA39\n000000067F000040020000E0000000228000-000000067F000040020000E000000022C000__000000574B7FF
240\n000000067F000040020000E0000000228000-000000067F000040020000E000000022C000__00000073AD3FE6B8\n000000067F000040020000E0000000228000-000000067F000040020000E000000022C000__000000914E3F38F0\n000000067F000040020000E0000000228000-000000067F000040020000E000000022C000__000000931B9A2710\n000000067F000040020000E000000022C000-000000067F000040020000E0000000230000__000000574B7FF240\n000000067F000040020000E000000022C000-000000067F000040020000E0000000230000__00000073AD3FE6B8\n000000067F000040020000E000000022C000-000000067F000040020000E0000000230000__000000914E3F38F0\n000000067F000040020000E000000022C000-000000067F000040020000E0000000230000__000000931B9A2710\n000000067F000040020000E0000000230000-000000067F000040020000E0000000234000__000000574B7FF240\n000000067F000040020000E0000000230000-000000067F000040020000E0000000234000__00000073AD3FE6B8\n000000067F000040020000E0000000230000-000000067F000040020000E0000000234000__000000914E3F38F0\n000000067F000040020000E0000000230000-000000067F000040020000E0000000234000__000000931B9A2710\n000000067F000040020000E0000000230422-000000067F000040020000E0000000238E0E__0000003B6A0FFB09-00000047441DEA39\n000000067F000040020000E0000000234000-000000067F000040020000E0000000238000__000000574B7FF240\n000000067F000040020000E0000000234000-000000067F000040020000E0000000238000__00000073AD3FE6B8\n000000067F000040020000E0000000234000-000000067F000040020000E0000000238000__000000914E3F38F0\n000000067F000040020000E0000000234000-000000067F000040020000E0000000238000__000000931B9A2710\n000000067F000040020000E0000000238000-000000067F000040020000E000000023C000__000000574B7FF240\n000000067F000040020000E0000000238000-000000067F000040020000E000000023C000__00000073AD3FE6B8\n000000067F000040020000E0000000238000-000000067F000040020000E000000023C000__000000914E3F38F0\n000000067F000040020000E0000000238000-000000067F000040020000E000000023C000__000000931B9A2710\n000000067F000040020000E0000000238E0E-000000067F000040020000E00000002417DF__0000003B6A0FFB09-00000047441DEA39\n00000006
7F000040020000E000000023C000-000000067F000040020000E0000000240000__000000574B7FF240\n000000067F000040020000E000000023C000-000000067F000040020000E0000000240000__00000073AD3FE6B8\n000000067F000040020000E000000023C000-000000067F000040020000E0000000240000__000000914E3F38F0\n000000067F000040020000E000000023C000-000000067F000040020000E0000000240000__000000931B9A2710\n000000067F000040020000E0000000240000-000000067F000040020000E0000000244000__000000574B7FF240\n000000067F000040020000E0000000240000-000000067F000040020000E0000000244000__00000073AD3FE6B8\n000000067F000040020000E0000000240000-000000067F000040020000E0000000244000__000000914E3F38F0\n000000067F000040020000E0000000240000-000000067F000040020000E0000000244000__000000931B9A2710\n000000067F000040020000E00000002417DF-000000067F000040020000E000000024A1C0__0000003B6A0FFB09-00000047441DEA39\n000000067F000040020000E0000000244000-000000067F000040020000E0000000248000__000000574B7FF240\n000000067F000040020000E0000000244000-000000067F000040020000E0000000248000__00000073AD3FE6B8\n000000067F000040020000E0000000244000-000000067F000040020000E0000000248000__000000914E3F38F0\n000000067F000040020000E0000000244000-000000067F000040020000E0000000248000__000000931B9A2710\n000000067F000040020000E0000000248000-000000067F000040020000E000000024C000__000000574B7FF240\n000000067F000040020000E0000000248000-000000067F000040020000E000000024C000__00000073AD3FE6B8\n000000067F000040020000E0000000248000-000000067F000040020000E000000024C000__000000914E3F38F0\n000000067F000040020000E0000000248000-000000067F000040020000E000000024C000__000000931B9A2710\n000000067F000040020000E000000024A1C0-000000067F000040020000E0000000252B80__0000003B6A0FFB09-00000047441DEA39\n000000067F000040020000E000000024C000-000000067F000040020000E0000000250000__000000574B7FF240\n000000067F000040020000E000000024C000-000000067F000040020000E0000000250000__00000073AD3FE6B8\n000000067F000040020000E000000024C000-000000067F000040020000E0000000250000__000000914E3F38F0\n000000067F00004002000
0E000000024C000-000000067F000040020000E0000000250000__000000931B9A2710\n000000067F000040020000E0000000250000-000000067F000040020000E0000000254000__000000574B7FF240\n000000067F000040020000E0000000250000-000000067F000040020000E0000000254000__00000073AD3FE6B8\n000000067F000040020000E0000000250000-000000067F000040020000E0000000254000__000000914E3F38F0\n000000067F000040020000E0000000250000-000000067F000040020000E0000000254000__000000931B9A2710\n000000067F000040020000E0000000252B80-000000067F000040020000E000000025B542__0000003B6A0FFB09-00000047441DEA39\n000000067F000040020000E0000000254000-000000067F000040020000E0000000258000__000000574B7FF240\n000000067F000040020000E0000000254000-000000067F000040020000E0000000258000__00000073AD3FE6B8\n000000067F000040020000E0000000254000-000000067F000040020000E0000000258000__000000914E3F38F0\n000000067F000040020000E0000000254000-000000067F000040020000E0000000258000__000000931B9A2710\n000000067F000040020000E0000000258000-000000067F000040020000E000000025C000__000000574B7FF240\n000000067F000040020000E0000000258000-000000067F000040020000E000000025C000__00000073AD3FE6B8\n000000067F000040020000E0000000258000-000000067F000040020000E000000025C000__000000914E3F38F0\n000000067F000040020000E0000000258000-000000067F000040020000E000000025C000__000000931B9A2710\n000000067F000040020000E000000025B542-000000067F000040020000E0000000263F2C__0000003B6A0FFB09-00000047441DEA39\n000000067F000040020000E000000025C000-000000067F000040020000E0000000260000__000000574B7FF240\n000000067F000040020000E000000025C000-000000067F000040020000E0000000260000__00000073AD3FE6B8\n000000067F000040020000E000000025C000-000000067F000040020000E0000000260000__000000914E3F38F0\n000000067F000040020000E000000025C000-000000067F000040020000E0000000260000__000000931B9A2710\n000000067F000040020000E0000000260000-000000067F000040020000E0000000264000__000000574B7FF240\n000000067F000040020000E0000000260000-000000067F000040020000E0000000264000__00000073AD3FE6B8\n000000067F000040020000E00000002600
00-000000067F000040020000E0000000264000__000000914E3F38F0\n000000067F000040020000E0000000260000-000000067F000040020000E0000000264000__000000931B9A2710\n000000067F000040020000E0000000263F2C-000000067F000040020000E000000026C925__0000003B6A0FFB09-00000047441DEA39\n000000067F000040020000E0000000264000-000000067F000040020000E0000000268000__000000574B7FF240\n000000067F000040020000E0000000264000-000000067F000040020000E0000000268000__00000073AD3FE6B8\n000000067F000040020000E0000000264000-000000067F000040020000E0000000268000__000000914E3F38F0\n000000067F000040020000E0000000264000-000000067F000040020000E0000000268000__000000931B9A2710\n000000067F000040020000E0000000268000-000000067F000040020000E000000026C000__000000574B7FF240\n000000067F000040020000E0000000268000-000000067F000040020000E000000026C000__00000073AD3FE6B8\n000000067F000040020000E0000000268000-000000067F000040020000E000000026C000__000000914E3F38F0\n000000067F000040020000E0000000268000-000000067F000040020000E000000026C000__000000931B9A2710\n000000067F000040020000E000000026C000-000000067F000040020000E0000000270000__000000574B7FF240\n000000067F000040020000E000000026C000-000000067F000040020000E0000000270000__00000073AD3FE6B8\n000000067F000040020000E000000026C000-000000067F000040020000E0000000270000__000000914E3F38F0\n000000067F000040020000E000000026C000-000000067F000040020000E0000000270000__000000931B9A2710\n000000067F000040020000E000000026C925-000000067F000040020000E0000000275309__0000003B6A0FFB09-00000047441DEA39\n000000067F000040020000E0000000270000-000000067F000040020000E0000000274000__000000574B7FF240\n000000067F000040020000E0000000270000-000000067F000040020000E0000000274000__00000073AD3FE6B8\n000000067F000040020000E0000000270000-000000067F000040020000E0000000274000__000000914E3F38F0\n000000067F000040020000E0000000270000-000000067F000040020000E0000000274000__000000931B9A2710\n000000067F000040020000E0000000274000-000000067F000040020000E0000000278000__000000574B7FF240\n000000067F000040020000E0000000274000-000000067F
000040020000E0000000278000__00000073AD3FE6B8\n000000067F000040020000E0000000274000-000000067F000040020000E0000000278000__000000914E3F38F0\n000000067F000040020000E0000000274000-000000067F000040020000E0000000278000__000000931B9A2710\n000000067F000040020000E0000000275309-000000067F000040020000E000000027DCE0__0000003B6A0FFB09-00000047441DEA39\n000000067F000040020000E0000000278000-000000067F000040020000E000000027C000__000000574B7FF240\n000000067F000040020000E0000000278000-000000067F000040020000E000000027C000__00000073AD3FE6B8\n000000067F000040020000E0000000278000-000000067F000040020000E000000027C000__000000914E3F38F0\n000000067F000040020000E0000000278000-000000067F000040020000E000000027C000__000000931B9A2710\n000000067F000040020000E000000027C000-000000067F000040020000E0000000280000__000000574B7FF240\n000000067F000040020000E000000027C000-000000067F000040020000E0000000280000__00000073AD3FE6B8\n000000067F000040020000E000000027C000-000000067F000040020000E0000000280000__000000914E3F38F0\n000000067F000040020000E000000027C000-000000067F000040020000E0000000280000__000000931B9A2710\n000000067F000040020000E000000027DCE0-000000067F000040020000E00000002866B7__0000003B6A0FFB09-00000047441DEA39\n000000067F000040020000E0000000280000-000000067F000040020000E0000000284000__000000574B7FF240\n000000067F000040020000E0000000280000-000000067F000040020000E0000000284000__00000073AD3FE6B8\n000000067F000040020000E0000000280000-000000067F000040020000E0000000284000__000000914E3F38F0\n000000067F000040020000E0000000280000-000000067F000040020000E0000000284000__000000931B9A2710\n000000067F000040020000E0000000284000-000000067F000040020000E0000000288000__000000574B7FF240\n000000067F000040020000E0000000284000-000000067F000040020000E0000000288000__00000073AD3FE6B8\n000000067F000040020000E0000000284000-000000067F000040020000E0000000288000__000000914E3F38F0\n000000067F000040020000E0000000284000-000000067F000040020000E0000000288000__000000931B9A2710\n000000067F000040020000E00000002866B7-000000067F000040020000E
000000028F073__0000003B6A0FFB09-00000047441DEA39\n000000067F000040020000E0000000288000-000000067F000040020000E000000028C000__000000574B7FF240\n000000067F000040020000E0000000288000-000000067F000040020000E000000028C000__00000073AD3FE6B8\n000000067F000040020000E0000000288000-000000067F000040020000E000000028C000__000000914E3F38F0\n000000067F000040020000E0000000288000-000000067F000040020000E000000028C000__000000931B9A2710\n000000067F000040020000E000000028C000-000000067F000040020000E0000000290000__000000574B7FF240\n000000067F000040020000E000000028C000-000000067F000040020000E0000000290000__00000073AD3FE6B8\n000000067F000040020000E000000028C000-000000067F000040020000E0000000290000__000000914E3F38F0\n000000067F000040020000E000000028C000-000000067F000040020000E0000000290000__000000931B9A2710\n000000067F000040020000E000000028F073-000000067F000040020000E0000000297A3B__0000003B6A0FFB09-00000047441DEA39\n000000067F000040020000E0000000290000-000000067F000040020000E0000000294000__000000574B7FF240\n000000067F000040020000E0000000290000-000000067F000040020000E0000000294000__00000073AD3FE6B8\n000000067F000040020000E0000000290000-000000067F000040020000E0000000294000__000000914E3F38F0\n000000067F000040020000E0000000290000-000000067F000040020000E0000000294000__000000931B9A2710\n000000067F000040020000E0000000294000-000000067F000040020000E0000000298000__000000574B7FF240\n000000067F000040020000E0000000294000-000000067F000040020000E0000000298000__00000073AD3FE6B8\n000000067F000040020000E0000000294000-000000067F000040020000E0000000298000__000000914E3F38F0\n000000067F000040020000E0000000294000-000000067F000040020000E0000000298000__000000931B9A2710\n000000067F000040020000E0000000297A3B-000000067F000040020000E00000002A0430__0000003B6A0FFB09-00000047441DEA39\n000000067F000040020000E0000000298000-000000067F000040020000E000000029C000__000000574B7FF240\n000000067F000040020000E0000000298000-000000067F000040020000E000000029C000__00000073AD3FE6B8\n000000067F000040020000E0000000298000-000000067F000040020
000E000000029C000__000000914E3F38F0\n000000067F000040020000E0000000298000-000000067F000040020000E000000029C000__000000931B9A2710\n000000067F000040020000E000000029C000-000000067F000040020000E00000002A0000__000000574B7FF240\n000000067F000040020000E000000029C000-000000067F000040020000E00000002A0000__00000073AD3FE6B8\n000000067F000040020000E000000029C000-000000067F000040020000E00000002A0000__000000914E3F38F0\n000000067F000040020000E000000029C000-000000067F000040020000E00000002A0000__000000931B9A2710\n000000067F000040020000E00000002A0000-000000067F000040020000E00000002A4000__000000574B7FF240\n000000067F000040020000E00000002A0000-000000067F000040020000E00000002A4000__00000073AD3FE6B8\n000000067F000040020000E00000002A0000-000000067F000040020000E00000002A4000__000000914E3F38F0\n000000067F000040020000E00000002A0000-000000067F000040020000E00000002A4000__000000931B9A2710\n000000067F000040020000E00000002A0430-000000067F000040020000E00000002A8E24__0000003B6A0FFB09-00000047441DEA39\n000000067F000040020000E00000002A4000-000000067F000040020000E00000002A8000__000000574B7FF240\n000000067F000040020000E00000002A4000-000000067F000040020000E00000002A8000__00000073AD3FE6B8\n000000067F000040020000E00000002A4000-000000067F000040020000E00000002A8000__000000914E3F38F0\n000000067F000040020000E00000002A4000-000000067F000040020000E00000002A8000__000000931B9A2710\n000000067F000040020000E00000002A8000-000000067F000040020000E00000002AC000__000000574B7FF240\n000000067F000040020000E00000002A8000-000000067F000040020000E00000002AC000__00000073AD3FE6B8\n000000067F000040020000E00000002A8000-000000067F000040020000E00000002AC000__000000914E3F38F0\n000000067F000040020000E00000002A8000-000000067F000040020000E00000002AC000__000000931B9A2710\n000000067F000040020000E00000002A8E24-000000067F000040020000E00000002B180A__0000003B6A0FFB09-00000047441DEA39\n000000067F000040020000E00000002AC000-000000067F000040020000E00000002B0000__000000574B7FF240\n000000067F000040020000E00000002AC000-000000067F000040020000E00000002B
0000__00000073AD3FE6B8\n000000067F000040020000E00000002AC000-000000067F000040020000E00000002B0000__000000914E3F38F0\n000000067F000040020000E00000002AC000-000000067F000040020000E00000002B0000__000000931B9A2710\n000000067F000040020000E00000002B0000-000000067F000040020000E00000002B4000__000000574B7FF240\n000000067F000040020000E00000002B0000-000000067F000040020000E00000002B4000__00000073AD3FE6B8\n000000067F000040020000E00000002B0000-000000067F000040020000E00000002B4000__000000914E3F38F0\n000000067F000040020000E00000002B0000-000000067F000040020000E00000002B4000__000000931B9A2710\n000000067F000040020000E00000002B180A-000000067F000040020000E00000002BA1E2__0000003B6A0FFB09-00000047441DEA39\n000000067F000040020000E00000002B4000-000000067F000040020000E00000002B8000__000000574B7FF240\n000000067F000040020000E00000002B4000-000000067F000040020000E00000002B8000__00000073AD3FE6B8\n000000067F000040020000E00000002B4000-000000067F000040020000E00000002B8000__000000914E3F38F0\n000000067F000040020000E00000002B4000-000000067F000040020000E00000002B8000__000000931B9A2710\n000000067F000040020000E00000002B8000-000000067F000040020000E00000002BC000__000000574B7FF240\n000000067F000040020000E00000002B8000-000000067F000040020000E00000002BC000__00000073AD3FE6B8\n000000067F000040020000E00000002B8000-000000067F000040020000E00000002BC000__000000914E3F38F0\n000000067F000040020000E00000002B8000-000000067F000040020000E00000002BC000__000000931B9A2710\n000000067F000040020000E00000002BA1E2-000000067F000040020000E00000002C2BB0__0000003B6A0FFB09-00000047441DEA39\n000000067F000040020000E00000002BC000-000000067F000040020000E00000002C0000__000000574B7FF240\n000000067F000040020000E00000002BC000-000000067F000040020000E00000002C0000__00000073AD3FE6B8\n000000067F000040020000E00000002BC000-000000067F000040020000E00000002C0000__000000914E3F38F0\n000000067F000040020000E00000002BC000-000000067F000040020000E00000002C0000__000000931B9A2710\n000000067F000040020000E00000002C0000-000000067F000040020000E00000002C4000__0000005
74B7FF240\n000000067F000040020000E00000002C0000-000000067F000040020000E00000002C4000__00000073AD3FE6B8\n000000067F000040020000E00000002C0000-000000067F000040020000E00000002C4000__000000914E3F38F0\n000000067F000040020000E00000002C0000-000000067F000040020000E00000002C4000__000000931B9A2710\n000000067F000040020000E00000002C2BB0-000000067F000040020000E00000002CB579__0000003B6A0FFB09-00000047441DEA39\n000000067F000040020000E00000002C4000-000000067F000040020000E00000002C8000__000000574B7FF240\n000000067F000040020000E00000002C4000-000000067F000040020000E00000002C8000__00000073AD3FE6B8\n000000067F000040020000E00000002C4000-000000067F000040020000E00000002C8000__000000914E3F38F0\n000000067F000040020000E00000002C4000-000000067F000040020000E00000002C8000__000000931B9A2710\n000000067F000040020000E00000002C8000-000000067F000040020000E00000002CC000__000000574B7FF240\n000000067F000040020000E00000002C8000-000000067F000040020000E00000002CC000__00000073AD3FE6B8\n000000067F000040020000E00000002C8000-000000067F000040020000E00000002CC000__000000914E3F38F0\n000000067F000040020000E00000002C8000-000000067F000040020000E00000002CC000__000000931B9A2710\n000000067F000040020000E00000002CB579-000000067F000040020000E00000002D3F48__0000003B6A0FFB09-00000047441DEA39\n000000067F000040020000E00000002CC000-000000067F000040020000E00000002D0000__000000574B7FF240\n000000067F000040020000E00000002CC000-000000067F000040020000E00000002D0000__00000073AD3FE6B8\n000000067F000040020000E00000002CC000-000000067F000040020000E00000002D0000__000000914E3F38F0\n000000067F000040020000E00000002CC000-000000067F000040020000E00000002D0000__000000931B9A2710\n000000067F000040020000E00000002D0000-000000067F000040020000E00000002D4000__000000574B7FF240\n000000067F000040020000E00000002D0000-000000067F000040020000E00000002D4000__00000073AD3FE6B8\n000000067F000040020000E00000002D0000-000000067F000040020000E00000002D4000__000000914E3F38F0\n000000067F000040020000E00000002D0000-000000067F000040020000E00000002D4000__000000931B9A2710\n00
0000067F000040020000E00000002D3F48-000000067F000040020000E00000002DC941__0000003B6A0FFB09-00000047441DEA39\n000000067F000040020000E00000002D4000-000000067F000040020000E00000002D8000__000000574B7FF240\n000000067F000040020000E00000002D4000-000000067F000040020000E00000002D8000__00000073AD3FE6B8\n000000067F000040020000E00000002D4000-000000067F000040020000E00000002D8000__000000914E3F38F0\n000000067F000040020000E00000002D4000-000000067F000040020000E00000002D8000__000000931B9A2710\n000000067F000040020000E00000002D8000-000000067F000040020000E00000002DC000__000000574B7FF240\n000000067F000040020000E00000002D8000-000000067F000040020000E00000002DC000__00000073AD3FE6B8\n000000067F000040020000E00000002D8000-000000067F000040020000E00000002DC000__000000914E3F38F0\n000000067F000040020000E00000002D8000-000000067F000040020000E00000002DC000__000000931B9A2710\n000000067F000040020000E00000002DC000-000000067F000040020000E00000002E0000__000000574B7FF240\n000000067F000040020000E00000002DC000-000000067F000040020000E00000002E0000__00000073AD3FE6B8\n000000067F000040020000E00000002DC000-000000067F000040020000E00000002E0000__000000914E3F38F0\n000000067F000040020000E00000002DC000-000000067F000040020000E00000002E0000__000000931B9A2710\n000000067F000040020000E00000002DC941-000000067F000040020000E00000002E532B__0000003B6A0FFB09-00000047441DEA39\n000000067F000040020000E00000002E0000-000000067F000040020000E00000002E4000__000000574B7FF240\n000000067F000040020000E00000002E0000-000000067F000040020000E00000002E4000__00000073AD3FE6B8\n000000067F000040020000E00000002E0000-000000067F000040020000E00000002E4000__000000914E3F38F0\n000000067F000040020000E00000002E0000-000000067F000040020000E00000002E4000__000000931B9A2710\n000000067F000040020000E00000002E4000-000000067F000040020000E00000002E8000__000000574B7FF240\n000000067F000040020000E00000002E4000-000000067F000040020000E00000002E8000__00000073AD3FE6B8\n000000067F000040020000E00000002E4000-000000067F000040020000E00000002E8000__000000914E3F38F0\n000000067F00004
0020000E00000002E4000-000000067F000040020000E00000002E8000__000000931B9A2710\n000000067F000040020000E00000002E532B-000000067F000040020000E00000002EDD10__0000003B6A0FFB09-00000047441DEA39\n000000067F000040020000E00000002E8000-000000067F000040020000E00000002EC000__000000574B7FF240\n000000067F000040020000E00000002E8000-000000067F000040020000E00000002EC000__00000073AD3FE6B8\n000000067F000040020000E00000002E8000-000000067F000040020000E00000002EC000__000000914E3F38F0\n000000067F000040020000E00000002E8000-000000067F000040020000E00000002EC000__000000931B9A2710\n000000067F000040020000E00000002EC000-000000067F000040020000E00000002F0000__000000574B7FF240\n000000067F000040020000E00000002EC000-000000067F000040020000E00000002F0000__00000073AD3FE6B8\n000000067F000040020000E00000002EC000-000000067F000040020000E00000002F0000__000000914E3F38F0\n000000067F000040020000E00000002EC000-000000067F000040020000E00000002F0000__000000931B9A2710\n000000067F000040020000E00000002EDD10-000000067F000040020000E00000002F66E2__0000003B6A0FFB09-00000047441DEA39\n000000067F000040020000E00000002F0000-000000067F000040020000E00000002F4000__000000574B7FF240\n000000067F000040020000E00000002F0000-000000067F000040020000E00000002F4000__00000073AD3FE6B8\n000000067F000040020000E00000002F0000-000000067F000040020000E00000002F4000__000000914E3F38F0\n000000067F000040020000E00000002F0000-000000067F000040020000E00000002F4000__000000931B9A2710\n000000067F000040020000E00000002F4000-000000067F000040020000E00000002F8000__000000574B7FF240\n000000067F000040020000E00000002F4000-000000067F000040020000E00000002F8000__00000073AD3FE6B8\n000000067F000040020000E00000002F4000-000000067F000040020000E00000002F8000__000000914E3F38F0\n000000067F000040020000E00000002F4000-000000067F000040020000E00000002F8000__000000931B9A2710\n000000067F000040020000E00000002F66E2-000000067F000040020000E00000002FF0B3__0000003B6A0FFB09-00000047441DEA39\n000000067F000040020000E00000002F8000-000000067F000040020000E00000002FC000__000000574B7FF240\n000000067F0
00040020000E00000002F8000-000000067F000040020000E00000002FC000__00000073AD3FE6B8\n000000067F000040020000E00000002F8000-000000067F000040020000E00000002FC000__000000914E3F38F0\n000000067F000040020000E00000002F8000-000000067F000040020000E00000002FC000__000000931B9A2710\n000000067F000040020000E00000002FC000-000000067F000040020000E0000000300000__000000574B7FF240\n000000067F000040020000E00000002FC000-000000067F000040020000E0000000300000__00000073AD3FE6B8\n000000067F000040020000E00000002FC000-000000067F000040020000E0000000300000__000000914E3F38F0\n000000067F000040020000E00000002FC000-000000067F000040020000E0000000300000__000000931B9A2710\n000000067F000040020000E00000002FF0B3-000000067F000040020000E0000000307A76__0000003B6A0FFB09-00000047441DEA39\n000000067F000040020000E0000000300000-000000067F000040020000E0000000304000__000000574B7FF240\n000000067F000040020000E0000000300000-000000067F000040020000E0000000304000__00000073AD3FE6B8\n000000067F000040020000E0000000300000-000000067F000040020000E0000000304000__000000914E3F38F0\n000000067F000040020000E0000000300000-000000067F000040020000E0000000304000__000000931B9A2710\n000000067F000040020000E0000000304000-000000067F000040020000E0000000308000__000000574B7FF240\n000000067F000040020000E0000000304000-000000067F000040020000E0000000308000__00000073AD3FE6B8\n000000067F000040020000E0000000304000-000000067F000040020000E0000000308000__000000914E3F38F0\n000000067F000040020000E0000000304000-000000067F000040020000E0000000308000__000000931B9A2710\n000000067F000040020000E0000000307A76-000000067F000040020000E0000000310449__0000003B6A0FFB09-00000047441DEA39\n000000067F000040020000E0000000308000-000000067F000040020000E000000030C000__000000574B7FF240\n000000067F000040020000E0000000308000-000000067F000040020000E000000030C000__00000073AD3FE6B8\n000000067F000040020000E0000000308000-000000067F000040020000E000000030C000__000000914E3F38F0\n000000067F000040020000E0000000308000-000000067F000040020000E000000030C000__000000931B9A2710\n000000067F000040020000E0
00000030C000-000000067F000040020000E0000000310000__000000574B7FF240\n000000067F000040020000E000000030C000-000000067F000040020000E0000000310000__00000073AD3FE6B8\n000000067F000040020000E000000030C000-000000067F000040020000E0000000310000__000000914E3F38F0\n000000067F000040020000E000000030C000-000000067F000040020000E0000000310000__000000931B9A2710\n000000067F000040020000E0000000310000-000000067F000040020000E0000000314000__000000574B7FF240\n000000067F000040020000E0000000310000-000000067F000040020000E0000000314000__00000073AD3FE6B8\n000000067F000040020000E0000000310000-000000067F000040020000E0000000314000__000000914E3F38F0\n000000067F000040020000E0000000310000-000000067F000040020000E0000000314000__000000931B9A2710\n000000067F000040020000E0000000310449-000000067F000040020000E0000000318E4F__0000003B6A0FFB09-00000047441DEA39\n000000067F000040020000E0000000314000-000000067F000040020000E0000000318000__000000574B7FF240\n000000067F000040020000E0000000314000-000000067F000040020000E0000000318000__00000073AD3FE6B8\n000000067F000040020000E0000000314000-000000067F000040020000E0000000318000__000000914E3F38F0\n000000067F000040020000E0000000314000-000000067F000040020000E0000000318000__000000931B9A2710\n000000067F000040020000E0000000318000-000000067F000040020000E000000031C000__000000574B7FF240\n000000067F000040020000E0000000318000-000000067F000040020000E000000031C000__00000073AD3FE6B8\n000000067F000040020000E0000000318000-000000067F000040020000E000000031C000__000000914E3F38F0\n000000067F000040020000E0000000318000-000000067F000040020000E000000031C000__000000931B9A2710\n000000067F000040020000E0000000318E4F-000000067F000040020000E0000000321836__0000003B6A0FFB09-00000047441DEA39\n000000067F000040020000E000000031C000-000000067F000040020000E0000000320000__000000574B7FF240\n000000067F000040020000E000000031C000-000000067F000040020000E0000000320000__00000073AD3FE6B8\n000000067F000040020000E000000031C000-000000067F000040020000E0000000320000__000000914E3F38F0\n000000067F000040020000E000000031C000-
000000067F000040020000E0000000320000__000000931B9A2710\n000000067F000040020000E0000000320000-000000067F000040020000E0000000324000__000000574B7FF240\n000000067F000040020000E0000000320000-000000067F000040020000E0000000324000__00000073AD3FE6B8\n000000067F000040020000E0000000320000-000000067F000040020000E0000000324000__000000914E3F38F0\n000000067F000040020000E0000000320000-000000067F000040020000E0000000324000__000000931B9A2710\n000000067F000040020000E0000000321836-000000067F000040020000E000000032A20E__0000003B6A0FFB09-00000047441DEA39\n000000067F000040020000E0000000324000-000000067F000040020000E0000000328000__000000574B7FF240\n000000067F000040020000E0000000324000-000000067F000040020000E0000000328000__00000073AD3FE6B8\n000000067F000040020000E0000000324000-000000067F000040020000E0000000328000__000000914E3F38F0\n000000067F000040020000E0000000324000-000000067F000040020000E0000000328000__000000931B9A2710\n000000067F000040020000E0000000328000-000000067F000040020000E000000032C000__000000574B7FF240\n000000067F000040020000E0000000328000-000000067F000040020000E000000032C000__00000073AD3FE6B8\n000000067F000040020000E0000000328000-000000067F000040020000E000000032C000__000000914E3F38F0\n000000067F000040020000E0000000328000-000000067F000040020000E000000032C000__000000931B9A2710\n000000067F000040020000E000000032A20E-000000067F000040020000E0000000332BDA__0000003B6A0FFB09-00000047441DEA39\n000000067F000040020000E000000032C000-000000067F000040020000E0000000330000__000000574B7FF240\n000000067F000040020000E000000032C000-000000067F000040020000E0000000330000__00000073AD3FE6B8\n000000067F000040020000E000000032C000-000000067F000040020000E0000000330000__000000914E3F38F0\n000000067F000040020000E000000032C000-000000067F000040020000E0000000330000__000000931B9A2710\n000000067F000040020000E0000000330000-000000067F000040020000E0000000334000__000000574B7FF240\n000000067F000040020000E0000000330000-000000067F000040020000E0000000334000__00000073AD3FE6B8\n000000067F000040020000E0000000330000-000000067F000
040020000E0000000334000__000000914E3F38F0\n000000067F000040020000E0000000330000-000000067F000040020000E0000000334000__000000931B9A2710\n000000067F000040020000E0000000332BDA-000000067F000040020000E000000033B5AD__0000003B6A0FFB09-00000047441DEA39\n000000067F000040020000E0000000334000-000000067F000040020000E0000000338000__000000574B7FF240\n000000067F000040020000E0000000334000-000000067F000040020000E0000000338000__00000073AD3FE6B8\n000000067F000040020000E0000000334000-000000067F000040020000E0000000338000__000000914E3F38F0\n000000067F000040020000E0000000334000-000000067F000040020000E0000000338000__000000931B9A2710\n000000067F000040020000E0000000338000-000000067F000040020000E000000033C000__000000574B7FF240\n000000067F000040020000E0000000338000-000000067F000040020000E000000033C000__00000073AD3FE6B8\n000000067F000040020000E0000000338000-000000067F000040020000E000000033C000__000000914E3F38F0\n000000067F000040020000E0000000338000-000000067F000040020000E000000033C000__000000931B9A2710\n000000067F000040020000E000000033B5AD-000000067F000040020000E0000000343F77__0000003B6A0FFB09-00000047441DEA39\n000000067F000040020000E000000033C000-000000067F000040020000E0000000340000__000000574B7FF240\n000000067F000040020000E000000033C000-000000067F000040020000E0000000340000__00000073AD3FE6B8\n000000067F000040020000E000000033C000-000000067F000040020000E0000000340000__000000914E3F38F0\n000000067F000040020000E000000033C000-000000067F000040020000E0000000340000__000000931B9A2710\n000000067F000040020000E0000000340000-000000067F000040020000E0000000344000__000000574B7FF240\n000000067F000040020000E0000000340000-000000067F000040020000E0000000344000__00000073AD3FE6B8\n000000067F000040020000E0000000340000-000000067F000040020000E0000000344000__000000914E3F38F0\n000000067F000040020000E0000000340000-000000067F000040020000E0000000344000__000000931B9A2710\n000000067F000040020000E0000000343F77-000000067F000040020000E000000034C95A__0000003B6A0FFB09-00000047441DEA39\n000000067F000040020000E0000000344000-000000067
F000040020000E0000000348000__000000574B7FF240\n000000067F000040020000E0000000344000-000000067F000040020000E0000000348000__00000073AD3FE6B8\n000000067F000040020000E0000000344000-000000067F000040020000E0000000348000__000000914E3F38F0\n000000067F000040020000E0000000344000-000000067F000040020000E0000000348000__000000931B9A2710\n000000067F000040020000E0000000348000-000000067F000040020000E000000034C000__000000574B7FF240\n000000067F000040020000E0000000348000-000000067F000040020000E000000034C000__00000073AD3FE6B8\n000000067F000040020000E0000000348000-000000067F000040020000E000000034C000__000000914E3F38F0\n000000067F000040020000E0000000348000-000000067F000040020000E000000034C000__000000931B9A2710\n000000067F000040020000E000000034C000-000000067F000040020000E0000000350000__000000574B7FF240\n000000067F000040020000E000000034C000-000000067F000040020000E0000000350000__00000073AD3FE6B8\n000000067F000040020000E000000034C000-000000067F000040020000E0000000350000__000000914E3F38F0\n000000067F000040020000E000000034C000-000000067F000040020000E0000000350000__000000931B9A2710\n000000067F000040020000E000000034C95A-000000067F000040020000E0000000355348__0000003B6A0FFB09-00000047441DEA39\n000000067F000040020000E0000000350000-000000067F000040020000E0000000354000__000000574B7FF240\n000000067F000040020000E0000000350000-000000067F000040020000E0000000354000__00000073AD3FE6B8\n000000067F000040020000E0000000350000-000000067F000040020000E0000000354000__000000914E3F38F0\n000000067F000040020000E0000000350000-000000067F000040020000E0000000354000__000000931B9A2710\n000000067F000040020000E0000000354000-000000067F000040020000E0000000358000__000000574B7FF240\n000000067F000040020000E0000000354000-000000067F000040020000E0000000358000__00000073AD3FE6B8\n000000067F000040020000E0000000354000-000000067F000040020000E0000000358000__000000914E3F38F0\n000000067F000040020000E0000000354000-000000067F000040020000E0000000358000__000000931B9A2710\n000000067F000040020000E0000000355348-000000067F000040020000E000000035DD35__0
000003B6A0FFB09-00000047441DEA39\n000000067F000040020000E0000000358000-000000067F000040020000E000000035C000__000000574B7FF240\n000000067F000040020000E0000000358000-000000067F000040020000E000000035C000__00000073AD3FE6B8\n000000067F000040020000E0000000358000-000000067F000040020000E000000035C000__000000914E3F38F0\n000000067F000040020000E0000000358000-000000067F000040020000E000000035C000__000000931B9A2710\n000000067F000040020000E000000035C000-000000067F000040020000E0000000360000__000000574B7FF240\n000000067F000040020000E000000035C000-000000067F000040020000E0000000360000__00000073AD3FE6B8\n000000067F000040020000E000000035C000-000000067F000040020000E0000000360000__000000914E3F38F0\n000000067F000040020000E000000035C000-000000067F000040020000E0000000360000__000000931B9A2710\n000000067F000040020000E000000035DD35-000000067F000040020000E000000036671D__0000003B6A0FFB09-00000047441DEA39\n000000067F000040020000E0000000360000-000000067F000040020000E0000000364000__000000574B7FF240\n000000067F000040020000E0000000360000-000000067F000040020000E0000000364000__00000073AD3FE6B8\n000000067F000040020000E0000000360000-000000067F000040020000E0000000364000__000000914E3F38F0\n000000067F000040020000E0000000360000-000000067F000040020000E0000000364000__000000931B9A2710\n000000067F000040020000E0000000364000-000000067F000040020000E0000000368000__000000574B7FF240\n000000067F000040020000E0000000364000-000000067F000040020000E0000000368000__00000073AD3FE6B8\n000000067F000040020000E0000000364000-000000067F000040020000E0000000368000__000000914E3F38F0\n000000067F000040020000E0000000364000-000000067F000040020000E0000000368000__000000931B9A2710\n000000067F000040020000E000000036671D-000000067F000040020000E000000036F0F0__0000003B6A0FFB09-00000047441DEA39\n000000067F000040020000E0000000368000-000000067F000040020000E000000036C000__000000574B7FF240\n000000067F000040020000E0000000368000-000000067F000040020000E000000036C000__00000073AD3FE6B8\n000000067F000040020000E0000000368000-000000067F000040020000E000000036C00
0__000000914E3F38F0\n000000067F000040020000E0000000368000-000000067F000040020000E000000036C000__000000931B9A2710\n000000067F000040020000E000000036C000-000000067F000040020000E0000000370000__000000574B7FF240\n000000067F000040020000E000000036C000-000000067F000040020000E0000000370000__00000073AD3FE6B8\n000000067F000040020000E000000036C000-000000067F000040020000E0000000370000__000000914E3F38F0\n000000067F000040020000E000000036C000-000000067F000040020000E0000000370000__000000931B9A2710\n000000067F000040020000E000000036F0F0-000000067F000040020000E0000000377AB4__0000003B6A0FFB09-00000047441DEA39\n000000067F000040020000E0000000370000-000000067F000040020000E0000000374000__000000574B7FF240\n000000067F000040020000E0000000370000-000000067F000040020000E0000000374000__00000073AD3FE6B8\n000000067F000040020000E0000000370000-000000067F000040020000E0000000374000__000000914E3F38F0\n000000067F000040020000E0000000370000-000000067F000040020000E0000000374000__000000931B9A2710\n000000067F000040020000E0000000374000-000000067F000040020000E0000000378000__000000574B7FF240\n000000067F000040020000E0000000374000-000000067F000040020000E0000000378000__00000073AD3FE6B8\n000000067F000040020000E0000000374000-000000067F000040020000E0000000378000__000000914E3F38F0\n000000067F000040020000E0000000374000-000000067F000040020000E0000000378000__000000931B9A2710\n000000067F000040020000E0000000377AB4-000000067F000040020000E000000038047C__0000003B6A0FFB09-00000047441DEA39\n000000067F000040020000E0000000378000-000000067F000040020000E000000037C000__000000574B7FF240\n000000067F000040020000E0000000378000-000000067F000040020000E000000037C000__00000073AD3FE6B8\n000000067F000040020000E0000000378000-000000067F000040020000E000000037C000__000000914E3F38F0\n000000067F000040020000E0000000378000-000000067F000040020000E000000037C000__000000931B9A2710\n000000067F000040020000E000000037C000-000000067F000040020000E0000000380000__000000574B7FF240\n000000067F000040020000E000000037C000-000000067F000040020000E0000000380000__00000073AD
3FE6B8\n000000067F000040020000E000000037C000-000000067F000040020000E0000000380000__000000914E3F38F0\n000000067F000040020000E000000037C000-000000067F000040020000E0000000380000__000000931B9A2710\n000000067F000040020000E0000000380000-000000067F000040020000E0000000384000__000000574B7FF240\n000000067F000040020000E0000000380000-000000067F000040020000E0000000384000__00000073AD3FE6B8\n000000067F000040020000E0000000380000-000000067F000040020000E0000000384000__000000914E3F38F0\n000000067F000040020000E0000000380000-000000067F000040020000E0000000384000__000000931B9A2710\n000000067F000040020000E000000038047C-000000067F000040020000E0000000388E68__0000003B6A0FFB09-00000047441DEA39\n000000067F000040020000E0000000384000-000000067F000040020000E0000000388000__000000574B7FF240\n000000067F000040020000E0000000384000-000000067F000040020000E0000000388000__00000073AD3FE6B8\n000000067F000040020000E0000000384000-000000067F000040020000E0000000388000__000000914E3F38F0\n000000067F000040020000E0000000384000-000000067F000040020000E0000000388000__000000931B9A2710\n000000067F000040020000E0000000388000-000000067F000040020000E000000038C000__000000574B7FF240\n000000067F000040020000E0000000388000-000000067F000040020000E000000038C000__00000073AD3FE6B8\n000000067F000040020000E0000000388000-000000067F000040020000E000000038C000__000000914E3F38F0\n000000067F000040020000E0000000388000-000000067F000040020000E000000038C000__000000931B9A2710\n000000067F000040020000E0000000388E68-000000067F000040020000E0000000391852__0000003B6A0FFB09-00000047441DEA39\n000000067F000040020000E000000038C000-000000067F000040020000E0000000390000__000000574B7FF240\n000000067F000040020000E000000038C000-000000067F000040020000E0000000390000__00000073AD3FE6B8\n000000067F000040020000E000000038C000-000000067F000040020000E0000000390000__000000914E3F38F0\n000000067F000040020000E000000038C000-000000067F000040020000E0000000390000__000000931B9A2710\n000000067F000040020000E0000000390000-000000067F000040020000E0000000394000__000000574B7FF240\n00000
0067F000040020000E0000000390000-000000067F000040020000E0000000394000__00000073AD3FE6B8\n000000067F000040020000E0000000390000-000000067F000040020000E0000000394000__000000914E3F38F0\n000000067F000040020000E0000000390000-000000067F000040020000E0000000394000__000000931B9A2710\n000000067F000040020000E0000000391852-000000067F000040020000E000000039A23F__0000003B6A0FFB09-00000047441DEA39\n000000067F000040020000E0000000394000-000000067F000040020000E0000000398000__000000574B7FF240\n000000067F000040020000E0000000394000-000000067F000040020000E0000000398000__00000073AD3FE6B8\n000000067F000040020000E0000000394000-000000067F000040020000E0000000398000__000000914E3F38F0\n000000067F000040020000E0000000394000-000000067F000040020000E0000000398000__000000931B9A2710\n000000067F000040020000E0000000398000-000000067F000040020000E000000039C000__000000574B7FF240\n000000067F000040020000E0000000398000-000000067F000040020000E000000039C000__00000073AD3FE6B8\n000000067F000040020000E0000000398000-000000067F000040020000E000000039C000__000000914E3F38F0\n000000067F000040020000E0000000398000-000000067F000040020000E000000039C000__000000931B9A2710\n000000067F000040020000E000000039A23F-000000067F000040020000E00000003A2C1E__0000003B6A0FFB09-00000047441DEA39\n000000067F000040020000E000000039C000-000000067F000040020000E00000003A0000__000000574B7FF240\n000000067F000040020000E000000039C000-000000067F000040020000E00000003A0000__00000073AD3FE6B8\n000000067F000040020000E000000039C000-000000067F000040020000E00000003A0000__000000914E3F38F0\n000000067F000040020000E000000039C000-000000067F000040020000E00000003A0000__000000931B9A2710\n000000067F000040020000E00000003A0000-000000067F000040020000E00000003A4000__000000574B7FF240\n000000067F000040020000E00000003A0000-000000067F000040020000E00000003A4000__00000073AD3FE6B8\n000000067F000040020000E00000003A0000-000000067F000040020000E00000003A4000__000000914E3F38F0\n000000067F000040020000E00000003A0000-000000067F000040020000E00000003A4000__000000931B9A2710\n000000067F00004002
0000E00000003A2C1E-000000067F000040020000E00000003AB5EC__0000003B6A0FFB09-00000047441DEA39\n000000067F000040020000E00000003A4000-000000067F000040020000E00000003A8000__000000574B7FF240\n000000067F000040020000E00000003A4000-000000067F000040020000E00000003A8000__00000073AD3FE6B8\n000000067F000040020000E00000003A4000-000000067F000040020000E00000003A8000__000000914E3F38F0\n000000067F000040020000E00000003A4000-000000067F000040020000E00000003A8000__000000931B9A2710\n000000067F000040020000E00000003A8000-000000067F000040020000E00000003AC000__000000574B7FF240\n000000067F000040020000E00000003A8000-000000067F000040020000E00000003AC000__00000073AD3FE6B8\n000000067F000040020000E00000003A8000-000000067F000040020000E00000003AC000__000000914E3F38F0\n000000067F000040020000E00000003A8000-000000067F000040020000E00000003AC000__000000931B9A2710\n000000067F000040020000E00000003AB5EC-000000067F000040020000E00000003B3FB2__0000003B6A0FFB09-00000047441DEA39\n000000067F000040020000E00000003AC000-000000067F000040020000E00000003B0000__000000574B7FF240\n000000067F000040020000E00000003AC000-000000067F000040020000E00000003B0000__00000073AD3FE6B8\n000000067F000040020000E00000003AC000-000000067F000040020000E00000003B0000__000000914E3F38F0\n000000067F000040020000E00000003AC000-000000067F000040020000E00000003B0000__000000931B9A2710\n000000067F000040020000E00000003B0000-000000067F000040020000E00000003B4000__000000574B7FF240\n000000067F000040020000E00000003B0000-000000067F000040020000E00000003B4000__00000073AD3FE6B8\n000000067F000040020000E00000003B0000-000000067F000040020000E00000003B4000__000000914E3F38F0\n000000067F000040020000E00000003B0000-000000067F000040020000E00000003B4000__000000931B9A2710\n000000067F000040020000E00000003B3FB2-000000067F000040020000E00000003BC972__0000003B6A0FFB09-00000047441DEA39\n000000067F000040020000E00000003B4000-000000067F000040020000E00000003B8000__000000574B7FF240\n000000067F000040020000E00000003B4000-000000067F000040020000E00000003B8000__00000073AD3FE6B8\n000000067F0000
40020000E00000003B4000-000000067F000040020000E00000003B8000__000000914E3F38F0\n000000067F000040020000E00000003B4000-000000067F000040020000E00000003B8000__000000931B9A2710\n000000067F000040020000E00000003B8000-000000067F000040020000E00000003BC000__000000574B7FF240\n000000067F000040020000E00000003B8000-000000067F000040020000E00000003BC000__00000073AD3FE6B8\n000000067F000040020000E00000003B8000-000000067F000040020000E00000003BC000__000000914E3F38F0\n000000067F000040020000E00000003B8000-000000067F000040020000E00000003BC000__000000931B9A2710\n000000067F000040020000E00000003BC000-000000067F000040020000E00000003C0000__000000574B7FF240\n000000067F000040020000E00000003BC000-000000067F000040020000E00000003C0000__00000073AD3FE6B8\n000000067F000040020000E00000003BC000-000000067F000040020000E00000003C0000__000000914E3F38F0\n000000067F000040020000E00000003BC000-000000067F000040020000E00000003C0000__000000931B9A2710\n000000067F000040020000E00000003BC972-000000067F000040020000E00000003C5369__0000003B6A0FFB09-00000047441DEA39\n000000067F000040020000E00000003C0000-000000067F000040020000E00000003C4000__000000574B7FF240\n000000067F000040020000E00000003C0000-000000067F000040020000E00000003C4000__00000073AD3FE6B8\n000000067F000040020000E00000003C0000-000000067F000040020000E00000003C4000__000000914E3F38F0\n000000067F000040020000E00000003C0000-000000067F000040020000E00000003C4000__000000931B9A2710\n000000067F000040020000E00000003C4000-000000067F000040020000E00000003C8000__000000574B7FF240\n000000067F000040020000E00000003C4000-000000067F000040020000E00000003C8000__00000073AD3FE6B8\n000000067F000040020000E00000003C4000-000000067F000040020000E00000003C8000__000000914E3F38F0\n000000067F000040020000E00000003C4000-000000067F000040020000E00000003C8000__000000931B9A2710\n000000067F000040020000E00000003C5369-000000067F000040020000E00000003CDD67__0000003B6A0FFB09-00000047441DEA39\n000000067F000040020000E00000003C8000-000000067F000040020000E00000003CC000__000000574B7FF240\n000000067F000040020000E0000
0003C8000-000000067F000040020000E00000003CC000__00000073AD3FE6B8\n000000067F000040020000E00000003C8000-000000067F000040020000E00000003CC000__000000914E3F38F0\n000000067F000040020000E00000003C8000-000000067F000040020000E00000003CC000__000000931B9A2710\n000000067F000040020000E00000003CC000-000000067F000040020000E00000003D0000__000000574B7FF240\n000000067F000040020000E00000003CC000-000000067F000040020000E00000003D0000__00000073AD3FE6B8\n000000067F000040020000E00000003CC000-000000067F000040020000E00000003D0000__000000914E3F38F0\n000000067F000040020000E00000003CC000-000000067F000040020000E00000003D0000__000000931B9A2710\n000000067F000040020000E00000003CDD67-000000067F000040020000E00000003D675B__0000003B6A0FFB09-00000047441DEA39\n000000067F000040020000E00000003D0000-000000067F000040020000E00000003D4000__000000574B7FF240\n000000067F000040020000E00000003D0000-000000067F000040020000E00000003D4000__00000073AD3FE6B8\n000000067F000040020000E00000003D0000-000000067F000040020000E00000003D4000__000000914E3F38F0\n000000067F000040020000E00000003D0000-000000067F000040020000E00000003D4000__000000931B9A2710\n000000067F000040020000E00000003D4000-000000067F000040020000E00000003D8000__000000574B7FF240\n000000067F000040020000E00000003D4000-000000067F000040020000E00000003D8000__00000073AD3FE6B8\n000000067F000040020000E00000003D4000-000000067F000040020000E00000003D8000__000000914E3F38F0\n000000067F000040020000E00000003D4000-000000067F000040020000E00000003D8000__000000931B9A2710\n000000067F000040020000E00000003D675B-000000067F000040020000E00000003DF132__0000003B6A0FFB09-00000047441DEA39\n000000067F000040020000E00000003D8000-000000067F000040020000E00000003DC000__000000574B7FF240\n000000067F000040020000E00000003D8000-000000067F000040020000E00000003DC000__00000073AD3FE6B8\n000000067F000040020000E00000003D8000-000000067F000040020000E00000003DC000__000000914E3F38F0\n000000067F000040020000E00000003D8000-000000067F000040020000E00000003DC000__000000931B9A2710\n000000067F000040020000E00000003DC000-000
000067F000040020000E00000003E0000__000000574B7FF240\n000000067F000040020000E00000003DC000-000000067F000040020000E00000003E0000__00000073AD3FE6B8\n000000067F000040020000E00000003DC000-000000067F000040020000E00000003E0000__000000914E3F38F0\n000000067F000040020000E00000003DC000-000000067F000040020000E00000003E0000__000000931B9A2710\n000000067F000040020000E00000003DF132-000000067F000040020000E00000003E7AFE__0000003B6A0FFB09-00000047441DEA39\n000000067F000040020000E00000003E0000-000000067F000040020000E00000003E4000__000000574B7FF240\n000000067F000040020000E00000003E0000-000000067F000040020000E00000003E4000__00000073AD3FE6B8\n000000067F000040020000E00000003E0000-000000067F000040020000E00000003E4000__000000914E3F38F0\n000000067F000040020000E00000003E0000-000000067F000040020000E00000003E4000__000000931B9A2710\n000000067F000040020000E00000003E4000-000000067F000040020000E00000003E8000__000000574B7FF240\n000000067F000040020000E00000003E4000-000000067F000040020000E00000003E8000__00000073AD3FE6B8\n000000067F000040020000E00000003E4000-000000067F000040020000E00000003E8000__000000914E3F38F0\n000000067F000040020000E00000003E4000-000000067F000040020000E00000003E8000__000000931B9A2710\n000000067F000040020000E00000003E7AFE-000000067F000040020000E00000003F04C7__0000003B6A0FFB09-00000047441DEA39\n000000067F000040020000E00000003E8000-000000067F000040020000E00000003EC000__000000574B7FF240\n000000067F000040020000E00000003E8000-000000067F000040020000E00000003EC000__00000073AD3FE6B8\n000000067F000040020000E00000003E8000-000000067F000040020000E00000003EC000__000000914E3F38F0\n000000067F000040020000E00000003E8000-000000067F000040020000E00000003EC000__000000931B9A2710\n000000067F000040020000E00000003EC000-000000067F000040020000E00000003F0000__000000574B7FF240\n000000067F000040020000E00000003EC000-000000067F000040020000E00000003F0000__00000073AD3FE6B8\n000000067F000040020000E00000003EC000-000000067F000040020000E00000003F0000__000000914E3F38F0\n000000067F000040020000E00000003EC000-000000067F000040
020000E00000003F0000__000000931B9A2710\n000000067F000040020000E00000003F0000-000000067F000040020000E00000003F4000__000000574B7FF240\n000000067F000040020000E00000003F0000-000000067F000040020000E00000003F4000__00000073AD3FE6B8\n000000067F000040020000E00000003F0000-000000067F000040020000E00000003F4000__000000914E3F38F0\n000000067F000040020000E00000003F0000-000000067F000040020000E00000003F4000__000000931B9A2710\n000000067F000040020000E00000003F04C7-000000067F000040020000E00000003F8E92__0000003B6A0FFB09-00000047441DEA39\n000000067F000040020000E00000003F4000-000000067F000040020000E00000003F8000__000000574B7FF240\n000000067F000040020000E00000003F4000-000000067F000040020000E00000003F8000__00000073AD3FE6B8\n000000067F000040020000E00000003F4000-000000067F000040020000E00000003F8000__000000914E3F38F0\n000000067F000040020000E00000003F4000-000000067F000040020000E00000003F8000__000000931B9A2710\n000000067F000040020000E00000003F8000-000000067F000040020000E00000003FC000__000000574B7FF240\n000000067F000040020000E00000003F8000-000000067F000040020000E00000003FC000__00000073AD3FE6B8\n000000067F000040020000E00000003F8000-000000067F000040020000E00000003FC000__000000914E3F38F0\n000000067F000040020000E00000003F8000-000000067F000040020000E00000003FC000__000000931B9A2710\n000000067F000040020000E00000003F8E92-000000067F000040020000E000000040188E__0000003B6A0FFB09-00000047441DEA39\n000000067F000040020000E00000003FC000-000000067F000040020000E0000000400000__000000574B7FF240\n000000067F000040020000E00000003FC000-000000067F000040020000E0000000400000__00000073AD3FE6B8\n000000067F000040020000E00000003FC000-000000067F000040020000E0000000400000__000000914E3F38F0\n000000067F000040020000E00000003FC000-000000067F000040020000E0000000400000__000000931B9A2710\n000000067F000040020000E0000000400000-000000067F000040020000E0000000404000__000000574B7FF240\n000000067F000040020000E0000000400000-000000067F000040020000E0000000404000__00000073AD3FE6B8\n000000067F000040020000E0000000400000-000000067F000040020000E000000
0404000__000000914E3F38F0\n000000067F000040020000E0000000400000-000000067F000040020000E0000000404000__000000931B9A2710\n000000067F000040020000E000000040188E-000000067F000040020000E000000040A288__0000003B6A0FFB09-00000047441DEA39\n000000067F000040020000E0000000404000-000000067F000040020000E0000000408000__000000574B7FF240\n000000067F000040020000E0000000404000-000000067F000040020000E0000000408000__00000073AD3FE6B8\n000000067F000040020000E0000000404000-000000067F000040020000E0000000408000__000000914E3F38F0\n000000067F000040020000E0000000404000-000000067F000040020000E0000000408000__000000931B9A2710\n000000067F000040020000E0000000408000-000000067F000040020000E000000040C000__000000574B7FF240\n000000067F000040020000E0000000408000-000000067F000040020000E000000040C000__00000073AD3FE6B8\n000000067F000040020000E0000000408000-000000067F000040020000E000000040C000__000000914E3F38F0\n000000067F000040020000E0000000408000-000000067F000040020000E000000040C000__000000931B9A2710\n000000067F000040020000E000000040A288-000000067F000040020000E0000000412C77__0000003B6A0FFB09-00000047441DEA39\n000000067F000040020000E000000040C000-000000067F000040020000E0000000410000__000000574B7FF240\n000000067F000040020000E000000040C000-000000067F000040020000E0000000410000__00000073AD3FE6B8\n000000067F000040020000E000000040C000-000000067F000040020000E0000000410000__000000914E3F38F0\n000000067F000040020000E000000040C000-000000067F000040020000E0000000410000__000000931B9A2710\n000000067F000040020000E0000000410000-000000067F000040020000E0000000414000__000000574B7FF240\n000000067F000040020000E0000000410000-000000067F000040020000E0000000414000__00000073AD3FE6B8\n000000067F000040020000E0000000410000-000000067F000040020000E0000000414000__000000914E3F38F0\n000000067F000040020000E0000000410000-000000067F000040020000E0000000414000__000000931B9A2710\n000000067F000040020000E0000000412C77-000000067F000040020000E000000041B646__0000003B6A0FFB09-00000047441DEA39\n000000067F000040020000E0000000414000-000000067F000040020000E00
00000418000__000000574B7FF240\n000000067F000040020000E0000000414000-000000067F000040020000E0000000418000__00000073AD3FE6B8\n000000067F000040020000E0000000414000-000000067F000040020000E0000000418000__000000914E3F38F0\n000000067F000040020000E0000000414000-000000067F000040020000E0000000418000__000000931B9A2710\n000000067F000040020000E0000000418000-000000067F000040020000E000000041C000__000000574B7FF240\n000000067F000040020000E0000000418000-000000067F000040020000E000000041C000__00000073AD3FE6B8\n000000067F000040020000E0000000418000-000000067F000040020000E000000041C000__000000914E3F38F0\n000000067F000040020000E0000000418000-000000067F000040020000E000000041C000__000000931B9A2710\n000000067F000040020000E000000041B646-000000067F000040020000E000000042400E__0000003B6A0FFB09-00000047441DEA39\n000000067F000040020000E000000041C000-000000067F000040020000E0000000420000__000000574B7FF240\n000000067F000040020000E000000041C000-000000067F000040020000E0000000420000__00000073AD3FE6B8\n000000067F000040020000E000000041C000-000000067F000040020000E0000000420000__000000914E3F38F0\n000000067F000040020000E000000041C000-000000067F000040020000E0000000420000__000000931B9A2710\n000000067F000040020000E0000000420000-000000067F000040020000E0000000424000__000000574B7FF240\n000000067F000040020000E0000000420000-000000067F000040020000E0000000424000__00000073AD3FE6B8\n000000067F000040020000E0000000420000-000000067F000040020000E0000000424000__000000914E3F38F0\n000000067F000040020000E0000000420000-000000067F000040020000E0000000424000__000000931B9A2710\n000000067F000040020000E0000000424000-000000067F000040020000E0000000428000__000000574B7FF240\n000000067F000040020000E0000000424000-000000067F000040020000E0000000428000__00000073AD3FE6B8\n000000067F000040020000E0000000424000-000000067F000040020000E0000000428000__000000914E3F38F0\n000000067F000040020000E0000000424000-000000067F000040020000E0000000428000__000000931B9A2710\n000000067F000040020000E000000042400E-000000067F000040020000E000000042C9CC__0000003B6A0FFB09-
00000047441DEA39\n000000067F000040020000E0000000428000-000000067F000040020000E000000042C000__000000574B7FF240\n000000067F000040020000E0000000428000-000000067F000040020000E000000042C000__00000073AD3FE6B8\n000000067F000040020000E0000000428000-000000067F000040020000E000000042C000__000000914E3F38F0\n000000067F000040020000E0000000428000-000000067F000040020000E000000042C000__000000931B9A2710\n000000067F000040020000E000000042C000-000000067F000040020000E0000000430000__000000574B7FF240\n000000067F000040020000E000000042C000-000000067F000040020000E0000000430000__00000073AD3FE6B8\n000000067F000040020000E000000042C000-000000067F000040020000E0000000430000__000000914E3F38F0\n000000067F000040020000E000000042C000-000000067F000040020000E0000000430000__000000931B9A2710\n000000067F000040020000E000000042C9CC-000000067F000040020000E00000004353A5__0000003B6A0FFB09-00000047441DEA39\n000000067F000040020000E0000000430000-000000067F000040020000E0000000434000__000000574B7FF240\n000000067F000040020000E0000000430000-000000067F000040020000E0000000434000__00000073AD3FE6B8\n000000067F000040020000E0000000430000-000000067F000040020000E0000000434000__000000914E3F38F0\n000000067F000040020000E0000000430000-000000067F000040020000E0000000434000__000000931B9A2710\n000000067F000040020000E0000000434000-000000067F000040020000E0000000438000__000000574B7FF240\n000000067F000040020000E0000000434000-000000067F000040020000E0000000438000__00000073AD3FE6B8\n000000067F000040020000E0000000434000-000000067F000040020000E0000000438000__000000914E3F38F0\n000000067F000040020000E0000000434000-000000067F000040020000E0000000438000__000000931B9A2710\n000000067F000040020000E00000004353A5-000000067F000040020000E000000043DD9A__0000003B6A0FFB09-00000047441DEA39\n000000067F000040020000E0000000438000-000000067F000040020000E000000043C000__000000574B7FF240\n000000067F000040020000E0000000438000-000000067F000040020000E000000043C000__00000073AD3FE6B8\n000000067F000040020000E0000000438000-000000067F000040020000E000000043C000__000000914E3F3
8F0\n000000067F000040020000E0000000438000-000000067F000040020000E000000043C000__000000931B9A2710\n000000067F000040020000E000000043C000-000000067F000040020000E0000000440000__000000574B7FF240\n000000067F000040020000E000000043C000-000000067F000040020000E0000000440000__00000073AD3FE6B8\n000000067F000040020000E000000043C000-000000067F000040020000E0000000440000__000000914E3F38F0\n000000067F000040020000E000000043C000-000000067F000040020000E0000000440000__000000931B9A2710\n000000067F000040020000E000000043DD9A-000000067F000040020000E0000000446792__0000003B6A0FFB09-00000047441DEA39\n000000067F000040020000E0000000440000-000000067F000040020000E0000000444000__000000574B7FF240\n000000067F000040020000E0000000440000-000000067F000040020000E0000000444000__00000073AD3FE6B8\n000000067F000040020000E0000000440000-000000067F000040020000E0000000444000__000000914E3F38F0\n000000067F000040020000E0000000440000-000000067F000040020000E0000000444000__000000931B9A2710\n000000067F000040020000E0000000444000-000000067F000040020000E0000000448000__000000574B7FF240\n000000067F000040020000E0000000444000-000000067F000040020000E0000000448000__00000073AD3FE6B8\n000000067F000040020000E0000000444000-000000067F000040020000E0000000448000__000000914E3F38F0\n000000067F000040020000E0000000444000-000000067F000040020000E0000000448000__000000931B9A2710\n000000067F000040020000E0000000446792-000000067F000040020000E000000044F178__0000003B6A0FFB09-00000047441DEA39\n000000067F000040020000E0000000448000-000000067F000040020000E000000044C000__000000574B7FF240\n000000067F000040020000E0000000448000-000000067F000040020000E000000044C000__00000073AD3FE6B8\n000000067F000040020000E0000000448000-000000067F000040020000E000000044C000__000000914E3F38F0\n000000067F000040020000E0000000448000-000000067F000040020000E000000044C000__000000931B9A2710\n000000067F000040020000E000000044C000-000000067F000040020000E0000000450000__000000574B7FF240\n000000067F000040020000E000000044C000-000000067F000040020000E0000000450000__00000073AD3FE6B8\n00000006
7F000040020000E000000044C000-000000067F000040020000E0000000450000__000000914E3F38F0\n000000067F000040020000E000000044C000-000000067F000040020000E0000000450000__000000931B9A2710\n000000067F000040020000E000000044F178-000000067F000040020000E0000000457B4D__0000003B6A0FFB09-00000047441DEA39\n000000067F000040020000E0000000450000-000000067F000040020000E0000000454000__000000574B7FF240\n000000067F000040020000E0000000450000-000000067F000040020000E0000000454000__00000073AD3FE6B8\n000000067F000040020000E0000000450000-000000067F000040020000E0000000454000__000000914E3F38F0\n000000067F000040020000E0000000450000-000000067F000040020000E0000000454000__000000931B9A2710\n000000067F000040020000E0000000454000-000000067F000040020000E0000000458000__000000574B7FF240\n000000067F000040020000E0000000454000-000000067F000040020000E0000000458000__00000073AD3FE6B8\n000000067F000040020000E0000000454000-000000067F000040020000E0000000458000__000000914E3F38F0\n000000067F000040020000E0000000454000-000000067F000040020000E0000000458000__000000931B9A2710\n000000067F000040020000E0000000457B4D-000000067F000040020000E0000000460512__0000003B6A0FFB09-00000047441DEA39\n000000067F000040020000E0000000458000-000000067F000040020000E000000045C000__000000574B7FF240\n000000067F000040020000E0000000458000-000000067F000040020000E000000045C000__00000073AD3FE6B8\n000000067F000040020000E0000000458000-000000067F000040020000E000000045C000__000000914E3F38F0\n000000067F000040020000E0000000458000-000000067F000040020000E000000045C000__000000931B9A2710\n000000067F000040020000E000000045C000-000000067F000040020000E0000000460000__000000574B7FF240\n000000067F000040020000E000000045C000-000000067F000040020000E0000000460000__00000073AD3FE6B8\n000000067F000040020000E000000045C000-000000067F000040020000E0000000460000__000000914E3F38F0\n000000067F000040020000E000000045C000-000000067F000040020000E0000000460000__000000931B9A2710\n000000067F000040020000E0000000460000-000000067F000040020000E0000000464000__000000574B7FF240\n000000067F00004002000
0E0000000460000-000000067F000040020000E0000000464000__00000073AD3FE6B8\n000000067F000040020000E0000000460000-000000067F000040020000E0000000464000__000000914E3F38F0\n000000067F000040020000E0000000460000-000000067F000040020000E0000000464000__000000931B9A2710\n000000067F000040020000E0000000460512-000000067F000040020000E0000000468ECC__0000003B6A0FFB09-00000047441DEA39\n000000067F000040020000E0000000464000-000000067F000040020000E0000000468000__000000574B7FF240\n000000067F000040020000E0000000464000-000000067F000040020000E0000000468000__00000073AD3FE6B8\n000000067F000040020000E0000000464000-000000067F000040020000E0000000468000__000000914E3F38F0\n000000067F000040020000E0000000464000-000000067F000040020000E0000000468000__000000931B9A2710\n000000067F000040020000E0000000468000-000000067F000040020000E000000046C000__000000574B7FF240\n000000067F000040020000E0000000468000-000000067F000040020000E000000046C000__00000073AD3FE6B8\n000000067F000040020000E0000000468000-000000067F000040020000E000000046C000__000000914E3F38F0\n000000067F000040020000E0000000468000-000000067F000040020000E000000046C000__000000931B9A2710\n000000067F000040020000E0000000468ECC-000000067F000040020000E00000004718AA__0000003B6A0FFB09-00000047441DEA39\n000000067F000040020000E000000046C000-000000067F000040020000E0000000470000__000000574B7FF240\n000000067F000040020000E000000046C000-000000067F000040020000E0000000470000__00000073AD3FE6B8\n000000067F000040020000E000000046C000-000000067F000040020000E0000000470000__000000914E3F38F0\n000000067F000040020000E000000046C000-000000067F000040020000E0000000470000__000000931B9A2710\n000000067F000040020000E0000000470000-000000067F000040020000E0000000474000__000000574B7FF240\n000000067F000040020000E0000000470000-000000067F000040020000E0000000474000__00000073AD3FE6B8\n000000067F000040020000E0000000470000-000000067F000040020000E0000000474000__000000914E3F38F0\n000000067F000040020000E0000000470000-000000067F000040020000E0000000474000__000000931B9A2710\n000000067F000040020000E00000004718
AA-000000067F000040020000E000000047A299__0000003B6A0FFB09-00000047441DEA39\n000000067F000040020000E0000000474000-000000067F000040020000E0000000478000__000000574B7FF240\n000000067F000040020000E0000000474000-000000067F000040020000E0000000478000__00000073AD3FE6B8\n000000067F000040020000E0000000474000-000000067F000040020000E0000000478000__000000914E3F38F0\n000000067F000040020000E0000000474000-000000067F000040020000E0000000478000__000000931B9A2710\n000000067F000040020000E0000000478000-000000067F000040020000E000000047C000__000000574B7FF240\n000000067F000040020000E0000000478000-000000067F000040020000E000000047C000__00000073AD3FE6B8\n000000067F000040020000E0000000478000-000000067F000040020000E000000047C000__000000914E3F38F0\n000000067F000040020000E0000000478000-000000067F000040020000E000000047C000__000000931B9A2710\n000000067F000040020000E000000047A299-000000067F000040020000E0000000482C8C__0000003B6A0FFB09-00000047441DEA39\n000000067F000040020000E000000047C000-000000067F000040020000E0000000480000__000000574B7FF240\n000000067F000040020000E000000047C000-000000067F000040020000E0000000480000__00000073AD3FE6B8\n000000067F000040020000E000000047C000-000000067F000040020000E0000000480000__000000914E3F38F0\n000000067F000040020000E000000047C000-000000067F000040020000E0000000480000__000000931B9A2710\n000000067F000040020000E0000000480000-000000067F000040020000E0000000484000__000000574B7FF240\n000000067F000040020000E0000000480000-000000067F000040020000E0000000484000__00000073AD3FE6B8\n000000067F000040020000E0000000480000-000000067F000040020000E0000000484000__000000914E3F38F0\n000000067F000040020000E0000000480000-000000067F000040020000E0000000484000__000000931B9A2710\n000000067F000040020000E0000000482C8C-000000067F000040020000E000000048B675__0000003B6A0FFB09-00000047441DEA39\n000000067F000040020000E0000000484000-000000067F000040020000E0000000488000__000000574B7FF240\n000000067F000040020000E0000000484000-000000067F000040020000E0000000488000__00000073AD3FE6B8\n000000067F000040020000E0000000
484000-000000067F000040020000E0000000488000__000000914E3F38F0\n000000067F000040020000E0000000484000-000000067F000040020000E0000000488000__000000931B9A2710\n000000067F000040020000E0000000488000-000000067F000040020000E000000048C000__000000574B7FF240\n000000067F000040020000E0000000488000-000000067F000040020000E000000048C000__00000073AD3FE6B8\n000000067F000040020000E0000000488000-000000067F000040020000E000000048C000__000000914E3F38F0\n000000067F000040020000E0000000488000-000000067F000040020000E000000048C000__000000931B9A2710\n000000067F000040020000E000000048B675-000000067F000040020000E0000000494053__0000003B6A0FFB09-00000047441DEA39\n000000067F000040020000E000000048C000-000000067F000040020000E0000000490000__000000574B7FF240\n000000067F000040020000E000000048C000-000000067F000040020000E0000000490000__00000073AD3FE6B8\n000000067F000040020000E000000048C000-000000067F000040020000E0000000490000__000000914E3F38F0\n000000067F000040020000E000000048C000-000000067F000040020000E0000000490000__000000931B9A2710\n000000067F000040020000E0000000490000-000000067F000040020000E0000000494000__000000574B7FF240\n000000067F000040020000E0000000490000-000000067F000040020000E0000000494000__00000073AD3FE6B8\n000000067F000040020000E0000000490000-000000067F000040020000E0000000494000__000000914E3F38F0\n000000067F000040020000E0000000490000-000000067F000040020000E0000000494000__000000931B9A2710\n000000067F000040020000E0000000494000-000000067F000040020000E0000000498000__000000574B7FF240\n000000067F000040020000E0000000494000-000000067F000040020000E0000000498000__00000073AD3FE6B8\n000000067F000040020000E0000000494000-000000067F000040020000E0000000498000__000000914E3F38F0\n000000067F000040020000E0000000494000-000000067F000040020000E0000000498000__000000931B9A2710\n000000067F000040020000E0000000494053-000000067F000040020000E000000049CA16__0000003B6A0FFB09-00000047441DEA39\n000000067F000040020000E0000000498000-000000067F000040020000E000000049C000__000000574B7FF240\n000000067F000040020000E0000000498000-000000
067F000040020000E000000049C000__00000073AD3FE6B8\n000000067F000040020000E0000000498000-000000067F000040020000E000000049C000__000000914E3F38F0\n000000067F000040020000E0000000498000-000000067F000040020000E000000049C000__000000931B9A2710\n000000067F000040020000E000000049C000-000000067F000040020000E00000004A0000__000000574B7FF240\n000000067F000040020000E000000049C000-000000067F000040020000E00000004A0000__00000073AD3FE6B8\n000000067F000040020000E000000049C000-000000067F000040020000E00000004A0000__000000914E3F38F0\n000000067F000040020000E000000049C000-000000067F000040020000E00000004A0000__000000931B9A2710\n000000067F000040020000E000000049CA16-000000067F000040020000E00000004A53D6__0000003B6A0FFB09-00000047441DEA39\n000000067F000040020000E00000004A0000-000000067F000040020000E00000004A4000__000000574B7FF240\n000000067F000040020000E00000004A0000-000000067F000040020000E00000004A4000__00000073AD3FE6B8\n000000067F000040020000E00000004A0000-000000067F000040020000E00000004A4000__000000914E3F38F0\n000000067F000040020000E00000004A0000-000000067F000040020000E00000004A4000__000000931B9A2710\n000000067F000040020000E00000004A4000-000000067F000040020000E00000004A8000__000000574B7FF240\n000000067F000040020000E00000004A4000-000000067F000040020000E00000004A8000__00000073AD3FE6B8\n000000067F000040020000E00000004A4000-000000067F000040020000E00000004A8000__000000914E3F38F0\n000000067F000040020000E00000004A4000-000000067F000040020000E00000004A8000__000000931B9A2710\n000000067F000040020000E00000004A53D6-000000067F000040020000E00000004ADDB9__0000003B6A0FFB09-00000047441DEA39\n000000067F000040020000E00000004A8000-000000067F000040020000E00000004AC000__000000574B7FF240\n000000067F000040020000E00000004A8000-000000067F000040020000E00000004AC000__00000073AD3FE6B8\n000000067F000040020000E00000004A8000-000000067F000040020000E00000004AC000__000000914E3F38F0\n000000067F000040020000E00000004A8000-000000067F000040020000E00000004AC000__000000931B9A2710\n000000067F000040020000E00000004AC000-000000067F000040020
000E00000004B0000__000000574B7FF240\n000000067F000040020000E00000004AC000-000000067F000040020000E00000004B0000__00000073AD3FE6B8\n000000067F000040020000E00000004AC000-000000067F000040020000E00000004B0000__000000914E3F38F0\n000000067F000040020000E00000004AC000-000000067F000040020000E00000004B0000__000000931B9A2710\n000000067F000040020000E00000004ADDB9-000000067F000040020000E00000004B67B7__0000003B6A0FFB09-00000047441DEA39\n000000067F000040020000E00000004B0000-000000067F000040020000E00000004B4000__000000574B7FF240\n000000067F000040020000E00000004B0000-000000067F000040020000E00000004B4000__00000073AD3FE6B8\n000000067F000040020000E00000004B0000-000000067F000040020000E00000004B4000__000000914E3F38F0\n000000067F000040020000E00000004B0000-000000067F000040020000E00000004B4000__000000931B9A2710\n000000067F000040020000E00000004B4000-000000067F000040020000E00000004B8000__000000574B7FF240\n000000067F000040020000E00000004B4000-000000067F000040020000E00000004B8000__00000073AD3FE6B8\n000000067F000040020000E00000004B4000-000000067F000040020000E00000004B8000__000000914E3F38F0\n000000067F000040020000E00000004B4000-000000067F000040020000E00000004B8000__000000931B9A2710\n000000067F000040020000E00000004B67B7-000000067F000040020000E00000004BF1AD__0000003B6A0FFB09-00000047441DEA39\n000000067F000040020000E00000004B8000-000000067F000040020000E00000004BC000__000000574B7FF240\n000000067F000040020000E00000004B8000-000000067F000040020000E00000004BC000__00000073AD3FE6B8\n000000067F000040020000E00000004B8000-000000067F000040020000E00000004BC000__000000914E3F38F0\n000000067F000040020000E00000004B8000-000000067F000040020000E00000004BC000__000000931B9A2710\n000000067F000040020000E00000004BC000-000000067F000040020000E00000004C0000__000000574B7FF240\n000000067F000040020000E00000004BC000-000000067F000040020000E00000004C0000__00000073AD3FE6B8\n000000067F000040020000E00000004BC000-000000067F000040020000E00000004C0000__000000914E3F38F0\n000000067F000040020000E00000004BC000-000000067F000040020000E00000004C
0000__000000931B9A2710\n000000067F000040020000E00000004BF1AD-000000067F000040020000E00000004C7B96__0000003B6A0FFB09-00000047441DEA39\n000000067F000040020000E00000004C0000-000000067F000040020000E00000004C4000__000000574B7FF240\n000000067F000040020000E00000004C0000-000000067F000040020000E00000004C4000__00000073AD3FE6B8\n000000067F000040020000E00000004C0000-000000067F000040020000E00000004C4000__000000914E3F38F0\n000000067F000040020000E00000004C0000-000000067F000040020000E00000004C4000__000000931B9A2710\n000000067F000040020000E00000004C4000-000000067F000040020000E00000004C8000__000000574B7FF240\n000000067F000040020000E00000004C4000-000000067F000040020000E00000004C8000__00000073AD3FE6B8\n000000067F000040020000E00000004C4000-000000067F000040020000E00000004C8000__000000914E3F38F0\n000000067F000040020000E00000004C4000-000000067F000040020000E00000004C8000__000000931B9A2710\n000000067F000040020000E00000004C7B96-000000067F000040020000E00000004D0568__0000003B6A0FFB09-00000047441DEA39\n000000067F000040020000E00000004C8000-000000067F000040020000E00000004CC000__000000574B7FF240\n000000067F000040020000E00000004C8000-000000067F000040020000E00000004CC000__00000073AD3FE6B8\n000000067F000040020000E00000004C8000-000000067F000040020000E00000004CC000__000000914E3F38F0\n000000067F000040020000E00000004C8000-000000067F000040020000E00000004CC000__000000931B9A2710\n000000067F000040020000E00000004CC000-000000067F000040020000E00000004D0000__000000574B7FF240\n000000067F000040020000E00000004CC000-000000067F000040020000E00000004D0000__00000073AD3FE6B8\n000000067F000040020000E00000004CC000-000000067F000040020000E00000004D0000__000000914E3F38F0\n000000067F000040020000E00000004CC000-000000067F000040020000E00000004D0000__000000931B9A2710\n000000067F000040020000E00000004D0000-000000067F000040020000E00000004D4000__000000574B7FF240\n000000067F000040020000E00000004D0000-000000067F000040020000E00000004D4000__00000073AD3FE6B8\n000000067F000040020000E00000004D0000-000000067F000040020000E00000004D4000__0000009
14E3F38F0\n000000067F000040020000E00000004D0000-000000067F000040020000E00000004D4000__000000931B9A2710\n000000067F000040020000E00000004D0568-000000067F000040020000E00000004D8F2E__0000003B6A0FFB09-00000047441DEA39\n000000067F000040020000E00000004D4000-000000067F000040020000E00000004D8000__000000574B7FF240\n000000067F000040020000E00000004D4000-000000067F000040020000E00000004D8000__00000073AD3FE6B8\n000000067F000040020000E00000004D4000-000000067F000040020000E00000004D8000__000000914E3F38F0\n000000067F000040020000E00000004D4000-000000067F000040020000E00000004D8000__000000931B9A2710\n000000067F000040020000E00000004D8000-000000067F000040020000E00000004DC000__000000574B7FF240\n000000067F000040020000E00000004D8000-000000067F000040020000E00000004DC000__00000073AD3FE6B8\n000000067F000040020000E00000004D8000-000000067F000040020000E00000004DC000__000000914E3F38F0\n000000067F000040020000E00000004D8000-000000067F000040020000E00000004DC000__000000931B9A2710\n000000067F000040020000E00000004D8F2E-000000067F000040020000E00000004E18E6__0000003B6A0FFB09-00000047441DEA39\n000000067F000040020000E00000004DC000-000000067F000040020000E00000004E0000__000000574B7FF240\n000000067F000040020000E00000004DC000-000000067F000040020000E00000004E0000__00000073AD3FE6B8\n000000067F000040020000E00000004DC000-000000067F000040020000E00000004E0000__000000914E3F38F0\n000000067F000040020000E00000004DC000-000000067F000040020000E00000004E0000__000000931B9A2710\n000000067F000040020000E00000004E0000-000000067F000040020000E00000004E4000__000000574B7FF240\n000000067F000040020000E00000004E0000-000000067F000040020000E00000004E4000__00000073AD3FE6B8\n000000067F000040020000E00000004E0000-000000067F000040020000E00000004E4000__000000914E3F38F0\n000000067F000040020000E00000004E0000-000000067F000040020000E00000004E4000__000000931B9A2710\n000000067F000040020000E00000004E18E6-000000067F000040020000E00000004EA2D3__0000003B6A0FFB09-00000047441DEA39\n000000067F000040020000E00000004E4000-000000067F000040020000E00000004E8000__000
000574B7FF240\n000000067F000040020000E00000004E4000-000000067F000040020000E00000004E8000__00000073AD3FE6B8\n000000067F000040020000E00000004E4000-000000067F000040020000E00000004E8000__000000914E3F38F0\n000000067F000040020000E00000004E4000-000000067F000040020000E00000004E8000__000000931B9A2710\n000000067F000040020000E00000004E8000-000000067F000040020000E00000004EC000__000000574B7FF240\n000000067F000040020000E00000004E8000-000000067F000040020000E00000004EC000__00000073AD3FE6B8\n000000067F000040020000E00000004E8000-000000067F000040020000E00000004EC000__000000914E3F38F0\n000000067F000040020000E00000004E8000-000000067F000040020000E00000004EC000__000000931B9A2710\n000000067F000040020000E00000004EA2D3-000000067F000040020000E00000004F2CC7__0000003B6A0FFB09-00000047441DEA39\n000000067F000040020000E00000004EC000-000000067F000040020000E00000004F0000__000000574B7FF240\n000000067F000040020000E00000004EC000-000000067F000040020000E00000004F0000__00000073AD3FE6B8\n000000067F000040020000E00000004EC000-000000067F000040020000E00000004F0000__000000914E3F38F0\n000000067F000040020000E00000004EC000-000000067F000040020000E00000004F0000__000000931B9A2710\n000000067F000040020000E00000004F0000-000000067F000040020000E00000004F4000__000000574B7FF240\n000000067F000040020000E00000004F0000-000000067F000040020000E00000004F4000__00000073AD3FE6B8\n000000067F000040020000E00000004F0000-000000067F000040020000E00000004F4000__000000914E3F38F0\n000000067F000040020000E00000004F0000-000000067F000040020000E00000004F4000__000000931B9A2710\n000000067F000040020000E00000004F2CC7-000000067F000040020000E00000004FB6B8__0000003B6A0FFB09-00000047441DEA39\n000000067F000040020000E00000004F4000-000000067F000040020000E00000004F8000__000000574B7FF240\n000000067F000040020000E00000004F4000-000000067F000040020000E00000004F8000__00000073AD3FE6B8\n000000067F000040020000E00000004F4000-000000067F000040020000E00000004F8000__000000914E3F38F0\n000000067F000040020000E00000004F4000-000000067F000040020000E00000004F8000__000000931B9A2710
\n000000067F000040020000E00000004F8000-000000067F000040020000E00000004FC000__000000574B7FF240\n000000067F000040020000E00000004F8000-000000067F000040020000E00000004FC000__00000073AD3FE6B8\n000000067F000040020000E00000004F8000-000000067F000040020000E00000004FC000__000000914E3F38F0\n000000067F000040020000E00000004F8000-000000067F000040020000E00000004FC000__000000931B9A2710\n000000067F000040020000E00000004FB6B8-000000067F000040020000E00000005040A3__0000003B6A0FFB09-00000047441DEA39\n000000067F000040020000E00000004FC000-000000067F000040020000E0000000500000__000000574B7FF240\n000000067F000040020000E00000004FC000-000000067F000040020000E0000000500000__00000073AD3FE6B8\n000000067F000040020000E00000004FC000-000000067F000040020000E0000000500000__000000914E3F38F0\n000000067F000040020000E00000004FC000-000000067F000040020000E0000000500000__000000931B9A2710\n000000067F000040020000E0000000500000-000000067F000040020000E0000000504000__000000574B7FF240\n000000067F000040020000E0000000500000-000000067F000040020000E0000000504000__00000073AD3FE6B8\n000000067F000040020000E0000000500000-000000067F000040020000E0000000504000__000000914E3F38F0\n000000067F000040020000E0000000500000-000000067F000040020000E0000000504000__000000931B9A2710\n000000067F000040020000E0000000504000-000000067F000040020000E0000000508000__000000574B7FF240\n000000067F000040020000E0000000504000-000000067F000040020000E0000000508000__00000073AD3FE6B8\n000000067F000040020000E0000000504000-000000067F000040020000E0000000508000__000000914E3F38F0\n000000067F000040020000E0000000504000-000000067F000040020000E0000000508000__000000931B9A2710\n000000067F000040020000E00000005040A3-000000067F000040020000E000000050CA7A__0000003B6A0FFB09-00000047441DEA39\n000000067F000040020000E0000000508000-000000067F000040020000E000000050C000__000000574B7FF240\n000000067F000040020000E0000000508000-000000067F000040020000E000000050C000__00000073AD3FE6B8\n000000067F000040020000E0000000508000-000000067F000040020000E000000050C000__000000914E3F38F0\n000000067F0
00040020000E0000000508000-000000067F000040020000E000000050C000__000000931B9A2710\n000000067F000040020000E000000050C000-000000067F000040020000E0000000510000__000000574B7FF240\n000000067F000040020000E000000050C000-000000067F000040020000E0000000510000__00000073AD3FE6B8\n000000067F000040020000E000000050C000-000000067F000040020000E0000000510000__000000914E3F38F0\n000000067F000040020000E000000050C000-000000067F000040020000E0000000510000__000000931B9A2710\n000000067F000040020000E000000050CA7A-000000067F000040020000E0000000515448__0000003B6A0FFB09-00000047441DEA39\n000000067F000040020000E0000000510000-000000067F000040020000E0000000514000__000000574B7FF240\n000000067F000040020000E0000000510000-000000067F000040020000E0000000514000__00000073AD3FE6B8\n000000067F000040020000E0000000510000-000000067F000040020000E0000000514000__000000914E3F38F0\n000000067F000040020000E0000000510000-000000067F000040020000E0000000514000__000000931B9A2710\n000000067F000040020000E0000000514000-000000067F000040020000E0000000518000__000000574B7FF240\n000000067F000040020000E0000000514000-000000067F000040020000E0000000518000__00000073AD3FE6B8\n000000067F000040020000E0000000514000-000000067F000040020000E0000000518000__000000914E3F38F0\n000000067F000040020000E0000000514000-000000067F000040020000E0000000518000__000000931B9A2710\n000000067F000040020000E0000000515448-000000067F000040020000E000000051DE01__0000003B6A0FFB09-00000047441DEA39\n000000067F000040020000E0000000518000-000000067F000040020000E000000051C000__000000574B7FF240\n000000067F000040020000E0000000518000-000000067F000040020000E000000051C000__00000073AD3FE6B8\n000000067F000040020000E0000000518000-000000067F000040020000E000000051C000__000000914E3F38F0\n000000067F000040020000E0000000518000-000000067F000040020000E000000051C000__000000931B9A2710\n000000067F000040020000E000000051C000-000000067F000040020000E0000000520000__000000574B7FF240\n000000067F000040020000E000000051C000-000000067F000040020000E0000000520000__00000073AD3FE6B8\n000000067F000040020000E0
00000051C000-000000067F000040020000E0000000520000__000000914E3F38F0\n000000067F000040020000E000000051C000-000000067F000040020000E0000000520000__000000931B9A2710\n000000067F000040020000E000000051DE01-000000067F000040020000E00000005267E4__0000003B6A0FFB09-00000047441DEA39\n000000067F000040020000E0000000520000-000000067F000040020000E0000000524000__000000574B7FF240\n000000067F000040020000E0000000520000-000000067F000040020000E0000000524000__00000073AD3FE6B8\n000000067F000040020000E0000000520000-000000067F000040020000E0000000524000__000000914E3F38F0\n000000067F000040020000E0000000520000-000000067F000040020000E0000000524000__000000931B9A2710\n000000067F000040020000E0000000524000-000000067F000040020000E0000000528000__000000574B7FF240\n000000067F000040020000E0000000524000-000000067F000040020000E0000000528000__00000073AD3FE6B8\n000000067F000040020000E0000000524000-000000067F000040020000E0000000528000__000000914E3F38F0\n000000067F000040020000E0000000524000-000000067F000040020000E0000000528000__000000931B9A2710\n000000067F000040020000E00000005267E4-000000067F000040020000E000000052F1DD__0000003B6A0FFB09-00000047441DEA39\n000000067F000040020000E0000000528000-000000067F000040020000E000000052C000__000000574B7FF240\n000000067F000040020000E0000000528000-000000067F000040020000E000000052C000__00000073AD3FE6B8\n000000067F000040020000E0000000528000-000000067F000040020000E000000052C000__000000914E3F38F0\n000000067F000040020000E0000000528000-000000067F000040020000E000000052C000__000000931B9A2710\n000000067F000040020000E000000052C000-000000067F000040020000E0000000530000__000000574B7FF240\n000000067F000040020000E000000052C000-000000067F000040020000E0000000530000__00000073AD3FE6B8\n000000067F000040020000E000000052C000-000000067F000040020000E0000000530000__000000914E3F38F0\n000000067F000040020000E000000052C000-000000067F000040020000E0000000530000__000000931B9A2710\n000000067F000040020000E000000052F1DD-000000067F000040020000E0000000537BD3__0000003B6A0FFB09-00000047441DEA39\n000000067F0000400200
00E0000000530000-000000067F000040020000E0000000534000__000000574B7FF240\n000000067F000040020000E0000000530000-000000067F000040020000E0000000534000__00000073AD3FE6B8\n000000067F000040020000E0000000530000-000000067F000040020000E0000000534000__000000914E3F38F0\n000000067F000040020000E0000000530000-000000067F000040020000E0000000534000__000000931B9A2710\n000000067F000040020000E0000000534000-000000067F000040020000E0000000538000__000000574B7FF240\n000000067F000040020000E0000000534000-000000067F000040020000E0000000538000__00000073AD3FE6B8\n000000067F000040020000E0000000534000-000000067F000040020000E0000000538000__000000914E3F38F0\n000000067F000040020000E0000000534000-000000067F000040020000E0000000538000__000000931B9A2710\n000000067F000040020000E0000000537BD3-000000067F000040020000E00000005405B7__0000003B6A0FFB09-00000047441DEA39\n000000067F000040020000E0000000538000-000000067F000040020000E000000053C000__000000574B7FF240\n000000067F000040020000E0000000538000-000000067F000040020000E000000053C000__00000073AD3FE6B8\n000000067F000040020000E0000000538000-000000067F000040020000E000000053C000__000000914E3F38F0\n000000067F000040020000E0000000538000-000000067F000040020000E000000053C000__000000931B9A2710\n000000067F000040020000E000000053C000-000000067F000040020000E0000000540000__000000574B7FF240\n000000067F000040020000E000000053C000-000000067F000040020000E0000000540000__00000073AD3FE6B8\n000000067F000040020000E000000053C000-000000067F000040020000E0000000540000__000000914E3F38F0\n000000067F000040020000E000000053C000-000000067F000040020000E0000000540000__000000931B9A2710\n000000067F000040020000E0000000540000-000000067F000040020000E0000000544000__000000574B7FF240\n000000067F000040020000E0000000540000-000000067F000040020000E0000000544000__00000073AD3FE6B8\n000000067F000040020000E0000000540000-000000067F000040020000E0000000544000__000000914E3F38F0\n000000067F000040020000E0000000540000-000000067F000040020000E0000000544000__000000931B9A2710\n000000067F000040020000E00000005405B7-000000067F000
040020000E0000000548F92__0000003B6A0FFB09-00000047441DEA39\n000000067F000040020000E0000000544000-000000067F000040020000E0000000548000__000000574B7FF240\n000000067F000040020000E0000000544000-000000067F000040020000E0000000548000__00000073AD3FE6B8\n000000067F000040020000E0000000544000-000000067F000040020000E0000000548000__000000914E3F38F0\n000000067F000040020000E0000000544000-000000067F000040020000E0000000548000__000000931B9A2710\n000000067F000040020000E0000000548000-000000067F000040020000E000000054C000__000000574B7FF240\n000000067F000040020000E0000000548000-000000067F000040020000E000000054C000__00000073AD3FE6B8\n000000067F000040020000E0000000548000-000000067F000040020000E000000054C000__000000914E3F38F0\n000000067F000040020000E0000000548000-000000067F000040020000E000000054C000__000000931B9A2710\n000000067F000040020000E0000000548F92-000000067F000040020000E000000055195C__0000003B6A0FFB09-00000047441DEA39\n000000067F000040020000E000000054C000-000000067F000040020000E0000000550000__000000574B7FF240\n000000067F000040020000E000000054C000-000000067F000040020000E0000000550000__00000073AD3FE6B8\n000000067F000040020000E000000054C000-000000067F000040020000E0000000550000__000000914E3F38F0\n000000067F000040020000E000000054C000-000000067F000040020000E0000000550000__000000931B9A2710\n000000067F000040020000E0000000550000-000000067F000040020000E0000000554000__000000574B7FF240\n000000067F000040020000E0000000550000-000000067F000040020000E0000000554000__00000073AD3FE6B8\n000000067F000040020000E0000000550000-000000067F000040020000E0000000554000__000000914E3F38F0\n000000067F000040020000E0000000550000-000000067F000040020000E0000000554000__000000931B9A2710\n000000067F000040020000E000000055195C-000000067F000040020000E000000055A319__0000003B6A0FFB09-00000047441DEA39\n000000067F000040020000E0000000554000-000000067F000040020000E0000000558000__000000574B7FF240\n000000067F000040020000E0000000554000-000000067F000040020000E0000000558000__00000073AD3FE6B8\n000000067F000040020000E0000000554000-000000067
F000040020000E0000000558000__000000914E3F38F0\n000000067F000040020000E0000000554000-000000067F000040020000E0000000558000__000000931B9A2710\n000000067F000040020000E0000000558000-000000067F000040020000E000000055C000__000000574B7FF240\n000000067F000040020000E0000000558000-000000067F000040020000E000000055C000__00000073AD3FE6B8\n000000067F000040020000E0000000558000-000000067F000040020000E000000055C000__000000914E3F38F0\n000000067F000040020000E0000000558000-000000067F000040020000E000000055C000__000000931B9A2710\n000000067F000040020000E000000055A319-000000067F000040020000E0000000562D04__0000003B6A0FFB09-00000047441DEA39\n000000067F000040020000E000000055C000-000000067F000040020000E0000000560000__000000574B7FF240\n000000067F000040020000E000000055C000-000000067F000040020000E0000000560000__00000073AD3FE6B8\n000000067F000040020000E000000055C000-000000067F000040020000E0000000560000__000000914E3F38F0\n000000067F000040020000E000000055C000-000000067F000040020000E0000000560000__000000931B9A2710\n000000067F000040020000E0000000560000-000000067F000040020000E0000000564000__000000574B7FF240\n000000067F000040020000E0000000560000-000000067F000040020000E0000000564000__00000073AD3FE6B8\n000000067F000040020000E0000000560000-000000067F000040020000E0000000564000__000000914E3F38F0\n000000067F000040020000E0000000560000-000000067F000040020000E0000000564000__000000931B9A2710\n000000067F000040020000E0000000562D04-000000067F000040020000E000000056B6E9__0000003B6A0FFB09-00000047441DEA39\n000000067F000040020000E0000000564000-000000067F000040020000E0000000568000__000000574B7FF240\n000000067F000040020000E0000000564000-000000067F000040020000E0000000568000__00000073AD3FE6B8\n000000067F000040020000E0000000564000-000000067F000040020000E0000000568000__000000914E3F38F0\n000000067F000040020000E0000000564000-000000067F000040020000E0000000568000__000000931B9A2710\n000000067F000040020000E0000000568000-000000067F000040020000E000000056C000__000000574B7FF240\n000000067F000040020000E0000000568000-000000067F000040020000
E000000056C000__00000073AD3FE6B8\n000000067F000040020000E0000000568000-000000067F000040020000E000000056C000__000000914E3F38F0\n000000067F000040020000E0000000568000-000000067F000040020000E000000056C000__000000931B9A2710\n000000067F000040020000E000000056B6E9-000000067F000040020000E00000005740DF__0000003B6A0FFB09-00000047441DEA39\n000000067F000040020000E000000056C000-000000067F000040020000E0000000570000__000000574B7FF240\n000000067F000040020000E000000056C000-000000067F000040020000E0000000570000__00000073AD3FE6B8\n000000067F000040020000E000000056C000-000000067F000040020000E0000000570000__000000914E3F38F0\n000000067F000040020000E000000056C000-000000067F000040020000E0000000570000__000000931B9A2710\n000000067F000040020000E0000000570000-000000067F000040020000E0000000574000__000000574B7FF240\n000000067F000040020000E0000000570000-000000067F000040020000E0000000574000__00000073AD3FE6B8\n000000067F000040020000E0000000570000-000000067F000040020000E0000000574000__000000914E3F38F0\n000000067F000040020000E0000000570000-000000067F000040020000E0000000574000__000000931B9A2710\n000000067F000040020000E0000000574000-000000067F000040020000E0000000578000__000000574B7FF240\n000000067F000040020000E0000000574000-000000067F000040020000E0000000578000__00000073AD3FE6B8\n000000067F000040020000E0000000574000-000000067F000040020000E0000000578000__000000914E3F38F0\n000000067F000040020000E0000000574000-000000067F000040020000E0000000578000__000000931B9A2710\n000000067F000040020000E00000005740DF-000000067F000040020000E000000057CAB9__0000003B6A0FFB09-00000047441DEA39\n000000067F000040020000E0000000578000-000000067F000040020000E000000057C000__000000574B7FF240\n000000067F000040020000E0000000578000-000000067F000040020000E000000057C000__00000073AD3FE6B8\n000000067F000040020000E0000000578000-000000067F000040020000E000000057C000__000000914E3F38F0\n000000067F000040020000E0000000578000-000000067F000040020000E000000057C000__000000931B9A2710\n000000067F000040020000E000000057C000-000000067F000040020000E000000058000
0__000000574B7FF240\n000000067F000040020000E000000057C000-000000067F000040020000E0000000580000__00000073AD3FE6B8\n000000067F000040020000E000000057C000-000000067F000040020000E0000000580000__000000914E3F38F0\n000000067F000040020000E000000057C000-000000067F000040020000E0000000580000__000000931B9A2710\n000000067F000040020000E000000057CAB9-000000067F000040020000E0000000585495__0000003B6A0FFB09-00000047441DEA39\n000000067F000040020000E0000000580000-000000067F000040020000E0000000584000__000000574B7FF240\n000000067F000040020000E0000000580000-000000067F000040020000E0000000584000__00000073AD3FE6B8\n000000067F000040020000E0000000580000-000000067F000040020000E0000000584000__000000914E3F38F0\n000000067F000040020000E0000000580000-000000067F000040020000E0000000584000__000000931B9A2710\n000000067F000040020000E0000000584000-000000067F000040020000E0000000588000__000000574B7FF240\n000000067F000040020000E0000000584000-000000067F000040020000E0000000588000__00000073AD3FE6B8\n000000067F000040020000E0000000584000-000000067F000040020000E0000000588000__000000914E3F38F0\n000000067F000040020000E0000000584000-000000067F000040020000E0000000588000__000000931B9A2710\n000000067F000040020000E0000000585495-000000067F000040020000E000000058DE64__0000003B6A0FFB09-00000047441DEA39\n000000067F000040020000E0000000588000-000000067F000040020000E000000058C000__000000574B7FF240\n000000067F000040020000E0000000588000-000000067F000040020000E000000058C000__00000073AD3FE6B8\n000000067F000040020000E0000000588000-000000067F000040020000E000000058C000__000000914E3F38F0\n000000067F000040020000E0000000588000-000000067F000040020000E000000058C000__000000931B9A2710\n000000067F000040020000E000000058C000-000000067F000040020000E0000000590000__000000574B7FF240\n000000067F000040020000E000000058C000-000000067F000040020000E0000000590000__00000073AD3FE6B8\n000000067F000040020000E000000058C000-000000067F000040020000E0000000590000__000000914E3F38F0\n000000067F000040020000E000000058C000-000000067F000040020000E0000000590000__000000931B
9A2710\n000000067F000040020000E000000058DE64-000000067F000040020000E000000059682F__0000003B6A0FFB09-00000047441DEA39\n000000067F000040020000E0000000590000-000000067F000040020000E0000000594000__000000574B7FF240\n000000067F000040020000E0000000590000-000000067F000040020000E0000000594000__00000073AD3FE6B8\n000000067F000040020000E0000000590000-000000067F000040020000E0000000594000__000000914E3F38F0\n000000067F000040020000E0000000590000-000000067F000040020000E0000000594000__000000931B9A2710\n000000067F000040020000E0000000594000-000000067F000040020000E0000000598000__000000574B7FF240\n000000067F000040020000E0000000594000-000000067F000040020000E0000000598000__00000073AD3FE6B8\n000000067F000040020000E0000000594000-000000067F000040020000E0000000598000__000000914E3F38F0\n000000067F000040020000E0000000594000-000000067F000040020000E0000000598000__000000931B9A2710\n000000067F000040020000E000000059682F-000000067F000040020000E000000059F20F__0000003B6A0FFB09-00000047441DEA39\n000000067F000040020000E0000000598000-000000067F000040020000E000000059C000__000000574B7FF240\n000000067F000040020000E0000000598000-000000067F000040020000E000000059C000__00000073AD3FE6B8\n000000067F000040020000E0000000598000-000000067F000040020000E000000059C000__000000914E3F38F0\n000000067F000040020000E0000000598000-000000067F000040020000E000000059C000__000000931B9A2710\n000000067F000040020000E000000059C000-000000067F000040020000E00000005A0000__000000574B7FF240\n000000067F000040020000E000000059C000-000000067F000040020000E00000005A0000__00000073AD3FE6B8\n000000067F000040020000E000000059C000-000000067F000040020000E00000005A0000__000000914E3F38F0\n000000067F000040020000E000000059C000-000000067F000040020000E00000005A0000__000000931B9A2710\n000000067F000040020000E000000059F20F-000000067F000040020000E00000005A7BFC__0000003B6A0FFB09-00000047441DEA39\n000000067F000040020000E00000005A0000-000000067F000040020000E00000005A4000__000000574B7FF240\n000000067F000040020000E00000005A0000-000000067F000040020000E00000005A4000__000000
73AD3FE6B8\n000000067F000040020000E00000005A0000-000000067F000040020000E00000005A4000__000000914E3F38F0\n000000067F000040020000E00000005A0000-000000067F000040020000E00000005A4000__000000931B9A2710\n000000067F000040020000E00000005A4000-000000067F000040020000E00000005A8000__000000574B7FF240\n000000067F000040020000E00000005A4000-000000067F000040020000E00000005A8000__00000073AD3FE6B8\n000000067F000040020000E00000005A4000-000000067F000040020000E00000005A8000__000000914E3F38F0\n000000067F000040020000E00000005A4000-000000067F000040020000E00000005A8000__000000931B9A2710\n000000067F000040020000E00000005A7BFC-000000067F000040020000E00000005B05EF__0000003B6A0FFB09-00000047441DEA39\n000000067F000040020000E00000005A8000-000000067F000040020000E00000005AC000__000000574B7FF240\n000000067F000040020000E00000005A8000-000000067F000040020000E00000005AC000__00000073AD3FE6B8\n000000067F000040020000E00000005A8000-000000067F000040020000E00000005AC000__000000914E3F38F0\n000000067F000040020000E00000005A8000-000000067F000040020000E00000005AC000__000000931B9A2710\n000000067F000040020000E00000005AC000-000000067F000040020000E00000005B0000__000000574B7FF240\n000000067F000040020000E00000005AC000-000000067F000040020000E00000005B0000__00000073AD3FE6B8\n000000067F000040020000E00000005AC000-000000067F000040020000E00000005B0000__000000914E3F38F0\n000000067F000040020000E00000005AC000-000000067F000040020000E00000005B0000__000000931B9A2710\n000000067F000040020000E00000005B0000-000000067F000040020000E00000005B4000__000000574B7FF240\n000000067F000040020000E00000005B0000-000000067F000040020000E00000005B4000__00000073AD3FE6B8\n000000067F000040020000E00000005B0000-000000067F000040020000E00000005B4000__000000914E3F38F0\n000000067F000040020000E00000005B0000-000000067F000040020000E00000005B4000__000000931B9A2710\n000000067F000040020000E00000005B05EF-000000067F000040020000E00000005B8FCE__0000003B6A0FFB09-00000047441DEA39\n000000067F000040020000E00000005B4000-000000067F000040020000E00000005B8000__000000574B7FF240\n0
00000067F000040020000E00000005B4000-000000067F000040020000E00000005B8000__00000073AD3FE6B8\n000000067F000040020000E00000005B4000-000000067F000040020000E00000005B8000__000000914E3F38F0\n000000067F000040020000E00000005B4000-000000067F000040020000E00000005B8000__000000931B9A2710\n000000067F000040020000E00000005B8000-000000067F000040020000E00000005BC000__000000574B7FF240\n000000067F000040020000E00000005B8000-000000067F000040020000E00000005BC000__00000073AD3FE6B8\n000000067F000040020000E00000005B8000-000000067F000040020000E00000005BC000__000000914E3F38F0\n000000067F000040020000E00000005B8000-000000067F000040020000E00000005BC000__000000931B9A2710\n000000067F000040020000E00000005B8FCE-000000067F000040020000E00000005C19AA__0000003B6A0FFB09-00000047441DEA39\n000000067F000040020000E00000005BC000-000000067F000040020000E00000005C0000__000000574B7FF240\n000000067F000040020000E00000005BC000-000000067F000040020000E00000005C0000__00000073AD3FE6B8\n000000067F000040020000E00000005BC000-000000067F000040020000E00000005C0000__000000914E3F38F0\n000000067F000040020000E00000005BC000-000000067F000040020000E00000005C0000__000000931B9A2710\n000000067F000040020000E00000005C0000-000000067F000040020000E00000005C4000__000000574B7FF240\n000000067F000040020000E00000005C0000-000000067F000040020000E00000005C4000__00000073AD3FE6B8\n000000067F000040020000E00000005C0000-000000067F000040020000E00000005C4000__000000914E3F38F0\n000000067F000040020000E00000005C0000-000000067F000040020000E00000005C4000__000000931B9A2710\n000000067F000040020000E00000005C19AA-000000067F000040020000E00000005CA378__0000003B6A0FFB09-00000047441DEA39\n000000067F000040020000E00000005C4000-000000067F000040020000E00000005C8000__000000574B7FF240\n000000067F000040020000E00000005C4000-000000067F000040020000E00000005C8000__00000073AD3FE6B8\n000000067F000040020000E00000005C4000-000000067F000040020000E00000005C8000__000000914E3F38F0\n000000067F000040020000E00000005C4000-000000067F000040020000E00000005C8000__000000931B9A2710\n000000067F0000
40020000E00000005C8000-000000067F000040020000E00000005CC000__000000574B7FF240\n000000067F000040020000E00000005C8000-000000067F000040020000E00000005CC000__00000073AD3FE6B8\n000000067F000040020000E00000005C8000-000000067F000040020000E00000005CC000__000000914E3F38F0\n000000067F000040020000E00000005C8000-000000067F000040020000E00000005CC000__000000931B9A2710\n000000067F000040020000E00000005CA378-000000067F000040020000E00000005D2D45__0000003B6A0FFB09-00000047441DEA39\n000000067F000040020000E00000005CC000-000000067F000040020000E00000005D0000__000000574B7FF240\n000000067F000040020000E00000005CC000-000000067F000040020000E00000005D0000__00000073AD3FE6B8\n000000067F000040020000E00000005CC000-000000067F000040020000E00000005D0000__000000914E3F38F0\n000000067F000040020000E00000005CC000-000000067F000040020000E00000005D0000__000000931B9A2710\n000000067F000040020000E00000005D0000-000000067F000040020000E00000005D4000__000000574B7FF240\n000000067F000040020000E00000005D0000-000000067F000040020000E00000005D4000__00000073AD3FE6B8\n000000067F000040020000E00000005D0000-000000067F000040020000E00000005D4000__000000914E3F38F0\n000000067F000040020000E00000005D0000-000000067F000040020000E00000005D4000__000000931B9A2710\n000000067F000040020000E00000005D2D45-000000067F000040020000E00000005DB728__0000003B6A0FFB09-00000047441DEA39\n000000067F000040020000E00000005D4000-000000067F000040020000E00000005D8000__000000574B7FF240\n000000067F000040020000E00000005D4000-000000067F000040020000E00000005D8000__00000073AD3FE6B8\n000000067F000040020000E00000005D4000-000000067F000040020000E00000005D8000__000000914E3F38F0\n000000067F000040020000E00000005D4000-000000067F000040020000E00000005D8000__000000931B9A2710\n000000067F000040020000E00000005D8000-000000067F000040020000E00000005DC000__000000574B7FF240\n000000067F000040020000E00000005D8000-000000067F000040020000E00000005DC000__00000073AD3FE6B8\n000000067F000040020000E00000005D8000-000000067F000040020000E00000005DC000__000000914E3F38F0\n000000067F000040020000E0000
0005D8000-000000067F000040020000E00000005DC000__000000931B9A2710\n000000067F000040020000E00000005DB728-000000067F000040020000E00000005E4114__0000003B6A0FFB09-00000047441DEA39\n000000067F000040020000E00000005DC000-000000067F000040020000E00000005E0000__000000574B7FF240\n000000067F000040020000E00000005DC000-000000067F000040020000E00000005E0000__00000073AD3FE6B8\n000000067F000040020000E00000005DC000-000000067F000040020000E00000005E0000__000000914E3F38F0\n000000067F000040020000E00000005DC000-000000067F000040020000E00000005E0000__000000931B9A2710\n000000067F000040020000E00000005E0000-000000067F000040020000E00000005E4000__000000574B7FF240\n000000067F000040020000E00000005E0000-000000067F000040020000E00000005E4000__00000073AD3FE6B8\n000000067F000040020000E00000005E0000-000000067F000040020000E00000005E4000__000000914E3F38F0\n000000067F000040020000E00000005E0000-000000067F000040020000E00000005E4000__000000931B9A2710\n000000067F000040020000E00000005E4000-000000067F000040020000E00000005E8000__000000574B7FF240\n000000067F000040020000E00000005E4000-000000067F000040020000E00000005E8000__00000073AD3FE6B8\n000000067F000040020000E00000005E4000-000000067F000040020000E00000005E8000__000000914E3F38F0\n000000067F000040020000E00000005E4000-000000067F000040020000E00000005E8000__000000931B9A2710\n000000067F000040020000E00000005E4114-000000067F000040020000E00000005ECAF0__0000003B6A0FFB09-00000047441DEA39\n000000067F000040020000E00000005E8000-000000067F000040020000E00000005EC000__000000574B7FF240\n000000067F000040020000E00000005E8000-000000067F000040020000E00000005EC000__00000073AD3FE6B8\n000000067F000040020000E00000005E8000-000000067F000040020000E00000005EC000__000000914E3F38F0\n000000067F000040020000E00000005E8000-000000067F000040020000E00000005EC000__000000931B9A2710\n000000067F000040020000E00000005EC000-000000067F000040020000E00000005F0000__000000574B7FF240\n000000067F000040020000E00000005EC000-000000067F000040020000E00000005F0000__00000073AD3FE6B8\n000000067F000040020000E00000005EC000-000
000067F000040020000E00000005F0000__000000914E3F38F0\n000000067F000040020000E00000005EC000-000000067F000040020000E00000005F0000__000000931B9A2710\n000000067F000040020000E00000005ECAF0-000000067F000040020000E00000005F54D3__0000003B6A0FFB09-00000047441DEA39\n000000067F000040020000E00000005F0000-000000067F000040020000E00000005F4000__000000574B7FF240\n000000067F000040020000E00000005F0000-000000067F000040020000E00000005F4000__00000073AD3FE6B8\n000000067F000040020000E00000005F0000-000000067F000040020000E00000005F4000__000000914E3F38F0\n000000067F000040020000E00000005F0000-000000067F000040020000E00000005F4000__000000931B9A2710\n000000067F000040020000E00000005F4000-000000067F000040020000E00000005F8000__000000574B7FF240\n000000067F000040020000E00000005F4000-000000067F000040020000E00000005F8000__00000073AD3FE6B8\n000000067F000040020000E00000005F4000-000000067F000040020000E00000005F8000__000000914E3F38F0\n000000067F000040020000E00000005F4000-000000067F000040020000E00000005F8000__000000931B9A2710\n000000067F000040020000E00000005F54D3-000000067F000040020000E00000005FDEAC__0000003B6A0FFB09-00000047441DEA39\n000000067F000040020000E00000005F8000-000000067F000040020000E00000005FC000__000000574B7FF240\n000000067F000040020000E00000005F8000-000000067F000040020000E00000005FC000__00000073AD3FE6B8\n000000067F000040020000E00000005F8000-000000067F000040020000E00000005FC000__000000914E3F38F0\n000000067F000040020000E00000005F8000-000000067F000040020000E00000005FC000__000000931B9A2710\n000000067F000040020000E00000005FC000-000000067F000040020000E0000000600000__000000574B7FF240\n000000067F000040020000E00000005FC000-000000067F000040020000E0000000600000__00000073AD3FE6B8\n000000067F000040020000E00000005FC000-000000067F000040020000E0000000600000__000000914E3F38F0\n000000067F000040020000E00000005FC000-000000067F000040020000E0000000600000__000000931B9A2710\n000000067F000040020000E00000005FDEAC-000000067F000040020000E000000060687C__0000003B6A0FFB09-00000047441DEA39\n000000067F000040020000E0000000600000
-000000067F000040020000E0000000604000__000000574B7FF240\n000000067F000040020000E0000000600000-000000067F000040020000E0000000604000__00000073AD3FE6B8\n000000067F000040020000E0000000600000-000000067F000040020000E0000000604000__000000914E3F38F0\n000000067F000040020000E0000000600000-000000067F000040020000E0000000604000__000000931B9A2710\n000000067F000040020000E0000000604000-000000067F000040020000E0000000608000__000000574B7FF240\n000000067F000040020000E0000000604000-000000067F000040020000E0000000608000__00000073AD3FE6B8\n000000067F000040020000E0000000604000-000000067F000040020000E0000000608000__000000914E3F38F0\n000000067F000040020000E0000000604000-000000067F000040020000E0000000608000__000000931B9A2710\n000000067F000040020000E000000060687C-000000067F000040020000E000000060F25A__0000003B6A0FFB09-00000047441DEA39\n000000067F000040020000E0000000608000-000000067F000040020000E000000060C000__000000574B7FF240\n000000067F000040020000E0000000608000-000000067F000040020000E000000060C000__00000073AD3FE6B8\n000000067F000040020000E0000000608000-000000067F000040020000E000000060C000__000000914E3F38F0\n000000067F000040020000E0000000608000-000000067F000040020000E000000060C000__000000931B9A2710\n000000067F000040020000E000000060C000-000000067F000040020000E0000000610000__000000574B7FF240\n000000067F000040020000E000000060C000-000000067F000040020000E0000000610000__00000073AD3FE6B8\n000000067F000040020000E000000060C000-000000067F000040020000E0000000610000__000000914E3F38F0\n000000067F000040020000E000000060C000-000000067F000040020000E0000000610000__000000931B9A2710\n000000067F000040020000E000000060F25A-000000067F000040020000E0000000617C3B__0000003B6A0FFB09-00000047441DEA39\n000000067F000040020000E0000000610000-000000067F000040020000E0000000614000__000000574B7FF240\n000000067F000040020000E0000000610000-000000067F000040020000E0000000614000__00000073AD3FE6B8\n000000067F000040020000E0000000610000-000000067F000040020000E0000000614000__000000914E3F38F0\n000000067F000040020000E0000000610000-000000067F00
0040020000E0000000614000__000000931B9A2710\n000000067F000040020000E0000000614000-000000067F000040020000E0000000618000__000000574B7FF240\n000000067F000040020000E0000000614000-000000067F000040020000E0000000618000__00000073AD3FE6B8\n000000067F000040020000E0000000614000-000000067F000040020000E0000000618000__000000914E3F38F0\n000000067F000040020000E0000000614000-000000067F000040020000E0000000618000__000000931B9A2710\n000000067F000040020000E0000000617C3B-000000067F000040020000E0000000620625__0000003B6A0FFB09-00000047441DEA39\n000000067F000040020000E0000000618000-000000067F000040020000E000000061C000__000000574B7FF240\n000000067F000040020000E0000000618000-000000067F000040020000E000000061C000__00000073AD3FE6B8\n000000067F000040020000E0000000618000-000000067F000040020000E000000061C000__000000914E3F38F0\n000000067F000040020000E0000000618000-000000067F000040020000E000000061C000__000000931B9A2710\n000000067F000040020000E000000061C000-000000067F000040020000E0000000620000__000000574B7FF240\n000000067F000040020000E000000061C000-000000067F000040020000E0000000620000__00000073AD3FE6B8\n000000067F000040020000E000000061C000-000000067F000040020000E0000000620000__000000914E3F38F0\n000000067F000040020000E000000061C000-000000067F000040020000E0000000620000__000000931B9A2710\n000000067F000040020000E0000000620000-000000067F000040020000E0000000624000__000000574B7FF240\n000000067F000040020000E0000000620000-000000067F000040020000E0000000624000__00000073AD3FE6B8\n000000067F000040020000E0000000620000-000000067F000040020000E0000000624000__000000914E3F38F0\n000000067F000040020000E0000000620000-000000067F000040020000E0000000624000__000000931B9A2710\n000000067F000040020000E0000000620625-000000067F000040020000E0000000628FFC__0000003B6A0FFB09-00000047441DEA39\n000000067F000040020000E0000000624000-000000067F000040020000E0000000628000__000000574B7FF240\n000000067F000040020000E0000000624000-000000067F000040020000E0000000628000__00000073AD3FE6B8\n000000067F000040020000E0000000624000-000000067F000040020000E00
00000628000__000000914E3F38F0\n000000067F000040020000E0000000624000-000000067F000040020000E0000000628000__000000931B9A2710\n000000067F000040020000E0000000628000-000000067F000040020000E000000062C000__000000574B7FF240\n000000067F000040020000E0000000628000-000000067F000040020000E000000062C000__00000073AD3FE6B8\n000000067F000040020000E0000000628000-000000067F000040020000E000000062C000__000000914E3F38F0\n000000067F000040020000E0000000628000-000000067F000040020000E000000062C000__000000931B9A2710\n000000067F000040020000E0000000628FFC-000000067F000040020000E00000006319E0__0000003B6A0FFB09-00000047441DEA39\n000000067F000040020000E000000062C000-000000067F000040020000E0000000630000__000000574B7FF240\n000000067F000040020000E000000062C000-000000067F000040020000E0000000630000__00000073AD3FE6B8\n000000067F000040020000E000000062C000-000000067F000040020000E0000000630000__000000914E3F38F0\n000000067F000040020000E000000062C000-000000067F000040020000E0000000630000__000000931B9A2710\n000000067F000040020000E0000000630000-000000067F000040020000E0000000634000__000000574B7FF240\n000000067F000040020000E0000000630000-000000067F000040020000E0000000634000__00000073AD3FE6B8\n000000067F000040020000E0000000630000-000000067F000040020000E0000000634000__000000914E3F38F0\n000000067F000040020000E0000000630000-000000067F000040020000E0000000634000__000000931B9A2710\n000000067F000040020000E00000006319E0-000000067F000040020000E000000063A3B8__0000003B6A0FFB09-00000047441DEA39\n000000067F000040020000E0000000634000-000000067F000040020000E0000000638000__000000574B7FF240\n000000067F000040020000E0000000634000-000000067F000040020000E0000000638000__00000073AD3FE6B8\n000000067F000040020000E0000000634000-000000067F000040020000E0000000638000__000000914E3F38F0\n000000067F000040020000E0000000634000-000000067F000040020000E0000000638000__000000931B9A2710\n000000067F000040020000E0000000638000-000000067F000040020000E000000063C000__000000574B7FF240\n000000067F000040020000E0000000638000-000000067F000040020000E000000063C000__
00000073AD3FE6B8\n000000067F000040020000E0000000638000-000000067F000040020000E000000063C000__000000914E3F38F0\n000000067F000040020000E0000000638000-000000067F000040020000E000000063C000__000000931B9A2710\n000000067F000040020000E000000063A3B8-000000067F000040020000E0000000642D80__0000003B6A0FFB09-00000047441DEA39\n000000067F000040020000E000000063C000-000000067F000040020000E0000000640000__000000574B7FF240\n000000067F000040020000E000000063C000-000000067F000040020000E0000000640000__00000073AD3FE6B8\n000000067F000040020000E000000063C000-000000067F000040020000E0000000640000__000000914E3F38F0\n000000067F000040020000E000000063C000-000000067F000040020000E0000000640000__000000931B9A2710\n000000067F000040020000E0000000640000-000000067F000040020000E0000000644000__000000574B7FF240\n000000067F000040020000E0000000640000-000000067F000040020000E0000000644000__00000073AD3FE6B8\n000000067F000040020000E0000000640000-000000067F000040020000E0000000644000__000000914E3F38F0\n000000067F000040020000E0000000640000-000000067F000040020000E0000000644000__000000931B9A2710\n000000067F000040020000E0000000642D80-000000067F000040020000E000000064B762__0000003B6A0FFB09-00000047441DEA39\n000000067F000040020000E0000000644000-000000067F000040020000E0000000648000__000000574B7FF240\n000000067F000040020000E0000000644000-000000067F000040020000E0000000648000__00000073AD3FE6B8\n000000067F000040020000E0000000644000-000000067F000040020000E0000000648000__000000914E3F38F0\n000000067F000040020000E0000000644000-000000067F000040020000E0000000648000__000000931B9A2710\n000000067F000040020000E0000000648000-000000067F000040020000E000000064C000__000000574B7FF240\n000000067F000040020000E0000000648000-000000067F000040020000E000000064C000__00000073AD3FE6B8\n000000067F000040020000E0000000648000-000000067F000040020000E000000064C000__000000914E3F38F0\n000000067F000040020000E0000000648000-000000067F000040020000E000000064C000__000000931B9A2710\n000000067F000040020000E000000064B762-000000067F000040020000E000000065415B__0000003B6A0FF
B09-00000047441DEA39\n000000067F000040020000E000000064C000-000000067F000040020000E0000000650000__000000574B7FF240\n000000067F000040020000E000000064C000-000000067F000040020000E0000000650000__00000073AD3FE6B8\n000000067F000040020000E000000064C000-000000067F000040020000E0000000650000__000000914E3F38F0\n000000067F000040020000E000000064C000-000000067F000040020000E0000000650000__000000931B9A2710\n000000067F000040020000E0000000650000-000000067F000040020000E0000000654000__000000574B7FF240\n000000067F000040020000E0000000650000-000000067F000040020000E0000000654000__00000073AD3FE6B8\n000000067F000040020000E0000000650000-000000067F000040020000E0000000654000__000000914E3F38F0\n000000067F000040020000E0000000650000-000000067F000040020000E0000000654000__000000931B9A2710\n000000067F000040020000E0000000654000-000000067F000040020000E0000000658000__000000574B7FF240\n000000067F000040020000E0000000654000-000000067F000040020000E0000000658000__00000073AD3FE6B8\n000000067F000040020000E0000000654000-000000067F000040020000E0000000658000__000000914E3F38F0\n000000067F000040020000E0000000654000-000000067F000040020000E0000000658000__000000931B9A2710\n000000067F000040020000E000000065415B-000000067F000040020000E000000065CB43__0000003B6A0FFB09-00000047441DEA39\n000000067F000040020000E0000000658000-000000067F000040020000E000000065C000__000000574B7FF240\n000000067F000040020000E0000000658000-000000067F000040020000E000000065C000__00000073AD3FE6B8\n000000067F000040020000E0000000658000-000000067F000040020000E000000065C000__000000914E3F38F0\n000000067F000040020000E0000000658000-000000067F000040020000E000000065C000__000000931B9A2710\n000000067F000040020000E000000065C000-000000067F000040020000E0000000660000__000000574B7FF240\n000000067F000040020000E000000065C000-000000067F000040020000E0000000660000__00000073AD3FE6B8\n000000067F000040020000E000000065C000-000000067F000040020000E0000000660000__000000914E3F38F0\n000000067F000040020000E000000065C000-000000067F000040020000E0000000660000__000000931B9A2710\n00000006
7F000040020000E000000065CB43-000000067F000040020000E0000000665527__0000003B6A0FFB09-00000047441DEA39\n000000067F000040020000E0000000660000-000000067F000040020000E0000000664000__000000574B7FF240\n000000067F000040020000E0000000660000-000000067F000040020000E0000000664000__00000073AD3FE6B8\n000000067F000040020000E0000000660000-000000067F000040020000E0000000664000__000000914E3F38F0\n000000067F000040020000E0000000660000-000000067F000040020000E0000000664000__000000931B9A2710\n000000067F000040020000E0000000664000-000000067F000040020000E0000000668000__000000574B7FF240\n000000067F000040020000E0000000664000-000000067F000040020000E0000000668000__00000073AD3FE6B8\n000000067F000040020000E0000000664000-000000067F000040020000E0000000668000__000000914E3F38F0\n000000067F000040020000E0000000664000-000000067F000040020000E0000000668000__000000931B9A2710\n000000067F000040020000E0000000665527-000000067F000040020000E000000066DEEE__0000003B6A0FFB09-00000047441DEA39\n000000067F000040020000E0000000668000-000000067F000040020000E000000066C000__000000574B7FF240\n000000067F000040020000E0000000668000-000000067F000040020000E000000066C000__00000073AD3FE6B8\n000000067F000040020000E0000000668000-000000067F000040020000E000000066C000__000000914E3F38F0\n000000067F000040020000E0000000668000-000000067F000040020000E000000066C000__000000931B9A2710\n000000067F000040020000E000000066C000-000000067F000040020000E0000000670000__000000574B7FF240\n000000067F000040020000E000000066C000-000000067F000040020000E0000000670000__00000073AD3FE6B8\n000000067F000040020000E000000066C000-000000067F000040020000E0000000670000__000000914E3F38F0\n000000067F000040020000E000000066C000-000000067F000040020000E0000000670000__000000931B9A2710\n000000067F000040020000E000000066DEEE-000000067F000040020000E00000006768C5__0000003B6A0FFB09-00000047441DEA39\n000000067F000040020000E0000000670000-000000067F000040020000E0000000674000__000000574B7FF240\n000000067F000040020000E0000000670000-000000067F000040020000E0000000674000__00000073AD3FE6B8\n0000
00067F000040020000E0000000670000-000000067F000040020000E0000000674000__000000914E3F38F0\n000000067F000040020000E0000000670000-000000067F000040020000E0000000674000__000000931B9A2710\n000000067F000040020000E0000000674000-000000067F000040020000E0000000678000__000000574B7FF240\n000000067F000040020000E0000000674000-000000067F000040020000E0000000678000__00000073AD3FE6B8\n000000067F000040020000E0000000674000-000000067F000040020000E0000000678000__000000914E3F38F0\n000000067F000040020000E0000000674000-000000067F000040020000E0000000678000__000000931B9A2710\n000000067F000040020000E00000006768C5-000000067F000040020000E000000067F286__0000003B6A0FFB09-00000047441DEA39\n000000067F000040020000E0000000678000-000000067F000040020000E000000067C000__000000574B7FF240\n000000067F000040020000E0000000678000-000000067F000040020000E000000067C000__00000073AD3FE6B8\n000000067F000040020000E0000000678000-000000067F000040020000E000000067C000__000000914E3F38F0\n000000067F000040020000E0000000678000-000000067F000040020000E000000067C000__000000931B9A2710\n000000067F000040020000E000000067C000-000000067F000040020000E0000000680000__000000574B7FF240\n000000067F000040020000E000000067C000-000000067F000040020000E0000000680000__00000073AD3FE6B8\n000000067F000040020000E000000067C000-000000067F000040020000E0000000680000__000000914E3F38F0\n000000067F000040020000E000000067C000-000000067F000040020000E0000000680000__000000931B9A2710\n000000067F000040020000E000000067F286-000000067F000040020000E0000000687C67__0000003B6A0FFB09-00000047441DEA39\n000000067F000040020000E0000000680000-000000067F000040020000E0000000684000__000000574B7FF240\n000000067F000040020000E0000000680000-000000067F000040020000E0000000684000__00000073AD3FE6B8\n000000067F000040020000E0000000680000-000000067F000040020000E0000000684000__000000914E3F38F0\n000000067F000040020000E0000000680000-000000067F000040020000E0000000684000__000000931B9A2710\n000000067F000040020000E0000000684000-000000067F000040020000E0000000688000__000000574B7FF240\n000000067F0000400
20000E0000000684000-000000067F000040020000E0000000688000__00000073AD3FE6B8\n000000067F000040020000E0000000684000-000000067F000040020000E0000000688000__000000914E3F38F0\n000000067F000040020000E0000000684000-000000067F000040020000E0000000688000__000000931B9A2710\n000000067F000040020000E0000000687C67-000000067F000040020000E0000000690653__0000003B6A0FFB09-00000047441DEA39\n000000067F000040020000E0000000688000-000000067F000040020000E000000068C000__000000574B7FF240\n000000067F000040020000E0000000688000-000000067F000040020000E000000068C000__00000073AD3FE6B8\n000000067F000040020000E0000000688000-000000067F000040020000E000000068C000__000000914E3F38F0\n000000067F000040020000E0000000688000-000000067F000040020000E000000068C000__000000931B9A2710\n000000067F000040020000E000000068C000-000000067F000040020000E0000000690000__000000574B7FF240\n000000067F000040020000E000000068C000-000000067F000040020000E0000000690000__00000073AD3FE6B8\n000000067F000040020000E000000068C000-000000067F000040020000E0000000690000__000000914E3F38F0\n000000067F000040020000E000000068C000-000000067F000040020000E0000000690000__000000931B9A2710\n000000067F000040020000E0000000690000-000000067F000040020000E0000000694000__000000574B7FF240\n000000067F000040020000E0000000690000-000000067F000040020000E0000000694000__00000073AD3FE6B8\n000000067F000040020000E0000000690000-000000067F000040020000E0000000694000__000000914E3F38F0\n000000067F000040020000E0000000690000-000000067F000040020000E0000000694000__000000931B9A2710\n000000067F000040020000E0000000690653-000000067F000040020000E0000000699034__0000003B6A0FFB09-00000047441DEA39\n000000067F000040020000E0000000694000-000000067F000040020000E0000000698000__000000574B7FF240\n000000067F000040020000E0000000694000-000000067F000040020000E0000000698000__00000073AD3FE6B8\n000000067F000040020000E0000000694000-000000067F000040020000E0000000698000__000000914E3F38F0\n000000067F000040020000E0000000694000-000000067F000040020000E0000000698000__000000931B9A2710\n000000067F000040020000E0000000
698000-000000067F000040020000E000000069C000__000000574B7FF240\n000000067F000040020000E0000000698000-000000067F000040020000E000000069C000__00000073AD3FE6B8\n000000067F000040020000E0000000698000-000000067F000040020000E000000069C000__000000914E3F38F0\n000000067F000040020000E0000000698000-000000067F000040020000E000000069C000__000000931B9A2710\n000000067F000040020000E0000000699034-000000067F000040020000E00000006A1A0D__0000003B6A0FFB09-00000047441DEA39\n000000067F000040020000E000000069C000-000000067F000040020000E00000006A0000__000000574B7FF240\n000000067F000040020000E000000069C000-000000067F000040020000E00000006A0000__00000073AD3FE6B8\n000000067F000040020000E000000069C000-000000067F000040020000E00000006A0000__000000914E3F38F0\n000000067F000040020000E000000069C000-000000067F000040020000E00000006A0000__000000931B9A2710\n000000067F000040020000E00000006A0000-000000067F000040020000E00000006A4000__000000574B7FF240\n000000067F000040020000E00000006A0000-000000067F000040020000E00000006A4000__00000073AD3FE6B8\n000000067F000040020000E00000006A0000-000000067F000040020000E00000006A4000__000000914E3F38F0\n000000067F000040020000E00000006A0000-000000067F000040020000E00000006A4000__000000931B9A2710\n000000067F000040020000E00000006A1A0D-000000067F000040020000E00000006AA3D8__0000003B6A0FFB09-00000047441DEA39\n000000067F000040020000E00000006A4000-000000067F000040020000E00000006A8000__000000574B7FF240\n000000067F000040020000E00000006A4000-000000067F000040020000E00000006A8000__00000073AD3FE6B8\n000000067F000040020000E00000006A4000-000000067F000040020000E00000006A8000__000000914E3F38F0\n000000067F000040020000E00000006A4000-000000067F000040020000E00000006A8000__000000931B9A2710\n000000067F000040020000E00000006A8000-000000067F000040020000E00000006AC000__000000574B7FF240\n000000067F000040020000E00000006A8000-000000067F000040020000E00000006AC000__00000073AD3FE6B8\n000000067F000040020000E00000006A8000-000000067F000040020000E00000006AC000__000000914E3F38F0\n000000067F000040020000E00000006A8000-000000
067F000040020000E00000006AC000__000000931B9A2710\n000000067F000040020000E00000006AA3D8-000000067F000040020000E00000006B2DB1__0000003B6A0FFB09-00000047441DEA39\n000000067F000040020000E00000006AC000-000000067F000040020000E00000006B0000__000000574B7FF240\n000000067F000040020000E00000006AC000-000000067F000040020000E00000006B0000__00000073AD3FE6B8\n000000067F000040020000E00000006AC000-000000067F000040020000E00000006B0000__000000914E3F38F0\n000000067F000040020000E00000006AC000-000000067F000040020000E00000006B0000__000000931B9A2710\n000000067F000040020000E00000006B0000-000000067F000040020000E00000006B4000__000000574B7FF240\n000000067F000040020000E00000006B0000-000000067F000040020000E00000006B4000__00000073AD3FE6B8\n000000067F000040020000E00000006B0000-000000067F000040020000E00000006B4000__000000914E3F38F0\n000000067F000040020000E00000006B0000-000000067F000040020000E00000006B4000__000000931B9A2710\n000000067F000040020000E00000006B2DB1-000000067F000040020000E00000006BB77C__0000003B6A0FFB09-00000047441DEA39\n000000067F000040020000E00000006B4000-000000067F000040020000E00000006B8000__000000574B7FF240\n000000067F000040020000E00000006B4000-000000067F000040020000E00000006B8000__00000073AD3FE6B8\n000000067F000040020000E00000006B4000-000000067F000040020000E00000006B8000__000000914E3F38F0\n000000067F000040020000E00000006B4000-000000067F000040020000E00000006B8000__000000931B9A2710\n000000067F000040020000E00000006B8000-000000067F000040020000E00000006BC000__000000574B7FF240\n000000067F000040020000E00000006B8000-000000067F000040020000E00000006BC000__00000073AD3FE6B8\n000000067F000040020000E00000006B8000-000000067F000040020000E00000006BC000__000000914E3F38F0\n000000067F000040020000E00000006B8000-000000067F000040020000E00000006BC000__000000931B9A2710\n000000067F000040020000E00000006BB77C-000000067F000040020000E00000006C416F__0000003B6A0FFB09-00000047441DEA39\n000000067F000040020000E00000006BC000-000000067F000040020000E00000006C0000__000000574B7FF240\n000000067F000040020000E00000006BC000-00
0000067F000040020000E00000006C0000__00000073AD3FE6B8\n000000067F000040020000E00000006BC000-000000067F000040020000E00000006C0000__000000914E3F38F0\n000000067F000040020000E00000006BC000-000000067F000040020000E00000006C0000__000000931B9A2710\n000000067F000040020000E00000006C0000-000000067F000040020000E00000006C4000__000000574B7FF240\n000000067F000040020000E00000006C0000-000000067F000040020000E00000006C4000__00000073AD3FE6B8\n000000067F000040020000E00000006C0000-000000067F000040020000E00000006C4000__000000914E3F38F0\n000000067F000040020000E00000006C0000-000000067F000040020000E00000006C4000__000000931B9A2710\n000000067F000040020000E00000006C4000-000000067F000040020000E00000006C8000__000000574B7FF240\n000000067F000040020000E00000006C4000-000000067F000040020000E00000006C8000__00000073AD3FE6B8\n000000067F000040020000E00000006C4000-000000067F000040020000E00000006C8000__000000914E3F38F0\n000000067F000040020000E00000006C4000-000000067F000040020000E00000006C8000__000000931B9A2710\n000000067F000040020000E00000006C416F-000000067F000040020000E00000006C76FF__0000003B6A0FFB09-00000047441DEA39\n000000067F000040020000E00000006C76FF-000000067F000040020000E00000006D00F3__00000047441DEA39-0000004803BDE029\n000000067F000040020000E00000006C8000-000000067F000040020000E00000006CC000__000000574B7FF240\n000000067F000040020000E00000006C8000-000000067F000040020000E00000006CC000__00000073AD3FE6B8\n000000067F000040020000E00000006C8000-000000067F000040020000E00000006CC000__000000914E3F38F0\n000000067F000040020000E00000006C8000-000000067F000040020000E00000006CC000__000000931B9A2710\n000000067F000040020000E00000006CC000-000000067F000040020000E00000006D0000__000000574B7FF240\n000000067F000040020000E00000006CC000-000000067F000040020000E00000006D0000__00000073AD3FE6B8\n000000067F000040020000E00000006CC000-000000067F000040020000E00000006D0000__000000914E3F38F0\n000000067F000040020000E00000006CC000-000000067F000040020000E00000006D0000__000000931B9A2710\n000000067F000040020000E00000006D0000-000000067F00004
0020000E00000006D4000__000000574B7FF240\n000000067F000040020000E00000006D0000-000000067F000040020000E00000006D4000__00000073AD3FE6B8\n000000067F000040020000E00000006D0000-000000067F000040020000E00000006D4000__000000914E3F38F0\n000000067F000040020000E00000006D0000-000000067F000040020000E00000006D4000__000000931B9A2710\n000000067F000040020000E00000006D00F3-000000067F000040020000E00000006D8AD9__00000047441DEA39-0000004803BDE029\n000000067F000040020000E00000006D4000-000000067F000040020000E00000006D8000__000000574B7FF240\n000000067F000040020000E00000006D4000-000000067F000040020000E00000006D8000__00000073AD3FE6B8\n000000067F000040020000E00000006D4000-000000067F000040020000E00000006D8000__000000914E3F38F0\n000000067F000040020000E00000006D4000-000000067F000040020000E00000006D8000__000000931B9A2710\n000000067F000040020000E00000006D8000-000000067F000040020000E00000006DC000__000000574B7FF240\n000000067F000040020000E00000006D8000-000000067F000040020000E00000006DC000__00000073AD3FE6B8\n000000067F000040020000E00000006D8000-000000067F000040020000E00000006DC000__000000914E3F38F0\n000000067F000040020000E00000006D8000-000000067F000040020000E00000006DC000__000000931B9A2710\n000000067F000040020000E00000006D8AD9-000000067F000040020000E00000006E14B0__00000047441DEA39-0000004803BDE029\n000000067F000040020000E00000006DC000-000000067F000040020000E00000006E0000__000000574B7FF240\n000000067F000040020000E00000006DC000-000000067F000040020000E00000006E0000__00000073AD3FE6B8\n000000067F000040020000E00000006DC000-000000067F000040020000E00000006E0000__000000914E3F38F0\n000000067F000040020000E00000006DC000-000000067F000040020000E00000006E0000__000000931B9A2710\n000000067F000040020000E00000006E0000-000000067F000040020000E00000006E4000__000000574B7FF240\n000000067F000040020000E00000006E0000-000000067F000040020000E00000006E4000__00000073AD3FE6B8\n000000067F000040020000E00000006E0000-000000067F000040020000E00000006E4000__000000914E3F38F0\n000000067F000040020000E00000006E0000-000000067F000040020000E00000
006E4000__000000931B9A2710\n000000067F000040020000E00000006E14B0-000000067F000040020000E00000006E9E91__00000047441DEA39-0000004803BDE029\n000000067F000040020000E00000006E4000-000000067F000040020000E00000006E8000__000000574B7FF240\n000000067F000040020000E00000006E4000-000000067F000040020000E00000006E8000__00000073AD3FE6B8\n000000067F000040020000E00000006E4000-000000067F000040020000E00000006E8000__000000914E3F38F0\n000000067F000040020000E00000006E4000-000000067F000040020000E00000006E8000__000000931B9A2710\n000000067F000040020000E00000006E8000-000000067F000040020000E00000006EC000__000000574B7FF240\n000000067F000040020000E00000006E8000-000000067F000040020000E00000006EC000__00000073AD3FE6B8\n000000067F000040020000E00000006E8000-000000067F000040020000E00000006EC000__000000914E3F38F0\n000000067F000040020000E00000006E8000-000000067F000040020000E00000006EC000__000000931B9A2710\n000000067F000040020000E00000006E9E91-000000067F000040020000E00000006F2877__00000047441DEA39-0000004803BDE029\n000000067F000040020000E00000006EC000-000000067F000040020000E00000006F0000__000000574B7FF240\n000000067F000040020000E00000006EC000-000000067F000040020000E00000006F0000__00000073AD3FE6B8\n000000067F000040020000E00000006EC000-000000067F000040020000E00000006F0000__000000914E3F38F0\n000000067F000040020000E00000006EC000-000000067F000040020000E00000006F0000__000000931B9A2710\n000000067F000040020000E00000006F0000-000000067F000040020000E00000006F4000__000000574B7FF240\n000000067F000040020000E00000006F0000-000000067F000040020000E00000006F4000__00000073AD3FE6B8\n000000067F000040020000E00000006F0000-000000067F000040020000E00000006F4000__000000914E3F38F0\n000000067F000040020000E00000006F0000-000000067F000040020000E00000006F4000__000000931B9A2710\n000000067F000040020000E00000006F2877-000000067F000040020000E00000006FB252__00000047441DEA39-0000004803BDE029\n000000067F000040020000E00000006F4000-000000067F000040020000E00000006F8000__000000574B7FF240\n000000067F000040020000E00000006F4000-000000067F000040020000E0
0000006F8000__00000073AD3FE6B8\n000000067F000040020000E00000006F4000-000000067F000040020000E00000006F8000__000000914E3F38F0\n000000067F000040020000E00000006F4000-000000067F000040020000E00000006F8000__000000931B9A2710\n000000067F000040020000E00000006F8000-000000067F000040020000E00000006FC000__000000574B7FF240\n000000067F000040020000E00000006F8000-000000067F000040020000E00000006FC000__00000073AD3FE6B8\n000000067F000040020000E00000006F8000-000000067F000040020000E00000006FC000__000000914E3F38F0\n000000067F000040020000E00000006F8000-000000067F000040020000E00000006FC000__000000931B9A2710\n000000067F000040020000E00000006FB252-000000067F000040020000E0000000703C35__00000047441DEA39-0000004803BDE029\n000000067F000040020000E00000006FC000-000000067F000040020000E0000000700000__000000574B7FF240\n000000067F000040020000E00000006FC000-000000067F000040020000E0000000700000__00000073AD3FE6B8\n000000067F000040020000E00000006FC000-000000067F000040020000E0000000700000__000000914E3F38F0\n000000067F000040020000E00000006FC000-000000067F000040020000E0000000700000__000000931B9A2710\n000000067F000040020000E0000000700000-000000067F000040020000E0000000704000__000000574B7FF240\n000000067F000040020000E0000000700000-000000067F000040020000E0000000704000__00000073AD3FE6B8\n000000067F000040020000E0000000700000-000000067F000040020000E0000000704000__000000914E3F38F0\n000000067F000040020000E0000000700000-000000067F000040020000E0000000704000__000000931B9A2710\n000000067F000040020000E0000000703C35-000000067F000040020000E000000070C617__00000047441DEA39-0000004803BDE029\n000000067F000040020000E0000000704000-000000067F000040020000E0000000708000__000000574B7FF240\n000000067F000040020000E0000000704000-000000067F000040020000E0000000708000__00000073AD3FE6B8\n000000067F000040020000E0000000704000-000000067F000040020000E0000000708000__000000914E3F38F0\n000000067F000040020000E0000000704000-000000067F000040020000E0000000708000__000000931B9A2710\n000000067F000040020000E0000000708000-000000067F000040020000E000000070C000_
_000000574B7FF240\n000000067F000040020000E0000000708000-000000067F000040020000E000000070C000__00000073AD3FE6B8\n000000067F000040020000E0000000708000-000000067F000040020000E000000070C000__000000914E3F38F0\n000000067F000040020000E0000000708000-000000067F000040020000E000000070C000__000000931B9A2710\n000000067F000040020000E000000070C000-000000067F000040020000E0000000710000__000000574B7FF240\n000000067F000040020000E000000070C000-000000067F000040020000E0000000710000__00000073AD3FE6B8\n000000067F000040020000E000000070C000-000000067F000040020000E0000000710000__000000914E3F38F0\n000000067F000040020000E000000070C000-000000067F000040020000E0000000710000__000000931B9A2710\n000000067F000040020000E000000070C617-000000067F000040020000E0000000714FEF__00000047441DEA39-0000004803BDE029\n000000067F000040020000E0000000710000-000000067F000040020000E0000000714000__000000574B7FF240\n000000067F000040020000E0000000710000-000000067F000040020000E0000000714000__00000073AD3FE6B8\n000000067F000040020000E0000000710000-000000067F000040020000E0000000714000__000000914E3F38F0\n000000067F000040020000E0000000710000-000000067F000040020000E0000000714000__000000931B9A2710\n000000067F000040020000E0000000714000-000000067F000040020000E0000000718000__000000574B7FF240\n000000067F000040020000E0000000714000-000000067F000040020000E0000000718000__00000073AD3FE6B8\n000000067F000040020000E0000000714000-000000067F000040020000E0000000718000__000000914E3F38F0\n000000067F000040020000E0000000714000-000000067F000040020000E0000000718000__000000931B9A2710\n000000067F000040020000E0000000714FEF-000000067F000040020000E000000071D9D3__00000047441DEA39-0000004803BDE029\n000000067F000040020000E0000000718000-000000067F000040020000E000000071C000__000000574B7FF240\n000000067F000040020000E0000000718000-000000067F000040020000E000000071C000__00000073AD3FE6B8\n000000067F000040020000E0000000718000-000000067F000040020000E000000071C000__000000914E3F38F0\n000000067F000040020000E0000000718000-000000067F000040020000E000000071C000__000000931B9A
2710\n000000067F000040020000E000000071C000-000000067F000040020000E0000000720000__000000574B7FF240\n000000067F000040020000E000000071C000-000000067F000040020000E0000000720000__00000073AD3FE6B8\n000000067F000040020000E000000071C000-000000067F000040020000E0000000720000__000000914E3F38F0\n000000067F000040020000E000000071C000-000000067F000040020000E0000000720000__000000931B9A2710\n000000067F000040020000E000000071D9D3-000000067F000040020000E00000007263A7__00000047441DEA39-0000004803BDE029\n000000067F000040020000E0000000720000-000000067F000040020000E0000000724000__000000574B7FF240\n000000067F000040020000E0000000720000-000000067F000040020000E0000000724000__00000073AD3FE6B8\n000000067F000040020000E0000000720000-000000067F000040020000E0000000724000__000000914E3F38F0\n000000067F000040020000E0000000720000-000000067F000040020000E0000000724000__000000931B9A2710\n000000067F000040020000E0000000724000-000000067F000040020000E0000000728000__000000574B7FF240\n000000067F000040020000E0000000724000-000000067F000040020000E0000000728000__00000073AD3FE6B8\n000000067F000040020000E0000000724000-000000067F000040020000E0000000728000__000000914E3F38F0\n000000067F000040020000E0000000724000-000000067F000040020000E0000000728000__000000931B9A2710\n000000067F000040020000E00000007263A7-000000067F000040020000E000000072ED72__00000047441DEA39-0000004803BDE029\n000000067F000040020000E0000000728000-000000067F000040020000E000000072C000__000000574B7FF240\n000000067F000040020000E0000000728000-000000067F000040020000E000000072C000__00000073AD3FE6B8\n000000067F000040020000E0000000728000-000000067F000040020000E000000072C000__000000914E3F38F0\n000000067F000040020000E0000000728000-000000067F000040020000E000000072C000__000000931B9A2710\n000000067F000040020000E000000072C000-000000067F000040020000E0000000730000__000000572A7A05D8\n000000067F000040020000E000000072C000-000000067F000040020000E0000000730000__0000005D2FFFFB38\n000000067F000040020000E000000072C000-000000067F000040020000E0000000730000__00000073AD3FE6B8\n0000000
67F000040020000E000000072C000-000000067F000040020000E0000000730000__000000914E3F38F0\n000000067F000040020000E000000072C000-000000067F000040020000E0000000730000__000000931B9A2710\n000000067F000040020000E000000072ED72-000000067F000040020000E0000100000000__00000047441DEA39-0000004803BDE029\n000000067F000040020000E000000072F0A9-000000067F000040020000E0000000737A87__0000004803BDE029-00000048B365CD91\n000000067F000040020000E0000000730000-000000067F000040020000E0000000734000__000000572A7A05D8\n000000067F000040020000E0000000730000-000000067F000040020000E0000000734000__0000005D2FFFFB38\n000000067F000040020000E0000000730000-000000067F000040020000E0000000734000__00000073AD3FE6B8\n000000067F000040020000E0000000730000-000000067F000040020000E0000000734000__000000914E3F38F0\n000000067F000040020000E0000000730000-000000067F000040020000E0000000734000__000000931B9A2710\n000000067F000040020000E0000000734000-000000067F000040020000E0000000738000__000000572A7A05D8\n000000067F000040020000E0000000734000-000000067F000040020000E0000000738000__0000005D2FFFFB38\n000000067F000040020000E0000000734000-000000067F000040020000E0000000738000__00000073AD3FE6B8\n000000067F000040020000E0000000734000-000000067F000040020000E0000000738000__000000914E3F38F0\n000000067F000040020000E0000000734000-000000067F000040020000E0000000738000__000000931B9A2710\n000000067F000040020000E0000000737A87-000000067F000040020000E000000074046F__0000004803BDE029-00000048B365CD91\n000000067F000040020000E0000000738000-000000067F000040020000E000000073C000__000000572A7A05D8\n000000067F000040020000E0000000738000-000000067F000040020000E000000073C000__0000005D2FFFFB38\n000000067F000040020000E0000000738000-000000067F000040020000E000000073C000__00000073AD3FE6B8\n000000067F000040020000E0000000738000-000000067F000040020000E000000073C000__000000914E3F38F0\n000000067F000040020000E0000000738000-000000067F000040020000E000000073C000__000000931B9A2710\n000000067F000040020000E000000073C000-000000067F000040020000E0000000740000__000000572A7A05D8\n000
000067F000040020000E000000073C000-000000067F000040020000E0000000740000__0000005D2FFFFB38\n000000067F000040020000E000000073C000-000000067F000040020000E0000000740000__00000073AD3FE6B8\n000000067F000040020000E000000073C000-000000067F000040020000E0000000740000__000000914E3F38F0\n000000067F000040020000E000000073C000-000000067F000040020000E0000000740000__000000931B9A2710\n000000067F000040020000E0000000740000-000000067F000040020000E0000000744000__000000572A7A05D8\n000000067F000040020000E0000000740000-000000067F000040020000E0000000744000__0000005D2FFFFB38\n000000067F000040020000E0000000740000-000000067F000040020000E0000000744000__00000073AD3FE6B8\n000000067F000040020000E0000000740000-000000067F000040020000E0000000744000__000000914E3F38F0\n000000067F000040020000E0000000740000-000000067F000040020000E0000000744000__000000931B9A2710\n000000067F000040020000E000000074046F-000000067F000040020000E0000000748E4A__0000004803BDE029-00000048B365CD91\n000000067F000040020000E0000000744000-000000067F000040020000E0000000748000__000000574B7FF240\n000000067F000040020000E0000000744000-000000067F000040020000E0000000748000__00000073AD3FE6B8\n000000067F000040020000E0000000744000-000000067F000040020000E0000000748000__000000914E3F38F0\n000000067F000040020000E0000000744000-000000067F000040020000E0000000748000__000000931B9A2710\n000000067F000040020000E0000000744000-030000000000000000000000000000000002__000000482DBFED58\n000000067F000040020000E0000000748000-000000067F000040020000E000000074C000__000000574B7FF240\n000000067F000040020000E0000000748000-000000067F000040020000E000000074C000__00000073AD3FE6B8\n000000067F000040020000E0000000748000-000000067F000040020000E000000074C000__000000914E3F38F0\n000000067F000040020000E0000000748000-000000067F000040020000E000000074C000__000000931B9A2710\n000000067F000040020000E0000000748E4A-000000067F000040020000E0000000751827__0000004803BDE029-00000048B365CD91\n000000067F000040020000E000000074C000-000000067F000040020000E0000000750000__000000574B7FF240\n000000067F000040
020000E000000074C000-000000067F000040020000E0000000750000__00000073AD3FE6B8\n000000067F000040020000E000000074C000-000000067F000040020000E0000000750000__000000914E3F38F0\n000000067F000040020000E000000074C000-000000067F000040020000E0000000750000__000000931B9A2710\n000000067F000040020000E0000000750000-000000067F000040020000E0000000754000__000000574B7FF240\n000000067F000040020000E0000000750000-000000067F000040020000E0000000754000__00000073AD3FE6B8\n000000067F000040020000E0000000750000-000000067F000040020000E0000000754000__000000914E3F38F0\n000000067F000040020000E0000000750000-000000067F000040020000E0000000754000__000000931B9A2710\n000000067F000040020000E0000000751827-000000067F000040020000E000000075A1F6__0000004803BDE029-00000048B365CD91\n000000067F000040020000E0000000754000-000000067F000040020000E0000000758000__000000574B7FF240\n000000067F000040020000E0000000754000-000000067F000040020000E0000000758000__00000073AD3FE6B8\n000000067F000040020000E0000000754000-000000067F000040020000E0000000758000__000000914E3F38F0\n000000067F000040020000E0000000754000-000000067F000040020000E0000000758000__000000931B9A2710\n000000067F000040020000E0000000758000-000000067F000040020000E000000075C000__000000574B7FF240\n000000067F000040020000E0000000758000-000000067F000040020000E000000075C000__00000073AD3FE6B8\n000000067F000040020000E0000000758000-000000067F000040020000E000000075C000__000000914E3F38F0\n000000067F000040020000E0000000758000-000000067F000040020000E000000075C000__000000931B9A2710\n000000067F000040020000E000000075A1F6-000000067F000040020000E0000000762BD3__0000004803BDE029-00000048B365CD91\n000000067F000040020000E000000075C000-000000067F000040020000E0000000760000__000000574B7FF240\n000000067F000040020000E000000075C000-000000067F000040020000E0000000760000__00000073AD3FE6B8\n000000067F000040020000E000000075C000-000000067F000040020000E0000000760000__000000914E3F38F0\n000000067F000040020000E000000075C000-000000067F000040020000E0000000760000__000000931B9A2710\n000000067F000040020000E000000
0760000-000000067F000040020000E0000000764000__000000574B7FF240\n000000067F000040020000E0000000760000-000000067F000040020000E0000000764000__00000073AD3FE6B8\n000000067F000040020000E0000000760000-000000067F000040020000E0000000764000__000000914E3F38F0\n000000067F000040020000E0000000760000-000000067F000040020000E0000000764000__000000931B9A2710\n000000067F000040020000E0000000762BD3-000000067F000040020000E000000076B5AA__0000004803BDE029-00000048B365CD91\n000000067F000040020000E0000000764000-000000067F000040020000E0000000768000__000000574B7FF240\n000000067F000040020000E0000000764000-000000067F000040020000E0000000768000__00000073AD3FE6B8\n000000067F000040020000E0000000764000-000000067F000040020000E0000000768000__000000914E3F38F0\n000000067F000040020000E0000000764000-000000067F000040020000E0000000768000__000000931B9A2710\n000000067F000040020000E0000000768000-000000067F000040020000E000000076C000__000000574B7FF240\n000000067F000040020000E0000000768000-000000067F000040020000E000000076C000__00000073AD3FE6B8\n000000067F000040020000E0000000768000-000000067F000040020000E000000076C000__000000914E3F38F0\n000000067F000040020000E0000000768000-000000067F000040020000E000000076C000__000000931B9A2710\n000000067F000040020000E000000076B5AA-000000067F000040020000E0000000773F85__0000004803BDE029-00000048B365CD91\n000000067F000040020000E000000076C000-000000067F000040020000E0000000770000__000000574B7FF240\n000000067F000040020000E000000076C000-000000067F000040020000E0000000770000__00000073AD3FE6B8\n000000067F000040020000E000000076C000-000000067F000040020000E0000000770000__000000914E3F38F0\n000000067F000040020000E000000076C000-000000067F000040020000E0000000770000__000000931B9A2710\n000000067F000040020000E0000000770000-000000067F000040020000E0000000774000__000000574B7FF240\n000000067F000040020000E0000000770000-000000067F000040020000E0000000774000__00000073AD3FE6B8\n000000067F000040020000E0000000770000-000000067F000040020000E0000000774000__000000914E3F38F0\n000000067F000040020000E0000000770000-00000
0067F000040020000E0000000774000__000000931B9A2710\n000000067F000040020000E0000000773F85-000000067F000040020000E000000077C960__0000004803BDE029-00000048B365CD91\n000000067F000040020000E0000000774000-000000067F000040020000E0000000778000__000000574B7FF240\n000000067F000040020000E0000000774000-000000067F000040020000E0000000778000__00000073AD3FE6B8\n000000067F000040020000E0000000774000-000000067F000040020000E0000000778000__000000914E3F38F0\n000000067F000040020000E0000000774000-000000067F000040020000E0000000778000__000000931B9A2710\n000000067F000040020000E0000000778000-000000067F000040020000E000000077C000__000000574B7FF240\n000000067F000040020000E0000000778000-000000067F000040020000E000000077C000__00000073AD3FE6B8\n000000067F000040020000E0000000778000-000000067F000040020000E000000077C000__000000914E3F38F0\n000000067F000040020000E0000000778000-000000067F000040020000E000000077C000__000000931B9A2710\n000000067F000040020000E000000077C000-000000067F000040020000E0000000780000__000000574B7FF240\n000000067F000040020000E000000077C000-000000067F000040020000E0000000780000__00000073AD3FE6B8\n000000067F000040020000E000000077C000-000000067F000040020000E0000000780000__000000914E3F38F0\n000000067F000040020000E000000077C000-000000067F000040020000E0000000780000__000000931B9A2710\n000000067F000040020000E000000077C960-000000067F000040020000E0000000785337__0000004803BDE029-00000048B365CD91\n000000067F000040020000E0000000780000-000000067F000040020000E0000000784000__000000574B7FF240\n000000067F000040020000E0000000780000-000000067F000040020000E0000000784000__00000073AD3FE6B8\n000000067F000040020000E0000000780000-000000067F000040020000E0000000784000__000000914E3F38F0\n000000067F000040020000E0000000780000-000000067F000040020000E0000000784000__000000931B9A2710\n000000067F000040020000E0000000784000-000000067F000040020000E0000000788000__000000574B7FF240\n000000067F000040020000E0000000784000-000000067F000040020000E0000000788000__00000073AD3FE6B8\n000000067F000040020000E0000000784000-000000067F00004002
0000E0000000788000__000000914E3F38F0\n000000067F000040020000E0000000784000-000000067F000040020000E0000000788000__000000931B9A2710\n000000067F000040020000E0000000785337-000000067F000040020000E000000078DD09__0000004803BDE029-00000048B365CD91\n000000067F000040020000E0000000788000-000000067F000040020000E000000078C000__000000574B7FF240\n000000067F000040020000E0000000788000-000000067F000040020000E000000078C000__00000073AD3FE6B8\n000000067F000040020000E0000000788000-000000067F000040020000E000000078C000__000000914E3F38F0\n000000067F000040020000E0000000788000-000000067F000040020000E000000078C000__000000931B9A2710\n000000067F000040020000E000000078C000-000000067F000040020000E0000000790000__000000572A7A05D8\n000000067F000040020000E000000078C000-000000067F000040020000E0000000790000__0000005D2FFFFB38\n000000067F000040020000E000000078C000-000000067F000040020000E0000000790000__00000073AD3FE6B8\n000000067F000040020000E000000078C000-000000067F000040020000E0000000790000__000000914E3F38F0\n000000067F000040020000E000000078C000-000000067F000040020000E0000000790000__000000931B9A2710\n000000067F000040020000E000000078DD09-000000067F000040020000E0000100000000__0000004803BDE029-00000048B365CD91\n000000067F000040020000E000000078E02B-000000067F000040020000E0000000796A04__00000048B365CD91-000000495313EB21\n000000067F000040020000E0000000790000-000000067F000040020000E0000000794000__000000572A7A05D8\n000000067F000040020000E0000000790000-000000067F000040020000E0000000794000__0000005D2FFFFB38\n000000067F000040020000E0000000790000-000000067F000040020000E0000000794000__00000073AD3FE6B8\n000000067F000040020000E0000000790000-000000067F000040020000E0000000794000__000000914E3F38F0\n000000067F000040020000E0000000790000-000000067F000040020000E0000000794000__000000931B9A2710\n000000067F000040020000E0000000794000-000000067F000040020000E0000000798000__000000572A7A05D8\n000000067F000040020000E0000000794000-000000067F000040020000E0000000798000__0000005D2FFFFB38\n000000067F000040020000E0000000794000-000000067F0000
40020000E0000000798000__00000073AD3FE6B8\n000000067F000040020000E0000000794000-000000067F000040020000E0000000798000__000000914E3F38F0\n000000067F000040020000E0000000794000-000000067F000040020000E0000000798000__000000931B9A2710\n000000067F000040020000E0000000796A04-000000067F000040020000E000000079F3DB__00000048B365CD91-000000495313EB21\n000000067F000040020000E0000000798000-000000067F000040020000E000000079C000__000000572A7A05D8\n000000067F000040020000E0000000798000-000000067F000040020000E000000079C000__0000005D2FFFFB38\n000000067F000040020000E0000000798000-000000067F000040020000E000000079C000__00000073AD3FE6B8\n000000067F000040020000E0000000798000-000000067F000040020000E000000079C000__000000914E3F38F0\n000000067F000040020000E0000000798000-000000067F000040020000E000000079C000__000000931B9A2710\n000000067F000040020000E000000079C000-000000067F000040020000E00000007A0000__000000572A7A05D8\n000000067F000040020000E000000079C000-000000067F000040020000E00000007A0000__0000005D2FFFFB38\n000000067F000040020000E000000079C000-000000067F000040020000E00000007A0000__00000073AD3FE6B8\n000000067F000040020000E000000079C000-000000067F000040020000E00000007A0000__000000914E3F38F0\n000000067F000040020000E000000079C000-000000067F000040020000E00000007A0000__000000931B9A2710\n000000067F000040020000E000000079F3DB-000000067F000040020000E00000007A7DC0__00000048B365CD91-000000495313EB21\n000000067F000040020000E00000007A0000-000000067F000040020000E00000007A4000__000000572A7A05D8\n000000067F000040020000E00000007A0000-000000067F000040020000E00000007A4000__0000005D2FFFFB38\n000000067F000040020000E00000007A0000-000000067F000040020000E00000007A4000__00000073AD3FE6B8\n000000067F000040020000E00000007A0000-000000067F000040020000E00000007A4000__000000914E3F38F0\n000000067F000040020000E00000007A0000-000000067F000040020000E00000007A4000__000000931B9A2710\n000000067F000040020000E00000007A4000-000000067F000040020000E00000007A8000__000000572A7A05D8\n000000067F000040020000E00000007A4000-000000067F000040020000E0000
0007A8000__0000005D2FFFFB38\n000000067F000040020000E00000007A4000-000000067F000040020000E00000007A8000__00000073AD3FE6B8\n000000067F000040020000E00000007A4000-000000067F000040020000E00000007A8000__000000914E3F38F0\n000000067F000040020000E00000007A4000-000000067F000040020000E00000007A8000__000000931B9A2710\n000000067F000040020000E00000007A7DC0-000000067F000040020000E00000007B079C__00000048B365CD91-000000495313EB21\n000000067F000040020000E00000007A8000-000000067F000040020000E00000007AC000__000000572A7A05D8\n000000067F000040020000E00000007A8000-000000067F000040020000E00000007AC000__0000005D2FFFFB38\n000000067F000040020000E00000007A8000-000000067F000040020000E00000007AC000__00000073AD3FE6B8\n000000067F000040020000E00000007A8000-000000067F000040020000E00000007AC000__000000914E3F38F0\n000000067F000040020000E00000007A8000-000000067F000040020000E00000007AC000__000000931B9A2710\n000000067F000040020000E00000007AC000-000000067F000040020000E00000007B0000__000000572A7A05D8\n000000067F000040020000E00000007AC000-000000067F000040020000E00000007B0000__0000005D2FFFFB38\n000000067F000040020000E00000007AC000-000000067F000040020000E00000007B0000__00000073AD3FE6B8\n000000067F000040020000E00000007AC000-000000067F000040020000E00000007B0000__000000914E3F38F0\n000000067F000040020000E00000007AC000-000000067F000040020000E00000007B0000__000000931B9A2710\n000000067F000040020000E00000007B0000-000000067F000040020000E00000007B4000__000000572A7A05D8\n000000067F000040020000E00000007B0000-000000067F000040020000E00000007B4000__0000005D2FFFFB38\n000000067F000040020000E00000007B0000-000000067F000040020000E00000007B4000__00000073AD3FE6B8\n000000067F000040020000E00000007B0000-000000067F000040020000E00000007B4000__000000914E3F38F0\n000000067F000040020000E00000007B0000-000000067F000040020000E00000007B4000__000000931B9A2710\n000000067F000040020000E00000007B079C-000000067F000040020000E00000007B9183__00000048B365CD91-000000495313EB21\n000000067F000040020000E00000007B4000-000000067F000040020000E00000007B8000__00
0000572A7A05D8\n000000067F000040020000E00000007B4000-000000067F000040020000E00000007B8000__0000005D2FFFFB38\n000000067F000040020000E00000007B4000-000000067F000040020000E00000007B8000__00000073AD3FE6B8\n000000067F000040020000E00000007B4000-000000067F000040020000E00000007B8000__000000914E3F38F0\n000000067F000040020000E00000007B4000-000000067F000040020000E00000007B8000__000000931B9A2710\n000000067F000040020000E00000007B8000-000000067F000040020000E00000007BC000__000000572A7A05D8\n000000067F000040020000E00000007B8000-000000067F000040020000E00000007BC000__0000005D2FFFFB38\n000000067F000040020000E00000007B8000-000000067F000040020000E00000007BC000__00000073AD3FE6B8\n000000067F000040020000E00000007B8000-000000067F000040020000E00000007BC000__000000914E3F38F0\n000000067F000040020000E00000007B8000-000000067F000040020000E00000007BC000__000000931B9A2710\n000000067F000040020000E00000007B9183-000000067F000040020000E00000007C1B60__00000048B365CD91-000000495313EB21\n000000067F000040020000E00000007BC000-000000067F000040020000E00000007C0000__000000572A7A05D8\n000000067F000040020000E00000007BC000-000000067F000040020000E00000007C0000__0000005D2FFFFB38\n000000067F000040020000E00000007BC000-000000067F000040020000E00000007C0000__00000073AD3FE6B8\n000000067F000040020000E00000007BC000-000000067F000040020000E00000007C0000__000000914E3F38F0\n000000067F000040020000E00000007BC000-000000067F000040020000E00000007C0000__000000931B9A2710\n000000067F000040020000E00000007C0000-000000067F000040020000E00000007C4000__000000572A7A05D8\n000000067F000040020000E00000007C0000-000000067F000040020000E00000007C4000__0000005D2FFFFB38\n000000067F000040020000E00000007C0000-000000067F000040020000E00000007C4000__00000073AD3FE6B8\n000000067F000040020000E00000007C0000-000000067F000040020000E00000007C4000__000000914E3F38F0\n000000067F000040020000E00000007C0000-000000067F000040020000E00000007C4000__000000931B9A2710\n000000067F000040020000E00000007C1B60-000000067F000040020000E00000007CA53A__00000048B365CD91-000000495313EB2
1\n000000067F000040020000E00000007C4000-000000067F000040020000E00000007C8000__000000572A7A05D8\n000000067F000040020000E00000007C4000-000000067F000040020000E00000007C8000__0000005D2FFFFB38\n000000067F000040020000E00000007C4000-000000067F000040020000E00000007C8000__00000073AD3FE6B8\n000000067F000040020000E00000007C4000-000000067F000040020000E00000007C8000__000000914E3F38F0\n000000067F000040020000E00000007C4000-000000067F000040020000E00000007C8000__000000931B9A2710\n000000067F000040020000E00000007C8000-000000067F000040020000E00000007CC000__000000572A7A05D8\n000000067F000040020000E00000007C8000-000000067F000040020000E00000007CC000__0000005D2FFFFB38\n000000067F000040020000E00000007C8000-000000067F000040020000E00000007CC000__00000073AD3FE6B8\n000000067F000040020000E00000007C8000-000000067F000040020000E00000007CC000__000000914E3F38F0\n000000067F000040020000E00000007C8000-000000067F000040020000E00000007CC000__000000931B9A2710\n000000067F000040020000E00000007CA53A-000000067F000040020000E00000007D2F02__00000048B365CD91-000000495313EB21\n000000067F000040020000E00000007CC000-000000067F000040020000E00000007D0000__000000572A7A05D8\n000000067F000040020000E00000007CC000-000000067F000040020000E00000007D0000__0000005D2FFFFB38\n000000067F000040020000E00000007CC000-000000067F000040020000E00000007D0000__00000073AD3FE6B8\n000000067F000040020000E00000007CC000-000000067F000040020000E00000007D0000__000000914E3F38F0\n000000067F000040020000E00000007CC000-000000067F000040020000E00000007D0000__000000931B9A2710\n000000067F000040020000E00000007D0000-000000067F000040020000E00000007D4000__000000572A7A05D8\n000000067F000040020000E00000007D0000-000000067F000040020000E00000007D4000__0000005D2FFFFB38\n000000067F000040020000E00000007D0000-000000067F000040020000E00000007D4000__00000073AD3FE6B8\n000000067F000040020000E00000007D0000-000000067F000040020000E00000007D4000__000000914E3F38F0\n000000067F000040020000E00000007D0000-000000067F000040020000E00000007D4000__000000931B9A2710\n000000067F000040020000E0000
0007D2F02-000000067F000040020000E00000007DB8D5__00000048B365CD91-000000495313EB21\n000000067F000040020000E00000007D4000-000000067F000040020000E00000007D8000__000000572A7A05D8\n000000067F000040020000E00000007D4000-000000067F000040020000E00000007D8000__0000005D2FFFFB38\n000000067F000040020000E00000007D4000-000000067F000040020000E00000007D8000__00000073AD3FE6B8\n000000067F000040020000E00000007D4000-000000067F000040020000E00000007D8000__000000914E3F38F0\n000000067F000040020000E00000007D4000-000000067F000040020000E00000007D8000__000000931B9A2710\n000000067F000040020000E00000007D8000-000000067F000040020000E00000007DC000__000000572A7A05D8\n000000067F000040020000E00000007D8000-000000067F000040020000E00000007DC000__0000005D2FFFFB38\n000000067F000040020000E00000007D8000-000000067F000040020000E00000007DC000__00000073AD3FE6B8\n000000067F000040020000E00000007D8000-000000067F000040020000E00000007DC000__000000914E3F38F0\n000000067F000040020000E00000007D8000-000000067F000040020000E00000007DC000__000000931B9A2710\n000000067F000040020000E00000007DB8D5-000000067F000040020000E00000007E42BB__00000048B365CD91-000000495313EB21\n000000067F000040020000E00000007DC000-000000067F000040020000E00000007E0000__000000572A7A05D8\n000000067F000040020000E00000007DC000-000000067F000040020000E00000007E0000__0000005D2FFFFB38\n000000067F000040020000E00000007DC000-000000067F000040020000E00000007E0000__00000073AD3FE6B8\n000000067F000040020000E00000007DC000-000000067F000040020000E00000007E0000__000000914E3F38F0\n000000067F000040020000E00000007DC000-000000067F000040020000E00000007E0000__000000931B9A2710\n000000067F000040020000E00000007E0000-000000067F000040020000E00000007E4000__000000572A7A05D8\n000000067F000040020000E00000007E0000-000000067F000040020000E00000007E4000__0000005D2FFFFB38\n000000067F000040020000E00000007E0000-000000067F000040020000E00000007E4000__00000073AD3FE6B8\n000000067F000040020000E00000007E0000-000000067F000040020000E00000007E4000__000000914E3F38F0\n000000067F000040020000E00000007E0000-000
000067F000040020000E00000007E4000__000000931B9A2710\n000000067F000040020000E00000007E4000-000000067F000040020000E00000007E8000__0000004A297FFC38\n000000067F000040020000E00000007E4000-000000067F000040020000E00000007E8000__0000005D2FFFFB38\n000000067F000040020000E00000007E4000-000000067F000040020000E00000007E8000__00000073AD3FE6B8\n000000067F000040020000E00000007E4000-000000067F000040020000E00000007E8000__000000914E3F38F0\n000000067F000040020000E00000007E4000-000000067F000040020000E00000007E8000__000000931B9A2710\n000000067F000040020000E00000007E42BB-000000067F000040020000E0000100000000__00000048B365CD91-000000495313EB21\n000000067F000040020000E00000007E458D-000000067F000040020000E00000007ECF68__000000495313EB21-0000004A02BBD6B1\n000000067F000040020000E00000007E8000-000000067F000040020000E00000007EC000__0000004A297FFC38\n000000067F000040020000E00000007E8000-000000067F000040020000E00000007EC000__0000005D2FFFFB38\n000000067F000040020000E00000007E8000-000000067F000040020000E00000007EC000__00000073AD3FE6B8\n000000067F000040020000E00000007E8000-000000067F000040020000E00000007EC000__000000914E3F38F0\n000000067F000040020000E00000007E8000-000000067F000040020000E00000007EC000__000000931B9A2710\n000000067F000040020000E00000007EC000-000000067F000040020000E00000007F0000__0000004A297FFC38\n000000067F000040020000E00000007EC000-000000067F000040020000E00000007F0000__0000005D2FFFFB38\n000000067F000040020000E00000007EC000-000000067F000040020000E00000007F0000__00000073AD3FE6B8\n000000067F000040020000E00000007EC000-000000067F000040020000E00000007F0000__000000914E3F38F0\n000000067F000040020000E00000007EC000-000000067F000040020000E00000007F0000__000000931B9A2710\n000000067F000040020000E00000007ECF68-000000067F000040020000E00000007F594B__000000495313EB21-0000004A02BBD6B1\n000000067F000040020000E00000007F0000-000000067F000040020000E00000007F4000__0000004A297FFC38\n000000067F000040020000E00000007F0000-000000067F000040020000E00000007F4000__0000005D2FFFFB38\n000000067F000040020000E00000007F0000
-000000067F000040020000E00000007F4000__00000073AD3FE6B8\n000000067F000040020000E00000007F0000-000000067F000040020000E00000007F4000__000000914E3F38F0\n000000067F000040020000E00000007F0000-000000067F000040020000E00000007F4000__000000931B9A2710\n000000067F000040020000E00000007F4000-000000067F000040020000E00000007F8000__0000004A297FFC38\n000000067F000040020000E00000007F4000-000000067F000040020000E00000007F8000__0000005D2FFFFB38\n000000067F000040020000E00000007F4000-000000067F000040020000E00000007F8000__00000073AD3FE6B8\n000000067F000040020000E00000007F4000-000000067F000040020000E00000007F8000__000000914E3F38F0\n000000067F000040020000E00000007F4000-000000067F000040020000E00000007F8000__000000931B9A2710\n000000067F000040020000E00000007F594B-000000067F000040020000E00000007FE326__000000495313EB21-0000004A02BBD6B1\n000000067F000040020000E00000007F8000-000000067F000040020000E00000007FC000__0000004A297FFC38\n000000067F000040020000E00000007F8000-000000067F000040020000E00000007FC000__0000005D2FFFFB38\n000000067F000040020000E00000007F8000-000000067F000040020000E00000007FC000__00000073AD3FE6B8\n000000067F000040020000E00000007F8000-000000067F000040020000E00000007FC000__000000914E3F38F0\n000000067F000040020000E00000007F8000-000000067F000040020000E00000007FC000__000000931B9A2710\n000000067F000040020000E00000007FC000-000000067F000040020000E0000000800000__0000004A297FFC38\n000000067F000040020000E00000007FC000-000000067F000040020000E0000000800000__0000005D2FFFFB38\n000000067F000040020000E00000007FC000-000000067F000040020000E0000000800000__00000073AD3FE6B8\n000000067F000040020000E00000007FC000-000000067F000040020000E0000000800000__000000914E3F38F0\n000000067F000040020000E00000007FC000-000000067F000040020000E0000000800000__000000931B9A2710\n000000067F000040020000E00000007FE326-000000067F000040020000E0000000806CF5__000000495313EB21-0000004A02BBD6B1\n000000067F000040020000E0000000800000-000000067F000040020000E0000000804000__0000004A297FFC38\n000000067F000040020000E0000000800000-000000067F00
0040020000E0000000804000__0000005D2FFFFB38\n000000067F000040020000E0000000800000-000000067F000040020000E0000000804000__00000073AD3FE6B8\n000000067F000040020000E0000000800000-000000067F000040020000E0000000804000__000000914E3F38F0\n000000067F000040020000E0000000800000-000000067F000040020000E0000000804000__000000931B9A2710\n000000067F000040020000E0000000804000-000000067F000040020000E0000000808000__0000004A297FFC38\n000000067F000040020000E0000000804000-000000067F000040020000E0000000808000__0000005D2FFFFB38\n000000067F000040020000E0000000804000-000000067F000040020000E0000000808000__00000073AD3FE6B8\n000000067F000040020000E0000000804000-000000067F000040020000E0000000808000__000000914E3F38F0\n000000067F000040020000E0000000804000-000000067F000040020000E0000000808000__000000931B9A2710\n000000067F000040020000E0000000806CF5-000000067F000040020000E000000080F6D5__000000495313EB21-0000004A02BBD6B1\n000000067F000040020000E0000000808000-000000067F000040020000E000000080C000__0000004A297FFC38\n000000067F000040020000E0000000808000-000000067F000040020000E000000080C000__0000005D2FFFFB38\n000000067F000040020000E0000000808000-000000067F000040020000E000000080C000__00000073AD3FE6B8\n000000067F000040020000E0000000808000-000000067F000040020000E000000080C000__000000914E3F38F0\n000000067F000040020000E0000000808000-000000067F000040020000E000000080C000__000000931B9A2710\n000000067F000040020000E000000080C000-000000067F000040020000E0000000810000__0000004A297FFC38\n000000067F000040020000E000000080C000-000000067F000040020000E0000000810000__0000005D2FFFFB38\n000000067F000040020000E000000080C000-000000067F000040020000E0000000810000__00000073AD3FE6B8\n000000067F000040020000E000000080C000-000000067F000040020000E0000000810000__000000914E3F38F0\n000000067F000040020000E000000080C000-000000067F000040020000E0000000810000__000000931B9A2710\n000000067F000040020000E000000080F6D5-000000067F000040020000E00000008180B1__000000495313EB21-0000004A02BBD6B1\n000000067F000040020000E0000000810000-000000067F000040020000E00
00000814000__0000004A297FFC38\n000000067F000040020000E0000000810000-000000067F000040020000E0000000814000__0000005D2FFFFB38\n000000067F000040020000E0000000810000-000000067F000040020000E0000000814000__00000073AD3FE6B8\n000000067F000040020000E0000000810000-000000067F000040020000E0000000814000__000000914E3F38F0\n000000067F000040020000E0000000810000-000000067F000040020000E0000000814000__000000931B9A2710\n000000067F000040020000E0000000814000-000000067F000040020000E0000000818000__0000004A297FFC38\n000000067F000040020000E0000000814000-000000067F000040020000E0000000818000__0000005D2FFFFB38\n000000067F000040020000E0000000814000-000000067F000040020000E0000000818000__00000073AD3FE6B8\n000000067F000040020000E0000000814000-000000067F000040020000E0000000818000__000000914E3F38F0\n000000067F000040020000E0000000814000-000000067F000040020000E0000000818000__000000931B9A2710\n000000067F000040020000E0000000818000-000000067F000040020000E000000081C000__0000004A297FFC38\n000000067F000040020000E0000000818000-000000067F000040020000E000000081C000__0000005D2FFFFB38\n000000067F000040020000E0000000818000-000000067F000040020000E000000081C000__00000073AD3FE6B8\n000000067F000040020000E0000000818000-000000067F000040020000E000000081C000__000000914E3F38F0\n000000067F000040020000E0000000818000-000000067F000040020000E000000081C000__000000931B9A2710\n000000067F000040020000E00000008180B1-000000067F000040020000E0000000820A9A__000000495313EB21-0000004A02BBD6B1\n000000067F000040020000E000000081C000-000000067F000040020000E0000000820000__0000004A297FFC38\n000000067F000040020000E000000081C000-000000067F000040020000E0000000820000__0000005D2FFFFB38\n000000067F000040020000E000000081C000-000000067F000040020000E0000000820000__00000073AD3FE6B8\n000000067F000040020000E000000081C000-000000067F000040020000E0000000820000__000000914E3F38F0\n000000067F000040020000E000000081C000-000000067F000040020000E0000000820000__000000931B9A2710\n000000067F000040020000E0000000820000-000000067F000040020000E0000000824000__0000004A297FFC38\
n000000067F000040020000E0000000820000-000000067F000040020000E0000000824000__0000005D2FFFFB38\n000000067F000040020000E0000000820000-000000067F000040020000E0000000824000__00000073AD3FE6B8\n000000067F000040020000E0000000820000-000000067F000040020000E0000000824000__000000914E3F38F0\n000000067F000040020000E0000000820000-000000067F000040020000E0000000824000__000000931B9A2710\n000000067F000040020000E0000000820A9A-000000067F000040020000E000000082946F__000000495313EB21-0000004A02BBD6B1\n000000067F000040020000E0000000824000-000000067F000040020000E0000000828000__0000004A297FFC38\n000000067F000040020000E0000000824000-000000067F000040020000E0000000828000__0000005D2FFFFB38\n000000067F000040020000E0000000824000-000000067F000040020000E0000000828000__00000073AD3FE6B8\n000000067F000040020000E0000000824000-000000067F000040020000E0000000828000__000000914E3F38F0\n000000067F000040020000E0000000824000-000000067F000040020000E0000000828000__000000931B9A2710\n000000067F000040020000E0000000828000-000000067F000040020000E000000082C000__0000004A297FFC38\n000000067F000040020000E0000000828000-000000067F000040020000E000000082C000__0000005D2FFFFB38\n000000067F000040020000E0000000828000-000000067F000040020000E000000082C000__00000073AD3FE6B8\n000000067F000040020000E0000000828000-000000067F000040020000E000000082C000__000000914E3F38F0\n000000067F000040020000E0000000828000-000000067F000040020000E000000082C000__000000931B9A2710\n000000067F000040020000E000000082946F-000000067F000040020000E0000000831E53__000000495313EB21-0000004A02BBD6B1\n000000067F000040020000E000000082C000-000000067F000040020000E0000000830000__0000004A297FFC38\n000000067F000040020000E000000082C000-000000067F000040020000E0000000830000__0000005D2FFFFB38\n000000067F000040020000E000000082C000-000000067F000040020000E0000000830000__00000073AD3FE6B8\n000000067F000040020000E000000082C000-000000067F000040020000E0000000830000__000000914E3F38F0\n000000067F000040020000E000000082C000-000000067F000040020000E0000000830000__000000931B9A2710\n000000067F00
0040020000E0000000830000-000000067F000040020000E0000000834000__0000004A297FFC38\n000000067F000040020000E0000000830000-000000067F000040020000E0000000834000__0000005D2FFFFB38\n000000067F000040020000E0000000830000-000000067F000040020000E0000000834000__00000073AD3FE6B8\n000000067F000040020000E0000000830000-000000067F000040020000E0000000834000__000000914E3F38F0\n000000067F000040020000E0000000830000-000000067F000040020000E0000000834000__000000931B9A2710\n000000067F000040020000E0000000831E53-000000067F000040020000E000000083A834__000000495313EB21-0000004A02BBD6B1\n000000067F000040020000E0000000834000-000000067F000040020000E0000000838000__0000004A297FFC38\n000000067F000040020000E0000000834000-000000067F000040020000E0000000838000__0000005D2FFFFB38\n000000067F000040020000E0000000834000-000000067F000040020000E0000000838000__00000073AD3FE6B8\n000000067F000040020000E0000000834000-000000067F000040020000E0000000838000__000000914E3F38F0\n000000067F000040020000E0000000834000-000000067F000040020000E0000000838000__000000931B9A2710\n000000067F000040020000E0000000838000-000000067F000040020000E000000083C000__0000004A297FFC38\n000000067F000040020000E0000000838000-000000067F000040020000E000000083C000__0000005D2FFFFB38\n000000067F000040020000E0000000838000-000000067F000040020000E000000083C000__00000073AD3FE6B8\n000000067F000040020000E0000000838000-000000067F000040020000E000000083C000__000000914E3F38F0\n000000067F000040020000E0000000838000-000000067F000040020000E000000083C000__000000931B9A2710\n000000067F000040020000E000000083A834-000000067F000040020000E0000000843201__000000495313EB21-0000004A02BBD6B1\n000000067F000040020000E000000083C000-000000067F000040020000E0000000840000__0000004A297FFC38\n000000067F000040020000E000000083C000-000000067F000040020000E0000000840000__0000005D2FFFFB38\n000000067F000040020000E000000083C000-000000067F000040020000E0000000840000__00000073AD3FE6B8\n000000067F000040020000E000000083C000-000000067F000040020000E0000000840000__000000914E3F38F0\n000000067F000040020000E00
0000083C000-000000067F000040020000E0000000840000__000000931B9A2710\n000000067F000040020000E0000000840000-000000067F000040020000E0000000844000__0000004A297FFC38\n000000067F000040020000E0000000840000-000000067F000040020000E0000000844000__000000574B7FF240\n000000067F000040020000E0000000840000-000000067F000040020000E0000000844000__00000073AD3FE6B8\n000000067F000040020000E0000000840000-000000067F000040020000E0000000844000__000000914E3F38F0\n000000067F000040020000E0000000840000-000000067F000040020000E0000000844000__000000931B9A2710\n000000067F000040020000E0000000843201-000000067F000040020000E0000100000000__000000495313EB21-0000004A02BBD6B1\n000000067F000040020000E0000000843529-000000067F000040020000E000000084BF01__0000004A02BBD6B1-0000004AA26BDB49\n000000067F000040020000E0000000844000-000000067F000040020000E0000000848000__0000004A297FFC38\n000000067F000040020000E0000000844000-000000067F000040020000E0000000848000__000000574B7FF240\n000000067F000040020000E0000000844000-000000067F000040020000E0000000848000__00000073AD3FE6B8\n000000067F000040020000E0000000844000-000000067F000040020000E0000000848000__000000914E3F38F0\n000000067F000040020000E0000000844000-000000067F000040020000E0000000848000__000000931B9A2710\n000000067F000040020000E0000000848000-000000067F000040020000E000000084C000__0000004A297FFC38\n000000067F000040020000E0000000848000-000000067F000040020000E000000084C000__000000574B7FF240\n000000067F000040020000E0000000848000-000000067F000040020000E000000084C000__00000073AD3FE6B8\n000000067F000040020000E0000000848000-000000067F000040020000E000000084C000__000000914E3F38F0\n000000067F000040020000E0000000848000-000000067F000040020000E000000084C000__000000931B9A2710\n000000067F000040020000E000000084BF01-000000067F000040020000E00000008548D9__0000004A02BBD6B1-0000004AA26BDB49\n000000067F000040020000E000000084C000-000000067F000040020000E0000000850000__0000004A297FFC38\n000000067F000040020000E000000084C000-000000067F000040020000E0000000850000__000000574B7FF240\n000000067F00004002000
0E000000084C000-000000067F000040020000E0000000850000__00000073AD3FE6B8\n000000067F000040020000E000000084C000-000000067F000040020000E0000000850000__000000914E3F38F0\n000000067F000040020000E000000084C000-000000067F000040020000E0000000850000__000000931B9A2710\n000000067F000040020000E0000000850000-000000067F000040020000E0000000854000__0000004A297FFC38\n000000067F000040020000E0000000850000-000000067F000040020000E0000000854000__000000574B7FF240\n000000067F000040020000E0000000850000-000000067F000040020000E0000000854000__00000073AD3FE6B8\n000000067F000040020000E0000000850000-000000067F000040020000E0000000854000__000000914E3F38F0\n000000067F000040020000E0000000850000-000000067F000040020000E0000000854000__000000931B9A2710\n000000067F000040020000E0000000854000-000000067F000040020000E0000000858000__0000004A297FFC38\n000000067F000040020000E0000000854000-000000067F000040020000E0000000858000__000000574B7FF240\n000000067F000040020000E0000000854000-000000067F000040020000E0000000858000__00000073AD3FE6B8\n000000067F000040020000E0000000854000-000000067F000040020000E0000000858000__000000914E3F38F0\n000000067F000040020000E0000000854000-000000067F000040020000E0000000858000__000000931B9A2710\n000000067F000040020000E00000008548D9-000000067F000040020000E000000085D2CA__0000004A02BBD6B1-0000004AA26BDB49\n000000067F000040020000E0000000858000-000000067F000040020000E000000085C000__000000574B7FF240\n000000067F000040020000E0000000858000-000000067F000040020000E000000085C000__00000073AD3FE6B8\n000000067F000040020000E0000000858000-000000067F000040020000E000000085C000__000000914E3F38F0\n000000067F000040020000E0000000858000-000000067F000040020000E000000085C000__000000931B9A2710\n000000067F000040020000E0000000858000-030000000000000000000000000000000002__0000004A297FFC38\n000000067F000040020000E000000085C000-000000067F000040020000E0000000860000__000000574B7FF240\n000000067F000040020000E000000085C000-000000067F000040020000E0000000860000__00000073AD3FE6B8\n000000067F000040020000E000000085C000-000000067F0000
40020000E0000000860000__000000914E3F38F0\n000000067F000040020000E000000085C000-000000067F000040020000E0000000860000__000000931B9A2710\n000000067F000040020000E000000085D2CA-000000067F000040020000E0000000865CB1__0000004A02BBD6B1-0000004AA26BDB49\n000000067F000040020000E0000000860000-000000067F000040020000E0000000864000__000000574B7FF240\n000000067F000040020000E0000000860000-000000067F000040020000E0000000864000__00000073AD3FE6B8\n000000067F000040020000E0000000860000-000000067F000040020000E0000000864000__000000914E3F38F0\n000000067F000040020000E0000000860000-000000067F000040020000E0000000864000__000000931B9A2710\n000000067F000040020000E0000000864000-000000067F000040020000E0000000868000__000000574B7FF240\n000000067F000040020000E0000000864000-000000067F000040020000E0000000868000__00000073AD3FE6B8\n000000067F000040020000E0000000864000-000000067F000040020000E0000000868000__000000914E3F38F0\n000000067F000040020000E0000000864000-000000067F000040020000E0000000868000__000000931B9A2710\n000000067F000040020000E0000000865CB1-000000067F000040020000E000000086E688__0000004A02BBD6B1-0000004AA26BDB49\n000000067F000040020000E0000000868000-000000067F000040020000E000000086C000__000000574B7FF240\n000000067F000040020000E0000000868000-000000067F000040020000E000000086C000__00000073AD3FE6B8\n000000067F000040020000E0000000868000-000000067F000040020000E000000086C000__000000914E3F38F0\n000000067F000040020000E0000000868000-000000067F000040020000E000000086C000__000000931B9A2710\n000000067F000040020000E000000086C000-000000067F000040020000E0000000870000__000000574B7FF240\n000000067F000040020000E000000086C000-000000067F000040020000E0000000870000__00000073AD3FE6B8\n000000067F000040020000E000000086C000-000000067F000040020000E0000000870000__000000914E3F38F0\n000000067F000040020000E000000086C000-000000067F000040020000E0000000870000__000000931B9A2710\n000000067F000040020000E000000086E688-000000067F000040020000E0000000877067__0000004A02BBD6B1-0000004AA26BDB49\n000000067F000040020000E0000000870000-000000067F
000040020000E0000000874000__000000574B7FF240\n000000067F000040020000E0000000870000-000000067F000040020000E0000000874000__00000073AD3FE6B8\n000000067F000040020000E0000000870000-000000067F000040020000E0000000874000__000000914E3F38F0\n000000067F000040020000E0000000870000-000000067F000040020000E0000000874000__000000931B9A2710\n000000067F000040020000E0000000874000-000000067F000040020000E0000000878000__000000574B7FF240\n000000067F000040020000E0000000874000-000000067F000040020000E0000000878000__00000073AD3FE6B8\n000000067F000040020000E0000000874000-000000067F000040020000E0000000878000__000000914E3F38F0\n000000067F000040020000E0000000874000-000000067F000040020000E0000000878000__000000931B9A2710\n000000067F000040020000E0000000877067-000000067F000040020000E000000087FA40__0000004A02BBD6B1-0000004AA26BDB49\n000000067F000040020000E0000000878000-000000067F000040020000E000000087C000__000000574B7FF240\n000000067F000040020000E0000000878000-000000067F000040020000E000000087C000__00000073AD3FE6B8\n000000067F000040020000E0000000878000-000000067F000040020000E000000087C000__000000914E3F38F0\n000000067F000040020000E0000000878000-000000067F000040020000E000000087C000__000000931B9A2710\n000000067F000040020000E000000087C000-000000067F000040020000E0000000880000__000000574B7FF240\n000000067F000040020000E000000087C000-000000067F000040020000E0000000880000__00000073AD3FE6B8\n000000067F000040020000E000000087C000-000000067F000040020000E0000000880000__000000914E3F38F0\n000000067F000040020000E000000087C000-000000067F000040020000E0000000880000__000000931B9A2710\n000000067F000040020000E000000087FA40-000000067F000040020000E0000000888413__0000004A02BBD6B1-0000004AA26BDB49\n000000067F000040020000E0000000880000-000000067F000040020000E0000000884000__000000574B7FF240\n000000067F000040020000E0000000880000-000000067F000040020000E0000000884000__00000073AD3FE6B8\n000000067F000040020000E0000000880000-000000067F000040020000E0000000884000__000000914E3F38F0\n000000067F000040020000E0000000880000-000000067F000040020000E
0000000884000__000000931B9A2710\n000000067F000040020000E0000000884000-000000067F000040020000E0000000888000__000000574B7FF240\n000000067F000040020000E0000000884000-000000067F000040020000E0000000888000__00000073AD3FE6B8\n000000067F000040020000E0000000884000-000000067F000040020000E0000000888000__000000914E3F38F0\n000000067F000040020000E0000000884000-000000067F000040020000E0000000888000__000000931B9A2710\n000000067F000040020000E0000000888000-000000067F000040020000E000000088C000__000000574B7FF240\n000000067F000040020000E0000000888000-000000067F000040020000E000000088C000__00000073AD3FE6B8\n000000067F000040020000E0000000888000-000000067F000040020000E000000088C000__000000914E3F38F0\n000000067F000040020000E0000000888000-000000067F000040020000E000000088C000__000000931B9A2710\n000000067F000040020000E0000000888413-000000067F000040020000E0000000890DE6__0000004A02BBD6B1-0000004AA26BDB49\n000000067F000040020000E000000088C000-000000067F000040020000E0000000890000__000000574B7FF240\n000000067F000040020000E000000088C000-000000067F000040020000E0000000890000__00000073AD3FE6B8\n000000067F000040020000E000000088C000-000000067F000040020000E0000000890000__000000914E3F38F0\n000000067F000040020000E000000088C000-000000067F000040020000E0000000890000__000000931B9A2710\n000000067F000040020000E0000000890000-000000067F000040020000E0000000894000__000000574B7FF240\n000000067F000040020000E0000000890000-000000067F000040020000E0000000894000__00000073AD3FE6B8\n000000067F000040020000E0000000890000-000000067F000040020000E0000000894000__000000914E3F38F0\n000000067F000040020000E0000000890000-000000067F000040020000E0000000894000__000000931B9A2710\n000000067F000040020000E0000000890DE6-000000067F000040020000E00000008997D0__0000004A02BBD6B1-0000004AA26BDB49\n000000067F000040020000E0000000894000-000000067F000040020000E0000000898000__000000574B7FF240\n000000067F000040020000E0000000894000-000000067F000040020000E0000000898000__00000073AD3FE6B8\n000000067F000040020000E0000000894000-000000067F000040020000E0000000898000
__000000914E3F38F0\n000000067F000040020000E0000000894000-000000067F000040020000E0000000898000__000000931B9A2710\n000000067F000040020000E0000000898000-000000067F000040020000E000000089C000__000000572A7A05D8\n000000067F000040020000E0000000898000-000000067F000040020000E000000089C000__0000005D2FFFFB38\n000000067F000040020000E0000000898000-000000067F000040020000E000000089C000__00000073AD3FE6B8\n000000067F000040020000E0000000898000-000000067F000040020000E000000089C000__000000914E3F38F0\n000000067F000040020000E0000000898000-000000067F000040020000E000000089C000__000000931B9A2710\n000000067F000040020000E00000008997D0-000000067F000040020000E0000100000000__0000004A02BBD6B1-0000004AA26BDB49\n000000067F000040020000E0000000899AAA-000000067F000040020000E00000008A248D__0000004AA26BDB49-0000004B421BFF39\n000000067F000040020000E000000089C000-000000067F000040020000E00000008A0000__000000572A7A05D8\n000000067F000040020000E000000089C000-000000067F000040020000E00000008A0000__0000005D2FFFFB38\n000000067F000040020000E000000089C000-000000067F000040020000E00000008A0000__00000073AD3FE6B8\n000000067F000040020000E000000089C000-000000067F000040020000E00000008A0000__000000914E3F38F0\n000000067F000040020000E000000089C000-000000067F000040020000E00000008A0000__000000931B9A2710\n000000067F000040020000E00000008A0000-000000067F000040020000E00000008A4000__000000572A7A05D8\n000000067F000040020000E00000008A0000-000000067F000040020000E00000008A4000__0000005D2FFFFB38\n000000067F000040020000E00000008A0000-000000067F000040020000E00000008A4000__00000073AD3FE6B8\n000000067F000040020000E00000008A0000-000000067F000040020000E00000008A4000__000000914E3F38F0\n000000067F000040020000E00000008A0000-000000067F000040020000E00000008A4000__000000931B9A2710\n000000067F000040020000E00000008A248D-000000067F000040020000E00000008AAE5E__0000004AA26BDB49-0000004B421BFF39\n000000067F000040020000E00000008A4000-000000067F000040020000E00000008A8000__000000572A7A05D8\n000000067F000040020000E00000008A4000-000000067F000040020000E00000008A
8000__0000005D2FFFFB38\n000000067F000040020000E00000008A4000-000000067F000040020000E00000008A8000__00000073AD3FE6B8\n000000067F000040020000E00000008A4000-000000067F000040020000E00000008A8000__000000914E3F38F0\n000000067F000040020000E00000008A4000-000000067F000040020000E00000008A8000__000000931B9A2710\n000000067F000040020000E00000008A8000-000000067F000040020000E00000008AC000__000000572A7A05D8\n000000067F000040020000E00000008A8000-000000067F000040020000E00000008AC000__0000005D2FFFFB38\n000000067F000040020000E00000008A8000-000000067F000040020000E00000008AC000__00000073AD3FE6B8\n000000067F000040020000E00000008A8000-000000067F000040020000E00000008AC000__000000914E3F38F0\n000000067F000040020000E00000008A8000-000000067F000040020000E00000008AC000__000000931B9A2710\n000000067F000040020000E00000008AAE5E-000000067F000040020000E00000008B383C__0000004AA26BDB49-0000004B421BFF39\n000000067F000040020000E00000008AC000-000000067F000040020000E00000008B0000__000000572A7A05D8\n000000067F000040020000E00000008AC000-000000067F000040020000E00000008B0000__0000005D2FFFFB38\n000000067F000040020000E00000008AC000-000000067F000040020000E00000008B0000__00000073AD3FE6B8\n000000067F000040020000E00000008AC000-000000067F000040020000E00000008B0000__000000914E3F38F0\n000000067F000040020000E00000008AC000-000000067F000040020000E00000008B0000__000000931B9A2710\n000000067F000040020000E00000008B0000-000000067F000040020000E00000008B4000__000000572A7A05D8\n000000067F000040020000E00000008B0000-000000067F000040020000E00000008B4000__0000005D2FFFFB38\n000000067F000040020000E00000008B0000-000000067F000040020000E00000008B4000__00000073AD3FE6B8\n000000067F000040020000E00000008B0000-000000067F000040020000E00000008B4000__000000914E3F38F0\n000000067F000040020000E00000008B0000-000000067F000040020000E00000008B4000__000000931B9A2710\n000000067F000040020000E00000008B383C-000000067F000040020000E00000008BC219__0000004AA26BDB49-0000004B421BFF39\n000000067F000040020000E00000008B4000-000000067F000040020000E00000008B8000__0000005
72A7A05D8\n000000067F000040020000E00000008B4000-000000067F000040020000E00000008B8000__0000005D2FFFFB38\n000000067F000040020000E00000008B4000-000000067F000040020000E00000008B8000__00000073AD3FE6B8\n000000067F000040020000E00000008B4000-000000067F000040020000E00000008B8000__000000914E3F38F0\n000000067F000040020000E00000008B4000-000000067F000040020000E00000008B8000__000000931B9A2710\n000000067F000040020000E00000008B8000-000000067F000040020000E00000008BC000__000000572A7A05D8\n000000067F000040020000E00000008B8000-000000067F000040020000E00000008BC000__0000005D2FFFFB38\n000000067F000040020000E00000008B8000-000000067F000040020000E00000008BC000__00000073AD3FE6B8\n000000067F000040020000E00000008B8000-000000067F000040020000E00000008BC000__000000914E3F38F0\n000000067F000040020000E00000008B8000-000000067F000040020000E00000008BC000__000000931B9A2710\n000000067F000040020000E00000008BC000-000000067F000040020000E00000008C0000__000000572A7A05D8\n000000067F000040020000E00000008BC000-000000067F000040020000E00000008C0000__0000005D2FFFFB38\n000000067F000040020000E00000008BC000-000000067F000040020000E00000008C0000__00000073AD3FE6B8\n000000067F000040020000E00000008BC000-000000067F000040020000E00000008C0000__000000914E3F38F0\n000000067F000040020000E00000008BC000-000000067F000040020000E00000008C0000__000000931B9A2710\n000000067F000040020000E00000008BC219-000000067F000040020000E00000008C4BE6__0000004AA26BDB49-0000004B421BFF39\n000000067F000040020000E00000008C0000-000000067F000040020000E00000008C4000__000000572A7A05D8\n000000067F000040020000E00000008C0000-000000067F000040020000E00000008C4000__0000005D2FFFFB38\n000000067F000040020000E00000008C0000-000000067F000040020000E00000008C4000__00000073AD3FE6B8\n000000067F000040020000E00000008C0000-000000067F000040020000E00000008C4000__000000914E3F38F0\n000000067F000040020000E00000008C0000-000000067F000040020000E00000008C4000__000000931B9A2710\n000000067F000040020000E00000008C4000-000000067F000040020000E00000008C8000__000000572A7A05D8\n000000067F000040020
000E00000008C4000-000000067F000040020000E00000008C8000__0000005D2FFFFB38\n000000067F000040020000E00000008C4000-000000067F000040020000E00000008C8000__00000073AD3FE6B8\n000000067F000040020000E00000008C4000-000000067F000040020000E00000008C8000__000000914E3F38F0\n000000067F000040020000E00000008C4000-000000067F000040020000E00000008C8000__000000931B9A2710\n000000067F000040020000E00000008C4BE6-000000067F000040020000E00000008CD5D3__0000004AA26BDB49-0000004B421BFF39\n000000067F000040020000E00000008C8000-000000067F000040020000E00000008CC000__000000572A7A05D8\n000000067F000040020000E00000008C8000-000000067F000040020000E00000008CC000__0000005D2FFFFB38\n000000067F000040020000E00000008C8000-000000067F000040020000E00000008CC000__00000073AD3FE6B8\n000000067F000040020000E00000008C8000-000000067F000040020000E00000008CC000__000000914E3F38F0\n000000067F000040020000E00000008C8000-000000067F000040020000E00000008CC000__000000931B9A2710\n000000067F000040020000E00000008CC000-000000067F000040020000E00000008D0000__000000572A7A05D8\n000000067F000040020000E00000008CC000-000000067F000040020000E00000008D0000__0000005D2FFFFB38\n000000067F000040020000E00000008CC000-000000067F000040020000E00000008D0000__00000073AD3FE6B8\n000000067F000040020000E00000008CC000-000000067F000040020000E00000008D0000__000000914E3F38F0\n000000067F000040020000E00000008CC000-000000067F000040020000E00000008D0000__000000931B9A2710\n000000067F000040020000E00000008CD5D3-000000067F000040020000E00000008D5FBE__0000004AA26BDB49-0000004B421BFF39\n000000067F000040020000E00000008D0000-000000067F000040020000E00000008D4000__000000572A7A05D8\n000000067F000040020000E00000008D0000-000000067F000040020000E00000008D4000__0000005D2FFFFB38\n000000067F000040020000E00000008D0000-000000067F000040020000E00000008D4000__00000073AD3FE6B8\n000000067F000040020000E00000008D0000-000000067F000040020000E00000008D4000__000000914E3F38F0\n000000067F000040020000E00000008D0000-000000067F000040020000E00000008D4000__000000931B9A2710\n000000067F000040020000E00000008D
4000-000000067F000040020000E00000008D8000__000000572A7A05D8\n000000067F000040020000E00000008D4000-000000067F000040020000E00000008D8000__0000005D2FFFFB38\n000000067F000040020000E00000008D4000-000000067F000040020000E00000008D8000__00000073AD3FE6B8\n000000067F000040020000E00000008D4000-000000067F000040020000E00000008D8000__000000914E3F38F0\n000000067F000040020000E00000008D4000-000000067F000040020000E00000008D8000__000000931B9A2710\n000000067F000040020000E00000008D5FBE-000000067F000040020000E00000008DE9A8__0000004AA26BDB49-0000004B421BFF39\n000000067F000040020000E00000008D8000-000000067F000040020000E00000008DC000__000000572A7A05D8\n000000067F000040020000E00000008D8000-000000067F000040020000E00000008DC000__0000005D2FFFFB38\n000000067F000040020000E00000008D8000-000000067F000040020000E00000008DC000__00000073AD3FE6B8\n000000067F000040020000E00000008D8000-000000067F000040020000E00000008DC000__000000914E3F38F0\n000000067F000040020000E00000008D8000-000000067F000040020000E00000008DC000__000000931B9A2710\n000000067F000040020000E00000008DC000-000000067F000040020000E00000008E0000__000000572A7A05D8\n000000067F000040020000E00000008DC000-000000067F000040020000E00000008E0000__0000005D2FFFFB38\n000000067F000040020000E00000008DC000-000000067F000040020000E00000008E0000__00000073AD3FE6B8\n000000067F000040020000E00000008DC000-000000067F000040020000E00000008E0000__000000914E3F38F0\n000000067F000040020000E00000008DC000-000000067F000040020000E00000008E0000__000000931B9A2710\n000000067F000040020000E00000008DE9A8-000000067F000040020000E00000008E737A__0000004AA26BDB49-0000004B421BFF39\n000000067F000040020000E00000008E0000-000000067F000040020000E00000008E4000__000000572A7A05D8\n000000067F000040020000E00000008E0000-000000067F000040020000E00000008E4000__0000005D2FFFFB38\n000000067F000040020000E00000008E0000-000000067F000040020000E00000008E4000__00000073AD3FE6B8\n000000067F000040020000E00000008E0000-000000067F000040020000E00000008E4000__000000914E3F38F0\n000000067F000040020000E00000008E0000-00000006
7F000040020000E00000008E4000__000000931B9A2710\n000000067F000040020000E00000008E4000-000000067F000040020000E00000008E8000__000000572A7A05D8\n000000067F000040020000E00000008E4000-000000067F000040020000E00000008E8000__0000005D2FFFFB38\n000000067F000040020000E00000008E4000-000000067F000040020000E00000008E8000__00000073AD3FE6B8\n000000067F000040020000E00000008E4000-000000067F000040020000E00000008E8000__000000914E3F38F0\n000000067F000040020000E00000008E4000-000000067F000040020000E00000008E8000__000000931B9A2710\n000000067F000040020000E00000008E737A-000000067F000040020000E00000008EFD57__0000004AA26BDB49-0000004B421BFF39\n000000067F000040020000E00000008E8000-000000067F000040020000E00000008EC000__000000572A7A05D8\n000000067F000040020000E00000008E8000-000000067F000040020000E00000008EC000__0000005D2FFFFB38\n000000067F000040020000E00000008E8000-000000067F000040020000E00000008EC000__00000073AD3FE6B8\n000000067F000040020000E00000008E8000-000000067F000040020000E00000008EC000__000000914E3F38F0\n000000067F000040020000E00000008E8000-000000067F000040020000E00000008EC000__000000931B9A2710\n000000067F000040020000E00000008EC000-000000067F000040020000E00000008F0000__000000572A7A05D8\n000000067F000040020000E00000008EC000-000000067F000040020000E00000008F0000__0000005D2FFFFB38\n000000067F000040020000E00000008EC000-000000067F000040020000E00000008F0000__00000073AD3FE6B8\n000000067F000040020000E00000008EC000-000000067F000040020000E00000008F0000__000000914E3F38F0\n000000067F000040020000E00000008EC000-000000067F000040020000E00000008F0000__000000931B9A2710\n000000067F000040020000E00000008EFD57-000000067F000040020000E0000100000000__0000004AA26BDB49-0000004B421BFF39\n000000067F000040020000E00000008F0000-000000067F000040020000E00000008F4000__0000004C0EBFF260\n000000067F000040020000E00000008F0000-000000067F000040020000E00000008F4000__0000005D2FFFFB38\n000000067F000040020000E00000008F0000-000000067F000040020000E00000008F4000__00000073AD3FE6B8\n000000067F000040020000E00000008F0000-000000067F00004002000
0E00000008F4000__000000914E3F38F0\n000000067F000040020000E00000008F0000-000000067F000040020000E00000008F4000__000000931B9A2710\n000000067F000040020000E00000008F0021-000000067F000040020000E00000008F89FC__0000004B421BFF39-0000004BE1CBD591\n000000067F000040020000E00000008F4000-000000067F000040020000E00000008F8000__0000004C0EBFF260\n000000067F000040020000E00000008F4000-000000067F000040020000E00000008F8000__0000005D2FFFFB38\n000000067F000040020000E00000008F4000-000000067F000040020000E00000008F8000__00000073AD3FE6B8\n000000067F000040020000E00000008F4000-000000067F000040020000E00000008F8000__000000914E3F38F0\n000000067F000040020000E00000008F4000-000000067F000040020000E00000008F8000__000000931B9A2710\n000000067F000040020000E00000008F8000-000000067F000040020000E00000008FC000__0000004C0EBFF260\n000000067F000040020000E00000008F8000-000000067F000040020000E00000008FC000__0000005D2FFFFB38\n000000067F000040020000E00000008F8000-000000067F000040020000E00000008FC000__00000073AD3FE6B8\n000000067F000040020000E00000008F8000-000000067F000040020000E00000008FC000__000000914E3F38F0\n000000067F000040020000E00000008F8000-000000067F000040020000E00000008FC000__000000931B9A2710\n000000067F000040020000E00000008F89FC-000000067F000040020000E00000009013D5__0000004B421BFF39-0000004BE1CBD591\n000000067F000040020000E00000008FC000-000000067F000040020000E0000000900000__0000004C0EBFF260\n000000067F000040020000E00000008FC000-000000067F000040020000E0000000900000__0000005D2FFFFB38\n000000067F000040020000E00000008FC000-000000067F000040020000E0000000900000__00000073AD3FE6B8\n000000067F000040020000E00000008FC000-000000067F000040020000E0000000900000__000000914E3F38F0\n000000067F000040020000E00000008FC000-000000067F000040020000E0000000900000__000000931B9A2710\n000000067F000040020000E0000000900000-000000067F000040020000E0000000904000__0000004C0EBFF260\n000000067F000040020000E0000000900000-000000067F000040020000E0000000904000__0000005D2FFFFB38\n000000067F000040020000E0000000900000-000000067F000040020000E00000009040
00__00000073AD3FE6B8\n000000067F000040020000E0000000900000-000000067F000040020000E0000000904000__000000914E3F38F0\n000000067F000040020000E0000000900000-000000067F000040020000E0000000904000__000000931B9A2710\n000000067F000040020000E00000009013D5-000000067F000040020000E0000000909DC8__0000004B421BFF39-0000004BE1CBD591\n000000067F000040020000E0000000904000-000000067F000040020000E0000000908000__0000004C0EBFF260\n000000067F000040020000E0000000904000-000000067F000040020000E0000000908000__0000005D2FFFFB38\n000000067F000040020000E0000000904000-000000067F000040020000E0000000908000__00000073AD3FE6B8\n000000067F000040020000E0000000904000-000000067F000040020000E0000000908000__000000914E3F38F0\n000000067F000040020000E0000000904000-000000067F000040020000E0000000908000__000000931B9A2710\n000000067F000040020000E0000000908000-000000067F000040020000E000000090C000__0000004C0EBFF260\n000000067F000040020000E0000000908000-000000067F000040020000E000000090C000__0000005D2FFFFB38\n000000067F000040020000E0000000908000-000000067F000040020000E000000090C000__00000073AD3FE6B8\n000000067F000040020000E0000000908000-000000067F000040020000E000000090C000__000000914E3F38F0\n000000067F000040020000E0000000908000-000000067F000040020000E000000090C000__000000931B9A2710\n000000067F000040020000E0000000909DC8-000000067F000040020000E00000009127AB__0000004B421BFF39-0000004BE1CBD591\n000000067F000040020000E000000090C000-000000067F000040020000E0000000910000__0000004C0EBFF260\n000000067F000040020000E000000090C000-000000067F000040020000E0000000910000__0000005D2FFFFB38\n000000067F000040020000E000000090C000-000000067F000040020000E0000000910000__00000073AD3FE6B8\n000000067F000040020000E000000090C000-000000067F000040020000E0000000910000__000000914E3F38F0\n000000067F000040020000E000000090C000-000000067F000040020000E0000000910000__000000931B9A2710\n000000067F000040020000E0000000910000-000000067F000040020000E0000000914000__0000004C0EBFF260\n000000067F000040020000E0000000910000-000000067F000040020000E0000000914000__0000005D2
FFFFB38\n000000067F000040020000E0000000910000-000000067F000040020000E0000000914000__00000073AD3FE6B8\n000000067F000040020000E0000000910000-000000067F000040020000E0000000914000__000000914E3F38F0\n000000067F000040020000E0000000910000-000000067F000040020000E0000000914000__000000931B9A2710\n000000067F000040020000E00000009127AB-000000067F000040020000E000000091B185__0000004B421BFF39-0000004BE1CBD591\n000000067F000040020000E0000000914000-000000067F000040020000E0000000918000__0000004C0EBFF260\n000000067F000040020000E0000000914000-000000067F000040020000E0000000918000__0000005D2FFFFB38\n000000067F000040020000E0000000914000-000000067F000040020000E0000000918000__00000073AD3FE6B8\n000000067F000040020000E0000000914000-000000067F000040020000E0000000918000__000000914E3F38F0\n000000067F000040020000E0000000914000-000000067F000040020000E0000000918000__000000931B9A2710\n000000067F000040020000E0000000918000-000000067F000040020000E000000091C000__0000004C0EBFF260\n000000067F000040020000E0000000918000-000000067F000040020000E000000091C000__0000005D2FFFFB38\n000000067F000040020000E0000000918000-000000067F000040020000E000000091C000__00000073AD3FE6B8\n000000067F000040020000E0000000918000-000000067F000040020000E000000091C000__000000914E3F38F0\n000000067F000040020000E0000000918000-000000067F000040020000E000000091C000__000000931B9A2710\n000000067F000040020000E000000091B185-000000067F000040020000E0000000923B55__0000004B421BFF39-0000004BE1CBD591\n000000067F000040020000E000000091C000-000000067F000040020000E0000000920000__0000004C0EBFF260\n000000067F000040020000E000000091C000-000000067F000040020000E0000000920000__0000005D2FFFFB38\n000000067F000040020000E000000091C000-000000067F000040020000E0000000920000__00000073AD3FE6B8\n000000067F000040020000E000000091C000-000000067F000040020000E0000000920000__000000914E3F38F0\n000000067F000040020000E000000091C000-000000067F000040020000E0000000920000__000000931B9A2710\n000000067F000040020000E0000000920000-000000067F000040020000E0000000924000__0000004C0EBFF260\n0000
00067F000040020000E0000000920000-000000067F000040020000E0000000924000__0000005D2FFFFB38\n000000067F000040020000E0000000920000-000000067F000040020000E0000000924000__00000073AD3FE6B8\n000000067F000040020000E0000000920000-000000067F000040020000E0000000924000__000000914E3F38F0\n000000067F000040020000E0000000920000-000000067F000040020000E0000000924000__000000931B9A2710\n000000067F000040020000E0000000923B55-000000067F000040020000E000000092C536__0000004B421BFF39-0000004BE1CBD591\n000000067F000040020000E0000000924000-000000067F000040020000E0000000928000__0000004C0EBFF260\n000000067F000040020000E0000000924000-000000067F000040020000E0000000928000__0000005D2FFFFB38\n000000067F000040020000E0000000924000-000000067F000040020000E0000000928000__00000073AD3FE6B8\n000000067F000040020000E0000000924000-000000067F000040020000E0000000928000__000000914E3F38F0\n000000067F000040020000E0000000924000-000000067F000040020000E0000000928000__000000931B9A2710\n000000067F000040020000E0000000928000-000000067F000040020000E000000092C000__0000004C0EBFF260\n000000067F000040020000E0000000928000-000000067F000040020000E000000092C000__0000005D2FFFFB38\n000000067F000040020000E0000000928000-000000067F000040020000E000000092C000__00000073AD3FE6B8\n000000067F000040020000E0000000928000-000000067F000040020000E000000092C000__000000914E3F38F0\n000000067F000040020000E0000000928000-000000067F000040020000E000000092C000__000000931B9A2710\n000000067F000040020000E000000092C000-000000067F000040020000E0000000930000__0000004C0EBFF260\n000000067F000040020000E000000092C000-000000067F000040020000E0000000930000__0000005D2FFFFB38\n000000067F000040020000E000000092C000-000000067F000040020000E0000000930000__00000073AD3FE6B8\n000000067F000040020000E000000092C000-000000067F000040020000E0000000930000__000000914E3F38F0\n000000067F000040020000E000000092C000-000000067F000040020000E0000000930000__000000931B9A2710\n000000067F000040020000E000000092C536-000000067F000040020000E0000000934F0F__0000004B421BFF39-0000004BE1CBD591\n000000067F0000400
20000E0000000930000-000000067F000040020000E0000000934000__0000004C0EBFF260\n000000067F000040020000E0000000930000-000000067F000040020000E0000000934000__0000005D2FFFFB38\n000000067F000040020000E0000000930000-000000067F000040020000E0000000934000__00000073AD3FE6B8\n000000067F000040020000E0000000930000-000000067F000040020000E0000000934000__000000914E3F38F0\n000000067F000040020000E0000000930000-000000067F000040020000E0000000934000__000000931B9A2710\n000000067F000040020000E0000000934000-000000067F000040020000E0000000938000__0000004C0EBFF260\n000000067F000040020000E0000000934000-000000067F000040020000E0000000938000__0000005D2FFFFB38\n000000067F000040020000E0000000934000-000000067F000040020000E0000000938000__00000073AD3FE6B8\n000000067F000040020000E0000000934000-000000067F000040020000E0000000938000__000000914E3F38F0\n000000067F000040020000E0000000934000-000000067F000040020000E0000000938000__000000931B9A2710\n000000067F000040020000E0000000934F0F-000000067F000040020000E000000093D8E2__0000004B421BFF39-0000004BE1CBD591\n000000067F000040020000E0000000938000-000000067F000040020000E000000093C000__0000004C0EBFF260\n000000067F000040020000E0000000938000-000000067F000040020000E000000093C000__0000005D2FFFFB38\n000000067F000040020000E0000000938000-000000067F000040020000E000000093C000__00000073AD3FE6B8\n000000067F000040020000E0000000938000-000000067F000040020000E000000093C000__000000914E3F38F0\n000000067F000040020000E0000000938000-000000067F000040020000E000000093C000__000000931B9A2710\n000000067F000040020000E000000093C000-000000067F000040020000E0000000940000__0000004C0EBFF260\n000000067F000040020000E000000093C000-000000067F000040020000E0000000940000__0000005D2FFFFB38\n000000067F000040020000E000000093C000-000000067F000040020000E0000000940000__00000073AD3FE6B8\n000000067F000040020000E000000093C000-000000067F000040020000E0000000940000__000000914E3F38F0\n000000067F000040020000E000000093C000-000000067F000040020000E0000000940000__000000931B9A2710\n000000067F000040020000E000000093D8E2-000000067F
000040020000E00000009462D1__0000004B421BFF39-0000004BE1CBD591\n000000067F000040020000E0000000940000-000000067F000040020000E0000000944000__0000004C0EBFF260\n000000067F000040020000E0000000940000-000000067F000040020000E0000000944000__0000005D2FFFFB38\n000000067F000040020000E0000000940000-000000067F000040020000E0000000944000__00000073AD3FE6B8\n000000067F000040020000E0000000940000-000000067F000040020000E0000000944000__000000914E3F38F0\n000000067F000040020000E0000000940000-000000067F000040020000E0000000944000__000000931B9A2710\n000000067F000040020000E0000000944000-000000067F000040020000E0000000948000__0000004C0EBFF260\n000000067F000040020000E0000000944000-000000067F000040020000E0000000948000__000000574B7FF240\n000000067F000040020000E0000000944000-000000067F000040020000E0000000948000__00000073AD3FE6B8\n000000067F000040020000E0000000944000-000000067F000040020000E0000000948000__000000914E3F38F0\n000000067F000040020000E0000000944000-000000067F000040020000E0000000948000__000000931B9A2710\n000000067F000040020000E00000009462D1-000000067F000040020000E0000100000000__0000004B421BFF39-0000004BE1CBD591\n000000067F000040020000E000000094659B-000000067F000040020000E000000094EF81__0000004BE1CBD591-0000004C9173DB81\n000000067F000040020000E0000000948000-000000067F000040020000E000000094C000__0000004C0EBFF260\n000000067F000040020000E0000000948000-000000067F000040020000E000000094C000__000000574B7FF240\n000000067F000040020000E0000000948000-000000067F000040020000E000000094C000__00000073AD3FE6B8\n000000067F000040020000E0000000948000-000000067F000040020000E000000094C000__000000914E3F38F0\n000000067F000040020000E0000000948000-000000067F000040020000E000000094C000__000000931B9A2710\n000000067F000040020000E000000094C000-000000067F000040020000E0000000950000__0000004C0EBFF260\n000000067F000040020000E000000094C000-000000067F000040020000E0000000950000__000000574B7FF240\n000000067F000040020000E000000094C000-000000067F000040020000E0000000950000__00000073AD3FE6B8\n000000067F000040020000E000000094C000-000000
067F000040020000E0000000950000__000000914E3F38F0\n000000067F000040020000E000000094C000-000000067F000040020000E0000000950000__000000931B9A2710\n000000067F000040020000E000000094EF81-000000067F000040020000E000000095795E__0000004BE1CBD591-0000004C9173DB81\n000000067F000040020000E0000000950000-000000067F000040020000E0000000954000__0000004C0EBFF260\n000000067F000040020000E0000000950000-000000067F000040020000E0000000954000__000000574B7FF240\n000000067F000040020000E0000000950000-000000067F000040020000E0000000954000__00000073AD3FE6B8\n000000067F000040020000E0000000950000-000000067F000040020000E0000000954000__000000914E3F38F0\n000000067F000040020000E0000000950000-000000067F000040020000E0000000954000__000000931B9A2710\n000000067F000040020000E0000000954000-000000067F000040020000E0000000958000__0000004C0EBFF260\n000000067F000040020000E0000000954000-000000067F000040020000E0000000958000__000000574B7FF240\n000000067F000040020000E0000000954000-000000067F000040020000E0000000958000__00000073AD3FE6B8\n000000067F000040020000E0000000954000-000000067F000040020000E0000000958000__000000914E3F38F0\n000000067F000040020000E0000000954000-000000067F000040020000E0000000958000__000000931B9A2710\n000000067F000040020000E000000095795E-000000067F000040020000E0000000960335__0000004BE1CBD591-0000004C9173DB81\n000000067F000040020000E0000000958000-000000067F000040020000E000000095C000__0000004C0EBFF260\n000000067F000040020000E0000000958000-000000067F000040020000E000000095C000__000000574B7FF240\n000000067F000040020000E0000000958000-000000067F000040020000E000000095C000__00000073AD3FE6B8\n000000067F000040020000E0000000958000-000000067F000040020000E000000095C000__000000914E3F38F0\n000000067F000040020000E0000000958000-000000067F000040020000E000000095C000__000000931B9A2710\n000000067F000040020000E000000095C000-000000067F000040020000E0000000960000__000000574B7FF240\n000000067F000040020000E000000095C000-000000067F000040020000E0000000960000__00000073AD3FE6B8\n000000067F000040020000E000000095C000-000000067F000040020
000E0000000960000__000000914E3F38F0\n000000067F000040020000E000000095C000-000000067F000040020000E0000000960000__000000931B9A2710\n000000067F000040020000E000000095C000-030000000000000000000000000000000002__0000004C0EBFF260\n000000067F000040020000E0000000960000-000000067F000040020000E0000000964000__000000574B7FF240\n000000067F000040020000E0000000960000-000000067F000040020000E0000000964000__00000073AD3FE6B8\n000000067F000040020000E0000000960000-000000067F000040020000E0000000964000__000000914E3F38F0\n000000067F000040020000E0000000960000-000000067F000040020000E0000000964000__000000931B9A2710\n000000067F000040020000E0000000960335-000000067F000040020000E0000000968D05__0000004BE1CBD591-0000004C9173DB81\n000000067F000040020000E0000000964000-000000067F000040020000E0000000968000__000000574B7FF240\n000000067F000040020000E0000000964000-000000067F000040020000E0000000968000__00000073AD3FE6B8\n000000067F000040020000E0000000964000-000000067F000040020000E0000000968000__000000914E3F38F0\n000000067F000040020000E0000000964000-000000067F000040020000E0000000968000__000000931B9A2710\n000000067F000040020000E0000000968000-000000067F000040020000E000000096C000__000000574B7FF240\n000000067F000040020000E0000000968000-000000067F000040020000E000000096C000__00000073AD3FE6B8\n000000067F000040020000E0000000968000-000000067F000040020000E000000096C000__000000914E3F38F0\n000000067F000040020000E0000000968000-000000067F000040020000E000000096C000__000000931B9A2710\n000000067F000040020000E0000000968D05-000000067F000040020000E00000009716D4__0000004BE1CBD591-0000004C9173DB81\n000000067F000040020000E000000096C000-000000067F000040020000E0000000970000__000000574B7FF240\n000000067F000040020000E000000096C000-000000067F000040020000E0000000970000__00000073AD3FE6B8\n000000067F000040020000E000000096C000-000000067F000040020000E0000000970000__000000914E3F38F0\n000000067F000040020000E000000096C000-000000067F000040020000E0000000970000__000000931B9A2710\n000000067F000040020000E0000000970000-000000067F000040020000E000000097
4000__000000574B7FF240\n000000067F000040020000E0000000970000-000000067F000040020000E0000000974000__00000073AD3FE6B8\n000000067F000040020000E0000000970000-000000067F000040020000E0000000974000__000000914E3F38F0\n000000067F000040020000E0000000970000-000000067F000040020000E0000000974000__000000931B9A2710\n000000067F000040020000E00000009716D4-000000067F000040020000E000000097A0B2__0000004BE1CBD591-0000004C9173DB81\n000000067F000040020000E0000000974000-000000067F000040020000E0000000978000__000000574B7FF240\n000000067F000040020000E0000000974000-000000067F000040020000E0000000978000__00000073AD3FE6B8\n000000067F000040020000E0000000974000-000000067F000040020000E0000000978000__000000914E3F38F0\n000000067F000040020000E0000000974000-000000067F000040020000E0000000978000__000000931B9A2710\n000000067F000040020000E0000000978000-000000067F000040020000E000000097C000__000000574B7FF240\n000000067F000040020000E0000000978000-000000067F000040020000E000000097C000__00000073AD3FE6B8\n000000067F000040020000E0000000978000-000000067F000040020000E000000097C000__000000914E3F38F0\n000000067F000040020000E0000000978000-000000067F000040020000E000000097C000__000000931B9A2710\n000000067F000040020000E000000097A0B2-000000067F000040020000E0000000982A9D__0000004BE1CBD591-0000004C9173DB81\n000000067F000040020000E000000097C000-000000067F000040020000E0000000980000__000000574B7FF240\n000000067F000040020000E000000097C000-000000067F000040020000E0000000980000__00000073AD3FE6B8\n000000067F000040020000E000000097C000-000000067F000040020000E0000000980000__000000914E3F38F0\n000000067F000040020000E000000097C000-000000067F000040020000E0000000980000__000000931B9A2710\n000000067F000040020000E0000000980000-000000067F000040020000E0000000984000__000000574B7FF240\n000000067F000040020000E0000000980000-000000067F000040020000E0000000984000__00000073AD3FE6B8\n000000067F000040020000E0000000980000-000000067F000040020000E0000000984000__000000914E3F38F0\n000000067F000040020000E0000000980000-000000067F000040020000E0000000984000__0000009
31B9A2710\n000000067F000040020000E0000000982A9D-000000067F000040020000E000000098B483__0000004BE1CBD591-0000004C9173DB81\n000000067F000040020000E0000000984000-000000067F000040020000E0000000988000__000000574B7FF240\n000000067F000040020000E0000000984000-000000067F000040020000E0000000988000__00000073AD3FE6B8\n000000067F000040020000E0000000984000-000000067F000040020000E0000000988000__000000914E3F38F0\n000000067F000040020000E0000000984000-000000067F000040020000E0000000988000__000000931B9A2710\n000000067F000040020000E0000000988000-000000067F000040020000E000000098C000__000000574B7FF240\n000000067F000040020000E0000000988000-000000067F000040020000E000000098C000__00000073AD3FE6B8\n000000067F000040020000E0000000988000-000000067F000040020000E000000098C000__000000914E3F38F0\n000000067F000040020000E0000000988000-000000067F000040020000E000000098C000__000000931B9A2710\n000000067F000040020000E000000098B483-000000067F000040020000E0000000993E61__0000004BE1CBD591-0000004C9173DB81\n000000067F000040020000E000000098C000-000000067F000040020000E0000000990000__000000574B7FF240\n000000067F000040020000E000000098C000-000000067F000040020000E0000000990000__00000073AD3FE6B8\n000000067F000040020000E000000098C000-000000067F000040020000E0000000990000__000000914E3F38F0\n000000067F000040020000E000000098C000-000000067F000040020000E0000000990000__000000931B9A2710\n000000067F000040020000E0000000990000-000000067F000040020000E0000000994000__000000574B7FF240\n000000067F000040020000E0000000990000-000000067F000040020000E0000000994000__00000073AD3FE6B8\n000000067F000040020000E0000000990000-000000067F000040020000E0000000994000__000000914E3F38F0\n000000067F000040020000E0000000990000-000000067F000040020000E0000000994000__000000931B9A2710\n000000067F000040020000E0000000993E61-000000067F000040020000E000000099C837__0000004BE1CBD591-0000004C9173DB81\n000000067F000040020000E0000000994000-000000067F000040020000E0000000998000__000000574B7FF240\n000000067F000040020000E0000000994000-000000067F000040020000E0000000998000__000
00073AD3FE6B8\n000000067F000040020000E0000000994000-000000067F000040020000E0000000998000__000000914E3F38F0\n000000067F000040020000E0000000994000-000000067F000040020000E0000000998000__000000931B9A2710\n000000067F000040020000E0000000998000-000000067F000040020000E000000099C000__000000574B7FF240\n000000067F000040020000E0000000998000-000000067F000040020000E000000099C000__00000073AD3FE6B8\n000000067F000040020000E0000000998000-000000067F000040020000E000000099C000__000000914E3F38F0\n000000067F000040020000E0000000998000-000000067F000040020000E000000099C000__000000931B9A2710\n000000067F000040020000E000000099C000-000000067F000040020000E00000009A0000__000000574B7FF240\n000000067F000040020000E000000099C000-000000067F000040020000E00000009A0000__00000073AD3FE6B8\n000000067F000040020000E000000099C000-000000067F000040020000E00000009A0000__000000914E3F38F0\n000000067F000040020000E000000099C000-000000067F000040020000E00000009A0000__000000931B9A2710\n000000067F000040020000E000000099C837-000000067F000040020000E00000009A5205__0000004BE1CBD591-0000004C9173DB81\n000000067F000040020000E00000009A0000-000000067F000040020000E00000009A4000__000000574B7FF240\n000000067F000040020000E00000009A0000-000000067F000040020000E00000009A4000__00000073AD3FE6B8\n000000067F000040020000E00000009A0000-000000067F000040020000E00000009A4000__000000914E3F38F0\n000000067F000040020000E00000009A0000-000000067F000040020000E00000009A4000__000000931B9A2710\n000000067F000040020000E00000009A4000-000000067F000040020000E00000009A8000__000000572A7A05D8\n000000067F000040020000E00000009A4000-000000067F000040020000E00000009A8000__0000005D2FFFFB38\n000000067F000040020000E00000009A4000-000000067F000040020000E00000009A8000__00000073AD3FE6B8\n000000067F000040020000E00000009A4000-000000067F000040020000E00000009A8000__000000914E3F38F0\n000000067F000040020000E00000009A4000-000000067F000040020000E00000009A8000__000000931B9A2710\n000000067F000040020000E00000009A5205-000000067F000040020000E0000100000000__0000004BE1CBD591-0000004C9173DB81
\n000000067F000040020000E00000009A552F-000000067F000040020000E00000009ADEFC__0000004C9173DB81-0000004D3123ED31\n000000067F000040020000E00000009A8000-000000067F000040020000E00000009AC000__000000572A7A05D8\n000000067F000040020000E00000009A8000-000000067F000040020000E00000009AC000__0000005D2FFFFB38\n000000067F000040020000E00000009A8000-000000067F000040020000E00000009AC000__00000073AD3FE6B8\n000000067F000040020000E00000009A8000-000000067F000040020000E00000009AC000__000000914E3F38F0\n000000067F000040020000E00000009A8000-000000067F000040020000E00000009AC000__000000931B9A2710\n000000067F000040020000E00000009AC000-000000067F000040020000E00000009B0000__000000572A7A05D8\n000000067F000040020000E00000009AC000-000000067F000040020000E00000009B0000__0000005D2FFFFB38\n000000067F000040020000E00000009AC000-000000067F000040020000E00000009B0000__00000073AD3FE6B8\n000000067F000040020000E00000009AC000-000000067F000040020000E00000009B0000__000000914E3F38F0\n000000067F000040020000E00000009AC000-000000067F000040020000E00000009B0000__000000931B9A2710\n000000067F000040020000E00000009ADEFC-000000067F000040020000E00000009B68E7__0000004C9173DB81-0000004D3123ED31\n000000067F000040020000E00000009B0000-000000067F000040020000E00000009B4000__000000572A7A05D8\n000000067F000040020000E00000009B0000-000000067F000040020000E00000009B4000__0000005D2FFFFB38\n000000067F000040020000E00000009B0000-000000067F000040020000E00000009B4000__00000073AD3FE6B8\n000000067F000040020000E00000009B0000-000000067F000040020000E00000009B4000__000000914E3F38F0\n000000067F000040020000E00000009B0000-000000067F000040020000E00000009B4000__000000931B9A2710\n000000067F000040020000E00000009B4000-000000067F000040020000E00000009B8000__000000572A7A05D8\n000000067F000040020000E00000009B4000-000000067F000040020000E00000009B8000__0000005D2FFFFB38\n000000067F000040020000E00000009B4000-000000067F000040020000E00000009B8000__00000073AD3FE6B8\n000000067F000040020000E00000009B4000-000000067F000040020000E00000009B8000__000000914E3F38F0\n000000067F0
00040020000E00000009B4000-000000067F000040020000E00000009B8000__000000931B9A2710\n000000067F000040020000E00000009B68E7-000000067F000040020000E00000009BF2D2__0000004C9173DB81-0000004D3123ED31\n000000067F000040020000E00000009B8000-000000067F000040020000E00000009BC000__000000572A7A05D8\n000000067F000040020000E00000009B8000-000000067F000040020000E00000009BC000__0000005D2FFFFB38\n000000067F000040020000E00000009B8000-000000067F000040020000E00000009BC000__00000073AD3FE6B8\n000000067F000040020000E00000009B8000-000000067F000040020000E00000009BC000__000000914E3F38F0\n000000067F000040020000E00000009B8000-000000067F000040020000E00000009BC000__000000931B9A2710\n000000067F000040020000E00000009BC000-000000067F000040020000E00000009C0000__000000572A7A05D8\n000000067F000040020000E00000009BC000-000000067F000040020000E00000009C0000__0000005D2FFFFB38\n000000067F000040020000E00000009BC000-000000067F000040020000E00000009C0000__00000073AD3FE6B8\n000000067F000040020000E00000009BC000-000000067F000040020000E00000009C0000__000000914E3F38F0\n000000067F000040020000E00000009BC000-000000067F000040020000E00000009C0000__000000931B9A2710\n000000067F000040020000E00000009BF2D2-000000067F000040020000E00000009C7CB7__0000004C9173DB81-0000004D3123ED31\n000000067F000040020000E00000009C0000-000000067F000040020000E00000009C4000__000000572A7A05D8\n000000067F000040020000E00000009C0000-000000067F000040020000E00000009C4000__0000005D2FFFFB38\n000000067F000040020000E00000009C0000-000000067F000040020000E00000009C4000__00000073AD3FE6B8\n000000067F000040020000E00000009C0000-000000067F000040020000E00000009C4000__000000914E3F38F0\n000000067F000040020000E00000009C0000-000000067F000040020000E00000009C4000__000000931B9A2710\n000000067F000040020000E00000009C4000-000000067F000040020000E00000009C8000__000000572A7A05D8\n000000067F000040020000E00000009C4000-000000067F000040020000E00000009C8000__0000005D2FFFFB38\n000000067F000040020000E00000009C4000-000000067F000040020000E00000009C8000__00000073AD3FE6B8\n000000067F000040020000E0
0000009C4000-000000067F000040020000E00000009C8000__000000914E3F38F0\n000000067F000040020000E00000009C4000-000000067F000040020000E00000009C8000__000000931B9A2710\n000000067F000040020000E00000009C7CB7-000000067F000040020000E00000009D0695__0000004C9173DB81-0000004D3123ED31\n000000067F000040020000E00000009C8000-000000067F000040020000E00000009CC000__000000572A7A05D8\n000000067F000040020000E00000009C8000-000000067F000040020000E00000009CC000__0000005D2FFFFB38\n000000067F000040020000E00000009C8000-000000067F000040020000E00000009CC000__00000073AD3FE6B8\n000000067F000040020000E00000009C8000-000000067F000040020000E00000009CC000__000000914E3F38F0\n000000067F000040020000E00000009C8000-000000067F000040020000E00000009CC000__000000931B9A2710\n000000067F000040020000E00000009CC000-000000067F000040020000E00000009D0000__000000572A7A05D8\n000000067F000040020000E00000009CC000-000000067F000040020000E00000009D0000__0000005D2FFFFB38\n000000067F000040020000E00000009CC000-000000067F000040020000E00000009D0000__00000073AD3FE6B8\n000000067F000040020000E00000009CC000-000000067F000040020000E00000009D0000__000000914E3F38F0\n000000067F000040020000E00000009CC000-000000067F000040020000E00000009D0000__000000931B9A2710\n000000067F000040020000E00000009D0000-000000067F000040020000E00000009D4000__000000572A7A05D8\n000000067F000040020000E00000009D0000-000000067F000040020000E00000009D4000__0000005D2FFFFB38\n000000067F000040020000E00000009D0000-000000067F000040020000E00000009D4000__00000073AD3FE6B8\n000000067F000040020000E00000009D0000-000000067F000040020000E00000009D4000__000000914E3F38F0\n000000067F000040020000E00000009D0000-000000067F000040020000E00000009D4000__000000931B9A2710\n000000067F000040020000E00000009D0695-000000067F000040020000E00000009D9071__0000004C9173DB81-0000004D3123ED31\n000000067F000040020000E00000009D4000-000000067F000040020000E00000009D8000__000000572A7A05D8\n000000067F000040020000E00000009D4000-000000067F000040020000E00000009D8000__0000005D2FFFFB38\n000000067F000040020000E00000009D4000-
000000067F000040020000E00000009D8000__00000073AD3FE6B8\n000000067F000040020000E00000009D4000-000000067F000040020000E00000009D8000__000000914E3F38F0\n000000067F000040020000E00000009D4000-000000067F000040020000E00000009D8000__000000931B9A2710\n000000067F000040020000E00000009D8000-000000067F000040020000E00000009DC000__000000572A7A05D8\n000000067F000040020000E00000009D8000-000000067F000040020000E00000009DC000__0000005D2FFFFB38\n000000067F000040020000E00000009D8000-000000067F000040020000E00000009DC000__00000073AD3FE6B8\n000000067F000040020000E00000009D8000-000000067F000040020000E00000009DC000__000000914E3F38F0\n000000067F000040020000E00000009D8000-000000067F000040020000E00000009DC000__000000931B9A2710\n000000067F000040020000E00000009D9071-000000067F000040020000E00000009E1A46__0000004C9173DB81-0000004D3123ED31\n000000067F000040020000E00000009DC000-000000067F000040020000E00000009E0000__000000572A7A05D8\n000000067F000040020000E00000009DC000-000000067F000040020000E00000009E0000__0000005D2FFFFB38\n000000067F000040020000E00000009DC000-000000067F000040020000E00000009E0000__00000073AD3FE6B8\n000000067F000040020000E00000009DC000-000000067F000040020000E00000009E0000__000000914E3F38F0\n000000067F000040020000E00000009DC000-000000067F000040020000E00000009E0000__000000931B9A2710\n000000067F000040020000E00000009E0000-000000067F000040020000E00000009E4000__000000572A7A05D8\n000000067F000040020000E00000009E0000-000000067F000040020000E00000009E4000__0000005D2FFFFB38\n000000067F000040020000E00000009E0000-000000067F000040020000E00000009E4000__00000073AD3FE6B8\n000000067F000040020000E00000009E0000-000000067F000040020000E00000009E4000__000000914E3F38F0\n000000067F000040020000E00000009E0000-000000067F000040020000E00000009E4000__000000931B9A2710\n000000067F000040020000E00000009E1A46-000000067F000040020000E00000009EA421__0000004C9173DB81-0000004D3123ED31\n000000067F000040020000E00000009E4000-000000067F000040020000E00000009E8000__000000572A7A05D8\n000000067F000040020000E00000009E4000-000000067F000
040020000E00000009E8000__0000005D2FFFFB38\n000000067F000040020000E00000009E4000-000000067F000040020000E00000009E8000__00000073AD3FE6B8\n000000067F000040020000E00000009E4000-000000067F000040020000E00000009E8000__000000914E3F38F0\n000000067F000040020000E00000009E4000-000000067F000040020000E00000009E8000__000000931B9A2710\n000000067F000040020000E00000009E8000-000000067F000040020000E00000009EC000__000000572A7A05D8\n000000067F000040020000E00000009E8000-000000067F000040020000E00000009EC000__0000005D2FFFFB38\n000000067F000040020000E00000009E8000-000000067F000040020000E00000009EC000__00000073AD3FE6B8\n000000067F000040020000E00000009E8000-000000067F000040020000E00000009EC000__000000914E3F38F0\n000000067F000040020000E00000009E8000-000000067F000040020000E00000009EC000__000000931B9A2710\n000000067F000040020000E00000009EA421-000000067F000040020000E00000009F2DFA__0000004C9173DB81-0000004D3123ED31\n000000067F000040020000E00000009EC000-000000067F000040020000E00000009F0000__000000572A7A05D8\n000000067F000040020000E00000009EC000-000000067F000040020000E00000009F0000__0000005D2FFFFB38\n000000067F000040020000E00000009EC000-000000067F000040020000E00000009F0000__00000073AD3FE6B8\n000000067F000040020000E00000009EC000-000000067F000040020000E00000009F0000__000000914E3F38F0\n000000067F000040020000E00000009EC000-000000067F000040020000E00000009F0000__000000931B9A2710\n000000067F000040020000E00000009F0000-000000067F000040020000E00000009F4000__000000572A7A05D8\n000000067F000040020000E00000009F0000-000000067F000040020000E00000009F4000__0000005D2FFFFB38\n000000067F000040020000E00000009F0000-000000067F000040020000E00000009F4000__00000073AD3FE6B8\n000000067F000040020000E00000009F0000-000000067F000040020000E00000009F4000__000000914E3F38F0\n000000067F000040020000E00000009F0000-000000067F000040020000E00000009F4000__000000931B9A2710\n000000067F000040020000E00000009F2DFA-000000067F000040020000E00000009FB7E4__0000004C9173DB81-0000004D3123ED31\n000000067F000040020000E00000009F4000-000000067F000040020000E000
00009F8000__000000572A7A05D8\n000000067F000040020000E00000009F4000-000000067F000040020000E00000009F8000__0000005D2FFFFB38\n000000067F000040020000E00000009F4000-000000067F000040020000E00000009F8000__00000073AD3FE6B8\n000000067F000040020000E00000009F4000-000000067F000040020000E00000009F8000__000000914E3F38F0\n000000067F000040020000E00000009F4000-000000067F000040020000E00000009F8000__000000931B9A2710\n000000067F000040020000E00000009F8000-000000067F000040020000E00000009FC000__0000004E11956660\n000000067F000040020000E00000009F8000-000000067F000040020000E00000009FC000__0000005D2FFFFB38\n000000067F000040020000E00000009F8000-000000067F000040020000E00000009FC000__00000073AD3FE6B8\n000000067F000040020000E00000009F8000-000000067F000040020000E00000009FC000__000000914E3F38F0\n000000067F000040020000E00000009F8000-000000067F000040020000E00000009FC000__000000931B9A2710\n000000067F000040020000E00000009FB7E4-000000067F000040020000E0000100000000__0000004C9173DB81-0000004D3123ED31\n000000067F000040020000E00000009FBAAD-000000067F000040020000E0000000A0449F__0000004D3123ED31-0000004DE0CBDCD1\n000000067F000040020000E00000009FC000-000000067F000040020000E0000000A00000__0000004E11956660\n000000067F000040020000E00000009FC000-000000067F000040020000E0000000A00000__0000005D2FFFFB38\n000000067F000040020000E00000009FC000-000000067F000040020000E0000000A00000__00000073AD3FE6B8\n000000067F000040020000E00000009FC000-000000067F000040020000E0000000A00000__000000914E3F38F0\n000000067F000040020000E00000009FC000-000000067F000040020000E0000000A00000__000000931B9A2710\n000000067F000040020000E0000000A00000-000000067F000040020000E0000000A04000__0000004E11956660\n000000067F000040020000E0000000A00000-000000067F000040020000E0000000A04000__0000005D2FFFFB38\n000000067F000040020000E0000000A00000-000000067F000040020000E0000000A04000__00000073AD3FE6B8\n000000067F000040020000E0000000A00000-000000067F000040020000E0000000A04000__000000914E3F38F0\n000000067F000040020000E0000000A00000-000000067F000040020000E0000000A04000__0
00000931B9A2710\n000000067F000040020000E0000000A04000-000000067F000040020000E0000000A08000__0000004E11956660\n000000067F000040020000E0000000A04000-000000067F000040020000E0000000A08000__0000005D2FFFFB38\n000000067F000040020000E0000000A04000-000000067F000040020000E0000000A08000__00000073AD3FE6B8\n000000067F000040020000E0000000A04000-000000067F000040020000E0000000A08000__000000914E3F38F0\n000000067F000040020000E0000000A04000-000000067F000040020000E0000000A08000__000000931B9A2710\n000000067F000040020000E0000000A0449F-000000067F000040020000E0000000A0CE79__0000004D3123ED31-0000004DE0CBDCD1\n000000067F000040020000E0000000A08000-000000067F000040020000E0000000A0C000__0000004E11956660\n000000067F000040020000E0000000A08000-000000067F000040020000E0000000A0C000__0000005D2FFFFB38\n000000067F000040020000E0000000A08000-000000067F000040020000E0000000A0C000__00000073AD3FE6B8\n000000067F000040020000E0000000A08000-000000067F000040020000E0000000A0C000__000000914E3F38F0\n000000067F000040020000E0000000A08000-000000067F000040020000E0000000A0C000__000000931B9A2710\n000000067F000040020000E0000000A0C000-000000067F000040020000E0000000A10000__0000004E11956660\n000000067F000040020000E0000000A0C000-000000067F000040020000E0000000A10000__0000005D2FFFFB38\n000000067F000040020000E0000000A0C000-000000067F000040020000E0000000A10000__00000073AD3FE6B8\n000000067F000040020000E0000000A0C000-000000067F000040020000E0000000A10000__000000914E3F38F0\n000000067F000040020000E0000000A0C000-000000067F000040020000E0000000A10000__000000931B9A2710\n000000067F000040020000E0000000A0CE79-000000067F000040020000E0000000A15852__0000004D3123ED31-0000004DE0CBDCD1\n000000067F000040020000E0000000A10000-000000067F000040020000E0000000A14000__0000004E11956660\n000000067F000040020000E0000000A10000-000000067F000040020000E0000000A14000__0000005D2FFFFB38\n000000067F000040020000E0000000A10000-000000067F000040020000E0000000A14000__00000073AD3FE6B8\n000000067F000040020000E0000000A10000-000000067F000040020000E0000000A14000__000000914E3F38
F0\n000000067F000040020000E0000000A10000-000000067F000040020000E0000000A14000__000000931B9A2710\n000000067F000040020000E0000000A14000-000000067F000040020000E0000000A18000__0000004E11956660\n000000067F000040020000E0000000A14000-000000067F000040020000E0000000A18000__0000005D2FFFFB38\n000000067F000040020000E0000000A14000-000000067F000040020000E0000000A18000__00000073AD3FE6B8\n000000067F000040020000E0000000A14000-000000067F000040020000E0000000A18000__000000914E3F38F0\n000000067F000040020000E0000000A14000-000000067F000040020000E0000000A18000__000000931B9A2710\n000000067F000040020000E0000000A15852-000000067F000040020000E0000000A1E225__0000004D3123ED31-0000004DE0CBDCD1\n000000067F000040020000E0000000A18000-000000067F000040020000E0000000A1C000__0000004E11956660\n000000067F000040020000E0000000A18000-000000067F000040020000E0000000A1C000__0000005D2FFFFB38\n000000067F000040020000E0000000A18000-000000067F000040020000E0000000A1C000__00000073AD3FE6B8\n000000067F000040020000E0000000A18000-000000067F000040020000E0000000A1C000__000000914E3F38F0\n000000067F000040020000E0000000A18000-000000067F000040020000E0000000A1C000__000000931B9A2710\n000000067F000040020000E0000000A1C000-000000067F000040020000E0000000A20000__0000004E11956660\n000000067F000040020000E0000000A1C000-000000067F000040020000E0000000A20000__0000005D2FFFFB38\n000000067F000040020000E0000000A1C000-000000067F000040020000E0000000A20000__00000073AD3FE6B8\n000000067F000040020000E0000000A1C000-000000067F000040020000E0000000A20000__000000914E3F38F0\n000000067F000040020000E0000000A1C000-000000067F000040020000E0000000A20000__000000931B9A2710\n000000067F000040020000E0000000A1E225-000000067F000040020000E0000000A26BF5__0000004D3123ED31-0000004DE0CBDCD1\n000000067F000040020000E0000000A20000-000000067F000040020000E0000000A24000__0000004E11956660\n000000067F000040020000E0000000A20000-000000067F000040020000E0000000A24000__0000005D2FFFFB38\n000000067F000040020000E0000000A20000-000000067F000040020000E0000000A24000__00000073AD3FE6B8\n000000067
F000040020000E0000000A20000-000000067F000040020000E0000000A24000__000000914E3F38F0\n000000067F000040020000E0000000A20000-000000067F000040020000E0000000A24000__000000931B9A2710\n000000067F000040020000E0000000A24000-000000067F000040020000E0000000A28000__0000004E11956660\n000000067F000040020000E0000000A24000-000000067F000040020000E0000000A28000__0000005D2FFFFB38\n000000067F000040020000E0000000A24000-000000067F000040020000E0000000A28000__00000073AD3FE6B8\n000000067F000040020000E0000000A24000-000000067F000040020000E0000000A28000__000000914E3F38F0\n000000067F000040020000E0000000A24000-000000067F000040020000E0000000A28000__000000931B9A2710\n000000067F000040020000E0000000A26BF5-000000067F000040020000E0000000A2F5E1__0000004D3123ED31-0000004DE0CBDCD1\n000000067F000040020000E0000000A28000-000000067F000040020000E0000000A2C000__0000004E11956660\n000000067F000040020000E0000000A28000-000000067F000040020000E0000000A2C000__0000005D2FFFFB38\n000000067F000040020000E0000000A28000-000000067F000040020000E0000000A2C000__00000073AD3FE6B8\n000000067F000040020000E0000000A28000-000000067F000040020000E0000000A2C000__000000914E3F38F0\n000000067F000040020000E0000000A28000-000000067F000040020000E0000000A2C000__000000931B9A2710\n000000067F000040020000E0000000A2C000-000000067F000040020000E0000000A30000__0000004E11956660\n000000067F000040020000E0000000A2C000-000000067F000040020000E0000000A30000__0000005D2FFFFB38\n000000067F000040020000E0000000A2C000-000000067F000040020000E0000000A30000__00000073AD3FE6B8\n000000067F000040020000E0000000A2C000-000000067F000040020000E0000000A30000__000000914E3F38F0\n000000067F000040020000E0000000A2C000-000000067F000040020000E0000000A30000__000000931B9A2710\n000000067F000040020000E0000000A2F5E1-000000067F000040020000E0000000A37FC8__0000004D3123ED31-0000004DE0CBDCD1\n000000067F000040020000E0000000A30000-000000067F000040020000E0000000A34000__0000004E11956660\n000000067F000040020000E0000000A30000-000000067F000040020000E0000000A34000__0000005D2FFFFB38\n000000067F000040020000
E0000000A30000-000000067F000040020000E0000000A34000__00000073AD3FE6B8\n000000067F000040020000E0000000A30000-000000067F000040020000E0000000A34000__000000914E3F38F0\n000000067F000040020000E0000000A30000-000000067F000040020000E0000000A34000__000000931B9A2710\n000000067F000040020000E0000000A34000-000000067F000040020000E0000000A38000__0000004E11956660\n000000067F000040020000E0000000A34000-000000067F000040020000E0000000A38000__0000005D2FFFFB38\n000000067F000040020000E0000000A34000-000000067F000040020000E0000000A38000__00000073AD3FE6B8\n000000067F000040020000E0000000A34000-000000067F000040020000E0000000A38000__000000914E3F38F0\n000000067F000040020000E0000000A34000-000000067F000040020000E0000000A38000__000000931B9A2710\n000000067F000040020000E0000000A37FC8-000000067F000040020000E0000000A409C6__0000004D3123ED31-0000004DE0CBDCD1\n000000067F000040020000E0000000A38000-000000067F000040020000E0000000A3C000__0000004E11956660\n000000067F000040020000E0000000A38000-000000067F000040020000E0000000A3C000__0000005D2FFFFB38\n000000067F000040020000E0000000A38000-000000067F000040020000E0000000A3C000__00000073AD3FE6B8\n000000067F000040020000E0000000A38000-000000067F000040020000E0000000A3C000__000000914E3F38F0\n000000067F000040020000E0000000A38000-000000067F000040020000E0000000A3C000__000000931B9A2710\n000000067F000040020000E0000000A3C000-000000067F000040020000E0000000A40000__0000004E11956660\n000000067F000040020000E0000000A3C000-000000067F000040020000E0000000A40000__0000005D2FFFFB38\n000000067F000040020000E0000000A3C000-000000067F000040020000E0000000A40000__00000073AD3FE6B8\n000000067F000040020000E0000000A3C000-000000067F000040020000E0000000A40000__000000914E3F38F0\n000000067F000040020000E0000000A3C000-000000067F000040020000E0000000A40000__000000931B9A2710\n000000067F000040020000E0000000A40000-000000067F000040020000E0000000A44000__0000004E11956660\n000000067F000040020000E0000000A40000-000000067F000040020000E0000000A44000__0000005D2FFFFB38\n000000067F000040020000E0000000A40000-000000067F00004
0020000E0000000A44000__00000073AD3FE6B8\n000000067F000040020000E0000000A40000-000000067F000040020000E0000000A44000__000000914E3F38F0\n000000067F000040020000E0000000A40000-000000067F000040020000E0000000A44000__000000931B9A2710\n000000067F000040020000E0000000A409C6-000000067F000040020000E0000000A4939A__0000004D3123ED31-0000004DE0CBDCD1\n000000067F000040020000E0000000A44000-000000067F000040020000E0000000A48000__0000004E11956660\n000000067F000040020000E0000000A44000-000000067F000040020000E0000000A48000__0000005D2FFFFB38\n000000067F000040020000E0000000A44000-000000067F000040020000E0000000A48000__00000073AD3FE6B8\n000000067F000040020000E0000000A44000-000000067F000040020000E0000000A48000__000000914E3F38F0\n000000067F000040020000E0000000A44000-000000067F000040020000E0000000A48000__000000931B9A2710\n000000067F000040020000E0000000A48000-000000067F000040020000E0000000A4C000__0000004E11956660\n000000067F000040020000E0000000A48000-000000067F000040020000E0000000A4C000__0000005D2FFFFB38\n000000067F000040020000E0000000A48000-000000067F000040020000E0000000A4C000__00000073AD3FE6B8\n000000067F000040020000E0000000A48000-000000067F000040020000E0000000A4C000__000000914E3F38F0\n000000067F000040020000E0000000A48000-000000067F000040020000E0000000A4C000__000000931B9A2710\n000000067F000040020000E0000000A4939A-000000067F000040020000E0000000A51D74__0000004D3123ED31-0000004DE0CBDCD1\n000000067F000040020000E0000000A4C000-000000067F000040020000E0000000A50000__0000004E11956660\n000000067F000040020000E0000000A4C000-000000067F000040020000E0000000A50000__0000005D2FFFFB38\n000000067F000040020000E0000000A4C000-000000067F000040020000E0000000A50000__00000073AD3FE6B8\n000000067F000040020000E0000000A4C000-000000067F000040020000E0000000A50000__000000914E3F38F0\n000000067F000040020000E0000000A4C000-000000067F000040020000E0000000A50000__000000931B9A2710\n000000067F000040020000E0000000A50000-000000067F000040020000E0000000A54000__0000004E11956660\n000000067F000040020000E0000000A50000-000000067F000040020000E00000
00A54000__0000005D2FFFFB38\n000000067F000040020000E0000000A50000-000000067F000040020000E0000000A54000__00000073AD3FE6B8\n000000067F000040020000E0000000A50000-000000067F000040020000E0000000A54000__000000914E3F38F0\n000000067F000040020000E0000000A50000-000000067F000040020000E0000000A54000__000000931B9A2710\n000000067F000040020000E0000000A51D74-000000067F000040020000E0000000A5A745__0000004D3123ED31-0000004DE0CBDCD1\n000000067F000040020000E0000000A54000-000000067F000040020000E0000000A58000__0000004E11956660\n000000067F000040020000E0000000A54000-000000067F000040020000E0000000A58000__0000005D2FFFFB38\n000000067F000040020000E0000000A54000-000000067F000040020000E0000000A58000__00000073AD3FE6B8\n000000067F000040020000E0000000A54000-000000067F000040020000E0000000A58000__000000914E3F38F0\n000000067F000040020000E0000000A54000-000000067F000040020000E0000000A58000__000000931B9A2710\n000000067F000040020000E0000000A58000-000000067F000040020000E0000000A5C000__0000004E11956660\n000000067F000040020000E0000000A58000-000000067F000040020000E0000000A5C000__000000574B7FF240\n000000067F000040020000E0000000A58000-000000067F000040020000E0000000A5C000__00000073AD3FE6B8\n000000067F000040020000E0000000A58000-000000067F000040020000E0000000A5C000__000000914E3F38F0\n000000067F000040020000E0000000A58000-000000067F000040020000E0000000A5C000__000000931B9A2710\n000000067F000040020000E0000000A5A745-000000067F000040020000E0000100000000__0000004D3123ED31-0000004DE0CBDCD1\n000000067F000040020000E0000000A5AA3B-000000067F000040020000E0000000A6340F__0000004DE0CBDCD1-0000004E807BE039\n000000067F000040020000E0000000A5C000-000000067F000040020000E0000000A60000__0000004E11956660\n000000067F000040020000E0000000A5C000-000000067F000040020000E0000000A60000__000000574B7FF240\n000000067F000040020000E0000000A5C000-000000067F000040020000E0000000A60000__00000073AD3FE6B8\n000000067F000040020000E0000000A5C000-000000067F000040020000E0000000A60000__000000914E3F38F0\n000000067F000040020000E0000000A5C000-000000067F000040020000E0
000000A60000__000000931B9A2710\n000000067F000040020000E0000000A60000-000000067F000040020000E0000000A64000__0000004E11956660\n000000067F000040020000E0000000A60000-000000067F000040020000E0000000A64000__000000574B7FF240\n000000067F000040020000E0000000A60000-000000067F000040020000E0000000A64000__00000073AD3FE6B8\n000000067F000040020000E0000000A60000-000000067F000040020000E0000000A64000__000000914E3F38F0\n000000067F000040020000E0000000A60000-000000067F000040020000E0000000A64000__000000931B9A2710\n000000067F000040020000E0000000A6340F-000000067F000040020000E0000000A6BDF1__0000004DE0CBDCD1-0000004E807BE039\n000000067F000040020000E0000000A64000-000000067F000040020000E0000000A68000__0000004E11956660\n000000067F000040020000E0000000A64000-000000067F000040020000E0000000A68000__000000574B7FF240\n000000067F000040020000E0000000A64000-000000067F000040020000E0000000A68000__00000073AD3FE6B8\n000000067F000040020000E0000000A64000-000000067F000040020000E0000000A68000__000000914E3F38F0\n000000067F000040020000E0000000A64000-000000067F000040020000E0000000A68000__000000931B9A2710\n000000067F000040020000E0000000A68000-000000067F000040020000E0000000A6C000__0000004E11956660\n000000067F000040020000E0000000A68000-000000067F000040020000E0000000A6C000__000000574B7FF240\n000000067F000040020000E0000000A68000-000000067F000040020000E0000000A6C000__00000073AD3FE6B8\n000000067F000040020000E0000000A68000-000000067F000040020000E0000000A6C000__000000914E3F38F0\n000000067F000040020000E0000000A68000-000000067F000040020000E0000000A6C000__000000931B9A2710\n000000067F000040020000E0000000A6BDF1-000000067F000040020000E0000000A747DF__0000004DE0CBDCD1-0000004E807BE039\n000000067F000040020000E0000000A6C000-000000067F000040020000E0000000A70000__0000004E11956660\n000000067F000040020000E0000000A6C000-000000067F000040020000E0000000A70000__000000574B7FF240\n000000067F000040020000E0000000A6C000-000000067F000040020000E0000000A70000__00000073AD3FE6B8\n000000067F000040020000E0000000A6C000-000000067F000040020000E0000000A70000_
_000000914E3F38F0\n000000067F000040020000E0000000A6C000-000000067F000040020000E0000000A70000__000000931B9A2710\n000000067F000040020000E0000000A70000-000000067F000040020000E0000000A74000__0000004E11956660\n000000067F000040020000E0000000A70000-000000067F000040020000E0000000A74000__000000574B7FF240\n000000067F000040020000E0000000A70000-000000067F000040020000E0000000A74000__00000073AD3FE6B8\n000000067F000040020000E0000000A70000-000000067F000040020000E0000000A74000__000000914E3F38F0\n000000067F000040020000E0000000A70000-000000067F000040020000E0000000A74000__000000931B9A2710\n000000067F000040020000E0000000A74000-000000067F000040020000E0000000A78000__000000574B7FF240\n000000067F000040020000E0000000A74000-000000067F000040020000E0000000A78000__00000073AD3FE6B8\n000000067F000040020000E0000000A74000-000000067F000040020000E0000000A78000__000000914E3F38F0\n000000067F000040020000E0000000A74000-000000067F000040020000E0000000A78000__000000931B9A2710\n000000067F000040020000E0000000A74000-030000000000000000000000000000000002__0000004E11956660\n000000067F000040020000E0000000A747DF-000000067F000040020000E0000000A7D1C5__0000004DE0CBDCD1-0000004E807BE039\n000000067F000040020000E0000000A78000-000000067F000040020000E0000000A7C000__000000574B7FF240\n000000067F000040020000E0000000A78000-000000067F000040020000E0000000A7C000__00000073AD3FE6B8\n000000067F000040020000E0000000A78000-000000067F000040020000E0000000A7C000__000000914E3F38F0\n000000067F000040020000E0000000A78000-000000067F000040020000E0000000A7C000__000000931B9A2710\n000000067F000040020000E0000000A7C000-000000067F000040020000E0000000A80000__000000574B7FF240\n000000067F000040020000E0000000A7C000-000000067F000040020000E0000000A80000__00000073AD3FE6B8\n000000067F000040020000E0000000A7C000-000000067F000040020000E0000000A80000__000000914E3F38F0\n000000067F000040020000E0000000A7C000-000000067F000040020000E0000000A80000__000000931B9A2710\n000000067F000040020000E0000000A7D1C5-000000067F000040020000E0000000A85B9E__0000004DE0CBDCD1-0000004E807B
E039\n000000067F000040020000E0000000A80000-000000067F000040020000E0000000A84000__000000574B7FF240\n000000067F000040020000E0000000A80000-000000067F000040020000E0000000A84000__00000073AD3FE6B8\n000000067F000040020000E0000000A80000-000000067F000040020000E0000000A84000__000000914E3F38F0\n000000067F000040020000E0000000A80000-000000067F000040020000E0000000A84000__000000931B9A2710\n000000067F000040020000E0000000A84000-000000067F000040020000E0000000A88000__000000574B7FF240\n000000067F000040020000E0000000A84000-000000067F000040020000E0000000A88000__00000073AD3FE6B8\n000000067F000040020000E0000000A84000-000000067F000040020000E0000000A88000__000000914E3F38F0\n000000067F000040020000E0000000A84000-000000067F000040020000E0000000A88000__000000931B9A2710\n000000067F000040020000E0000000A85B9E-000000067F000040020000E0000000A8E573__0000004DE0CBDCD1-0000004E807BE039\n000000067F000040020000E0000000A88000-000000067F000040020000E0000000A8C000__000000574B7FF240\n000000067F000040020000E0000000A88000-000000067F000040020000E0000000A8C000__00000073AD3FE6B8\n000000067F000040020000E0000000A88000-000000067F000040020000E0000000A8C000__000000914E3F38F0\n000000067F000040020000E0000000A88000-000000067F000040020000E0000000A8C000__000000931B9A2710\n000000067F000040020000E0000000A8C000-000000067F000040020000E0000000A90000__000000574B7FF240\n000000067F000040020000E0000000A8C000-000000067F000040020000E0000000A90000__00000073AD3FE6B8\n000000067F000040020000E0000000A8C000-000000067F000040020000E0000000A90000__000000914E3F38F0\n000000067F000040020000E0000000A8C000-000000067F000040020000E0000000A90000__000000931B9A2710\n000000067F000040020000E0000000A8E573-000000067F000040020000E0000000A96F4D__0000004DE0CBDCD1-0000004E807BE039\n000000067F000040020000E0000000A90000-000000067F000040020000E0000000A94000__000000574B7FF240\n000000067F000040020000E0000000A90000-000000067F000040020000E0000000A94000__00000073AD3FE6B8\n000000067F000040020000E0000000A90000-000000067F000040020000E0000000A94000__000000914E3F38F0\n0000000
67F000040020000E0000000A90000-000000067F000040020000E0000000A94000__000000931B9A2710\n000000067F000040020000E0000000A94000-000000067F000040020000E0000000A98000__000000574B7FF240\n000000067F000040020000E0000000A94000-000000067F000040020000E0000000A98000__00000073AD3FE6B8\n000000067F000040020000E0000000A94000-000000067F000040020000E0000000A98000__000000914E3F38F0\n000000067F000040020000E0000000A94000-000000067F000040020000E0000000A98000__000000931B9A2710\n000000067F000040020000E0000000A96F4D-000000067F000040020000E0000000A9F922__0000004DE0CBDCD1-0000004E807BE039\n000000067F000040020000E0000000A98000-000000067F000040020000E0000000A9C000__000000574B7FF240\n000000067F000040020000E0000000A98000-000000067F000040020000E0000000A9C000__00000073AD3FE6B8\n000000067F000040020000E0000000A98000-000000067F000040020000E0000000A9C000__000000914E3F38F0\n000000067F000040020000E0000000A98000-000000067F000040020000E0000000A9C000__000000931B9A2710\n000000067F000040020000E0000000A9C000-000000067F000040020000E0000000AA0000__000000574B7FF240\n000000067F000040020000E0000000A9C000-000000067F000040020000E0000000AA0000__00000073AD3FE6B8\n000000067F000040020000E0000000A9C000-000000067F000040020000E0000000AA0000__000000914E3F38F0\n000000067F000040020000E0000000A9C000-000000067F000040020000E0000000AA0000__000000931B9A2710\n000000067F000040020000E0000000A9F922-000000067F000040020000E0000000AA8300__0000004DE0CBDCD1-0000004E807BE039\n000000067F000040020000E0000000AA0000-000000067F000040020000E0000000AA4000__000000574B7FF240\n000000067F000040020000E0000000AA0000-000000067F000040020000E0000000AA4000__00000073AD3FE6B8\n000000067F000040020000E0000000AA0000-000000067F000040020000E0000000AA4000__000000914E3F38F0\n000000067F000040020000E0000000AA0000-000000067F000040020000E0000000AA4000__000000931B9A2710\n000000067F000040020000E0000000AA4000-000000067F000040020000E0000000AA8000__000000574B7FF240\n000000067F000040020000E0000000AA4000-000000067F000040020000E0000000AA8000__00000073AD3FE6B8\n000000067F0000400200
00E0000000AA4000-000000067F000040020000E0000000AA8000__000000914E3F38F0\n000000067F000040020000E0000000AA4000-000000067F000040020000E0000000AA8000__000000931B9A2710\n000000067F000040020000E0000000AA8000-000000067F000040020000E0000000AAC000__000000574B7FF240\n000000067F000040020000E0000000AA8000-000000067F000040020000E0000000AAC000__00000073AD3FE6B8\n000000067F000040020000E0000000AA8000-000000067F000040020000E0000000AAC000__000000914E3F38F0\n000000067F000040020000E0000000AA8000-000000067F000040020000E0000000AAC000__000000931B9A2710\n000000067F000040020000E0000000AA8300-000000067F000040020000E0000000AB0CDB__0000004DE0CBDCD1-0000004E807BE039\n000000067F000040020000E0000000AAC000-000000067F000040020000E0000000AB0000__000000574B7FF240\n000000067F000040020000E0000000AAC000-000000067F000040020000E0000000AB0000__00000073AD3FE6B8\n000000067F000040020000E0000000AAC000-000000067F000040020000E0000000AB0000__000000914E3F38F0\n000000067F000040020000E0000000AAC000-000000067F000040020000E0000000AB0000__000000931B9A2710\n000000067F000040020000E0000000AB0000-000000067F000040020000E0000000AB4000__000000572A7A05D8\n000000067F000040020000E0000000AB0000-000000067F000040020000E0000000AB4000__0000005D2FFFFB38\n000000067F000040020000E0000000AB0000-000000067F000040020000E0000000AB4000__00000073AD3FE6B8\n000000067F000040020000E0000000AB0000-000000067F000040020000E0000000AB4000__000000914E3F38F0\n000000067F000040020000E0000000AB0000-000000067F000040020000E0000000AB4000__000000931B9A2710\n000000067F000040020000E0000000AB0CDB-000000067F000040020000E0000100000000__0000004DE0CBDCD1-0000004E807BE039\n000000067F000040020000E0000000AB0FBD-000000067F000040020000E0000000AB99A0__0000004E807BE039-0000004F2029EFA9\n000000067F000040020000E0000000AB4000-000000067F000040020000E0000000AB8000__000000572A7A05D8\n000000067F000040020000E0000000AB4000-000000067F000040020000E0000000AB8000__0000005D2FFFFB38\n000000067F000040020000E0000000AB4000-000000067F000040020000E0000000AB8000__00000073AD3FE6B8\n000000067F000040
020000E0000000AB4000-000000067F000040020000E0000000AB8000__000000914E3F38F0\n000000067F000040020000E0000000AB4000-000000067F000040020000E0000000AB8000__000000931B9A2710\n000000067F000040020000E0000000AB8000-000000067F000040020000E0000000ABC000__000000572A7A05D8\n000000067F000040020000E0000000AB8000-000000067F000040020000E0000000ABC000__0000005D2FFFFB38\n000000067F000040020000E0000000AB8000-000000067F000040020000E0000000ABC000__00000073AD3FE6B8\n000000067F000040020000E0000000AB8000-000000067F000040020000E0000000ABC000__000000914E3F38F0\n000000067F000040020000E0000000AB8000-000000067F000040020000E0000000ABC000__000000931B9A2710\n000000067F000040020000E0000000AB99A0-000000067F000040020000E0000000AC237B__0000004E807BE039-0000004F2029EFA9\n000000067F000040020000E0000000ABC000-000000067F000040020000E0000000AC0000__000000572A7A05D8\n000000067F000040020000E0000000ABC000-000000067F000040020000E0000000AC0000__0000005D2FFFFB38\n000000067F000040020000E0000000ABC000-000000067F000040020000E0000000AC0000__00000073AD3FE6B8\n000000067F000040020000E0000000ABC000-000000067F000040020000E0000000AC0000__000000914E3F38F0\n000000067F000040020000E0000000ABC000-000000067F000040020000E0000000AC0000__000000931B9A2710\n000000067F000040020000E0000000AC0000-000000067F000040020000E0000000AC4000__000000572A7A05D8\n000000067F000040020000E0000000AC0000-000000067F000040020000E0000000AC4000__0000005D2FFFFB38\n000000067F000040020000E0000000AC0000-000000067F000040020000E0000000AC4000__00000073AD3FE6B8\n000000067F000040020000E0000000AC0000-000000067F000040020000E0000000AC4000__000000914E3F38F0\n000000067F000040020000E0000000AC0000-000000067F000040020000E0000000AC4000__000000931B9A2710\n000000067F000040020000E0000000AC237B-000000067F000040020000E0000000ACAD51__0000004E807BE039-0000004F2029EFA9\n000000067F000040020000E0000000AC4000-000000067F000040020000E0000000AC8000__000000572A7A05D8\n000000067F000040020000E0000000AC4000-000000067F000040020000E0000000AC8000__0000005D2FFFFB38\n000000067F000040020000E000000
0AC4000-000000067F000040020000E0000000AC8000__00000073AD3FE6B8\n000000067F000040020000E0000000AC4000-000000067F000040020000E0000000AC8000__000000914E3F38F0\n000000067F000040020000E0000000AC4000-000000067F000040020000E0000000AC8000__000000931B9A2710\n000000067F000040020000E0000000AC8000-000000067F000040020000E0000000ACC000__000000572A7A05D8\n000000067F000040020000E0000000AC8000-000000067F000040020000E0000000ACC000__0000005D2FFFFB38\n000000067F000040020000E0000000AC8000-000000067F000040020000E0000000ACC000__00000073AD3FE6B8\n000000067F000040020000E0000000AC8000-000000067F000040020000E0000000ACC000__000000914E3F38F0\n000000067F000040020000E0000000AC8000-000000067F000040020000E0000000ACC000__000000931B9A2710\n000000067F000040020000E0000000ACAD51-000000067F000040020000E0000000AD372F__0000004E807BE039-0000004F2029EFA9\n000000067F000040020000E0000000ACC000-000000067F000040020000E0000000AD0000__000000572A7A05D8\n000000067F000040020000E0000000ACC000-000000067F000040020000E0000000AD0000__0000005D2FFFFB38\n000000067F000040020000E0000000ACC000-000000067F000040020000E0000000AD0000__00000073AD3FE6B8\n000000067F000040020000E0000000ACC000-000000067F000040020000E0000000AD0000__000000914E3F38F0\n000000067F000040020000E0000000ACC000-000000067F000040020000E0000000AD0000__000000931B9A2710\n000000067F000040020000E0000000AD0000-000000067F000040020000E0000000AD4000__000000572A7A05D8\n000000067F000040020000E0000000AD0000-000000067F000040020000E0000000AD4000__0000005D2FFFFB38\n000000067F000040020000E0000000AD0000-000000067F000040020000E0000000AD4000__00000073AD3FE6B8\n000000067F000040020000E0000000AD0000-000000067F000040020000E0000000AD4000__000000914E3F38F0\n000000067F000040020000E0000000AD0000-000000067F000040020000E0000000AD4000__000000931B9A2710\n000000067F000040020000E0000000AD372F-000000067F000040020000E0000000ADC0FD__0000004E807BE039-0000004F2029EFA9\n000000067F000040020000E0000000AD4000-000000067F000040020000E0000000AD8000__000000572A7A05D8\n000000067F000040020000E0000000AD4000-00000
0067F000040020000E0000000AD8000__0000005D2FFFFB38\n000000067F000040020000E0000000AD4000-000000067F000040020000E0000000AD8000__00000073AD3FE6B8\n000000067F000040020000E0000000AD4000-000000067F000040020000E0000000AD8000__000000914E3F38F0\n000000067F000040020000E0000000AD4000-000000067F000040020000E0000000AD8000__000000931B9A2710\n000000067F000040020000E0000000AD8000-000000067F000040020000E0000000ADC000__000000572A7A05D8\n000000067F000040020000E0000000AD8000-000000067F000040020000E0000000ADC000__0000005D2FFFFB38\n000000067F000040020000E0000000AD8000-000000067F000040020000E0000000ADC000__00000073AD3FE6B8\n000000067F000040020000E0000000AD8000-000000067F000040020000E0000000ADC000__000000914E3F38F0\n000000067F000040020000E0000000AD8000-000000067F000040020000E0000000ADC000__000000931B9A2710\n000000067F000040020000E0000000ADC000-000000067F000040020000E0000000AE0000__000000572A7A05D8\n000000067F000040020000E0000000ADC000-000000067F000040020000E0000000AE0000__0000005D2FFFFB38\n000000067F000040020000E0000000ADC000-000000067F000040020000E0000000AE0000__00000073AD3FE6B8\n000000067F000040020000E0000000ADC000-000000067F000040020000E0000000AE0000__000000914E3F38F0\n000000067F000040020000E0000000ADC000-000000067F000040020000E0000000AE0000__000000931B9A2710\n000000067F000040020000E0000000ADC0FD-000000067F000040020000E0000000AE4AE3__0000004E807BE039-0000004F2029EFA9\n000000067F000040020000E0000000AE0000-000000067F000040020000E0000000AE4000__000000572A7A05D8\n000000067F000040020000E0000000AE0000-000000067F000040020000E0000000AE4000__0000005D2FFFFB38\n000000067F000040020000E0000000AE0000-000000067F000040020000E0000000AE4000__00000073AD3FE6B8\n000000067F000040020000E0000000AE0000-000000067F000040020000E0000000AE4000__000000914E3F38F0\n000000067F000040020000E0000000AE0000-000000067F000040020000E0000000AE4000__000000931B9A2710\n000000067F000040020000E0000000AE4000-000000067F000040020000E0000000AE8000__000000572A7A05D8\n000000067F000040020000E0000000AE4000-000000067F000040020000E0000000AE800
0__0000005D2FFFFB38\n000000067F000040020000E0000000AE4000-000000067F000040020000E0000000AE8000__00000073AD3FE6B8\n000000067F000040020000E0000000AE4000-000000067F000040020000E0000000AE8000__000000914E3F38F0\n000000067F000040020000E0000000AE4000-000000067F000040020000E0000000AE8000__000000931B9A2710\n000000067F000040020000E0000000AE4AE3-000000067F000040020000E0000000AED4D7__0000004E807BE039-0000004F2029EFA9\n000000067F000040020000E0000000AE8000-000000067F000040020000E0000000AEC000__000000572A7A05D8\n000000067F000040020000E0000000AE8000-000000067F000040020000E0000000AEC000__0000005D2FFFFB38\n000000067F000040020000E0000000AE8000-000000067F000040020000E0000000AEC000__00000073AD3FE6B8\n000000067F000040020000E0000000AE8000-000000067F000040020000E0000000AEC000__000000914E3F38F0\n000000067F000040020000E0000000AE8000-000000067F000040020000E0000000AEC000__000000931B9A2710\n000000067F000040020000E0000000AEC000-000000067F000040020000E0000000AF0000__000000572A7A05D8\n000000067F000040020000E0000000AEC000-000000067F000040020000E0000000AF0000__0000005D2FFFFB38\n000000067F000040020000E0000000AEC000-000000067F000040020000E0000000AF0000__00000073AD3FE6B8\n000000067F000040020000E0000000AEC000-000000067F000040020000E0000000AF0000__000000914E3F38F0\n000000067F000040020000E0000000AEC000-000000067F000040020000E0000000AF0000__000000931B9A2710\n000000067F000040020000E0000000AED4D7-000000067F000040020000E0000000AF5EBA__0000004E807BE039-0000004F2029EFA9\n000000067F000040020000E0000000AF0000-000000067F000040020000E0000000AF4000__000000572A7A05D8\n000000067F000040020000E0000000AF0000-000000067F000040020000E0000000AF4000__0000005D2FFFFB38\n000000067F000040020000E0000000AF0000-000000067F000040020000E0000000AF4000__00000073AD3FE6B8\n000000067F000040020000E0000000AF0000-000000067F000040020000E0000000AF4000__000000914E3F38F0\n000000067F000040020000E0000000AF0000-000000067F000040020000E0000000AF4000__000000931B9A2710\n000000067F000040020000E0000000AF4000-000000067F000040020000E0000000AF8000__000000572A
7A05D8\n000000067F000040020000E0000000AF4000-000000067F000040020000E0000000AF8000__0000005D2FFFFB38\n000000067F000040020000E0000000AF4000-000000067F000040020000E0000000AF8000__00000073AD3FE6B8\n000000067F000040020000E0000000AF4000-000000067F000040020000E0000000AF8000__000000914E3F38F0\n000000067F000040020000E0000000AF4000-000000067F000040020000E0000000AF8000__000000931B9A2710\n000000067F000040020000E0000000AF5EBA-000000067F000040020000E0000000AFE88E__0000004E807BE039-0000004F2029EFA9\n000000067F000040020000E0000000AF8000-000000067F000040020000E0000000AFC000__000000572A7A05D8\n000000067F000040020000E0000000AF8000-000000067F000040020000E0000000AFC000__0000005D2FFFFB38\n000000067F000040020000E0000000AF8000-000000067F000040020000E0000000AFC000__00000073AD3FE6B8\n000000067F000040020000E0000000AF8000-000000067F000040020000E0000000AFC000__000000914E3F38F0\n000000067F000040020000E0000000AF8000-000000067F000040020000E0000000AFC000__000000931B9A2710\n000000067F000040020000E0000000AFC000-000000067F000040020000E0000000B00000__000000572A7A05D8\n000000067F000040020000E0000000AFC000-000000067F000040020000E0000000B00000__0000005D2FFFFB38\n000000067F000040020000E0000000AFC000-000000067F000040020000E0000000B00000__00000073AD3FE6B8\n000000067F000040020000E0000000AFC000-000000067F000040020000E0000000B00000__000000914E3F38F0\n000000067F000040020000E0000000AFC000-000000067F000040020000E0000000B00000__000000931B9A2710\n000000067F000040020000E0000000AFE88E-000000067F000040020000E0000000B07269__0000004E807BE039-0000004F2029EFA9\n000000067F000040020000E0000000B00000-000000067F000040020000E0000000B04000__000000572A7A05D8\n000000067F000040020000E0000000B00000-000000067F000040020000E0000000B04000__0000005D2FFFFB38\n000000067F000040020000E0000000B00000-000000067F000040020000E0000000B04000__00000073AD3FE6B8\n000000067F000040020000E0000000B00000-000000067F000040020000E0000000B04000__000000914E3F38F0\n000000067F000040020000E0000000B00000-000000067F000040020000E0000000B04000__000000931B9A2710\n00000
0067F000040020000E0000000B04000-000000067F000040020000E0000000B08000__0000004FEAB6F890\n000000067F000040020000E0000000B04000-000000067F000040020000E0000000B08000__0000005D2FFFFB38\n000000067F000040020000E0000000B04000-000000067F000040020000E0000000B08000__00000073AD3FE6B8\n000000067F000040020000E0000000B04000-000000067F000040020000E0000000B08000__000000914E3F38F0\n000000067F000040020000E0000000B04000-000000067F000040020000E0000000B08000__000000931B9A2710\n000000067F000040020000E0000000B07269-000000067F000040020000E0000100000000__0000004E807BE039-0000004F2029EFA9\n000000067F000040020000E0000000B07515-000000067F000040020000E0000000B0FEE8__0000004F2029EFA9-0000004FBFD9F391\n000000067F000040020000E0000000B08000-000000067F000040020000E0000000B0C000__0000004FEAB6F890\n000000067F000040020000E0000000B08000-000000067F000040020000E0000000B0C000__0000005D2FFFFB38\n000000067F000040020000E0000000B08000-000000067F000040020000E0000000B0C000__00000073AD3FE6B8\n000000067F000040020000E0000000B08000-000000067F000040020000E0000000B0C000__000000914E3F38F0\n000000067F000040020000E0000000B08000-000000067F000040020000E0000000B0C000__000000931B9A2710\n000000067F000040020000E0000000B0C000-000000067F000040020000E0000000B10000__0000004FEAB6F890\n000000067F000040020000E0000000B0C000-000000067F000040020000E0000000B10000__0000005D2FFFFB38\n000000067F000040020000E0000000B0C000-000000067F000040020000E0000000B10000__00000073AD3FE6B8\n000000067F000040020000E0000000B0C000-000000067F000040020000E0000000B10000__000000914E3F38F0\n000000067F000040020000E0000000B0C000-000000067F000040020000E0000000B10000__000000931B9A2710\n000000067F000040020000E0000000B0FEE8-000000067F000040020000E0000000B188C0__0000004F2029EFA9-0000004FBFD9F391\n000000067F000040020000E0000000B10000-000000067F000040020000E0000000B14000__0000004FEAB6F890\n000000067F000040020000E0000000B10000-000000067F000040020000E0000000B14000__0000005D2FFFFB38\n000000067F000040020000E0000000B10000-000000067F000040020000E0000000B14000__00000073AD3FE6B8\n0
00000067F000040020000E0000000B10000-000000067F000040020000E0000000B14000__000000914E3F38F0\n000000067F000040020000E0000000B10000-000000067F000040020000E0000000B14000__000000931B9A2710\n000000067F000040020000E0000000B14000-000000067F000040020000E0000000B18000__0000004FEAB6F890\n000000067F000040020000E0000000B14000-000000067F000040020000E0000000B18000__0000005D2FFFFB38\n000000067F000040020000E0000000B14000-000000067F000040020000E0000000B18000__00000073AD3FE6B8\n000000067F000040020000E0000000B14000-000000067F000040020000E0000000B18000__000000914E3F38F0\n000000067F000040020000E0000000B14000-000000067F000040020000E0000000B18000__000000931B9A2710\n000000067F000040020000E0000000B18000-000000067F000040020000E0000000B1C000__0000004FEAB6F890\n000000067F000040020000E0000000B18000-000000067F000040020000E0000000B1C000__0000005D2FFFFB38\n000000067F000040020000E0000000B18000-000000067F000040020000E0000000B1C000__00000073AD3FE6B8\n000000067F000040020000E0000000B18000-000000067F000040020000E0000000B1C000__000000914E3F38F0\n000000067F000040020000E0000000B18000-000000067F000040020000E0000000B1C000__000000931B9A2710\n000000067F000040020000E0000000B188C0-000000067F000040020000E0000000B212A1__0000004F2029EFA9-0000004FBFD9F391\n000000067F000040020000E0000000B1C000-000000067F000040020000E0000000B20000__0000004FEAB6F890\n000000067F000040020000E0000000B1C000-000000067F000040020000E0000000B20000__0000005D2FFFFB38\n000000067F000040020000E0000000B1C000-000000067F000040020000E0000000B20000__00000073AD3FE6B8\n000000067F000040020000E0000000B1C000-000000067F000040020000E0000000B20000__000000914E3F38F0\n000000067F000040020000E0000000B1C000-000000067F000040020000E0000000B20000__000000931B9A2710\n000000067F000040020000E0000000B20000-000000067F000040020000E0000000B24000__0000004FEAB6F890\n000000067F000040020000E0000000B20000-000000067F000040020000E0000000B24000__0000005D2FFFFB38\n000000067F000040020000E0000000B20000-000000067F000040020000E0000000B24000__00000073AD3FE6B8\n000000067F000040020000E0000000B
20000-000000067F000040020000E0000000B24000__000000914E3F38F0\n000000067F000040020000E0000000B20000-000000067F000040020000E0000000B24000__000000931B9A2710\n000000067F000040020000E0000000B212A1-000000067F000040020000E0000000B29C85__0000004F2029EFA9-0000004FBFD9F391\n000000067F000040020000E0000000B24000-000000067F000040020000E0000000B28000__0000004FEAB6F890\n000000067F000040020000E0000000B24000-000000067F000040020000E0000000B28000__0000005D2FFFFB38\n000000067F000040020000E0000000B24000-000000067F000040020000E0000000B28000__00000073AD3FE6B8\n000000067F000040020000E0000000B24000-000000067F000040020000E0000000B28000__000000914E3F38F0\n000000067F000040020000E0000000B24000-000000067F000040020000E0000000B28000__000000931B9A2710\n000000067F000040020000E0000000B28000-000000067F000040020000E0000000B2C000__0000004FEAB6F890\n000000067F000040020000E0000000B28000-000000067F000040020000E0000000B2C000__0000005D2FFFFB38\n000000067F000040020000E0000000B28000-000000067F000040020000E0000000B2C000__00000073AD3FE6B8\n000000067F000040020000E0000000B28000-000000067F000040020000E0000000B2C000__000000914E3F38F0\n000000067F000040020000E0000000B28000-000000067F000040020000E0000000B2C000__000000931B9A2710\n000000067F000040020000E0000000B29C85-000000067F000040020000E0000000B3265D__0000004F2029EFA9-0000004FBFD9F391\n000000067F000040020000E0000000B2C000-000000067F000040020000E0000000B30000__0000004FEAB6F890\n000000067F000040020000E0000000B2C000-000000067F000040020000E0000000B30000__0000005D2FFFFB38\n000000067F000040020000E0000000B2C000-000000067F000040020000E0000000B30000__00000073AD3FE6B8\n000000067F000040020000E0000000B2C000-000000067F000040020000E0000000B30000__000000914E3F38F0\n000000067F000040020000E0000000B2C000-000000067F000040020000E0000000B30000__000000931B9A2710\n000000067F000040020000E0000000B30000-000000067F000040020000E0000000B34000__0000004FEAB6F890\n000000067F000040020000E0000000B30000-000000067F000040020000E0000000B34000__0000005D2FFFFB38\n000000067F000040020000E0000000B30000-0000000
67F000040020000E0000000B34000__00000073AD3FE6B8\n000000067F000040020000E0000000B30000-000000067F000040020000E0000000B34000__000000914E3F38F0\n000000067F000040020000E0000000B30000-000000067F000040020000E0000000B34000__000000931B9A2710\n000000067F000040020000E0000000B3265D-000000067F000040020000E0000000B3B036__0000004F2029EFA9-0000004FBFD9F391\n000000067F000040020000E0000000B34000-000000067F000040020000E0000000B38000__0000004FEAB6F890\n000000067F000040020000E0000000B34000-000000067F000040020000E0000000B38000__0000005D2FFFFB38\n000000067F000040020000E0000000B34000-000000067F000040020000E0000000B38000__00000073AD3FE6B8\n000000067F000040020000E0000000B34000-000000067F000040020000E0000000B38000__000000914E3F38F0\n000000067F000040020000E0000000B34000-000000067F000040020000E0000000B38000__000000931B9A2710\n000000067F000040020000E0000000B38000-000000067F000040020000E0000000B3C000__0000004FEAB6F890\n000000067F000040020000E0000000B38000-000000067F000040020000E0000000B3C000__0000005D2FFFFB38\n000000067F000040020000E0000000B38000-000000067F000040020000E0000000B3C000__00000073AD3FE6B8\n000000067F000040020000E0000000B38000-000000067F000040020000E0000000B3C000__000000914E3F38F0\n000000067F000040020000E0000000B38000-000000067F000040020000E0000000B3C000__000000931B9A2710\n000000067F000040020000E0000000B3B036-000000067F000040020000E0000000B43A10__0000004F2029EFA9-0000004FBFD9F391\n000000067F000040020000E0000000B3C000-000000067F000040020000E0000000B40000__0000004FEAB6F890\n000000067F000040020000E0000000B3C000-000000067F000040020000E0000000B40000__0000005D2FFFFB38\n000000067F000040020000E0000000B3C000-000000067F000040020000E0000000B40000__00000073AD3FE6B8\n000000067F000040020000E0000000B3C000-000000067F000040020000E0000000B40000__000000914E3F38F0\n000000067F000040020000E0000000B3C000-000000067F000040020000E0000000B40000__000000931B9A2710\n000000067F000040020000E0000000B40000-000000067F000040020000E0000000B44000__0000004FEAB6F890\n000000067F000040020000E0000000B40000-000000067F0000400200
00E0000000B44000__0000005D2FFFFB38\n000000067F000040020000E0000000B40000-000000067F000040020000E0000000B44000__00000073AD3FE6B8\n000000067F000040020000E0000000B40000-000000067F000040020000E0000000B44000__000000914E3F38F0\n000000067F000040020000E0000000B40000-000000067F000040020000E0000000B44000__000000931B9A2710\n000000067F000040020000E0000000B43A10-000000067F000040020000E0000000B4C3DD__0000004F2029EFA9-0000004FBFD9F391\n000000067F000040020000E0000000B44000-000000067F000040020000E0000000B48000__0000004FEAB6F890\n000000067F000040020000E0000000B44000-000000067F000040020000E0000000B48000__0000005D2FFFFB38\n000000067F000040020000E0000000B44000-000000067F000040020000E0000000B48000__00000073AD3FE6B8\n000000067F000040020000E0000000B44000-000000067F000040020000E0000000B48000__000000914E3F38F0\n000000067F000040020000E0000000B44000-000000067F000040020000E0000000B48000__000000931B9A2710\n000000067F000040020000E0000000B48000-000000067F000040020000E0000000B4C000__0000004FEAB6F890\n000000067F000040020000E0000000B48000-000000067F000040020000E0000000B4C000__0000005D2FFFFB38\n000000067F000040020000E0000000B48000-000000067F000040020000E0000000B4C000__00000073AD3FE6B8\n000000067F000040020000E0000000B48000-000000067F000040020000E0000000B4C000__000000914E3F38F0\n000000067F000040020000E0000000B48000-000000067F000040020000E0000000B4C000__000000931B9A2710\n000000067F000040020000E0000000B4C000-000000067F000040020000E0000000B50000__0000004FEAB6F890\n000000067F000040020000E0000000B4C000-000000067F000040020000E0000000B50000__0000005D2FFFFB38\n000000067F000040020000E0000000B4C000-000000067F000040020000E0000000B50000__00000073AD3FE6B8\n000000067F000040020000E0000000B4C000-000000067F000040020000E0000000B50000__000000914E3F38F0\n000000067F000040020000E0000000B4C000-000000067F000040020000E0000000B50000__000000931B9A2710\n000000067F000040020000E0000000B4C3DD-000000067F000040020000E0000000B54DC7__0000004F2029EFA9-0000004FBFD9F391\n000000067F000040020000E0000000B50000-000000067F000040020000E0000000B54
000__0000004FEAB6F890\n000000067F000040020000E0000000B50000-000000067F000040020000E0000000B54000__0000005D2FFFFB38\n000000067F000040020000E0000000B50000-000000067F000040020000E0000000B54000__00000073AD3FE6B8\n000000067F000040020000E0000000B50000-000000067F000040020000E0000000B54000__000000914E3F38F0\n000000067F000040020000E0000000B50000-000000067F000040020000E0000000B54000__000000931B9A2710\n000000067F000040020000E0000000B54000-000000067F000040020000E0000000B58000__0000004FEAB6F890\n000000067F000040020000E0000000B54000-000000067F000040020000E0000000B58000__0000005D2FFFFB38\n000000067F000040020000E0000000B54000-000000067F000040020000E0000000B58000__00000073AD3FE6B8\n000000067F000040020000E0000000B54000-000000067F000040020000E0000000B58000__000000914E3F38F0\n000000067F000040020000E0000000B54000-000000067F000040020000E0000000B58000__000000931B9A2710\n000000067F000040020000E0000000B54DC7-000000067F000040020000E0000000B5D7A4__0000004F2029EFA9-0000004FBFD9F391\n000000067F000040020000E0000000B58000-000000067F000040020000E0000000B5C000__0000004FEAB6F890\n000000067F000040020000E0000000B58000-000000067F000040020000E0000000B5C000__0000005D2FFFFB38\n000000067F000040020000E0000000B58000-000000067F000040020000E0000000B5C000__00000073AD3FE6B8\n000000067F000040020000E0000000B58000-000000067F000040020000E0000000B5C000__000000914E3F38F0\n000000067F000040020000E0000000B58000-000000067F000040020000E0000000B5C000__000000931B9A2710\n000000067F000040020000E0000000B5C000-000000067F000040020000E0000000B60000__0000004FEAB6F890\n000000067F000040020000E0000000B5C000-000000067F000040020000E0000000B60000__000000574B7FF240\n000000067F000040020000E0000000B5C000-000000067F000040020000E0000000B60000__00000073AD3FE6B8\n000000067F000040020000E0000000B5C000-000000067F000040020000E0000000B60000__000000914E3F38F0\n000000067F000040020000E0000000B5C000-000000067F000040020000E0000000B60000__000000931B9A2710\n000000067F000040020000E0000000B5D7A4-000000067F000040020000E0000100000000__0000004F2029EFA9-0000004F
BFD9F391\n000000067F000040020000E0000000B5DA84-000000067F000040020000E0000000B66465__0000004FBFD9F391-000000505F89E839\n000000067F000040020000E0000000B60000-000000067F000040020000E0000000B64000__0000004FEAB6F890\n000000067F000040020000E0000000B60000-000000067F000040020000E0000000B64000__000000574B7FF240\n000000067F000040020000E0000000B60000-000000067F000040020000E0000000B64000__00000073AD3FE6B8\n000000067F000040020000E0000000B60000-000000067F000040020000E0000000B64000__000000914E3F38F0\n000000067F000040020000E0000000B60000-000000067F000040020000E0000000B64000__000000931B9A2710\n000000067F000040020000E0000000B64000-000000067F000040020000E0000000B68000__0000004FEAB6F890\n000000067F000040020000E0000000B64000-000000067F000040020000E0000000B68000__000000574B7FF240\n000000067F000040020000E0000000B64000-000000067F000040020000E0000000B68000__00000073AD3FE6B8\n000000067F000040020000E0000000B64000-000000067F000040020000E0000000B68000__000000914E3F38F0\n000000067F000040020000E0000000B64000-000000067F000040020000E0000000B68000__000000931B9A2710\n000000067F000040020000E0000000B66465-000000067F000040020000E0000000B6EE49__0000004FBFD9F391-000000505F89E839\n000000067F000040020000E0000000B68000-000000067F000040020000E0000000B6C000__0000004FEAB6F890\n000000067F000040020000E0000000B68000-000000067F000040020000E0000000B6C000__000000574B7FF240\n000000067F000040020000E0000000B68000-000000067F000040020000E0000000B6C000__00000073AD3FE6B8\n000000067F000040020000E0000000B68000-000000067F000040020000E0000000B6C000__000000914E3F38F0\n000000067F000040020000E0000000B68000-000000067F000040020000E0000000B6C000__000000931B9A2710\n000000067F000040020000E0000000B6C000-000000067F000040020000E0000000B70000__0000004FEAB6F890\n000000067F000040020000E0000000B6C000-000000067F000040020000E0000000B70000__000000574B7FF240\n000000067F000040020000E0000000B6C000-000000067F000040020000E0000000B70000__00000073AD3FE6B8\n000000067F000040020000E0000000B6C000-000000067F000040020000E0000000B70000__000000914E3F38F0\n000
000067F000040020000E0000000B6C000-000000067F000040020000E0000000B70000__000000931B9A2710\n000000067F000040020000E0000000B6EE49-000000067F000040020000E0000000B77817__0000004FBFD9F391-000000505F89E839\n000000067F000040020000E0000000B70000-000000067F000040020000E0000000B74000__0000004FEAB6F890\n000000067F000040020000E0000000B70000-000000067F000040020000E0000000B74000__000000574B7FF240\n000000067F000040020000E0000000B70000-000000067F000040020000E0000000B74000__00000073AD3FE6B8\n000000067F000040020000E0000000B70000-000000067F000040020000E0000000B74000__000000914E3F38F0\n000000067F000040020000E0000000B70000-000000067F000040020000E0000000B74000__000000931B9A2710\n000000067F000040020000E0000000B74000-000000067F000040020000E0000000B78000__000000574B7FF240\n000000067F000040020000E0000000B74000-000000067F000040020000E0000000B78000__00000073AD3FE6B8\n000000067F000040020000E0000000B74000-000000067F000040020000E0000000B78000__000000914E3F38F0\n000000067F000040020000E0000000B74000-000000067F000040020000E0000000B78000__000000931B9A2710\n000000067F000040020000E0000000B74000-030000000000000000000000000000000002__0000004FEAB6F890\n000000067F000040020000E0000000B77817-000000067F000040020000E0000000B801EA__0000004FBFD9F391-000000505F89E839\n000000067F000040020000E0000000B78000-000000067F000040020000E0000000B7C000__000000574B7FF240\n000000067F000040020000E0000000B78000-000000067F000040020000E0000000B7C000__00000073AD3FE6B8\n000000067F000040020000E0000000B78000-000000067F000040020000E0000000B7C000__000000914E3F38F0\n000000067F000040020000E0000000B78000-000000067F000040020000E0000000B7C000__000000931B9A2710\n000000067F000040020000E0000000B7C000-000000067F000040020000E0000000B80000__000000574B7FF240\n000000067F000040020000E0000000B7C000-000000067F000040020000E0000000B80000__00000073AD3FE6B8\n000000067F000040020000E0000000B7C000-000000067F000040020000E0000000B80000__000000914E3F38F0\n000000067F000040020000E0000000B7C000-000000067F000040020000E0000000B80000__000000931B9A2710\n000000067F000040
020000E0000000B80000-000000067F000040020000E0000000B84000__000000574B7FF240\n000000067F000040020000E0000000B80000-000000067F000040020000E0000000B84000__00000073AD3FE6B8\n000000067F000040020000E0000000B80000-000000067F000040020000E0000000B84000__000000914E3F38F0\n000000067F000040020000E0000000B80000-000000067F000040020000E0000000B84000__000000931B9A2710\n000000067F000040020000E0000000B801EA-000000067F000040020000E0000000B88BCE__0000004FBFD9F391-000000505F89E839\n000000067F000040020000E0000000B84000-000000067F000040020000E0000000B88000__000000574B7FF240\n000000067F000040020000E0000000B84000-000000067F000040020000E0000000B88000__00000073AD3FE6B8\n000000067F000040020000E0000000B84000-000000067F000040020000E0000000B88000__000000914E3F38F0\n000000067F000040020000E0000000B84000-000000067F000040020000E0000000B88000__000000931B9A2710\n000000067F000040020000E0000000B88000-000000067F000040020000E0000000B8C000__000000574B7FF240\n000000067F000040020000E0000000B88000-000000067F000040020000E0000000B8C000__00000073AD3FE6B8\n000000067F000040020000E0000000B88000-000000067F000040020000E0000000B8C000__000000914E3F38F0\n000000067F000040020000E0000000B88000-000000067F000040020000E0000000B8C000__000000931B9A2710\n000000067F000040020000E0000000B88BCE-000000067F000040020000E0000000B915B5__0000004FBFD9F391-000000505F89E839\n000000067F000040020000E0000000B8C000-000000067F000040020000E0000000B90000__000000574B7FF240\n000000067F000040020000E0000000B8C000-000000067F000040020000E0000000B90000__00000073AD3FE6B8\n000000067F000040020000E0000000B8C000-000000067F000040020000E0000000B90000__000000914E3F38F0\n000000067F000040020000E0000000B8C000-000000067F000040020000E0000000B90000__000000931B9A2710\n000000067F000040020000E0000000B90000-000000067F000040020000E0000000B94000__000000574B7FF240\n000000067F000040020000E0000000B90000-000000067F000040020000E0000000B94000__00000073AD3FE6B8\n000000067F000040020000E0000000B90000-000000067F000040020000E0000000B94000__000000914E3F38F0\n000000067F000040020000E000000
0B90000-000000067F000040020000E0000000B94000__000000931B9A2710\n000000067F000040020000E0000000B915B5-000000067F000040020000E0000000B99F95__0000004FBFD9F391-000000505F89E839\n000000067F000040020000E0000000B94000-000000067F000040020000E0000000B98000__000000574B7FF240\n000000067F000040020000E0000000B94000-000000067F000040020000E0000000B98000__00000073AD3FE6B8\n000000067F000040020000E0000000B94000-000000067F000040020000E0000000B98000__000000914E3F38F0\n000000067F000040020000E0000000B94000-000000067F000040020000E0000000B98000__000000931B9A2710\n000000067F000040020000E0000000B98000-000000067F000040020000E0000000B9C000__000000574B7FF240\n000000067F000040020000E0000000B98000-000000067F000040020000E0000000B9C000__00000073AD3FE6B8\n000000067F000040020000E0000000B98000-000000067F000040020000E0000000B9C000__000000914E3F38F0\n000000067F000040020000E0000000B98000-000000067F000040020000E0000000B9C000__000000931B9A2710\n000000067F000040020000E0000000B99F95-000000067F000040020000E0000000BA2971__0000004FBFD9F391-000000505F89E839\n000000067F000040020000E0000000B9C000-000000067F000040020000E0000000BA0000__000000574B7FF240\n000000067F000040020000E0000000B9C000-000000067F000040020000E0000000BA0000__00000073AD3FE6B8\n000000067F000040020000E0000000B9C000-000000067F000040020000E0000000BA0000__000000914E3F38F0\n000000067F000040020000E0000000B9C000-000000067F000040020000E0000000BA0000__000000931B9A2710\n000000067F000040020000E0000000BA0000-000000067F000040020000E0000000BA4000__000000574B7FF240\n000000067F000040020000E0000000BA0000-000000067F000040020000E0000000BA4000__00000073AD3FE6B8\n000000067F000040020000E0000000BA0000-000000067F000040020000E0000000BA4000__000000914E3F38F0\n000000067F000040020000E0000000BA0000-000000067F000040020000E0000000BA4000__000000931B9A2710\n000000067F000040020000E0000000BA2971-000000067F000040020000E0000000BAB356__0000004FBFD9F391-000000505F89E839\n000000067F000040020000E0000000BA4000-000000067F000040020000E0000000BA8000__000000574B7FF240\n000000067F000040020000E00
00000BA4000-000000067F000040020000E0000000BA8000__00000073AD3FE6B8\n000000067F000040020000E0000000BA4000-000000067F000040020000E0000000BA8000__000000914E3F38F0\n000000067F000040020000E0000000BA4000-000000067F000040020000E0000000BA8000__000000931B9A2710\n000000067F000040020000E0000000BA8000-000000067F000040020000E0000000BAC000__000000574B7FF240\n000000067F000040020000E0000000BA8000-000000067F000040020000E0000000BAC000__00000073AD3FE6B8\n000000067F000040020000E0000000BA8000-000000067F000040020000E0000000BAC000__000000914E3F38F0\n000000067F000040020000E0000000BA8000-000000067F000040020000E0000000BAC000__000000931B9A2710\n000000067F000040020000E0000000BAB356-000000067F000040020000E0000000BB3D2B__0000004FBFD9F391-000000505F89E839\n000000067F000040020000E0000000BAC000-000000067F000040020000E0000000BB0000__000000574B7FF240\n000000067F000040020000E0000000BAC000-000000067F000040020000E0000000BB0000__00000073AD3FE6B8\n000000067F000040020000E0000000BAC000-000000067F000040020000E0000000BB0000__000000914E3F38F0\n000000067F000040020000E0000000BAC000-000000067F000040020000E0000000BB0000__000000931B9A2710\n000000067F000040020000E0000000BB0000-000000067F000040020000E0000000BB4000__000000574B7FF240\n000000067F000040020000E0000000BB0000-000000067F000040020000E0000000BB4000__00000073AD3FE6B8\n000000067F000040020000E0000000BB0000-000000067F000040020000E0000000BB4000__000000914E3F38F0\n000000067F000040020000E0000000BB0000-000000067F000040020000E0000000BB4000__000000931B9A2710\n000000067F000040020000E0000000BB3D2B-000000067F000040020000E0000100000000__0000004FBFD9F391-000000505F89E839\n000000067F000040020000E0000000BB4000-000000067F000040020000E0000000BB8000__000000572A7A05D8\n000000067F000040020000E0000000BB4000-000000067F000040020000E0000000BB8000__0000005D2FFFFB38\n000000067F000040020000E0000000BB4000-000000067F000040020000E0000000BB8000__00000073AD3FE6B8\n000000067F000040020000E0000000BB4000-000000067F000040020000E0000000BB8000__000000914E3F38F0\n000000067F000040020000E0000000BB4000-0
00000067F000040020000E0000000BB8000__000000931B9A2710\n000000067F000040020000E0000000BB4003-000000067F000040020000E0000000BBC9D3__000000505F89E839-000000510F31FEA9\n000000067F000040020000E0000000BB8000-000000067F000040020000E0000000BBC000__000000572A7A05D8\n000000067F000040020000E0000000BB8000-000000067F000040020000E0000000BBC000__0000005D2FFFFB38\n000000067F000040020000E0000000BB8000-000000067F000040020000E0000000BBC000__00000073AD3FE6B8\n000000067F000040020000E0000000BB8000-000000067F000040020000E0000000BBC000__000000914E3F38F0\n000000067F000040020000E0000000BB8000-000000067F000040020000E0000000BBC000__000000931B9A2710\n000000067F000040020000E0000000BBC000-000000067F000040020000E0000000BC0000__000000572A7A05D8\n000000067F000040020000E0000000BBC000-000000067F000040020000E0000000BC0000__0000005D2FFFFB38\n000000067F000040020000E0000000BBC000-000000067F000040020000E0000000BC0000__00000073AD3FE6B8\n000000067F000040020000E0000000BBC000-000000067F000040020000E0000000BC0000__000000914E3F38F0\n000000067F000040020000E0000000BBC000-000000067F000040020000E0000000BC0000__000000931B9A2710\n000000067F000040020000E0000000BBC9D3-000000067F000040020000E0000000BC53AC__000000505F89E839-000000510F31FEA9\n000000067F000040020000E0000000BC0000-000000067F000040020000E0000000BC4000__000000572A7A05D8\n000000067F000040020000E0000000BC0000-000000067F000040020000E0000000BC4000__0000005D2FFFFB38\n000000067F000040020000E0000000BC0000-000000067F000040020000E0000000BC4000__00000073AD3FE6B8\n000000067F000040020000E0000000BC0000-000000067F000040020000E0000000BC4000__000000914E3F38F0\n000000067F000040020000E0000000BC0000-000000067F000040020000E0000000BC4000__000000931B9A2710\n000000067F000040020000E0000000BC4000-000000067F000040020000E0000000BC8000__000000572A7A05D8\n000000067F000040020000E0000000BC4000-000000067F000040020000E0000000BC8000__0000005D2FFFFB38\n000000067F000040020000E0000000BC4000-000000067F000040020000E0000000BC8000__00000073AD3FE6B8\n000000067F000040020000E0000000BC4000-000000067F0000
40020000E0000000BC8000__000000914E3F38F0\n000000067F000040020000E0000000BC4000-000000067F000040020000E0000000BC8000__000000931B9A2710\n000000067F000040020000E0000000BC53AC-000000067F000040020000E0000000BCDD9B__000000505F89E839-000000510F31FEA9\n000000067F000040020000E0000000BC8000-000000067F000040020000E0000000BCC000__000000572A7A05D8\n000000067F000040020000E0000000BC8000-000000067F000040020000E0000000BCC000__0000005D2FFFFB38\n000000067F000040020000E0000000BC8000-000000067F000040020000E0000000BCC000__00000073AD3FE6B8\n000000067F000040020000E0000000BC8000-000000067F000040020000E0000000BCC000__000000914E3F38F0\n000000067F000040020000E0000000BC8000-000000067F000040020000E0000000BCC000__000000931B9A2710\n000000067F000040020000E0000000BCC000-000000067F000040020000E0000000BD0000__000000572A7A05D8\n000000067F000040020000E0000000BCC000-000000067F000040020000E0000000BD0000__0000005D2FFFFB38\n000000067F000040020000E0000000BCC000-000000067F000040020000E0000000BD0000__00000073AD3FE6B8\n000000067F000040020000E0000000BCC000-000000067F000040020000E0000000BD0000__000000914E3F38F0\n000000067F000040020000E0000000BCC000-000000067F000040020000E0000000BD0000__000000931B9A2710\n000000067F000040020000E0000000BCDD9B-000000067F000040020000E0000000BD6777__000000505F89E839-000000510F31FEA9\n000000067F000040020000E0000000BD0000-000000067F000040020000E0000000BD4000__000000572A7A05D8\n000000067F000040020000E0000000BD0000-000000067F000040020000E0000000BD4000__0000005D2FFFFB38\n000000067F000040020000E0000000BD0000-000000067F000040020000E0000000BD4000__00000073AD3FE6B8\n000000067F000040020000E0000000BD0000-000000067F000040020000E0000000BD4000__000000914E3F38F0\n000000067F000040020000E0000000BD0000-000000067F000040020000E0000000BD4000__000000931B9A2710\n000000067F000040020000E0000000BD4000-000000067F000040020000E0000000BD8000__000000572A7A05D8\n000000067F000040020000E0000000BD4000-000000067F000040020000E0000000BD8000__0000005D2FFFFB38\n000000067F000040020000E0000000BD4000-000000067F000040020000E0000
000BD8000__00000073AD3FE6B8\n000000067F000040020000E0000000BD4000-000000067F000040020000E0000000BD8000__000000914E3F38F0\n000000067F000040020000E0000000BD4000-000000067F000040020000E0000000BD8000__000000931B9A2710\n000000067F000040020000E0000000BD6777-000000067F000040020000E0000000BDF149__000000505F89E839-000000510F31FEA9\n000000067F000040020000E0000000BD8000-000000067F000040020000E0000000BDC000__000000572A7A05D8\n000000067F000040020000E0000000BD8000-000000067F000040020000E0000000BDC000__0000005D2FFFFB38\n000000067F000040020000E0000000BD8000-000000067F000040020000E0000000BDC000__00000073AD3FE6B8\n000000067F000040020000E0000000BD8000-000000067F000040020000E0000000BDC000__000000914E3F38F0\n000000067F000040020000E0000000BD8000-000000067F000040020000E0000000BDC000__000000931B9A2710\n000000067F000040020000E0000000BDC000-000000067F000040020000E0000000BE0000__000000572A7A05D8\n000000067F000040020000E0000000BDC000-000000067F000040020000E0000000BE0000__0000005D2FFFFB38\n000000067F000040020000E0000000BDC000-000000067F000040020000E0000000BE0000__00000073AD3FE6B8\n000000067F000040020000E0000000BDC000-000000067F000040020000E0000000BE0000__000000914E3F38F0\n000000067F000040020000E0000000BDC000-000000067F000040020000E0000000BE0000__000000931B9A2710\n000000067F000040020000E0000000BDF149-000000067F000040020000E0000000BE7B28__000000505F89E839-000000510F31FEA9\n000000067F000040020000E0000000BE0000-000000067F000040020000E0000000BE4000__000000572A7A05D8\n000000067F000040020000E0000000BE0000-000000067F000040020000E0000000BE4000__0000005D2FFFFB38\n000000067F000040020000E0000000BE0000-000000067F000040020000E0000000BE4000__00000073AD3FE6B8\n000000067F000040020000E0000000BE0000-000000067F000040020000E0000000BE4000__000000914E3F38F0\n000000067F000040020000E0000000BE0000-000000067F000040020000E0000000BE4000__000000931B9A2710\n000000067F000040020000E0000000BE4000-000000067F000040020000E0000000BE8000__000000572A7A05D8\n000000067F000040020000E0000000BE4000-000000067F000040020000E0000000BE8000__00
00005D2FFFFB38\n000000067F000040020000E0000000BE4000-000000067F000040020000E0000000BE8000__00000073AD3FE6B8\n000000067F000040020000E0000000BE4000-000000067F000040020000E0000000BE8000__000000914E3F38F0\n000000067F000040020000E0000000BE4000-000000067F000040020000E0000000BE8000__000000931B9A2710\n000000067F000040020000E0000000BE7B28-000000067F000040020000E0000000BF04FD__000000505F89E839-000000510F31FEA9\n000000067F000040020000E0000000BE8000-000000067F000040020000E0000000BEC000__000000572A7A05D8\n000000067F000040020000E0000000BE8000-000000067F000040020000E0000000BEC000__0000005D2FFFFB38\n000000067F000040020000E0000000BE8000-000000067F000040020000E0000000BEC000__00000073AD3FE6B8\n000000067F000040020000E0000000BE8000-000000067F000040020000E0000000BEC000__000000914E3F38F0\n000000067F000040020000E0000000BE8000-000000067F000040020000E0000000BEC000__000000931B9A2710\n000000067F000040020000E0000000BEC000-000000067F000040020000E0000000BF0000__000000572A7A05D8\n000000067F000040020000E0000000BEC000-000000067F000040020000E0000000BF0000__0000005D2FFFFB38\n000000067F000040020000E0000000BEC000-000000067F000040020000E0000000BF0000__00000073AD3FE6B8\n000000067F000040020000E0000000BEC000-000000067F000040020000E0000000BF0000__000000914E3F38F0\n000000067F000040020000E0000000BEC000-000000067F000040020000E0000000BF0000__000000931B9A2710\n000000067F000040020000E0000000BF0000-000000067F000040020000E0000000BF4000__000000572A7A05D8\n000000067F000040020000E0000000BF0000-000000067F000040020000E0000000BF4000__0000005D2FFFFB38\n000000067F000040020000E0000000BF0000-000000067F000040020000E0000000BF4000__00000073AD3FE6B8\n000000067F000040020000E0000000BF0000-000000067F000040020000E0000000BF4000__000000914E3F38F0\n000000067F000040020000E0000000BF0000-000000067F000040020000E0000000BF4000__000000931B9A2710\n000000067F000040020000E0000000BF04FD-000000067F000040020000E0000000BF8ED4__000000505F89E839-000000510F31FEA9\n000000067F000040020000E0000000BF4000-000000067F000040020000E0000000BF8000__000000572A7A05D
8\n000000067F000040020000E0000000BF4000-000000067F000040020000E0000000BF8000__0000005D2FFFFB38\n000000067F000040020000E0000000BF4000-000000067F000040020000E0000000BF8000__00000073AD3FE6B8\n000000067F000040020000E0000000BF4000-000000067F000040020000E0000000BF8000__000000914E3F38F0\n000000067F000040020000E0000000BF4000-000000067F000040020000E0000000BF8000__000000931B9A2710\n000000067F000040020000E0000000BF8000-000000067F000040020000E0000000BFC000__000000572A7A05D8\n000000067F000040020000E0000000BF8000-000000067F000040020000E0000000BFC000__0000005D2FFFFB38\n000000067F000040020000E0000000BF8000-000000067F000040020000E0000000BFC000__00000073AD3FE6B8\n000000067F000040020000E0000000BF8000-000000067F000040020000E0000000BFC000__000000914E3F38F0\n000000067F000040020000E0000000BF8000-000000067F000040020000E0000000BFC000__000000931B9A2710\n000000067F000040020000E0000000BF8ED4-000000067F000040020000E0000000C018B5__000000505F89E839-000000510F31FEA9\n000000067F000040020000E0000000BFC000-000000067F000040020000E0000000C00000__000000572A7A05D8\n000000067F000040020000E0000000BFC000-000000067F000040020000E0000000C00000__0000005D2FFFFB38\n000000067F000040020000E0000000BFC000-000000067F000040020000E0000000C00000__00000073AD3FE6B8\n000000067F000040020000E0000000BFC000-000000067F000040020000E0000000C00000__000000914E3F38F0\n000000067F000040020000E0000000BFC000-000000067F000040020000E0000000C00000__000000931B9A2710\n000000067F000040020000E0000000C00000-000000067F000040020000E0000000C04000__000000572A7A05D8\n000000067F000040020000E0000000C00000-000000067F000040020000E0000000C04000__0000005D2FFFFB38\n000000067F000040020000E0000000C00000-000000067F000040020000E0000000C04000__00000073AD3FE6B8\n000000067F000040020000E0000000C00000-000000067F000040020000E0000000C04000__000000914E3F38F0\n000000067F000040020000E0000000C00000-000000067F000040020000E0000000C04000__000000931B9A2710\n000000067F000040020000E0000000C018B5-000000067F000040020000E0000000C0A2A7__000000505F89E839-000000510F31FEA9\n000000067F
000040020000E0000000C04000-000000067F000040020000E0000000C08000__000000572A7A05D8\n000000067F000040020000E0000000C04000-000000067F000040020000E0000000C08000__0000005D2FFFFB38\n000000067F000040020000E0000000C04000-000000067F000040020000E0000000C08000__00000073AD3FE6B8\n000000067F000040020000E0000000C04000-000000067F000040020000E0000000C08000__000000914E3F38F0\n000000067F000040020000E0000000C04000-000000067F000040020000E0000000C08000__000000931B9A2710\n000000067F000040020000E0000000C08000-000000067F000040020000E0000000C0C000__000000572A7A05D8\n000000067F000040020000E0000000C08000-000000067F000040020000E0000000C0C000__0000005D2FFFFB38\n000000067F000040020000E0000000C08000-000000067F000040020000E0000000C0C000__00000073AD3FE6B8\n000000067F000040020000E0000000C08000-000000067F000040020000E0000000C0C000__000000914E3F38F0\n000000067F000040020000E0000000C08000-000000067F000040020000E0000000C0C000__000000931B9A2710\n000000067F000040020000E0000000C0A2A7-000000067F000040020000E0000000C12C83__000000505F89E839-000000510F31FEA9\n000000067F000040020000E0000000C0C000-000000067F000040020000E0000000C10000__000000572A7A05D8\n000000067F000040020000E0000000C0C000-000000067F000040020000E0000000C10000__0000005D2FFFFB38\n000000067F000040020000E0000000C0C000-000000067F000040020000E0000000C10000__00000073AD3FE6B8\n000000067F000040020000E0000000C0C000-000000067F000040020000E0000000C10000__000000914E3F38F0\n000000067F000040020000E0000000C0C000-000000067F000040020000E0000000C10000__000000931B9A2710\n000000067F000040020000E0000000C10000-000000067F000040020000E0000000C14000__00000051EEFFE900\n000000067F000040020000E0000000C10000-000000067F000040020000E0000000C14000__0000005D2FFFFB38\n000000067F000040020000E0000000C10000-000000067F000040020000E0000000C14000__00000073AD3FE6B8\n000000067F000040020000E0000000C10000-000000067F000040020000E0000000C14000__000000914E3F38F0\n000000067F000040020000E0000000C10000-000000067F000040020000E0000000C14000__000000931B9A2710\n000000067F000040020000E0000000C12C83-000
000067F000040020000E0000100000000__000000505F89E839-000000510F31FEA9\n000000067F000040020000E0000000C12F96-000000067F000040020000E0000000C1B971__000000510F31FEA9-00000051BED9D7E1\n000000067F000040020000E0000000C14000-000000067F000040020000E0000000C18000__00000051EEFFE900\n000000067F000040020000E0000000C14000-000000067F000040020000E0000000C18000__0000005D2FFFFB38\n000000067F000040020000E0000000C14000-000000067F000040020000E0000000C18000__00000073AD3FE6B8\n000000067F000040020000E0000000C14000-000000067F000040020000E0000000C18000__000000914E3F38F0\n000000067F000040020000E0000000C14000-000000067F000040020000E0000000C18000__000000931B9A2710\n000000067F000040020000E0000000C18000-000000067F000040020000E0000000C1C000__00000051EEFFE900\n000000067F000040020000E0000000C18000-000000067F000040020000E0000000C1C000__0000005D2FFFFB38\n000000067F000040020000E0000000C18000-000000067F000040020000E0000000C1C000__00000073AD3FE6B8\n000000067F000040020000E0000000C18000-000000067F000040020000E0000000C1C000__000000914E3F38F0\n000000067F000040020000E0000000C18000-000000067F000040020000E0000000C1C000__000000931B9A2710\n000000067F000040020000E0000000C1B971-000000067F000040020000E0000000C24348__000000510F31FEA9-00000051BED9D7E1\n000000067F000040020000E0000000C1C000-000000067F000040020000E0000000C20000__00000051EEFFE900\n000000067F000040020000E0000000C1C000-000000067F000040020000E0000000C20000__0000005D2FFFFB38\n000000067F000040020000E0000000C1C000-000000067F000040020000E0000000C20000__00000073AD3FE6B8\n000000067F000040020000E0000000C1C000-000000067F000040020000E0000000C20000__000000914E3F38F0\n000000067F000040020000E0000000C1C000-000000067F000040020000E0000000C20000__000000931B9A2710\n000000067F000040020000E0000000C20000-000000067F000040020000E0000000C24000__00000051EEFFE900\n000000067F000040020000E0000000C20000-000000067F000040020000E0000000C24000__0000005D2FFFFB38\n000000067F000040020000E0000000C20000-000000067F000040020000E0000000C24000__00000073AD3FE6B8\n000000067F000040020000E0000000C20000
-000000067F000040020000E0000000C24000__000000914E3F38F0\n000000067F000040020000E0000000C20000-000000067F000040020000E0000000C24000__000000931B9A2710\n000000067F000040020000E0000000C24000-000000067F000040020000E0000000C28000__00000051EEFFE900\n000000067F000040020000E0000000C24000-000000067F000040020000E0000000C28000__0000005D2FFFFB38\n000000067F000040020000E0000000C24000-000000067F000040020000E0000000C28000__00000073AD3FE6B8\n000000067F000040020000E0000000C24000-000000067F000040020000E0000000C28000__000000914E3F38F0\n000000067F000040020000E0000000C24000-000000067F000040020000E0000000C28000__000000931B9A2710\n000000067F000040020000E0000000C24348-000000067F000040020000E0000000C2CD23__000000510F31FEA9-00000051BED9D7E1\n000000067F000040020000E0000000C28000-000000067F000040020000E0000000C2C000__00000051EEFFE900\n000000067F000040020000E0000000C28000-000000067F000040020000E0000000C2C000__0000005D2FFFFB38\n000000067F000040020000E0000000C28000-000000067F000040020000E0000000C2C000__00000073AD3FE6B8\n000000067F000040020000E0000000C28000-000000067F000040020000E0000000C2C000__000000914E3F38F0\n000000067F000040020000E0000000C28000-000000067F000040020000E0000000C2C000__000000931B9A2710\n000000067F000040020000E0000000C2C000-000000067F000040020000E0000000C30000__00000051EEFFE900\n000000067F000040020000E0000000C2C000-000000067F000040020000E0000000C30000__0000005D2FFFFB38\n000000067F000040020000E0000000C2C000-000000067F000040020000E0000000C30000__00000073AD3FE6B8\n000000067F000040020000E0000000C2C000-000000067F000040020000E0000000C30000__000000914E3F38F0\n000000067F000040020000E0000000C2C000-000000067F000040020000E0000000C30000__000000931B9A2710\n000000067F000040020000E0000000C2CD23-000000067F000040020000E0000000C356F9__000000510F31FEA9-00000051BED9D7E1\n000000067F000040020000E0000000C30000-000000067F000040020000E0000000C34000__00000051EEFFE900\n000000067F000040020000E0000000C30000-000000067F000040020000E0000000C34000__0000005D2FFFFB38\n000000067F000040020000E0000000C30000-000000067F00
0040020000E0000000C34000__00000073AD3FE6B8\n000000067F000040020000E0000000C30000-000000067F000040020000E0000000C34000__000000914E3F38F0\n000000067F000040020000E0000000C30000-000000067F000040020000E0000000C34000__000000931B9A2710\n000000067F000040020000E0000000C34000-000000067F000040020000E0000000C38000__00000051EEFFE900\n000000067F000040020000E0000000C34000-000000067F000040020000E0000000C38000__0000005D2FFFFB38\n000000067F000040020000E0000000C34000-000000067F000040020000E0000000C38000__00000073AD3FE6B8\n000000067F000040020000E0000000C34000-000000067F000040020000E0000000C38000__000000914E3F38F0\n000000067F000040020000E0000000C34000-000000067F000040020000E0000000C38000__000000931B9A2710\n000000067F000040020000E0000000C356F9-000000067F000040020000E0000000C3E0DC__000000510F31FEA9-00000051BED9D7E1\n000000067F000040020000E0000000C38000-000000067F000040020000E0000000C3C000__00000051EEFFE900\n000000067F000040020000E0000000C38000-000000067F000040020000E0000000C3C000__0000005D2FFFFB38\n000000067F000040020000E0000000C38000-000000067F000040020000E0000000C3C000__00000073AD3FE6B8\n000000067F000040020000E0000000C38000-000000067F000040020000E0000000C3C000__000000914E3F38F0\n000000067F000040020000E0000000C38000-000000067F000040020000E0000000C3C000__000000931B9A2710\n000000067F000040020000E0000000C3C000-000000067F000040020000E0000000C40000__00000051EEFFE900\n000000067F000040020000E0000000C3C000-000000067F000040020000E0000000C40000__0000005D2FFFFB38\n000000067F000040020000E0000000C3C000-000000067F000040020000E0000000C40000__00000073AD3FE6B8\n000000067F000040020000E0000000C3C000-000000067F000040020000E0000000C40000__000000914E3F38F0\n000000067F000040020000E0000000C3C000-000000067F000040020000E0000000C40000__000000931B9A2710\n000000067F000040020000E0000000C3E0DC-000000067F000040020000E0000000C46AC7__000000510F31FEA9-00000051BED9D7E1\n000000067F000040020000E0000000C40000-000000067F000040020000E0000000C44000__00000051EEFFE900\n000000067F000040020000E0000000C40000-000000067F000040020000E00
00000C44000__0000005D2FFFFB38\n000000067F000040020000E0000000C40000-000000067F000040020000E0000000C44000__00000073AD3FE6B8\n000000067F000040020000E0000000C40000-000000067F000040020000E0000000C44000__000000914E3F38F0\n000000067F000040020000E0000000C40000-000000067F000040020000E0000000C44000__000000931B9A2710\n000000067F000040020000E0000000C44000-000000067F000040020000E0000000C48000__00000051EEFFE900\n000000067F000040020000E0000000C44000-000000067F000040020000E0000000C48000__0000005D2FFFFB38\n000000067F000040020000E0000000C44000-000000067F000040020000E0000000C48000__00000073AD3FE6B8\n000000067F000040020000E0000000C44000-000000067F000040020000E0000000C48000__000000914E3F38F0\n000000067F000040020000E0000000C44000-000000067F000040020000E0000000C48000__000000931B9A2710\n000000067F000040020000E0000000C46AC7-000000067F000040020000E0000000C4F4A5__000000510F31FEA9-00000051BED9D7E1\n000000067F000040020000E0000000C48000-000000067F000040020000E0000000C4C000__00000051EEFFE900\n000000067F000040020000E0000000C48000-000000067F000040020000E0000000C4C000__0000005D2FFFFB38\n000000067F000040020000E0000000C48000-000000067F000040020000E0000000C4C000__00000073AD3FE6B8\n000000067F000040020000E0000000C48000-000000067F000040020000E0000000C4C000__000000914E3F38F0\n000000067F000040020000E0000000C48000-000000067F000040020000E0000000C4C000__000000931B9A2710\n000000067F000040020000E0000000C4C000-000000067F000040020000E0000000C50000__00000051EEFFE900\n000000067F000040020000E0000000C4C000-000000067F000040020000E0000000C50000__0000005D2FFFFB38\n000000067F000040020000E0000000C4C000-000000067F000040020000E0000000C50000__00000073AD3FE6B8\n000000067F000040020000E0000000C4C000-000000067F000040020000E0000000C50000__000000914E3F38F0\n000000067F000040020000E0000000C4C000-000000067F000040020000E0000000C50000__000000931B9A2710\n000000067F000040020000E0000000C4F4A5-000000067F000040020000E0000000C57E7D__000000510F31FEA9-00000051BED9D7E1\n000000067F000040020000E0000000C50000-000000067F000040020000E0000000C54000__
00000051EEFFE900\n000000067F000040020000E0000000C50000-000000067F000040020000E0000000C54000__0000005D2FFFFB38\n000000067F000040020000E0000000C50000-000000067F000040020000E0000000C54000__00000073AD3FE6B8\n000000067F000040020000E0000000C50000-000000067F000040020000E0000000C54000__000000914E3F38F0\n000000067F000040020000E0000000C50000-000000067F000040020000E0000000C54000__000000931B9A2710\n000000067F000040020000E0000000C54000-000000067F000040020000E0000000C58000__00000051EEFFE900\n000000067F000040020000E0000000C54000-000000067F000040020000E0000000C58000__0000005D2FFFFB38\n000000067F000040020000E0000000C54000-000000067F000040020000E0000000C58000__00000073AD3FE6B8\n000000067F000040020000E0000000C54000-000000067F000040020000E0000000C58000__000000914E3F38F0\n000000067F000040020000E0000000C54000-000000067F000040020000E0000000C58000__000000931B9A2710\n000000067F000040020000E0000000C57E7D-000000067F000040020000E0000000C60858__000000510F31FEA9-00000051BED9D7E1\n000000067F000040020000E0000000C58000-000000067F000040020000E0000000C5C000__00000051EEFFE900\n000000067F000040020000E0000000C58000-000000067F000040020000E0000000C5C000__0000005D2FFFFB38\n000000067F000040020000E0000000C58000-000000067F000040020000E0000000C5C000__00000073AD3FE6B8\n000000067F000040020000E0000000C58000-000000067F000040020000E0000000C5C000__000000914E3F38F0\n000000067F000040020000E0000000C58000-000000067F000040020000E0000000C5C000__000000931B9A2710\n000000067F000040020000E0000000C5C000-000000067F000040020000E0000000C60000__00000051EEFFE900\n000000067F000040020000E0000000C5C000-000000067F000040020000E0000000C60000__0000005D2FFFFB38\n000000067F000040020000E0000000C5C000-000000067F000040020000E0000000C60000__00000073AD3FE6B8\n000000067F000040020000E0000000C5C000-000000067F000040020000E0000000C60000__000000914E3F38F0\n000000067F000040020000E0000000C5C000-000000067F000040020000E0000000C60000__000000931B9A2710\n000000067F000040020000E0000000C60000-000000067F000040020000E0000000C64000__00000051EEFFE900\n000000067F00
0040020000E0000000C60000-000000067F000040020000E0000000C64000__0000005D2FFFFB38\n000000067F000040020000E0000000C60000-000000067F000040020000E0000000C64000__00000073AD3FE6B8\n000000067F000040020000E0000000C60000-000000067F000040020000E0000000C64000__000000914E3F38F0\n000000067F000040020000E0000000C60000-000000067F000040020000E0000000C64000__000000931B9A2710\n000000067F000040020000E0000000C60858-000000067F000040020000E0000000C6922E__000000510F31FEA9-00000051BED9D7E1\n000000067F000040020000E0000000C64000-000000067F000040020000E0000000C68000__00000051EEFFE900\n000000067F000040020000E0000000C64000-000000067F000040020000E0000000C68000__0000005D2FFFFB38\n000000067F000040020000E0000000C64000-000000067F000040020000E0000000C68000__00000073AD3FE6B8\n000000067F000040020000E0000000C64000-000000067F000040020000E0000000C68000__000000914E3F38F0\n000000067F000040020000E0000000C64000-000000067F000040020000E0000000C68000__000000931B9A2710\n000000067F000040020000E0000000C68000-000000067F000040020000E0000000C6C000__00000051EEFFE900\n000000067F000040020000E0000000C68000-000000067F000040020000E0000000C6C000__0000005D2FFFFB38\n000000067F000040020000E0000000C68000-000000067F000040020000E0000000C6C000__00000073AD3FE6B8\n000000067F000040020000E0000000C68000-000000067F000040020000E0000000C6C000__000000914E3F38F0\n000000067F000040020000E0000000C68000-000000067F000040020000E0000000C6C000__000000931B9A2710\n000000067F000040020000E0000000C6922E-000000067F000040020000E0000000C71C02__000000510F31FEA9-00000051BED9D7E1\n000000067F000040020000E0000000C6C000-000000067F000040020000E0000000C70000__00000051EEFFE900\n000000067F000040020000E0000000C6C000-000000067F000040020000E0000000C70000__0000005D2FFFFB38\n000000067F000040020000E0000000C6C000-000000067F000040020000E0000000C70000__00000073AD3FE6B8\n000000067F000040020000E0000000C6C000-000000067F000040020000E0000000C70000__000000914E3F38F0\n000000067F000040020000E0000000C6C000-000000067F000040020000E0000000C70000__000000931B9A2710\n000000067F000040020000E00
00000C70000-000000067F000040020000E0000000C74000__00000051EEFFE900\n000000067F000040020000E0000000C70000-000000067F000040020000E0000000C74000__000000574B7FF240\n000000067F000040020000E0000000C70000-000000067F000040020000E0000000C74000__00000073AD3FE6B8\n000000067F000040020000E0000000C70000-000000067F000040020000E0000000C74000__000000914E3F38F0\n000000067F000040020000E0000000C70000-000000067F000040020000E0000000C74000__000000931B9A2710\n000000067F000040020000E0000000C71C02-000000067F000040020000E0000100000000__000000510F31FEA9-00000051BED9D7E1\n000000067F000040020000E0000000C71F22-000000067F000040020000E0000000C7A8F1__00000051BED9D7E1-000000526E81F439\n000000067F000040020000E0000000C74000-000000067F000040020000E0000000C78000__00000051EEFFE900\n000000067F000040020000E0000000C74000-000000067F000040020000E0000000C78000__000000574B7FF240\n000000067F000040020000E0000000C74000-000000067F000040020000E0000000C78000__00000073AD3FE6B8\n000000067F000040020000E0000000C74000-000000067F000040020000E0000000C78000__000000914E3F38F0\n000000067F000040020000E0000000C74000-000000067F000040020000E0000000C78000__000000931B9A2710\n000000067F000040020000E0000000C78000-000000067F000040020000E0000000C7C000__00000051EEFFE900\n000000067F000040020000E0000000C78000-000000067F000040020000E0000000C7C000__000000574B7FF240\n000000067F000040020000E0000000C78000-000000067F000040020000E0000000C7C000__00000073AD3FE6B8\n000000067F000040020000E0000000C78000-000000067F000040020000E0000000C7C000__000000914E3F38F0\n000000067F000040020000E0000000C78000-000000067F000040020000E0000000C7C000__000000931B9A2710\n000000067F000040020000E0000000C7A8F1-000000067F000040020000E0000000C832D9__00000051BED9D7E1-000000526E81F439\n000000067F000040020000E0000000C7C000-000000067F000040020000E0000000C80000__00000051EEFFE900\n000000067F000040020000E0000000C7C000-000000067F000040020000E0000000C80000__000000574B7FF240\n000000067F000040020000E0000000C7C000-000000067F000040020000E0000000C80000__00000073AD3FE6B8\n000000067F00004002000
0E0000000C7C000-000000067F000040020000E0000000C80000__000000914E3F38F0\n000000067F000040020000E0000000C7C000-000000067F000040020000E0000000C80000__000000931B9A2710\n000000067F000040020000E0000000C80000-000000067F000040020000E0000000C84000__00000051EEFFE900\n000000067F000040020000E0000000C80000-000000067F000040020000E0000000C84000__000000574B7FF240\n000000067F000040020000E0000000C80000-000000067F000040020000E0000000C84000__00000073AD3FE6B8\n000000067F000040020000E0000000C80000-000000067F000040020000E0000000C84000__000000914E3F38F0\n000000067F000040020000E0000000C80000-000000067F000040020000E0000000C84000__000000931B9A2710\n000000067F000040020000E0000000C832D9-000000067F000040020000E0000000C8BCBC__00000051BED9D7E1-000000526E81F439\n000000067F000040020000E0000000C84000-000000067F000040020000E0000000C88000__00000051EEFFE900\n000000067F000040020000E0000000C84000-000000067F000040020000E0000000C88000__000000574B7FF240\n000000067F000040020000E0000000C84000-000000067F000040020000E0000000C88000__00000073AD3FE6B8\n000000067F000040020000E0000000C84000-000000067F000040020000E0000000C88000__000000914E3F38F0\n000000067F000040020000E0000000C84000-000000067F000040020000E0000000C88000__000000931B9A2710\n000000067F000040020000E0000000C88000-000000067F000040020000E0000000C8C000__000000574B7FF240\n000000067F000040020000E0000000C88000-000000067F000040020000E0000000C8C000__00000073AD3FE6B8\n000000067F000040020000E0000000C88000-000000067F000040020000E0000000C8C000__000000914E3F38F0\n000000067F000040020000E0000000C88000-000000067F000040020000E0000000C8C000__000000931B9A2710\n000000067F000040020000E0000000C88000-00000006800000000000000B1F0100000000__00000051EEFFE900\n000000067F000040020000E0000000C8BCBC-000000067F000040020000E0000000C946A4__00000051BED9D7E1-000000526E81F439\n000000067F000040020000E0000000C8C000-000000067F000040020000E0000000C90000__000000574B7FF240\n000000067F000040020000E0000000C8C000-000000067F000040020000E0000000C90000__00000073AD3FE6B8\n000000067F000040020000E0000000C8C0
00-000000067F000040020000E0000000C90000__000000914E3F38F0\n000000067F000040020000E0000000C8C000-000000067F000040020000E0000000C90000__000000931B9A2710\n000000067F000040020000E0000000C90000-000000067F000040020000E0000000C94000__000000574B7FF240\n000000067F000040020000E0000000C90000-000000067F000040020000E0000000C94000__00000073AD3FE6B8\n000000067F000040020000E0000000C90000-000000067F000040020000E0000000C94000__000000914E3F38F0\n000000067F000040020000E0000000C90000-000000067F000040020000E0000000C94000__000000931B9A2710\n000000067F000040020000E0000000C94000-000000067F000040020000E0000000C98000__000000574B7FF240\n000000067F000040020000E0000000C94000-000000067F000040020000E0000000C98000__00000073AD3FE6B8\n000000067F000040020000E0000000C94000-000000067F000040020000E0000000C98000__000000914E3F38F0\n000000067F000040020000E0000000C94000-000000067F000040020000E0000000C98000__000000931B9A2710\n000000067F000040020000E0000000C946A4-000000067F000040020000E0000000C9D07F__00000051BED9D7E1-000000526E81F439\n000000067F000040020000E0000000C98000-000000067F000040020000E0000000C9C000__000000574B7FF240\n000000067F000040020000E0000000C98000-000000067F000040020000E0000000C9C000__00000073AD3FE6B8\n000000067F000040020000E0000000C98000-000000067F000040020000E0000000C9C000__000000914E3F38F0\n000000067F000040020000E0000000C98000-000000067F000040020000E0000000C9C000__000000931B9A2710\n000000067F000040020000E0000000C9C000-000000067F000040020000E0000000CA0000__000000574B7FF240\n000000067F000040020000E0000000C9C000-000000067F000040020000E0000000CA0000__00000073AD3FE6B8\n000000067F000040020000E0000000C9C000-000000067F000040020000E0000000CA0000__000000914E3F38F0\n000000067F000040020000E0000000C9C000-000000067F000040020000E0000000CA0000__000000931B9A2710\n000000067F000040020000E0000000C9D07F-000000067F000040020000E0000000CA5A4E__00000051BED9D7E1-000000526E81F439\n000000067F000040020000E0000000CA0000-000000067F000040020000E0000000CA4000__000000574B7FF240\n000000067F000040020000E0000000CA0000-000000067F
000040020000E0000000CA4000__00000073AD3FE6B8\n000000067F000040020000E0000000CA0000-000000067F000040020000E0000000CA4000__000000914E3F38F0\n000000067F000040020000E0000000CA0000-000000067F000040020000E0000000CA4000__000000931B9A2710\n000000067F000040020000E0000000CA4000-000000067F000040020000E0000000CA8000__000000574B7FF240\n000000067F000040020000E0000000CA4000-000000067F000040020000E0000000CA8000__00000073AD3FE6B8\n000000067F000040020000E0000000CA4000-000000067F000040020000E0000000CA8000__000000914E3F38F0\n000000067F000040020000E0000000CA4000-000000067F000040020000E0000000CA8000__000000931B9A2710\n000000067F000040020000E0000000CA5A4E-000000067F000040020000E0000000CAE42F__00000051BED9D7E1-000000526E81F439\n000000067F000040020000E0000000CA8000-000000067F000040020000E0000000CAC000__000000574B7FF240\n000000067F000040020000E0000000CA8000-000000067F000040020000E0000000CAC000__00000073AD3FE6B8\n000000067F000040020000E0000000CA8000-000000067F000040020000E0000000CAC000__000000914E3F38F0\n000000067F000040020000E0000000CA8000-000000067F000040020000E0000000CAC000__000000931B9A2710\n000000067F000040020000E0000000CAC000-000000067F000040020000E0000000CB0000__000000574B7FF240\n000000067F000040020000E0000000CAC000-000000067F000040020000E0000000CB0000__00000073AD3FE6B8\n000000067F000040020000E0000000CAC000-000000067F000040020000E0000000CB0000__000000914E3F38F0\n000000067F000040020000E0000000CAC000-000000067F000040020000E0000000CB0000__000000931B9A2710\n000000067F000040020000E0000000CAE42F-000000067F000040020000E0000000CB6E04__00000051BED9D7E1-000000526E81F439\n000000067F000040020000E0000000CB0000-000000067F000040020000E0000000CB4000__000000574B7FF240\n000000067F000040020000E0000000CB0000-000000067F000040020000E0000000CB4000__00000073AD3FE6B8\n000000067F000040020000E0000000CB0000-000000067F000040020000E0000000CB4000__000000914E3F38F0\n000000067F000040020000E0000000CB0000-000000067F000040020000E0000000CB4000__000000931B9A2710\n000000067F000040020000E0000000CB4000-000000067F000040020000E
0000000CB8000__000000574B7FF240\n000000067F000040020000E0000000CB4000-000000067F000040020000E0000000CB8000__00000073AD3FE6B8\n000000067F000040020000E0000000CB4000-000000067F000040020000E0000000CB8000__000000914E3F38F0\n000000067F000040020000E0000000CB4000-000000067F000040020000E0000000CB8000__000000931B9A2710\n000000067F000040020000E0000000CB6E04-000000067F000040020000E0000000CBF7D9__00000051BED9D7E1-000000526E81F439\n000000067F000040020000E0000000CB8000-000000067F000040020000E0000000CBC000__000000574B7FF240\n000000067F000040020000E0000000CB8000-000000067F000040020000E0000000CBC000__00000073AD3FE6B8\n000000067F000040020000E0000000CB8000-000000067F000040020000E0000000CBC000__000000914E3F38F0\n000000067F000040020000E0000000CB8000-000000067F000040020000E0000000CBC000__000000931B9A2710\n000000067F000040020000E0000000CBC000-000000067F000040020000E0000000CC0000__000000574B7FF240\n000000067F000040020000E0000000CBC000-000000067F000040020000E0000000CC0000__00000073AD3FE6B8\n000000067F000040020000E0000000CBC000-000000067F000040020000E0000000CC0000__000000914E3F38F0\n000000067F000040020000E0000000CBC000-000000067F000040020000E0000000CC0000__000000931B9A2710\n000000067F000040020000E0000000CBF7D9-000000067F000040020000E0000000CC81BA__00000051BED9D7E1-000000526E81F439\n000000067F000040020000E0000000CC0000-000000067F000040020000E0000000CC4000__000000574B7FF240\n000000067F000040020000E0000000CC0000-000000067F000040020000E0000000CC4000__00000073AD3FE6B8\n000000067F000040020000E0000000CC0000-000000067F000040020000E0000000CC4000__000000914E3F38F0\n000000067F000040020000E0000000CC0000-000000067F000040020000E0000000CC4000__000000931B9A2710\n000000067F000040020000E0000000CC4000-000000067F000040020000E0000000CC8000__000000574B7FF240\n000000067F000040020000E0000000CC4000-000000067F000040020000E0000000CC8000__00000073AD3FE6B8\n000000067F000040020000E0000000CC4000-000000067F000040020000E0000000CC8000__000000914E3F38F0\n000000067F000040020000E0000000CC4000-000000067F000040020000E0000000CC8000
__000000931B9A2710\n000000067F000040020000E0000000CC8000-000000067F000040020000E0000000CCC000__000000574B7FF240\n000000067F000040020000E0000000CC8000-000000067F000040020000E0000000CCC000__00000073AD3FE6B8\n000000067F000040020000E0000000CC8000-000000067F000040020000E0000000CCC000__000000914E3F38F0\n000000067F000040020000E0000000CC8000-000000067F000040020000E0000000CCC000__000000931B9A2710\n000000067F000040020000E0000000CC81BA-000000067F000040020000E0000000CD0B9F__00000051BED9D7E1-000000526E81F439\n000000067F000040020000E0000000CCC000-000000067F000040020000E0000000CD0000__000000574B7FF240\n000000067F000040020000E0000000CCC000-000000067F000040020000E0000000CD0000__00000073AD3FE6B8\n000000067F000040020000E0000000CCC000-000000067F000040020000E0000000CD0000__000000914E3F38F0\n000000067F000040020000E0000000CCC000-000000067F000040020000E0000000CD0000__000000931B9A2710\n000000067F000040020000E0000000CD0000-000000067F000040020000E0000000CD4000__000000572A7A05D8\n000000067F000040020000E0000000CD0000-000000067F000040020000E0000000CD4000__0000005D2FFFFB38\n000000067F000040020000E0000000CD0000-000000067F000040020000E0000000CD4000__00000073AD3FE6B8\n000000067F000040020000E0000000CD0000-000000067F000040020000E0000000CD4000__000000914E3F38F0\n000000067F000040020000E0000000CD0000-000000067F000040020000E0000000CD4000__000000931B9A2710\n000000067F000040020000E0000000CD0B9F-000000067F000040020000E0000100000000__00000051BED9D7E1-000000526E81F439\n000000067F000040020000E0000000CD0EB9-000000067F000040020000E0000000CD9893__000000526E81F439-000000531E29F559\n000000067F000040020000E0000000CD4000-000000067F000040020000E0000000CD8000__000000572A7A05D8\n000000067F000040020000E0000000CD4000-000000067F000040020000E0000000CD8000__0000005D2FFFFB38\n000000067F000040020000E0000000CD4000-000000067F000040020000E0000000CD8000__00000073AD3FE6B8\n000000067F000040020000E0000000CD4000-000000067F000040020000E0000000CD8000__000000914E3F38F0\n000000067F000040020000E0000000CD4000-000000067F000040020000E0000000CD
8000__000000931B9A2710\n000000067F000040020000E0000000CD8000-000000067F000040020000E0000000CDC000__000000572A7A05D8\n000000067F000040020000E0000000CD8000-000000067F000040020000E0000000CDC000__0000005D2FFFFB38\n000000067F000040020000E0000000CD8000-000000067F000040020000E0000000CDC000__00000073AD3FE6B8\n000000067F000040020000E0000000CD8000-000000067F000040020000E0000000CDC000__000000914E3F38F0\n000000067F000040020000E0000000CD8000-000000067F000040020000E0000000CDC000__000000931B9A2710\n000000067F000040020000E0000000CD9893-000000067F000040020000E0000000CE226B__000000526E81F439-000000531E29F559\n000000067F000040020000E0000000CDC000-000000067F000040020000E0000000CE0000__000000572A7A05D8\n000000067F000040020000E0000000CDC000-000000067F000040020000E0000000CE0000__0000005D2FFFFB38\n000000067F000040020000E0000000CDC000-000000067F000040020000E0000000CE0000__00000073AD3FE6B8\n000000067F000040020000E0000000CDC000-000000067F000040020000E0000000CE0000__000000914E3F38F0\n000000067F000040020000E0000000CDC000-000000067F000040020000E0000000CE0000__000000931B9A2710\n000000067F000040020000E0000000CE0000-000000067F000040020000E0000000CE4000__000000572A7A05D8\n000000067F000040020000E0000000CE0000-000000067F000040020000E0000000CE4000__0000005D2FFFFB38\n000000067F000040020000E0000000CE0000-000000067F000040020000E0000000CE4000__00000073AD3FE6B8\n000000067F000040020000E0000000CE0000-000000067F000040020000E0000000CE4000__000000914E3F38F0\n000000067F000040020000E0000000CE0000-000000067F000040020000E0000000CE4000__000000931B9A2710\n000000067F000040020000E0000000CE226B-000000067F000040020000E0000000CEAC50__000000526E81F439-000000531E29F559\n000000067F000040020000E0000000CE4000-000000067F000040020000E0000000CE8000__000000572A7A05D8\n000000067F000040020000E0000000CE4000-000000067F000040020000E0000000CE8000__0000005D2FFFFB38\n000000067F000040020000E0000000CE4000-000000067F000040020000E0000000CE8000__00000073AD3FE6B8\n000000067F000040020000E0000000CE4000-000000067F000040020000E0000000CE8000__0000009
14E3F38F0\n000000067F000040020000E0000000CE4000-000000067F000040020000E0000000CE8000__000000931B9A2710\n000000067F000040020000E0000000CE8000-000000067F000040020000E0000000CEC000__000000572A7A05D8\n000000067F000040020000E0000000CE8000-000000067F000040020000E0000000CEC000__0000005D2FFFFB38\n000000067F000040020000E0000000CE8000-000000067F000040020000E0000000CEC000__00000073AD3FE6B8\n000000067F000040020000E0000000CE8000-000000067F000040020000E0000000CEC000__000000914E3F38F0\n000000067F000040020000E0000000CE8000-000000067F000040020000E0000000CEC000__000000931B9A2710\n000000067F000040020000E0000000CEAC50-000000067F000040020000E0000000CF3627__000000526E81F439-000000531E29F559\n000000067F000040020000E0000000CEC000-000000067F000040020000E0000000CF0000__000000572A7A05D8\n000000067F000040020000E0000000CEC000-000000067F000040020000E0000000CF0000__0000005D2FFFFB38\n000000067F000040020000E0000000CEC000-000000067F000040020000E0000000CF0000__00000073AD3FE6B8\n000000067F000040020000E0000000CEC000-000000067F000040020000E0000000CF0000__000000914E3F38F0\n000000067F000040020000E0000000CEC000-000000067F000040020000E0000000CF0000__000000931B9A2710\n000000067F000040020000E0000000CF0000-000000067F000040020000E0000000CF4000__000000572A7A05D8\n000000067F000040020000E0000000CF0000-000000067F000040020000E0000000CF4000__0000005D2FFFFB38\n000000067F000040020000E0000000CF0000-000000067F000040020000E0000000CF4000__00000073AD3FE6B8\n000000067F000040020000E0000000CF0000-000000067F000040020000E0000000CF4000__000000914E3F38F0\n000000067F000040020000E0000000CF0000-000000067F000040020000E0000000CF4000__000000931B9A2710\n000000067F000040020000E0000000CF3627-000000067F000040020000E0000000CFBFFE__000000526E81F439-000000531E29F559\n000000067F000040020000E0000000CF4000-000000067F000040020000E0000000CF8000__000000572A7A05D8\n000000067F000040020000E0000000CF4000-000000067F000040020000E0000000CF8000__0000005D2FFFFB38\n000000067F000040020000E0000000CF4000-000000067F000040020000E0000000CF8000__00000073AD3FE6B8\n00
0000067F000040020000E0000000CF4000-000000067F000040020000E0000000CF8000__000000914E3F38F0\n000000067F000040020000E0000000CF4000-000000067F000040020000E0000000CF8000__000000931B9A2710\n000000067F000040020000E0000000CF8000-000000067F000040020000E0000000CFC000__000000572A7A05D8\n000000067F000040020000E0000000CF8000-000000067F000040020000E0000000CFC000__0000005D2FFFFB38\n000000067F000040020000E0000000CF8000-000000067F000040020000E0000000CFC000__00000073AD3FE6B8\n000000067F000040020000E0000000CF8000-000000067F000040020000E0000000CFC000__000000914E3F38F0\n000000067F000040020000E0000000CF8000-000000067F000040020000E0000000CFC000__000000931B9A2710\n000000067F000040020000E0000000CFBFFE-000000067F000040020000E0000000D049E2__000000526E81F439-000000531E29F559\n000000067F000040020000E0000000CFC000-000000067F000040020000E0000000D00000__000000572A7A05D8\n000000067F000040020000E0000000CFC000-000000067F000040020000E0000000D00000__0000005D2FFFFB38\n000000067F000040020000E0000000CFC000-000000067F000040020000E0000000D00000__00000073AD3FE6B8\n000000067F000040020000E0000000CFC000-000000067F000040020000E0000000D00000__000000914E3F38F0\n000000067F000040020000E0000000CFC000-000000067F000040020000E0000000D00000__000000931B9A2710\n000000067F000040020000E0000000D00000-000000067F000040020000E0000000D04000__000000572A7A05D8\n000000067F000040020000E0000000D00000-000000067F000040020000E0000000D04000__0000005D2FFFFB38\n000000067F000040020000E0000000D00000-000000067F000040020000E0000000D04000__00000073AD3FE6B8\n000000067F000040020000E0000000D00000-000000067F000040020000E0000000D04000__000000914E3F38F0\n000000067F000040020000E0000000D00000-000000067F000040020000E0000000D04000__000000931B9A2710\n000000067F000040020000E0000000D04000-000000067F000040020000E0000000D08000__000000572A7A05D8\n000000067F000040020000E0000000D04000-000000067F000040020000E0000000D08000__0000005D2FFFFB38\n000000067F000040020000E0000000D04000-000000067F000040020000E0000000D08000__00000073AD3FE6B8\n000000067F000040020000E0000000D0
4000-000000067F000040020000E0000000D08000__000000914E3F38F0\n000000067F000040020000E0000000D04000-000000067F000040020000E0000000D08000__000000931B9A2710\n000000067F000040020000E0000000D049E2-000000067F000040020000E0000000D0D3C4__000000526E81F439-000000531E29F559\n000000067F000040020000E0000000D08000-000000067F000040020000E0000000D0C000__000000572A7A05D8\n000000067F000040020000E0000000D08000-000000067F000040020000E0000000D0C000__0000005D2FFFFB38\n000000067F000040020000E0000000D08000-000000067F000040020000E0000000D0C000__00000073AD3FE6B8\n000000067F000040020000E0000000D08000-000000067F000040020000E0000000D0C000__000000914E3F38F0\n000000067F000040020000E0000000D08000-000000067F000040020000E0000000D0C000__000000931B9A2710\n000000067F000040020000E0000000D0C000-000000067F000040020000E0000000D10000__000000572A7A05D8\n000000067F000040020000E0000000D0C000-000000067F000040020000E0000000D10000__0000005D2FFFFB38\n000000067F000040020000E0000000D0C000-000000067F000040020000E0000000D10000__00000073AD3FE6B8\n000000067F000040020000E0000000D0C000-000000067F000040020000E0000000D10000__000000914E3F38F0\n000000067F000040020000E0000000D0C000-000000067F000040020000E0000000D10000__000000931B9A2710\n000000067F000040020000E0000000D0D3C4-000000067F000040020000E0000000D15DA8__000000526E81F439-000000531E29F559\n000000067F000040020000E0000000D10000-000000067F000040020000E0000000D14000__000000572A7A05D8\n000000067F000040020000E0000000D10000-000000067F000040020000E0000000D14000__0000005D2FFFFB38\n000000067F000040020000E0000000D10000-000000067F000040020000E0000000D14000__00000073AD3FE6B8\n000000067F000040020000E0000000D10000-000000067F000040020000E0000000D14000__000000914E3F38F0\n000000067F000040020000E0000000D10000-000000067F000040020000E0000000D14000__000000931B9A2710\n000000067F000040020000E0000000D14000-000000067F000040020000E0000000D18000__000000572A7A05D8\n000000067F000040020000E0000000D14000-000000067F000040020000E0000000D18000__0000005D2FFFFB38\n000000067F000040020000E0000000D14000-00000006
7F000040020000E0000000D18000__00000073AD3FE6B8\n000000067F000040020000E0000000D14000-000000067F000040020000E0000000D18000__000000914E3F38F0\n000000067F000040020000E0000000D14000-000000067F000040020000E0000000D18000__000000931B9A2710\n000000067F000040020000E0000000D15DA8-000000067F000040020000E0000000D1E783__000000526E81F439-000000531E29F559\n000000067F000040020000E0000000D18000-000000067F000040020000E0000000D1C000__000000572A7A05D8\n000000067F000040020000E0000000D18000-000000067F000040020000E0000000D1C000__0000005D2FFFFB38\n000000067F000040020000E0000000D18000-000000067F000040020000E0000000D1C000__00000073AD3FE6B8\n000000067F000040020000E0000000D18000-000000067F000040020000E0000000D1C000__000000914E3F38F0\n000000067F000040020000E0000000D18000-000000067F000040020000E0000000D1C000__000000931B9A2710\n000000067F000040020000E0000000D1C000-000000067F000040020000E0000000D20000__000000572A7A05D8\n000000067F000040020000E0000000D1C000-000000067F000040020000E0000000D20000__0000005D2FFFFB38\n000000067F000040020000E0000000D1C000-000000067F000040020000E0000000D20000__00000073AD3FE6B8\n000000067F000040020000E0000000D1C000-000000067F000040020000E0000000D20000__000000914E3F38F0\n000000067F000040020000E0000000D1C000-000000067F000040020000E0000000D20000__000000931B9A2710\n000000067F000040020000E0000000D1E783-000000067F000040020000E0000000D27156__000000526E81F439-000000531E29F559\n000000067F000040020000E0000000D20000-000000067F000040020000E0000000D24000__000000572A7A05D8\n000000067F000040020000E0000000D20000-000000067F000040020000E0000000D24000__0000005D2FFFFB38\n000000067F000040020000E0000000D20000-000000067F000040020000E0000000D24000__00000073AD3FE6B8\n000000067F000040020000E0000000D20000-000000067F000040020000E0000000D24000__000000914E3F38F0\n000000067F000040020000E0000000D20000-000000067F000040020000E0000000D24000__000000931B9A2710\n000000067F000040020000E0000000D24000-000000067F000040020000E0000000D28000__000000572A7A05D8\n000000067F000040020000E0000000D24000-000000067F00004002000
0E0000000D28000__0000005D2FFFFB38\n000000067F000040020000E0000000D24000-000000067F000040020000E0000000D28000__00000073AD3FE6B8\n000000067F000040020000E0000000D24000-000000067F000040020000E0000000D28000__000000914E3F38F0\n000000067F000040020000E0000000D24000-000000067F000040020000E0000000D28000__000000931B9A2710\n000000067F000040020000E0000000D27156-000000067F000040020000E0000000D2FB43__000000526E81F439-000000531E29F559\n000000067F000040020000E0000000D28000-000000067F000040020000E0000000D2C000__000000572A7A05D8\n000000067F000040020000E0000000D28000-000000067F000040020000E0000000D2C000__0000005D2FFFFB38\n000000067F000040020000E0000000D28000-000000067F000040020000E0000000D2C000__00000073AD3FE6B8\n000000067F000040020000E0000000D28000-000000067F000040020000E0000000D2C000__000000914E3F38F0\n000000067F000040020000E0000000D28000-000000067F000040020000E0000000D2C000__000000931B9A2710\n000000067F000040020000E0000000D2C000-000000067F000040020000E0000000D30000__00000053FAFFF9D8\n000000067F000040020000E0000000D2C000-000000067F000040020000E0000000D30000__0000005D2FFFFB38\n000000067F000040020000E0000000D2C000-000000067F000040020000E0000000D30000__00000073AD3FE6B8\n000000067F000040020000E0000000D2C000-000000067F000040020000E0000000D30000__000000914E3F38F0\n000000067F000040020000E0000000D2C000-000000067F000040020000E0000000D30000__000000931B9A2710\n000000067F000040020000E0000000D2FB43-000000067F000040020000E0000100000000__000000526E81F439-000000531E29F559\n000000067F000040020000E0000000D2FE44-000000067F000040020000E0000000D3881B__000000531E29F559-00000053CDCFF331\n000000067F000040020000E0000000D30000-000000067F000040020000E0000000D34000__00000053FAFFF9D8\n000000067F000040020000E0000000D30000-000000067F000040020000E0000000D34000__0000005D2FFFFB38\n000000067F000040020000E0000000D30000-000000067F000040020000E0000000D34000__00000073AD3FE6B8\n000000067F000040020000E0000000D30000-000000067F000040020000E0000000D34000__000000914E3F38F0\n000000067F000040020000E0000000D30000-000000067F0000400
20000E0000000D34000__000000931B9A2710\n000000067F000040020000E0000000D34000-000000067F000040020000E0000000D38000__00000053FAFFF9D8\n000000067F000040020000E0000000D34000-000000067F000040020000E0000000D38000__0000005D2FFFFB38\n000000067F000040020000E0000000D34000-000000067F000040020000E0000000D38000__00000073AD3FE6B8\n000000067F000040020000E0000000D34000-000000067F000040020000E0000000D38000__000000914E3F38F0\n000000067F000040020000E0000000D34000-000000067F000040020000E0000000D38000__000000931B9A2710\n000000067F000040020000E0000000D38000-000000067F000040020000E0000000D3C000__00000053FAFFF9D8\n000000067F000040020000E0000000D38000-000000067F000040020000E0000000D3C000__0000005D2FFFFB38\n000000067F000040020000E0000000D38000-000000067F000040020000E0000000D3C000__00000073AD3FE6B8\n000000067F000040020000E0000000D38000-000000067F000040020000E0000000D3C000__000000914E3F38F0\n000000067F000040020000E0000000D38000-000000067F000040020000E0000000D3C000__000000931B9A2710\n000000067F000040020000E0000000D3881B-000000067F000040020000E0000000D411EF__000000531E29F559-00000053CDCFF331\n000000067F000040020000E0000000D3C000-000000067F000040020000E0000000D40000__00000053FAFFF9D8\n000000067F000040020000E0000000D3C000-000000067F000040020000E0000000D40000__0000005D2FFFFB38\n000000067F000040020000E0000000D3C000-000000067F000040020000E0000000D40000__00000073AD3FE6B8\n000000067F000040020000E0000000D3C000-000000067F000040020000E0000000D40000__000000914E3F38F0\n000000067F000040020000E0000000D3C000-000000067F000040020000E0000000D40000__000000931B9A2710\n000000067F000040020000E0000000D40000-000000067F000040020000E0000000D44000__00000053FAFFF9D8\n000000067F000040020000E0000000D40000-000000067F000040020000E0000000D44000__0000005D2FFFFB38\n000000067F000040020000E0000000D40000-000000067F000040020000E0000000D44000__00000073AD3FE6B8\n000000067F000040020000E0000000D40000-000000067F000040020000E0000000D44000__000000914E3F38F0\n000000067F000040020000E0000000D40000-000000067F000040020000E0000000D44000__000000931
B9A2710\n000000067F000040020000E0000000D411EF-000000067F000040020000E0000000D49BD0__000000531E29F559-00000053CDCFF331\n000000067F000040020000E0000000D44000-000000067F000040020000E0000000D48000__00000053FAFFF9D8\n000000067F000040020000E0000000D44000-000000067F000040020000E0000000D48000__0000005D2FFFFB38\n000000067F000040020000E0000000D44000-000000067F000040020000E0000000D48000__00000073AD3FE6B8\n000000067F000040020000E0000000D44000-000000067F000040020000E0000000D48000__000000914E3F38F0\n000000067F000040020000E0000000D44000-000000067F000040020000E0000000D48000__000000931B9A2710\n000000067F000040020000E0000000D48000-000000067F000040020000E0000000D4C000__00000053FAFFF9D8\n000000067F000040020000E0000000D48000-000000067F000040020000E0000000D4C000__0000005D2FFFFB38\n000000067F000040020000E0000000D48000-000000067F000040020000E0000000D4C000__00000073AD3FE6B8\n000000067F000040020000E0000000D48000-000000067F000040020000E0000000D4C000__000000914E3F38F0\n000000067F000040020000E0000000D48000-000000067F000040020000E0000000D4C000__000000931B9A2710\n000000067F000040020000E0000000D49BD0-000000067F000040020000E0000000D525B0__000000531E29F559-00000053CDCFF331\n000000067F000040020000E0000000D4C000-000000067F000040020000E0000000D50000__00000053FAFFF9D8\n000000067F000040020000E0000000D4C000-000000067F000040020000E0000000D50000__0000005D2FFFFB38\n000000067F000040020000E0000000D4C000-000000067F000040020000E0000000D50000__00000073AD3FE6B8\n000000067F000040020000E0000000D4C000-000000067F000040020000E0000000D50000__000000914E3F38F0\n000000067F000040020000E0000000D4C000-000000067F000040020000E0000000D50000__000000931B9A2710\n000000067F000040020000E0000000D50000-000000067F000040020000E0000000D54000__00000053FAFFF9D8\n000000067F000040020000E0000000D50000-000000067F000040020000E0000000D54000__0000005D2FFFFB38\n000000067F000040020000E0000000D50000-000000067F000040020000E0000000D54000__00000073AD3FE6B8\n000000067F000040020000E0000000D50000-000000067F000040020000E0000000D54000__000000914E3F38F0\n0000
00067F000040020000E0000000D50000-000000067F000040020000E0000000D54000__000000931B9A2710\n000000067F000040020000E0000000D525B0-000000067F000040020000E0000000D5AF8E__000000531E29F559-00000053CDCFF331\n000000067F000040020000E0000000D54000-000000067F000040020000E0000000D58000__00000053FAFFF9D8\n000000067F000040020000E0000000D54000-000000067F000040020000E0000000D58000__0000005D2FFFFB38\n000000067F000040020000E0000000D54000-000000067F000040020000E0000000D58000__00000073AD3FE6B8\n000000067F000040020000E0000000D54000-000000067F000040020000E0000000D58000__000000914E3F38F0\n000000067F000040020000E0000000D54000-000000067F000040020000E0000000D58000__000000931B9A2710\n000000067F000040020000E0000000D58000-000000067F000040020000E0000000D5C000__00000053FAFFF9D8\n000000067F000040020000E0000000D58000-000000067F000040020000E0000000D5C000__0000005D2FFFFB38\n000000067F000040020000E0000000D58000-000000067F000040020000E0000000D5C000__00000073AD3FE6B8\n000000067F000040020000E0000000D58000-000000067F000040020000E0000000D5C000__000000914E3F38F0\n000000067F000040020000E0000000D58000-000000067F000040020000E0000000D5C000__000000931B9A2710\n000000067F000040020000E0000000D5AF8E-000000067F000040020000E0000000D63966__000000531E29F559-00000053CDCFF331\n000000067F000040020000E0000000D5C000-000000067F000040020000E0000000D60000__00000053FAFFF9D8\n000000067F000040020000E0000000D5C000-000000067F000040020000E0000000D60000__0000005D2FFFFB38\n000000067F000040020000E0000000D5C000-000000067F000040020000E0000000D60000__00000073AD3FE6B8\n000000067F000040020000E0000000D5C000-000000067F000040020000E0000000D60000__000000914E3F38F0\n000000067F000040020000E0000000D5C000-000000067F000040020000E0000000D60000__000000931B9A2710\n000000067F000040020000E0000000D60000-000000067F000040020000E0000000D64000__00000053FAFFF9D8\n000000067F000040020000E0000000D60000-000000067F000040020000E0000000D64000__0000005D2FFFFB38\n000000067F000040020000E0000000D60000-000000067F000040020000E0000000D64000__00000073AD3FE6B8\n000000067F0000400
20000E0000000D60000-000000067F000040020000E0000000D64000__000000914E3F38F0\n000000067F000040020000E0000000D60000-000000067F000040020000E0000000D64000__000000931B9A2710\n000000067F000040020000E0000000D63966-000000067F000040020000E0000000D6C344__000000531E29F559-00000053CDCFF331\n000000067F000040020000E0000000D64000-000000067F000040020000E0000000D68000__00000053FAFFF9D8\n000000067F000040020000E0000000D64000-000000067F000040020000E0000000D68000__0000005D2FFFFB38\n000000067F000040020000E0000000D64000-000000067F000040020000E0000000D68000__00000073AD3FE6B8\n000000067F000040020000E0000000D64000-000000067F000040020000E0000000D68000__000000914E3F38F0\n000000067F000040020000E0000000D64000-000000067F000040020000E0000000D68000__000000931B9A2710\n000000067F000040020000E0000000D68000-000000067F000040020000E0000000D6C000__00000053FAFFF9D8\n000000067F000040020000E0000000D68000-000000067F000040020000E0000000D6C000__0000005D2FFFFB38\n000000067F000040020000E0000000D68000-000000067F000040020000E0000000D6C000__00000073AD3FE6B8\n000000067F000040020000E0000000D68000-000000067F000040020000E0000000D6C000__000000914E3F38F0\n000000067F000040020000E0000000D68000-000000067F000040020000E0000000D6C000__000000931B9A2710\n000000067F000040020000E0000000D6C000-000000067F000040020000E0000000D70000__00000053FAFFF9D8\n000000067F000040020000E0000000D6C000-000000067F000040020000E0000000D70000__0000005D2FFFFB38\n000000067F000040020000E0000000D6C000-000000067F000040020000E0000000D70000__00000073AD3FE6B8\n000000067F000040020000E0000000D6C000-000000067F000040020000E0000000D70000__000000914E3F38F0\n000000067F000040020000E0000000D6C000-000000067F000040020000E0000000D70000__000000931B9A2710\n000000067F000040020000E0000000D6C344-000000067F000040020000E0000000D74D26__000000531E29F559-00000053CDCFF331\n000000067F000040020000E0000000D70000-000000067F000040020000E0000000D74000__00000053FAFFF9D8\n000000067F000040020000E0000000D70000-000000067F000040020000E0000000D74000__0000005D2FFFFB38\n000000067F000040020000E0000000
D70000-000000067F000040020000E0000000D74000__00000073AD3FE6B8\n000000067F000040020000E0000000D70000-000000067F000040020000E0000000D74000__000000914E3F38F0\n000000067F000040020000E0000000D70000-000000067F000040020000E0000000D74000__000000931B9A2710\n000000067F000040020000E0000000D74000-000000067F000040020000E0000000D78000__00000053FAFFF9D8\n000000067F000040020000E0000000D74000-000000067F000040020000E0000000D78000__0000005D2FFFFB38\n000000067F000040020000E0000000D74000-000000067F000040020000E0000000D78000__00000073AD3FE6B8\n000000067F000040020000E0000000D74000-000000067F000040020000E0000000D78000__000000914E3F38F0\n000000067F000040020000E0000000D74000-000000067F000040020000E0000000D78000__000000931B9A2710\n000000067F000040020000E0000000D74D26-000000067F000040020000E0000000D7D701__000000531E29F559-00000053CDCFF331\n000000067F000040020000E0000000D78000-000000067F000040020000E0000000D7C000__00000053FAFFF9D8\n000000067F000040020000E0000000D78000-000000067F000040020000E0000000D7C000__0000005D2FFFFB38\n000000067F000040020000E0000000D78000-000000067F000040020000E0000000D7C000__00000073AD3FE6B8\n000000067F000040020000E0000000D78000-000000067F000040020000E0000000D7C000__000000914E3F38F0\n000000067F000040020000E0000000D78000-000000067F000040020000E0000000D7C000__000000931B9A2710\n000000067F000040020000E0000000D7C000-000000067F000040020000E0000000D80000__00000053FAFFF9D8\n000000067F000040020000E0000000D7C000-000000067F000040020000E0000000D80000__0000005D2FFFFB38\n000000067F000040020000E0000000D7C000-000000067F000040020000E0000000D80000__00000073AD3FE6B8\n000000067F000040020000E0000000D7C000-000000067F000040020000E0000000D80000__000000914E3F38F0\n000000067F000040020000E0000000D7C000-000000067F000040020000E0000000D80000__000000931B9A2710\n000000067F000040020000E0000000D7D701-000000067F000040020000E0000000D860CB__000000531E29F559-00000053CDCFF331\n000000067F000040020000E0000000D80000-000000067F000040020000E0000000D84000__00000053FAFFF9D8\n000000067F000040020000E0000000D80000-000000
067F000040020000E0000000D84000__0000005D2FFFFB38\n000000067F000040020000E0000000D80000-000000067F000040020000E0000000D84000__00000073AD3FE6B8\n000000067F000040020000E0000000D80000-000000067F000040020000E0000000D84000__000000914E3F38F0\n000000067F000040020000E0000000D80000-000000067F000040020000E0000000D84000__000000931B9A2710\n000000067F000040020000E0000000D84000-000000067F000040020000E0000000D88000__00000053FAFFF9D8\n000000067F000040020000E0000000D84000-000000067F000040020000E0000000D88000__0000005D2FFFFB38\n000000067F000040020000E0000000D84000-000000067F000040020000E0000000D88000__00000073AD3FE6B8\n000000067F000040020000E0000000D84000-000000067F000040020000E0000000D88000__000000914E3F38F0\n000000067F000040020000E0000000D84000-000000067F000040020000E0000000D88000__000000931B9A2710\n000000067F000040020000E0000000D860CB-000000067F000040020000E0000000D8EAB0__000000531E29F559-00000053CDCFF331\n000000067F000040020000E0000000D88000-000000067F000040020000E0000000D8C000__00000053FAFFF9D8\n000000067F000040020000E0000000D88000-000000067F000040020000E0000000D8C000__0000005D2FFFFB38\n000000067F000040020000E0000000D88000-000000067F000040020000E0000000D8C000__00000073AD3FE6B8\n000000067F000040020000E0000000D88000-000000067F000040020000E0000000D8C000__000000914E3F38F0\n000000067F000040020000E0000000D88000-000000067F000040020000E0000000D8C000__000000931B9A2710\n000000067F000040020000E0000000D8C000-000000067F000040020000E0000000D90000__00000053FAFFF9D8\n000000067F000040020000E0000000D8C000-000000067F000040020000E0000000D90000__000000574B7FF240\n000000067F000040020000E0000000D8C000-000000067F000040020000E0000000D90000__00000073AD3FE6B8\n000000067F000040020000E0000000D8C000-000000067F000040020000E0000000D90000__000000914E3F38F0\n000000067F000040020000E0000000D8C000-000000067F000040020000E0000000D90000__000000931B9A2710\n000000067F000040020000E0000000D8EAB0-000000067F000040020000E0000100000000__000000531E29F559-00000053CDCFF331\n000000067F000040020000E0000000D8EDC6-000000067F000040020
000E0000000D977A7__00000053CDCFF331-000000547D77D8A1\n000000067F000040020000E0000000D90000-000000067F000040020000E0000000D94000__00000053FAFFF9D8\n000000067F000040020000E0000000D90000-000000067F000040020000E0000000D94000__000000574B7FF240\n000000067F000040020000E0000000D90000-000000067F000040020000E0000000D94000__00000073AD3FE6B8\n000000067F000040020000E0000000D90000-000000067F000040020000E0000000D94000__000000914E3F38F0\n000000067F000040020000E0000000D90000-000000067F000040020000E0000000D94000__000000931B9A2710\n000000067F000040020000E0000000D94000-000000067F000040020000E0000000D98000__00000053FAFFF9D8\n000000067F000040020000E0000000D94000-000000067F000040020000E0000000D98000__000000574B7FF240\n000000067F000040020000E0000000D94000-000000067F000040020000E0000000D98000__00000073AD3FE6B8\n000000067F000040020000E0000000D94000-000000067F000040020000E0000000D98000__000000914E3F38F0\n000000067F000040020000E0000000D94000-000000067F000040020000E0000000D98000__000000931B9A2710\n000000067F000040020000E0000000D977A7-000000067F000040020000E0000000DA0176__00000053CDCFF331-000000547D77D8A1\n000000067F000040020000E0000000D98000-000000067F000040020000E0000000D9C000__00000053FAFFF9D8\n000000067F000040020000E0000000D98000-000000067F000040020000E0000000D9C000__000000574B7FF240\n000000067F000040020000E0000000D98000-000000067F000040020000E0000000D9C000__00000073AD3FE6B8\n000000067F000040020000E0000000D98000-000000067F000040020000E0000000D9C000__000000914E3F38F0\n000000067F000040020000E0000000D98000-000000067F000040020000E0000000D9C000__000000931B9A2710\n000000067F000040020000E0000000D9C000-000000067F000040020000E0000000DA0000__00000053FAFFF9D8\n000000067F000040020000E0000000D9C000-000000067F000040020000E0000000DA0000__000000574B7FF240\n000000067F000040020000E0000000D9C000-000000067F000040020000E0000000DA0000__00000073AD3FE6B8\n000000067F000040020000E0000000D9C000-000000067F000040020000E0000000DA0000__000000914E3F38F0\n000000067F000040020000E0000000D9C000-000000067F000040020000E0000000DA
0000__000000931B9A2710\n000000067F000040020000E0000000DA0000-000000067F000040020000E0000000DA4000__00000053FAFFF9D8\n000000067F000040020000E0000000DA0000-000000067F000040020000E0000000DA4000__000000574B7FF240\n000000067F000040020000E0000000DA0000-000000067F000040020000E0000000DA4000__00000073AD3FE6B8\n000000067F000040020000E0000000DA0000-000000067F000040020000E0000000DA4000__000000914E3F38F0\n000000067F000040020000E0000000DA0000-000000067F000040020000E0000000DA4000__000000931B9A2710\n000000067F000040020000E0000000DA0176-000000067F000040020000E0000000DA8B58__00000053CDCFF331-000000547D77D8A1\n000000067F000040020000E0000000DA4000-000000067F000040020000E0000000DA8000__000000574B7FF240\n000000067F000040020000E0000000DA4000-000000067F000040020000E0000000DA8000__00000073AD3FE6B8\n000000067F000040020000E0000000DA4000-000000067F000040020000E0000000DA8000__000000914E3F38F0\n000000067F000040020000E0000000DA4000-000000067F000040020000E0000000DA8000__000000931B9A2710\n000000067F000040020000E0000000DA4000-030000000000000000000000000000000002__00000053FAFFF9D8\n000000067F000040020000E0000000DA8000-000000067F000040020000E0000000DAC000__000000574B7FF240\n000000067F000040020000E0000000DA8000-000000067F000040020000E0000000DAC000__00000073AD3FE6B8\n000000067F000040020000E0000000DA8000-000000067F000040020000E0000000DAC000__000000914E3F38F0\n000000067F000040020000E0000000DA8000-000000067F000040020000E0000000DAC000__000000931B9A2710\n000000067F000040020000E0000000DA8B58-000000067F000040020000E0000000DB1534__00000053CDCFF331-000000547D77D8A1\n000000067F000040020000E0000000DAC000-000000067F000040020000E0000000DB0000__000000574B7FF240\n000000067F000040020000E0000000DAC000-000000067F000040020000E0000000DB0000__00000073AD3FE6B8\n000000067F000040020000E0000000DAC000-000000067F000040020000E0000000DB0000__000000914E3F38F0\n000000067F000040020000E0000000DAC000-000000067F000040020000E0000000DB0000__000000931B9A2710\n000000067F000040020000E0000000DB0000-000000067F000040020000E0000000DB4000__0000005
74B7FF240\n000000067F000040020000E0000000DB0000-000000067F000040020000E0000000DB4000__00000073AD3FE6B8\n000000067F000040020000E0000000DB0000-000000067F000040020000E0000000DB4000__000000914E3F38F0\n000000067F000040020000E0000000DB0000-000000067F000040020000E0000000DB4000__000000931B9A2710\n000000067F000040020000E0000000DB1534-000000067F000040020000E0000000DB9F12__00000053CDCFF331-000000547D77D8A1\n000000067F000040020000E0000000DB4000-000000067F000040020000E0000000DB8000__000000574B7FF240\n000000067F000040020000E0000000DB4000-000000067F000040020000E0000000DB8000__00000073AD3FE6B8\n000000067F000040020000E0000000DB4000-000000067F000040020000E0000000DB8000__000000914E3F38F0\n000000067F000040020000E0000000DB4000-000000067F000040020000E0000000DB8000__000000931B9A2710\n000000067F000040020000E0000000DB8000-000000067F000040020000E0000000DBC000__000000574B7FF240\n000000067F000040020000E0000000DB8000-000000067F000040020000E0000000DBC000__00000073AD3FE6B8\n000000067F000040020000E0000000DB8000-000000067F000040020000E0000000DBC000__000000914E3F38F0\n000000067F000040020000E0000000DB8000-000000067F000040020000E0000000DBC000__000000931B9A2710\n000000067F000040020000E0000000DB9F12-000000067F000040020000E0000000DC28E0__00000053CDCFF331-000000547D77D8A1\n000000067F000040020000E0000000DBC000-000000067F000040020000E0000000DC0000__000000574B7FF240\n000000067F000040020000E0000000DBC000-000000067F000040020000E0000000DC0000__00000073AD3FE6B8\n000000067F000040020000E0000000DBC000-000000067F000040020000E0000000DC0000__000000914E3F38F0\n000000067F000040020000E0000000DBC000-000000067F000040020000E0000000DC0000__000000931B9A2710\n000000067F000040020000E0000000DC0000-000000067F000040020000E0000000DC4000__000000574B7FF240\n000000067F000040020000E0000000DC0000-000000067F000040020000E0000000DC4000__00000073AD3FE6B8\n000000067F000040020000E0000000DC0000-000000067F000040020000E0000000DC4000__000000914E3F38F0\n000000067F000040020000E0000000DC0000-000000067F000040020000E0000000DC4000__000000931B9A2710\n00
0000067F000040020000E0000000DC28E0-000000067F000040020000E0000000DCB2CC__00000053CDCFF331-000000547D77D8A1\n000000067F000040020000E0000000DC4000-000000067F000040020000E0000000DC8000__000000574B7FF240\n000000067F000040020000E0000000DC4000-000000067F000040020000E0000000DC8000__00000073AD3FE6B8\n000000067F000040020000E0000000DC4000-000000067F000040020000E0000000DC8000__000000914E3F38F0\n000000067F000040020000E0000000DC4000-000000067F000040020000E0000000DC8000__000000931B9A2710\n000000067F000040020000E0000000DC8000-000000067F000040020000E0000000DCC000__000000574B7FF240\n000000067F000040020000E0000000DC8000-000000067F000040020000E0000000DCC000__00000073AD3FE6B8\n000000067F000040020000E0000000DC8000-000000067F000040020000E0000000DCC000__000000914E3F38F0\n000000067F000040020000E0000000DC8000-000000067F000040020000E0000000DCC000__000000931B9A2710\n000000067F000040020000E0000000DCB2CC-000000067F000040020000E0000000DD3CB0__00000053CDCFF331-000000547D77D8A1\n000000067F000040020000E0000000DCC000-000000067F000040020000E0000000DD0000__000000574B7FF240\n000000067F000040020000E0000000DCC000-000000067F000040020000E0000000DD0000__00000073AD3FE6B8\n000000067F000040020000E0000000DCC000-000000067F000040020000E0000000DD0000__000000914E3F38F0\n000000067F000040020000E0000000DCC000-000000067F000040020000E0000000DD0000__000000931B9A2710\n000000067F000040020000E0000000DD0000-000000067F000040020000E0000000DD4000__000000574B7FF240\n000000067F000040020000E0000000DD0000-000000067F000040020000E0000000DD4000__00000073AD3FE6B8\n000000067F000040020000E0000000DD0000-000000067F000040020000E0000000DD4000__000000914E3F38F0\n000000067F000040020000E0000000DD0000-000000067F000040020000E0000000DD4000__000000931B9A2710\n000000067F000040020000E0000000DD3CB0-000000067F000040020000E0000000DDC69C__00000053CDCFF331-000000547D77D8A1\n000000067F000040020000E0000000DD4000-000000067F000040020000E0000000DD8000__000000574B7FF240\n000000067F000040020000E0000000DD4000-000000067F000040020000E0000000DD8000__00000073AD3FE6B8
\n000000067F000040020000E0000000DD4000-000000067F000040020000E0000000DD8000__000000914E3F38F0\n000000067F000040020000E0000000DD4000-000000067F000040020000E0000000DD8000__000000931B9A2710\n000000067F000040020000E0000000DD8000-000000067F000040020000E0000000DDC000__000000574B7FF240\n000000067F000040020000E0000000DD8000-000000067F000040020000E0000000DDC000__00000073AD3FE6B8\n000000067F000040020000E0000000DD8000-000000067F000040020000E0000000DDC000__000000914E3F38F0\n000000067F000040020000E0000000DD8000-000000067F000040020000E0000000DDC000__000000931B9A2710\n000000067F000040020000E0000000DDC000-000000067F000040020000E0000000DE0000__000000574B7FF240\n000000067F000040020000E0000000DDC000-000000067F000040020000E0000000DE0000__00000073AD3FE6B8\n000000067F000040020000E0000000DDC000-000000067F000040020000E0000000DE0000__000000914E3F38F0\n000000067F000040020000E0000000DDC000-000000067F000040020000E0000000DE0000__000000931B9A2710\n000000067F000040020000E0000000DDC69C-000000067F000040020000E0000000DE5083__00000053CDCFF331-000000547D77D8A1\n000000067F000040020000E0000000DE0000-000000067F000040020000E0000000DE4000__000000574B7FF240\n000000067F000040020000E0000000DE0000-000000067F000040020000E0000000DE4000__00000073AD3FE6B8\n000000067F000040020000E0000000DE0000-000000067F000040020000E0000000DE4000__000000914E3F38F0\n000000067F000040020000E0000000DE0000-000000067F000040020000E0000000DE4000__000000931B9A2710\n000000067F000040020000E0000000DE4000-000000067F000040020000E0000000DE8000__000000574B7FF240\n000000067F000040020000E0000000DE4000-000000067F000040020000E0000000DE8000__00000073AD3FE6B8\n000000067F000040020000E0000000DE4000-000000067F000040020000E0000000DE8000__000000914E3F38F0\n000000067F000040020000E0000000DE4000-000000067F000040020000E0000000DE8000__000000931B9A2710\n000000067F000040020000E0000000DE5083-000000067F000040020000E0000000DEDA64__00000053CDCFF331-000000547D77D8A1\n000000067F000040020000E0000000DE8000-000000067F000040020000E0000000DEC000__000000574B7FF240\n000000067F0
00040020000E0000000DE8000-000000067F000040020000E0000000DEC000__00000073AD3FE6B8\n000000067F000040020000E0000000DE8000-000000067F000040020000E0000000DEC000__000000914E3F38F0\n000000067F000040020000E0000000DE8000-000000067F000040020000E0000000DEC000__000000931B9A2710\n000000067F000040020000E0000000DEC000-000000067F000040020000E0000000DF0000__000000572A7A05D8\n000000067F000040020000E0000000DEC000-000000067F000040020000E0000000DF0000__0000005D2FFFFB38\n000000067F000040020000E0000000DEC000-000000067F000040020000E0000000DF0000__00000073AD3FE6B8\n000000067F000040020000E0000000DEC000-000000067F000040020000E0000000DF0000__000000914E3F38F0\n000000067F000040020000E0000000DEC000-000000067F000040020000E0000000DF0000__000000931B9A2710\n000000067F000040020000E0000000DEDA64-000000067F000040020000E0000100000000__00000053CDCFF331-000000547D77D8A1\n000000067F000040020000E0000000DEDD69-000000067F000040020000E0000000DF6741__000000547D77D8A1-000000551D27ECC9\n000000067F000040020000E0000000DF0000-000000067F000040020000E0000000DF4000__000000572A7A05D8\n000000067F000040020000E0000000DF0000-000000067F000040020000E0000000DF4000__0000005D2FFFFB38\n000000067F000040020000E0000000DF0000-000000067F000040020000E0000000DF4000__00000073AD3FE6B8\n000000067F000040020000E0000000DF0000-000000067F000040020000E0000000DF4000__000000914E3F38F0\n000000067F000040020000E0000000DF0000-000000067F000040020000E0000000DF4000__000000931B9A2710\n000000067F000040020000E0000000DF4000-000000067F000040020000E0000000DF8000__000000572A7A05D8\n000000067F000040020000E0000000DF4000-000000067F000040020000E0000000DF8000__0000005D2FFFFB38\n000000067F000040020000E0000000DF4000-000000067F000040020000E0000000DF8000__00000073AD3FE6B8\n000000067F000040020000E0000000DF4000-000000067F000040020000E0000000DF8000__000000914E3F38F0\n000000067F000040020000E0000000DF4000-000000067F000040020000E0000000DF8000__000000931B9A2710\n000000067F000040020000E0000000DF6741-000000067F000040020000E0000000DFF11F__000000547D77D8A1-000000551D27ECC9\n0000000
67F000040020000E0000000DF8000-000000067F000040020000E0000000DFC000__000000572A7A05D8\n000000067F000040020000E0000000DF8000-000000067F000040020000E0000000DFC000__0000005D2FFFFB38\n000000067F000040020000E0000000DF8000-000000067F000040020000E0000000DFC000__00000073AD3FE6B8\n000000067F000040020000E0000000DF8000-000000067F000040020000E0000000DFC000__000000914E3F38F0\n000000067F000040020000E0000000DF8000-000000067F000040020000E0000000DFC000__000000931B9A2710\n000000067F000040020000E0000000DFC000-000000067F000040020000E0000000E00000__000000572A7A05D8\n000000067F000040020000E0000000DFC000-000000067F000040020000E0000000E00000__0000005D2FFFFB38\n000000067F000040020000E0000000DFC000-000000067F000040020000E0000000E00000__00000073AD3FE6B8\n000000067F000040020000E0000000DFC000-000000067F000040020000E0000000E00000__000000914E3F38F0\n000000067F000040020000E0000000DFC000-000000067F000040020000E0000000E00000__000000931B9A2710\n000000067F000040020000E0000000DFF11F-000000067F000040020000E0000000E07AED__000000547D77D8A1-000000551D27ECC9\n000000067F000040020000E0000000E00000-000000067F000040020000E0000000E04000__000000572A7A05D8\n000000067F000040020000E0000000E00000-000000067F000040020000E0000000E04000__0000005D2FFFFB38\n000000067F000040020000E0000000E00000-000000067F000040020000E0000000E04000__00000073AD3FE6B8\n000000067F000040020000E0000000E00000-000000067F000040020000E0000000E04000__000000914E3F38F0\n000000067F000040020000E0000000E00000-000000067F000040020000E0000000E04000__000000931B9A2710\n000000067F000040020000E0000000E04000-000000067F000040020000E0000000E08000__000000572A7A05D8\n000000067F000040020000E0000000E04000-000000067F000040020000E0000000E08000__0000005D2FFFFB38\n000000067F000040020000E0000000E04000-000000067F000040020000E0000000E08000__00000073AD3FE6B8\n000000067F000040020000E0000000E04000-000000067F000040020000E0000000E08000__000000914E3F38F0\n000000067F000040020000E0000000E04000-000000067F000040020000E0000000E08000__000000931B9A2710\n000000067F000040020000E0000000E07AED-
000000067F000040020000E0000000E104CE__000000547D77D8A1-000000551D27ECC9\n000000067F000040020000E0000000E08000-000000067F000040020000E0000000E0C000__000000572A7A05D8\n000000067F000040020000E0000000E08000-000000067F000040020000E0000000E0C000__0000005D2FFFFB38\n000000067F000040020000E0000000E08000-000000067F000040020000E0000000E0C000__00000073AD3FE6B8\n000000067F000040020000E0000000E08000-000000067F000040020000E0000000E0C000__000000914E3F38F0\n000000067F000040020000E0000000E08000-000000067F000040020000E0000000E0C000__000000931B9A2710\n000000067F000040020000E0000000E0C000-000000067F000040020000E0000000E10000__000000572A7A05D8\n000000067F000040020000E0000000E0C000-000000067F000040020000E0000000E10000__0000005D2FFFFB38\n000000067F000040020000E0000000E0C000-000000067F000040020000E0000000E10000__00000073AD3FE6B8\n000000067F000040020000E0000000E0C000-000000067F000040020000E0000000E10000__000000914E3F38F0\n000000067F000040020000E0000000E0C000-000000067F000040020000E0000000E10000__000000931B9A2710\n000000067F000040020000E0000000E10000-000000067F000040020000E0000000E14000__000000572A7A05D8\n000000067F000040020000E0000000E10000-000000067F000040020000E0000000E14000__0000005D2FFFFB38\n000000067F000040020000E0000000E10000-000000067F000040020000E0000000E14000__00000073AD3FE6B8\n000000067F000040020000E0000000E10000-000000067F000040020000E0000000E14000__000000914E3F38F0\n000000067F000040020000E0000000E10000-000000067F000040020000E0000000E14000__000000931B9A2710\n000000067F000040020000E0000000E104CE-000000067F000040020000E0000000E18EAE__000000547D77D8A1-000000551D27ECC9\n000000067F000040020000E0000000E14000-000000067F000040020000E0000000E18000__000000572A7A05D8\n000000067F000040020000E0000000E14000-000000067F000040020000E0000000E18000__0000005D2FFFFB38\n000000067F000040020000E0000000E14000-000000067F000040020000E0000000E18000__00000073AD3FE6B8\n000000067F000040020000E0000000E14000-000000067F000040020000E0000000E18000__000000914E3F38F0\n000000067F000040020000E0000000E14000-000000067F000
040020000E0000000E18000__000000931B9A2710\n000000067F000040020000E0000000E18000-000000067F000040020000E0000000E1C000__000000572A7A05D8\n000000067F000040020000E0000000E18000-000000067F000040020000E0000000E1C000__0000005D2FFFFB38\n000000067F000040020000E0000000E18000-000000067F000040020000E0000000E1C000__00000073AD3FE6B8\n000000067F000040020000E0000000E18000-000000067F000040020000E0000000E1C000__000000914E3F38F0\n000000067F000040020000E0000000E18000-000000067F000040020000E0000000E1C000__000000931B9A2710\n000000067F000040020000E0000000E18EAE-000000067F000040020000E0000000E2188E__000000547D77D8A1-000000551D27ECC9\n000000067F000040020000E0000000E1C000-000000067F000040020000E0000000E20000__000000572A7A05D8\n000000067F000040020000E0000000E1C000-000000067F000040020000E0000000E20000__0000005D2FFFFB38\n000000067F000040020000E0000000E1C000-000000067F000040020000E0000000E20000__00000073AD3FE6B8\n000000067F000040020000E0000000E1C000-000000067F000040020000E0000000E20000__000000914E3F38F0\n000000067F000040020000E0000000E1C000-000000067F000040020000E0000000E20000__000000931B9A2710\n000000067F000040020000E0000000E20000-000000067F000040020000E0000000E24000__000000572A7A05D8\n000000067F000040020000E0000000E20000-000000067F000040020000E0000000E24000__0000005D2FFFFB38\n000000067F000040020000E0000000E20000-000000067F000040020000E0000000E24000__00000073AD3FE6B8\n000000067F000040020000E0000000E20000-000000067F000040020000E0000000E24000__000000914E3F38F0\n000000067F000040020000E0000000E20000-000000067F000040020000E0000000E24000__000000931B9A2710\n000000067F000040020000E0000000E2188E-000000067F000040020000E0000000E2A276__000000547D77D8A1-000000551D27ECC9\n000000067F000040020000E0000000E24000-000000067F000040020000E0000000E28000__000000572A7A05D8\n000000067F000040020000E0000000E24000-000000067F000040020000E0000000E28000__0000005D2FFFFB38\n000000067F000040020000E0000000E24000-000000067F000040020000E0000000E28000__00000073AD3FE6B8\n000000067F000040020000E0000000E24000-000000067F000040020000E000
0000E28000__000000914E3F38F0\n000000067F000040020000E0000000E24000-000000067F000040020000E0000000E28000__000000931B9A2710\n000000067F000040020000E0000000E28000-000000067F000040020000E0000000E2C000__000000572A7A05D8\n000000067F000040020000E0000000E28000-000000067F000040020000E0000000E2C000__0000005D2FFFFB38\n000000067F000040020000E0000000E28000-000000067F000040020000E0000000E2C000__00000073AD3FE6B8\n000000067F000040020000E0000000E28000-000000067F000040020000E0000000E2C000__000000914E3F38F0\n000000067F000040020000E0000000E28000-000000067F000040020000E0000000E2C000__000000931B9A2710\n000000067F000040020000E0000000E2A276-000000067F000040020000E0000000E32C4B__000000547D77D8A1-000000551D27ECC9\n000000067F000040020000E0000000E2C000-000000067F000040020000E0000000E30000__000000572A7A05D8\n000000067F000040020000E0000000E2C000-000000067F000040020000E0000000E30000__0000005D2FFFFB38\n000000067F000040020000E0000000E2C000-000000067F000040020000E0000000E30000__00000073AD3FE6B8\n000000067F000040020000E0000000E2C000-000000067F000040020000E0000000E30000__000000914E3F38F0\n000000067F000040020000E0000000E2C000-000000067F000040020000E0000000E30000__000000931B9A2710\n000000067F000040020000E0000000E30000-000000067F000040020000E0000000E34000__000000572A7A05D8\n000000067F000040020000E0000000E30000-000000067F000040020000E0000000E34000__0000005D2FFFFB38\n000000067F000040020000E0000000E30000-000000067F000040020000E0000000E34000__00000073AD3FE6B8\n000000067F000040020000E0000000E30000-000000067F000040020000E0000000E34000__000000914E3F38F0\n000000067F000040020000E0000000E30000-000000067F000040020000E0000000E34000__000000931B9A2710\n000000067F000040020000E0000000E32C4B-000000067F000040020000E0000000E3B629__000000547D77D8A1-000000551D27ECC9\n000000067F000040020000E0000000E34000-000000067F000040020000E0000000E38000__000000572A7A05D8\n000000067F000040020000E0000000E34000-000000067F000040020000E0000000E38000__0000005D2FFFFB38\n000000067F000040020000E0000000E34000-000000067F000040020000E0000000E38000__0
0000073AD3FE6B8\n000000067F000040020000E0000000E34000-000000067F000040020000E0000000E38000__000000914E3F38F0\n000000067F000040020000E0000000E34000-000000067F000040020000E0000000E38000__000000931B9A2710\n000000067F000040020000E0000000E38000-000000067F000040020000E0000000E3C000__000000572A7A05D8\n000000067F000040020000E0000000E38000-000000067F000040020000E0000000E3C000__0000005D2FFFFB38\n000000067F000040020000E0000000E38000-000000067F000040020000E0000000E3C000__00000073AD3FE6B8\n000000067F000040020000E0000000E38000-000000067F000040020000E0000000E3C000__000000914E3F38F0\n000000067F000040020000E0000000E38000-000000067F000040020000E0000000E3C000__000000931B9A2710\n000000067F000040020000E0000000E3B629-000000067F000040020000E0000000E43FF6__000000547D77D8A1-000000551D27ECC9\n000000067F000040020000E0000000E3C000-000000067F000040020000E0000000E40000__000000572A7A05D8\n000000067F000040020000E0000000E3C000-000000067F000040020000E0000000E40000__0000005D2FFFFB38\n000000067F000040020000E0000000E3C000-000000067F000040020000E0000000E40000__00000073AD3FE6B8\n000000067F000040020000E0000000E3C000-000000067F000040020000E0000000E40000__000000914E3F38F0\n000000067F000040020000E0000000E3C000-000000067F000040020000E0000000E40000__000000931B9A2710\n000000067F000040020000E0000000E40000-000000067F000040020000E0000000E44000__000000572A7A05D8\n000000067F000040020000E0000000E40000-000000067F000040020000E0000000E44000__0000005D2FFFFB38\n000000067F000040020000E0000000E40000-000000067F000040020000E0000000E44000__00000073AD3FE6B8\n000000067F000040020000E0000000E40000-000000067F000040020000E0000000E44000__000000914E3F38F0\n000000067F000040020000E0000000E40000-000000067F000040020000E0000000E44000__000000931B9A2710\n000000067F000040020000E0000000E43FF6-000000067F000040020000E0000100000000__000000547D77D8A1-000000551D27ECC9\n000000067F000040020000E0000000E44000-000000067F000040020000E0000000E48000__00000055ECBFFA00\n000000067F000040020000E0000000E44000-000000067F000040020000E0000000E48000__0000005D2FFFFB
38\n000000067F000040020000E0000000E44000-000000067F000040020000E0000000E48000__00000073AD3FE6B8\n000000067F000040020000E0000000E44000-000000067F000040020000E0000000E48000__000000914E3F38F0\n000000067F000040020000E0000000E44000-000000067F000040020000E0000000E48000__000000931B9A2710\n000000067F000040020000E0000000E442D1-000000067F000040020000E0000000E4CCA9__000000551D27ECC9-00000055BCD7D459\n000000067F000040020000E0000000E48000-000000067F000040020000E0000000E4C000__00000055ECBFFA00\n000000067F000040020000E0000000E48000-000000067F000040020000E0000000E4C000__0000005D2FFFFB38\n000000067F000040020000E0000000E48000-000000067F000040020000E0000000E4C000__00000073AD3FE6B8\n000000067F000040020000E0000000E48000-000000067F000040020000E0000000E4C000__000000914E3F38F0\n000000067F000040020000E0000000E48000-000000067F000040020000E0000000E4C000__000000931B9A2710\n000000067F000040020000E0000000E4C000-000000067F000040020000E0000000E50000__00000055ECBFFA00\n000000067F000040020000E0000000E4C000-000000067F000040020000E0000000E50000__0000005D2FFFFB38\n000000067F000040020000E0000000E4C000-000000067F000040020000E0000000E50000__00000073AD3FE6B8\n000000067F000040020000E0000000E4C000-000000067F000040020000E0000000E50000__000000914E3F38F0\n000000067F000040020000E0000000E4C000-000000067F000040020000E0000000E50000__000000931B9A2710\n000000067F000040020000E0000000E4CCA9-000000067F000040020000E0000000E55690__000000551D27ECC9-00000055BCD7D459\n000000067F000040020000E0000000E50000-000000067F000040020000E0000000E54000__00000055ECBFFA00\n000000067F000040020000E0000000E50000-000000067F000040020000E0000000E54000__0000005D2FFFFB38\n000000067F000040020000E0000000E50000-000000067F000040020000E0000000E54000__00000073AD3FE6B8\n000000067F000040020000E0000000E50000-000000067F000040020000E0000000E54000__000000914E3F38F0\n000000067F000040020000E0000000E50000-000000067F000040020000E0000000E54000__000000931B9A2710\n000000067F000040020000E0000000E54000-000000067F000040020000E0000000E58000__00000055ECBFFA00\n000000067
F000040020000E0000000E54000-000000067F000040020000E0000000E58000__0000005D2FFFFB38\n000000067F000040020000E0000000E54000-000000067F000040020000E0000000E58000__00000073AD3FE6B8\n000000067F000040020000E0000000E54000-000000067F000040020000E0000000E58000__000000914E3F38F0\n000000067F000040020000E0000000E54000-000000067F000040020000E0000000E58000__000000931B9A2710\n000000067F000040020000E0000000E55690-000000067F000040020000E0000000E5E072__000000551D27ECC9-00000055BCD7D459\n000000067F000040020000E0000000E58000-000000067F000040020000E0000000E5C000__00000055ECBFFA00\n000000067F000040020000E0000000E58000-000000067F000040020000E0000000E5C000__0000005D2FFFFB38\n000000067F000040020000E0000000E58000-000000067F000040020000E0000000E5C000__00000073AD3FE6B8\n000000067F000040020000E0000000E58000-000000067F000040020000E0000000E5C000__000000914E3F38F0\n000000067F000040020000E0000000E58000-000000067F000040020000E0000000E5C000__000000931B9A2710\n000000067F000040020000E0000000E5C000-000000067F000040020000E0000000E60000__00000055ECBFFA00\n000000067F000040020000E0000000E5C000-000000067F000040020000E0000000E60000__0000005D2FFFFB38\n000000067F000040020000E0000000E5C000-000000067F000040020000E0000000E60000__00000073AD3FE6B8\n000000067F000040020000E0000000E5C000-000000067F000040020000E0000000E60000__000000914E3F38F0\n000000067F000040020000E0000000E5C000-000000067F000040020000E0000000E60000__000000931B9A2710\n000000067F000040020000E0000000E5E072-000000067F000040020000E0000000E66A4D__000000551D27ECC9-00000055BCD7D459\n000000067F000040020000E0000000E60000-000000067F000040020000E0000000E64000__00000055ECBFFA00\n000000067F000040020000E0000000E60000-000000067F000040020000E0000000E64000__0000005D2FFFFB38\n000000067F000040020000E0000000E60000-000000067F000040020000E0000000E64000__00000073AD3FE6B8\n000000067F000040020000E0000000E60000-000000067F000040020000E0000000E64000__000000914E3F38F0\n000000067F000040020000E0000000E60000-000000067F000040020000E0000000E64000__000000931B9A2710\n000000067F000040020000
E0000000E64000-000000067F000040020000E0000000E68000__00000055ECBFFA00\n000000067F000040020000E0000000E64000-000000067F000040020000E0000000E68000__0000005D2FFFFB38\n000000067F000040020000E0000000E64000-000000067F000040020000E0000000E68000__00000073AD3FE6B8\n000000067F000040020000E0000000E64000-000000067F000040020000E0000000E68000__000000914E3F38F0\n000000067F000040020000E0000000E64000-000000067F000040020000E0000000E68000__000000931B9A2710\n000000067F000040020000E0000000E66A4D-000000067F000040020000E0000000E6F424__000000551D27ECC9-00000055BCD7D459\n000000067F000040020000E0000000E68000-000000067F000040020000E0000000E6C000__00000055ECBFFA00\n000000067F000040020000E0000000E68000-000000067F000040020000E0000000E6C000__0000005D2FFFFB38\n000000067F000040020000E0000000E68000-000000067F000040020000E0000000E6C000__00000073AD3FE6B8\n000000067F000040020000E0000000E68000-000000067F000040020000E0000000E6C000__000000914E3F38F0\n000000067F000040020000E0000000E68000-000000067F000040020000E0000000E6C000__000000931B9A2710\n000000067F000040020000E0000000E6C000-000000067F000040020000E0000000E70000__00000055ECBFFA00\n000000067F000040020000E0000000E6C000-000000067F000040020000E0000000E70000__0000005D2FFFFB38\n000000067F000040020000E0000000E6C000-000000067F000040020000E0000000E70000__00000073AD3FE6B8\n000000067F000040020000E0000000E6C000-000000067F000040020000E0000000E70000__000000914E3F38F0\n000000067F000040020000E0000000E6C000-000000067F000040020000E0000000E70000__000000931B9A2710\n000000067F000040020000E0000000E6F424-000000067F000040020000E0000000E77E01__000000551D27ECC9-00000055BCD7D459\n000000067F000040020000E0000000E70000-000000067F000040020000E0000000E74000__00000055ECBFFA00\n000000067F000040020000E0000000E70000-000000067F000040020000E0000000E74000__0000005D2FFFFB38\n000000067F000040020000E0000000E70000-000000067F000040020000E0000000E74000__00000073AD3FE6B8\n000000067F000040020000E0000000E70000-000000067F000040020000E0000000E74000__000000914E3F38F0\n000000067F000040020000E0000000E7000
0-000000067F000040020000E0000000E74000__000000931B9A2710\n000000067F000040020000E0000000E74000-000000067F000040020000E0000000E78000__00000055ECBFFA00\n000000067F000040020000E0000000E74000-000000067F000040020000E0000000E78000__0000005D2FFFFB38\n000000067F000040020000E0000000E74000-000000067F000040020000E0000000E78000__00000073AD3FE6B8\n000000067F000040020000E0000000E74000-000000067F000040020000E0000000E78000__000000914E3F38F0\n000000067F000040020000E0000000E74000-000000067F000040020000E0000000E78000__000000931B9A2710\n000000067F000040020000E0000000E77E01-000000067F000040020000E0000000E807CF__000000551D27ECC9-00000055BCD7D459\n000000067F000040020000E0000000E78000-000000067F000040020000E0000000E7C000__00000055ECBFFA00\n000000067F000040020000E0000000E78000-000000067F000040020000E0000000E7C000__0000005D2FFFFB38\n000000067F000040020000E0000000E78000-000000067F000040020000E0000000E7C000__00000073AD3FE6B8\n000000067F000040020000E0000000E78000-000000067F000040020000E0000000E7C000__000000914E3F38F0\n000000067F000040020000E0000000E78000-000000067F000040020000E0000000E7C000__000000931B9A2710\n000000067F000040020000E0000000E7C000-000000067F000040020000E0000000E80000__00000055ECBFFA00\n000000067F000040020000E0000000E7C000-000000067F000040020000E0000000E80000__0000005D2FFFFB38\n000000067F000040020000E0000000E7C000-000000067F000040020000E0000000E80000__00000073AD3FE6B8\n000000067F000040020000E0000000E7C000-000000067F000040020000E0000000E80000__000000914E3F38F0\n000000067F000040020000E0000000E7C000-000000067F000040020000E0000000E80000__000000931B9A2710\n000000067F000040020000E0000000E80000-000000067F000040020000E0000000E84000__00000055ECBFFA00\n000000067F000040020000E0000000E80000-000000067F000040020000E0000000E84000__0000005D2FFFFB38\n000000067F000040020000E0000000E80000-000000067F000040020000E0000000E84000__00000073AD3FE6B8\n000000067F000040020000E0000000E80000-000000067F000040020000E0000000E84000__000000914E3F38F0\n000000067F000040020000E0000000E80000-000000067F000040020000E00000
00E84000__000000931B9A2710\n000000067F000040020000E0000000E807CF-000000067F000040020000E0000000E891B1__000000551D27ECC9-00000055BCD7D459\n000000067F000040020000E0000000E84000-000000067F000040020000E0000000E88000__00000055ECBFFA00\n000000067F000040020000E0000000E84000-000000067F000040020000E0000000E88000__0000005D2FFFFB38\n000000067F000040020000E0000000E84000-000000067F000040020000E0000000E88000__00000073AD3FE6B8\n000000067F000040020000E0000000E84000-000000067F000040020000E0000000E88000__000000914E3F38F0\n000000067F000040020000E0000000E84000-000000067F000040020000E0000000E88000__000000931B9A2710\n000000067F000040020000E0000000E88000-000000067F000040020000E0000000E8C000__00000055ECBFFA00\n000000067F000040020000E0000000E88000-000000067F000040020000E0000000E8C000__0000005D2FFFFB38\n000000067F000040020000E0000000E88000-000000067F000040020000E0000000E8C000__00000073AD3FE6B8\n000000067F000040020000E0000000E88000-000000067F000040020000E0000000E8C000__000000914E3F38F0\n000000067F000040020000E0000000E88000-000000067F000040020000E0000000E8C000__000000931B9A2710\n000000067F000040020000E0000000E891B1-000000067F000040020000E0000000E91B9A__000000551D27ECC9-00000055BCD7D459\n000000067F000040020000E0000000E8C000-000000067F000040020000E0000000E90000__00000055ECBFFA00\n000000067F000040020000E0000000E8C000-000000067F000040020000E0000000E90000__0000005D2FFFFB38\n000000067F000040020000E0000000E8C000-000000067F000040020000E0000000E90000__00000073AD3FE6B8\n000000067F000040020000E0000000E8C000-000000067F000040020000E0000000E90000__000000914E3F38F0\n000000067F000040020000E0000000E8C000-000000067F000040020000E0000000E90000__000000931B9A2710\n000000067F000040020000E0000000E90000-000000067F000040020000E0000000E94000__00000055ECBFFA00\n000000067F000040020000E0000000E90000-000000067F000040020000E0000000E94000__0000005D2FFFFB38\n000000067F000040020000E0000000E90000-000000067F000040020000E0000000E94000__00000073AD3FE6B8\n000000067F000040020000E0000000E90000-000000067F000040020000E0000000E94000__000
000914E3F38F0\n000000067F000040020000E0000000E90000-000000067F000040020000E0000000E94000__000000931B9A2710\n000000067F000040020000E0000000E91B9A-000000067F000040020000E0000000E9A57C__000000551D27ECC9-00000055BCD7D459\n000000067F000040020000E0000000E94000-000000067F000040020000E0000000E98000__00000055ECBFFA00\n000000067F000040020000E0000000E94000-000000067F000040020000E0000000E98000__0000005D2FFFFB38\n000000067F000040020000E0000000E94000-000000067F000040020000E0000000E98000__00000073AD3FE6B8\n000000067F000040020000E0000000E94000-000000067F000040020000E0000000E98000__000000914E3F38F0\n000000067F000040020000E0000000E94000-000000067F000040020000E0000000E98000__000000931B9A2710\n000000067F000040020000E0000000E98000-000000067F000040020000E0000000E9C000__00000055ECBFFA00\n000000067F000040020000E0000000E98000-000000067F000040020000E0000000E9C000__000000574B7FF240\n000000067F000040020000E0000000E98000-000000067F000040020000E0000000E9C000__00000073AD3FE6B8\n000000067F000040020000E0000000E98000-000000067F000040020000E0000000E9C000__000000914E3F38F0\n000000067F000040020000E0000000E98000-000000067F000040020000E0000000E9C000__000000931B9A2710\n000000067F000040020000E0000000E9A57C-000000067F000040020000E0000100000000__000000551D27ECC9-00000055BCD7D459\n000000067F000040020000E0000000E9A850-000000067F000040020000E0000000EA322A__00000055BCD7D459-000000565C87E419\n000000067F000040020000E0000000E9C000-000000067F000040020000E0000000EA0000__00000055ECBFFA00\n000000067F000040020000E0000000E9C000-000000067F000040020000E0000000EA0000__000000574B7FF240\n000000067F000040020000E0000000E9C000-000000067F000040020000E0000000EA0000__00000073AD3FE6B8\n000000067F000040020000E0000000E9C000-000000067F000040020000E0000000EA0000__000000914E3F38F0\n000000067F000040020000E0000000E9C000-000000067F000040020000E0000000EA0000__000000931B9A2710\n000000067F000040020000E0000000EA0000-000000067F000040020000E0000000EA4000__00000055ECBFFA00\n000000067F000040020000E0000000EA0000-000000067F000040020000E0000000EA4000_
_000000574B7FF240\n000000067F000040020000E0000000EA0000-000000067F000040020000E0000000EA4000__00000073AD3FE6B8\n000000067F000040020000E0000000EA0000-000000067F000040020000E0000000EA4000__000000914E3F38F0\n000000067F000040020000E0000000EA0000-000000067F000040020000E0000000EA4000__000000931B9A2710\n000000067F000040020000E0000000EA322A-000000067F000040020000E0000000EABBFA__00000055BCD7D459-000000565C87E419\n000000067F000040020000E0000000EA4000-000000067F000040020000E0000000EA8000__00000055ECBFFA00\n000000067F000040020000E0000000EA4000-000000067F000040020000E0000000EA8000__000000574B7FF240\n000000067F000040020000E0000000EA4000-000000067F000040020000E0000000EA8000__00000073AD3FE6B8\n000000067F000040020000E0000000EA4000-000000067F000040020000E0000000EA8000__000000914E3F38F0\n000000067F000040020000E0000000EA4000-000000067F000040020000E0000000EA8000__000000931B9A2710\n000000067F000040020000E0000000EA8000-000000067F000040020000E0000000EAC000__00000055ECBFFA00\n000000067F000040020000E0000000EA8000-000000067F000040020000E0000000EAC000__000000574B7FF240\n000000067F000040020000E0000000EA8000-000000067F000040020000E0000000EAC000__00000073AD3FE6B8\n000000067F000040020000E0000000EA8000-000000067F000040020000E0000000EAC000__000000914E3F38F0\n000000067F000040020000E0000000EA8000-000000067F000040020000E0000000EAC000__000000931B9A2710\n000000067F000040020000E0000000EABBFA-000000067F000040020000E0000000EB45E8__00000055BCD7D459-000000565C87E419\n000000067F000040020000E0000000EAC000-000000067F000040020000E0000000EB0000__00000055ECBFFA00\n000000067F000040020000E0000000EAC000-000000067F000040020000E0000000EB0000__000000574B7FF240\n000000067F000040020000E0000000EAC000-000000067F000040020000E0000000EB0000__00000073AD3FE6B8\n000000067F000040020000E0000000EAC000-000000067F000040020000E0000000EB0000__000000914E3F38F0\n000000067F000040020000E0000000EAC000-000000067F000040020000E0000000EB0000__000000931B9A2710\n000000067F000040020000E0000000EB0000-000000067F000040020000E0000000EB4000__00000055ECBF
FA00\n000000067F000040020000E0000000EB0000-000000067F000040020000E0000000EB4000__000000574B7FF240\n000000067F000040020000E0000000EB0000-000000067F000040020000E0000000EB4000__00000073AD3FE6B8\n000000067F000040020000E0000000EB0000-000000067F000040020000E0000000EB4000__000000914E3F38F0\n000000067F000040020000E0000000EB0000-000000067F000040020000E0000000EB4000__000000931B9A2710\n000000067F000040020000E0000000EB4000-000000067F000040020000E0000000EB8000__000000574B7FF240\n000000067F000040020000E0000000EB4000-000000067F000040020000E0000000EB8000__00000073AD3FE6B8\n000000067F000040020000E0000000EB4000-000000067F000040020000E0000000EB8000__000000914E3F38F0\n000000067F000040020000E0000000EB4000-000000067F000040020000E0000000EB8000__000000931B9A2710\n000000067F000040020000E0000000EB4000-030000000000000000000000000000000002__00000055ECBFFA00\n000000067F000040020000E0000000EB45E8-000000067F000040020000E0000000EBCFC3__00000055BCD7D459-000000565C87E419\n000000067F000040020000E0000000EB8000-000000067F000040020000E0000000EBC000__000000574B7FF240\n000000067F000040020000E0000000EB8000-000000067F000040020000E0000000EBC000__00000073AD3FE6B8\n000000067F000040020000E0000000EB8000-000000067F000040020000E0000000EBC000__000000914E3F38F0\n000000067F000040020000E0000000EB8000-000000067F000040020000E0000000EBC000__000000931B9A2710\n000000067F000040020000E0000000EBC000-000000067F000040020000E0000000EC0000__000000574B7FF240\n000000067F000040020000E0000000EBC000-000000067F000040020000E0000000EC0000__00000073AD3FE6B8\n000000067F000040020000E0000000EBC000-000000067F000040020000E0000000EC0000__000000914E3F38F0\n000000067F000040020000E0000000EBC000-000000067F000040020000E0000000EC0000__000000931B9A2710\n000000067F000040020000E0000000EBCFC3-000000067F000040020000E0000000EC599B__00000055BCD7D459-000000565C87E419\n000000067F000040020000E0000000EC0000-000000067F000040020000E0000000EC4000__000000574B7FF240\n000000067F000040020000E0000000EC0000-000000067F000040020000E0000000EC4000__00000073AD3FE6B8\n0000000
67F000040020000E0000000EC0000-000000067F000040020000E0000000EC4000__000000914E3F38F0\n000000067F000040020000E0000000EC0000-000000067F000040020000E0000000EC4000__000000931B9A2710\n000000067F000040020000E0000000EC4000-000000067F000040020000E0000000EC8000__000000574B7FF240\n000000067F000040020000E0000000EC4000-000000067F000040020000E0000000EC8000__00000073AD3FE6B8\n000000067F000040020000E0000000EC4000-000000067F000040020000E0000000EC8000__000000914E3F38F0\n000000067F000040020000E0000000EC4000-000000067F000040020000E0000000EC8000__000000931B9A2710\n000000067F000040020000E0000000EC599B-000000067F000040020000E0000000ECE381__00000055BCD7D459-000000565C87E419\n000000067F000040020000E0000000EC8000-000000067F000040020000E0000000ECC000__000000574B7FF240\n000000067F000040020000E0000000EC8000-000000067F000040020000E0000000ECC000__00000073AD3FE6B8\n000000067F000040020000E0000000EC8000-000000067F000040020000E0000000ECC000__000000914E3F38F0\n000000067F000040020000E0000000EC8000-000000067F000040020000E0000000ECC000__000000931B9A2710\n000000067F000040020000E0000000ECC000-000000067F000040020000E0000000ED0000__000000574B7FF240\n000000067F000040020000E0000000ECC000-000000067F000040020000E0000000ED0000__00000073AD3FE6B8\n000000067F000040020000E0000000ECC000-000000067F000040020000E0000000ED0000__000000914E3F38F0\n000000067F000040020000E0000000ECC000-000000067F000040020000E0000000ED0000__000000931B9A2710\n000000067F000040020000E0000000ECE381-000000067F000040020000E0000000ED6D60__00000055BCD7D459-000000565C87E419\n000000067F000040020000E0000000ED0000-000000067F000040020000E0000000ED4000__000000574B7FF240\n000000067F000040020000E0000000ED0000-000000067F000040020000E0000000ED4000__00000073AD3FE6B8\n000000067F000040020000E0000000ED0000-000000067F000040020000E0000000ED4000__000000914E3F38F0\n000000067F000040020000E0000000ED0000-000000067F000040020000E0000000ED4000__000000931B9A2710\n000000067F000040020000E0000000ED4000-000000067F000040020000E0000000ED8000__000000574B7FF240\n000000067F0000400200
00E0000000ED4000-000000067F000040020000E0000000ED8000__00000073AD3FE6B8\n000000067F000040020000E0000000ED4000-000000067F000040020000E0000000ED8000__000000914E3F38F0\n000000067F000040020000E0000000ED4000-000000067F000040020000E0000000ED8000__000000931B9A2710\n000000067F000040020000E0000000ED6D60-000000067F000040020000E0000000EDF740__00000055BCD7D459-000000565C87E419\n000000067F000040020000E0000000ED8000-000000067F000040020000E0000000EDC000__000000574B7FF240\n000000067F000040020000E0000000ED8000-000000067F000040020000E0000000EDC000__00000073AD3FE6B8\n000000067F000040020000E0000000ED8000-000000067F000040020000E0000000EDC000__000000914E3F38F0\n000000067F000040020000E0000000ED8000-000000067F000040020000E0000000EDC000__000000931B9A2710\n000000067F000040020000E0000000EDC000-000000067F000040020000E0000000EE0000__000000574B7FF240\n000000067F000040020000E0000000EDC000-000000067F000040020000E0000000EE0000__00000073AD3FE6B8\n000000067F000040020000E0000000EDC000-000000067F000040020000E0000000EE0000__000000914E3F38F0\n000000067F000040020000E0000000EDC000-000000067F000040020000E0000000EE0000__000000931B9A2710\n000000067F000040020000E0000000EDF740-000000067F000040020000E0000000EE8111__00000055BCD7D459-000000565C87E419\n000000067F000040020000E0000000EE0000-000000067F000040020000E0000000EE4000__000000574B7FF240\n000000067F000040020000E0000000EE0000-000000067F000040020000E0000000EE4000__00000073AD3FE6B8\n000000067F000040020000E0000000EE0000-000000067F000040020000E0000000EE4000__000000914E3F38F0\n000000067F000040020000E0000000EE0000-000000067F000040020000E0000000EE4000__000000931B9A2710\n000000067F000040020000E0000000EE4000-000000067F000040020000E0000000EE8000__000000574B7FF240\n000000067F000040020000E0000000EE4000-000000067F000040020000E0000000EE8000__00000073AD3FE6B8\n000000067F000040020000E0000000EE4000-000000067F000040020000E0000000EE8000__000000914E3F38F0\n000000067F000040020000E0000000EE4000-000000067F000040020000E0000000EE8000__000000931B9A2710\n000000067F000040020000E0000000EE8
000-000000067F000040020000E0000000EEC000__000000574B7FF240\n000000067F000040020000E0000000EE8000-000000067F000040020000E0000000EEC000__00000073AD3FE6B8\n000000067F000040020000E0000000EE8000-000000067F000040020000E0000000EEC000__000000914E3F38F0\n000000067F000040020000E0000000EE8000-000000067F000040020000E0000000EEC000__000000931B9A2710\n000000067F000040020000E0000000EE8111-000000067F000040020000E0000000EF0AF9__00000055BCD7D459-000000565C87E419\n000000067F000040020000E0000000EEC000-000000067F000040020000E0000000EF0000__000000574B7FF240\n000000067F000040020000E0000000EEC000-000000067F000040020000E0000000EF0000__00000073AD3FE6B8\n000000067F000040020000E0000000EEC000-000000067F000040020000E0000000EF0000__000000914E3F38F0\n000000067F000040020000E0000000EEC000-000000067F000040020000E0000000EF0000__000000931B9A2710\n000000067F000040020000E0000000EF0000-000000067F000040020000E0000000EF4000__000000572A7A05D8\n000000067F000040020000E0000000EF0000-000000067F000040020000E0000000EF4000__0000005D2FFFFB38\n000000067F000040020000E0000000EF0000-000000067F000040020000E0000000EF4000__00000073AD3FE6B8\n000000067F000040020000E0000000EF0000-000000067F000040020000E0000000EF4000__000000914E3F38F0\n000000067F000040020000E0000000EF0000-000000067F000040020000E0000000EF4000__000000931B9A2710\n000000067F000040020000E0000000EF0AF9-000000067F000040020000E0000100000000__00000055BCD7D459-000000565C87E419\n000000067F000040020000E0000000EF0DBC-000000067F000040020000E0000000EF979A__000000565C87E419-00000056FC37F3D9\n000000067F000040020000E0000000EF4000-000000067F000040020000E0000000EF8000__000000572A7A05D8\n000000067F000040020000E0000000EF4000-000000067F000040020000E0000000EF8000__0000005D2FFFFB38\n000000067F000040020000E0000000EF4000-000000067F000040020000E0000000EF8000__00000073AD3FE6B8\n000000067F000040020000E0000000EF4000-000000067F000040020000E0000000EF8000__000000914E3F38F0\n000000067F000040020000E0000000EF4000-000000067F000040020000E0000000EF8000__000000931B9A2710\n000000067F000040020000E000000
0EF8000-000000067F000040020000E0000000EFC000__000000572A7A05D8\n000000067F000040020000E0000000EF8000-000000067F000040020000E0000000EFC000__0000005D2FFFFB38\n000000067F000040020000E0000000EF8000-000000067F000040020000E0000000EFC000__00000073AD3FE6B8\n000000067F000040020000E0000000EF8000-000000067F000040020000E0000000EFC000__000000914E3F38F0\n000000067F000040020000E0000000EF8000-000000067F000040020000E0000000EFC000__000000931B9A2710\n000000067F000040020000E0000000EF979A-000000067F000040020000E0000000F02175__000000565C87E419-00000056FC37F3D9\n000000067F000040020000E0000000EFC000-000000067F000040020000E0000000F00000__000000572A7A05D8\n000000067F000040020000E0000000EFC000-000000067F000040020000E0000000F00000__0000005D2FFFFB38\n000000067F000040020000E0000000EFC000-000000067F000040020000E0000000F00000__00000073AD3FE6B8\n000000067F000040020000E0000000EFC000-000000067F000040020000E0000000F00000__000000914E3F38F0\n000000067F000040020000E0000000EFC000-000000067F000040020000E0000000F00000__000000931B9A2710\n000000067F000040020000E0000000F00000-000000067F000040020000E0000000F04000__000000572A7A05D8\n000000067F000040020000E0000000F00000-000000067F000040020000E0000000F04000__0000005D2FFFFB38\n000000067F000040020000E0000000F00000-000000067F000040020000E0000000F04000__00000073AD3FE6B8\n000000067F000040020000E0000000F00000-000000067F000040020000E0000000F04000__000000914E3F38F0\n000000067F000040020000E0000000F00000-000000067F000040020000E0000000F04000__000000931B9A2710\n000000067F000040020000E0000000F02175-000000067F000040020000E0000000F0AB56__000000565C87E419-00000056FC37F3D9\n000000067F000040020000E0000000F04000-000000067F000040020000E0000000F08000__000000572A7A05D8\n000000067F000040020000E0000000F04000-000000067F000040020000E0000000F08000__0000005D2FFFFB38\n000000067F000040020000E0000000F04000-000000067F000040020000E0000000F08000__00000073AD3FE6B8\n000000067F000040020000E0000000F04000-000000067F000040020000E0000000F08000__000000914E3F38F0\n000000067F000040020000E0000000F04000-00000
0067F000040020000E0000000F08000__000000931B9A2710\n000000067F000040020000E0000000F08000-000000067F000040020000E0000000F0C000__000000572A7A05D8\n000000067F000040020000E0000000F08000-000000067F000040020000E0000000F0C000__0000005D2FFFFB38\n000000067F000040020000E0000000F08000-000000067F000040020000E0000000F0C000__00000073AD3FE6B8\n000000067F000040020000E0000000F08000-000000067F000040020000E0000000F0C000__000000914E3F38F0\n000000067F000040020000E0000000F08000-000000067F000040020000E0000000F0C000__000000931B9A2710\n000000067F000040020000E0000000F0AB56-000000067F000040020000E0000000F1352C__000000565C87E419-00000056FC37F3D9\n000000067F000040020000E0000000F0C000-000000067F000040020000E0000000F10000__000000572A7A05D8\n000000067F000040020000E0000000F0C000-000000067F000040020000E0000000F10000__0000005D2FFFFB38\n000000067F000040020000E0000000F0C000-000000067F000040020000E0000000F10000__00000073AD3FE6B8\n000000067F000040020000E0000000F0C000-000000067F000040020000E0000000F10000__000000914E3F38F0\n000000067F000040020000E0000000F0C000-000000067F000040020000E0000000F10000__000000931B9A2710\n000000067F000040020000E0000000F10000-000000067F000040020000E0000000F14000__000000572A7A05D8\n000000067F000040020000E0000000F10000-000000067F000040020000E0000000F14000__0000005D2FFFFB38\n000000067F000040020000E0000000F10000-000000067F000040020000E0000000F14000__00000073AD3FE6B8\n000000067F000040020000E0000000F10000-000000067F000040020000E0000000F14000__000000914E3F38F0\n000000067F000040020000E0000000F10000-000000067F000040020000E0000000F14000__000000931B9A2710\n000000067F000040020000E0000000F1352C-000000067F000040020000E0000000F1BF05__000000565C87E419-00000056FC37F3D9\n000000067F000040020000E0000000F14000-000000067F000040020000E0000000F18000__000000572A7A05D8\n000000067F000040020000E0000000F14000-000000067F000040020000E0000000F18000__0000005D2FFFFB38\n000000067F000040020000E0000000F14000-000000067F000040020000E0000000F18000__00000073AD3FE6B8\n000000067F000040020000E0000000F14000-000000067F00004002
0000E0000000F18000__000000914E3F38F0\n000000067F000040020000E0000000F14000-000000067F000040020000E0000000F18000__000000931B9A2710\n000000067F000040020000E0000000F18000-000000067F000040020000E0000000F1C000__000000572A7A05D8\n000000067F000040020000E0000000F18000-000000067F000040020000E0000000F1C000__0000005D2FFFFB38\n000000067F000040020000E0000000F18000-000000067F000040020000E0000000F1C000__00000073AD3FE6B8\n000000067F000040020000E0000000F18000-000000067F000040020000E0000000F1C000__000000914E3F38F0\n000000067F000040020000E0000000F18000-000000067F000040020000E0000000F1C000__000000931B9A2710\n000000067F000040020000E0000000F1BF05-000000067F000040020000E0000000F248DB__000000565C87E419-00000056FC37F3D9\n000000067F000040020000E0000000F1C000-000000067F000040020000E0000000F20000__000000572A7A05D8\n000000067F000040020000E0000000F1C000-000000067F000040020000E0000000F20000__0000005D2FFFFB38\n000000067F000040020000E0000000F1C000-000000067F000040020000E0000000F20000__00000073AD3FE6B8\n000000067F000040020000E0000000F1C000-000000067F000040020000E0000000F20000__000000914E3F38F0\n000000067F000040020000E0000000F1C000-000000067F000040020000E0000000F20000__000000931B9A2710\n000000067F000040020000E0000000F20000-000000067F000040020000E0000000F24000__000000572A7A05D8\n000000067F000040020000E0000000F20000-000000067F000040020000E0000000F24000__0000005D2FFFFB38\n000000067F000040020000E0000000F20000-000000067F000040020000E0000000F24000__00000073AD3FE6B8\n000000067F000040020000E0000000F20000-000000067F000040020000E0000000F24000__000000914E3F38F0\n000000067F000040020000E0000000F20000-000000067F000040020000E0000000F24000__000000931B9A2710\n000000067F000040020000E0000000F24000-000000067F000040020000E0000000F28000__000000572A7A05D8\n000000067F000040020000E0000000F24000-000000067F000040020000E0000000F28000__0000005D2FFFFB38\n000000067F000040020000E0000000F24000-000000067F000040020000E0000000F28000__00000073AD3FE6B8\n000000067F000040020000E0000000F24000-000000067F000040020000E0000000F28000__000000914E
3F38F0\n000000067F000040020000E0000000F24000-000000067F000040020000E0000000F28000__000000931B9A2710\n000000067F000040020000E0000000F248DB-000000067F000040020000E0000000F2D2BA__000000565C87E419-00000056FC37F3D9\n000000067F000040020000E0000000F28000-000000067F000040020000E0000000F2C000__000000572A7A05D8\n000000067F000040020000E0000000F28000-000000067F000040020000E0000000F2C000__0000005D2FFFFB38\n000000067F000040020000E0000000F28000-000000067F000040020000E0000000F2C000__00000073AD3FE6B8\n000000067F000040020000E0000000F28000-000000067F000040020000E0000000F2C000__000000914E3F38F0\n000000067F000040020000E0000000F28000-000000067F000040020000E0000000F2C000__000000931B9A2710\n000000067F000040020000E0000000F2C000-000000067F000040020000E0000000F30000__000000572A7A05D8\n000000067F000040020000E0000000F2C000-000000067F000040020000E0000000F30000__0000005D2FFFFB38\n000000067F000040020000E0000000F2C000-000000067F000040020000E0000000F30000__00000073AD3FE6B8\n000000067F000040020000E0000000F2C000-000000067F000040020000E0000000F30000__000000914E3F38F0\n000000067F000040020000E0000000F2C000-000000067F000040020000E0000000F30000__000000931B9A2710\n000000067F000040020000E0000000F2D2BA-000000067F000040020000E0000000F35CA3__000000565C87E419-00000056FC37F3D9\n000000067F000040020000E0000000F30000-000000067F000040020000E0000000F34000__000000572A7A05D8\n000000067F000040020000E0000000F30000-000000067F000040020000E0000000F34000__0000005D2FFFFB38\n000000067F000040020000E0000000F30000-000000067F000040020000E0000000F34000__00000073AD3FE6B8\n000000067F000040020000E0000000F30000-000000067F000040020000E0000000F34000__000000914E3F38F0\n000000067F000040020000E0000000F30000-000000067F000040020000E0000000F34000__000000931B9A2710\n000000067F000040020000E0000000F34000-000000067F000040020000E0000000F38000__000000572A7A05D8\n000000067F000040020000E0000000F34000-000000067F000040020000E0000000F38000__0000005D2FFFFB38\n000000067F000040020000E0000000F34000-000000067F000040020000E0000000F38000__00000073AD3FE6B8\n00000
0067F000040020000E0000000F34000-000000067F000040020000E0000000F38000__000000914E3F38F0\n000000067F000040020000E0000000F34000-000000067F000040020000E0000000F38000__000000931B9A2710\n000000067F000040020000E0000000F35CA3-000000067F000040020000E0000000F3E680__000000565C87E419-00000056FC37F3D9\n000000067F000040020000E0000000F38000-000000067F000040020000E0000000F3C000__000000572A7A05D8\n000000067F000040020000E0000000F38000-000000067F000040020000E0000000F3C000__0000005D2FFFFB38\n000000067F000040020000E0000000F38000-000000067F000040020000E0000000F3C000__00000073AD3FE6B8\n000000067F000040020000E0000000F38000-000000067F000040020000E0000000F3C000__000000914E3F38F0\n000000067F000040020000E0000000F38000-000000067F000040020000E0000000F3C000__000000931B9A2710\n000000067F000040020000E0000000F3C000-000000067F000040020000E0000000F40000__000000572A7A05D8\n000000067F000040020000E0000000F3C000-000000067F000040020000E0000000F40000__0000005D2FFFFB38\n000000067F000040020000E0000000F3C000-000000067F000040020000E0000000F40000__00000073AD3FE6B8\n000000067F000040020000E0000000F3C000-000000067F000040020000E0000000F40000__000000914E3F38F0\n000000067F000040020000E0000000F3C000-000000067F000040020000E0000000F40000__000000931B9A2710\n000000067F000040020000E0000000F3E680-000000067F000040020000E0000000F4705B__000000565C87E419-00000056FC37F3D9\n000000067F000040020000E0000000F40000-000000067F000040020000E0000000F44000__000000572A7A05D8\n000000067F000040020000E0000000F40000-000000067F000040020000E0000000F44000__0000005D2FFFFB38\n000000067F000040020000E0000000F40000-000000067F000040020000E0000000F44000__00000073AD3FE6B8\n000000067F000040020000E0000000F40000-000000067F000040020000E0000000F44000__000000914E3F38F0\n000000067F000040020000E0000000F40000-000000067F000040020000E0000000F44000__000000931B9A2710\n000000067F000040020000E0000000F44000-000000067F000040020000E0000000F48000__000000572A7A05D8\n000000067F000040020000E0000000F44000-000000067F000040020000E0000000F48000__0000005D2FFFFB38\n000000067F00004002
0000E0000000F44000-000000067F000040020000E0000000F48000__00000073AD3FE6B8\n000000067F000040020000E0000000F44000-000000067F000040020000E0000000F48000__000000914E3F38F0\n000000067F000040020000E0000000F44000-000000067F000040020000E0000000F48000__000000931B9A2710\n000000067F000040020000E0000000F4705B-000000067F000040020000E0000100000000__000000565C87E419-00000056FC37F3D9\n000000067F000040020000E0000000F48000-000000067F000040020000E0000000F4C000__000000572A7A05D8\n000000067F000040020000E0000000F48000-000000067F000040020000E0000000F4C000__0000005D2FFFFB38\n000000067F000040020000E0000000F48000-000000067F000040020000E0000000F4C000__00000073AD3FE6B8\n000000067F000040020000E0000000F48000-000000067F000040020000E0000000F4C000__000000914E3F38F0\n000000067F000040020000E0000000F48000-000000067F000040020000E0000000F4C000__000000931B9A2710\n000000067F000040020000E0000000F4C000-000000067F000040020000E0000000F50000__000000572A7A05D8\n000000067F000040020000E0000000F4C000-000000067F000040020000E0000000F50000__0000005D2FFFFB38\n000000067F000040020000E0000000F4C000-000000067F000040020000E0000000F50000__00000073AD3FE6B8\n000000067F000040020000E0000000F4C000-000000067F000040020000E0000000F50000__000000914E3F38F0\n000000067F000040020000E0000000F4C000-000000067F000040020000E0000000F50000__000000931B9A2710\n000000067F000040020000E0000000F4FCF9-000000067F000040020000E0000000F586CE__00000056FC37F3D9-000000572A7B4CD9\n000000067F000040020000E0000000F50000-000000067F000040020000E0000000F54000__000000572A7A05D8\n000000067F000040020000E0000000F50000-000000067F000040020000E0000000F54000__0000005D2FFFFB38\n000000067F000040020000E0000000F50000-000000067F000040020000E0000000F54000__00000073AD3FE6B8\n000000067F000040020000E0000000F50000-000000067F000040020000E0000000F54000__000000914E3F38F0\n000000067F000040020000E0000000F50000-000000067F000040020000E0000000F54000__000000931B9A2710\n000000067F000040020000E0000000F54000-000000067F000040020000E0000000F58000__000000572A7A05D8\n000000067F000040020000E0000000F
54000-000000067F000040020000E0000000F58000__0000005D2FFFFB38\n000000067F000040020000E0000000F54000-000000067F000040020000E0000000F58000__00000073AD3FE6B8\n000000067F000040020000E0000000F54000-000000067F000040020000E0000000F58000__000000914E3F38F0\n000000067F000040020000E0000000F54000-000000067F000040020000E0000000F58000__000000931B9A2710\n000000067F000040020000E0000000F58000-000000067F000040020000E0000000F5C000__000000572A7A05D8\n000000067F000040020000E0000000F58000-000000067F000040020000E0000000F5C000__0000005D2FFFFB38\n000000067F000040020000E0000000F58000-000000067F000040020000E0000000F5C000__00000073AD3FE6B8\n000000067F000040020000E0000000F58000-000000067F000040020000E0000000F5C000__000000914E3F38F0\n000000067F000040020000E0000000F58000-000000067F000040020000E0000000F5C000__000000931B9A2710\n000000067F000040020000E0000000F586CE-030000000000000000000000000000000002__00000056FC37F3D9-000000572A7B4CD9\n000000067F000040020000E0000000F5C000-000000067F000040020000E0000000F60000__000000572A7A05D8\n000000067F000040020000E0000000F5C000-000000067F000040020000E0000000F60000__0000005D2FFFFB38\n000000067F000040020000E0000000F5C000-000000067F000040020000E0000000F60000__00000073AD3FE6B8\n000000067F000040020000E0000000F5C000-000000067F000040020000E0000000F60000__000000914E3F38F0\n000000067F000040020000E0000000F5C000-000000067F000040020000E0000000F60000__000000931B9A2710\n000000067F000040020000E0000000F60000-000000067F000040020000E0050100000000__0000005D2FFFFB38\n000000067F000040020000E0000000F60000-000000067F000040020000E0050100000000__00000073AD3FE6B8\n000000067F000040020000E0000000F60000-000000067F000040020000E0050100000000__000000914E3F38F0\n000000067F000040020000E0000000F60000-000000067F000040020000E0050100000000__000000931B9A2710\n000000067F000040020000E0000000F60000-030000000000000000000000000000000002__000000572A7A05D8\n000000067F000040020000E00000FFFFFFFF-000000067F000040020000E0000100000000__0000003B6A101880-00000043C5DDFE18\n000000067F000040020000E00000FFFFFFFF-0000000
67F000040020000E0000100000000__00000043C5DDFE18-00000047441DEA39\n000000067F00004002000100000000000000-000000067F00004002000100000000004000__0000005D2FFFFB38\n000000067F00004002000100000000000000-000000067F00004002000100000000004000__00000073AD3FE6B8\n000000067F00004002000100000000000000-000000067F00004002000100000000004000__000000914E3F38F0\n000000067F00004002000100000000000000-000000067F00004002000100000000004000__000000931B9A2710\n000000067F00004002000100000000004000-000000067F00004002000100000000008000__0000005D2FFFFB38\n000000067F00004002000100000000004000-000000067F00004002000100000000008000__00000073AD3FE6B8\n000000067F00004002000100000000004000-000000067F00004002000100000000008000__000000914E3F38F0\n000000067F00004002000100000000004000-000000067F00004002000100000000008000__000000931B9A2710\n000000067F00004002000100000000008000-000000067F0000400200010000000000C000__0000005D2FFFFB38\n000000067F00004002000100000000008000-000000067F0000400200010000000000C000__00000073AD3FE6B8\n000000067F00004002000100000000008000-000000067F0000400200010000000000C000__000000914E3F38F0\n000000067F00004002000100000000008000-000000067F0000400200010000000000C000__000000931B9A2710\n000000067F0000400200010000000000899C-000000067F0000400200010000000001137C__000000572A7C74A1-0000005CA7BBD6F9\n000000067F0000400200010000000000C000-000000067F00004002000100000000010000__0000005D2FFFFB38\n000000067F0000400200010000000000C000-000000067F00004002000100000000010000__00000073AD3FE6B8\n000000067F0000400200010000000000C000-000000067F00004002000100000000010000__000000914E3F38F0\n000000067F0000400200010000000000C000-000000067F00004002000100000000010000__000000931B9A2710\n000000067F00004002000100000000010000-000000067F00004002000100000000014000__0000005D2FFFFB38\n000000067F00004002000100000000010000-000000067F00004002000100000000014000__00000073AD3FE6B8\n000000067F00004002000100000000010000-000000067F00004002000100000000014000__000000914E3F38F0\n000000067F00004002000100000000010000-000000067F0000400200
0100000000014000__000000931B9A2710\n000000067F0000400200010000000001137C-000000067F00004002000100000000019D79__000000572A7C74A1-0000005CA7BBD6F9\n000000067F00004002000100000000014000-000000067F00004002000100000000018000__0000005D2FFFFB38\n000000067F00004002000100000000014000-000000067F00004002000100000000018000__00000073AD3FE6B8\n000000067F00004002000100000000014000-000000067F00004002000100000000018000__000000914E3F38F0\n000000067F00004002000100000000014000-000000067F00004002000100000000018000__000000931B9A2710\n000000067F00004002000100000000018000-000000067F0000400200010000000001C000__0000005D2FFFFB38\n000000067F00004002000100000000018000-000000067F0000400200010000000001C000__00000073AD3FE6B8\n000000067F00004002000100000000018000-000000067F0000400200010000000001C000__000000914E3F38F0\n000000067F00004002000100000000018000-000000067F0000400200010000000001C000__000000931B9A2710\n000000067F00004002000100000000019D79-000000067F00004002000100000000022776__000000572A7C74A1-0000005CA7BBD6F9\n000000067F0000400200010000000001C000-000000067F00004002000100000000020000__0000005D2FFFFB38\n000000067F0000400200010000000001C000-000000067F00004002000100000000020000__00000073AD3FE6B8\n000000067F0000400200010000000001C000-000000067F00004002000100000000020000__000000914E3F38F0\n000000067F0000400200010000000001C000-000000067F00004002000100000000020000__000000931B9A2710\n000000067F00004002000100000000020000-000000067F00004002000100000000024000__0000005D2FFFFB38\n000000067F00004002000100000000020000-000000067F00004002000100000000024000__00000073AD3FE6B8\n000000067F00004002000100000000020000-000000067F00004002000100000000024000__000000914E3F38F0\n000000067F00004002000100000000020000-000000067F00004002000100000000024000__000000931B9A2710\n000000067F00004002000100000000022776-000000067F0000400200010000000002B15B__000000572A7C74A1-0000005CA7BBD6F9\n000000067F00004002000100000000024000-000000067F00004002000100000000028000__0000005D2FFFFB38\n000000067F00004002000100000000024000-000000067F000040
02000100000000028000__00000073AD3FE6B8\n000000067F00004002000100000000024000-000000067F00004002000100000000028000__000000914E3F38F0\n000000067F00004002000100000000024000-000000067F00004002000100000000028000__000000931B9A2710\n000000067F00004002000100000000028000-000000067F0000400200010000000002C000__0000005D2FFFFB38\n000000067F00004002000100000000028000-000000067F0000400200010000000002C000__00000073AD3FE6B8\n000000067F00004002000100000000028000-000000067F0000400200010000000002C000__000000914E3F38F0\n000000067F00004002000100000000028000-000000067F0000400200010000000002C000__000000931B9A2710\n000000067F0000400200010000000002B15B-000000067F00004002000100000000033B2F__000000572A7C74A1-0000005CA7BBD6F9\n000000067F0000400200010000000002C000-000000067F00004002000100000000030000__0000005D2FFFFB38\n000000067F0000400200010000000002C000-000000067F00004002000100000000030000__00000073AD3FE6B8\n000000067F0000400200010000000002C000-000000067F00004002000100000000030000__000000914E3F38F0\n000000067F0000400200010000000002C000-000000067F00004002000100000000030000__000000931B9A2710\n000000067F00004002000100000000030000-000000067F00004002000100000000034000__0000005D2FFFFB38\n000000067F00004002000100000000030000-000000067F00004002000100000000034000__00000073AD3FE6B8\n000000067F00004002000100000000030000-000000067F00004002000100000000034000__000000914E3F38F0\n000000067F00004002000100000000030000-000000067F00004002000100000000034000__000000931B9A2710\n000000067F00004002000100000000033B2F-000000067F0000400200010000000003C4EA__000000572A7C74A1-0000005CA7BBD6F9\n000000067F00004002000100000000034000-000000067F00004002000100000000038000__0000005D2FFFFB38\n000000067F00004002000100000000034000-000000067F00004002000100000000038000__00000073AD3FE6B8\n000000067F00004002000100000000034000-000000067F00004002000100000000038000__000000914E3F38F0\n000000067F00004002000100000000034000-000000067F00004002000100000000038000__000000931B9A2710\n000000067F00004002000100000000038000-000000067F0000400200010000000
003C000__0000005D2FFFFB38\n000000067F00004002000100000000038000-000000067F0000400200010000000003C000__00000073AD3FE6B8\n000000067F00004002000100000000038000-000000067F0000400200010000000003C000__000000914E3F38F0\n000000067F00004002000100000000038000-000000067F0000400200010000000003C000__000000931B9A2710\n000000067F0000400200010000000003C000-000000067F00004002000100000000040000__0000005D2FFFFB38\n000000067F0000400200010000000003C000-000000067F00004002000100000000040000__00000073AD3FE6B8\n000000067F0000400200010000000003C000-000000067F00004002000100000000040000__000000914E3F38F0\n000000067F0000400200010000000003C000-000000067F00004002000100000000040000__000000931B9A2710\n000000067F0000400200010000000003C4EA-000000067F00004002000100000000044EA8__000000572A7C74A1-0000005CA7BBD6F9\n000000067F00004002000100000000040000-000000067F00004002000100000000044000__0000005D2FFFFB38\n000000067F00004002000100000000040000-000000067F00004002000100000000044000__00000073AD3FE6B8\n000000067F00004002000100000000040000-000000067F00004002000100000000044000__000000914E3F38F0\n000000067F00004002000100000000040000-000000067F00004002000100000000044000__000000931B9A2710\n000000067F00004002000100000000044000-000000067F00004002000100000000048000__0000005D2FFFFB38\n000000067F00004002000100000000044000-000000067F00004002000100000000048000__00000073AD3FE6B8\n000000067F00004002000100000000044000-000000067F00004002000100000000048000__000000914E3F38F0\n000000067F00004002000100000000044000-000000067F00004002000100000000048000__000000931B9A2710\n000000067F00004002000100000000044EA8-000000067F0000400200010000000004D890__000000572A7C74A1-0000005CA7BBD6F9\n000000067F00004002000100000000048000-000000067F0000400200010000000004C000__0000005D2FFFFB38\n000000067F00004002000100000000048000-000000067F0000400200010000000004C000__00000073AD3FE6B8\n000000067F00004002000100000000048000-000000067F0000400200010000000004C000__000000914E3F38F0\n000000067F00004002000100000000048000-000000067F0000400200010000000004C000__0000
00931B9A2710\n000000067F0000400200010000000004C000-000000067F00004002000100000000050000__0000005D2FFFFB38\n000000067F0000400200010000000004C000-000000067F00004002000100000000050000__00000073AD3FE6B8\n000000067F0000400200010000000004C000-000000067F00004002000100000000050000__000000914E3F38F0\n000000067F0000400200010000000004C000-000000067F00004002000100000000050000__000000931B9A2710\n000000067F0000400200010000000004D890-000000067F00004002000100000000056296__000000572A7C74A1-0000005CA7BBD6F9\n000000067F00004002000100000000050000-000000067F00004002000100000000054000__0000005D2FFFFB38\n000000067F00004002000100000000050000-000000067F00004002000100000000054000__00000073AD3FE6B8\n000000067F00004002000100000000050000-000000067F00004002000100000000054000__000000914E3F38F0\n000000067F00004002000100000000050000-000000067F00004002000100000000054000__000000931B9A2710\n000000067F00004002000100000000054000-000000067F00004002000100000000058000__0000005D2FFFFB38\n000000067F00004002000100000000054000-000000067F00004002000100000000058000__00000073AD3FE6B8\n000000067F00004002000100000000054000-000000067F00004002000100000000058000__000000914E3F38F0\n000000067F00004002000100000000054000-000000067F00004002000100000000058000__000000931B9A2710\n000000067F00004002000100000000056296-000000067F0000400200010000000005EC8C__000000572A7C74A1-0000005CA7BBD6F9\n000000067F00004002000100000000058000-000000067F0000400200010000000005C000__0000005D2FFFFB38\n000000067F00004002000100000000058000-000000067F0000400200010000000005C000__00000073AD3FE6B8\n000000067F00004002000100000000058000-000000067F0000400200010000000005C000__000000914E3F38F0\n000000067F00004002000100000000058000-000000067F0000400200010000000005C000__000000931B9A2710\n000000067F0000400200010000000005C000-000000067F00004002000100000000060000__0000005D2FFFFB38\n000000067F0000400200010000000005C000-000000067F00004002000100000000060000__00000073AD3FE6B8\n000000067F0000400200010000000005C000-000000067F00004002000100000000060000__000000914E3F38F0\
n000000067F0000400200010000000005C000-000000067F00004002000100000000060000__000000931B9A2710\n000000067F0000400200010000000005EC8C-000000067F00004002000100000000067682__000000572A7C74A1-0000005CA7BBD6F9\n000000067F00004002000100000000060000-000000067F00004002000100000000064000__0000005D2FFFFB38\n000000067F00004002000100000000060000-000000067F00004002000100000000064000__00000073AD3FE6B8\n000000067F00004002000100000000060000-000000067F00004002000100000000064000__000000914E3F38F0\n000000067F00004002000100000000060000-000000067F00004002000100000000064000__000000931B9A2710\n000000067F00004002000100000000064000-000000067F00004002000100000000068000__0000005D2FFFFB38\n000000067F00004002000100000000064000-000000067F00004002000100000000068000__00000073AD3FE6B8\n000000067F00004002000100000000064000-000000067F00004002000100000000068000__000000914E3F38F0\n000000067F00004002000100000000064000-000000067F00004002000100000000068000__000000931B9A2710\n000000067F00004002000100000000067682-000000067F00004002000100000000070046__000000572A7C74A1-0000005CA7BBD6F9\n000000067F00004002000100000000068000-000000067F0000400200010000000006C000__0000005D2FFFFB38\n000000067F00004002000100000000068000-000000067F0000400200010000000006C000__00000073AD3FE6B8\n000000067F00004002000100000000068000-000000067F0000400200010000000006C000__000000914E3F38F0\n000000067F00004002000100000000068000-000000067F0000400200010000000006C000__000000931B9A2710\n000000067F0000400200010000000006C000-000000067F00004002000100000000070000__0000005D2FFFFB38\n000000067F0000400200010000000006C000-000000067F00004002000100000000070000__00000073AD3FE6B8\n000000067F0000400200010000000006C000-000000067F00004002000100000000070000__000000914E3F38F0\n000000067F0000400200010000000006C000-000000067F00004002000100000000070000__000000931B9A2710\n000000067F00004002000100000000070000-000000067F00004002000100000000074000__0000005D2FFFFB38\n000000067F00004002000100000000070000-000000067F00004002000100000000074000__00000073AD3FE6B8\n000000067F00
004002000100000000070000-000000067F00004002000100000000074000__000000914E3F38F0\n000000067F00004002000100000000070000-000000067F00004002000100000000074000__000000931B9A2710\n000000067F00004002000100000000070046-000000067F00004002000100000000078A01__000000572A7C74A1-0000005CA7BBD6F9\n000000067F00004002000100000000074000-000000067F00004002000100000000078000__0000005D2FFFFB38\n000000067F00004002000100000000074000-000000067F00004002000100000000078000__00000073AD3FE6B8\n000000067F00004002000100000000074000-000000067F00004002000100000000078000__000000914E3F38F0\n000000067F00004002000100000000074000-000000067F00004002000100000000078000__000000931B9A2710\n000000067F00004002000100000000078000-000000067F0000400200010000000007C000__0000005D2FFFFB38\n000000067F00004002000100000000078000-000000067F0000400200010000000007C000__00000073AD3FE6B8\n000000067F00004002000100000000078000-000000067F0000400200010000000007C000__000000914E3F38F0\n000000067F00004002000100000000078000-000000067F0000400200010000000007C000__000000931B9A2710\n000000067F00004002000100000000078A01-000000067F000040020001000000000813B5__000000572A7C74A1-0000005CA7BBD6F9\n000000067F0000400200010000000007C000-000000067F00004002000100000000080000__0000005D2FFFFB38\n000000067F0000400200010000000007C000-000000067F00004002000100000000080000__00000073AD3FE6B8\n000000067F0000400200010000000007C000-000000067F00004002000100000000080000__000000914E3F38F0\n000000067F0000400200010000000007C000-000000067F00004002000100000000080000__000000931B9A2710\n000000067F00004002000100000000080000-000000067F00004002000100000000084000__0000005D2FFFFB38\n000000067F00004002000100000000080000-000000067F00004002000100000000084000__00000073AD3FE6B8\n000000067F00004002000100000000080000-000000067F00004002000100000000084000__000000914E3F38F0\n000000067F00004002000100000000080000-000000067F00004002000100000000084000__000000931B9A2710\n000000067F000040020001000000000813B5-000000067F00004002000100000000089DAC__000000572A7C74A1-0000005CA7BBD6F9\n00000006
7F00004002000100000000084000-000000067F00004002000100000000088000__0000005D2FFFFB38\n000000067F00004002000100000000084000-000000067F00004002000100000000088000__00000073AD3FE6B8\n000000067F00004002000100000000084000-000000067F00004002000100000000088000__000000914E3F38F0\n000000067F00004002000100000000084000-000000067F00004002000100000000088000__000000931B9A2710\n000000067F00004002000100000000088000-000000067F0000400200010000000008C000__0000005D2FFFFB38\n000000067F00004002000100000000088000-000000067F0000400200010000000008C000__00000073AD3FE6B8\n000000067F00004002000100000000088000-000000067F0000400200010000000008C000__000000914E3F38F0\n000000067F00004002000100000000088000-000000067F0000400200010000000008C000__000000931B9A2710\n000000067F00004002000100000000089DAC-000000067F000040020001000000000927AD__000000572A7C74A1-0000005CA7BBD6F9\n000000067F0000400200010000000008C000-000000067F00004002000100000000090000__0000005D2FFFFB38\n000000067F0000400200010000000008C000-000000067F00004002000100000000090000__00000073AD3FE6B8\n000000067F0000400200010000000008C000-000000067F00004002000100000000090000__000000914E3F38F0\n000000067F0000400200010000000008C000-000000067F00004002000100000000090000__000000931B9A2710\n000000067F00004002000100000000090000-000000067F00004002000100000000094000__0000005D2FFFFB38\n000000067F00004002000100000000090000-000000067F00004002000100000000094000__00000073AD3FE6B8\n000000067F00004002000100000000090000-000000067F00004002000100000000094000__000000914E3F38F0\n000000067F00004002000100000000090000-000000067F00004002000100000000094000__000000931B9A2710\n000000067F000040020001000000000927AD-000000067F0000400200010000000009B1A0__000000572A7C74A1-0000005CA7BBD6F9\n000000067F00004002000100000000094000-000000067F00004002000100000000098000__0000005D2FFFFB38\n000000067F00004002000100000000094000-000000067F00004002000100000000098000__00000073AD3FE6B8\n000000067F00004002000100000000094000-000000067F00004002000100000000098000__000000914E3F38F0\n000000067F00004002000
100000000094000-000000067F00004002000100000000098000__000000931B9A2710\n000000067F00004002000100000000098000-000000067F0000400200010000000009C000__0000005D2FFFFB38\n000000067F00004002000100000000098000-000000067F0000400200010000000009C000__00000073AD3FE6B8\n000000067F00004002000100000000098000-000000067F0000400200010000000009C000__000000914E3F38F0\n000000067F00004002000100000000098000-000000067F0000400200010000000009C000__000000931B9A2710\n000000067F0000400200010000000009B1A0-000000067F000040020001000000000A3B86__000000572A7C74A1-0000005CA7BBD6F9\n000000067F0000400200010000000009C000-000000067F000040020001000000000A0000__0000005D2FFFFB38\n000000067F0000400200010000000009C000-000000067F000040020001000000000A0000__00000073AD3FE6B8\n000000067F0000400200010000000009C000-000000067F000040020001000000000A0000__000000914E3F38F0\n000000067F0000400200010000000009C000-000000067F000040020001000000000A0000__000000931B9A2710\n000000067F000040020001000000000A0000-000000067F000040020001000000000A4000__0000005D2FFFFB38\n000000067F000040020001000000000A0000-000000067F000040020001000000000A4000__00000073AD3FE6B8\n000000067F000040020001000000000A0000-000000067F000040020001000000000A4000__000000914E3F38F0\n000000067F000040020001000000000A0000-000000067F000040020001000000000A4000__000000931B9A2710\n000000067F000040020001000000000A3B86-000000067F000040020001000000000AC549__000000572A7C74A1-0000005CA7BBD6F9\n000000067F000040020001000000000A4000-000000067F000040020001000000000A8000__0000005D2FFFFB38\n000000067F000040020001000000000A4000-000000067F000040020001000000000A8000__00000073AD3FE6B8\n000000067F000040020001000000000A4000-000000067F000040020001000000000A8000__000000914E3F38F0\n000000067F000040020001000000000A4000-000000067F000040020001000000000A8000__000000931B9A2710\n000000067F000040020001000000000A8000-000000067F000040020001000000000AC000__0000005D2FFFFB38\n000000067F000040020001000000000A8000-000000067F000040020001000000000AC000__00000073AD3FE6B8\n000000067F000040020001000000000A80
00-000000067F000040020001000000000AC000__000000914E3F38F0\n000000067F000040020001000000000A8000-000000067F000040020001000000000AC000__000000931B9A2710\n000000067F000040020001000000000AC000-000000067F000040020001000000000B0000__0000005D2FFFFB38\n000000067F000040020001000000000AC000-000000067F000040020001000000000B0000__00000073AD3FE6B8\n000000067F000040020001000000000AC000-000000067F000040020001000000000B0000__000000914E3F38F0\n000000067F000040020001000000000AC000-000000067F000040020001000000000B0000__000000931B9A2710\n000000067F000040020001000000000AC549-000000067F000040020001000000000B4F06__000000572A7C74A1-0000005CA7BBD6F9\n000000067F000040020001000000000B0000-000000067F000040020001000000000B4000__0000005D2FFFFB38\n000000067F000040020001000000000B0000-000000067F000040020001000000000B4000__00000073AD3FE6B8\n000000067F000040020001000000000B0000-000000067F000040020001000000000B4000__000000914E3F38F0\n000000067F000040020001000000000B0000-000000067F000040020001000000000B4000__000000931B9A2710\n000000067F000040020001000000000B4000-000000067F000040020001000000000B8000__0000005D2FFFFB38\n000000067F000040020001000000000B4000-000000067F000040020001000000000B8000__00000073AD3FE6B8\n000000067F000040020001000000000B4000-000000067F000040020001000000000B8000__000000914E3F38F0\n000000067F000040020001000000000B4000-000000067F000040020001000000000B8000__000000931B9A2710\n000000067F000040020001000000000B4F06-000000067F000040020001000000000BD8C7__000000572A7C74A1-0000005CA7BBD6F9\n000000067F000040020001000000000B8000-000000067F000040020001000000000BC000__0000005D2FFFFB38\n000000067F000040020001000000000B8000-000000067F000040020001000000000BC000__00000073AD3FE6B8\n000000067F000040020001000000000B8000-000000067F000040020001000000000BC000__000000914E3F38F0\n000000067F000040020001000000000B8000-000000067F000040020001000000000BC000__000000931B9A2710\n000000067F000040020001000000000BC000-000000067F000040020001000000000C0000__0000005D2FFFFB38\n000000067F000040020001000000000BC000-000000067F
000040020001000000000C0000__00000073AD3FE6B8\n000000067F000040020001000000000BC000-000000067F000040020001000000000C0000__000000914E3F38F0\n000000067F000040020001000000000BC000-000000067F000040020001000000000C0000__000000931B9A2710\n000000067F000040020001000000000BD8C7-000000067F000040020001000000000C62CB__000000572A7C74A1-0000005CA7BBD6F9\n000000067F000040020001000000000C0000-000000067F000040020001000000000C4000__0000005D2FFFFB38\n000000067F000040020001000000000C0000-000000067F000040020001000000000C4000__00000073AD3FE6B8\n000000067F000040020001000000000C0000-000000067F000040020001000000000C4000__000000914E3F38F0\n000000067F000040020001000000000C0000-000000067F000040020001000000000C4000__000000931B9A2710\n000000067F000040020001000000000C4000-000000067F000040020001000000000C8000__0000005D2FFFFB38\n000000067F000040020001000000000C4000-000000067F000040020001000000000C8000__00000073AD3FE6B8\n000000067F000040020001000000000C4000-000000067F000040020001000000000C8000__000000914E3F38F0\n000000067F000040020001000000000C4000-000000067F000040020001000000000C8000__000000931B9A2710\n000000067F000040020001000000000C62CB-000000067F000040020001000000000CECC9__000000572A7C74A1-0000005CA7BBD6F9\n000000067F000040020001000000000C8000-000000067F000040020001000000000CC000__0000005D2FFFFB38\n000000067F000040020001000000000C8000-000000067F000040020001000000000CC000__00000073AD3FE6B8\n000000067F000040020001000000000C8000-000000067F000040020001000000000CC000__000000914E3F38F0\n000000067F000040020001000000000C8000-000000067F000040020001000000000CC000__000000931B9A2710\n000000067F000040020001000000000CC000-000000067F000040020001000000000D0000__0000005D2FFFFB38\n000000067F000040020001000000000CC000-000000067F000040020001000000000D0000__00000073AD3FE6B8\n000000067F000040020001000000000CC000-000000067F000040020001000000000D0000__000000914E3F38F0\n000000067F000040020001000000000CC000-000000067F000040020001000000000D0000__000000931B9A2710\n000000067F000040020001000000000CECC9-000000067F0000400200010
00000000D76B8__000000572A7C74A1-0000005CA7BBD6F9\n000000067F000040020001000000000D0000-000000067F000040020001000000000D4000__0000005D2FFFFB38\n000000067F000040020001000000000D0000-000000067F000040020001000000000D4000__00000073AD3FE6B8\n000000067F000040020001000000000D0000-000000067F000040020001000000000D4000__000000914E3F38F0\n000000067F000040020001000000000D0000-000000067F000040020001000000000D4000__000000931B9A2710\n000000067F000040020001000000000D4000-000000067F000040020001000000000D8000__0000005D2FFFFB38\n000000067F000040020001000000000D4000-000000067F000040020001000000000D8000__00000073AD3FE6B8\n000000067F000040020001000000000D4000-000000067F000040020001000000000D8000__000000914E3F38F0\n000000067F000040020001000000000D4000-000000067F000040020001000000000D8000__000000931B9A2710\n000000067F000040020001000000000D76B8-000000067F000040020001000000000E0094__000000572A7C74A1-0000005CA7BBD6F9\n000000067F000040020001000000000D8000-000000067F000040020001000000000DC000__0000005D2FFFFB38\n000000067F000040020001000000000D8000-000000067F000040020001000000000DC000__00000073AD3FE6B8\n000000067F000040020001000000000D8000-000000067F000040020001000000000DC000__000000914E3F38F0\n000000067F000040020001000000000D8000-000000067F000040020001000000000DC000__000000931B9A2710\n000000067F000040020001000000000DC000-000000067F000040020001000000000E0000__0000005D2FFFFB38\n000000067F000040020001000000000DC000-000000067F000040020001000000000E0000__00000073AD3FE6B8\n000000067F000040020001000000000DC000-000000067F000040020001000000000E0000__000000914E3F38F0\n000000067F000040020001000000000DC000-000000067F000040020001000000000E0000__000000931B9A2710\n000000067F000040020001000000000E0000-000000067F000040020001000000000E4000__0000005D2FFFFB38\n000000067F000040020001000000000E0000-000000067F000040020001000000000E4000__00000073AD3FE6B8\n000000067F000040020001000000000E0000-000000067F000040020001000000000E4000__000000914E3F38F0\n000000067F000040020001000000000E0000-000000067F000040020001000000000E4000
__000000931B9A2710\n000000067F000040020001000000000E0094-000000067F000040020001000000000E8A61__000000572A7C74A1-0000005CA7BBD6F9\n000000067F000040020001000000000E4000-000000067F000040020001000000000E8000__0000005D2FFFFB38\n000000067F000040020001000000000E4000-000000067F000040020001000000000E8000__00000073AD3FE6B8\n000000067F000040020001000000000E4000-000000067F000040020001000000000E8000__000000914E3F38F0\n000000067F000040020001000000000E4000-000000067F000040020001000000000E8000__000000931B9A2710\n000000067F000040020001000000000E8000-000000067F000040020001000000000EC000__0000005D2FFFFB38\n000000067F000040020001000000000E8000-000000067F000040020001000000000EC000__00000073AD3FE6B8\n000000067F000040020001000000000E8000-000000067F000040020001000000000EC000__000000914E3F38F0\n000000067F000040020001000000000E8000-000000067F000040020001000000000EC000__000000931B9A2710\n000000067F000040020001000000000E8A61-000000067F000040020001000000000F1423__000000572A7C74A1-0000005CA7BBD6F9\n000000067F000040020001000000000EC000-000000067F000040020001000000000F0000__0000005D2FFFFB38\n000000067F000040020001000000000EC000-000000067F000040020001000000000F0000__00000073AD3FE6B8\n000000067F000040020001000000000EC000-000000067F000040020001000000000F0000__000000914E3F38F0\n000000067F000040020001000000000EC000-000000067F000040020001000000000F0000__000000931B9A2710\n000000067F000040020001000000000F0000-000000067F000040020001000000000F4000__0000005D2FFFFB38\n000000067F000040020001000000000F0000-000000067F000040020001000000000F4000__00000073AD3FE6B8\n000000067F000040020001000000000F0000-000000067F000040020001000000000F4000__000000914E3F38F0\n000000067F000040020001000000000F0000-000000067F000040020001000000000F4000__000000931B9A2710\n000000067F000040020001000000000F1423-000000067F000040020001000000000F9DE5__000000572A7C74A1-0000005CA7BBD6F9\n000000067F000040020001000000000F4000-000000067F000040020001000000000F8000__0000005D2FFFFB38\n000000067F000040020001000000000F4000-000000067F000040020001000000000F
8000__00000073AD3FE6B8\n000000067F000040020001000000000F4000-000000067F000040020001000000000F8000__000000914E3F38F0\n000000067F000040020001000000000F4000-000000067F000040020001000000000F8000__000000931B9A2710\n000000067F000040020001000000000F8000-000000067F000040020001000000000FC000__0000005D2FFFFB38\n000000067F000040020001000000000F8000-000000067F000040020001000000000FC000__00000073AD3FE6B8\n000000067F000040020001000000000F8000-000000067F000040020001000000000FC000__000000914E3F38F0\n000000067F000040020001000000000F8000-000000067F000040020001000000000FC000__000000931B9A2710\n000000067F000040020001000000000F9DE5-000000067F000040020001000000001027EC__000000572A7C74A1-0000005CA7BBD6F9\n000000067F000040020001000000000FC000-000000067F00004002000100000000100000__0000005D2FFFFB38\n000000067F000040020001000000000FC000-000000067F00004002000100000000100000__00000073AD3FE6B8\n000000067F000040020001000000000FC000-000000067F00004002000100000000100000__000000914E3F38F0\n000000067F000040020001000000000FC000-000000067F00004002000100000000100000__000000931B9A2710\n000000067F00004002000100000000100000-000000067F00004002000100000000104000__0000005D2FFFFB38\n000000067F00004002000100000000100000-000000067F00004002000100000000104000__00000073AD3FE6B8\n000000067F00004002000100000000100000-000000067F00004002000100000000104000__000000914E3F38F0\n000000067F00004002000100000000100000-000000067F00004002000100000000104000__000000931B9A2710\n000000067F000040020001000000001027EC-000000067F0000400200010000000010B1E9__000000572A7C74A1-0000005CA7BBD6F9\n000000067F00004002000100000000104000-000000067F00004002000100000000108000__0000005D2FFFFB38\n000000067F00004002000100000000104000-000000067F00004002000100000000108000__00000073AD3FE6B8\n000000067F00004002000100000000104000-000000067F00004002000100000000108000__000000914E3F38F0\n000000067F00004002000100000000104000-000000067F00004002000100000000108000__000000931B9A2710\n000000067F00004002000100000000108000-000000067F0000400200010000000010C000__0000005
D2FFFFB38\n000000067F00004002000100000000108000-000000067F0000400200010000000010C000__00000073AD3FE6B8\n000000067F00004002000100000000108000-000000067F0000400200010000000010C000__000000914E3F38F0\n000000067F00004002000100000000108000-000000067F0000400200010000000010C000__000000931B9A2710\n000000067F0000400200010000000010B1E9-000000067F00004002000100000000113BDB__000000572A7C74A1-0000005CA7BBD6F9\n000000067F0000400200010000000010C000-000000067F00004002000100000000110000__0000005D2FFFFB38\n000000067F0000400200010000000010C000-000000067F00004002000100000000110000__00000073AD3FE6B8\n000000067F0000400200010000000010C000-000000067F00004002000100000000110000__000000914E3F38F0\n000000067F0000400200010000000010C000-000000067F00004002000100000000110000__000000931B9A2710\n000000067F00004002000100000000110000-000000067F00004002000100000000114000__0000005D2FFFFB38\n000000067F00004002000100000000110000-000000067F00004002000100000000114000__00000073AD3FE6B8\n000000067F00004002000100000000110000-000000067F00004002000100000000114000__000000914E3F38F0\n000000067F00004002000100000000110000-000000067F00004002000100000000114000__000000931B9A2710\n000000067F00004002000100000000113BDB-000000067F0000400200010000000011C5C3__000000572A7C74A1-0000005CA7BBD6F9\n000000067F00004002000100000000114000-000000067F00004002000100000000118000__0000005D2FFFFB38\n000000067F00004002000100000000114000-000000067F00004002000100000000118000__00000073AD3FE6B8\n000000067F00004002000100000000114000-000000067F00004002000100000000118000__000000914E3F38F0\n000000067F00004002000100000000114000-000000067F00004002000100000000118000__000000931B9A2710\n000000067F00004002000100000000118000-000000067F0000400200010000000011C000__0000005D2FFFFB38\n000000067F00004002000100000000118000-000000067F0000400200010000000011C000__00000073AD3FE6B8\n000000067F00004002000100000000118000-000000067F0000400200010000000011C000__000000914E3F38F0\n000000067F00004002000100000000118000-000000067F0000400200010000000011C000__000000931B9A2710\n00
0000067F0000400200010000000011C000-000000067F00004002000100000000120000__0000005D2FFFFB38\n000000067F0000400200010000000011C000-000000067F00004002000100000000120000__00000073AD3FE6B8\n000000067F0000400200010000000011C000-000000067F00004002000100000000120000__000000914E3F38F0\n000000067F0000400200010000000011C000-000000067F00004002000100000000120000__000000931B9A2710\n000000067F0000400200010000000011C5C3-000000067F00004002000100000000124F94__000000572A7C74A1-0000005CA7BBD6F9\n000000067F00004002000100000000120000-000000067F00004002000100000000124000__0000005D2FFFFB38\n000000067F00004002000100000000120000-000000067F00004002000100000000124000__00000073AD3FE6B8\n000000067F00004002000100000000120000-000000067F00004002000100000000124000__000000914E3F38F0\n000000067F00004002000100000000120000-000000067F00004002000100000000124000__000000931B9A2710\n000000067F00004002000100000000124000-000000067F00004002000100000000128000__0000005D2FFFFB38\n000000067F00004002000100000000124000-000000067F00004002000100000000128000__00000073AD3FE6B8\n000000067F00004002000100000000124000-000000067F00004002000100000000128000__000000914E3F38F0\n000000067F00004002000100000000124000-000000067F00004002000100000000128000__000000931B9A2710\n000000067F00004002000100000000124F94-000000067F0000400200010000000012D94F__000000572A7C74A1-0000005CA7BBD6F9\n000000067F00004002000100000000128000-000000067F0000400200010000000012C000__0000005D2FFFFB38\n000000067F00004002000100000000128000-000000067F0000400200010000000012C000__00000073AD3FE6B8\n000000067F00004002000100000000128000-000000067F0000400200010000000012C000__000000914E3F38F0\n000000067F00004002000100000000128000-000000067F0000400200010000000012C000__000000931B9A2710\n000000067F0000400200010000000012C000-000000067F00004002000100000000130000__0000005D2FFFFB38\n000000067F0000400200010000000012C000-000000067F00004002000100000000130000__00000073AD3FE6B8\n000000067F0000400200010000000012C000-000000067F00004002000100000000130000__000000914E3F38F0\n000000067F00004
00200010000000012C000-000000067F00004002000100000000130000__000000931B9A2710\n000000067F0000400200010000000012D94F-000000067F00004002000100000000136318__000000572A7C74A1-0000005CA7BBD6F9\n000000067F00004002000100000000130000-000000067F00004002000100000000134000__0000005D2FFFFB38\n000000067F00004002000100000000130000-000000067F00004002000100000000134000__00000073AD3FE6B8\n000000067F00004002000100000000130000-000000067F00004002000100000000134000__000000914E3F38F0\n000000067F00004002000100000000130000-000000067F00004002000100000000134000__000000931B9A2710\n000000067F00004002000100000000134000-000000067F00004002000100000000138000__0000005D2FFFFB38\n000000067F00004002000100000000134000-000000067F00004002000100000000138000__00000073AD3FE6B8\n000000067F00004002000100000000134000-000000067F00004002000100000000138000__000000914E3F38F0\n000000067F00004002000100000000134000-000000067F00004002000100000000138000__000000931B9A2710\n000000067F00004002000100000000136318-000000067F0000400200010000000013ED01__000000572A7C74A1-0000005CA7BBD6F9\n000000067F00004002000100000000138000-000000067F0000400200010000000013C000__0000005D2FFFFB38\n000000067F00004002000100000000138000-000000067F0000400200010000000013C000__00000073AD3FE6B8\n000000067F00004002000100000000138000-000000067F0000400200010000000013C000__000000914E3F38F0\n000000067F00004002000100000000138000-000000067F0000400200010000000013C000__000000931B9A2710\n000000067F0000400200010000000013C000-000000067F00004002000100000000140000__0000005D2FFFFB38\n000000067F0000400200010000000013C000-000000067F00004002000100000000140000__00000073AD3FE6B8\n000000067F0000400200010000000013C000-000000067F00004002000100000000140000__000000914E3F38F0\n000000067F0000400200010000000013C000-000000067F00004002000100000000140000__000000931B9A2710\n000000067F0000400200010000000013ED01-000000067F000040020001000000001476ED__000000572A7C74A1-0000005CA7BBD6F9\n000000067F00004002000100000000140000-000000067F00004002000100000000144000__0000005D2FFFFB38\n000000067F0
0004002000100000000140000-000000067F00004002000100000000144000__00000073AD3FE6B8\n000000067F00004002000100000000140000-000000067F00004002000100000000144000__000000914E3F38F0\n000000067F00004002000100000000140000-000000067F00004002000100000000144000__000000931B9A2710\n000000067F00004002000100000000144000-000000067F00004002000100000000148000__0000005D2FFFFB38\n000000067F00004002000100000000144000-000000067F00004002000100000000148000__00000073AD3FE6B8\n000000067F00004002000100000000144000-000000067F00004002000100000000148000__000000914E3F38F0\n000000067F00004002000100000000144000-000000067F00004002000100000000148000__000000931B9A2710\n000000067F000040020001000000001476ED-000000067F000040020001000000001500D7__000000572A7C74A1-0000005CA7BBD6F9\n000000067F00004002000100000000148000-000000067F0000400200010000000014C000__0000005D2FFFFB38\n000000067F00004002000100000000148000-000000067F0000400200010000000014C000__00000073AD3FE6B8\n000000067F00004002000100000000148000-000000067F0000400200010000000014C000__000000914E3F38F0\n000000067F00004002000100000000148000-000000067F0000400200010000000014C000__000000931B9A2710\n000000067F0000400200010000000014C000-000000067F00004002000100000000150000__0000005D2FFFFB38\n000000067F0000400200010000000014C000-000000067F00004002000100000000150000__00000073AD3FE6B8\n000000067F0000400200010000000014C000-000000067F00004002000100000000150000__000000914E3F38F0\n000000067F0000400200010000000014C000-000000067F00004002000100000000150000__000000931B9A2710\n000000067F00004002000100000000150000-000000067F00004002000100000000154000__0000005D2FFFFB38\n000000067F00004002000100000000150000-000000067F00004002000100000000154000__00000073AD3FE6B8\n000000067F00004002000100000000150000-000000067F00004002000100000000154000__000000914E3F38F0\n000000067F00004002000100000000150000-000000067F00004002000100000000154000__000000931B9A2710\n000000067F000040020001000000001500D7-000000067F00004002000100000000158ABD__000000572A7C74A1-0000005CA7BBD6F9\n000000067F00004002000100
000000154000-000000067F00004002000100000000158000__0000005D2FFFFB38\n000000067F00004002000100000000154000-000000067F00004002000100000000158000__00000073AD3FE6B8\n000000067F00004002000100000000154000-000000067F00004002000100000000158000__000000914E3F38F0\n000000067F00004002000100000000154000-000000067F00004002000100000000158000__000000931B9A2710\n000000067F00004002000100000000158000-000000067F0000400200010000000015C000__0000005D2FFFFB38\n000000067F00004002000100000000158000-000000067F0000400200010000000015C000__00000073AD3FE6B8\n000000067F00004002000100000000158000-000000067F0000400200010000000015C000__000000914E3F38F0\n000000067F00004002000100000000158000-000000067F0000400200010000000015C000__000000931B9A2710\n000000067F00004002000100000000158ABD-000000067F00004002000100000000161489__000000572A7C74A1-0000005CA7BBD6F9\n000000067F0000400200010000000015C000-000000067F00004002000100000000160000__0000005D2FFFFB38\n000000067F0000400200010000000015C000-000000067F00004002000100000000160000__00000073AD3FE6B8\n000000067F0000400200010000000015C000-000000067F00004002000100000000160000__000000914E3F38F0\n000000067F0000400200010000000015C000-000000067F00004002000100000000160000__000000931B9A2710\n000000067F00004002000100000000160000-000000067F00004002000100000000164000__0000005D2FFFFB38\n000000067F00004002000100000000160000-000000067F00004002000100000000164000__00000073AD3FE6B8\n000000067F00004002000100000000160000-000000067F00004002000100000000164000__000000914E3F38F0\n000000067F00004002000100000000160000-000000067F00004002000100000000164000__000000931B9A2710\n000000067F00004002000100000000161489-000000067F00004002000100000000169E43__000000572A7C74A1-0000005CA7BBD6F9\n000000067F00004002000100000000164000-000000067F00004002000100000000168000__0000005D2FFFFB38\n000000067F00004002000100000000164000-000000067F00004002000100000000168000__00000073AD3FE6B8\n000000067F00004002000100000000164000-000000067F00004002000100000000168000__000000914E3F38F0\n000000067F00004002000100000000164000-
000000067F00004002000100000000168000__000000931B9A2710\n000000067F00004002000100000000168000-000000067F0000400200010000000016C000__0000005D2FFFFB38\n000000067F00004002000100000000168000-000000067F0000400200010000000016C000__00000073AD3FE6B8\n000000067F00004002000100000000168000-000000067F0000400200010000000016C000__000000914E3F38F0\n000000067F00004002000100000000168000-000000067F0000400200010000000016C000__000000931B9A2710\n000000067F00004002000100000000169E43-000000067F00004002000100000000172829__000000572A7C74A1-0000005CA7BBD6F9\n000000067F0000400200010000000016C000-000000067F00004002000100000000170000__0000005D2FFFFB38\n000000067F0000400200010000000016C000-000000067F00004002000100000000170000__00000073AD3FE6B8\n000000067F0000400200010000000016C000-000000067F00004002000100000000170000__000000914E3F38F0\n000000067F0000400200010000000016C000-000000067F00004002000100000000170000__000000931B9A2710\n000000067F00004002000100000000170000-000000067F00004002000100000000174000__0000005D2FFFFB38\n000000067F00004002000100000000170000-000000067F00004002000100000000174000__00000073AD3FE6B8\n000000067F00004002000100000000170000-000000067F00004002000100000000174000__000000914E3F38F0\n000000067F00004002000100000000170000-000000067F00004002000100000000174000__000000931B9A2710\n000000067F00004002000100000000172829-000000067F0000400200010000000017B215__000000572A7C74A1-0000005CA7BBD6F9\n000000067F00004002000100000000174000-000000067F00004002000100000000178000__0000005D2FFFFB38\n000000067F00004002000100000000174000-000000067F00004002000100000000178000__00000073AD3FE6B8\n000000067F00004002000100000000174000-000000067F00004002000100000000178000__000000914E3F38F0\n000000067F00004002000100000000174000-000000067F00004002000100000000178000__000000931B9A2710\n000000067F00004002000100000000178000-000000067F0000400200010000000017C000__0000005D2FFFFB38\n000000067F00004002000100000000178000-000000067F0000400200010000000017C000__00000073AD3FE6B8\n000000067F00004002000100000000178000-000000067F000
0400200010000000017C000__000000914E3F38F0\n000000067F00004002000100000000178000-000000067F0000400200010000000017C000__000000931B9A2710\n000000067F0000400200010000000017B215-000000067F00004002000100000000183C02__000000572A7C74A1-0000005CA7BBD6F9\n000000067F0000400200010000000017C000-000000067F00004002000100000000180000__0000005D2FFFFB38\n000000067F0000400200010000000017C000-000000067F00004002000100000000180000__00000073AD3FE6B8\n000000067F0000400200010000000017C000-000000067F00004002000100000000180000__000000914E3F38F0\n000000067F0000400200010000000017C000-000000067F00004002000100000000180000__000000931B9A2710\n000000067F00004002000100000000180000-000000067F00004002000100000000184000__0000005D2FFFFB38\n000000067F00004002000100000000180000-000000067F00004002000100000000184000__00000073AD3FE6B8\n000000067F00004002000100000000180000-000000067F00004002000100000000184000__000000914E3F38F0\n000000067F00004002000100000000180000-000000067F00004002000100000000184000__000000931B9A2710\n000000067F00004002000100000000183C02-000000067F0000400200010000000018C5E0__000000572A7C74A1-0000005CA7BBD6F9\n000000067F00004002000100000000184000-000000067F00004002000100000000188000__0000005D2FFFFB38\n000000067F00004002000100000000184000-000000067F00004002000100000000188000__00000073AD3FE6B8\n000000067F00004002000100000000184000-000000067F00004002000100000000188000__000000914E3F38F0\n000000067F00004002000100000000184000-000000067F00004002000100000000188000__000000931B9A2710\n000000067F00004002000100000000188000-000000067F0000400200010000000018C000__0000005D2FFFFB38\n000000067F00004002000100000000188000-000000067F0000400200010000000018C000__00000073AD3FE6B8\n000000067F00004002000100000000188000-000000067F0000400200010000000018C000__000000914E3F38F0\n000000067F00004002000100000000188000-000000067F0000400200010000000018C000__000000931B9A2710\n000000067F0000400200010000000018C000-000000067F00004002000100000000190000__0000005D2FFFFB38\n000000067F0000400200010000000018C000-000000067F0000400200010000
0000190000__00000073AD3FE6B8\n000000067F0000400200010000000018C000-000000067F00004002000100000000190000__000000914E3F38F0\n000000067F0000400200010000000018C000-000000067F00004002000100000000190000__000000931B9A2710\n000000067F0000400200010000000018C5E0-000000067F00004002000100000000194FC7__000000572A7C74A1-0000005CA7BBD6F9\n000000067F00004002000100000000190000-000000067F00004002000100000000194000__0000005D2FFFFB38\n000000067F00004002000100000000190000-000000067F00004002000100000000194000__00000073AD3FE6B8\n000000067F00004002000100000000190000-000000067F00004002000100000000194000__000000914E3F38F0\n000000067F00004002000100000000190000-000000067F00004002000100000000194000__000000931B9A2710\n000000067F00004002000100000000194000-000000067F00004002000100000000198000__0000005D2FFFFB38\n000000067F00004002000100000000194000-000000067F00004002000100000000198000__00000073AD3FE6B8\n000000067F00004002000100000000194000-000000067F00004002000100000000198000__000000914E3F38F0\n000000067F00004002000100000000194000-000000067F00004002000100000000198000__000000931B9A2710\n000000067F00004002000100000000194FC7-000000067F0000400200010000000019D98D__000000572A7C74A1-0000005CA7BBD6F9\n000000067F00004002000100000000198000-000000067F0000400200010000000019C000__0000005D2FFFFB38\n000000067F00004002000100000000198000-000000067F0000400200010000000019C000__00000073AD3FE6B8\n000000067F00004002000100000000198000-000000067F0000400200010000000019C000__000000914E3F38F0\n000000067F00004002000100000000198000-000000067F0000400200010000000019C000__000000931B9A2710\n000000067F0000400200010000000019C000-000000067F000040020001000000001A0000__0000005D2FFFFB38\n000000067F0000400200010000000019C000-000000067F000040020001000000001A0000__00000073AD3FE6B8\n000000067F0000400200010000000019C000-000000067F000040020001000000001A0000__000000914E3F38F0\n000000067F0000400200010000000019C000-000000067F000040020001000000001A0000__000000931B9A2710\n000000067F0000400200010000000019D98D-000000067F000040020001000000001A6347__0
00000572A7C74A1-0000005CA7BBD6F9\n000000067F000040020001000000001A0000-000000067F000040020001000000001A4000__0000005D2FFFFB38\n000000067F000040020001000000001A0000-000000067F000040020001000000001A4000__00000073AD3FE6B8\n000000067F000040020001000000001A0000-000000067F000040020001000000001A4000__000000914E3F38F0\n000000067F000040020001000000001A0000-000000067F000040020001000000001A4000__000000931B9A2710\n000000067F000040020001000000001A4000-000000067F000040020001000000001A8000__0000005D2FFFFB38\n000000067F000040020001000000001A4000-000000067F000040020001000000001A8000__00000073AD3FE6B8\n000000067F000040020001000000001A4000-000000067F000040020001000000001A8000__000000914E3F38F0\n000000067F000040020001000000001A4000-000000067F000040020001000000001A8000__000000931B9A2710\n000000067F000040020001000000001A6347-000000067F000040020001000000001AED26__000000572A7C74A1-0000005CA7BBD6F9\n000000067F000040020001000000001A8000-000000067F000040020001000000001AC000__0000005D2FFFFB38\n000000067F000040020001000000001A8000-000000067F000040020001000000001AC000__00000073AD3FE6B8\n000000067F000040020001000000001A8000-000000067F000040020001000000001AC000__000000914E3F38F0\n000000067F000040020001000000001A8000-000000067F000040020001000000001AC000__000000931B9A2710\n000000067F000040020001000000001AC000-000000067F000040020001000000001B0000__0000005D2FFFFB38\n000000067F000040020001000000001AC000-000000067F000040020001000000001B0000__00000073AD3FE6B8\n000000067F000040020001000000001AC000-000000067F000040020001000000001B0000__000000914E3F38F0\n000000067F000040020001000000001AC000-000000067F000040020001000000001B0000__000000931B9A2710\n000000067F000040020001000000001AED26-000000067F000040020001000000001B770D__000000572A7C74A1-0000005CA7BBD6F9\n000000067F000040020001000000001B0000-000000067F000040020001000000001B4000__0000005D2FFFFB38\n000000067F000040020001000000001B0000-000000067F000040020001000000001B4000__00000073AD3FE6B8\n000000067F000040020001000000001B0000-000000067F000040020001000000001B400
0__000000914E3F38F0\n000000067F000040020001000000001B0000-000000067F000040020001000000001B4000__000000931B9A2710\n000000067F000040020001000000001B4000-000000067F000040020001000000001B8000__0000005D2FFFFB38\n000000067F000040020001000000001B4000-000000067F000040020001000000001B8000__00000073AD3FE6B8\n000000067F000040020001000000001B4000-000000067F000040020001000000001B8000__000000914E3F38F0\n000000067F000040020001000000001B4000-000000067F000040020001000000001B8000__000000931B9A2710\n000000067F000040020001000000001B770D-000000067F000040020001000000001C00F6__000000572A7C74A1-0000005CA7BBD6F9\n000000067F000040020001000000001B8000-000000067F000040020001000000001BC000__0000005D2FFFFB38\n000000067F000040020001000000001B8000-000000067F000040020001000000001BC000__00000073AD3FE6B8\n000000067F000040020001000000001B8000-000000067F000040020001000000001BC000__000000914E3F38F0\n000000067F000040020001000000001B8000-000000067F000040020001000000001BC000__000000931B9A2710\n000000067F000040020001000000001BC000-000000067F000040020001000000001C0000__0000005D2FFFFB38\n000000067F000040020001000000001BC000-000000067F000040020001000000001C0000__00000073AD3FE6B8\n000000067F000040020001000000001BC000-000000067F000040020001000000001C0000__000000914E3F38F0\n000000067F000040020001000000001BC000-000000067F000040020001000000001C0000__000000931B9A2710\n000000067F000040020001000000001C0000-000000067F000040020001000000001C4000__0000005D2FFFFB38\n000000067F000040020001000000001C0000-000000067F000040020001000000001C4000__00000073AD3FE6B8\n000000067F000040020001000000001C0000-000000067F000040020001000000001C4000__000000914E3F38F0\n000000067F000040020001000000001C0000-000000067F000040020001000000001C4000__000000931B9A2710\n000000067F000040020001000000001C00F6-000000067F000040020001000000001C8ADD__000000572A7C74A1-0000005CA7BBD6F9\n000000067F000040020001000000001C4000-000000067F000040020001000000001C8000__0000005D2FFFFB38\n000000067F000040020001000000001C4000-000000067F000040020001000000001C8000__00000073AD
3FE6B8\n000000067F000040020001000000001C4000-000000067F000040020001000000001C8000__000000914E3F38F0\n000000067F000040020001000000001C4000-000000067F000040020001000000001C8000__000000931B9A2710\n000000067F000040020001000000001C8000-000000067F000040020001000000001CC000__0000005D2FFFFB38\n000000067F000040020001000000001C8000-000000067F000040020001000000001CC000__00000073AD3FE6B8\n000000067F000040020001000000001C8000-000000067F000040020001000000001CC000__000000914E3F38F0\n000000067F000040020001000000001C8000-000000067F000040020001000000001CC000__000000931B9A2710\n000000067F000040020001000000001C8ADD-000000067F000040020001000000001D14BA__000000572A7C74A1-0000005CA7BBD6F9\n000000067F000040020001000000001CC000-000000067F000040020001000000001D0000__0000005D2FFFFB38\n000000067F000040020001000000001CC000-000000067F000040020001000000001D0000__00000073AD3FE6B8\n000000067F000040020001000000001CC000-000000067F000040020001000000001D0000__000000914E3F38F0\n000000067F000040020001000000001CC000-000000067F000040020001000000001D0000__000000931B9A2710\n000000067F000040020001000000001D0000-000000067F000040020001000000001D4000__0000005D2FFFFB38\n000000067F000040020001000000001D0000-000000067F000040020001000000001D4000__00000073AD3FE6B8\n000000067F000040020001000000001D0000-000000067F000040020001000000001D4000__000000914E3F38F0\n000000067F000040020001000000001D0000-000000067F000040020001000000001D4000__000000931B9A2710\n000000067F000040020001000000001D14BA-000000067F000040020001000000001D9E89__000000572A7C74A1-0000005CA7BBD6F9\n000000067F000040020001000000001D4000-000000067F000040020001000000001D8000__0000005D2FFFFB38\n000000067F000040020001000000001D4000-000000067F000040020001000000001D8000__00000073AD3FE6B8\n000000067F000040020001000000001D4000-000000067F000040020001000000001D8000__000000914E3F38F0\n000000067F000040020001000000001D4000-000000067F000040020001000000001D8000__000000931B9A2710\n000000067F000040020001000000001D8000-000000067F000040020001000000001DC000__0000005D2FFFFB38\n00000
0067F000040020001000000001D8000-000000067F000040020001000000001DC000__00000073AD3FE6B8\n000000067F000040020001000000001D8000-000000067F000040020001000000001DC000__000000914E3F38F0\n000000067F000040020001000000001D8000-000000067F000040020001000000001DC000__000000931B9A2710\n000000067F000040020001000000001D9E89-000000067F000040020001000000001E284E__000000572A7C74A1-0000005CA7BBD6F9\n000000067F000040020001000000001DC000-000000067F000040020001000000001E0000__0000005D2FFFFB38\n000000067F000040020001000000001DC000-000000067F000040020001000000001E0000__00000073AD3FE6B8\n000000067F000040020001000000001DC000-000000067F000040020001000000001E0000__000000914E3F38F0\n000000067F000040020001000000001DC000-000000067F000040020001000000001E0000__000000931B9A2710\n000000067F000040020001000000001E0000-000000067F000040020001000000001E4000__0000005D2FFFFB38\n000000067F000040020001000000001E0000-000000067F000040020001000000001E4000__00000073AD3FE6B8\n000000067F000040020001000000001E0000-000000067F000040020001000000001E4000__000000914E3F38F0\n000000067F000040020001000000001E0000-000000067F000040020001000000001E4000__000000931B9A2710\n000000067F000040020001000000001E284E-000000067F000040020001000000001EB231__000000572A7C74A1-0000005CA7BBD6F9\n000000067F000040020001000000001E4000-000000067F000040020001000000001E8000__0000005D2FFFFB38\n000000067F000040020001000000001E4000-000000067F000040020001000000001E8000__00000073AD3FE6B8\n000000067F000040020001000000001E4000-000000067F000040020001000000001E8000__000000914E3F38F0\n000000067F000040020001000000001E4000-000000067F000040020001000000001E8000__000000931B9A2710\n000000067F000040020001000000001E8000-000000067F000040020001000000001EC000__0000005D2FFFFB38\n000000067F000040020001000000001E8000-000000067F000040020001000000001EC000__00000073AD3FE6B8\n000000067F000040020001000000001E8000-000000067F000040020001000000001EC000__000000914E3F38F0\n000000067F000040020001000000001E8000-000000067F000040020001000000001EC000__000000931B9A2710\n000000067F00004002
0001000000001EB231-000000067F000040020001000000001F3C19__000000572A7C74A1-0000005CA7BBD6F9\n000000067F000040020001000000001EC000-000000067F000040020001000000001F0000__0000005D2FFFFB38\n000000067F000040020001000000001EC000-000000067F000040020001000000001F0000__00000073AD3FE6B8\n000000067F000040020001000000001EC000-000000067F000040020001000000001F0000__000000914E3F38F0\n000000067F000040020001000000001EC000-000000067F000040020001000000001F0000__000000931B9A2710\n000000067F000040020001000000001F0000-000000067F000040020001000000001F4000__0000005D2FFFFB38\n000000067F000040020001000000001F0000-000000067F000040020001000000001F4000__00000073AD3FE6B8\n000000067F000040020001000000001F0000-000000067F000040020001000000001F4000__000000914E3F38F0\n000000067F000040020001000000001F0000-000000067F000040020001000000001F4000__000000931B9A2710\n000000067F000040020001000000001F3C19-000000067F000040020001000000001FC608__000000572A7C74A1-0000005CA7BBD6F9\n000000067F000040020001000000001F4000-000000067F000040020001000000001F8000__0000005D2FFFFB38\n000000067F000040020001000000001F4000-000000067F000040020001000000001F8000__00000073AD3FE6B8\n000000067F000040020001000000001F4000-000000067F000040020001000000001F8000__000000914E3F38F0\n000000067F000040020001000000001F4000-000000067F000040020001000000001F8000__000000931B9A2710\n000000067F000040020001000000001F8000-000000067F000040020001000000001FC000__0000005D2FFFFB38\n000000067F000040020001000000001F8000-000000067F000040020001000000001FC000__00000073AD3FE6B8\n000000067F000040020001000000001F8000-000000067F000040020001000000001FC000__000000914E3F38F0\n000000067F000040020001000000001F8000-000000067F000040020001000000001FC000__000000931B9A2710\n000000067F000040020001000000001FC000-000000067F00004002000100000000200000__0000005D2FFFFB38\n000000067F000040020001000000001FC000-000000067F00004002000100000000200000__00000073AD3FE6B8\n000000067F000040020001000000001FC000-000000067F00004002000100000000200000__000000914E3F38F0\n000000067F000040020001000000001
FC000-000000067F00004002000100000000200000__000000931B9A2710\n000000067F000040020001000000001FC608-000000067F00004002000100000000204FDF__000000572A7C74A1-0000005CA7BBD6F9\n000000067F00004002000100000000200000-000000067F00004002000100000000204000__0000005D2FFFFB38\n000000067F00004002000100000000200000-000000067F00004002000100000000204000__00000073AD3FE6B8\n000000067F00004002000100000000200000-000000067F00004002000100000000204000__000000914E3F38F0\n000000067F00004002000100000000200000-000000067F00004002000100000000204000__000000931B9A2710\n000000067F00004002000100000000204000-000000067F00004002000100000000208000__0000005D2FFFFB38\n000000067F00004002000100000000204000-000000067F00004002000100000000208000__00000073AD3FE6B8\n000000067F00004002000100000000204000-000000067F00004002000100000000208000__000000914E3F38F0\n000000067F00004002000100000000204000-000000067F00004002000100000000208000__000000931B9A2710\n000000067F00004002000100000000204FDF-000000067F0000400200010000000020D9BC__000000572A7C74A1-0000005CA7BBD6F9\n000000067F00004002000100000000208000-000000067F0000400200010000000020C000__0000005D2FFFFB38\n000000067F00004002000100000000208000-000000067F0000400200010000000020C000__00000073AD3FE6B8\n000000067F00004002000100000000208000-000000067F0000400200010000000020C000__000000914E3F38F0\n000000067F00004002000100000000208000-000000067F0000400200010000000020C000__000000931B9A2710\n000000067F0000400200010000000020C000-000000067F00004002000100000000210000__0000005D2FFFFB38\n000000067F0000400200010000000020C000-000000067F00004002000100000000210000__00000073AD3FE6B8\n000000067F0000400200010000000020C000-000000067F00004002000100000000210000__000000914E3F38F0\n000000067F0000400200010000000020C000-000000067F00004002000100000000210000__000000931B9A2710\n000000067F0000400200010000000020D9BC-000000067F0000400200010000000021638D__000000572A7C74A1-0000005CA7BBD6F9\n000000067F00004002000100000000210000-000000067F00004002000100000000214000__0000005D2FFFFB38\n000000067F00004002000100000
000210000-000000067F00004002000100000000214000__00000073AD3FE6B8\n000000067F00004002000100000000210000-000000067F00004002000100000000214000__000000914E3F38F0\n000000067F00004002000100000000210000-000000067F00004002000100000000214000__000000931B9A2710\n000000067F00004002000100000000214000-000000067F00004002000100000000218000__0000005D2FFFFB38\n000000067F00004002000100000000214000-000000067F00004002000100000000218000__00000073AD3FE6B8\n000000067F00004002000100000000214000-000000067F00004002000100000000218000__000000914E3F38F0\n000000067F00004002000100000000214000-000000067F00004002000100000000218000__000000931B9A2710\n000000067F0000400200010000000021638D-000000067F0000400200010000000021ED51__000000572A7C74A1-0000005CA7BBD6F9\n000000067F00004002000100000000218000-000000067F0000400200010000000021C000__0000005D2FFFFB38\n000000067F00004002000100000000218000-000000067F0000400200010000000021C000__00000073AD3FE6B8\n000000067F00004002000100000000218000-000000067F0000400200010000000021C000__000000914E3F38F0\n000000067F00004002000100000000218000-000000067F0000400200010000000021C000__000000931B9A2710\n000000067F0000400200010000000021C000-000000067F00004002000100000000220000__0000005D2FFFFB38\n000000067F0000400200010000000021C000-000000067F00004002000100000000220000__00000073AD3FE6B8\n000000067F0000400200010000000021C000-000000067F00004002000100000000220000__000000914E3F38F0\n000000067F0000400200010000000021C000-000000067F00004002000100000000220000__000000931B9A2710\n000000067F0000400200010000000021ED51-000000067F0000400200010000000022773E__000000572A7C74A1-0000005CA7BBD6F9\n000000067F00004002000100000000220000-000000067F00004002000100000000224000__0000005D2FFFFB38\n000000067F00004002000100000000220000-000000067F00004002000100000000224000__00000073AD3FE6B8\n000000067F00004002000100000000220000-000000067F00004002000100000000224000__000000914E3F38F0\n000000067F00004002000100000000220000-000000067F00004002000100000000224000__000000931B9A2710\n000000067F00004002000100000000224000-000
000067F00004002000100000000228000__0000005D2FFFFB38\n000000067F00004002000100000000224000-000000067F00004002000100000000228000__00000073AD3FE6B8\n000000067F00004002000100000000224000-000000067F00004002000100000000228000__000000914E3F38F0\n000000067F00004002000100000000224000-000000067F00004002000100000000228000__000000931B9A2710\n000000067F0000400200010000000022773E-000000067F00004002000100000000230129__000000572A7C74A1-0000005CA7BBD6F9\n000000067F00004002000100000000228000-000000067F0000400200010000000022C000__0000005D2FFFFB38\n000000067F00004002000100000000228000-000000067F0000400200010000000022C000__00000073AD3FE6B8\n000000067F00004002000100000000228000-000000067F0000400200010000000022C000__000000914E3F38F0\n000000067F00004002000100000000228000-000000067F0000400200010000000022C000__000000931B9A2710\n000000067F0000400200010000000022C000-000000067F00004002000100000000230000__0000005D2FFFFB38\n000000067F0000400200010000000022C000-000000067F00004002000100000000230000__00000073AD3FE6B8\n000000067F0000400200010000000022C000-000000067F00004002000100000000230000__000000914E3F38F0\n000000067F0000400200010000000022C000-000000067F00004002000100000000230000__000000931B9A2710\n000000067F00004002000100000000230000-000000067F00004002000100000000234000__0000005D2FFFFB38\n000000067F00004002000100000000230000-000000067F00004002000100000000234000__00000073AD3FE6B8\n000000067F00004002000100000000230000-000000067F00004002000100000000234000__000000914E3F38F0\n000000067F00004002000100000000230000-000000067F00004002000100000000234000__000000931B9A2710\n000000067F00004002000100000000230129-000000067F00004002000100000000238B15__000000572A7C74A1-0000005CA7BBD6F9\n000000067F00004002000100000000234000-000000067F00004002000100000000238000__0000005D2FFFFB38\n000000067F00004002000100000000234000-000000067F00004002000100000000238000__00000073AD3FE6B8\n000000067F00004002000100000000234000-000000067F00004002000100000000238000__000000914E3F38F0\n000000067F00004002000100000000234000-000000067F000040
02000100000000238000__000000931B9A2710\n000000067F00004002000100000000238000-000000067F0000400200010000000023C000__0000005D2FFFFB38\n000000067F00004002000100000000238000-000000067F0000400200010000000023C000__00000073AD3FE6B8\n000000067F00004002000100000000238000-000000067F0000400200010000000023C000__000000914E3F38F0\n000000067F00004002000100000000238000-000000067F0000400200010000000023C000__000000931B9A2710\n000000067F00004002000100000000238B15-000000067F000040020001000000002414E7__000000572A7C74A1-0000005CA7BBD6F9\n000000067F0000400200010000000023C000-000000067F00004002000100000000240000__0000005D2FFFFB38\n000000067F0000400200010000000023C000-000000067F00004002000100000000240000__00000073AD3FE6B8\n000000067F0000400200010000000023C000-000000067F00004002000100000000240000__000000914E3F38F0\n000000067F0000400200010000000023C000-000000067F00004002000100000000240000__000000931B9A2710\n000000067F00004002000100000000240000-000000067F00004002000100000000244000__0000005D2FFFFB38\n000000067F00004002000100000000240000-000000067F00004002000100000000244000__00000073AD3FE6B8\n000000067F00004002000100000000240000-000000067F00004002000100000000244000__000000914E3F38F0\n000000067F00004002000100000000240000-000000067F00004002000100000000244000__000000931B9A2710\n000000067F000040020001000000002414E7-000000067F00004002000100000000249EC9__000000572A7C74A1-0000005CA7BBD6F9\n000000067F00004002000100000000244000-000000067F00004002000100000000248000__0000005D2FFFFB38\n000000067F00004002000100000000244000-000000067F00004002000100000000248000__00000073AD3FE6B8\n000000067F00004002000100000000244000-000000067F00004002000100000000248000__000000914E3F38F0\n000000067F00004002000100000000244000-000000067F00004002000100000000248000__000000931B9A2710\n000000067F00004002000100000000248000-000000067F0000400200010000000024C000__0000005D2FFFFB38\n000000067F00004002000100000000248000-000000067F0000400200010000000024C000__00000073AD3FE6B8\n000000067F00004002000100000000248000-000000067F0000400200010000000
024C000__000000914E3F38F0\n000000067F00004002000100000000248000-000000067F0000400200010000000024C000__000000931B9A2710\n000000067F00004002000100000000249EC9-000000067F0000400200010000000025288A__000000572A7C74A1-0000005CA7BBD6F9\n000000067F0000400200010000000024C000-000000067F00004002000100000000250000__0000005D2FFFFB38\n000000067F0000400200010000000024C000-000000067F00004002000100000000250000__00000073AD3FE6B8\n000000067F0000400200010000000024C000-000000067F00004002000100000000250000__000000914E3F38F0\n000000067F0000400200010000000024C000-000000067F00004002000100000000250000__000000931B9A2710\n000000067F00004002000100000000250000-000000067F00004002000100000000254000__0000005D2FFFFB38\n000000067F00004002000100000000250000-000000067F00004002000100000000254000__00000073AD3FE6B8\n000000067F00004002000100000000250000-000000067F00004002000100000000254000__000000914E3F38F0\n000000067F00004002000100000000250000-000000067F00004002000100000000254000__000000931B9A2710\n000000067F0000400200010000000025288A-000000067F0000400200010000000025B24E__000000572A7C74A1-0000005CA7BBD6F9\n000000067F00004002000100000000254000-000000067F00004002000100000000258000__0000005D2FFFFB38\n000000067F00004002000100000000254000-000000067F00004002000100000000258000__00000073AD3FE6B8\n000000067F00004002000100000000254000-000000067F00004002000100000000258000__000000914E3F38F0\n000000067F00004002000100000000254000-000000067F00004002000100000000258000__000000931B9A2710\n000000067F00004002000100000000258000-000000067F0000400200010000000025C000__0000005D2FFFFB38\n000000067F00004002000100000000258000-000000067F0000400200010000000025C000__00000073AD3FE6B8\n000000067F00004002000100000000258000-000000067F0000400200010000000025C000__000000914E3F38F0\n000000067F00004002000100000000258000-000000067F0000400200010000000025C000__000000931B9A2710\n000000067F0000400200010000000025B24E-000000067F00004002000100000000263C37__000000572A7C74A1-0000005CA7BBD6F9\n000000067F0000400200010000000025C000-000000067F000040020001000
00000260000__0000005D2FFFFB38\n000000067F0000400200010000000025C000-000000067F00004002000100000000260000__00000073AD3FE6B8\n000000067F0000400200010000000025C000-000000067F00004002000100000000260000__000000914E3F38F0\n000000067F0000400200010000000025C000-000000067F00004002000100000000260000__000000931B9A2710\n000000067F00004002000100000000260000-000000067F00004002000100000000264000__0000005D2FFFFB38\n000000067F00004002000100000000260000-000000067F00004002000100000000264000__00000073AD3FE6B8\n000000067F00004002000100000000260000-000000067F00004002000100000000264000__000000914E3F38F0\n000000067F00004002000100000000260000-000000067F00004002000100000000264000__000000931B9A2710\n000000067F00004002000100000000263C37-000000067F0000400200010000000026C620__000000572A7C74A1-0000005CA7BBD6F9\n000000067F00004002000100000000264000-000000067F00004002000100000000268000__0000005D2FFFFB38\n000000067F00004002000100000000264000-000000067F00004002000100000000268000__00000073AD3FE6B8\n000000067F00004002000100000000264000-000000067F00004002000100000000268000__000000914E3F38F0\n000000067F00004002000100000000264000-000000067F00004002000100000000268000__000000931B9A2710\n000000067F00004002000100000000268000-000000067F0000400200010000000026C000__0000005D2FFFFB38\n000000067F00004002000100000000268000-000000067F0000400200010000000026C000__00000073AD3FE6B8\n000000067F00004002000100000000268000-000000067F0000400200010000000026C000__000000914E3F38F0\n000000067F00004002000100000000268000-000000067F0000400200010000000026C000__000000931B9A2710\n000000067F0000400200010000000026C000-000000067F00004002000100000000270000__0000005D2FFFFB38\n000000067F0000400200010000000026C000-000000067F00004002000100000000270000__00000073AD3FE6B8\n000000067F0000400200010000000026C000-000000067F00004002000100000000270000__000000914E3F38F0\n000000067F0000400200010000000026C000-000000067F00004002000100000000270000__000000931B9A2710\n000000067F0000400200010000000026C620-000000067F00004002000100000000275003__000000572A7C74A1-
0000005CA7BBD6F9\n000000067F00004002000100000000270000-000000067F00004002000100000000274000__0000005D2FFFFB38\n000000067F00004002000100000000270000-000000067F00004002000100000000274000__00000073AD3FE6B8\n000000067F00004002000100000000270000-000000067F00004002000100000000274000__000000914E3F38F0\n000000067F00004002000100000000270000-000000067F00004002000100000000274000__000000931B9A2710\n000000067F00004002000100000000274000-000000067F00004002000100000000278000__0000005D2FFFFB38\n000000067F00004002000100000000274000-000000067F00004002000100000000278000__00000073AD3FE6B8\n000000067F00004002000100000000274000-000000067F00004002000100000000278000__000000914E3F38F0\n000000067F00004002000100000000274000-000000067F00004002000100000000278000__000000931B9A2710\n000000067F00004002000100000000275003-000000067F0000400200010000000027D9DA__000000572A7C74A1-0000005CA7BBD6F9\n000000067F00004002000100000000278000-000000067F0000400200010000000027C000__0000005D2FFFFB38\n000000067F00004002000100000000278000-000000067F0000400200010000000027C000__00000073AD3FE6B8\n000000067F00004002000100000000278000-000000067F0000400200010000000027C000__000000914E3F38F0\n000000067F00004002000100000000278000-000000067F0000400200010000000027C000__000000931B9A2710\n000000067F0000400200010000000027C000-000000067F00004002000100000000280000__0000005D2FFFFB38\n000000067F0000400200010000000027C000-000000067F00004002000100000000280000__00000073AD3FE6B8\n000000067F0000400200010000000027C000-000000067F00004002000100000000280000__000000914E3F38F0\n000000067F0000400200010000000027C000-000000067F00004002000100000000280000__000000931B9A2710\n000000067F0000400200010000000027D9DA-000000067F000040020001000000002863B3__000000572A7C74A1-0000005CA7BBD6F9\n000000067F00004002000100000000280000-000000067F00004002000100000000284000__0000005D2FFFFB38\n000000067F00004002000100000000280000-000000067F00004002000100000000284000__00000073AD3FE6B8\n000000067F00004002000100000000280000-000000067F00004002000100000000284000__000000914E3F3
8F0\n000000067F00004002000100000000280000-000000067F00004002000100000000284000__000000931B9A2710\n000000067F00004002000100000000284000-000000067F00004002000100000000288000__0000005D2FFFFB38\n000000067F00004002000100000000284000-000000067F00004002000100000000288000__00000073AD3FE6B8\n000000067F00004002000100000000284000-000000067F00004002000100000000288000__000000914E3F38F0\n000000067F00004002000100000000284000-000000067F00004002000100000000288000__000000931B9A2710\n000000067F000040020001000000002863B3-000000067F0000400200010000000028ED6E__000000572A7C74A1-0000005CA7BBD6F9\n000000067F00004002000100000000288000-000000067F0000400200010000000028C000__0000005D2FFFFB38\n000000067F00004002000100000000288000-000000067F0000400200010000000028C000__00000073AD3FE6B8\n000000067F00004002000100000000288000-000000067F0000400200010000000028C000__000000914E3F38F0\n000000067F00004002000100000000288000-000000067F0000400200010000000028C000__000000931B9A2710\n000000067F0000400200010000000028C000-000000067F00004002000100000000290000__0000005D2FFFFB38\n000000067F0000400200010000000028C000-000000067F00004002000100000000290000__00000073AD3FE6B8\n000000067F0000400200010000000028C000-000000067F00004002000100000000290000__000000914E3F38F0\n000000067F0000400200010000000028C000-000000067F00004002000100000000290000__000000931B9A2710\n000000067F0000400200010000000028ED6E-000000067F00004002000100000000297734__000000572A7C74A1-0000005CA7BBD6F9\n000000067F00004002000100000000290000-000000067F00004002000100000000294000__0000005D2FFFFB38\n000000067F00004002000100000000290000-000000067F00004002000100000000294000__00000073AD3FE6B8\n000000067F00004002000100000000290000-000000067F00004002000100000000294000__000000914E3F38F0\n000000067F00004002000100000000290000-000000067F00004002000100000000294000__000000931B9A2710\n000000067F00004002000100000000294000-000000067F00004002000100000000298000__0000005D2FFFFB38\n000000067F00004002000100000000294000-000000067F00004002000100000000298000__00000073AD3FE6B8\n00000006
7F00004002000100000000294000-000000067F00004002000100000000298000__000000914E3F38F0\n000000067F00004002000100000000294000-000000067F00004002000100000000298000__000000931B9A2710\n000000067F00004002000100000000297734-000000067F000040020001000000002A0126__000000572A7C74A1-0000005CA7BBD6F9\n000000067F00004002000100000000298000-000000067F0000400200010000000029C000__0000005D2FFFFB38\n000000067F00004002000100000000298000-000000067F0000400200010000000029C000__00000073AD3FE6B8\n000000067F00004002000100000000298000-000000067F0000400200010000000029C000__000000914E3F38F0\n000000067F00004002000100000000298000-000000067F0000400200010000000029C000__000000931B9A2710\n000000067F0000400200010000000029C000-000000067F000040020001000000002A0000__0000005D2FFFFB38\n000000067F0000400200010000000029C000-000000067F000040020001000000002A0000__00000073AD3FE6B8\n000000067F0000400200010000000029C000-000000067F000040020001000000002A0000__000000914E3F38F0\n000000067F0000400200010000000029C000-000000067F000040020001000000002A0000__000000931B9A2710\n000000067F000040020001000000002A0000-000000067F000040020001000000002A4000__0000005D2FFFFB38\n000000067F000040020001000000002A0000-000000067F000040020001000000002A4000__00000073AD3FE6B8\n000000067F000040020001000000002A0000-000000067F000040020001000000002A4000__000000914E3F38F0\n000000067F000040020001000000002A0000-000000067F000040020001000000002A4000__000000931B9A2710\n000000067F000040020001000000002A0126-000000067F000040020001000000002A8B19__000000572A7C74A1-0000005CA7BBD6F9\n000000067F000040020001000000002A4000-000000067F000040020001000000002A8000__0000005D2FFFFB38\n000000067F000040020001000000002A4000-000000067F000040020001000000002A8000__00000073AD3FE6B8\n000000067F000040020001000000002A4000-000000067F000040020001000000002A8000__000000914E3F38F0\n000000067F000040020001000000002A4000-000000067F000040020001000000002A8000__000000931B9A2710\n000000067F000040020001000000002A8000-000000067F000040020001000000002AC000__0000005D2FFFFB38\n000000067F00004002000
1000000002A8000-000000067F000040020001000000002AC000__00000073AD3FE6B8\n000000067F000040020001000000002A8000-000000067F000040020001000000002AC000__000000914E3F38F0\n000000067F000040020001000000002A8000-000000067F000040020001000000002AC000__000000931B9A2710\n000000067F000040020001000000002A8B19-000000067F000040020001000000002B1501__000000572A7C74A1-0000005CA7BBD6F9\n000000067F000040020001000000002AC000-000000067F000040020001000000002B0000__0000005D2FFFFB38\n000000067F000040020001000000002AC000-000000067F000040020001000000002B0000__00000073AD3FE6B8\n000000067F000040020001000000002AC000-000000067F000040020001000000002B0000__000000914E3F38F0\n000000067F000040020001000000002AC000-000000067F000040020001000000002B0000__000000931B9A2710\n000000067F000040020001000000002B0000-000000067F000040020001000000002B4000__0000005D2FFFFB38\n000000067F000040020001000000002B0000-000000067F000040020001000000002B4000__00000073AD3FE6B8\n000000067F000040020001000000002B0000-000000067F000040020001000000002B4000__000000914E3F38F0\n000000067F000040020001000000002B0000-000000067F000040020001000000002B4000__000000931B9A2710\n000000067F000040020001000000002B1501-000000067F000040020001000000002B9EDA__000000572A7C74A1-0000005CA7BBD6F9\n000000067F000040020001000000002B4000-000000067F000040020001000000002B8000__0000005D2FFFFB38\n000000067F000040020001000000002B4000-000000067F000040020001000000002B8000__00000073AD3FE6B8\n000000067F000040020001000000002B4000-000000067F000040020001000000002B8000__000000914E3F38F0\n000000067F000040020001000000002B4000-000000067F000040020001000000002B8000__000000931B9A2710\n000000067F000040020001000000002B8000-000000067F000040020001000000002BC000__0000005D2FFFFB38\n000000067F000040020001000000002B8000-000000067F000040020001000000002BC000__00000073AD3FE6B8\n000000067F000040020001000000002B8000-000000067F000040020001000000002BC000__000000914E3F38F0\n000000067F000040020001000000002B8000-000000067F000040020001000000002BC000__000000931B9A2710\n000000067F000040020001000000002B9E
DA-000000067F000040020001000000002C28A8__000000572A7C74A1-0000005CA7BBD6F9\n000000067F000040020001000000002BC000-000000067F000040020001000000002C0000__0000005D2FFFFB38\n000000067F000040020001000000002BC000-000000067F000040020001000000002C0000__00000073AD3FE6B8\n000000067F000040020001000000002BC000-000000067F000040020001000000002C0000__000000914E3F38F0\n000000067F000040020001000000002BC000-000000067F000040020001000000002C0000__000000931B9A2710\n000000067F000040020001000000002C0000-000000067F000040020001000000002C4000__0000005D2FFFFB38\n000000067F000040020001000000002C0000-000000067F000040020001000000002C4000__00000073AD3FE6B8\n000000067F000040020001000000002C0000-000000067F000040020001000000002C4000__000000914E3F38F0\n000000067F000040020001000000002C0000-000000067F000040020001000000002C4000__000000931B9A2710\n000000067F000040020001000000002C28A8-000000067F000040020001000000002CB271__000000572A7C74A1-0000005CA7BBD6F9\n000000067F000040020001000000002C4000-000000067F000040020001000000002C8000__0000005D2FFFFB38\n000000067F000040020001000000002C4000-000000067F000040020001000000002C8000__00000073AD3FE6B8\n000000067F000040020001000000002C4000-000000067F000040020001000000002C8000__000000914E3F38F0\n000000067F000040020001000000002C4000-000000067F000040020001000000002C8000__000000931B9A2710\n000000067F000040020001000000002C8000-000000067F000040020001000000002CC000__0000005D2FFFFB38\n000000067F000040020001000000002C8000-000000067F000040020001000000002CC000__00000073AD3FE6B8\n000000067F000040020001000000002C8000-000000067F000040020001000000002CC000__000000914E3F38F0\n000000067F000040020001000000002C8000-000000067F000040020001000000002CC000__000000931B9A2710\n000000067F000040020001000000002CB271-000000067F000040020001000000002D3C3E__000000572A7C74A1-0000005CA7BBD6F9\n000000067F000040020001000000002CC000-000000067F000040020001000000002D0000__0000005D2FFFFB38\n000000067F000040020001000000002CC000-000000067F000040020001000000002D0000__00000073AD3FE6B8\n000000067F00004002000100000000
2CC000-000000067F000040020001000000002D0000__000000914E3F38F0\n000000067F000040020001000000002CC000-000000067F000040020001000000002D0000__000000931B9A2710\n000000067F000040020001000000002D0000-000000067F000040020001000000002D4000__0000005D2FFFFB38\n000000067F000040020001000000002D0000-000000067F000040020001000000002D4000__00000073AD3FE6B8\n000000067F000040020001000000002D0000-000000067F000040020001000000002D4000__000000914E3F38F0\n000000067F000040020001000000002D0000-000000067F000040020001000000002D4000__000000931B9A2710\n000000067F000040020001000000002D3C3E-000000067F000040020001000000002DC636__000000572A7C74A1-0000005CA7BBD6F9\n000000067F000040020001000000002D4000-000000067F000040020001000000002D8000__0000005D2FFFFB38\n000000067F000040020001000000002D4000-000000067F000040020001000000002D8000__00000073AD3FE6B8\n000000067F000040020001000000002D4000-000000067F000040020001000000002D8000__000000914E3F38F0\n000000067F000040020001000000002D4000-000000067F000040020001000000002D8000__000000931B9A2710\n000000067F000040020001000000002D8000-000000067F000040020001000000002DC000__0000005D2FFFFB38\n000000067F000040020001000000002D8000-000000067F000040020001000000002DC000__00000073AD3FE6B8\n000000067F000040020001000000002D8000-000000067F000040020001000000002DC000__000000914E3F38F0\n000000067F000040020001000000002D8000-000000067F000040020001000000002DC000__000000931B9A2710\n000000067F000040020001000000002DC000-000000067F000040020001000000002E0000__0000005D2FFFFB38\n000000067F000040020001000000002DC000-000000067F000040020001000000002E0000__00000073AD3FE6B8\n000000067F000040020001000000002DC000-000000067F000040020001000000002E0000__000000914E3F38F0\n000000067F000040020001000000002DC000-000000067F000040020001000000002E0000__000000931B9A2710\n000000067F000040020001000000002DC636-000000067F000040020001000000002E5020__000000572A7C74A1-0000005CA7BBD6F9\n000000067F000040020001000000002E0000-000000067F000040020001000000002E4000__0000005D2FFFFB38\n000000067F000040020001000000002E0000-000000
067F000040020001000000002E4000__00000073AD3FE6B8\n000000067F000040020001000000002E0000-000000067F000040020001000000002E4000__000000914E3F38F0\n000000067F000040020001000000002E0000-000000067F000040020001000000002E4000__000000931B9A2710\n000000067F000040020001000000002E4000-000000067F000040020001000000002E8000__0000005D2FFFFB38\n000000067F000040020001000000002E4000-000000067F000040020001000000002E8000__00000073AD3FE6B8\n000000067F000040020001000000002E4000-000000067F000040020001000000002E8000__000000914E3F38F0\n000000067F000040020001000000002E4000-000000067F000040020001000000002E8000__000000931B9A2710\n000000067F000040020001000000002E5020-000000067F000040020001000000002EDA05__000000572A7C74A1-0000005CA7BBD6F9\n000000067F000040020001000000002E8000-000000067F000040020001000000002EC000__0000005D2FFFFB38\n000000067F000040020001000000002E8000-000000067F000040020001000000002EC000__00000073AD3FE6B8\n000000067F000040020001000000002E8000-000000067F000040020001000000002EC000__000000914E3F38F0\n000000067F000040020001000000002E8000-000000067F000040020001000000002EC000__000000931B9A2710\n000000067F000040020001000000002EC000-000000067F000040020001000000002F0000__0000005D2FFFFB38\n000000067F000040020001000000002EC000-000000067F000040020001000000002F0000__00000073AD3FE6B8\n000000067F000040020001000000002EC000-000000067F000040020001000000002F0000__000000914E3F38F0\n000000067F000040020001000000002EC000-000000067F000040020001000000002F0000__000000931B9A2710\n000000067F000040020001000000002EDA05-000000067F000040020001000000002F63D8__000000572A7C74A1-0000005CA7BBD6F9\n000000067F000040020001000000002F0000-000000067F000040020001000000002F4000__0000005D2FFFFB38\n000000067F000040020001000000002F0000-000000067F000040020001000000002F4000__00000073AD3FE6B8\n000000067F000040020001000000002F0000-000000067F000040020001000000002F4000__000000914E3F38F0\n000000067F000040020001000000002F0000-000000067F000040020001000000002F4000__000000931B9A2710\n000000067F000040020001000000002F4000-000000067F000040020
001000000002F8000__0000005D2FFFFB38\n000000067F000040020001000000002F4000-000000067F000040020001000000002F8000__00000073AD3FE6B8\n000000067F000040020001000000002F4000-000000067F000040020001000000002F8000__000000914E3F38F0\n000000067F000040020001000000002F4000-000000067F000040020001000000002F8000__000000931B9A2710\n000000067F000040020001000000002F63D8-030000000000000000000000000000000002__000000572A7C74A1-0000005CA7BBD6F9\n000000067F000040020001000000002F8000-000000067F000040020001000000002FC000__0000005D2FFFFB38\n000000067F000040020001000000002F8000-000000067F000040020001000000002FC000__00000073AD3FE6B8\n000000067F000040020001000000002F8000-000000067F000040020001000000002FC000__000000914E3F38F0\n000000067F000040020001000000002F8000-000000067F000040020001000000002FC000__000000931B9A2710\n000000067F000040020001000000002FC000-000000067F00004002000100000000300000__0000005D2FFFFB38\n000000067F000040020001000000002FC000-000000067F00004002000100000000300000__00000073AD3FE6B8\n000000067F000040020001000000002FC000-000000067F00004002000100000000300000__000000914E3F38F0\n000000067F000040020001000000002FC000-000000067F00004002000100000000300000__000000931B9A2710\n000000067F00004002000100000000300000-000000067F00004002000100000000304000__0000005D2FFFFB38\n000000067F00004002000100000000300000-000000067F00004002000100000000304000__00000073AD3FE6B8\n000000067F00004002000100000000300000-000000067F00004002000100000000304000__000000914E3F38F0\n000000067F00004002000100000000300000-000000067F00004002000100000000304000__000000931B9A2710\n000000067F0000400200010000000030067A-000000067F0000400200010000000030903C__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000304000-000000067F00004002000100000000308000__0000005D2FFFFB38\n000000067F00004002000100000000304000-000000067F00004002000100000000308000__00000073AD3FE6B8\n000000067F00004002000100000000304000-000000067F00004002000100000000308000__000000914E3F38F0\n000000067F00004002000100000000304000-000000067F0000400200010000000030
8000__000000931B9A2710\n000000067F00004002000100000000308000-000000067F0000400200010000000030C000__0000005D2FFFFB38\n000000067F00004002000100000000308000-000000067F0000400200010000000030C000__00000073AD3FE6B8\n000000067F00004002000100000000308000-000000067F0000400200010000000030C000__000000914E3F38F0\n000000067F00004002000100000000308000-000000067F0000400200010000000030C000__000000931B9A2710\n000000067F0000400200010000000030903C-000000067F00004002000100000000311A14__0000005CA7BBD6F9-000000739A8D1299\n000000067F0000400200010000000030C000-000000067F00004002000100000000310000__0000005D2FFFFB38\n000000067F0000400200010000000030C000-000000067F00004002000100000000310000__00000073AD3FE6B8\n000000067F0000400200010000000030C000-000000067F00004002000100000000310000__000000914E3F38F0\n000000067F0000400200010000000030C000-000000067F00004002000100000000310000__000000931B9A2710\n000000067F00004002000100000000310000-000000067F00004002000100000000314000__0000005D2FFFFB38\n000000067F00004002000100000000310000-000000067F00004002000100000000314000__00000073AD3FE6B8\n000000067F00004002000100000000310000-000000067F00004002000100000000314000__000000914E3F38F0\n000000067F00004002000100000000310000-000000067F00004002000100000000314000__000000931B9A2710\n000000067F00004002000100000000311A14-000000067F0000400200010000000031A404__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000314000-000000067F00004002000100000000318000__0000005D2FFFFB38\n000000067F00004002000100000000314000-000000067F00004002000100000000318000__00000073AD3FE6B8\n000000067F00004002000100000000314000-000000067F00004002000100000000318000__000000914E3F38F0\n000000067F00004002000100000000314000-000000067F00004002000100000000318000__000000931B9A2710\n000000067F00004002000100000000318000-000000067F0000400200010000000031C000__0000005D2FFFFB38\n000000067F00004002000100000000318000-000000067F0000400200010000000031C000__00000073AD3FE6B8\n000000067F00004002000100000000318000-000000067F0000400200010000000031C000__0000009
14E3F38F0\n000000067F00004002000100000000318000-000000067F0000400200010000000031C000__000000931B9A2710\n000000067F0000400200010000000031A404-000000067F00004002000100000000322DE1__0000005CA7BBD6F9-000000739A8D1299\n000000067F0000400200010000000031C000-000000067F00004002000100000000320000__0000005D2FFFFB38\n000000067F0000400200010000000031C000-000000067F00004002000100000000320000__00000073AD3FE6B8\n000000067F0000400200010000000031C000-000000067F00004002000100000000320000__000000914E3F38F0\n000000067F0000400200010000000031C000-000000067F00004002000100000000320000__000000931B9A2710\n000000067F00004002000100000000320000-000000067F00004002000100000000324000__0000005D2FFFFB38\n000000067F00004002000100000000320000-000000067F00004002000100000000324000__00000073AD3FE6B8\n000000067F00004002000100000000320000-000000067F00004002000100000000324000__000000914E3F38F0\n000000067F00004002000100000000320000-000000067F00004002000100000000324000__000000931B9A2710\n000000067F00004002000100000000322DE1-000000067F0000400200010000000032B7D4__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000324000-000000067F00004002000100000000328000__0000005D2FFFFB38\n000000067F00004002000100000000324000-000000067F00004002000100000000328000__00000073AD3FE6B8\n000000067F00004002000100000000324000-000000067F00004002000100000000328000__000000914E3F38F0\n000000067F00004002000100000000324000-000000067F00004002000100000000328000__000000931B9A2710\n000000067F00004002000100000000328000-000000067F0000400200010000000032C000__0000005D2FFFFB38\n000000067F00004002000100000000328000-000000067F0000400200010000000032C000__00000073AD3FE6B8\n000000067F00004002000100000000328000-000000067F0000400200010000000032C000__000000914E3F38F0\n000000067F00004002000100000000328000-000000067F0000400200010000000032C000__000000931B9A2710\n000000067F0000400200010000000032B7D4-000000067F000040020001000000003341AB__0000005CA7BBD6F9-000000739A8D1299\n000000067F0000400200010000000032C000-000000067F00004002000100000000330000__000
0005D2FFFFB38\n000000067F0000400200010000000032C000-000000067F00004002000100000000330000__00000073AD3FE6B8\n000000067F0000400200010000000032C000-000000067F00004002000100000000330000__000000914E3F38F0\n000000067F0000400200010000000032C000-000000067F00004002000100000000330000__000000931B9A2710\n000000067F00004002000100000000330000-000000067F00004002000100000000334000__0000005D2FFFFB38\n000000067F00004002000100000000330000-000000067F00004002000100000000334000__00000073AD3FE6B8\n000000067F00004002000100000000330000-000000067F00004002000100000000334000__000000914E3F38F0\n000000067F00004002000100000000330000-000000067F00004002000100000000334000__000000931B9A2710\n000000067F00004002000100000000334000-000000067F00004002000100000000338000__0000005D2FFFFB38\n000000067F00004002000100000000334000-000000067F00004002000100000000338000__00000073AD3FE6B8\n000000067F00004002000100000000334000-000000067F00004002000100000000338000__000000914E3F38F0\n000000067F00004002000100000000334000-000000067F00004002000100000000338000__000000931B9A2710\n000000067F000040020001000000003341AB-000000067F0000400200010000000033CB80__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000338000-000000067F0000400200010000000033C000__0000005D2FFFFB38\n000000067F00004002000100000000338000-000000067F0000400200010000000033C000__00000073AD3FE6B8\n000000067F00004002000100000000338000-000000067F0000400200010000000033C000__000000914E3F38F0\n000000067F00004002000100000000338000-000000067F0000400200010000000033C000__000000931B9A2710\n000000067F0000400200010000000033C000-000000067F00004002000100000000340000__0000005D2FFFFB38\n000000067F0000400200010000000033C000-000000067F00004002000100000000340000__00000073AD3FE6B8\n000000067F0000400200010000000033C000-000000067F00004002000100000000340000__000000914E3F38F0\n000000067F0000400200010000000033C000-000000067F00004002000100000000340000__000000931B9A2710\n000000067F0000400200010000000033CB80-000000067F0000400200010000000034554A__0000005CA7BBD6F9-000000739A8D1299
\n000000067F00004002000100000000340000-000000067F00004002000100000000344000__00000073AD3FE6B8\n000000067F00004002000100000000340000-000000067F00004002000100000000344000__000000914E3F38F0\n000000067F00004002000100000000340000-000000067F00004002000100000000344000__000000931B9A2710\n000000067F00004002000100000000340000-030000000000000000000000000000000002__0000005D2FFFFB38\n000000067F00004002000100000000344000-000000067F00004002000100000000348000__00000073AD3FE6B8\n000000067F00004002000100000000344000-000000067F00004002000100000000348000__000000914E3F38F0\n000000067F00004002000100000000344000-000000067F00004002000100000000348000__000000931B9A2710\n000000067F0000400200010000000034554A-000000067F0000400200010000000034DF2D__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000348000-000000067F0000400200010000000034C000__00000073AD3FE6B8\n000000067F00004002000100000000348000-000000067F0000400200010000000034C000__000000914E3F38F0\n000000067F00004002000100000000348000-000000067F0000400200010000000034C000__000000931B9A2710\n000000067F0000400200010000000034C000-000000067F00004002000100000000350000__00000073AD3FE6B8\n000000067F0000400200010000000034C000-000000067F00004002000100000000350000__000000914E3F38F0\n000000067F0000400200010000000034C000-000000067F00004002000100000000350000__000000931B9A2710\n000000067F0000400200010000000034DF2D-000000067F00004002000100000000356917__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000350000-000000067F00004002000100000000354000__00000073AD3FE6B8\n000000067F00004002000100000000350000-000000067F00004002000100000000354000__000000914E3F38F0\n000000067F00004002000100000000350000-000000067F00004002000100000000354000__000000931B9A2710\n000000067F00004002000100000000354000-000000067F00004002000100000000358000__00000073AD3FE6B8\n000000067F00004002000100000000354000-000000067F00004002000100000000358000__000000914E3F38F0\n000000067F00004002000100000000354000-000000067F00004002000100000000358000__000000931B9A2710\n000000067F0
0004002000100000000356917-000000067F0000400200010000000035F303__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000358000-000000067F0000400200010000000035C000__00000073AD3FE6B8\n000000067F00004002000100000000358000-000000067F0000400200010000000035C000__000000914E3F38F0\n000000067F00004002000100000000358000-000000067F0000400200010000000035C000__000000931B9A2710\n000000067F0000400200010000000035C000-000000067F00004002000100000000360000__00000073AD3FE6B8\n000000067F0000400200010000000035C000-000000067F00004002000100000000360000__000000914E3F38F0\n000000067F0000400200010000000035C000-000000067F00004002000100000000360000__000000931B9A2710\n000000067F0000400200010000000035F303-000000067F00004002000100000000367CE4__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000360000-000000067F00004002000100000000364000__00000073AD3FE6B8\n000000067F00004002000100000000360000-000000067F00004002000100000000364000__000000914E3F38F0\n000000067F00004002000100000000360000-000000067F00004002000100000000364000__000000931B9A2710\n000000067F00004002000100000000364000-000000067F00004002000100000000368000__00000073AD3FE6B8\n000000067F00004002000100000000364000-000000067F00004002000100000000368000__000000914E3F38F0\n000000067F00004002000100000000364000-000000067F00004002000100000000368000__000000931B9A2710\n000000067F00004002000100000000367CE4-000000067F000040020001000000003706C3__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000368000-000000067F0000400200010000000036C000__00000073AD3FE6B8\n000000067F00004002000100000000368000-000000067F0000400200010000000036C000__000000914E3F38F0\n000000067F00004002000100000000368000-000000067F0000400200010000000036C000__000000931B9A2710\n000000067F0000400200010000000036C000-000000067F00004002000100000000370000__00000073AD3FE6B8\n000000067F0000400200010000000036C000-000000067F00004002000100000000370000__000000914E3F38F0\n000000067F0000400200010000000036C000-000000067F00004002000100000000370000__000000931B9A2710\n0000000
67F00004002000100000000370000-000000067F00004002000100000000374000__00000073AD3FE6B8\n000000067F00004002000100000000370000-000000067F00004002000100000000374000__000000914E3F38F0\n000000067F00004002000100000000370000-000000067F00004002000100000000374000__000000931B9A2710\n000000067F000040020001000000003706C3-000000067F00004002000100000000379087__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000374000-000000067F00004002000100000000378000__00000073AD3FE6B8\n000000067F00004002000100000000374000-000000067F00004002000100000000378000__000000914E3F38F0\n000000067F00004002000100000000374000-000000067F00004002000100000000378000__000000931B9A2710\n000000067F00004002000100000000378000-000000067F0000400200010000000037C000__00000073AD3FE6B8\n000000067F00004002000100000000378000-000000067F0000400200010000000037C000__000000914E3F38F0\n000000067F00004002000100000000378000-000000067F0000400200010000000037C000__000000931B9A2710\n000000067F00004002000100000000379087-000000067F00004002000100000000381A53__0000005CA7BBD6F9-000000739A8D1299\n000000067F0000400200010000000037C000-000000067F00004002000100000000380000__00000073AD3FE6B8\n000000067F0000400200010000000037C000-000000067F00004002000100000000380000__000000914E3F38F0\n000000067F0000400200010000000037C000-000000067F00004002000100000000380000__000000931B9A2710\n000000067F00004002000100000000380000-000000067F00004002000100000000384000__00000073AD3FE6B8\n000000067F00004002000100000000380000-000000067F00004002000100000000384000__000000914E3F38F0\n000000067F00004002000100000000380000-000000067F00004002000100000000384000__000000931B9A2710\n000000067F00004002000100000000381A53-000000067F0000400200010000000038A43A__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000384000-000000067F00004002000100000000388000__00000073AD3FE6B8\n000000067F00004002000100000000384000-000000067F00004002000100000000388000__000000914E3F38F0\n000000067F00004002000100000000384000-000000067F00004002000100000000388000__000000931B9A2710\n000
000067F00004002000100000000388000-000000067F0000400200010000000038C000__00000073AD3FE6B8\n000000067F00004002000100000000388000-000000067F0000400200010000000038C000__000000914E3F38F0\n000000067F00004002000100000000388000-000000067F0000400200010000000038C000__000000931B9A2710\n000000067F0000400200010000000038A43A-000000067F00004002000100000000392E24__0000005CA7BBD6F9-000000739A8D1299\n000000067F0000400200010000000038C000-000000067F00004002000100000000390000__00000073AD3FE6B8\n000000067F0000400200010000000038C000-000000067F00004002000100000000390000__000000914E3F38F0\n000000067F0000400200010000000038C000-000000067F00004002000100000000390000__000000931B9A2710\n000000067F00004002000100000000390000-000000067F00004002000100000000394000__00000073AD3FE6B8\n000000067F00004002000100000000390000-000000067F00004002000100000000394000__000000914E3F38F0\n000000067F00004002000100000000390000-000000067F00004002000100000000394000__000000931B9A2710\n000000067F00004002000100000000392E24-000000067F0000400200010000000039B80E__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000394000-000000067F00004002000100000000398000__00000073AD3FE6B8\n000000067F00004002000100000000394000-000000067F00004002000100000000398000__000000914E3F38F0\n000000067F00004002000100000000394000-000000067F00004002000100000000398000__000000931B9A2710\n000000067F00004002000100000000398000-000000067F0000400200010000000039C000__00000073AD3FE6B8\n000000067F00004002000100000000398000-000000067F0000400200010000000039C000__000000914E3F38F0\n000000067F00004002000100000000398000-000000067F0000400200010000000039C000__000000931B9A2710\n000000067F0000400200010000000039B80E-000000067F000040020001000000003A41E4__0000005CA7BBD6F9-000000739A8D1299\n000000067F0000400200010000000039C000-000000067F000040020001000000003A0000__00000073AD3FE6B8\n000000067F0000400200010000000039C000-000000067F000040020001000000003A0000__000000914E3F38F0\n000000067F0000400200010000000039C000-000000067F000040020001000000003A0000__000000931B9A2710\
n000000067F000040020001000000003A0000-000000067F000040020001000000003A4000__00000073AD3FE6B8\n000000067F000040020001000000003A0000-000000067F000040020001000000003A4000__000000914E3F38F0\n000000067F000040020001000000003A0000-000000067F000040020001000000003A4000__000000931B9A2710\n000000067F000040020001000000003A4000-000000067F000040020001000000003A8000__00000073AD3FE6B8\n000000067F000040020001000000003A4000-000000067F000040020001000000003A8000__000000914E3F38F0\n000000067F000040020001000000003A4000-000000067F000040020001000000003A8000__000000931B9A2710\n000000067F000040020001000000003A41E4-000000067F000040020001000000003ACBC0__0000005CA7BBD6F9-000000739A8D1299\n000000067F000040020001000000003A8000-000000067F000040020001000000003AC000__00000073AD3FE6B8\n000000067F000040020001000000003A8000-000000067F000040020001000000003AC000__000000914E3F38F0\n000000067F000040020001000000003A8000-000000067F000040020001000000003AC000__000000931B9A2710\n000000067F000040020001000000003AC000-000000067F000040020001000000003B0000__00000073AD3FE6B8\n000000067F000040020001000000003AC000-000000067F000040020001000000003B0000__000000914E3F38F0\n000000067F000040020001000000003AC000-000000067F000040020001000000003B0000__000000931B9A2710\n000000067F000040020001000000003ACBC0-000000067F000040020001000000003B5581__0000005CA7BBD6F9-000000739A8D1299\n000000067F000040020001000000003B0000-000000067F000040020001000000003B4000__00000073AD3FE6B8\n000000067F000040020001000000003B0000-000000067F000040020001000000003B4000__000000914E3F38F0\n000000067F000040020001000000003B0000-000000067F000040020001000000003B4000__000000931B9A2710\n000000067F000040020001000000003B4000-000000067F000040020001000000003B8000__00000073AD3FE6B8\n000000067F000040020001000000003B4000-000000067F000040020001000000003B8000__000000914E3F38F0\n000000067F000040020001000000003B4000-000000067F000040020001000000003B8000__000000931B9A2710\n000000067F000040020001000000003B5581-000000067F000040020001000000003BDF45__0000005CA7BBD6F9-000000739A8D1
299\n000000067F000040020001000000003B8000-000000067F000040020001000000003BC000__00000073AD3FE6B8\n000000067F000040020001000000003B8000-000000067F000040020001000000003BC000__000000914E3F38F0\n000000067F000040020001000000003B8000-000000067F000040020001000000003BC000__000000931B9A2710\n000000067F000040020001000000003BC000-000000067F000040020001000000003C0000__00000073AD3FE6B8\n000000067F000040020001000000003BC000-000000067F000040020001000000003C0000__000000914E3F38F0\n000000067F000040020001000000003BC000-000000067F000040020001000000003C0000__000000931B9A2710\n000000067F000040020001000000003BDF45-000000067F000040020001000000003C694A__0000005CA7BBD6F9-000000739A8D1299\n000000067F000040020001000000003C0000-000000067F000040020001000000003C4000__00000073AD3FE6B8\n000000067F000040020001000000003C0000-000000067F000040020001000000003C4000__000000914E3F38F0\n000000067F000040020001000000003C0000-000000067F000040020001000000003C4000__000000931B9A2710\n000000067F000040020001000000003C4000-000000067F000040020001000000003C8000__00000073AD3FE6B8\n000000067F000040020001000000003C4000-000000067F000040020001000000003C8000__000000914E3F38F0\n000000067F000040020001000000003C4000-000000067F000040020001000000003C8000__000000931B9A2710\n000000067F000040020001000000003C694A-000000067F000040020001000000003CF343__0000005CA7BBD6F9-000000739A8D1299\n000000067F000040020001000000003C8000-000000067F000040020001000000003CC000__00000073AD3FE6B8\n000000067F000040020001000000003C8000-000000067F000040020001000000003CC000__000000914E3F38F0\n000000067F000040020001000000003C8000-000000067F000040020001000000003CC000__000000931B9A2710\n000000067F000040020001000000003CC000-000000067F000040020001000000003D0000__00000073AD3FE6B8\n000000067F000040020001000000003CC000-000000067F000040020001000000003D0000__000000914E3F38F0\n000000067F000040020001000000003CC000-000000067F000040020001000000003D0000__000000931B9A2710\n000000067F000040020001000000003CF343-000000067F000040020001000000003D7D31__0000005CA7BBD6F9-000000739
A8D1299\n000000067F000040020001000000003D0000-000000067F000040020001000000003D4000__00000073AD3FE6B8\n000000067F000040020001000000003D0000-000000067F000040020001000000003D4000__000000914E3F38F0\n000000067F000040020001000000003D0000-000000067F000040020001000000003D4000__000000931B9A2710\n000000067F000040020001000000003D4000-000000067F000040020001000000003D8000__00000073AD3FE6B8\n000000067F000040020001000000003D4000-000000067F000040020001000000003D8000__000000914E3F38F0\n000000067F000040020001000000003D4000-000000067F000040020001000000003D8000__000000931B9A2710\n000000067F000040020001000000003D7D31-000000067F000040020001000000003E0701__0000005CA7BBD6F9-000000739A8D1299\n000000067F000040020001000000003D8000-000000067F000040020001000000003DC000__00000073AD3FE6B8\n000000067F000040020001000000003D8000-000000067F000040020001000000003DC000__000000914E3F38F0\n000000067F000040020001000000003D8000-000000067F000040020001000000003DC000__000000931B9A2710\n000000067F000040020001000000003DC000-000000067F000040020001000000003E0000__00000073AD3FE6B8\n000000067F000040020001000000003DC000-000000067F000040020001000000003E0000__000000914E3F38F0\n000000067F000040020001000000003DC000-000000067F000040020001000000003E0000__000000931B9A2710\n000000067F000040020001000000003E0000-000000067F000040020001000000003E4000__00000073AD3FE6B8\n000000067F000040020001000000003E0000-000000067F000040020001000000003E4000__000000914E3F38F0\n000000067F000040020001000000003E0000-000000067F000040020001000000003E4000__000000931B9A2710\n000000067F000040020001000000003E0701-000000067F000040020001000000003E90C9__0000005CA7BBD6F9-000000739A8D1299\n000000067F000040020001000000003E4000-000000067F000040020001000000003E8000__00000073AD3FE6B8\n000000067F000040020001000000003E4000-000000067F000040020001000000003E8000__000000914E3F38F0\n000000067F000040020001000000003E4000-000000067F000040020001000000003E8000__000000931B9A2710\n000000067F000040020001000000003E8000-000000067F000040020001000000003EC000__00000073AD3FE6B8\n0000
00067F000040020001000000003E8000-000000067F000040020001000000003EC000__000000914E3F38F0\n000000067F000040020001000000003E8000-000000067F000040020001000000003EC000__000000931B9A2710\n000000067F000040020001000000003E90C9-000000067F000040020001000000003F1A8D__0000005CA7BBD6F9-000000739A8D1299\n000000067F000040020001000000003EC000-000000067F000040020001000000003F0000__00000073AD3FE6B8\n000000067F000040020001000000003EC000-000000067F000040020001000000003F0000__000000914E3F38F0\n000000067F000040020001000000003EC000-000000067F000040020001000000003F0000__000000931B9A2710\n000000067F000040020001000000003F0000-000000067F000040020001000000003F4000__00000073AD3FE6B8\n000000067F000040020001000000003F0000-000000067F000040020001000000003F4000__000000914E3F38F0\n000000067F000040020001000000003F0000-000000067F000040020001000000003F4000__000000931B9A2710\n000000067F000040020001000000003F1A8D-000000067F000040020001000000003FA45C__0000005CA7BBD6F9-000000739A8D1299\n000000067F000040020001000000003F4000-000000067F000040020001000000003F8000__00000073AD3FE6B8\n000000067F000040020001000000003F4000-000000067F000040020001000000003F8000__000000914E3F38F0\n000000067F000040020001000000003F4000-000000067F000040020001000000003F8000__000000931B9A2710\n000000067F000040020001000000003F8000-000000067F000040020001000000003FC000__00000073AD3FE6B8\n000000067F000040020001000000003F8000-000000067F000040020001000000003FC000__000000914E3F38F0\n000000067F000040020001000000003F8000-000000067F000040020001000000003FC000__000000931B9A2710\n000000067F000040020001000000003FA45C-000000067F00004002000100000000402E54__0000005CA7BBD6F9-000000739A8D1299\n000000067F000040020001000000003FC000-000000067F00004002000100000000400000__00000073AD3FE6B8\n000000067F000040020001000000003FC000-000000067F00004002000100000000400000__000000914E3F38F0\n000000067F000040020001000000003FC000-000000067F00004002000100000000400000__000000931B9A2710\n000000067F00004002000100000000400000-000000067F00004002000100000000404000__00000073AD3FE6B8\n
000000067F00004002000100000000400000-000000067F00004002000100000000404000__000000914E3F38F0\n000000067F00004002000100000000400000-000000067F00004002000100000000404000__000000931B9A2710\n000000067F00004002000100000000402E54-000000067F0000400200010000000040B84B__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000404000-000000067F00004002000100000000408000__00000073AD3FE6B8\n000000067F00004002000100000000404000-000000067F00004002000100000000408000__000000914E3F38F0\n000000067F00004002000100000000404000-000000067F00004002000100000000408000__000000931B9A2710\n000000067F00004002000100000000408000-000000067F0000400200010000000040C000__00000073AD3FE6B8\n000000067F00004002000100000000408000-000000067F0000400200010000000040C000__000000914E3F38F0\n000000067F00004002000100000000408000-000000067F0000400200010000000040C000__000000931B9A2710\n000000067F0000400200010000000040B84B-000000067F00004002000100000000414230__0000005CA7BBD6F9-000000739A8D1299\n000000067F0000400200010000000040C000-000000067F00004002000100000000410000__00000073AD3FE6B8\n000000067F0000400200010000000040C000-000000067F00004002000100000000410000__000000914E3F38F0\n000000067F0000400200010000000040C000-000000067F00004002000100000000410000__000000931B9A2710\n000000067F00004002000100000000410000-000000067F00004002000100000000414000__00000073AD3FE6B8\n000000067F00004002000100000000410000-000000067F00004002000100000000414000__000000914E3F38F0\n000000067F00004002000100000000410000-000000067F00004002000100000000414000__000000931B9A2710\n000000067F00004002000100000000414000-000000067F00004002000100000000418000__00000073AD3FE6B8\n000000067F00004002000100000000414000-000000067F00004002000100000000418000__000000914E3F38F0\n000000067F00004002000100000000414000-000000067F00004002000100000000418000__000000931B9A2710\n000000067F00004002000100000000414230-000000067F0000400200010000000041CC01__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000418000-000000067F0000400200010000000041C000__00000073AD3FE6
B8\n000000067F00004002000100000000418000-000000067F0000400200010000000041C000__000000914E3F38F0\n000000067F00004002000100000000418000-000000067F0000400200010000000041C000__000000931B9A2710\n000000067F0000400200010000000041C000-000000067F00004002000100000000420000__00000073AD3FE6B8\n000000067F0000400200010000000041C000-000000067F00004002000100000000420000__000000914E3F38F0\n000000067F0000400200010000000041C000-000000067F00004002000100000000420000__000000931B9A2710\n000000067F0000400200010000000041CC01-000000067F000040020001000000004255BE__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000420000-000000067F00004002000100000000424000__00000073AD3FE6B8\n000000067F00004002000100000000420000-000000067F00004002000100000000424000__000000914E3F38F0\n000000067F00004002000100000000420000-000000067F00004002000100000000424000__000000931B9A2710\n000000067F00004002000100000000424000-000000067F00004002000100000000428000__00000073AD3FE6B8\n000000067F00004002000100000000424000-000000067F00004002000100000000428000__000000914E3F38F0\n000000067F00004002000100000000424000-000000067F00004002000100000000428000__000000931B9A2710\n000000067F000040020001000000004255BE-000000067F0000400200010000000042DF85__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000428000-000000067F0000400200010000000042C000__00000073AD3FE6B8\n000000067F00004002000100000000428000-000000067F0000400200010000000042C000__000000914E3F38F0\n000000067F00004002000100000000428000-000000067F0000400200010000000042C000__000000931B9A2710\n000000067F0000400200010000000042C000-000000067F00004002000100000000430000__00000073AD3FE6B8\n000000067F0000400200010000000042C000-000000067F00004002000100000000430000__000000914E3F38F0\n000000067F0000400200010000000042C000-000000067F00004002000100000000430000__000000931B9A2710\n000000067F0000400200010000000042DF85-000000067F00004002000100000000436961__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000430000-000000067F00004002000100000000434000__00000073AD
3FE6B8\n000000067F00004002000100000000430000-000000067F00004002000100000000434000__000000914E3F38F0\n000000067F00004002000100000000430000-000000067F00004002000100000000434000__000000931B9A2710\n000000067F00004002000100000000434000-000000067F00004002000100000000438000__00000073AD3FE6B8\n000000067F00004002000100000000434000-000000067F00004002000100000000438000__000000914E3F38F0\n000000067F00004002000100000000434000-000000067F00004002000100000000438000__000000931B9A2710\n000000067F00004002000100000000436961-000000067F0000400200010000000043F354__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000438000-000000067F0000400200010000000043C000__00000073AD3FE6B8\n000000067F00004002000100000000438000-000000067F0000400200010000000043C000__000000914E3F38F0\n000000067F00004002000100000000438000-000000067F0000400200010000000043C000__000000931B9A2710\n000000067F0000400200010000000043C000-000000067F00004002000100000000440000__00000073AD3FE6B8\n000000067F0000400200010000000043C000-000000067F00004002000100000000440000__000000914E3F38F0\n000000067F0000400200010000000043C000-000000067F00004002000100000000440000__000000931B9A2710\n000000067F0000400200010000000043F354-000000067F00004002000100000000447D42__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000440000-000000067F00004002000100000000444000__00000073AD3FE6B8\n000000067F00004002000100000000440000-000000067F00004002000100000000444000__000000914E3F38F0\n000000067F00004002000100000000440000-000000067F00004002000100000000444000__000000931B9A2710\n000000067F00004002000100000000444000-000000067F00004002000100000000448000__00000073AD3FE6B8\n000000067F00004002000100000000444000-000000067F00004002000100000000448000__000000914E3F38F0\n000000067F00004002000100000000444000-000000067F00004002000100000000448000__000000931B9A2710\n000000067F00004002000100000000447D42-000000067F00004002000100000000450730__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000448000-000000067F0000400200010000000044C000__000000
73AD3FE6B8\n000000067F00004002000100000000448000-000000067F0000400200010000000044C000__000000914E3F38F0\n000000067F00004002000100000000448000-000000067F0000400200010000000044C000__000000931B9A2710\n000000067F0000400200010000000044C000-000000067F00004002000100000000450000__00000073AD3FE6B8\n000000067F0000400200010000000044C000-000000067F00004002000100000000450000__000000914E3F38F0\n000000067F0000400200010000000044C000-000000067F00004002000100000000450000__000000931B9A2710\n000000067F00004002000100000000450000-000000067F00004002000100000000454000__00000073AD3FE6B8\n000000067F00004002000100000000450000-000000067F00004002000100000000454000__000000914E3F38F0\n000000067F00004002000100000000450000-000000067F00004002000100000000454000__000000931B9A2710\n000000067F00004002000100000000450730-000000067F00004002000100000000459116__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000454000-000000067F00004002000100000000458000__00000073AD3FE6B8\n000000067F00004002000100000000454000-000000067F00004002000100000000458000__000000914E3F38F0\n000000067F00004002000100000000454000-000000067F00004002000100000000458000__000000931B9A2710\n000000067F00004002000100000000458000-000000067F0000400200010000000045C000__00000073AD3FE6B8\n000000067F00004002000100000000458000-000000067F0000400200010000000045C000__000000914E3F38F0\n000000067F00004002000100000000458000-000000067F0000400200010000000045C000__000000931B9A2710\n000000067F00004002000100000000459116-000000067F00004002000100000000461ACC__0000005CA7BBD6F9-000000739A8D1299\n000000067F0000400200010000000045C000-000000067F00004002000100000000460000__00000073AD3FE6B8\n000000067F0000400200010000000045C000-000000067F00004002000100000000460000__000000914E3F38F0\n000000067F0000400200010000000045C000-000000067F00004002000100000000460000__000000931B9A2710\n000000067F00004002000100000000460000-000000067F00004002000100000000464000__00000073AD3FE6B8\n000000067F00004002000100000000460000-000000067F00004002000100000000464000__000000914E3F38F0\n0
00000067F00004002000100000000460000-000000067F00004002000100000000464000__000000931B9A2710\n000000067F00004002000100000000461ACC-000000067F0000400200010000000046A495__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000464000-000000067F00004002000100000000468000__00000073AD3FE6B8\n000000067F00004002000100000000464000-000000067F00004002000100000000468000__000000914E3F38F0\n000000067F00004002000100000000464000-000000067F00004002000100000000468000__000000931B9A2710\n000000067F00004002000100000000468000-000000067F0000400200010000000046C000__00000073AD3FE6B8\n000000067F00004002000100000000468000-000000067F0000400200010000000046C000__000000914E3F38F0\n000000067F00004002000100000000468000-000000067F0000400200010000000046C000__000000931B9A2710\n000000067F0000400200010000000046A495-000000067F00004002000100000000472E71__0000005CA7BBD6F9-000000739A8D1299\n000000067F0000400200010000000046C000-000000067F00004002000100000000470000__00000073AD3FE6B8\n000000067F0000400200010000000046C000-000000067F00004002000100000000470000__000000914E3F38F0\n000000067F0000400200010000000046C000-000000067F00004002000100000000470000__000000931B9A2710\n000000067F00004002000100000000470000-000000067F00004002000100000000474000__00000073AD3FE6B8\n000000067F00004002000100000000470000-000000067F00004002000100000000474000__000000914E3F38F0\n000000067F00004002000100000000470000-000000067F00004002000100000000474000__000000931B9A2710\n000000067F00004002000100000000472E71-000000067F0000400200010000000047B85E__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000474000-000000067F00004002000100000000478000__00000073AD3FE6B8\n000000067F00004002000100000000474000-000000067F00004002000100000000478000__000000914E3F38F0\n000000067F00004002000100000000474000-000000067F00004002000100000000478000__000000931B9A2710\n000000067F00004002000100000000478000-000000067F0000400200010000000047C000__00000073AD3FE6B8\n000000067F00004002000100000000478000-000000067F0000400200010000000047C000__000000914E3F38F
0\n000000067F00004002000100000000478000-000000067F0000400200010000000047C000__000000931B9A2710\n000000067F0000400200010000000047B85E-000000067F0000400200010000000048424F__0000005CA7BBD6F9-000000739A8D1299\n000000067F0000400200010000000047C000-000000067F00004002000100000000480000__00000073AD3FE6B8\n000000067F0000400200010000000047C000-000000067F00004002000100000000480000__000000914E3F38F0\n000000067F0000400200010000000047C000-000000067F00004002000100000000480000__000000931B9A2710\n000000067F00004002000100000000480000-000000067F00004002000100000000484000__00000073AD3FE6B8\n000000067F00004002000100000000480000-000000067F00004002000100000000484000__000000914E3F38F0\n000000067F00004002000100000000480000-000000067F00004002000100000000484000__000000931B9A2710\n000000067F00004002000100000000484000-000000067F00004002000100000000488000__00000073AD3FE6B8\n000000067F00004002000100000000484000-000000067F00004002000100000000488000__000000914E3F38F0\n000000067F00004002000100000000484000-000000067F00004002000100000000488000__000000931B9A2710\n000000067F0000400200010000000048424F-000000067F0000400200010000000048CC2F__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000488000-000000067F0000400200010000000048C000__00000073AD3FE6B8\n000000067F00004002000100000000488000-000000067F0000400200010000000048C000__000000914E3F38F0\n000000067F00004002000100000000488000-000000067F0000400200010000000048C000__000000931B9A2710\n000000067F0000400200010000000048C000-000000067F00004002000100000000490000__00000073AD3FE6B8\n000000067F0000400200010000000048C000-000000067F00004002000100000000490000__000000914E3F38F0\n000000067F0000400200010000000048C000-000000067F00004002000100000000490000__000000931B9A2710\n000000067F0000400200010000000048CC2F-000000067F00004002000100000000495603__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000490000-000000067F00004002000100000000494000__00000073AD3FE6B8\n000000067F00004002000100000000490000-000000067F00004002000100000000494000__000000914E3
F38F0\n000000067F00004002000100000000490000-000000067F00004002000100000000494000__000000931B9A2710\n000000067F00004002000100000000494000-000000067F00004002000100000000498000__00000073AD3FE6B8\n000000067F00004002000100000000494000-000000067F00004002000100000000498000__000000914E3F38F0\n000000067F00004002000100000000494000-000000067F00004002000100000000498000__000000931B9A2710\n000000067F00004002000100000000495603-000000067F0000400200010000000049DFC0__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000498000-000000067F0000400200010000000049C000__00000073AD3FE6B8\n000000067F00004002000100000000498000-000000067F0000400200010000000049C000__000000914E3F38F0\n000000067F00004002000100000000498000-000000067F0000400200010000000049C000__000000931B9A2710\n000000067F0000400200010000000049C000-000000067F000040020001000000004A0000__00000073AD3FE6B8\n000000067F0000400200010000000049C000-000000067F000040020001000000004A0000__000000914E3F38F0\n000000067F0000400200010000000049C000-000000067F000040020001000000004A0000__000000931B9A2710\n000000067F0000400200010000000049DFC0-000000067F000040020001000000004A698B__0000005CA7BBD6F9-000000739A8D1299\n000000067F000040020001000000004A0000-000000067F000040020001000000004A4000__00000073AD3FE6B8\n000000067F000040020001000000004A0000-000000067F000040020001000000004A4000__000000914E3F38F0\n000000067F000040020001000000004A0000-000000067F000040020001000000004A4000__000000931B9A2710\n000000067F000040020001000000004A4000-000000067F000040020001000000004A8000__00000073AD3FE6B8\n000000067F000040020001000000004A4000-000000067F000040020001000000004A8000__000000914E3F38F0\n000000067F000040020001000000004A4000-000000067F000040020001000000004A8000__000000931B9A2710\n000000067F000040020001000000004A698B-000000067F000040020001000000004AF374__0000005CA7BBD6F9-000000739A8D1299\n000000067F000040020001000000004A8000-000000067F000040020001000000004AC000__00000073AD3FE6B8\n000000067F000040020001000000004A8000-000000067F000040020001000000004AC000__0000009
14E3F38F0\n000000067F000040020001000000004A8000-000000067F000040020001000000004AC000__000000931B9A2710\n000000067F000040020001000000004AC000-000000067F000040020001000000004B0000__00000073AD3FE6B8\n000000067F000040020001000000004AC000-000000067F000040020001000000004B0000__000000914E3F38F0\n000000067F000040020001000000004AC000-000000067F000040020001000000004B0000__000000931B9A2710\n000000067F000040020001000000004AF374-000000067F000040020001000000004B7D7B__0000005CA7BBD6F9-000000739A8D1299\n000000067F000040020001000000004B0000-000000067F000040020001000000004B4000__00000073AD3FE6B8\n000000067F000040020001000000004B0000-000000067F000040020001000000004B4000__000000914E3F38F0\n000000067F000040020001000000004B0000-000000067F000040020001000000004B4000__000000931B9A2710\n000000067F000040020001000000004B4000-000000067F000040020001000000004B8000__00000073AD3FE6B8\n000000067F000040020001000000004B4000-000000067F000040020001000000004B8000__000000914E3F38F0\n000000067F000040020001000000004B4000-000000067F000040020001000000004B8000__000000931B9A2710\n000000067F000040020001000000004B7D7B-000000067F000040020001000000004C0764__0000005CA7BBD6F9-000000739A8D1299\n000000067F000040020001000000004B8000-000000067F000040020001000000004BC000__00000073AD3FE6B8\n000000067F000040020001000000004B8000-000000067F000040020001000000004BC000__000000914E3F38F0\n000000067F000040020001000000004B8000-000000067F000040020001000000004BC000__000000931B9A2710\n000000067F000040020001000000004BC000-000000067F000040020001000000004C0000__00000073AD3FE6B8\n000000067F000040020001000000004BC000-000000067F000040020001000000004C0000__000000914E3F38F0\n000000067F000040020001000000004BC000-000000067F000040020001000000004C0000__000000931B9A2710\n000000067F000040020001000000004C0000-000000067F000040020001000000004C4000__00000073AD3FE6B8\n000000067F000040020001000000004C0000-000000067F000040020001000000004C4000__000000914E3F38F0\n000000067F000040020001000000004C0000-000000067F000040020001000000004C4000__000000931B9A2710\n00
0000067F000040020001000000004C0764-000000067F000040020001000000004C9146__0000005CA7BBD6F9-000000739A8D1299\n000000067F000040020001000000004C4000-000000067F000040020001000000004C8000__00000073AD3FE6B8\n000000067F000040020001000000004C4000-000000067F000040020001000000004C8000__000000914E3F38F0\n000000067F000040020001000000004C4000-000000067F000040020001000000004C8000__000000931B9A2710\n000000067F000040020001000000004C8000-000000067F000040020001000000004CC000__00000073AD3FE6B8\n000000067F000040020001000000004C8000-000000067F000040020001000000004CC000__000000914E3F38F0\n000000067F000040020001000000004C8000-000000067F000040020001000000004CC000__000000931B9A2710\n000000067F000040020001000000004C9146-000000067F000040020001000000004D1B16__0000005CA7BBD6F9-000000739A8D1299\n000000067F000040020001000000004CC000-000000067F000040020001000000004D0000__00000073AD3FE6B8\n000000067F000040020001000000004CC000-000000067F000040020001000000004D0000__000000914E3F38F0\n000000067F000040020001000000004CC000-000000067F000040020001000000004D0000__000000931B9A2710\n000000067F000040020001000000004D0000-000000067F000040020001000000004D4000__00000073AD3FE6B8\n000000067F000040020001000000004D0000-000000067F000040020001000000004D4000__000000914E3F38F0\n000000067F000040020001000000004D0000-000000067F000040020001000000004D4000__000000931B9A2710\n000000067F000040020001000000004D1B16-000000067F000040020001000000004DA4D9__0000005CA7BBD6F9-000000739A8D1299\n000000067F000040020001000000004D4000-000000067F000040020001000000004D8000__00000073AD3FE6B8\n000000067F000040020001000000004D4000-000000067F000040020001000000004D8000__000000914E3F38F0\n000000067F000040020001000000004D4000-000000067F000040020001000000004D8000__000000931B9A2710\n000000067F000040020001000000004D8000-000000067F000040020001000000004DC000__00000073AD3FE6B8\n000000067F000040020001000000004D8000-000000067F000040020001000000004DC000__000000914E3F38F0\n000000067F000040020001000000004D8000-000000067F000040020001000000004DC000__000000931B9A2710
\n000000067F000040020001000000004DA4D9-000000067F000040020001000000004E2EAB__0000005CA7BBD6F9-000000739A8D1299\n000000067F000040020001000000004DC000-000000067F000040020001000000004E0000__00000073AD3FE6B8\n000000067F000040020001000000004DC000-000000067F000040020001000000004E0000__000000914E3F38F0\n000000067F000040020001000000004DC000-000000067F000040020001000000004E0000__000000931B9A2710\n000000067F000040020001000000004E0000-000000067F000040020001000000004E4000__00000073AD3FE6B8\n000000067F000040020001000000004E0000-000000067F000040020001000000004E4000__000000914E3F38F0\n000000067F000040020001000000004E0000-000000067F000040020001000000004E4000__000000931B9A2710\n000000067F000040020001000000004E2EAB-000000067F000040020001000000004EB89B__0000005CA7BBD6F9-000000739A8D1299\n000000067F000040020001000000004E4000-000000067F000040020001000000004E8000__00000073AD3FE6B8\n000000067F000040020001000000004E4000-000000067F000040020001000000004E8000__000000914E3F38F0\n000000067F000040020001000000004E4000-000000067F000040020001000000004E8000__000000931B9A2710\n000000067F000040020001000000004E8000-000000067F000040020001000000004EC000__00000073AD3FE6B8\n000000067F000040020001000000004E8000-000000067F000040020001000000004EC000__000000914E3F38F0\n000000067F000040020001000000004E8000-000000067F000040020001000000004EC000__000000931B9A2710\n000000067F000040020001000000004EB89B-000000067F000040020001000000004F428A__0000005CA7BBD6F9-000000739A8D1299\n000000067F000040020001000000004EC000-000000067F000040020001000000004F0000__00000073AD3FE6B8\n000000067F000040020001000000004EC000-000000067F000040020001000000004F0000__000000914E3F38F0\n000000067F000040020001000000004EC000-000000067F000040020001000000004F0000__000000931B9A2710\n000000067F000040020001000000004F0000-000000067F000040020001000000004F4000__00000073AD3FE6B8\n000000067F000040020001000000004F0000-000000067F000040020001000000004F4000__000000914E3F38F0\n000000067F000040020001000000004F0000-000000067F000040020001000000004F4000__000000931B9A
2710\n000000067F000040020001000000004F4000-000000067F000040020001000000004F8000__00000073AD3FE6B8\n000000067F000040020001000000004F4000-000000067F000040020001000000004F8000__000000914E3F38F0\n000000067F000040020001000000004F4000-000000067F000040020001000000004F8000__000000931B9A2710\n000000067F000040020001000000004F428A-000000067F000040020001000000004FCC78__0000005CA7BBD6F9-000000739A8D1299\n000000067F000040020001000000004F8000-000000067F000040020001000000004FC000__00000073AD3FE6B8\n000000067F000040020001000000004F8000-000000067F000040020001000000004FC000__000000914E3F38F0\n000000067F000040020001000000004F8000-000000067F000040020001000000004FC000__000000931B9A2710\n000000067F000040020001000000004FC000-000000067F00004002000100000000500000__00000073AD3FE6B8\n000000067F000040020001000000004FC000-000000067F00004002000100000000500000__000000914E3F38F0\n000000067F000040020001000000004FC000-000000067F00004002000100000000500000__000000931B9A2710\n000000067F000040020001000000004FCC78-000000067F00004002000100000000505659__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000500000-000000067F00004002000100000000504000__00000073AD3FE6B8\n000000067F00004002000100000000500000-000000067F00004002000100000000504000__000000914E3F38F0\n000000067F00004002000100000000500000-000000067F00004002000100000000504000__000000931B9A2710\n000000067F00004002000100000000504000-000000067F00004002000100000000508000__00000073AD3FE6B8\n000000067F00004002000100000000504000-000000067F00004002000100000000508000__000000914E3F38F0\n000000067F00004002000100000000504000-000000067F00004002000100000000508000__000000931B9A2710\n000000067F00004002000100000000505659-000000067F0000400200010000000050E02B__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000508000-000000067F0000400200010000000050C000__00000073AD3FE6B8\n000000067F00004002000100000000508000-000000067F0000400200010000000050C000__000000914E3F38F0\n000000067F00004002000100000000508000-000000067F0000400200010000000050C000__00000093
1B9A2710\n000000067F0000400200010000000050C000-000000067F00004002000100000000510000__00000073AD3FE6B8\n000000067F0000400200010000000050C000-000000067F00004002000100000000510000__000000914E3F38F0\n000000067F0000400200010000000050C000-000000067F00004002000100000000510000__000000931B9A2710\n000000067F0000400200010000000050E02B-000000067F000040020001000000005169EF__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000510000-000000067F00004002000100000000514000__00000073AD3FE6B8\n000000067F00004002000100000000510000-000000067F00004002000100000000514000__000000914E3F38F0\n000000067F00004002000100000000510000-000000067F00004002000100000000514000__000000931B9A2710\n000000067F00004002000100000000514000-000000067F00004002000100000000518000__00000073AD3FE6B8\n000000067F00004002000100000000514000-000000067F00004002000100000000518000__000000914E3F38F0\n000000067F00004002000100000000514000-000000067F00004002000100000000518000__000000931B9A2710\n000000067F000040020001000000005169EF-000000067F0000400200010000000051F3BA__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000518000-000000067F0000400200010000000051C000__00000073AD3FE6B8\n000000067F00004002000100000000518000-000000067F0000400200010000000051C000__000000914E3F38F0\n000000067F00004002000100000000518000-000000067F0000400200010000000051C000__000000931B9A2710\n000000067F0000400200010000000051C000-000000067F00004002000100000000520000__00000073AD3FE6B8\n000000067F0000400200010000000051C000-000000067F00004002000100000000520000__000000914E3F38F0\n000000067F0000400200010000000051C000-000000067F00004002000100000000520000__000000931B9A2710\n000000067F0000400200010000000051F3BA-000000067F00004002000100000000527DAC__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000520000-000000067F00004002000100000000524000__00000073AD3FE6B8\n000000067F00004002000100000000520000-000000067F00004002000100000000524000__000000914E3F38F0\n000000067F00004002000100000000520000-000000067F00004002000100000000524000__0000
00931B9A2710\n000000067F00004002000100000000524000-000000067F00004002000100000000528000__00000073AD3FE6B8\n000000067F00004002000100000000524000-000000067F00004002000100000000528000__000000914E3F38F0\n000000067F00004002000100000000524000-000000067F00004002000100000000528000__000000931B9A2710\n000000067F00004002000100000000527DAC-000000067F0000400200010000000053079E__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000528000-000000067F0000400200010000000052C000__00000073AD3FE6B8\n000000067F00004002000100000000528000-000000067F0000400200010000000052C000__000000914E3F38F0\n000000067F00004002000100000000528000-000000067F0000400200010000000052C000__000000931B9A2710\n000000067F0000400200010000000052C000-000000067F00004002000100000000530000__00000073AD3FE6B8\n000000067F0000400200010000000052C000-000000067F00004002000100000000530000__000000914E3F38F0\n000000067F0000400200010000000052C000-000000067F00004002000100000000530000__000000931B9A2710\n000000067F00004002000100000000530000-000000067F00004002000100000000534000__00000073AD3FE6B8\n000000067F00004002000100000000530000-000000067F00004002000100000000534000__000000914E3F38F0\n000000067F00004002000100000000530000-000000067F00004002000100000000534000__000000931B9A2710\n000000067F0000400200010000000053079E-000000067F00004002000100000000539198__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000534000-000000067F00004002000100000000538000__00000073AD3FE6B8\n000000067F00004002000100000000534000-000000067F00004002000100000000538000__000000914E3F38F0\n000000067F00004002000100000000534000-000000067F00004002000100000000538000__000000931B9A2710\n000000067F00004002000100000000538000-000000067F0000400200010000000053C000__00000073AD3FE6B8\n000000067F00004002000100000000538000-000000067F0000400200010000000053C000__000000914E3F38F0\n000000067F00004002000100000000538000-000000067F0000400200010000000053C000__000000931B9A2710\n000000067F00004002000100000000539198-000000067F00004002000100000000541B6B__0000005CA7BBD6F9-
000000739A8D1299\n000000067F0000400200010000000053C000-000000067F00004002000100000000540000__00000073AD3FE6B8\n000000067F0000400200010000000053C000-000000067F00004002000100000000540000__000000914E3F38F0\n000000067F0000400200010000000053C000-000000067F00004002000100000000540000__000000931B9A2710\n000000067F00004002000100000000540000-000000067F00004002000100000000544000__00000073AD3FE6B8\n000000067F00004002000100000000540000-000000067F00004002000100000000544000__000000914E3F38F0\n000000067F00004002000100000000540000-000000067F00004002000100000000544000__000000931B9A2710\n000000067F00004002000100000000541B6B-000000067F0000400200010000000054A544__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000544000-000000067F00004002000100000000548000__00000073AD3FE6B8\n000000067F00004002000100000000544000-000000067F00004002000100000000548000__000000914E3F38F0\n000000067F00004002000100000000544000-000000067F00004002000100000000548000__000000931B9A2710\n000000067F00004002000100000000548000-000000067F0000400200010000000054C000__00000073AD3FE6B8\n000000067F00004002000100000000548000-000000067F0000400200010000000054C000__000000914E3F38F0\n000000067F00004002000100000000548000-000000067F0000400200010000000054C000__000000931B9A2710\n000000067F0000400200010000000054A544-000000067F00004002000100000000552F06__0000005CA7BBD6F9-000000739A8D1299\n000000067F0000400200010000000054C000-000000067F00004002000100000000550000__00000073AD3FE6B8\n000000067F0000400200010000000054C000-000000067F00004002000100000000550000__000000914E3F38F0\n000000067F0000400200010000000054C000-000000067F00004002000100000000550000__000000931B9A2710\n000000067F00004002000100000000550000-000000067F00004002000100000000554000__00000073AD3FE6B8\n000000067F00004002000100000000550000-000000067F00004002000100000000554000__000000914E3F38F0\n000000067F00004002000100000000550000-000000067F00004002000100000000554000__000000931B9A2710\n000000067F00004002000100000000552F06-000000067F0000400200010000000055B8C8__0000005CA7BBD
6F9-000000739A8D1299\n000000067F00004002000100000000554000-000000067F00004002000100000000558000__00000073AD3FE6B8\n000000067F00004002000100000000554000-000000067F00004002000100000000558000__000000914E3F38F0\n000000067F00004002000100000000554000-000000067F00004002000100000000558000__000000931B9A2710\n000000067F00004002000100000000558000-000000067F0000400200010000000055C000__00000073AD3FE6B8\n000000067F00004002000100000000558000-000000067F0000400200010000000055C000__000000914E3F38F0\n000000067F00004002000100000000558000-000000067F0000400200010000000055C000__000000931B9A2710\n000000067F0000400200010000000055B8C8-000000067F000040020001000000005642BF__0000005CA7BBD6F9-000000739A8D1299\n000000067F0000400200010000000055C000-000000067F00004002000100000000560000__00000073AD3FE6B8\n000000067F0000400200010000000055C000-000000067F00004002000100000000560000__000000914E3F38F0\n000000067F0000400200010000000055C000-000000067F00004002000100000000560000__000000931B9A2710\n000000067F00004002000100000000560000-000000067F00004002000100000000564000__00000073AD3FE6B8\n000000067F00004002000100000000560000-000000067F00004002000100000000564000__000000914E3F38F0\n000000067F00004002000100000000560000-000000067F00004002000100000000564000__000000931B9A2710\n000000067F00004002000100000000564000-000000067F00004002000100000000568000__00000073AD3FE6B8\n000000067F00004002000100000000564000-000000067F00004002000100000000568000__000000914E3F38F0\n000000067F00004002000100000000564000-000000067F00004002000100000000568000__000000931B9A2710\n000000067F000040020001000000005642BF-000000067F0000400200010000000056CCB6__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000568000-000000067F0000400200010000000056C000__00000073AD3FE6B8\n000000067F00004002000100000000568000-000000067F0000400200010000000056C000__000000914E3F38F0\n000000067F00004002000100000000568000-000000067F0000400200010000000056C000__000000931B9A2710\n000000067F0000400200010000000056C000-000000067F00004002000100000000570000__00000073A
D3FE6B8\n000000067F0000400200010000000056C000-000000067F00004002000100000000570000__000000914E3F38F0\n000000067F0000400200010000000056C000-000000067F00004002000100000000570000__000000931B9A2710\n000000067F0000400200010000000056CCB6-000000067F000040020001000000005756A1__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000570000-000000067F00004002000100000000574000__00000073AD3FE6B8\n000000067F00004002000100000000570000-000000067F00004002000100000000574000__000000914E3F38F0\n000000067F00004002000100000000570000-000000067F00004002000100000000574000__000000931B9A2710\n000000067F00004002000100000000574000-000000067F00004002000100000000578000__00000073AD3FE6B8\n000000067F00004002000100000000574000-000000067F00004002000100000000578000__000000914E3F38F0\n000000067F00004002000100000000574000-000000067F00004002000100000000578000__000000931B9A2710\n000000067F000040020001000000005756A1-000000067F0000400200010000000057E077__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000578000-000000067F0000400200010000000057C000__00000073AD3FE6B8\n000000067F00004002000100000000578000-000000067F0000400200010000000057C000__000000914E3F38F0\n000000067F00004002000100000000578000-000000067F0000400200010000000057C000__000000931B9A2710\n000000067F0000400200010000000057C000-000000067F00004002000100000000580000__00000073AD3FE6B8\n000000067F0000400200010000000057C000-000000067F00004002000100000000580000__000000914E3F38F0\n000000067F0000400200010000000057C000-000000067F00004002000100000000580000__000000931B9A2710\n000000067F0000400200010000000057E077-000000067F00004002000100000000586A4E__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000580000-000000067F00004002000100000000584000__00000073AD3FE6B8\n000000067F00004002000100000000580000-000000067F00004002000100000000584000__000000914E3F38F0\n000000067F00004002000100000000580000-000000067F00004002000100000000584000__000000931B9A2710\n000000067F00004002000100000000584000-000000067F00004002000100000000588000__00000
073AD3FE6B8\n000000067F00004002000100000000584000-000000067F00004002000100000000588000__000000914E3F38F0\n000000067F00004002000100000000584000-000000067F00004002000100000000588000__000000931B9A2710\n000000067F00004002000100000000586A4E-000000067F0000400200010000000058F415__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000588000-000000067F0000400200010000000058C000__00000073AD3FE6B8\n000000067F00004002000100000000588000-000000067F0000400200010000000058C000__000000914E3F38F0\n000000067F00004002000100000000588000-000000067F0000400200010000000058C000__000000931B9A2710\n000000067F0000400200010000000058C000-000000067F00004002000100000000590000__00000073AD3FE6B8\n000000067F0000400200010000000058C000-000000067F00004002000100000000590000__000000914E3F38F0\n000000067F0000400200010000000058C000-000000067F00004002000100000000590000__000000931B9A2710\n000000067F0000400200010000000058F415-000000067F00004002000100000000597DDF__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000590000-000000067F00004002000100000000594000__00000073AD3FE6B8\n000000067F00004002000100000000590000-000000067F00004002000100000000594000__000000914E3F38F0\n000000067F00004002000100000000590000-000000067F00004002000100000000594000__000000931B9A2710\n000000067F00004002000100000000594000-000000067F00004002000100000000598000__00000073AD3FE6B8\n000000067F00004002000100000000594000-000000067F00004002000100000000598000__000000914E3F38F0\n000000067F00004002000100000000594000-000000067F00004002000100000000598000__000000931B9A2710\n000000067F00004002000100000000597DDF-000000067F000040020001000000005A07CE__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000598000-000000067F0000400200010000000059C000__00000073AD3FE6B8\n000000067F00004002000100000000598000-000000067F0000400200010000000059C000__000000914E3F38F0\n000000067F00004002000100000000598000-000000067F0000400200010000000059C000__000000931B9A2710\n000000067F0000400200010000000059C000-000000067F000040020001000000005A0000__0
0000073AD3FE6B8\n000000067F0000400200010000000059C000-000000067F000040020001000000005A0000__000000914E3F38F0\n000000067F0000400200010000000059C000-000000067F000040020001000000005A0000__000000931B9A2710\n000000067F000040020001000000005A0000-000000067F000040020001000000005A4000__00000073AD3FE6B8\n000000067F000040020001000000005A0000-000000067F000040020001000000005A4000__000000914E3F38F0\n000000067F000040020001000000005A0000-000000067F000040020001000000005A4000__000000931B9A2710\n000000067F000040020001000000005A07CE-000000067F000040020001000000005A91C9__0000005CA7BBD6F9-000000739A8D1299\n000000067F000040020001000000005A4000-000000067F000040020001000000005A8000__00000073AD3FE6B8\n000000067F000040020001000000005A4000-000000067F000040020001000000005A8000__000000914E3F38F0\n000000067F000040020001000000005A4000-000000067F000040020001000000005A8000__000000931B9A2710\n000000067F000040020001000000005A8000-000000067F000040020001000000005AC000__00000073AD3FE6B8\n000000067F000040020001000000005A8000-000000067F000040020001000000005AC000__000000914E3F38F0\n000000067F000040020001000000005A8000-000000067F000040020001000000005AC000__000000931B9A2710\n000000067F000040020001000000005A91C9-000000067F000040020001000000005B1BB6__0000005CA7BBD6F9-000000739A8D1299\n000000067F000040020001000000005AC000-000000067F000040020001000000005B0000__00000073AD3FE6B8\n000000067F000040020001000000005AC000-000000067F000040020001000000005B0000__000000914E3F38F0\n000000067F000040020001000000005AC000-000000067F000040020001000000005B0000__000000931B9A2710\n000000067F000040020001000000005B0000-000000067F000040020001000000005B4000__00000073AD3FE6B8\n000000067F000040020001000000005B0000-000000067F000040020001000000005B4000__000000914E3F38F0\n000000067F000040020001000000005B0000-000000067F000040020001000000005B4000__000000931B9A2710\n000000067F000040020001000000005B1BB6-000000067F000040020001000000005BA58F__0000005CA7BBD6F9-000000739A8D1299\n000000067F000040020001000000005B4000-000000067F000040020001000000005B800
0__00000073AD3FE6B8\n000000067F000040020001000000005B4000-000000067F000040020001000000005B8000__000000914E3F38F0\n000000067F000040020001000000005B4000-000000067F000040020001000000005B8000__000000931B9A2710\n000000067F000040020001000000005B8000-000000067F000040020001000000005BC000__00000073AD3FE6B8\n000000067F000040020001000000005B8000-000000067F000040020001000000005BC000__000000914E3F38F0\n000000067F000040020001000000005B8000-000000067F000040020001000000005BC000__000000931B9A2710\n000000067F000040020001000000005BA58F-000000067F000040020001000000005C2F60__0000005CA7BBD6F9-000000739A8D1299\n000000067F000040020001000000005BC000-000000067F000040020001000000005C0000__00000073AD3FE6B8\n000000067F000040020001000000005BC000-000000067F000040020001000000005C0000__000000914E3F38F0\n000000067F000040020001000000005BC000-000000067F000040020001000000005C0000__000000931B9A2710\n000000067F000040020001000000005C0000-000000067F000040020001000000005C4000__00000073AD3FE6B8\n000000067F000040020001000000005C0000-000000067F000040020001000000005C4000__000000914E3F38F0\n000000067F000040020001000000005C0000-000000067F000040020001000000005C4000__000000931B9A2710\n000000067F000040020001000000005C2F60-000000067F000040020001000000005CB925__0000005CA7BBD6F9-000000739A8D1299\n000000067F000040020001000000005C4000-000000067F000040020001000000005C8000__00000073AD3FE6B8\n000000067F000040020001000000005C4000-000000067F000040020001000000005C8000__000000914E3F38F0\n000000067F000040020001000000005C4000-000000067F000040020001000000005C8000__000000931B9A2710\n000000067F000040020001000000005C8000-000000067F000040020001000000005CC000__00000073AD3FE6B8\n000000067F000040020001000000005C8000-000000067F000040020001000000005CC000__000000914E3F38F0\n000000067F000040020001000000005C8000-000000067F000040020001000000005CC000__000000931B9A2710\n000000067F000040020001000000005CB925-000000067F000040020001000000005D42F3__0000005CA7BBD6F9-000000739A8D1299\n000000067F000040020001000000005CC000-000000067F000040020001000000005
D0000__00000073AD3FE6B8\n000000067F000040020001000000005CC000-000000067F000040020001000000005D0000__000000914E3F38F0\n000000067F000040020001000000005CC000-000000067F000040020001000000005D0000__000000931B9A2710\n000000067F000040020001000000005D0000-000000067F000040020001000000005D4000__00000073AD3FE6B8\n000000067F000040020001000000005D0000-000000067F000040020001000000005D4000__000000914E3F38F0\n000000067F000040020001000000005D0000-000000067F000040020001000000005D4000__000000931B9A2710\n000000067F000040020001000000005D4000-000000067F000040020001000000005D8000__00000073AD3FE6B8\n000000067F000040020001000000005D4000-000000067F000040020001000000005D8000__000000914E3F38F0\n000000067F000040020001000000005D4000-000000067F000040020001000000005D8000__000000931B9A2710\n000000067F000040020001000000005D42F3-000000067F000040020001000000005DCCE4__0000005CA7BBD6F9-000000739A8D1299\n000000067F000040020001000000005D8000-000000067F000040020001000000005DC000__00000073AD3FE6B8\n000000067F000040020001000000005D8000-000000067F000040020001000000005DC000__000000914E3F38F0\n000000067F000040020001000000005D8000-000000067F000040020001000000005DC000__000000931B9A2710\n000000067F000040020001000000005DC000-000000067F000040020001000000005E0000__00000073AD3FE6B8\n000000067F000040020001000000005DC000-000000067F000040020001000000005E0000__000000914E3F38F0\n000000067F000040020001000000005DC000-000000067F000040020001000000005E0000__000000931B9A2710\n000000067F000040020001000000005DCCE4-000000067F000040020001000000005E56DD__0000005CA7BBD6F9-000000739A8D1299\n000000067F000040020001000000005E0000-000000067F000040020001000000005E4000__00000073AD3FE6B8\n000000067F000040020001000000005E0000-000000067F000040020001000000005E4000__000000914E3F38F0\n000000067F000040020001000000005E0000-000000067F000040020001000000005E4000__000000931B9A2710\n000000067F000040020001000000005E4000-000000067F000040020001000000005E8000__00000073AD3FE6B8\n000000067F000040020001000000005E4000-000000067F000040020001000000005E8000__000000
914E3F38F0\n000000067F000040020001000000005E4000-000000067F000040020001000000005E8000__000000931B9A2710\n000000067F000040020001000000005E56DD-000000067F000040020001000000005EE0C5__0000005CA7BBD6F9-000000739A8D1299\n000000067F000040020001000000005E8000-000000067F000040020001000000005EC000__00000073AD3FE6B8\n000000067F000040020001000000005E8000-000000067F000040020001000000005EC000__000000914E3F38F0\n000000067F000040020001000000005E8000-000000067F000040020001000000005EC000__000000931B9A2710\n000000067F000040020001000000005EC000-000000067F000040020001000000005F0000__00000073AD3FE6B8\n000000067F000040020001000000005EC000-000000067F000040020001000000005F0000__000000914E3F38F0\n000000067F000040020001000000005EC000-000000067F000040020001000000005F0000__000000931B9A2710\n000000067F000040020001000000005EE0C5-000000067F000040020001000000005F6AA8__0000005CA7BBD6F9-000000739A8D1299\n000000067F000040020001000000005F0000-000000067F000040020001000000005F4000__00000073AD3FE6B8\n000000067F000040020001000000005F0000-000000067F000040020001000000005F4000__000000914E3F38F0\n000000067F000040020001000000005F0000-000000067F000040020001000000005F4000__000000931B9A2710\n000000067F000040020001000000005F4000-000000067F000040020001000000005F8000__00000073AD3FE6B8\n000000067F000040020001000000005F4000-000000067F000040020001000000005F8000__000000914E3F38F0\n000000067F000040020001000000005F4000-000000067F000040020001000000005F8000__000000931B9A2710\n000000067F000040020001000000005F6AA8-000000067F000040020001000000005FF476__0000005CA7BBD6F9-000000739A8D1299\n000000067F000040020001000000005F8000-000000067F000040020001000000005FC000__00000073AD3FE6B8\n000000067F000040020001000000005F8000-000000067F000040020001000000005FC000__000000914E3F38F0\n000000067F000040020001000000005F8000-000000067F000040020001000000005FC000__000000931B9A2710\n000000067F000040020001000000005FC000-000000067F00004002000100000000600000__00000073AD3FE6B8\n000000067F000040020001000000005FC000-000000067F00004002000100000000600000__00
0000914E3F38F0\n000000067F000040020001000000005FC000-000000067F00004002000100000000600000__000000931B9A2710\n000000067F000040020001000000005FF476-000000067F00004002000100000000607E40__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000600000-000000067F00004002000100000000604000__00000073AD3FE6B8\n000000067F00004002000100000000600000-000000067F00004002000100000000604000__000000914E3F38F0\n000000067F00004002000100000000600000-000000067F00004002000100000000604000__000000931B9A2710\n000000067F00004002000100000000604000-000000067F00004002000100000000608000__00000073AD3FE6B8\n000000067F00004002000100000000604000-000000067F00004002000100000000608000__000000914E3F38F0\n000000067F00004002000100000000604000-000000067F00004002000100000000608000__000000931B9A2710\n000000067F00004002000100000000607E40-000000067F0000400200010000000061081B__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000608000-000000067F0000400200010000000060C000__00000073AD3FE6B8\n000000067F00004002000100000000608000-000000067F0000400200010000000060C000__000000914E3F38F0\n000000067F00004002000100000000608000-000000067F0000400200010000000060C000__000000931B9A2710\n000000067F0000400200010000000060C000-000000067F00004002000100000000610000__00000073AD3FE6B8\n000000067F0000400200010000000060C000-000000067F00004002000100000000610000__000000914E3F38F0\n000000067F0000400200010000000060C000-000000067F00004002000100000000610000__000000931B9A2710\n000000067F00004002000100000000610000-000000067F00004002000100000000614000__00000073AD3FE6B8\n000000067F00004002000100000000610000-000000067F00004002000100000000614000__000000914E3F38F0\n000000067F00004002000100000000610000-000000067F00004002000100000000614000__000000931B9A2710\n000000067F0000400200010000000061081B-000000067F000040020001000000006191FB__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000614000-000000067F00004002000100000000618000__00000073AD3FE6B8\n000000067F00004002000100000000614000-000000067F00004002000100000000618000
__000000914E3F38F0\n000000067F00004002000100000000614000-000000067F00004002000100000000618000__000000931B9A2710\n000000067F00004002000100000000618000-000000067F0000400200010000000061C000__00000073AD3FE6B8\n000000067F00004002000100000000618000-000000067F0000400200010000000061C000__000000914E3F38F0\n000000067F00004002000100000000618000-000000067F0000400200010000000061C000__000000931B9A2710\n000000067F000040020001000000006191FB-000000067F00004002000100000000621BEC__0000005CA7BBD6F9-000000739A8D1299\n000000067F0000400200010000000061C000-000000067F00004002000100000000620000__00000073AD3FE6B8\n000000067F0000400200010000000061C000-000000067F00004002000100000000620000__000000914E3F38F0\n000000067F0000400200010000000061C000-000000067F00004002000100000000620000__000000931B9A2710\n000000067F00004002000100000000620000-000000067F00004002000100000000624000__00000073AD3FE6B8\n000000067F00004002000100000000620000-000000067F00004002000100000000624000__000000914E3F38F0\n000000067F00004002000100000000620000-000000067F00004002000100000000624000__000000931B9A2710\n000000067F00004002000100000000621BEC-000000067F0000400200010000000062A5D2__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000624000-000000067F00004002000100000000628000__00000073AD3FE6B8\n000000067F00004002000100000000624000-000000067F00004002000100000000628000__000000914E3F38F0\n000000067F00004002000100000000624000-000000067F00004002000100000000628000__000000931B9A2710\n000000067F00004002000100000000628000-000000067F0000400200010000000062C000__00000073AD3FE6B8\n000000067F00004002000100000000628000-000000067F0000400200010000000062C000__000000914E3F38F0\n000000067F00004002000100000000628000-000000067F0000400200010000000062C000__000000931B9A2710\n000000067F0000400200010000000062A5D2-000000067F00004002000100000000632FB1__0000005CA7BBD6F9-000000739A8D1299\n000000067F0000400200010000000062C000-000000067F00004002000100000000630000__00000073AD3FE6B8\n000000067F0000400200010000000062C000-000000067F0000400200010000000063
0000__000000914E3F38F0\n000000067F0000400200010000000062C000-000000067F00004002000100000000630000__000000931B9A2710\n000000067F00004002000100000000630000-000000067F00004002000100000000634000__00000073AD3FE6B8\n000000067F00004002000100000000630000-000000067F00004002000100000000634000__000000914E3F38F0\n000000067F00004002000100000000630000-000000067F00004002000100000000634000__000000931B9A2710\n000000067F00004002000100000000632FB1-000000067F0000400200010000000063B985__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000634000-000000067F00004002000100000000638000__00000073AD3FE6B8\n000000067F00004002000100000000634000-000000067F00004002000100000000638000__000000914E3F38F0\n000000067F00004002000100000000634000-000000067F00004002000100000000638000__000000931B9A2710\n000000067F00004002000100000000638000-000000067F0000400200010000000063C000__00000073AD3FE6B8\n000000067F00004002000100000000638000-000000067F0000400200010000000063C000__000000914E3F38F0\n000000067F00004002000100000000638000-000000067F0000400200010000000063C000__000000931B9A2710\n000000067F0000400200010000000063B985-000000067F00004002000100000000644349__0000005CA7BBD6F9-000000739A8D1299\n000000067F0000400200010000000063C000-000000067F00004002000100000000640000__00000073AD3FE6B8\n000000067F0000400200010000000063C000-000000067F00004002000100000000640000__000000914E3F38F0\n000000067F0000400200010000000063C000-000000067F00004002000100000000640000__000000931B9A2710\n000000067F00004002000100000000640000-000000067F00004002000100000000644000__00000073AD3FE6B8\n000000067F00004002000100000000640000-000000067F00004002000100000000644000__000000914E3F38F0\n000000067F00004002000100000000640000-000000067F00004002000100000000644000__000000931B9A2710\n000000067F00004002000100000000644000-000000067F00004002000100000000648000__00000073AD3FE6B8\n000000067F00004002000100000000644000-000000067F00004002000100000000648000__000000914E3F38F0\n000000067F00004002000100000000644000-000000067F00004002000100000000648000__0000009
31B9A2710\n000000067F00004002000100000000644349-000000067F0000400200010000000064CD2B__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000648000-000000067F0000400200010000000064C000__00000073AD3FE6B8\n000000067F00004002000100000000648000-000000067F0000400200010000000064C000__000000914E3F38F0\n000000067F00004002000100000000648000-000000067F0000400200010000000064C000__000000931B9A2710\n000000067F0000400200010000000064C000-000000067F00004002000100000000650000__00000073AD3FE6B8\n000000067F0000400200010000000064C000-000000067F00004002000100000000650000__000000914E3F38F0\n000000067F0000400200010000000064C000-000000067F00004002000100000000650000__000000931B9A2710\n000000067F0000400200010000000064CD2B-000000067F00004002000100000000655712__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000650000-000000067F00004002000100000000654000__00000073AD3FE6B8\n000000067F00004002000100000000650000-000000067F00004002000100000000654000__000000914E3F38F0\n000000067F00004002000100000000650000-000000067F00004002000100000000654000__000000931B9A2710\n000000067F00004002000100000000654000-000000067F00004002000100000000658000__00000073AD3FE6B8\n000000067F00004002000100000000654000-000000067F00004002000100000000658000__000000914E3F38F0\n000000067F00004002000100000000654000-000000067F00004002000100000000658000__000000931B9A2710\n000000067F00004002000100000000655712-000000067F0000400200010000000065E0F3__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000658000-000000067F0000400200010000000065C000__00000073AD3FE6B8\n000000067F00004002000100000000658000-000000067F0000400200010000000065C000__000000914E3F38F0\n000000067F00004002000100000000658000-000000067F0000400200010000000065C000__000000931B9A2710\n000000067F0000400200010000000065C000-000000067F00004002000100000000660000__00000073AD3FE6B8\n000000067F0000400200010000000065C000-000000067F00004002000100000000660000__000000914E3F38F0\n000000067F0000400200010000000065C000-000000067F00004002000100000000660000__000
000931B9A2710\n000000067F0000400200010000000065E0F3-000000067F00004002000100000000666AE2__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000660000-000000067F00004002000100000000664000__00000073AD3FE6B8\n000000067F00004002000100000000660000-000000067F00004002000100000000664000__000000914E3F38F0\n000000067F00004002000100000000660000-000000067F00004002000100000000664000__000000931B9A2710\n000000067F00004002000100000000664000-000000067F00004002000100000000668000__00000073AD3FE6B8\n000000067F00004002000100000000664000-000000067F00004002000100000000668000__000000914E3F38F0\n000000067F00004002000100000000664000-000000067F00004002000100000000668000__000000931B9A2710\n000000067F00004002000100000000666AE2-000000067F0000400200010000000066F4B5__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000668000-000000067F0000400200010000000066C000__00000073AD3FE6B8\n000000067F00004002000100000000668000-000000067F0000400200010000000066C000__000000914E3F38F0\n000000067F00004002000100000000668000-000000067F0000400200010000000066C000__000000931B9A2710\n000000067F0000400200010000000066C000-000000067F00004002000100000000670000__00000073AD3FE6B8\n000000067F0000400200010000000066C000-000000067F00004002000100000000670000__000000914E3F38F0\n000000067F0000400200010000000066C000-000000067F00004002000100000000670000__000000931B9A2710\n000000067F0000400200010000000066F4B5-000000067F00004002000100000000677E81__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000670000-000000067F00004002000100000000674000__00000073AD3FE6B8\n000000067F00004002000100000000670000-000000067F00004002000100000000674000__000000914E3F38F0\n000000067F00004002000100000000670000-000000067F00004002000100000000674000__000000931B9A2710\n000000067F00004002000100000000674000-000000067F00004002000100000000678000__00000073AD3FE6B8\n000000067F00004002000100000000674000-000000067F00004002000100000000678000__000000914E3F38F0\n000000067F00004002000100000000674000-000000067F00004002000100000000678000_
_000000931B9A2710\n000000067F00004002000100000000677E81-000000067F0000400200010000000068083C__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000678000-000000067F0000400200010000000067C000__00000073AD3FE6B8\n000000067F00004002000100000000678000-000000067F0000400200010000000067C000__000000914E3F38F0\n000000067F00004002000100000000678000-000000067F0000400200010000000067C000__000000931B9A2710\n000000067F0000400200010000000067C000-000000067F00004002000100000000680000__00000073AD3FE6B8\n000000067F0000400200010000000067C000-000000067F00004002000100000000680000__000000914E3F38F0\n000000067F0000400200010000000067C000-000000067F00004002000100000000680000__000000931B9A2710\n000000067F00004002000100000000680000-000000067F00004002000100000000684000__00000073AD3FE6B8\n000000067F00004002000100000000680000-000000067F00004002000100000000684000__000000914E3F38F0\n000000067F00004002000100000000680000-000000067F00004002000100000000684000__000000931B9A2710\n000000067F0000400200010000000068083C-000000067F00004002000100000000689223__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000684000-000000067F00004002000100000000688000__00000073AD3FE6B8\n000000067F00004002000100000000684000-000000067F00004002000100000000688000__000000914E3F38F0\n000000067F00004002000100000000684000-000000067F00004002000100000000688000__000000931B9A2710\n000000067F00004002000100000000688000-000000067F0000400200010000000068C000__00000073AD3FE6B8\n000000067F00004002000100000000688000-000000067F0000400200010000000068C000__000000914E3F38F0\n000000067F00004002000100000000688000-000000067F0000400200010000000068C000__000000931B9A2710\n000000067F00004002000100000000689223-000000067F00004002000100000000691C08__0000005CA7BBD6F9-000000739A8D1299\n000000067F0000400200010000000068C000-000000067F00004002000100000000690000__00000073AD3FE6B8\n000000067F0000400200010000000068C000-000000067F00004002000100000000690000__000000914E3F38F0\n000000067F0000400200010000000068C000-000000067F00004002000100000000690
000__000000931B9A2710\n000000067F00004002000100000000690000-000000067F00004002000100000000694000__00000073AD3FE6B8\n000000067F00004002000100000000690000-000000067F00004002000100000000694000__000000914E3F38F0\n000000067F00004002000100000000690000-000000067F00004002000100000000694000__000000931B9A2710\n000000067F00004002000100000000691C08-000000067F0000400200010000000069A5E4__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000694000-000000067F00004002000100000000698000__00000073AD3FE6B8\n000000067F00004002000100000000694000-000000067F00004002000100000000698000__000000914E3F38F0\n000000067F00004002000100000000694000-000000067F00004002000100000000698000__000000931B9A2710\n000000067F00004002000100000000698000-000000067F0000400200010000000069C000__00000073AD3FE6B8\n000000067F00004002000100000000698000-000000067F0000400200010000000069C000__000000914E3F38F0\n000000067F00004002000100000000698000-000000067F0000400200010000000069C000__000000931B9A2710\n000000067F0000400200010000000069A5E4-000000067F000040020001000000006A2FC5__0000005CA7BBD6F9-000000739A8D1299\n000000067F0000400200010000000069C000-000000067F000040020001000000006A0000__00000073AD3FE6B8\n000000067F0000400200010000000069C000-000000067F000040020001000000006A0000__000000914E3F38F0\n000000067F0000400200010000000069C000-000000067F000040020001000000006A0000__000000931B9A2710\n000000067F000040020001000000006A0000-000000067F000040020001000000006A4000__00000073AD3FE6B8\n000000067F000040020001000000006A0000-000000067F000040020001000000006A4000__000000914E3F38F0\n000000067F000040020001000000006A0000-000000067F000040020001000000006A4000__000000931B9A2710\n000000067F000040020001000000006A2FC5-000000067F000040020001000000006AB99F__0000005CA7BBD6F9-000000739A8D1299\n000000067F000040020001000000006A4000-000000067F000040020001000000006A8000__00000073AD3FE6B8\n000000067F000040020001000000006A4000-000000067F000040020001000000006A8000__000000914E3F38F0\n000000067F000040020001000000006A4000-000000067F0000400200010000000
06A8000__000000931B9A2710\n000000067F000040020001000000006A8000-000000067F000040020001000000006AC000__00000073AD3FE6B8\n000000067F000040020001000000006A8000-000000067F000040020001000000006AC000__000000914E3F38F0\n000000067F000040020001000000006A8000-000000067F000040020001000000006AC000__000000931B9A2710\n000000067F000040020001000000006AB99F-000000067F000040020001000000006B4375__0000005CA7BBD6F9-000000739A8D1299\n000000067F000040020001000000006AC000-000000067F000040020001000000006B0000__00000073AD3FE6B8\n000000067F000040020001000000006AC000-000000067F000040020001000000006B0000__000000914E3F38F0\n000000067F000040020001000000006AC000-000000067F000040020001000000006B0000__000000931B9A2710\n000000067F000040020001000000006B0000-000000067F000040020001000000006B4000__00000073AD3FE6B8\n000000067F000040020001000000006B0000-000000067F000040020001000000006B4000__000000914E3F38F0\n000000067F000040020001000000006B0000-000000067F000040020001000000006B4000__000000931B9A2710\n000000067F000040020001000000006B4000-000000067F000040020001000000006B8000__00000073AD3FE6B8\n000000067F000040020001000000006B4000-000000067F000040020001000000006B8000__000000914E3F38F0\n000000067F000040020001000000006B4000-000000067F000040020001000000006B8000__000000931B9A2710\n000000067F000040020001000000006B4375-000000067F000040020001000000006BCD3D__0000005CA7BBD6F9-000000739A8D1299\n000000067F000040020001000000006B8000-000000067F000040020001000000006BC000__00000073AD3FE6B8\n000000067F000040020001000000006B8000-000000067F000040020001000000006BC000__000000914E3F38F0\n000000067F000040020001000000006B8000-000000067F000040020001000000006BC000__000000931B9A2710\n000000067F000040020001000000006BC000-000000067F000040020001000000006C0000__00000073AD3FE6B8\n000000067F000040020001000000006BC000-000000067F000040020001000000006C0000__000000914E3F38F0\n000000067F000040020001000000006BC000-000000067F000040020001000000006C0000__000000931B9A2710\n000000067F000040020001000000006BCD3D-000000067F000040020001000000006C571E__0000
005CA7BBD6F9-000000739A8D1299\n000000067F000040020001000000006C0000-000000067F000040020001000000006C4000__00000073AD3FE6B8\n000000067F000040020001000000006C0000-000000067F000040020001000000006C4000__000000914E3F38F0\n000000067F000040020001000000006C0000-000000067F000040020001000000006C4000__000000931B9A2710\n000000067F000040020001000000006C4000-000000067F000040020001000000006C8000__00000073AD3FE6B8\n000000067F000040020001000000006C4000-000000067F000040020001000000006C8000__000000914E3F38F0\n000000067F000040020001000000006C4000-000000067F000040020001000000006C8000__000000931B9A2710\n000000067F000040020001000000006C571E-000000067F000040020001000000006CE101__0000005CA7BBD6F9-000000739A8D1299\n000000067F000040020001000000006C8000-000000067F000040020001000000006CC000__00000073AD3FE6B8\n000000067F000040020001000000006C8000-000000067F000040020001000000006CC000__000000914E3F38F0\n000000067F000040020001000000006C8000-000000067F000040020001000000006CC000__000000931B9A2710\n000000067F000040020001000000006CC000-000000067F000040020001000000006D0000__00000073AD3FE6B8\n000000067F000040020001000000006CC000-000000067F000040020001000000006D0000__000000914E3F38F0\n000000067F000040020001000000006CC000-000000067F000040020001000000006D0000__000000931B9A2710\n000000067F000040020001000000006CE101-000000067F000040020001000000006D6AD7__0000005CA7BBD6F9-000000739A8D1299\n000000067F000040020001000000006D0000-000000067F000040020001000000006D4000__00000073AD3FE6B8\n000000067F000040020001000000006D0000-000000067F000040020001000000006D4000__000000914E3F38F0\n000000067F000040020001000000006D0000-000000067F000040020001000000006D4000__000000931B9A2710\n000000067F000040020001000000006D4000-000000067F000040020001000000006D8000__00000073AD3FE6B8\n000000067F000040020001000000006D4000-000000067F000040020001000000006D8000__000000914E3F38F0\n000000067F000040020001000000006D4000-000000067F000040020001000000006D8000__000000931B9A2710\n000000067F000040020001000000006D6AD7-000000067F000040020001000000006DF4B3__
0000005CA7BBD6F9-000000739A8D1299\n000000067F000040020001000000006D8000-000000067F000040020001000000006DC000__00000073AD3FE6B8\n000000067F000040020001000000006D8000-000000067F000040020001000000006DC000__000000914E3F38F0\n000000067F000040020001000000006D8000-000000067F000040020001000000006DC000__000000931B9A2710\n000000067F000040020001000000006DC000-000000067F000040020001000000006E0000__00000073AD3FE6B8\n000000067F000040020001000000006DC000-000000067F000040020001000000006E0000__000000914E3F38F0\n000000067F000040020001000000006DC000-000000067F000040020001000000006E0000__000000931B9A2710\n000000067F000040020001000000006DF4B3-000000067F000040020001000000006E7E8D__0000005CA7BBD6F9-000000739A8D1299\n000000067F000040020001000000006E0000-000000067F000040020001000000006E4000__00000073AD3FE6B8\n000000067F000040020001000000006E0000-000000067F000040020001000000006E4000__000000914E3F38F0\n000000067F000040020001000000006E0000-000000067F000040020001000000006E4000__000000931B9A2710\n000000067F000040020001000000006E4000-000000067F000040020001000000006E8000__00000073AD3FE6B8\n000000067F000040020001000000006E4000-000000067F000040020001000000006E8000__000000914E3F38F0\n000000067F000040020001000000006E4000-000000067F000040020001000000006E8000__000000931B9A2710\n000000067F000040020001000000006E7E8D-000000067F000040020001000000006F0867__0000005CA7BBD6F9-000000739A8D1299\n000000067F000040020001000000006E8000-000000067F000040020001000000006EC000__00000073AD3FE6B8\n000000067F000040020001000000006E8000-000000067F000040020001000000006EC000__000000914E3F38F0\n000000067F000040020001000000006E8000-000000067F000040020001000000006EC000__000000931B9A2710\n000000067F000040020001000000006EC000-000000067F000040020001000000006F0000__00000073AD3FE6B8\n000000067F000040020001000000006EC000-000000067F000040020001000000006F0000__000000914E3F38F0\n000000067F000040020001000000006EC000-000000067F000040020001000000006F0000__000000931B9A2710\n000000067F000040020001000000006F0000-000000067F000040020001000000006F40
00__00000073AD3FE6B8\n000000067F000040020001000000006F0000-000000067F000040020001000000006F4000__000000914E3F38F0\n000000067F000040020001000000006F0000-000000067F000040020001000000006F4000__000000931B9A2710\n000000067F000040020001000000006F0867-000000067F000040020001000000006F923B__0000005CA7BBD6F9-000000739A8D1299\n000000067F000040020001000000006F4000-000000067F000040020001000000006F8000__00000073AD3FE6B8\n000000067F000040020001000000006F4000-000000067F000040020001000000006F8000__000000914E3F38F0\n000000067F000040020001000000006F4000-000000067F000040020001000000006F8000__000000931B9A2710\n000000067F000040020001000000006F8000-000000067F000040020001000000006FC000__00000073AD3FE6B8\n000000067F000040020001000000006F8000-000000067F000040020001000000006FC000__000000914E3F38F0\n000000067F000040020001000000006F8000-000000067F000040020001000000006FC000__000000931B9A2710\n000000067F000040020001000000006F923B-000000067F00004002000100000000701C1C__0000005CA7BBD6F9-000000739A8D1299\n000000067F000040020001000000006FC000-000000067F00004002000100000000700000__00000073AD3FE6B8\n000000067F000040020001000000006FC000-000000067F00004002000100000000700000__000000914E3F38F0\n000000067F000040020001000000006FC000-000000067F00004002000100000000700000__000000931B9A2710\n000000067F00004002000100000000700000-000000067F00004002000100000000704000__00000073AD3FE6B8\n000000067F00004002000100000000700000-000000067F00004002000100000000704000__000000914E3F38F0\n000000067F00004002000100000000700000-000000067F00004002000100000000704000__000000931B9A2710\n000000067F00004002000100000000701C1C-000000067F0000400200010000000070A601__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000704000-000000067F00004002000100000000708000__00000073AD3FE6B8\n000000067F00004002000100000000704000-000000067F00004002000100000000708000__000000914E3F38F0\n000000067F00004002000100000000704000-000000067F00004002000100000000708000__000000931B9A2710\n000000067F00004002000100000000708000-000000067F00004002000100000000
70C000__00000073AD3FE6B8\n000000067F00004002000100000000708000-000000067F0000400200010000000070C000__000000914E3F38F0\n000000067F00004002000100000000708000-000000067F0000400200010000000070C000__000000931B9A2710\n000000067F0000400200010000000070A601-000000067F00004002000100000000712FD4__0000005CA7BBD6F9-000000739A8D1299\n000000067F0000400200010000000070C000-000000067F00004002000100000000710000__00000073AD3FE6B8\n000000067F0000400200010000000070C000-000000067F00004002000100000000710000__000000914E3F38F0\n000000067F0000400200010000000070C000-000000067F00004002000100000000710000__000000931B9A2710\n000000067F00004002000100000000710000-000000067F00004002000100000000714000__00000073AD3FE6B8\n000000067F00004002000100000000710000-000000067F00004002000100000000714000__000000914E3F38F0\n000000067F00004002000100000000710000-000000067F00004002000100000000714000__000000931B9A2710\n000000067F00004002000100000000712FD4-000000067F0000400200010000000071B9B4__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000714000-000000067F00004002000100000000718000__00000073AD3FE6B8\n000000067F00004002000100000000714000-000000067F00004002000100000000718000__000000914E3F38F0\n000000067F00004002000100000000714000-000000067F00004002000100000000718000__000000931B9A2710\n000000067F00004002000100000000718000-000000067F0000400200010000000071C000__00000073AD3FE6B8\n000000067F00004002000100000000718000-000000067F0000400200010000000071C000__000000914E3F38F0\n000000067F00004002000100000000718000-000000067F0000400200010000000071C000__000000931B9A2710\n000000067F0000400200010000000071B9B4-000000067F00004002000100000000724391__0000005CA7BBD6F9-000000739A8D1299\n000000067F0000400200010000000071C000-000000067F00004002000100000000720000__00000073AD3FE6B8\n000000067F0000400200010000000071C000-000000067F00004002000100000000720000__000000914E3F38F0\n000000067F0000400200010000000071C000-000000067F00004002000100000000720000__000000931B9A2710\n000000067F00004002000100000000720000-000000067F0000400200010000
0000724000__00000073AD3FE6B8\n000000067F00004002000100000000720000-000000067F00004002000100000000724000__000000914E3F38F0\n000000067F00004002000100000000720000-000000067F00004002000100000000724000__000000931B9A2710\n000000067F00004002000100000000724000-000000067F00004002000100000000728000__00000073AD3FE6B8\n000000067F00004002000100000000724000-000000067F00004002000100000000728000__000000914E3F38F0\n000000067F00004002000100000000724000-000000067F00004002000100000000728000__000000931B9A2710\n000000067F00004002000100000000724391-000000067F0000400200010000000072CD55__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000728000-000000067F0000400200010000000072C000__00000073AD3FE6B8\n000000067F00004002000100000000728000-000000067F0000400200010000000072C000__000000914E3F38F0\n000000067F00004002000100000000728000-000000067F0000400200010000000072C000__000000931B9A2710\n000000067F0000400200010000000072C000-000000067F00004002000100000000730000__00000073AD3FE6B8\n000000067F0000400200010000000072C000-000000067F00004002000100000000730000__000000914E3F38F0\n000000067F0000400200010000000072C000-000000067F00004002000100000000730000__000000931B9A2710\n000000067F0000400200010000000072CD55-000000067F00004002000100000000735725__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000730000-000000067F00004002000100000000734000__00000073AD3FE6B8\n000000067F00004002000100000000730000-000000067F00004002000100000000734000__000000914E3F38F0\n000000067F00004002000100000000730000-000000067F00004002000100000000734000__000000931B9A2710\n000000067F00004002000100000000734000-000000067F00004002000100000000738000__00000073AD3FE6B8\n000000067F00004002000100000000734000-000000067F00004002000100000000738000__000000914E3F38F0\n000000067F00004002000100000000734000-000000067F00004002000100000000738000__000000931B9A2710\n000000067F00004002000100000000735725-000000067F0000400200010000000073E109__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000738000-000000067F000040020001
0000000073C000__00000073AD3FE6B8\n000000067F00004002000100000000738000-000000067F0000400200010000000073C000__000000914E3F38F0\n000000067F00004002000100000000738000-000000067F0000400200010000000073C000__000000931B9A2710\n000000067F0000400200010000000073C000-000000067F00004002000100000000740000__00000073AD3FE6B8\n000000067F0000400200010000000073C000-000000067F00004002000100000000740000__000000914E3F38F0\n000000067F0000400200010000000073C000-000000067F00004002000100000000740000__000000931B9A2710\n000000067F0000400200010000000073E109-000000067F00004002000100000000746AE4__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000740000-000000067F00004002000100000000744000__00000073AD3FE6B8\n000000067F00004002000100000000740000-000000067F00004002000100000000744000__000000914E3F38F0\n000000067F00004002000100000000740000-000000067F00004002000100000000744000__000000931B9A2710\n000000067F00004002000100000000744000-000000067F00004002000100000000748000__00000073AD3FE6B8\n000000067F00004002000100000000744000-000000067F00004002000100000000748000__000000914E3F38F0\n000000067F00004002000100000000744000-000000067F00004002000100000000748000__000000931B9A2710\n000000067F00004002000100000000746AE4-000000067F0000400200010000000074F4C9__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000748000-000000067F0000400200010000000074C000__00000073AD3FE6B8\n000000067F00004002000100000000748000-000000067F0000400200010000000074C000__000000914E3F38F0\n000000067F00004002000100000000748000-000000067F0000400200010000000074C000__000000931B9A2710\n000000067F0000400200010000000074C000-000000067F00004002000100000000750000__00000073AD3FE6B8\n000000067F0000400200010000000074C000-000000067F00004002000100000000750000__000000914E3F38F0\n000000067F0000400200010000000074C000-000000067F00004002000100000000750000__000000931B9A2710\n000000067F0000400200010000000074F4C9-000000067F00004002000100000000757E9F__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000750000-000000067F00004002
000100000000754000__00000073AD3FE6B8\n000000067F00004002000100000000750000-000000067F00004002000100000000754000__000000914E3F38F0\n000000067F00004002000100000000750000-000000067F00004002000100000000754000__000000931B9A2710\n000000067F00004002000100000000754000-000000067F00004002000100000000758000__00000073AD3FE6B8\n000000067F00004002000100000000754000-000000067F00004002000100000000758000__000000914E3F38F0\n000000067F00004002000100000000754000-000000067F00004002000100000000758000__000000931B9A2710\n000000067F00004002000100000000757E9F-000000067F00004002000100000000760874__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000758000-000000067F0000400200010000000075C000__00000073AD3FE6B8\n000000067F00004002000100000000758000-000000067F0000400200010000000075C000__000000914E3F38F0\n000000067F00004002000100000000758000-000000067F0000400200010000000075C000__000000931B9A2710\n000000067F0000400200010000000075C000-000000067F00004002000100000000760000__00000073AD3FE6B8\n000000067F0000400200010000000075C000-000000067F00004002000100000000760000__000000914E3F38F0\n000000067F0000400200010000000075C000-000000067F00004002000100000000760000__000000931B9A2710\n000000067F00004002000100000000760000-000000067F00004002000100000000764000__00000073AD3FE6B8\n000000067F00004002000100000000760000-000000067F00004002000100000000764000__000000914E3F38F0\n000000067F00004002000100000000760000-000000067F00004002000100000000764000__000000931B9A2710\n000000067F00004002000100000000760874-000000067F0000400200010000000076924C__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000764000-000000067F00004002000100000000768000__00000073AD3FE6B8\n000000067F00004002000100000000764000-000000067F00004002000100000000768000__000000914E3F38F0\n000000067F00004002000100000000764000-000000067F00004002000100000000768000__000000931B9A2710\n000000067F00004002000100000000768000-000000067F0000400200010000000076C000__00000073AD3FE6B8\n000000067F00004002000100000000768000-000000067F000040020001000000007
6C000__000000914E3F38F0\n000000067F00004002000100000000768000-000000067F0000400200010000000076C000__000000931B9A2710\n000000067F0000400200010000000076924C-000000067F00004002000100000000771C36__0000005CA7BBD6F9-000000739A8D1299\n000000067F0000400200010000000076C000-000000067F00004002000100000000770000__00000073AD3FE6B8\n000000067F0000400200010000000076C000-000000067F00004002000100000000770000__000000914E3F38F0\n000000067F0000400200010000000076C000-000000067F00004002000100000000770000__000000931B9A2710\n000000067F00004002000100000000770000-000000067F00004002000100000000774000__00000073AD3FE6B8\n000000067F00004002000100000000770000-000000067F00004002000100000000774000__000000914E3F38F0\n000000067F00004002000100000000770000-000000067F00004002000100000000774000__000000931B9A2710\n000000067F00004002000100000000771C36-000000067F0000400200010000000077A601__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000774000-000000067F00004002000100000000778000__00000073AD3FE6B8\n000000067F00004002000100000000774000-000000067F00004002000100000000778000__000000914E3F38F0\n000000067F00004002000100000000774000-000000067F00004002000100000000778000__000000931B9A2710\n000000067F00004002000100000000778000-000000067F0000400200010000000077C000__00000073AD3FE6B8\n000000067F00004002000100000000778000-000000067F0000400200010000000077C000__000000914E3F38F0\n000000067F00004002000100000000778000-000000067F0000400200010000000077C000__000000931B9A2710\n000000067F0000400200010000000077A601-000000067F00004002000100000000782FCF__0000005CA7BBD6F9-000000739A8D1299\n000000067F0000400200010000000077C000-000000067F00004002000100000000780000__00000073AD3FE6B8\n000000067F0000400200010000000077C000-000000067F00004002000100000000780000__000000914E3F38F0\n000000067F0000400200010000000077C000-000000067F00004002000100000000780000__000000931B9A2710\n000000067F00004002000100000000780000-000000067F00004002000100000000784000__00000073AD3FE6B8\n000000067F00004002000100000000780000-000000067F00004002000100000
000784000__000000914E3F38F0\n000000067F00004002000100000000780000-000000067F00004002000100000000784000__000000931B9A2710\n000000067F00004002000100000000782FCF-000000067F0000400200010000000078B9BA__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000784000-000000067F00004002000100000000788000__00000073AD3FE6B8\n000000067F00004002000100000000784000-000000067F00004002000100000000788000__000000914E3F38F0\n000000067F00004002000100000000784000-000000067F00004002000100000000788000__000000931B9A2710\n000000067F00004002000100000000788000-000000067F0000400200010000000078C000__00000073AD3FE6B8\n000000067F00004002000100000000788000-000000067F0000400200010000000078C000__000000914E3F38F0\n000000067F00004002000100000000788000-000000067F0000400200010000000078C000__000000931B9A2710\n000000067F0000400200010000000078B9BA-000000067F0000400200010000000079439A__0000005CA7BBD6F9-000000739A8D1299\n000000067F0000400200010000000078C000-000000067F00004002000100000000790000__00000073AD3FE6B8\n000000067F0000400200010000000078C000-000000067F00004002000100000000790000__000000914E3F38F0\n000000067F0000400200010000000078C000-000000067F00004002000100000000790000__000000931B9A2710\n000000067F00004002000100000000790000-000000067F00004002000100000000794000__00000073AD3FE6B8\n000000067F00004002000100000000790000-000000067F00004002000100000000794000__000000914E3F38F0\n000000067F00004002000100000000790000-000000067F00004002000100000000794000__000000931B9A2710\n000000067F00004002000100000000794000-000000067F00004002000100000000798000__00000073AD3FE6B8\n000000067F00004002000100000000794000-000000067F00004002000100000000798000__000000914E3F38F0\n000000067F00004002000100000000794000-000000067F00004002000100000000798000__000000931B9A2710\n000000067F0000400200010000000079439A-000000067F0000400200010000000079CD75__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000798000-000000067F0000400200010000000079C000__00000073AD3FE6B8\n000000067F00004002000100000000798000-000000067F0000400200010
000000079C000__000000914E3F38F0\n000000067F00004002000100000000798000-000000067F0000400200010000000079C000__000000931B9A2710\n000000067F0000400200010000000079C000-000000067F000040020001000000007A0000__00000073AD3FE6B8\n000000067F0000400200010000000079C000-000000067F000040020001000000007A0000__000000914E3F38F0\n000000067F0000400200010000000079C000-000000067F000040020001000000007A0000__000000931B9A2710\n000000067F0000400200010000000079CD75-000000067F000040020001000000007A5758__0000005CA7BBD6F9-000000739A8D1299\n000000067F000040020001000000007A0000-000000067F000040020001000000007A4000__00000073AD3FE6B8\n000000067F000040020001000000007A0000-000000067F000040020001000000007A4000__000000914E3F38F0\n000000067F000040020001000000007A0000-000000067F000040020001000000007A4000__000000931B9A2710\n000000067F000040020001000000007A4000-000000067F000040020001000000007A8000__00000073AD3FE6B8\n000000067F000040020001000000007A4000-000000067F000040020001000000007A8000__000000914E3F38F0\n000000067F000040020001000000007A4000-000000067F000040020001000000007A8000__000000931B9A2710\n000000067F000040020001000000007A5758-000000067F000040020001000000007AE12F__0000005CA7BBD6F9-000000739A8D1299\n000000067F000040020001000000007A8000-000000067F000040020001000000007AC000__00000073AD3FE6B8\n000000067F000040020001000000007A8000-000000067F000040020001000000007AC000__000000914E3F38F0\n000000067F000040020001000000007A8000-000000067F000040020001000000007AC000__000000931B9A2710\n000000067F000040020001000000007AC000-000000067F000040020001000000007B0000__00000073AD3FE6B8\n000000067F000040020001000000007AC000-000000067F000040020001000000007B0000__000000914E3F38F0\n000000067F000040020001000000007AC000-000000067F000040020001000000007B0000__000000931B9A2710\n000000067F000040020001000000007AE12F-000000067F000040020001000000007B6B09__0000005CA7BBD6F9-000000739A8D1299\n000000067F000040020001000000007B0000-000000067F000040020001000000007B4000__00000073AD3FE6B8\n000000067F000040020001000000007B0000-000000067F000040020
001000000007B4000__000000914E3F38F0\n000000067F000040020001000000007B0000-000000067F000040020001000000007B4000__000000931B9A2710\n000000067F000040020001000000007B4000-000000067F000040020001000000007B8000__00000073AD3FE6B8\n000000067F000040020001000000007B4000-000000067F000040020001000000007B8000__000000914E3F38F0\n000000067F000040020001000000007B4000-000000067F000040020001000000007B8000__000000931B9A2710\n000000067F000040020001000000007B6B09-000000067F000040020001000000007BF4E1__0000005CA7BBD6F9-000000739A8D1299\n000000067F000040020001000000007B8000-000000067F000040020001000000007BC000__00000073AD3FE6B8\n000000067F000040020001000000007B8000-000000067F000040020001000000007BC000__000000914E3F38F0\n000000067F000040020001000000007B8000-000000067F000040020001000000007BC000__000000931B9A2710\n000000067F000040020001000000007BC000-000000067F000040020001000000007C0000__00000073AD3FE6B8\n000000067F000040020001000000007BC000-000000067F000040020001000000007C0000__000000914E3F38F0\n000000067F000040020001000000007BC000-000000067F000040020001000000007C0000__000000931B9A2710\n000000067F000040020001000000007BF4E1-000000067F000040020001000000007C7EBE__0000005CA7BBD6F9-000000739A8D1299\n000000067F000040020001000000007C0000-000000067F000040020001000000007C4000__00000073AD3FE6B8\n000000067F000040020001000000007C0000-000000067F000040020001000000007C4000__000000914E3F38F0\n000000067F000040020001000000007C0000-000000067F000040020001000000007C4000__000000931B9A2710\n000000067F000040020001000000007C4000-000000067F000040020001000000007C8000__00000073AD3FE6B8\n000000067F000040020001000000007C4000-000000067F000040020001000000007C8000__000000914E3F38F0\n000000067F000040020001000000007C4000-000000067F000040020001000000007C8000__000000931B9A2710\n000000067F000040020001000000007C7EBE-000000067F000040020001000000007D0891__0000005CA7BBD6F9-000000739A8D1299\n000000067F000040020001000000007C8000-000000067F000040020001000000007CC000__00000073AD3FE6B8\n000000067F000040020001000000007C8000-000000067F00004
0020001000000007CC000__000000914E3F38F0\n000000067F000040020001000000007C8000-000000067F000040020001000000007CC000__000000931B9A2710\n000000067F000040020001000000007CC000-000000067F000040020001000000007D0000__00000073AD3FE6B8\n000000067F000040020001000000007CC000-000000067F000040020001000000007D0000__000000914E3F38F0\n000000067F000040020001000000007CC000-000000067F000040020001000000007D0000__000000931B9A2710\n000000067F000040020001000000007D0000-000000067F000040020001000000007D4000__00000073AD3FE6B8\n000000067F000040020001000000007D0000-000000067F000040020001000000007D4000__000000914E3F38F0\n000000067F000040020001000000007D0000-000000067F000040020001000000007D4000__000000931B9A2710\n000000067F000040020001000000007D0891-000000067F000040020001000000007D926D__0000005CA7BBD6F9-000000739A8D1299\n000000067F000040020001000000007D4000-000000067F000040020001000000007D8000__00000073AD3FE6B8\n000000067F000040020001000000007D4000-000000067F000040020001000000007D8000__000000914E3F38F0\n000000067F000040020001000000007D4000-000000067F000040020001000000007D8000__000000931B9A2710\n000000067F000040020001000000007D8000-000000067F000040020001000000007DC000__00000073AD3FE6B8\n000000067F000040020001000000007D8000-000000067F000040020001000000007DC000__000000914E3F38F0\n000000067F000040020001000000007D8000-000000067F000040020001000000007DC000__000000931B9A2710\n000000067F000040020001000000007D926D-000000067F000040020001000000007E1C45__0000005CA7BBD6F9-000000739A8D1299\n000000067F000040020001000000007DC000-000000067F000040020001000000007E0000__00000073AD3FE6B8\n000000067F000040020001000000007DC000-000000067F000040020001000000007E0000__000000914E3F38F0\n000000067F000040020001000000007DC000-000000067F000040020001000000007E0000__000000931B9A2710\n000000067F000040020001000000007E0000-000000067F000040020001000000007E4000__00000073AD3FE6B8\n000000067F000040020001000000007E0000-000000067F000040020001000000007E4000__000000914E3F38F0\n000000067F000040020001000000007E0000-000000067F000040020001000000
007E4000__000000931B9A2710\n000000067F000040020001000000007E1C45-000000067F000040020001000000007EA622__0000005CA7BBD6F9-000000739A8D1299\n000000067F000040020001000000007E4000-000000067F000040020001000000007E8000__00000073AD3FE6B8\n000000067F000040020001000000007E4000-000000067F000040020001000000007E8000__000000914E3F38F0\n000000067F000040020001000000007E4000-000000067F000040020001000000007E8000__000000931B9A2710\n000000067F000040020001000000007E8000-000000067F000040020001000000007EC000__00000073AD3FE6B8\n000000067F000040020001000000007E8000-000000067F000040020001000000007EC000__000000914E3F38F0\n000000067F000040020001000000007E8000-000000067F000040020001000000007EC000__000000931B9A2710\n000000067F000040020001000000007EA622-000000067F000040020001000000007F2FFC__0000005CA7BBD6F9-000000739A8D1299\n000000067F000040020001000000007EC000-000000067F000040020001000000007F0000__00000073AD3FE6B8\n000000067F000040020001000000007EC000-000000067F000040020001000000007F0000__000000914E3F38F0\n000000067F000040020001000000007EC000-000000067F000040020001000000007F0000__000000931B9A2710\n000000067F000040020001000000007F0000-000000067F000040020001000000007F4000__00000073AD3FE6B8\n000000067F000040020001000000007F0000-000000067F000040020001000000007F4000__000000914E3F38F0\n000000067F000040020001000000007F0000-000000067F000040020001000000007F4000__000000931B9A2710\n000000067F000040020001000000007F2FFC-000000067F000040020001000000007FB9E5__0000005CA7BBD6F9-000000739A8D1299\n000000067F000040020001000000007F4000-000000067F000040020001000000007F8000__00000073AD3FE6B8\n000000067F000040020001000000007F4000-000000067F000040020001000000007F8000__000000914E3F38F0\n000000067F000040020001000000007F4000-000000067F000040020001000000007F8000__000000931B9A2710\n000000067F000040020001000000007F8000-000000067F000040020001000000007FC000__00000073AD3FE6B8\n000000067F000040020001000000007F8000-000000067F000040020001000000007FC000__000000914E3F38F0\n000000067F000040020001000000007F8000-000000067F00004002000100
0000007FC000__000000931B9A2710\n000000067F000040020001000000007FB9E5-000000067F000040020001000000008043C6__0000005CA7BBD6F9-000000739A8D1299\n000000067F000040020001000000007FC000-000000067F00004002000100000000800000__00000073AD3FE6B8\n000000067F000040020001000000007FC000-000000067F00004002000100000000800000__000000914E3F38F0\n000000067F000040020001000000007FC000-000000067F00004002000100000000800000__000000931B9A2710\n000000067F00004002000100000000800000-000000067F00004002000100000000804000__00000073AD3FE6B8\n000000067F00004002000100000000800000-000000067F00004002000100000000804000__000000914E3F38F0\n000000067F00004002000100000000800000-000000067F00004002000100000000804000__000000931B9A2710\n000000067F00004002000100000000804000-000000067F00004002000100000000808000__00000073AD3FE6B8\n000000067F00004002000100000000804000-000000067F00004002000100000000808000__000000914E3F38F0\n000000067F00004002000100000000804000-000000067F00004002000100000000808000__000000931B9A2710\n000000067F000040020001000000008043C6-000000067F0000400200010000000080CD9F__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000808000-000000067F0000400200010000000080C000__00000073AD3FE6B8\n000000067F00004002000100000000808000-000000067F0000400200010000000080C000__000000914E3F38F0\n000000067F00004002000100000000808000-000000067F0000400200010000000080C000__000000931B9A2710\n000000067F0000400200010000000080C000-000000067F00004002000100000000810000__00000073AD3FE6B8\n000000067F0000400200010000000080C000-000000067F00004002000100000000810000__000000914E3F38F0\n000000067F0000400200010000000080C000-000000067F00004002000100000000810000__000000931B9A2710\n000000067F0000400200010000000080CD9F-000000067F00004002000100000000815785__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000810000-000000067F00004002000100000000814000__00000073AD3FE6B8\n000000067F00004002000100000000810000-000000067F00004002000100000000814000__000000914E3F38F0\n000000067F00004002000100000000810000-000000067F0000400200
0100000000814000__000000931B9A2710\n000000067F00004002000100000000814000-000000067F00004002000100000000818000__00000073AD3FE6B8\n000000067F00004002000100000000814000-000000067F00004002000100000000818000__000000914E3F38F0\n000000067F00004002000100000000814000-000000067F00004002000100000000818000__000000931B9A2710\n000000067F00004002000100000000815785-000000067F0000400200010000000081E161__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000818000-000000067F0000400200010000000081C000__00000073AD3FE6B8\n000000067F00004002000100000000818000-000000067F0000400200010000000081C000__000000914E3F38F0\n000000067F00004002000100000000818000-000000067F0000400200010000000081C000__000000931B9A2710\n000000067F0000400200010000000081C000-000000067F00004002000100000000820000__00000073AD3FE6B8\n000000067F0000400200010000000081C000-000000067F00004002000100000000820000__000000914E3F38F0\n000000067F0000400200010000000081C000-000000067F00004002000100000000820000__000000931B9A2710\n000000067F0000400200010000000081E161-000000067F00004002000100000000826B3A__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000820000-000000067F00004002000100000000824000__00000073AD3FE6B8\n000000067F00004002000100000000820000-000000067F00004002000100000000824000__000000914E3F38F0\n000000067F00004002000100000000820000-000000067F00004002000100000000824000__000000931B9A2710\n000000067F00004002000100000000824000-000000067F00004002000100000000828000__00000073AD3FE6B8\n000000067F00004002000100000000824000-000000067F00004002000100000000828000__000000914E3F38F0\n000000067F00004002000100000000824000-000000067F00004002000100000000828000__000000931B9A2710\n000000067F00004002000100000000826B3A-000000067F0000400200010000000082F516__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000828000-000000067F0000400200010000000082C000__00000073AD3FE6B8\n000000067F00004002000100000000828000-000000067F0000400200010000000082C000__000000914E3F38F0\n000000067F00004002000100000000828000-000000067F000040
0200010000000082C000__000000931B9A2710\n000000067F0000400200010000000082C000-000000067F00004002000100000000830000__00000073AD3FE6B8\n000000067F0000400200010000000082C000-000000067F00004002000100000000830000__000000914E3F38F0\n000000067F0000400200010000000082C000-000000067F00004002000100000000830000__000000931B9A2710\n000000067F0000400200010000000082F516-000000067F00004002000100000000837EF5__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000830000-000000067F00004002000100000000834000__00000073AD3FE6B8\n000000067F00004002000100000000830000-000000067F00004002000100000000834000__000000914E3F38F0\n000000067F00004002000100000000830000-000000067F00004002000100000000834000__000000931B9A2710\n000000067F00004002000100000000834000-000000067F00004002000100000000838000__00000073AD3FE6B8\n000000067F00004002000100000000834000-000000067F00004002000100000000838000__000000914E3F38F0\n000000067F00004002000100000000834000-000000067F00004002000100000000838000__000000931B9A2710\n000000067F00004002000100000000837EF5-000000067F000040020001000000008408D5__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000838000-000000067F0000400200010000000083C000__00000073AD3FE6B8\n000000067F00004002000100000000838000-000000067F0000400200010000000083C000__000000914E3F38F0\n000000067F00004002000100000000838000-000000067F0000400200010000000083C000__000000931B9A2710\n000000067F0000400200010000000083C000-000000067F00004002000100000000840000__00000073AD3FE6B8\n000000067F0000400200010000000083C000-000000067F00004002000100000000840000__000000914E3F38F0\n000000067F0000400200010000000083C000-000000067F00004002000100000000840000__000000931B9A2710\n000000067F00004002000100000000840000-000000067F00004002000100000000844000__00000073AD3FE6B8\n000000067F00004002000100000000840000-000000067F00004002000100000000844000__000000914E3F38F0\n000000067F00004002000100000000840000-000000067F00004002000100000000844000__000000931B9A2710\n000000067F000040020001000000008408D5-000000067F0000400200010000000
08492B9__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000844000-000000067F00004002000100000000848000__00000073AD3FE6B8\n000000067F00004002000100000000844000-000000067F00004002000100000000848000__000000914E3F38F0\n000000067F00004002000100000000844000-000000067F00004002000100000000848000__000000931B9A2710\n000000067F00004002000100000000848000-000000067F0000400200010000000084C000__00000073AD3FE6B8\n000000067F00004002000100000000848000-000000067F0000400200010000000084C000__000000914E3F38F0\n000000067F00004002000100000000848000-000000067F0000400200010000000084C000__000000931B9A2710\n000000067F000040020001000000008492B9-000000067F00004002000100000000851C91__0000005CA7BBD6F9-000000739A8D1299\n000000067F0000400200010000000084C000-000000067F00004002000100000000850000__00000073AD3FE6B8\n000000067F0000400200010000000084C000-000000067F00004002000100000000850000__000000914E3F38F0\n000000067F0000400200010000000084C000-000000067F00004002000100000000850000__000000931B9A2710\n000000067F00004002000100000000850000-000000067F00004002000100000000854000__00000073AD3FE6B8\n000000067F00004002000100000000850000-000000067F00004002000100000000854000__000000914E3F38F0\n000000067F00004002000100000000850000-000000067F00004002000100000000854000__000000931B9A2710\n000000067F00004002000100000000851C91-000000067F0000400200010000000085A67F__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000854000-000000067F00004002000100000000858000__00000073AD3FE6B8\n000000067F00004002000100000000854000-000000067F00004002000100000000858000__000000914E3F38F0\n000000067F00004002000100000000854000-000000067F00004002000100000000858000__000000931B9A2710\n000000067F00004002000100000000858000-000000067F0000400200010000000085C000__00000073AD3FE6B8\n000000067F00004002000100000000858000-000000067F0000400200010000000085C000__000000914E3F38F0\n000000067F00004002000100000000858000-000000067F0000400200010000000085C000__000000931B9A2710\n000000067F0000400200010000000085A67F-000000067F000040020001000
00000863061__0000005CA7BBD6F9-000000739A8D1299\n000000067F0000400200010000000085C000-000000067F00004002000100000000860000__00000073AD3FE6B8\n000000067F0000400200010000000085C000-000000067F00004002000100000000860000__000000914E3F38F0\n000000067F0000400200010000000085C000-000000067F00004002000100000000860000__000000931B9A2710\n000000067F00004002000100000000860000-000000067F00004002000100000000864000__00000073AD3FE6B8\n000000067F00004002000100000000860000-000000067F00004002000100000000864000__000000914E3F38F0\n000000067F00004002000100000000860000-000000067F00004002000100000000864000__000000931B9A2710\n000000067F00004002000100000000863061-000000067F0000400200010000000086BA3E__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000864000-000000067F00004002000100000000868000__00000073AD3FE6B8\n000000067F00004002000100000000864000-000000067F00004002000100000000868000__000000914E3F38F0\n000000067F00004002000100000000864000-000000067F00004002000100000000868000__000000931B9A2710\n000000067F00004002000100000000868000-000000067F0000400200010000000086C000__00000073AD3FE6B8\n000000067F00004002000100000000868000-000000067F0000400200010000000086C000__000000914E3F38F0\n000000067F00004002000100000000868000-000000067F0000400200010000000086C000__000000931B9A2710\n000000067F0000400200010000000086BA3E-000000067F0000400200010000000087440C__0000005CA7BBD6F9-000000739A8D1299\n000000067F0000400200010000000086C000-000000067F00004002000100000000870000__00000073AD3FE6B8\n000000067F0000400200010000000086C000-000000067F00004002000100000000870000__000000914E3F38F0\n000000067F0000400200010000000086C000-000000067F00004002000100000000870000__000000931B9A2710\n000000067F00004002000100000000870000-000000067F00004002000100000000874000__00000073AD3FE6B8\n000000067F00004002000100000000870000-000000067F00004002000100000000874000__000000914E3F38F0\n000000067F00004002000100000000870000-000000067F00004002000100000000874000__000000931B9A2710\n000000067F00004002000100000000874000-000000067F00004002000
100000000878000__00000073AD3FE6B8\n000000067F00004002000100000000874000-000000067F00004002000100000000878000__000000914E3F38F0\n000000067F00004002000100000000874000-000000067F00004002000100000000878000__000000931B9A2710\n000000067F0000400200010000000087440C-000000067F0000400200010000000087CDE0__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000878000-000000067F0000400200010000000087C000__00000073AD3FE6B8\n000000067F00004002000100000000878000-000000067F0000400200010000000087C000__000000914E3F38F0\n000000067F00004002000100000000878000-000000067F0000400200010000000087C000__000000931B9A2710\n000000067F0000400200010000000087C000-000000067F00004002000100000000880000__00000073AD3FE6B8\n000000067F0000400200010000000087C000-000000067F00004002000100000000880000__000000914E3F38F0\n000000067F0000400200010000000087C000-000000067F00004002000100000000880000__000000931B9A2710\n000000067F0000400200010000000087CDE0-000000067F000040020001000000008857BF__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000880000-000000067F00004002000100000000884000__00000073AD3FE6B8\n000000067F00004002000100000000880000-000000067F00004002000100000000884000__000000914E3F38F0\n000000067F00004002000100000000880000-000000067F00004002000100000000884000__000000931B9A2710\n000000067F00004002000100000000884000-000000067F00004002000100000000888000__00000073AD3FE6B8\n000000067F00004002000100000000884000-000000067F00004002000100000000888000__000000914E3F38F0\n000000067F00004002000100000000884000-000000067F00004002000100000000888000__000000931B9A2710\n000000067F000040020001000000008857BF-000000067F0000400200010000000088E19E__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000888000-000000067F0000400200010000000088C000__00000073AD3FE6B8\n000000067F00004002000100000000888000-000000067F0000400200010000000088C000__000000914E3F38F0\n000000067F00004002000100000000888000-000000067F0000400200010000000088C000__000000931B9A2710\n000000067F0000400200010000000088C000-000000067F0000400
2000100000000890000__00000073AD3FE6B8\n000000067F0000400200010000000088C000-000000067F00004002000100000000890000__000000914E3F38F0\n000000067F0000400200010000000088C000-000000067F00004002000100000000890000__000000931B9A2710\n000000067F0000400200010000000088E19E-000000067F00004002000100000000896B7C__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000890000-000000067F00004002000100000000894000__00000073AD3FE6B8\n000000067F00004002000100000000890000-000000067F00004002000100000000894000__000000914E3F38F0\n000000067F00004002000100000000890000-000000067F00004002000100000000894000__000000931B9A2710\n000000067F00004002000100000000894000-000000067F00004002000100000000898000__00000073AD3FE6B8\n000000067F00004002000100000000894000-000000067F00004002000100000000898000__000000914E3F38F0\n000000067F00004002000100000000894000-000000067F00004002000100000000898000__000000931B9A2710\n000000067F00004002000100000000896B7C-000000067F0000400200010000000089F566__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000898000-000000067F0000400200010000000089C000__00000073AD3FE6B8\n000000067F00004002000100000000898000-000000067F0000400200010000000089C000__000000914E3F38F0\n000000067F00004002000100000000898000-000000067F0000400200010000000089C000__000000931B9A2710\n000000067F0000400200010000000089C000-000000067F000040020001000000008A0000__00000073AD3FE6B8\n000000067F0000400200010000000089C000-000000067F000040020001000000008A0000__000000914E3F38F0\n000000067F0000400200010000000089C000-000000067F000040020001000000008A0000__000000931B9A2710\n000000067F0000400200010000000089F566-000000067F000040020001000000008A7F45__0000005CA7BBD6F9-000000739A8D1299\n000000067F000040020001000000008A0000-000000067F000040020001000000008A4000__00000073AD3FE6B8\n000000067F000040020001000000008A0000-000000067F000040020001000000008A4000__000000914E3F38F0\n000000067F000040020001000000008A0000-000000067F000040020001000000008A4000__000000931B9A2710\n000000067F000040020001000000008A4000-000000067F000
040020001000000008A8000__00000073AD3FE6B8\n000000067F000040020001000000008A4000-000000067F000040020001000000008A8000__000000914E3F38F0\n000000067F000040020001000000008A4000-000000067F000040020001000000008A8000__000000931B9A2710\n000000067F000040020001000000008A7F45-000000067F000040020001000000008B0918__0000005CA7BBD6F9-000000739A8D1299\n000000067F000040020001000000008A8000-000000067F000040020001000000008AC000__00000073AD3FE6B8\n000000067F000040020001000000008A8000-000000067F000040020001000000008AC000__000000914E3F38F0\n000000067F000040020001000000008A8000-000000067F000040020001000000008AC000__000000931B9A2710\n000000067F000040020001000000008AC000-000000067F000040020001000000008B0000__00000073AD3FE6B8\n000000067F000040020001000000008AC000-000000067F000040020001000000008B0000__000000914E3F38F0\n000000067F000040020001000000008AC000-000000067F000040020001000000008B0000__000000931B9A2710\n000000067F000040020001000000008B0000-000000067F000040020001000000008B4000__00000073AD3FE6B8\n000000067F000040020001000000008B0000-000000067F000040020001000000008B4000__000000914E3F38F0\n000000067F000040020001000000008B0000-000000067F000040020001000000008B4000__000000931B9A2710\n000000067F000040020001000000008B0918-000000067F000040020001000000008B92F6__0000005CA7BBD6F9-000000739A8D1299\n000000067F000040020001000000008B4000-000000067F000040020001000000008B8000__00000073AD3FE6B8\n000000067F000040020001000000008B4000-000000067F000040020001000000008B8000__000000914E3F38F0\n000000067F000040020001000000008B4000-000000067F000040020001000000008B8000__000000931B9A2710\n000000067F000040020001000000008B8000-000000067F000040020001000000008BC000__00000073AD3FE6B8\n000000067F000040020001000000008B8000-000000067F000040020001000000008BC000__000000914E3F38F0\n000000067F000040020001000000008B8000-000000067F000040020001000000008BC000__000000931B9A2710\n000000067F000040020001000000008B92F6-000000067F000040020001000000008C1CD8__0000005CA7BBD6F9-000000739A8D1299\n000000067F000040020001000000008BC000-000000067
F000040020001000000008C0000__00000073AD3FE6B8\n000000067F000040020001000000008BC000-000000067F000040020001000000008C0000__000000914E3F38F0\n000000067F000040020001000000008BC000-000000067F000040020001000000008C0000__000000931B9A2710\n000000067F000040020001000000008C0000-000000067F000040020001000000008C4000__00000073AD3FE6B8\n000000067F000040020001000000008C0000-000000067F000040020001000000008C4000__000000914E3F38F0\n000000067F000040020001000000008C0000-000000067F000040020001000000008C4000__000000931B9A2710\n000000067F000040020001000000008C1CD8-000000067F000040020001000000008CA6C0__0000005CA7BBD6F9-000000739A8D1299\n000000067F000040020001000000008C4000-000000067F000040020001000000008C8000__00000073AD3FE6B8\n000000067F000040020001000000008C4000-000000067F000040020001000000008C8000__000000914E3F38F0\n000000067F000040020001000000008C4000-000000067F000040020001000000008C8000__000000931B9A2710\n000000067F000040020001000000008C8000-000000067F000040020001000000008CC000__00000073AD3FE6B8\n000000067F000040020001000000008C8000-000000067F000040020001000000008CC000__000000914E3F38F0\n000000067F000040020001000000008C8000-000000067F000040020001000000008CC000__000000931B9A2710\n000000067F000040020001000000008CA6C0-000000067F000040020001000000008D30A3__0000005CA7BBD6F9-000000739A8D1299\n000000067F000040020001000000008CC000-000000067F000040020001000000008D0000__00000073AD3FE6B8\n000000067F000040020001000000008CC000-000000067F000040020001000000008D0000__000000914E3F38F0\n000000067F000040020001000000008CC000-000000067F000040020001000000008D0000__000000931B9A2710\n000000067F000040020001000000008D0000-000000067F000040020001000000008D4000__00000073AD3FE6B8\n000000067F000040020001000000008D0000-000000067F000040020001000000008D4000__000000914E3F38F0\n000000067F000040020001000000008D0000-000000067F000040020001000000008D4000__000000931B9A2710\n000000067F000040020001000000008D30A3-000000067F000040020001000000008DBA92__0000005CA7BBD6F9-000000739A8D1299\n000000067F000040020001000000008D4000-00000
0067F000040020001000000008D8000__00000073AD3FE6B8\n000000067F000040020001000000008D4000-000000067F000040020001000000008D8000__000000914E3F38F0\n000000067F000040020001000000008D4000-000000067F000040020001000000008D8000__000000931B9A2710\n000000067F000040020001000000008D8000-000000067F000040020001000000008DC000__00000073AD3FE6B8\n000000067F000040020001000000008D8000-000000067F000040020001000000008DC000__000000914E3F38F0\n000000067F000040020001000000008D8000-000000067F000040020001000000008DC000__000000931B9A2710\n000000067F000040020001000000008DBA92-000000067F000040020001000000008E4465__0000005CA7BBD6F9-000000739A8D1299\n000000067F000040020001000000008DC000-000000067F000040020001000000008E0000__00000073AD3FE6B8\n000000067F000040020001000000008DC000-000000067F000040020001000000008E0000__000000914E3F38F0\n000000067F000040020001000000008DC000-000000067F000040020001000000008E0000__000000931B9A2710\n000000067F000040020001000000008E0000-000000067F000040020001000000008E4000__00000073AD3FE6B8\n000000067F000040020001000000008E0000-000000067F000040020001000000008E4000__000000914E3F38F0\n000000067F000040020001000000008E0000-000000067F000040020001000000008E4000__000000931B9A2710\n000000067F000040020001000000008E4000-000000067F000040020001000000008E8000__00000073AD3FE6B8\n000000067F000040020001000000008E4000-000000067F000040020001000000008E8000__000000914E3F38F0\n000000067F000040020001000000008E4000-000000067F000040020001000000008E8000__000000931B9A2710\n000000067F000040020001000000008E4465-000000067F000040020001000000008ECE3E__0000005CA7BBD6F9-000000739A8D1299\n000000067F000040020001000000008E8000-000000067F000040020001000000008EC000__00000073AD3FE6B8\n000000067F000040020001000000008E8000-000000067F000040020001000000008EC000__000000914E3F38F0\n000000067F000040020001000000008E8000-000000067F000040020001000000008EC000__000000931B9A2710\n000000067F000040020001000000008EC000-000000067F000040020001000000008F0000__00000073AD3FE6B8\n000000067F000040020001000000008EC000-000000067F00004002
0001000000008F0000__000000914E3F38F0\n000000067F000040020001000000008EC000-000000067F000040020001000000008F0000__000000931B9A2710\n000000067F000040020001000000008ECE3E-000000067F000040020001000000008F5814__0000005CA7BBD6F9-000000739A8D1299\n000000067F000040020001000000008F0000-000000067F000040020001000000008F4000__00000073AD3FE6B8\n000000067F000040020001000000008F0000-000000067F000040020001000000008F4000__000000914E3F38F0\n000000067F000040020001000000008F0000-000000067F000040020001000000008F4000__000000931B9A2710\n000000067F000040020001000000008F4000-000000067F000040020001000000008F8000__00000073AD3FE6B8\n000000067F000040020001000000008F4000-000000067F000040020001000000008F8000__000000914E3F38F0\n000000067F000040020001000000008F4000-000000067F000040020001000000008F8000__000000931B9A2710\n000000067F000040020001000000008F5814-000000067F000040020001000000008FE1EC__0000005CA7BBD6F9-000000739A8D1299\n000000067F000040020001000000008F8000-000000067F000040020001000000008FC000__00000073AD3FE6B8\n000000067F000040020001000000008F8000-000000067F000040020001000000008FC000__000000914E3F38F0\n000000067F000040020001000000008F8000-000000067F000040020001000000008FC000__000000931B9A2710\n000000067F000040020001000000008FC000-000000067F00004002000100000000900000__00000073AD3FE6B8\n000000067F000040020001000000008FC000-000000067F00004002000100000000900000__000000914E3F38F0\n000000067F000040020001000000008FC000-000000067F00004002000100000000900000__000000931B9A2710\n000000067F000040020001000000008FE1EC-000000067F00004002000100000000906BDF__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000900000-000000067F00004002000100000000904000__00000073AD3FE6B8\n000000067F00004002000100000000900000-000000067F00004002000100000000904000__000000914E3F38F0\n000000067F00004002000100000000900000-000000067F00004002000100000000904000__000000931B9A2710\n000000067F00004002000100000000904000-000000067F00004002000100000000908000__00000073AD3FE6B8\n000000067F00004002000100000000904000-000000067F0000
4002000100000000908000__000000914E3F38F0\n000000067F00004002000100000000904000-000000067F00004002000100000000908000__000000931B9A2710\n000000067F00004002000100000000906BDF-000000067F0000400200010000000090F5CA__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000908000-000000067F0000400200010000000090C000__00000073AD3FE6B8\n000000067F00004002000100000000908000-000000067F0000400200010000000090C000__000000914E3F38F0\n000000067F00004002000100000000908000-000000067F0000400200010000000090C000__000000931B9A2710\n000000067F0000400200010000000090C000-000000067F00004002000100000000910000__00000073AD3FE6B8\n000000067F0000400200010000000090C000-000000067F00004002000100000000910000__000000914E3F38F0\n000000067F0000400200010000000090C000-000000067F00004002000100000000910000__000000931B9A2710\n000000067F0000400200010000000090F5CA-000000067F00004002000100000000917FAA__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000910000-000000067F00004002000100000000914000__00000073AD3FE6B8\n000000067F00004002000100000000910000-000000067F00004002000100000000914000__000000914E3F38F0\n000000067F00004002000100000000910000-000000067F00004002000100000000914000__000000931B9A2710\n000000067F00004002000100000000914000-000000067F00004002000100000000918000__00000073AD3FE6B8\n000000067F00004002000100000000914000-000000067F00004002000100000000918000__000000914E3F38F0\n000000067F00004002000100000000914000-000000067F00004002000100000000918000__000000931B9A2710\n000000067F00004002000100000000917FAA-000000067F0000400200010000000092097C__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000918000-000000067F0000400200010000000091C000__00000073AD3FE6B8\n000000067F00004002000100000000918000-000000067F0000400200010000000091C000__000000914E3F38F0\n000000067F00004002000100000000918000-000000067F0000400200010000000091C000__000000931B9A2710\n000000067F0000400200010000000091C000-000000067F00004002000100000000920000__00000073AD3FE6B8\n000000067F0000400200010000000091C000-000000067F
00004002000100000000920000__000000914E3F38F0\n000000067F0000400200010000000091C000-000000067F00004002000100000000920000__000000931B9A2710\n000000067F00004002000100000000920000-000000067F00004002000100000000924000__00000073AD3FE6B8\n000000067F00004002000100000000920000-000000067F00004002000100000000924000__000000914E3F38F0\n000000067F00004002000100000000920000-000000067F00004002000100000000924000__000000931B9A2710\n000000067F0000400200010000000092097C-000000067F0000400200010000000092935B__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000924000-000000067F00004002000100000000928000__00000073AD3FE6B8\n000000067F00004002000100000000924000-000000067F00004002000100000000928000__000000914E3F38F0\n000000067F00004002000100000000924000-000000067F00004002000100000000928000__000000931B9A2710\n000000067F00004002000100000000928000-000000067F0000400200010000000092C000__00000073AD3FE6B8\n000000067F00004002000100000000928000-000000067F0000400200010000000092C000__000000914E3F38F0\n000000067F00004002000100000000928000-000000067F0000400200010000000092C000__000000931B9A2710\n000000067F0000400200010000000092935B-000000067F00004002000100000000931D2F__0000005CA7BBD6F9-000000739A8D1299\n000000067F0000400200010000000092C000-000000067F00004002000100000000930000__00000073AD3FE6B8\n000000067F0000400200010000000092C000-000000067F00004002000100000000930000__000000914E3F38F0\n000000067F0000400200010000000092C000-000000067F00004002000100000000930000__000000931B9A2710\n000000067F00004002000100000000930000-000000067F00004002000100000000934000__00000073AD3FE6B8\n000000067F00004002000100000000930000-000000067F00004002000100000000934000__000000914E3F38F0\n000000067F00004002000100000000930000-000000067F00004002000100000000934000__000000931B9A2710\n000000067F00004002000100000000931D2F-000000067F0000400200010000000093A709__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000934000-000000067F00004002000100000000938000__00000073AD3FE6B8\n000000067F00004002000100000000934000-000000
067F00004002000100000000938000__000000914E3F38F0\n000000067F00004002000100000000934000-000000067F00004002000100000000938000__000000931B9A2710\n000000067F00004002000100000000938000-000000067F0000400200010000000093C000__00000073AD3FE6B8\n000000067F00004002000100000000938000-000000067F0000400200010000000093C000__000000914E3F38F0\n000000067F00004002000100000000938000-000000067F0000400200010000000093C000__000000931B9A2710\n000000067F0000400200010000000093A709-000000067F000040020001000000009430E7__0000005CA7BBD6F9-000000739A8D1299\n000000067F0000400200010000000093C000-000000067F00004002000100000000940000__00000073AD3FE6B8\n000000067F0000400200010000000093C000-000000067F00004002000100000000940000__000000914E3F38F0\n000000067F0000400200010000000093C000-000000067F00004002000100000000940000__000000931B9A2710\n000000067F00004002000100000000940000-000000067F00004002000100000000944000__00000073AD3FE6B8\n000000067F00004002000100000000940000-000000067F00004002000100000000944000__000000914E3F38F0\n000000067F00004002000100000000940000-000000067F00004002000100000000944000__000000931B9A2710\n000000067F000040020001000000009430E7-000000067F0000400200010000000094BAD0__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000944000-000000067F00004002000100000000948000__00000073AD3FE6B8\n000000067F00004002000100000000944000-000000067F00004002000100000000948000__000000914E3F38F0\n000000067F00004002000100000000944000-000000067F00004002000100000000948000__000000931B9A2710\n000000067F00004002000100000000948000-000000067F0000400200010000000094C000__00000073AD3FE6B8\n000000067F00004002000100000000948000-000000067F0000400200010000000094C000__000000914E3F38F0\n000000067F00004002000100000000948000-000000067F0000400200010000000094C000__000000931B9A2710\n000000067F0000400200010000000094BAD0-000000067F000040020001000000009544BD__0000005CA7BBD6F9-000000739A8D1299\n000000067F0000400200010000000094C000-000000067F00004002000100000000950000__00000073AD3FE6B8\n000000067F0000400200010000000094C000-00
0000067F00004002000100000000950000__000000914E3F38F0\n000000067F0000400200010000000094C000-000000067F00004002000100000000950000__000000931B9A2710\n000000067F00004002000100000000950000-000000067F00004002000100000000954000__00000073AD3FE6B8\n000000067F00004002000100000000950000-000000067F00004002000100000000954000__000000914E3F38F0\n000000067F00004002000100000000950000-000000067F00004002000100000000954000__000000931B9A2710\n000000067F00004002000100000000954000-000000067F00004002000100000000958000__00000073AD3FE6B8\n000000067F00004002000100000000954000-000000067F00004002000100000000958000__000000914E3F38F0\n000000067F00004002000100000000954000-000000067F00004002000100000000958000__000000931B9A2710\n000000067F000040020001000000009544BD-000000067F0000400200010000000095CE95__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000958000-000000067F0000400200010000000095C000__00000073AD3FE6B8\n000000067F00004002000100000000958000-000000067F0000400200010000000095C000__000000914E3F38F0\n000000067F00004002000100000000958000-000000067F0000400200010000000095C000__000000931B9A2710\n000000067F0000400200010000000095C000-000000067F00004002000100000000960000__00000073AD3FE6B8\n000000067F0000400200010000000095C000-000000067F00004002000100000000960000__000000914E3F38F0\n000000067F0000400200010000000095C000-000000067F00004002000100000000960000__000000931B9A2710\n000000067F0000400200010000000095CE95-000000067F0000400200010000000096586F__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000960000-000000067F00004002000100000000964000__00000073AD3FE6B8\n000000067F00004002000100000000960000-000000067F00004002000100000000964000__000000914E3F38F0\n000000067F00004002000100000000960000-000000067F00004002000100000000964000__000000931B9A2710\n000000067F00004002000100000000964000-000000067F00004002000100000000968000__00000073AD3FE6B8\n000000067F00004002000100000000964000-000000067F00004002000100000000968000__000000914E3F38F0\n000000067F00004002000100000000964000-000000067F00004
002000100000000968000__000000931B9A2710\n000000067F0000400200010000000096586F-000000067F0000400200010000000096E247__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000968000-000000067F0000400200010000000096C000__00000073AD3FE6B8\n000000067F00004002000100000000968000-000000067F0000400200010000000096C000__000000914E3F38F0\n000000067F00004002000100000000968000-000000067F0000400200010000000096C000__000000931B9A2710\n000000067F0000400200010000000096C000-000000067F00004002000100000000970000__00000073AD3FE6B8\n000000067F0000400200010000000096C000-000000067F00004002000100000000970000__000000914E3F38F0\n000000067F0000400200010000000096C000-000000067F00004002000100000000970000__000000931B9A2710\n000000067F0000400200010000000096E247-000000067F00004002000100000000976C0F__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000970000-000000067F00004002000100000000974000__00000073AD3FE6B8\n000000067F00004002000100000000970000-000000067F00004002000100000000974000__000000914E3F38F0\n000000067F00004002000100000000970000-000000067F00004002000100000000974000__000000931B9A2710\n000000067F00004002000100000000974000-000000067F00004002000100000000978000__00000073AD3FE6B8\n000000067F00004002000100000000974000-000000067F00004002000100000000978000__000000914E3F38F0\n000000067F00004002000100000000974000-000000067F00004002000100000000978000__000000931B9A2710\n000000067F00004002000100000000976C0F-000000067F0000400200010000000097F5F4__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000978000-000000067F0000400200010000000097C000__00000073AD3FE6B8\n000000067F00004002000100000000978000-000000067F0000400200010000000097C000__000000914E3F38F0\n000000067F00004002000100000000978000-000000067F0000400200010000000097C000__000000931B9A2710\n000000067F0000400200010000000097C000-000000067F00004002000100000000980000__00000073AD3FE6B8\n000000067F0000400200010000000097C000-000000067F00004002000100000000980000__000000914E3F38F0\n000000067F0000400200010000000097C000-000000067F0
0004002000100000000980000__000000931B9A2710\n000000067F0000400200010000000097F5F4-000000067F00004002000100000000987FD8__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000980000-000000067F00004002000100000000984000__00000073AD3FE6B8\n000000067F00004002000100000000980000-000000067F00004002000100000000984000__000000914E3F38F0\n000000067F00004002000100000000980000-000000067F00004002000100000000984000__000000931B9A2710\n000000067F00004002000100000000984000-000000067F00004002000100000000988000__00000073AD3FE6B8\n000000067F00004002000100000000984000-000000067F00004002000100000000988000__000000914E3F38F0\n000000067F00004002000100000000984000-000000067F00004002000100000000988000__000000931B9A2710\n000000067F00004002000100000000987FD8-000000067F000040020001000000009909C2__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000988000-000000067F0000400200010000000098C000__00000073AD3FE6B8\n000000067F00004002000100000000988000-000000067F0000400200010000000098C000__000000914E3F38F0\n000000067F00004002000100000000988000-000000067F0000400200010000000098C000__000000931B9A2710\n000000067F0000400200010000000098C000-000000067F00004002000100000000990000__00000073AD3FE6B8\n000000067F0000400200010000000098C000-000000067F00004002000100000000990000__000000914E3F38F0\n000000067F0000400200010000000098C000-000000067F00004002000100000000990000__000000931B9A2710\n000000067F00004002000100000000990000-000000067F00004002000100000000994000__00000073AD3FE6B8\n000000067F00004002000100000000990000-000000067F00004002000100000000994000__000000914E3F38F0\n000000067F00004002000100000000990000-000000067F00004002000100000000994000__000000931B9A2710\n000000067F000040020001000000009909C2-000000067F000040020001000000009993A0__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000994000-000000067F00004002000100000000998000__00000073AD3FE6B8\n000000067F00004002000100000000994000-000000067F00004002000100000000998000__000000914E3F38F0\n000000067F00004002000100000000994000-0000000
67F00004002000100000000998000__000000931B9A2710\n000000067F00004002000100000000998000-000000067F0000400200010000000099C000__00000073AD3FE6B8\n000000067F00004002000100000000998000-000000067F0000400200010000000099C000__000000914E3F38F0\n000000067F00004002000100000000998000-000000067F0000400200010000000099C000__000000931B9A2710\n000000067F000040020001000000009993A0-000000067F000040020001000000009A1D79__0000005CA7BBD6F9-000000739A8D1299\n000000067F0000400200010000000099C000-000000067F000040020001000000009A0000__00000073AD3FE6B8\n000000067F0000400200010000000099C000-000000067F000040020001000000009A0000__000000914E3F38F0\n000000067F0000400200010000000099C000-000000067F000040020001000000009A0000__000000931B9A2710\n000000067F000040020001000000009A0000-000000067F000040020001000000009A4000__00000073AD3FE6B8\n000000067F000040020001000000009A0000-000000067F000040020001000000009A4000__000000914E3F38F0\n000000067F000040020001000000009A0000-000000067F000040020001000000009A4000__000000931B9A2710\n000000067F000040020001000000009A1D79-000000067F000040020001000000009AA74E__0000005CA7BBD6F9-000000739A8D1299\n000000067F000040020001000000009A4000-000000067F000040020001000000009A8000__00000073AD3FE6B8\n000000067F000040020001000000009A4000-000000067F000040020001000000009A8000__000000914E3F38F0\n000000067F000040020001000000009A4000-000000067F000040020001000000009A8000__000000931B9A2710\n000000067F000040020001000000009A8000-000000067F000040020001000000009AC000__00000073AD3FE6B8\n000000067F000040020001000000009A8000-000000067F000040020001000000009AC000__000000914E3F38F0\n000000067F000040020001000000009A8000-000000067F000040020001000000009AC000__000000931B9A2710\n000000067F000040020001000000009AA74E-000000067F000040020001000000009B311D__0000005CA7BBD6F9-000000739A8D1299\n000000067F000040020001000000009AC000-000000067F000040020001000000009B0000__00000073AD3FE6B8\n000000067F000040020001000000009AC000-000000067F000040020001000000009B0000__000000914E3F38F0\n000000067F000040020001000000009AC000-000
000067F000040020001000000009B0000__000000931B9A2710\n000000067F000040020001000000009B0000-000000067F000040020001000000009B4000__00000073AD3FE6B8\n000000067F000040020001000000009B0000-000000067F000040020001000000009B4000__000000914E3F38F0\n000000067F000040020001000000009B0000-000000067F000040020001000000009B4000__000000931B9A2710\n000000067F000040020001000000009B311D-000000067F000040020001000000009BBB01__0000005CA7BBD6F9-000000739A8D1299\n000000067F000040020001000000009B4000-000000067F000040020001000000009B8000__00000073AD3FE6B8\n000000067F000040020001000000009B4000-000000067F000040020001000000009B8000__000000914E3F38F0\n000000067F000040020001000000009B4000-000000067F000040020001000000009B8000__000000931B9A2710\n000000067F000040020001000000009B8000-000000067F000040020001000000009BC000__00000073AD3FE6B8\n000000067F000040020001000000009B8000-000000067F000040020001000000009BC000__000000914E3F38F0\n000000067F000040020001000000009B8000-000000067F000040020001000000009BC000__000000931B9A2710\n000000067F000040020001000000009BBB01-000000067F000040020001000000009C44DD__0000005CA7BBD6F9-000000739A8D1299\n000000067F000040020001000000009BC000-000000067F000040020001000000009C0000__00000073AD3FE6B8\n000000067F000040020001000000009BC000-000000067F000040020001000000009C0000__000000914E3F38F0\n000000067F000040020001000000009BC000-000000067F000040020001000000009C0000__000000931B9A2710\n000000067F000040020001000000009C0000-000000067F000040020001000000009C4000__00000073AD3FE6B8\n000000067F000040020001000000009C0000-000000067F000040020001000000009C4000__000000914E3F38F0\n000000067F000040020001000000009C0000-000000067F000040020001000000009C4000__000000931B9A2710\n000000067F000040020001000000009C4000-000000067F000040020001000000009C8000__00000073AD3FE6B8\n000000067F000040020001000000009C4000-000000067F000040020001000000009C8000__000000914E3F38F0\n000000067F000040020001000000009C4000-000000067F000040020001000000009C8000__000000931B9A2710\n000000067F000040020001000000009C44DD-000000067F000040
020001000000009CCEC8__0000005CA7BBD6F9-000000739A8D1299\n000000067F000040020001000000009C8000-000000067F000040020001000000009CC000__00000073AD3FE6B8\n000000067F000040020001000000009C8000-000000067F000040020001000000009CC000__000000914E3F38F0\n000000067F000040020001000000009C8000-000000067F000040020001000000009CC000__000000931B9A2710\n000000067F000040020001000000009CC000-000000067F000040020001000000009D0000__00000073AD3FE6B8\n000000067F000040020001000000009CC000-000000067F000040020001000000009D0000__000000914E3F38F0\n000000067F000040020001000000009CC000-000000067F000040020001000000009D0000__000000931B9A2710\n000000067F000040020001000000009CCEC8-000000067F000040020001000000009D58A3__0000005CA7BBD6F9-000000739A8D1299\n000000067F000040020001000000009D0000-000000067F000040020001000000009D4000__00000073AD3FE6B8\n000000067F000040020001000000009D0000-000000067F000040020001000000009D4000__000000914E3F38F0\n000000067F000040020001000000009D0000-000000067F000040020001000000009D4000__000000931B9A2710\n000000067F000040020001000000009D4000-000000067F000040020001000000009D8000__00000073AD3FE6B8\n000000067F000040020001000000009D4000-000000067F000040020001000000009D8000__000000914E3F38F0\n000000067F000040020001000000009D4000-000000067F000040020001000000009D8000__000000931B9A2710\n000000067F000040020001000000009D58A3-000000067F000040020001000000009DE27F__0000005CA7BBD6F9-000000739A8D1299\n000000067F000040020001000000009D8000-000000067F000040020001000000009DC000__00000073AD3FE6B8\n000000067F000040020001000000009D8000-000000067F000040020001000000009DC000__000000914E3F38F0\n000000067F000040020001000000009D8000-000000067F000040020001000000009DC000__000000931B9A2710\n000000067F000040020001000000009DC000-000000067F000040020001000000009E0000__00000073AD3FE6B8\n000000067F000040020001000000009DC000-000000067F000040020001000000009E0000__000000914E3F38F0\n000000067F000040020001000000009DC000-000000067F000040020001000000009E0000__000000931B9A2710\n000000067F000040020001000000009DE27F-000000067F00
0040020001000000009E6C5B__0000005CA7BBD6F9-000000739A8D1299\n000000067F000040020001000000009E0000-000000067F000040020001000000009E4000__00000073AD3FE6B8\n000000067F000040020001000000009E0000-000000067F000040020001000000009E4000__000000914E3F38F0\n000000067F000040020001000000009E0000-000000067F000040020001000000009E4000__000000931B9A2710\n000000067F000040020001000000009E4000-000000067F000040020001000000009E8000__00000073AD3FE6B8\n000000067F000040020001000000009E4000-000000067F000040020001000000009E8000__000000914E3F38F0\n000000067F000040020001000000009E4000-000000067F000040020001000000009E8000__000000931B9A2710\n000000067F000040020001000000009E6C5B-000000067F000040020001000000009EF631__0000005CA7BBD6F9-000000739A8D1299\n000000067F000040020001000000009E8000-000000067F000040020001000000009EC000__00000073AD3FE6B8\n000000067F000040020001000000009E8000-000000067F000040020001000000009EC000__000000914E3F38F0\n000000067F000040020001000000009E8000-000000067F000040020001000000009EC000__000000931B9A2710\n000000067F000040020001000000009EC000-000000067F000040020001000000009F0000__00000073AD3FE6B8\n000000067F000040020001000000009EC000-000000067F000040020001000000009F0000__000000914E3F38F0\n000000067F000040020001000000009EC000-000000067F000040020001000000009F0000__000000931B9A2710\n000000067F000040020001000000009EF631-000000067F000040020001000000009F8011__0000005CA7BBD6F9-000000739A8D1299\n000000067F000040020001000000009F0000-000000067F000040020001000000009F4000__00000073AD3FE6B8\n000000067F000040020001000000009F0000-000000067F000040020001000000009F4000__000000914E3F38F0\n000000067F000040020001000000009F0000-000000067F000040020001000000009F4000__000000931B9A2710\n000000067F000040020001000000009F4000-000000067F000040020001000000009F8000__00000073AD3FE6B8\n000000067F000040020001000000009F4000-000000067F000040020001000000009F8000__000000914E3F38F0\n000000067F000040020001000000009F4000-000000067F000040020001000000009F8000__000000931B9A2710\n000000067F000040020001000000009F8000-00000006
7F000040020001000000009FC000__00000073AD3FE6B8\n000000067F000040020001000000009F8000-000000067F000040020001000000009FC000__000000914E3F38F0\n000000067F000040020001000000009F8000-000000067F000040020001000000009FC000__000000931B9A2710\n000000067F000040020001000000009F8011-000000067F00004002000100000000A009F2__0000005CA7BBD6F9-000000739A8D1299\n000000067F000040020001000000009FC000-000000067F00004002000100000000A00000__00000073AD3FE6B8\n000000067F000040020001000000009FC000-000000067F00004002000100000000A00000__000000914E3F38F0\n000000067F000040020001000000009FC000-000000067F00004002000100000000A00000__000000931B9A2710\n000000067F00004002000100000000A00000-000000067F00004002000100000000A04000__00000073AD3FE6B8\n000000067F00004002000100000000A00000-000000067F00004002000100000000A04000__000000914E3F38F0\n000000067F00004002000100000000A00000-000000067F00004002000100000000A04000__000000931B9A2710\n000000067F00004002000100000000A009F2-000000067F00004002000100000000A093E0__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000A04000-000000067F00004002000100000000A08000__00000073AD3FE6B8\n000000067F00004002000100000000A04000-000000067F00004002000100000000A08000__000000914E3F38F0\n000000067F00004002000100000000A04000-000000067F00004002000100000000A08000__000000931B9A2710\n000000067F00004002000100000000A08000-000000067F00004002000100000000A0C000__00000073AD3FE6B8\n000000067F00004002000100000000A08000-000000067F00004002000100000000A0C000__000000914E3F38F0\n000000067F00004002000100000000A08000-000000067F00004002000100000000A0C000__000000931B9A2710\n000000067F00004002000100000000A093E0-000000067F00004002000100000000A11DBB__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000A0C000-000000067F00004002000100000000A10000__00000073AD3FE6B8\n000000067F00004002000100000000A0C000-000000067F00004002000100000000A10000__000000914E3F38F0\n000000067F00004002000100000000A0C000-000000067F00004002000100000000A10000__000000931B9A2710\n000000067F00004002000100000000A10000-0000
00067F00004002000100000000A14000__00000073AD3FE6B8\n000000067F00004002000100000000A10000-000000067F00004002000100000000A14000__000000914E3F38F0\n000000067F00004002000100000000A10000-000000067F00004002000100000000A14000__000000931B9A2710\n000000067F00004002000100000000A11DBB-000000067F00004002000100000000A1A795__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000A14000-000000067F00004002000100000000A18000__00000073AD3FE6B8\n000000067F00004002000100000000A14000-000000067F00004002000100000000A18000__000000914E3F38F0\n000000067F00004002000100000000A14000-000000067F00004002000100000000A18000__000000931B9A2710\n000000067F00004002000100000000A18000-000000067F00004002000100000000A1C000__00000073AD3FE6B8\n000000067F00004002000100000000A18000-000000067F00004002000100000000A1C000__000000914E3F38F0\n000000067F00004002000100000000A18000-000000067F00004002000100000000A1C000__000000931B9A2710\n000000067F00004002000100000000A1A795-000000067F00004002000100000000A23173__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000A1C000-000000067F00004002000100000000A20000__00000073AD3FE6B8\n000000067F00004002000100000000A1C000-000000067F00004002000100000000A20000__000000914E3F38F0\n000000067F00004002000100000000A1C000-000000067F00004002000100000000A20000__000000931B9A2710\n000000067F00004002000100000000A20000-000000067F00004002000100000000A24000__00000073AD3FE6B8\n000000067F00004002000100000000A20000-000000067F00004002000100000000A24000__000000914E3F38F0\n000000067F00004002000100000000A20000-000000067F00004002000100000000A24000__000000931B9A2710\n000000067F00004002000100000000A23173-000000067F00004002000100000000A2BB4B__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000A24000-000000067F00004002000100000000A28000__00000073AD3FE6B8\n000000067F00004002000100000000A24000-000000067F00004002000100000000A28000__000000914E3F38F0\n000000067F00004002000100000000A24000-000000067F00004002000100000000A28000__000000931B9A2710\n000000067F00004002000100000000A28000-
000000067F00004002000100000000A2C000__00000073AD3FE6B8\n000000067F00004002000100000000A28000-000000067F00004002000100000000A2C000__000000914E3F38F0\n000000067F00004002000100000000A28000-000000067F00004002000100000000A2C000__000000931B9A2710\n000000067F00004002000100000000A2BB4B-000000067F00004002000100000000A34529__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000A2C000-000000067F00004002000100000000A30000__00000073AD3FE6B8\n000000067F00004002000100000000A2C000-000000067F00004002000100000000A30000__000000914E3F38F0\n000000067F00004002000100000000A2C000-000000067F00004002000100000000A30000__000000931B9A2710\n000000067F00004002000100000000A30000-000000067F00004002000100000000A34000__00000073AD3FE6B8\n000000067F00004002000100000000A30000-000000067F00004002000100000000A34000__000000914E3F38F0\n000000067F00004002000100000000A30000-000000067F00004002000100000000A34000__000000931B9A2710\n000000067F00004002000100000000A34000-000000067F00004002000100000000A38000__00000073AD3FE6B8\n000000067F00004002000100000000A34000-000000067F00004002000100000000A38000__000000914E3F38F0\n000000067F00004002000100000000A34000-000000067F00004002000100000000A38000__000000931B9A2710\n000000067F00004002000100000000A34529-000000067F00004002000100000000A3CF0D__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000A38000-000000067F00004002000100000000A3C000__00000073AD3FE6B8\n000000067F00004002000100000000A38000-000000067F00004002000100000000A3C000__000000914E3F38F0\n000000067F00004002000100000000A38000-000000067F00004002000100000000A3C000__000000931B9A2710\n000000067F00004002000100000000A3C000-000000067F00004002000100000000A40000__00000073AD3FE6B8\n000000067F00004002000100000000A3C000-000000067F00004002000100000000A40000__000000914E3F38F0\n000000067F00004002000100000000A3C000-000000067F00004002000100000000A40000__000000931B9A2710\n000000067F00004002000100000000A3CF0D-000000067F00004002000100000000A458E2__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000A40
000-000000067F00004002000100000000A44000__00000073AD3FE6B8\n000000067F00004002000100000000A40000-000000067F00004002000100000000A44000__000000914E3F38F0\n000000067F00004002000100000000A40000-000000067F00004002000100000000A44000__000000931B9A2710\n000000067F00004002000100000000A44000-000000067F00004002000100000000A48000__00000073AD3FE6B8\n000000067F00004002000100000000A44000-000000067F00004002000100000000A48000__000000914E3F38F0\n000000067F00004002000100000000A44000-000000067F00004002000100000000A48000__000000931B9A2710\n000000067F00004002000100000000A458E2-000000067F00004002000100000000A4E2BE__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000A48000-000000067F00004002000100000000A4C000__00000073AD3FE6B8\n000000067F00004002000100000000A48000-000000067F00004002000100000000A4C000__000000914E3F38F0\n000000067F00004002000100000000A48000-000000067F00004002000100000000A4C000__000000931B9A2710\n000000067F00004002000100000000A4C000-000000067F00004002000100000000A50000__00000073AD3FE6B8\n000000067F00004002000100000000A4C000-000000067F00004002000100000000A50000__000000914E3F38F0\n000000067F00004002000100000000A4C000-000000067F00004002000100000000A50000__000000931B9A2710\n000000067F00004002000100000000A4E2BE-000000067F00004002000100000000A56C93__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000A50000-000000067F00004002000100000000A54000__00000073AD3FE6B8\n000000067F00004002000100000000A50000-000000067F00004002000100000000A54000__000000914E3F38F0\n000000067F00004002000100000000A50000-000000067F00004002000100000000A54000__000000931B9A2710\n000000067F00004002000100000000A54000-000000067F00004002000100000000A58000__00000073AD3FE6B8\n000000067F00004002000100000000A54000-000000067F00004002000100000000A58000__000000914E3F38F0\n000000067F00004002000100000000A54000-000000067F00004002000100000000A58000__000000931B9A2710\n000000067F00004002000100000000A56C93-000000067F00004002000100000000A5F666__0000005CA7BBD6F9-000000739A8D1299\n000000067F0000400200010000000
0A58000-000000067F00004002000100000000A5C000__00000073AD3FE6B8\n000000067F00004002000100000000A58000-000000067F00004002000100000000A5C000__000000914E3F38F0\n000000067F00004002000100000000A58000-000000067F00004002000100000000A5C000__000000931B9A2710\n000000067F00004002000100000000A5C000-000000067F00004002000100000000A60000__00000073AD3FE6B8\n000000067F00004002000100000000A5C000-000000067F00004002000100000000A60000__000000914E3F38F0\n000000067F00004002000100000000A5C000-000000067F00004002000100000000A60000__000000931B9A2710\n000000067F00004002000100000000A5F666-000000067F00004002000100000000A68049__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000A60000-000000067F00004002000100000000A64000__00000073AD3FE6B8\n000000067F00004002000100000000A60000-000000067F00004002000100000000A64000__000000914E3F38F0\n000000067F00004002000100000000A60000-000000067F00004002000100000000A64000__000000931B9A2710\n000000067F00004002000100000000A64000-000000067F00004002000100000000A68000__00000073AD3FE6B8\n000000067F00004002000100000000A64000-000000067F00004002000100000000A68000__000000914E3F38F0\n000000067F00004002000100000000A64000-000000067F00004002000100000000A68000__000000931B9A2710\n000000067F00004002000100000000A68000-000000067F00004002000100000000A6C000__00000073AD3FE6B8\n000000067F00004002000100000000A68000-000000067F00004002000100000000A6C000__000000914E3F38F0\n000000067F00004002000100000000A68000-000000067F00004002000100000000A6C000__000000931B9A2710\n000000067F00004002000100000000A68049-000000067F00004002000100000000A70A2B__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000A6C000-000000067F00004002000100000000A70000__00000073AD3FE6B8\n000000067F00004002000100000000A6C000-000000067F00004002000100000000A70000__000000914E3F38F0\n000000067F00004002000100000000A6C000-000000067F00004002000100000000A70000__000000931B9A2710\n000000067F00004002000100000000A70000-000000067F00004002000100000000A74000__00000073AD3FE6B8\n000000067F00004002000100000000A70000-00000
0067F00004002000100000000A74000__000000914E3F38F0\n000000067F00004002000100000000A70000-000000067F00004002000100000000A74000__000000931B9A2710\n000000067F00004002000100000000A70A2B-000000067F00004002000100000000A7940C__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000A74000-000000067F00004002000100000000A78000__00000073AD3FE6B8\n000000067F00004002000100000000A74000-000000067F00004002000100000000A78000__000000914E3F38F0\n000000067F00004002000100000000A74000-000000067F00004002000100000000A78000__000000931B9A2710\n000000067F00004002000100000000A78000-000000067F00004002000100000000A7C000__00000073AD3FE6B8\n000000067F00004002000100000000A78000-000000067F00004002000100000000A7C000__000000914E3F38F0\n000000067F00004002000100000000A78000-000000067F00004002000100000000A7C000__000000931B9A2710\n000000067F00004002000100000000A7940C-000000067F00004002000100000000A81DD9__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000A7C000-000000067F00004002000100000000A80000__00000073AD3FE6B8\n000000067F00004002000100000000A7C000-000000067F00004002000100000000A80000__000000914E3F38F0\n000000067F00004002000100000000A7C000-000000067F00004002000100000000A80000__000000931B9A2710\n000000067F00004002000100000000A80000-000000067F00004002000100000000A84000__00000073AD3FE6B8\n000000067F00004002000100000000A80000-000000067F00004002000100000000A84000__000000914E3F38F0\n000000067F00004002000100000000A80000-000000067F00004002000100000000A84000__000000931B9A2710\n000000067F00004002000100000000A81DD9-000000067F00004002000100000000A8A7B8__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000A84000-000000067F00004002000100000000A88000__00000073AD3FE6B8\n000000067F00004002000100000000A84000-000000067F00004002000100000000A88000__000000914E3F38F0\n000000067F00004002000100000000A84000-000000067F00004002000100000000A88000__000000931B9A2710\n000000067F00004002000100000000A88000-000000067F00004002000100000000A8C000__00000073AD3FE6B8\n000000067F00004002000100000000A88000-0
00000067F00004002000100000000A8C000__000000914E3F38F0\n000000067F00004002000100000000A88000-000000067F00004002000100000000A8C000__000000931B9A2710\n000000067F00004002000100000000A8A7B8-000000067F00004002000100000000A9318F__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000A8C000-000000067F00004002000100000000A90000__00000073AD3FE6B8\n000000067F00004002000100000000A8C000-000000067F00004002000100000000A90000__000000914E3F38F0\n000000067F00004002000100000000A8C000-000000067F00004002000100000000A90000__000000931B9A2710\n000000067F00004002000100000000A90000-000000067F00004002000100000000A94000__00000073AD3FE6B8\n000000067F00004002000100000000A90000-000000067F00004002000100000000A94000__000000914E3F38F0\n000000067F00004002000100000000A90000-000000067F00004002000100000000A94000__000000931B9A2710\n000000067F00004002000100000000A9318F-000000067F00004002000100000000A9BB65__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000A94000-000000067F00004002000100000000A98000__00000073AD3FE6B8\n000000067F00004002000100000000A94000-000000067F00004002000100000000A98000__000000914E3F38F0\n000000067F00004002000100000000A94000-000000067F00004002000100000000A98000__000000931B9A2710\n000000067F00004002000100000000A98000-000000067F00004002000100000000A9C000__00000073AD3FE6B8\n000000067F00004002000100000000A98000-000000067F00004002000100000000A9C000__000000914E3F38F0\n000000067F00004002000100000000A98000-000000067F00004002000100000000A9C000__000000931B9A2710\n000000067F00004002000100000000A9BB65-000000067F00004002000100000000AA4546__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000A9C000-000000067F00004002000100000000AA0000__00000073AD3FE6B8\n000000067F00004002000100000000A9C000-000000067F00004002000100000000AA0000__000000914E3F38F0\n000000067F00004002000100000000A9C000-000000067F00004002000100000000AA0000__000000931B9A2710\n000000067F00004002000100000000AA0000-000000067F00004002000100000000AA4000__00000073AD3FE6B8\n000000067F00004002000100000000AA00
00-000000067F00004002000100000000AA4000__000000914E3F38F0\n000000067F00004002000100000000AA0000-000000067F00004002000100000000AA4000__000000931B9A2710\n000000067F00004002000100000000AA4000-000000067F00004002000100000000AA8000__00000073AD3FE6B8\n000000067F00004002000100000000AA4000-000000067F00004002000100000000AA8000__000000914E3F38F0\n000000067F00004002000100000000AA4000-000000067F00004002000100000000AA8000__000000931B9A2710\n000000067F00004002000100000000AA4546-000000067F00004002000100000000AACF1E__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000AA8000-000000067F00004002000100000000AAC000__00000073AD3FE6B8\n000000067F00004002000100000000AA8000-000000067F00004002000100000000AAC000__000000914E3F38F0\n000000067F00004002000100000000AA8000-000000067F00004002000100000000AAC000__000000931B9A2710\n000000067F00004002000100000000AAC000-000000067F00004002000100000000AB0000__00000073AD3FE6B8\n000000067F00004002000100000000AAC000-000000067F00004002000100000000AB0000__000000914E3F38F0\n000000067F00004002000100000000AAC000-000000067F00004002000100000000AB0000__000000931B9A2710\n000000067F00004002000100000000AACF1E-000000067F00004002000100000000AB58FC__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000AB0000-000000067F00004002000100000000AB4000__00000073AD3FE6B8\n000000067F00004002000100000000AB0000-000000067F00004002000100000000AB4000__000000914E3F38F0\n000000067F00004002000100000000AB0000-000000067F00004002000100000000AB4000__000000931B9A2710\n000000067F00004002000100000000AB4000-000000067F00004002000100000000AB8000__00000073AD3FE6B8\n000000067F00004002000100000000AB4000-000000067F00004002000100000000AB8000__000000914E3F38F0\n000000067F00004002000100000000AB4000-000000067F00004002000100000000AB8000__000000931B9A2710\n000000067F00004002000100000000AB58FC-000000067F00004002000100000000ABE2E6__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000AB8000-000000067F00004002000100000000ABC000__00000073AD3FE6B8\n000000067F00004002000100000000
AB8000-000000067F00004002000100000000ABC000__000000914E3F38F0\n000000067F00004002000100000000AB8000-000000067F00004002000100000000ABC000__000000931B9A2710\n000000067F00004002000100000000ABC000-000000067F00004002000100000000AC0000__00000073AD3FE6B8\n000000067F00004002000100000000ABC000-000000067F00004002000100000000AC0000__000000914E3F38F0\n000000067F00004002000100000000ABC000-000000067F00004002000100000000AC0000__000000931B9A2710\n000000067F00004002000100000000ABE2E6-000000067F00004002000100000000AC6CC2__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000AC0000-000000067F00004002000100000000AC4000__00000073AD3FE6B8\n000000067F00004002000100000000AC0000-000000067F00004002000100000000AC4000__000000914E3F38F0\n000000067F00004002000100000000AC0000-000000067F00004002000100000000AC4000__000000931B9A2710\n000000067F00004002000100000000AC4000-000000067F00004002000100000000AC8000__00000073AD3FE6B8\n000000067F00004002000100000000AC4000-000000067F00004002000100000000AC8000__000000914E3F38F0\n000000067F00004002000100000000AC4000-000000067F00004002000100000000AC8000__000000931B9A2710\n000000067F00004002000100000000AC6CC2-000000067F00004002000100000000ACF6A1__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000AC8000-000000067F00004002000100000000ACC000__00000073AD3FE6B8\n000000067F00004002000100000000AC8000-000000067F00004002000100000000ACC000__000000914E3F38F0\n000000067F00004002000100000000AC8000-000000067F00004002000100000000ACC000__000000931B9A2710\n000000067F00004002000100000000ACC000-000000067F00004002000100000000AD0000__00000073AD3FE6B8\n000000067F00004002000100000000ACC000-000000067F00004002000100000000AD0000__000000914E3F38F0\n000000067F00004002000100000000ACC000-000000067F00004002000100000000AD0000__000000931B9A2710\n000000067F00004002000100000000ACF6A1-000000067F00004002000100000000AD8072__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000AD0000-000000067F00004002000100000000AD4000__00000073AD3FE6B8\n000000067F0000400200010000
0000AD0000-000000067F00004002000100000000AD4000__000000914E3F38F0\n000000067F00004002000100000000AD0000-000000067F00004002000100000000AD4000__000000931B9A2710\n000000067F00004002000100000000AD4000-000000067F00004002000100000000AD8000__00000073AD3FE6B8\n000000067F00004002000100000000AD4000-000000067F00004002000100000000AD8000__000000914E3F38F0\n000000067F00004002000100000000AD4000-000000067F00004002000100000000AD8000__000000931B9A2710\n000000067F00004002000100000000AD8000-000000067F00004002000100000000ADC000__00000073AD3FE6B8\n000000067F00004002000100000000AD8000-000000067F00004002000100000000ADC000__000000914E3F38F0\n000000067F00004002000100000000AD8000-000000067F00004002000100000000ADC000__000000931B9A2710\n000000067F00004002000100000000AD8072-000000067F00004002000100000000AE0A4E__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000ADC000-000000067F00004002000100000000AE0000__00000073AD3FE6B8\n000000067F00004002000100000000ADC000-000000067F00004002000100000000AE0000__000000914E3F38F0\n000000067F00004002000100000000ADC000-000000067F00004002000100000000AE0000__000000931B9A2710\n000000067F00004002000100000000AE0000-000000067F00004002000100000000AE4000__00000073AD3FE6B8\n000000067F00004002000100000000AE0000-000000067F00004002000100000000AE4000__000000914E3F38F0\n000000067F00004002000100000000AE0000-000000067F00004002000100000000AE4000__000000931B9A2710\n000000067F00004002000100000000AE0A4E-000000067F00004002000100000000AE942F__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000AE4000-000000067F00004002000100000000AE8000__00000073AD3FE6B8\n000000067F00004002000100000000AE4000-000000067F00004002000100000000AE8000__000000914E3F38F0\n000000067F00004002000100000000AE4000-000000067F00004002000100000000AE8000__000000931B9A2710\n000000067F00004002000100000000AE8000-000000067F00004002000100000000AEC000__00000073AD3FE6B8\n000000067F00004002000100000000AE8000-000000067F00004002000100000000AEC000__000000914E3F38F0\n000000067F00004002000100000000AE8000-00
0000067F00004002000100000000AEC000__000000931B9A2710\n000000067F00004002000100000000AE942F-000000067F00004002000100000000AF1E0F__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000AEC000-000000067F00004002000100000000AF0000__00000073AD3FE6B8\n000000067F00004002000100000000AEC000-000000067F00004002000100000000AF0000__000000914E3F38F0\n000000067F00004002000100000000AEC000-000000067F00004002000100000000AF0000__000000931B9A2710\n000000067F00004002000100000000AF0000-000000067F00004002000100000000AF4000__00000073AD3FE6B8\n000000067F00004002000100000000AF0000-000000067F00004002000100000000AF4000__000000914E3F38F0\n000000067F00004002000100000000AF0000-000000067F00004002000100000000AF4000__000000931B9A2710\n000000067F00004002000100000000AF1E0F-000000067F00004002000100000000AFA7DD__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000AF4000-000000067F00004002000100000000AF8000__00000073AD3FE6B8\n000000067F00004002000100000000AF4000-000000067F00004002000100000000AF8000__000000914E3F38F0\n000000067F00004002000100000000AF4000-000000067F00004002000100000000AF8000__000000931B9A2710\n000000067F00004002000100000000AF8000-000000067F00004002000100000000AFC000__00000073AD3FE6B8\n000000067F00004002000100000000AF8000-000000067F00004002000100000000AFC000__000000914E3F38F0\n000000067F00004002000100000000AF8000-000000067F00004002000100000000AFC000__000000931B9A2710\n000000067F00004002000100000000AFA7DD-000000067F00004002000100000000B031B5__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000AFC000-000000067F00004002000100000000B00000__00000073AD3FE6B8\n000000067F00004002000100000000AFC000-000000067F00004002000100000000B00000__000000914E3F38F0\n000000067F00004002000100000000AFC000-000000067F00004002000100000000B00000__000000931B9A2710\n000000067F00004002000100000000B00000-000000067F00004002000100000000B04000__00000073AD3FE6B8\n000000067F00004002000100000000B00000-000000067F00004002000100000000B04000__000000914E3F38F0\n000000067F00004002000100000000B0000
0-000000067F00004002000100000000B04000__000000931B9A2710\n000000067F00004002000100000000B031B5-000000067F00004002000100000000B0BB95__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000B04000-000000067F00004002000100000000B08000__00000073AD3FE6B8\n000000067F00004002000100000000B04000-000000067F00004002000100000000B08000__000000914E3F38F0\n000000067F00004002000100000000B04000-000000067F00004002000100000000B08000__000000931B9A2710\n000000067F00004002000100000000B08000-000000067F00004002000100000000B0C000__00000073AD3FE6B8\n000000067F00004002000100000000B08000-000000067F00004002000100000000B0C000__000000914E3F38F0\n000000067F00004002000100000000B08000-000000067F00004002000100000000B0C000__000000931B9A2710\n000000067F00004002000100000000B0BB95-000000067F00004002000100000000B1456D__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000B0C000-000000067F00004002000100000000B10000__00000073AD3FE6B8\n000000067F00004002000100000000B0C000-000000067F00004002000100000000B10000__000000914E3F38F0\n000000067F00004002000100000000B0C000-000000067F00004002000100000000B10000__000000931B9A2710\n000000067F00004002000100000000B10000-000000067F00004002000100000000B14000__00000073AD3FE6B8\n000000067F00004002000100000000B10000-000000067F00004002000100000000B14000__000000914E3F38F0\n000000067F00004002000100000000B10000-000000067F00004002000100000000B14000__000000931B9A2710\n000000067F00004002000100000000B14000-000000067F00004002000100000000B18000__00000073AD3FE6B8\n000000067F00004002000100000000B14000-000000067F00004002000100000000B18000__000000914E3F38F0\n000000067F00004002000100000000B14000-000000067F00004002000100000000B18000__000000931B9A2710\n000000067F00004002000100000000B1456D-000000067F00004002000100000000B1CF4D__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000B18000-000000067F00004002000100000000B1C000__00000073AD3FE6B8\n000000067F00004002000100000000B18000-000000067F00004002000100000000B1C000__000000914E3F38F0\n000000067F00004002000100000000B
18000-000000067F00004002000100000000B1C000__000000931B9A2710\n000000067F00004002000100000000B1C000-000000067F00004002000100000000B20000__00000073AD3FE6B8\n000000067F00004002000100000000B1C000-000000067F00004002000100000000B20000__000000914E3F38F0\n000000067F00004002000100000000B1C000-000000067F00004002000100000000B20000__000000931B9A2710\n000000067F00004002000100000000B1CF4D-000000067F00004002000100000000B2592E__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000B20000-000000067F00004002000100000000B24000__00000073AD3FE6B8\n000000067F00004002000100000000B20000-000000067F00004002000100000000B24000__000000914E3F38F0\n000000067F00004002000100000000B20000-000000067F00004002000100000000B24000__000000931B9A2710\n000000067F00004002000100000000B24000-000000067F00004002000100000000B28000__00000073AD3FE6B8\n000000067F00004002000100000000B24000-000000067F00004002000100000000B28000__000000914E3F38F0\n000000067F00004002000100000000B24000-000000067F00004002000100000000B28000__000000931B9A2710\n000000067F00004002000100000000B2592E-000000067F00004002000100000000B2E310__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000B28000-000000067F00004002000100000000B2C000__00000073AD3FE6B8\n000000067F00004002000100000000B28000-000000067F00004002000100000000B2C000__000000914E3F38F0\n000000067F00004002000100000000B28000-000000067F00004002000100000000B2C000__000000931B9A2710\n000000067F00004002000100000000B2C000-000000067F00004002000100000000B30000__00000073AD3FE6B8\n000000067F00004002000100000000B2C000-000000067F00004002000100000000B30000__000000914E3F38F0\n000000067F00004002000100000000B2C000-000000067F00004002000100000000B30000__000000931B9A2710\n000000067F00004002000100000000B2E310-000000067F00004002000100000000B36CE8__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000B30000-000000067F00004002000100000000B34000__00000073AD3FE6B8\n000000067F00004002000100000000B30000-000000067F00004002000100000000B34000__000000914E3F38F0\n000000067F00004002000100000
000B30000-000000067F00004002000100000000B34000__000000931B9A2710\n000000067F00004002000100000000B34000-000000067F00004002000100000000B38000__00000073AD3FE6B8\n000000067F00004002000100000000B34000-000000067F00004002000100000000B38000__000000914E3F38F0\n000000067F00004002000100000000B34000-000000067F00004002000100000000B38000__000000931B9A2710\n000000067F00004002000100000000B36CE8-000000067F00004002000100000000B3F6C4__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000B38000-000000067F00004002000100000000B3C000__00000073AD3FE6B8\n000000067F00004002000100000000B38000-000000067F00004002000100000000B3C000__000000914E3F38F0\n000000067F00004002000100000000B38000-000000067F00004002000100000000B3C000__000000931B9A2710\n000000067F00004002000100000000B3C000-000000067F00004002000100000000B40000__00000073AD3FE6B8\n000000067F00004002000100000000B3C000-000000067F00004002000100000000B40000__000000914E3F38F0\n000000067F00004002000100000000B3C000-000000067F00004002000100000000B40000__000000931B9A2710\n000000067F00004002000100000000B3F6C4-000000067F00004002000100000000B480A3__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000B40000-000000067F00004002000100000000B44000__00000073AD3FE6B8\n000000067F00004002000100000000B40000-000000067F00004002000100000000B44000__000000914E3F38F0\n000000067F00004002000100000000B40000-000000067F00004002000100000000B44000__000000931B9A2710\n000000067F00004002000100000000B44000-000000067F00004002000100000000B48000__00000073AD3FE6B8\n000000067F00004002000100000000B44000-000000067F00004002000100000000B48000__000000914E3F38F0\n000000067F00004002000100000000B44000-000000067F00004002000100000000B48000__000000931B9A2710\n000000067F00004002000100000000B48000-000000067F00004002000100000000B4C000__00000073AD3FE6B8\n000000067F00004002000100000000B48000-000000067F00004002000100000000B4C000__000000914E3F38F0\n000000067F00004002000100000000B48000-000000067F00004002000100000000B4C000__000000931B9A2710\n000000067F00004002000100000000B480A3-000
000067F00004002000100000000B50A7D__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000B4C000-000000067F00004002000100000000B50000__00000073AD3FE6B8\n000000067F00004002000100000000B4C000-000000067F00004002000100000000B50000__000000914E3F38F0\n000000067F00004002000100000000B4C000-000000067F00004002000100000000B50000__000000931B9A2710\n000000067F00004002000100000000B50000-000000067F00004002000100000000B54000__00000073AD3FE6B8\n000000067F00004002000100000000B50000-000000067F00004002000100000000B54000__000000914E3F38F0\n000000067F00004002000100000000B50000-000000067F00004002000100000000B54000__000000931B9A2710\n000000067F00004002000100000000B50A7D-000000067F00004002000100000000B59456__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000B54000-000000067F00004002000100000000B58000__00000073AD3FE6B8\n000000067F00004002000100000000B54000-000000067F00004002000100000000B58000__000000914E3F38F0\n000000067F00004002000100000000B54000-000000067F00004002000100000000B58000__000000931B9A2710\n000000067F00004002000100000000B58000-000000067F00004002000100000000B5C000__00000073AD3FE6B8\n000000067F00004002000100000000B58000-000000067F00004002000100000000B5C000__000000914E3F38F0\n000000067F00004002000100000000B58000-000000067F00004002000100000000B5C000__000000931B9A2710\n000000067F00004002000100000000B59456-000000067F00004002000100000000B61E31__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000B5C000-000000067F00004002000100000000B60000__00000073AD3FE6B8\n000000067F00004002000100000000B5C000-000000067F00004002000100000000B60000__000000914E3F38F0\n000000067F00004002000100000000B5C000-000000067F00004002000100000000B60000__000000931B9A2710\n000000067F00004002000100000000B60000-000000067F00004002000100000000B64000__00000073AD3FE6B8\n000000067F00004002000100000000B60000-000000067F00004002000100000000B64000__000000914E3F38F0\n000000067F00004002000100000000B60000-000000067F00004002000100000000B64000__000000931B9A2710\n000000067F00004002000100000000B61E31
-000000067F00004002000100000000B6A810__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000B64000-000000067F00004002000100000000B68000__00000073AD3FE6B8\n000000067F00004002000100000000B64000-000000067F00004002000100000000B68000__000000914E3F38F0\n000000067F00004002000100000000B64000-000000067F00004002000100000000B68000__000000931B9A2710\n000000067F00004002000100000000B68000-000000067F00004002000100000000B6C000__00000073AD3FE6B8\n000000067F00004002000100000000B68000-000000067F00004002000100000000B6C000__000000914E3F38F0\n000000067F00004002000100000000B68000-000000067F00004002000100000000B6C000__000000931B9A2710\n000000067F00004002000100000000B6A810-000000067F00004002000100000000B731E5__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000B6C000-000000067F00004002000100000000B70000__00000073AD3FE6B8\n000000067F00004002000100000000B6C000-000000067F00004002000100000000B70000__000000914E3F38F0\n000000067F00004002000100000000B6C000-000000067F00004002000100000000B70000__000000931B9A2710\n000000067F00004002000100000000B70000-000000067F00004002000100000000B74000__00000073AD3FE6B8\n000000067F00004002000100000000B70000-000000067F00004002000100000000B74000__000000914E3F38F0\n000000067F00004002000100000000B70000-000000067F00004002000100000000B74000__000000931B9A2710\n000000067F00004002000100000000B731E5-000000067F00004002000100000000B7BBC4__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000B74000-000000067F00004002000100000000B78000__00000073AD3FE6B8\n000000067F00004002000100000000B74000-000000067F00004002000100000000B78000__000000914E3F38F0\n000000067F00004002000100000000B74000-000000067F00004002000100000000B78000__000000931B9A2710\n000000067F00004002000100000000B78000-000000067F00004002000100000000B7C000__00000073AD3FE6B8\n000000067F00004002000100000000B78000-000000067F00004002000100000000B7C000__000000914E3F38F0\n000000067F00004002000100000000B78000-000000067F00004002000100000000B7C000__000000931B9A2710\n000000067F00004002000100000000B7
BBC4-000000067F00004002000100000000B845A5__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000B7C000-000000067F00004002000100000000B80000__00000073AD3FE6B8\n000000067F00004002000100000000B7C000-000000067F00004002000100000000B80000__000000914E3F38F0\n000000067F00004002000100000000B7C000-000000067F00004002000100000000B80000__000000931B9A2710\n000000067F00004002000100000000B80000-000000067F00004002000100000000B84000__00000073AD3FE6B8\n000000067F00004002000100000000B80000-000000067F00004002000100000000B84000__000000914E3F38F0\n000000067F00004002000100000000B80000-000000067F00004002000100000000B84000__000000931B9A2710\n000000067F00004002000100000000B84000-000000067F00004002000100000000B88000__00000073AD3FE6B8\n000000067F00004002000100000000B84000-000000067F00004002000100000000B88000__000000914E3F38F0\n000000067F00004002000100000000B84000-000000067F00004002000100000000B88000__000000931B9A2710\n000000067F00004002000100000000B845A5-000000067F00004002000100000000B8CF82__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000B88000-000000067F00004002000100000000B8C000__00000073AD3FE6B8\n000000067F00004002000100000000B88000-000000067F00004002000100000000B8C000__000000914E3F38F0\n000000067F00004002000100000000B88000-000000067F00004002000100000000B8C000__000000931B9A2710\n000000067F00004002000100000000B8C000-000000067F00004002000100000000B90000__00000073AD3FE6B8\n000000067F00004002000100000000B8C000-000000067F00004002000100000000B90000__000000914E3F38F0\n000000067F00004002000100000000B8C000-000000067F00004002000100000000B90000__000000931B9A2710\n000000067F00004002000100000000B8CF82-000000067F00004002000100000000B95960__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000B90000-000000067F00004002000100000000B94000__00000073AD3FE6B8\n000000067F00004002000100000000B90000-000000067F00004002000100000000B94000__000000914E3F38F0\n000000067F00004002000100000000B90000-000000067F00004002000100000000B94000__000000931B9A2710\n000000067F000040020001000000
00B94000-000000067F00004002000100000000B98000__00000073AD3FE6B8\n000000067F00004002000100000000B94000-000000067F00004002000100000000B98000__000000914E3F38F0\n000000067F00004002000100000000B94000-000000067F00004002000100000000B98000__000000931B9A2710\n000000067F00004002000100000000B95960-000000067F00004002000100000000B9E33F__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000B98000-000000067F00004002000100000000B9C000__00000073AD3FE6B8\n000000067F00004002000100000000B98000-000000067F00004002000100000000B9C000__000000914E3F38F0\n000000067F00004002000100000000B98000-000000067F00004002000100000000B9C000__000000931B9A2710\n000000067F00004002000100000000B9C000-000000067F00004002000100000000BA0000__00000073AD3FE6B8\n000000067F00004002000100000000B9C000-000000067F00004002000100000000BA0000__000000914E3F38F0\n000000067F00004002000100000000B9C000-000000067F00004002000100000000BA0000__000000931B9A2710\n000000067F00004002000100000000B9E33F-000000067F00004002000100000000BA6D14__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000BA0000-000000067F00004002000100000000BA4000__00000073AD3FE6B8\n000000067F00004002000100000000BA0000-000000067F00004002000100000000BA4000__000000914E3F38F0\n000000067F00004002000100000000BA0000-000000067F00004002000100000000BA4000__000000931B9A2710\n000000067F00004002000100000000BA4000-000000067F00004002000100000000BA8000__00000073AD3FE6B8\n000000067F00004002000100000000BA4000-000000067F00004002000100000000BA8000__000000914E3F38F0\n000000067F00004002000100000000BA4000-000000067F00004002000100000000BA8000__000000931B9A2710\n000000067F00004002000100000000BA6D14-000000067F00004002000100000000BAF6EE__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000BA8000-000000067F00004002000100000000BAC000__00000073AD3FE6B8\n000000067F00004002000100000000BA8000-000000067F00004002000100000000BAC000__000000914E3F38F0\n000000067F00004002000100000000BA8000-000000067F00004002000100000000BAC000__000000931B9A2710\n000000067F00004002000100
000000BAC000-000000067F00004002000100000000BB0000__00000073AD3FE6B8\n000000067F00004002000100000000BAC000-000000067F00004002000100000000BB0000__000000914E3F38F0\n000000067F00004002000100000000BAC000-000000067F00004002000100000000BB0000__000000931B9A2710\n000000067F00004002000100000000BAF6EE-000000067F00004002000100000000BB80C4__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000BB0000-000000067F00004002000100000000BB4000__00000073AD3FE6B8\n000000067F00004002000100000000BB0000-000000067F00004002000100000000BB4000__000000914E3F38F0\n000000067F00004002000100000000BB0000-000000067F00004002000100000000BB4000__000000931B9A2710\n000000067F00004002000100000000BB4000-000000067F00004002000100000000BB8000__00000073AD3FE6B8\n000000067F00004002000100000000BB4000-000000067F00004002000100000000BB8000__000000914E3F38F0\n000000067F00004002000100000000BB4000-000000067F00004002000100000000BB8000__000000931B9A2710\n000000067F00004002000100000000BB8000-000000067F00004002000100000000BBC000__00000073AD3FE6B8\n000000067F00004002000100000000BB8000-000000067F00004002000100000000BBC000__000000914E3F38F0\n000000067F00004002000100000000BB8000-000000067F00004002000100000000BBC000__000000931B9A2710\n000000067F00004002000100000000BB80C4-000000067F00004002000100000000BC0A9B__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000BBC000-000000067F00004002000100000000BC0000__00000073AD3FE6B8\n000000067F00004002000100000000BBC000-000000067F00004002000100000000BC0000__000000914E3F38F0\n000000067F00004002000100000000BBC000-000000067F00004002000100000000BC0000__000000931B9A2710\n000000067F00004002000100000000BC0000-000000067F00004002000100000000BC4000__00000073AD3FE6B8\n000000067F00004002000100000000BC0000-000000067F00004002000100000000BC4000__000000914E3F38F0\n000000067F00004002000100000000BC0000-000000067F00004002000100000000BC4000__000000931B9A2710\n000000067F00004002000100000000BC0A9B-000000067F00004002000100000000BC9480__0000005CA7BBD6F9-000000739A8D1299\n000000067F0000400200
0100000000BC4000-000000067F00004002000100000000BC8000__00000073AD3FE6B8\n000000067F00004002000100000000BC4000-000000067F00004002000100000000BC8000__000000914E3F38F0\n000000067F00004002000100000000BC4000-000000067F00004002000100000000BC8000__000000931B9A2710\n000000067F00004002000100000000BC8000-000000067F00004002000100000000BCC000__00000073AD3FE6B8\n000000067F00004002000100000000BC8000-000000067F00004002000100000000BCC000__000000914E3F38F0\n000000067F00004002000100000000BC8000-000000067F00004002000100000000BCC000__000000931B9A2710\n000000067F00004002000100000000BC9480-000000067F00004002000100000000BD1E68__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000BCC000-000000067F00004002000100000000BD0000__00000073AD3FE6B8\n000000067F00004002000100000000BCC000-000000067F00004002000100000000BD0000__000000914E3F38F0\n000000067F00004002000100000000BCC000-000000067F00004002000100000000BD0000__000000931B9A2710\n000000067F00004002000100000000BD0000-000000067F00004002000100000000BD4000__00000073AD3FE6B8\n000000067F00004002000100000000BD0000-000000067F00004002000100000000BD4000__000000914E3F38F0\n000000067F00004002000100000000BD0000-000000067F00004002000100000000BD4000__000000931B9A2710\n000000067F00004002000100000000BD1E68-000000067F00004002000100000000BDA835__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000BD4000-000000067F00004002000100000000BD8000__00000073AD3FE6B8\n000000067F00004002000100000000BD4000-000000067F00004002000100000000BD8000__000000914E3F38F0\n000000067F00004002000100000000BD4000-000000067F00004002000100000000BD8000__000000931B9A2710\n000000067F00004002000100000000BD8000-000000067F00004002000100000000BDC000__00000073AD3FE6B8\n000000067F00004002000100000000BD8000-000000067F00004002000100000000BDC000__000000914E3F38F0\n000000067F00004002000100000000BD8000-000000067F00004002000100000000BDC000__000000931B9A2710\n000000067F00004002000100000000BDA835-000000067F00004002000100000000BE320C__0000005CA7BBD6F9-000000739A8D1299\n000000067F000040
02000100000000BDC000-000000067F00004002000100000000BE0000__00000073AD3FE6B8\n000000067F00004002000100000000BDC000-000000067F00004002000100000000BE0000__000000914E3F38F0\n000000067F00004002000100000000BDC000-000000067F00004002000100000000BE0000__000000931B9A2710\n000000067F00004002000100000000BE0000-000000067F00004002000100000000BE4000__00000073AD3FE6B8\n000000067F00004002000100000000BE0000-000000067F00004002000100000000BE4000__000000914E3F38F0\n000000067F00004002000100000000BE0000-000000067F00004002000100000000BE4000__000000931B9A2710\n000000067F00004002000100000000BE320C-000000067F00004002000100000000BEBBE5__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000BE4000-000000067F00004002000100000000BE8000__00000073AD3FE6B8\n000000067F00004002000100000000BE4000-000000067F00004002000100000000BE8000__000000914E3F38F0\n000000067F00004002000100000000BE4000-000000067F00004002000100000000BE8000__000000931B9A2710\n000000067F00004002000100000000BE8000-000000067F00004002000100000000BEC000__00000073AD3FE6B8\n000000067F00004002000100000000BE8000-000000067F00004002000100000000BEC000__000000914E3F38F0\n000000067F00004002000100000000BE8000-000000067F00004002000100000000BEC000__000000931B9A2710\n000000067F00004002000100000000BEBBE5-000000067F00004002000100000000BF45C3__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000BEC000-000000067F00004002000100000000BF0000__00000073AD3FE6B8\n000000067F00004002000100000000BEC000-000000067F00004002000100000000BF0000__000000914E3F38F0\n000000067F00004002000100000000BEC000-000000067F00004002000100000000BF0000__000000931B9A2710\n000000067F00004002000100000000BF0000-000000067F00004002000100000000BF4000__00000073AD3FE6B8\n000000067F00004002000100000000BF0000-000000067F00004002000100000000BF4000__000000914E3F38F0\n000000067F00004002000100000000BF0000-000000067F00004002000100000000BF4000__000000931B9A2710\n000000067F00004002000100000000BF4000-000000067F00004002000100000000BF8000__00000073AD3FE6B8\n000000067F0000400200010000000
0BF4000-000000067F00004002000100000000BF8000__000000914E3F38F0\n000000067F00004002000100000000BF4000-000000067F00004002000100000000BF8000__000000931B9A2710\n000000067F00004002000100000000BF45C3-000000067F00004002000100000000BFCF9A__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000BF8000-000000067F00004002000100000000BFC000__00000073AD3FE6B8\n000000067F00004002000100000000BF8000-000000067F00004002000100000000BFC000__000000914E3F38F0\n000000067F00004002000100000000BF8000-000000067F00004002000100000000BFC000__000000931B9A2710\n000000067F00004002000100000000BFC000-000000067F00004002000100000000C00000__00000073AD3FE6B8\n000000067F00004002000100000000BFC000-000000067F00004002000100000000C00000__000000914E3F38F0\n000000067F00004002000100000000BFC000-000000067F00004002000100000000C00000__000000931B9A2710\n000000067F00004002000100000000BFCF9A-000000067F00004002000100000000C0597F__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000C00000-000000067F00004002000100000000C04000__00000073AD3FE6B8\n000000067F00004002000100000000C00000-000000067F00004002000100000000C04000__000000914E3F38F0\n000000067F00004002000100000000C00000-000000067F00004002000100000000C04000__000000931B9A2710\n000000067F00004002000100000000C04000-000000067F00004002000100000000C08000__00000073AD3FE6B8\n000000067F00004002000100000000C04000-000000067F00004002000100000000C08000__000000914E3F38F0\n000000067F00004002000100000000C04000-000000067F00004002000100000000C08000__000000931B9A2710\n000000067F00004002000100000000C0597F-000000067F00004002000100000000C0E366__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000C08000-000000067F00004002000100000000C0C000__00000073AD3FE6B8\n000000067F00004002000100000000C08000-000000067F00004002000100000000C0C000__000000914E3F38F0\n000000067F00004002000100000000C08000-000000067F00004002000100000000C0C000__000000931B9A2710\n000000067F00004002000100000000C0C000-000000067F00004002000100000000C10000__00000073AD3FE6B8\n000000067F000040020001000
00000C0C000-000000067F00004002000100000000C10000__000000914E3F38F0\n000000067F00004002000100000000C0C000-000000067F00004002000100000000C10000__000000931B9A2710\n000000067F00004002000100000000C0E366-000000067F00004002000100000000C16D38__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000C10000-000000067F00004002000100000000C14000__00000073AD3FE6B8\n000000067F00004002000100000000C10000-000000067F00004002000100000000C14000__000000914E3F38F0\n000000067F00004002000100000000C10000-000000067F00004002000100000000C14000__000000931B9A2710\n000000067F00004002000100000000C14000-000000067F00004002000100000000C18000__00000073AD3FE6B8\n000000067F00004002000100000000C14000-000000067F00004002000100000000C18000__000000914E3F38F0\n000000067F00004002000100000000C14000-000000067F00004002000100000000C18000__000000931B9A2710\n000000067F00004002000100000000C16D38-000000067F00004002000100000000C1F70B__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000C18000-000000067F00004002000100000000C1C000__00000073AD3FE6B8\n000000067F00004002000100000000C18000-000000067F00004002000100000000C1C000__000000914E3F38F0\n000000067F00004002000100000000C18000-000000067F00004002000100000000C1C000__000000931B9A2710\n000000067F00004002000100000000C1C000-000000067F00004002000100000000C20000__00000073AD3FE6B8\n000000067F00004002000100000000C1C000-000000067F00004002000100000000C20000__000000914E3F38F0\n000000067F00004002000100000000C1C000-000000067F00004002000100000000C20000__000000931B9A2710\n000000067F00004002000100000000C1F70B-000000067F00004002000100000000C280E6__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000C20000-000000067F00004002000100000000C24000__00000073AD3FE6B8\n000000067F00004002000100000000C20000-000000067F00004002000100000000C24000__000000914E3F38F0\n000000067F00004002000100000000C20000-000000067F00004002000100000000C24000__000000931B9A2710\n000000067F00004002000100000000C24000-000000067F00004002000100000000C28000__00000073AD3FE6B8\n000000067F00004002000
100000000C24000-000000067F00004002000100000000C28000__000000914E3F38F0\n000000067F00004002000100000000C24000-000000067F00004002000100000000C28000__000000931B9A2710\n000000067F00004002000100000000C28000-000000067F00004002000100000000C2C000__00000073AD3FE6B8\n000000067F00004002000100000000C28000-000000067F00004002000100000000C2C000__000000914E3F38F0\n000000067F00004002000100000000C28000-000000067F00004002000100000000C2C000__000000931B9A2710\n000000067F00004002000100000000C280E6-000000067F00004002000100000000C30AC6__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000C2C000-000000067F00004002000100000000C30000__00000073AD3FE6B8\n000000067F00004002000100000000C2C000-000000067F00004002000100000000C30000__000000914E3F38F0\n000000067F00004002000100000000C2C000-000000067F00004002000100000000C30000__000000931B9A2710\n000000067F00004002000100000000C30000-000000067F00004002000100000000C34000__00000073AD3FE6B8\n000000067F00004002000100000000C30000-000000067F00004002000100000000C34000__000000914E3F38F0\n000000067F00004002000100000000C30000-000000067F00004002000100000000C34000__000000931B9A2710\n000000067F00004002000100000000C30AC6-000000067F00004002000100000000C394A4__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000C34000-000000067F00004002000100000000C38000__00000073AD3FE6B8\n000000067F00004002000100000000C34000-000000067F00004002000100000000C38000__000000914E3F38F0\n000000067F00004002000100000000C34000-000000067F00004002000100000000C38000__000000931B9A2710\n000000067F00004002000100000000C38000-000000067F00004002000100000000C3C000__00000073AD3FE6B8\n000000067F00004002000100000000C38000-000000067F00004002000100000000C3C000__000000914E3F38F0\n000000067F00004002000100000000C38000-000000067F00004002000100000000C3C000__000000931B9A2710\n000000067F00004002000100000000C394A4-000000067F00004002000100000000C41E88__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000C3C000-000000067F00004002000100000000C40000__00000073AD3FE6B8\n000000067F0000400
2000100000000C3C000-000000067F00004002000100000000C40000__000000914E3F38F0\n000000067F00004002000100000000C3C000-000000067F00004002000100000000C40000__000000931B9A2710\n000000067F00004002000100000000C40000-000000067F00004002000100000000C44000__00000073AD3FE6B8\n000000067F00004002000100000000C40000-000000067F00004002000100000000C44000__000000914E3F38F0\n000000067F00004002000100000000C40000-000000067F00004002000100000000C44000__000000931B9A2710\n000000067F00004002000100000000C41E88-000000067F00004002000100000000C4A868__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000C44000-000000067F00004002000100000000C48000__00000073AD3FE6B8\n000000067F00004002000100000000C44000-000000067F00004002000100000000C48000__000000914E3F38F0\n000000067F00004002000100000000C44000-000000067F00004002000100000000C48000__000000931B9A2710\n000000067F00004002000100000000C48000-000000067F00004002000100000000C4C000__00000073AD3FE6B8\n000000067F00004002000100000000C48000-000000067F00004002000100000000C4C000__000000914E3F38F0\n000000067F00004002000100000000C48000-000000067F00004002000100000000C4C000__000000931B9A2710\n000000067F00004002000100000000C4A868-000000067F00004002000100000000C53243__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000C4C000-000000067F00004002000100000000C50000__00000073AD3FE6B8\n000000067F00004002000100000000C4C000-000000067F00004002000100000000C50000__000000914E3F38F0\n000000067F00004002000100000000C4C000-000000067F00004002000100000000C50000__000000931B9A2710\n000000067F00004002000100000000C50000-000000067F00004002000100000000C54000__00000073AD3FE6B8\n000000067F00004002000100000000C50000-000000067F00004002000100000000C54000__000000914E3F38F0\n000000067F00004002000100000000C50000-000000067F00004002000100000000C54000__000000931B9A2710\n000000067F00004002000100000000C53243-000000067F00004002000100000000C5BC12__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000C54000-000000067F00004002000100000000C58000__00000073AD3FE6B8\n000000067F000
04002000100000000C54000-000000067F00004002000100000000C58000__000000914E3F38F0\n000000067F00004002000100000000C54000-000000067F00004002000100000000C58000__000000931B9A2710\n000000067F00004002000100000000C58000-000000067F00004002000100000000C5C000__00000073AD3FE6B8\n000000067F00004002000100000000C58000-000000067F00004002000100000000C5C000__000000914E3F38F0\n000000067F00004002000100000000C58000-000000067F00004002000100000000C5C000__000000931B9A2710\n000000067F00004002000100000000C5BC12-000000067F00004002000100000000C645E7__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000C5C000-000000067F00004002000100000000C60000__00000073AD3FE6B8\n000000067F00004002000100000000C5C000-000000067F00004002000100000000C60000__000000914E3F38F0\n000000067F00004002000100000000C5C000-000000067F00004002000100000000C60000__000000931B9A2710\n000000067F00004002000100000000C60000-000000067F00004002000100000000C64000__00000073AD3FE6B8\n000000067F00004002000100000000C60000-000000067F00004002000100000000C64000__000000914E3F38F0\n000000067F00004002000100000000C60000-000000067F00004002000100000000C64000__000000931B9A2710\n000000067F00004002000100000000C64000-000000067F00004002000100000000C68000__00000073AD3FE6B8\n000000067F00004002000100000000C64000-000000067F00004002000100000000C68000__000000914E3F38F0\n000000067F00004002000100000000C64000-000000067F00004002000100000000C68000__000000931B9A2710\n000000067F00004002000100000000C645E7-000000067F00004002000100000000C6CFCD__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000C68000-000000067F00004002000100000000C6C000__00000073AD3FE6B8\n000000067F00004002000100000000C68000-000000067F00004002000100000000C6C000__000000914E3F38F0\n000000067F00004002000100000000C68000-000000067F00004002000100000000C6C000__000000931B9A2710\n000000067F00004002000100000000C6C000-000000067F00004002000100000000C70000__00000073AD3FE6B8\n000000067F00004002000100000000C6C000-000000067F00004002000100000000C70000__000000914E3F38F0\n000000067F0000400200010000
0000C6C000-000000067F00004002000100000000C70000__000000931B9A2710\n000000067F00004002000100000000C6CFCD-000000067F00004002000100000000C759AB__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000C70000-000000067F00004002000100000000C74000__00000073AD3FE6B8\n000000067F00004002000100000000C70000-000000067F00004002000100000000C74000__000000914E3F38F0\n000000067F00004002000100000000C70000-000000067F00004002000100000000C74000__000000931B9A2710\n000000067F00004002000100000000C74000-000000067F00004002000100000000C78000__00000073AD3FE6B8\n000000067F00004002000100000000C74000-000000067F00004002000100000000C78000__000000914E3F38F0\n000000067F00004002000100000000C74000-000000067F00004002000100000000C78000__000000931B9A2710\n000000067F00004002000100000000C759AB-000000067F00004002000100000000C7E38B__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000C78000-000000067F00004002000100000000C7C000__00000073AD3FE6B8\n000000067F00004002000100000000C78000-000000067F00004002000100000000C7C000__000000914E3F38F0\n000000067F00004002000100000000C78000-000000067F00004002000100000000C7C000__000000931B9A2710\n000000067F00004002000100000000C7C000-000000067F00004002000100000000C80000__00000073AD3FE6B8\n000000067F00004002000100000000C7C000-000000067F00004002000100000000C80000__000000914E3F38F0\n000000067F00004002000100000000C7C000-000000067F00004002000100000000C80000__000000931B9A2710\n000000067F00004002000100000000C7E38B-000000067F00004002000100000000C86D65__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000C80000-000000067F00004002000100000000C84000__00000073AD3FE6B8\n000000067F00004002000100000000C80000-000000067F00004002000100000000C84000__000000914E3F38F0\n000000067F00004002000100000000C80000-000000067F00004002000100000000C84000__000000931B9A2710\n000000067F00004002000100000000C84000-000000067F00004002000100000000C88000__00000073AD3FE6B8\n000000067F00004002000100000000C84000-000000067F00004002000100000000C88000__000000914E3F38F0\n000000067F000040020001
00000000C84000-000000067F00004002000100000000C88000__000000931B9A2710\n000000067F00004002000100000000C86D65-000000067F00004002000100000000C8F758__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000C88000-000000067F00004002000100000000C8C000__00000073AD3FE6B8\n000000067F00004002000100000000C88000-000000067F00004002000100000000C8C000__000000914E3F38F0\n000000067F00004002000100000000C88000-000000067F00004002000100000000C8C000__000000931B9A2710\n000000067F00004002000100000000C8C000-000000067F00004002000100000000C90000__00000073AD3FE6B8\n000000067F00004002000100000000C8C000-000000067F00004002000100000000C90000__000000914E3F38F0\n000000067F00004002000100000000C8C000-000000067F00004002000100000000C90000__000000931B9A2710\n000000067F00004002000100000000C8F758-000000067F00004002000100000000C98142__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000C90000-000000067F00004002000100000000C94000__00000073AD3FE6B8\n000000067F00004002000100000000C90000-000000067F00004002000100000000C94000__000000914E3F38F0\n000000067F00004002000100000000C90000-000000067F00004002000100000000C94000__000000931B9A2710\n000000067F00004002000100000000C94000-000000067F00004002000100000000C98000__00000073AD3FE6B8\n000000067F00004002000100000000C94000-000000067F00004002000100000000C98000__000000914E3F38F0\n000000067F00004002000100000000C94000-000000067F00004002000100000000C98000__000000931B9A2710\n000000067F00004002000100000000C98000-000000067F00004002000100000000C9C000__00000073AD3FE6B8\n000000067F00004002000100000000C98000-000000067F00004002000100000000C9C000__000000914E3F38F0\n000000067F00004002000100000000C98000-000000067F00004002000100000000C9C000__000000931B9A2710\n000000067F00004002000100000000C98142-000000067F00004002000100000000CA0B11__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000C9C000-000000067F00004002000100000000CA0000__00000073AD3FE6B8\n000000067F00004002000100000000C9C000-000000067F00004002000100000000CA0000__000000914E3F38F0\n000000067F00004002
000100000000C9C000-000000067F00004002000100000000CA0000__000000931B9A2710\n000000067F00004002000100000000CA0000-000000067F00004002000100000000CA4000__00000073AD3FE6B8\n000000067F00004002000100000000CA0000-000000067F00004002000100000000CA4000__000000914E3F38F0\n000000067F00004002000100000000CA0000-000000067F00004002000100000000CA4000__000000931B9A2710\n000000067F00004002000100000000CA0B11-000000067F00004002000100000000CA94E7__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000CA4000-000000067F00004002000100000000CA8000__00000073AD3FE6B8\n000000067F00004002000100000000CA4000-000000067F00004002000100000000CA8000__000000914E3F38F0\n000000067F00004002000100000000CA4000-000000067F00004002000100000000CA8000__000000931B9A2710\n000000067F00004002000100000000CA8000-000000067F00004002000100000000CAC000__00000073AD3FE6B8\n000000067F00004002000100000000CA8000-000000067F00004002000100000000CAC000__000000914E3F38F0\n000000067F00004002000100000000CA8000-000000067F00004002000100000000CAC000__000000931B9A2710\n000000067F00004002000100000000CA94E7-000000067F00004002000100000000CB1EC7__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000CAC000-000000067F00004002000100000000CB0000__00000073AD3FE6B8\n000000067F00004002000100000000CAC000-000000067F00004002000100000000CB0000__000000914E3F38F0\n000000067F00004002000100000000CAC000-000000067F00004002000100000000CB0000__000000931B9A2710\n000000067F00004002000100000000CB0000-000000067F00004002000100000000CB4000__00000073AD3FE6B8\n000000067F00004002000100000000CB0000-000000067F00004002000100000000CB4000__000000914E3F38F0\n000000067F00004002000100000000CB0000-000000067F00004002000100000000CB4000__000000931B9A2710\n000000067F00004002000100000000CB1EC7-000000067F00004002000100000000CBA8AE__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000CB4000-000000067F00004002000100000000CB8000__00000073AD3FE6B8\n000000067F00004002000100000000CB4000-000000067F00004002000100000000CB8000__000000914E3F38F0\n000000067F0000
4002000100000000CB4000-000000067F00004002000100000000CB8000__000000931B9A2710\n000000067F00004002000100000000CB8000-000000067F00004002000100000000CBC000__00000073AD3FE6B8\n000000067F00004002000100000000CB8000-000000067F00004002000100000000CBC000__000000914E3F38F0\n000000067F00004002000100000000CB8000-000000067F00004002000100000000CBC000__000000931B9A2710\n000000067F00004002000100000000CBA8AE-000000067F00004002000100000000CC3288__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000CBC000-000000067F00004002000100000000CC0000__00000073AD3FE6B8\n000000067F00004002000100000000CBC000-000000067F00004002000100000000CC0000__000000914E3F38F0\n000000067F00004002000100000000CBC000-000000067F00004002000100000000CC0000__000000931B9A2710\n000000067F00004002000100000000CC0000-000000067F00004002000100000000CC4000__00000073AD3FE6B8\n000000067F00004002000100000000CC0000-000000067F00004002000100000000CC4000__000000914E3F38F0\n000000067F00004002000100000000CC0000-000000067F00004002000100000000CC4000__000000931B9A2710\n000000067F00004002000100000000CC3288-000000067F00004002000100000000CCBC6F__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000CC4000-000000067F00004002000100000000CC8000__00000073AD3FE6B8\n000000067F00004002000100000000CC4000-000000067F00004002000100000000CC8000__000000914E3F38F0\n000000067F00004002000100000000CC4000-000000067F00004002000100000000CC8000__000000931B9A2710\n000000067F00004002000100000000CC8000-000000067F00004002000100000000CCC000__00000073AD3FE6B8\n000000067F00004002000100000000CC8000-000000067F00004002000100000000CCC000__000000914E3F38F0\n000000067F00004002000100000000CC8000-000000067F00004002000100000000CCC000__000000931B9A2710\n000000067F00004002000100000000CCBC6F-000000067F00004002000100000000CD4644__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000CCC000-000000067F00004002000100000000CD0000__00000073AD3FE6B8\n000000067F00004002000100000000CCC000-000000067F00004002000100000000CD0000__000000914E3F38F0\n000000067F
00004002000100000000CCC000-000000067F00004002000100000000CD0000__000000931B9A2710\n000000067F00004002000100000000CD0000-000000067F00004002000100000000CD4000__00000073AD3FE6B8\n000000067F00004002000100000000CD0000-000000067F00004002000100000000CD4000__000000914E3F38F0\n000000067F00004002000100000000CD0000-000000067F00004002000100000000CD4000__000000931B9A2710\n000000067F00004002000100000000CD4000-000000067F00004002000100000000CD8000__00000073AD3FE6B8\n000000067F00004002000100000000CD4000-000000067F00004002000100000000CD8000__000000914E3F38F0\n000000067F00004002000100000000CD4000-000000067F00004002000100000000CD8000__000000931B9A2710\n000000067F00004002000100000000CD4644-000000067F00004002000100000000CDD014__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000CD8000-000000067F00004002000100000000CDC000__00000073AD3FE6B8\n000000067F00004002000100000000CD8000-000000067F00004002000100000000CDC000__000000914E3F38F0\n000000067F00004002000100000000CD8000-000000067F00004002000100000000CDC000__000000931B9A2710\n000000067F00004002000100000000CDC000-000000067F00004002000100000000CE0000__00000073AD3FE6B8\n000000067F00004002000100000000CDC000-000000067F00004002000100000000CE0000__000000914E3F38F0\n000000067F00004002000100000000CDC000-000000067F00004002000100000000CE0000__000000931B9A2710\n000000067F00004002000100000000CDD014-000000067F00004002000100000000CE59EF__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000CE0000-000000067F00004002000100000000CE4000__00000073AD3FE6B8\n000000067F00004002000100000000CE0000-000000067F00004002000100000000CE4000__000000914E3F38F0\n000000067F00004002000100000000CE0000-000000067F00004002000100000000CE4000__000000931B9A2710\n000000067F00004002000100000000CE4000-000000067F00004002000100000000CE8000__00000073AD3FE6B8\n000000067F00004002000100000000CE4000-000000067F00004002000100000000CE8000__000000914E3F38F0\n000000067F00004002000100000000CE4000-000000067F00004002000100000000CE8000__000000931B9A2710\n000000067F0000400200010
0000000CE59EF-000000067F00004002000100000000CEE3D4__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000CE8000-000000067F00004002000100000000CEC000__00000073AD3FE6B8\n000000067F00004002000100000000CE8000-000000067F00004002000100000000CEC000__000000914E3F38F0\n000000067F00004002000100000000CE8000-000000067F00004002000100000000CEC000__000000931B9A2710\n000000067F00004002000100000000CEC000-000000067F00004002000100000000CF0000__00000073AD3FE6B8\n000000067F00004002000100000000CEC000-000000067F00004002000100000000CF0000__000000914E3F38F0\n000000067F00004002000100000000CEC000-000000067F00004002000100000000CF0000__000000931B9A2710\n000000067F00004002000100000000CEE3D4-000000067F00004002000100000000CF6DB9__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000CF0000-000000067F00004002000100000000CF4000__00000073AD3FE6B8\n000000067F00004002000100000000CF0000-000000067F00004002000100000000CF4000__000000914E3F38F0\n000000067F00004002000100000000CF0000-000000067F00004002000100000000CF4000__000000931B9A2710\n000000067F00004002000100000000CF4000-000000067F00004002000100000000CF8000__00000073AD3FE6B8\n000000067F00004002000100000000CF4000-000000067F00004002000100000000CF8000__000000914E3F38F0\n000000067F00004002000100000000CF4000-000000067F00004002000100000000CF8000__000000931B9A2710\n000000067F00004002000100000000CF6DB9-000000067F00004002000100000000CFF798__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000CF8000-000000067F00004002000100000000CFC000__00000073AD3FE6B8\n000000067F00004002000100000000CF8000-000000067F00004002000100000000CFC000__000000914E3F38F0\n000000067F00004002000100000000CF8000-000000067F00004002000100000000CFC000__000000931B9A2710\n000000067F00004002000100000000CFC000-000000067F00004002000100000000D00000__00000073AD3FE6B8\n000000067F00004002000100000000CFC000-000000067F00004002000100000000D00000__000000914E3F38F0\n000000067F00004002000100000000CFC000-000000067F00004002000100000000D00000__000000931B9A2710\n000000067F000040020
00100000000CFF798-000000067F00004002000100000000D08175__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000D00000-000000067F00004002000100000000D04000__00000073AD3FE6B8\n000000067F00004002000100000000D00000-000000067F00004002000100000000D04000__000000914E3F38F0\n000000067F00004002000100000000D00000-000000067F00004002000100000000D04000__000000931B9A2710\n000000067F00004002000100000000D04000-000000067F00004002000100000000D08000__00000073AD3FE6B8\n000000067F00004002000100000000D04000-000000067F00004002000100000000D08000__000000914E3F38F0\n000000067F00004002000100000000D04000-000000067F00004002000100000000D08000__000000931B9A2710\n000000067F00004002000100000000D08000-000000067F00004002000100000000D0C000__00000073AD3FE6B8\n000000067F00004002000100000000D08000-000000067F00004002000100000000D0C000__000000914E3F38F0\n000000067F00004002000100000000D08000-000000067F00004002000100000000D0C000__000000931B9A2710\n000000067F00004002000100000000D08175-000000067F00004002000100000000D10B4D__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000D0C000-000000067F00004002000100000000D10000__00000073AD3FE6B8\n000000067F00004002000100000000D0C000-000000067F00004002000100000000D10000__000000914E3F38F0\n000000067F00004002000100000000D0C000-000000067F00004002000100000000D10000__000000931B9A2710\n000000067F00004002000100000000D10000-000000067F00004002000100000000D14000__00000073AD3FE6B8\n000000067F00004002000100000000D10000-000000067F00004002000100000000D14000__000000914E3F38F0\n000000067F00004002000100000000D10000-000000067F00004002000100000000D14000__000000931B9A2710\n000000067F00004002000100000000D10B4D-000000067F00004002000100000000D19528__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000D14000-000000067F00004002000100000000D18000__00000073AD3FE6B8\n000000067F00004002000100000000D14000-000000067F00004002000100000000D18000__000000914E3F38F0\n000000067F00004002000100000000D14000-000000067F00004002000100000000D18000__000000931B9A2710\n000000067F00004
002000100000000D18000-000000067F00004002000100000000D1C000__00000073AD3FE6B8\n000000067F00004002000100000000D18000-000000067F00004002000100000000D1C000__000000914E3F38F0\n000000067F00004002000100000000D18000-000000067F00004002000100000000D1C000__000000931B9A2710\n000000067F00004002000100000000D19528-000000067F00004002000100000000D21EFC__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000D1C000-000000067F00004002000100000000D20000__00000073AD3FE6B8\n000000067F00004002000100000000D1C000-000000067F00004002000100000000D20000__000000914E3F38F0\n000000067F00004002000100000000D1C000-000000067F00004002000100000000D20000__000000931B9A2710\n000000067F00004002000100000000D20000-000000067F00004002000100000000D24000__00000073AD3FE6B8\n000000067F00004002000100000000D20000-000000067F00004002000100000000D24000__000000914E3F38F0\n000000067F00004002000100000000D20000-000000067F00004002000100000000D24000__000000931B9A2710\n000000067F00004002000100000000D21EFC-000000067F00004002000100000000D2A8DC__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000D24000-000000067F00004002000100000000D28000__00000073AD3FE6B8\n000000067F00004002000100000000D24000-000000067F00004002000100000000D28000__000000914E3F38F0\n000000067F00004002000100000000D24000-000000067F00004002000100000000D28000__000000931B9A2710\n000000067F00004002000100000000D28000-000000067F00004002000100000000D2C000__00000073AD3FE6B8\n000000067F00004002000100000000D28000-000000067F00004002000100000000D2C000__000000914E3F38F0\n000000067F00004002000100000000D28000-000000067F00004002000100000000D2C000__000000931B9A2710\n000000067F00004002000100000000D2A8DC-000000067F00004002000100000000D332BD__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000D2C000-000000067F00004002000100000000D30000__00000073AD3FE6B8\n000000067F00004002000100000000D2C000-000000067F00004002000100000000D30000__000000914E3F38F0\n000000067F00004002000100000000D2C000-000000067F00004002000100000000D30000__000000931B9A2710\n000000067F0
0004002000100000000D30000-000000067F00004002000100000000D34000__00000073AD3FE6B8\n000000067F00004002000100000000D30000-000000067F00004002000100000000D34000__000000914E3F38F0\n000000067F00004002000100000000D30000-000000067F00004002000100000000D34000__000000931B9A2710\n000000067F00004002000100000000D332BD-000000067F00004002000100000000D3BC9F__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000D34000-000000067F00004002000100000000D38000__00000073AD3FE6B8\n000000067F00004002000100000000D34000-000000067F00004002000100000000D38000__000000914E3F38F0\n000000067F00004002000100000000D34000-000000067F00004002000100000000D38000__000000931B9A2710\n000000067F00004002000100000000D38000-000000067F00004002000100000000D3C000__00000073AD3FE6B8\n000000067F00004002000100000000D38000-000000067F00004002000100000000D3C000__000000914E3F38F0\n000000067F00004002000100000000D38000-000000067F00004002000100000000D3C000__000000931B9A2710\n000000067F00004002000100000000D3BC9F-000000067F00004002000100000000D4467B__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000D3C000-000000067F00004002000100000000D40000__00000073AD3FE6B8\n000000067F00004002000100000000D3C000-000000067F00004002000100000000D40000__000000914E3F38F0\n000000067F00004002000100000000D3C000-000000067F00004002000100000000D40000__000000931B9A2710\n000000067F00004002000100000000D40000-000000067F00004002000100000000D44000__00000073AD3FE6B8\n000000067F00004002000100000000D40000-000000067F00004002000100000000D44000__000000914E3F38F0\n000000067F00004002000100000000D40000-000000067F00004002000100000000D44000__000000931B9A2710\n000000067F00004002000100000000D44000-000000067F00004002000100000000D48000__00000073AD3FE6B8\n000000067F00004002000100000000D44000-000000067F00004002000100000000D48000__000000914E3F38F0\n000000067F00004002000100000000D44000-000000067F00004002000100000000D48000__000000931B9A2710\n000000067F00004002000100000000D4467B-000000067F00004002000100000000D4D058__0000005CA7BBD6F9-000000739A8D1299\n0000000
67F00004002000100000000D48000-000000067F00004002000100000000D4C000__00000073AD3FE6B8\n000000067F00004002000100000000D48000-000000067F00004002000100000000D4C000__000000914E3F38F0\n000000067F00004002000100000000D48000-000000067F00004002000100000000D4C000__000000931B9A2710\n000000067F00004002000100000000D4C000-000000067F00004002000100000000D50000__00000073AD3FE6B8\n000000067F00004002000100000000D4C000-000000067F00004002000100000000D50000__000000914E3F38F0\n000000067F00004002000100000000D4C000-000000067F00004002000100000000D50000__000000931B9A2710\n000000067F00004002000100000000D4D058-000000067F00004002000100000000D55A2B__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000D50000-000000067F00004002000100000000D54000__00000073AD3FE6B8\n000000067F00004002000100000000D50000-000000067F00004002000100000000D54000__000000914E3F38F0\n000000067F00004002000100000000D50000-000000067F00004002000100000000D54000__000000931B9A2710\n000000067F00004002000100000000D54000-000000067F00004002000100000000D58000__00000073AD3FE6B8\n000000067F00004002000100000000D54000-000000067F00004002000100000000D58000__000000914E3F38F0\n000000067F00004002000100000000D54000-000000067F00004002000100000000D58000__000000931B9A2710\n000000067F00004002000100000000D55A2B-000000067F00004002000100000000D5E400__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000D58000-000000067F00004002000100000000D5C000__00000073AD3FE6B8\n000000067F00004002000100000000D58000-000000067F00004002000100000000D5C000__000000914E3F38F0\n000000067F00004002000100000000D58000-000000067F00004002000100000000D5C000__000000931B9A2710\n000000067F00004002000100000000D5C000-000000067F00004002000100000000D60000__00000073AD3FE6B8\n000000067F00004002000100000000D5C000-000000067F00004002000100000000D60000__000000914E3F38F0\n000000067F00004002000100000000D5C000-000000067F00004002000100000000D60000__000000931B9A2710\n000000067F00004002000100000000D5E400-000000067F00004002000100000000D66DD2__0000005CA7BBD6F9-000000739A8D1299\n000
000067F00004002000100000000D60000-000000067F00004002000100000000D64000__00000073AD3FE6B8\n000000067F00004002000100000000D60000-000000067F00004002000100000000D64000__000000914E3F38F0\n000000067F00004002000100000000D60000-000000067F00004002000100000000D64000__000000931B9A2710\n000000067F00004002000100000000D64000-000000067F00004002000100000000D68000__00000073AD3FE6B8\n000000067F00004002000100000000D64000-000000067F00004002000100000000D68000__000000914E3F38F0\n000000067F00004002000100000000D64000-000000067F00004002000100000000D68000__000000931B9A2710\n000000067F00004002000100000000D66DD2-000000067F00004002000100000000D6F7B8__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000D68000-000000067F00004002000100000000D6C000__00000073AD3FE6B8\n000000067F00004002000100000000D68000-000000067F00004002000100000000D6C000__000000914E3F38F0\n000000067F00004002000100000000D68000-000000067F00004002000100000000D6C000__000000931B9A2710\n000000067F00004002000100000000D6C000-000000067F00004002000100000000D70000__00000073AD3FE6B8\n000000067F00004002000100000000D6C000-000000067F00004002000100000000D70000__000000914E3F38F0\n000000067F00004002000100000000D6C000-000000067F00004002000100000000D70000__000000931B9A2710\n000000067F00004002000100000000D6F7B8-000000067F00004002000100000000D7819E__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000D70000-000000067F00004002000100000000D74000__00000073AD3FE6B8\n000000067F00004002000100000000D70000-000000067F00004002000100000000D74000__000000914E3F38F0\n000000067F00004002000100000000D70000-000000067F00004002000100000000D74000__000000931B9A2710\n000000067F00004002000100000000D74000-000000067F00004002000100000000D78000__00000073AD3FE6B8\n000000067F00004002000100000000D74000-000000067F00004002000100000000D78000__000000914E3F38F0\n000000067F00004002000100000000D74000-000000067F00004002000100000000D78000__000000931B9A2710\n000000067F00004002000100000000D78000-000000067F00004002000100000000D7C000__00000073AD3FE6B8\n000000067F000040
02000100000000D78000-000000067F00004002000100000000D7C000__000000914E3F38F0\n000000067F00004002000100000000D78000-000000067F00004002000100000000D7C000__000000931B9A2710\n000000067F00004002000100000000D7819E-000000067F00004002000100000000D80B7F__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000D7C000-000000067F00004002000100000000D80000__00000073AD3FE6B8\n000000067F00004002000100000000D7C000-000000067F00004002000100000000D80000__000000914E3F38F0\n000000067F00004002000100000000D7C000-000000067F00004002000100000000D80000__000000931B9A2710\n000000067F00004002000100000000D80000-000000067F00004002000100000000D84000__00000073AD3FE6B8\n000000067F00004002000100000000D80000-000000067F00004002000100000000D84000__000000914E3F38F0\n000000067F00004002000100000000D80000-000000067F00004002000100000000D84000__000000931B9A2710\n000000067F00004002000100000000D80B7F-000000067F00004002000100000000D89552__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000D84000-000000067F00004002000100000000D88000__00000073AD3FE6B8\n000000067F00004002000100000000D84000-000000067F00004002000100000000D88000__000000914E3F38F0\n000000067F00004002000100000000D84000-000000067F00004002000100000000D88000__000000931B9A2710\n000000067F00004002000100000000D88000-000000067F00004002000100000000D8C000__00000073AD3FE6B8\n000000067F00004002000100000000D88000-000000067F00004002000100000000D8C000__000000914E3F38F0\n000000067F00004002000100000000D88000-000000067F00004002000100000000D8C000__000000931B9A2710\n000000067F00004002000100000000D89552-000000067F00004002000100000000D91F30__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000D8C000-000000067F00004002000100000000D90000__00000073AD3FE6B8\n000000067F00004002000100000000D8C000-000000067F00004002000100000000D90000__000000914E3F38F0\n000000067F00004002000100000000D8C000-000000067F00004002000100000000D90000__000000931B9A2710\n000000067F00004002000100000000D90000-000000067F00004002000100000000D94000__00000073AD3FE6B8\n000000067F00
004002000100000000D90000-000000067F00004002000100000000D94000__000000914E3F38F0\n000000067F00004002000100000000D90000-000000067F00004002000100000000D94000__000000931B9A2710\n000000067F00004002000100000000D91F30-000000067F00004002000100000000D9A901__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000D94000-000000067F00004002000100000000D98000__00000073AD3FE6B8\n000000067F00004002000100000000D94000-000000067F00004002000100000000D98000__000000914E3F38F0\n000000067F00004002000100000000D94000-000000067F00004002000100000000D98000__000000931B9A2710\n000000067F00004002000100000000D98000-000000067F00004002000100000000D9C000__00000073AD3FE6B8\n000000067F00004002000100000000D98000-000000067F00004002000100000000D9C000__000000914E3F38F0\n000000067F00004002000100000000D98000-000000067F00004002000100000000D9C000__000000931B9A2710\n000000067F00004002000100000000D9A901-000000067F00004002000100000000DA32CC__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000D9C000-000000067F00004002000100000000DA0000__00000073AD3FE6B8\n000000067F00004002000100000000D9C000-000000067F00004002000100000000DA0000__000000914E3F38F0\n000000067F00004002000100000000D9C000-000000067F00004002000100000000DA0000__000000931B9A2710\n000000067F00004002000100000000DA0000-000000067F00004002000100000000DA4000__00000073AD3FE6B8\n000000067F00004002000100000000DA0000-000000067F00004002000100000000DA4000__000000914E3F38F0\n000000067F00004002000100000000DA0000-000000067F00004002000100000000DA4000__000000931B9A2710\n000000067F00004002000100000000DA32CC-000000067F00004002000100000000DABCB3__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000DA4000-000000067F00004002000100000000DA8000__00000073AD3FE6B8\n000000067F00004002000100000000DA4000-000000067F00004002000100000000DA8000__000000914E3F38F0\n000000067F00004002000100000000DA4000-000000067F00004002000100000000DA8000__000000931B9A2710\n000000067F00004002000100000000DA8000-000000067F00004002000100000000DAC000__00000073AD3FE6B8\n00000006
7F00004002000100000000DA8000-000000067F00004002000100000000DAC000__000000914E3F38F0\n000000067F00004002000100000000DA8000-000000067F00004002000100000000DAC000__000000931B9A2710\n000000067F00004002000100000000DABCB3-000000067F00004002000100000000DB469A__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000DAC000-000000067F00004002000100000000DB0000__00000073AD3FE6B8\n000000067F00004002000100000000DAC000-000000067F00004002000100000000DB0000__000000914E3F38F0\n000000067F00004002000100000000DAC000-000000067F00004002000100000000DB0000__000000931B9A2710\n000000067F00004002000100000000DB0000-000000067F00004002000100000000DB4000__00000073AD3FE6B8\n000000067F00004002000100000000DB0000-000000067F00004002000100000000DB4000__000000914E3F38F0\n000000067F00004002000100000000DB0000-000000067F00004002000100000000DB4000__000000931B9A2710\n000000067F00004002000100000000DB4000-000000067F00004002000100000000DB8000__00000073AD3FE6B8\n000000067F00004002000100000000DB4000-000000067F00004002000100000000DB8000__000000914E3F38F0\n000000067F00004002000100000000DB4000-000000067F00004002000100000000DB8000__000000931B9A2710\n000000067F00004002000100000000DB469A-000000067F00004002000100000000DBD075__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000DB8000-000000067F00004002000100000000DBC000__00000073AD3FE6B8\n000000067F00004002000100000000DB8000-000000067F00004002000100000000DBC000__000000914E3F38F0\n000000067F00004002000100000000DB8000-000000067F00004002000100000000DBC000__000000931B9A2710\n000000067F00004002000100000000DBC000-000000067F00004002000100000000DC0000__00000073AD3FE6B8\n000000067F00004002000100000000DBC000-000000067F00004002000100000000DC0000__000000914E3F38F0\n000000067F00004002000100000000DBC000-000000067F00004002000100000000DC0000__000000931B9A2710\n000000067F00004002000100000000DBD075-000000067F00004002000100000000DC5A50__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000DC0000-000000067F00004002000100000000DC4000__00000073AD3FE6B8\n0000
00067F00004002000100000000DC0000-000000067F00004002000100000000DC4000__000000914E3F38F0\n000000067F00004002000100000000DC0000-000000067F00004002000100000000DC4000__000000931B9A2710\n000000067F00004002000100000000DC4000-000000067F00004002000100000000DC8000__00000073AD3FE6B8\n000000067F00004002000100000000DC4000-000000067F00004002000100000000DC8000__000000914E3F38F0\n000000067F00004002000100000000DC4000-000000067F00004002000100000000DC8000__000000931B9A2710\n000000067F00004002000100000000DC5A50-000000067F00004002000100000000DCE430__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000DC8000-000000067F00004002000100000000DCC000__00000073AD3FE6B8\n000000067F00004002000100000000DC8000-000000067F00004002000100000000DCC000__000000914E3F38F0\n000000067F00004002000100000000DC8000-000000067F00004002000100000000DCC000__000000931B9A2710\n000000067F00004002000100000000DCC000-000000067F00004002000100000000DD0000__00000073AD3FE6B8\n000000067F00004002000100000000DCC000-000000067F00004002000100000000DD0000__000000914E3F38F0\n000000067F00004002000100000000DCC000-000000067F00004002000100000000DD0000__000000931B9A2710\n000000067F00004002000100000000DCE430-000000067F00004002000100000000DD6E06__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000DD0000-000000067F00004002000100000000DD4000__00000073AD3FE6B8\n000000067F00004002000100000000DD0000-000000067F00004002000100000000DD4000__000000914E3F38F0\n000000067F00004002000100000000DD0000-000000067F00004002000100000000DD4000__000000931B9A2710\n000000067F00004002000100000000DD4000-000000067F00004002000100000000DD8000__00000073AD3FE6B8\n000000067F00004002000100000000DD4000-000000067F00004002000100000000DD8000__000000914E3F38F0\n000000067F00004002000100000000DD4000-000000067F00004002000100000000DD8000__000000931B9A2710\n000000067F00004002000100000000DD6E06-000000067F00004002000100000000DDF7DB__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000DD8000-000000067F00004002000100000000DDC000__00000073AD3FE6B8\n
000000067F00004002000100000000DD8000-000000067F00004002000100000000DDC000__000000914E3F38F0\n000000067F00004002000100000000DD8000-000000067F00004002000100000000DDC000__000000931B9A2710\n000000067F00004002000100000000DDC000-000000067F00004002000100000000DE0000__00000073AD3FE6B8\n000000067F00004002000100000000DDC000-000000067F00004002000100000000DE0000__000000914E3F38F0\n000000067F00004002000100000000DDC000-000000067F00004002000100000000DE0000__000000931B9A2710\n000000067F00004002000100000000DDF7DB-000000067F00004002000100000000DE81C3__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000DE0000-000000067F00004002000100000000DE4000__00000073AD3FE6B8\n000000067F00004002000100000000DE0000-000000067F00004002000100000000DE4000__000000914E3F38F0\n000000067F00004002000100000000DE0000-000000067F00004002000100000000DE4000__000000931B9A2710\n000000067F00004002000100000000DE4000-000000067F00004002000100000000DE8000__00000073AD3FE6B8\n000000067F00004002000100000000DE4000-000000067F00004002000100000000DE8000__000000914E3F38F0\n000000067F00004002000100000000DE4000-000000067F00004002000100000000DE8000__000000931B9A2710\n000000067F00004002000100000000DE8000-000000067F00004002000100000000DEC000__00000073AD3FE6B8\n000000067F00004002000100000000DE8000-000000067F00004002000100000000DEC000__000000914E3F38F0\n000000067F00004002000100000000DE8000-000000067F00004002000100000000DEC000__000000931B9A2710\n000000067F00004002000100000000DE81C3-000000067F00004002000100000000DF0B9F__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000DEC000-000000067F00004002000100000000DF0000__00000073AD3FE6B8\n000000067F00004002000100000000DEC000-000000067F00004002000100000000DF0000__000000914E3F38F0\n000000067F00004002000100000000DEC000-000000067F00004002000100000000DF0000__000000931B9A2710\n000000067F00004002000100000000DF0000-000000067F00004002000100000000DF4000__00000073AD3FE6B8\n000000067F00004002000100000000DF0000-000000067F00004002000100000000DF4000__000000914E3F38F0\n000000067F000
04002000100000000DF0000-000000067F00004002000100000000DF4000__000000931B9A2710\n000000067F00004002000100000000DF0B9F-000000067F00004002000100000000DF9582__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000DF4000-000000067F00004002000100000000DF8000__00000073AD3FE6B8\n000000067F00004002000100000000DF4000-000000067F00004002000100000000DF8000__000000914E3F38F0\n000000067F00004002000100000000DF4000-000000067F00004002000100000000DF8000__000000931B9A2710\n000000067F00004002000100000000DF8000-000000067F00004002000100000000DFC000__00000073AD3FE6B8\n000000067F00004002000100000000DF8000-000000067F00004002000100000000DFC000__000000914E3F38F0\n000000067F00004002000100000000DF8000-000000067F00004002000100000000DFC000__000000931B9A2710\n000000067F00004002000100000000DF9582-000000067F00004002000100000000E01F62__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000DFC000-000000067F00004002000100000000E00000__00000073AD3FE6B8\n000000067F00004002000100000000DFC000-000000067F00004002000100000000E00000__000000914E3F38F0\n000000067F00004002000100000000DFC000-000000067F00004002000100000000E00000__000000931B9A2710\n000000067F00004002000100000000E00000-000000067F00004002000100000000E04000__00000073AD3FE6B8\n000000067F00004002000100000000E00000-000000067F00004002000100000000E04000__000000914E3F38F0\n000000067F00004002000100000000E00000-000000067F00004002000100000000E04000__000000931B9A2710\n000000067F00004002000100000000E01F62-000000067F00004002000100000000E0A930__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000E04000-000000067F00004002000100000000E08000__00000073AD3FE6B8\n000000067F00004002000100000000E04000-000000067F00004002000100000000E08000__000000914E3F38F0\n000000067F00004002000100000000E04000-000000067F00004002000100000000E08000__000000931B9A2710\n000000067F00004002000100000000E08000-000000067F00004002000100000000E0C000__00000073AD3FE6B8\n000000067F00004002000100000000E08000-000000067F00004002000100000000E0C000__000000914E3F38F0\n000000067
F00004002000100000000E08000-000000067F00004002000100000000E0C000__000000931B9A2710\n000000067F00004002000100000000E0A930-000000067F00004002000100000000E13305__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000E0C000-000000067F00004002000100000000E10000__00000073AD3FE6B8\n000000067F00004002000100000000E0C000-000000067F00004002000100000000E10000__000000914E3F38F0\n000000067F00004002000100000000E0C000-000000067F00004002000100000000E10000__000000931B9A2710\n000000067F00004002000100000000E10000-000000067F00004002000100000000E14000__00000073AD3FE6B8\n000000067F00004002000100000000E10000-000000067F00004002000100000000E14000__000000914E3F38F0\n000000067F00004002000100000000E10000-000000067F00004002000100000000E14000__000000931B9A2710\n000000067F00004002000100000000E13305-000000067F00004002000100000000E1BCDD__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000E14000-000000067F00004002000100000000E18000__00000073AD3FE6B8\n000000067F00004002000100000000E14000-000000067F00004002000100000000E18000__000000914E3F38F0\n000000067F00004002000100000000E14000-000000067F00004002000100000000E18000__000000931B9A2710\n000000067F00004002000100000000E18000-000000067F00004002000100000000E1C000__00000073AD3FE6B8\n000000067F00004002000100000000E18000-000000067F00004002000100000000E1C000__000000914E3F38F0\n000000067F00004002000100000000E18000-000000067F00004002000100000000E1C000__000000931B9A2710\n000000067F00004002000100000000E1BCDD-000000067F00004002000100000000E246C0__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000E1C000-000000067F00004002000100000000E20000__00000073AD3FE6B8\n000000067F00004002000100000000E1C000-000000067F00004002000100000000E20000__000000914E3F38F0\n000000067F00004002000100000000E1C000-000000067F00004002000100000000E20000__000000931B9A2710\n000000067F00004002000100000000E20000-000000067F00004002000100000000E24000__00000073AD3FE6B8\n000000067F00004002000100000000E20000-000000067F00004002000100000000E24000__000000914E3F38F0\n00000
0067F00004002000100000000E20000-000000067F00004002000100000000E24000__000000931B9A2710\n000000067F00004002000100000000E24000-000000067F00004002000100000000E28000__00000073AD3FE6B8\n000000067F00004002000100000000E24000-000000067F00004002000100000000E28000__000000914E3F38F0\n000000067F00004002000100000000E24000-000000067F00004002000100000000E28000__000000931B9A2710\n000000067F00004002000100000000E246C0-000000067F00004002000100000000E2D0A2__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000E28000-000000067F00004002000100000000E2C000__00000073AD3FE6B8\n000000067F00004002000100000000E28000-000000067F00004002000100000000E2C000__000000914E3F38F0\n000000067F00004002000100000000E28000-000000067F00004002000100000000E2C000__000000931B9A2710\n000000067F00004002000100000000E2C000-000000067F00004002000100000000E30000__00000073AD3FE6B8\n000000067F00004002000100000000E2C000-000000067F00004002000100000000E30000__000000914E3F38F0\n000000067F00004002000100000000E2C000-000000067F00004002000100000000E30000__000000931B9A2710\n000000067F00004002000100000000E2D0A2-000000067F00004002000100000000E35A83__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000E30000-000000067F00004002000100000000E34000__00000073AD3FE6B8\n000000067F00004002000100000000E30000-000000067F00004002000100000000E34000__000000914E3F38F0\n000000067F00004002000100000000E30000-000000067F00004002000100000000E34000__000000931B9A2710\n000000067F00004002000100000000E34000-000000067F00004002000100000000E38000__00000073AD3FE6B8\n000000067F00004002000100000000E34000-000000067F00004002000100000000E38000__000000914E3F38F0\n000000067F00004002000100000000E34000-000000067F00004002000100000000E38000__000000931B9A2710\n000000067F00004002000100000000E35A83-000000067F00004002000100000000E3E45F__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000E38000-000000067F00004002000100000000E3C000__00000073AD3FE6B8\n000000067F00004002000100000000E38000-000000067F00004002000100000000E3C000__000000914E3F38F0\n0
00000067F00004002000100000000E38000-000000067F00004002000100000000E3C000__000000931B9A2710\n000000067F00004002000100000000E3C000-000000067F00004002000100000000E40000__00000073AD3FE6B8\n000000067F00004002000100000000E3C000-000000067F00004002000100000000E40000__000000914E3F38F0\n000000067F00004002000100000000E3C000-000000067F00004002000100000000E40000__000000931B9A2710\n000000067F00004002000100000000E3E45F-000000067F00004002000100000000E46E30__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000E40000-000000067F00004002000100000000E44000__00000073AD3FE6B8\n000000067F00004002000100000000E40000-000000067F00004002000100000000E44000__000000914E3F38F0\n000000067F00004002000100000000E40000-000000067F00004002000100000000E44000__000000931B9A2710\n000000067F00004002000100000000E44000-000000067F00004002000100000000E48000__00000073AD3FE6B8\n000000067F00004002000100000000E44000-000000067F00004002000100000000E48000__000000914E3F38F0\n000000067F00004002000100000000E44000-000000067F00004002000100000000E48000__000000931B9A2710\n000000067F00004002000100000000E46E30-000000067F00004002000100000000E4F802__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000E48000-000000067F00004002000100000000E4C000__00000073AD3FE6B8\n000000067F00004002000100000000E48000-000000067F00004002000100000000E4C000__000000914E3F38F0\n000000067F00004002000100000000E48000-000000067F00004002000100000000E4C000__000000931B9A2710\n000000067F00004002000100000000E4C000-000000067F00004002000100000000E50000__00000073AD3FE6B8\n000000067F00004002000100000000E4C000-000000067F00004002000100000000E50000__000000914E3F38F0\n000000067F00004002000100000000E4C000-000000067F00004002000100000000E50000__000000931B9A2710\n000000067F00004002000100000000E4F802-000000067F00004002000100000000E581E0__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000E50000-000000067F00004002000100000000E54000__00000073AD3FE6B8\n000000067F00004002000100000000E50000-000000067F00004002000100000000E54000__000000914E3F38F
0\n000000067F00004002000100000000E50000-000000067F00004002000100000000E54000__000000931B9A2710\n000000067F00004002000100000000E54000-000000067F00004002000100000000E58000__00000073AD3FE6B8\n000000067F00004002000100000000E54000-000000067F00004002000100000000E58000__000000914E3F38F0\n000000067F00004002000100000000E54000-000000067F00004002000100000000E58000__000000931B9A2710\n000000067F00004002000100000000E58000-000000067F00004002000100000000E5C000__00000073AD3FE6B8\n000000067F00004002000100000000E58000-000000067F00004002000100000000E5C000__000000914E3F38F0\n000000067F00004002000100000000E58000-000000067F00004002000100000000E5C000__000000931B9A2710\n000000067F00004002000100000000E581E0-000000067F00004002000100000000E60BC6__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000E5C000-000000067F00004002000100000000E60000__00000073AD3FE6B8\n000000067F00004002000100000000E5C000-000000067F00004002000100000000E60000__000000914E3F38F0\n000000067F00004002000100000000E5C000-000000067F00004002000100000000E60000__000000931B9A2710\n000000067F00004002000100000000E60000-000000067F00004002000100000000E64000__00000073AD3FE6B8\n000000067F00004002000100000000E60000-000000067F00004002000100000000E64000__000000914E3F38F0\n000000067F00004002000100000000E60000-000000067F00004002000100000000E64000__000000931B9A2710\n000000067F00004002000100000000E60BC6-000000067F00004002000100000000E695A7__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000E64000-000000067F00004002000100000000E68000__00000073AD3FE6B8\n000000067F00004002000100000000E64000-000000067F00004002000100000000E68000__000000914E3F38F0\n000000067F00004002000100000000E64000-000000067F00004002000100000000E68000__000000931B9A2710\n000000067F00004002000100000000E68000-000000067F00004002000100000000E6C000__00000073AD3FE6B8\n000000067F00004002000100000000E68000-000000067F00004002000100000000E6C000__000000914E3F38F0\n000000067F00004002000100000000E68000-000000067F00004002000100000000E6C000__000000931B9A2710\n000000067F
00004002000100000000E695A7-000000067F00004002000100000000E71F86__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000E6C000-000000067F00004002000100000000E70000__00000073AD3FE6B8\n000000067F00004002000100000000E6C000-000000067F00004002000100000000E70000__000000914E3F38F0\n000000067F00004002000100000000E6C000-000000067F00004002000100000000E70000__000000931B9A2710\n000000067F00004002000100000000E70000-000000067F00004002000100000000E74000__00000073AD3FE6B8\n000000067F00004002000100000000E70000-000000067F00004002000100000000E74000__000000914E3F38F0\n000000067F00004002000100000000E70000-000000067F00004002000100000000E74000__000000931B9A2710\n000000067F00004002000100000000E71F86-000000067F00004002000100000000E7A966__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000E74000-000000067F00004002000100000000E78000__00000073AD3FE6B8\n000000067F00004002000100000000E74000-000000067F00004002000100000000E78000__000000914E3F38F0\n000000067F00004002000100000000E74000-000000067F00004002000100000000E78000__000000931B9A2710\n000000067F00004002000100000000E78000-000000067F00004002000100000000E7C000__00000073AD3FE6B8\n000000067F00004002000100000000E78000-000000067F00004002000100000000E7C000__000000914E3F38F0\n000000067F00004002000100000000E78000-000000067F00004002000100000000E7C000__000000931B9A2710\n000000067F00004002000100000000E7A966-000000067F00004002000100000000E8333C__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000E7C000-000000067F00004002000100000000E80000__00000073AD3FE6B8\n000000067F00004002000100000000E7C000-000000067F00004002000100000000E80000__000000914E3F38F0\n000000067F00004002000100000000E7C000-000000067F00004002000100000000E80000__000000931B9A2710\n000000067F00004002000100000000E80000-000000067F00004002000100000000E84000__00000073AD3FE6B8\n000000067F00004002000100000000E80000-000000067F00004002000100000000E84000__000000914E3F38F0\n000000067F00004002000100000000E80000-000000067F00004002000100000000E84000__000000931B9A2710\n000000
067F00004002000100000000E8333C-000000067F00004002000100000000E8BD17__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000E84000-000000067F00004002000100000000E88000__00000073AD3FE6B8\n000000067F00004002000100000000E84000-000000067F00004002000100000000E88000__000000914E3F38F0\n000000067F00004002000100000000E84000-000000067F00004002000100000000E88000__000000931B9A2710\n000000067F00004002000100000000E88000-000000067F00004002000100000000E8C000__00000073AD3FE6B8\n000000067F00004002000100000000E88000-000000067F00004002000100000000E8C000__000000914E3F38F0\n000000067F00004002000100000000E88000-000000067F00004002000100000000E8C000__000000931B9A2710\n000000067F00004002000100000000E8BD17-000000067F00004002000100000000E946F5__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000E8C000-000000067F00004002000100000000E90000__00000073AD3FE6B8\n000000067F00004002000100000000E8C000-000000067F00004002000100000000E90000__000000914E3F38F0\n000000067F00004002000100000000E8C000-000000067F00004002000100000000E90000__000000931B9A2710\n000000067F00004002000100000000E90000-000000067F00004002000100000000E94000__00000073AD3FE6B8\n000000067F00004002000100000000E90000-000000067F00004002000100000000E94000__000000914E3F38F0\n000000067F00004002000100000000E90000-000000067F00004002000100000000E94000__000000931B9A2710\n000000067F00004002000100000000E94000-000000067F00004002000100000000E98000__00000073AD3FE6B8\n000000067F00004002000100000000E94000-000000067F00004002000100000000E98000__000000914E3F38F0\n000000067F00004002000100000000E94000-000000067F00004002000100000000E98000__000000931B9A2710\n000000067F00004002000100000000E946F5-000000067F00004002000100000000E9D0D7__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000E98000-000000067F00004002000100000000E9C000__00000073AD3FE6B8\n000000067F00004002000100000000E98000-000000067F00004002000100000000E9C000__000000914E3F38F0\n000000067F00004002000100000000E98000-000000067F00004002000100000000E9C000__000000931B9A2710\n00
0000067F00004002000100000000E9C000-000000067F00004002000100000000EA0000__00000073AD3FE6B8\n000000067F00004002000100000000E9C000-000000067F00004002000100000000EA0000__000000914E3F38F0\n000000067F00004002000100000000E9C000-000000067F00004002000100000000EA0000__000000931B9A2710\n000000067F00004002000100000000E9D0D7-000000067F00004002000100000000EA5AB9__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000EA0000-000000067F00004002000100000000EA4000__00000073AD3FE6B8\n000000067F00004002000100000000EA0000-000000067F00004002000100000000EA4000__000000914E3F38F0\n000000067F00004002000100000000EA0000-000000067F00004002000100000000EA4000__000000931B9A2710\n000000067F00004002000100000000EA4000-000000067F00004002000100000000EA8000__00000073AD3FE6B8\n000000067F00004002000100000000EA4000-000000067F00004002000100000000EA8000__000000914E3F38F0\n000000067F00004002000100000000EA4000-000000067F00004002000100000000EA8000__000000931B9A2710\n000000067F00004002000100000000EA5AB9-000000067F00004002000100000000EAE49A__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000EA8000-000000067F00004002000100000000EAC000__00000073AD3FE6B8\n000000067F00004002000100000000EA8000-000000067F00004002000100000000EAC000__000000914E3F38F0\n000000067F00004002000100000000EA8000-000000067F00004002000100000000EAC000__000000931B9A2710\n000000067F00004002000100000000EAC000-000000067F00004002000100000000EB0000__00000073AD3FE6B8\n000000067F00004002000100000000EAC000-000000067F00004002000100000000EB0000__000000914E3F38F0\n000000067F00004002000100000000EAC000-000000067F00004002000100000000EB0000__000000931B9A2710\n000000067F00004002000100000000EAE49A-000000067F00004002000100000000EB6E78__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000EB0000-000000067F00004002000100000000EB4000__00000073AD3FE6B8\n000000067F00004002000100000000EB0000-000000067F00004002000100000000EB4000__000000914E3F38F0\n000000067F00004002000100000000EB0000-000000067F00004002000100000000EB4000__000000931B9A2710
\n000000067F00004002000100000000EB4000-000000067F00004002000100000000EB8000__00000073AD3FE6B8\n000000067F00004002000100000000EB4000-000000067F00004002000100000000EB8000__000000914E3F38F0\n000000067F00004002000100000000EB4000-000000067F00004002000100000000EB8000__000000931B9A2710\n000000067F00004002000100000000EB6E78-000000067F00004002000100000000EBF851__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000EB8000-000000067F00004002000100000000EBC000__00000073AD3FE6B8\n000000067F00004002000100000000EB8000-000000067F00004002000100000000EBC000__000000914E3F38F0\n000000067F00004002000100000000EB8000-000000067F00004002000100000000EBC000__000000931B9A2710\n000000067F00004002000100000000EBC000-000000067F00004002000100000000EC0000__00000073AD3FE6B8\n000000067F00004002000100000000EBC000-000000067F00004002000100000000EC0000__000000914E3F38F0\n000000067F00004002000100000000EBC000-000000067F00004002000100000000EC0000__000000931B9A2710\n000000067F00004002000100000000EBF851-000000067F00004002000100000000EC8221__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000EC0000-000000067F00004002000100000000EC4000__00000073AD3FE6B8\n000000067F00004002000100000000EC0000-000000067F00004002000100000000EC4000__000000914E3F38F0\n000000067F00004002000100000000EC0000-000000067F00004002000100000000EC4000__000000931B9A2710\n000000067F00004002000100000000EC4000-000000067F00004002000100000000EC8000__00000073AD3FE6B8\n000000067F00004002000100000000EC4000-000000067F00004002000100000000EC8000__000000914E3F38F0\n000000067F00004002000100000000EC4000-000000067F00004002000100000000EC8000__000000931B9A2710\n000000067F00004002000100000000EC8000-000000067F00004002000100000000ECC000__00000073AD3FE6B8\n000000067F00004002000100000000EC8000-000000067F00004002000100000000ECC000__000000914E3F38F0\n000000067F00004002000100000000EC8000-000000067F00004002000100000000ECC000__000000931B9A2710\n000000067F00004002000100000000EC8221-000000067F00004002000100000000ED0BFD__0000005CA7BBD6F9-000000739A8D
1299\n000000067F00004002000100000000ECC000-000000067F00004002000100000000ED0000__00000073AD3FE6B8\n000000067F00004002000100000000ECC000-000000067F00004002000100000000ED0000__000000914E3F38F0\n000000067F00004002000100000000ECC000-000000067F00004002000100000000ED0000__000000931B9A2710\n000000067F00004002000100000000ED0000-000000067F00004002000100000000ED4000__00000073AD3FE6B8\n000000067F00004002000100000000ED0000-000000067F00004002000100000000ED4000__000000914E3F38F0\n000000067F00004002000100000000ED0000-000000067F00004002000100000000ED4000__000000931B9A2710\n000000067F00004002000100000000ED0BFD-000000067F00004002000100000000ED95E5__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000ED4000-000000067F00004002000100000000ED8000__00000073AD3FE6B8\n000000067F00004002000100000000ED4000-000000067F00004002000100000000ED8000__000000914E3F38F0\n000000067F00004002000100000000ED4000-000000067F00004002000100000000ED8000__000000931B9A2710\n000000067F00004002000100000000ED8000-000000067F00004002000100000000EDC000__00000073AD3FE6B8\n000000067F00004002000100000000ED8000-000000067F00004002000100000000EDC000__000000914E3F38F0\n000000067F00004002000100000000ED8000-000000067F00004002000100000000EDC000__000000931B9A2710\n000000067F00004002000100000000ED95E5-000000067F00004002000100000000EE1FCD__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000EDC000-000000067F00004002000100000000EE0000__00000073AD3FE6B8\n000000067F00004002000100000000EDC000-000000067F00004002000100000000EE0000__000000914E3F38F0\n000000067F00004002000100000000EDC000-000000067F00004002000100000000EE0000__000000931B9A2710\n000000067F00004002000100000000EE0000-000000067F00004002000100000000EE4000__00000073AD3FE6B8\n000000067F00004002000100000000EE0000-000000067F00004002000100000000EE4000__000000914E3F38F0\n000000067F00004002000100000000EE0000-000000067F00004002000100000000EE4000__000000931B9A2710\n000000067F00004002000100000000EE1FCD-000000067F00004002000100000000EEA9A7__0000005CA7BBD6F9-00000073
9A8D1299\n000000067F00004002000100000000EE4000-000000067F00004002000100000000EE8000__00000073AD3FE6B8\n000000067F00004002000100000000EE4000-000000067F00004002000100000000EE8000__000000914E3F38F0\n000000067F00004002000100000000EE4000-000000067F00004002000100000000EE8000__000000931B9A2710\n000000067F00004002000100000000EE8000-000000067F00004002000100000000EEC000__00000073AD3FE6B8\n000000067F00004002000100000000EE8000-000000067F00004002000100000000EEC000__000000914E3F38F0\n000000067F00004002000100000000EE8000-000000067F00004002000100000000EEC000__000000931B9A2710\n000000067F00004002000100000000EEA9A7-000000067F00004002000100000000EF3387__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000EEC000-000000067F00004002000100000000EF0000__00000073AD3FE6B8\n000000067F00004002000100000000EEC000-000000067F00004002000100000000EF0000__000000914E3F38F0\n000000067F00004002000100000000EEC000-000000067F00004002000100000000EF0000__000000931B9A2710\n000000067F00004002000100000000EF0000-000000067F00004002000100000000EF4000__00000073AD3FE6B8\n000000067F00004002000100000000EF0000-000000067F00004002000100000000EF4000__000000914E3F38F0\n000000067F00004002000100000000EF0000-000000067F00004002000100000000EF4000__000000931B9A2710\n000000067F00004002000100000000EF3387-000000067F00004002000100000000EFBD62__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000EF4000-000000067F00004002000100000000EF8000__00000073AD3FE6B8\n000000067F00004002000100000000EF4000-000000067F00004002000100000000EF8000__000000914E3F38F0\n000000067F00004002000100000000EF4000-000000067F00004002000100000000EF8000__000000931B9A2710\n000000067F00004002000100000000EF8000-000000067F00004002000100000000EFC000__00000073AD3FE6B8\n000000067F00004002000100000000EF8000-000000067F00004002000100000000EFC000__000000914E3F38F0\n000000067F00004002000100000000EF8000-000000067F00004002000100000000EFC000__000000931B9A2710\n000000067F00004002000100000000EFBD62-000000067F00004002000100000000F0473E__0000005CA7BBD6F9-0000
00739A8D1299\n000000067F00004002000100000000EFC000-000000067F00004002000100000000F00000__00000073AD3FE6B8\n000000067F00004002000100000000EFC000-000000067F00004002000100000000F00000__000000914E3F38F0\n000000067F00004002000100000000EFC000-000000067F00004002000100000000F00000__000000931B9A2710\n000000067F00004002000100000000F00000-000000067F00004002000100000000F04000__00000073AD3FE6B8\n000000067F00004002000100000000F00000-000000067F00004002000100000000F04000__000000914E3F38F0\n000000067F00004002000100000000F00000-000000067F00004002000100000000F04000__000000931B9A2710\n000000067F00004002000100000000F04000-000000067F00004002000100000000F08000__00000073AD3FE6B8\n000000067F00004002000100000000F04000-000000067F00004002000100000000F08000__000000914E3F38F0\n000000067F00004002000100000000F04000-000000067F00004002000100000000F08000__000000931B9A2710\n000000067F00004002000100000000F0473E-000000067F00004002000100000000F0D116__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000F08000-000000067F00004002000100000000F0C000__00000073AD3FE6B8\n000000067F00004002000100000000F08000-000000067F00004002000100000000F0C000__000000914E3F38F0\n000000067F00004002000100000000F08000-000000067F00004002000100000000F0C000__000000931B9A2710\n000000067F00004002000100000000F0C000-000000067F00004002000100000000F10000__00000073AD3FE6B8\n000000067F00004002000100000000F0C000-000000067F00004002000100000000F10000__000000914E3F38F0\n000000067F00004002000100000000F0C000-000000067F00004002000100000000F10000__000000931B9A2710\n000000067F00004002000100000000F0D116-000000067F00004002000100000000F15AE9__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000F10000-000000067F00004002000100000000F14000__00000073AD3FE6B8\n000000067F00004002000100000000F10000-000000067F00004002000100000000F14000__000000914E3F38F0\n000000067F00004002000100000000F10000-000000067F00004002000100000000F14000__000000931B9A2710\n000000067F00004002000100000000F14000-000000067F00004002000100000000F18000__00000073AD3FE6B8\
n000000067F00004002000100000000F14000-000000067F00004002000100000000F18000__000000914E3F38F0\n000000067F00004002000100000000F14000-000000067F00004002000100000000F18000__000000931B9A2710\n000000067F00004002000100000000F15AE9-000000067F00004002000100000000F1E4CB__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000F18000-000000067F00004002000100000000F1C000__00000073AD3FE6B8\n000000067F00004002000100000000F18000-000000067F00004002000100000000F1C000__000000914E3F38F0\n000000067F00004002000100000000F18000-000000067F00004002000100000000F1C000__000000931B9A2710\n000000067F00004002000100000000F1C000-000000067F00004002000100000000F20000__00000073AD3FE6B8\n000000067F00004002000100000000F1C000-000000067F00004002000100000000F20000__000000914E3F38F0\n000000067F00004002000100000000F1C000-000000067F00004002000100000000F20000__000000931B9A2710\n000000067F00004002000100000000F1E4CB-000000067F00004002000100000000F26EC1__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000F20000-000000067F00004002000100000000F24000__00000073AD3FE6B8\n000000067F00004002000100000000F20000-000000067F00004002000100000000F24000__000000914E3F38F0\n000000067F00004002000100000000F20000-000000067F00004002000100000000F24000__000000931B9A2710\n000000067F00004002000100000000F24000-000000067F00004002000100000000F28000__00000073AD3FE6B8\n000000067F00004002000100000000F24000-000000067F00004002000100000000F28000__000000914E3F38F0\n000000067F00004002000100000000F24000-000000067F00004002000100000000F28000__000000931B9A2710\n000000067F00004002000100000000F26EC1-000000067F00004002000100000000F2F8A1__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000F28000-000000067F00004002000100000000F2C000__00000073AD3FE6B8\n000000067F00004002000100000000F28000-000000067F00004002000100000000F2C000__000000914E3F38F0\n000000067F00004002000100000000F28000-000000067F00004002000100000000F2C000__000000931B9A2710\n000000067F00004002000100000000F2C000-000000067F00004002000100000000F30000__00000073AD3FE
6B8\n000000067F00004002000100000000F2C000-000000067F00004002000100000000F30000__000000914E3F38F0\n000000067F00004002000100000000F2C000-000000067F00004002000100000000F30000__000000931B9A2710\n000000067F00004002000100000000F2F8A1-000000067F00004002000100000000F38278__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000F30000-000000067F00004002000100000000F34000__00000073AD3FE6B8\n000000067F00004002000100000000F30000-000000067F00004002000100000000F34000__000000914E3F38F0\n000000067F00004002000100000000F30000-000000067F00004002000100000000F34000__000000931B9A2710\n000000067F00004002000100000000F34000-000000067F00004002000100000000F38000__00000073AD3FE6B8\n000000067F00004002000100000000F34000-000000067F00004002000100000000F38000__000000914E3F38F0\n000000067F00004002000100000000F34000-000000067F00004002000100000000F38000__000000931B9A2710\n000000067F00004002000100000000F38000-000000067F00004002000100000000F3C000__00000073AD3FE6B8\n000000067F00004002000100000000F38000-000000067F00004002000100000000F3C000__000000914E3F38F0\n000000067F00004002000100000000F38000-000000067F00004002000100000000F3C000__000000931B9A2710\n000000067F00004002000100000000F38278-000000067F00004002000100000000F40C57__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000F3C000-000000067F00004002000100000000F40000__00000073AD3FE6B8\n000000067F00004002000100000000F3C000-000000067F00004002000100000000F40000__000000914E3F38F0\n000000067F00004002000100000000F3C000-000000067F00004002000100000000F40000__000000931B9A2710\n000000067F00004002000100000000F40000-000000067F00004002000100000000F44000__00000073AD3FE6B8\n000000067F00004002000100000000F40000-000000067F00004002000100000000F44000__000000914E3F38F0\n000000067F00004002000100000000F40000-000000067F00004002000100000000F44000__000000931B9A2710\n000000067F00004002000100000000F40C57-000000067F00004002000100000000F49630__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000F44000-000000067F00004002000100000000F48000__00000073A
D3FE6B8\n000000067F00004002000100000000F44000-000000067F00004002000100000000F48000__000000914E3F38F0\n000000067F00004002000100000000F44000-000000067F00004002000100000000F48000__000000931B9A2710\n000000067F00004002000100000000F48000-000000067F00004002000100000000F4C000__00000073AD3FE6B8\n000000067F00004002000100000000F48000-000000067F00004002000100000000F4C000__000000914E3F38F0\n000000067F00004002000100000000F48000-000000067F00004002000100000000F4C000__000000931B9A2710\n000000067F00004002000100000000F49630-000000067F00004002000100000000F52007__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000F4C000-000000067F00004002000100000000F50000__00000073AD3FE6B8\n000000067F00004002000100000000F4C000-000000067F00004002000100000000F50000__000000914E3F38F0\n000000067F00004002000100000000F4C000-000000067F00004002000100000000F50000__000000931B9A2710\n000000067F00004002000100000000F50000-000000067F00004002000100000000F54000__00000073AD3FE6B8\n000000067F00004002000100000000F50000-000000067F00004002000100000000F54000__000000914E3F38F0\n000000067F00004002000100000000F50000-000000067F00004002000100000000F54000__000000931B9A2710\n000000067F00004002000100000000F52007-000000067F00004002000100000000F5A9DE__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000F54000-000000067F00004002000100000000F58000__00000073AD3FE6B8\n000000067F00004002000100000000F54000-000000067F00004002000100000000F58000__000000914E3F38F0\n000000067F00004002000100000000F54000-000000067F00004002000100000000F58000__000000931B9A2710\n000000067F00004002000100000000F58000-000000067F00004002000100000000F5C000__00000073AD3FE6B8\n000000067F00004002000100000000F58000-000000067F00004002000100000000F5C000__000000914E3F38F0\n000000067F00004002000100000000F58000-000000067F00004002000100000000F5C000__000000931B9A2710\n000000067F00004002000100000000F5A9DE-000000067F00004002000100000000F60351__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000100000000F5C000-000000067F00004002000100000000F60000__00000
073AD3FE6B8\n000000067F00004002000100000000F5C000-000000067F00004002000100000000F60000__000000914E3F38F0\n000000067F00004002000100000000F5C000-000000067F00004002000100000000F60000__000000931B9A2710\n000000067F00004002000100000000F60000-000000067F00004002000100050100000000__00000073AD3FE6B8\n000000067F00004002000100000000F60000-000000067F00004002000100050100000000__000000914E3F38F0\n000000067F00004002000100000000F60000-000000067F00004002000100050100000000__000000931B9A2710\n000000067F00004002000100000000F60000-030000000000000000000000000000000002__000000739A8D1298\n000000067F000040020001000000FFFFFFFF-000000067F00004002000100000100000000__0000005CA7BBF4A0-00000064F391EC28\n000000067F000040020001000000FFFFFFFF-000000067F00004002000100000100000000__00000064F391EC28-0000006D3F67EDA8\n000000067F000040020001000000FFFFFFFF-000000067F00004002000100000100000000__0000006D3F67EDA8-000000739A8D1299\n000000067F000040020001000500FFFFFFFF-000000067F00004002000100050100000000__0000005CA7BBD6F9-000000739A8D1299\n000000067F00004002000140000000000000-000000067F00004002000140000000004000__00000073AD3FE6B8\n000000067F00004002000140000000000000-000000067F00004002000140000000004000__000000914E3F38F0\n000000067F00004002000140000000000000-000000067F00004002000140000000004000__000000931B9A2710\n000000067F00004002000140000000004000-000000067F00004002000140000000008000__00000073AD3FE6B8\n000000067F00004002000140000000004000-000000067F00004002000140000000008000__000000914E3F38F0\n000000067F00004002000140000000004000-000000067F00004002000140000000008000__000000931B9A2710\n000000067F00004002000140000000008000-000000067F0000400200014000000000C000__000000914E3F38F0\n000000067F00004002000140000000008000-000000067F0000400200014000000000C000__000000931B9A2710\n000000067F00004002000140000000008000-030000000000000000000000000000000002__00000073AD3FE6B8\n000000067F00004002000140000000008988-000000067F00004002000140000000011367__000000739A920D71-0000008D2DB5E0C1\n000000067F0000400200014000000000C000-00000
0067F00004002000140000000010000__000000914E3F38F0\n000000067F0000400200014000000000C000-000000067F00004002000140000000010000__000000931B9A2710\n000000067F00004002000140000000010000-000000067F00004002000140000000014000__000000914E3F38F0\n000000067F00004002000140000000010000-000000067F00004002000140000000014000__000000931B9A2710\n000000067F00004002000140000000011367-000000067F00004002000140000000019D71__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000014000-000000067F00004002000140000000018000__000000914E3F38F0\n000000067F00004002000140000000014000-000000067F00004002000140000000018000__000000931B9A2710\n000000067F00004002000140000000018000-000000067F0000400200014000000001C000__000000914E3F38F0\n000000067F00004002000140000000018000-000000067F0000400200014000000001C000__000000931B9A2710\n000000067F00004002000140000000019D71-000000067F00004002000140000000022769__000000739A920D71-0000008D2DB5E0C1\n000000067F0000400200014000000001C000-000000067F00004002000140000000020000__000000914E3F38F0\n000000067F0000400200014000000001C000-000000067F00004002000140000000020000__000000931B9A2710\n000000067F00004002000140000000020000-000000067F00004002000140000000024000__000000914E3F38F0\n000000067F00004002000140000000020000-000000067F00004002000140000000024000__000000931B9A2710\n000000067F00004002000140000000022769-000000067F0000400200014000000002B151__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000024000-000000067F00004002000140000000028000__000000914E3F38F0\n000000067F00004002000140000000024000-000000067F00004002000140000000028000__000000931B9A2710\n000000067F00004002000140000000028000-000000067F0000400200014000000002C000__000000914E3F38F0\n000000067F00004002000140000000028000-000000067F0000400200014000000002C000__000000931B9A2710\n000000067F0000400200014000000002B151-000000067F00004002000140000000033B28__000000739A920D71-0000008D2DB5E0C1\n000000067F0000400200014000000002C000-000000067F00004002000140000000030000__000000914E3F38F0\n000000067F00004002000
14000000002C000-000000067F00004002000140000000030000__000000931B9A2710\n000000067F00004002000140000000030000-000000067F00004002000140000000034000__000000914E3F38F0\n000000067F00004002000140000000030000-000000067F00004002000140000000034000__000000931B9A2710\n000000067F00004002000140000000033B28-000000067F0000400200014000000003C4CB__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000034000-000000067F00004002000140000000038000__000000914E3F38F0\n000000067F00004002000140000000034000-000000067F00004002000140000000038000__000000931B9A2710\n000000067F00004002000140000000038000-000000067F0000400200014000000003C000__000000914E3F38F0\n000000067F00004002000140000000038000-000000067F0000400200014000000003C000__000000931B9A2710\n000000067F0000400200014000000003C000-000000067F00004002000140000000040000__000000914E3F38F0\n000000067F0000400200014000000003C000-000000067F00004002000140000000040000__000000931B9A2710\n000000067F0000400200014000000003C4CB-000000067F00004002000140000000044E80__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000040000-000000067F00004002000140000000044000__000000914E3F38F0\n000000067F00004002000140000000040000-000000067F00004002000140000000044000__000000931B9A2710\n000000067F00004002000140000000044000-000000067F00004002000140000000048000__000000914E3F38F0\n000000067F00004002000140000000044000-000000067F00004002000140000000048000__000000931B9A2710\n000000067F00004002000140000000044E80-000000067F0000400200014000000004D872__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000048000-000000067F0000400200014000000004C000__000000914E3F38F0\n000000067F00004002000140000000048000-000000067F0000400200014000000004C000__000000931B9A2710\n000000067F0000400200014000000004C000-000000067F00004002000140000000050000__000000914E3F38F0\n000000067F0000400200014000000004C000-000000067F00004002000140000000050000__000000931B9A2710\n000000067F0000400200014000000004D872-000000067F00004002000140000000056274__000000739A920D71-0000008D2DB5E0C1\n
000000067F00004002000140000000050000-000000067F00004002000140000000054000__000000914E3F38F0\n000000067F00004002000140000000050000-000000067F00004002000140000000054000__000000931B9A2710\n000000067F00004002000140000000054000-000000067F00004002000140000000058000__000000914E3F38F0\n000000067F00004002000140000000054000-000000067F00004002000140000000058000__000000931B9A2710\n000000067F00004002000140000000056274-000000067F0000400200014000000005EC6A__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000058000-000000067F0000400200014000000005C000__000000914E3F38F0\n000000067F00004002000140000000058000-000000067F0000400200014000000005C000__000000931B9A2710\n000000067F0000400200014000000005C000-000000067F00004002000140000000060000__000000914E3F38F0\n000000067F0000400200014000000005C000-000000067F00004002000140000000060000__000000931B9A2710\n000000067F0000400200014000000005EC6A-000000067F0000400200014000000006764E__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000060000-000000067F00004002000140000000064000__000000914E3F38F0\n000000067F00004002000140000000060000-000000067F00004002000140000000064000__000000931B9A2710\n000000067F00004002000140000000064000-000000067F00004002000140000000068000__000000914E3F38F0\n000000067F00004002000140000000064000-000000067F00004002000140000000068000__000000931B9A2710\n000000067F0000400200014000000006764E-000000067F00004002000140000000070013__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000068000-000000067F0000400200014000000006C000__000000914E3F38F0\n000000067F00004002000140000000068000-000000067F0000400200014000000006C000__000000931B9A2710\n000000067F0000400200014000000006C000-000000067F00004002000140000000070000__000000914E3F38F0\n000000067F0000400200014000000006C000-000000067F00004002000140000000070000__000000931B9A2710\n000000067F00004002000140000000070000-000000067F00004002000140000000074000__000000914E3F38F0\n000000067F00004002000140000000070000-000000067F00004002000140000000074000__000000931B9A27
10\n000000067F00004002000140000000070013-000000067F000040020001400000000789BA__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000074000-000000067F00004002000140000000078000__000000914E3F38F0\n000000067F00004002000140000000074000-000000067F00004002000140000000078000__000000931B9A2710\n000000067F00004002000140000000078000-000000067F0000400200014000000007C000__000000914E3F38F0\n000000067F00004002000140000000078000-000000067F0000400200014000000007C000__000000931B9A2710\n000000067F000040020001400000000789BA-000000067F0000400200014000000008136D__000000739A920D71-0000008D2DB5E0C1\n000000067F0000400200014000000007C000-000000067F00004002000140000000080000__000000914E3F38F0\n000000067F0000400200014000000007C000-000000067F00004002000140000000080000__000000931B9A2710\n000000067F00004002000140000000080000-000000067F00004002000140000000084000__000000914E3F38F0\n000000067F00004002000140000000080000-000000067F00004002000140000000084000__000000931B9A2710\n000000067F0000400200014000000008136D-000000067F00004002000140000000089D5F__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000084000-000000067F00004002000140000000088000__000000914E3F38F0\n000000067F00004002000140000000084000-000000067F00004002000140000000088000__000000931B9A2710\n000000067F00004002000140000000088000-000000067F0000400200014000000008C000__000000914E3F38F0\n000000067F00004002000140000000088000-000000067F0000400200014000000008C000__000000931B9A2710\n000000067F00004002000140000000089D5F-000000067F0000400200014000000009275F__000000739A920D71-0000008D2DB5E0C1\n000000067F0000400200014000000008C000-000000067F00004002000140000000090000__000000914E3F38F0\n000000067F0000400200014000000008C000-000000067F00004002000140000000090000__000000931B9A2710\n000000067F00004002000140000000090000-000000067F00004002000140000000094000__000000914E3F38F0\n000000067F00004002000140000000090000-000000067F00004002000140000000094000__000000931B9A2710\n000000067F0000400200014000000009275F-000000067F000040020001400000000
9B154__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000094000-000000067F00004002000140000000098000__000000914E3F38F0\n000000067F00004002000140000000094000-000000067F00004002000140000000098000__000000931B9A2710\n000000067F00004002000140000000098000-000000067F0000400200014000000009C000__000000914E3F38F0\n000000067F00004002000140000000098000-000000067F0000400200014000000009C000__000000931B9A2710\n000000067F0000400200014000000009B154-000000067F000040020001400000000A3B2B__000000739A920D71-0000008D2DB5E0C1\n000000067F0000400200014000000009C000-000000067F000040020001400000000A0000__000000914E3F38F0\n000000067F0000400200014000000009C000-000000067F000040020001400000000A0000__000000931B9A2710\n000000067F000040020001400000000A0000-000000067F000040020001400000000A4000__000000914E3F38F0\n000000067F000040020001400000000A0000-000000067F000040020001400000000A4000__000000931B9A2710\n000000067F000040020001400000000A3B2B-000000067F000040020001400000000AC4F0__000000739A920D71-0000008D2DB5E0C1\n000000067F000040020001400000000A4000-000000067F000040020001400000000A8000__000000914E3F38F0\n000000067F000040020001400000000A4000-000000067F000040020001400000000A8000__000000931B9A2710\n000000067F000040020001400000000A8000-000000067F000040020001400000000AC000__000000914E3F38F0\n000000067F000040020001400000000A8000-000000067F000040020001400000000AC000__000000931B9A2710\n000000067F000040020001400000000AC000-000000067F000040020001400000000B0000__000000914E3F38F0\n000000067F000040020001400000000AC000-000000067F000040020001400000000B0000__000000931B9A2710\n000000067F000040020001400000000AC4F0-000000067F000040020001400000000B4EAA__000000739A920D71-0000008D2DB5E0C1\n000000067F000040020001400000000B0000-000000067F000040020001400000000B4000__000000914E3F38F0\n000000067F000040020001400000000B0000-000000067F000040020001400000000B4000__000000931B9A2710\n000000067F000040020001400000000B4000-000000067F000040020001400000000B8000__000000914E3F38F0\n000000067F000040020001400000000B4000-000000067F
000040020001400000000B8000__000000931B9A2710\n000000067F000040020001400000000B4EAA-000000067F000040020001400000000BD86C__000000739A920D71-0000008D2DB5E0C1\n000000067F000040020001400000000B8000-000000067F000040020001400000000BC000__000000914E3F38F0\n000000067F000040020001400000000B8000-000000067F000040020001400000000BC000__000000931B9A2710\n000000067F000040020001400000000BC000-000000067F000040020001400000000C0000__000000914E3F38F0\n000000067F000040020001400000000BC000-000000067F000040020001400000000C0000__000000931B9A2710\n000000067F000040020001400000000BD86C-000000067F000040020001400000000C6268__000000739A920D71-0000008D2DB5E0C1\n000000067F000040020001400000000C0000-000000067F000040020001400000000C4000__000000914E3F38F0\n000000067F000040020001400000000C0000-000000067F000040020001400000000C4000__000000931B9A2710\n000000067F000040020001400000000C4000-000000067F000040020001400000000C8000__000000914E3F38F0\n000000067F000040020001400000000C4000-000000067F000040020001400000000C8000__000000931B9A2710\n000000067F000040020001400000000C6268-000000067F000040020001400000000CEC64__000000739A920D71-0000008D2DB5E0C1\n000000067F000040020001400000000C8000-000000067F000040020001400000000CC000__000000914E3F38F0\n000000067F000040020001400000000C8000-000000067F000040020001400000000CC000__000000931B9A2710\n000000067F000040020001400000000CC000-000000067F000040020001400000000D0000__000000914E3F38F0\n000000067F000040020001400000000CC000-000000067F000040020001400000000D0000__000000931B9A2710\n000000067F000040020001400000000CEC64-000000067F000040020001400000000D7659__000000739A920D71-0000008D2DB5E0C1\n000000067F000040020001400000000D0000-000000067F000040020001400000000D4000__000000914E3F38F0\n000000067F000040020001400000000D0000-000000067F000040020001400000000D4000__000000931B9A2710\n000000067F000040020001400000000D4000-000000067F000040020001400000000D8000__000000914E3F38F0\n000000067F000040020001400000000D4000-000000067F000040020001400000000D8000__000000931B9A2710\n000000067F0000400200014000
00000D7659-000000067F000040020001400000000E0026__000000739A920D71-0000008D2DB5E0C1\n000000067F000040020001400000000D8000-000000067F000040020001400000000DC000__000000914E3F38F0\n000000067F000040020001400000000D8000-000000067F000040020001400000000DC000__000000931B9A2710\n000000067F000040020001400000000DC000-000000067F000040020001400000000E0000__000000914E3F38F0\n000000067F000040020001400000000DC000-000000067F000040020001400000000E0000__000000931B9A2710\n000000067F000040020001400000000E0000-000000067F000040020001400000000E4000__000000914E3F38F0\n000000067F000040020001400000000E0000-000000067F000040020001400000000E4000__000000931B9A2710\n000000067F000040020001400000000E0026-000000067F000040020001400000000E89F4__000000739A920D71-0000008D2DB5E0C1\n000000067F000040020001400000000E4000-000000067F000040020001400000000E8000__000000914E3F38F0\n000000067F000040020001400000000E4000-000000067F000040020001400000000E8000__000000931B9A2710\n000000067F000040020001400000000E8000-000000067F000040020001400000000EC000__000000914E3F38F0\n000000067F000040020001400000000E8000-000000067F000040020001400000000EC000__000000931B9A2710\n000000067F000040020001400000000E89F4-000000067F000040020001400000000F13B1__000000739A920D71-0000008D2DB5E0C1\n000000067F000040020001400000000EC000-000000067F000040020001400000000F0000__000000914E3F38F0\n000000067F000040020001400000000EC000-000000067F000040020001400000000F0000__000000931B9A2710\n000000067F000040020001400000000F0000-000000067F000040020001400000000F4000__000000914E3F38F0\n000000067F000040020001400000000F0000-000000067F000040020001400000000F4000__000000931B9A2710\n000000067F000040020001400000000F13B1-000000067F000040020001400000000F9D77__000000739A920D71-0000008D2DB5E0C1\n000000067F000040020001400000000F4000-000000067F000040020001400000000F8000__000000914E3F38F0\n000000067F000040020001400000000F4000-000000067F000040020001400000000F8000__000000931B9A2710\n000000067F000040020001400000000F8000-000000067F000040020001400000000FC000__000000914E3F38F0\n00000
0067F000040020001400000000F8000-000000067F000040020001400000000FC000__000000931B9A2710\n000000067F000040020001400000000F9D77-000000067F00004002000140000000102774__000000739A920D71-0000008D2DB5E0C1\n000000067F000040020001400000000FC000-000000067F00004002000140000000100000__000000914E3F38F0\n000000067F000040020001400000000FC000-000000067F00004002000140000000100000__000000931B9A2710\n000000067F00004002000140000000100000-000000067F00004002000140000000104000__000000914E3F38F0\n000000067F00004002000140000000100000-000000067F00004002000140000000104000__000000931B9A2710\n000000067F00004002000140000000102774-000000067F0000400200014000000010B172__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000104000-000000067F00004002000140000000108000__000000914E3F38F0\n000000067F00004002000140000000104000-000000067F00004002000140000000108000__000000931B9A2710\n000000067F00004002000140000000108000-000000067F0000400200014000000010C000__000000914E3F38F0\n000000067F00004002000140000000108000-000000067F0000400200014000000010C000__000000931B9A2710\n000000067F0000400200014000000010B172-000000067F00004002000140000000113B64__000000739A920D71-0000008D2DB5E0C1\n000000067F0000400200014000000010C000-000000067F00004002000140000000110000__000000914E3F38F0\n000000067F0000400200014000000010C000-000000067F00004002000140000000110000__000000931B9A2710\n000000067F00004002000140000000110000-000000067F00004002000140000000114000__000000914E3F38F0\n000000067F00004002000140000000110000-000000067F00004002000140000000114000__000000931B9A2710\n000000067F00004002000140000000113B64-000000067F0000400200014000000011C533__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000114000-000000067F00004002000140000000118000__000000914E3F38F0\n000000067F00004002000140000000114000-000000067F00004002000140000000118000__000000931B9A2710\n000000067F00004002000140000000118000-000000067F0000400200014000000011C000__000000914E3F38F0\n000000067F00004002000140000000118000-000000067F0000400200014000000011C000__00
0000931B9A2710\n000000067F0000400200014000000011C000-000000067F00004002000140000000120000__000000914E3F38F0\n000000067F0000400200014000000011C000-000000067F00004002000140000000120000__000000931B9A2710\n000000067F0000400200014000000011C533-000000067F00004002000140000000124EF8__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000120000-000000067F00004002000140000000124000__000000914E3F38F0\n000000067F00004002000140000000120000-000000067F00004002000140000000124000__000000931B9A2710\n000000067F00004002000140000000124000-000000067F00004002000140000000128000__000000914E3F38F0\n000000067F00004002000140000000124000-000000067F00004002000140000000128000__000000931B9A2710\n000000067F00004002000140000000124EF8-000000067F0000400200014000000012D8AC__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000128000-000000067F0000400200014000000012C000__000000914E3F38F0\n000000067F00004002000140000000128000-000000067F0000400200014000000012C000__000000931B9A2710\n000000067F0000400200014000000012C000-000000067F00004002000140000000130000__000000914E3F38F0\n000000067F0000400200014000000012C000-000000067F00004002000140000000130000__000000931B9A2710\n000000067F0000400200014000000012D8AC-000000067F00004002000140000000136277__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000130000-000000067F00004002000140000000134000__000000914E3F38F0\n000000067F00004002000140000000130000-000000067F00004002000140000000134000__000000931B9A2710\n000000067F00004002000140000000134000-000000067F00004002000140000000138000__000000914E3F38F0\n000000067F00004002000140000000134000-000000067F00004002000140000000138000__000000931B9A2710\n000000067F00004002000140000000136277-000000067F0000400200014000000013EC72__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000138000-000000067F0000400200014000000013C000__000000914E3F38F0\n000000067F00004002000140000000138000-000000067F0000400200014000000013C000__000000931B9A2710\n000000067F0000400200014000000013C000-000000067F000040020
00140000000140000__000000914E3F38F0\n000000067F0000400200014000000013C000-000000067F00004002000140000000140000__000000931B9A2710\n000000067F0000400200014000000013EC72-000000067F0000400200014000000014766F__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000140000-000000067F00004002000140000000144000__000000914E3F38F0\n000000067F00004002000140000000140000-000000067F00004002000140000000144000__000000931B9A2710\n000000067F00004002000140000000144000-000000067F00004002000140000000148000__000000914E3F38F0\n000000067F00004002000140000000144000-000000067F00004002000140000000148000__000000931B9A2710\n000000067F0000400200014000000014766F-000000067F00004002000140000000150061__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000148000-000000067F0000400200014000000014C000__000000914E3F38F0\n000000067F00004002000140000000148000-000000067F0000400200014000000014C000__000000931B9A2710\n000000067F0000400200014000000014C000-000000067F00004002000140000000150000__000000914E3F38F0\n000000067F0000400200014000000014C000-000000067F00004002000140000000150000__000000931B9A2710\n000000067F00004002000140000000150000-000000067F00004002000140000000154000__000000914E3F38F0\n000000067F00004002000140000000150000-000000067F00004002000140000000154000__000000931B9A2710\n000000067F00004002000140000000150061-000000067F00004002000140000000158A3C__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000154000-000000067F00004002000140000000158000__000000914E3F38F0\n000000067F00004002000140000000154000-000000067F00004002000140000000158000__000000931B9A2710\n000000067F00004002000140000000158000-000000067F0000400200014000000015C000__000000914E3F38F0\n000000067F00004002000140000000158000-000000067F0000400200014000000015C000__000000931B9A2710\n000000067F00004002000140000000158A3C-000000067F000040020001400000001613FB__000000739A920D71-0000008D2DB5E0C1\n000000067F0000400200014000000015C000-000000067F00004002000140000000160000__000000914E3F38F0\n000000067F0000400200014000000015C00
0-000000067F00004002000140000000160000__000000931B9A2710\n000000067F00004002000140000000160000-000000067F00004002000140000000164000__000000914E3F38F0\n000000067F00004002000140000000160000-000000067F00004002000140000000164000__000000931B9A2710\n000000067F000040020001400000001613FB-000000067F00004002000140000000169DB2__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000164000-000000067F00004002000140000000168000__000000914E3F38F0\n000000067F00004002000140000000164000-000000067F00004002000140000000168000__000000931B9A2710\n000000067F00004002000140000000168000-000000067F0000400200014000000016C000__000000914E3F38F0\n000000067F00004002000140000000168000-000000067F0000400200014000000016C000__000000931B9A2710\n000000067F00004002000140000000169DB2-000000067F00004002000140000000172788__000000739A920D71-0000008D2DB5E0C1\n000000067F0000400200014000000016C000-000000067F00004002000140000000170000__000000914E3F38F0\n000000067F0000400200014000000016C000-000000067F00004002000140000000170000__000000931B9A2710\n000000067F00004002000140000000170000-000000067F00004002000140000000174000__000000914E3F38F0\n000000067F00004002000140000000170000-000000067F00004002000140000000174000__000000931B9A2710\n000000067F00004002000140000000172788-000000067F0000400200014000000017B17E__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000174000-000000067F00004002000140000000178000__000000914E3F38F0\n000000067F00004002000140000000174000-000000067F00004002000140000000178000__000000931B9A2710\n000000067F00004002000140000000178000-000000067F0000400200014000000017C000__000000914E3F38F0\n000000067F00004002000140000000178000-000000067F0000400200014000000017C000__000000931B9A2710\n000000067F0000400200014000000017B17E-000000067F00004002000140000000183B77__000000739A920D71-0000008D2DB5E0C1\n000000067F0000400200014000000017C000-000000067F00004002000140000000180000__000000914E3F38F0\n000000067F0000400200014000000017C000-000000067F00004002000140000000180000__000000931B9A2710\n000000067F0000
4002000140000000180000-000000067F00004002000140000000184000__000000914E3F38F0\n000000067F00004002000140000000180000-000000067F00004002000140000000184000__000000931B9A2710\n000000067F00004002000140000000183B77-000000067F0000400200014000000018C56B__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000184000-000000067F00004002000140000000188000__000000914E3F38F0\n000000067F00004002000140000000184000-000000067F00004002000140000000188000__000000931B9A2710\n000000067F00004002000140000000188000-000000067F0000400200014000000018C000__000000914E3F38F0\n000000067F00004002000140000000188000-000000067F0000400200014000000018C000__000000931B9A2710\n000000067F0000400200014000000018C000-000000067F00004002000140000000190000__000000914E3F38F0\n000000067F0000400200014000000018C000-000000067F00004002000140000000190000__000000931B9A2710\n000000067F0000400200014000000018C56B-000000067F00004002000140000000194F47__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000190000-000000067F00004002000140000000194000__000000914E3F38F0\n000000067F00004002000140000000190000-000000067F00004002000140000000194000__000000931B9A2710\n000000067F00004002000140000000194000-000000067F00004002000140000000198000__000000914E3F38F0\n000000067F00004002000140000000194000-000000067F00004002000140000000198000__000000931B9A2710\n000000067F00004002000140000000194F47-000000067F0000400200014000000019D8FE__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000198000-000000067F0000400200014000000019C000__000000914E3F38F0\n000000067F00004002000140000000198000-000000067F0000400200014000000019C000__000000931B9A2710\n000000067F0000400200014000000019C000-000000067F000040020001400000001A0000__000000914E3F38F0\n000000067F0000400200014000000019C000-000000067F000040020001400000001A0000__000000931B9A2710\n000000067F0000400200014000000019D8FE-000000067F000040020001400000001A62B8__000000739A920D71-0000008D2DB5E0C1\n000000067F000040020001400000001A0000-000000067F000040020001400000001A4000__000000914E3
F38F0\n000000067F000040020001400000001A0000-000000067F000040020001400000001A4000__000000931B9A2710\n000000067F000040020001400000001A4000-000000067F000040020001400000001A8000__000000914E3F38F0\n000000067F000040020001400000001A4000-000000067F000040020001400000001A8000__000000931B9A2710\n000000067F000040020001400000001A62B8-000000067F000040020001400000001AEC8F__000000739A920D71-0000008D2DB5E0C1\n000000067F000040020001400000001A8000-000000067F000040020001400000001AC000__000000914E3F38F0\n000000067F000040020001400000001A8000-000000067F000040020001400000001AC000__000000931B9A2710\n000000067F000040020001400000001AC000-000000067F000040020001400000001B0000__000000914E3F38F0\n000000067F000040020001400000001AC000-000000067F000040020001400000001B0000__000000931B9A2710\n000000067F000040020001400000001AEC8F-000000067F000040020001400000001B7686__000000739A920D71-0000008D2DB5E0C1\n000000067F000040020001400000001B0000-000000067F000040020001400000001B4000__000000914E3F38F0\n000000067F000040020001400000001B0000-000000067F000040020001400000001B4000__000000931B9A2710\n000000067F000040020001400000001B4000-000000067F000040020001400000001B8000__000000914E3F38F0\n000000067F000040020001400000001B4000-000000067F000040020001400000001B8000__000000931B9A2710\n000000067F000040020001400000001B7686-000000067F000040020001400000001C0079__000000739A920D71-0000008D2DB5E0C1\n000000067F000040020001400000001B8000-000000067F000040020001400000001BC000__000000914E3F38F0\n000000067F000040020001400000001B8000-000000067F000040020001400000001BC000__000000931B9A2710\n000000067F000040020001400000001BC000-000000067F000040020001400000001C0000__000000914E3F38F0\n000000067F000040020001400000001BC000-000000067F000040020001400000001C0000__000000931B9A2710\n000000067F000040020001400000001C0000-000000067F000040020001400000001C4000__000000914E3F38F0\n000000067F000040020001400000001C0000-000000067F000040020001400000001C4000__000000931B9A2710\n000000067F000040020001400000001C0079-000000067F000040020001400000001C8A6F__0000007
39A920D71-0000008D2DB5E0C1\n000000067F000040020001400000001C4000-000000067F000040020001400000001C8000__000000914E3F38F0\n000000067F000040020001400000001C4000-000000067F000040020001400000001C8000__000000931B9A2710\n000000067F000040020001400000001C8000-000000067F000040020001400000001CC000__000000914E3F38F0\n000000067F000040020001400000001C8000-000000067F000040020001400000001CC000__000000931B9A2710\n000000067F000040020001400000001C8A6F-000000067F000040020001400000001D1442__000000739A920D71-0000008D2DB5E0C1\n000000067F000040020001400000001CC000-000000067F000040020001400000001D0000__000000914E3F38F0\n000000067F000040020001400000001CC000-000000067F000040020001400000001D0000__000000931B9A2710\n000000067F000040020001400000001D0000-000000067F000040020001400000001D4000__000000914E3F38F0\n000000067F000040020001400000001D0000-000000067F000040020001400000001D4000__000000931B9A2710\n000000067F000040020001400000001D1442-000000067F000040020001400000001D9DF3__000000739A920D71-0000008D2DB5E0C1\n000000067F000040020001400000001D4000-000000067F000040020001400000001D8000__000000914E3F38F0\n000000067F000040020001400000001D4000-000000067F000040020001400000001D8000__000000931B9A2710\n000000067F000040020001400000001D8000-000000067F000040020001400000001DC000__000000914E3F38F0\n000000067F000040020001400000001D8000-000000067F000040020001400000001DC000__000000931B9A2710\n000000067F000040020001400000001D9DF3-000000067F000040020001400000001E27AE__000000739A920D71-0000008D2DB5E0C1\n000000067F000040020001400000001DC000-000000067F000040020001400000001E0000__000000914E3F38F0\n000000067F000040020001400000001DC000-000000067F000040020001400000001E0000__000000931B9A2710\n000000067F000040020001400000001E0000-000000067F000040020001400000001E4000__000000914E3F38F0\n000000067F000040020001400000001E0000-000000067F000040020001400000001E4000__000000931B9A2710\n000000067F000040020001400000001E27AE-000000067F000040020001400000001EB193__000000739A920D71-0000008D2DB5E0C1\n000000067F000040020001400000001E4000-0000000
67F000040020001400000001E8000__000000914E3F38F0\n000000067F000040020001400000001E4000-000000067F000040020001400000001E8000__000000931B9A2710\n000000067F000040020001400000001E8000-000000067F000040020001400000001EC000__000000914E3F38F0\n000000067F000040020001400000001E8000-000000067F000040020001400000001EC000__000000931B9A2710\n000000067F000040020001400000001EB193-000000067F000040020001400000001F3B93__000000739A920D71-0000008D2DB5E0C1\n000000067F000040020001400000001EC000-000000067F000040020001400000001F0000__000000914E3F38F0\n000000067F000040020001400000001EC000-000000067F000040020001400000001F0000__000000931B9A2710\n000000067F000040020001400000001F0000-000000067F000040020001400000001F4000__000000914E3F38F0\n000000067F000040020001400000001F0000-000000067F000040020001400000001F4000__000000931B9A2710\n000000067F000040020001400000001F3B93-000000067F000040020001400000001FC594__000000739A920D71-0000008D2DB5E0C1\n000000067F000040020001400000001F4000-000000067F000040020001400000001F8000__000000914E3F38F0\n000000067F000040020001400000001F4000-000000067F000040020001400000001F8000__000000931B9A2710\n000000067F000040020001400000001F8000-000000067F000040020001400000001FC000__000000914E3F38F0\n000000067F000040020001400000001F8000-000000067F000040020001400000001FC000__000000931B9A2710\n000000067F000040020001400000001FC000-000000067F00004002000140000000200000__000000914E3F38F0\n000000067F000040020001400000001FC000-000000067F00004002000140000000200000__000000931B9A2710\n000000067F000040020001400000001FC594-000000067F00004002000140000000204F82__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000200000-000000067F00004002000140000000204000__000000914E3F38F0\n000000067F00004002000140000000200000-000000067F00004002000140000000204000__000000931B9A2710\n000000067F00004002000140000000204000-000000067F00004002000140000000208000__000000914E3F38F0\n000000067F00004002000140000000204000-000000067F00004002000140000000208000__000000931B9A2710\n000000067F00004002000140000000204F82-000
000067F0000400200014000000020D952__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000208000-000000067F0000400200014000000020C000__000000914E3F38F0\n000000067F00004002000140000000208000-000000067F0000400200014000000020C000__000000931B9A2710\n000000067F0000400200014000000020C000-000000067F00004002000140000000210000__000000914E3F38F0\n000000067F0000400200014000000020C000-000000067F00004002000140000000210000__000000931B9A2710\n000000067F0000400200014000000020D952-000000067F00004002000140000000216305__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000210000-000000067F00004002000140000000214000__000000914E3F38F0\n000000067F00004002000140000000210000-000000067F00004002000140000000214000__000000931B9A2710\n000000067F00004002000140000000214000-000000067F00004002000140000000218000__000000914E3F38F0\n000000067F00004002000140000000214000-000000067F00004002000140000000218000__000000931B9A2710\n000000067F00004002000140000000216305-000000067F0000400200014000000021ECB6__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000218000-000000067F0000400200014000000021C000__000000914E3F38F0\n000000067F00004002000140000000218000-000000067F0000400200014000000021C000__000000931B9A2710\n000000067F0000400200014000000021C000-000000067F00004002000140000000220000__000000914E3F38F0\n000000067F0000400200014000000021C000-000000067F00004002000140000000220000__000000931B9A2710\n000000067F0000400200014000000021ECB6-000000067F000040020001400000002276A1__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000220000-000000067F00004002000140000000224000__000000914E3F38F0\n000000067F00004002000140000000220000-000000067F00004002000140000000224000__000000931B9A2710\n000000067F00004002000140000000224000-000000067F00004002000140000000228000__000000914E3F38F0\n000000067F00004002000140000000224000-000000067F00004002000140000000228000__000000931B9A2710\n000000067F000040020001400000002276A1-000000067F0000400200014000000023009D__000000739A920D71-0000008D2DB5E0C1\n00
0000067F00004002000140000000228000-000000067F0000400200014000000022C000__000000914E3F38F0\n000000067F00004002000140000000228000-000000067F0000400200014000000022C000__000000931B9A2710\n000000067F0000400200014000000022C000-000000067F00004002000140000000230000__000000914E3F38F0\n000000067F0000400200014000000022C000-000000067F00004002000140000000230000__000000931B9A2710\n000000067F00004002000140000000230000-000000067F00004002000140000000234000__000000914E3F38F0\n000000067F00004002000140000000230000-000000067F00004002000140000000234000__000000931B9A2710\n000000067F0000400200014000000023009D-000000067F00004002000140000000238AA0__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000234000-000000067F00004002000140000000238000__000000914E3F38F0\n000000067F00004002000140000000234000-000000067F00004002000140000000238000__000000931B9A2710\n000000067F00004002000140000000238000-000000067F0000400200014000000023C000__000000914E3F38F0\n000000067F00004002000140000000238000-000000067F0000400200014000000023C000__000000931B9A2710\n000000067F00004002000140000000238AA0-000000067F00004002000140000000241480__000000739A920D71-0000008D2DB5E0C1\n000000067F0000400200014000000023C000-000000067F00004002000140000000240000__000000914E3F38F0\n000000067F0000400200014000000023C000-000000067F00004002000140000000240000__000000931B9A2710\n000000067F00004002000140000000240000-000000067F00004002000140000000244000__000000914E3F38F0\n000000067F00004002000140000000240000-000000067F00004002000140000000244000__000000931B9A2710\n000000067F00004002000140000000241480-000000067F00004002000140000000249E56__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000244000-000000067F00004002000140000000248000__000000914E3F38F0\n000000067F00004002000140000000244000-000000067F00004002000140000000248000__000000931B9A2710\n000000067F00004002000140000000248000-000000067F0000400200014000000024C000__000000914E3F38F0\n000000067F00004002000140000000248000-000000067F0000400200014000000024C000__000000931B9A2710
\n000000067F00004002000140000000249E56-000000067F00004002000140000000252803__000000739A920D71-0000008D2DB5E0C1\n000000067F0000400200014000000024C000-000000067F00004002000140000000250000__000000914E3F38F0\n000000067F0000400200014000000024C000-000000067F00004002000140000000250000__000000931B9A2710\n000000067F00004002000140000000250000-000000067F00004002000140000000254000__000000914E3F38F0\n000000067F00004002000140000000250000-000000067F00004002000140000000254000__000000931B9A2710\n000000067F00004002000140000000252803-000000067F0000400200014000000025B1BA__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000254000-000000067F00004002000140000000258000__000000914E3F38F0\n000000067F00004002000140000000254000-000000067F00004002000140000000258000__000000931B9A2710\n000000067F00004002000140000000258000-000000067F0000400200014000000025C000__000000914E3F38F0\n000000067F00004002000140000000258000-000000067F0000400200014000000025C000__000000931B9A2710\n000000067F0000400200014000000025B1BA-000000067F00004002000140000000263BAA__000000739A920D71-0000008D2DB5E0C1\n000000067F0000400200014000000025C000-000000067F00004002000140000000260000__000000914E3F38F0\n000000067F0000400200014000000025C000-000000067F00004002000140000000260000__000000931B9A2710\n000000067F00004002000140000000260000-000000067F00004002000140000000264000__000000914E3F38F0\n000000067F00004002000140000000260000-000000067F00004002000140000000264000__000000931B9A2710\n000000067F00004002000140000000263BAA-000000067F0000400200014000000026C5A8__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000264000-000000067F00004002000140000000268000__000000914E3F38F0\n000000067F00004002000140000000264000-000000067F00004002000140000000268000__000000931B9A2710\n000000067F00004002000140000000268000-000000067F0000400200014000000026C000__000000914E3F38F0\n000000067F00004002000140000000268000-000000067F0000400200014000000026C000__000000931B9A2710\n000000067F0000400200014000000026C000-000000067F00004002000140000000270
000__000000914E3F38F0\n000000067F0000400200014000000026C000-000000067F00004002000140000000270000__000000931B9A2710\n000000067F0000400200014000000026C5A8-000000067F00004002000140000000274FA4__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000270000-000000067F00004002000140000000274000__000000914E3F38F0\n000000067F00004002000140000000270000-000000067F00004002000140000000274000__000000931B9A2710\n000000067F00004002000140000000274000-000000067F00004002000140000000278000__000000914E3F38F0\n000000067F00004002000140000000274000-000000067F00004002000140000000278000__000000931B9A2710\n000000067F00004002000140000000274FA4-000000067F0000400200014000000027D982__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000278000-000000067F0000400200014000000027C000__000000914E3F38F0\n000000067F00004002000140000000278000-000000067F0000400200014000000027C000__000000931B9A2710\n000000067F0000400200014000000027C000-000000067F00004002000140000000280000__000000914E3F38F0\n000000067F0000400200014000000027C000-000000067F00004002000140000000280000__000000931B9A2710\n000000067F0000400200014000000027D982-000000067F0000400200014000000028634B__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000280000-000000067F00004002000140000000284000__000000914E3F38F0\n000000067F00004002000140000000280000-000000067F00004002000140000000284000__000000931B9A2710\n000000067F00004002000140000000284000-000000067F00004002000140000000288000__000000914E3F38F0\n000000067F00004002000140000000284000-000000067F00004002000140000000288000__000000931B9A2710\n000000067F0000400200014000000028634B-000000067F0000400200014000000028ED00__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000288000-000000067F0000400200014000000028C000__000000914E3F38F0\n000000067F00004002000140000000288000-000000067F0000400200014000000028C000__000000931B9A2710\n000000067F0000400200014000000028C000-000000067F00004002000140000000290000__000000914E3F38F0\n000000067F0000400200014000000028C000-000000067F00
004002000140000000290000__000000931B9A2710\n000000067F0000400200014000000028ED00-000000067F000040020001400000002976BA__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000290000-000000067F00004002000140000000294000__000000914E3F38F0\n000000067F00004002000140000000290000-000000067F00004002000140000000294000__000000931B9A2710\n000000067F00004002000140000000294000-000000067F00004002000140000000298000__000000914E3F38F0\n000000067F00004002000140000000294000-000000067F00004002000140000000298000__000000931B9A2710\n000000067F000040020001400000002976BA-000000067F000040020001400000002A00B5__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000298000-000000067F0000400200014000000029C000__000000914E3F38F0\n000000067F00004002000140000000298000-000000067F0000400200014000000029C000__000000931B9A2710\n000000067F0000400200014000000029C000-000000067F000040020001400000002A0000__000000914E3F38F0\n000000067F0000400200014000000029C000-000000067F000040020001400000002A0000__000000931B9A2710\n000000067F000040020001400000002A0000-000000067F000040020001400000002A4000__000000914E3F38F0\n000000067F000040020001400000002A0000-000000067F000040020001400000002A4000__000000931B9A2710\n000000067F000040020001400000002A00B5-000000067F000040020001400000002A8AB5__000000739A920D71-0000008D2DB5E0C1\n000000067F000040020001400000002A4000-000000067F000040020001400000002A8000__000000914E3F38F0\n000000067F000040020001400000002A4000-000000067F000040020001400000002A8000__000000931B9A2710\n000000067F000040020001400000002A8000-000000067F000040020001400000002AC000__000000914E3F38F0\n000000067F000040020001400000002A8000-000000067F000040020001400000002AC000__000000931B9A2710\n000000067F000040020001400000002A8AB5-000000067F000040020001400000002B14B0__000000739A920D71-0000008D2DB5E0C1\n000000067F000040020001400000002AC000-000000067F000040020001400000002B0000__000000914E3F38F0\n000000067F000040020001400000002AC000-000000067F000040020001400000002B0000__000000931B9A2710\n000000067F000040020001400000
002B0000-000000067F000040020001400000002B4000__000000914E3F38F0\n000000067F000040020001400000002B0000-000000067F000040020001400000002B4000__000000931B9A2710\n000000067F000040020001400000002B14B0-000000067F000040020001400000002B9E90__000000739A920D71-0000008D2DB5E0C1\n000000067F000040020001400000002B4000-000000067F000040020001400000002B8000__000000914E3F38F0\n000000067F000040020001400000002B4000-000000067F000040020001400000002B8000__000000931B9A2710\n000000067F000040020001400000002B8000-000000067F000040020001400000002BC000__000000914E3F38F0\n000000067F000040020001400000002B8000-000000067F000040020001400000002BC000__000000931B9A2710\n000000067F000040020001400000002B9E90-000000067F000040020001400000002C2852__000000739A920D71-0000008D2DB5E0C1\n000000067F000040020001400000002BC000-000000067F000040020001400000002C0000__000000914E3F38F0\n000000067F000040020001400000002BC000-000000067F000040020001400000002C0000__000000931B9A2710\n000000067F000040020001400000002C0000-000000067F000040020001400000002C4000__000000914E3F38F0\n000000067F000040020001400000002C0000-000000067F000040020001400000002C4000__000000931B9A2710\n000000067F000040020001400000002C2852-000000067F000040020001400000002CB205__000000739A920D71-0000008D2DB5E0C1\n000000067F000040020001400000002C4000-000000067F000040020001400000002C8000__000000914E3F38F0\n000000067F000040020001400000002C4000-000000067F000040020001400000002C8000__000000931B9A2710\n000000067F000040020001400000002C8000-000000067F000040020001400000002CC000__000000914E3F38F0\n000000067F000040020001400000002C8000-000000067F000040020001400000002CC000__000000931B9A2710\n000000067F000040020001400000002CB205-000000067F000040020001400000002D3BC7__000000739A920D71-0000008D2DB5E0C1\n000000067F000040020001400000002CC000-000000067F000040020001400000002D0000__000000914E3F38F0\n000000067F000040020001400000002CC000-000000067F000040020001400000002D0000__000000931B9A2710\n000000067F000040020001400000002D0000-000000067F000040020001400000002D4000__000000914E3F38F0\n0000000
67F000040020001400000002D0000-000000067F000040020001400000002D4000__000000931B9A2710\n000000067F000040020001400000002D3BC7-000000067F000040020001400000002DC5BB__000000739A920D71-0000008D2DB5E0C1\n000000067F000040020001400000002D4000-000000067F000040020001400000002D8000__000000914E3F38F0\n000000067F000040020001400000002D4000-000000067F000040020001400000002D8000__000000931B9A2710\n000000067F000040020001400000002D8000-000000067F000040020001400000002DC000__000000914E3F38F0\n000000067F000040020001400000002D8000-000000067F000040020001400000002DC000__000000931B9A2710\n000000067F000040020001400000002DC000-000000067F000040020001400000002E0000__000000914E3F38F0\n000000067F000040020001400000002DC000-000000067F000040020001400000002E0000__000000931B9A2710\n000000067F000040020001400000002DC5BB-000000067F000040020001400000002E4FBB__000000739A920D71-0000008D2DB5E0C1\n000000067F000040020001400000002E0000-000000067F000040020001400000002E4000__000000914E3F38F0\n000000067F000040020001400000002E0000-000000067F000040020001400000002E4000__000000931B9A2710\n000000067F000040020001400000002E4000-000000067F000040020001400000002E8000__000000914E3F38F0\n000000067F000040020001400000002E4000-000000067F000040020001400000002E8000__000000931B9A2710\n000000067F000040020001400000002E4FBB-000000067F000040020001400000002ED9B4__000000739A920D71-0000008D2DB5E0C1\n000000067F000040020001400000002E8000-000000067F000040020001400000002EC000__000000914E3F38F0\n000000067F000040020001400000002E8000-000000067F000040020001400000002EC000__000000931B9A2710\n000000067F000040020001400000002EC000-000000067F000040020001400000002F0000__000000914E3F38F0\n000000067F000040020001400000002EC000-000000067F000040020001400000002F0000__000000931B9A2710\n000000067F000040020001400000002ED9B4-000000067F000040020001400000002F6390__000000739A920D71-0000008D2DB5E0C1\n000000067F000040020001400000002F0000-000000067F000040020001400000002F4000__000000914E3F38F0\n000000067F000040020001400000002F0000-000000067F000040020001400000002F4000__0000
00931B9A2710\n000000067F000040020001400000002F4000-000000067F000040020001400000002F8000__000000914E3F38F0\n000000067F000040020001400000002F4000-000000067F000040020001400000002F8000__000000931B9A2710\n000000067F000040020001400000002F6390-000000067F000040020001400000002FED51__000000739A920D71-0000008D2DB5E0C1\n000000067F000040020001400000002F8000-000000067F000040020001400000002FC000__000000914E3F38F0\n000000067F000040020001400000002F8000-000000067F000040020001400000002FC000__000000931B9A2710\n000000067F000040020001400000002FC000-000000067F00004002000140000000300000__000000914E3F38F0\n000000067F000040020001400000002FC000-000000067F00004002000140000000300000__000000931B9A2710\n000000067F000040020001400000002FED51-000000067F00004002000140000000307706__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000300000-000000067F00004002000140000000304000__000000914E3F38F0\n000000067F00004002000140000000300000-000000067F00004002000140000000304000__000000931B9A2710\n000000067F00004002000140000000304000-000000067F00004002000140000000308000__000000914E3F38F0\n000000067F00004002000140000000304000-000000067F00004002000140000000308000__000000931B9A2710\n000000067F00004002000140000000307706-000000067F000040020001400000003100CD__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000308000-000000067F0000400200014000000030C000__000000914E3F38F0\n000000067F00004002000140000000308000-000000067F0000400200014000000030C000__000000931B9A2710\n000000067F0000400200014000000030C000-000000067F00004002000140000000310000__000000914E3F38F0\n000000067F0000400200014000000030C000-000000067F00004002000140000000310000__000000931B9A2710\n000000067F00004002000140000000310000-000000067F00004002000140000000314000__000000914E3F38F0\n000000067F00004002000140000000310000-000000067F00004002000140000000314000__000000931B9A2710\n000000067F000040020001400000003100CD-000000067F00004002000140000000318AC7__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000314000-000000067F00004002000
140000000318000__000000914E3F38F0\n000000067F00004002000140000000314000-000000067F00004002000140000000318000__000000931B9A2710\n000000067F00004002000140000000318000-000000067F0000400200014000000031C000__000000914E3F38F0\n000000067F00004002000140000000318000-000000067F0000400200014000000031C000__000000931B9A2710\n000000067F00004002000140000000318AC7-000000067F000040020001400000003214C9__000000739A920D71-0000008D2DB5E0C1\n000000067F0000400200014000000031C000-000000067F00004002000140000000320000__000000914E3F38F0\n000000067F0000400200014000000031C000-000000067F00004002000140000000320000__000000931B9A2710\n000000067F00004002000140000000320000-000000067F00004002000140000000324000__000000914E3F38F0\n000000067F00004002000140000000320000-000000067F00004002000140000000324000__000000931B9A2710\n000000067F000040020001400000003214C9-000000067F00004002000140000000329EC1__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000324000-000000067F00004002000140000000328000__000000914E3F38F0\n000000067F00004002000140000000324000-000000067F00004002000140000000328000__000000931B9A2710\n000000067F00004002000140000000328000-000000067F0000400200014000000032C000__000000914E3F38F0\n000000067F00004002000140000000328000-000000067F0000400200014000000032C000__000000931B9A2710\n000000067F00004002000140000000329EC1-000000067F0000400200014000000033289A__000000739A920D71-0000008D2DB5E0C1\n000000067F0000400200014000000032C000-000000067F00004002000140000000330000__000000914E3F38F0\n000000067F0000400200014000000032C000-000000067F00004002000140000000330000__000000931B9A2710\n000000067F00004002000140000000330000-000000067F00004002000140000000334000__000000914E3F38F0\n000000067F00004002000140000000330000-000000067F00004002000140000000334000__000000931B9A2710\n000000067F0000400200014000000033289A-000000067F0000400200014000000033B25C__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000334000-000000067F00004002000140000000338000__000000914E3F38F0\n000000067F00004002000140000000334000-
000000067F00004002000140000000338000__000000931B9A2710\n000000067F00004002000140000000338000-000000067F0000400200014000000033C000__000000914E3F38F0\n000000067F00004002000140000000338000-000000067F0000400200014000000033C000__000000931B9A2710\n000000067F0000400200014000000033B25C-000000067F00004002000140000000343C39__000000739A920D71-0000008D2DB5E0C1\n000000067F0000400200014000000033C000-000000067F00004002000140000000340000__000000914E3F38F0\n000000067F0000400200014000000033C000-000000067F00004002000140000000340000__000000931B9A2710\n000000067F00004002000140000000340000-000000067F00004002000140000000344000__000000914E3F38F0\n000000067F00004002000140000000340000-000000067F00004002000140000000344000__000000931B9A2710\n000000067F00004002000140000000343C39-000000067F0000400200014000000034C60F__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000344000-000000067F00004002000140000000348000__000000914E3F38F0\n000000067F00004002000140000000344000-000000067F00004002000140000000348000__000000931B9A2710\n000000067F00004002000140000000348000-000000067F0000400200014000000034C000__000000914E3F38F0\n000000067F00004002000140000000348000-000000067F0000400200014000000034C000__000000931B9A2710\n000000067F0000400200014000000034C000-000000067F00004002000140000000350000__000000914E3F38F0\n000000067F0000400200014000000034C000-000000067F00004002000140000000350000__000000931B9A2710\n000000067F0000400200014000000034C60F-000000067F00004002000140000000354FEE__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000350000-000000067F00004002000140000000354000__000000914E3F38F0\n000000067F00004002000140000000350000-000000067F00004002000140000000354000__000000931B9A2710\n000000067F00004002000140000000354000-000000067F00004002000140000000358000__000000914E3F38F0\n000000067F00004002000140000000354000-000000067F00004002000140000000358000__000000931B9A2710\n000000067F00004002000140000000354FEE-000000067F0000400200014000000035D9E2__000000739A920D71-0000008D2DB5E0C1\n000000067F000040
02000140000000358000-000000067F0000400200014000000035C000__000000914E3F38F0\n000000067F00004002000140000000358000-000000067F0000400200014000000035C000__000000931B9A2710\n000000067F0000400200014000000035C000-000000067F00004002000140000000360000__000000914E3F38F0\n000000067F0000400200014000000035C000-000000067F00004002000140000000360000__000000931B9A2710\n000000067F0000400200014000000035D9E2-000000067F000040020001400000003663D8__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000360000-000000067F00004002000140000000364000__000000914E3F38F0\n000000067F00004002000140000000360000-000000067F00004002000140000000364000__000000931B9A2710\n000000067F00004002000140000000364000-000000067F00004002000140000000368000__000000914E3F38F0\n000000067F00004002000140000000364000-000000067F00004002000140000000368000__000000931B9A2710\n000000067F000040020001400000003663D8-000000067F0000400200014000000036EDB9__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000368000-000000067F0000400200014000000036C000__000000914E3F38F0\n000000067F00004002000140000000368000-000000067F0000400200014000000036C000__000000931B9A2710\n000000067F0000400200014000000036C000-000000067F00004002000140000000370000__000000914E3F38F0\n000000067F0000400200014000000036C000-000000067F00004002000140000000370000__000000931B9A2710\n000000067F0000400200014000000036EDB9-000000067F00004002000140000000377794__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000370000-000000067F00004002000140000000374000__000000914E3F38F0\n000000067F00004002000140000000370000-000000067F00004002000140000000374000__000000931B9A2710\n000000067F00004002000140000000374000-000000067F00004002000140000000378000__000000914E3F38F0\n000000067F00004002000140000000374000-000000067F00004002000140000000378000__000000931B9A2710\n000000067F00004002000140000000377794-000000067F00004002000140000000380157__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000378000-000000067F0000400200014000000037C000__000000914E3F3
8F0\n000000067F00004002000140000000378000-000000067F0000400200014000000037C000__000000931B9A2710\n000000067F0000400200014000000037C000-000000067F00004002000140000000380000__000000914E3F38F0\n000000067F0000400200014000000037C000-000000067F00004002000140000000380000__000000931B9A2710\n000000067F00004002000140000000380000-000000067F00004002000140000000384000__000000914E3F38F0\n000000067F00004002000140000000380000-000000067F00004002000140000000384000__000000931B9A2710\n000000067F00004002000140000000380157-000000067F00004002000140000000388B37__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000384000-000000067F00004002000140000000388000__000000914E3F38F0\n000000067F00004002000140000000384000-000000067F00004002000140000000388000__000000931B9A2710\n000000067F00004002000140000000388000-000000067F0000400200014000000038C000__000000914E3F38F0\n000000067F00004002000140000000388000-000000067F0000400200014000000038C000__000000931B9A2710\n000000067F00004002000140000000388B37-000000067F0000400200014000000039151E__000000739A920D71-0000008D2DB5E0C1\n000000067F0000400200014000000038C000-000000067F00004002000140000000390000__000000914E3F38F0\n000000067F0000400200014000000038C000-000000067F00004002000140000000390000__000000931B9A2710\n000000067F00004002000140000000390000-000000067F00004002000140000000394000__000000914E3F38F0\n000000067F00004002000140000000390000-000000067F00004002000140000000394000__000000931B9A2710\n000000067F0000400200014000000039151E-000000067F00004002000140000000399F01__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000394000-000000067F00004002000140000000398000__000000914E3F38F0\n000000067F00004002000140000000394000-000000067F00004002000140000000398000__000000931B9A2710\n000000067F00004002000140000000398000-000000067F0000400200014000000039C000__000000914E3F38F0\n000000067F00004002000140000000398000-000000067F0000400200014000000039C000__000000931B9A2710\n000000067F00004002000140000000399F01-000000067F000040020001400000003A28D5__000000739
A920D71-0000008D2DB5E0C1\n000000067F0000400200014000000039C000-000000067F000040020001400000003A0000__000000914E3F38F0\n000000067F0000400200014000000039C000-000000067F000040020001400000003A0000__000000931B9A2710\n000000067F000040020001400000003A0000-000000067F000040020001400000003A4000__000000914E3F38F0\n000000067F000040020001400000003A0000-000000067F000040020001400000003A4000__000000931B9A2710\n000000067F000040020001400000003A28D5-000000067F000040020001400000003AB2B1__000000739A920D71-0000008D2DB5E0C1\n000000067F000040020001400000003A4000-000000067F000040020001400000003A8000__000000914E3F38F0\n000000067F000040020001400000003A4000-000000067F000040020001400000003A8000__000000931B9A2710\n000000067F000040020001400000003A8000-000000067F000040020001400000003AC000__000000914E3F38F0\n000000067F000040020001400000003A8000-000000067F000040020001400000003AC000__000000931B9A2710\n000000067F000040020001400000003AB2B1-000000067F000040020001400000003B3C78__000000739A920D71-0000008D2DB5E0C1\n000000067F000040020001400000003AC000-000000067F000040020001400000003B0000__000000914E3F38F0\n000000067F000040020001400000003AC000-000000067F000040020001400000003B0000__000000931B9A2710\n000000067F000040020001400000003B0000-000000067F000040020001400000003B4000__000000914E3F38F0\n000000067F000040020001400000003B0000-000000067F000040020001400000003B4000__000000931B9A2710\n000000067F000040020001400000003B3C78-000000067F000040020001400000003BC640__000000739A920D71-0000008D2DB5E0C1\n000000067F000040020001400000003B4000-000000067F000040020001400000003B8000__000000914E3F38F0\n000000067F000040020001400000003B4000-000000067F000040020001400000003B8000__000000931B9A2710\n000000067F000040020001400000003B8000-000000067F000040020001400000003BC000__000000914E3F38F0\n000000067F000040020001400000003B8000-000000067F000040020001400000003BC000__000000931B9A2710\n000000067F000040020001400000003BC000-000000067F000040020001400000003C0000__000000914E3F38F0\n000000067F000040020001400000003BC000-000000067F0000400200014000
00003C0000__000000931B9A2710\n000000067F000040020001400000003BC640-000000067F000040020001400000003C5027__000000739A920D71-0000008D2DB5E0C1\n000000067F000040020001400000003C0000-000000067F000040020001400000003C4000__000000914E3F38F0\n000000067F000040020001400000003C0000-000000067F000040020001400000003C4000__000000931B9A2710\n000000067F000040020001400000003C4000-000000067F000040020001400000003C8000__000000914E3F38F0\n000000067F000040020001400000003C4000-000000067F000040020001400000003C8000__000000931B9A2710\n000000067F000040020001400000003C5027-000000067F000040020001400000003CDA16__000000739A920D71-0000008D2DB5E0C1\n000000067F000040020001400000003C8000-000000067F000040020001400000003CC000__000000914E3F38F0\n000000067F000040020001400000003C8000-000000067F000040020001400000003CC000__000000931B9A2710\n000000067F000040020001400000003CC000-000000067F000040020001400000003D0000__000000914E3F38F0\n000000067F000040020001400000003CC000-000000067F000040020001400000003D0000__000000931B9A2710\n000000067F000040020001400000003CDA16-000000067F000040020001400000003D6401__000000739A920D71-0000008D2DB5E0C1\n000000067F000040020001400000003D0000-000000067F000040020001400000003D4000__000000914E3F38F0\n000000067F000040020001400000003D0000-000000067F000040020001400000003D4000__000000931B9A2710\n000000067F000040020001400000003D4000-000000067F000040020001400000003D8000__000000914E3F38F0\n000000067F000040020001400000003D4000-000000067F000040020001400000003D8000__000000931B9A2710\n000000067F000040020001400000003D6401-000000067F000040020001400000003DEDD4__000000739A920D71-0000008D2DB5E0C1\n000000067F000040020001400000003D8000-000000067F000040020001400000003DC000__000000914E3F38F0\n000000067F000040020001400000003D8000-000000067F000040020001400000003DC000__000000931B9A2710\n000000067F000040020001400000003DC000-000000067F000040020001400000003E0000__000000914E3F38F0\n000000067F000040020001400000003DC000-000000067F000040020001400000003E0000__000000931B9A2710\n000000067F000040020001400000003DEDD4-00000
0067F000040020001400000003E77A4__000000739A920D71-0000008D2DB5E0C1\n000000067F000040020001400000003E0000-000000067F000040020001400000003E4000__000000914E3F38F0\n000000067F000040020001400000003E0000-000000067F000040020001400000003E4000__000000931B9A2710\n000000067F000040020001400000003E4000-000000067F000040020001400000003E8000__000000914E3F38F0\n000000067F000040020001400000003E4000-000000067F000040020001400000003E8000__000000931B9A2710\n000000067F000040020001400000003E77A4-000000067F000040020001400000003F016A__000000739A920D71-0000008D2DB5E0C1\n000000067F000040020001400000003E8000-000000067F000040020001400000003EC000__000000914E3F38F0\n000000067F000040020001400000003E8000-000000067F000040020001400000003EC000__000000931B9A2710\n000000067F000040020001400000003EC000-000000067F000040020001400000003F0000__000000914E3F38F0\n000000067F000040020001400000003EC000-000000067F000040020001400000003F0000__000000931B9A2710\n000000067F000040020001400000003F0000-000000067F000040020001400000003F4000__000000914E3F38F0\n000000067F000040020001400000003F0000-000000067F000040020001400000003F4000__000000931B9A2710\n000000067F000040020001400000003F016A-000000067F000040020001400000003F8B44__000000739A920D71-0000008D2DB5E0C1\n000000067F000040020001400000003F4000-000000067F000040020001400000003F8000__000000914E3F38F0\n000000067F000040020001400000003F4000-000000067F000040020001400000003F8000__000000931B9A2710\n000000067F000040020001400000003F8000-000000067F000040020001400000003FC000__000000914E3F38F0\n000000067F000040020001400000003F8000-000000067F000040020001400000003FC000__000000931B9A2710\n000000067F000040020001400000003F8B44-000000067F0000400200014000000040152F__000000739A920D71-0000008D2DB5E0C1\n000000067F000040020001400000003FC000-000000067F00004002000140000000400000__000000914E3F38F0\n000000067F000040020001400000003FC000-000000067F00004002000140000000400000__000000931B9A2710\n000000067F00004002000140000000400000-000000067F00004002000140000000404000__000000914E3F38F0\n000000067F00004002000
140000000400000-000000067F00004002000140000000404000__000000931B9A2710\n000000067F0000400200014000000040152F-000000067F00004002000140000000409F1B__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000404000-000000067F00004002000140000000408000__000000914E3F38F0\n000000067F00004002000140000000404000-000000067F00004002000140000000408000__000000931B9A2710\n000000067F00004002000140000000408000-000000067F0000400200014000000040C000__000000914E3F38F0\n000000067F00004002000140000000408000-000000067F0000400200014000000040C000__000000931B9A2710\n000000067F00004002000140000000409F1B-000000067F000040020001400000004128FB__000000739A920D71-0000008D2DB5E0C1\n000000067F0000400200014000000040C000-000000067F00004002000140000000410000__000000914E3F38F0\n000000067F0000400200014000000040C000-000000067F00004002000140000000410000__000000931B9A2710\n000000067F00004002000140000000410000-000000067F00004002000140000000414000__000000914E3F38F0\n000000067F00004002000140000000410000-000000067F00004002000140000000414000__000000931B9A2710\n000000067F000040020001400000004128FB-000000067F0000400200014000000041B2E2__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000414000-000000067F00004002000140000000418000__000000914E3F38F0\n000000067F00004002000140000000414000-000000067F00004002000140000000418000__000000931B9A2710\n000000067F00004002000140000000418000-000000067F0000400200014000000041C000__000000914E3F38F0\n000000067F00004002000140000000418000-000000067F0000400200014000000041C000__000000931B9A2710\n000000067F0000400200014000000041B2E2-000000067F00004002000140000000423CB0__000000739A920D71-0000008D2DB5E0C1\n000000067F0000400200014000000041C000-000000067F00004002000140000000420000__000000914E3F38F0\n000000067F0000400200014000000041C000-000000067F00004002000140000000420000__000000931B9A2710\n000000067F00004002000140000000420000-000000067F00004002000140000000424000__000000914E3F38F0\n000000067F00004002000140000000420000-000000067F00004002000140000000424000__000000931B9A2710\n
000000067F00004002000140000000423CB0-000000067F0000400200014000000042C674__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000424000-000000067F00004002000140000000428000__000000914E3F38F0\n000000067F00004002000140000000424000-000000067F00004002000140000000428000__000000931B9A2710\n000000067F00004002000140000000428000-000000067F0000400200014000000042C000__000000914E3F38F0\n000000067F00004002000140000000428000-000000067F0000400200014000000042C000__000000931B9A2710\n000000067F0000400200014000000042C000-000000067F00004002000140000000430000__000000914E3F38F0\n000000067F0000400200014000000042C000-000000067F00004002000140000000430000__000000931B9A2710\n000000067F0000400200014000000042C674-000000067F00004002000140000000435044__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000430000-000000067F00004002000140000000434000__000000914E3F38F0\n000000067F00004002000140000000430000-000000067F00004002000140000000434000__000000931B9A2710\n000000067F00004002000140000000434000-000000067F00004002000140000000438000__000000914E3F38F0\n000000067F00004002000140000000434000-000000067F00004002000140000000438000__000000931B9A2710\n000000067F00004002000140000000435044-000000067F0000400200014000000043DA33__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000438000-000000067F0000400200014000000043C000__000000914E3F38F0\n000000067F00004002000140000000438000-000000067F0000400200014000000043C000__000000931B9A2710\n000000067F0000400200014000000043C000-000000067F00004002000140000000440000__000000914E3F38F0\n000000067F0000400200014000000043C000-000000067F00004002000140000000440000__000000931B9A2710\n000000067F0000400200014000000043DA33-000000067F0000400200014000000044641A__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000440000-000000067F00004002000140000000444000__000000914E3F38F0\n000000067F00004002000140000000440000-000000067F00004002000140000000444000__000000931B9A2710\n000000067F00004002000140000000444000-000000067F0000400200014000000044800
0__000000914E3F38F0\n000000067F00004002000140000000444000-000000067F00004002000140000000448000__000000931B9A2710\n000000067F0000400200014000000044641A-000000067F0000400200014000000044EDF8__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000448000-000000067F0000400200014000000044C000__000000914E3F38F0\n000000067F00004002000140000000448000-000000067F0000400200014000000044C000__000000931B9A2710\n000000067F0000400200014000000044C000-000000067F00004002000140000000450000__000000914E3F38F0\n000000067F0000400200014000000044C000-000000067F00004002000140000000450000__000000931B9A2710\n000000067F0000400200014000000044EDF8-000000067F000040020001400000004577D5__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000450000-000000067F00004002000140000000454000__000000914E3F38F0\n000000067F00004002000140000000450000-000000067F00004002000140000000454000__000000931B9A2710\n000000067F00004002000140000000454000-000000067F00004002000140000000458000__000000914E3F38F0\n000000067F00004002000140000000454000-000000067F00004002000140000000458000__000000931B9A2710\n000000067F000040020001400000004577D5-000000067F000040020001400000004601A6__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000458000-000000067F0000400200014000000045C000__000000914E3F38F0\n000000067F00004002000140000000458000-000000067F0000400200014000000045C000__000000931B9A2710\n000000067F0000400200014000000045C000-000000067F00004002000140000000460000__000000914E3F38F0\n000000067F0000400200014000000045C000-000000067F00004002000140000000460000__000000931B9A2710\n000000067F00004002000140000000460000-000000067F00004002000140000000464000__000000914E3F38F0\n000000067F00004002000140000000460000-000000067F00004002000140000000464000__000000931B9A2710\n000000067F000040020001400000004601A6-000000067F00004002000140000000468B73__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000464000-000000067F00004002000140000000468000__000000914E3F38F0\n000000067F00004002000140000000464000-000000067F0000
4002000140000000468000__000000931B9A2710\n000000067F00004002000140000000468000-000000067F0000400200014000000046C000__000000914E3F38F0\n000000067F00004002000140000000468000-000000067F0000400200014000000046C000__000000931B9A2710\n000000067F00004002000140000000468B73-000000067F00004002000140000000471550__000000739A920D71-0000008D2DB5E0C1\n000000067F0000400200014000000046C000-000000067F00004002000140000000470000__000000914E3F38F0\n000000067F0000400200014000000046C000-000000067F00004002000140000000470000__000000931B9A2710\n000000067F00004002000140000000470000-000000067F00004002000140000000474000__000000914E3F38F0\n000000067F00004002000140000000470000-000000067F00004002000140000000474000__000000931B9A2710\n000000067F00004002000140000000471550-000000067F00004002000140000000479F3B__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000474000-000000067F00004002000140000000478000__000000914E3F38F0\n000000067F00004002000140000000474000-000000067F00004002000140000000478000__000000931B9A2710\n000000067F00004002000140000000478000-000000067F0000400200014000000047C000__000000914E3F38F0\n000000067F00004002000140000000478000-000000067F0000400200014000000047C000__000000931B9A2710\n000000067F00004002000140000000479F3B-000000067F00004002000140000000482925__000000739A920D71-0000008D2DB5E0C1\n000000067F0000400200014000000047C000-000000067F00004002000140000000480000__000000914E3F38F0\n000000067F0000400200014000000047C000-000000067F00004002000140000000480000__000000931B9A2710\n000000067F00004002000140000000480000-000000067F00004002000140000000484000__000000914E3F38F0\n000000067F00004002000140000000480000-000000067F00004002000140000000484000__000000931B9A2710\n000000067F00004002000140000000482925-000000067F0000400200014000000048B308__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000484000-000000067F00004002000140000000488000__000000914E3F38F0\n000000067F00004002000140000000484000-000000067F00004002000140000000488000__000000931B9A2710\n000000067F00004002000140000000
488000-000000067F0000400200014000000048C000__000000914E3F38F0\n000000067F00004002000140000000488000-000000067F0000400200014000000048C000__000000931B9A2710\n000000067F0000400200014000000048B308-000000067F00004002000140000000493CD0__000000739A920D71-0000008D2DB5E0C1\n000000067F0000400200014000000048C000-000000067F00004002000140000000490000__000000914E3F38F0\n000000067F0000400200014000000048C000-000000067F00004002000140000000490000__000000931B9A2710\n000000067F00004002000140000000490000-000000067F00004002000140000000494000__000000914E3F38F0\n000000067F00004002000140000000490000-000000067F00004002000140000000494000__000000931B9A2710\n000000067F00004002000140000000493CD0-000000067F0000400200014000000049C6A6__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000494000-000000067F00004002000140000000498000__000000914E3F38F0\n000000067F00004002000140000000494000-000000067F00004002000140000000498000__000000931B9A2710\n000000067F00004002000140000000498000-000000067F0000400200014000000049C000__000000914E3F38F0\n000000067F00004002000140000000498000-000000067F0000400200014000000049C000__000000931B9A2710\n000000067F0000400200014000000049C000-000000067F000040020001400000004A0000__000000914E3F38F0\n000000067F0000400200014000000049C000-000000067F000040020001400000004A0000__000000931B9A2710\n000000067F0000400200014000000049C6A6-000000067F000040020001400000004A506F__000000739A920D71-0000008D2DB5E0C1\n000000067F000040020001400000004A0000-000000067F000040020001400000004A4000__000000914E3F38F0\n000000067F000040020001400000004A0000-000000067F000040020001400000004A4000__000000931B9A2710\n000000067F000040020001400000004A4000-000000067F000040020001400000004A8000__000000914E3F38F0\n000000067F000040020001400000004A4000-000000067F000040020001400000004A8000__000000931B9A2710\n000000067F000040020001400000004A506F-000000067F000040020001400000004ADA52__000000739A920D71-0000008D2DB5E0C1\n000000067F000040020001400000004A8000-000000067F000040020001400000004AC000__000000914E3F38F0\n000000067
F000040020001400000004A8000-000000067F000040020001400000004AC000__000000931B9A2710\n000000067F000040020001400000004AC000-000000067F000040020001400000004B0000__000000914E3F38F0\n000000067F000040020001400000004AC000-000000067F000040020001400000004B0000__000000931B9A2710\n000000067F000040020001400000004ADA52-000000067F000040020001400000004B6437__000000739A920D71-0000008D2DB5E0C1\n000000067F000040020001400000004B0000-000000067F000040020001400000004B4000__000000914E3F38F0\n000000067F000040020001400000004B0000-000000067F000040020001400000004B4000__000000931B9A2710\n000000067F000040020001400000004B4000-000000067F000040020001400000004B8000__000000914E3F38F0\n000000067F000040020001400000004B4000-000000067F000040020001400000004B8000__000000931B9A2710\n000000067F000040020001400000004B6437-000000067F000040020001400000004BEE1E__000000739A920D71-0000008D2DB5E0C1\n000000067F000040020001400000004B8000-000000067F000040020001400000004BC000__000000914E3F38F0\n000000067F000040020001400000004B8000-000000067F000040020001400000004BC000__000000931B9A2710\n000000067F000040020001400000004BC000-000000067F000040020001400000004C0000__000000914E3F38F0\n000000067F000040020001400000004BC000-000000067F000040020001400000004C0000__000000931B9A2710\n000000067F000040020001400000004BEE1E-000000067F000040020001400000004C77FB__000000739A920D71-0000008D2DB5E0C1\n000000067F000040020001400000004C0000-000000067F000040020001400000004C4000__000000914E3F38F0\n000000067F000040020001400000004C0000-000000067F000040020001400000004C4000__000000931B9A2710\n000000067F000040020001400000004C4000-000000067F000040020001400000004C8000__000000914E3F38F0\n000000067F000040020001400000004C4000-000000067F000040020001400000004C8000__000000931B9A2710\n000000067F000040020001400000004C77FB-000000067F000040020001400000004D01CF__000000739A920D71-0000008D2DB5E0C1\n000000067F000040020001400000004C8000-000000067F000040020001400000004CC000__000000914E3F38F0\n000000067F000040020001400000004C8000-000000067F000040020001400000004CC000__000000
931B9A2710\n000000067F000040020001400000004CC000-000000067F000040020001400000004D0000__000000914E3F38F0\n000000067F000040020001400000004CC000-000000067F000040020001400000004D0000__000000931B9A2710\n000000067F000040020001400000004D0000-000000067F000040020001400000004D4000__000000914E3F38F0\n000000067F000040020001400000004D0000-000000067F000040020001400000004D4000__000000931B9A2710\n000000067F000040020001400000004D01CF-000000067F000040020001400000004D8B9F__000000739A920D71-0000008D2DB5E0C1\n000000067F000040020001400000004D4000-000000067F000040020001400000004D8000__000000914E3F38F0\n000000067F000040020001400000004D4000-000000067F000040020001400000004D8000__000000931B9A2710\n000000067F000040020001400000004D8000-000000067F000040020001400000004DC000__000000914E3F38F0\n000000067F000040020001400000004D8000-000000067F000040020001400000004DC000__000000931B9A2710\n000000067F000040020001400000004D8B9F-000000067F000040020001400000004E1565__000000739A920D71-0000008D2DB5E0C1\n000000067F000040020001400000004DC000-000000067F000040020001400000004E0000__000000914E3F38F0\n000000067F000040020001400000004DC000-000000067F000040020001400000004E0000__000000931B9A2710\n000000067F000040020001400000004E0000-000000067F000040020001400000004E4000__000000914E3F38F0\n000000067F000040020001400000004E0000-000000067F000040020001400000004E4000__000000931B9A2710\n000000067F000040020001400000004E1565-000000067F000040020001400000004E9F47__000000739A920D71-0000008D2DB5E0C1\n000000067F000040020001400000004E4000-000000067F000040020001400000004E8000__000000914E3F38F0\n000000067F000040020001400000004E4000-000000067F000040020001400000004E8000__000000931B9A2710\n000000067F000040020001400000004E8000-000000067F000040020001400000004EC000__000000914E3F38F0\n000000067F000040020001400000004E8000-000000067F000040020001400000004EC000__000000931B9A2710\n000000067F000040020001400000004E9F47-000000067F000040020001400000004F2937__000000739A920D71-0000008D2DB5E0C1\n000000067F000040020001400000004EC000-000000067F0000400200014
00000004F0000__000000914E3F38F0\n000000067F000040020001400000004EC000-000000067F000040020001400000004F0000__000000931B9A2710\n000000067F000040020001400000004F0000-000000067F000040020001400000004F4000__000000914E3F38F0\n000000067F000040020001400000004F0000-000000067F000040020001400000004F4000__000000931B9A2710\n000000067F000040020001400000004F2937-000000067F000040020001400000004FB31B__000000739A920D71-0000008D2DB5E0C1\n000000067F000040020001400000004F4000-000000067F000040020001400000004F8000__000000914E3F38F0\n000000067F000040020001400000004F4000-000000067F000040020001400000004F8000__000000931B9A2710\n000000067F000040020001400000004F8000-000000067F000040020001400000004FC000__000000914E3F38F0\n000000067F000040020001400000004F8000-000000067F000040020001400000004FC000__000000931B9A2710\n000000067F000040020001400000004FB31B-000000067F00004002000140000000503CF8__000000739A920D71-0000008D2DB5E0C1\n000000067F000040020001400000004FC000-000000067F00004002000140000000500000__000000914E3F38F0\n000000067F000040020001400000004FC000-000000067F00004002000140000000500000__000000931B9A2710\n000000067F00004002000140000000500000-000000067F00004002000140000000504000__000000914E3F38F0\n000000067F00004002000140000000500000-000000067F00004002000140000000504000__000000931B9A2710\n000000067F00004002000140000000503CF8-000000067F0000400200014000000050C6D3__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000504000-000000067F00004002000140000000508000__000000914E3F38F0\n000000067F00004002000140000000504000-000000067F00004002000140000000508000__000000931B9A2710\n000000067F00004002000140000000508000-000000067F0000400200014000000050C000__000000914E3F38F0\n000000067F00004002000140000000508000-000000067F0000400200014000000050C000__000000931B9A2710\n000000067F0000400200014000000050C000-000000067F00004002000140000000510000__000000914E3F38F0\n000000067F0000400200014000000050C000-000000067F00004002000140000000510000__000000931B9A2710\n000000067F0000400200014000000050C6D3-000000067F000040020
001400000005150A6__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000510000-000000067F00004002000140000000514000__000000914E3F38F0\n000000067F00004002000140000000510000-000000067F00004002000140000000514000__000000931B9A2710\n000000067F00004002000140000000514000-000000067F00004002000140000000518000__000000914E3F38F0\n000000067F00004002000140000000514000-000000067F00004002000140000000518000__000000931B9A2710\n000000067F000040020001400000005150A6-000000067F0000400200014000000051DA77__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000518000-000000067F0000400200014000000051C000__000000914E3F38F0\n000000067F00004002000140000000518000-000000067F0000400200014000000051C000__000000931B9A2710\n000000067F0000400200014000000051C000-000000067F00004002000140000000520000__000000914E3F38F0\n000000067F0000400200014000000051C000-000000067F00004002000140000000520000__000000931B9A2710\n000000067F0000400200014000000051DA77-000000067F0000400200014000000052645E__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000520000-000000067F00004002000140000000524000__000000914E3F38F0\n000000067F00004002000140000000520000-000000067F00004002000140000000524000__000000931B9A2710\n000000067F00004002000140000000524000-000000067F00004002000140000000528000__000000914E3F38F0\n000000067F00004002000140000000524000-000000067F00004002000140000000528000__000000931B9A2710\n000000067F0000400200014000000052645E-000000067F0000400200014000000052EE48__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000528000-000000067F0000400200014000000052C000__000000914E3F38F0\n000000067F00004002000140000000528000-000000067F0000400200014000000052C000__000000931B9A2710\n000000067F0000400200014000000052C000-000000067F00004002000140000000530000__000000914E3F38F0\n000000067F0000400200014000000052C000-000000067F00004002000140000000530000__000000931B9A2710\n000000067F0000400200014000000052EE48-000000067F00004002000140000000537826__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002
000140000000530000-000000067F00004002000140000000534000__000000914E3F38F0\n000000067F00004002000140000000530000-000000067F00004002000140000000534000__000000931B9A2710\n000000067F00004002000140000000534000-000000067F00004002000140000000538000__000000914E3F38F0\n000000067F00004002000140000000534000-000000067F00004002000140000000538000__000000931B9A2710\n000000067F00004002000140000000537826-000000067F00004002000140000000540201__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000538000-000000067F0000400200014000000053C000__000000914E3F38F0\n000000067F00004002000140000000538000-000000067F0000400200014000000053C000__000000931B9A2710\n000000067F0000400200014000000053C000-000000067F00004002000140000000540000__000000914E3F38F0\n000000067F0000400200014000000053C000-000000067F00004002000140000000540000__000000931B9A2710\n000000067F00004002000140000000540000-000000067F00004002000140000000544000__000000914E3F38F0\n000000067F00004002000140000000540000-000000067F00004002000140000000544000__000000931B9A2710\n000000067F00004002000140000000540201-000000067F00004002000140000000548BCA__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000544000-000000067F00004002000140000000548000__000000914E3F38F0\n000000067F00004002000140000000544000-000000067F00004002000140000000548000__000000931B9A2710\n000000067F00004002000140000000548000-000000067F0000400200014000000054C000__000000914E3F38F0\n000000067F00004002000140000000548000-000000067F0000400200014000000054C000__000000931B9A2710\n000000067F00004002000140000000548BCA-000000067F0000400200014000000055159D__000000739A920D71-0000008D2DB5E0C1\n000000067F0000400200014000000054C000-000000067F00004002000140000000550000__000000914E3F38F0\n000000067F0000400200014000000054C000-000000067F00004002000140000000550000__000000931B9A2710\n000000067F00004002000140000000550000-000000067F00004002000140000000554000__000000914E3F38F0\n000000067F00004002000140000000550000-000000067F00004002000140000000554000__000000931B9A2710\n000000067F0000
400200014000000055159D-000000067F00004002000140000000559F6D__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000554000-000000067F00004002000140000000558000__000000914E3F38F0\n000000067F00004002000140000000554000-000000067F00004002000140000000558000__000000931B9A2710\n000000067F00004002000140000000558000-000000067F0000400200014000000055C000__000000914E3F38F0\n000000067F00004002000140000000558000-000000067F0000400200014000000055C000__000000931B9A2710\n000000067F00004002000140000000559F6D-000000067F00004002000140000000562956__000000739A920D71-0000008D2DB5E0C1\n000000067F0000400200014000000055C000-000000067F00004002000140000000560000__000000914E3F38F0\n000000067F0000400200014000000055C000-000000067F00004002000140000000560000__000000931B9A2710\n000000067F00004002000140000000560000-000000067F00004002000140000000564000__000000914E3F38F0\n000000067F00004002000140000000560000-000000067F00004002000140000000564000__000000931B9A2710\n000000067F00004002000140000000562956-000000067F0000400200014000000056B340__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000564000-000000067F00004002000140000000568000__000000914E3F38F0\n000000067F00004002000140000000564000-000000067F00004002000140000000568000__000000931B9A2710\n000000067F00004002000140000000568000-000000067F0000400200014000000056C000__000000914E3F38F0\n000000067F00004002000140000000568000-000000067F0000400200014000000056C000__000000931B9A2710\n000000067F0000400200014000000056B340-000000067F00004002000140000000573D1E__000000739A920D71-0000008D2DB5E0C1\n000000067F0000400200014000000056C000-000000067F00004002000140000000570000__000000914E3F38F0\n000000067F0000400200014000000056C000-000000067F00004002000140000000570000__000000931B9A2710\n000000067F00004002000140000000570000-000000067F00004002000140000000574000__000000914E3F38F0\n000000067F00004002000140000000570000-000000067F00004002000140000000574000__000000931B9A2710\n000000067F00004002000140000000573D1E-000000067F0000400200014000000057C6F0__000000739A9
20D71-0000008D2DB5E0C1\n000000067F00004002000140000000574000-000000067F00004002000140000000578000__000000914E3F38F0\n000000067F00004002000140000000574000-000000067F00004002000140000000578000__000000931B9A2710\n000000067F00004002000140000000578000-000000067F0000400200014000000057C000__000000914E3F38F0\n000000067F00004002000140000000578000-000000067F0000400200014000000057C000__000000931B9A2710\n000000067F0000400200014000000057C000-000000067F00004002000140000000580000__000000914E3F38F0\n000000067F0000400200014000000057C000-000000067F00004002000140000000580000__000000931B9A2710\n000000067F0000400200014000000057C6F0-000000067F000040020001400000005850C8__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000580000-000000067F00004002000140000000584000__000000914E3F38F0\n000000067F00004002000140000000580000-000000067F00004002000140000000584000__000000931B9A2710\n000000067F00004002000140000000584000-000000067F00004002000140000000588000__000000914E3F38F0\n000000067F00004002000140000000584000-000000067F00004002000140000000588000__000000931B9A2710\n000000067F000040020001400000005850C8-000000067F0000400200014000000058DA94__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000588000-000000067F0000400200014000000058C000__000000914E3F38F0\n000000067F00004002000140000000588000-000000067F0000400200014000000058C000__000000931B9A2710\n000000067F0000400200014000000058C000-000000067F00004002000140000000590000__000000914E3F38F0\n000000067F0000400200014000000058C000-000000067F00004002000140000000590000__000000931B9A2710\n000000067F0000400200014000000058DA94-000000067F00004002000140000000596465__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000590000-000000067F00004002000140000000594000__000000914E3F38F0\n000000067F00004002000140000000590000-000000067F00004002000140000000594000__000000931B9A2710\n000000067F00004002000140000000594000-000000067F00004002000140000000598000__000000914E3F38F0\n000000067F00004002000140000000594000-000000067F000040020001400000
00598000__000000931B9A2710\n000000067F00004002000140000000596465-000000067F0000400200014000000059EE53__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000598000-000000067F0000400200014000000059C000__000000914E3F38F0\n000000067F00004002000140000000598000-000000067F0000400200014000000059C000__000000931B9A2710\n000000067F0000400200014000000059C000-000000067F000040020001400000005A0000__000000914E3F38F0\n000000067F0000400200014000000059C000-000000067F000040020001400000005A0000__000000931B9A2710\n000000067F0000400200014000000059EE53-000000067F000040020001400000005A783C__000000739A920D71-0000008D2DB5E0C1\n000000067F000040020001400000005A0000-000000067F000040020001400000005A4000__000000914E3F38F0\n000000067F000040020001400000005A0000-000000067F000040020001400000005A4000__000000931B9A2710\n000000067F000040020001400000005A4000-000000067F000040020001400000005A8000__000000914E3F38F0\n000000067F000040020001400000005A4000-000000067F000040020001400000005A8000__000000931B9A2710\n000000067F000040020001400000005A783C-000000067F000040020001400000005B0217__000000739A920D71-0000008D2DB5E0C1\n000000067F000040020001400000005A8000-000000067F000040020001400000005AC000__000000914E3F38F0\n000000067F000040020001400000005A8000-000000067F000040020001400000005AC000__000000931B9A2710\n000000067F000040020001400000005AC000-000000067F000040020001400000005B0000__000000914E3F38F0\n000000067F000040020001400000005AC000-000000067F000040020001400000005B0000__000000931B9A2710\n000000067F000040020001400000005B0000-000000067F000040020001400000005B4000__000000914E3F38F0\n000000067F000040020001400000005B0000-000000067F000040020001400000005B4000__000000931B9A2710\n000000067F000040020001400000005B0217-000000067F000040020001400000005B8BF1__000000739A920D71-0000008D2DB5E0C1\n000000067F000040020001400000005B4000-000000067F000040020001400000005B8000__000000914E3F38F0\n000000067F000040020001400000005B4000-000000067F000040020001400000005B8000__000000931B9A2710\n000000067F000040020001400000005B8000-0000000
67F000040020001400000005BC000__000000914E3F38F0\n000000067F000040020001400000005B8000-000000067F000040020001400000005BC000__000000931B9A2710\n000000067F000040020001400000005B8BF1-000000067F000040020001400000005C15C5__000000739A920D71-0000008D2DB5E0C1\n000000067F000040020001400000005BC000-000000067F000040020001400000005C0000__000000914E3F38F0\n000000067F000040020001400000005BC000-000000067F000040020001400000005C0000__000000931B9A2710\n000000067F000040020001400000005C0000-000000067F000040020001400000005C4000__000000914E3F38F0\n000000067F000040020001400000005C0000-000000067F000040020001400000005C4000__000000931B9A2710\n000000067F000040020001400000005C15C5-000000067F000040020001400000005C9F94__000000739A920D71-0000008D2DB5E0C1\n000000067F000040020001400000005C4000-000000067F000040020001400000005C8000__000000914E3F38F0\n000000067F000040020001400000005C4000-000000067F000040020001400000005C8000__000000931B9A2710\n000000067F000040020001400000005C8000-000000067F000040020001400000005CC000__000000914E3F38F0\n000000067F000040020001400000005C8000-000000067F000040020001400000005CC000__000000931B9A2710\n000000067F000040020001400000005C9F94-000000067F000040020001400000005D2970__000000739A920D71-0000008D2DB5E0C1\n000000067F000040020001400000005CC000-000000067F000040020001400000005D0000__000000914E3F38F0\n000000067F000040020001400000005CC000-000000067F000040020001400000005D0000__000000931B9A2710\n000000067F000040020001400000005D0000-000000067F000040020001400000005D4000__000000914E3F38F0\n000000067F000040020001400000005D0000-000000067F000040020001400000005D4000__000000931B9A2710\n000000067F000040020001400000005D2970-000000067F000040020001400000005DB35D__000000739A920D71-0000008D2DB5E0C1\n000000067F000040020001400000005D4000-000000067F000040020001400000005D8000__000000914E3F38F0\n000000067F000040020001400000005D4000-000000067F000040020001400000005D8000__000000931B9A2710\n000000067F000040020001400000005D8000-000000067F000040020001400000005DC000__000000914E3F38F0\n000000067F0000400200014
00000005D8000-000000067F000040020001400000005DC000__000000931B9A2710\n000000067F000040020001400000005DB35D-000000067F000040020001400000005E3D3C__000000739A920D71-0000008D2DB5E0C1\n000000067F000040020001400000005DC000-000000067F000040020001400000005E0000__000000914E3F38F0\n000000067F000040020001400000005DC000-000000067F000040020001400000005E0000__000000931B9A2710\n000000067F000040020001400000005E0000-000000067F000040020001400000005E4000__000000914E3F38F0\n000000067F000040020001400000005E0000-000000067F000040020001400000005E4000__000000931B9A2710\n000000067F000040020001400000005E3D3C-000000067F000040020001400000005EC713__000000739A920D71-0000008D2DB5E0C1\n000000067F000040020001400000005E4000-000000067F000040020001400000005E8000__000000914E3F38F0\n000000067F000040020001400000005E4000-000000067F000040020001400000005E8000__000000931B9A2710\n000000067F000040020001400000005E8000-000000067F000040020001400000005EC000__000000914E3F38F0\n000000067F000040020001400000005E8000-000000067F000040020001400000005EC000__000000931B9A2710\n000000067F000040020001400000005EC000-000000067F000040020001400000005F0000__000000914E3F38F0\n000000067F000040020001400000005EC000-000000067F000040020001400000005F0000__000000931B9A2710\n000000067F000040020001400000005EC713-000000067F000040020001400000005F50E5__000000739A920D71-0000008D2DB5E0C1\n000000067F000040020001400000005F0000-000000067F000040020001400000005F4000__000000914E3F38F0\n000000067F000040020001400000005F0000-000000067F000040020001400000005F4000__000000931B9A2710\n000000067F000040020001400000005F4000-000000067F000040020001400000005F8000__000000914E3F38F0\n000000067F000040020001400000005F4000-000000067F000040020001400000005F8000__000000931B9A2710\n000000067F000040020001400000005F50E5-000000067F000040020001400000005FDAC2__000000739A920D71-0000008D2DB5E0C1\n000000067F000040020001400000005F8000-000000067F000040020001400000005FC000__000000914E3F38F0\n000000067F000040020001400000005F8000-000000067F000040020001400000005FC000__000000931B9A2710\n00
0000067F000040020001400000005FC000-000000067F00004002000140000000600000__000000914E3F38F0\n000000067F000040020001400000005FC000-000000067F00004002000140000000600000__000000931B9A2710\n000000067F000040020001400000005FDAC2-000000067F0000400200014000000060648F__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000600000-000000067F00004002000140000000604000__000000914E3F38F0\n000000067F00004002000140000000600000-000000067F00004002000140000000604000__000000931B9A2710\n000000067F00004002000140000000604000-000000067F00004002000140000000608000__000000914E3F38F0\n000000067F00004002000140000000604000-000000067F00004002000140000000608000__000000931B9A2710\n000000067F0000400200014000000060648F-000000067F0000400200014000000060EE6E__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000608000-000000067F0000400200014000000060C000__000000914E3F38F0\n000000067F00004002000140000000608000-000000067F0000400200014000000060C000__000000931B9A2710\n000000067F0000400200014000000060C000-000000067F00004002000140000000610000__000000914E3F38F0\n000000067F0000400200014000000060C000-000000067F00004002000140000000610000__000000931B9A2710\n000000067F0000400200014000000060EE6E-000000067F00004002000140000000617862__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000610000-000000067F00004002000140000000614000__000000914E3F38F0\n000000067F00004002000140000000610000-000000067F00004002000140000000614000__000000931B9A2710\n000000067F00004002000140000000614000-000000067F00004002000140000000618000__000000914E3F38F0\n000000067F00004002000140000000614000-000000067F00004002000140000000618000__000000931B9A2710\n000000067F00004002000140000000617862-000000067F0000400200014000000062024A__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000618000-000000067F0000400200014000000061C000__000000914E3F38F0\n000000067F00004002000140000000618000-000000067F0000400200014000000061C000__000000931B9A2710\n000000067F0000400200014000000061C000-000000067F00004002000140000000620000_
_000000914E3F38F0\n000000067F0000400200014000000061C000-000000067F00004002000140000000620000__000000931B9A2710\n000000067F00004002000140000000620000-000000067F00004002000140000000624000__000000914E3F38F0\n000000067F00004002000140000000620000-000000067F00004002000140000000624000__000000931B9A2710\n000000067F0000400200014000000062024A-000000067F00004002000140000000628C1D__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000624000-000000067F00004002000140000000628000__000000914E3F38F0\n000000067F00004002000140000000624000-000000067F00004002000140000000628000__000000931B9A2710\n000000067F00004002000140000000628000-000000067F0000400200014000000062C000__000000914E3F38F0\n000000067F00004002000140000000628000-000000067F0000400200014000000062C000__000000931B9A2710\n000000067F00004002000140000000628C1D-000000067F000040020001400000006315E2__000000739A920D71-0000008D2DB5E0C1\n000000067F0000400200014000000062C000-000000067F00004002000140000000630000__000000914E3F38F0\n000000067F0000400200014000000062C000-000000067F00004002000140000000630000__000000931B9A2710\n000000067F00004002000140000000630000-000000067F00004002000140000000634000__000000914E3F38F0\n000000067F00004002000140000000630000-000000067F00004002000140000000634000__000000931B9A2710\n000000067F000040020001400000006315E2-000000067F00004002000140000000639FBE__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000634000-000000067F00004002000140000000638000__000000914E3F38F0\n000000067F00004002000140000000634000-000000067F00004002000140000000638000__000000931B9A2710\n000000067F00004002000140000000638000-000000067F0000400200014000000063C000__000000914E3F38F0\n000000067F00004002000140000000638000-000000067F0000400200014000000063C000__000000931B9A2710\n000000067F00004002000140000000639FBE-000000067F00004002000140000000642995__000000739A920D71-0000008D2DB5E0C1\n000000067F0000400200014000000063C000-000000067F00004002000140000000640000__000000914E3F38F0\n000000067F0000400200014000000063C000-000000067F000040
02000140000000640000__000000931B9A2710\n000000067F00004002000140000000640000-000000067F00004002000140000000644000__000000914E3F38F0\n000000067F00004002000140000000640000-000000067F00004002000140000000644000__000000931B9A2710\n000000067F00004002000140000000642995-000000067F0000400200014000000064B370__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000644000-000000067F00004002000140000000648000__000000914E3F38F0\n000000067F00004002000140000000644000-000000067F00004002000140000000648000__000000931B9A2710\n000000067F00004002000140000000648000-000000067F0000400200014000000064C000__000000914E3F38F0\n000000067F00004002000140000000648000-000000067F0000400200014000000064C000__000000931B9A2710\n000000067F0000400200014000000064B370-000000067F00004002000140000000653D64__000000739A920D71-0000008D2DB5E0C1\n000000067F0000400200014000000064C000-000000067F00004002000140000000650000__000000914E3F38F0\n000000067F0000400200014000000064C000-000000067F00004002000140000000650000__000000931B9A2710\n000000067F00004002000140000000650000-000000067F00004002000140000000654000__000000914E3F38F0\n000000067F00004002000140000000650000-000000067F00004002000140000000654000__000000931B9A2710\n000000067F00004002000140000000653D64-000000067F0000400200014000000065C74F__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000654000-000000067F00004002000140000000658000__000000914E3F38F0\n000000067F00004002000140000000654000-000000067F00004002000140000000658000__000000931B9A2710\n000000067F00004002000140000000658000-000000067F0000400200014000000065C000__000000914E3F38F0\n000000067F00004002000140000000658000-000000067F0000400200014000000065C000__000000931B9A2710\n000000067F0000400200014000000065C000-000000067F00004002000140000000660000__000000914E3F38F0\n000000067F0000400200014000000065C000-000000067F00004002000140000000660000__000000931B9A2710\n000000067F0000400200014000000065C74F-000000067F00004002000140000000665130__000000739A920D71-0000008D2DB5E0C1\n000000067F0000400200014000000066
0000-000000067F00004002000140000000664000__000000914E3F38F0\n000000067F00004002000140000000660000-000000067F00004002000140000000664000__000000931B9A2710\n000000067F00004002000140000000664000-000000067F00004002000140000000668000__000000914E3F38F0\n000000067F00004002000140000000664000-000000067F00004002000140000000668000__000000931B9A2710\n000000067F00004002000140000000665130-000000067F0000400200014000000066DAFC__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000668000-000000067F0000400200014000000066C000__000000914E3F38F0\n000000067F00004002000140000000668000-000000067F0000400200014000000066C000__000000931B9A2710\n000000067F0000400200014000000066C000-000000067F00004002000140000000670000__000000914E3F38F0\n000000067F0000400200014000000066C000-000000067F00004002000140000000670000__000000931B9A2710\n000000067F0000400200014000000066DAFC-000000067F000040020001400000006764CD__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000670000-000000067F00004002000140000000674000__000000914E3F38F0\n000000067F00004002000140000000670000-000000067F00004002000140000000674000__000000931B9A2710\n000000067F00004002000140000000674000-000000067F00004002000140000000678000__000000914E3F38F0\n000000067F00004002000140000000674000-000000067F00004002000140000000678000__000000931B9A2710\n000000067F000040020001400000006764CD-000000067F0000400200014000000067EEA5__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000678000-000000067F0000400200014000000067C000__000000914E3F38F0\n000000067F00004002000140000000678000-000000067F0000400200014000000067C000__000000931B9A2710\n000000067F0000400200014000000067C000-000000067F00004002000140000000680000__000000914E3F38F0\n000000067F0000400200014000000067C000-000000067F00004002000140000000680000__000000931B9A2710\n000000067F0000400200014000000067EEA5-000000067F0000400200014000000068788B__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000680000-000000067F00004002000140000000684000__000000914E3F38F0\n000000067F0
0004002000140000000680000-000000067F00004002000140000000684000__000000931B9A2710\n000000067F00004002000140000000684000-000000067F00004002000140000000688000__000000914E3F38F0\n000000067F00004002000140000000684000-000000067F00004002000140000000688000__000000931B9A2710\n000000067F0000400200014000000068788B-000000067F0000400200014000000069026F__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000688000-000000067F0000400200014000000068C000__000000914E3F38F0\n000000067F00004002000140000000688000-000000067F0000400200014000000068C000__000000931B9A2710\n000000067F0000400200014000000068C000-000000067F00004002000140000000690000__000000914E3F38F0\n000000067F0000400200014000000068C000-000000067F00004002000140000000690000__000000931B9A2710\n000000067F00004002000140000000690000-000000067F00004002000140000000694000__000000914E3F38F0\n000000067F00004002000140000000690000-000000067F00004002000140000000694000__000000931B9A2710\n000000067F0000400200014000000069026F-000000067F00004002000140000000698C51__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000694000-000000067F00004002000140000000698000__000000914E3F38F0\n000000067F00004002000140000000694000-000000067F00004002000140000000698000__000000931B9A2710\n000000067F00004002000140000000698000-000000067F0000400200014000000069C000__000000914E3F38F0\n000000067F00004002000140000000698000-000000067F0000400200014000000069C000__000000931B9A2710\n000000067F00004002000140000000698C51-000000067F000040020001400000006A1635__000000739A920D71-0000008D2DB5E0C1\n000000067F0000400200014000000069C000-000000067F000040020001400000006A0000__000000914E3F38F0\n000000067F0000400200014000000069C000-000000067F000040020001400000006A0000__000000931B9A2710\n000000067F000040020001400000006A0000-000000067F000040020001400000006A4000__000000914E3F38F0\n000000067F000040020001400000006A0000-000000067F000040020001400000006A4000__000000931B9A2710\n000000067F000040020001400000006A1635-000000067F000040020001400000006AA005__000000739A920D71-0000008D
2DB5E0C1\n000000067F000040020001400000006A4000-000000067F000040020001400000006A8000__000000914E3F38F0\n000000067F000040020001400000006A4000-000000067F000040020001400000006A8000__000000931B9A2710\n000000067F000040020001400000006A8000-000000067F000040020001400000006AC000__000000914E3F38F0\n000000067F000040020001400000006A8000-000000067F000040020001400000006AC000__000000931B9A2710\n000000067F000040020001400000006AA005-000000067F000040020001400000006B29BB__000000739A920D71-0000008D2DB5E0C1\n000000067F000040020001400000006AC000-000000067F000040020001400000006B0000__000000914E3F38F0\n000000067F000040020001400000006AC000-000000067F000040020001400000006B0000__000000931B9A2710\n000000067F000040020001400000006B0000-000000067F000040020001400000006B4000__000000914E3F38F0\n000000067F000040020001400000006B0000-000000067F000040020001400000006B4000__000000931B9A2710\n000000067F000040020001400000006B29BB-000000067F000040020001400000006BB38D__000000739A920D71-0000008D2DB5E0C1\n000000067F000040020001400000006B4000-000000067F000040020001400000006B8000__000000914E3F38F0\n000000067F000040020001400000006B4000-000000067F000040020001400000006B8000__000000931B9A2710\n000000067F000040020001400000006B8000-000000067F000040020001400000006BC000__000000914E3F38F0\n000000067F000040020001400000006B8000-000000067F000040020001400000006BC000__000000931B9A2710\n000000067F000040020001400000006BB38D-000000067F000040020001400000006C3D79__000000739A920D71-0000008D2DB5E0C1\n000000067F000040020001400000006BC000-000000067F000040020001400000006C0000__000000914E3F38F0\n000000067F000040020001400000006BC000-000000067F000040020001400000006C0000__000000931B9A2710\n000000067F000040020001400000006C0000-000000067F000040020001400000006C4000__000000914E3F38F0\n000000067F000040020001400000006C0000-000000067F000040020001400000006C4000__000000931B9A2710\n000000067F000040020001400000006C3D79-000000067F000040020001400000006CC765__000000739A920D71-0000008D2DB5E0C1\n000000067F000040020001400000006C4000-000000067F000040020001400
000006C8000__000000914E3F38F0\n000000067F000040020001400000006C4000-000000067F000040020001400000006C8000__000000931B9A2710\n000000067F000040020001400000006C8000-000000067F000040020001400000006CC000__000000914E3F38F0\n000000067F000040020001400000006C8000-000000067F000040020001400000006CC000__000000931B9A2710\n000000067F000040020001400000006CC000-000000067F000040020001400000006D0000__000000914E3F38F0\n000000067F000040020001400000006CC000-000000067F000040020001400000006D0000__000000931B9A2710\n000000067F000040020001400000006CC765-000000067F000040020001400000006D514B__000000739A920D71-0000008D2DB5E0C1\n000000067F000040020001400000006D0000-000000067F000040020001400000006D4000__000000914E3F38F0\n000000067F000040020001400000006D0000-000000067F000040020001400000006D4000__000000931B9A2710\n000000067F000040020001400000006D4000-000000067F000040020001400000006D8000__000000914E3F38F0\n000000067F000040020001400000006D4000-000000067F000040020001400000006D8000__000000931B9A2710\n000000067F000040020001400000006D514B-000000067F000040020001400000006DDB2A__000000739A920D71-0000008D2DB5E0C1\n000000067F000040020001400000006D8000-000000067F000040020001400000006DC000__000000914E3F38F0\n000000067F000040020001400000006D8000-000000067F000040020001400000006DC000__000000931B9A2710\n000000067F000040020001400000006DC000-000000067F000040020001400000006E0000__000000914E3F38F0\n000000067F000040020001400000006DC000-000000067F000040020001400000006E0000__000000931B9A2710\n000000067F000040020001400000006DDB2A-000000067F000040020001400000006E64F5__000000739A920D71-0000008D2DB5E0C1\n000000067F000040020001400000006E0000-000000067F000040020001400000006E4000__000000914E3F38F0\n000000067F000040020001400000006E0000-000000067F000040020001400000006E4000__000000931B9A2710\n000000067F000040020001400000006E4000-000000067F000040020001400000006E8000__000000914E3F38F0\n000000067F000040020001400000006E4000-000000067F000040020001400000006E8000__000000931B9A2710\n000000067F000040020001400000006E64F5-000000067F00004002000
1400000006EEEC0__000000739A920D71-0000008D2DB5E0C1\n000000067F000040020001400000006E8000-000000067F000040020001400000006EC000__000000914E3F38F0\n000000067F000040020001400000006E8000-000000067F000040020001400000006EC000__000000931B9A2710\n000000067F000040020001400000006EC000-000000067F000040020001400000006F0000__000000914E3F38F0\n000000067F000040020001400000006EC000-000000067F000040020001400000006F0000__000000931B9A2710\n000000067F000040020001400000006EEEC0-000000067F000040020001400000006F7891__000000739A920D71-0000008D2DB5E0C1\n000000067F000040020001400000006F0000-000000067F000040020001400000006F4000__000000914E3F38F0\n000000067F000040020001400000006F0000-000000067F000040020001400000006F4000__000000931B9A2710\n000000067F000040020001400000006F4000-000000067F000040020001400000006F8000__000000914E3F38F0\n000000067F000040020001400000006F4000-000000067F000040020001400000006F8000__000000931B9A2710\n000000067F000040020001400000006F7891-000000067F00004002000140000000700279__000000739A920D71-0000008D2DB5E0C1\n000000067F000040020001400000006F8000-000000067F000040020001400000006FC000__000000914E3F38F0\n000000067F000040020001400000006F8000-000000067F000040020001400000006FC000__000000931B9A2710\n000000067F000040020001400000006FC000-000000067F00004002000140000000700000__000000914E3F38F0\n000000067F000040020001400000006FC000-000000067F00004002000140000000700000__000000931B9A2710\n000000067F00004002000140000000700000-000000067F00004002000140000000704000__000000914E3F38F0\n000000067F00004002000140000000700000-000000067F00004002000140000000704000__000000931B9A2710\n000000067F00004002000140000000700279-000000067F00004002000140000000708C68__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000704000-000000067F00004002000140000000708000__000000914E3F38F0\n000000067F00004002000140000000704000-000000067F00004002000140000000708000__000000931B9A2710\n000000067F00004002000140000000708000-000000067F0000400200014000000070C000__000000914E3F38F0\n000000067F00004002000140000000708000-
000000067F0000400200014000000070C000__000000931B9A2710\n000000067F00004002000140000000708C68-000000067F00004002000140000000711656__000000739A920D71-0000008D2DB5E0C1\n000000067F0000400200014000000070C000-000000067F00004002000140000000710000__000000914E3F38F0\n000000067F0000400200014000000070C000-000000067F00004002000140000000710000__000000931B9A2710\n000000067F00004002000140000000710000-000000067F00004002000140000000714000__000000914E3F38F0\n000000067F00004002000140000000710000-000000067F00004002000140000000714000__000000931B9A2710\n000000067F00004002000140000000711656-000000067F0000400200014000000071A02C__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000714000-000000067F00004002000140000000718000__000000914E3F38F0\n000000067F00004002000140000000714000-000000067F00004002000140000000718000__000000931B9A2710\n000000067F00004002000140000000718000-000000067F0000400200014000000071C000__000000914E3F38F0\n000000067F00004002000140000000718000-000000067F0000400200014000000071C000__000000931B9A2710\n000000067F0000400200014000000071A02C-000000067F000040020001400000007229FA__000000739A920D71-0000008D2DB5E0C1\n000000067F0000400200014000000071C000-000000067F00004002000140000000720000__000000914E3F38F0\n000000067F0000400200014000000071C000-000000067F00004002000140000000720000__000000931B9A2710\n000000067F00004002000140000000720000-000000067F00004002000140000000724000__000000914E3F38F0\n000000067F00004002000140000000720000-000000067F00004002000140000000724000__000000931B9A2710\n000000067F000040020001400000007229FA-000000067F0000400200014000000072B3C9__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000724000-000000067F00004002000140000000728000__000000914E3F38F0\n000000067F00004002000140000000724000-000000067F00004002000140000000728000__000000931B9A2710\n000000067F00004002000140000000728000-000000067F0000400200014000000072C000__000000914E3F38F0\n000000067F00004002000140000000728000-000000067F0000400200014000000072C000__000000931B9A2710\n000000067F000040
0200014000000072B3C9-000000067F00004002000140000000733D9B__000000739A920D71-0000008D2DB5E0C1\n000000067F0000400200014000000072C000-000000067F00004002000140000000730000__000000914E3F38F0\n000000067F0000400200014000000072C000-000000067F00004002000140000000730000__000000931B9A2710\n000000067F00004002000140000000730000-000000067F00004002000140000000734000__000000914E3F38F0\n000000067F00004002000140000000730000-000000067F00004002000140000000734000__000000931B9A2710\n000000067F00004002000140000000733D9B-000000067F0000400200014000000073C77C__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000734000-000000067F00004002000140000000738000__000000914E3F38F0\n000000067F00004002000140000000734000-000000067F00004002000140000000738000__000000931B9A2710\n000000067F00004002000140000000738000-000000067F0000400200014000000073C000__000000914E3F38F0\n000000067F00004002000140000000738000-000000067F0000400200014000000073C000__000000931B9A2710\n000000067F0000400200014000000073C000-000000067F00004002000140000000740000__000000914E3F38F0\n000000067F0000400200014000000073C000-000000067F00004002000140000000740000__000000931B9A2710\n000000067F0000400200014000000073C77C-000000067F00004002000140000000745169__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000740000-000000067F00004002000140000000744000__000000914E3F38F0\n000000067F00004002000140000000740000-000000067F00004002000140000000744000__000000931B9A2710\n000000067F00004002000140000000744000-000000067F00004002000140000000748000__000000914E3F38F0\n000000067F00004002000140000000744000-000000067F00004002000140000000748000__000000931B9A2710\n000000067F00004002000140000000745169-000000067F0000400200014000000074DB4D__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000748000-000000067F0000400200014000000074C000__000000914E3F38F0\n000000067F00004002000140000000748000-000000067F0000400200014000000074C000__000000931B9A2710\n000000067F0000400200014000000074C000-000000067F00004002000140000000750000__000000914E3F3
8F0\n000000067F0000400200014000000074C000-000000067F00004002000140000000750000__000000931B9A2710\n000000067F0000400200014000000074DB4D-000000067F00004002000140000000756529__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000750000-000000067F00004002000140000000754000__000000914E3F38F0\n000000067F00004002000140000000750000-000000067F00004002000140000000754000__000000931B9A2710\n000000067F00004002000140000000754000-000000067F00004002000140000000758000__000000914E3F38F0\n000000067F00004002000140000000754000-000000067F00004002000140000000758000__000000931B9A2710\n000000067F00004002000140000000756529-000000067F0000400200014000000075EEF6__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000758000-000000067F0000400200014000000075C000__000000914E3F38F0\n000000067F00004002000140000000758000-000000067F0000400200014000000075C000__000000931B9A2710\n000000067F0000400200014000000075C000-000000067F00004002000140000000760000__000000914E3F38F0\n000000067F0000400200014000000075C000-000000067F00004002000140000000760000__000000931B9A2710\n000000067F0000400200014000000075EEF6-000000067F000040020001400000007678CA__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000760000-000000067F00004002000140000000764000__000000914E3F38F0\n000000067F00004002000140000000760000-000000067F00004002000140000000764000__000000931B9A2710\n000000067F00004002000140000000764000-000000067F00004002000140000000768000__000000914E3F38F0\n000000067F00004002000140000000764000-000000067F00004002000140000000768000__000000931B9A2710\n000000067F000040020001400000007678CA-000000067F000040020001400000007702AD__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000768000-000000067F0000400200014000000076C000__000000914E3F38F0\n000000067F00004002000140000000768000-000000067F0000400200014000000076C000__000000931B9A2710\n000000067F0000400200014000000076C000-000000067F00004002000140000000770000__000000914E3F38F0\n000000067F0000400200014000000076C000-000000067F00004002000140000000
770000__000000931B9A2710\n000000067F00004002000140000000770000-000000067F00004002000140000000774000__000000914E3F38F0\n000000067F00004002000140000000770000-000000067F00004002000140000000774000__000000931B9A2710\n000000067F000040020001400000007702AD-000000067F00004002000140000000778C92__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000774000-000000067F00004002000140000000778000__000000914E3F38F0\n000000067F00004002000140000000774000-000000067F00004002000140000000778000__000000931B9A2710\n000000067F00004002000140000000778000-000000067F0000400200014000000077C000__000000914E3F38F0\n000000067F00004002000140000000778000-000000067F0000400200014000000077C000__000000931B9A2710\n000000067F00004002000140000000778C92-000000067F0000400200014000000078166A__000000739A920D71-0000008D2DB5E0C1\n000000067F0000400200014000000077C000-000000067F00004002000140000000780000__000000914E3F38F0\n000000067F0000400200014000000077C000-000000067F00004002000140000000780000__000000931B9A2710\n000000067F00004002000140000000780000-000000067F00004002000140000000784000__000000914E3F38F0\n000000067F00004002000140000000780000-000000067F00004002000140000000784000__000000931B9A2710\n000000067F0000400200014000000078166A-000000067F0000400200014000000078A042__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000784000-000000067F00004002000140000000788000__000000914E3F38F0\n000000067F00004002000140000000784000-000000067F00004002000140000000788000__000000931B9A2710\n000000067F00004002000140000000788000-000000067F0000400200014000000078C000__000000914E3F38F0\n000000067F00004002000140000000788000-000000067F0000400200014000000078C000__000000931B9A2710\n000000067F0000400200014000000078A042-000000067F00004002000140000000792A24__000000739A920D71-0000008D2DB5E0C1\n000000067F0000400200014000000078C000-000000067F00004002000140000000790000__000000914E3F38F0\n000000067F0000400200014000000078C000-000000067F00004002000140000000790000__000000931B9A2710\n000000067F00004002000140000000790000-000000067
F00004002000140000000794000__000000914E3F38F0\n000000067F00004002000140000000790000-000000067F00004002000140000000794000__000000931B9A2710\n000000067F00004002000140000000792A24-000000067F0000400200014000000079B3FE__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000794000-000000067F00004002000140000000798000__000000914E3F38F0\n000000067F00004002000140000000794000-000000067F00004002000140000000798000__000000931B9A2710\n000000067F00004002000140000000798000-000000067F0000400200014000000079C000__000000914E3F38F0\n000000067F00004002000140000000798000-000000067F0000400200014000000079C000__000000931B9A2710\n000000067F0000400200014000000079B3FE-000000067F000040020001400000007A3DE6__000000739A920D71-0000008D2DB5E0C1\n000000067F0000400200014000000079C000-000000067F000040020001400000007A0000__000000914E3F38F0\n000000067F0000400200014000000079C000-000000067F000040020001400000007A0000__000000931B9A2710\n000000067F000040020001400000007A0000-000000067F000040020001400000007A4000__000000914E3F38F0\n000000067F000040020001400000007A0000-000000067F000040020001400000007A4000__000000931B9A2710\n000000067F000040020001400000007A3DE6-000000067F000040020001400000007AC7C4__000000739A920D71-0000008D2DB5E0C1\n000000067F000040020001400000007A4000-000000067F000040020001400000007A8000__000000914E3F38F0\n000000067F000040020001400000007A4000-000000067F000040020001400000007A8000__000000931B9A2710\n000000067F000040020001400000007A8000-000000067F000040020001400000007AC000__000000914E3F38F0\n000000067F000040020001400000007A8000-000000067F000040020001400000007AC000__000000931B9A2710\n000000067F000040020001400000007AC000-000000067F000040020001400000007B0000__000000914E3F38F0\n000000067F000040020001400000007AC000-000000067F000040020001400000007B0000__000000931B9A2710\n000000067F000040020001400000007AC7C4-000000067F000040020001400000007B51A6__000000739A920D71-0000008D2DB5E0C1\n000000067F000040020001400000007B0000-000000067F000040020001400000007B4000__000000914E3F38F0\n000000067F000040020001400
000007B0000-000000067F000040020001400000007B4000__000000931B9A2710\n000000067F000040020001400000007B4000-000000067F000040020001400000007B8000__000000914E3F38F0\n000000067F000040020001400000007B4000-000000067F000040020001400000007B8000__000000931B9A2710\n000000067F000040020001400000007B51A6-000000067F000040020001400000007BDB7E__000000739A920D71-0000008D2DB5E0C1\n000000067F000040020001400000007B8000-000000067F000040020001400000007BC000__000000914E3F38F0\n000000067F000040020001400000007B8000-000000067F000040020001400000007BC000__000000931B9A2710\n000000067F000040020001400000007BC000-000000067F000040020001400000007C0000__000000914E3F38F0\n000000067F000040020001400000007BC000-000000067F000040020001400000007C0000__000000931B9A2710\n000000067F000040020001400000007BDB7E-000000067F000040020001400000007C6558__000000739A920D71-0000008D2DB5E0C1\n000000067F000040020001400000007C0000-000000067F000040020001400000007C4000__000000914E3F38F0\n000000067F000040020001400000007C0000-000000067F000040020001400000007C4000__000000931B9A2710\n000000067F000040020001400000007C4000-000000067F000040020001400000007C8000__000000914E3F38F0\n000000067F000040020001400000007C4000-000000067F000040020001400000007C8000__000000931B9A2710\n000000067F000040020001400000007C6558-000000067F000040020001400000007CEF2A__000000739A920D71-0000008D2DB5E0C1\n000000067F000040020001400000007C8000-000000067F000040020001400000007CC000__000000914E3F38F0\n000000067F000040020001400000007C8000-000000067F000040020001400000007CC000__000000931B9A2710\n000000067F000040020001400000007CC000-000000067F000040020001400000007D0000__000000914E3F38F0\n000000067F000040020001400000007CC000-000000067F000040020001400000007D0000__000000931B9A2710\n000000067F000040020001400000007CEF2A-000000067F000040020001400000007D7903__000000739A920D71-0000008D2DB5E0C1\n000000067F000040020001400000007D0000-000000067F000040020001400000007D4000__000000914E3F38F0\n000000067F000040020001400000007D0000-000000067F000040020001400000007D4000__000000931B9A2710\n0000
00067F000040020001400000007D4000-000000067F000040020001400000007D8000__000000914E3F38F0\n000000067F000040020001400000007D4000-000000067F000040020001400000007D8000__000000931B9A2710\n000000067F000040020001400000007D7903-000000067F000040020001400000007E02D9__000000739A920D71-0000008D2DB5E0C1\n000000067F000040020001400000007D8000-000000067F000040020001400000007DC000__000000914E3F38F0\n000000067F000040020001400000007D8000-000000067F000040020001400000007DC000__000000931B9A2710\n000000067F000040020001400000007DC000-000000067F000040020001400000007E0000__000000914E3F38F0\n000000067F000040020001400000007DC000-000000067F000040020001400000007E0000__000000931B9A2710\n000000067F000040020001400000007E0000-000000067F000040020001400000007E4000__000000914E3F38F0\n000000067F000040020001400000007E0000-000000067F000040020001400000007E4000__000000931B9A2710\n000000067F000040020001400000007E02D9-000000067F000040020001400000007E8CAF__000000739A920D71-0000008D2DB5E0C1\n000000067F000040020001400000007E4000-000000067F000040020001400000007E8000__000000914E3F38F0\n000000067F000040020001400000007E4000-000000067F000040020001400000007E8000__000000931B9A2710\n000000067F000040020001400000007E8000-000000067F000040020001400000007EC000__000000914E3F38F0\n000000067F000040020001400000007E8000-000000067F000040020001400000007EC000__000000931B9A2710\n000000067F000040020001400000007E8CAF-000000067F000040020001400000007F1692__000000739A920D71-0000008D2DB5E0C1\n000000067F000040020001400000007EC000-000000067F000040020001400000007F0000__000000914E3F38F0\n000000067F000040020001400000007EC000-000000067F000040020001400000007F0000__000000931B9A2710\n000000067F000040020001400000007F0000-000000067F000040020001400000007F4000__000000914E3F38F0\n000000067F000040020001400000007F0000-000000067F000040020001400000007F4000__000000931B9A2710\n000000067F000040020001400000007F1692-000000067F000040020001400000007FA06B__000000739A920D71-0000008D2DB5E0C1\n000000067F000040020001400000007F4000-000000067F000040020001400000007F8000__0
00000914E3F38F0\n000000067F000040020001400000007F4000-000000067F000040020001400000007F8000__000000931B9A2710\n000000067F000040020001400000007F8000-000000067F000040020001400000007FC000__000000914E3F38F0\n000000067F000040020001400000007F8000-000000067F000040020001400000007FC000__000000931B9A2710\n000000067F000040020001400000007FA06B-000000067F00004002000140000000802A45__000000739A920D71-0000008D2DB5E0C1\n000000067F000040020001400000007FC000-000000067F00004002000140000000800000__000000914E3F38F0\n000000067F000040020001400000007FC000-000000067F00004002000140000000800000__000000931B9A2710\n000000067F00004002000140000000800000-000000067F00004002000140000000804000__000000914E3F38F0\n000000067F00004002000140000000800000-000000067F00004002000140000000804000__000000931B9A2710\n000000067F00004002000140000000802A45-000000067F0000400200014000000080B41D__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000804000-000000067F00004002000140000000808000__000000914E3F38F0\n000000067F00004002000140000000804000-000000067F00004002000140000000808000__000000931B9A2710\n000000067F00004002000140000000808000-000000067F0000400200014000000080C000__000000914E3F38F0\n000000067F00004002000140000000808000-000000067F0000400200014000000080C000__000000931B9A2710\n000000067F0000400200014000000080B41D-000000067F00004002000140000000813DF8__000000739A920D71-0000008D2DB5E0C1\n000000067F0000400200014000000080C000-000000067F00004002000140000000810000__000000914E3F38F0\n000000067F0000400200014000000080C000-000000067F00004002000140000000810000__000000931B9A2710\n000000067F00004002000140000000810000-000000067F00004002000140000000814000__000000914E3F38F0\n000000067F00004002000140000000810000-000000067F00004002000140000000814000__000000931B9A2710\n000000067F00004002000140000000813DF8-000000067F0000400200014000000081C7DB__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000814000-000000067F00004002000140000000818000__000000914E3F38F0\n000000067F00004002000140000000814000-000000067F00004002
000140000000818000__000000931B9A2710\n000000067F00004002000140000000818000-000000067F0000400200014000000081C000__000000914E3F38F0\n000000067F00004002000140000000818000-000000067F0000400200014000000081C000__000000931B9A2710\n000000067F0000400200014000000081C000-000000067F00004002000140000000820000__000000914E3F38F0\n000000067F0000400200014000000081C000-000000067F00004002000140000000820000__000000931B9A2710\n000000067F0000400200014000000081C7DB-000000067F000040020001400000008251B6__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000820000-000000067F00004002000140000000824000__000000914E3F38F0\n000000067F00004002000140000000820000-000000067F00004002000140000000824000__000000931B9A2710\n000000067F00004002000140000000824000-000000067F00004002000140000000828000__000000914E3F38F0\n000000067F00004002000140000000824000-000000067F00004002000140000000828000__000000931B9A2710\n000000067F000040020001400000008251B6-000000067F0000400200014000000082DB9B__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000828000-000000067F0000400200014000000082C000__000000914E3F38F0\n000000067F00004002000140000000828000-000000067F0000400200014000000082C000__000000931B9A2710\n000000067F0000400200014000000082C000-000000067F00004002000140000000830000__000000914E3F38F0\n000000067F0000400200014000000082C000-000000067F00004002000140000000830000__000000931B9A2710\n000000067F0000400200014000000082DB9B-000000067F00004002000140000000836584__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000830000-000000067F00004002000140000000834000__000000914E3F38F0\n000000067F00004002000140000000830000-000000067F00004002000140000000834000__000000931B9A2710\n000000067F00004002000140000000834000-000000067F00004002000140000000838000__000000914E3F38F0\n000000067F00004002000140000000834000-000000067F00004002000140000000838000__000000931B9A2710\n000000067F00004002000140000000836584-000000067F0000400200014000000083EF61__000000739A920D71-0000008D2DB5E0C1\n000000067F000040020001400000008380
00-000000067F0000400200014000000083C000__000000914E3F38F0\n000000067F00004002000140000000838000-000000067F0000400200014000000083C000__000000931B9A2710\n000000067F0000400200014000000083C000-000000067F00004002000140000000840000__000000914E3F38F0\n000000067F0000400200014000000083C000-000000067F00004002000140000000840000__000000931B9A2710\n000000067F0000400200014000000083EF61-000000067F00004002000140000000847939__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000840000-000000067F00004002000140000000844000__000000914E3F38F0\n000000067F00004002000140000000840000-000000067F00004002000140000000844000__000000931B9A2710\n000000067F00004002000140000000844000-000000067F00004002000140000000848000__000000914E3F38F0\n000000067F00004002000140000000844000-000000067F00004002000140000000848000__000000931B9A2710\n000000067F00004002000140000000847939-000000067F00004002000140000000850319__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000848000-000000067F0000400200014000000084C000__000000914E3F38F0\n000000067F00004002000140000000848000-000000067F0000400200014000000084C000__000000931B9A2710\n000000067F0000400200014000000084C000-000000067F00004002000140000000850000__000000914E3F38F0\n000000067F0000400200014000000084C000-000000067F00004002000140000000850000__000000931B9A2710\n000000067F00004002000140000000850000-000000067F00004002000140000000854000__000000914E3F38F0\n000000067F00004002000140000000850000-000000067F00004002000140000000854000__000000931B9A2710\n000000067F00004002000140000000850319-000000067F00004002000140000000858CEC__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000854000-000000067F00004002000140000000858000__000000914E3F38F0\n000000067F00004002000140000000854000-000000067F00004002000140000000858000__000000931B9A2710\n000000067F00004002000140000000858000-000000067F0000400200014000000085C000__000000914E3F38F0\n000000067F00004002000140000000858000-000000067F0000400200014000000085C000__000000931B9A2710\n000000067F00004002000140000000
858CEC-000000067F000040020001400000008616C0__000000739A920D71-0000008D2DB5E0C1\n000000067F0000400200014000000085C000-000000067F00004002000140000000860000__000000914E3F38F0\n000000067F0000400200014000000085C000-000000067F00004002000140000000860000__000000931B9A2710\n000000067F00004002000140000000860000-000000067F00004002000140000000864000__000000914E3F38F0\n000000067F00004002000140000000860000-000000067F00004002000140000000864000__000000931B9A2710\n000000067F000040020001400000008616C0-000000067F0000400200014000000086A0A7__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000864000-000000067F00004002000140000000868000__000000914E3F38F0\n000000067F00004002000140000000864000-000000067F00004002000140000000868000__000000931B9A2710\n000000067F00004002000140000000868000-000000067F0000400200014000000086C000__000000914E3F38F0\n000000067F00004002000140000000868000-000000067F0000400200014000000086C000__000000931B9A2710\n000000067F0000400200014000000086A0A7-000000067F00004002000140000000872A82__000000739A920D71-0000008D2DB5E0C1\n000000067F0000400200014000000086C000-000000067F00004002000140000000870000__000000914E3F38F0\n000000067F0000400200014000000086C000-000000067F00004002000140000000870000__000000931B9A2710\n000000067F00004002000140000000870000-000000067F00004002000140000000874000__000000914E3F38F0\n000000067F00004002000140000000870000-000000067F00004002000140000000874000__000000931B9A2710\n000000067F00004002000140000000872A82-000000067F0000400200014000000087B45F__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000874000-000000067F00004002000140000000878000__000000914E3F38F0\n000000067F00004002000140000000874000-000000067F00004002000140000000878000__000000931B9A2710\n000000067F00004002000140000000878000-000000067F0000400200014000000087C000__000000914E3F38F0\n000000067F00004002000140000000878000-000000067F0000400200014000000087C000__000000931B9A2710\n000000067F0000400200014000000087B45F-000000067F00004002000140000000883E35__000000739A920D71-0000008D2D
B5E0C1\n000000067F0000400200014000000087C000-000000067F00004002000140000000880000__000000914E3F38F0\n000000067F0000400200014000000087C000-000000067F00004002000140000000880000__000000931B9A2710\n000000067F00004002000140000000880000-000000067F00004002000140000000884000__000000914E3F38F0\n000000067F00004002000140000000880000-000000067F00004002000140000000884000__000000931B9A2710\n000000067F00004002000140000000883E35-000000067F0000400200014000000088C812__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000884000-000000067F00004002000140000000888000__000000914E3F38F0\n000000067F00004002000140000000884000-000000067F00004002000140000000888000__000000931B9A2710\n000000067F00004002000140000000888000-000000067F0000400200014000000088C000__000000914E3F38F0\n000000067F00004002000140000000888000-000000067F0000400200014000000088C000__000000931B9A2710\n000000067F0000400200014000000088C000-000000067F00004002000140000000890000__000000914E3F38F0\n000000067F0000400200014000000088C000-000000067F00004002000140000000890000__000000931B9A2710\n000000067F0000400200014000000088C812-000000067F000040020001400000008951E8__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000890000-000000067F00004002000140000000894000__000000914E3F38F0\n000000067F00004002000140000000890000-000000067F00004002000140000000894000__000000931B9A2710\n000000067F00004002000140000000894000-000000067F00004002000140000000898000__000000914E3F38F0\n000000067F00004002000140000000894000-000000067F00004002000140000000898000__000000931B9A2710\n000000067F000040020001400000008951E8-000000067F0000400200014000000089DBC5__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000898000-000000067F0000400200014000000089C000__000000914E3F38F0\n000000067F00004002000140000000898000-000000067F0000400200014000000089C000__000000931B9A2710\n000000067F0000400200014000000089C000-000000067F000040020001400000008A0000__000000914E3F38F0\n000000067F0000400200014000000089C000-000000067F000040020001400000008A0000__000000
931B9A2710\n000000067F0000400200014000000089DBC5-000000067F000040020001400000008A65A8__000000739A920D71-0000008D2DB5E0C1\n000000067F000040020001400000008A0000-000000067F000040020001400000008A4000__000000914E3F38F0\n000000067F000040020001400000008A0000-000000067F000040020001400000008A4000__000000931B9A2710\n000000067F000040020001400000008A4000-000000067F000040020001400000008A8000__000000914E3F38F0\n000000067F000040020001400000008A4000-000000067F000040020001400000008A8000__000000931B9A2710\n000000067F000040020001400000008A65A8-000000067F000040020001400000008AEF88__000000739A920D71-0000008D2DB5E0C1\n000000067F000040020001400000008A8000-000000067F000040020001400000008AC000__000000914E3F38F0\n000000067F000040020001400000008A8000-000000067F000040020001400000008AC000__000000931B9A2710\n000000067F000040020001400000008AC000-000000067F000040020001400000008B0000__000000914E3F38F0\n000000067F000040020001400000008AC000-000000067F000040020001400000008B0000__000000931B9A2710\n000000067F000040020001400000008AEF88-000000067F000040020001400000008B7971__000000739A920D71-0000008D2DB5E0C1\n000000067F000040020001400000008B0000-000000067F000040020001400000008B4000__000000914E3F38F0\n000000067F000040020001400000008B0000-000000067F000040020001400000008B4000__000000931B9A2710\n000000067F000040020001400000008B4000-000000067F000040020001400000008B8000__000000914E3F38F0\n000000067F000040020001400000008B4000-000000067F000040020001400000008B8000__000000931B9A2710\n000000067F000040020001400000008B7971-000000067F000040020001400000008C034C__000000739A920D71-0000008D2DB5E0C1\n000000067F000040020001400000008B8000-000000067F000040020001400000008BC000__000000914E3F38F0\n000000067F000040020001400000008B8000-000000067F000040020001400000008BC000__000000931B9A2710\n000000067F000040020001400000008BC000-000000067F000040020001400000008C0000__000000914E3F38F0\n000000067F000040020001400000008BC000-000000067F000040020001400000008C0000__000000931B9A2710\n000000067F000040020001400000008C0000-000000067F0000400200014
00000008C4000__000000914E3F38F0\n000000067F000040020001400000008C0000-000000067F000040020001400000008C4000__000000931B9A2710\n000000067F000040020001400000008C034C-000000067F000040020001400000008C8D24__000000739A920D71-0000008D2DB5E0C1\n000000067F000040020001400000008C4000-000000067F000040020001400000008C8000__000000914E3F38F0\n000000067F000040020001400000008C4000-000000067F000040020001400000008C8000__000000931B9A2710\n000000067F000040020001400000008C8000-000000067F000040020001400000008CC000__000000914E3F38F0\n000000067F000040020001400000008C8000-000000067F000040020001400000008CC000__000000931B9A2710\n000000067F000040020001400000008C8D24-000000067F000040020001400000008D16F9__000000739A920D71-0000008D2DB5E0C1\n000000067F000040020001400000008CC000-000000067F000040020001400000008D0000__000000914E3F38F0\n000000067F000040020001400000008CC000-000000067F000040020001400000008D0000__000000931B9A2710\n000000067F000040020001400000008D0000-000000067F000040020001400000008D4000__000000914E3F38F0\n000000067F000040020001400000008D0000-000000067F000040020001400000008D4000__000000931B9A2710\n000000067F000040020001400000008D16F9-000000067F000040020001400000008DA0DC__000000739A920D71-0000008D2DB5E0C1\n000000067F000040020001400000008D4000-000000067F000040020001400000008D8000__000000914E3F38F0\n000000067F000040020001400000008D4000-000000067F000040020001400000008D8000__000000931B9A2710\n000000067F000040020001400000008D8000-000000067F000040020001400000008DC000__000000914E3F38F0\n000000067F000040020001400000008D8000-000000067F000040020001400000008DC000__000000931B9A2710\n000000067F000040020001400000008DA0DC-000000067F000040020001400000008E2AC6__000000739A920D71-0000008D2DB5E0C1\n000000067F000040020001400000008DC000-000000067F000040020001400000008E0000__000000914E3F38F0\n000000067F000040020001400000008DC000-000000067F000040020001400000008E0000__000000931B9A2710\n000000067F000040020001400000008E0000-000000067F000040020001400000008E4000__000000914E3F38F0\n000000067F000040020001400000008E0000-00
0000067F000040020001400000008E4000__000000931B9A2710\n000000067F000040020001400000008E2AC6-000000067F000040020001400000008EB4A1__000000739A920D71-0000008D2DB5E0C1\n000000067F000040020001400000008E4000-000000067F000040020001400000008E8000__000000914E3F38F0\n000000067F000040020001400000008E4000-000000067F000040020001400000008E8000__000000931B9A2710\n000000067F000040020001400000008E8000-000000067F000040020001400000008EC000__000000914E3F38F0\n000000067F000040020001400000008E8000-000000067F000040020001400000008EC000__000000931B9A2710\n000000067F000040020001400000008EB4A1-000000067F000040020001400000008F3E7F__000000739A920D71-0000008D2DB5E0C1\n000000067F000040020001400000008EC000-000000067F000040020001400000008F0000__000000914E3F38F0\n000000067F000040020001400000008EC000-000000067F000040020001400000008F0000__000000931B9A2710\n000000067F000040020001400000008F0000-000000067F000040020001400000008F4000__000000914E3F38F0\n000000067F000040020001400000008F0000-000000067F000040020001400000008F4000__000000931B9A2710\n000000067F000040020001400000008F3E7F-000000067F000040020001400000008FC85E__000000739A920D71-0000008D2DB5E0C1\n000000067F000040020001400000008F4000-000000067F000040020001400000008F8000__000000914E3F38F0\n000000067F000040020001400000008F4000-000000067F000040020001400000008F8000__000000931B9A2710\n000000067F000040020001400000008F8000-000000067F000040020001400000008FC000__000000914E3F38F0\n000000067F000040020001400000008F8000-000000067F000040020001400000008FC000__000000931B9A2710\n000000067F000040020001400000008FC000-000000067F00004002000140000000900000__000000914E3F38F0\n000000067F000040020001400000008FC000-000000067F00004002000140000000900000__000000931B9A2710\n000000067F000040020001400000008FC85E-000000067F0000400200014000000090523C__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000900000-000000067F00004002000140000000904000__000000914E3F38F0\n000000067F00004002000140000000900000-000000067F00004002000140000000904000__000000931B9A2710\n000000067F00004002
000140000000904000-000000067F00004002000140000000908000__000000914E3F38F0\n000000067F00004002000140000000904000-000000067F00004002000140000000908000__000000931B9A2710\n000000067F0000400200014000000090523C-000000067F0000400200014000000090DC13__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000908000-000000067F0000400200014000000090C000__000000914E3F38F0\n000000067F00004002000140000000908000-000000067F0000400200014000000090C000__000000931B9A2710\n000000067F0000400200014000000090C000-000000067F00004002000140000000910000__000000914E3F38F0\n000000067F0000400200014000000090C000-000000067F00004002000140000000910000__000000931B9A2710\n000000067F0000400200014000000090DC13-000000067F000040020001400000009165D8__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000910000-000000067F00004002000140000000914000__000000914E3F38F0\n000000067F00004002000140000000910000-000000067F00004002000140000000914000__000000931B9A2710\n000000067F00004002000140000000914000-000000067F00004002000140000000918000__000000914E3F38F0\n000000067F00004002000140000000914000-000000067F00004002000140000000918000__000000931B9A2710\n000000067F000040020001400000009165D8-000000067F0000400200014000000091EFC0__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000918000-000000067F0000400200014000000091C000__000000914E3F38F0\n000000067F00004002000140000000918000-000000067F0000400200014000000091C000__000000931B9A2710\n000000067F0000400200014000000091C000-000000067F00004002000140000000920000__000000914E3F38F0\n000000067F0000400200014000000091C000-000000067F00004002000140000000920000__000000931B9A2710\n000000067F0000400200014000000091EFC0-000000067F000040020001400000009279A0__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000920000-000000067F00004002000140000000924000__000000914E3F38F0\n000000067F00004002000140000000920000-000000067F00004002000140000000924000__000000931B9A2710\n000000067F00004002000140000000924000-000000067F00004002000140000000928000__000000914E3F38F
0\n000000067F00004002000140000000924000-000000067F00004002000140000000928000__000000931B9A2710\n000000067F000040020001400000009279A0-000000067F0000400200014000000093037A__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000928000-000000067F0000400200014000000092C000__000000914E3F38F0\n000000067F00004002000140000000928000-000000067F0000400200014000000092C000__000000931B9A2710\n000000067F0000400200014000000092C000-000000067F00004002000140000000930000__000000914E3F38F0\n000000067F0000400200014000000092C000-000000067F00004002000140000000930000__000000931B9A2710\n000000067F00004002000140000000930000-000000067F00004002000140000000934000__000000914E3F38F0\n000000067F00004002000140000000930000-000000067F00004002000140000000934000__000000931B9A2710\n000000067F0000400200014000000093037A-000000067F00004002000140000000938D5F__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000934000-000000067F00004002000140000000938000__000000914E3F38F0\n000000067F00004002000140000000934000-000000067F00004002000140000000938000__000000931B9A2710\n000000067F00004002000140000000938000-000000067F0000400200014000000093C000__000000914E3F38F0\n000000067F00004002000140000000938000-000000067F0000400200014000000093C000__000000931B9A2710\n000000067F00004002000140000000938D5F-000000067F00004002000140000000941744__000000739A920D71-0000008D2DB5E0C1\n000000067F0000400200014000000093C000-000000067F00004002000140000000940000__000000914E3F38F0\n000000067F0000400200014000000093C000-000000067F00004002000140000000940000__000000931B9A2710\n000000067F00004002000140000000940000-000000067F00004002000140000000944000__000000914E3F38F0\n000000067F00004002000140000000940000-000000067F00004002000140000000944000__000000931B9A2710\n000000067F00004002000140000000941744-000000067F0000400200014000000094A116__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000944000-000000067F00004002000140000000948000__000000914E3F38F0\n000000067F00004002000140000000944000-000000067F0000400200014000000094
8000__000000931B9A2710\n000000067F00004002000140000000948000-000000067F0000400200014000000094C000__000000914E3F38F0\n000000067F00004002000140000000948000-000000067F0000400200014000000094C000__000000931B9A2710\n000000067F0000400200014000000094A116-000000067F00004002000140000000952AE4__000000739A920D71-0000008D2DB5E0C1\n000000067F0000400200014000000094C000-000000067F00004002000140000000950000__000000914E3F38F0\n000000067F0000400200014000000094C000-000000067F00004002000140000000950000__000000931B9A2710\n000000067F00004002000140000000950000-000000067F00004002000140000000954000__000000914E3F38F0\n000000067F00004002000140000000950000-000000067F00004002000140000000954000__000000931B9A2710\n000000067F00004002000140000000952AE4-000000067F0000400200014000000095B4CF__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000954000-000000067F00004002000140000000958000__000000914E3F38F0\n000000067F00004002000140000000954000-000000067F00004002000140000000958000__000000931B9A2710\n000000067F00004002000140000000958000-000000067F0000400200014000000095C000__000000914E3F38F0\n000000067F00004002000140000000958000-000000067F0000400200014000000095C000__000000931B9A2710\n000000067F0000400200014000000095B4CF-000000067F00004002000140000000963EB4__000000739A920D71-0000008D2DB5E0C1\n000000067F0000400200014000000095C000-000000067F00004002000140000000960000__000000914E3F38F0\n000000067F0000400200014000000095C000-000000067F00004002000140000000960000__000000931B9A2710\n000000067F00004002000140000000960000-000000067F00004002000140000000964000__000000914E3F38F0\n000000067F00004002000140000000960000-000000067F00004002000140000000964000__000000931B9A2710\n000000067F00004002000140000000963EB4-000000067F0000400200014000000096C887__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000964000-000000067F00004002000140000000968000__000000914E3F38F0\n000000067F00004002000140000000964000-000000067F00004002000140000000968000__000000931B9A2710\n000000067F00004002000140000000968000-000000067F0
000400200014000000096C000__000000914E3F38F0\n000000067F00004002000140000000968000-000000067F0000400200014000000096C000__000000931B9A2710\n000000067F0000400200014000000096C000-000000067F00004002000140000000970000__000000914E3F38F0\n000000067F0000400200014000000096C000-000000067F00004002000140000000970000__000000931B9A2710\n000000067F0000400200014000000096C887-000000067F0000400200014000000097527C__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000970000-000000067F00004002000140000000974000__000000914E3F38F0\n000000067F00004002000140000000970000-000000067F00004002000140000000974000__000000931B9A2710\n000000067F00004002000140000000974000-000000067F00004002000140000000978000__000000914E3F38F0\n000000067F00004002000140000000974000-000000067F00004002000140000000978000__000000931B9A2710\n000000067F0000400200014000000097527C-000000067F0000400200014000000097DC5A__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000978000-000000067F0000400200014000000097C000__000000914E3F38F0\n000000067F00004002000140000000978000-000000067F0000400200014000000097C000__000000931B9A2710\n000000067F0000400200014000000097C000-000000067F00004002000140000000980000__000000914E3F38F0\n000000067F0000400200014000000097C000-000000067F00004002000140000000980000__000000931B9A2710\n000000067F0000400200014000000097DC5A-000000067F00004002000140000000986635__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000980000-000000067F00004002000140000000984000__000000914E3F38F0\n000000067F00004002000140000000980000-000000067F00004002000140000000984000__000000931B9A2710\n000000067F00004002000140000000984000-000000067F00004002000140000000988000__000000914E3F38F0\n000000067F00004002000140000000984000-000000067F00004002000140000000988000__000000931B9A2710\n000000067F00004002000140000000986635-000000067F0000400200014000000098F001__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000988000-000000067F0000400200014000000098C000__000000914E3F38F0\n000000067F00004002000140000
000988000-000000067F0000400200014000000098C000__000000931B9A2710\n000000067F0000400200014000000098C000-000000067F00004002000140000000990000__000000914E3F38F0\n000000067F0000400200014000000098C000-000000067F00004002000140000000990000__000000931B9A2710\n000000067F0000400200014000000098F001-000000067F000040020001400000009979DC__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000990000-000000067F00004002000140000000994000__000000914E3F38F0\n000000067F00004002000140000000990000-000000067F00004002000140000000994000__000000931B9A2710\n000000067F00004002000140000000994000-000000067F00004002000140000000998000__000000914E3F38F0\n000000067F00004002000140000000994000-000000067F00004002000140000000998000__000000931B9A2710\n000000067F000040020001400000009979DC-000000067F000040020001400000009A03BB__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000998000-000000067F0000400200014000000099C000__000000914E3F38F0\n000000067F00004002000140000000998000-000000067F0000400200014000000099C000__000000931B9A2710\n000000067F0000400200014000000099C000-000000067F000040020001400000009A0000__000000914E3F38F0\n000000067F0000400200014000000099C000-000000067F000040020001400000009A0000__000000931B9A2710\n000000067F000040020001400000009A0000-000000067F000040020001400000009A4000__000000914E3F38F0\n000000067F000040020001400000009A0000-000000067F000040020001400000009A4000__000000931B9A2710\n000000067F000040020001400000009A03BB-000000067F000040020001400000009A8D9C__000000739A920D71-0000008D2DB5E0C1\n000000067F000040020001400000009A4000-000000067F000040020001400000009A8000__000000914E3F38F0\n000000067F000040020001400000009A4000-000000067F000040020001400000009A8000__000000931B9A2710\n000000067F000040020001400000009A8000-000000067F000040020001400000009AC000__000000914E3F38F0\n000000067F000040020001400000009A8000-000000067F000040020001400000009AC000__000000931B9A2710\n000000067F000040020001400000009A8D9C-000000067F000040020001400000009B1778__000000739A920D71-0000008D2DB5E0C1\n000000
067F000040020001400000009AC000-000000067F000040020001400000009B0000__000000914E3F38F0\n000000067F000040020001400000009AC000-000000067F000040020001400000009B0000__000000931B9A2710\n000000067F000040020001400000009B0000-000000067F000040020001400000009B4000__000000914E3F38F0\n000000067F000040020001400000009B0000-000000067F000040020001400000009B4000__000000931B9A2710\n000000067F000040020001400000009B1778-000000067F000040020001400000009BA15D__000000739A920D71-0000008D2DB5E0C1\n000000067F000040020001400000009B4000-000000067F000040020001400000009B8000__000000914E3F38F0\n000000067F000040020001400000009B4000-000000067F000040020001400000009B8000__000000931B9A2710\n000000067F000040020001400000009B8000-000000067F000040020001400000009BC000__000000914E3F38F0\n000000067F000040020001400000009B8000-000000067F000040020001400000009BC000__000000931B9A2710\n000000067F000040020001400000009BA15D-000000067F000040020001400000009C2B39__000000739A920D71-0000008D2DB5E0C1\n000000067F000040020001400000009BC000-000000067F000040020001400000009C0000__000000914E3F38F0\n000000067F000040020001400000009BC000-000000067F000040020001400000009C0000__000000931B9A2710\n000000067F000040020001400000009C0000-000000067F000040020001400000009C4000__000000914E3F38F0\n000000067F000040020001400000009C0000-000000067F000040020001400000009C4000__000000931B9A2710\n000000067F000040020001400000009C2B39-000000067F000040020001400000009CB50E__000000739A920D71-0000008D2DB5E0C1\n000000067F000040020001400000009C4000-000000067F000040020001400000009C8000__000000914E3F38F0\n000000067F000040020001400000009C4000-000000067F000040020001400000009C8000__000000931B9A2710\n000000067F000040020001400000009C8000-000000067F000040020001400000009CC000__000000914E3F38F0\n000000067F000040020001400000009C8000-000000067F000040020001400000009CC000__000000931B9A2710\n000000067F000040020001400000009CB50E-000000067F000040020001400000009D3EE6__000000739A920D71-0000008D2DB5E0C1\n000000067F000040020001400000009CC000-000000067F000040020001400000009D0000__000
000914E3F38F0\n000000067F000040020001400000009CC000-000000067F000040020001400000009D0000__000000931B9A2710\n000000067F000040020001400000009D0000-000000067F000040020001400000009D4000__000000914E3F38F0\n000000067F000040020001400000009D0000-000000067F000040020001400000009D4000__000000931B9A2710\n000000067F000040020001400000009D3EE6-000000067F000040020001400000009DC8C6__000000739A920D71-0000008D2DB5E0C1\n000000067F000040020001400000009D4000-000000067F000040020001400000009D8000__000000914E3F38F0\n000000067F000040020001400000009D4000-000000067F000040020001400000009D8000__000000931B9A2710\n000000067F000040020001400000009D8000-000000067F000040020001400000009DC000__000000914E3F38F0\n000000067F000040020001400000009D8000-000000067F000040020001400000009DC000__000000931B9A2710\n000000067F000040020001400000009DC000-000000067F000040020001400000009E0000__000000914E3F38F0\n000000067F000040020001400000009DC000-000000067F000040020001400000009E0000__000000931B9A2710\n000000067F000040020001400000009DC8C6-000000067F000040020001400000009E52AA__000000739A920D71-0000008D2DB5E0C1\n000000067F000040020001400000009E0000-000000067F000040020001400000009E4000__000000914E3F38F0\n000000067F000040020001400000009E0000-000000067F000040020001400000009E4000__000000931B9A2710\n000000067F000040020001400000009E4000-000000067F000040020001400000009E8000__000000914E3F38F0\n000000067F000040020001400000009E4000-000000067F000040020001400000009E8000__000000931B9A2710\n000000067F000040020001400000009E52AA-000000067F000040020001400000009EDC8B__000000739A920D71-0000008D2DB5E0C1\n000000067F000040020001400000009E8000-000000067F000040020001400000009EC000__000000914E3F38F0\n000000067F000040020001400000009E8000-000000067F000040020001400000009EC000__000000931B9A2710\n000000067F000040020001400000009EC000-000000067F000040020001400000009F0000__000000914E3F38F0\n000000067F000040020001400000009EC000-000000067F000040020001400000009F0000__000000931B9A2710\n000000067F000040020001400000009EDC8B-000000067F000040020001400000009F666E_
_000000739A920D71-0000008D2DB5E0C1\n000000067F000040020001400000009F0000-000000067F000040020001400000009F4000__000000914E3F38F0\n000000067F000040020001400000009F0000-000000067F000040020001400000009F4000__000000931B9A2710\n000000067F000040020001400000009F4000-000000067F000040020001400000009F8000__000000914E3F38F0\n000000067F000040020001400000009F4000-000000067F000040020001400000009F8000__000000931B9A2710\n000000067F000040020001400000009F666E-000000067F000040020001400000009FF04D__000000739A920D71-0000008D2DB5E0C1\n000000067F000040020001400000009F8000-000000067F000040020001400000009FC000__000000914E3F38F0\n000000067F000040020001400000009F8000-000000067F000040020001400000009FC000__000000931B9A2710\n000000067F000040020001400000009FC000-000000067F00004002000140000000A00000__000000914E3F38F0\n000000067F000040020001400000009FC000-000000067F00004002000140000000A00000__000000931B9A2710\n000000067F000040020001400000009FF04D-000000067F00004002000140000000A07A27__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000A00000-000000067F00004002000140000000A04000__000000914E3F38F0\n000000067F00004002000140000000A00000-000000067F00004002000140000000A04000__000000931B9A2710\n000000067F00004002000140000000A04000-000000067F00004002000140000000A08000__000000914E3F38F0\n000000067F00004002000140000000A04000-000000067F00004002000140000000A08000__000000931B9A2710\n000000067F00004002000140000000A07A27-000000067F00004002000140000000A103FD__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000A08000-000000067F00004002000140000000A0C000__000000914E3F38F0\n000000067F00004002000140000000A08000-000000067F00004002000140000000A0C000__000000931B9A2710\n000000067F00004002000140000000A0C000-000000067F00004002000140000000A10000__000000914E3F38F0\n000000067F00004002000140000000A0C000-000000067F00004002000140000000A10000__000000931B9A2710\n000000067F00004002000140000000A10000-000000067F00004002000140000000A14000__000000914E3F38F0\n000000067F00004002000140000000A10000-000000067F000040
02000140000000A14000__000000931B9A2710\n000000067F00004002000140000000A103FD-000000067F00004002000140000000A18DD8__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000A14000-000000067F00004002000140000000A18000__000000914E3F38F0\n000000067F00004002000140000000A14000-000000067F00004002000140000000A18000__000000931B9A2710\n000000067F00004002000140000000A18000-000000067F00004002000140000000A1C000__000000914E3F38F0\n000000067F00004002000140000000A18000-000000067F00004002000140000000A1C000__000000931B9A2710\n000000067F00004002000140000000A18DD8-000000067F00004002000140000000A217BD__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000A1C000-000000067F00004002000140000000A20000__000000914E3F38F0\n000000067F00004002000140000000A1C000-000000067F00004002000140000000A20000__000000931B9A2710\n000000067F00004002000140000000A20000-000000067F00004002000140000000A24000__000000914E3F38F0\n000000067F00004002000140000000A20000-000000067F00004002000140000000A24000__000000931B9A2710\n000000067F00004002000140000000A217BD-000000067F00004002000140000000A2A192__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000A24000-000000067F00004002000140000000A28000__000000914E3F38F0\n000000067F00004002000140000000A24000-000000067F00004002000140000000A28000__000000931B9A2710\n000000067F00004002000140000000A28000-000000067F00004002000140000000A2C000__000000914E3F38F0\n000000067F00004002000140000000A28000-000000067F00004002000140000000A2C000__000000931B9A2710\n000000067F00004002000140000000A2A192-000000067F00004002000140000000A32B76__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000A2C000-000000067F00004002000140000000A30000__000000914E3F38F0\n000000067F00004002000140000000A2C000-000000067F00004002000140000000A30000__000000931B9A2710\n000000067F00004002000140000000A30000-000000067F00004002000140000000A34000__000000914E3F38F0\n000000067F00004002000140000000A30000-000000067F00004002000140000000A34000__000000931B9A2710\n000000067F00004002000140000000A3
2B76-000000067F00004002000140000000A3B553__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000A34000-000000067F00004002000140000000A38000__000000914E3F38F0\n000000067F00004002000140000000A34000-000000067F00004002000140000000A38000__000000931B9A2710\n000000067F00004002000140000000A38000-000000067F00004002000140000000A3C000__000000914E3F38F0\n000000067F00004002000140000000A38000-000000067F00004002000140000000A3C000__000000931B9A2710\n000000067F00004002000140000000A3B553-000000067F00004002000140000000A43F22__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000A3C000-000000067F00004002000140000000A40000__000000914E3F38F0\n000000067F00004002000140000000A3C000-000000067F00004002000140000000A40000__000000931B9A2710\n000000067F00004002000140000000A40000-000000067F00004002000140000000A44000__000000914E3F38F0\n000000067F00004002000140000000A40000-000000067F00004002000140000000A44000__000000931B9A2710\n000000067F00004002000140000000A43F22-000000067F00004002000140000000A4C8FF__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000A44000-000000067F00004002000140000000A48000__000000914E3F38F0\n000000067F00004002000140000000A44000-000000067F00004002000140000000A48000__000000931B9A2710\n000000067F00004002000140000000A48000-000000067F00004002000140000000A4C000__000000914E3F38F0\n000000067F00004002000140000000A48000-000000067F00004002000140000000A4C000__000000931B9A2710\n000000067F00004002000140000000A4C000-000000067F00004002000140000000A50000__000000914E3F38F0\n000000067F00004002000140000000A4C000-000000067F00004002000140000000A50000__000000931B9A2710\n000000067F00004002000140000000A4C8FF-000000067F00004002000140000000A552E5__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000A50000-000000067F00004002000140000000A54000__000000914E3F38F0\n000000067F00004002000140000000A50000-000000067F00004002000140000000A54000__000000931B9A2710\n000000067F00004002000140000000A54000-000000067F00004002000140000000A58000__000000914E3F38F0\n000000067F0
0004002000140000000A54000-000000067F00004002000140000000A58000__000000931B9A2710\n000000067F00004002000140000000A552E5-000000067F00004002000140000000A5DCCD__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000A58000-000000067F00004002000140000000A5C000__000000914E3F38F0\n000000067F00004002000140000000A58000-000000067F00004002000140000000A5C000__000000931B9A2710\n000000067F00004002000140000000A5C000-000000067F00004002000140000000A60000__000000914E3F38F0\n000000067F00004002000140000000A5C000-000000067F00004002000140000000A60000__000000931B9A2710\n000000067F00004002000140000000A5DCCD-000000067F00004002000140000000A666AB__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000A60000-000000067F00004002000140000000A64000__000000914E3F38F0\n000000067F00004002000140000000A60000-000000067F00004002000140000000A64000__000000931B9A2710\n000000067F00004002000140000000A64000-000000067F00004002000140000000A68000__000000914E3F38F0\n000000067F00004002000140000000A64000-000000067F00004002000140000000A68000__000000931B9A2710\n000000067F00004002000140000000A666AB-000000067F00004002000140000000A6F093__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000A68000-000000067F00004002000140000000A6C000__000000914E3F38F0\n000000067F00004002000140000000A68000-000000067F00004002000140000000A6C000__000000931B9A2710\n000000067F00004002000140000000A6C000-000000067F00004002000140000000A70000__000000914E3F38F0\n000000067F00004002000140000000A6C000-000000067F00004002000140000000A70000__000000931B9A2710\n000000067F00004002000140000000A6F093-000000067F00004002000140000000A77A6F__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000A70000-000000067F00004002000140000000A74000__000000914E3F38F0\n000000067F00004002000140000000A70000-000000067F00004002000140000000A74000__000000931B9A2710\n000000067F00004002000140000000A74000-000000067F00004002000140000000A78000__000000914E3F38F0\n000000067F00004002000140000000A74000-000000067F00004002000140000000A78000__00000093
1B9A2710\n000000067F00004002000140000000A77A6F-000000067F00004002000140000000A80445__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000A78000-000000067F00004002000140000000A7C000__000000914E3F38F0\n000000067F00004002000140000000A78000-000000067F00004002000140000000A7C000__000000931B9A2710\n000000067F00004002000140000000A7C000-000000067F00004002000140000000A80000__000000914E3F38F0\n000000067F00004002000140000000A7C000-000000067F00004002000140000000A80000__000000931B9A2710\n000000067F00004002000140000000A80000-000000067F00004002000140000000A84000__000000914E3F38F0\n000000067F00004002000140000000A80000-000000067F00004002000140000000A84000__000000931B9A2710\n000000067F00004002000140000000A80445-000000067F00004002000140000000A88E32__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000A84000-000000067F00004002000140000000A88000__000000914E3F38F0\n000000067F00004002000140000000A84000-000000067F00004002000140000000A88000__000000931B9A2710\n000000067F00004002000140000000A88000-000000067F00004002000140000000A8C000__000000914E3F38F0\n000000067F00004002000140000000A88000-000000067F00004002000140000000A8C000__000000931B9A2710\n000000067F00004002000140000000A88E32-000000067F00004002000140000000A91804__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000A8C000-000000067F00004002000140000000A90000__000000914E3F38F0\n000000067F00004002000140000000A8C000-000000067F00004002000140000000A90000__000000931B9A2710\n000000067F00004002000140000000A90000-000000067F00004002000140000000A94000__000000914E3F38F0\n000000067F00004002000140000000A90000-000000067F00004002000140000000A94000__000000931B9A2710\n000000067F00004002000140000000A91804-000000067F00004002000140000000A9A1D9__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000A94000-000000067F00004002000140000000A98000__000000914E3F38F0\n000000067F00004002000140000000A94000-000000067F00004002000140000000A98000__000000931B9A2710\n000000067F00004002000140000000A98000-000000067F000040020001400
00000A9C000__000000914E3F38F0\n000000067F00004002000140000000A98000-000000067F00004002000140000000A9C000__000000931B9A2710\n000000067F00004002000140000000A9A1D9-000000067F00004002000140000000AA2BBC__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000A9C000-000000067F00004002000140000000AA0000__000000914E3F38F0\n000000067F00004002000140000000A9C000-000000067F00004002000140000000AA0000__000000931B9A2710\n000000067F00004002000140000000AA0000-000000067F00004002000140000000AA4000__000000914E3F38F0\n000000067F00004002000140000000AA0000-000000067F00004002000140000000AA4000__000000931B9A2710\n000000067F00004002000140000000AA2BBC-000000067F00004002000140000000AAB5A0__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000AA4000-000000067F00004002000140000000AA8000__000000914E3F38F0\n000000067F00004002000140000000AA4000-000000067F00004002000140000000AA8000__000000931B9A2710\n000000067F00004002000140000000AA8000-000000067F00004002000140000000AAC000__000000914E3F38F0\n000000067F00004002000140000000AA8000-000000067F00004002000140000000AAC000__000000931B9A2710\n000000067F00004002000140000000AAB5A0-000000067F00004002000140000000AB3F74__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000AAC000-000000067F00004002000140000000AB0000__000000914E3F38F0\n000000067F00004002000140000000AAC000-000000067F00004002000140000000AB0000__000000931B9A2710\n000000067F00004002000140000000AB0000-000000067F00004002000140000000AB4000__000000914E3F38F0\n000000067F00004002000140000000AB0000-000000067F00004002000140000000AB4000__000000931B9A2710\n000000067F00004002000140000000AB3F74-000000067F00004002000140000000ABC949__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000AB4000-000000067F00004002000140000000AB8000__000000914E3F38F0\n000000067F00004002000140000000AB4000-000000067F00004002000140000000AB8000__000000931B9A2710\n000000067F00004002000140000000AB8000-000000067F00004002000140000000ABC000__000000914E3F38F0\n000000067F00004002000140000000AB8000-0000
00067F00004002000140000000ABC000__000000931B9A2710\n000000067F00004002000140000000ABC000-000000067F00004002000140000000AC0000__000000914E3F38F0\n000000067F00004002000140000000ABC000-000000067F00004002000140000000AC0000__000000931B9A2710\n000000067F00004002000140000000ABC949-000000067F00004002000140000000AC5324__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000AC0000-000000067F00004002000140000000AC4000__000000914E3F38F0\n000000067F00004002000140000000AC0000-000000067F00004002000140000000AC4000__000000931B9A2710\n000000067F00004002000140000000AC4000-000000067F00004002000140000000AC8000__000000914E3F38F0\n000000067F00004002000140000000AC4000-000000067F00004002000140000000AC8000__000000931B9A2710\n000000067F00004002000140000000AC5324-000000067F00004002000140000000ACDCFB__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000AC8000-000000067F00004002000140000000ACC000__000000914E3F38F0\n000000067F00004002000140000000AC8000-000000067F00004002000140000000ACC000__000000931B9A2710\n000000067F00004002000140000000ACC000-000000067F00004002000140000000AD0000__000000914E3F38F0\n000000067F00004002000140000000ACC000-000000067F00004002000140000000AD0000__000000931B9A2710\n000000067F00004002000140000000ACDCFB-000000067F00004002000140000000AD66E4__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000AD0000-000000067F00004002000140000000AD4000__000000914E3F38F0\n000000067F00004002000140000000AD0000-000000067F00004002000140000000AD4000__000000931B9A2710\n000000067F00004002000140000000AD4000-000000067F00004002000140000000AD8000__000000914E3F38F0\n000000067F00004002000140000000AD4000-000000067F00004002000140000000AD8000__000000931B9A2710\n000000067F00004002000140000000AD66E4-000000067F00004002000140000000ADF0C3__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000AD8000-000000067F00004002000140000000ADC000__000000914E3F38F0\n000000067F00004002000140000000AD8000-000000067F00004002000140000000ADC000__000000931B9A2710\n000000067F0000400200
0140000000ADC000-000000067F00004002000140000000AE0000__000000914E3F38F0\n000000067F00004002000140000000ADC000-000000067F00004002000140000000AE0000__000000931B9A2710\n000000067F00004002000140000000ADF0C3-000000067F00004002000140000000AE7AA6__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000AE0000-000000067F00004002000140000000AE4000__000000914E3F38F0\n000000067F00004002000140000000AE0000-000000067F00004002000140000000AE4000__000000931B9A2710\n000000067F00004002000140000000AE4000-000000067F00004002000140000000AE8000__000000914E3F38F0\n000000067F00004002000140000000AE4000-000000067F00004002000140000000AE8000__000000931B9A2710\n000000067F00004002000140000000AE7AA6-000000067F00004002000140000000AF047C__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000AE8000-000000067F00004002000140000000AEC000__000000914E3F38F0\n000000067F00004002000140000000AE8000-000000067F00004002000140000000AEC000__000000931B9A2710\n000000067F00004002000140000000AEC000-000000067F00004002000140000000AF0000__000000914E3F38F0\n000000067F00004002000140000000AEC000-000000067F00004002000140000000AF0000__000000931B9A2710\n000000067F00004002000140000000AF0000-000000067F00004002000140000000AF4000__000000914E3F38F0\n000000067F00004002000140000000AF0000-000000067F00004002000140000000AF4000__000000931B9A2710\n000000067F00004002000140000000AF047C-000000067F00004002000140000000AF8E55__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000AF4000-000000067F00004002000140000000AF8000__000000914E3F38F0\n000000067F00004002000140000000AF4000-000000067F00004002000140000000AF8000__000000931B9A2710\n000000067F00004002000140000000AF8000-000000067F00004002000140000000AFC000__000000914E3F38F0\n000000067F00004002000140000000AF8000-000000067F00004002000140000000AFC000__000000931B9A2710\n000000067F00004002000140000000AF8E55-000000067F00004002000140000000B0182C__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000AFC000-000000067F00004002000140000000B00000__000000914E3F38F0\
n000000067F00004002000140000000AFC000-000000067F00004002000140000000B00000__000000931B9A2710\n000000067F00004002000140000000B00000-000000067F00004002000140000000B04000__000000914E3F38F0\n000000067F00004002000140000000B00000-000000067F00004002000140000000B04000__000000931B9A2710\n000000067F00004002000140000000B0182C-000000067F00004002000140000000B0A1FF__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000B04000-000000067F00004002000140000000B08000__000000914E3F38F0\n000000067F00004002000140000000B04000-000000067F00004002000140000000B08000__000000931B9A2710\n000000067F00004002000140000000B08000-000000067F00004002000140000000B0C000__000000914E3F38F0\n000000067F00004002000140000000B08000-000000067F00004002000140000000B0C000__000000931B9A2710\n000000067F00004002000140000000B0A1FF-000000067F00004002000140000000B12BEC__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000B0C000-000000067F00004002000140000000B10000__000000914E3F38F0\n000000067F00004002000140000000B0C000-000000067F00004002000140000000B10000__000000931B9A2710\n000000067F00004002000140000000B10000-000000067F00004002000140000000B14000__000000914E3F38F0\n000000067F00004002000140000000B10000-000000067F00004002000140000000B14000__000000931B9A2710\n000000067F00004002000140000000B12BEC-000000067F00004002000140000000B1B5CE__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000B14000-000000067F00004002000140000000B18000__000000914E3F38F0\n000000067F00004002000140000000B14000-000000067F00004002000140000000B18000__000000931B9A2710\n000000067F00004002000140000000B18000-000000067F00004002000140000000B1C000__000000914E3F38F0\n000000067F00004002000140000000B18000-000000067F00004002000140000000B1C000__000000931B9A2710\n000000067F00004002000140000000B1B5CE-000000067F00004002000140000000B23FC0__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000B1C000-000000067F00004002000140000000B20000__000000914E3F38F0\n000000067F00004002000140000000B1C000-000000067F00004002000140000000B200
00__000000931B9A2710\n000000067F00004002000140000000B20000-000000067F00004002000140000000B24000__000000914E3F38F0\n000000067F00004002000140000000B20000-000000067F00004002000140000000B24000__000000931B9A2710\n000000067F00004002000140000000B23FC0-000000067F00004002000140000000B2C997__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000B24000-000000067F00004002000140000000B28000__000000914E3F38F0\n000000067F00004002000140000000B24000-000000067F00004002000140000000B28000__000000931B9A2710\n000000067F00004002000140000000B28000-000000067F00004002000140000000B2C000__000000914E3F38F0\n000000067F00004002000140000000B28000-000000067F00004002000140000000B2C000__000000931B9A2710\n000000067F00004002000140000000B2C000-000000067F00004002000140000000B30000__000000914E3F38F0\n000000067F00004002000140000000B2C000-000000067F00004002000140000000B30000__000000931B9A2710\n000000067F00004002000140000000B2C997-000000067F00004002000140000000B35371__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000B30000-000000067F00004002000140000000B34000__000000914E3F38F0\n000000067F00004002000140000000B30000-000000067F00004002000140000000B34000__000000931B9A2710\n000000067F00004002000140000000B34000-000000067F00004002000140000000B38000__000000914E3F38F0\n000000067F00004002000140000000B34000-000000067F00004002000140000000B38000__000000931B9A2710\n000000067F00004002000140000000B35371-000000067F00004002000140000000B3DD41__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000B38000-000000067F00004002000140000000B3C000__000000914E3F38F0\n000000067F00004002000140000000B38000-000000067F00004002000140000000B3C000__000000931B9A2710\n000000067F00004002000140000000B3C000-000000067F00004002000140000000B40000__000000914E3F38F0\n000000067F00004002000140000000B3C000-000000067F00004002000140000000B40000__000000931B9A2710\n000000067F00004002000140000000B3DD41-000000067F00004002000140000000B46710__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000B40000-000000067F000
04002000140000000B44000__000000914E3F38F0\n000000067F00004002000140000000B40000-000000067F00004002000140000000B44000__000000931B9A2710\n000000067F00004002000140000000B44000-000000067F00004002000140000000B48000__000000914E3F38F0\n000000067F00004002000140000000B44000-000000067F00004002000140000000B48000__000000931B9A2710\n000000067F00004002000140000000B46710-000000067F00004002000140000000B4F0EB__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000B48000-000000067F00004002000140000000B4C000__000000914E3F38F0\n000000067F00004002000140000000B48000-000000067F00004002000140000000B4C000__000000931B9A2710\n000000067F00004002000140000000B4C000-000000067F00004002000140000000B50000__000000914E3F38F0\n000000067F00004002000140000000B4C000-000000067F00004002000140000000B50000__000000931B9A2710\n000000067F00004002000140000000B4F0EB-000000067F00004002000140000000B57ACA__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000B50000-000000067F00004002000140000000B54000__000000914E3F38F0\n000000067F00004002000140000000B50000-000000067F00004002000140000000B54000__000000931B9A2710\n000000067F00004002000140000000B54000-000000067F00004002000140000000B58000__000000914E3F38F0\n000000067F00004002000140000000B54000-000000067F00004002000140000000B58000__000000931B9A2710\n000000067F00004002000140000000B57ACA-000000067F00004002000140000000B604B4__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000B58000-000000067F00004002000140000000B5C000__000000914E3F38F0\n000000067F00004002000140000000B58000-000000067F00004002000140000000B5C000__000000931B9A2710\n000000067F00004002000140000000B5C000-000000067F00004002000140000000B60000__000000914E3F38F0\n000000067F00004002000140000000B5C000-000000067F00004002000140000000B60000__000000931B9A2710\n000000067F00004002000140000000B60000-000000067F00004002000140000000B64000__000000914E3F38F0\n000000067F00004002000140000000B60000-000000067F00004002000140000000B64000__000000931B9A2710\n000000067F00004002000140000000B604B4-000000067
F00004002000140000000B68E85__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000B64000-000000067F00004002000140000000B68000__000000914E3F38F0\n000000067F00004002000140000000B64000-000000067F00004002000140000000B68000__000000931B9A2710\n000000067F00004002000140000000B68000-000000067F00004002000140000000B6C000__000000914E3F38F0\n000000067F00004002000140000000B68000-000000067F00004002000140000000B6C000__000000931B9A2710\n000000067F00004002000140000000B68E85-000000067F00004002000140000000B71863__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000B6C000-000000067F00004002000140000000B70000__000000914E3F38F0\n000000067F00004002000140000000B6C000-000000067F00004002000140000000B70000__000000931B9A2710\n000000067F00004002000140000000B70000-000000067F00004002000140000000B74000__000000914E3F38F0\n000000067F00004002000140000000B70000-000000067F00004002000140000000B74000__000000931B9A2710\n000000067F00004002000140000000B71863-000000067F00004002000140000000B7A239__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000B74000-000000067F00004002000140000000B78000__000000914E3F38F0\n000000067F00004002000140000000B74000-000000067F00004002000140000000B78000__000000931B9A2710\n000000067F00004002000140000000B78000-000000067F00004002000140000000B7C000__000000914E3F38F0\n000000067F00004002000140000000B78000-000000067F00004002000140000000B7C000__000000931B9A2710\n000000067F00004002000140000000B7A239-000000067F00004002000140000000B82C06__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000B7C000-000000067F00004002000140000000B80000__000000914E3F38F0\n000000067F00004002000140000000B7C000-000000067F00004002000140000000B80000__000000931B9A2710\n000000067F00004002000140000000B80000-000000067F00004002000140000000B84000__000000914E3F38F0\n000000067F00004002000140000000B80000-000000067F00004002000140000000B84000__000000931B9A2710\n000000067F00004002000140000000B82C06-000000067F00004002000140000000B8B5E5__000000739A920D71-0000008D2DB5E0C1\n00000006
7F00004002000140000000B84000-000000067F00004002000140000000B88000__000000914E3F38F0\n000000067F00004002000140000000B84000-000000067F00004002000140000000B88000__000000931B9A2710\n000000067F00004002000140000000B88000-000000067F00004002000140000000B8C000__000000914E3F38F0\n000000067F00004002000140000000B88000-000000067F00004002000140000000B8C000__000000931B9A2710\n000000067F00004002000140000000B8B5E5-000000067F00004002000140000000B93FD3__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000B8C000-000000067F00004002000140000000B90000__000000914E3F38F0\n000000067F00004002000140000000B8C000-000000067F00004002000140000000B90000__000000931B9A2710\n000000067F00004002000140000000B90000-000000067F00004002000140000000B94000__000000914E3F38F0\n000000067F00004002000140000000B90000-000000067F00004002000140000000B94000__000000931B9A2710\n000000067F00004002000140000000B93FD3-000000067F00004002000140000000B9C9B8__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000B94000-000000067F00004002000140000000B98000__000000914E3F38F0\n000000067F00004002000140000000B94000-000000067F00004002000140000000B98000__000000931B9A2710\n000000067F00004002000140000000B98000-000000067F00004002000140000000B9C000__000000914E3F38F0\n000000067F00004002000140000000B98000-000000067F00004002000140000000B9C000__000000931B9A2710\n000000067F00004002000140000000B9C000-000000067F00004002000140000000BA0000__000000914E3F38F0\n000000067F00004002000140000000B9C000-000000067F00004002000140000000BA0000__000000931B9A2710\n000000067F00004002000140000000B9C9B8-000000067F00004002000140000000BA538E__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000BA0000-000000067F00004002000140000000BA4000__000000914E3F38F0\n000000067F00004002000140000000BA0000-000000067F00004002000140000000BA4000__000000931B9A2710\n000000067F00004002000140000000BA4000-000000067F00004002000140000000BA8000__000000914E3F38F0\n000000067F00004002000140000000BA4000-000000067F00004002000140000000BA8000__000000931B9A2710\n0000
00067F00004002000140000000BA538E-000000067F00004002000140000000BADD73__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000BA8000-000000067F00004002000140000000BAC000__000000914E3F38F0\n000000067F00004002000140000000BA8000-000000067F00004002000140000000BAC000__000000931B9A2710\n000000067F00004002000140000000BAC000-000000067F00004002000140000000BB0000__000000914E3F38F0\n000000067F00004002000140000000BAC000-000000067F00004002000140000000BB0000__000000931B9A2710\n000000067F00004002000140000000BADD73-000000067F00004002000140000000BB674C__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000BB0000-000000067F00004002000140000000BB4000__000000914E3F38F0\n000000067F00004002000140000000BB0000-000000067F00004002000140000000BB4000__000000931B9A2710\n000000067F00004002000140000000BB4000-000000067F00004002000140000000BB8000__000000914E3F38F0\n000000067F00004002000140000000BB4000-000000067F00004002000140000000BB8000__000000931B9A2710\n000000067F00004002000140000000BB674C-000000067F00004002000140000000BBF113__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000BB8000-000000067F00004002000140000000BBC000__000000914E3F38F0\n000000067F00004002000140000000BB8000-000000067F00004002000140000000BBC000__000000931B9A2710\n000000067F00004002000140000000BBC000-000000067F00004002000140000000BC0000__000000914E3F38F0\n000000067F00004002000140000000BBC000-000000067F00004002000140000000BC0000__000000931B9A2710\n000000067F00004002000140000000BBF113-000000067F00004002000140000000BC7AEE__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000BC0000-000000067F00004002000140000000BC4000__000000914E3F38F0\n000000067F00004002000140000000BC0000-000000067F00004002000140000000BC4000__000000931B9A2710\n000000067F00004002000140000000BC4000-000000067F00004002000140000000BC8000__000000914E3F38F0\n000000067F00004002000140000000BC4000-000000067F00004002000140000000BC8000__000000931B9A2710\n000000067F00004002000140000000BC7AEE-000000067F00004002000140000000BD04E2__0
00000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000BC8000-000000067F00004002000140000000BCC000__000000914E3F38F0\n000000067F00004002000140000000BC8000-000000067F00004002000140000000BCC000__000000931B9A2710\n000000067F00004002000140000000BCC000-000000067F00004002000140000000BD0000__000000914E3F38F0\n000000067F00004002000140000000BCC000-000000067F00004002000140000000BD0000__000000931B9A2710\n000000067F00004002000140000000BD0000-000000067F00004002000140000000BD4000__000000914E3F38F0\n000000067F00004002000140000000BD0000-000000067F00004002000140000000BD4000__000000931B9A2710\n000000067F00004002000140000000BD04E2-000000067F00004002000140000000BD8EC2__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000BD4000-000000067F00004002000140000000BD8000__000000914E3F38F0\n000000067F00004002000140000000BD4000-000000067F00004002000140000000BD8000__000000931B9A2710\n000000067F00004002000140000000BD8000-000000067F00004002000140000000BDC000__000000914E3F38F0\n000000067F00004002000140000000BD8000-000000067F00004002000140000000BDC000__000000931B9A2710\n000000067F00004002000140000000BD8EC2-000000067F00004002000140000000BE18A8__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000BDC000-000000067F00004002000140000000BE0000__000000914E3F38F0\n000000067F00004002000140000000BDC000-000000067F00004002000140000000BE0000__000000931B9A2710\n000000067F00004002000140000000BE0000-000000067F00004002000140000000BE4000__000000914E3F38F0\n000000067F00004002000140000000BE0000-000000067F00004002000140000000BE4000__000000931B9A2710\n000000067F00004002000140000000BE18A8-000000067F00004002000140000000BEA27B__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000BE4000-000000067F00004002000140000000BE8000__000000914E3F38F0\n000000067F00004002000140000000BE4000-000000067F00004002000140000000BE8000__000000931B9A2710\n000000067F00004002000140000000BE8000-000000067F00004002000140000000BEC000__000000914E3F38F0\n000000067F00004002000140000000BE8000-000000067F00004002
000140000000BEC000__000000931B9A2710\n000000067F00004002000140000000BEA27B-000000067F00004002000140000000BF2C4E__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000BEC000-000000067F00004002000140000000BF0000__000000914E3F38F0\n000000067F00004002000140000000BEC000-000000067F00004002000140000000BF0000__000000931B9A2710\n000000067F00004002000140000000BF0000-000000067F00004002000140000000BF4000__000000914E3F38F0\n000000067F00004002000140000000BF0000-000000067F00004002000140000000BF4000__000000931B9A2710\n000000067F00004002000140000000BF2C4E-000000067F00004002000140000000BFB624__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000BF4000-000000067F00004002000140000000BF8000__000000914E3F38F0\n000000067F00004002000140000000BF4000-000000067F00004002000140000000BF8000__000000931B9A2710\n000000067F00004002000140000000BF8000-000000067F00004002000140000000BFC000__000000914E3F38F0\n000000067F00004002000140000000BF8000-000000067F00004002000140000000BFC000__000000931B9A2710\n000000067F00004002000140000000BFB624-000000067F00004002000140000000C04004__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000BFC000-000000067F00004002000140000000C00000__000000914E3F38F0\n000000067F00004002000140000000BFC000-000000067F00004002000140000000C00000__000000931B9A2710\n000000067F00004002000140000000C00000-000000067F00004002000140000000C04000__000000914E3F38F0\n000000067F00004002000140000000C00000-000000067F00004002000140000000C04000__000000931B9A2710\n000000067F00004002000140000000C04000-000000067F00004002000140000000C08000__000000914E3F38F0\n000000067F00004002000140000000C04000-000000067F00004002000140000000C08000__000000931B9A2710\n000000067F00004002000140000000C04004-000000067F00004002000140000000C0C9DC__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000C08000-000000067F00004002000140000000C0C000__000000914E3F38F0\n000000067F00004002000140000000C08000-000000067F00004002000140000000C0C000__000000931B9A2710\n000000067F00004002000140000000C0C0
00-000000067F00004002000140000000C10000__000000914E3F38F0\n000000067F00004002000140000000C0C000-000000067F00004002000140000000C10000__000000931B9A2710\n000000067F00004002000140000000C0C9DC-000000067F00004002000140000000C153C5__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000C10000-000000067F00004002000140000000C14000__000000914E3F38F0\n000000067F00004002000140000000C10000-000000067F00004002000140000000C14000__000000931B9A2710\n000000067F00004002000140000000C14000-000000067F00004002000140000000C18000__000000914E3F38F0\n000000067F00004002000140000000C14000-000000067F00004002000140000000C18000__000000931B9A2710\n000000067F00004002000140000000C153C5-000000067F00004002000140000000C1DDA3__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000C18000-000000067F00004002000140000000C1C000__000000914E3F38F0\n000000067F00004002000140000000C18000-000000067F00004002000140000000C1C000__000000931B9A2710\n000000067F00004002000140000000C1C000-000000067F00004002000140000000C20000__000000914E3F38F0\n000000067F00004002000140000000C1C000-000000067F00004002000140000000C20000__000000931B9A2710\n000000067F00004002000140000000C1DDA3-000000067F00004002000140000000C2677D__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000C20000-000000067F00004002000140000000C24000__000000914E3F38F0\n000000067F00004002000140000000C20000-000000067F00004002000140000000C24000__000000931B9A2710\n000000067F00004002000140000000C24000-000000067F00004002000140000000C28000__000000914E3F38F0\n000000067F00004002000140000000C24000-000000067F00004002000140000000C28000__000000931B9A2710\n000000067F00004002000140000000C2677D-000000067F00004002000140000000C2F155__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000C28000-000000067F00004002000140000000C2C000__000000914E3F38F0\n000000067F00004002000140000000C28000-000000067F00004002000140000000C2C000__000000931B9A2710\n000000067F00004002000140000000C2C000-000000067F00004002000140000000C30000__000000914E3F38F0\n000000067F000
04002000140000000C2C000-000000067F00004002000140000000C30000__000000931B9A2710\n000000067F00004002000140000000C2F155-000000067F00004002000140000000C37B30__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000C30000-000000067F00004002000140000000C34000__000000914E3F38F0\n000000067F00004002000140000000C30000-000000067F00004002000140000000C34000__000000931B9A2710\n000000067F00004002000140000000C34000-000000067F00004002000140000000C38000__000000914E3F38F0\n000000067F00004002000140000000C34000-000000067F00004002000140000000C38000__000000931B9A2710\n000000067F00004002000140000000C37B30-000000067F00004002000140000000C4050D__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000C38000-000000067F00004002000140000000C3C000__000000914E3F38F0\n000000067F00004002000140000000C38000-000000067F00004002000140000000C3C000__000000931B9A2710\n000000067F00004002000140000000C3C000-000000067F00004002000140000000C40000__000000914E3F38F0\n000000067F00004002000140000000C3C000-000000067F00004002000140000000C40000__000000931B9A2710\n000000067F00004002000140000000C40000-000000067F00004002000140000000C44000__000000914E3F38F0\n000000067F00004002000140000000C40000-000000067F00004002000140000000C44000__000000931B9A2710\n000000067F00004002000140000000C4050D-000000067F00004002000140000000C48EEF__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000C44000-000000067F00004002000140000000C48000__000000914E3F38F0\n000000067F00004002000140000000C44000-000000067F00004002000140000000C48000__000000931B9A2710\n000000067F00004002000140000000C48000-000000067F00004002000140000000C4C000__000000914E3F38F0\n000000067F00004002000140000000C48000-000000067F00004002000140000000C4C000__000000931B9A2710\n000000067F00004002000140000000C48EEF-000000067F00004002000140000000C518D3__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000C4C000-000000067F00004002000140000000C50000__000000914E3F38F0\n000000067F00004002000140000000C4C000-000000067F00004002000140000000C50000__000000931B
9A2710\n000000067F00004002000140000000C50000-000000067F00004002000140000000C54000__000000914E3F38F0\n000000067F00004002000140000000C50000-000000067F00004002000140000000C54000__000000931B9A2710\n000000067F00004002000140000000C518D3-000000067F00004002000140000000C5A2AB__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000C54000-000000067F00004002000140000000C58000__000000914E3F38F0\n000000067F00004002000140000000C54000-000000067F00004002000140000000C58000__000000931B9A2710\n000000067F00004002000140000000C58000-000000067F00004002000140000000C5C000__000000914E3F38F0\n000000067F00004002000140000000C58000-000000067F00004002000140000000C5C000__000000931B9A2710\n000000067F00004002000140000000C5A2AB-000000067F00004002000140000000C62C8E__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000C5C000-000000067F00004002000140000000C60000__000000914E3F38F0\n000000067F00004002000140000000C5C000-000000067F00004002000140000000C60000__000000931B9A2710\n000000067F00004002000140000000C60000-000000067F00004002000140000000C64000__000000914E3F38F0\n000000067F00004002000140000000C60000-000000067F00004002000140000000C64000__000000931B9A2710\n000000067F00004002000140000000C62C8E-000000067F00004002000140000000C6B65C__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000C64000-000000067F00004002000140000000C68000__000000914E3F38F0\n000000067F00004002000140000000C64000-000000067F00004002000140000000C68000__000000931B9A2710\n000000067F00004002000140000000C68000-000000067F00004002000140000000C6C000__000000914E3F38F0\n000000067F00004002000140000000C68000-000000067F00004002000140000000C6C000__000000931B9A2710\n000000067F00004002000140000000C6B65C-000000067F00004002000140000000C74040__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000C6C000-000000067F00004002000140000000C70000__000000914E3F38F0\n000000067F00004002000140000000C6C000-000000067F00004002000140000000C70000__000000931B9A2710\n000000067F00004002000140000000C70000-000000067F00004002000140000
000C74000__000000914E3F38F0\n000000067F00004002000140000000C70000-000000067F00004002000140000000C74000__000000931B9A2710\n000000067F00004002000140000000C74000-000000067F00004002000140000000C78000__000000914E3F38F0\n000000067F00004002000140000000C74000-000000067F00004002000140000000C78000__000000931B9A2710\n000000067F00004002000140000000C74040-000000067F00004002000140000000C7CA16__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000C78000-000000067F00004002000140000000C7C000__000000914E3F38F0\n000000067F00004002000140000000C78000-000000067F00004002000140000000C7C000__000000931B9A2710\n000000067F00004002000140000000C7C000-000000067F00004002000140000000C80000__000000914E3F38F0\n000000067F00004002000140000000C7C000-000000067F00004002000140000000C80000__000000931B9A2710\n000000067F00004002000140000000C7CA16-000000067F00004002000140000000C853EF__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000C80000-000000067F00004002000140000000C84000__000000914E3F38F0\n000000067F00004002000140000000C80000-000000067F00004002000140000000C84000__000000931B9A2710\n000000067F00004002000140000000C84000-000000067F00004002000140000000C88000__000000914E3F38F0\n000000067F00004002000140000000C84000-000000067F00004002000140000000C88000__000000931B9A2710\n000000067F00004002000140000000C853EF-000000067F00004002000140000000C8DDCD__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000C88000-000000067F00004002000140000000C8C000__000000914E3F38F0\n000000067F00004002000140000000C88000-000000067F00004002000140000000C8C000__000000931B9A2710\n000000067F00004002000140000000C8C000-000000067F00004002000140000000C90000__000000914E3F38F0\n000000067F00004002000140000000C8C000-000000067F00004002000140000000C90000__000000931B9A2710\n000000067F00004002000140000000C8DDCD-000000067F00004002000140000000C967AD__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000C90000-000000067F00004002000140000000C94000__000000914E3F38F0\n000000067F00004002000140000000C90000-000000
067F00004002000140000000C94000__000000931B9A2710\n000000067F00004002000140000000C94000-000000067F00004002000140000000C98000__000000914E3F38F0\n000000067F00004002000140000000C94000-000000067F00004002000140000000C98000__000000931B9A2710\n000000067F00004002000140000000C967AD-000000067F00004002000140000000C9F189__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000C98000-000000067F00004002000140000000C9C000__000000914E3F38F0\n000000067F00004002000140000000C98000-000000067F00004002000140000000C9C000__000000931B9A2710\n000000067F00004002000140000000C9C000-000000067F00004002000140000000CA0000__000000914E3F38F0\n000000067F00004002000140000000C9C000-000000067F00004002000140000000CA0000__000000931B9A2710\n000000067F00004002000140000000C9F189-000000067F00004002000140000000CA7B70__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000CA0000-000000067F00004002000140000000CA4000__000000914E3F38F0\n000000067F00004002000140000000CA0000-000000067F00004002000140000000CA4000__000000931B9A2710\n000000067F00004002000140000000CA4000-000000067F00004002000140000000CA8000__000000914E3F38F0\n000000067F00004002000140000000CA4000-000000067F00004002000140000000CA8000__000000931B9A2710\n000000067F00004002000140000000CA7B70-000000067F00004002000140000000CB0544__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000CA8000-000000067F00004002000140000000CAC000__000000914E3F38F0\n000000067F00004002000140000000CA8000-000000067F00004002000140000000CAC000__000000931B9A2710\n000000067F00004002000140000000CAC000-000000067F00004002000140000000CB0000__000000914E3F38F0\n000000067F00004002000140000000CAC000-000000067F00004002000140000000CB0000__000000931B9A2710\n000000067F00004002000140000000CB0000-000000067F00004002000140000000CB4000__000000914E3F38F0\n000000067F00004002000140000000CB0000-000000067F00004002000140000000CB4000__000000931B9A2710\n000000067F00004002000140000000CB0544-000000067F00004002000140000000CB8F24__000000739A920D71-0000008D2DB5E0C1\n000000067F000040020001
40000000CB4000-000000067F00004002000140000000CB8000__000000914E3F38F0\n000000067F00004002000140000000CB4000-000000067F00004002000140000000CB8000__000000931B9A2710\n000000067F00004002000140000000CB8000-000000067F00004002000140000000CBC000__000000914E3F38F0\n000000067F00004002000140000000CB8000-000000067F00004002000140000000CBC000__000000931B9A2710\n000000067F00004002000140000000CB8F24-000000067F00004002000140000000CC1904__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000CBC000-000000067F00004002000140000000CC0000__000000914E3F38F0\n000000067F00004002000140000000CBC000-000000067F00004002000140000000CC0000__000000931B9A2710\n000000067F00004002000140000000CC0000-000000067F00004002000140000000CC4000__000000914E3F38F0\n000000067F00004002000140000000CC0000-000000067F00004002000140000000CC4000__000000931B9A2710\n000000067F00004002000140000000CC1904-000000067F00004002000140000000CCA2D5__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000CC4000-000000067F00004002000140000000CC8000__000000914E3F38F0\n000000067F00004002000140000000CC4000-000000067F00004002000140000000CC8000__000000931B9A2710\n000000067F00004002000140000000CC8000-000000067F00004002000140000000CCC000__000000914E3F38F0\n000000067F00004002000140000000CC8000-000000067F00004002000140000000CCC000__000000931B9A2710\n000000067F00004002000140000000CCA2D5-000000067F00004002000140000000CD2CB2__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000CCC000-000000067F00004002000140000000CD0000__000000914E3F38F0\n000000067F00004002000140000000CCC000-000000067F00004002000140000000CD0000__000000931B9A2710\n000000067F00004002000140000000CD0000-000000067F00004002000140000000CD4000__000000914E3F38F0\n000000067F00004002000140000000CD0000-000000067F00004002000140000000CD4000__000000931B9A2710\n000000067F00004002000140000000CD2CB2-000000067F00004002000140000000CDB695__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000CD4000-000000067F00004002000140000000CD8000__000000914E3F38F0\n0
00000067F00004002000140000000CD4000-000000067F00004002000140000000CD8000__000000931B9A2710\n000000067F00004002000140000000CD8000-000000067F00004002000140000000CDC000__000000914E3F38F0\n000000067F00004002000140000000CD8000-000000067F00004002000140000000CDC000__000000931B9A2710\n000000067F00004002000140000000CDB695-000000067F00004002000140000000CE4071__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000CDC000-000000067F00004002000140000000CE0000__000000914E3F38F0\n000000067F00004002000140000000CDC000-000000067F00004002000140000000CE0000__000000931B9A2710\n000000067F00004002000140000000CE0000-000000067F00004002000140000000CE4000__000000914E3F38F0\n000000067F00004002000140000000CE0000-000000067F00004002000140000000CE4000__000000931B9A2710\n000000067F00004002000140000000CE4000-000000067F00004002000140000000CE8000__000000914E3F38F0\n000000067F00004002000140000000CE4000-000000067F00004002000140000000CE8000__000000931B9A2710\n000000067F00004002000140000000CE4071-000000067F00004002000140000000CECA49__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000CE8000-000000067F00004002000140000000CEC000__000000914E3F38F0\n000000067F00004002000140000000CE8000-000000067F00004002000140000000CEC000__000000931B9A2710\n000000067F00004002000140000000CEC000-000000067F00004002000140000000CF0000__000000914E3F38F0\n000000067F00004002000140000000CEC000-000000067F00004002000140000000CF0000__000000931B9A2710\n000000067F00004002000140000000CECA49-000000067F00004002000140000000CF5427__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000CF0000-000000067F00004002000140000000CF4000__000000914E3F38F0\n000000067F00004002000140000000CF0000-000000067F00004002000140000000CF4000__000000931B9A2710\n000000067F00004002000140000000CF4000-000000067F00004002000140000000CF8000__000000914E3F38F0\n000000067F00004002000140000000CF4000-000000067F00004002000140000000CF8000__000000931B9A2710\n000000067F00004002000140000000CF5427-000000067F00004002000140000000CFDE02__000000739A920D7
1-0000008D2DB5E0C1\n000000067F00004002000140000000CF8000-000000067F00004002000140000000CFC000__000000914E3F38F0\n000000067F00004002000140000000CF8000-000000067F00004002000140000000CFC000__000000931B9A2710\n000000067F00004002000140000000CFC000-000000067F00004002000140000000D00000__000000914E3F38F0\n000000067F00004002000140000000CFC000-000000067F00004002000140000000D00000__000000931B9A2710\n000000067F00004002000140000000CFDE02-000000067F00004002000140000000D067CC__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000D00000-000000067F00004002000140000000D04000__000000914E3F38F0\n000000067F00004002000140000000D00000-000000067F00004002000140000000D04000__000000931B9A2710\n000000067F00004002000140000000D04000-000000067F00004002000140000000D08000__000000914E3F38F0\n000000067F00004002000140000000D04000-000000067F00004002000140000000D08000__000000931B9A2710\n000000067F00004002000140000000D067CC-000000067F00004002000140000000D0F1C4__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000D08000-000000067F00004002000140000000D0C000__000000914E3F38F0\n000000067F00004002000140000000D08000-000000067F00004002000140000000D0C000__000000931B9A2710\n000000067F00004002000140000000D0C000-000000067F00004002000140000000D10000__000000914E3F38F0\n000000067F00004002000140000000D0C000-000000067F00004002000140000000D10000__000000931B9A2710\n000000067F00004002000140000000D0F1C4-000000067F00004002000140000000D17B9F__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000D10000-000000067F00004002000140000000D14000__000000914E3F38F0\n000000067F00004002000140000000D10000-000000067F00004002000140000000D14000__000000931B9A2710\n000000067F00004002000140000000D14000-000000067F00004002000140000000D18000__000000914E3F38F0\n000000067F00004002000140000000D14000-000000067F00004002000140000000D18000__000000931B9A2710\n000000067F00004002000140000000D17B9F-000000067F00004002000140000000D2057B__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000D18000-000000067F00004
002000140000000D1C000__000000914E3F38F0\n000000067F00004002000140000000D18000-000000067F00004002000140000000D1C000__000000931B9A2710\n000000067F00004002000140000000D1C000-000000067F00004002000140000000D20000__000000914E3F38F0\n000000067F00004002000140000000D1C000-000000067F00004002000140000000D20000__000000931B9A2710\n000000067F00004002000140000000D20000-000000067F00004002000140000000D24000__000000914E3F38F0\n000000067F00004002000140000000D20000-000000067F00004002000140000000D24000__000000931B9A2710\n000000067F00004002000140000000D2057B-000000067F00004002000140000000D28F4A__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000D24000-000000067F00004002000140000000D28000__000000914E3F38F0\n000000067F00004002000140000000D24000-000000067F00004002000140000000D28000__000000931B9A2710\n000000067F00004002000140000000D28000-000000067F00004002000140000000D2C000__000000914E3F38F0\n000000067F00004002000140000000D28000-000000067F00004002000140000000D2C000__000000931B9A2710\n000000067F00004002000140000000D28F4A-000000067F00004002000140000000D31928__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000D2C000-000000067F00004002000140000000D30000__000000914E3F38F0\n000000067F00004002000140000000D2C000-000000067F00004002000140000000D30000__000000931B9A2710\n000000067F00004002000140000000D30000-000000067F00004002000140000000D34000__000000914E3F38F0\n000000067F00004002000140000000D30000-000000067F00004002000140000000D34000__000000931B9A2710\n000000067F00004002000140000000D31928-000000067F00004002000140000000D3A302__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000D34000-000000067F00004002000140000000D38000__000000914E3F38F0\n000000067F00004002000140000000D34000-000000067F00004002000140000000D38000__000000931B9A2710\n000000067F00004002000140000000D38000-000000067F00004002000140000000D3C000__000000914E3F38F0\n000000067F00004002000140000000D38000-000000067F00004002000140000000D3C000__000000931B9A2710\n000000067F00004002000140000000D3A302-000000067F0
0004002000140000000D42CCC__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000D3C000-000000067F00004002000140000000D40000__000000914E3F38F0\n000000067F00004002000140000000D3C000-000000067F00004002000140000000D40000__000000931B9A2710\n000000067F00004002000140000000D40000-000000067F00004002000140000000D44000__000000914E3F38F0\n000000067F00004002000140000000D40000-000000067F00004002000140000000D44000__000000931B9A2710\n000000067F00004002000140000000D42CCC-000000067F00004002000140000000D4B6AE__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000D44000-000000067F00004002000140000000D48000__000000914E3F38F0\n000000067F00004002000140000000D44000-000000067F00004002000140000000D48000__000000931B9A2710\n000000067F00004002000140000000D48000-000000067F00004002000140000000D4C000__000000914E3F38F0\n000000067F00004002000140000000D48000-000000067F00004002000140000000D4C000__000000931B9A2710\n000000067F00004002000140000000D4B6AE-000000067F00004002000140000000D5408F__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000D4C000-000000067F00004002000140000000D50000__000000914E3F38F0\n000000067F00004002000140000000D4C000-000000067F00004002000140000000D50000__000000931B9A2710\n000000067F00004002000140000000D50000-000000067F00004002000140000000D54000__000000914E3F38F0\n000000067F00004002000140000000D50000-000000067F00004002000140000000D54000__000000931B9A2710\n000000067F00004002000140000000D54000-000000067F00004002000140000000D58000__000000914E3F38F0\n000000067F00004002000140000000D54000-000000067F00004002000140000000D58000__000000931B9A2710\n000000067F00004002000140000000D5408F-000000067F00004002000140000000D5CA69__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000D58000-000000067F00004002000140000000D5C000__000000914E3F38F0\n000000067F00004002000140000000D58000-000000067F00004002000140000000D5C000__000000931B9A2710\n000000067F00004002000140000000D5C000-000000067F00004002000140000000D60000__000000914E3F38F0\n000000067F00004002000140000
000D5C000-000000067F00004002000140000000D60000__000000931B9A2710\n000000067F00004002000140000000D5CA69-000000067F00004002000140000000D6543E__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000D60000-000000067F00004002000140000000D64000__000000914E3F38F0\n000000067F00004002000140000000D60000-000000067F00004002000140000000D64000__000000931B9A2710\n000000067F00004002000140000000D64000-000000067F00004002000140000000D68000__000000914E3F38F0\n000000067F00004002000140000000D64000-000000067F00004002000140000000D68000__000000931B9A2710\n000000067F00004002000140000000D6543E-000000067F00004002000140000000D6DE1B__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000D68000-000000067F00004002000140000000D6C000__000000914E3F38F0\n000000067F00004002000140000000D68000-000000067F00004002000140000000D6C000__000000931B9A2710\n000000067F00004002000140000000D6C000-000000067F00004002000140000000D70000__000000914E3F38F0\n000000067F00004002000140000000D6C000-000000067F00004002000140000000D70000__000000931B9A2710\n000000067F00004002000140000000D6DE1B-000000067F00004002000140000000D767FA__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000D70000-000000067F00004002000140000000D74000__000000914E3F38F0\n000000067F00004002000140000000D70000-000000067F00004002000140000000D74000__000000931B9A2710\n000000067F00004002000140000000D74000-000000067F00004002000140000000D78000__000000914E3F38F0\n000000067F00004002000140000000D74000-000000067F00004002000140000000D78000__000000931B9A2710\n000000067F00004002000140000000D767FA-000000067F00004002000140000000D7F1DD__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000D78000-000000067F00004002000140000000D7C000__000000914E3F38F0\n000000067F00004002000140000000D78000-000000067F00004002000140000000D7C000__000000931B9A2710\n000000067F00004002000140000000D7C000-000000067F00004002000140000000D80000__000000914E3F38F0\n000000067F00004002000140000000D7C000-000000067F00004002000140000000D80000__000000931B9A2710\n000000
067F00004002000140000000D7F1DD-000000067F00004002000140000000D87BBA__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000D80000-000000067F00004002000140000000D84000__000000914E3F38F0\n000000067F00004002000140000000D80000-000000067F00004002000140000000D84000__000000931B9A2710\n000000067F00004002000140000000D84000-000000067F00004002000140000000D88000__000000914E3F38F0\n000000067F00004002000140000000D84000-000000067F00004002000140000000D88000__000000931B9A2710\n000000067F00004002000140000000D87BBA-000000067F00004002000140000000D9059C__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000D88000-000000067F00004002000140000000D8C000__000000914E3F38F0\n000000067F00004002000140000000D88000-000000067F00004002000140000000D8C000__000000931B9A2710\n000000067F00004002000140000000D8C000-000000067F00004002000140000000D90000__000000914E3F38F0\n000000067F00004002000140000000D8C000-000000067F00004002000140000000D90000__000000931B9A2710\n000000067F00004002000140000000D90000-000000067F00004002000140000000D94000__000000914E3F38F0\n000000067F00004002000140000000D90000-000000067F00004002000140000000D94000__000000931B9A2710\n000000067F00004002000140000000D9059C-000000067F00004002000140000000D98F7F__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000D94000-000000067F00004002000140000000D98000__000000914E3F38F0\n000000067F00004002000140000000D94000-000000067F00004002000140000000D98000__000000931B9A2710\n000000067F00004002000140000000D98000-000000067F00004002000140000000D9C000__000000914E3F38F0\n000000067F00004002000140000000D98000-000000067F00004002000140000000D9C000__000000931B9A2710\n000000067F00004002000140000000D98F7F-000000067F00004002000140000000DA1953__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000D9C000-000000067F00004002000140000000DA0000__000000914E3F38F0\n000000067F00004002000140000000D9C000-000000067F00004002000140000000DA0000__000000931B9A2710\n000000067F00004002000140000000DA0000-000000067F00004002000140000000DA4000__000
000914E3F38F0\n000000067F00004002000140000000DA0000-000000067F00004002000140000000DA4000__000000931B9A2710\n000000067F00004002000140000000DA1953-000000067F00004002000140000000DAA32D__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000DA4000-000000067F00004002000140000000DA8000__000000914E3F38F0\n000000067F00004002000140000000DA4000-000000067F00004002000140000000DA8000__000000931B9A2710\n000000067F00004002000140000000DA8000-000000067F00004002000140000000DAC000__000000914E3F38F0\n000000067F00004002000140000000DA8000-000000067F00004002000140000000DAC000__000000931B9A2710\n000000067F00004002000140000000DAA32D-000000067F00004002000140000000DB2D0E__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000DAC000-000000067F00004002000140000000DB0000__000000914E3F38F0\n000000067F00004002000140000000DAC000-000000067F00004002000140000000DB0000__000000931B9A2710\n000000067F00004002000140000000DB0000-000000067F00004002000140000000DB4000__000000914E3F38F0\n000000067F00004002000140000000DB0000-000000067F00004002000140000000DB4000__000000931B9A2710\n000000067F00004002000140000000DB2D0E-000000067F00004002000140000000DBB6DF__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000DB4000-000000067F00004002000140000000DB8000__000000914E3F38F0\n000000067F00004002000140000000DB4000-000000067F00004002000140000000DB8000__000000931B9A2710\n000000067F00004002000140000000DB8000-000000067F00004002000140000000DBC000__000000914E3F38F0\n000000067F00004002000140000000DB8000-000000067F00004002000140000000DBC000__000000931B9A2710\n000000067F00004002000140000000DBB6DF-000000067F00004002000140000000DC40C3__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000DBC000-000000067F00004002000140000000DC0000__000000914E3F38F0\n000000067F00004002000140000000DBC000-000000067F00004002000140000000DC0000__000000931B9A2710\n000000067F00004002000140000000DC0000-000000067F00004002000140000000DC4000__000000914E3F38F0\n000000067F00004002000140000000DC0000-000000067F0000400200
0140000000DC4000__000000931B9A2710\n000000067F00004002000140000000DC4000-000000067F00004002000140000000DC8000__000000914E3F38F0\n000000067F00004002000140000000DC4000-000000067F00004002000140000000DC8000__000000931B9A2710\n000000067F00004002000140000000DC40C3-000000067F00004002000140000000DCCAA7__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000DC8000-000000067F00004002000140000000DCC000__000000914E3F38F0\n000000067F00004002000140000000DC8000-000000067F00004002000140000000DCC000__000000931B9A2710\n000000067F00004002000140000000DCC000-000000067F00004002000140000000DD0000__000000914E3F38F0\n000000067F00004002000140000000DCC000-000000067F00004002000140000000DD0000__000000931B9A2710\n000000067F00004002000140000000DCCAA7-000000067F00004002000140000000DD2050__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000000DD0000-000000067F00004002000140000000DD4000__000000914E3F38F0\n000000067F00004002000140000000DD0000-000000067F00004002000140000000DD4000__000000931B9A2710\n000000067F00004002000140000000DD2050-000000067F00004002000140000000DDAA27__0000008D2DB5E0C1-0000008E6D15F1F1\n000000067F00004002000140000000DD4000-000000067F00004002000140000000DD8000__000000914E3F38F0\n000000067F00004002000140000000DD4000-000000067F00004002000140000000DD8000__000000931B9A2710\n000000067F00004002000140000000DD8000-000000067F00004002000140000000DDC000__000000914E3F38F0\n000000067F00004002000140000000DD8000-000000067F00004002000140000000DDC000__000000931B9A2710\n000000067F00004002000140000000DDAA27-000000067F00004002000140000000DE3401__0000008D2DB5E0C1-0000008E6D15F1F1\n000000067F00004002000140000000DDC000-000000067F00004002000140000000DE0000__000000914E3F38F0\n000000067F00004002000140000000DDC000-000000067F00004002000140000000DE0000__000000931B9A2710\n000000067F00004002000140000000DE0000-000000067F00004002000140000000DE4000__000000914E3F38F0\n000000067F00004002000140000000DE0000-000000067F00004002000140000000DE4000__000000931B9A2710\n000000067F00004002000140000000DE3401
-000000067F00004002000140000000DEBDCD__0000008D2DB5E0C1-0000008E6D15F1F1\n000000067F00004002000140000000DE4000-000000067F00004002000140000000DE8000__000000914E3F38F0\n000000067F00004002000140000000DE4000-000000067F00004002000140000000DE8000__000000931B9A2710\n000000067F00004002000140000000DE8000-000000067F00004002000140000000DEC000__000000914E3F38F0\n000000067F00004002000140000000DE8000-000000067F00004002000140000000DEC000__000000931B9A2710\n000000067F00004002000140000000DEBDCD-000000067F00004002000140000000DF47AF__0000008D2DB5E0C1-0000008E6D15F1F1\n000000067F00004002000140000000DEC000-000000067F00004002000140000000DF0000__000000914E3F38F0\n000000067F00004002000140000000DEC000-000000067F00004002000140000000DF0000__000000931B9A2710\n000000067F00004002000140000000DF0000-000000067F00004002000140000000DF4000__000000914E3F38F0\n000000067F00004002000140000000DF0000-000000067F00004002000140000000DF4000__000000931B9A2710\n000000067F00004002000140000000DF4000-000000067F00004002000140000000DF8000__000000914E3F38F0\n000000067F00004002000140000000DF4000-000000067F00004002000140000000DF8000__000000931B9A2710\n000000067F00004002000140000000DF47AF-000000067F00004002000140000000DFD196__0000008D2DB5E0C1-0000008E6D15F1F1\n000000067F00004002000140000000DF8000-000000067F00004002000140000000DFC000__000000914E3F38F0\n000000067F00004002000140000000DF8000-000000067F00004002000140000000DFC000__000000931B9A2710\n000000067F00004002000140000000DFC000-000000067F00004002000140000000E00000__000000914E3F38F0\n000000067F00004002000140000000DFC000-000000067F00004002000140000000E00000__000000931B9A2710\n000000067F00004002000140000000DFD196-000000067F00004002000140000000E05B74__0000008D2DB5E0C1-0000008E6D15F1F1\n000000067F00004002000140000000E00000-000000067F00004002000140000000E04000__000000914E3F38F0\n000000067F00004002000140000000E00000-000000067F00004002000140000000E04000__000000931B9A2710\n000000067F00004002000140000000E04000-000000067F00004002000140000000E08000__000000914E3F38F0\n000000067F00004
002000140000000E04000-000000067F00004002000140000000E08000__000000931B9A2710\n000000067F00004002000140000000E05B74-000000067F00004002000140000000E0E54D__0000008D2DB5E0C1-0000008E6D15F1F1\n000000067F00004002000140000000E08000-000000067F00004002000140000000E0C000__000000914E3F38F0\n000000067F00004002000140000000E08000-000000067F00004002000140000000E0C000__000000931B9A2710\n000000067F00004002000140000000E0C000-000000067F00004002000140000000E10000__000000914E3F38F0\n000000067F00004002000140000000E0C000-000000067F00004002000140000000E10000__000000931B9A2710\n000000067F00004002000140000000E0E54D-000000067F00004002000140000000E16F24__0000008D2DB5E0C1-0000008E6D15F1F1\n000000067F00004002000140000000E10000-000000067F00004002000140000000E14000__000000914E3F38F0\n000000067F00004002000140000000E10000-000000067F00004002000140000000E14000__000000931B9A2710\n000000067F00004002000140000000E14000-000000067F00004002000140000000E18000__000000914E3F38F0\n000000067F00004002000140000000E14000-000000067F00004002000140000000E18000__000000931B9A2710\n000000067F00004002000140000000E16F24-000000067F00004002000140000000E1F8FB__0000008D2DB5E0C1-0000008E6D15F1F1\n000000067F00004002000140000000E18000-000000067F00004002000140000000E1C000__000000914E3F38F0\n000000067F00004002000140000000E18000-000000067F00004002000140000000E1C000__000000931B9A2710\n000000067F00004002000140000000E1C000-000000067F00004002000140000000E20000__000000914E3F38F0\n000000067F00004002000140000000E1C000-000000067F00004002000140000000E20000__000000931B9A2710\n000000067F00004002000140000000E1F8FB-000000067F00004002000140000000E282CC__0000008D2DB5E0C1-0000008E6D15F1F1\n000000067F00004002000140000000E20000-000000067F00004002000140000000E24000__000000914E3F38F0\n000000067F00004002000140000000E20000-000000067F00004002000140000000E24000__000000931B9A2710\n000000067F00004002000140000000E24000-000000067F00004002000140000000E28000__000000914E3F38F0\n000000067F00004002000140000000E24000-000000067F00004002000140000000E28000__000000931B9A
2710\n000000067F00004002000140000000E28000-000000067F00004002000140000000E2C000__000000914E3F38F0\n000000067F00004002000140000000E28000-000000067F00004002000140000000E2C000__000000931B9A2710\n000000067F00004002000140000000E282CC-000000067F00004002000140000000E30CB1__0000008D2DB5E0C1-0000008E6D15F1F1\n000000067F00004002000140000000E2C000-000000067F00004002000140000000E30000__000000914E3F38F0\n000000067F00004002000140000000E2C000-000000067F00004002000140000000E30000__000000931B9A2710\n000000067F00004002000140000000E30000-000000067F00004002000140000000E34000__000000914E3F38F0\n000000067F00004002000140000000E30000-000000067F00004002000140000000E34000__000000931B9A2710\n000000067F00004002000140000000E30CB1-000000067F00004002000140000000E39694__0000008D2DB5E0C1-0000008E6D15F1F1\n000000067F00004002000140000000E34000-000000067F00004002000140000000E38000__000000914E3F38F0\n000000067F00004002000140000000E34000-000000067F00004002000140000000E38000__000000931B9A2710\n000000067F00004002000140000000E38000-000000067F00004002000140000000E3C000__000000914E3F38F0\n000000067F00004002000140000000E38000-000000067F00004002000140000000E3C000__000000931B9A2710\n000000067F00004002000140000000E39694-000000067F00004002000140000000E42072__0000008D2DB5E0C1-0000008E6D15F1F1\n000000067F00004002000140000000E3C000-000000067F00004002000140000000E40000__000000914E3F38F0\n000000067F00004002000140000000E3C000-000000067F00004002000140000000E40000__000000931B9A2710\n000000067F00004002000140000000E40000-000000067F00004002000140000000E44000__000000914E3F38F0\n000000067F00004002000140000000E40000-000000067F00004002000140000000E44000__000000931B9A2710\n000000067F00004002000140000000E42072-000000067F00004002000140000000E4AA53__0000008D2DB5E0C1-0000008E6D15F1F1\n000000067F00004002000140000000E44000-000000067F00004002000140000000E48000__000000914E3F38F0\n000000067F00004002000140000000E44000-000000067F00004002000140000000E48000__000000931B9A2710\n000000067F00004002000140000000E48000-000000067F0000400200014000000
0E4C000__000000914E3F38F0\n000000067F00004002000140000000E48000-000000067F00004002000140000000E4C000__000000931B9A2710\n000000067F00004002000140000000E4AA53-000000067F00004002000140000000E53428__0000008D2DB5E0C1-0000008E6D15F1F1\n000000067F00004002000140000000E4C000-000000067F00004002000140000000E50000__000000914E3F38F0\n000000067F00004002000140000000E4C000-000000067F00004002000140000000E50000__000000931B9A2710\n000000067F00004002000140000000E50000-000000067F00004002000140000000E54000__000000914E3F38F0\n000000067F00004002000140000000E50000-000000067F00004002000140000000E54000__000000931B9A2710\n000000067F00004002000140000000E53428-000000067F00004002000140000000E5BDF8__0000008D2DB5E0C1-0000008E6D15F1F1\n000000067F00004002000140000000E54000-000000067F00004002000140000000E58000__000000914E3F38F0\n000000067F00004002000140000000E54000-000000067F00004002000140000000E58000__000000931B9A2710\n000000067F00004002000140000000E58000-000000067F00004002000140000000E5C000__000000914E3F38F0\n000000067F00004002000140000000E58000-000000067F00004002000140000000E5C000__000000931B9A2710\n000000067F00004002000140000000E5BDF8-000000067F00004002000140000000E647D2__0000008D2DB5E0C1-0000008E6D15F1F1\n000000067F00004002000140000000E5C000-000000067F00004002000140000000E60000__000000914E3F38F0\n000000067F00004002000140000000E5C000-000000067F00004002000140000000E60000__000000931B9A2710\n000000067F00004002000140000000E60000-000000067F00004002000140000000E64000__000000914E3F38F0\n000000067F00004002000140000000E60000-000000067F00004002000140000000E64000__000000931B9A2710\n000000067F00004002000140000000E64000-000000067F00004002000140000000E68000__000000914E3F38F0\n000000067F00004002000140000000E64000-000000067F00004002000140000000E68000__000000931B9A2710\n000000067F00004002000140000000E647D2-000000067F00004002000140000000E6D1B1__0000008D2DB5E0C1-0000008E6D15F1F1\n000000067F00004002000140000000E68000-000000067F00004002000140000000E6C000__000000914E3F38F0\n000000067F00004002000140000000E68000-00000006
7F00004002000140000000E6C000__000000931B9A2710\n000000067F00004002000140000000E6C000-000000067F00004002000140000000E70000__000000914E3F38F0\n000000067F00004002000140000000E6C000-000000067F00004002000140000000E70000__000000931B9A2710\n000000067F00004002000140000000E6D1B1-000000067F00004002000140000000E75B9C__0000008D2DB5E0C1-0000008E6D15F1F1\n000000067F00004002000140000000E70000-000000067F00004002000140000000E74000__000000914E3F38F0\n000000067F00004002000140000000E70000-000000067F00004002000140000000E74000__000000931B9A2710\n000000067F00004002000140000000E74000-000000067F00004002000140000000E78000__000000914E3F38F0\n000000067F00004002000140000000E74000-000000067F00004002000140000000E78000__000000931B9A2710\n000000067F00004002000140000000E75B9C-000000067F00004002000140000000E7E573__0000008D2DB5E0C1-0000008E6D15F1F1\n000000067F00004002000140000000E78000-000000067F00004002000140000000E7C000__000000914E3F38F0\n000000067F00004002000140000000E78000-000000067F00004002000140000000E7C000__000000931B9A2710\n000000067F00004002000140000000E7C000-000000067F00004002000140000000E80000__000000900A539398\n000000067F00004002000140000000E7C000-000000067F00004002000140000000E80000__000000914E3F38F0\n000000067F00004002000140000000E7C000-000000067F00004002000140000000E80000__000000931B9A2710\n000000067F00004002000140000000E7E573-000000067F00004002000140000200000000__0000008D2DB5E0C1-0000008E6D15F1F1\n000000067F00004002000140000000E7E99B-000000067F00004002000140000000E87389__0000008E6D15F1F1-0000008F0CC5C6B1\n000000067F00004002000140000000E80000-000000067F00004002000140000000E84000__000000900A539398\n000000067F00004002000140000000E80000-000000067F00004002000140000000E84000__000000914E3F38F0\n000000067F00004002000140000000E80000-000000067F00004002000140000000E84000__000000931B9A2710\n000000067F00004002000140000000E84000-000000067F00004002000140000000E88000__000000900A539398\n000000067F00004002000140000000E84000-000000067F00004002000140000000E88000__000000914E3F38F0\n000000067F00004002000140
000000E84000-000000067F00004002000140000000E88000__000000931B9A2710\n000000067F00004002000140000000E87389-000000067F00004002000140000000E8FD63__0000008E6D15F1F1-0000008F0CC5C6B1\n000000067F00004002000140000000E88000-000000067F00004002000140000000E8C000__000000900A539398\n000000067F00004002000140000000E88000-000000067F00004002000140000000E8C000__000000914E3F38F0\n000000067F00004002000140000000E88000-000000067F00004002000140000000E8C000__000000931B9A2710\n000000067F00004002000140000000E8C000-000000067F00004002000140000000E90000__000000900A539398\n000000067F00004002000140000000E8C000-000000067F00004002000140000000E90000__000000914E3F38F0\n000000067F00004002000140000000E8C000-000000067F00004002000140000000E90000__000000931B9A2710\n000000067F00004002000140000000E8FD63-000000067F00004002000140000000E98735__0000008E6D15F1F1-0000008F0CC5C6B1\n000000067F00004002000140000000E90000-000000067F00004002000140000000E94000__000000900A539398\n000000067F00004002000140000000E90000-000000067F00004002000140000000E94000__000000914E3F38F0\n000000067F00004002000140000000E90000-000000067F00004002000140000000E94000__000000931B9A2710\n000000067F00004002000140000000E94000-000000067F00004002000140000000E98000__000000900A539398\n000000067F00004002000140000000E94000-000000067F00004002000140000000E98000__000000914E3F38F0\n000000067F00004002000140000000E94000-000000067F00004002000140000000E98000__000000931B9A2710\n000000067F00004002000140000000E98000-000000067F00004002000140000000E9C000__000000900A539398\n000000067F00004002000140000000E98000-000000067F00004002000140000000E9C000__000000914E3F38F0\n000000067F00004002000140000000E98000-000000067F00004002000140000000E9C000__000000931B9A2710\n000000067F00004002000140000000E98735-000000067F00004002000140000000EA1109__0000008E6D15F1F1-0000008F0CC5C6B1\n000000067F00004002000140000000E9C000-000000067F00004002000140000000EA0000__000000900A539398\n000000067F00004002000140000000E9C000-000000067F00004002000140000000EA0000__000000914E3F38F0\n000000067F0000400200
0140000000E9C000-000000067F00004002000140000000EA0000__000000931B9A2710\n000000067F00004002000140000000EA0000-000000067F00004002000140000000EA4000__000000900A539398\n000000067F00004002000140000000EA0000-000000067F00004002000140000000EA4000__000000914E3F38F0\n000000067F00004002000140000000EA0000-000000067F00004002000140000000EA4000__000000931B9A2710\n000000067F00004002000140000000EA1109-000000067F00004002000140000000EA9AE3__0000008E6D15F1F1-0000008F0CC5C6B1\n000000067F00004002000140000000EA4000-000000067F00004002000140000000EA8000__000000900A539398\n000000067F00004002000140000000EA4000-000000067F00004002000140000000EA8000__000000914E3F38F0\n000000067F00004002000140000000EA4000-000000067F00004002000140000000EA8000__000000931B9A2710\n000000067F00004002000140000000EA8000-000000067F00004002000140000000EAC000__000000914E3F38F0\n000000067F00004002000140000000EA8000-000000067F00004002000140000000EAC000__000000931B9A2710\n000000067F00004002000140000000EA8000-030000000000000000000000000000000002__0000008EBDA82990\n000000067F00004002000140000000EA9AE3-000000067F00004002000140000000EB24C6__0000008E6D15F1F1-0000008F0CC5C6B1\n000000067F00004002000140000000EAC000-000000067F00004002000140000000EB0000__000000914E3F38F0\n000000067F00004002000140000000EAC000-000000067F00004002000140000000EB0000__000000931B9A2710\n000000067F00004002000140000000EB0000-000000067F00004002000140000000EB4000__000000914E3F38F0\n000000067F00004002000140000000EB0000-000000067F00004002000140000000EB4000__000000931B9A2710\n000000067F00004002000140000000EB24C6-000000067F00004002000140000000EBAEA6__0000008E6D15F1F1-0000008F0CC5C6B1\n000000067F00004002000140000000EB4000-000000067F00004002000140000000EB8000__000000914E3F38F0\n000000067F00004002000140000000EB4000-000000067F00004002000140000000EB8000__000000931B9A2710\n000000067F00004002000140000000EB8000-000000067F00004002000140000000EBC000__000000914E3F38F0\n000000067F00004002000140000000EB8000-000000067F00004002000140000000EBC000__000000931B9A2710\n000000067F000040
02000140000000EBAEA6-000000067F00004002000140000000EC3890__0000008E6D15F1F1-0000008F0CC5C6B1\n000000067F00004002000140000000EBC000-000000067F00004002000140000000EC0000__000000914E3F38F0\n000000067F00004002000140000000EBC000-000000067F00004002000140000000EC0000__000000931B9A2710\n000000067F00004002000140000000EC0000-000000067F00004002000140000000EC4000__000000914E3F38F0\n000000067F00004002000140000000EC0000-000000067F00004002000140000000EC4000__000000931B9A2710\n000000067F00004002000140000000EC3890-000000067F00004002000140000000ECC269__0000008E6D15F1F1-0000008F0CC5C6B1\n000000067F00004002000140000000EC4000-000000067F00004002000140000000EC8000__000000914E3F38F0\n000000067F00004002000140000000EC4000-000000067F00004002000140000000EC8000__000000931B9A2710\n000000067F00004002000140000000EC8000-000000067F00004002000140000000ECC000__000000914E3F38F0\n000000067F00004002000140000000EC8000-000000067F00004002000140000000ECC000__000000931B9A2710\n000000067F00004002000140000000ECC000-000000067F00004002000140000000ED0000__000000914E3F38F0\n000000067F00004002000140000000ECC000-000000067F00004002000140000000ED0000__000000931B9A2710\n000000067F00004002000140000000ECC269-000000067F00004002000140000000ED4C46__0000008E6D15F1F1-0000008F0CC5C6B1\n000000067F00004002000140000000ED0000-000000067F00004002000140000000ED4000__000000914E3F38F0\n000000067F00004002000140000000ED0000-000000067F00004002000140000000ED4000__000000931B9A2710\n000000067F00004002000140000000ED4000-000000067F00004002000140000000ED8000__000000900A539398\n000000067F00004002000140000000ED4000-000000067F00004002000140000000ED8000__000000914E3F38F0\n000000067F00004002000140000000ED4000-000000067F00004002000140000000ED8000__000000931B9A2710\n000000067F00004002000140000000ED4C46-000000067F00004002000140000200000000__0000008E6D15F1F1-0000008F0CC5C6B1\n000000067F00004002000140000000ED4EBC-000000067F00004002000140000000EDD899__0000008F0CC5C6B1-0000008FAC75E259\n000000067F00004002000140000000ED8000-000000067F00004002000140000000EDC0
00__000000900A539398\n000000067F00004002000140000000ED8000-000000067F00004002000140000000EDC000__000000914E3F38F0\n000000067F00004002000140000000ED8000-000000067F00004002000140000000EDC000__000000931B9A2710\n000000067F00004002000140000000EDC000-000000067F00004002000140000000EE0000__000000900A539398\n000000067F00004002000140000000EDC000-000000067F00004002000140000000EE0000__000000914E3F38F0\n000000067F00004002000140000000EDC000-000000067F00004002000140000000EE0000__000000931B9A2710\n000000067F00004002000140000000EDD899-000000067F00004002000140000000EE6278__0000008F0CC5C6B1-0000008FAC75E259\n000000067F00004002000140000000EE0000-000000067F00004002000140000000EE4000__000000900A539398\n000000067F00004002000140000000EE0000-000000067F00004002000140000000EE4000__000000914E3F38F0\n000000067F00004002000140000000EE0000-000000067F00004002000140000000EE4000__000000931B9A2710\n000000067F00004002000140000000EE4000-000000067F00004002000140000000EE8000__000000900A539398\n000000067F00004002000140000000EE4000-000000067F00004002000140000000EE8000__000000914E3F38F0\n000000067F00004002000140000000EE4000-000000067F00004002000140000000EE8000__000000931B9A2710\n000000067F00004002000140000000EE6278-000000067F00004002000140000000EEEC50__0000008F0CC5C6B1-0000008FAC75E259\n000000067F00004002000140000000EE8000-000000067F00004002000140000000EEC000__000000900A539398\n000000067F00004002000140000000EE8000-000000067F00004002000140000000EEC000__000000914E3F38F0\n000000067F00004002000140000000EE8000-000000067F00004002000140000000EEC000__000000931B9A2710\n000000067F00004002000140000000EEC000-000000067F00004002000140000000EF0000__000000900A539398\n000000067F00004002000140000000EEC000-000000067F00004002000140000000EF0000__000000914E3F38F0\n000000067F00004002000140000000EEC000-000000067F00004002000140000000EF0000__000000931B9A2710\n000000067F00004002000140000000EEEC50-000000067F00004002000140000000EF7623__0000008F0CC5C6B1-0000008FAC75E259\n000000067F00004002000140000000EF0000-000000067F00004002000140000000
EF4000__000000900A539398\n000000067F00004002000140000000EF0000-000000067F00004002000140000000EF4000__000000914E3F38F0\n000000067F00004002000140000000EF0000-000000067F00004002000140000000EF4000__000000931B9A2710\n000000067F00004002000140000000EF4000-000000067F00004002000140000000EF8000__000000900A539398\n000000067F00004002000140000000EF4000-000000067F00004002000140000000EF8000__000000914E3F38F0\n000000067F00004002000140000000EF4000-000000067F00004002000140000000EF8000__000000931B9A2710\n000000067F00004002000140000000EF7623-000000067F00004002000140000000EFFFFA__0000008F0CC5C6B1-0000008FAC75E259\n000000067F00004002000140000000EF8000-000000067F00004002000140000000EFC000__000000900A539398\n000000067F00004002000140000000EF8000-000000067F00004002000140000000EFC000__000000914E3F38F0\n000000067F00004002000140000000EF8000-000000067F00004002000140000000EFC000__000000931B9A2710\n000000067F00004002000140000000EFC000-000000067F00004002000140000000F00000__000000900A539398\n000000067F00004002000140000000EFC000-000000067F00004002000140000000F00000__000000914E3F38F0\n000000067F00004002000140000000EFC000-000000067F00004002000140000000F00000__000000931B9A2710\n000000067F00004002000140000000EFFFFA-000000067F00004002000140000000F089E5__0000008F0CC5C6B1-0000008FAC75E259\n000000067F00004002000140000000F00000-000000067F00004002000140000000F04000__000000900A539398\n000000067F00004002000140000000F00000-000000067F00004002000140000000F04000__000000914E3F38F0\n000000067F00004002000140000000F00000-000000067F00004002000140000000F04000__000000931B9A2710\n000000067F00004002000140000000F04000-000000067F00004002000140000000F08000__000000900A539398\n000000067F00004002000140000000F04000-000000067F00004002000140000000F08000__000000914E3F38F0\n000000067F00004002000140000000F04000-000000067F00004002000140000000F08000__000000931B9A2710\n000000067F00004002000140000000F08000-000000067F00004002000140000000F0C000__000000900A539398\n000000067F00004002000140000000F08000-000000067F00004002000140000000F0C000__00000
0914E3F38F0\n000000067F00004002000140000000F08000-000000067F00004002000140000000F0C000__000000931B9A2710\n000000067F00004002000140000000F089E5-000000067F00004002000140000000F113CD__0000008F0CC5C6B1-0000008FAC75E259\n000000067F00004002000140000000F0C000-000000067F00004002000140000000F10000__000000900A539398\n000000067F00004002000140000000F0C000-000000067F00004002000140000000F10000__000000914E3F38F0\n000000067F00004002000140000000F0C000-000000067F00004002000140000000F10000__000000931B9A2710\n000000067F00004002000140000000F10000-000000067F00004002000140000000F14000__000000900A539398\n000000067F00004002000140000000F10000-000000067F00004002000140000000F14000__000000914E3F38F0\n000000067F00004002000140000000F10000-000000067F00004002000140000000F14000__000000931B9A2710\n000000067F00004002000140000000F113CD-000000067F00004002000140000000F19DA8__0000008F0CC5C6B1-0000008FAC75E259\n000000067F00004002000140000000F14000-000000067F00004002000140000000F18000__000000900A539398\n000000067F00004002000140000000F14000-000000067F00004002000140000000F18000__000000914E3F38F0\n000000067F00004002000140000000F14000-000000067F00004002000140000000F18000__000000931B9A2710\n000000067F00004002000140000000F18000-000000067F00004002000140000000F1C000__000000900A539398\n000000067F00004002000140000000F18000-000000067F00004002000140000000F1C000__000000914E3F38F0\n000000067F00004002000140000000F18000-000000067F00004002000140000000F1C000__000000931B9A2710\n000000067F00004002000140000000F19DA8-000000067F00004002000140000000F22786__0000008F0CC5C6B1-0000008FAC75E259\n000000067F00004002000140000000F1C000-000000067F00004002000140000000F20000__000000900A539398\n000000067F00004002000140000000F1C000-000000067F00004002000140000000F20000__000000914E3F38F0\n000000067F00004002000140000000F1C000-000000067F00004002000140000000F20000__000000931B9A2710\n000000067F00004002000140000000F20000-000000067F00004002000140000000F24000__000000900A539398\n000000067F00004002000140000000F20000-000000067F00004002000140000000F24000__0
00000914E3F38F0\n000000067F00004002000140000000F20000-000000067F00004002000140000000F24000__000000931B9A2710\n000000067F00004002000140000000F22786-000000067F00004002000140000000F2B162__0000008F0CC5C6B1-0000008FAC75E259\n000000067F00004002000140000000F24000-000000067F00004002000140000000F28000__000000900A539398\n000000067F00004002000140000000F24000-000000067F00004002000140000000F28000__000000914E3F38F0\n000000067F00004002000140000000F24000-000000067F00004002000140000000F28000__000000931B9A2710\n000000067F00004002000140000000F28000-000000067F00004002000140000000F2C000__000000900A539398\n000000067F00004002000140000000F28000-000000067F00004002000140000000F2C000__000000914E3F38F0\n000000067F00004002000140000000F28000-000000067F00004002000140000000F2C000__000000931B9A2710\n000000067F00004002000140000000F2B162-000000067F00004002000140000200000000__0000008F0CC5C6B1-0000008FAC75E259\n000000067F00004002000140000000F2C000-000000067F00004002000140000000F30000__000000900A539398\n000000067F00004002000140000000F2C000-000000067F00004002000140000000F30000__000000914E3F38F0\n000000067F00004002000140000000F2C000-000000067F00004002000140000000F30000__000000931B9A2710\n000000067F00004002000140000000F30000-000000067F00004002000140000000F34000__000000900A539398\n000000067F00004002000140000000F30000-000000067F00004002000140000000F34000__000000914E3F38F0\n000000067F00004002000140000000F30000-000000067F00004002000140000000F34000__000000931B9A2710\n000000067F00004002000140000000F32D01-000000067F00004002000140000000F3B6CF__0000008FAC75E259-000000900BB52179\n000000067F00004002000140000000F34000-000000067F00004002000140000000F38000__000000900A539398\n000000067F00004002000140000000F34000-000000067F00004002000140000000F38000__000000914E3F38F0\n000000067F00004002000140000000F34000-000000067F00004002000140000000F38000__000000931B9A2710\n000000067F00004002000140000000F38000-000000067F00004002000140000000F3C000__000000900A539398\n000000067F00004002000140000000F38000-000000067F00004002000140000000F3C00
0__000000914E3F38F0\n000000067F00004002000140000000F38000-000000067F00004002000140000000F3C000__000000931B9A2710\n000000067F00004002000140000000F3B6CF-000000067F00004002000140000000F440B8__0000008FAC75E259-000000900BB52179\n000000067F00004002000140000000F3C000-000000067F00004002000140000000F40000__000000900A539398\n000000067F00004002000140000000F3C000-000000067F00004002000140000000F40000__000000914E3F38F0\n000000067F00004002000140000000F3C000-000000067F00004002000140000000F40000__000000931B9A2710\n000000067F00004002000140000000F40000-000000067F00004002000140000000F44000__000000900A539398\n000000067F00004002000140000000F40000-000000067F00004002000140000000F44000__000000914E3F38F0\n000000067F00004002000140000000F40000-000000067F00004002000140000000F44000__000000931B9A2710\n000000067F00004002000140000000F44000-000000067F00004002000140000000F48000__000000900A539398\n000000067F00004002000140000000F44000-000000067F00004002000140000000F48000__000000914E3F38F0\n000000067F00004002000140000000F44000-000000067F00004002000140000000F48000__000000931B9A2710\n000000067F00004002000140000000F440B8-000000067F00004002000140000000F4CA9B__0000008FAC75E259-000000900BB52179\n000000067F00004002000140000000F48000-000000067F00004002000140000000F4C000__000000900A539398\n000000067F00004002000140000000F48000-000000067F00004002000140000000F4C000__000000914E3F38F0\n000000067F00004002000140000000F48000-000000067F00004002000140000000F4C000__000000931B9A2710\n000000067F00004002000140000000F4C000-000000067F00004002000140000000F50000__000000900A539398\n000000067F00004002000140000000F4C000-000000067F00004002000140000000F50000__000000914E3F38F0\n000000067F00004002000140000000F4C000-000000067F00004002000140000000F50000__000000931B9A2710\n000000067F00004002000140000000F4CA9B-000000067F00004002000140000000F55479__0000008FAC75E259-000000900BB52179\n000000067F00004002000140000000F50000-000000067F00004002000140000000F54000__000000900A539398\n000000067F00004002000140000000F50000-000000067F00004002000140000000F
54000__000000914E3F38F0\n000000067F00004002000140000000F50000-000000067F00004002000140000000F54000__000000931B9A2710\n000000067F00004002000140000000F54000-000000067F00004002000140000000F58000__000000900A539398\n000000067F00004002000140000000F54000-000000067F00004002000140000000F58000__000000914E3F38F0\n000000067F00004002000140000000F54000-000000067F00004002000140000000F58000__000000931B9A2710\n000000067F00004002000140000000F55479-000000067F00004002000140000000F5DE56__0000008FAC75E259-000000900BB52179\n000000067F00004002000140000000F58000-000000067F00004002000140000000F5C000__000000900A539398\n000000067F00004002000140000000F58000-000000067F00004002000140000000F5C000__000000914E3F38F0\n000000067F00004002000140000000F58000-000000067F00004002000140000000F5C000__000000931B9A2710\n000000067F00004002000140000000F5C000-000000067F00004002000140040100000000__000000914E3F38F0\n000000067F00004002000140000000F5C000-000000067F00004002000140040100000000__000000931B9A2710\n000000067F00004002000140000000F5C000-030000000000000000000000000000000002__000000900A539398\n000000067F00004002000140000000F5DE56-030000000000000000000000000000000002__0000008FAC75E259-000000900BB52179\n000000067F000040020001400000FFFFFFFF-000000067F00004002000140000100000000__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000140000100000000-000000067F00004002000140000100000DEC__000000739A920D71-0000008D2DB5E0C1\n000000067F000040020001400001FFFFFFFF-000000067F00004002000140000200000000__000000739A962E10-0000007F75893CE8\n000000067F000040020001400001FFFFFFFF-000000067F00004002000140000200000000__0000007F75893CE8-0000008B40129080\n000000067F000040020001400001FFFFFFFF-000000067F00004002000140000200000000__0000008B40129080-0000008D2DB5E0C1\n000000067F000040020001400300FFFFFFFF-030000000000000000000000000000000002__000000739A920D71-0000008D2DB5E0C1\n000000067F00004002000160000000000000-000000067F00004002000160000000004000__000000914E3F38F0\n000000067F00004002000160000000000000-000000067F0000400200016000000000400
0__000000931B9A2710\n000000067F00004002000160000000004000-000000067F00004002000160000000008000__000000914E3F38F0\n000000067F00004002000160000000004000-000000067F00004002000160000000008000__000000931B9A2710\n000000067F00004002000160000000007F7A-000000067F0000400200016000000000FEFD__000000900BB52179-0000009046EDA719\n000000067F00004002000160000000008000-000000067F0000400200016000000000C000__000000914E3F38F0\n000000067F00004002000160000000008000-000000067F0000400200016000000000C000__000000931B9A2710\n000000067F0000400200016000000000C000-000000067F00004002000160000000010000__000000914E3F38F0\n000000067F0000400200016000000000C000-000000067F00004002000160000000010000__000000931B9A2710\n000000067F0000400200016000000000FEFD-000000067F00004002000160000000017E80__000000900BB52179-0000009046EDA719\n000000067F00004002000160000000010000-000000067F00004002000160000000014000__000000914E3F38F0\n000000067F00004002000160000000010000-000000067F00004002000160000000014000__000000931B9A2710\n000000067F00004002000160000000014000-000000067F00004002000160000000018000__000000914E3F38F0\n000000067F00004002000160000000014000-000000067F00004002000160000000018000__000000931B9A2710\n000000067F00004002000160000000017E80-000000067F0000400200016000000001FE03__000000900BB52179-0000009046EDA719\n000000067F00004002000160000000018000-000000067F0000400200016000000001C000__000000914E3F38F0\n000000067F00004002000160000000018000-000000067F0000400200016000000001C000__000000931B9A2710\n000000067F0000400200016000000001C000-000000067F00004002000160000000020000__000000914E3F38F0\n000000067F0000400200016000000001C000-000000067F00004002000160000000020000__000000931B9A2710\n000000067F0000400200016000000001FE03-000000067F00004002000160000000027D86__000000900BB52179-0000009046EDA719\n000000067F00004002000160000000020000-000000067F00004002000160000000024000__000000914E3F38F0\n000000067F00004002000160000000020000-000000067F00004002000160000000024000__000000931B9A2710\n000000067F00004002000160000000024000-000000067F0000
4002000160000000028000__000000914E3F38F0\n000000067F00004002000160000000024000-000000067F00004002000160000000028000__000000931B9A2710\n000000067F00004002000160000000027D86-000000067F0000400200016000000002FD09__000000900BB52179-0000009046EDA719\n000000067F00004002000160000000028000-000000067F0000400200016000000002C000__000000914E3F38F0\n000000067F00004002000160000000028000-000000067F0000400200016000000002C000__000000931B9A2710\n000000067F0000400200016000000002C000-000000067F00004002000160000000030000__000000914E3F38F0\n000000067F0000400200016000000002C000-000000067F00004002000160000000030000__000000931B9A2710\n000000067F0000400200016000000002FD09-030000000000000000000000000000000002__000000900BB52179-0000009046EDA719\n000000067F00004002000160000000030000-000000067F00004002000160000000034000__000000914E3F38F0\n000000067F00004002000160000000030000-000000067F00004002000160000000034000__000000931B9A2710\n000000067F00004002000160000000034000-000000067F00004002000160000000038000__000000914E3F38F0\n000000067F00004002000160000000034000-000000067F00004002000160000000038000__000000931B9A2710\n000000067F00004002000160000000037E1D-000000067F0000400200016000000003FDA0__0000009046EDA719-000000914E3FE031\n000000067F00004002000160000000038000-000000067F0000400200016000000003C000__000000914E3F38F0\n000000067F00004002000160000000038000-000000067F0000400200016000000003C000__000000931B9A2710\n000000067F0000400200016000000003C000-000000067F00004002000160000000040000__000000914E3F38F0\n000000067F0000400200016000000003C000-000000067F00004002000160000000040000__000000931B9A2710\n000000067F0000400200016000000003FDA0-000000067F00004002000160000000047D23__0000009046EDA719-000000914E3FE031\n000000067F00004002000160000000040000-000000067F00004002000160000000044000__000000914E3F38F0\n000000067F00004002000160000000040000-000000067F00004002000160000000044000__000000931B9A2710\n000000067F00004002000160000000044000-000000067F00004002000160000000048000__000000914E3F38F0\n000000067F00004002000160000000
044000-000000067F00004002000160000000048000__000000931B9A2710\n000000067F00004002000160000000047D23-000000067F0000400200016000000004FCA6__0000009046EDA719-000000914E3FE031\n000000067F00004002000160000000048000-000000067F0000400200016000000004C000__000000914E3F38F0\n000000067F00004002000160000000048000-000000067F0000400200016000000004C000__000000931B9A2710\n000000067F0000400200016000000004C000-000000067F00004002000160000000050000__000000914E3F38F0\n000000067F0000400200016000000004C000-000000067F00004002000160000000050000__000000931B9A2710\n000000067F0000400200016000000004FCA6-000000067F00004002000160000000057C29__0000009046EDA719-000000914E3FE031\n000000067F00004002000160000000050000-000000067F00004002000160000000054000__000000914E3F38F0\n000000067F00004002000160000000050000-000000067F00004002000160000000054000__000000931B9A2710\n000000067F00004002000160000000054000-000000067F00004002000160000000058000__000000914E3F38F0\n000000067F00004002000160000000054000-000000067F00004002000160000000058000__000000931B9A2710\n000000067F00004002000160000000057C29-000000067F0000400200016000000005FBAC__0000009046EDA719-000000914E3FE031\n000000067F00004002000160000000058000-000000067F0000400200016000000005C000__000000914E3F38F0\n000000067F00004002000160000000058000-000000067F0000400200016000000005C000__000000931B9A2710\n000000067F0000400200016000000005C000-000000067F00004002000160000000060000__000000914E3F38F0\n000000067F0000400200016000000005C000-000000067F00004002000160000000060000__000000931B9A2710\n000000067F0000400200016000000005FBAC-000000067F00004002000160000000067B2F__0000009046EDA719-000000914E3FE031\n000000067F00004002000160000000060000-000000067F00004002000160000000064000__000000914E3F38F0\n000000067F00004002000160000000060000-000000067F00004002000160000000064000__000000931B9A2710\n000000067F00004002000160000000064000-000000067F00004002000160000000068000__000000914E3F38F0\n000000067F00004002000160000000064000-000000067F00004002000160000000068000__000000931B9A2710\n000000067
F00004002000160000000067B2F-000000067F0000400200016000000006FAB2__0000009046EDA719-000000914E3FE031\n000000067F00004002000160000000068000-000000067F0000400200016000000006C000__000000914E3F38F0\n000000067F00004002000160000000068000-000000067F0000400200016000000006C000__000000931B9A2710\n000000067F0000400200016000000006C000-000000067F00004002000160000000070000__000000914E3F38F0\n000000067F0000400200016000000006C000-000000067F00004002000160000000070000__000000931B9A2710\n000000067F0000400200016000000006FAB2-000000067F00004002000160000000077A35__0000009046EDA719-000000914E3FE031\n000000067F00004002000160000000070000-000000067F00004002000160000000074000__000000914E3F38F0\n000000067F00004002000160000000070000-000000067F00004002000160000000074000__000000931B9A2710\n000000067F00004002000160000000074000-000000067F00004002000160000000078000__000000914E3F38F0\n000000067F00004002000160000000074000-000000067F00004002000160000000078000__000000931B9A2710\n000000067F00004002000160000000077A35-000000067F0000400200016000000007F9B8__0000009046EDA719-000000914E3FE031\n000000067F00004002000160000000078000-000000067F0000400200016000000007C000__000000914E3F38F0\n000000067F00004002000160000000078000-000000067F0000400200016000000007C000__000000931B9A2710\n000000067F0000400200016000000007C000-000000067F00004002000160000000080000__000000914E3F38F0\n000000067F0000400200016000000007C000-000000067F00004002000160000000080000__000000931B9A2710\n000000067F0000400200016000000007F9B8-000000067F0000400200016000000008793B__0000009046EDA719-000000914E3FE031\n000000067F00004002000160000000080000-000000067F00004002000160000000084000__000000914E3F38F0\n000000067F00004002000160000000080000-000000067F00004002000160000000084000__000000931B9A2710\n000000067F00004002000160000000084000-000000067F00004002000160000000088000__000000914E3F38F0\n000000067F00004002000160000000084000-000000067F00004002000160000000088000__000000931B9A2710\n000000067F0000400200016000000008793B-000000067F0000400200016000000008F8BE__000000
9046EDA719-000000914E3FE031\n000000067F00004002000160000000088000-000000067F0000400200016000000008C000__000000914E3F38F0\n000000067F00004002000160000000088000-000000067F0000400200016000000008C000__000000931B9A2710\n000000067F0000400200016000000008C000-000000067F00004002000160000000090000__000000914E3F38F0\n000000067F0000400200016000000008C000-000000067F00004002000160000000090000__000000931B9A2710\n000000067F0000400200016000000008F8BE-000000067F00004002000160000000097841__0000009046EDA719-000000914E3FE031\n000000067F00004002000160000000090000-000000067F00004002000160000000094000__000000914E3F38F0\n000000067F00004002000160000000090000-000000067F00004002000160000000094000__000000931B9A2710\n000000067F00004002000160000000094000-000000067F00004002000160000000098000__000000914E3F38F0\n000000067F00004002000160000000094000-000000067F00004002000160000000098000__000000931B9A2710\n000000067F00004002000160000000097841-000000067F0000400200016000000009F7C4__0000009046EDA719-000000914E3FE031\n000000067F00004002000160000000098000-000000067F0000400200016000000009C000__000000914E3F38F0\n000000067F00004002000160000000098000-000000067F0000400200016000000009C000__000000931B9A2710\n000000067F0000400200016000000009C000-000000067F000040020001600000000A0000__000000914E3F38F0\n000000067F0000400200016000000009C000-000000067F000040020001600000000A0000__000000931B9A2710\n000000067F0000400200016000000009F7C4-000000067F000040020001600000000A7747__0000009046EDA719-000000914E3FE031\n000000067F000040020001600000000A0000-000000067F000040020001600000000A4000__000000914E3F38F0\n000000067F000040020001600000000A0000-000000067F000040020001600000000A4000__000000931B9A2710\n000000067F000040020001600000000A4000-000000067F000040020001600000000A8000__000000914E3F38F0\n000000067F000040020001600000000A4000-000000067F000040020001600000000A8000__000000931B9A2710\n000000067F000040020001600000000A7747-000000067F000040020001600000000AF6CA__0000009046EDA719-000000914E3FE031\n000000067F000040020001600000000A8000-000000
067F000040020001600000000AC000__000000914E3F38F0\n000000067F000040020001600000000A8000-000000067F000040020001600000000AC000__000000931B9A2710\n000000067F000040020001600000000AC000-000000067F000040020001600000000B0000__000000914E3F38F0\n000000067F000040020001600000000AC000-000000067F000040020001600000000B0000__000000931B9A2710\n000000067F000040020001600000000AF6CA-000000067F000040020001600000000B764D__0000009046EDA719-000000914E3FE031\n000000067F000040020001600000000B0000-000000067F000040020001600000000B4000__000000914E3F38F0\n000000067F000040020001600000000B0000-000000067F000040020001600000000B4000__000000931B9A2710\n000000067F000040020001600000000B4000-000000067F000040020001600000000B8000__000000914E3F38F0\n000000067F000040020001600000000B4000-000000067F000040020001600000000B8000__000000931B9A2710\n000000067F000040020001600000000B764D-000000067F000040020001600000000BF5D0__0000009046EDA719-000000914E3FE031\n000000067F000040020001600000000B8000-000000067F000040020001600000000BC000__000000914E3F38F0\n000000067F000040020001600000000B8000-000000067F000040020001600000000BC000__000000931B9A2710\n000000067F000040020001600000000BC000-000000067F000040020001600000000C0000__000000914E3F38F0\n000000067F000040020001600000000BC000-000000067F000040020001600000000C0000__000000931B9A2710\n000000067F000040020001600000000BF5D0-000000067F000040020001600000000C7553__0000009046EDA719-000000914E3FE031\n000000067F000040020001600000000C0000-000000067F000040020001600000000C4000__000000914E3F38F0\n000000067F000040020001600000000C0000-000000067F000040020001600000000C4000__000000931B9A2710\n000000067F000040020001600000000C4000-000000067F000040020001600000000C8000__000000914E3F38F0\n000000067F000040020001600000000C4000-000000067F000040020001600000000C8000__000000931B9A2710\n000000067F000040020001600000000C7553-000000067F000040020001600000000CF4D6__0000009046EDA719-000000914E3FE031\n000000067F000040020001600000000C8000-000000067F000040020001600000000CC000__000000914E3F38F0\n000000067F000040020001
600000000C8000-000000067F000040020001600000000CC000__000000931B9A2710\n000000067F000040020001600000000CC000-000000067F000040020001600000000D0000__000000914E3F38F0\n000000067F000040020001600000000CC000-000000067F000040020001600000000D0000__000000931B9A2710\n000000067F000040020001600000000CF4D6-000000067F000040020001600000000D7459__0000009046EDA719-000000914E3FE031\n000000067F000040020001600000000D0000-000000067F000040020001600000000D4000__000000914E3F38F0\n000000067F000040020001600000000D0000-000000067F000040020001600000000D4000__000000931B9A2710\n000000067F000040020001600000000D4000-000000067F000040020001600000000D8000__000000914E3F38F0\n000000067F000040020001600000000D4000-000000067F000040020001600000000D8000__000000931B9A2710\n000000067F000040020001600000000D7459-000000067F000040020001600000000DF3DC__0000009046EDA719-000000914E3FE031\n000000067F000040020001600000000D8000-000000067F000040020001600000000DC000__000000914E3F38F0\n000000067F000040020001600000000D8000-000000067F000040020001600000000DC000__000000931B9A2710\n000000067F000040020001600000000DC000-000000067F000040020001600000000E0000__000000914E3F38F0\n000000067F000040020001600000000DC000-000000067F000040020001600000000E0000__000000931B9A2710\n000000067F000040020001600000000DF3DC-000000067F000040020001600000000E735F__0000009046EDA719-000000914E3FE031\n000000067F000040020001600000000E0000-000000067F000040020001600000000E4000__000000914E3F38F0\n000000067F000040020001600000000E0000-000000067F000040020001600000000E4000__000000931B9A2710\n000000067F000040020001600000000E4000-000000067F000040020001600000000E8000__000000914E3F38F0\n000000067F000040020001600000000E4000-000000067F000040020001600000000E8000__000000931B9A2710\n000000067F000040020001600000000E735F-000000067F000040020001600000000EF2E2__0000009046EDA719-000000914E3FE031\n000000067F000040020001600000000E8000-000000067F000040020001600000000EC000__000000914E3F38F0\n000000067F000040020001600000000E8000-000000067F000040020001600000000EC000__000000931B9A2710\n0
00000067F000040020001600000000EC000-000000067F000040020001600000000F0000__000000914E3F38F0\n000000067F000040020001600000000EC000-000000067F000040020001600000000F0000__000000931B9A2710\n000000067F000040020001600000000EF2E2-000000067F000040020001600000000F7265__0000009046EDA719-000000914E3FE031\n000000067F000040020001600000000F0000-000000067F000040020001600000000F4000__000000914E3F38F0\n000000067F000040020001600000000F0000-000000067F000040020001600000000F4000__000000931B9A2710\n000000067F000040020001600000000F4000-000000067F000040020001600000000F8000__000000914E3F38F0\n000000067F000040020001600000000F4000-000000067F000040020001600000000F8000__000000931B9A2710\n000000067F000040020001600000000F7265-000000067F000040020001600000000FF1E8__0000009046EDA719-000000914E3FE031\n000000067F000040020001600000000F8000-000000067F000040020001600000000FC000__000000914E3F38F0\n000000067F000040020001600000000F8000-000000067F000040020001600000000FC000__000000931B9A2710\n000000067F000040020001600000000FC000-000000067F00004002000160000000100000__000000914E3F38F0\n000000067F000040020001600000000FC000-000000067F00004002000160000000100000__000000931B9A2710\n000000067F000040020001600000000FF1E8-000000067F0000400200016000000010716B__0000009046EDA719-000000914E3FE031\n000000067F00004002000160000000100000-000000067F00004002000160000000104000__000000914E3F38F0\n000000067F00004002000160000000100000-000000067F00004002000160000000104000__000000931B9A2710\n000000067F00004002000160000000104000-000000067F00004002000160000000108000__000000914E3F38F0\n000000067F00004002000160000000104000-000000067F00004002000160000000108000__000000931B9A2710\n000000067F0000400200016000000010716B-030000000000000000000000000000000002__0000009046EDA719-000000914E3FE031\n000000067F00004002000160000000108000-000000067F0000400200016000000010C000__000000914E3F38F0\n000000067F00004002000160000000108000-000000067F0000400200016000000010C000__000000931B9A2710\n000000067F0000400200016000000010C000-000000067F00004002000160000100000000
__000000931B9A2710\n000000067F0000400200016000000010C000-030000000000000000000000000000000002__000000914E3F38F0\n000000067F00004002000180000000000000-000000067F00004002000180000000004000__000000931B9A2710\n000000067F00004002000180000000004000-000000067F00004002000180000000008000__000000931B9A2710\n000000067F00004002000180000000007F7A-000000067F0000400200018000000000FEFD__000000914E3FE031-000000919CCE8B21\n000000067F00004002000180000000008000-000000067F0000400200018000000000C000__000000931B9A2710\n000000067F0000400200018000000000C000-000000067F00004002000180000000010000__000000931B9A2710\n000000067F0000400200018000000000FEFD-000000067F00004002000180000000017E80__000000914E3FE031-000000919CCE8B21\n000000067F00004002000180000000010000-000000067F00004002000180000000014000__000000931B9A2710\n000000067F00004002000180000000014000-000000067F00004002000180000000018000__000000931B9A2710\n000000067F00004002000180000000017E80-000000067F0000400200018000000001FE03__000000914E3FE031-000000919CCE8B21\n000000067F00004002000180000000018000-000000067F0000400200018000000001C000__000000931B9A2710\n000000067F0000400200018000000001C000-000000067F00004002000180000000020000__000000931B9A2710\n000000067F0000400200018000000001FE03-000000067F00004002000180000000027D86__000000914E3FE031-000000919CCE8B21\n000000067F00004002000180000000020000-000000067F00004002000180000000024000__000000931B9A2710\n000000067F00004002000180000000024000-000000067F00004002000180000000028000__000000931B9A2710\n000000067F00004002000180000000027D86-000000067F0000400200018000000002FD09__000000914E3FE031-000000919CCE8B21\n000000067F00004002000180000000028000-000000067F0000400200018000000002C000__000000931B9A2710\n000000067F0000400200018000000002C000-000000067F00004002000180000000030000__000000931B9A2710\n000000067F0000400200018000000002FD09-000000067F00004002000180000000037C8C__000000914E3FE031-000000919CCE8B21\n000000067F00004002000180000000030000-000000067F00004002000180000000034000__000000931B9A2710\n000000067F00004002
000180000000034000-000000067F00004002000180000000038000__000000931B9A2710\n000000067F00004002000180000000037C8C-000000067F0000400200018000000003FC0F__000000914E3FE031-000000919CCE8B21\n000000067F00004002000180000000038000-000000067F0000400200018000000003C000__000000931B9A2710\n000000067F0000400200018000000003C000-000000067F00004002000180000000040000__000000926240EF70\n000000067F0000400200018000000003C000-000000067F00004002000180000000040000__000000931B9AFDF8\n000000067F0000400200018000000003FC0F-030000000000000000000000000000000002__000000914E3FE031-000000919CCE8B21\n000000067F0000400200018000000003FE20-000000067F00004002000180000000047DA3__000000919CCE8B21-000000921B6384B9\n000000067F00004002000180000000040000-000000067F00004002000180000000044000__000000926240EF70\n000000067F00004002000180000000040000-000000067F00004002000180000000044000__000000931B9AFDF8\n000000067F00004002000180000000044000-000000067F00004002000180000000048000__000000926240EF70\n000000067F00004002000180000000044000-000000067F00004002000180000000048000__000000931B9AFDF8\n000000067F00004002000180000000047DA3-000000067F0000400200018000000004FD26__000000919CCE8B21-000000921B6384B9\n000000067F00004002000180000000048000-000000067F0000400200018000000004C000__000000926240EF70\n000000067F00004002000180000000048000-000000067F0000400200018000000004C000__000000931B9AFDF8\n000000067F0000400200018000000004C000-000000067F00004002000180000000050000__000000926240EF70\n000000067F0000400200018000000004C000-000000067F00004002000180000000050000__000000931B9AFDF8\n000000067F0000400200018000000004FD26-000000067F00004002000180000000057CA9__000000919CCE8B21-000000921B6384B9\n000000067F00004002000180000000050000-000000067F00004002000180000000054000__000000926240EF70\n000000067F00004002000180000000050000-000000067F00004002000180000000054000__000000931B9AFDF8\n000000067F00004002000180000000054000-000000067F00004002000180000000058000__000000926240EF70\n000000067F00004002000180000000054000-000000067F00004002000180000000058000
__000000931B9AFDF8\n000000067F00004002000180000000057CA9-000000067F0000400200018000000005FC2C__000000919CCE8B21-000000921B6384B9\n000000067F00004002000180000000058000-000000067F0000400200018000000005C000__000000926240EF70\n000000067F00004002000180000000058000-000000067F0000400200018000000005C000__000000931B9AFDF8\n000000067F0000400200018000000005C000-000000067F00004002000180000000060000__000000926240EF70\n000000067F0000400200018000000005C000-000000067F00004002000180000000060000__000000931B9AFDF8\n000000067F0000400200018000000005FC2C-000000067F00004002000180000000067BAF__000000919CCE8B21-000000921B6384B9\n000000067F00004002000180000000060000-000000067F00004002000180000000064000__000000926240EF70\n000000067F00004002000180000000060000-000000067F00004002000180000000064000__000000931B9AFDF8\n000000067F00004002000180000000064000-000000067F00004002000180000000068000__000000926240EF70\n000000067F00004002000180000000064000-000000067F00004002000180000000068000__000000931B9AFDF8\n000000067F00004002000180000000067BAF-000000067F0000400200018000000006FB32__000000919CCE8B21-000000921B6384B9\n000000067F00004002000180000000068000-000000067F0000400200018000000006C000__000000926240EF70\n000000067F00004002000180000000068000-000000067F0000400200018000000006C000__000000931B9AFDF8\n000000067F0000400200018000000006C000-000000067F00004002000180000000070000__000000926240EF70\n000000067F0000400200018000000006C000-000000067F00004002000180000000070000__000000931B9AFDF8\n000000067F0000400200018000000006FB32-000000067F00004002000180000000077AB5__000000919CCE8B21-000000921B6384B9\n000000067F00004002000180000000070000-000000067F00004002000180000000074000__000000926240EF70\n000000067F00004002000180000000070000-000000067F00004002000180000000074000__000000931B9AFDF8\n000000067F00004002000180000000074000-000000067F00004002000180000000078000__000000926240EF70\n000000067F00004002000180000000074000-000000067F00004002000180000000078000__000000931B9AFDF8\n000000067F00004002000180000000077AB5-000000067F00004
00200018000000007FA38__000000919CCE8B21-000000921B6384B9\n000000067F00004002000180000000078000-000000067F0000400200018000000007C000__000000926240EF70\n000000067F00004002000180000000078000-000000067F0000400200018000000007C000__000000931B9AFDF8\n000000067F0000400200018000000007C000-000000067F00004002000180000000080000__000000926240EF70\n000000067F0000400200018000000007C000-000000067F00004002000180000000080000__000000931B9AFDF8\n000000067F0000400200018000000007FA38-000000067F000040020001800000000879BB__000000919CCE8B21-000000921B6384B9\n000000067F00004002000180000000080000-000000067F00004002000180000000084000__000000926240EF70\n000000067F00004002000180000000080000-000000067F00004002000180000000084000__000000931B9AFDF8\n000000067F00004002000180000000084000-000000067F00004002000180000000088000__000000926240EF70\n000000067F00004002000180000000084000-000000067F00004002000180000000088000__000000931B9AFDF8\n000000067F000040020001800000000879BB-000000067F0000400200018000000008F93E__000000919CCE8B21-000000921B6384B9\n000000067F00004002000180000000088000-000000067F0000400200018000000008C000__000000926240EF70\n000000067F00004002000180000000088000-000000067F0000400200018000000008C000__000000931B9AFDF8\n000000067F0000400200018000000008C000-000000067F00004002000180000000090000__000000926240EF70\n000000067F0000400200018000000008C000-000000067F00004002000180000000090000__000000931B9AFDF8\n000000067F0000400200018000000008F93E-000000067F000040020001800000000978C1__000000919CCE8B21-000000921B6384B9\n000000067F00004002000180000000090000-000000067F00004002000180000000094000__000000926240EF70\n000000067F00004002000180000000090000-000000067F00004002000180000000094000__000000931B9AFDF8\n000000067F00004002000180000000094000-000000067F00004002000180000000098000__000000926240EF70\n000000067F00004002000180000000094000-000000067F00004002000180000000098000__000000931B9AFDF8\n000000067F000040020001800000000978C1-000000067F0000400200018000000009F844__000000919CCE8B21-000000921B6384B9\n000000067F0000
4002000180000000098000-000000067F0000400200018000000009C000__000000926240EF70\n000000067F00004002000180000000098000-000000067F0000400200018000000009C000__000000931B9AFDF8\n000000067F0000400200018000000009C000-000000067F000040020001800000000A0000__000000926240EF70\n000000067F0000400200018000000009C000-000000067F000040020001800000000A0000__000000931B9AFDF8\n000000067F0000400200018000000009F844-000000067F000040020001800000000A77C7__000000919CCE8B21-000000921B6384B9\n000000067F000040020001800000000A0000-000000067F000040020001800000000A4000__000000926240EF70\n000000067F000040020001800000000A0000-000000067F000040020001800000000A4000__000000931B9AFDF8\n000000067F000040020001800000000A4000-000000067F000040020001800000000A8000__000000926240EF70\n000000067F000040020001800000000A4000-000000067F000040020001800000000A8000__000000931B9A2710\n000000067F000040020001800000000A77C7-000000067F00004002000180000100000000__000000919CCE8B21-000000921B6384B9\n000000067F000040020001800000000A7AE0-000000067F000040020001800000000AFA63__000000921B6384B9-00000092D346E5E9\n000000067F000040020001800000000A8000-000000067F000040020001800000000AC000__000000926240EF70\n000000067F000040020001800000000A8000-000000067F000040020001800000000AC000__000000931B9A2710\n000000067F000040020001800000000AC000-000000067F000040020001800000000B0000__000000926240EF70\n000000067F000040020001800000000AC000-000000067F000040020001800000000B0000__000000931B9A2710\n000000067F000040020001800000000AFA63-000000067F000040020001800000000B79E6__000000921B6384B9-00000092D346E5E9\n000000067F000040020001800000000B0000-000000067F000040020001800000000B4000__000000926240EF70\n000000067F000040020001800000000B0000-000000067F000040020001800000000B4000__000000931B9A2710\n000000067F000040020001800000000B4000-000000067F000040020001800000000B8000__000000926240EF70\n000000067F000040020001800000000B4000-000000067F000040020001800000000B8000__000000931B9A2710\n000000067F000040020001800000000B79E6-000000067F000040020001800000000BF969__000000921B6
384B9-00000092D346E5E9\n000000067F000040020001800000000B8000-000000067F000040020001800000000BC000__000000926240EF70\n000000067F000040020001800000000B8000-000000067F000040020001800000000BC000__000000931B9A2710\n000000067F000040020001800000000BC000-000000067F000040020001800000000C0000__000000926240EF70\n000000067F000040020001800000000BC000-000000067F000040020001800000000C0000__000000931B9A2710\n000000067F000040020001800000000BF969-000000067F000040020001800000000C78EC__000000921B6384B9-00000092D346E5E9\n000000067F000040020001800000000C0000-000000067F000040020001800000000C4000__000000926240EF70\n000000067F000040020001800000000C0000-000000067F000040020001800000000C4000__000000931B9A2710\n000000067F000040020001800000000C4000-000000067F000040020001800000000C8000__000000926240EF70\n000000067F000040020001800000000C4000-000000067F000040020001800000000C8000__000000931B9A2710\n000000067F000040020001800000000C78EC-000000067F000040020001800000000CF86F__000000921B6384B9-00000092D346E5E9\n000000067F000040020001800000000C8000-000000067F000040020001800000000CC000__000000926240EF70\n000000067F000040020001800000000C8000-000000067F000040020001800000000CC000__000000931B9A2710\n000000067F000040020001800000000CC000-000000067F000040020001800000000D0000__000000926240EF70\n000000067F000040020001800000000CC000-000000067F000040020001800000000D0000__000000931B9A2710\n000000067F000040020001800000000CF86F-000000067F000040020001800000000D77F2__000000921B6384B9-00000092D346E5E9\n000000067F000040020001800000000D0000-000000067F000040020001800000000D4000__000000926240EF70\n000000067F000040020001800000000D0000-000000067F000040020001800000000D4000__000000931B9A2710\n000000067F000040020001800000000D4000-000000067F000040020001800000000D8000__000000926240EF70\n000000067F000040020001800000000D4000-000000067F000040020001800000000D8000__000000931B9A2710\n000000067F000040020001800000000D77F2-000000067F000040020001800000000DF775__000000921B6384B9-00000092D346E5E9\n000000067F000040020001800000000D8000-000000067F0
00040020001800000000DC000__000000926240EF70\n000000067F000040020001800000000D8000-000000067F000040020001800000000DC000__000000931B9A2710\n000000067F000040020001800000000DC000-000000067F000040020001800000000E0000__000000926240EF70\n000000067F000040020001800000000DC000-000000067F000040020001800000000E0000__000000931B9A2710\n000000067F000040020001800000000DF775-000000067F000040020001800000000E76F8__000000921B6384B9-00000092D346E5E9\n000000067F000040020001800000000E0000-000000067F000040020001800000000E4000__000000926240EF70\n000000067F000040020001800000000E0000-000000067F000040020001800000000E4000__000000931B9A2710\n000000067F000040020001800000000E4000-000000067F000040020001800000000E8000__000000931B9A2710\n000000067F000040020001800000000E4000-030000000000000000000000000000000002__000000926240EF70\n000000067F000040020001800000000E76F8-000000067F000040020001800000000EF67B__000000921B6384B9-00000092D346E5E9\n000000067F000040020001800000000E8000-000000067F000040020001800000000EC000__000000931B9A2710\n000000067F000040020001800000000EC000-000000067F000040020001800000000F0000__000000931B9A2710\n000000067F000040020001800000000EF67B-000000067F000040020001800000000F75FE__000000921B6384B9-00000092D346E5E9\n000000067F000040020001800000000F0000-000000067F000040020001800000000F4000__000000931B9A2710\n000000067F000040020001800000000F4000-000000067F000040020001800000000F8000__000000931B9A2710\n000000067F000040020001800000000F75FE-000000067F000040020001800000000FF581__000000921B6384B9-00000092D346E5E9\n000000067F000040020001800000000F8000-000000067F000040020001800000000FC000__000000931B9A2710\n000000067F000040020001800000000FC000-000000067F00004002000180000000100000__000000931B9A2710\n000000067F000040020001800000000FF581-000000067F00004002000180000000107504__000000921B6384B9-00000092D346E5E9\n000000067F00004002000180000000100000-000000067F00004002000180000000104000__000000931B9A2710\n000000067F00004002000180000000104000-000000067F00004002000180000000108000__000000931B9A2710\n000000067F
00004002000180000000107504-000000067F0000400200018000000010F487__000000921B6384B9-00000092D346E5E9\n000000067F00004002000180000000108000-000000067F0000400200018000000010C000__000000931B9A2710\n000000067F0000400200018000000010C000-000000067F00004002000180000000110000__000000931B9A2710\n000000067F0000400200018000000010F487-000000067F0000400200018000000011740A__000000921B6384B9-00000092D346E5E9\n000000067F00004002000180000000110000-000000067F00004002000180000000114000__000000931B9A2710\n000000067F00004002000180000000114000-000000067F00004002000180000000118000__000000931B9A2710\n000000067F0000400200018000000011740A-000000067F0000400200018000000011F38D__000000921B6384B9-00000092D346E5E9\n000000067F00004002000180000000118000-000000067F0000400200018000000011C000__000000931B9A2710\n000000067F0000400200018000000011C000-000000067F00004002000180000000120000__000000931B9A2710\n000000067F0000400200018000000011F38D-000000067F00004002000180000000127310__000000921B6384B9-00000092D346E5E9\n000000067F00004002000180000000120000-000000067F00004002000180000000124000__000000931B9A2710\n000000067F00004002000180000000124000-000000067F00004002000180000000128000__000000931B9A2710\n000000067F00004002000180000000127310-000000067F0000400200018000000012F293__000000921B6384B9-00000092D346E5E9\n000000067F00004002000180000000128000-000000067F0000400200018000000012C000__000000931B9A2710\n000000067F0000400200018000000012C000-000000067F00004002000180000000130000__000000931B9A2710\n000000067F0000400200018000000012F293-000000067F00004002000180000000137216__000000921B6384B9-00000092D346E5E9\n000000067F00004002000180000000130000-000000067F00004002000180000000134000__000000931B9A2710\n000000067F00004002000180000000134000-000000067F00004002000180000000138000__000000931B9A2710\n000000067F00004002000180000000137216-000000067F0000400200018000000013F199__000000921B6384B9-00000092D346E5E9\n000000067F00004002000180000000138000-000000067F0000400200018000000013C000__000000931B9A2710\n000000067F000040020001800000001
3C000-000000067F00004002000180000000140000__000000931B9A2710\n000000067F0000400200018000000013F199-000000067F0000400200018000000014711C__000000921B6384B9-00000092D346E5E9\n000000067F00004002000180000000140000-000000067F00004002000180000000144000__000000931B9A2710\n000000067F00004002000180000000144000-000000067F00004002000180000000148000__000000931B9A2710\n000000067F0000400200018000000014711C-000000067F00004002000180000100000000__000000921B6384B9-00000092D346E5E9\n000000067F00004002000180000000148000-000000067F0000400200018000000014C000__000000931B9A2710\n000000067F0000400200018000000014C000-000000067F00004002000180000000150000__000000931B9A2710\n000000067F0000400200018000000014F52F-000000067F000040020001800000001574B2__00000092D346E5E9-000000931B991E09\n000000067F00004002000180000000150000-000000067F00004002000180000000154000__000000931B9A2710\n000000067F00004002000180000000154000-000000067F00004002000180000000158000__000000931B9A2710\n000000067F000040020001800000001574B2-000000067F0000400200018000000015F435__00000092D346E5E9-000000931B991E09\n000000067F00004002000180000000158000-000000067F0000400200018000000015C000__000000931B9A2710\n000000067F0000400200018000000015C000-000000067F00004002000180000000160000__000000931B9A2710\n000000067F0000400200018000000015F435-000000067F000040020001800000001673B8__00000092D346E5E9-000000931B991E09\n000000067F00004002000180000000160000-000000067F00004002000180000000164000__000000931B9A2710\n000000067F00004002000180000000164000-000000067F00004002000180000000168000__000000931B9A2710\n000000067F000040020001800000001673B8-000000067F0000400200018000000016F33B__00000092D346E5E9-000000931B991E09\n000000067F00004002000180000000168000-000000067F0000400200018000000016C000__000000931B9A2710\n000000067F0000400200018000000016C000-000000067F00004002000180000000170000__000000931B9A2710\n000000067F0000400200018000000016F33B-000000067F000040020001800000001772BE__00000092D346E5E9-000000931B991E09\n000000067F00004002000180000000170000-000000067F00004
002000180000000174000__000000931B9A2710\n000000067F00004002000180000000174000-000000067F00004002000180000000178000__000000931B9A2710\n000000067F000040020001800000001772BE-000000067F0000400200018000000017F241__00000092D346E5E9-000000931B991E09\n000000067F00004002000180000000178000-000000067F0000400200018000000017C000__000000931B9A2710\n000000067F0000400200018000000017C000-000000067F00004002000180000000180000__000000931B9A2710\n000000067F0000400200018000000017F241-030000000000000000000000000000000002__00000092D346E5E9-000000931B991E09\n000000067F00004002000180000000180000-000000067F00004002000180000000184000__000000931B9A2710\n000000067F00004002000180000000184000-030000000000000000000000000000000002__000000931B9A2710\n"
  },
  {
    "path": "pageserver/benches/upload_queue.rs",
    "content": "//! Upload queue benchmarks.\n\nuse std::str::FromStr as _;\nuse std::sync::Arc;\nuse std::sync::atomic::AtomicU32;\n\nuse criterion::{Bencher, Criterion, criterion_group, criterion_main};\nuse pageserver::tenant::IndexPart;\nuse pageserver::tenant::metadata::TimelineMetadata;\nuse pageserver::tenant::remote_timeline_client::index::LayerFileMetadata;\nuse pageserver::tenant::storage_layer::LayerName;\nuse pageserver::tenant::upload_queue::{Delete, UploadOp, UploadQueue, UploadTask};\nuse pprof::criterion::{Output, PProfProfiler};\nuse utils::generation::Generation;\nuse utils::shard::{ShardCount, ShardIndex, ShardNumber};\n\n// Register benchmarks with Criterion.\ncriterion_group!(\n    name = benches;\n    config = Criterion::default().with_profiler(PProfProfiler::new(100, Output::Flamegraph(None)));\n    targets = bench_upload_queue_next_ready,\n);\ncriterion_main!(benches);\n\n/// Benchmarks the cost of UploadQueue::next_ready() with the given number of in-progress tasks\n/// (which is equivalent to tasks ahead of it in the queue). This has linear cost, and the upload\n/// queue as a whole is thus quadratic.\n///\n/// UploadOp::UploadLayer requires an entire tenant and timeline to construct, so we just test\n/// Delete and UploadMetadata instead. This is incidentally the most expensive case.\nfn bench_upload_queue_next_ready(c: &mut Criterion) {\n    let mut g = c.benchmark_group(\"upload_queue_next_ready\");\n    for inprogress in [0, 1, 10, 100, 1_000, 10_000, 100_000, 1_000_000] {\n        g.bench_function(format!(\"inprogress={inprogress}\"), |b| {\n            run_bench(b, inprogress).unwrap()\n        });\n    }\n\n    fn run_bench(b: &mut Bencher, inprogress: usize) -> anyhow::Result<()> {\n        // Construct two layers. 
layer0 is in the indexes, layer1 will be deleted.\n        let layer0 = LayerName::from_str(\"000000000000000000000000000000000000-100000000000000000000000000000000000__00000000016B59D8-00000000016B5A51\").expect(\"invalid name\");\n        let layer1 = LayerName::from_str(\"100000000000000000000000000000000001-200000000000000000000000000000000000__00000000016B59D8-00000000016B5A51\").expect(\"invalid name\");\n\n        let metadata = LayerFileMetadata {\n            shard: ShardIndex::new(ShardNumber(1), ShardCount(2)),\n            generation: Generation::Valid(1),\n            file_size: 0,\n        };\n\n        // Construct the (initial and uploaded) index with layer0.\n        let mut index = IndexPart::empty(TimelineMetadata::example());\n        index.layer_metadata.insert(layer0, metadata.clone());\n\n        // Construct the queue.\n        let mut queue = UploadQueue::Uninitialized;\n        let queue = queue.initialize_with_current_remote_index_part(&index, 0)?;\n\n        // Populate inprogress_tasks with a bunch of layer1 deletions.\n        let delete = UploadOp::Delete(Delete {\n            layers: vec![(layer1, metadata)],\n        });\n\n        for task_id in 0..(inprogress as u64) {\n            queue.inprogress_tasks.insert(\n                task_id,\n                Arc::new(UploadTask {\n                    task_id,\n                    retries: AtomicU32::new(0),\n                    op: delete.clone(),\n                    coalesced_ops: Vec::new(),\n                }),\n            );\n        }\n\n        // Benchmark index upload scheduling.\n        let index_upload = UploadOp::UploadMetadata {\n            uploaded: Box::new(index),\n        };\n\n        b.iter(|| {\n            queue.queued_operations.push_front(index_upload.clone());\n            assert!(queue.next_ready().is_some());\n        });\n\n        Ok(())\n    }\n}\n"
  },
  {
    "path": "pageserver/client/Cargo.toml",
    "content": "[package]\nname = \"pageserver_client\"\nversion = \"0.1.0\"\nedition.workspace = true\nlicense.workspace = true\n\n[features]\ntesting = [ \"pageserver_api/testing\" ]\n\n[dependencies]\npageserver_api.workspace = true\nthiserror.workspace = true\nreqwest = { workspace = true, features = [ \"stream\" ] }\nhttp-utils.workspace = true\nutils.workspace = true\nserde.workspace = true\nworkspace_hack = { version = \"0.1\", path = \"../../workspace_hack\" }\ntokio-postgres.workspace = true\ntokio-stream.workspace = true\ntokio.workspace = true\npostgres_versioninfo.workspace = true\nfutures.workspace = true\ntokio-util.workspace = true\nanyhow.workspace = true\nbytes.workspace = true\n"
  },
  {
    "path": "pageserver/client/src/lib.rs",
    "content": "pub mod mgmt_api;\npub mod page_service;\n\n/// For timeline_block_unblock_gc, distinguish the two different operations. This could be a bool.\n// If file structure is per-kind not per-feature then where to put this?\n#[derive(Clone, Copy)]\npub enum BlockUnblock {\n    Block,\n    Unblock,\n}\n\nimpl std::fmt::Display for BlockUnblock {\n    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {\n        let s = match self {\n            BlockUnblock::Block => \"block\",\n            BlockUnblock::Unblock => \"unblock\",\n        };\n        f.write_str(s)\n    }\n}\n"
  },
  {
    "path": "pageserver/client/src/mgmt_api/util.rs",
    "content": "//! Helpers to do common higher-level tasks with the [`Client`].\n\nuse std::sync::Arc;\n\nuse pageserver_api::shard::TenantShardId;\nuse tokio::task::JoinSet;\nuse utils::id::{TenantId, TenantTimelineId};\n\nuse super::Client;\n\n/// Retrieve a list of all of the pageserver's timelines.\n///\n/// Fails if there are sharded tenants present on the pageserver.\npub async fn get_pageserver_tenant_timelines_unsharded(\n    api_client: &Arc<Client>,\n) -> anyhow::Result<Vec<TenantTimelineId>> {\n    let mut timelines: Vec<TenantTimelineId> = Vec::new();\n    let mut tenants: Vec<TenantId> = Vec::new();\n    for ti in api_client.list_tenants().await? {\n        if !ti.id.is_unsharded() {\n            anyhow::bail!(\n                \"only unsharded tenants are supported at this time: {}\",\n                ti.id\n            );\n        }\n        tenants.push(ti.id.tenant_id)\n    }\n    let mut js = JoinSet::new();\n    for tenant_id in tenants {\n        js.spawn({\n            let mgmt_api_client = Arc::clone(api_client);\n            async move {\n                (\n                    tenant_id,\n                    mgmt_api_client\n                        .tenant_details(TenantShardId::unsharded(tenant_id))\n                        .await\n                        .unwrap(),\n                )\n            }\n        });\n    }\n    while let Some(res) = js.join_next().await {\n        let (tenant_id, details) = res.unwrap();\n        for timeline_id in details.timelines {\n            timelines.push(TenantTimelineId {\n                tenant_id,\n                timeline_id,\n            });\n        }\n    }\n    Ok(timelines)\n}\n"
  },
  {
    "path": "pageserver/client/src/mgmt_api.rs",
    "content": "use std::collections::{BTreeMap, HashMap};\nuse std::error::Error as _;\nuse std::time::Duration;\n\nuse bytes::Bytes;\nuse detach_ancestor::AncestorDetached;\nuse http_utils::error::HttpErrorBody;\nuse pageserver_api::models::*;\nuse pageserver_api::shard::TenantShardId;\nuse postgres_versioninfo::PgMajorVersion;\npub use reqwest::Body as ReqwestBody;\nuse reqwest::{IntoUrl, Method, StatusCode, Url};\nuse utils::id::{TenantId, TimelineId};\nuse utils::lsn::Lsn;\n\nuse crate::BlockUnblock;\n\npub mod util;\n\n#[derive(Debug, Clone)]\npub struct Client {\n    mgmt_api_endpoint: String,\n    authorization_header: Option<String>,\n    client: reqwest::Client,\n}\n\n#[derive(thiserror::Error, Debug)]\npub enum Error {\n    #[error(\"send request: {0}{}\", .0.source().map(|e| format!(\": {e}\")).unwrap_or_default())]\n    SendRequest(reqwest::Error),\n\n    #[error(\"receive body: {0}{}\", .0.source().map(|e| format!(\": {e}\")).unwrap_or_default())]\n    ReceiveBody(reqwest::Error),\n\n    #[error(\"receive error body: {0}\")]\n    ReceiveErrorBody(String),\n\n    #[error(\"pageserver API: {1}\")]\n    ApiError(StatusCode, String),\n\n    #[error(\"Cancelled\")]\n    Cancelled,\n\n    #[error(\"request timed out: {0}\")]\n    Timeout(String),\n}\n\npub type Result<T> = std::result::Result<T, Error>;\n\npub trait ResponseErrorMessageExt: Sized {\n    fn error_from_body(self) -> impl std::future::Future<Output = Result<Self>> + Send;\n}\n\nimpl ResponseErrorMessageExt for reqwest::Response {\n    async fn error_from_body(self) -> Result<Self> {\n        let status = self.status();\n        if !(status.is_client_error() || status.is_server_error()) {\n            return Ok(self);\n        }\n\n        let url = self.url().to_owned();\n        Err(match self.json::<HttpErrorBody>().await {\n            Ok(HttpErrorBody { msg }) => Error::ApiError(status, msg),\n            Err(_) => {\n                Error::ReceiveErrorBody(format!(\"Http error ({}) at 
{}.\", status.as_u16(), url))\n            }\n        })\n    }\n}\n\npub enum ForceAwaitLogicalSize {\n    Yes,\n    No,\n}\n\nimpl Client {\n    pub fn new(client: reqwest::Client, mgmt_api_endpoint: String, jwt: Option<&str>) -> Self {\n        Self {\n            mgmt_api_endpoint,\n            authorization_header: jwt.map(|jwt| format!(\"Bearer {jwt}\")),\n            client,\n        }\n    }\n\n    pub async fn list_tenants(&self) -> Result<Vec<pageserver_api::models::TenantInfo>> {\n        let uri = format!(\"{}/v1/tenant\", self.mgmt_api_endpoint);\n        let resp = self.get(&uri).await?;\n        resp.json().await.map_err(Error::ReceiveBody)\n    }\n\n    /// Send an HTTP request to an arbitrary path with a desired HTTP method and return a streaming\n    /// Response.  This function is suitable for pass-through/proxy use cases where we don't care\n    /// what the response content looks like.\n    ///\n    /// Use/add one of the properly typed methods below if you know you aren't proxying, and\n    /// know what kind of response you expect.\n    pub async fn op_raw(&self, method: Method, path: String) -> Result<reqwest::Response> {\n        debug_assert!(path.starts_with('/'));\n        let uri = format!(\"{}{}\", self.mgmt_api_endpoint, path);\n\n        let mut req = self.client.request(method, uri);\n        if let Some(value) = &self.authorization_header {\n            req = req.header(reqwest::header::AUTHORIZATION, value);\n        }\n        req.send().await.map_err(Error::ReceiveBody)\n    }\n\n    pub async fn tenant_details(\n        &self,\n        tenant_shard_id: TenantShardId,\n    ) -> Result<pageserver_api::models::TenantDetails> {\n        let uri = format!(\"{}/v1/tenant/{tenant_shard_id}\", self.mgmt_api_endpoint);\n        self.get(uri)\n            .await?\n            .json()\n            .await\n            .map_err(Error::ReceiveBody)\n    }\n\n    pub async fn list_timelines(\n        &self,\n        tenant_shard_id: 
TenantShardId,\n    ) -> Result<Vec<pageserver_api::models::TimelineInfo>> {\n        let uri = format!(\n            \"{}/v1/tenant/{tenant_shard_id}/timeline\",\n            self.mgmt_api_endpoint\n        );\n        self.get(&uri)\n            .await?\n            .json()\n            .await\n            .map_err(Error::ReceiveBody)\n    }\n\n    pub async fn timeline_info(\n        &self,\n        tenant_shard_id: TenantShardId,\n        timeline_id: TimelineId,\n        force_await_logical_size: ForceAwaitLogicalSize,\n    ) -> Result<pageserver_api::models::TimelineInfo> {\n        let uri = format!(\n            \"{}/v1/tenant/{tenant_shard_id}/timeline/{timeline_id}\",\n            self.mgmt_api_endpoint\n        );\n\n        let uri = match force_await_logical_size {\n            ForceAwaitLogicalSize::Yes => format!(\"{}?force-await-logical-size={}\", uri, true),\n            ForceAwaitLogicalSize::No => uri,\n        };\n\n        self.get(&uri)\n            .await?\n            .json()\n            .await\n            .map_err(Error::ReceiveBody)\n    }\n\n    pub async fn keyspace(\n        &self,\n        tenant_shard_id: TenantShardId,\n        timeline_id: TimelineId,\n    ) -> Result<pageserver_api::models::partitioning::Partitioning> {\n        let uri = format!(\n            \"{}/v1/tenant/{tenant_shard_id}/timeline/{timeline_id}/keyspace\",\n            self.mgmt_api_endpoint\n        );\n        self.get(&uri)\n            .await?\n            .json()\n            .await\n            .map_err(Error::ReceiveBody)\n    }\n\n    async fn get<U: IntoUrl>(&self, uri: U) -> Result<reqwest::Response> {\n        self.request(Method::GET, uri, ()).await\n    }\n\n    fn start_request<U: reqwest::IntoUrl>(\n        &self,\n        method: Method,\n        uri: U,\n    ) -> reqwest::RequestBuilder {\n        let req = self.client.request(method, uri);\n        if let Some(value) = &self.authorization_header {\n            
req.header(reqwest::header::AUTHORIZATION, value)\n        } else {\n            req\n        }\n    }\n\n    async fn request_noerror<B: serde::Serialize, U: reqwest::IntoUrl>(\n        &self,\n        method: Method,\n        uri: U,\n        body: B,\n    ) -> Result<reqwest::Response> {\n        self.start_request(method, uri)\n            .json(&body)\n            .send()\n            .await\n            .map_err(Error::ReceiveBody)\n    }\n\n    async fn request<B: serde::Serialize, U: reqwest::IntoUrl>(\n        &self,\n        method: Method,\n        uri: U,\n        body: B,\n    ) -> Result<reqwest::Response> {\n        let res = self.request_noerror(method, uri, body).await?;\n        let response = res.error_from_body().await?;\n        Ok(response)\n    }\n\n    pub async fn status(&self) -> Result<()> {\n        let uri = format!(\"{}/v1/status\", self.mgmt_api_endpoint);\n        self.get(&uri).await?;\n        Ok(())\n    }\n\n    /// The tenant deletion API can return 202 if deletion is incomplete, or\n    /// 404 if it is complete.  Callers are responsible for checking the status\n    /// code and retrying.  
Error codes other than 404 will return Err().\n    pub async fn tenant_delete(&self, tenant_shard_id: TenantShardId) -> Result<StatusCode> {\n        let uri = format!(\"{}/v1/tenant/{tenant_shard_id}\", self.mgmt_api_endpoint);\n\n        match self.request(Method::DELETE, &uri, ()).await {\n            Err(Error::ApiError(status_code, msg)) => {\n                if status_code == StatusCode::NOT_FOUND {\n                    Ok(StatusCode::NOT_FOUND)\n                } else {\n                    Err(Error::ApiError(status_code, msg))\n                }\n            }\n            Err(e) => Err(e),\n            Ok(response) => Ok(response.status()),\n        }\n    }\n\n    pub async fn tenant_time_travel_remote_storage(\n        &self,\n        tenant_shard_id: TenantShardId,\n        timestamp: &str,\n        done_if_after: &str,\n    ) -> Result<()> {\n        let uri = format!(\n            \"{}/v1/tenant/{tenant_shard_id}/time_travel_remote_storage?travel_to={timestamp}&done_if_after={done_if_after}\",\n            self.mgmt_api_endpoint\n        );\n        self.request(Method::PUT, &uri, ()).await?;\n        Ok(())\n    }\n\n    pub async fn tenant_timeline_compact(\n        &self,\n        tenant_shard_id: TenantShardId,\n        timeline_id: TimelineId,\n        force_image_layer_creation: bool,\n        must_force_image_layer_creation: bool,\n        scheduled: bool,\n        wait_until_done: bool,\n    ) -> Result<()> {\n        let mut path = reqwest::Url::parse(&format!(\n            \"{}/v1/tenant/{tenant_shard_id}/timeline/{timeline_id}/compact\",\n            self.mgmt_api_endpoint\n        ))\n        .expect(\"Cannot build URL\");\n\n        if force_image_layer_creation {\n            path.query_pairs_mut()\n                .append_pair(\"force_image_layer_creation\", \"true\");\n        }\n\n        if must_force_image_layer_creation {\n            path.query_pairs_mut()\n                .append_pair(\"must_force_image_layer_creation\", 
\"true\");\n        }\n\n        if scheduled {\n            path.query_pairs_mut().append_pair(\"scheduled\", \"true\");\n        }\n        if wait_until_done {\n            path.query_pairs_mut()\n                .append_pair(\"wait_until_scheduled_compaction_done\", \"true\");\n            path.query_pairs_mut()\n                .append_pair(\"wait_until_uploaded\", \"true\");\n        }\n        self.request(Method::PUT, path, ()).await?;\n        Ok(())\n    }\n\n    /* BEGIN_HADRON */\n    pub async fn tenant_timeline_describe(\n        &self,\n        tenant_shard_id: &TenantShardId,\n        timeline_id: &TimelineId,\n    ) -> Result<TimelineInfo> {\n        let mut path = reqwest::Url::parse(&format!(\n            \"{}/v1/tenant/{tenant_shard_id}/timeline/{timeline_id}\",\n            self.mgmt_api_endpoint\n        ))\n        .expect(\"Cannot build URL\");\n        path.query_pairs_mut()\n            .append_pair(\"include-image-consistent-lsn\", \"true\");\n\n        let response: reqwest::Response = self.request(Method::GET, path, ()).await?;\n        let body = response.json().await.map_err(Error::ReceiveBody)?;\n        Ok(body)\n    }\n\n    pub async fn list_tenant_visible_size(&self) -> Result<BTreeMap<TenantShardId, u64>> {\n        let uri = format!(\"{}/v1/list_tenant_visible_size\", self.mgmt_api_endpoint);\n        let resp = self.get(&uri).await?;\n        resp.json().await.map_err(Error::ReceiveBody)\n    }\n    /* END_HADRON */\n\n    pub async fn tenant_scan_remote_storage(\n        &self,\n        tenant_id: TenantId,\n    ) -> Result<TenantScanRemoteStorageResponse> {\n        let uri = format!(\n            \"{}/v1/tenant/{tenant_id}/scan_remote_storage\",\n            self.mgmt_api_endpoint\n        );\n        let response = self.request(Method::GET, &uri, ()).await?;\n        let body = response.json().await.map_err(Error::ReceiveBody)?;\n        Ok(body)\n    }\n\n    pub async fn set_tenant_config(&self, req: 
&TenantConfigRequest) -> Result<()> {\n        let uri = format!(\"{}/v1/tenant/config\", self.mgmt_api_endpoint);\n        self.request(Method::PUT, &uri, req).await?;\n        Ok(())\n    }\n\n    pub async fn patch_tenant_config(&self, req: &TenantConfigPatchRequest) -> Result<()> {\n        let uri = format!(\"{}/v1/tenant/config\", self.mgmt_api_endpoint);\n        self.request(Method::PATCH, &uri, req).await?;\n        Ok(())\n    }\n\n    pub async fn tenant_secondary_download(\n        &self,\n        tenant_id: TenantShardId,\n        wait: Option<std::time::Duration>,\n    ) -> Result<(StatusCode, SecondaryProgress)> {\n        let mut path = reqwest::Url::parse(&format!(\n            \"{}/v1/tenant/{}/secondary/download\",\n            self.mgmt_api_endpoint, tenant_id\n        ))\n        .expect(\"Cannot build URL\");\n\n        if let Some(wait) = wait {\n            path.query_pairs_mut()\n                .append_pair(\"wait_ms\", &format!(\"{}\", wait.as_millis()));\n        }\n\n        let response = self.request(Method::POST, path, ()).await?;\n        let status = response.status();\n        let progress: SecondaryProgress = response.json().await.map_err(Error::ReceiveBody)?;\n        Ok((status, progress))\n    }\n\n    pub async fn tenant_secondary_status(\n        &self,\n        tenant_shard_id: TenantShardId,\n    ) -> Result<SecondaryProgress> {\n        let path = reqwest::Url::parse(&format!(\n            \"{}/v1/tenant/{}/secondary/status\",\n            self.mgmt_api_endpoint, tenant_shard_id\n        ))\n        .expect(\"Cannot build URL\");\n\n        self.request(Method::GET, path, ())\n            .await?\n            .json()\n            .await\n            .map_err(Error::ReceiveBody)\n    }\n\n    pub async fn tenant_heatmap_upload(&self, tenant_id: TenantShardId) -> Result<()> {\n        let path = reqwest::Url::parse(&format!(\n            \"{}/v1/tenant/{}/heatmap_upload\",\n            self.mgmt_api_endpoint, tenant_id\n    
    ))\n        .expect(\"Cannot build URL\");\n\n        self.request(Method::POST, path, ()).await?;\n        Ok(())\n    }\n\n    pub async fn location_config(\n        &self,\n        tenant_shard_id: TenantShardId,\n        config: LocationConfig,\n        flush_ms: Option<std::time::Duration>,\n        lazy: bool,\n    ) -> Result<()> {\n        let req_body = TenantLocationConfigRequest { config };\n\n        let mut path = reqwest::Url::parse(&format!(\n            \"{}/v1/tenant/{}/location_config\",\n            self.mgmt_api_endpoint, tenant_shard_id\n        ))\n        // Should always work: mgmt_api_endpoint is configuration, not user input.\n        .expect(\"Cannot build URL\");\n\n        if lazy {\n            path.query_pairs_mut().append_pair(\"lazy\", \"true\");\n        }\n\n        if let Some(flush_ms) = flush_ms {\n            path.query_pairs_mut()\n                .append_pair(\"flush_ms\", &format!(\"{}\", flush_ms.as_millis()));\n        }\n\n        self.request(Method::PUT, path, &req_body).await?;\n        Ok(())\n    }\n\n    pub async fn list_location_config(&self) -> Result<LocationConfigListResponse> {\n        let path = format!(\"{}/v1/location_config\", self.mgmt_api_endpoint);\n        self.request(Method::GET, &path, ())\n            .await?\n            .json()\n            .await\n            .map_err(Error::ReceiveBody)\n    }\n\n    pub async fn get_location_config(\n        &self,\n        tenant_shard_id: TenantShardId,\n    ) -> Result<Option<LocationConfig>> {\n        let path = format!(\n            \"{}/v1/location_config/{tenant_shard_id}\",\n            self.mgmt_api_endpoint\n        );\n        self.request(Method::GET, &path, ())\n            .await?\n            .json()\n            .await\n            .map_err(Error::ReceiveBody)\n    }\n\n    pub async fn timeline_create(\n        &self,\n        tenant_shard_id: TenantShardId,\n        req: &TimelineCreateRequest,\n    ) -> Result<TimelineInfo> {\n        
let uri = format!(\n            \"{}/v1/tenant/{}/timeline\",\n            self.mgmt_api_endpoint, tenant_shard_id\n        );\n        self.request(Method::POST, &uri, req)\n            .await?\n            .json()\n            .await\n            .map_err(Error::ReceiveBody)\n    }\n\n    /// The timeline deletion API can return 201 if deletion is incomplete, or\n    /// 404 if it is complete.  Callers are responsible for checking the status\n    /// code and retrying.  Error codes other than 404 will return Err().\n    pub async fn timeline_delete(\n        &self,\n        tenant_shard_id: TenantShardId,\n        timeline_id: TimelineId,\n    ) -> Result<StatusCode> {\n        let uri = format!(\n            \"{}/v1/tenant/{tenant_shard_id}/timeline/{timeline_id}\",\n            self.mgmt_api_endpoint\n        );\n\n        match self.request(Method::DELETE, &uri, ()).await {\n            Err(Error::ApiError(status_code, msg)) => {\n                if status_code == StatusCode::NOT_FOUND {\n                    Ok(StatusCode::NOT_FOUND)\n                } else {\n                    Err(Error::ApiError(status_code, msg))\n                }\n            }\n            Err(e) => Err(e),\n            Ok(response) => Ok(response.status()),\n        }\n    }\n\n    pub async fn timeline_detail(\n        &self,\n        tenant_shard_id: TenantShardId,\n        timeline_id: TimelineId,\n    ) -> Result<TimelineInfo> {\n        let uri = format!(\n            \"{}/v1/tenant/{tenant_shard_id}/timeline/{timeline_id}\",\n            self.mgmt_api_endpoint\n        );\n\n        self.request(Method::GET, &uri, ())\n            .await?\n            .json()\n            .await\n            .map_err(Error::ReceiveBody)\n    }\n\n    pub async fn timeline_archival_config(\n        &self,\n        tenant_shard_id: TenantShardId,\n        timeline_id: TimelineId,\n        req: &TimelineArchivalConfigRequest,\n    ) -> Result<()> {\n        let uri = format!(\n            
\"{}/v1/tenant/{tenant_shard_id}/timeline/{timeline_id}/archival_config\",\n            self.mgmt_api_endpoint\n        );\n\n        self.request(Method::PUT, &uri, req)\n            .await?\n            .json()\n            .await\n            .map_err(Error::ReceiveBody)\n    }\n\n    pub async fn timeline_detach_ancestor(\n        &self,\n        tenant_shard_id: TenantShardId,\n        timeline_id: TimelineId,\n        behavior: Option<DetachBehavior>,\n    ) -> Result<AncestorDetached> {\n        let uri = format!(\n            \"{}/v1/tenant/{tenant_shard_id}/timeline/{timeline_id}/detach_ancestor\",\n            self.mgmt_api_endpoint\n        );\n        let mut uri = Url::parse(&uri)\n            .map_err(|e| Error::ApiError(StatusCode::INTERNAL_SERVER_ERROR, format!(\"{e}\")))?;\n\n        if let Some(behavior) = behavior {\n            uri.query_pairs_mut()\n                .append_pair(\"detach_behavior\", &behavior.to_string());\n        }\n\n        self.request(Method::PUT, uri, ())\n            .await?\n            .json()\n            .await\n            .map_err(Error::ReceiveBody)\n    }\n\n    pub async fn timeline_block_unblock_gc(\n        &self,\n        tenant_shard_id: TenantShardId,\n        timeline_id: TimelineId,\n        dir: BlockUnblock,\n    ) -> Result<()> {\n        let uri = format!(\n            \"{}/v1/tenant/{tenant_shard_id}/timeline/{timeline_id}/{dir}_gc\",\n            self.mgmt_api_endpoint,\n        );\n\n        self.request(Method::POST, &uri, ()).await.map(|_| ())\n    }\n\n    pub async fn timeline_download_heatmap_layers(\n        &self,\n        tenant_shard_id: TenantShardId,\n        timeline_id: TimelineId,\n        concurrency: Option<usize>,\n        recurse: bool,\n    ) -> Result<()> {\n        let mut path = reqwest::Url::parse(&format!(\n            \"{}/v1/tenant/{}/timeline/{}/download_heatmap_layers\",\n            self.mgmt_api_endpoint, tenant_shard_id, timeline_id\n        ))\n        
.expect(\"Cannot build URL\");\n\n        path.query_pairs_mut()\n            .append_pair(\"recurse\", &format!(\"{recurse}\"));\n\n        if let Some(concurrency) = concurrency {\n            path.query_pairs_mut()\n                .append_pair(\"concurrency\", &format!(\"{concurrency}\"));\n        }\n\n        self.request(Method::POST, path, ()).await.map(|_| ())\n    }\n\n    pub async fn tenant_reset(&self, tenant_shard_id: TenantShardId) -> Result<()> {\n        let uri = format!(\n            \"{}/v1/tenant/{}/reset\",\n            self.mgmt_api_endpoint, tenant_shard_id\n        );\n        self.request(Method::POST, &uri, ())\n            .await?\n            .json()\n            .await\n            .map_err(Error::ReceiveBody)\n    }\n\n    pub async fn tenant_shard_split(\n        &self,\n        tenant_shard_id: TenantShardId,\n        req: TenantShardSplitRequest,\n    ) -> Result<TenantShardSplitResponse> {\n        let uri = format!(\n            \"{}/v1/tenant/{}/shard_split\",\n            self.mgmt_api_endpoint, tenant_shard_id\n        );\n        self.request(Method::PUT, &uri, req)\n            .await?\n            .json()\n            .await\n            .map_err(Error::ReceiveBody)\n    }\n\n    pub async fn timeline_list(\n        &self,\n        tenant_shard_id: &TenantShardId,\n    ) -> Result<Vec<TimelineInfo>> {\n        let uri = format!(\n            \"{}/v1/tenant/{}/timeline\",\n            self.mgmt_api_endpoint, tenant_shard_id\n        );\n        self.get(&uri)\n            .await?\n            .json()\n            .await\n            .map_err(Error::ReceiveBody)\n    }\n\n    pub async fn tenant_synthetic_size(\n        &self,\n        tenant_shard_id: TenantShardId,\n    ) -> Result<TenantHistorySize> {\n        let uri = format!(\n            \"{}/v1/tenant/{}/synthetic_size\",\n            self.mgmt_api_endpoint, tenant_shard_id\n        );\n        self.get(&uri)\n            .await?\n            .json()\n            
.await\n            .map_err(Error::ReceiveBody)\n    }\n\n    pub async fn put_io_engine(\n        &self,\n        engine: &pageserver_api::models::virtual_file::IoEngineKind,\n    ) -> Result<()> {\n        let uri = format!(\"{}/v1/io_engine\", self.mgmt_api_endpoint);\n        self.request(Method::PUT, uri, engine)\n            .await?\n            .json()\n            .await\n            .map_err(Error::ReceiveBody)\n    }\n\n    /// Configs io mode at runtime.\n    pub async fn put_io_mode(\n        &self,\n        mode: &pageserver_api::models::virtual_file::IoMode,\n    ) -> Result<()> {\n        let uri = format!(\"{}/v1/io_mode\", self.mgmt_api_endpoint);\n        self.request(Method::PUT, uri, mode)\n            .await?\n            .json()\n            .await\n            .map_err(Error::ReceiveBody)\n    }\n\n    pub async fn get_utilization(&self) -> Result<PageserverUtilization> {\n        let uri = format!(\"{}/v1/utilization\", self.mgmt_api_endpoint);\n        self.get(uri)\n            .await?\n            .json()\n            .await\n            .map_err(Error::ReceiveBody)\n    }\n\n    pub async fn top_tenant_shards(\n        &self,\n        request: TopTenantShardsRequest,\n    ) -> Result<TopTenantShardsResponse> {\n        let uri = format!(\"{}/v1/top_tenants\", self.mgmt_api_endpoint);\n        self.request(Method::POST, uri, request)\n            .await?\n            .json()\n            .await\n            .map_err(Error::ReceiveBody)\n    }\n\n    pub async fn layer_map_info(\n        &self,\n        tenant_shard_id: TenantShardId,\n        timeline_id: TimelineId,\n    ) -> Result<LayerMapInfo> {\n        let uri = format!(\n            \"{}/v1/tenant/{}/timeline/{}/layer\",\n            self.mgmt_api_endpoint, tenant_shard_id, timeline_id,\n        );\n        self.get(&uri)\n            .await?\n            .json()\n            .await\n            .map_err(Error::ReceiveBody)\n    }\n\n    pub async fn layer_evict(\n        &self,\n 
       tenant_shard_id: TenantShardId,\n        timeline_id: TimelineId,\n        layer_file_name: &str,\n    ) -> Result<bool> {\n        let uri = format!(\n            \"{}/v1/tenant/{}/timeline/{}/layer/{}\",\n            self.mgmt_api_endpoint, tenant_shard_id, timeline_id, layer_file_name\n        );\n        let resp = self.request_noerror(Method::DELETE, &uri, ()).await?;\n        match resp.status() {\n            StatusCode::OK => Ok(true),\n            StatusCode::NOT_MODIFIED => Ok(false),\n            // TODO: dedupe this pattern / introduce separate error variant?\n            status => Err(match resp.json::<HttpErrorBody>().await {\n                Ok(HttpErrorBody { msg }) => Error::ApiError(status, msg),\n                Err(_) => {\n                    Error::ReceiveErrorBody(format!(\"Http error ({}) at {}.\", status.as_u16(), uri))\n                }\n            }),\n        }\n    }\n\n    pub async fn layer_ondemand_download(\n        &self,\n        tenant_shard_id: TenantShardId,\n        timeline_id: TimelineId,\n        layer_file_name: &str,\n    ) -> Result<bool> {\n        let uri = format!(\n            \"{}/v1/tenant/{}/timeline/{}/layer/{}\",\n            self.mgmt_api_endpoint, tenant_shard_id, timeline_id, layer_file_name\n        );\n        let resp = self.request_noerror(Method::GET, &uri, ()).await?;\n        match resp.status() {\n            StatusCode::OK => Ok(true),\n            StatusCode::NOT_MODIFIED => Ok(false),\n            // TODO: dedupe this pattern / introduce separate error variant?\n            status => Err(match resp.json::<HttpErrorBody>().await {\n                Ok(HttpErrorBody { msg }) => Error::ApiError(status, msg),\n                Err(_) => {\n                    Error::ReceiveErrorBody(format!(\"Http error ({}) at {}.\", status.as_u16(), uri))\n                }\n            }),\n        }\n    }\n\n    pub async fn ingest_aux_files(\n        &self,\n        tenant_shard_id: TenantShardId,\n        
timeline_id: TimelineId,\n        aux_files: HashMap<String, String>,\n    ) -> Result<bool> {\n        let uri = format!(\n            \"{}/v1/tenant/{}/timeline/{}/ingest_aux_files\",\n            self.mgmt_api_endpoint, tenant_shard_id, timeline_id\n        );\n        let resp = self\n            .request_noerror(Method::POST, &uri, IngestAuxFilesRequest { aux_files })\n            .await?;\n        match resp.status() {\n            StatusCode::OK => Ok(true),\n            status => Err(match resp.json::<HttpErrorBody>().await {\n                Ok(HttpErrorBody { msg }) => Error::ApiError(status, msg),\n                Err(_) => {\n                    Error::ReceiveErrorBody(format!(\"Http error ({}) at {}.\", status.as_u16(), uri))\n                }\n            }),\n        }\n    }\n\n    pub async fn list_aux_files(\n        &self,\n        tenant_shard_id: TenantShardId,\n        timeline_id: TimelineId,\n        lsn: Lsn,\n    ) -> Result<HashMap<String, Bytes>> {\n        let uri = format!(\n            \"{}/v1/tenant/{}/timeline/{}/list_aux_files\",\n            self.mgmt_api_endpoint, tenant_shard_id, timeline_id\n        );\n        let resp = self\n            .request_noerror(Method::POST, &uri, ListAuxFilesRequest { lsn })\n            .await?;\n        match resp.status() {\n            StatusCode::OK => {\n                let resp: HashMap<String, Bytes> = resp.json().await.map_err(|e| {\n                    Error::ApiError(StatusCode::INTERNAL_SERVER_ERROR, format!(\"{e}\"))\n                })?;\n                Ok(resp)\n            }\n            status => Err(match resp.json::<HttpErrorBody>().await {\n                Ok(HttpErrorBody { msg }) => Error::ApiError(status, msg),\n                Err(_) => {\n                    Error::ReceiveErrorBody(format!(\"Http error ({}) at {}.\", status.as_u16(), uri))\n                }\n            }),\n        }\n    }\n\n    pub async fn import_basebackup(\n        &self,\n        tenant_id: 
TenantId,\n        timeline_id: TimelineId,\n        base_lsn: Lsn,\n        end_lsn: Lsn,\n        pg_version: PgMajorVersion,\n        basebackup_tarball: ReqwestBody,\n    ) -> Result<()> {\n        let pg_version = pg_version.major_version_num();\n\n        let uri = format!(\n            \"{}/v1/tenant/{tenant_id}/timeline/{timeline_id}/import_basebackup?base_lsn={base_lsn}&end_lsn={end_lsn}&pg_version={pg_version}\",\n            self.mgmt_api_endpoint,\n        );\n        self.start_request(Method::PUT, uri)\n            .body(basebackup_tarball)\n            .send()\n            .await\n            .map_err(Error::SendRequest)?\n            .error_from_body()\n            .await?\n            .json()\n            .await\n            .map_err(Error::ReceiveBody)\n    }\n\n    pub async fn import_wal(\n        &self,\n        tenant_id: TenantId,\n        timeline_id: TimelineId,\n        start_lsn: Lsn,\n        end_lsn: Lsn,\n        wal_tarball: ReqwestBody,\n    ) -> Result<()> {\n        let uri = format!(\n            \"{}/v1/tenant/{tenant_id}/timeline/{timeline_id}/import_wal?start_lsn={start_lsn}&end_lsn={end_lsn}\",\n            self.mgmt_api_endpoint,\n        );\n        self.start_request(Method::PUT, uri)\n            .body(wal_tarball)\n            .send()\n            .await\n            .map_err(Error::SendRequest)?\n            .error_from_body()\n            .await?\n            .json()\n            .await\n            .map_err(Error::ReceiveBody)\n    }\n\n    pub async fn timeline_init_lsn_lease(\n        &self,\n        tenant_shard_id: TenantShardId,\n        timeline_id: TimelineId,\n        lsn: Lsn,\n    ) -> Result<LsnLease> {\n        let uri = format!(\n            \"{}/v1/tenant/{tenant_shard_id}/timeline/{timeline_id}/lsn_lease\",\n            self.mgmt_api_endpoint,\n        );\n\n        self.request(Method::POST, &uri, LsnLeaseRequest { lsn })\n            .await?\n            .json()\n            .await\n            
.map_err(Error::ReceiveBody)\n    }\n\n    pub async fn reset_alert_gauges(&self) -> Result<()> {\n        let uri = format!(\n            \"{}/hadron-internal/reset_alert_gauges\",\n            self.mgmt_api_endpoint\n        );\n        self.start_request(Method::POST, uri)\n            .send()\n            .await\n            .map_err(Error::SendRequest)?\n            .error_from_body()\n            .await?\n            .json()\n            .await\n            .map_err(Error::ReceiveBody)\n    }\n\n    pub async fn wait_lsn(\n        &self,\n        tenant_shard_id: TenantShardId,\n        request: TenantWaitLsnRequest,\n    ) -> Result<StatusCode> {\n        let uri = format!(\n            \"{}/v1/tenant/{tenant_shard_id}/wait_lsn\",\n            self.mgmt_api_endpoint,\n        );\n\n        self.request_noerror(Method::POST, uri, request)\n            .await\n            .map(|resp| resp.status())\n    }\n\n    pub async fn activate_post_import(\n        &self,\n        tenant_shard_id: TenantShardId,\n        timeline_id: TimelineId,\n        activate_timeline_timeout: Duration,\n    ) -> Result<TimelineInfo> {\n        let uri = format!(\n            \"{}/v1/tenant/{}/timeline/{}/activate_post_import?timeline_activate_timeout_ms={}\",\n            self.mgmt_api_endpoint,\n            tenant_shard_id,\n            timeline_id,\n            activate_timeline_timeout.as_millis()\n        );\n\n        self.request(Method::PUT, uri, ())\n            .await?\n            .json()\n            .await\n            .map_err(Error::ReceiveBody)\n    }\n\n    pub async fn update_feature_flag_spec(&self, spec: String) -> Result<()> {\n        let uri = format!(\"{}/v1/feature_flag_spec\", self.mgmt_api_endpoint);\n        self.request(Method::POST, uri, spec)\n            .await?\n            .json()\n            .await\n            .map_err(Error::ReceiveBody)\n    }\n}\n"
  },
  {
    "path": "pageserver/client/src/page_service.rs",
    "content": "use std::sync::{Arc, Mutex};\n\nuse futures::stream::{SplitSink, SplitStream};\nuse futures::{SinkExt, StreamExt};\nuse pageserver_api::pagestream_api::{\n    PagestreamBeMessage, PagestreamFeMessage, PagestreamGetPageRequest, PagestreamGetPageResponse,\n};\nuse pageserver_api::reltag::RelTag;\nuse tokio::task::JoinHandle;\nuse tokio_postgres::CopyOutStream;\nuse tokio_util::sync::CancellationToken;\nuse utils::id::{TenantId, TimelineId};\nuse utils::lsn::Lsn;\n\npub struct Client {\n    client: tokio_postgres::Client,\n    cancel_on_client_drop: Option<tokio_util::sync::DropGuard>,\n    conn_task: JoinHandle<()>,\n}\n\npub struct BasebackupRequest {\n    pub tenant_id: TenantId,\n    pub timeline_id: TimelineId,\n    pub lsn: Option<Lsn>,\n    pub gzip: bool,\n}\n\nimpl Client {\n    pub async fn new(connstring: String) -> anyhow::Result<Self> {\n        let (client, connection) =\n            tokio_postgres::connect(&connstring, tokio_postgres::NoTls).await?;\n\n        let conn_task_cancel = CancellationToken::new();\n        let conn_task = tokio::spawn({\n            let conn_task_cancel = conn_task_cancel.clone();\n            async move {\n                tokio::select! 
{\n                    _ = conn_task_cancel.cancelled() => { }\n                    res = connection => {\n                        res.unwrap();\n                    }\n                }\n            }\n        });\n        Ok(Self {\n            cancel_on_client_drop: Some(conn_task_cancel.drop_guard()),\n            conn_task,\n            client,\n        })\n    }\n\n    pub async fn pagestream(\n        self,\n        tenant_id: TenantId,\n        timeline_id: TimelineId,\n    ) -> anyhow::Result<PagestreamClient> {\n        let copy_both: tokio_postgres::CopyBothDuplex<bytes::Bytes> = self\n            .client\n            .copy_both_simple(&format!(\"pagestream_v3 {tenant_id} {timeline_id}\"))\n            .await?;\n        let (sink, stream) = copy_both.split(); // TODO: actually support splitting of the CopyBothDuplex so the lock inside this split adaptor goes away.\n        let Client {\n            cancel_on_client_drop,\n            conn_task,\n            client: _,\n        } = self;\n        let shared = Arc::new(Mutex::new(PagestreamShared::ConnTaskRunning(\n            ConnTaskRunning {\n                cancel_on_client_drop,\n                conn_task,\n            },\n        )));\n        Ok(PagestreamClient {\n            sink: PagestreamSender {\n                shared: shared.clone(),\n                sink,\n            },\n            stream: PagestreamReceiver {\n                shared: shared.clone(),\n                stream,\n            },\n            shared,\n        })\n    }\n\n    pub async fn basebackup(&self, req: &BasebackupRequest) -> anyhow::Result<CopyOutStream> {\n        let BasebackupRequest {\n            tenant_id,\n            timeline_id,\n            lsn,\n            gzip,\n        } = req;\n        let mut args = Vec::with_capacity(5);\n        args.push(\"basebackup\".to_string());\n        args.push(format!(\"{tenant_id}\"));\n        args.push(format!(\"{timeline_id}\"));\n        if let Some(lsn) = lsn {\n        
    args.push(format!(\"{lsn}\"));\n        }\n        if *gzip {\n            args.push(\"--gzip\".to_string())\n        }\n        Ok(self.client.copy_out(&args.join(\" \")).await?)\n    }\n}\n\n/// Create using [`Client::pagestream`].\npub struct PagestreamClient {\n    shared: Arc<Mutex<PagestreamShared>>,\n    sink: PagestreamSender,\n    stream: PagestreamReceiver,\n}\n\npub struct PagestreamSender {\n    #[allow(dead_code)]\n    shared: Arc<Mutex<PagestreamShared>>,\n    sink: SplitSink<tokio_postgres::CopyBothDuplex<bytes::Bytes>, bytes::Bytes>,\n}\n\npub struct PagestreamReceiver {\n    #[allow(dead_code)]\n    shared: Arc<Mutex<PagestreamShared>>,\n    stream: SplitStream<tokio_postgres::CopyBothDuplex<bytes::Bytes>>,\n}\n\nenum PagestreamShared {\n    ConnTaskRunning(ConnTaskRunning),\n    ConnTaskCancelledJoinHandleReturnedOrDropped,\n}\nstruct ConnTaskRunning {\n    cancel_on_client_drop: Option<tokio_util::sync::DropGuard>,\n    conn_task: JoinHandle<()>,\n}\n\npub struct RelTagBlockNo {\n    pub rel_tag: RelTag,\n    pub block_no: u32,\n}\n\nimpl PagestreamClient {\n    pub async fn shutdown(self) {\n        let Self {\n            shared,\n            sink,\n            stream,\n        } = { self };\n        // The `copy_both` split into `sink` and `stream` contains internal channel sender, the receiver of which is polled by `conn_task`.\n        // When `conn_task` observes the sender has been dropped, it sends a `FeMessage::CopyFail` into the connection.\n        // (see https://github.com/neondatabase/rust-postgres/blob/2005bf79573b8add5cf205b52a2b208e356cc8b0/tokio-postgres/src/copy_both.rs#L56).\n        //\n        // If we drop(copy_both) first, but then immediately drop the `cancel_on_client_drop`,\n        // the CopyFail mesage only makes it to the socket sometimes (i.e., it's a race).\n        //\n        // Further, the pageserver makes a lot of noise when it receives CopyFail.\n        // Computes don't send it in practice, they just 
hard-close the connection.\n        //\n        // So, let's behave like the computes and suppress the CopyFail as follows:\n        // kill the socket first, then drop copy_both.\n        //\n        // See also: https://www.postgresql.org/docs/current/protocol-flow.html#PROTOCOL-COPY\n        //\n        // NB: page_service doesn't have a use case to exit the `pagestream` mode currently.\n        // => https://github.com/neondatabase/neon/issues/6390\n        let ConnTaskRunning {\n            cancel_on_client_drop,\n            conn_task,\n        } = {\n            let mut guard = shared.lock().unwrap();\n            match std::mem::replace(\n                &mut *guard,\n                PagestreamShared::ConnTaskCancelledJoinHandleReturnedOrDropped,\n            ) {\n                PagestreamShared::ConnTaskRunning(conn_task_running) => conn_task_running,\n                PagestreamShared::ConnTaskCancelledJoinHandleReturnedOrDropped => unreachable!(),\n            }\n        };\n        let _ = cancel_on_client_drop.unwrap();\n        conn_task.await.unwrap();\n\n        // Now drop the split copy_both.\n        drop(sink);\n        drop(stream);\n    }\n\n    pub fn split(self) -> (PagestreamSender, PagestreamReceiver) {\n        let Self {\n            shared: _,\n            sink,\n            stream,\n        } = self;\n        (sink, stream)\n    }\n\n    pub async fn getpage(\n        &mut self,\n        req: PagestreamGetPageRequest,\n    ) -> anyhow::Result<PagestreamGetPageResponse> {\n        self.getpage_send(req).await?;\n        self.getpage_recv().await\n    }\n\n    pub async fn getpage_send(&mut self, req: PagestreamGetPageRequest) -> anyhow::Result<()> {\n        self.sink.getpage_send(req).await\n    }\n\n    pub async fn getpage_recv(&mut self) -> anyhow::Result<PagestreamGetPageResponse> {\n        self.stream.getpage_recv().await\n    }\n}\n\nimpl PagestreamSender {\n    // TODO: maybe make this impl Sink instead for better 
composability?\n    pub async fn send(&mut self, msg: PagestreamFeMessage) -> anyhow::Result<()> {\n        let msg = msg.serialize();\n        self.sink.send_all(&mut tokio_stream::once(Ok(msg))).await?;\n        Ok(())\n    }\n\n    pub async fn getpage_send(&mut self, req: PagestreamGetPageRequest) -> anyhow::Result<()> {\n        self.send(PagestreamFeMessage::GetPage(req)).await\n    }\n}\n\nimpl PagestreamReceiver {\n    // TODO: maybe make this impl Stream instead for better composability?\n    pub async fn recv(&mut self) -> anyhow::Result<PagestreamBeMessage> {\n        let next: Option<Result<bytes::Bytes, _>> = self.stream.next().await;\n        let next: bytes::Bytes = next.unwrap()?;\n        PagestreamBeMessage::deserialize(next)\n    }\n\n    pub async fn getpage_recv(&mut self) -> anyhow::Result<PagestreamGetPageResponse> {\n        let next: PagestreamBeMessage = self.recv().await?;\n        match next {\n            PagestreamBeMessage::GetPage(p) => Ok(p),\n            PagestreamBeMessage::Error(e) => anyhow::bail!(\"Error: {:?}\", e),\n            PagestreamBeMessage::Exists(_)\n            | PagestreamBeMessage::Nblocks(_)\n            | PagestreamBeMessage::DbSize(_)\n            | PagestreamBeMessage::GetSlruSegment(_) => {\n                anyhow::bail!(\n                    \"unexpected be message kind in response to getpage request: {}\",\n                    next.kind()\n                )\n            }\n            #[cfg(feature = \"testing\")]\n            PagestreamBeMessage::Test(_) => {\n                anyhow::bail!(\n                    \"unexpected be message kind in response to getpage request: {}\",\n                    next.kind()\n                )\n            }\n        }\n    }\n}\n"
  },
  {
    "path": "pageserver/client_grpc/Cargo.toml",
    "content": "[package]\nname = \"pageserver_client_grpc\"\nversion = \"0.1.0\"\nedition.workspace = true\nlicense.workspace = true\n\n[features]\ntesting = [\"pageserver_api/testing\"]\n\n[dependencies]\nanyhow.workspace = true\narc-swap.workspace = true\nbytes.workspace = true\ncompute_api.workspace = true\nfutures.workspace = true\npageserver_api.workspace = true\npageserver_page_api.workspace = true\ntokio.workspace = true\ntokio-stream.workspace = true\ntokio-util.workspace = true\ntonic.workspace = true\ntracing.workspace = true\nutils.workspace = true\nworkspace_hack.workspace = true\n"
  },
  {
    "path": "pageserver/client_grpc/src/client.rs",
    "content": "use std::collections::HashMap;\nuse std::num::NonZero;\nuse std::pin::pin;\nuse std::sync::Arc;\nuse std::time::{Duration, Instant};\n\nuse anyhow::anyhow;\nuse arc_swap::ArcSwap;\nuse futures::stream::FuturesUnordered;\nuse futures::{FutureExt as _, StreamExt as _};\nuse tonic::codec::CompressionEncoding;\nuse tracing::{debug, instrument};\nuse utils::logging::warn_slow;\n\nuse crate::pool::{ChannelPool, ClientGuard, ClientPool, StreamGuard, StreamPool};\nuse crate::retry::Retry;\nuse compute_api::spec::PageserverProtocol;\nuse pageserver_page_api as page_api;\nuse pageserver_page_api::GetPageSplitter;\nuse utils::id::{TenantId, TimelineId};\nuse utils::shard::{ShardCount, ShardIndex, ShardNumber, ShardStripeSize};\n\n/// Max number of concurrent clients per channel (i.e. TCP connection). New channels will be spun up\n/// when full.\n///\n/// Normal requests are small, and we don't pipeline them, so we can afford a large number of\n/// streams per connection.\n///\n/// TODO: tune all of these constants, and consider making them configurable.\nconst MAX_CLIENTS_PER_CHANNEL: NonZero<usize> = NonZero::new(64).unwrap();\n\n/// Max number of concurrent bulk GetPage streams per channel (i.e. TCP connection). These use a\n/// dedicated channel pool with a lower client limit, to avoid TCP-level head-of-line blocking and\n/// transmission delays. This also concentrates large window sizes on a smaller set of\n/// streams/connections, presumably reducing memory use.\nconst MAX_BULK_CLIENTS_PER_CHANNEL: NonZero<usize> = NonZero::new(16).unwrap();\n\n/// The batch size threshold at which a GetPage request will use the bulk stream pool.\n///\n/// The gRPC initial window size is 64 KB. 
Each page is 8 KB, so let's avoid increasing the window\n/// size for the normal stream pool, and route requests for >= 5 pages (>32 KB) to the bulk pool.\nconst BULK_THRESHOLD_BATCH_SIZE: usize = 5;\n\n/// The overall request call timeout, including retries and pool acquisition.\n/// TODO: should we retry forever? Should the caller decide?\nconst CALL_TIMEOUT: Duration = Duration::from_secs(60);\n\n/// The per-request (retry attempt) timeout, including any lazy connection establishment.\nconst REQUEST_TIMEOUT: Duration = Duration::from_secs(10);\n\n/// The initial request retry backoff duration. The first retry does not back off.\n/// TODO: use a different backoff for ResourceExhausted (rate limiting)? Needs server support.\nconst BASE_BACKOFF: Duration = Duration::from_millis(5);\n\n/// The maximum request retry backoff duration.\nconst MAX_BACKOFF: Duration = Duration::from_secs(5);\n\n/// Threshold and interval for warning about slow operation.\nconst SLOW_THRESHOLD: Duration = Duration::from_secs(3);\n\n/// A rich Pageserver gRPC client for a single tenant timeline. This client is more capable than the\n/// basic `page_api::Client` gRPC client, and supports:\n///\n/// * Sharded tenants across multiple Pageservers.\n/// * Pooling of connections, clients, and streams for efficient resource use.\n/// * Concurrent use by many callers.\n/// * Internal handling of GetPage bidirectional streams.\n/// * Automatic retries.\n/// * Observability.\n///\n/// The client has dedicated connection/client/stream pools per shard, for resource reuse. These\n/// pools are unbounded: we allow scaling out as many concurrent streams as needed to serve all\n/// concurrent callers, which mostly eliminates head-of-line blocking. Idle streams are fairly\n/// cheap: the server task currently uses 26 KB of memory, so we can comfortably fit 100,000\n/// concurrent idle streams (2.5 GB memory). 
The worst case degenerates to the old libpq case with\n/// one stream per backend, but without the TCP connection overhead. In the common case we expect\n/// significantly lower stream counts due to stream sharing, driven e.g. by idle backends, LFC hits,\n/// read coalescing, sharding (backends typically only talk to one shard at a time), etc.\n///\n/// TODO: this client does not support base backups or LSN leases, as these are only used by\n/// compute_ctl. Consider adding this, but LSN leases need concurrent requests on all shards.\npub struct PageserverClient {\n    /// The tenant ID.\n    tenant_id: TenantId,\n    /// The timeline ID.\n    timeline_id: TimelineId,\n    /// The JWT auth token for this tenant, if any.\n    auth_token: Option<String>,\n    /// The compression to use, if any.\n    compression: Option<CompressionEncoding>,\n    /// The shards for this tenant.\n    shards: ArcSwap<Shards>,\n}\n\nimpl PageserverClient {\n    /// Creates a new Pageserver client for a given tenant and timeline. Uses the Pageservers given\n    /// in the shard spec, which must be complete and must use gRPC URLs.\n    pub fn new(\n        tenant_id: TenantId,\n        timeline_id: TimelineId,\n        shard_spec: ShardSpec,\n        auth_token: Option<String>,\n        compression: Option<CompressionEncoding>,\n    ) -> anyhow::Result<Self> {\n        let shards = Shards::new(\n            tenant_id,\n            timeline_id,\n            shard_spec,\n            auth_token.clone(),\n            compression,\n        )?;\n        Ok(Self {\n            tenant_id,\n            timeline_id,\n            auth_token,\n            compression,\n            shards: ArcSwap::new(Arc::new(shards)),\n        })\n    }\n\n    /// Updates the shards from the given shard spec. 
In-flight requests will complete using the\n    /// existing shards, but may retry with the new shards if they fail.\n    ///\n    /// TODO: verify that in-flight requests are allowed to complete, and that the old pools are\n    /// properly spun down and dropped afterwards.\n    pub fn update_shards(&self, shard_spec: ShardSpec) -> anyhow::Result<()> {\n        // Validate the shard spec. We should really use `ArcSwap::rcu` for this, to avoid races\n        // with concurrent updates, but that involves creating a new `Shards` on every attempt,\n        // which spins up a bunch of Tokio tasks and such. These should already be checked elsewhere\n        // in the stack, and if they're violated then we already have problems elsewhere, so a\n        // best-effort but possibly-racy check is okay here.\n        let old = self.shards.load_full();\n        if shard_spec.count < old.count {\n            return Err(anyhow!(\n                \"can't reduce shard count from {} to {}\",\n                old.count,\n                shard_spec.count\n            ));\n        }\n        if !old.count.is_unsharded() && shard_spec.stripe_size != old.stripe_size {\n            return Err(anyhow!(\n                \"can't change stripe size from {} to {}\",\n                old.stripe_size.expect(\"always Some when sharded\"),\n                shard_spec.stripe_size.expect(\"always Some when sharded\")\n            ));\n        }\n\n        let shards = Shards::new(\n            self.tenant_id,\n            self.timeline_id,\n            shard_spec,\n            self.auth_token.clone(),\n            self.compression,\n        )?;\n        self.shards.store(Arc::new(shards));\n        Ok(())\n    }\n\n    /// Returns the total size of a database, as # of bytes.\n    #[instrument(skip_all, fields(db_oid=%req.db_oid, lsn=%req.read_lsn))]\n    pub async fn get_db_size(\n        &self,\n        req: page_api::GetDbSizeRequest,\n    ) -> tonic::Result<page_api::GetDbSizeResponse> {\n     
   debug!(\"sending request: {req:?}\");\n        let resp = Self::with_retries(CALL_TIMEOUT, async |_| {\n            // Relation metadata is only available on shard 0.\n            let mut client = self.shards.load_full().get_zero().client().await?;\n            Self::with_timeout(REQUEST_TIMEOUT, client.get_db_size(req)).await\n        })\n        .await?;\n        debug!(\"received response: {resp:?}\");\n        Ok(resp)\n    }\n\n    /// Fetches pages. The `request_id` must be unique across all in-flight requests, and the\n    /// `attempt` must be 0 (incremented on retry). Automatically splits requests that straddle\n    /// shard boundaries, and assembles the responses.\n    ///\n    /// Unlike `page_api::Client`, this automatically converts `status_code` into `tonic::Status`\n    /// errors. All responses will have `GetPageStatusCode::Ok`.\n    #[instrument(skip_all, fields(\n        req_id = %req.request_id,\n        class = %req.request_class,\n        rel = %req.rel,\n        blkno = %req.block_numbers[0],\n        blks = %req.block_numbers.len(),\n        lsn = %req.read_lsn,\n    ))]\n    pub async fn get_page(\n        &self,\n        req: page_api::GetPageRequest,\n    ) -> tonic::Result<page_api::GetPageResponse> {\n        // Make sure we have at least one page.\n        if req.block_numbers.is_empty() {\n            return Err(tonic::Status::invalid_argument(\"no block number\"));\n        }\n        // The request attempt must be 0. The client will increment it internally.\n        if req.request_id.attempt != 0 {\n            return Err(tonic::Status::invalid_argument(\"request attempt must be 0\"));\n        }\n\n        debug!(\"sending request: {req:?}\");\n\n        // The shards may change while we're fetching pages. We execute the request using a stable\n        // view of the shards (especially important for requests that span shards), but retry the\n        // top-level (pre-split) request to pick up shard changes. 
This can lead to unnecessary\n        // retries and re-splits in some cases where requests span shards, but these are expected to\n        // be rare.\n        //\n        // TODO: the gRPC server and client doesn't yet properly support shard splits. Revisit this\n        // once we figure out how to handle these.\n        let resp = Self::with_retries(CALL_TIMEOUT, async |attempt| {\n            let mut req = req.clone();\n            req.request_id.attempt = attempt as u32;\n            let shards = self.shards.load_full();\n            Self::with_timeout(REQUEST_TIMEOUT, Self::get_page_with_shards(req, &shards)).await\n        })\n        .await?;\n\n        debug!(\"received response: {resp:?}\");\n        Ok(resp)\n    }\n\n    /// Fetches pages using the given shards. This uses a stable view of the shards, regardless of\n    /// concurrent shard updates. Does not retry internally, but is retried by `get_page()`.\n    async fn get_page_with_shards(\n        req: page_api::GetPageRequest,\n        shards: &Shards,\n    ) -> tonic::Result<page_api::GetPageResponse> {\n        // Fast path: request is for a single shard.\n        if let Some(shard_id) =\n            GetPageSplitter::for_single_shard(&req, shards.count, shards.stripe_size)?\n        {\n            return Self::get_page_with_shard(req, shards.get(shard_id)?).await;\n        }\n\n        // Request spans multiple shards. 
Split it, dispatch concurrent per-shard requests, and\n        // reassemble the responses.\n        let mut splitter = GetPageSplitter::split(req, shards.count, shards.stripe_size)?;\n\n        let mut shard_requests = FuturesUnordered::new();\n        for (shard_id, shard_req) in splitter.drain_requests() {\n            let future = Self::get_page_with_shard(shard_req, shards.get(shard_id)?)\n                .map(move |result| result.map(|resp| (shard_id, resp)));\n            shard_requests.push(future);\n        }\n\n        while let Some((shard_id, shard_response)) = shard_requests.next().await.transpose()? {\n            splitter.add_response(shard_id, shard_response)?;\n        }\n\n        Ok(splitter.collect_response()?)\n    }\n\n    /// Fetches pages on the given shard. Does not retry internally.\n    async fn get_page_with_shard(\n        req: page_api::GetPageRequest,\n        shard: &Shard,\n    ) -> tonic::Result<page_api::GetPageResponse> {\n        let mut stream = shard.stream(Self::is_bulk(&req)).await?;\n        let resp = stream.send(req.clone()).await?;\n\n        // Convert per-request errors into a tonic::Status.\n        if resp.status_code != page_api::GetPageStatusCode::Ok {\n            return Err(tonic::Status::new(\n                resp.status_code.into(),\n                resp.reason.unwrap_or_else(|| String::from(\"unknown error\")),\n            ));\n        }\n\n        // Check that we received the expected pages.\n        if req.rel != resp.rel {\n            return Err(tonic::Status::internal(format!(\n                \"shard {} returned wrong relation, expected {} got {}\",\n                shard.id, req.rel, resp.rel\n            )));\n        }\n        if !req\n            .block_numbers\n            .iter()\n            .copied()\n            .eq(resp.pages.iter().map(|p| p.block_number))\n        {\n            return Err(tonic::Status::internal(format!(\n                \"shard {} returned wrong pages, expected {:?} got 
{:?}\",\n                shard.id,\n                req.block_numbers,\n                resp.pages\n                    .iter()\n                    .map(|page| page.block_number)\n                    .collect::<Vec<_>>()\n            )));\n        }\n\n        Ok(resp)\n    }\n\n    /// Returns the size of a relation, as # of blocks.\n    #[instrument(skip_all, fields(rel=%req.rel, lsn=%req.read_lsn))]\n    pub async fn get_rel_size(\n        &self,\n        req: page_api::GetRelSizeRequest,\n    ) -> tonic::Result<page_api::GetRelSizeResponse> {\n        debug!(\"sending request: {req:?}\");\n        let resp = Self::with_retries(CALL_TIMEOUT, async |_| {\n            // Relation metadata is only available on shard 0.\n            let mut client = self.shards.load_full().get_zero().client().await?;\n            Self::with_timeout(REQUEST_TIMEOUT, client.get_rel_size(req)).await\n        })\n        .await?;\n        debug!(\"received response: {resp:?}\");\n        Ok(resp)\n    }\n\n    /// Fetches an SLRU segment.\n    #[instrument(skip_all, fields(kind=%req.kind, segno=%req.segno, lsn=%req.read_lsn))]\n    pub async fn get_slru_segment(\n        &self,\n        req: page_api::GetSlruSegmentRequest,\n    ) -> tonic::Result<page_api::GetSlruSegmentResponse> {\n        debug!(\"sending request: {req:?}\");\n        let resp = Self::with_retries(CALL_TIMEOUT, async |_| {\n            // SLRU segments are only available on shard 0.\n            let mut client = self.shards.load_full().get_zero().client().await?;\n            Self::with_timeout(REQUEST_TIMEOUT, client.get_slru_segment(req)).await\n        })\n        .await?;\n        debug!(\"received response: {resp:?}\");\n        Ok(resp)\n    }\n\n    /// Runs the given async closure with retries up to the given timeout. Only certain gRPC status\n    /// codes are retried, see [`Retry::should_retry`]. 
Returns `DeadlineExceeded` on timeout.\n    async fn with_retries<T, F, O>(timeout: Duration, f: F) -> tonic::Result<T>\n    where\n        F: FnMut(usize) -> O, // pass attempt number, starting at 0\n        O: Future<Output = tonic::Result<T>>,\n    {\n        Retry {\n            timeout: Some(timeout),\n            base_backoff: BASE_BACKOFF,\n            max_backoff: MAX_BACKOFF,\n        }\n        .with(f)\n        .await\n    }\n\n    /// Runs the given future with a timeout. Returns `DeadlineExceeded` on timeout.\n    async fn with_timeout<T>(\n        timeout: Duration,\n        f: impl Future<Output = tonic::Result<T>>,\n    ) -> tonic::Result<T> {\n        let started = Instant::now();\n        tokio::time::timeout(timeout, f).await.map_err(|_| {\n            tonic::Status::deadline_exceeded(format!(\n                \"request timed out after {:.3}s\",\n                started.elapsed().as_secs_f64()\n            ))\n        })?\n    }\n\n    /// Returns true if the request is considered a bulk request and should use the bulk pool.\n    fn is_bulk(req: &page_api::GetPageRequest) -> bool {\n        req.block_numbers.len() >= BULK_THRESHOLD_BATCH_SIZE\n    }\n}\n\n/// Shard specification for a PageserverClient.\npub struct ShardSpec {\n    /// Maps shard indices to gRPC URLs.\n    ///\n    /// INVARIANT: every shard 0..count is present, and shard 0 is always present.\n    /// INVARIANT: every URL is valid and uses grpc:// scheme.\n    urls: HashMap<ShardIndex, String>,\n    /// The shard count.\n    ///\n    /// NB: this is 0 for unsharded tenants, following `ShardIndex::unsharded()` convention.\n    count: ShardCount,\n    /// The stripe size for these shards.\n    ///\n    /// INVARIANT: None for unsharded tenants, Some for sharded.\n    stripe_size: Option<ShardStripeSize>,\n}\n\nimpl ShardSpec {\n    /// Creates a new shard spec with the given URLs and stripe size. 
All shards must be given.\n    /// The stripe size must be Some for sharded tenants, or None for unsharded tenants.\n    pub fn new(\n        urls: HashMap<ShardIndex, String>,\n        stripe_size: Option<ShardStripeSize>,\n    ) -> anyhow::Result<Self> {\n        // Compute the shard count.\n        let count = match urls.len() {\n            0 => return Err(anyhow!(\"no shards provided\")),\n            1 => ShardCount::new(0), // NB: unsharded tenants use 0, like `ShardIndex::unsharded()`\n            n if n > u8::MAX as usize => return Err(anyhow!(\"too many shards: {n}\")),\n            n => ShardCount::new(n as u8),\n        };\n\n        // Validate the stripe size.\n        if stripe_size.is_none() && !count.is_unsharded() {\n            return Err(anyhow!(\"stripe size must be given for sharded tenants\"));\n        }\n        if stripe_size.is_some() && count.is_unsharded() {\n            return Err(anyhow!(\"stripe size can't be given for unsharded tenants\"));\n        }\n\n        // Validate the shard spec.\n        for (shard_id, url) in &urls {\n            // The shard index must match the computed shard count, even for unsharded tenants.\n            if shard_id.shard_count != count {\n                return Err(anyhow!(\"invalid shard index {shard_id}, expected {count}\"));\n            }\n            // The shard index' number and count must be consistent.\n            if !shard_id.is_unsharded() && shard_id.shard_number.0 >= shard_id.shard_count.0 {\n                return Err(anyhow!(\"invalid shard index {shard_id}\"));\n            }\n            // The above conditions guarantee that we have all shards 0..count: len() matches count,\n            // shard number < count, and numbers are unique (via hashmap).\n\n            // Validate the URL.\n            if PageserverProtocol::from_connstring(url)? 
!= PageserverProtocol::Grpc {\n                return Err(anyhow!(\"invalid shard URL {url}: must use gRPC\"));\n            }\n        }\n\n        Ok(Self {\n            urls,\n            count,\n            stripe_size,\n        })\n    }\n}\n\n/// Tracks the tenant's shards.\nstruct Shards {\n    /// Shards by shard index.\n    ///\n    /// INVARIANT: every shard 0..count is present.\n    /// INVARIANT: shard 0 is always present.\n    by_index: HashMap<ShardIndex, Shard>,\n    /// The shard count.\n    ///\n    /// NB: this is 0 for unsharded tenants, following `ShardIndex::unsharded()` convention.\n    count: ShardCount,\n    /// The stripe size.\n    ///\n    /// INVARIANT: None for unsharded tenants, Some for sharded.\n    stripe_size: Option<ShardStripeSize>,\n}\n\nimpl Shards {\n    /// Creates a new set of shards based on a shard spec.\n    fn new(\n        tenant_id: TenantId,\n        timeline_id: TimelineId,\n        shard_spec: ShardSpec,\n        auth_token: Option<String>,\n        compression: Option<CompressionEncoding>,\n    ) -> anyhow::Result<Self> {\n        // NB: the shard spec has already been validated when constructed.\n        let mut shards = HashMap::with_capacity(shard_spec.urls.len());\n        for (shard_id, url) in shard_spec.urls {\n            shards.insert(\n                shard_id,\n                Shard::new(\n                    url,\n                    tenant_id,\n                    timeline_id,\n                    shard_id,\n                    auth_token.clone(),\n                    compression,\n                )?,\n            );\n        }\n\n        Ok(Self {\n            by_index: shards,\n            count: shard_spec.count,\n            stripe_size: shard_spec.stripe_size,\n        })\n    }\n\n    /// Looks up the given shard.\n    #[allow(clippy::result_large_err)] // TODO: check perf impact\n    fn get(&self, shard_id: ShardIndex) -> tonic::Result<&Shard> {\n        self.by_index\n            
.get(&shard_id)\n            .ok_or_else(|| tonic::Status::not_found(format!(\"unknown shard {shard_id}\")))\n    }\n\n    /// Returns shard 0.\n    fn get_zero(&self) -> &Shard {\n        self.get(ShardIndex::new(ShardNumber(0), self.count))\n            .expect(\"always present\")\n    }\n}\n\n/// A single shard. Has dedicated resource pools with the following structure:\n///\n/// * Channel pool: MAX_CLIENTS_PER_CHANNEL.\n///   * Client pool: unbounded.\n///     * Stream pool: unbounded.\n/// * Bulk channel pool: MAX_BULK_CLIENTS_PER_CHANNEL.\n///   * Bulk client pool: unbounded.\n///     * Bulk stream pool: unbounded.\n///\n/// We use a separate bulk channel pool with a lower concurrency limit for large batch requests.\n/// This avoids TCP-level head-of-line blocking, and also concentrates large window sizes on a\n/// smaller set of streams/connections, which presumably reduces memory use. Neither of these pools\n/// are bounded, nor do they pipeline requests, so the latency characteristics should be mostly\n/// similar (except for TCP transmission time).\n///\n/// TODO: since we never use bounded pools, we could consider removing the pool limiters. However,\n/// the code is fairly trivial, so we may as well keep them around for now in case we need them.\nstruct Shard {\n    /// The shard ID.\n    id: ShardIndex,\n    /// Unary gRPC client pool.\n    client_pool: Arc<ClientPool>,\n    /// GetPage stream pool.\n    stream_pool: Arc<StreamPool>,\n    /// GetPage stream pool for bulk requests.\n    bulk_stream_pool: Arc<StreamPool>,\n}\n\nimpl Shard {\n    /// Creates a new shard. 
It has its own dedicated resource pools.\n    fn new(\n        url: String,\n        tenant_id: TenantId,\n        timeline_id: TimelineId,\n        shard_id: ShardIndex,\n        auth_token: Option<String>,\n        compression: Option<CompressionEncoding>,\n    ) -> anyhow::Result<Self> {\n        // Shard pools for unary requests and non-bulk GetPage requests.\n        let client_pool = ClientPool::new(\n            ChannelPool::new(url.clone(), MAX_CLIENTS_PER_CHANNEL)?,\n            tenant_id,\n            timeline_id,\n            shard_id,\n            auth_token.clone(),\n            compression,\n            None, // unbounded\n        );\n        let stream_pool = StreamPool::new(client_pool.clone(), None); // unbounded\n\n        // Bulk GetPage stream pool for large batches (prefetches, sequential scans, vacuum, etc.).\n        let bulk_stream_pool = StreamPool::new(\n            ClientPool::new(\n                ChannelPool::new(url, MAX_BULK_CLIENTS_PER_CHANNEL)?,\n                tenant_id,\n                timeline_id,\n                shard_id,\n                auth_token,\n                compression,\n                None, // unbounded,\n            ),\n            None, // unbounded\n        );\n\n        Ok(Self {\n            id: shard_id,\n            client_pool,\n            stream_pool,\n            bulk_stream_pool,\n        })\n    }\n\n    /// Returns a pooled client for this shard.\n    #[instrument(skip_all)]\n    async fn client(&self) -> tonic::Result<ClientGuard> {\n        warn_slow(\n            \"client pool acquisition\",\n            SLOW_THRESHOLD,\n            pin!(self.client_pool.get()),\n        )\n        .await\n    }\n\n    /// Returns a pooled stream for this shard. 
If `bulk` is `true`, uses the dedicated bulk pool.\n    #[instrument(skip_all, fields(bulk))]\n    async fn stream(&self, bulk: bool) -> tonic::Result<StreamGuard> {\n        let pool = match bulk {\n            false => &self.stream_pool,\n            true => &self.bulk_stream_pool,\n        };\n        warn_slow(\"stream pool acquisition\", SLOW_THRESHOLD, pin!(pool.get())).await\n    }\n}\n"
  },
  {
    "path": "pageserver/client_grpc/src/lib.rs",
    "content": "mod client;\nmod pool;\nmod retry;\n\npub use client::{PageserverClient, ShardSpec};\n"
  },
  {
    "path": "pageserver/client_grpc/src/pool.rs",
    "content": "//! This module provides various Pageserver gRPC client resource pools.\n//!\n//! These pools are designed to reuse gRPC resources (connections, clients, and streams) across\n//! multiple concurrent callers (i.e. Postgres backends). This avoids the resource cost and latency\n//! of creating dedicated TCP connections and server tasks for every Postgres backend.\n//!\n//! Each resource has its own, nested pool. The pools are custom-built for the properties of each\n//! resource -- they are different enough that a generic pool isn't suitable.\n//!\n//! * ChannelPool: manages gRPC channels (TCP connections) to a single Pageserver. Multiple clients\n//!   can acquire and use the same channel concurrently (via HTTP/2 stream multiplexing), up to a\n//!   per-channel client limit. Channels are closed immediately when empty, and indirectly rely on\n//!   client/stream idle timeouts.\n//!\n//! * ClientPool: manages gRPC clients for a single tenant shard. Each client acquires a (shared)\n//!   channel from the ChannelPool for the client's lifetime. A client can only be acquired by a\n//!   single caller at a time, and is returned to the pool when dropped. Idle clients are removed\n//!   from the pool after a while to free up resources.\n//!\n//! * StreamPool: manages bidirectional gRPC GetPage streams. Each stream acquires a client from the\n//!   ClientPool for the stream's lifetime. A stream can only be acquired by a single caller at a\n//!   time, and is returned to the pool when dropped. Idle streams are removed from the pool after\n//!   a while to free up resources.\n//!\n//!   The stream only supports sending a single, synchronous request at a time, and does not support\n//!   pipelining multiple requests from different callers onto the same stream -- instead, we scale\n//!   out concurrent streams to improve throughput. There are many reasons for this design choice:\n//!\n//!     * It (mostly) eliminates head-of-line blocking. 
A single stream is processed sequentially by\n//!       a single server task, which may block e.g. on layer downloads, LSN waits, etc.\n//!\n//!     * Cancellation becomes trivial, by closing the stream. Otherwise, if a caller goes away\n//!       (e.g. because of a timeout), the request would still be processed by the server and block\n//!       requests behind it in the stream. It might even block its own timeout retry.\n//!\n//!     * Stream scheduling becomes significantly simpler and cheaper.\n//!\n//!     * Individual callers can still use client-side batching for pipelining.\n//!\n//!     * Idle streams are cheap. Benchmarks show that an idle GetPage stream takes up about 26 KB\n//!       per stream (2.5 GB for 100,000 streams), so we can afford to scale out.\n//!\n//! Each channel corresponds to one TCP connection. Each client unary request and each stream\n//! corresponds to one HTTP/2 stream and server task.\n//!\n//! TODO: error handling (including custom error types).\n//! TODO: observability.\n\nuse std::collections::BTreeMap;\nuse std::num::NonZero;\nuse std::ops::{Deref, DerefMut};\nuse std::pin::Pin;\nuse std::sync::atomic::{AtomicUsize, Ordering};\nuse std::sync::{Arc, Mutex, Weak};\nuse std::time::{Duration, Instant};\n\nuse futures::{Stream, StreamExt as _};\nuse tokio::sync::{OwnedSemaphorePermit, Semaphore, watch};\nuse tokio_stream::wrappers::WatchStream;\nuse tokio_util::sync::CancellationToken;\nuse tonic::codec::CompressionEncoding;\nuse tonic::transport::{Channel, Endpoint};\n\nuse pageserver_page_api as page_api;\nuse utils::id::{TenantId, TimelineId};\nuse utils::shard::ShardIndex;\n\n/// Reap clients/streams that have been idle for this long. Channels are reaped immediately when\n/// empty, and indirectly rely on the client/stream idle timeouts.\n///\n/// A stream's client will be reaped after 2x the idle threshold (first stream, then client), but\n/// that's okay -- if the stream closes abruptly (e.g. 
due to timeout or cancellation), we want to\n/// keep its client around in the pool for a while.\nconst REAP_IDLE_THRESHOLD: Duration = match cfg!(any(test, feature = \"testing\")) {\n    false => Duration::from_secs(180),\n    true => Duration::from_secs(1), // exercise reaping in tests\n};\n\n/// Reap idle resources with this interval.\nconst REAP_IDLE_INTERVAL: Duration = match cfg!(any(test, feature = \"testing\")) {\n    false => Duration::from_secs(10),\n    true => Duration::from_secs(1), // exercise reaping in tests\n};\n\n/// A gRPC channel pool, for a single Pageserver. A channel is shared by many clients (via HTTP/2\n/// stream multiplexing), up to `clients_per_channel` -- a new channel will be spun up beyond this.\n/// The pool does not limit the number of channels, and instead relies on `ClientPool` or\n/// `StreamPool` to limit the number of concurrent clients.\n///\n/// The pool is always wrapped in an outer `Arc`, to allow long-lived guards across tasks/threads.\n///\n/// TODO: consider prewarming a set of channels, to avoid initial connection latency.\n/// TODO: consider adding a circuit breaker for errors and fail fast.\npub struct ChannelPool {\n    /// Pageserver endpoint to connect to.\n    endpoint: Endpoint,\n    /// Max number of clients per channel. Beyond this, a new channel will be created.\n    max_clients_per_channel: NonZero<usize>,\n    /// Open channels.\n    channels: Mutex<BTreeMap<ChannelID, ChannelEntry>>,\n    /// Channel ID generator.\n    next_channel_id: AtomicUsize,\n}\n\ntype ChannelID = usize;\n\nstruct ChannelEntry {\n    /// The gRPC channel (i.e. TCP connection). 
Shared by multiple clients.\n    channel: Channel,\n    /// Number of clients using this channel.\n    clients: usize,\n}\n\nimpl ChannelPool {\n    /// Creates a new channel pool for the given Pageserver endpoint.\n    pub fn new<E>(endpoint: E, max_clients_per_channel: NonZero<usize>) -> anyhow::Result<Arc<Self>>\n    where\n        E: TryInto<Endpoint> + Send + Sync + 'static,\n        <E as TryInto<Endpoint>>::Error: std::error::Error + Send + Sync,\n    {\n        Ok(Arc::new(Self {\n            endpoint: endpoint.try_into()?,\n            max_clients_per_channel,\n            channels: Mutex::default(),\n            next_channel_id: AtomicUsize::default(),\n        }))\n    }\n\n    /// Acquires a gRPC channel for a client. Multiple clients may acquire the same channel.\n    ///\n    /// This never blocks (except for mutex acquisition). The channel is connected lazily on first\n    /// use, and the `ChannelPool` does not have a channel limit. Channels will be re-established\n    /// automatically on failure (TODO: verify).\n    ///\n    /// Callers should not clone the returned channel, and must hold onto the returned guard as long\n    /// as the channel is in use. It is unfortunately not possible to enforce this: the Protobuf\n    /// client requires an owned `Channel` and we don't have access to the channel's internal\n    /// refcount.\n    ///\n    /// This is not performance-sensitive. It is only called when creating a new client, and clients\n    /// are pooled and reused by `ClientPool`. The total number of channels will also be small. O(n)\n    /// performance is therefore okay.\n    pub fn get(self: &Arc<Self>) -> ChannelGuard {\n        let mut channels = self.channels.lock().unwrap();\n\n        // Try to find an existing channel with available capacity. We check entries in BTreeMap\n        // order, to fill up the lower-ordered channels first. The client/stream pools also prefer\n        // clients with lower-ordered channel IDs first. 
This will cluster clients in lower-ordered\n        // channels, and free up higher-ordered channels such that they can be reaped.\n        for (&id, entry) in channels.iter_mut() {\n            assert!(\n                entry.clients <= self.max_clients_per_channel.get(),\n                \"channel overflow\"\n            );\n            assert_ne!(entry.clients, 0, \"empty channel not reaped\");\n            if entry.clients < self.max_clients_per_channel.get() {\n                entry.clients += 1;\n                return ChannelGuard {\n                    pool: Arc::downgrade(self),\n                    id,\n                    channel: Some(entry.channel.clone()),\n                };\n            }\n        }\n\n        // Create a new channel. We connect lazily on first use, such that we don't block here and\n        // other clients can join onto the same channel while it's connecting.\n        let channel = self.endpoint.connect_lazy();\n\n        let id = self.next_channel_id.fetch_add(1, Ordering::Relaxed);\n        let entry = ChannelEntry {\n            channel: channel.clone(),\n            clients: 1, // account for the guard below\n        };\n        channels.insert(id, entry);\n\n        ChannelGuard {\n            pool: Arc::downgrade(self),\n            id,\n            channel: Some(channel),\n        }\n    }\n}\n\n/// Tracks a channel acquired from the pool. The owned inner channel can be obtained with `take()`,\n/// since the gRPC client requires an owned `Channel`.\npub struct ChannelGuard {\n    pool: Weak<ChannelPool>,\n    id: ChannelID,\n    channel: Option<Channel>,\n}\n\nimpl ChannelGuard {\n    /// Returns the inner owned channel. Panics if called more than once. The caller must hold onto\n    /// the guard as long as the channel is in use, and should not clone it.\n    pub fn take(&mut self) -> Channel {\n        self.channel.take().expect(\"channel already taken\")\n    }\n}\n\n/// Returns the channel to the pool. 
The channel is closed when empty.\nimpl Drop for ChannelGuard {\n    fn drop(&mut self) {\n        let Some(pool) = self.pool.upgrade() else {\n            return; // pool was dropped\n        };\n\n        let mut channels = pool.channels.lock().unwrap();\n        let entry = channels.get_mut(&self.id).expect(\"unknown channel\");\n        assert!(entry.clients > 0, \"channel underflow\");\n        entry.clients -= 1;\n\n        // Reap empty channels immediately.\n        if entry.clients == 0 {\n            channels.remove(&self.id);\n        }\n    }\n}\n\n/// A pool of gRPC clients for a single tenant shard. Each client acquires a channel from the inner\n/// `ChannelPool`. A client is only given out to a single caller at a time. The pool limits the\n/// total number of concurrent clients to `max_clients` via semaphore.\n///\n/// The pool is always wrapped in an outer `Arc`, to allow long-lived guards across tasks/threads.\npub struct ClientPool {\n    /// Tenant ID.\n    tenant_id: TenantId,\n    /// Timeline ID.\n    timeline_id: TimelineId,\n    /// Shard ID.\n    shard_id: ShardIndex,\n    /// Authentication token, if any.\n    auth_token: Option<String>,\n    /// Compression to use.\n    compression: Option<CompressionEncoding>,\n    /// Channel pool to acquire channels from.\n    channel_pool: Arc<ChannelPool>,\n    /// Limits the max number of concurrent clients for this pool. None if the pool is unbounded.\n    limiter: Option<Arc<Semaphore>>,\n    /// Idle pooled clients. Acquired clients are removed from here and returned on drop.\n    ///\n    /// The first client in the map will be acquired next. The map is sorted by client ID, which in\n    /// turn is sorted by its channel ID, such that we prefer acquiring idle clients from\n    /// lower-ordered channels. 
This allows us to free up and reap higher-ordered channels.\n    idle: Mutex<BTreeMap<ClientID, ClientEntry>>,\n    /// Reaps idle clients.\n    idle_reaper: Reaper,\n    /// Unique client ID generator.\n    next_client_id: AtomicUsize,\n}\n\ntype ClientID = (ChannelID, usize);\n\nstruct ClientEntry {\n    /// The pooled gRPC client.\n    client: page_api::Client,\n    /// The channel guard for the channel used by the client.\n    channel_guard: ChannelGuard,\n    /// The client has been idle since this time. All clients in `ClientPool::idle` are idle by\n    /// definition, so this is the time when it was added back to the pool.\n    idle_since: Instant,\n}\n\nimpl ClientPool {\n    /// Creates a new client pool for the given tenant shard. Channels are acquired from the given\n    /// `ChannelPool`, which must point to a Pageserver that hosts the tenant shard. Allows up to\n    /// `max_clients` concurrent clients, or unbounded if None.\n    pub fn new(\n        channel_pool: Arc<ChannelPool>,\n        tenant_id: TenantId,\n        timeline_id: TimelineId,\n        shard_id: ShardIndex,\n        auth_token: Option<String>,\n        compression: Option<CompressionEncoding>,\n        max_clients: Option<NonZero<usize>>,\n    ) -> Arc<Self> {\n        let pool = Arc::new(Self {\n            tenant_id,\n            timeline_id,\n            shard_id,\n            auth_token,\n            compression,\n            channel_pool,\n            idle: Mutex::default(),\n            idle_reaper: Reaper::new(REAP_IDLE_THRESHOLD, REAP_IDLE_INTERVAL),\n            limiter: max_clients.map(|max| Arc::new(Semaphore::new(max.get()))),\n            next_client_id: AtomicUsize::default(),\n        });\n        pool.idle_reaper.spawn(&pool);\n        pool\n    }\n\n    /// Gets a client from the pool, or creates a new one if necessary. Connections are established\n    /// lazily and do not block, but this call can block if the pool is at `max_clients`. 
The client\n    /// is returned to the pool when the guard is dropped.\n    ///\n    /// This is moderately performance-sensitive. It is called for every unary request, but these\n    /// establish a new gRPC stream per request so they're already expensive. GetPage requests use\n    /// the `StreamPool` instead.\n    pub async fn get(self: &Arc<Self>) -> tonic::Result<ClientGuard> {\n        // Acquire a permit if the pool is bounded.\n        let mut permit = None;\n        if let Some(limiter) = self.limiter.clone() {\n            permit = Some(limiter.acquire_owned().await.expect(\"never closed\"));\n        }\n\n        // Fast path: acquire an idle client from the pool.\n        if let Some((id, entry)) = self.idle.lock().unwrap().pop_first() {\n            return Ok(ClientGuard {\n                pool: Arc::downgrade(self),\n                id,\n                client: Some(entry.client),\n                channel_guard: Some(entry.channel_guard),\n                permit,\n            });\n        }\n\n        // Construct a new client.\n        let mut channel_guard = self.channel_pool.get();\n        let client = page_api::Client::new(\n            channel_guard.take(),\n            self.tenant_id,\n            self.timeline_id,\n            self.shard_id,\n            self.auth_token.clone(),\n            self.compression,\n        )\n        .map_err(|err| tonic::Status::internal(format!(\"failed to create client: {err}\")))?;\n\n        Ok(ClientGuard {\n            pool: Arc::downgrade(self),\n            id: (\n                channel_guard.id,\n                self.next_client_id.fetch_add(1, Ordering::Relaxed),\n            ),\n            client: Some(client),\n            channel_guard: Some(channel_guard),\n            permit,\n        })\n    }\n}\n\nimpl Reapable for ClientPool {\n    /// Reaps clients that have been idle since before the cutoff.\n    fn reap_idle(&self, cutoff: Instant) {\n        self.idle\n            .lock()\n            
.unwrap()\n            .retain(|_, entry| entry.idle_since >= cutoff)\n    }\n}\n\n/// A client acquired from the pool. The inner client can be accessed via Deref. The client is\n/// returned to the pool when dropped.\npub struct ClientGuard {\n    pool: Weak<ClientPool>,\n    id: ClientID,\n    client: Option<page_api::Client>,     // Some until dropped\n    channel_guard: Option<ChannelGuard>,  // Some until dropped\n    permit: Option<OwnedSemaphorePermit>, // None if pool is unbounded\n}\n\nimpl Deref for ClientGuard {\n    type Target = page_api::Client;\n\n    fn deref(&self) -> &Self::Target {\n        self.client.as_ref().expect(\"not dropped\")\n    }\n}\n\nimpl DerefMut for ClientGuard {\n    fn deref_mut(&mut self) -> &mut Self::Target {\n        self.client.as_mut().expect(\"not dropped\")\n    }\n}\n\n/// Returns the client to the pool.\nimpl Drop for ClientGuard {\n    fn drop(&mut self) {\n        let Some(pool) = self.pool.upgrade() else {\n            return; // pool was dropped\n        };\n\n        let entry = ClientEntry {\n            client: self.client.take().expect(\"dropped once\"),\n            channel_guard: self.channel_guard.take().expect(\"dropped once\"),\n            idle_since: Instant::now(),\n        };\n        pool.idle.lock().unwrap().insert(self.id, entry);\n\n        _ = self.permit; // returned on drop, referenced for visibility\n    }\n}\n\n/// A pool of bidirectional gRPC streams. Currently only used for GetPage streams. Each stream\n/// acquires a client from the inner `ClientPool` for the stream's lifetime.\n///\n/// Individual streams only send a single request at a time, and do not pipeline multiple callers\n/// onto the same stream. Instead, we scale out the number of concurrent streams. This is primarily\n/// to eliminate head-of-line blocking. 
See the module documentation for more details.\n///\n/// TODO: consider making this generic over request and response types; not currently needed.\npub struct StreamPool {\n    /// The client pool to acquire clients from. Must be unbounded.\n    client_pool: Arc<ClientPool>,\n    /// Idle pooled streams. Acquired streams are removed from here and returned on drop.\n    ///\n    /// The first stream in the map will be acquired next. The map is sorted by stream ID, which is\n    /// equivalent to the client ID and in turn sorted by its channel ID. This way we prefer\n    /// acquiring idle streams from lower-ordered channels, which allows us to free up and reap\n    /// higher-ordered channels.\n    idle: Mutex<BTreeMap<StreamID, StreamEntry>>,\n    /// Limits the max number of concurrent streams. None if the pool is unbounded.\n    limiter: Option<Arc<Semaphore>>,\n    /// Reaps idle streams.\n    idle_reaper: Reaper,\n}\n\n/// The stream ID. Reuses the inner client ID.\ntype StreamID = ClientID;\n\n/// A pooled stream.\nstruct StreamEntry {\n    /// The bidirectional stream.\n    stream: BiStream,\n    /// The time when this stream was last used, i.e. when it was put back into `StreamPool::idle`.\n    idle_since: Instant,\n}\n\n/// A bidirectional GetPage stream and its client. Can send requests and receive responses.\nstruct BiStream {\n    /// The owning client. Holds onto the channel slot while the stream is alive.\n    client: ClientGuard,\n    /// Stream for sending requests. Uses a watch channel, so it can only send a single request at a\n    /// time, and the caller must await the response before sending another request. This is\n    /// enforced by `StreamGuard::send`.\n    sender: watch::Sender<page_api::GetPageRequest>,\n    /// Stream for receiving responses.\n    receiver: Pin<Box<dyn Stream<Item = tonic::Result<page_api::GetPageResponse>> + Send>>,\n}\n\nimpl StreamPool {\n    /// Creates a new stream pool, using the given client pool. 
It will use up to `max_streams`\n    /// concurrent streams.\n    ///\n    /// The client pool must be unbounded. The stream pool will enforce its own limits, and because\n    /// streams are long-lived they can cause persistent starvation if they exhaust the client pool.\n    /// The stream pool should generally have its own dedicated client pool (but it can share a\n    /// channel pool with others since these are always unbounded).\n    pub fn new(client_pool: Arc<ClientPool>, max_streams: Option<NonZero<usize>>) -> Arc<Self> {\n        assert!(client_pool.limiter.is_none(), \"bounded client pool\");\n        let pool = Arc::new(Self {\n            client_pool,\n            idle: Mutex::default(),\n            limiter: max_streams.map(|max_streams| Arc::new(Semaphore::new(max_streams.get()))),\n            idle_reaper: Reaper::new(REAP_IDLE_THRESHOLD, REAP_IDLE_INTERVAL),\n        });\n        pool.idle_reaper.spawn(&pool);\n        pool\n    }\n\n    /// Acquires an available stream from the pool, or spins up a new stream if all streams are\n    /// full. Returns a guard that can be used to send requests and await the responses. Blocks if\n    /// the pool is full.\n    ///\n    /// This is very performance-sensitive, as it is on the GetPage hot path.\n    ///\n    /// TODO: is a `Mutex<BTreeMap>` performant enough? Will it become too contended? We can't\n    /// trivially use e.g. 
DashMap or sharding, because we want to pop lower-ordered streams first\n    /// to free up higher-ordered channels.\n    pub async fn get(self: &Arc<Self>) -> tonic::Result<StreamGuard> {\n        // Acquire a permit if the pool is bounded.\n        let mut permit = None;\n        if let Some(limiter) = self.limiter.clone() {\n            permit = Some(limiter.acquire_owned().await.expect(\"never closed\"));\n        }\n\n        // Fast path: acquire an idle stream from the pool.\n        if let Some((_, entry)) = self.idle.lock().unwrap().pop_first() {\n            return Ok(StreamGuard {\n                pool: Arc::downgrade(self),\n                stream: Some(entry.stream),\n                can_reuse: true,\n                permit,\n            });\n        }\n\n        // Spin up a new stream. Uses a watch channel to send a single request at a time, since\n        // `StreamGuard::send` enforces this anyway and it avoids unnecessary channel overhead.\n        let mut client = self.client_pool.get().await?;\n\n        let (req_tx, req_rx) = watch::channel(page_api::GetPageRequest::default());\n        let req_stream = WatchStream::from_changes(req_rx);\n        let resp_stream = client.get_pages(req_stream).await?;\n\n        Ok(StreamGuard {\n            pool: Arc::downgrade(self),\n            stream: Some(BiStream {\n                client,\n                sender: req_tx,\n                receiver: Box::pin(resp_stream),\n            }),\n            can_reuse: true,\n            permit,\n        })\n    }\n}\n\nimpl Reapable for StreamPool {\n    /// Reaps streams that have been idle since before the cutoff.\n    fn reap_idle(&self, cutoff: Instant) {\n        self.idle\n            .lock()\n            .unwrap()\n            .retain(|_, entry| entry.idle_since >= cutoff);\n    }\n}\n\n/// A stream acquired from the pool. 
Returned to the pool when dropped, unless there are still\n/// in-flight requests on the stream, or the stream failed.\npub struct StreamGuard {\n    pool: Weak<StreamPool>,\n    stream: Option<BiStream>,             // Some until dropped\n    can_reuse: bool,                      // returned to pool if true\n    permit: Option<OwnedSemaphorePermit>, // None if pool is unbounded\n}\n\nimpl StreamGuard {\n    /// Sends a request on the stream and awaits the response. If the future is dropped before it\n    /// resolves (e.g. due to a timeout or cancellation), the stream will be closed to cancel the\n    /// request and is not returned to the pool. The same is true if the stream errors, in which\n    /// case the caller can't send further requests on the stream.\n    ///\n    /// We only support sending a single request at a time, to eliminate head-of-line blocking. See\n    /// module documentation for details.\n    ///\n    /// NB: errors are often returned as `GetPageResponse::status_code` instead of `tonic::Status`\n    /// to avoid tearing down the stream for per-request errors. Callers must check this.\n    pub async fn send(\n        &mut self,\n        req: page_api::GetPageRequest,\n    ) -> tonic::Result<page_api::GetPageResponse> {\n        let req_id = req.request_id;\n        let stream = self.stream.as_mut().expect(\"not dropped\");\n\n        // Mark the stream as not reusable while the request is in flight. We can't return the\n        // stream to the pool until we receive the response, to avoid head-of-line blocking and\n        // stale responses. 
Failed streams can't be reused either.\n        if !self.can_reuse {\n            return Err(tonic::Status::internal(\"stream can't be reused\"));\n        }\n        self.can_reuse = false;\n\n        // Send the request and receive the response.\n        //\n        // NB: this uses a watch channel, so it's unsafe to change this code to pipeline requests.\n        stream\n            .sender\n            .send(req)\n            .map_err(|_| tonic::Status::unavailable(\"stream closed\"))?;\n\n        let resp = stream\n            .receiver\n            .next()\n            .await\n            .ok_or_else(|| tonic::Status::unavailable(\"stream closed\"))??;\n\n        if resp.request_id != req_id {\n            return Err(tonic::Status::internal(format!(\n                \"response ID {} does not match request ID {}\",\n                resp.request_id, req_id\n            )));\n        }\n\n        // Success, mark the stream as reusable.\n        self.can_reuse = true;\n\n        Ok(resp)\n    }\n}\n\nimpl Drop for StreamGuard {\n    fn drop(&mut self) {\n        let Some(pool) = self.pool.upgrade() else {\n            return; // pool was dropped\n        };\n\n        // If the stream isn't reusable, it can't be returned to the pool.\n        if !self.can_reuse {\n            return;\n        }\n\n        // Place the idle stream back into the pool.\n        let entry = StreamEntry {\n            stream: self.stream.take().expect(\"dropped once\"),\n            idle_since: Instant::now(),\n        };\n        pool.idle\n            .lock()\n            .unwrap()\n            .insert(entry.stream.client.id, entry);\n\n        _ = self.permit; // returned on drop, referenced for visibility\n    }\n}\n\n/// Periodically reaps idle resources from a pool.\nstruct Reaper {\n    /// The task check interval.\n    interval: Duration,\n    /// The threshold for reaping idle resources.\n    threshold: Duration,\n    /// Cancels the reaper task. 
Cancelled when the reaper is dropped.\n    cancel: CancellationToken,\n}\n\nimpl Reaper {\n    /// Creates a new reaper.\n    pub fn new(threshold: Duration, interval: Duration) -> Self {\n        Self {\n            cancel: CancellationToken::new(),\n            threshold,\n            interval,\n        }\n    }\n\n    /// Spawns a task to periodically reap idle resources from the given pool. The task is\n    /// cancelled when the reaper is dropped.\n    pub fn spawn(&self, pool: &Arc<impl Reapable>) {\n        // NB: hold a weak pool reference, otherwise the task will prevent dropping the pool.\n        let pool = Arc::downgrade(pool);\n        let cancel = self.cancel.clone();\n        let (interval, threshold) = (self.interval, self.threshold);\n\n        tokio::spawn(async move {\n            loop {\n                tokio::select! {\n                    _ = tokio::time::sleep(interval) => {\n                        let Some(pool) = pool.upgrade() else {\n                            return; // pool was dropped\n                        };\n                        pool.reap_idle(Instant::now() - threshold);\n                    }\n\n                    _ = cancel.cancelled() => return,\n                }\n            }\n        });\n    }\n}\n\nimpl Drop for Reaper {\n    fn drop(&mut self) {\n        self.cancel.cancel(); // cancel reaper task\n    }\n}\n\n/// A reapable resource pool.\ntrait Reapable: Send + Sync + 'static {\n    /// Reaps resources that have been idle since before the given cutoff.\n    fn reap_idle(&self, cutoff: Instant);\n}\n"
  },
  {
    "path": "pageserver/client_grpc/src/retry.rs",
    "content": "use std::time::Duration;\n\nuse futures::future::pending;\nuse tokio::time::Instant;\nuse tracing::{error, info, warn};\n\nuse utils::backoff::exponential_backoff_duration;\n\n/// A retry handler for Pageserver gRPC requests.\n///\n/// This is used instead of backoff::retry for better control and observability.\npub struct Retry {\n    /// Timeout across all retry attempts. If None, retries forever.\n    pub timeout: Option<Duration>,\n    /// The initial backoff duration. The first retry does not use a backoff.\n    pub base_backoff: Duration,\n    /// The maximum backoff duration.\n    pub max_backoff: Duration,\n}\n\nimpl Retry {\n    /// Runs the given async closure with timeouts and retries (exponential backoff). Logs errors,\n    /// using the current tracing span for context.\n    ///\n    /// Only certain gRPC status codes are retried, see [`Self::should_retry`].\n    pub async fn with<T, F, O>(&self, mut f: F) -> tonic::Result<T>\n    where\n        F: FnMut(usize) -> O, // pass attempt number, starting at 0\n        O: Future<Output = tonic::Result<T>>,\n    {\n        let started = Instant::now();\n        let deadline = self.timeout.map(|timeout| started + timeout);\n        let mut last_error = None;\n        let mut retries = 0;\n        loop {\n            // Set up a future to wait for the backoff, if any, and run the closure.\n            let backoff_and_try = async {\n                // NB: sleep() always sleeps 1ms, even when given a 0 argument. 
See:\n                // https://github.com/tokio-rs/tokio/issues/6866\n                if let Some(backoff) = self.backoff_duration(retries) {\n                    tokio::time::sleep(backoff).await;\n                }\n\n                f(retries).await\n            };\n\n            // Set up a future for the timeout, if any.\n            let timeout = async {\n                match deadline {\n                    Some(deadline) => tokio::time::sleep_until(deadline).await,\n                    None => pending().await,\n                }\n            };\n\n            // Wait for the backoff and request, or bail out if the timeout is exceeded.\n            let result = tokio::select! {\n                result = backoff_and_try => result,\n\n                _ = timeout => {\n                    let last_error = last_error.unwrap_or_else(|| {\n                        tonic::Status::deadline_exceeded(format!(\n                            \"request timed out after {:.3}s\",\n                            started.elapsed().as_secs_f64()\n                        ))\n                    });\n                    error!(\n                        \"giving up after {:.3}s and {retries} retries, last error {:?}: {}\",\n                        started.elapsed().as_secs_f64(), last_error.code(), last_error.message(),\n                    );\n                    return Err(last_error);\n                }\n            };\n\n            match result {\n                // Success, return the result.\n                Ok(result) => {\n                    if retries > 0 {\n                        info!(\n                            \"request succeeded after {retries} retries in {:.3}s\",\n                            started.elapsed().as_secs_f64(),\n                        );\n                    }\n\n                    return Ok(result);\n                }\n\n                // Error, retry or bail out.\n                Err(status) => {\n                    let (code, message) = 
(status.code(), status.message());\n                    let attempt = retries + 1;\n\n                    if !Self::should_retry(code) {\n                        // NB: include the attempt here too. This isn't necessarily the first\n                        // attempt, because the error may change between attempts.\n                        error!(\n                            \"request failed with {code:?}: {message}, not retrying (attempt {attempt})\"\n                        );\n                        return Err(status);\n                    }\n\n                    warn!(\"request failed with {code:?}: {message}, retrying (attempt {attempt})\");\n\n                    retries += 1;\n                    last_error = Some(status);\n                }\n            }\n        }\n    }\n\n    /// Returns the backoff duration for the given retry attempt, or None for no backoff. The first\n    /// attempt and first retry never backs off, so this returns None for 0 and 1 retries.\n    fn backoff_duration(&self, retries: usize) -> Option<Duration> {\n        let backoff = exponential_backoff_duration(\n            (retries as u32).saturating_sub(1), // first retry does not back off\n            self.base_backoff.as_secs_f64(),\n            self.max_backoff.as_secs_f64(),\n        );\n        (!backoff.is_zero()).then_some(backoff)\n    }\n\n    /// Returns true if the given status code should be retried.\n    fn should_retry(code: tonic::Code) -> bool {\n        match code {\n            tonic::Code::Ok => panic!(\"unexpected Ok status code\"),\n\n            // These codes are transient, so retry them.\n            tonic::Code::Aborted => true,\n            tonic::Code::Cancelled => true,\n            tonic::Code::DeadlineExceeded => true, // maybe transient slowness\n            tonic::Code::ResourceExhausted => true,\n            tonic::Code::Unavailable => true,\n\n            // The following codes will likely continue to fail, so don't retry.\n            
tonic::Code::AlreadyExists => false,\n            tonic::Code::DataLoss => false,\n            tonic::Code::FailedPrecondition => false,\n            // NB: don't retry Internal. It is intended for serious errors such as invariant\n            // violations, and is also used for client-side invariant checks that would otherwise\n            // result in retry loops.\n            tonic::Code::Internal => false,\n            tonic::Code::InvalidArgument => false,\n            tonic::Code::NotFound => false,\n            tonic::Code::OutOfRange => false,\n            tonic::Code::PermissionDenied => false,\n            tonic::Code::Unauthenticated => false,\n            tonic::Code::Unimplemented => false,\n            tonic::Code::Unknown => false,\n        }\n    }\n}\n"
  },
  {
    "path": "pageserver/compaction/Cargo.toml",
    "content": "[package]\nname = \"pageserver_compaction\"\nversion = \"0.1.0\"\nedition.workspace = true\nlicense.workspace = true\n\n[features]\ndefault = []\n\n[dependencies]\nanyhow.workspace = true\nasync-stream.workspace = true\nclap = { workspace = true, features = [\"string\"] }\nfutures.workspace = true\nitertools.workspace = true\nonce_cell.workspace = true\npageserver_api.workspace = true\npin-project-lite.workspace = true\nrand.workspace = true\nsvg_fmt.workspace = true\ntokio = { workspace = true, features = [\"process\", \"sync\", \"fs\", \"rt\", \"io-util\", \"time\"] }\ntracing.workspace = true\ntracing-subscriber.workspace = true\nutils.workspace = true\nworkspace_hack.workspace = true\n\n[dev-dependencies]\ncriterion.workspace = true\nhex-literal.workspace = true\ntokio = { workspace = true, features = [\"process\", \"sync\", \"fs\", \"rt\", \"io-util\", \"time\", \"test-util\"] }\n"
  },
  {
    "path": "pageserver/compaction/TODO.md",
    "content": "# TODO\n\n- If the key space can be perfectly partitioned at some key, perform planning on each\n  partition separately. For example, if we are compacting a level with layers like this:\n\n  ```\n              :\n  +--+ +----+ :  +------+\n  |  | |    | :  |      |\n  +--+ +----+ :  +------+\n              :\n  +-----+ +-+ : +--------+\n  |     | | | : |        |\n  +-----+ +-+ : +--------+\n              :\n  ```\n\n  At the dotted line, there is a natural split in the key space, such that all\n  layers are either on the left or the right of it. We can compact the\n  partitions separately.  We could choose to create image layers for one\n  partition but not the other one, for example.\n\n- All the layers don't have to be exactly the same size, we can choose to cut a\n  layer short or stretch it a little larger than the target size, if it helps\n  the overall system. We can help perfect partitions (see previous bullet point)\n  to happen more frequently, by choosing the cut points wisely. For example, try\n  to cut layers at boundaries of underlying image layers. And \"snap to grid\",\n  i.e. don't cut layers at any key, but e.g. only when key % 10000 = 0.\n\n- Avoid rewriting layers when we'd just create an identical layer to an input\n  layer.\n\n- Parallelism. The code is already split up into planning and execution, so that\n  we first split up the compaction work into \"Jobs\", and then execute them.\n  It would be straightforward to execute multiple jobs in parallel.\n\n- Materialize extra pages in delta layers during compaction. This would reduce\n  read amplification. There has been the idea of partial image layers. 
Materializing\n  extra pages in the delta layers achieves the same goal, without introducing a new\n  concept.\n\n## Simulator\n\n- Expand the simulator for more workloads\n- Automate a test suite that runs the simulator with different workloads and\n  spits out a table of results\n- Model read amplification\n- More sanity checking. One idea is to keep a reference count of each\n  MockRecord, i.e. use Arc<MockRecord> instead of plain MockRecord, and panic if\n  a MockRecord that is newer than PITR horizon is completely dropped. That would\n  indicate that the record was lost.\n"
  },
  {
    "path": "pageserver/compaction/src/bin/compaction-simulator.rs",
    "content": "use std::io::Write;\nuse std::path::{Path, PathBuf};\nuse std::sync::OnceLock;\n\nuse clap::{Parser, Subcommand};\nuse pageserver_compaction::helpers::PAGE_SZ;\nuse pageserver_compaction::simulator::MockTimeline;\nuse rand::Rng;\nuse utils::project_git_version;\n\nproject_git_version!(GIT_VERSION);\n\n#[derive(Parser)]\n#[command(\n    version = GIT_VERSION,\n    about = \"Neon Pageserver compaction simulator\",\n    long_about = \"A developer tool to visualize and test compaction\"\n)]\n#[command(propagate_version = true)]\nstruct CliOpts {\n    #[command(subcommand)]\n    command: Commands,\n}\n\n#[derive(Subcommand)]\nenum Commands {\n    RunSuite,\n    Simulate(SimulateCmd),\n}\n\n#[derive(Clone, clap::ValueEnum)]\nenum Distribution {\n    Uniform,\n    HotCold,\n}\n\n/// Read and update pageserver metadata file\n#[derive(Parser)]\nstruct SimulateCmd {\n    distribution: Distribution,\n\n    /// Number of records to digest\n    num_records: u64,\n    /// Record length\n    record_len: u64,\n\n    // Logical database size in MB\n    logical_size: u64,\n}\n\nasync fn simulate(cmd: &SimulateCmd, results_path: &Path) -> anyhow::Result<()> {\n    let mut executor = MockTimeline::new();\n\n    // Convert the logical size in MB into a key range.\n    let key_range = 0..((cmd.logical_size * 1024 * 1024) / PAGE_SZ);\n    //let key_range = u64::MIN..u64::MAX;\n    println!(\n        \"starting simulation with key range {:016X}-{:016X}\",\n        key_range.start, key_range.end\n    );\n\n    // helper function to print progress indicator\n    let print_progress = |i| -> anyhow::Result<()> {\n        if i == 0 || (i + 1) % 10000 == 0 || i == cmd.num_records - 1 {\n            print!(\n                \"\\ringested {} / {} records, {} MiB / {} MiB...\",\n                i + 1,\n                cmd.num_records,\n                (i + 1) * cmd.record_len / (1_000_000),\n                cmd.num_records * cmd.record_len / (1_000_000),\n            );\n           
 std::io::stdout().flush()?;\n        }\n        Ok(())\n    };\n\n    match cmd.distribution {\n        Distribution::Uniform => {\n            for i in 0..cmd.num_records {\n                executor.ingest_uniform(1, cmd.record_len, &key_range)?;\n                executor.compact_if_needed().await?;\n\n                print_progress(i)?;\n            }\n        }\n        Distribution::HotCold => {\n            let splitpoint = key_range.start + (key_range.end - key_range.start) / 10;\n            let hot_key_range = 0..splitpoint;\n            let cold_key_range = splitpoint..key_range.end;\n\n            for i in 0..cmd.num_records {\n                let chosen_range = if rand::rng().random_bool(0.9) {\n                    &hot_key_range\n                } else {\n                    &cold_key_range\n                };\n                executor.ingest_uniform(1, cmd.record_len, chosen_range)?;\n                executor.compact_if_needed().await?;\n\n                print_progress(i)?;\n            }\n        }\n    }\n    println!(\"done!\");\n    executor.flush_l0();\n    executor.compact_if_needed().await?;\n    let stats = executor.stats()?;\n\n    // Print the stats to stdout, and also to a file\n    print!(\"{stats}\");\n    std::fs::write(results_path.join(\"stats.txt\"), stats)?;\n\n    let animation_path = results_path.join(\"compaction-animation.html\");\n    executor.draw_history(std::fs::File::create(&animation_path)?)?;\n    println!(\n        \"animation: file://{}\",\n        animation_path.canonicalize()?.display()\n    );\n\n    Ok(())\n}\n\nasync fn run_suite_cmd(results_path: &Path, workload: &SimulateCmd) -> anyhow::Result<()> {\n    std::fs::create_dir(results_path)?;\n\n    set_log_file(File::create(results_path.join(\"log\"))?);\n    let result = simulate(workload, results_path).await;\n    set_log_stdout();\n    result\n}\n\nasync fn run_suite() -> anyhow::Result<()> {\n    let top_results_path = PathBuf::from(format!(\n        
\"compaction-suite-results.{}\",\n        std::time::SystemTime::UNIX_EPOCH.elapsed()?.as_secs()\n    ));\n    std::fs::create_dir(&top_results_path)?;\n\n    let workload = SimulateCmd {\n        distribution: Distribution::Uniform,\n        // Generate 20 GB of WAL\n        record_len: 1_000,\n        num_records: 20_000_000,\n        // Logical size 5 GB\n        logical_size: 5_000,\n    };\n\n    run_suite_cmd(&top_results_path.join(\"uniform-20GB-5GB\"), &workload).await?;\n\n    println!(\n        \"All tests finished. Results in {}\",\n        top_results_path.display()\n    );\n    Ok(())\n}\n\nuse std::fs::File;\nuse std::io::Stdout;\nuse std::sync::Mutex;\n\nuse tracing_subscriber::fmt::MakeWriter;\nuse tracing_subscriber::fmt::writer::EitherWriter;\n\nstatic LOG_FILE: OnceLock<Mutex<EitherWriter<File, Stdout>>> = OnceLock::new();\nfn get_log_output() -> &'static Mutex<EitherWriter<File, Stdout>> {\n    LOG_FILE.get_or_init(|| std::sync::Mutex::new(EitherWriter::B(std::io::stdout())))\n}\n\nfn set_log_file(f: File) {\n    *get_log_output().lock().unwrap() = EitherWriter::A(f);\n}\n\nfn set_log_stdout() {\n    *get_log_output().lock().unwrap() = EitherWriter::B(std::io::stdout());\n}\n\nfn init_logging() -> anyhow::Result<()> {\n    // We fall back to printing all spans at info-level or above if\n    // the RUST_LOG environment variable is not set.\n    let rust_log_env_filter = || {\n        tracing_subscriber::EnvFilter::try_from_default_env()\n            .unwrap_or_else(|_| tracing_subscriber::EnvFilter::new(\"info\"))\n    };\n\n    // NB: the order of the with() calls does not matter.\n    // See https://docs.rs/tracing-subscriber/0.3.16/tracing_subscriber/layer/index.html#per-layer-filtering\n    use tracing_subscriber::prelude::*;\n    tracing_subscriber::registry()\n        .with({\n            let log_layer = tracing_subscriber::fmt::layer()\n                .with_target(false)\n                .with_ansi(false)\n                .with_writer(|| 
get_log_output().make_writer());\n            log_layer.with_filter(rust_log_env_filter())\n        })\n        .init();\n\n    Ok(())\n}\n\n#[tokio::main]\nasync fn main() -> anyhow::Result<()> {\n    let cli = CliOpts::parse();\n\n    init_logging()?;\n\n    match cli.command {\n        Commands::Simulate(cmd) => {\n            simulate(&cmd, &PathBuf::from(\"/tmp/compactions.html\")).await?;\n        }\n        Commands::RunSuite => {\n            run_suite().await?;\n        }\n    };\n    Ok(())\n}\n"
  },
  {
    "path": "pageserver/compaction/src/compact_tiered.rs",
    "content": "//! # Tiered compaction algorithm.\n//!\n//! Read all the input delta files, and write a new set of delta files that\n//! include all the input WAL records. See retile_deltas().\n//!\n//! In a \"normal\" LSM tree, you get to remove any values that are overwritten by\n//! later values, but in our system, we keep all the history. So the reshuffling\n//! doesn't remove any garbage, it just reshuffles the records to reduce read\n//! amplification, i.e. the number of files that you need to access to find the\n//! WAL records for a given key.\n//!\n//! If the new delta files would be very \"narrow\", i.e. each file would cover\n//! only a narrow key range, then we create a new set of image files\n//! instead. The current threshold is that if the estimated total size of the\n//! image layers is smaller than the size of the deltas, then we create image\n//! layers. That amounts to 2x storage amplification, and it means that the\n//! distance of image layers in LSN dimension is roughly equal to the logical\n//! database size. For example, if the logical database size is 10 GB, we would\n//! generate new image layers every 10 GB of WAL.\nuse std::collections::{HashSet, VecDeque};\nuse std::ops::Range;\n\nuse futures::StreamExt;\nuse pageserver_api::shard::ShardIdentity;\nuse tracing::{debug, info};\nuse utils::lsn::Lsn;\n\nuse crate::helpers::{\n    PAGE_SZ, accum_key_values, keyspace_total_size, merge_delta_keys_buffered, overlaps_with,\n};\nuse crate::identify_levels::identify_level;\nuse crate::interface::*;\n\n/// Main entry point to compaction.\n///\n/// The starting point is a cutoff LSN (`end_lsn`). The compaction is run on\n/// everything below that point, that needs compaction. The cutoff LSN must\n/// partition the layers so that there are no layers that span across that\n/// LSN. 
To start compaction at the top of the tree, pass the end LSN of the\n/// written last L0 layer.\npub async fn compact_tiered<E: CompactionJobExecutor>(\n    executor: &mut E,\n    end_lsn: Lsn,\n    target_file_size: u64,\n    fanout: u64,\n    ctx: &E::RequestContext,\n) -> anyhow::Result<()> {\n    assert!(fanout >= 1, \"fanout needs to be at least 1 but is {fanout}\");\n    let exp_base = fanout.max(2);\n    // Start at L0\n    let mut current_level_no = 0;\n    let mut current_level_target_height = target_file_size;\n    loop {\n        // end LSN +1 to include possible image layers exactly at 'end_lsn'.\n        let all_layers = executor\n            .get_layers(\n                &(E::Key::MIN..E::Key::MAX),\n                &(Lsn(u64::MIN)..end_lsn + 1),\n                ctx,\n            )\n            .await?;\n        info!(\n            \"Compacting L{}, total # of layers: {}\",\n            current_level_no,\n            all_layers.len()\n        );\n\n        // Identify the range of LSNs that belong to this level. We assume that\n        // each file in this level spans an LSN range up to 1.75x target file\n        // size. That should give us enough slop that if we created a slightly\n        // oversized L0 layer, e.g. because flushing the in-memory layer was\n        // delayed for some reason, we don't consider the oversized layer to\n        // belong to L1. But not too much slop, that we don't accidentally\n        // \"skip\" levels.\n        let max_height = (current_level_target_height as f64 * 1.75) as u64;\n        let Some(level) = identify_level(all_layers, end_lsn, max_height).await? else {\n            break;\n        };\n\n        // Calculate the height of this level. 
If the # of tiers exceeds the\n        // fanout parameter, it's time to compact it.\n        let depth = level.depth();\n        info!(\n            \"Level {} identified as LSN range {}-{}: depth {}\",\n            current_level_no, level.lsn_range.start, level.lsn_range.end, depth\n        );\n        for l in &level.layers {\n            debug!(\"LEVEL {} layer: {}\", current_level_no, l.short_id());\n        }\n        if depth < fanout {\n            debug!(\n                level = current_level_no,\n                depth = depth,\n                fanout,\n                \"too few deltas to compact\"\n            );\n            break;\n        }\n\n        compact_level(\n            &level.lsn_range,\n            &level.layers,\n            executor,\n            target_file_size,\n            ctx,\n        )\n        .await?;\n        if current_level_target_height == u64::MAX {\n            // our target height includes all possible lsns\n            info!(\n                level = current_level_no,\n                depth = depth,\n                \"compaction loop reached max current_level_target_height\"\n            );\n            break;\n        }\n        current_level_no += 1;\n        current_level_target_height = current_level_target_height.saturating_mul(exp_base);\n    }\n    Ok(())\n}\n\nasync fn compact_level<E: CompactionJobExecutor>(\n    lsn_range: &Range<Lsn>,\n    layers: &[E::Layer],\n    executor: &mut E,\n    target_file_size: u64,\n    ctx: &E::RequestContext,\n) -> anyhow::Result<bool> {\n    let mut layer_fragments = Vec::new();\n    for l in layers {\n        layer_fragments.push(LayerFragment::new(l.clone()));\n    }\n\n    let mut state = LevelCompactionState {\n        shard_identity: *executor.get_shard_identity(),\n        target_file_size,\n        _lsn_range: lsn_range.clone(),\n        layers: layer_fragments,\n        jobs: Vec::new(),\n        job_queue: Vec::new(),\n        next_level: false,\n        executor,\n    
};\n\n    let first_job = CompactionJob {\n        key_range: E::Key::MIN..E::Key::MAX,\n        lsn_range: lsn_range.clone(),\n        strategy: CompactionStrategy::Divide,\n        input_layers: state\n            .layers\n            .iter()\n            .enumerate()\n            .map(|i| LayerId(i.0))\n            .collect(),\n        completed: false,\n    };\n\n    state.jobs.push(first_job);\n    state.job_queue.push(JobId(0));\n    state.execute(ctx).await?;\n\n    info!(\n        \"compaction completed! Need to process next level: {}\",\n        state.next_level\n    );\n\n    Ok(state.next_level)\n}\n\n/// Blackboard that keeps track of the state of all the jobs and work remaining\nstruct LevelCompactionState<'a, E>\nwhere\n    E: CompactionJobExecutor,\n{\n    shard_identity: ShardIdentity,\n\n    // parameters\n    target_file_size: u64,\n\n    _lsn_range: Range<Lsn>,\n    layers: Vec<LayerFragment<E>>,\n\n    // job queue\n    jobs: Vec<CompactionJob<E>>,\n    job_queue: Vec<JobId>,\n\n    /// If false, no need to compact levels below this\n    next_level: bool,\n\n    /// Interface to the outside world\n    executor: &'a mut E,\n}\n\n#[derive(Debug, Clone, Copy, Hash, PartialEq, Eq)]\nstruct LayerId(usize);\n#[derive(Debug, Clone, Copy, Hash, PartialEq, Eq)]\nstruct JobId(usize);\n\nstruct PendingJobSet {\n    pending: HashSet<JobId>,\n    completed: HashSet<JobId>,\n}\n\nimpl PendingJobSet {\n    fn new() -> Self {\n        PendingJobSet {\n            pending: HashSet::new(),\n            completed: HashSet::new(),\n        }\n    }\n\n    fn complete_job(&mut self, job_id: JobId) {\n        self.pending.remove(&job_id);\n        self.completed.insert(job_id);\n    }\n\n    fn all_completed(&self) -> bool {\n        self.pending.is_empty()\n    }\n}\n\n// When we decide to rewrite a set of layers, LayerFragment is used to keep\n// track which new layers supersede an old layer. 
When all the stakeholder jobs\n// have completed, this layer can be deleted.\nstruct LayerFragment<E>\nwhere\n    E: CompactionJobExecutor,\n{\n    layer: E::Layer,\n\n    // If we will write new layers to replace this one, this keeps track of the\n    // jobs that need to complete before this layer can be deleted. As the jobs\n    // complete, they are moved from 'pending' to 'completed' set. Once the\n    // 'pending' set becomes empty, the layer can be deleted.\n    //\n    // If None, this layer is not rewritten and must not be deleted.\n    deletable_after: Option<PendingJobSet>,\n\n    deleted: bool,\n}\n\nimpl<E> LayerFragment<E>\nwhere\n    E: CompactionJobExecutor,\n{\n    fn new(layer: E::Layer) -> Self {\n        LayerFragment {\n            layer,\n            deletable_after: None,\n            deleted: false,\n        }\n    }\n}\n\n#[derive(PartialEq)]\nenum CompactionStrategy {\n    Divide,\n    CreateDelta,\n    CreateImage,\n}\n\nstruct CompactionJob<E: CompactionJobExecutor> {\n    key_range: Range<E::Key>,\n    lsn_range: Range<Lsn>,\n\n    strategy: CompactionStrategy,\n\n    input_layers: Vec<LayerId>,\n\n    completed: bool,\n}\n\nimpl<E> LevelCompactionState<'_, E>\nwhere\n    E: CompactionJobExecutor,\n{\n    /// Main loop of the executor.\n    ///\n    /// In each iteration, we take the next job from the queue, and execute it.\n    /// The execution might add new jobs to the queue. Keep going until the\n    /// queue is empty.\n    ///\n    /// Initially, the job queue consists of one Divide job over the whole\n    /// level. 
On first call, it is divided into smaller jobs.\n    async fn execute(&mut self, ctx: &E::RequestContext) -> anyhow::Result<()> {\n        // TODO: this would be pretty straightforward to parallelize with FuturesUnordered\n        while let Some(next_job_id) = self.job_queue.pop() {\n            info!(\"executing job {}\", next_job_id.0);\n            self.execute_job(next_job_id, ctx).await?;\n        }\n\n        // all done!\n        Ok(())\n    }\n\n    async fn execute_job(&mut self, job_id: JobId, ctx: &E::RequestContext) -> anyhow::Result<()> {\n        let job = &self.jobs[job_id.0];\n        match job.strategy {\n            CompactionStrategy::Divide => {\n                self.divide_job(job_id, ctx).await?;\n                Ok(())\n            }\n            CompactionStrategy::CreateDelta => {\n                let mut deltas: Vec<E::DeltaLayer> = Vec::new();\n                let mut layer_ids: Vec<LayerId> = Vec::new();\n                for layer_id in &job.input_layers {\n                    let layer = &self.layers[layer_id.0].layer;\n                    if let Some(dl) = self.executor.downcast_delta_layer(layer, ctx).await? 
{\n                        deltas.push(dl.clone());\n                        layer_ids.push(*layer_id);\n                    }\n                }\n\n                self.executor\n                    .create_delta(&job.lsn_range, &job.key_range, &deltas, ctx)\n                    .await?;\n                self.jobs[job_id.0].completed = true;\n\n                // did we complete any fragments?\n                for layer_id in layer_ids {\n                    let l = &mut self.layers[layer_id.0];\n                    if let Some(deletable_after) = l.deletable_after.as_mut() {\n                        deletable_after.complete_job(job_id);\n                        if deletable_after.all_completed() {\n                            self.executor.delete_layer(&l.layer, ctx).await?;\n                            l.deleted = true;\n                        }\n                    }\n                }\n\n                self.next_level = true;\n\n                Ok(())\n            }\n            CompactionStrategy::CreateImage => {\n                self.executor\n                    .create_image(job.lsn_range.end, &job.key_range, ctx)\n                    .await?;\n                self.jobs[job_id.0].completed = true;\n\n                // TODO: we could check if any layers < PITR horizon became deletable\n                Ok(())\n            }\n        }\n    }\n\n    fn push_job(&mut self, job: CompactionJob<E>) -> JobId {\n        let job_id = JobId(self.jobs.len());\n        self.jobs.push(job);\n        self.job_queue.push(job_id);\n        job_id\n    }\n\n    /// Take a partition of the key space, and decide how to compact it.\n    ///\n    /// TODO: Currently, this is called exactly once for the level, and we\n    /// decide whether to create new image layers to cover the whole level, or\n    /// write a new set of deltas. 
In the future, this should try to partition\n    /// the key space, and make the decision separately for each partition.\n    async fn divide_job(&mut self, job_id: JobId, ctx: &E::RequestContext) -> anyhow::Result<()> {\n        let job = &self.jobs[job_id.0];\n        assert!(job.strategy == CompactionStrategy::Divide);\n\n        // Check for dummy cases\n        if job.input_layers.is_empty() {\n            return Ok(());\n        }\n\n        let job = &self.jobs[job_id.0];\n        assert!(job.strategy == CompactionStrategy::Divide);\n\n        // Would it be better to create images for this partition?\n        // Decide based on the average density of the level\n        let keyspace_size = keyspace_total_size(\n            &self\n                .executor\n                .get_keyspace(&job.key_range, job.lsn_range.end, ctx)\n                .await?,\n            &self.shard_identity,\n        ) * PAGE_SZ;\n\n        let wal_size = job\n            .input_layers\n            .iter()\n            .filter(|layer_id| self.layers[layer_id.0].layer.is_delta())\n            .map(|layer_id| self.layers[layer_id.0].layer.file_size())\n            .sum::<u64>();\n        if keyspace_size < wal_size {\n            // seems worth it\n            info!(\n                \"covering with images, because keyspace_size is {}, size of deltas between {}-{} is {}\",\n                keyspace_size, job.lsn_range.start, job.lsn_range.end, wal_size\n            );\n            self.cover_with_images(job_id, ctx).await\n        } else {\n            // do deltas\n            info!(\n                \"coverage not worth it, keyspace_size {}, wal_size {}\",\n                keyspace_size, wal_size\n            );\n            self.retile_deltas(job_id, ctx).await\n        }\n    }\n\n    // LSN\n    //  ^\n    //  |\n    //  |                          ###|###|#####\n    //  | +--+-----+--+            +--+-----+--+\n    //  | |  |     |  |            |  |     |  |\n    //  | 
+--+--+--+--+            +--+--+--+--+\n    //  | |     |     |            |     |     |\n    //  | +---+-+-+---+     ==>    +---+-+-+---+\n    //  | |   |   |   |            |   |   |   |\n    //  | +---+-+-++--+            +---+-+-++--+\n    //  | |     |  |  |            |     |  |  |\n    //  | +-----+--+--+            +-----+--+--+\n    //  |\n    //  +--------------> key\n    //\n    async fn cover_with_images(\n        &mut self,\n        job_id: JobId,\n        ctx: &E::RequestContext,\n    ) -> anyhow::Result<()> {\n        let job = &self.jobs[job_id.0];\n        assert!(job.strategy == CompactionStrategy::Divide);\n\n        // XXX: do we still need the \"holes\" stuff?\n\n        let mut new_jobs = Vec::new();\n\n        // Slide a window through the keyspace\n        let keyspace = self\n            .executor\n            .get_keyspace(&job.key_range, job.lsn_range.end, ctx)\n            .await?;\n\n        let mut window = KeyspaceWindow::new(\n            E::Key::MIN..E::Key::MAX,\n            keyspace,\n            self.target_file_size / PAGE_SZ,\n        );\n        while let Some(key_range) = window.choose_next_image(&self.shard_identity) {\n            new_jobs.push(CompactionJob::<E> {\n                key_range,\n                lsn_range: job.lsn_range.clone(),\n                strategy: CompactionStrategy::CreateImage,\n                input_layers: Vec::new(), // XXX: Is it OK for  this to be empty for image layer?\n                completed: false,\n            });\n        }\n\n        for j in new_jobs.into_iter().rev() {\n            let _job_id = self.push_job(j);\n\n            // TODO: image layers don't let us delete anything. 
unless < PITR horizon\n            //let j = &self.jobs[job_id.0];\n            // for layer_id in j.input_layers.iter() {\n            //    self.layers[layer_id.0].pending_stakeholders.insert(job_id);\n            //}\n        }\n\n        Ok(())\n    }\n\n    // Merge the contents of all the input delta layers into a new set\n    // of delta layers, based on the current partitioning.\n    //\n    // We split the new delta layers on the key dimension. We iterate through\n    // the key space, and for each key, check if including the next key to the\n    // current output layer we're building would cause the layer to become too\n    // large. If so, dump the current output layer and start new one.  It's\n    // possible that there is a single key with so many page versions that\n    // storing all of them in a single layer file would be too large. In that\n    // case, we also split on the LSN dimension.\n    //\n    // LSN\n    //  ^\n    //  |\n    //  | +-----------+            +--+--+--+--+\n    //  | |           |            |  |  |  |  |\n    //  | +-----------+            |  |  |  |  |\n    //  | |           |            |  |  |  |  |\n    //  | +-----------+     ==>    |  |  |  |  |\n    //  | |           |            |  |  |  |  |\n    //  | +-----------+            |  |  |  |  |\n    //  | |           |            |  |  |  |  |\n    //  | +-----------+            +--+--+--+--+\n    //  |\n    //  +--------------> key\n    //\n    //\n    // If one key (X) has a lot of page versions:\n    //\n    // LSN\n    //  ^\n    //  |                                 (X)\n    //  | +-----------+            +--+--+--+--+\n    //  | |           |            |  |  |  |  |\n    //  | +-----------+            |  |  +--+  |\n    //  | |           |            |  |  |  |  |\n    //  | +-----------+     ==>    |  |  |  |  |\n    //  | |           |            |  |  +--+  |\n    //  | +-----------+            |  |  |  |  |\n    //  | |           |            |  |  |  |  |\n  
  //  | +-----------+            +--+--+--+--+\n    //  |\n    //  +--------------> key\n    //\n    // TODO: this actually divides the layers into fixed-size chunks, not\n    // based on the partitioning.\n    //\n    // TODO: we should also opportunistically materialize and\n    // garbage collect what we can.\n    async fn retile_deltas(\n        &mut self,\n        job_id: JobId,\n        ctx: &E::RequestContext,\n    ) -> anyhow::Result<()> {\n        let job = &self.jobs[job_id.0];\n        assert!(job.strategy == CompactionStrategy::Divide);\n\n        // Sweep the key space left to right, running an estimate of how much\n        // disk size and keyspace we have accumulated\n        //\n        // Once the disk size reaches the target threshold, stop and think.\n        // If we have accumulated only a narrow band of keyspace, create an\n        // image layer. Otherwise write a delta layer.\n\n        // FIXME: we are ignoring images here. Did we already divide the work\n        // so that we won't encounter them here?\n\n        let mut deltas: Vec<E::DeltaLayer> = Vec::new();\n        for layer_id in &job.input_layers {\n            let l = &self.layers[layer_id.0];\n            if let Some(dl) = self.executor.downcast_delta_layer(&l.layer, ctx).await? 
{\n                deltas.push(dl.clone());\n            }\n        }\n        // Open stream\n        let key_value_stream = std::pin::pin!(\n            merge_delta_keys_buffered::<E>(deltas.as_slice(), ctx)\n                .await?\n                .map(Result::<_, anyhow::Error>::Ok)\n        );\n        let mut new_jobs = Vec::new();\n\n        // Slide a window through the keyspace\n        let mut key_accum =\n            std::pin::pin!(accum_key_values(key_value_stream, self.target_file_size));\n        let mut all_in_window: bool = false;\n        let mut window = Window::new();\n\n        // Helper function to create a job for a new delta layer with given key-lsn\n        // rectangle.\n        let create_delta_job = |key_range, lsn_range: &Range<Lsn>, new_jobs: &mut Vec<_>| {\n            // The inputs for the job are all the input layers of the original job that\n            // overlap with the rectangle.\n            let batch_layers: Vec<LayerId> = job\n                .input_layers\n                .iter()\n                .filter(|layer_id| {\n                    overlaps_with(self.layers[layer_id.0].layer.key_range(), &key_range)\n                })\n                .cloned()\n                .collect();\n            assert!(!batch_layers.is_empty());\n            new_jobs.push(CompactionJob {\n                key_range,\n                lsn_range: lsn_range.clone(),\n                strategy: CompactionStrategy::CreateDelta,\n                input_layers: batch_layers,\n                completed: false,\n            });\n        };\n\n        loop {\n            if all_in_window && window.is_empty() {\n                // All done!\n                break;\n            }\n\n            // If we now have enough keyspace for next delta layer in the window, create a\n            // new delta layer\n            if let Some(key_range) = window.choose_next_delta(self.target_file_size, !all_in_window)\n            {\n                
create_delta_job(key_range, &job.lsn_range, &mut new_jobs);\n                continue;\n            }\n            assert!(!all_in_window);\n\n            // Process next key in the key space\n            match key_accum.next().await.transpose()? {\n                None => {\n                    all_in_window = true;\n                }\n                Some(next_key) if next_key.partition_lsns.is_empty() => {\n                    // Normal case: extend the window by the key\n                    window.feed(next_key.key, next_key.size);\n                }\n                Some(next_key) => {\n                    // A key with too large size impact for a single delta layer. This\n                    // case occurs if you make a huge number of updates for a single key.\n                    //\n                    // Drain the window with has_more = false to make a clean cut before\n                    // the key, and then make dedicated delta layers for the single key.\n                    //\n                    // We cannot cluster the key with the others, because we don't want\n                    // layer files to overlap with each other in the lsn,key space (no\n                    // overlaps for the rectangles).\n                    let key = next_key.key;\n                    debug!(\"key {key} with size impact larger than the layer size\");\n                    while !window.is_empty() {\n                        let has_more = false;\n                        let key_range = window.choose_next_delta(self.target_file_size, has_more)\n                            .expect(\"with has_more==false, choose_next_delta always returns something for a non-empty Window\");\n                        create_delta_job(key_range, &job.lsn_range, &mut new_jobs);\n                    }\n\n                    // Not really required: but here for future resilience:\n                    // We make a \"gap\" here, so any structure the window holds should\n                    // 
probably be reset.\n                    window = Window::new();\n\n                    let mut prior_lsn = job.lsn_range.start;\n                    let mut lsn_ranges = Vec::new();\n                    for (lsn, _size) in next_key.partition_lsns.iter() {\n                        lsn_ranges.push(prior_lsn..*lsn);\n                        prior_lsn = *lsn;\n                    }\n                    lsn_ranges.push(prior_lsn..job.lsn_range.end);\n                    for lsn_range in lsn_ranges {\n                        let key_range = key..key.next();\n                        create_delta_job(key_range, &lsn_range, &mut new_jobs);\n                    }\n                }\n            }\n        }\n\n        // All the input files are rewritten. Set up the tracking for when they can\n        // be deleted.\n        for layer_id in job.input_layers.iter() {\n            let l = &mut self.layers[layer_id.0];\n            assert!(l.deletable_after.is_none());\n            l.deletable_after = Some(PendingJobSet::new());\n        }\n        for j in new_jobs.into_iter().rev() {\n            let job_id = self.push_job(j);\n            let j = &self.jobs[job_id.0];\n            for layer_id in j.input_layers.iter() {\n                self.layers[layer_id.0]\n                    .deletable_after\n                    .as_mut()\n                    .unwrap()\n                    .pending\n                    .insert(job_id);\n            }\n        }\n\n        Ok(())\n    }\n}\n\n/// Sliding window through keyspace and values for image layer\n/// This is used by [`LevelCompactionState::cover_with_images`] to decide on good split points\nstruct KeyspaceWindow<K> {\n    head: KeyspaceWindowHead<K>,\n\n    start_pos: KeyspaceWindowPos<K>,\n}\nstruct KeyspaceWindowHead<K> {\n    // overall key range to cover\n    key_range: Range<K>,\n\n    keyspace: Vec<Range<K>>,\n    target_keysize: u64,\n}\n\n#[derive(Clone)]\nstruct KeyspaceWindowPos<K> {\n    end_key: K,\n\n    
keyspace_idx: usize,\n\n    accum_keysize: u64,\n}\nimpl<K: CompactionKey> KeyspaceWindowPos<K> {\n    fn reached_end(&self, w: &KeyspaceWindowHead<K>) -> bool {\n        self.keyspace_idx == w.keyspace.len()\n    }\n\n    // Advance the cursor until it reaches 'target_keysize'.\n    fn advance_until_size(\n        &mut self,\n        w: &KeyspaceWindowHead<K>,\n        max_size: u64,\n        shard_identity: &ShardIdentity,\n    ) {\n        while self.accum_keysize < max_size && !self.reached_end(w) {\n            let curr_range = &w.keyspace[self.keyspace_idx];\n            if self.end_key < curr_range.start {\n                // skip over any unused space\n                self.end_key = curr_range.start;\n            }\n\n            // We're now within 'curr_range'. Can we advance past it completely?\n            let distance = K::key_range_size(&(self.end_key..curr_range.end), shard_identity);\n            if (self.accum_keysize + distance as u64) < max_size {\n                // oh yeah, it fits\n                self.end_key = curr_range.end;\n                self.keyspace_idx += 1;\n                self.accum_keysize += distance as u64;\n            } else {\n                // advance within the range\n                let skip_key = self.end_key.skip_some();\n                let distance = K::key_range_size(&(self.end_key..skip_key), shard_identity);\n                if (self.accum_keysize + distance as u64) < max_size {\n                    self.end_key = skip_key;\n                    self.accum_keysize += distance as u64;\n                } else {\n                    self.end_key = self.end_key.next();\n                    self.accum_keysize += 1;\n                }\n            }\n        }\n    }\n}\n\nimpl<K> KeyspaceWindow<K>\nwhere\n    K: CompactionKey,\n{\n    fn new(key_range: Range<K>, keyspace: CompactionKeySpace<K>, target_keysize: u64) -> Self {\n        assert!(keyspace.first().unwrap().start >= key_range.start);\n\n        let start_key = 
key_range.start;\n        let start_pos = KeyspaceWindowPos::<K> {\n            end_key: start_key,\n            keyspace_idx: 0,\n            accum_keysize: 0,\n        };\n        Self {\n            head: KeyspaceWindowHead::<K> {\n                key_range,\n                keyspace,\n                target_keysize,\n            },\n            start_pos,\n        }\n    }\n\n    fn choose_next_image(&mut self, shard_identity: &ShardIdentity) -> Option<Range<K>> {\n        if self.start_pos.keyspace_idx == self.head.keyspace.len() {\n            // we've reached the end\n            return None;\n        }\n\n        let mut next_pos = self.start_pos.clone();\n        next_pos.advance_until_size(\n            &self.head,\n            self.start_pos.accum_keysize + self.head.target_keysize,\n            shard_identity,\n        );\n\n        // See if we can gobble up the rest of the keyspace if we stretch out the layer, up to\n        // 1.25x target size\n        let mut end_pos = next_pos.clone();\n        end_pos.advance_until_size(\n            &self.head,\n            self.start_pos.accum_keysize + (self.head.target_keysize * 5 / 4),\n            shard_identity,\n        );\n        if end_pos.reached_end(&self.head) {\n            // gobble up any unused keyspace between the last used key and end of the range\n            assert!(end_pos.end_key <= self.head.key_range.end);\n            end_pos.end_key = self.head.key_range.end;\n            next_pos = end_pos;\n        }\n\n        let start_key = self.start_pos.end_key;\n        self.start_pos = next_pos;\n        Some(start_key..self.start_pos.end_key)\n    }\n}\n\n// Take previous partitioning, based on the image layers below.\n//\n// Candidate is at the front:\n//\n// Consider stretching an image layer to next divider? If it's close enough,\n// that's the image candidate\n//\n// If it's too far, consider splitting at a reasonable point\n//\n// Is the image candidate smaller than the equivalent delta? 
If so,\n// split off the image. Otherwise, split off one delta.\n// Try to snap off the delta at a reasonable point\n\nstruct WindowElement<K> {\n    start_key: K, // inclusive\n    last_key: K,  // inclusive\n    accum_size: u64,\n}\n\n/// Sliding window through keyspace and values for delta layer tiling\n///\n/// This is used to decide which delta layer to write next.\nstruct Window<K> {\n    elems: VecDeque<WindowElement<K>>,\n\n    // last key that was split off, inclusive\n    splitoff_key: Option<K>,\n    splitoff_size: u64,\n}\n\nimpl<K> Window<K>\nwhere\n    K: CompactionKey,\n{\n    fn new() -> Self {\n        Self {\n            elems: VecDeque::new(),\n            splitoff_key: None,\n            splitoff_size: 0,\n        }\n    }\n\n    fn feed(&mut self, key: K, size: u64) {\n        let last_size;\n        if let Some(last) = self.elems.back_mut() {\n            // We require the keys to be strictly increasing for the window.\n            // Keys should already have been deduplicated by `accum_key_values`\n            assert!(\n                last.last_key < key,\n                \"last_key(={}) >= key(={key})\",\n                last.last_key\n            );\n            last_size = last.accum_size;\n        } else {\n            last_size = 0;\n        }\n        // This is a new key.\n        let elem = WindowElement {\n            start_key: key,\n            last_key: key,\n            accum_size: last_size + size,\n        };\n        self.elems.push_back(elem);\n    }\n\n    fn remain_size(&self) -> u64 {\n        self.elems.back().unwrap().accum_size - self.splitoff_size\n    }\n\n    fn peek_size(&self) -> u64 {\n        self.elems.front().unwrap().accum_size - self.splitoff_size\n    }\n\n    fn is_empty(&self) -> bool {\n        self.elems.is_empty()\n    }\n\n    fn commit_upto(&mut self, mut upto: usize) {\n        while upto > 1 {\n            let popped = self.elems.pop_front().unwrap();\n            
self.elems.front_mut().unwrap().start_key = popped.start_key;\n            upto -= 1;\n        }\n    }\n\n    fn find_size_split(&self, target_size: u64) -> usize {\n        self.elems\n            .partition_point(|elem| elem.accum_size - self.splitoff_size < target_size)\n    }\n\n    fn pop(&mut self) {\n        let first = self.elems.pop_front().unwrap();\n        self.splitoff_size = first.accum_size;\n\n        self.splitoff_key = Some(first.last_key);\n    }\n\n    // the difference between delta and image is that an image covers\n    // any unused keyspace before and after, while a delta tries to\n    // minimize that. TODO: difference not implemented\n    fn pop_delta(&mut self) -> Range<K> {\n        let first = self.elems.front().unwrap();\n        let key_range = first.start_key..first.last_key.next();\n\n        self.pop();\n        key_range\n    }\n\n    // Prerequisite: we have enough input in the window\n    //\n    // On return None, the caller should feed more data and call again\n    fn choose_next_delta(&mut self, target_size: u64, has_more: bool) -> Option<Range<K>> {\n        if has_more && self.elems.is_empty() {\n            // Starting up\n            return None;\n        }\n\n        // If we still have an undersized candidate, just keep going\n        while self.peek_size() < target_size {\n            if self.elems.len() > 1 {\n                self.commit_upto(2);\n            } else if has_more {\n                return None;\n            } else {\n                break;\n            }\n        }\n\n        // Ensure we have enough input in the window to make a good decision\n        if has_more && self.remain_size() < target_size * 5 / 4 {\n            return None;\n        }\n\n        // The candidate on the front is now large enough, for a delta.\n        // And we have enough data in the window to decide.\n\n        // If we're willing to stretch it up to 1.25 target size, could we\n        // gobble up the rest of the work? 
This avoids creating very small\n        // \"tail\" layers at the end of the keyspace\n        if !has_more && self.remain_size() < target_size * 5 / 4 {\n            self.commit_upto(self.elems.len());\n        } else {\n            let delta_split_at = self.find_size_split(target_size);\n            self.commit_upto(delta_split_at);\n\n            // If it's still not large enough, request the caller to fill the window\n            if self.elems.len() == 1 && has_more {\n                return None;\n            }\n        }\n        Some(self.pop_delta())\n    }\n}\n"
  },
  {
    "path": "pageserver/compaction/src/helpers.rs",
    "content": "//! This file contains generic utility functions over the interface types,\n//! which could be handy for any compaction implementation.\nuse std::collections::{BinaryHeap, VecDeque};\nuse std::fmt::Display;\nuse std::future::Future;\nuse std::ops::{DerefMut, Range};\nuse std::pin::Pin;\nuse std::task::{Poll, ready};\n\nuse futures::future::BoxFuture;\nuse futures::{Stream, StreamExt};\nuse itertools::Itertools;\nuse pageserver_api::shard::ShardIdentity;\nuse pin_project_lite::pin_project;\nuse utils::lsn::Lsn;\n\nuse crate::interface::*;\n\npub const PAGE_SZ: u64 = 8192;\n\npub fn keyspace_total_size<K>(\n    keyspace: &CompactionKeySpace<K>,\n    shard_identity: &ShardIdentity,\n) -> u64\nwhere\n    K: CompactionKey,\n{\n    keyspace\n        .iter()\n        .map(|r| K::key_range_size(r, shard_identity) as u64)\n        .sum()\n}\n\npub fn overlaps_with<T: Ord>(a: &Range<T>, b: &Range<T>) -> bool {\n    !(a.end <= b.start || b.end <= a.start)\n}\n\n/// Whether a fully contains b, example as below\n/// ```plain\n/// |      a       |\n///       |  b  |\n/// ```\npub fn fully_contains<T: Ord>(a: &Range<T>, b: &Range<T>) -> bool {\n    a.start <= b.start && a.end >= b.end\n}\n\npub fn union_to_keyspace<K: Ord>(a: &mut CompactionKeySpace<K>, b: CompactionKeySpace<K>) {\n    let x = std::mem::take(a);\n    let mut all_ranges_iter = [x.into_iter(), b.into_iter()]\n        .into_iter()\n        .kmerge_by(|a, b| a.start < b.start);\n    let mut ranges = Vec::new();\n    if let Some(first) = all_ranges_iter.next() {\n        let (mut start, mut end) = (first.start, first.end);\n\n        for r in all_ranges_iter {\n            assert!(r.start >= start);\n            if r.start > end {\n                ranges.push(start..end);\n                start = r.start;\n                end = r.end;\n            } else if r.end > end {\n                end = r.end;\n            }\n        }\n        ranges.push(start..end);\n    }\n    *a = ranges\n}\n\npub fn 
intersect_keyspace<K: Ord + Clone + Copy>(\n    a: &CompactionKeySpace<K>,\n    r: &Range<K>,\n) -> CompactionKeySpace<K> {\n    let mut ranges: Vec<Range<K>> = Vec::new();\n\n    for x in a.iter() {\n        if x.end <= r.start {\n            continue;\n        }\n        if x.start >= r.end {\n            break;\n        }\n        ranges.push(x.clone())\n    }\n\n    // trim the ends\n    if let Some(first) = ranges.first_mut() {\n        first.start = std::cmp::max(first.start, r.start);\n    }\n    if let Some(last) = ranges.last_mut() {\n        last.end = std::cmp::min(last.end, r.end);\n    }\n    ranges\n}\n\n/// Create a stream that iterates through all DeltaEntrys among all input\n/// layers, in key-lsn order.\n///\n/// This is public because the create_delta() implementation likely wants to use this too\n/// TODO: move to a more shared place\npub fn merge_delta_keys<'a, E: CompactionJobExecutor>(\n    layers: &'a [E::DeltaLayer],\n    ctx: &'a E::RequestContext,\n) -> MergeDeltaKeys<'a, E> {\n    // Use a binary heap to merge the layers. Each input layer is initially\n    // represented by a LazyLoadLayer::Unloaded element, which uses the start of\n    // the layer's key range as the key. 
The first time a layer reaches the top\n    // of the heap, all the keys of the layer are loaded into a sorted vector.\n    //\n    // This helps to keep the memory usage reasonable: we only need to hold in\n    // memory the DeltaEntrys of the layers that overlap with the \"current\" key.\n    let mut heap: BinaryHeap<LazyLoadLayer<'a, E>> = BinaryHeap::new();\n    for l in layers {\n        heap.push(LazyLoadLayer::Unloaded(l));\n    }\n    MergeDeltaKeys {\n        heap,\n        ctx,\n        load_future: None,\n    }\n}\n\npub async fn merge_delta_keys_buffered<'a, E: CompactionJobExecutor + 'a>(\n    layers: &'a [E::DeltaLayer],\n    ctx: &'a E::RequestContext,\n) -> anyhow::Result<impl Stream<Item = <E::DeltaLayer as CompactionDeltaLayer<E>>::DeltaEntry<'a>>>\n{\n    let mut keys = Vec::new();\n    for l in layers {\n        // Boxing and casting to LoadFuture is required to obtain the right Sync bound.\n        // If we do l.load_keys(ctx).await? directly, there is a compilation error.\n        let load_future: LoadFuture<'a, _> = Box::pin(l.load_keys(ctx));\n        keys.extend(load_future.await?.into_iter());\n    }\n    keys.sort_by_key(|k| (k.key(), k.lsn()));\n    let stream = futures::stream::iter(keys.into_iter());\n    Ok(stream)\n}\n\nenum LazyLoadLayer<'a, E: CompactionJobExecutor> {\n    Loaded(VecDeque<<E::DeltaLayer as CompactionDeltaLayer<E>>::DeltaEntry<'a>>),\n    Unloaded(&'a E::DeltaLayer),\n}\nimpl<E: CompactionJobExecutor> LazyLoadLayer<'_, E> {\n    fn min_key(&self) -> E::Key {\n        match self {\n            Self::Loaded(entries) => entries.front().unwrap().key(),\n            Self::Unloaded(dl) => dl.key_range().start,\n        }\n    }\n    fn min_lsn(&self) -> Lsn {\n        match self {\n            Self::Loaded(entries) => entries.front().unwrap().lsn(),\n            Self::Unloaded(dl) => dl.lsn_range().start,\n        }\n    }\n}\nimpl<E: CompactionJobExecutor> PartialOrd for LazyLoadLayer<'_, E> {\n    fn partial_cmp(&self, 
other: &Self) -> Option<std::cmp::Ordering> {\n        Some(self.cmp(other))\n    }\n}\nimpl<E: CompactionJobExecutor> Ord for LazyLoadLayer<'_, E> {\n    fn cmp(&self, other: &Self) -> std::cmp::Ordering {\n        // reverse order so that we get a min-heap\n        (other.min_key(), other.min_lsn()).cmp(&(self.min_key(), self.min_lsn()))\n    }\n}\nimpl<E: CompactionJobExecutor> PartialEq for LazyLoadLayer<'_, E> {\n    fn eq(&self, other: &Self) -> bool {\n        self.cmp(other) == std::cmp::Ordering::Equal\n    }\n}\nimpl<E: CompactionJobExecutor> Eq for LazyLoadLayer<'_, E> {}\n\ntype LoadFuture<'a, E> = BoxFuture<'a, anyhow::Result<Vec<E>>>;\n\n// Stream returned by `merge_delta_keys`\npin_project! {\n#[allow(clippy::type_complexity)]\npub struct MergeDeltaKeys<'a, E: CompactionJobExecutor> {\n    heap: BinaryHeap<LazyLoadLayer<'a, E>>,\n\n    #[pin]\n    load_future: Option<LoadFuture<'a, <E::DeltaLayer as CompactionDeltaLayer<E>>::DeltaEntry<'a>>>,\n\n    ctx: &'a E::RequestContext,\n}\n}\n\nimpl<'a, E> Stream for MergeDeltaKeys<'a, E>\nwhere\n    E: CompactionJobExecutor + 'a,\n{\n    type Item = anyhow::Result<<E::DeltaLayer as CompactionDeltaLayer<E>>::DeltaEntry<'a>>;\n\n    fn poll_next(\n        self: Pin<&mut Self>,\n        cx: &mut std::task::Context<'_>,\n    ) -> Poll<std::option::Option<<Self as futures::Stream>::Item>> {\n        let mut this = self.project();\n        loop {\n            if let Some(mut load_future) = this.load_future.as_mut().as_pin_mut() {\n                // We are waiting for loading the keys to finish\n                match ready!(load_future.as_mut().poll(cx)) {\n                    Ok(entries) => {\n                        this.load_future.set(None);\n                        *this.heap.peek_mut().unwrap() =\n                            LazyLoadLayer::Loaded(VecDeque::from(entries));\n                    }\n                    Err(e) => {\n                        return Poll::Ready(Some(Err(e)));\n                    
}\n                }\n            }\n\n            // If the topmost layer in the heap hasn't been loaded yet, start\n            // loading it. Otherwise return the next entry from it and update\n            // the layer's position in the heap (this decreaseKey operation is\n            // performed implicitly when `top` is dropped).\n            if let Some(mut top) = this.heap.peek_mut() {\n                match top.deref_mut() {\n                    LazyLoadLayer::Unloaded(l) => {\n                        let fut = l.load_keys(this.ctx);\n                        this.load_future.set(Some(Box::pin(fut)));\n                        continue;\n                    }\n                    LazyLoadLayer::Loaded(entries) => {\n                        let result = entries.pop_front().unwrap();\n                        if entries.is_empty() {\n                            std::collections::binary_heap::PeekMut::pop(top);\n                        }\n                        return Poll::Ready(Some(Ok(result)));\n                    }\n                }\n            } else {\n                return Poll::Ready(None);\n            }\n        }\n    }\n}\n\n// Accumulate values at key boundaries\npub struct KeySize<K> {\n    pub key: K,\n    pub num_values: u64,\n    pub size: u64,\n    /// The lsns to partition at (if empty then no per-lsn partitioning)\n    pub partition_lsns: Vec<(Lsn, u64)>,\n}\n\npub fn accum_key_values<'a, I, K, D, E>(\n    input: I,\n    target_size: u64,\n) -> impl Stream<Item = Result<KeySize<K>, E>>\nwhere\n    K: Eq + PartialOrd + Display + Copy,\n    I: Stream<Item = Result<D, E>>,\n    D: CompactionDeltaEntry<'a, K>,\n{\n    async_stream::try_stream! 
{\n        // Initialize the state from the first value\n        let mut input = std::pin::pin!(input);\n\n        if let Some(first) = input.next().await {\n            let first = first?;\n            let mut part_size = first.size();\n            let mut accum: KeySize<K> = KeySize {\n                key: first.key(),\n                num_values: 1,\n                size: part_size,\n                partition_lsns: Vec::new(),\n            };\n            let mut last_key = accum.key;\n            while let Some(this) = input.next().await {\n                let this = this?;\n                if this.key() == accum.key {\n                    let add_size = this.size();\n                    if part_size + add_size > target_size {\n                        accum.partition_lsns.push((this.lsn(), part_size));\n                        part_size = 0;\n                    }\n                    part_size += add_size;\n                    accum.size += add_size;\n                    accum.num_values += 1;\n                } else {\n                    assert!(last_key <= accum.key, \"last_key={last_key} <= accum.key={}\", accum.key);\n                    last_key = accum.key;\n                    yield accum;\n                    part_size = this.size();\n                    accum = KeySize {\n                        key: this.key(),\n                        num_values: 1,\n                        size: part_size,\n                        partition_lsns: Vec::new(),\n                    };\n                }\n            }\n            assert!(last_key <= accum.key, \"last_key={last_key} <= accum.key={}\", accum.key);\n            yield accum;\n        }\n    }\n}\n"
  },
  {
    "path": "pageserver/compaction/src/identify_levels.rs",
    "content": "//! An LSM tree consists of multiple levels, each exponentially larger than the\n//! previous level. And each level consists of multiple \"tiers\". With tiered\n//! compaction, a level is compacted when it has accumulated more than N tiers,\n//! forming one tier on the next level.\n//!\n//! In the pageserver, we don't explicitly track the levels and tiers. Instead,\n//! we identify them by looking at the shapes of the layers. It's an easy task\n//! for a human, but it's not straightforward to come up with the exact\n//! rules. Especially if there are cases like interrupted, half-finished\n//! compactions, or highly skewed data distributions that have let us \"skip\"\n//! some levels. It's not critical to classify all cases correctly; at worst we\n//! delay some compaction work, and suffer from more read amplification, or we\n//! perform some unnecessary compaction work.\n//!\n//! `identify_level` performs that shape-matching.\n//!\n//! It returns a Level struct, which has `depth()` function to count the number\n//! of \"tiers\" in the level. The tier count is the max depth of stacked layers\n//! within the level. That's a good measure, because the point of compacting is\n//! to reduce read amplification, and the depth is what determines that.\n//!\n//! One interesting effect of this is that if we generate very small delta\n//! layers at L0, e.g. because the L0 layers are flushed by timeout rather than\n//! because they reach the target size, the L0 compaction will combine them to\n//! one larger file. But if the combined file is still smaller than the target\n//! file size, the file will still be considered to be part of L0 at the next\n//! 
iteration.\n\nuse std::collections::BTreeSet;\nuse std::ops::Range;\n\nuse anyhow::bail;\nuse tracing::{info, trace};\nuse utils::lsn::Lsn;\n\nuse crate::interface::*;\n\npub struct Level<L> {\n    pub lsn_range: Range<Lsn>,\n    pub layers: Vec<L>,\n}\n\n/// Identify an LSN < `end_lsn` that partitions the LSN space, so that there are\n/// no layers that cross the boundary LSN.\n///\n/// A further restriction is that all layers in the returned partition cover at\n/// most 'lsn_max_size' LSN bytes.\npub async fn identify_level<K, L>(\n    all_layers: Vec<L>,\n    end_lsn: Lsn,\n    lsn_max_size: u64,\n) -> anyhow::Result<Option<Level<L>>>\nwhere\n    K: CompactionKey,\n    L: CompactionLayer<K> + Clone,\n{\n    // filter out layers that are above the `end_lsn`, they are completely irrelevant.\n    let mut layers = Vec::new();\n    for l in all_layers {\n        if l.lsn_range().start < end_lsn && l.lsn_range().end > end_lsn {\n            // shouldn't happen. Indicates that the caller passed a bogus\n            // end_lsn.\n            bail!(\n                \"identify_level() called with end_lsn that does not partition the LSN space: end_lsn {} intersects with layer {}\",\n                end_lsn,\n                l.short_id()\n            );\n        }\n        // include image layers sitting exactly at `end_lsn`.\n        let is_image = !l.is_delta();\n        if (is_image && l.lsn_range().start > end_lsn)\n            || (!is_image && l.lsn_range().start >= end_lsn)\n        {\n            continue;\n        }\n        layers.push(l);\n    }\n    // All the remaining layers either belong to this level, or are below it.\n    info!(\n        \"identify level at {}, size {}, num layers below: {}\",\n        end_lsn,\n        lsn_max_size,\n        layers.len()\n    );\n    if layers.is_empty() {\n        return Ok(None);\n    }\n\n    // Walk the ranges in LSN order.\n    //\n    // ----- end_lsn\n    //  |\n    //  |\n    //  v\n    //\n    layers.sort_by_key(|l| 
l.lsn_range().end);\n    let mut candidate_start_lsn = end_lsn;\n    let mut candidate_layers: Vec<L> = Vec::new();\n    let mut current_best_start_lsn = end_lsn;\n    let mut current_best_layers: Vec<L> = Vec::new();\n    let mut iter = layers.into_iter();\n    loop {\n        let Some(l) = iter.next_back() else {\n            // Reached end. Accept the last candidate\n            current_best_start_lsn = candidate_start_lsn;\n            current_best_layers.extend_from_slice(&std::mem::take(&mut candidate_layers));\n            break;\n        };\n        trace!(\n            \"inspecting {} for candidate {}, current best {}\",\n            l.short_id(),\n            candidate_start_lsn,\n            current_best_start_lsn\n        );\n\n        let r = l.lsn_range();\n\n        // Image layers don't restrict our choice of cutoff LSN\n        if l.is_delta() {\n            // Is this candidate workable? In other words, are there any\n            // delta layers that span across this LSN\n            //\n            // Valid:                 Not valid:\n            //  +                     +\n            //  |                     | +\n            //  +  <- candidate       + |   <- candidate\n            //     +                    +\n            //     |\n            //     +\n            if r.end <= candidate_start_lsn {\n                // Hooray, there are no crossing LSNs. And we have visited\n                // through all the layers within candidate..end_lsn. The\n                // current candidate can be accepted.\n                current_best_start_lsn = r.end;\n                current_best_layers.extend_from_slice(&std::mem::take(&mut candidate_layers));\n                candidate_start_lsn = r.start;\n            }\n\n            // Is it small enough to be considered part of this level?\n            if r.end.0 - r.start.0 > lsn_max_size {\n                // Too large, this layer belongs to next level. 
Stop.\n                trace!(\n                    \"too large {}, size {} vs {}\",\n                    l.short_id(),\n                    r.end.0 - r.start.0,\n                    lsn_max_size\n                );\n                break;\n            }\n\n            // If this crosses the candidate lsn, push it down.\n            if r.start < candidate_start_lsn {\n                trace!(\n                    \"layer {} prevents from stopping at {}\",\n                    l.short_id(),\n                    candidate_start_lsn\n                );\n                candidate_start_lsn = r.start;\n            }\n        }\n\n        // Include this layer in our candidate\n        candidate_layers.push(l);\n    }\n\n    Ok(if current_best_start_lsn == end_lsn {\n        // empty level\n        None\n    } else {\n        Some(Level {\n            lsn_range: current_best_start_lsn..end_lsn,\n            layers: current_best_layers,\n        })\n    })\n}\n\nimpl<L> Level<L> {\n    /// Count the number of deltas stacked on each other.\n    pub fn depth<K>(&self) -> u64\n    where\n        K: CompactionKey,\n        L: CompactionLayer<K>,\n    {\n        struct Event<K> {\n            key: K,\n            layer_idx: usize,\n            start: bool,\n        }\n        let mut events: Vec<Event<K>> = Vec::new();\n        for (idx, l) in self.layers.iter().enumerate() {\n            let key_range = l.key_range();\n            if key_range.end == key_range.start.next() && l.is_delta() {\n                // Ignore single-key delta layers as they can be stacked on top of each other\n                // as that is the only way to cut further.\n                continue;\n            }\n            events.push(Event {\n                key: l.key_range().start,\n                layer_idx: idx,\n                start: true,\n            });\n            events.push(Event {\n                key: l.key_range().end,\n                layer_idx: idx,\n                start: false,\n    
        });\n        }\n        events.sort_by_key(|e| (e.key, e.start));\n\n        // Sweep the key space left to right. Stop at each distinct key, and\n        // count the number of deltas on top of the highest image at that key.\n        //\n        // This is a little inefficient, as we walk through the active_set on\n        // every key. We could increment/decrement a counter on each step\n        // instead, but that'd require a bit more complex bookkeeping.\n        let mut active_set: BTreeSet<(Lsn, bool, usize)> = BTreeSet::new();\n        let mut max_depth = 0;\n        let mut events_iter = events.iter().peekable();\n        while let Some(e) = events_iter.next() {\n            let l = &self.layers[e.layer_idx];\n            let is_image = !l.is_delta();\n\n            // update the active set\n            if e.start {\n                active_set.insert((l.lsn_range().end, is_image, e.layer_idx));\n            } else {\n                active_set.remove(&(l.lsn_range().end, is_image, e.layer_idx));\n            }\n\n            // recalculate depth if this was the last event at this point\n            let more_events_at_this_key =\n                events_iter.peek().is_some_and(|next_e| next_e.key == e.key);\n            if !more_events_at_this_key {\n                let mut active_depth = 0;\n                for (_end_lsn, is_image, _idx) in active_set.iter().rev() {\n                    if *is_image {\n                        break;\n                    }\n                    active_depth += 1;\n                }\n                if active_depth > max_depth {\n                    max_depth = active_depth;\n                }\n            }\n        }\n        debug_assert_eq!(active_set, BTreeSet::new());\n        max_depth\n    }\n}\n\n#[cfg(test)]\nmod tests {\n    use std::sync::{Arc, Mutex};\n\n    use super::*;\n    use crate::simulator::{Key, MockDeltaLayer, MockImageLayer, MockLayer};\n\n    fn delta(key_range: Range<Key>, lsn_range: 
Range<Lsn>) -> MockLayer {\n        MockLayer::Delta(Arc::new(MockDeltaLayer {\n            key_range,\n            lsn_range,\n            // identify_level() doesn't pay attention to the rest of the fields\n            file_size: 0,\n            deleted: Mutex::new(false),\n            records: vec![],\n        }))\n    }\n\n    fn image(key_range: Range<Key>, lsn: Lsn) -> MockLayer {\n        MockLayer::Image(Arc::new(MockImageLayer {\n            key_range,\n            lsn_range: lsn..(lsn + 1),\n            // identify_level() doesn't pay attention to the rest of the fields\n            file_size: 0,\n            deleted: Mutex::new(false),\n        }))\n    }\n\n    #[tokio::test]\n    async fn test_identify_level() -> anyhow::Result<()> {\n        let layers = vec![\n            delta(Key::MIN..Key::MAX, Lsn(0x8000)..Lsn(0x9000)),\n            delta(Key::MIN..Key::MAX, Lsn(0x5000)..Lsn(0x7000)),\n            delta(Key::MIN..Key::MAX, Lsn(0x4000)..Lsn(0x5000)),\n            delta(Key::MIN..Key::MAX, Lsn(0x3000)..Lsn(0x4000)),\n            delta(Key::MIN..Key::MAX, Lsn(0x2000)..Lsn(0x3000)),\n            delta(Key::MIN..Key::MAX, Lsn(0x1000)..Lsn(0x2000)),\n        ];\n\n        // All layers fit in the max file size\n        let level = identify_level(layers.clone(), Lsn(0x10000), 0x2000)\n            .await?\n            .unwrap();\n        assert_eq!(level.depth(), 6);\n\n        // Same LSN with smaller max file size. 
The second layer from the top is larger\n        // and belongs to next level.\n        let level = identify_level(layers.clone(), Lsn(0x10000), 0x1000)\n            .await?\n            .unwrap();\n        assert_eq!(level.depth(), 1);\n\n        // Call with a smaller LSN\n        let level = identify_level(layers.clone(), Lsn(0x3000), 0x1000)\n            .await?\n            .unwrap();\n        assert_eq!(level.depth(), 2);\n\n        // Call with an LSN that doesn't partition the space\n        let result = identify_level(layers, Lsn(0x6000), 0x1000).await;\n        assert!(result.is_err());\n        Ok(())\n    }\n\n    #[tokio::test]\n    async fn test_overlapping_lsn_ranges() -> anyhow::Result<()> {\n        // The files LSN ranges overlap, so even though there are more files that\n        // fit under the file size, they are not included in the level because they\n        // overlap so that we'd need to include the oldest file, too, which is\n        // larger\n        let layers = vec![\n            delta(Key::MIN..Key::MAX, Lsn(0x4000)..Lsn(0x5000)),\n            delta(Key::MIN..Key::MAX, Lsn(0x3000)..Lsn(0x4000)), // overlap\n            delta(Key::MIN..Key::MAX, Lsn(0x2500)..Lsn(0x3500)), // overlap\n            delta(Key::MIN..Key::MAX, Lsn(0x2000)..Lsn(0x3000)), // overlap\n            delta(Key::MIN..Key::MAX, Lsn(0x1000)..Lsn(0x2500)), // larger\n        ];\n\n        let level = identify_level(layers.clone(), Lsn(0x10000), 0x1000)\n            .await?\n            .unwrap();\n        assert_eq!(level.depth(), 1);\n\n        Ok(())\n    }\n\n    #[tokio::test]\n    async fn test_depth_nonoverlapping() -> anyhow::Result<()> {\n        // The key ranges don't overlap, so depth is only 1.\n        let layers = vec![\n            delta(4000..5000, Lsn(0x6000)..Lsn(0x7000)),\n            delta(3000..4000, Lsn(0x7000)..Lsn(0x8000)),\n            delta(1000..2000, Lsn(0x8000)..Lsn(0x9000)),\n        ];\n\n        let level = identify_level(layers.clone(), 
Lsn(0x10000), 0x2000)\n            .await?\n            .unwrap();\n        assert_eq!(level.layers.len(), 3);\n        assert_eq!(level.depth(), 1);\n\n        // Staggered. The 1st and 3rd layer don't overlap with each other.\n        let layers = vec![\n            delta(1000..2000, Lsn(0x8000)..Lsn(0x9000)),\n            delta(1500..2500, Lsn(0x7000)..Lsn(0x8000)),\n            delta(2000..3000, Lsn(0x6000)..Lsn(0x7000)),\n        ];\n\n        let level = identify_level(layers.clone(), Lsn(0x10000), 0x2000)\n            .await?\n            .unwrap();\n        assert_eq!(level.layers.len(), 3);\n        assert_eq!(level.depth(), 2);\n        Ok(())\n    }\n\n    #[tokio::test]\n    async fn test_depth_images() -> anyhow::Result<()> {\n        let layers: Vec<MockLayer> = vec![\n            delta(1000..2000, Lsn(0x8000)..Lsn(0x9000)),\n            delta(1500..2500, Lsn(0x7000)..Lsn(0x8000)),\n            delta(2000..3000, Lsn(0x6000)..Lsn(0x7000)),\n            // This covers the same key range as the 2nd delta layer. The depth\n            // in that key range is therefore 0.\n            image(1500..2500, Lsn(0x9000)),\n        ];\n\n        let level = identify_level(layers.clone(), Lsn(0x10000), 0x2000)\n            .await?\n            .unwrap();\n        assert_eq!(level.layers.len(), 4);\n        assert_eq!(level.depth(), 1);\n        Ok(())\n    }\n}\n"
  },
  {
    "path": "pageserver/compaction/src/interface.rs",
    "content": "//! This is what the compaction implementation needs to know about\n//! layers, keyspace etc.\n//!\n//! All the heavy lifting is done by the create_image and create_delta\n//! functions that the implementor provides.\nuse std::ops::Range;\n\nuse futures::Future;\nuse pageserver_api::key::Key;\nuse pageserver_api::keyspace::ShardedRange;\nuse pageserver_api::shard::ShardIdentity;\nuse utils::lsn::Lsn;\n\n/// Public interface. This is the main thing that the implementor needs to provide\npub trait CompactionJobExecutor {\n    // Type system.\n    //\n    // We assume that there are two kinds of layers, deltas and images. The\n    // compaction doesn't distinguish whether they are stored locally or\n    // remotely.\n    //\n    // The keyspace is defined by the CompactionKey trait.\n    type Key: CompactionKey;\n\n    type Layer: CompactionLayer<Self::Key> + Clone;\n    type DeltaLayer: CompactionDeltaLayer<Self> + Clone;\n    type ImageLayer: CompactionImageLayer<Self> + Clone;\n\n    // This is passed through to all the interface functions. The compaction\n    // implementation doesn't do anything with it, but it might be useful for\n    // the interface implementation.\n    type RequestContext: CompactionRequestContext;\n\n    // ----\n    // Functions that the planner uses to support its decisions\n    // ----\n\n    fn get_shard_identity(&self) -> &ShardIdentity;\n\n    /// Return all layers that overlap the given bounding box.\n    fn get_layers(\n        &mut self,\n        key_range: &Range<Self::Key>,\n        lsn_range: &Range<Lsn>,\n        ctx: &Self::RequestContext,\n    ) -> impl Future<Output = anyhow::Result<Vec<Self::Layer>>> + Send;\n\n    fn get_keyspace(\n        &mut self,\n        key_range: &Range<Self::Key>,\n        lsn: Lsn,\n        ctx: &Self::RequestContext,\n    ) -> impl Future<Output = anyhow::Result<CompactionKeySpace<Self::Key>>> + Send;\n\n    /// NB: This is a pretty expensive operation. 
In the real pageserver\n    /// implementation, it downloads the layer, and keeps it resident\n    /// until the DeltaLayer is dropped.\n    fn downcast_delta_layer(\n        &self,\n        layer: &Self::Layer,\n        ctx: &Self::RequestContext,\n    ) -> impl Future<Output = anyhow::Result<Option<Self::DeltaLayer>>> + Send;\n\n    // ----\n    // Functions to execute the plan\n    // ----\n\n    /// Create a new image layer, materializing all the values in the key range,\n    /// at given 'lsn'.\n    fn create_image(\n        &mut self,\n        lsn: Lsn,\n        key_range: &Range<Self::Key>,\n        ctx: &Self::RequestContext,\n    ) -> impl Future<Output = anyhow::Result<()>> + Send;\n\n    /// Create a new delta layer, containing all the values from 'input_layers'\n    /// in the given key and LSN range.\n    fn create_delta(\n        &mut self,\n        lsn_range: &Range<Lsn>,\n        key_range: &Range<Self::Key>,\n        input_layers: &[Self::DeltaLayer],\n        ctx: &Self::RequestContext,\n    ) -> impl Future<Output = anyhow::Result<()>> + Send;\n\n    /// Delete a layer. The compaction implementation will call this only after\n    /// all the create_image() or create_delta() calls that deletion of this\n    /// layer depends on have finished. But if the implementor has extra lazy\n    /// background tasks, like uploading the index json file to remote storage,\n    /// it is the implementation's responsibility to track those.\n    fn delete_layer(\n        &mut self,\n        layer: &Self::Layer,\n        ctx: &Self::RequestContext,\n    ) -> impl Future<Output = anyhow::Result<()>> + Send;\n}\n\npub trait CompactionKey: std::cmp::Ord + Clone + Copy + std::fmt::Display {\n    const MIN: Self;\n    const MAX: Self;\n\n    /// Calculate distance between key_range.start and key_range.end.\n    ///\n    /// This returns u32, for compatibility with Repository::key. 
If the\n    /// distance is larger, return u32::MAX.\n    fn key_range_size(key_range: &Range<Self>, shard_identity: &ShardIdentity) -> u32;\n\n    // return \"self + 1\"\n    fn next(&self) -> Self;\n\n    // return \"self + <some decent amount to skip>\". The amount to skip\n    // is left to the implementation.\n    // FIXME: why not just \"add(u32)\" ?  This is hard to use\n    fn skip_some(&self) -> Self;\n}\n\nimpl CompactionKey for Key {\n    const MIN: Self = Self::MIN;\n    const MAX: Self = Self::MAX;\n\n    fn key_range_size(r: &std::ops::Range<Self>, shard_identity: &ShardIdentity) -> u32 {\n        ShardedRange::new(r.clone(), shard_identity).page_count()\n    }\n    fn next(&self) -> Key {\n        (self as &Key).next()\n    }\n    fn skip_some(&self) -> Key {\n        self.add(128)\n    }\n}\n\n/// Contiguous ranges of keys that belong to the key space. In key order, and\n/// with no overlap.\npub type CompactionKeySpace<K> = Vec<Range<K>>;\n\n/// Functions needed from all layers.\npub trait CompactionLayer<K: CompactionKey> {\n    fn key_range(&self) -> &Range<K>;\n    fn lsn_range(&self) -> &Range<Lsn>;\n\n    fn file_size(&self) -> u64;\n\n    /// For debugging, short human-readable representation of the layer. E.g. filename.\n    fn short_id(&self) -> String;\n\n    fn is_delta(&self) -> bool;\n}\npub trait CompactionDeltaLayer<E: CompactionJobExecutor + ?Sized>: CompactionLayer<E::Key> {\n    type DeltaEntry<'a>: CompactionDeltaEntry<'a, E::Key>\n    where\n        Self: 'a;\n\n    /// Return all keys in this delta layer.\n    fn load_keys(\n        &self,\n        ctx: &E::RequestContext,\n    ) -> impl Future<Output = anyhow::Result<Vec<Self::DeltaEntry<'_>>>> + Send;\n}\n\npub trait CompactionImageLayer<E: CompactionJobExecutor + ?Sized>: CompactionLayer<E::Key> {}\n\npub trait CompactionDeltaEntry<'a, K> {\n    fn key(&self) -> K;\n    fn lsn(&self) -> Lsn;\n    fn size(&self) -> u64;\n}\n\npub trait CompactionRequestContext {}\n"
  },
  {
    "path": "pageserver/compaction/src/lib.rs",
    "content": "// The main module implementing the compaction algorithm\npub mod compact_tiered;\npub(crate) mod identify_levels;\n\n// Traits that the caller of the compaction needs to implement\npub mod interface;\n\n// Utility functions, useful for the implementation\npub mod helpers;\n\n// A simulator with mock implementations of 'interface'\npub mod simulator;\n"
  },
  {
    "path": "pageserver/compaction/src/simulator/draw.rs",
    "content": "use std::cmp::Ordering;\nuse std::collections::{BTreeMap, BTreeSet, HashSet};\nuse std::fmt::Write;\nuse std::ops::Range;\n\nuse anyhow::Result;\nuse svg_fmt::{BeginSvg, EndSvg, Fill, Stroke, Style, rgb};\nuse utils::lsn::Lsn;\n\nuse super::Key;\n\n// Map values to their compressed coordinate - the index the value\n// would have in a sorted and deduplicated list of all values.\nstruct CoordinateMap<T: Ord + Copy> {\n    map: BTreeMap<T, usize>,\n    stretch: f32,\n}\n\nimpl<T: Ord + Copy> CoordinateMap<T> {\n    fn new(coords: Vec<T>, stretch: f32) -> Self {\n        let set: BTreeSet<T> = coords.into_iter().collect();\n\n        let mut map: BTreeMap<T, usize> = BTreeMap::new();\n        for (i, e) in set.iter().enumerate() {\n            map.insert(*e, i);\n        }\n\n        Self { map, stretch }\n    }\n\n    // This assumes that the map contains an exact point for this.\n    // Use _map_inexact for values in between.\n    fn map(&self, val: T) -> f32 {\n        *self.map.get(&val).unwrap() as f32 * self.stretch\n    }\n\n    // the value is still assumed to be within the min/max bounds\n    // (this is currently unused)\n    fn _map_inexact(&self, val: T) -> f32 {\n        let prev = *self.map.range(..=val).next().unwrap().1;\n        let next = *self.map.range(val..).next().unwrap().1;\n\n        // interpolate\n        (prev as f32 + (next - prev) as f32) * self.stretch\n    }\n\n    fn max(&self) -> f32 {\n        self.map.len() as f32 * self.stretch\n    }\n}\n\n#[derive(PartialEq, Hash, Eq)]\npub enum LayerTraceOp {\n    Flush,\n    CreateDelta,\n    CreateImage,\n    Delete,\n}\n\nimpl std::fmt::Display for LayerTraceOp {\n    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> Result<(), std::fmt::Error> {\n        let op_str = match self {\n            LayerTraceOp::Flush => \"flush\",\n            LayerTraceOp::CreateDelta => \"create_delta\",\n            LayerTraceOp::CreateImage => \"create_image\",\n            LayerTraceOp::Delete => 
\"delete\",\n        };\n        f.write_str(op_str)\n    }\n}\n\n#[derive(PartialEq, Hash, Eq, Clone)]\npub struct LayerTraceFile {\n    pub filename: String,\n    pub key_range: Range<Key>,\n    pub lsn_range: Range<Lsn>,\n}\n\nimpl LayerTraceFile {\n    fn is_image(&self) -> bool {\n        self.lsn_range.end == self.lsn_range.start\n    }\n}\n\npub struct LayerTraceEvent {\n    pub time_rel: u64,\n    pub op: LayerTraceOp,\n    pub file: LayerTraceFile,\n}\n\npub fn draw_history<W: std::io::Write>(history: &[LayerTraceEvent], mut output: W) -> Result<()> {\n    let mut files: Vec<LayerTraceFile> = Vec::new();\n\n    for event in history {\n        files.push(event.file.clone());\n    }\n    let last_time_rel = history.last().unwrap().time_rel;\n\n    // Collect all coordinates\n    let mut keys: Vec<Key> = vec![];\n    let mut lsns: Vec<Lsn> = vec![];\n    for f in files.iter() {\n        keys.push(f.key_range.start);\n        keys.push(f.key_range.end);\n        lsns.push(f.lsn_range.start);\n        lsns.push(f.lsn_range.end);\n    }\n\n    // Analyze\n    let key_map = CoordinateMap::new(keys, 2.0);\n    // Stretch out vertically for better visibility\n    let lsn_map = CoordinateMap::new(lsns, 3.0);\n\n    let mut svg = String::new();\n\n    // Draw\n    writeln!(\n        svg,\n        \"{}\",\n        BeginSvg {\n            w: key_map.max(),\n            h: lsn_map.max(),\n        }\n    )?;\n    let lsn_max = lsn_map.max();\n\n    // Sort the files by LSN, but so that image layers go after all delta layers\n    // The SVG is painted in the order the elements appear, and we want to draw\n    // image layers on top of the delta layers if they overlap\n    //\n    // (This could also be implemented via z coordinates: image layers get one z\n    // coord, delta layers get another z coord.)\n    let mut files_sorted: Vec<LayerTraceFile> = files.into_iter().collect();\n    files_sorted.sort_by(|a, b| {\n        if a.is_image() && !b.is_image() {\n            
Ordering::Greater\n        } else if !a.is_image() && b.is_image() {\n            Ordering::Less\n        } else {\n            a.lsn_range.end.cmp(&b.lsn_range.end)\n        }\n    });\n\n    writeln!(svg, \"<!-- layers -->\")?;\n    let mut files_seen = HashSet::new();\n    for f in files_sorted {\n        if files_seen.contains(&f) {\n            continue;\n        }\n        let key_start = key_map.map(f.key_range.start);\n        let key_end = key_map.map(f.key_range.end);\n        let key_diff = key_end - key_start;\n\n        if key_start >= key_end {\n            panic!(\"Invalid key range {key_start}-{key_end}\");\n        }\n\n        let lsn_start = lsn_map.map(f.lsn_range.start);\n        let lsn_end = lsn_map.map(f.lsn_range.end);\n\n        // Fill in and thicken rectangle if it's an\n        // image layer so that we can see it.\n        let mut style = Style {\n            fill: Fill::Color(rgb(0x80, 0x80, 0x80)),\n            stroke: Stroke::Color(rgb(0, 0, 0), 0.5),\n            opacity: 1.0,\n            stroke_opacity: 1.0,\n        };\n\n        let y_start = lsn_max - lsn_start;\n        let y_end = lsn_max - lsn_end;\n\n        let x_margin = 0.25;\n        let y_margin = 0.5;\n\n        match f.lsn_range.start.cmp(&f.lsn_range.end) {\n            Ordering::Less => {\n                write!(\n                    svg,\n                    r#\"    <rect id=\"layer_{}\" x=\"{}\" y=\"{}\" width=\"{}\" height=\"{}\" ry=\"{}\" style=\"{}\">\"#,\n                    f.filename,\n                    key_start + x_margin,\n                    y_end + y_margin,\n                    key_diff - x_margin * 2.0,\n                    y_start - y_end - y_margin * 2.0,\n                    1.0, // border_radius,\n                    style,\n                )?;\n                write!(svg, \"<title>{}</title>\", f.filename)?;\n                writeln!(svg, \"</rect>\")?;\n            }\n            Ordering::Equal => {\n                //lsn_diff = 0.3;\n      
          //lsn_offset = -lsn_diff / 2.0;\n                //margin = 0.05;\n                style.fill = Fill::Color(rgb(0x80, 0, 0x80));\n                style.stroke = Stroke::Color(rgb(0x80, 0, 0x80), 3.0);\n                write!(\n                    svg,\n                    r#\"    <line id=\"layer_{}\" x1=\"{}\" y1=\"{}\" x2=\"{}\" y2=\"{}\" style=\"{}\">\"#,\n                    f.filename,\n                    key_start + x_margin,\n                    y_end,\n                    key_end - x_margin,\n                    y_end,\n                    style,\n                )?;\n                write!(\n                    svg,\n                    \"<title>{}<br>{} - {}</title>\",\n                    f.filename, lsn_end, y_end\n                )?;\n                writeln!(svg, \"</line>\")?;\n            }\n            Ordering::Greater => panic!(\"Invalid lsn range {lsn_start}-{lsn_end}\"),\n        }\n        files_seen.insert(f);\n    }\n\n    writeln!(svg, \"{EndSvg}\")?;\n\n    let mut layer_events_str = String::new();\n    let mut first = true;\n    for e in history {\n        if !first {\n            writeln!(layer_events_str, \",\")?;\n        }\n        write!(\n            layer_events_str,\n            r#\"  {{\"time_rel\": {}, \"filename\": \"{}\", \"op\": \"{}\"}}\"#,\n            e.time_rel, e.file.filename, e.op\n        )?;\n        first = false;\n    }\n    writeln!(layer_events_str)?;\n\n    writeln!(\n        output,\n        r#\"<!DOCTYPE html>\n<html>\n<head>\n<style>\n/* Keep the slider pinned at top */\n.topbar {{\n  display: block;\n  overflow: hidden;\n  background-color: lightgrey;\n  position: fixed;\n  top: 0;\n  width: 100%;\n/*  width: 500px; */\n}}\n.slidercontainer {{\n  float: left;\n  width: 50%;\n  margin-right: 200px;\n}}\n.slider {{\n  float: left;\n  width: 100%;\n}}\n.legend {{\n  width: 200px;\n  float: right;\n}}\n\n/* Main content */\n.main {{\n  margin-top: 50px; /* Add a top margin to avoid content overlay 
*/\n}}\n</style>\n</head>\n\n  <body onload=\"init()\">\n    <script type=\"text/javascript\">\n\n      var layer_events = [{layer_events_str}]\n\n      let ticker;\n\n      function init() {{\n          for (let i = 0; i < layer_events.length; i++) {{\n              var layer = document.getElementById(\"layer_\" + layer_events[i].filename);\n              layer.style.visibility = \"hidden\";\n          }}\n          last_layer_event = -1;\n          moveSlider(last_slider_pos)\n      }}\n\n      function startAnimation() {{\n          ticker = setInterval(animateStep, 100);\n      }}\n      function stopAnimation() {{\n          clearInterval(ticker);\n      }}\n\n      function animateStep() {{\n          if (last_layer_event < layer_events.length - 1) {{\n              var slider = document.getElementById(\"time-slider\");\n              let prevPos = slider.value\n              let nextEvent = last_layer_event + 1\n              while (nextEvent <= layer_events.length - 1) {{\n                  if (layer_events[nextEvent].time_rel > prevPos) {{\n                      break;\n                  }}\n                  nextEvent += 1;\n              }}\n              let nextPos = layer_events[nextEvent].time_rel\n              slider.value = nextPos\n              moveSlider(nextPos)\n          }}\n      }}\n\n      function redoLayerEvent(n, dir) {{\n          var layer = document.getElementById(\"layer_\" + layer_events[n].filename);\n          switch (layer_events[n].op) {{\n              case \"flush\":\n                  layer.style.visibility = \"visible\";\n                  break;\n              case \"create_delta\":\n                  layer.style.visibility = \"visible\";\n                  break;\n              case \"create_image\":\n                  layer.style.visibility = \"visible\";\n                  break;\n              case \"delete\":\n                  layer.style.visibility = \"hidden\";\n                  break;\n          }}\n      }}\n   
   function undoLayerEvent(n) {{\n          var layer = document.getElementById(\"layer_\" + layer_events[n].filename);\n          switch (layer_events[n].op) {{\n              case \"flush\":\n                  layer.style.visibility = \"hidden\";\n                  break;\n              case \"create_delta\":\n                  layer.style.visibility = \"hidden\";\n                  break;\n              case \"create_image\":\n                  layer.style.visibility = \"hidden\";\n                  break;\n              case \"delete\":\n                  layer.style.visibility = \"visible\";\n                  break;\n          }}\n      }}\n\n      var last_slider_pos = 0\n      var last_layer_event = 0\n\n      var moveSlider = function(new_pos) {{\n          if (new_pos > last_slider_pos) {{\n              while (last_layer_event < layer_events.length - 1) {{\n                  if (layer_events[last_layer_event + 1].time_rel > new_pos) {{\n                      break;\n                  }}\n                  last_layer_event += 1;\n                  redoLayerEvent(last_layer_event)\n              }}\n          }}\n          if (new_pos < last_slider_pos) {{\n              while (last_layer_event >= 0) {{\n                  if (layer_events[last_layer_event].time_rel <= new_pos) {{\n                      break;\n                  }}\n                  undoLayerEvent(last_layer_event)\n                  last_layer_event -= 1;\n              }}\n          }}\n          last_slider_pos = new_pos;\n          document.getElementById(\"debug_pos\").textContent=new_pos;\n          if (last_layer_event >= 0) {{\n              document.getElementById(\"debug_layer_event\").textContent=last_layer_event + \" \" + layer_events[last_layer_event].time_rel + \" \" + layer_events[last_layer_event].op;\n          }} else {{\n              document.getElementById(\"debug_layer_event\").textContent=\"begin\";\n          }}\n      }}\n    </script>\n\n    <div 
class=\"topbar\">\n      <div class=\"slidercontainer\">\n        <label for=\"time-slider\">TIME</label>:\n        <input id=\"time-slider\" class=\"slider\" type=\"range\" min=\"0\" max=\"{last_time_rel}\" value=\"0\" oninput=\"moveSlider(this.value)\"><br>\n\n        pos: <span id=\"debug_pos\"></span><br>\n        event: <span id=\"debug_layer_event\"></span><br>\n        gc: <span id=\"debug_gc_event\"></span><br>\n      </div>\n\n      <button onclick=\"startAnimation()\">Play</button>\n      <button onclick=\"stopAnimation()\">Stop</button>\n\n      <svg class=\"legend\">\n        <rect x=5 y=0 width=20 height=20 style=\"fill:rgb(128,128,128);stroke:rgb(0,0,0);stroke-width:0.5;fill-opacity:1;stroke-opacity:1;\"/>\n        <line x1=5 y1=30 x2=25 y2=30 style=\"fill:rgb(128,0,128);stroke:rgb(128,0,128);stroke-width:3;fill-opacity:1;stroke-opacity:1;\"/>\n        <line x1=0 y1=40 x2=30 y2=40 style=\"fill:none;stroke:rgb(255,0,0);stroke-width:0.5;fill-opacity:1;stroke-opacity:1;\"/>\n      </svg>\n    </div>\n\n    <div class=\"main\">\n{svg}\n    </div>\n  </body>\n</html>\n\"#\n    )?;\n\n    Ok(())\n}\n"
  },
  {
    "path": "pageserver/compaction/src/simulator.rs",
    "content": "mod draw;\n\nuse std::fmt::Write;\nuse std::ops::Range;\nuse std::sync::{Arc, Mutex};\n\nuse draw::{LayerTraceEvent, LayerTraceFile, LayerTraceOp};\nuse futures::StreamExt;\nuse pageserver_api::shard::ShardIdentity;\nuse rand::Rng;\nuse tracing::info;\nuse utils::lsn::Lsn;\n\nuse crate::helpers::{PAGE_SZ, merge_delta_keys, overlaps_with};\nuse crate::interface;\nuse crate::interface::CompactionLayer;\n\n//\n// Implementation for the CompactionJobExecutor interface\n//\npub struct MockTimeline {\n    // Parameters for the compaction algorithm\n    pub target_file_size: u64,\n    tiers_per_level: u64,\n\n    num_l0_flushes: u64,\n    last_compact_at_flush: u64,\n    last_flush_lsn: Lsn,\n\n    // In-memory layer\n    records: Vec<MockRecord>,\n    total_len: u64,\n    start_lsn: Lsn,\n    end_lsn: Lsn,\n\n    // Current keyspace at `end_lsn`. This is updated on every ingested record.\n    keyspace: KeySpace,\n\n    // historic keyspaces\n    old_keyspaces: Vec<(Lsn, KeySpace)>,\n\n    // \"on-disk\" layers\n    pub live_layers: Vec<MockLayer>,\n\n    num_deleted_layers: u64,\n\n    // Statistics\n    wal_ingested: u64,\n    bytes_written: u64,\n    bytes_deleted: u64,\n    layers_created: u64,\n    layers_deleted: u64,\n\n    // All the events - creation and deletion of files - are collected\n    // in 'history'. 
It is used to draw the SVG animation at the end.\n    time: u64,\n    history: Vec<draw::LayerTraceEvent>,\n}\n\ntype KeySpace = interface::CompactionKeySpace<Key>;\n\npub struct MockRequestContext {}\nimpl interface::CompactionRequestContext for MockRequestContext {}\n\npub type Key = u64;\n\nimpl interface::CompactionKey for Key {\n    const MIN: Self = u64::MIN;\n    const MAX: Self = u64::MAX;\n\n    fn key_range_size(key_range: &Range<Self>, _shard_identity: &ShardIdentity) -> u32 {\n        std::cmp::min(key_range.end - key_range.start, u32::MAX as u64) as u32\n    }\n\n    fn next(&self) -> Self {\n        self + 1\n    }\n    fn skip_some(&self) -> Self {\n        // skip ahead by a fixed 100 keys (no rounding is done)\n        self + 100\n    }\n}\n\n#[derive(Clone)]\npub struct MockRecord {\n    lsn: Lsn,\n    key: Key,\n    len: u64,\n}\n\nimpl interface::CompactionDeltaEntry<'_, Key> for MockRecord {\n    fn key(&self) -> Key {\n        self.key\n    }\n    fn lsn(&self) -> Lsn {\n        self.lsn\n    }\n    fn size(&self) -> u64 {\n        self.len\n    }\n}\n\npub struct MockDeltaLayer {\n    pub key_range: Range<Key>,\n    pub lsn_range: Range<Lsn>,\n\n    pub file_size: u64,\n\n    pub deleted: Mutex<bool>,\n\n    pub records: Vec<MockRecord>,\n}\n\nimpl interface::CompactionLayer<Key> for Arc<MockDeltaLayer> {\n    fn key_range(&self) -> &Range<Key> {\n        &self.key_range\n    }\n    fn lsn_range(&self) -> &Range<Lsn> {\n        &self.lsn_range\n    }\n\n    fn file_size(&self) -> u64 {\n        self.file_size\n    }\n\n    fn short_id(&self) -> String {\n        format!(\n            \"{:016X}-{:016X}__{:08X}-{:08X}\",\n            self.key_range.start, self.key_range.end, self.lsn_range.start.0, self.lsn_range.end.0\n        )\n    }\n\n    fn is_delta(&self) -> bool {\n        true\n    }\n}\n\nimpl interface::CompactionDeltaLayer<MockTimeline> for Arc<MockDeltaLayer> {\n    type DeltaEntry<'a> = MockRecord;\n\n    async fn load_keys(&self, _ctx: &MockRequestContext) -> 
anyhow::Result<Vec<MockRecord>> {\n        Ok(self.records.clone())\n    }\n}\n\npub struct MockImageLayer {\n    pub key_range: Range<Key>,\n    pub lsn_range: Range<Lsn>,\n\n    pub file_size: u64,\n\n    pub deleted: Mutex<bool>,\n}\n\nimpl interface::CompactionImageLayer<MockTimeline> for Arc<MockImageLayer> {}\n\nimpl interface::CompactionLayer<Key> for Arc<MockImageLayer> {\n    fn key_range(&self) -> &Range<Key> {\n        &self.key_range\n    }\n    fn lsn_range(&self) -> &Range<Lsn> {\n        &self.lsn_range\n    }\n\n    fn file_size(&self) -> u64 {\n        self.file_size\n    }\n\n    fn short_id(&self) -> String {\n        format!(\n            \"{:016X}-{:016X}__{:08X}\",\n            self.key_range.start, self.key_range.end, self.lsn_range.start.0,\n        )\n    }\n\n    fn is_delta(&self) -> bool {\n        false\n    }\n}\n\nimpl MockTimeline {\n    pub fn new() -> Self {\n        MockTimeline {\n            target_file_size: 256 * 1024 * 1024,\n            tiers_per_level: 4,\n\n            num_l0_flushes: 0,\n            last_compact_at_flush: 0,\n            last_flush_lsn: Lsn(0),\n\n            records: Vec::new(),\n            total_len: 0,\n            start_lsn: Lsn(1000),\n            end_lsn: Lsn(1000),\n            keyspace: KeySpace::new(),\n\n            old_keyspaces: vec![],\n\n            live_layers: vec![],\n\n            num_deleted_layers: 0,\n\n            wal_ingested: 0,\n            bytes_written: 0,\n            bytes_deleted: 0,\n            layers_created: 0,\n            layers_deleted: 0,\n\n            time: 0,\n            history: Vec::new(),\n        }\n    }\n\n    pub async fn compact(&mut self) -> anyhow::Result<()> {\n        let ctx = MockRequestContext {};\n\n        crate::compact_tiered::compact_tiered(\n            self,\n            self.last_flush_lsn,\n            self.target_file_size,\n            self.tiers_per_level,\n            &ctx,\n        )\n        .await?;\n\n        Ok(())\n    }\n\n    
// Ingest one record to the timeline\n    pub fn ingest_record(&mut self, key: Key, len: u64) {\n        self.records.push(MockRecord {\n            lsn: self.end_lsn,\n            key,\n            len,\n        });\n        self.total_len += len;\n        self.end_lsn += len;\n\n        if self.total_len > self.target_file_size {\n            self.flush_l0();\n        }\n    }\n\n    pub async fn compact_if_needed(&mut self) -> anyhow::Result<()> {\n        if self.num_l0_flushes - self.last_compact_at_flush >= self.tiers_per_level {\n            self.compact().await?;\n            self.last_compact_at_flush = self.num_l0_flushes;\n        }\n        Ok(())\n    }\n\n    pub fn flush_l0(&mut self) {\n        if self.records.is_empty() {\n            return;\n        }\n\n        let mut records = std::mem::take(&mut self.records);\n        records.sort_by_key(|rec| rec.key);\n\n        let lsn_range = self.start_lsn..self.end_lsn;\n        let new_layer = Arc::new(MockDeltaLayer {\n            key_range: Key::MIN..Key::MAX,\n            lsn_range: lsn_range.clone(),\n            file_size: self.total_len,\n            records,\n            deleted: Mutex::new(false),\n        });\n        info!(\"flushed L0 layer {}\", new_layer.short_id());\n        self.live_layers.push(MockLayer::from(&new_layer));\n\n        // reset L0\n        self.start_lsn = self.end_lsn;\n        self.total_len = 0;\n        self.records = Vec::new();\n\n        self.layers_created += 1;\n        self.bytes_written += new_layer.file_size;\n\n        self.time += 1;\n        self.history.push(LayerTraceEvent {\n            time_rel: self.time,\n            op: LayerTraceOp::Flush,\n            file: LayerTraceFile {\n                filename: new_layer.short_id(),\n                key_range: new_layer.key_range.clone(),\n                lsn_range: new_layer.lsn_range.clone(),\n            },\n        });\n\n        self.num_l0_flushes += 1;\n        self.last_flush_lsn = self.end_lsn;\n   
 }\n\n    // Ingest `num_records` records to the timeline, with random keys\n    // uniformly distributed in `key_range`\n    pub fn ingest_uniform(\n        &mut self,\n        num_records: u64,\n        len: u64,\n        key_range: &Range<Key>,\n    ) -> anyhow::Result<()> {\n        crate::helpers::union_to_keyspace(&mut self.keyspace, vec![key_range.clone()]);\n        let mut rng = rand::rng();\n        for _ in 0..num_records {\n            self.ingest_record(rng.random_range(key_range.clone()), len);\n            self.wal_ingested += len;\n        }\n        Ok(())\n    }\n\n    pub fn stats(&self) -> anyhow::Result<String> {\n        let mut s = String::new();\n\n        writeln!(s, \"STATISTICS:\")?;\n        writeln!(\n            s,\n            \"WAL ingested:   {:>10} MB\",\n            self.wal_ingested / (1024 * 1024)\n        )?;\n        writeln!(\n            s,\n            \"size created:   {:>10} MB\",\n            self.bytes_written / (1024 * 1024)\n        )?;\n        writeln!(\n            s,\n            \"size deleted:   {:>10} MB\",\n            self.bytes_deleted / (1024 * 1024)\n        )?;\n        writeln!(s, \"files created:     {:>10}\", self.layers_created)?;\n        writeln!(s, \"files deleted:     {:>10}\", self.layers_deleted)?;\n        writeln!(\n            s,\n            \"write amp:         {:>10.2}\",\n            self.bytes_written as f64 / self.wal_ingested as f64\n        )?;\n        writeln!(\n            s,\n            \"storage amp:       {:>10.2}\",\n            (self.bytes_written - self.bytes_deleted) as f64 / self.wal_ingested as f64\n        )?;\n\n        Ok(s)\n    }\n\n    pub fn draw_history<W: std::io::Write>(&self, output: W) -> anyhow::Result<()> {\n        draw::draw_history(&self.history, output)\n    }\n}\n\nimpl Default for MockTimeline {\n    fn default() -> Self {\n        Self::new()\n    }\n}\n\n#[derive(Clone)]\npub enum MockLayer {\n    Delta(Arc<MockDeltaLayer>),\n    
Image(Arc<MockImageLayer>),\n}\n\nimpl interface::CompactionLayer<Key> for MockLayer {\n    fn key_range(&self) -> &Range<Key> {\n        match self {\n            MockLayer::Delta(this) => this.key_range(),\n            MockLayer::Image(this) => this.key_range(),\n        }\n    }\n    fn lsn_range(&self) -> &Range<Lsn> {\n        match self {\n            MockLayer::Delta(this) => this.lsn_range(),\n            MockLayer::Image(this) => this.lsn_range(),\n        }\n    }\n    fn file_size(&self) -> u64 {\n        match self {\n            MockLayer::Delta(this) => this.file_size,\n            MockLayer::Image(this) => this.file_size,\n        }\n    }\n    fn short_id(&self) -> String {\n        match self {\n            MockLayer::Delta(this) => this.short_id(),\n            MockLayer::Image(this) => this.short_id(),\n        }\n    }\n\n    fn is_delta(&self) -> bool {\n        match self {\n            MockLayer::Delta(_) => true,\n            MockLayer::Image(_) => false,\n        }\n    }\n}\n\nimpl MockLayer {\n    fn is_deleted(&self) -> bool {\n        let guard = match self {\n            MockLayer::Delta(this) => this.deleted.lock().unwrap(),\n            MockLayer::Image(this) => this.deleted.lock().unwrap(),\n        };\n        *guard\n    }\n    fn mark_deleted(&self) {\n        let mut deleted_guard = match self {\n            MockLayer::Delta(this) => this.deleted.lock().unwrap(),\n            MockLayer::Image(this) => this.deleted.lock().unwrap(),\n        };\n        assert!(!*deleted_guard, \"layer already deleted\");\n        *deleted_guard = true;\n    }\n}\n\nimpl From<&Arc<MockDeltaLayer>> for MockLayer {\n    fn from(l: &Arc<MockDeltaLayer>) -> Self {\n        MockLayer::Delta(l.clone())\n    }\n}\n\nimpl From<&Arc<MockImageLayer>> for MockLayer {\n    fn from(l: &Arc<MockImageLayer>) -> Self {\n        MockLayer::Image(l.clone())\n    }\n}\n\nimpl interface::CompactionJobExecutor for MockTimeline {\n    type Key = Key;\n    type Layer = 
MockLayer;\n    type DeltaLayer = Arc<MockDeltaLayer>;\n    type ImageLayer = Arc<MockImageLayer>;\n    type RequestContext = MockRequestContext;\n\n    fn get_shard_identity(&self) -> &ShardIdentity {\n        static IDENTITY: ShardIdentity = ShardIdentity::unsharded();\n        &IDENTITY\n    }\n\n    async fn get_layers(\n        &mut self,\n        key_range: &Range<Self::Key>,\n        lsn_range: &Range<Lsn>,\n        _ctx: &Self::RequestContext,\n    ) -> anyhow::Result<Vec<Self::Layer>> {\n        // Clear any deleted layers from our vec\n        self.live_layers.retain(|l| !l.is_deleted());\n\n        let layers: Vec<MockLayer> = self\n            .live_layers\n            .iter()\n            .filter(|l| {\n                overlaps_with(l.lsn_range(), lsn_range) && overlaps_with(l.key_range(), key_range)\n            })\n            .cloned()\n            .collect();\n\n        Ok(layers)\n    }\n\n    async fn get_keyspace(\n        &mut self,\n        key_range: &Range<Self::Key>,\n        _lsn: Lsn,\n        _ctx: &Self::RequestContext,\n    ) -> anyhow::Result<interface::CompactionKeySpace<Key>> {\n        // find it in the levels\n        if self.old_keyspaces.is_empty() {\n            Ok(crate::helpers::intersect_keyspace(\n                &self.keyspace,\n                key_range,\n            ))\n        } else {\n            // not implemented\n\n            // The mock implementation only allows requesting the\n            // keyspace at the level's end LSN. 
That's all that the\n            // current implementation needs.\n            panic!(\"keyspace not available for requested lsn\");\n        }\n    }\n\n    async fn downcast_delta_layer(\n        &self,\n        layer: &MockLayer,\n        _ctx: &MockRequestContext,\n    ) -> anyhow::Result<Option<Arc<MockDeltaLayer>>> {\n        Ok(match layer {\n            MockLayer::Delta(l) => Some(l.clone()),\n            MockLayer::Image(_) => None,\n        })\n    }\n\n    async fn create_image(\n        &mut self,\n        lsn: Lsn,\n        key_range: &Range<Key>,\n        ctx: &MockRequestContext,\n    ) -> anyhow::Result<()> {\n        let keyspace = self.get_keyspace(key_range, lsn, ctx).await?;\n\n        let mut accum_size: u64 = 0;\n        for r in keyspace {\n            accum_size += r.end - r.start;\n        }\n\n        let new_layer = Arc::new(MockImageLayer {\n            key_range: key_range.clone(),\n            lsn_range: lsn..lsn,\n            file_size: accum_size * PAGE_SZ,\n            deleted: Mutex::new(false),\n        });\n        info!(\n            \"created image layer, size {}: {}\",\n            new_layer.file_size,\n            new_layer.short_id()\n        );\n        self.live_layers.push(MockLayer::Image(new_layer.clone()));\n\n        // update stats\n        self.bytes_written += new_layer.file_size;\n        self.layers_created += 1;\n\n        self.time += 1;\n        self.history.push(LayerTraceEvent {\n            time_rel: self.time,\n            op: LayerTraceOp::CreateImage,\n            file: LayerTraceFile {\n                filename: new_layer.short_id(),\n                key_range: new_layer.key_range.clone(),\n                lsn_range: new_layer.lsn_range.clone(),\n            },\n        });\n\n        Ok(())\n    }\n\n    async fn create_delta(\n        &mut self,\n        lsn_range: &Range<Lsn>,\n        key_range: &Range<Key>,\n        input_layers: &[Arc<MockDeltaLayer>],\n        ctx: &MockRequestContext,\n    ) -> 
anyhow::Result<()> {\n        let mut key_value_stream =\n            std::pin::pin!(merge_delta_keys::<MockTimeline>(input_layers, ctx));\n        let mut records: Vec<MockRecord> = Vec::new();\n        let mut total_len = 2;\n        while let Some(delta_entry) = key_value_stream.next().await {\n            let delta_entry: MockRecord = delta_entry?;\n            if key_range.contains(&delta_entry.key) && lsn_range.contains(&delta_entry.lsn) {\n                total_len += delta_entry.len;\n                records.push(delta_entry);\n            }\n        }\n        let total_records = records.len();\n        let new_layer = Arc::new(MockDeltaLayer {\n            key_range: key_range.clone(),\n            lsn_range: lsn_range.clone(),\n            file_size: total_len,\n            records,\n            deleted: Mutex::new(false),\n        });\n        info!(\n            \"created delta layer, recs {}, size {}: {}\",\n            total_records,\n            total_len,\n            new_layer.short_id()\n        );\n        self.live_layers.push(MockLayer::Delta(new_layer.clone()));\n\n        // update stats\n        self.bytes_written += total_len;\n        self.layers_created += 1;\n\n        self.time += 1;\n        self.history.push(LayerTraceEvent {\n            time_rel: self.time,\n            op: LayerTraceOp::CreateDelta,\n            file: LayerTraceFile {\n                filename: new_layer.short_id(),\n                key_range: new_layer.key_range.clone(),\n                lsn_range: new_layer.lsn_range.clone(),\n            },\n        });\n\n        Ok(())\n    }\n\n    async fn delete_layer(\n        &mut self,\n        layer: &Self::Layer,\n        _ctx: &MockRequestContext,\n    ) -> anyhow::Result<()> {\n        let layer = std::pin::pin!(layer);\n        info!(\"deleting layer: {}\", layer.short_id());\n        self.num_deleted_layers += 1;\n        self.bytes_deleted += layer.file_size();\n        layer.mark_deleted();\n\n        self.time 
+= 1;\n        self.history.push(LayerTraceEvent {\n            time_rel: self.time,\n            op: LayerTraceOp::Delete,\n            file: LayerTraceFile {\n                filename: layer.short_id(),\n                key_range: layer.key_range().clone(),\n                lsn_range: layer.lsn_range().clone(),\n            },\n        });\n\n        Ok(())\n    }\n}\n"
  },
  {
    "path": "pageserver/compaction/tests/tests.rs",
    "content": "use once_cell::sync::OnceCell;\nuse pageserver_compaction::interface::CompactionLayer;\nuse pageserver_compaction::simulator::MockTimeline;\nuse utils::logging;\n\nstatic LOG_HANDLE: OnceCell<()> = OnceCell::new();\n\npub(crate) fn setup_logging() {\n    LOG_HANDLE.get_or_init(|| {\n        logging::init(\n            logging::LogFormat::Test,\n            logging::TracingErrorLayerEnablement::EnableWithRustLogFilter,\n            logging::Output::Stdout,\n        )\n        .expect(\"Failed to init test logging\");\n    });\n}\n\n/// Test the extreme case that there are so many updates for a single key that\n/// even if we produce an extremely narrow delta layer, spanning just that one\n/// key, we still too many records to fit in the target file size. We need to\n/// split in the LSN dimension too in that case.\n#[tokio::test]\nasync fn test_many_updates_for_single_key() {\n    setup_logging();\n    let mut executor = MockTimeline::new();\n    executor.target_file_size = 1_000_000; // 1 MB\n\n    // Ingest 10 MB of updates to a single key.\n    for _ in 1..1000 {\n        executor.ingest_uniform(100, 10, &(0..100_000)).unwrap();\n        executor.ingest_uniform(1000, 10, &(0..1)).unwrap();\n        executor.compact().await.unwrap();\n    }\n\n    // Check that all the layers are smaller than the target size (with some slop)\n    for l in executor.live_layers.iter() {\n        println!(\"layer {}: {}\", l.short_id(), l.file_size());\n    }\n    for l in executor.live_layers.iter() {\n        assert!(l.file_size() < executor.target_file_size * 2);\n        // Sanity check that none of the delta layers are empty either.\n        if l.is_delta() {\n            assert!(l.file_size() > 0);\n        }\n    }\n}\n\n#[tokio::test]\nasync fn test_simple_updates() {\n    setup_logging();\n    let mut executor = MockTimeline::new();\n    executor.target_file_size = 500_000; // 500 KB\n\n    // Ingest some traffic.\n    for _ in 1..400 {\n        
executor.ingest_uniform(100, 500, &(0..100_000)).unwrap();\n    }\n\n    for l in executor.live_layers.iter() {\n        println!(\"layer {}: {}\", l.short_id(), l.file_size());\n    }\n\n    println!(\"Running compaction...\");\n    executor.compact().await.unwrap();\n\n    for l in executor.live_layers.iter() {\n        println!(\"layer {}: {}\", l.short_id(), l.file_size());\n    }\n}\n"
  },
  {
    "path": "pageserver/ctl/Cargo.toml",
    "content": "[package]\nname = \"pagectl\"\nversion = \"0.1.0\"\nedition.workspace = true\nlicense.workspace = true\n\n# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html\n\n[dependencies]\nanyhow.workspace = true\nbincode.workspace = true\ncamino.workspace = true\nclap = { workspace = true, features = [\"string\"] }\nhumantime.workspace = true\nitertools.workspace = true\npageserver = { path = \"..\" }\npageserver_api.workspace = true\nremote_storage = { path = \"../../libs/remote_storage\" }\npostgres_ffi.workspace = true\nserde.workspace = true\nthiserror.workspace = true\ntokio.workspace = true\ntokio-util.workspace = true\nutils.workspace = true\nsvg_fmt.workspace = true\nworkspace_hack.workspace = true\nserde_json.workspace = true\n"
  },
  {
    "path": "pageserver/ctl/src/download_remote_object.rs",
    "content": "use camino::Utf8PathBuf;\nuse clap::Parser;\nuse tokio_util::sync::CancellationToken;\n\n/// Download a specific object from remote storage to a local file.\n///\n/// The remote storage configuration is supplied via the `REMOTE_STORAGE_CONFIG` environment\n/// variable, in the same TOML format that the pageserver itself understands. This allows the\n/// command to work with any cloud supported by the `remote_storage` crate (currently AWS S3,\n/// Azure Blob Storage and local files), as long as the credentials are available via the\n/// standard environment variables expected by the underlying SDKs.\n///\n/// Examples for setting the environment variable:\n///\n/// ```bash\n/// # AWS S3 (region can also be provided via AWS_REGION)\n/// export REMOTE_STORAGE_CONFIG='remote_storage = { bucket_name = \"my-bucket\", bucket_region = \"us-east-2\" }'\n///\n/// # Azure Blob Storage (account key picked up from AZURE_STORAGE_ACCOUNT_KEY)\n/// export REMOTE_STORAGE_CONFIG='remote_storage = { container = \"my-container\", account = \"my-account\" }'\n/// ```\n#[derive(Parser)]\npub(crate) struct DownloadRemoteObjectCmd {\n    /// Key / path of the object to download (relative to the remote storage prefix).\n    ///\n    /// Examples:\n    ///   \"wal/3aa8f.../00000001000000000000000A\"\n    ///   \"pageserver/v1/tenants/<tenant_id>/timelines/<timeline_id>/layer_12345\"\n    pub remote_path: String,\n\n    /// Path of the local file to create. 
Existing file will be overwritten.\n    ///\n    /// Examples:\n    ///   \"./segment\"\n    ///   \"/tmp/layer_12345.parquet\"\n    pub output_file: Utf8PathBuf,\n}\n\npub(crate) async fn main(cmd: &DownloadRemoteObjectCmd) -> anyhow::Result<()> {\n    use remote_storage::{DownloadOpts, GenericRemoteStorage, RemotePath, RemoteStorageConfig};\n\n    // Fetch remote storage configuration from the environment\n    let config_str = std::env::var(\"REMOTE_STORAGE_CONFIG\").map_err(|_| {\n        anyhow::anyhow!(\n            \"'REMOTE_STORAGE_CONFIG' environment variable must be set to a valid remote storage TOML config\"\n        )\n    })?;\n\n    let config = RemoteStorageConfig::from_toml_str(&config_str)?;\n\n    // Initialise remote storage client\n    let storage = GenericRemoteStorage::from_config(&config).await?;\n\n    // RemotePath must be relative – leading slashes confuse the parser.\n    let remote_path_str = cmd.remote_path.trim_start_matches('/');\n    let remote_path = RemotePath::from_string(remote_path_str)?;\n\n    let cancel = CancellationToken::new();\n\n    println!(\n        \"Downloading '{remote_path}' from remote storage bucket {:?} ...\",\n        config.storage.bucket_name()\n    );\n\n    // Start the actual download\n    let download = storage\n        .download(&remote_path, &DownloadOpts::default(), &cancel)\n        .await?;\n\n    // Stream to file\n    let mut reader = tokio_util::io::StreamReader::new(download.download_stream);\n    let tmp_path = cmd.output_file.with_extension(\"tmp\");\n    let mut file = tokio::fs::File::create(&tmp_path).await?;\n    tokio::io::copy(&mut reader, &mut file).await?;\n    file.sync_all().await?;\n    // Atomically move into place\n    tokio::fs::rename(&tmp_path, &cmd.output_file).await?;\n\n    println!(\n        \"Downloaded to '{}'. Last modified: {:?}, etag: {}\",\n        cmd.output_file, download.last_modified, download.etag\n    );\n\n    Ok(())\n}\n"
  },
  {
    "path": "pageserver/ctl/src/draw_timeline_dir.rs",
    "content": "//! A tool for visualizing the arrangement of layerfiles within a timeline.\n//!\n//! It reads filenames from stdin and prints a svg on stdout. The image is a plot in\n//! page-lsn space, where every delta layer is a rectangle and every image layer is a\n//! thick line. Legend:\n//! - The x axis (left to right) represents page index.\n//! - The y axis represents LSN, growing upwards.\n//!\n//! Coordinates in both axis are compressed for better readability.\n//! (see <https://medium.com/algorithms-digest/coordinate-compression-2fff95326fb>)\n//!\n//! The plain text API was chosen so that we can easily work with filenames from various\n//! sources; see the Usage section below for examples.\n//!\n//! # Usage\n//!\n//! ## Producing the SVG\n//!\n//! ```bash\n//!\n//! # local timeline dir\n//! ls test_output/test_pgbench\\[neon-45-684\\]/repo/tenants/$TENANT/timelines/$TIMELINE | \\\n//!     grep \"__\" | cargo run --release --bin pagectl draw-timeline > out.svg\n//!\n//! # Layer map dump from `/v1/tenant/$TENANT/timeline/$TIMELINE/layer`\n//! (jq -r '.historic_layers[] | .layer_file_name' | cargo  run -p pagectl draw-timeline) < layer-map.json > out.svg\n//!\n//! # From an `index_part.json` in S3\n//! (jq -r '.layer_metadata | keys[]' | cargo  run -p pagectl draw-timeline ) < index_part.json-00000016 > out.svg\n//!\n//! # enrich with lines for gc_cutoff and a child branch point\n//! cat <(jq -r '.historic_layers[] | .layer_file_name' < layers.json) <(echo -e 'gc_cutoff:0000001CE3FE32C9\\nbranch:0000001DE3FE32C9') | cargo run --bin pagectl draw-timeline >| out.svg\n//! ```\n//!\n//! ## Viewing\n//!\n//! **Inkscape** is better than the built-in viewers in browsers.\n//!\n//! After selecting a layer file rectangle, use \"Open XML Editor\" (Ctrl|Cmd + Shift + X)\n//! to see the layer file name in the comment field.\n//!\n//! ```bash\n//!\n//! # Linux\n//! inkscape out.svg\n//!\n//! # macOS\n//! 
/Applications/Inkscape.app/Contents/MacOS/inkscape out.svg\n//!\n//! ```\n//!\n\nuse std::cmp::Ordering;\nuse std::collections::{BTreeMap, BTreeSet};\nuse std::io::{self, BufRead};\nuse std::ops::Range;\nuse std::path::PathBuf;\nuse std::str::FromStr;\n\nuse anyhow::{Context, Result};\nuse pageserver_api::key::Key;\nuse svg_fmt::{BeginSvg, EndSvg, Fill, Stroke, rectangle, rgb};\nuse utils::lsn::Lsn;\nuse utils::project_git_version;\n\nproject_git_version!(GIT_VERSION);\n\n// Map values to their compressed coordinate - the index the value\n// would have in a sorted and deduplicated list of all values.\nfn build_coordinate_compression_map<T: Ord + Copy>(coords: Vec<T>) -> BTreeMap<T, usize> {\n    let set: BTreeSet<T> = coords.into_iter().collect();\n\n    let mut map: BTreeMap<T, usize> = BTreeMap::new();\n    for (i, e) in set.iter().enumerate() {\n        map.insert(*e, i);\n    }\n\n    map\n}\n\nfn parse_filename(name: &str) -> (Range<Key>, Range<Lsn>) {\n    let split: Vec<&str> = name.split(\"__\").collect();\n    let keys: Vec<&str> = split[0].split('-').collect();\n\n    // Remove the temporary file extension, e.g., remove the `.d20a.___temp` part from the following filename:\n    // 000000067F000040490000404A00441B0000-000000067F000040490000404A00441B4000__000043483A34CE00.d20a.___temp\n    let lsns = split[1].split('.').collect::<Vec<&str>>()[0];\n    let mut lsns: Vec<&str> = lsns.split('-').collect();\n\n    // The current format of the layer file name: 000000067F0000000400000B150100000000-000000067F0000000400000D350100000000__00000000014B7AC8-v1-00000001\n\n    // Handle generation number `-00000001` part\n    if lsns.last().expect(\"should\").len() == 8 {\n        lsns.pop();\n    }\n\n    // Handle version number `-v1` part\n    if lsns.last().expect(\"should\").starts_with('v') {\n        lsns.pop();\n    }\n\n    if lsns.len() == 1 {\n        lsns.push(lsns[0]);\n    }\n\n    let keys = 
Key::from_hex(keys[0]).unwrap()..Key::from_hex(keys[1]).unwrap();\n    let lsns = Lsn::from_hex(lsns[0]).unwrap()..Lsn::from_hex(lsns[1]).unwrap();\n    (keys, lsns)\n}\n\n#[derive(Clone, Copy)]\nenum LineKind {\n    GcCutoff,\n    Branch,\n}\n\nimpl From<LineKind> for Fill {\n    fn from(value: LineKind) -> Self {\n        match value {\n            LineKind::GcCutoff => Fill::Color(rgb(255, 0, 0)),\n            LineKind::Branch => Fill::Color(rgb(0, 255, 0)),\n        }\n    }\n}\n\nimpl FromStr for LineKind {\n    type Err = anyhow::Error;\n\n    fn from_str(s: &str) -> std::prelude::v1::Result<Self, Self::Err> {\n        Ok(match s {\n            \"gc_cutoff\" => LineKind::GcCutoff,\n            \"branch\" => LineKind::Branch,\n            _ => anyhow::bail!(\"unsupported linekind: {s}\"),\n        })\n    }\n}\n\npub fn main() -> Result<()> {\n    // Parse layer filenames from stdin\n    struct Layer {\n        filename: String,\n        key_range: Range<Key>,\n        lsn_range: Range<Lsn>,\n    }\n    let mut files: Vec<Layer> = vec![];\n    let stdin = io::stdin();\n\n    let mut lines: Vec<(Lsn, LineKind)> = vec![];\n\n    for (lineno, line) in stdin.lock().lines().enumerate() {\n        let lineno = lineno + 1;\n\n        let line = line.unwrap();\n        if let Some((kind, lsn)) = line.split_once(':') {\n            let (kind, lsn) = LineKind::from_str(kind)\n                .context(\"parse kind\")\n                .and_then(|kind| {\n                    if lsn.contains('/') {\n                        Lsn::from_str(lsn)\n                    } else {\n                        Lsn::from_hex(lsn)\n                    }\n                    .map(|lsn| (kind, lsn))\n                    .context(\"parse lsn\")\n                })\n                .with_context(|| format!(\"parse {line:?} on {lineno}\"))?;\n            lines.push((lsn, kind));\n            continue;\n        }\n        let line = PathBuf::from_str(&line).unwrap();\n        let filename = 
line.file_name().unwrap();\n        let filename = filename.to_str().unwrap();\n        let (key_range, lsn_range) = parse_filename(filename);\n        files.push(Layer {\n            filename: filename.to_owned(),\n            key_range,\n            lsn_range,\n        });\n    }\n\n    // Collect all coordinates\n    let mut keys: Vec<Key> = Vec::with_capacity(files.len());\n    let mut lsns: Vec<Lsn> = Vec::with_capacity(files.len() + lines.len());\n\n    for Layer {\n        key_range: keyr,\n        lsn_range: lsnr,\n        ..\n    } in &files\n    {\n        keys.push(keyr.start);\n        keys.push(keyr.end);\n        lsns.push(lsnr.start);\n        lsns.push(lsnr.end);\n    }\n\n    lsns.extend(lines.iter().map(|(lsn, _)| *lsn));\n\n    // Analyze\n    let key_map = build_coordinate_compression_map(keys);\n    let lsn_map = build_coordinate_compression_map(lsns);\n\n    // Initialize stats\n    let mut num_deltas = 0;\n    let mut num_images = 0;\n\n    // Draw\n    let stretch = 3.0; // Stretch out vertically for better visibility\n    println!(\n        \"{}\",\n        BeginSvg {\n            w: (key_map.len() + 10) as f32,\n            h: stretch * lsn_map.len() as f32\n        }\n    );\n\n    let xmargin = 0.05; // Fixed horizontal margin to disambiguate adjacent layers\n\n    for Layer {\n        filename,\n        key_range: keyr,\n        lsn_range: lsnr,\n    } in &files\n    {\n        let key_start = *key_map.get(&keyr.start).unwrap();\n        let key_end = *key_map.get(&keyr.end).unwrap();\n        let key_diff = key_end - key_start;\n        let lsn_max = lsn_map.len();\n\n        if key_start >= key_end {\n            panic!(\"Invalid key range {key_start}-{key_end}\");\n        }\n\n        let lsn_start = *lsn_map.get(&lsnr.start).unwrap();\n        let lsn_end = *lsn_map.get(&lsnr.end).unwrap();\n\n        let mut lsn_diff = (lsn_end - lsn_start) as f32;\n        let mut fill = Fill::None;\n        let mut ymargin = 0.05 * lsn_diff; 
// Height-dependent margin to disambiguate overlapping deltas\n        let mut lsn_offset = 0.0;\n\n        // Fill in and thicken rectangle if it's an\n        // image layer so that we can see it.\n        match lsn_start.cmp(&lsn_end) {\n            Ordering::Less => num_deltas += 1,\n            Ordering::Equal => {\n                num_images += 1;\n                lsn_diff = 0.3;\n                lsn_offset = -lsn_diff / 2.0;\n                ymargin = 0.05;\n                fill = Fill::Color(rgb(0, 0, 0));\n            }\n            Ordering::Greater => panic!(\"Invalid lsn range {lsn_start}-{lsn_end}\"),\n        }\n\n        println!(\n            \"    {}\",\n            rectangle(\n                5.0 + key_start as f32 + stretch * xmargin,\n                stretch * (lsn_max as f32 - (lsn_end as f32 - ymargin - lsn_offset)),\n                key_diff as f32 - stretch * 2.0 * xmargin,\n                stretch * (lsn_diff - 2.0 * ymargin)\n            )\n            .fill(fill)\n            .stroke(Stroke::Color(rgb(0, 0, 0), 0.1))\n            .border_radius(0.4)\n            .comment(filename)\n        );\n    }\n\n    for (lsn, kind) in lines {\n        let lsn_start = *lsn_map.get(&lsn).unwrap();\n        let lsn_end = lsn_start;\n        let stretch = 2.0;\n        let lsn_diff = 0.3;\n        let lsn_offset = -lsn_diff / 2.0;\n        let ymargin = 0.05;\n        println!(\n            \"{}\",\n            rectangle(\n                0.0f32 + stretch * xmargin,\n                stretch * (lsn_map.len() as f32 - (lsn_end as f32 - ymargin - lsn_offset)),\n                (key_map.len() + 10) as f32,\n                stretch * (lsn_diff - 2.0 * ymargin)\n            )\n            .fill(kind)\n        );\n    }\n\n    println!(\"{EndSvg}\");\n\n    eprintln!(\"num_images: {num_images}\");\n    eprintln!(\"num_deltas: {num_deltas}\");\n\n    Ok(())\n}\n"
  },
  {
    "path": "pageserver/ctl/src/index_part.rs",
    "content": "use std::str::FromStr;\n\nuse anyhow::{Context, Ok};\nuse camino::Utf8PathBuf;\nuse pageserver::tenant::{\n    IndexPart,\n    layer_map::{LayerMap, SearchResult},\n    remote_timeline_client::{index::LayerFileMetadata, remote_layer_path},\n    storage_layer::{LayerName, LayerVisibilityHint, PersistentLayerDesc, ReadableLayerWeak},\n};\nuse pageserver_api::key::Key;\nuse serde::Serialize;\nuse std::collections::BTreeMap;\nuse utils::{\n    id::{TenantId, TimelineId},\n    lsn::Lsn,\n    shard::TenantShardId,\n};\n\n#[derive(clap::Subcommand)]\npub(crate) enum IndexPartCmd {\n    Dump {\n        path: Utf8PathBuf,\n    },\n    /// Find all layers that need to be searched to construct the given page at the given LSN.\n    Search {\n        #[arg(long)]\n        tenant_id: String,\n        #[arg(long)]\n        timeline_id: String,\n        #[arg(long)]\n        path: Utf8PathBuf,\n        #[arg(long)]\n        key: String,\n        #[arg(long)]\n        lsn: String,\n    },\n    /// List all visible delta and image layers at the latest LSN.\n    ListVisibleLayers {\n        #[arg(long)]\n        path: Utf8PathBuf,\n    },\n}\n\nfn create_layer_map_from_index_part(\n    index_part: &IndexPart,\n    tenant_shard_id: TenantShardId,\n    timeline_id: TimelineId,\n) -> LayerMap {\n    let mut layer_map = LayerMap::default();\n    {\n        let mut updates = layer_map.batch_update();\n        for (key, value) in index_part.layer_metadata.iter() {\n            updates.insert_historic(PersistentLayerDesc::from_filename(\n                tenant_shard_id,\n                timeline_id,\n                key.clone(),\n                value.file_size,\n            ));\n        }\n    }\n    layer_map\n}\n\nasync fn search_layers(\n    tenant_id: &str,\n    timeline_id: &str,\n    path: &Utf8PathBuf,\n    key: &str,\n    lsn: &str,\n) -> anyhow::Result<()> {\n    let tenant_id = TenantId::from_str(tenant_id).unwrap();\n    let tenant_shard_id = 
TenantShardId::unsharded(tenant_id);\n    let timeline_id = TimelineId::from_str(timeline_id).unwrap();\n    let index_json = {\n        let bytes = tokio::fs::read(path).await?;\n        IndexPart::from_json_bytes(&bytes).unwrap()\n    };\n    let layer_map = create_layer_map_from_index_part(&index_json, tenant_shard_id, timeline_id);\n    let key = Key::from_hex(key)?;\n\n    let lsn = Lsn::from_str(lsn).unwrap();\n    let mut end_lsn = lsn;\n    loop {\n        let result = layer_map.search(key, end_lsn);\n        match result {\n            Some(SearchResult { layer, lsn_floor }) => {\n                let disk_layer = match layer {\n                    ReadableLayerWeak::PersistentLayer(layer) => layer,\n                    ReadableLayerWeak::InMemoryLayer(_) => {\n                        anyhow::bail!(\"unexpected in-memory layer\")\n                    }\n                };\n\n                let metadata = index_json\n                    .layer_metadata\n                    .get(&disk_layer.layer_name())\n                    .unwrap();\n                println!(\n                    \"{}\",\n                    remote_layer_path(\n                        &tenant_id,\n                        &timeline_id,\n                        metadata.shard,\n                        &disk_layer.layer_name(),\n                        metadata.generation\n                    )\n                );\n                end_lsn = lsn_floor;\n            }\n            None => break,\n        }\n    }\n    Ok(())\n}\n\n#[derive(Debug, Clone, Serialize)]\nstruct VisibleLayers {\n    pub total_images: u64,\n    pub total_image_bytes: u64,\n    pub total_deltas: u64,\n    pub total_delta_bytes: u64,\n    pub layer_metadata: BTreeMap<LayerName, LayerFileMetadata>,\n}\n\nimpl VisibleLayers {\n    pub fn new() -> Self {\n        Self {\n            layer_metadata: BTreeMap::new(),\n            total_images: 0,\n            total_image_bytes: 0,\n            total_deltas: 0,\n            
total_delta_bytes: 0,\n        }\n    }\n\n    pub fn add_layer(&mut self, name: LayerName, layer: LayerFileMetadata) {\n        match name {\n            LayerName::Image(_) => {\n                self.total_images += 1;\n                self.total_image_bytes += layer.file_size;\n            }\n            LayerName::Delta(_) => {\n                self.total_deltas += 1;\n                self.total_delta_bytes += layer.file_size;\n            }\n        }\n        self.layer_metadata.insert(name, layer);\n    }\n}\n\nasync fn list_visible_layers(path: &Utf8PathBuf) -> anyhow::Result<()> {\n    let tenant_id = TenantId::generate();\n    let tenant_shard_id = TenantShardId::unsharded(tenant_id);\n    let timeline_id = TimelineId::generate();\n\n    let bytes = tokio::fs::read(path).await.context(\"read file\")?;\n    let index_part = IndexPart::from_json_bytes(&bytes).context(\"deserialize\")?;\n    let layer_map = create_layer_map_from_index_part(&index_part, tenant_shard_id, timeline_id);\n    let mut visible_layers = VisibleLayers::new();\n    let (layers, _key_space) = layer_map.get_visibility(Vec::new());\n    for (layer, visibility) in layers {\n        if visibility == LayerVisibilityHint::Visible {\n            visible_layers.add_layer(\n                layer.layer_name(),\n                index_part\n                    .layer_metadata\n                    .get(&layer.layer_name())\n                    .unwrap()\n                    .clone(),\n            );\n        }\n    }\n    let output = serde_json::to_string_pretty(&visible_layers).context(\"serialize output\")?;\n    println!(\"{output}\");\n\n    Ok(())\n}\n\npub(crate) async fn main(cmd: &IndexPartCmd) -> anyhow::Result<()> {\n    match cmd {\n        IndexPartCmd::Dump { path } => {\n            let bytes = tokio::fs::read(path).await.context(\"read file\")?;\n            let des: IndexPart = IndexPart::from_json_bytes(&bytes).context(\"deserialize\")?;\n            let output = 
serde_json::to_string_pretty(&des).context(\"serialize output\")?;\n            println!(\"{output}\");\n            Ok(())\n        }\n        IndexPartCmd::Search {\n            tenant_id,\n            timeline_id,\n            path,\n            key,\n            lsn,\n        } => search_layers(tenant_id, timeline_id, path, key, lsn).await,\n        IndexPartCmd::ListVisibleLayers { path } => list_visible_layers(path).await,\n    }\n}\n"
  },
  {
    "path": "pageserver/ctl/src/key.rs",
    "content": "use std::str::FromStr;\n\nuse anyhow::Context;\nuse clap::Parser;\nuse pageserver_api::key::Key;\nuse pageserver_api::reltag::{BlockNumber, RelTag, SlruKind};\nuse pageserver_api::shard::{DEFAULT_STRIPE_SIZE, ShardCount, ShardStripeSize};\n\n#[derive(Parser)]\npub(super) struct DescribeKeyCommand {\n    /// Key material in one of the forms: hex, span attributes captured from log, reltag blocknum\n    input: Vec<String>,\n\n    /// The number of shards to calculate what Keys placement would be.\n    #[arg(long)]\n    shard_count: Option<CustomShardCount>,\n\n    /// The sharding stripe size.\n    ///\n    /// The default is hardcoded. It makes no sense to provide this without providing\n    /// `--shard-count`.\n    #[arg(long, requires = \"shard_count\")]\n    stripe_size: Option<u32>,\n}\n\n/// Sharded shard count without unsharded count, which the actual ShardCount supports.\n#[derive(Clone, Copy)]\npub(super) struct CustomShardCount(std::num::NonZeroU8);\n\n#[derive(Debug, thiserror::Error)]\npub(super) enum InvalidShardCount {\n    #[error(transparent)]\n    ParsingFailed(#[from] std::num::ParseIntError),\n    #[error(\"too few shards\")]\n    TooFewShards,\n}\n\nimpl FromStr for CustomShardCount {\n    type Err = InvalidShardCount;\n\n    fn from_str(s: &str) -> Result<Self, Self::Err> {\n        let inner: std::num::NonZeroU8 = s.parse()?;\n        if inner.get() < 2 {\n            Err(InvalidShardCount::TooFewShards)\n        } else {\n            Ok(CustomShardCount(inner))\n        }\n    }\n}\n\nimpl From<CustomShardCount> for ShardCount {\n    fn from(value: CustomShardCount) -> Self {\n        ShardCount::new(value.0.get())\n    }\n}\n\nimpl DescribeKeyCommand {\n    pub(super) fn execute(self) {\n        let DescribeKeyCommand {\n            input,\n            shard_count,\n            stripe_size,\n        } = self;\n\n        let material = KeyMaterial::try_from(input.as_slice()).unwrap();\n        let kind = material.kind();\n       
 let key = Key::from(material);\n\n        println!(\"parsed from {kind}: {key}:\");\n        println!();\n        println!(\"{key:?}\");\n\n        macro_rules! kind_query {\n            ([$($name:ident),*$(,)?]) => {{[$(kind_query!($name)),*]}};\n            ($name:ident) => {{\n                let s: &'static str = stringify!($name);\n                let s = s.strip_prefix(\"is_\").unwrap_or(s);\n                let s = s.strip_suffix(\"_key\").unwrap_or(s);\n\n                #[allow(clippy::needless_borrow)]\n                (s, key.$name())\n            }};\n        }\n\n        // the current characterization is a mess of these boolean queries and separate\n        // \"recognization\". I think it accurately represents how strictly we model the Key\n        // right now, but could of course be made less confusing.\n\n        let queries = kind_query!([\n            is_rel_block_key,\n            is_rel_vm_block_key,\n            is_rel_fsm_block_key,\n            is_slru_block_key,\n            is_inherited_key,\n            is_rel_size_key,\n            is_slru_segment_size_key,\n        ]);\n\n        let recognized_kind = \"recognized kind\";\n        let metadata_key = \"metadata key\";\n        let shard_placement = \"shard placement\";\n\n        let longest = queries\n            .iter()\n            .map(|t| t.0)\n            .chain([recognized_kind, metadata_key, shard_placement])\n            .map(|s| s.len())\n            .max()\n            .unwrap();\n\n        let colon = 1;\n        let padding = 1;\n\n        for (name, is) in queries {\n            let width = longest - name.len() + colon + padding;\n            println!(\"{}{:width$}{}\", name, \":\", is);\n        }\n\n        let width = longest - recognized_kind.len() + colon + padding;\n        println!(\n            \"{}{:width$}{:?}\",\n            recognized_kind,\n            \":\",\n            RecognizedKeyKind::new(key),\n        );\n\n        if let Some(shard_count) = 
shard_count {\n            // seeing the sharding placement might be confusing, so leave it out unless shard\n            // count was given.\n\n            let stripe_size = stripe_size\n                .map(ShardStripeSize)\n                .unwrap_or(DEFAULT_STRIPE_SIZE);\n            println!(\n                \"# placement with shard_count: {} and stripe_size: {}:\",\n                shard_count.0, stripe_size.0\n            );\n            let width = longest - shard_placement.len() + colon + padding;\n            println!(\n                \"{}{:width$}{:?}\",\n                shard_placement,\n                \":\",\n                pageserver_api::shard::describe(&key, shard_count.into(), stripe_size)\n            );\n        }\n    }\n}\n\n/// Hand-wavy \"inputs we accept\" for a key.\n#[derive(Debug)]\npub(super) enum KeyMaterial {\n    Hex(Key),\n    String(SpanAttributesFromLogs),\n    Split(RelTag, BlockNumber),\n}\n\nimpl KeyMaterial {\n    fn kind(&self) -> &'static str {\n        match self {\n            KeyMaterial::Hex(_) => \"hex\",\n            KeyMaterial::String(_) | KeyMaterial::Split(_, _) => \"split\",\n        }\n    }\n}\n\nimpl From<KeyMaterial> for Key {\n    fn from(value: KeyMaterial) -> Self {\n        match value {\n            KeyMaterial::Hex(key) => key,\n            KeyMaterial::String(SpanAttributesFromLogs(rt, blocknum))\n            | KeyMaterial::Split(rt, blocknum) => {\n                pageserver_api::key::rel_block_to_key(rt, blocknum)\n            }\n        }\n    }\n}\n\nimpl<S: AsRef<str>> TryFrom<&[S]> for KeyMaterial {\n    type Error = anyhow::Error;\n\n    fn try_from(value: &[S]) -> Result<Self, Self::Error> {\n        match value {\n            [] => anyhow::bail!(\n                \"need 1..N positional arguments describing the key, try hex or a log line\"\n            ),\n            [one] => {\n                let one = one.as_ref();\n\n                let key = Key::from_hex(one).map(KeyMaterial::Hex);\n\n 
               let attrs = SpanAttributesFromLogs::from_str(one).map(KeyMaterial::String);\n\n                match (key, attrs) {\n                    (Ok(key), _) => Ok(key),\n                    (_, Ok(s)) => Ok(s),\n                    (Err(e1), Err(e2)) => anyhow::bail!(\n                        \"failed to parse {one:?} as hex or span attributes:\\n- {e1:#}\\n- {e2:#}\"\n                    ),\n                }\n            }\n            more => {\n                // assume going left to right one of these is a reltag and then we find a blocknum\n                // this works, because we don't have plain numbers at least right after reltag in\n                // logs. for some definition of \"works\".\n\n                let Some((reltag_at, reltag)) = more\n                    .iter()\n                    .map(AsRef::as_ref)\n                    .enumerate()\n                    .find_map(|(i, s)| {\n                        s.split_once(\"rel=\")\n                            .map(|(_garbage, actual)| actual)\n                            .unwrap_or(s)\n                            .parse::<RelTag>()\n                            .ok()\n                            .map(|rt| (i, rt))\n                    })\n                else {\n                    anyhow::bail!(\"found no RelTag in arguments\");\n                };\n\n                let Some(blocknum) = more\n                    .iter()\n                    .map(AsRef::as_ref)\n                    .skip(reltag_at)\n                    .find_map(|s| {\n                        s.split_once(\"blkno=\")\n                            .map(|(_garbage, actual)| actual)\n                            .unwrap_or(s)\n                            .parse::<BlockNumber>()\n                            .ok()\n                    })\n                else {\n                    anyhow::bail!(\"found no blocknum in arguments\");\n                };\n\n                Ok(KeyMaterial::Split(reltag, blocknum))\n            }\n      
  }\n    }\n}\n\n#[derive(Debug)]\npub(super) struct SpanAttributesFromLogs(RelTag, BlockNumber);\n\nimpl std::str::FromStr for SpanAttributesFromLogs {\n    type Err = anyhow::Error;\n\n    fn from_str(s: &str) -> Result<Self, Self::Err> {\n        // accept the span separator but do not require or fail if either is missing\n        // \"whatever{rel=1663/16389/24615 blkno=1052204 req_lsn=FFFFFFFF/FFFFFFFF}\"\n        let (_, reltag) = s\n            .split_once(\"rel=\")\n            .ok_or_else(|| anyhow::anyhow!(\"cannot find 'rel='\"))?;\n        let reltag = reltag.split_whitespace().next().unwrap();\n\n        let (_, blocknum) = s\n            .split_once(\"blkno=\")\n            .ok_or_else(|| anyhow::anyhow!(\"cannot find 'blkno='\"))?;\n        let blocknum = blocknum.split_whitespace().next().unwrap();\n\n        let reltag = reltag\n            .parse()\n            .with_context(|| format!(\"parse reltag from {reltag:?}\"))?;\n        let blocknum = blocknum\n            .parse()\n            .with_context(|| format!(\"parse blocknum from {blocknum:?}\"))?;\n\n        Ok(Self(reltag, blocknum))\n    }\n}\n\n#[derive(Debug)]\n#[allow(dead_code)] // debug print is used\nenum RecognizedKeyKind {\n    DbDir,\n    ControlFile,\n    Checkpoint,\n    AuxFilesV1,\n    SlruDir(Result<SlruKind, u32>),\n    RelMap(RelTagish<2>),\n    RelDir(RelTagish<2>),\n    AuxFileV2(Result<AuxFileV2, utils::Hex<[u8; 16]>>),\n}\n\n#[derive(Debug, PartialEq)]\n#[allow(unused)]\nenum AuxFileV2 {\n    Recognized(&'static str, utils::Hex<[u8; 13]>),\n    OtherWithPrefix(&'static str, utils::Hex<[u8; 13]>),\n    Other(utils::Hex<[u8; 13]>),\n}\n\nimpl RecognizedKeyKind {\n    fn new(key: Key) -> Option<Self> {\n        use RecognizedKeyKind::{\n            AuxFilesV1, Checkpoint, ControlFile, DbDir, RelDir, RelMap, SlruDir,\n        };\n\n        let slru_dir_kind = pageserver_api::key::slru_dir_kind(&key);\n\n        Some(match key {\n            pageserver_api::key::DBDIR_KEY => 
DbDir,\n            pageserver_api::key::CONTROLFILE_KEY => ControlFile,\n            pageserver_api::key::CHECKPOINT_KEY => Checkpoint,\n            pageserver_api::key::AUX_FILES_KEY => AuxFilesV1,\n            _ if slru_dir_kind.is_some() => SlruDir(slru_dir_kind.unwrap()),\n            _ if key.field1 == 0 && key.field4 == 0 && key.field5 == 0 && key.field6 == 0 => {\n                RelMap([key.field2, key.field3].into())\n            }\n            _ if key.field1 == 0 && key.field4 == 0 && key.field5 == 0 && key.field6 == 1 => {\n                RelDir([key.field2, key.field3].into())\n            }\n            _ if key.is_metadata_key() => RecognizedKeyKind::AuxFileV2(\n                AuxFileV2::new(key).ok_or_else(|| utils::Hex(key.to_i128().to_be_bytes())),\n            ),\n            _ => return None,\n        })\n    }\n}\n\nimpl AuxFileV2 {\n    fn new(key: Key) -> Option<AuxFileV2> {\n        const EMPTY_HASH: [u8; 13] = {\n            let mut out = [0u8; 13];\n            let hash = pageserver::aux_file::fnv_hash(b\"\").to_be_bytes();\n            let mut i = 3;\n            while i < 16 {\n                out[i - 3] = hash[i];\n                i += 1;\n            }\n            out\n        };\n\n        let bytes = key.to_i128().to_be_bytes();\n        let hash = utils::Hex(<[u8; 13]>::try_from(&bytes[3..]).unwrap());\n\n        assert_eq!(EMPTY_HASH.len(), hash.0.len());\n\n        // TODO: we could probably find the preimages for the hashes\n\n        Some(match (bytes[1], bytes[2]) {\n            (1, 1) => AuxFileV2::Recognized(\"pg_logical/mappings/\", hash),\n            (1, 2) => AuxFileV2::Recognized(\"pg_logical/snapshots/\", hash),\n            (1, 3) if hash.0 == EMPTY_HASH => {\n                AuxFileV2::Recognized(\"pg_logical/replorigin_checkpoint\", hash)\n            }\n            (2, 1) => AuxFileV2::Recognized(\"pg_replslot/\", hash),\n            (3, 1) => AuxFileV2::Recognized(\"pg_stat/pgstat.stat\", hash),\n            
(1, 0xff) => AuxFileV2::OtherWithPrefix(\"pg_logical/\", hash),\n            (0xff, 0xff) => AuxFileV2::Other(hash),\n            _ => return None,\n        })\n    }\n}\n\n/// Prefix of RelTag, currently only known use cases are the two item versions.\n///\n/// Renders like a reltag with `/`, nothing else.\nstruct RelTagish<const N: usize>([u32; N]);\n\nimpl<const N: usize> From<[u32; N]> for RelTagish<N> {\n    fn from(val: [u32; N]) -> Self {\n        RelTagish(val)\n    }\n}\n\nimpl<const N: usize> std::fmt::Debug for RelTagish<N> {\n    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {\n        use std::fmt::Write as _;\n        let mut first = true;\n        self.0.iter().try_for_each(|x| {\n            if !first {\n                f.write_char('/')?;\n            }\n            first = false;\n            write!(f, \"{x}\")\n        })\n    }\n}\n\n#[cfg(test)]\nmod tests {\n    use pageserver::aux_file::encode_aux_file_key;\n\n    use super::*;\n\n    #[test]\n    fn hex_is_key_material() {\n        let m = KeyMaterial::try_from(&[\"000000067F0000400200DF927900FFFFFFFF\"][..]).unwrap();\n        assert!(matches!(m, KeyMaterial::Hex(_)), \"{m:?}\");\n    }\n\n    #[test]\n    fn single_positional_spanalike_is_key_material() {\n        // why is this needed? 
if you are checking many, then copypaste starts to appeal\n        let strings = [\n            (\n                line!(),\n                \"2024-05-15T15:33:49.873906Z ERROR page_service_conn_main{peer_addr=A:B}:process_query{tenant_id=C timeline_id=D}:handle_pagerequests:handle_get_page_at_lsn_request{rel=1663/208101/2620_fsm blkno=2 req_lsn=0/238D98C8}: error reading relation or page version: Read error: could not find data for key 000000067F00032CE5000000000000000001 (shard ShardNumber(0)) at LSN 0/1D0A16C1, request LSN 0/238D98C8, ancestor 0/0\",\n            ),\n            (line!(), \"rel=1663/208101/2620_fsm blkno=2\"),\n            (line!(), \"rel=1663/208101/2620.1 blkno=2\"),\n        ];\n\n        let mut first: Option<Key> = None;\n\n        for (line, example) in strings {\n            let m = KeyMaterial::try_from(&[example][..])\n                .unwrap_or_else(|e| panic!(\"failed to parse example from line {line}: {e:?}\"));\n            let key = Key::from(m);\n            if let Some(first) = first {\n                assert_eq!(first, key);\n            } else {\n                first = Some(key);\n            }\n        }\n\n        // not supporting this is rather accidental, but I think the input parsing is lenient\n        // enough already\n        KeyMaterial::try_from(&[\"1663/208101/2620_fsm 2\"][..]).unwrap_err();\n    }\n\n    #[test]\n    fn multiple_spanlike_args() {\n        let strings = [\n            (\n                line!(),\n                &[\n                    \"process_query{tenant_id=C\",\n                    \"timeline_id=D}:handle_pagerequests:handle_get_page_at_lsn_request{rel=1663/208101/2620_fsm\",\n                    \"blkno=2\",\n                    \"req_lsn=0/238D98C8}\",\n                ][..],\n            ),\n            (line!(), &[\"rel=1663/208101/2620_fsm\", \"blkno=2\"][..]),\n            (line!(), &[\"1663/208101/2620_fsm\", \"2\"][..]),\n        ];\n\n        let mut first: Option<Key> = None;\n\n  
      for (line, example) in strings {\n            let m = KeyMaterial::try_from(example)\n                .unwrap_or_else(|e| panic!(\"failed to parse example from line {line}: {e:?}\"));\n            let key = Key::from(m);\n            if let Some(first) = first {\n                assert_eq!(first, key);\n            } else {\n                first = Some(key);\n            }\n        }\n    }\n    #[test]\n    fn recognized_auxfiles() {\n        use AuxFileV2::*;\n\n        let empty = [\n            0x2e, 0x07, 0xbb, 0x01, 0x42, 0x62, 0xb8, 0x21, 0x75, 0x62, 0x95, 0xc5, 0x8d,\n        ];\n        let foobar = [\n            0x62, 0x79, 0x3c, 0x64, 0xbf, 0x6f, 0x0d, 0x35, 0x97, 0xba, 0x44, 0x6f, 0x18,\n        ];\n\n        #[rustfmt::skip]\n        let examples = [\n            (line!(), \"pg_logical/mappings/foobar\", Recognized(\"pg_logical/mappings/\", utils::Hex(foobar))),\n            (line!(), \"pg_logical/snapshots/foobar\", Recognized(\"pg_logical/snapshots/\", utils::Hex(foobar))),\n            (line!(), \"pg_logical/replorigin_checkpoint\", Recognized(\"pg_logical/replorigin_checkpoint\", utils::Hex(empty))),\n            (line!(), \"pg_logical/foobar\", OtherWithPrefix(\"pg_logical/\", utils::Hex(foobar))),\n            (line!(), \"pg_replslot/foobar\", Recognized(\"pg_replslot/\", utils::Hex(foobar))),\n            (line!(), \"foobar\", Other(utils::Hex(foobar))),\n        ];\n\n        for (line, path, expected) in examples {\n            let key = encode_aux_file_key(path);\n            let recognized =\n                AuxFileV2::new(key).unwrap_or_else(|| panic!(\"line {line} example failed\"));\n\n            assert_eq!(recognized, expected);\n        }\n\n        assert_eq!(\n            AuxFileV2::new(Key::from_hex(\"600000102000000000000000000000000000\").unwrap()),\n            None,\n            \"example key has one too few 0 after 6 before 1\"\n        );\n    }\n}\n"
  },
  {
    "path": "pageserver/ctl/src/layer_map_analyzer.rs",
    "content": "//! Tool for extracting content-dependent metadata about layers. Useful for scanning real project layer files and evaluating the effectiveness of different heuristics on them.\n//!\n//! Currently it only analyzes holes, which are regions within the layer range that the layer contains no updates for. In the future it might do more analysis (maybe key quantiles?) but it should never return sensitive data.\n\nuse std::cmp::Ordering;\nuse std::collections::BinaryHeap;\nuse std::ops::Range;\nuse std::str::FromStr;\nuse std::{fs, str};\n\nuse anyhow::{Result, anyhow};\nuse camino::{Utf8Path, Utf8PathBuf};\nuse pageserver::context::{DownloadBehavior, RequestContext};\nuse pageserver::page_cache::{self, PAGE_SZ};\nuse pageserver::task_mgr::TaskKind;\nuse pageserver::tenant::block_io::FileBlockReader;\nuse pageserver::tenant::disk_btree::{DiskBtreeReader, VisitDirection};\nuse pageserver::tenant::storage_layer::delta_layer::{DELTA_KEY_SIZE, Summary};\nuse pageserver::tenant::storage_layer::{LayerName, range_overlaps};\nuse pageserver::tenant::{TENANTS_SEGMENT_NAME, TIMELINES_SEGMENT_NAME};\nuse pageserver::virtual_file::api::IoMode;\nuse pageserver::virtual_file::{self, VirtualFile};\nuse pageserver_api::key::{KEY_SIZE, Key};\nuse utils::bin_ser::BeSer;\nuse utils::lsn::Lsn;\n\nuse crate::AnalyzeLayerMapCmd;\n\nconst MIN_HOLE_LENGTH: i128 = (128 * 1024 * 1024 / PAGE_SZ) as i128;\nconst DEFAULT_MAX_HOLES: usize = 10;\n\n/// Wrapper for key range to provide reverse ordering by range length for BinaryHeap\n#[derive(PartialEq, Eq)]\npub struct Hole(Range<Key>);\n\nimpl Ord for Hole {\n    fn cmp(&self, other: &Self) -> Ordering {\n        let other_len = other.0.end.to_i128() - other.0.start.to_i128();\n        let self_len = self.0.end.to_i128() - self.0.start.to_i128();\n        other_len.cmp(&self_len)\n    }\n}\n\nimpl PartialOrd for Hole {\n    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {\n        Some(self.cmp(other))\n    }\n}\n\npub(crate) 
struct LayerFile {\n    pub key_range: Range<Key>,\n    pub lsn_range: Range<Lsn>,\n    pub is_delta: bool,\n    pub holes: Vec<Hole>,\n}\n\nimpl LayerFile {\n    fn skips(&self, key_range: &Range<Key>) -> bool {\n        if !range_overlaps(&self.key_range, key_range) {\n            return false;\n        }\n        let start = match self\n            .holes\n            .binary_search_by_key(&key_range.start, |hole| hole.0.start)\n        {\n            Ok(index) => index,\n            Err(index) => {\n                if index == 0 {\n                    return false;\n                }\n                index - 1\n            }\n        };\n        self.holes[start].0.end >= key_range.end\n    }\n}\n\npub(crate) fn parse_filename(name: &str) -> anyhow::Result<LayerFile> {\n    let layer_name =\n        LayerName::from_str(name).map_err(|e| anyhow!(\"failed to parse layer name: {e}\"))?;\n\n    let holes = Vec::new();\n    Ok(LayerFile {\n        key_range: layer_name.key_range().clone(),\n        lsn_range: layer_name.lsn_as_range(),\n        is_delta: layer_name.is_delta(),\n        holes,\n    })\n}\n\n// Finds the max_holes largest holes, ignoring any that are smaller than MIN_HOLE_LENGTH\"\nasync fn get_holes(path: &Utf8Path, max_holes: usize, ctx: &RequestContext) -> Result<Vec<Hole>> {\n    let file = VirtualFile::open(path, ctx).await?;\n    let file_id = page_cache::next_file_id();\n    let block_reader = FileBlockReader::new(&file, file_id);\n    let summary_blk = block_reader.read_blk(0, ctx).await?;\n    let actual_summary = Summary::des_prefix(summary_blk.as_ref())?;\n    let tree_reader = DiskBtreeReader::<_, DELTA_KEY_SIZE>::new(\n        actual_summary.index_start_blk,\n        actual_summary.index_root_blk,\n        block_reader,\n    );\n    // min-heap (reserve space for one more element added before eviction)\n    let mut heap: BinaryHeap<Hole> = BinaryHeap::with_capacity(max_holes + 1);\n    let mut prev_key: Option<Key> = None;\n    
tree_reader\n        .visit(\n            &[0u8; DELTA_KEY_SIZE],\n            VisitDirection::Forwards,\n            |key, _value| {\n                let curr = Key::from_slice(&key[..KEY_SIZE]);\n                if let Some(prev) = prev_key {\n                    if curr.to_i128() - prev.to_i128() >= MIN_HOLE_LENGTH {\n                        heap.push(Hole(prev..curr));\n                        if heap.len() > max_holes {\n                            heap.pop(); // remove smallest hole\n                        }\n                    }\n                }\n                prev_key = Some(curr.next());\n                true\n            },\n            ctx,\n        )\n        .await?;\n    let mut holes = heap.into_vec();\n    holes.sort_by_key(|hole| hole.0.start);\n    Ok(holes)\n}\n\npub(crate) async fn main(cmd: &AnalyzeLayerMapCmd) -> Result<()> {\n    let storage_path = &cmd.path;\n    let max_holes = cmd.max_holes.unwrap_or(DEFAULT_MAX_HOLES);\n    let ctx =\n        RequestContext::new(TaskKind::DebugTool, DownloadBehavior::Error).with_scope_debug_tools();\n\n    // Initialize virtual_file (file descriptor cache) and page cache which are needed to access layer persistent B-Tree.\n    pageserver::virtual_file::init(\n        10,\n        virtual_file::api::IoEngineKind::StdFs,\n        IoMode::preferred(),\n        virtual_file::SyncMode::Sync,\n    );\n    pageserver::page_cache::init(100);\n\n    let mut total_delta_layers = 0usize;\n    let mut total_image_layers = 0usize;\n    let mut total_excess_layers = 0usize;\n    for tenant in fs::read_dir(storage_path.join(TENANTS_SEGMENT_NAME))? {\n        let tenant = tenant?;\n        if !tenant.file_type()?.is_dir() {\n            continue;\n        }\n        for timeline in fs::read_dir(tenant.path().join(TIMELINES_SEGMENT_NAME))? 
{\n            let timeline = timeline?;\n            if !timeline.file_type()?.is_dir() {\n                continue;\n            }\n            // Collect sorted vec of layers and count deltas\n            let mut layers = Vec::new();\n            let mut n_deltas = 0usize;\n\n            for layer in fs::read_dir(timeline.path())? {\n                let layer = layer?;\n                if let Ok(mut layer_file) =\n                    parse_filename(&layer.file_name().into_string().unwrap())\n                {\n                    if layer_file.is_delta {\n                        let layer_path =\n                            Utf8PathBuf::from_path_buf(layer.path()).expect(\"non-Unicode path\");\n                        layer_file.holes = get_holes(&layer_path, max_holes, &ctx).await?;\n                        n_deltas += 1;\n                    }\n                    layers.push(layer_file);\n                }\n            }\n            layers.sort_by_key(|layer| layer.lsn_range.end);\n\n            // Count the number of holes and number of excess layers.\n            // Excess layer is image layer generated when holes in delta layers are not considered.\n            let mut n_excess_layers = 0usize;\n            let mut n_holes = 0usize;\n\n            for i in 0..layers.len() {\n                if !layers[i].is_delta {\n                    let mut n_deltas_since_last_image = 0usize;\n                    let mut n_skipped = 0usize;\n                    let img_key_range = &layers[i].key_range;\n                    for j in (0..i).rev() {\n                        if range_overlaps(img_key_range, &layers[j].key_range) {\n                            if layers[j].is_delta {\n                                n_deltas_since_last_image += 1;\n                                if layers[j].skips(img_key_range) {\n                                    n_skipped += 1;\n                                }\n                            } else {\n                                // 
Image layer is always dense, despite the fact that it doesn't contain all possible\n                                // key values in the specified range: there may be no keys in the storage belonging\n                                // to the image layer range but not present in the image layer.\n                                break;\n                            }\n                        }\n                    }\n                    if n_deltas_since_last_image >= 3 && n_deltas_since_last_image - n_skipped < 3 {\n                        // It is just an approximation: it doesn't take into account all image coverage.\n                        // Moreover the new layer map doesn't count total deltas, but the max stack of overlapping deltas.\n                        n_excess_layers += 1;\n                    }\n                    n_holes += n_skipped;\n                }\n            }\n            println!(\n                \"Tenant {} timeline {} delta layers {} image layers {} excess layers {} holes {}\",\n                tenant.file_name().into_string().unwrap(),\n                timeline.file_name().into_string().unwrap(),\n                n_deltas,\n                layers.len() - n_deltas,\n                n_excess_layers,\n                n_holes\n            );\n            total_delta_layers += n_deltas;\n            total_image_layers += layers.len() - n_deltas;\n            total_excess_layers += n_excess_layers;\n        }\n    }\n    println!(\n        \"Total delta layers {total_delta_layers} image layers {total_image_layers} excess layers {total_excess_layers}\"\n    );\n    Ok(())\n}\n"
  },
  {
    "path": "pageserver/ctl/src/layers.rs",
    "content": "use std::fs::{self, File};\nuse std::path::{Path, PathBuf};\n\nuse anyhow::Result;\nuse camino::{Utf8Path, Utf8PathBuf};\nuse clap::Subcommand;\nuse pageserver::context::{DownloadBehavior, RequestContext};\nuse pageserver::task_mgr::TaskKind;\nuse pageserver::tenant::storage_layer::{DeltaLayer, ImageLayer, delta_layer, image_layer};\nuse pageserver::tenant::{TENANTS_SEGMENT_NAME, TIMELINES_SEGMENT_NAME};\nuse pageserver::virtual_file::api::IoMode;\nuse pageserver::{page_cache, virtual_file};\nuse pageserver_api::key::Key;\nuse utils::id::{TenantId, TimelineId};\n\nuse crate::layer_map_analyzer::{LayerFile, parse_filename};\n\n#[derive(Subcommand)]\npub(crate) enum LayerCmd {\n    /// List all tenants and timelines under the pageserver path\n    ///\n    /// Example: `cargo run --bin pagectl layer list .neon/`\n    List { path: PathBuf },\n    /// List all layers of a given tenant and timeline\n    ///\n    /// Example: `cargo run --bin pagectl layer list .neon/`\n    ListLayer {\n        path: PathBuf,\n        tenant: String,\n        timeline: String,\n        key: Option<Key>,\n    },\n    /// Dump all information of a layer file\n    DumpLayer {\n        path: PathBuf,\n        tenant: String,\n        timeline: String,\n        /// The id from list-layer command\n        id: usize,\n    },\n    /// Dump all information of a layer file locally\n    DumpLayerLocal { path: PathBuf },\n    RewriteSummary {\n        layer_file_path: Utf8PathBuf,\n        #[clap(long)]\n        new_tenant_id: Option<TenantId>,\n        #[clap(long)]\n        new_timeline_id: Option<TimelineId>,\n    },\n}\n\nasync fn read_delta_file(path: impl AsRef<Path>, ctx: &RequestContext) -> Result<()> {\n    virtual_file::init(\n        10,\n        virtual_file::api::IoEngineKind::StdFs,\n        IoMode::preferred(),\n        virtual_file::SyncMode::Sync,\n    );\n    page_cache::init(100);\n    let path = Utf8Path::from_path(path.as_ref()).expect(\"non-Unicode path\");\n    
let file = File::open(path)?;\n    let delta_layer = DeltaLayer::new_for_path(path, file)?;\n    delta_layer.dump(true, ctx).await?;\n    Ok(())\n}\n\nasync fn read_image_file(path: impl AsRef<Path>, ctx: &RequestContext) -> Result<()> {\n    virtual_file::init(\n        10,\n        virtual_file::api::IoEngineKind::StdFs,\n        IoMode::preferred(),\n        virtual_file::SyncMode::Sync,\n    );\n    page_cache::init(100);\n    let path = Utf8Path::from_path(path.as_ref()).expect(\"non-Unicode path\");\n    let file = File::open(path)?;\n    let image_layer = ImageLayer::new_for_path(path, file)?;\n    image_layer.dump(true, ctx).await?;\n    Ok(())\n}\n\npub(crate) async fn main(cmd: &LayerCmd) -> Result<()> {\n    let ctx =\n        RequestContext::new(TaskKind::DebugTool, DownloadBehavior::Error).with_scope_debug_tools();\n    match cmd {\n        LayerCmd::List { path } => {\n            for tenant in fs::read_dir(path.join(TENANTS_SEGMENT_NAME))? {\n                let tenant = tenant?;\n                if !tenant.file_type()?.is_dir() {\n                    continue;\n                }\n                println!(\"tenant {}\", tenant.file_name().to_string_lossy());\n                for timeline in fs::read_dir(tenant.path().join(TIMELINES_SEGMENT_NAME))? 
{\n                    let timeline = timeline?;\n                    if !timeline.file_type()?.is_dir() {\n                        continue;\n                    }\n                    println!(\"- timeline {}\", timeline.file_name().to_string_lossy());\n                }\n            }\n            Ok(())\n        }\n        LayerCmd::ListLayer {\n            path,\n            tenant,\n            timeline,\n            key,\n        } => {\n            let timeline_path = path\n                .join(TENANTS_SEGMENT_NAME)\n                .join(tenant)\n                .join(TIMELINES_SEGMENT_NAME)\n                .join(timeline);\n            let mut idx = 0;\n            let mut to_print = Vec::default();\n            for layer in fs::read_dir(timeline_path)? {\n                let layer = layer?;\n                if let Ok(layer_file) = parse_filename(&layer.file_name().into_string().unwrap()) {\n                    if let Some(key) = key {\n                        if layer_file.key_range.start <= *key && *key < layer_file.key_range.end {\n                            to_print.push((idx, layer_file));\n                        }\n                    } else {\n                        to_print.push((idx, layer_file));\n                    }\n                    idx += 1;\n                }\n            }\n\n            if key.is_some() {\n                to_print\n                    .sort_by_key(|(_idx, layer_file)| std::cmp::Reverse(layer_file.lsn_range.end));\n            }\n\n            for (idx, layer_file) in to_print {\n                print_layer_file(idx, &layer_file);\n            }\n            Ok(())\n        }\n        LayerCmd::DumpLayer {\n            path,\n            tenant,\n            timeline,\n            id,\n        } => {\n            let timeline_path = path\n                .join(\"tenants\")\n                .join(tenant)\n                .join(\"timelines\")\n                .join(timeline);\n            let mut idx = 0;\n          
  for layer in fs::read_dir(timeline_path)? {\n                let layer = layer?;\n                if let Ok(layer_file) = parse_filename(&layer.file_name().into_string().unwrap()) {\n                    if *id == idx {\n                        print_layer_file(idx, &layer_file);\n\n                        if layer_file.is_delta {\n                            read_delta_file(layer.path(), &ctx).await?;\n                        } else {\n                            read_image_file(layer.path(), &ctx).await?;\n                        }\n\n                        break;\n                    }\n                    idx += 1;\n                }\n            }\n            Ok(())\n        }\n        LayerCmd::DumpLayerLocal { path } => {\n            if let Ok(layer_file) = parse_filename(path.file_name().unwrap().to_str().unwrap()) {\n                print_layer_file(0, &layer_file);\n\n                if layer_file.is_delta {\n                    read_delta_file(path, &ctx).await?;\n                } else {\n                    read_image_file(path, &ctx).await?;\n                }\n            }\n            Ok(())\n        }\n        LayerCmd::RewriteSummary {\n            layer_file_path,\n            new_tenant_id,\n            new_timeline_id,\n        } => {\n            pageserver::virtual_file::init(\n                10,\n                virtual_file::api::IoEngineKind::StdFs,\n                IoMode::preferred(),\n                virtual_file::SyncMode::Sync,\n            );\n            pageserver::page_cache::init(100);\n\n            let ctx = RequestContext::new(TaskKind::DebugTool, DownloadBehavior::Error)\n                .with_scope_debug_tools();\n\n            macro_rules! 
rewrite_closure {\n                ($($summary_ty:tt)*) => {{\n                    |summary| $($summary_ty)* {\n                        tenant_id: new_tenant_id.unwrap_or(summary.tenant_id),\n                        timeline_id: new_timeline_id.unwrap_or(summary.timeline_id),\n                        ..summary\n                    }\n                }};\n            }\n\n            let res = ImageLayer::rewrite_summary(\n                layer_file_path,\n                rewrite_closure!(image_layer::Summary),\n                &ctx,\n            )\n            .await;\n            match res {\n                Ok(()) => {\n                    println!(\"Successfully rewrote summary of image layer {layer_file_path}\");\n                    return Ok(());\n                }\n                Err(image_layer::RewriteSummaryError::MagicMismatch) => (), // fallthrough\n                Err(image_layer::RewriteSummaryError::Other(e)) => {\n                    return Err(e);\n                }\n            }\n\n            let res = DeltaLayer::rewrite_summary(\n                layer_file_path,\n                rewrite_closure!(delta_layer::Summary),\n                &ctx,\n            )\n            .await;\n            match res {\n                Ok(()) => {\n                    println!(\"Successfully rewrote summary of delta layer {layer_file_path}\");\n                    return Ok(());\n                }\n                Err(delta_layer::RewriteSummaryError::MagicMismatch) => (), // fallthrough\n                Err(delta_layer::RewriteSummaryError::Other(e)) => {\n                    return Err(e);\n                }\n            }\n\n            anyhow::bail!(\"not an image or delta layer: {layer_file_path}\");\n        }\n    }\n}\n\nfn print_layer_file(idx: usize, layer_file: &LayerFile) {\n    println!(\n        \"[{:3}]  key:{}-{}\\n       lsn:{}-{}\\n       delta:{}\",\n        idx,\n        layer_file.key_range.start,\n        layer_file.key_range.end,\n        
layer_file.lsn_range.start,\n        layer_file.lsn_range.end,\n        layer_file.is_delta,\n    );\n}\n"
  },
  {
    "path": "pageserver/ctl/src/main.rs",
    "content": "//! A helper tool to manage pageserver binary files.\n//! Accepts a file as an argument, attempts to parse it with all ways possible\n//! and prints its interpreted context.\n//!\n//! Separate, `metadata` subcommand allows to print and update pageserver's metadata file.\n\nmod download_remote_object;\nmod draw_timeline_dir;\nmod index_part;\nmod key;\nmod layer_map_analyzer;\nmod layers;\nmod page_trace;\n\nuse std::str::FromStr;\nuse std::time::{Duration, SystemTime};\n\nuse camino::{Utf8Path, Utf8PathBuf};\nuse clap::{Parser, Subcommand};\nuse download_remote_object::DownloadRemoteObjectCmd;\nuse index_part::IndexPartCmd;\nuse layers::LayerCmd;\nuse page_trace::PageTraceCmd;\nuse pageserver::context::{DownloadBehavior, RequestContext};\nuse pageserver::page_cache;\nuse pageserver::task_mgr::TaskKind;\nuse pageserver::tenant::dump_layerfile_from_path;\nuse pageserver::tenant::metadata::TimelineMetadata;\nuse pageserver::virtual_file::api::IoMode;\nuse pageserver::virtual_file::{self};\nuse pageserver_api::shard::TenantShardId;\nuse postgres_ffi::ControlFileData;\nuse remote_storage::{RemotePath, RemoteStorageConfig};\nuse tokio_util::sync::CancellationToken;\nuse utils::id::TimelineId;\nuse utils::logging::{self, LogFormat, TracingErrorLayerEnablement};\nuse utils::lsn::Lsn;\nuse utils::project_git_version;\n\nproject_git_version!(GIT_VERSION);\n\n#[derive(Parser)]\n#[command(\n    version = GIT_VERSION,\n    about = \"Neon Pageserver binutils\",\n    long_about = \"Reads pageserver (and related) binary files management utility\"\n)]\n#[command(propagate_version = true)]\nstruct CliOpts {\n    #[command(subcommand)]\n    command: Commands,\n}\n\n#[derive(Subcommand)]\nenum Commands {\n    Metadata(MetadataCmd),\n    #[command(subcommand)]\n    IndexPart(IndexPartCmd),\n    PrintLayerFile(PrintLayerFileCmd),\n    TimeTravelRemotePrefix(TimeTravelRemotePrefixCmd),\n    DrawTimeline {},\n    AnalyzeLayerMap(AnalyzeLayerMapCmd),\n    
#[command(subcommand)]\n    Layer(LayerCmd),\n    /// Debug print a hex key found from logs\n    Key(key::DescribeKeyCommand),\n    PageTrace(PageTraceCmd),\n    DownloadRemoteObject(DownloadRemoteObjectCmd),\n}\n\n/// Read and update pageserver metadata file\n#[derive(Parser)]\nstruct MetadataCmd {\n    /// Input metadata file path\n    metadata_path: Utf8PathBuf,\n    /// Replace disk consistent Lsn\n    disk_consistent_lsn: Option<Lsn>,\n    /// Replace previous record Lsn\n    prev_record_lsn: Option<Lsn>,\n    /// Replace latest gc cuttoff\n    latest_gc_cuttoff: Option<Lsn>,\n}\n\n#[derive(Parser)]\nstruct PrintLayerFileCmd {\n    /// Pageserver data path\n    path: Utf8PathBuf,\n}\n\n/// Roll back the time for the specified prefix using S3 history.\n///\n/// The command is fairly low level and powerful. Validation is only very light,\n/// so it is more powerful, and thus potentially more dangerous.\n#[derive(Parser)]\nstruct TimeTravelRemotePrefixCmd {\n    /// A configuration string for the remote_storage configuration.\n    ///\n    /// Example: `remote_storage = { bucket_name = \"aws-storage-bucket-name\", bucket_region = \"us-east-2\" }`\n    config_toml_str: String,\n    /// remote prefix to time travel recover. For safety reasons, we require it to contain\n    /// a timeline or tenant ID in the prefix.\n    prefix: String,\n    /// Timestamp to travel to. Given in format like `2024-01-20T10:45:45Z`. Assumes UTC and second accuracy.\n    travel_to: String,\n    /// Timestamp of the start of the operation, must be after any changes we want to roll back and after.\n    /// You can use a few seconds before invoking the command. 
Same format as `travel_to`.\n    done_if_after: Option<String>,\n}\n\n#[derive(Parser)]\nstruct AnalyzeLayerMapCmd {\n    /// Pageserver data path\n    path: Utf8PathBuf,\n    /// Max holes\n    max_holes: Option<usize>,\n}\n\n#[tokio::main]\nasync fn main() -> anyhow::Result<()> {\n    logging::init(\n        LogFormat::Plain,\n        TracingErrorLayerEnablement::EnableWithRustLogFilter,\n        logging::Output::Stdout,\n    )?;\n\n    logging::replace_panic_hook_with_tracing_panic_hook().forget();\n\n    let cli = CliOpts::parse();\n\n    match cli.command {\n        Commands::Layer(cmd) => {\n            layers::main(&cmd).await?;\n        }\n        Commands::Metadata(cmd) => {\n            handle_metadata(&cmd)?;\n        }\n        Commands::IndexPart(cmd) => {\n            index_part::main(&cmd).await?;\n        }\n        Commands::DrawTimeline {} => {\n            draw_timeline_dir::main()?;\n        }\n        Commands::AnalyzeLayerMap(cmd) => {\n            layer_map_analyzer::main(&cmd).await?;\n        }\n        Commands::PrintLayerFile(cmd) => {\n            if let Err(e) = read_pg_control_file(&cmd.path) {\n                println!(\n                    \"Failed to read input file as a pg control one: {e:#}\\n\\\n                    Attempting to read it as layer file\"\n                );\n                print_layerfile(&cmd.path).await?;\n            }\n        }\n        Commands::TimeTravelRemotePrefix(cmd) => {\n            let timestamp = humantime::parse_rfc3339(&cmd.travel_to)\n                .map_err(|_e| anyhow::anyhow!(\"Invalid time for travel_to: '{}'\", cmd.travel_to))?;\n\n            let done_if_after = if let Some(done_if_after) = &cmd.done_if_after {\n                humantime::parse_rfc3339(done_if_after).map_err(|_e| {\n                    anyhow::anyhow!(\"Invalid time for done_if_after: '{}'\", done_if_after)\n                })?\n            } else {\n                const SAFETY_MARGIN: Duration = 
Duration::from_secs(3);\n                tokio::time::sleep(SAFETY_MARGIN).await;\n                // Convert to string representation and back to get rid of sub-second values\n                let done_if_after = SystemTime::now();\n                tokio::time::sleep(SAFETY_MARGIN).await;\n                done_if_after\n            };\n\n            let timestamp = strip_subsecond(timestamp);\n            let done_if_after = strip_subsecond(done_if_after);\n\n            let Some(prefix) = validate_prefix(&cmd.prefix) else {\n                println!(\"specified prefix '{}' failed validation\", cmd.prefix);\n                return Ok(());\n            };\n            let config = RemoteStorageConfig::from_toml_str(&cmd.config_toml_str)?;\n            let storage = remote_storage::GenericRemoteStorage::from_config(&config).await;\n            let cancel = CancellationToken::new();\n            // Complexity limit: as we are running this command locally, we should have a lot of memory available, and we do not\n            // need to limit the number of versions we are going to delete.\n            storage\n                .unwrap()\n                .time_travel_recover(Some(&prefix), timestamp, done_if_after, &cancel, None)\n                .await?;\n        }\n        Commands::Key(dkc) => dkc.execute(),\n        Commands::PageTrace(cmd) => page_trace::main(&cmd)?,\n        Commands::DownloadRemoteObject(cmd) => {\n            download_remote_object::main(&cmd).await?;\n        }\n    };\n    Ok(())\n}\n\nfn read_pg_control_file(control_file_path: &Utf8Path) -> anyhow::Result<()> {\n    let control_file = ControlFileData::decode(&std::fs::read(control_file_path)?)?;\n    println!(\"{control_file:?}\");\n    let control_file_initdb = Lsn(control_file.checkPoint);\n    println!(\n        \"pg_initdb_lsn: {}, aligned: {}\",\n        control_file_initdb,\n        control_file_initdb.align()\n    );\n    Ok(())\n}\n\nasync fn print_layerfile(path: &Utf8Path) -> 
anyhow::Result<()> {\n    // Basic initialization of things that don't change after startup\n    virtual_file::init(\n        10,\n        virtual_file::api::IoEngineKind::StdFs,\n        IoMode::preferred(),\n        virtual_file::SyncMode::Sync,\n    );\n    page_cache::init(100);\n    let ctx =\n        RequestContext::new(TaskKind::DebugTool, DownloadBehavior::Error).with_scope_debug_tools();\n    dump_layerfile_from_path(path, true, &ctx).await\n}\n\nfn handle_metadata(\n    MetadataCmd {\n        metadata_path: path,\n        disk_consistent_lsn,\n        prev_record_lsn,\n        latest_gc_cuttoff,\n    }: &MetadataCmd,\n) -> Result<(), anyhow::Error> {\n    let metadata_bytes = std::fs::read(path)?;\n    let mut meta = TimelineMetadata::from_bytes(&metadata_bytes)?;\n    println!(\"Current metadata:\\n{meta:?}\");\n    let mut update_meta = false;\n    // TODO: simplify this part\n    if let Some(disk_consistent_lsn) = disk_consistent_lsn {\n        meta = TimelineMetadata::new(\n            *disk_consistent_lsn,\n            meta.prev_record_lsn(),\n            meta.ancestor_timeline(),\n            meta.ancestor_lsn(),\n            meta.latest_gc_cutoff_lsn(),\n            meta.initdb_lsn(),\n            meta.pg_version(),\n        );\n        update_meta = true;\n    }\n    if let Some(prev_record_lsn) = prev_record_lsn {\n        meta = TimelineMetadata::new(\n            meta.disk_consistent_lsn(),\n            Some(*prev_record_lsn),\n            meta.ancestor_timeline(),\n            meta.ancestor_lsn(),\n            meta.latest_gc_cutoff_lsn(),\n            meta.initdb_lsn(),\n            meta.pg_version(),\n        );\n        update_meta = true;\n    }\n    if let Some(latest_gc_cuttoff) = latest_gc_cuttoff {\n        meta = TimelineMetadata::new(\n            meta.disk_consistent_lsn(),\n            meta.prev_record_lsn(),\n            meta.ancestor_timeline(),\n            meta.ancestor_lsn(),\n            *latest_gc_cuttoff,\n            
meta.initdb_lsn(),\n            meta.pg_version(),\n        );\n        update_meta = true;\n    }\n\n    if update_meta {\n        let metadata_bytes = meta.to_bytes()?;\n        std::fs::write(path, metadata_bytes)?;\n    }\n\n    Ok(())\n}\n\n/// Ensures that the given S3 prefix is sufficiently constrained.\n/// The command is very risky already and we don't want to expose something\n/// that allows usually unintentional and quite catastrophic time travel of\n/// an entire bucket, which would be a major catastrophe and away\n/// by only one character change (similar to \"rm -r /home /username/foobar\").\nfn validate_prefix(prefix: &str) -> Option<RemotePath> {\n    if prefix.is_empty() {\n        // Empty prefix means we want to specify the *whole* bucket\n        return None;\n    }\n    let components = prefix.split('/').collect::<Vec<_>>();\n    let (last, components) = {\n        let last = components.last()?;\n        if last.is_empty() {\n            (\n                components.iter().nth_back(1)?,\n                &components[..(components.len() - 1)],\n            )\n        } else {\n            (last, &components[..])\n        }\n    };\n    'valid: {\n        if let Ok(_timeline_id) = TimelineId::from_str(last) {\n            // Ends in either a tenant or timeline ID\n            break 'valid;\n        }\n        if *last == \"timelines\" {\n            if let Some(before_last) = components.iter().nth_back(1) {\n                if let Ok(_tenant_id) = TenantShardId::from_str(before_last) {\n                    // Has a valid tenant id\n                    break 'valid;\n                }\n            }\n        }\n\n        return None;\n    }\n    RemotePath::from_string(prefix).ok()\n}\n\nfn strip_subsecond(timestamp: SystemTime) -> SystemTime {\n    let ts_str = humantime::format_rfc3339_seconds(timestamp).to_string();\n    humantime::parse_rfc3339(&ts_str).expect(\"can't parse just created timestamp\")\n}\n\n#[cfg(test)]\nmod tests {\n    use 
super::*;\n\n    #[test]\n    fn test_validate_prefix() {\n        assert_eq!(validate_prefix(\"\"), None);\n        assert_eq!(validate_prefix(\"/\"), None);\n        #[track_caller]\n        fn assert_valid(prefix: &str) {\n            let remote_path = RemotePath::from_string(prefix).unwrap();\n            assert_eq!(validate_prefix(prefix), Some(remote_path));\n        }\n        assert_valid(\"wal/3aa8fcc61f6d357410b7de754b1d9001/641e5342083b2235ee3deb8066819683/\");\n        // Path is not relative but absolute\n        assert_eq!(\n            validate_prefix(\n                \"/wal/3aa8fcc61f6d357410b7de754b1d9001/641e5342083b2235ee3deb8066819683/\"\n            ),\n            None\n        );\n        assert_valid(\"wal/3aa8fcc61f6d357410b7de754b1d9001/\");\n        // Partial tenant IDs should be invalid, S3 will match all tenants with the specific ID prefix\n        assert_eq!(validate_prefix(\"wal/3aa8fcc61f6d357410b7d\"), None);\n        assert_eq!(validate_prefix(\"wal\"), None);\n        assert_eq!(validate_prefix(\"/wal/\"), None);\n        assert_valid(\"pageserver/v1/tenants/3aa8fcc61f6d357410b7de754b1d9001\");\n        // Partial tenant ID\n        assert_eq!(\n            validate_prefix(\"pageserver/v1/tenants/3aa8fcc61f6d357410b\"),\n            None\n        );\n        assert_valid(\"pageserver/v1/tenants/3aa8fcc61f6d357410b7de754b1d9001/timelines\");\n        assert_valid(\"pageserver/v1/tenants/3aa8fcc61f6d357410b7de754b1d9001-0004/timelines\");\n        assert_valid(\"pageserver/v1/tenants/3aa8fcc61f6d357410b7de754b1d9001/timelines/\");\n        assert_valid(\n            \"pageserver/v1/tenants/3aa8fcc61f6d357410b7de754b1d9001/timelines/641e5342083b2235ee3deb8066819683\",\n        );\n        assert_eq!(validate_prefix(\"pageserver/v1/tenants/\"), None);\n    }\n}\n"
  },
  {
    "path": "pageserver/ctl/src/page_trace.rs",
    "content": "use std::collections::HashMap;\nuse std::io::BufReader;\n\nuse camino::Utf8PathBuf;\nuse clap::Parser;\nuse itertools::Itertools as _;\nuse pageserver_api::key::{CompactKey, Key};\nuse pageserver_api::models::PageTraceEvent;\nuse pageserver_api::reltag::RelTag;\n\n/// Parses a page trace (as emitted by the `page_trace` timeline API), and outputs stats.\n#[derive(Parser)]\npub(crate) struct PageTraceCmd {\n    /// Trace input file.\n    path: Utf8PathBuf,\n}\n\npub(crate) fn main(cmd: &PageTraceCmd) -> anyhow::Result<()> {\n    let mut file = BufReader::new(std::fs::OpenOptions::new().read(true).open(&cmd.path)?);\n    let mut events: Vec<PageTraceEvent> = Vec::new();\n    loop {\n        match bincode::deserialize_from(&mut file) {\n            Ok(event) => events.push(event),\n            Err(err) => {\n                if let bincode::ErrorKind::Io(ref err) = *err {\n                    if err.kind() == std::io::ErrorKind::UnexpectedEof {\n                        break;\n                    }\n                }\n                return Err(err.into());\n            }\n        }\n    }\n\n    let mut reads_by_relation: HashMap<RelTag, i64> = HashMap::new();\n    let mut reads_by_key: HashMap<CompactKey, i64> = HashMap::new();\n\n    for event in events {\n        let key = Key::from_compact(event.key);\n        let reltag = RelTag {\n            spcnode: key.field2,\n            dbnode: key.field3,\n            relnode: key.field4,\n            forknum: key.field5,\n        };\n\n        *reads_by_relation.entry(reltag).or_default() += 1;\n        *reads_by_key.entry(event.key).or_default() += 1;\n    }\n\n    let multi_read_keys = reads_by_key\n        .into_iter()\n        .filter(|(_, count)| *count > 1)\n        .sorted_by_key(|(key, count)| (-*count, *key))\n        .collect_vec();\n\n    println!(\"Multi-read keys: {}\", multi_read_keys.len());\n    for (key, count) in multi_read_keys {\n        println!(\"  {key}: {count}\");\n    }\n\n    let 
reads_by_relation = reads_by_relation\n        .into_iter()\n        .sorted_by_key(|(rel, count)| (-*count, *rel))\n        .collect_vec();\n\n    println!(\"Reads by relation:\");\n    for (reltag, count) in reads_by_relation {\n        println!(\"  {reltag}: {count}\");\n    }\n\n    Ok(())\n}\n"
  },
  {
    "path": "pageserver/page_api/Cargo.toml",
    "content": "[package]\nname = \"pageserver_page_api\"\nversion = \"0.1.0\"\nedition.workspace = true\nlicense.workspace = true\n\n[dependencies]\nanyhow.workspace = true\nbytes.workspace = true\nfutures.workspace = true\npageserver_api.workspace = true\npostgres_ffi_types.workspace = true\nprost.workspace = true\nprost-types.workspace = true\nstrum.workspace = true\nstrum_macros.workspace = true\nthiserror.workspace = true\ntokio.workspace = true\ntokio-util.workspace = true\ntonic.workspace = true\nutils.workspace = true\nworkspace_hack.workspace = true\n\n[build-dependencies]\ntonic-build.workspace = true\n"
  },
  {
    "path": "pageserver/page_api/build.rs",
    "content": "use std::env;\nuse std::path::PathBuf;\n\n/// Generates Rust code from .proto Protobuf schemas, along with a binary file\n/// descriptor set for Protobuf schema reflection.\nfn main() -> Result<(), Box<dyn std::error::Error>> {\n    let out_dir = PathBuf::from(env::var(\"OUT_DIR\")?);\n    tonic_build::configure()\n        .bytes([\".\"])\n        .file_descriptor_set_path(out_dir.join(\"page_api_descriptor.bin\"))\n        .compile_protos(&[\"proto/page_service.proto\"], &[\"proto\"])\n        .map_err(|err| err.into())\n}\n"
  },
  {
    "path": "pageserver/page_api/proto/page_service.proto",
    "content": "// Page service, presented by pageservers for computes.\n//\n// This is the compute read path. It primarily serves page versions at given\n// LSNs, but also base backups, SLRU segments, and relation metadata.\n//\n// EXPERIMENTAL: this is still under development and subject to change.\n//\n// Request metadata headers:\n// - authorization: JWT token (\"Bearer <token>\"), if auth is enabled\n// - neon-tenant-id: tenant ID (\"7c4a1f9e3bd6470c8f3e21a65bd2e980\")\n// - neon-shard-id: shard ID, as <number><count> in hex (\"0b10\" = shard 11 of 16, 0-based)\n// - neon-timeline-id: timeline ID (\"f08c4e9a2d5f76b1e3a7c2d8910f4b3e\")\n//\n// The service can be accessed via e.g. grpcurl:\n//\n//    ```\n//    grpcurl \\\n//      -plaintext \\\n//      -H \"neon-tenant-id: 7c4a1f9e3bd6470c8f3e21a65bd2e980\" \\\n//      -H \"neon-shard-id: 0000\" \\\n//      -H \"neon-timeline-id: f08c4e9a2d5f76b1e3a7c2d8910f4b3e\" \\\n//      -H \"authorization: Bearer $JWT\" \\\n//      -d '{\"read_lsn\": {\"request_lsn\": 100000000, \"not_modified_since_lsn\": 1}, \"db_oid\": 1}' \\\n//      localhost:51051 page_api.PageService/GetDbSize\n//    ```\n//\n// TODO: consider adding neon-compute-mode (\"primary\", \"static\", \"replica\").\n// However, this will require reconnecting when changing modes.\n//\n// TODO: write implementation guidance on\n// - Health checks\n// - Tracing, OpenTelemetry\n// - Compression\n\nsyntax = \"proto3\";\npackage page_api;\n\nimport \"google/protobuf/timestamp.proto\";\n\nservice PageService {\n  // NB: unlike libpq, there is no CheckRelExists in gRPC, at the compute team's request. 
Instead,\n  // use GetRelSize with allow_missing=true to check existence.\n\n  // Fetches a base backup.\n  rpc GetBaseBackup (GetBaseBackupRequest) returns (stream GetBaseBackupResponseChunk);\n\n  // Returns the total size of a database, as # of bytes.\n  rpc GetDbSize (GetDbSizeRequest) returns (GetDbSizeResponse);\n\n  // Fetches pages.\n  //\n  // This is implemented as a bidirectional streaming RPC for performance. Unary\n  // requests incur costs for e.g. HTTP/2 stream setup, header parsing,\n  // authentication, and so on -- with streaming, we only pay these costs during\n  // the initial stream setup. This ~doubles throughput in benchmarks. Other\n  // RPCs use regular unary requests, since they are not as frequent and\n  // performance-critical, and this simplifies implementation.\n  //\n  // NB: a gRPC status response (e.g. errors) will terminate the stream. The\n  // stream may be shared by multiple Postgres backends, so we avoid this by\n  // sending them as GetPageResponse.status_code instead.\n  rpc GetPages (stream GetPageRequest) returns (stream GetPageResponse);\n\n  // Returns the size of a relation, as # of blocks.\n  rpc GetRelSize (GetRelSizeRequest) returns (GetRelSizeResponse);\n\n  // Fetches an SLRU segment.\n  rpc GetSlruSegment (GetSlruSegmentRequest) returns (GetSlruSegmentResponse);\n\n  // Acquires or extends a lease on the given LSN. This guarantees that the Pageserver won't garbage\n  // collect the LSN until the lease expires. Must be acquired on all relevant shards.\n  rpc LeaseLsn (LeaseLsnRequest) returns (LeaseLsnResponse);\n}\n\n// The LSN a request should read at.\nmessage ReadLsn {\n  // The request's read LSN. Required.\n  uint64 request_lsn = 1;\n  // If given, the caller guarantees that the page has not been modified since\n  // this LSN. Must be smaller than or equal to request_lsn. This allows the\n  // Pageserver to serve an old page without waiting for the request LSN to\n  // arrive. 
Valid for all request types.\n  //\n  // It is undefined behaviour to make a request such that the page was, in\n  // fact, modified between request_lsn and not_modified_since_lsn. The\n  // Pageserver might detect it and return an error, or it might return the old\n  // page version or the new page version. Setting not_modified_since_lsn equal\n  // to request_lsn is always safe, but can lead to unnecessary waiting.\n  uint64 not_modified_since_lsn = 2;\n}\n\n// A relation identifier.\nmessage RelTag {\n    uint32 spc_oid = 1;\n    uint32 db_oid = 2;\n    uint32 rel_number = 3;\n    uint32 fork_number = 4;\n}\n\n// Requests a base backup.\nmessage GetBaseBackupRequest {\n  // The LSN to fetch the base backup at. 0 or absent means the latest LSN known to the Pageserver.\n  uint64 lsn = 1;\n  // If true, logical replication slots will not be created.\n  bool replica = 2;\n  // If true, include relation files in the base backup. Mainly for debugging and tests.\n  bool full = 3;\n  // Compression algorithm to use. Base backups send a compressed payload instead of using gRPC\n  // compression, so that we can cache compressed backups on the server.\n  BaseBackupCompression compression = 4;\n}\n\n// Base backup compression algorithms.\nenum BaseBackupCompression {\n  // Unknown algorithm. Used when clients send an unsupported algorithm.\n  BASE_BACKUP_COMPRESSION_UNKNOWN = 0;\n  // No compression.\n  BASE_BACKUP_COMPRESSION_NONE = 1;\n  // GZIP compression.\n  BASE_BACKUP_COMPRESSION_GZIP = 2;\n}\n\n// Base backup response chunk, returned as an ordered stream.\nmessage GetBaseBackupResponseChunk {\n  // A basebackup data chunk. The size is undefined, but bounded by the 4 MB\n  // gRPC message size limit.\n  bytes chunk = 1;\n}\n\n// Requests the size of a database, as # of bytes. 
Only valid on shard 0, other\n// shards will error.\nmessage GetDbSizeRequest {\n  ReadLsn read_lsn = 1;\n  uint32 db_oid = 2;\n}\n\nmessage GetDbSizeResponse {\n  uint64 num_bytes = 1;\n}\n\n// Requests one or more pages.\nmessage GetPageRequest {\n  // A request ID. Will be included in the response. Should be unique for\n  // in-flight requests on the stream.\n  RequestID request_id = 1;\n  // The request class.\n  GetPageClass request_class = 2;\n  // The LSN to read at.\n  ReadLsn read_lsn = 3;\n  // The relation to read from.\n  RelTag rel = 4;\n  // Page numbers to read. Must belong to the remote shard.\n  //\n  // Multiple pages will be executed as a single batch by the Pageserver,\n  // amortizing layer access costs and parallelizing them. This may increase the\n  // latency of any individual request, but improves the overall latency and\n  // throughput of the batch as a whole.\n  //\n  // TODO: this causes an allocation in the common single-block case. The sender\n  // can use a SmallVec to stack-allocate it, but Prost will always deserialize\n  // into a heap-allocated Vec. Consider optimizing this.\n  //\n  // TODO: we might be able to avoid a sort or something if we mandate that these\n  // are always in order. But we can't currently rely on this on the server, because\n  // of compatibility with the libpq protocol handler.\n  repeated uint32 block_number = 5;\n}\n\n// A Request ID. Should be unique for in-flight requests on a stream. Included in the response.\nmessage RequestID {\n  // The base request ID.\n  uint64 id = 1;\n  // The request attempt. Starts at 0, incremented on each retry.\n  uint32 attempt = 2;\n}\n\n// A GetPageRequest class. Primarily intended for observability, but may also be\n// used for prioritization in the future.\nenum GetPageClass {\n  // Unknown class. 
For backwards compatibility: used when an older client version sends a class\n  // that a newer server version has removed.\n  GET_PAGE_CLASS_UNKNOWN = 0;\n  // A normal request. This is the default.\n  GET_PAGE_CLASS_NORMAL = 1;\n  // A prefetch request. NB: can only be classified on pg < 18.\n  GET_PAGE_CLASS_PREFETCH = 2;\n  // A background request (e.g. vacuum).\n  GET_PAGE_CLASS_BACKGROUND = 3;\n}\n\n// A GetPage response.\n//\n// A batch response will contain all of the requested pages. We could eagerly\n// emit individual pages as soon as they are ready, but on a readv() Postgres\n// holds buffer pool locks on all pages in the batch and we'll only return once\n// the entire batch is ready, so no one can make use of the individual pages.\nmessage GetPageResponse {\n  // The original request's ID.\n  RequestID request_id = 1;\n  // The response status code. If not OK, the rel and page fields will be empty.\n  GetPageStatusCode status_code = 2;\n  // A string describing the status, if any.\n  string reason = 3;\n  // The relation that the pages belong to.\n  RelTag rel = 4;\n  // The page(s), in the same order as the request.\n  repeated Page page = 5;\n}\n\n// A page.\n//\n// TODO: it would be slightly more efficient (but less convenient) to have separate arrays of block\n// numbers and images, but given the 8KB page size it's probably negligible. Benchmark it anyway.\nmessage Page {\n  // The page number.\n  uint32 block_number = 1;\n  // The materialized page image, as an 8KB byte vector.\n  bytes image = 2;\n}\n\n// A GetPageResponse status code.\n//\n// These are effectively equivalent to gRPC statuses. However, we use a bidirectional stream\n// (potentially shared by many backends), and a gRPC status response would terminate the stream so\n// we send GetPageResponse messages with these codes instead.\nenum GetPageStatusCode {\n  // Unknown status. 
For forwards compatibility: used when an older client version receives a new\n  // status code from a newer server version.\n  GET_PAGE_STATUS_CODE_UNKNOWN = 0;\n  // The request was successful.\n  GET_PAGE_STATUS_CODE_OK = 1;\n  // The page did not exist. The tenant/timeline/shard has already been\n  // validated during stream setup.\n  GET_PAGE_STATUS_CODE_NOT_FOUND = 2;\n  // The request was invalid.\n  GET_PAGE_STATUS_CODE_INVALID_REQUEST = 3;\n  // The request failed due to an internal server error.\n  GET_PAGE_STATUS_CODE_INTERNAL_ERROR = 4;\n  // The tenant is rate limited. Slow down and retry later.\n  GET_PAGE_STATUS_CODE_SLOW_DOWN = 5;\n  // NB: shutdown errors are emitted as a gRPC Unavailable status.\n  //\n  // TODO: consider adding a GET_PAGE_STATUS_CODE_LAYER_DOWNLOAD in the case of a layer download.\n  // This could free up the server task to process other requests while the download is in progress.\n}\n\n// Fetches the size of a relation at a given LSN, as # of blocks. Only valid on\n// shard 0, other shards will error.\nmessage GetRelSizeRequest {\n  ReadLsn read_lsn = 1;\n  RelTag rel = 2;\n  // If true, return missing=true for missing relations instead of a NotFound error.\n  bool allow_missing = 3;\n}\n\nmessage GetRelSizeResponse {\n  // The number of blocks in the relation.\n  uint32 num_blocks = 1;\n  // If allow_missing=true, this is true for missing relations.\n  bool missing = 2;\n}\n\n// Requests an SLRU segment. Only valid on shard 0, other shards will error.\nmessage GetSlruSegmentRequest {\n  ReadLsn read_lsn = 1;\n  uint32 kind = 2;\n  uint32 segno = 3;\n}\n\n// Returns an SLRU segment.\n//\n// These are up to 32 pages (256 KB), so we can send them as a single response.\nmessage GetSlruSegmentResponse {\n  bytes segment = 1;\n}\n\n// Acquires or extends a lease on the given LSN. This guarantees that the Pageserver won't garbage\n// collect the LSN until the lease expires. 
Must be acquired on all relevant shards.\nmessage LeaseLsnRequest {\n  // The LSN to lease. Can't be 0 or below the current GC cutoff.\n  uint64 lsn = 1;\n}\n\n// Lease acquisition response. If the lease could not be granted because the LSN has already been\n// garbage collected, a FailedPrecondition status will be returned instead.\nmessage LeaseLsnResponse {\n  // The lease expiration time.\n  google.protobuf.Timestamp expires = 1;\n}\n"
  },
  {
    "path": "pageserver/page_api/src/client.rs",
    "content": "use anyhow::Context as _;\nuse futures::future::ready;\nuse futures::{Stream, StreamExt as _, TryStreamExt as _};\nuse tokio::io::AsyncRead;\nuse tokio_util::io::StreamReader;\nuse tonic::codec::CompressionEncoding;\nuse tonic::metadata::AsciiMetadataValue;\nuse tonic::service::Interceptor;\nuse tonic::service::interceptor::InterceptedService;\nuse tonic::transport::{Channel, Endpoint};\n\nuse utils::id::{TenantId, TimelineId};\nuse utils::shard::ShardIndex;\n\nuse crate::model::*;\nuse crate::proto;\n\n/// A basic Pageserver gRPC client, for a single tenant shard. This API uses native Rust domain\n/// types from `model` rather than generated Protobuf types.\npub struct Client {\n    inner: proto::PageServiceClient<InterceptedService<Channel, AuthInterceptor>>,\n}\n\nimpl Client {\n    /// Connects to the given gRPC endpoint.\n    pub async fn connect<E>(\n        endpoint: E,\n        tenant_id: TenantId,\n        timeline_id: TimelineId,\n        shard_id: ShardIndex,\n        auth_token: Option<String>,\n        compression: Option<CompressionEncoding>,\n    ) -> anyhow::Result<Self>\n    where\n        E: TryInto<Endpoint> + Send + Sync + 'static,\n        <E as TryInto<Endpoint>>::Error: std::error::Error + Send + Sync,\n    {\n        let endpoint: Endpoint = endpoint.try_into().context(\"invalid endpoint\")?;\n        let channel = endpoint.connect().await?;\n        Self::new(\n            channel,\n            tenant_id,\n            timeline_id,\n            shard_id,\n            auth_token,\n            compression,\n        )\n    }\n\n    /// Creates a new client using the given gRPC channel.\n    pub fn new(\n        channel: Channel,\n        tenant_id: TenantId,\n        timeline_id: TimelineId,\n        shard_id: ShardIndex,\n        auth_token: Option<String>,\n        compression: Option<CompressionEncoding>,\n    ) -> anyhow::Result<Self> {\n        let auth = AuthInterceptor::new(tenant_id, timeline_id, shard_id, 
auth_token)?;\n        let mut inner = proto::PageServiceClient::with_interceptor(channel, auth);\n\n        if let Some(compression) = compression {\n            // TODO: benchmark this (including network latency).\n            inner = inner\n                .accept_compressed(compression)\n                .send_compressed(compression);\n        }\n\n        Ok(Self { inner })\n    }\n\n    /// Fetches a base backup.\n    pub async fn get_base_backup(\n        &mut self,\n        req: GetBaseBackupRequest,\n    ) -> tonic::Result<impl AsyncRead + use<>> {\n        let req = proto::GetBaseBackupRequest::from(req);\n        let chunks = self.inner.get_base_backup(req).await?.into_inner();\n        Ok(StreamReader::new(\n            chunks\n                .map_ok(|resp| resp.chunk)\n                .map_err(std::io::Error::other),\n        ))\n    }\n\n    /// Returns the total size of a database, as # of bytes.\n    pub async fn get_db_size(&mut self, req: GetDbSizeRequest) -> tonic::Result<GetDbSizeResponse> {\n        let req = proto::GetDbSizeRequest::from(req);\n        let resp = self.inner.get_db_size(req).await?.into_inner();\n        Ok(resp.into())\n    }\n\n    /// Fetches pages.\n    ///\n    /// This is implemented as a bidirectional streaming RPC for performance. 
Per-request errors are\n    /// typically returned as status_code instead of errors, to avoid tearing down the entire stream\n    /// via a tonic::Status error.\n    pub async fn get_pages(\n        &mut self,\n        reqs: impl Stream<Item = GetPageRequest> + Send + 'static,\n    ) -> tonic::Result<impl Stream<Item = tonic::Result<GetPageResponse>> + Send + 'static> {\n        let reqs = reqs.map(proto::GetPageRequest::from);\n        let resps = self.inner.get_pages(reqs).await?.into_inner();\n        Ok(resps.and_then(|resp| ready(GetPageResponse::try_from(resp).map_err(|err| err.into()))))\n    }\n\n    /// Returns the size of a relation as # of blocks, or None if allow_missing=true and the\n    /// relation does not exist.\n    pub async fn get_rel_size(\n        &mut self,\n        req: GetRelSizeRequest,\n    ) -> tonic::Result<GetRelSizeResponse> {\n        let req = proto::GetRelSizeRequest::from(req);\n        let resp = self.inner.get_rel_size(req).await?.into_inner();\n        Ok(resp.into())\n    }\n\n    /// Fetches an SLRU segment.\n    pub async fn get_slru_segment(\n        &mut self,\n        req: GetSlruSegmentRequest,\n    ) -> tonic::Result<GetSlruSegmentResponse> {\n        let req = proto::GetSlruSegmentRequest::from(req);\n        let resp = self.inner.get_slru_segment(req).await?.into_inner();\n        Ok(resp.try_into()?)\n    }\n\n    /// Acquires or extends a lease on the given LSN. This guarantees that the Pageserver won't\n    /// garbage collect the LSN until the lease expires. 
Must be acquired on all relevant shards.\n    ///\n    /// Returns the lease expiration time, or a FailedPrecondition status if the lease could not be\n    /// acquired because the LSN has already been garbage collected.\n    pub async fn lease_lsn(&mut self, req: LeaseLsnRequest) -> tonic::Result<LeaseLsnResponse> {\n        let req = proto::LeaseLsnRequest::from(req);\n        let resp = self.inner.lease_lsn(req).await?.into_inner();\n        Ok(resp.try_into()?)\n    }\n}\n\n/// Adds authentication metadata to gRPC requests.\n#[derive(Clone)]\nstruct AuthInterceptor {\n    tenant_id: AsciiMetadataValue,\n    timeline_id: AsciiMetadataValue,\n    shard_id: AsciiMetadataValue,\n    auth_header: Option<AsciiMetadataValue>, // including \"Bearer \" prefix\n}\n\nimpl AuthInterceptor {\n    fn new(\n        tenant_id: TenantId,\n        timeline_id: TimelineId,\n        shard_id: ShardIndex,\n        auth_token: Option<String>,\n    ) -> anyhow::Result<Self> {\n        Ok(Self {\n            tenant_id: tenant_id.to_string().try_into()?,\n            timeline_id: timeline_id.to_string().try_into()?,\n            shard_id: shard_id.to_string().try_into()?,\n            auth_header: auth_token\n                .map(|token| format!(\"Bearer {token}\").try_into())\n                .transpose()?,\n        })\n    }\n}\n\nimpl Interceptor for AuthInterceptor {\n    fn call(&mut self, mut req: tonic::Request<()>) -> tonic::Result<tonic::Request<()>> {\n        let metadata = req.metadata_mut();\n        metadata.insert(\"neon-tenant-id\", self.tenant_id.clone());\n        metadata.insert(\"neon-timeline-id\", self.timeline_id.clone());\n        metadata.insert(\"neon-shard-id\", self.shard_id.clone());\n        if let Some(ref auth_header) = self.auth_header {\n            metadata.insert(\"authorization\", auth_header.clone());\n        }\n        Ok(req)\n    }\n}\n"
  },
  {
    "path": "pageserver/page_api/src/lib.rs",
    "content": "//! This crate provides the Pageserver's page API. It contains:\n//!\n//! * proto/page_service.proto: the Protobuf schema for the page API.\n//! * proto: auto-generated Protobuf types for gRPC.\n//!\n//! This crate is used by both the client and the server. Try to keep it slim.\n\n// Code generated by protobuf.\npub mod proto {\n    tonic::include_proto!(\"page_api\");\n\n    /// File descriptor set for Protobuf schema reflection. This allows using\n    /// e.g. grpcurl with the API.\n    pub const FILE_DESCRIPTOR_SET: &[u8] =\n        tonic::include_file_descriptor_set!(\"page_api_descriptor\");\n\n    pub use page_service_client::PageServiceClient;\n    pub use page_service_server::{PageService, PageServiceServer};\n}\n\nmod client;\nmod model;\nmod split;\n\npub use client::Client;\npub use model::*;\npub use split::{GetPageSplitter, SplitError};\n"
  },
  {
    "path": "pageserver/page_api/src/model.rs",
    "content": "//! Structs representing the canonical page service API.\n//!\n//! These mirror the autogenerated Protobuf types. The differences are:\n//!\n//! - Types that are in fact required by the API are not Options. The protobuf \"required\"\n//!   attribute is deprecated and 'prost' marks a lot of members as optional because of that.\n//!   (See <https://github.com/tokio-rs/prost/issues/800> for a gripe on this)\n//!\n//! - Use more precise datatypes, e.g. Lsn and uints shorter than 32 bits.\n//!\n//! - Validate protocol invariants, via try_from() and try_into().\n//!\n//! Validation only happens on the receiver side, i.e. when converting from Protobuf to domain\n//! types. This is where it matters -- the Protobuf types are less strict than the domain types, and\n//! receivers should expect all sorts of junk from senders. This also allows the sender to use e.g.\n//! stream combinators without dealing with errors, and avoids validating the same message twice.\n\nuse std::fmt::Display;\nuse std::time::{Duration, SystemTime, UNIX_EPOCH};\n\nuse bytes::Bytes;\nuse postgres_ffi_types::Oid;\n// TODO: split out Lsn, RelTag, SlruKind and other basic types to a separate crate, to avoid\n// pulling in all of their other crate dependencies when building the client.\nuse utils::lsn::Lsn;\n\nuse crate::proto;\n\n/// A protocol error. 
Typically returned via try_from() or try_into().\n#[derive(thiserror::Error, Clone, Debug)]\npub enum ProtocolError {\n    #[error(\"field '{0}' has invalid value '{1}'\")]\n    Invalid(&'static str, String),\n    #[error(\"required field '{0}' is missing\")]\n    Missing(&'static str),\n}\n\nimpl ProtocolError {\n    /// Helper to generate a new ProtocolError::Invalid for the given field and value.\n    pub fn invalid(field: &'static str, value: impl std::fmt::Debug) -> Self {\n        Self::Invalid(field, format!(\"{value:?}\"))\n    }\n}\n\nimpl From<ProtocolError> for tonic::Status {\n    fn from(err: ProtocolError) -> Self {\n        tonic::Status::invalid_argument(format!(\"{err}\"))\n    }\n}\n\n/// The LSN a request should read at.\n#[derive(Clone, Copy, Debug, Default)]\npub struct ReadLsn {\n    /// The request's read LSN.\n    pub request_lsn: Lsn,\n    /// If given, the caller guarantees that the page has not been modified since this LSN. Must be\n    /// smaller than or equal to request_lsn. This allows the Pageserver to serve an old page\n    /// without waiting for the request LSN to arrive. If not given, the request will read at the\n    /// request_lsn and wait for it to arrive if necessary. Valid for all request types.\n    ///\n    /// It is undefined behaviour to make a request such that the page was, in fact, modified\n    /// between request_lsn and not_modified_since_lsn. The Pageserver might detect it and return an\n    /// error, or it might return the old page version or the new page version. 
Setting\n    /// not_modified_since_lsn equal to request_lsn is always safe, but can lead to unnecessary\n    /// waiting.\n    pub not_modified_since_lsn: Option<Lsn>,\n}\n\nimpl Display for ReadLsn {\n    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {\n        let req_lsn = self.request_lsn;\n        if let Some(mod_lsn) = self.not_modified_since_lsn {\n            write!(f, \"{req_lsn}>={mod_lsn}\")\n        } else {\n            req_lsn.fmt(f)\n        }\n    }\n}\n\nimpl TryFrom<proto::ReadLsn> for ReadLsn {\n    type Error = ProtocolError;\n\n    fn try_from(pb: proto::ReadLsn) -> Result<Self, Self::Error> {\n        if pb.request_lsn == 0 {\n            return Err(ProtocolError::invalid(\"request_lsn\", pb.request_lsn));\n        }\n        if pb.not_modified_since_lsn > pb.request_lsn {\n            return Err(ProtocolError::invalid(\n                \"not_modified_since_lsn\",\n                pb.not_modified_since_lsn,\n            ));\n        }\n        Ok(Self {\n            request_lsn: Lsn(pb.request_lsn),\n            not_modified_since_lsn: match pb.not_modified_since_lsn {\n                0 => None,\n                lsn => Some(Lsn(lsn)),\n            },\n        })\n    }\n}\n\nimpl From<ReadLsn> for proto::ReadLsn {\n    fn from(read_lsn: ReadLsn) -> Self {\n        Self {\n            request_lsn: read_lsn.request_lsn.0,\n            not_modified_since_lsn: read_lsn.not_modified_since_lsn.unwrap_or_default().0,\n        }\n    }\n}\n\n// RelTag is defined in pageserver_api::reltag.\npub type RelTag = pageserver_api::reltag::RelTag;\n\nimpl TryFrom<proto::RelTag> for RelTag {\n    type Error = ProtocolError;\n\n    fn try_from(pb: proto::RelTag) -> Result<Self, Self::Error> {\n        Ok(Self {\n            spcnode: pb.spc_oid,\n            dbnode: pb.db_oid,\n            relnode: pb.rel_number,\n            forknum: pb\n                .fork_number\n                .try_into()\n                .map_err(|_| 
ProtocolError::invalid(\"fork_number\", pb.fork_number))?,\n        })\n    }\n}\n\nimpl From<RelTag> for proto::RelTag {\n    fn from(rel_tag: RelTag) -> Self {\n        Self {\n            spc_oid: rel_tag.spcnode,\n            db_oid: rel_tag.dbnode,\n            rel_number: rel_tag.relnode,\n            fork_number: rel_tag.forknum as u32,\n        }\n    }\n}\n\n/// Requests a base backup.\n#[derive(Clone, Copy, Debug)]\npub struct GetBaseBackupRequest {\n    /// The LSN to fetch a base backup at. If None, uses the latest LSN known to the Pageserver.\n    pub lsn: Option<Lsn>,\n    /// If true, logical replication slots will not be created.\n    pub replica: bool,\n    /// If true, include relation files in the base backup. Mainly for debugging and tests.\n    pub full: bool,\n    /// Compression algorithm to use. Base backups send a compressed payload instead of using gRPC\n    /// compression, so that we can cache compressed backups on the server.\n    pub compression: BaseBackupCompression,\n}\n\nimpl TryFrom<proto::GetBaseBackupRequest> for GetBaseBackupRequest {\n    type Error = ProtocolError;\n\n    fn try_from(pb: proto::GetBaseBackupRequest) -> Result<Self, Self::Error> {\n        Ok(Self {\n            lsn: (pb.lsn != 0).then_some(Lsn(pb.lsn)),\n            replica: pb.replica,\n            full: pb.full,\n            compression: pb.compression.try_into()?,\n        })\n    }\n}\n\nimpl From<GetBaseBackupRequest> for proto::GetBaseBackupRequest {\n    fn from(request: GetBaseBackupRequest) -> Self {\n        Self {\n            lsn: request.lsn.unwrap_or_default().0,\n            replica: request.replica,\n            full: request.full,\n            compression: request.compression.into(),\n        }\n    }\n}\n\n/// Base backup compression algorithm.\n#[derive(Clone, Copy, Debug)]\npub enum BaseBackupCompression {\n    None,\n    Gzip,\n}\n\nimpl TryFrom<proto::BaseBackupCompression> for BaseBackupCompression {\n    type Error = ProtocolError;\n\n 
   fn try_from(pb: proto::BaseBackupCompression) -> Result<Self, Self::Error> {\n        match pb {\n            proto::BaseBackupCompression::Unknown => Err(ProtocolError::invalid(\"compression\", pb)),\n            proto::BaseBackupCompression::None => Ok(Self::None),\n            proto::BaseBackupCompression::Gzip => Ok(Self::Gzip),\n        }\n    }\n}\n\nimpl TryFrom<i32> for BaseBackupCompression {\n    type Error = ProtocolError;\n\n    fn try_from(compression: i32) -> Result<Self, Self::Error> {\n        proto::BaseBackupCompression::try_from(compression)\n            .map_err(|_| ProtocolError::invalid(\"compression\", compression))\n            .and_then(Self::try_from)\n    }\n}\n\nimpl From<BaseBackupCompression> for proto::BaseBackupCompression {\n    fn from(compression: BaseBackupCompression) -> Self {\n        match compression {\n            BaseBackupCompression::None => Self::None,\n            BaseBackupCompression::Gzip => Self::Gzip,\n        }\n    }\n}\n\nimpl From<BaseBackupCompression> for i32 {\n    fn from(compression: BaseBackupCompression) -> Self {\n        proto::BaseBackupCompression::from(compression).into()\n    }\n}\n\npub type GetBaseBackupResponseChunk = Bytes;\n\nimpl TryFrom<proto::GetBaseBackupResponseChunk> for GetBaseBackupResponseChunk {\n    type Error = ProtocolError;\n\n    fn try_from(pb: proto::GetBaseBackupResponseChunk) -> Result<Self, Self::Error> {\n        if pb.chunk.is_empty() {\n            return Err(ProtocolError::Missing(\"chunk\"));\n        }\n        Ok(pb.chunk)\n    }\n}\n\nimpl From<GetBaseBackupResponseChunk> for proto::GetBaseBackupResponseChunk {\n    fn from(chunk: GetBaseBackupResponseChunk) -> Self {\n        Self { chunk }\n    }\n}\n\n/// Requests the size of a database, as # of bytes. 
Only valid on shard 0, other shards will error.\n#[derive(Clone, Copy, Debug)]\npub struct GetDbSizeRequest {\n    pub read_lsn: ReadLsn,\n    pub db_oid: Oid,\n}\n\nimpl TryFrom<proto::GetDbSizeRequest> for GetDbSizeRequest {\n    type Error = ProtocolError;\n\n    fn try_from(pb: proto::GetDbSizeRequest) -> Result<Self, Self::Error> {\n        Ok(Self {\n            read_lsn: pb\n                .read_lsn\n                .ok_or(ProtocolError::Missing(\"read_lsn\"))?\n                .try_into()?,\n            db_oid: pb.db_oid,\n        })\n    }\n}\n\nimpl From<GetDbSizeRequest> for proto::GetDbSizeRequest {\n    fn from(request: GetDbSizeRequest) -> Self {\n        Self {\n            read_lsn: Some(request.read_lsn.into()),\n            db_oid: request.db_oid,\n        }\n    }\n}\n\npub type GetDbSizeResponse = u64;\n\nimpl From<proto::GetDbSizeResponse> for GetDbSizeResponse {\n    fn from(pb: proto::GetDbSizeResponse) -> Self {\n        pb.num_bytes\n    }\n}\n\nimpl From<GetDbSizeResponse> for proto::GetDbSizeResponse {\n    fn from(num_bytes: GetDbSizeResponse) -> Self {\n        Self { num_bytes }\n    }\n}\n\n/// Requests one or more pages.\n#[derive(Clone, Debug, Default)]\npub struct GetPageRequest {\n    /// A request ID. Will be included in the response. Should be unique for in-flight requests on\n    /// the stream.\n    pub request_id: RequestID,\n    /// The request class.\n    pub request_class: GetPageClass,\n    /// The LSN to read at.\n    pub read_lsn: ReadLsn,\n    /// The relation to read from.\n    pub rel: RelTag,\n    /// Page numbers to read. Must belong to the remote shard.\n    ///\n    /// Multiple pages will be executed as a single batch by the Pageserver, amortizing layer access\n    /// costs and parallelizing them. 
This may increase the latency of any individual request, but\n    /// improves the overall latency and throughput of the batch as a whole.\n    pub block_numbers: Vec<u32>,\n}\n\nimpl TryFrom<proto::GetPageRequest> for GetPageRequest {\n    type Error = ProtocolError;\n\n    fn try_from(pb: proto::GetPageRequest) -> Result<Self, Self::Error> {\n        if pb.block_number.is_empty() {\n            return Err(ProtocolError::Missing(\"block_number\"));\n        }\n        Ok(Self {\n            request_id: pb\n                .request_id\n                .ok_or(ProtocolError::Missing(\"request_id\"))?\n                .into(),\n            request_class: pb.request_class.into(),\n            read_lsn: pb\n                .read_lsn\n                .ok_or(ProtocolError::Missing(\"read_lsn\"))?\n                .try_into()?,\n            rel: pb.rel.ok_or(ProtocolError::Missing(\"rel\"))?.try_into()?,\n            block_numbers: pb.block_number,\n        })\n    }\n}\n\nimpl From<GetPageRequest> for proto::GetPageRequest {\n    fn from(request: GetPageRequest) -> Self {\n        Self {\n            request_id: Some(request.request_id.into()),\n            request_class: request.request_class.into(),\n            read_lsn: Some(request.read_lsn.into()),\n            rel: Some(request.rel.into()),\n            block_number: request.block_numbers,\n        }\n    }\n}\n\n/// A GetPage request ID and retry attempt. Should be unique for in-flight requests on a stream.\n#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, Hash, PartialOrd, Ord)]\npub struct RequestID {\n    /// The base request ID.\n    pub id: u64,\n    // The request attempt. 
Starts at 0, incremented on each retry.\n    pub attempt: u32,\n}\n\nimpl RequestID {\n    /// Creates a new RequestID with the given ID and an initial attempt of 0.\n    pub fn new(id: u64) -> Self {\n        Self { id, attempt: 0 }\n    }\n}\n\nimpl Display for RequestID {\n    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {\n        write!(f, \"{}.{}\", self.id, self.attempt)\n    }\n}\n\nimpl From<proto::RequestId> for RequestID {\n    fn from(pb: proto::RequestId) -> Self {\n        Self {\n            id: pb.id,\n            attempt: pb.attempt,\n        }\n    }\n}\n\nimpl From<u64> for RequestID {\n    fn from(id: u64) -> Self {\n        Self::new(id)\n    }\n}\n\nimpl From<RequestID> for proto::RequestId {\n    fn from(request_id: RequestID) -> Self {\n        Self {\n            id: request_id.id,\n            attempt: request_id.attempt,\n        }\n    }\n}\n\n/// A GetPage request class.\n#[derive(Clone, Copy, Debug, Default, strum_macros::Display)]\npub enum GetPageClass {\n    /// Unknown class. For backwards compatibility: used when an older client version sends a class\n    /// that a newer server version has removed.\n    Unknown,\n    /// A normal request. This is the default.\n    #[default]\n    Normal,\n    /// A prefetch request. NB: can only be classified on pg < 18.\n    Prefetch,\n    /// A background request (e.g. 
vacuum).\n    Background,\n}\n\nimpl From<proto::GetPageClass> for GetPageClass {\n    fn from(pb: proto::GetPageClass) -> Self {\n        match pb {\n            proto::GetPageClass::Unknown => Self::Unknown,\n            proto::GetPageClass::Normal => Self::Normal,\n            proto::GetPageClass::Prefetch => Self::Prefetch,\n            proto::GetPageClass::Background => Self::Background,\n        }\n    }\n}\n\nimpl From<i32> for GetPageClass {\n    fn from(class: i32) -> Self {\n        proto::GetPageClass::try_from(class)\n            .unwrap_or(proto::GetPageClass::Unknown)\n            .into()\n    }\n}\n\nimpl From<GetPageClass> for proto::GetPageClass {\n    fn from(class: GetPageClass) -> Self {\n        match class {\n            GetPageClass::Unknown => Self::Unknown,\n            GetPageClass::Normal => Self::Normal,\n            GetPageClass::Prefetch => Self::Prefetch,\n            GetPageClass::Background => Self::Background,\n        }\n    }\n}\n\nimpl From<GetPageClass> for i32 {\n    fn from(class: GetPageClass) -> Self {\n        proto::GetPageClass::from(class).into()\n    }\n}\n\n/// A GetPage response.\n///\n/// A batch response will contain all of the requested pages. We could eagerly emit individual pages\n/// as soon as they are ready, but on a readv() Postgres holds buffer pool locks on all pages in the\n/// batch and we'll only return once the entire batch is ready, so no one can make use of the\n/// individual pages.\n#[derive(Clone, Debug)]\npub struct GetPageResponse {\n    /// The original request's ID.\n    pub request_id: RequestID,\n    /// The response status code. 
If not OK, the `rel` and `pages` fields will be empty.\n    pub status_code: GetPageStatusCode,\n    /// A string describing the status, if any.\n    pub reason: Option<String>,\n    /// The relation that the pages belong to.\n    pub rel: RelTag,\n    // The page(s), in the same order as the request.\n    pub pages: Vec<Page>,\n}\n\nimpl TryFrom<proto::GetPageResponse> for GetPageResponse {\n    type Error = ProtocolError;\n\n    fn try_from(pb: proto::GetPageResponse) -> Result<Self, ProtocolError> {\n        Ok(Self {\n            request_id: pb\n                .request_id\n                .ok_or(ProtocolError::Missing(\"request_id\"))?\n                .into(),\n            status_code: pb.status_code.into(),\n            reason: Some(pb.reason).filter(|r| !r.is_empty()),\n            rel: pb.rel.ok_or(ProtocolError::Missing(\"rel\"))?.try_into()?,\n            pages: pb.page.into_iter().map(Page::from).collect(),\n        })\n    }\n}\n\nimpl From<GetPageResponse> for proto::GetPageResponse {\n    fn from(response: GetPageResponse) -> Self {\n        Self {\n            request_id: Some(response.request_id.into()),\n            status_code: response.status_code.into(),\n            reason: response.reason.unwrap_or_default(),\n            rel: Some(response.rel.into()),\n            page: response.pages.into_iter().map(proto::Page::from).collect(),\n        }\n    }\n}\n\nimpl GetPageResponse {\n    /// Attempts to represent a tonic::Status as a GetPageResponse if appropriate. 
Returning a\n    /// tonic::Status will terminate the GetPage stream, so per-request errors are emitted as a\n    /// GetPageResponse with a non-OK status code instead.\n    #[allow(clippy::result_large_err)]\n    pub fn try_from_status(\n        status: tonic::Status,\n        request_id: RequestID,\n    ) -> Result<Self, tonic::Status> {\n        // We shouldn't see an OK status here, because we're emitting an error.\n        debug_assert_ne!(status.code(), tonic::Code::Ok);\n        if status.code() == tonic::Code::Ok {\n            return Err(tonic::Status::internal(format!(\n                \"unexpected OK status: {status:?}\",\n            )));\n        }\n\n        // If we can't convert the tonic::Code to a GetPageStatusCode, this is not a per-request\n        // error and we should return a tonic::Status to terminate the stream.\n        let Ok(status_code) = status.code().try_into() else {\n            return Err(status);\n        };\n\n        // Return a GetPageResponse for the status.\n        Ok(Self {\n            request_id,\n            status_code,\n            reason: Some(status.message().to_string()),\n            rel: RelTag::default(),\n            pages: Vec::new(),\n        })\n    }\n}\n\n// A page.\n#[derive(Clone, Debug)]\npub struct Page {\n    /// The page number.\n    pub block_number: u32,\n    /// The materialized page image, as an 8KB byte vector.\n    pub image: Bytes,\n}\n\nimpl From<proto::Page> for Page {\n    fn from(pb: proto::Page) -> Self {\n        Self {\n            block_number: pb.block_number,\n            image: pb.image,\n        }\n    }\n}\n\nimpl From<Page> for proto::Page {\n    fn from(page: Page) -> Self {\n        Self {\n            block_number: page.block_number,\n            image: page.image,\n        }\n    }\n}\n\n/// A GetPage response status code.\n///\n/// These are effectively equivalent to gRPC statuses. 
However, we use a bidirectional stream\n/// (potentially shared by many backends), and a gRPC status response would terminate the stream so\n/// we send GetPageResponse messages with these codes instead.\n#[derive(Clone, Copy, Debug, PartialEq, strum_macros::Display)]\npub enum GetPageStatusCode {\n    /// Unknown status. For forwards compatibility: used when an older client version receives a new\n    /// status code from a newer server version.\n    Unknown,\n    /// The request was successful.\n    Ok,\n    /// The page did not exist. The tenant/timeline/shard has already been validated during stream\n    /// setup.\n    NotFound,\n    /// The request was invalid.\n    InvalidRequest,\n    /// The request failed due to an internal server error.\n    InternalError,\n    /// The tenant is rate limited. Slow down and retry later.\n    SlowDown,\n}\n\nimpl From<proto::GetPageStatusCode> for GetPageStatusCode {\n    fn from(pb: proto::GetPageStatusCode) -> Self {\n        match pb {\n            proto::GetPageStatusCode::Unknown => Self::Unknown,\n            proto::GetPageStatusCode::Ok => Self::Ok,\n            proto::GetPageStatusCode::NotFound => Self::NotFound,\n            proto::GetPageStatusCode::InvalidRequest => Self::InvalidRequest,\n            proto::GetPageStatusCode::InternalError => Self::InternalError,\n            proto::GetPageStatusCode::SlowDown => Self::SlowDown,\n        }\n    }\n}\n\nimpl From<i32> for GetPageStatusCode {\n    fn from(status_code: i32) -> Self {\n        proto::GetPageStatusCode::try_from(status_code)\n            .unwrap_or(proto::GetPageStatusCode::Unknown)\n            .into()\n    }\n}\n\nimpl From<GetPageStatusCode> for proto::GetPageStatusCode {\n    fn from(status_code: GetPageStatusCode) -> Self {\n        match status_code {\n            GetPageStatusCode::Unknown => Self::Unknown,\n            GetPageStatusCode::Ok => Self::Ok,\n            GetPageStatusCode::NotFound => Self::NotFound,\n            
GetPageStatusCode::InvalidRequest => Self::InvalidRequest,\n            GetPageStatusCode::InternalError => Self::InternalError,\n            GetPageStatusCode::SlowDown => Self::SlowDown,\n        }\n    }\n}\n\nimpl From<GetPageStatusCode> for i32 {\n    fn from(status_code: GetPageStatusCode) -> Self {\n        proto::GetPageStatusCode::from(status_code).into()\n    }\n}\n\nimpl TryFrom<tonic::Code> for GetPageStatusCode {\n    type Error = tonic::Code;\n\n    fn try_from(code: tonic::Code) -> Result<Self, Self::Error> {\n        use tonic::Code;\n\n        let status_code = match code {\n            Code::Ok => Self::Ok,\n\n            // These are per-request errors, which should be returned as GetPageResponses.\n            Code::AlreadyExists => Self::InvalidRequest,\n            Code::DataLoss => Self::InternalError,\n            Code::FailedPrecondition => Self::InvalidRequest,\n            Code::InvalidArgument => Self::InvalidRequest,\n            Code::Internal => Self::InternalError,\n            Code::NotFound => Self::NotFound,\n            Code::OutOfRange => Self::InvalidRequest,\n            Code::ResourceExhausted => Self::SlowDown,\n\n            // These should terminate the stream by returning a tonic::Status.\n            Code::Aborted\n            | Code::Cancelled\n            | Code::DeadlineExceeded\n            | Code::PermissionDenied\n            | Code::Unauthenticated\n            | Code::Unavailable\n            | Code::Unimplemented\n            | Code::Unknown => return Err(code),\n        };\n        Ok(status_code)\n    }\n}\n\nimpl From<GetPageStatusCode> for tonic::Code {\n    fn from(status_code: GetPageStatusCode) -> Self {\n        use tonic::Code;\n\n        match status_code {\n            GetPageStatusCode::Unknown => Code::Unknown,\n            GetPageStatusCode::Ok => Code::Ok,\n            GetPageStatusCode::NotFound => Code::NotFound,\n            GetPageStatusCode::InvalidRequest => Code::InvalidArgument,\n          
  GetPageStatusCode::InternalError => Code::Internal,\n            GetPageStatusCode::SlowDown => Code::ResourceExhausted,\n        }\n    }\n}\n\n// Fetches the size of a relation at a given LSN, as # of blocks. Only valid on shard 0, other\n// shards will error.\n#[derive(Clone, Copy, Debug)]\npub struct GetRelSizeRequest {\n    pub read_lsn: ReadLsn,\n    pub rel: RelTag,\n    /// If true, return missing=true for missing relations instead of a NotFound error.\n    pub allow_missing: bool,\n}\n\nimpl TryFrom<proto::GetRelSizeRequest> for GetRelSizeRequest {\n    type Error = ProtocolError;\n\n    fn try_from(proto: proto::GetRelSizeRequest) -> Result<Self, Self::Error> {\n        Ok(Self {\n            read_lsn: proto\n                .read_lsn\n                .ok_or(ProtocolError::Missing(\"read_lsn\"))?\n                .try_into()?,\n            rel: proto.rel.ok_or(ProtocolError::Missing(\"rel\"))?.try_into()?,\n            allow_missing: proto.allow_missing,\n        })\n    }\n}\n\nimpl From<GetRelSizeRequest> for proto::GetRelSizeRequest {\n    fn from(request: GetRelSizeRequest) -> Self {\n        Self {\n            read_lsn: Some(request.read_lsn.into()),\n            rel: Some(request.rel.into()),\n            allow_missing: request.allow_missing,\n        }\n    }\n}\n\n/// The size of a relation as number of blocks, or None if `allow_missing=true` and the relation\n/// does not exist.\n///\n/// INVARIANT: never None if `allow_missing=false` (returns `NotFound` error instead).\npub type GetRelSizeResponse = Option<u32>;\n\nimpl From<proto::GetRelSizeResponse> for GetRelSizeResponse {\n    fn from(pb: proto::GetRelSizeResponse) -> Self {\n        (!pb.missing).then_some(pb.num_blocks)\n    }\n}\n\nimpl From<GetRelSizeResponse> for proto::GetRelSizeResponse {\n    fn from(resp: GetRelSizeResponse) -> Self {\n        Self {\n            num_blocks: resp.unwrap_or_default(),\n            missing: resp.is_none(),\n        }\n    }\n}\n\n/// Requests an 
SLRU segment. Only valid on shard 0, other shards will error.\n#[derive(Clone, Copy, Debug)]\npub struct GetSlruSegmentRequest {\n    pub read_lsn: ReadLsn,\n    pub kind: SlruKind,\n    pub segno: u32,\n}\n\nimpl TryFrom<proto::GetSlruSegmentRequest> for GetSlruSegmentRequest {\n    type Error = ProtocolError;\n\n    fn try_from(pb: proto::GetSlruSegmentRequest) -> Result<Self, Self::Error> {\n        Ok(Self {\n            read_lsn: pb\n                .read_lsn\n                .ok_or(ProtocolError::Missing(\"read_lsn\"))?\n                .try_into()?,\n            kind: u8::try_from(pb.kind)\n                .ok()\n                .and_then(SlruKind::from_repr)\n                .ok_or_else(|| ProtocolError::invalid(\"slru_kind\", pb.kind))?,\n            segno: pb.segno,\n        })\n    }\n}\n\nimpl From<GetSlruSegmentRequest> for proto::GetSlruSegmentRequest {\n    fn from(request: GetSlruSegmentRequest) -> Self {\n        Self {\n            read_lsn: Some(request.read_lsn.into()),\n            kind: request.kind as u32,\n            segno: request.segno,\n        }\n    }\n}\n\npub type GetSlruSegmentResponse = Bytes;\n\nimpl TryFrom<proto::GetSlruSegmentResponse> for GetSlruSegmentResponse {\n    type Error = ProtocolError;\n\n    fn try_from(pb: proto::GetSlruSegmentResponse) -> Result<Self, Self::Error> {\n        if pb.segment.is_empty() {\n            return Err(ProtocolError::Missing(\"segment\"));\n        }\n        Ok(pb.segment)\n    }\n}\n\nimpl From<GetSlruSegmentResponse> for proto::GetSlruSegmentResponse {\n    fn from(segment: GetSlruSegmentResponse) -> Self {\n        Self { segment }\n    }\n}\n\n// SlruKind is defined in pageserver_api::reltag.\npub type SlruKind = pageserver_api::reltag::SlruKind;\n\n/// Acquires or extends a lease on the given LSN. 
This guarantees that the Pageserver won't garbage\n/// collect the LSN until the lease expires.\npub struct LeaseLsnRequest {\n    /// The LSN to lease.\n    pub lsn: Lsn,\n}\n\nimpl TryFrom<proto::LeaseLsnRequest> for LeaseLsnRequest {\n    type Error = ProtocolError;\n\n    fn try_from(pb: proto::LeaseLsnRequest) -> Result<Self, Self::Error> {\n        if pb.lsn == 0 {\n            return Err(ProtocolError::Missing(\"lsn\"));\n        }\n        Ok(Self { lsn: Lsn(pb.lsn) })\n    }\n}\n\nimpl From<LeaseLsnRequest> for proto::LeaseLsnRequest {\n    fn from(request: LeaseLsnRequest) -> Self {\n        Self { lsn: request.lsn.0 }\n    }\n}\n\n/// Lease expiration time. If the lease could not be granted because the LSN has already been\n/// garbage collected, a FailedPrecondition status will be returned instead.\npub type LeaseLsnResponse = SystemTime;\n\nimpl TryFrom<proto::LeaseLsnResponse> for LeaseLsnResponse {\n    type Error = ProtocolError;\n\n    fn try_from(pb: proto::LeaseLsnResponse) -> Result<Self, Self::Error> {\n        let expires = pb.expires.ok_or(ProtocolError::Missing(\"expires\"))?;\n        UNIX_EPOCH\n            .checked_add(Duration::new(expires.seconds as u64, expires.nanos as u32))\n            .ok_or_else(|| ProtocolError::invalid(\"expires\", expires))\n    }\n}\n\nimpl From<LeaseLsnResponse> for proto::LeaseLsnResponse {\n    fn from(response: LeaseLsnResponse) -> Self {\n        let expires = response.duration_since(UNIX_EPOCH).unwrap_or_default();\n        Self {\n            expires: Some(prost_types::Timestamp {\n                seconds: expires.as_secs() as i64,\n                nanos: expires.subsec_nanos() as i32,\n            }),\n        }\n    }\n}\n"
  },
  {
    "path": "pageserver/page_api/src/split.rs",
    "content": "use std::collections::HashMap;\n\nuse bytes::Bytes;\n\nuse crate::model::*;\nuse pageserver_api::key::rel_block_to_key;\nuse pageserver_api::shard::key_to_shard_number;\nuse utils::shard::{ShardCount, ShardIndex, ShardStripeSize};\n\n/// Splits GetPageRequests that straddle shard boundaries and assembles the responses.\n/// TODO: add tests for this.\npub struct GetPageSplitter {\n    /// Split requests by shard index.\n    requests: HashMap<ShardIndex, GetPageRequest>,\n    /// The response being assembled. Preallocated with empty pages, to be filled in.\n    response: GetPageResponse,\n    /// Maps the offset in `request.block_numbers` and `response.pages` to the owning shard. Used\n    /// to assemble the response pages in the same order as the original request.\n    block_shards: Vec<ShardIndex>,\n}\n\nimpl GetPageSplitter {\n    /// Checks if the given request only touches a single shard, and returns the shard ID. This is\n    /// the common case, so we check first in order to avoid unnecessary allocations and overhead.\n    pub fn for_single_shard(\n        req: &GetPageRequest,\n        count: ShardCount,\n        stripe_size: Option<ShardStripeSize>,\n    ) -> Result<Option<ShardIndex>, SplitError> {\n        // Fast path: unsharded tenant.\n        if count.is_unsharded() {\n            return Ok(Some(ShardIndex::unsharded()));\n        }\n\n        let Some(stripe_size) = stripe_size else {\n            return Err(\"stripe size must be given for sharded tenants\".into());\n        };\n\n        // Find the first page's shard, for comparison.\n        let Some(&first_page) = req.block_numbers.first() else {\n            return Err(\"no block numbers in request\".into());\n        };\n        let key = rel_block_to_key(req.rel, first_page);\n        let shard_number = key_to_shard_number(count, stripe_size, &key);\n\n        Ok(req\n            .block_numbers\n            .iter()\n            .skip(1) // computed above\n            
.all(|&blkno| {\n                let key = rel_block_to_key(req.rel, blkno);\n                key_to_shard_number(count, stripe_size, &key) == shard_number\n            })\n            .then_some(ShardIndex::new(shard_number, count)))\n    }\n\n    /// Splits the given request.\n    pub fn split(\n        req: GetPageRequest,\n        count: ShardCount,\n        stripe_size: Option<ShardStripeSize>,\n    ) -> Result<Self, SplitError> {\n        // The caller should make sure we don't split requests unnecessarily.\n        debug_assert!(\n            Self::for_single_shard(&req, count, stripe_size)?.is_none(),\n            \"unnecessary request split\"\n        );\n\n        if count.is_unsharded() {\n            return Err(\"unsharded tenant, no point in splitting request\".into());\n        }\n        let Some(stripe_size) = stripe_size else {\n            return Err(\"stripe size must be given for sharded tenants\".into());\n        };\n\n        // Split the requests by shard index.\n        let mut requests = HashMap::with_capacity(2); // common case\n        let mut block_shards = Vec::with_capacity(req.block_numbers.len());\n        for &blkno in &req.block_numbers {\n            let key = rel_block_to_key(req.rel, blkno);\n            let shard_number = key_to_shard_number(count, stripe_size, &key);\n            let shard_id = ShardIndex::new(shard_number, count);\n\n            requests\n                .entry(shard_id)\n                .or_insert_with(|| GetPageRequest {\n                    request_id: req.request_id,\n                    request_class: req.request_class,\n                    rel: req.rel,\n                    read_lsn: req.read_lsn,\n                    block_numbers: Vec::new(),\n                })\n                .block_numbers\n                .push(blkno);\n            block_shards.push(shard_id);\n        }\n\n        // Construct a response to be populated by shard responses. 
Preallocate empty page slots\n        // with the expected block numbers.\n        let response = GetPageResponse {\n            request_id: req.request_id,\n            status_code: GetPageStatusCode::Ok,\n            reason: None,\n            rel: req.rel,\n            pages: req\n                .block_numbers\n                .into_iter()\n                .map(|block_number| {\n                    Page {\n                        block_number,\n                        image: Bytes::new(), // empty page slot to be filled in\n                    }\n                })\n                .collect(),\n        };\n\n        Ok(Self {\n            requests,\n            response,\n            block_shards,\n        })\n    }\n\n    /// Drains the per-shard requests, moving them out of the splitter to avoid extra allocations.\n    pub fn drain_requests(&mut self) -> impl Iterator<Item = (ShardIndex, GetPageRequest)> {\n        self.requests.drain()\n    }\n\n    /// Adds a response from the given shard. The response must match the request ID and have an OK\n    /// status code. 
A response must not already exist for the given shard ID.\n    pub fn add_response(\n        &mut self,\n        shard_id: ShardIndex,\n        response: GetPageResponse,\n    ) -> Result<(), SplitError> {\n        // The caller should already have converted status codes into tonic::Status.\n        if response.status_code != GetPageStatusCode::Ok {\n            return Err(SplitError(format!(\n                \"unexpected non-OK response for shard {shard_id}: {} {}\",\n                response.status_code,\n                response.reason.unwrap_or_default()\n            )));\n        }\n\n        if response.request_id != self.response.request_id {\n            return Err(SplitError(format!(\n                \"response ID mismatch for shard {shard_id}: expected {}, got {}\",\n                self.response.request_id, response.request_id\n            )));\n        }\n\n        if response.request_id != self.response.request_id {\n            return Err(SplitError(format!(\n                \"response ID mismatch for shard {shard_id}: expected {}, got {}\",\n                self.response.request_id, response.request_id\n            )));\n        }\n\n        // Place the shard response pages into the assembled response, in request order.\n        let mut pages = response.pages.into_iter();\n\n        for (i, &s) in self.block_shards.iter().enumerate() {\n            if shard_id != s {\n                continue;\n            }\n\n            let Some(slot) = self.response.pages.get_mut(i) else {\n                return Err(SplitError(format!(\n                    \"no block_shards slot {i} for shard {shard_id}\"\n                )));\n            };\n            let Some(page) = pages.next() else {\n                return Err(SplitError(format!(\n                    \"missing page {} in shard {shard_id} response\",\n                    slot.block_number\n                )));\n            };\n            if page.block_number != slot.block_number {\n                
return Err(SplitError(format!(\n                    \"shard {shard_id} returned wrong page at index {i}, expected {} got {}\",\n                    slot.block_number, page.block_number\n                )));\n            }\n            if !slot.image.is_empty() {\n                return Err(SplitError(format!(\n                    \"shard {shard_id} returned duplicate page {} at index {i}\",\n                    slot.block_number\n                )));\n            }\n\n            *slot = page;\n        }\n\n        // Make sure we've consumed all pages from the shard response.\n        if let Some(extra_page) = pages.next() {\n            return Err(SplitError(format!(\n                \"shard {shard_id} returned extra page: {}\",\n                extra_page.block_number\n            )));\n        }\n\n        Ok(())\n    }\n\n    /// Collects the final, assembled response.\n    pub fn collect_response(self) -> Result<GetPageResponse, SplitError> {\n        // Check that the response is complete.\n        for (i, page) in self.response.pages.iter().enumerate() {\n            if page.image.is_empty() {\n                return Err(SplitError(format!(\n                    \"missing page {} for shard {}\",\n                    page.block_number,\n                    self.block_shards\n                        .get(i)\n                        .map(|s| s.to_string())\n                        .unwrap_or_else(|| \"?\".to_string())\n                )));\n            }\n        }\n\n        Ok(self.response)\n    }\n}\n\n/// A GetPageSplitter error.\n#[derive(Debug, thiserror::Error)]\n#[error(\"{0}\")]\npub struct SplitError(String);\n\nimpl From<&str> for SplitError {\n    fn from(err: &str) -> Self {\n        SplitError(err.to_string())\n    }\n}\n\nimpl From<String> for SplitError {\n    fn from(err: String) -> Self {\n        SplitError(err)\n    }\n}\n\nimpl From<SplitError> for tonic::Status {\n    fn from(err: SplitError) -> Self {\n        
tonic::Status::internal(err.0)\n    }\n}\n"
  },
  {
    "path": "pageserver/pagebench/Cargo.toml",
    "content": "[package]\nname = \"pagebench\"\nversion = \"0.1.0\"\nedition.workspace = true\nlicense.workspace = true\n\n# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html\n\n[dependencies]\nanyhow.workspace = true\nasync-trait.workspace = true\nbytes.workspace = true\ncamino.workspace = true\nclap.workspace = true\nfutures.workspace = true\nhdrhistogram.workspace = true\nhumantime.workspace = true\nhumantime-serde.workspace = true\npprof.workspace = true\nrand.workspace = true\nreqwest.workspace = true\nserde.workspace = true\nserde_json.workspace = true\ntracing.workspace = true\ntokio.workspace = true\ntokio-stream.workspace = true\ntokio-util.workspace = true\ntonic.workspace = true\nurl.workspace = true\n\npageserver_api.workspace = true\npageserver_client.workspace = true\npageserver_client_grpc.workspace = true\npageserver_page_api.workspace = true\nutils = { path = \"../../libs/utils/\" }\nworkspace_hack = { version = \"0.1\", path = \"../../workspace_hack\" }\n"
  },
  {
    "path": "pageserver/pagebench/src/cmd/aux_files.rs",
    "content": "use std::collections::HashMap;\nuse std::sync::Arc;\nuse std::time::Instant;\n\nuse pageserver_api::models::{TenantConfig, TenantConfigRequest};\nuse pageserver_api::shard::TenantShardId;\nuse utils::id::TenantTimelineId;\nuse utils::lsn::Lsn;\n\n/// Ingest aux files into the pageserver.\n#[derive(clap::Parser)]\npub(crate) struct Args {\n    #[clap(long, default_value = \"http://localhost:9898\")]\n    mgmt_api_endpoint: String,\n    #[clap(long, default_value = \"postgres://postgres@localhost:64000\")]\n    page_service_connstring: String,\n    #[clap(long)]\n    pageserver_jwt: Option<String>,\n\n    targets: Option<Vec<TenantTimelineId>>,\n}\n\npub(crate) fn main(args: Args) -> anyhow::Result<()> {\n    let rt = tokio::runtime::Builder::new_multi_thread()\n        .enable_all()\n        .build()\n        .unwrap();\n\n    let main_task = rt.spawn(main_impl(args));\n    rt.block_on(main_task).unwrap()\n}\n\nasync fn main_impl(args: Args) -> anyhow::Result<()> {\n    let args: &'static Args = Box::leak(Box::new(args));\n\n    let mgmt_api_client = Arc::new(pageserver_client::mgmt_api::Client::new(\n        reqwest::Client::new(), // TODO: support ssl_ca_file for https APIs in pagebench.\n        args.mgmt_api_endpoint.clone(),\n        args.pageserver_jwt.as_deref(),\n    ));\n\n    // discover targets\n    let timelines: Vec<TenantTimelineId> = crate::util::cli::targets::discover(\n        &mgmt_api_client,\n        crate::util::cli::targets::Spec {\n            limit_to_first_n_targets: None,\n            targets: {\n                if let Some(targets) = &args.targets {\n                    if targets.len() != 1 {\n                        anyhow::bail!(\"must specify exactly one target\");\n                    }\n                    Some(targets.clone())\n                } else {\n                    None\n                }\n            },\n        },\n    )\n    .await?;\n\n    let timeline = timelines[0];\n    let tenant_shard_id = 
TenantShardId::unsharded(timeline.tenant_id);\n    let timeline_id = timeline.timeline_id;\n\n    println!(\"operating on timeline {timeline}\");\n\n    mgmt_api_client\n        .set_tenant_config(&TenantConfigRequest {\n            tenant_id: timeline.tenant_id,\n            config: TenantConfig::default(),\n        })\n        .await?;\n\n    for batch in 0..100 {\n        let items = (0..100)\n            .map(|id| {\n                (\n                    format!(\"pg_logical/mappings/{batch:03}.{id:03}\"),\n                    format!(\"{id:08}\"),\n                )\n            })\n            .collect::<HashMap<_, _>>();\n        let file_cnt = items.len();\n        mgmt_api_client\n            .ingest_aux_files(tenant_shard_id, timeline_id, items)\n            .await?;\n        println!(\"ingested {file_cnt} files\");\n    }\n\n    for _ in 0..100 {\n        let start = Instant::now();\n        let files = mgmt_api_client\n            .list_aux_files(tenant_shard_id, timeline_id, Lsn(Lsn::MAX.0 - 1))\n            .await?;\n        println!(\n            \"{} files found in {}s\",\n            files.len(),\n            start.elapsed().as_secs_f64()\n        );\n    }\n\n    anyhow::Ok(())\n}\n"
  },
  {
    "path": "pageserver/pagebench/src/cmd/basebackup.rs",
    "content": "use std::collections::HashMap;\nuse std::num::NonZeroUsize;\nuse std::ops::Range;\nuse std::pin::Pin;\nuse std::sync::atomic::{AtomicU64, Ordering};\nuse std::sync::{Arc, Mutex};\nuse std::time::Instant;\n\nuse anyhow::anyhow;\nuse futures::TryStreamExt as _;\nuse pageserver_api::shard::TenantShardId;\nuse pageserver_client::mgmt_api::ForceAwaitLogicalSize;\nuse pageserver_client::page_service::BasebackupRequest;\nuse pageserver_page_api as page_api;\nuse rand::prelude::*;\nuse tokio::io::AsyncRead;\nuse tokio::sync::Barrier;\nuse tokio::task::JoinSet;\nuse tokio_util::compat::{TokioAsyncReadCompatExt as _, TokioAsyncWriteCompatExt as _};\nuse tokio_util::io::StreamReader;\nuse tonic::async_trait;\nuse tracing::{info, instrument};\nuse url::Url;\nuse utils::id::TenantTimelineId;\nuse utils::lsn::Lsn;\nuse utils::shard::ShardIndex;\n\nuse crate::util::tokio_thread_local_stats::AllThreadLocalStats;\nuse crate::util::{request_stats, tokio_thread_local_stats};\n\n/// basebackup@LatestLSN\n#[derive(clap::Parser)]\npub(crate) struct Args {\n    #[clap(long, default_value = \"http://localhost:9898\")]\n    mgmt_api_endpoint: String,\n    /// The Pageserver to connect to. 
Use postgresql:// for libpq, or grpc:// for gRPC.\n    #[clap(long, default_value = \"postgresql://postgres@localhost:64000\")]\n    page_service_connstring: String,\n    #[clap(long)]\n    pageserver_jwt: Option<String>,\n    #[clap(long, default_value = \"1\")]\n    num_clients: NonZeroUsize,\n    #[clap(long)]\n    no_compression: bool,\n    #[clap(long)]\n    runtime: Option<humantime::Duration>,\n    #[clap(long)]\n    limit_to_first_n_targets: Option<usize>,\n    targets: Option<Vec<TenantTimelineId>>,\n}\n\n#[derive(Debug, Default)]\nstruct LiveStats {\n    completed_requests: AtomicU64,\n}\n\nimpl LiveStats {\n    fn inc(&self) {\n        self.completed_requests.fetch_add(1, Ordering::Relaxed);\n    }\n}\n\nstruct Target {\n    timeline: TenantTimelineId,\n    lsn_range: Option<Range<Lsn>>,\n}\n\n#[derive(serde::Serialize)]\nstruct Output {\n    total: request_stats::Output,\n}\n\ntokio_thread_local_stats::declare!(STATS: request_stats::Stats);\n\npub(crate) fn main(args: Args) -> anyhow::Result<()> {\n    tokio_thread_local_stats::main!(STATS, move |thread_local_stats| {\n        main_impl(args, thread_local_stats)\n    })\n}\n\nasync fn main_impl(\n    args: Args,\n    all_thread_local_stats: AllThreadLocalStats<request_stats::Stats>,\n) -> anyhow::Result<()> {\n    let args: &'static Args = Box::leak(Box::new(args));\n\n    let mgmt_api_client = Arc::new(pageserver_client::mgmt_api::Client::new(\n        reqwest::Client::new(), // TODO: support ssl_ca_file for https APIs in pagebench.\n        args.mgmt_api_endpoint.clone(),\n        args.pageserver_jwt.as_deref(),\n    ));\n\n    // discover targets\n    let timelines: Vec<TenantTimelineId> = crate::util::cli::targets::discover(\n        &mgmt_api_client,\n        crate::util::cli::targets::Spec {\n            limit_to_first_n_targets: args.limit_to_first_n_targets,\n            targets: args.targets.clone(),\n        },\n    )\n    .await?;\n    let mut js = JoinSet::new();\n    for timeline in 
&timelines {\n        js.spawn({\n            let timeline = *timeline;\n            let info = mgmt_api_client\n                .timeline_info(\n                    TenantShardId::unsharded(timeline.tenant_id),\n                    timeline.timeline_id,\n                    ForceAwaitLogicalSize::No,\n                )\n                .await\n                .unwrap();\n            async move {\n                anyhow::Ok(Target {\n                    timeline,\n                    // TODO: support lsn_range != latest LSN\n                    lsn_range: Some(info.last_record_lsn..(info.last_record_lsn + 1)),\n                })\n            }\n        });\n    }\n    let mut all_targets: Vec<Target> = Vec::new();\n    while let Some(res) = js.join_next().await {\n        all_targets.push(res.unwrap().unwrap());\n    }\n\n    let live_stats = Arc::new(LiveStats::default());\n\n    let num_client_tasks = timelines.len();\n    let num_live_stats_dump = 1;\n    let num_work_sender_tasks = 1;\n\n    let start_work_barrier = Arc::new(tokio::sync::Barrier::new(\n        num_client_tasks + num_live_stats_dump + num_work_sender_tasks,\n    ));\n    let all_work_done_barrier = Arc::new(tokio::sync::Barrier::new(num_client_tasks));\n\n    tokio::spawn({\n        let stats = Arc::clone(&live_stats);\n        let start_work_barrier = Arc::clone(&start_work_barrier);\n        async move {\n            start_work_barrier.wait().await;\n            loop {\n                let start = std::time::Instant::now();\n                tokio::time::sleep(std::time::Duration::from_secs(1)).await;\n                let completed_requests = stats.completed_requests.swap(0, Ordering::Relaxed);\n                let elapsed = start.elapsed();\n                info!(\n                    \"RPS: {:.0}\",\n                    completed_requests as f64 / elapsed.as_secs_f64()\n                );\n            }\n        }\n    });\n\n    let mut work_senders = HashMap::new();\n    let mut tasks = 
Vec::new();\n    let scheme = match Url::parse(&args.page_service_connstring) {\n        Ok(url) => url.scheme().to_lowercase().to_string(),\n        Err(url::ParseError::RelativeUrlWithoutBase) => \"postgresql\".to_string(),\n        Err(err) => return Err(anyhow!(\"invalid connstring: {err}\")),\n    };\n    for &tl in &timelines {\n        let (sender, receiver) = tokio::sync::mpsc::channel(1); // TODO: not sure what the implications of this are\n        work_senders.insert(tl, sender);\n\n        let client: Box<dyn Client> = match scheme.as_str() {\n            \"postgresql\" | \"postgres\" => Box::new(\n                LibpqClient::new(&args.page_service_connstring, tl, !args.no_compression).await?,\n            ),\n            \"grpc\" => Box::new(\n                GrpcClient::new(&args.page_service_connstring, tl, !args.no_compression).await?,\n            ),\n            scheme => return Err(anyhow!(\"invalid scheme {scheme}\")),\n        };\n\n        tasks.push(tokio::spawn(run_worker(\n            client,\n            Arc::clone(&start_work_barrier),\n            receiver,\n            Arc::clone(&all_work_done_barrier),\n            Arc::clone(&live_stats),\n        )));\n    }\n\n    let work_sender = async move {\n        start_work_barrier.wait().await;\n        loop {\n            let (timeline, work) = {\n                let mut rng = rand::rng();\n                let target = all_targets.choose(&mut rng).unwrap();\n                let lsn = target.lsn_range.clone().map(|r| rng.random_range(r));\n                (target.timeline, Work { lsn })\n            };\n            let sender = work_senders.get(&timeline).unwrap();\n            // TODO: what if this blocks?\n            sender.send(work).await.ok().unwrap();\n        }\n    };\n\n    if let Some(runtime) = args.runtime {\n        match tokio::time::timeout(runtime.into(), work_sender).await {\n            Ok(()) => unreachable!(\"work sender never terminates\"),\n            Err(_timeout) 
=> {\n                // this implicitly drops the work_senders, making all the clients exit\n            }\n        }\n    } else {\n        work_sender.await;\n        unreachable!(\"work sender never terminates\");\n    }\n\n    for t in tasks {\n        t.await.unwrap();\n    }\n\n    let output = Output {\n        total: {\n            let mut agg_stats = request_stats::Stats::new();\n            for stats in all_thread_local_stats.lock().unwrap().iter() {\n                let stats = stats.lock().unwrap();\n                agg_stats.add(&stats);\n            }\n            agg_stats.output()\n        },\n    };\n\n    let output = serde_json::to_string_pretty(&output).unwrap();\n    println!(\"{output}\");\n\n    anyhow::Ok(())\n}\n\n#[derive(Copy, Clone)]\nstruct Work {\n    lsn: Option<Lsn>,\n}\n\n#[instrument(skip_all)]\nasync fn run_worker(\n    mut client: Box<dyn Client>,\n    start_work_barrier: Arc<Barrier>,\n    mut work: tokio::sync::mpsc::Receiver<Work>,\n    all_work_done_barrier: Arc<Barrier>,\n    live_stats: Arc<LiveStats>,\n) {\n    start_work_barrier.wait().await;\n\n    while let Some(Work { lsn }) = work.recv().await {\n        let start = Instant::now();\n        let stream = client.basebackup(lsn).await.unwrap();\n\n        let size = futures::io::copy(stream.compat(), &mut tokio::io::sink().compat_write())\n            .await\n            .unwrap();\n        info!(\"basebackup size is {size} bytes\");\n        let elapsed = start.elapsed();\n        live_stats.inc();\n        STATS.with(|stats| {\n            stats.borrow().lock().unwrap().observe(elapsed).unwrap();\n        });\n    }\n\n    all_work_done_barrier.wait().await;\n}\n\n/// A basebackup client. 
This allows switching out the client protocol implementation.\n#[async_trait]\ntrait Client: Send {\n    async fn basebackup(\n        &mut self,\n        lsn: Option<Lsn>,\n    ) -> anyhow::Result<Pin<Box<dyn AsyncRead + Send>>>;\n}\n\n/// A libpq-based Pageserver client.\nstruct LibpqClient {\n    inner: pageserver_client::page_service::Client,\n    ttid: TenantTimelineId,\n    compression: bool,\n}\n\nimpl LibpqClient {\n    async fn new(\n        connstring: &str,\n        ttid: TenantTimelineId,\n        compression: bool,\n    ) -> anyhow::Result<Self> {\n        Ok(Self {\n            inner: pageserver_client::page_service::Client::new(connstring.to_string()).await?,\n            ttid,\n            compression,\n        })\n    }\n}\n\n#[async_trait]\nimpl Client for LibpqClient {\n    async fn basebackup(\n        &mut self,\n        lsn: Option<Lsn>,\n    ) -> anyhow::Result<Pin<Box<dyn AsyncRead + Send + 'static>>> {\n        let req = BasebackupRequest {\n            tenant_id: self.ttid.tenant_id,\n            timeline_id: self.ttid.timeline_id,\n            lsn,\n            gzip: self.compression,\n        };\n        let stream = self.inner.basebackup(&req).await?;\n        Ok(Box::pin(StreamReader::new(\n            stream.map_err(std::io::Error::other),\n        )))\n    }\n}\n\n/// A gRPC Pageserver client.\nstruct GrpcClient {\n    inner: page_api::Client,\n    compression: page_api::BaseBackupCompression,\n}\n\nimpl GrpcClient {\n    async fn new(\n        connstring: &str,\n        ttid: TenantTimelineId,\n        compression: bool,\n    ) -> anyhow::Result<Self> {\n        let inner = page_api::Client::connect(\n            connstring.to_string(),\n            ttid.tenant_id,\n            ttid.timeline_id,\n            ShardIndex::unsharded(),\n            None,\n            None, // NB: uses payload compression\n        )\n        .await?;\n        let compression = match compression {\n            true => 
page_api::BaseBackupCompression::Gzip,\n            false => page_api::BaseBackupCompression::None,\n        };\n        Ok(Self { inner, compression })\n    }\n}\n\n#[async_trait]\nimpl Client for GrpcClient {\n    async fn basebackup(\n        &mut self,\n        lsn: Option<Lsn>,\n    ) -> anyhow::Result<Pin<Box<dyn AsyncRead + Send + 'static>>> {\n        let req = page_api::GetBaseBackupRequest {\n            lsn,\n            replica: false,\n            full: false,\n            compression: self.compression,\n        };\n        Ok(Box::pin(self.inner.get_base_backup(req).await?))\n    }\n}\n"
  },
  {
    "path": "pageserver/pagebench/src/cmd/getpage_latest_lsn.rs",
    "content": "use std::collections::{HashMap, HashSet, VecDeque};\nuse std::future::Future;\nuse std::num::NonZeroUsize;\nuse std::pin::Pin;\nuse std::sync::atomic::{AtomicU64, Ordering};\nuse std::sync::{Arc, Mutex};\nuse std::time::{Duration, Instant};\n\nuse anyhow::Context;\nuse async_trait::async_trait;\nuse bytes::Bytes;\nuse camino::Utf8PathBuf;\nuse futures::stream::FuturesUnordered;\nuse futures::{Stream, StreamExt as _};\nuse pageserver_api::key::Key;\nuse pageserver_api::keyspace::KeySpaceAccum;\nuse pageserver_api::pagestream_api::{PagestreamGetPageRequest, PagestreamRequest};\nuse pageserver_api::reltag::RelTag;\nuse pageserver_api::shard::TenantShardId;\nuse pageserver_client_grpc::{self as client_grpc, ShardSpec};\nuse pageserver_page_api as page_api;\nuse rand::prelude::*;\nuse tokio::task::JoinSet;\nuse tokio_util::sync::CancellationToken;\nuse tracing::info;\nuse url::Url;\nuse utils::id::TenantTimelineId;\nuse utils::lsn::Lsn;\nuse utils::shard::ShardIndex;\n\nuse crate::util::tokio_thread_local_stats::AllThreadLocalStats;\nuse crate::util::{request_stats, tokio_thread_local_stats};\n\n/// GetPage@LatestLSN, uniformly distributed across the compute-accessible keyspace.\n#[derive(clap::Parser)]\npub(crate) struct Args {\n    #[clap(long, default_value = \"http://localhost:9898\")]\n    mgmt_api_endpoint: String,\n    /// Pageserver connection string. Supports postgresql:// and grpc:// protocols.\n    #[clap(long, default_value = \"postgres://postgres@localhost:64000\")]\n    page_service_connstring: String,\n    /// Use the rich gRPC Pageserver client `client_grpc::PageserverClient`, rather than the basic\n    /// no-frills `page_api::Client`. 
Only valid with grpc:// connstrings.\n    #[clap(long)]\n    rich_client: bool,\n    #[clap(long)]\n    pageserver_jwt: Option<String>,\n    #[clap(long, default_value = \"1\")]\n    num_clients: NonZeroUsize,\n    #[clap(long)]\n    runtime: Option<humantime::Duration>,\n    /// If true, enable compression (only for gRPC).\n    #[clap(long)]\n    compression: bool,\n    /// Each client sends requests at the given rate.\n    ///\n    /// If a request takes too long and we should be issuing a new request already,\n    /// we skip that request and account it as `MISSED`.\n    #[clap(long)]\n    per_client_rate: Option<usize>,\n    /// Probability for sending `latest=true` in the request (uniform distribution).\n    #[clap(long, default_value = \"1\")]\n    req_latest_probability: f64,\n    #[clap(long)]\n    limit_to_first_n_targets: Option<usize>,\n    /// For large pageserver installations, enumerating the keyspace takes a lot of time.\n    /// If specified, the specified path is used to maintain a cache of the keyspace enumeration result.\n    /// The cache is tagged and auto-invalidated by the tenant/timeline ids only.\n    /// It doesn't get invalidated if the keyspace changes under the hood, e.g., due to new ingested data or compaction.\n    #[clap(long)]\n    keyspace_cache: Option<Utf8PathBuf>,\n    /// Before starting the benchmark, live-reconfigure the pageserver to use the given\n    /// [`pageserver_api::models::virtual_file::IoEngineKind`].\n    #[clap(long)]\n    set_io_engine: Option<pageserver_api::models::virtual_file::IoEngineKind>,\n\n    /// Before starting the benchmark, live-reconfigure the pageserver to use specified io mode (buffered vs. direct).\n    #[clap(long)]\n    set_io_mode: Option<pageserver_api::models::virtual_file::IoMode>,\n\n    /// Queue depth generated in each client.\n    #[clap(long, default_value = \"1\")]\n    queue_depth: NonZeroUsize,\n\n    /// Batch size of contiguous pages generated by each client. 
This is equivalent to how Postgres\n    /// will request page batches (e.g. prefetches or vectored reads). A batch counts as 1 RPS and\n    /// 1 queue depth.\n    ///\n    /// The libpq protocol does not support client-side batching, and will submit batches as many\n    /// individual requests, in the hope that the server will batch them. Each batch still counts as\n    /// 1 RPS and 1 queue depth.\n    #[clap(long, default_value = \"1\")]\n    batch_size: NonZeroUsize,\n\n    #[clap(long)]\n    only_relnode: Option<u32>,\n\n    targets: Option<Vec<TenantTimelineId>>,\n}\n\n/// State shared by all clients\n#[derive(Debug)]\nstruct SharedState {\n    start_work_barrier: tokio::sync::Barrier,\n    live_stats: LiveStats,\n}\n\n#[derive(Debug, Default)]\nstruct LiveStats {\n    completed_requests: AtomicU64,\n    missed: AtomicU64,\n}\n\nimpl LiveStats {\n    fn request_done(&self) {\n        self.completed_requests.fetch_add(1, Ordering::Relaxed);\n    }\n    fn missed(&self, n: u64) {\n        self.missed.fetch_add(n, Ordering::Relaxed);\n    }\n}\n\n#[derive(Clone, serde::Serialize, serde::Deserialize)]\nstruct KeyRange {\n    timeline: TenantTimelineId,\n    timeline_lsn: Lsn,\n    start: i128,\n    end: i128,\n}\n\nimpl KeyRange {\n    fn len(&self) -> i128 {\n        self.end - self.start\n    }\n}\n\n#[derive(PartialEq, Eq, Hash, Copy, Clone)]\nstruct WorkerId {\n    timeline: TenantTimelineId,\n    num_client: usize, // from 0..args.num_clients\n}\n\n#[derive(serde::Serialize)]\nstruct Output {\n    total: request_stats::Output,\n}\n\ntokio_thread_local_stats::declare!(STATS: request_stats::Stats);\n\npub(crate) fn main(args: Args) -> anyhow::Result<()> {\n    tokio_thread_local_stats::main!(STATS, move |thread_local_stats| {\n        main_impl(args, thread_local_stats)\n    })\n}\n\nasync fn main_impl(\n    args: Args,\n    all_thread_local_stats: AllThreadLocalStats<request_stats::Stats>,\n) -> anyhow::Result<()> {\n    let args: &'static Args = 
Box::leak(Box::new(args));\n\n    let mgmt_api_client = Arc::new(pageserver_client::mgmt_api::Client::new(\n        reqwest::Client::new(), // TODO: support ssl_ca_file for https APIs in pagebench.\n        args.mgmt_api_endpoint.clone(),\n        args.pageserver_jwt.as_deref(),\n    ));\n\n    if let Some(engine_str) = &args.set_io_engine {\n        mgmt_api_client.put_io_engine(engine_str).await?;\n    }\n\n    if let Some(mode) = &args.set_io_mode {\n        mgmt_api_client.put_io_mode(mode).await?;\n    }\n\n    // discover targets\n    let timelines: Vec<TenantTimelineId> = crate::util::cli::targets::discover(\n        &mgmt_api_client,\n        crate::util::cli::targets::Spec {\n            limit_to_first_n_targets: args.limit_to_first_n_targets,\n            targets: args.targets.clone(),\n        },\n    )\n    .await?;\n\n    #[derive(serde::Deserialize)]\n    struct KeyspaceCacheDe {\n        tag: Vec<TenantTimelineId>,\n        data: Vec<KeyRange>,\n    }\n    #[derive(serde::Serialize)]\n    struct KeyspaceCacheSer<'a> {\n        tag: &'a [TenantTimelineId],\n        data: &'a [KeyRange],\n    }\n    let cache = args\n        .keyspace_cache\n        .as_ref()\n        .map(|keyspace_cache_file| {\n            let contents = match std::fs::read(keyspace_cache_file) {\n                Err(e) if e.kind() == std::io::ErrorKind::NotFound => {\n                    return anyhow::Ok(None);\n                }\n                x => x.context(\"read keyspace cache file\")?,\n            };\n            let cache: KeyspaceCacheDe =\n                serde_json::from_slice(&contents).context(\"deserialize cache file\")?;\n            let tag_ok = HashSet::<TenantTimelineId>::from_iter(cache.tag.into_iter())\n                == HashSet::from_iter(timelines.iter().cloned());\n            info!(\"keyspace cache file matches tag: {tag_ok}\");\n            anyhow::Ok(if tag_ok { Some(cache.data) } else { None })\n        })\n        .transpose()?\n        .flatten();\n  
  let all_ranges: Vec<KeyRange> = if let Some(cached) = cache {\n        info!(\"using keyspace cache file\");\n        cached\n    } else {\n        let mut js = JoinSet::new();\n        for timeline in &timelines {\n            js.spawn({\n                let mgmt_api_client = Arc::clone(&mgmt_api_client);\n                let timeline = *timeline;\n                async move {\n                    let partitioning = mgmt_api_client\n                        .keyspace(\n                            TenantShardId::unsharded(timeline.tenant_id),\n                            timeline.timeline_id,\n                        )\n                        .await?;\n                    let lsn = partitioning.at_lsn;\n                    let start = Instant::now();\n                    let mut filtered = KeySpaceAccum::new();\n                    // let's hope this is inlined and vectorized...\n                    // TODO: turn this loop into a is_rel_block_range() function.\n                    for r in partitioning.keys.ranges.iter() {\n                        let mut i = r.start;\n                        while i != r.end {\n                            let mut include = true;\n                            include &= i.is_rel_block_key();\n                            if let Some(only_relnode) = args.only_relnode {\n                                include &= i.is_rel_block_of_rel(only_relnode);\n                            }\n                            if include {\n                                filtered.add_key(i);\n                            }\n                            i = i.next();\n                        }\n                    }\n                    let filtered = filtered.to_keyspace();\n                    let filter_duration = start.elapsed();\n\n                    anyhow::Ok((\n                        filter_duration,\n                        filtered.ranges.into_iter().map(move |r| KeyRange {\n                            timeline,\n                            
timeline_lsn: lsn,\n                            start: r.start.to_i128(),\n                            end: r.end.to_i128(),\n                        }),\n                    ))\n                }\n            });\n        }\n        let mut total_filter_duration = Duration::from_secs(0);\n        let mut all_ranges: Vec<KeyRange> = Vec::new();\n        while let Some(res) = js.join_next().await {\n            let (filter_duration, range) = res.unwrap().unwrap();\n            all_ranges.extend(range);\n            total_filter_duration += filter_duration;\n        }\n        info!(\"filter duration: {}\", total_filter_duration.as_secs_f64());\n        if let Some(cachefile) = args.keyspace_cache.as_ref() {\n            let cache = KeyspaceCacheSer {\n                tag: &timelines,\n                data: &all_ranges,\n            };\n            let bytes = serde_json::to_vec(&cache).context(\"serialize keyspace for cache file\")?;\n            std::fs::write(cachefile, bytes).context(\"write keyspace cache file to disk\")?;\n            info!(\"successfully wrote keyspace cache file\");\n        }\n        all_ranges\n    };\n\n    let num_live_stats_dump = 1;\n    let num_work_sender_tasks = args.num_clients.get() * timelines.len();\n    let num_main_impl = 1;\n\n    let shared_state = Arc::new(SharedState {\n        start_work_barrier: tokio::sync::Barrier::new(\n            num_live_stats_dump + num_work_sender_tasks + num_main_impl,\n        ),\n        live_stats: LiveStats::default(),\n    });\n    let cancel = CancellationToken::new();\n\n    let ss = shared_state.clone();\n    tokio::spawn({\n        async move {\n            ss.start_work_barrier.wait().await;\n            loop {\n                let start = std::time::Instant::now();\n                tokio::time::sleep(std::time::Duration::from_secs(1)).await;\n                let stats = &ss.live_stats;\n                let completed_requests = stats.completed_requests.swap(0, Ordering::Relaxed);\n     
           let missed = stats.missed.swap(0, Ordering::Relaxed);\n                let elapsed = start.elapsed();\n                info!(\n                    \"RPS: {:.0}   MISSED: {:.0}\",\n                    completed_requests as f64 / elapsed.as_secs_f64(),\n                    missed as f64 / elapsed.as_secs_f64()\n                );\n            }\n        }\n    });\n\n    let rps_period = args\n        .per_client_rate\n        .map(|rps_limit| Duration::from_secs_f64(1.0 / (rps_limit as f64)));\n    let make_worker: &dyn Fn(WorkerId) -> Pin<Box<dyn Send + Future<Output = ()>>> = &|worker_id| {\n        let ss = shared_state.clone();\n        let cancel = cancel.clone();\n        let ranges: Vec<KeyRange> = all_ranges\n            .iter()\n            .filter(|r| r.timeline == worker_id.timeline)\n            .cloned()\n            .collect();\n        let weights =\n            rand::distr::weighted::WeightedIndex::new(ranges.iter().map(|v| v.len())).unwrap();\n\n        Box::pin(async move {\n            let scheme = match Url::parse(&args.page_service_connstring) {\n                Ok(url) => url.scheme().to_lowercase().to_string(),\n                Err(url::ParseError::RelativeUrlWithoutBase) => \"postgresql\".to_string(),\n                Err(err) => panic!(\"invalid connstring: {err}\"),\n            };\n            let client: Box<dyn Client> = match scheme.as_str() {\n                \"postgresql\" | \"postgres\" => {\n                    assert!(!args.compression, \"libpq does not support compression\");\n                    assert!(!args.rich_client, \"rich client requires grpc://\");\n                    Box::new(\n                        LibpqClient::new(&args.page_service_connstring, worker_id.timeline)\n                            .await\n                            .unwrap(),\n                    )\n                }\n\n                \"grpc\" if args.rich_client => Box::new(\n                    RichGrpcClient::new(\n                        
&args.page_service_connstring,\n                        worker_id.timeline,\n                        args.compression,\n                    )\n                    .await\n                    .unwrap(),\n                ),\n\n                \"grpc\" => Box::new(\n                    GrpcClient::new(\n                        &args.page_service_connstring,\n                        worker_id.timeline,\n                        args.compression,\n                    )\n                    .await\n                    .unwrap(),\n                ),\n\n                scheme => panic!(\"unsupported scheme {scheme}\"),\n            };\n            run_worker(args, client, ss, cancel, rps_period, ranges, weights).await\n        })\n    };\n\n    info!(\"spawning workers\");\n    let mut workers = JoinSet::new();\n    for timeline in timelines.iter().cloned() {\n        for num_client in 0..args.num_clients.get() {\n            let worker_id = WorkerId {\n                timeline,\n                num_client,\n            };\n            workers.spawn(make_worker(worker_id));\n        }\n    }\n    let workers = async move {\n        while let Some(res) = workers.join_next().await {\n            res.unwrap();\n        }\n    };\n\n    info!(\"waiting for everything to become ready\");\n    shared_state.start_work_barrier.wait().await;\n    info!(\"work started\");\n    if let Some(runtime) = args.runtime {\n        tokio::time::sleep(runtime.into()).await;\n        info!(\"runtime over, signalling cancellation\");\n        cancel.cancel();\n        workers.await;\n        info!(\"work sender exited\");\n    } else {\n        workers.await;\n        unreachable!(\"work sender never terminates\");\n    }\n\n    let output = Output {\n        total: {\n            let mut agg_stats = request_stats::Stats::new();\n            for stats in all_thread_local_stats.lock().unwrap().iter() {\n                let stats = stats.lock().unwrap();\n                agg_stats.add(&stats);\n   
         }\n            agg_stats.output()\n        },\n    };\n\n    let output = serde_json::to_string_pretty(&output).unwrap();\n    println!(\"{output}\");\n\n    anyhow::Ok(())\n}\n\nasync fn run_worker(\n    args: &Args,\n    mut client: Box<dyn Client>,\n    shared_state: Arc<SharedState>,\n    cancel: CancellationToken,\n    rps_period: Option<Duration>,\n    ranges: Vec<KeyRange>,\n    weights: rand::distr::weighted::WeightedIndex<i128>,\n) {\n    shared_state.start_work_barrier.wait().await;\n    let client_start = Instant::now();\n    let mut ticks_processed = 0;\n    let mut req_id = 0;\n    let batch_size: usize = args.batch_size.into();\n\n    // Track inflight requests by request ID and start time. This times the request duration, and\n    // ensures responses match requests. We don't expect responses back in any particular order.\n    //\n    // NB: this does not check that all requests received a response, because we don't wait for the\n    // inflight requests to complete when the duration elapses.\n    let mut inflight: HashMap<u64, Instant> = HashMap::new();\n\n    while !cancel.is_cancelled() {\n        // Detect if a request took longer than the RPS rate\n        if let Some(period) = &rps_period {\n            let periods_passed_until_now =\n                usize::try_from(client_start.elapsed().as_micros() / period.as_micros()).unwrap();\n\n            if periods_passed_until_now > ticks_processed {\n                shared_state\n                    .live_stats\n                    .missed((periods_passed_until_now - ticks_processed) as u64);\n            }\n            ticks_processed = periods_passed_until_now;\n        }\n\n        while inflight.len() < args.queue_depth.get() {\n            req_id += 1;\n            let start = Instant::now();\n            let (req_lsn, mod_lsn, rel, blks) = {\n                /// Converts a compact i128 key to a relation tag and block number.\n                fn key_to_block(key: i128) -> (RelTag, u32) 
{\n                    let key = Key::from_i128(key);\n                    assert!(key.is_rel_block_key());\n                    key.to_rel_block()\n                        .expect(\"we filter non-rel-block keys out above\")\n                }\n\n                // Pick a random page from a random relation.\n                let mut rng = rand::rng();\n                let r = &ranges[weights.sample(&mut rng)];\n                let key: i128 = rng.random_range(r.start..r.end);\n                let (rel_tag, block_no) = key_to_block(key);\n\n                let mut blks = VecDeque::with_capacity(batch_size);\n                blks.push_back(block_no);\n\n                // If requested, populate a batch of sequential pages. This is how Postgres will\n                // request page batches (e.g. prefetches). If we hit the end of the relation, we\n                // grow the batch towards the start too.\n                for i in 1..batch_size {\n                    let (r, b) = key_to_block(key + i as i128);\n                    if r != rel_tag {\n                        break; // went outside relation\n                    }\n                    blks.push_back(b)\n                }\n\n                if blks.len() < batch_size {\n                    // Grow batch backwards if needed.\n                    for i in 1..batch_size {\n                        let (r, b) = key_to_block(key - i as i128);\n                        if r != rel_tag {\n                            break; // went outside relation\n                        }\n                        blks.push_front(b)\n                    }\n                }\n\n                // We assume that the entire batch can fit within the relation.\n                assert_eq!(blks.len(), batch_size, \"incomplete batch\");\n\n                let req_lsn = if rng.random_bool(args.req_latest_probability) {\n                    Lsn::MAX\n                } else {\n                    r.timeline_lsn\n                };\n              
  (req_lsn, r.timeline_lsn, rel_tag, blks.into())\n            };\n            client\n                .send_get_page(req_id, req_lsn, mod_lsn, rel, blks)\n                .await\n                .unwrap();\n            let old = inflight.insert(req_id, start);\n            assert!(old.is_none(), \"duplicate request ID {req_id}\");\n        }\n\n        let (req_id, pages) = client.recv_get_page().await.unwrap();\n        assert_eq!(pages.len(), batch_size, \"unexpected page count\");\n        assert!(pages.iter().all(|p| !p.is_empty()), \"empty page\");\n        let start = inflight\n            .remove(&req_id)\n            .expect(\"response for unknown request ID\");\n        let end = Instant::now();\n        shared_state.live_stats.request_done();\n        ticks_processed += 1;\n        STATS.with(|stats| {\n            stats\n                .borrow()\n                .lock()\n                .unwrap()\n                .observe(end.duration_since(start))\n                .unwrap();\n        });\n\n        if let Some(period) = &rps_period {\n            let next_at = client_start\n                + Duration::from_micros(\n                    (ticks_processed) as u64 * u64::try_from(period.as_micros()).unwrap(),\n                );\n            tokio::time::sleep_until(next_at.into()).await;\n        }\n    }\n}\n\n/// A benchmark client, to allow switching out the transport protocol.\n///\n/// For simplicity, this just uses separate asynchronous send/recv methods. 
The send method could\n/// return a future that resolves when the response is received, but we don't really need it.\n#[async_trait]\ntrait Client: Send {\n    /// Sends an asynchronous GetPage request to the pageserver.\n    async fn send_get_page(\n        &mut self,\n        req_id: u64,\n        req_lsn: Lsn,\n        mod_lsn: Lsn,\n        rel: RelTag,\n        blks: Vec<u32>,\n    ) -> anyhow::Result<()>;\n\n    /// Receives the next GetPage response from the pageserver.\n    async fn recv_get_page(&mut self) -> anyhow::Result<(u64, Vec<Bytes>)>;\n}\n\n/// A libpq-based Pageserver client.\nstruct LibpqClient {\n    inner: pageserver_client::page_service::PagestreamClient,\n    // Track sent batches, so we know how many responses to expect.\n    batch_sizes: VecDeque<usize>,\n}\n\nimpl LibpqClient {\n    async fn new(connstring: &str, ttid: TenantTimelineId) -> anyhow::Result<Self> {\n        let inner = pageserver_client::page_service::Client::new(connstring.to_string())\n            .await?\n            .pagestream(ttid.tenant_id, ttid.timeline_id)\n            .await?;\n        Ok(Self {\n            inner,\n            batch_sizes: VecDeque::new(),\n        })\n    }\n}\n\n#[async_trait]\nimpl Client for LibpqClient {\n    async fn send_get_page(\n        &mut self,\n        req_id: u64,\n        req_lsn: Lsn,\n        mod_lsn: Lsn,\n        rel: RelTag,\n        blks: Vec<u32>,\n    ) -> anyhow::Result<()> {\n        // libpq doesn't support client-side batches, so we send a bunch of individual requests\n        // instead in the hope that the server will batch them for us. 
We use the same request ID\n        // for all, because we'll return a single batch response.\n        self.batch_sizes.push_back(blks.len());\n        for blkno in blks {\n            let req = PagestreamGetPageRequest {\n                hdr: PagestreamRequest {\n                    reqid: req_id,\n                    request_lsn: req_lsn,\n                    not_modified_since: mod_lsn,\n                },\n                rel,\n                blkno,\n            };\n            self.inner.getpage_send(req).await?;\n        }\n        Ok(())\n    }\n\n    async fn recv_get_page(&mut self) -> anyhow::Result<(u64, Vec<Bytes>)> {\n        let batch_size = self.batch_sizes.pop_front().unwrap();\n        let mut batch = Vec::with_capacity(batch_size);\n        let mut req_id = None;\n        for _ in 0..batch_size {\n            let resp = self.inner.getpage_recv().await?;\n            if req_id.is_none() {\n                req_id = Some(resp.req.hdr.reqid);\n            }\n            assert_eq!(req_id, Some(resp.req.hdr.reqid), \"request ID mismatch\");\n            batch.push(resp.page);\n        }\n        Ok((req_id.unwrap(), batch))\n    }\n}\n\n/// A gRPC Pageserver client.\nstruct GrpcClient {\n    req_tx: tokio::sync::mpsc::Sender<page_api::GetPageRequest>,\n    resp_rx: Pin<Box<dyn Stream<Item = Result<page_api::GetPageResponse, tonic::Status>> + Send>>,\n}\n\nimpl GrpcClient {\n    async fn new(\n        connstring: &str,\n        ttid: TenantTimelineId,\n        compression: bool,\n    ) -> anyhow::Result<Self> {\n        let mut client = page_api::Client::connect(\n            connstring.to_string(),\n            ttid.tenant_id,\n            ttid.timeline_id,\n            ShardIndex::unsharded(),\n            None,\n            compression.then_some(tonic::codec::CompressionEncoding::Zstd),\n        )\n        .await?;\n\n        // The channel has a buffer size of 1, since 0 is not allowed. 
It does not matter, since the\n        // benchmark will control the queue depth (i.e. in-flight requests) anyway, and requests are\n        // buffered by Tonic and the OS too.\n        let (req_tx, req_rx) = tokio::sync::mpsc::channel(1);\n        let req_stream = tokio_stream::wrappers::ReceiverStream::new(req_rx);\n        let resp_rx = Box::pin(client.get_pages(req_stream).await?);\n\n        Ok(Self { req_tx, resp_rx })\n    }\n}\n\n#[async_trait]\nimpl Client for GrpcClient {\n    async fn send_get_page(\n        &mut self,\n        req_id: u64,\n        req_lsn: Lsn,\n        mod_lsn: Lsn,\n        rel: RelTag,\n        blks: Vec<u32>,\n    ) -> anyhow::Result<()> {\n        let req = page_api::GetPageRequest {\n            request_id: req_id.into(),\n            request_class: page_api::GetPageClass::Normal,\n            read_lsn: page_api::ReadLsn {\n                request_lsn: req_lsn,\n                not_modified_since_lsn: Some(mod_lsn),\n            },\n            rel,\n            block_numbers: blks,\n        };\n        self.req_tx.send(req).await?;\n        Ok(())\n    }\n\n    async fn recv_get_page(&mut self) -> anyhow::Result<(u64, Vec<Bytes>)> {\n        let resp = self.resp_rx.next().await.unwrap().unwrap();\n        anyhow::ensure!(\n            resp.status_code == page_api::GetPageStatusCode::Ok,\n            \"unexpected status code: {}\",\n            resp.status_code,\n        );\n        Ok((\n            resp.request_id.id,\n            resp.pages.into_iter().map(|p| p.image).collect(),\n        ))\n    }\n}\n\n/// A rich gRPC Pageserver client.\nstruct RichGrpcClient {\n    inner: Arc<client_grpc::PageserverClient>,\n    requests: FuturesUnordered<\n        Pin<Box<dyn Future<Output = anyhow::Result<page_api::GetPageResponse>> + Send>>,\n    >,\n}\n\nimpl RichGrpcClient {\n    async fn new(\n        connstring: &str,\n        ttid: TenantTimelineId,\n        compression: bool,\n    ) -> anyhow::Result<Self> {\n        let inner = 
Arc::new(client_grpc::PageserverClient::new(\n            ttid.tenant_id,\n            ttid.timeline_id,\n            ShardSpec::new(\n                [(ShardIndex::unsharded(), connstring.to_string())].into(),\n                None,\n            )?,\n            None,\n            compression.then_some(tonic::codec::CompressionEncoding::Zstd),\n        )?);\n        Ok(Self {\n            inner,\n            requests: FuturesUnordered::new(),\n        })\n    }\n}\n\n#[async_trait]\nimpl Client for RichGrpcClient {\n    async fn send_get_page(\n        &mut self,\n        req_id: u64,\n        req_lsn: Lsn,\n        mod_lsn: Lsn,\n        rel: RelTag,\n        blks: Vec<u32>,\n    ) -> anyhow::Result<()> {\n        let req = page_api::GetPageRequest {\n            request_id: req_id.into(),\n            request_class: page_api::GetPageClass::Normal,\n            read_lsn: page_api::ReadLsn {\n                request_lsn: req_lsn,\n                not_modified_since_lsn: Some(mod_lsn),\n            },\n            rel,\n            block_numbers: blks,\n        };\n        let inner = self.inner.clone();\n        self.requests.push(Box::pin(async move {\n            inner\n                .get_page(req)\n                .await\n                .map_err(|err| anyhow::anyhow!(\"{err}\"))\n        }));\n        Ok(())\n    }\n\n    async fn recv_get_page(&mut self) -> anyhow::Result<(u64, Vec<Bytes>)> {\n        let resp = self.requests.next().await.unwrap()?;\n        Ok((\n            resp.request_id.id,\n            resp.pages.into_iter().map(|p| p.image).collect(),\n        ))\n    }\n}\n"
  },
  {
    "path": "pageserver/pagebench/src/cmd/idle_streams.rs",
    "content": "use std::sync::Arc;\n\nuse anyhow::anyhow;\nuse futures::StreamExt;\nuse tonic::transport::Endpoint;\nuse tracing::info;\n\nuse pageserver_page_api::{GetPageClass, GetPageRequest, GetPageStatusCode, ReadLsn, RelTag};\nuse utils::id::TenantTimelineId;\nuse utils::lsn::Lsn;\nuse utils::shard::ShardIndex;\n\n/// Starts a large number of idle gRPC GetPage streams.\n#[derive(clap::Parser)]\npub(crate) struct Args {\n    /// The Pageserver to connect to. Must use grpc://.\n    #[clap(long, default_value = \"grpc://localhost:51051\")]\n    server: String,\n    /// The Pageserver HTTP API.\n    #[clap(long, default_value = \"http://localhost:9898\")]\n    http_server: String,\n    /// The number of streams to open.\n    #[clap(long, default_value = \"100000\")]\n    count: usize,\n    /// Number of streams per connection.\n    #[clap(long, default_value = \"100\")]\n    per_connection: usize,\n    /// Send a single GetPage request on each stream.\n    #[clap(long, default_value_t = false)]\n    send_request: bool,\n}\n\npub(crate) fn main(args: Args) -> anyhow::Result<()> {\n    let rt = tokio::runtime::Builder::new_multi_thread()\n        .enable_all()\n        .build()?;\n\n    rt.block_on(main_impl(args))\n}\n\nasync fn main_impl(args: Args) -> anyhow::Result<()> {\n    // Discover a tenant and timeline to use.\n    let mgmt_api_client = Arc::new(pageserver_client::mgmt_api::Client::new(\n        reqwest::Client::new(),\n        args.http_server.clone(),\n        None,\n    ));\n    let timelines: Vec<TenantTimelineId> = crate::util::cli::targets::discover(\n        &mgmt_api_client,\n        crate::util::cli::targets::Spec {\n            limit_to_first_n_targets: Some(1),\n            targets: None,\n        },\n    )\n    .await?;\n    let ttid = timelines\n        .first()\n        .ok_or_else(|| anyhow!(\"no timelines found\"))?;\n\n    // Set up the initial client.\n    let endpoint = Endpoint::from_shared(args.server.clone())?;\n\n    let connect = 
async || {\n        pageserver_page_api::Client::new(\n            endpoint.connect().await?,\n            ttid.tenant_id,\n            ttid.timeline_id,\n            ShardIndex::unsharded(),\n            None,\n            None,\n        )\n    };\n\n    let mut client = connect().await?;\n    let mut streams = Vec::with_capacity(args.count);\n\n    // Create streams.\n    for i in 0..args.count {\n        if i % 100 == 0 {\n            info!(\"opened {}/{} streams\", i, args.count);\n        }\n        if i % args.per_connection == 0 && i > 0 {\n            client = connect().await?;\n        }\n\n        let (req_tx, req_rx) = tokio::sync::mpsc::unbounded_channel();\n        let req_stream = tokio_stream::wrappers::UnboundedReceiverStream::new(req_rx);\n        let mut resp_stream = client.get_pages(req_stream).await?;\n\n        // Send request if specified.\n        if args.send_request {\n            req_tx.send(GetPageRequest {\n                request_id: 1.into(),\n                request_class: GetPageClass::Normal,\n                read_lsn: ReadLsn {\n                    request_lsn: Lsn::MAX,\n                    not_modified_since_lsn: Some(Lsn(1)),\n                },\n                rel: RelTag {\n                    spcnode: 1664, // pg_global\n                    dbnode: 0,     // shared database\n                    relnode: 1262, // pg_database\n                    forknum: 0,    // main\n                },\n                block_numbers: vec![0],\n            })?;\n            let resp = resp_stream\n                .next()\n                .await\n                .transpose()?\n                .ok_or_else(|| anyhow!(\"no response\"))?;\n            if resp.status_code != GetPageStatusCode::Ok {\n                return Err(anyhow!(\"{} response\", resp.status_code));\n            }\n        }\n\n        // Hold onto streams to avoid closing them.\n        streams.push((req_tx, resp_stream));\n    }\n\n    info!(\"opened {} streams, sleeping\", 
args.count);\n\n    // Block forever, to hold the idle streams open for inspection.\n    futures::future::pending::<()>().await;\n\n    Ok(())\n}\n"
  },
  {
    "path": "pageserver/pagebench/src/cmd/ondemand_download_churn.rs",
    "content": "use std::f64;\nuse std::num::NonZeroUsize;\nuse std::sync::Arc;\nuse std::sync::atomic::{AtomicU64, Ordering};\nuse std::time::{Duration, Instant};\n\nuse pageserver_api::models::HistoricLayerInfo;\nuse pageserver_api::shard::TenantShardId;\nuse pageserver_client::mgmt_api;\nuse rand::seq::IndexedMutRandom;\nuse tokio::sync::{OwnedSemaphorePermit, mpsc};\nuse tokio::task::JoinSet;\nuse tokio_util::sync::CancellationToken;\nuse tracing::{debug, info};\nuse utils::id::{TenantTimelineId, TimelineId};\n\n/// Evict & on-demand download random layers.\n#[derive(clap::Parser)]\npub(crate) struct Args {\n    #[clap(long, default_value = \"http://localhost:9898\")]\n    mgmt_api_endpoint: String,\n    #[clap(long)]\n    pageserver_jwt: Option<String>,\n    #[clap(long)]\n    runtime: Option<humantime::Duration>,\n    #[clap(long, default_value = \"1\")]\n    tasks_per_target: NonZeroUsize,\n    #[clap(long, default_value = \"1\")]\n    concurrency_per_target: NonZeroUsize,\n    /// If set, only the first N discovered targets are used.\n    #[clap(long)]\n    limit_to_first_n_targets: Option<usize>,\n    /// Before starting the benchmark, live-reconfigure the pageserver to use the given\n    /// [`pageserver_api::models::virtual_file::IoEngineKind`].\n    #[clap(long)]\n    set_io_engine: Option<pageserver_api::models::virtual_file::IoEngineKind>,\n    targets: Option<Vec<TenantTimelineId>>,\n}\n\npub(crate) fn main(args: Args) -> anyhow::Result<()> {\n    let rt = tokio::runtime::Builder::new_multi_thread()\n        .enable_all()\n        .build()?;\n    let task = rt.spawn(main_impl(args));\n    rt.block_on(task).unwrap().unwrap();\n    Ok(())\n}\n\n#[derive(serde::Serialize)]\nstruct Output {\n    downloads_count: u64,\n    downloads_bytes: u64,\n    evictions_count: u64,\n    timeline_restarts: u64,\n    #[serde(with = \"humantime_serde\")]\n    runtime: Duration,\n}\n\n#[derive(Debug, Default)]\nstruct LiveStats {\n    
evictions_count: AtomicU64,\n    downloads_count: AtomicU64,\n    downloads_bytes: AtomicU64,\n    timeline_restarts: AtomicU64,\n}\n\nimpl LiveStats {\n    fn eviction_done(&self) {\n        self.evictions_count.fetch_add(1, Ordering::Relaxed);\n    }\n    fn download_done(&self, size: u64) {\n        self.downloads_count.fetch_add(1, Ordering::Relaxed);\n        self.downloads_bytes.fetch_add(size, Ordering::Relaxed);\n    }\n    fn timeline_restart_done(&self) {\n        self.timeline_restarts.fetch_add(1, Ordering::Relaxed);\n    }\n}\n\nasync fn main_impl(args: Args) -> anyhow::Result<()> {\n    let args: &'static Args = Box::leak(Box::new(args));\n\n    let mgmt_api_client = Arc::new(pageserver_client::mgmt_api::Client::new(\n        reqwest::Client::new(), // TODO: support ssl_ca_file for https APIs in pagebench.\n        args.mgmt_api_endpoint.clone(),\n        args.pageserver_jwt.as_deref(),\n    ));\n\n    if let Some(engine_str) = &args.set_io_engine {\n        mgmt_api_client.put_io_engine(engine_str).await?;\n    }\n\n    // discover targets\n    let timelines: Vec<TenantTimelineId> = crate::util::cli::targets::discover(\n        &mgmt_api_client,\n        crate::util::cli::targets::Spec {\n            limit_to_first_n_targets: args.limit_to_first_n_targets,\n            targets: args.targets.clone(),\n        },\n    )\n    .await?;\n\n    let token = CancellationToken::new();\n    let mut tasks = JoinSet::new();\n\n    let periodic_stats = Arc::new(LiveStats::default());\n    let total_stats = Arc::new(LiveStats::default());\n\n    let start = Instant::now();\n    tasks.spawn({\n        let periodic_stats = Arc::clone(&periodic_stats);\n        let total_stats = Arc::clone(&total_stats);\n        let cloned_token = token.clone();\n        async move {\n            let mut last_at = Instant::now();\n            loop {\n                if cloned_token.is_cancelled() {\n                    return;\n                }\n                
tokio::time::sleep_until((last_at + Duration::from_secs(1)).into()).await;\n                let now = Instant::now();\n                let delta: Duration = now - last_at;\n                last_at = now;\n\n                let LiveStats {\n                    evictions_count,\n                    downloads_count,\n                    downloads_bytes,\n                    timeline_restarts,\n                } = &*periodic_stats;\n                let evictions_count = evictions_count.swap(0, Ordering::Relaxed);\n                let downloads_count = downloads_count.swap(0, Ordering::Relaxed);\n                let downloads_bytes = downloads_bytes.swap(0, Ordering::Relaxed);\n                let timeline_restarts = timeline_restarts.swap(0, Ordering::Relaxed);\n\n                total_stats.evictions_count.fetch_add(evictions_count, Ordering::Relaxed);\n                total_stats.downloads_count.fetch_add(downloads_count, Ordering::Relaxed);\n                total_stats.downloads_bytes.fetch_add(downloads_bytes, Ordering::Relaxed);\n                total_stats.timeline_restarts.fetch_add(timeline_restarts, Ordering::Relaxed);\n\n                let evictions_per_s = evictions_count as f64 / delta.as_secs_f64();\n                let downloads_per_s = downloads_count as f64 / delta.as_secs_f64();\n                let downloads_mibs_per_s = downloads_bytes as f64 / delta.as_secs_f64() / ((1 << 20) as f64);\n\n                info!(\"evictions={evictions_per_s:.2}/s downloads={downloads_per_s:.2}/s download_bytes={downloads_mibs_per_s:.2}MiB/s timeline_restarts={timeline_restarts}\");\n            }\n        }\n    });\n\n    for tl in timelines {\n        for _ in 0..args.tasks_per_target.get() {\n            tasks.spawn(timeline_actor(\n                args,\n                Arc::clone(&mgmt_api_client),\n                tl,\n                Arc::clone(&periodic_stats),\n                token.clone(),\n            ));\n        }\n    }\n    if let Some(runtime) = 
args.runtime {\n        tokio::spawn(async move {\n            tokio::time::sleep(runtime.into()).await;\n            token.cancel();\n        });\n    }\n\n    while let Some(res) = tasks.join_next().await {\n        res.unwrap();\n    }\n    let end = Instant::now();\n    let duration: Duration = end - start;\n\n    let output = {\n        let LiveStats {\n            evictions_count,\n            downloads_count,\n            downloads_bytes,\n            timeline_restarts,\n        } = &*total_stats;\n        Output {\n            downloads_count: downloads_count.load(Ordering::Relaxed),\n            downloads_bytes: downloads_bytes.load(Ordering::Relaxed),\n            evictions_count: evictions_count.load(Ordering::Relaxed),\n            timeline_restarts: timeline_restarts.load(Ordering::Relaxed),\n            runtime: duration,\n        }\n    };\n    let output = serde_json::to_string_pretty(&output).unwrap();\n    println!(\"{output}\");\n\n    Ok(())\n}\n\nasync fn timeline_actor(\n    args: &'static Args,\n    mgmt_api_client: Arc<pageserver_client::mgmt_api::Client>,\n    timeline: TenantTimelineId,\n    live_stats: Arc<LiveStats>,\n    token: CancellationToken,\n) {\n    // TODO: support sharding\n    let tenant_shard_id = TenantShardId::unsharded(timeline.tenant_id);\n\n    struct Timeline {\n        joinset: JoinSet<()>,\n        layers: Vec<mpsc::Sender<OwnedSemaphorePermit>>,\n        concurrency: Arc<tokio::sync::Semaphore>,\n    }\n    while !token.is_cancelled() {\n        debug!(\"restarting timeline\");\n        let layer_map_info = mgmt_api_client\n            .layer_map_info(tenant_shard_id, timeline.timeline_id)\n            .await\n            .unwrap();\n        let concurrency = Arc::new(tokio::sync::Semaphore::new(\n            args.concurrency_per_target.get(),\n        ));\n\n        let mut joinset = JoinSet::new();\n        let layers = layer_map_info\n            .historic_layers\n            .into_iter()\n            
.map(|historic_layer| {\n                let (tx, rx) = mpsc::channel(1);\n                joinset.spawn(layer_actor(\n                    tenant_shard_id,\n                    timeline.timeline_id,\n                    historic_layer,\n                    rx,\n                    Arc::clone(&mgmt_api_client),\n                    Arc::clone(&live_stats),\n                ));\n                tx\n            })\n            .collect::<Vec<_>>();\n\n        let mut timeline = Timeline {\n            joinset,\n            layers,\n            concurrency,\n        };\n\n        live_stats.timeline_restart_done();\n\n        while !token.is_cancelled() {\n            assert!(!timeline.joinset.is_empty());\n            if let Some(res) = timeline.joinset.try_join_next() {\n                debug!(?res, \"a layer actor exited, should not happen\");\n                timeline.joinset.shutdown().await;\n                break;\n            }\n\n            let mut permit = Some(\n                Arc::clone(&timeline.concurrency)\n                    .acquire_owned()\n                    .await\n                    .unwrap(),\n            );\n\n            loop {\n                let layer_tx = {\n                    let mut rng = rand::rng();\n                    timeline.layers.choose_mut(&mut rng).expect(\"no layers\")\n                };\n                match layer_tx.try_send(permit.take().unwrap()) {\n                    Ok(_) => break,\n                    Err(e) => match e {\n                        mpsc::error::TrySendError::Full(back) => {\n                            // TODO: retrying introduces bias away from slow downloaders\n                            permit.replace(back);\n                        }\n                        mpsc::error::TrySendError::Closed(_) => panic!(),\n                    },\n                }\n            }\n        }\n    }\n}\n\nasync fn layer_actor(\n    tenant_shard_id: TenantShardId,\n    timeline_id: TimelineId,\n    mut layer: 
HistoricLayerInfo,\n    mut rx: mpsc::Receiver<tokio::sync::OwnedSemaphorePermit>,\n    mgmt_api_client: Arc<mgmt_api::Client>,\n    live_stats: Arc<LiveStats>,\n) {\n    #[derive(Clone, Copy)]\n    enum Action {\n        Evict,\n        OnDemandDownload,\n    }\n\n    while let Some(_permit) = rx.recv().await {\n        let action = if layer.is_remote() {\n            Action::OnDemandDownload\n        } else {\n            Action::Evict\n        };\n\n        let did_it = match action {\n            Action::Evict => {\n                let did_it = mgmt_api_client\n                    .layer_evict(tenant_shard_id, timeline_id, layer.layer_file_name())\n                    .await\n                    .unwrap();\n                live_stats.eviction_done();\n                did_it\n            }\n            Action::OnDemandDownload => {\n                let did_it = mgmt_api_client\n                    .layer_ondemand_download(tenant_shard_id, timeline_id, layer.layer_file_name())\n                    .await\n                    .unwrap();\n                live_stats.download_done(layer.layer_file_size());\n                did_it\n            }\n        };\n        if !did_it {\n            debug!(\"local copy of layer map appears out of sync, re-downloading\");\n            return;\n        }\n        debug!(\"did it\");\n        layer.set_remote(match action {\n            Action::Evict => true,\n            Action::OnDemandDownload => false,\n        });\n    }\n}\n"
  },
  {
    "path": "pageserver/pagebench/src/cmd/trigger_initial_size_calculation.rs",
    "content": "use std::sync::Arc;\n\nuse humantime::Duration;\nuse pageserver_api::shard::TenantShardId;\nuse pageserver_client::mgmt_api::ForceAwaitLogicalSize;\nuse tokio::task::JoinSet;\nuse utils::id::TenantTimelineId;\n\n#[derive(clap::Parser)]\npub(crate) struct Args {\n    #[clap(long, default_value = \"http://localhost:9898\")]\n    mgmt_api_endpoint: String,\n    #[clap(long, default_value = \"localhost:64000\")]\n    page_service_host_port: String,\n    #[clap(long)]\n    pageserver_jwt: Option<String>,\n    #[clap(\n        long,\n        help = \"if specified, poll mgmt api to check whether init logical size calculation has completed\"\n    )]\n    poll_for_completion: Option<Duration>,\n    #[clap(long)]\n    limit_to_first_n_targets: Option<usize>,\n    targets: Option<Vec<TenantTimelineId>>,\n}\n\npub(crate) fn main(args: Args) -> anyhow::Result<()> {\n    let rt = tokio::runtime::Builder::new_multi_thread()\n        .enable_all()\n        .build()\n        .unwrap();\n\n    let main_task = rt.spawn(main_impl(args));\n    rt.block_on(main_task).unwrap()\n}\n\nasync fn main_impl(args: Args) -> anyhow::Result<()> {\n    let args: &'static Args = Box::leak(Box::new(args));\n\n    let mgmt_api_client = Arc::new(pageserver_client::mgmt_api::Client::new(\n        reqwest::Client::new(), // TODO: support ssl_ca_file for https APIs in pagebench.\n        args.mgmt_api_endpoint.clone(),\n        args.pageserver_jwt.as_deref(),\n    ));\n\n    // discover targets\n    let timelines: Vec<TenantTimelineId> = crate::util::cli::targets::discover(\n        &mgmt_api_client,\n        crate::util::cli::targets::Spec {\n            limit_to_first_n_targets: args.limit_to_first_n_targets,\n            targets: args.targets.clone(),\n        },\n    )\n    .await?;\n\n    // kick it off\n\n    let mut js = JoinSet::new();\n    for tl in timelines {\n        let mgmt_api_client = Arc::clone(&mgmt_api_client);\n        js.spawn(async move {\n            let info = 
mgmt_api_client\n                .timeline_info(\n                    TenantShardId::unsharded(tl.tenant_id),\n                    tl.timeline_id,\n                    ForceAwaitLogicalSize::Yes,\n                )\n                .await\n                .unwrap();\n\n            // Polling should not be strictly required here since we await\n            // for the initial logical size, however it's possible for the request\n            // to land before the timeline is initialised. This results in an approximate\n            // logical size.\n            if let Some(period) = args.poll_for_completion {\n                let mut ticker = tokio::time::interval(period.into());\n                ticker.set_missed_tick_behavior(tokio::time::MissedTickBehavior::Delay);\n                let mut info = info;\n                while !info.current_logical_size_is_accurate {\n                    ticker.tick().await;\n                    info = mgmt_api_client\n                        .timeline_info(\n                            TenantShardId::unsharded(tl.tenant_id),\n                            tl.timeline_id,\n                            ForceAwaitLogicalSize::Yes,\n                        )\n                        .await\n                        .unwrap();\n                }\n            }\n        });\n    }\n    while let Some(res) = js.join_next().await {\n        let _: () = res.unwrap();\n    }\n    Ok(())\n}\n"
  },
  {
    "path": "pageserver/pagebench/src/main.rs",
    "content": "use std::fs::File;\n\nuse clap::Parser;\nuse tracing::info;\nuse utils::logging;\n\n/// Re-usable pieces of code that aren't CLI-specific.\nmod util {\n    pub(crate) mod request_stats;\n    #[macro_use]\n    pub(crate) mod tokio_thread_local_stats;\n    /// Re-usable pieces of CLI-specific code.\n    pub(crate) mod cli {\n        pub(crate) mod targets;\n    }\n}\n\n/// The pagebench CLI sub-commands, dispatched in [`main`] below.\nmod cmd {\n    pub(super) mod aux_files;\n    pub(super) mod basebackup;\n    pub(super) mod getpage_latest_lsn;\n    pub(super) mod idle_streams;\n    pub(super) mod ondemand_download_churn;\n    pub(super) mod trigger_initial_size_calculation;\n}\n\n/// Component-level performance test for pageserver.\n#[derive(clap::Parser)]\nstruct Args {\n    /// Takes a client CPU profile into profile.svg. The benchmark must exit cleanly before it's\n    /// written, e.g. via --runtime.\n    #[arg(long)]\n    profile: bool,\n\n    #[command(subcommand)]\n    subcommand: Subcommand,\n}\n\n#[derive(clap::Subcommand)]\nenum Subcommand {\n    Basebackup(cmd::basebackup::Args),\n    GetPageLatestLsn(cmd::getpage_latest_lsn::Args),\n    TriggerInitialSizeCalculation(cmd::trigger_initial_size_calculation::Args),\n    OndemandDownloadChurn(cmd::ondemand_download_churn::Args),\n    AuxFiles(cmd::aux_files::Args),\n    IdleStreams(cmd::idle_streams::Args),\n}\n\nfn main() -> anyhow::Result<()> {\n    logging::init(\n        logging::LogFormat::Plain,\n        logging::TracingErrorLayerEnablement::Disabled,\n        logging::Output::Stderr,\n    )?;\n    logging::replace_panic_hook_with_tracing_panic_hook().forget();\n\n    let args = Args::parse();\n\n    // Start a CPU profile if requested.\n    let mut profiler = None;\n    if args.profile {\n        profiler = Some(\n            pprof::ProfilerGuardBuilder::default()\n                .frequency(1000)\n                .blocklist(&[\"libc\", \"libgcc\", \"pthread\", \"vdso\"])\n             
   .build()?,\n        );\n    }\n\n    match args.subcommand {\n        Subcommand::Basebackup(args) => cmd::basebackup::main(args),\n        Subcommand::GetPageLatestLsn(args) => cmd::getpage_latest_lsn::main(args),\n        Subcommand::TriggerInitialSizeCalculation(args) => {\n            cmd::trigger_initial_size_calculation::main(args)\n        }\n        Subcommand::OndemandDownloadChurn(args) => cmd::ondemand_download_churn::main(args),\n        Subcommand::AuxFiles(args) => cmd::aux_files::main(args),\n        Subcommand::IdleStreams(args) => cmd::idle_streams::main(args),\n    }?;\n\n    // Generate a CPU flamegraph if requested.\n    if let Some(profiler) = profiler {\n        let report = profiler.report().build()?;\n        drop(profiler); // stop profiling\n        let file = File::create(\"profile.svg\")?;\n        report.flamegraph(file)?;\n        info!(\"wrote CPU profile flamegraph to profile.svg\")\n    }\n\n    Ok(())\n}\n"
  },
  {
    "path": "pageserver/pagebench/src/util/cli/targets.rs",
    "content": "use std::sync::Arc;\n\nuse pageserver_client::mgmt_api;\nuse tracing::info;\nuse utils::id::TenantTimelineId;\n\npub(crate) struct Spec {\n    pub(crate) limit_to_first_n_targets: Option<usize>,\n    pub(crate) targets: Option<Vec<TenantTimelineId>>,\n}\n\npub(crate) async fn discover(\n    api_client: &Arc<mgmt_api::Client>,\n    spec: Spec,\n) -> anyhow::Result<Vec<TenantTimelineId>> {\n    let mut timelines = if let Some(targets) = spec.targets {\n        targets\n    } else {\n        mgmt_api::util::get_pageserver_tenant_timelines_unsharded(api_client).await?\n    };\n\n    if let Some(limit) = spec.limit_to_first_n_targets {\n        timelines.sort(); // for determinism\n        timelines.truncate(limit);\n        if timelines.len() < limit {\n            anyhow::bail!(\"pageserver has less than limit_to_first_n_targets={limit} tenants\");\n        }\n    }\n\n    info!(\"timelines:\\n{:?}\", timelines);\n    info!(\"number of timelines:\\n{:?}\", timelines.len());\n\n    Ok(timelines)\n}\n"
  },
  {
    "path": "pageserver/pagebench/src/util/request_stats.rs",
    "content": "use std::time::Duration;\n\nuse anyhow::Context;\n\npub(crate) struct Stats {\n    latency_histo: hdrhistogram::Histogram<u64>,\n}\n\nimpl Stats {\n    pub(crate) fn new() -> Self {\n        Self {\n            // Initialize with fixed bounds so that we panic at runtime instead of resizing the histogram,\n            // which would skew the benchmark results.\n            latency_histo: hdrhistogram::Histogram::new_with_bounds(1, 1_000_000_000, 3).unwrap(),\n        }\n    }\n    pub(crate) fn observe(&mut self, latency: Duration) -> anyhow::Result<()> {\n        let micros: u64 = latency\n            .as_micros()\n            .try_into()\n            .context(\"latency greater than u64\")?;\n        self.latency_histo\n            .record(micros)\n            .context(\"add to histogram\")?;\n        Ok(())\n    }\n    pub(crate) fn output(&self) -> Output {\n        let latency_percentiles = std::array::from_fn(|idx| {\n            let micros = self\n                .latency_histo\n                .value_at_percentile(LATENCY_PERCENTILES[idx]);\n            Duration::from_micros(micros)\n        });\n        Output {\n            request_count: self.latency_histo.len(),\n            latency_mean: Duration::from_micros(self.latency_histo.mean() as u64),\n            latency_percentiles: LatencyPercentiles {\n                latency_percentiles,\n            },\n        }\n    }\n    pub(crate) fn add(&mut self, other: &Self) {\n        let Self { latency_histo } = self;\n        latency_histo.add(&other.latency_histo).unwrap();\n    }\n}\n\nimpl Default for Stats {\n    fn default() -> Self {\n        Self::new()\n    }\n}\n\nconst LATENCY_PERCENTILES: [f64; 4] = [95.0, 99.00, 99.90, 99.99];\n\nstruct LatencyPercentiles {\n    latency_percentiles: [Duration; 4],\n}\n\nimpl serde::Serialize for LatencyPercentiles {\n    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>\n    where\n        S: serde::Serializer,\n    {\n        use 
serde::ser::SerializeMap;\n        let mut ser = serializer.serialize_map(Some(LATENCY_PERCENTILES.len()))?;\n        for (p, v) in LATENCY_PERCENTILES.iter().zip(&self.latency_percentiles) {\n            ser.serialize_entry(\n                &format!(\"p{p}\"),\n                &format!(\"{}\", humantime::format_duration(*v)),\n            )?;\n        }\n        ser.end()\n    }\n}\n\n#[derive(serde::Serialize)]\npub(crate) struct Output {\n    request_count: u64,\n    #[serde(with = \"humantime_serde\")]\n    latency_mean: Duration,\n    latency_percentiles: LatencyPercentiles,\n}\n"
  },
  {
    "path": "pageserver/pagebench/src/util/tokio_thread_local_stats.rs",
    "content": "pub(crate) type ThreadLocalStats<T> = Arc<Mutex<T>>;\npub(crate) type AllThreadLocalStats<T> = Arc<Mutex<Vec<ThreadLocalStats<T>>>>;\n\nmacro_rules! declare {\n    ($THREAD_LOCAL_NAME:ident: $T:ty) => {\n        thread_local! {\n            pub static $THREAD_LOCAL_NAME: std::cell::RefCell<crate::util::tokio_thread_local_stats::ThreadLocalStats<$T>> = std::cell::RefCell::new(\n                std::sync::Arc::new(std::sync::Mutex::new(Default::default()))\n            );\n        }\n    };\n}\n\nuse std::sync::{Arc, Mutex};\n\npub(crate) use declare;\n\nmacro_rules! main {\n    ($THREAD_LOCAL_NAME:ident, $main_impl:expr) => {{\n        let main_impl = $main_impl;\n        let all = Arc::new(Mutex::new(Vec::new()));\n\n        let rt = tokio::runtime::Builder::new_multi_thread()\n            .on_thread_start({\n                let all = Arc::clone(&all);\n                move || {\n                    // pre-initialize the thread local stats by accessesing them\n                    // (some stats like requests_stats::Stats are quite costly to initialize,\n                    //  we don't want to pay that cost during the measurement period)\n                    $THREAD_LOCAL_NAME.with(|stats| {\n                        let stats: Arc<_> = Arc::clone(&*stats.borrow());\n                        all.lock().unwrap().push(stats);\n                    });\n                }\n            })\n            .enable_all()\n            .build()\n            .unwrap();\n\n        let main_task = rt.spawn(main_impl(all));\n        rt.block_on(main_task).unwrap()\n    }};\n}\n\npub(crate) use main;\n"
  },
  {
    "path": "pageserver/src/assert_u64_eq_usize.rs",
    "content": "//! `u64`` and `usize`` aren't guaranteed to be identical in Rust, but life is much simpler if that's the case.\n\npub(crate) const _ASSERT_U64_EQ_USIZE: () = {\n    if std::mem::size_of::<usize>() != std::mem::size_of::<u64>() {\n        panic!(\n            \"the traits defined in this module assume that usize and u64 can be converted to each other without loss of information\"\n        );\n    }\n};\n\npub(crate) trait U64IsUsize {\n    fn into_usize(self) -> usize;\n}\n\nimpl U64IsUsize for u64 {\n    #[inline(always)]\n    fn into_usize(self) -> usize {\n        #[allow(clippy::let_unit_value)]\n        let _ = _ASSERT_U64_EQ_USIZE;\n        self as usize\n    }\n}\n\npub(crate) trait UsizeIsU64 {\n    fn into_u64(self) -> u64;\n}\n\nimpl UsizeIsU64 for usize {\n    #[inline(always)]\n    fn into_u64(self) -> u64 {\n        #[allow(clippy::let_unit_value)]\n        let _ = _ASSERT_U64_EQ_USIZE;\n        self as u64\n    }\n}\n\npub const fn u64_to_usize(x: u64) -> usize {\n    #[allow(clippy::let_unit_value)]\n    let _ = _ASSERT_U64_EQ_USIZE;\n    x as usize\n}\n"
  },
  {
    "path": "pageserver/src/auth.rs",
    "content": "use utils::auth::{AuthError, Claims, Scope};\nuse utils::id::TenantId;\n\npub fn check_permission(claims: &Claims, tenant_id: Option<TenantId>) -> Result<(), AuthError> {\n    match (&claims.scope, tenant_id) {\n        (Scope::Tenant, None) => Err(AuthError(\n            \"Attempt to access management api with tenant scope. Permission denied\".into(),\n        )),\n        (Scope::Tenant, Some(tenant_id)) => {\n            if claims.tenant_id.unwrap() != tenant_id {\n                return Err(AuthError(\"Tenant id mismatch. Permission denied\".into()));\n            }\n            Ok(())\n        }\n        (Scope::PageServerApi, None) => Ok(()), // access to management api for PageServerApi scope\n        (Scope::PageServerApi, Some(_)) => Ok(()), // access to tenant api using PageServerApi scope\n        (\n            Scope::Admin\n            | Scope::SafekeeperData\n            | Scope::GenerationsApi\n            | Scope::Infra\n            | Scope::Scrubber\n            | Scope::ControllerPeer\n            | Scope::TenantEndpoint,\n            _,\n        ) => Err(AuthError(\n            format!(\n                \"JWT scope '{:?}' is ineligible for Pageserver auth\",\n                claims.scope\n            )\n            .into(),\n        )),\n    }\n}\n"
  },
  {
    "path": "pageserver/src/aux_file.rs",
    "content": "use std::sync::Arc;\n\nuse ::metrics::IntGauge;\nuse bytes::{Buf, BufMut, Bytes};\nuse pageserver_api::key::{AUX_KEY_PREFIX, Key, METADATA_KEY_SIZE};\nuse tracing::warn;\n\n// BEGIN Copyright (c) 2017 Servo Contributors\n\n/// Const version of FNV hash.\n#[inline]\n#[must_use]\npub const fn fnv_hash(bytes: &[u8]) -> u128 {\n    const INITIAL_STATE: u128 = 0x6c62272e07bb014262b821756295c58d;\n    const PRIME: u128 = 0x0000000001000000000000000000013B;\n\n    let mut hash = INITIAL_STATE;\n    let mut i = 0;\n    while i < bytes.len() {\n        hash ^= bytes[i] as u128;\n        hash = hash.wrapping_mul(PRIME);\n        i += 1;\n    }\n    hash\n}\n\n// END Copyright (c) 2017 Servo Contributors\n\n/// Create a metadata key from a hash, encoded as [AUX_KEY_PREFIX, 2B directory prefix, least significant 13B of FNV hash].\nfn aux_hash_to_metadata_key(dir_level1: u8, dir_level2: u8, data: &[u8]) -> Key {\n    let mut key: [u8; 16] = [0; METADATA_KEY_SIZE];\n    let hash = fnv_hash(data).to_be_bytes();\n    key[0] = AUX_KEY_PREFIX;\n    key[1] = dir_level1;\n    key[2] = dir_level2;\n    key[3..16].copy_from_slice(&hash[3..16]);\n    Key::from_metadata_key_fixed_size(&key)\n}\n\nconst AUX_DIR_PG_LOGICAL: u8 = 0x01;\nconst AUX_DIR_PG_REPLSLOT: u8 = 0x02;\nconst AUX_DIR_PG_STAT: u8 = 0x03;\nconst AUX_DIR_PG_UNKNOWN: u8 = 0xFF;\n\n/// Encode the aux file into a fixed-size key.\n///\n/// The first byte is the AUX key prefix. We use the next 2 bytes of the key for the directory / aux file type.\n/// We have one-to-one mapping for each of the aux file that we support. We hash the remaining part of the path\n/// (usually a single file name, or several components) into 13-byte hash. 
The way we determine the 2-byte prefix\n/// is roughly based on the first two components of the path, one unique number for one component.\n///\n/// * pg_logical/mappings -> 0x0101\n/// * pg_logical/snapshots -> 0x0102\n/// * pg_logical/replorigin_checkpoint -> 0x0103\n/// * pg_logical/others -> 0x01FF\n/// * pg_replslot/ -> 0x0201\n/// * pg_stat/pgstat.stat -> 0x0301\n/// * others -> 0xFFFF\n///\n/// If you add new AUX files to this function, please also add a test case to `test_encoding_portable`.\n/// The new file type must have never been written to the storage before. Otherwise, there could be data\n/// corruptions as the new file belongs to a new prefix but it might have been stored under the `others` prefix.\npub fn encode_aux_file_key(path: &str) -> Key {\n    if let Some(fname) = path.strip_prefix(\"pg_logical/mappings/\") {\n        aux_hash_to_metadata_key(AUX_DIR_PG_LOGICAL, 0x01, fname.as_bytes())\n    } else if let Some(fname) = path.strip_prefix(\"pg_logical/snapshots/\") {\n        aux_hash_to_metadata_key(AUX_DIR_PG_LOGICAL, 0x02, fname.as_bytes())\n    } else if path == \"pg_logical/replorigin_checkpoint\" {\n        aux_hash_to_metadata_key(AUX_DIR_PG_LOGICAL, 0x03, b\"\")\n    } else if let Some(fname) = path.strip_prefix(\"pg_logical/\") {\n        if cfg!(debug_assertions) {\n            warn!(\n                \"unsupported pg_logical aux file type: {}, putting to 0x01FF, would affect path scanning\",\n                path\n            );\n        }\n        aux_hash_to_metadata_key(AUX_DIR_PG_LOGICAL, 0xFF, fname.as_bytes())\n    } else if let Some(fname) = path.strip_prefix(\"pg_replslot/\") {\n        aux_hash_to_metadata_key(AUX_DIR_PG_REPLSLOT, 0x01, fname.as_bytes())\n    } else if let Some(fname) = path.strip_prefix(\"pg_stat/\") {\n        aux_hash_to_metadata_key(AUX_DIR_PG_STAT, 0x01, fname.as_bytes())\n    } else {\n        if cfg!(debug_assertions) {\n            warn!(\n                \"unsupported aux file type: {}, putting to 
0xFFFF, would affect path scanning\",\n                path\n            );\n        }\n        aux_hash_to_metadata_key(AUX_DIR_PG_UNKNOWN, 0xFF, path.as_bytes())\n    }\n}\n\nconst AUX_FILE_ENCODING_VERSION: u8 = 0x01;\n\npub fn decode_file_value(val: &[u8]) -> anyhow::Result<Vec<(&str, &[u8])>> {\n    let mut ptr = val;\n    if ptr.is_empty() {\n        // empty value = no files\n        return Ok(Vec::new());\n    }\n    assert_eq!(\n        ptr.get_u8(),\n        AUX_FILE_ENCODING_VERSION,\n        \"unsupported aux file value\"\n    );\n    let mut files = vec![];\n    while ptr.has_remaining() {\n        let key_len = ptr.get_u32() as usize;\n        let key = &ptr[..key_len];\n        ptr.advance(key_len);\n        let val_len = ptr.get_u32() as usize;\n        let content = &ptr[..val_len];\n        ptr.advance(val_len);\n\n        let path = std::str::from_utf8(key)?;\n        files.push((path, content));\n    }\n    Ok(files)\n}\n\n/// Decode an aux file key-value pair into a list of files. The returned `Bytes` contains reference\n/// to the original value slice. 
Be cautious about memory consumption.\npub fn decode_file_value_bytes(val: &Bytes) -> anyhow::Result<Vec<(String, Bytes)>> {\n    let mut ptr = val.clone();\n    if ptr.is_empty() {\n        // empty value = no files\n        return Ok(Vec::new());\n    }\n    assert_eq!(\n        ptr.get_u8(),\n        AUX_FILE_ENCODING_VERSION,\n        \"unsupported aux file value\"\n    );\n    let mut files = vec![];\n    while ptr.has_remaining() {\n        let key_len = ptr.get_u32() as usize;\n        let key = ptr.slice(..key_len);\n        ptr.advance(key_len);\n        let val_len = ptr.get_u32() as usize;\n        let content = ptr.slice(..val_len);\n        ptr.advance(val_len);\n\n        let path = std::str::from_utf8(&key)?.to_string();\n        files.push((path, content));\n    }\n    Ok(files)\n}\n\npub fn encode_file_value(files: &[(&str, &[u8])]) -> anyhow::Result<Vec<u8>> {\n    if files.is_empty() {\n        // no files = empty value\n        return Ok(Vec::new());\n    }\n    let mut encoded = vec![];\n    encoded.put_u8(AUX_FILE_ENCODING_VERSION);\n    for (path, content) in files {\n        if path.len() > u32::MAX as usize {\n            anyhow::bail!(\"{} exceeds path size limit\", path);\n        }\n        encoded.put_u32(path.len() as u32);\n        encoded.put_slice(path.as_bytes());\n        if content.len() > u32::MAX as usize {\n            anyhow::bail!(\"{} exceeds content size limit\", path);\n        }\n        encoded.put_u32(content.len() as u32);\n        encoded.put_slice(content);\n    }\n    Ok(encoded)\n}\n\n/// An estimation of the size of aux files.\npub struct AuxFileSizeEstimator {\n    aux_file_size_gauge: IntGauge,\n    size: Arc<std::sync::Mutex<Option<isize>>>,\n}\n\nimpl AuxFileSizeEstimator {\n    pub fn new(aux_file_size_gauge: IntGauge) -> Self {\n        Self {\n            aux_file_size_gauge,\n            size: Arc::new(std::sync::Mutex::new(None)),\n        }\n    }\n\n    /// When generating base backup or doing initial 
logical size calculation\n    pub fn on_initial(&self, new_size: usize) {\n        let mut guard = self.size.lock().unwrap();\n        *guard = Some(new_size as isize);\n        self.report(new_size as isize);\n    }\n\n    pub fn on_add(&self, file_size: usize) {\n        let mut guard = self.size.lock().unwrap();\n        if let Some(size) = &mut *guard {\n            *size += file_size as isize;\n            self.report(*size);\n        }\n    }\n\n    pub fn on_remove(&self, file_size: usize) {\n        let mut guard = self.size.lock().unwrap();\n        if let Some(size) = &mut *guard {\n            *size -= file_size as isize;\n            self.report(*size);\n        }\n    }\n\n    pub fn on_update(&self, old_size: usize, new_size: usize) {\n        let mut guard = self.size.lock().unwrap();\n        if let Some(size) = &mut *guard {\n            *size += new_size as isize - old_size as isize;\n            self.report(*size);\n        }\n    }\n\n    pub fn report(&self, size: isize) {\n        self.aux_file_size_gauge.set(size as i64);\n    }\n}\n\n#[cfg(test)]\nmod tests {\n    use super::*;\n\n    #[test]\n    fn test_hash_portable() {\n        // AUX file encoding requires the hash to be portable across all platforms. 
This test case checks\n        // if the algorithm produces the same hash across different environments.\n\n        assert_eq!(\n            265160408618497461376862998434862070044,\n            super::fnv_hash(\"test1\".as_bytes())\n        );\n        assert_eq!(\n            295486155126299629456360817749600553988,\n            super::fnv_hash(\"test/test2\".as_bytes())\n        );\n        assert_eq!(\n            144066263297769815596495629667062367629,\n            super::fnv_hash(\"\".as_bytes())\n        );\n    }\n\n    #[test]\n    fn test_encoding_portable() {\n        // To correct retrieve AUX files, the generated keys for the same file must be the same for all versions\n        // of the page server.\n        assert_eq!(\n            \"62000001017F8B83D94F7081693471ABF91C\",\n            encode_aux_file_key(\"pg_logical/mappings/test1\").to_string(),\n        );\n        assert_eq!(\n            \"62000001027F8E83D94F7081693471ABFCCD\",\n            encode_aux_file_key(\"pg_logical/snapshots/test2\").to_string(),\n        );\n        assert_eq!(\n            \"62000001032E07BB014262B821756295C58D\",\n            encode_aux_file_key(\"pg_logical/replorigin_checkpoint\").to_string(),\n        );\n        assert_eq!(\n            \"62000001FF4F38E1C74754E7D03C1A660178\",\n            encode_aux_file_key(\"pg_logical/unsupported\").to_string(),\n        );\n        assert_eq!(\n            \"62000002017F8D83D94F7081693471ABFB92\",\n            encode_aux_file_key(\"pg_replslot/test3\").to_string()\n        );\n        assert_eq!(\n            \"620000FFFF2B6ECC8AEF93F643DC44F15E03\",\n            encode_aux_file_key(\"other_file_not_supported\").to_string(),\n        );\n    }\n\n    #[test]\n    fn test_value_encoding() {\n        let files = vec![\n            (\"pg_logical/1.file\", \"1111\".as_bytes()),\n            (\"pg_logical/2.file\", \"2222\".as_bytes()),\n        ];\n        assert_eq!(\n            files,\n            
decode_file_value(&encode_file_value(&files).unwrap()).unwrap()\n        );\n        let files = vec![];\n        assert_eq!(\n            files,\n            decode_file_value(&encode_file_value(&files).unwrap()).unwrap()\n        );\n    }\n}\n"
  },
  {
    "path": "pageserver/src/basebackup.rs",
    "content": "//!\n//! Generate a tarball with files needed to bootstrap ComputeNode.\n//!\n//! TODO: this module has nothing to do with PostgreSQL pg_basebackup.\n//! It could use a better name.\n//!\n//! Stateless Postgres compute node is launched by sending a tarball\n//! which contains non-relational data (multixacts, clog, filenodemaps, twophase files),\n//! generated pg_control and dummy segment of WAL.\n//! This module is responsible for creation of such tarball\n//! from data stored in object storage.\n//!\nuse std::fmt::Write as FmtWrite;\nuse std::sync::Arc;\nuse std::time::{Instant, SystemTime};\n\nuse anyhow::{Context, anyhow};\nuse async_compression::tokio::write::GzipEncoder;\nuse bytes::{BufMut, Bytes, BytesMut};\nuse fail::fail_point;\nuse pageserver_api::key::{Key, rel_block_to_key};\nuse pageserver_api::reltag::{RelTag, SlruKind};\nuse postgres_ffi::pg_constants::{PG_HBA, PGDATA_SPECIAL_FILES};\nuse postgres_ffi::{\n    BLCKSZ, PG_TLI, PgMajorVersion, RELSEG_SIZE, WAL_SEGMENT_SIZE, XLogFileName,\n    dispatch_pgversion, pg_constants,\n};\nuse postgres_ffi_types::constants::{DEFAULTTABLESPACE_OID, GLOBALTABLESPACE_OID};\nuse postgres_ffi_types::forknum::{INIT_FORKNUM, MAIN_FORKNUM};\nuse tokio::io::{self, AsyncWrite, AsyncWriteExt as _};\nuse tokio_tar::{Builder, EntryType, Header};\nuse tracing::*;\nuse utils::lsn::Lsn;\n\nuse crate::context::RequestContext;\nuse crate::pgdatadir_mapping::Version;\nuse crate::tenant::storage_layer::IoConcurrency;\nuse crate::tenant::timeline::{GetVectoredError, VersionedKeySpaceQuery};\nuse crate::tenant::{PageReconstructError, Timeline};\n\n#[derive(Debug, thiserror::Error)]\npub enum BasebackupError {\n    #[error(\"basebackup pageserver error {0:#}\")]\n    Server(#[from] anyhow::Error),\n    #[error(\"basebackup client error {0:#} when {1}\")]\n    Client(#[source] io::Error, &'static str),\n    #[error(\"basebackup during shutdown\")]\n    Shutdown,\n}\n\nimpl From<PageReconstructError> for BasebackupError 
{\n    fn from(value: PageReconstructError) -> Self {\n        match value {\n            PageReconstructError::Cancelled => BasebackupError::Shutdown,\n            err => BasebackupError::Server(err.into()),\n        }\n    }\n}\n\nimpl From<GetVectoredError> for BasebackupError {\n    fn from(value: GetVectoredError) -> Self {\n        match value {\n            GetVectoredError::Cancelled => BasebackupError::Shutdown,\n            err => BasebackupError::Server(err.into()),\n        }\n    }\n}\n\nimpl From<BasebackupError> for postgres_backend::QueryError {\n    fn from(err: BasebackupError) -> Self {\n        use postgres_backend::QueryError;\n        use pq_proto::framed::ConnectionError;\n        match err {\n            BasebackupError::Client(err, _) => QueryError::Disconnected(ConnectionError::Io(err)),\n            BasebackupError::Server(err) => QueryError::Other(err),\n            BasebackupError::Shutdown => QueryError::Shutdown,\n        }\n    }\n}\n\nimpl From<BasebackupError> for tonic::Status {\n    fn from(err: BasebackupError) -> Self {\n        use tonic::Code;\n        let code = match &err {\n            BasebackupError::Client(_, _) => Code::Cancelled,\n            BasebackupError::Server(_) => Code::Internal,\n            BasebackupError::Shutdown => Code::Unavailable,\n        };\n        tonic::Status::new(code, err.to_string())\n    }\n}\n\n/// Create basebackup with non-rel data in it.\n/// Only include relational data if 'full_backup' is true.\n///\n/// Currently we use empty 'req_lsn' in two cases:\n///  * During the basebackup right after timeline creation\n///  * When working without safekeepers. 
In this situation it is important to match the lsn\n///    we are taking basebackup on with the lsn that is used in pageserver's walreceiver\n///    to start the replication.\n#[allow(clippy::too_many_arguments)]\npub async fn send_basebackup_tarball<'a, W>(\n    write: &'a mut W,\n    timeline: &'a Timeline,\n    req_lsn: Option<Lsn>,\n    prev_lsn: Option<Lsn>,\n    full_backup: bool,\n    replica: bool,\n    gzip_level: Option<async_compression::Level>,\n    ctx: &'a RequestContext,\n) -> Result<(), BasebackupError>\nwhere\n    W: AsyncWrite + Send + Sync + Unpin,\n{\n    // Compute postgres doesn't have any previous WAL files, but the first\n    // record that it's going to write needs to include the LSN of the\n    // previous record (xl_prev). We include prev_record_lsn in the\n    // \"neon.signal\" file, so that postgres can read it during startup.\n    //\n    // We don't keep full history of record boundaries in the page server,\n    // however, only the predecessor of the latest record on each\n    // timeline. So we can only provide prev_record_lsn when you take a\n    // base backup at the end of the timeline, i.e. at last_record_lsn.\n    // Even at the end of the timeline, we sometimes don't have a valid\n    // prev_lsn value; that happens if the timeline was just branched from\n    // an old LSN and it doesn't have any WAL of its own yet. We will set\n    // prev_lsn to Lsn(0) if we cannot provide the correct value.\n    let (backup_prev, lsn) = if let Some(req_lsn) = req_lsn {\n        // Backup was requested at a particular LSN. The caller should've\n        // already checked that it's a valid LSN.\n\n        // If the requested point is the end of the timeline, we can\n        // provide prev_lsn. 
(get_last_record_rlsn() might return it as\n        // zero, though, if no WAL has been generated on this timeline\n        // yet.)\n        let end_of_timeline = timeline.get_last_record_rlsn();\n        if req_lsn == end_of_timeline.last {\n            (end_of_timeline.prev, req_lsn)\n        } else {\n            (Lsn(0), req_lsn)\n        }\n    } else {\n        // Backup was requested at end of the timeline.\n        let end_of_timeline = timeline.get_last_record_rlsn();\n        (end_of_timeline.prev, end_of_timeline.last)\n    };\n\n    // Consolidate the derived and the provided prev_lsn values\n    let prev_record_lsn = if let Some(provided_prev_lsn) = prev_lsn {\n        if backup_prev != Lsn(0) && backup_prev != provided_prev_lsn {\n            return Err(BasebackupError::Server(anyhow!(\n                \"backup_prev {backup_prev} != provided_prev_lsn {provided_prev_lsn}\"\n            )));\n        }\n        provided_prev_lsn\n    } else {\n        backup_prev\n    };\n\n    info!(\n        \"taking basebackup lsn={lsn}, prev_lsn={prev_record_lsn} \\\n        (full_backup={full_backup}, replica={replica}, gzip={gzip_level:?})\",\n    );\n    let span = info_span!(\"send_tarball\", backup_lsn=%lsn);\n\n    let io_concurrency = IoConcurrency::spawn_from_conf(\n        timeline.conf.get_vectored_concurrent_io,\n        timeline\n            .gate\n            .enter()\n            .map_err(|_| BasebackupError::Shutdown)?,\n    );\n\n    if let Some(gzip_level) = gzip_level {\n        let mut encoder = GzipEncoder::with_quality(write, gzip_level);\n        Basebackup {\n            ar: Builder::new_non_terminated(&mut encoder),\n            timeline,\n            lsn,\n            prev_record_lsn,\n            full_backup,\n            replica,\n            ctx,\n            io_concurrency,\n        }\n        .send_tarball()\n        .instrument(span)\n        .await?;\n        encoder\n            .shutdown()\n            .await\n            
.map_err(|err| BasebackupError::Client(err, \"gzip\"))?;\n    } else {\n        Basebackup {\n            ar: Builder::new_non_terminated(write),\n            timeline,\n            lsn,\n            prev_record_lsn,\n            full_backup,\n            replica,\n            ctx,\n            io_concurrency,\n        }\n        .send_tarball()\n        .instrument(span)\n        .await?;\n    }\n\n    Ok(())\n}\n\n/// This is short-living object only for the time of tarball creation,\n/// created mostly to avoid passing a lot of parameters between various functions\n/// used for constructing tarball.\nstruct Basebackup<'a, W>\nwhere\n    W: AsyncWrite + Send + Sync + Unpin,\n{\n    ar: Builder<&'a mut W>,\n    timeline: &'a Timeline,\n    lsn: Lsn,\n    prev_record_lsn: Lsn,\n    full_backup: bool,\n    replica: bool,\n    ctx: &'a RequestContext,\n    io_concurrency: IoConcurrency,\n}\n\n/// A sink that accepts SLRU blocks ordered by key and forwards\n/// full segments to the archive.\nstruct SlruSegmentsBuilder<'a, 'b, W>\nwhere\n    W: AsyncWrite + Send + Sync + Unpin,\n{\n    ar: &'a mut Builder<&'b mut W>,\n    buf: Vec<u8>,\n    current_segment: Option<(SlruKind, u32)>,\n    total_blocks: usize,\n}\n\nimpl<'a, 'b, W> SlruSegmentsBuilder<'a, 'b, W>\nwhere\n    W: AsyncWrite + Send + Sync + Unpin,\n{\n    fn new(ar: &'a mut Builder<&'b mut W>) -> Self {\n        Self {\n            ar,\n            buf: Vec::new(),\n            current_segment: None,\n            total_blocks: 0,\n        }\n    }\n\n    async fn add_block(&mut self, key: &Key, block: Bytes) -> Result<(), BasebackupError> {\n        let (kind, segno, _) = key.to_slru_block()?;\n\n        match kind {\n            SlruKind::Clog => {\n                if !(block.len() == BLCKSZ as usize || block.len() == BLCKSZ as usize + 8) {\n                    return Err(BasebackupError::Server(anyhow!(\n                        \"invalid SlruKind::Clog record: block.len()={}\",\n                        
block.len()\n                    )));\n                }\n            }\n            SlruKind::MultiXactMembers | SlruKind::MultiXactOffsets => {\n                if block.len() != BLCKSZ as usize {\n                    return Err(BasebackupError::Server(anyhow!(\n                        \"invalid {:?} record: block.len()={}\",\n                        kind,\n                        block.len()\n                    )));\n                }\n            }\n        }\n\n        let segment = (kind, segno);\n        match self.current_segment {\n            None => {\n                self.current_segment = Some(segment);\n                self.buf\n                    .extend_from_slice(block.slice(..BLCKSZ as usize).as_ref());\n            }\n            Some(current_seg) if current_seg == segment => {\n                self.buf\n                    .extend_from_slice(block.slice(..BLCKSZ as usize).as_ref());\n            }\n            Some(_) => {\n                self.flush().await?;\n\n                self.current_segment = Some(segment);\n                self.buf\n                    .extend_from_slice(block.slice(..BLCKSZ as usize).as_ref());\n            }\n        }\n\n        Ok(())\n    }\n\n    async fn flush(&mut self) -> Result<(), BasebackupError> {\n        let nblocks = self.buf.len() / BLCKSZ as usize;\n        let (kind, segno) = self.current_segment.take().unwrap();\n        let segname = format!(\"{kind}/{segno:>04X}\");\n        let header = new_tar_header(&segname, self.buf.len() as u64)?;\n        self.ar\n            .append(&header, self.buf.as_slice())\n            .await\n            .map_err(|e| BasebackupError::Client(e, \"flush\"))?;\n\n        self.total_blocks += nblocks;\n        debug!(\"Added to basebackup slru {} relsize {}\", segname, nblocks);\n\n        self.buf.clear();\n\n        Ok(())\n    }\n\n    async fn finish(mut self) -> Result<(), BasebackupError> {\n        let res = if self.current_segment.is_none() || 
self.buf.is_empty() {\n            Ok(())\n        } else {\n            self.flush().await\n        };\n\n        info!(\"Collected {} SLRU blocks\", self.total_blocks);\n\n        res\n    }\n}\n\nimpl<W> Basebackup<'_, W>\nwhere\n    W: AsyncWrite + Send + Sync + Unpin,\n{\n    async fn send_tarball(mut self) -> Result<(), BasebackupError> {\n        // TODO include checksum\n\n        // Construct the pg_control file from the persisted checkpoint and pg_control\n        // information. But we only add this to the tarball at the end, so that if the\n        // writing is interrupted half-way through, the resulting incomplete tarball will\n        // be missing the pg_control file, which prevents PostgreSQL from starting up on\n        // it. With proper error handling, you should never try to start up from an\n        // incomplete basebackup in the first place, of course, but this is a nice little\n        // extra safety measure.\n        let checkpoint_bytes = self\n            .timeline\n            .get_checkpoint(self.lsn, self.ctx)\n            .await\n            .context(\"failed to get checkpoint bytes\")?;\n        let pg_control_bytes = self\n            .timeline\n            .get_control_file(self.lsn, self.ctx)\n            .await\n            .context(\"failed to get control bytes\")?;\n        let (pg_control_bytes, system_identifier, was_shutdown) =\n            postgres_ffi::generate_pg_control(\n                &pg_control_bytes,\n                &checkpoint_bytes,\n                self.lsn,\n                self.timeline.pg_version,\n            )?;\n\n        let lazy_slru_download = self.timeline.get_lazy_slru_download() && !self.full_backup;\n\n        let pgversion = self.timeline.pg_version;\n        let subdirs = dispatch_pgversion!(pgversion, &pgv::bindings::PGDATA_SUBDIRS[..]);\n\n        // Create pgdata subdirs structure\n        for dir in subdirs.iter() {\n            let header = new_tar_header_dir(dir)?;\n            self.ar\n  
              .append(&header, io::empty())\n                .await\n                .map_err(|e| BasebackupError::Client(e, \"send_tarball\"))?;\n        }\n\n        // Send config files.\n        for filepath in PGDATA_SPECIAL_FILES.iter() {\n            if *filepath == \"pg_hba.conf\" {\n                let data = PG_HBA.as_bytes();\n                let header = new_tar_header(filepath, data.len() as u64)?;\n                self.ar\n                    .append(&header, data)\n                    .await\n                    .map_err(|e| BasebackupError::Client(e, \"send_tarball,pg_hba.conf\"))?;\n            } else {\n                let header = new_tar_header(filepath, 0)?;\n                self.ar\n                    .append(&header, io::empty())\n                    .await\n                    .map_err(|e| BasebackupError::Client(e, \"send_tarball,add_config_file\"))?;\n            }\n        }\n        if !lazy_slru_download {\n            // Gather non-relational files from object storage pages.\n            let slru_partitions = self\n                .timeline\n                .get_slru_keyspace(Version::at(self.lsn), self.ctx)\n                .await?\n                .partition(\n                    self.timeline.get_shard_identity(),\n                    self.timeline.conf.max_get_vectored_keys.get() as u64 * BLCKSZ as u64,\n                    BLCKSZ as u64,\n                );\n\n            let mut slru_builder = SlruSegmentsBuilder::new(&mut self.ar);\n\n            for part in slru_partitions.parts {\n                let query = VersionedKeySpaceQuery::uniform(part, self.lsn);\n                let blocks = self\n                    .timeline\n                    .get_vectored(query, self.io_concurrency.clone(), self.ctx)\n                    .await?;\n\n                for (key, block) in blocks {\n                    let block = block?;\n                    slru_builder.add_block(&key, block).await?;\n                }\n            }\n           
 slru_builder.finish().await?;\n        }\n\n        let mut min_restart_lsn: Lsn = Lsn::MAX;\n\n        let mut dbdir_cnt = 0;\n        let mut rel_cnt = 0;\n\n        // Create tablespace directories\n        for ((spcnode, dbnode), has_relmap_file) in\n            self.timeline.list_dbdirs(self.lsn, self.ctx).await?\n        {\n            self.add_dbdir(spcnode, dbnode, has_relmap_file).await?;\n            dbdir_cnt += 1;\n            // If full backup is requested, include all relation files.\n            // Otherwise only include init forks of unlogged relations.\n            let rels = self\n                .timeline\n                .list_rels(spcnode, dbnode, Version::at(self.lsn), self.ctx)\n                .await?;\n            for &rel in rels.iter() {\n                rel_cnt += 1;\n                // Send init fork as main fork to provide well formed empty\n                // contents of UNLOGGED relations. Postgres copies it in\n                // `reinit.c` during recovery.\n                if rel.forknum == INIT_FORKNUM {\n                    // I doubt we need _init fork itself, but having it at least\n                    // serves as a marker relation is unlogged.\n                    self.add_rel(rel, rel).await?;\n                    self.add_rel(rel, rel.with_forknum(MAIN_FORKNUM)).await?;\n                    continue;\n                }\n\n                if self.full_backup {\n                    if rel.forknum == MAIN_FORKNUM && rels.contains(&rel.with_forknum(INIT_FORKNUM))\n                    {\n                        // skip this, will include it when we reach the init fork\n                        continue;\n                    }\n                    self.add_rel(rel, rel).await?;\n                }\n            }\n        }\n\n        self.timeline\n            .db_rel_count\n            .store(Some(Arc::new((dbdir_cnt, rel_cnt))));\n\n        let start_time = Instant::now();\n        let aux_files = self\n            .timeline\n   
         .list_aux_files(self.lsn, self.ctx, self.io_concurrency.clone())\n            .await?;\n        let aux_scan_time = start_time.elapsed();\n        let aux_estimated_size = aux_files\n            .values()\n            .map(|content| content.len())\n            .sum::<usize>();\n        info!(\n            \"Scanned {} aux files in {}ms, aux file content size = {}\",\n            aux_files.len(),\n            aux_scan_time.as_millis(),\n            aux_estimated_size\n        );\n\n        for (path, content) in aux_files {\n            if path.starts_with(\"pg_replslot\") {\n                // Do not create LR slots at standby because they are not used but prevent WAL truncation\n                if self.replica {\n                    continue;\n                }\n                let offs = pg_constants::REPL_SLOT_ON_DISK_OFFSETOF_RESTART_LSN;\n                let restart_lsn = Lsn(u64::from_le_bytes(\n                    content[offs..offs + 8].try_into().unwrap(),\n                ));\n                info!(\"Replication slot {} restart LSN={}\", path, restart_lsn);\n                min_restart_lsn = Lsn::min(min_restart_lsn, restart_lsn);\n            } else if path == \"pg_logical/replorigin_checkpoint\" {\n                // replorigin_checkoint is written only on compute shutdown, so it contains\n                // deteriorated values. So we generate our own version of this file for the particular LSN\n                // based on information about replorigins extracted from transaction commit records.\n                // In future we will not generate AUX record for \"pg_logical/replorigin_checkpoint\" at all,\n                // but now we should handle (skip) it for backward compatibility.\n                continue;\n            } else if path == \"pg_stat/pgstat.stat\" && !was_shutdown {\n                // Drop statistic in case of abnormal termination, i.e. 
if we're not starting from the exact LSN\n                // of a shutdown checkpoint.\n                continue;\n            }\n            let header = new_tar_header(&path, content.len() as u64)?;\n            self.ar\n                .append(&header, &*content)\n                .await\n                .map_err(|e| BasebackupError::Client(e, \"send_tarball,add_aux_file\"))?;\n        }\n\n        if min_restart_lsn != Lsn::MAX {\n            info!(\n                \"Min restart LSN for logical replication is {}\",\n                min_restart_lsn\n            );\n            let data = min_restart_lsn.0.to_le_bytes();\n            let header = new_tar_header(\"restart.lsn\", data.len() as u64)?;\n            self.ar\n                .append(&header, &data[..])\n                .await\n                .map_err(|e| BasebackupError::Client(e, \"send_tarball,restart.lsn\"))?;\n        }\n        for xid in self\n            .timeline\n            .list_twophase_files(self.lsn, self.ctx)\n            .await?\n        {\n            self.add_twophase_file(xid).await?;\n        }\n        let repl_origins = self\n            .timeline\n            .get_replorigins(self.lsn, self.ctx, self.io_concurrency.clone())\n            .await?;\n        let n_origins = repl_origins.len();\n        if n_origins != 0 {\n            //\n            // Construct \"pg_logical/replorigin_checkpoint\" file based on information about replication origins\n            // extracted from transaction commit record. 
We are using this file to pass information about replication\n            // origins to compute to allow logical replication to restart from proper point.\n            //\n            let mut content = Vec::with_capacity(n_origins * 16 + 8);\n            content.extend_from_slice(&pg_constants::REPLICATION_STATE_MAGIC.to_le_bytes());\n            for (origin_id, origin_lsn) in repl_origins {\n                content.extend_from_slice(&origin_id.to_le_bytes());\n                content.extend_from_slice(&[0u8; 6]); // align to 8 bytes\n                content.extend_from_slice(&origin_lsn.0.to_le_bytes());\n            }\n            let crc32 = crc32c::crc32c(&content);\n            content.extend_from_slice(&crc32.to_le_bytes());\n            let header = new_tar_header(\"pg_logical/replorigin_checkpoint\", content.len() as u64)?;\n            self.ar.append(&header, &*content).await.map_err(|e| {\n                BasebackupError::Client(e, \"send_tarball,pg_logical/replorigin_checkpoint\")\n            })?;\n        }\n\n        fail_point!(\"basebackup-before-control-file\", |_| {\n            Err(BasebackupError::Server(anyhow!(\n                \"failpoint basebackup-before-control-file\"\n            )))\n        });\n\n        // Last, add the pg_control file and bootstrap WAL segment.\n        self.add_pgcontrol_file(pg_control_bytes, system_identifier)\n            .await?;\n        self.ar\n            .finish()\n            .await\n            .map_err(|e| BasebackupError::Client(e, \"send_tarball,finish\"))?;\n        debug!(\"all tarred up!\");\n        Ok(())\n    }\n\n    /// Add contents of relfilenode `src`, naming it as `dst`.\n    async fn add_rel(&mut self, src: RelTag, dst: RelTag) -> Result<(), BasebackupError> {\n        let nblocks = self\n            .timeline\n            .get_rel_size(src, Version::at(self.lsn), self.ctx)\n            .await?;\n\n        // If the relation is empty, create an empty file\n        if nblocks == 0 {\n        
    let file_name = dst.to_segfile_name(0);\n            let header = new_tar_header(&file_name, 0)?;\n            self.ar\n                .append(&header, io::empty())\n                .await\n                .map_err(|e| BasebackupError::Client(e, \"add_rel,empty\"))?;\n            return Ok(());\n        }\n\n        // Add a file for each chunk of blocks (aka segment)\n        let mut startblk = 0;\n        let mut seg = 0;\n        while startblk < nblocks {\n            let endblk = std::cmp::min(startblk + RELSEG_SIZE, nblocks);\n\n            let mut segment_data: Vec<u8> = vec![];\n            for blknum in startblk..endblk {\n                let img = self\n                    .timeline\n                    // TODO: investigate using get_vectored for the entire startblk..endblk range.\n                    // But this code path is not on the critical path for most basebackups (?).\n                    .get(rel_block_to_key(src, blknum), self.lsn, self.ctx)\n                    .await?;\n                segment_data.extend_from_slice(&img[..]);\n            }\n\n            let file_name = dst.to_segfile_name(seg as u32);\n            let header = new_tar_header(&file_name, segment_data.len() as u64)?;\n            self.ar\n                .append(&header, segment_data.as_slice())\n                .await\n                .map_err(|e| BasebackupError::Client(e, \"add_rel,segment\"))?;\n\n            seg += 1;\n            startblk = endblk;\n        }\n\n        Ok(())\n    }\n\n    //\n    // Include database/tablespace directories.\n    //\n    // Each directory contains a PG_VERSION file, and the default database\n    // directories also contain pg_filenode.map files.\n    //\n    async fn add_dbdir(\n        &mut self,\n        spcnode: u32,\n        dbnode: u32,\n        has_relmap_file: bool,\n    ) -> Result<(), BasebackupError> {\n        let relmap_img = if has_relmap_file {\n            let img = self\n                .timeline\n                
.get_relmap_file(spcnode, dbnode, Version::at(self.lsn), self.ctx)\n                .await?;\n\n            if img.len()\n                != dispatch_pgversion!(self.timeline.pg_version, pgv::bindings::SIZEOF_RELMAPFILE)\n            {\n                return Err(BasebackupError::Server(anyhow!(\n                    \"img.len() != SIZE_OF_RELMAPFILE, img.len()={}\",\n                    img.len(),\n                )));\n            }\n\n            Some(img)\n        } else {\n            None\n        };\n\n        if spcnode == GLOBALTABLESPACE_OID {\n            let pg_version_str = self.timeline.pg_version.versionfile_string();\n            let header = new_tar_header(\"PG_VERSION\", pg_version_str.len() as u64)?;\n            self.ar\n                .append(&header, pg_version_str.as_bytes())\n                .await\n                .map_err(|e| BasebackupError::Client(e, \"add_dbdir,PG_VERSION\"))?;\n\n            info!(\"timeline.pg_version {}\", self.timeline.pg_version);\n\n            if let Some(img) = relmap_img {\n                // filenode map for global tablespace\n                let header = new_tar_header(\"global/pg_filenode.map\", img.len() as u64)?;\n                self.ar\n                    .append(&header, &img[..])\n                    .await\n                    .map_err(|e| BasebackupError::Client(e, \"add_dbdir,global/pg_filenode.map\"))?;\n            } else {\n                warn!(\"global/pg_filenode.map is missing\");\n            }\n        } else {\n            // User defined tablespaces are not supported. However, as\n            // a special case, if a tablespace/db directory is\n            // completely empty, we can leave it out altogether. 
This\n            // makes taking a base backup after the 'tablespace'\n            // regression test pass, because the test drops the\n            // created tablespaces after the tests.\n            //\n            // FIXME: this wouldn't be necessary, if we handled\n            // XLOG_TBLSPC_DROP records. But we probably should just\n            // throw an error on CREATE TABLESPACE in the first place.\n            if !has_relmap_file\n                && self\n                    .timeline\n                    .list_rels(spcnode, dbnode, Version::at(self.lsn), self.ctx)\n                    .await?\n                    .is_empty()\n            {\n                return Ok(());\n            }\n            // User defined tablespaces are not supported\n            if spcnode != DEFAULTTABLESPACE_OID {\n                return Err(BasebackupError::Server(anyhow!(\n                    \"spcnode != DEFAULTTABLESPACE_OID, spcnode={spcnode}\"\n                )));\n            }\n\n            // Append dir path for each database\n            let path = format!(\"base/{dbnode}\");\n            let header = new_tar_header_dir(&path)?;\n            self.ar\n                .append(&header, io::empty())\n                .await\n                .map_err(|e| BasebackupError::Client(e, \"add_dbdir,base\"))?;\n\n            if let Some(img) = relmap_img {\n                let dst_path = format!(\"base/{dbnode}/PG_VERSION\");\n\n                let pg_version_str = self.timeline.pg_version.versionfile_string();\n                let header = new_tar_header(&dst_path, pg_version_str.len() as u64)?;\n                self.ar\n                    .append(&header, pg_version_str.as_bytes())\n                    .await\n                    .map_err(|e| BasebackupError::Client(e, \"add_dbdir,base/PG_VERSION\"))?;\n\n                let relmap_path = format!(\"base/{dbnode}/pg_filenode.map\");\n                let header = new_tar_header(&relmap_path, img.len() as u64)?;\n            
    self.ar\n                    .append(&header, &img[..])\n                    .await\n                    .map_err(|e| BasebackupError::Client(e, \"add_dbdir,base/pg_filenode.map\"))?;\n            }\n        };\n        Ok(())\n    }\n\n    //\n    // Extract twophase state files\n    //\n    async fn add_twophase_file(&mut self, xid: u64) -> Result<(), BasebackupError> {\n        let img = self\n            .timeline\n            .get_twophase_file(xid, self.lsn, self.ctx)\n            .await?;\n\n        let mut buf = BytesMut::new();\n        buf.extend_from_slice(&img[..]);\n        let crc = crc32c::crc32c(&img[..]);\n        buf.put_u32_le(crc);\n        let path = if self.timeline.pg_version < PgMajorVersion::PG17 {\n            format!(\"pg_twophase/{xid:>08X}\")\n        } else {\n            format!(\"pg_twophase/{xid:>016X}\")\n        };\n        let header = new_tar_header(&path, buf.len() as u64)?;\n        self.ar\n            .append(&header, &buf[..])\n            .await\n            .map_err(|e| BasebackupError::Client(e, \"add_twophase_file\"))?;\n\n        Ok(())\n    }\n\n    //\n    // Add generated pg_control file and bootstrap WAL segment.\n    // Also send neon.signal and zenith.signal file with extra bootstrap data.\n    //\n    async fn add_pgcontrol_file(\n        &mut self,\n        pg_control_bytes: Bytes,\n        system_identifier: u64,\n    ) -> Result<(), BasebackupError> {\n        // add neon.signal file\n        let mut neon_signal = String::new();\n        if self.prev_record_lsn == Lsn(0) {\n            if self.timeline.is_ancestor_lsn(self.lsn) {\n                write!(neon_signal, \"PREV LSN: none\")\n                    .map_err(|e| BasebackupError::Server(e.into()))?;\n            } else {\n                write!(neon_signal, \"PREV LSN: invalid\")\n                    .map_err(|e| BasebackupError::Server(e.into()))?;\n            }\n        } else {\n            write!(neon_signal, \"PREV LSN: {}\", 
self.prev_record_lsn)\n                .map_err(|e| BasebackupError::Server(e.into()))?;\n        }\n\n        // TODO: Remove zenith.signal once all historical computes have been replaced\n        // ... and thus support the neon.signal file.\n        for signalfilename in [\"neon.signal\", \"zenith.signal\"] {\n            self.ar\n                .append(\n                    &new_tar_header(signalfilename, neon_signal.len() as u64)?,\n                    neon_signal.as_bytes(),\n                )\n                .await\n                .map_err(|e| BasebackupError::Client(e, \"add_pgcontrol_file,neon.signal\"))?;\n        }\n\n        //send pg_control\n        let header = new_tar_header(\"global/pg_control\", pg_control_bytes.len() as u64)?;\n        self.ar\n            .append(&header, &pg_control_bytes[..])\n            .await\n            .map_err(|e| BasebackupError::Client(e, \"add_pgcontrol_file,pg_control\"))?;\n\n        //send wal segment\n        let segno = self.lsn.segment_number(WAL_SEGMENT_SIZE);\n        let wal_file_name = XLogFileName(PG_TLI, segno, WAL_SEGMENT_SIZE);\n        let wal_file_path = format!(\"pg_wal/{wal_file_name}\");\n        let header = new_tar_header(&wal_file_path, WAL_SEGMENT_SIZE as u64)?;\n\n        let wal_seg = postgres_ffi::generate_wal_segment(\n            segno,\n            system_identifier,\n            self.timeline.pg_version,\n            self.lsn,\n        )\n        .map_err(|e| anyhow!(e).context(\"Failed generating wal segment\"))?;\n        if wal_seg.len() != WAL_SEGMENT_SIZE {\n            return Err(BasebackupError::Server(anyhow!(\n                \"wal_seg.len() != WAL_SEGMENT_SIZE, wal_seg.len()={}\",\n                wal_seg.len()\n            )));\n        }\n        self.ar\n            .append(&header, &wal_seg[..])\n            .await\n            .map_err(|e| BasebackupError::Client(e, \"add_pgcontrol_file,wal_segment\"))?;\n        Ok(())\n    }\n}\n\n//\n// Create new tarball entry 
header\n//\nfn new_tar_header(path: &str, size: u64) -> anyhow::Result<Header> {\n    let mut header = Header::new_gnu();\n    header.set_size(size);\n    header.set_path(path)?;\n    header.set_mode(0b110000000); // -rw-------\n    header.set_mtime(\n        // use current time as last modified time\n        SystemTime::now()\n            .duration_since(SystemTime::UNIX_EPOCH)\n            .unwrap()\n            .as_secs(),\n    );\n    header.set_cksum();\n    Ok(header)\n}\n\nfn new_tar_header_dir(path: &str) -> anyhow::Result<Header> {\n    let mut header = Header::new_gnu();\n    header.set_size(0);\n    header.set_path(path)?;\n    header.set_mode(0o755); // drwxr-xr-x\n    header.set_entry_type(EntryType::dir());\n    header.set_mtime(\n        // use current time as last modified time\n        SystemTime::now()\n            .duration_since(SystemTime::UNIX_EPOCH)\n            .unwrap()\n            .as_secs(),\n    );\n    header.set_cksum();\n    Ok(header)\n}\n"
  },
  {
    "path": "pageserver/src/basebackup_cache.rs",
    "content": "use std::{collections::HashMap, sync::Arc};\n\nuse anyhow::Context;\nuse camino::{Utf8Path, Utf8PathBuf};\nuse metrics::core::{AtomicU64, GenericCounter};\nuse pageserver_api::{config::BasebackupCacheConfig, models::TenantState};\nuse tokio::{\n    io::{AsyncWriteExt, BufWriter},\n    sync::mpsc::{Receiver, Sender, error::TrySendError},\n};\nuse tokio_util::sync::CancellationToken;\nuse utils::{\n    id::{TenantId, TenantTimelineId, TimelineId},\n    lsn::Lsn,\n    shard::TenantShardId,\n};\n\nuse crate::{\n    basebackup::send_basebackup_tarball,\n    context::{DownloadBehavior, RequestContext},\n    metrics::{\n        BASEBACKUP_CACHE_ENTRIES, BASEBACKUP_CACHE_PREPARE, BASEBACKUP_CACHE_PREPARE_QUEUE_SIZE,\n        BASEBACKUP_CACHE_READ, BASEBACKUP_CACHE_SIZE,\n    },\n    task_mgr::TaskKind,\n    tenant::{\n        Timeline,\n        mgr::{TenantManager, TenantSlot},\n    },\n};\n\npub struct BasebackupPrepareRequest {\n    pub tenant_shard_id: TenantShardId,\n    pub timeline_id: TimelineId,\n    pub lsn: Lsn,\n}\n\npub type BasebackupPrepareSender = Sender<BasebackupPrepareRequest>;\npub type BasebackupPrepareReceiver = Receiver<BasebackupPrepareRequest>;\n\n#[derive(Clone)]\nstruct CacheEntry {\n    /// LSN at which the basebackup was taken.\n    lsn: Lsn,\n    /// Size of the basebackup archive in bytes.\n    size_bytes: u64,\n}\n\n/// BasebackupCache stores cached basebackup archives for timelines on local disk.\n///\n/// The main purpose of this cache is to speed up the startup process of compute nodes\n/// after scaling to zero.\n/// Thus, the basebackup is stored only for the latest LSN of the timeline and with\n/// fixed set of parameters (gzip=true, full_backup=false, replica=false, prev_lsn=none).\n///\n/// The cache receives prepare requests through the `BasebackupPrepareSender` channel,\n/// generates a basebackup from the timeline in the background, and stores it on disk.\n///\n/// Basebackup requests are pretty rare. 
We expect ~thousands of entries in the cache\n/// and ~1 RPS for get requests.\npub struct BasebackupCache {\n    data_dir: Utf8PathBuf,\n    config: Option<BasebackupCacheConfig>,\n\n    entries: std::sync::Mutex<HashMap<TenantTimelineId, CacheEntry>>,\n\n    prepare_sender: BasebackupPrepareSender,\n\n    read_hit_count: GenericCounter<AtomicU64>,\n    read_miss_count: GenericCounter<AtomicU64>,\n    read_err_count: GenericCounter<AtomicU64>,\n\n    prepare_skip_count: GenericCounter<AtomicU64>,\n}\n\nimpl BasebackupCache {\n    /// Create a new BasebackupCache instance.\n    /// Also returns a BasebackupPrepareReceiver which is needed to start\n    /// the background task.\n    /// The cache is initialized from the data_dir in the background task.\n    /// The cache will return `None` for any get requests until the initialization is complete.\n    /// The background task is spawned separately using [`Self::spawn_background_task`]\n    /// to avoid a circular dependency between the cache and the tenant manager.\n    pub fn new(\n        data_dir: Utf8PathBuf,\n        config: Option<BasebackupCacheConfig>,\n    ) -> (Arc<Self>, BasebackupPrepareReceiver) {\n        let chan_size = config.as_ref().map(|c| c.max_size_entries).unwrap_or(1);\n\n        let (prepare_sender, prepare_receiver) = tokio::sync::mpsc::channel(chan_size);\n\n        let cache = Arc::new(BasebackupCache {\n            data_dir,\n            config,\n            entries: std::sync::Mutex::new(HashMap::new()),\n            prepare_sender,\n\n            read_hit_count: BASEBACKUP_CACHE_READ.with_label_values(&[\"hit\"]),\n            read_miss_count: BASEBACKUP_CACHE_READ.with_label_values(&[\"miss\"]),\n            read_err_count: BASEBACKUP_CACHE_READ.with_label_values(&[\"error\"]),\n\n            prepare_skip_count: BASEBACKUP_CACHE_PREPARE.with_label_values(&[\"skip\"]),\n        });\n\n        (cache, prepare_receiver)\n    }\n\n    /// Spawns the background task.\n    /// The background 
task initializes the cache from the disk,\n    /// processes prepare requests, and cleans up outdated cache entries.\n    /// Noop if the cache is disabled (config is None).\n    pub fn spawn_background_task(\n        self: Arc<Self>,\n        runtime_handle: &tokio::runtime::Handle,\n        prepare_receiver: BasebackupPrepareReceiver,\n        tenant_manager: Arc<TenantManager>,\n        cancel: CancellationToken,\n    ) {\n        if let Some(config) = self.config.clone() {\n            let background = BackgroundTask {\n                c: self,\n\n                config,\n                tenant_manager,\n                cancel,\n\n                entry_count: 0,\n                total_size_bytes: 0,\n\n                prepare_ok_count: BASEBACKUP_CACHE_PREPARE.with_label_values(&[\"ok\"]),\n                prepare_skip_count: BASEBACKUP_CACHE_PREPARE.with_label_values(&[\"skip\"]),\n                prepare_err_count: BASEBACKUP_CACHE_PREPARE.with_label_values(&[\"error\"]),\n            };\n            runtime_handle.spawn(background.run(prepare_receiver));\n        }\n    }\n\n    /// Send a basebackup prepare request to the background task.\n    /// The basebackup will be prepared asynchronously, it does not block the caller.\n    /// The request will be skipped if any cache limits are exceeded.\n    pub fn send_prepare(&self, tenant_shard_id: TenantShardId, timeline_id: TimelineId, lsn: Lsn) {\n        let req = BasebackupPrepareRequest {\n            tenant_shard_id,\n            timeline_id,\n            lsn,\n        };\n\n        BASEBACKUP_CACHE_PREPARE_QUEUE_SIZE.inc();\n        let res = self.prepare_sender.try_send(req);\n\n        if let Err(e) = res {\n            BASEBACKUP_CACHE_PREPARE_QUEUE_SIZE.dec();\n            self.prepare_skip_count.inc();\n            match e {\n                TrySendError::Full(_) => {\n                    // Basebackup prepares are pretty rare, normally we should not hit this.\n                    tracing::info!(\n    
                    tenant_id = %tenant_shard_id.tenant_id,\n                        %timeline_id,\n                        %lsn,\n                        \"Basebackup prepare channel is full, skipping the request\"\n                    );\n                }\n                TrySendError::Closed(_) => {\n                    // Normal during shutdown, not critical.\n                    tracing::info!(\n                        tenant_id = %tenant_shard_id.tenant_id,\n                        %timeline_id,\n                        %lsn,\n                        \"Basebackup prepare channel is closed, skipping the request\"\n                    );\n                }\n            }\n        }\n    }\n\n    /// Gets a basebackup entry from the cache.\n    /// If the entry is found, opens a file with the basebackup archive and returns it.\n    /// The open file descriptor will prevent the file system from deleting the file\n    /// even if the entry is removed from the cache in the background.\n    pub async fn get(\n        &self,\n        tenant_id: TenantId,\n        timeline_id: TimelineId,\n        lsn: Lsn,\n    ) -> Option<tokio::fs::File> {\n        if !self.is_enabled() {\n            return None;\n        }\n\n        // Fast path. 
Check if the entry exists using the in-memory state.\n        let tti = TenantTimelineId::new(tenant_id, timeline_id);\n        if self.entries.lock().unwrap().get(&tti).map(|e| e.lsn) != Some(lsn) {\n            self.read_miss_count.inc();\n            return None;\n        }\n\n        let path = self.entry_path(tenant_id, timeline_id, lsn);\n\n        match tokio::fs::File::open(path).await {\n            Ok(file) => {\n                self.read_hit_count.inc();\n                Some(file)\n            }\n            Err(e) => {\n                if e.kind() == std::io::ErrorKind::NotFound {\n                    // We may end up here if the basebackup was concurrently removed by the cleanup task.\n                    self.read_miss_count.inc();\n                } else {\n                    self.read_err_count.inc();\n                    tracing::warn!(\"Unexpected error opening basebackup cache file: {:?}\", e);\n                }\n                None\n            }\n        }\n    }\n\n    pub fn is_enabled(&self) -> bool {\n        self.config.is_some()\n    }\n\n    // Private methods.\n\n    fn entry_filename(tenant_id: TenantId, timeline_id: TimelineId, lsn: Lsn) -> String {\n        // The default format for LSN is 0/ABCDEF.\n        // The slash is not filename friendly, so serialize it as plain hex.\n        let lsn = lsn.0;\n        format!(\"basebackup_{tenant_id}_{timeline_id}_{lsn:016X}.tar.gz\")\n    }\n\n    fn entry_path(&self, tenant_id: TenantId, timeline_id: TimelineId, lsn: Lsn) -> Utf8PathBuf {\n        self.data_dir\n            .join(Self::entry_filename(tenant_id, timeline_id, lsn))\n    }\n}\n\n/// The background task that does the job to prepare basebackups\n/// and manage the cache entries on disk.\n/// It is a separate struct from BasebackupCache to allow holding\n/// a mutable reference to this state without a mutex lock,\n/// while BasebackupCache is referenced by the clients.\nstruct BackgroundTask {\n    c: 
Arc<BasebackupCache>,\n\n    config: BasebackupCacheConfig,\n    tenant_manager: Arc<TenantManager>,\n    cancel: CancellationToken,\n\n    /// Number of the entries in the cache.\n    /// This counter is used for metrics and applying cache limits.\n    /// It generally should be equal to c.entries.len(), but it's calculated\n    /// pessimistically for abnormal situations: if we encountered some errors\n    /// during removing the entry from disk, we won't decrement this counter to\n    /// make sure that we don't exceed the limit with \"trashed\" files on the disk.\n    /// It will also count files in the data_dir that are not valid cache entries.\n    entry_count: usize,\n    /// Total size of all the entries on the disk.\n    /// This counter is used for metrics and applying cache limits.\n    /// Similar to entry_count, it is calculated pessimistically for abnormal situations.\n    total_size_bytes: u64,\n\n    prepare_ok_count: GenericCounter<AtomicU64>,\n    prepare_skip_count: GenericCounter<AtomicU64>,\n    prepare_err_count: GenericCounter<AtomicU64>,\n}\n\nimpl BackgroundTask {\n    fn tmp_dir(&self) -> Utf8PathBuf {\n        self.c.data_dir.join(\"tmp\")\n    }\n\n    fn entry_tmp_path(\n        &self,\n        tenant_id: TenantId,\n        timeline_id: TimelineId,\n        lsn: Lsn,\n    ) -> Utf8PathBuf {\n        self.tmp_dir()\n            .join(BasebackupCache::entry_filename(tenant_id, timeline_id, lsn))\n    }\n\n    fn parse_entry_filename(filename: &str) -> Option<(TenantId, TimelineId, Lsn)> {\n        let parts: Vec<&str> = filename\n            .strip_prefix(\"basebackup_\")?\n            .strip_suffix(\".tar.gz\")?\n            .split('_')\n            .collect();\n        if parts.len() != 3 {\n            return None;\n        }\n        let tenant_id = parts[0].parse::<TenantId>().ok()?;\n        let timeline_id = parts[1].parse::<TimelineId>().ok()?;\n        let lsn = Lsn(u64::from_str_radix(parts[2], 16).ok()?);\n\n        
Some((tenant_id, timeline_id, lsn))\n    }\n\n    // Recreate the tmp directory to clear all files in it.\n    async fn clean_tmp_dir(&self) -> anyhow::Result<()> {\n        let tmp_dir = self.tmp_dir();\n        if tmp_dir.exists() {\n            tokio::fs::remove_dir_all(&tmp_dir).await?;\n        }\n        tokio::fs::create_dir_all(&tmp_dir).await?;\n        Ok(())\n    }\n\n    async fn cleanup(&mut self) -> anyhow::Result<()> {\n        self.clean_tmp_dir().await?;\n\n        // Leave only up-to-date entries.\n        let entries_old = self.c.entries.lock().unwrap().clone();\n        let mut entries_new = HashMap::new();\n        for (tenant_shard_id, tenant_slot) in self.tenant_manager.list() {\n            if !tenant_shard_id.is_shard_zero() {\n                continue;\n            }\n            let TenantSlot::Attached(tenant) = tenant_slot else {\n                continue;\n            };\n            let tenant_id = tenant_shard_id.tenant_id;\n\n            for timeline in tenant.list_timelines() {\n                let tti = TenantTimelineId::new(tenant_id, timeline.timeline_id);\n                if let Some(entry) = entries_old.get(&tti) {\n                    if timeline.get_last_record_lsn() <= entry.lsn {\n                        entries_new.insert(tti, entry.clone());\n                    }\n                }\n            }\n        }\n\n        // Try to remove all entries that are not up-to-date.\n        for (&tti, entry) in entries_old.iter() {\n            if !entries_new.contains_key(&tti) {\n                self.try_remove_entry(tti.tenant_id, tti.timeline_id, entry)\n                    .await;\n            }\n        }\n\n        // Note: BackgroundTask is the only writer for self.c.entries,\n        // so it couldn't have been modified concurrently.\n        *self.c.entries.lock().unwrap() = entries_new;\n\n        Ok(())\n    }\n\n    async fn on_startup(&mut self) -> anyhow::Result<()> {\n        // Create data_dir if it does not 
exist.\n        tokio::fs::create_dir_all(&self.c.data_dir)\n            .await\n            .context(\"Failed to create basebackup cache data directory\")?;\n\n        self.clean_tmp_dir()\n            .await\n            .context(\"Failed to clean tmp directory\")?;\n\n        // Read existing entries from the data_dir and add them to in-memory state.\n        let mut entries = HashMap::<TenantTimelineId, CacheEntry>::new();\n        let mut dir = tokio::fs::read_dir(&self.c.data_dir).await?;\n        while let Some(dir_entry) = dir.next_entry().await? {\n            let filename = dir_entry.file_name();\n\n            if filename == \"tmp\" {\n                // Skip the tmp directory.\n                continue;\n            }\n\n            let size_bytes = dir_entry\n                .metadata()\n                .await\n                .map_err(|e| {\n                    anyhow::anyhow!(\"Failed to read metadata for file {:?}: {:?}\", filename, e)\n                })?\n                .len();\n\n            self.entry_count += 1;\n            BASEBACKUP_CACHE_ENTRIES.set(self.entry_count as u64);\n\n            self.total_size_bytes += size_bytes;\n            BASEBACKUP_CACHE_SIZE.set(self.total_size_bytes);\n\n            let parsed = Self::parse_entry_filename(filename.to_string_lossy().as_ref());\n            let Some((tenant_id, timeline_id, lsn)) = parsed else {\n                tracing::warn!(\"Invalid basebackup cache file name: {:?}\", filename);\n                continue;\n            };\n\n            let cur_entry = CacheEntry { lsn, size_bytes };\n\n            let tti = TenantTimelineId::new(tenant_id, timeline_id);\n\n            use std::collections::hash_map::Entry::*;\n\n            match entries.entry(tti) {\n                Occupied(mut entry) => {\n                    let found_entry = entry.get();\n                    // Leave only the latest entry, remove the old one.\n                    if cur_entry.lsn < found_entry.lsn {\n             
           self.try_remove_entry(tenant_id, timeline_id, &cur_entry)\n                            .await;\n                    } else if cur_entry.lsn > found_entry.lsn {\n                        self.try_remove_entry(tenant_id, timeline_id, found_entry)\n                            .await;\n                        entry.insert(cur_entry);\n                    } else {\n                        // Two different filenames parsed to the same timeline_id and LSN.\n                        // Should never happen.\n                        return Err(anyhow::anyhow!(\n                            \"Duplicate basebackup cache entry with the same LSN: {:?}\",\n                            filename\n                        ));\n                    }\n                }\n                Vacant(entry) => {\n                    entry.insert(cur_entry);\n                }\n            }\n        }\n\n        *self.c.entries.lock().unwrap() = entries;\n\n        Ok(())\n    }\n\n    async fn run(mut self, mut prepare_receiver: BasebackupPrepareReceiver) {\n        // Panic in the background is a safe fallback.\n        // It will drop receivers and the cache will be effectively disabled.\n        self.on_startup()\n            .await\n            .expect(\"Failed to initialize basebackup cache\");\n\n        let mut cleanup_ticker = tokio::time::interval(self.config.cleanup_period);\n        cleanup_ticker.set_missed_tick_behavior(tokio::time::MissedTickBehavior::Skip);\n\n        loop {\n            tokio::select! 
{\n                Some(req) = prepare_receiver.recv() => {\n                    BASEBACKUP_CACHE_PREPARE_QUEUE_SIZE.dec();\n                    if let Err(err) = self.prepare_basebackup(\n                        req.tenant_shard_id,\n                        req.timeline_id,\n                        req.lsn,\n                    ).await {\n                        tracing::info!(\"Failed to prepare basebackup: {:#}\", err);\n                        self.prepare_err_count.inc();\n                        continue;\n                    }\n                }\n                _ = cleanup_ticker.tick() => {\n                    self.cleanup().await.unwrap_or_else(|e| {\n                        tracing::warn!(\"Failed to clean up basebackup cache: {:#}\", e);\n                    });\n                }\n                _ = self.cancel.cancelled() => {\n                    tracing::info!(\"BasebackupCache background task cancelled\");\n                    break;\n                }\n            }\n        }\n    }\n\n    /// Try to remove an entry from disk.\n    /// The caller is responsible for removing the entry from the in-memory state.\n    /// Updates size counters and corresponding metrics.\n    /// Ignores the filesystem errors as not-so-important, but the size counters\n    /// are not decremented in this case, so the file will continue to be counted\n    /// towards the size limits.\n    async fn try_remove_entry(\n        &mut self,\n        tenant_id: TenantId,\n        timeline_id: TimelineId,\n        entry: &CacheEntry,\n    ) {\n        let entry_path = self.c.entry_path(tenant_id, timeline_id, entry.lsn);\n\n        match tokio::fs::remove_file(&entry_path).await {\n            Ok(_) => {}\n            Err(e) if e.kind() == std::io::ErrorKind::NotFound => {}\n            Err(e) => {\n                tracing::warn!(\n                    \"Failed to remove basebackup cache file for tenant {} timeline {} LSN {}: {:#}\",\n                    tenant_id,\n          
          timeline_id,\n                    entry.lsn,\n                    e\n                );\n                return;\n            }\n        }\n\n        self.entry_count -= 1;\n        BASEBACKUP_CACHE_ENTRIES.set(self.entry_count as u64);\n\n        self.total_size_bytes -= entry.size_bytes;\n        BASEBACKUP_CACHE_SIZE.set(self.total_size_bytes);\n    }\n\n    /// Insert the cache entry into in-memory state and update the size counters.\n    /// Assumes that the file for the entry already exists on disk.\n    /// If the entry already exists with previous LSN, it will be removed.\n    async fn upsert_entry(\n        &mut self,\n        tenant_id: TenantId,\n        timeline_id: TimelineId,\n        entry: CacheEntry,\n    ) {\n        let tti = TenantTimelineId::new(tenant_id, timeline_id);\n\n        self.entry_count += 1;\n        BASEBACKUP_CACHE_ENTRIES.set(self.entry_count as u64);\n\n        self.total_size_bytes += entry.size_bytes;\n        BASEBACKUP_CACHE_SIZE.set(self.total_size_bytes);\n\n        let old_entry = self.c.entries.lock().unwrap().insert(tti, entry);\n\n        if let Some(old_entry) = old_entry {\n            self.try_remove_entry(tenant_id, timeline_id, &old_entry)\n                .await;\n        }\n    }\n\n    /// Prepare a basebackup for the given timeline.\n    ///\n    /// If the basebackup already exists with a higher LSN or the timeline already\n    /// has a higher last_record_lsn, skip the preparation.\n    ///\n    /// The basebackup is prepared in a temporary directory and then moved to the final\n    /// location to make the operation atomic.\n    async fn prepare_basebackup(\n        &mut self,\n        tenant_shard_id: TenantShardId,\n        timeline_id: TimelineId,\n        req_lsn: Lsn,\n    ) -> anyhow::Result<()> {\n        tracing::info!(\n            tenant_id = %tenant_shard_id.tenant_id,\n            %timeline_id,\n            %req_lsn,\n            \"Preparing basebackup for timeline\",\n        );\n\n   
     let tti = TenantTimelineId::new(tenant_shard_id.tenant_id, timeline_id);\n\n        // TODO(diko): I don't think we will hit the limit,\n        // but if we do, it makes sense to try to evict oldest entries. here\n        if self.entry_count >= self.config.max_size_entries {\n            tracing::info!(\n                %tenant_shard_id,\n                %timeline_id,\n                %req_lsn,\n                \"Basebackup cache is full (max_size_entries), skipping basebackup\",\n            );\n            self.prepare_skip_count.inc();\n            return Ok(());\n        }\n\n        if self.total_size_bytes >= self.config.max_total_size_bytes {\n            tracing::info!(\n                %tenant_shard_id,\n                %timeline_id,\n                %req_lsn,\n                \"Basebackup cache is full (max_total_size_bytes), skipping basebackup\",\n            );\n            self.prepare_skip_count.inc();\n            return Ok(());\n        }\n\n        {\n            let entries = self.c.entries.lock().unwrap();\n            if let Some(entry) = entries.get(&tti) {\n                if entry.lsn >= req_lsn {\n                    tracing::info!(\n                        %timeline_id,\n                        %req_lsn,\n                        %entry.lsn,\n                        \"Basebackup entry already exists for timeline with higher LSN, skipping basebackup\",\n                    );\n                    self.prepare_skip_count.inc();\n                    return Ok(());\n                }\n            }\n        }\n\n        let tenant = self\n            .tenant_manager\n            .get_attached_tenant_shard(tenant_shard_id)?;\n\n        let tenant_state = tenant.current_state();\n        if tenant_state != TenantState::Active {\n            anyhow::bail!(\n                \"Tenant {} is not active, current state: {:?}\",\n                tenant_shard_id.tenant_id,\n                tenant_state\n            )\n        }\n\n        let 
timeline = tenant.get_timeline(timeline_id, true)?;\n\n        let last_record_lsn = timeline.get_last_record_lsn();\n        if last_record_lsn > req_lsn {\n            tracing::info!(\n                %timeline_id,\n                %req_lsn,\n                %last_record_lsn,\n                \"Timeline has a higher LSN than the requested one, skipping basebackup\",\n            );\n            self.prepare_skip_count.inc();\n            return Ok(());\n        }\n\n        let entry_tmp_path = self.entry_tmp_path(tenant_shard_id.tenant_id, timeline_id, req_lsn);\n\n        let res = self\n            .prepare_basebackup_tmp(&entry_tmp_path, &timeline, req_lsn)\n            .await;\n\n        let entry = match res {\n            Ok(entry) => entry,\n            Err(err) => {\n                tracing::info!(\"Failed to prepare basebackup tmp file: {:#}\", err);\n                // Try to clean up tmp file. If we fail, the background clean up task will take care of it.\n                match tokio::fs::remove_file(&entry_tmp_path).await {\n                    Ok(_) => {}\n                    Err(e) if e.kind() == std::io::ErrorKind::NotFound => {}\n                    Err(e) => {\n                        tracing::info!(\"Failed to remove basebackup tmp file: {:?}\", e);\n                    }\n                }\n                return Err(err);\n            }\n        };\n\n        // Move the tmp file to the final location atomically.\n        // The tmp file is fsynced, so it's guaranteed that we will not have a partial file\n        // in the main directory.\n        // It's not necessary to fsync the inode after renaming, because the worst case is that\n        // the rename operation will be rolled back on the disk failure, the entry will disappear\n        // from the main directory, and the entry access will cause a cache miss.\n        let entry_path = self\n            .c\n            .entry_path(tenant_shard_id.tenant_id, timeline_id, req_lsn);\n        
tokio::fs::rename(&entry_tmp_path, &entry_path).await?;\n\n        self.upsert_entry(tenant_shard_id.tenant_id, timeline_id, entry)\n            .await;\n\n        self.prepare_ok_count.inc();\n        Ok(())\n    }\n\n    /// Prepares a basebackup in a temporary file.\n    /// Guarantees that the tmp file is fsynced before returning.\n    async fn prepare_basebackup_tmp(\n        &self,\n        entry_tmp_path: &Utf8Path,\n        timeline: &Arc<Timeline>,\n        req_lsn: Lsn,\n    ) -> anyhow::Result<CacheEntry> {\n        let ctx = RequestContext::new(TaskKind::BasebackupCache, DownloadBehavior::Download);\n        let ctx = ctx.with_scope_timeline(timeline);\n\n        let file = tokio::fs::File::create(entry_tmp_path).await?;\n        let mut writer = BufWriter::new(file);\n\n        // We may receive a request before the WAL record is applied to the timeline.\n        // Wait for the requested LSN to be applied.\n        timeline\n            .wait_lsn(\n                req_lsn,\n                crate::tenant::timeline::WaitLsnWaiter::BaseBackupCache,\n                crate::tenant::timeline::WaitLsnTimeout::Default,\n                &ctx,\n            )\n            .await?;\n\n        send_basebackup_tarball(\n            &mut writer,\n            timeline,\n            Some(req_lsn),\n            None,\n            false,\n            false,\n            // Level::Best because compression is not on the hot path of basebackup requests.\n            // The decompression is almost not affected by the compression level.\n            Some(async_compression::Level::Best),\n            &ctx,\n        )\n        .await?;\n\n        writer.flush().await?;\n        writer.into_inner().sync_all().await?;\n\n        // TODO(diko): we can count it via Writer wrapper instead of a syscall.\n        let size_bytes = tokio::fs::metadata(entry_tmp_path).await?.len();\n\n        Ok(CacheEntry {\n            lsn: req_lsn,\n            size_bytes,\n        })\n    }\n}\n"
  },
  {
    "path": "pageserver/src/bin/pageserver.rs",
    "content": "#![recursion_limit = \"300\"]\n\n//! Main entry point for the Page Server executable.\n\nuse std::env;\nuse std::env::{VarError, var};\nuse std::io::Read;\nuse std::str::FromStr;\nuse std::sync::Arc;\nuse std::time::Duration;\n\nuse anyhow::{Context, anyhow};\nuse camino::Utf8Path;\nuse clap::{Arg, ArgAction, Command};\nuse http_utils::tls_certs::ReloadingCertificateResolver;\nuse metrics::launch_timestamp::{LaunchTimestamp, set_launch_timestamp_metric};\nuse metrics::set_build_info_metric;\nuse nix::sys::socket::{setsockopt, sockopt};\nuse pageserver::basebackup_cache::BasebackupCache;\nuse pageserver::config::{PageServerConf, PageserverIdentity, ignored_fields};\nuse pageserver::controller_upcall_client::StorageControllerUpcallClient;\nuse pageserver::deletion_queue::DeletionQueue;\nuse pageserver::disk_usage_eviction_task::{self, launch_disk_usage_global_eviction_task};\nuse pageserver::feature_resolver::FeatureResolver;\nuse pageserver::metrics::{STARTUP_DURATION, STARTUP_IS_LOADING};\nuse pageserver::page_service::GrpcPageServiceHandler;\nuse pageserver::task_mgr::{\n    BACKGROUND_RUNTIME, COMPUTE_REQUEST_RUNTIME, MGMT_REQUEST_RUNTIME, WALRECEIVER_RUNTIME,\n};\nuse pageserver::tenant::{TenantSharedResources, mgr, secondary};\nuse pageserver::{\n    CancellableTask, ConsumptionMetricsTasks, HttpEndpointListener, HttpsEndpointListener,\n    MetricsCollectionTask, http, page_cache, page_service, task_mgr, virtual_file,\n};\nuse postgres_backend::AuthType;\nuse remote_storage::GenericRemoteStorage;\nuse tokio::time::Instant;\nuse tokio_util::sync::CancellationToken;\nuse tracing::*;\nuse tracing_utils::OtelGuard;\nuse utils::auth::{JwtAuth, SwappableJwtAuth};\nuse utils::crashsafe::syncfs;\nuse utils::logging::TracingErrorLayerEnablement;\nuse utils::metrics_collector::{METRICS_COLLECTION_INTERVAL, METRICS_COLLECTOR};\nuse utils::sentry_init::init_sentry;\nuse utils::{failpoint_support, logging, project_build_tag, project_git_version, 
tcp_listener};\n\nproject_git_version!(GIT_VERSION);\nproject_build_tag!(BUILD_TAG);\n\n#[global_allocator]\nstatic GLOBAL: tikv_jemallocator::Jemalloc = tikv_jemallocator::Jemalloc;\n\n/// Configure jemalloc to profile heap allocations by sampling stack traces every 2 MB (1 << 21).\n/// This adds roughly 3% overhead for allocations on average, which is acceptable considering\n/// performance-sensitive code will avoid allocations as far as possible anyway.\n#[allow(non_upper_case_globals)]\n#[unsafe(export_name = \"malloc_conf\")]\npub static malloc_conf: &[u8] = b\"prof:true,prof_active:true,lg_prof_sample:21\\0\";\n\nconst PID_FILE_NAME: &str = \"pageserver.pid\";\n\nconst FEATURES: &[&str] = &[\n    #[cfg(feature = \"testing\")]\n    \"testing\",\n];\n\nfn version() -> String {\n    format!(\n        \"{GIT_VERSION} failpoints: {}, features: {:?}\",\n        fail::has_failpoints(),\n        FEATURES,\n    )\n}\n\nfn main() -> anyhow::Result<()> {\n    let launch_ts = Box::leak(Box::new(LaunchTimestamp::generate()));\n\n    let arg_matches = cli().get_matches();\n\n    if arg_matches.get_flag(\"enabled-features\") {\n        println!(\"{{\\\"features\\\": {FEATURES:?} }}\");\n        return Ok(());\n    }\n\n    // Initialize up failpoints support\n    let scenario = failpoint_support::init();\n\n    let workdir = arg_matches\n        .get_one::<String>(\"workdir\")\n        .map(Utf8Path::new)\n        .unwrap_or_else(|| Utf8Path::new(\".neon\"));\n    let workdir = workdir\n        .canonicalize_utf8()\n        .with_context(|| format!(\"Error opening workdir '{workdir}'\"))?;\n\n    let cfg_file_path = workdir.join(\"pageserver.toml\");\n    let identity_file_path = workdir.join(\"identity.toml\");\n\n    // Set CWD to workdir for non-daemon modes\n    env::set_current_dir(&workdir)\n        .with_context(|| format!(\"Failed to set application's current dir to '{workdir}'\"))?;\n\n    let (conf, ignored) = initialize_config(&identity_file_path, &cfg_file_path, 
&workdir)?;\n\n    // Initialize logging.\n    //\n    // It must be initialized before the custom panic hook is installed below.\n    //\n    // Regarding tracing_error enablement: at this time, we only use the\n    // tracing_error crate to debug_assert that log spans contain tenant and timeline ids.\n    // See `debug_assert_current_span_has_tenant_and_timeline_id` in the timeline module\n    let tracing_error_layer_enablement = if cfg!(debug_assertions) {\n        TracingErrorLayerEnablement::EnableWithRustLogFilter\n    } else {\n        TracingErrorLayerEnablement::Disabled\n    };\n\n    logging::init(\n        conf.log_format,\n        tracing_error_layer_enablement,\n        logging::Output::Stdout,\n    )?;\n\n    let otel_enablement = match &conf.tracing {\n        Some(cfg) => tracing_utils::OtelEnablement::Enabled {\n            service_name: \"pageserver\".to_string(),\n            export_config: (&cfg.export_config).into(),\n        },\n        None => tracing_utils::OtelEnablement::Disabled,\n    };\n\n    let otel_guard = tracing_utils::init_performance_tracing(otel_enablement);\n\n    if otel_guard.is_some() {\n        info!(?conf.tracing, \"starting with OTEL tracing enabled\");\n    }\n\n    // mind the order required here: 1. logging, 2. panic_hook, 3. 
sentry.\n    // disarming this hook on pageserver, because we never tear down tracing.\n    logging::replace_panic_hook_with_tracing_panic_hook().forget();\n\n    // initialize sentry if SENTRY_DSN is provided\n    let _sentry_guard = init_sentry(\n        Some(GIT_VERSION.into()),\n        &[(\"node_id\", &conf.id.to_string())],\n    );\n\n    // Warn about ignored config items; see pageserver_api::config::ConfigToml\n    // doc comment for rationale why we prefer this over serde(deny_unknown_fields).\n    {\n        let ignored_fields::Paths { paths } = &ignored;\n        for path in paths {\n            warn!(?path, \"ignoring unknown configuration item\");\n        }\n    }\n\n    // Log configuration items for feature-flag-like config\n    // (maybe we should automate this with a visitor?).\n    info!(?conf.virtual_file_io_engine, \"starting with virtual_file IO engine\");\n    info!(?conf.virtual_file_io_mode, \"starting with virtual_file IO mode\");\n    info!(?conf.validate_wal_contiguity, \"starting with WAL contiguity validation\");\n    info!(?conf.page_service_pipelining, \"starting with page service pipelining config\");\n    info!(?conf.get_vectored_concurrent_io, \"starting with get_vectored IO concurrency config\");\n\n    // The tenants directory contains all the pageserver local disk state.\n    // Create if not exists and make sure all the contents are durable before proceeding.\n    // Ensuring durability eliminates a whole bug class where we come up after an unclean shutdown.\n    // After an unclean shutdown, we don't know if all the filesystem content we can read via syscalls is actually durable or not.\n    // Examples for that: OOM kill, systemd killing us during shutdown, self abort due to unrecoverable IO error.\n    let tenants_path = conf.tenants_path();\n    {\n        let open = || {\n            nix::dir::Dir::open(\n                tenants_path.as_std_path(),\n                nix::fcntl::OFlag::O_DIRECTORY | 
nix::fcntl::OFlag::O_RDONLY,\n                nix::sys::stat::Mode::empty(),\n            )\n        };\n        let dirfd = match open() {\n            Ok(dirfd) => dirfd,\n            Err(e) => match e {\n                nix::errno::Errno::ENOENT => {\n                    utils::crashsafe::create_dir_all(&tenants_path).with_context(|| {\n                        format!(\"Failed to create tenants root dir at '{tenants_path}'\")\n                    })?;\n                    open().context(\"open tenants dir after creating it\")?\n                }\n                e => anyhow::bail!(e),\n            },\n        };\n\n        if conf.no_sync {\n            info!(\"Skipping syncfs on startup\");\n        } else {\n            let started = Instant::now();\n            syncfs(dirfd)?;\n            let elapsed = started.elapsed();\n            info!(\n                elapsed_ms = elapsed.as_millis(),\n                \"made tenant directory contents durable\"\n            );\n        }\n    }\n\n    // Basic initialization of things that don't change after startup\n    tracing::info!(\"Initializing virtual_file...\");\n    virtual_file::init(\n        conf.max_file_descriptors,\n        conf.virtual_file_io_engine,\n        conf.virtual_file_io_mode,\n        if conf.no_sync {\n            virtual_file::SyncMode::UnsafeNoSync\n        } else {\n            virtual_file::SyncMode::Sync\n        },\n    );\n    tracing::info!(\"Initializing page_cache...\");\n    page_cache::init(conf.page_cache_size);\n\n    start_pageserver(launch_ts, conf, ignored, otel_guard).context(\"Failed to start pageserver\")?;\n\n    scenario.teardown();\n    Ok(())\n}\n\nfn initialize_config(\n    identity_file_path: &Utf8Path,\n    cfg_file_path: &Utf8Path,\n    workdir: &Utf8Path,\n) -> anyhow::Result<(&'static PageServerConf, ignored_fields::Paths)> {\n    // The deployment orchestrator writes out an identity file containing the node id\n    // for all pageservers. 
This file is the source of truth for the node id. In order\n    // to allow for rolling back pageserver releases, the node id is also included in\n    // the pageserver config that the deployment orchestrator writes to disk for the pageserver.\n    // A rolled back version of the pageserver will get the node id from the pageserver.toml\n    // config file.\n    let identity = match std::fs::File::open(identity_file_path) {\n        Ok(mut f) => {\n            let md = f.metadata().context(\"stat config file\")?;\n            if !md.is_file() {\n                anyhow::bail!(\n                    \"Pageserver found identity file but it is a dir entry: {identity_file_path}. Aborting start up ...\"\n                );\n            }\n\n            let mut s = String::new();\n            f.read_to_string(&mut s).context(\"read identity file\")?;\n            toml_edit::de::from_str::<PageserverIdentity>(&s)?\n        }\n        Err(e) => {\n            anyhow::bail!(\n                \"Pageserver could not read identity file: {identity_file_path}: {e}. 
Aborting start up ...\"\n            );\n        }\n    };\n\n    let config_file_contents =\n        std::fs::read_to_string(cfg_file_path).context(\"read config file from filesystem\")?;\n\n    // Deserialize the config file contents into a ConfigToml.\n    let config_toml: pageserver_api::config::ConfigToml = {\n        let deserializer = toml_edit::de::Deserializer::from_str(&config_file_contents)\n            .context(\"build toml deserializer\")?;\n        let mut path_to_error_track = serde_path_to_error::Track::new();\n        let deserializer =\n            serde_path_to_error::Deserializer::new(deserializer, &mut path_to_error_track);\n        serde::Deserialize::deserialize(deserializer).context(\"deserialize config toml\")?\n    };\n\n    // Find unknown fields by re-serializing the parsed ConfigToml and comparing it to the on-disk file.\n    // Any fields that are only in the on-disk version are unknown.\n    // (The assumption here is that the ConfigToml doesn't use skip_serializing_if.)\n    // (Make sure to read the ConfigToml doc comment on why we only want to warn about, but not fail startup, on unknown fields).\n    let ignored = {\n        let ondisk_toml = config_file_contents\n            .parse::<toml_edit::DocumentMut>()\n            .context(\"parse original config as toml document\")?;\n        let parsed_toml = toml_edit::ser::to_document(&config_toml)\n            .context(\"re-serialize config to toml document\")?;\n        pageserver::config::ignored_fields::find(ondisk_toml, parsed_toml)\n    };\n\n    // Construct the runtime god object (it's called PageServerConf but actually is just global shared state).\n    let conf = PageServerConf::parse_and_validate(identity.id, config_toml, workdir)\n        .context(\"runtime-validation of config toml\")?;\n    let conf = Box::leak(Box::new(conf));\n\n    Ok((conf, ignored))\n}\n\nstruct WaitForPhaseResult<F: std::future::Future + Unpin> {\n    timeout_remaining: Duration,\n    skipped: 
Option<F>,\n}\n\n/// During startup, we apply a timeout to our waits for readiness, to avoid\n/// stalling the whole service if one Tenant experiences some problem.  Each\n/// phase may consume some of the timeout: this function returns the updated\n/// timeout for use in the next call.\nasync fn wait_for_phase<F>(phase: &str, mut fut: F, timeout: Duration) -> WaitForPhaseResult<F>\nwhere\n    F: std::future::Future + Unpin,\n{\n    let initial_t = Instant::now();\n    let skipped = match tokio::time::timeout(timeout, &mut fut).await {\n        Ok(_) => None,\n        Err(_) => {\n            tracing::info!(\n                timeout_millis = timeout.as_millis(),\n                %phase,\n                \"Startup phase timed out, proceeding anyway\"\n            );\n            Some(fut)\n        }\n    };\n\n    WaitForPhaseResult {\n        timeout_remaining: timeout\n            .checked_sub(Instant::now().duration_since(initial_t))\n            .unwrap_or(Duration::ZERO),\n        skipped,\n    }\n}\n\nfn startup_checkpoint(started_at: Instant, phase: &str, human_phase: &str) {\n    let elapsed = started_at.elapsed();\n    let secs = elapsed.as_secs_f64();\n    STARTUP_DURATION.with_label_values(&[phase]).set(secs);\n\n    info!(\n        elapsed_ms = elapsed.as_millis(),\n        \"{human_phase} ({secs:.3}s since start)\"\n    )\n}\n\nfn start_pageserver(\n    launch_ts: &'static LaunchTimestamp,\n    conf: &'static PageServerConf,\n    ignored: ignored_fields::Paths,\n    otel_guard: Option<OtelGuard>,\n) -> anyhow::Result<()> {\n    // Monotonic time for later calculating startup duration\n    let started_startup_at = Instant::now();\n\n    // Print version and launch timestamp to the log,\n    // and expose them as prometheus metrics.\n    // A changed version string indicates changed software.\n    // A changed launch timestamp indicates a pageserver restart.\n    info!(\n        \"version: {} launch_timestamp: {} build_tag: {}\",\n        version(),\n     
   launch_ts.to_string(),\n        BUILD_TAG,\n    );\n    info!(\n        \"IO buffer alignment: {} bytes\",\n        pageserver_api::config::defaults::DEFAULT_IO_BUFFER_ALIGNMENT\n    );\n    set_build_info_metric(GIT_VERSION, BUILD_TAG);\n    set_launch_timestamp_metric(launch_ts);\n    #[cfg(target_os = \"linux\")]\n    metrics::register_internal(Box::new(metrics::more_process_metrics::Collector::new())).unwrap();\n    metrics::register_internal(Box::new(\n        pageserver::metrics::tokio_epoll_uring::Collector::new(),\n    ))\n    .unwrap();\n    pageserver::preinitialize_metrics(conf, ignored);\n\n    // If any failpoints were set from FAILPOINTS environment variable,\n    // print them to the log for debugging purposes\n    let failpoints = fail::list();\n    if !failpoints.is_empty() {\n        info!(\n            \"started with failpoints: {}\",\n            failpoints\n                .iter()\n                .map(|(name, actions)| format!(\"{name}={actions}\"))\n                .collect::<Vec<String>>()\n                .join(\";\")\n        )\n    }\n\n    // Create and lock PID file. 
This ensures that there cannot be more than one\n    // pageserver process running at the same time.\n    let lock_file_path = conf.workdir.join(PID_FILE_NAME);\n    info!(\"Claiming pid file at {lock_file_path:?}...\");\n    let lock_file =\n        utils::pid_file::claim_for_current_process(&lock_file_path).context(\"claim pid file\")?;\n    info!(\"Claimed pid file at {lock_file_path:?}\");\n\n    // Ensure that the lock file is held even if the main thread of the process panics.\n    // We need to release the lock file only when the process exits.\n    std::mem::forget(lock_file);\n\n    // Bind the HTTP, libpq, and gRPC ports early, to error out if they are\n    // already in use.\n    info!(\n        \"Starting pageserver http handler on {} with auth {:#?}\",\n        conf.listen_http_addr, conf.http_auth_type\n    );\n    let http_listener = tcp_listener::bind(&conf.listen_http_addr)?;\n\n    let https_listener = match conf.listen_https_addr.as_ref() {\n        Some(https_addr) => {\n            info!(\n                \"Starting pageserver https handler on {https_addr} with auth {:#?}\",\n                conf.http_auth_type\n            );\n            Some(tcp_listener::bind(https_addr)?)\n        }\n        None => None,\n    };\n\n    info!(\n        \"Starting pageserver pg protocol handler on {} with auth {:#?}\",\n        conf.listen_pg_addr, conf.pg_auth_type,\n    );\n    let pageserver_listener = tcp_listener::bind(&conf.listen_pg_addr)?;\n\n    // Enable SO_KEEPALIVE on the socket, to detect dead connections faster.\n    // These are configured via net.ipv4.tcp_keepalive_* sysctls.\n    //\n    // TODO: also set this on the walreceiver socket, but tokio-postgres doesn't\n    // support enabling keepalives while using the default OS sysctls.\n    setsockopt(&pageserver_listener, sockopt::KeepAlive, &true)?;\n\n    let mut grpc_listener = None;\n    if let Some(grpc_addr) = &conf.listen_grpc_addr {\n        info!(\n            \"Starting pageserver 
gRPC handler on {grpc_addr} with auth {:#?}\",\n            conf.grpc_auth_type\n        );\n        grpc_listener = Some(tcp_listener::bind(grpc_addr).map_err(|e| anyhow!(\"{e}\"))?);\n    }\n\n    // Launch broker client\n    // The storage_broker::connect call needs to happen inside a tokio runtime thread.\n    let broker_client = WALRECEIVER_RUNTIME\n        .block_on(async {\n            let tls_config = storage_broker::ClientTlsConfig::new().ca_certificates(\n                conf.ssl_ca_certs\n                    .iter()\n                    .map(pem::encode)\n                    .map(storage_broker::Certificate::from_pem),\n            );\n            // Note: we do not attempt connecting here (but validate endpoints sanity).\n            storage_broker::connect(\n                conf.broker_endpoint.clone(),\n                conf.broker_keepalive_interval,\n                tls_config,\n            )\n        })\n        .with_context(|| {\n            format!(\n                \"create broker client for uri={:?} keepalive_interval={:?}\",\n                &conf.broker_endpoint, conf.broker_keepalive_interval,\n            )\n        })?;\n\n    // Initialize authentication for incoming connections\n    let http_auth;\n    let pg_auth;\n    let grpc_auth;\n    if [conf.http_auth_type, conf.pg_auth_type, conf.grpc_auth_type].contains(&AuthType::NeonJWT) {\n        // unwrap is ok because check is performed when creating config, so path is set and exists\n        let key_path = conf.auth_validation_public_key_path.as_ref().unwrap();\n        info!(\"Loading public key(s) for verifying JWT tokens from {key_path:?}\");\n\n        let jwt_auth = JwtAuth::from_key_path(key_path)?;\n        let auth: Arc<SwappableJwtAuth> = Arc::new(SwappableJwtAuth::new(jwt_auth));\n\n        http_auth = match conf.http_auth_type {\n            AuthType::Trust => None,\n            AuthType::NeonJWT => Some(auth.clone()),\n        };\n        pg_auth = match conf.pg_auth_type {\n  
          AuthType::Trust => None,\n            AuthType::NeonJWT => Some(auth.clone()),\n        };\n        grpc_auth = match conf.grpc_auth_type {\n            AuthType::Trust => None,\n            AuthType::NeonJWT => Some(auth),\n        };\n    } else {\n        http_auth = None;\n        pg_auth = None;\n        grpc_auth = None;\n    }\n\n    let tls_server_config = if conf.listen_https_addr.is_some() || conf.enable_tls_page_service_api\n    {\n        let resolver = BACKGROUND_RUNTIME.block_on(ReloadingCertificateResolver::new(\n            \"main\",\n            &conf.ssl_key_file,\n            &conf.ssl_cert_file,\n            conf.ssl_cert_reload_period,\n        ))?;\n\n        let server_config = rustls::ServerConfig::builder()\n            .with_no_client_auth()\n            .with_cert_resolver(resolver);\n\n        Some(Arc::new(server_config))\n    } else {\n        None\n    };\n\n    match var(\"NEON_AUTH_TOKEN\") {\n        Ok(v) => {\n            info!(\"Loaded JWT token for authentication with Safekeeper\");\n            pageserver::config::SAFEKEEPER_AUTH_TOKEN\n                .set(Arc::new(v))\n                .map_err(|_| anyhow!(\"Could not initialize SAFEKEEPER_AUTH_TOKEN\"))?;\n        }\n        Err(VarError::NotPresent) => {\n            info!(\"No JWT token for authentication with Safekeeper detected\");\n        }\n        Err(e) => return Err(e).with_context(\n            || \"Failed to either load to detect non-present NEON_AUTH_TOKEN environment variable\",\n        ),\n    };\n\n    // Top-level cancellation token for the process\n    let shutdown_pageserver = tokio_util::sync::CancellationToken::new();\n\n    // Set up remote storage client\n    let remote_storage = BACKGROUND_RUNTIME.block_on(create_remote_storage_client(conf))?;\n\n    let feature_resolver = create_feature_resolver(\n        conf,\n        shutdown_pageserver.clone(),\n        BACKGROUND_RUNTIME.handle(),\n    )?;\n\n    // Set up deletion queue\n    let 
(deletion_queue, deletion_workers) = DeletionQueue::new(\n        remote_storage.clone(),\n        StorageControllerUpcallClient::new(conf, &shutdown_pageserver),\n        conf,\n    );\n    deletion_workers.spawn_with(BACKGROUND_RUNTIME.handle());\n\n    // Up to this point no significant I/O has been done: this should have been fast.  Record\n    // duration prior to starting I/O intensive phase of startup.\n    startup_checkpoint(started_startup_at, \"initial\", \"Starting loading tenants\");\n    STARTUP_IS_LOADING.set(1);\n\n    // Startup staging or optimizing:\n    //\n    // We want to minimize downtime for `page_service` connections, and trying not to overload\n    // BACKGROUND_RUNTIME by doing initial compactions and initial logical sizes at the same time.\n    //\n    // init_done_rx will notify when all initial load operations have completed.\n    //\n    // background_jobs_can_start (same name used to hold off background jobs from starting at\n    // consumer side) will be dropped once we can start the background jobs. 
Currently it is behind\n    // completing all initial logical size calculations (init_logical_size_done_rx) and a timeout\n    // (background_task_maximum_delay).\n    let (init_remote_done_tx, init_remote_done_rx) = utils::completion::channel();\n    let (init_done_tx, init_done_rx) = utils::completion::channel();\n\n    let (background_jobs_can_start, background_jobs_barrier) = utils::completion::channel();\n\n    let order = pageserver::InitializationOrder {\n        initial_tenant_load_remote: Some(init_done_tx),\n        initial_tenant_load: Some(init_remote_done_tx),\n        background_jobs_can_start: background_jobs_barrier.clone(),\n    };\n\n    info!(config=?conf.l0_flush, \"using l0_flush config\");\n    let l0_flush_global_state =\n        pageserver::l0_flush::L0FlushGlobalState::new(conf.l0_flush.clone());\n\n    // Scan the local 'tenants/' directory and start loading the tenants\n    let (basebackup_cache, basebackup_prepare_receiver) = BasebackupCache::new(\n        conf.basebackup_cache_dir(),\n        conf.basebackup_cache_config.clone(),\n    );\n    let deletion_queue_client = deletion_queue.new_client();\n    let background_purges = mgr::BackgroundPurges::default();\n\n    let tenant_manager = mgr::init(\n        conf,\n        background_purges.clone(),\n        TenantSharedResources {\n            broker_client: broker_client.clone(),\n            remote_storage: remote_storage.clone(),\n            deletion_queue_client,\n            l0_flush_global_state,\n            basebackup_cache: Arc::clone(&basebackup_cache),\n            feature_resolver: feature_resolver.clone(),\n        },\n        shutdown_pageserver.clone(),\n    );\n    let tenant_manager = Arc::new(tenant_manager);\n    BACKGROUND_RUNTIME.block_on(mgr::init_tenant_mgr(tenant_manager.clone(), order))?;\n\n    basebackup_cache.spawn_background_task(\n        BACKGROUND_RUNTIME.handle(),\n        basebackup_prepare_receiver,\n        Arc::clone(&tenant_manager),\n        
shutdown_pageserver.child_token(),\n    );\n\n    BACKGROUND_RUNTIME.spawn({\n        let shutdown_pageserver = shutdown_pageserver.clone();\n        let drive_init = async move {\n            // NOTE: unlike many futures in pageserver, this one is cancellation-safe\n            let guard = scopeguard::guard_on_success((), |_| {\n                tracing::info!(\"Cancelled before initial load completed\")\n            });\n\n            let timeout = conf.background_task_maximum_delay;\n\n            let init_remote_done = std::pin::pin!(async {\n                init_remote_done_rx.wait().await;\n                startup_checkpoint(\n                    started_startup_at,\n                    \"initial_tenant_load_remote\",\n                    \"Remote part of initial load completed\",\n                );\n            });\n\n            let WaitForPhaseResult {\n                timeout_remaining: timeout,\n                skipped: init_remote_skipped,\n            } = wait_for_phase(\"initial_tenant_load_remote\", init_remote_done, timeout).await;\n\n            let init_load_done = std::pin::pin!(async {\n                init_done_rx.wait().await;\n                startup_checkpoint(\n                    started_startup_at,\n                    \"initial_tenant_load\",\n                    \"Initial load completed\",\n                );\n                STARTUP_IS_LOADING.set(0);\n            });\n\n            let WaitForPhaseResult {\n                timeout_remaining: _timeout,\n                skipped: init_load_skipped,\n            } = wait_for_phase(\"initial_tenant_load\", init_load_done, timeout).await;\n\n            // initial logical sizes can now start, as they were waiting on init_done_rx.\n\n            scopeguard::ScopeGuard::into_inner(guard);\n\n            // allow background jobs to start: we either completed prior stages, or they reached timeout\n            // and were skipped.  
It is important that we do not let them block background jobs indefinitely,\n            // because things like consumption metrics for billing are blocked by this barrier.\n            drop(background_jobs_can_start);\n            startup_checkpoint(\n                started_startup_at,\n                \"background_jobs_can_start\",\n                \"Starting background jobs\",\n            );\n\n            // We are done. If we skipped any phases due to timeout, run them to completion here so that\n            // they will eventually update their startup_checkpoint, and so that we do not declare the\n            // 'complete' stage until all the other stages are really done.\n            let guard = scopeguard::guard_on_success((), |_| {\n                tracing::info!(\"Cancelled before waiting for skipped phases done\")\n            });\n            if let Some(f) = init_remote_skipped {\n                f.await;\n            }\n            if let Some(f) = init_load_skipped {\n                f.await;\n            }\n            scopeguard::ScopeGuard::into_inner(guard);\n\n            startup_checkpoint(started_startup_at, \"complete\", \"Startup complete\");\n        };\n\n        async move {\n            let mut drive_init = std::pin::pin!(drive_init);\n            // just race these tasks\n            tokio::select! {\n                _ = shutdown_pageserver.cancelled() => {},\n                _ = &mut drive_init => {},\n            }\n        }\n    });\n\n    let (secondary_controller, secondary_controller_tasks) = secondary::spawn_tasks(\n        tenant_manager.clone(),\n        remote_storage.clone(),\n        background_jobs_barrier.clone(),\n        shutdown_pageserver.clone(),\n    );\n\n    // shared state between the disk-usage backed eviction background task and the http endpoint\n    // that allows triggering disk-usage based eviction manually. 
note that the http endpoint\n    // is still accessible even if background task is not configured as long as remote storage has\n    // been configured.\n    let disk_usage_eviction_state: Arc<disk_usage_eviction_task::State> = Arc::default();\n\n    let disk_usage_eviction_task = launch_disk_usage_global_eviction_task(\n        conf,\n        remote_storage.clone(),\n        disk_usage_eviction_state.clone(),\n        tenant_manager.clone(),\n        background_jobs_barrier.clone(),\n    );\n\n    // Start up the service to handle HTTP mgmt API request. We created the\n    // listener earlier already.\n    let (http_endpoint_listener, https_endpoint_listener) = {\n        let _rt_guard = MGMT_REQUEST_RUNTIME.enter(); // for hyper\n\n        let router_state = Arc::new(\n            http::routes::State::new(\n                conf,\n                tenant_manager.clone(),\n                http_auth.clone(),\n                remote_storage.clone(),\n                broker_client.clone(),\n                disk_usage_eviction_state,\n                deletion_queue.new_client(),\n                secondary_controller,\n                feature_resolver.clone(),\n            )\n            .context(\"Failed to initialize router state\")?,\n        );\n\n        let router = http::make_router(router_state, launch_ts, http_auth.clone())?\n            .build()\n            .map_err(|err| anyhow!(err))?;\n\n        let service =\n            Arc::new(http_utils::RequestServiceBuilder::new(router).map_err(|err| anyhow!(err))?);\n\n        let http_task = {\n            let server =\n                http_utils::server::Server::new(Arc::clone(&service), http_listener, None)?;\n            let cancel = CancellationToken::new();\n\n            let task = MGMT_REQUEST_RUNTIME.spawn(task_mgr::exit_on_panic_or_error(\n                \"http endpoint listener\",\n                server.serve(cancel.clone()),\n            ));\n            HttpEndpointListener(CancellableTask { task, 
cancel })\n        };\n\n        let https_task = match https_listener {\n            Some(https_listener) => {\n                let tls_server_config = tls_server_config\n                    .clone()\n                    .expect(\"tls_server_config is set earlier if https is enabled\");\n\n                let tls_acceptor = tokio_rustls::TlsAcceptor::from(tls_server_config);\n\n                let server =\n                    http_utils::server::Server::new(service, https_listener, Some(tls_acceptor))?;\n                let cancel = CancellationToken::new();\n\n                let task = MGMT_REQUEST_RUNTIME.spawn(task_mgr::exit_on_panic_or_error(\n                    \"https endpoint listener\",\n                    server.serve(cancel.clone()),\n                ));\n                Some(HttpsEndpointListener(CancellableTask { task, cancel }))\n            }\n            None => None,\n        };\n\n        (http_task, https_task)\n    };\n\n    /* BEGIN_HADRON */\n    let metrics_collection_task = {\n        let cancel = shutdown_pageserver.child_token();\n        let task = crate::BACKGROUND_RUNTIME.spawn({\n            let cancel = cancel.clone();\n            let background_jobs_barrier = background_jobs_barrier.clone();\n            async move {\n                if conf.force_metric_collection_on_scrape {\n                    return;\n                }\n\n                // first wait until background jobs are cleared to launch.\n                tokio::select! {\n                    _ = cancel.cancelled() => { return; },\n                    _ = background_jobs_barrier.wait() => {}\n                };\n                let mut interval = tokio::time::interval(METRICS_COLLECTION_INTERVAL);\n                loop {\n                    tokio::select! 
{\n                        _ = cancel.cancelled() => {\n                            tracing::info!(\"cancelled metrics collection task, exiting...\");\n                             break;\n                        },\n                        _ = interval.tick() => {}\n                    }\n                    tokio::task::spawn_blocking(|| {\n                        METRICS_COLLECTOR.run_once(true);\n                    });\n                }\n            }\n        });\n        MetricsCollectionTask(CancellableTask { task, cancel })\n    };\n    /* END_HADRON */\n\n    let consumption_metrics_tasks = {\n        let cancel = shutdown_pageserver.child_token();\n        let task = crate::BACKGROUND_RUNTIME.spawn({\n            let tenant_manager = tenant_manager.clone();\n            let cancel = cancel.clone();\n            async move {\n                // first wait until background jobs are cleared to launch.\n                //\n                // this is because we only process active tenants and timelines, and the\n                // Timeline::get_current_logical_size will spawn the logical size calculation,\n                // which will not be rate-limited.\n                tokio::select! {\n                    _ = cancel.cancelled() => { return; },\n                    _ = background_jobs_barrier.wait() => {}\n                };\n\n                pageserver::consumption_metrics::run(conf, tenant_manager, cancel).await;\n            }\n        });\n        ConsumptionMetricsTasks(CancellableTask { task, cancel })\n    };\n\n    // Spawn a task to listen for libpq connections. It will spawn further tasks\n    // for each connection. 
We created the listener earlier already.\n    let perf_trace_dispatch = otel_guard.as_ref().map(|g| g.dispatch.clone());\n    let page_service = page_service::spawn(\n        conf,\n        tenant_manager.clone(),\n        pg_auth,\n        perf_trace_dispatch,\n        {\n            let _entered = COMPUTE_REQUEST_RUNTIME.enter(); // TcpListener::from_std requires it\n            pageserver_listener\n                .set_nonblocking(true)\n                .context(\"set listener to nonblocking\")?;\n            tokio::net::TcpListener::from_std(pageserver_listener)\n                .context(\"create tokio listener\")?\n        },\n        if conf.enable_tls_page_service_api {\n            tls_server_config\n        } else {\n            None\n        },\n        feature_resolver.clone(),\n    );\n\n    // Spawn a Pageserver gRPC server task. It will spawn separate tasks for each request/stream.\n    // It uses a separate compute request Tokio runtime (COMPUTE_REQUEST_RUNTIME).\n    //\n    // NB: this port is exposed to computes. It should only provide services that we're okay with\n    // computes accessing. Internal services should use a separate port.\n    let mut page_service_grpc = None;\n    if let Some(grpc_listener) = grpc_listener {\n        page_service_grpc = Some(GrpcPageServiceHandler::spawn(\n            tenant_manager.clone(),\n            grpc_auth,\n            otel_guard.as_ref().map(|g| g.dispatch.clone()),\n            conf.get_vectored_concurrent_io,\n            grpc_listener,\n        )?);\n    }\n\n    // All started up! Now just sit and wait for shutdown signal.\n    BACKGROUND_RUNTIME.block_on(async move {\n        let signal_token = CancellationToken::new();\n        let signal_cancel = signal_token.child_token();\n\n        tokio::spawn(utils::signals::signal_handler(signal_token));\n\n        // Wait for cancellation signal and shut down the pageserver.\n        //\n        // This cancels the `shutdown_pageserver` cancellation tree. 
Right now that tree doesn't\n        // reach very far, and `task_mgr` is used instead. The plan is to change that over time.\n        signal_cancel.cancelled().await;\n\n        shutdown_pageserver.cancel();\n        pageserver::shutdown_pageserver(\n            http_endpoint_listener,\n            https_endpoint_listener,\n            page_service,\n            page_service_grpc,\n            metrics_collection_task,\n            consumption_metrics_tasks,\n            disk_usage_eviction_task,\n            &tenant_manager,\n            background_purges,\n            deletion_queue.clone(),\n            secondary_controller_tasks,\n            0,\n        )\n        .await;\n        unreachable!();\n    })\n}\n\nfn create_feature_resolver(\n    conf: &'static PageServerConf,\n    shutdown_pageserver: CancellationToken,\n    handle: &tokio::runtime::Handle,\n) -> anyhow::Result<FeatureResolver> {\n    FeatureResolver::spawn(conf, shutdown_pageserver, handle)\n}\n\nasync fn create_remote_storage_client(\n    conf: &'static PageServerConf,\n) -> anyhow::Result<GenericRemoteStorage> {\n    let config = if let Some(config) = &conf.remote_storage_config {\n        config\n    } else {\n        anyhow::bail!(\"no remote storage configured, this is a deprecated configuration\");\n    };\n\n    // Create the client\n    let mut remote_storage = GenericRemoteStorage::from_config(config).await?;\n\n    // If `test_remote_failures` is non-zero, wrap the client with a\n    // wrapper that simulates failures.\n    if conf.test_remote_failures > 0 {\n        info!(\n            \"Simulating remote failures for first {} attempts of each op\",\n            conf.test_remote_failures\n        );\n        remote_storage = GenericRemoteStorage::unreliable_wrapper(\n            remote_storage,\n            conf.test_remote_failures,\n            conf.test_remote_failures_probability,\n        );\n    }\n\n    Ok(remote_storage)\n}\n\nfn cli() -> Command {\n    Command::new(\"Neon 
page server\")\n        .about(\"Materializes WAL stream to pages and serves them to the postgres\")\n        .version(version())\n        .arg(\n            Arg::new(\"workdir\")\n                .short('D')\n                .long(\"workdir\")\n                .help(\"Working directory for the pageserver\"),\n        )\n        .arg(\n            Arg::new(\"enabled-features\")\n                .long(\"enabled-features\")\n                .action(ArgAction::SetTrue)\n                .help(\"Show enabled compile time features\"),\n        )\n}\n\n#[test]\nfn verify_cli() {\n    cli().debug_assert();\n}\n"
  },
  {
    "path": "pageserver/src/bin/test_helper_slow_client_reads.rs",
    "content": "use std::io::{Read, Write, stdin, stdout};\nuse std::time::Duration;\n\nuse clap::Parser;\nuse pageserver_api::pagestream_api::{\n    PagestreamFeMessage, PagestreamRequest, PagestreamTestRequest,\n};\nuse utils::id::{TenantId, TimelineId};\nuse utils::lsn::Lsn;\n\n#[derive(clap::Parser)]\nstruct Args {\n    connstr: String,\n    tenant_id: TenantId,\n    timeline_id: TimelineId,\n}\n\n#[tokio::main]\nasync fn main() -> anyhow::Result<()> {\n    let Args {\n        connstr,\n        tenant_id,\n        timeline_id,\n    } = Args::parse();\n    let client = pageserver_client::page_service::Client::new(connstr).await?;\n    let client = client.pagestream(tenant_id, timeline_id).await?;\n    let (mut sender, _receiver) = client.split();\n\n    eprintln!(\"filling the pipe\");\n    let mut msg = 0;\n    loop {\n        msg += 1;\n        let fut = sender.send(PagestreamFeMessage::Test(PagestreamTestRequest {\n            hdr: PagestreamRequest {\n                reqid: 0,\n                request_lsn: Lsn(23),\n                not_modified_since: Lsn(23),\n            },\n            batch_key: 42,\n            message: format!(\"message {msg}\"),\n        }));\n        let Ok(res) = tokio::time::timeout(Duration::from_secs(10), fut).await else {\n            eprintln!(\"pipe seems full\");\n            break;\n        };\n        let _: () = res?;\n    }\n\n    let n = stdout().write(b\"R\")?;\n    assert_eq!(n, 1);\n    stdout().flush()?;\n\n    eprintln!(\"waiting for signal to tell us to exit\");\n\n    let mut buf = [0u8; 1];\n    stdin().read_exact(&mut buf)?;\n\n    eprintln!(\"termination signal received, exiting\");\n\n    anyhow::Ok(())\n}\n"
  },
  {
    "path": "pageserver/src/config/ignored_fields.rs",
    "content": "//! Check for fields in the on-disk config file that were ignored when\n//! deserializing [`pageserver_api::config::ConfigToml`].\n//!\n//! This could have been part of the [`pageserver_api::config`] module,\n//! but the way we identify unused fields in this module\n//! is specific to the format (TOML) and the implementation of the\n//! deserialization for that format ([`toml_edit`]).\n\nuse std::collections::HashSet;\n\nuse itertools::Itertools;\n\n/// Pass in the user-specified config and the re-serialized [`pageserver_api::config::ConfigToml`].\n/// The returned [`Paths`] contains the paths to the fields that were ignored by deserialization\n/// of the [`pageserver_api::config::ConfigToml`].\npub fn find(user_specified: toml_edit::DocumentMut, reserialized: toml_edit::DocumentMut) -> Paths {\n    let user_specified = paths(user_specified);\n    let reserialized = paths(reserialized);\n    fn paths(doc: toml_edit::DocumentMut) -> HashSet<String> {\n        let mut out = Vec::new();\n        let mut visitor = PathsVisitor::new(&mut out);\n        visitor.visit_table_like(doc.as_table());\n        HashSet::from_iter(out)\n    }\n\n    let mut ignored = HashSet::new();\n\n    // O(n) because of HashSet\n    for path in user_specified {\n        if !reserialized.contains(&path) {\n            ignored.insert(path);\n        }\n    }\n\n    Paths {\n        paths: ignored\n            .into_iter()\n            // sort lexicographically for deterministic output\n            .sorted()\n            .collect(),\n    }\n}\n\npub struct Paths {\n    pub paths: Vec<String>,\n}\n\nstruct PathsVisitor<'a> {\n    stack: Vec<String>,\n    out: &'a mut Vec<String>,\n}\n\nimpl<'a> PathsVisitor<'a> {\n    fn new(out: &'a mut Vec<String>) -> Self {\n        Self {\n            stack: Vec::new(),\n            out,\n        }\n    }\n\n    fn visit_table_like(&mut self, table_like: &dyn toml_edit::TableLike) {\n        for (entry, item) in table_like.iter() {\n          
  self.stack.push(entry.to_string());\n            self.visit_item(item);\n            self.stack.pop();\n        }\n    }\n\n    fn visit_item(&mut self, item: &toml_edit::Item) {\n        match item {\n            toml_edit::Item::None => (),\n            toml_edit::Item::Value(value) => self.visit_value(value),\n            toml_edit::Item::Table(table) => {\n                self.visit_table_like(table);\n            }\n            toml_edit::Item::ArrayOfTables(array_of_tables) => {\n                for (i, table) in array_of_tables.iter().enumerate() {\n                    self.stack.push(format!(\"[{i}]\"));\n                    self.visit_table_like(table);\n                    self.stack.pop();\n                }\n            }\n        }\n    }\n\n    fn visit_value(&mut self, value: &toml_edit::Value) {\n        match value {\n            toml_edit::Value::String(_)\n            | toml_edit::Value::Integer(_)\n            | toml_edit::Value::Float(_)\n            | toml_edit::Value::Boolean(_)\n            | toml_edit::Value::Datetime(_) => self.out.push(self.stack.join(\".\")),\n            toml_edit::Value::Array(array) => {\n                for (i, value) in array.iter().enumerate() {\n                    self.stack.push(format!(\"[{i}]\"));\n                    self.visit_value(value);\n                    self.stack.pop();\n                }\n            }\n            toml_edit::Value::InlineTable(inline_table) => self.visit_table_like(inline_table),\n        }\n    }\n}\n\n#[cfg(test)]\npub(crate) mod tests {\n\n    fn test_impl(original: &str, parsed: &str, expect: [&str; 1]) {\n        let original: toml_edit::DocumentMut = original.parse().expect(\"parse original config\");\n        let parsed: toml_edit::DocumentMut = parsed.parse().expect(\"parse re-serialized config\");\n\n        let super::Paths { paths: actual } = super::find(original, parsed);\n        assert_eq!(actual, &expect);\n    }\n\n    #[test]\n    fn top_level() {\n        
test_impl(\n            r#\"\n                [a]\n                b = 1\n                c = 2\n                d = 3\n            \"#,\n            r#\"\n                [a]\n                b = 1\n                c = 2\n            \"#,\n            [\"a.d\"],\n        );\n    }\n\n    #[test]\n    fn nested() {\n        test_impl(\n            r#\"\n                [a.b.c]\n                d = 23\n            \"#,\n            r#\"\n                [a]\n                e = 42\n            \"#,\n            [\"a.b.c.d\"],\n        );\n    }\n\n    #[test]\n    fn array_of_tables() {\n        test_impl(\n            r#\"\n                [[a]]\n                b = 1\n                c = 2\n                d = 3\n            \"#,\n            r#\"\n                [[a]]\n                b = 1\n                c = 2\n            \"#,\n            [\"a.[0].d\"],\n        );\n    }\n\n    #[test]\n    fn array() {\n        test_impl(\n            r#\"\n            foo = [ {bar = 23} ]\n            \"#,\n            r#\"\n            foo = [ { blup = 42 }]\n            \"#,\n            [\"foo.[0].bar\"],\n        );\n    }\n}\n"
  },
  {
    "path": "pageserver/src/config.rs",
    "content": "//! Functions for handling page server configuration options\n//!\n//! Configuration options can be set in the pageserver.toml configuration\n//! file, or on the command line.\n//! See also `settings.md` for better description on every parameter.\n\npub mod ignored_fields;\n\nuse std::env;\nuse std::num::NonZeroUsize;\nuse std::sync::Arc;\nuse std::time::Duration;\n\nuse anyhow::{Context, ensure};\nuse camino::{Utf8Path, Utf8PathBuf};\nuse once_cell::sync::OnceCell;\nuse pageserver_api::config::{\n    DiskUsageEvictionTaskConfig, MaxGetVectoredKeys, MaxVectoredReadBytes,\n    PageServicePipeliningConfig, PageServicePipeliningConfigPipelined, PostHogConfig,\n};\nuse pageserver_api::models::ImageCompressionAlgorithm;\nuse pageserver_api::shard::TenantShardId;\nuse pem::Pem;\nuse postgres_backend::AuthType;\nuse postgres_ffi::PgMajorVersion;\nuse remote_storage::{RemotePath, RemoteStorageConfig};\nuse reqwest::Url;\nuse storage_broker::Uri;\nuse utils::id::{NodeId, TimelineId};\nuse utils::logging::{LogFormat, SecretString};\n\nuse crate::tenant::storage_layer::inmemory_layer::IndexEntry;\nuse crate::tenant::{TENANTS_SEGMENT_NAME, TIMELINES_SEGMENT_NAME};\nuse crate::virtual_file::io_engine;\nuse crate::{TENANT_HEATMAP_BASENAME, TENANT_LOCATION_CONFIG_NAME, virtual_file};\n\n/// Global state of pageserver.\n///\n/// It's mostly immutable configuration, but some semaphores and the\n/// like crept in over time and the name stuck.\n///\n/// Instantiated by deserializing `pageserver.toml` into  [`pageserver_api::config::ConfigToml`]\n/// and passing that to [`PageServerConf::parse_and_validate`].\n///\n/// # Adding a New Field\n///\n/// 1. Add the field to `pageserver_api::config::ConfigToml`.\n/// 2. 
Fix compiler errors (exhaustive destructuring will guide you).\n///\n/// For fields that require additional validation or filling in of defaults at runtime,\n/// check for examples in the [`PageServerConf::parse_and_validate`] method.\n#[derive(Debug, Clone)]\npub struct PageServerConf {\n    // Identifier of that particular pageserver so e g safekeepers\n    // can safely distinguish different pageservers\n    pub id: NodeId,\n\n    /// Example (default): 127.0.0.1:64000\n    pub listen_pg_addr: String,\n    /// Example (default): 127.0.0.1:9898\n    pub listen_http_addr: String,\n    /// Example: 127.0.0.1:9899\n    pub listen_https_addr: Option<String>,\n    /// If set, expose a gRPC API on this address.\n    /// Example: 127.0.0.1:51051\n    ///\n    /// EXPERIMENTAL: this protocol is unstable and under active development.\n    pub listen_grpc_addr: Option<String>,\n\n    /// Path to a file with certificate's private key for https and gRPC API.\n    /// Default: server.key\n    pub ssl_key_file: Utf8PathBuf,\n    /// Path to a file with a X509 certificate for https and gRPC API.\n    /// Default: server.crt\n    pub ssl_cert_file: Utf8PathBuf,\n    /// Period to reload certificate and private key from files.\n    /// Default: 60s.\n    pub ssl_cert_reload_period: Duration,\n    /// Trusted root CA certificates to use in https APIs in PEM format.\n    pub ssl_ca_certs: Vec<Pem>,\n\n    /// Current availability zone. 
Used for traffic metrics.\n    pub availability_zone: Option<String>,\n\n    // Timeout when waiting for WAL receiver to catch up to an LSN given in a GetPage@LSN call.\n    pub wait_lsn_timeout: Duration,\n    // How long to wait for WAL redo to complete.\n    pub wal_redo_timeout: Duration,\n\n    pub superuser: String,\n    pub locale: String,\n\n    pub page_cache_size: usize,\n    pub max_file_descriptors: usize,\n\n    // Repository directory, relative to current working directory.\n    // Normally, the page server changes the current working directory\n    // to the repository, and 'workdir' is always '.'. But we don't do\n    // that during unit testing, because the current directory is global\n    // to the process but different unit tests work on different\n    // repositories.\n    pub workdir: Utf8PathBuf,\n\n    pub pg_distrib_dir: Utf8PathBuf,\n\n    // Authentication\n    /// authentication method for the HTTP mgmt API\n    pub http_auth_type: AuthType,\n    /// authentication method for libpq connections from compute\n    pub pg_auth_type: AuthType,\n    /// authentication method for gRPC connections from compute\n    pub grpc_auth_type: AuthType,\n    /// Path to a file or directory containing public key(s) for verifying JWT tokens.\n    /// Used for both mgmt and compute auth, if enabled.\n    pub auth_validation_public_key_path: Option<Utf8PathBuf>,\n\n    pub remote_storage_config: Option<RemoteStorageConfig>,\n\n    pub default_tenant_conf: pageserver_api::config::TenantConfigToml,\n\n    /// Storage broker endpoints to connect to.\n    pub broker_endpoint: Uri,\n    pub broker_keepalive_interval: Duration,\n\n    pub log_format: LogFormat,\n\n    /// Number of tenants which will be concurrently loaded from remote storage proactively on startup or attach.\n    ///\n    /// A lower value implicitly deprioritizes loading such tenants, vs. 
other work in the system.\n    pub concurrent_tenant_warmup: ConfigurableSemaphore,\n\n    /// Number of concurrent [`TenantShard::gather_size_inputs`](crate::tenant::TenantShard::gather_size_inputs) allowed.\n    pub concurrent_tenant_size_logical_size_queries: ConfigurableSemaphore,\n    /// Limit of concurrent [`TenantShard::gather_size_inputs`] issued by module `eviction_task`.\n    /// The number of permits is the same as `concurrent_tenant_size_logical_size_queries`.\n    /// See the comment in `eviction_task` for details.\n    ///\n    /// [`TenantShard::gather_size_inputs`]: crate::tenant::TenantShard::gather_size_inputs\n    pub eviction_task_immitated_concurrent_logical_size_queries: ConfigurableSemaphore,\n\n    // How often to collect metrics and send them to the metrics endpoint.\n    pub metric_collection_interval: Duration,\n    // How often to send unchanged cached metrics to the metrics endpoint.\n    pub metric_collection_endpoint: Option<Url>,\n    pub metric_collection_bucket: Option<RemoteStorageConfig>,\n    pub synthetic_size_calculation_interval: Duration,\n\n    pub disk_usage_based_eviction: DiskUsageEvictionTaskConfig,\n\n    // The number of allowed failures in remote storage operations.\n    pub test_remote_failures: u64,\n    // The probability of failure in remote storage operations. Only works when test_remote_failures > 1.\n    // Use 100 for 100% failure, 0 for no failure.\n    pub test_remote_failures_probability: u64,\n\n    pub ondemand_download_behavior_treat_error_as_warn: bool,\n\n    /// How long will background tasks be delayed at most after initial load of tenants.\n    ///\n    /// Our largest initialization completions are in the range of 100-200s, so perhaps 10s works\n    /// as we now isolate initial loading, initial logical size calculation and background tasks.\n    /// Smaller nodes will have background tasks \"not running\" for this long unless every timeline\n    /// has its initial logical size calculated. 
Not running background tasks for some seconds is\n    /// not terrible.\n    pub background_task_maximum_delay: Duration,\n\n    pub control_plane_api: Url,\n\n    /// JWT token for use with the control plane API.\n    pub control_plane_api_token: Option<SecretString>,\n\n    pub import_pgdata_upcall_api: Option<Url>,\n    pub import_pgdata_upcall_api_token: Option<SecretString>,\n    pub import_pgdata_aws_endpoint_url: Option<Url>,\n\n    /// If true, pageserver will make best-effort to operate without a control plane: only\n    /// for use in major incidents.\n    pub control_plane_emergency_mode: bool,\n\n    /// How many heatmap uploads may be done concurrently: lower values implicitly deprioritize\n    /// heatmap uploads vs. other remote storage operations.\n    pub heatmap_upload_concurrency: usize,\n\n    /// How many remote storage downloads may be done for secondary tenants concurrently.  Implicitly\n    /// deprioritises secondary downloads vs. remote storage operations for attached tenants.\n    pub secondary_download_concurrency: usize,\n\n    /// Maximum number of WAL records to be ingested and committed at the same time\n    pub ingest_batch_size: u64,\n\n    pub virtual_file_io_engine: virtual_file::IoEngineKind,\n\n    pub max_vectored_read_bytes: MaxVectoredReadBytes,\n\n    /// Maximum number of keys to be read in a single get_vectored call.\n    pub max_get_vectored_keys: MaxGetVectoredKeys,\n\n    pub image_compression: ImageCompressionAlgorithm,\n\n    /// Whether to offload archived timelines automatically\n    pub timeline_offloading: bool,\n\n    /// How many bytes of ephemeral layer content will we allow per kilobyte of RAM.  
When this\n    /// is exceeded, we start proactively closing ephemeral layers to limit the total amount\n    /// of ephemeral data.\n    ///\n    /// Setting this to zero disables limits on total ephemeral layer size.\n    pub ephemeral_bytes_per_memory_kb: usize,\n\n    pub l0_flush: crate::l0_flush::L0FlushConfig,\n\n    /// Direct IO settings\n    pub virtual_file_io_mode: virtual_file::IoMode,\n\n    /// Optionally disable disk syncs (unsafe!)\n    pub no_sync: bool,\n\n    pub page_service_pipelining: pageserver_api::config::PageServicePipeliningConfig,\n\n    pub get_vectored_concurrent_io: pageserver_api::config::GetVectoredConcurrentIo,\n\n    /// Enable read path debugging. If enabled, read key errors will print a backtrace of the layer\n    /// files read.\n    pub enable_read_path_debugging: bool,\n\n    /// Interpreted protocol feature: if enabled, validate that the logical WAL received from\n    /// safekeepers does not have gaps.\n    pub validate_wal_contiguity: bool,\n\n    /// When set, the previously written to disk heatmap is loaded on tenant attach and used\n    /// to avoid clobbering the heatmap from new, cold, attached locations.\n    pub load_previous_heatmap: bool,\n\n    /// When set, include visible layers in the next uploaded heatmaps of an unarchived timeline.\n    pub generate_unarchival_heatmap: bool,\n\n    pub tracing: Option<pageserver_api::config::Tracing>,\n\n    /// Enable TLS in page service API.\n    /// Does not force TLS: the client negotiates TLS usage during the handshake.\n    /// Uses key and certificate from ssl_key_file/ssl_cert_file.\n    pub enable_tls_page_service_api: bool,\n\n    /// Run in development mode, which disables certain safety checks\n    /// such as authentication requirements for HTTP and PostgreSQL APIs.\n    /// This is insecure and should only be used in development environments.\n    pub dev_mode: bool,\n\n    /// PostHog integration config.\n    pub posthog_config: Option<PostHogConfig>,\n\n    
pub timeline_import_config: pageserver_api::config::TimelineImportConfig,\n\n    pub basebackup_cache_config: Option<pageserver_api::config::BasebackupCacheConfig>,\n\n    /// Defines what is a big tenant for the purpose of image layer generation.\n    /// See Timeline::should_check_if_image_layers_required\n    pub image_layer_generation_large_timeline_threshold: Option<u64>,\n\n    /// Controls whether to collect all metrics on each scrape or to return potentially stale\n    /// results.\n    pub force_metric_collection_on_scrape: bool,\n}\n\n/// Token for authentication to safekeepers\n///\n/// We do not want to store this in a PageServerConf because the latter may be logged\n/// and/or serialized at a whim, while the token is secret. Currently this token is the\n/// same for accessing all tenants/timelines, but may become per-tenant/per-timeline in\n/// the future, more tokens and auth may arrive for storage broker, completely changing the logic.\n/// Hence, we resort to a global variable for now instead of passing the token from the\n/// startup code to the connection code through a dozen layers.\npub static SAFEKEEPER_AUTH_TOKEN: OnceCell<Arc<String>> = OnceCell::new();\n\nimpl PageServerConf {\n    //\n    // Repository paths, relative to workdir.\n    //\n\n    pub fn tenants_path(&self) -> Utf8PathBuf {\n        self.workdir.join(TENANTS_SEGMENT_NAME)\n    }\n\n    pub fn deletion_prefix(&self) -> Utf8PathBuf {\n        self.workdir.join(\"deletion\")\n    }\n\n    pub fn metadata_path(&self) -> Utf8PathBuf {\n        self.workdir.join(\"metadata.json\")\n    }\n\n    pub fn basebackup_cache_dir(&self) -> Utf8PathBuf {\n        self.workdir.join(\"basebackup_cache\")\n    }\n\n    pub fn deletion_list_path(&self, sequence: u64) -> Utf8PathBuf {\n        // Encode a version in the filename, so that if we ever switch away from JSON we can\n        // increment this.\n        const VERSION: u8 = 1;\n\n        self.deletion_prefix()\n            
.join(format!(\"{sequence:016x}-{VERSION:02x}.list\"))\n    }\n\n    pub fn deletion_header_path(&self) -> Utf8PathBuf {\n        // Encode a version in the filename, so that if we ever switch away from JSON we can\n        // increment this.\n        const VERSION: u8 = 1;\n\n        self.deletion_prefix().join(format!(\"header-{VERSION:02x}\"))\n    }\n\n    pub fn tenant_path(&self, tenant_shard_id: &TenantShardId) -> Utf8PathBuf {\n        self.tenants_path().join(tenant_shard_id.to_string())\n    }\n\n    /// Points to a place in pageserver's local directory,\n    /// where certain tenant's LocationConf be stored.\n    pub(crate) fn tenant_location_config_path(\n        &self,\n        tenant_shard_id: &TenantShardId,\n    ) -> Utf8PathBuf {\n        self.tenant_path(tenant_shard_id)\n            .join(TENANT_LOCATION_CONFIG_NAME)\n    }\n\n    pub(crate) fn tenant_heatmap_path(&self, tenant_shard_id: &TenantShardId) -> Utf8PathBuf {\n        self.tenant_path(tenant_shard_id)\n            .join(TENANT_HEATMAP_BASENAME)\n    }\n\n    pub fn timelines_path(&self, tenant_shard_id: &TenantShardId) -> Utf8PathBuf {\n        self.tenant_path(tenant_shard_id)\n            .join(TIMELINES_SEGMENT_NAME)\n    }\n\n    pub fn timeline_path(\n        &self,\n        tenant_shard_id: &TenantShardId,\n        timeline_id: &TimelineId,\n    ) -> Utf8PathBuf {\n        self.timelines_path(tenant_shard_id)\n            .join(timeline_id.to_string())\n    }\n\n    /// Turns storage remote path of a file into its local path.\n    pub fn local_path(&self, remote_path: &RemotePath) -> Utf8PathBuf {\n        remote_path.with_base(&self.workdir)\n    }\n\n    //\n    // Postgres distribution paths\n    //\n    pub fn pg_distrib_dir(&self, pg_version: PgMajorVersion) -> anyhow::Result<Utf8PathBuf> {\n        let path = self.pg_distrib_dir.clone();\n\n        Ok(path.join(pg_version.v_str()))\n    }\n\n    pub fn pg_bin_dir(&self, pg_version: PgMajorVersion) -> 
anyhow::Result<Utf8PathBuf> {\n        Ok(self.pg_distrib_dir(pg_version)?.join(\"bin\"))\n    }\n    pub fn pg_lib_dir(&self, pg_version: PgMajorVersion) -> anyhow::Result<Utf8PathBuf> {\n        Ok(self.pg_distrib_dir(pg_version)?.join(\"lib\"))\n    }\n\n    /// Parse a configuration file (pageserver.toml) into a PageServerConf struct,\n    /// validating the input and failing on errors.\n    ///\n    /// This leaves any options not present in the file in the built-in defaults.\n    pub fn parse_and_validate(\n        id: NodeId,\n        config_toml: pageserver_api::config::ConfigToml,\n        workdir: &Utf8Path,\n    ) -> anyhow::Result<Self> {\n        let pageserver_api::config::ConfigToml {\n            listen_pg_addr,\n            listen_http_addr,\n            listen_https_addr,\n            listen_grpc_addr,\n            ssl_key_file,\n            ssl_cert_file,\n            ssl_cert_reload_period,\n            ssl_ca_file,\n            availability_zone,\n            wait_lsn_timeout,\n            wal_redo_timeout,\n            superuser,\n            locale,\n            page_cache_size,\n            max_file_descriptors,\n            pg_distrib_dir,\n            http_auth_type,\n            pg_auth_type,\n            grpc_auth_type,\n            auth_validation_public_key_path,\n            remote_storage,\n            broker_endpoint,\n            broker_keepalive_interval,\n            log_format,\n            metric_collection_interval,\n            metric_collection_endpoint,\n            metric_collection_bucket,\n            synthetic_size_calculation_interval,\n            disk_usage_based_eviction,\n            test_remote_failures,\n            test_remote_failures_probability,\n            ondemand_download_behavior_treat_error_as_warn,\n            background_task_maximum_delay,\n            control_plane_api,\n            control_plane_api_token,\n            control_plane_emergency_mode,\n            import_pgdata_upcall_api,\n           
 import_pgdata_upcall_api_token,\n            import_pgdata_aws_endpoint_url,\n            heatmap_upload_concurrency,\n            secondary_download_concurrency,\n            ingest_batch_size,\n            max_vectored_read_bytes,\n            max_get_vectored_keys,\n            image_compression,\n            timeline_offloading,\n            ephemeral_bytes_per_memory_kb,\n            l0_flush,\n            virtual_file_io_mode,\n            concurrent_tenant_warmup,\n            concurrent_tenant_size_logical_size_queries,\n            virtual_file_io_engine,\n            tenant_config,\n            no_sync,\n            page_service_pipelining,\n            get_vectored_concurrent_io,\n            enable_read_path_debugging,\n            validate_wal_contiguity,\n            load_previous_heatmap,\n            generate_unarchival_heatmap,\n            tracing,\n            enable_tls_page_service_api,\n            dev_mode,\n            posthog_config,\n            timeline_import_config,\n            basebackup_cache_config,\n            image_layer_generation_large_timeline_threshold,\n            force_metric_collection_on_scrape,\n        } = config_toml;\n\n        let mut conf = PageServerConf {\n            // ------------------------------------------------------------\n            // fields that are already fully validated by the ConfigToml Deserialize impl\n            // ------------------------------------------------------------\n            listen_pg_addr,\n            listen_http_addr,\n            listen_https_addr,\n            listen_grpc_addr,\n            ssl_key_file,\n            ssl_cert_file,\n            ssl_cert_reload_period,\n            availability_zone,\n            wait_lsn_timeout,\n            wal_redo_timeout,\n            superuser,\n            locale,\n            page_cache_size,\n            max_file_descriptors,\n            http_auth_type,\n            pg_auth_type,\n            grpc_auth_type,\n            
auth_validation_public_key_path,\n            remote_storage_config: remote_storage,\n            broker_endpoint,\n            broker_keepalive_interval,\n            log_format,\n            metric_collection_interval,\n            metric_collection_endpoint,\n            metric_collection_bucket,\n            synthetic_size_calculation_interval,\n            disk_usage_based_eviction,\n            test_remote_failures,\n            test_remote_failures_probability,\n            ondemand_download_behavior_treat_error_as_warn,\n            background_task_maximum_delay,\n            control_plane_api: control_plane_api\n                .ok_or_else(|| anyhow::anyhow!(\"`control_plane_api` must be set\"))?,\n            control_plane_emergency_mode,\n            heatmap_upload_concurrency,\n            secondary_download_concurrency,\n            ingest_batch_size,\n            max_vectored_read_bytes,\n            max_get_vectored_keys,\n            image_compression,\n            timeline_offloading,\n            ephemeral_bytes_per_memory_kb,\n            import_pgdata_upcall_api,\n            import_pgdata_upcall_api_token: import_pgdata_upcall_api_token.map(SecretString::from),\n            import_pgdata_aws_endpoint_url,\n            page_service_pipelining,\n            get_vectored_concurrent_io,\n            tracing,\n            enable_tls_page_service_api,\n            dev_mode,\n            timeline_import_config,\n            basebackup_cache_config,\n            image_layer_generation_large_timeline_threshold,\n            force_metric_collection_on_scrape,\n\n            // ------------------------------------------------------------\n            // fields that require additional validation or custom handling\n            // ------------------------------------------------------------\n            workdir: workdir.to_owned(),\n            pg_distrib_dir: pg_distrib_dir.unwrap_or_else(|| {\n                std::env::current_dir()\n                    
.expect(\"current_dir() failed\")\n                    .try_into()\n                    .expect(\"current_dir() is not a valid Utf8Path\")\n            }),\n            control_plane_api_token: control_plane_api_token.map(SecretString::from),\n            id,\n            default_tenant_conf: tenant_config,\n            concurrent_tenant_warmup: ConfigurableSemaphore::new(concurrent_tenant_warmup),\n            concurrent_tenant_size_logical_size_queries: ConfigurableSemaphore::new(\n                concurrent_tenant_size_logical_size_queries,\n            ),\n            eviction_task_immitated_concurrent_logical_size_queries: ConfigurableSemaphore::new(\n                // re-use `concurrent_tenant_size_logical_size_queries`\n                concurrent_tenant_size_logical_size_queries,\n            ),\n            virtual_file_io_engine: match virtual_file_io_engine {\n                Some(v) => v,\n                None => match crate::virtual_file::io_engine_feature_test()\n                    .context(\"auto-detect virtual_file_io_engine\")?\n                {\n                    io_engine::FeatureTestResult::PlatformPreferred(v) => v, // make no noise\n                    io_engine::FeatureTestResult::Worse { engine, remark } => {\n                        // TODO: bubble this up to the caller so we can tracing::warn! 
it.\n                        eprintln!(\n                            \"auto-detected IO engine is not platform-preferred: engine={engine:?} remark={remark:?}\"\n                        );\n                        engine\n                    }\n                },\n            },\n            l0_flush: l0_flush\n                .map(crate::l0_flush::L0FlushConfig::from)\n                .unwrap_or_default(),\n            virtual_file_io_mode: virtual_file_io_mode.unwrap_or(virtual_file::IoMode::preferred()),\n            no_sync: no_sync.unwrap_or(false),\n            enable_read_path_debugging: enable_read_path_debugging.unwrap_or(false),\n            validate_wal_contiguity: validate_wal_contiguity.unwrap_or(false),\n            load_previous_heatmap: load_previous_heatmap.unwrap_or(true),\n            generate_unarchival_heatmap: generate_unarchival_heatmap.unwrap_or(true),\n            ssl_ca_certs: match ssl_ca_file {\n                Some(ssl_ca_file) => {\n                    let buf = std::fs::read(ssl_ca_file)?;\n                    pem::parse_many(&buf)?\n                        .into_iter()\n                        .filter(|pem| pem.tag() == \"CERTIFICATE\")\n                        .collect()\n                }\n                None => Vec::new(),\n            },\n            posthog_config,\n        };\n\n        // ------------------------------------------------------------\n        // custom validation code that covers more than one field in isolation\n        // ------------------------------------------------------------\n\n        if [conf.http_auth_type, conf.pg_auth_type, conf.grpc_auth_type]\n            .contains(&AuthType::NeonJWT)\n        {\n            let auth_validation_public_key_path = conf\n                .auth_validation_public_key_path\n                .get_or_insert_with(|| workdir.join(\"auth_public_key.pem\"));\n            ensure!(\n                auth_validation_public_key_path.exists(),\n                format!(\n             
       \"Can't find auth_validation_public_key at '{auth_validation_public_key_path}'\",\n                )\n            );\n        }\n\n        if let Some(tracing_config) = conf.tracing.as_ref() {\n            let ratio = &tracing_config.sampling_ratio;\n            ensure!(\n                ratio.denominator != 0 && ratio.denominator >= ratio.numerator,\n                format!(\n                    \"Invalid sampling ratio: {}/{}\",\n                    ratio.numerator, ratio.denominator\n                )\n            );\n\n            let url = Url::parse(&tracing_config.export_config.endpoint)\n                .map_err(anyhow::Error::msg)\n                .with_context(|| {\n                    format!(\n                        \"tracing endpoint URL is invalid : {}\",\n                        tracing_config.export_config.endpoint\n                    )\n                })?;\n\n            ensure!(\n                url.scheme() == \"http\" || url.scheme() == \"https\",\n                format!(\n                    \"tracing endpoint URL must start with http:// or https://: {}\",\n                    tracing_config.export_config.endpoint\n                )\n            );\n        }\n\n        IndexEntry::validate_checkpoint_distance(conf.default_tenant_conf.checkpoint_distance)\n            .map_err(anyhow::Error::msg)\n            .with_context(|| {\n                format!(\n                    \"effective checkpoint distance is unsupported: {}\",\n                    conf.default_tenant_conf.checkpoint_distance\n                )\n            })?;\n\n        if let PageServicePipeliningConfig::Pipelined(PageServicePipeliningConfigPipelined {\n            max_batch_size,\n            ..\n        }) = conf.page_service_pipelining\n        {\n            if max_batch_size.get() > conf.max_get_vectored_keys.get() {\n                return Err(anyhow::anyhow!(\n                    \"`max_batch_size` ({max_batch_size}) must be less than or equal to 
`max_get_vectored_keys` ({})\",\n                    conf.max_get_vectored_keys.get()\n                ));\n            }\n        };\n\n        Ok(conf)\n    }\n\n    #[cfg(test)]\n    pub fn test_repo_dir(test_name: &str) -> Utf8PathBuf {\n        let test_output_dir = std::env::var(\"TEST_OUTPUT\").unwrap_or(\"../tmp_check\".into());\n\n        let test_id = uuid::Uuid::new_v4();\n        Utf8PathBuf::from(format!(\"{test_output_dir}/test_{test_name}_{test_id}\"))\n    }\n\n    pub fn dummy_conf(repo_dir: Utf8PathBuf) -> Self {\n        let pg_distrib_dir = Utf8PathBuf::from(env!(\"CARGO_MANIFEST_DIR\")).join(\"../pg_install\");\n\n        let mut config_toml = pageserver_api::config::ConfigToml {\n            wait_lsn_timeout: Duration::from_secs(60),\n            wal_redo_timeout: Duration::from_secs(60),\n            pg_distrib_dir: Some(pg_distrib_dir),\n            metric_collection_interval: Duration::from_secs(60),\n            synthetic_size_calculation_interval: Duration::from_secs(60),\n            background_task_maximum_delay: Duration::ZERO,\n            load_previous_heatmap: Some(true),\n            generate_unarchival_heatmap: Some(true),\n            control_plane_api: Some(Url::parse(\"http://localhost:6666\").unwrap()),\n            ..Default::default()\n        };\n\n        // Test authors tend to forget about the default 10min initial lease deadline\n        // when writing tests, which turns their immediate gc requests via mgmt API\n        // into no-ops. Override the binary default here, such that there is no initial\n        // lease deadline by default in tests. 
Tests that care can always override it\n        // themselves.\n        // Cf https://databricks.atlassian.net/browse/LKB-92?focusedCommentId=6722329\n        config_toml.tenant_config.lsn_lease_length = Duration::from_secs(0);\n\n        PageServerConf::parse_and_validate(NodeId(0), config_toml, &repo_dir).unwrap()\n    }\n}\n\n#[derive(serde::Deserialize, serde::Serialize)]\npub struct PageserverIdentity {\n    pub id: NodeId,\n}\n\n/// Configurable semaphore permits setting.\n///\n/// Does not allow semaphore permits to be zero, because at runtime initially zero permits and empty\n/// semaphore cannot be distinguished, leading any feature using these to await forever (or until\n/// new permits are added).\n#[derive(Debug, Clone)]\npub struct ConfigurableSemaphore {\n    initial_permits: NonZeroUsize,\n    inner: std::sync::Arc<tokio::sync::Semaphore>,\n}\n\nimpl ConfigurableSemaphore {\n    /// Initializse using a non-zero amount of permits.\n    ///\n    /// Require a non-zero initial permits, because using permits == 0 is a crude way to disable a\n    /// feature such as [`TenantShard::gather_size_inputs`]. 
Otherwise any semaphore using future will\n    /// behave like [`futures::future::pending`], just waiting until new permits are added.\n    ///\n    /// [`TenantShard::gather_size_inputs`]: crate::tenant::TenantShard::gather_size_inputs\n    pub fn new(initial_permits: NonZeroUsize) -> Self {\n        ConfigurableSemaphore {\n            initial_permits,\n            inner: std::sync::Arc::new(tokio::sync::Semaphore::new(initial_permits.get())),\n        }\n    }\n\n    /// Returns the configured amount of permits.\n    pub fn initial_permits(&self) -> NonZeroUsize {\n        self.initial_permits\n    }\n}\n\nimpl PartialEq for ConfigurableSemaphore {\n    fn eq(&self, other: &Self) -> bool {\n        // the number of permits can be increased at runtime, so we cannot really fulfill the\n        // PartialEq value equality otherwise\n        self.initial_permits == other.initial_permits\n    }\n}\n\nimpl Eq for ConfigurableSemaphore {}\n\nimpl ConfigurableSemaphore {\n    pub fn inner(&self) -> &std::sync::Arc<tokio::sync::Semaphore> {\n        &self.inner\n    }\n}\n\n#[cfg(test)]\nmod tests {\n\n    use std::time::Duration;\n\n    use camino::Utf8PathBuf;\n    use pageserver_api::config::{DiskUsageEvictionTaskConfig, EvictionOrder};\n    use rstest::rstest;\n    use utils::{id::NodeId, serde_percent::Percent};\n\n    use super::PageServerConf;\n\n    #[test]\n    fn test_minimal_config_toml_is_valid() {\n        // The minimal valid config for running a pageserver:\n        // - control_plane_api is mandatory, as pageservers cannot run in isolation\n        // - we use Default impl of everything else in this situation\n        let input = r#\"\n            control_plane_api = \"http://localhost:6666\"\n        \"#;\n        let config_toml = toml_edit::de::from_str::<pageserver_api::config::ConfigToml>(input)\n            .expect(\"empty config is valid\");\n        let workdir = Utf8PathBuf::from(\"/nonexistent\");\n        
PageServerConf::parse_and_validate(NodeId(0), config_toml, &workdir)\n            .expect(\"parse_and_validate\");\n    }\n\n    #[test]\n    fn test_config_tracing_endpoint_is_invalid() {\n        let input = r#\"\n            control_plane_api = \"http://localhost:6666\"\n\n            [tracing]\n\n            sampling_ratio = { numerator = 1, denominator = 0 }\n\n            [tracing.export_config]\n            endpoint = \"localhost:4317\"\n            protocol = \"http-binary\"\n            timeout = \"1ms\"\n        \"#;\n        let config_toml = toml_edit::de::from_str::<pageserver_api::config::ConfigToml>(input)\n            .expect(\"config has valid fields\");\n        let workdir = Utf8PathBuf::from(\"/nonexistent\");\n        PageServerConf::parse_and_validate(NodeId(0), config_toml, &workdir)\n            .expect_err(\"parse_and_validate should fail for endpoint without scheme\");\n    }\n\n    #[rstest]\n    #[case(32, 32, true)]\n    #[case(64, 32, false)]\n    #[case(64, 64, true)]\n    #[case(128, 128, true)]\n    fn test_config_max_batch_size_is_valid(\n        #[case] max_batch_size: usize,\n        #[case] max_get_vectored_keys: usize,\n        #[case] is_valid: bool,\n    ) {\n        let input = format!(\n            r#\"\n            control_plane_api = \"http://localhost:6666\"\n            max_get_vectored_keys = {max_get_vectored_keys}\n            page_service_pipelining = {{ mode=\"pipelined\", execution=\"concurrent-futures\", max_batch_size={max_batch_size}, batching=\"uniform-lsn\" }}\n        \"#,\n        );\n        let config_toml = toml_edit::de::from_str::<pageserver_api::config::ConfigToml>(&input)\n            .expect(\"config has valid fields\");\n        let workdir = Utf8PathBuf::from(\"/nonexistent\");\n        let result = PageServerConf::parse_and_validate(NodeId(0), config_toml, &workdir);\n        assert_eq!(result.is_ok(), is_valid);\n    }\n\n    #[test]\n    fn test_config_posthog_config_is_valid() {\n        let 
input = r#\"\n            control_plane_api = \"http://localhost:6666\"\n\n            [posthog_config]\n            server_api_key = \"phs_AAA\"\n            client_api_key = \"phc_BBB\"\n            project_id = \"000\"\n            private_api_url = \"https://us.posthog.com\"\n            public_api_url = \"https://us.i.posthog.com\"\n        \"#;\n        let config_toml = toml_edit::de::from_str::<pageserver_api::config::ConfigToml>(input)\n            .expect(\"posthogconfig is valid\");\n        let workdir = Utf8PathBuf::from(\"/nonexistent\");\n        PageServerConf::parse_and_validate(NodeId(0), config_toml, &workdir)\n            .expect(\"parse_and_validate\");\n    }\n\n    #[test]\n    fn test_config_posthog_incomplete_config_is_valid() {\n        let input = r#\"\n            control_plane_api = \"http://localhost:6666\"\n\n            [posthog_config]\n            server_api_key = \"phs_AAA\"\n            private_api_url = \"https://us.posthog.com\"\n            public_api_url = \"https://us.i.posthog.com\"\n        \"#;\n        let config_toml = toml_edit::de::from_str::<pageserver_api::config::ConfigToml>(input)\n            .expect(\"posthogconfig is valid\");\n        let workdir = Utf8PathBuf::from(\"/nonexistent\");\n        PageServerConf::parse_and_validate(NodeId(0), config_toml, &workdir)\n            .expect(\"parse_and_validate\");\n    }\n\n    #[rstest]\n    #[\n        case::omit_the_whole_config(\n            DiskUsageEvictionTaskConfig {\n                max_usage_pct: Percent::new(80).unwrap(),\n                min_avail_bytes: 2_000_000_000,\n                period: Duration::from_secs(60),\n                eviction_order: Default::default(),\n                #[cfg(feature = \"testing\")]\n                mock_statvfs: None,\n                enabled: true,\n            },\n        r#\"\n            control_plane_api = \"http://localhost:6666\"\n        \"#,\n    )]\n    #[\n        case::omit_enabled_field(\n            
DiskUsageEvictionTaskConfig {\n                max_usage_pct: Percent::new(80).unwrap(),\n                min_avail_bytes: 1_000_000_000,\n                period: Duration::from_secs(60),\n                eviction_order: EvictionOrder::RelativeAccessed {\n                    highest_layer_count_loses_first: true,\n                },\n                #[cfg(feature = \"testing\")]\n                mock_statvfs: None,\n                enabled: true,\n            },\n        r#\"\n            control_plane_api = \"http://localhost:6666\"\n            disk_usage_based_eviction = { max_usage_pct = 80, min_avail_bytes = 1000000000, period = \"60s\" }\n        \"#,\n    )]\n    #[case::disabled(\n        DiskUsageEvictionTaskConfig {\n            max_usage_pct: Percent::new(80).unwrap(),\n            min_avail_bytes: 2_000_000_000,\n            period: Duration::from_secs(60),\n            eviction_order: EvictionOrder::RelativeAccessed {\n                highest_layer_count_loses_first: true,\n            },\n            #[cfg(feature = \"testing\")]\n            mock_statvfs: None,\n            enabled: false,\n        },\n        r#\"\n            control_plane_api = \"http://localhost:6666\"\n            disk_usage_based_eviction = { enabled = false }\n        \"#\n    )]\n    fn test_config_disk_usage_based_eviction_is_valid(\n        #[case] expected_disk_usage_based_eviction: DiskUsageEvictionTaskConfig,\n        #[case] input: &str,\n    ) {\n        let config_toml = toml_edit::de::from_str::<pageserver_api::config::ConfigToml>(input)\n            .expect(\"disk_usage_based_eviction is valid\");\n        let workdir = Utf8PathBuf::from(\"/nonexistent\");\n        let config = PageServerConf::parse_and_validate(NodeId(0), config_toml, &workdir).unwrap();\n        let disk_usage_based_eviction = config.disk_usage_based_eviction;\n        assert_eq!(\n            expected_disk_usage_based_eviction,\n            disk_usage_based_eviction\n        );\n    }\n}\n"
  },
  {
    "path": "pageserver/src/consumption_metrics/disk_cache.rs",
    "content": "use std::sync::Arc;\n\nuse anyhow::Context;\nuse camino::{Utf8Path, Utf8PathBuf};\n\nuse super::{NewMetricsRoot, NewRawMetric, RawMetric};\nuse crate::consumption_metrics::NewMetricsRefRoot;\n\npub(super) fn read_metrics_from_serde_value(\n    json_value: serde_json::Value,\n) -> anyhow::Result<Vec<NewRawMetric>> {\n    if NewMetricsRoot::is_v2_metrics(&json_value) {\n        let root = serde_json::from_value::<NewMetricsRoot>(json_value)?;\n        Ok(root.metrics)\n    } else {\n        let all_metrics = serde_json::from_value::<Vec<RawMetric>>(json_value)?;\n        let all_metrics = all_metrics\n            .into_iter()\n            .map(|(key, (event_type, value))| NewRawMetric {\n                key,\n                kind: event_type,\n                value,\n            })\n            .collect();\n        Ok(all_metrics)\n    }\n}\n\npub(super) async fn read_metrics_from_disk(\n    path: Arc<Utf8PathBuf>,\n) -> anyhow::Result<Vec<NewRawMetric>> {\n    // do not add context to each error, callsite will log with full path\n    let span = tracing::Span::current();\n    tokio::task::spawn_blocking(move || {\n        let _e = span.entered();\n\n        if let Some(parent) = path.parent() {\n            if let Err(e) = scan_and_delete_with_same_prefix(&path) {\n                tracing::info!(\"failed to cleanup temporary files in {parent:?}: {e:#}\");\n            }\n        }\n\n        let mut file = std::fs::File::open(&*path)?;\n        let reader = std::io::BufReader::new(&mut file);\n        let json_value = serde_json::from_reader::<_, serde_json::Value>(reader)?;\n        read_metrics_from_serde_value(json_value)\n    })\n    .await\n    .context(\"read metrics join error\")\n    .and_then(|x| x)\n}\n\nfn scan_and_delete_with_same_prefix(path: &Utf8Path) -> std::io::Result<()> {\n    let it = std::fs::read_dir(path.parent().expect(\"caller checked\"))?;\n\n    let prefix = path.file_name().expect(\"caller checked\").to_string();\n\n    for 
entry in it {\n        let entry = entry?;\n        if !entry.metadata()?.is_file() {\n            continue;\n        }\n        let file_name = entry.file_name();\n\n        if path.file_name().unwrap() == file_name {\n            // do not remove our actual file\n            continue;\n        }\n\n        let file_name = file_name.to_string_lossy();\n\n        if !file_name.starts_with(&*prefix) {\n            continue;\n        }\n\n        let path = entry.path();\n\n        if let Err(e) = std::fs::remove_file(&path) {\n            tracing::warn!(\"cleaning up old tempfile {file_name:?} failed: {e:#}\");\n        } else {\n            tracing::info!(\"cleaned up old tempfile {file_name:?}\");\n        }\n    }\n\n    Ok(())\n}\n\npub(super) async fn flush_metrics_to_disk(\n    current_metrics: &Arc<Vec<NewRawMetric>>,\n    path: &Arc<Utf8PathBuf>,\n) -> anyhow::Result<()> {\n    use std::io::Write;\n\n    anyhow::ensure!(path.parent().is_some(), \"path must have parent: {path:?}\");\n    anyhow::ensure!(\n        path.file_name().is_some(),\n        \"path must have filename: {path:?}\"\n    );\n\n    let span = tracing::Span::current();\n    tokio::task::spawn_blocking({\n        let current_metrics = current_metrics.clone();\n        let path = path.clone();\n        move || {\n            let _e = span.entered();\n\n            let parent = path.parent().expect(\"existence checked\");\n            let file_name = path.file_name().expect(\"existence checked\");\n            let mut tempfile = camino_tempfile::Builder::new()\n                .prefix(file_name)\n                .suffix(\".tmp\")\n                .tempfile_in(parent)?;\n\n            tracing::debug!(\"using tempfile {:?}\", tempfile.path());\n\n            // write out all of the raw metrics, to be read out later on restart as cached values\n            {\n                let mut writer = std::io::BufWriter::new(&mut tempfile);\n                serde_json::to_writer(\n                    &mut 
writer,\n                    &NewMetricsRefRoot::new(current_metrics.as_ref()),\n                )\n                .context(\"serialize metrics\")?;\n                writer\n                    .into_inner()\n                    .map_err(|_| anyhow::anyhow!(\"flushing metrics failed\"))?;\n            }\n\n            tempfile.flush()?;\n            tempfile.as_file().sync_all()?;\n\n            fail::fail_point!(\"before-persist-last-metrics-collected\");\n\n            drop(tempfile.persist(&*path).map_err(|e| e.error)?);\n\n            let f = std::fs::File::open(path.parent().unwrap())?;\n            f.sync_all()?;\n\n            anyhow::Ok(())\n        }\n    })\n    .await\n    .with_context(|| format!(\"write metrics to {path:?} join error\"))\n    .and_then(|x| x.with_context(|| format!(\"write metrics to {path:?}\")))\n}\n"
  },
  {
    "path": "pageserver/src/consumption_metrics/metrics/tests.rs",
    "content": "use std::collections::HashMap;\n\nuse super::*;\nuse crate::consumption_metrics::RawMetric;\n\n#[test]\nfn startup_collected_timeline_metrics_before_advancing() {\n    let tenant_id = TenantId::generate();\n    let timeline_id = TimelineId::generate();\n\n    let mut metrics = Vec::new();\n    let cache = HashMap::new();\n\n    let initdb_lsn = Lsn(0x10000);\n    let pitr_cutoff = Lsn(0x11000);\n    let disk_consistent_lsn = Lsn(initdb_lsn.0 * 2);\n    let logical_size = 0x42000;\n\n    let snap = TimelineSnapshot {\n        loaded_at: (disk_consistent_lsn, SystemTime::now()),\n        last_record_lsn: disk_consistent_lsn,\n        ancestor_lsn: Lsn(0),\n        current_exact_logical_size: Some(logical_size),\n        pitr_enabled: true,\n        pitr_cutoff: Some(pitr_cutoff),\n    };\n\n    let now = DateTime::<Utc>::from(SystemTime::now());\n\n    snap.to_metrics(tenant_id, timeline_id, now, &mut metrics, &cache);\n\n    assert_eq!(\n        metrics,\n        &[\n            MetricsKey::written_size_delta(tenant_id, timeline_id).from_until(\n                snap.loaded_at.1.into(),\n                now,\n                0\n            ),\n            MetricsKey::written_size(tenant_id, timeline_id).at(now, disk_consistent_lsn.0),\n            MetricsKey::written_size_since_parent(tenant_id, timeline_id)\n                .at(now, disk_consistent_lsn.0),\n            MetricsKey::pitr_history_size_since_parent(tenant_id, timeline_id)\n                .at(now, disk_consistent_lsn.0 - pitr_cutoff.0),\n            MetricsKey::timeline_logical_size(tenant_id, timeline_id).at(now, logical_size)\n        ]\n    );\n}\n\n#[test]\nfn startup_collected_timeline_metrics_second_round() {\n    let tenant_id = TenantId::generate();\n    let timeline_id = TimelineId::generate();\n\n    let [now, before, init] = time_backwards();\n\n    let now = DateTime::<Utc>::from(now);\n    let before = DateTime::<Utc>::from(before);\n\n    let initdb_lsn = Lsn(0x10000);\n    
let pitr_cutoff = Lsn(0x11000);\n    let disk_consistent_lsn = Lsn(initdb_lsn.0 * 2);\n    let logical_size = 0x42000;\n\n    let mut metrics = Vec::new();\n    let cache = HashMap::from([MetricsKey::written_size(tenant_id, timeline_id)\n        .at(before, disk_consistent_lsn.0)\n        .to_kv_pair()]);\n\n    let snap = TimelineSnapshot {\n        loaded_at: (disk_consistent_lsn, init),\n        last_record_lsn: disk_consistent_lsn,\n        ancestor_lsn: Lsn(0),\n        current_exact_logical_size: Some(logical_size),\n        pitr_enabled: true,\n        pitr_cutoff: Some(pitr_cutoff),\n    };\n\n    snap.to_metrics(tenant_id, timeline_id, now, &mut metrics, &cache);\n\n    assert_eq!(\n        metrics,\n        &[\n            MetricsKey::written_size_delta(tenant_id, timeline_id).from_until(before, now, 0),\n            MetricsKey::written_size(tenant_id, timeline_id).at(now, disk_consistent_lsn.0),\n            MetricsKey::written_size_since_parent(tenant_id, timeline_id)\n                .at(now, disk_consistent_lsn.0),\n            MetricsKey::pitr_history_size_since_parent(tenant_id, timeline_id)\n                .at(now, disk_consistent_lsn.0 - pitr_cutoff.0),\n            MetricsKey::timeline_logical_size(tenant_id, timeline_id).at(now, logical_size)\n        ]\n    );\n}\n\n#[test]\nfn startup_collected_timeline_metrics_nth_round_at_same_lsn() {\n    let tenant_id = TenantId::generate();\n    let timeline_id = TimelineId::generate();\n\n    let [now, just_before, before, init] = time_backwards();\n\n    let now = DateTime::<Utc>::from(now);\n    let just_before = DateTime::<Utc>::from(just_before);\n    let before = DateTime::<Utc>::from(before);\n\n    let initdb_lsn = Lsn(0x10000);\n    let pitr_cutoff = Lsn(0x11000);\n    let disk_consistent_lsn = Lsn(initdb_lsn.0 * 2);\n    let logical_size = 0x42000;\n\n    let mut metrics = Vec::new();\n    let cache = HashMap::from([\n        // at t=before was the last time the last_record_lsn changed\n        
MetricsKey::written_size(tenant_id, timeline_id)\n            .at(before, disk_consistent_lsn.0)\n            .to_kv_pair(),\n        // end time of this event is used for the next ones\n        MetricsKey::written_size_delta(tenant_id, timeline_id)\n            .from_until(before, just_before, 0)\n            .to_kv_pair(),\n    ]);\n\n    let snap = TimelineSnapshot {\n        loaded_at: (disk_consistent_lsn, init),\n        last_record_lsn: disk_consistent_lsn,\n        ancestor_lsn: Lsn(0),\n        current_exact_logical_size: Some(logical_size),\n        pitr_enabled: true,\n        pitr_cutoff: Some(pitr_cutoff),\n    };\n\n    snap.to_metrics(tenant_id, timeline_id, now, &mut metrics, &cache);\n\n    assert_eq!(\n        metrics,\n        &[\n            MetricsKey::written_size_delta(tenant_id, timeline_id).from_until(just_before, now, 0),\n            MetricsKey::written_size(tenant_id, timeline_id).at(now, disk_consistent_lsn.0),\n            MetricsKey::written_size_since_parent(tenant_id, timeline_id)\n                .at(now, disk_consistent_lsn.0),\n            MetricsKey::pitr_history_size_since_parent(tenant_id, timeline_id)\n                .at(now, disk_consistent_lsn.0 - pitr_cutoff.0),\n            MetricsKey::timeline_logical_size(tenant_id, timeline_id).at(now, logical_size)\n        ]\n    );\n}\n\n/// Tests that written sizes do not regress across restarts.\n#[test]\nfn post_restart_written_sizes_with_rolled_back_last_record_lsn() {\n    let tenant_id = TenantId::generate();\n    let timeline_id = TimelineId::generate();\n\n    let [later, now, at_restart] = time_backwards();\n\n    // FIXME: tests would be so much easier if we did not need to juggle back and forth\n    // SystemTime and DateTime::<Utc> ... 
Could do the conversion only at upload time?\n    let now = DateTime::<Utc>::from(now);\n    let later = DateTime::<Utc>::from(later);\n    let before_restart = at_restart - std::time::Duration::from_secs(5 * 60);\n    let way_before = before_restart - std::time::Duration::from_secs(10 * 60);\n    let before_restart = DateTime::<Utc>::from(before_restart);\n    let way_before = DateTime::<Utc>::from(way_before);\n\n    let snap = TimelineSnapshot {\n        loaded_at: (Lsn(50), at_restart),\n        last_record_lsn: Lsn(50),\n        ancestor_lsn: Lsn(0),\n        current_exact_logical_size: None,\n        pitr_enabled: true,\n        pitr_cutoff: Some(Lsn(20)),\n    };\n\n    let mut cache = HashMap::from([\n        MetricsKey::written_size(tenant_id, timeline_id)\n            .at(before_restart, 100)\n            .to_kv_pair(),\n        MetricsKey::written_size_delta(tenant_id, timeline_id)\n            .from_until(\n                way_before,\n                before_restart,\n                // not taken into account, but the timestamps are important\n                999_999_999,\n            )\n            .to_kv_pair(),\n    ]);\n\n    let mut metrics = Vec::new();\n    snap.to_metrics(tenant_id, timeline_id, now, &mut metrics, &cache);\n\n    assert_eq!(\n        metrics,\n        &[\n            MetricsKey::written_size_delta(tenant_id, timeline_id).from_until(\n                before_restart,\n                now,\n                0\n            ),\n            MetricsKey::written_size(tenant_id, timeline_id).at(now, 100),\n            MetricsKey::written_size_since_parent(tenant_id, timeline_id).at(now, 100),\n            MetricsKey::pitr_history_size_since_parent(tenant_id, timeline_id).at(now, 80),\n        ]\n    );\n\n    // now if we cache these metrics, and re-run while \"still in recovery\"\n    cache.extend(metrics.drain(..).map(|x| x.to_kv_pair()));\n\n    // \"still in recovery\", because our snapshot did not change\n    
snap.to_metrics(tenant_id, timeline_id, later, &mut metrics, &cache);\n\n    assert_eq!(\n        metrics,\n        &[\n            MetricsKey::written_size_delta(tenant_id, timeline_id).from_until(now, later, 0),\n            MetricsKey::written_size(tenant_id, timeline_id).at(later, 100),\n            MetricsKey::written_size_since_parent(tenant_id, timeline_id).at(later, 100),\n            MetricsKey::pitr_history_size_since_parent(tenant_id, timeline_id).at(later, 80),\n        ]\n    );\n}\n\n/// Tests that written sizes do not regress across restarts, even on child branches.\n#[test]\nfn post_restart_written_sizes_with_rolled_back_last_record_lsn_and_ancestor_lsn() {\n    let tenant_id = TenantId::generate();\n    let timeline_id = TimelineId::generate();\n\n    let [later, now, at_restart] = time_backwards();\n\n    // FIXME: tests would be so much easier if we did not need to juggle back and forth\n    // SystemTime and DateTime::<Utc> ... Could do the conversion only at upload time?\n    let now = DateTime::<Utc>::from(now);\n    let later = DateTime::<Utc>::from(later);\n    let before_restart = at_restart - std::time::Duration::from_secs(5 * 60);\n    let way_before = before_restart - std::time::Duration::from_secs(10 * 60);\n    let before_restart = DateTime::<Utc>::from(before_restart);\n    let way_before = DateTime::<Utc>::from(way_before);\n\n    let snap = TimelineSnapshot {\n        loaded_at: (Lsn(50), at_restart),\n        last_record_lsn: Lsn(50),\n        ancestor_lsn: Lsn(40),\n        current_exact_logical_size: None,\n        pitr_enabled: true,\n        pitr_cutoff: Some(Lsn(20)),\n    };\n\n    let mut cache = HashMap::from([\n        MetricsKey::written_size(tenant_id, timeline_id)\n            .at(before_restart, 100)\n            .to_kv_pair(),\n        MetricsKey::written_size_delta(tenant_id, timeline_id)\n            .from_until(\n                way_before,\n                before_restart,\n                // not taken into 
account, but the timestamps are important\n                999_999_999,\n            )\n            .to_kv_pair(),\n    ]);\n\n    let mut metrics = Vec::new();\n    snap.to_metrics(tenant_id, timeline_id, now, &mut metrics, &cache);\n\n    assert_eq!(\n        metrics,\n        &[\n            MetricsKey::written_size_delta(tenant_id, timeline_id).from_until(\n                before_restart,\n                now,\n                0\n            ),\n            MetricsKey::written_size(tenant_id, timeline_id).at(now, 100),\n            MetricsKey::written_size_since_parent(tenant_id, timeline_id).at(now, 60),\n            MetricsKey::pitr_history_size_since_parent(tenant_id, timeline_id).at(now, 60),\n        ]\n    );\n\n    // now if we cache these metrics, and re-run while \"still in recovery\"\n    cache.extend(metrics.drain(..).map(|x| x.to_kv_pair()));\n\n    // \"still in recovery\", because our snapshot did not change\n    snap.to_metrics(tenant_id, timeline_id, later, &mut metrics, &cache);\n\n    assert_eq!(\n        metrics,\n        &[\n            MetricsKey::written_size_delta(tenant_id, timeline_id).from_until(now, later, 0),\n            MetricsKey::written_size(tenant_id, timeline_id).at(later, 100),\n            MetricsKey::written_size_since_parent(tenant_id, timeline_id).at(later, 60),\n            MetricsKey::pitr_history_size_since_parent(tenant_id, timeline_id).at(later, 60),\n        ]\n    );\n}\n\n/// Tests that written sizes do not regress across restarts, even on child branches and\n/// with a PITR cutoff after the branch point.\n#[test]\nfn post_restart_written_sizes_with_rolled_back_last_record_lsn_and_ancestor_lsn_and_pitr_cutoff() {\n    let tenant_id = TenantId::generate();\n    let timeline_id = TimelineId::generate();\n\n    let [later, now, at_restart] = time_backwards();\n\n    // FIXME: tests would be so much easier if we did not need to juggle back and forth\n    // SystemTime and DateTime::<Utc> ... 
Could do the conversion only at upload time?\n    let now = DateTime::<Utc>::from(now);\n    let later = DateTime::<Utc>::from(later);\n    let before_restart = at_restart - std::time::Duration::from_secs(5 * 60);\n    let way_before = before_restart - std::time::Duration::from_secs(10 * 60);\n    let before_restart = DateTime::<Utc>::from(before_restart);\n    let way_before = DateTime::<Utc>::from(way_before);\n\n    let snap = TimelineSnapshot {\n        loaded_at: (Lsn(50), at_restart),\n        last_record_lsn: Lsn(50),\n        ancestor_lsn: Lsn(30),\n        current_exact_logical_size: None,\n        pitr_enabled: true,\n        pitr_cutoff: Some(Lsn(40)),\n    };\n\n    let mut cache = HashMap::from([\n        MetricsKey::written_size(tenant_id, timeline_id)\n            .at(before_restart, 100)\n            .to_kv_pair(),\n        MetricsKey::written_size_delta(tenant_id, timeline_id)\n            .from_until(\n                way_before,\n                before_restart,\n                // not taken into account, but the timestamps are important\n                999_999_999,\n            )\n            .to_kv_pair(),\n    ]);\n\n    let mut metrics = Vec::new();\n    snap.to_metrics(tenant_id, timeline_id, now, &mut metrics, &cache);\n\n    assert_eq!(\n        metrics,\n        &[\n            MetricsKey::written_size_delta(tenant_id, timeline_id).from_until(\n                before_restart,\n                now,\n                0\n            ),\n            MetricsKey::written_size(tenant_id, timeline_id).at(now, 100),\n            MetricsKey::written_size_since_parent(tenant_id, timeline_id).at(now, 70),\n            MetricsKey::pitr_history_size_since_parent(tenant_id, timeline_id).at(now, 60),\n        ]\n    );\n\n    // now if we cache these metrics, and re-run while \"still in recovery\"\n    cache.extend(metrics.drain(..).map(|x| x.to_kv_pair()));\n\n    // \"still in recovery\", because our snapshot did not change\n    
snap.to_metrics(tenant_id, timeline_id, later, &mut metrics, &cache);\n\n    assert_eq!(\n        metrics,\n        &[\n            MetricsKey::written_size_delta(tenant_id, timeline_id).from_until(now, later, 0),\n            MetricsKey::written_size(tenant_id, timeline_id).at(later, 100),\n            MetricsKey::written_size_since_parent(tenant_id, timeline_id).at(later, 70),\n            MetricsKey::pitr_history_size_since_parent(tenant_id, timeline_id).at(later, 60),\n        ]\n    );\n}\n\n#[test]\nfn post_restart_current_exact_logical_size_uses_cached() {\n    let tenant_id = TenantId::generate();\n    let timeline_id = TimelineId::generate();\n\n    let [now, at_restart] = time_backwards();\n\n    let now = DateTime::<Utc>::from(now);\n    let before_restart = at_restart - std::time::Duration::from_secs(5 * 60);\n    let before_restart = DateTime::<Utc>::from(before_restart);\n\n    let snap = TimelineSnapshot {\n        loaded_at: (Lsn(50), at_restart),\n        last_record_lsn: Lsn(50),\n        ancestor_lsn: Lsn(0),\n        current_exact_logical_size: None,\n        pitr_enabled: true,\n        pitr_cutoff: None,\n    };\n\n    let cache = HashMap::from([MetricsKey::timeline_logical_size(tenant_id, timeline_id)\n        .at(before_restart, 100)\n        .to_kv_pair()]);\n\n    let mut metrics = Vec::new();\n    snap.to_metrics(tenant_id, timeline_id, now, &mut metrics, &cache);\n\n    metrics.retain(|item| item.key.metric == Name::LogicalSize);\n\n    assert_eq!(\n        metrics,\n        &[MetricsKey::timeline_logical_size(tenant_id, timeline_id).at(now, 100)]\n    );\n}\n\n#[test]\nfn post_restart_synthetic_size_uses_cached_if_available() {\n    let tenant_id = TenantId::generate();\n\n    let ts = TenantSnapshot {\n        remote_size: 1000,\n        // not yet calculated\n        synthetic_size: 0,\n    };\n\n    let now = SystemTime::now();\n    let before_restart = DateTime::<Utc>::from(now - std::time::Duration::from_secs(5 * 60));\n    let now 
= DateTime::<Utc>::from(now);\n\n    let cached = HashMap::from([MetricsKey::synthetic_size(tenant_id)\n        .at(before_restart, 1000)\n        .to_kv_pair()]);\n\n    let mut metrics = Vec::new();\n    ts.to_metrics(tenant_id, now, &cached, &mut metrics);\n\n    assert_eq!(\n        metrics,\n        &[\n            MetricsKey::remote_storage_size(tenant_id).at(now, 1000),\n            MetricsKey::synthetic_size(tenant_id).at(now, 1000),\n        ]\n    );\n}\n\n#[test]\nfn post_restart_synthetic_size_is_not_sent_when_not_cached() {\n    let tenant_id = TenantId::generate();\n\n    let ts = TenantSnapshot {\n        remote_size: 1000,\n        // not yet calculated\n        synthetic_size: 0,\n    };\n\n    let now = SystemTime::now();\n    let now = DateTime::<Utc>::from(now);\n\n    let cached = HashMap::new();\n\n    let mut metrics = Vec::new();\n    ts.to_metrics(tenant_id, now, &cached, &mut metrics);\n\n    assert_eq!(\n        metrics,\n        &[\n            MetricsKey::remote_storage_size(tenant_id).at(now, 1000),\n            // no synthetic size here\n        ]\n    );\n}\n\nfn time_backwards<const N: usize>() -> [std::time::SystemTime; N] {\n    let mut times = [std::time::SystemTime::UNIX_EPOCH; N];\n    times[0] = std::time::SystemTime::now();\n    for behind in 1..N {\n        times[behind] = times[0] - std::time::Duration::from_secs(behind as u64);\n    }\n\n    times\n}\n\n/// Tests that disabled PITR history does not yield any history size, even when the PITR cutoff\n/// indicates otherwise.\n#[test]\nfn pitr_disabled_yields_no_history_size() {\n    let tenant_id = TenantId::generate();\n    let timeline_id = TimelineId::generate();\n\n    let mut metrics = Vec::new();\n    let cache = HashMap::new();\n\n    let initdb_lsn = Lsn(0x10000);\n    let pitr_cutoff = Lsn(0x11000);\n    let disk_consistent_lsn = Lsn(initdb_lsn.0 * 2);\n\n    let snap = TimelineSnapshot {\n        loaded_at: (disk_consistent_lsn, SystemTime::now()),\n        
last_record_lsn: disk_consistent_lsn,\n        ancestor_lsn: Lsn(0),\n        current_exact_logical_size: None,\n        pitr_enabled: false,\n        pitr_cutoff: Some(pitr_cutoff),\n    };\n\n    let now = DateTime::<Utc>::from(SystemTime::now());\n\n    snap.to_metrics(tenant_id, timeline_id, now, &mut metrics, &cache);\n\n    assert_eq!(\n        metrics,\n        &[\n            MetricsKey::written_size_delta(tenant_id, timeline_id).from_until(\n                snap.loaded_at.1.into(),\n                now,\n                0\n            ),\n            MetricsKey::written_size(tenant_id, timeline_id).at(now, disk_consistent_lsn.0),\n            MetricsKey::written_size_since_parent(tenant_id, timeline_id)\n                .at(now, disk_consistent_lsn.0),\n            MetricsKey::pitr_history_size_since_parent(tenant_id, timeline_id).at(now, 0),\n        ]\n    );\n}\n\n/// Tests that uninitialized PITR cutoff does not emit any history size metric at all.\n#[test]\nfn pitr_uninitialized_does_not_emit_history_size() {\n    let tenant_id = TenantId::generate();\n    let timeline_id = TimelineId::generate();\n\n    let mut metrics = Vec::new();\n    let cache = HashMap::new();\n\n    let initdb_lsn = Lsn(0x10000);\n    let disk_consistent_lsn = Lsn(initdb_lsn.0 * 2);\n\n    let snap = TimelineSnapshot {\n        loaded_at: (disk_consistent_lsn, SystemTime::now()),\n        last_record_lsn: disk_consistent_lsn,\n        ancestor_lsn: Lsn(0),\n        current_exact_logical_size: None,\n        pitr_enabled: true,\n        pitr_cutoff: None,\n    };\n\n    let now = DateTime::<Utc>::from(SystemTime::now());\n\n    snap.to_metrics(tenant_id, timeline_id, now, &mut metrics, &cache);\n\n    assert_eq!(\n        metrics,\n        &[\n            MetricsKey::written_size_delta(tenant_id, timeline_id).from_until(\n                snap.loaded_at.1.into(),\n                now,\n                0\n            ),\n            MetricsKey::written_size(tenant_id, 
timeline_id).at(now, disk_consistent_lsn.0),\n            MetricsKey::written_size_since_parent(tenant_id, timeline_id)\n                .at(now, disk_consistent_lsn.0),\n        ]\n    );\n}\n\npub(crate) const fn metric_examples_old(\n    tenant_id: TenantId,\n    timeline_id: TimelineId,\n    now: DateTime<Utc>,\n    before: DateTime<Utc>,\n) -> [RawMetric; 7] {\n    [\n        MetricsKey::written_size(tenant_id, timeline_id).at_old_format(now, 0),\n        MetricsKey::written_size_delta(tenant_id, timeline_id)\n            .from_until_old_format(before, now, 0),\n        MetricsKey::written_size_since_parent(tenant_id, timeline_id).at_old_format(now, 0),\n        MetricsKey::pitr_history_size_since_parent(tenant_id, timeline_id).at_old_format(now, 0),\n        MetricsKey::timeline_logical_size(tenant_id, timeline_id).at_old_format(now, 0),\n        MetricsKey::remote_storage_size(tenant_id).at_old_format(now, 0),\n        MetricsKey::synthetic_size(tenant_id).at_old_format(now, 1),\n    ]\n}\n\npub(crate) const fn metric_examples(\n    tenant_id: TenantId,\n    timeline_id: TimelineId,\n    now: DateTime<Utc>,\n    before: DateTime<Utc>,\n) -> [NewRawMetric; 7] {\n    [\n        MetricsKey::written_size(tenant_id, timeline_id).at(now, 0),\n        MetricsKey::written_size_delta(tenant_id, timeline_id).from_until(before, now, 0),\n        MetricsKey::written_size_since_parent(tenant_id, timeline_id).at(now, 0),\n        MetricsKey::pitr_history_size_since_parent(tenant_id, timeline_id).at(now, 0),\n        MetricsKey::timeline_logical_size(tenant_id, timeline_id).at(now, 0),\n        MetricsKey::remote_storage_size(tenant_id).at(now, 0),\n        MetricsKey::synthetic_size(tenant_id).at(now, 1),\n    ]\n}\n"
  },
  {
    "path": "pageserver/src/consumption_metrics/metrics.rs",
    "content": "use std::sync::Arc;\nuse std::time::SystemTime;\n\nuse chrono::{DateTime, Utc};\nuse consumption_metrics::EventType;\nuse futures::stream::StreamExt;\nuse utils::id::{TenantId, TimelineId};\nuse utils::lsn::Lsn;\n\nuse super::{Cache, NewRawMetric};\nuse crate::context::RequestContext;\nuse crate::tenant::mgr::TenantManager;\nuse crate::tenant::timeline::logical_size::CurrentLogicalSize;\n\n/// Name of the metric, used by `MetricsKey` factory methods and `deserialize_cached_events`\n/// instead of static str.\n// Do not rename any of these without first consulting with data team and partner\n// management.\n#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, serde::Serialize, serde::Deserialize)]\npub(super) enum Name {\n    /// Timeline last_record_lsn, absolute.\n    #[serde(rename = \"written_size\")]\n    WrittenSize,\n    /// Timeline last_record_lsn, incremental\n    #[serde(rename = \"written_data_bytes_delta\")]\n    WrittenSizeDelta,\n    /// Written bytes only on this timeline (not including ancestors):\n    /// written_size - ancestor_lsn\n    ///\n    /// On the root branch, this is equivalent to `written_size`.\n    #[serde(rename = \"written_size_since_parent\")]\n    WrittenSizeSinceParent,\n    /// PITR history size only on this timeline (not including ancestors):\n    /// last_record_lsn - max(pitr_cutoff, ancestor_lsn).\n    ///\n    /// On the root branch, this is its entire PITR history size. Not emitted if GC hasn't computed\n    /// the PITR cutoff yet. 
0 if PITR is disabled.\n    #[serde(rename = \"pitr_history_size_since_parent\")]\n    PitrHistorySizeSinceParent,\n    /// Timeline logical size\n    #[serde(rename = \"timeline_logical_size\")]\n    LogicalSize,\n    /// Tenant remote size\n    #[serde(rename = \"remote_storage_size\")]\n    RemoteSize,\n    /// Tenant synthetic size\n    #[serde(rename = \"synthetic_storage_size\")]\n    SyntheticSize,\n}\n\n/// Key that uniquely identifies the object this metric describes.\n///\n/// This is a denormalization done at the MetricsKey const methods; these should not be constructed\n/// elsewhere.\n#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, serde::Serialize, serde::Deserialize)]\npub(crate) struct MetricsKey {\n    pub(super) tenant_id: TenantId,\n\n    #[serde(skip_serializing_if = \"Option::is_none\")]\n    pub(super) timeline_id: Option<TimelineId>,\n\n    pub(super) metric: Name,\n}\n\nimpl MetricsKey {\n    const fn absolute_values(self) -> AbsoluteValueFactory {\n        AbsoluteValueFactory(self)\n    }\n    const fn incremental_values(self) -> IncrementalValueFactory {\n        IncrementalValueFactory(self)\n    }\n}\n\n/// Helper type which each individual metric kind can return to produce only absolute values.\nstruct AbsoluteValueFactory(MetricsKey);\n\nimpl AbsoluteValueFactory {\n    #[cfg(test)]\n    const fn at_old_format(self, time: DateTime<Utc>, val: u64) -> super::RawMetric {\n        let key = self.0;\n        (key, (EventType::Absolute { time }, val))\n    }\n\n    const fn at(self, time: DateTime<Utc>, val: u64) -> NewRawMetric {\n        let key = self.0;\n        NewRawMetric {\n            key,\n            kind: EventType::Absolute { time },\n            value: val,\n        }\n    }\n\n    fn key(&self) -> &MetricsKey {\n        &self.0\n    }\n}\n\n/// Helper type which each individual metric kind can return to produce only incremental values.\nstruct IncrementalValueFactory(MetricsKey);\n\nimpl IncrementalValueFactory {\n    
#[allow(clippy::wrong_self_convention)]\n    const fn from_until(\n        self,\n        prev_end: DateTime<Utc>,\n        up_to: DateTime<Utc>,\n        val: u64,\n    ) -> NewRawMetric {\n        let key = self.0;\n        // cannot assert prev_end < up_to because these are realtime clock based\n        let when = EventType::Incremental {\n            start_time: prev_end,\n            stop_time: up_to,\n        };\n        NewRawMetric {\n            key,\n            kind: when,\n            value: val,\n        }\n    }\n\n    #[allow(clippy::wrong_self_convention)]\n    #[cfg(test)]\n    const fn from_until_old_format(\n        self,\n        prev_end: DateTime<Utc>,\n        up_to: DateTime<Utc>,\n        val: u64,\n    ) -> super::RawMetric {\n        let key = self.0;\n        // cannot assert prev_end < up_to because these are realtime clock based\n        let when = EventType::Incremental {\n            start_time: prev_end,\n            stop_time: up_to,\n        };\n        (key, (when, val))\n    }\n\n    fn key(&self) -> &MetricsKey {\n        &self.0\n    }\n}\n\n// the static part of a MetricsKey\nimpl MetricsKey {\n    /// Absolute value of [`Timeline::get_last_record_lsn`].\n    ///\n    /// [`Timeline::get_last_record_lsn`]: crate::tenant::Timeline::get_last_record_lsn\n    const fn written_size(tenant_id: TenantId, timeline_id: TimelineId) -> AbsoluteValueFactory {\n        MetricsKey {\n            tenant_id,\n            timeline_id: Some(timeline_id),\n            metric: Name::WrittenSize,\n        }\n        .absolute_values()\n    }\n\n    /// Values will be the difference of the latest [`MetricsKey::written_size`] to what we\n    /// previously sent, starting from the previously sent incremental time range ending at the\n    /// latest absolute measurement.\n    const fn written_size_delta(\n        tenant_id: TenantId,\n        timeline_id: TimelineId,\n    ) -> IncrementalValueFactory {\n        MetricsKey {\n            tenant_id,\n  
          timeline_id: Some(timeline_id),\n            metric: Name::WrittenSizeDelta,\n        }\n        .incremental_values()\n    }\n\n    /// `written_size` - `ancestor_lsn`.\n    const fn written_size_since_parent(\n        tenant_id: TenantId,\n        timeline_id: TimelineId,\n    ) -> AbsoluteValueFactory {\n        MetricsKey {\n            tenant_id,\n            timeline_id: Some(timeline_id),\n            metric: Name::WrittenSizeSinceParent,\n        }\n        .absolute_values()\n    }\n\n    /// `written_size` - max(`pitr_cutoff`, `ancestor_lsn`).\n    const fn pitr_history_size_since_parent(\n        tenant_id: TenantId,\n        timeline_id: TimelineId,\n    ) -> AbsoluteValueFactory {\n        MetricsKey {\n            tenant_id,\n            timeline_id: Some(timeline_id),\n            metric: Name::PitrHistorySizeSinceParent,\n        }\n        .absolute_values()\n    }\n\n    /// Exact [`Timeline::get_current_logical_size`].\n    ///\n    /// [`Timeline::get_current_logical_size`]: crate::tenant::Timeline::get_current_logical_size\n    const fn timeline_logical_size(\n        tenant_id: TenantId,\n        timeline_id: TimelineId,\n    ) -> AbsoluteValueFactory {\n        MetricsKey {\n            tenant_id,\n            timeline_id: Some(timeline_id),\n            metric: Name::LogicalSize,\n        }\n        .absolute_values()\n    }\n\n    /// [`TenantShard::remote_size`]\n    ///\n    /// [`TenantShard::remote_size`]: crate::tenant::TenantShard::remote_size\n    const fn remote_storage_size(tenant_id: TenantId) -> AbsoluteValueFactory {\n        MetricsKey {\n            tenant_id,\n            timeline_id: None,\n            metric: Name::RemoteSize,\n        }\n        .absolute_values()\n    }\n\n    /// [`TenantShard::cached_synthetic_size`] as refreshed by [`calculate_synthetic_size_worker`].\n    ///\n    /// [`TenantShard::cached_synthetic_size`]: crate::tenant::TenantShard::cached_synthetic_size\n    /// 
[`calculate_synthetic_size_worker`]: super::calculate_synthetic_size_worker\n    const fn synthetic_size(tenant_id: TenantId) -> AbsoluteValueFactory {\n        MetricsKey {\n            tenant_id,\n            timeline_id: None,\n            metric: Name::SyntheticSize,\n        }\n        .absolute_values()\n    }\n}\n\npub(super) async fn collect_all_metrics(\n    tenant_manager: &Arc<TenantManager>,\n    cached_metrics: &Cache,\n    ctx: &RequestContext,\n) -> Vec<NewRawMetric> {\n    use pageserver_api::models::TenantState;\n\n    let started_at = std::time::Instant::now();\n\n    let tenants = match tenant_manager.list_tenants() {\n        Ok(tenants) => tenants,\n        Err(err) => {\n            tracing::error!(\"failed to list tenants: {:?}\", err);\n            return vec![];\n        }\n    };\n\n    let tenants = futures::stream::iter(tenants).filter_map(|(id, state, _)| async move {\n        if state != TenantState::Active || !id.is_shard_zero() {\n            None\n        } else {\n            tenant_manager\n                .get_attached_tenant_shard(id)\n                .ok()\n                .map(|tenant| (id.tenant_id, tenant))\n        }\n    });\n\n    let res = collect(tenants, cached_metrics, ctx).await;\n\n    tracing::info!(\n        elapsed_ms = started_at.elapsed().as_millis(),\n        total = res.len(),\n        \"collected metrics\"\n    );\n\n    res\n}\n\nasync fn collect<S>(tenants: S, cache: &Cache, ctx: &RequestContext) -> Vec<NewRawMetric>\nwhere\n    S: futures::stream::Stream<Item = (TenantId, Arc<crate::tenant::TenantShard>)>,\n{\n    let mut current_metrics: Vec<NewRawMetric> = Vec::new();\n\n    let mut tenants = std::pin::pin!(tenants);\n\n    while let Some((tenant_id, tenant)) = tenants.next().await {\n        let timelines = tenant.list_timelines();\n        for timeline in timelines {\n            let timeline_id = timeline.timeline_id;\n\n            match TimelineSnapshot::collect(&timeline, ctx) {\n                
Ok(Some(snap)) => {\n                    snap.to_metrics(\n                        tenant_id,\n                        timeline_id,\n                        Utc::now(),\n                        &mut current_metrics,\n                        cache,\n                    );\n                }\n                Ok(None) => {}\n                Err(e) => {\n                    tracing::error!(\n                        \"failed to get metrics values for tenant {tenant_id} timeline {}: {e:#?}\",\n                        timeline.timeline_id\n                    );\n                    continue;\n                }\n            }\n        }\n\n        let snap = TenantSnapshot::collect(&tenant);\n        snap.to_metrics(tenant_id, Utc::now(), cache, &mut current_metrics);\n    }\n\n    current_metrics\n}\n\n/// In-between abstraction to allow testing metrics without actual Tenants.\nstruct TenantSnapshot {\n    remote_size: u64,\n    synthetic_size: u64,\n}\n\nimpl TenantSnapshot {\n    /// Collect tenant status to have metrics created out of it.\n    fn collect(t: &Arc<crate::tenant::TenantShard>) -> Self {\n        TenantSnapshot {\n            remote_size: t.remote_size(),\n            // Note that this metric is calculated in a separate bgworker\n            // Here we only use cached value, which may lag behind the real latest one\n            synthetic_size: t.cached_synthetic_size(),\n        }\n    }\n\n    fn to_metrics(\n        &self,\n        tenant_id: TenantId,\n        now: DateTime<Utc>,\n        cached: &Cache,\n        metrics: &mut Vec<NewRawMetric>,\n    ) {\n        let remote_size = MetricsKey::remote_storage_size(tenant_id).at(now, self.remote_size);\n\n        let synthetic_size = {\n            let factory = MetricsKey::synthetic_size(tenant_id);\n            let mut synthetic_size = self.synthetic_size;\n\n            if synthetic_size == 0 {\n                if let Some(item) = cached.get(factory.key()) {\n                    // use the latest value 
from previous session, TODO: check generation number\n                    synthetic_size = item.value;\n                }\n            }\n\n            if synthetic_size != 0 {\n                // only send non-zeroes because otherwise these show up as errors in logs\n                Some(factory.at(now, synthetic_size))\n            } else {\n                None\n            }\n        };\n\n        metrics.extend([Some(remote_size), synthetic_size].into_iter().flatten());\n    }\n}\n\n/// Internal type to make timeline metric production testable.\n///\n/// As this value type contains all of the information needed from a timeline to produce the\n/// metrics, it can easily be created with different values in test.\nstruct TimelineSnapshot {\n    loaded_at: (Lsn, SystemTime),\n    last_record_lsn: Lsn,\n    ancestor_lsn: Lsn,\n    current_exact_logical_size: Option<u64>,\n    /// Whether PITR is enabled (pitr_interval > 0).\n    pitr_enabled: bool,\n    /// The PITR cutoff LSN. None if not yet initialized. 
If PITR is disabled, this is approximately\n    /// Some(last_record_lsn), but may lag behind it since it's computed periodically.\n    pitr_cutoff: Option<Lsn>,\n}\n\nimpl TimelineSnapshot {\n    /// Collect the metrics from an actual timeline.\n    ///\n    /// Fails currently only when [`Timeline::get_current_logical_size`] fails.\n    ///\n    /// [`Timeline::get_current_logical_size`]: crate::tenant::Timeline::get_current_logical_size\n    fn collect(\n        t: &Arc<crate::tenant::Timeline>,\n        ctx: &RequestContext,\n    ) -> anyhow::Result<Option<Self>> {\n        if !t.is_active() {\n            // no collection for broken or stopping needed, we will still keep the cached values\n            // though at the caller.\n            Ok(None)\n        } else {\n            let loaded_at = t.loaded_at;\n            let last_record_lsn = t.get_last_record_lsn();\n            let ancestor_lsn = t.get_ancestor_lsn();\n            let pitr_enabled = !t.get_pitr_interval().is_zero();\n            let pitr_cutoff = t.gc_info.read().unwrap().cutoffs.time;\n\n            let current_exact_logical_size = {\n                let span = tracing::info_span!(\"collect_metrics_iteration\", tenant_id = %t.tenant_shard_id.tenant_id, timeline_id = %t.timeline_id);\n                let size = span.in_scope(|| {\n                    t.get_current_logical_size(\n                        crate::tenant::timeline::GetLogicalSizePriority::Background,\n                        ctx,\n                    )\n                });\n                match size {\n                    // Only send timeline logical size when it is fully calculated.\n                    CurrentLogicalSize::Exact(ref size) => Some(size.into()),\n                    CurrentLogicalSize::Approximate(_) => None,\n                }\n            };\n\n            Ok(Some(TimelineSnapshot {\n                loaded_at,\n                last_record_lsn,\n                ancestor_lsn,\n                
current_exact_logical_size,\n                pitr_enabled,\n                pitr_cutoff,\n            }))\n        }\n    }\n\n    /// Produce the timeline consumption metrics into the `metrics` argument.\n    fn to_metrics(\n        &self,\n        tenant_id: TenantId,\n        timeline_id: TimelineId,\n        now: DateTime<Utc>,\n        metrics: &mut Vec<NewRawMetric>,\n        cache: &Cache,\n    ) {\n        let timeline_written_size = u64::from(self.last_record_lsn);\n\n        let written_size_delta_key = MetricsKey::written_size_delta(tenant_id, timeline_id);\n\n        let last_stop_time = cache.get(written_size_delta_key.key()).map(|item| {\n            item.kind\n                .incremental_timerange()\n                .expect(\"never create EventType::Absolute for written_size_delta\")\n                .end\n        });\n\n        let written_size_now =\n            MetricsKey::written_size(tenant_id, timeline_id).at(now, timeline_written_size);\n\n        // by default, use the last sent written_size as the basis for\n        // calculating the delta. 
if we don't yet have one, use the load time value.\n        let prev: (DateTime<Utc>, u64) = cache\n            .get(&written_size_now.key)\n            .map(|item| {\n                // use the prev time from our last incremental update, or default to latest\n                // absolute update on the first round.\n                let prev_at = item\n                    .kind\n                    .absolute_time()\n                    .expect(\"never create EventType::Incremental for written_size\");\n                let prev_at = last_stop_time.unwrap_or(prev_at);\n                (*prev_at, item.value)\n            })\n            .unwrap_or_else(|| {\n                // if we don't have a previous point of comparison, compare to the load time\n                // lsn.\n                let (disk_consistent_lsn, loaded_at) = &self.loaded_at;\n                (DateTime::from(*loaded_at), disk_consistent_lsn.0)\n            });\n\n        let up_to = now;\n\n        let written_size_last = written_size_now.value.max(prev.1); // don't regress\n\n        if let Some(delta) = written_size_now.value.checked_sub(prev.1) {\n            let key_value = written_size_delta_key.from_until(prev.0, up_to, delta);\n            // written_size_delta\n            metrics.push(key_value);\n            // written_size\n            metrics.push(written_size_now);\n        } else {\n            // the cached value was ahead of us, report zero until we've caught up\n            metrics.push(written_size_delta_key.from_until(prev.0, up_to, 0));\n            // the cached value was ahead of us, report the same until we've caught up\n            metrics.push(NewRawMetric {\n                key: written_size_now.key,\n                kind: written_size_now.kind,\n                value: prev.1,\n            });\n        }\n\n        // Compute the branch-local written size.\n        let written_size_since_parent_key =\n            MetricsKey::written_size_since_parent(tenant_id, 
timeline_id);\n        metrics.push(\n            written_size_since_parent_key\n                .at(now, written_size_last.saturating_sub(self.ancestor_lsn.0)),\n        );\n\n        // Compute the branch-local PITR history size. Not emitted if GC hasn't yet computed the\n        // PITR cutoff. 0 if PITR is disabled.\n        let pitr_history_size_since_parent_key =\n            MetricsKey::pitr_history_size_since_parent(tenant_id, timeline_id);\n        if !self.pitr_enabled {\n            metrics.push(pitr_history_size_since_parent_key.at(now, 0));\n        } else if let Some(pitr_cutoff) = self.pitr_cutoff {\n            metrics.push(pitr_history_size_since_parent_key.at(\n                now,\n                written_size_last.saturating_sub(pitr_cutoff.max(self.ancestor_lsn).0),\n            ));\n        }\n\n        {\n            let factory = MetricsKey::timeline_logical_size(tenant_id, timeline_id);\n            let current_or_previous = self\n                .current_exact_logical_size\n                .or_else(|| cache.get(factory.key()).map(|item| item.value));\n\n            if let Some(size) = current_or_previous {\n                metrics.push(factory.at(now, size));\n            }\n        }\n    }\n}\n\n#[cfg(test)]\nmod tests;\n\n#[cfg(test)]\npub(crate) use tests::{metric_examples, metric_examples_old};\n"
  },
  {
    "path": "pageserver/src/consumption_metrics/upload.rs",
    "content": "use std::error::Error as _;\nuse std::time::SystemTime;\n\nuse chrono::{DateTime, Utc};\nuse consumption_metrics::{CHUNK_SIZE, Event, EventChunk, IdempotencyKey};\nuse remote_storage::{GenericRemoteStorage, RemotePath};\nuse tokio::io::AsyncWriteExt;\nuse tokio_util::sync::CancellationToken;\nuse tracing::Instrument;\nuse utils::id::{TenantId, TimelineId};\n\nuse super::metrics::Name;\nuse super::{Cache, MetricsKey, NewRawMetric, RawMetric};\n\n/// How the metrics from pageserver are identified.\n#[derive(serde::Serialize, serde::Deserialize, Debug, Clone, Copy, PartialEq)]\nstruct Ids {\n    pub(super) tenant_id: TenantId,\n    #[serde(skip_serializing_if = \"Option::is_none\")]\n    pub(super) timeline_id: Option<TimelineId>,\n}\n\n/// Serialize and write metrics to an HTTP endpoint\n#[tracing::instrument(skip_all, fields(metrics_total = %metrics.len()))]\npub(super) async fn upload_metrics_http(\n    client: &reqwest::Client,\n    metric_collection_endpoint: &reqwest::Url,\n    cancel: &CancellationToken,\n    metrics: &[NewRawMetric],\n    cached_metrics: &mut Cache,\n    idempotency_keys: &[IdempotencyKey<'_>],\n) -> anyhow::Result<()> {\n    let mut uploaded = 0;\n    let mut failed = 0;\n\n    let started_at = std::time::Instant::now();\n\n    let mut iter = serialize_in_chunks(CHUNK_SIZE, metrics, idempotency_keys);\n\n    while let Some(res) = iter.next() {\n        let (chunk, body) = res?;\n\n        let event_bytes = body.len();\n\n        let is_last = iter.len() == 0;\n\n        let res = upload(client, metric_collection_endpoint, body, cancel, is_last)\n            .instrument(tracing::info_span!(\n                \"upload\",\n                %event_bytes,\n                uploaded,\n                total = metrics.len(),\n            ))\n            .await;\n\n        match res {\n            Ok(()) => {\n                for item in chunk {\n                    cached_metrics.insert(item.key, item.clone());\n                }\n       
         uploaded += chunk.len();\n            }\n            Err(_) => {\n                // failure(s) have already been logged\n                //\n                // however this is an inconsistency: if we crash here, we will start with the\n                // values as uploaded. in practice, the rejections no longer happen.\n                failed += chunk.len();\n            }\n        }\n    }\n\n    let elapsed = started_at.elapsed();\n\n    tracing::info!(\n        uploaded,\n        failed,\n        elapsed_ms = elapsed.as_millis(),\n        \"done sending metrics\"\n    );\n\n    Ok(())\n}\n\n/// Serialize and write metrics to a remote storage object\n#[tracing::instrument(skip_all, fields(metrics_total = %metrics.len()))]\npub(super) async fn upload_metrics_bucket(\n    client: &GenericRemoteStorage,\n    cancel: &CancellationToken,\n    node_id: &str,\n    metrics: &[NewRawMetric],\n    idempotency_keys: &[IdempotencyKey<'_>],\n) -> anyhow::Result<()> {\n    if metrics.is_empty() {\n        // Skip uploads if we have no metrics, so that readers don't have to handle the edge case\n        // of an empty object.\n        return Ok(());\n    }\n\n    // Compose object path\n    let datetime: DateTime<Utc> = SystemTime::now().into();\n    let ts_prefix = datetime.format(\"year=%Y/month=%m/day=%d/hour=%H/%H:%M:%SZ\");\n    let path = RemotePath::from_string(&format!(\"{ts_prefix}_{node_id}.ndjson.gz\"))?;\n\n    // Set up a gzip writer into a buffer\n    let mut compressed_bytes: Vec<u8> = Vec::new();\n    let compressed_writer = std::io::Cursor::new(&mut compressed_bytes);\n    let mut gzip_writer = async_compression::tokio::write::GzipEncoder::new(compressed_writer);\n\n    // Serialize and write into compressed buffer\n    let started_at = std::time::Instant::now();\n    for res in serialize_in_chunks_ndjson(CHUNK_SIZE, metrics, idempotency_keys) {\n        let (_chunk, body) = res?;\n        gzip_writer.write_all(&body).await?;\n    }\n    
gzip_writer.flush().await?;\n    gzip_writer.shutdown().await?;\n    let compressed_length = compressed_bytes.len();\n\n    // Write to remote storage\n    client\n        .upload_storage_object(\n            futures::stream::once(futures::future::ready(Ok(compressed_bytes.into()))),\n            compressed_length,\n            &path,\n            cancel,\n        )\n        .await?;\n    let elapsed = started_at.elapsed();\n\n    tracing::info!(\n        compressed_length,\n        elapsed_ms = elapsed.as_millis(),\n        \"write metrics bucket at {path}\",\n    );\n\n    Ok(())\n}\n\n/// Serializes the input metrics as JSON in chunks of chunk_size. The provided\n/// idempotency keys are injected into the corresponding metric events (reused\n/// across different metrics sinks), and must have the same length as input.\nfn serialize_in_chunks<'a>(\n    chunk_size: usize,\n    input: &'a [NewRawMetric],\n    idempotency_keys: &'a [IdempotencyKey<'a>],\n) -> impl ExactSizeIterator<Item = Result<(&'a [NewRawMetric], bytes::Bytes), serde_json::Error>> + 'a\n{\n    use bytes::BufMut;\n\n    assert_eq!(input.len(), idempotency_keys.len());\n\n    struct Iter<'a> {\n        inner: std::slice::Chunks<'a, NewRawMetric>,\n        idempotency_keys: std::slice::Iter<'a, IdempotencyKey<'a>>,\n        chunk_size: usize,\n\n        // write to a BytesMut so that we can cheaply clone the frozen Bytes for retries\n        buffer: bytes::BytesMut,\n        // chunk amount of events are reused to produce the serialized document\n        scratch: Vec<Event<Ids, Name>>,\n    }\n\n    impl<'a> Iterator for Iter<'a> {\n        type Item = Result<(&'a [NewRawMetric], bytes::Bytes), serde_json::Error>;\n\n        fn next(&mut self) -> Option<Self::Item> {\n            let chunk = self.inner.next()?;\n\n            if self.scratch.is_empty() {\n                // first round: create events with N strings\n                self.scratch.extend(\n                    chunk\n                     
   .iter()\n                        .zip(&mut self.idempotency_keys)\n                        .map(|(raw_metric, key)| raw_metric.as_event(key)),\n                );\n            } else {\n                // next rounds: update_in_place to reuse allocations\n                assert_eq!(self.scratch.len(), self.chunk_size);\n                itertools::izip!(self.scratch.iter_mut(), chunk, &mut self.idempotency_keys)\n                    .for_each(|(slot, raw_metric, key)| raw_metric.update_in_place(slot, key));\n            }\n\n            let res = serde_json::to_writer(\n                (&mut self.buffer).writer(),\n                &EventChunk {\n                    events: (&self.scratch[..chunk.len()]).into(),\n                },\n            );\n\n            match res {\n                Ok(()) => Some(Ok((chunk, self.buffer.split().freeze()))),\n                Err(e) => Some(Err(e)),\n            }\n        }\n\n        fn size_hint(&self) -> (usize, Option<usize>) {\n            self.inner.size_hint()\n        }\n    }\n\n    impl ExactSizeIterator for Iter<'_> {}\n\n    let buffer = bytes::BytesMut::new();\n    let inner = input.chunks(chunk_size);\n    let idempotency_keys = idempotency_keys.iter();\n    let scratch = Vec::new();\n\n    Iter {\n        inner,\n        idempotency_keys,\n        chunk_size,\n        buffer,\n        scratch,\n    }\n}\n\n/// Serializes the input metrics as NDJSON in chunks of chunk_size. Each event\n/// is serialized as a separate JSON object on its own line. 
The provided\n/// idempotency keys are injected into the corresponding metric events (reused\n/// across different metrics sinks), and must have the same length as input.\nfn serialize_in_chunks_ndjson<'a>(\n    chunk_size: usize,\n    input: &'a [NewRawMetric],\n    idempotency_keys: &'a [IdempotencyKey<'a>],\n) -> impl ExactSizeIterator<Item = Result<(&'a [NewRawMetric], bytes::Bytes), serde_json::Error>> + 'a\n{\n    use bytes::BufMut;\n\n    assert_eq!(input.len(), idempotency_keys.len());\n\n    struct Iter<'a> {\n        inner: std::slice::Chunks<'a, NewRawMetric>,\n        idempotency_keys: std::slice::Iter<'a, IdempotencyKey<'a>>,\n        chunk_size: usize,\n\n        // write to a BytesMut so that we can cheaply clone the frozen Bytes for retries\n        buffer: bytes::BytesMut,\n        // chunk amount of events are reused to produce the serialized document\n        scratch: Vec<Event<Ids, Name>>,\n    }\n\n    impl<'a> Iterator for Iter<'a> {\n        type Item = Result<(&'a [NewRawMetric], bytes::Bytes), serde_json::Error>;\n\n        fn next(&mut self) -> Option<Self::Item> {\n            let chunk = self.inner.next()?;\n\n            if self.scratch.is_empty() {\n                // first round: create events with N strings\n                self.scratch.extend(\n                    chunk\n                        .iter()\n                        .zip(&mut self.idempotency_keys)\n                        .map(|(raw_metric, key)| raw_metric.as_event(key)),\n                );\n            } else {\n                // next rounds: update_in_place to reuse allocations\n                assert_eq!(self.scratch.len(), self.chunk_size);\n                itertools::izip!(self.scratch.iter_mut(), chunk, &mut self.idempotency_keys)\n                    .for_each(|(slot, raw_metric, key)| raw_metric.update_in_place(slot, key));\n            }\n\n            // Serialize each event as NDJSON (one JSON object per line)\n            for event in 
self.scratch[..chunk.len()].iter() {\n                let res = serde_json::to_writer((&mut self.buffer).writer(), event);\n                if let Err(e) = res {\n                    return Some(Err(e));\n                }\n                // Add newline after each event to follow NDJSON format\n                self.buffer.put_u8(b'\\n');\n            }\n\n            Some(Ok((chunk, self.buffer.split().freeze())))\n        }\n\n        fn size_hint(&self) -> (usize, Option<usize>) {\n            self.inner.size_hint()\n        }\n    }\n\n    impl ExactSizeIterator for Iter<'_> {}\n\n    let buffer = bytes::BytesMut::new();\n    let inner = input.chunks(chunk_size);\n    let idempotency_keys = idempotency_keys.iter();\n    let scratch = Vec::new();\n\n    Iter {\n        inner,\n        idempotency_keys,\n        chunk_size,\n        buffer,\n        scratch,\n    }\n}\n\ntrait RawMetricExt {\n    fn as_event(&self, key: &IdempotencyKey<'_>) -> Event<Ids, Name>;\n    fn update_in_place(&self, event: &mut Event<Ids, Name>, key: &IdempotencyKey<'_>);\n}\n\nimpl RawMetricExt for RawMetric {\n    fn as_event(&self, key: &IdempotencyKey<'_>) -> Event<Ids, Name> {\n        let MetricsKey {\n            metric,\n            tenant_id,\n            timeline_id,\n        } = self.0;\n\n        let (kind, value) = self.1;\n\n        Event {\n            kind,\n            metric,\n            idempotency_key: key.to_string(),\n            value,\n            extra: Ids {\n                tenant_id,\n                timeline_id,\n            },\n        }\n    }\n\n    fn update_in_place(&self, event: &mut Event<Ids, Name>, key: &IdempotencyKey<'_>) {\n        use std::fmt::Write;\n\n        let MetricsKey {\n            metric,\n            tenant_id,\n            timeline_id,\n        } = self.0;\n\n        let (kind, value) = self.1;\n\n        *event = Event {\n            kind,\n            metric,\n            idempotency_key: {\n                
event.idempotency_key.clear();\n                write!(event.idempotency_key, \"{key}\").unwrap();\n                std::mem::take(&mut event.idempotency_key)\n            },\n            value,\n            extra: Ids {\n                tenant_id,\n                timeline_id,\n            },\n        };\n    }\n}\n\nimpl RawMetricExt for NewRawMetric {\n    fn as_event(&self, key: &IdempotencyKey<'_>) -> Event<Ids, Name> {\n        let MetricsKey {\n            metric,\n            tenant_id,\n            timeline_id,\n        } = self.key;\n\n        let kind = self.kind;\n        let value = self.value;\n\n        Event {\n            kind,\n            metric,\n            idempotency_key: key.to_string(),\n            value,\n            extra: Ids {\n                tenant_id,\n                timeline_id,\n            },\n        }\n    }\n\n    fn update_in_place(&self, event: &mut Event<Ids, Name>, key: &IdempotencyKey<'_>) {\n        use std::fmt::Write;\n\n        let MetricsKey {\n            metric,\n            tenant_id,\n            timeline_id,\n        } = self.key;\n\n        let kind = self.kind;\n        let value = self.value;\n\n        *event = Event {\n            kind,\n            metric,\n            idempotency_key: {\n                event.idempotency_key.clear();\n                write!(event.idempotency_key, \"{key}\").unwrap();\n                std::mem::take(&mut event.idempotency_key)\n            },\n            value,\n            extra: Ids {\n                tenant_id,\n                timeline_id,\n            },\n        };\n    }\n}\n\npub(crate) trait KeyGen<'a> {\n    fn generate(&self) -> IdempotencyKey<'a>;\n}\n\nimpl<'a> KeyGen<'a> for &'a str {\n    fn generate(&self) -> IdempotencyKey<'a> {\n        IdempotencyKey::generate(self)\n    }\n}\n\nenum UploadError {\n    Rejected(reqwest::StatusCode),\n    Reqwest(reqwest::Error),\n    Cancelled,\n}\n\nimpl std::fmt::Debug for UploadError {\n    fn fmt(&self, f: &mut 
std::fmt::Formatter<'_>) -> std::fmt::Result {\n        // use same impl because backoff::retry will log this using both\n        std::fmt::Display::fmt(self, f)\n    }\n}\n\nimpl std::fmt::Display for UploadError {\n    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {\n        use UploadError::*;\n\n        match self {\n            Rejected(code) => write!(f, \"server rejected the metrics with {code}\"),\n            Reqwest(e) => write!(\n                f,\n                \"request failed: {e}{}\",\n                e.source().map(|e| format!(\": {e}\")).unwrap_or_default()\n            ),\n            Cancelled => write!(f, \"cancelled\"),\n        }\n    }\n}\n\nimpl UploadError {\n    fn is_reject(&self) -> bool {\n        matches!(self, UploadError::Rejected(_))\n    }\n}\n\n// this is consumed by the test verifiers\nstatic LAST_IN_BATCH: reqwest::header::HeaderName =\n    reqwest::header::HeaderName::from_static(\"pageserver-metrics-last-upload-in-batch\");\n\nasync fn upload(\n    client: &reqwest::Client,\n    metric_collection_endpoint: &reqwest::Url,\n    body: bytes::Bytes,\n    cancel: &CancellationToken,\n    is_last: bool,\n) -> Result<(), UploadError> {\n    let warn_after = 3;\n    let max_attempts = 10;\n\n    // this is used only with tests so far\n    let last_value = if is_last { \"true\" } else { \"false\" };\n\n    let res = utils::backoff::retry(\n        || async {\n            let res = client\n                .post(metric_collection_endpoint.clone())\n                .header(reqwest::header::CONTENT_TYPE, \"application/json\")\n                .header(LAST_IN_BATCH.clone(), last_value)\n                .body(body.clone())\n                .send()\n                .await;\n\n            let res = res.and_then(|res| res.error_for_status());\n\n            // 10 redirects are normally allowed, so we don't need to worry about 3xx\n            match res {\n                Ok(_response) => Ok(()),\n                Err(e) => 
{\n                    let status = e.status().filter(|s| s.is_client_error());\n                    if let Some(status) = status {\n                        // rejection used to be a thing when the server could reject a\n                        // whole batch of metrics if one metric was bad.\n                        Err(UploadError::Rejected(status))\n                    } else {\n                        Err(UploadError::Reqwest(e))\n                    }\n                }\n            }\n        },\n        UploadError::is_reject,\n        warn_after,\n        max_attempts,\n        \"upload consumption_metrics\",\n        cancel,\n    )\n    .await\n    .ok_or_else(|| UploadError::Cancelled)\n    .and_then(|x| x);\n\n    match &res {\n        Ok(_) => {}\n        Err(e) if e.is_reject() => {\n            // permanent errors currently do not get logged by backoff::retry\n            // display alternate has no effect, but keeping it here for easier pattern matching.\n            tracing::error!(\"failed to upload metrics: {e:#}\");\n        }\n        Err(_) => {\n            // these have been logged already\n        }\n    }\n\n    res\n}\n\n#[cfg(test)]\nmod tests {\n    use chrono::{DateTime, Utc};\n    use once_cell::sync::Lazy;\n\n    use super::*;\n    use crate::consumption_metrics::NewMetricsRefRoot;\n    use crate::consumption_metrics::disk_cache::read_metrics_from_serde_value;\n\n    #[test]\n    fn chunked_serialization() {\n        let examples = metric_samples();\n        assert!(examples.len() > 1);\n\n        let now = Utc::now();\n        let idempotency_keys = (0..examples.len())\n            .map(|i| FixedGen::new(now, \"1\", i as u16).generate())\n            .collect::<Vec<_>>();\n\n        // need to use Event here because serde_json::Value uses default hashmap, not linked\n        // hashmap\n        #[derive(serde::Deserialize)]\n        struct EventChunk {\n            events: Vec<Event<Ids, Name>>,\n        }\n\n        let correct = 
serialize_in_chunks(examples.len(), &examples, &idempotency_keys)\n            .map(|res| res.unwrap().1)\n            .flat_map(|body| serde_json::from_slice::<EventChunk>(&body).unwrap().events)\n            .collect::<Vec<_>>();\n\n        for chunk_size in 1..examples.len() {\n            let actual = serialize_in_chunks(chunk_size, &examples, &idempotency_keys)\n                .map(|res| res.unwrap().1)\n                .flat_map(|body| serde_json::from_slice::<EventChunk>(&body).unwrap().events)\n                .collect::<Vec<_>>();\n\n            // if these are equal, it means that multi-chunking version works as well\n            assert_eq!(correct, actual);\n        }\n    }\n\n    #[test]\n    fn chunked_serialization_ndjson() {\n        let examples = metric_samples();\n        assert!(examples.len() > 1);\n\n        let now = Utc::now();\n        let idempotency_keys = (0..examples.len())\n            .map(|i| FixedGen::new(now, \"1\", i as u16).generate())\n            .collect::<Vec<_>>();\n\n        // Parse NDJSON format - each line is a separate JSON object\n        let parse_ndjson = |body: &[u8]| -> Vec<Event<Ids, Name>> {\n            let body_str = std::str::from_utf8(body).unwrap();\n            body_str\n                .trim_end_matches('\\n')\n                .lines()\n                .filter(|line| !line.is_empty())\n                .map(|line| serde_json::from_str::<Event<Ids, Name>>(line).unwrap())\n                .collect()\n        };\n\n        let correct = serialize_in_chunks_ndjson(examples.len(), &examples, &idempotency_keys)\n            .map(|res| res.unwrap().1)\n            .flat_map(|body| parse_ndjson(&body))\n            .collect::<Vec<_>>();\n\n        for chunk_size in 1..examples.len() {\n            let actual = serialize_in_chunks_ndjson(chunk_size, &examples, &idempotency_keys)\n                .map(|res| res.unwrap().1)\n                .flat_map(|body| parse_ndjson(&body))\n                
.collect::<Vec<_>>();\n\n            // if these are equal, it means that multi-chunking version works as well\n            assert_eq!(correct, actual);\n        }\n    }\n\n    #[derive(Clone, Copy)]\n    struct FixedGen<'a>(chrono::DateTime<chrono::Utc>, &'a str, u16);\n\n    impl<'a> FixedGen<'a> {\n        fn new(now: chrono::DateTime<chrono::Utc>, node_id: &'a str, nonce: u16) -> Self {\n            FixedGen(now, node_id, nonce)\n        }\n    }\n\n    impl<'a> KeyGen<'a> for FixedGen<'a> {\n        fn generate(&self) -> IdempotencyKey<'a> {\n            IdempotencyKey::for_tests(self.0, self.1, self.2)\n        }\n    }\n\n    static SAMPLES_NOW: Lazy<DateTime<Utc>> = Lazy::new(|| {\n        DateTime::parse_from_rfc3339(\"2023-09-15T00:00:00.123456789Z\")\n            .unwrap()\n            .into()\n    });\n\n    #[test]\n    fn metric_image_stability() {\n        // it is important that these strings stay as they are\n\n        let examples = [\n            (\n                line!(),\n                r#\"{\"type\":\"absolute\",\"time\":\"2023-09-15T00:00:00.123456789Z\",\"metric\":\"written_size\",\"idempotency_key\":\"2023-09-15 00:00:00.123456789 UTC-1-0000\",\"value\":0,\"tenant_id\":\"00000000000000000000000000000000\",\"timeline_id\":\"ffffffffffffffffffffffffffffffff\"}\"#,\n            ),\n            (\n                line!(),\n                r#\"{\"type\":\"incremental\",\"start_time\":\"2023-09-14T00:00:00.123456789Z\",\"stop_time\":\"2023-09-15T00:00:00.123456789Z\",\"metric\":\"written_data_bytes_delta\",\"idempotency_key\":\"2023-09-15 00:00:00.123456789 UTC-1-0000\",\"value\":0,\"tenant_id\":\"00000000000000000000000000000000\",\"timeline_id\":\"ffffffffffffffffffffffffffffffff\"}\"#,\n            ),\n            (\n                line!(),\n                r#\"{\"type\":\"absolute\",\"time\":\"2023-09-15T00:00:00.123456789Z\",\"metric\":\"written_size_since_parent\",\"idempotency_key\":\"2023-09-15 00:00:00.123456789 
UTC-1-0000\",\"value\":0,\"tenant_id\":\"00000000000000000000000000000000\",\"timeline_id\":\"ffffffffffffffffffffffffffffffff\"}\"#,\n            ),\n            (\n                line!(),\n                r#\"{\"type\":\"absolute\",\"time\":\"2023-09-15T00:00:00.123456789Z\",\"metric\":\"pitr_history_size_since_parent\",\"idempotency_key\":\"2023-09-15 00:00:00.123456789 UTC-1-0000\",\"value\":0,\"tenant_id\":\"00000000000000000000000000000000\",\"timeline_id\":\"ffffffffffffffffffffffffffffffff\"}\"#,\n            ),\n            (\n                line!(),\n                r#\"{\"type\":\"absolute\",\"time\":\"2023-09-15T00:00:00.123456789Z\",\"metric\":\"timeline_logical_size\",\"idempotency_key\":\"2023-09-15 00:00:00.123456789 UTC-1-0000\",\"value\":0,\"tenant_id\":\"00000000000000000000000000000000\",\"timeline_id\":\"ffffffffffffffffffffffffffffffff\"}\"#,\n            ),\n            (\n                line!(),\n                r#\"{\"type\":\"absolute\",\"time\":\"2023-09-15T00:00:00.123456789Z\",\"metric\":\"remote_storage_size\",\"idempotency_key\":\"2023-09-15 00:00:00.123456789 UTC-1-0000\",\"value\":0,\"tenant_id\":\"00000000000000000000000000000000\"}\"#,\n            ),\n            (\n                line!(),\n                r#\"{\"type\":\"absolute\",\"time\":\"2023-09-15T00:00:00.123456789Z\",\"metric\":\"synthetic_storage_size\",\"idempotency_key\":\"2023-09-15 00:00:00.123456789 UTC-1-0000\",\"value\":1,\"tenant_id\":\"00000000000000000000000000000000\"}\"#,\n            ),\n        ];\n\n        let idempotency_key = consumption_metrics::IdempotencyKey::for_tests(*SAMPLES_NOW, \"1\", 0);\n        let examples = examples.into_iter().zip(metric_samples());\n\n        for ((line, expected), item) in examples {\n            let e = consumption_metrics::Event {\n                kind: item.kind,\n                metric: item.key.metric,\n                idempotency_key: idempotency_key.to_string(),\n                value: item.value,\n           
     extra: Ids {\n                    tenant_id: item.key.tenant_id,\n                    timeline_id: item.key.timeline_id,\n                },\n            };\n            let actual = serde_json::to_string(&e).unwrap();\n            assert_eq!(\n                expected, actual,\n                \"example for {:?} from line {line}\",\n                item.kind\n            );\n        }\n    }\n\n    #[test]\n    fn disk_format_upgrade() {\n        let old_samples_json = serde_json::to_value(metric_samples_old()).unwrap();\n        let new_samples =\n            serde_json::to_value(NewMetricsRefRoot::new(metric_samples().as_ref())).unwrap();\n        let upgraded_samples = read_metrics_from_serde_value(old_samples_json).unwrap();\n        let new_samples = read_metrics_from_serde_value(new_samples).unwrap();\n        assert_eq!(upgraded_samples, new_samples);\n    }\n\n    fn metric_samples_old() -> [RawMetric; 7] {\n        let tenant_id = TenantId::from_array([0; 16]);\n        let timeline_id = TimelineId::from_array([0xff; 16]);\n\n        let before = DateTime::parse_from_rfc3339(\"2023-09-14T00:00:00.123456789Z\")\n            .unwrap()\n            .into();\n        let [now, before] = [*SAMPLES_NOW, before];\n\n        super::super::metrics::metric_examples_old(tenant_id, timeline_id, now, before)\n    }\n\n    fn metric_samples() -> [NewRawMetric; 7] {\n        let tenant_id = TenantId::from_array([0; 16]);\n        let timeline_id = TimelineId::from_array([0xff; 16]);\n\n        let before = DateTime::parse_from_rfc3339(\"2023-09-14T00:00:00.123456789Z\")\n            .unwrap()\n            .into();\n        let [now, before] = [*SAMPLES_NOW, before];\n\n        super::super::metrics::metric_examples(tenant_id, timeline_id, now, before)\n    }\n}\n"
  },
  {
    "path": "pageserver/src/consumption_metrics.rs",
    "content": "//! Periodically collect consumption metrics for all active tenants\n//! and push them to a HTTP endpoint.\nuse std::collections::HashMap;\nuse std::sync::Arc;\nuse std::time::{Duration, SystemTime};\n\nuse camino::Utf8PathBuf;\nuse consumption_metrics::EventType;\nuse itertools::Itertools as _;\nuse pageserver_api::models::TenantState;\nuse remote_storage::{GenericRemoteStorage, RemoteStorageConfig};\nuse reqwest::Url;\nuse serde::{Deserialize, Serialize};\nuse tokio::time::Instant;\nuse tokio_util::sync::CancellationToken;\nuse tracing::*;\nuse utils::id::NodeId;\n\nuse crate::config::PageServerConf;\nuse crate::consumption_metrics::metrics::MetricsKey;\nuse crate::consumption_metrics::upload::KeyGen as _;\nuse crate::context::{DownloadBehavior, RequestContext};\nuse crate::task_mgr::{self, BACKGROUND_RUNTIME, TaskKind};\nuse crate::tenant::mgr::TenantManager;\nuse crate::tenant::size::CalculateSyntheticSizeError;\nuse crate::tenant::tasks::BackgroundLoopKind;\nuse crate::tenant::{LogicalSizeCalculationCause, TenantShard};\n\nmod disk_cache;\nmod metrics;\nmod upload;\n\nconst DEFAULT_HTTP_REPORTING_TIMEOUT: Duration = Duration::from_secs(60);\n\n/// Basically a key-value pair, but usually in a Vec except for [`Cache`].\n///\n/// This is as opposed to `consumption_metrics::Event` which is the externally communicated form.\n/// Difference is basically the missing idempotency key, which lives only for the duration of\n/// upload attempts.\ntype RawMetric = (MetricsKey, (EventType, u64));\n\n/// The new serializable metrics format\n#[derive(Serialize, Deserialize)]\nstruct NewMetricsRoot {\n    version: usize,\n    metrics: Vec<NewRawMetric>,\n}\n\nimpl NewMetricsRoot {\n    pub fn is_v2_metrics(json_value: &serde_json::Value) -> bool {\n        if let Some(ver) = json_value.get(\"version\") {\n            if let Some(2) = ver.as_u64() {\n                return true;\n            }\n        }\n        false\n    }\n}\n\n/// The new serializable 
metrics format\n#[derive(Serialize)]\nstruct NewMetricsRefRoot<'a> {\n    version: usize,\n    metrics: &'a [NewRawMetric],\n}\n\nimpl<'a> NewMetricsRefRoot<'a> {\n    fn new(metrics: &'a [NewRawMetric]) -> Self {\n        Self {\n            version: 2,\n            metrics,\n        }\n    }\n}\n\n/// The new serializable metrics format\n#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)]\nstruct NewRawMetric {\n    key: MetricsKey,\n    kind: EventType,\n    value: u64,\n    // TODO: add generation field and check against generations\n}\n\nimpl NewRawMetric {\n    #[cfg(test)]\n    fn to_kv_pair(&self) -> (MetricsKey, NewRawMetric) {\n        (self.key, self.clone())\n    }\n}\n\n/// Caches the [`RawMetric`]s\n///\n/// In practice, during startup, last sent values are stored here to be used in calculating new\n/// ones. After successful uploading, the cached values are updated to cache. This used to be used\n/// for deduplication, but that is no longer needed.\ntype Cache = HashMap<MetricsKey, NewRawMetric>;\n\npub async fn run(\n    conf: &'static PageServerConf,\n    tenant_manager: Arc<TenantManager>,\n    cancel: CancellationToken,\n) {\n    let Some(metric_collection_endpoint) = conf.metric_collection_endpoint.as_ref() else {\n        return;\n    };\n\n    let local_disk_storage = conf.workdir.join(\"last_consumption_metrics.json\");\n\n    let metrics_ctx = RequestContext::todo_child(\n        TaskKind::MetricsCollection,\n        // This task itself shouldn't download anything.\n        // The actual size calculation does need downloads, and\n        // creates a child context with the right DownloadBehavior.\n        DownloadBehavior::Error,\n    );\n    let collect_metrics = BACKGROUND_RUNTIME.spawn(task_mgr::exit_on_panic_or_error(\n        \"consumption metrics collection\",\n        collect_metrics(\n            tenant_manager.clone(),\n            metric_collection_endpoint,\n            &conf.metric_collection_bucket,\n            
conf.metric_collection_interval,\n            conf.id,\n            local_disk_storage,\n            cancel.clone(),\n            metrics_ctx,\n        )\n        .instrument(info_span!(\"metrics_collection\")),\n    ));\n\n    let worker_ctx =\n        RequestContext::todo_child(TaskKind::CalculateSyntheticSize, DownloadBehavior::Download);\n    let synthetic_size_worker = BACKGROUND_RUNTIME.spawn(task_mgr::exit_on_panic_or_error(\n        \"synthetic size calculation\",\n        calculate_synthetic_size_worker(\n            tenant_manager.clone(),\n            conf.synthetic_size_calculation_interval,\n            cancel.clone(),\n            worker_ctx,\n        )\n        .instrument(info_span!(\"synthetic_size_worker\")),\n    ));\n\n    let (collect_metrics, synthetic_size_worker) =\n        futures::future::join(collect_metrics, synthetic_size_worker).await;\n    collect_metrics\n        .expect(\"unreachable: exit_on_panic_or_error would catch the panic and exit the process\");\n    synthetic_size_worker\n        .expect(\"unreachable: exit_on_panic_or_error would catch the panic and exit the process\");\n}\n\n/// Main thread that serves metrics collection\n#[allow(clippy::too_many_arguments)]\nasync fn collect_metrics(\n    tenant_manager: Arc<TenantManager>,\n    metric_collection_endpoint: &Url,\n    metric_collection_bucket: &Option<RemoteStorageConfig>,\n    metric_collection_interval: Duration,\n    node_id: NodeId,\n    local_disk_storage: Utf8PathBuf,\n    cancel: CancellationToken,\n    ctx: RequestContext,\n) -> anyhow::Result<()> {\n    let path: Arc<Utf8PathBuf> = Arc::new(local_disk_storage);\n\n    let restore_and_reschedule = restore_and_reschedule(&path, metric_collection_interval);\n\n    let mut cached_metrics = tokio::select! 
{\n        _ = cancel.cancelled() => return Ok(()),\n        ret = restore_and_reschedule => ret,\n    };\n\n    // define client here to reuse it for all requests\n    let client = reqwest::ClientBuilder::new()\n        .timeout(DEFAULT_HTTP_REPORTING_TIMEOUT)\n        .build()\n        .expect(\"Failed to create http client with timeout\");\n\n    let bucket_client = if let Some(bucket_config) = metric_collection_bucket {\n        match GenericRemoteStorage::from_config(bucket_config).await {\n            Ok(client) => Some(client),\n            Err(e) => {\n                // Non-fatal error: if we were given an invalid config, we will proceed\n                // with sending metrics over the network, but not to S3.\n                tracing::warn!(\"Invalid configuration for metric_collection_bucket: {e}\");\n                None\n            }\n        }\n    } else {\n        None\n    };\n\n    let node_id = node_id.to_string();\n\n    loop {\n        let started_at = Instant::now();\n\n        // these are point in time, with variable \"now\"\n        let metrics = metrics::collect_all_metrics(&tenant_manager, &cached_metrics, &ctx).await;\n\n        // Pre-generate event idempotency keys, to reuse them across the bucket\n        // and HTTP sinks.\n        let idempotency_keys = std::iter::repeat_with(|| node_id.as_str().generate())\n            .take(metrics.len())\n            .collect_vec();\n\n        let metrics = Arc::new(metrics);\n\n        // why not race cancellation here? 
because we are one of the last tasks, and if we are\n        // already here, better to try to flush the new values.\n\n        let flush = async {\n            match disk_cache::flush_metrics_to_disk(&metrics, &path).await {\n                Ok(()) => {\n                    tracing::debug!(\"flushed metrics to disk\");\n                }\n                Err(e) => {\n                    // idea here is that if someone creates a directory as our path, then they\n                    // might notice it from the logs before shutdown and remove it\n                    tracing::error!(\"failed to persist metrics to {path:?}: {e:#}\");\n                }\n            }\n\n            if let Some(bucket_client) = &bucket_client {\n                let res = upload::upload_metrics_bucket(\n                    bucket_client,\n                    &cancel,\n                    &node_id,\n                    &metrics,\n                    &idempotency_keys,\n                )\n                .await;\n                if let Err(e) = res {\n                    tracing::error!(\"failed to upload to remote storage: {e:#}\");\n                }\n            }\n        };\n\n        let upload = async {\n            let res = upload::upload_metrics_http(\n                &client,\n                metric_collection_endpoint,\n                &cancel,\n                &metrics,\n                &mut cached_metrics,\n                &idempotency_keys,\n            )\n            .await;\n            if let Err(e) = res {\n                // serialization error which should never happen\n                tracing::error!(\"failed to upload via HTTP due to {e:#}\");\n            }\n        };\n\n        // let these run concurrently\n        let (_, _) = tokio::join!(flush, upload);\n\n        crate::tenant::tasks::warn_when_period_overrun(\n            started_at.elapsed(),\n            metric_collection_interval,\n            BackgroundLoopKind::ConsumptionMetricsCollectMetrics,\n        
);\n\n        let res =\n            tokio::time::timeout_at(started_at + metric_collection_interval, cancel.cancelled())\n                .await;\n        if res.is_ok() {\n            return Ok(());\n        }\n    }\n}\n\n/// Called on the first iteration in an attempt to join the metric uploading schedule from previous\n/// pageserver session. Pageserver is supposed to upload at intervals regardless of restarts.\n///\n/// Cancellation safe.\nasync fn restore_and_reschedule(\n    path: &Arc<Utf8PathBuf>,\n    metric_collection_interval: Duration,\n) -> Cache {\n    let (cached, earlier_metric_at) = match disk_cache::read_metrics_from_disk(path.clone()).await {\n        Ok(found_some) => {\n            // there is no min needed because we write these sequentially in\n            // collect_all_metrics\n            let earlier_metric_at = found_some\n                .iter()\n                .map(|item| item.kind.recorded_at())\n                .copied()\n                .next();\n\n            let cached = found_some\n                .into_iter()\n                .map(|item| (item.key, item))\n                .collect::<Cache>();\n\n            (cached, earlier_metric_at)\n        }\n        Err(e) => {\n            use std::io::{Error, ErrorKind};\n\n            let root = e.root_cause();\n            let maybe_ioerr = root.downcast_ref::<Error>();\n            let is_not_found = maybe_ioerr.is_some_and(|e| e.kind() == ErrorKind::NotFound);\n\n            if !is_not_found {\n                tracing::info!(\"failed to read any previous metrics from {path:?}: {e:#}\");\n            }\n\n            (HashMap::new(), None)\n        }\n    };\n\n    if let Some(earlier_metric_at) = earlier_metric_at {\n        let earlier_metric_at: SystemTime = earlier_metric_at.into();\n\n        let error = reschedule(earlier_metric_at, metric_collection_interval).await;\n\n        if let Some(error) = error {\n            if error.as_secs() >= 60 {\n                
tracing::info!(\n                    error_ms = error.as_millis(),\n                    \"startup scheduling error due to restart\"\n                )\n            }\n        }\n    }\n\n    cached\n}\n\nasync fn reschedule(\n    earlier_metric_at: SystemTime,\n    metric_collection_interval: Duration,\n) -> Option<Duration> {\n    let now = SystemTime::now();\n    match now.duration_since(earlier_metric_at) {\n        Ok(from_last_send) if from_last_send < metric_collection_interval => {\n            let sleep_for = metric_collection_interval - from_last_send;\n\n            let deadline = std::time::Instant::now() + sleep_for;\n\n            tokio::time::sleep_until(deadline.into()).await;\n\n            let now = std::time::Instant::now();\n\n            // executor threads might be busy, add extra measurements\n            Some(if now < deadline {\n                deadline - now\n            } else {\n                now - deadline\n            })\n        }\n        Ok(from_last_send) => Some(from_last_send.saturating_sub(metric_collection_interval)),\n        Err(_) => {\n            tracing::warn!(\n                ?now,\n                ?earlier_metric_at,\n                \"oldest recorded metric is in future; first values will come out with inconsistent timestamps\"\n            );\n            earlier_metric_at.duration_since(now).ok()\n        }\n    }\n}\n\n/// Calculate synthetic size for each active tenant\nasync fn calculate_synthetic_size_worker(\n    tenant_manager: Arc<TenantManager>,\n    synthetic_size_calculation_interval: Duration,\n    cancel: CancellationToken,\n    ctx: RequestContext,\n) -> anyhow::Result<()> {\n    info!(\"starting calculate_synthetic_size_worker\");\n    scopeguard::defer! 
{\n        info!(\"calculate_synthetic_size_worker stopped\");\n    };\n\n    loop {\n        let started_at = Instant::now();\n\n        let tenants = match tenant_manager.list_tenants() {\n            Ok(tenants) => tenants,\n            Err(e) => {\n                warn!(\"cannot get tenant list: {e:#}\");\n                continue;\n            }\n        };\n\n        for (tenant_shard_id, tenant_state, _gen) in tenants {\n            if tenant_state != TenantState::Active {\n                continue;\n            }\n\n            if !tenant_shard_id.is_shard_zero() {\n                // We only send consumption metrics from shard 0, so don't waste time calculating\n                // synthetic size on other shards.\n                continue;\n            }\n\n            let Ok(tenant) = tenant_manager.get_attached_tenant_shard(tenant_shard_id) else {\n                continue;\n            };\n\n            if !tenant.is_active() {\n                continue;\n            }\n\n            // there is never any reason to exit calculate_synthetic_size_worker following any\n            // return value -- we don't need to care about shutdown because no tenant is found when\n            // pageserver is shut down.\n            calculate_and_log(&tenant, &cancel, &ctx).await;\n        }\n\n        crate::tenant::tasks::warn_when_period_overrun(\n            started_at.elapsed(),\n            synthetic_size_calculation_interval,\n            BackgroundLoopKind::ConsumptionMetricsSyntheticSizeWorker,\n        );\n\n        let res = tokio::time::timeout_at(\n            started_at + synthetic_size_calculation_interval,\n            cancel.cancelled(),\n        )\n        .await;\n        if res.is_ok() {\n            return Ok(());\n        }\n    }\n}\n\nasync fn calculate_and_log(tenant: &TenantShard, cancel: &CancellationToken, ctx: &RequestContext) {\n    const CAUSE: LogicalSizeCalculationCause =\n        
LogicalSizeCalculationCause::ConsumptionMetricsSyntheticSize;\n\n    // TODO should we use concurrent_background_tasks_rate_limit() here, like the other background tasks?\n    // We can put in some prioritization for consumption metrics.\n    // Same for the loop that fetches computed metrics.\n    // By using the same limiter, we centralize metrics collection for \"start\" and \"finished\" counters,\n    // which turns out is really handy to understand the system.\n    match tenant.calculate_synthetic_size(CAUSE, cancel, ctx).await {\n        Ok(_) => {}\n        Err(CalculateSyntheticSizeError::Cancelled) => {}\n        Err(e) => {\n            let tenant_shard_id = tenant.tenant_shard_id();\n            error!(\"failed to calculate synthetic size for tenant {tenant_shard_id}: {e:#}\");\n        }\n    }\n}\n"
  },
  {
    "path": "pageserver/src/context.rs",
    "content": "//! Defines [`RequestContext`].\n//!\n//! It is a structure that we use throughout the pageserver to propagate\n//! high-level context from places that _originate_ activity down to the\n//! shared code paths at the heart of the pageserver. It's inspired by\n//! Golang's `context.Context`.\n//!\n//! For example, in `Timeline::get(page_nr, lsn)` we need to answer the following questions:\n//! 1. What high-level activity ([`TaskKind`]) needs this page?\n//!    We need that information as a categorical dimension for page access\n//!    statistics, which we, in turn, need to guide layer eviction policy design.\n//! 2. How should we behave if, to produce the page image, we need to\n//!    on-demand download a layer file ([`DownloadBehavior`]).\n//!\n//! [`RequestContext`] satisfies those needs.\n//! The current implementation is a small `struct` that is passed through\n//! the call chain by reference.\n//!\n//! ### Future Work\n//!\n//! However, we do not intend to stop here, since there are other needs that\n//! require carrying information from high to low levels of the app.\n//!\n//! Most importantly, **cancellation signaling** in response to\n//! 1. timeouts (page_service max response time) and\n//! 2. lifecycle requests (detach tenant, delete timeline).\n//!\n//! Related to that, there is sometimes a need to ensure that all tokio tasks spawned\n//! by the transitive callees of a request have finished. The keyword here\n//! is **Structured Concurrency**, and right now, we use `task_mgr` in most places,\n//! `TaskHandle` in some places, and careful code review around `FuturesUnordered`\n//! or `JoinSet` in other places.\n//!\n//! We do not yet have a systematic cancellation story in pageserver, and it is\n//! pretty clear that [`RequestContext`] will be responsible for that.\n//! So, the API already prepares for this role through the\n//! [`RequestContext::detached_child`] and [`RequestContext::attached_child`]  methods.\n//! 
See their doc comments for details on how we will use them in the future.\n//!\n//! It is not clear whether or how we will enforce Structured Concurrency, and\n//! what role [`RequestContext`] will play there.\n//! So, the API doesn't prepare us for this topic.\n//!\n//! Other future uses of `RequestContext`:\n//! - Communicate compute & IO priorities (user-initiated request vs. background-loop)\n//! - Request IDs for distributed tracing\n//! - Request/Timeline/Tenant-scoped log levels\n//!\n//! RequestContext might look quite different once it supports those features.\n//! Likely, it will have a shape similar to Golang's `context.Context`.\n//!\n//! ### Why A Struct Instead Of Method Parameters\n//!\n//! What's typical about such information is that it needs to be passed down\n//! along the call chain from high level to low level, but few of the functions\n//! in the middle need to understand it.\n//! Further, it is to be expected that we will need to propagate more data\n//! in the future (see the earlier section on future work).\n//! Hence, for functions in the middle of the call chain, we have the following\n//! requirements:\n//! 1. It should be easy to forward the context to callees.\n//! 2. To propagate more data from high-level to low-level code, the functions in\n//!    the middle should not need to be modified.\n//!\n//! The solution is to have a container structure ([`RequestContext`]) that\n//! carries the information. Functions that don't care about what's in it\n//! pass it along to callees.\n//!\n//! ### Why Not Task-Local Variables\n//!\n//! One could use task-local variables (the equivalent of thread-local variables)\n//! to address the immediate needs outlined above.\n//! However, we reject task-local variables because:\n//! 1. they are implicit, thereby making it harder to trace the data flow in code\n//!    reviews and during debugging,\n//! 2. they can be mutable, which enables implicit return data flow,\n//! 3. 
they are restrictive in that code which fans out into multiple tasks,\n//!    or even threads, needs to carefully propagate the state.\n//!\n//! In contrast, information flow with [`RequestContext`] is\n//! 1. always explicit,\n//! 2. strictly uni-directional because RequestContext is immutable,\n//! 3. tangible because a [`RequestContext`] is just a value.\n//!    When creating child activities, regardless of whether it's a task,\n//!    thread, or even an RPC to another service, the value can\n//!    be used like any other argument.\n//!\n//! The solution is that all code paths are infected with precisely one\n//! [`RequestContext`] argument. Functions in the middle of the call chain\n//! only need to pass it on.\n\nuse std::{sync::Arc, time::Duration};\n\nuse once_cell::sync::Lazy;\nuse tracing::warn;\nuse utils::{id::TimelineId, shard::TenantShardId};\n\nuse crate::{\n    metrics::{StorageIoSizeMetrics, TimelineMetrics},\n    task_mgr::TaskKind,\n    tenant::Timeline,\n};\nuse futures::FutureExt;\nuse futures::future::BoxFuture;\nuse std::future::Future;\nuse tracing_utils::perf_span::{PerfInstrument, PerfSpan};\n\nuse tracing::{Dispatch, Span};\n\n// The main structure of this module, see module-level comment.\npub struct RequestContext {\n    task_kind: TaskKind,\n    download_behavior: DownloadBehavior,\n    access_stats_behavior: AccessStatsBehavior,\n    page_content_kind: PageContentKind,\n    read_path_debug: bool,\n    scope: Scope,\n    perf_span: Option<PerfSpan>,\n    perf_span_dispatch: Option<Dispatch>,\n}\n\n#[derive(Clone)]\npub(crate) enum Scope {\n    Global {\n        io_size_metrics: &'static crate::metrics::StorageIoSizeMetrics,\n    },\n    SecondaryTenant {\n        io_size_metrics: &'static crate::metrics::StorageIoSizeMetrics,\n    },\n    SecondaryTimeline {\n        io_size_metrics: crate::metrics::StorageIoSizeMetrics,\n    },\n    Timeline {\n        // We wrap the `Arc<TimelineMetrics>`s inside another Arc to avoid child\n        // 
context creation contending for the ref counters of the Arc<TimelineMetrics>,\n        // which are shared among all tasks that operate on the timeline, especially\n        // concurrent page_service connections.\n        #[allow(clippy::redundant_allocation)]\n        arc_arc: Arc<Arc<TimelineMetrics>>,\n    },\n    #[cfg(test)]\n    UnitTest {\n        io_size_metrics: &'static crate::metrics::StorageIoSizeMetrics,\n    },\n    DebugTools {\n        io_size_metrics: &'static crate::metrics::StorageIoSizeMetrics,\n    },\n}\n\nstatic GLOBAL_IO_SIZE_METRICS: Lazy<crate::metrics::StorageIoSizeMetrics> =\n    Lazy::new(|| crate::metrics::StorageIoSizeMetrics::new(\"*\", \"*\", \"*\"));\n\nimpl Scope {\n    pub(crate) fn new_global() -> Self {\n        Scope::Global {\n            io_size_metrics: &GLOBAL_IO_SIZE_METRICS,\n        }\n    }\n    /// NB: this allocates, so, use only at relatively long-lived roots, e.g., at start\n    /// of a compaction iteration.\n    pub(crate) fn new_timeline(timeline: &Timeline) -> Self {\n        Scope::Timeline {\n            arc_arc: Arc::new(Arc::clone(&timeline.metrics)),\n        }\n    }\n    pub(crate) fn new_page_service_pagestream(\n        timeline_handle: &crate::tenant::timeline::handle::Handle<\n            crate::page_service::TenantManagerTypes,\n        >,\n    ) -> Self {\n        Scope::Timeline {\n            arc_arc: Arc::clone(&timeline_handle.metrics),\n        }\n    }\n    pub(crate) fn new_secondary_timeline(\n        tenant_shard_id: &TenantShardId,\n        timeline_id: &TimelineId,\n    ) -> Self {\n        // TODO(https://github.com/neondatabase/neon/issues/11156): secondary timelines have no infrastructure for metrics lifecycle.\n\n        let tenant_id = tenant_shard_id.tenant_id.to_string();\n        let shard_id = tenant_shard_id.shard_slug().to_string();\n        let timeline_id = timeline_id.to_string();\n\n        let io_size_metrics =\n            
crate::metrics::StorageIoSizeMetrics::new(&tenant_id, &shard_id, &timeline_id);\n        Scope::SecondaryTimeline { io_size_metrics }\n    }\n    pub(crate) fn new_secondary_tenant(_tenant_shard_id: &TenantShardId) -> Self {\n        // Before propagating metrics via RequestContext, the labels were inferred from file path.\n        // The only user of VirtualFile at tenant scope is the heatmap download & read.\n        // The inferred labels for the path of the heatmap file on local disk were that of the global metric (*,*,*).\n        // Thus, we do the same here, and extend that for anything secondary-tenant scoped.\n        //\n        // If we want to have (tenant_id, shard_id, '*') labels for secondary tenants in the future,\n        // we will need to think about the metric lifecycle, i.e., remove them during secondary tenant shutdown,\n        // like we do for attached timelines. (We don't have attached-tenant-scoped usage of VirtualFile\n        // at this point, so, we were able to completely side-step tenant-scoped stuff there).\n        Scope::SecondaryTenant {\n            io_size_metrics: &GLOBAL_IO_SIZE_METRICS,\n        }\n    }\n    #[cfg(test)]\n    pub(crate) fn new_unit_test() -> Self {\n        Scope::UnitTest {\n            io_size_metrics: &GLOBAL_IO_SIZE_METRICS,\n        }\n    }\n\n    pub(crate) fn new_debug_tools() -> Self {\n        Scope::DebugTools {\n            io_size_metrics: &GLOBAL_IO_SIZE_METRICS,\n        }\n    }\n}\n\n/// The kind of access to the page cache.\n#[derive(Clone, Copy, PartialEq, Eq, Debug, enum_map::Enum, strum_macros::IntoStaticStr)]\npub enum PageContentKind {\n    Unknown,\n    DeltaLayerSummary,\n    DeltaLayerBtreeNode,\n    DeltaLayerValue,\n    ImageLayerSummary,\n    ImageLayerBtreeNode,\n    ImageLayerValue,\n    InMemoryLayer,\n}\n\n/// Desired behavior if the operation requires an on-demand download\n/// to proceed.\n#[derive(Clone, Copy, PartialEq, Eq, Debug)]\npub enum DownloadBehavior {\n    /// 
Download the layer file. It can take a while.\n    Download,\n\n    /// Download the layer file, but print a warning to the log. This should be used\n    /// in code where the layer file is expected to already exist locally.\n    Warn,\n\n    /// Return a PageReconstructError::NeedsDownload error\n    Error,\n}\n\n/// Whether this request should update access times used in LRU eviction\n#[derive(Clone, Copy, PartialEq, Eq, Debug)]\npub(crate) enum AccessStatsBehavior {\n    /// Update access times: this request's access to data should be taken\n    /// as a hint that the accessed layer is likely to be accessed again\n    Update,\n\n    /// Do not update access times: this request is accessing the layer\n    /// but does not want to indicate that the layer should be retained in cache,\n    /// perhaps because the requestor is a compaction routine that will soon cover\n    /// this layer with another.\n    Skip,\n}\n\npub struct RequestContextBuilder {\n    inner: RequestContext,\n}\n\nimpl RequestContextBuilder {\n    /// A new builder with default settings\n    pub fn new(task_kind: TaskKind) -> Self {\n        Self {\n            inner: RequestContext {\n                task_kind,\n                download_behavior: DownloadBehavior::Download,\n                access_stats_behavior: AccessStatsBehavior::Update,\n                page_content_kind: PageContentKind::Unknown,\n                read_path_debug: false,\n                scope: Scope::new_global(),\n                perf_span: None,\n                perf_span_dispatch: None,\n            },\n        }\n    }\n\n    pub fn from(original: &RequestContext) -> Self {\n        Self {\n            inner: original.clone(),\n        }\n    }\n\n    pub fn task_kind(mut self, k: TaskKind) -> Self {\n        self.inner.task_kind = k;\n        self\n    }\n\n    /// Configure the DownloadBehavior of the context: whether to\n    /// download missing layers, and/or warn on the download.\n    pub fn download_behavior(mut 
self, b: DownloadBehavior) -> Self {\n        self.inner.download_behavior = b;\n        self\n    }\n\n    /// Configure the AccessStatsBehavior of the context: whether layer\n    /// accesses should update the access time of the layer.\n    pub(crate) fn access_stats_behavior(mut self, b: AccessStatsBehavior) -> Self {\n        self.inner.access_stats_behavior = b;\n        self\n    }\n\n    pub(crate) fn page_content_kind(mut self, k: PageContentKind) -> Self {\n        self.inner.page_content_kind = k;\n        self\n    }\n\n    pub(crate) fn read_path_debug(mut self, b: bool) -> Self {\n        self.inner.read_path_debug = b;\n        self\n    }\n\n    pub(crate) fn scope(mut self, s: Scope) -> Self {\n        self.inner.scope = s;\n        self\n    }\n\n    pub(crate) fn perf_span_dispatch(mut self, dispatch: Option<Dispatch>) -> Self {\n        self.inner.perf_span_dispatch = dispatch;\n        self\n    }\n\n    pub fn root_perf_span<Fn>(mut self, make_span: Fn) -> Self\n    where\n        Fn: FnOnce() -> Span,\n    {\n        assert!(self.inner.perf_span.is_none());\n        assert!(self.inner.perf_span_dispatch.is_some());\n\n        let dispatcher = self.inner.perf_span_dispatch.as_ref().unwrap();\n        let new_span = tracing::dispatcher::with_default(dispatcher, make_span);\n\n        self.inner.perf_span = Some(PerfSpan::new(new_span, dispatcher.clone()));\n\n        self\n    }\n\n    pub fn perf_span<Fn>(mut self, make_span: Fn) -> Self\n    where\n        Fn: FnOnce(&Span) -> Span,\n    {\n        if let Some(ref perf_span) = self.inner.perf_span {\n            assert!(self.inner.perf_span_dispatch.is_some());\n            let dispatcher = self.inner.perf_span_dispatch.as_ref().unwrap();\n\n            let new_span =\n                tracing::dispatcher::with_default(dispatcher, || make_span(perf_span.inner()));\n\n            self.inner.perf_span = Some(PerfSpan::new(new_span, dispatcher.clone()));\n        }\n\n        self\n    }\n\n    
pub fn root(self) -> RequestContext {\n        self.inner\n    }\n\n    pub fn attached_child(self) -> RequestContext {\n        self.inner\n    }\n\n    pub fn detached_child(self) -> RequestContext {\n        self.inner\n    }\n}\n\nimpl RequestContext {\n    /// Private clone implementation\n    ///\n    /// Callers should use the [`RequestContextBuilder`] or child spawning APIs of\n    /// [`RequestContext`].\n    fn clone(&self) -> Self {\n        Self {\n            task_kind: self.task_kind,\n            download_behavior: self.download_behavior,\n            access_stats_behavior: self.access_stats_behavior,\n            page_content_kind: self.page_content_kind,\n            read_path_debug: self.read_path_debug,\n            scope: self.scope.clone(),\n            perf_span: self.perf_span.clone(),\n            perf_span_dispatch: self.perf_span_dispatch.clone(),\n        }\n    }\n\n    /// Create a new RequestContext that has no parent.\n    ///\n    /// The function is called `new` because, once we add children\n    /// to it using `detached_child` or `attached_child`, the contexts\n    /// form a tree (not implemented yet since cancellation will be\n    /// the first feature that requires a tree).\n    ///\n    /// # Future: Cancellation\n    ///\n    /// The only reason why a context like this one can be canceled is\n    /// because someone explicitly canceled it.\n    /// It has no parent, so it cannot inherit cancellation from there.\n    pub fn new(task_kind: TaskKind, download_behavior: DownloadBehavior) -> Self {\n        RequestContextBuilder::new(task_kind)\n            .download_behavior(download_behavior)\n            .root()\n    }\n\n    /// Create a detached child context for a task that may outlive `self`.\n    ///\n    /// Use this when spawning new background activity that should complete\n    /// even if the current request is canceled.\n    ///\n    /// # Future: Cancellation\n    ///\n    /// Cancellation of `self` will not propagate 
to the child context returned\n    /// by this method.\n    ///\n    /// # Future: Structured Concurrency\n    ///\n    /// We could add the Future as a parameter to this function, spawn it as a task,\n    /// and pass to the new task the child context as an argument.\n    /// That would be an ergonomic improvement.\n    ///\n    /// We could make new calls to this function fail if `self` is already canceled.\n    pub fn detached_child(&self, task_kind: TaskKind, download_behavior: DownloadBehavior) -> Self {\n        RequestContextBuilder::from(self)\n            .task_kind(task_kind)\n            .download_behavior(download_behavior)\n            .detached_child()\n    }\n\n    /// Create a child of context `self` for a task that shall not outlive `self`.\n    ///\n    /// Use this when fanning-out work to other async tasks.\n    ///\n    /// # Future: Cancellation\n    ///\n    /// Cancelling a context will propagate to its attached children.\n    ///\n    /// # Future: Structured Concurrency\n    ///\n    /// We could add the Future as a parameter to this function, spawn it as a task,\n    /// and track its `JoinHandle` inside the `RequestContext`.\n    ///\n    /// We could then provide another method to allow waiting for all child tasks\n    /// to finish.\n    ///\n    /// We could make new calls to this function fail if `self` is already canceled.\n    /// Alternatively, we could allow the creation but not spawn the task.\n    /// The method to wait for child tasks would return an error, indicating\n    /// that the child task was not started because the context was canceled.\n    pub fn attached_child(&self) -> Self {\n        RequestContextBuilder::from(self).attached_child()\n    }\n\n    /// Use this function when you should be creating a child context using\n    /// [`attached_child`] or [`detached_child`], but your caller doesn't provide\n    /// a context and you are unwilling to change all callers to provide one.\n    ///\n    /// Before we add 
cancellation, we should get rid of this method.\n    ///\n    /// [`attached_child`]: Self::attached_child\n    /// [`detached_child`]: Self::detached_child\n    pub fn todo_child(task_kind: TaskKind, download_behavior: DownloadBehavior) -> Self {\n        Self::new(task_kind, download_behavior)\n    }\n\n    pub fn with_scope_timeline(&self, timeline: &Arc<Timeline>) -> Self {\n        RequestContextBuilder::from(self)\n            .scope(Scope::new_timeline(timeline))\n            .attached_child()\n    }\n\n    pub(crate) fn with_scope_page_service_pagestream(\n        &self,\n        timeline_handle: &crate::tenant::timeline::handle::Handle<\n            crate::page_service::TenantManagerTypes,\n        >,\n    ) -> Self {\n        RequestContextBuilder::from(self)\n            .scope(Scope::new_page_service_pagestream(timeline_handle))\n            .attached_child()\n    }\n\n    pub fn with_scope_secondary_timeline(\n        &self,\n        tenant_shard_id: &TenantShardId,\n        timeline_id: &TimelineId,\n    ) -> Self {\n        RequestContextBuilder::from(self)\n            .scope(Scope::new_secondary_timeline(tenant_shard_id, timeline_id))\n            .attached_child()\n    }\n\n    pub fn with_scope_secondary_tenant(&self, tenant_shard_id: &TenantShardId) -> Self {\n        RequestContextBuilder::from(self)\n            .scope(Scope::new_secondary_tenant(tenant_shard_id))\n            .attached_child()\n    }\n\n    #[cfg(test)]\n    pub fn with_scope_unit_test(&self) -> Self {\n        RequestContextBuilder::from(self)\n            .task_kind(TaskKind::UnitTest)\n            .scope(Scope::new_unit_test())\n            .attached_child()\n    }\n\n    pub fn with_scope_debug_tools(&self) -> Self {\n        RequestContextBuilder::from(self)\n            .task_kind(TaskKind::DebugTool)\n            .scope(Scope::new_debug_tools())\n            .attached_child()\n    }\n\n    pub fn task_kind(&self) -> TaskKind {\n        self.task_kind\n    }\n\n    pub 
fn download_behavior(&self) -> DownloadBehavior {\n        self.download_behavior\n    }\n\n    pub(crate) fn access_stats_behavior(&self) -> AccessStatsBehavior {\n        self.access_stats_behavior\n    }\n\n    pub(crate) fn page_content_kind(&self) -> PageContentKind {\n        self.page_content_kind\n    }\n\n    pub(crate) fn read_path_debug(&self) -> bool {\n        self.read_path_debug\n    }\n\n    pub(crate) fn io_size_metrics(&self) -> &StorageIoSizeMetrics {\n        match &self.scope {\n            Scope::Global { io_size_metrics } => {\n                let is_unit_test = cfg!(test);\n                let is_regress_test_build = cfg!(feature = \"testing\");\n                if is_unit_test || is_regress_test_build {\n                    panic!(\"all VirtualFile instances are timeline-scoped\");\n                } else {\n                    use once_cell::sync::Lazy;\n                    use std::sync::Mutex;\n                    use std::time::Duration;\n                    use utils::rate_limit::RateLimit;\n                    static LIMIT: Lazy<Mutex<RateLimit>> =\n                        Lazy::new(|| Mutex::new(RateLimit::new(Duration::from_secs(1))));\n                    let mut guard = LIMIT.lock().unwrap();\n                    guard.call2(|rate_limit_stats| {\n                        warn!(\n                            %rate_limit_stats,\n                            backtrace=%std::backtrace::Backtrace::force_capture(),\n                            \"all VirtualFile instances are timeline-scoped\",\n                        );\n                    });\n\n                    io_size_metrics\n                }\n            }\n            Scope::Timeline { arc_arc } => &arc_arc.storage_io_size,\n            Scope::SecondaryTimeline { io_size_metrics } => io_size_metrics,\n            Scope::SecondaryTenant { io_size_metrics } => io_size_metrics,\n            #[cfg(test)]\n            Scope::UnitTest { io_size_metrics } => io_size_metrics,\n         
   Scope::DebugTools { io_size_metrics } => io_size_metrics,\n        }\n    }\n\n    pub(crate) fn ondemand_download_wait_observe(&self, duration: Duration) {\n        if duration == Duration::ZERO {\n            return;\n        }\n\n        match &self.scope {\n            Scope::Timeline { arc_arc } => arc_arc\n                .wait_ondemand_download_time\n                .observe(self.task_kind, duration),\n            _ => {\n                use once_cell::sync::Lazy;\n                use std::sync::Mutex;\n                use std::time::Duration;\n                use utils::rate_limit::RateLimit;\n                static LIMIT: Lazy<Mutex<RateLimit>> =\n                    Lazy::new(|| Mutex::new(RateLimit::new(Duration::from_secs(1))));\n                let mut guard = LIMIT.lock().unwrap();\n                guard.call2(|rate_limit_stats| {\n                    warn!(\n                        %rate_limit_stats,\n                        backtrace=%std::backtrace::Backtrace::force_capture(),\n                        \"ondemand downloads should always happen within timeline scope\",\n                    );\n                });\n            }\n        }\n    }\n\n    pub(crate) fn perf_follows_from(&self, from: &RequestContext) {\n        if let (Some(span), Some(from_span)) = (&self.perf_span, &from.perf_span) {\n            span.inner().follows_from(from_span.inner());\n        }\n    }\n\n    pub(crate) fn has_perf_span(&self) -> bool {\n        self.perf_span.is_some()\n    }\n}\n\n/// [`Future`] extension trait that allows for creating performance\n/// spans on sampled requests\npub(crate) trait PerfInstrumentFutureExt<'a>: Future + Send {\n    /// Instrument this future with a new performance span when the\n    /// provided request context indicates the originator request\n    /// was sampled. 
Otherwise, just box the future and return it as is.\n    fn maybe_perf_instrument<Fn>(\n        self,\n        ctx: &RequestContext,\n        make_span: Fn,\n    ) -> BoxFuture<'a, Self::Output>\n    where\n        Self: Sized + 'a,\n        Fn: FnOnce(&Span) -> Span,\n    {\n        match &ctx.perf_span {\n            Some(perf_span) => {\n                assert!(ctx.perf_span_dispatch.is_some());\n                let dispatcher = ctx.perf_span_dispatch.as_ref().unwrap();\n\n                let new_span =\n                    tracing::dispatcher::with_default(dispatcher, || make_span(perf_span.inner()));\n\n                let new_perf_span = PerfSpan::new(new_span, dispatcher.clone());\n                self.instrument(new_perf_span).boxed()\n            }\n            None => self.boxed(),\n        }\n    }\n}\n\n// Implement the trait for all types that satisfy the trait bounds\nimpl<'a, T: Future + Send + 'a> PerfInstrumentFutureExt<'a> for T {}\n"
  },
  {
    "path": "pageserver/src/controller_upcall_client.rs",
    "content": "use std::collections::HashMap;\nuse std::net::IpAddr;\n\nuse futures::Future;\nuse pageserver_api::config::NodeMetadata;\nuse pageserver_api::controller_api::{AvailabilityZone, NodeRegisterRequest};\nuse pageserver_api::models::ShardImportStatus;\nuse pageserver_api::shard::TenantShardId;\nuse pageserver_api::upcall_api::{\n    PutTimelineImportStatusRequest, ReAttachRequest, ReAttachResponse, ReAttachResponseTenant,\n    TimelineImportStatusRequest, ValidateRequest, ValidateRequestTenant, ValidateResponse,\n};\nuse reqwest::Certificate;\nuse serde::Serialize;\nuse serde::de::DeserializeOwned;\nuse tokio_util::sync::CancellationToken;\nuse url::Url;\nuse utils::generation::Generation;\nuse utils::id::{NodeId, TimelineId};\nuse utils::{backoff, failpoint_support, ip_address};\n\nuse crate::config::PageServerConf;\nuse crate::virtual_file::on_fatal_io_error;\n\n/// The Pageserver's client for using the storage controller upcall API: this is a small API\n/// for dealing with generations (see docs/rfcs/025-generation-numbers.md).\npub struct StorageControllerUpcallClient {\n    http_client: reqwest::Client,\n    base_url: Url,\n    node_id: NodeId,\n    node_ip_addr: Option<IpAddr>,\n    cancel: CancellationToken,\n}\n\n/// Represent operations which internally retry on all errors other than\n/// cancellation token firing: the only way they can fail is ShuttingDown.\npub enum RetryForeverError {\n    ShuttingDown,\n}\n\npub trait StorageControllerUpcallApi {\n    fn re_attach(\n        &self,\n        conf: &PageServerConf,\n        empty_local_disk: bool,\n    ) -> impl Future<\n        Output = Result<HashMap<TenantShardId, ReAttachResponseTenant>, RetryForeverError>,\n    > + Send;\n    fn validate(\n        &self,\n        tenants: Vec<(TenantShardId, Generation)>,\n    ) -> impl Future<Output = Result<HashMap<TenantShardId, bool>, RetryForeverError>> + Send;\n    fn put_timeline_import_status(\n        &self,\n        tenant_shard_id: 
TenantShardId,\n        timeline_id: TimelineId,\n        generation: Generation,\n        status: ShardImportStatus,\n    ) -> impl Future<Output = Result<(), RetryForeverError>> + Send;\n    fn get_timeline_import_status(\n        &self,\n        tenant_shard_id: TenantShardId,\n        timeline_id: TimelineId,\n        generation: Generation,\n    ) -> impl Future<Output = Result<ShardImportStatus, RetryForeverError>> + Send;\n}\n\nimpl StorageControllerUpcallClient {\n    /// A None return value indicates that the input `conf` object does not have control\n    /// plane API enabled.\n    pub fn new(conf: &'static PageServerConf, cancel: &CancellationToken) -> Self {\n        let mut url = conf.control_plane_api.clone();\n\n        if let Ok(mut segs) = url.path_segments_mut() {\n            // This ensures that `url` ends with a slash if it doesn't already.\n            // That way, we can subsequently use join() to safely attach extra path elements.\n            segs.pop_if_empty().push(\"\");\n        }\n\n        let mut client = reqwest::ClientBuilder::new();\n\n        if let Some(jwt) = &conf.control_plane_api_token {\n            let mut headers = reqwest::header::HeaderMap::new();\n            headers.insert(\n                \"Authorization\",\n                format!(\"Bearer {}\", jwt.get_contents()).parse().unwrap(),\n            );\n            client = client.default_headers(headers);\n        }\n\n        for cert in &conf.ssl_ca_certs {\n            client = client.add_root_certificate(\n                Certificate::from_der(cert.contents()).expect(\"Invalid certificate in config\"),\n            );\n        }\n\n        // Intentionally panics if we encountered any errors parsing or reading the IP address.\n        // Note that if the required environment variable is not set, `read_node_ip_addr_from_env` returns `Ok(None)`\n        // instead of an error.\n        let node_ip_addr =\n            
ip_address::read_node_ip_addr_from_env().expect(\"Error reading node IP address.\");\n\n        Self {\n            http_client: client.build().expect(\"Failed to construct HTTP client\"),\n            base_url: url,\n            node_id: conf.id,\n            cancel: cancel.clone(),\n            node_ip_addr,\n        }\n    }\n\n    #[tracing::instrument(skip_all)]\n    async fn retry_http_forever<R, T>(\n        &self,\n        url: &url::Url,\n        request: R,\n        method: reqwest::Method,\n    ) -> Result<T, RetryForeverError>\n    where\n        R: Serialize,\n        T: DeserializeOwned,\n    {\n        let res = backoff::retry(\n            || async {\n                let response = self\n                    .http_client\n                    .request(method.clone(), url.clone())\n                    .json(&request)\n                    .send()\n                    .await?;\n\n                response.error_for_status_ref()?;\n                response.json::<T>().await\n            },\n            |_| false,\n            3,\n            u32::MAX,\n            \"storage controller upcall\",\n            &self.cancel,\n        )\n        .await\n        .ok_or(RetryForeverError::ShuttingDown)?\n        .expect(\"We retry forever, this should never be reached\");\n\n        Ok(res)\n    }\n\n    pub(crate) fn base_url(&self) -> &Url {\n        &self.base_url\n    }\n}\n\nimpl StorageControllerUpcallApi for StorageControllerUpcallClient {\n    /// Block until we get a successful response, or error out if we are shut down\n    #[tracing::instrument(skip_all)] // so that warning logs from retry_http_forever have context\n    async fn re_attach(\n        &self,\n        conf: &PageServerConf,\n        empty_local_disk: bool,\n    ) -> Result<HashMap<TenantShardId, ReAttachResponseTenant>, RetryForeverError> {\n        let url = self\n            .base_url\n            .join(\"re-attach\")\n            .expect(\"Failed to build re-attach path\");\n\n        
// Include registration content in the re-attach request if a metadata file is readable\n        let metadata_path = conf.metadata_path();\n        let register = match tokio::fs::read_to_string(&metadata_path).await {\n            Ok(metadata_str) => match serde_json::from_str::<NodeMetadata>(&metadata_str) {\n                Ok(m) => {\n                    // Since we run one time at startup, be generous in our logging and\n                    // dump all metadata.\n                    tracing::info!(\"Loaded node metadata: {m}\");\n\n                    let az_id = {\n                        let az_id_from_metadata = m\n                            .other\n                            .get(\"availability_zone_id\")\n                            .and_then(|jv| jv.as_str().map(|str| str.to_owned()));\n\n                        match az_id_from_metadata {\n                            Some(az_id) => Some(AvailabilityZone(az_id)),\n                            None => {\n                                tracing::warn!(\n                                    \"metadata.json does not contain an 'availability_zone_id' field\"\n                                );\n                                conf.availability_zone.clone().map(AvailabilityZone)\n                            }\n                        }\n                    };\n\n                    if az_id.is_none() {\n                        panic!(\n                            \"Availablity zone id could not be inferred from metadata.json or pageserver config\"\n                        );\n                    }\n\n                    Some(NodeRegisterRequest {\n                        node_id: conf.id,\n                        listen_pg_addr: m.postgres_host,\n                        listen_pg_port: m.postgres_port,\n                        listen_grpc_addr: m.grpc_host,\n                        listen_grpc_port: m.grpc_port,\n                        listen_http_addr: m.http_host,\n                        listen_http_port: 
m.http_port,\n                        listen_https_port: m.https_port,\n                        node_ip_addr: self.node_ip_addr,\n                        availability_zone_id: az_id.expect(\"Checked above\"),\n                    })\n                }\n                Err(e) => {\n                    tracing::error!(\"Unreadable metadata in {metadata_path}: {e}\");\n                    None\n                }\n            },\n            Err(e) => {\n                if e.kind() == std::io::ErrorKind::NotFound {\n                    // This is legal: we may have been deployed with some external script\n                    // doing registration for us.\n                    tracing::info!(\"Metadata file not found at {metadata_path}\");\n                } else {\n                    on_fatal_io_error(&e, &format!(\"Loading metadata at {metadata_path}\"))\n                }\n                None\n            }\n        };\n\n        let request = ReAttachRequest {\n            node_id: self.node_id,\n            register: register.clone(),\n            empty_local_disk: Some(empty_local_disk),\n        };\n\n        let response: ReAttachResponse = self\n            .retry_http_forever(&url, request, reqwest::Method::POST)\n            .await?;\n        tracing::info!(\n            \"Received re-attach response with {} tenants (node {}, register: {:?})\",\n            response.tenants.len(),\n            self.node_id,\n            register,\n        );\n\n        failpoint_support::sleep_millis_async!(\"control-plane-client-re-attach\");\n\n        Ok(response\n            .tenants\n            .into_iter()\n            .map(|rart| (rart.id, rart))\n            .collect::<HashMap<_, _>>())\n    }\n\n    /// Block until we get a successful response, or error out if we are shut down\n    #[tracing::instrument(skip_all)] // so that warning logs from retry_http_forever have context\n    async fn validate(\n        &self,\n        tenants: Vec<(TenantShardId, 
Generation)>,\n    ) -> Result<HashMap<TenantShardId, bool>, RetryForeverError> {\n        let url = self\n            .base_url\n            .join(\"validate\")\n            .expect(\"Failed to build validate path\");\n\n        // When sending validate requests, break them up into chunks so that we\n        // avoid possible edge cases of generating any HTTP requests that\n        // require database I/O across many thousands of tenants.\n        let mut result: HashMap<TenantShardId, bool> = HashMap::with_capacity(tenants.len());\n        for tenant_chunk in (tenants).chunks(128) {\n            let request = ValidateRequest {\n                tenants: tenant_chunk\n                    .iter()\n                    .map(|(id, generation)| ValidateRequestTenant {\n                        id: *id,\n                        r#gen: (*generation).into().expect(\n                            \"Generation should always be valid for a Tenant doing deletions\",\n                        ),\n                    })\n                    .collect(),\n            };\n\n            failpoint_support::sleep_millis_async!(\n                \"control-plane-client-validate-sleep\",\n                &self.cancel\n            );\n            if self.cancel.is_cancelled() {\n                return Err(RetryForeverError::ShuttingDown);\n            }\n\n            let response: ValidateResponse = self\n                .retry_http_forever(&url, request, reqwest::Method::POST)\n                .await?;\n            for rt in response.tenants {\n                result.insert(rt.id, rt.valid);\n            }\n        }\n\n        Ok(result.into_iter().collect())\n    }\n\n    /// Send a shard import status to the storage controller\n    ///\n    /// The implementation must have at-least-once delivery semantics.\n    /// To this end, we retry the request until it succeeds. 
If the pageserver\n    /// restarts or crashes, the shard import will start again from the beginning.\n    #[tracing::instrument(skip_all)] // so that warning logs from retry_http_forever have context\n    async fn put_timeline_import_status(\n        &self,\n        tenant_shard_id: TenantShardId,\n        timeline_id: TimelineId,\n        generation: Generation,\n        status: ShardImportStatus,\n    ) -> Result<(), RetryForeverError> {\n        let url = self\n            .base_url\n            .join(\"timeline_import_status\")\n            .expect(\"Failed to build path\");\n\n        let request = PutTimelineImportStatusRequest {\n            tenant_shard_id,\n            timeline_id,\n            generation,\n            status,\n        };\n\n        self.retry_http_forever(&url, request, reqwest::Method::POST)\n            .await\n    }\n\n    #[tracing::instrument(skip_all)] // so that warning logs from retry_http_forever have context\n    async fn get_timeline_import_status(\n        &self,\n        tenant_shard_id: TenantShardId,\n        timeline_id: TimelineId,\n        generation: Generation,\n    ) -> Result<ShardImportStatus, RetryForeverError> {\n        let url = self\n            .base_url\n            .join(\"timeline_import_status\")\n            .expect(\"Failed to build path\");\n\n        let request = TimelineImportStatusRequest {\n            tenant_shard_id,\n            timeline_id,\n            generation,\n        };\n\n        let response: ShardImportStatus = self\n            .retry_http_forever(&url, request, reqwest::Method::GET)\n            .await?;\n        Ok(response)\n    }\n}\n"
  },
  {
    "path": "pageserver/src/deletion_queue/deleter.rs",
    "content": "//! The deleter is the final stage in the deletion queue.  It accumulates remote\n//! paths to delete, and periodically executes them in batches of up to 1000\n//! using the DeleteObjects request.\n//!\n//! Its purpose is to increase efficiency of remote storage I/O by issuing a smaller\n//! number of full-sized DeleteObjects requests, rather than a larger number of\n//! smaller requests.\n\nuse std::time::Duration;\n\nuse remote_storage::{GenericRemoteStorage, RemotePath, TimeoutOrCancel};\nuse tokio_util::sync::CancellationToken;\nuse tracing::{info, warn};\nuse utils::{backoff, pausable_failpoint};\n\nuse super::{DeletionQueueError, FlushOp};\nuse crate::metrics;\n\nconst AUTOFLUSH_INTERVAL: Duration = Duration::from_secs(10);\n\npub(super) enum DeleterMessage {\n    Delete(Vec<RemotePath>),\n    Flush(FlushOp),\n}\n\n/// Non-persistent deletion queue, for coalescing multiple object deletes into\n/// larger DeleteObjects requests.\npub(super) struct Deleter {\n    // Accumulate up to 1000 keys for the next deletion operation\n    accumulator: Vec<RemotePath>,\n\n    rx: tokio::sync::mpsc::Receiver<DeleterMessage>,\n\n    cancel: CancellationToken,\n    remote_storage: GenericRemoteStorage,\n}\n\nimpl Deleter {\n    pub(super) fn new(\n        remote_storage: GenericRemoteStorage,\n        rx: tokio::sync::mpsc::Receiver<DeleterMessage>,\n        cancel: CancellationToken,\n    ) -> Self {\n        Self {\n            remote_storage,\n            rx,\n            cancel,\n            accumulator: Vec::new(),\n        }\n    }\n\n    /// Wrap the remote `delete_objects` with a failpoint\n    async fn remote_delete(&self) -> Result<(), anyhow::Error> {\n        // A backoff::retry is used here for two reasons:\n        // - To provide a backoff rather than busy-polling the API on errors\n        // - To absorb transient 429/503 conditions without hitting our error\n        //   logging path for issues deleting objects.\n        backoff::retry(\n     
       || async {\n                fail::fail_point!(\"deletion-queue-before-execute\", |_| {\n                    info!(\"Skipping execution, failpoint set\");\n\n                    metrics::DELETION_QUEUE\n                        .remote_errors\n                        .with_label_values(&[\"failpoint\"])\n                        .inc();\n                    Err(anyhow::anyhow!(\"failpoint: deletion-queue-before-execute\"))\n                });\n\n                self.remote_storage\n                    .delete_objects(&self.accumulator, &self.cancel)\n                    .await\n            },\n            TimeoutOrCancel::caused_by_cancel,\n            3,\n            10,\n            \"executing deletion batch\",\n            &self.cancel,\n        )\n        .await\n        .ok_or_else(|| anyhow::anyhow!(\"Shutting down\"))\n        .and_then(|x| x)\n    }\n\n    /// Block until everything in accumulator has been executed\n    async fn flush(&mut self) -> Result<(), DeletionQueueError> {\n        while !self.accumulator.is_empty() && !self.cancel.is_cancelled() {\n            pausable_failpoint!(\"deletion-queue-before-execute-pause\");\n            match self.remote_delete().await {\n                Ok(()) => {\n                    // Note: we assume that the remote storage layer returns Ok(()) if some\n                    // or all of the deleted objects were already gone.\n                    metrics::DELETION_QUEUE\n                        .keys_executed\n                        .inc_by(self.accumulator.len() as u64);\n                    info!(\n                        \"Executed deletion batch {}..{}\",\n                        self.accumulator\n                            .first()\n                            .expect(\"accumulator should be non-empty\"),\n                        self.accumulator\n                            .last()\n                            .expect(\"accumulator should be non-empty\"),\n                    );\n                    
self.accumulator.clear();\n                }\n                Err(e) => {\n                    if self.cancel.is_cancelled() {\n                        return Err(DeletionQueueError::ShuttingDown);\n                    }\n                    warn!(\"DeleteObjects request failed: {e:#}, will continue trying\");\n                    metrics::DELETION_QUEUE\n                        .remote_errors\n                        .with_label_values(&[\"execute\"])\n                        .inc();\n                }\n            };\n        }\n        if self.cancel.is_cancelled() {\n            // Expose an error because we may not have actually flushed everything\n            Err(DeletionQueueError::ShuttingDown)\n        } else {\n            Ok(())\n        }\n    }\n\n    pub(super) async fn background(&mut self) -> Result<(), DeletionQueueError> {\n        let max_keys_per_delete = self.remote_storage.max_keys_per_delete();\n        self.accumulator.reserve(max_keys_per_delete);\n\n        loop {\n            if self.cancel.is_cancelled() {\n                return Err(DeletionQueueError::ShuttingDown);\n            }\n\n            let msg = match tokio::time::timeout(AUTOFLUSH_INTERVAL, self.rx.recv()).await {\n                Ok(Some(m)) => m,\n                Ok(None) => {\n                    // All queue senders closed\n                    info!(\"Shutting down\");\n                    return Err(DeletionQueueError::ShuttingDown);\n                }\n                Err(_) => {\n                    // Timeout, we hit deadline to execute whatever we have in hand.  
These functions will\n                    // return immediately if no work is pending\n                    self.flush().await?;\n\n                    continue;\n                }\n            };\n\n            match msg {\n                DeleterMessage::Delete(mut list) => {\n                    while !list.is_empty() || self.accumulator.len() == max_keys_per_delete {\n                        if self.accumulator.len() == max_keys_per_delete {\n                            self.flush().await?;\n                            // If we have received this number of keys, proceed with attempting to execute\n                            assert_eq!(self.accumulator.len(), 0);\n                        }\n\n                        let available_slots = max_keys_per_delete - self.accumulator.len();\n                        let take_count = std::cmp::min(available_slots, list.len());\n                        for path in list.drain(list.len() - take_count..) {\n                            self.accumulator.push(path);\n                        }\n                    }\n                }\n                DeleterMessage::Flush(flush_op) => {\n                    // If flush() errors, we drop the flush_op and the caller will get\n                    // an error recv()'ing their oneshot channel.\n                    self.flush().await?;\n                    flush_op.notify();\n                }\n            }\n        }\n    }\n}\n"
  },
  {
    "path": "pageserver/src/deletion_queue/list_writer.rs",
    "content": "//! The list writer is the first stage in the deletion queue.  It accumulates\n//! layers to delete, and periodically writes out these layers into a persistent\n//! DeletionList.\n//!\n//! The purpose of writing DeletionLists is to decouple the decision to\n//! delete an object from the validation required to execute it: even if\n//! validation is not possible, e.g. due to a control plane outage, we can\n//! still persist our intent to delete an object, in a way that would\n//! survive a restart.\n//!\n//! DeletionLists are passed onwards to the Validator.\n\nuse std::collections::HashMap;\nuse std::fs::create_dir_all;\nuse std::time::Duration;\n\nuse pageserver_api::shard::TenantShardId;\nuse regex::Regex;\nuse remote_storage::RemotePath;\nuse tokio_util::sync::CancellationToken;\nuse tracing::{debug, info, warn};\nuse utils::generation::Generation;\nuse utils::id::TimelineId;\n\nuse super::{DeletionHeader, DeletionList, FlushOp, ValidatorQueueMessage};\nuse crate::config::PageServerConf;\nuse crate::deletion_queue::TEMP_SUFFIX;\nuse crate::metrics;\nuse crate::tenant::remote_timeline_client::{LayerFileMetadata, remote_layer_path};\nuse crate::tenant::storage_layer::LayerName;\nuse crate::virtual_file::{MaybeFatalIo, on_fatal_io_error};\n\n// The number of keys in a DeletionList before we will proactively persist it\n// (without reaching a flush deadline).  
This aims to deliver objects of the order\n// of magnitude 1MB when we are under heavy delete load.\nconst DELETION_LIST_TARGET_SIZE: usize = 16384;\n\n// Ordinarily, we only flush to DeletionList periodically, to bound the window during\n// which we might leak objects from not flushing a DeletionList after\n// the objects are already unlinked from timeline metadata.\nconst FRONTEND_DEFAULT_TIMEOUT: Duration = Duration::from_millis(10000);\n\n// If someone is waiting for a flush to DeletionList, only delay a little to accumulate\n// more objects before doing the flush.\nconst FRONTEND_FLUSHING_TIMEOUT: Duration = Duration::from_millis(100);\n\n#[derive(Debug)]\npub(super) struct DeletionOp {\n    pub(super) tenant_shard_id: TenantShardId,\n    pub(super) timeline_id: TimelineId,\n    // `layers` and `objects` are both just lists of objects.  `layers` is used if you do not\n    // have a config object handy to project it to a remote key, and need the consuming worker\n    // to do it for you.\n    pub(super) layers: Vec<(LayerName, LayerFileMetadata)>,\n    pub(super) objects: Vec<RemotePath>,\n\n    /// The _current_ generation of the Tenant shard attachment in which we are enqueuing\n    /// this deletion.\n    pub(super) generation: Generation,\n}\n\n#[derive(Debug)]\npub(super) struct RecoverOp {\n    pub(super) attached_tenants: HashMap<TenantShardId, Generation>,\n}\n\n#[derive(Debug)]\npub(super) enum ListWriterQueueMessage {\n    Delete(DeletionOp),\n    // Wait until all prior deletions make it into a persistent DeletionList\n    Flush(FlushOp),\n    // Wait until all prior deletions have been executed (i.e. 
objects are actually deleted)\n    FlushExecute(FlushOp),\n    // Call once after re-attaching to control plane, to notify the deletion queue about\n    // latest attached generations & load any saved deletion lists from disk.\n    Recover(RecoverOp),\n}\n\npub(super) struct ListWriter {\n    conf: &'static PageServerConf,\n\n    // Incoming frontend requests to delete some keys\n    rx: tokio::sync::mpsc::UnboundedReceiver<ListWriterQueueMessage>,\n\n    // Outbound requests to the backend to execute deletion lists we have composed.\n    tx: tokio::sync::mpsc::Sender<ValidatorQueueMessage>,\n\n    // The list we are currently building, contains a buffer of keys to delete\n    // and our next sequence number\n    pending: DeletionList,\n\n    // These FlushOps should notify the next time we flush\n    pending_flushes: Vec<FlushOp>,\n\n    // Worker loop is torn down when this fires.\n    cancel: CancellationToken,\n\n    // Safety guard to do recovery exactly once\n    recovered: bool,\n}\n\nimpl ListWriter {\n    // Initially DeletionHeader.validated_sequence is zero.  
The place we start our\n    // sequence numbers must be higher than that.\n    const BASE_SEQUENCE: u64 = 1;\n\n    pub(super) fn new(\n        conf: &'static PageServerConf,\n        rx: tokio::sync::mpsc::UnboundedReceiver<ListWriterQueueMessage>,\n        tx: tokio::sync::mpsc::Sender<ValidatorQueueMessage>,\n        cancel: CancellationToken,\n    ) -> Self {\n        Self {\n            pending: DeletionList::new(Self::BASE_SEQUENCE),\n            conf,\n            rx,\n            tx,\n            pending_flushes: Vec::new(),\n            cancel,\n            recovered: false,\n        }\n    }\n\n    /// Try to flush `list` to persistent storage\n    ///\n    /// This does not return errors, because on failure to flush we do not lose\n    /// any state: flushing will be retried implicitly on the next deadline\n    async fn flush(&mut self) {\n        if self.pending.is_empty() {\n            for f in self.pending_flushes.drain(..) {\n                f.notify();\n            }\n            return;\n        }\n\n        match self.pending.save(self.conf).await {\n            Ok(_) => {\n                info!(sequence = self.pending.sequence, \"Stored deletion list\");\n\n                for f in self.pending_flushes.drain(..) 
{\n                    f.notify();\n                }\n\n                // Take the list we've accumulated, replace it with a fresh list for the next sequence\n                let next_list = DeletionList::new(self.pending.sequence + 1);\n                let list = std::mem::replace(&mut self.pending, next_list);\n\n                if let Err(e) = self.tx.send(ValidatorQueueMessage::Delete(list)).await {\n                    // This is allowed to fail: it will only happen if the backend worker is shut down,\n                    // so we can just drop this on the floor.\n                    info!(\"Deletion list dropped, this is normal during shutdown ({e:#})\");\n                }\n            }\n            Err(e) => {\n                metrics::DELETION_QUEUE.unexpected_errors.inc();\n                warn!(\n                    sequence = self.pending.sequence,\n                    \"Failed to write deletion list, will retry later ({e:#})\"\n                );\n            }\n        }\n    }\n\n    /// Load the header, to learn the sequence number up to which deletions\n    /// have been validated.  
We will apply validated=true to DeletionLists\n    /// <= this sequence when loading them.\n    ///\n    /// It is not an error for the header to not exist: we return None, and\n    /// the caller should act as if validated_sequence is 0\n    async fn load_validated_sequence(&self) -> Result<Option<u64>, anyhow::Error> {\n        let header_path = self.conf.deletion_header_path();\n        match tokio::fs::read(&header_path).await {\n            Ok(header_bytes) => {\n                match serde_json::from_slice::<DeletionHeader>(&header_bytes) {\n                    Ok(h) => Ok(Some(h.validated_sequence)),\n                    Err(e) => {\n                        warn!(\n                            \"Failed to deserialize deletion header, ignoring {header_path}: {e:#}\",\n                        );\n                        // This should never happen unless we make a mistake with our serialization.\n                        // Ignoring a deletion header is not consequential for correctness because all deletions\n                        // are ultimately allowed to fail: worst case we leak some objects for the scrubber to clean up.\n                        metrics::DELETION_QUEUE.unexpected_errors.inc();\n                        Ok(None)\n                    }\n                }\n            }\n            Err(e) => {\n                if e.kind() == std::io::ErrorKind::NotFound {\n                    debug!(\"Deletion header {header_path} not found, first start?\");\n                    Ok(None)\n                } else {\n                    on_fatal_io_error(&e, \"reading deletion header\");\n                }\n            }\n        }\n    }\n\n    async fn recover(\n        &mut self,\n        attached_tenants: HashMap<TenantShardId, Generation>,\n    ) -> Result<(), anyhow::Error> {\n        debug!(\n            \"recovering with {} attached tenants\",\n            attached_tenants.len()\n        );\n\n        // Load the header\n        let validated_sequence = 
self.load_validated_sequence().await?.unwrap_or(0);\n\n        self.pending.sequence = validated_sequence + 1;\n\n        let deletion_directory = self.conf.deletion_prefix();\n        let mut dir = tokio::fs::read_dir(&deletion_directory)\n            .await\n            .fatal_err(\"read deletion directory\");\n\n        let list_name_pattern =\n            Regex::new(\"(?<sequence>[a-zA-Z0-9]{16})-(?<version>[a-zA-Z0-9]{2}).list\").unwrap();\n\n        let temp_extension = format!(\".{TEMP_SUFFIX}\");\n        let header_path = self.conf.deletion_header_path();\n        let mut seqs: Vec<u64> = Vec::new();\n        while let Some(dentry) = dir.next_entry().await.fatal_err(\"read deletion dentry\") {\n            let file_name = dentry.file_name();\n            let dentry_str = file_name.to_string_lossy();\n\n            if file_name == header_path.file_name().unwrap_or(\"\") {\n                // Don't try and parse the header's name like a list\n                continue;\n            }\n\n            if dentry_str.ends_with(&temp_extension) {\n                info!(\"Cleaning up temporary file {dentry_str}\");\n                let absolute_path =\n                    deletion_directory.join(dentry.file_name().to_str().expect(\"non-Unicode path\"));\n                tokio::fs::remove_file(&absolute_path)\n                    .await\n                    .fatal_err(\"delete temp file\");\n\n                continue;\n            }\n\n            let file_name = dentry.file_name().to_owned();\n            let basename = file_name.to_string_lossy();\n            let seq_part = if let Some(m) = list_name_pattern.captures(&basename) {\n                m.name(\"sequence\")\n                    .expect(\"Non optional group should be present\")\n                    .as_str()\n            } else {\n                warn!(\"Unexpected key in deletion queue: {basename}\");\n                metrics::DELETION_QUEUE.unexpected_errors.inc();\n                continue;\n          
  };\n\n            let seq: u64 = match u64::from_str_radix(seq_part, 16) {\n                Ok(s) => s,\n                Err(e) => {\n                    warn!(\"Malformed key '{basename}': {e}\");\n                    metrics::DELETION_QUEUE.unexpected_errors.inc();\n                    continue;\n                }\n            };\n            seqs.push(seq);\n        }\n        seqs.sort();\n\n        // Start our next deletion list from after the last location validated by\n        // previous process lifetime, or after the last location found (it is updated\n        // below after enumerating the deletion lists)\n        self.pending.sequence = validated_sequence + 1;\n        if let Some(max_list_seq) = seqs.last() {\n            self.pending.sequence = std::cmp::max(self.pending.sequence, max_list_seq + 1);\n        }\n\n        for s in seqs {\n            let list_path = self.conf.deletion_list_path(s);\n\n            let list_bytes = tokio::fs::read(&list_path)\n                .await\n                .fatal_err(\"read deletion list\");\n\n            let mut deletion_list = match serde_json::from_slice::<DeletionList>(&list_bytes) {\n                Ok(l) => l,\n                Err(e) => {\n                    // Drop the list on the floor: any objects it referenced will be left behind\n                    // for scrubbing to clean up.  This should never happen unless we have a serialization bug.\n                    warn!(sequence = s, \"Failed to deserialize deletion list: {e}\");\n                    metrics::DELETION_QUEUE.unexpected_errors.inc();\n                    continue;\n                }\n            };\n\n            if deletion_list.sequence <= validated_sequence {\n                // If the deletion list falls below valid_seq, we may assume that it was\n                // already validated the last time this pageserver ran.  
Otherwise, we still\n                // load it, as it may still contain content valid in this generation.\n                deletion_list.validated = true;\n            } else {\n                // Special case optimization: if a tenant is still attached, and no other\n                // generation was issued to another node in the interval while we restarted,\n                // then we may treat deletion lists from the previous generation as if they\n                // belong to our currently attached generation, and proceed to validate & execute.\n                for (tenant_shard_id, tenant_list) in &mut deletion_list.tenants {\n                    if let Some(attached_gen) = attached_tenants.get(tenant_shard_id) {\n                        if attached_gen.previous() == tenant_list.generation {\n                            info!(\n                                seq=%s, tenant_id=%tenant_shard_id.tenant_id,\n                                shard_id=%tenant_shard_id.shard_slug(),\n                                old_gen=?tenant_list.generation, new_gen=?attached_gen,\n                                \"Updating gen on recovered list\");\n                            tenant_list.generation = *attached_gen;\n                        } else {\n                            info!(\n                                seq=%s, tenant_id=%tenant_shard_id.tenant_id,\n                                shard_id=%tenant_shard_id.shard_slug(),\n                                old_gen=?tenant_list.generation, new_gen=?attached_gen,\n                                \"Encountered stale generation on recovered list\");\n                        }\n                    }\n                }\n            }\n\n            info!(\n                validated = deletion_list.validated,\n                sequence = deletion_list.sequence,\n                \"Recovered deletion list\"\n            );\n\n            // We will drop out of recovery if this fails: it indicates that we are shutting down\n   
         // or the backend has panicked\n            metrics::DELETION_QUEUE\n                .keys_submitted\n                .inc_by(deletion_list.len() as u64);\n            self.tx\n                .send(ValidatorQueueMessage::Delete(deletion_list))\n                .await?;\n        }\n\n        info!(next_sequence = self.pending.sequence, \"Replay complete\");\n\n        Ok(())\n    }\n\n    /// This is the front-end ingest, where we bundle up deletion requests into DeletionList\n    /// and write them out, for later validation by the backend and execution by the executor.\n    pub(super) async fn background(&mut self) {\n        info!(\"Started deletion frontend worker\");\n\n        // Synchronous, but we only do it once per process lifetime so it's tolerable\n        if let Err(e) = create_dir_all(self.conf.deletion_prefix()) {\n            tracing::error!(\n                \"Failed to create deletion list directory {}, deletions will not be executed ({e})\",\n                self.conf.deletion_prefix(),\n            );\n            metrics::DELETION_QUEUE.unexpected_errors.inc();\n            return;\n        }\n\n        while !self.cancel.is_cancelled() {\n            let timeout = if self.pending_flushes.is_empty() {\n                FRONTEND_DEFAULT_TIMEOUT\n            } else {\n                FRONTEND_FLUSHING_TIMEOUT\n            };\n\n            let msg = match tokio::time::timeout(timeout, self.rx.recv()).await {\n                Ok(Some(msg)) => msg,\n                Ok(None) => {\n                    // Queue sender destroyed, shutting down\n                    break;\n                }\n                Err(_) => {\n                    // Hit deadline, flush.\n                    self.flush().await;\n                    continue;\n                }\n            };\n\n            match msg {\n                ListWriterQueueMessage::Delete(op) => {\n                    assert!(\n                        self.recovered,\n                        
\"Cannot process deletions before recovery.  This is a bug.\"\n                    );\n\n                    debug!(\n                        \"Delete: ingesting {} layers, {} other objects\",\n                        op.layers.len(),\n                        op.objects.len()\n                    );\n\n                    let mut layer_paths = Vec::new();\n                    for (layer, meta) in op.layers {\n                        layer_paths.push(remote_layer_path(\n                            &op.tenant_shard_id.tenant_id,\n                            &op.timeline_id,\n                            meta.shard,\n                            &layer,\n                            meta.generation,\n                        ));\n                    }\n                    layer_paths.extend(op.objects);\n\n                    if !self.pending.push(\n                        &op.tenant_shard_id,\n                        &op.timeline_id,\n                        op.generation,\n                        &mut layer_paths,\n                    ) {\n                        self.flush().await;\n                        let retry_succeeded = self.pending.push(\n                            &op.tenant_shard_id,\n                            &op.timeline_id,\n                            op.generation,\n                            &mut layer_paths,\n                        );\n                        if !retry_succeeded {\n                            // Unexpected: after we flush, we should have\n                            // drained self.pending, so a conflict on\n                            // generation numbers should be impossible.\n                            tracing::error!(\n                                \"Failed to enqueue deletions, leaking objects.  
This is a bug.\"\n                            );\n                            metrics::DELETION_QUEUE.unexpected_errors.inc();\n                        }\n                    }\n                }\n                ListWriterQueueMessage::Flush(op) => {\n                    if self.pending.is_empty() {\n                        // Execute immediately\n                        debug!(\"Flush: No pending objects, flushing immediately\");\n                        op.notify()\n                    } else {\n                        // Execute next time we flush\n                        debug!(\"Flush: adding to pending flush list for next deadline flush\");\n                        self.pending_flushes.push(op);\n                    }\n                }\n                ListWriterQueueMessage::FlushExecute(op) => {\n                    debug!(\"FlushExecute: passing through to backend\");\n                    // We do not flush to a deletion list here: the client sends a Flush before the FlushExecute\n                    if let Err(e) = self.tx.send(ValidatorQueueMessage::Flush(op)).await {\n                        info!(\"Can't flush, shutting down ({e})\");\n                        // Caller will get error when their oneshot sender was dropped.\n                    }\n                }\n                ListWriterQueueMessage::Recover(op) => {\n                    if self.recovered {\n                        tracing::error!(\n                            \"Deletion queue recovery called more than once.  
This is a bug.\"\n                        );\n                        metrics::DELETION_QUEUE.unexpected_errors.inc();\n                        // Non-fatal: although this is a bug, since we did recovery at least once we may proceed.\n                        continue;\n                    }\n\n                    if let Err(e) = self.recover(op.attached_tenants).await {\n                        // This should only happen in truly unrecoverable cases, like the recovery finding that the backend\n                        // queue receiver has been dropped, or something is critically broken with\n                        // the local filesystem holding deletion lists.\n                        info!(\n                            \"Deletion queue recover aborted, deletion queue will not proceed ({e})\"\n                        );\n                        metrics::DELETION_QUEUE.unexpected_errors.inc();\n                        return;\n                    } else {\n                        self.recovered = true;\n                    }\n                }\n            }\n\n            if self.pending.len() > DELETION_LIST_TARGET_SIZE || !self.pending_flushes.is_empty() {\n                self.flush().await;\n            }\n        }\n        info!(\"Deletion queue shut down.\");\n    }\n}\n"
  },
  {
    "path": "pageserver/src/deletion_queue/validator.rs",
    "content": "//! The validator is responsible for validating DeletionLists for execution,\n//! based on whether the generation in the DeletionList is still the latest\n//! generation for a tenant.\n//!\n//! The purpose of validation is to ensure split-brain safety in the cluster\n//! of pageservers: a deletion may only be executed if the tenant generation\n//! that originated it is still current.  See docs/rfcs/025-generation-numbers.md\n//! The purpose of accumulating lists before validating them is to reduce load\n//! on the control plane API by issuing fewer, larger requests.\n//!\n//! In addition to validating DeletionLists, the validator validates updates to remote_consistent_lsn\n//! for timelines: these are logically deletions because the safekeepers use remote_consistent_lsn\n//! to decide when old\n//!\n//! Deletions are passed onward to the Deleter.\n\nuse std::collections::HashMap;\nuse std::sync::Arc;\nuse std::time::Duration;\n\nuse camino::Utf8PathBuf;\nuse tokio_util::sync::CancellationToken;\nuse tracing::{debug, info, warn};\n\nuse super::deleter::DeleterMessage;\nuse super::{DeletionHeader, DeletionList, DeletionQueueError, FlushOp, VisibleLsnUpdates};\nuse crate::config::PageServerConf;\nuse crate::controller_upcall_client::{RetryForeverError, StorageControllerUpcallApi};\nuse crate::metrics;\nuse crate::virtual_file::MaybeFatalIo;\n\n// After this length of time, do any validation work that is pending,\n// even if we haven't accumulated many keys to delete.\n//\n// This also causes updates to remote_consistent_lsn to be validated, even\n// if there were no deletions enqueued.\nconst AUTOFLUSH_INTERVAL: Duration = Duration::from_secs(10);\n\n// If we have received this number of keys, proceed with attempting to execute\nconst AUTOFLUSH_KEY_COUNT: usize = 16384;\n\n#[derive(Debug)]\npub(super) enum ValidatorQueueMessage {\n    Delete(DeletionList),\n    Flush(FlushOp),\n}\npub(super) struct Validator<C>\nwhere\n    C: 
StorageControllerUpcallApi,\n{\n    conf: &'static PageServerConf,\n    rx: tokio::sync::mpsc::Receiver<ValidatorQueueMessage>,\n    tx: tokio::sync::mpsc::Sender<DeleterMessage>,\n\n    // Client for calling into control plane API for validation of deletes\n    controller_upcall_client: C,\n\n    // DeletionLists which are waiting generation validation.  Not safe to\n    // execute until [`validate`] has processed them.\n    pending_lists: Vec<DeletionList>,\n\n    // DeletionLists which have passed validation and are ready to execute.\n    validated_lists: Vec<DeletionList>,\n\n    // Sum of all the lengths of lists in pending_lists\n    pending_key_count: usize,\n\n    // Lsn validation state: we read projected LSNs and write back visible LSNs\n    // after validation.  This is the LSN equivalent of `pending_validation_lists`:\n    // it is drained in [`validate`]\n    lsn_table: Arc<std::sync::RwLock<VisibleLsnUpdates>>,\n\n    // If we failed to rewrite a deletion list due to local filesystem I/O failure,\n    // we must remember that and refuse to advance our persistent validated sequence\n    // number past the failure.\n    list_write_failed: Option<u64>,\n\n    cancel: CancellationToken,\n}\n\nimpl<C> Validator<C>\nwhere\n    C: StorageControllerUpcallApi,\n{\n    pub(super) fn new(\n        conf: &'static PageServerConf,\n        rx: tokio::sync::mpsc::Receiver<ValidatorQueueMessage>,\n        tx: tokio::sync::mpsc::Sender<DeleterMessage>,\n        controller_upcall_client: C,\n        lsn_table: Arc<std::sync::RwLock<VisibleLsnUpdates>>,\n        cancel: CancellationToken,\n    ) -> Self {\n        Self {\n            conf,\n            rx,\n            tx,\n            controller_upcall_client,\n            lsn_table,\n            pending_lists: Vec::new(),\n            validated_lists: Vec::new(),\n            pending_key_count: 0,\n            list_write_failed: None,\n            cancel,\n        }\n    }\n    /// Process any outstanding validations 
of generations of pending LSN updates or pending\n    /// DeletionLists.\n    ///\n    /// Valid LSN updates propagate back to Timelines immediately, valid DeletionLists\n    /// go into the queue of ready-to-execute lists.\n    async fn validate(&mut self) -> Result<(), DeletionQueueError> {\n        let mut tenant_generations = HashMap::new();\n        for list in &self.pending_lists {\n            for (tenant_id, tenant_list) in &list.tenants {\n                // Note: DeletionLists are in logical time order, so generation always\n                // goes up.  By doing a simple insert() we will always end up with\n                // the latest generation seen for a tenant.\n                tenant_generations.insert(*tenant_id, tenant_list.generation);\n            }\n        }\n\n        let pending_lsn_updates = {\n            let mut lsn_table = self.lsn_table.write().expect(\"Lock should not be poisoned\");\n            std::mem::take(&mut *lsn_table)\n        };\n        for (tenant_id, update) in &pending_lsn_updates.tenants {\n            let entry = tenant_generations\n                .entry(*tenant_id)\n                .or_insert(update.generation);\n            if update.generation > *entry {\n                *entry = update.generation;\n            }\n        }\n\n        if tenant_generations.is_empty() {\n            // No work to do\n            return Ok(());\n        }\n\n        let tenants_valid = match self\n            .controller_upcall_client\n            .validate(tenant_generations.iter().map(|(k, v)| (*k, *v)).collect())\n            .await\n        {\n            Ok(tenants) => tenants,\n            Err(RetryForeverError::ShuttingDown) => {\n                // The only way a validation call returns an error is when the cancellation token fires\n                return Err(DeletionQueueError::ShuttingDown);\n            }\n        };\n\n        let mut validated_sequence: Option<u64> = None;\n\n        // Apply the validation results to 
the pending LSN updates\n        for (tenant_id, tenant_lsn_state) in pending_lsn_updates.tenants {\n            let validated_generation = tenant_generations\n                .get(&tenant_id)\n                .expect(\"Map was built from the same keys we're reading\");\n\n            let valid = tenants_valid\n                .get(&tenant_id)\n                .copied()\n                // If the tenant was missing from the validation response, it has been deleted.\n                // The Timeline that requested the LSN update is probably already torn down,\n                // or will be torn down soon.  In this case, drop the update by setting valid=false.\n                .unwrap_or(false);\n\n            if valid && *validated_generation == tenant_lsn_state.generation {\n                for (timeline_id, pending_lsn) in tenant_lsn_state.timelines {\n                    tracing::debug!(\n                        %tenant_id,\n                        %timeline_id,\n                        current = %pending_lsn.result_slot.load(),\n                        projected = %pending_lsn.projected,\n                        \"advancing validated remote_consistent_lsn\",\n                    );\n                    pending_lsn.result_slot.store(pending_lsn.projected);\n                }\n            } else {\n                // If we failed validation, then do not apply any of the projected updates\n                info!(\n                    \"Dropped remote consistent LSN updates for tenant {tenant_id} in stale generation {:?}\",\n                    tenant_lsn_state.generation\n                );\n                metrics::DELETION_QUEUE.dropped_lsn_updates.inc();\n            }\n        }\n\n        // Apply the validation results to the pending deletion lists\n        for list in &mut self.pending_lists {\n            // Filter the list based on whether the server responded valid: true.\n            // If a tenant is omitted in the response, it has been deleted, and we 
should\n            // drop its deletions as redundant rather than execute them.\n            let mut mutated = false;\n            list.tenants.retain(|tenant_id, tenant| {\n                let validated_generation = tenant_generations\n                    .get(tenant_id)\n                    .expect(\"Map was built from the same keys we're reading\");\n\n                // If the tenant was missing from the validation response, it has been deleted.\n                // This means that a deletion is valid, but also redundant since the tenant's\n                // objects should have already been deleted.  Treat it as invalid to drop the\n                // redundant deletion.\n                let valid = tenants_valid.get(tenant_id).copied().unwrap_or(false);\n\n                // A list is valid if it comes from the current _or previous_ generation.\n                // - The previous generation case is permitted due to how we store deletion lists locally:\n                // if we see the immediately previous generation in a locally stored deletion list,\n                // it proves that this node's disk was used for both current & previous generations,\n                // and therefore no other node was involved in between: the two generations may be\n                // logically treated as the same.\n                // - In that previous generation case, we rewrote it to the current generation\n                // in recover(), so the comparison here is simply an equality.\n\n                let this_list_valid = valid\n                    && (tenant.generation == *validated_generation);\n\n                if !this_list_valid {\n                    info!(\"Dropping stale deletions for tenant {tenant_id} in generation {:?}, objects may be leaked\", tenant.generation);\n                    metrics::DELETION_QUEUE.keys_dropped.inc_by(tenant.len() as u64);\n                    mutated = true;\n                } else {\n                    metrics::DELETION_QUEUE.keys_validated.inc_by(tenant.len() as 
u64);\n                }\n                this_list_valid\n            });\n            list.validated = true;\n\n            if mutated {\n                // Save the deletion list if we had to make changes due to stale generations.  The\n                // saved list is valid for execution.\n                if let Err(e) = list.save(self.conf).await {\n                    // Highly unexpected.  Could happen if e.g. disk full.\n                    // If we didn't save the trimmed list, it is _not_ valid to execute.\n                    warn!(\"Failed to save modified deletion list {list}: {e:#}\");\n                    metrics::DELETION_QUEUE.unexpected_errors.inc();\n\n                    // Rather than have a complex retry process, just drop it and leak the objects,\n                    // scrubber will clean up eventually.\n                    list.tenants.clear(); // Result is a valid-but-empty list, which is a no-op for execution.\n\n                    // We must remember this failure, to prevent later writing out a header that\n                    // would imply the unwritable list was valid on disk.\n                    if self.list_write_failed.is_none() {\n                        self.list_write_failed = Some(list.sequence);\n                    }\n                }\n            }\n\n            validated_sequence = Some(list.sequence);\n        }\n\n        if let Some(validated_sequence) = validated_sequence {\n            if let Some(list_write_failed) = self.list_write_failed {\n                // Rare error case: we failed to write out a deletion list to excise invalid\n                // entries, so we cannot advance the header's valid sequence number past that point.\n                //\n                // In this state we will continue to validate, execute and delete deletion lists,\n                // we just cannot update the header.  
It should be noticed and fixed by a human due to\n                // the nonzero value of our unexpected_errors metric.\n                warn!(\n                    sequence_number = list_write_failed,\n                    \"Cannot write header because writing a deletion list failed earlier\",\n                );\n            } else {\n                // Write the queue header to record how far validation progressed.  This avoids having\n                // to rewrite each DeletionList to set validated=true in it.\n                let header = DeletionHeader::new(validated_sequence);\n\n                // Drop result because the validated_sequence is an optimization.  If we fail to save it,\n                // then restart, we will drop some deletion lists, creating work for scrubber.\n                // The save() function logs a warning on error.\n                if let Err(e) = header.save(self.conf).await {\n                    warn!(\"Failed to write deletion queue header: {e:#}\");\n                    metrics::DELETION_QUEUE.unexpected_errors.inc();\n                }\n            }\n        }\n\n        // Transfer the validated lists to the validated queue, for eventual execution\n        self.validated_lists.append(&mut self.pending_lists);\n\n        Ok(())\n    }\n\n    async fn cleanup_lists(&mut self, list_paths: Vec<Utf8PathBuf>) {\n        for list_path in list_paths {\n            debug!(\"Removing deletion list {list_path}\");\n            tokio::fs::remove_file(&list_path)\n                .await\n                .fatal_err(\"remove deletion list\");\n        }\n    }\n\n    async fn flush(&mut self) -> Result<(), DeletionQueueError> {\n        tracing::debug!(\"Flushing with {} pending lists\", self.pending_lists.len());\n\n        // Issue any required generation validation calls to the control plane\n        self.validate().await?;\n\n        // After successful validation, nothing is pending: any lists that\n        // made it through 
validation will be in validated_lists.\n        assert!(self.pending_lists.is_empty());\n        self.pending_key_count = 0;\n\n        tracing::debug!(\n            \"Validation complete, have {} validated lists\",\n            self.validated_lists.len()\n        );\n\n        // Return quickly if we have no validated lists to execute.  This avoids flushing the\n        // executor when an idle backend hits its autoflush interval\n        if self.validated_lists.is_empty() {\n            return Ok(());\n        }\n\n        // Drain `validated_lists` into the executor\n        let mut executing_lists = Vec::new();\n        for list in self.validated_lists.drain(..) {\n            let list_path = self.conf.deletion_list_path(list.sequence);\n            let objects = list.into_remote_paths();\n            self.tx\n                .send(DeleterMessage::Delete(objects))\n                .await\n                .map_err(|_| DeletionQueueError::ShuttingDown)?;\n            executing_lists.push(list_path);\n        }\n\n        self.flush_executor().await?;\n\n        // Erase the deletion lists whose keys have all been deleted from remote storage\n        self.cleanup_lists(executing_lists).await;\n\n        Ok(())\n    }\n\n    async fn flush_executor(&mut self) -> Result<(), DeletionQueueError> {\n        // Flush the executor, so that all the keys referenced by these deletion lists\n        // are actually removed from remote storage.  
This is a precondition to deleting\n        // the deletion lists themselves.\n        let (flush_op, rx) = FlushOp::new();\n        self.tx\n            .send(DeleterMessage::Flush(flush_op))\n            .await\n            .map_err(|_| DeletionQueueError::ShuttingDown)?;\n\n        rx.await.map_err(|_| DeletionQueueError::ShuttingDown)\n    }\n\n    pub(super) async fn background(&mut self) {\n        tracing::info!(\"Started deletion backend worker\");\n\n        while !self.cancel.is_cancelled() {\n            let msg = match tokio::time::timeout(AUTOFLUSH_INTERVAL, self.rx.recv()).await {\n                Ok(Some(m)) => m,\n                Ok(None) => {\n                    // All queue senders closed\n                    info!(\"Shutting down\");\n                    break;\n                }\n                Err(_) => {\n                    // Timeout, we hit deadline to execute whatever we have in hand.  These functions will\n                    // return immediately if no work is pending.\n                    match self.flush().await {\n                        Ok(()) => {}\n                        Err(DeletionQueueError::ShuttingDown) => {\n                            // If we are shutting down, then auto-flush can safely be skipped\n                        }\n                    }\n\n                    continue;\n                }\n            };\n\n            match msg {\n                ValidatorQueueMessage::Delete(list) => {\n                    if list.validated {\n                        // A pre-validated list may only be seen during recovery, if we are recovering\n                        // a DeletionList whose on-disk state has validated=true\n                        self.validated_lists.push(list)\n                    } else {\n                        self.pending_key_count += list.len();\n                        self.pending_lists.push(list);\n                    }\n\n                    if self.pending_key_count > AUTOFLUSH_KEY_COUNT {\n    
                    match self.flush().await {\n                            Ok(()) => {}\n                            Err(DeletionQueueError::ShuttingDown) => {\n                                // If we are shutting down, then auto-flush can safely be skipped\n                            }\n                        }\n                    }\n                }\n                ValidatorQueueMessage::Flush(op) => {\n                    match self.flush().await {\n                        Ok(()) => {\n                            op.notify();\n                        }\n                        Err(DeletionQueueError::ShuttingDown) => {\n                            // If we fail due to shutting down, we will just drop `op` to propagate that status.\n                        }\n                    }\n                }\n            }\n        }\n    }\n}\n"
  },
  {
    "path": "pageserver/src/deletion_queue.rs",
    "content": "mod deleter;\nmod list_writer;\nmod validator;\n\nuse std::collections::HashMap;\nuse std::sync::Arc;\nuse std::time::Duration;\n\nuse anyhow::Context;\nuse camino::Utf8PathBuf;\nuse deleter::DeleterMessage;\nuse list_writer::ListWriterQueueMessage;\nuse pageserver_api::shard::TenantShardId;\nuse remote_storage::{GenericRemoteStorage, RemotePath};\nuse serde::{Deserialize, Serialize};\nuse thiserror::Error;\nuse tokio_util::sync::CancellationToken;\nuse tracing::{Instrument, debug, error};\nuse utils::crashsafe::path_with_suffix_extension;\nuse utils::generation::Generation;\nuse utils::id::TimelineId;\nuse utils::lsn::{AtomicLsn, Lsn};\nuse validator::ValidatorQueueMessage;\n\nuse self::deleter::Deleter;\nuse self::list_writer::{DeletionOp, ListWriter, RecoverOp};\nuse self::validator::Validator;\nuse crate::config::PageServerConf;\nuse crate::controller_upcall_client::StorageControllerUpcallApi;\nuse crate::metrics;\nuse crate::tenant::remote_timeline_client::{LayerFileMetadata, remote_timeline_path};\nuse crate::tenant::storage_layer::LayerName;\nuse crate::virtual_file::{MaybeFatalIo, VirtualFile};\n\n// TODO: configurable for how long to wait before executing deletions\n\n/// We aggregate object deletions from many tenants in one place, for several reasons:\n/// - Coalesce deletions into fewer DeleteObjects calls\n/// - Enable Tenant/Timeline lifetimes to be shorter than the time it takes\n///   to flush any outstanding deletions.\n/// - Globally control throughput of deletions, as these are a low priority task: do\n///   not compete with the same S3 clients/connections used for higher priority uploads.\n/// - Enable gating deletions on validation of a tenant's generation number, to make\n///   it safe to multi-attach tenants (see docs/rfcs/025-generation-numbers.md)\n///\n/// There are two kinds of deletion: deferred and immediate.  
A deferred deletion\n/// may be intentionally delayed to protect passive readers of S3 data, and is\n/// subject to a generation number validation step.  An immediate deletion is\n/// ready to execute immediately, and is only queued up so that it can be coalesced\n/// with other deletions in flight.\n///\n/// Deferred deletions pass through three steps:\n/// - ListWriter: accumulate deletion requests from Timelines, and batch them up into\n///   DeletionLists, which are persisted to disk.\n/// - Validator: accumulate deletion lists, and validate them en-masse prior to passing\n///   the keys in the list onward for actual deletion.  Also validate remote_consistent_lsn\n///   updates for running timelines.\n/// - Deleter: accumulate object keys that the validator has validated, and execute them in\n///   batches of 1000 keys via DeleteObjects.\n///\n/// Non-deferred deletions, such as during timeline deletion, bypass the first\n/// two stages and are passed straight into the Deleter.\n///\n/// Internally, each stage is joined by a channel to the next.  
On disk, there is only\n/// one queue (of DeletionLists), which is written by the frontend and consumed\n/// by the backend.\n#[derive(Clone)]\npub struct DeletionQueue {\n    client: DeletionQueueClient,\n\n    // Parent cancellation token for the tokens passed into background workers\n    cancel: CancellationToken,\n}\n\n/// Opaque wrapper around individual worker tasks, to avoid making the\n/// worker objects themselves public\npub struct DeletionQueueWorkers<C>\nwhere\n    C: StorageControllerUpcallApi + Send + Sync,\n{\n    frontend: ListWriter,\n    backend: Validator<C>,\n    executor: Deleter,\n}\n\nimpl<C> DeletionQueueWorkers<C>\nwhere\n    C: StorageControllerUpcallApi + Send + Sync + 'static,\n{\n    pub fn spawn_with(mut self, runtime: &tokio::runtime::Handle) -> tokio::task::JoinHandle<()> {\n        let jh_frontend = runtime.spawn(async move {\n            self.frontend\n                .background()\n                .instrument(tracing::info_span!(parent:None, \"deletion frontend\"))\n                .await\n        });\n        let jh_backend = runtime.spawn(async move {\n            self.backend\n                .background()\n                .instrument(tracing::info_span!(parent:None, \"deletion backend\"))\n                .await\n        });\n        let jh_executor = runtime.spawn(async move {\n            self.executor\n                .background()\n                .instrument(tracing::info_span!(parent:None, \"deletion executor\"))\n                .await\n        });\n\n        runtime.spawn({\n            async move {\n                jh_frontend.await.expect(\"error joining frontend worker\");\n                jh_backend.await.expect(\"error joining backend worker\");\n                drop(jh_executor.await.expect(\"error joining executor worker\"));\n            }\n        })\n    }\n}\n\n/// A FlushOp is just a oneshot channel, where we send the transmit side down\n/// another channel, and the receive side will receive a message when 
the channel\n/// we're flushing has reached the FlushOp we sent into it.\n///\n/// The only extra behavior beyond the channel is that the notify() method does not\n/// return an error when the receive side has been dropped, because in this use case\n/// it is harmless (the code that initiated the flush no longer cares about the result).\n#[derive(Debug)]\nstruct FlushOp {\n    tx: tokio::sync::oneshot::Sender<()>,\n}\n\nimpl FlushOp {\n    fn new() -> (Self, tokio::sync::oneshot::Receiver<()>) {\n        let (tx, rx) = tokio::sync::oneshot::channel::<()>();\n        (Self { tx }, rx)\n    }\n\n    fn notify(self) {\n        if self.tx.send(()).is_err() {\n            // oneshot channel closed. This is legal: a client could be destroyed while waiting for a flush.\n            debug!(\"deletion queue flush from dropped client\");\n        };\n    }\n}\n\n#[derive(Clone, Debug)]\npub struct DeletionQueueClient {\n    tx: tokio::sync::mpsc::UnboundedSender<ListWriterQueueMessage>,\n    executor_tx: tokio::sync::mpsc::Sender<DeleterMessage>,\n\n    lsn_table: Arc<std::sync::RwLock<VisibleLsnUpdates>>,\n}\n\n#[derive(Debug, Serialize, Deserialize, PartialEq, Eq)]\nstruct TenantDeletionList {\n    /// For each Timeline, a list of key fragments to append to the timeline remote path\n    /// when reconstructing a full key\n    timelines: HashMap<TimelineId, Vec<String>>,\n\n    /// The generation in which this deletion was emitted: note that this may not be the\n    /// same as the generation of any layers being deleted.  
The generation of the layer\n    /// has already been absorbed into the keys in `objects`\n    generation: Generation,\n}\n\nimpl TenantDeletionList {\n    pub(crate) fn len(&self) -> usize {\n        self.timelines.values().map(|v| v.len()).sum()\n    }\n}\n\n/// Files ending with this suffix will be ignored and erased\n/// during recovery at startup.\nconst TEMP_SUFFIX: &str = \"tmp\";\n\n#[derive(Debug, Serialize, Deserialize, PartialEq, Eq)]\nstruct DeletionList {\n    /// Serialization version, for future use\n    version: u8,\n\n    /// Used for constructing a unique key for each deletion list we write out.\n    sequence: u64,\n\n    /// To avoid repeating tenant/timeline IDs in every key, we store keys in\n    /// nested HashMaps by TenantTimelineID.  Each Tenant only appears once\n    /// with one unique generation ID: if someone tries to push a second generation\n    /// ID for the same tenant, we will start a new DeletionList.\n    tenants: HashMap<TenantShardId, TenantDeletionList>,\n\n    /// Avoid having to walk `tenants` to calculate the number of keys in\n    /// the nested deletion lists\n    size: usize,\n\n    /// Set to true when the list has undergone validation with the control\n    /// plane and the remaining contents of `tenants` are valid.  A list may\n    /// also be implicitly marked valid by DeletionHeader.validated_sequence\n    /// advancing to >= DeletionList.sequence\n    #[serde(default)]\n    #[serde(skip_serializing_if = \"std::ops::Not::not\")]\n    validated: bool,\n}\n\n#[derive(Debug, Serialize, Deserialize)]\nstruct DeletionHeader {\n    /// Serialization version, for future use\n    version: u8,\n\n    /// The highest sequence number (inclusive) that has been validated.  
All deletion\n    /// lists on disk with a sequence <= this value are safe to execute.\n    validated_sequence: u64,\n}\n\nimpl DeletionHeader {\n    const VERSION_LATEST: u8 = 1;\n\n    fn new(validated_sequence: u64) -> Self {\n        Self {\n            version: Self::VERSION_LATEST,\n            validated_sequence,\n        }\n    }\n\n    async fn save(&self, conf: &'static PageServerConf) -> anyhow::Result<()> {\n        debug!(\"Saving deletion list header {:?}\", self);\n        let header_bytes = serde_json::to_vec(self).context(\"serialize deletion header\")?;\n        let header_path = conf.deletion_header_path();\n        let temp_path = path_with_suffix_extension(&header_path, TEMP_SUFFIX);\n        VirtualFile::crashsafe_overwrite(header_path, temp_path, header_bytes)\n            .await\n            .maybe_fatal_err(\"save deletion header\")?;\n\n        Ok(())\n    }\n}\n\nimpl DeletionList {\n    const VERSION_LATEST: u8 = 1;\n    fn new(sequence: u64) -> Self {\n        Self {\n            version: Self::VERSION_LATEST,\n            sequence,\n            tenants: HashMap::new(),\n            size: 0,\n            validated: false,\n        }\n    }\n\n    fn is_empty(&self) -> bool {\n        self.tenants.is_empty()\n    }\n\n    fn len(&self) -> usize {\n        self.size\n    }\n\n    /// Returns true if the push was accepted, false if the caller must start a new\n    /// deletion list.\n    fn push(\n        &mut self,\n        tenant: &TenantShardId,\n        timeline: &TimelineId,\n        generation: Generation,\n        objects: &mut Vec<RemotePath>,\n    ) -> bool {\n        if objects.is_empty() {\n            // Avoid inserting an empty TimelineDeletionList: this preserves the property\n            // that if we have no keys, then self.objects is empty (used in Self::is_empty)\n            return true;\n        }\n\n        let tenant_entry = self\n            .tenants\n            .entry(*tenant)\n            .or_insert_with(|| 
TenantDeletionList {\n                timelines: HashMap::new(),\n                generation,\n            });\n\n        if tenant_entry.generation != generation {\n            // Only one generation per tenant per list: signal to\n            // caller to start a new list.\n            return false;\n        }\n\n        let timeline_entry = tenant_entry.timelines.entry(*timeline).or_default();\n\n        let timeline_remote_path = remote_timeline_path(tenant, timeline);\n\n        self.size += objects.len();\n        timeline_entry.extend(objects.drain(..).map(|p| {\n            p.strip_prefix(&timeline_remote_path)\n                .expect(\"Timeline paths always start with the timeline prefix\")\n                .to_string()\n        }));\n        true\n    }\n\n    fn into_remote_paths(self) -> Vec<RemotePath> {\n        let mut result = Vec::new();\n        for (tenant, tenant_deletions) in self.tenants.into_iter() {\n            for (timeline, timeline_layers) in tenant_deletions.timelines.into_iter() {\n                let timeline_remote_path = remote_timeline_path(&tenant, &timeline);\n                result.extend(\n                    timeline_layers\n                        .into_iter()\n                        .map(|l| timeline_remote_path.join(Utf8PathBuf::from(l))),\n                );\n            }\n        }\n\n        result\n    }\n\n    async fn save(&self, conf: &'static PageServerConf) -> anyhow::Result<()> {\n        let path = conf.deletion_list_path(self.sequence);\n        let temp_path = path_with_suffix_extension(&path, TEMP_SUFFIX);\n\n        let bytes = serde_json::to_vec(self).expect(\"Failed to serialize deletion list\");\n\n        VirtualFile::crashsafe_overwrite(path, temp_path, bytes)\n            .await\n            .maybe_fatal_err(\"save deletion list\")\n            .map_err(Into::into)\n    }\n}\n\nimpl std::fmt::Display for DeletionList {\n    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {\n        
write!(\n            f,\n            \"DeletionList<seq={}, tenants={}, keys={}>\",\n            self.sequence,\n            self.tenants.len(),\n            self.size\n        )\n    }\n}\n\nstruct PendingLsn {\n    projected: Lsn,\n    result_slot: Arc<AtomicLsn>,\n}\n\nstruct TenantLsnState {\n    timelines: HashMap<TimelineId, PendingLsn>,\n\n    // In what generation was the most recent update proposed?\n    generation: Generation,\n}\n\n#[derive(Default)]\nstruct VisibleLsnUpdates {\n    tenants: HashMap<TenantShardId, TenantLsnState>,\n}\n\nimpl VisibleLsnUpdates {\n    fn new() -> Self {\n        Self {\n            tenants: HashMap::new(),\n        }\n    }\n}\n\nimpl std::fmt::Debug for VisibleLsnUpdates {\n    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {\n        write!(f, \"VisibleLsnUpdates({} tenants)\", self.tenants.len())\n    }\n}\n\n#[derive(Error, Debug)]\npub enum DeletionQueueError {\n    #[error(\"Deletion queue unavailable during shutdown\")]\n    ShuttingDown,\n}\n\nimpl DeletionQueueClient {\n    /// This is cancel-safe.  If you drop the future before it completes, the message\n    /// is not pushed, although in the context of the deletion queue it doesn't matter: once\n    /// we decide to do a deletion the decision is always final.\n    fn do_push<T>(\n        &self,\n        queue: &tokio::sync::mpsc::UnboundedSender<T>,\n        msg: T,\n    ) -> Result<(), DeletionQueueError> {\n        match queue.send(msg) {\n            Ok(_) => Ok(()),\n            Err(e) => {\n                // This shouldn't happen, we should shut down all tenants before\n                // we shut down the global delete queue.  If we encounter a bug like this,\n                // we may leak objects as deletions won't be processed.\n                error!(\"Deletion queue closed while pushing, shutting down? 
({e})\");\n                Err(DeletionQueueError::ShuttingDown)\n            }\n        }\n    }\n\n    pub(crate) fn recover(\n        &self,\n        attached_tenants: HashMap<TenantShardId, Generation>,\n    ) -> Result<(), DeletionQueueError> {\n        self.do_push(\n            &self.tx,\n            ListWriterQueueMessage::Recover(RecoverOp { attached_tenants }),\n        )\n    }\n\n    /// When a Timeline wishes to update the remote_consistent_lsn that it exposes to the outside\n    /// world, it must validate its generation number before doing so.  Rather than do this synchronously,\n    /// we allow the timeline to publish updates at will via this API, and then read back what LSN was most\n    /// recently validated separately.\n    ///\n    /// In this function we publish the LSN to the `projected` field of the timeline's entry in the VisibleLsnUpdates.  The\n    /// backend will later wake up and notice that the tenant's generation requires validation.\n    pub(crate) async fn update_remote_consistent_lsn(\n        &self,\n        tenant_shard_id: TenantShardId,\n        timeline_id: TimelineId,\n        current_generation: Generation,\n        lsn: Lsn,\n        result_slot: Arc<AtomicLsn>,\n    ) {\n        let mut locked = self\n            .lsn_table\n            .write()\n            .expect(\"Lock should never be poisoned\");\n\n        let tenant_entry = locked\n            .tenants\n            .entry(tenant_shard_id)\n            .or_insert(TenantLsnState {\n                timelines: HashMap::new(),\n                generation: current_generation,\n            });\n\n        if tenant_entry.generation != current_generation {\n            // Generation might have changed if we were detached and then re-attached: in this case,\n            // state from the previous generation cannot be trusted.\n            tenant_entry.timelines.clear();\n            tenant_entry.generation = current_generation;\n        }\n\n        
tenant_entry.timelines.insert(\n            timeline_id,\n            PendingLsn {\n                projected: lsn,\n                result_slot,\n            },\n        );\n    }\n\n    /// Submit a list of layers for deletion: this function will return before the deletion is\n    /// persistent, but it may be executed at any time after this function enters: do not push\n    /// layers until you're sure they can be deleted safely (i.e. remote metadata no longer\n    /// references them).\n    ///\n    /// The `current_generation` is the generation of this pageserver's current attachment.  The\n    /// generations in `layers` are the generations in which those layers were written.\n    pub(crate) fn push_layers(\n        &self,\n        tenant_shard_id: TenantShardId,\n        timeline_id: TimelineId,\n        current_generation: Generation,\n        layers: Vec<(LayerName, LayerFileMetadata)>,\n    ) -> Result<(), DeletionQueueError> {\n        // None generations are not valid for attached tenants: they must always be attached in\n        // a known generation.  None generations are still permitted for layers in the index because\n        // they may be historical.\n        assert!(!current_generation.is_none());\n\n        metrics::DELETION_QUEUE\n            .keys_submitted\n            .inc_by(layers.len() as u64);\n        self.do_push(\n            &self.tx,\n            ListWriterQueueMessage::Delete(DeletionOp {\n                tenant_shard_id,\n                timeline_id,\n                layers,\n                generation: current_generation,\n                objects: Vec::new(),\n            }),\n        )\n    }\n\n    /// This is cancel-safe.  
If you drop the future the flush may still happen in the background.\n    async fn do_flush<T>(\n        &self,\n        queue: &tokio::sync::mpsc::UnboundedSender<T>,\n        msg: T,\n        rx: tokio::sync::oneshot::Receiver<()>,\n    ) -> Result<(), DeletionQueueError> {\n        self.do_push(queue, msg)?;\n        if rx.await.is_err() {\n            // This shouldn't happen if tenants are shut down before deletion queue.  If we\n            // encounter a bug like this, then a flusher will incorrectly believe it has flushed\n            // when it hasn't, possibly leading to leaking objects.\n            error!(\"Deletion queue dropped flush op while client was still waiting\");\n            Err(DeletionQueueError::ShuttingDown)\n        } else {\n            Ok(())\n        }\n    }\n\n    /// Wait until all previous deletions are persistent (either executed, or written to a DeletionList)\n    ///\n    /// This is cancel-safe.  If you drop the future the flush may still happen in the background.\n    pub async fn flush(&self) -> Result<(), DeletionQueueError> {\n        let (flush_op, rx) = FlushOp::new();\n        self.do_flush(&self.tx, ListWriterQueueMessage::Flush(flush_op), rx)\n            .await\n    }\n\n    /// Issue a flush without waiting for it to complete.  
This is useful on advisory flushes where\n    /// the caller wants to avoid the risk of waiting for lots of enqueued work, such as on tenant\n    /// detach where flushing is nice but not necessary.\n    ///\n    /// This function provides no guarantees of work being done.\n    pub fn flush_advisory(&self) {\n        let (flush_op, _) = FlushOp::new();\n\n        // Transmit the flush message, ignoring any result (such as a closed channel during shutdown).\n        drop(self.tx.send(ListWriterQueueMessage::FlushExecute(flush_op)));\n    }\n\n    // Wait until all previous deletions are executed\n    pub(crate) async fn flush_execute(&self) -> Result<(), DeletionQueueError> {\n        debug!(\"flush_execute: flushing to deletion lists...\");\n        // Flush any buffered work to deletion lists\n        self.flush().await?;\n\n        // Flush the backend into the executor of deletion lists\n        let (flush_op, rx) = FlushOp::new();\n        debug!(\"flush_execute: flushing backend...\");\n        self.do_flush(&self.tx, ListWriterQueueMessage::FlushExecute(flush_op), rx)\n            .await?;\n        debug!(\"flush_execute: finished flushing backend...\");\n\n        // Flush any immediate-mode deletions (the above backend flush will only flush\n        // the executor if deletions had flowed through the backend)\n        debug!(\"flush_execute: flushing execution...\");\n        self.flush_immediate().await?;\n        debug!(\"flush_execute: finished flushing execution...\");\n        Ok(())\n    }\n\n    /// This interface bypasses the persistent deletion queue, and any validation\n    /// that this pageserver is still eligible to execute the deletions.  It is for\n    /// use in timeline deletions, where the control plane is telling us we may\n    /// delete everything in the timeline.\n    ///\n    /// DO NOT USE THIS FROM GC OR COMPACTION CODE.  
Use the regular `push_layers`.\n    pub(crate) async fn push_immediate(\n        &self,\n        objects: Vec<RemotePath>,\n    ) -> Result<(), DeletionQueueError> {\n        metrics::DELETION_QUEUE\n            .keys_submitted\n            .inc_by(objects.len() as u64);\n        self.executor_tx\n            .send(DeleterMessage::Delete(objects))\n            .await\n            .map_err(|_| DeletionQueueError::ShuttingDown)\n    }\n\n    /// Companion to push_immediate.  When this returns Ok, all prior objects sent\n    /// into push_immediate have been deleted from remote storage.\n    pub(crate) async fn flush_immediate(&self) -> Result<(), DeletionQueueError> {\n        let (flush_op, rx) = FlushOp::new();\n        self.executor_tx\n            .send(DeleterMessage::Flush(flush_op))\n            .await\n            .map_err(|_| DeletionQueueError::ShuttingDown)?;\n\n        rx.await.map_err(|_| DeletionQueueError::ShuttingDown)\n    }\n}\n\nimpl DeletionQueue {\n    pub fn new_client(&self) -> DeletionQueueClient {\n        self.client.clone()\n    }\n\n    /// Caller may use the returned object to construct clients with new_client.\n    /// Caller should tokio::spawn the background() members of the two worker objects returned:\n    /// we don't spawn those inside new() so that the caller can use their runtime/spans of choice.\n    pub fn new<C>(\n        remote_storage: GenericRemoteStorage,\n        controller_upcall_client: C,\n        conf: &'static PageServerConf,\n    ) -> (Self, DeletionQueueWorkers<C>)\n    where\n        C: StorageControllerUpcallApi + Send + Sync,\n    {\n        // Unbounded channel: enables non-async functions to submit deletions.  
The actual length is\n        // constrained by how promptly the ListWriter wakes up and drains it, which should be frequent\n        // enough to avoid this taking pathologically large amount of memory.\n        let (tx, rx) = tokio::sync::mpsc::unbounded_channel();\n\n        // Shallow channel: it carries DeletionLists which each contain up to thousands of deletions\n        let (backend_tx, backend_rx) = tokio::sync::mpsc::channel(16);\n\n        // Shallow channel: it carries lists of paths, and we expect the main queueing to\n        // happen in the backend (persistent), not in this queue.\n        let (executor_tx, executor_rx) = tokio::sync::mpsc::channel(16);\n\n        let lsn_table = Arc::new(std::sync::RwLock::new(VisibleLsnUpdates::new()));\n\n        // The deletion queue has an independent cancellation token to\n        // the general pageserver shutdown token, because it stays alive a bit\n        // longer to flush after Tenants have all been torn down.\n        let cancel = CancellationToken::new();\n\n        (\n            Self {\n                client: DeletionQueueClient {\n                    tx,\n                    executor_tx: executor_tx.clone(),\n                    lsn_table: lsn_table.clone(),\n                },\n                cancel: cancel.clone(),\n            },\n            DeletionQueueWorkers {\n                frontend: ListWriter::new(conf, rx, backend_tx, cancel.clone()),\n                backend: Validator::new(\n                    conf,\n                    backend_rx,\n                    executor_tx,\n                    controller_upcall_client,\n                    lsn_table.clone(),\n                    cancel.clone(),\n                ),\n                executor: Deleter::new(remote_storage, executor_rx, cancel.clone()),\n            },\n        )\n    }\n\n    pub async fn shutdown(&mut self, timeout: Duration) {\n        match tokio::time::timeout(timeout, self.client.flush()).await {\n            Ok(Ok(())) 
=> {\n                tracing::info!(\"Deletion queue flushed successfully on shutdown\")\n            }\n            Ok(Err(DeletionQueueError::ShuttingDown)) => {\n                // This is not harmful for correctness, but is unexpected: the deletion\n                // queue's workers should stay alive as long as there are any client handles instantiated.\n                tracing::warn!(\"Deletion queue stopped prematurely\");\n            }\n            Err(_timeout) => {\n                tracing::warn!(\"Timed out flushing deletion queue on shutdown\")\n            }\n        }\n\n        // We only cancel _after_ flushing: otherwise we would be shutting down the\n        // components that do the flush.\n        self.cancel.cancel();\n    }\n}\n\n#[cfg(test)]\nmod test {\n    use std::io::ErrorKind;\n    use std::time::Duration;\n\n    use camino::Utf8Path;\n    use hex_literal::hex;\n    use pageserver_api::key::Key;\n    use pageserver_api::models::ShardImportStatus;\n    use pageserver_api::shard::ShardIndex;\n    use pageserver_api::upcall_api::ReAttachResponseTenant;\n    use remote_storage::{RemoteStorageConfig, RemoteStorageKind};\n    use tokio::task::JoinHandle;\n    use tracing::info;\n\n    use super::*;\n    use crate::controller_upcall_client::RetryForeverError;\n    use crate::tenant::harness::TenantHarness;\n    use crate::tenant::storage_layer::DeltaLayerName;\n    pub const TIMELINE_ID: TimelineId =\n        TimelineId::from_array(hex!(\"11223344556677881122334455667788\"));\n\n    pub const EXAMPLE_LAYER_NAME: LayerName = LayerName::Delta(DeltaLayerName {\n        key_range: Key::from_i128(0x0)..Key::from_i128(0xFFFFFFFFFFFFFFFF),\n        lsn_range: Lsn(0x00000000016B59D8)..Lsn(0x00000000016B5A51),\n    });\n\n    // When you need a second layer in a test.\n    pub const EXAMPLE_LAYER_NAME_ALT: LayerName = LayerName::Delta(DeltaLayerName {\n        key_range: Key::from_i128(0x0)..Key::from_i128(0xFFFFFFFFFFFFFFFF),\n        lsn_range: 
Lsn(0x00000000016B5A51)..Lsn(0x00000000016B5A61),\n    });\n\n    struct TestSetup {\n        harness: TenantHarness,\n        remote_fs_dir: Utf8PathBuf,\n        storage: GenericRemoteStorage,\n        mock_control_plane: MockStorageController,\n        deletion_queue: DeletionQueue,\n        worker_join: JoinHandle<()>,\n    }\n\n    impl TestSetup {\n        /// Simulate a pageserver restart by destroying and recreating the deletion queue\n        async fn restart(&mut self) {\n            let (deletion_queue, workers) = DeletionQueue::new(\n                self.storage.clone(),\n                self.mock_control_plane.clone(),\n                self.harness.conf,\n            );\n\n            tracing::debug!(\"Spawning worker for new queue queue\");\n            let worker_join = workers.spawn_with(&tokio::runtime::Handle::current());\n\n            let old_worker_join = std::mem::replace(&mut self.worker_join, worker_join);\n            let old_deletion_queue = std::mem::replace(&mut self.deletion_queue, deletion_queue);\n\n            tracing::debug!(\"Joining worker from previous queue\");\n            old_deletion_queue.cancel.cancel();\n            old_worker_join\n                .await\n                .expect(\"Failed to join workers for previous deletion queue\");\n        }\n\n        fn set_latest_generation(&self, gen_: Generation) {\n            let tenant_shard_id = self.harness.tenant_shard_id;\n            self.mock_control_plane\n                .latest_generation\n                .lock()\n                .unwrap()\n                .insert(tenant_shard_id, gen_);\n        }\n\n        /// Returns remote layer file name, suitable for use in assert_remote_files\n        fn write_remote_layer(\n            &self,\n            file_name: LayerName,\n            gen_: Generation,\n        ) -> anyhow::Result<String> {\n            let tenant_shard_id = self.harness.tenant_shard_id;\n            let relative_remote_path = 
remote_timeline_path(&tenant_shard_id, &TIMELINE_ID);\n            let remote_timeline_path = self.remote_fs_dir.join(relative_remote_path.get_path());\n            std::fs::create_dir_all(&remote_timeline_path)?;\n            let remote_layer_file_name = format!(\"{}{}\", file_name, gen_.get_suffix());\n\n            let content: Vec<u8> = format!(\"placeholder contents of {file_name}\").into();\n\n            std::fs::write(\n                remote_timeline_path.join(remote_layer_file_name.clone()),\n                content,\n            )?;\n\n            Ok(remote_layer_file_name)\n        }\n    }\n\n    #[derive(Debug, Clone)]\n    struct MockStorageController {\n        pub latest_generation: std::sync::Arc<std::sync::Mutex<HashMap<TenantShardId, Generation>>>,\n    }\n\n    impl MockStorageController {\n        fn new() -> Self {\n            Self {\n                latest_generation: Arc::default(),\n            }\n        }\n    }\n\n    impl StorageControllerUpcallApi for MockStorageController {\n        async fn re_attach(\n            &self,\n            _conf: &PageServerConf,\n            _empty_local_disk: bool,\n        ) -> Result<HashMap<TenantShardId, ReAttachResponseTenant>, RetryForeverError> {\n            unimplemented!()\n        }\n\n        async fn validate(\n            &self,\n            tenants: Vec<(TenantShardId, Generation)>,\n        ) -> Result<HashMap<TenantShardId, bool>, RetryForeverError> {\n            let mut result = HashMap::new();\n\n            let latest_generation = self.latest_generation.lock().unwrap();\n\n            for (tenant_shard_id, generation) in tenants {\n                if let Some(latest) = latest_generation.get(&tenant_shard_id) {\n                    result.insert(tenant_shard_id, *latest == generation);\n                }\n            }\n\n            Ok(result)\n        }\n\n        async fn put_timeline_import_status(\n            &self,\n            _tenant_shard_id: TenantShardId,\n            
_timeline_id: TimelineId,\n            _generation: Generation,\n            _status: pageserver_api::models::ShardImportStatus,\n        ) -> Result<(), RetryForeverError> {\n            unimplemented!()\n        }\n\n        async fn get_timeline_import_status(\n            &self,\n            _tenant_shard_id: TenantShardId,\n            _timeline_id: TimelineId,\n            _generation: Generation,\n        ) -> Result<ShardImportStatus, RetryForeverError> {\n            unimplemented!()\n        }\n    }\n\n    async fn setup(test_name: &str) -> anyhow::Result<TestSetup> {\n        let test_name = Box::leak(Box::new(format!(\"deletion_queue__{test_name}\")));\n        let harness = TenantHarness::create(test_name).await?;\n\n        // We do not load() the harness: we only need its config and remote_storage\n\n        // Set up a GenericRemoteStorage targetting a directory\n        let remote_fs_dir = harness.conf.workdir.join(\"remote_fs\");\n        std::fs::create_dir_all(remote_fs_dir)?;\n        let remote_fs_dir = harness.conf.workdir.join(\"remote_fs\").canonicalize_utf8()?;\n        let storage_config = RemoteStorageConfig {\n            storage: RemoteStorageKind::LocalFs {\n                local_path: remote_fs_dir.clone(),\n            },\n            timeout: RemoteStorageConfig::DEFAULT_TIMEOUT,\n            small_timeout: RemoteStorageConfig::DEFAULT_SMALL_TIMEOUT,\n        };\n        let storage = GenericRemoteStorage::from_config(&storage_config)\n            .await\n            .unwrap();\n\n        let mock_control_plane = MockStorageController::new();\n\n        let (deletion_queue, worker) =\n            DeletionQueue::new(storage.clone(), mock_control_plane.clone(), harness.conf);\n\n        let worker_join = worker.spawn_with(&tokio::runtime::Handle::current());\n\n        Ok(TestSetup {\n            harness,\n            remote_fs_dir,\n            storage,\n            mock_control_plane,\n            deletion_queue,\n            
worker_join,\n        })\n    }\n\n    // TODO: put this in a common location so that we can share with remote_timeline_client's tests\n    fn assert_remote_files(expected: &[&str], remote_path: &Utf8Path) {\n        let mut expected: Vec<String> = expected.iter().map(|x| String::from(*x)).collect();\n        expected.sort();\n\n        let mut found: Vec<String> = Vec::new();\n        let dir = match std::fs::read_dir(remote_path) {\n            Ok(d) => d,\n            Err(e) => {\n                if e.kind() == ErrorKind::NotFound {\n                    if expected.is_empty() {\n                        // We are asserting prefix is empty: it is expected that the dir is missing\n                        return;\n                    } else {\n                        assert_eq!(expected, Vec::<String>::new());\n                        unreachable!();\n                    }\n                } else {\n                    panic!(\"Unexpected error listing {remote_path}: {e}\");\n                }\n            }\n        };\n\n        for entry in dir.flatten() {\n            let entry_name = entry.file_name();\n            let fname = entry_name.to_str().unwrap();\n            found.push(String::from(fname));\n        }\n        found.sort();\n\n        assert_eq!(expected, found);\n    }\n\n    fn assert_local_files(expected: &[&str], directory: &Utf8Path) {\n        let dir = match std::fs::read_dir(directory) {\n            Ok(d) => d,\n            Err(_) => {\n                assert_eq!(expected, &Vec::<String>::new());\n                return;\n            }\n        };\n        let mut found = Vec::new();\n        for dentry in dir {\n            let dentry = dentry.unwrap();\n            let file_name = dentry.file_name();\n            let file_name_str = file_name.to_string_lossy();\n            found.push(file_name_str.to_string());\n        }\n        found.sort();\n        assert_eq!(expected, found);\n    }\n\n    #[tokio::test]\n    async fn 
deletion_queue_smoke() -> anyhow::Result<()> {\n        // Basic test that the deletion queue processes the deletions we pass into it\n        let ctx = setup(\"deletion_queue_smoke\")\n            .await\n            .expect(\"Failed test setup\");\n        let client = ctx.deletion_queue.new_client();\n        client.recover(HashMap::new())?;\n\n        let layer_file_name_1: LayerName = \"000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__00000000016B59D8-00000000016B5A51\".parse().unwrap();\n        let tenant_shard_id = ctx.harness.tenant_shard_id;\n\n        let content: Vec<u8> = \"victim1 contents\".into();\n        let relative_remote_path = remote_timeline_path(&tenant_shard_id, &TIMELINE_ID);\n        let remote_timeline_path = ctx.remote_fs_dir.join(relative_remote_path.get_path());\n        let deletion_prefix = ctx.harness.conf.deletion_prefix();\n\n        // Exercise the distinction between the generation of the layers\n        // we delete, and the generation of the running Tenant.\n        let layer_generation = Generation::new(0xdeadbeef);\n        let now_generation = Generation::new(0xfeedbeef);\n        let layer_metadata =\n            LayerFileMetadata::new(0xf00, layer_generation, ShardIndex::unsharded());\n\n        let remote_layer_file_name_1 =\n            format!(\"{}{}\", layer_file_name_1, layer_generation.get_suffix());\n\n        // Set mock control plane state to valid for our generation\n        ctx.set_latest_generation(now_generation);\n\n        // Inject a victim file to remote storage\n        info!(\"Writing\");\n        std::fs::create_dir_all(&remote_timeline_path)?;\n        std::fs::write(\n            remote_timeline_path.join(remote_layer_file_name_1.clone()),\n            content,\n        )?;\n        assert_remote_files(&[&remote_layer_file_name_1], &remote_timeline_path);\n\n        // File should still be there after we push it to the queue (we haven't pushed enough to flush anything)\n    
    info!(\"Pushing\");\n        client.push_layers(\n            tenant_shard_id,\n            TIMELINE_ID,\n            now_generation,\n            [(layer_file_name_1.clone(), layer_metadata)].to_vec(),\n        )?;\n        assert_remote_files(&[&remote_layer_file_name_1], &remote_timeline_path);\n\n        assert_local_files(&[], &deletion_prefix);\n\n        // File should still be there after we write a deletion list (we haven't pushed enough to execute anything)\n        info!(\"Flushing\");\n        client.flush().await?;\n        assert_remote_files(&[&remote_layer_file_name_1], &remote_timeline_path);\n        assert_local_files(&[\"0000000000000001-01.list\"], &deletion_prefix);\n\n        // File should go away when we execute\n        info!(\"Flush-executing\");\n        client.flush_execute().await?;\n        assert_remote_files(&[], &remote_timeline_path);\n        assert_local_files(&[\"header-01\"], &deletion_prefix);\n\n        // Flushing on an empty queue should succeed immediately, and not write any lists\n        info!(\"Flush-executing on empty\");\n        client.flush_execute().await?;\n        assert_local_files(&[\"header-01\"], &deletion_prefix);\n\n        Ok(())\n    }\n\n    #[tokio::test]\n    async fn deletion_queue_validation() -> anyhow::Result<()> {\n        let ctx = setup(\"deletion_queue_validation\")\n            .await\n            .expect(\"Failed test setup\");\n        let client = ctx.deletion_queue.new_client();\n        client.recover(HashMap::new())?;\n\n        // Generation that the control plane thinks is current\n        let latest_generation = Generation::new(0xdeadbeef);\n        // Generation that our DeletionQueue thinks the tenant is running with\n        let stale_generation = latest_generation.previous();\n        // Generation that our example layer file was written with\n        let layer_generation = stale_generation.previous();\n        let layer_metadata =\n            LayerFileMetadata::new(0xf00, 
layer_generation, ShardIndex::unsharded());\n\n        ctx.set_latest_generation(latest_generation);\n\n        let tenant_shard_id = ctx.harness.tenant_shard_id;\n        let relative_remote_path = remote_timeline_path(&tenant_shard_id, &TIMELINE_ID);\n        let remote_timeline_path = ctx.remote_fs_dir.join(relative_remote_path.get_path());\n\n        // Initial state: a remote layer exists\n        let remote_layer_name = ctx.write_remote_layer(EXAMPLE_LAYER_NAME, layer_generation)?;\n        assert_remote_files(&[&remote_layer_name], &remote_timeline_path);\n\n        tracing::debug!(\"Pushing...\");\n        client.push_layers(\n            tenant_shard_id,\n            TIMELINE_ID,\n            stale_generation,\n            [(EXAMPLE_LAYER_NAME.clone(), layer_metadata.clone())].to_vec(),\n        )?;\n\n        // We enqueued the operation in a stale generation: it should have failed validation\n        tracing::debug!(\"Flushing...\");\n        tokio::time::timeout(Duration::from_secs(5), client.flush_execute()).await??;\n        assert_remote_files(&[&remote_layer_name], &remote_timeline_path);\n\n        tracing::debug!(\"Pushing...\");\n        client.push_layers(\n            tenant_shard_id,\n            TIMELINE_ID,\n            latest_generation,\n            [(EXAMPLE_LAYER_NAME.clone(), layer_metadata.clone())].to_vec(),\n        )?;\n\n        // We enqueued the operation in a fresh generation: it should have passed validation\n        tracing::debug!(\"Flushing...\");\n        tokio::time::timeout(Duration::from_secs(5), client.flush_execute()).await??;\n        assert_remote_files(&[], &remote_timeline_path);\n\n        Ok(())\n    }\n\n    #[tokio::test]\n    async fn deletion_queue_recovery() -> anyhow::Result<()> {\n        // Basic test that the deletion queue processes the deletions we pass into it\n        let mut ctx = setup(\"deletion_queue_recovery\")\n            .await\n            .expect(\"Failed test setup\");\n        let client 
= ctx.deletion_queue.new_client();\n        client.recover(HashMap::new())?;\n\n        let tenant_shard_id = ctx.harness.tenant_shard_id;\n\n        let relative_remote_path = remote_timeline_path(&tenant_shard_id, &TIMELINE_ID);\n        let remote_timeline_path = ctx.remote_fs_dir.join(relative_remote_path.get_path());\n        let deletion_prefix = ctx.harness.conf.deletion_prefix();\n\n        let layer_generation = Generation::new(0xdeadbeef);\n        let now_generation = Generation::new(0xfeedbeef);\n        let layer_metadata =\n            LayerFileMetadata::new(0xf00, layer_generation, ShardIndex::unsharded());\n\n        // Inject a deletion in the generation before generation_now: after restart,\n        // this deletion should _not_ get executed (only the immediately previous\n        // generation gets that treatment)\n        let remote_layer_file_name_historical =\n            ctx.write_remote_layer(EXAMPLE_LAYER_NAME, layer_generation)?;\n        client.push_layers(\n            tenant_shard_id,\n            TIMELINE_ID,\n            now_generation.previous(),\n            [(EXAMPLE_LAYER_NAME.clone(), layer_metadata.clone())].to_vec(),\n        )?;\n\n        // Inject a deletion in now_generation itself: the restart below advances the\n        // generation, so this deletion should get executed, because we execute deletions\n        // in the immediately previous generation on the same node.\n        let remote_layer_file_name_previous =\n            ctx.write_remote_layer(EXAMPLE_LAYER_NAME_ALT, layer_generation)?;\n        client.push_layers(\n            tenant_shard_id,\n            TIMELINE_ID,\n            now_generation,\n            [(EXAMPLE_LAYER_NAME_ALT.clone(), layer_metadata.clone())].to_vec(),\n        )?;\n\n        client.flush().await?;\n        assert_remote_files(\n            &[\n                &remote_layer_file_name_historical,\n                &remote_layer_file_name_previous,\n            ],\n            &remote_timeline_path,\n        
);\n\n        // Different generations for the same tenant will cause two separate\n        // deletion lists to be emitted.\n        assert_local_files(\n            &[\"0000000000000001-01.list\", \"0000000000000002-01.list\"],\n            &deletion_prefix,\n        );\n\n        // Simulate a node restart: the latest generation advances\n        let now_generation = now_generation.next();\n        ctx.set_latest_generation(now_generation);\n\n        // Restart the deletion queue\n        drop(client);\n        ctx.restart().await;\n        let client = ctx.deletion_queue.new_client();\n        client.recover(HashMap::from([(tenant_shard_id, now_generation)]))?;\n\n        info!(\"Flush-executing\");\n        client.flush_execute().await?;\n        // The deletion from immediately prior generation was executed, the one from\n        // an older generation was not.\n        assert_remote_files(&[&remote_layer_file_name_historical], &remote_timeline_path);\n        Ok(())\n    }\n}\n\n/// A lightweight queue which can issue ordinary DeletionQueueClient objects, but doesn't do any persistence\n/// or coalescing, and doesn't actually execute any deletions unless you call pump() to kick it.\n#[cfg(test)]\npub(crate) mod mock {\n    use std::sync::atomic::{AtomicUsize, Ordering};\n\n    use tracing::info;\n\n    use super::*;\n    use crate::tenant::remote_timeline_client::remote_layer_path;\n\n    pub struct ConsumerState {\n        rx: tokio::sync::mpsc::UnboundedReceiver<ListWriterQueueMessage>,\n        executor_rx: tokio::sync::mpsc::Receiver<DeleterMessage>,\n        cancel: CancellationToken,\n        executed: Arc<AtomicUsize>,\n    }\n\n    impl ConsumerState {\n        async fn consume(&mut self, remote_storage: &GenericRemoteStorage) {\n            info!(\"Executing all pending deletions\");\n\n            // Transform all executor messages to generic frontend messages\n            loop {\n                use either::Either;\n                let msg = 
tokio::select! {\n                    left = self.executor_rx.recv() => Either::Left(left),\n                    right = self.rx.recv() => Either::Right(right),\n                };\n                match msg {\n                    Either::Left(None) => break,\n                    Either::Right(None) => break,\n                    Either::Left(Some(DeleterMessage::Delete(objects))) => {\n                        for path in objects {\n                            match remote_storage.delete(&path, &self.cancel).await {\n                                Ok(_) => {\n                                    debug!(\"Deleted {path}\");\n                                }\n                                Err(e) => {\n                                    error!(\"Failed to delete {path}, leaking object! ({e})\");\n                                }\n                            }\n                            self.executed.fetch_add(1, Ordering::Relaxed);\n                        }\n                    }\n                    Either::Left(Some(DeleterMessage::Flush(flush_op))) => {\n                        flush_op.notify();\n                    }\n                    Either::Right(Some(ListWriterQueueMessage::Delete(op))) => {\n                        let mut objects = op.objects;\n                        for (layer, meta) in op.layers {\n                            objects.push(remote_layer_path(\n                                &op.tenant_shard_id.tenant_id,\n                                &op.timeline_id,\n                                meta.shard,\n                                &layer,\n                                meta.generation,\n                            ));\n                        }\n\n                        for path in objects {\n                            info!(\"Executing deletion {path}\");\n                            match remote_storage.delete(&path, &self.cancel).await {\n                                Ok(_) => {\n                                    
debug!(\"Deleted {path}\");\n                                }\n                                Err(e) => {\n                                    error!(\"Failed to delete {path}, leaking object! ({e})\");\n                                }\n                            }\n                            self.executed.fetch_add(1, Ordering::Relaxed);\n                        }\n                    }\n                    Either::Right(Some(ListWriterQueueMessage::Flush(op))) => {\n                        op.notify();\n                    }\n                    Either::Right(Some(ListWriterQueueMessage::FlushExecute(op))) => {\n                        // We have already executed all prior deletions because mock does them inline\n                        op.notify();\n                    }\n                    Either::Right(Some(ListWriterQueueMessage::Recover(_))) => {\n                        // no-op in mock\n                    }\n                }\n            }\n        }\n    }\n\n    pub struct MockDeletionQueue {\n        tx: tokio::sync::mpsc::UnboundedSender<ListWriterQueueMessage>,\n        executor_tx: tokio::sync::mpsc::Sender<DeleterMessage>,\n        lsn_table: Arc<std::sync::RwLock<VisibleLsnUpdates>>,\n    }\n\n    impl MockDeletionQueue {\n        pub fn new(remote_storage: Option<GenericRemoteStorage>) -> Self {\n            let (tx, rx) = tokio::sync::mpsc::unbounded_channel();\n            let (executor_tx, executor_rx) = tokio::sync::mpsc::channel(16384);\n\n            let executed = Arc::new(AtomicUsize::new(0));\n\n            let mut consumer = ConsumerState {\n                rx,\n                executor_rx,\n                cancel: CancellationToken::new(),\n                executed: executed.clone(),\n            };\n\n            tokio::spawn(async move {\n                if let Some(remote_storage) = &remote_storage {\n                    consumer.consume(remote_storage).await;\n                }\n            });\n\n            Self {\n       
         tx,\n                executor_tx,\n                lsn_table: Arc::new(std::sync::RwLock::new(VisibleLsnUpdates::new())),\n            }\n        }\n\n        #[allow(clippy::await_holding_lock)]\n        pub async fn pump(&self) {\n            let (tx, rx) = tokio::sync::oneshot::channel();\n            self.executor_tx\n                .send(DeleterMessage::Flush(FlushOp { tx }))\n                .await\n                .expect(\"Failed to send flush message\");\n            rx.await.ok();\n        }\n\n        pub(crate) fn new_client(&self) -> DeletionQueueClient {\n            DeletionQueueClient {\n                tx: self.tx.clone(),\n                executor_tx: self.executor_tx.clone(),\n                lsn_table: self.lsn_table.clone(),\n            }\n        }\n    }\n\n    /// Test round-trip serialization/deserialization, and test stability of the format\n    /// vs. a static expected string for the serialized version.\n    #[test]\n    fn deletion_list_serialization() -> anyhow::Result<()> {\n        let tenant_id = \"ad6c1a56f5680419d3a16ff55d97ec3c\"\n            .to_string()\n            .parse::<TenantShardId>()?;\n        let timeline_id = \"be322c834ed9e709e63b5c9698691910\"\n            .to_string()\n            .parse::<TimelineId>()?;\n        let generation = Generation::new(123);\n\n        let object =\n            RemotePath::from_string(&format!(\"tenants/{tenant_id}/timelines/{timeline_id}/foo\"))?;\n        let mut objects = [object].to_vec();\n\n        let mut example = DeletionList::new(1);\n        example.push(&tenant_id, &timeline_id, generation, &mut objects);\n\n        let encoded = serde_json::to_string(&example)?;\n\n        let expected = \"{\\\"version\\\":1,\\\"sequence\\\":1,\\\"tenants\\\":{\\\"ad6c1a56f5680419d3a16ff55d97ec3c\\\":{\\\"timelines\\\":{\\\"be322c834ed9e709e63b5c9698691910\\\":[\\\"foo\\\"]},\\\"generation\\\":123}},\\\"size\\\":1}\".to_string();\n        assert_eq!(encoded, expected);\n\n        
let decoded = serde_json::from_str::<DeletionList>(&encoded)?;\n        assert_eq!(example, decoded);\n\n        Ok(())\n    }\n}\n"
  },
  {
    "path": "pageserver/src/disk_usage_eviction_task.rs",
    "content": "//! This module implements the pageserver-global disk-usage-based layer eviction task.\n//!\n//! # Mechanics\n//!\n//! Function `launch_disk_usage_global_eviction_task` starts a pageserver-global background\n//! loop that evicts layers in response to a shortage of available bytes\n//! in the $repo/tenants directory's filesystem.\n//!\n//! The loop runs periodically at a configurable `period`.\n//!\n//! Each loop iteration uses `statvfs` to determine filesystem-level space usage.\n//! It compares the returned usage data against two different types of thresholds.\n//! The iteration tries to evict layers until app-internal accounting says we should be below the thresholds.\n//! We cross-check this internal accounting with the real world by making another `statvfs` at the end of the iteration.\n//! We're good if that second statvfs shows that we're _actually_ below the configured thresholds.\n//! If we're still above one or more thresholds, we emit a warning log message, leaving it to the operator to investigate further.\n//!\n//! # Eviction Policy\n//!\n//! There are two thresholds:\n//! `max_usage_pct` is the relative available space, expressed in percent of the total filesystem space.\n//! If the actual usage is higher, the threshold is exceeded.\n//! `min_avail_bytes` is the absolute available space in bytes.\n//! If the actual usage is lower, the threshold is exceeded.\n//! If either of these thresholds is exceeded, the system is considered to have \"disk pressure\", and eviction\n//! is performed on the next iteration, to release disk space and bring the usage below the thresholds again.\n//! The iteration evicts layers in LRU fashion, but, with a weak reservation per tenant.\n//! The reservation is to keep the most recently accessed X bytes per tenant resident.\n//! If we cannot relieve pressure by evicting layers outside of the reservation, we\n//! start evicting layers that are part of the reservation, LRU first.\n//!\n//! 
The value for the per-tenant reservation is referred to as `tenant_min_resident_size`\n//! throughout the code, but, no actual variable carries that name.\n//! The per-tenant default value is the `max(tenant's layer file sizes, regardless of local or remote)`.\n//! The idea is to allow at least one layer to be resident per tenant, to ensure it can make forward progress\n//! during page reconstruction.\n//! An alternative default for all tenants can be specified in the `tenant_config` section of the config.\n//! Lastly, each tenant can have an override in their respective tenant config (`min_resident_size_override`).\n\n// Implementation notes:\n// - The `#[allow(dead_code)]` above various structs are to suppress warnings about only the Debug impl\n//   reading these fields. We use the Debug impl for semi-structured logging, though.\n\nuse std::sync::Arc;\nuse std::time::SystemTime;\n\nuse anyhow::Context;\nuse pageserver_api::config::DiskUsageEvictionTaskConfig;\nuse pageserver_api::shard::TenantShardId;\nuse remote_storage::GenericRemoteStorage;\nuse serde::Serialize;\nuse tokio::time::Instant;\nuse tokio_util::sync::CancellationToken;\nuse tracing::{Instrument, debug, error, info, instrument, warn};\nuse utils::completion;\nuse utils::id::TimelineId;\n\nuse crate::config::PageServerConf;\nuse crate::metrics::disk_usage_based_eviction::METRICS;\nuse crate::task_mgr::{self, BACKGROUND_RUNTIME};\nuse crate::tenant::mgr::TenantManager;\nuse crate::tenant::remote_timeline_client::LayerFileMetadata;\nuse crate::tenant::secondary::SecondaryTenant;\nuse crate::tenant::storage_layer::{\n    AsLayerDesc, EvictionError, Layer, LayerName, LayerVisibilityHint,\n};\nuse crate::tenant::tasks::sleep_random;\nuse crate::{CancellableTask, DiskUsageEvictionTask};\n\n/// Selects the sort order for eviction candidates *after* per tenant `min_resident_size`\n/// partitioning.\n#[derive(Debug, Clone, Copy, PartialEq, Eq)]\npub enum EvictionOrder {\n    /// Order the layers to be 
evicted by how recently they have been accessed relatively within\n    /// the set of resident layers of a tenant.\n    RelativeAccessed {\n        /// Determines if the tenant with most layers should lose first.\n        ///\n        /// Having this enabled is currently the only reasonable option, because the order in which\n        /// we read tenants is deterministic. If we find the need to use this as `false`, we need\n        /// to ensure nondeterminism by adding in a random number to break the\n        /// `relative_last_activity==0.0` ties.\n        highest_layer_count_loses_first: bool,\n    },\n}\n\nimpl From<pageserver_api::config::EvictionOrder> for EvictionOrder {\n    fn from(value: pageserver_api::config::EvictionOrder) -> Self {\n        match value {\n            pageserver_api::config::EvictionOrder::RelativeAccessed {\n                highest_layer_count_loses_first,\n            } => Self::RelativeAccessed {\n                highest_layer_count_loses_first,\n            },\n        }\n    }\n}\n\nimpl EvictionOrder {\n    fn sort(&self, candidates: &mut [(EvictionPartition, EvictionCandidate)]) {\n        use EvictionOrder::*;\n\n        match self {\n            RelativeAccessed { .. 
} => candidates.sort_unstable_by_key(|(partition, candidate)| {\n                (*partition, candidate.relative_last_activity)\n            }),\n        }\n    }\n\n    /// Called to fill in the [`EvictionCandidate::relative_last_activity`] while iterating tenants\n    /// layers in **most** recently used order.\n    fn relative_last_activity(&self, total: usize, index: usize) -> finite_f32::FiniteF32 {\n        use EvictionOrder::*;\n\n        match self {\n            RelativeAccessed {\n                highest_layer_count_loses_first,\n            } => {\n                // keeping the -1 or not decides if every tenant should lose their least recently accessed\n                // layer OR if this should happen in the order of having highest layer count:\n                let fudge = if *highest_layer_count_loses_first {\n                    // relative_last_activity vs. tenant layer count:\n                    // - 0.1..=1.0 (10 layers)\n                    // - 0.01..=1.0 (100 layers)\n                    // - 0.001..=1.0 (1000 layers)\n                    //\n                    // leading to evicting less of the smallest tenants.\n                    0\n                } else {\n                    // use full 0.0..=1.0 range, which means even the smallest tenants could always lose a\n                    // layer. the actual ordering is unspecified: for 10k tenants on a pageserver it could\n                    // be that less than 10k layer evictions is enough, so we would not need to evict from\n                    // all tenants.\n                    //\n                    // as the tenant ordering is now deterministic this could hit the same tenants\n                    // disproportionately on multiple invocations. 
alternative could be to remember how many\n                    // layers did we evict last time from this tenant, and inject that as an additional\n                    // fudge here.\n                    1\n                };\n\n                let total = total.checked_sub(fudge).filter(|&x| x > 1).unwrap_or(1);\n                let divider = total as f32;\n\n                // most recently used is always (total - 0) / divider == 1.0\n                // least recently used depends on the fudge:\n                // -       (total - 1) - (total - 1) / total => 0 / total\n                // -             total - (total - 1) / total => 1 / total\n                let distance = (total - index) as f32;\n\n                finite_f32::FiniteF32::try_from_normalized(distance / divider)\n                    .unwrap_or_else(|val| {\n                        tracing::warn!(%fudge, \"calculated invalid relative_last_activity for i={index}, total={total}: {val}\");\n                        finite_f32::FiniteF32::ZERO\n                    })\n            }\n        }\n    }\n}\n\n#[derive(Default)]\npub struct State {\n    /// Exclude http requests and background task from running at the same time.\n    mutex: tokio::sync::Mutex<()>,\n}\n\npub fn launch_disk_usage_global_eviction_task(\n    conf: &'static PageServerConf,\n    storage: GenericRemoteStorage,\n    state: Arc<State>,\n    tenant_manager: Arc<TenantManager>,\n    background_jobs_barrier: completion::Barrier,\n) -> Option<DiskUsageEvictionTask> {\n    let task_config = &conf.disk_usage_based_eviction;\n    if !task_config.enabled {\n        info!(\"disk usage based eviction task not configured\");\n        return None;\n    };\n\n    info!(\"launching disk usage based eviction task\");\n\n    let cancel = CancellationToken::new();\n    let task = BACKGROUND_RUNTIME.spawn(task_mgr::exit_on_panic_or_error(\n        \"disk usage based eviction\",\n        {\n            let cancel = cancel.clone();\n            async 
move {\n                // wait until initial load is complete, because we cannot evict from loading tenants.\n                tokio::select! {\n                    _ = cancel.cancelled() => { return anyhow::Ok(()); },\n                    _ = background_jobs_barrier.wait() => { }\n                };\n\n                disk_usage_eviction_task(&state, task_config, &storage, tenant_manager, cancel)\n                    .await;\n                anyhow::Ok(())\n            }\n        },\n    ));\n\n    Some(DiskUsageEvictionTask(CancellableTask { cancel, task }))\n}\n\n#[instrument(skip_all)]\nasync fn disk_usage_eviction_task(\n    state: &State,\n    task_config: &DiskUsageEvictionTaskConfig,\n    storage: &GenericRemoteStorage,\n    tenant_manager: Arc<TenantManager>,\n    cancel: CancellationToken,\n) {\n    scopeguard::defer! {\n        info!(\"disk usage based eviction task finishing\");\n    };\n\n    if sleep_random(task_config.period, &cancel).await.is_err() {\n        return;\n    }\n\n    let mut iteration_no = 0;\n    loop {\n        iteration_no += 1;\n        let start = Instant::now();\n\n        async {\n            let res = disk_usage_eviction_task_iteration(\n                state,\n                task_config,\n                storage,\n                &tenant_manager,\n                &cancel,\n            )\n            .await;\n\n            match res {\n                Ok(()) => {}\n                Err(e) => {\n                    // these stat failures are expected to be very rare\n                    warn!(\"iteration failed, unexpected error: {e:#}\");\n                }\n            }\n        }\n        .instrument(tracing::info_span!(\"iteration\", iteration_no))\n        .await;\n\n        let sleep_until = start + task_config.period;\n        if tokio::time::timeout_at(sleep_until, cancel.cancelled())\n            .await\n            .is_ok()\n        {\n            break;\n        }\n    }\n}\n\npub trait Usage: Clone + Copy + 
std::fmt::Debug {\n    fn has_pressure(&self) -> bool;\n    fn add_available_bytes(&mut self, bytes: u64);\n}\n\nasync fn disk_usage_eviction_task_iteration(\n    state: &State,\n    task_config: &DiskUsageEvictionTaskConfig,\n    storage: &GenericRemoteStorage,\n    tenant_manager: &Arc<TenantManager>,\n    cancel: &CancellationToken,\n) -> anyhow::Result<()> {\n    let tenants_dir = tenant_manager.get_conf().tenants_path();\n    let usage_pre = filesystem_level_usage::get(&tenants_dir, task_config)\n        .context(\"get filesystem-level disk usage before evictions\")?;\n    let res = disk_usage_eviction_task_iteration_impl(\n        state,\n        storage,\n        usage_pre,\n        tenant_manager,\n        task_config.eviction_order.into(),\n        cancel,\n    )\n    .await;\n    match res {\n        Ok(outcome) => {\n            debug!(?outcome, \"disk_usage_eviction_iteration finished\");\n            match outcome {\n                IterationOutcome::NoPressure | IterationOutcome::Cancelled => {\n                    // nothing to do, select statement below will handle things\n                }\n                IterationOutcome::Finished(outcome) => {\n                    // Verify with statvfs whether we made any real progress\n                    let after = filesystem_level_usage::get(&tenants_dir, task_config)\n                        // It's quite unlikely to hit the error here. 
Keep the code simple and bail out.\n                        .context(\"get filesystem-level disk usage after evictions\")?;\n\n                    debug!(?after, \"disk usage\");\n\n                    if after.has_pressure() {\n                        // Don't bother doing an out-of-order iteration here now.\n                        // In practice, the task period is set to a value in the tens-of-seconds range,\n                        // which will cause another iteration to happen soon enough.\n                        // TODO: deltas between the three different usages would be helpful,\n                        // consider MiB, GiB, TiB\n                        warn!(?outcome, ?after, \"disk usage still high\");\n                    } else {\n                        info!(?outcome, ?after, \"disk usage pressure relieved\");\n                    }\n                }\n            }\n        }\n        Err(e) => {\n            error!(\"disk_usage_eviction_iteration failed: {:#}\", e);\n        }\n    }\n\n    Ok(())\n}\n\n#[derive(Debug, Serialize)]\n#[allow(clippy::large_enum_variant)]\npub enum IterationOutcome<U> {\n    NoPressure,\n    Cancelled,\n    Finished(IterationOutcomeFinished<U>),\n}\n\n#[derive(Debug, Serialize)]\npub struct IterationOutcomeFinished<U> {\n    /// The actual usage observed before we started the iteration.\n    before: U,\n    /// The expected value for `after`, according to internal accounting, after phase 1.\n    planned: PlannedUsage<U>,\n    /// The outcome of phase 2, where we actually do the evictions.\n    ///\n    /// If all layers that phase 1 planned to evict _can_ actually get evicted, this will\n    /// be the same as `planned`.\n    assumed: AssumedUsage<U>,\n}\n\n#[derive(Debug, Serialize)]\nstruct AssumedUsage<U> {\n    /// The expected value for `after`, after phase 2.\n    projected_after: U,\n    /// The layers we failed to evict during phase 2.\n    failed: LayerCount,\n}\n\n#[derive(Debug, Serialize)]\nstruct 
PlannedUsage<U> {\n    respecting_tenant_min_resident_size: U,\n    fallback_to_global_lru: Option<U>,\n}\n\n#[derive(Debug, Default, Serialize)]\nstruct LayerCount {\n    file_sizes: u64,\n    count: usize,\n}\n\npub(crate) async fn disk_usage_eviction_task_iteration_impl<U: Usage>(\n    state: &State,\n    _storage: &GenericRemoteStorage,\n    usage_pre: U,\n    tenant_manager: &Arc<TenantManager>,\n    eviction_order: EvictionOrder,\n    cancel: &CancellationToken,\n) -> anyhow::Result<IterationOutcome<U>> {\n    // use tokio's mutex to get a Sync guard (instead of std::sync::Mutex)\n    let _g = state\n        .mutex\n        .try_lock()\n        .map_err(|_| anyhow::anyhow!(\"iteration is already executing\"))?;\n\n    debug!(?usage_pre, \"disk usage\");\n\n    if !usage_pre.has_pressure() {\n        return Ok(IterationOutcome::NoPressure);\n    }\n\n    warn!(\n        ?usage_pre,\n        \"running disk usage based eviction due to pressure\"\n    );\n\n    let (candidates, collection_time) = {\n        let started_at = std::time::Instant::now();\n        match collect_eviction_candidates(tenant_manager, eviction_order, cancel).await? 
{\n            EvictionCandidates::Cancelled => {\n                return Ok(IterationOutcome::Cancelled);\n            }\n            EvictionCandidates::Finished(partitioned) => (partitioned, started_at.elapsed()),\n        }\n    };\n\n    METRICS.layers_collected.inc_by(candidates.len() as u64);\n\n    tracing::info!(\n        elapsed_ms = collection_time.as_millis(),\n        total_layers = candidates.len(),\n        \"collection completed\"\n    );\n\n    // Debug-log the list of candidates\n    let now = SystemTime::now();\n    for (i, (partition, candidate)) in candidates.iter().enumerate() {\n        let nth = i + 1;\n        let total_candidates = candidates.len();\n        let size = candidate.layer.get_file_size();\n        let rel = candidate.relative_last_activity;\n        debug!(\n            \"cand {nth}/{total_candidates}: size={size}, rel_last_activity={rel}, no_access_for={}us, partition={partition:?}, {}/{}/{}\",\n            now.duration_since(candidate.last_activity_ts)\n                .unwrap()\n                .as_micros(),\n            candidate.layer.get_tenant_shard_id(),\n            candidate.layer.get_timeline_id(),\n            candidate.layer.get_name(),\n        );\n    }\n\n    // phase1: select victims to relieve pressure\n    //\n    // Walk through the list of candidates, until we have accumulated enough layers to get\n    // us back under the pressure threshold. 
'usage_planned' is updated so that it tracks\n    // how much disk space would be used after evicting all the layers up to the current\n    // point in the list.\n    //\n    // If we get far enough in the list that we start to evict layers that are below\n    // the tenant's min-resident-size threshold, print a warning, and memorize the disk\n    // usage at that point, in 'usage_planned_min_resident_size_respecting'.\n\n    let (evicted_amount, usage_planned) =\n        select_victims(&candidates, usage_pre).into_amount_and_planned();\n\n    METRICS.layers_selected.inc_by(evicted_amount as u64);\n\n    // phase2: evict layers\n\n    let mut js = tokio::task::JoinSet::new();\n    let limit = 1000;\n\n    let mut evicted = candidates.into_iter().take(evicted_amount).fuse();\n    let mut consumed_all = false;\n\n    // After the evictions, `usage_assumed` is the post-eviction usage,\n    // according to internal accounting.\n    let mut usage_assumed = usage_pre;\n    let mut evictions_failed = LayerCount::default();\n\n    let evict_layers = async move {\n        loop {\n            let next = if js.len() >= limit || consumed_all {\n                js.join_next().await\n            } else if !js.is_empty() {\n                // opportunistically consume ready result, one per each new evicted\n                futures::future::FutureExt::now_or_never(js.join_next()).and_then(|x| x)\n            } else {\n                None\n            };\n\n            if let Some(next) = next {\n                match next {\n                    Ok(Ok(file_size)) => {\n                        METRICS.layers_evicted.inc();\n                        /*BEGIN_HADRON */\n                        METRICS.bytes_evicted.inc_by(file_size);\n                        /*END_HADRON */\n                        usage_assumed.add_available_bytes(file_size);\n                    }\n                    Ok(Err((\n                        file_size,\n                        EvictionError::NotFound\n      
                  | EvictionError::Downloaded\n                        | EvictionError::Timeout,\n                    ))) => {\n                        evictions_failed.file_sizes += file_size;\n                        evictions_failed.count += 1;\n                    }\n                    Err(je) if je.is_cancelled() => unreachable!(\"not used\"),\n                    Err(je) if je.is_panic() => { /* already logged */ }\n                    Err(je) => tracing::error!(\"unknown JoinError: {je:?}\"),\n                }\n            }\n\n            if consumed_all && js.is_empty() {\n                break;\n            }\n\n            // calling again when consumed_all is fine as evicted is fused.\n            let Some((_partition, candidate)) = evicted.next() else {\n                if !consumed_all {\n                    tracing::info!(\"all evictions started, waiting\");\n                    consumed_all = true;\n                }\n                continue;\n            };\n\n            match candidate.layer {\n                EvictionLayer::Attached(layer) => {\n                    let file_size = layer.layer_desc().file_size;\n                    js.spawn(async move {\n                        // have a low eviction waiting timeout because our LRU calculations go stale fast;\n                        // also individual layer evictions could hang because of bugs and we do not want to\n                        // pause disk_usage_based_eviction for such.\n                        let timeout = std::time::Duration::from_secs(5);\n\n                        match layer.evict_and_wait(timeout).await {\n                            Ok(()) => Ok(file_size),\n                            Err(e) => Err((file_size, e)),\n                        }\n                    });\n                }\n                EvictionLayer::Secondary(layer) => {\n                    let file_size = layer.metadata.file_size;\n\n                    js.spawn(async move {\n                        
layer\n                            .secondary_tenant\n                            .evict_layer(layer.timeline_id, layer.name)\n                            .await;\n                        Ok(file_size)\n                    });\n                }\n            }\n            tokio::task::yield_now().await;\n        }\n\n        (usage_assumed, evictions_failed)\n    };\n\n    let started_at = std::time::Instant::now();\n\n    let evict_layers = async move {\n        let mut evict_layers = std::pin::pin!(evict_layers);\n\n        let maximum_expected = std::time::Duration::from_secs(10);\n\n        let res = tokio::time::timeout(maximum_expected, &mut evict_layers).await;\n        let tuple = if let Ok(tuple) = res {\n            tuple\n        } else {\n            let elapsed = started_at.elapsed();\n            tracing::info!(elapsed_ms = elapsed.as_millis(), \"still ongoing\");\n            evict_layers.await\n        };\n\n        let elapsed = started_at.elapsed();\n        tracing::info!(elapsed_ms = elapsed.as_millis(), \"completed\");\n        tuple\n    };\n\n    let evict_layers =\n        evict_layers.instrument(tracing::info_span!(\"evict_layers\", layers=%evicted_amount));\n\n    let (usage_assumed, evictions_failed) = tokio::select! 
{\n        tuple = evict_layers => { tuple },\n        _ = cancel.cancelled() => {\n            // dropping joinset will abort all pending evict_and_waits and that is fine, our\n            // requests will still stand\n            return Ok(IterationOutcome::Cancelled);\n        }\n    };\n\n    Ok(IterationOutcome::Finished(IterationOutcomeFinished {\n        before: usage_pre,\n        planned: usage_planned,\n        assumed: AssumedUsage {\n            projected_after: usage_assumed,\n            failed: evictions_failed,\n        },\n    }))\n}\n\n#[derive(Clone)]\npub(crate) struct EvictionSecondaryLayer {\n    pub(crate) secondary_tenant: Arc<SecondaryTenant>,\n    pub(crate) timeline_id: TimelineId,\n    pub(crate) name: LayerName,\n    pub(crate) metadata: LayerFileMetadata,\n}\n\n/// Full [`Layer`] objects are specific to tenants in attached mode.  This type is a layer\n/// of indirection to store either a `Layer`, or a reference to a secondary tenant and a layer name.\n#[derive(Clone)]\npub(crate) enum EvictionLayer {\n    Attached(Layer),\n    Secondary(EvictionSecondaryLayer),\n}\n\nimpl From<Layer> for EvictionLayer {\n    fn from(value: Layer) -> Self {\n        Self::Attached(value)\n    }\n}\n\nimpl EvictionLayer {\n    pub(crate) fn get_tenant_shard_id(&self) -> &TenantShardId {\n        match self {\n            Self::Attached(l) => &l.layer_desc().tenant_shard_id,\n            Self::Secondary(sl) => sl.secondary_tenant.get_tenant_shard_id(),\n        }\n    }\n\n    pub(crate) fn get_timeline_id(&self) -> &TimelineId {\n        match self {\n            Self::Attached(l) => &l.layer_desc().timeline_id,\n            Self::Secondary(sl) => &sl.timeline_id,\n        }\n    }\n\n    pub(crate) fn get_name(&self) -> LayerName {\n        match self {\n            Self::Attached(l) => l.layer_desc().layer_name(),\n            Self::Secondary(sl) => sl.name.clone(),\n        }\n    }\n\n    pub(crate) fn get_file_size(&self) -> u64 {\n        match 
self {\n            Self::Attached(l) => l.layer_desc().file_size,\n            Self::Secondary(sl) => sl.metadata.file_size,\n        }\n    }\n}\n\n#[derive(Clone)]\npub(crate) struct EvictionCandidate {\n    pub(crate) layer: EvictionLayer,\n    pub(crate) last_activity_ts: SystemTime,\n    pub(crate) relative_last_activity: finite_f32::FiniteF32,\n    pub(crate) visibility: LayerVisibilityHint,\n}\n\nimpl std::fmt::Display for EvictionLayer {\n    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {\n        match self {\n            Self::Attached(l) => l.fmt(f),\n            Self::Secondary(sl) => {\n                write!(f, \"{}/{}\", sl.timeline_id, sl.name)\n            }\n        }\n    }\n}\n\n#[derive(Default)]\npub(crate) struct DiskUsageEvictionInfo {\n    /// Timeline's largest layer (remote or resident)\n    pub max_layer_size: Option<u64>,\n    /// Timeline's resident layers\n    pub resident_layers: Vec<EvictionCandidate>,\n}\n\nimpl std::fmt::Debug for EvictionCandidate {\n    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {\n        // format the tv_sec, tv_nsec into rfc3339 in case someone is looking at it\n        // having to allocate a string to this is bad, but it will rarely be formatted\n        let ts = chrono::DateTime::<chrono::Utc>::from(self.last_activity_ts);\n        let ts = ts.to_rfc3339_opts(chrono::SecondsFormat::Nanos, true);\n        struct DisplayIsDebug<'a, T>(&'a T);\n        impl<T: std::fmt::Display> std::fmt::Debug for DisplayIsDebug<'_, T> {\n            fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {\n                write!(f, \"{}\", self.0)\n            }\n        }\n        f.debug_struct(\"LocalLayerInfoForDiskUsageEviction\")\n            .field(\"layer\", &DisplayIsDebug(&self.layer))\n            .field(\"last_activity\", &ts)\n            .finish()\n    }\n}\n\n#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]\nenum EvictionPartition {\n    // A 
layer that is un-wanted by the tenant: evict all these first, before considering\n    // any other layers\n    EvictNow,\n\n    // Above the minimum size threshold: this layer is a candidate for eviction.\n    Above,\n\n    // Below the minimum size threshold: this layer should only be evicted if all the\n    // tenants' layers above the minimum size threshold have already been considered.\n    Below,\n}\n\nenum EvictionCandidates {\n    Cancelled,\n    Finished(Vec<(EvictionPartition, EvictionCandidate)>),\n}\n\n/// Gather the eviction candidates.\n///\n/// The returned `Ok(EvictionCandidates::Finished(candidates))` is sorted in eviction\n/// order. A caller that evicts in that order, until pressure is relieved, implements\n/// the eviction policy outlined in the module comment.\n///\n/// # Example with EvictionOrder::AbsoluteAccessed\n///\n/// Imagine that there are two tenants, A and B, with five layers each, a-e.\n/// Each layer has size 100, and both tenants' min_resident_size is 150.\n/// The eviction order would be\n///\n/// ```text\n/// partition last_activity_ts tenant/layer\n/// Above     18:30            A/c\n/// Above     19:00            A/b\n/// Above     18:29            B/c\n/// Above     19:05            B/b\n/// Above     20:00            B/a\n/// Above     20:03            A/a\n/// Below     20:30            A/d\n/// Below     20:40            B/d\n/// Below     20:45            B/e\n/// Below     20:58            A/e\n/// ```\n///\n/// Now, if we need to evict 300 bytes to relieve pressure, we'd evict `A/c, A/b, B/c`.\n/// They are all in the `Above` partition, so, we respected each tenant's min_resident_size.\n///\n/// But, if we need to evict 900 bytes to relieve pressure, we'd evict\n/// `A/c, A/b, B/c, B/b, B/a, A/a, A/d, B/d, B/e`, reaching into the `Below` partition\n/// after exhausting the `Above` partition.\n/// So, we did not respect each tenant's min_resident_size.\n///\n/// # Example with EvictionOrder::RelativeAccessed\n///\n/// 
```text\n/// partition relative_age last_activity_ts tenant/layer\n/// Above     0/4          18:30            A/c\n/// Above     0/4          18:29            B/c\n/// Above     1/4          19:00            A/b\n/// Above     1/4          19:05            B/b\n/// Above     2/4          20:00            B/a\n/// Above     2/4          20:03            A/a\n/// Below     3/4          20:30            A/d\n/// Below     3/4          20:40            B/d\n/// Below     4/4          20:45            B/e\n/// Below     4/4          20:58            A/e\n/// ```\n///\n/// With tenants having the same number of layers the picture does not change much. The same with\n/// A having many more layers **resident** (not all of them listed):\n///\n/// ```text\n/// Above       0/100      18:30            A/c\n/// Above       0/4        18:29            B/c\n/// Above       1/100      19:00            A/b\n/// Above       2/100      20:03            A/a\n/// Above       3/100      20:03            A/nth_3\n/// Above       4/100      20:03            A/nth_4\n///             ...\n/// Above       1/4        19:05            B/b\n/// Above      25/100      20:04            A/nth_25\n///             ...\n/// Above       2/4        20:00            B/a\n/// Above      50/100      20:10            A/nth_50\n///             ...\n/// Below       3/4        20:40            B/d\n/// Below      99/100      20:30            A/nth_99\n/// Below       4/4        20:45            B/e\n/// Below     100/100      20:58            A/nth_100\n/// ```\n///\n/// Now it's easier to see that because A has grown fast it has more layers to get evicted. 
What is\n/// difficult to see is what happens on the next round assuming the evicting 23 from the above list\n/// relieves the pressure (22 A layers gone, 1 B layers gone) but a new fast growing tenant C has\n/// appeared:\n///\n/// ```text\n/// Above       0/87       20:04            A/nth_23\n/// Above       0/3        19:05            B/b\n/// Above       0/50       20:59            C/nth_0\n/// Above       1/87       20:04            A/nth_24\n/// Above       1/50       21:00            C/nth_1\n/// Above       2/87       20:04            A/nth_25\n///             ...\n/// Above      16/50       21:02            C/nth_16\n/// Above       1/3        20:00            B/a\n/// Above      27/87       20:10            A/nth_50\n///             ...\n/// Below       2/3        20:40            B/d\n/// Below      49/50       21:05            C/nth_49\n/// Below      86/87       20:30            A/nth_99\n/// Below       3/3        20:45            B/e\n/// Below      50/50       21:05            C/nth_50\n/// Below      87/87       20:58            A/nth_100\n/// ```\n///\n/// Now relieving pressure with 23 layers would cost:\n/// - tenant A 14 layers\n/// - tenant B 1 layer\n/// - tenant C 8 layers\nasync fn collect_eviction_candidates(\n    tenant_manager: &Arc<TenantManager>,\n    eviction_order: EvictionOrder,\n    cancel: &CancellationToken,\n) -> anyhow::Result<EvictionCandidates> {\n    const LOG_DURATION_THRESHOLD: std::time::Duration = std::time::Duration::from_secs(10);\n\n    // get a snapshot of the list of tenants\n    let tenants = tenant_manager\n        .list_tenants()\n        .context(\"get list of tenants\")?;\n\n    // TODO: avoid listing every layer in every tenant: this loop can block the executor,\n    // and the resulting data structure can be huge.\n    // (https://github.com/neondatabase/neon/issues/6224)\n    let mut candidates = Vec::new();\n\n    for (tenant_id, _state, _gen) in tenants {\n        if cancel.is_cancelled() {\n            
return Ok(EvictionCandidates::Cancelled);\n        }\n        let tenant = match tenant_manager.get_attached_tenant_shard(tenant_id) {\n            Ok(tenant) if tenant.is_active() => tenant,\n            Ok(_) => {\n                debug!(tenant_id=%tenant_id.tenant_id, shard_id=%tenant_id.shard_slug(), \"Tenant shard is not active\");\n                continue;\n            }\n            Err(e) => {\n                // this can happen if tenant has lifecycle transition after we fetched it\n                debug!(\"failed to get tenant: {e:#}\");\n                continue;\n            }\n        };\n\n        if tenant.cancel.is_cancelled() {\n            info!(%tenant_id, \"Skipping tenant for eviction, it is shutting down\");\n            continue;\n        }\n\n        let started_at = std::time::Instant::now();\n\n        // collect layers from all timelines in this tenant\n        //\n        // If one of the timelines becomes `!is_active()` during the iteration,\n        // for example because we're shutting down, then `max_layer_size` can be too small.\n        // That's OK. 
This code only runs under a disk pressure situation, and being\n        // a little unfair to tenants during shutdown in such a situation is tolerable.\n        let mut tenant_candidates = Vec::new();\n        let mut max_layer_size = 0;\n        for tl in tenant.list_timelines() {\n            if !tl.is_active() {\n                continue;\n            }\n            let info = tl.get_local_layers_for_disk_usage_eviction().await;\n            debug!(\n                tenant_id=%tl.tenant_shard_id.tenant_id,\n                shard_id=%tl.tenant_shard_id.shard_slug(),\n                timeline_id=%tl.timeline_id,\n                \"timeline resident layers count: {}\", info.resident_layers.len()\n            );\n\n            tenant_candidates.extend(info.resident_layers.into_iter());\n            max_layer_size = max_layer_size.max(info.max_layer_size.unwrap_or(0));\n\n            if cancel.is_cancelled() {\n                return Ok(EvictionCandidates::Cancelled);\n            }\n        }\n\n        // Also consider layers of timelines being imported for eviction\n        for tl in tenant.list_importing_timelines() {\n            let info = tl.timeline.get_local_layers_for_disk_usage_eviction().await;\n            debug!(\n                tenant_id=%tl.timeline.tenant_shard_id.tenant_id,\n                shard_id=%tl.timeline.tenant_shard_id.shard_slug(),\n                timeline_id=%tl.timeline.timeline_id,\n                \"timeline resident layers count: {}\", info.resident_layers.len()\n            );\n\n            tenant_candidates.extend(info.resident_layers.into_iter());\n            max_layer_size = max_layer_size.max(info.max_layer_size.unwrap_or(0));\n\n            if cancel.is_cancelled() {\n                return Ok(EvictionCandidates::Cancelled);\n            }\n        }\n\n        // `min_resident_size` defaults to maximum layer file size of the tenant.\n        // This ensures that each tenant can have at least one layer resident at a given 
time,\n        // ensuring forward progress for a single Timeline::get in that tenant.\n        // It's a questionable heuristic since, usually, there are many Timeline::get\n        // requests going on for a tenant, and, at least in Neon prod, the median\n        // layer file size is much smaller than the compaction target size.\n        // We could be better here, e.g., sum of all L0 layers + most recent L1 layer.\n        // That's what's typically used by the various background loops.\n        //\n        // The default can be overridden with a fixed value in the tenant conf.\n        // A default override can be put in the default tenant conf in the pageserver.toml.\n        let min_resident_size = if let Some(s) = tenant.get_min_resident_size_override() {\n            debug!(\n                tenant_id=%tenant.tenant_shard_id().tenant_id,\n                shard_id=%tenant.tenant_shard_id().shard_slug(),\n                overridden_size=s,\n                \"using overridden min resident size for tenant\"\n            );\n            s\n        } else {\n            debug!(\n                tenant_id=%tenant.tenant_shard_id().tenant_id,\n                shard_id=%tenant.tenant_shard_id().shard_slug(),\n                max_layer_size,\n                \"using max layer size as min_resident_size for tenant\",\n            );\n            max_layer_size\n        };\n\n        // Sort layers most-recently-used first, then calculate [`EvictionPartition`] for each layer,\n        // where the inputs are:\n        //  - whether the layer is visible\n        //  - whether the layer is above/below the min_resident_size cutline\n        tenant_candidates\n            .sort_unstable_by_key(|layer_info| std::cmp::Reverse(layer_info.last_activity_ts));\n        let mut cumsum: i128 = 0;\n\n        let total = tenant_candidates.len();\n\n        let tenant_candidates =\n            tenant_candidates\n                .into_iter()\n                .enumerate()\n             
   .map(|(i, mut candidate)| {\n                    // as we iterate this reverse sorted list, the most recently accessed layer will always\n                    // be 1.0; this is for us to evict it last.\n                    candidate.relative_last_activity =\n                        eviction_order.relative_last_activity(total, i);\n\n                    let partition = match candidate.visibility {\n                        LayerVisibilityHint::Covered => {\n                            // Covered layers are evicted first\n                            EvictionPartition::EvictNow\n                        }\n                        LayerVisibilityHint::Visible => {\n                            cumsum += i128::from(candidate.layer.get_file_size());\n\n                            if cumsum > min_resident_size as i128 {\n                                EvictionPartition::Above\n                            } else {\n                                // The most recent layers below the min_resident_size threshold\n                                // are the last to be evicted.\n                                EvictionPartition::Below\n                            }\n                        }\n                    };\n\n                    (partition, candidate)\n                });\n\n        METRICS\n            .tenant_layer_count\n            .observe(tenant_candidates.len() as f64);\n\n        candidates.extend(tenant_candidates);\n\n        let elapsed = started_at.elapsed();\n        METRICS\n            .tenant_collection_time\n            .observe(elapsed.as_secs_f64());\n\n        if elapsed > LOG_DURATION_THRESHOLD {\n            tracing::info!(\n                tenant_id=%tenant.tenant_shard_id().tenant_id,\n                shard_id=%tenant.tenant_shard_id().shard_slug(),\n                elapsed_ms = elapsed.as_millis(),\n                \"collection took longer than threshold\"\n            );\n        }\n    }\n\n    // Note: the same tenant ID might be hit twice, 
if it transitions from attached to\n    // secondary while we run.  That is okay: when we eventually try and run the eviction,\n    // the `Gate` on the object will ensure that whichever one has already been shut down\n    // will not delete anything.\n\n    let mut secondary_tenants = Vec::new();\n    tenant_manager.foreach_secondary_tenants(\n        |_tenant_shard_id: &TenantShardId, state: &Arc<SecondaryTenant>| {\n            secondary_tenants.push(state.clone());\n        },\n    );\n\n    for tenant in secondary_tenants {\n        // for secondary tenants we use a sum of on_disk layers and already evicted layers. this is\n        // to prevent repeated disk usage based evictions from completely draining less often\n        // updating secondaries.\n        let (mut layer_info, total_layers) = tenant.get_layers_for_eviction();\n\n        debug_assert!(\n            total_layers >= layer_info.resident_layers.len(),\n            \"total_layers ({total_layers}) must be at least the resident_layers.len() ({})\",\n            layer_info.resident_layers.len()\n        );\n\n        let started_at = std::time::Instant::now();\n\n        layer_info\n            .resident_layers\n            .sort_unstable_by_key(|layer_info| std::cmp::Reverse(layer_info.last_activity_ts));\n\n        let tenant_candidates =\n            layer_info\n                .resident_layers\n                .into_iter()\n                .enumerate()\n                .map(|(i, mut candidate)| {\n                    candidate.relative_last_activity =\n                        eviction_order.relative_last_activity(total_layers, i);\n                    (\n                        // Secondary locations' layers are always considered above the min resident size,\n                        // i.e. 
secondary locations are permitted to be trimmed to zero layers if all\n                        // the layers have sufficiently old access times.\n                        EvictionPartition::Above,\n                        candidate,\n                    )\n                });\n\n        METRICS\n            .tenant_layer_count\n            .observe(tenant_candidates.len() as f64);\n        candidates.extend(tenant_candidates);\n\n        tokio::task::yield_now().await;\n\n        let elapsed = started_at.elapsed();\n\n        METRICS\n            .tenant_collection_time\n            .observe(elapsed.as_secs_f64());\n\n        if elapsed > LOG_DURATION_THRESHOLD {\n            tracing::info!(\n                tenant_id=%tenant.tenant_shard_id().tenant_id,\n                shard_id=%tenant.tenant_shard_id().shard_slug(),\n                elapsed_ms = elapsed.as_millis(),\n                \"collection took longer than threshold\"\n            );\n        }\n    }\n\n    debug_assert!(\n        EvictionPartition::Above < EvictionPartition::Below,\n        \"as explained in the function's doc comment, layers that aren't in the tenant's min_resident_size are evicted first\"\n    );\n    debug_assert!(\n        EvictionPartition::EvictNow < EvictionPartition::Above,\n        \"as explained in the function's doc comment, layers that aren't in the tenant's min_resident_size are evicted first\"\n    );\n\n    eviction_order.sort(&mut candidates);\n\n    Ok(EvictionCandidates::Finished(candidates))\n}\n\n/// Given a pre-sorted vec of all layers in the system, select the first N which are enough to\n/// relieve pressure.\n///\n/// Returns the amount of candidates selected, with the planned usage.\nfn select_victims<U: Usage>(\n    candidates: &[(EvictionPartition, EvictionCandidate)],\n    usage_pre: U,\n) -> VictimSelection<U> {\n    let mut usage_when_switched = None;\n    let mut usage_planned = usage_pre;\n    let mut evicted_amount = 0;\n\n    for (i, (partition, 
candidate)) in candidates.iter().enumerate() {\n        if !usage_planned.has_pressure() {\n            break;\n        }\n\n        if partition == &EvictionPartition::Below && usage_when_switched.is_none() {\n            usage_when_switched = Some((usage_planned, i));\n        }\n\n        usage_planned.add_available_bytes(candidate.layer.get_file_size());\n        evicted_amount += 1;\n    }\n\n    VictimSelection {\n        amount: evicted_amount,\n        usage_pre,\n        usage_when_switched,\n        usage_planned,\n    }\n}\n\nstruct VictimSelection<U> {\n    amount: usize,\n    usage_pre: U,\n    usage_when_switched: Option<(U, usize)>,\n    usage_planned: U,\n}\n\nimpl<U: Usage> VictimSelection<U> {\n    fn into_amount_and_planned(self) -> (usize, PlannedUsage<U>) {\n        debug!(\n            evicted_amount=%self.amount,\n            \"took enough candidates for pressure to be relieved\"\n        );\n\n        if let Some((usage_planned, candidate_no)) = self.usage_when_switched.as_ref() {\n            warn!(usage_pre=?self.usage_pre, ?usage_planned, candidate_no, \"tenant_min_resident_size-respecting LRU would not relieve pressure, evicting more following global LRU policy\");\n        }\n\n        let planned = match self.usage_when_switched {\n            Some((respecting_tenant_min_resident_size, _)) => PlannedUsage {\n                respecting_tenant_min_resident_size,\n                fallback_to_global_lru: Some(self.usage_planned),\n            },\n            None => PlannedUsage {\n                respecting_tenant_min_resident_size: self.usage_planned,\n                fallback_to_global_lru: None,\n            },\n        };\n\n        (self.amount, planned)\n    }\n}\n\n/// A totally ordered f32 subset we can use with sorting functions.\npub(crate) mod finite_f32 {\n\n    /// A totally ordered f32 subset we can use with sorting functions.\n    #[derive(Clone, Copy, PartialEq)]\n    pub struct FiniteF32(f32);\n\n    impl std::fmt::Debug 
for FiniteF32 {\n        fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {\n            std::fmt::Debug::fmt(&self.0, f)\n        }\n    }\n\n    impl std::fmt::Display for FiniteF32 {\n        fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {\n            std::fmt::Display::fmt(&self.0, f)\n        }\n    }\n\n    impl std::cmp::Eq for FiniteF32 {}\n\n    impl std::cmp::PartialOrd for FiniteF32 {\n        fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {\n            Some(self.cmp(other))\n        }\n    }\n\n    impl std::cmp::Ord for FiniteF32 {\n        fn cmp(&self, other: &Self) -> std::cmp::Ordering {\n            self.0.total_cmp(&other.0)\n        }\n    }\n\n    impl TryFrom<f32> for FiniteF32 {\n        type Error = f32;\n\n        fn try_from(value: f32) -> Result<Self, Self::Error> {\n            if value.is_finite() {\n                Ok(FiniteF32(value))\n            } else {\n                Err(value)\n            }\n        }\n    }\n\n    impl From<FiniteF32> for f32 {\n        fn from(value: FiniteF32) -> f32 {\n            value.0\n        }\n    }\n\n    impl FiniteF32 {\n        pub const ZERO: FiniteF32 = FiniteF32(0.0);\n\n        pub fn try_from_normalized(value: f32) -> Result<Self, f32> {\n            if (0.0..=1.0).contains(&value) {\n                // -0.0 is within the range, make sure it is assumed 0.0..=1.0\n                let value = value.abs();\n                Ok(FiniteF32(value))\n            } else {\n                Err(value)\n            }\n        }\n\n        pub fn into_inner(self) -> f32 {\n            self.into()\n        }\n    }\n}\n\nmod filesystem_level_usage {\n    use anyhow::Context;\n    use camino::Utf8Path;\n\n    use super::DiskUsageEvictionTaskConfig;\n    use crate::statvfs::Statvfs;\n\n    #[derive(Debug, Clone, Copy)]\n    pub struct Usage<'a> {\n        config: &'a DiskUsageEvictionTaskConfig,\n\n        /// Filesystem capacity\n        
total_bytes: u64,\n        /// Free filesystem space\n        avail_bytes: u64,\n    }\n\n    impl super::Usage for Usage<'_> {\n        fn has_pressure(&self) -> bool {\n            let usage_pct =\n                (100.0 * (1.0 - ((self.avail_bytes as f64) / (self.total_bytes as f64)))) as u64;\n\n            let pressures = [\n                (\n                    \"min_avail_bytes\",\n                    self.avail_bytes < self.config.min_avail_bytes,\n                ),\n                (\n                    \"max_usage_pct\",\n                    usage_pct >= self.config.max_usage_pct.get() as u64,\n                ),\n            ];\n\n            pressures.into_iter().any(|(_, has_pressure)| has_pressure)\n        }\n\n        fn add_available_bytes(&mut self, bytes: u64) {\n            self.avail_bytes += bytes;\n        }\n    }\n\n    pub fn get<'a>(\n        tenants_dir: &Utf8Path,\n        config: &'a DiskUsageEvictionTaskConfig,\n    ) -> anyhow::Result<Usage<'a>> {\n        let mock_config = {\n            #[cfg(feature = \"testing\")]\n            {\n                config.mock_statvfs.as_ref()\n            }\n            #[cfg(not(feature = \"testing\"))]\n            {\n                None\n            }\n        };\n\n        let stat = Statvfs::get(tenants_dir, mock_config)\n            .context(\"statvfs failed, presumably directory got unlinked\")?;\n\n        let (avail_bytes, total_bytes) = stat.get_avail_total_bytes();\n\n        Ok(Usage {\n            config,\n            total_bytes,\n            avail_bytes,\n        })\n    }\n\n    #[test]\n    fn max_usage_pct_pressure() {\n        use std::time::Duration;\n\n        use utils::serde_percent::Percent;\n\n        use super::Usage as _;\n\n        let mut usage = Usage {\n            config: &DiskUsageEvictionTaskConfig {\n                max_usage_pct: Percent::new(85).unwrap(),\n                min_avail_bytes: 0,\n                period: Duration::MAX,\n                
#[cfg(feature = \"testing\")]\n                mock_statvfs: None,\n                eviction_order: pageserver_api::config::EvictionOrder::default(),\n                enabled: true,\n            },\n            total_bytes: 100_000,\n            avail_bytes: 0,\n        };\n\n        assert!(usage.has_pressure(), \"expected pressure at 100%\");\n\n        usage.add_available_bytes(14_000);\n        assert!(usage.has_pressure(), \"expected pressure at 86%\");\n\n        usage.add_available_bytes(999);\n        assert!(usage.has_pressure(), \"expected pressure at 85.001%\");\n\n        usage.add_available_bytes(1);\n        assert!(usage.has_pressure(), \"expected pressure at precisely 85%\");\n\n        usage.add_available_bytes(1);\n        assert!(!usage.has_pressure(), \"no pressure at 84.999%\");\n\n        usage.add_available_bytes(999);\n        assert!(!usage.has_pressure(), \"no pressure at 84%\");\n\n        usage.add_available_bytes(16_000);\n        assert!(!usage.has_pressure());\n    }\n}\n\n#[cfg(test)]\nmod tests {\n    use super::*;\n\n    #[test]\n    fn relative_equal_bounds() {\n        let order = EvictionOrder::RelativeAccessed {\n            highest_layer_count_loses_first: false,\n        };\n\n        let len = 10;\n        let v = (0..len)\n            .map(|i| order.relative_last_activity(len, i).into_inner())\n            .collect::<Vec<_>>();\n\n        assert_eq!(v.first(), Some(&1.0));\n        assert_eq!(v.last(), Some(&0.0));\n        assert!(v.windows(2).all(|slice| slice[0] > slice[1]));\n    }\n\n    #[test]\n    fn relative_spare_bounds() {\n        let order = EvictionOrder::RelativeAccessed {\n            highest_layer_count_loses_first: true,\n        };\n\n        let len = 10;\n        let v = (0..len)\n            .map(|i| order.relative_last_activity(len, i).into_inner())\n            .collect::<Vec<_>>();\n\n        assert_eq!(v.first(), Some(&1.0));\n        assert_eq!(v.last(), Some(&0.1));\n        
assert!(v.windows(2).all(|slice| slice[0] > slice[1]));\n    }\n}\n"
  },
  {
    "path": "pageserver/src/feature_resolver.rs",
    "content": "use std::{\n    collections::HashMap,\n    sync::{Arc, atomic::AtomicBool},\n    time::Duration,\n};\n\nuse arc_swap::ArcSwap;\nuse pageserver_api::config::NodeMetadata;\nuse posthog_client_lite::{\n    CaptureEvent, FeatureResolverBackgroundLoop, PostHogEvaluationError,\n    PostHogFlagFilterPropertyValue,\n};\nuse rand::Rng;\nuse remote_storage::RemoteStorageKind;\nuse serde_json::json;\nuse tokio_util::sync::CancellationToken;\nuse utils::id::TenantId;\n\nuse crate::{config::PageServerConf, metrics::FEATURE_FLAG_EVALUATION, tenant::TenantShard};\n\nconst DEFAULT_POSTHOG_REFRESH_INTERVAL: Duration = Duration::from_secs(600);\n\n#[derive(Clone)]\npub struct FeatureResolver {\n    inner: Option<Arc<FeatureResolverBackgroundLoop>>,\n    internal_properties: Option<Arc<HashMap<String, PostHogFlagFilterPropertyValue>>>,\n    force_overrides_for_testing: Arc<ArcSwap<HashMap<String, String>>>,\n}\n\nimpl FeatureResolver {\n    pub fn new_disabled() -> Self {\n        Self {\n            inner: None,\n            internal_properties: None,\n            force_overrides_for_testing: Arc::new(ArcSwap::new(Arc::new(HashMap::new()))),\n        }\n    }\n\n    pub fn update(&self, spec: String) -> anyhow::Result<()> {\n        if let Some(inner) = &self.inner {\n            inner.update(spec)?;\n        }\n        Ok(())\n    }\n\n    pub fn spawn(\n        conf: &PageServerConf,\n        shutdown_pageserver: CancellationToken,\n        handle: &tokio::runtime::Handle,\n    ) -> anyhow::Result<Self> {\n        // DO NOT block in this function: make it return as fast as possible to avoid startup delays.\n        if let Some(posthog_config) = &conf.posthog_config {\n            let posthog_client_config = match posthog_config.clone().try_into_posthog_config() {\n                Ok(config) => config,\n                Err(e) => {\n                    tracing::warn!(\n                        \"invalid posthog config, skipping posthog integration: {}\",\n             
           e\n                    );\n                    return Ok(FeatureResolver {\n                        inner: None,\n                        internal_properties: None,\n                        force_overrides_for_testing: Arc::new(ArcSwap::new(Arc::new(\n                            HashMap::new(),\n                        ))),\n                    });\n                }\n            };\n            let inner =\n                FeatureResolverBackgroundLoop::new(posthog_client_config, shutdown_pageserver);\n            let inner = Arc::new(inner);\n\n            // The properties shared by all tenants on this pageserver.\n            let internal_properties = {\n                let mut properties = HashMap::new();\n                properties.insert(\n                    \"pageserver_id\".to_string(),\n                    PostHogFlagFilterPropertyValue::String(conf.id.to_string()),\n                );\n                if let Some(availability_zone) = &conf.availability_zone {\n                    properties.insert(\n                        \"availability_zone\".to_string(),\n                        PostHogFlagFilterPropertyValue::String(availability_zone.clone()),\n                    );\n                }\n                // Infer region based on the remote storage config.\n                if let Some(remote_storage) = &conf.remote_storage_config {\n                    match &remote_storage.storage {\n                        RemoteStorageKind::AwsS3(config) => {\n                            properties.insert(\n                                \"region\".to_string(),\n                                PostHogFlagFilterPropertyValue::String(format!(\n                                    \"aws-{}\",\n                                    config.bucket_region\n                                )),\n                            );\n                        }\n                        RemoteStorageKind::AzureContainer(config) => {\n                            
properties.insert(\n                                \"region\".to_string(),\n                                PostHogFlagFilterPropertyValue::String(format!(\n                                    \"azure-{}\",\n                                    config.container_region\n                                )),\n                            );\n                        }\n                        RemoteStorageKind::LocalFs { .. } => {\n                            properties.insert(\n                                \"region\".to_string(),\n                                PostHogFlagFilterPropertyValue::String(\"local\".to_string()),\n                            );\n                        }\n                        RemoteStorageKind::GCS { .. } => {\n                            properties.insert(\n                                \"region\".to_string(),\n                                PostHogFlagFilterPropertyValue::String(\"local\".to_string()),\n                            );\n                        }\n                    }\n                }\n                // TODO: move this to a background task so that we don't block startup in case of slow disk\n                let metadata_path = conf.metadata_path();\n                match std::fs::read_to_string(&metadata_path) {\n                    Ok(metadata_str) => match serde_json::from_str::<NodeMetadata>(&metadata_str) {\n                        Ok(metadata) => {\n                            properties.insert(\n                                \"hostname\".to_string(),\n                                PostHogFlagFilterPropertyValue::String(metadata.http_host),\n                            );\n                            if let Some(cplane_region) = metadata.other.get(\"region_id\") {\n                                if let Some(cplane_region) = cplane_region.as_str() {\n                                    // This region contains the cell number\n                                    properties.insert(\n                         
               \"neon_region\".to_string(),\n                                        PostHogFlagFilterPropertyValue::String(\n                                            cplane_region.to_string(),\n                                        ),\n                                    );\n                                }\n                            }\n                        }\n                        Err(e) => {\n                            tracing::warn!(\"Failed to parse metadata.json: {}\", e);\n                        }\n                    },\n                    Err(e) => {\n                        tracing::warn!(\"Failed to read metadata.json: {}\", e);\n                    }\n                }\n                Arc::new(properties)\n            };\n\n            let fake_tenants = {\n                let mut tenants = Vec::new();\n                for i in 0..10 {\n                    let distinct_id = format!(\n                        \"fake_tenant_{}_{}_{}\",\n                        conf.availability_zone.as_deref().unwrap_or_default(),\n                        conf.id,\n                        i\n                    );\n\n                    let tenant_properties = PerTenantProperties {\n                        remote_size_mb: Some(rand::rng().random_range(100.0..1000000.00)),\n                        db_count_max: Some(rand::rng().random_range(1..1000)),\n                        rel_count_max: Some(rand::rng().random_range(1..1000)),\n                    }\n                    .into_posthog_properties();\n\n                    let properties = Self::collect_properties_inner(\n                        distinct_id.clone(),\n                        Some(&internal_properties),\n                        &tenant_properties,\n                    );\n                    tenants.push(CaptureEvent {\n                        event: \"initial_tenant_report\".to_string(),\n                        distinct_id,\n                        properties: json!({ \"$set\": properties 
}), // use `$set` to set the person properties instead of the event properties\n                    });\n                }\n                tenants\n            };\n            inner.clone().spawn(\n                handle,\n                posthog_config\n                    .refresh_interval\n                    .unwrap_or(DEFAULT_POSTHOG_REFRESH_INTERVAL),\n                fake_tenants,\n            );\n            Ok(FeatureResolver {\n                inner: Some(inner),\n                internal_properties: Some(internal_properties),\n                force_overrides_for_testing: Arc::new(ArcSwap::new(Arc::new(HashMap::new()))),\n            })\n        } else {\n            Ok(FeatureResolver {\n                inner: None,\n                internal_properties: None,\n                force_overrides_for_testing: Arc::new(ArcSwap::new(Arc::new(HashMap::new()))),\n            })\n        }\n    }\n\n    fn collect_properties_inner(\n        tenant_id: String,\n        internal_properties: Option<&HashMap<String, PostHogFlagFilterPropertyValue>>,\n        tenant_properties: &HashMap<String, PostHogFlagFilterPropertyValue>,\n    ) -> HashMap<String, PostHogFlagFilterPropertyValue> {\n        let mut properties = HashMap::new();\n        if let Some(internal_properties) = internal_properties {\n            for (key, value) in internal_properties.iter() {\n                properties.insert(key.clone(), value.clone());\n            }\n        }\n        properties.insert(\n            \"tenant_id\".to_string(),\n            PostHogFlagFilterPropertyValue::String(tenant_id),\n        );\n        for (key, value) in tenant_properties.iter() {\n            properties.insert(key.clone(), value.clone());\n        }\n        properties\n    }\n\n    /// Collect all properties available for the feature flag evaluation.\n    pub(crate) fn collect_properties(\n        &self,\n        tenant_id: TenantId,\n        tenant_properties: &HashMap<String, 
PostHogFlagFilterPropertyValue>,\n    ) -> HashMap<String, PostHogFlagFilterPropertyValue> {\n        Self::collect_properties_inner(\n            tenant_id.to_string(),\n            self.internal_properties.as_deref(),\n            tenant_properties,\n        )\n    }\n\n    /// Evaluate a multivariate feature flag. Currently, we do not support any properties.\n    ///\n    /// Error handling: the caller should inspect the error and decide the behavior when a feature flag\n    /// cannot be evaluated (i.e., default to false if it cannot be resolved). The error should *not* be\n    /// propagated beyond where the feature flag gets resolved.\n    pub fn evaluate_multivariate(\n        &self,\n        flag_key: &str,\n        tenant_id: TenantId,\n        tenant_properties: &HashMap<String, PostHogFlagFilterPropertyValue>,\n    ) -> Result<String, PostHogEvaluationError> {\n        let force_overrides = self.force_overrides_for_testing.load();\n        if let Some(value) = force_overrides.get(flag_key) {\n            return Ok(value.clone());\n        }\n\n        if let Some(inner) = &self.inner {\n            let res = inner.feature_store().evaluate_multivariate(\n                flag_key,\n                &tenant_id.to_string(),\n                &self.collect_properties(tenant_id, tenant_properties),\n            );\n            match &res {\n                Ok(value) => {\n                    FEATURE_FLAG_EVALUATION\n                        .with_label_values(&[flag_key, \"ok\", value])\n                        .inc();\n                }\n                Err(e) => {\n                    FEATURE_FLAG_EVALUATION\n                        .with_label_values(&[flag_key, \"error\", e.as_variant_str()])\n                        .inc();\n                }\n            }\n            res\n        } else {\n            Err(PostHogEvaluationError::NotAvailable(\n                \"PostHog integration is not enabled\".to_string(),\n            ))\n        }\n    }\n\n    /// 
Evaluate a boolean feature flag. Currently, we do not support any properties.\n    ///\n    /// Returns `Ok(())` if the flag is evaluated to true, otherwise returns an error.\n    ///\n    /// Error handling: the caller should inspect the error and decide the behavior when a feature flag\n    /// cannot be evaluated (i.e., default to false if it cannot be resolved). The error should *not* be\n    /// propagated beyond where the feature flag gets resolved.\n    pub fn evaluate_boolean(\n        &self,\n        flag_key: &str,\n        tenant_id: TenantId,\n        tenant_properties: &HashMap<String, PostHogFlagFilterPropertyValue>,\n    ) -> Result<(), PostHogEvaluationError> {\n        let force_overrides = self.force_overrides_for_testing.load();\n        if let Some(value) = force_overrides.get(flag_key) {\n            return if value == \"true\" {\n                Ok(())\n            } else {\n                Err(PostHogEvaluationError::NoConditionGroupMatched)\n            };\n        }\n\n        if let Some(inner) = &self.inner {\n            let res = inner.feature_store().evaluate_boolean(\n                flag_key,\n                &tenant_id.to_string(),\n                &self.collect_properties(tenant_id, tenant_properties),\n            );\n            match &res {\n                Ok(()) => {\n                    FEATURE_FLAG_EVALUATION\n                        .with_label_values(&[flag_key, \"ok\", \"true\"])\n                        .inc();\n                }\n                Err(e) => {\n                    FEATURE_FLAG_EVALUATION\n                        .with_label_values(&[flag_key, \"error\", e.as_variant_str()])\n                        .inc();\n                }\n            }\n            res\n        } else {\n            Err(PostHogEvaluationError::NotAvailable(\n                \"PostHog integration is not enabled\".to_string(),\n            ))\n        }\n    }\n\n    pub fn is_feature_flag_boolean(&self, flag_key: &str) -> Result<bool, 
PostHogEvaluationError> {\n        if let Some(inner) = &self.inner {\n            inner.feature_store().is_feature_flag_boolean(flag_key)\n        } else {\n            Err(PostHogEvaluationError::NotAvailable(\n                \"PostHog integration is not enabled, cannot auto-determine the flag type\"\n                    .to_string(),\n            ))\n        }\n    }\n\n    /// Force override a feature flag for testing. This is only for testing purposes. Assume the caller only call it\n    /// from a single thread so it won't race.\n    pub fn force_override_for_testing(&self, flag_key: &str, value: Option<&str>) {\n        let mut force_overrides = self.force_overrides_for_testing.load().as_ref().clone();\n        if let Some(value) = value {\n            force_overrides.insert(flag_key.to_string(), value.to_string());\n        } else {\n            force_overrides.remove(flag_key);\n        }\n        self.force_overrides_for_testing\n            .store(Arc::new(force_overrides));\n    }\n}\n\nstruct PerTenantProperties {\n    pub remote_size_mb: Option<f64>,\n    pub db_count_max: Option<usize>,\n    pub rel_count_max: Option<usize>,\n}\n\nimpl PerTenantProperties {\n    pub fn into_posthog_properties(self) -> HashMap<String, PostHogFlagFilterPropertyValue> {\n        let mut properties = HashMap::new();\n        if let Some(remote_size_mb) = self.remote_size_mb {\n            properties.insert(\n                \"tenant_remote_size_mb\".to_string(),\n                PostHogFlagFilterPropertyValue::Number(remote_size_mb),\n            );\n        }\n        if let Some(db_count) = self.db_count_max {\n            properties.insert(\n                \"tenant_db_count_max\".to_string(),\n                PostHogFlagFilterPropertyValue::Number(db_count as f64),\n            );\n        }\n        if let Some(rel_count) = self.rel_count_max {\n            properties.insert(\n                \"tenant_rel_count_max\".to_string(),\n                
PostHogFlagFilterPropertyValue::Number(rel_count as f64),\n            );\n        }\n        properties\n    }\n}\n\npub struct TenantFeatureResolver {\n    inner: FeatureResolver,\n    tenant_id: TenantId,\n    cached_tenant_properties: ArcSwap<HashMap<String, PostHogFlagFilterPropertyValue>>,\n\n    // Add feature flag on the critical path below.\n    //\n    // If a feature flag will be used on the critical path, we will update it in the tenant housekeeping loop instead of\n    // resolving directly by calling `evaluate_multivariate` or `evaluate_boolean`. Remember to update the flag in the\n    // housekeeping loop. The user should directly read this atomic flag instead of using the set of evaluate functions.\n    pub feature_test_remote_size_flag: AtomicBool,\n}\n\nimpl TenantFeatureResolver {\n    pub fn new(inner: FeatureResolver, tenant_id: TenantId) -> Self {\n        Self {\n            inner,\n            tenant_id,\n            cached_tenant_properties: ArcSwap::new(Arc::new(HashMap::new())),\n            feature_test_remote_size_flag: AtomicBool::new(false),\n        }\n    }\n\n    pub fn evaluate_multivariate(&self, flag_key: &str) -> Result<String, PostHogEvaluationError> {\n        self.inner.evaluate_multivariate(\n            flag_key,\n            self.tenant_id,\n            &self.cached_tenant_properties.load(),\n        )\n    }\n\n    pub fn evaluate_boolean(&self, flag_key: &str) -> Result<(), PostHogEvaluationError> {\n        self.inner.evaluate_boolean(\n            flag_key,\n            self.tenant_id,\n            &self.cached_tenant_properties.load(),\n        )\n    }\n\n    pub fn collect_properties(&self) -> HashMap<String, PostHogFlagFilterPropertyValue> {\n        self.inner\n            .collect_properties(self.tenant_id, &self.cached_tenant_properties.load())\n    }\n\n    pub fn is_feature_flag_boolean(&self, flag_key: &str) -> Result<bool, PostHogEvaluationError> {\n        self.inner.is_feature_flag_boolean(flag_key)\n    
}\n\n    /// Refresh the cached properties and flags on the critical path.\n    pub fn refresh_properties_and_flags(&self, tenant_shard: &TenantShard) {\n        // Any of the remote size is none => this property is none.\n        let mut remote_size_mb = Some(0.0);\n        // Any of the db or rel count is available => this property is available.\n        let mut db_count_max = None;\n        let mut rel_count_max = None;\n        for timeline in tenant_shard.list_timelines() {\n            let size = timeline.metrics.resident_physical_size_get();\n            if size == 0 {\n                remote_size_mb = None;\n                break;\n            }\n            if let Some(ref mut remote_size_mb) = remote_size_mb {\n                *remote_size_mb += size as f64 / 1024.0 / 1024.0;\n            }\n            if let Some(data) = timeline.db_rel_count.load_full() {\n                let (db_count, rel_count) = *data.as_ref();\n                if db_count_max.is_none() {\n                    db_count_max = Some(db_count);\n                }\n                if rel_count_max.is_none() {\n                    rel_count_max = Some(rel_count);\n                }\n                db_count_max = db_count_max.map(|max| max.max(db_count));\n                rel_count_max = rel_count_max.map(|max| max.max(rel_count));\n            }\n        }\n        self.cached_tenant_properties.store(Arc::new(\n            PerTenantProperties {\n                remote_size_mb,\n                db_count_max,\n                rel_count_max,\n            }\n            .into_posthog_properties(),\n        ));\n\n        // BEGIN: Update the feature flag on the critical path.\n        self.feature_test_remote_size_flag.store(\n            self.evaluate_boolean(\"test-remote-size-flag\").is_ok(),\n            std::sync::atomic::Ordering::Relaxed,\n        );\n        // END: Update the feature flag on the critical path.\n    }\n}\n"
  },
  {
    "path": "pageserver/src/http/mod.rs",
    "content": "pub mod routes;\npub use routes::make_router;\n"
  },
  {
    "path": "pageserver/src/http/openapi_spec.yml",
    "content": "openapi: \"3.0.2\"\ninfo:\n  title: Page Server API\n  description: Neon Pageserver API\n  version: \"1.0\"\n  license:\n    name: \"Apache\"\n    url: https://github.com/neondatabase/neon/blob/main/LICENSE\nservers:\n  - url: \"\"\npaths:\n  /v1/status:\n    description: Healthcheck endpoint\n    get:\n      description: Healthcheck\n      security: []\n      responses:\n        \"200\":\n          description: OK\n          content:\n            application/json:\n              schema:\n                type: object\n                required:\n                  - id\n                properties:\n                  id:\n                    type: integer\n\n  /v1/disk_usage_eviction/run:\n    put:\n      description: Do an iteration of disk-usage-based eviction to evict a given amount of disk space.\n      security: []\n      requestBody:\n        content:\n          application/json:\n            schema:\n              type: object\n              required:\n                - evict_bytes\n              properties:\n                evict_bytes:\n                  type: integer\n      responses:\n        \"200\":\n          description: |\n            The run completed.\n            This does not necessarily mean that we actually evicted `evict_bytes`.\n            Examine the returned object for detail, or, just watch the actual effect of the call using `du` or `df`.\n          content:\n            application/json:\n              schema:\n                type: object\n\n  /v1/reload_auth_validation_keys:\n    post:\n      description: Reloads the JWT public keys from their pre-configured location on disk.\n      responses:\n        \"200\":\n          description: The reload completed successfully.\n\n  /v1/tenant/{tenant_id}:\n    parameters:\n      - name: tenant_id\n        in: path\n        required: true\n        schema:\n          type: string\n    get:\n      description: Get tenant status\n      responses:\n        \"200\":\n          
description: Currently returns the flag whether the tenant has inprogress timeline downloads\n          content:\n            application/json:\n              schema:\n                $ref: \"#/components/schemas/TenantInfo\"\n\n    delete:\n      description: |\n        Attempts to delete specified tenant. 500, 503 and 409 errors should be retried.  Deleting\n        a non-existent tenant is considered successful (returns 200).\n      responses:\n        \"200\":\n          description: Tenant was successfully deleted, or was already not found.\n        \"503\":\n          description: Service is unavailable, or tenant is already being modified (perhaps concurrently deleted)\n\n\n  /v1/tenant/{tenant_id}/time_travel_remote_storage:\n    parameters:\n      - name: tenant_id\n        in: path\n        required: true\n        schema:\n          type: string\n      - name: travel_to\n        in: query\n        required: true\n        schema:\n          type: string\n          format: date-time\n      - name: done_if_after\n        in: query\n        required: true\n        schema:\n          type: string\n          format: date-time\n    put:\n      description: Time travel the tenant's remote storage\n      responses:\n        \"200\":\n          description: OK\n          content:\n            application/json:\n              schema:\n                type: string\n\n  /v1/tenant/{tenant_id}/timeline/{timeline_id}:\n    parameters:\n      - name: tenant_id\n        in: path\n        required: true\n        schema:\n          type: string\n      - name: timeline_id\n        in: path\n        required: true\n        schema:\n          type: string\n          format: hex\n    get:\n      description: Get info about the timeline\n      responses:\n        \"200\":\n          description: TimelineInfo\n          content:\n            application/json:\n              schema:\n                $ref: \"#/components/schemas/TimelineInfo\"\n\n    delete:\n      description: 
\"Attempts to delete specified timeline. 500 and 409 errors should be retried\"\n      responses:\n        \"404\":\n          description: Timeline not found. This is the success path.\n          content:\n            application/json:\n              schema:\n                $ref: \"#/components/schemas/NotFoundError\"\n        \"409\":\n          description: Deletion is already in progress, continue polling\n          content:\n            application/json:\n              schema:\n                $ref: \"#/components/schemas/ConflictError\"\n        \"412\":\n          description: Tenant is missing, or timeline has children\n          content:\n            application/json:\n              schema:\n                $ref: \"#/components/schemas/PreconditionFailedError\"\n\n  /v1/tenant/{tenant_id}/timeline/{timeline_id}/get_timestamp_of_lsn:\n    parameters:\n      - name: tenant_id\n        in: path\n        required: true\n        schema:\n          type: string\n      - name: timeline_id\n        in: path\n        required: true\n        schema:\n          type: string\n          format: hex\n    get:\n      description: Get timestamp for a given LSN\n      parameters:\n        - name: lsn\n          in: query\n          required: true\n          schema:\n            type: string\n            format: hex\n          description: A LSN to get the timestamp\n      responses:\n        \"200\":\n          description: OK\n          content:\n            application/json:\n              schema:\n                type: string\n                format: date-time\n        \"412\":\n          description: No timestamp is found for given LSN, e.g. 
if there had been no commits till LSN\n          content:\n            application/json:\n              schema:\n                $ref: \"#/components/schemas/PreconditionFailedError\"\n\n  /v1/tenant/{tenant_id}/timeline/{timeline_id}/get_lsn_by_timestamp:\n    parameters:\n      - name: tenant_id\n        in: path\n        required: true\n        schema:\n          type: string\n      - name: timeline_id\n        in: path\n        required: true\n        schema:\n          type: string\n          format: hex\n    get:\n      description: Get LSN by a timestamp\n      parameters:\n        - name: timestamp\n          in: query\n          required: true\n          schema:\n            type: string\n            format: date-time\n          description: A timestamp to get the LSN\n        - name: with_lease\n          in: query\n          required: false\n          schema:\n            type: boolean\n          description: Whether to grant a lease to the corresponding LSN. Default to false.\n\n      responses:\n        \"200\":\n          description: OK\n          content:\n            application/json:\n              schema:\n                $ref: \"#/components/schemas/LsnByTimestampResponse\"\n\n  /v1/tenant/{tenant_shard_id}/timeline/{timeline_id}/lsn_lease:\n    parameters:\n      - name: tenant_shard_id\n        in: path\n        required: true\n        schema:\n          type: string\n      - name: timeline_id\n        in: path\n        required: true\n        schema:\n          type: string\n          format: hex\n    post:\n      description: Obtains a lease for the given LSN.\n      requestBody:\n        content:\n          application/json:\n            schema:\n              type: object\n              required:\n               - lsn\n              properties:\n                lsn:\n                  description: A LSN to obtain the lease for.\n                  type: string\n                  format: hex\n      responses:\n        \"200\":\n          
description: OK\n          content:\n            application/json:\n              schema:\n                $ref: \"#/components/schemas/LsnLease\"\n\n  /v1/tenant/{tenant_id}/timeline/{timeline_id}/do_gc:\n    parameters:\n      - name: tenant_id\n        in: path\n        required: true\n        schema:\n          type: string\n      - name: timeline_id\n        in: path\n        required: true\n        schema:\n          type: string\n          format: hex\n    put:\n      description: Garbage collect given timeline\n      responses:\n        \"200\":\n          description: OK\n          content:\n            application/json:\n              schema:\n                type: string\n\n  /v1/tenant/{tenant_shard_id}/timeline/{timeline_id}/block_gc:\n    parameters:\n      - name: tenant_shard_id\n        in: path\n        required: true\n        schema:\n          type: string\n      - name: timeline_id\n        in: path\n        required: true\n        schema:\n          type: string\n          format: hex\n    post:\n      description: Persistently add a gc blocking at the tenant level because of this timeline\n      responses:\n        \"200\":\n          description: OK\n\n  /v1/tenant/{tenant_shard_id}/timeline/{timeline_id}/unblock_gc:\n    parameters:\n      - name: tenant_shard_id\n        in: path\n        required: true\n        schema:\n          type: string\n      - name: timeline_id\n        in: path\n        required: true\n        schema:\n          type: string\n          format: hex\n    post:\n      description: Persistently remove a tenant level gc blocking for this timeline\n      responses:\n        \"200\":\n          description: OK\n\n  /v1/tenant/{tenant_shard_id}/timeline/{timeline_id}/mark_invisible:\n    parameters:\n      - name: tenant_shard_id\n        in: path\n        required: true\n        schema:\n          type: string\n      - name: timeline_id\n        in: path\n        required: true\n        schema:\n          type: string\n 
         format: hex\n    put:\n      requestBody:\n        content:\n          application/json:\n            schema:\n              type: object\n              properties:\n                is_visible:\n                  type: boolean\n                  default: false\n      responses:\n        \"200\":\n          description: OK\n\n  /v1/tenant/{tenant_shard_id}/location_config:\n    parameters:\n      - name: tenant_shard_id\n        in: path\n        required: true\n        schema:\n          type: string\n      - name: flush_ms\n        in: query\n        required: false\n        schema:\n          type: integer\n      - name: lazy\n        in: query\n        required: false\n        schema:\n          type: boolean\n        description: Set to true for attaches to queue up until activated by compute. Eager (false) is the default.\n    put:\n      description: |\n        Configures a _tenant location_, that is how a particular pageserver handles\n        a particular tenant.  This includes _attached_ tenants, i.e. those ingesting WAL\n        and page service requests, and _secondary_ tenants, i.e. those which are just keeping\n        a warm cache in anticipation of transitioning to attached state in the future.\n\n        This is a declarative, idempotent API: there are not separate endpoints\n        for different tenant location configurations.  Rather, this single endpoint accepts\n        a description of the desired location configuration, and makes whatever changes\n        are required to reach that state.\n\n        In imperative terms, this API is used to attach and detach tenants, and\n        to transition tenants to and from secondary mode.\n\n        This is a synchronous API: there is no 202 response.  
State transitions should always\n        be fast (milliseconds), with the exception of requests setting `flush_ms`, in which case\n        the caller controls the runtime of the request.\n\n        In some state transitions, it makes sense to flush dirty data to remote storage: this includes transitions\n        to AttachedStale and Detached.  Flushing is never necessary for correctness, but is an\n        important optimization when doing migrations.  The `flush_ms` parameter controls whether\n        flushing should be attempted, and how much time is allowed for flushing.  If the time limit expires,\n        the requested transition will continue without waiting for any outstanding data to flush.  Callers\n        should use a duration which is substantially less than their HTTP client's request\n        timeout.  It is safe to supply flush_ms irrespective of the request body: in state transitions\n        where flushing doesn't make sense, the server will ignore it.\n\n        It is safe to retry requests, but if one receives a 409 or 503 response, it is not\n        useful to retry aggressively: there is probably an existing request still ongoing.\n      requestBody:\n        required: false\n        content:\n          application/json:\n            schema:\n              $ref: \"#/components/schemas/TenantLocationConfigRequest\"\n      responses:\n        \"200\":\n          description: Tenant is now in requested state\n          content:\n            application/json:\n              schema:\n                $ref: \"#/components/schemas/TenantLocationConfigResponse\"\n        \"409\":\n          description: |\n            The tenant is already being modified, perhaps by a concurrent call to this API\n          content:\n            application/json:\n              schema:\n                $ref: \"#/components/schemas/ConflictError\"\n\n  /v1/tenant/{tenant_id}/timeline/{timeline_id}/preserve_initdb_archive:\n    parameters:\n      - name: tenant_id\n        
in: path\n        required: true\n        schema:\n          type: string\n      - name: timeline_id\n        in: path\n        required: true\n        schema:\n          type: string\n    post:\n      description: |\n        Marks the initdb archive for preservation upon deletion of the timeline or tenant.\n        This is meant to be part of the disaster recovery process.\n      responses:\n        \"202\":\n          description: Tenant scheduled to load successfully\n\n  /v1/tenant/{tenant_shard_id}/timeline/{timeline_id}/archival_config:\n    parameters:\n      - name: tenant_shard_id\n        in: path\n        required: true\n        schema:\n          type: string\n      - name: timeline_id\n        in: path\n        required: true\n        schema:\n          type: string\n    put:\n      description: |\n        Either archives or unarchives the given timeline.\n        An archived timeline may not have any non-archived children.\n      requestBody:\n        required: true\n        content:\n          application/json:\n            schema:\n              $ref: \"#/components/schemas/ArchivalConfigRequest\"\n      responses:\n        \"200\":\n          description: Timeline (un)archived successfully\n        \"409\":\n          description: |\n            The tenant/timeline is already being modified, perhaps by a concurrent call to this API\n          content:\n            application/json:\n              schema:\n                $ref: \"#/components/schemas/ConflictError\"\n        \"500\":\n          description: Generic operation error\n          content:\n            application/json:\n              schema:\n                $ref: \"#/components/schemas/Error\"\n        \"503\":\n          description: Temporarily unavailable, please retry.\n          content:\n            application/json:\n              schema:\n                $ref: \"#/components/schemas/ServiceUnavailableError\"\n\n  /v1/tenant/{tenant_id}/synthetic_size:\n    parameters:\n      - 
name: tenant_id\n        in: path\n        required: true\n        schema:\n          type: string\n      - name: inputs_only\n        in: query\n        required: false\n        schema:\n          type: boolean\n        description: |\n          When true, skip calculation and only provide the model inputs (for debugging). Defaults to false.\n      - name: retention_period\n        in: query\n        required: false\n        schema:\n          type: integer\n        description: |\n          Override the default retention period (in bytes) used for size calculation.\n    get:\n      description: |\n        Calculate tenant's size, which is a mixture of WAL (bytes) and logical_size (bytes).\n      responses:\n        \"200\":\n          description: OK\n          content:\n            application/json:\n              schema:\n                $ref: \"#/components/schemas/SyntheticSizeResponse\"\n            text/html:\n              schema:\n                type: string\n                description: SVG representation of the tenant and its timelines.\n        \"401\":\n          description: Unauthorized Error\n          content:\n            application/json:\n              schema:\n                $ref: \"#/components/schemas/UnauthorizedError\"\n        \"403\":\n          description: Forbidden Error\n          content:\n            application/json:\n              schema:\n                $ref: \"#/components/schemas/ForbiddenError\"\n        \"500\":\n          description: Generic operation error\n          content:\n            application/json:\n              schema:\n                $ref: \"#/components/schemas/Error\"\n        \"503\":\n          description: Temporarily unavailable, please retry.\n          content:\n            application/json:\n              schema:\n                $ref: \"#/components/schemas/ServiceUnavailableError\"\n\n  /v1/tenant/{tenant_shard_id}/heatmap_upload:\n    parameters:\n      - name: tenant_shard_id\n        in: 
path\n        required: true\n        schema:\n          type: string\n    post:\n      description: |\n        If the location is in an attached mode, upload the current state to the remote heatmap\n      responses:\n        \"200\":\n          description: Success\n\n  /v1/tenant/{tenant_shard_id}/secondary/download:\n    parameters:\n      - name: tenant_shard_id\n        in: path\n        required: true\n        schema:\n          type: string\n      - name: wait_ms\n        description: If set, we will wait this long for download to complete, and if it isn't complete then return 202\n        in: query\n        required: false\n        schema:\n          type: integer\n    post:\n      description: |\n        If the location is in secondary mode, download latest heatmap and layers\n      responses:\n        \"200\":\n          description: Success\n          content:\n            application/json:\n              schema:\n                $ref: \"#/components/schemas/SecondaryProgress\"\n        \"202\":\n          description: Download has started but not yet finished\n          content:\n            application/json:\n              schema:\n                $ref: \"#/components/schemas/SecondaryProgress\"\n\n  /v1/tenant/{tenant_id}/timeline:\n    parameters:\n      - name: tenant_id\n        in: path\n        required: true\n        schema:\n          type: string\n    post:\n      description: |\n        Create a timeline. 
Returns new timeline id on success.\n        Recreating the same timeline will succeed if the parameters match the existing timeline.\n        If no pg_version is specified, assume DEFAULT_PG_VERSION hardcoded in the pageserver.\n\n        To ensure durability, the caller must retry the creation until success.\n        Just because the timeline is visible via other endpoints does not mean it is durable.\n        Future versions may stop showing timelines that are not yet durable.\n      requestBody:\n        content:\n          application/json:\n            schema:\n              type: object\n              required:\n                - new_timeline_id\n              properties:\n                new_timeline_id:\n                  type: string\n                  format: hex\n                ancestor_timeline_id:\n                  type: string\n                  format: hex\n                ancestor_start_lsn:\n                  type: string\n                  format: hex\n                pg_version:\n                  type: integer\n                read_only:\n                  type: boolean\n                existing_initdb_timeline_id:\n                  type: string\n                  format: hex\n                import_pgdata:\n                  $ref: \"#/components/schemas/TimelineCreateRequestImportPgdata\"\n      responses:\n        \"201\":\n          description: Timeline was created, or already existed with matching parameters\n          content:\n            application/json:\n              schema:\n                $ref: \"#/components/schemas/TimelineInfo\"\n        \"406\":\n          description: Permanently unsatisfiable request, don't retry.\n          content:\n            application/json:\n              schema:\n                $ref: \"#/components/schemas/Error\"\n        \"409\":\n          description: Timeline already exists, with different parameters.  
Creation cannot proceed.\n          content:\n            application/json:\n              schema:\n                $ref: \"#/components/schemas/ConflictError\"\n        \"429\":\n          description: A creation request was sent for the same Timeline Id while a creation was already in progress.  Back off and retry.\n          content:\n            application/json:\n              schema:\n                $ref: \"#/components/schemas/Error\"\n    get:\n      description: Get timelines for tenant\n      responses:\n        \"200\":\n          description: TimelineInfo\n          content:\n            application/json:\n              schema:\n                type: array\n                items:\n                  $ref: \"#/components/schemas/TimelineInfo\"\n\n  /v1/tenant/{tenant_shard_id}/timeline/{timeline_id}/detach_ancestor:\n    parameters:\n      - name: tenant_shard_id\n        in: path\n        required: true\n        schema:\n          type: string\n      - name: timeline_id\n        in: path\n        required: true\n        schema:\n          type: string\n\n    put:\n      description: |\n        Detach a timeline from its ancestor and reparent all of the ancestor's timelines with lower `ancestor_lsn`.\n        Current implementation might not be retryable across failure cases, but will be enhanced in the future.\n        Detaching should be expected to be an expensive operation. 
Timeouts should be retried.\n      parameters:\n        - name: detach_behavior\n          in: query\n          required: false\n          schema:\n            description: Currently valid values are `v1`, `v2`\n            type: string\n      responses:\n        \"200\":\n          description: |\n            The timeline has been detached from its ancestor (now or earlier), and at least the returned timelines have been reparented.\n            If any timelines were deleted after reparenting, they might not be on this list.\n          content:\n            application/json:\n              schema:\n                $ref: \"#/components/schemas/AncestorDetached\"\n\n        \"400\":\n          description: |\n            Number of early checks meaning the timeline cannot be detached now:\n              - the ancestor of timeline has an ancestor: not supported, see RFC\n          content:\n            application/json:\n              schema:\n                $ref: \"#/components/schemas/Error\"\n\n        \"404\":\n          description: Tenant or timeline not found.\n          content:\n            application/json:\n              schema:\n                $ref: \"#/components/schemas/NotFoundError\"\n\n        \"409\":\n          description: |\n            The timeline can never be detached:\n              - timeline has no ancestor, implying that the timeline has never had an ancestor\n          content:\n            application/json:\n              schema:\n                $ref: \"#/components/schemas/ConflictError\"\n\n        \"500\":\n          description: |\n            Transient error, for example, pageserver shutdown happened while\n            processing the request but we were unable to distinguish that. 
Must\n            be retried.\n          content:\n            application/json:\n              schema:\n                $ref: \"#/components/schemas/Error\"\n\n        \"503\":\n          description: |\n            Temporarily unavailable, please retry. Possible reasons:\n              - another timeline detach for the same tenant is underway, please retry later\n              - detected shutdown error\n          content:\n            application/json:\n              schema:\n                $ref: \"#/components/schemas/ServiceUnavailableError\"\n\n\n  /v1/tenant:\n    get:\n      description: Get tenants list\n      responses:\n        \"200\":\n          description: TenantInfo\n          content:\n            application/json:\n              schema:\n                type: array\n                items:\n                  $ref: \"#/components/schemas/TenantInfo\"\n\n    post:\n      description: |\n        Create a tenant. Returns new tenant id on success.\n\n        If no new tenant id is specified in parameters, it would be generated. 
It's an error to recreate the same tenant.\n\n        Invalid fields in the tenant config will cause the request to be rejected with status 400.\n      requestBody:\n        content:\n          application/json:\n            schema:\n              $ref: \"#/components/schemas/TenantCreateRequest\"\n      responses:\n        \"201\":\n          description: New tenant created successfully\n          content:\n            application/json:\n              schema:\n                type: string\n        \"409\":\n          description: Tenant already exists, creation skipped\n          content:\n            application/json:\n              schema:\n                $ref: \"#/components/schemas/ConflictError\"\n\n  /v1/tenant/config:\n    put:\n      description: |\n        Update tenant's config by setting it to the provided value\n\n        Invalid fields in the tenant config will cause the request to be rejected with status 400.\n      requestBody:\n        content:\n          application/json:\n            schema:\n              $ref: \"#/components/schemas/TenantConfigRequest\"\n      responses:\n        \"200\":\n          description: OK\n          content:\n            application/json:\n              schema:\n                type: array\n                items:\n                  $ref: \"#/components/schemas/TenantInfo\"\n    patch:\n      description: |\n        Update tenant's config additively by patching the updated fields provided.\n        Null values unset the field and non-null values upsert it.\n\n        Invalid fields in the tenant config will cause the request to be rejected with status 400.\n      requestBody:\n        content:\n          application/json:\n            schema:\n              $ref: \"#/components/schemas/TenantConfigRequest\"\n      responses:\n        \"200\":\n          description: OK\n          content:\n            application/json:\n              schema:\n                type: array\n                items:\n                  
$ref: \"#/components/schemas/TenantInfo\"\n\n  /v1/tenant/{tenant_id}/config:\n    parameters:\n      - name: tenant_id\n        in: path\n        required: true\n        schema:\n          type: string\n    get:\n      description: |\n        Returns tenant's config description: specific config overrides a tenant has\n        and the effective config.\n      responses:\n        \"200\":\n          description: Tenant config, specific and effective\n          content:\n            application/json:\n              schema:\n                $ref: \"#/components/schemas/TenantConfigResponse\"\n\n  /v1/tenant/{tenant_shard_id}/timeline/{timeline_id}/download_heatmap_layers:\n    parameters:\n      - name: tenant_shard_id\n        in: path\n        required: true\n        schema:\n          type: string\n      - name: timeline_id\n        in: path\n        required: true\n        schema:\n          type: string\n      - name: concurrency\n        description: Maximum number of concurrent downloads (capped at remote storage concurrency)\n        in: query\n        required: false\n        schema:\n          type: integer\n      - name: recurse\n        description: When set, will recurse with the downloads into ancestor timelines\n        in: query\n        required: false\n        schema:\n          type: boolean\n    post:\n      description: |\n        Download all layers in the specified timeline's heatmap. 
The `tenant_shard_id` parameter\n        may be used to target all shards of a tenant when the unsharded form is used, or a specific\n        tenant shard with the sharded form.\n      responses:\n        \"200\":\n          description: Success\n    delete:\n      description: Stop any on-going background downloads of heatmap layers for the specified timeline.\n      responses:\n        \"200\":\n          description: Success\n\n  /v1/utilization:\n    get:\n      description: |\n        Returns the pageserver's current utilization and fitness score for new tenants.\n\n      responses:\n        \"200\":\n            description: Pageserver utilization and fitness score\n            content:\n              application/json:\n                schema:\n                  $ref: \"#/components/schemas/PageserverUtilization\"\n\ncomponents:\n  securitySchemes:\n    JWT:\n      type: http\n      scheme: bearer\n      bearerFormat: JWT\n  schemas:\n    TenantInfo:\n      type: object\n      required:\n        - id\n        - attachment_status\n      properties:\n        id:\n          type: string\n        current_physical_size:\n          type: integer\n        attachment_status:\n          description: |\n            Status of this tenant's attachment to this pageserver.\n\n            - `maybe` means almost nothing, don't read anything into it\n              except for the fact that the pageserver _might_ be already\n              writing to the tenant's S3 state, so, DO NOT ATTACH the\n              tenant to any other pageserver, or we risk split-brain.\n            - `attached` means that the attach operation has completed,\n              successfully\n            - `failed` means that attach has failed. For reason check corresponding `reason` field.\n              `failed` is the terminal state, retrying attach call won't resolve the issue.\n              For example this can be caused by s3 being unreachable. 
The retry may be implemented\n              with call to detach, though it would be better to not automate it and inspect the failed state\n              manually before proceeding with a retry.\n          type: object\n          required:\n            - slug\n            - data\n          properties:\n            slug:\n              type: string\n              enum: [ \"maybe\", \"attached\", \"failed\" ]\n            data:\n              type: object\n              properties:\n                reason:\n                  type: string\n        gc_blocking:\n          type: string\n\n    TenantCreateRequest:\n      allOf:\n        - $ref: '#/components/schemas/TenantConfig'\n        - $ref: '#/components/schemas/TenantLoadRequest'\n        - type: object\n          required:\n            - new_tenant_id\n          properties:\n            new_tenant_id:\n              type: string\n    TenantLoadRequest:\n      type: object\n      properties:\n        generation:\n          type: integer\n          description: Attachment generation number.\n    TenantConfigRequest:\n      allOf:\n        - $ref: '#/components/schemas/TenantConfig'\n        - type: object\n          required:\n            - tenant_id\n          properties:\n            tenant_id:\n              type: string\n    TenantLocationConfigRequest:\n      type: object\n      required:\n        - mode\n      properties:\n        mode:\n          type: string\n          enum: [\"AttachedSingle\", \"AttachedMulti\", \"AttachedStale\", \"Secondary\", \"Detached\"]\n          description: Mode of functionality that this pageserver will run in for this tenant.\n        generation:\n          type: integer\n          description: Attachment generation number, mandatory when `mode` is an attached state\n        secondary_conf:\n          $ref: '#/components/schemas/SecondaryConfig'\n        tenant_conf:\n          $ref: '#/components/schemas/TenantConfig'\n    TenantLocationConfigResponse:\n      type: object\n      
required:\n        - shards\n      properties:\n        shards:\n          description: Pageservers where this tenant's shards are attached.  Not populated for secondary locations.\n          type: array\n          items:\n            $ref: \"#/components/schemas/TenantShardLocation\"\n        stripe_size:\n          description: If multiple shards are present, this field contains the sharding stripe size, else it is null.\n          type: integer\n          nullable: true\n    TenantShardLocation:\n      type: object\n      required:\n        - node_id\n        - shard_id\n      properties:\n        node_id:\n          description: Pageserver node ID where this shard is attached\n          type: integer\n        shard_id:\n          description: Tenant shard ID of the shard\n          type: string\n    SecondaryConfig:\n      type: object\n      properties:\n        warm:\n          type: boolean\n          description: Whether to poll remote storage for layers to download.  If false, secondary locations don't download anything.\n    ArchivalConfigRequest:\n      type: object\n      required:\n        - state\n      properties:\n        state:\n          description: The archival state of a timeline\n          type: string\n          enum: [\"Archived\", \"Unarchived\"]\n    TenantConfig:\n      type: object\n      properties:\n        gc_period:\n          type: string\n        gc_horizon:\n          type: integer\n        pitr_interval:\n          type: string\n        checkpoint_distance:\n          type: integer\n        checkpoint_timeout:\n          type: string\n        compaction_target_size:\n          type: integer\n        compaction_period:\n          type: string\n        compaction_threshold:\n          type: string\n        compaction_upper_limit:\n          type: string\n        image_creation_threshold:\n          type: integer\n        walreceiver_connect_timeout:\n          type: string\n        lagging_wal_timeout:\n          type: string\n     
   max_lsn_wal_lag:\n          type: integer\n        heatmap_period:\n          type: string\n    TenantConfigResponse:\n      type: object\n      properties:\n        tenant_specific_overrides:\n          $ref: \"#/components/schemas/TenantConfig\"\n        effective_config:\n          $ref: \"#/components/schemas/TenantConfig\"\n    TimelineCreateRequestImportPgdata:\n      type: object\n      required:\n        - location\n        - idempotency_key\n      properties:\n        idempotency_key:\n          type: string\n        location:\n          $ref: \"#/components/schemas/TimelineCreateRequestImportPgdataLocation\"\n    TimelineCreateRequestImportPgdataLocation:\n      type: object\n      properties:\n        AwsS3:\n          $ref: \"#/components/schemas/TimelineCreateRequestImportPgdataLocationAwsS3\"\n    TimelineCreateRequestImportPgdataLocationAwsS3:\n      type: object\n      properties:\n        region:\n          type: string\n        bucket:\n          type: string\n        key:\n          type: string\n      required:\n        - region\n        - bucket\n        - key\n    TimelineInfo:\n      type: object\n      required:\n        - timeline_id\n        - tenant_id\n        - last_record_lsn\n        - disk_consistent_lsn\n        - state\n        - min_readable_lsn\n      properties:\n        timeline_id:\n          type: string\n          format: hex\n        tenant_id:\n          type: string\n        last_record_lsn:\n          type: string\n          format: hex\n        disk_consistent_lsn:\n          type: string\n          format: hex\n        remote_consistent_lsn:\n          type: string\n          format: hex\n        remote_consistent_lsn_visible:\n          type: string\n          format: hex\n        ancestor_timeline_id:\n          type: string\n          format: hex\n        ancestor_lsn:\n          type: string\n          format: hex\n        prev_record_lsn:\n          type: string\n          format: hex\n        
current_logical_size:\n          type: integer\n        current_physical_size:\n          type: integer\n        wal_source_connstr:\n          type: string\n        last_received_msg_lsn:\n          type: string\n          format: hex\n        last_received_msg_ts:\n          type: integer\n        state:\n          type: string\n        min_readable_lsn:\n          type: string\n          format: hex\n        applied_gc_cutoff_lsn:\n          type: string\n          format: hex\n        safekeepers:\n          $ref: \"#/components/schemas/TimelineSafekeepersInfo\"\n\n    TimelineSafekeepersInfo:\n      type: object\n      required:\n        - tenant_id\n        - timeline_id\n        - generation\n        - safekeepers\n      properties:\n        tenant_id:\n          type: string\n          format: hex\n        timeline_id:\n          type: string\n          format: hex\n        generation:\n          type: integer\n        safekeepers:\n          type: array\n          items:\n            $ref: \"#/components/schemas/TimelineSafekeeperInfo\"\n\n    TimelineSafekeeperInfo:\n      type: object\n      required:\n        - id\n        - hostname\n      properties:\n        id:\n          type: integer\n        hostname:\n          type: string\n\n    SyntheticSizeResponse:\n      type: object\n      required:\n        - id\n        - size\n        - segment_sizes\n        - inputs\n      properties:\n        id:\n          type: string\n          format: hex\n        size:\n          type: integer\n          nullable: true\n          description: |\n            Size metric in bytes or null if inputs_only=true was given.\n        segment_sizes:\n          type: array\n          items:\n            $ref: \"#/components/schemas/SegmentSize\"\n        inputs:\n          type: object\n          properties:\n            segments:\n              type: array\n              items:\n                $ref: \"#/components/schemas/SegmentData\"\n            timeline_inputs:\n    
          type: array\n              items:\n                $ref: \"#/components/schemas/TimelineInput\"\n\n    SegmentSize:\n      type: object\n      required:\n        - method\n        - accum_size\n      properties:\n        method:\n          type: string\n        accum_size:\n          type: integer\n\n    SegmentData:\n      type: object\n      required:\n        - segment\n      properties:\n        segment:\n          type: object\n          required:\n            - lsn\n          properties:\n            parent:\n              type: integer\n            lsn:\n              type: integer\n            size:\n              type: integer\n            needed:\n              type: boolean\n        timeline_id:\n          type: string\n          format: hex\n        kind:\n          type: string\n\n    TimelineInput:\n      type: object\n      required:\n        - timeline_id\n      properties:\n        ancestor_id:\n          type: string\n        ancestor_lsn:\n          type: string\n        timeline_id:\n          type: string\n          format: hex\n\n    LsnByTimestampResponse:\n      type: object\n      required:\n        - lsn\n        - kind\n      properties:\n        lsn:\n          type: string\n          format: hex\n        kind:\n          type: string\n          enum: [past, present, future, nodata]\n        valid_until:\n          type: string\n          format: date-time\n          description: The expiration time of the granted lease.\n\n    LsnLease:\n      type: object\n      required:\n        - valid_until\n      properties:\n        valid_until:\n          type: string\n          format: date-time\n\n    PageserverUtilization:\n      type: object\n      required:\n        - disk_usage_bytes\n        - free_space_bytes\n        - utilization_score\n      properties:\n        disk_usage_bytes:\n          type: integer\n          format: int64\n          minimum: 0\n          description: The amount of disk space currently used.\n        
free_space_bytes:\n          type: integer\n          format: int64\n          minimum: 0\n          description: The amount of usable disk space left.\n        utilization_score:\n          type: integer\n          format: int64\n          minimum: 0\n          maximum: 9223372036854775807\n          default: 9223372036854775807\n          description: |\n            Lower is better score for how good this pageserver would be for the next tenant.\n            The default or maximum value can be returned in situations when a proper score cannot (yet) be calculated.\n\n    SecondaryProgress:\n      type: object\n      required:\n        - heatmap_mtime\n        - layers_downloaded\n        - layers_total\n        - bytes_downloaded\n        - bytes_total\n      properties:\n        heatmap_mtime:\n          type: string\n          format: date-time\n          description: Modification time of the most recently downloaded layer heatmap (RFC 3339 format)\n        layers_downloaded:\n          type: integer\n          format: int64\n          description: How many layers from the latest layer heatmap are present on disk\n        bytes_downloaded:\n          type: integer\n          format: int64\n          description: How many bytes of layer content from the latest layer heatmap are present on disk\n        layers_total:\n          type: integer\n          format: int64\n          description: How many layers were in the latest layer heatmap\n        bytes_total:\n          type: integer\n          format: int64\n          description: How many bytes of layer content were in the latest layer heatmap\n\n    AncestorDetached:\n      type: object\n      required:\n        - reparented_timelines\n      properties:\n        reparented_timelines:\n          type: array\n          description: Set of reparented timeline ids\n          items:\n            type: string\n            format: hex\n            description: TimelineId\n\n\n    Error:\n      type: object\n      
required:\n        - msg\n      properties:\n        msg:\n          type: string\n    UnauthorizedError:\n      type: object\n      required:\n        - msg\n      properties:\n        msg:\n          type: string\n    ForbiddenError:\n      type: object\n      required:\n        - msg\n      properties:\n        msg:\n          type: string\n    ServiceUnavailableError:\n      type: object\n      required:\n        - msg\n      properties:\n        msg:\n          type: string\n    NotFoundError:\n      type: object\n      required:\n        - msg\n      properties:\n        msg:\n          type: string\n    ConflictError:\n      type: object\n      required:\n        - msg\n      properties:\n        msg:\n          type: string\n    PreconditionFailedError:\n      type: object\n      required:\n        - msg\n      properties:\n        msg:\n          type: string\n\nsecurity:\n  - JWT: []\n"
  },
  {
    "path": "pageserver/src/http/routes.rs",
    "content": "//!\n//! Management HTTP API\n//!\nuse std::cmp::Reverse;\nuse std::collections::BTreeMap;\nuse std::collections::BinaryHeap;\nuse std::collections::HashMap;\nuse std::str::FromStr;\nuse std::sync::Arc;\nuse std::time::Duration;\n\nuse anyhow::{Context, Result, anyhow};\nuse bytes::Bytes;\nuse enumset::EnumSet;\nuse futures::future::join_all;\nuse futures::{StreamExt, TryFutureExt};\nuse http_utils::endpoint::{\n    self, attach_openapi_ui, auth_middleware, check_permission_with, profile_cpu_handler,\n    profile_heap_handler, prometheus_metrics_handler, request_span,\n};\nuse http_utils::error::{ApiError, HttpErrorBody};\nuse http_utils::failpoints::failpoints_handler;\nuse http_utils::json::{json_request, json_request_maybe, json_response};\nuse http_utils::request::{\n    get_request_param, must_get_query_param, must_parse_query_param, parse_query_param,\n    parse_request_param,\n};\nuse http_utils::{RequestExt, RouterBuilder};\nuse humantime::format_rfc3339;\nuse hyper::{Body, Request, Response, StatusCode, Uri, header};\nuse metrics::launch_timestamp::LaunchTimestamp;\nuse pageserver_api::models::virtual_file::IoMode;\nuse pageserver_api::models::{\n    DetachBehavior, DownloadRemoteLayersTaskSpawnRequest, IngestAuxFilesRequest,\n    ListAuxFilesRequest, LocationConfig, LocationConfigListResponse, LocationConfigMode, LsnLease,\n    LsnLeaseRequest, OffloadedTimelineInfo, PageTraceEvent, ShardParameters, StatusResponse,\n    TenantConfigPatchRequest, TenantConfigRequest, TenantDetails, TenantInfo,\n    TenantLocationConfigRequest, TenantLocationConfigResponse, TenantScanRemoteStorageResponse,\n    TenantScanRemoteStorageShard, TenantShardLocation, TenantShardSplitRequest,\n    TenantShardSplitResponse, TenantSorting, TenantState, TenantWaitLsnRequest,\n    TimelineArchivalConfigRequest, TimelineCreateRequest, TimelineCreateRequestMode,\n    TimelineCreateRequestModeImportPgdata, TimelineGcRequest, TimelineInfo,\n    
TimelinePatchIndexPartRequest, TimelineVisibilityState, TimelinesInfoAndOffloaded,\n    TopTenantShardItem, TopTenantShardsRequest, TopTenantShardsResponse,\n};\nuse pageserver_api::shard::{ShardCount, TenantShardId};\nuse postgres_ffi::PgMajorVersion;\nuse remote_storage::{DownloadError, GenericRemoteStorage, TimeTravelError};\nuse scopeguard::defer;\nuse serde::{Deserialize, Serialize};\nuse serde_json::json;\nuse tenant_size_model::svg::SvgBranchKind;\nuse tenant_size_model::{SizeResult, StorageModel};\nuse tokio::time::Instant;\nuse tokio_util::io::StreamReader;\nuse tokio_util::sync::CancellationToken;\nuse tracing::*;\nuse utils::auth::SwappableJwtAuth;\nuse utils::generation::Generation;\nuse utils::id::{TenantId, TimelineId};\nuse utils::lsn::Lsn;\nuse wal_decoder::models::record::NeonWalRecord;\n\nuse crate::config::PageServerConf;\nuse crate::context;\nuse crate::context::{DownloadBehavior, RequestContext, RequestContextBuilder};\nuse crate::deletion_queue::DeletionQueueClient;\nuse crate::feature_resolver::FeatureResolver;\nuse crate::metrics::LOCAL_DATA_LOSS_SUSPECTED;\nuse crate::pgdatadir_mapping::LsnForTimestamp;\nuse crate::task_mgr::TaskKind;\nuse crate::tenant::config::LocationConf;\nuse crate::tenant::mgr::{\n    GetActiveTenantError, GetTenantError, TenantManager, TenantMapError, TenantMapInsertError,\n    TenantSlot, TenantSlotError, TenantSlotUpsertError, TenantStateError, UpsertLocationError,\n};\nuse crate::tenant::remote_timeline_client::index::GcCompactionState;\nuse crate::tenant::remote_timeline_client::{\n    download_index_part, download_tenant_manifest, list_remote_tenant_shards, list_remote_timelines,\n};\nuse crate::tenant::secondary::SecondaryController;\nuse crate::tenant::size::ModelInputs;\nuse crate::tenant::storage_layer::ValuesReconstructState;\nuse crate::tenant::storage_layer::{IoConcurrency, LayerAccessStatsReset, LayerName};\nuse crate::tenant::timeline::layer_manager::LayerManagerLockHolder;\nuse 
crate::tenant::timeline::offload::{OffloadError, offload_timeline};\nuse crate::tenant::timeline::{\n    CompactFlags, CompactOptions, CompactRequest, MarkInvisibleRequest, Timeline, WaitLsnTimeout,\n    WaitLsnWaiter, import_pgdata,\n};\nuse crate::tenant::{\n    GetTimelineError, LogicalSizeCalculationCause, OffloadedTimeline, PageReconstructError,\n    remote_timeline_client,\n};\nuse crate::{DEFAULT_PG_VERSION, disk_usage_eviction_task, tenant};\n\n// For APIs that require an Active tenant, how long should we block waiting for that state?\n// This is not functionally necessary (clients will retry), but avoids generating a lot of\n// failed API calls while tenants are activating.\n#[cfg(not(feature = \"testing\"))]\npub(crate) const ACTIVE_TENANT_TIMEOUT: Duration = Duration::from_millis(5000);\n\n// Tests run on slow/oversubscribed nodes, and may need to wait much longer for tenants to\n// finish attaching, if calls to remote storage are slow.\n#[cfg(feature = \"testing\")]\npub(crate) const ACTIVE_TENANT_TIMEOUT: Duration = Duration::from_millis(30000);\n\npub struct State {\n    conf: &'static PageServerConf,\n    tenant_manager: Arc<TenantManager>,\n    auth: Option<Arc<SwappableJwtAuth>>,\n    allowlist_routes: &'static [&'static str],\n    remote_storage: GenericRemoteStorage,\n    broker_client: storage_broker::BrokerClientChannel,\n    disk_usage_eviction_state: Arc<disk_usage_eviction_task::State>,\n    deletion_queue_client: DeletionQueueClient,\n    secondary_controller: SecondaryController,\n    latest_utilization: tokio::sync::Mutex<Option<(std::time::Instant, bytes::Bytes)>>,\n    feature_resolver: FeatureResolver,\n}\n\nimpl State {\n    #[allow(clippy::too_many_arguments)]\n    pub fn new(\n        conf: &'static PageServerConf,\n        tenant_manager: Arc<TenantManager>,\n        auth: Option<Arc<SwappableJwtAuth>>,\n        remote_storage: GenericRemoteStorage,\n        broker_client: storage_broker::BrokerClientChannel,\n        
disk_usage_eviction_state: Arc<disk_usage_eviction_task::State>,\n        deletion_queue_client: DeletionQueueClient,\n        secondary_controller: SecondaryController,\n        feature_resolver: FeatureResolver,\n    ) -> anyhow::Result<Self> {\n        let allowlist_routes = &[\n            \"/v1/status\",\n            \"/v1/doc\",\n            \"/swagger.yml\",\n            \"/metrics\",\n            \"/profile/cpu\",\n            \"/profile/heap\",\n        ];\n        Ok(Self {\n            conf,\n            tenant_manager,\n            auth,\n            allowlist_routes,\n            remote_storage,\n            broker_client,\n            disk_usage_eviction_state,\n            deletion_queue_client,\n            secondary_controller,\n            latest_utilization: Default::default(),\n            feature_resolver,\n        })\n    }\n}\n\n#[inline(always)]\nfn get_state(request: &Request<Body>) -> &State {\n    request\n        .data::<Arc<State>>()\n        .expect(\"unknown state type\")\n        .as_ref()\n}\n\n#[inline(always)]\nfn get_config(request: &Request<Body>) -> &'static PageServerConf {\n    get_state(request).conf\n}\n\n/// Check that the requester is authorized to operate on given tenant\nfn check_permission(request: &Request<Body>, tenant_id: Option<TenantId>) -> Result<(), ApiError> {\n    check_permission_with(request, |claims| {\n        crate::auth::check_permission(claims, tenant_id)\n    })\n}\n\nimpl From<PageReconstructError> for ApiError {\n    fn from(pre: PageReconstructError) -> ApiError {\n        match pre {\n            PageReconstructError::Other(other) => ApiError::InternalServerError(other),\n            PageReconstructError::MissingKey(e) => ApiError::InternalServerError(e.into()),\n            PageReconstructError::Cancelled => ApiError::Cancelled,\n            PageReconstructError::AncestorLsnTimeout(e) => ApiError::Timeout(format!(\"{e}\").into()),\n            PageReconstructError::WalRedo(pre) => 
ApiError::InternalServerError(pre),\n        }\n    }\n}\n\nimpl From<TenantMapInsertError> for ApiError {\n    fn from(tmie: TenantMapInsertError) -> ApiError {\n        match tmie {\n            TenantMapInsertError::SlotError(e) => e.into(),\n            TenantMapInsertError::SlotUpsertError(e) => e.into(),\n            TenantMapInsertError::Other(e) => ApiError::InternalServerError(e),\n        }\n    }\n}\n\nimpl From<TenantSlotError> for ApiError {\n    fn from(e: TenantSlotError) -> ApiError {\n        use TenantSlotError::*;\n        match e {\n            NotFound(tenant_id) => {\n                ApiError::NotFound(anyhow::anyhow!(\"NotFound: tenant {tenant_id}\").into())\n            }\n            InProgress => {\n                ApiError::ResourceUnavailable(\"Tenant is being modified concurrently\".into())\n            }\n            MapState(e) => e.into(),\n        }\n    }\n}\n\nimpl From<TenantSlotUpsertError> for ApiError {\n    fn from(e: TenantSlotUpsertError) -> ApiError {\n        use TenantSlotUpsertError::*;\n        match e {\n            InternalError(e) => ApiError::InternalServerError(anyhow::anyhow!(\"{e}\")),\n            MapState(e) => e.into(),\n            ShuttingDown(_) => ApiError::ShuttingDown,\n        }\n    }\n}\n\nimpl From<UpsertLocationError> for ApiError {\n    fn from(e: UpsertLocationError) -> ApiError {\n        use UpsertLocationError::*;\n        match e {\n            BadRequest(e) => ApiError::BadRequest(e),\n            Unavailable(_) => ApiError::ShuttingDown,\n            e @ InProgress => ApiError::Conflict(format!(\"{e}\")),\n            Flush(e) | InternalError(e) => ApiError::InternalServerError(e),\n        }\n    }\n}\n\nimpl From<TenantMapError> for ApiError {\n    fn from(e: TenantMapError) -> ApiError {\n        use TenantMapError::*;\n        match e {\n            StillInitializing | ShuttingDown => {\n                ApiError::ResourceUnavailable(format!(\"{e}\").into())\n            }\n        }\n   
 }\n}\n\nimpl From<TenantStateError> for ApiError {\n    fn from(tse: TenantStateError) -> ApiError {\n        match tse {\n            TenantStateError::IsStopping(_) => {\n                ApiError::ResourceUnavailable(\"Tenant is stopping\".into())\n            }\n            TenantStateError::SlotError(e) => e.into(),\n            TenantStateError::SlotUpsertError(e) => e.into(),\n            TenantStateError::Other(e) => ApiError::InternalServerError(anyhow!(e)),\n        }\n    }\n}\n\nimpl From<GetTenantError> for ApiError {\n    fn from(tse: GetTenantError) -> ApiError {\n        match tse {\n            GetTenantError::NotFound(tid) => ApiError::NotFound(anyhow!(\"tenant {tid}\").into()),\n            GetTenantError::ShardNotFound(tid) => {\n                ApiError::NotFound(anyhow!(\"tenant {tid}\").into())\n            }\n            GetTenantError::NotActive(_) => {\n                // Why is this not `ApiError::NotFound`?\n                // Because we must be careful to never return 404 for a tenant if it does\n                // in fact exist locally. 
If we did, the caller could draw the conclusion\n                // that it can attach the tenant to another PS and we'd be in split-brain.\n                ApiError::ResourceUnavailable(\"Tenant not yet active\".into())\n            }\n            GetTenantError::MapState(e) => ApiError::ResourceUnavailable(format!(\"{e}\").into()),\n        }\n    }\n}\n\nimpl From<GetTimelineError> for ApiError {\n    fn from(gte: GetTimelineError) -> Self {\n        // Rationale: tenant is activated only after eligible timelines activate\n        ApiError::NotFound(gte.into())\n    }\n}\n\nimpl From<GetActiveTenantError> for ApiError {\n    fn from(e: GetActiveTenantError) -> ApiError {\n        match e {\n            GetActiveTenantError::Broken(reason) => {\n                ApiError::InternalServerError(anyhow!(\"tenant is broken: {}\", reason))\n            }\n            GetActiveTenantError::WillNotBecomeActive(TenantState::Stopping { .. }) => {\n                ApiError::ShuttingDown\n            }\n            GetActiveTenantError::WillNotBecomeActive(_) => ApiError::Conflict(format!(\"{e}\")),\n            GetActiveTenantError::Cancelled => ApiError::ShuttingDown,\n            GetActiveTenantError::NotFound(gte) => gte.into(),\n            GetActiveTenantError::WaitForActiveTimeout { .. 
} => {\n                ApiError::ResourceUnavailable(format!(\"{e}\").into())\n            }\n            GetActiveTenantError::SwitchedTenant => {\n                // in our HTTP handlers, this error doesn't happen\n                // TODO: separate error types\n                ApiError::ResourceUnavailable(\"switched tenant\".into())\n            }\n        }\n    }\n}\n\nimpl From<crate::tenant::DeleteTimelineError> for ApiError {\n    fn from(value: crate::tenant::DeleteTimelineError) -> Self {\n        use crate::tenant::DeleteTimelineError::*;\n        match value {\n            NotFound => ApiError::NotFound(anyhow::anyhow!(\"timeline not found\").into()),\n            HasChildren(children) => ApiError::PreconditionFailed(\n                format!(\"Cannot delete timeline which has child timelines: {children:?}\")\n                    .into_boxed_str(),\n            ),\n            a @ AlreadyInProgress(_) => ApiError::Conflict(a.to_string()),\n            Cancelled => ApiError::ResourceUnavailable(\"shutting down\".into()),\n            Other(e) => ApiError::InternalServerError(e),\n        }\n    }\n}\n\nimpl From<crate::tenant::TimelineArchivalError> for ApiError {\n    fn from(value: crate::tenant::TimelineArchivalError) -> Self {\n        use crate::tenant::TimelineArchivalError::*;\n        match value {\n            NotFound => ApiError::NotFound(anyhow::anyhow!(\"timeline not found\").into()),\n            Timeout => ApiError::Timeout(\"hit pageserver internal timeout\".into()),\n            Cancelled => ApiError::ShuttingDown,\n            e @ HasArchivedParent(_) => {\n                ApiError::PreconditionFailed(e.to_string().into_boxed_str())\n            }\n            HasUnarchivedChildren(children) => ApiError::PreconditionFailed(\n                format!(\n                    \"Cannot archive timeline which has non-archived child timelines: {children:?}\"\n                )\n                .into_boxed_str(),\n            ),\n            a @ 
AlreadyInProgress => ApiError::Conflict(a.to_string()),\n            Other(e) => ApiError::InternalServerError(e),\n        }\n    }\n}\n\nimpl From<crate::tenant::mgr::DeleteTimelineError> for ApiError {\n    fn from(value: crate::tenant::mgr::DeleteTimelineError) -> Self {\n        use crate::tenant::mgr::DeleteTimelineError::*;\n        match value {\n            // Report Precondition failed so client can distinguish between\n            // \"tenant is missing\" case from \"timeline is missing\"\n            Tenant(GetTenantError::NotFound(..)) => ApiError::PreconditionFailed(\n                \"Requested tenant is missing\".to_owned().into_boxed_str(),\n            ),\n            Tenant(t) => ApiError::from(t),\n            Timeline(t) => ApiError::from(t),\n        }\n    }\n}\n\nimpl From<crate::tenant::mgr::DeleteTenantError> for ApiError {\n    fn from(value: crate::tenant::mgr::DeleteTenantError) -> Self {\n        use crate::tenant::mgr::DeleteTenantError::*;\n        match value {\n            SlotError(e) => e.into(),\n            Other(o) => ApiError::InternalServerError(o),\n            Cancelled => ApiError::ShuttingDown,\n        }\n    }\n}\n\nimpl From<crate::tenant::secondary::SecondaryTenantError> for ApiError {\n    fn from(ste: crate::tenant::secondary::SecondaryTenantError) -> ApiError {\n        use crate::tenant::secondary::SecondaryTenantError;\n        match ste {\n            SecondaryTenantError::GetTenant(gte) => gte.into(),\n            SecondaryTenantError::ShuttingDown => ApiError::ShuttingDown,\n        }\n    }\n}\n\nimpl From<crate::tenant::FinalizeTimelineImportError> for ApiError {\n    fn from(err: crate::tenant::FinalizeTimelineImportError) -> ApiError {\n        use crate::tenant::FinalizeTimelineImportError::*;\n        match err {\n            ImportTaskStillRunning => {\n                ApiError::ResourceUnavailable(\"Import task still running\".into())\n            }\n            ShuttingDown => 
ApiError::ShuttingDown,\n        }\n    }\n}\n\n// Helper function to construct a TimelineInfo struct for a timeline\nasync fn build_timeline_info(\n    timeline: &Arc<Timeline>,\n    include_non_incremental_logical_size: bool,\n    force_await_initial_logical_size: bool,\n    include_image_consistent_lsn: bool,\n    ctx: &RequestContext,\n) -> anyhow::Result<TimelineInfo> {\n    crate::tenant::debug_assert_current_span_has_tenant_and_timeline_id();\n\n    if force_await_initial_logical_size {\n        timeline.clone().await_initial_logical_size().await\n    }\n\n    let mut info = build_timeline_info_common(\n        timeline,\n        ctx,\n        tenant::timeline::GetLogicalSizePriority::Background,\n    )\n    .await?;\n    if include_non_incremental_logical_size {\n        // XXX we should be using spawn_ondemand_logical_size_calculation here.\n        // Otherwise, if someone deletes the timeline / detaches the tenant while\n        // we're executing this function, we will outlive the timeline on-disk state.\n        info.current_logical_size_non_incremental = Some(\n            timeline\n                .get_current_logical_size_non_incremental(info.last_record_lsn, ctx)\n                .await?,\n        );\n    }\n    // HADRON\n    if include_image_consistent_lsn {\n        info.image_consistent_lsn = Some(timeline.compute_image_consistent_lsn().await?);\n    }\n    Ok(info)\n}\n\nasync fn build_timeline_info_common(\n    timeline: &Arc<Timeline>,\n    ctx: &RequestContext,\n    logical_size_task_priority: tenant::timeline::GetLogicalSizePriority,\n) -> anyhow::Result<TimelineInfo> {\n    crate::tenant::debug_assert_current_span_has_tenant_and_timeline_id();\n    let initdb_lsn = timeline.initdb_lsn;\n    let last_record_lsn = timeline.get_last_record_lsn();\n    let (wal_source_connstr, last_received_msg_lsn, last_received_msg_ts) = {\n        let guard = timeline.last_received_wal.lock().unwrap();\n        if let Some(info) = guard.as_ref() {\n        
    (\n                Some(format!(\"{}\", info.wal_source_connconf)), // Password is hidden, but it's for statistics only.\n                Some(info.last_received_msg_lsn),\n                Some(info.last_received_msg_ts),\n            )\n        } else {\n            (None, None, None)\n        }\n    };\n\n    let ancestor_timeline_id = timeline.get_ancestor_timeline_id();\n    let ancestor_lsn = match timeline.get_ancestor_lsn() {\n        Lsn(0) => None,\n        lsn @ Lsn(_) => Some(lsn),\n    };\n    let current_logical_size = timeline.get_current_logical_size(logical_size_task_priority, ctx);\n    let current_physical_size = Some(timeline.layer_size_sum().await);\n    let state = timeline.current_state();\n    // Report is_archived = false if the timeline is still loading\n    let is_archived = timeline.is_archived().unwrap_or(false);\n    let remote_consistent_lsn_projected = timeline\n        .get_remote_consistent_lsn_projected()\n        .unwrap_or(Lsn(0));\n    let remote_consistent_lsn_visible = timeline\n        .get_remote_consistent_lsn_visible()\n        .unwrap_or(Lsn(0));\n    let is_invisible = timeline.remote_client.is_invisible().unwrap_or(false);\n\n    let walreceiver_status = timeline.walreceiver_status();\n\n    let (pitr_history_size, within_ancestor_pitr) = timeline.get_pitr_history_stats();\n\n    // Externally, expose the lowest LSN that can be used to create a branch.\n    // Internally we distinguish between the planned GC cutoff (PITR point) and the \"applied\" GC cutoff (where we\n    // actually trimmed data to), which can pass each other when PITR is changed.\n    let min_readable_lsn = std::cmp::max(\n        timeline.get_gc_cutoff_lsn().unwrap_or_default(),\n        *timeline.get_applied_gc_cutoff_lsn(),\n    );\n\n    let (rel_size_migration, rel_size_migrated_at) = timeline.get_rel_size_v2_status();\n\n    let info = TimelineInfo {\n        tenant_id: timeline.tenant_shard_id,\n        timeline_id: timeline.timeline_id,\n  
      ancestor_timeline_id,\n        ancestor_lsn,\n        disk_consistent_lsn: timeline.get_disk_consistent_lsn(),\n        remote_consistent_lsn: remote_consistent_lsn_projected,\n        remote_consistent_lsn_visible,\n        initdb_lsn,\n        last_record_lsn,\n        prev_record_lsn: Some(timeline.get_prev_record_lsn()),\n        min_readable_lsn,\n        applied_gc_cutoff_lsn: *timeline.get_applied_gc_cutoff_lsn(),\n        current_logical_size: current_logical_size.size_dont_care_about_accuracy(),\n        current_logical_size_is_accurate: match current_logical_size.accuracy() {\n            tenant::timeline::logical_size::Accuracy::Approximate => false,\n            tenant::timeline::logical_size::Accuracy::Exact => true,\n        },\n        directory_entries_counts: timeline.get_directory_metrics().to_vec(),\n        current_physical_size,\n        current_logical_size_non_incremental: None,\n        pitr_history_size,\n        within_ancestor_pitr,\n        timeline_dir_layer_file_size_sum: None,\n        wal_source_connstr,\n        last_received_msg_lsn,\n        last_received_msg_ts,\n        pg_version: timeline.pg_version,\n\n        state,\n        is_archived: Some(is_archived),\n        rel_size_migration: Some(rel_size_migration),\n        rel_size_migrated_at,\n        is_invisible: Some(is_invisible),\n\n        walreceiver_status,\n        // HADRON\n        image_consistent_lsn: None,\n    };\n    Ok(info)\n}\n\nfn build_timeline_offloaded_info(offloaded: &Arc<OffloadedTimeline>) -> OffloadedTimelineInfo {\n    let &OffloadedTimeline {\n        tenant_shard_id,\n        timeline_id,\n        ancestor_retain_lsn,\n        ancestor_timeline_id,\n        archived_at,\n        ..\n    } = offloaded.as_ref();\n    OffloadedTimelineInfo {\n        tenant_id: tenant_shard_id,\n        timeline_id,\n        ancestor_retain_lsn,\n        ancestor_timeline_id,\n        archived_at: archived_at.and_utc(),\n    }\n}\n\n// healthcheck 
handler\nasync fn status_handler(\n    request: Request<Body>,\n    _cancel: CancellationToken,\n) -> Result<Response<Body>, ApiError> {\n    check_permission(&request, None)?;\n    let config = get_config(&request);\n    json_response(StatusCode::OK, StatusResponse { id: config.id })\n}\n\nasync fn reload_auth_validation_keys_handler(\n    request: Request<Body>,\n    _cancel: CancellationToken,\n) -> Result<Response<Body>, ApiError> {\n    check_permission(&request, None)?;\n    let config = get_config(&request);\n    let state = get_state(&request);\n    let Some(shared_auth) = &state.auth else {\n        return json_response(StatusCode::BAD_REQUEST, ());\n    };\n    // unwrap is ok because check is performed when creating config, so path is set and exists\n    let key_path = config.auth_validation_public_key_path.as_ref().unwrap();\n    info!(\"Reloading public key(s) for verifying JWT tokens from {key_path:?}\");\n\n    match utils::auth::JwtAuth::from_key_path(key_path) {\n        Ok(new_auth) => {\n            shared_auth.swap(new_auth);\n            json_response(StatusCode::OK, ())\n        }\n        Err(e) => {\n            let err_msg = \"Error reloading public keys\";\n            warn!(\"Error reloading public keys from {key_path:?}: {e:}\");\n            json_response(\n                StatusCode::INTERNAL_SERVER_ERROR,\n                HttpErrorBody::from_msg(err_msg.to_string()),\n            )\n        }\n    }\n}\n\nasync fn timeline_create_handler(\n    mut request: Request<Body>,\n    _cancel: CancellationToken,\n) -> Result<Response<Body>, ApiError> {\n    let tenant_shard_id: TenantShardId = parse_request_param(&request, \"tenant_shard_id\")?;\n    let request_data: TimelineCreateRequest = json_request(&mut request).await?;\n    check_permission(&request, Some(tenant_shard_id.tenant_id))?;\n\n    let new_timeline_id = request_data.new_timeline_id;\n    // fill in the default pg_version if not provided & convert request into domain model\n    
let params: tenant::CreateTimelineParams = match request_data.mode {\n        TimelineCreateRequestMode::Bootstrap {\n            existing_initdb_timeline_id,\n            pg_version,\n        } => tenant::CreateTimelineParams::Bootstrap(tenant::CreateTimelineParamsBootstrap {\n            new_timeline_id,\n            existing_initdb_timeline_id,\n            pg_version: pg_version.unwrap_or(DEFAULT_PG_VERSION),\n        }),\n        TimelineCreateRequestMode::Branch {\n            ancestor_timeline_id,\n            ancestor_start_lsn,\n            read_only: _,\n            pg_version: _,\n        } => tenant::CreateTimelineParams::Branch(tenant::CreateTimelineParamsBranch {\n            new_timeline_id,\n            ancestor_timeline_id,\n            ancestor_start_lsn,\n        }),\n        TimelineCreateRequestMode::ImportPgdata {\n            import_pgdata:\n                TimelineCreateRequestModeImportPgdata {\n                    location,\n                    idempotency_key,\n                },\n        } => tenant::CreateTimelineParams::ImportPgdata(tenant::CreateTimelineParamsImportPgdata {\n            idempotency_key: import_pgdata::index_part_format::IdempotencyKey::new(\n                idempotency_key.0,\n            ),\n            new_timeline_id,\n            location: {\n                use import_pgdata::index_part_format::Location;\n                use pageserver_api::models::ImportPgdataLocation;\n                match location {\n                    #[cfg(feature = \"testing\")]\n                    ImportPgdataLocation::LocalFs { path } => Location::LocalFs { path },\n                    ImportPgdataLocation::AwsS3 {\n                        region,\n                        bucket,\n                        key,\n                    } => Location::AwsS3 {\n                        region,\n                        bucket,\n                        key,\n                    },\n                }\n            },\n        }),\n    };\n\n    let 
ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Error);\n\n    let state = get_state(&request);\n\n    async {\n        let tenant = state\n            .tenant_manager\n            .get_attached_tenant_shard(tenant_shard_id)?;\n\n        tenant.wait_to_become_active(ACTIVE_TENANT_TIMEOUT).await?;\n\n        // earlier versions of the code had pg_version and ancestor_lsn in the span\n        // => continue to provide that information, but, through a log message that doesn't require us to destructure\n        tracing::info!(?params, \"creating timeline\");\n\n        match tenant\n            .create_timeline(params, state.broker_client.clone(), &ctx)\n            .await\n        {\n            Ok(new_timeline) => {\n                // Created. Construct a TimelineInfo for it.\n                let timeline_info = build_timeline_info_common(\n                    &new_timeline,\n                    &ctx,\n                    tenant::timeline::GetLogicalSizePriority::User,\n                )\n                .await\n                .map_err(ApiError::InternalServerError)?;\n                json_response(StatusCode::CREATED, timeline_info)\n            }\n            Err(_) if tenant.cancel.is_cancelled() => {\n                // In case we get some ugly error type during shutdown, cast it into a clean 503.\n                json_response(\n                    StatusCode::SERVICE_UNAVAILABLE,\n                    HttpErrorBody::from_msg(\"Tenant shutting down\".to_string()),\n                )\n            }\n            Err(e @ tenant::CreateTimelineError::Conflict) => {\n                json_response(StatusCode::CONFLICT, HttpErrorBody::from_msg(e.to_string()))\n            }\n            Err(e @ tenant::CreateTimelineError::AlreadyCreating) => json_response(\n                StatusCode::TOO_MANY_REQUESTS,\n                HttpErrorBody::from_msg(e.to_string()),\n            ),\n            Err(tenant::CreateTimelineError::AncestorLsn(err)) => 
json_response(\n                StatusCode::NOT_ACCEPTABLE,\n                HttpErrorBody::from_msg(format!(\"{err:#}\")),\n            ),\n            Err(e @ tenant::CreateTimelineError::AncestorNotActive) => json_response(\n                StatusCode::SERVICE_UNAVAILABLE,\n                HttpErrorBody::from_msg(e.to_string()),\n            ),\n            Err(e @ tenant::CreateTimelineError::AncestorArchived) => json_response(\n                StatusCode::NOT_ACCEPTABLE,\n                HttpErrorBody::from_msg(e.to_string()),\n            ),\n            Err(tenant::CreateTimelineError::ShuttingDown) => json_response(\n                StatusCode::SERVICE_UNAVAILABLE,\n                HttpErrorBody::from_msg(\"tenant shutting down\".to_string()),\n            ),\n            Err(tenant::CreateTimelineError::Other(err)) => Err(ApiError::InternalServerError(err)),\n        }\n    }\n    .instrument(info_span!(\"timeline_create\",\n        tenant_id = %tenant_shard_id.tenant_id,\n        shard_id = %tenant_shard_id.shard_slug(),\n        timeline_id = %new_timeline_id,\n    ))\n    .await\n}\n\nasync fn timeline_list_handler(\n    request: Request<Body>,\n    _cancel: CancellationToken,\n) -> Result<Response<Body>, ApiError> {\n    let tenant_shard_id: TenantShardId = parse_request_param(&request, \"tenant_shard_id\")?;\n    let include_non_incremental_logical_size: Option<bool> =\n        parse_query_param(&request, \"include-non-incremental-logical-size\")?;\n    let force_await_initial_logical_size: Option<bool> =\n        parse_query_param(&request, \"force-await-initial-logical-size\")?;\n    let include_image_consistent_lsn: Option<bool> =\n        parse_query_param(&request, \"include-image-consistent-lsn\")?;\n    check_permission(&request, Some(tenant_shard_id.tenant_id))?;\n\n    let state = get_state(&request);\n    let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Download);\n\n    let response_data = async {\n        let tenant = 
state\n            .tenant_manager\n            .get_attached_tenant_shard(tenant_shard_id)?;\n\n        tenant.wait_to_become_active(ACTIVE_TENANT_TIMEOUT).await?;\n\n        let timelines = tenant.list_timelines();\n\n        let mut response_data = Vec::with_capacity(timelines.len());\n        for timeline in timelines {\n            let timeline_info = build_timeline_info(\n                &timeline,\n                include_non_incremental_logical_size.unwrap_or(false),\n                force_await_initial_logical_size.unwrap_or(false),\n                include_image_consistent_lsn.unwrap_or(false),\n                &ctx,\n            )\n            .instrument(info_span!(\"build_timeline_info\", timeline_id = %timeline.timeline_id))\n            .await\n            .context(\"Failed to build timeline info\")\n            .map_err(ApiError::InternalServerError)?;\n\n            response_data.push(timeline_info);\n        }\n        Ok::<Vec<TimelineInfo>, ApiError>(response_data)\n    }\n    .instrument(info_span!(\"timeline_list\",\n                tenant_id = %tenant_shard_id.tenant_id,\n                shard_id = %tenant_shard_id.shard_slug()))\n    .await?;\n\n    json_response(StatusCode::OK, response_data)\n}\n\nasync fn timeline_and_offloaded_list_handler(\n    request: Request<Body>,\n    _cancel: CancellationToken,\n) -> Result<Response<Body>, ApiError> {\n    let tenant_shard_id: TenantShardId = parse_request_param(&request, \"tenant_shard_id\")?;\n    let include_non_incremental_logical_size: Option<bool> =\n        parse_query_param(&request, \"include-non-incremental-logical-size\")?;\n    let force_await_initial_logical_size: Option<bool> =\n        parse_query_param(&request, \"force-await-initial-logical-size\")?;\n    let include_image_consistent_lsn: Option<bool> =\n        parse_query_param(&request, \"include-image-consistent-lsn\")?;\n\n    check_permission(&request, Some(tenant_shard_id.tenant_id))?;\n\n    let state = 
get_state(&request);\n    let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Download);\n\n    let response_data = async {\n        let tenant = state\n            .tenant_manager\n            .get_attached_tenant_shard(tenant_shard_id)?;\n\n        tenant.wait_to_become_active(ACTIVE_TENANT_TIMEOUT).await?;\n\n        let (timelines, offloadeds) = tenant.list_timelines_and_offloaded();\n\n        let mut timeline_infos = Vec::with_capacity(timelines.len());\n        for timeline in timelines {\n            let timeline_info = build_timeline_info(\n                &timeline,\n                include_non_incremental_logical_size.unwrap_or(false),\n                force_await_initial_logical_size.unwrap_or(false),\n                include_image_consistent_lsn.unwrap_or(false),\n                &ctx,\n            )\n            .instrument(info_span!(\"build_timeline_info\", timeline_id = %timeline.timeline_id))\n            .await\n            .context(\"Failed to build timeline info\")\n            .map_err(ApiError::InternalServerError)?;\n\n            timeline_infos.push(timeline_info);\n        }\n        let offloaded_infos = offloadeds\n            .into_iter()\n            .map(|offloaded| build_timeline_offloaded_info(&offloaded))\n            .collect::<Vec<_>>();\n        let res = TimelinesInfoAndOffloaded {\n            timelines: timeline_infos,\n            offloaded: offloaded_infos,\n        };\n        Ok::<TimelinesInfoAndOffloaded, ApiError>(res)\n    }\n    .instrument(info_span!(\"timeline_and_offloaded_list\",\n                tenant_id = %tenant_shard_id.tenant_id,\n                shard_id = %tenant_shard_id.shard_slug()))\n    .await?;\n\n    json_response(StatusCode::OK, response_data)\n}\n\nasync fn timeline_preserve_initdb_handler(\n    request: Request<Body>,\n    _cancel: CancellationToken,\n) -> Result<Response<Body>, ApiError> {\n    let tenant_shard_id: TenantShardId = parse_request_param(&request, 
\"tenant_shard_id\")?;\n    let timeline_id: TimelineId = parse_request_param(&request, \"timeline_id\")?;\n    check_permission(&request, Some(tenant_shard_id.tenant_id))?;\n    let state = get_state(&request);\n\n    // Part of the process for disaster recovery from safekeeper-stored WAL:\n    // If we don't recover into a new timeline but want to keep the timeline ID,\n    // then the initdb archive is deleted. This endpoint copies it to a different\n    // location where timeline recreation can find it.\n\n    async {\n        let tenant = state\n            .tenant_manager\n            .get_attached_tenant_shard(tenant_shard_id)?;\n\n        let timeline = tenant.get_timeline(timeline_id, false)?;\n\n        timeline\n            .preserve_initdb_archive()\n            .await\n            .context(\"preserving initdb archive\")\n            .map_err(ApiError::InternalServerError)?;\n\n        Ok::<_, ApiError>(())\n    }\n    .instrument(info_span!(\"timeline_preserve_initdb_archive\",\n                tenant_id = %tenant_shard_id.tenant_id,\n                shard_id = %tenant_shard_id.shard_slug(),\n                %timeline_id))\n    .await?;\n\n    json_response(StatusCode::OK, ())\n}\n\nasync fn timeline_archival_config_handler(\n    mut request: Request<Body>,\n    _cancel: CancellationToken,\n) -> Result<Response<Body>, ApiError> {\n    let tenant_shard_id: TenantShardId = parse_request_param(&request, \"tenant_shard_id\")?;\n    let timeline_id: TimelineId = parse_request_param(&request, \"timeline_id\")?;\n\n    let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Warn);\n\n    let request_data: TimelineArchivalConfigRequest = json_request(&mut request).await?;\n    check_permission(&request, Some(tenant_shard_id.tenant_id))?;\n    let state = get_state(&request);\n\n    async {\n        let tenant = state\n            .tenant_manager\n            .get_attached_tenant_shard(tenant_shard_id)?;\n\n        
tenant.wait_to_become_active(ACTIVE_TENANT_TIMEOUT).await?;\n\n        tenant\n            .apply_timeline_archival_config(\n                timeline_id,\n                request_data.state,\n                state.broker_client.clone(),\n                ctx,\n            )\n            .await?;\n        Ok::<_, ApiError>(())\n    }\n    .instrument(info_span!(\"timeline_archival_config\",\n                tenant_id = %tenant_shard_id.tenant_id,\n                shard_id = %tenant_shard_id.shard_slug(),\n                state = ?request_data.state,\n                %timeline_id))\n    .await?;\n\n    json_response(StatusCode::OK, ())\n}\n\n/// This API is used to patch the index part of a timeline. You must ensure such patches are safe to apply. Use this API as an emergency\n/// measure only.\n///\n/// Some examples of safe patches:\n/// - Increase the gc_cutoff and gc_compaction_cutoff to a larger value in case of a bug that didn't bump the cutoff and cause read errors.\n/// - Force set the index part to use reldir v2 (migrating/migrated).\n///\n/// Some examples of unsafe patches:\n/// - Force set the index part from v2 to v1 (legacy). This will cause the code path to ignore anything written to the new keyspace and cause\n///   errors.\n/// - Decrease the gc_cutoff without validating the data really exists. 
It will cause read errors in the background.\nasync fn timeline_patch_index_part_handler(\n    mut request: Request<Body>,\n    _cancel: CancellationToken,\n) -> Result<Response<Body>, ApiError> {\n    let tenant_shard_id: TenantShardId = parse_request_param(&request, \"tenant_shard_id\")?;\n    let timeline_id: TimelineId = parse_request_param(&request, \"timeline_id\")?;\n\n    let request_data: TimelinePatchIndexPartRequest = json_request(&mut request).await?;\n    check_permission(&request, None)?; // require global permission for this request\n    let state = get_state(&request);\n\n    async {\n        let timeline =\n            active_timeline_of_active_tenant(&state.tenant_manager, tenant_shard_id, timeline_id)\n                .await?;\n\n        if request_data.rel_size_migration.is_none() && request_data.rel_size_migrated_at.is_some()\n        {\n            return Err(ApiError::BadRequest(anyhow!(\n                \"updating rel_size_migrated_at without rel_size_migration is not allowed\"\n            )));\n        }\n\n        if let Some(rel_size_migration) = request_data.rel_size_migration {\n            timeline\n                .update_rel_size_v2_status(rel_size_migration, request_data.rel_size_migrated_at)\n                .map_err(ApiError::InternalServerError)?;\n        }\n\n        if let Some(gc_compaction_last_completed_lsn) =\n            request_data.gc_compaction_last_completed_lsn\n        {\n            timeline\n                .update_gc_compaction_state(GcCompactionState {\n                    last_completed_lsn: gc_compaction_last_completed_lsn,\n                })\n                .map_err(ApiError::InternalServerError)?;\n        }\n\n        if let Some(applied_gc_cutoff_lsn) = request_data.applied_gc_cutoff_lsn {\n            {\n                let guard = timeline.applied_gc_cutoff_lsn.lock_for_write();\n                guard.store_and_unlock(applied_gc_cutoff_lsn);\n            }\n        }\n\n        if 
request_data.force_index_update {\n            timeline\n                .remote_client\n                .force_schedule_index_upload()\n                .context(\"force schedule index upload\")\n                .map_err(ApiError::InternalServerError)?;\n        }\n\n        Ok::<_, ApiError>(())\n    }\n    .instrument(info_span!(\"timeline_patch_index_part\",\n                tenant_id = %tenant_shard_id.tenant_id,\n                shard_id = %tenant_shard_id.shard_slug(),\n                %timeline_id))\n    .await?;\n\n    json_response(StatusCode::OK, ())\n}\n\nasync fn timeline_detail_handler(\n    request: Request<Body>,\n    _cancel: CancellationToken,\n) -> Result<Response<Body>, ApiError> {\n    let tenant_shard_id: TenantShardId = parse_request_param(&request, \"tenant_shard_id\")?;\n    let timeline_id: TimelineId = parse_request_param(&request, \"timeline_id\")?;\n    let include_non_incremental_logical_size: Option<bool> =\n        parse_query_param(&request, \"include-non-incremental-logical-size\")?;\n    let force_await_initial_logical_size: Option<bool> =\n        parse_query_param(&request, \"force-await-initial-logical-size\")?;\n    // HADRON\n    let include_image_consistent_lsn: Option<bool> =\n        parse_query_param(&request, \"include-image-consistent-lsn\")?;\n    check_permission(&request, Some(tenant_shard_id.tenant_id))?;\n\n    // Logical size calculation needs downloading.\n    let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Download);\n    let state = get_state(&request);\n\n    let timeline_info = async {\n        let tenant = state\n            .tenant_manager\n            .get_attached_tenant_shard(tenant_shard_id)?;\n\n        tenant.wait_to_become_active(ACTIVE_TENANT_TIMEOUT).await?;\n\n        let timeline = tenant.get_timeline(timeline_id, false)?;\n        let ctx = &ctx.with_scope_timeline(&timeline);\n\n        let timeline_info = build_timeline_info(\n            &timeline,\n            
include_non_incremental_logical_size.unwrap_or(false),\n            force_await_initial_logical_size.unwrap_or(false),\n            include_image_consistent_lsn.unwrap_or(false),\n            ctx,\n        )\n        .await\n        .context(\"get local timeline info\")\n        .map_err(ApiError::InternalServerError)?;\n\n        Ok::<_, ApiError>(timeline_info)\n    }\n    .instrument(info_span!(\"timeline_detail\",\n                tenant_id = %tenant_shard_id.tenant_id,\n                shard_id = %tenant_shard_id.shard_slug(),\n                %timeline_id))\n    .await?;\n\n    json_response(StatusCode::OK, timeline_info)\n}\n\nasync fn get_lsn_by_timestamp_handler(\n    request: Request<Body>,\n    cancel: CancellationToken,\n) -> Result<Response<Body>, ApiError> {\n    let tenant_shard_id: TenantShardId = parse_request_param(&request, \"tenant_shard_id\")?;\n    check_permission(&request, Some(tenant_shard_id.tenant_id))?;\n    let state = get_state(&request);\n\n    if !tenant_shard_id.is_shard_zero() {\n        // Requires SLRU contents, which are only stored on shard zero\n        return Err(ApiError::BadRequest(anyhow!(\n            \"Lsn calculations by timestamp are only available on shard zero\"\n        )));\n    }\n\n    let timeline_id: TimelineId = parse_request_param(&request, \"timeline_id\")?;\n    let timestamp_raw = must_get_query_param(&request, \"timestamp\")?;\n    let timestamp = humantime::parse_rfc3339(&timestamp_raw)\n        .with_context(|| format!(\"Invalid time: {timestamp_raw:?}\"))\n        .map_err(ApiError::BadRequest)?;\n    let timestamp_pg = postgres_ffi::to_pg_timestamp(timestamp);\n\n    let with_lease = parse_query_param(&request, \"with_lease\")?.unwrap_or(false);\n\n    let timeline =\n        active_timeline_of_active_tenant(&state.tenant_manager, tenant_shard_id, timeline_id)\n            .await?;\n    let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Download)\n        
.with_scope_timeline(&timeline);\n    let result = timeline\n        .find_lsn_for_timestamp(timestamp_pg, &cancel, &ctx)\n        .await?;\n\n    #[derive(serde::Serialize, Debug)]\n    struct Result {\n        lsn: Lsn,\n        kind: &'static str,\n        #[serde(default)]\n        #[serde(skip_serializing_if = \"Option::is_none\")]\n        #[serde(flatten)]\n        lease: Option<LsnLease>,\n    }\n    let (lsn, kind) = match result {\n        LsnForTimestamp::Present(lsn) => (lsn, \"present\"),\n        LsnForTimestamp::Future(lsn) => (lsn, \"future\"),\n        LsnForTimestamp::Past(lsn) => (lsn, \"past\"),\n        LsnForTimestamp::NoData(lsn) => (lsn, \"nodata\"),\n    };\n\n    let lease = if with_lease {\n        timeline\n            .init_lsn_lease(lsn, timeline.get_lsn_lease_length_for_ts(), &ctx)\n            .inspect_err(|_| {\n                warn!(\"fail to grant a lease to {}\", lsn);\n            })\n            .ok()\n    } else {\n        None\n    };\n\n    let result = Result { lsn, kind, lease };\n    let valid_until = result\n        .lease\n        .as_ref()\n        .map(|l| humantime::format_rfc3339_millis(l.valid_until).to_string());\n    tracing::info!(\n        lsn=?result.lsn,\n        kind=%result.kind,\n        timestamp=%timestamp_raw,\n        valid_until=?valid_until,\n        \"lsn_by_timestamp finished\"\n    );\n    json_response(StatusCode::OK, result)\n}\n\nasync fn get_timestamp_of_lsn_handler(\n    request: Request<Body>,\n    _cancel: CancellationToken,\n) -> Result<Response<Body>, ApiError> {\n    let tenant_shard_id: TenantShardId = parse_request_param(&request, \"tenant_shard_id\")?;\n    check_permission(&request, Some(tenant_shard_id.tenant_id))?;\n    let state = get_state(&request);\n\n    if !tenant_shard_id.is_shard_zero() {\n        // Requires SLRU contents, which are only stored on shard zero\n        return Err(ApiError::BadRequest(anyhow!(\n            \"Timestamp calculations by lsn are only available on 
shard zero\"\n        )));\n    }\n\n    let timeline_id: TimelineId = parse_request_param(&request, \"timeline_id\")?;\n\n    let lsn_str = must_get_query_param(&request, \"lsn\")?;\n    let lsn = Lsn::from_str(&lsn_str)\n        .with_context(|| format!(\"Invalid LSN: {lsn_str:?}\"))\n        .map_err(ApiError::BadRequest)?;\n\n    let timeline =\n        active_timeline_of_active_tenant(&state.tenant_manager, tenant_shard_id, timeline_id)\n            .await?;\n    let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Download)\n        .with_scope_timeline(&timeline);\n    let result = timeline.get_timestamp_for_lsn(lsn, &ctx).await?;\n\n    match result {\n        Some(time) => {\n            let time = format_rfc3339(\n                postgres_ffi::try_from_pg_timestamp(time).map_err(ApiError::InternalServerError)?,\n            )\n            .to_string();\n            json_response(StatusCode::OK, time)\n        }\n        None => Err(ApiError::PreconditionFailed(\n            format!(\"Timestamp for lsn {lsn} not found\").into(),\n        )),\n    }\n}\n\nasync fn timeline_delete_handler(\n    request: Request<Body>,\n    _cancel: CancellationToken,\n) -> Result<Response<Body>, ApiError> {\n    let tenant_shard_id: TenantShardId = parse_request_param(&request, \"tenant_shard_id\")?;\n    let timeline_id: TimelineId = parse_request_param(&request, \"timeline_id\")?;\n    check_permission(&request, Some(tenant_shard_id.tenant_id))?;\n\n    let state = get_state(&request);\n\n    let tenant = state\n        .tenant_manager\n        .get_attached_tenant_shard(tenant_shard_id)\n        .map_err(|e| {\n            match e {\n                // GetTenantError has a built-in conversion to ApiError, but in this context we don't\n                // want to treat missing tenants as 404, to avoid ambiguity with successful deletions.\n                GetTenantError::NotFound(_) | GetTenantError::ShardNotFound(_) => {\n                    
ApiError::PreconditionFailed(\n                        \"Requested tenant is missing\".to_string().into_boxed_str(),\n                    )\n                }\n                e => e.into(),\n            }\n        })?;\n    tenant.wait_to_become_active(ACTIVE_TENANT_TIMEOUT).await?;\n    tenant.delete_timeline(timeline_id).instrument(info_span!(\"timeline_delete\", tenant_id=%tenant_shard_id.tenant_id, shard_id=%tenant_shard_id.shard_slug(), %timeline_id))\n        .await?;\n\n    json_response(StatusCode::ACCEPTED, ())\n}\n\nasync fn tenant_reset_handler(\n    request: Request<Body>,\n    _cancel: CancellationToken,\n) -> Result<Response<Body>, ApiError> {\n    let tenant_shard_id: TenantShardId = parse_request_param(&request, \"tenant_shard_id\")?;\n    check_permission(&request, Some(tenant_shard_id.tenant_id))?;\n\n    let drop_cache: Option<bool> = parse_query_param(&request, \"drop_cache\")?;\n\n    let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Warn);\n    let state = get_state(&request);\n    state\n        .tenant_manager\n        .reset_tenant(tenant_shard_id, drop_cache.unwrap_or(false), &ctx)\n        .await\n        .map_err(ApiError::InternalServerError)?;\n\n    json_response(StatusCode::OK, ())\n}\n\nasync fn tenant_list_handler(\n    request: Request<Body>,\n    _cancel: CancellationToken,\n) -> Result<Response<Body>, ApiError> {\n    check_permission(&request, None)?;\n    let state = get_state(&request);\n\n    let response_data = state\n        .tenant_manager\n        .list_tenants()\n        .map_err(|_| {\n            ApiError::ResourceUnavailable(\"Tenant map is initializing or shutting down\".into())\n        })?\n        .iter()\n        .map(|(id, state, gen_)| TenantInfo {\n            id: *id,\n            state: state.clone(),\n            current_physical_size: None,\n            attachment_status: state.attachment_status(),\n            generation: (*gen_)\n                .into()\n                
.expect(\"Tenants are always attached with a generation\"),\n            gc_blocking: None,\n        })\n        .collect::<Vec<TenantInfo>>();\n\n    json_response(StatusCode::OK, response_data)\n}\n\nasync fn tenant_status(\n    request: Request<Body>,\n    _cancel: CancellationToken,\n) -> Result<Response<Body>, ApiError> {\n    let tenant_shard_id: TenantShardId = parse_request_param(&request, \"tenant_shard_id\")?;\n    check_permission(&request, Some(tenant_shard_id.tenant_id))?;\n    let state = get_state(&request);\n\n    // In tests, sometimes we want to query the state of a tenant without auto-activating it if it's currently waiting.\n    let activate = true;\n    #[cfg(feature = \"testing\")]\n    let activate = parse_query_param(&request, \"activate\")?.unwrap_or(activate);\n\n    let tenant_info = async {\n        let tenant = state\n            .tenant_manager\n            .get_attached_tenant_shard(tenant_shard_id)?;\n\n        if activate {\n            // This is advisory: we prefer to let the tenant activate on-demand when this function is\n            // called, but it is still valid to return 200 and describe the current state of the tenant\n            // if it doesn't make it into an active state.\n            tenant\n                .wait_to_become_active(ACTIVE_TENANT_TIMEOUT)\n                .await\n                .ok();\n        }\n\n        // Calculate total physical size of all timelines\n        let mut current_physical_size = 0;\n        for timeline in tenant.list_timelines().iter() {\n            current_physical_size += timeline.layer_size_sum().await;\n        }\n\n        let state = tenant.current_state();\n        Result::<_, ApiError>::Ok(TenantDetails {\n            tenant_info: TenantInfo {\n                id: tenant_shard_id,\n                state: state.clone(),\n                current_physical_size: Some(current_physical_size),\n                attachment_status: state.attachment_status(),\n                
generation: tenant\n                    .generation()\n                    .into()\n                    .expect(\"Tenants are always attached with a generation\"),\n                gc_blocking: tenant.gc_block.summary().map(|x| format!(\"{x:?}\")),\n            },\n            walredo: tenant.wal_redo_manager_status(),\n            timelines: tenant.list_timeline_ids(),\n        })\n    }\n    .instrument(info_span!(\"tenant_status_handler\",\n                tenant_id = %tenant_shard_id.tenant_id,\n                shard_id = %tenant_shard_id.shard_slug()))\n    .await?;\n\n    json_response(StatusCode::OK, tenant_info)\n}\n\nasync fn tenant_delete_handler(\n    request: Request<Body>,\n    _cancel: CancellationToken,\n) -> Result<Response<Body>, ApiError> {\n    // TODO openapi spec\n    let tenant_shard_id: TenantShardId = parse_request_param(&request, \"tenant_shard_id\")?;\n    check_permission(&request, Some(tenant_shard_id.tenant_id))?;\n\n    let state = get_state(&request);\n\n    state\n        .tenant_manager\n        .delete_tenant(tenant_shard_id)\n        .instrument(info_span!(\"tenant_delete_handler\",\n            tenant_id = %tenant_shard_id.tenant_id,\n            shard_id = %tenant_shard_id.shard_slug()\n        ))\n        .await?;\n\n    json_response(StatusCode::OK, ())\n}\n\n/// HTTP endpoint to query the current tenant_size of a tenant.\n///\n/// This is not used by consumption metrics under [`crate::consumption_metrics`], but can be used\n/// to debug any of the calculations. 
Requires `tenant_id` request parameter, supports\n/// `inputs_only=true|false` (default false) which supports debugging failure to calculate model\n/// values.\n///\n/// 'retention_period' query parameter overrides the cutoff that is used to calculate the size\n/// (only if it is shorter than the real cutoff).\n///\n/// Note: we don't update the cached size and prometheus metric here.\n/// The retention period might be different, and it's nice to have a method to just calculate it\n/// without modifying anything anyway.\nasync fn tenant_size_handler(\n    request: Request<Body>,\n    cancel: CancellationToken,\n) -> Result<Response<Body>, ApiError> {\n    let tenant_shard_id: TenantShardId = parse_request_param(&request, \"tenant_shard_id\")?;\n    check_permission(&request, Some(tenant_shard_id.tenant_id))?;\n    let inputs_only: Option<bool> = parse_query_param(&request, \"inputs_only\")?;\n    let retention_period: Option<u64> = parse_query_param(&request, \"retention_period\")?;\n    let headers = request.headers();\n    let state = get_state(&request);\n\n    if !tenant_shard_id.is_shard_zero() {\n        return Err(ApiError::BadRequest(anyhow!(\n            \"Size calculations are only available on shard zero\"\n        )));\n    }\n\n    let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Download);\n    let tenant = state\n        .tenant_manager\n        .get_attached_tenant_shard(tenant_shard_id)?;\n    tenant.wait_to_become_active(ACTIVE_TENANT_TIMEOUT).await?;\n\n    // this can be long operation\n    let inputs = tenant\n        .gather_size_inputs(\n            retention_period,\n            LogicalSizeCalculationCause::TenantSizeHandler,\n            &cancel,\n            &ctx,\n        )\n        .await\n        .map_err(|e| match e {\n            crate::tenant::size::CalculateSyntheticSizeError::Cancelled => ApiError::ShuttingDown,\n            other => ApiError::InternalServerError(anyhow::anyhow!(other)),\n        })?;\n\n    
let mut sizes = None;\n    let accepts_html = headers\n        .get(header::ACCEPT)\n        .map(|v| v == \"text/html\")\n        .unwrap_or_default();\n    if !inputs_only.unwrap_or(false) {\n        let storage_model = inputs.calculate_model();\n        let size = storage_model.calculate();\n\n        // If request header expects html, return html\n        if accepts_html {\n            return synthetic_size_html_response(inputs, storage_model, size);\n        }\n        sizes = Some(size);\n    } else if accepts_html {\n        return Err(ApiError::BadRequest(anyhow!(\n            \"inputs_only parameter is incompatible with html output request\"\n        )));\n    }\n\n    /// The type resides in the pageserver not to expose `ModelInputs`.\n    #[derive(serde::Serialize)]\n    struct TenantHistorySize {\n        id: TenantId,\n        /// Size is a mixture of WAL and logical size, so the unit is bytes.\n        ///\n        /// Will be none if `?inputs_only=true` was given.\n        size: Option<u64>,\n        /// Size of each segment used in the model.\n        /// Will be null if `?inputs_only=true` was given.\n        segment_sizes: Option<Vec<tenant_size_model::SegmentSizeResult>>,\n        inputs: crate::tenant::size::ModelInputs,\n    }\n\n    json_response(\n        StatusCode::OK,\n        TenantHistorySize {\n            id: tenant_shard_id.tenant_id,\n            size: sizes.as_ref().map(|x| x.total_size),\n            segment_sizes: sizes.map(|x| x.segments),\n            inputs,\n        },\n    )\n}\n\nasync fn tenant_shard_split_handler(\n    mut request: Request<Body>,\n    _cancel: CancellationToken,\n) -> Result<Response<Body>, ApiError> {\n    let req: TenantShardSplitRequest = json_request(&mut request).await?;\n\n    let tenant_shard_id: TenantShardId = parse_request_param(&request, \"tenant_shard_id\")?;\n    let state = get_state(&request);\n    let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Warn);\n\n    let 
tenant = state\n        .tenant_manager\n        .get_attached_tenant_shard(tenant_shard_id)?;\n    tenant.wait_to_become_active(ACTIVE_TENANT_TIMEOUT).await?;\n\n    let new_shards = state\n        .tenant_manager\n        .shard_split(\n            tenant,\n            ShardCount::new(req.new_shard_count),\n            req.new_stripe_size,\n            &ctx,\n        )\n        .await\n        .map_err(ApiError::InternalServerError)?;\n\n    json_response(StatusCode::OK, TenantShardSplitResponse { new_shards })\n}\n\nasync fn layer_map_info_handler(\n    request: Request<Body>,\n    _cancel: CancellationToken,\n) -> Result<Response<Body>, ApiError> {\n    let tenant_shard_id: TenantShardId = parse_request_param(&request, \"tenant_shard_id\")?;\n    let timeline_id: TimelineId = parse_request_param(&request, \"timeline_id\")?;\n    let reset: LayerAccessStatsReset =\n        parse_query_param(&request, \"reset\")?.unwrap_or(LayerAccessStatsReset::NoReset);\n    let state = get_state(&request);\n\n    check_permission(&request, Some(tenant_shard_id.tenant_id))?;\n\n    let timeline =\n        active_timeline_of_active_tenant(&state.tenant_manager, tenant_shard_id, timeline_id)\n            .await?;\n    let layer_map_info = timeline\n        .layer_map_info(reset)\n        .await\n        .map_err(|_shutdown| ApiError::ShuttingDown)?;\n\n    json_response(StatusCode::OK, layer_map_info)\n}\n\n#[instrument(skip_all, fields(tenant_id, shard_id, timeline_id, layer_name))]\nasync fn timeline_layer_scan_disposable_keys(\n    request: Request<Body>,\n    cancel: CancellationToken,\n) -> Result<Response<Body>, ApiError> {\n    let tenant_shard_id: TenantShardId = parse_request_param(&request, \"tenant_shard_id\")?;\n    let timeline_id: TimelineId = parse_request_param(&request, \"timeline_id\")?;\n    let layer_name: LayerName = parse_request_param(&request, \"layer_name\")?;\n\n    tracing::Span::current().record(\n        \"tenant_id\",\n        
tracing::field::display(&tenant_shard_id.tenant_id),\n    );\n    tracing::Span::current().record(\n        \"shard_id\",\n        tracing::field::display(tenant_shard_id.shard_slug()),\n    );\n    tracing::Span::current().record(\"timeline_id\", tracing::field::display(&timeline_id));\n    tracing::Span::current().record(\"layer_name\", tracing::field::display(&layer_name));\n\n    let state = get_state(&request);\n\n    check_permission(&request, Some(tenant_shard_id.tenant_id))?;\n\n    // technically the timeline need not be active for this scan to complete\n    let timeline =\n        active_timeline_of_active_tenant(&state.tenant_manager, tenant_shard_id, timeline_id)\n            .await?;\n\n    let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Download)\n        .with_scope_timeline(&timeline);\n\n    let guard = timeline\n        .layers\n        .read(LayerManagerLockHolder::GetLayerMapInfo)\n        .await;\n    let Some(layer) = guard.try_get_from_key(&layer_name.clone().into()) else {\n        return Err(ApiError::NotFound(\n            anyhow::anyhow!(\"Layer {tenant_shard_id}/{timeline_id}/{layer_name} not found\").into(),\n        ));\n    };\n\n    let resident_layer = layer\n        .download_and_keep_resident(&ctx)\n        .await\n        .map_err(|err| match err {\n            tenant::storage_layer::layer::DownloadError::TimelineShutdown\n            | tenant::storage_layer::layer::DownloadError::DownloadCancelled => {\n                ApiError::ShuttingDown\n            }\n            tenant::storage_layer::layer::DownloadError::ContextAndConfigReallyDeniesDownloads\n            | tenant::storage_layer::layer::DownloadError::DownloadRequired\n            | tenant::storage_layer::layer::DownloadError::NotFile(_)\n            | tenant::storage_layer::layer::DownloadError::DownloadFailed\n            | tenant::storage_layer::layer::DownloadError::PreStatFailed(_) => {\n                
ApiError::InternalServerError(err.into())\n            }\n            #[cfg(test)]\n            tenant::storage_layer::layer::DownloadError::Failpoint(_) => {\n                ApiError::InternalServerError(err.into())\n            }\n        })?;\n\n    let keys = resident_layer\n        .load_keys(&ctx)\n        .await\n        .map_err(ApiError::InternalServerError)?;\n\n    let shard_identity = timeline.get_shard_identity();\n\n    let mut disposable_count = 0;\n    let mut not_disposable_count = 0;\n    let cancel = cancel.clone();\n    for (i, key) in keys.into_iter().enumerate() {\n        if shard_identity.is_key_disposable(&key) {\n            disposable_count += 1;\n            tracing::debug!(key = %key, key.dbg=?key, \"disposable key\");\n        } else {\n            not_disposable_count += 1;\n        }\n        #[allow(clippy::collapsible_if)]\n        if i % 10000 == 0 {\n            if cancel.is_cancelled() || timeline.cancel.is_cancelled() || timeline.is_stopping() {\n                return Err(ApiError::ShuttingDown);\n            }\n        }\n    }\n\n    json_response(\n        StatusCode::OK,\n        pageserver_api::models::ScanDisposableKeysResponse {\n            disposable_count,\n            not_disposable_count,\n        },\n    )\n}\n\nasync fn timeline_download_heatmap_layers_handler(\n    request: Request<Body>,\n    _cancel: CancellationToken,\n) -> Result<Response<Body>, ApiError> {\n    // Only used in the case where remote storage is not configured.\n    const DEFAULT_MAX_CONCURRENCY: usize = 100;\n    // A conservative default.\n    const DEFAULT_CONCURRENCY: usize = 16;\n\n    let tenant_shard_id: TenantShardId = parse_request_param(&request, \"tenant_shard_id\")?;\n    let timeline_id: TimelineId = parse_request_param(&request, \"timeline_id\")?;\n\n    let desired_concurrency =\n        parse_query_param(&request, \"concurrency\")?.unwrap_or(DEFAULT_CONCURRENCY);\n    let recurse = parse_query_param(&request, 
\"recurse\")?.unwrap_or(false);\n\n    check_permission(&request, Some(tenant_shard_id.tenant_id))?;\n\n    let state = get_state(&request);\n    let timeline =\n        active_timeline_of_active_tenant(&state.tenant_manager, tenant_shard_id, timeline_id)\n            .await?;\n    let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Download)\n        .with_scope_timeline(&timeline);\n\n    let max_concurrency = get_config(&request)\n        .remote_storage_config\n        .as_ref()\n        .map(|c| c.concurrency_limit())\n        .unwrap_or(DEFAULT_MAX_CONCURRENCY);\n    let concurrency = std::cmp::min(max_concurrency, desired_concurrency);\n\n    timeline.start_heatmap_layers_download(concurrency, recurse, &ctx)?;\n\n    json_response(StatusCode::ACCEPTED, ())\n}\n\nasync fn timeline_shutdown_download_heatmap_layers_handler(\n    request: Request<Body>,\n    _cancel: CancellationToken,\n) -> Result<Response<Body>, ApiError> {\n    let tenant_shard_id: TenantShardId = parse_request_param(&request, \"tenant_shard_id\")?;\n    let timeline_id: TimelineId = parse_request_param(&request, \"timeline_id\")?;\n\n    check_permission(&request, Some(tenant_shard_id.tenant_id))?;\n\n    let state = get_state(&request);\n    let timeline =\n        active_timeline_of_active_tenant(&state.tenant_manager, tenant_shard_id, timeline_id)\n            .await?;\n\n    timeline.stop_and_drain_heatmap_layers_download().await;\n\n    json_response(StatusCode::OK, ())\n}\n\nasync fn layer_download_handler(\n    request: Request<Body>,\n    _cancel: CancellationToken,\n) -> Result<Response<Body>, ApiError> {\n    let tenant_shard_id: TenantShardId = parse_request_param(&request, \"tenant_shard_id\")?;\n    let timeline_id: TimelineId = parse_request_param(&request, \"timeline_id\")?;\n    let layer_file_name = get_request_param(&request, \"layer_file_name\")?;\n    check_permission(&request, Some(tenant_shard_id.tenant_id))?;\n    let layer_name = 
LayerName::from_str(layer_file_name)\n        .map_err(|s| ApiError::BadRequest(anyhow::anyhow!(s)))?;\n    let state = get_state(&request);\n\n    let timeline =\n        active_timeline_of_active_tenant(&state.tenant_manager, tenant_shard_id, timeline_id)\n            .await?;\n    let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Download)\n        .with_scope_timeline(&timeline);\n    let downloaded = timeline\n        .download_layer(&layer_name, &ctx)\n        .await\n        .map_err(|e| match e {\n            tenant::storage_layer::layer::DownloadError::TimelineShutdown\n            | tenant::storage_layer::layer::DownloadError::DownloadCancelled => {\n                ApiError::ShuttingDown\n            }\n            other => ApiError::InternalServerError(other.into()),\n        })?;\n\n    match downloaded {\n        Some(true) => json_response(StatusCode::OK, ()),\n        Some(false) => json_response(StatusCode::NOT_MODIFIED, ()),\n        None => json_response(\n            StatusCode::BAD_REQUEST,\n            format!(\"Layer {tenant_shard_id}/{timeline_id}/{layer_file_name} not found\"),\n        ),\n    }\n}\n\nasync fn evict_timeline_layer_handler(\n    request: Request<Body>,\n    _cancel: CancellationToken,\n) -> Result<Response<Body>, ApiError> {\n    let tenant_shard_id: TenantShardId = parse_request_param(&request, \"tenant_shard_id\")?;\n    check_permission(&request, Some(tenant_shard_id.tenant_id))?;\n    let timeline_id: TimelineId = parse_request_param(&request, \"timeline_id\")?;\n    let layer_file_name = get_request_param(&request, \"layer_file_name\")?;\n    let state = get_state(&request);\n\n    let layer_name = LayerName::from_str(layer_file_name)\n        .map_err(|s| ApiError::BadRequest(anyhow::anyhow!(s)))?;\n\n    let timeline =\n        active_timeline_of_active_tenant(&state.tenant_manager, tenant_shard_id, timeline_id)\n            .await?;\n    let evicted = timeline\n        
.evict_layer(&layer_name)\n        .await\n        .map_err(ApiError::InternalServerError)?;\n\n    match evicted {\n        Some(true) => json_response(StatusCode::OK, ()),\n        Some(false) => json_response(StatusCode::NOT_MODIFIED, ()),\n        None => json_response(\n            StatusCode::BAD_REQUEST,\n            format!(\"Layer {tenant_shard_id}/{timeline_id}/{layer_file_name} not found\"),\n        ),\n    }\n}\n\nasync fn timeline_gc_blocking_handler(\n    request: Request<Body>,\n    _cancel: CancellationToken,\n) -> Result<Response<Body>, ApiError> {\n    block_or_unblock_gc(request, true).await\n}\n\nasync fn timeline_gc_unblocking_handler(\n    request: Request<Body>,\n    _cancel: CancellationToken,\n) -> Result<Response<Body>, ApiError> {\n    block_or_unblock_gc(request, false).await\n}\n\n/// Traces GetPage@LSN requests for a timeline, and emits metadata in an efficient binary encoding.\n/// Use the `pagectl page-trace` command to decode and analyze the output.\nasync fn timeline_page_trace_handler(\n    request: Request<Body>,\n    cancel: CancellationToken,\n) -> Result<Response<Body>, ApiError> {\n    let tenant_shard_id: TenantShardId = parse_request_param(&request, \"tenant_shard_id\")?;\n    let timeline_id: TimelineId = parse_request_param(&request, \"timeline_id\")?;\n    let state = get_state(&request);\n    check_permission(&request, None)?;\n\n    let size_limit: usize = parse_query_param(&request, \"size_limit_bytes\")?.unwrap_or(1024 * 1024);\n    let time_limit_secs: u64 = parse_query_param(&request, \"time_limit_secs\")?.unwrap_or(5);\n\n    // Convert size limit to event limit based on the serialized size of an event. 
The event size is\n    // fixed, as the default bincode serializer uses fixed-width integer encoding.\n    let event_size = bincode::serialize(&PageTraceEvent::default())\n        .map_err(|err| ApiError::InternalServerError(err.into()))?\n        .len();\n    let event_limit = size_limit / event_size;\n\n    let timeline =\n        active_timeline_of_active_tenant(&state.tenant_manager, tenant_shard_id, timeline_id)\n            .await?;\n\n    // Install a page trace, unless one is already in progress. We just use a buffered channel,\n    // which may 2x the memory usage in the worst case, but it's still bounded.\n    let (trace_tx, mut trace_rx) = tokio::sync::mpsc::channel(event_limit);\n    let cur = timeline.page_trace.load();\n    let installed = cur.is_none()\n        && timeline\n            .page_trace\n            .compare_and_swap(cur, Some(Arc::new(trace_tx)))\n            .is_none();\n    if !installed {\n        return Err(ApiError::Conflict(\"page trace already active\".to_string()));\n    }\n    defer!(timeline.page_trace.store(None)); // uninstall on return\n\n    // Collect the trace and return it to the client. We could stream the response, but this is\n    // simple and fine.\n    let mut body = Vec::with_capacity(size_limit);\n    let deadline = Instant::now() + Duration::from_secs(time_limit_secs);\n\n    while body.len() < size_limit {\n        tokio::select! 
{\n            event = trace_rx.recv() => {\n                let Some(event) = event else {\n                    break; // shouldn't happen (sender doesn't close, unless timeline dropped)\n                };\n                bincode::serialize_into(&mut body, &event)\n                    .map_err(|err| ApiError::InternalServerError(err.into()))?;\n            }\n            _ = tokio::time::sleep_until(deadline) => break, // time limit reached\n            _ = cancel.cancelled() => return Err(ApiError::Cancelled),\n        }\n    }\n\n    Ok(Response::builder()\n        .status(StatusCode::OK)\n        .header(header::CONTENT_TYPE, \"application/octet-stream\")\n        .body(hyper::Body::from(body))\n        .unwrap())\n}\n\n/// Adding a block is `POST ../block_gc`, removing a block is `POST ../unblock_gc`.\n///\n/// Both are technically unsafe because they might fire off index uploads, thus they are POST.\nasync fn block_or_unblock_gc(\n    request: Request<Body>,\n    block: bool,\n) -> Result<Response<Body>, ApiError> {\n    use crate::tenant::remote_timeline_client::WaitCompletionError;\n    use crate::tenant::upload_queue::NotInitialized;\n    let tenant_shard_id: TenantShardId = parse_request_param(&request, \"tenant_shard_id\")?;\n    check_permission(&request, Some(tenant_shard_id.tenant_id))?;\n    let timeline_id: TimelineId = parse_request_param(&request, \"timeline_id\")?;\n    let state = get_state(&request);\n\n    let tenant = state\n        .tenant_manager\n        .get_attached_tenant_shard(tenant_shard_id)?;\n\n    tenant.wait_to_become_active(ACTIVE_TENANT_TIMEOUT).await?;\n\n    let timeline = tenant.get_timeline(timeline_id, true)?;\n\n    let fut = async {\n        if block {\n            timeline.block_gc(&tenant).await.map(|_| ())\n        } else {\n            timeline.unblock_gc(&tenant).await\n        }\n    };\n\n    let span = tracing::info_span!(\n        \"block_or_unblock_gc\",\n        tenant_id = %tenant_shard_id.tenant_id,\n      
  shard_id = %tenant_shard_id.shard_slug(),\n        timeline_id = %timeline_id,\n        block = block,\n    );\n\n    let res = fut.instrument(span).await;\n\n    res.map_err(|e| {\n        if e.is::<NotInitialized>() || e.is::<WaitCompletionError>() {\n            ApiError::ShuttingDown\n        } else {\n            ApiError::InternalServerError(e)\n        }\n    })?;\n\n    json_response(StatusCode::OK, ())\n}\n\n/// Get tenant_size SVG graph along with the JSON data.\nfn synthetic_size_html_response(\n    inputs: ModelInputs,\n    storage_model: StorageModel,\n    sizes: SizeResult,\n) -> Result<Response<Body>, ApiError> {\n    let mut timeline_ids: Vec<String> = Vec::new();\n    let mut timeline_map: HashMap<TimelineId, usize> = HashMap::new();\n    for (index, ti) in inputs.timeline_inputs.iter().enumerate() {\n        timeline_map.insert(ti.timeline_id, index);\n        timeline_ids.push(ti.timeline_id.to_string());\n    }\n    let seg_to_branch: Vec<(usize, SvgBranchKind)> = inputs\n        .segments\n        .iter()\n        .map(|seg| {\n            (\n                *timeline_map.get(&seg.timeline_id).unwrap(),\n                seg.kind.into(),\n            )\n        })\n        .collect();\n\n    let svg =\n        tenant_size_model::svg::draw_svg(&storage_model, &timeline_ids, &seg_to_branch, &sizes)\n            .map_err(ApiError::InternalServerError)?;\n\n    let mut response = String::new();\n\n    use std::fmt::Write;\n    write!(response, \"<html>\\n<body>\\n\").unwrap();\n    write!(response, \"<div>\\n{svg}\\n</div>\").unwrap();\n    writeln!(response, \"Project size: {}\", sizes.total_size).unwrap();\n    writeln!(response, \"<pre>\").unwrap();\n    writeln!(\n        response,\n        \"{}\",\n        serde_json::to_string_pretty(&inputs).unwrap()\n    )\n    .unwrap();\n    writeln!(\n        response,\n        \"{}\",\n        serde_json::to_string_pretty(&sizes.segments).unwrap()\n    )\n    .unwrap();\n    writeln!(response, 
\"</pre>\").unwrap();\n    write!(response, \"</body>\\n</html>\\n\").unwrap();\n\n    html_response(StatusCode::OK, response)\n}\n\npub fn html_response(status: StatusCode, data: String) -> Result<Response<Body>, ApiError> {\n    let response = Response::builder()\n        .status(status)\n        .header(header::CONTENT_TYPE, \"text/html\")\n        .body(Body::from(data.as_bytes().to_vec()))\n        .map_err(|e| ApiError::InternalServerError(e.into()))?;\n    Ok(response)\n}\n\nasync fn get_tenant_config_handler(\n    request: Request<Body>,\n    _cancel: CancellationToken,\n) -> Result<Response<Body>, ApiError> {\n    let tenant_shard_id: TenantShardId = parse_request_param(&request, \"tenant_shard_id\")?;\n    check_permission(&request, Some(tenant_shard_id.tenant_id))?;\n    let state = get_state(&request);\n\n    let tenant = state\n        .tenant_manager\n        .get_attached_tenant_shard(tenant_shard_id)?;\n\n    let response = HashMap::from([\n        (\n            \"tenant_specific_overrides\",\n            serde_json::to_value(tenant.tenant_specific_overrides())\n                .context(\"serializing tenant specific overrides\")\n                .map_err(ApiError::InternalServerError)?,\n        ),\n        (\n            \"effective_config\",\n            serde_json::to_value(tenant.effective_config())\n                .context(\"serializing effective config\")\n                .map_err(ApiError::InternalServerError)?,\n        ),\n    ]);\n\n    json_response(StatusCode::OK, response)\n}\n\nasync fn update_tenant_config_handler(\n    mut request: Request<Body>,\n    _cancel: CancellationToken,\n) -> Result<Response<Body>, ApiError> {\n    let request_data: TenantConfigRequest = json_request(&mut request).await?;\n    let tenant_id = request_data.tenant_id;\n    check_permission(&request, Some(tenant_id))?;\n\n    let new_tenant_conf = request_data.config;\n\n    let state = get_state(&request);\n\n    let tenant_shard_id = 
TenantShardId::unsharded(tenant_id);\n\n    let tenant = state\n        .tenant_manager\n        .get_attached_tenant_shard(tenant_shard_id)?;\n    tenant.wait_to_become_active(ACTIVE_TENANT_TIMEOUT).await?;\n\n    // This is a legacy API that only operates on attached tenants: the preferred\n    // API to use is the location_config/ endpoint, which lets the caller provide\n    // the full LocationConf.\n    let location_conf = LocationConf::attached_single(\n        new_tenant_conf.clone(),\n        tenant.get_generation(),\n        ShardParameters::from(tenant.get_shard_identity()),\n    );\n\n    tenant\n        .get_shard_identity()\n        .assert_equal(location_conf.shard); // not strictly necessary since we construct it above\n\n    crate::tenant::TenantShard::persist_tenant_config(state.conf, &tenant_shard_id, &location_conf)\n        .await\n        .map_err(|e| ApiError::InternalServerError(anyhow::anyhow!(e)))?;\n\n    let _ = tenant\n        .update_tenant_config(|_crnt| Ok(new_tenant_conf.clone()))\n        .expect(\"Closure returns Ok()\");\n\n    json_response(StatusCode::OK, ())\n}\n\nasync fn patch_tenant_config_handler(\n    mut request: Request<Body>,\n    _cancel: CancellationToken,\n) -> Result<Response<Body>, ApiError> {\n    let request_data: TenantConfigPatchRequest = json_request(&mut request).await?;\n    let tenant_id = request_data.tenant_id;\n    check_permission(&request, Some(tenant_id))?;\n\n    let state = get_state(&request);\n\n    let tenant_shard_id = TenantShardId::unsharded(tenant_id);\n\n    let tenant = state\n        .tenant_manager\n        .get_attached_tenant_shard(tenant_shard_id)?;\n    tenant.wait_to_become_active(ACTIVE_TENANT_TIMEOUT).await?;\n\n    let updated = tenant\n        .update_tenant_config(|crnt| {\n            crnt.apply_patch(request_data.config.clone())\n                .map_err(anyhow::Error::new)\n        })\n        .map_err(ApiError::BadRequest)?;\n\n    // This is a legacy API that only operates 
on attached tenants: the preferred\n    // API to use is the location_config/ endpoint, which lets the caller provide\n    // the full LocationConf.\n    let location_conf = LocationConf::attached_single(\n        updated,\n        tenant.get_generation(),\n        ShardParameters::from(tenant.get_shard_identity()),\n    );\n\n    tenant\n        .get_shard_identity()\n        .assert_equal(location_conf.shard); // not strictly necessary since we construct it above\n\n    crate::tenant::TenantShard::persist_tenant_config(state.conf, &tenant_shard_id, &location_conf)\n        .await\n        .map_err(|e| ApiError::InternalServerError(anyhow::anyhow!(e)))?;\n\n    json_response(StatusCode::OK, ())\n}\n\nasync fn put_tenant_location_config_handler(\n    mut request: Request<Body>,\n    _cancel: CancellationToken,\n) -> Result<Response<Body>, ApiError> {\n    let tenant_shard_id: TenantShardId = parse_request_param(&request, \"tenant_shard_id\")?;\n\n    let request_data: TenantLocationConfigRequest = json_request(&mut request).await?;\n    let flush = parse_query_param(&request, \"flush_ms\")?.map(Duration::from_millis);\n    let lazy = parse_query_param(&request, \"lazy\")?.unwrap_or(false);\n    check_permission(&request, Some(tenant_shard_id.tenant_id))?;\n\n    let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Warn);\n    let state = get_state(&request);\n    let conf = state.conf;\n\n    fail::fail_point!(\"put-location-conf-handler\", |_| {\n        Err(ApiError::ResourceUnavailable(\"failpoint\".into()))\n    });\n\n    // The `Detached` state is special, it doesn't upsert a tenant, it removes\n    // its local disk content and drops it from memory.\n    if let LocationConfigMode::Detached = request_data.config.mode {\n        if let Err(e) = state\n            .tenant_manager\n            .detach_tenant(conf, tenant_shard_id, &state.deletion_queue_client)\n            .instrument(info_span!(\"tenant_detach\",\n                tenant_id = 
%tenant_shard_id.tenant_id,\n                shard_id = %tenant_shard_id.shard_slug()\n            ))\n            .await\n        {\n            match e {\n                TenantStateError::SlotError(TenantSlotError::NotFound(_)) => {\n                    // This API is idempotent: a NotFound on a detach is fine.\n                }\n                _ => return Err(e.into()),\n            }\n        }\n        return json_response(StatusCode::OK, ());\n    }\n\n    let location_conf =\n        LocationConf::try_from(&request_data.config).map_err(ApiError::BadRequest)?;\n\n    // lazy==true queues up for activation or jumps the queue like normal when a compute connects,\n    // similar to at startup ordering.\n    let spawn_mode = if lazy {\n        tenant::SpawnMode::Lazy\n    } else {\n        tenant::SpawnMode::Eager\n    };\n\n    let tenant = state\n        .tenant_manager\n        .upsert_location(tenant_shard_id, location_conf, flush, spawn_mode, &ctx)\n        .await?;\n    let stripe_size = tenant.as_ref().map(|t| t.get_shard_stripe_size());\n    let attached = tenant.is_some();\n\n    if let Some(_flush_ms) = flush {\n        match state\n            .secondary_controller\n            .upload_tenant(tenant_shard_id)\n            .await\n        {\n            Ok(()) => {\n                tracing::info!(\"Uploaded heatmap during flush\");\n            }\n            Err(e) => {\n                tracing::warn!(\"Failed to flush heatmap: {e}\");\n            }\n        }\n    } else {\n        tracing::info!(\"No flush requested when configuring\");\n    }\n\n    // This API returns a vector of pageservers where the tenant is attached: this is\n    // primarily for use in the sharding service.  
For compatibility, we also return this\n    // when called directly on a pageserver, but the payload is always zero or one shards.\n    let mut response = TenantLocationConfigResponse {\n        shards: Vec::new(),\n        stripe_size: None,\n    };\n    if attached {\n        response.shards.push(TenantShardLocation {\n            shard_id: tenant_shard_id,\n            node_id: state.conf.id,\n        });\n        if tenant_shard_id.shard_count.count() > 1 {\n            // Stripe size should be set if we are attached\n            debug_assert!(stripe_size.is_some());\n            response.stripe_size = stripe_size;\n        }\n    }\n\n    json_response(StatusCode::OK, response)\n}\n\nasync fn list_location_config_handler(\n    request: Request<Body>,\n    _cancel: CancellationToken,\n) -> Result<Response<Body>, ApiError> {\n    let state = get_state(&request);\n    let slots = state.tenant_manager.list();\n    let result = LocationConfigListResponse {\n        tenant_shards: slots\n            .into_iter()\n            .map(|(tenant_shard_id, slot)| {\n                let v = match slot {\n                    TenantSlot::Attached(t) => Some(t.get_location_conf()),\n                    TenantSlot::Secondary(s) => Some(s.get_location_conf()),\n                    TenantSlot::InProgress(_) => None,\n                };\n                (tenant_shard_id, v)\n            })\n            .collect(),\n    };\n    json_response(StatusCode::OK, result)\n}\n\nasync fn get_location_config_handler(\n    request: Request<Body>,\n    _cancel: CancellationToken,\n) -> Result<Response<Body>, ApiError> {\n    let state = get_state(&request);\n    let tenant_shard_id: TenantShardId = parse_request_param(&request, \"tenant_shard_id\")?;\n    let slot = state.tenant_manager.get(tenant_shard_id);\n\n    let Some(slot) = slot else {\n        return Err(ApiError::NotFound(\n            anyhow::anyhow!(\"Tenant shard not found\").into(),\n        ));\n    };\n\n    let result: 
Option<LocationConfig> = match slot {\n        TenantSlot::Attached(t) => Some(t.get_location_conf()),\n        TenantSlot::Secondary(s) => Some(s.get_location_conf()),\n        TenantSlot::InProgress(_) => None,\n    };\n\n    json_response(StatusCode::OK, result)\n}\n\n// Do a time travel recovery on the given tenant/tenant shard. Tenant needs to be detached\n// (from all pageservers) as it invalidates consistency assumptions.\nasync fn tenant_time_travel_remote_storage_handler(\n    request: Request<Body>,\n    cancel: CancellationToken,\n) -> Result<Response<Body>, ApiError> {\n    let tenant_shard_id: TenantShardId = parse_request_param(&request, \"tenant_shard_id\")?;\n\n    check_permission(&request, Some(tenant_shard_id.tenant_id))?;\n\n    let timestamp_raw = must_get_query_param(&request, \"travel_to\")?;\n    let timestamp = humantime::parse_rfc3339(&timestamp_raw)\n        .with_context(|| format!(\"Invalid time for travel_to: {timestamp_raw:?}\"))\n        .map_err(ApiError::BadRequest)?;\n\n    let done_if_after_raw = must_get_query_param(&request, \"done_if_after\")?;\n    let done_if_after = humantime::parse_rfc3339(&done_if_after_raw)\n        .with_context(|| format!(\"Invalid time for done_if_after: {done_if_after_raw:?}\"))\n        .map_err(ApiError::BadRequest)?;\n\n    // This is just a sanity check to fend off naive wrong usages of the API:\n    // the tenant needs to be detached *everywhere*\n    let state = get_state(&request);\n    let we_manage_tenant = state.tenant_manager.manages_tenant_shard(tenant_shard_id);\n    if we_manage_tenant {\n        return Err(ApiError::BadRequest(anyhow!(\n            \"Tenant {tenant_shard_id} is already attached at this pageserver\"\n        )));\n    }\n\n    if timestamp > done_if_after {\n        return Err(ApiError::BadRequest(anyhow!(\n            \"The done_if_after timestamp comes before the timestamp to recover to\"\n        )));\n    }\n\n    tracing::info!(\n        \"Issuing time travel 
request internally. timestamp={timestamp_raw}, done_if_after={done_if_after_raw}\"\n    );\n\n    remote_timeline_client::upload::time_travel_recover_tenant(\n        &state.remote_storage,\n        &tenant_shard_id,\n        timestamp,\n        done_if_after,\n        &cancel,\n    )\n    .await\n    .map_err(|e| match e {\n        TimeTravelError::BadInput(e) => {\n            warn!(\"bad input error: {e}\");\n            ApiError::BadRequest(anyhow!(\"bad input error\"))\n        }\n        TimeTravelError::Unimplemented => {\n            ApiError::BadRequest(anyhow!(\"unimplemented for the configured remote storage\"))\n        }\n        TimeTravelError::Cancelled => ApiError::InternalServerError(anyhow!(\"cancelled\")),\n        TimeTravelError::TooManyVersions => {\n            ApiError::InternalServerError(anyhow!(\"too many versions in remote storage\"))\n        }\n        TimeTravelError::Other(e) => {\n            warn!(\"internal error: {e}\");\n            ApiError::InternalServerError(anyhow!(\"internal error\"))\n        }\n    })?;\n\n    json_response(StatusCode::OK, ())\n}\n\n/// Testing helper to transition a tenant to [`crate::tenant::TenantState::Broken`].\nasync fn handle_tenant_break(\n    r: Request<Body>,\n    _cancel: CancellationToken,\n) -> Result<Response<Body>, ApiError> {\n    let tenant_shard_id: TenantShardId = parse_request_param(&r, \"tenant_shard_id\")?;\n\n    let state = get_state(&r);\n    state\n        .tenant_manager\n        .get_attached_tenant_shard(tenant_shard_id)?\n        .set_broken(\"broken from test\".to_owned())\n        .await;\n\n    json_response(StatusCode::OK, ())\n}\n\n// Obtains an lsn lease on the given timeline.\nasync fn lsn_lease_handler(\n    mut request: Request<Body>,\n    _cancel: CancellationToken,\n) -> Result<Response<Body>, ApiError> {\n    let tenant_shard_id: TenantShardId = parse_request_param(&request, \"tenant_shard_id\")?;\n    let timeline_id: TimelineId = parse_request_param(&request, 
\"timeline_id\")?;\n    check_permission(&request, Some(tenant_shard_id.tenant_id))?;\n    let lsn = json_request::<LsnLeaseRequest>(&mut request).await?.lsn;\n\n    let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Download);\n\n    let state = get_state(&request);\n\n    let timeline =\n        active_timeline_of_active_tenant(&state.tenant_manager, tenant_shard_id, timeline_id)\n            .await?;\n\n    let result = async {\n        timeline\n            .init_lsn_lease(lsn, timeline.get_lsn_lease_length(), &ctx)\n            .map_err(|e| {\n                ApiError::InternalServerError(\n                    e.context(format!(\"invalid lsn lease request at {lsn}\")),\n                )\n            })\n    }\n    .instrument(info_span!(\"init_lsn_lease\", tenant_id = %tenant_shard_id.tenant_id, shard_id = %tenant_shard_id.shard_slug(), %timeline_id))\n    .await?;\n\n    json_response(StatusCode::OK, result)\n}\n\n// Run GC immediately on given timeline.\nasync fn timeline_gc_handler(\n    mut request: Request<Body>,\n    cancel: CancellationToken,\n) -> Result<Response<Body>, ApiError> {\n    let tenant_shard_id: TenantShardId = parse_request_param(&request, \"tenant_shard_id\")?;\n    let timeline_id: TimelineId = parse_request_param(&request, \"timeline_id\")?;\n    check_permission(&request, Some(tenant_shard_id.tenant_id))?;\n\n    let gc_req: TimelineGcRequest = json_request(&mut request).await?;\n\n    let state = get_state(&request);\n\n    let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Download);\n    let gc_result = state\n        .tenant_manager\n        .immediate_gc(tenant_shard_id, timeline_id, gc_req, cancel, &ctx)\n        .await?;\n\n    json_response(StatusCode::OK, gc_result)\n}\n\n// Cancel scheduled compaction tasks\nasync fn timeline_cancel_compact_handler(\n    request: Request<Body>,\n    _cancel: CancellationToken,\n) -> Result<Response<Body>, ApiError> {\n    let tenant_shard_id: 
TenantShardId = parse_request_param(&request, \"tenant_shard_id\")?;\n    let timeline_id: TimelineId = parse_request_param(&request, \"timeline_id\")?;\n    check_permission(&request, Some(tenant_shard_id.tenant_id))?;\n    let state = get_state(&request);\n    async {\n        let tenant = state\n            .tenant_manager\n            .get_attached_tenant_shard(tenant_shard_id)?;\n        tenant.cancel_scheduled_compaction(timeline_id);\n        json_response(StatusCode::OK, ())\n    }\n    .instrument(info_span!(\"timeline_cancel_compact\", tenant_id = %tenant_shard_id.tenant_id, shard_id = %tenant_shard_id.shard_slug(), %timeline_id))\n    .await\n}\n\n// Get compact info of a timeline\nasync fn timeline_compact_info_handler(\n    request: Request<Body>,\n    _cancel: CancellationToken,\n) -> Result<Response<Body>, ApiError> {\n    let tenant_shard_id: TenantShardId = parse_request_param(&request, \"tenant_shard_id\")?;\n    let timeline_id: TimelineId = parse_request_param(&request, \"timeline_id\")?;\n    check_permission(&request, Some(tenant_shard_id.tenant_id))?;\n    let state = get_state(&request);\n    async {\n        let tenant = state\n            .tenant_manager\n            .get_attached_tenant_shard(tenant_shard_id)?;\n        let resp = tenant.get_scheduled_compaction_tasks(timeline_id);\n        json_response(StatusCode::OK, resp)\n    }\n    .instrument(info_span!(\"timeline_compact_info\", tenant_id = %tenant_shard_id.tenant_id, shard_id = %tenant_shard_id.shard_slug(), %timeline_id))\n    .await\n}\n\n// Run compaction immediately on given timeline.\nasync fn timeline_compact_handler(\n    mut request: Request<Body>,\n    cancel: CancellationToken,\n) -> Result<Response<Body>, ApiError> {\n    let tenant_shard_id: TenantShardId = parse_request_param(&request, \"tenant_shard_id\")?;\n    let timeline_id: TimelineId = parse_request_param(&request, \"timeline_id\")?;\n    check_permission(&request, Some(tenant_shard_id.tenant_id))?;\n\n    let 
compact_request = json_request_maybe::<Option<CompactRequest>>(&mut request).await?;\n\n    let state = get_state(&request);\n\n    let mut flags = EnumSet::empty();\n\n    if Some(true) == parse_query_param::<_, bool>(&request, \"force_l0_compaction\")? {\n        flags |= CompactFlags::ForceL0Compaction;\n    }\n    if Some(true) == parse_query_param::<_, bool>(&request, \"force_repartition\")? {\n        flags |= CompactFlags::ForceRepartition;\n    }\n    if Some(true) == parse_query_param::<_, bool>(&request, \"force_image_layer_creation\")? {\n        flags |= CompactFlags::ForceImageLayerCreation;\n    }\n    if Some(true) == parse_query_param::<_, bool>(&request, \"enhanced_gc_bottom_most_compaction\")? {\n        flags |= CompactFlags::EnhancedGcBottomMostCompaction;\n    }\n    if Some(true) == parse_query_param::<_, bool>(&request, \"dry_run\")? {\n        flags |= CompactFlags::DryRun;\n    }\n    // Manual compaction does not yield for L0.\n\n    let wait_until_uploaded =\n        parse_query_param::<_, bool>(&request, \"wait_until_uploaded\")?.unwrap_or(false);\n\n    let wait_until_scheduled_compaction_done =\n        parse_query_param::<_, bool>(&request, \"wait_until_scheduled_compaction_done\")?\n            .unwrap_or(false);\n\n    let sub_compaction = compact_request\n        .as_ref()\n        .map(|r| r.sub_compaction)\n        .unwrap_or(false);\n    let sub_compaction_max_job_size_mb = compact_request\n        .as_ref()\n        .and_then(|r| r.sub_compaction_max_job_size_mb);\n\n    let options = CompactOptions {\n        compact_key_range: compact_request\n            .as_ref()\n            .and_then(|r| r.compact_key_range.clone()),\n        compact_lsn_range: compact_request\n            .as_ref()\n            .and_then(|r| r.compact_lsn_range.clone()),\n        flags,\n        sub_compaction,\n        sub_compaction_max_job_size_mb,\n        gc_compaction_do_metadata_compaction: false,\n    };\n\n    let scheduled = compact_request\n   
     .as_ref()\n        .map(|r| r.scheduled)\n        .unwrap_or(false);\n\n    async {\n        let timeline = active_timeline_of_active_tenant(&state.tenant_manager, tenant_shard_id, timeline_id).await?;\n        let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Download).with_scope_timeline(&timeline);\n        if scheduled {\n            let tenant = state\n                .tenant_manager\n                .get_attached_tenant_shard(tenant_shard_id)?;\n            let rx = tenant.schedule_compaction(timeline_id, options).await.map_err(ApiError::InternalServerError)?;\n            if wait_until_scheduled_compaction_done {\n                // It is possible that this will take a long time, dropping the HTTP request will not cancel the compaction.\n                rx.await.ok();\n            }\n        } else {\n            timeline\n                .compact_with_options(&cancel, options, &ctx)\n                .await\n                .map_err(|e| ApiError::InternalServerError(e.into()))?;\n            if wait_until_uploaded {\n                timeline.remote_client.wait_completion().await\n                // XXX map to correct ApiError for the cases where it's due to shutdown\n                .context(\"wait completion\").map_err(ApiError::InternalServerError)?;\n            }\n        }\n        json_response(StatusCode::OK, ())\n    }\n    .instrument(info_span!(\"manual_compaction\", tenant_id = %tenant_shard_id.tenant_id, shard_id = %tenant_shard_id.shard_slug(), %timeline_id))\n    .await\n}\n\nasync fn timeline_mark_invisible_handler(\n    mut request: Request<Body>,\n    _cancel: CancellationToken,\n) -> Result<Response<Body>, ApiError> {\n    let tenant_shard_id: TenantShardId = parse_request_param(&request, \"tenant_shard_id\")?;\n    let timeline_id: TimelineId = parse_request_param(&request, \"timeline_id\")?;\n    check_permission(&request, Some(tenant_shard_id.tenant_id))?;\n\n    let compact_request = 
json_request_maybe::<Option<MarkInvisibleRequest>>(&mut request).await?;\n\n    let state = get_state(&request);\n\n    let visibility = match compact_request {\n        Some(req) => match req.is_visible {\n            Some(true) => TimelineVisibilityState::Visible,\n            Some(false) | None => TimelineVisibilityState::Invisible,\n        },\n        None => TimelineVisibilityState::Invisible,\n    };\n\n    async {\n        let tenant = state\n            .tenant_manager\n            .get_attached_tenant_shard(tenant_shard_id)?;\n        let timeline = tenant.get_timeline(timeline_id, true)?;\n        timeline.remote_client.schedule_index_upload_for_timeline_invisible_state(visibility).map_err(ApiError::InternalServerError)?;\n        json_response(StatusCode::OK, ())\n    }\n    .instrument(info_span!(\"manual_timeline_mark_invisible\", tenant_id = %tenant_shard_id.tenant_id, shard_id = %tenant_shard_id.shard_slug(), %timeline_id))\n    .await\n}\n\n// Run offload immediately on given timeline.\nasync fn timeline_offload_handler(\n    request: Request<Body>,\n    _cancel: CancellationToken,\n) -> Result<Response<Body>, ApiError> {\n    let tenant_shard_id: TenantShardId = parse_request_param(&request, \"tenant_shard_id\")?;\n    let timeline_id: TimelineId = parse_request_param(&request, \"timeline_id\")?;\n    check_permission(&request, Some(tenant_shard_id.tenant_id))?;\n\n    let state = get_state(&request);\n\n    async {\n        let tenant = state\n            .tenant_manager\n            .get_attached_tenant_shard(tenant_shard_id)?;\n\n        if tenant.get_offloaded_timeline(timeline_id).is_ok() {\n            return json_response(StatusCode::OK, ());\n        }\n        let timeline =\n            active_timeline_of_active_tenant(&state.tenant_manager, tenant_shard_id, timeline_id)\n                .await?;\n\n        if !tenant.timeline_has_no_attached_children(timeline_id) {\n            return Err(ApiError::PreconditionFailed(\n                
\"timeline has attached children\".into(),\n            ));\n        }\n        if let (false, reason) = timeline.can_offload() {\n            return Err(ApiError::PreconditionFailed(\n                format!(\"Timeline::can_offload() check failed: {reason}\") .into(),\n            ));\n        }\n        offload_timeline(&tenant, &timeline)\n            .await\n            .map_err(|e| {\n                match e {\n                    OffloadError::Cancelled => ApiError::ResourceUnavailable(\"Timeline shutting down\".into()),\n                    OffloadError::AlreadyInProgress => ApiError::Conflict(\"Timeline already being offloaded or deleted\".into()),\n                    _ => ApiError::InternalServerError(anyhow!(e))\n                }\n            })?;\n\n        json_response(StatusCode::OK, ())\n    }\n    .instrument(info_span!(\"manual_timeline_offload\", tenant_id = %tenant_shard_id.tenant_id, shard_id = %tenant_shard_id.shard_slug(), %timeline_id))\n    .await\n}\n\n// Run checkpoint immediately on given timeline.\nasync fn timeline_checkpoint_handler(\n    request: Request<Body>,\n    cancel: CancellationToken,\n) -> Result<Response<Body>, ApiError> {\n    let tenant_shard_id: TenantShardId = parse_request_param(&request, \"tenant_shard_id\")?;\n    let timeline_id: TimelineId = parse_request_param(&request, \"timeline_id\")?;\n    check_permission(&request, Some(tenant_shard_id.tenant_id))?;\n\n    let state = get_state(&request);\n\n    let mut flags = EnumSet::empty();\n    if Some(true) == parse_query_param::<_, bool>(&request, \"force_l0_compaction\")? {\n        flags |= CompactFlags::ForceL0Compaction;\n    }\n    if Some(true) == parse_query_param::<_, bool>(&request, \"force_repartition\")? {\n        flags |= CompactFlags::ForceRepartition;\n    }\n    if Some(true) == parse_query_param::<_, bool>(&request, \"force_image_layer_creation\")? 
{\n        flags |= CompactFlags::ForceImageLayerCreation;\n    }\n\n    // By default, checkpoints come with a compaction, but this may be optionally disabled by tests that just want to flush + upload.\n    let compact = parse_query_param::<_, bool>(&request, \"compact\")?.unwrap_or(true);\n\n    let wait_until_flushed: bool =\n        parse_query_param(&request, \"wait_until_flushed\")?.unwrap_or(true);\n\n    let wait_until_uploaded =\n        parse_query_param::<_, bool>(&request, \"wait_until_uploaded\")?.unwrap_or(false);\n\n    async {\n        let timeline = active_timeline_of_active_tenant(&state.tenant_manager, tenant_shard_id, timeline_id).await?;\n        let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Download).with_scope_timeline(&timeline);\n        if wait_until_flushed {\n            timeline.freeze_and_flush().await\n        } else {\n            timeline.freeze().await.and(Ok(()))\n        }.map_err(|e| {\n                match e {\n                    tenant::timeline::FlushLayerError::Cancelled => ApiError::ShuttingDown,\n                    other => ApiError::InternalServerError(other.into()),\n\n                }\n            })?;\n        if compact {\n            timeline\n                .compact(&cancel, flags, &ctx)\n                .await\n                .map_err(|e|\n                    if e.is_cancel() {\n                        ApiError::ShuttingDown\n                    } else {\n                        ApiError::InternalServerError(e.into_anyhow())\n                    }\n                )?;\n        }\n\n        if wait_until_uploaded {\n            tracing::info!(\"Waiting for uploads to complete...\");\n            timeline.remote_client.wait_completion().await\n            // XXX map to correct ApiError for the cases where it's due to shutdown\n            .context(\"wait completion\").map_err(ApiError::InternalServerError)?;\n            tracing::info!(\"Uploads completed up to {}\", 
timeline.get_remote_consistent_lsn_projected().unwrap_or(Lsn(0)));\n        }\n\n        json_response(StatusCode::OK, ())\n    }\n    .instrument(info_span!(\"manual_checkpoint\", tenant_id = %tenant_shard_id.tenant_id, shard_id = %tenant_shard_id.shard_slug(), %timeline_id))\n    .await\n}\n\nasync fn timeline_download_remote_layers_handler_post(\n    mut request: Request<Body>,\n    _cancel: CancellationToken,\n) -> Result<Response<Body>, ApiError> {\n    let tenant_shard_id: TenantShardId = parse_request_param(&request, \"tenant_shard_id\")?;\n    let timeline_id: TimelineId = parse_request_param(&request, \"timeline_id\")?;\n    let body: DownloadRemoteLayersTaskSpawnRequest = json_request(&mut request).await?;\n    check_permission(&request, Some(tenant_shard_id.tenant_id))?;\n\n    let state = get_state(&request);\n\n    let timeline =\n        active_timeline_of_active_tenant(&state.tenant_manager, tenant_shard_id, timeline_id)\n            .await?;\n    let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Download)\n        .with_scope_timeline(&timeline);\n    match timeline.spawn_download_all_remote_layers(body, &ctx).await {\n        Ok(st) => json_response(StatusCode::ACCEPTED, st),\n        Err(st) => json_response(StatusCode::CONFLICT, st),\n    }\n}\n\nasync fn timeline_download_remote_layers_handler_get(\n    request: Request<Body>,\n    _cancel: CancellationToken,\n) -> Result<Response<Body>, ApiError> {\n    let tenant_shard_id: TenantShardId = parse_request_param(&request, \"tenant_shard_id\")?;\n    check_permission(&request, Some(tenant_shard_id.tenant_id))?;\n    let timeline_id: TimelineId = parse_request_param(&request, \"timeline_id\")?;\n    let state = get_state(&request);\n\n    let timeline =\n        active_timeline_of_active_tenant(&state.tenant_manager, tenant_shard_id, timeline_id)\n            .await?;\n    let info = timeline\n        .get_download_all_remote_layers_task_info()\n        .context(\"task never 
started since last pageserver process start\")\n        .map_err(|e| ApiError::NotFound(e.into()))?;\n    json_response(StatusCode::OK, info)\n}\n\nasync fn timeline_detach_ancestor_handler(\n    request: Request<Body>,\n    _cancel: CancellationToken,\n) -> Result<Response<Body>, ApiError> {\n    use pageserver_api::models::detach_ancestor::AncestorDetached;\n\n    use crate::tenant::timeline::detach_ancestor;\n\n    let tenant_shard_id: TenantShardId = parse_request_param(&request, \"tenant_shard_id\")?;\n    check_permission(&request, Some(tenant_shard_id.tenant_id))?;\n    let timeline_id: TimelineId = parse_request_param(&request, \"timeline_id\")?;\n    let behavior: Option<DetachBehavior> = parse_query_param(&request, \"detach_behavior\")?;\n\n    let behavior = behavior.unwrap_or_default();\n\n    let span = tracing::info_span!(\"detach_ancestor\", tenant_id=%tenant_shard_id.tenant_id, shard_id=%tenant_shard_id.shard_slug(), %timeline_id);\n\n    async move {\n        let mut options = detach_ancestor::Options::default();\n\n        let rewrite_concurrency =\n            parse_query_param::<_, std::num::NonZeroUsize>(&request, \"rewrite_concurrency\")?;\n        let copy_concurrency =\n            parse_query_param::<_, std::num::NonZeroUsize>(&request, \"copy_concurrency\")?;\n\n        [\n            (&mut options.rewrite_concurrency, rewrite_concurrency),\n            (&mut options.copy_concurrency, copy_concurrency),\n        ]\n        .into_iter()\n        .filter_map(|(target, val)| val.map(|val| (target, val)))\n        .for_each(|(target, val)| *target = val);\n\n        let state = get_state(&request);\n\n        let tenant = state\n            .tenant_manager\n            .get_attached_tenant_shard(tenant_shard_id)?;\n\n        tenant.wait_to_become_active(ACTIVE_TENANT_TIMEOUT).await?;\n\n        let ctx = RequestContext::new(TaskKind::DetachAncestor, DownloadBehavior::Download);\n        let ctx = &ctx;\n\n        // Flush the upload queues of 
all timelines before detaching ancestor. We do the same thing again\n        // during shutdown. This early upload ensures the pageserver does not need to upload too many\n        // things and create downtime during timeline reloads.\n        for timeline in tenant.list_timelines() {\n            timeline\n                .remote_client\n                .wait_completion()\n                .await\n                .map_err(|e| {\n                    ApiError::PreconditionFailed(format!(\"cannot drain upload queue: {e}\").into())\n                })?;\n        }\n\n        tracing::info!(\"all timeline upload queues are drained\");\n\n        let timeline = tenant.get_timeline(timeline_id, true)?;\n        let ctx = &ctx.with_scope_timeline(&timeline);\n\n        let progress = timeline\n            .prepare_to_detach_from_ancestor(&tenant, options, behavior, ctx)\n            .await?;\n\n        // uncomment to allow early as possible Tenant::drop\n        // drop(tenant);\n\n        let resp = match progress {\n            detach_ancestor::Progress::Prepared(attempt, prepared) => {\n                // it would be great to tag the guard on to the tenant activation future\n                let reparented_timelines = state\n                    .tenant_manager\n                    .complete_detaching_timeline_ancestor(\n                        tenant_shard_id,\n                        timeline_id,\n                        prepared,\n                        behavior,\n                        attempt,\n                        ctx,\n                    )\n                    .await?;\n\n                AncestorDetached {\n                    reparented_timelines,\n                }\n            }\n            detach_ancestor::Progress::Done(resp) => resp,\n        };\n\n        json_response(StatusCode::OK, resp)\n    }\n    .instrument(span)\n    .await\n}\n\nasync fn deletion_queue_flush(\n    r: Request<Body>,\n    cancel: CancellationToken,\n) -> 
Result<Response<Body>, ApiError> {\n    let state = get_state(&r);\n\n    let execute = parse_query_param(&r, \"execute\")?.unwrap_or(false);\n\n    let flush = async {\n        if execute {\n            state.deletion_queue_client.flush_execute().await\n        } else {\n            state.deletion_queue_client.flush().await\n        }\n    }\n    // DeletionQueueError's only case is shutting down.\n    .map_err(|_| ApiError::ShuttingDown);\n\n    tokio::select! {\n        res = flush => {\n            res.map(|()| json_response(StatusCode::OK, ()))?\n        }\n        _ = cancel.cancelled() => {\n            Err(ApiError::ShuttingDown)\n        }\n    }\n}\n\n/// Try if `GetPage@Lsn` is successful, useful for manual debugging.\n#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]\nstruct GetPageResponse {\n    pub page: Bytes,\n    pub layers_visited: u32,\n    pub delta_layers_visited: u32,\n    pub records: Vec<(Lsn, NeonWalRecord)>,\n    pub img: Option<(Lsn, Bytes)>,\n}\n\nasync fn getpage_at_lsn_handler(\n    request: Request<Body>,\n    cancel: CancellationToken,\n) -> Result<Response<Body>, ApiError> {\n    getpage_at_lsn_handler_inner(false, request, cancel).await\n}\n\nasync fn touchpage_at_lsn_handler(\n    request: Request<Body>,\n    cancel: CancellationToken,\n) -> Result<Response<Body>, ApiError> {\n    getpage_at_lsn_handler_inner(true, request, cancel).await\n}\n\n/// Try if `GetPage@Lsn` is successful, useful for manual debugging.\nasync fn getpage_at_lsn_handler_inner(\n    touch: bool,\n    request: Request<Body>,\n    _cancel: CancellationToken,\n) -> Result<Response<Body>, ApiError> {\n    let tenant_shard_id: TenantShardId = parse_request_param(&request, \"tenant_shard_id\")?;\n    let timeline_id: TimelineId = parse_request_param(&request, \"timeline_id\")?;\n    // Require pageserver admin permission for this API instead of only tenant-level token.\n    check_permission(&request, None)?;\n    let state = get_state(&request);\n\n  
  struct Key(pageserver_api::key::Key);\n\n    impl std::str::FromStr for Key {\n        type Err = anyhow::Error;\n\n        fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {\n            pageserver_api::key::Key::from_hex(s).map(Key)\n        }\n    }\n\n    let key: Key = parse_query_param(&request, \"key\")?\n        .ok_or_else(|| ApiError::BadRequest(anyhow!(\"missing 'key' query parameter\")))?;\n    let lsn: Option<Lsn> = parse_query_param(&request, \"lsn\")?;\n\n    async {\n        let timeline = active_timeline_of_active_tenant(&state.tenant_manager, tenant_shard_id, timeline_id).await?;\n        let ctx = RequestContextBuilder::new(TaskKind::MgmtRequest)\n            .download_behavior(DownloadBehavior::Download)\n            .scope(context::Scope::new_timeline(&timeline))\n            .read_path_debug(true)\n            .root();\n\n        // Use last_record_lsn if no lsn is provided\n        let lsn = lsn.unwrap_or_else(|| timeline.get_last_record_lsn());\n\n        if touch {\n            json_response(StatusCode::OK, ())\n        } else {\n            let mut reconstruct_state = ValuesReconstructState::new_with_debug(IoConcurrency::sequential());\n            let page = timeline.debug_get(key.0, lsn, &ctx, &mut reconstruct_state).await?;\n            let response = GetPageResponse {\n                page,\n                layers_visited: reconstruct_state.get_layers_visited(),\n                delta_layers_visited: reconstruct_state.get_delta_layers_visited(),\n                records: reconstruct_state.debug_state.records.clone(),\n                img: reconstruct_state.debug_state.img.clone(),\n            };\n\n            json_response(StatusCode::OK, response)\n        }\n    }\n    .instrument(info_span!(\"timeline_debug_get\", tenant_id = %tenant_shard_id.tenant_id, shard_id = %tenant_shard_id.shard_slug(), %timeline_id))\n    .await\n}\n\nasync fn timeline_collect_keyspace(\n    request: Request<Body>,\n    _cancel: 
CancellationToken,\n) -> Result<Response<Body>, ApiError> {\n    let tenant_shard_id: TenantShardId = parse_request_param(&request, \"tenant_shard_id\")?;\n    let timeline_id: TimelineId = parse_request_param(&request, \"timeline_id\")?;\n    check_permission(&request, Some(tenant_shard_id.tenant_id))?;\n    let state = get_state(&request);\n\n    let at_lsn: Option<Lsn> = parse_query_param(&request, \"at_lsn\")?;\n\n    async {\n        let timeline = active_timeline_of_active_tenant(&state.tenant_manager, tenant_shard_id, timeline_id).await?;\n        let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Download).with_scope_timeline(&timeline);\n        let at_lsn = at_lsn.unwrap_or_else(|| timeline.get_last_record_lsn());\n        let (dense_ks, sparse_ks) = timeline\n            .collect_keyspace(at_lsn, &ctx)\n            .await\n            .map_err(|e| ApiError::InternalServerError(e.into()))?;\n\n        // This API is currently used by pagebench. Pagebench will iterate all keys within the keyspace.\n        // Therefore, we split dense/sparse keys in this API.\n        let res = pageserver_api::models::partitioning::Partitioning { keys: dense_ks, sparse_keys: sparse_ks, at_lsn };\n\n        json_response(StatusCode::OK, res)\n    }\n    .instrument(info_span!(\"timeline_collect_keyspace\", tenant_id = %tenant_shard_id.tenant_id, shard_id = %tenant_shard_id.shard_slug(), %timeline_id))\n    .await\n}\n\nasync fn active_timeline_of_active_tenant(\n    tenant_manager: &TenantManager,\n    tenant_shard_id: TenantShardId,\n    timeline_id: TimelineId,\n) -> Result<Arc<Timeline>, ApiError> {\n    let tenant = tenant_manager.get_attached_tenant_shard(tenant_shard_id)?;\n\n    tenant.wait_to_become_active(ACTIVE_TENANT_TIMEOUT).await?;\n\n    Ok(tenant.get_timeline(timeline_id, true)?)\n}\n\nasync fn always_panic_handler(\n    req: Request<Body>,\n    _cancel: CancellationToken,\n) -> Result<Response<Body>, ApiError> {\n    // Deliberately cause 
a panic to exercise the panic hook registered via std::panic::set_hook().\n    // For pageserver, the relevant panic hook is `tracing_panic_hook` , and the `sentry` crate's wrapper around it.\n    // Use catch_unwind to ensure that tokio nor hyper are distracted by our panic.\n    let query = req.uri().query();\n    let _ = std::panic::catch_unwind(|| {\n        panic!(\"unconditional panic for testing panic hook integration; request query: {query:?}\")\n    });\n    json_response(StatusCode::NO_CONTENT, ())\n}\n\nasync fn disk_usage_eviction_run(\n    mut r: Request<Body>,\n    cancel: CancellationToken,\n) -> Result<Response<Body>, ApiError> {\n    check_permission(&r, None)?;\n\n    #[derive(Debug, Clone, Copy, serde::Serialize, serde::Deserialize)]\n    struct Config {\n        /// How many bytes to evict before reporting that pressure is relieved.\n        evict_bytes: u64,\n\n        #[serde(default)]\n        eviction_order: pageserver_api::config::EvictionOrder,\n    }\n\n    #[derive(Debug, Clone, Copy, serde::Serialize)]\n    struct Usage {\n        // remains unchanged after instantiation of the struct\n        evict_bytes: u64,\n        // updated by `add_available_bytes`\n        freed_bytes: u64,\n    }\n\n    impl crate::disk_usage_eviction_task::Usage for Usage {\n        fn has_pressure(&self) -> bool {\n            self.evict_bytes > self.freed_bytes\n        }\n\n        fn add_available_bytes(&mut self, bytes: u64) {\n            self.freed_bytes += bytes;\n        }\n    }\n\n    let config = json_request::<Config>(&mut r).await?;\n\n    let usage = Usage {\n        evict_bytes: config.evict_bytes,\n        freed_bytes: 0,\n    };\n\n    let state = get_state(&r);\n    let eviction_state = state.disk_usage_eviction_state.clone();\n\n    let res = crate::disk_usage_eviction_task::disk_usage_eviction_task_iteration_impl(\n        &eviction_state,\n        &state.remote_storage,\n        usage,\n        &state.tenant_manager,\n        
config.eviction_order.into(),\n        &cancel,\n    )\n    .await;\n\n    info!(?res, \"disk_usage_eviction_task_iteration_impl finished\");\n\n    let res = res.map_err(ApiError::InternalServerError)?;\n\n    json_response(StatusCode::OK, res)\n}\n\nasync fn secondary_upload_handler(\n    request: Request<Body>,\n    _cancel: CancellationToken,\n) -> Result<Response<Body>, ApiError> {\n    let state = get_state(&request);\n    let tenant_shard_id: TenantShardId = parse_request_param(&request, \"tenant_shard_id\")?;\n    state\n        .secondary_controller\n        .upload_tenant(tenant_shard_id)\n        .await?;\n\n    json_response(StatusCode::OK, ())\n}\n\nasync fn tenant_scan_remote_handler(\n    request: Request<Body>,\n    cancel: CancellationToken,\n) -> Result<Response<Body>, ApiError> {\n    let state = get_state(&request);\n    let tenant_id: TenantId = parse_request_param(&request, \"tenant_id\")?;\n\n    let mut response = TenantScanRemoteStorageResponse::default();\n\n    let (shards, _other_keys) =\n        list_remote_tenant_shards(&state.remote_storage, tenant_id, cancel.clone())\n            .await\n            .map_err(|e| ApiError::InternalServerError(anyhow::anyhow!(e)))?;\n\n    for tenant_shard_id in shards {\n        let (timeline_ids, _other_keys) =\n            list_remote_timelines(&state.remote_storage, tenant_shard_id, cancel.clone())\n                .await\n                .map_err(|e| ApiError::InternalServerError(anyhow::anyhow!(e)))?;\n\n        let mut generation = Generation::none();\n        for timeline_id in timeline_ids {\n            match download_index_part(\n                &state.remote_storage,\n                &tenant_shard_id,\n                &timeline_id,\n                Generation::MAX,\n                &cancel,\n            )\n            .instrument(info_span!(\"download_index_part\",\n                         tenant_id=%tenant_shard_id.tenant_id,\n                         
shard_id=%tenant_shard_id.shard_slug(),\n                         %timeline_id))\n            .await\n            {\n                Ok((index_part, index_generation, _index_mtime)) => {\n                    tracing::info!(\n                        \"Found timeline {tenant_shard_id}/{timeline_id} metadata (gen {index_generation:?}, {} layers, {} consistent LSN)\",\n                        index_part.layer_metadata.len(),\n                        index_part.metadata.disk_consistent_lsn()\n                    );\n                    generation = std::cmp::max(generation, index_generation);\n                }\n                Err(DownloadError::NotFound) => {\n                    // This is normal for tenants that were created with multiple shards: they have an unsharded path\n                    // containing the timeline's initdb tarball but no index.  Otherwise it is a bit strange.\n                    tracing::info!(\n                        \"Timeline path {tenant_shard_id}/{timeline_id} exists in remote storage but has no index, skipping\"\n                    );\n                    continue;\n                }\n                Err(e) => {\n                    return Err(ApiError::InternalServerError(anyhow::anyhow!(e)));\n                }\n            };\n        }\n\n        let result =\n            download_tenant_manifest(&state.remote_storage, &tenant_shard_id, generation, &cancel)\n                .instrument(info_span!(\"download_tenant_manifest\",\n                            tenant_id=%tenant_shard_id.tenant_id,\n                            shard_id=%tenant_shard_id.shard_slug()))\n                .await;\n        let stripe_size = match result {\n            Ok((manifest, _, _)) => manifest.stripe_size,\n            Err(DownloadError::NotFound) => None,\n            Err(err) => return Err(ApiError::InternalServerError(anyhow!(err))),\n        };\n\n        response.shards.push(TenantScanRemoteStorageShard {\n            tenant_shard_id,\n            
generation: generation.into(),\n            stripe_size,\n        });\n    }\n\n    if response.shards.is_empty() {\n        return Err(ApiError::NotFound(\n            anyhow::anyhow!(\"No shards found for tenant ID {tenant_id}\").into(),\n        ));\n    }\n\n    json_response(StatusCode::OK, response)\n}\n\nasync fn secondary_download_handler(\n    request: Request<Body>,\n    _cancel: CancellationToken,\n) -> Result<Response<Body>, ApiError> {\n    let state = get_state(&request);\n    let tenant_shard_id: TenantShardId = parse_request_param(&request, \"tenant_shard_id\")?;\n    let wait = parse_query_param(&request, \"wait_ms\")?.map(Duration::from_millis);\n\n    // We don't need this to issue the download request, but:\n    // - it enables us to cleanly return 404 if we get a request for an absent shard\n    // - we will use this to provide status feedback in the response\n    let Some(secondary_tenant) = state\n        .tenant_manager\n        .get_secondary_tenant_shard(tenant_shard_id)\n    else {\n        return Err(ApiError::NotFound(\n            anyhow::anyhow!(\"Shard {} not found\", tenant_shard_id).into(),\n        ));\n    };\n\n    let timeout = wait.unwrap_or(Duration::MAX);\n\n    let result = tokio::time::timeout(\n        timeout,\n        state.secondary_controller.download_tenant(tenant_shard_id),\n    )\n    .await;\n\n    let progress = secondary_tenant.progress.lock().unwrap().clone();\n\n    let status = match result {\n        Ok(Ok(())) => {\n            if progress.layers_downloaded >= progress.layers_total {\n                // Download job ran to completion\n                StatusCode::OK\n            } else {\n                // Download dropped out without errors because it ran out of time budget\n                StatusCode::ACCEPTED\n            }\n        }\n        // Edge case: downloads aren't usually fallible: things like a missing heatmap are considered\n        // okay.  
We could get an error here in the unlikely edge case that the tenant\n        // was detached between our check above and executing the download job.\n        Ok(Err(e)) => return Err(e.into()),\n        // A timeout is not an error: we have started the download, we're just not done\n        // yet.  The caller will get a response body indicating status.\n        Err(_) => StatusCode::ACCEPTED,\n    };\n\n    json_response(status, progress)\n}\n\nasync fn wait_lsn_handler(\n    mut request: Request<Body>,\n    cancel: CancellationToken,\n) -> Result<Response<Body>, ApiError> {\n    let tenant_shard_id: TenantShardId = parse_request_param(&request, \"tenant_shard_id\")?;\n    let wait_lsn_request: TenantWaitLsnRequest = json_request(&mut request).await?;\n\n    let state = get_state(&request);\n    let tenant = state\n        .tenant_manager\n        .get_attached_tenant_shard(tenant_shard_id)?;\n\n    let mut wait_futures = Vec::default();\n    for timeline in tenant.list_timelines() {\n        let Some(lsn) = wait_lsn_request.timelines.get(&timeline.timeline_id) else {\n            continue;\n        };\n\n        let fut = {\n            let timeline = timeline.clone();\n            let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Error);\n            async move {\n                timeline\n                    .wait_lsn(\n                        *lsn,\n                        WaitLsnWaiter::HttpEndpoint,\n                        WaitLsnTimeout::Custom(wait_lsn_request.timeout),\n                        &ctx,\n                    )\n                    .await\n            }\n        };\n        wait_futures.push(fut);\n    }\n\n    if wait_futures.is_empty() {\n        return json_response(StatusCode::NOT_FOUND, ());\n    }\n\n    let all_done = tokio::select! 
{\n        results = join_all(wait_futures) => {\n            results.iter().all(|res| res.is_ok())\n        },\n        _ = cancel.cancelled() => {\n            return Err(ApiError::Cancelled);\n        }\n    };\n\n    let status = if all_done {\n        StatusCode::OK\n    } else {\n        StatusCode::ACCEPTED\n    };\n\n    json_response(status, ())\n}\n\nasync fn secondary_status_handler(\n    request: Request<Body>,\n    _cancel: CancellationToken,\n) -> Result<Response<Body>, ApiError> {\n    let state = get_state(&request);\n    let tenant_shard_id: TenantShardId = parse_request_param(&request, \"tenant_shard_id\")?;\n\n    let Some(secondary_tenant) = state\n        .tenant_manager\n        .get_secondary_tenant_shard(tenant_shard_id)\n    else {\n        return Err(ApiError::NotFound(\n            anyhow::anyhow!(\"Shard {} not found\", tenant_shard_id).into(),\n        ));\n    };\n\n    let progress = secondary_tenant.progress.lock().unwrap().clone();\n\n    json_response(StatusCode::OK, progress)\n}\n\nasync fn handler_404(_: Request<Body>) -> Result<Response<Body>, ApiError> {\n    json_response(\n        StatusCode::NOT_FOUND,\n        HttpErrorBody::from_msg(\"page not found\".to_owned()),\n    )\n}\n\nasync fn post_tracing_event_handler(\n    mut r: Request<Body>,\n    _cancel: CancellationToken,\n) -> Result<Response<Body>, ApiError> {\n    #[derive(Debug, serde::Deserialize)]\n    #[serde(rename_all = \"lowercase\")]\n    enum Level {\n        Error,\n        Warn,\n        Info,\n        Debug,\n        Trace,\n    }\n    #[derive(Debug, serde::Deserialize)]\n    struct Request {\n        level: Level,\n        message: String,\n    }\n    let body: Request = json_request(&mut r)\n        .await\n        .map_err(|_| ApiError::BadRequest(anyhow::anyhow!(\"invalid JSON body\")))?;\n\n    match body.level {\n        Level::Error => tracing::error!(?body.message),\n        Level::Warn => tracing::warn!(?body.message),\n        Level::Info => 
tracing::info!(?body.message),\n        Level::Debug => tracing::debug!(?body.message),\n        Level::Trace => tracing::trace!(?body.message),\n    }\n\n    json_response(StatusCode::OK, ())\n}\n\nasync fn put_io_engine_handler(\n    mut r: Request<Body>,\n    _cancel: CancellationToken,\n) -> Result<Response<Body>, ApiError> {\n    check_permission(&r, None)?;\n    let kind: crate::virtual_file::IoEngineKind = json_request(&mut r).await?;\n    crate::virtual_file::io_engine::set(kind);\n    json_response(StatusCode::OK, ())\n}\n\nasync fn put_io_mode_handler(\n    mut r: Request<Body>,\n    _cancel: CancellationToken,\n) -> Result<Response<Body>, ApiError> {\n    check_permission(&r, None)?;\n    let mode: IoMode = json_request(&mut r).await?;\n    crate::virtual_file::set_io_mode(mode);\n    json_response(StatusCode::OK, ())\n}\n\n/// Polled by control plane.\n///\n/// See [`crate::utilization`].\nasync fn get_utilization(\n    r: Request<Body>,\n    _cancel: CancellationToken,\n) -> Result<Response<Body>, ApiError> {\n    fail::fail_point!(\"get-utilization-http-handler\", |_| {\n        Err(ApiError::ResourceUnavailable(\"failpoint\".into()))\n    });\n\n    // this probably could be completely public, but lets make that change later.\n    check_permission(&r, None)?;\n\n    let state = get_state(&r);\n    let mut g = state.latest_utilization.lock().await;\n\n    let regenerate_every = Duration::from_secs(1);\n    let still_valid = g\n        .as_ref()\n        .is_some_and(|(captured_at, _)| captured_at.elapsed() < regenerate_every);\n\n    // avoid needless statvfs calls even though those should be non-blocking fast.\n    // regenerate at most 1Hz to allow polling at any rate.\n    if !still_valid {\n        let path = state.conf.tenants_path();\n        let doc =\n            crate::utilization::regenerate(state.conf, path.as_std_path(), &state.tenant_manager)\n                .map_err(ApiError::InternalServerError)?;\n\n        let mut buf = Vec::new();\n 
       serde_json::to_writer(&mut buf, &doc)\n            .context(\"serialize\")\n            .map_err(ApiError::InternalServerError)?;\n\n        let body = bytes::Bytes::from(buf);\n\n        *g = Some((std::time::Instant::now(), body));\n    }\n\n    // hyper 0.14 doesn't yet have Response::clone so this is a bit of extra legwork\n    let cached = g.as_ref().expect(\"just set\").1.clone();\n\n    Response::builder()\n        .header(hyper::http::header::CONTENT_TYPE, \"application/json\")\n        // thought of using http date header, but that is second precision which does not give any\n        // debugging aid\n        .status(StatusCode::OK)\n        .body(hyper::Body::from(cached))\n        .context(\"build response\")\n        .map_err(ApiError::InternalServerError)\n}\n\n/// HADRON\nasync fn list_tenant_visible_size_handler(\n    request: Request<Body>,\n    _cancel: CancellationToken,\n) -> Result<Response<Body>, ApiError> {\n    check_permission(&request, None)?;\n    let state = get_state(&request);\n\n    let mut map = BTreeMap::new();\n    for (tenant_shard_id, slot) in state.tenant_manager.list() {\n        match slot {\n            TenantSlot::Attached(tenant) => {\n                let visible_size = tenant.get_visible_size();\n                map.insert(tenant_shard_id, visible_size);\n            }\n            TenantSlot::Secondary(_) | TenantSlot::InProgress(_) => {\n                continue;\n            }\n        }\n    }\n\n    json_response(StatusCode::OK, map)\n}\n\nasync fn list_aux_files(\n    mut request: Request<Body>,\n    _cancel: CancellationToken,\n) -> Result<Response<Body>, ApiError> {\n    let tenant_shard_id: TenantShardId = parse_request_param(&request, \"tenant_shard_id\")?;\n    let timeline_id: TimelineId = parse_request_param(&request, \"timeline_id\")?;\n    let body: ListAuxFilesRequest = json_request(&mut request).await?;\n    check_permission(&request, Some(tenant_shard_id.tenant_id))?;\n\n    let state = 
get_state(&request);\n\n    let timeline =\n        active_timeline_of_active_tenant(&state.tenant_manager, tenant_shard_id, timeline_id)\n            .await?;\n\n    let io_concurrency = IoConcurrency::spawn_from_conf(\n        state.conf.get_vectored_concurrent_io,\n        timeline.gate.enter().map_err(|_| ApiError::Cancelled)?,\n    );\n\n    let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Download)\n        .with_scope_timeline(&timeline);\n    let files = timeline\n        .list_aux_files(body.lsn, &ctx, io_concurrency)\n        .await?;\n    json_response(StatusCode::OK, files)\n}\n\nasync fn perf_info(\n    request: Request<Body>,\n    _cancel: CancellationToken,\n) -> Result<Response<Body>, ApiError> {\n    let tenant_shard_id: TenantShardId = parse_request_param(&request, \"tenant_shard_id\")?;\n    let timeline_id: TimelineId = parse_request_param(&request, \"timeline_id\")?;\n    check_permission(&request, Some(tenant_shard_id.tenant_id))?;\n\n    let state = get_state(&request);\n\n    let timeline =\n        active_timeline_of_active_tenant(&state.tenant_manager, tenant_shard_id, timeline_id)\n            .await?;\n\n    let result = timeline.perf_info().await;\n\n    json_response(StatusCode::OK, result)\n}\n\nasync fn ingest_aux_files(\n    mut request: Request<Body>,\n    _cancel: CancellationToken,\n) -> Result<Response<Body>, ApiError> {\n    let tenant_shard_id: TenantShardId = parse_request_param(&request, \"tenant_shard_id\")?;\n    let timeline_id: TimelineId = parse_request_param(&request, \"timeline_id\")?;\n    let body: IngestAuxFilesRequest = json_request(&mut request).await?;\n    check_permission(&request, Some(tenant_shard_id.tenant_id))?;\n\n    let state = get_state(&request);\n\n    let timeline =\n        active_timeline_of_active_tenant(&state.tenant_manager, tenant_shard_id, timeline_id)\n            .await?;\n\n    let mut modification = timeline.begin_modification(\n        
Lsn(timeline.get_last_record_lsn().0 + 8), /* advance LSN by 8 */\n    );\n    let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Download);\n    for (fname, content) in body.aux_files {\n        modification\n            .put_file(&fname, content.as_bytes(), &ctx)\n            .await\n            .map_err(|e| ApiError::InternalServerError(e.into()))?;\n    }\n    modification\n        .commit(&ctx)\n        .await\n        .map_err(ApiError::InternalServerError)?;\n\n    json_response(StatusCode::OK, ())\n}\n\n/// Report on the largest tenants on this pageserver, for the storage controller to identify\n/// candidates for splitting\nasync fn post_top_tenants(\n    mut r: Request<Body>,\n    _cancel: CancellationToken,\n) -> Result<Response<Body>, ApiError> {\n    check_permission(&r, None)?;\n    let request: TopTenantShardsRequest = json_request(&mut r).await?;\n    let state = get_state(&r);\n\n    fn get_size_metric(sizes: &TopTenantShardItem, order_by: &TenantSorting) -> u64 {\n        match order_by {\n            TenantSorting::ResidentSize => sizes.resident_size,\n            TenantSorting::MaxLogicalSize => sizes.max_logical_size,\n            TenantSorting::MaxLogicalSizePerShard => sizes.max_logical_size_per_shard,\n        }\n    }\n\n    #[derive(Eq, PartialEq)]\n    struct HeapItem {\n        metric: u64,\n        sizes: TopTenantShardItem,\n    }\n\n    impl PartialOrd for HeapItem {\n        fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {\n            Some(self.cmp(other))\n        }\n    }\n\n    /// Heap items have reverse ordering on their metric: this enables using BinaryHeap, which\n    /// supports popping the greatest item but not the smallest.\n    impl Ord for HeapItem {\n        fn cmp(&self, other: &Self) -> std::cmp::Ordering {\n            Reverse(self.metric).cmp(&Reverse(other.metric))\n        }\n    }\n\n    let mut top_n: BinaryHeap<HeapItem> = BinaryHeap::with_capacity(request.limit);\n\n    
// FIXME: this is a lot of clones to take this tenant list\n    for (tenant_shard_id, tenant_slot) in state.tenant_manager.list() {\n        if let Some(shards_lt) = request.where_shards_lt {\n            // Ignore tenants which already have >= this many shards\n            if tenant_shard_id.shard_count >= shards_lt {\n                continue;\n            }\n        }\n\n        let sizes = match tenant_slot {\n            TenantSlot::Attached(tenant) => tenant.get_sizes(),\n            TenantSlot::Secondary(_) | TenantSlot::InProgress(_) => {\n                continue;\n            }\n        };\n        let metric = get_size_metric(&sizes, &request.order_by);\n\n        if let Some(gt) = request.where_gt {\n            // Ignore tenants whose metric is <= the lower size threshold, to do less sorting work\n            if metric <= gt {\n                continue;\n            }\n        };\n\n        match top_n.peek() {\n            None => {\n                // Top N list is empty: candidate becomes first member\n                top_n.push(HeapItem { metric, sizes });\n            }\n            Some(i) if i.metric > metric && top_n.len() < request.limit => {\n                // Lowest item in list is greater than our candidate, but we aren't at limit yet: push to end\n                top_n.push(HeapItem { metric, sizes });\n            }\n            Some(i) if i.metric > metric => {\n                // List is at limit and lowest value is greater than our candidate, drop it.\n            }\n            Some(_) => top_n.push(HeapItem { metric, sizes }),\n        }\n\n        while top_n.len() > request.limit {\n            top_n.pop();\n        }\n    }\n\n    json_response(\n        StatusCode::OK,\n        TopTenantShardsResponse {\n            shards: top_n.into_iter().map(|i| i.sizes).collect(),\n        },\n    )\n}\n\nasync fn put_tenant_timeline_import_basebackup(\n    request: Request<Body>,\n    _cancel: CancellationToken,\n) -> 
Result<Response<Body>, ApiError> {\n    let tenant_id: TenantId = parse_request_param(&request, \"tenant_id\")?;\n    let timeline_id: TimelineId = parse_request_param(&request, \"timeline_id\")?;\n    let base_lsn: Lsn = must_parse_query_param(&request, \"base_lsn\")?;\n    let end_lsn: Lsn = must_parse_query_param(&request, \"end_lsn\")?;\n    let pg_version: PgMajorVersion = must_parse_query_param(&request, \"pg_version\")?;\n\n    check_permission(&request, Some(tenant_id))?;\n\n    let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Warn);\n\n    let tenant_shard_id = TenantShardId::unsharded(tenant_id);\n\n    let span = info_span!(\"import_basebackup\",\n        tenant_id=%tenant_id, timeline_id=%timeline_id, shard_id=%tenant_shard_id.shard_slug(),\n        base_lsn=%base_lsn, end_lsn=%end_lsn, pg_version=%pg_version);\n    async move {\n        let state = get_state(&request);\n        let tenant = state\n            .tenant_manager\n            .get_attached_tenant_shard(tenant_shard_id)?;\n\n        let broker_client = state.broker_client.clone();\n\n        let mut body = StreamReader::new(\n            request\n                .into_body()\n                .map(|res| res.map_err(|error| std::io::Error::other(anyhow::anyhow!(error)))),\n        );\n\n        tenant.wait_to_become_active(ACTIVE_TENANT_TIMEOUT).await?;\n\n        let (timeline, timeline_ctx) = tenant\n            .create_empty_timeline(timeline_id, base_lsn, pg_version, &ctx)\n            .map_err(ApiError::InternalServerError)\n            .await?;\n\n        // TODO mark timeline as not ready until it reaches end_lsn.\n        // We might have some wal to import as well, and we should prevent compute\n        // from connecting before that and writing conflicting wal.\n        //\n        // This is not relevant for pageserver->pageserver migrations, since there's\n        // no wal to import. 
But should be fixed if we want to import from postgres.\n\n        // TODO leave clean state on error. For now you can use detach to clean\n        // up broken state from a failed import.\n\n        // Import basebackup provided via CopyData\n        info!(\"importing basebackup\");\n\n        timeline\n            .import_basebackup_from_tar(\n                tenant.clone(),\n                &mut body,\n                base_lsn,\n                broker_client,\n                &timeline_ctx,\n            )\n            .await\n            .map_err(ApiError::InternalServerError)?;\n\n        // Read the end of the tar archive.\n        read_tar_eof(body)\n            .await\n            .map_err(ApiError::InternalServerError)?;\n\n        // TODO check checksum\n        // Meanwhile you can verify client-side by taking fullbackup\n        // and checking that it matches in size with what was imported.\n        // It wouldn't work if base came from vanilla postgres though,\n        // since we discard some log files.\n\n        info!(\"done\");\n        json_response(StatusCode::OK, ())\n    }\n    .instrument(span)\n    .await\n}\n\nasync fn put_tenant_timeline_import_wal(\n    request: Request<Body>,\n    _cancel: CancellationToken,\n) -> Result<Response<Body>, ApiError> {\n    let tenant_id: TenantId = parse_request_param(&request, \"tenant_id\")?;\n    let timeline_id: TimelineId = parse_request_param(&request, \"timeline_id\")?;\n    let start_lsn: Lsn = must_parse_query_param(&request, \"start_lsn\")?;\n    let end_lsn: Lsn = must_parse_query_param(&request, \"end_lsn\")?;\n\n    check_permission(&request, Some(tenant_id))?;\n\n    let span = info_span!(\"import_wal\", tenant_id=%tenant_id, timeline_id=%timeline_id, start_lsn=%start_lsn, end_lsn=%end_lsn);\n    async move {\n        let state = get_state(&request);\n\n        let timeline = active_timeline_of_active_tenant(&state.tenant_manager, TenantShardId::unsharded(tenant_id), timeline_id).await?;\n      
  let ctx = RequestContextBuilder::new(TaskKind::MgmtRequest)\n            .download_behavior(DownloadBehavior::Warn)\n            .scope(context::Scope::new_timeline(&timeline))\n            .root();\n\n        let mut body = StreamReader::new(request.into_body().map(|res| {\n            res.map_err(|error| {\n                std::io::Error::other( anyhow::anyhow!(error))\n            })\n        }));\n\n        let last_record_lsn = timeline.get_last_record_lsn();\n        if last_record_lsn != start_lsn {\n            return Err(ApiError::InternalServerError(anyhow::anyhow!(\"Cannot import WAL from Lsn {start_lsn} because timeline does not start from the same lsn: {last_record_lsn}\")));\n        }\n\n        // TODO leave clean state on error. For now you can use detach to clean\n        // up broken state from a failed import.\n\n        // Import wal provided via CopyData\n        info!(\"importing wal\");\n        crate::import_datadir::import_wal_from_tar(&timeline, &mut body, start_lsn, end_lsn, &ctx).await.map_err(ApiError::InternalServerError)?;\n        info!(\"wal import complete\");\n\n        // Read the end of the tar archive.\n        read_tar_eof(body).await.map_err(ApiError::InternalServerError)?;\n\n        // TODO Does it make sense to overshoot?\n        if timeline.get_last_record_lsn() < end_lsn {\n            return Err(ApiError::InternalServerError(anyhow::anyhow!(\"Cannot import WAL from Lsn {start_lsn} because timeline does not start from the same lsn: {last_record_lsn}\")));\n        }\n\n        // Flush data to disk, then upload to s3. 
No need for a forced checkpoint.\n        // We only want to persist the data, and it doesn't matter if it's in the\n        // shape of deltas or images.\n        info!(\"flushing layers\");\n        timeline.freeze_and_flush().await.map_err(|e| match e {\n            tenant::timeline::FlushLayerError::Cancelled => ApiError::ShuttingDown,\n            other => ApiError::InternalServerError(anyhow::anyhow!(other)),\n        })?;\n\n        info!(\"done\");\n\n        json_response(StatusCode::OK, ())\n    }.instrument(span).await\n}\n\n/// Activate a timeline after its import has completed\n///\n/// The endpoint is idempotent and callers are expected to retry all\n/// errors until a successful response.\nasync fn activate_post_import_handler(\n    request: Request<Body>,\n    _cancel: CancellationToken,\n) -> Result<Response<Body>, ApiError> {\n    let tenant_shard_id: TenantShardId = parse_request_param(&request, \"tenant_shard_id\")?;\n    check_permission(&request, Some(tenant_shard_id.tenant_id))?;\n\n    let timeline_id: TimelineId = parse_request_param(&request, \"timeline_id\")?;\n    const DEFAULT_ACTIVATE_TIMEOUT: Duration = Duration::from_secs(1);\n    let activate_timeout = parse_query_param(&request, \"timeline_activate_timeout_ms\")?\n        .map(Duration::from_millis)\n        .unwrap_or(DEFAULT_ACTIVATE_TIMEOUT);\n\n    let span = info_span!(\n        \"activate_post_import_handler\",\n        tenant_id=%tenant_shard_id.tenant_id,\n        timeline_id=%timeline_id,\n        shard_id=%tenant_shard_id.shard_slug()\n    );\n\n    async move {\n        let state = get_state(&request);\n        let tenant = state\n            .tenant_manager\n            .get_attached_tenant_shard(tenant_shard_id)?;\n\n        tenant.wait_to_become_active(ACTIVE_TENANT_TIMEOUT).await?;\n\n        tenant.finalize_importing_timeline(timeline_id).await?;\n\n        match tenant.get_timeline(timeline_id, false) {\n            Ok(_timeline) => {\n                // Timeline 
is already visible. Reset not required: fall through.\n            }\n            Err(GetTimelineError::NotFound { .. }) => {\n                // This is crude: we reset the whole tenant such that the new timeline is detected\n                // and activated. We can come up with something more granular in the future.\n                //\n                // Note that we only reset the tenant if required: when the timeline is\n                // not present in [`Tenant::timelines`].\n                let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Warn);\n                state\n                    .tenant_manager\n                    .reset_tenant(tenant_shard_id, false, &ctx)\n                    .await\n                    .map_err(ApiError::InternalServerError)?;\n            }\n            Err(GetTimelineError::ShuttingDown) => {\n                return Err(ApiError::ShuttingDown);\n            }\n            Err(GetTimelineError::NotActive { .. }) => {\n                unreachable!(\"Called get_timeline with active_only=false\");\n            }\n        }\n\n        let timeline = tenant.get_timeline(timeline_id, false)?;\n\n        let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Warn)\n            .with_scope_timeline(&timeline);\n\n        let result =\n            tokio::time::timeout(activate_timeout, timeline.wait_to_become_active(&ctx)).await;\n        match result {\n            Ok(Ok(())) => {\n                // fallthrough\n            }\n            // Timeline reached some other state that's not active\n            // TODO(vlad): if the tenant is broken, return a permananet error\n            Ok(Err(_timeline_state)) => {\n                return Err(ApiError::InternalServerError(anyhow::anyhow!(\n                    \"Timeline activation failed\"\n                )));\n            }\n            // Activation timed out\n            Err(_) => {\n                return Err(ApiError::Timeout(\"Timeline 
activation timed out\".into()));\n            }\n        }\n\n        let timeline_info = build_timeline_info(\n            &timeline, false, // include_non_incremental_logical_size,\n            false, // force_await_initial_logical_size\n            false, // include_image_consistent_lsn\n            &ctx,\n        )\n        .await\n        .context(\"get local timeline info\")\n        .map_err(ApiError::InternalServerError)?;\n\n        json_response(StatusCode::OK, timeline_info)\n    }\n    .instrument(span)\n    .await\n}\n\n// [Hadron] Reset gauge metrics that are used to raise alerts. We need this API as a stop-gap measure to reset alerts\n// after we manually rectify situations such as local SSD data loss. We will eventually automate this.\nasync fn hadron_reset_alert_gauges(\n    request: Request<Body>,\n    _cancel: CancellationToken,\n) -> Result<Response<Body>, ApiError> {\n    check_permission(&request, None)?;\n    LOCAL_DATA_LOSS_SUSPECTED.set(0);\n    json_response(StatusCode::OK, ())\n}\n\n/// Read the end of a tar archive.\n///\n/// A tar archive normally ends with two consecutive blocks of zeros, 512 bytes each.\n/// `tokio_tar` already read the first such block. Read the second all-zeros block,\n/// and check that there is no more data after the EOF marker.\n///\n/// 'tar' command can also write extra blocks of zeros, up to a record\n/// size, controlled by the --record-size argument. 
Ignore them too.\nasync fn read_tar_eof(mut reader: (impl tokio::io::AsyncRead + Unpin)) -> anyhow::Result<()> {\n    use tokio::io::AsyncReadExt;\n    let mut buf = [0u8; 512];\n\n    // Read the all-zeros block, and verify it\n    let mut total_bytes = 0;\n    while total_bytes < 512 {\n        let nbytes = reader.read(&mut buf[total_bytes..]).await?;\n        total_bytes += nbytes;\n        if nbytes == 0 {\n            break;\n        }\n    }\n    if total_bytes < 512 {\n        anyhow::bail!(\"incomplete or invalid tar EOF marker\");\n    }\n    if !buf.iter().all(|&x| x == 0) {\n        anyhow::bail!(\"invalid tar EOF marker\");\n    }\n\n    // Drain any extra zero-blocks after the EOF marker\n    let mut trailing_bytes = 0;\n    let mut seen_nonzero_bytes = false;\n    loop {\n        let nbytes = reader.read(&mut buf).await?;\n        trailing_bytes += nbytes;\n        if !buf.iter().all(|&x| x == 0) {\n            seen_nonzero_bytes = true;\n        }\n        if nbytes == 0 {\n            break;\n        }\n    }\n    if seen_nonzero_bytes {\n        anyhow::bail!(\"unexpected non-zero bytes after the tar archive\");\n    }\n    if trailing_bytes % 512 != 0 {\n        anyhow::bail!(\n            \"unexpected number of zeros ({trailing_bytes}), not divisible by tar block size (512 bytes), after the tar archive\"\n        );\n    }\n    Ok(())\n}\n\nasync fn force_refresh_feature_flag(\n    request: Request<Body>,\n    _cancel: CancellationToken,\n) -> Result<Response<Body>, ApiError> {\n    let tenant_shard_id: TenantShardId = parse_request_param(&request, \"tenant_shard_id\")?;\n    check_permission(&request, Some(tenant_shard_id.tenant_id))?;\n\n    let state = get_state(&request);\n    let tenant = state\n        .tenant_manager\n        .get_attached_tenant_shard(tenant_shard_id)?;\n    tenant\n        .feature_resolver\n        .refresh_properties_and_flags(&tenant);\n    json_response(StatusCode::OK, ())\n}\n\nasync fn 
tenant_evaluate_feature_flag(\n    request: Request<Body>,\n    _cancel: CancellationToken,\n) -> Result<Response<Body>, ApiError> {\n    let tenant_shard_id: TenantShardId = parse_request_param(&request, \"tenant_shard_id\")?;\n    check_permission(&request, Some(tenant_shard_id.tenant_id))?;\n\n    let flag: String = parse_request_param(&request, \"flag_key\")?;\n    let as_type: Option<String> = parse_query_param(&request, \"as\")?;\n\n    let state = get_state(&request);\n\n    async {\n        let tenant = state\n            .tenant_manager\n            .get_attached_tenant_shard(tenant_shard_id)?;\n        // TODO: the properties we get here might be stale right after it is collected. But such races are rare (updated every 10s)\n        // and we don't need to worry about it for now.\n        let properties = tenant.feature_resolver.collect_properties();\n        if as_type.as_deref() == Some(\"boolean\") {\n            let result = tenant.feature_resolver.evaluate_boolean(&flag);\n            let result = result.map(|_| true).map_err(|e| e.to_string());\n            json_response(StatusCode::OK, json!({ \"result\": result, \"properties\": properties }))\n        } else if as_type.as_deref() == Some(\"multivariate\") {\n            let result = tenant.feature_resolver.evaluate_multivariate(&flag).map_err(|e| e.to_string());\n            json_response(StatusCode::OK, json!({ \"result\": result, \"properties\": properties }))\n        } else {\n            // Auto infer the type of the feature flag.\n            let is_boolean = tenant.feature_resolver.is_feature_flag_boolean(&flag).map_err(|e| ApiError::InternalServerError(anyhow::anyhow!(\"{e}\")))?;\n            if is_boolean {\n                let result = tenant.feature_resolver.evaluate_boolean(&flag);\n                let result = result.map(|_| true).map_err(|e| e.to_string());\n                json_response(StatusCode::OK, json!({ \"result\": result, \"properties\": properties }))\n            } else 
{\n                let result = tenant.feature_resolver.evaluate_multivariate(&flag).map_err(|e| e.to_string());\n                json_response(StatusCode::OK, json!({ \"result\": result, \"properties\": properties }))\n            }\n        }\n    }\n    .instrument(info_span!(\"tenant_evaluate_feature_flag\", tenant_id = %tenant_shard_id.tenant_id, shard_id = %tenant_shard_id.shard_slug()))\n    .await\n}\n\nasync fn force_override_feature_flag_for_testing_put(\n    request: Request<Body>,\n    _cancel: CancellationToken,\n) -> Result<Response<Body>, ApiError> {\n    check_permission(&request, None)?;\n\n    let flag: String = parse_request_param(&request, \"flag_key\")?;\n    let value: String = must_parse_query_param(&request, \"value\")?;\n    let state = get_state(&request);\n    state\n        .feature_resolver\n        .force_override_for_testing(&flag, Some(&value));\n    json_response(StatusCode::OK, ())\n}\n\nasync fn force_override_feature_flag_for_testing_delete(\n    request: Request<Body>,\n    _cancel: CancellationToken,\n) -> Result<Response<Body>, ApiError> {\n    check_permission(&request, None)?;\n\n    let flag: String = parse_request_param(&request, \"flag_key\")?;\n    let state = get_state(&request);\n    state\n        .feature_resolver\n        .force_override_for_testing(&flag, None);\n    json_response(StatusCode::OK, ())\n}\n\nasync fn update_feature_flag_spec(\n    mut request: Request<Body>,\n    _cancel: CancellationToken,\n) -> Result<Response<Body>, ApiError> {\n    check_permission(&request, None)?;\n    let body = json_request(&mut request).await?;\n    let state = get_state(&request);\n    state\n        .feature_resolver\n        .update(body)\n        .map_err(ApiError::InternalServerError)?;\n    json_response(StatusCode::OK, ())\n}\n\n/// Common functionality of all the HTTP API handlers.\n///\n/// - Adds a tracing span to each request (by `request_span`)\n/// - Logs the request depending on the request method (by 
`request_span`)\n/// - Logs the response if it was not successful (by `request_span`)\n/// - Shields the handler function from async cancellations. Hyper can drop the handler\n///   Future if the connection to the client is lost, but most of the pageserver code is\n///   not async cancellation safe. This converts the dropped future into a graceful cancellation\n///   request with a CancellationToken.\nasync fn api_handler<R, H>(request: Request<Body>, handler: H) -> Result<Response<Body>, ApiError>\nwhere\n    R: std::future::Future<Output = Result<Response<Body>, ApiError>> + Send + 'static,\n    H: FnOnce(Request<Body>, CancellationToken) -> R + Send + Sync + 'static,\n{\n    if request.uri() != &\"/v1/failpoints\".parse::<Uri>().unwrap() {\n        fail::fail_point!(\"api-503\", |_| Err(ApiError::ResourceUnavailable(\n            \"failpoint\".into()\n        )));\n\n        fail::fail_point!(\"api-500\", |_| Err(ApiError::InternalServerError(\n            anyhow::anyhow!(\"failpoint\")\n        )));\n    }\n\n    // Spawn a new task to handle the request, to protect the handler from unexpected\n    // async cancellations. 
Most pageserver functions are not async cancellation safe.\n    // We arm a drop-guard, so that if Hyper drops the Future, we signal the task\n    // with the cancellation token.\n    let token = CancellationToken::new();\n    let cancel_guard = token.clone().drop_guard();\n    let result = request_span(request, move |r| async {\n        let handle = tokio::spawn(\n            async {\n                let token_cloned = token.clone();\n                let result = handler(r, token).await;\n                if token_cloned.is_cancelled() {\n                    // dropguard has executed: we will never turn this result into response.\n                    //\n                    // at least temporarily do {:?} logging; these failures are rare enough but\n                    // could hide difficult errors.\n                    match &result {\n                        Ok(response) => {\n                            let status = response.status();\n                            info!(%status, \"Cancelled request finished successfully\")\n                        }\n                        Err(e) => match e {\n                            ApiError::ShuttingDown | ApiError::ResourceUnavailable(_) => {\n                                // Don't log this at error severity: they are normal during lifecycle of tenants/process\n                                info!(\"Cancelled request aborted for shutdown\")\n                            }\n                            _ => {\n                                // Log these in a highly visible way, because we have no client to send the response to, but\n                                // would like to know that something went wrong.\n                                error!(\"Cancelled request finished with an error: {e:?}\")\n                            }\n                        },\n                    }\n                }\n                // only logging for cancelled panicked request handlers is the tracing_panic_hook,\n                // 
which should suffice.\n                //\n                // there is still a chance to lose the result due to race between\n                // returning from here and the actual connection closing happening\n                // before outer task gets to execute. leaving that up for #5815.\n                result\n            }\n            .in_current_span(),\n        );\n\n        match handle.await {\n            // TODO: never actually return Err from here, always Ok(...) so that we can log\n            // spanned errors. Call api_error_handler instead and return appropriate Body.\n            Ok(result) => result,\n            Err(e) => {\n                // The handler task panicked. We have a global panic handler that logs the\n                // panic with its backtrace, so no need to log that here. Only log a brief\n                // message to make it clear that we returned the error to the client.\n                error!(\"HTTP request handler task panicked: {e:#}\");\n\n                // Don't return an Error here, because then fallback error handler that was\n                // installed in make_router() will print the error. 
Instead, construct the\n                // HTTP error response and return that.\n                Ok(\n                    ApiError::InternalServerError(anyhow!(\"HTTP request handler task panicked\"))\n                        .into_response(),\n                )\n            }\n        }\n    })\n    .await;\n\n    cancel_guard.disarm();\n\n    result\n}\n\n/// Like api_handler, but returns an error response if the server is built without\n/// the 'testing' feature.\nasync fn testing_api_handler<R, H>(\n    desc: &str,\n    request: Request<Body>,\n    handler: H,\n) -> Result<Response<Body>, ApiError>\nwhere\n    R: std::future::Future<Output = Result<Response<Body>, ApiError>> + Send + 'static,\n    H: FnOnce(Request<Body>, CancellationToken) -> R + Send + Sync + 'static,\n{\n    if cfg!(feature = \"testing\") {\n        api_handler(request, handler).await\n    } else {\n        std::future::ready(Err(ApiError::BadRequest(anyhow!(\n            \"Cannot {desc} because pageserver was compiled without testing APIs\",\n        ))))\n        .await\n    }\n}\n\npub fn make_router(\n    state: Arc<State>,\n    launch_ts: &'static LaunchTimestamp,\n    auth: Option<Arc<SwappableJwtAuth>>,\n) -> anyhow::Result<RouterBuilder<hyper::Body, ApiError>> {\n    let spec = include_bytes!(\"openapi_spec.yml\");\n    let mut router = attach_openapi_ui(endpoint::make_router(), spec, \"/swagger.yml\", \"/v1/doc\");\n    if auth.is_some() {\n        router = router.middleware(auth_middleware(|request| {\n            let state = get_state(request);\n            if state.allowlist_routes.contains(&request.uri().path()) {\n                None\n            } else {\n                state.auth.as_deref()\n            }\n        }))\n    }\n\n    router = router.middleware(\n        endpoint::add_response_header_middleware(\n            \"PAGESERVER_LAUNCH_TIMESTAMP\",\n            &launch_ts.to_string(),\n        )\n        .expect(\"construct launch timestamp header middleware\"),\n    
);\n\n    let force_metric_collection_on_scrape = state.conf.force_metric_collection_on_scrape;\n\n    let prometheus_metrics_handler_wrapper =\n        move |req| prometheus_metrics_handler(req, force_metric_collection_on_scrape);\n\n    Ok(router\n        .data(state)\n        .get(\"/metrics\", move |r| request_span(r, prometheus_metrics_handler_wrapper))\n        .get(\"/profile/cpu\", |r| request_span(r, profile_cpu_handler))\n        .get(\"/profile/heap\", |r| request_span(r, profile_heap_handler))\n        .get(\"/v1/status\", |r| api_handler(r, status_handler))\n        .put(\"/v1/failpoints\", |r| {\n            testing_api_handler(\"manage failpoints\", r, failpoints_handler)\n        })\n        .post(\"/v1/reload_auth_validation_keys\", |r| {\n            api_handler(r, reload_auth_validation_keys_handler)\n        })\n        .get(\"/v1/tenant\", |r| api_handler(r, tenant_list_handler))\n        .get(\"/v1/tenant/:tenant_shard_id\", |r| {\n            api_handler(r, tenant_status)\n        })\n        .delete(\"/v1/tenant/:tenant_shard_id\", |r| {\n            api_handler(r, tenant_delete_handler)\n        })\n        .get(\"/v1/tenant/:tenant_shard_id/synthetic_size\", |r| {\n            api_handler(r, tenant_size_handler)\n        })\n        .patch(\"/v1/tenant/config\", |r| {\n            api_handler(r, patch_tenant_config_handler)\n        })\n        .put(\"/v1/tenant/config\", |r| {\n            api_handler(r, update_tenant_config_handler)\n        })\n        .put(\"/v1/tenant/:tenant_shard_id/shard_split\", |r| {\n            api_handler(r, tenant_shard_split_handler)\n        })\n        .get(\"/v1/tenant/:tenant_shard_id/config\", |r| {\n            api_handler(r, get_tenant_config_handler)\n        })\n        .put(\"/v1/tenant/:tenant_shard_id/location_config\", |r| {\n            api_handler(r, put_tenant_location_config_handler)\n        })\n        .get(\"/v1/location_config\", |r| {\n            api_handler(r, 
list_location_config_handler)\n        })\n        .get(\"/v1/location_config/:tenant_shard_id\", |r| {\n            api_handler(r, get_location_config_handler)\n        })\n        .put(\n            \"/v1/tenant/:tenant_shard_id/time_travel_remote_storage\",\n            |r| api_handler(r, tenant_time_travel_remote_storage_handler),\n        )\n        .get(\"/v1/tenant/:tenant_shard_id/timeline\", |r| {\n            api_handler(r, timeline_list_handler)\n        })\n        .get(\"/v1/tenant/:tenant_shard_id/timeline_and_offloaded\", |r| {\n            api_handler(r, timeline_and_offloaded_list_handler)\n        })\n        .post(\"/v1/tenant/:tenant_shard_id/timeline\", |r| {\n            api_handler(r, timeline_create_handler)\n        })\n        .post(\"/v1/tenant/:tenant_shard_id/reset\", |r| {\n            api_handler(r, tenant_reset_handler)\n        })\n        .post(\n            \"/v1/tenant/:tenant_shard_id/timeline/:timeline_id/preserve_initdb_archive\",\n            |r| api_handler(r, timeline_preserve_initdb_handler),\n        )\n        .put(\n            \"/v1/tenant/:tenant_shard_id/timeline/:timeline_id/archival_config\",\n            |r| api_handler(r, timeline_archival_config_handler),\n        )\n        .get(\"/v1/tenant/:tenant_shard_id/timeline/:timeline_id\", |r| {\n            api_handler(r, timeline_detail_handler)\n        })\n        .get(\n            \"/v1/tenant/:tenant_shard_id/timeline/:timeline_id/get_lsn_by_timestamp\",\n            |r| api_handler(r, get_lsn_by_timestamp_handler),\n        )\n        .get(\n            \"/v1/tenant/:tenant_shard_id/timeline/:timeline_id/get_timestamp_of_lsn\",\n            |r| api_handler(r, get_timestamp_of_lsn_handler),\n        )\n        .post(\n            \"/v1/tenant/:tenant_shard_id/timeline/:timeline_id/patch_index_part\",\n            |r| api_handler(r, timeline_patch_index_part_handler),\n        )\n        .post(\n            
\"/v1/tenant/:tenant_shard_id/timeline/:timeline_id/lsn_lease\",\n            |r| api_handler(r, lsn_lease_handler),\n        )\n        .put(\n            \"/v1/tenant/:tenant_shard_id/timeline/:timeline_id/do_gc\",\n            |r| api_handler(r, timeline_gc_handler),\n        )\n        .get(\n            \"/v1/tenant/:tenant_shard_id/timeline/:timeline_id/compact\",\n            |r| api_handler(r, timeline_compact_info_handler),\n        )\n        .put(\n            \"/v1/tenant/:tenant_shard_id/timeline/:timeline_id/compact\",\n            |r| api_handler(r, timeline_compact_handler),\n        )\n        .delete(\n            \"/v1/tenant/:tenant_shard_id/timeline/:timeline_id/compact\",\n            |r| api_handler(r, timeline_cancel_compact_handler),\n        )\n        .put(\n            \"/v1/tenant/:tenant_shard_id/timeline/:timeline_id/offload\",\n            |r| testing_api_handler(\"attempt timeline offload\", r, timeline_offload_handler),\n        )\n        .put(\n            \"/v1/tenant/:tenant_shard_id/timeline/:timeline_id/mark_invisible\",\n            |r| api_handler( r, timeline_mark_invisible_handler),\n        )\n        .put(\n            \"/v1/tenant/:tenant_shard_id/timeline/:timeline_id/checkpoint\",\n            |r| testing_api_handler(\"run timeline checkpoint\", r, timeline_checkpoint_handler),\n        )\n        .post(\n            \"/v1/tenant/:tenant_shard_id/timeline/:timeline_id/download_remote_layers\",\n            |r| api_handler(r, timeline_download_remote_layers_handler_post),\n        )\n        .get(\n            \"/v1/tenant/:tenant_shard_id/timeline/:timeline_id/download_remote_layers\",\n            |r| api_handler(r, timeline_download_remote_layers_handler_get),\n        )\n        .put(\n            \"/v1/tenant/:tenant_shard_id/timeline/:timeline_id/detach_ancestor\",\n            |r| api_handler(r, timeline_detach_ancestor_handler),\n        )\n        .delete(\"/v1/tenant/:tenant_shard_id/timeline/:timeline_id\", 
|r| {\n            api_handler(r, timeline_delete_handler)\n        })\n        .get(\n            \"/v1/tenant/:tenant_shard_id/timeline/:timeline_id/layer\",\n            |r| api_handler(r, layer_map_info_handler),\n        )\n        .post(\n            \"/v1/tenant/:tenant_shard_id/timeline/:timeline_id/download_heatmap_layers\",\n            |r| api_handler(r, timeline_download_heatmap_layers_handler),\n        )\n        .delete(\n            \"/v1/tenant/:tenant_shard_id/timeline/:timeline_id/download_heatmap_layers\",\n            |r| api_handler(r, timeline_shutdown_download_heatmap_layers_handler),\n        )\n        .get(\n            \"/v1/tenant/:tenant_shard_id/timeline/:timeline_id/layer/:layer_file_name\",\n            |r| api_handler(r, layer_download_handler),\n        )\n        .delete(\n            \"/v1/tenant/:tenant_shard_id/timeline/:timeline_id/layer/:layer_file_name\",\n            |r| api_handler(r, evict_timeline_layer_handler),\n        )\n        .post(\n            \"/v1/tenant/:tenant_shard_id/timeline/:timeline_id/layer/:layer_name/scan_disposable_keys\",\n            |r| testing_api_handler(\"timeline_layer_scan_disposable_keys\", r, timeline_layer_scan_disposable_keys),\n        )\n        .post(\n            \"/v1/tenant/:tenant_shard_id/timeline/:timeline_id/block_gc\",\n            |r| api_handler(r, timeline_gc_blocking_handler),\n        )\n        .post(\n            \"/v1/tenant/:tenant_shard_id/timeline/:timeline_id/unblock_gc\",\n            |r| api_handler(r, timeline_gc_unblocking_handler),\n        )\n        .get(\n            \"/v1/tenant/:tenant_shard_id/timeline/:timeline_id/page_trace\",\n            |r| api_handler(r, timeline_page_trace_handler),\n        )\n        .post(\"/v1/tenant/:tenant_shard_id/heatmap_upload\", |r| {\n            api_handler(r, secondary_upload_handler)\n        })\n        .get(\"/v1/tenant/:tenant_id/scan_remote_storage\", |r| {\n            api_handler(r, 
tenant_scan_remote_handler)\n        })\n        .put(\"/v1/disk_usage_eviction/run\", |r| {\n            api_handler(r, disk_usage_eviction_run)\n        })\n        .put(\"/v1/deletion_queue/flush\", |r| {\n            api_handler(r, deletion_queue_flush)\n        })\n        .get(\"/v1/tenant/:tenant_shard_id/secondary/status\", |r| {\n            api_handler(r, secondary_status_handler)\n        })\n        .post(\"/v1/tenant/:tenant_shard_id/secondary/download\", |r| {\n            api_handler(r, secondary_download_handler)\n        })\n        .post(\"/v1/tenant/:tenant_shard_id/wait_lsn\", |r| {\n            api_handler(r, wait_lsn_handler)\n        })\n        .put(\"/v1/tenant/:tenant_shard_id/break\", |r| {\n            testing_api_handler(\"set tenant state to broken\", r, handle_tenant_break)\n        })\n        .get(\"/v1/panic\", |r| api_handler(r, always_panic_handler))\n        .post(\"/v1/tracing/event\", |r| {\n            testing_api_handler(\"emit a tracing event\", r, post_tracing_event_handler)\n        })\n        .get(\n            \"/v1/tenant/:tenant_shard_id/timeline/:timeline_id/getpage\",\n            |r|  testing_api_handler(\"getpage@lsn\", r, getpage_at_lsn_handler),\n        )\n        .get(\n            \"/v1/tenant/:tenant_shard_id/timeline/:timeline_id/touchpage\",\n            |r| api_handler(r, touchpage_at_lsn_handler),\n        )\n        .get(\n            \"/v1/tenant/:tenant_shard_id/timeline/:timeline_id/keyspace\",\n            |r| api_handler(r, timeline_collect_keyspace),\n        )\n        .put(\"/v1/io_engine\", |r| api_handler(r, put_io_engine_handler))\n        .put(\"/v1/io_mode\", |r| api_handler(r, put_io_mode_handler))\n        .get(\"/v1/utilization\", |r| api_handler(r, get_utilization))\n        .get(\"/v1/list_tenant_visible_size\", |r| api_handler(r, list_tenant_visible_size_handler))\n        .post(\n            \"/v1/tenant/:tenant_shard_id/timeline/:timeline_id/ingest_aux_files\",\n            |r| 
testing_api_handler(\"ingest_aux_files\", r, ingest_aux_files),\n        )\n        .post(\n            \"/v1/tenant/:tenant_shard_id/timeline/:timeline_id/list_aux_files\",\n            |r| testing_api_handler(\"list_aux_files\", r, list_aux_files),\n        )\n        .post(\"/v1/top_tenants\", |r| api_handler(r, post_top_tenants))\n        .post(\n            \"/v1/tenant/:tenant_shard_id/timeline/:timeline_id/perf_info\",\n            |r| testing_api_handler(\"perf_info\", r, perf_info),\n        )\n        .put(\n            \"/v1/tenant/:tenant_id/timeline/:timeline_id/import_basebackup\",\n            |r| api_handler(r, put_tenant_timeline_import_basebackup),\n        )\n        .put(\n            \"/v1/tenant/:tenant_id/timeline/:timeline_id/import_wal\",\n            |r| api_handler(r, put_tenant_timeline_import_wal),\n        )\n        .put(\n            \"/v1/tenant/:tenant_shard_id/timeline/:timeline_id/activate_post_import\",\n            |r| api_handler(r, activate_post_import_handler),\n        )\n        .get(\"/v1/tenant/:tenant_shard_id/feature_flag/:flag_key\", |r| {\n            api_handler(r, tenant_evaluate_feature_flag)\n        })\n        .post(\"/v1/tenant/:tenant_shard_id/force_refresh_feature_flag\", |r| {\n            api_handler(r, force_refresh_feature_flag)\n        })\n        .put(\"/v1/feature_flag/:flag_key\", |r| {\n            testing_api_handler(\"force override feature flag - put\", r, force_override_feature_flag_for_testing_put)\n        })\n        .delete(\"/v1/feature_flag/:flag_key\", |r| {\n            testing_api_handler(\"force override feature flag - delete\", r, force_override_feature_flag_for_testing_delete)\n        })\n        .post(\"/v1/feature_flag_spec\", |r| {\n            api_handler(r, update_feature_flag_spec)\n        })\n        .post(\"/hadron-internal/reset_alert_gauges\", |r| {\n            api_handler(r, hadron_reset_alert_gauges)\n        })\n        .any(handler_404))\n}\n"
  },
  {
    "path": "pageserver/src/import_datadir.rs",
    "content": "//!\n//! Import data and WAL from a PostgreSQL data directory and WAL segments into\n//! a neon Timeline.\n//!\nuse std::path::{Path, PathBuf};\n\nuse anyhow::{Context, Result, bail, ensure};\nuse bytes::Bytes;\nuse camino::Utf8Path;\nuse futures::StreamExt;\nuse pageserver_api::key::rel_block_to_key;\nuse pageserver_api::reltag::{RelTag, SlruKind};\nuse postgres_ffi::relfile_utils::*;\nuse postgres_ffi::waldecoder::WalStreamDecoder;\nuse postgres_ffi::{\n    BLCKSZ, ControlFileData, DBState_DB_SHUTDOWNED, Oid, WAL_SEGMENT_SIZE, XLogFileName,\n    pg_constants,\n};\nuse tokio::io::{AsyncRead, AsyncReadExt};\nuse tokio_tar::Archive;\nuse tracing::*;\nuse utils::lsn::Lsn;\nuse wal_decoder::models::InterpretedWalRecord;\nuse walkdir::WalkDir;\n\nuse crate::context::RequestContext;\nuse crate::metrics::WAL_INGEST;\nuse crate::pgdatadir_mapping::*;\nuse crate::tenant::Timeline;\nuse crate::walingest::{WalIngest, WalIngestErrorKind};\n\n// Returns checkpoint LSN from controlfile\npub fn get_lsn_from_controlfile(path: &Utf8Path) -> Result<Lsn> {\n    // Read control file to extract the LSN\n    let controlfile_path = path.join(\"global\").join(\"pg_control\");\n    let controlfile_buf = std::fs::read(&controlfile_path)\n        .with_context(|| format!(\"reading controlfile: {controlfile_path}\"))?;\n    let controlfile = ControlFileData::decode(&controlfile_buf)?;\n    let lsn = controlfile.checkPoint;\n\n    Ok(Lsn(lsn))\n}\n\n///\n/// Import all relation data pages from local disk into the repository.\n///\n/// This is currently only used to import a cluster freshly created by initdb.\n/// The code that deals with the checkpoint would not work right if the\n/// cluster was not shut down cleanly.\npub async fn import_timeline_from_postgres_datadir(\n    tline: &Timeline,\n    pgdata_path: &Utf8Path,\n    pgdata_lsn: Lsn,\n    ctx: &RequestContext,\n) -> Result<()> {\n    let mut pg_control: Option<ControlFileData> = None;\n\n    // TODO this shoud be 
start_lsn, which is not necessarily equal to end_lsn (aka lsn)\n    // Then fishing out pg_control would be unnecessary\n    let mut modification = tline.begin_modification_for_import(pgdata_lsn);\n    modification.init_empty()?;\n\n    // Import all but pg_wal\n    let all_but_wal = WalkDir::new(pgdata_path)\n        .into_iter()\n        .filter_entry(|entry| !entry.path().ends_with(\"pg_wal\"));\n    for entry in all_but_wal {\n        let entry = entry?;\n        let metadata = entry.metadata().expect(\"error getting dir entry metadata\");\n        if metadata.is_file() {\n            let absolute_path = entry.path();\n            let relative_path = absolute_path.strip_prefix(pgdata_path)?;\n\n            let mut file = tokio::fs::File::open(absolute_path).await?;\n            let len = metadata.len() as usize;\n            if let Some(control_file) =\n                import_file(&mut modification, relative_path, &mut file, len, ctx).await?\n            {\n                pg_control = Some(control_file);\n            }\n            modification.flush(ctx).await?;\n        }\n    }\n\n    // We're done importing all the data files.\n    modification.commit(ctx).await?;\n\n    // We expect the Postgres server to be shut down cleanly.\n    let pg_control = pg_control.context(\"pg_control file not found\")?;\n    ensure!(\n        pg_control.state == DBState_DB_SHUTDOWNED,\n        \"Postgres cluster was not shut down cleanly\"\n    );\n    ensure!(\n        pg_control.checkPointCopy.redo == pgdata_lsn.0,\n        \"unexpected checkpoint REDO pointer\"\n    );\n\n    // Import WAL. This is needed even when starting from a shutdown checkpoint, because\n    // this reads the checkpoint record itself, advancing the tip of the timeline to\n    // *after* the checkpoint record. 
And crucially, it initializes the 'prev_lsn'.\n    import_wal(\n        &pgdata_path.join(\"pg_wal\"),\n        tline,\n        Lsn(pg_control.checkPointCopy.redo),\n        pgdata_lsn,\n        ctx,\n    )\n    .await?;\n\n    Ok(())\n}\n\n// subroutine of import_timeline_from_postgres_datadir(), to load one relation file.\nasync fn import_rel(\n    modification: &mut DatadirModification<'_>,\n    path: &Path,\n    spcoid: Oid,\n    dboid: Oid,\n    reader: &mut (impl AsyncRead + Unpin),\n    len: usize,\n    ctx: &RequestContext,\n) -> anyhow::Result<()> {\n    // Does it look like a relation file?\n    trace!(\"importing rel file {}\", path.display());\n\n    let filename = &path\n        .file_name()\n        .expect(\"missing rel filename\")\n        .to_string_lossy();\n    let (relnode, forknum, segno) = parse_relfilename(filename).map_err(|e| {\n        warn!(\"unrecognized file in postgres datadir: {:?} ({})\", path, e);\n        e\n    })?;\n\n    let mut buf: [u8; 8192] = [0u8; 8192];\n\n    ensure!(len % BLCKSZ as usize == 0);\n    let nblocks = len / BLCKSZ as usize;\n\n    let rel = RelTag {\n        spcnode: spcoid,\n        dbnode: dboid,\n        relnode,\n        forknum,\n    };\n\n    let mut blknum: u32 = segno * (1024 * 1024 * 1024 / BLCKSZ as u32);\n\n    // Call put_rel_creation for every segment of the relation,\n    // because there is no guarantee about the order in which we are processing segments.\n    // ignore \"relation already exists\" error\n    //\n    // FIXME: Keep track of which relations we've already created?\n    // https://github.com/neondatabase/neon/issues/3309\n    if let Err(e) = modification\n        .put_rel_creation(rel, nblocks as u32, ctx)\n        .await\n    {\n        match e.kind {\n            WalIngestErrorKind::RelationAlreadyExists(rel) => {\n                debug!(\"Relation {rel} already exists. 
We must be extending it.\")\n            }\n            _ => return Err(e.into()),\n        }\n    }\n\n    loop {\n        let r = reader.read_exact(&mut buf).await;\n        match r {\n            Ok(_) => {\n                let key = rel_block_to_key(rel, blknum);\n                if modification.tline.get_shard_identity().is_key_local(&key) {\n                    modification.put_rel_page_image(rel, blknum, Bytes::copy_from_slice(&buf))?;\n                }\n            }\n\n            // TODO: UnexpectedEof is expected\n            Err(err) => match err.kind() {\n                std::io::ErrorKind::UnexpectedEof => {\n                    // reached EOF. That's expected.\n                    let relative_blknum = blknum - segno * (1024 * 1024 * 1024 / BLCKSZ as u32);\n                    ensure!(relative_blknum == nblocks as u32, \"unexpected EOF\");\n                    break;\n                }\n                _ => {\n                    bail!(\"error reading file {}: {:#}\", path.display(), err);\n                }\n            },\n        };\n        blknum += 1;\n    }\n\n    // Update relation size\n    //\n    // If we process rel segments out of order,\n    // put_rel_extend will skip the update.\n    modification.put_rel_extend(rel, blknum, ctx).await?;\n\n    Ok(())\n}\n\n/// Import an SLRU segment file\n///\nasync fn import_slru(\n    modification: &mut DatadirModification<'_>,\n    slru: SlruKind,\n    path: &Path,\n    reader: &mut (impl AsyncRead + Unpin),\n    len: usize,\n    ctx: &RequestContext,\n) -> anyhow::Result<()> {\n    info!(\"importing slru file {path:?}\");\n\n    let mut buf: [u8; 8192] = [0u8; 8192];\n    let filename = &path\n        .file_name()\n        .with_context(|| format!(\"missing slru filename for path {path:?}\"))?\n        .to_string_lossy();\n    let segno = u32::from_str_radix(filename, 16)?;\n\n    ensure!(len % BLCKSZ as usize == 0); // we assume SLRU block size is the same as BLCKSZ\n    let nblocks = len / 
BLCKSZ as usize;\n\n    ensure!(nblocks <= pg_constants::SLRU_PAGES_PER_SEGMENT as usize);\n\n    modification\n        .put_slru_segment_creation(slru, segno, nblocks as u32, ctx)\n        .await?;\n\n    let mut rpageno = 0;\n    loop {\n        let r = reader.read_exact(&mut buf).await;\n        match r {\n            Ok(_) => {\n                modification.put_slru_page_image(\n                    slru,\n                    segno,\n                    rpageno,\n                    Bytes::copy_from_slice(&buf),\n                )?;\n            }\n\n            // TODO: UnexpectedEof is expected\n            Err(err) => match err.kind() {\n                std::io::ErrorKind::UnexpectedEof => {\n                    // reached EOF. That's expected.\n                    ensure!(rpageno == nblocks as u32, \"unexpected EOF\");\n                    break;\n                }\n                _ => {\n                    bail!(\"error reading file {}: {:#}\", path.display(), err);\n                }\n            },\n        };\n        rpageno += 1;\n    }\n\n    Ok(())\n}\n\n/// Scan PostgreSQL WAL files in given directory and load all records between\n/// 'startpoint' and 'endpoint' into the repository.\nasync fn import_wal(\n    walpath: &Utf8Path,\n    tline: &Timeline,\n    startpoint: Lsn,\n    endpoint: Lsn,\n    ctx: &RequestContext,\n) -> anyhow::Result<()> {\n    let mut waldecoder = WalStreamDecoder::new(startpoint, tline.pg_version);\n\n    let mut segno = startpoint.segment_number(WAL_SEGMENT_SIZE);\n    let mut offset = startpoint.segment_offset(WAL_SEGMENT_SIZE);\n    let mut last_lsn = startpoint;\n\n    let mut walingest = WalIngest::new(tline, startpoint, ctx).await?;\n\n    let shard = vec![*tline.get_shard_identity()];\n\n    while last_lsn <= endpoint {\n        // FIXME: assume postgresql tli 1 for now\n        let filename = XLogFileName(1, segno, WAL_SEGMENT_SIZE);\n        let mut buf = Vec::new();\n\n        // Read local file\n        let mut 
path = walpath.join(&filename);\n\n        // It may exist only as a .partial file\n        if !PathBuf::from(&path).exists() {\n            path = walpath.join(filename + \".partial\");\n        }\n\n        // Slurp the WAL file\n        let mut file = std::fs::File::open(&path)?;\n\n        if offset > 0 {\n            use std::io::Seek;\n            file.seek(std::io::SeekFrom::Start(offset as u64))?;\n        }\n\n        use std::io::Read;\n        let nread = file.read_to_end(&mut buf)?;\n        if nread != WAL_SEGMENT_SIZE - offset {\n            // Maybe allow this for .partial files?\n            error!(\"read only {} bytes from WAL file\", nread);\n        }\n\n        waldecoder.feed_bytes(&buf);\n\n        let mut nrecords = 0;\n        let mut modification = tline.begin_modification_for_import(last_lsn);\n        while last_lsn <= endpoint {\n            if let Some((lsn, recdata)) = waldecoder.poll_decode()? {\n                let interpreted = InterpretedWalRecord::from_bytes_filtered(\n                    recdata,\n                    &shard,\n                    lsn,\n                    tline.pg_version,\n                )?\n                .remove(tline.get_shard_identity())\n                .unwrap();\n\n                walingest\n                    .ingest_record(interpreted, &mut modification, ctx)\n                    .await?;\n                WAL_INGEST.records_committed.inc();\n\n                modification.commit(ctx).await?;\n                last_lsn = lsn;\n\n                nrecords += 1;\n\n                trace!(\"imported record at {} (end {})\", lsn, endpoint);\n            }\n        }\n\n        debug!(\"imported {} records up to {}\", nrecords, last_lsn);\n\n        segno += 1;\n        offset = 0;\n    }\n\n    if last_lsn != startpoint {\n        info!(\"reached end of WAL at {}\", last_lsn);\n    } else {\n        info!(\"no WAL to import at {}\", last_lsn);\n    }\n\n    Ok(())\n}\n\npub async fn import_basebackup_from_tar(\n    
tline: &Timeline,\n    reader: &mut (impl AsyncRead + Send + Sync + Unpin),\n    base_lsn: Lsn,\n    ctx: &RequestContext,\n) -> Result<()> {\n    info!(\"importing base at {base_lsn}\");\n    let mut modification = tline.begin_modification_for_import(base_lsn);\n    modification.init_empty()?;\n\n    let mut pg_control: Option<ControlFileData> = None;\n\n    // Import base\n    let mut entries = Archive::new(reader).entries()?;\n    while let Some(base_tar_entry) = entries.next().await {\n        let mut entry = base_tar_entry?;\n        let header = entry.header();\n        let len = header.entry_size()? as usize;\n        let file_path = header.path()?.into_owned();\n\n        match header.entry_type() {\n            tokio_tar::EntryType::Regular => {\n                if let Some(res) =\n                    import_file(&mut modification, file_path.as_ref(), &mut entry, len, ctx).await?\n                {\n                    // We found the pg_control file.\n                    pg_control = Some(res);\n                }\n                modification.flush(ctx).await?;\n            }\n            tokio_tar::EntryType::Directory => {\n                debug!(\"directory {:?}\", file_path);\n            }\n            _ => {\n                bail!(\n                    \"entry {} in backup tar archive is of unexpected type: {:?}\",\n                    file_path.display(),\n                    header.entry_type()\n                );\n            }\n        }\n    }\n\n    // sanity check: ensure that pg_control is loaded\n    let _pg_control = pg_control.context(\"pg_control file not found\")?;\n\n    modification.commit(ctx).await?;\n    Ok(())\n}\n\npub async fn import_wal_from_tar(\n    tline: &Timeline,\n    reader: &mut (impl AsyncRead + Send + Sync + Unpin),\n    start_lsn: Lsn,\n    end_lsn: Lsn,\n    ctx: &RequestContext,\n) -> Result<()> {\n    // Set up walingest mutable state\n    let mut waldecoder = WalStreamDecoder::new(start_lsn, tline.pg_version);\n  
  let mut segno = start_lsn.segment_number(WAL_SEGMENT_SIZE);\n    let mut offset = start_lsn.segment_offset(WAL_SEGMENT_SIZE);\n    let mut last_lsn = start_lsn;\n    let mut walingest = WalIngest::new(tline, start_lsn, ctx).await?;\n    let shard = vec![*tline.get_shard_identity()];\n\n    // Ingest wal until end_lsn\n    info!(\"importing wal until {}\", end_lsn);\n    let mut pg_wal_tar = Archive::new(reader);\n    let mut pg_wal_entries = pg_wal_tar.entries()?;\n    while last_lsn <= end_lsn {\n        let bytes = {\n            let mut entry = pg_wal_entries\n                .next()\n                .await\n                .ok_or_else(|| anyhow::anyhow!(\"expected more wal\"))??;\n            let header = entry.header();\n            let file_path = header.path()?.into_owned();\n\n            match header.entry_type() {\n                tokio_tar::EntryType::Regular => {\n                    // FIXME: assume postgresql tli 1 for now\n                    let expected_filename = XLogFileName(1, segno, WAL_SEGMENT_SIZE);\n                    let file_name = file_path\n                        .file_name()\n                        .expect(\"missing wal filename\")\n                        .to_string_lossy();\n                    ensure!(expected_filename == file_name);\n\n                    debug!(\"processing wal file {:?}\", file_path);\n                    read_all_bytes(&mut entry).await?\n                }\n                tokio_tar::EntryType::Directory => {\n                    debug!(\"directory {:?}\", file_path);\n                    continue;\n                }\n                _ => {\n                    bail!(\n                        \"entry {} in WAL tar archive is of unexpected type: {:?}\",\n                        file_path.display(),\n                        header.entry_type()\n                    );\n                }\n            }\n        };\n\n        waldecoder.feed_bytes(&bytes[offset..]);\n\n        let mut modification = 
tline.begin_modification_for_import(last_lsn);\n        while last_lsn <= end_lsn {\n            if let Some((lsn, recdata)) = waldecoder.poll_decode()? {\n                let interpreted = InterpretedWalRecord::from_bytes_filtered(\n                    recdata,\n                    &shard,\n                    lsn,\n                    tline.pg_version,\n                )?\n                .remove(tline.get_shard_identity())\n                .unwrap();\n\n                walingest\n                    .ingest_record(interpreted, &mut modification, ctx)\n                    .await?;\n                modification.commit(ctx).await?;\n                last_lsn = lsn;\n\n                debug!(\"imported record at {} (end {})\", lsn, end_lsn);\n            }\n        }\n\n        debug!(\"imported records up to {}\", last_lsn);\n        segno += 1;\n        offset = 0;\n    }\n\n    if last_lsn != start_lsn {\n        info!(\"reached end of WAL at {}\", last_lsn);\n    } else {\n        info!(\"there was no WAL to import at {}\", last_lsn);\n    }\n\n    // Log any extra unused files\n    while let Some(e) = pg_wal_entries.next().await {\n        let entry = e?;\n        let header = entry.header();\n        let file_path = header.path()?.into_owned();\n        info!(\"skipping {:?}\", file_path);\n    }\n\n    Ok(())\n}\n\nasync fn import_file(\n    modification: &mut DatadirModification<'_>,\n    file_path: &Path,\n    reader: &mut (impl AsyncRead + Send + Sync + Unpin),\n    len: usize,\n    ctx: &RequestContext,\n) -> Result<Option<ControlFileData>> {\n    let file_name = match file_path.file_name() {\n        Some(name) => name.to_string_lossy(),\n        None => return Ok(None),\n    };\n\n    if file_name.starts_with('.') {\n        // tar archives on macOs, created without COPYFILE_DISABLE=1 env var\n        // will contain \"fork files\", skip them.\n        return Ok(None);\n    }\n\n    if file_path.starts_with(\"global\") {\n        let spcnode = 
postgres_ffi_types::constants::GLOBALTABLESPACE_OID;\n        let dbnode = 0;\n\n        match file_name.as_ref() {\n            \"pg_control\" => {\n                let bytes = read_all_bytes(reader).await?;\n\n                // Extract the checkpoint record and import it separately.\n                let pg_control = ControlFileData::decode(&bytes[..])?;\n                let checkpoint_bytes = pg_control.checkPointCopy.encode()?;\n                modification.put_checkpoint(checkpoint_bytes)?;\n                debug!(\"imported control file\");\n\n                // Import it as ControlFile\n                modification.put_control_file(bytes)?;\n                return Ok(Some(pg_control));\n            }\n            \"pg_filenode.map\" => {\n                let bytes = read_all_bytes(reader).await?;\n                modification\n                    .put_relmap_file(spcnode, dbnode, bytes, ctx)\n                    .await?;\n                debug!(\"imported relmap file\")\n            }\n            \"PG_VERSION\" => {\n                debug!(\"ignored PG_VERSION file\");\n            }\n            _ => {\n                import_rel(modification, file_path, spcnode, dbnode, reader, len, ctx).await?;\n                debug!(\"imported rel creation\");\n            }\n        }\n    } else if file_path.starts_with(\"base\") {\n        let spcnode = postgres_ffi_types::constants::DEFAULTTABLESPACE_OID;\n        let dbnode: u32 = file_path\n            .iter()\n            .nth(1)\n            .expect(\"invalid file path, expected dbnode\")\n            .to_string_lossy()\n            .parse()?;\n\n        match file_name.as_ref() {\n            \"pg_filenode.map\" => {\n                let bytes = read_all_bytes(reader).await?;\n                modification\n                    .put_relmap_file(spcnode, dbnode, bytes, ctx)\n                    .await?;\n                debug!(\"imported relmap file\")\n            }\n            \"PG_VERSION\" => {\n             
   debug!(\"ignored PG_VERSION file\");\n            }\n            _ => {\n                import_rel(modification, file_path, spcnode, dbnode, reader, len, ctx).await?;\n                debug!(\"imported rel creation\");\n            }\n        }\n    } else if file_path.starts_with(\"pg_xact\") {\n        let slru = SlruKind::Clog;\n\n        if modification.tline.tenant_shard_id.is_shard_zero() {\n            import_slru(modification, slru, file_path, reader, len, ctx).await?;\n            debug!(\"imported clog slru\");\n        }\n    } else if file_path.starts_with(\"pg_multixact/offsets\") {\n        let slru = SlruKind::MultiXactOffsets;\n\n        if modification.tline.tenant_shard_id.is_shard_zero() {\n            import_slru(modification, slru, file_path, reader, len, ctx).await?;\n            debug!(\"imported multixact offsets slru\");\n        }\n    } else if file_path.starts_with(\"pg_multixact/members\") {\n        let slru = SlruKind::MultiXactMembers;\n\n        if modification.tline.tenant_shard_id.is_shard_zero() {\n            import_slru(modification, slru, file_path, reader, len, ctx).await?;\n            debug!(\"imported multixact members slru\");\n        }\n    } else if file_path.starts_with(\"pg_twophase\") {\n        let bytes = read_all_bytes(reader).await?;\n\n        // In PostgreSQL v17, this is a 64-bit FullTransactionid. In previous versions,\n        // it's a 32-bit TransactionId, which fits in u64 anyway.\n        let xid = u64::from_str_radix(file_name.as_ref(), 16)?;\n        modification\n            .put_twophase_file(xid, Bytes::copy_from_slice(&bytes[..]), ctx)\n            .await?;\n        debug!(\"imported twophase file\");\n    } else if file_path.starts_with(\"pg_wal\") {\n        debug!(\"found wal file in base section. 
ignore it\");\n    } else if file_path.starts_with(\"zenith.signal\") || file_path.starts_with(\"neon.signal\") {\n        // Parse zenith signal file to set correct previous LSN\n        let bytes = read_all_bytes(reader).await?;\n        // neon.signal format is \"PREV LSN: prev_lsn\"\n        // TODO write serialization and deserialization in the same place.\n        let neon_signal = std::str::from_utf8(&bytes)?.trim();\n        let prev_lsn = match neon_signal {\n            \"PREV LSN: none\" => Lsn(0),\n            \"PREV LSN: invalid\" => Lsn(0),\n            other => {\n                let split = other.split(':').collect::<Vec<_>>();\n                split[1]\n                    .trim()\n                    .parse::<Lsn>()\n                    .context(\"can't parse neon.signal\")?\n            }\n        };\n\n        // neon.signal is not necessarily the last file, that we handle\n        // but it is ok to call `finish_write()`, because final `modification.commit()`\n        // will update lsn once more to the final one.\n        let writer = modification.tline.writer().await;\n        writer.finish_write(prev_lsn);\n\n        debug!(\"imported neon signal {}\", prev_lsn);\n    } else if file_path.starts_with(\"pg_tblspc\") {\n        // TODO Backups exported from neon won't have pg_tblspc, but we will need\n        // this to import arbitrary postgres databases.\n        bail!(\"Importing pg_tblspc is not implemented\");\n    } else {\n        debug!(\n            \"ignoring unrecognized file \\\"{}\\\" in tar archive\",\n            file_path.display()\n        );\n    }\n\n    Ok(None)\n}\n\nasync fn read_all_bytes(reader: &mut (impl AsyncRead + Unpin)) -> Result<Bytes> {\n    let mut buf: Vec<u8> = vec![];\n    reader.read_to_end(&mut buf).await?;\n    Ok(Bytes::from(buf))\n}\n"
  },
  {
    "path": "pageserver/src/l0_flush.rs",
    "content": "use std::num::NonZeroUsize;\nuse std::sync::Arc;\n\n#[derive(Debug, PartialEq, Eq, Clone)]\npub enum L0FlushConfig {\n    Direct { max_concurrency: NonZeroUsize },\n}\n\nimpl Default for L0FlushConfig {\n    fn default() -> Self {\n        Self::Direct {\n            // TODO: using num_cpus results in different peak memory usage on different instance types.\n            max_concurrency: NonZeroUsize::new(usize::max(1, num_cpus::get())).unwrap(),\n        }\n    }\n}\n\nimpl From<pageserver_api::models::L0FlushConfig> for L0FlushConfig {\n    fn from(config: pageserver_api::models::L0FlushConfig) -> Self {\n        match config {\n            pageserver_api::models::L0FlushConfig::Direct { max_concurrency } => {\n                Self::Direct { max_concurrency }\n            }\n        }\n    }\n}\n\n#[derive(Clone)]\npub struct L0FlushGlobalState(Arc<Inner>);\n\npub enum Inner {\n    Direct { semaphore: tokio::sync::Semaphore },\n}\n\nimpl L0FlushGlobalState {\n    pub fn new(config: L0FlushConfig) -> Self {\n        match config {\n            L0FlushConfig::Direct { max_concurrency } => {\n                let semaphore = tokio::sync::Semaphore::new(max_concurrency.get());\n                Self(Arc::new(Inner::Direct { semaphore }))\n            }\n        }\n    }\n\n    pub fn inner(&self) -> &Arc<Inner> {\n        &self.0\n    }\n}\n"
  },
  {
    "path": "pageserver/src/lib.rs",
    "content": "#![recursion_limit = \"300\"]\n#![deny(clippy::undocumented_unsafe_blocks)]\n\nmod auth;\npub mod basebackup;\npub mod basebackup_cache;\npub mod config;\npub mod consumption_metrics;\npub mod context;\npub mod controller_upcall_client;\npub mod deletion_queue;\npub mod disk_usage_eviction_task;\npub mod feature_resolver;\npub mod http;\npub mod import_datadir;\npub mod l0_flush;\n\nextern crate hyper0 as hyper;\n\nuse futures::StreamExt;\nuse futures::stream::FuturesUnordered;\npub use pageserver_api::keyspace;\nuse tokio_util::sync::CancellationToken;\nmod assert_u64_eq_usize;\npub mod aux_file;\npub mod metrics;\npub mod page_cache;\npub mod page_service;\npub mod pgdatadir_mapping;\npub mod span;\npub(crate) mod statvfs;\npub mod task_mgr;\npub mod tenant;\npub mod utilization;\npub mod virtual_file;\npub mod walingest;\npub mod walredo;\n\nuse camino::Utf8Path;\nuse deletion_queue::DeletionQueue;\nuse postgres_ffi::PgMajorVersion;\nuse tenant::mgr::{BackgroundPurges, TenantManager};\nuse tenant::secondary;\nuse tracing::{info, info_span};\n\n/// Current storage format version\n///\n/// This is embedded in the header of all the layer files.\n/// If you make any backwards-incompatible changes to the storage\n/// format, bump this!\n/// Note that TimelineMetadata uses its own version number to track\n/// backwards-compatible changes to the metadata format.\npub const STORAGE_FORMAT_VERSION: u16 = 3;\n\npub const DEFAULT_PG_VERSION: PgMajorVersion = PgMajorVersion::PG17;\n\n// Magic constants used to identify different kinds of files\npub const IMAGE_FILE_MAGIC: u16 = 0x5A60;\npub const DELTA_FILE_MAGIC: u16 = 0x5A61;\n\n// Target used for performance traces.\npub const PERF_TRACE_TARGET: &str = \"P\";\n\nstatic ZERO_PAGE: bytes::Bytes = bytes::Bytes::from_static(&[0u8; 8192]);\n\npub use crate::metrics::preinitialize_metrics;\n\npub struct CancellableTask {\n    pub task: tokio::task::JoinHandle<()>,\n    pub cancel: CancellationToken,\n}\npub 
struct HttpEndpointListener(pub CancellableTask);\npub struct HttpsEndpointListener(pub CancellableTask);\npub struct ConsumptionMetricsTasks(pub CancellableTask);\npub struct DiskUsageEvictionTask(pub CancellableTask);\n// HADRON\npub struct MetricsCollectionTask(pub CancellableTask);\n\nimpl CancellableTask {\n    pub async fn shutdown(self) {\n        self.cancel.cancel();\n        self.task.await.unwrap();\n    }\n}\n\n#[tracing::instrument(skip_all, fields(%exit_code))]\n#[allow(clippy::too_many_arguments)]\npub async fn shutdown_pageserver(\n    http_listener: HttpEndpointListener,\n    https_listener: Option<HttpsEndpointListener>,\n    page_service: page_service::Listener,\n    grpc_task: Option<CancellableTask>,\n    metrics_collection_task: MetricsCollectionTask,\n    consumption_metrics_worker: ConsumptionMetricsTasks,\n    disk_usage_eviction_task: Option<DiskUsageEvictionTask>,\n    tenant_manager: &TenantManager,\n    background_purges: BackgroundPurges,\n    mut deletion_queue: DeletionQueue,\n    secondary_controller_tasks: secondary::GlobalTasks,\n    exit_code: i32,\n) {\n    use std::time::Duration;\n\n    let started_at = std::time::Instant::now();\n\n    // If the orderly shutdown below takes too long, we still want to make\n    // sure that all walredo processes are killed and wait()ed on by us, not systemd.\n    //\n    // (Leftover walredo processes are the hypothesized trigger for the systemd freezes\n    //  that we keep seeing in prod => https://github.com/neondatabase/cloud/issues/11387.\n    //\n    // We use a thread instead of a tokio task because the background runtime is likely busy\n    // with the final flushing / uploads. 
This activity here has priority, and due to lack\n    // of scheduling priority feature sin the tokio scheduler, using a separate thread is\n    // an effective priority booster.\n    let walredo_extraordinary_shutdown_thread_span = {\n        let span = info_span!(parent: None, \"walredo_extraordinary_shutdown_thread\");\n        span.follows_from(tracing::Span::current());\n        span\n    };\n    let walredo_extraordinary_shutdown_thread_cancel = CancellationToken::new();\n    let walredo_extraordinary_shutdown_thread = std::thread::spawn({\n        let walredo_extraordinary_shutdown_thread_cancel =\n            walredo_extraordinary_shutdown_thread_cancel.clone();\n        move || {\n            let rt = tokio::runtime::Builder::new_current_thread()\n                .enable_all()\n                .build()\n                .unwrap();\n            let _entered = rt.enter();\n            let _entered = walredo_extraordinary_shutdown_thread_span.enter();\n            if let Ok(()) = rt.block_on(tokio::time::timeout(\n                Duration::from_secs(8),\n                walredo_extraordinary_shutdown_thread_cancel.cancelled(),\n            )) {\n                info!(\"cancellation requested\");\n                return;\n            }\n            let managers = tenant::WALREDO_MANAGERS\n                .lock()\n                .unwrap()\n                // prevents new walredo managers from being inserted\n                .take()\n                .expect(\"only we take()\");\n            // Use FuturesUnordered to get in queue early for each manager's\n            // heavier_once_cell semaphore wait list.\n            // Also, for idle tenants that for some reason haven't\n            // shut down yet, it's quite likely that we're not going\n            // to get Poll::Pending once.\n            let mut futs: FuturesUnordered<_> = managers\n                .into_iter()\n                .filter_map(|(_, mgr)| mgr.upgrade())\n                .map(|mgr| async 
move { tokio::task::unconstrained(mgr.shutdown()).await })\n                .collect();\n            info!(count=%futs.len(), \"built FuturesUnordered\");\n            let mut last_log_at = std::time::Instant::now();\n            #[derive(Debug, Default)]\n            struct Results {\n                initiated: u64,\n                already: u64,\n            }\n            let mut results = Results::default();\n            while let Some(we_initiated) = rt.block_on(futs.next()) {\n                if we_initiated {\n                    results.initiated += 1;\n                } else {\n                    results.already += 1;\n                }\n                if last_log_at.elapsed() > Duration::from_millis(100) {\n                    info!(remaining=%futs.len(), ?results, \"progress\");\n                    last_log_at = std::time::Instant::now();\n                }\n            }\n            info!(?results, \"done\");\n        }\n    });\n\n    // Shut down the libpq endpoint task. This prevents new connections from\n    // being accepted.\n    let remaining_connections = timed(\n        page_service.stop_accepting(),\n        \"shutdown LibpqEndpointListener\",\n        Duration::from_secs(1),\n    )\n    .await;\n\n    // Shut down the gRPC server task, including request handlers.\n    if let Some(grpc_task) = grpc_task {\n        timed(\n            grpc_task.shutdown(),\n            \"shutdown gRPC PageRequestHandler\",\n            Duration::from_secs(3),\n        )\n        .await;\n    }\n\n    // Shut down all the tenants. 
This flushes everything to disk and kills\n    // the checkpoint and GC tasks.\n    timed(\n        tenant_manager.shutdown(),\n        \"shutdown all tenants\",\n        Duration::from_secs(5),\n    )\n    .await;\n\n    // Shut down any page service tasks: any in-progress work for particular timelines or tenants\n    // should already have been cancelled via mgr::shutdown_all_tenants\n    timed(\n        remaining_connections.shutdown(),\n        \"shutdown PageRequestHandlers\",\n        Duration::from_secs(1),\n    )\n    .await;\n\n    // Best effort to persist any outstanding deletions, to avoid leaking objects\n    deletion_queue.shutdown(Duration::from_secs(5)).await;\n\n    // HADRON\n    timed(\n        metrics_collection_task.0.shutdown(),\n        \"shutdown metrics collections metrics\",\n        Duration::from_secs(1),\n    )\n    .await;\n\n    timed(\n        consumption_metrics_worker.0.shutdown(),\n        \"shutdown consumption metrics\",\n        Duration::from_secs(1),\n    )\n    .await;\n\n    timed(\n        futures::future::OptionFuture::from(disk_usage_eviction_task.map(|t| t.0.shutdown())),\n        \"shutdown disk usage eviction\",\n        Duration::from_secs(1),\n    )\n    .await;\n\n    timed(\n        background_purges.shutdown(),\n        \"shutdown background purges\",\n        Duration::from_secs(1),\n    )\n    .await;\n\n    if let Some(https_listener) = https_listener {\n        timed(\n            https_listener.0.shutdown(),\n            \"shutdown https\",\n            Duration::from_secs(1),\n        )\n        .await;\n    }\n\n    // Shut down the HTTP endpoint last, so that you can still check the server's\n    // status while it's shutting down.\n    // FIXME: We should probably stop accepting commands like attach/detach earlier.\n    timed(\n        http_listener.0.shutdown(),\n        \"shutdown http\",\n        Duration::from_secs(1),\n    )\n    .await;\n\n    timed(\n        secondary_controller_tasks.wait(), // 
cancellation happened in caller\n        \"secondary controller wait\",\n        Duration::from_secs(1),\n    )\n    .await;\n\n    // There should be nothing left, but let's be sure\n    timed(\n        task_mgr::shutdown_tasks(None, None, None),\n        \"shutdown leftovers\",\n        Duration::from_secs(1),\n    )\n    .await;\n\n    info!(\"cancel & join walredo_extraordinary_shutdown_thread\");\n    walredo_extraordinary_shutdown_thread_cancel.cancel();\n    walredo_extraordinary_shutdown_thread.join().unwrap();\n    info!(\"walredo_extraordinary_shutdown_thread done\");\n\n    info!(\n        elapsed_ms = started_at.elapsed().as_millis(),\n        \"Shut down successfully completed\"\n    );\n    std::process::exit(exit_code);\n}\n\n/// Per-tenant configuration file.\n/// Full path: `tenants/<tenant_id>/config-v1`.\npub(crate) const TENANT_LOCATION_CONFIG_NAME: &str = \"config-v1\";\n\n/// Per-tenant copy of their remote heatmap, downloaded into the local\n/// tenant path while in secondary mode.\npub(crate) const TENANT_HEATMAP_BASENAME: &str = \"heatmap-v1.json\";\n\n/// A suffix used for various temporary files. 
Any temporary files found in the\n/// data directory at pageserver startup can be automatically removed.\npub(crate) const TEMP_FILE_SUFFIX: &str = \"___temp\";\n\npub fn is_temporary(path: &Utf8Path) -> bool {\n    match path.file_name() {\n        Some(name) => name.ends_with(TEMP_FILE_SUFFIX),\n        None => false,\n    }\n}\n\n/// During pageserver startup, we need to order operations not to exhaust tokio worker threads by\n/// blocking.\n///\n/// The instances of this value exist only during startup, otherwise `None` is provided, meaning no\n/// delaying is needed.\n#[derive(Clone)]\npub struct InitializationOrder {\n    /// Each initial tenant load task carries this until it is done loading timelines from remote storage\n    pub initial_tenant_load_remote: Option<utils::completion::Completion>,\n\n    /// Each initial tenant load task carries this until completion.\n    pub initial_tenant_load: Option<utils::completion::Completion>,\n\n    /// Barrier for when we can start any background jobs.\n    ///\n    /// This can be broken up later on, but right now there is just one class of a background job.\n    pub background_jobs_can_start: utils::completion::Barrier,\n}\n\n/// Time the future with a warning when it exceeds a threshold.\nasync fn timed<Fut: std::future::Future>(\n    fut: Fut,\n    name: &str,\n    warn_at: std::time::Duration,\n) -> <Fut as std::future::Future>::Output {\n    let started = std::time::Instant::now();\n\n    let mut fut = std::pin::pin!(fut);\n\n    match tokio::time::timeout(warn_at, &mut fut).await {\n        Ok(ret) => {\n            tracing::info!(\n                stage = name,\n                elapsed_ms = started.elapsed().as_millis(),\n                \"completed\"\n            );\n            ret\n        }\n        Err(_) => {\n            tracing::info!(\n                stage = name,\n                elapsed_ms = started.elapsed().as_millis(),\n                \"still waiting, taking longer than expected...\"\n        
    );\n\n            let ret = fut.await;\n\n            // this has a global allowed_errors\n            tracing::warn!(\n                stage = name,\n                elapsed_ms = started.elapsed().as_millis(),\n                \"completed, took longer than expected\"\n            );\n\n            ret\n        }\n    }\n}\n\n/// Like [`timed`], but the warning timeout only starts after `cancel` has been cancelled.\nasync fn timed_after_cancellation<Fut: std::future::Future>(\n    fut: Fut,\n    name: &str,\n    warn_at: std::time::Duration,\n    cancel: &CancellationToken,\n) -> <Fut as std::future::Future>::Output {\n    let mut fut = std::pin::pin!(fut);\n\n    tokio::select! {\n        _ = cancel.cancelled() => {\n            timed(fut, name, warn_at).await\n        }\n        ret = &mut fut => {\n            ret\n        }\n    }\n}\n\n#[cfg(test)]\nmod timed_tests {\n    use std::time::Duration;\n\n    use super::timed;\n\n    #[tokio::test]\n    async fn timed_completes_when_inner_future_completes() {\n        // A future that completes on time should have its result returned\n        let r1 = timed(\n            async move {\n                tokio::time::sleep(Duration::from_millis(10)).await;\n                123\n            },\n            \"test 1\",\n            Duration::from_millis(50),\n        )\n        .await;\n        assert_eq!(r1, 123);\n\n        // A future that completes too slowly should also have its result returned\n        let r1 = timed(\n            async move {\n                tokio::time::sleep(Duration::from_millis(50)).await;\n                456\n            },\n            \"test 1\",\n            Duration::from_millis(10),\n        )\n        .await;\n        assert_eq!(r1, 456);\n    }\n}\n"
  },
  {
    "path": "pageserver/src/metrics.rs",
    "content": "use std::cell::Cell;\nuse std::collections::HashMap;\nuse std::num::NonZeroUsize;\nuse std::os::fd::RawFd;\nuse std::sync::atomic::AtomicU64;\nuse std::sync::{Arc, Mutex};\nuse std::time::{Duration, Instant};\n\nuse enum_map::{Enum as _, EnumMap};\nuse futures::Future;\nuse metrics::{\n    Counter, CounterVec, GaugeVec, Histogram, HistogramVec, IntCounter, IntCounterPair,\n    IntCounterPairVec, IntCounterVec, IntGauge, IntGaugeVec, UIntGauge, UIntGaugeVec,\n    register_counter_vec, register_gauge_vec, register_histogram, register_histogram_vec,\n    register_int_counter, register_int_counter_pair_vec, register_int_counter_vec,\n    register_int_gauge, register_int_gauge_vec, register_uint_gauge, register_uint_gauge_vec,\n};\nuse once_cell::sync::Lazy;\nuse pageserver_api::config::defaults::DEFAULT_MAX_GET_VECTORED_KEYS;\nuse pageserver_api::config::{\n    PageServicePipeliningConfig, PageServicePipeliningConfigPipelined,\n    PageServiceProtocolPipelinedBatchingStrategy, PageServiceProtocolPipelinedExecutionStrategy,\n};\nuse pageserver_api::models::InMemoryLayerInfo;\nuse pageserver_api::shard::TenantShardId;\nuse postgres_backend::{QueryError, is_expected_io_error};\nuse pq_proto::framed::ConnectionError;\nuse strum::{EnumCount, IntoEnumIterator as _, VariantNames};\nuse strum_macros::{IntoStaticStr, VariantNames};\nuse utils::id::TimelineId;\n\nuse crate::config;\nuse crate::config::PageServerConf;\nuse crate::context::{PageContentKind, RequestContext};\nuse crate::pgdatadir_mapping::DatadirModificationStats;\nuse crate::task_mgr::TaskKind;\nuse crate::tenant::layer_map::LayerMap;\nuse crate::tenant::mgr::TenantSlot;\nuse crate::tenant::storage_layer::{InMemoryLayer, PersistentLayerDesc};\nuse crate::tenant::tasks::BackgroundLoopKind;\nuse crate::tenant::throttle::ThrottleResult;\n\n/// Prometheus histogram buckets (in seconds) for operations in the critical\n/// path. 
In other words, operations that directly affect the latency of user\n/// queries.\n///\n/// The buckets capture the majority of latencies in the microsecond and\n/// millisecond range but also extend far enough up to distinguish \"bad\" from\n/// \"really bad\".\nconst CRITICAL_OP_BUCKETS: &[f64] = &[\n    0.000_001, 0.000_010, 0.000_100, // 1 us, 10 us, 100 us\n    0.001_000, 0.010_000, 0.100_000, // 1 ms, 10 ms, 100 ms\n    1.0, 10.0, 100.0, // 1 s, 10 s, 100 s\n];\n\n// Metrics collected on operations on the storage repository.\n#[derive(Debug, VariantNames, IntoStaticStr)]\n#[strum(serialize_all = \"kebab_case\")]\npub(crate) enum StorageTimeOperation {\n    #[strum(serialize = \"layer flush\")]\n    LayerFlush,\n\n    #[strum(serialize = \"layer flush delay\")]\n    LayerFlushDelay,\n\n    #[strum(serialize = \"compact\")]\n    Compact,\n\n    #[strum(serialize = \"create images\")]\n    CreateImages,\n\n    #[strum(serialize = \"logical size\")]\n    LogicalSize,\n\n    #[strum(serialize = \"imitate logical size\")]\n    ImitateLogicalSize,\n\n    #[strum(serialize = \"load layer map\")]\n    LoadLayerMap,\n\n    #[strum(serialize = \"gc\")]\n    Gc,\n\n    #[strum(serialize = \"find gc cutoffs\")]\n    FindGcCutoffs,\n}\n\npub(crate) static STORAGE_TIME_SUM_PER_TIMELINE: Lazy<CounterVec> = Lazy::new(|| {\n    register_counter_vec!(\n        \"pageserver_storage_operations_seconds_sum\",\n        \"Total time spent on storage operations with operation, tenant and timeline dimensions\",\n        &[\"operation\", \"tenant_id\", \"shard_id\", \"timeline_id\"],\n    )\n    .expect(\"failed to define a metric\")\n});\n\npub(crate) static STORAGE_TIME_COUNT_PER_TIMELINE: Lazy<IntCounterVec> = Lazy::new(|| {\n    register_int_counter_vec!(\n        \"pageserver_storage_operations_seconds_count\",\n        \"Count of storage operations with operation, tenant and timeline dimensions\",\n        &[\"operation\", \"tenant_id\", \"shard_id\", \"timeline_id\"],\n    )\n  
  .expect(\"failed to define a metric\")\n});\n\n/* BEGIN_HADRON */\npub(crate) static STORAGE_ACTIVE_COUNT_PER_TIMELINE: Lazy<IntGaugeVec> = Lazy::new(|| {\n    register_int_gauge_vec!(\n        \"pageserver_active_storage_operations_count\",\n        \"Count of active storage operations with operation, tenant and timeline dimensions\",\n        &[\"operation\", \"tenant_id\", \"shard_id\", \"timeline_id\"],\n    )\n    .expect(\"failed to define a metric\")\n});\n/*END_HADRON */\n\n// Buckets for background operations like compaction, GC, size calculation\nconst STORAGE_OP_BUCKETS: &[f64] = &[0.010, 0.100, 1.0, 10.0, 100.0, 1000.0];\n\npub(crate) static STORAGE_TIME_GLOBAL: Lazy<HistogramVec> = Lazy::new(|| {\n    register_histogram_vec!(\n        \"pageserver_storage_operations_seconds_global\",\n        \"Time spent on storage operations\",\n        &[\"operation\"],\n        STORAGE_OP_BUCKETS.into(),\n    )\n    .expect(\"failed to define a metric\")\n});\n\n/// Measures layers visited per read (i.e. read amplification).\n///\n/// NB: for a batch, we count all visited layers towards each read. While the cost of layer visits\n/// are amortized across the batch, and some layers may not intersect with a given key, each visited\n/// layer contributes directly to the observed latency for every read in the batch, which is what we\n/// care about.\npub(crate) static LAYERS_PER_READ: Lazy<HistogramVec> = Lazy::new(|| {\n    register_histogram_vec!(\n        \"pageserver_layers_per_read\",\n        \"Layers visited to serve a single read (read amplification). 
In a batch, all visited layers count towards every read.\",\n        &[\"tenant_id\", \"shard_id\", \"timeline_id\"],\n        // Low resolution to reduce cardinality.\n        vec![4.0, 8.0, 16.0, 32.0, 64.0, 128.0, 256.0],\n    )\n    .expect(\"failed to define a metric\")\n});\n\npub(crate) static LAYERS_PER_READ_GLOBAL: Lazy<Histogram> = Lazy::new(|| {\n    register_histogram!(\n        \"pageserver_layers_per_read_global\",\n        \"Layers visited to serve a single read (read amplification). In a batch, all visited layers count towards every read.\",\n        vec![1.0, 2.0, 4.0, 8.0, 16.0, 32.0, 64.0, 128.0, 256.0, 512.0, 1024.0],\n    )\n    .expect(\"failed to define a metric\")\n});\n\npub(crate) static LAYERS_PER_READ_BATCH_GLOBAL: Lazy<Histogram> = Lazy::new(|| {\n    register_histogram!(\n        \"pageserver_layers_per_read_batch_global\",\n        \"Layers visited to serve a single read batch (read amplification), regardless of number of reads.\",\n        vec![\n            1.0, 2.0, 4.0, 8.0, 16.0, 32.0, 64.0, 128.0, 256.0, 512.0, 1024.0\n        ],\n    )\n    .expect(\"failed to define a metric\")\n});\n\npub(crate) static LAYERS_PER_READ_AMORTIZED_GLOBAL: Lazy<Histogram> = Lazy::new(|| {\n    register_histogram!(\n        \"pageserver_layers_per_read_amortized_global\",\n        \"Layers visited to serve a single read (read amplification). Amortized across a batch: \\\n            all visited layers are divided by number of reads.\",\n        vec![\n            1.0, 2.0, 4.0, 8.0, 16.0, 32.0, 64.0, 128.0, 256.0, 512.0, 1024.0\n        ],\n    )\n    .expect(\"failed to define a metric\")\n});\n\npub(crate) static DELTAS_PER_READ_GLOBAL: Lazy<Histogram> = Lazy::new(|| {\n    // We expect this to be low because of Postgres checkpoints. 
Let's see if that holds.\n    register_histogram!(\n        \"pageserver_deltas_per_read_global\",\n        \"Number of delta pages applied to image page per read\",\n        vec![0.0, 1.0, 2.0, 4.0, 8.0, 16.0, 32.0, 64.0, 128.0, 256.0],\n    )\n    .expect(\"failed to define a metric\")\n});\n\npub(crate) static CONCURRENT_INITDBS: Lazy<UIntGauge> = Lazy::new(|| {\n    register_uint_gauge!(\n        \"pageserver_concurrent_initdb\",\n        \"Number of initdb processes running\"\n    )\n    .expect(\"failed to define a metric\")\n});\n\npub(crate) static INITDB_SEMAPHORE_ACQUISITION_TIME: Lazy<Histogram> = Lazy::new(|| {\n    register_histogram!(\n        \"pageserver_initdb_semaphore_seconds_global\",\n        \"Time spent getting a permit from the global initdb semaphore\",\n        STORAGE_OP_BUCKETS.into()\n    )\n    .expect(\"failed to define metric\")\n});\n\npub(crate) static INITDB_RUN_TIME: Lazy<Histogram> = Lazy::new(|| {\n    register_histogram!(\n        \"pageserver_initdb_seconds_global\",\n        \"Time spent performing initdb\",\n        STORAGE_OP_BUCKETS.into()\n    )\n    .expect(\"failed to define metric\")\n});\n\npub(crate) struct GetVectoredLatency {\n    map: EnumMap<TaskKind, Option<Histogram>>,\n}\n\n#[allow(dead_code)]\npub(crate) struct ScanLatency {\n    map: EnumMap<TaskKind, Option<Histogram>>,\n}\n\nimpl GetVectoredLatency {\n    // Only these task types perform vectored gets. Filter all other tasks out to reduce total\n    // cardinality of the metric.\n    const TRACKED_TASK_KINDS: [TaskKind; 2] = [TaskKind::Compaction, TaskKind::PageRequestHandler];\n\n    pub(crate) fn for_task_kind(&self, task_kind: TaskKind) -> Option<&Histogram> {\n        self.map[task_kind].as_ref()\n    }\n}\n\nimpl ScanLatency {\n    // Only these task types perform vectored gets. 
Filter all other tasks out to reduce total\n    // cardinality of the metric.\n    const TRACKED_TASK_KINDS: [TaskKind; 1] = [TaskKind::PageRequestHandler];\n\n    pub(crate) fn for_task_kind(&self, task_kind: TaskKind) -> Option<&Histogram> {\n        self.map[task_kind].as_ref()\n    }\n}\n\npub(crate) struct ScanLatencyOngoingRecording<'a> {\n    parent: &'a Histogram,\n    start: std::time::Instant,\n}\n\nimpl<'a> ScanLatencyOngoingRecording<'a> {\n    pub(crate) fn start_recording(parent: &'a Histogram) -> ScanLatencyOngoingRecording<'a> {\n        let start = Instant::now();\n        ScanLatencyOngoingRecording { parent, start }\n    }\n\n    pub(crate) fn observe(self) {\n        let elapsed = self.start.elapsed();\n        self.parent.observe(elapsed.as_secs_f64());\n    }\n}\n\npub(crate) static GET_VECTORED_LATENCY: Lazy<GetVectoredLatency> = Lazy::new(|| {\n    let inner = register_histogram_vec!(\n        \"pageserver_get_vectored_seconds\",\n        \"Time spent in get_vectored.\",\n        &[\"task_kind\"],\n        CRITICAL_OP_BUCKETS.into(),\n    )\n    .expect(\"failed to define a metric\");\n\n    GetVectoredLatency {\n        map: EnumMap::from_array(std::array::from_fn(|task_kind_idx| {\n            let task_kind = TaskKind::from_usize(task_kind_idx);\n\n            if GetVectoredLatency::TRACKED_TASK_KINDS.contains(&task_kind) {\n                let task_kind = task_kind.into();\n                Some(inner.with_label_values(&[task_kind]))\n            } else {\n                None\n            }\n        })),\n    }\n});\n\npub(crate) static SCAN_LATENCY: Lazy<ScanLatency> = Lazy::new(|| {\n    let inner = register_histogram_vec!(\n        \"pageserver_scan_seconds\",\n        \"Time spent in scan.\",\n        &[\"task_kind\"],\n        CRITICAL_OP_BUCKETS.into(),\n    )\n    .expect(\"failed to define a metric\");\n\n    ScanLatency {\n        map: EnumMap::from_array(std::array::from_fn(|task_kind_idx| {\n            let task_kind = 
TaskKind::from_usize(task_kind_idx);\n\n            if ScanLatency::TRACKED_TASK_KINDS.contains(&task_kind) {\n                let task_kind = task_kind.into();\n                Some(inner.with_label_values(&[task_kind]))\n            } else {\n                None\n            }\n        })),\n    }\n});\n\npub(crate) struct PageCacheMetricsForTaskKind {\n    pub read_accesses_immutable: IntCounter,\n    pub read_hits_immutable: IntCounter,\n}\n\npub(crate) struct PageCacheMetrics {\n    map: EnumMap<TaskKind, EnumMap<PageContentKind, PageCacheMetricsForTaskKind>>,\n}\n\nstatic PAGE_CACHE_READ_HITS: Lazy<IntCounterVec> = Lazy::new(|| {\n    register_int_counter_vec!(\n        \"pageserver_page_cache_read_hits_total\",\n        \"Number of read accesses to the page cache that hit\",\n        &[\"task_kind\", \"key_kind\", \"content_kind\", \"hit_kind\"]\n    )\n    .expect(\"failed to define a metric\")\n});\n\nstatic PAGE_CACHE_READ_ACCESSES: Lazy<IntCounterVec> = Lazy::new(|| {\n    register_int_counter_vec!(\n        \"pageserver_page_cache_read_accesses_total\",\n        \"Number of read accesses to the page cache\",\n        &[\"task_kind\", \"key_kind\", \"content_kind\"]\n    )\n    .expect(\"failed to define a metric\")\n});\n\npub(crate) static PAGE_CACHE: Lazy<PageCacheMetrics> = Lazy::new(|| PageCacheMetrics {\n    map: EnumMap::from_array(std::array::from_fn(|task_kind| {\n        let task_kind = TaskKind::from_usize(task_kind);\n        let task_kind: &'static str = task_kind.into();\n        EnumMap::from_array(std::array::from_fn(|content_kind| {\n            let content_kind = PageContentKind::from_usize(content_kind);\n            let content_kind: &'static str = content_kind.into();\n            PageCacheMetricsForTaskKind {\n                read_accesses_immutable: {\n                    PAGE_CACHE_READ_ACCESSES\n                        .get_metric_with_label_values(&[task_kind, \"immutable\", content_kind])\n                        .unwrap()\n   
             },\n\n                read_hits_immutable: {\n                    PAGE_CACHE_READ_HITS\n                        .get_metric_with_label_values(&[task_kind, \"immutable\", content_kind, \"-\"])\n                        .unwrap()\n                },\n            }\n        }))\n    })),\n});\n\nimpl PageCacheMetrics {\n    pub(crate) fn for_ctx(&self, ctx: &RequestContext) -> &PageCacheMetricsForTaskKind {\n        &self.map[ctx.task_kind()][ctx.page_content_kind()]\n    }\n}\n\npub(crate) struct PageCacheSizeMetrics {\n    pub max_bytes: UIntGauge,\n\n    pub current_bytes_immutable: UIntGauge,\n}\n\nstatic PAGE_CACHE_SIZE_CURRENT_BYTES: Lazy<UIntGaugeVec> = Lazy::new(|| {\n    register_uint_gauge_vec!(\n        \"pageserver_page_cache_size_current_bytes\",\n        \"Current size of the page cache in bytes, by key kind\",\n        &[\"key_kind\"]\n    )\n    .expect(\"failed to define a metric\")\n});\n\npub(crate) static PAGE_CACHE_SIZE: Lazy<PageCacheSizeMetrics> =\n    Lazy::new(|| PageCacheSizeMetrics {\n        max_bytes: {\n            register_uint_gauge!(\n                \"pageserver_page_cache_size_max_bytes\",\n                \"Maximum size of the page cache in bytes\"\n            )\n            .expect(\"failed to define a metric\")\n        },\n        current_bytes_immutable: {\n            PAGE_CACHE_SIZE_CURRENT_BYTES\n                .get_metric_with_label_values(&[\"immutable\"])\n                .unwrap()\n        },\n    });\n\npub(crate) mod page_cache_eviction_metrics {\n    use std::num::NonZeroUsize;\n\n    use metrics::{IntCounter, IntCounterVec, register_int_counter_vec};\n    use once_cell::sync::Lazy;\n\n    #[derive(Clone, Copy)]\n    pub(crate) enum Outcome {\n        FoundSlotUnused { iters: NonZeroUsize },\n        FoundSlotEvicted { iters: NonZeroUsize },\n        ItersExceeded { iters: NonZeroUsize },\n    }\n\n    static ITERS_TOTAL_VEC: Lazy<IntCounterVec> = Lazy::new(|| {\n        register_int_counter_vec!(\n       
     \"pageserver_page_cache_find_victim_iters_total\",\n            \"Counter for the number of iterations in the find_victim loop\",\n            &[\"outcome\"],\n        )\n        .expect(\"failed to define a metric\")\n    });\n\n    static CALLS_VEC: Lazy<IntCounterVec> = Lazy::new(|| {\n        register_int_counter_vec!(\n            \"pageserver_page_cache_find_victim_calls\",\n            \"Incremented at the end of each find_victim() call.\\\n             Filter by outcome to get e.g., eviction rate.\",\n            &[\"outcome\"]\n        )\n        .unwrap()\n    });\n\n    pub(crate) fn observe(outcome: Outcome) {\n        macro_rules! dry {\n            ($label:literal, $iters:expr) => {{\n                static LABEL: &'static str = $label;\n                static ITERS_TOTAL: Lazy<IntCounter> =\n                    Lazy::new(|| ITERS_TOTAL_VEC.with_label_values(&[LABEL]));\n                static CALLS: Lazy<IntCounter> =\n                    Lazy::new(|| CALLS_VEC.with_label_values(&[LABEL]));\n                ITERS_TOTAL.inc_by(($iters.get()) as u64);\n                CALLS.inc();\n            }};\n        }\n        match outcome {\n            Outcome::FoundSlotUnused { iters } => dry!(\"found_empty\", iters),\n            Outcome::FoundSlotEvicted { iters } => {\n                dry!(\"found_evicted\", iters)\n            }\n            Outcome::ItersExceeded { iters } => {\n                dry!(\"err_iters_exceeded\", iters);\n                super::page_cache_errors_inc(super::PageCacheErrorKind::EvictIterLimit);\n            }\n        }\n    }\n}\n\nstatic PAGE_CACHE_ERRORS: Lazy<IntCounterVec> = Lazy::new(|| {\n    register_int_counter_vec!(\n        \"page_cache_errors_total\",\n        \"Number of timeouts while acquiring a pinned slot in the page cache\",\n        &[\"error_kind\"]\n    )\n    .expect(\"failed to define a metric\")\n});\n\npub(crate) static FEATURE_FLAG_EVALUATION: Lazy<CounterVec> = Lazy::new(|| {\n    
register_counter_vec!(\n        \"pageserver_feature_flag_evaluation\",\n        \"Number of times a feature flag is evaluated\",\n        &[\"flag_key\", \"status\", \"value\"],\n    )\n    .unwrap()\n});\n\n#[derive(IntoStaticStr)]\n#[strum(serialize_all = \"kebab_case\")]\npub(crate) enum PageCacheErrorKind {\n    AcquirePinnedSlotTimeout,\n    EvictIterLimit,\n}\n\npub(crate) fn page_cache_errors_inc(error_kind: PageCacheErrorKind) {\n    PAGE_CACHE_ERRORS\n        .get_metric_with_label_values(&[error_kind.into()])\n        .unwrap()\n        .inc();\n}\n\npub(crate) static WAIT_LSN_TIME: Lazy<Histogram> = Lazy::new(|| {\n    register_histogram!(\n        \"pageserver_wait_lsn_seconds\",\n        \"Time spent waiting for WAL to arrive. Updated on completion of the wait_lsn operation.\",\n        CRITICAL_OP_BUCKETS.into(),\n    )\n    .expect(\"failed to define a metric\")\n});\n\npub(crate) static WAIT_LSN_START_FINISH_COUNTERPAIR: Lazy<IntCounterPairVec> = Lazy::new(|| {\n    register_int_counter_pair_vec!(\n        \"pageserver_wait_lsn_started_count\",\n        \"Number of wait_lsn operations started.\",\n        \"pageserver_wait_lsn_finished_count\",\n        \"Number of wait_lsn operations finished.\",\n        &[\"tenant_id\", \"shard_id\", \"timeline_id\"],\n    )\n    .expect(\"failed to define a metric\")\n});\n\npub(crate) static WAIT_LSN_IN_PROGRESS_MICROS: Lazy<IntCounterVec> = Lazy::new(|| {\n    register_int_counter_vec!(\n        \"pageserver_wait_lsn_in_progress_micros\",\n        \"Time spent waiting for WAL to arrive, by timeline_id. Updated periodically while waiting.\",\n        &[\"tenant_id\", \"shard_id\", \"timeline_id\"],\n    )\n    .expect(\"failed to define a metric\")\n});\n\npub(crate) static WAIT_LSN_IN_PROGRESS_GLOBAL_MICROS: Lazy<IntCounter> = Lazy::new(|| {\n    register_int_counter!(\n        \"pageserver_wait_lsn_in_progress_micros_global\",\n        \"Time spent waiting for WAL to arrive, globally. 
Updated periodically while waiting.\"\n    )\n    .expect(\"failed to define a metric\")\n});\n\npub(crate) static ONDEMAND_DOWNLOAD_BYTES: Lazy<IntCounterVec> = Lazy::new(|| {\n    register_int_counter_vec!(\n        \"pageserver_ondemand_download_bytes_total\",\n        \"Total bytes of layers on-demand downloaded\",\n        &[\"task_kind\"]\n    )\n    .expect(\"failed to define a metric\")\n});\n\npub(crate) static ONDEMAND_DOWNLOAD_COUNT: Lazy<IntCounterVec> = Lazy::new(|| {\n    register_int_counter_vec!(\n        \"pageserver_ondemand_download_count\",\n        \"Total count of layers on-demand downloaded\",\n        &[\"task_kind\"]\n    )\n    .expect(\"failed to define a metric\")\n});\n\npub(crate) mod wait_ondemand_download_time {\n    use super::*;\n    const WAIT_ONDEMAND_DOWNLOAD_TIME_BUCKETS: &[f64] = &[\n        0.01, 0.02, 0.03, 0.04, 0.05, 0.06, 0.07, 0.08, 0.09, // 10 ms - 100ms\n        0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, // 100ms to 1s\n        1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, // 1s to 10s\n        10.0, 20.0, 30.0, 40.0, 50.0, 60.0, // 10s to 1m\n    ];\n\n    /// The task kinds for which we want to track wait times for on-demand downloads.\n    /// Other task kinds' wait times are accumulated in label value `unknown`.\n    pub(crate) const WAIT_ONDEMAND_DOWNLOAD_METRIC_TASK_KINDS: [TaskKind; 2] = [\n        TaskKind::PageRequestHandler,\n        TaskKind::WalReceiverConnectionHandler,\n    ];\n\n    pub(crate) static WAIT_ONDEMAND_DOWNLOAD_TIME_GLOBAL: Lazy<Vec<Histogram>> = Lazy::new(|| {\n        let histo = register_histogram_vec!(\n            \"pageserver_wait_ondemand_download_seconds_global\",\n            \"Observations are individual tasks' wait times for on-demand downloads. 
\\\n         If N tasks coalesce on an on-demand download, and it takes 10s, than we observe N * 10s.\",\n            &[\"task_kind\"],\n            WAIT_ONDEMAND_DOWNLOAD_TIME_BUCKETS.into(),\n        )\n        .expect(\"failed to define a metric\");\n        WAIT_ONDEMAND_DOWNLOAD_METRIC_TASK_KINDS\n            .iter()\n            .map(|task_kind| histo.with_label_values(&[task_kind.into()]))\n            .collect::<Vec<_>>()\n    });\n\n    pub(crate) static WAIT_ONDEMAND_DOWNLOAD_TIME_SUM: Lazy<CounterVec> = Lazy::new(|| {\n        register_counter_vec!(\n            // use a name that _could_ be evolved into a per-timeline histogram later\n            \"pageserver_wait_ondemand_download_seconds_sum\",\n            \"Like `pageserver_wait_ondemand_download_seconds_global` but per timeline\",\n            &[\"tenant_id\", \"shard_id\", \"timeline_id\", \"task_kind\"],\n        )\n        .unwrap()\n    });\n\n    pub struct WaitOndemandDownloadTimeSum {\n        counters: [Counter; WAIT_ONDEMAND_DOWNLOAD_METRIC_TASK_KINDS.len()],\n    }\n\n    impl WaitOndemandDownloadTimeSum {\n        pub(crate) fn new(tenant_id: &str, shard_id: &str, timeline_id: &str) -> Self {\n            let counters = WAIT_ONDEMAND_DOWNLOAD_METRIC_TASK_KINDS\n                .iter()\n                .map(|task_kind| {\n                    WAIT_ONDEMAND_DOWNLOAD_TIME_SUM\n                        .get_metric_with_label_values(&[\n                            tenant_id,\n                            shard_id,\n                            timeline_id,\n                            task_kind.into(),\n                        ])\n                        .unwrap()\n                })\n                .collect::<Vec<_>>();\n            Self {\n                counters: counters.try_into().unwrap(),\n            }\n        }\n        pub(crate) fn observe(&self, task_kind: TaskKind, duration: Duration) {\n            let maybe = WAIT_ONDEMAND_DOWNLOAD_METRIC_TASK_KINDS\n                .iter()\n    
            .enumerate()\n                .find(|(_, kind)| **kind == task_kind);\n            let Some((idx, _)) = maybe else {\n                return;\n            };\n            WAIT_ONDEMAND_DOWNLOAD_TIME_GLOBAL[idx].observe(duration.as_secs_f64());\n            let counter = &self.counters[idx];\n            counter.inc_by(duration.as_secs_f64());\n        }\n    }\n\n    pub(crate) fn shutdown_timeline(tenant_id: &str, shard_id: &str, timeline_id: &str) {\n        for task_kind in WAIT_ONDEMAND_DOWNLOAD_METRIC_TASK_KINDS {\n            let _ = WAIT_ONDEMAND_DOWNLOAD_TIME_SUM.remove_label_values(&[\n                tenant_id,\n                shard_id,\n                timeline_id,\n                task_kind.into(),\n            ]);\n        }\n    }\n\n    pub(crate) fn preinitialize_global_metrics() {\n        Lazy::force(&WAIT_ONDEMAND_DOWNLOAD_TIME_GLOBAL);\n    }\n}\n\nstatic LAST_RECORD_LSN: Lazy<IntGaugeVec> = Lazy::new(|| {\n    register_int_gauge_vec!(\n        \"pageserver_last_record_lsn\",\n        \"Last record LSN grouped by timeline\",\n        &[\"tenant_id\", \"shard_id\", \"timeline_id\"]\n    )\n    .expect(\"failed to define a metric\")\n});\n\nstatic DISK_CONSISTENT_LSN: Lazy<IntGaugeVec> = Lazy::new(|| {\n    register_int_gauge_vec!(\n        \"pageserver_disk_consistent_lsn\",\n        \"Disk consistent LSN grouped by timeline\",\n        &[\"tenant_id\", \"shard_id\", \"timeline_id\"]\n    )\n    .expect(\"failed to define a metric\")\n});\n\npub(crate) static PROJECTED_REMOTE_CONSISTENT_LSN: Lazy<UIntGaugeVec> = Lazy::new(|| {\n    register_uint_gauge_vec!(\n        \"pageserver_projected_remote_consistent_lsn\",\n        \"Projected remote consistent LSN grouped by timeline\",\n        &[\"tenant_id\", \"shard_id\", \"timeline_id\"]\n    )\n    .expect(\"failed to define a metric\")\n});\n\nstatic PITR_HISTORY_SIZE: Lazy<UIntGaugeVec> = Lazy::new(|| {\n    register_uint_gauge_vec!(\n        \"pageserver_pitr_history_size\",\n        
\"Data written since PITR cutoff on this timeline\",\n        &[\"tenant_id\", \"shard_id\", \"timeline_id\"]\n    )\n    .expect(\"failed to define a metric\")\n});\n\n#[derive(\n    strum_macros::EnumIter,\n    strum_macros::EnumString,\n    strum_macros::Display,\n    strum_macros::IntoStaticStr,\n)]\n#[strum(serialize_all = \"kebab_case\")]\npub(crate) enum LayerKind {\n    Delta,\n    Image,\n}\n\n#[derive(\n    strum_macros::EnumIter,\n    strum_macros::EnumString,\n    strum_macros::Display,\n    strum_macros::IntoStaticStr,\n)]\n#[strum(serialize_all = \"kebab_case\")]\npub(crate) enum LayerLevel {\n    // We don't track the currently open ephemeral layer, since there's always exactly 1 and its\n    // size changes. See `TIMELINE_EPHEMERAL_BYTES`.\n    Frozen,\n    L0,\n    L1,\n}\n\nstatic TIMELINE_LAYER_SIZE: Lazy<UIntGaugeVec> = Lazy::new(|| {\n    register_uint_gauge_vec!(\n        \"pageserver_layer_bytes\",\n        \"Sum of frozen, L0, and L1 layer physical sizes in bytes (excluding the open ephemeral layer)\",\n        &[\"tenant_id\", \"shard_id\", \"timeline_id\", \"level\", \"kind\"]\n    )\n    .expect(\"failed to define a metric\")\n});\n\nstatic TIMELINE_LAYER_COUNT: Lazy<UIntGaugeVec> = Lazy::new(|| {\n    register_uint_gauge_vec!(\n        \"pageserver_layer_count\",\n        \"Number of frozen, L0, and L1 layers (excluding the open ephemeral layer)\",\n        &[\"tenant_id\", \"shard_id\", \"timeline_id\", \"level\", \"kind\"]\n    )\n    .expect(\"failed to define a metric\")\n});\n\nstatic TIMELINE_ARCHIVE_SIZE: Lazy<UIntGaugeVec> = Lazy::new(|| {\n    register_uint_gauge_vec!(\n        \"pageserver_archive_size\",\n        \"Timeline's logical size if it is considered eligible for archival (outside PITR window), else zero\",\n        &[\"tenant_id\", \"shard_id\", \"timeline_id\"]\n    )\n    .expect(\"failed to define a metric\")\n});\n\nstatic STANDBY_HORIZON: Lazy<IntGaugeVec> = Lazy::new(|| {\n    register_int_gauge_vec!(\n        
\"pageserver_standby_horizon\",\n        \"Standby apply LSN for which GC is hold off, by timeline.\",\n        &[\"tenant_id\", \"shard_id\", \"timeline_id\"]\n    )\n    .expect(\"failed to define a metric\")\n});\n\nstatic RESIDENT_PHYSICAL_SIZE: Lazy<UIntGaugeVec> = Lazy::new(|| {\n    register_uint_gauge_vec!(\n        \"pageserver_resident_physical_size\",\n        \"The size of the layer files present in the pageserver's filesystem, for attached locations.\",\n        &[\"tenant_id\", \"shard_id\", \"timeline_id\"]\n    )\n    .expect(\"failed to define a metric\")\n});\n\nstatic VISIBLE_PHYSICAL_SIZE: Lazy<UIntGaugeVec> = Lazy::new(|| {\n    register_uint_gauge_vec!(\n        \"pageserver_visible_physical_size\",\n        \"The size of the layer files present in the pageserver's filesystem.\",\n        &[\"tenant_id\", \"shard_id\", \"timeline_id\"]\n    )\n    .expect(\"failed to define a metric\")\n});\n\npub(crate) static RESIDENT_PHYSICAL_SIZE_GLOBAL: Lazy<UIntGauge> = Lazy::new(|| {\n    register_uint_gauge!(\n        \"pageserver_resident_physical_size_global\",\n        \"Like `pageserver_resident_physical_size`, but without tenant/timeline dimensions.\"\n    )\n    .expect(\"failed to define a metric\")\n});\n\nstatic REMOTE_PHYSICAL_SIZE: Lazy<UIntGaugeVec> = Lazy::new(|| {\n    register_uint_gauge_vec!(\n        \"pageserver_remote_physical_size\",\n        \"The size of the layer files present in the remote storage that are listed in the remote index_part.json.\",\n        // Corollary: If any files are missing from the index part, they won't be included here.\n        &[\"tenant_id\", \"shard_id\", \"timeline_id\"]\n    )\n    .expect(\"failed to define a metric\")\n});\n\nstatic REMOTE_PHYSICAL_SIZE_GLOBAL: Lazy<UIntGauge> = Lazy::new(|| {\n    register_uint_gauge!(\n        \"pageserver_remote_physical_size_global\",\n        \"Like `pageserver_remote_physical_size`, but without tenant/timeline dimensions.\"\n    )\n    .expect(\"failed to 
define a metric\")\n});\n\npub(crate) static REMOTE_ONDEMAND_DOWNLOADED_LAYERS: Lazy<IntCounter> = Lazy::new(|| {\n    register_int_counter!(\n        \"pageserver_remote_ondemand_downloaded_layers_total\",\n        \"Total on-demand downloaded layers\"\n    )\n    .unwrap()\n});\n\npub(crate) static REMOTE_ONDEMAND_DOWNLOADED_BYTES: Lazy<IntCounter> = Lazy::new(|| {\n    register_int_counter!(\n        \"pageserver_remote_ondemand_downloaded_bytes_total\",\n        \"Total bytes of layers on-demand downloaded\",\n    )\n    .unwrap()\n});\n\nstatic CURRENT_LOGICAL_SIZE: Lazy<UIntGaugeVec> = Lazy::new(|| {\n    register_uint_gauge_vec!(\n        \"pageserver_current_logical_size\",\n        \"Current logical size grouped by timeline\",\n        &[\"tenant_id\", \"shard_id\", \"timeline_id\"]\n    )\n    .expect(\"failed to define current logical size metric\")\n});\n\nstatic AUX_FILE_SIZE: Lazy<IntGaugeVec> = Lazy::new(|| {\n    register_int_gauge_vec!(\n        \"pageserver_aux_file_estimated_size\",\n        \"The size of all aux files for a timeline in aux file v2 store.\",\n        &[\"tenant_id\", \"shard_id\", \"timeline_id\"]\n    )\n    .expect(\"failed to define a metric\")\n});\n\nstatic VALID_LSN_LEASE_COUNT: Lazy<UIntGaugeVec> = Lazy::new(|| {\n    register_uint_gauge_vec!(\n        \"pageserver_valid_lsn_lease_count\",\n        \"The number of valid leases after refreshing gc info.\",\n        &[\"tenant_id\", \"shard_id\", \"timeline_id\"],\n    )\n    .expect(\"failed to define a metric\")\n});\n\npub(crate) static CIRCUIT_BREAKERS_BROKEN: Lazy<IntCounter> = Lazy::new(|| {\n    register_int_counter!(\n        \"pageserver_circuit_breaker_broken\",\n        \"How many times a circuit breaker has broken\"\n    )\n    .expect(\"failed to define a metric\")\n});\n\npub(crate) static CIRCUIT_BREAKERS_UNBROKEN: Lazy<IntCounter> = Lazy::new(|| {\n    register_int_counter!(\n        \"pageserver_circuit_breaker_unbroken\",\n        \"How many times a circuit 
breaker has been un-broken (recovered)\"\n    )\n    .expect(\"failed to define a metric\")\n});\n\npub(crate) static COMPRESSION_IMAGE_INPUT_BYTES: Lazy<IntCounter> = Lazy::new(|| {\n    register_int_counter!(\n        \"pageserver_compression_image_in_bytes_total\",\n        \"Size of data written into image layers before compression\"\n    )\n    .expect(\"failed to define a metric\")\n});\n\npub(crate) static COMPRESSION_IMAGE_INPUT_BYTES_CONSIDERED: Lazy<IntCounter> = Lazy::new(|| {\n    register_int_counter!(\n        \"pageserver_compression_image_in_bytes_considered\",\n        \"Size of potentially compressible data written into image layers before compression\"\n    )\n    .expect(\"failed to define a metric\")\n});\n\npub(crate) static COMPRESSION_IMAGE_INPUT_BYTES_CHOSEN: Lazy<IntCounter> = Lazy::new(|| {\n    register_int_counter!(\n        \"pageserver_compression_image_in_bytes_chosen\",\n        \"Size of data whose compressed form was written into image layers\"\n    )\n    .expect(\"failed to define a metric\")\n});\n\npub(crate) static COMPRESSION_IMAGE_OUTPUT_BYTES: Lazy<IntCounter> = Lazy::new(|| {\n    register_int_counter!(\n        \"pageserver_compression_image_out_bytes_total\",\n        \"Size of compressed image layer written\"\n    )\n    .expect(\"failed to define a metric\")\n});\n\npub(crate) static RELSIZE_LATEST_CACHE_ENTRIES: Lazy<UIntGauge> = Lazy::new(|| {\n    register_uint_gauge!(\n        \"pageserver_relsize_latest_cache_entries\",\n        \"Number of entries in the latest relation size cache\",\n    )\n    .expect(\"failed to define a metric\")\n});\n\npub(crate) static RELSIZE_LATEST_CACHE_HITS: Lazy<IntCounter> = Lazy::new(|| {\n    register_int_counter!(\n        \"pageserver_relsize_latest_cache_hits\",\n        \"Latest relation size cache hits\",\n    )\n    .expect(\"failed to define a metric\")\n});\n\npub(crate) static RELSIZE_LATEST_CACHE_MISSES: Lazy<IntCounter> = Lazy::new(|| {\n    register_int_counter!(\n     
   \"pageserver_relsize_latest_cache_misses\",\n        \"Relation size latest cache misses\",\n    )\n    .expect(\"failed to define a metric\")\n});\n\npub(crate) static RELSIZE_SNAPSHOT_CACHE_ENTRIES: Lazy<UIntGauge> = Lazy::new(|| {\n    register_uint_gauge!(\n        \"pageserver_relsize_snapshot_cache_entries\",\n        \"Number of entries in the pitr relation size cache\",\n    )\n    .expect(\"failed to define a metric\")\n});\n\npub(crate) static RELSIZE_SNAPSHOT_CACHE_HITS: Lazy<IntCounter> = Lazy::new(|| {\n    register_int_counter!(\n        \"pageserver_relsize_snapshot_cache_hits\",\n        \"Pitr relation size cache hits\",\n    )\n    .expect(\"failed to define a metric\")\n});\n\npub(crate) static RELSIZE_SNAPSHOT_CACHE_MISSES: Lazy<IntCounter> = Lazy::new(|| {\n    register_int_counter!(\n        \"pageserver_relsize_snapshot_cache_misses\",\n        \"Relation size snapshot cache misses\",\n    )\n    .expect(\"failed to define a metric\")\n});\n\npub(crate) static RELSIZE_CACHE_MISSES_OLD: Lazy<IntCounter> = Lazy::new(|| {\n    register_int_counter!(\n        \"pageserver_relsize_cache_misses_old\",\n        \"Relation size cache misses where the lookup LSN is older than the last relation update\"\n    )\n    .expect(\"failed to define a metric\")\n});\n\npub(crate) mod initial_logical_size {\n    use metrics::{IntCounter, IntCounterVec, register_int_counter, register_int_counter_vec};\n    use once_cell::sync::Lazy;\n\n    pub(crate) struct StartCalculation(IntCounterVec);\n    pub(crate) static START_CALCULATION: Lazy<StartCalculation> = Lazy::new(|| {\n        StartCalculation(\n            register_int_counter_vec!(\n                \"pageserver_initial_logical_size_start_calculation\",\n                \"Incremented each time we start an initial logical size calculation attempt. 
\\\n                 The `circumstances` label provides some additional details.\",\n                &[\"attempt\", \"circumstances\"]\n            )\n            .unwrap(),\n        )\n    });\n\n    struct DropCalculation {\n        first: IntCounter,\n        retry: IntCounter,\n    }\n\n    static DROP_CALCULATION: Lazy<DropCalculation> = Lazy::new(|| {\n        let vec = register_int_counter_vec!(\n            \"pageserver_initial_logical_size_drop_calculation\",\n            \"Incremented each time we abort a started size calculation attmpt.\",\n            &[\"attempt\"]\n        )\n        .unwrap();\n        DropCalculation {\n            first: vec.with_label_values(&[\"first\"]),\n            retry: vec.with_label_values(&[\"retry\"]),\n        }\n    });\n\n    pub(crate) struct Calculated {\n        pub(crate) births: IntCounter,\n        pub(crate) deaths: IntCounter,\n    }\n\n    pub(crate) static CALCULATED: Lazy<Calculated> = Lazy::new(|| Calculated {\n        births: register_int_counter!(\n            \"pageserver_initial_logical_size_finish_calculation\",\n            \"Incremented every time we finish calculation of initial logical size.\\\n             If everything is working well, this should happen at most once per Timeline object.\"\n        )\n        .unwrap(),\n        deaths: register_int_counter!(\n            \"pageserver_initial_logical_size_drop_finished_calculation\",\n            \"Incremented when we drop a finished initial logical size calculation result.\\\n             Mainly useful to turn pageserver_initial_logical_size_finish_calculation into a gauge.\"\n        )\n        .unwrap(),\n    });\n\n    pub(crate) struct OngoingCalculationGuard {\n        inc_drop_calculation: Option<IntCounter>,\n    }\n\n    #[derive(strum_macros::IntoStaticStr)]\n    pub(crate) enum StartCircumstances {\n        EmptyInitial,\n        SkippedConcurrencyLimiter,\n        AfterBackgroundTasksRateLimit,\n    }\n\n    impl StartCalculation {\n 
       pub(crate) fn first(&self, circumstances: StartCircumstances) -> OngoingCalculationGuard {\n            let circumstances_label: &'static str = circumstances.into();\n            self.0\n                .with_label_values(&[\"first\", circumstances_label])\n                .inc();\n            OngoingCalculationGuard {\n                inc_drop_calculation: Some(DROP_CALCULATION.first.clone()),\n            }\n        }\n        pub(crate) fn retry(&self, circumstances: StartCircumstances) -> OngoingCalculationGuard {\n            let circumstances_label: &'static str = circumstances.into();\n            self.0\n                .with_label_values(&[\"retry\", circumstances_label])\n                .inc();\n            OngoingCalculationGuard {\n                inc_drop_calculation: Some(DROP_CALCULATION.retry.clone()),\n            }\n        }\n    }\n\n    impl Drop for OngoingCalculationGuard {\n        fn drop(&mut self) {\n            if let Some(counter) = self.inc_drop_calculation.take() {\n                counter.inc();\n            }\n        }\n    }\n\n    impl OngoingCalculationGuard {\n        pub(crate) fn calculation_result_saved(mut self) -> FinishedCalculationGuard {\n            drop(self.inc_drop_calculation.take());\n            CALCULATED.births.inc();\n            FinishedCalculationGuard {\n                inc_on_drop: CALCULATED.deaths.clone(),\n            }\n        }\n    }\n\n    pub(crate) struct FinishedCalculationGuard {\n        inc_on_drop: IntCounter,\n    }\n\n    impl Drop for FinishedCalculationGuard {\n        fn drop(&mut self) {\n            self.inc_on_drop.inc();\n        }\n    }\n\n    // context: https://github.com/neondatabase/neon/issues/5963\n    pub(crate) static TIMELINES_WHERE_WALRECEIVER_GOT_APPROXIMATE_SIZE: Lazy<IntCounter> =\n        Lazy::new(|| {\n            register_int_counter!(\n                \"pageserver_initial_logical_size_timelines_where_walreceiver_got_approximate_size\",\n                
\"Counter for the following event: walreceiver calls\\\n                 Timeline::get_current_logical_size() and it returns `Approximate` for the first time.\"\n            )\n            .unwrap()\n        });\n}\n\nstatic DIRECTORY_ENTRIES_COUNT: Lazy<UIntGaugeVec> = Lazy::new(|| {\n    register_uint_gauge_vec!(\n        \"pageserver_directory_entries_count\",\n        \"Sum of the entries in pageserver-stored directory listings\",\n        &[\"tenant_id\", \"shard_id\", \"timeline_id\"]\n    )\n    .expect(\"failed to define a metric\")\n});\n\npub(crate) static TENANT_STATE_METRIC: Lazy<UIntGaugeVec> = Lazy::new(|| {\n    register_uint_gauge_vec!(\n        \"pageserver_tenant_states_count\",\n        \"Count of tenants per state\",\n        &[\"state\"]\n    )\n    .expect(\"Failed to register pageserver_tenant_states_count metric\")\n});\n\npub(crate) static TIMELINE_STATE_METRIC: Lazy<UIntGaugeVec> = Lazy::new(|| {\n    register_uint_gauge_vec!(\n        \"pageserver_timeline_states_count\",\n        \"Count of timelines per state\",\n        &[\"state\"]\n    )\n    .expect(\"Failed to register pageserver_timeline_states_count metric\")\n});\n\n/// A set of broken tenants.\n///\n/// These are expected to be so rare that a set is fine. 
Set as in a new timeseries per each broken\n/// tenant.\npub(crate) static BROKEN_TENANTS_SET: Lazy<UIntGaugeVec> = Lazy::new(|| {\n    register_uint_gauge_vec!(\n        \"pageserver_broken_tenants_count\",\n        \"Set of broken tenants\",\n        &[\"tenant_id\", \"shard_id\"]\n    )\n    .expect(\"Failed to register pageserver_tenant_states_count metric\")\n});\n\npub(crate) static TENANT_SYNTHETIC_SIZE_METRIC: Lazy<UIntGaugeVec> = Lazy::new(|| {\n    register_uint_gauge_vec!(\n        \"pageserver_tenant_synthetic_cached_size_bytes\",\n        \"Synthetic size of each tenant in bytes\",\n        &[\"tenant_id\"]\n    )\n    .expect(\"Failed to register pageserver_tenant_synthetic_cached_size_bytes metric\")\n});\n\npub(crate) static TENANT_OFFLOADED_TIMELINES: Lazy<UIntGaugeVec> = Lazy::new(|| {\n    register_uint_gauge_vec!(\n        \"pageserver_tenant_offloaded_timelines\",\n        \"Number of offloaded timelines of a tenant\",\n        &[\"tenant_id\", \"shard_id\"]\n    )\n    .expect(\"Failed to register pageserver_tenant_offloaded_timelines metric\")\n});\n\npub(crate) static EVICTION_ITERATION_DURATION: Lazy<HistogramVec> = Lazy::new(|| {\n    register_histogram_vec!(\n        \"pageserver_eviction_iteration_duration_seconds_global\",\n        \"Time spent on a single eviction iteration\",\n        &[\"period_secs\", \"threshold_secs\"],\n        STORAGE_OP_BUCKETS.into(),\n    )\n    .expect(\"failed to define a metric\")\n});\n\nstatic EVICTIONS: Lazy<IntCounterVec> = Lazy::new(|| {\n    register_int_counter_vec!(\n        \"pageserver_evictions\",\n        \"Number of layers evicted from the pageserver\",\n        &[\"tenant_id\", \"shard_id\", \"timeline_id\"]\n    )\n    .expect(\"failed to define a metric\")\n});\n\nstatic EVICTIONS_WITH_LOW_RESIDENCE_DURATION: Lazy<IntCounterVec> = Lazy::new(|| {\n    register_int_counter_vec!(\n        \"pageserver_evictions_with_low_residence_duration\",\n        \"If a layer is evicted that was resident 
for less than `low_threshold`, it is counted to this counter. \\\n         Residence duration is determined using the `residence_duration_data_source`.\",\n        &[\"tenant_id\", \"shard_id\", \"timeline_id\", \"residence_duration_data_source\", \"low_threshold_secs\"]\n    )\n    .expect(\"failed to define a metric\")\n});\n\npub(crate) static UNEXPECTED_ONDEMAND_DOWNLOADS: Lazy<IntCounter> = Lazy::new(|| {\n    register_int_counter!(\n        \"pageserver_unexpected_ondemand_downloads_count\",\n        \"Number of unexpected on-demand downloads. \\\n         We log more context for each increment, so, forgo any labels in this metric.\",\n    )\n    .expect(\"failed to define a metric\")\n});\n\n/// How long did we take to start up?  Broken down by labels to describe\n/// different phases of startup.\npub static STARTUP_DURATION: Lazy<GaugeVec> = Lazy::new(|| {\n    register_gauge_vec!(\n        \"pageserver_startup_duration_seconds\",\n        \"Time taken by phases of pageserver startup, in seconds\",\n        &[\"phase\"]\n    )\n    .expect(\"Failed to register pageserver_startup_duration_seconds metric\")\n});\n\npub static STARTUP_IS_LOADING: Lazy<UIntGauge> = Lazy::new(|| {\n    register_uint_gauge!(\n        \"pageserver_startup_is_loading\",\n        \"1 while in initial startup load of tenants, 0 at other times\"\n    )\n    .expect(\"Failed to register pageserver_startup_is_loading\")\n});\n\npub(crate) static TIMELINE_EPHEMERAL_BYTES: Lazy<UIntGauge> = Lazy::new(|| {\n    register_uint_gauge!(\n        \"pageserver_timeline_ephemeral_bytes\",\n        \"Total number of bytes in ephemeral layers, summed for all timelines.  Approximate, lazily updated.\"\n    )\n    .expect(\"Failed to register metric\")\n});\n\n/// Metrics related to the lifecycle of a [`crate::tenant::TenantShard`] object: things\n/// like how long it took to load.\n///\n/// Note that these are process-global metrics, _not_ per-tenant metrics.  
Per-tenant\n/// metrics are rather expensive, and usually fine grained stuff makes more sense\n/// at a timeline level than tenant level.\npub(crate) struct TenantMetrics {\n    /// How long did tenants take to go from construction to active state?\n    pub(crate) activation: Histogram,\n    pub(crate) preload: Histogram,\n    pub(crate) attach: Histogram,\n\n    /// How many tenants are included in the initial startup of the pagesrever?\n    pub(crate) startup_scheduled: IntCounter,\n    pub(crate) startup_complete: IntCounter,\n}\n\npub(crate) static TENANT: Lazy<TenantMetrics> = Lazy::new(|| {\n    TenantMetrics {\n    activation: register_histogram!(\n        \"pageserver_tenant_activation_seconds\",\n        \"Time taken by tenants to activate, in seconds\",\n        CRITICAL_OP_BUCKETS.into()\n    )\n    .expect(\"Failed to register metric\"),\n    preload: register_histogram!(\n        \"pageserver_tenant_preload_seconds\",\n        \"Time taken by tenants to load remote metadata on startup/attach, in seconds\",\n        CRITICAL_OP_BUCKETS.into()\n    )\n    .expect(\"Failed to register metric\"),\n    attach: register_histogram!(\n        \"pageserver_tenant_attach_seconds\",\n        \"Time taken by tenants to intialize, after remote metadata is already loaded\",\n        CRITICAL_OP_BUCKETS.into()\n    )\n    .expect(\"Failed to register metric\"),\n    startup_scheduled: register_int_counter!(\n        \"pageserver_tenant_startup_scheduled\",\n        \"Number of tenants included in pageserver startup (doesn't count tenants attached later)\"\n    ).expect(\"Failed to register metric\"),\n    startup_complete: register_int_counter!(\n        \"pageserver_tenant_startup_complete\",\n        \"Number of tenants that have completed warm-up, or activated on-demand during initial startup: \\\n         should eventually reach `pageserver_tenant_startup_scheduled_total`.  
Does not include broken \\\n         tenants: such cases will lead to this metric never reaching the scheduled count.\"\n    ).expect(\"Failed to register metric\"),\n}\n});\n\n/// Each `Timeline`'s  [`EVICTIONS_WITH_LOW_RESIDENCE_DURATION`] metric.\n#[derive(Debug)]\npub(crate) struct EvictionsWithLowResidenceDuration {\n    data_source: &'static str,\n    threshold: Duration,\n    counter: Option<IntCounter>,\n}\n\npub(crate) struct EvictionsWithLowResidenceDurationBuilder {\n    data_source: &'static str,\n    threshold: Duration,\n}\n\nimpl EvictionsWithLowResidenceDurationBuilder {\n    pub fn new(data_source: &'static str, threshold: Duration) -> Self {\n        Self {\n            data_source,\n            threshold,\n        }\n    }\n\n    fn build(\n        &self,\n        tenant_id: &str,\n        shard_id: &str,\n        timeline_id: &str,\n    ) -> EvictionsWithLowResidenceDuration {\n        let counter = EVICTIONS_WITH_LOW_RESIDENCE_DURATION\n            .get_metric_with_label_values(&[\n                tenant_id,\n                shard_id,\n                timeline_id,\n                self.data_source,\n                &EvictionsWithLowResidenceDuration::threshold_label_value(self.threshold),\n            ])\n            .unwrap();\n        EvictionsWithLowResidenceDuration {\n            data_source: self.data_source,\n            threshold: self.threshold,\n            counter: Some(counter),\n        }\n    }\n}\n\nimpl EvictionsWithLowResidenceDuration {\n    fn threshold_label_value(threshold: Duration) -> String {\n        format!(\"{}\", threshold.as_secs())\n    }\n\n    pub fn observe(&self, observed_value: Duration) {\n        if observed_value < self.threshold {\n            self.counter\n                .as_ref()\n                .expect(\"nobody calls this function after `remove_from_vec`\")\n                .inc();\n        }\n    }\n\n    pub fn change_threshold(\n        &mut self,\n        tenant_id: &str,\n        shard_id: 
&str,\n        timeline_id: &str,\n        new_threshold: Duration,\n    ) {\n        if new_threshold == self.threshold {\n            return;\n        }\n        let mut with_new = EvictionsWithLowResidenceDurationBuilder::new(\n            self.data_source,\n            new_threshold,\n        )\n        .build(tenant_id, shard_id, timeline_id);\n        std::mem::swap(self, &mut with_new);\n        with_new.remove(tenant_id, shard_id, timeline_id);\n    }\n\n    // This could be a `Drop` impl, but, we need the `tenant_id` and `timeline_id`.\n    fn remove(&mut self, tenant_id: &str, shard_id: &str, timeline_id: &str) {\n        let Some(_counter) = self.counter.take() else {\n            return;\n        };\n\n        let threshold = Self::threshold_label_value(self.threshold);\n\n        let removed = EVICTIONS_WITH_LOW_RESIDENCE_DURATION.remove_label_values(&[\n            tenant_id,\n            shard_id,\n            timeline_id,\n            self.data_source,\n            &threshold,\n        ]);\n\n        match removed {\n            Err(e) => {\n                // this has been hit in staging as\n                // <https://neondatabase.sentry.io/issues/4142396994/>, but we don't know how.\n                // because we can be in the drop path already, don't risk:\n                // - \"double-panic => illegal instruction\" or\n                // - future \"drop panick => abort\"\n                //\n                // so just nag: (the error has the labels)\n                tracing::warn!(\n                    \"failed to remove EvictionsWithLowResidenceDuration, it was already removed? 
{e:#?}\"\n                );\n            }\n            Ok(()) => {\n                // to help identify cases where we double-remove the same values, let's log all\n                // deletions?\n                tracing::info!(\n                    \"removed EvictionsWithLowResidenceDuration with {tenant_id}, {timeline_id}, {}, {threshold}\",\n                    self.data_source\n                );\n            }\n        }\n    }\n}\n\n// Metrics collected on disk IO operations\n//\n// Roughly logarithmic scale.\nconst STORAGE_IO_TIME_BUCKETS: &[f64] = &[\n    0.00005,  // 50us\n    0.00006,  // 60us\n    0.00007,  // 70us\n    0.00008,  // 80us\n    0.00009,  // 90us\n    0.0001,   // 100us\n    0.000110, // 110us\n    0.000120, // 120us\n    0.000130, // 130us\n    0.000140, // 140us\n    0.000150, // 150us\n    0.000160, // 160us\n    0.000170, // 170us\n    0.000180, // 180us\n    0.000190, // 190us\n    0.000200, // 200us\n    0.000210, // 210us\n    0.000220, // 220us\n    0.000230, // 230us\n    0.000240, // 240us\n    0.000250, // 250us\n    0.000300, // 300us\n    0.000350, // 350us\n    0.000400, // 400us\n    0.000450, // 450us\n    0.000500, // 500us\n    0.000600, // 600us\n    0.000700, // 700us\n    0.000800, // 800us\n    0.000900, // 900us\n    0.001000, // 1ms\n    0.002000, // 2ms\n    0.003000, // 3ms\n    0.004000, // 4ms\n    0.005000, // 5ms\n    0.01000,  // 10ms\n    0.02000,  // 20ms\n    0.05000,  // 50ms\n];\n\n/// VirtualFile fs operation variants.\n///\n/// Operations:\n/// - open ([`std::fs::OpenOptions::open`])\n/// - close (dropping [`crate::virtual_file::VirtualFile`])\n/// - close-by-replace (close by replacement algorithm)\n/// - read (`read_at`)\n/// - write (`write_at`)\n/// - seek (modify internal position or file length query)\n/// - fsync ([`std::fs::File::sync_all`])\n/// - metadata ([`std::fs::File::metadata`])\n#[derive(\n    Debug, Clone, Copy, strum_macros::EnumCount, strum_macros::EnumIter, 
strum_macros::FromRepr,\n)]\npub(crate) enum StorageIoOperation {\n    Open,\n    OpenAfterReplace,\n    Close,\n    CloseByReplace,\n    Read,\n    Write,\n    Seek,\n    Fsync,\n    Metadata,\n    SetLen,\n}\n\nimpl StorageIoOperation {\n    pub fn as_str(&self) -> &'static str {\n        match self {\n            StorageIoOperation::Open => \"open\",\n            StorageIoOperation::OpenAfterReplace => \"open-after-replace\",\n            StorageIoOperation::Close => \"close\",\n            StorageIoOperation::CloseByReplace => \"close-by-replace\",\n            StorageIoOperation::Read => \"read\",\n            StorageIoOperation::Write => \"write\",\n            StorageIoOperation::Seek => \"seek\",\n            StorageIoOperation::Fsync => \"fsync\",\n            StorageIoOperation::Metadata => \"metadata\",\n            StorageIoOperation::SetLen => \"set_len\",\n        }\n    }\n}\n\n/// Tracks time taken by fs operations near VirtualFile.\n#[derive(Debug)]\npub(crate) struct StorageIoTime {\n    metrics: [Histogram; StorageIoOperation::COUNT],\n}\n\nimpl StorageIoTime {\n    fn new() -> Self {\n        let storage_io_histogram_vec = register_histogram_vec!(\n            \"pageserver_io_operations_seconds\",\n            \"Time spent in IO operations\",\n            &[\"operation\"],\n            STORAGE_IO_TIME_BUCKETS.into()\n        )\n        .expect(\"failed to define a metric\");\n        let metrics = std::array::from_fn(|i| {\n            let op = StorageIoOperation::from_repr(i).unwrap();\n            storage_io_histogram_vec\n                .get_metric_with_label_values(&[op.as_str()])\n                .unwrap()\n        });\n        Self { metrics }\n    }\n\n    pub(crate) fn get(&self, op: StorageIoOperation) -> &Histogram {\n        &self.metrics[op as usize]\n    }\n}\n\npub(crate) static STORAGE_IO_TIME_METRIC: Lazy<StorageIoTime> = Lazy::new(StorageIoTime::new);\n\n#[derive(Clone, Copy)]\n#[repr(usize)]\npub(crate) enum 
StorageIoSizeOperation {\n    Read,\n    Write,\n}\n\nimpl StorageIoSizeOperation {\n    pub(crate) const VARIANTS: &'static [&'static str] = &[\"read\", \"write\"];\n\n    fn as_str(&self) -> &'static str {\n        Self::VARIANTS[*self as usize]\n    }\n}\n\n// Needed for the https://neonprod.grafana.net/d/5uK9tHL4k/picking-tenant-for-relocation?orgId=1\npub(crate) static STORAGE_IO_SIZE: Lazy<UIntGaugeVec> = Lazy::new(|| {\n    register_uint_gauge_vec!(\n        \"pageserver_io_operations_bytes_total\",\n        \"Total amount of bytes read/written in IO operations\",\n        &[\"operation\", \"tenant_id\", \"shard_id\", \"timeline_id\"]\n    )\n    .expect(\"failed to define a metric\")\n});\n\n#[derive(Clone, Debug)]\npub(crate) struct StorageIoSizeMetrics {\n    pub read: UIntGauge,\n    pub write: UIntGauge,\n}\n\nimpl StorageIoSizeMetrics {\n    pub(crate) fn new(tenant_id: &str, shard_id: &str, timeline_id: &str) -> Self {\n        let read = STORAGE_IO_SIZE\n            .get_metric_with_label_values(&[\n                StorageIoSizeOperation::Read.as_str(),\n                tenant_id,\n                shard_id,\n                timeline_id,\n            ])\n            .unwrap();\n        let write = STORAGE_IO_SIZE\n            .get_metric_with_label_values(&[\n                StorageIoSizeOperation::Write.as_str(),\n                tenant_id,\n                shard_id,\n                timeline_id,\n            ])\n            .unwrap();\n        Self { read, write }\n    }\n}\n\n#[cfg(not(test))]\npub(crate) mod virtual_file_descriptor_cache {\n    use super::*;\n\n    pub(crate) static SIZE_MAX: Lazy<UIntGauge> = Lazy::new(|| {\n        register_uint_gauge!(\n            \"pageserver_virtual_file_descriptor_cache_size_max\",\n            \"Maximum number of open file descriptors in the cache.\"\n        )\n        .unwrap()\n    });\n\n    // SIZE_CURRENT: derive it like so:\n    // ```\n    // sum 
(pageserver_io_operations_seconds_count{operation=~\"^(open|open-after-replace)$\"})\n    // -ignoring(operation)\n    // sum(pageserver_io_operations_seconds_count{operation=~\"^(close|close-by-replace)$\"})\n    // ```\n}\n\n#[cfg(not(test))]\npub(crate) mod virtual_file_io_engine {\n    use super::*;\n\n    pub(crate) static KIND: Lazy<UIntGaugeVec> = Lazy::new(|| {\n        register_uint_gauge_vec!(\n            \"pageserver_virtual_file_io_engine_kind\",\n            \"The configured io engine for VirtualFile\",\n            &[\"kind\"],\n        )\n        .unwrap()\n    });\n}\n\npub(crate) struct SmgrOpTimer(Option<SmgrOpTimerInner>);\npub(crate) struct SmgrOpTimerInner {\n    global_execution_latency_histo: Histogram,\n    per_timeline_execution_latency_histo: Option<Histogram>,\n\n    global_batch_wait_time: Histogram,\n    per_timeline_batch_wait_time: Histogram,\n\n    global_flush_in_progress_micros: IntCounter,\n    per_timeline_flush_in_progress_micros: IntCounter,\n\n    throttling: Arc<tenant_throttling::Pagestream>,\n\n    timings: SmgrOpTimerState,\n}\n\n/// The stages of request processing are represented by the enum variants.\n/// Used as part of [`SmgrOpTimerInner::timings`].\n///\n/// Request processing calls into the `SmgrOpTimer::observe_*` methods at the\n/// transition points.\n/// These methods bump relevant counters and then update [`SmgrOpTimerInner::timings`]\n/// to the next state.\n///\n/// Each request goes through every stage, in all configurations.\n///\n#[derive(Debug)]\nenum SmgrOpTimerState {\n    Received {\n        // In the future, we may want to track the full time the request spent\n        // inside pageserver process (time spent in kernel buffers can't be tracked).\n        // `received_at` would be used for that.\n        #[allow(dead_code)]\n        received_at: Instant,\n    },\n    Throttling {\n        throttle_started_at: Instant,\n    },\n    Batching {\n        throttle_done_at: Instant,\n    },\n    Executing {\n 
       execution_started_at: Instant,\n    },\n    Flushing,\n    // NB: when adding observation points, remember to update the Drop impl.\n}\n\n// NB: when adding observation points, remember to update the Drop impl.\nimpl SmgrOpTimer {\n    /// See [`SmgrOpTimerState`] for more context.\n    pub(crate) fn observe_throttle_start(&mut self, at: Instant) {\n        let Some(inner) = self.0.as_mut() else {\n            return;\n        };\n        let SmgrOpTimerState::Received { received_at: _ } = &mut inner.timings else {\n            return;\n        };\n        inner.throttling.count_accounted_start.inc();\n        inner.timings = SmgrOpTimerState::Throttling {\n            throttle_started_at: at,\n        };\n    }\n\n    /// See [`SmgrOpTimerState`] for more context.\n    pub(crate) fn observe_throttle_done(&mut self, throttle: ThrottleResult) {\n        let Some(inner) = self.0.as_mut() else {\n            return;\n        };\n        let SmgrOpTimerState::Throttling {\n            throttle_started_at,\n        } = &inner.timings\n        else {\n            return;\n        };\n        inner.throttling.count_accounted_finish.inc();\n        match throttle {\n            ThrottleResult::NotThrottled { end } => {\n                inner.timings = SmgrOpTimerState::Batching {\n                    throttle_done_at: end,\n                };\n            }\n            ThrottleResult::Throttled { end } => {\n                // update metrics\n                inner.throttling.count_throttled.inc();\n                inner\n                    .throttling\n                    .wait_time\n                    .inc_by((end - *throttle_started_at).as_micros().try_into().unwrap());\n                // state transition\n                inner.timings = SmgrOpTimerState::Batching {\n                    throttle_done_at: end,\n                };\n            }\n        }\n    }\n\n    /// See [`SmgrOpTimerState`] for more context.\n    pub(crate) fn 
observe_execution_start(&mut self, at: Instant) {\n        let Some(inner) = self.0.as_mut() else {\n            return;\n        };\n        let SmgrOpTimerState::Batching { throttle_done_at } = &inner.timings else {\n            return;\n        };\n        // update metrics\n        let batch = at - *throttle_done_at;\n        inner.global_batch_wait_time.observe(batch.as_secs_f64());\n        inner\n            .per_timeline_batch_wait_time\n            .observe(batch.as_secs_f64());\n        // state transition\n        inner.timings = SmgrOpTimerState::Executing {\n            execution_started_at: at,\n        }\n    }\n\n    /// For all but the first caller, this is a no-op.\n    /// The first callers receives Some, subsequent ones None.\n    ///\n    /// See [`SmgrOpTimerState`] for more context.\n    pub(crate) fn observe_execution_end(&mut self, at: Instant) -> Option<SmgrOpFlushInProgress> {\n        // NB: unlike the other observe_* methods, this one take()s.\n        #[allow(clippy::question_mark)] // maintain similar code pattern.\n        let Some(mut inner) = self.0.take() else {\n            return None;\n        };\n        let SmgrOpTimerState::Executing {\n            execution_started_at,\n        } = &inner.timings\n        else {\n            return None;\n        };\n        // update metrics\n        let execution = at - *execution_started_at;\n        inner\n            .global_execution_latency_histo\n            .observe(execution.as_secs_f64());\n        if let Some(per_timeline_execution_latency_histo) =\n            &inner.per_timeline_execution_latency_histo\n        {\n            per_timeline_execution_latency_histo.observe(execution.as_secs_f64());\n        }\n\n        // state transition\n        inner.timings = SmgrOpTimerState::Flushing;\n\n        // return the flush in progress object which\n        // will do the remaining metrics updates\n        let SmgrOpTimerInner {\n            global_flush_in_progress_micros,\n       
     per_timeline_flush_in_progress_micros,\n            ..\n        } = inner;\n        Some(SmgrOpFlushInProgress {\n            global_micros: global_flush_in_progress_micros,\n            per_timeline_micros: per_timeline_flush_in_progress_micros,\n        })\n    }\n}\n\n/// The last stage of request processing is serializing and flushing the request\n/// into the TCP connection. We want to make slow flushes observable\n/// _while they are occurring_, so this struct provides a wrapper method [`Self::measure`]\n/// to periodically bump the metric.\n///\n/// If in the future we decide that we're not interested in live updates, we can\n/// add another `observe_*` method to [`SmgrOpTimer`], follow the existing pattern there,\n/// and remove this struct from the code base.\npub(crate) struct SmgrOpFlushInProgress {\n    global_micros: IntCounter,\n    per_timeline_micros: IntCounter,\n}\n\nimpl Drop for SmgrOpTimer {\n    fn drop(&mut self) {\n        // In case of early drop, update any of the remaining metrics with\n        // observations so that (started,finished) counter pairs balance out\n        // and all counters on the latency path have the same number of\n        // observations.\n        // It's technically lying and it would be better if each metric had\n        // a separate label or similar for cancelled requests.\n        // But we don't have that right now and counter pairs balancing\n        // out is useful when using the metrics in panels and whatnot.\n        let now = Instant::now();\n        self.observe_throttle_start(now);\n        self.observe_throttle_done(ThrottleResult::NotThrottled { end: now });\n        self.observe_execution_start(now);\n        let maybe_flush_timer = self.observe_execution_end(now);\n        drop(maybe_flush_timer);\n    }\n}\n\nimpl SmgrOpFlushInProgress {\n    /// The caller must guarantee that `socket_fd` outlives this function.\n    pub(crate) async fn measure<Fut, O>(self, started_at: Instant, fut: Fut, 
socket_fd: RawFd) -> O\n    where\n        Fut: std::future::Future<Output = O>,\n    {\n        let mut fut = std::pin::pin!(fut);\n\n        let mut logged = false;\n        let mut last_counter_increment_at = started_at;\n        let mut observe_guard = scopeguard::guard(\n            |is_timeout| {\n                let now = Instant::now();\n\n                // Increment counter\n                {\n                    let elapsed_since_last_observe = now - last_counter_increment_at;\n                    self.global_micros\n                        .inc_by(u64::try_from(elapsed_since_last_observe.as_micros()).unwrap());\n                    self.per_timeline_micros\n                        .inc_by(u64::try_from(elapsed_since_last_observe.as_micros()).unwrap());\n                    last_counter_increment_at = now;\n                }\n\n                // Log something on every timeout, and on completion but only if we hit a timeout.\n                if is_timeout || logged {\n                    logged = true;\n                    let elapsed_total = now - started_at;\n                    let msg = if is_timeout {\n                        \"slow flush ongoing\"\n                    } else {\n                        \"slow flush completed or cancelled\"\n                    };\n\n                    let (inq, outq) = {\n                        // SAFETY: caller guarantees that `socket_fd` outlives this function.\n                        #[cfg(target_os = \"linux\")]\n                        unsafe {\n                            (\n                                utils::linux_socket_ioctl::inq(socket_fd).unwrap_or(-2),\n                                utils::linux_socket_ioctl::outq(socket_fd).unwrap_or(-2),\n                            )\n                        }\n                        #[cfg(not(target_os = \"linux\"))]\n                        {\n                            _ = socket_fd; // appease unused lint on macOS\n                            (-1, -1)\n 
                       }\n                    };\n\n                    let elapsed_total_secs = format!(\"{:.6}\", elapsed_total.as_secs_f64());\n                    tracing::info!(elapsed_total_secs, inq, outq, msg);\n                }\n            },\n            |mut observe| {\n                observe(false);\n            },\n        );\n\n        loop {\n            match tokio::time::timeout(Duration::from_secs(10), &mut fut).await {\n                Ok(v) => return v,\n                Err(_timeout) => {\n                    (*observe_guard)(true);\n                }\n            }\n        }\n    }\n}\n\n#[derive(\n    Debug,\n    Clone,\n    Copy,\n    IntoStaticStr,\n    strum_macros::EnumCount,\n    strum_macros::EnumIter,\n    strum_macros::FromRepr,\n    enum_map::Enum,\n)]\n#[strum(serialize_all = \"snake_case\")]\npub enum SmgrQueryType {\n    GetRelExists,\n    GetRelSize,\n    GetPageAtLsn,\n    GetDbSize,\n    GetSlruSegment,\n    #[cfg(feature = \"testing\")]\n    Test,\n}\n\n#[derive(\n    Debug,\n    Clone,\n    Copy,\n    IntoStaticStr,\n    strum_macros::EnumCount,\n    strum_macros::EnumIter,\n    strum_macros::FromRepr,\n    enum_map::Enum,\n)]\n#[strum(serialize_all = \"snake_case\")]\npub enum GetPageBatchBreakReason {\n    BatchFull,\n    NonBatchableRequest,\n    NonUniformLsn,\n    SamePageAtDifferentLsn,\n    NonUniformTimeline,\n    ExecutorSteal,\n    #[cfg(feature = \"testing\")]\n    NonUniformKey,\n}\n\npub(crate) struct SmgrQueryTimePerTimeline {\n    global_started: [IntCounter; SmgrQueryType::COUNT],\n    global_latency: [Histogram; SmgrQueryType::COUNT],\n    per_timeline_getpage_started: IntCounter,\n    per_timeline_getpage_latency: Histogram,\n    global_batch_size: Histogram,\n    per_timeline_batch_size: Histogram,\n    global_flush_in_progress_micros: IntCounter,\n    per_timeline_flush_in_progress_micros: IntCounter,\n    global_batch_wait_time: Histogram,\n    per_timeline_batch_wait_time: Histogram,\n    
global_batch_break_reason: [IntCounter; GetPageBatchBreakReason::COUNT],\n    per_timeline_batch_break_reason: GetPageBatchBreakReasonTimelineMetrics,\n    throttling: Arc<tenant_throttling::Pagestream>,\n}\n\nstatic SMGR_QUERY_STARTED_GLOBAL: Lazy<IntCounterVec> = Lazy::new(|| {\n    register_int_counter_vec!(\n        // it's a counter, but, name is prepared to extend it to a histogram of queue depth\n        \"pageserver_smgr_query_started_global_count\",\n        \"Number of smgr queries started, aggregated by query type.\",\n        &[\"smgr_query_type\"],\n    )\n    .expect(\"failed to define a metric\")\n});\n\nstatic SMGR_QUERY_STARTED_PER_TENANT_TIMELINE: Lazy<IntCounterVec> = Lazy::new(|| {\n    register_int_counter_vec!(\n        // it's a counter, but, name is prepared to extend it to a histogram of queue depth\n        \"pageserver_smgr_query_started_count\",\n        \"Number of smgr queries started, aggregated by query type and tenant/timeline.\",\n        &[\"smgr_query_type\", \"tenant_id\", \"shard_id\", \"timeline_id\"],\n    )\n    .expect(\"failed to define a metric\")\n});\n\n/// Per-timeline smgr histogram buckets should be the same as the compute buckets, such that the\n/// metrics are comparable across compute and Pageserver. 
See also:\n/// <https://github.com/neondatabase/neon/blob/1a87975d956a8ad17ec8b85da32a137ec4893fcc/pgxn/neon/neon_perf_counters.h#L18-L27>\n/// <https://github.com/neondatabase/flux-fleet/blob/556182a939edda87ff1d85a6b02e5cec901e0e9e/apps/base/compute-metrics/scrape-compute-sql-exporter.yaml#L29-L35>\nstatic SMGR_QUERY_TIME_PER_TENANT_TIMELINE_BUCKETS: &[f64] =\n    &[0.0006, 0.001, 0.003, 0.006, 0.01, 0.03, 0.1, 1.0, 3.0];\n\nstatic SMGR_QUERY_TIME_PER_TENANT_TIMELINE: Lazy<HistogramVec> = Lazy::new(|| {\n    register_histogram_vec!(\n        \"pageserver_smgr_query_seconds\",\n        \"Time spent _executing_ smgr query handling, excluding batch and throttle delays.\",\n        &[\"smgr_query_type\", \"tenant_id\", \"shard_id\", \"timeline_id\"],\n        SMGR_QUERY_TIME_PER_TENANT_TIMELINE_BUCKETS.into(),\n    )\n    .expect(\"failed to define a metric\")\n});\n\nstatic SMGR_QUERY_TIME_GLOBAL_BUCKETS: Lazy<Vec<f64>> = Lazy::new(|| {\n    [\n        1,\n        10,\n        20,\n        40,\n        60,\n        80,\n        100,\n        200,\n        300,\n        400,\n        500,\n        600,\n        700,\n        800,\n        900,\n        1_000, // 1ms\n        2_000,\n        4_000,\n        6_000,\n        8_000,\n        10_000, // 10ms\n        20_000,\n        40_000,\n        60_000,\n        80_000,\n        100_000,\n        200_000,\n        400_000,\n        600_000,\n        800_000,\n        1_000_000, // 1s\n        2_000_000,\n        4_000_000,\n        6_000_000,\n        8_000_000,\n        10_000_000, // 10s\n        20_000_000,\n        50_000_000,\n        100_000_000,\n        200_000_000,\n        1_000_000_000, // 1000s\n    ]\n    .into_iter()\n    .map(Duration::from_micros)\n    .map(|d| d.as_secs_f64())\n    .collect()\n});\n\nstatic SMGR_QUERY_TIME_GLOBAL: Lazy<HistogramVec> = Lazy::new(|| {\n    register_histogram_vec!(\n        \"pageserver_smgr_query_seconds_global\",\n        \"Like pageserver_smgr_query_seconds, but 
aggregated to instance level.\",\n        &[\"smgr_query_type\"],\n        SMGR_QUERY_TIME_GLOBAL_BUCKETS.clone(),\n    )\n    .expect(\"failed to define a metric\")\n});\n\nstatic PAGE_SERVICE_BATCH_SIZE_BUCKETS_GLOBAL: Lazy<Vec<f64>> = Lazy::new(|| {\n    (1..=u32::try_from(DEFAULT_MAX_GET_VECTORED_KEYS).unwrap())\n        .map(|v| v.into())\n        .collect()\n});\n\nstatic PAGE_SERVICE_BATCH_SIZE_GLOBAL: Lazy<Histogram> = Lazy::new(|| {\n    register_histogram!(\n        \"pageserver_page_service_batch_size_global\",\n        \"Batch size of pageserver page service requests\",\n        PAGE_SERVICE_BATCH_SIZE_BUCKETS_GLOBAL.clone(),\n    )\n    .expect(\"failed to define a metric\")\n});\n\nstatic PAGE_SERVICE_BATCH_SIZE_BUCKETS_PER_TIMELINE: Lazy<Vec<f64>> = Lazy::new(|| {\n    let mut buckets = Vec::new();\n    for i in 0.. {\n        let bucket = 1 << i;\n        if bucket > u32::try_from(DEFAULT_MAX_GET_VECTORED_KEYS).unwrap() {\n            break;\n        }\n        buckets.push(bucket.into());\n    }\n    buckets\n});\n\nstatic PAGE_SERVICE_BATCH_SIZE_PER_TENANT_TIMELINE: Lazy<HistogramVec> = Lazy::new(|| {\n    register_histogram_vec!(\n        \"pageserver_page_service_batch_size\",\n        \"Batch size of pageserver page service requests\",\n        &[\"tenant_id\", \"shard_id\", \"timeline_id\"],\n        PAGE_SERVICE_BATCH_SIZE_BUCKETS_PER_TIMELINE.clone()\n    )\n    .expect(\"failed to define a metric\")\n});\n\nstatic PAGE_SERVICE_BATCH_BREAK_REASON_GLOBAL: Lazy<IntCounterVec> = Lazy::new(|| {\n    register_int_counter_vec!(\n        // it's a counter, but, name is prepared to extend it to a histogram of queue depth\n        \"pageserver_page_service_batch_break_reason_global\",\n        \"Reason for breaking batches of get page requests\",\n        &[\"reason\"],\n    )\n    .expect(\"failed to define a metric\")\n});\n\nstruct GetPageBatchBreakReasonTimelineMetrics {\n    map: EnumMap<GetPageBatchBreakReason, IntCounter>,\n}\n\nimpl 
GetPageBatchBreakReasonTimelineMetrics {\n    fn new(tenant_id: &str, shard_slug: &str, timeline_id: &str) -> Self {\n        GetPageBatchBreakReasonTimelineMetrics {\n            map: EnumMap::from_array(std::array::from_fn(|reason_idx| {\n                let reason = GetPageBatchBreakReason::from_usize(reason_idx);\n                PAGE_SERVICE_BATCH_BREAK_REASON_PER_TENANT_TIMELINE.with_label_values(&[\n                    tenant_id,\n                    shard_slug,\n                    timeline_id,\n                    reason.into(),\n                ])\n            })),\n        }\n    }\n\n    fn inc(&self, reason: GetPageBatchBreakReason) {\n        self.map[reason].inc()\n    }\n}\n\nstatic PAGE_SERVICE_BATCH_BREAK_REASON_PER_TENANT_TIMELINE: Lazy<IntCounterVec> = Lazy::new(|| {\n    register_int_counter_vec!(\n        \"pageserver_page_service_batch_break_reason\",\n        \"Reason for breaking batches of get page requests\",\n        &[\"tenant_id\", \"shard_id\", \"timeline_id\", \"reason\"],\n    )\n    .expect(\"failed to define a metric\")\n});\n\npub(crate) static PAGE_SERVICE_CONFIG_MAX_BATCH_SIZE: Lazy<IntGaugeVec> = Lazy::new(|| {\n    register_int_gauge_vec!(\n        \"pageserver_page_service_config_max_batch_size\",\n        \"Configured maximum batch size for the server-side batching functionality of page_service. 
\\\n         Labels expose more of the configuration parameters.\",\n        &[\"mode\", \"execution\", \"batching\"]\n    )\n    .expect(\"failed to define a metric\")\n});\n\nfn set_page_service_config_max_batch_size(conf: &PageServicePipeliningConfig) {\n    PAGE_SERVICE_CONFIG_MAX_BATCH_SIZE.reset();\n    let (label_values, value) = match conf {\n        PageServicePipeliningConfig::Serial => ([\"serial\", \"-\", \"-\"], 1),\n        PageServicePipeliningConfig::Pipelined(PageServicePipeliningConfigPipelined {\n            max_batch_size,\n            execution,\n            batching,\n        }) => {\n            let mode = \"pipelined\";\n            let execution = match execution {\n                PageServiceProtocolPipelinedExecutionStrategy::ConcurrentFutures => {\n                    \"concurrent-futures\"\n                }\n                PageServiceProtocolPipelinedExecutionStrategy::Tasks => \"tasks\",\n            };\n            let batching = match batching {\n                PageServiceProtocolPipelinedBatchingStrategy::UniformLsn => \"uniform-lsn\",\n                PageServiceProtocolPipelinedBatchingStrategy::ScatteredLsn => \"scattered-lsn\",\n            };\n\n            ([mode, execution, batching], max_batch_size.get())\n        }\n    };\n    PAGE_SERVICE_CONFIG_MAX_BATCH_SIZE\n        .with_label_values(&label_values)\n        .set(value.try_into().unwrap());\n}\n\nstatic PAGE_SERVICE_SMGR_FLUSH_INPROGRESS_MICROS: Lazy<IntCounterVec> = Lazy::new(|| {\n    register_int_counter_vec!(\n        \"pageserver_page_service_pagestream_flush_in_progress_micros\",\n        \"Counter that sums up the microseconds that a pagestream response was being flushed into the TCP connection. \\\n         If the flush is particularly slow, this counter will be updated periodically to make slow flushes \\\n         easily discoverable in monitoring. 
\\\n         Hence, this is NOT a completion latency histogram.\",\n        &[\"tenant_id\", \"shard_id\", \"timeline_id\"],\n    )\n    .expect(\"failed to define a metric\")\n});\n\nstatic PAGE_SERVICE_SMGR_FLUSH_INPROGRESS_MICROS_GLOBAL: Lazy<IntCounter> = Lazy::new(|| {\n    register_int_counter!(\n        \"pageserver_page_service_pagestream_flush_in_progress_micros_global\",\n        \"Like pageserver_page_service_pagestream_flush_in_progress_micros, but instance-wide.\",\n    )\n    .expect(\"failed to define a metric\")\n});\n\nstatic PAGE_SERVICE_SMGR_BATCH_WAIT_TIME: Lazy<HistogramVec> = Lazy::new(|| {\n    register_histogram_vec!(\n        \"pageserver_page_service_pagestream_batch_wait_time_seconds\",\n        \"Time a request spent waiting in its batch until the batch moved to throttle&execution.\",\n        &[\"tenant_id\", \"shard_id\", \"timeline_id\"],\n        SMGR_QUERY_TIME_PER_TENANT_TIMELINE_BUCKETS.into(),\n    )\n    .expect(\"failed to define a metric\")\n});\n\nstatic PAGE_SERVICE_SMGR_BATCH_WAIT_TIME_GLOBAL: Lazy<Histogram> = Lazy::new(|| {\n    register_histogram!(\n        \"pageserver_page_service_pagestream_batch_wait_time_seconds_global\",\n        \"Like pageserver_page_service_pagestream_batch_wait_time_seconds, but aggregated to instance level.\",\n        SMGR_QUERY_TIME_GLOBAL_BUCKETS.to_vec(),\n    )\n    .expect(\"failed to define a metric\")\n});\n\nimpl SmgrQueryTimePerTimeline {\n    pub(crate) fn new(\n        tenant_shard_id: &TenantShardId,\n        timeline_id: &TimelineId,\n        pagestream_throttle_metrics: Arc<tenant_throttling::Pagestream>,\n    ) -> Self {\n        let tenant_id = tenant_shard_id.tenant_id.to_string();\n        let shard_slug = format!(\"{}\", tenant_shard_id.shard_slug());\n        let timeline_id = timeline_id.to_string();\n        let global_started = std::array::from_fn(|i| {\n            let op = SmgrQueryType::from_repr(i).unwrap();\n            SMGR_QUERY_STARTED_GLOBAL\n                
.get_metric_with_label_values(&[op.into()])\n                .unwrap()\n        });\n        let global_latency = std::array::from_fn(|i| {\n            let op = SmgrQueryType::from_repr(i).unwrap();\n            SMGR_QUERY_TIME_GLOBAL\n                .get_metric_with_label_values(&[op.into()])\n                .unwrap()\n        });\n\n        let per_timeline_getpage_started = SMGR_QUERY_STARTED_PER_TENANT_TIMELINE\n            .get_metric_with_label_values(&[\n                SmgrQueryType::GetPageAtLsn.into(),\n                &tenant_id,\n                &shard_slug,\n                &timeline_id,\n            ])\n            .unwrap();\n        let per_timeline_getpage_latency = SMGR_QUERY_TIME_PER_TENANT_TIMELINE\n            .get_metric_with_label_values(&[\n                SmgrQueryType::GetPageAtLsn.into(),\n                &tenant_id,\n                &shard_slug,\n                &timeline_id,\n            ])\n            .unwrap();\n\n        let global_batch_size = PAGE_SERVICE_BATCH_SIZE_GLOBAL.clone();\n        let per_timeline_batch_size = PAGE_SERVICE_BATCH_SIZE_PER_TENANT_TIMELINE\n            .get_metric_with_label_values(&[&tenant_id, &shard_slug, &timeline_id])\n            .unwrap();\n\n        let global_batch_wait_time = PAGE_SERVICE_SMGR_BATCH_WAIT_TIME_GLOBAL.clone();\n        let per_timeline_batch_wait_time = PAGE_SERVICE_SMGR_BATCH_WAIT_TIME\n            .get_metric_with_label_values(&[&tenant_id, &shard_slug, &timeline_id])\n            .unwrap();\n\n        let global_batch_break_reason = std::array::from_fn(|i| {\n            let reason = GetPageBatchBreakReason::from_usize(i);\n            PAGE_SERVICE_BATCH_BREAK_REASON_GLOBAL\n                .get_metric_with_label_values(&[reason.into()])\n                .unwrap()\n        });\n        let per_timeline_batch_break_reason =\n            GetPageBatchBreakReasonTimelineMetrics::new(&tenant_id, &shard_slug, &timeline_id);\n\n        let global_flush_in_progress_micros =\n          
  PAGE_SERVICE_SMGR_FLUSH_INPROGRESS_MICROS_GLOBAL.clone();\n        let per_timeline_flush_in_progress_micros = PAGE_SERVICE_SMGR_FLUSH_INPROGRESS_MICROS\n            .get_metric_with_label_values(&[&tenant_id, &shard_slug, &timeline_id])\n            .unwrap();\n\n        Self {\n            global_started,\n            global_latency,\n            per_timeline_getpage_latency,\n            per_timeline_getpage_started,\n            global_batch_size,\n            per_timeline_batch_size,\n            global_flush_in_progress_micros,\n            per_timeline_flush_in_progress_micros,\n            global_batch_wait_time,\n            per_timeline_batch_wait_time,\n            global_batch_break_reason,\n            per_timeline_batch_break_reason,\n            throttling: pagestream_throttle_metrics,\n        }\n    }\n    pub(crate) fn start_smgr_op(&self, op: SmgrQueryType, received_at: Instant) -> SmgrOpTimer {\n        self.global_started[op as usize].inc();\n\n        let per_timeline_latency_histo = if matches!(op, SmgrQueryType::GetPageAtLsn) {\n            self.per_timeline_getpage_started.inc();\n            Some(self.per_timeline_getpage_latency.clone())\n        } else {\n            None\n        };\n\n        SmgrOpTimer(Some(SmgrOpTimerInner {\n            global_execution_latency_histo: self.global_latency[op as usize].clone(),\n            per_timeline_execution_latency_histo: per_timeline_latency_histo,\n            global_flush_in_progress_micros: self.global_flush_in_progress_micros.clone(),\n            per_timeline_flush_in_progress_micros: self\n                .per_timeline_flush_in_progress_micros\n                .clone(),\n            global_batch_wait_time: self.global_batch_wait_time.clone(),\n            per_timeline_batch_wait_time: self.per_timeline_batch_wait_time.clone(),\n            throttling: self.throttling.clone(),\n            timings: SmgrOpTimerState::Received { received_at },\n        }))\n    }\n\n    /// TODO: do 
something about this? seems odd, we have a similar call on SmgrOpTimer\n    pub(crate) fn observe_getpage_batch_start(\n        &self,\n        batch_size: usize,\n        break_reason: GetPageBatchBreakReason,\n    ) {\n        self.global_batch_size.observe(batch_size as f64);\n        self.per_timeline_batch_size.observe(batch_size as f64);\n\n        self.global_batch_break_reason[break_reason.into_usize()].inc();\n        self.per_timeline_batch_break_reason.inc(break_reason);\n    }\n}\n\n// keep in sync with control plane Go code so that we can validate\n// compute's basebackup_ms metric with our perspective in the context of SLI/SLO.\nstatic COMPUTE_STARTUP_BUCKETS: Lazy<[f64; 28]> = Lazy::new(|| {\n    // Go code uses milliseconds. Variable is called `computeStartupBuckets`\n    [\n        5, 10, 20, 30, 50, 70, 100, 120, 150, 200, 250, 300, 350, 400, 450, 500, 600, 800, 1000,\n        1500, 2000, 2500, 3000, 5000, 10000, 20000, 40000, 60000,\n    ]\n    .map(|ms| (ms as f64) / 1000.0)\n});\n\npub(crate) struct BasebackupQueryTime {\n    ok: Histogram,\n    error: Histogram,\n    client_error: Histogram,\n}\n\npub(crate) static BASEBACKUP_QUERY_TIME: Lazy<BasebackupQueryTime> = Lazy::new(|| {\n    let vec = register_histogram_vec!(\n        \"pageserver_basebackup_query_seconds\",\n        \"Histogram of basebackup queries durations, by result type\",\n        &[\"result\"],\n        COMPUTE_STARTUP_BUCKETS.to_vec(),\n    )\n    .expect(\"failed to define a metric\");\n    BasebackupQueryTime {\n        ok: vec.get_metric_with_label_values(&[\"ok\"]).unwrap(),\n        error: vec.get_metric_with_label_values(&[\"error\"]).unwrap(),\n        client_error: vec.get_metric_with_label_values(&[\"client_error\"]).unwrap(),\n    }\n});\n\npub(crate) struct BasebackupQueryTimeOngoingRecording<'a> {\n    parent: &'a BasebackupQueryTime,\n    start: std::time::Instant,\n}\n\nimpl BasebackupQueryTime {\n    pub(crate) fn start_recording(&self) -> 
BasebackupQueryTimeOngoingRecording<'_> {\n        let start = Instant::now();\n        BasebackupQueryTimeOngoingRecording {\n            parent: self,\n            start,\n        }\n    }\n}\n\nimpl BasebackupQueryTimeOngoingRecording<'_> {\n    pub(crate) fn observe<T>(self, res: &Result<T, QueryError>) {\n        let elapsed = self.start.elapsed().as_secs_f64();\n        // If you want to change the categorization of a specific error, also change it in `log_query_error`.\n        let metric = match res {\n            Ok(_) => &self.parent.ok,\n            Err(QueryError::Shutdown) | Err(QueryError::Reconnect) => {\n                // Do not observe ok/err for shutdown/reconnect.\n                // Reconnect error might be raised when the operation is waiting for LSN and the tenant shutdown interrupts\n                // the operation. A reconnect error will be issued and the client will retry.\n                return;\n            }\n            Err(QueryError::Disconnected(ConnectionError::Io(io_error)))\n                if is_expected_io_error(io_error) =>\n            {\n                &self.parent.client_error\n            }\n            Err(_) => &self.parent.error,\n        };\n        metric.observe(elapsed);\n    }\n}\n\npub(crate) static LIVE_CONNECTIONS: Lazy<IntCounterPairVec> = Lazy::new(|| {\n    register_int_counter_pair_vec!(\n        \"pageserver_live_connections_started\",\n        \"Number of network connections that we started handling\",\n        \"pageserver_live_connections_finished\",\n        \"Number of network connections that we finished handling\",\n        &[\"pageserver_connection_kind\"]\n    )\n    .expect(\"failed to define a metric\")\n});\n\n#[derive(Clone, Copy, enum_map::Enum, IntoStaticStr)]\npub(crate) enum ComputeCommandKind {\n    PageStreamV3,\n    PageStreamV2,\n    Basebackup,\n    Fullbackup,\n    LeaseLsn,\n}\n\npub(crate) struct ComputeCommandCounters {\n    map: EnumMap<ComputeCommandKind, IntCounter>,\n}\n\npub(crate) 
static COMPUTE_COMMANDS_COUNTERS: Lazy<ComputeCommandCounters> = Lazy::new(|| {\n    let inner = register_int_counter_vec!(\n        \"pageserver_compute_commands\",\n        \"Number of compute -> pageserver commands processed\",\n        &[\"command\"]\n    )\n    .expect(\"failed to define a metric\");\n\n    ComputeCommandCounters {\n        map: EnumMap::from_array(std::array::from_fn(|i| {\n            let command = ComputeCommandKind::from_usize(i);\n            let command_str: &'static str = command.into();\n            inner.with_label_values(&[command_str])\n        })),\n    }\n});\n\nimpl ComputeCommandCounters {\n    pub(crate) fn for_command(&self, command: ComputeCommandKind) -> &IntCounter {\n        &self.map[command]\n    }\n}\n\n// remote storage metrics\n\nstatic REMOTE_TIMELINE_CLIENT_CALLS: Lazy<IntCounterPairVec> = Lazy::new(|| {\n    register_int_counter_pair_vec!(\n        \"pageserver_remote_timeline_client_calls_started\",\n        \"Number of started calls to remote timeline client.\",\n        \"pageserver_remote_timeline_client_calls_finished\",\n        \"Number of finshed calls to remote timeline client.\",\n        &[\n            \"tenant_id\",\n            \"shard_id\",\n            \"timeline_id\",\n            \"file_kind\",\n            \"op_kind\"\n        ],\n    )\n    .unwrap()\n});\n\nstatic REMOTE_TIMELINE_CLIENT_BYTES_STARTED_COUNTER: Lazy<IntCounterVec> =\n    Lazy::new(|| {\n        register_int_counter_vec!(\n        \"pageserver_remote_timeline_client_bytes_started\",\n        \"Incremented by the number of bytes associated with a remote timeline client operation. 
\\\n         The increment happens when the operation is scheduled.\",\n        &[\"tenant_id\", \"shard_id\", \"timeline_id\", \"file_kind\", \"op_kind\"],\n    )\n        .expect(\"failed to define a metric\")\n    });\n\nstatic REMOTE_TIMELINE_CLIENT_BYTES_FINISHED_COUNTER: Lazy<IntCounterVec> = Lazy::new(|| {\n    register_int_counter_vec!(\n        \"pageserver_remote_timeline_client_bytes_finished\",\n        \"Incremented by the number of bytes associated with a remote timeline client operation. \\\n         The increment happens when the operation finishes (regardless of success/failure/shutdown).\",\n        &[\"tenant_id\", \"shard_id\", \"timeline_id\", \"file_kind\", \"op_kind\"],\n    )\n    .expect(\"failed to define a metric\")\n});\n\npub(crate) struct TenantManagerMetrics {\n    tenant_slots_attached: UIntGauge,\n    tenant_slots_secondary: UIntGauge,\n    tenant_slots_inprogress: UIntGauge,\n    pub(crate) tenant_slot_writes: IntCounter,\n    pub(crate) unexpected_errors: IntCounter,\n}\n\nimpl TenantManagerMetrics {\n    /// Helpers for tracking slots.  
Note that these do not track the lifetime of TenantSlot objects\n    /// exactly: they track the lifetime of the slots _in the tenant map_.\n    pub(crate) fn slot_inserted(&self, slot: &TenantSlot) {\n        match slot {\n            TenantSlot::Attached(_) => {\n                self.tenant_slots_attached.inc();\n            }\n            TenantSlot::Secondary(_) => {\n                self.tenant_slots_secondary.inc();\n            }\n            TenantSlot::InProgress(_) => {\n                self.tenant_slots_inprogress.inc();\n            }\n        }\n    }\n\n    pub(crate) fn slot_removed(&self, slot: &TenantSlot) {\n        match slot {\n            TenantSlot::Attached(_) => {\n                self.tenant_slots_attached.dec();\n            }\n            TenantSlot::Secondary(_) => {\n                self.tenant_slots_secondary.dec();\n            }\n            TenantSlot::InProgress(_) => {\n                self.tenant_slots_inprogress.dec();\n            }\n        }\n    }\n\n    #[cfg(all(debug_assertions, not(test)))]\n    pub(crate) fn slots_total(&self) -> u64 {\n        self.tenant_slots_attached.get()\n            + self.tenant_slots_secondary.get()\n            + self.tenant_slots_inprogress.get()\n    }\n}\n\npub(crate) static TENANT_MANAGER: Lazy<TenantManagerMetrics> = Lazy::new(|| {\n    let tenant_slots = register_uint_gauge_vec!(\n        \"pageserver_tenant_manager_slots\",\n        \"How many slots currently exist, including all attached, secondary and in-progress operations\",\n        &[\"mode\"]\n    )\n    .expect(\"failed to define a metric\");\n    TenantManagerMetrics {\n        tenant_slots_attached: tenant_slots\n            .get_metric_with_label_values(&[\"attached\"])\n            .unwrap(),\n        tenant_slots_secondary: tenant_slots\n            .get_metric_with_label_values(&[\"secondary\"])\n            .unwrap(),\n        tenant_slots_inprogress: tenant_slots\n            
.get_metric_with_label_values(&[\"inprogress\"])\n            .unwrap(),\n        tenant_slot_writes: register_int_counter!(\n            \"pageserver_tenant_manager_slot_writes\",\n            \"Writes to a tenant slot, including all of create/attach/detach/delete\"\n        )\n        .expect(\"failed to define a metric\"),\n        unexpected_errors: register_int_counter!(\n            \"pageserver_tenant_manager_unexpected_errors_total\",\n            \"Number of unexpected conditions encountered: nonzero value indicates a non-fatal bug.\"\n        )\n        .expect(\"failed to define a metric\"),\n    }\n});\n\npub(crate) struct DeletionQueueMetrics {\n    pub(crate) keys_submitted: IntCounter,\n    pub(crate) keys_dropped: IntCounter,\n    pub(crate) keys_executed: IntCounter,\n    pub(crate) keys_validated: IntCounter,\n    pub(crate) dropped_lsn_updates: IntCounter,\n    pub(crate) unexpected_errors: IntCounter,\n    pub(crate) remote_errors: IntCounterVec,\n}\npub(crate) static DELETION_QUEUE: Lazy<DeletionQueueMetrics> = Lazy::new(|| {\n    DeletionQueueMetrics{\n\n    keys_submitted: register_int_counter!(\n        \"pageserver_deletion_queue_submitted_total\",\n        \"Number of objects submitted for deletion\"\n    )\n    .expect(\"failed to define a metric\"),\n\n    keys_dropped: register_int_counter!(\n        \"pageserver_deletion_queue_dropped_total\",\n        \"Number of object deletions dropped due to stale generation.\"\n    )\n    .expect(\"failed to define a metric\"),\n\n    keys_executed: register_int_counter!(\n        \"pageserver_deletion_queue_executed_total\",\n        \"Number of objects deleted. 
Only includes objects that we actually deleted, sum with pageserver_deletion_queue_dropped_total for the total number of keys processed to completion\"\n    )\n    .expect(\"failed to define a metric\"),\n\n    keys_validated: register_int_counter!(\n        \"pageserver_deletion_queue_validated_total\",\n        \"Number of keys validated for deletion.  Sum with pageserver_deletion_queue_dropped_total for the total number of keys that have passed through the validation stage.\"\n    )\n    .expect(\"failed to define a metric\"),\n\n    dropped_lsn_updates: register_int_counter!(\n        \"pageserver_deletion_queue_dropped_lsn_updates_total\",\n        \"Updates to remote_consistent_lsn dropped due to stale generation number.\"\n    )\n    .expect(\"failed to define a metric\"),\n    unexpected_errors: register_int_counter!(\n        \"pageserver_deletion_queue_unexpected_errors_total\",\n        \"Number of unexpected condiions that may stall the queue: any value above zero is unexpected.\"\n    )\n    .expect(\"failed to define a metric\"),\n    remote_errors: register_int_counter_vec!(\n        \"pageserver_deletion_queue_remote_errors_total\",\n        \"Retryable remote I/O errors while executing deletions, for example 503 responses to DeleteObjects\",\n        &[\"op_kind\"],\n    )\n    .expect(\"failed to define a metric\")\n}\n});\n\npub(crate) struct SecondaryModeMetrics {\n    pub(crate) upload_heatmap: IntCounter,\n    pub(crate) upload_heatmap_errors: IntCounter,\n    pub(crate) upload_heatmap_duration: Histogram,\n    pub(crate) download_heatmap: IntCounter,\n    pub(crate) download_layer: IntCounter,\n}\npub(crate) static SECONDARY_MODE: Lazy<SecondaryModeMetrics> = Lazy::new(|| {\n    SecondaryModeMetrics {\n    upload_heatmap: register_int_counter!(\n        \"pageserver_secondary_upload_heatmap\",\n        \"Number of heatmaps written to remote storage by attached tenants\"\n    )\n    .expect(\"failed to define a metric\"),\n    
upload_heatmap_errors: register_int_counter!(\n        \"pageserver_secondary_upload_heatmap_errors\",\n        \"Failures writing heatmap to remote storage\"\n    )\n    .expect(\"failed to define a metric\"),\n    upload_heatmap_duration: register_histogram!(\n        \"pageserver_secondary_upload_heatmap_duration\",\n        \"Time to build and upload a heatmap, including any waiting inside the remote storage client\"\n    )\n    .expect(\"failed to define a metric\"),\n    download_heatmap: register_int_counter!(\n        \"pageserver_secondary_download_heatmap\",\n        \"Number of downloads of heatmaps by secondary mode locations, including when it hasn't changed\"\n    )\n    .expect(\"failed to define a metric\"),\n    download_layer: register_int_counter!(\n        \"pageserver_secondary_download_layer\",\n        \"Number of downloads of layers by secondary mode locations\"\n    )\n    .expect(\"failed to define a metric\"),\n}\n});\n\npub(crate) static SECONDARY_RESIDENT_PHYSICAL_SIZE: Lazy<UIntGaugeVec> = Lazy::new(|| {\n    register_uint_gauge_vec!(\n        \"pageserver_secondary_resident_physical_size\",\n        \"The size of the layer files present in the pageserver's filesystem, for secondary locations.\",\n        &[\"tenant_id\", \"shard_id\"]\n    )\n    .expect(\"failed to define a metric\")\n});\n\npub(crate) static NODE_UTILIZATION_SCORE: Lazy<UIntGauge> = Lazy::new(|| {\n    register_uint_gauge!(\n        \"pageserver_utilization_score\",\n        \"The utilization score we report to the storage controller for scheduling, where 0 is empty, 1000000 is full, and anything above is considered overloaded\",\n    )\n    .expect(\"failed to define a metric\")\n});\n\npub(crate) static SECONDARY_HEATMAP_TOTAL_SIZE: Lazy<UIntGaugeVec> = Lazy::new(|| {\n    register_uint_gauge_vec!(\n        \"pageserver_secondary_heatmap_total_size\",\n        \"The total size in bytes of all layers in the most recently downloaded heatmap.\",\n        
&[\"tenant_id\", \"shard_id\"]\n    )\n    .expect(\"failed to define a metric\")\n});\n\n#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]\npub enum RemoteOpKind {\n    Upload,\n    Download,\n    Delete,\n}\nimpl RemoteOpKind {\n    pub fn as_str(&self) -> &'static str {\n        match self {\n            Self::Upload => \"upload\",\n            Self::Download => \"download\",\n            Self::Delete => \"delete\",\n        }\n    }\n}\n\n#[derive(Debug, Clone, Copy, Hash, PartialEq, Eq)]\npub enum RemoteOpFileKind {\n    Layer,\n    Index,\n}\nimpl RemoteOpFileKind {\n    pub fn as_str(&self) -> &'static str {\n        match self {\n            Self::Layer => \"layer\",\n            Self::Index => \"index\",\n        }\n    }\n}\n\npub(crate) static REMOTE_TIMELINE_CLIENT_COMPLETION_LATENCY: Lazy<HistogramVec> = Lazy::new(|| {\n    register_histogram_vec!(\n        \"pageserver_remote_timeline_client_seconds_global\",\n        \"Time spent on remote timeline client operations. \\\n        Grouped by task_kind, file_kind, operation_kind and status. \\\n        The task_kind is \\\n          - for layer downloads, populated from RequestContext (primary objective of having the label) \\\n          - for index downloads, set to 'unknown' \\\n          - for any upload operation, set to 'RemoteUploadTask' \\\n        This keeps dimensionality at bay. 
\\\n        Does not account for time spent waiting in remote timeline client's queues.\",\n        &[\"task_kind\", \"file_kind\", \"op_kind\", \"status\"]\n    )\n    .expect(\"failed to define a metric\")\n});\n\npub(crate) static TENANT_TASK_EVENTS: Lazy<IntCounterVec> = Lazy::new(|| {\n    register_int_counter_vec!(\n        \"pageserver_tenant_task_events\",\n        \"Number of task start/stop/fail events.\",\n        &[\"event\"],\n    )\n    .expect(\"Failed to register tenant_task_events metric\")\n});\n\npub struct BackgroundLoopSemaphoreMetrics {\n    counters: EnumMap<BackgroundLoopKind, IntCounterPair>,\n    durations: EnumMap<BackgroundLoopKind, Histogram>,\n    waiting_tasks: EnumMap<BackgroundLoopKind, IntGauge>,\n    running_tasks: EnumMap<BackgroundLoopKind, IntGauge>,\n}\n\npub(crate) static BACKGROUND_LOOP_SEMAPHORE: Lazy<BackgroundLoopSemaphoreMetrics> =\n    Lazy::new(|| {\n        let counters = register_int_counter_pair_vec!(\n            \"pageserver_background_loop_semaphore_wait_start_count\",\n            \"Counter for background loop concurrency-limiting semaphore acquire calls started\",\n            \"pageserver_background_loop_semaphore_wait_finish_count\",\n            \"Counter for background loop concurrency-limiting semaphore acquire calls finished\",\n            &[\"task\"],\n        )\n        .unwrap();\n\n        let durations = register_histogram_vec!(\n            \"pageserver_background_loop_semaphore_wait_seconds\",\n            \"Seconds spent waiting on background loop semaphore acquisition\",\n            &[\"task\"],\n            vec![0.01, 1.0, 5.0, 10.0, 30.0, 60.0, 180.0, 300.0, 600.0],\n        )\n        .unwrap();\n\n        let waiting_tasks = register_int_gauge_vec!(\n            \"pageserver_background_loop_semaphore_waiting_tasks\",\n            \"Number of background loop tasks waiting for semaphore\",\n            &[\"task\"],\n        )\n        .unwrap();\n\n        let running_tasks = 
register_int_gauge_vec!(\n            \"pageserver_background_loop_semaphore_running_tasks\",\n            \"Number of background loop tasks running concurrently\",\n            &[\"task\"],\n        )\n        .unwrap();\n\n        BackgroundLoopSemaphoreMetrics {\n            counters: EnumMap::from_array(std::array::from_fn(|i| {\n                let kind = BackgroundLoopKind::from_usize(i);\n                counters.with_label_values(&[kind.into()])\n            })),\n            durations: EnumMap::from_array(std::array::from_fn(|i| {\n                let kind = BackgroundLoopKind::from_usize(i);\n                durations.with_label_values(&[kind.into()])\n            })),\n            waiting_tasks: EnumMap::from_array(std::array::from_fn(|i| {\n                let kind = BackgroundLoopKind::from_usize(i);\n                waiting_tasks.with_label_values(&[kind.into()])\n            })),\n            running_tasks: EnumMap::from_array(std::array::from_fn(|i| {\n                let kind = BackgroundLoopKind::from_usize(i);\n                running_tasks.with_label_values(&[kind.into()])\n            })),\n        }\n    });\n\nimpl BackgroundLoopSemaphoreMetrics {\n    /// Starts recording semaphore metrics. 
Call `acquired()` on the returned recorder when the\n    /// semaphore is acquired, and drop it when the task completes or is cancelled.\n    pub(crate) fn record(\n        &self,\n        task: BackgroundLoopKind,\n    ) -> BackgroundLoopSemaphoreMetricsRecorder {\n        BackgroundLoopSemaphoreMetricsRecorder::start(self, task)\n    }\n}\n\n/// Records metrics for a background task.\npub struct BackgroundLoopSemaphoreMetricsRecorder<'a> {\n    metrics: &'a BackgroundLoopSemaphoreMetrics,\n    task: BackgroundLoopKind,\n    start: Instant,\n    wait_counter_guard: Option<metrics::IntCounterPairGuard>,\n}\n\nimpl<'a> BackgroundLoopSemaphoreMetricsRecorder<'a> {\n    /// Starts recording semaphore metrics, by recording wait time and incrementing\n    /// `wait_start_count` and `waiting_tasks`.\n    fn start(metrics: &'a BackgroundLoopSemaphoreMetrics, task: BackgroundLoopKind) -> Self {\n        metrics.waiting_tasks[task].inc();\n        Self {\n            metrics,\n            task,\n            start: Instant::now(),\n            wait_counter_guard: Some(metrics.counters[task].guard()),\n        }\n    }\n\n    /// Signals that the semaphore has been acquired, and updates relevant metrics.\n    pub fn acquired(&mut self) -> Duration {\n        let waited = self.start.elapsed();\n        self.wait_counter_guard.take().expect(\"already acquired\");\n        self.metrics.durations[self.task].observe(waited.as_secs_f64());\n        self.metrics.waiting_tasks[self.task].dec();\n        self.metrics.running_tasks[self.task].inc();\n        waited\n    }\n}\n\nimpl Drop for BackgroundLoopSemaphoreMetricsRecorder<'_> {\n    /// The task either completed or was cancelled.\n    fn drop(&mut self) {\n        if self.wait_counter_guard.take().is_some() {\n            // Waiting.\n            self.metrics.durations[self.task].observe(self.start.elapsed().as_secs_f64());\n            self.metrics.waiting_tasks[self.task].dec();\n        } else {\n            // Running.\n    
        self.metrics.running_tasks[self.task].dec();\n        }\n    }\n}\n\npub(crate) static BACKGROUND_LOOP_PERIOD_OVERRUN_COUNT: Lazy<IntCounterVec> = Lazy::new(|| {\n    register_int_counter_vec!(\n        \"pageserver_background_loop_period_overrun_count\",\n        \"Incremented whenever warn_when_period_overrun() logs a warning.\",\n        &[\"task\", \"period\"],\n    )\n    .expect(\"failed to define a metric\")\n});\n\n// walreceiver metrics\n\npub(crate) static WALRECEIVER_STARTED_CONNECTIONS: Lazy<IntCounter> = Lazy::new(|| {\n    register_int_counter!(\n        \"pageserver_walreceiver_started_connections_total\",\n        \"Number of started walreceiver connections\"\n    )\n    .expect(\"failed to define a metric\")\n});\n\npub(crate) static WALRECEIVER_ACTIVE_MANAGERS: Lazy<IntGauge> = Lazy::new(|| {\n    register_int_gauge!(\n        \"pageserver_walreceiver_active_managers\",\n        \"Number of active walreceiver managers\"\n    )\n    .expect(\"failed to define a metric\")\n});\n\npub(crate) static WALRECEIVER_SWITCHES: Lazy<IntCounterVec> = Lazy::new(|| {\n    register_int_counter_vec!(\n        \"pageserver_walreceiver_switches_total\",\n        \"Number of walreceiver manager change_connection calls\",\n        &[\"reason\"]\n    )\n    .expect(\"failed to define a metric\")\n});\n\npub(crate) static WALRECEIVER_BROKER_UPDATES: Lazy<IntCounter> = Lazy::new(|| {\n    register_int_counter!(\n        \"pageserver_walreceiver_broker_updates_total\",\n        \"Number of received broker updates in walreceiver\"\n    )\n    .expect(\"failed to define a metric\")\n});\n\npub(crate) static WALRECEIVER_CANDIDATES_EVENTS: Lazy<IntCounterVec> = Lazy::new(|| {\n    register_int_counter_vec!(\n        \"pageserver_walreceiver_candidates_events_total\",\n        \"Number of walreceiver candidate events\",\n        &[\"event\"]\n    )\n    .expect(\"failed to define a metric\")\n});\n\npub(crate) static WALRECEIVER_CANDIDATES_ADDED: Lazy<IntCounter> =\n  
  Lazy::new(|| WALRECEIVER_CANDIDATES_EVENTS.with_label_values(&[\"add\"]));\n\npub(crate) static WALRECEIVER_CANDIDATES_REMOVED: Lazy<IntCounter> =\n    Lazy::new(|| WALRECEIVER_CANDIDATES_EVENTS.with_label_values(&[\"remove\"]));\n\npub(crate) static LOCAL_DATA_LOSS_SUSPECTED: Lazy<IntGauge> = Lazy::new(|| {\n    register_int_gauge!(\n        \"pageserver_local_data_loss_suspected\",\n        \"Non-zero value indicates that pageserver local data loss is suspected (and highly likely).\"\n    )\n    .expect(\"failed to define a metric\")\n});\n\n// Counter keeping track of misrouted PageStream requests. Spelling out PageStream requests here to distinguish\n// it from other types of requests (SK wal replication, http requests, etc.). PageStream requests are used by\n// Postgres compute to fetch data from pageservers.\n// A misrouted PageStream request is registered if the pageserver cannot find the tenant identified in the\n// request, or if the pageserver is not the \"primary\" serving the tenant shard. These errors almost always identify\n// issues with compute configuration, caused by either the compute node itself being stuck in the wrong\n// configuration or Storage Controller reconciliation bugs. Misrouted requests are expected during tenant migration\n// and/or during recovery following a pageserver failure, but persistently high rates of misrouted requests\n// are indicative of bugs (and unavailability).\npub(crate) static MISROUTED_PAGESTREAM_REQUESTS: Lazy<IntCounter> = Lazy::new(|| {\n    register_int_counter!(\n        \"pageserver_misrouted_pagestream_requests_total\",\n        \"Number of pageserver pagestream requests that were routed to the wrong pageserver\"\n    )\n    .expect(\"failed to define a metric\")\n});\n\n// Global counter for PageStream request results by outcome. Outcomes are divided into 3 categories:\n// - success\n// - internal_error: errors that indicate bugs in the storage cluster (e.g. 
page reconstruction errors, misrouted requests, LSN timeout errors)\n// - other_error: transient error conditions that are expected in normal operation or indicate bugs with other parts of the system (e.g. error due to pageserver shutdown, malformed requests etc.)\npub(crate) static PAGESTREAM_HANDLER_RESULTS_TOTAL: Lazy<IntCounterVec> = Lazy::new(|| {\n    register_int_counter_vec!(\n        \"pageserver_pagestream_handler_results_total\",\n        \"Number of pageserver pagestream handler results by outcome (success, internal_error, other_error)\",\n        &[\"outcome\"]\n    )\n    .expect(\"failed to define a metric\")\n});\n\n// Constants for pageserver_pagestream_handler_results_total's outcome labels\npub(crate) const PAGESTREAM_HANDLER_OUTCOME_SUCCESS: &str = \"success\";\npub(crate) const PAGESTREAM_HANDLER_OUTCOME_INTERNAL_ERROR: &str = \"internal_error\";\npub(crate) const PAGESTREAM_HANDLER_OUTCOME_OTHER_ERROR: &str = \"other_error\";\n\n// Metrics collected on WAL redo operations\n//\n// We collect the time spent in actual WAL redo ('redo'), and time waiting\n// for access to the postgres process ('wait') since there is only one for\n// each tenant.\n\n/// Time buckets are small because we want to be able to measure the\n/// smallest redo processing times. These buckets allow us to measure down\n/// to 5us, which equates to 200'000 pages/sec, which equates to 1.6GB/sec.\n/// This is much better than the previous 5ms aka 200 pages/sec aka 1.6MB/sec.\n///\n/// Values up to 1s are recorded because metrics show that we have redo\n/// durations and lock times larger than 0.250s.\nmacro_rules! 
redo_histogram_time_buckets {\n    () => {\n        vec![\n            0.000_005, 0.000_010, 0.000_025, 0.000_050, 0.000_100, 0.000_250, 0.000_500, 0.001_000,\n            0.002_500, 0.005_000, 0.010_000, 0.025_000, 0.050_000, 0.100_000, 0.250_000, 0.500_000,\n            1.000_000,\n        ]\n    };\n}\n\n/// While we're at it, also measure the amount of records replayed in each\n/// operation. We have a global 'total replayed' counter, but that's not\n/// as useful as 'what is the skew for how many records we replay in one\n/// operation'.\nmacro_rules! redo_histogram_count_buckets {\n    () => {\n        vec![0.0, 1.0, 2.0, 5.0, 10.0, 25.0, 50.0, 100.0, 250.0, 500.0]\n    };\n}\n\nmacro_rules! redo_bytes_histogram_count_buckets {\n    () => {\n        // powers of (2^.5), from 2^4.5 to 2^15 (22 buckets)\n        // rounded up to the next multiple of 8 to capture any MAXALIGNed record of that size, too.\n        vec![\n            24.0, 32.0, 48.0, 64.0, 96.0, 128.0, 184.0, 256.0, 368.0, 512.0, 728.0, 1024.0, 1456.0,\n            2048.0, 2904.0, 4096.0, 5800.0, 8192.0, 11592.0, 16384.0, 23176.0, 32768.0,\n        ]\n    };\n}\n\npub(crate) struct WalIngestMetrics {\n    pub(crate) bytes_received: IntCounter,\n    pub(crate) records_received: IntCounter,\n    pub(crate) records_observed: IntCounter,\n    pub(crate) records_committed: IntCounter,\n    pub(crate) values_committed_metadata_images: IntCounter,\n    pub(crate) values_committed_metadata_deltas: IntCounter,\n    pub(crate) values_committed_data_images: IntCounter,\n    pub(crate) values_committed_data_deltas: IntCounter,\n    pub(crate) gap_blocks_zeroed_on_rel_extend: IntCounter,\n}\n\nimpl WalIngestMetrics {\n    pub(crate) fn inc_values_committed(&self, stats: &DatadirModificationStats) {\n        if stats.metadata_images > 0 {\n            self.values_committed_metadata_images\n                .inc_by(stats.metadata_images);\n        }\n        if stats.metadata_deltas > 0 {\n            
self.values_committed_metadata_deltas\n                .inc_by(stats.metadata_deltas);\n        }\n        if stats.data_images > 0 {\n            self.values_committed_data_images.inc_by(stats.data_images);\n        }\n        if stats.data_deltas > 0 {\n            self.values_committed_data_deltas.inc_by(stats.data_deltas);\n        }\n    }\n}\n\npub(crate) static WAL_INGEST: Lazy<WalIngestMetrics> = Lazy::new(|| {\n    let values_committed = register_int_counter_vec!(\n        \"pageserver_wal_ingest_values_committed\",\n        \"Number of values committed to pageserver storage from WAL records\",\n        &[\"class\", \"kind\"],\n    )\n    .expect(\"failed to define a metric\");\n\n    WalIngestMetrics {\n    bytes_received: register_int_counter!(\n        \"pageserver_wal_ingest_bytes_received\",\n        \"Bytes of WAL ingested from safekeepers\",\n    )\n    .unwrap(),\n    records_received: register_int_counter!(\n        \"pageserver_wal_ingest_records_received\",\n        \"Number of WAL records received from safekeepers\"\n    )\n    .expect(\"failed to define a metric\"),\n    records_observed: register_int_counter!(\n        \"pageserver_wal_ingest_records_observed\",\n        \"Number of WAL records observed from safekeepers. 
These are metadata only records for shard 0.\"\n    )\n    .expect(\"failed to define a metric\"),\n    records_committed: register_int_counter!(\n        \"pageserver_wal_ingest_records_committed\",\n        \"Number of WAL records which resulted in writes to pageserver storage\"\n    )\n    .expect(\"failed to define a metric\"),\n    values_committed_metadata_images: values_committed.with_label_values(&[\"metadata\", \"image\"]),\n    values_committed_metadata_deltas: values_committed.with_label_values(&[\"metadata\", \"delta\"]),\n    values_committed_data_images: values_committed.with_label_values(&[\"data\", \"image\"]),\n    values_committed_data_deltas: values_committed.with_label_values(&[\"data\", \"delta\"]),\n    gap_blocks_zeroed_on_rel_extend: register_int_counter!(\n        \"pageserver_gap_blocks_zeroed_on_rel_extend\",\n        \"Total number of zero gap blocks written on relation extends\"\n    )\n    .expect(\"failed to define a metric\"),\n}\n});\n\npub(crate) static PAGESERVER_TIMELINE_WAL_RECORDS_RECEIVED: Lazy<IntCounterVec> = Lazy::new(|| {\n    register_int_counter_vec!(\n        \"pageserver_timeline_wal_records_received\",\n        \"Number of WAL records received per shard\",\n        &[\"tenant_id\", \"shard_id\", \"timeline_id\"]\n    )\n    .expect(\"failed to define a metric\")\n});\n\npub(crate) static WAL_REDO_TIME: Lazy<Histogram> = Lazy::new(|| {\n    register_histogram!(\n        \"pageserver_wal_redo_seconds\",\n        \"Time spent on WAL redo\",\n        redo_histogram_time_buckets!()\n    )\n    .expect(\"failed to define a metric\")\n});\n\npub(crate) static WAL_REDO_RECORDS_HISTOGRAM: Lazy<Histogram> = Lazy::new(|| {\n    register_histogram!(\n        \"pageserver_wal_redo_records_histogram\",\n        \"Histogram of number of records replayed per redo in the Postgres WAL redo process\",\n        redo_histogram_count_buckets!(),\n    )\n    .expect(\"failed to define a metric\")\n});\n\npub(crate) static 
WAL_REDO_BYTES_HISTOGRAM: Lazy<Histogram> = Lazy::new(|| {\n    register_histogram!(\n        \"pageserver_wal_redo_bytes_histogram\",\n        \"Histogram of number of records replayed per redo sent to Postgres\",\n        redo_bytes_histogram_count_buckets!(),\n    )\n    .expect(\"failed to define a metric\")\n});\n\n// FIXME: isn't this already included by WAL_REDO_RECORDS_HISTOGRAM which has _count?\npub(crate) static WAL_REDO_RECORD_COUNTER: Lazy<IntCounter> = Lazy::new(|| {\n    register_int_counter!(\n        \"pageserver_replayed_wal_records_total\",\n        \"Number of WAL records replayed in WAL redo process\"\n    )\n    .unwrap()\n});\n\n#[rustfmt::skip]\npub(crate) static WAL_REDO_PROCESS_LAUNCH_DURATION_HISTOGRAM: Lazy<Histogram> = Lazy::new(|| {\n    register_histogram!(\n        \"pageserver_wal_redo_process_launch_duration\",\n        \"Histogram of the duration of successful WalRedoProcess::launch calls\",\n        vec![\n            0.0002, 0.0004, 0.0006, 0.0008, 0.0010,\n            0.0020, 0.0040, 0.0060, 0.0080, 0.0100,\n            0.0200, 0.0400, 0.0600, 0.0800, 0.1000,\n            0.2000, 0.4000, 0.6000, 0.8000, 1.0000,\n            1.5000, 2.0000, 2.5000, 3.0000, 4.0000, 10.0000\n        ],\n    )\n    .expect(\"failed to define a metric\")\n});\n\npub(crate) struct WalRedoProcessCounters {\n    pub(crate) started: IntCounter,\n    pub(crate) killed_by_cause: EnumMap<WalRedoKillCause, IntCounter>,\n    pub(crate) active_stderr_logger_tasks_started: IntCounter,\n    pub(crate) active_stderr_logger_tasks_finished: IntCounter,\n}\n\n#[derive(Debug, enum_map::Enum, strum_macros::IntoStaticStr)]\npub(crate) enum WalRedoKillCause {\n    WalRedoProcessDrop,\n    NoLeakChildDrop,\n    Startup,\n}\n\nimpl Default for WalRedoProcessCounters {\n    fn default() -> Self {\n        let started = register_int_counter!(\n            \"pageserver_wal_redo_process_started_total\",\n            \"Number of WAL redo processes started\",\n        )\n      
  .unwrap();\n\n        let killed = register_int_counter_vec!(\n            \"pageserver_wal_redo_process_stopped_total\",\n            \"Number of WAL redo processes stopped\",\n            &[\"cause\"],\n        )\n        .unwrap();\n\n        let active_stderr_logger_tasks_started = register_int_counter!(\n            \"pageserver_walredo_stderr_logger_tasks_started_total\",\n            \"Number of active walredo stderr logger tasks that have started\",\n        )\n        .unwrap();\n\n        let active_stderr_logger_tasks_finished = register_int_counter!(\n            \"pageserver_walredo_stderr_logger_tasks_finished_total\",\n            \"Number of active walredo stderr logger tasks that have finished\",\n        )\n        .unwrap();\n\n        Self {\n            started,\n            killed_by_cause: EnumMap::from_array(std::array::from_fn(|i| {\n                let cause = WalRedoKillCause::from_usize(i);\n                let cause_str: &'static str = cause.into();\n                killed.with_label_values(&[cause_str])\n            })),\n            active_stderr_logger_tasks_started,\n            active_stderr_logger_tasks_finished,\n        }\n    }\n}\n\npub(crate) static WAL_REDO_PROCESS_COUNTERS: Lazy<WalRedoProcessCounters> =\n    Lazy::new(WalRedoProcessCounters::default);\n\n/// Similar to `prometheus::HistogramTimer` but does not record on drop.\npub(crate) struct StorageTimeMetricsTimer {\n    metrics: StorageTimeMetrics,\n    start: Instant,\n    stopped: Cell<bool>,\n}\n\nimpl StorageTimeMetricsTimer {\n    fn new(metrics: StorageTimeMetrics) -> Self {\n        /*BEGIN_HADRON */\n        // record the active operation as the timer starts\n        metrics.timeline_active_count.inc();\n        /*END_HADRON */\n        Self {\n            metrics,\n            start: Instant::now(),\n            stopped: Cell::new(false),\n        }\n    }\n\n    /// Returns the elapsed duration of the timer.\n    pub fn elapsed(&self) -> Duration {\n       
 self.start.elapsed()\n    }\n\n    /// Record the time from creation to now and return it.\n    pub fn stop_and_record(self) -> Duration {\n        let duration = self.elapsed();\n        let seconds = duration.as_secs_f64();\n        self.metrics.timeline_sum.inc_by(seconds);\n        self.metrics.timeline_count.inc();\n        self.metrics.global_histogram.observe(seconds);\n        /* BEGIN_HADRON*/\n        self.stopped.set(true);\n        self.metrics.timeline_active_count.dec();\n        /*END_HADRON */\n        duration\n    }\n\n    /// Turns this timer into a timer, which will always record -- usually this means recording\n    /// regardless of whether an early `?` path was taken in a function.\n    pub(crate) fn record_on_drop(self) -> AlwaysRecordingStorageTimeMetricsTimer {\n        AlwaysRecordingStorageTimeMetricsTimer(Some(self))\n    }\n}\n\n/*BEGIN_HADRON */\nimpl Drop for StorageTimeMetricsTimer {\n    fn drop(&mut self) {\n        if !self.stopped.get() {\n            self.metrics.timeline_active_count.dec();\n        }\n    }\n}\n/*END_HADRON */\n\npub(crate) struct AlwaysRecordingStorageTimeMetricsTimer(Option<StorageTimeMetricsTimer>);\n\nimpl Drop for AlwaysRecordingStorageTimeMetricsTimer {\n    fn drop(&mut self) {\n        if let Some(inner) = self.0.take() {\n            inner.stop_and_record();\n        }\n    }\n}\n\nimpl AlwaysRecordingStorageTimeMetricsTimer {\n    /// Returns the elapsed duration of the timer.\n    pub fn elapsed(&self) -> Duration {\n        self.0.as_ref().expect(\"not dropped yet\").elapsed()\n    }\n}\n\n/// Timing facilities for a globally histogrammed metric, which is supported by per tenant and\n/// timeline total sum and count.\n#[derive(Clone, Debug)]\npub(crate) struct StorageTimeMetrics {\n    /// Sum of f64 seconds, per operation, tenant_id and timeline_id\n    timeline_sum: Counter,\n    /// Number of operations, per operation, tenant_id and timeline_id\n    timeline_count: IntCounter,\n    /*BEGIN_HADRON */\n    
/// Number of active operations per operation, tenant_id, and timeline_id\n    timeline_active_count: IntGauge,\n    /*END_HADRON */\n    /// Global histogram having only the \"operation\" label.\n    global_histogram: Histogram,\n}\n\nimpl StorageTimeMetrics {\n    pub fn new(\n        operation: StorageTimeOperation,\n        tenant_id: &str,\n        shard_id: &str,\n        timeline_id: &str,\n    ) -> Self {\n        let operation: &'static str = operation.into();\n\n        let timeline_sum = STORAGE_TIME_SUM_PER_TIMELINE\n            .get_metric_with_label_values(&[operation, tenant_id, shard_id, timeline_id])\n            .unwrap();\n        let timeline_count = STORAGE_TIME_COUNT_PER_TIMELINE\n            .get_metric_with_label_values(&[operation, tenant_id, shard_id, timeline_id])\n            .unwrap();\n        /*BEGIN_HADRON */\n        let timeline_active_count = STORAGE_ACTIVE_COUNT_PER_TIMELINE\n            .get_metric_with_label_values(&[operation, tenant_id, shard_id, timeline_id])\n            .unwrap();\n        /*END_HADRON */\n        let global_histogram = STORAGE_TIME_GLOBAL\n            .get_metric_with_label_values(&[operation])\n            .unwrap();\n\n        StorageTimeMetrics {\n            timeline_sum,\n            timeline_count,\n            timeline_active_count,\n            global_histogram,\n        }\n    }\n\n    /// Starts timing a new operation.\n    ///\n    /// Note: unlike `prometheus::HistogramTimer` the returned timer does not record on drop.\n    pub fn start_timer(&self) -> StorageTimeMetricsTimer {\n        StorageTimeMetricsTimer::new(self.clone())\n    }\n}\n\npub(crate) struct TimelineMetrics {\n    tenant_id: String,\n    shard_id: String,\n    timeline_id: String,\n    pub flush_time_histo: StorageTimeMetrics,\n    pub flush_delay_histo: StorageTimeMetrics,\n    pub compact_time_histo: StorageTimeMetrics,\n    pub create_images_time_histo: StorageTimeMetrics,\n    pub logical_size_histo: StorageTimeMetrics,\n 
   pub imitate_logical_size_histo: StorageTimeMetrics,\n    pub load_layer_map_histo: StorageTimeMetrics,\n    pub garbage_collect_histo: StorageTimeMetrics,\n    pub find_gc_cutoffs_histo: StorageTimeMetrics,\n    pub last_record_lsn_gauge: IntGauge,\n    pub disk_consistent_lsn_gauge: IntGauge,\n    pub pitr_history_size: UIntGauge,\n    pub archival_size: UIntGauge,\n    pub layers_per_read: Histogram,\n    pub standby_horizon_gauge: IntGauge,\n    pub resident_physical_size_gauge: UIntGauge,\n    pub visible_physical_size_gauge: UIntGauge,\n    /// copy of LayeredTimeline.current_logical_size\n    pub current_logical_size_gauge: UIntGauge,\n    pub aux_file_size_gauge: IntGauge,\n    pub directory_entries_count_gauge: Lazy<UIntGauge, Box<dyn Send + Fn() -> UIntGauge>>,\n    pub evictions: IntCounter,\n    pub evictions_with_low_residence_duration: std::sync::RwLock<EvictionsWithLowResidenceDuration>,\n    /// Number of valid LSN leases.\n    pub valid_lsn_lease_count_gauge: UIntGauge,\n    pub wal_records_received: IntCounter,\n    pub storage_io_size: StorageIoSizeMetrics,\n    pub wait_lsn_in_progress_micros: GlobalAndPerTenantIntCounter,\n    pub wait_lsn_start_finish_counterpair: IntCounterPair,\n    pub wait_ondemand_download_time: wait_ondemand_download_time::WaitOndemandDownloadTimeSum,\n    shutdown: std::sync::atomic::AtomicBool,\n}\n\nimpl TimelineMetrics {\n    pub fn new(\n        tenant_shard_id: &TenantShardId,\n        timeline_id_raw: &TimelineId,\n        evictions_with_low_residence_duration_builder: EvictionsWithLowResidenceDurationBuilder,\n    ) -> Self {\n        let tenant_id = tenant_shard_id.tenant_id.to_string();\n        let shard_id = format!(\"{}\", tenant_shard_id.shard_slug());\n        let timeline_id = timeline_id_raw.to_string();\n        let flush_time_histo = StorageTimeMetrics::new(\n            StorageTimeOperation::LayerFlush,\n            &tenant_id,\n            &shard_id,\n            &timeline_id,\n        );\n        
let flush_delay_histo = StorageTimeMetrics::new(\n            StorageTimeOperation::LayerFlushDelay,\n            &tenant_id,\n            &shard_id,\n            &timeline_id,\n        );\n        let compact_time_histo = StorageTimeMetrics::new(\n            StorageTimeOperation::Compact,\n            &tenant_id,\n            &shard_id,\n            &timeline_id,\n        );\n        let create_images_time_histo = StorageTimeMetrics::new(\n            StorageTimeOperation::CreateImages,\n            &tenant_id,\n            &shard_id,\n            &timeline_id,\n        );\n        let logical_size_histo = StorageTimeMetrics::new(\n            StorageTimeOperation::LogicalSize,\n            &tenant_id,\n            &shard_id,\n            &timeline_id,\n        );\n        let imitate_logical_size_histo = StorageTimeMetrics::new(\n            StorageTimeOperation::ImitateLogicalSize,\n            &tenant_id,\n            &shard_id,\n            &timeline_id,\n        );\n        let load_layer_map_histo = StorageTimeMetrics::new(\n            StorageTimeOperation::LoadLayerMap,\n            &tenant_id,\n            &shard_id,\n            &timeline_id,\n        );\n        let garbage_collect_histo = StorageTimeMetrics::new(\n            StorageTimeOperation::Gc,\n            &tenant_id,\n            &shard_id,\n            &timeline_id,\n        );\n        let find_gc_cutoffs_histo = StorageTimeMetrics::new(\n            StorageTimeOperation::FindGcCutoffs,\n            &tenant_id,\n            &shard_id,\n            &timeline_id,\n        );\n        let last_record_lsn_gauge = LAST_RECORD_LSN\n            .get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id])\n            .unwrap();\n\n        let disk_consistent_lsn_gauge = DISK_CONSISTENT_LSN\n            .get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id])\n            .unwrap();\n\n        let pitr_history_size = PITR_HISTORY_SIZE\n            
.get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id])\n            .unwrap();\n\n        let archival_size = TIMELINE_ARCHIVE_SIZE\n            .get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id])\n            .unwrap();\n\n        let layers_per_read = LAYERS_PER_READ\n            .get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id])\n            .unwrap();\n\n        let standby_horizon_gauge = STANDBY_HORIZON\n            .get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id])\n            .unwrap();\n        let resident_physical_size_gauge = RESIDENT_PHYSICAL_SIZE\n            .get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id])\n            .unwrap();\n        let visible_physical_size_gauge = VISIBLE_PHYSICAL_SIZE\n            .get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id])\n            .unwrap();\n        // TODO: we shouldn't expose this metric\n        let current_logical_size_gauge = CURRENT_LOGICAL_SIZE\n            .get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id])\n            .unwrap();\n        let aux_file_size_gauge = AUX_FILE_SIZE\n            .get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id])\n            .unwrap();\n        // TODO use impl Trait syntax here once we have ability to use it: https://github.com/rust-lang/rust/issues/63065\n        let directory_entries_count_gauge_closure = {\n            let tenant_shard_id = *tenant_shard_id;\n            let timeline_id_raw = *timeline_id_raw;\n            move || {\n                let tenant_id = tenant_shard_id.tenant_id.to_string();\n                let shard_id = format!(\"{}\", tenant_shard_id.shard_slug());\n                let timeline_id = timeline_id_raw.to_string();\n                let gauge: UIntGauge = DIRECTORY_ENTRIES_COUNT\n                    .get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id])\n                    
.unwrap();\n                gauge\n            }\n        };\n        let directory_entries_count_gauge: Lazy<UIntGauge, Box<dyn Send + Fn() -> UIntGauge>> =\n            Lazy::new(Box::new(directory_entries_count_gauge_closure));\n        let evictions = EVICTIONS\n            .get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id])\n            .unwrap();\n        let evictions_with_low_residence_duration = evictions_with_low_residence_duration_builder\n            .build(&tenant_id, &shard_id, &timeline_id);\n\n        let valid_lsn_lease_count_gauge = VALID_LSN_LEASE_COUNT\n            .get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id])\n            .unwrap();\n\n        let wal_records_received = PAGESERVER_TIMELINE_WAL_RECORDS_RECEIVED\n            .get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id])\n            .unwrap();\n\n        let storage_io_size = StorageIoSizeMetrics::new(&tenant_id, &shard_id, &timeline_id);\n\n        let wait_lsn_in_progress_micros = GlobalAndPerTenantIntCounter {\n            global: WAIT_LSN_IN_PROGRESS_GLOBAL_MICROS.clone(),\n            per_tenant: WAIT_LSN_IN_PROGRESS_MICROS\n                .get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id])\n                .unwrap(),\n        };\n\n        let wait_lsn_start_finish_counterpair = WAIT_LSN_START_FINISH_COUNTERPAIR\n            .get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id])\n            .unwrap();\n\n        let wait_ondemand_download_time =\n            wait_ondemand_download_time::WaitOndemandDownloadTimeSum::new(\n                &tenant_id,\n                &shard_id,\n                &timeline_id,\n            );\n\n        TIMELINE_STATE_METRIC.with_label_values(&[\"active\"]).inc();\n\n        TimelineMetrics {\n            tenant_id,\n            shard_id,\n            timeline_id,\n            flush_time_histo,\n            flush_delay_histo,\n            compact_time_histo,\n   
         create_images_time_histo,\n            logical_size_histo,\n            imitate_logical_size_histo,\n            garbage_collect_histo,\n            find_gc_cutoffs_histo,\n            load_layer_map_histo,\n            last_record_lsn_gauge,\n            disk_consistent_lsn_gauge,\n            pitr_history_size,\n            archival_size,\n            layers_per_read,\n            standby_horizon_gauge,\n            resident_physical_size_gauge,\n            visible_physical_size_gauge,\n            current_logical_size_gauge,\n            aux_file_size_gauge,\n            directory_entries_count_gauge,\n            evictions,\n            evictions_with_low_residence_duration: std::sync::RwLock::new(\n                evictions_with_low_residence_duration,\n            ),\n            storage_io_size,\n            valid_lsn_lease_count_gauge,\n            wal_records_received,\n            wait_lsn_in_progress_micros,\n            wait_lsn_start_finish_counterpair,\n            wait_ondemand_download_time,\n            shutdown: std::sync::atomic::AtomicBool::default(),\n        }\n    }\n\n    pub(crate) fn record_new_file_metrics(&self, sz: u64) {\n        self.resident_physical_size_add(sz);\n    }\n\n    pub(crate) fn resident_physical_size_sub(&self, sz: u64) {\n        self.resident_physical_size_gauge.sub(sz);\n        crate::metrics::RESIDENT_PHYSICAL_SIZE_GLOBAL.sub(sz);\n    }\n\n    pub(crate) fn resident_physical_size_add(&self, sz: u64) {\n        self.resident_physical_size_gauge.add(sz);\n        crate::metrics::RESIDENT_PHYSICAL_SIZE_GLOBAL.add(sz);\n    }\n\n    pub(crate) fn resident_physical_size_get(&self) -> u64 {\n        self.resident_physical_size_gauge.get()\n    }\n\n    /// Generates TIMELINE_LAYER labels for a persistent layer.\n    fn make_layer_labels(&self, layer_desc: &PersistentLayerDesc) -> [&str; 5] {\n        let level = match LayerMap::is_l0(&layer_desc.key_range, layer_desc.is_delta()) {\n            true => 
LayerLevel::L0,\n            false => LayerLevel::L1,\n        };\n        let kind = match layer_desc.is_delta() {\n            true => LayerKind::Delta,\n            false => LayerKind::Image,\n        };\n        [\n            &self.tenant_id,\n            &self.shard_id,\n            &self.timeline_id,\n            level.into(),\n            kind.into(),\n        ]\n    }\n\n    /// Generates TIMELINE_LAYER labels for a frozen ephemeral layer.\n    fn make_frozen_layer_labels(&self, _layer: &InMemoryLayer) -> [&str; 5] {\n        [\n            &self.tenant_id,\n            &self.shard_id,\n            &self.timeline_id,\n            LayerLevel::Frozen.into(),\n            LayerKind::Delta.into(), // by definition\n        ]\n    }\n\n    /// Removes a frozen ephemeral layer from TIMELINE_LAYER metrics.\n    pub fn dec_frozen_layer(&self, layer: &InMemoryLayer) {\n        assert!(matches!(layer.info(), InMemoryLayerInfo::Frozen { .. }));\n        let labels = self.make_frozen_layer_labels(layer);\n        let size = layer.len();\n        TIMELINE_LAYER_COUNT\n            .get_metric_with_label_values(&labels)\n            .unwrap()\n            .dec();\n        TIMELINE_LAYER_SIZE\n            .get_metric_with_label_values(&labels)\n            .unwrap()\n            .sub(size);\n    }\n\n    /// Adds a frozen ephemeral layer to TIMELINE_LAYER metrics.\n    pub fn inc_frozen_layer(&self, layer: &InMemoryLayer) {\n        assert!(matches!(layer.info(), InMemoryLayerInfo::Frozen { .. 
}));\n        let labels = self.make_frozen_layer_labels(layer);\n        let size = layer.len();\n        TIMELINE_LAYER_COUNT\n            .get_metric_with_label_values(&labels)\n            .unwrap()\n            .inc();\n        TIMELINE_LAYER_SIZE\n            .get_metric_with_label_values(&labels)\n            .unwrap()\n            .add(size);\n    }\n\n    /// Removes a persistent layer from TIMELINE_LAYER metrics.\n    pub fn dec_layer(&self, layer_desc: &PersistentLayerDesc) {\n        let labels = self.make_layer_labels(layer_desc);\n        TIMELINE_LAYER_COUNT\n            .get_metric_with_label_values(&labels)\n            .unwrap()\n            .dec();\n        TIMELINE_LAYER_SIZE\n            .get_metric_with_label_values(&labels)\n            .unwrap()\n            .sub(layer_desc.file_size);\n    }\n\n    /// Adds a persistent layer to TIMELINE_LAYER metrics.\n    pub fn inc_layer(&self, layer_desc: &PersistentLayerDesc) {\n        let labels = self.make_layer_labels(layer_desc);\n        TIMELINE_LAYER_COUNT\n            .get_metric_with_label_values(&labels)\n            .unwrap()\n            .inc();\n        TIMELINE_LAYER_SIZE\n            .get_metric_with_label_values(&labels)\n            .unwrap()\n            .add(layer_desc.file_size);\n    }\n\n    pub(crate) fn shutdown(&self) {\n        let was_shutdown = self\n            .shutdown\n            .swap(true, std::sync::atomic::Ordering::Relaxed);\n\n        if was_shutdown {\n            // this happens on tenant deletion because tenant first shuts down timelines, then\n            // invokes timeline deletion which first shuts down the timeline again.\n            // TODO: this can be removed once https://github.com/neondatabase/neon/issues/5080\n            return;\n        }\n\n        TIMELINE_STATE_METRIC.with_label_values(&[\"active\"]).dec();\n\n        let tenant_id = &self.tenant_id;\n        let timeline_id = &self.timeline_id;\n        let shard_id = &self.shard_id;\n        
let _ = LAST_RECORD_LSN.remove_label_values(&[tenant_id, shard_id, timeline_id]);\n        let _ = DISK_CONSISTENT_LSN.remove_label_values(&[tenant_id, shard_id, timeline_id]);\n        let _ = STANDBY_HORIZON.remove_label_values(&[tenant_id, shard_id, timeline_id]);\n        {\n            RESIDENT_PHYSICAL_SIZE_GLOBAL.sub(self.resident_physical_size_get());\n            let _ = RESIDENT_PHYSICAL_SIZE.remove_label_values(&[tenant_id, shard_id, timeline_id]);\n        }\n        let _ = VISIBLE_PHYSICAL_SIZE.remove_label_values(&[tenant_id, shard_id, timeline_id]);\n        let _ = CURRENT_LOGICAL_SIZE.remove_label_values(&[tenant_id, shard_id, timeline_id]);\n        if let Some(metric) = Lazy::get(&DIRECTORY_ENTRIES_COUNT) {\n            let _ = metric.remove_label_values(&[tenant_id, shard_id, timeline_id]);\n        }\n\n        let _ = TIMELINE_ARCHIVE_SIZE.remove_label_values(&[tenant_id, shard_id, timeline_id]);\n        let _ = PITR_HISTORY_SIZE.remove_label_values(&[tenant_id, shard_id, timeline_id]);\n\n        for ref level in LayerLevel::iter() {\n            for ref kind in LayerKind::iter() {\n                let labels: [&str; 5] =\n                    [tenant_id, shard_id, timeline_id, level.into(), kind.into()];\n                let _ = TIMELINE_LAYER_SIZE.remove_label_values(&labels);\n                let _ = TIMELINE_LAYER_COUNT.remove_label_values(&labels);\n            }\n        }\n\n        let _ = LAYERS_PER_READ.remove_label_values(&[tenant_id, shard_id, timeline_id]);\n\n        let _ = EVICTIONS.remove_label_values(&[tenant_id, shard_id, timeline_id]);\n        let _ = AUX_FILE_SIZE.remove_label_values(&[tenant_id, shard_id, timeline_id]);\n        let _ = VALID_LSN_LEASE_COUNT.remove_label_values(&[tenant_id, shard_id, timeline_id]);\n\n        self.evictions_with_low_residence_duration\n            .write()\n            .unwrap()\n            .remove(tenant_id, shard_id, timeline_id);\n\n        // The following metrics are born outside 
of the TimelineMetrics lifecycle but still\n        // removed at the end of it. The idea is to have the metrics outlive the\n        // entity during which they're observed, e.g., the smgr metrics shall\n        // outlive an individual smgr connection, but not the timeline.\n\n        for op in StorageTimeOperation::VARIANTS {\n            let _ = STORAGE_TIME_SUM_PER_TIMELINE.remove_label_values(&[\n                op,\n                tenant_id,\n                shard_id,\n                timeline_id,\n            ]);\n            let _ = STORAGE_TIME_COUNT_PER_TIMELINE.remove_label_values(&[\n                op,\n                tenant_id,\n                shard_id,\n                timeline_id,\n            ]);\n            /* BEGIN_HADRON */\n            let _ = STORAGE_ACTIVE_COUNT_PER_TIMELINE.remove_label_values(&[\n                op,\n                tenant_id,\n                shard_id,\n                timeline_id,\n            ]);\n            /*END_HADRON */\n        }\n\n        for op in StorageIoSizeOperation::VARIANTS {\n            let _ = STORAGE_IO_SIZE.remove_label_values(&[op, tenant_id, shard_id, timeline_id]);\n        }\n\n        let _ =\n            WAIT_LSN_IN_PROGRESS_MICROS.remove_label_values(&[tenant_id, shard_id, timeline_id]);\n\n        {\n            let mut res = [Ok(()), Ok(())];\n            WAIT_LSN_START_FINISH_COUNTERPAIR\n                .remove_label_values(&mut res, &[tenant_id, shard_id, timeline_id]);\n        }\n\n        wait_ondemand_download_time::shutdown_timeline(tenant_id, shard_id, timeline_id);\n\n        let _ = SMGR_QUERY_STARTED_PER_TENANT_TIMELINE.remove_label_values(&[\n            SmgrQueryType::GetPageAtLsn.into(),\n            tenant_id,\n            shard_id,\n            timeline_id,\n        ]);\n        let _ = SMGR_QUERY_TIME_PER_TENANT_TIMELINE.remove_label_values(&[\n            SmgrQueryType::GetPageAtLsn.into(),\n            tenant_id,\n            shard_id,\n            timeline_id,\n      
  ]);\n        let _ = PAGE_SERVICE_BATCH_SIZE_PER_TENANT_TIMELINE.remove_label_values(&[\n            tenant_id,\n            shard_id,\n            timeline_id,\n        ]);\n        let _ = PAGESERVER_TIMELINE_WAL_RECORDS_RECEIVED.remove_label_values(&[\n            tenant_id,\n            shard_id,\n            timeline_id,\n        ]);\n        let _ = PAGE_SERVICE_SMGR_FLUSH_INPROGRESS_MICROS.remove_label_values(&[\n            tenant_id,\n            shard_id,\n            timeline_id,\n        ]);\n        let _ = PAGE_SERVICE_SMGR_BATCH_WAIT_TIME.remove_label_values(&[\n            tenant_id,\n            shard_id,\n            timeline_id,\n        ]);\n\n        for reason in GetPageBatchBreakReason::iter() {\n            let _ = PAGE_SERVICE_BATCH_BREAK_REASON_PER_TENANT_TIMELINE.remove_label_values(&[\n                tenant_id,\n                shard_id,\n                timeline_id,\n                reason.into(),\n            ]);\n        }\n    }\n}\n\npub(crate) fn remove_tenant_metrics(tenant_shard_id: &TenantShardId) {\n    let tid = tenant_shard_id.tenant_id.to_string();\n    let shard_id = tenant_shard_id.shard_slug().to_string();\n\n    // Only shard zero deals in synthetic sizes\n    if tenant_shard_id.is_shard_zero() {\n        let _ = TENANT_SYNTHETIC_SIZE_METRIC.remove_label_values(&[&tid]);\n    }\n    let _ = TENANT_OFFLOADED_TIMELINES.remove_label_values(&[&tid, &shard_id]);\n\n    tenant_throttling::remove_tenant_metrics(tenant_shard_id);\n\n    // we leave the BROKEN_TENANTS_SET entry if any\n}\n\n/// Maintain a per timeline gauge in addition to the global gauge.\npub(crate) struct PerTimelineRemotePhysicalSizeGauge {\n    last_set: AtomicU64,\n    gauge: UIntGauge,\n}\n\nimpl PerTimelineRemotePhysicalSizeGauge {\n    fn new(per_timeline_gauge: UIntGauge) -> Self {\n        Self {\n            last_set: AtomicU64::new(0),\n            gauge: per_timeline_gauge,\n        }\n    }\n    pub(crate) fn set(&self, sz: u64) {\n        
self.gauge.set(sz);\n        let prev = self.last_set.swap(sz, std::sync::atomic::Ordering::Relaxed);\n        if sz < prev {\n            REMOTE_PHYSICAL_SIZE_GLOBAL.sub(prev - sz);\n        } else {\n            REMOTE_PHYSICAL_SIZE_GLOBAL.add(sz - prev);\n        };\n    }\n    pub(crate) fn get(&self) -> u64 {\n        self.gauge.get()\n    }\n}\n\nimpl Drop for PerTimelineRemotePhysicalSizeGauge {\n    fn drop(&mut self) {\n        REMOTE_PHYSICAL_SIZE_GLOBAL.sub(self.last_set.load(std::sync::atomic::Ordering::Relaxed));\n    }\n}\n\npub(crate) struct RemoteTimelineClientMetrics {\n    tenant_id: String,\n    shard_id: String,\n    timeline_id: String,\n    pub(crate) remote_physical_size_gauge: PerTimelineRemotePhysicalSizeGauge,\n    calls: Mutex<HashMap<(&'static str, &'static str), IntCounterPair>>,\n    bytes_started_counter: Mutex<HashMap<(&'static str, &'static str), IntCounter>>,\n    bytes_finished_counter: Mutex<HashMap<(&'static str, &'static str), IntCounter>>,\n    pub(crate) projected_remote_consistent_lsn_gauge: UIntGauge,\n}\n\nimpl RemoteTimelineClientMetrics {\n    pub fn new(tenant_shard_id: &TenantShardId, timeline_id: &TimelineId) -> Self {\n        let tenant_id_str = tenant_shard_id.tenant_id.to_string();\n        let shard_id_str = format!(\"{}\", tenant_shard_id.shard_slug());\n        let timeline_id_str = timeline_id.to_string();\n\n        let remote_physical_size_gauge = PerTimelineRemotePhysicalSizeGauge::new(\n            REMOTE_PHYSICAL_SIZE\n                .get_metric_with_label_values(&[&tenant_id_str, &shard_id_str, &timeline_id_str])\n                .unwrap(),\n        );\n\n        let projected_remote_consistent_lsn_gauge = PROJECTED_REMOTE_CONSISTENT_LSN\n            .get_metric_with_label_values(&[&tenant_id_str, &shard_id_str, &timeline_id_str])\n            .unwrap();\n\n        RemoteTimelineClientMetrics {\n            tenant_id: tenant_id_str,\n            shard_id: shard_id_str,\n            timeline_id: 
timeline_id_str,\n            calls: Mutex::new(HashMap::default()),\n            bytes_started_counter: Mutex::new(HashMap::default()),\n            bytes_finished_counter: Mutex::new(HashMap::default()),\n            remote_physical_size_gauge,\n            projected_remote_consistent_lsn_gauge,\n        }\n    }\n\n    pub fn remote_operation_time(\n        &self,\n        task_kind: Option<TaskKind>,\n        file_kind: &RemoteOpFileKind,\n        op_kind: &RemoteOpKind,\n        status: &'static str,\n    ) -> Histogram {\n        REMOTE_TIMELINE_CLIENT_COMPLETION_LATENCY\n            .get_metric_with_label_values(&[\n                task_kind.as_ref().map(|tk| tk.into()).unwrap_or(\"unknown\"),\n                file_kind.as_str(),\n                op_kind.as_str(),\n                status,\n            ])\n            .unwrap()\n    }\n\n    fn calls_counter_pair(\n        &self,\n        file_kind: &RemoteOpFileKind,\n        op_kind: &RemoteOpKind,\n    ) -> IntCounterPair {\n        let mut guard = self.calls.lock().unwrap();\n        let key = (file_kind.as_str(), op_kind.as_str());\n        let metric = guard.entry(key).or_insert_with(move || {\n            REMOTE_TIMELINE_CLIENT_CALLS\n                .get_metric_with_label_values(&[\n                    &self.tenant_id,\n                    &self.shard_id,\n                    &self.timeline_id,\n                    key.0,\n                    key.1,\n                ])\n                .unwrap()\n        });\n        metric.clone()\n    }\n\n    fn bytes_started_counter(\n        &self,\n        file_kind: &RemoteOpFileKind,\n        op_kind: &RemoteOpKind,\n    ) -> IntCounter {\n        let mut guard = self.bytes_started_counter.lock().unwrap();\n        let key = (file_kind.as_str(), op_kind.as_str());\n        let metric = guard.entry(key).or_insert_with(move || {\n            REMOTE_TIMELINE_CLIENT_BYTES_STARTED_COUNTER\n                .get_metric_with_label_values(&[\n                    
&self.tenant_id,\n                    &self.shard_id,\n                    &self.timeline_id,\n                    key.0,\n                    key.1,\n                ])\n                .unwrap()\n        });\n        metric.clone()\n    }\n\n    fn bytes_finished_counter(\n        &self,\n        file_kind: &RemoteOpFileKind,\n        op_kind: &RemoteOpKind,\n    ) -> IntCounter {\n        let mut guard = self.bytes_finished_counter.lock().unwrap();\n        let key = (file_kind.as_str(), op_kind.as_str());\n        let metric = guard.entry(key).or_insert_with(move || {\n            REMOTE_TIMELINE_CLIENT_BYTES_FINISHED_COUNTER\n                .get_metric_with_label_values(&[\n                    &self.tenant_id,\n                    &self.shard_id,\n                    &self.timeline_id,\n                    key.0,\n                    key.1,\n                ])\n                .unwrap()\n        });\n        metric.clone()\n    }\n}\n\n#[cfg(test)]\nimpl RemoteTimelineClientMetrics {\n    pub fn get_bytes_started_counter_value(\n        &self,\n        file_kind: &RemoteOpFileKind,\n        op_kind: &RemoteOpKind,\n    ) -> Option<u64> {\n        let guard = self.bytes_started_counter.lock().unwrap();\n        let key = (file_kind.as_str(), op_kind.as_str());\n        guard.get(&key).map(|counter| counter.get())\n    }\n\n    pub fn get_bytes_finished_counter_value(\n        &self,\n        file_kind: &RemoteOpFileKind,\n        op_kind: &RemoteOpKind,\n    ) -> Option<u64> {\n        let guard = self.bytes_finished_counter.lock().unwrap();\n        let key = (file_kind.as_str(), op_kind.as_str());\n        guard.get(&key).map(|counter| counter.get())\n    }\n}\n\n/// See [`RemoteTimelineClientMetrics::call_begin`].\n#[must_use]\npub(crate) struct RemoteTimelineClientCallMetricGuard {\n    /// Decremented on drop.\n    calls_counter_pair: Option<IntCounterPair>,\n    /// If Some(), this references the bytes_finished metric, and we increment it by the given 
`u64` on drop.\n    bytes_finished: Option<(IntCounter, u64)>,\n}\n\nimpl RemoteTimelineClientCallMetricGuard {\n    /// Consume this guard object without performing the metric updates it would do on `drop()`.\n    /// The caller vouches to do the metric updates manually.\n    pub fn will_decrement_manually(mut self) {\n        let RemoteTimelineClientCallMetricGuard {\n            calls_counter_pair,\n            bytes_finished,\n        } = &mut self;\n        calls_counter_pair.take();\n        bytes_finished.take();\n    }\n}\n\nimpl Drop for RemoteTimelineClientCallMetricGuard {\n    fn drop(&mut self) {\n        let RemoteTimelineClientCallMetricGuard {\n            calls_counter_pair,\n            bytes_finished,\n        } = self;\n        if let Some(guard) = calls_counter_pair.take() {\n            guard.dec();\n        }\n        if let Some((bytes_finished_metric, value)) = bytes_finished {\n            bytes_finished_metric.inc_by(*value);\n        }\n    }\n}\n\n/// The enum variants communicate to the [`RemoteTimelineClientMetrics`] whether to\n/// track the byte size of this call in applicable metric(s).\npub(crate) enum RemoteTimelineClientMetricsCallTrackSize {\n    /// Do not account for this call's byte size in any metrics.\n    /// The `reason` field is there to make the call sites self-documenting\n    /// about why they don't need the metric.\n    DontTrackSize { reason: &'static str },\n    /// Track the byte size of the call in applicable metric(s).\n    Bytes(u64),\n}\n\nimpl RemoteTimelineClientMetrics {\n    /// Update the metrics that change when a call to the remote timeline client instance starts.\n    ///\n    /// Drop the returned guard object once the operation is finished to update the corresponding metrics that track completions.\n    /// Or, use [`RemoteTimelineClientCallMetricGuard::will_decrement_manually`] and [`call_end`](Self::call_end) if that\n    /// is more suitable.\n    /// Never do both.\n    pub(crate) fn call_begin(\n 
       &self,\n        file_kind: &RemoteOpFileKind,\n        op_kind: &RemoteOpKind,\n        size: RemoteTimelineClientMetricsCallTrackSize,\n    ) -> RemoteTimelineClientCallMetricGuard {\n        let calls_counter_pair = self.calls_counter_pair(file_kind, op_kind);\n        calls_counter_pair.inc();\n\n        let bytes_finished = match size {\n            RemoteTimelineClientMetricsCallTrackSize::DontTrackSize { reason: _reason } => {\n                // nothing to do\n                None\n            }\n            RemoteTimelineClientMetricsCallTrackSize::Bytes(size) => {\n                self.bytes_started_counter(file_kind, op_kind).inc_by(size);\n                let finished_counter = self.bytes_finished_counter(file_kind, op_kind);\n                Some((finished_counter, size))\n            }\n        };\n        RemoteTimelineClientCallMetricGuard {\n            calls_counter_pair: Some(calls_counter_pair),\n            bytes_finished,\n        }\n    }\n\n    /// Manually update the metrics that track completions, instead of using the guard object.\n    /// Using the guard object is generally preferable.\n    /// See [`call_begin`](Self::call_begin) for more context.\n    pub(crate) fn call_end(\n        &self,\n        file_kind: &RemoteOpFileKind,\n        op_kind: &RemoteOpKind,\n        size: RemoteTimelineClientMetricsCallTrackSize,\n    ) {\n        let calls_counter_pair = self.calls_counter_pair(file_kind, op_kind);\n        calls_counter_pair.dec();\n        match size {\n            RemoteTimelineClientMetricsCallTrackSize::DontTrackSize { reason: _reason } => {}\n            RemoteTimelineClientMetricsCallTrackSize::Bytes(size) => {\n                self.bytes_finished_counter(file_kind, op_kind).inc_by(size);\n            }\n        }\n    }\n}\n\nimpl Drop for RemoteTimelineClientMetrics {\n    fn drop(&mut self) {\n        let RemoteTimelineClientMetrics {\n            tenant_id,\n            shard_id,\n            timeline_id,\n        
    remote_physical_size_gauge,\n            calls,\n            bytes_started_counter,\n            bytes_finished_counter,\n            projected_remote_consistent_lsn_gauge,\n        } = self;\n        for ((a, b), _) in calls.get_mut().unwrap().drain() {\n            let mut res = [Ok(()), Ok(())];\n            REMOTE_TIMELINE_CLIENT_CALLS\n                .remove_label_values(&mut res, &[tenant_id, shard_id, timeline_id, a, b]);\n            // don't care about results\n        }\n        for ((a, b), _) in bytes_started_counter.get_mut().unwrap().drain() {\n            let _ = REMOTE_TIMELINE_CLIENT_BYTES_STARTED_COUNTER.remove_label_values(&[\n                tenant_id,\n                shard_id,\n                timeline_id,\n                a,\n                b,\n            ]);\n        }\n        for ((a, b), _) in bytes_finished_counter.get_mut().unwrap().drain() {\n            let _ = REMOTE_TIMELINE_CLIENT_BYTES_FINISHED_COUNTER.remove_label_values(&[\n                tenant_id,\n                shard_id,\n                timeline_id,\n                a,\n                b,\n            ]);\n        }\n        {\n            let _ = remote_physical_size_gauge; // use to avoid 'unused' warning in destructuring above\n            let _ = REMOTE_PHYSICAL_SIZE.remove_label_values(&[tenant_id, shard_id, timeline_id]);\n        }\n        {\n            let _ = projected_remote_consistent_lsn_gauge;\n            let _ = PROJECTED_REMOTE_CONSISTENT_LSN.remove_label_values(&[\n                tenant_id,\n                shard_id,\n                timeline_id,\n            ]);\n        }\n    }\n}\n\n/// Wrapper future that measures the time spent by a remote storage operation,\n/// and records the time and success/failure as a prometheus metric.\npub(crate) trait MeasureRemoteOp<O, E>: Sized + Future<Output = Result<O, E>> {\n    async fn measure_remote_op(\n        self,\n        task_kind: Option<TaskKind>, // not all caller contexts have a RequestContext 
/ TaskKind handy\n        file_kind: RemoteOpFileKind,\n        op: RemoteOpKind,\n        metrics: Arc<RemoteTimelineClientMetrics>,\n    ) -> Result<O, E> {\n        let start = Instant::now();\n        let res = self.await;\n        let duration = start.elapsed();\n        let status = if res.is_ok() { &\"success\" } else { &\"failure\" };\n        metrics\n            .remote_operation_time(task_kind, &file_kind, &op, status)\n            .observe(duration.as_secs_f64());\n        res\n    }\n}\n\nimpl<Fut, O, E> MeasureRemoteOp<O, E> for Fut where Fut: Sized + Future<Output = Result<O, E>> {}\n\npub mod tokio_epoll_uring {\n    use std::collections::HashMap;\n    use std::sync::{Arc, Mutex};\n\n    use metrics::{Histogram, LocalHistogram, UIntGauge, register_histogram, register_int_counter};\n    use once_cell::sync::Lazy;\n\n    /// Shared storage for tokio-epoll-uring thread local metrics.\n    pub(crate) static THREAD_LOCAL_METRICS_STORAGE: Lazy<ThreadLocalMetricsStorage> =\n        Lazy::new(|| {\n            let slots_submission_queue_depth = register_histogram!(\n                \"pageserver_tokio_epoll_uring_slots_submission_queue_depth\",\n                \"The slots waiters queue depth of each tokio_epoll_uring system\",\n                vec![\n                    1.0, 2.0, 4.0, 8.0, 16.0, 32.0, 64.0, 128.0, 256.0, 512.0, 1024.0\n                ],\n            )\n            .expect(\"failed to define a metric\");\n            ThreadLocalMetricsStorage {\n                observers: Mutex::new(HashMap::new()),\n                slots_submission_queue_depth,\n            }\n        });\n\n    pub struct ThreadLocalMetricsStorage {\n        /// List of thread local metrics observers.\n        observers: Mutex<HashMap<u64, Arc<ThreadLocalMetrics>>>,\n        /// A histogram shared between all thread local systems\n        /// for collecting slots submission queue depth.\n        slots_submission_queue_depth: Histogram,\n    }\n\n    /// Each thread-local 
[`tokio_epoll_uring::System`] gets one of these as its\n    /// [`tokio_epoll_uring::metrics::PerSystemMetrics`] generic.\n    ///\n    /// The System makes observations into [`Self`] and periodically, the collector\n    /// comes along and flushes [`Self`] into the shared storage [`THREAD_LOCAL_METRICS_STORAGE`].\n    ///\n    /// [`LocalHistogram`] is `!Send`, so, we need to put it behind a [`Mutex`].\n    /// But except for the periodic flush, the lock is uncontended so there's no waiting\n    /// for cache coherence protocol to get an exclusive cache line.\n    pub struct ThreadLocalMetrics {\n        /// Local observer of thread local tokio-epoll-uring system's slots waiters queue depth.\n        slots_submission_queue_depth: Mutex<LocalHistogram>,\n    }\n\n    impl ThreadLocalMetricsStorage {\n        /// Registers a new thread local system. Returns a thread local metrics observer.\n        pub fn register_system(&self, id: u64) -> Arc<ThreadLocalMetrics> {\n            let per_system_metrics = Arc::new(ThreadLocalMetrics::new(\n                self.slots_submission_queue_depth.local(),\n            ));\n            let mut g = self.observers.lock().unwrap();\n            g.insert(id, Arc::clone(&per_system_metrics));\n            per_system_metrics\n        }\n\n        /// Removes metrics observer for a thread local system.\n        /// This should be called before dropping a thread local system.\n        pub fn remove_system(&self, id: u64) {\n            let mut g = self.observers.lock().unwrap();\n            g.remove(&id);\n        }\n\n        /// Flush all thread local metrics to the shared storage.\n        pub fn flush_thread_local_metrics(&self) {\n            let g = self.observers.lock().unwrap();\n            g.values().for_each(|local| {\n                local.flush();\n            });\n        }\n    }\n\n    impl ThreadLocalMetrics {\n        pub fn new(slots_submission_queue_depth: LocalHistogram) -> Self {\n            ThreadLocalMetrics 
{\n                slots_submission_queue_depth: Mutex::new(slots_submission_queue_depth),\n            }\n        }\n\n        /// Flushes the thread local metrics to shared aggregator.\n        pub fn flush(&self) {\n            let Self {\n                slots_submission_queue_depth,\n            } = self;\n            slots_submission_queue_depth.lock().unwrap().flush();\n        }\n    }\n\n    impl tokio_epoll_uring::metrics::PerSystemMetrics for ThreadLocalMetrics {\n        fn observe_slots_submission_queue_depth(&self, queue_depth: u64) {\n            let Self {\n                slots_submission_queue_depth,\n            } = self;\n            slots_submission_queue_depth\n                .lock()\n                .unwrap()\n                .observe(queue_depth as f64);\n        }\n    }\n\n    pub struct Collector {\n        descs: Vec<metrics::core::Desc>,\n        systems_created: UIntGauge,\n        systems_destroyed: UIntGauge,\n        thread_local_metrics_storage: &'static ThreadLocalMetricsStorage,\n    }\n\n    impl metrics::core::Collector for Collector {\n        fn desc(&self) -> Vec<&metrics::core::Desc> {\n            self.descs.iter().collect()\n        }\n\n        fn collect(&self) -> Vec<metrics::proto::MetricFamily> {\n            let mut mfs = Vec::with_capacity(Self::NMETRICS);\n            let tokio_epoll_uring::metrics::GlobalMetrics {\n                systems_created,\n                systems_destroyed,\n            } = tokio_epoll_uring::metrics::global();\n            self.systems_created.set(systems_created);\n            mfs.extend(self.systems_created.collect());\n            self.systems_destroyed.set(systems_destroyed);\n            mfs.extend(self.systems_destroyed.collect());\n\n            self.thread_local_metrics_storage\n                .flush_thread_local_metrics();\n\n            mfs.extend(\n                self.thread_local_metrics_storage\n                    .slots_submission_queue_depth\n                    
.collect(),\n            );\n            mfs\n        }\n    }\n\n    impl Collector {\n        const NMETRICS: usize = 3;\n\n        #[allow(clippy::new_without_default)]\n        pub fn new() -> Self {\n            let mut descs = Vec::new();\n\n            let systems_created = UIntGauge::new(\n                \"pageserver_tokio_epoll_uring_systems_created\",\n                \"counter of tokio-epoll-uring systems that were created\",\n            )\n            .unwrap();\n            descs.extend(\n                metrics::core::Collector::desc(&systems_created)\n                    .into_iter()\n                    .cloned(),\n            );\n\n            let systems_destroyed = UIntGauge::new(\n                \"pageserver_tokio_epoll_uring_systems_destroyed\",\n                \"counter of tokio-epoll-uring systems that were destroyed\",\n            )\n            .unwrap();\n            descs.extend(\n                metrics::core::Collector::desc(&systems_destroyed)\n                    .into_iter()\n                    .cloned(),\n            );\n\n            Self {\n                descs,\n                systems_created,\n                systems_destroyed,\n                thread_local_metrics_storage: &THREAD_LOCAL_METRICS_STORAGE,\n            }\n        }\n    }\n\n    pub(crate) static THREAD_LOCAL_LAUNCH_SUCCESSES: Lazy<metrics::IntCounter> = Lazy::new(|| {\n        register_int_counter!(\n            \"pageserver_tokio_epoll_uring_pageserver_thread_local_launch_success_count\",\n            \"Number of times where thread_local_system creation spanned multiple executor threads\",\n        )\n        .unwrap()\n    });\n\n    pub(crate) static THREAD_LOCAL_LAUNCH_FAILURES: Lazy<metrics::IntCounter> = Lazy::new(|| {\n        register_int_counter!(\n            \"pageserver_tokio_epoll_uring_pageserver_thread_local_launch_failures_count\",\n            \"Number of times thread_local_system creation failed and was retried after back-off.\",\n       
 )\n        .unwrap()\n    });\n}\n\npub(crate) struct GlobalAndPerTenantIntCounter {\n    global: IntCounter,\n    per_tenant: IntCounter,\n}\n\nimpl GlobalAndPerTenantIntCounter {\n    #[inline(always)]\n    pub(crate) fn inc(&self) {\n        self.inc_by(1)\n    }\n    #[inline(always)]\n    pub(crate) fn inc_by(&self, n: u64) {\n        self.global.inc_by(n);\n        self.per_tenant.inc_by(n);\n    }\n}\n\npub(crate) mod tenant_throttling {\n    use metrics::register_int_counter_vec;\n    use once_cell::sync::Lazy;\n    use utils::shard::TenantShardId;\n\n    use super::GlobalAndPerTenantIntCounter;\n\n    pub(crate) struct Metrics<const KIND: usize> {\n        pub(super) count_accounted_start: GlobalAndPerTenantIntCounter,\n        pub(super) count_accounted_finish: GlobalAndPerTenantIntCounter,\n        pub(super) wait_time: GlobalAndPerTenantIntCounter,\n        pub(super) count_throttled: GlobalAndPerTenantIntCounter,\n    }\n\n    static COUNT_ACCOUNTED_START: Lazy<metrics::IntCounterVec> = Lazy::new(|| {\n        register_int_counter_vec!(\n            \"pageserver_tenant_throttling_count_accounted_start_global\",\n            \"Count of tenant throttling starts, by kind of throttle.\",\n            &[\"kind\"]\n        )\n        .unwrap()\n    });\n    static COUNT_ACCOUNTED_START_PER_TENANT: Lazy<metrics::IntCounterVec> = Lazy::new(|| {\n        register_int_counter_vec!(\n            \"pageserver_tenant_throttling_count_accounted_start\",\n            \"Count of tenant throttling starts, by kind of throttle.\",\n            &[\"kind\", \"tenant_id\", \"shard_id\"]\n        )\n        .unwrap()\n    });\n    static COUNT_ACCOUNTED_FINISH: Lazy<metrics::IntCounterVec> = Lazy::new(|| {\n        register_int_counter_vec!(\n            \"pageserver_tenant_throttling_count_accounted_finish_global\",\n            \"Count of tenant throttling finishes, by kind of throttle.\",\n            &[\"kind\"]\n        )\n        .unwrap()\n    });\n    static 
COUNT_ACCOUNTED_FINISH_PER_TENANT: Lazy<metrics::IntCounterVec> = Lazy::new(|| {\n        register_int_counter_vec!(\n            \"pageserver_tenant_throttling_count_accounted_finish\",\n            \"Count of tenant throttling finishes, by kind of throttle.\",\n            &[\"kind\", \"tenant_id\", \"shard_id\"]\n        )\n        .unwrap()\n    });\n    static WAIT_USECS: Lazy<metrics::IntCounterVec> = Lazy::new(|| {\n        register_int_counter_vec!(\n            \"pageserver_tenant_throttling_wait_usecs_sum_global\",\n            \"Sum of microseconds that spent waiting throttle by kind of throttle.\",\n            &[\"kind\"]\n        )\n        .unwrap()\n    });\n    static WAIT_USECS_PER_TENANT: Lazy<metrics::IntCounterVec> = Lazy::new(|| {\n        register_int_counter_vec!(\n            \"pageserver_tenant_throttling_wait_usecs_sum\",\n            \"Sum of microseconds that spent waiting throttle by kind of throttle.\",\n            &[\"kind\", \"tenant_id\", \"shard_id\"]\n        )\n        .unwrap()\n    });\n\n    static WAIT_COUNT: Lazy<metrics::IntCounterVec> = Lazy::new(|| {\n        register_int_counter_vec!(\n            \"pageserver_tenant_throttling_count_global\",\n            \"Count of tenant throttlings, by kind of throttle.\",\n            &[\"kind\"]\n        )\n        .unwrap()\n    });\n    static WAIT_COUNT_PER_TENANT: Lazy<metrics::IntCounterVec> = Lazy::new(|| {\n        register_int_counter_vec!(\n            \"pageserver_tenant_throttling_count\",\n            \"Count of tenant throttlings, by kind of throttle.\",\n            &[\"kind\", \"tenant_id\", \"shard_id\"]\n        )\n        .unwrap()\n    });\n\n    const KINDS: &[&str] = &[\"pagestream\"];\n    pub type Pagestream = Metrics<0>;\n\n    impl<const KIND: usize> Metrics<KIND> {\n        pub(crate) fn new(tenant_shard_id: &TenantShardId) -> Self {\n            let per_tenant_label_values = &[\n                KINDS[KIND],\n                
&tenant_shard_id.tenant_id.to_string(),\n                &tenant_shard_id.shard_slug().to_string(),\n            ];\n            Metrics {\n                count_accounted_start: {\n                    GlobalAndPerTenantIntCounter {\n                        global: COUNT_ACCOUNTED_START.with_label_values(&[KINDS[KIND]]),\n                        per_tenant: COUNT_ACCOUNTED_START_PER_TENANT\n                            .with_label_values(per_tenant_label_values),\n                    }\n                },\n                count_accounted_finish: {\n                    GlobalAndPerTenantIntCounter {\n                        global: COUNT_ACCOUNTED_FINISH.with_label_values(&[KINDS[KIND]]),\n                        per_tenant: COUNT_ACCOUNTED_FINISH_PER_TENANT\n                            .with_label_values(per_tenant_label_values),\n                    }\n                },\n                wait_time: {\n                    GlobalAndPerTenantIntCounter {\n                        global: WAIT_USECS.with_label_values(&[KINDS[KIND]]),\n                        per_tenant: WAIT_USECS_PER_TENANT\n                            .with_label_values(per_tenant_label_values),\n                    }\n                },\n                count_throttled: {\n                    GlobalAndPerTenantIntCounter {\n                        global: WAIT_COUNT.with_label_values(&[KINDS[KIND]]),\n                        per_tenant: WAIT_COUNT_PER_TENANT\n                            .with_label_values(per_tenant_label_values),\n                    }\n                },\n            }\n        }\n    }\n\n    pub(crate) fn preinitialize_global_metrics() {\n        Lazy::force(&COUNT_ACCOUNTED_START);\n        Lazy::force(&COUNT_ACCOUNTED_FINISH);\n        Lazy::force(&WAIT_USECS);\n        Lazy::force(&WAIT_COUNT);\n    }\n\n    pub(crate) fn remove_tenant_metrics(tenant_shard_id: &TenantShardId) {\n        for m in &[\n            &COUNT_ACCOUNTED_START_PER_TENANT,\n            
&COUNT_ACCOUNTED_FINISH_PER_TENANT,\n            &WAIT_USECS_PER_TENANT,\n            &WAIT_COUNT_PER_TENANT,\n        ] {\n            for kind in KINDS {\n                let _ = m.remove_label_values(&[\n                    kind,\n                    &tenant_shard_id.tenant_id.to_string(),\n                    &tenant_shard_id.shard_slug().to_string(),\n                ]);\n            }\n        }\n    }\n}\n\npub(crate) mod disk_usage_based_eviction {\n    use super::*;\n\n    pub(crate) struct Metrics {\n        pub(crate) tenant_collection_time: Histogram,\n        pub(crate) tenant_layer_count: Histogram,\n        pub(crate) layers_collected: IntCounter,\n        pub(crate) layers_selected: IntCounter,\n        pub(crate) layers_evicted: IntCounter,\n        /*BEGIN_HADRON */\n        pub(crate) bytes_evicted: IntCounter,\n        /*END_HADRON */\n    }\n\n    impl Default for Metrics {\n        fn default() -> Self {\n            let tenant_collection_time = register_histogram!(\n                \"pageserver_disk_usage_based_eviction_tenant_collection_seconds\",\n                \"Time spent collecting layers from a tenant -- not normalized by collected layer amount\",\n                vec![0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1.0, 5.0, 10.0]\n            )\n            .unwrap();\n\n            let tenant_layer_count = register_histogram!(\n                \"pageserver_disk_usage_based_eviction_tenant_collected_layers\",\n                \"Amount of layers gathered from a tenant\",\n                vec![5.0, 50.0, 500.0, 5000.0, 50000.0]\n            )\n            .unwrap();\n\n            let layers_collected = register_int_counter!(\n                \"pageserver_disk_usage_based_eviction_collected_layers_total\",\n                \"Amount of layers collected\"\n            )\n            .unwrap();\n\n            let layers_selected = register_int_counter!(\n                \"pageserver_disk_usage_based_eviction_select_layers_total\",\n                
\"Amount of layers selected\"\n            )\n            .unwrap();\n\n            let layers_evicted = register_int_counter!(\n                \"pageserver_disk_usage_based_eviction_evicted_layers_total\",\n                \"Amount of layers successfully evicted\"\n            )\n            .unwrap();\n\n            /*BEGIN_HADRON */\n            let bytes_evicted = register_int_counter!(\n                \"pageserver_disk_usage_based_eviction_evicted_bytes_total\",\n                \"Amount of bytes successfully evicted\"\n            )\n            .unwrap();\n            /*END_HADRON */\n\n            Self {\n                tenant_collection_time,\n                tenant_layer_count,\n                layers_collected,\n                layers_selected,\n                layers_evicted,\n                bytes_evicted,\n            }\n        }\n    }\n\n    pub(crate) static METRICS: Lazy<Metrics> = Lazy::new(Metrics::default);\n}\n\nstatic TOKIO_EXECUTOR_THREAD_COUNT: Lazy<UIntGaugeVec> = Lazy::new(|| {\n    register_uint_gauge_vec!(\n        \"pageserver_tokio_executor_thread_configured_count\",\n        \"Total number of configued tokio executor threads in the process.\n         The `setup` label denotes whether we're running with multiple runtimes or a single runtime.\",\n        &[\"setup\"],\n    )\n    .unwrap()\n});\n\npub(crate) fn set_tokio_runtime_setup(setup: &str, num_threads: NonZeroUsize) {\n    static SERIALIZE: std::sync::Mutex<()> = std::sync::Mutex::new(());\n    let _guard = SERIALIZE.lock().unwrap();\n    TOKIO_EXECUTOR_THREAD_COUNT.reset();\n    TOKIO_EXECUTOR_THREAD_COUNT\n        .get_metric_with_label_values(&[setup])\n        .unwrap()\n        .set(u64::try_from(num_threads.get()).unwrap());\n}\n\npub(crate) static BASEBACKUP_CACHE_READ: Lazy<IntCounterVec> = Lazy::new(|| {\n    register_int_counter_vec!(\n        \"pageserver_basebackup_cache_read_total\",\n        \"Number of read accesses to the basebackup cache grouped by 
hit/miss/error\",\n        &[\"result\"]\n    )\n    .expect(\"failed to define a metric\")\n});\n\npub(crate) static BASEBACKUP_CACHE_PREPARE: Lazy<IntCounterVec> = Lazy::new(|| {\n    register_int_counter_vec!(\n        \"pageserver_basebackup_cache_prepare_total\",\n        \"Number of prepare requests processed by the basebackup cache grouped by ok/skip/error\",\n        &[\"result\"]\n    )\n    .expect(\"failed to define a metric\")\n});\n\npub(crate) static BASEBACKUP_CACHE_ENTRIES: Lazy<UIntGauge> = Lazy::new(|| {\n    register_uint_gauge!(\n        \"pageserver_basebackup_cache_entries_total\",\n        \"Number of entries in the basebackup cache\"\n    )\n    .expect(\"failed to define a metric\")\n});\n\npub(crate) static BASEBACKUP_CACHE_SIZE: Lazy<UIntGauge> = Lazy::new(|| {\n    register_uint_gauge!(\n        \"pageserver_basebackup_cache_size_bytes\",\n        \"Total size of all basebackup cache entries on disk in bytes\"\n    )\n    .expect(\"failed to define a metric\")\n});\n\npub(crate) static BASEBACKUP_CACHE_PREPARE_QUEUE_SIZE: Lazy<UIntGauge> = Lazy::new(|| {\n    register_uint_gauge!(\n        \"pageserver_basebackup_cache_prepare_queue_size\",\n        \"Number of requests in the basebackup prepare channel\"\n    )\n    .expect(\"failed to define a metric\")\n});\n\nstatic PAGESERVER_CONFIG_IGNORED_ITEMS: Lazy<UIntGaugeVec> = Lazy::new(|| {\n    register_uint_gauge_vec!(\n        \"pageserver_config_ignored_items\",\n        \"TOML items present in the on-disk configuration file but ignored by the pageserver config parser.\\\n         The `item` label is the dot-separated path of the ignored item in the on-disk configuration file.\\\n         The value for an unknown config item is always 1.\\\n         There is a special label value \\\"\\\", which is 0, so that there is always a metric exposed (simplifies dashboards).\",\n        &[\"item\"]\n    )\n    .unwrap()\n});\n\npub fn preinitialize_metrics(\n    conf: &'static PageServerConf,\n  
  ignored: config::ignored_fields::Paths,\n) {\n    set_page_service_config_max_batch_size(&conf.page_service_pipelining);\n\n    PAGESERVER_CONFIG_IGNORED_ITEMS\n        .with_label_values(&[\"\"])\n        .set(0);\n    for path in &ignored.paths {\n        PAGESERVER_CONFIG_IGNORED_ITEMS\n            .with_label_values(&[path])\n            .set(1);\n    }\n\n    // Python tests need these and on some we do alerting.\n    //\n    // FIXME(4813): make it so that we have no top level metrics as this fn will easily fall out of\n    // order:\n    // - global metrics reside in a Lazy<PageserverMetrics>\n    //   - access via crate::metrics::PS_METRICS.some_metric.inc()\n    // - could move the statics into TimelineMetrics::new()?\n\n    // counters\n    [\n        &UNEXPECTED_ONDEMAND_DOWNLOADS,\n        &WALRECEIVER_STARTED_CONNECTIONS,\n        &WALRECEIVER_BROKER_UPDATES,\n        &WALRECEIVER_CANDIDATES_ADDED,\n        &WALRECEIVER_CANDIDATES_REMOVED,\n        &tokio_epoll_uring::THREAD_LOCAL_LAUNCH_FAILURES,\n        &tokio_epoll_uring::THREAD_LOCAL_LAUNCH_SUCCESSES,\n        &REMOTE_ONDEMAND_DOWNLOADED_LAYERS,\n        &REMOTE_ONDEMAND_DOWNLOADED_BYTES,\n        &CIRCUIT_BREAKERS_BROKEN,\n        &CIRCUIT_BREAKERS_UNBROKEN,\n        &PAGE_SERVICE_SMGR_FLUSH_INPROGRESS_MICROS_GLOBAL,\n        &WAIT_LSN_IN_PROGRESS_GLOBAL_MICROS,\n        &MISROUTED_PAGESTREAM_REQUESTS,\n    ]\n    .into_iter()\n    .for_each(|c| {\n        Lazy::force(c);\n    });\n\n    // Deletion queue stats\n    Lazy::force(&DELETION_QUEUE);\n\n    // Tenant stats\n    Lazy::force(&TENANT);\n\n    // Tenant manager stats\n    Lazy::force(&TENANT_MANAGER);\n\n    Lazy::force(&crate::tenant::storage_layer::layer::LAYER_IMPL_METRICS);\n    Lazy::force(&disk_usage_based_eviction::METRICS);\n\n    for state_name in pageserver_api::models::TenantState::VARIANTS {\n        // initialize the metric for all gauges, otherwise the time series might seemingly show\n        // values from last 
restart.\n        TENANT_STATE_METRIC.with_label_values(&[state_name]).set(0);\n    }\n\n    // countervecs\n    [\n        &BACKGROUND_LOOP_PERIOD_OVERRUN_COUNT,\n        &SMGR_QUERY_STARTED_GLOBAL,\n        &PAGE_SERVICE_BATCH_BREAK_REASON_GLOBAL,\n    ]\n    .into_iter()\n    .for_each(|c| {\n        Lazy::force(c);\n    });\n\n    // gauges\n    WALRECEIVER_ACTIVE_MANAGERS.get();\n    LOCAL_DATA_LOSS_SUSPECTED.get();\n\n    // histograms\n    [\n        &LAYERS_PER_READ_GLOBAL,\n        &LAYERS_PER_READ_BATCH_GLOBAL,\n        &LAYERS_PER_READ_AMORTIZED_GLOBAL,\n        &DELTAS_PER_READ_GLOBAL,\n        &WAIT_LSN_TIME,\n        &WAL_REDO_TIME,\n        &WAL_REDO_RECORDS_HISTOGRAM,\n        &WAL_REDO_BYTES_HISTOGRAM,\n        &WAL_REDO_PROCESS_LAUNCH_DURATION_HISTOGRAM,\n        &PAGE_SERVICE_BATCH_SIZE_GLOBAL,\n        &PAGE_SERVICE_SMGR_BATCH_WAIT_TIME_GLOBAL,\n    ]\n    .into_iter()\n    .for_each(|h| {\n        Lazy::force(h);\n    });\n\n    // Custom\n    Lazy::force(&BASEBACKUP_QUERY_TIME);\n    Lazy::force(&COMPUTE_COMMANDS_COUNTERS);\n    Lazy::force(&tokio_epoll_uring::THREAD_LOCAL_METRICS_STORAGE);\n\n    tenant_throttling::preinitialize_global_metrics();\n    wait_ondemand_download_time::preinitialize_global_metrics();\n}\n"
  },
  {
    "path": "pageserver/src/page_cache.rs",
    "content": "//!\n//! Global page cache\n//!\n//! The page cache uses up most of the memory in the page server. It is shared\n//! by all tenants, and it is used to store different kinds of pages. Sharing\n//! the cache allows memory to be dynamically allocated where it's needed the\n//! most.\n//!\n//! The page cache consists of fixed-size buffers, 8 kB each to match the\n//! PostgreSQL buffer size, and a Slot struct for each buffer to contain\n//! information about what's stored in the buffer.\n//!\n//! # Types Of Pages\n//!\n//! [`PageCache`] only supports immutable pages.\n//! Hence there is no need to worry about coherency.\n//!\n//! Two types of pages are supported:\n//!\n//! * **Immutable File pages**, filled & used by [`crate::tenant::block_io`] and [`crate::tenant::ephemeral_file`].\n//!\n//! Note that [`crate::tenant::ephemeral_file::EphemeralFile`] is generally mutable, but, it's append-only.\n//! It uses the page cache only for the blocks that are already fully written and immutable.\n//!\n//! # Filling The Page Cache\n//!\n//! Page cache maps from a cache key to a buffer slot.\n//! The cache key uniquely identifies the piece of data that is being cached.\n//!\n//! The cache key for **immutable file** pages is [`FileId`] and a block number.\n//! Users of page cache that wish to page-cache an arbitrary (immutable!) on-disk file do the following:\n//! * Have a mechanism to deterministically associate the on-disk file with a [`FileId`].\n//! * Get a [`FileId`] using [`next_file_id`].\n//! * Use the mechanism to associate the on-disk file with the returned [`FileId`].\n//! * Use [`PageCache::read_immutable_buf`] to get a [`ReadBufResult`].\n//! * If the page was already cached, it'll be the [`ReadBufResult::Found`] variant that contains\n//!   a read guard for the page. Just use it.\n//! * If the page was not cached, it'll be the [`ReadBufResult::NotFound`] variant that contains\n//!   a write guard for the page. 
Fill the page with the contents of the on-disk file.\n//!   Then call [`PageWriteGuard::mark_valid`] to mark the page as valid.\n//!   Then try again to [`PageCache::read_immutable_buf`].\n//!   Unless there's high cache pressure, the page should now be cached.\n//!   (TODO: allow downgrading the write guard to a read guard to ensure forward progress.)\n//!\n//! # Locking\n//!\n//! There are two levels of locking involved: There's one lock for the \"mapping\"\n//! from page identifier (tenant ID, timeline ID, rel, block, LSN) to the buffer\n//! slot, and a separate lock on each slot. To read or write the contents of a\n//! slot, you must hold the lock on the slot in read or write mode,\n//! respectively. To change the mapping of a slot, i.e. to evict a page or to\n//! assign a buffer for a page, you must hold the mapping lock and the lock on\n//! the slot at the same time.\n//!\n//! Whenever you need to hold both locks simultaneously, the slot lock must be\n//! acquired first. This consistent ordering avoids deadlocks. To look up a page\n//! in the cache, you would first look up the mapping, while holding the mapping\n//! lock, and then lock the slot. You must release the mapping lock in between,\n//! to obey the lock ordering and avoid deadlock.\n//!\n//! A slot can momentarily have invalid contents, even if it's already been\n//! inserted to the mapping, but you must hold the write-lock on the slot until\n//! the contents are valid. If you need to release the lock without initializing\n//! the contents, you must remove the mapping first. We make that easy for the\n//! callers with PageWriteGuard: the caller must explicitly call guard.mark_valid() after it has\n//! initialized it. If the guard is dropped without calling mark_valid(), the\n//! 
mapping is automatically removed and the slot is marked free.\n//!\n\nuse std::collections::HashMap;\nuse std::collections::hash_map::Entry;\nuse std::sync::atomic::{AtomicU8, AtomicU64, AtomicUsize, Ordering};\nuse std::sync::{Arc, Weak};\nuse std::time::Duration;\n\nuse anyhow::Context;\nuse once_cell::sync::OnceCell;\n\nuse crate::context::RequestContext;\nuse crate::metrics::{PageCacheSizeMetrics, page_cache_eviction_metrics};\nuse crate::virtual_file::{IoBufferMut, IoPageSlice};\n\nstatic PAGE_CACHE: OnceCell<PageCache> = OnceCell::new();\nconst TEST_PAGE_CACHE_SIZE: usize = 50;\n\n///\n/// Initialize the page cache. This must be called once at page server startup.\n///\npub fn init(size: usize) {\n    if PAGE_CACHE.set(PageCache::new(size)).is_err() {\n        panic!(\"page cache already initialized\");\n    }\n}\n\n///\n/// Get a handle to the page cache.\n///\npub fn get() -> &'static PageCache {\n    //\n    // In unit tests, page server startup doesn't happen and no one calls\n    // page_cache::init(). 
Initialize it here with a tiny cache, so that the\n    // page cache is usable in unit tests.\n    //\n    if cfg!(test) {\n        PAGE_CACHE.get_or_init(|| PageCache::new(TEST_PAGE_CACHE_SIZE))\n    } else {\n        PAGE_CACHE.get().expect(\"page cache not initialized\")\n    }\n}\n\npub const PAGE_SZ: usize = postgres_ffi::BLCKSZ as usize;\nconst MAX_USAGE_COUNT: u8 = 5;\n\n/// See module-level comment.\n#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]\npub struct FileId(u64);\n\nstatic NEXT_ID: AtomicU64 = AtomicU64::new(1);\n\n/// See module-level comment.\npub fn next_file_id() -> FileId {\n    FileId(NEXT_ID.fetch_add(1, Ordering::Relaxed))\n}\n\n///\n/// CacheKey uniquely identifies a \"thing\" to cache in the page cache.\n///\n#[derive(Debug, PartialEq, Eq, Clone)]\n#[allow(clippy::enum_variant_names)]\nenum CacheKey {\n    ImmutableFilePage { file_id: FileId, blkno: u32 },\n}\n\nstruct Slot {\n    inner: tokio::sync::RwLock<SlotInner>,\n    usage_count: AtomicU8,\n}\n\nstruct SlotInner {\n    key: Option<CacheKey>,\n    // for `coalesce_readers_permit`\n    permit: std::sync::Mutex<Weak<PinnedSlotsPermit>>,\n    buf: IoPageSlice<'static>,\n}\n\nimpl Slot {\n    /// Increment usage count on the buffer, with ceiling at MAX_USAGE_COUNT.\n    fn inc_usage_count(&self) {\n        let _ = self\n            .usage_count\n            .fetch_update(Ordering::Relaxed, Ordering::Relaxed, |val| {\n                if val == MAX_USAGE_COUNT {\n                    None\n                } else {\n                    Some(val + 1)\n                }\n            });\n    }\n\n    /// Decrement usage count on the buffer, unless it's already zero.  
Returns\n    /// the old usage count.\n    fn dec_usage_count(&self) -> u8 {\n        let count_res =\n            self.usage_count\n                .fetch_update(Ordering::Relaxed, Ordering::Relaxed, |val| {\n                    if val == 0 { None } else { Some(val - 1) }\n                });\n\n        match count_res {\n            Ok(usage_count) => usage_count,\n            Err(usage_count) => usage_count,\n        }\n    }\n\n    /// Sets the usage count to a specific value.\n    fn set_usage_count(&self, count: u8) {\n        self.usage_count.store(count, Ordering::Relaxed);\n    }\n}\n\nimpl SlotInner {\n    /// If there is already a reader, drop our permit and share its permit, just like we share read access.\n    fn coalesce_readers_permit(&self, permit: PinnedSlotsPermit) -> Arc<PinnedSlotsPermit> {\n        let mut guard = self.permit.lock().unwrap();\n        if let Some(existing_permit) = guard.upgrade() {\n            drop(guard);\n            drop(permit);\n            existing_permit\n        } else {\n            let permit = Arc::new(permit);\n            *guard = Arc::downgrade(&permit);\n            permit\n        }\n    }\n}\n\npub struct PageCache {\n    immutable_page_map: std::sync::RwLock<HashMap<(FileId, u32), usize>>,\n\n    /// The actual buffers with their metadata.\n    slots: Box<[Slot]>,\n\n    pinned_slots: Arc<tokio::sync::Semaphore>,\n\n    /// Index of the next candidate to evict, for the Clock replacement algorithm.\n    /// This is interpreted modulo the page cache size.\n    next_evict_slot: AtomicUsize,\n\n    size_metrics: &'static PageCacheSizeMetrics,\n}\n\nstruct PinnedSlotsPermit {\n    _permit: tokio::sync::OwnedSemaphorePermit,\n}\n\n///\n/// PageReadGuard is a \"lease\" on a buffer, for reading. 
The page is kept locked\n/// until the guard is dropped.\n///\npub struct PageReadGuard<'i> {\n    _permit: Arc<PinnedSlotsPermit>,\n    slot_guard: tokio::sync::RwLockReadGuard<'i, SlotInner>,\n}\n\nimpl std::ops::Deref for PageReadGuard<'_> {\n    type Target = [u8; PAGE_SZ];\n\n    fn deref(&self) -> &Self::Target {\n        self.slot_guard.buf.deref()\n    }\n}\n\nimpl AsRef<[u8; PAGE_SZ]> for PageReadGuard<'_> {\n    fn as_ref(&self) -> &[u8; PAGE_SZ] {\n        self.slot_guard.buf.as_ref()\n    }\n}\n\n///\n/// PageWriteGuard is a lease on a buffer for modifying it. The page is kept locked\n/// until the guard is dropped.\n///\n/// Counterintuitively, this is used even for a read, if the requested page is not\n/// currently found in the page cache. In that case, the caller of lock_for_read()\n/// is expected to fill in the page contents and call mark_valid().\npub struct PageWriteGuard<'i> {\n    state: PageWriteGuardState<'i>,\n}\n\nenum PageWriteGuardState<'i> {\n    Invalid {\n        inner: tokio::sync::RwLockWriteGuard<'i, SlotInner>,\n        _permit: PinnedSlotsPermit,\n    },\n    Downgraded,\n}\n\nimpl std::ops::DerefMut for PageWriteGuard<'_> {\n    fn deref_mut(&mut self) -> &mut Self::Target {\n        match &mut self.state {\n            PageWriteGuardState::Invalid { inner, _permit } => inner.buf.deref_mut(),\n            PageWriteGuardState::Downgraded => unreachable!(),\n        }\n    }\n}\n\nimpl std::ops::Deref for PageWriteGuard<'_> {\n    type Target = [u8; PAGE_SZ];\n\n    fn deref(&self) -> &Self::Target {\n        match &self.state {\n            PageWriteGuardState::Invalid { inner, _permit } => inner.buf.deref(),\n            PageWriteGuardState::Downgraded => unreachable!(),\n        }\n    }\n}\n\nimpl<'a> PageWriteGuard<'a> {\n    /// Mark that the buffer contents are now valid.\n    #[must_use]\n    pub fn mark_valid(mut self) -> PageReadGuard<'a> {\n        let prev = std::mem::replace(&mut self.state, 
PageWriteGuardState::Downgraded);\n        match prev {\n            PageWriteGuardState::Invalid { inner, _permit } => {\n                assert!(inner.key.is_some());\n                PageReadGuard {\n                    _permit: Arc::new(_permit),\n                    slot_guard: inner.downgrade(),\n                }\n            }\n            PageWriteGuardState::Downgraded => unreachable!(),\n        }\n    }\n}\n\nimpl Drop for PageWriteGuard<'_> {\n    ///\n    /// If the buffer was allocated for a page that was not already in the\n    /// cache, but the lock_for_read/write() caller dropped the buffer without\n    /// initializing it, remove the mapping from the page cache.\n    ///\n    fn drop(&mut self) {\n        match &mut self.state {\n            PageWriteGuardState::Invalid { inner, _permit } => {\n                assert!(inner.key.is_some());\n                let self_key = inner.key.as_ref().unwrap();\n                PAGE_CACHE.get().unwrap().remove_mapping(self_key);\n                inner.key = None;\n            }\n            PageWriteGuardState::Downgraded => {}\n        }\n    }\n}\n\n/// lock_for_read() return value\npub enum ReadBufResult<'a> {\n    Found(PageReadGuard<'a>),\n    NotFound(PageWriteGuard<'a>),\n}\n\nimpl PageCache {\n    pub async fn read_immutable_buf(\n        &self,\n        file_id: FileId,\n        blkno: u32,\n        ctx: &RequestContext,\n    ) -> anyhow::Result<ReadBufResult> {\n        self.lock_for_read(&(CacheKey::ImmutableFilePage { file_id, blkno }), ctx)\n            .await\n    }\n\n    //\n    // Section 2: Internal interface functions for lookup/update.\n    //\n    // To add support for a new kind of \"thing\" to cache, you will need\n    // to add public interface routines above, and code to deal with the\n    // \"mappings\" after this section. 
But the routines in this section should\n    // not require changes.\n\n    async fn try_get_pinned_slot_permit(&self) -> anyhow::Result<PinnedSlotsPermit> {\n        match tokio::time::timeout(\n            // Choose small timeout, neon_smgr does its own retries.\n            // https://neondb.slack.com/archives/C04DGM6SMTM/p1694786876476869\n            Duration::from_secs(10),\n            Arc::clone(&self.pinned_slots).acquire_owned(),\n        )\n        .await\n        {\n            Ok(res) => Ok(PinnedSlotsPermit {\n                _permit: res.expect(\"this semaphore is never closed\"),\n            }),\n            Err(_timeout) => {\n                crate::metrics::page_cache_errors_inc(\n                    crate::metrics::PageCacheErrorKind::AcquirePinnedSlotTimeout,\n                );\n                anyhow::bail!(\"timeout: there were page guards alive for all page cache slots\")\n            }\n        }\n    }\n\n    /// Look up a page in the cache.\n    ///\n    async fn try_lock_for_read(\n        &self,\n        cache_key: &CacheKey,\n        permit: &mut Option<PinnedSlotsPermit>,\n    ) -> Option<PageReadGuard> {\n        if let Some(slot_idx) = self.search_mapping(cache_key) {\n            // The page was found in the mapping. 
Lock the slot, and re-check\n            // that it's still what we expected (because we released the mapping\n            // lock already, another thread could have evicted the page)\n            let slot = &self.slots[slot_idx];\n            let inner = slot.inner.read().await;\n            if inner.key.as_ref() == Some(cache_key) {\n                slot.inc_usage_count();\n                return Some(PageReadGuard {\n                    _permit: inner.coalesce_readers_permit(permit.take().unwrap()),\n                    slot_guard: inner,\n                });\n            }\n        }\n        None\n    }\n\n    /// Return a locked buffer for given block.\n    ///\n    /// Like try_lock_for_read(), if the search criteria is not exact and the\n    /// page is already found in the cache, *cache_key is updated.\n    ///\n    /// If the page is not found in the cache, this allocates a new buffer for\n    /// it. The caller may then initialize the buffer with the contents, and\n    /// call mark_valid().\n    ///\n    /// Example usage:\n    ///\n    /// ```ignore\n    /// let cache = page_cache::get();\n    ///\n    /// match cache.lock_for_read(&key) {\n    ///     ReadBufResult::Found(read_guard) => {\n    ///         // The page was found in cache. Use it\n    ///     },\n    ///     ReadBufResult::NotFound(write_guard) => {\n    ///         // The page was not found in cache. Read it from disk into the\n    ///         // buffer.\n    ///         //read_my_page_from_disk(write_guard);\n    ///\n    ///         // The buffer contents are now valid. 
Tell the page cache.\n    ///         write_guard.mark_valid();\n    ///     },\n    /// }\n    /// ```\n    ///\n    async fn lock_for_read(\n        &self,\n        cache_key: &CacheKey,\n        ctx: &RequestContext,\n    ) -> anyhow::Result<ReadBufResult> {\n        let mut permit = Some(self.try_get_pinned_slot_permit().await?);\n\n        let (read_access, hit) = match cache_key {\n            CacheKey::ImmutableFilePage { .. } => (\n                &crate::metrics::PAGE_CACHE\n                    .for_ctx(ctx)\n                    .read_accesses_immutable,\n                &crate::metrics::PAGE_CACHE.for_ctx(ctx).read_hits_immutable,\n            ),\n        };\n        read_access.inc();\n\n        let mut is_first_iteration = true;\n        loop {\n            // First check if the key already exists in the cache.\n            if let Some(read_guard) = self.try_lock_for_read(cache_key, &mut permit).await {\n                debug_assert!(permit.is_none());\n                if is_first_iteration {\n                    hit.inc();\n                }\n                return Ok(ReadBufResult::Found(read_guard));\n            }\n            debug_assert!(permit.is_some());\n            is_first_iteration = false;\n\n            // Not found. Find a victim buffer\n            let (slot_idx, mut inner) = self\n                .find_victim(permit.as_ref().unwrap())\n                .await\n                .context(\"Failed to find evict victim\")?;\n\n            // Insert mapping for this. At this point, we may find that another\n            // thread did the same thing concurrently. In that case, we evicted\n            // our victim buffer unnecessarily. Put it into the free list and\n            // continue with the slot that the other thread chose.\n            if let Some(_existing_slot_idx) = self.try_insert_mapping(cache_key, slot_idx) {\n                // TODO: put to free list\n\n                // We now just loop back to start from beginning. 
This is not\n                // optimal, we'll perform the lookup in the mapping again, which\n                // is not really necessary because we already got\n                // 'existing_slot_idx'.  But this shouldn't happen often enough\n                // to matter much.\n                continue;\n            }\n\n            // Make the slot ready\n            let slot = &self.slots[slot_idx];\n            inner.key = Some(cache_key.clone());\n            slot.set_usage_count(1);\n\n            debug_assert!(\n                {\n                    let guard = inner.permit.lock().unwrap();\n                    guard.upgrade().is_none()\n                },\n                \"we hold a write lock, so, no one else should have a permit\"\n            );\n\n            return Ok(ReadBufResult::NotFound(PageWriteGuard {\n                state: PageWriteGuardState::Invalid {\n                    _permit: permit.take().unwrap(),\n                    inner,\n                },\n            }));\n        }\n    }\n\n    //\n    // Section 3: Mapping functions\n    //\n\n    /// Search for a page in the cache using the given search key.\n    ///\n    /// Returns the slot index, if any.\n    ///\n    /// NOTE: We don't hold any lock on the mapping on return, so the slot might\n    /// get recycled for an unrelated page immediately after this function\n    /// returns.  
The caller is responsible for re-checking that the slot still\n    /// contains the page with the same key before using it.\n    ///\n    fn search_mapping(&self, cache_key: &CacheKey) -> Option<usize> {\n        match cache_key {\n            CacheKey::ImmutableFilePage { file_id, blkno } => {\n                let map = self.immutable_page_map.read().unwrap();\n                Some(*map.get(&(*file_id, *blkno))?)\n            }\n        }\n    }\n\n    ///\n    /// Remove mapping for given key.\n    ///\n    fn remove_mapping(&self, old_key: &CacheKey) {\n        match old_key {\n            CacheKey::ImmutableFilePage { file_id, blkno } => {\n                let mut map = self.immutable_page_map.write().unwrap();\n                map.remove(&(*file_id, *blkno))\n                    .expect(\"could not find old key in mapping\");\n                self.size_metrics.current_bytes_immutable.sub_page_sz(1);\n            }\n        }\n    }\n\n    ///\n    /// Insert mapping for given key.\n    ///\n    /// If a mapping already existed for the given key, returns the slot index\n    /// of the existing mapping and leaves it untouched.\n    fn try_insert_mapping(&self, new_key: &CacheKey, slot_idx: usize) -> Option<usize> {\n        match new_key {\n            CacheKey::ImmutableFilePage { file_id, blkno } => {\n                let mut map = self.immutable_page_map.write().unwrap();\n                match map.entry((*file_id, *blkno)) {\n                    Entry::Occupied(entry) => Some(*entry.get()),\n                    Entry::Vacant(entry) => {\n                        entry.insert(slot_idx);\n                        self.size_metrics.current_bytes_immutable.add_page_sz(1);\n                        None\n                    }\n                }\n            }\n        }\n    }\n\n    //\n    // Section 4: Misc internal helpers\n    //\n\n    /// Find a slot to evict.\n    ///\n    /// On return, the slot is empty and write-locked.\n    async fn find_victim(\n        
&self,\n        _permit_witness: &PinnedSlotsPermit,\n    ) -> anyhow::Result<(usize, tokio::sync::RwLockWriteGuard<SlotInner>)> {\n        let iter_limit = self.slots.len() * 10;\n        let mut iters = 0;\n        loop {\n            iters += 1;\n            let slot_idx = self.next_evict_slot.fetch_add(1, Ordering::Relaxed) % self.slots.len();\n\n            let slot = &self.slots[slot_idx];\n\n            if slot.dec_usage_count() == 0 {\n                let mut inner = match slot.inner.try_write() {\n                    Ok(inner) => inner,\n                    Err(_err) => {\n                        if iters > iter_limit {\n                            // NB: Even with the permits, there's no hard guarantee that we will find a slot with\n                            // any particular number of iterations: other threads might race ahead and acquire and\n                            // release pins just as we're scanning the array.\n                            //\n                            // Imagine that nslots is 2, and as starting point, usage_count==1 on all\n                            // slots. There are two threads running concurrently, A and B. A has just\n                            // acquired the permit from the semaphore.\n                            //\n                            //   A: Look at slot 1. Its usage_count == 1, so decrement it to zero, and continue the search\n                            //   B: Acquire permit.\n                            //   B: Look at slot 2, decrement its usage_count to zero and continue the search\n                            //   B: Look at slot 1. Its usage_count is zero, so pin it and bump up its usage_count to 1.\n                            //   B: Release pin and permit again\n                            //   B: Acquire permit.\n                            //   B: Look at slot 2. 
Its usage_count is zero, so pin it and bump up its usage_count to 1.\n                            //   B: Release pin and permit again\n                            //\n                            // Now we're back in the starting situation that both slots have\n                            // usage_count 1, but A has now been through one iteration of the\n                            // find_victim() loop. This can repeat indefinitely and on each\n                            // iteration, A's iteration count increases by one.\n                            //\n                            // So, even though the semaphore for the permits is fair, the victim search\n                            // itself happens in parallel and is not fair.\n                            // Hence even with a permit, a task can theoretically be starved.\n                            // To avoid this, we'd need tokio to give priority to tasks that are holding\n                            // permits for longer.\n                            // Note that just yielding to tokio during iteration without such\n                            // priority boosting is likely counter-productive. 
We'd just give more opportunities\n                            // for B to bump usage count, further starving A.\n                            page_cache_eviction_metrics::observe(\n                                page_cache_eviction_metrics::Outcome::ItersExceeded {\n                                    iters: iters.try_into().unwrap(),\n                                },\n                            );\n                            anyhow::bail!(\"exceeded evict iter limit\");\n                        }\n                        continue;\n                    }\n                };\n                if let Some(old_key) = &inner.key {\n                    // remove mapping for old buffer\n                    self.remove_mapping(old_key);\n                    inner.key = None;\n                    page_cache_eviction_metrics::observe(\n                        page_cache_eviction_metrics::Outcome::FoundSlotEvicted {\n                            iters: iters.try_into().unwrap(),\n                        },\n                    );\n                } else {\n                    page_cache_eviction_metrics::observe(\n                        page_cache_eviction_metrics::Outcome::FoundSlotUnused {\n                            iters: iters.try_into().unwrap(),\n                        },\n                    );\n                }\n                return Ok((slot_idx, inner));\n            }\n        }\n    }\n\n    /// Initialize a new page cache\n    ///\n    /// This should be called only once at page server startup.\n    fn new(num_pages: usize) -> Self {\n        assert!(num_pages > 0, \"page cache size must be > 0\");\n\n        // We could use Vec::leak here, but that potentially also leaks\n        // uninitialized reserved capacity. 
With into_boxed_slice and Box::leak\n        // this is avoided.\n        let page_buffer = IoBufferMut::with_capacity_zeroed(num_pages * PAGE_SZ).leak();\n\n        let size_metrics = &crate::metrics::PAGE_CACHE_SIZE;\n        size_metrics.max_bytes.set_page_sz(num_pages);\n        size_metrics.current_bytes_immutable.set_page_sz(0);\n\n        let slots = page_buffer\n            .chunks_exact_mut(PAGE_SZ)\n            .map(|chunk| {\n                // SAFETY: Each chunk has `PAGE_SZ` (8192) bytes, greater than 512, still aligned.\n                let buf = unsafe { IoPageSlice::new_unchecked(chunk.try_into().unwrap()) };\n\n                Slot {\n                    inner: tokio::sync::RwLock::new(SlotInner {\n                        key: None,\n                        buf,\n                        permit: std::sync::Mutex::new(Weak::new()),\n                    }),\n                    usage_count: AtomicU8::new(0),\n                }\n            })\n            .collect();\n\n        Self {\n            immutable_page_map: Default::default(),\n            slots,\n            next_evict_slot: AtomicUsize::new(0),\n            size_metrics,\n            pinned_slots: Arc::new(tokio::sync::Semaphore::new(num_pages)),\n        }\n    }\n}\n\ntrait PageSzBytesMetric {\n    fn set_page_sz(&self, count: usize);\n    fn add_page_sz(&self, count: usize);\n    fn sub_page_sz(&self, count: usize);\n}\n\n#[inline(always)]\nfn count_times_page_sz(count: usize) -> u64 {\n    u64::try_from(count).unwrap() * u64::try_from(PAGE_SZ).unwrap()\n}\n\nimpl PageSzBytesMetric for metrics::UIntGauge {\n    fn set_page_sz(&self, count: usize) {\n        self.set(count_times_page_sz(count));\n    }\n    fn add_page_sz(&self, count: usize) {\n        self.add(count_times_page_sz(count));\n    }\n    fn sub_page_sz(&self, count: usize) {\n        self.sub(count_times_page_sz(count));\n    }\n}\n"
  },
  {
    "path": "pageserver/src/page_service.rs",
    "content": "//! The Page Service listens for client connections and serves their GetPage@LSN\n//! requests.\n\nuse std::any::Any;\nuse std::borrow::Cow;\nuse std::num::NonZeroUsize;\nuse std::os::fd::AsRawFd;\nuse std::pin::Pin;\nuse std::str::FromStr;\nuse std::sync::Arc;\nuse std::task::{Context, Poll};\nuse std::time::{Duration, Instant, SystemTime};\nuse std::{io, str};\n\nuse anyhow::{Context as _, bail};\nuse bytes::{Buf as _, BufMut as _, BytesMut};\nuse chrono::Utc;\nuse futures::future::BoxFuture;\nuse futures::stream::FuturesUnordered;\nuse futures::{FutureExt, Stream, StreamExt as _};\nuse itertools::Itertools;\nuse jsonwebtoken::TokenData;\nuse once_cell::sync::OnceCell;\nuse pageserver_api::config::{\n    GetVectoredConcurrentIo, PageServicePipeliningConfig, PageServicePipeliningConfigPipelined,\n    PageServiceProtocolPipelinedBatchingStrategy, PageServiceProtocolPipelinedExecutionStrategy,\n};\nuse pageserver_api::key::rel_block_to_key;\nuse pageserver_api::models::{PageTraceEvent, TenantState};\nuse pageserver_api::pagestream_api::{\n    self, PagestreamBeMessage, PagestreamDbSizeRequest, PagestreamDbSizeResponse,\n    PagestreamErrorResponse, PagestreamExistsRequest, PagestreamExistsResponse,\n    PagestreamFeMessage, PagestreamGetPageRequest, PagestreamGetSlruSegmentRequest,\n    PagestreamGetSlruSegmentResponse, PagestreamNblocksRequest, PagestreamNblocksResponse,\n    PagestreamProtocolVersion, PagestreamRequest,\n};\nuse pageserver_api::reltag::SlruKind;\nuse pageserver_api::shard::TenantShardId;\nuse pageserver_page_api::proto;\nuse pageserver_page_api::{self as page_api, GetPageSplitter};\nuse postgres_backend::{\n    AuthType, PostgresBackend, PostgresBackendReader, QueryError, is_expected_io_error,\n};\nuse postgres_ffi::BLCKSZ;\nuse postgres_ffi_types::constants::DEFAULTTABLESPACE_OID;\nuse pq_proto::framed::ConnectionError;\nuse pq_proto::{BeMessage, FeMessage, FeStartupPacket, RowDescriptor};\nuse smallvec::{SmallVec, smallvec};\nuse 
strum_macros::IntoStaticStr;\nuse tokio::io::{AsyncRead, AsyncReadExt as _, AsyncWrite, AsyncWriteExt as _, BufWriter};\nuse tokio::task::JoinHandle;\nuse tokio_util::sync::CancellationToken;\nuse tonic::service::Interceptor as _;\nuse tonic::transport::server::TcpConnectInfo;\nuse tracing::*;\nuse utils::auth::{Claims, Scope, SwappableJwtAuth};\nuse utils::id::{TenantId, TenantTimelineId, TimelineId};\nuse utils::logging::log_slow;\nuse utils::lsn::Lsn;\nuse utils::shard::ShardIndex;\nuse utils::simple_rcu::RcuReadGuard;\nuse utils::sync::gate::{Gate, GateGuard};\nuse utils::sync::spsc_fold;\nuse utils::{failpoint_support, span_record};\n\nuse crate::auth::check_permission;\nuse crate::basebackup::{self, BasebackupError};\nuse crate::config::PageServerConf;\nuse crate::context::{\n    DownloadBehavior, PerfInstrumentFutureExt, RequestContext, RequestContextBuilder,\n};\nuse crate::feature_resolver::FeatureResolver;\nuse crate::metrics::{\n    self, COMPUTE_COMMANDS_COUNTERS, ComputeCommandKind, GetPageBatchBreakReason, LIVE_CONNECTIONS,\n    MISROUTED_PAGESTREAM_REQUESTS, PAGESTREAM_HANDLER_RESULTS_TOTAL, SmgrOpTimer, TimelineMetrics,\n};\nuse crate::pgdatadir_mapping::{LsnRange, Version};\nuse crate::span::{\n    debug_assert_current_span_has_tenant_and_timeline_id,\n    debug_assert_current_span_has_tenant_and_timeline_id_no_shard_id,\n};\nuse crate::task_mgr::{self, COMPUTE_REQUEST_RUNTIME, TaskKind};\nuse crate::tenant::mgr::{\n    GetActiveTenantError, GetTenantError, ShardResolveResult, ShardSelector, TenantManager,\n};\nuse crate::tenant::storage_layer::IoConcurrency;\nuse crate::tenant::timeline::handle::{Handle, HandleUpgradeError, WeakHandle};\nuse crate::tenant::timeline::{self, WaitLsnError, WaitLsnTimeout, WaitLsnWaiter};\nuse crate::tenant::{GetTimelineError, PageReconstructError, Timeline};\nuse crate::{CancellableTask, PERF_TRACE_TARGET, timed_after_cancellation};\n\n/// How long we may wait for a [`crate::tenant::mgr::TenantSlot::InProgress`]` 
and/or a [`crate::tenant::TenantShard`] which\n/// is not yet in state [`TenantState::Active`].\n///\n/// NB: this is a different value than [`crate::http::routes::ACTIVE_TENANT_TIMEOUT`].\n/// HADRON: reduced timeout and we will retry in Cache::get().\nconst ACTIVE_TENANT_TIMEOUT: Duration = Duration::from_millis(5000);\n\n/// Threshold at which to log slow GetPage requests.\nconst LOG_SLOW_GETPAGE_THRESHOLD: Duration = Duration::from_secs(30);\n\n/// The idle time before sending TCP keepalive probes for gRPC connections. The\n/// interval and timeout between each probe are configured via sysctl. This\n/// allows detecting dead connections sooner.\nconst GRPC_TCP_KEEPALIVE_TIME: Duration = Duration::from_secs(60);\n\n/// Whether to enable TCP nodelay for gRPC connections. This disables Nagle's\n/// algorithm, which can cause latency spikes for small messages.\nconst GRPC_TCP_NODELAY: bool = true;\n\n/// The interval between HTTP2 keepalive pings. This allows shutting down server\n/// tasks when clients are unresponsive.\nconst GRPC_HTTP2_KEEPALIVE_INTERVAL: Duration = Duration::from_secs(30);\n\n/// The timeout for HTTP2 keepalive pings. Should be <= GRPC_HTTP2_KEEPALIVE_INTERVAL.\nconst GRPC_HTTP2_KEEPALIVE_TIMEOUT: Duration = Duration::from_secs(20);\n\n/// Number of concurrent gRPC streams per TCP connection. 
We expect something\n/// like 8 GetPage streams per connection, plus any unary requests.\nconst GRPC_MAX_CONCURRENT_STREAMS: u32 = 256;\n\n///////////////////////////////////////////////////////////////////////////////\n\npub struct Listener {\n    cancel: CancellationToken,\n    /// Cancel the listener task through `listen_cancel` to shut down the listener\n    /// and get a handle on the existing connections.\n    task: JoinHandle<Connections>,\n}\n\npub struct Connections {\n    cancel: CancellationToken,\n    tasks: tokio::task::JoinSet<ConnectionHandlerResult>,\n    gate: Gate,\n}\n\npub fn spawn(\n    conf: &'static PageServerConf,\n    tenant_manager: Arc<TenantManager>,\n    pg_auth: Option<Arc<SwappableJwtAuth>>,\n    perf_trace_dispatch: Option<Dispatch>,\n    tcp_listener: tokio::net::TcpListener,\n    tls_config: Option<Arc<rustls::ServerConfig>>,\n    feature_resolver: FeatureResolver,\n) -> Listener {\n    let cancel = CancellationToken::new();\n    let libpq_ctx = RequestContext::todo_child(\n        TaskKind::LibpqEndpointListener,\n        // listener task shouldn't need to download anything. (We will\n        // create separate sub-contexts for each connection, with their\n        // own download behavior. 
This context is used only to listen and\n        // accept connections.)\n        DownloadBehavior::Error,\n    );\n    let task = COMPUTE_REQUEST_RUNTIME.spawn(task_mgr::exit_on_panic_or_error(\n        \"libpq listener\",\n        libpq_listener_main(\n            conf,\n            tenant_manager,\n            pg_auth,\n            perf_trace_dispatch,\n            tcp_listener,\n            conf.pg_auth_type,\n            tls_config,\n            conf.page_service_pipelining.clone(),\n            feature_resolver,\n            libpq_ctx,\n            cancel.clone(),\n        )\n        .map(anyhow::Ok),\n    ));\n\n    Listener { cancel, task }\n}\n\nimpl Listener {\n    pub async fn stop_accepting(self) -> Connections {\n        self.cancel.cancel();\n        self.task\n            .await\n            .expect(\"unreachable: we wrap the listener task in task_mgr::exit_on_panic_or_error\")\n    }\n}\nimpl Connections {\n    pub(crate) async fn shutdown(self) {\n        let Self {\n            cancel,\n            mut tasks,\n            gate,\n        } = self;\n        cancel.cancel();\n        while let Some(res) = tasks.join_next().await {\n            Self::handle_connection_completion(res);\n        }\n        gate.close().await;\n    }\n\n    fn handle_connection_completion(res: Result<anyhow::Result<()>, tokio::task::JoinError>) {\n        match res {\n            Ok(Ok(())) => {}\n            Ok(Err(e)) => error!(\"error in page_service connection task: {:?}\", e),\n            Err(e) => error!(\"page_service connection task panicked: {:?}\", e),\n        }\n    }\n}\n\n///\n/// Main loop of the page service.\n///\n/// Listens for connections, and launches a new handler task for each.\n///\n/// Returns Ok(()) upon cancellation via `cancel`, returning the set of\n/// open connections.\n///\n#[allow(clippy::too_many_arguments)]\npub async fn libpq_listener_main(\n    conf: &'static PageServerConf,\n    tenant_manager: Arc<TenantManager>,\n    auth: 
Option<Arc<SwappableJwtAuth>>,\n    perf_trace_dispatch: Option<Dispatch>,\n    listener: tokio::net::TcpListener,\n    auth_type: AuthType,\n    tls_config: Option<Arc<rustls::ServerConfig>>,\n    pipelining_config: PageServicePipeliningConfig,\n    feature_resolver: FeatureResolver,\n    listener_ctx: RequestContext,\n    listener_cancel: CancellationToken,\n) -> Connections {\n    let connections_cancel = CancellationToken::new();\n    let connections_gate = Gate::default();\n    let mut connection_handler_tasks = tokio::task::JoinSet::default();\n\n    loop {\n        let gate_guard = match connections_gate.enter() {\n            Ok(guard) => guard,\n            Err(_) => break,\n        };\n\n        let accepted = tokio::select! {\n            biased;\n            _ = listener_cancel.cancelled() => break,\n            next = connection_handler_tasks.join_next(), if !connection_handler_tasks.is_empty() => {\n                let res = next.expect(\"we dont poll while empty\");\n                Connections::handle_connection_completion(res);\n                continue;\n            }\n            accepted = listener.accept() => accepted,\n        };\n\n        match accepted {\n            Ok((socket, peer_addr)) => {\n                // Connection established. 
Spawn a new task to handle it.\n                debug!(\"accepted connection from {}\", peer_addr);\n                let local_auth = auth.clone();\n                let connection_ctx = RequestContextBuilder::from(&listener_ctx)\n                    .task_kind(TaskKind::PageRequestHandler)\n                    .download_behavior(DownloadBehavior::Download)\n                    .perf_span_dispatch(perf_trace_dispatch.clone())\n                    .detached_child();\n\n                connection_handler_tasks.spawn(page_service_conn_main(\n                    conf,\n                    tenant_manager.clone(),\n                    local_auth,\n                    socket,\n                    auth_type,\n                    tls_config.clone(),\n                    pipelining_config.clone(),\n                    feature_resolver.clone(),\n                    connection_ctx,\n                    connections_cancel.child_token(),\n                    gate_guard,\n                ));\n            }\n            Err(err) => {\n                // accept() failed. 
Log the error, and loop back to retry on next connection.\n                error!(\"accept() failed: {:?}\", err);\n            }\n        }\n    }\n\n    debug!(\"page_service listener loop terminated\");\n\n    Connections {\n        cancel: connections_cancel,\n        tasks: connection_handler_tasks,\n        gate: connections_gate,\n    }\n}\n\ntype ConnectionHandlerResult = anyhow::Result<()>;\n\n/// Perf root spans start at the per-request level, after shard routing.\n/// This struct carries connection-level information to the root perf span definition.\n#[derive(Clone, Default)]\nstruct ConnectionPerfSpanFields {\n    peer_addr: String,\n    application_name: Option<String>,\n    compute_mode: Option<String>,\n}\n\n#[instrument(skip_all, fields(peer_addr, application_name, compute_mode))]\n#[allow(clippy::too_many_arguments)]\nasync fn page_service_conn_main(\n    conf: &'static PageServerConf,\n    tenant_manager: Arc<TenantManager>,\n    auth: Option<Arc<SwappableJwtAuth>>,\n    socket: tokio::net::TcpStream,\n    auth_type: AuthType,\n    tls_config: Option<Arc<rustls::ServerConfig>>,\n    pipelining_config: PageServicePipeliningConfig,\n    feature_resolver: FeatureResolver,\n    connection_ctx: RequestContext,\n    cancel: CancellationToken,\n    gate_guard: GateGuard,\n) -> ConnectionHandlerResult {\n    let _guard = LIVE_CONNECTIONS\n        .with_label_values(&[\"page_service\"])\n        .guard();\n\n    socket\n        .set_nodelay(true)\n        .context(\"could not set TCP_NODELAY\")?;\n\n    let socket_fd = socket.as_raw_fd();\n\n    let peer_addr = socket.peer_addr().context(\"get peer address\")?;\n\n    let perf_span_fields = ConnectionPerfSpanFields {\n        peer_addr: peer_addr.to_string(),\n        application_name: None, // filled in later\n        compute_mode: None,     // filled in later\n    };\n    tracing::Span::current().record(\"peer_addr\", field::display(peer_addr));\n\n    // setup read timeout of 10 minutes. 
the timeout is rather arbitrary for requirements:\n    // - long enough for most valid compute connections\n    // - less than infinite to stop us from \"leaking\" connections to long-gone computes\n    //\n    // no write timeout is used, because the kernel is assumed to error writes after some time.\n    let mut socket = tokio_io_timeout::TimeoutReader::new(socket);\n\n    let default_timeout_ms = 10 * 60 * 1000; // 10 minutes by default\n    let socket_timeout_ms = (|| {\n        fail::fail_point!(\"simulated-bad-compute-connection\", |avg_timeout_ms| {\n            // Exponential distribution for simulating\n            // poor network conditions, expect about avg_timeout_ms to be around 15\n            // in tests\n            if let Some(avg_timeout_ms) = avg_timeout_ms {\n                let avg = avg_timeout_ms.parse::<i64>().unwrap() as f32;\n                let u = rand::random::<f32>();\n                ((1.0 - u).ln() / (-avg)) as u64\n            } else {\n                default_timeout_ms\n            }\n        });\n        default_timeout_ms\n    })();\n\n    // A timeout here does not mean the client died, it can happen if it's just idle for\n    // a while: we will tear down this PageServerHandler and instantiate a new one if/when\n    // they reconnect.\n    socket.set_timeout(Some(std::time::Duration::from_millis(socket_timeout_ms)));\n    let socket = Box::pin(socket);\n\n    fail::fail_point!(\"ps::connection-start::pre-login\");\n\n    // XXX: pgbackend.run() should take the connection_ctx,\n    // and create a child per-query context when it invokes process_query.\n    // But it's in a shared crate, so, we store connection_ctx inside PageServerHandler\n    // and create the per-query context in process_query ourselves.\n    let mut conn_handler = PageServerHandler::new(\n        tenant_manager,\n        auth,\n        pipelining_config,\n        conf.get_vectored_concurrent_io,\n        perf_span_fields,\n        connection_ctx,\n        
cancel.clone(),\n        feature_resolver.clone(),\n        gate_guard,\n    );\n    let pgbackend =\n        PostgresBackend::new_from_io(socket_fd, socket, peer_addr, auth_type, tls_config)?;\n\n    match pgbackend.run(&mut conn_handler, &cancel).await {\n        Ok(()) => {\n            // we've been requested to shut down\n            Ok(())\n        }\n        Err(QueryError::Disconnected(ConnectionError::Io(io_error))) => {\n            if is_expected_io_error(&io_error) {\n                info!(\"Postgres client disconnected ({io_error})\");\n                Ok(())\n            } else {\n                let tenant_id = conn_handler.timeline_handles.as_ref().unwrap().tenant_id();\n                Err(io_error).context(format!(\n                    \"Postgres connection error for tenant_id={tenant_id:?} client at peer_addr={peer_addr}\"\n                ))\n            }\n        }\n        other => {\n            let tenant_id = conn_handler.timeline_handles.as_ref().unwrap().tenant_id();\n            other.context(format!(\n                \"Postgres query error for tenant_id={tenant_id:?} client peer_addr={peer_addr}\"\n            ))\n        }\n    }\n}\n\n/// Page service connection handler.\nstruct PageServerHandler {\n    auth: Option<Arc<SwappableJwtAuth>>,\n    claims: Option<Claims>,\n\n    /// The context created for the lifetime of the connection\n    /// serviced by this PageServerHandler.\n    /// For each query received over the connection,\n    /// `process_query` creates a child context from this one.\n    connection_ctx: RequestContext,\n\n    perf_span_fields: ConnectionPerfSpanFields,\n\n    cancel: CancellationToken,\n\n    /// None only while pagestream protocol is being processed.\n    timeline_handles: Option<TimelineHandles>,\n\n    pipelining_config: PageServicePipeliningConfig,\n    get_vectored_concurrent_io: GetVectoredConcurrentIo,\n\n    feature_resolver: FeatureResolver,\n\n    gate_guard: GateGuard,\n}\n\nstruct 
TimelineHandles {\n    wrapper: TenantManagerWrapper,\n    /// Note on size: the typical size of this map is 1.  The largest size we expect\n    /// to see is the number of shards divided by the number of pageservers (typically < 2),\n    /// or the ratio used when splitting shards (i.e. how many children created from one)\n    /// parent shard, where a \"large\" number might be ~8.\n    handles: timeline::handle::Cache<TenantManagerTypes>,\n}\n\nimpl TimelineHandles {\n    fn new(tenant_manager: Arc<TenantManager>) -> Self {\n        Self {\n            wrapper: TenantManagerWrapper {\n                tenant_manager,\n                tenant_id: OnceCell::new(),\n            },\n            handles: Default::default(),\n        }\n    }\n    async fn get(\n        &mut self,\n        tenant_id: TenantId,\n        timeline_id: TimelineId,\n        shard_selector: ShardSelector,\n    ) -> Result<Handle<TenantManagerTypes>, GetActiveTimelineError> {\n        if *self.wrapper.tenant_id.get_or_init(|| tenant_id) != tenant_id {\n            return Err(GetActiveTimelineError::Tenant(\n                GetActiveTenantError::SwitchedTenant,\n            ));\n        }\n        self.handles\n            .get(timeline_id, shard_selector, &self.wrapper)\n            .await\n    }\n\n    fn tenant_id(&self) -> Option<TenantId> {\n        self.wrapper.tenant_id.get().copied()\n    }\n}\n\npub(crate) struct TenantManagerWrapper {\n    tenant_manager: Arc<TenantManager>,\n    // We do not support switching tenant_id on a connection at this point.\n    // We can can add support for this later if needed without changing\n    // the protocol.\n    tenant_id: once_cell::sync::OnceCell<TenantId>,\n}\n\npub(crate) struct TenantManagerTypes;\n\nimpl timeline::handle::Types for TenantManagerTypes {\n    type TenantManager = TenantManagerWrapper;\n    type Timeline = TenantManagerCacheItem;\n}\n\npub(crate) struct TenantManagerCacheItem {\n    pub(crate) timeline: Arc<Timeline>,\n    // 
allow() for cheap propagation through RequestContext inside a task\n    #[allow(clippy::redundant_allocation)]\n    pub(crate) metrics: Arc<Arc<TimelineMetrics>>,\n    #[allow(dead_code)] // we store it to keep the gate open\n    pub(crate) gate_guard: GateGuard,\n}\n\nimpl std::ops::Deref for TenantManagerCacheItem {\n    type Target = Arc<Timeline>;\n    fn deref(&self) -> &Self::Target {\n        &self.timeline\n    }\n}\n\nimpl timeline::handle::Timeline<TenantManagerTypes> for TenantManagerCacheItem {\n    fn shard_timeline_id(&self) -> timeline::handle::ShardTimelineId {\n        Timeline::shard_timeline_id(&self.timeline)\n    }\n\n    fn per_timeline_state(&self) -> &timeline::handle::PerTimelineState<TenantManagerTypes> {\n        &self.timeline.handles\n    }\n\n    fn get_shard_identity(&self) -> &pageserver_api::shard::ShardIdentity {\n        Timeline::get_shard_identity(&self.timeline)\n    }\n}\n\nimpl timeline::handle::TenantManager<TenantManagerTypes> for TenantManagerWrapper {\n    async fn resolve(\n        &self,\n        timeline_id: TimelineId,\n        shard_selector: ShardSelector,\n    ) -> Result<TenantManagerCacheItem, GetActiveTimelineError> {\n        let tenant_id = self.tenant_id.get().expect(\"we set this in get()\");\n        let timeout = ACTIVE_TENANT_TIMEOUT;\n        let wait_start = Instant::now();\n        let deadline = wait_start + timeout;\n        let tenant_shard = loop {\n            let resolved = self\n                .tenant_manager\n                .resolve_attached_shard(tenant_id, shard_selector);\n            match resolved {\n                ShardResolveResult::Found(tenant_shard) => break tenant_shard,\n                ShardResolveResult::NotFound => {\n                    MISROUTED_PAGESTREAM_REQUESTS.inc();\n                    return Err(GetActiveTimelineError::Tenant(\n                        GetActiveTenantError::NotFound(GetTenantError::NotFound(*tenant_id)),\n                    ));\n                }\n   
             ShardResolveResult::InProgress(barrier) => {\n                    // We can't authoritatively answer right now: wait for InProgress state\n                    // to end, then try again\n                    tokio::select! {\n                        _  = barrier.wait() => {\n                            // The barrier completed: proceed around the loop to try looking up again\n                        },\n                        _ = tokio::time::sleep(deadline.duration_since(Instant::now())) => {\n                            return Err(GetActiveTimelineError::Tenant(GetActiveTenantError::WaitForActiveTimeout {\n                                latest_state: None,\n                                wait_time: timeout,\n                            }));\n                        }\n                    }\n                }\n            };\n        };\n\n        tracing::debug!(\"Waiting for tenant to enter active state...\");\n        tenant_shard\n            .wait_to_become_active(deadline.duration_since(Instant::now()))\n            .await\n            .map_err(GetActiveTimelineError::Tenant)?;\n\n        let timeline = tenant_shard\n            .get_timeline(timeline_id, true)\n            .map_err(GetActiveTimelineError::Timeline)?;\n\n        let gate_guard = match timeline.gate.enter() {\n            Ok(guard) => guard,\n            Err(_) => {\n                return Err(GetActiveTimelineError::Timeline(\n                    GetTimelineError::ShuttingDown,\n                ));\n            }\n        };\n\n        let metrics = Arc::new(Arc::clone(&timeline.metrics));\n\n        Ok(TenantManagerCacheItem {\n            timeline,\n            metrics,\n            gate_guard,\n        })\n    }\n}\n\n/// Whether to hold the applied GC cutoff guard when processing GetPage requests.\n/// This is determined once at the start of pagestream subprotocol handling based on\n/// feature flags, configuration, and test conditions.\n#[derive(Debug, Clone, Copy)]\nenum 
HoldAppliedGcCutoffGuard {\n    Yes,\n    No,\n}\n\n#[derive(thiserror::Error, Debug)]\nenum PageStreamError {\n    /// We encountered an error that should prompt the client to reconnect:\n    /// in practice this means we drop the connection without sending a response.\n    #[error(\"Reconnect required: {0}\")]\n    Reconnect(Cow<'static, str>),\n\n    /// We were instructed to shutdown while processing the query\n    #[error(\"Shutting down\")]\n    Shutdown,\n\n    /// Something went wrong reading a page: this likely indicates a pageserver bug\n    #[error(\"Read error\")]\n    Read(#[source] PageReconstructError),\n\n    /// Ran out of time waiting for an LSN\n    #[error(\"LSN timeout: {0}\")]\n    LsnTimeout(WaitLsnError),\n\n    /// The entity required to serve the request (tenant or timeline) is not found,\n    /// or is not found in a suitable state to serve a request.\n    #[error(\"Not found: {0}\")]\n    NotFound(Cow<'static, str>),\n\n    /// Request asked for something that doesn't make sense, like an invalid LSN\n    #[error(\"Bad request: {0}\")]\n    BadRequest(Cow<'static, str>),\n}\n\nimpl From<PageStreamError> for tonic::Status {\n    fn from(err: PageStreamError) -> Self {\n        use tonic::Code;\n        let message = err.to_string();\n        let code = match err {\n            PageStreamError::Reconnect(_) => Code::Unavailable,\n            PageStreamError::Shutdown => Code::Unavailable,\n            PageStreamError::Read(err) => match err {\n                PageReconstructError::Cancelled => Code::Unavailable,\n                PageReconstructError::MissingKey(_) => Code::NotFound,\n                PageReconstructError::AncestorLsnTimeout(err) => tonic::Status::from(err).code(),\n                PageReconstructError::Other(_) => Code::Internal,\n                PageReconstructError::WalRedo(_) => Code::Internal,\n            },\n            PageStreamError::LsnTimeout(err) => tonic::Status::from(err).code(),\n            
PageStreamError::NotFound(_) => Code::NotFound,\n            PageStreamError::BadRequest(_) => Code::InvalidArgument,\n        };\n        tonic::Status::new(code, message)\n    }\n}\n\nimpl From<PageReconstructError> for PageStreamError {\n    fn from(value: PageReconstructError) -> Self {\n        match value {\n            PageReconstructError::Cancelled => Self::Shutdown,\n            e => Self::Read(e),\n        }\n    }\n}\n\nimpl From<GetActiveTimelineError> for PageStreamError {\n    fn from(value: GetActiveTimelineError) -> Self {\n        match value {\n            GetActiveTimelineError::Tenant(GetActiveTenantError::Cancelled)\n            | GetActiveTimelineError::Tenant(GetActiveTenantError::WillNotBecomeActive(\n                TenantState::Stopping { .. },\n            ))\n            | GetActiveTimelineError::Timeline(GetTimelineError::ShuttingDown) => Self::Shutdown,\n            GetActiveTimelineError::Tenant(e) => Self::NotFound(format!(\"{e}\").into()),\n            GetActiveTimelineError::Timeline(e) => Self::NotFound(format!(\"{e}\").into()),\n        }\n    }\n}\n\nimpl From<WaitLsnError> for PageStreamError {\n    fn from(value: WaitLsnError) -> Self {\n        match value {\n            e @ WaitLsnError::Timeout(_) => Self::LsnTimeout(e),\n            WaitLsnError::Shutdown => Self::Shutdown,\n            e @ WaitLsnError::BadState { .. } => Self::Reconnect(format!(\"{e}\").into()),\n        }\n    }\n}\n\nimpl From<WaitLsnError> for QueryError {\n    fn from(value: WaitLsnError) -> Self {\n        match value {\n            e @ WaitLsnError::Timeout(_) => Self::Other(anyhow::Error::new(e)),\n            WaitLsnError::Shutdown => Self::Shutdown,\n            WaitLsnError::BadState { .. 
} => Self::Reconnect,\n        }\n    }\n}\n\n#[derive(thiserror::Error, Debug)]\nstruct BatchedPageStreamError {\n    req: PagestreamRequest,\n    err: PageStreamError,\n}\n\nimpl std::fmt::Display for BatchedPageStreamError {\n    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {\n        self.err.fmt(f)\n    }\n}\n\nstruct BatchedGetPageRequest {\n    req: PagestreamGetPageRequest,\n    timer: SmgrOpTimer,\n    lsn_range: LsnRange,\n    ctx: RequestContext,\n    // If the request is perf enabled, this contains a context\n    // with a perf span tracking the time spent waiting for the executor.\n    batch_wait_ctx: Option<RequestContext>,\n}\n\n#[cfg(feature = \"testing\")]\nstruct BatchedTestRequest {\n    req: pagestream_api::PagestreamTestRequest,\n    timer: SmgrOpTimer,\n}\n\n/// NB: we only hold [`timeline::handle::WeakHandle`] inside this enum,\n/// so that we don't keep the [`Timeline::gate`] open while the batch\n/// is being built up inside the [`spsc_fold`] (pagestream pipelining).\n#[derive(IntoStaticStr)]\n#[allow(clippy::large_enum_variant)]\nenum BatchedFeMessage {\n    Exists {\n        span: Span,\n        timer: SmgrOpTimer,\n        shard: WeakHandle<TenantManagerTypes>,\n        req: PagestreamExistsRequest,\n    },\n    Nblocks {\n        span: Span,\n        timer: SmgrOpTimer,\n        shard: WeakHandle<TenantManagerTypes>,\n        req: PagestreamNblocksRequest,\n    },\n    GetPage {\n        span: Span,\n        shard: WeakHandle<TenantManagerTypes>,\n        applied_gc_cutoff_guard: Option<RcuReadGuard<Lsn>>,\n        pages: SmallVec<[BatchedGetPageRequest; 1]>,\n        batch_break_reason: GetPageBatchBreakReason,\n    },\n    DbSize {\n        span: Span,\n        timer: SmgrOpTimer,\n        shard: WeakHandle<TenantManagerTypes>,\n        req: PagestreamDbSizeRequest,\n    },\n    GetSlruSegment {\n        span: Span,\n        timer: SmgrOpTimer,\n        shard: WeakHandle<TenantManagerTypes>,\n        req: 
PagestreamGetSlruSegmentRequest,\n    },\n    #[cfg(feature = \"testing\")]\n    Test {\n        span: Span,\n        shard: WeakHandle<TenantManagerTypes>,\n        requests: Vec<BatchedTestRequest>,\n    },\n    RespondError {\n        span: Span,\n        error: BatchedPageStreamError,\n    },\n}\n\nimpl BatchedFeMessage {\n    fn as_static_str(&self) -> &'static str {\n        self.into()\n    }\n\n    fn observe_execution_start(&mut self, at: Instant) {\n        match self {\n            BatchedFeMessage::Exists { timer, .. }\n            | BatchedFeMessage::Nblocks { timer, .. }\n            | BatchedFeMessage::DbSize { timer, .. }\n            | BatchedFeMessage::GetSlruSegment { timer, .. } => {\n                timer.observe_execution_start(at);\n            }\n            BatchedFeMessage::GetPage { pages, .. } => {\n                for page in pages {\n                    page.timer.observe_execution_start(at);\n                }\n            }\n            #[cfg(feature = \"testing\")]\n            BatchedFeMessage::Test { requests, .. } => {\n                for req in requests {\n                    req.timer.observe_execution_start(at);\n                }\n            }\n            BatchedFeMessage::RespondError { .. 
} => {}\n        }\n    }\n\n    fn should_break_batch(\n        &self,\n        other: &BatchedFeMessage,\n        max_batch_size: NonZeroUsize,\n        batching_strategy: PageServiceProtocolPipelinedBatchingStrategy,\n    ) -> Option<GetPageBatchBreakReason> {\n        match (self, other) {\n            (\n                BatchedFeMessage::GetPage {\n                    shard: accum_shard,\n                    pages: accum_pages,\n                    ..\n                },\n                BatchedFeMessage::GetPage {\n                    shard: this_shard,\n                    pages: this_pages,\n                    ..\n                },\n            ) => {\n                assert_eq!(this_pages.len(), 1);\n                if accum_pages.len() >= max_batch_size.get() {\n                    trace!(%max_batch_size, \"stopping batching because of batch size\");\n                    assert_eq!(accum_pages.len(), max_batch_size.get());\n\n                    return Some(GetPageBatchBreakReason::BatchFull);\n                }\n                if !accum_shard.is_same_handle_as(this_shard) {\n                    trace!(\"stopping batching because timeline object mismatch\");\n                    // TODO: we _could_ batch & execute each shard seperately (and in parallel).\n                    // But the current logic for keeping responses in order does not support that.\n\n                    return Some(GetPageBatchBreakReason::NonUniformTimeline);\n                }\n\n                match batching_strategy {\n                    PageServiceProtocolPipelinedBatchingStrategy::UniformLsn => {\n                        if let Some(last_in_batch) = accum_pages.last() {\n                            if last_in_batch.lsn_range.effective_lsn\n                                != this_pages[0].lsn_range.effective_lsn\n                            {\n                                trace!(\n                                    accum_lsn = %last_in_batch.lsn_range.effective_lsn,\n   
                                 this_lsn = %this_pages[0].lsn_range.effective_lsn,\n                                    \"stopping batching because LSN changed\"\n                                );\n\n                                return Some(GetPageBatchBreakReason::NonUniformLsn);\n                            }\n                        }\n                    }\n                    PageServiceProtocolPipelinedBatchingStrategy::ScatteredLsn => {\n                        // The read path doesn't curently support serving the same page at different LSNs.\n                        // While technically possible, it's uncertain if the complexity is worth it.\n                        // Break the batch if such a case is encountered.\n                        let same_page_different_lsn = accum_pages.iter().any(|batched| {\n                            batched.req.rel == this_pages[0].req.rel\n                                && batched.req.blkno == this_pages[0].req.blkno\n                                && batched.lsn_range.effective_lsn\n                                    != this_pages[0].lsn_range.effective_lsn\n                        });\n\n                        if same_page_different_lsn {\n                            trace!(\n                                rel=%this_pages[0].req.rel,\n                                blkno=%this_pages[0].req.blkno,\n                                lsn=%this_pages[0].lsn_range.effective_lsn,\n                                \"stopping batching because same page was requested at different LSNs\"\n                            );\n\n                            return Some(GetPageBatchBreakReason::SamePageAtDifferentLsn);\n                        }\n                    }\n                }\n\n                None\n            }\n            #[cfg(feature = \"testing\")]\n            (\n                BatchedFeMessage::Test {\n                    shard: accum_shard,\n                    requests: accum_requests,\n                    
..\n                },\n                BatchedFeMessage::Test {\n                    shard: this_shard,\n                    requests: this_requests,\n                    ..\n                },\n            ) => {\n                assert!(this_requests.len() == 1);\n                if accum_requests.len() >= max_batch_size.get() {\n                    trace!(%max_batch_size, \"stopping batching because of batch size\");\n                    assert_eq!(accum_requests.len(), max_batch_size.get());\n                    return Some(GetPageBatchBreakReason::BatchFull);\n                }\n                if !accum_shard.is_same_handle_as(this_shard) {\n                    trace!(\"stopping batching because timeline object mismatch\");\n                    // TODO: we _could_ batch & execute each shard seperately (and in parallel).\n                    // But the current logic for keeping responses in order does not support that.\n                    return Some(GetPageBatchBreakReason::NonUniformTimeline);\n                }\n                let this_batch_key = this_requests[0].req.batch_key;\n                let accum_batch_key = accum_requests[0].req.batch_key;\n                if this_requests[0].req.batch_key != accum_requests[0].req.batch_key {\n                    trace!(%accum_batch_key, %this_batch_key, \"stopping batching because batch key changed\");\n                    return Some(GetPageBatchBreakReason::NonUniformKey);\n                }\n                None\n            }\n            (_, _) => Some(GetPageBatchBreakReason::NonBatchableRequest),\n        }\n    }\n}\n\nimpl PageServerHandler {\n    #[allow(clippy::too_many_arguments)]\n    pub fn new(\n        tenant_manager: Arc<TenantManager>,\n        auth: Option<Arc<SwappableJwtAuth>>,\n        pipelining_config: PageServicePipeliningConfig,\n        get_vectored_concurrent_io: GetVectoredConcurrentIo,\n        perf_span_fields: ConnectionPerfSpanFields,\n        connection_ctx: RequestContext,\n  
      cancel: CancellationToken,\n        feature_resolver: FeatureResolver,\n        gate_guard: GateGuard,\n    ) -> Self {\n        PageServerHandler {\n            auth,\n            claims: None,\n            connection_ctx,\n            perf_span_fields,\n            timeline_handles: Some(TimelineHandles::new(tenant_manager)),\n            cancel,\n            pipelining_config,\n            get_vectored_concurrent_io,\n            feature_resolver,\n            gate_guard,\n        }\n    }\n\n    /// This function always respects cancellation of any timeline in `[Self::shard_timelines]`.  Pass in\n    /// a cancellation token at the next scope up (such as a tenant cancellation token) to ensure we respect\n    /// cancellation if there aren't any timelines in the cache.\n    ///\n    /// If calling from a function that doesn't use the `[Self::shard_timelines]` cache, then pass in the\n    /// timeline cancellation token.\n    async fn flush_cancellable<IO>(\n        &self,\n        pgb: &mut PostgresBackend<IO>,\n        cancel: &CancellationToken,\n    ) -> Result<(), QueryError>\n    where\n        IO: AsyncRead + AsyncWrite + Send + Sync + Unpin,\n    {\n        tokio::select!(\n            flush_r = pgb.flush() => {\n                Ok(flush_r?)\n            },\n            _ = cancel.cancelled() => {\n                Err(QueryError::Shutdown)\n            }\n        )\n    }\n\n    #[allow(clippy::too_many_arguments)]\n    async fn pagestream_read_message<IO>(\n        pgb: &mut PostgresBackendReader<IO>,\n        tenant_id: TenantId,\n        timeline_id: TimelineId,\n        timeline_handles: &mut TimelineHandles,\n        conn_perf_span_fields: &ConnectionPerfSpanFields,\n        cancel: &CancellationToken,\n        ctx: &RequestContext,\n        protocol_version: PagestreamProtocolVersion,\n        parent_span: Span,\n        hold_gc_cutoff_guard: HoldAppliedGcCutoffGuard,\n    ) -> Result<Option<BatchedFeMessage>, QueryError>\n    where\n        
IO: AsyncRead + AsyncWrite + Send + Sync + Unpin + 'static,\n    {\n        let msg = tokio::select! {\n            biased;\n            _ = cancel.cancelled() => {\n                return Err(QueryError::Shutdown)\n            }\n            msg = pgb.read_message() => { msg }\n        };\n\n        let received_at = Instant::now();\n\n        let copy_data_bytes = match msg? {\n            Some(FeMessage::CopyData(bytes)) => bytes,\n            Some(FeMessage::Terminate) => {\n                return Ok(None);\n            }\n            Some(m) => {\n                return Err(QueryError::Other(anyhow::anyhow!(\n                    \"unexpected message: {m:?} during COPY\"\n                )));\n            }\n            None => {\n                return Ok(None);\n            } // client disconnected\n        };\n        trace!(\"query: {copy_data_bytes:?}\");\n\n        fail::fail_point!(\"ps::handle-pagerequest-message\");\n\n        // parse request\n        let neon_fe_msg =\n            PagestreamFeMessage::parse(&mut copy_data_bytes.reader(), protocol_version)?;\n\n        let batched_msg = match neon_fe_msg {\n            PagestreamFeMessage::Exists(req) => {\n                let shard = timeline_handles\n                    .get(tenant_id, timeline_id, ShardSelector::Zero)\n                    .await?;\n                debug_assert_current_span_has_tenant_and_timeline_id_no_shard_id();\n                let span = tracing::info_span!(parent: &parent_span, \"handle_get_rel_exists_request\", rel = %req.rel, req_lsn = %req.hdr.request_lsn, shard_id = %shard.tenant_shard_id.shard_slug());\n                let timer = Self::record_op_start_and_throttle(\n                    &shard,\n                    metrics::SmgrQueryType::GetRelExists,\n                    received_at,\n                )\n                .await?;\n                BatchedFeMessage::Exists {\n                    span,\n                    timer,\n                    shard: 
shard.downgrade(),\n                    req,\n                }\n            }\n            PagestreamFeMessage::Nblocks(req) => {\n                let shard = timeline_handles\n                    .get(tenant_id, timeline_id, ShardSelector::Zero)\n                    .await?;\n                let span = tracing::info_span!(parent: &parent_span, \"handle_get_nblocks_request\", rel = %req.rel, req_lsn = %req.hdr.request_lsn, shard_id = %shard.tenant_shard_id.shard_slug());\n                let timer = Self::record_op_start_and_throttle(\n                    &shard,\n                    metrics::SmgrQueryType::GetRelSize,\n                    received_at,\n                )\n                .await?;\n                BatchedFeMessage::Nblocks {\n                    span,\n                    timer,\n                    shard: shard.downgrade(),\n                    req,\n                }\n            }\n            PagestreamFeMessage::DbSize(req) => {\n                let shard = timeline_handles\n                    .get(tenant_id, timeline_id, ShardSelector::Zero)\n                    .await?;\n                let span = tracing::info_span!(parent: &parent_span, \"handle_db_size_request\", dbnode = %req.dbnode, req_lsn = %req.hdr.request_lsn, shard_id = %shard.tenant_shard_id.shard_slug());\n                let timer = Self::record_op_start_and_throttle(\n                    &shard,\n                    metrics::SmgrQueryType::GetDbSize,\n                    received_at,\n                )\n                .await?;\n                BatchedFeMessage::DbSize {\n                    span,\n                    timer,\n                    shard: shard.downgrade(),\n                    req,\n                }\n            }\n            PagestreamFeMessage::GetSlruSegment(req) => {\n                let shard = timeline_handles\n                    .get(tenant_id, timeline_id, ShardSelector::Zero)\n                    .await?;\n                let span = 
tracing::info_span!(parent: &parent_span, \"handle_get_slru_segment_request\", kind = %req.kind, segno = %req.segno, req_lsn = %req.hdr.request_lsn, shard_id = %shard.tenant_shard_id.shard_slug());\n                let timer = Self::record_op_start_and_throttle(\n                    &shard,\n                    metrics::SmgrQueryType::GetSlruSegment,\n                    received_at,\n                )\n                .await?;\n                BatchedFeMessage::GetSlruSegment {\n                    span,\n                    timer,\n                    shard: shard.downgrade(),\n                    req,\n                }\n            }\n            PagestreamFeMessage::GetPage(req) => {\n                // avoid a somewhat costly Span::record() by constructing the entire span in one go.\n                macro_rules! mkspan {\n                    (before shard routing) => {{\n                        tracing::info_span!(\n                            parent: &parent_span,\n                            \"handle_get_page_request\",\n                            request_id = %req.hdr.reqid,\n                            rel = %req.rel,\n                            blkno = %req.blkno,\n                            req_lsn = %req.hdr.request_lsn,\n                            not_modified_since_lsn = %req.hdr.not_modified_since,\n                        )\n                    }};\n                    ($shard_id:expr) => {{\n                        tracing::info_span!(\n                            parent: &parent_span,\n                            \"handle_get_page_request\",\n                            request_id = %req.hdr.reqid,\n                            rel = %req.rel,\n                            blkno = %req.blkno,\n                            req_lsn = %req.hdr.request_lsn,\n                            not_modified_since_lsn = %req.hdr.not_modified_since,\n                            shard_id = %$shard_id,\n                        )\n                    }};\n        
        }\n\n                macro_rules! respond_error {\n                    ($span:expr, $error:expr) => {{\n                        let error = BatchedFeMessage::RespondError {\n                            span: $span,\n                            error: BatchedPageStreamError {\n                                req: req.hdr,\n                                err: $error,\n                            },\n                        };\n                        Ok(Some(error))\n                    }};\n                }\n\n                let key = rel_block_to_key(req.rel, req.blkno);\n\n                let res = timeline_handles\n                    .get(tenant_id, timeline_id, ShardSelector::Page(key))\n                    .await;\n\n                let shard = match res {\n                    Ok(tl) => tl,\n                    Err(e) => {\n                        let span = mkspan!(before shard routing);\n                        match e {\n                            GetActiveTimelineError::Tenant(GetActiveTenantError::NotFound(_)) => {\n                                // We already know this tenant exists in general, because we resolved it at\n                                // start of connection.  
Getting a NotFound here indicates that the shard containing\n                                // the requested page is not present on this node: the client's knowledge of shard->pageserver\n                                // mapping is out of date.\n                                //\n                                // Closing the connection by returning ``::Reconnect` has the side effect of rate-limiting above message, via\n                                // client's reconnect backoff, as well as hopefully prompting the client to load its updated configuration\n                                // and talk to a different pageserver.\n                                MISROUTED_PAGESTREAM_REQUESTS.inc();\n                                return respond_error!(\n                                    span,\n                                    PageStreamError::Reconnect(\n                                        \"getpage@lsn request routed to wrong shard\".into()\n                                    )\n                                );\n                            }\n                            e => {\n                                return respond_error!(span, e.into());\n                            }\n                        }\n                    }\n                };\n\n                let ctx = if shard.is_get_page_request_sampled() {\n                    RequestContextBuilder::from(ctx)\n                        .root_perf_span(|| {\n                            info_span!(\n                            target: PERF_TRACE_TARGET,\n                            \"GET_PAGE\",\n                            peer_addr = conn_perf_span_fields.peer_addr,\n                            application_name = conn_perf_span_fields.application_name,\n                            compute_mode = conn_perf_span_fields.compute_mode,\n                            tenant_id = %tenant_id,\n                            shard_id = %shard.get_shard_identity().shard_slug(),\n                            
timeline_id = %timeline_id,\n                            lsn = %req.hdr.request_lsn,\n                            not_modified_since_lsn = %req.hdr.not_modified_since,\n                            request_id = %req.hdr.reqid,\n                            key = %key,\n                            )\n                        })\n                        .attached_child()\n                } else {\n                    ctx.attached_child()\n                };\n\n                // This ctx travels as part of the BatchedFeMessage through\n                // batching into the request handler.\n                // The request handler needs to do some per-request work\n                // (relsize check) before dispatching the batch as a single\n                // get_vectored call to the Timeline.\n                // This ctx will be used for the reslize check, whereas the\n                // get_vectored call will be a different ctx with separate\n                // perf span.\n                let ctx = ctx.with_scope_page_service_pagestream(&shard);\n\n                // Similar game for this `span`: we funnel it through so that\n                // request handler log messages contain the request-specific fields.\n                let span = mkspan!(shard.tenant_shard_id.shard_slug());\n\n                let timer = Self::record_op_start_and_throttle(\n                    &shard,\n                    metrics::SmgrQueryType::GetPageAtLsn,\n                    received_at,\n                )\n                .maybe_perf_instrument(&ctx, |current_perf_span| {\n                    info_span!(\n                        target: PERF_TRACE_TARGET,\n                        parent: current_perf_span,\n                        \"THROTTLE\",\n                    )\n                })\n                .await?;\n\n                let applied_gc_cutoff_guard = shard.get_applied_gc_cutoff_lsn(); // hold guard\n                // We're holding the Handle\n                let effective_lsn = 
match Self::effective_request_lsn(\n                    &shard,\n                    shard.get_last_record_lsn(),\n                    req.hdr.request_lsn,\n                    req.hdr.not_modified_since,\n                    &applied_gc_cutoff_guard,\n                ) {\n                    Ok(lsn) => lsn,\n                    Err(e) => {\n                        return respond_error!(span, e);\n                    }\n                };\n                let applied_gc_cutoff_guard = match hold_gc_cutoff_guard {\n                    HoldAppliedGcCutoffGuard::Yes => Some(applied_gc_cutoff_guard),\n                    HoldAppliedGcCutoffGuard::No => {\n                        drop(applied_gc_cutoff_guard);\n                        None\n                    }\n                };\n\n                let batch_wait_ctx = if ctx.has_perf_span() {\n                    Some(\n                        RequestContextBuilder::from(&ctx)\n                            .perf_span(|crnt_perf_span| {\n                                info_span!(\n                                    target: PERF_TRACE_TARGET,\n                                    parent: crnt_perf_span,\n                                    \"WAIT_EXECUTOR\",\n                                )\n                            })\n                            .attached_child(),\n                    )\n                } else {\n                    None\n                };\n\n                BatchedFeMessage::GetPage {\n                    span,\n                    shard: shard.downgrade(),\n                    applied_gc_cutoff_guard,\n                    pages: smallvec![BatchedGetPageRequest {\n                        req,\n                        timer,\n                        lsn_range: LsnRange {\n                            effective_lsn,\n                            request_lsn: req.hdr.request_lsn\n                        },\n                        ctx,\n                        batch_wait_ctx,\n                    
}],\n                    // The executor grabs the batch when it becomes idle.\n                    // Hence, [`GetPageBatchBreakReason::ExecutorSteal`] is the\n                    // default reason for breaking the batch.\n                    batch_break_reason: GetPageBatchBreakReason::ExecutorSteal,\n                }\n            }\n            #[cfg(feature = \"testing\")]\n            PagestreamFeMessage::Test(req) => {\n                let shard = timeline_handles\n                    .get(tenant_id, timeline_id, ShardSelector::Zero)\n                    .await?;\n                let span = tracing::info_span!(parent: &parent_span, \"handle_test_request\", shard_id = %shard.tenant_shard_id.shard_slug());\n                let timer = Self::record_op_start_and_throttle(\n                    &shard,\n                    metrics::SmgrQueryType::Test,\n                    received_at,\n                )\n                .await?;\n                BatchedFeMessage::Test {\n                    span,\n                    shard: shard.downgrade(),\n                    requests: vec![BatchedTestRequest { req, timer }],\n                }\n            }\n        };\n        Ok(Some(batched_msg))\n    }\n\n    /// Starts a SmgrOpTimer at received_at and throttles the request.\n    async fn record_op_start_and_throttle(\n        shard: &Handle<TenantManagerTypes>,\n        op: metrics::SmgrQueryType,\n        received_at: Instant,\n    ) -> Result<SmgrOpTimer, QueryError> {\n        // It's important to start the smgr op metric recorder as early as possible\n        // so that the _started counters are incremented before we do\n        // any serious waiting, e.g., for throttle, batching, or actual request handling.\n        let mut timer = shard.query_metrics.start_smgr_op(op, received_at);\n        let now = Instant::now();\n        timer.observe_throttle_start(now);\n        let throttled = tokio::select! 
{\n            res = shard.pagestream_throttle.throttle(1, now) => res,\n            _ = shard.cancel.cancelled() => return Err(QueryError::Shutdown),\n        };\n        timer.observe_throttle_done(throttled);\n        Ok(timer)\n    }\n\n    /// Post-condition: `batch` is Some()\n    #[instrument(skip_all, level = tracing::Level::TRACE)]\n    #[allow(clippy::boxed_local)]\n    fn pagestream_do_batch(\n        batching_strategy: PageServiceProtocolPipelinedBatchingStrategy,\n        max_batch_size: NonZeroUsize,\n        batch: &mut Result<BatchedFeMessage, QueryError>,\n        this_msg: Result<BatchedFeMessage, QueryError>,\n    ) -> Result<(), Result<BatchedFeMessage, QueryError>> {\n        debug_assert_current_span_has_tenant_and_timeline_id_no_shard_id();\n\n        let this_msg = match this_msg {\n            Ok(this_msg) => this_msg,\n            Err(e) => return Err(Err(e)),\n        };\n\n        let eligible_batch = match batch {\n            Ok(b) => b,\n            Err(_) => {\n                return Err(Ok(this_msg));\n            }\n        };\n\n        let batch_break =\n            eligible_batch.should_break_batch(&this_msg, max_batch_size, batching_strategy);\n\n        match batch_break {\n            Some(reason) => {\n                if let BatchedFeMessage::GetPage {\n                    batch_break_reason, ..\n                } = eligible_batch\n                {\n                    *batch_break_reason = reason;\n                }\n\n                Err(Ok(this_msg))\n            }\n            None => {\n                // ok to batch\n                match (eligible_batch, this_msg) {\n                    (\n                        BatchedFeMessage::GetPage {\n                            pages: accum_pages,\n                            applied_gc_cutoff_guard: accum_applied_gc_cutoff_guard,\n                            ..\n                        },\n                        BatchedFeMessage::GetPage {\n                            
pages: this_pages,\n                            applied_gc_cutoff_guard: this_applied_gc_cutoff_guard,\n                            ..\n                        },\n                    ) => {\n                        accum_pages.extend(this_pages);\n                        // the minimum of the two guards will keep data for both alive\n                        match (&accum_applied_gc_cutoff_guard, this_applied_gc_cutoff_guard) {\n                            (None, None) => (),\n                            (None, Some(this)) => *accum_applied_gc_cutoff_guard = Some(this),\n                            (Some(_), None) => (),\n                            (Some(accum), Some(this)) => {\n                                if **accum > *this {\n                                    *accum_applied_gc_cutoff_guard = Some(this);\n                                }\n                            }\n                        };\n                        Ok(())\n                    }\n                    #[cfg(feature = \"testing\")]\n                    (\n                        BatchedFeMessage::Test {\n                            requests: accum_requests,\n                            ..\n                        },\n                        BatchedFeMessage::Test {\n                            requests: this_requests,\n                            ..\n                        },\n                    ) => {\n                        accum_requests.extend(this_requests);\n                        Ok(())\n                    }\n                    // Shape guaranteed by [`BatchedFeMessage::should_break_batch`]\n                    _ => unreachable!(),\n                }\n            }\n        }\n    }\n\n    #[instrument(level = tracing::Level::DEBUG, skip_all)]\n    async fn pagestream_handle_batched_message<IO>(\n        &mut self,\n        pgb_writer: &mut PostgresBackend<IO>,\n        batch: BatchedFeMessage,\n        io_concurrency: IoConcurrency,\n        cancel: &CancellationToken,\n    
    protocol_version: PagestreamProtocolVersion,\n        ctx: &RequestContext,\n    ) -> Result<(), QueryError>\n    where\n        IO: AsyncRead + AsyncWrite + Send + Sync + Unpin,\n    {\n        let started_at = Instant::now();\n        let batch = {\n            let mut batch = batch;\n            batch.observe_execution_start(started_at);\n            batch\n        };\n\n        // Dispatch the batch to the appropriate request handler.\n        let log_slow_name = batch.as_static_str();\n        let (mut handler_results, span) = {\n            // TODO: we unfortunately have to pin the future on the heap, since GetPage futures are huge and\n            // won't fit on the stack.\n            let mut boxpinned = Box::pin(Self::pagestream_dispatch_batched_message(\n                batch,\n                io_concurrency,\n                ctx,\n            ));\n            log_slow(\n                log_slow_name,\n                LOG_SLOW_GETPAGE_THRESHOLD,\n                boxpinned.as_mut(),\n            )\n            .await?\n        };\n\n        // We purposefully don't count flush time into the smgr operation timer.\n        //\n        // The reason is that current compute client will not perform protocol processing\n        // if the postgres backend process is doing things other than `->smgr_read()`.\n        // This is especially the case for prefetch.\n        //\n        // If the compute doesn't read from the connection, eventually TCP will backpressure\n        // all the way into our flush call below.\n        //\n        // The timer's underlying metric is used for a storage-internal latency SLO and\n        // we don't want to include latency in it that we can't control.\n        // And as pointed out above, in this case, we don't control the time that flush will take.\n        //\n        // We put each response in the batch onto the wire in a separate pgb_writer.flush()\n        // call, which (all unmeasured) adds syscall overhead but 
reduces time to first byte\n        // and avoids building up a \"giant\" contiguous userspace buffer to hold the entire response.\n        // TODO: vectored socket IO would be great, but pgb_writer doesn't support that.\n        let flush_timers = {\n            let flushing_start_time = Instant::now();\n            let mut flush_timers = Vec::with_capacity(handler_results.len());\n            for handler_result in &mut handler_results {\n                let flush_timer = match handler_result {\n                    Ok((_response, timer, _ctx)) => Some(\n                        timer\n                            .observe_execution_end(flushing_start_time)\n                            .expect(\"we are the first caller\"),\n                    ),\n                    Err(_) => {\n                        // TODO: measure errors\n                        None\n                    }\n                };\n                flush_timers.push(flush_timer);\n            }\n            assert_eq!(flush_timers.len(), handler_results.len());\n            flush_timers\n        };\n\n        // Map handler result to protocol behavior.\n        // Some handler errors cause exit from pagestream protocol.\n        // Other handler errors are sent back as an error message and we stay in pagestream protocol.\n        for (handler_result, flushing_timer) in handler_results.into_iter().zip(flush_timers) {\n            let (response_msg, ctx) = match handler_result {\n                Err(e) => match &e.err {\n                    PageStreamError::Shutdown => {\n                        // BEGIN HADRON\n                        PAGESTREAM_HANDLER_RESULTS_TOTAL\n                            .with_label_values(&[metrics::PAGESTREAM_HANDLER_OUTCOME_OTHER_ERROR])\n                            .inc();\n                        // END HADRON\n\n                        // If we fail to fulfil a request during shutdown, which may be _because_ of\n                        // shutdown, then do not send the 
error to the client.  Instead just drop the\n                        // connection.\n                        span.in_scope(|| info!(\"dropping connection due to shutdown\"));\n                        return Err(QueryError::Shutdown);\n                    }\n                    PageStreamError::Reconnect(_reason) => {\n                        span.in_scope(|| {\n                            // BEGIN HADRON\n                            // We can get here because the compute node is pointing at the wrong PS. We\n                            // already have a metric to keep track of this so suppressing this log to\n                            // reduce log spam. The information in this log message is not going to be that\n                            // helpful given the volume of logs that can be generated.\n                            // info!(\"handler requested reconnect: {reason}\")\n                            // END HADRON\n                        });\n                        // BEGIN HADRON\n                        PAGESTREAM_HANDLER_RESULTS_TOTAL\n                            .with_label_values(&[\n                                metrics::PAGESTREAM_HANDLER_OUTCOME_INTERNAL_ERROR,\n                            ])\n                            .inc();\n                        // END HADRON\n                        return Err(QueryError::Reconnect);\n                    }\n                    PageStreamError::Read(_)\n                    | PageStreamError::LsnTimeout(_)\n                    | PageStreamError::NotFound(_)\n                    | PageStreamError::BadRequest(_) => {\n                        // BEGIN HADRON\n                        if let PageStreamError::Read(_) | PageStreamError::LsnTimeout(_) = &e.err {\n                            PAGESTREAM_HANDLER_RESULTS_TOTAL\n                                .with_label_values(&[\n                                    metrics::PAGESTREAM_HANDLER_OUTCOME_INTERNAL_ERROR,\n                                ])\n            
                    .inc();\n                        } else {\n                            PAGESTREAM_HANDLER_RESULTS_TOTAL\n                                .with_label_values(&[\n                                    metrics::PAGESTREAM_HANDLER_OUTCOME_OTHER_ERROR,\n                                ])\n                                .inc();\n                        }\n                        // END HADRON\n\n                        // print the all details to the log with {:#}, but for the client the\n                        // error message is enough.  Do not log if shutting down, as the anyhow::Error\n                        // here includes cancellation which is not an error.\n                        let full = utils::error::report_compact_sources(&e.err);\n                        span.in_scope(|| {\n                            error!(\"error reading relation or page version: {full:#}\")\n                        });\n\n                        (\n                            PagestreamBeMessage::Error(PagestreamErrorResponse {\n                                req: e.req,\n                                message: e.err.to_string(),\n                            }),\n                            None,\n                        )\n                    }\n                },\n                Ok((response_msg, _op_timer_already_observed, ctx)) => {\n                    // BEGIN HADRON\n                    PAGESTREAM_HANDLER_RESULTS_TOTAL\n                        .with_label_values(&[metrics::PAGESTREAM_HANDLER_OUTCOME_SUCCESS])\n                        .inc();\n                    // END HADRON\n\n                    (response_msg, Some(ctx))\n                }\n            };\n\n            let ctx = ctx.map(|req_ctx| {\n                RequestContextBuilder::from(&req_ctx)\n                    .perf_span(|crnt_perf_span| {\n                        info_span!(\n                            target: PERF_TRACE_TARGET,\n                            parent: crnt_perf_span,\n      
                      \"FLUSH_RESPONSE\",\n                        )\n                    })\n                    .attached_child()\n            });\n\n            //\n            // marshal & transmit response message\n            //\n\n            pgb_writer.write_message_noflush(&BeMessage::CopyData(\n                &response_msg.serialize(protocol_version),\n            ))?;\n\n            failpoint_support::sleep_millis_async!(\"before-pagestream-msg-flush\", cancel);\n\n            // what we want to do\n            let socket_fd = pgb_writer.socket_fd;\n            let flush_fut = pgb_writer.flush();\n            // metric for how long flushing takes\n            let flush_fut = match flushing_timer {\n                Some(flushing_timer) => futures::future::Either::Left(flushing_timer.measure(\n                    Instant::now(),\n                    flush_fut,\n                    socket_fd,\n                )),\n                None => futures::future::Either::Right(flush_fut),\n            };\n\n            let flush_fut = if let Some(req_ctx) = ctx.as_ref() {\n                futures::future::Either::Left(\n                    flush_fut.maybe_perf_instrument(req_ctx, |current_perf_span| {\n                        current_perf_span.clone()\n                    }),\n                )\n            } else {\n                futures::future::Either::Right(flush_fut)\n            };\n\n            // do it while respecting cancellation\n            let _: () = async move {\n                tokio::select! 
{\n                    biased;\n                    _ = cancel.cancelled() => {\n                        // We were requested to shut down.\n                        info!(\"shutdown request received in page handler\");\n                        return Err(QueryError::Shutdown)\n                    }\n                    res = flush_fut => {\n                        res?;\n                    }\n                }\n                Ok(())\n            }\n            .await?;\n        }\n        Ok(())\n    }\n\n    /// Helper which dispatches a batched message to the appropriate handler.\n    /// Returns a vec of results, along with the extracted trace span.\n    async fn pagestream_dispatch_batched_message(\n        batch: BatchedFeMessage,\n        io_concurrency: IoConcurrency,\n        ctx: &RequestContext,\n    ) -> Result<\n        (\n            Vec<Result<(PagestreamBeMessage, SmgrOpTimer, RequestContext), BatchedPageStreamError>>,\n            Span,\n        ),\n        QueryError,\n    > {\n        macro_rules! 
upgrade_handle_and_set_context {\n            ($shard:ident) => {{\n                let weak_handle = &$shard;\n                let handle = weak_handle.upgrade()?;\n                let ctx = ctx.with_scope_page_service_pagestream(&handle);\n                (handle, ctx)\n            }};\n        }\n        Ok(match batch {\n            BatchedFeMessage::Exists {\n                span,\n                timer,\n                shard,\n                req,\n            } => {\n                fail::fail_point!(\"ps::handle-pagerequest-message::exists\");\n                let (shard, ctx) = upgrade_handle_and_set_context!(shard);\n                (\n                    vec![\n                        Self::handle_get_rel_exists_request(&shard, &req, &ctx)\n                            .instrument(span.clone())\n                            .await\n                            .map(|msg| (PagestreamBeMessage::Exists(msg), timer, ctx))\n                            .map_err(|err| BatchedPageStreamError { err, req: req.hdr }),\n                    ],\n                    span,\n                )\n            }\n            BatchedFeMessage::Nblocks {\n                span,\n                timer,\n                shard,\n                req,\n            } => {\n                fail::fail_point!(\"ps::handle-pagerequest-message::nblocks\");\n                let (shard, ctx) = upgrade_handle_and_set_context!(shard);\n                (\n                    vec![\n                        Self::handle_get_nblocks_request(&shard, &req, false, &ctx)\n                            .instrument(span.clone())\n                            .await\n                            .map(|msg| msg.expect(\"allow_missing=false\"))\n                            .map(|msg| (PagestreamBeMessage::Nblocks(msg), timer, ctx))\n                            .map_err(|err| BatchedPageStreamError { err, req: req.hdr }),\n                    ],\n                    span,\n                )\n            }\n       
     BatchedFeMessage::GetPage {\n                span,\n                shard,\n                applied_gc_cutoff_guard,\n                pages,\n                batch_break_reason,\n            } => {\n                fail::fail_point!(\"ps::handle-pagerequest-message::getpage\");\n                let (shard, ctx) = upgrade_handle_and_set_context!(shard);\n                (\n                    {\n                        let npages = pages.len();\n                        trace!(npages, \"handling getpage request\");\n                        let res = Self::handle_get_page_at_lsn_request_batched(\n                            &shard,\n                            pages,\n                            io_concurrency,\n                            batch_break_reason,\n                            &ctx,\n                        )\n                        .instrument(span.clone())\n                        .await;\n                        assert_eq!(res.len(), npages);\n                        drop(applied_gc_cutoff_guard);\n                        res\n                    },\n                    span,\n                )\n            }\n            BatchedFeMessage::DbSize {\n                span,\n                timer,\n                shard,\n                req,\n            } => {\n                fail::fail_point!(\"ps::handle-pagerequest-message::dbsize\");\n                let (shard, ctx) = upgrade_handle_and_set_context!(shard);\n                (\n                    vec![\n                        Self::handle_db_size_request(&shard, &req, &ctx)\n                            .instrument(span.clone())\n                            .await\n                            .map(|msg| (PagestreamBeMessage::DbSize(msg), timer, ctx))\n                            .map_err(|err| BatchedPageStreamError { err, req: req.hdr }),\n                    ],\n                    span,\n                )\n            }\n            BatchedFeMessage::GetSlruSegment {\n                
span,\n                timer,\n                shard,\n                req,\n            } => {\n                fail::fail_point!(\"ps::handle-pagerequest-message::slrusegment\");\n                let (shard, ctx) = upgrade_handle_and_set_context!(shard);\n                (\n                    vec![\n                        Self::handle_get_slru_segment_request(&shard, &req, &ctx)\n                            .instrument(span.clone())\n                            .await\n                            .map(|msg| (PagestreamBeMessage::GetSlruSegment(msg), timer, ctx))\n                            .map_err(|err| BatchedPageStreamError { err, req: req.hdr }),\n                    ],\n                    span,\n                )\n            }\n            #[cfg(feature = \"testing\")]\n            BatchedFeMessage::Test {\n                span,\n                shard,\n                requests,\n            } => {\n                fail::fail_point!(\"ps::handle-pagerequest-message::test\");\n                let (shard, ctx) = upgrade_handle_and_set_context!(shard);\n                (\n                    {\n                        let npages = requests.len();\n                        trace!(npages, \"handling getpage request\");\n                        let res = Self::handle_test_request_batch(&shard, requests, &ctx)\n                            .instrument(span.clone())\n                            .await;\n                        assert_eq!(res.len(), npages);\n                        res\n                    },\n                    span,\n                )\n            }\n            BatchedFeMessage::RespondError { span, error } => {\n                // We've already decided to respond with an error, so we don't need to\n                // call the handler.\n                (vec![Err(error)], span)\n            }\n        })\n    }\n\n    /// Pagestream sub-protocol handler.\n    ///\n    /// It is a simple request-response protocol inside a COPYBOTH session.\n    
///\n    /// # Coding Discipline\n    ///\n    /// Coding discipline within this function: all interaction with the `pgb` connection\n    /// needs to be sensitive to connection shutdown, currently signalled via [`Self::cancel`].\n    /// This is so that we can shutdown page_service quickly.\n    #[instrument(skip_all, fields(hold_gc_cutoff_guard))]\n    async fn handle_pagerequests<IO>(\n        &mut self,\n        pgb: &mut PostgresBackend<IO>,\n        tenant_id: TenantId,\n        timeline_id: TimelineId,\n        protocol_version: PagestreamProtocolVersion,\n        ctx: RequestContext,\n    ) -> Result<(), QueryError>\n    where\n        IO: AsyncRead + AsyncWrite + Send + Sync + Unpin + 'static,\n    {\n        debug_assert_current_span_has_tenant_and_timeline_id_no_shard_id();\n\n        // switch client to COPYBOTH\n        pgb.write_message_noflush(&BeMessage::CopyBothResponse)?;\n        tokio::select! {\n            biased;\n            _ = self.cancel.cancelled() => {\n                return Err(QueryError::Shutdown)\n            }\n            res = pgb.flush() => {\n                res?;\n            }\n        }\n\n        let io_concurrency = IoConcurrency::spawn_from_conf(\n            self.get_vectored_concurrent_io,\n            match self.gate_guard.try_clone() {\n                Ok(guard) => guard,\n                Err(_) => {\n                    info!(\"shutdown request received in page handler\");\n                    return Err(QueryError::Shutdown);\n                }\n            },\n        );\n\n        let pgb_reader = pgb\n            .split()\n            .context(\"implementation error: split pgb into reader and writer\")?;\n\n        let timeline_handles = self\n            .timeline_handles\n            .take()\n            .expect(\"implementation error: timeline_handles should not be locked\");\n\n        // Evaluate the expensive feature resolver check once per pagestream subprotocol handling\n        // instead of once per 
GetPage request. This is shared between pipelined and serial paths.\n        let hold_gc_cutoff_guard = if cfg!(test) || cfg!(feature = \"testing\") {\n            HoldAppliedGcCutoffGuard::Yes\n        } else {\n            // Use the global feature resolver with the tenant ID directly, avoiding the need\n            // to get a timeline/shard which might not be available on this pageserver node.\n            let empty_properties = std::collections::HashMap::new();\n            match self.feature_resolver.evaluate_boolean(\n                \"page-service-getpage-hold-applied-gc-cutoff-guard\",\n                tenant_id,\n                &empty_properties,\n            ) {\n                Ok(()) => HoldAppliedGcCutoffGuard::Yes,\n                Err(_) => HoldAppliedGcCutoffGuard::No,\n            }\n        };\n        // record it in the span of handle_pagerequests so that both the request_span\n        // and the pipeline implementation spans contains the field.\n        Span::current().record(\n            \"hold_gc_cutoff_guard\",\n            tracing::field::debug(&hold_gc_cutoff_guard),\n        );\n\n        let request_span = info_span!(\"request\");\n        let ((pgb_reader, timeline_handles), result) = match self.pipelining_config.clone() {\n            PageServicePipeliningConfig::Pipelined(pipelining_config) => {\n                self.handle_pagerequests_pipelined(\n                    pgb,\n                    pgb_reader,\n                    tenant_id,\n                    timeline_id,\n                    timeline_handles,\n                    request_span,\n                    pipelining_config,\n                    protocol_version,\n                    io_concurrency,\n                    hold_gc_cutoff_guard,\n                    &ctx,\n                )\n                .await\n            }\n            PageServicePipeliningConfig::Serial => {\n                self.handle_pagerequests_serial(\n                    pgb,\n                    
pgb_reader,\n                    tenant_id,\n                    timeline_id,\n                    timeline_handles,\n                    request_span,\n                    protocol_version,\n                    io_concurrency,\n                    hold_gc_cutoff_guard,\n                    &ctx,\n                )\n                .await\n            }\n        };\n\n        debug!(\"pagestream subprotocol shut down cleanly\");\n\n        pgb.unsplit(pgb_reader)\n            .context(\"implementation error: unsplit pgb\")?;\n\n        let replaced = self.timeline_handles.replace(timeline_handles);\n        assert!(replaced.is_none());\n\n        result\n    }\n\n    #[allow(clippy::too_many_arguments)]\n    async fn handle_pagerequests_serial<IO>(\n        &mut self,\n        pgb_writer: &mut PostgresBackend<IO>,\n        mut pgb_reader: PostgresBackendReader<IO>,\n        tenant_id: TenantId,\n        timeline_id: TimelineId,\n        mut timeline_handles: TimelineHandles,\n        request_span: Span,\n        protocol_version: PagestreamProtocolVersion,\n        io_concurrency: IoConcurrency,\n        hold_gc_cutoff_guard: HoldAppliedGcCutoffGuard,\n        ctx: &RequestContext,\n    ) -> (\n        (PostgresBackendReader<IO>, TimelineHandles),\n        Result<(), QueryError>,\n    )\n    where\n        IO: AsyncRead + AsyncWrite + Send + Sync + Unpin + 'static,\n    {\n        let cancel = self.cancel.clone();\n\n        let err = loop {\n            let msg = Self::pagestream_read_message(\n                &mut pgb_reader,\n                tenant_id,\n                timeline_id,\n                &mut timeline_handles,\n                &self.perf_span_fields,\n                &cancel,\n                ctx,\n                protocol_version,\n                request_span.clone(),\n                hold_gc_cutoff_guard,\n            )\n            .await;\n            let msg = match msg {\n                Ok(msg) => msg,\n                Err(e) => break e,\n     
       };\n            let msg = match msg {\n                Some(msg) => msg,\n                None => {\n                    debug!(\"pagestream subprotocol end observed\");\n                    return ((pgb_reader, timeline_handles), Ok(()));\n                }\n            };\n\n            let result = self\n                .pagestream_handle_batched_message(\n                    pgb_writer,\n                    msg,\n                    io_concurrency.clone(),\n                    &cancel,\n                    protocol_version,\n                    ctx,\n                )\n                .await;\n            match result {\n                Ok(()) => {}\n                Err(e) => break e,\n            }\n        };\n        ((pgb_reader, timeline_handles), Err(err))\n    }\n\n    /// # Cancel-Safety\n    ///\n    /// May leak tokio tasks if not polled to completion.\n    #[allow(clippy::too_many_arguments)]\n    async fn handle_pagerequests_pipelined<IO>(\n        &mut self,\n        pgb_writer: &mut PostgresBackend<IO>,\n        pgb_reader: PostgresBackendReader<IO>,\n        tenant_id: TenantId,\n        timeline_id: TimelineId,\n        mut timeline_handles: TimelineHandles,\n        request_span: Span,\n        pipelining_config: PageServicePipeliningConfigPipelined,\n        protocol_version: PagestreamProtocolVersion,\n        io_concurrency: IoConcurrency,\n        hold_gc_cutoff_guard: HoldAppliedGcCutoffGuard,\n        ctx: &RequestContext,\n    ) -> (\n        (PostgresBackendReader<IO>, TimelineHandles),\n        Result<(), QueryError>,\n    )\n    where\n        IO: AsyncRead + AsyncWrite + Send + Sync + Unpin + 'static,\n    {\n        //\n        // Pipelined pagestream handling consists of\n        // - a Batcher that reads requests off the wire and\n        //   and batches them if possible,\n        // - an Executor that processes the batched requests.\n        //\n        // The batch is built up inside an `spsc_fold` channel,\n        // 
shared between Batcher (Sender) and Executor (Receiver).\n        //\n        // The Batcher continuously folds client requests into the batch,\n        // while the Executor can at any time take out what's in the batch\n        // in order to process it.\n        // This means the next batch builds up while the Executor\n        // executes the last batch.\n        //\n        // CANCELLATION\n        //\n        // We run both Batcher and Executor futures to completion before\n        // returning from this function.\n        //\n        // If Executor exits first, it signals cancellation to the Batcher\n        // via a CancellationToken that is a child of `self.cancel`.\n        // If Batcher exits first, it signals cancellation to the Executor\n        // by dropping the spsc_fold channel Sender.\n        //\n        // CLEAN SHUTDOWN\n        //\n        // Clean shutdown means that the client ends the COPYBOTH session.\n        // In response to such a client message, the Batcher exits.\n        // The Executor continues to run, draining the spsc_fold channel.\n        // Once drained, the spsc_fold recv will fail with a distinct error\n        // indicating that the sender disconnected.\n        // The Executor exits with Ok(()) in response to that error.\n        //\n        // Server initiated shutdown is not clean shutdown, but instead\n        // is an error Err(QueryError::Shutdown) that is propagated through\n        // error propagation.\n        //\n        // ERROR PROPAGATION\n        //\n        // When the Batcher encounters an error, it sends it as a value\n        // through the spsc_fold channel and exits afterwards.\n        // When the Executor observes such an error in the channel,\n        // it exits returning that error value.\n        //\n        // This design ensures that the Executor stage will still process\n        // the batch that was in flight when the Batcher encountered an error,\n        // thereby behaving identically to a serial 
implementation.\n\n        let PageServicePipeliningConfigPipelined {\n            max_batch_size,\n            execution,\n            batching: batching_strategy,\n        } = pipelining_config;\n\n        // Macro to _define_ a pipeline stage.\n        macro_rules! pipeline_stage {\n            ($name:literal, $cancel:expr, $make_fut:expr) => {{\n                let cancel: CancellationToken = $cancel;\n                let stage_fut = $make_fut(cancel.clone());\n                async move {\n                    scopeguard::defer! {\n                        debug!(\"exiting\");\n                    }\n                    timed_after_cancellation(stage_fut, $name, Duration::from_millis(100), &cancel)\n                        .await\n                }\n                .instrument(tracing::info_span!($name))\n            }};\n        }\n\n        //\n        // Batcher\n        //\n\n        let perf_span_fields = self.perf_span_fields.clone();\n\n        let cancel_batcher = self.cancel.child_token();\n        let (mut batch_tx, mut batch_rx) = spsc_fold::channel();\n        let batcher = pipeline_stage!(\"batcher\", cancel_batcher.clone(), move |cancel_batcher| {\n            let ctx = ctx.attached_child();\n            async move {\n                let mut pgb_reader = pgb_reader;\n                let mut exit = false;\n                while !exit {\n                    let read_res = Self::pagestream_read_message(\n                        &mut pgb_reader,\n                        tenant_id,\n                        timeline_id,\n                        &mut timeline_handles,\n                        &perf_span_fields,\n                        &cancel_batcher,\n                        &ctx,\n                        protocol_version,\n                        request_span.clone(),\n                        hold_gc_cutoff_guard,\n                    )\n                    .await;\n                    let Some(read_res) = read_res.transpose() else {\n                  
      debug!(\"client-initiated shutdown\");\n                        break;\n                    };\n                    exit |= read_res.is_err();\n                    let could_send = batch_tx\n                        .send(read_res, |batch, res| {\n                            Self::pagestream_do_batch(batching_strategy, max_batch_size, batch, res)\n                        })\n                        .await;\n                    exit |= could_send.is_err();\n                }\n                (pgb_reader, timeline_handles)\n            }\n        });\n\n        //\n        // Executor\n        //\n\n        let executor = pipeline_stage!(\"executor\", self.cancel.clone(), move |cancel| {\n            let ctx = ctx.attached_child();\n            async move {\n                let _cancel_batcher = cancel_batcher.drop_guard();\n                loop {\n                    let maybe_batch = batch_rx.recv().await;\n                    let batch = match maybe_batch {\n                        Ok(batch) => batch,\n                        Err(spsc_fold::RecvError::SenderGone) => {\n                            debug!(\"upstream gone\");\n                            return Ok(());\n                        }\n                    };\n                    let mut batch = match batch {\n                        Ok(batch) => batch,\n                        Err(e) => {\n                            return Err(e);\n                        }\n                    };\n\n                    if let BatchedFeMessage::GetPage {\n                        pages,\n                        span: _,\n                        shard: _,\n                        applied_gc_cutoff_guard: _,\n                        batch_break_reason: _,\n                    } = &mut batch\n                    {\n                        for req in pages {\n                            req.batch_wait_ctx.take();\n                        }\n                    }\n\n                    
self.pagestream_handle_batched_message(\n                        pgb_writer,\n                        batch,\n                        io_concurrency.clone(),\n                        &cancel,\n                        protocol_version,\n                        &ctx,\n                    )\n                    .await?;\n                }\n            }\n        });\n\n        //\n        // Execute the stages.\n        //\n\n        match execution {\n            PageServiceProtocolPipelinedExecutionStrategy::ConcurrentFutures => {\n                tokio::join!(batcher, executor)\n            }\n            PageServiceProtocolPipelinedExecutionStrategy::Tasks => {\n                // These tasks are not tracked anywhere.\n                let read_messages_task = tokio::spawn(batcher);\n                let (read_messages_task_res, executor_res_) =\n                    tokio::join!(read_messages_task, executor,);\n                (\n                    read_messages_task_res.expect(\"propagated panic from read_messages\"),\n                    executor_res_,\n                )\n            }\n        }\n    }\n\n    /// Helper function to handle the LSN from client request.\n    ///\n    /// Each GetPage (and Exists and Nblocks) request includes information about\n    /// which version of the page is being requested. The primary compute node\n    /// will always request the latest page version, by setting 'request_lsn' to\n    /// the last inserted or flushed WAL position, while a standby will request\n    /// a version at the LSN that it's currently caught up to.\n    ///\n    /// In either case, if the page server hasn't received the WAL up to the\n    /// requested LSN yet, we will wait for it to arrive. 
The return value is\n    /// the LSN that should be used to look up the page versions.\n    ///\n    /// In addition to the request LSN, each request carries another LSN,\n    /// 'not_modified_since', which is a hint to the pageserver that the client\n    /// knows that the page has not been modified between 'not_modified_since'\n    /// and the request LSN. This allows skipping the wait, as long as the WAL\n    /// up to 'not_modified_since' has arrived. If the client doesn't have any\n    /// information about when the page was modified, it will use\n    /// not_modified_since == lsn. If the client lies and sends a too low\n    /// not_modified_hint such that there are in fact later page versions, the\n    /// behavior is undefined: the pageserver may return any of the page versions\n    /// or an error.\n    async fn wait_or_get_last_lsn(\n        timeline: &Timeline,\n        request_lsn: Lsn,\n        not_modified_since: Lsn,\n        latest_gc_cutoff_lsn: &RcuReadGuard<Lsn>,\n        ctx: &RequestContext,\n    ) -> Result<Lsn, PageStreamError> {\n        let last_record_lsn = timeline.get_last_record_lsn();\n        let effective_request_lsn = Self::effective_request_lsn(\n            timeline,\n            last_record_lsn,\n            request_lsn,\n            not_modified_since,\n            latest_gc_cutoff_lsn,\n        )?;\n\n        if effective_request_lsn > last_record_lsn {\n            timeline\n                .wait_lsn(\n                    not_modified_since,\n                    crate::tenant::timeline::WaitLsnWaiter::PageService,\n                    timeline::WaitLsnTimeout::Default,\n                    ctx,\n                )\n                .await?;\n\n            // Since we waited for 'effective_request_lsn' to arrive, that is now the last\n            // record LSN. 
(Or close enough for our purposes; the last-record LSN can\n            // advance immediately after we return anyway)\n        }\n\n        Ok(effective_request_lsn)\n    }\n\n    fn effective_request_lsn(\n        timeline: &Timeline,\n        last_record_lsn: Lsn,\n        request_lsn: Lsn,\n        not_modified_since: Lsn,\n        latest_gc_cutoff_lsn: &RcuReadGuard<Lsn>,\n    ) -> Result<Lsn, PageStreamError> {\n        // Sanity check the request\n        if request_lsn < not_modified_since {\n            return Err(PageStreamError::BadRequest(\n                format!(\n                    \"invalid request with request LSN {request_lsn} and not_modified_since {not_modified_since}\",\n                )\n                .into(),\n            ));\n        }\n\n        // Check explicitly for INVALID just to get a less scary error message if the request is obviously bogus\n        if request_lsn == Lsn::INVALID {\n            return Err(PageStreamError::BadRequest(\n                \"invalid LSN(0) in request\".into(),\n            ));\n        }\n\n        // Clients should only read from recent LSNs on their timeline, or from locations holding an LSN lease.\n        //\n        // We may have older data available, but we make a best effort to detect this case and return an error,\n        // to distinguish a misbehaving client (asking for old LSN) from a storage issue (data missing at a legitimate LSN).\n        if request_lsn < **latest_gc_cutoff_lsn && !timeline.is_gc_blocked_by_lsn_lease_deadline() {\n            let gc_info = &timeline.gc_info.read().unwrap();\n            if !gc_info.lsn_covered_by_lease(request_lsn) {\n                return Err(\n                    PageStreamError::BadRequest(format!(\n                        \"tried to request a page version that was garbage collected. 
requested at {} gc cutoff {}\",\n                        request_lsn, **latest_gc_cutoff_lsn\n                    ).into())\n                );\n            }\n        }\n\n        if not_modified_since > last_record_lsn {\n            Ok(not_modified_since)\n        } else {\n            // It might be better to use max(not_modified_since, latest_gc_cutoff_lsn)\n            // here instead. That would give the same result, since we know that there\n            // haven't been any modifications since 'not_modified_since'. Using an older\n            // LSN might be faster, because that could allow skipping recent layers when\n            // finding the page. However, we have historically used 'last_record_lsn', so\n            // stick to that for now.\n            Ok(std::cmp::min(last_record_lsn, request_lsn))\n        }\n    }\n\n    /// Handles the lsn lease request.\n    /// If a lease cannot be obtained, the client will receive NULL.\n    #[instrument(skip_all, fields(shard_id, %lsn))]\n    async fn handle_make_lsn_lease<IO>(\n        &mut self,\n        pgb: &mut PostgresBackend<IO>,\n        tenant_shard_id: TenantShardId,\n        timeline_id: TimelineId,\n        lsn: Lsn,\n        ctx: &RequestContext,\n    ) -> Result<(), QueryError>\n    where\n        IO: AsyncRead + AsyncWrite + Send + Sync + Unpin,\n    {\n        let timeline = self\n            .timeline_handles\n            .as_mut()\n            .unwrap()\n            .get(\n                tenant_shard_id.tenant_id,\n                timeline_id,\n                ShardSelector::Known(tenant_shard_id.to_index()),\n            )\n            .await?;\n        set_tracing_field_shard_id(&timeline);\n\n        let lease = timeline\n            .renew_lsn_lease(lsn, timeline.get_lsn_lease_length(), ctx)\n            .inspect_err(|e| {\n                warn!(\"{e}\");\n            })\n            .ok();\n        let valid_until_str = lease.map(|l| {\n            l.valid_until\n                
.duration_since(SystemTime::UNIX_EPOCH)\n                .expect(\"valid_until is earlier than UNIX_EPOCH\")\n                .as_millis()\n                .to_string()\n        });\n\n        info!(\n            \"acquired lease for {} until {}\",\n            lsn,\n            valid_until_str.as_deref().unwrap_or(\"<unknown>\")\n        );\n\n        let bytes = valid_until_str.as_ref().map(|x| x.as_bytes());\n\n        pgb.write_message_noflush(&BeMessage::RowDescription(&[RowDescriptor::text_col(\n            b\"valid_until\",\n        )]))?\n        .write_message_noflush(&BeMessage::DataRow(&[bytes]))?;\n\n        Ok(())\n    }\n\n    #[instrument(skip_all, fields(shard_id))]\n    async fn handle_get_rel_exists_request(\n        timeline: &Timeline,\n        req: &PagestreamExistsRequest,\n        ctx: &RequestContext,\n    ) -> Result<PagestreamExistsResponse, PageStreamError> {\n        let latest_gc_cutoff_lsn = timeline.get_applied_gc_cutoff_lsn();\n        let lsn = Self::wait_or_get_last_lsn(\n            timeline,\n            req.hdr.request_lsn,\n            req.hdr.not_modified_since,\n            &latest_gc_cutoff_lsn,\n            ctx,\n        )\n        .await?;\n\n        let exists = timeline\n            .get_rel_exists(\n                req.rel,\n                Version::LsnRange(LsnRange {\n                    effective_lsn: lsn,\n                    request_lsn: req.hdr.request_lsn,\n                }),\n                ctx,\n            )\n            .await?;\n\n        Ok(PagestreamExistsResponse { req: *req, exists })\n    }\n\n    /// If `allow_missing` is true, returns None instead of Err on missing relations. Otherwise,\n    /// never returns None. 
It is only supported by the gRPC protocol, so we pass it separately to\n    /// avoid changing the libpq protocol types.\n    #[instrument(skip_all, fields(shard_id))]\n    async fn handle_get_nblocks_request(\n        timeline: &Timeline,\n        req: &PagestreamNblocksRequest,\n        allow_missing: bool,\n        ctx: &RequestContext,\n    ) -> Result<Option<PagestreamNblocksResponse>, PageStreamError> {\n        let latest_gc_cutoff_lsn = timeline.get_applied_gc_cutoff_lsn();\n        let lsn = Self::wait_or_get_last_lsn(\n            timeline,\n            req.hdr.request_lsn,\n            req.hdr.not_modified_since,\n            &latest_gc_cutoff_lsn,\n            ctx,\n        )\n        .await?;\n\n        let n_blocks = timeline\n            .get_rel_size_in_reldir(\n                req.rel,\n                Version::LsnRange(LsnRange {\n                    effective_lsn: lsn,\n                    request_lsn: req.hdr.request_lsn,\n                }),\n                None,\n                allow_missing,\n                ctx,\n            )\n            .await?;\n        let Some(n_blocks) = n_blocks else {\n            return Ok(None);\n        };\n\n        Ok(Some(PagestreamNblocksResponse {\n            req: *req,\n            n_blocks,\n        }))\n    }\n\n    #[instrument(skip_all, fields(shard_id))]\n    async fn handle_db_size_request(\n        timeline: &Timeline,\n        req: &PagestreamDbSizeRequest,\n        ctx: &RequestContext,\n    ) -> Result<PagestreamDbSizeResponse, PageStreamError> {\n        let latest_gc_cutoff_lsn = timeline.get_applied_gc_cutoff_lsn();\n        let lsn = Self::wait_or_get_last_lsn(\n            timeline,\n            req.hdr.request_lsn,\n            req.hdr.not_modified_since,\n            &latest_gc_cutoff_lsn,\n            ctx,\n        )\n        .await?;\n\n        let total_blocks = timeline\n            .get_db_size(\n                DEFAULTTABLESPACE_OID,\n                req.dbnode,\n                
Version::LsnRange(LsnRange {\n                    effective_lsn: lsn,\n                    request_lsn: req.hdr.request_lsn,\n                }),\n                ctx,\n            )\n            .await?;\n        let db_size = total_blocks as i64 * BLCKSZ as i64;\n\n        Ok(PagestreamDbSizeResponse { req: *req, db_size })\n    }\n\n    #[instrument(skip_all)]\n    async fn handle_get_page_at_lsn_request_batched(\n        timeline: &Timeline,\n        requests: SmallVec<[BatchedGetPageRequest; 1]>,\n        io_concurrency: IoConcurrency,\n        batch_break_reason: GetPageBatchBreakReason,\n        ctx: &RequestContext,\n    ) -> Vec<Result<(PagestreamBeMessage, SmgrOpTimer, RequestContext), BatchedPageStreamError>>\n    {\n        debug_assert_current_span_has_tenant_and_timeline_id();\n\n        timeline\n            .query_metrics\n            .observe_getpage_batch_start(requests.len(), batch_break_reason);\n\n        // If a page trace is running, submit an event for this request.\n        if let Some(page_trace) = timeline.page_trace.load().as_ref() {\n            let time = SystemTime::now();\n            for batch in &requests {\n                let key = rel_block_to_key(batch.req.rel, batch.req.blkno).to_compact();\n                // Ignore error (trace buffer may be full or tracer may have disconnected).\n                _ = page_trace.try_send(PageTraceEvent {\n                    key,\n                    effective_lsn: batch.lsn_range.effective_lsn,\n                    time,\n                });\n            }\n        }\n\n        // If any request in the batch needs to wait for LSN, then do so now.\n        let mut perf_instrument = false;\n        let max_effective_lsn = requests\n            .iter()\n            .map(|req| {\n                if req.ctx.has_perf_span() {\n                    perf_instrument = true;\n                }\n\n                req.lsn_range.effective_lsn\n            })\n            .max()\n            
.expect(\"batch is never empty\");\n\n        let ctx = match perf_instrument {\n            true => RequestContextBuilder::from(ctx)\n                .root_perf_span(|| {\n                    info_span!(\n                        target: PERF_TRACE_TARGET,\n                        \"GET_VECTORED\",\n                        tenant_id = %timeline.tenant_shard_id.tenant_id,\n                        timeline_id = %timeline.timeline_id,\n                        shard = %timeline.tenant_shard_id.shard_slug(),\n                        %max_effective_lsn\n                    )\n                })\n                .attached_child(),\n            false => ctx.attached_child(),\n        };\n\n        let last_record_lsn = timeline.get_last_record_lsn();\n        if max_effective_lsn > last_record_lsn {\n            if let Err(e) = timeline\n                .wait_lsn(\n                    max_effective_lsn,\n                    crate::tenant::timeline::WaitLsnWaiter::PageService,\n                    timeline::WaitLsnTimeout::Default,\n                    &ctx,\n                )\n                .maybe_perf_instrument(&ctx, |current_perf_span| {\n                    info_span!(\n                        target: PERF_TRACE_TARGET,\n                        parent: current_perf_span,\n                        \"WAIT_LSN\",\n                    )\n                })\n                .await\n            {\n                return Vec::from_iter(requests.into_iter().map(|req| {\n                    Err(BatchedPageStreamError {\n                        err: PageStreamError::from(e.clone()),\n                        req: req.req.hdr,\n                    })\n                }));\n            }\n        }\n\n        let results = timeline\n            .get_rel_page_at_lsn_batched(\n                requests.iter().map(|p| {\n                    (\n                        &p.req.rel,\n                        &p.req.blkno,\n                        p.lsn_range,\n                        
p.ctx.attached_child(),\n                    )\n                }),\n                io_concurrency,\n                &ctx,\n            )\n            .await;\n        assert_eq!(results.len(), requests.len());\n\n        // TODO: avoid creating the new Vec here\n        Vec::from_iter(\n            requests\n                .into_iter()\n                .zip(results.into_iter())\n                .map(|(req, res)| {\n                    res.map(|page| {\n                        (\n                            PagestreamBeMessage::GetPage(\n                                pagestream_api::PagestreamGetPageResponse { req: req.req, page },\n                            ),\n                            req.timer,\n                            req.ctx,\n                        )\n                    })\n                    .map_err(|e| BatchedPageStreamError {\n                        err: PageStreamError::from(e),\n                        req: req.req.hdr,\n                    })\n                }),\n        )\n    }\n\n    #[instrument(skip_all, fields(shard_id))]\n    async fn handle_get_slru_segment_request(\n        timeline: &Timeline,\n        req: &PagestreamGetSlruSegmentRequest,\n        ctx: &RequestContext,\n    ) -> Result<PagestreamGetSlruSegmentResponse, PageStreamError> {\n        let latest_gc_cutoff_lsn = timeline.get_applied_gc_cutoff_lsn();\n        let lsn = Self::wait_or_get_last_lsn(\n            timeline,\n            req.hdr.request_lsn,\n            req.hdr.not_modified_since,\n            &latest_gc_cutoff_lsn,\n            ctx,\n        )\n        .await?;\n\n        let kind = SlruKind::from_repr(req.kind)\n            .ok_or(PageStreamError::BadRequest(\"invalid SLRU kind\".into()))?;\n        let segment = timeline.get_slru_segment(kind, req.segno, lsn, ctx).await?;\n\n        Ok(PagestreamGetSlruSegmentResponse { req: *req, segment })\n    }\n\n    // NB: this impl mimics what we do for batched getpage requests.\n    #[cfg(feature = 
\"testing\")]\n    #[instrument(skip_all, fields(shard_id))]\n    async fn handle_test_request_batch(\n        timeline: &Timeline,\n        requests: Vec<BatchedTestRequest>,\n        _ctx: &RequestContext,\n    ) -> Vec<Result<(PagestreamBeMessage, SmgrOpTimer, RequestContext), BatchedPageStreamError>>\n    {\n        // real requests would do something with the timeline\n        let mut results = Vec::with_capacity(requests.len());\n        for _req in requests.iter() {\n            tokio::task::yield_now().await;\n\n            results.push({\n                if timeline.cancel.is_cancelled() {\n                    Err(PageReconstructError::Cancelled)\n                } else {\n                    Ok(())\n                }\n            });\n        }\n\n        // TODO: avoid creating the new Vec here\n        Vec::from_iter(\n            requests\n                .into_iter()\n                .zip(results.into_iter())\n                .map(|(req, res)| {\n                    res.map(|()| {\n                        (\n                            PagestreamBeMessage::Test(pagestream_api::PagestreamTestResponse {\n                                req: req.req.clone(),\n                            }),\n                            req.timer,\n                            RequestContext::new(\n                                TaskKind::PageRequestHandler,\n                                DownloadBehavior::Warn,\n                            ),\n                        )\n                    })\n                    .map_err(|e| BatchedPageStreamError {\n                        err: PageStreamError::from(e),\n                        req: req.req.hdr,\n                    })\n                }),\n        )\n    }\n\n    /// Note on \"fullbackup\":\n    /// Full basebackups should only be used for debugging purposes.\n    /// Originally, it was introduced to enable breaking storage format changes,\n    /// but that is not applicable anymore.\n    ///\n    /// # Coding 
Discipline\n    ///\n    /// Coding discipline within this function: all interaction with the `pgb` connection\n    /// needs to be sensitive to connection shutdown, currently signalled via [`Self::cancel`].\n    /// This is so that we can shutdown page_service quickly.\n    ///\n    /// TODO: wrap the pgb that we pass to the basebackup handler so that it's sensitive\n    /// to connection cancellation.\n    #[allow(clippy::too_many_arguments)]\n    #[instrument(skip_all, fields(shard_id, ?lsn, ?prev_lsn, %full_backup))]\n    async fn handle_basebackup_request<IO>(\n        &mut self,\n        pgb: &mut PostgresBackend<IO>,\n        tenant_id: TenantId,\n        timeline_id: TimelineId,\n        lsn: Option<Lsn>,\n        prev_lsn: Option<Lsn>,\n        full_backup: bool,\n        gzip: bool,\n        replica: bool,\n        ctx: &RequestContext,\n    ) -> Result<(), QueryError>\n    where\n        IO: AsyncRead + AsyncWrite + Send + Sync + Unpin,\n    {\n        let started = std::time::Instant::now();\n\n        let timeline = self\n            .timeline_handles\n            .as_mut()\n            .unwrap()\n            .get(tenant_id, timeline_id, ShardSelector::Zero)\n            .await?;\n        set_tracing_field_shard_id(&timeline);\n        let ctx = ctx.with_scope_timeline(&timeline);\n\n        if timeline.is_archived() == Some(true) {\n            tracing::info!(\n                \"timeline {tenant_id}/{timeline_id} is archived, but got basebackup request for it.\"\n            );\n            return Err(QueryError::NotFound(\"timeline is archived\".into()));\n        }\n\n        let latest_gc_cutoff_lsn = timeline.get_applied_gc_cutoff_lsn();\n        if let Some(lsn) = lsn {\n            // Backup was requested at a particular LSN. 
Wait for it to arrive.\n            info!(\"waiting for {}\", lsn);\n            timeline\n                .wait_lsn(\n                    lsn,\n                    crate::tenant::timeline::WaitLsnWaiter::PageService,\n                    crate::tenant::timeline::WaitLsnTimeout::Default,\n                    &ctx,\n                )\n                .await?;\n            timeline\n                .check_lsn_is_in_scope(lsn, &latest_gc_cutoff_lsn)\n                .context(\"invalid basebackup lsn\")?;\n        }\n\n        let lsn_awaited_after = started.elapsed();\n\n        // switch client to COPYOUT\n        pgb.write_message_noflush(&BeMessage::CopyOutResponse)\n            .map_err(QueryError::Disconnected)?;\n        self.flush_cancellable(pgb, &self.cancel).await?;\n\n        let mut from_cache = false;\n\n        // Send a tarball of the latest layer on the timeline. Compress if not\n        // fullbackup. TODO Compress in that case too (tests need to be updated)\n        if full_backup {\n            let mut writer = pgb.copyout_writer();\n            basebackup::send_basebackup_tarball(\n                &mut writer,\n                &timeline,\n                lsn,\n                prev_lsn,\n                full_backup,\n                replica,\n                None,\n                &ctx,\n            )\n            .await?;\n        } else {\n            let mut writer = BufWriter::new(pgb.copyout_writer());\n\n            let cached = timeline\n                .get_cached_basebackup_if_enabled(lsn, prev_lsn, full_backup, replica, gzip)\n                .await;\n\n            if let Some(mut cached) = cached {\n                from_cache = true;\n                tokio::io::copy(&mut cached, &mut writer)\n                    .await\n                    .map_err(|err| {\n                        BasebackupError::Client(err, \"handle_basebackup_request,cached,copy\")\n                    })?;\n            } else {\n                
basebackup::send_basebackup_tarball(\n                    &mut writer,\n                    &timeline,\n                    lsn,\n                    prev_lsn,\n                    full_backup,\n                    replica,\n                    // NB: using fast compression because it's on the critical path for compute\n                    // startup. For an empty database, we get <100KB with this method. The\n                    // Level::Best compression method gives us <20KB, but maybe we should add\n                    // basebackup caching on compute shutdown first.\n                    gzip.then_some(async_compression::Level::Fastest),\n                    &ctx,\n                )\n                .await?;\n            }\n            writer\n                .flush()\n                .await\n                .map_err(|err| BasebackupError::Client(err, \"handle_basebackup_request,flush\"))?;\n        }\n\n        pgb.write_message_noflush(&BeMessage::CopyDone)\n            .map_err(QueryError::Disconnected)?;\n        self.flush_cancellable(pgb, &timeline.cancel).await?;\n\n        let basebackup_after = started\n            .elapsed()\n            .checked_sub(lsn_awaited_after)\n            .unwrap_or(Duration::ZERO);\n\n        info!(\n            lsn_await_millis = lsn_awaited_after.as_millis(),\n            basebackup_millis = basebackup_after.as_millis(),\n            %from_cache,\n            \"basebackup complete\"\n        );\n\n        Ok(())\n    }\n\n    // when accessing management api supply None as an argument\n    // when using to authorize tenant pass corresponding tenant id\n    fn check_permission(&self, tenant_id: Option<TenantId>) -> Result<(), QueryError> {\n        if self.auth.is_none() {\n            // auth is set to Trust, nothing to check so just return ok\n            return Ok(());\n        }\n        // auth is some, just checked above, when auth is some\n        // then claims are always present because of checks during connection 
init\n        // so this expect won't trigger\n        let claims = self\n            .claims\n            .as_ref()\n            .expect(\"claims presence already checked\");\n        check_permission(claims, tenant_id).map_err(|e| QueryError::Unauthorized(e.0))\n    }\n}\n\n/// `basebackup tenant timeline [lsn] [--gzip] [--replica]`\n#[derive(Debug, Clone, Eq, PartialEq)]\nstruct BaseBackupCmd {\n    tenant_id: TenantId,\n    timeline_id: TimelineId,\n    lsn: Option<Lsn>,\n    gzip: bool,\n    replica: bool,\n}\n\n/// `fullbackup tenant timeline [lsn] [prev_lsn]`\n#[derive(Debug, Clone, Eq, PartialEq)]\nstruct FullBackupCmd {\n    tenant_id: TenantId,\n    timeline_id: TimelineId,\n    lsn: Option<Lsn>,\n    prev_lsn: Option<Lsn>,\n}\n\n/// `pagestream_v2 tenant timeline`\n#[derive(Debug, Clone, Eq, PartialEq)]\nstruct PageStreamCmd {\n    tenant_id: TenantId,\n    timeline_id: TimelineId,\n    protocol_version: PagestreamProtocolVersion,\n}\n\n/// `lease lsn tenant timeline lsn`\n#[derive(Debug, Clone, Eq, PartialEq)]\nstruct LeaseLsnCmd {\n    tenant_shard_id: TenantShardId,\n    timeline_id: TimelineId,\n    lsn: Lsn,\n}\n\n#[derive(Debug, Clone, Eq, PartialEq)]\nenum PageServiceCmd {\n    Set,\n    PageStream(PageStreamCmd),\n    BaseBackup(BaseBackupCmd),\n    FullBackup(FullBackupCmd),\n    LeaseLsn(LeaseLsnCmd),\n}\n\nimpl PageStreamCmd {\n    fn parse(query: &str, protocol_version: PagestreamProtocolVersion) -> anyhow::Result<Self> {\n        let parameters = query.split_whitespace().collect_vec();\n        if parameters.len() != 2 {\n            bail!(\n                \"invalid number of parameters for pagestream command: {}\",\n                query\n            );\n        }\n        let tenant_id = TenantId::from_str(parameters[0])\n            .with_context(|| format!(\"Failed to parse tenant id from {}\", parameters[0]))?;\n        let timeline_id = TimelineId::from_str(parameters[1])\n            .with_context(|| format!(\"Failed to parse 
timeline id from {}\", parameters[1]))?;\n        Ok(Self {\n            tenant_id,\n            timeline_id,\n            protocol_version,\n        })\n    }\n}\n\nimpl FullBackupCmd {\n    fn parse(query: &str) -> anyhow::Result<Self> {\n        let parameters = query.split_whitespace().collect_vec();\n        if parameters.len() < 2 || parameters.len() > 4 {\n            bail!(\n                \"invalid number of parameters for basebackup command: {}\",\n                query\n            );\n        }\n        let tenant_id = TenantId::from_str(parameters[0])\n            .with_context(|| format!(\"Failed to parse tenant id from {}\", parameters[0]))?;\n        let timeline_id = TimelineId::from_str(parameters[1])\n            .with_context(|| format!(\"Failed to parse timeline id from {}\", parameters[1]))?;\n        // The caller is responsible for providing correct lsn and prev_lsn.\n        let lsn = if let Some(lsn_str) = parameters.get(2) {\n            Some(\n                Lsn::from_str(lsn_str)\n                    .with_context(|| format!(\"Failed to parse Lsn from {lsn_str}\"))?,\n            )\n        } else {\n            None\n        };\n        let prev_lsn = if let Some(prev_lsn_str) = parameters.get(3) {\n            Some(\n                Lsn::from_str(prev_lsn_str)\n                    .with_context(|| format!(\"Failed to parse Lsn from {prev_lsn_str}\"))?,\n            )\n        } else {\n            None\n        };\n        Ok(Self {\n            tenant_id,\n            timeline_id,\n            lsn,\n            prev_lsn,\n        })\n    }\n}\n\nimpl BaseBackupCmd {\n    fn parse(query: &str) -> anyhow::Result<Self> {\n        let parameters = query.split_whitespace().collect_vec();\n        if parameters.len() < 2 {\n            bail!(\n                \"invalid number of parameters for basebackup command: {}\",\n                query\n            );\n        }\n        let tenant_id = TenantId::from_str(parameters[0])\n           
 .with_context(|| format!(\"Failed to parse tenant id from {}\", parameters[0]))?;\n        let timeline_id = TimelineId::from_str(parameters[1])\n            .with_context(|| format!(\"Failed to parse timeline id from {}\", parameters[1]))?;\n        let lsn;\n        let flags_parse_from;\n        if let Some(maybe_lsn) = parameters.get(2) {\n            if *maybe_lsn == \"latest\" {\n                lsn = None;\n                flags_parse_from = 3;\n            } else if maybe_lsn.starts_with(\"--\") {\n                lsn = None;\n                flags_parse_from = 2;\n            } else {\n                lsn = Some(\n                    Lsn::from_str(maybe_lsn)\n                        .with_context(|| format!(\"Failed to parse lsn from {maybe_lsn}\"))?,\n                );\n                flags_parse_from = 3;\n            }\n        } else {\n            lsn = None;\n            flags_parse_from = 2;\n        }\n\n        let mut gzip = false;\n        let mut replica = false;\n\n        for &param in &parameters[flags_parse_from..] 
{\n            match param {\n                \"--gzip\" => {\n                    if gzip {\n                        bail!(\"duplicate parameter for basebackup command: {param}\")\n                    }\n                    gzip = true\n                }\n                \"--replica\" => {\n                    if replica {\n                        bail!(\"duplicate parameter for basebackup command: {param}\")\n                    }\n                    replica = true\n                }\n                _ => bail!(\"invalid parameter for basebackup command: {param}\"),\n            }\n        }\n        Ok(Self {\n            tenant_id,\n            timeline_id,\n            lsn,\n            gzip,\n            replica,\n        })\n    }\n}\n\nimpl LeaseLsnCmd {\n    fn parse(query: &str) -> anyhow::Result<Self> {\n        let parameters = query.split_whitespace().collect_vec();\n        if parameters.len() != 3 {\n            bail!(\n                \"invalid number of parameters for lease lsn command: {}\",\n                query\n            );\n        }\n        let tenant_shard_id = TenantShardId::from_str(parameters[0])\n            .with_context(|| format!(\"Failed to parse tenant id from {}\", parameters[0]))?;\n        let timeline_id = TimelineId::from_str(parameters[1])\n            .with_context(|| format!(\"Failed to parse timeline id from {}\", parameters[1]))?;\n        let lsn = Lsn::from_str(parameters[2])\n            .with_context(|| format!(\"Failed to parse lsn from {}\", parameters[2]))?;\n        Ok(Self {\n            tenant_shard_id,\n            timeline_id,\n            lsn,\n        })\n    }\n}\n\nimpl PageServiceCmd {\n    fn parse(query: &str) -> anyhow::Result<Self> {\n        let query = query.trim();\n        let Some((cmd, other)) = query.split_once(' ') else {\n            bail!(\"cannot parse query: {query}\")\n        };\n        match cmd.to_ascii_lowercase().as_str() {\n            \"pagestream_v2\" => 
Ok(Self::PageStream(PageStreamCmd::parse(\n                other,\n                PagestreamProtocolVersion::V2,\n            )?)),\n            \"pagestream_v3\" => Ok(Self::PageStream(PageStreamCmd::parse(\n                other,\n                PagestreamProtocolVersion::V3,\n            )?)),\n            \"basebackup\" => Ok(Self::BaseBackup(BaseBackupCmd::parse(other)?)),\n            \"fullbackup\" => Ok(Self::FullBackup(FullBackupCmd::parse(other)?)),\n            \"lease\" => {\n                let Some((cmd2, other)) = other.split_once(' ') else {\n                    bail!(\"invalid lease command: {cmd}\");\n                };\n                let cmd2 = cmd2.to_ascii_lowercase();\n                if cmd2 == \"lsn\" {\n                    Ok(Self::LeaseLsn(LeaseLsnCmd::parse(other)?))\n                } else {\n                    bail!(\"invalid lease command: {cmd}\");\n                }\n            }\n            \"set\" => Ok(Self::Set),\n            _ => Err(anyhow::anyhow!(\"unsupported command {cmd} in {query}\")),\n        }\n    }\n}\n\n/// Parse the startup options from the postgres wire protocol startup packet.\n///\n/// It takes a sequence of `-c option=X` or `-coption=X`. It parses the options string\n/// by best effort and returns all the options parsed (key-value pairs) and a bool indicating\n/// whether an error occurred while parsing. 
There could be duplicates in the options\n/// if the caller passed such parameters.\nfn parse_options(options: &str) -> (Vec<(String, String)>, bool) {\n    let mut parsing_config = false;\n    let mut has_error = false;\n    let mut config = Vec::new();\n    for item in options.split_whitespace() {\n        if item == \"-c\" {\n            if !parsing_config {\n                parsing_config = true;\n            } else {\n                // \"-c\" followed with another \"-c\"\n                tracing::warn!(\"failed to parse the startup options: {options}\");\n                has_error = true;\n                break;\n            }\n        } else if item.starts_with(\"-c\") || parsing_config {\n            let Some((mut key, value)) = item.split_once('=') else {\n                // \"-c\" followed with an invalid option\n                tracing::warn!(\"failed to parse the startup options: {options}\");\n                has_error = true;\n                break;\n            };\n            if !parsing_config {\n                // Parse \"-coptions=X\"\n                let Some(stripped_key) = key.strip_prefix(\"-c\") else {\n                    tracing::warn!(\"failed to parse the startup options: {options}\");\n                    has_error = true;\n                    break;\n                };\n                key = stripped_key;\n            }\n            config.push((key.to_string(), value.to_string()));\n            parsing_config = false;\n        } else {\n            tracing::warn!(\"failed to parse the startup options: {options}\");\n            has_error = true;\n            break;\n        }\n    }\n    if parsing_config {\n        // \"-c\" without the option\n        tracing::warn!(\"failed to parse the startup options: {options}\");\n        has_error = true;\n    }\n    (config, has_error)\n}\n\nimpl<IO> postgres_backend::Handler<IO> for PageServerHandler\nwhere\n    IO: AsyncRead + AsyncWrite + Send + Sync + Unpin + 'static,\n{\n    fn 
check_auth_jwt(\n        &mut self,\n        _pgb: &mut PostgresBackend<IO>,\n        jwt_response: &[u8],\n    ) -> Result<(), QueryError> {\n        // this unwrap is never triggered, because check_auth_jwt only called when auth_type is NeonJWT\n        // which requires auth to be present\n        let data: TokenData<Claims> = self\n            .auth\n            .as_ref()\n            .unwrap()\n            .decode(str::from_utf8(jwt_response).context(\"jwt response is not UTF-8\")?)\n            .map_err(|e| QueryError::Unauthorized(e.0))?;\n\n        if matches!(data.claims.scope, Scope::Tenant) && data.claims.tenant_id.is_none() {\n            return Err(QueryError::Unauthorized(\n                \"jwt token scope is Tenant, but tenant id is missing\".into(),\n            ));\n        }\n\n        debug!(\n            \"jwt scope check succeeded for scope: {:#?} by tenant id: {:?}\",\n            data.claims.scope, data.claims.tenant_id,\n        );\n\n        self.claims = Some(data.claims);\n        Ok(())\n    }\n\n    fn startup(\n        &mut self,\n        _pgb: &mut PostgresBackend<IO>,\n        sm: &FeStartupPacket,\n    ) -> Result<(), QueryError> {\n        fail::fail_point!(\"ps::connection-start::startup-packet\");\n\n        if let FeStartupPacket::StartupMessage { params, .. 
} = sm {\n            if let Some(app_name) = params.get(\"application_name\") {\n                self.perf_span_fields.application_name = Some(app_name.to_string());\n                Span::current().record(\"application_name\", field::display(app_name));\n            }\n            if let Some(options) = params.get(\"options\") {\n                let (config, _) = parse_options(options);\n                for (key, value) in config {\n                    if key == \"neon.compute_mode\" {\n                        self.perf_span_fields.compute_mode = Some(value.clone());\n                        Span::current().record(\"compute_mode\", field::display(value));\n                    }\n                }\n            }\n        };\n\n        Ok(())\n    }\n\n    #[instrument(skip_all, fields(tenant_id, timeline_id))]\n    async fn process_query(\n        &mut self,\n        pgb: &mut PostgresBackend<IO>,\n        query_string: &str,\n    ) -> Result<(), QueryError> {\n        fail::fail_point!(\"simulated-bad-compute-connection\", |_| {\n            info!(\"Hit failpoint for bad connection\");\n            Err(QueryError::SimulatedConnectionError)\n        });\n\n        fail::fail_point!(\"ps::connection-start::process-query\");\n\n        let ctx = self.connection_ctx.attached_child();\n        debug!(\"process query {query_string}\");\n        let query = PageServiceCmd::parse(query_string)?;\n        match query {\n            PageServiceCmd::PageStream(PageStreamCmd {\n                tenant_id,\n                timeline_id,\n                protocol_version,\n            }) => {\n                tracing::Span::current()\n                    .record(\"tenant_id\", field::display(tenant_id))\n                    .record(\"timeline_id\", field::display(timeline_id));\n\n                self.check_permission(Some(tenant_id))?;\n                let command_kind = match protocol_version {\n                    PagestreamProtocolVersion::V2 => 
ComputeCommandKind::PageStreamV2,\n                    PagestreamProtocolVersion::V3 => ComputeCommandKind::PageStreamV3,\n                };\n                COMPUTE_COMMANDS_COUNTERS.for_command(command_kind).inc();\n\n                self.handle_pagerequests(pgb, tenant_id, timeline_id, protocol_version, ctx)\n                    .await?;\n            }\n            PageServiceCmd::BaseBackup(BaseBackupCmd {\n                tenant_id,\n                timeline_id,\n                lsn,\n                gzip,\n                replica,\n            }) => {\n                tracing::Span::current()\n                    .record(\"tenant_id\", field::display(tenant_id))\n                    .record(\"timeline_id\", field::display(timeline_id));\n\n                self.check_permission(Some(tenant_id))?;\n\n                COMPUTE_COMMANDS_COUNTERS\n                    .for_command(ComputeCommandKind::Basebackup)\n                    .inc();\n                let metric_recording = metrics::BASEBACKUP_QUERY_TIME.start_recording();\n                let res = async {\n                    self.handle_basebackup_request(\n                        pgb,\n                        tenant_id,\n                        timeline_id,\n                        lsn,\n                        None,\n                        false,\n                        gzip,\n                        replica,\n                        &ctx,\n                    )\n                    .await?;\n                    pgb.write_message_noflush(&BeMessage::CommandComplete(b\"SELECT 1\"))?;\n                    Result::<(), QueryError>::Ok(())\n                }\n                .await;\n                metric_recording.observe(&res);\n                res?;\n            }\n            // same as basebackup, but result includes relational data as well\n            PageServiceCmd::FullBackup(FullBackupCmd {\n                tenant_id,\n                timeline_id,\n                lsn,\n                
prev_lsn,\n            }) => {\n                tracing::Span::current()\n                    .record(\"tenant_id\", field::display(tenant_id))\n                    .record(\"timeline_id\", field::display(timeline_id));\n\n                self.check_permission(Some(tenant_id))?;\n\n                COMPUTE_COMMANDS_COUNTERS\n                    .for_command(ComputeCommandKind::Fullbackup)\n                    .inc();\n\n                // Check that the timeline exists\n                self.handle_basebackup_request(\n                    pgb,\n                    tenant_id,\n                    timeline_id,\n                    lsn,\n                    prev_lsn,\n                    true,\n                    false,\n                    false,\n                    &ctx,\n                )\n                .await?;\n                pgb.write_message_noflush(&BeMessage::CommandComplete(b\"SELECT 1\"))?;\n            }\n            PageServiceCmd::Set => {\n                // important because psycopg2 executes \"SET datestyle TO 'ISO'\"\n                // on connect\n                // TODO: allow setting options, i.e., application_name/compute_mode via SET commands\n                pgb.write_message_noflush(&BeMessage::CommandComplete(b\"SELECT 1\"))?;\n            }\n            PageServiceCmd::LeaseLsn(LeaseLsnCmd {\n                tenant_shard_id,\n                timeline_id,\n                lsn,\n            }) => {\n                tracing::Span::current()\n                    .record(\"tenant_id\", field::display(tenant_shard_id))\n                    .record(\"timeline_id\", field::display(timeline_id));\n\n                self.check_permission(Some(tenant_shard_id.tenant_id))?;\n\n                COMPUTE_COMMANDS_COUNTERS\n                    .for_command(ComputeCommandKind::LeaseLsn)\n                    .inc();\n\n                match self\n                    .handle_make_lsn_lease(pgb, tenant_shard_id, timeline_id, lsn, &ctx)\n                    
.await\n                {\n                    Ok(()) => {\n                        pgb.write_message_noflush(&BeMessage::CommandComplete(b\"SELECT 1\"))?\n                    }\n                    Err(e) => {\n                        error!(\"error obtaining lsn lease for {lsn}: {e:?}\");\n                        pgb.write_message_noflush(&BeMessage::ErrorResponse(\n                            &e.to_string(),\n                            Some(e.pg_error_code()),\n                        ))?\n                    }\n                };\n            }\n        }\n\n        Ok(())\n    }\n}\n\n/// Serves the page service over gRPC. Dispatches to PageServerHandler for request processing.\n///\n/// TODO: rename to PageServiceHandler when libpq impl is removed.\npub struct GrpcPageServiceHandler {\n    tenant_manager: Arc<TenantManager>,\n    ctx: RequestContext,\n\n    /// Cancelled to shut down the server. Tonic will shut down in response to this, but wait for\n    /// in-flight requests to complete. Any tasks we spawn ourselves must respect this token.\n    cancel: CancellationToken,\n\n    /// Any tasks we spawn ourselves should clone this gate guard, so that we can wait for them to\n    /// complete during shutdown. Request handlers implicitly hold this guard already.\n    gate_guard: GateGuard,\n\n    /// `get_vectored` concurrency setting.\n    get_vectored_concurrent_io: GetVectoredConcurrentIo,\n}\n\nimpl GrpcPageServiceHandler {\n    /// Spawns a gRPC server for the page service.\n    ///\n    /// Returns a `CancellableTask` handle that can be used to shut down the server. It waits for\n    /// any in-flight requests and tasks to complete first.\n    ///\n    /// TODO: this doesn't support TLS. 
We need TLS reloading via ReloadingCertificateResolver, so we\n    /// need to reimplement the TCP+TLS accept loop ourselves.\n    pub fn spawn(\n        tenant_manager: Arc<TenantManager>,\n        auth: Option<Arc<SwappableJwtAuth>>,\n        perf_trace_dispatch: Option<Dispatch>,\n        get_vectored_concurrent_io: GetVectoredConcurrentIo,\n        listener: std::net::TcpListener,\n    ) -> anyhow::Result<CancellableTask> {\n        // Set up a cancellation token for shutting down the server, and a gate to wait for all\n        // requests and spawned tasks to complete.\n        let cancel = CancellationToken::new();\n        let gate = Gate::default();\n\n        let ctx = RequestContextBuilder::new(TaskKind::PageRequestHandler)\n            .download_behavior(DownloadBehavior::Download)\n            .perf_span_dispatch(perf_trace_dispatch)\n            .detached_child();\n\n        // Set up the TCP socket. We take a preconfigured TcpListener to bind the\n        // port early during startup.\n        let incoming = {\n            let _runtime = COMPUTE_REQUEST_RUNTIME.enter(); // required by TcpListener::from_std\n            listener.set_nonblocking(true)?;\n            tonic::transport::server::TcpIncoming::from(tokio::net::TcpListener::from_std(\n                listener,\n            )?)\n            .with_nodelay(Some(GRPC_TCP_NODELAY))\n            .with_keepalive(Some(GRPC_TCP_KEEPALIVE_TIME))\n        };\n\n        // Set up the gRPC server.\n        //\n        // TODO: consider tuning window sizes.\n        let mut server = tonic::transport::Server::builder()\n            .http2_keepalive_interval(Some(GRPC_HTTP2_KEEPALIVE_INTERVAL))\n            .http2_keepalive_timeout(Some(GRPC_HTTP2_KEEPALIVE_TIMEOUT))\n            .max_concurrent_streams(Some(GRPC_MAX_CONCURRENT_STREAMS));\n\n        // Main page service stack. Uses a mix of Tonic interceptors and Tower layers:\n        //\n        // * Interceptors: can inspect and modify the gRPC request. 
Sync code only, runs before service.\n        //\n        // * Layers: allow async code, can run code after the service response. However, only has access\n        //   to the raw HTTP request/response, not the gRPC types.\n        let page_service_handler = GrpcPageServiceHandler {\n            tenant_manager,\n            ctx,\n            cancel: cancel.clone(),\n            gate_guard: gate.enter().expect(\"gate was just created\"),\n            get_vectored_concurrent_io,\n        };\n\n        let observability_layer = ObservabilityLayer;\n        let mut tenant_interceptor = TenantMetadataInterceptor;\n        let mut auth_interceptor = TenantAuthInterceptor::new(auth);\n\n        let page_service = tower::ServiceBuilder::new()\n            // Create tracing span and record request start time.\n            .layer(observability_layer)\n            // Intercept gRPC requests.\n            .layer(tonic::service::InterceptorLayer::new(move |mut req| {\n                // Extract tenant metadata.\n                req = tenant_interceptor.call(req)?;\n                // Authenticate tenant JWT token.\n                req = auth_interceptor.call(req)?;\n                Ok(req)\n            }))\n            // Run the page service.\n            .service(\n                proto::PageServiceServer::new(page_service_handler)\n                    // Support both gzip and zstd compression. The client decides what to use.\n                    .accept_compressed(tonic::codec::CompressionEncoding::Gzip)\n                    .accept_compressed(tonic::codec::CompressionEncoding::Zstd)\n                    .send_compressed(tonic::codec::CompressionEncoding::Gzip)\n                    .send_compressed(tonic::codec::CompressionEncoding::Zstd),\n            );\n        let server = server.add_service(page_service);\n\n        // Reflection service for use with e.g. 
grpcurl.\n        let reflection_service = tonic_reflection::server::Builder::configure()\n            .register_encoded_file_descriptor_set(proto::FILE_DESCRIPTOR_SET)\n            .build_v1()?;\n        let server = server.add_service(reflection_service);\n\n        // Spawn server task. It runs until the cancellation token fires and in-flight requests and\n        // tasks complete. The `CancellableTask` will wait for the task's join handle, which\n        // implicitly waits for the gate to close.\n        let task_cancel = cancel.clone();\n        let task = COMPUTE_REQUEST_RUNTIME.spawn(task_mgr::exit_on_panic_or_error(\n            \"grpc pageservice listener\",\n            async move {\n                server\n                    .serve_with_incoming_shutdown(incoming, task_cancel.cancelled())\n                    .await?;\n                // Server exited cleanly. All requests should have completed by now. Wait for any\n                // spawned tasks to complete as well (e.g. IoConcurrency sidecars) via the gate.\n                gate.close().await;\n                anyhow::Ok(())\n            },\n        ));\n\n        Ok(CancellableTask { task, cancel })\n    }\n\n    /// Generates a PagestreamRequest header from a ReadLsn and request ID.\n    fn make_hdr(\n        read_lsn: page_api::ReadLsn,\n        req_id: Option<page_api::RequestID>,\n    ) -> PagestreamRequest {\n        PagestreamRequest {\n            reqid: req_id.map(|r| r.id).unwrap_or_default(),\n            request_lsn: read_lsn.request_lsn,\n            not_modified_since: read_lsn\n                .not_modified_since_lsn\n                .unwrap_or(read_lsn.request_lsn),\n        }\n    }\n\n    /// Acquires a timeline handle for the given request. 
The shard index must match a local shard.\n    ///\n    /// NB: this will fail during shard splits, see comment on [`Self::maybe_split_get_page`].\n    async fn get_request_timeline(\n        &self,\n        req: &tonic::Request<impl Any>,\n    ) -> Result<Handle<TenantManagerTypes>, GetActiveTimelineError> {\n        let TenantTimelineId {\n            tenant_id,\n            timeline_id,\n        } = *extract::<TenantTimelineId>(req);\n        let shard_index = *extract::<ShardIndex>(req);\n\n        // TODO: untangle acquisition from TenantManagerWrapper::resolve() and Cache::get(), to\n        // avoid the unnecessary overhead.\n        TimelineHandles::new(self.tenant_manager.clone())\n            .get(tenant_id, timeline_id, ShardSelector::Known(shard_index))\n            .await\n    }\n\n    /// Acquires a timeline handle for the given request, which must be for shard zero. Most\n    /// metadata requests are only valid on shard zero.\n    ///\n    /// NB: during an ongoing shard split, the compute will keep talking to the parent shard until\n    /// the split is committed, but the parent shard may have been removed in the meanwhile. In that\n    /// case, we reroute the request to the new child shard. 
See [`Self::maybe_split_get_page`].\n    ///\n    /// TODO: revamp the split protocol to avoid this child routing.\n    async fn get_request_timeline_shard_zero(\n        &self,\n        req: &tonic::Request<impl Any>,\n    ) -> Result<Handle<TenantManagerTypes>, tonic::Status> {\n        let TenantTimelineId {\n            tenant_id,\n            timeline_id,\n        } = *extract::<TenantTimelineId>(req);\n        let shard_index = *extract::<ShardIndex>(req);\n\n        if shard_index.shard_number.0 != 0 {\n            return Err(tonic::Status::invalid_argument(format!(\n                \"request only valid on shard zero (requested shard {shard_index})\",\n            )));\n        }\n\n        // TODO: untangle acquisition from TenantManagerWrapper::resolve() and Cache::get(), to\n        // avoid the unnecessary overhead.\n        let mut handles = TimelineHandles::new(self.tenant_manager.clone());\n        match handles\n            .get(tenant_id, timeline_id, ShardSelector::Known(shard_index))\n            .await\n        {\n            Ok(timeline) => Ok(timeline),\n            Err(err) => {\n                // We may be in the middle of a shard split. 
Try to find a child shard 0.\n                if let Ok(timeline) = handles\n                    .get(tenant_id, timeline_id, ShardSelector::Zero)\n                    .await\n                    && timeline.get_shard_index().shard_count > shard_index.shard_count\n                {\n                    return Ok(timeline);\n                }\n                Err(err.into())\n            }\n        }\n    }\n\n    /// Starts a SmgrOpTimer at received_at, throttles the request, and records execution start.\n    /// Only errors if the timeline is shutting down.\n    ///\n    /// TODO: move timer construction to ObservabilityLayer (see TODO there).\n    /// TODO: decouple rate limiting (middleware?), and return SlowDown errors instead.\n    async fn record_op_start_and_throttle(\n        timeline: &Handle<TenantManagerTypes>,\n        op: metrics::SmgrQueryType,\n        received_at: Instant,\n    ) -> Result<SmgrOpTimer, tonic::Status> {\n        let mut timer = PageServerHandler::record_op_start_and_throttle(timeline, op, received_at)\n            .await\n            .map_err(|err| match err {\n                // record_op_start_and_throttle() only returns Shutdown.\n                QueryError::Shutdown => tonic::Status::unavailable(format!(\"{err}\")),\n                err => tonic::Status::internal(format!(\"unexpected error: {err}\")),\n            })?;\n        timer.observe_execution_start(Instant::now());\n        Ok(timer)\n    }\n\n    /// Processes a GetPage batch request, via the GetPages bidirectional streaming RPC.\n    ///\n    /// NB: errors returned from here are intercepted in get_pages(), and may be converted to a\n    /// GetPageResponse with an appropriate status code to avoid terminating the stream.\n    ///\n    /// TODO: get_vectored() currently enforces a batch limit of 32. Postgres will typically send\n    /// batches up to effective_io_concurrency = 100. 
Either we have to accept large batches, or\n    /// split them up in the client or server.\n    #[instrument(skip_all, fields(\n        req_id = %req.request_id,\n        rel = %req.rel,\n        blkno = %req.block_numbers[0],\n        blks = %req.block_numbers.len(),\n        lsn = %req.read_lsn,\n    ))]\n    async fn get_page(\n        ctx: &RequestContext,\n        timeline: Handle<TenantManagerTypes>,\n        req: page_api::GetPageRequest,\n        io_concurrency: IoConcurrency,\n        received_at: Instant,\n    ) -> Result<page_api::GetPageResponse, tonic::Status> {\n        let ctx = ctx.with_scope_page_service_pagestream(&timeline);\n\n        for &blkno in &req.block_numbers {\n            let shard = timeline.get_shard_identity();\n            let key = rel_block_to_key(req.rel, blkno);\n            if !shard.is_key_local(&key) {\n                return Err(tonic::Status::invalid_argument(format!(\n                    \"block {blkno} of relation {} requested on wrong shard {} (is on {})\",\n                    req.rel,\n                    timeline.get_shard_index(),\n                    ShardIndex::new(shard.get_shard_number(&key), shard.count),\n                )));\n            }\n        }\n\n        let latest_gc_cutoff_lsn = timeline.get_applied_gc_cutoff_lsn(); // hold guard\n        let effective_lsn = PageServerHandler::effective_request_lsn(\n            &timeline,\n            timeline.get_last_record_lsn(),\n            req.read_lsn.request_lsn,\n            req.read_lsn\n                .not_modified_since_lsn\n                .unwrap_or(req.read_lsn.request_lsn),\n            &latest_gc_cutoff_lsn,\n        )?;\n\n        let mut batch = SmallVec::with_capacity(req.block_numbers.len());\n        for blkno in req.block_numbers {\n            // TODO: this creates one timer per page and throttles it. 
We should have a timer for\n            // the entire batch, and throttle only the batch, but this is equivalent to what\n            // PageServerHandler does already so we keep it for now.\n            let timer = Self::record_op_start_and_throttle(\n                &timeline,\n                metrics::SmgrQueryType::GetPageAtLsn,\n                received_at,\n            )\n            .await?;\n\n            batch.push(BatchedGetPageRequest {\n                req: PagestreamGetPageRequest {\n                    hdr: Self::make_hdr(req.read_lsn, Some(req.request_id)),\n                    rel: req.rel,\n                    blkno,\n                },\n                lsn_range: LsnRange {\n                    effective_lsn,\n                    request_lsn: req.read_lsn.request_lsn,\n                },\n                timer,\n                ctx: ctx.attached_child(),\n                batch_wait_ctx: None, // TODO: add tracing\n            });\n        }\n\n        // TODO: this does a relation size query for every page in the batch. Since this batch is\n        // all for one relation, we could do this only once. 
However, this is not the case for the\n        // libpq implementation.\n        let results = PageServerHandler::handle_get_page_at_lsn_request_batched(\n            &timeline,\n            batch,\n            io_concurrency,\n            GetPageBatchBreakReason::BatchFull, // TODO: not relevant for gRPC batches\n            &ctx,\n        )\n        .await;\n\n        let mut resp = page_api::GetPageResponse {\n            request_id: req.request_id,\n            status_code: page_api::GetPageStatusCode::Ok,\n            reason: None,\n            rel: req.rel,\n            pages: Vec::with_capacity(results.len()),\n        };\n\n        for result in results {\n            match result {\n                Ok((PagestreamBeMessage::GetPage(r), _, _)) => resp.pages.push(page_api::Page {\n                    block_number: r.req.blkno,\n                    image: r.page,\n                }),\n                Ok((resp, _, _)) => {\n                    return Err(tonic::Status::internal(format!(\n                        \"unexpected response: {resp:?}\"\n                    )));\n                }\n                Err(err) => return Err(err.err.into()),\n            };\n        }\n\n        Ok(resp)\n    }\n\n    /// Processes a GetPage request when there is a potential shard split in progress. We have to\n    /// reroute the request to any local child shards, and split batch requests that straddle\n    /// multiple child shards.\n    ///\n    /// Parent shards are split and removed incrementally (there may be many parent shards when\n    /// splitting an already-sharded tenant), but the compute is only notified once the overall\n    /// split commits, which can take several minutes. 
In the meanwhile, the compute will be sending\n    /// requests to the parent shards.\n    ///\n    /// TODO: add test infrastructure to provoke this situation frequently and for long periods of\n    /// time, to properly exercise it.\n    ///\n    /// TODO: revamp the split protocol to avoid this, e.g.:\n    /// * Keep the parent shard until the split commits and the compute is notified.\n    /// * Notify the compute about each subsplit.\n    /// * Return an error that updates the compute's shard map.\n    #[instrument(skip_all)]\n    #[allow(clippy::too_many_arguments)]\n    async fn maybe_split_get_page(\n        ctx: &RequestContext,\n        handles: &mut TimelineHandles,\n        tenant_id: TenantId,\n        timeline_id: TimelineId,\n        parent: ShardIndex,\n        req: page_api::GetPageRequest,\n        io_concurrency: IoConcurrency,\n        received_at: Instant,\n    ) -> Result<page_api::GetPageResponse, tonic::Status> {\n        // Check the first page to see if we have any child shards at all. Otherwise, the compute is\n        // just talking to the wrong Pageserver. 
If the parent has been split, the shard now owning\n        // the page must have a higher shard count.\n        let timeline = handles\n            .get(\n                tenant_id,\n                timeline_id,\n                ShardSelector::Page(rel_block_to_key(req.rel, req.block_numbers[0])),\n            )\n            .await?;\n\n        let shard_id = timeline.get_shard_identity();\n        if shard_id.count <= parent.shard_count {\n            return Err(HandleUpgradeError::ShutDown.into()); // emulate original error\n        }\n\n        // Fast path: the request fits in a single shard.\n        if let Some(shard_index) =\n            GetPageSplitter::for_single_shard(&req, shard_id.count, Some(shard_id.stripe_size))?\n        {\n            // We got the shard ID from the first page, so these must be equal.\n            assert_eq!(shard_index.shard_number, shard_id.number);\n            assert_eq!(shard_index.shard_count, shard_id.count);\n            return Self::get_page(ctx, timeline, req, io_concurrency, received_at).await;\n        }\n\n        // The request spans multiple shards; split it and dispatch parallel requests. 
All pages\n        // were originally in the parent shard, and during a split all children are local, so we\n        // expect to find local shards for all pages.\n        let mut splitter = GetPageSplitter::split(req, shard_id.count, Some(shard_id.stripe_size))?;\n\n        let mut shard_requests = FuturesUnordered::new();\n        for (shard_index, shard_req) in splitter.drain_requests() {\n            let timeline = handles\n                .get(tenant_id, timeline_id, ShardSelector::Known(shard_index))\n                .await?;\n            let future = Self::get_page(\n                ctx,\n                timeline,\n                shard_req,\n                io_concurrency.clone(),\n                received_at,\n            )\n            .map(move |result| result.map(|resp| (shard_index, resp)));\n            shard_requests.push(future);\n        }\n\n        while let Some((shard_index, shard_response)) = shard_requests.next().await.transpose()? {\n            splitter.add_response(shard_index, shard_response)?;\n        }\n\n        Ok(splitter.collect_response()?)\n    }\n}\n\n/// Implements the gRPC page service.\n///\n/// On client disconnect (e.g. timeout or client shutdown), Tonic will drop the request handler\n/// futures, so the read path must be cancellation-safe. 
On server shutdown, Tonic will wait for\n/// in-flight requests to complete.\n///\n/// TODO: when the libpq impl is removed, remove the Pagestream types and inline the handler code.\n#[tonic::async_trait]\nimpl proto::PageService for GrpcPageServiceHandler {\n    type GetBaseBackupStream = Pin<\n        Box<dyn Stream<Item = Result<proto::GetBaseBackupResponseChunk, tonic::Status>> + Send>,\n    >;\n\n    type GetPagesStream =\n        Pin<Box<dyn Stream<Item = Result<proto::GetPageResponse, tonic::Status>> + Send>>;\n\n    #[instrument(skip_all, fields(lsn))]\n    async fn get_base_backup(\n        &self,\n        req: tonic::Request<proto::GetBaseBackupRequest>,\n    ) -> Result<tonic::Response<Self::GetBaseBackupStream>, tonic::Status> {\n        // Send chunks of 256 KB to avoid large memory allocations. pagebench basebackup shows this\n        // to be the sweet spot where throughput is saturated.\n        const CHUNK_SIZE: usize = 256 * 1024;\n\n        let timeline = self.get_request_timeline_shard_zero(&req).await?;\n        let ctx = self.ctx.with_scope_timeline(&timeline);\n\n        // Validate the request and decorate the span.\n        if timeline.is_archived() == Some(true) {\n            return Err(tonic::Status::failed_precondition(\"timeline is archived\"));\n        }\n        let req: page_api::GetBaseBackupRequest = req.into_inner().try_into()?;\n\n        span_record!(lsn=?req.lsn);\n\n        // Wait for the LSN to arrive, if given.\n        if let Some(lsn) = req.lsn {\n            let latest_gc_cutoff_lsn = timeline.get_applied_gc_cutoff_lsn();\n            timeline\n                .wait_lsn(\n                    lsn,\n                    WaitLsnWaiter::PageService,\n                    WaitLsnTimeout::Default,\n                    &ctx,\n                )\n                .await?;\n            timeline\n                .check_lsn_is_in_scope(lsn, &latest_gc_cutoff_lsn)\n                .map_err(|err| {\n                    
tonic::Status::invalid_argument(format!(\"invalid basebackup LSN: {err}\"))\n                })?;\n        }\n\n        // Spawn a task to run the basebackup.\n        let span = Span::current();\n        let gate_guard = self\n            .gate_guard\n            .try_clone()\n            .map_err(|_| tonic::Status::unavailable(\"shutting down\"))?;\n        let (mut simplex_read, mut simplex_write) = tokio::io::simplex(CHUNK_SIZE);\n        let jh = tokio::spawn(async move {\n            let _gate_guard = gate_guard; // keep gate open until task completes\n\n            let gzip_level = match req.compression {\n                page_api::BaseBackupCompression::None => None,\n                // NB: using fast compression because it's on the critical path for compute\n                // startup. For an empty database, we get <100KB with this method. The\n                // Level::Best compression method gives us <20KB, but maybe we should add\n                // basebackup caching on compute shutdown first.\n                page_api::BaseBackupCompression::Gzip => Some(async_compression::Level::Fastest),\n            };\n\n            // Check for a cached basebackup.\n            let cached = timeline\n                .get_cached_basebackup_if_enabled(\n                    req.lsn,\n                    None,\n                    req.full,\n                    req.replica,\n                    gzip_level.is_some(),\n                )\n                .await;\n\n            let result = if let Some(mut cached) = cached {\n                // If we have a cached basebackup, send it.\n                tokio::io::copy(&mut cached, &mut simplex_write)\n                    .await\n                    .map(|_| ())\n                    .map_err(|err| BasebackupError::Client(err, \"cached,copy\"))\n            } else {\n                basebackup::send_basebackup_tarball(\n                    &mut simplex_write,\n                    &timeline,\n                    req.lsn,\n   
                 None,\n                    req.full,\n                    req.replica,\n                    gzip_level,\n                    &ctx,\n                )\n                .instrument(span) // propagate request span\n                .await\n            };\n            simplex_write\n                .shutdown()\n                .await\n                .map_err(|err| BasebackupError::Client(err, \"simplex_write\"))?;\n            result\n        });\n\n        // Emit chunks of size CHUNK_SIZE.\n        let chunks = async_stream::try_stream! {\n            loop {\n                let mut chunk = BytesMut::with_capacity(CHUNK_SIZE).limit(CHUNK_SIZE);\n                loop {\n                    let n = simplex_read.read_buf(&mut chunk).await.map_err(|err| {\n                        tonic::Status::internal(format!(\"failed to read basebackup chunk: {err}\"))\n                    })?;\n                    if n == 0 {\n                        break; // full chunk or closed stream\n                    }\n                }\n                let chunk = chunk.into_inner().freeze();\n                if chunk.is_empty() {\n                    break;\n                }\n                yield proto::GetBaseBackupResponseChunk::from(chunk);\n            }\n            // Wait for the basebackup task to exit and check for errors.\n            jh.await.map_err(|err| {\n                tonic::Status::internal(format!(\"basebackup failed: {err}\"))\n            })??;\n        };\n\n        Ok(tonic::Response::new(Box::pin(chunks)))\n    }\n\n    #[instrument(skip_all, fields(db_oid, lsn))]\n    async fn get_db_size(\n        &self,\n        req: tonic::Request<proto::GetDbSizeRequest>,\n    ) -> Result<tonic::Response<proto::GetDbSizeResponse>, tonic::Status> {\n        let received_at = extract::<ReceivedAt>(&req).0;\n        let timeline = self.get_request_timeline_shard_zero(&req).await?;\n        let ctx = self.ctx.with_scope_page_service_pagestream(&timeline);\n\n    
    // Validate the request, decorate the span, and convert it to a Pagestream request.\n        let req: page_api::GetDbSizeRequest = req.into_inner().try_into()?;\n\n        span_record!(db_oid=%req.db_oid, lsn=%req.read_lsn);\n\n        let req = PagestreamDbSizeRequest {\n            hdr: Self::make_hdr(req.read_lsn, None),\n            dbnode: req.db_oid,\n        };\n\n        // Execute the request and convert the response.\n        let _timer = Self::record_op_start_and_throttle(\n            &timeline,\n            metrics::SmgrQueryType::GetDbSize,\n            received_at,\n        )\n        .await?;\n\n        let resp = PageServerHandler::handle_db_size_request(&timeline, &req, &ctx).await?;\n        let resp = resp.db_size as page_api::GetDbSizeResponse;\n        Ok(tonic::Response::new(resp.into()))\n    }\n\n    // NB: don't instrument this, instrument each streamed request.\n    async fn get_pages(\n        &self,\n        req: tonic::Request<tonic::Streaming<proto::GetPageRequest>>,\n    ) -> Result<tonic::Response<Self::GetPagesStream>, tonic::Status> {\n        // Extract the timeline from the request and check that it exists.\n        //\n        // NB: during shard splits, the compute may still send requests to the parent shard. We'll\n        // reroute requests to the child shards below, but we also detect the common cases here\n        // where either the shard exists or no shards exist at all. 
If we have a child shard, we\n        // can't acquire a weak handle because we don't know which child shard to use yet.\n        let TenantTimelineId {\n            tenant_id,\n            timeline_id,\n        } = *extract::<TenantTimelineId>(&req);\n        let shard_index = *extract::<ShardIndex>(&req);\n\n        let mut handles = TimelineHandles::new(self.tenant_manager.clone());\n        let timeline = match handles\n            .get(tenant_id, timeline_id, ShardSelector::Known(shard_index))\n            .await\n        {\n            // The timeline shard exists. Keep a weak handle to reuse for each request.\n            Ok(timeline) => Some(timeline.downgrade()),\n            // The shard doesn't exist, but a child shard does. We'll reroute requests later.\n            Err(_) if self.tenant_manager.has_child_shard(tenant_id, shard_index) => None,\n            // Failed to fetch the timeline, and no child shard exists. Error out.\n            Err(err) => return Err(err.into()),\n        };\n\n        // Spawn an IoConcurrency sidecar, if enabled.\n        let gate_guard = self\n            .gate_guard\n            .try_clone()\n            .map_err(|_| tonic::Status::unavailable(\"shutting down\"))?;\n        let io_concurrency =\n            IoConcurrency::spawn_from_conf(self.get_vectored_concurrent_io, gate_guard);\n\n        // Construct the GetPageRequest stream handler.\n        let span = Span::current();\n        let ctx = self.ctx.attached_child();\n        let cancel = self.cancel.clone();\n        let mut reqs = req.into_inner();\n\n        let resps = async_stream::try_stream! {\n            loop {\n                // Wait for the next client request.\n                //\n                // NB: Tonic considers the entire stream to be an in-flight request and will wait\n                // for it to complete before shutting down. React to cancellation between requests.\n                let req = tokio::select! 
{\n                    biased;\n                    _ = cancel.cancelled() => Err(tonic::Status::unavailable(\"shutting down\")),\n\n                    result = reqs.message() => match result {\n                        Ok(Some(req)) => Ok(req),\n                        Ok(None) => break, // client closed the stream\n                        Err(err) => Err(err),\n                    },\n                }?;\n\n                let received_at = Instant::now();\n                let req_id = req.request_id.map(page_api::RequestID::from).unwrap_or_default();\n\n                // Process the request, using a closure to capture errors.\n                let process_request = async || {\n                    let req = page_api::GetPageRequest::try_from(req)?;\n\n                    // Fast path: use the pre-acquired timeline handle.\n                    if let Some(Ok(timeline)) = timeline.as_ref().map(|t| t.upgrade()) {\n                        return Self::get_page(&ctx, timeline, req, io_concurrency.clone(), received_at)\n                            .instrument(span.clone()) // propagate request span\n                            .await\n                    }\n\n                    // The timeline handle is stale. During shard splits, the compute may still be\n                    // sending requests to the parent shard. 
Try to re-route requests to the child\n                    // shards, and split any batch requests that straddle multiple child shards.\n                    Self::maybe_split_get_page(\n                        &ctx,\n                        &mut handles,\n                        tenant_id,\n                        timeline_id,\n                        shard_index,\n                        req,\n                        io_concurrency.clone(),\n                        received_at,\n                    )\n                    .instrument(span.clone()) // propagate request span\n                    .await\n                };\n\n                // Return the response. Convert per-request errors to GetPageResponses if\n                // appropriate, or terminate the stream with a tonic::Status.\n                yield match process_request().await {\n                    Ok(resp) => resp.into(),\n                    Err(status) => {\n                        // Log the error, since ObservabilityLayer won't see stream errors.\n                        // TODO: it would be nice if we could propagate the get_page() fields here.\n                        span.in_scope(|| {\n                            warn!(\"request failed with {:?}: {}\", status.code(), status.message());\n                        });\n                        page_api::GetPageResponse::try_from_status(status, req_id)?.into()\n                    }\n                }\n            }\n        };\n\n        Ok(tonic::Response::new(Box::pin(resps)))\n    }\n\n    #[instrument(skip_all, fields(rel, lsn, allow_missing))]\n    async fn get_rel_size(\n        &self,\n        req: tonic::Request<proto::GetRelSizeRequest>,\n    ) -> Result<tonic::Response<proto::GetRelSizeResponse>, tonic::Status> {\n        let received_at = extract::<ReceivedAt>(&req).0;\n        let timeline = self.get_request_timeline_shard_zero(&req).await?;\n        let ctx = self.ctx.with_scope_page_service_pagestream(&timeline);\n\n        // 
Validate the request, decorate the span, and convert it to a Pagestream request.\n        let req: page_api::GetRelSizeRequest = req.into_inner().try_into()?;\n        let allow_missing = req.allow_missing;\n\n        span_record!(rel=%req.rel, lsn=%req.read_lsn, allow_missing=%req.allow_missing);\n\n        let req = PagestreamNblocksRequest {\n            hdr: Self::make_hdr(req.read_lsn, None),\n            rel: req.rel,\n        };\n\n        // Execute the request and convert the response.\n        let _timer = Self::record_op_start_and_throttle(\n            &timeline,\n            metrics::SmgrQueryType::GetRelSize,\n            received_at,\n        )\n        .await?;\n\n        let resp =\n            PageServerHandler::handle_get_nblocks_request(&timeline, &req, allow_missing, &ctx)\n                .await?;\n        let resp: page_api::GetRelSizeResponse = resp.map(|resp| resp.n_blocks);\n\n        Ok(tonic::Response::new(resp.into()))\n    }\n\n    #[instrument(skip_all, fields(kind, segno, lsn))]\n    async fn get_slru_segment(\n        &self,\n        req: tonic::Request<proto::GetSlruSegmentRequest>,\n    ) -> Result<tonic::Response<proto::GetSlruSegmentResponse>, tonic::Status> {\n        let received_at = extract::<ReceivedAt>(&req).0;\n        let timeline = self.get_request_timeline_shard_zero(&req).await?;\n        let ctx = self.ctx.with_scope_page_service_pagestream(&timeline);\n\n        // Validate the request, decorate the span, and convert it to a Pagestream request.\n        let req: page_api::GetSlruSegmentRequest = req.into_inner().try_into()?;\n\n        span_record!(kind=%req.kind, segno=%req.segno, lsn=%req.read_lsn);\n\n        let req = PagestreamGetSlruSegmentRequest {\n            hdr: Self::make_hdr(req.read_lsn, None),\n            kind: req.kind as u8,\n            segno: req.segno,\n        };\n\n        // Execute the request and convert the response.\n        let _timer = Self::record_op_start_and_throttle(\n            
&timeline,\n            metrics::SmgrQueryType::GetSlruSegment,\n            received_at,\n        )\n        .await?;\n\n        let resp =\n            PageServerHandler::handle_get_slru_segment_request(&timeline, &req, &ctx).await?;\n        let resp: page_api::GetSlruSegmentResponse = resp.segment;\n        Ok(tonic::Response::new(resp.into()))\n    }\n\n    #[instrument(skip_all, fields(lsn))]\n    async fn lease_lsn(\n        &self,\n        req: tonic::Request<proto::LeaseLsnRequest>,\n    ) -> Result<tonic::Response<proto::LeaseLsnResponse>, tonic::Status> {\n        // TODO: this won't work during shard splits, as the request is directed at a specific shard\n        // but the parent shard is removed before the split commits and the compute is notified\n        // (which can take several minutes for large tenants). That's also the case for the libpq\n        // implementation, so we keep the behavior for now.\n        let timeline = self.get_request_timeline(&req).await?;\n        let ctx = self.ctx.with_scope_timeline(&timeline);\n\n        // Validate and convert the request, and decorate the span.\n        let req: page_api::LeaseLsnRequest = req.into_inner().try_into()?;\n\n        span_record!(lsn=%req.lsn);\n\n        // Attempt to acquire a lease. Return FailedPrecondition if the lease could not be granted.\n        let lease_length = timeline.get_lsn_lease_length();\n        let expires = match timeline.renew_lsn_lease(req.lsn, lease_length, &ctx) {\n            Ok(lease) => lease.valid_until,\n            Err(err) => return Err(tonic::Status::failed_precondition(format!(\"{err}\"))),\n        };\n\n        // TODO: is this spammy? 
Move it compute-side?\n        info!(\n            \"acquired lease for {} until {}\",\n            req.lsn,\n            chrono::DateTime::<Utc>::from(expires).to_rfc3339()\n        );\n\n        Ok(tonic::Response::new(expires.into()))\n    }\n}\n\n/// gRPC middleware layer that handles observability concerns:\n///\n/// * Creates and enters a tracing span.\n/// * Records the request start time as a ReceivedAt request extension.\n///\n/// TODO: add perf tracing.\n/// TODO: add timing and metrics.\n/// TODO: add logging.\n#[derive(Clone)]\nstruct ObservabilityLayer;\n\nimpl<S: tonic::server::NamedService> tower::Layer<S> for ObservabilityLayer {\n    type Service = ObservabilityLayerService<S>;\n\n    fn layer(&self, inner: S) -> Self::Service {\n        Self::Service { inner }\n    }\n}\n\n#[derive(Clone)]\nstruct ObservabilityLayerService<S> {\n    inner: S,\n}\n\n#[derive(Clone, Copy)]\nstruct ReceivedAt(Instant);\n\nimpl<S: tonic::server::NamedService> tonic::server::NamedService for ObservabilityLayerService<S> {\n    const NAME: &'static str = S::NAME; // propagate inner service name\n}\n\nimpl<S, Req, Resp> tower::Service<http::Request<Req>> for ObservabilityLayerService<S>\nwhere\n    S: tower::Service<http::Request<Req>, Response = http::Response<Resp>> + Send,\n    S::Future: Send + 'static,\n{\n    type Response = S::Response;\n    type Error = S::Error;\n    type Future = BoxFuture<'static, Result<Self::Response, Self::Error>>;\n\n    fn call(&mut self, mut req: http::Request<Req>) -> Self::Future {\n        // Record the request start time as a request extension.\n        //\n        // TODO: we should start a timer here instead, but it currently requires a timeline handle\n        // and SmgrQueryType, which we don't have yet. 
Refactor it to provide it later.\n        req.extensions_mut().insert(ReceivedAt(Instant::now()));\n\n        // Extract the peer address and gRPC method.\n        let peer = req\n            .extensions()\n            .get::<TcpConnectInfo>()\n            .and_then(|info| info.remote_addr())\n            .map(|addr| addr.to_string())\n            .unwrap_or_default();\n\n        let method = req\n            .uri()\n            .path()\n            .split('/')\n            .nth(2)\n            .unwrap_or(req.uri().path())\n            .to_string();\n\n        // Create a basic tracing span.\n        //\n        // Enter the span for the current thread and instrument the future. It is not sufficient to\n        // only instrument the future, since it only takes effect after the future is returned and\n        // polled, not when the inner service is called below (e.g. during interceptor execution).\n        let span = info_span!(\n            \"grpc:pageservice\",\n            // These will be populated by TenantMetadataInterceptor.\n            tenant_id = field::Empty,\n            timeline_id = field::Empty,\n            shard_id = field::Empty,\n            // NB: empty fields must be listed first above. Otherwise, the field names will be\n            // clobbered when the empty fields are populated. They will be output last regardless.\n            %peer,\n            %method,\n        );\n        let _guard = span.enter();\n\n        // Construct a future for calling the inner service, but don't await it. This avoids having\n        // to clone the inner service into the future below.\n        let call = self.inner.call(req);\n\n        async move {\n            // Await the inner service call.\n            let result = call.await;\n\n            // Log gRPC error statuses. This won't include request info from handler spans, but it\n            // will catch all errors (even those emitted before handler spans are constructed). 
Only\n            // unary request errors are logged here, not streaming response errors.\n            if let Ok(ref resp) = result\n                && let Some(status) = tonic::Status::from_header_map(resp.headers())\n                && status.code() != tonic::Code::Ok\n            {\n                // TODO: it would be nice if we could propagate the handler span's request fields\n                // here. This could e.g. be done by attaching the request fields to\n                // tonic::Status::metadata via a proc macro.\n                warn!(\n                    \"request failed with {:?}: {}\",\n                    status.code(),\n                    status.message()\n                );\n            }\n\n            result\n        }\n        .instrument(span.clone())\n        .boxed()\n    }\n\n    fn poll_ready(&mut self, cx: &mut Context<'_>) -> Poll<Result<(), Self::Error>> {\n        self.inner.poll_ready(cx)\n    }\n}\n\n/// gRPC interceptor that decodes tenant metadata and stores it as request extensions of type\n/// TenantTimelineId and ShardIndex.\n#[derive(Clone)]\nstruct TenantMetadataInterceptor;\n\nimpl tonic::service::Interceptor for TenantMetadataInterceptor {\n    fn call(&mut self, mut req: tonic::Request<()>) -> Result<tonic::Request<()>, tonic::Status> {\n        // Decode the tenant ID.\n        let tenant_id = req\n            .metadata()\n            .get(\"neon-tenant-id\")\n            .ok_or_else(|| tonic::Status::invalid_argument(\"missing neon-tenant-id\"))?\n            .to_str()\n            .map_err(|_| tonic::Status::invalid_argument(\"invalid neon-tenant-id\"))?;\n        let tenant_id = TenantId::from_str(tenant_id)\n            .map_err(|_| tonic::Status::invalid_argument(\"invalid neon-tenant-id\"))?;\n\n        // Decode the timeline ID.\n        let timeline_id = req\n            .metadata()\n            .get(\"neon-timeline-id\")\n            .ok_or_else(|| tonic::Status::invalid_argument(\"missing 
neon-timeline-id\"))?\n            .to_str()\n            .map_err(|_| tonic::Status::invalid_argument(\"invalid neon-timeline-id\"))?;\n        let timeline_id = TimelineId::from_str(timeline_id)\n            .map_err(|_| tonic::Status::invalid_argument(\"invalid neon-timeline-id\"))?;\n\n        // Decode the shard ID.\n        let shard_id = req\n            .metadata()\n            .get(\"neon-shard-id\")\n            .ok_or_else(|| tonic::Status::invalid_argument(\"missing neon-shard-id\"))?\n            .to_str()\n            .map_err(|_| tonic::Status::invalid_argument(\"invalid neon-shard-id\"))?;\n        let shard_id = ShardIndex::from_str(shard_id)\n            .map_err(|_| tonic::Status::invalid_argument(\"invalid neon-shard-id\"))?;\n\n        // Stash them in the request.\n        let extensions = req.extensions_mut();\n        extensions.insert(TenantTimelineId::new(tenant_id, timeline_id));\n        extensions.insert(shard_id);\n\n        // Decorate the tracing span.\n        span_record!(%tenant_id, %timeline_id, %shard_id);\n\n        Ok(req)\n    }\n}\n\n/// Authenticates gRPC page service requests.\n#[derive(Clone)]\nstruct TenantAuthInterceptor {\n    auth: Option<Arc<SwappableJwtAuth>>,\n}\n\nimpl TenantAuthInterceptor {\n    fn new(auth: Option<Arc<SwappableJwtAuth>>) -> Self {\n        Self { auth }\n    }\n}\n\nimpl tonic::service::Interceptor for TenantAuthInterceptor {\n    fn call(&mut self, req: tonic::Request<()>) -> Result<tonic::Request<()>, tonic::Status> {\n        // Do nothing if auth is disabled.\n        let Some(auth) = self.auth.as_ref() else {\n            return Ok(req);\n        };\n\n        // Fetch the tenant ID from the request extensions (set by TenantMetadataInterceptor).\n        let TenantTimelineId { tenant_id, .. 
} = *extract::<TenantTimelineId>(&req);\n\n        // Fetch and decode the JWT token.\n        let jwt = req\n            .metadata()\n            .get(\"authorization\")\n            .ok_or_else(|| tonic::Status::unauthenticated(\"no authorization header\"))?\n            .to_str()\n            .map_err(|_| tonic::Status::invalid_argument(\"invalid authorization header\"))?\n            .strip_prefix(\"Bearer \")\n            .ok_or_else(|| tonic::Status::invalid_argument(\"invalid authorization header\"))?\n            .trim();\n        let jwtdata: TokenData<Claims> = auth\n            .decode(jwt)\n            .map_err(|err| tonic::Status::invalid_argument(format!(\"invalid JWT token: {err}\")))?;\n        let claims = jwtdata.claims;\n\n        // Check if the token is valid for this tenant.\n        check_permission(&claims, Some(tenant_id))\n            .map_err(|err| tonic::Status::permission_denied(err.to_string()))?;\n\n        // TODO: consider stashing the claims in the request extensions, if needed.\n\n        Ok(req)\n    }\n}\n\n/// Extracts the given type from the request extensions, or panics if it is missing.\nfn extract<T: Send + Sync + 'static>(req: &tonic::Request<impl Any>) -> &T {\n    extract_from(req.extensions())\n}\n\n/// Extract the given type from the request extensions, or panics if it is missing. 
This variant\n/// can extract both from a tonic::Request and http::Request.\nfn extract_from<T: Send + Sync + 'static>(ext: &http::Extensions) -> &T {\n    let Some(value) = ext.get::<T>() else {\n        let name = std::any::type_name::<T>();\n        panic!(\"extension {name} should be set by middleware\");\n    };\n    value\n}\n\n#[derive(Debug, thiserror::Error)]\npub(crate) enum GetActiveTimelineError {\n    #[error(transparent)]\n    Tenant(GetActiveTenantError),\n    #[error(transparent)]\n    Timeline(#[from] GetTimelineError),\n}\n\nimpl From<GetActiveTimelineError> for QueryError {\n    fn from(e: GetActiveTimelineError) -> Self {\n        match e {\n            GetActiveTimelineError::Tenant(GetActiveTenantError::Cancelled) => QueryError::Shutdown,\n            GetActiveTimelineError::Tenant(e) => e.into(),\n            GetActiveTimelineError::Timeline(e) => QueryError::NotFound(format!(\"{e}\").into()),\n        }\n    }\n}\n\nimpl From<GetActiveTimelineError> for tonic::Status {\n    fn from(err: GetActiveTimelineError) -> Self {\n        let message = err.to_string();\n        let code = match err {\n            GetActiveTimelineError::Tenant(err) => tonic::Status::from(err).code(),\n            GetActiveTimelineError::Timeline(err) => tonic::Status::from(err).code(),\n        };\n        tonic::Status::new(code, message)\n    }\n}\n\nimpl From<GetTimelineError> for tonic::Status {\n    fn from(err: GetTimelineError) -> Self {\n        use tonic::Code;\n        let code = match &err {\n            GetTimelineError::NotFound { .. } => Code::NotFound,\n            GetTimelineError::NotActive { .. } => Code::Unavailable,\n            GetTimelineError::ShuttingDown => Code::Unavailable,\n        };\n        tonic::Status::new(code, err.to_string())\n    }\n}\n\nimpl From<GetActiveTenantError> for QueryError {\n    fn from(e: GetActiveTenantError) -> Self {\n        match e {\n            GetActiveTenantError::WaitForActiveTimeout { .. 
} => QueryError::Disconnected(\n                ConnectionError::Io(io::Error::new(io::ErrorKind::TimedOut, e.to_string())),\n            ),\n            GetActiveTenantError::Cancelled\n            | GetActiveTenantError::WillNotBecomeActive(TenantState::Stopping { .. }) => {\n                QueryError::Shutdown\n            }\n            e @ GetActiveTenantError::NotFound(_) => QueryError::NotFound(format!(\"{e}\").into()),\n            e => QueryError::Other(anyhow::anyhow!(e)),\n        }\n    }\n}\n\nimpl From<GetActiveTenantError> for tonic::Status {\n    fn from(err: GetActiveTenantError) -> Self {\n        use tonic::Code;\n        let code = match &err {\n            GetActiveTenantError::Broken(_) => Code::Internal,\n            GetActiveTenantError::Cancelled => Code::Unavailable,\n            GetActiveTenantError::NotFound(_) => Code::NotFound,\n            GetActiveTenantError::SwitchedTenant => Code::Unavailable,\n            GetActiveTenantError::WaitForActiveTimeout { .. 
} => Code::Unavailable,\n            GetActiveTenantError::WillNotBecomeActive(_) => Code::Unavailable,\n        };\n        tonic::Status::new(code, err.to_string())\n    }\n}\n\nimpl From<HandleUpgradeError> for QueryError {\n    fn from(e: HandleUpgradeError) -> Self {\n        match e {\n            HandleUpgradeError::ShutDown => QueryError::Shutdown,\n        }\n    }\n}\n\nimpl From<HandleUpgradeError> for tonic::Status {\n    fn from(err: HandleUpgradeError) -> Self {\n        match err {\n            HandleUpgradeError::ShutDown => tonic::Status::unavailable(\"timeline is shutting down\"),\n        }\n    }\n}\n\nfn set_tracing_field_shard_id(timeline: &Timeline) {\n    debug_assert_current_span_has_tenant_and_timeline_id_no_shard_id();\n    tracing::Span::current().record(\n        \"shard_id\",\n        tracing::field::display(timeline.tenant_shard_id.shard_slug()),\n    );\n    debug_assert_current_span_has_tenant_and_timeline_id();\n}\n\nstruct WaitedForLsn(Lsn);\nimpl From<WaitedForLsn> for Lsn {\n    fn from(WaitedForLsn(lsn): WaitedForLsn) -> Self {\n        lsn\n    }\n}\n\n#[cfg(test)]\nmod tests {\n    use utils::shard::ShardCount;\n\n    use super::*;\n\n    #[test]\n    fn pageservice_cmd_parse() {\n        let tenant_id = TenantId::generate();\n        let timeline_id = TimelineId::generate();\n        let cmd =\n            PageServiceCmd::parse(&format!(\"pagestream_v2 {tenant_id} {timeline_id}\")).unwrap();\n        assert_eq!(\n            cmd,\n            PageServiceCmd::PageStream(PageStreamCmd {\n                tenant_id,\n                timeline_id,\n                protocol_version: PagestreamProtocolVersion::V2,\n            })\n        );\n        let cmd = PageServiceCmd::parse(&format!(\"basebackup {tenant_id} {timeline_id}\")).unwrap();\n        assert_eq!(\n            cmd,\n            PageServiceCmd::BaseBackup(BaseBackupCmd {\n                tenant_id,\n                timeline_id,\n                lsn: None,\n            
    gzip: false,\n                replica: false\n            })\n        );\n        let cmd =\n            PageServiceCmd::parse(&format!(\"basebackup {tenant_id} {timeline_id} --gzip\")).unwrap();\n        assert_eq!(\n            cmd,\n            PageServiceCmd::BaseBackup(BaseBackupCmd {\n                tenant_id,\n                timeline_id,\n                lsn: None,\n                gzip: true,\n                replica: false\n            })\n        );\n        let cmd =\n            PageServiceCmd::parse(&format!(\"basebackup {tenant_id} {timeline_id} latest\")).unwrap();\n        assert_eq!(\n            cmd,\n            PageServiceCmd::BaseBackup(BaseBackupCmd {\n                tenant_id,\n                timeline_id,\n                lsn: None,\n                gzip: false,\n                replica: false\n            })\n        );\n        let cmd = PageServiceCmd::parse(&format!(\"basebackup {tenant_id} {timeline_id} 0/16ABCDE\"))\n            .unwrap();\n        assert_eq!(\n            cmd,\n            PageServiceCmd::BaseBackup(BaseBackupCmd {\n                tenant_id,\n                timeline_id,\n                lsn: Some(Lsn::from_str(\"0/16ABCDE\").unwrap()),\n                gzip: false,\n                replica: false\n            })\n        );\n        let cmd = PageServiceCmd::parse(&format!(\n            \"basebackup {tenant_id} {timeline_id} --replica --gzip\"\n        ))\n        .unwrap();\n        assert_eq!(\n            cmd,\n            PageServiceCmd::BaseBackup(BaseBackupCmd {\n                tenant_id,\n                timeline_id,\n                lsn: None,\n                gzip: true,\n                replica: true\n            })\n        );\n        let cmd = PageServiceCmd::parse(&format!(\n            \"basebackup {tenant_id} {timeline_id} 0/16ABCDE --replica --gzip\"\n        ))\n        .unwrap();\n        assert_eq!(\n            cmd,\n            PageServiceCmd::BaseBackup(BaseBackupCmd {\n                
tenant_id,\n                timeline_id,\n                lsn: Some(Lsn::from_str(\"0/16ABCDE\").unwrap()),\n                gzip: true,\n                replica: true\n            })\n        );\n        let cmd = PageServiceCmd::parse(&format!(\"fullbackup {tenant_id} {timeline_id}\")).unwrap();\n        assert_eq!(\n            cmd,\n            PageServiceCmd::FullBackup(FullBackupCmd {\n                tenant_id,\n                timeline_id,\n                lsn: None,\n                prev_lsn: None\n            })\n        );\n        let cmd = PageServiceCmd::parse(&format!(\n            \"fullbackup {tenant_id} {timeline_id} 0/16ABCDE 0/16ABCDF\"\n        ))\n        .unwrap();\n        assert_eq!(\n            cmd,\n            PageServiceCmd::FullBackup(FullBackupCmd {\n                tenant_id,\n                timeline_id,\n                lsn: Some(Lsn::from_str(\"0/16ABCDE\").unwrap()),\n                prev_lsn: Some(Lsn::from_str(\"0/16ABCDF\").unwrap()),\n            })\n        );\n        let tenant_shard_id = TenantShardId::unsharded(tenant_id);\n        let cmd = PageServiceCmd::parse(&format!(\n            \"lease lsn {tenant_shard_id} {timeline_id} 0/16ABCDE\"\n        ))\n        .unwrap();\n        assert_eq!(\n            cmd,\n            PageServiceCmd::LeaseLsn(LeaseLsnCmd {\n                tenant_shard_id,\n                timeline_id,\n                lsn: Lsn::from_str(\"0/16ABCDE\").unwrap(),\n            })\n        );\n        let tenant_shard_id = TenantShardId::split(&tenant_shard_id, ShardCount(8))[1];\n        let cmd = PageServiceCmd::parse(&format!(\n            \"lease lsn {tenant_shard_id} {timeline_id} 0/16ABCDE\"\n        ))\n        .unwrap();\n        assert_eq!(\n            cmd,\n            PageServiceCmd::LeaseLsn(LeaseLsnCmd {\n                tenant_shard_id,\n                timeline_id,\n                lsn: Lsn::from_str(\"0/16ABCDE\").unwrap(),\n            })\n        );\n        let cmd = 
PageServiceCmd::parse(\"set a = b\").unwrap();\n        assert_eq!(cmd, PageServiceCmd::Set);\n        let cmd = PageServiceCmd::parse(\"SET foo\").unwrap();\n        assert_eq!(cmd, PageServiceCmd::Set);\n    }\n\n    #[test]\n    fn pageservice_cmd_err_handling() {\n        let tenant_id = TenantId::generate();\n        let timeline_id = TimelineId::generate();\n        let cmd = PageServiceCmd::parse(\"unknown_command\");\n        assert!(cmd.is_err());\n        let cmd = PageServiceCmd::parse(\"pagestream_v2\");\n        assert!(cmd.is_err());\n        let cmd = PageServiceCmd::parse(&format!(\"pagestream_v2 {tenant_id}xxx\"));\n        assert!(cmd.is_err());\n        let cmd = PageServiceCmd::parse(&format!(\"pagestream_v2 {tenant_id}xxx {timeline_id}xxx\"));\n        assert!(cmd.is_err());\n        let cmd = PageServiceCmd::parse(&format!(\n            \"basebackup {tenant_id} {timeline_id} --gzip --gzip\"\n        ));\n        assert!(cmd.is_err());\n        let cmd = PageServiceCmd::parse(&format!(\n            \"basebackup {tenant_id} {timeline_id} --gzip --unknown\"\n        ));\n        assert!(cmd.is_err());\n        let cmd = PageServiceCmd::parse(&format!(\n            \"basebackup {tenant_id} {timeline_id} --gzip 0/16ABCDE\"\n        ));\n        assert!(cmd.is_err());\n        let cmd = PageServiceCmd::parse(&format!(\"lease {tenant_id} {timeline_id} gzip 0/16ABCDE\"));\n        assert!(cmd.is_err());\n    }\n\n    #[test]\n    fn test_parse_options() {\n        let (config, has_error) = parse_options(\" -c neon.compute_mode=primary \");\n        assert!(!has_error);\n        assert_eq!(\n            config,\n            vec![(\"neon.compute_mode\".to_string(), \"primary\".to_string())]\n        );\n\n        let (config, has_error) = parse_options(\" -c neon.compute_mode=primary -c foo=bar \");\n        assert!(!has_error);\n        assert_eq!(\n            config,\n            vec![\n                (\"neon.compute_mode\".to_string(), 
\"primary\".to_string()),\n                (\"foo\".to_string(), \"bar\".to_string()),\n            ]\n        );\n\n        let (config, has_error) = parse_options(\" -c neon.compute_mode=primary -cfoo=bar\");\n        assert!(!has_error);\n        assert_eq!(\n            config,\n            vec![\n                (\"neon.compute_mode\".to_string(), \"primary\".to_string()),\n                (\"foo\".to_string(), \"bar\".to_string()),\n            ]\n        );\n\n        let (_, has_error) = parse_options(\"-c\");\n        assert!(has_error);\n\n        let (_, has_error) = parse_options(\"-c foo=bar -c -c\");\n        assert!(has_error);\n\n        let (_, has_error) = parse_options(\"    \");\n        assert!(!has_error);\n\n        let (_, has_error) = parse_options(\" -c neon.compute_mode\");\n        assert!(has_error);\n    }\n}\n"
  },
  {
    "path": "pageserver/src/pgdatadir_mapping.rs",
    "content": "//!\n//! This provides an abstraction to store PostgreSQL relations and other files\n//! in the key-value store that implements the Repository interface.\n//!\n//! (TODO: The line between PUT-functions here and walingest.rs is a bit blurry, as\n//! walingest.rs handles a few things like implicit relation creation and extension.\n//! Clarify that)\n//!\nuse std::collections::{BTreeSet, HashMap, HashSet, hash_map};\nuse std::ops::{ControlFlow, Range};\nuse std::sync::Arc;\n\nuse crate::walingest::{WalIngestError, WalIngestErrorKind};\nuse crate::{PERF_TRACE_TARGET, ensure_walingest};\nuse anyhow::Context;\nuse bytes::{Buf, Bytes, BytesMut};\nuse enum_map::Enum;\nuse pageserver_api::key::{\n    AUX_FILES_KEY, CHECKPOINT_KEY, CONTROLFILE_KEY, CompactKey, DBDIR_KEY, Key, RelDirExists,\n    TWOPHASEDIR_KEY, dbdir_key_range, rel_block_to_key, rel_dir_to_key, rel_key_range,\n    rel_size_to_key, rel_tag_sparse_key, rel_tag_sparse_key_range, relmap_file_key,\n    repl_origin_key, repl_origin_key_range, slru_block_to_key, slru_dir_to_key,\n    slru_segment_key_range, slru_segment_size_to_key, twophase_file_key, twophase_key_range,\n};\nuse pageserver_api::keyspace::{KeySpaceRandomAccum, SparseKeySpace};\nuse pageserver_api::models::RelSizeMigration;\nuse pageserver_api::reltag::{BlockNumber, RelTag, SlruKind};\nuse pageserver_api::shard::ShardIdentity;\nuse postgres_ffi::{BLCKSZ, PgMajorVersion, TransactionId};\nuse postgres_ffi_types::forknum::{FSM_FORKNUM, VISIBILITYMAP_FORKNUM};\nuse postgres_ffi_types::{Oid, RepOriginId, TimestampTz};\nuse serde::{Deserialize, Serialize};\nuse strum::IntoEnumIterator;\nuse tokio_util::sync::CancellationToken;\nuse tracing::{debug, info, info_span, trace, warn};\nuse utils::bin_ser::{BeSer, DeserializeError};\nuse utils::lsn::Lsn;\nuse utils::pausable_failpoint;\nuse wal_decoder::models::record::NeonWalRecord;\nuse wal_decoder::models::value::Value;\nuse wal_decoder::serialized_batch::{SerializedValueBatch, 
ValueMeta};\n\nuse super::tenant::{PageReconstructError, Timeline};\nuse crate::aux_file;\nuse crate::context::{PerfInstrumentFutureExt, RequestContext, RequestContextBuilder};\nuse crate::keyspace::{KeySpace, KeySpaceAccum};\nuse crate::metrics::{\n    RELSIZE_CACHE_MISSES_OLD, RELSIZE_LATEST_CACHE_ENTRIES, RELSIZE_LATEST_CACHE_HITS,\n    RELSIZE_LATEST_CACHE_MISSES, RELSIZE_SNAPSHOT_CACHE_ENTRIES, RELSIZE_SNAPSHOT_CACHE_HITS,\n    RELSIZE_SNAPSHOT_CACHE_MISSES,\n};\nuse crate::span::{\n    debug_assert_current_span_has_tenant_and_timeline_id,\n    debug_assert_current_span_has_tenant_and_timeline_id_no_shard_id,\n};\nuse crate::tenant::storage_layer::IoConcurrency;\nuse crate::tenant::timeline::{GetVectoredError, VersionedKeySpaceQuery};\n\n/// Max delta records appended to the AUX_FILES_KEY (for aux v1). The write path will write a full image once this threshold is reached.\npub const MAX_AUX_FILE_DELTAS: usize = 1024;\n\n/// Max number of aux-file-related delta layers. The compaction will create a new image layer once this threshold is reached.\npub const MAX_AUX_FILE_V2_DELTAS: usize = 16;\n\n#[derive(Debug)]\npub enum LsnForTimestamp {\n    /// Found commits both before and after the given timestamp\n    Present(Lsn),\n\n    /// Found no commits after the given timestamp, this means\n    /// that the newest data in the branch is older than the given\n    /// timestamp.\n    ///\n    /// All commits <= LSN happened before the given timestamp\n    Future(Lsn),\n\n    /// The queried timestamp is past our horizon we look back at (PITR)\n    ///\n    /// All commits > LSN happened after the given timestamp,\n    /// but any commits < LSN might have happened before or after\n    /// the given timestamp. 
We don't know because no data before\n    /// the given lsn is available.\n    Past(Lsn),\n\n    /// We have found no commit with a timestamp,\n    /// so we can't return anything meaningful.\n    ///\n    /// The associated LSN is the lower bound value we can safely\n    /// create branches on, but no statement is made if it is\n    /// older or newer than the timestamp.\n    ///\n    /// This variant can e.g. be returned right after a\n    /// cluster import.\n    NoData(Lsn),\n}\n\n/// Each request to page server contains LSN range: `not_modified_since..request_lsn`.\n/// See comments libs/pageserver_api/src/models.rs.\n/// Based on this range and `last_record_lsn` PS calculates `effective_lsn`.\n/// But to distinguish requests from primary and replicas we need also to pass `request_lsn`.\n#[derive(Debug, Clone, Copy, Default)]\npub struct LsnRange {\n    pub effective_lsn: Lsn,\n    pub request_lsn: Lsn,\n}\n\nimpl LsnRange {\n    pub fn at(lsn: Lsn) -> LsnRange {\n        LsnRange {\n            effective_lsn: lsn,\n            request_lsn: lsn,\n        }\n    }\n    pub fn is_latest(&self) -> bool {\n        self.request_lsn == Lsn::MAX\n    }\n}\n\n#[derive(Debug, thiserror::Error)]\npub(crate) enum CalculateLogicalSizeError {\n    #[error(\"cancelled\")]\n    Cancelled,\n\n    /// Something went wrong while reading the metadata we use to calculate logical size\n    /// Note that cancellation variants of `PageReconstructError` are transformed to [`Self::Cancelled`]\n    /// in the `From` implementation for this variant.\n    #[error(transparent)]\n    PageRead(PageReconstructError),\n\n    /// Something went wrong deserializing metadata that we read to calculate logical size\n    #[error(\"decode error: {0}\")]\n    Decode(#[from] DeserializeError),\n}\n\n#[derive(Debug, thiserror::Error)]\npub(crate) enum CollectKeySpaceError {\n    #[error(transparent)]\n    Decode(#[from] DeserializeError),\n    #[error(transparent)]\n    
PageRead(PageReconstructError),\n    #[error(\"cancelled\")]\n    Cancelled,\n}\n\nimpl CollectKeySpaceError {\n    pub(crate) fn is_cancel(&self) -> bool {\n        match self {\n            CollectKeySpaceError::Decode(_) => false,\n            CollectKeySpaceError::PageRead(e) => e.is_cancel(),\n            CollectKeySpaceError::Cancelled => true,\n        }\n    }\n    pub(crate) fn into_anyhow(self) -> anyhow::Error {\n        match self {\n            CollectKeySpaceError::Decode(e) => anyhow::Error::new(e),\n            CollectKeySpaceError::PageRead(e) => anyhow::Error::new(e),\n            CollectKeySpaceError::Cancelled => anyhow::Error::new(self),\n        }\n    }\n}\n\nimpl From<PageReconstructError> for CollectKeySpaceError {\n    fn from(err: PageReconstructError) -> Self {\n        match err {\n            PageReconstructError::Cancelled => Self::Cancelled,\n            err => Self::PageRead(err),\n        }\n    }\n}\n\nimpl From<PageReconstructError> for CalculateLogicalSizeError {\n    fn from(pre: PageReconstructError) -> Self {\n        match pre {\n            PageReconstructError::Cancelled => Self::Cancelled,\n            _ => Self::PageRead(pre),\n        }\n    }\n}\n\n#[derive(Debug, thiserror::Error)]\npub enum RelationError {\n    #[error(\"invalid relnode\")]\n    InvalidRelnode,\n}\n\n///\n/// This impl provides all the functionality to store PostgreSQL relations, SLRUs,\n/// and other special kinds of files, in a versioned key-value store. 
The\n/// Timeline struct provides the key-value store.\n///\n/// This is a separate impl, so that we can easily include all these functions in a Timeline\n/// implementation, and might be moved into a separate struct later.\nimpl Timeline {\n    /// Start ingesting a WAL record, or other atomic modification of\n    /// the timeline.\n    ///\n    /// This provides a transaction-like interface to perform a bunch\n    /// of modifications atomically.\n    ///\n    /// To ingest a WAL record, call begin_modification(lsn) to get a\n    /// DatadirModification object. Use the functions in the object to\n    /// modify the repository state, updating all the pages and metadata\n    /// that the WAL record affects. When you're done, call commit() to\n    /// commit the changes.\n    ///\n    /// Lsn stored in modification is advanced by `ingest_record` and\n    /// is used by `commit()` to update `last_record_lsn`.\n    ///\n    /// Calling commit() will flush all the changes and reset the state,\n    /// so the `DatadirModification` struct can be reused to perform the next modification.\n    ///\n    /// Note that any pending modifications you make through the\n    /// modification object won't be visible to calls to the 'get' and list\n    /// functions of the timeline until you finish! 
And if you update the\n    /// same page twice, the last update wins.\n    ///\n    pub fn begin_modification(&self, lsn: Lsn) -> DatadirModification\n    where\n        Self: Sized,\n    {\n        DatadirModification {\n            tline: self,\n            pending_lsns: Vec::new(),\n            pending_metadata_pages: HashMap::new(),\n            pending_data_batch: None,\n            pending_deletions: Vec::new(),\n            pending_nblocks: 0,\n            pending_directory_entries: Vec::new(),\n            pending_metadata_bytes: 0,\n            is_importing_pgdata: false,\n            lsn,\n        }\n    }\n\n    pub fn begin_modification_for_import(&self, lsn: Lsn) -> DatadirModification\n    where\n        Self: Sized,\n    {\n        DatadirModification {\n            tline: self,\n            pending_lsns: Vec::new(),\n            pending_metadata_pages: HashMap::new(),\n            pending_data_batch: None,\n            pending_deletions: Vec::new(),\n            pending_nblocks: 0,\n            pending_directory_entries: Vec::new(),\n            pending_metadata_bytes: 0,\n            is_importing_pgdata: true,\n            lsn,\n        }\n    }\n\n    //------------------------------------------------------------------------------\n    // Public GET functions\n    //------------------------------------------------------------------------------\n\n    /// Look up given page version.\n    pub(crate) async fn get_rel_page_at_lsn(\n        &self,\n        tag: RelTag,\n        blknum: BlockNumber,\n        version: Version<'_>,\n        ctx: &RequestContext,\n        io_concurrency: IoConcurrency,\n    ) -> Result<Bytes, PageReconstructError> {\n        match version {\n            Version::LsnRange(lsns) => {\n                let pages: smallvec::SmallVec<[_; 1]> = smallvec::smallvec![(tag, blknum)];\n                let res = self\n                    .get_rel_page_at_lsn_batched(\n                        pages\n                            .iter()\n 
                           .map(|(tag, blknum)| (tag, blknum, lsns, ctx.attached_child())),\n                        io_concurrency.clone(),\n                        ctx,\n                    )\n                    .await;\n                assert_eq!(res.len(), 1);\n                res.into_iter().next().unwrap()\n            }\n            Version::Modified(modification) => {\n                if tag.relnode == 0 {\n                    return Err(PageReconstructError::Other(\n                        RelationError::InvalidRelnode.into(),\n                    ));\n                }\n\n                let nblocks = self.get_rel_size(tag, version, ctx).await?;\n                if blknum >= nblocks {\n                    debug!(\n                        \"read beyond EOF at {} blk {} at {}, size is {}: returning all-zeros page\",\n                        tag,\n                        blknum,\n                        version.get_lsn(),\n                        nblocks\n                    );\n                    return Ok(ZERO_PAGE.clone());\n                }\n\n                let key = rel_block_to_key(tag, blknum);\n                modification.get(key, ctx).await\n            }\n        }\n    }\n\n    /// Like [`Self::get_rel_page_at_lsn`], but returns a batch of pages.\n    ///\n    /// The ordering of the returned vec corresponds to the ordering of `pages`.\n    ///\n    /// NB: the read path must be cancellation-safe. The Tonic gRPC service will drop the future\n    /// if the client goes away (e.g. 
due to timeout or cancellation).\n    /// TODO: verify that it actually is cancellation-safe.\n    pub(crate) async fn get_rel_page_at_lsn_batched(\n        &self,\n        pages: impl ExactSizeIterator<Item = (&RelTag, &BlockNumber, LsnRange, RequestContext)>,\n        io_concurrency: IoConcurrency,\n        ctx: &RequestContext,\n    ) -> Vec<Result<Bytes, PageReconstructError>> {\n        debug_assert_current_span_has_tenant_and_timeline_id();\n\n        let mut slots_filled = 0;\n        let page_count = pages.len();\n\n        // Would be nice to use smallvec here but it doesn't provide the spare_capacity_mut() API.\n        let mut result = Vec::with_capacity(pages.len());\n        let result_slots = result.spare_capacity_mut();\n\n        let mut keys_slots: HashMap<Key, smallvec::SmallVec<[(usize, RequestContext); 1]>> =\n            HashMap::with_capacity(pages.len());\n\n        let mut req_keyspaces: HashMap<Lsn, KeySpaceRandomAccum> =\n            HashMap::with_capacity(pages.len());\n\n        for (response_slot_idx, (tag, blknum, lsns, ctx)) in pages.enumerate() {\n            if tag.relnode == 0 {\n                result_slots[response_slot_idx].write(Err(PageReconstructError::Other(\n                    RelationError::InvalidRelnode.into(),\n                )));\n\n                slots_filled += 1;\n                continue;\n            }\n            let lsn = lsns.effective_lsn;\n            let nblocks = {\n                let ctx = RequestContextBuilder::from(&ctx)\n                    .perf_span(|crnt_perf_span| {\n                        info_span!(\n                            target: PERF_TRACE_TARGET,\n                            parent: crnt_perf_span,\n                            \"GET_REL_SIZE\",\n                            reltag=%tag,\n                            lsn=%lsn,\n                        )\n                    })\n                    .attached_child();\n\n                match self\n                    .get_rel_size(*tag, 
Version::LsnRange(lsns), &ctx)\n                    .maybe_perf_instrument(&ctx, |crnt_perf_span| crnt_perf_span.clone())\n                    .await\n                {\n                    Ok(nblocks) => nblocks,\n                    Err(err) => {\n                        result_slots[response_slot_idx].write(Err(err));\n                        slots_filled += 1;\n                        continue;\n                    }\n                }\n            };\n\n            if *blknum >= nblocks {\n                debug!(\n                    \"read beyond EOF at {} blk {} at {}, size is {}: returning all-zeros page\",\n                    tag, blknum, lsn, nblocks\n                );\n                result_slots[response_slot_idx].write(Ok(ZERO_PAGE.clone()));\n                slots_filled += 1;\n                continue;\n            }\n\n            let key = rel_block_to_key(*tag, *blknum);\n\n            let ctx = RequestContextBuilder::from(&ctx)\n                .perf_span(|crnt_perf_span| {\n                    info_span!(\n                        target: PERF_TRACE_TARGET,\n                        parent: crnt_perf_span,\n                        \"GET_BATCH\",\n                        batch_size = %page_count,\n                    )\n                })\n                .attached_child();\n\n            let key_slots = keys_slots.entry(key).or_default();\n            key_slots.push((response_slot_idx, ctx));\n\n            let acc = req_keyspaces.entry(lsn).or_default();\n            acc.add_key(key);\n        }\n\n        let query: Vec<(Lsn, KeySpace)> = req_keyspaces\n            .into_iter()\n            .map(|(lsn, acc)| (lsn, acc.to_keyspace()))\n            .collect();\n\n        let query = VersionedKeySpaceQuery::scattered(query);\n        let res = self\n            .get_vectored(query, io_concurrency, ctx)\n            .maybe_perf_instrument(ctx, |current_perf_span| current_perf_span.clone())\n            .await;\n\n        match res {\n            
Ok(results) => {\n                for (key, res) in results {\n                    let mut key_slots = keys_slots.remove(&key).unwrap().into_iter();\n                    let (first_slot, first_req_ctx) = key_slots.next().unwrap();\n\n                    for (slot, req_ctx) in key_slots {\n                        let clone = match &res {\n                            Ok(buf) => Ok(buf.clone()),\n                            Err(err) => Err(match err {\n                                PageReconstructError::Cancelled => PageReconstructError::Cancelled,\n\n                                x @ PageReconstructError::Other(_)\n                                | x @ PageReconstructError::AncestorLsnTimeout(_)\n                                | x @ PageReconstructError::WalRedo(_)\n                                | x @ PageReconstructError::MissingKey(_) => {\n                                    PageReconstructError::Other(anyhow::anyhow!(\n                                        \"there was more than one request for this key in the batch, error logged once: {x:?}\"\n                                    ))\n                                }\n                            }),\n                        };\n\n                        result_slots[slot].write(clone);\n                        // There is no standardized way to express that the batched span followed from N request spans.\n                        // So, abuse the system and mark the request contexts as follows_from the batch span, so we get\n                        // some linkage in our trace viewer. 
It allows us to answer: which GET_VECTORED did this GET_PAGE wait for.\n                        req_ctx.perf_follows_from(ctx);\n                        slots_filled += 1;\n                    }\n\n                    result_slots[first_slot].write(res);\n                    first_req_ctx.perf_follows_from(ctx);\n                    slots_filled += 1;\n                }\n            }\n            Err(err) => {\n                // this cannot really happen because get_vectored only errors globally on invalid LSN or too large batch size\n                // (We enforce the max batch size outside of this function, in the code that constructs the batch request.)\n                for (slot, req_ctx) in keys_slots.values().flatten() {\n                    // this whole `match` is a lot like `From<GetVectoredError> for PageReconstructError`\n                    // but without taking ownership of the GetVectoredError\n                    let err = match &err {\n                        GetVectoredError::Cancelled => Err(PageReconstructError::Cancelled),\n                        // TODO: restructure get_vectored API to make this error per-key\n                        GetVectoredError::MissingKey(err) => {\n                            Err(PageReconstructError::Other(anyhow::anyhow!(\n                                \"whole vectored get request failed because one or more of the requested keys were missing: {err:?}\"\n                            )))\n                        }\n                        // TODO: restructure get_vectored API to make this error per-key\n                        GetVectoredError::GetReadyAncestorError(err) => {\n                            Err(PageReconstructError::Other(anyhow::anyhow!(\n                                \"whole vectored get request failed because one or more key required ancestor that wasn't ready: {err:?}\"\n                            )))\n                        }\n                        // TODO: restructure get_vectored API to 
make this error per-key\n                        GetVectoredError::Other(err) => Err(PageReconstructError::Other(\n                            anyhow::anyhow!(\"whole vectored get request failed: {err:?}\"),\n                        )),\n                        // TODO: we can prevent this error class by moving this check into the type system\n                        GetVectoredError::InvalidLsn(e) => {\n                            Err(anyhow::anyhow!(\"invalid LSN: {e:?}\").into())\n                        }\n                        // NB: this should never happen in practice because we limit batch size to be smaller than max_get_vectored_keys\n                        // TODO: we can prevent this error class by moving this check into the type system\n                        GetVectoredError::Oversized(err, max) => {\n                            Err(anyhow::anyhow!(\"batching oversized: {err} > {max}\").into())\n                        }\n                    };\n\n                    req_ctx.perf_follows_from(ctx);\n                    result_slots[*slot].write(err);\n                }\n\n                slots_filled += keys_slots.values().map(|slots| slots.len()).sum::<usize>();\n            }\n        };\n\n        assert_eq!(slots_filled, page_count);\n        // SAFETY:\n        // 1. `result` and any of its uninint members are not read from until this point\n        // 2. The length below is tracked at run-time and matches the number of requested pages.\n        unsafe {\n            result.set_len(page_count);\n        }\n\n        result\n    }\n\n    /// Get size of a database in blocks. This is only accurate on shard 0. 
It will undercount on\n    /// other shards, by only accounting for relations the shard has pages for, and only accounting\n    /// for pages up to the highest page number it has stored.\n    pub(crate) async fn get_db_size(\n        &self,\n        spcnode: Oid,\n        dbnode: Oid,\n        version: Version<'_>,\n        ctx: &RequestContext,\n    ) -> Result<usize, PageReconstructError> {\n        let mut total_blocks = 0;\n\n        let rels = self.list_rels(spcnode, dbnode, version, ctx).await?;\n\n        if rels.is_empty() {\n            return Ok(0);\n        }\n\n        // Pre-deserialize the rel directory to avoid duplicated work in `get_relsize_cached`.\n        let reldir_key = rel_dir_to_key(spcnode, dbnode);\n        let buf = version.get(self, reldir_key, ctx).await?;\n        let reldir = RelDirectory::des(&buf)?;\n\n        for rel in rels {\n            let n_blocks = self\n                .get_rel_size_in_reldir(rel, version, Some((reldir_key, &reldir)), false, ctx)\n                .await?\n                .expect(\"allow_missing=false\");\n            total_blocks += n_blocks as usize;\n        }\n        Ok(total_blocks)\n    }\n\n    /// Get size of a relation file. The relation must exist, otherwise an error is returned.\n    ///\n    /// This is only accurate on shard 0. On other shards, it will return the size up to the highest\n    /// page number stored in the shard.\n    pub(crate) async fn get_rel_size(\n        &self,\n        tag: RelTag,\n        version: Version<'_>,\n        ctx: &RequestContext,\n    ) -> Result<BlockNumber, PageReconstructError> {\n        Ok(self\n            .get_rel_size_in_reldir(tag, version, None, false, ctx)\n            .await?\n            .expect(\"allow_missing=false\"))\n    }\n\n    /// Get size of a relation file. 
If `allow_missing` is true, returns None for missing relations,\n    /// otherwise errors.\n    ///\n    /// INVARIANT: never returns None if `allow_missing=false`.\n    ///\n    /// See [`Self::get_rel_exists_in_reldir`] on why we need `deserialized_reldir_v1`.\n    pub(crate) async fn get_rel_size_in_reldir(\n        &self,\n        tag: RelTag,\n        version: Version<'_>,\n        deserialized_reldir_v1: Option<(Key, &RelDirectory)>,\n        allow_missing: bool,\n        ctx: &RequestContext,\n    ) -> Result<Option<BlockNumber>, PageReconstructError> {\n        if tag.relnode == 0 {\n            return Err(PageReconstructError::Other(\n                RelationError::InvalidRelnode.into(),\n            ));\n        }\n\n        if let Some(nblocks) = self.get_cached_rel_size(&tag, version) {\n            return Ok(Some(nblocks));\n        }\n\n        if allow_missing\n            && !self\n                .get_rel_exists_in_reldir(tag, version, deserialized_reldir_v1, ctx)\n                .await?\n        {\n            return Ok(None);\n        }\n\n        if (tag.forknum == FSM_FORKNUM || tag.forknum == VISIBILITYMAP_FORKNUM)\n            && !self\n                .get_rel_exists_in_reldir(tag, version, deserialized_reldir_v1, ctx)\n                .await?\n        {\n            // FIXME: Postgres sometimes calls smgrcreate() to create\n            // FSM, and smgrnblocks() on it immediately afterwards,\n            // without extending it.  Tolerate that by claiming that\n            // any non-existent FSM fork has size 0.\n            return Ok(Some(0));\n        }\n\n        let key = rel_size_to_key(tag);\n        let mut buf = version.get(self, key, ctx).await?;\n        let nblocks = buf.get_u32_le();\n\n        self.update_cached_rel_size(tag, version, nblocks);\n\n        Ok(Some(nblocks))\n    }\n\n    /// Does the relation exist?\n    ///\n    /// Only shard 0 has a full view of the relations. 
Other shards only know about relations that\n    /// the shard stores pages for.\n    ///\n    pub(crate) async fn get_rel_exists(\n        &self,\n        tag: RelTag,\n        version: Version<'_>,\n        ctx: &RequestContext,\n    ) -> Result<bool, PageReconstructError> {\n        self.get_rel_exists_in_reldir(tag, version, None, ctx).await\n    }\n\n    async fn get_rel_exists_in_reldir_v1(\n        &self,\n        tag: RelTag,\n        version: Version<'_>,\n        deserialized_reldir_v1: Option<(Key, &RelDirectory)>,\n        ctx: &RequestContext,\n    ) -> Result<bool, PageReconstructError> {\n        let key = rel_dir_to_key(tag.spcnode, tag.dbnode);\n        if let Some((cached_key, dir)) = deserialized_reldir_v1 {\n            if cached_key == key {\n                return Ok(dir.rels.contains(&(tag.relnode, tag.forknum)));\n            } else if cfg!(test) || cfg!(feature = \"testing\") {\n                panic!(\"cached reldir key mismatch: {cached_key} != {key}\");\n            } else {\n                warn!(\"cached reldir key mismatch: {cached_key} != {key}\");\n            }\n            // Fallback to reading the directory from the datadir.\n        }\n\n        let buf = version.get(self, key, ctx).await?;\n\n        let dir = RelDirectory::des(&buf)?;\n        Ok(dir.rels.contains(&(tag.relnode, tag.forknum)))\n    }\n\n    async fn get_rel_exists_in_reldir_v2(\n        &self,\n        tag: RelTag,\n        version: Version<'_>,\n        ctx: &RequestContext,\n    ) -> Result<bool, PageReconstructError> {\n        let key = rel_tag_sparse_key(tag.spcnode, tag.dbnode, tag.relnode, tag.forknum);\n        let buf = RelDirExists::decode_option(version.sparse_get(self, key, ctx).await?).map_err(\n            |_| {\n                PageReconstructError::Other(anyhow::anyhow!(\n                    \"invalid reldir key: decode failed, {}\",\n                    key\n                ))\n            },\n        )?;\n        let exists_v2 = buf == 
RelDirExists::Exists;\n        Ok(exists_v2)\n    }\n\n    /// Does the relation exist? With a cached deserialized `RelDirectory`.\n    ///\n    /// There are some cases where the caller loops across all relations. In that specific case,\n    /// the caller should obtain the deserialized `RelDirectory` first and then call this function\n    /// to avoid duplicated work of deserliazation. This is a hack and should be removed by introducing\n    /// a new API (e.g., `get_rel_exists_batched`).\n    pub(crate) async fn get_rel_exists_in_reldir(\n        &self,\n        tag: RelTag,\n        version: Version<'_>,\n        deserialized_reldir_v1: Option<(Key, &RelDirectory)>,\n        ctx: &RequestContext,\n    ) -> Result<bool, PageReconstructError> {\n        if tag.relnode == 0 {\n            return Err(PageReconstructError::Other(\n                RelationError::InvalidRelnode.into(),\n            ));\n        }\n\n        // first try to lookup relation in cache\n        if let Some(_nblocks) = self.get_cached_rel_size(&tag, version) {\n            return Ok(true);\n        }\n        // then check if the database was already initialized.\n        // get_rel_exists can be called before dbdir is created.\n        let buf = version.get(self, DBDIR_KEY, ctx).await?;\n        let dbdirs = DbDirectory::des(&buf)?.dbdirs;\n        if !dbdirs.contains_key(&(tag.spcnode, tag.dbnode)) {\n            return Ok(false);\n        }\n\n        let (v2_status, migrated_lsn) = self.get_rel_size_v2_status();\n\n        match v2_status {\n            RelSizeMigration::Legacy => {\n                let v1_exists = self\n                    .get_rel_exists_in_reldir_v1(tag, version, deserialized_reldir_v1, ctx)\n                    .await?;\n                Ok(v1_exists)\n            }\n            RelSizeMigration::Migrating | RelSizeMigration::Migrated\n                if version.get_lsn() < migrated_lsn.unwrap_or(Lsn(0)) =>\n            {\n                // For requests below the 
migrated LSN, we still use the v1 read path.\n                let v1_exists = self\n                    .get_rel_exists_in_reldir_v1(tag, version, deserialized_reldir_v1, ctx)\n                    .await?;\n                Ok(v1_exists)\n            }\n            RelSizeMigration::Migrating => {\n                let v1_exists = self\n                    .get_rel_exists_in_reldir_v1(tag, version, deserialized_reldir_v1, ctx)\n                    .await?;\n                let v2_exists_res = self.get_rel_exists_in_reldir_v2(tag, version, ctx).await;\n                match v2_exists_res {\n                    Ok(v2_exists) if v1_exists == v2_exists => {}\n                    Ok(v2_exists) => {\n                        tracing::warn!(\n                            \"inconsistent v1/v2 reldir keyspace for rel {}: v1_exists={}, v2_exists={}\",\n                            tag,\n                            v1_exists,\n                            v2_exists\n                        );\n                    }\n                    Err(e) => {\n                        tracing::warn!(\"failed to get rel exists in v2: {e}\");\n                    }\n                }\n                Ok(v1_exists)\n            }\n            RelSizeMigration::Migrated => {\n                let v2_exists = self.get_rel_exists_in_reldir_v2(tag, version, ctx).await?;\n                Ok(v2_exists)\n            }\n        }\n    }\n\n    async fn list_rels_v1(\n        &self,\n        spcnode: Oid,\n        dbnode: Oid,\n        version: Version<'_>,\n        ctx: &RequestContext,\n    ) -> Result<HashSet<RelTag>, PageReconstructError> {\n        let key = rel_dir_to_key(spcnode, dbnode);\n        let buf = version.get(self, key, ctx).await?;\n        let dir = RelDirectory::des(&buf)?;\n        let rels_v1: HashSet<RelTag> =\n            HashSet::from_iter(dir.rels.iter().map(|(relnode, forknum)| RelTag {\n                spcnode,\n                dbnode,\n                relnode: *relnode,\n        
        forknum: *forknum,\n            }));\n        Ok(rels_v1)\n    }\n\n    async fn list_rels_v2(\n        &self,\n        spcnode: Oid,\n        dbnode: Oid,\n        version: Version<'_>,\n        ctx: &RequestContext,\n    ) -> Result<HashSet<RelTag>, PageReconstructError> {\n        let key_range = rel_tag_sparse_key_range(spcnode, dbnode);\n        let io_concurrency = IoConcurrency::spawn_from_conf(\n            self.conf.get_vectored_concurrent_io,\n            self.gate\n                .enter()\n                .map_err(|_| PageReconstructError::Cancelled)?,\n        );\n        let results = self\n            .scan(\n                KeySpace::single(key_range),\n                version.get_lsn(),\n                ctx,\n                io_concurrency,\n            )\n            .await?;\n        let mut rels = HashSet::new();\n        for (key, val) in results {\n            let val = RelDirExists::decode(&val?).map_err(|_| {\n                PageReconstructError::Other(anyhow::anyhow!(\n                    \"invalid reldir key: decode failed, {}\",\n                    key\n                ))\n            })?;\n            if key.field6 != 1 {\n                return Err(PageReconstructError::Other(anyhow::anyhow!(\n                    \"invalid reldir key: field6 != 1, {}\",\n                    key\n                )));\n            }\n            if key.field2 != spcnode {\n                return Err(PageReconstructError::Other(anyhow::anyhow!(\n                    \"invalid reldir key: field2 != spcnode, {}\",\n                    key\n                )));\n            }\n            if key.field3 != dbnode {\n                return Err(PageReconstructError::Other(anyhow::anyhow!(\n                    \"invalid reldir key: field3 != dbnode, {}\",\n                    key\n                )));\n            }\n            let tag = RelTag {\n                spcnode,\n                dbnode,\n                relnode: key.field4,\n                
forknum: key.field5,\n            };\n            if val == RelDirExists::Removed {\n                debug_assert!(!rels.contains(&tag), \"removed reltag in v2\");\n                continue;\n            }\n            let did_not_contain = rels.insert(tag);\n            debug_assert!(did_not_contain, \"duplicate reltag in v2\");\n        }\n        Ok(rels)\n    }\n\n    /// Get a list of all existing relations in given tablespace and database.\n    ///\n    /// Only shard 0 has a full view of the relations. Other shards only know about relations that\n    /// the shard stores pages for.\n    ///\n    /// # Cancel-Safety\n    ///\n    /// This method is cancellation-safe.\n    pub(crate) async fn list_rels(\n        &self,\n        spcnode: Oid,\n        dbnode: Oid,\n        version: Version<'_>,\n        ctx: &RequestContext,\n    ) -> Result<HashSet<RelTag>, PageReconstructError> {\n        let (v2_status, migrated_lsn) = self.get_rel_size_v2_status();\n\n        match v2_status {\n            RelSizeMigration::Legacy => {\n                let rels_v1 = self.list_rels_v1(spcnode, dbnode, version, ctx).await?;\n                Ok(rels_v1)\n            }\n            RelSizeMigration::Migrating | RelSizeMigration::Migrated\n                if version.get_lsn() < migrated_lsn.unwrap_or(Lsn(0)) =>\n            {\n                // For requests below the migrated LSN, we still use the v1 read path.\n                let rels_v1 = self.list_rels_v1(spcnode, dbnode, version, ctx).await?;\n                Ok(rels_v1)\n            }\n            RelSizeMigration::Migrating => {\n                let rels_v1 = self.list_rels_v1(spcnode, dbnode, version, ctx).await?;\n                let rels_v2_res = self.list_rels_v2(spcnode, dbnode, version, ctx).await;\n                match rels_v2_res {\n                    Ok(rels_v2) if rels_v1 == rels_v2 => {}\n                    Ok(rels_v2) => {\n                        tracing::warn!(\n                            \"inconsistent 
v1/v2 reldir keyspace for db {} {}: v1_rels.len()={}, v2_rels.len()={}\",\n                            spcnode,\n                            dbnode,\n                            rels_v1.len(),\n                            rels_v2.len()\n                        );\n                    }\n                    Err(e) => {\n                        tracing::warn!(\"failed to list rels in v2: {e}\");\n                    }\n                }\n                Ok(rels_v1)\n            }\n            RelSizeMigration::Migrated => {\n                let rels_v2 = self.list_rels_v2(spcnode, dbnode, version, ctx).await?;\n                Ok(rels_v2)\n            }\n        }\n    }\n\n    /// Get the whole SLRU segment\n    pub(crate) async fn get_slru_segment(\n        &self,\n        kind: SlruKind,\n        segno: u32,\n        lsn: Lsn,\n        ctx: &RequestContext,\n    ) -> Result<Bytes, PageReconstructError> {\n        assert!(self.tenant_shard_id.is_shard_zero());\n        let n_blocks = self\n            .get_slru_segment_size(kind, segno, Version::at(lsn), ctx)\n            .await?;\n\n        let keyspace = KeySpace::single(\n            slru_block_to_key(kind, segno, 0)..slru_block_to_key(kind, segno, n_blocks),\n        );\n\n        let batches = keyspace.partition(\n            self.get_shard_identity(),\n            self.conf.max_get_vectored_keys.get() as u64 * BLCKSZ as u64,\n            BLCKSZ as u64,\n        );\n\n        let io_concurrency = IoConcurrency::spawn_from_conf(\n            self.conf.get_vectored_concurrent_io,\n            self.gate\n                .enter()\n                .map_err(|_| PageReconstructError::Cancelled)?,\n        );\n\n        let mut segment = BytesMut::with_capacity(n_blocks as usize * BLCKSZ as usize);\n        for batch in batches.parts {\n            let query = VersionedKeySpaceQuery::uniform(batch, lsn);\n            let blocks = self\n                .get_vectored(query, io_concurrency.clone(), ctx)\n                
.await?;\n\n            for (_key, block) in blocks {\n                let block = block?;\n                segment.extend_from_slice(&block[..BLCKSZ as usize]);\n            }\n        }\n\n        Ok(segment.freeze())\n    }\n\n    /// Get size of an SLRU segment\n    pub(crate) async fn get_slru_segment_size(\n        &self,\n        kind: SlruKind,\n        segno: u32,\n        version: Version<'_>,\n        ctx: &RequestContext,\n    ) -> Result<BlockNumber, PageReconstructError> {\n        assert!(self.tenant_shard_id.is_shard_zero());\n        let key = slru_segment_size_to_key(kind, segno);\n        let mut buf = version.get(self, key, ctx).await?;\n        Ok(buf.get_u32_le())\n    }\n\n    /// Does the slru segment exist?\n    pub(crate) async fn get_slru_segment_exists(\n        &self,\n        kind: SlruKind,\n        segno: u32,\n        version: Version<'_>,\n        ctx: &RequestContext,\n    ) -> Result<bool, PageReconstructError> {\n        assert!(self.tenant_shard_id.is_shard_zero());\n        // fetch directory listing\n        let key = slru_dir_to_key(kind);\n        let buf = version.get(self, key, ctx).await?;\n\n        let dir = SlruSegmentDirectory::des(&buf)?;\n        Ok(dir.segments.contains(&segno))\n    }\n\n    /// Locate LSN, such that all transactions that committed before\n    /// 'search_timestamp' are visible, but nothing newer is.\n    ///\n    /// This is not exact. 
Commit timestamps are not guaranteed to be ordered,\n    /// so it's not well defined which LSN you get if there were multiple commits\n    /// \"in flight\" at that point in time.\n    ///\n    pub(crate) async fn find_lsn_for_timestamp(\n        &self,\n        search_timestamp: TimestampTz,\n        cancel: &CancellationToken,\n        ctx: &RequestContext,\n    ) -> Result<LsnForTimestamp, PageReconstructError> {\n        pausable_failpoint!(\"find-lsn-for-timestamp-pausable\");\n\n        let gc_cutoff_lsn_guard = self.get_applied_gc_cutoff_lsn();\n        let gc_cutoff_planned = {\n            let gc_info = self.gc_info.read().unwrap();\n            info!(cutoffs=?gc_info.cutoffs, applied_cutoff=%*gc_cutoff_lsn_guard, \"starting find_lsn_for_timestamp\");\n            gc_info.min_cutoff()\n        };\n        // Usually the planned cutoff is newer than the cutoff of the last gc run,\n        // but let's be defensive.\n        let gc_cutoff = gc_cutoff_planned.max(*gc_cutoff_lsn_guard);\n        // We use this method to figure out the branching LSN for the new branch, but the\n        // GC cutoff could be before the branching point and we cannot create a new branch\n        // with LSN < `ancestor_lsn`. Thus, pick the maximum of these two to be\n        // on the safe side.\n        let min_lsn = std::cmp::max(gc_cutoff, self.get_ancestor_lsn());\n        let max_lsn = self.get_last_record_lsn();\n\n        // LSNs are always 8-byte aligned. 
low/mid/high represent the\n        // LSN divided by 8.\n        let mut low = min_lsn.0 / 8;\n        let mut high = max_lsn.0 / 8 + 1;\n\n        let mut found_smaller = false;\n        let mut found_larger = false;\n\n        while low < high {\n            if cancel.is_cancelled() {\n                return Err(PageReconstructError::Cancelled);\n            }\n            // cannot overflow, high and low are both smaller than u64::MAX / 2\n            let mid = (high + low) / 2;\n\n            let cmp = match self\n                .is_latest_commit_timestamp_ge_than(\n                    search_timestamp,\n                    Lsn(mid * 8),\n                    &mut found_smaller,\n                    &mut found_larger,\n                    ctx,\n                )\n                .await\n            {\n                Ok(res) => res,\n                Err(PageReconstructError::MissingKey(e)) => {\n                    warn!(\n                        \"Missing key while find_lsn_for_timestamp. Either we might have already garbage-collected that data or the key is really missing. 
Last error: {:#}\",\n                        e\n                    );\n                    // Return that we didn't find any requests smaller than the LSN, and logging the error.\n                    return Ok(LsnForTimestamp::Past(min_lsn));\n                }\n                Err(e) => return Err(e),\n            };\n\n            if cmp {\n                high = mid;\n            } else {\n                low = mid + 1;\n            }\n        }\n\n        // If `found_smaller == true`, `low = t + 1` where `t` is the target LSN,\n        // so the LSN of the last commit record before or at `search_timestamp`.\n        // Remove one from `low` to get `t`.\n        //\n        // FIXME: it would be better to get the LSN of the previous commit.\n        // Otherwise, if you restore to the returned LSN, the database will\n        // include physical changes from later commits that will be marked\n        // as aborted, and will need to be vacuumed away.\n        let commit_lsn = Lsn((low - 1) * 8);\n        match (found_smaller, found_larger) {\n            (false, false) => {\n                // This can happen if no commit records have been processed yet, e.g.\n                // just after importing a cluster.\n                Ok(LsnForTimestamp::NoData(min_lsn))\n            }\n            (false, true) => {\n                // Didn't find any commit timestamps smaller than the request\n                Ok(LsnForTimestamp::Past(min_lsn))\n            }\n            (true, _) if commit_lsn < min_lsn => {\n                // the search above did set found_smaller to true but it never increased the lsn.\n                // Then, low is still the old min_lsn, and the subtraction above gave a value\n                // below the min_lsn. 
We should never do that.\n                Ok(LsnForTimestamp::Past(min_lsn))\n            }\n            (true, false) => {\n                // Only found commits with timestamps smaller than the request.\n                // It's still a valid case for branch creation, return it.\n                // And `update_gc_info()` ignores LSN for a `LsnForTimestamp::Future`\n                // case, anyway.\n                Ok(LsnForTimestamp::Future(commit_lsn))\n            }\n            (true, true) => Ok(LsnForTimestamp::Present(commit_lsn)),\n        }\n    }\n\n    /// Subroutine of find_lsn_for_timestamp(). Returns true, if there are any\n    /// commits that committed after 'search_timestamp', at LSN 'probe_lsn'.\n    ///\n    /// Additionally, sets 'found_smaller'/'found_Larger, if encounters any commits\n    /// with a smaller/larger timestamp.\n    ///\n    pub(crate) async fn is_latest_commit_timestamp_ge_than(\n        &self,\n        search_timestamp: TimestampTz,\n        probe_lsn: Lsn,\n        found_smaller: &mut bool,\n        found_larger: &mut bool,\n        ctx: &RequestContext,\n    ) -> Result<bool, PageReconstructError> {\n        self.map_all_timestamps(probe_lsn, ctx, |timestamp| {\n            if timestamp >= search_timestamp {\n                *found_larger = true;\n                return ControlFlow::Break(true);\n            } else {\n                *found_smaller = true;\n            }\n            ControlFlow::Continue(())\n        })\n        .await\n    }\n\n    /// Obtain the timestamp for the given lsn.\n    ///\n    /// If the lsn has no timestamps (e.g. 
no commits), returns None.\n    pub(crate) async fn get_timestamp_for_lsn(\n        &self,\n        probe_lsn: Lsn,\n        ctx: &RequestContext,\n    ) -> Result<Option<TimestampTz>, PageReconstructError> {\n        let mut max: Option<TimestampTz> = None;\n        self.map_all_timestamps::<()>(probe_lsn, ctx, |timestamp| {\n            if let Some(max_prev) = max {\n                max = Some(max_prev.max(timestamp));\n            } else {\n                max = Some(timestamp);\n            }\n            ControlFlow::Continue(())\n        })\n        .await?;\n\n        Ok(max)\n    }\n\n    /// Runs the given function on all the timestamps for a given lsn\n    ///\n    /// The return value is either given by the closure, or set to the `Default`\n    /// impl's output.\n    async fn map_all_timestamps<T: Default>(\n        &self,\n        probe_lsn: Lsn,\n        ctx: &RequestContext,\n        mut f: impl FnMut(TimestampTz) -> ControlFlow<T>,\n    ) -> Result<T, PageReconstructError> {\n        for segno in self\n            .list_slru_segments(SlruKind::Clog, Version::at(probe_lsn), ctx)\n            .await?\n        {\n            let nblocks = self\n                .get_slru_segment_size(SlruKind::Clog, segno, Version::at(probe_lsn), ctx)\n                .await?;\n\n            let keyspace = KeySpace::single(\n                slru_block_to_key(SlruKind::Clog, segno, 0)\n                    ..slru_block_to_key(SlruKind::Clog, segno, nblocks),\n            );\n\n            let batches = keyspace.partition(\n                self.get_shard_identity(),\n                self.conf.max_get_vectored_keys.get() as u64 * BLCKSZ as u64,\n                BLCKSZ as u64,\n            );\n\n            let io_concurrency = IoConcurrency::spawn_from_conf(\n                self.conf.get_vectored_concurrent_io,\n                self.gate\n                    .enter()\n                    .map_err(|_| PageReconstructError::Cancelled)?,\n            );\n\n            for 
batch in batches.parts.into_iter().rev() {\n                let query = VersionedKeySpaceQuery::uniform(batch, probe_lsn);\n                let blocks = self\n                    .get_vectored(query, io_concurrency.clone(), ctx)\n                    .await?;\n\n                for (_key, clog_page) in blocks.into_iter().rev() {\n                    let clog_page = clog_page?;\n\n                    if clog_page.len() == BLCKSZ as usize + 8 {\n                        let mut timestamp_bytes = [0u8; 8];\n                        timestamp_bytes.copy_from_slice(&clog_page[BLCKSZ as usize..]);\n                        let timestamp = TimestampTz::from_be_bytes(timestamp_bytes);\n\n                        match f(timestamp) {\n                            ControlFlow::Break(b) => return Ok(b),\n                            ControlFlow::Continue(()) => (),\n                        }\n                    }\n                }\n            }\n        }\n        Ok(Default::default())\n    }\n\n    pub(crate) async fn get_slru_keyspace(\n        &self,\n        version: Version<'_>,\n        ctx: &RequestContext,\n    ) -> Result<KeySpace, PageReconstructError> {\n        let mut accum = KeySpaceAccum::new();\n\n        for kind in SlruKind::iter() {\n            let mut segments: Vec<u32> = self\n                .list_slru_segments(kind, version, ctx)\n                .await?\n                .into_iter()\n                .collect();\n            segments.sort_unstable();\n\n            for seg in segments {\n                let block_count = self.get_slru_segment_size(kind, seg, version, ctx).await?;\n\n                accum.add_range(\n                    slru_block_to_key(kind, seg, 0)..slru_block_to_key(kind, seg, block_count),\n                );\n            }\n        }\n\n        Ok(accum.to_keyspace())\n    }\n\n    /// Get a list of SLRU segments\n    pub(crate) async fn list_slru_segments(\n        &self,\n        kind: SlruKind,\n        version: Version<'_>,\n     
   ctx: &RequestContext,\n    ) -> Result<HashSet<u32>, PageReconstructError> {\n        // fetch directory entry\n        let key = slru_dir_to_key(kind);\n\n        let buf = version.get(self, key, ctx).await?;\n        Ok(SlruSegmentDirectory::des(&buf)?.segments)\n    }\n\n    pub(crate) async fn get_relmap_file(\n        &self,\n        spcnode: Oid,\n        dbnode: Oid,\n        version: Version<'_>,\n        ctx: &RequestContext,\n    ) -> Result<Bytes, PageReconstructError> {\n        let key = relmap_file_key(spcnode, dbnode);\n\n        let buf = version.get(self, key, ctx).await?;\n        Ok(buf)\n    }\n\n    pub(crate) async fn list_dbdirs(\n        &self,\n        lsn: Lsn,\n        ctx: &RequestContext,\n    ) -> Result<HashMap<(Oid, Oid), bool>, PageReconstructError> {\n        // fetch directory entry\n        let buf = self.get(DBDIR_KEY, lsn, ctx).await?;\n\n        Ok(DbDirectory::des(&buf)?.dbdirs)\n    }\n\n    pub(crate) async fn get_twophase_file(\n        &self,\n        xid: u64,\n        lsn: Lsn,\n        ctx: &RequestContext,\n    ) -> Result<Bytes, PageReconstructError> {\n        let key = twophase_file_key(xid);\n        let buf = self.get(key, lsn, ctx).await?;\n        Ok(buf)\n    }\n\n    pub(crate) async fn list_twophase_files(\n        &self,\n        lsn: Lsn,\n        ctx: &RequestContext,\n    ) -> Result<HashSet<u64>, PageReconstructError> {\n        // fetch directory entry\n        let buf = self.get(TWOPHASEDIR_KEY, lsn, ctx).await?;\n\n        if self.pg_version >= PgMajorVersion::PG17 {\n            Ok(TwoPhaseDirectoryV17::des(&buf)?.xids)\n        } else {\n            Ok(TwoPhaseDirectory::des(&buf)?\n                .xids\n                .iter()\n                .map(|x| u64::from(*x))\n                .collect())\n        }\n    }\n\n    pub(crate) async fn get_control_file(\n        &self,\n        lsn: Lsn,\n        ctx: &RequestContext,\n    ) -> Result<Bytes, PageReconstructError> {\n        
self.get(CONTROLFILE_KEY, lsn, ctx).await\n    }\n\n    pub(crate) async fn get_checkpoint(\n        &self,\n        lsn: Lsn,\n        ctx: &RequestContext,\n    ) -> Result<Bytes, PageReconstructError> {\n        self.get(CHECKPOINT_KEY, lsn, ctx).await\n    }\n\n    async fn list_aux_files_v2(\n        &self,\n        lsn: Lsn,\n        ctx: &RequestContext,\n        io_concurrency: IoConcurrency,\n    ) -> Result<HashMap<String, Bytes>, PageReconstructError> {\n        let kv = self\n            .scan(\n                KeySpace::single(Key::metadata_aux_key_range()),\n                lsn,\n                ctx,\n                io_concurrency,\n            )\n            .await?;\n        let mut result = HashMap::new();\n        let mut sz = 0;\n        for (_, v) in kv {\n            let v = v?;\n            let v = aux_file::decode_file_value_bytes(&v)\n                .context(\"value decode\")\n                .map_err(PageReconstructError::Other)?;\n            for (fname, content) in v {\n                sz += fname.len();\n                sz += content.len();\n                result.insert(fname, content);\n            }\n        }\n        self.aux_file_size_estimator.on_initial(sz);\n        Ok(result)\n    }\n\n    pub(crate) async fn trigger_aux_file_size_computation(\n        &self,\n        lsn: Lsn,\n        ctx: &RequestContext,\n        io_concurrency: IoConcurrency,\n    ) -> Result<(), PageReconstructError> {\n        self.list_aux_files_v2(lsn, ctx, io_concurrency).await?;\n        Ok(())\n    }\n\n    pub(crate) async fn list_aux_files(\n        &self,\n        lsn: Lsn,\n        ctx: &RequestContext,\n        io_concurrency: IoConcurrency,\n    ) -> Result<HashMap<String, Bytes>, PageReconstructError> {\n        self.list_aux_files_v2(lsn, ctx, io_concurrency).await\n    }\n\n    pub(crate) async fn get_replorigins(\n        &self,\n        lsn: Lsn,\n        ctx: &RequestContext,\n        io_concurrency: IoConcurrency,\n    ) -> 
Result<HashMap<RepOriginId, Lsn>, PageReconstructError> {\n        let kv = self\n            .scan(\n                KeySpace::single(repl_origin_key_range()),\n                lsn,\n                ctx,\n                io_concurrency,\n            )\n            .await?;\n        let mut result = HashMap::new();\n        for (k, v) in kv {\n            let v = v?;\n            if v.is_empty() {\n                // This is a tombstone -- we can skip it.\n                // Originally, the replorigin code uses `Lsn::INVALID` to represent a tombstone. However, as it is part of\n                // the sparse keyspace and the sparse keyspace uses an empty image to universally represent a tombstone,\n                // we also need to consider that. Such tombstones might be written on the detach ancestor code path to\n                // avoid the value going into the child branch. (See [`crate::tenant::timeline::detach_ancestor::generate_tombstone_image_layer`] for more details.)\n                continue;\n            }\n            let origin_id = k.field6 as RepOriginId;\n            let origin_lsn = Lsn::des(&v)\n                .with_context(|| format!(\"decode replorigin value for {origin_id}: {v:?}\"))?;\n            if origin_lsn != Lsn::INVALID {\n                result.insert(origin_id, origin_lsn);\n            }\n        }\n        Ok(result)\n    }\n\n    /// Does the same as get_current_logical_size but counted on demand.\n    /// Used to initialize the logical size tracking on startup.\n    ///\n    /// Only relation blocks are counted currently. 
That excludes metadata,\n    /// SLRUs, twophase files etc.\n    ///\n    /// # Cancel-Safety\n    ///\n    /// This method is cancellation-safe.\n    pub(crate) async fn get_current_logical_size_non_incremental(\n        &self,\n        lsn: Lsn,\n        ctx: &RequestContext,\n    ) -> Result<u64, CalculateLogicalSizeError> {\n        debug_assert_current_span_has_tenant_and_timeline_id_no_shard_id();\n\n        fail::fail_point!(\"skip-logical-size-calculation\", |_| { Ok(0) });\n\n        // Fetch list of database dirs and iterate them\n        let buf = self.get(DBDIR_KEY, lsn, ctx).await?;\n        let dbdir = DbDirectory::des(&buf)?;\n\n        let mut total_size: u64 = 0;\n        let mut dbdir_cnt = 0;\n        let mut rel_cnt = 0;\n\n        for &(spcnode, dbnode) in dbdir.dbdirs.keys() {\n            dbdir_cnt += 1;\n            for rel in self\n                .list_rels(spcnode, dbnode, Version::at(lsn), ctx)\n                .await?\n            {\n                rel_cnt += 1;\n                if self.cancel.is_cancelled() {\n                    return Err(CalculateLogicalSizeError::Cancelled);\n                }\n                let relsize_key = rel_size_to_key(rel);\n                let mut buf = self.get(relsize_key, lsn, ctx).await?;\n                let relsize = buf.get_u32_le();\n\n                total_size += relsize as u64;\n            }\n        }\n\n        self.db_rel_count\n            .store(Some(Arc::new((dbdir_cnt, rel_cnt))));\n\n        Ok(total_size * BLCKSZ as u64)\n    }\n\n    /// Get a KeySpace that covers all the Keys that are in use at AND below the given LSN. 
This is only used\n    /// for gc-compaction.\n    ///\n    /// gc-compaction cannot use the same `collect_keyspace` function as the legacy compaction because it\n    /// processes data at multiple LSNs and needs to be aware of the fact that some key ranges might need to\n    /// be kept only for a specific range of LSNs.\n    ///\n    /// Consider the case that the user created branches at LSN 10 and 20, where the user created a table A at\n    /// LSN 10 and dropped that table at LSN 20. `collect_keyspace` at LSN 10 will return the key range\n    /// corresponding to that table, while LSN 20 won't. The keyspace info at a single LSN is not enough to\n    /// determine which keys to retain/drop for gc-compaction.\n    ///\n    /// For now, it only drops AUX-v1 keys. But in the future, the function will be extended to return the keyspace\n    /// to be retained for each of the branch LSNs.\n    ///\n    /// The return value is (dense keyspace, sparse keyspace).\n    pub(crate) async fn collect_gc_compaction_keyspace(\n        &self,\n    ) -> Result<(KeySpace, SparseKeySpace), CollectKeySpaceError> {\n        let metadata_key_begin = Key::metadata_key_range().start;\n        let aux_v1_key = AUX_FILES_KEY;\n        let dense_keyspace = KeySpace {\n            ranges: vec![Key::MIN..aux_v1_key, aux_v1_key.next()..metadata_key_begin],\n        };\n        Ok((\n            dense_keyspace,\n            SparseKeySpace(KeySpace::single(Key::metadata_key_range())),\n        ))\n    }\n\n    ///\n    /// Get a KeySpace that covers all the Keys that are in use at the given LSN.\n    /// Anything that's not listed may be removed from the underlying storage (from\n    /// that LSN forwards).\n    ///\n    /// The return value is (dense keyspace, sparse keyspace).\n    pub(crate) async fn collect_keyspace(\n        &self,\n        lsn: Lsn,\n        ctx: &RequestContext,\n    ) -> Result<(KeySpace, SparseKeySpace), CollectKeySpaceError> {\n        // Iterate through key ranges, 
greedily packing them into partitions\n        let mut result = KeySpaceAccum::new();\n\n        // The dbdir metadata always exists\n        result.add_key(DBDIR_KEY);\n\n        // Fetch list of database dirs and iterate them\n        let dbdir = self.list_dbdirs(lsn, ctx).await?;\n        let mut dbs: Vec<((Oid, Oid), bool)> = dbdir.into_iter().collect();\n\n        dbs.sort_unstable_by(|(k_a, _), (k_b, _)| k_a.cmp(k_b));\n        for ((spcnode, dbnode), has_relmap_file) in dbs {\n            if has_relmap_file {\n                result.add_key(relmap_file_key(spcnode, dbnode));\n            }\n            result.add_key(rel_dir_to_key(spcnode, dbnode));\n\n            let mut rels: Vec<RelTag> = self\n                .list_rels(spcnode, dbnode, Version::at(lsn), ctx)\n                .await?\n                .into_iter()\n                .collect();\n            rels.sort_unstable();\n            for rel in rels {\n                let relsize_key = rel_size_to_key(rel);\n                let mut buf = self.get(relsize_key, lsn, ctx).await?;\n                let relsize = buf.get_u32_le();\n\n                result.add_range(rel_block_to_key(rel, 0)..rel_block_to_key(rel, relsize));\n                result.add_key(relsize_key);\n            }\n        }\n\n        // Iterate SLRUs next\n        if self.tenant_shard_id.is_shard_zero() {\n            for kind in [\n                SlruKind::Clog,\n                SlruKind::MultiXactMembers,\n                SlruKind::MultiXactOffsets,\n            ] {\n                let slrudir_key = slru_dir_to_key(kind);\n                result.add_key(slrudir_key);\n                let buf = self.get(slrudir_key, lsn, ctx).await?;\n                let dir = SlruSegmentDirectory::des(&buf)?;\n                let mut segments: Vec<u32> = dir.segments.iter().cloned().collect();\n                segments.sort_unstable();\n                for segno in segments {\n                    let segsize_key = slru_segment_size_to_key(kind, 
segno);\n                    let mut buf = self.get(segsize_key, lsn, ctx).await?;\n                    let segsize = buf.get_u32_le();\n\n                    result.add_range(\n                        slru_block_to_key(kind, segno, 0)..slru_block_to_key(kind, segno, segsize),\n                    );\n                    result.add_key(segsize_key);\n                }\n            }\n        }\n\n        // Then pg_twophase\n        result.add_key(TWOPHASEDIR_KEY);\n\n        let mut xids: Vec<u64> = self\n            .list_twophase_files(lsn, ctx)\n            .await?\n            .iter()\n            .cloned()\n            .collect();\n        xids.sort_unstable();\n        for xid in xids {\n            result.add_key(twophase_file_key(xid));\n        }\n\n        result.add_key(CONTROLFILE_KEY);\n        result.add_key(CHECKPOINT_KEY);\n\n        // Add extra keyspaces in the test cases. Some test cases write keys into the storage without\n        // creating directory keys. These test cases will add such keyspaces into `extra_test_dense_keyspace`\n        // and the keys will not be garbage-collected.\n        #[cfg(test)]\n        {\n            let guard = self.extra_test_dense_keyspace.load();\n            for kr in &guard.ranges {\n                result.add_range(kr.clone());\n            }\n        }\n\n        let dense_keyspace = result.to_keyspace();\n        let sparse_keyspace = SparseKeySpace(KeySpace {\n            ranges: vec![\n                Key::metadata_aux_key_range(),\n                repl_origin_key_range(),\n                Key::rel_dir_sparse_key_range(),\n            ],\n        });\n\n        if cfg!(debug_assertions) {\n            // Verify if the sparse keyspaces are ordered and non-overlapping.\n\n            // We do not use KeySpaceAccum for sparse_keyspace because we want to ensure each\n            // category of sparse keys are split into their own image/delta files. 
If there\n            // are overlapping keyspaces, they will be automatically merged by keyspace accum,\n            // and we want the developer to keep the keyspaces separated.\n\n            let ranges = &sparse_keyspace.0.ranges;\n\n            // TODO: use a single overlaps_with across the codebase\n            fn overlaps_with<T: Ord>(a: &Range<T>, b: &Range<T>) -> bool {\n                !(a.end <= b.start || b.end <= a.start)\n            }\n            for i in 0..ranges.len() {\n                for j in 0..i {\n                    if overlaps_with(&ranges[i], &ranges[j]) {\n                        panic!(\n                            \"overlapping sparse keyspace: {}..{} and {}..{}\",\n                            ranges[i].start, ranges[i].end, ranges[j].start, ranges[j].end\n                        );\n                    }\n                }\n            }\n            for i in 1..ranges.len() {\n                assert!(\n                    ranges[i - 1].end <= ranges[i].start,\n                    \"unordered sparse keyspace: {}..{} and {}..{}\",\n                    ranges[i - 1].start,\n                    ranges[i - 1].end,\n                    ranges[i].start,\n                    ranges[i].end\n                );\n            }\n        }\n\n        Ok((dense_keyspace, sparse_keyspace))\n    }\n\n    /// Get cached size of relation. 
There are two caches: one for primary updates, which captures the latest state\n    /// of the timeline, and a snapshot cache, whose key includes the LSN and so can be used by replicas to get the relation size\n    /// at a particular LSN (snapshot).\n    pub fn get_cached_rel_size(&self, tag: &RelTag, version: Version<'_>) -> Option<BlockNumber> {\n        let lsn = version.get_lsn();\n        {\n            let rel_size_cache = self.rel_size_latest_cache.read().unwrap();\n            if let Some((cached_lsn, nblocks)) = rel_size_cache.get(tag) {\n                if lsn >= *cached_lsn {\n                    RELSIZE_LATEST_CACHE_HITS.inc();\n                    return Some(*nblocks);\n                }\n                RELSIZE_CACHE_MISSES_OLD.inc();\n            }\n        }\n        {\n            let mut rel_size_cache = self.rel_size_snapshot_cache.lock().unwrap();\n            if let Some(nblock) = rel_size_cache.get(&(lsn, *tag)) {\n                RELSIZE_SNAPSHOT_CACHE_HITS.inc();\n                return Some(*nblock);\n            }\n        }\n        if version.is_latest() {\n            RELSIZE_LATEST_CACHE_MISSES.inc();\n        } else {\n            RELSIZE_SNAPSHOT_CACHE_MISSES.inc();\n        }\n        None\n    }\n\n    /// Update cached relation size if there is no more recent update\n    pub fn update_cached_rel_size(&self, tag: RelTag, version: Version<'_>, nblocks: BlockNumber) {\n        let lsn = version.get_lsn();\n        if version.is_latest() {\n            let mut rel_size_cache = self.rel_size_latest_cache.write().unwrap();\n            match rel_size_cache.entry(tag) {\n                hash_map::Entry::Occupied(mut entry) => {\n                    let cached_lsn = entry.get_mut();\n                    if lsn >= cached_lsn.0 {\n                        *cached_lsn = (lsn, nblocks);\n                    }\n                }\n                hash_map::Entry::Vacant(entry) => {\n                    entry.insert((lsn, nblocks));\n                    
RELSIZE_LATEST_CACHE_ENTRIES.inc();\n                }\n            }\n        } else {\n            let mut rel_size_cache = self.rel_size_snapshot_cache.lock().unwrap();\n            if rel_size_cache.capacity() != 0 {\n                rel_size_cache.insert((lsn, tag), nblocks);\n                RELSIZE_SNAPSHOT_CACHE_ENTRIES.set(rel_size_cache.len() as u64);\n            }\n        }\n    }\n\n    /// Store cached relation size\n    pub fn set_cached_rel_size(&self, tag: RelTag, lsn: Lsn, nblocks: BlockNumber) {\n        let mut rel_size_cache = self.rel_size_latest_cache.write().unwrap();\n        if rel_size_cache.insert(tag, (lsn, nblocks)).is_none() {\n            RELSIZE_LATEST_CACHE_ENTRIES.inc();\n        }\n    }\n\n    /// Remove cached relation size\n    pub fn remove_cached_rel_size(&self, tag: &RelTag) {\n        let mut rel_size_cache = self.rel_size_latest_cache.write().unwrap();\n        if rel_size_cache.remove(tag).is_some() {\n            RELSIZE_LATEST_CACHE_ENTRIES.dec();\n        }\n    }\n}\n\n/// DatadirModification represents an operation to ingest an atomic set of\n/// updates to the repository.\n///\n/// It is created by the 'begin_record' function. It is called for each WAL\n/// record, so that all the modifications by a one WAL record appear atomic.\npub struct DatadirModification<'a> {\n    /// The timeline this modification applies to. 
You can access this to\n    /// read the state, but note that any pending updates are *not* reflected\n    /// in the state in 'tline' yet.\n    pub tline: &'a Timeline,\n\n    /// Current LSN of the modification\n    lsn: Lsn,\n\n    // The modifications are not applied directly to the underlying key-value store.\n    // The put-functions add the modifications here, and they are flushed to the\n    // underlying key-value store by the 'finish' function.\n    pending_lsns: Vec<Lsn>,\n    pending_deletions: Vec<(Range<Key>, Lsn)>,\n    pending_nblocks: i64,\n\n    /// Metadata writes, indexed by key so that they can be read from not-yet-committed modifications\n    /// while ingesting subsequent records. See [`Self::is_data_key`] for the definition of 'metadata'.\n    pending_metadata_pages: HashMap<CompactKey, Vec<(Lsn, usize, Value)>>,\n\n    /// Data writes, ready to be flushed into an ephemeral layer. See [`Self::is_data_key`] for\n    /// which keys are stored here.\n    pending_data_batch: Option<SerializedValueBatch>,\n\n    /// For special \"directory\" keys that store key-value maps, track the size of the map\n    /// if it was updated in this modification.\n    pending_directory_entries: Vec<(DirectoryKind, MetricsUpdate)>,\n\n    /// An **approximation** of how many metadata bytes will be written to the EphemeralFile.\n    pending_metadata_bytes: usize,\n\n    /// Whether we are importing a pgdata directory.\n    is_importing_pgdata: bool,\n}\n\n#[derive(Debug, Clone, Copy, PartialEq, Eq)]\npub enum MetricsUpdate {\n    /// Set the metrics to this value\n    Set(u64),\n    /// Increment the metrics by this value\n    Add(u64),\n    /// Decrement the metrics by this value\n    Sub(u64),\n}\n\n/// Controls the behavior of the reldir keyspace.\npub struct RelDirMode {\n    // Whether we can read the v2 keyspace or not.\n    current_status: RelSizeMigration,\n    // Whether we should initialize the v2 keyspace or not.\n    initialize: bool,\n}\n\nimpl 
DatadirModification<'_> {\n    // When a DatadirModification is committed, we do a monolithic serialization of all its contents.  WAL records can\n    // contain multiple pages, so the pageserver's record-based batch size isn't sufficient to bound this allocation: we\n    // additionally specify a limit on how much payload a DatadirModification may contain before it should be committed.\n    pub(crate) const MAX_PENDING_BYTES: usize = 8 * 1024 * 1024;\n\n    /// Get the current lsn\n    pub(crate) fn get_lsn(&self) -> Lsn {\n        self.lsn\n    }\n\n    pub(crate) fn approx_pending_bytes(&self) -> usize {\n        self.pending_data_batch\n            .as_ref()\n            .map_or(0, |b| b.buffer_size())\n            + self.pending_metadata_bytes\n    }\n\n    pub(crate) fn has_dirty_data(&self) -> bool {\n        self.pending_data_batch\n            .as_ref()\n            .is_some_and(|b| b.has_data())\n    }\n\n    /// Returns statistics about the currently pending modifications.\n    pub(crate) fn stats(&self) -> DatadirModificationStats {\n        let mut stats = DatadirModificationStats::default();\n        for (_, _, value) in self.pending_metadata_pages.values().flatten() {\n            match value {\n                Value::Image(_) => stats.metadata_images += 1,\n                Value::WalRecord(r) if r.will_init() => stats.metadata_images += 1,\n                Value::WalRecord(_) => stats.metadata_deltas += 1,\n            }\n        }\n        for valuemeta in self.pending_data_batch.iter().flat_map(|b| &b.metadata) {\n            match valuemeta {\n                ValueMeta::Serialized(s) if s.will_init => stats.data_images += 1,\n                ValueMeta::Serialized(_) => stats.data_deltas += 1,\n                ValueMeta::Observed(_) => {}\n            }\n        }\n        stats\n    }\n\n    /// Set the current lsn\n    pub(crate) fn set_lsn(&mut self, lsn: Lsn) -> Result<(), WalIngestError> {\n        ensure_walingest!(\n            lsn >= 
self.lsn,\n            \"setting an older lsn {} than {} is not allowed\",\n            lsn,\n            self.lsn\n        );\n\n        if lsn > self.lsn {\n            self.pending_lsns.push(self.lsn);\n            self.lsn = lsn;\n        }\n        Ok(())\n    }\n\n    /// In this context, 'metadata' means keys that are only read by the pageserver internally, and 'data' means\n    /// keys that represent literal blocks that postgres can read.  So data includes relation blocks and\n    /// SLRU blocks, which are read directly by postgres, and everything else is considered metadata.\n    ///\n    /// The distinction is important because data keys are handled on a fast path where dirty writes are\n    /// not readable until this modification is committed, whereas metadata keys are visible for read\n    /// via [`Self::get`] as soon as their record has been ingested.\n    fn is_data_key(key: &Key) -> bool {\n        key.is_rel_block_key() || key.is_slru_block_key()\n    }\n\n    /// Initialize a completely new repository.\n    ///\n    /// This inserts the directory metadata entries that are assumed to\n    /// always exist.\n    pub fn init_empty(&mut self) -> anyhow::Result<()> {\n        let buf = DbDirectory::ser(&DbDirectory {\n            dbdirs: HashMap::new(),\n        })?;\n        self.pending_directory_entries\n            .push((DirectoryKind::Db, MetricsUpdate::Set(0)));\n        self.put(DBDIR_KEY, Value::Image(buf.into()));\n\n        let buf = if self.tline.pg_version >= PgMajorVersion::PG17 {\n            TwoPhaseDirectoryV17::ser(&TwoPhaseDirectoryV17 {\n                xids: HashSet::new(),\n            })\n        } else {\n            TwoPhaseDirectory::ser(&TwoPhaseDirectory {\n                xids: HashSet::new(),\n            })\n        }?;\n        self.pending_directory_entries\n            .push((DirectoryKind::TwoPhase, MetricsUpdate::Set(0)));\n        self.put(TWOPHASEDIR_KEY, Value::Image(buf.into()));\n\n        let buf: Bytes = 
SlruSegmentDirectory::ser(&SlruSegmentDirectory::default())?.into();\n        let empty_dir = Value::Image(buf);\n\n        // Initialize SLRUs on shard 0 only: creating these on other shards would be\n        // harmless but they'd just be dropped on later compaction.\n        // Each directory key is paired with a size-0 metrics entry for the same SLRU kind.\n        if self.tline.tenant_shard_id.is_shard_zero() {\n            self.put(slru_dir_to_key(SlruKind::Clog), empty_dir.clone());\n            self.pending_directory_entries.push((\n                DirectoryKind::SlruSegment(SlruKind::Clog),\n                MetricsUpdate::Set(0),\n            ));\n            self.put(\n                slru_dir_to_key(SlruKind::MultiXactMembers),\n                empty_dir.clone(),\n            );\n            self.pending_directory_entries.push((\n                DirectoryKind::SlruSegment(SlruKind::MultiXactMembers),\n                MetricsUpdate::Set(0),\n            ));\n            self.put(slru_dir_to_key(SlruKind::MultiXactOffsets), empty_dir);\n            self.pending_directory_entries.push((\n                DirectoryKind::SlruSegment(SlruKind::MultiXactOffsets),\n                MetricsUpdate::Set(0),\n            ));\n        }\n\n        Ok(())\n    }\n\n    #[cfg(test)]\n    pub fn init_empty_test_timeline(&mut self) -> anyhow::Result<()> {\n        self.init_empty()?;\n        self.put_control_file(bytes::Bytes::from_static(\n            b\"control_file contents do not matter\",\n        ))\n        .context(\"put_control_file\")?;\n        self.put_checkpoint(bytes::Bytes::from_static(\n            b\"checkpoint_file contents do not matter\",\n        ))\n        .context(\"put_checkpoint_file\")?;\n        Ok(())\n    }\n\n    /// Creates a relation if it is not already present.\n    /// Returns the current size of the relation\n    pub(crate) async fn create_relation_if_required(\n        &mut self,\n        rel: RelTag,\n        ctx: &RequestContext,\n    ) -> Result<u32, WalIngestError> {\n        // Get current size and put rel creation if rel doesn't 
exist\n        //\n        // NOTE: we check the cache first even though get_rel_exists and get_rel_size would\n        //       check the cache too. This is because eagerly checking the cache results in\n        //       less work overall and 10% better performance. It's more work on cache miss\n        //       but cache miss is rare.\n        if let Some(nblocks) = self\n            .tline\n            .get_cached_rel_size(&rel, Version::Modified(self))\n        {\n            Ok(nblocks)\n        } else if !self\n            .tline\n            .get_rel_exists(rel, Version::Modified(self), ctx)\n            .await?\n        {\n            // create it with 0 size initially, the logic below will extend it\n            self.put_rel_creation(rel, 0, ctx).await?;\n            Ok(0)\n        } else {\n            Ok(self\n                .tline\n                .get_rel_size(rel, Version::Modified(self), ctx)\n                .await?)\n        }\n    }\n\n    /// Given a block number for a relation (which represents a newly written block),\n    /// the previous block count of the relation, and the shard info, find the gaps\n    /// that were created by the newly written block if any.\n    fn find_gaps(\n        rel: RelTag,\n        blkno: u32,\n        previous_nblocks: u32,\n        shard: &ShardIdentity,\n    ) -> Option<KeySpace> {\n        let mut key = rel_block_to_key(rel, blkno);\n        let mut gap_accum = None;\n\n        for gap_blkno in previous_nblocks..blkno {\n            key.field6 = gap_blkno;\n\n            if shard.get_shard_number(&key) != shard.number {\n                continue;\n            }\n\n            gap_accum\n                .get_or_insert_with(KeySpaceAccum::new)\n                .add_key(key);\n        }\n\n        gap_accum.map(|accum| accum.to_keyspace())\n    }\n\n    pub async fn ingest_batch(\n        &mut self,\n        mut batch: SerializedValueBatch,\n        // TODO(vlad): remove this argument and replace the shard check 
with is_key_local\n        shard: &ShardIdentity,\n        ctx: &RequestContext,\n    ) -> Result<(), WalIngestError> {\n        let mut gaps_at_lsns = Vec::default();\n\n        for meta in batch.metadata.iter() {\n            let key = Key::from_compact(meta.key());\n            let (rel, blkno) = key\n                .to_rel_block()\n                .map_err(|_| WalIngestErrorKind::InvalidKey(key, meta.lsn()))?;\n            let new_nblocks = blkno + 1;\n\n            let old_nblocks = self.create_relation_if_required(rel, ctx).await?;\n            if new_nblocks > old_nblocks {\n                self.put_rel_extend(rel, new_nblocks, ctx).await?;\n            }\n\n            if let Some(gaps) = Self::find_gaps(rel, blkno, old_nblocks, shard) {\n                gaps_at_lsns.push((gaps, meta.lsn()));\n            }\n        }\n\n        if !gaps_at_lsns.is_empty() {\n            batch.zero_gaps(gaps_at_lsns);\n        }\n\n        match self.pending_data_batch.as_mut() {\n            Some(pending_batch) => {\n                pending_batch.extend(batch);\n            }\n            None if batch.has_data() => {\n                self.pending_data_batch = Some(batch);\n            }\n            None => {\n                // Nothing to initialize the batch with\n            }\n        }\n\n        Ok(())\n    }\n\n    /// Put a new page version that can be constructed from a WAL record\n    ///\n    /// NOTE: this will *not* implicitly extend the relation, if the page is beyond the\n    /// current end-of-file. 
It's up to the caller to check that the relation size\n    /// matches the blocks inserted!\n    pub fn put_rel_wal_record(\n        &mut self,\n        rel: RelTag,\n        blknum: BlockNumber,\n        rec: NeonWalRecord,\n    ) -> Result<(), WalIngestError> {\n        ensure_walingest!(rel.relnode != 0, RelationError::InvalidRelnode);\n        self.put(rel_block_to_key(rel, blknum), Value::WalRecord(rec));\n        Ok(())\n    }\n\n    // Same, but for an SLRU.\n    pub fn put_slru_wal_record(\n        &mut self,\n        kind: SlruKind,\n        segno: u32,\n        blknum: BlockNumber,\n        rec: NeonWalRecord,\n    ) -> Result<(), WalIngestError> {\n        if !self.tline.tenant_shard_id.is_shard_zero() {\n            return Ok(());\n        }\n\n        self.put(\n            slru_block_to_key(kind, segno, blknum),\n            Value::WalRecord(rec),\n        );\n        Ok(())\n    }\n\n    /// Like put_wal_record, but with ready-made image of the page.\n    pub fn put_rel_page_image(\n        &mut self,\n        rel: RelTag,\n        blknum: BlockNumber,\n        img: Bytes,\n    ) -> Result<(), WalIngestError> {\n        ensure_walingest!(rel.relnode != 0, RelationError::InvalidRelnode);\n        let key = rel_block_to_key(rel, blknum);\n        if !key.is_valid_key_on_write_path() {\n            Err(WalIngestErrorKind::InvalidKey(key, self.lsn))?;\n        }\n        self.put(rel_block_to_key(rel, blknum), Value::Image(img));\n        Ok(())\n    }\n\n    pub fn put_slru_page_image(\n        &mut self,\n        kind: SlruKind,\n        segno: u32,\n        blknum: BlockNumber,\n        img: Bytes,\n    ) -> Result<(), WalIngestError> {\n        assert!(self.tline.tenant_shard_id.is_shard_zero());\n\n        let key = slru_block_to_key(kind, segno, blknum);\n        if !key.is_valid_key_on_write_path() {\n            Err(WalIngestErrorKind::InvalidKey(key, self.lsn))?;\n        }\n        self.put(key, Value::Image(img));\n        Ok(())\n    }\n\n    
pub(crate) fn put_rel_page_image_zero(\n        &mut self,\n        rel: RelTag,\n        blknum: BlockNumber,\n    ) -> Result<(), WalIngestError> {\n        ensure_walingest!(rel.relnode != 0, RelationError::InvalidRelnode);\n        let key = rel_block_to_key(rel, blknum);\n        if !key.is_valid_key_on_write_path() {\n            Err(WalIngestErrorKind::InvalidKey(key, self.lsn))?;\n        }\n\n        let batch = self\n            .pending_data_batch\n            .get_or_insert_with(SerializedValueBatch::default);\n\n        batch.put(key.to_compact(), Value::Image(ZERO_PAGE.clone()), self.lsn);\n\n        Ok(())\n    }\n\n    pub(crate) fn put_slru_page_image_zero(\n        &mut self,\n        kind: SlruKind,\n        segno: u32,\n        blknum: BlockNumber,\n    ) -> Result<(), WalIngestError> {\n        assert!(self.tline.tenant_shard_id.is_shard_zero());\n        let key = slru_block_to_key(kind, segno, blknum);\n        if !key.is_valid_key_on_write_path() {\n            Err(WalIngestErrorKind::InvalidKey(key, self.lsn))?;\n        }\n\n        let batch = self\n            .pending_data_batch\n            .get_or_insert_with(SerializedValueBatch::default);\n\n        batch.put(key.to_compact(), Value::Image(ZERO_PAGE.clone()), self.lsn);\n\n        Ok(())\n    }\n\n    /// Returns `true` if the rel_size_v2 write path is enabled. 
If it is the first time that\n    /// we enable it, we also need to persist it in `index_part.json` (initialize is true).\n    ///\n    /// As this function is only used on the write path, we do not need to read the migrated_at\n    /// field.\n    pub fn maybe_enable_rel_size_v2(&mut self, is_create: bool) -> anyhow::Result<RelDirMode> {\n        // TODO: define the behavior of the tenant-level config flag and use feature flag to enable this feature\n\n        let (status, _) = self.tline.get_rel_size_v2_status();\n        let config = self.tline.get_rel_size_v2_enabled();\n        match (config, status) {\n            (false, RelSizeMigration::Legacy) => {\n                // tenant config didn't enable it and we didn't write any reldir_v2 key yet\n                Ok(RelDirMode {\n                    current_status: RelSizeMigration::Legacy,\n                    initialize: false,\n                })\n            }\n            (false, status @ RelSizeMigration::Migrating | status @ RelSizeMigration::Migrated) => {\n                // index_part already persisted that the timeline has enabled rel_size_v2\n                Ok(RelDirMode {\n                    current_status: status,\n                    initialize: false,\n                })\n            }\n            (true, RelSizeMigration::Legacy) => {\n                // The first time we enable it, we need to persist it in `index_part.json`\n                // The caller should update the reldir status once the initialization is done.\n                //\n                // Only initialize the v2 keyspace on new relation creation. 
No initialization\n                // during `timeline_create` (TODO: fix this, we should allow, but currently it\n                // hits consistency issues).\n                Ok(RelDirMode {\n                    current_status: RelSizeMigration::Legacy,\n                    initialize: is_create && !self.is_importing_pgdata,\n                })\n            }\n            (true, status @ RelSizeMigration::Migrating | status @ RelSizeMigration::Migrated) => {\n                // index_part already persisted that the timeline has enabled rel_size_v2\n                // and we don't need to do anything\n                Ok(RelDirMode {\n                    current_status: status,\n                    initialize: false,\n                })\n            }\n        }\n    }\n\n    /// Store a relmapper file (pg_filenode.map) in the repository\n    pub async fn put_relmap_file(\n        &mut self,\n        spcnode: Oid,\n        dbnode: Oid,\n        img: Bytes,\n        ctx: &RequestContext,\n    ) -> Result<(), WalIngestError> {\n        let v2_mode = self\n            .maybe_enable_rel_size_v2(false)\n            .map_err(WalIngestErrorKind::MaybeRelSizeV2Error)?;\n\n        // Add it to the directory (if it doesn't exist already)\n        let buf = self.get(DBDIR_KEY, ctx).await?;\n        let mut dbdir = DbDirectory::des(&buf)?;\n\n        let r = dbdir.dbdirs.insert((spcnode, dbnode), true);\n        if r.is_none() || r == Some(false) {\n            // The dbdir entry didn't exist, or it contained a\n            // 'false'. 
The 'insert' call already updated it with\n            // 'true', now write the updated 'dbdirs' map back.\n            let buf = DbDirectory::ser(&dbdir)?;\n            self.put(DBDIR_KEY, Value::Image(buf.into()));\n        }\n        if r.is_none() {\n            if v2_mode.current_status != RelSizeMigration::Legacy {\n                self.pending_directory_entries\n                    .push((DirectoryKind::RelV2, MetricsUpdate::Set(0)));\n            }\n\n            // Create RelDirectory in v1 keyspace. TODO: if we have fully migrated to v2, no need to create this directory.\n            // Some code path relies on this directory to be present. We should remove it once we starts to set tenants to\n            // `RelSizeMigration::Migrated` state (currently we don't, all tenants will have `RelSizeMigration::Migrating`).\n            let buf = RelDirectory::ser(&RelDirectory {\n                rels: HashSet::new(),\n            })?;\n            self.pending_directory_entries\n                .push((DirectoryKind::Rel, MetricsUpdate::Set(0)));\n            self.put(\n                rel_dir_to_key(spcnode, dbnode),\n                Value::Image(Bytes::from(buf)),\n            );\n        }\n\n        self.put(relmap_file_key(spcnode, dbnode), Value::Image(img));\n        Ok(())\n    }\n\n    pub async fn put_twophase_file(\n        &mut self,\n        xid: u64,\n        img: Bytes,\n        ctx: &RequestContext,\n    ) -> Result<(), WalIngestError> {\n        // Add it to the directory entry\n        let dirbuf = self.get(TWOPHASEDIR_KEY, ctx).await?;\n        let newdirbuf = if self.tline.pg_version >= PgMajorVersion::PG17 {\n            let mut dir = TwoPhaseDirectoryV17::des(&dirbuf)?;\n            if !dir.xids.insert(xid) {\n                Err(WalIngestErrorKind::FileAlreadyExists(xid))?;\n            }\n            self.pending_directory_entries.push((\n                DirectoryKind::TwoPhase,\n                MetricsUpdate::Set(dir.xids.len() as u64),\n 
           ));\n            Bytes::from(TwoPhaseDirectoryV17::ser(&dir)?)\n        } else {\n            let xid = xid as u32;\n            let mut dir = TwoPhaseDirectory::des(&dirbuf)?;\n            if !dir.xids.insert(xid) {\n                Err(WalIngestErrorKind::FileAlreadyExists(xid.into()))?;\n            }\n            self.pending_directory_entries.push((\n                DirectoryKind::TwoPhase,\n                MetricsUpdate::Set(dir.xids.len() as u64),\n            ));\n            Bytes::from(TwoPhaseDirectory::ser(&dir)?)\n        };\n        self.put(TWOPHASEDIR_KEY, Value::Image(newdirbuf));\n\n        self.put(twophase_file_key(xid), Value::Image(img));\n        Ok(())\n    }\n\n    pub async fn set_replorigin(\n        &mut self,\n        origin_id: RepOriginId,\n        origin_lsn: Lsn,\n    ) -> Result<(), WalIngestError> {\n        let key = repl_origin_key(origin_id);\n        self.put(key, Value::Image(origin_lsn.ser().unwrap().into()));\n        Ok(())\n    }\n\n    pub async fn drop_replorigin(&mut self, origin_id: RepOriginId) -> Result<(), WalIngestError> {\n        self.set_replorigin(origin_id, Lsn::INVALID).await\n    }\n\n    pub fn put_control_file(&mut self, img: Bytes) -> Result<(), WalIngestError> {\n        self.put(CONTROLFILE_KEY, Value::Image(img));\n        Ok(())\n    }\n\n    pub fn put_checkpoint(&mut self, img: Bytes) -> Result<(), WalIngestError> {\n        self.put(CHECKPOINT_KEY, Value::Image(img));\n        Ok(())\n    }\n\n    pub async fn drop_dbdir(\n        &mut self,\n        spcnode: Oid,\n        dbnode: Oid,\n        ctx: &RequestContext,\n    ) -> Result<(), WalIngestError> {\n        let total_blocks = self\n            .tline\n            .get_db_size(spcnode, dbnode, Version::Modified(self), ctx)\n            .await?;\n\n        // Remove entry from dbdir\n        let buf = self.get(DBDIR_KEY, ctx).await?;\n        let mut dir = DbDirectory::des(&buf)?;\n        if dir.dbdirs.remove(&(spcnode, 
dbnode)).is_some() {\n            let buf = DbDirectory::ser(&dir)?;\n            self.pending_directory_entries.push((\n                DirectoryKind::Db,\n                MetricsUpdate::Set(dir.dbdirs.len() as u64),\n            ));\n            self.put(DBDIR_KEY, Value::Image(buf.into()));\n        } else {\n            warn!(\n                \"dropped dbdir for spcnode {} dbnode {} did not exist in db directory\",\n                spcnode, dbnode\n            );\n        }\n\n        // Update logical database size.\n        self.pending_nblocks -= total_blocks as i64;\n\n        // Delete all relations and metadata files for the spcnode/dnode\n        self.delete(dbdir_key_range(spcnode, dbnode));\n        Ok(())\n    }\n\n    async fn initialize_rel_size_v2_keyspace(\n        &mut self,\n        ctx: &RequestContext,\n        dbdir: &DbDirectory,\n    ) -> Result<(), WalIngestError> {\n        // Copy everything from relv1 to relv2; TODO: check if there's any key in the v2 keyspace, if so, abort.\n        tracing::info!(\"initializing rel_size_v2 keyspace\");\n        let mut rel_cnt = 0;\n        // relmap_exists (the value of dbdirs hashmap) does not affect the migration: we need to copy things over anyways\n        for &(spcnode, dbnode) in dbdir.dbdirs.keys() {\n            let rel_dir_key = rel_dir_to_key(spcnode, dbnode);\n            let rel_dir = RelDirectory::des(&self.get(rel_dir_key, ctx).await?)?;\n            for (relnode, forknum) in rel_dir.rels {\n                let sparse_rel_dir_key = rel_tag_sparse_key(spcnode, dbnode, relnode, forknum);\n                self.put(\n                    sparse_rel_dir_key,\n                    Value::Image(RelDirExists::Exists.encode()),\n                );\n                tracing::info!(\n                    \"migrated rel_size_v2: {}\",\n                    RelTag {\n                        spcnode,\n                        dbnode,\n                        relnode,\n                        forknum\n     
               }\n                );\n                rel_cnt += 1;\n            }\n        }\n        tracing::info!(\n            \"initialized rel_size_v2 keyspace at lsn {}: migrated {} relations\",\n            self.lsn,\n            rel_cnt\n        );\n        self.tline\n            .update_rel_size_v2_status(RelSizeMigration::Migrating, Some(self.lsn))\n            .map_err(WalIngestErrorKind::MaybeRelSizeV2Error)?;\n        Ok::<_, WalIngestError>(())\n    }\n\n    async fn put_rel_creation_v1(\n        &mut self,\n        rel: RelTag,\n        dbdir_exists: bool,\n        ctx: &RequestContext,\n    ) -> Result<(), WalIngestError> {\n        // Reldir v1 write path\n        let rel_dir_key = rel_dir_to_key(rel.spcnode, rel.dbnode);\n        let mut rel_dir = if !dbdir_exists {\n            // Create the RelDirectory\n            RelDirectory::default()\n        } else {\n            // reldir already exists, fetch it\n            RelDirectory::des(&self.get(rel_dir_key, ctx).await?)?\n        };\n\n        // Add the new relation to the rel directory entry, and write it back\n        if !rel_dir.rels.insert((rel.relnode, rel.forknum)) {\n            Err(WalIngestErrorKind::RelationAlreadyExists(rel))?;\n        }\n        if !dbdir_exists {\n            self.pending_directory_entries\n                .push((DirectoryKind::Rel, MetricsUpdate::Set(0)))\n        }\n        self.pending_directory_entries\n            .push((DirectoryKind::Rel, MetricsUpdate::Add(1)));\n        self.put(\n            rel_dir_key,\n            Value::Image(Bytes::from(RelDirectory::ser(&rel_dir)?)),\n        );\n        Ok(())\n    }\n\n    async fn put_rel_creation_v2(\n        &mut self,\n        rel: RelTag,\n        dbdir_exists: bool,\n        ctx: &RequestContext,\n    ) -> Result<(), WalIngestError> {\n        // Reldir v2 write path\n        let sparse_rel_dir_key =\n            rel_tag_sparse_key(rel.spcnode, rel.dbnode, rel.relnode, rel.forknum);\n        // check if 
the rel_dir_key exists in v2\n        let val = self.sparse_get(sparse_rel_dir_key, ctx).await?;\n        let val = RelDirExists::decode_option(val)\n            .map_err(|_| WalIngestErrorKind::InvalidRelDirKey(sparse_rel_dir_key))?;\n        if val == RelDirExists::Exists {\n            Err(WalIngestErrorKind::RelationAlreadyExists(rel))?;\n        }\n        self.put(\n            sparse_rel_dir_key,\n            Value::Image(RelDirExists::Exists.encode()),\n        );\n        if !dbdir_exists {\n            self.pending_directory_entries\n                .push((DirectoryKind::RelV2, MetricsUpdate::Set(0)));\n        }\n        self.pending_directory_entries\n            .push((DirectoryKind::RelV2, MetricsUpdate::Add(1)));\n        Ok(())\n    }\n\n    /// Create a relation fork.\n    ///\n    /// 'nblocks' is the initial size.\n    pub async fn put_rel_creation(\n        &mut self,\n        rel: RelTag,\n        nblocks: BlockNumber,\n        ctx: &RequestContext,\n    ) -> Result<(), WalIngestError> {\n        if rel.relnode == 0 {\n            Err(WalIngestErrorKind::LogicalError(anyhow::anyhow!(\n                \"invalid relnode\"\n            )))?;\n        }\n        // It's possible that this is the first rel for this db in this\n        // tablespace.  Create the reldir entry for it if so.\n        let mut dbdir = DbDirectory::des(&self.get(DBDIR_KEY, ctx).await?)?;\n\n        let dbdir_exists =\n            if let hash_map::Entry::Vacant(e) = dbdir.dbdirs.entry((rel.spcnode, rel.dbnode)) {\n                // Didn't exist. 
Update dbdir\n                e.insert(false);\n                let buf = DbDirectory::ser(&dbdir)?;\n                self.pending_directory_entries.push((\n                    DirectoryKind::Db,\n                    MetricsUpdate::Set(dbdir.dbdirs.len() as u64),\n                ));\n                self.put(DBDIR_KEY, Value::Image(buf.into()));\n                false\n            } else {\n                true\n            };\n\n        let mut v2_mode = self\n            .maybe_enable_rel_size_v2(true)\n            .map_err(WalIngestErrorKind::MaybeRelSizeV2Error)?;\n\n        if v2_mode.initialize {\n            if let Err(e) = self.initialize_rel_size_v2_keyspace(ctx, &dbdir).await {\n                tracing::warn!(\"error initializing rel_size_v2 keyspace: {}\", e);\n                // TODO: circuit breaker so that it won't retry forever\n            } else {\n                v2_mode.current_status = RelSizeMigration::Migrating;\n            }\n        }\n\n        if v2_mode.current_status != RelSizeMigration::Migrated {\n            self.put_rel_creation_v1(rel, dbdir_exists, ctx).await?;\n        }\n\n        if v2_mode.current_status != RelSizeMigration::Legacy {\n            let write_v2_res = self.put_rel_creation_v2(rel, dbdir_exists, ctx).await;\n            if let Err(e) = write_v2_res {\n                if v2_mode.current_status == RelSizeMigration::Migrated {\n                    return Err(e);\n                }\n                tracing::warn!(\"error writing rel_size_v2 keyspace: {}\", e);\n            }\n        }\n\n        // Put size\n        let size_key = rel_size_to_key(rel);\n        let buf = nblocks.to_le_bytes();\n        self.put(size_key, Value::Image(Bytes::from(buf.to_vec())));\n\n        self.pending_nblocks += nblocks as i64;\n\n        // Update relation size cache\n        self.tline.set_cached_rel_size(rel, self.lsn, nblocks);\n\n        // Even if nblocks > 0, we don't insert any actual blocks here. 
That's up to the\n        // caller.\n        Ok(())\n    }\n\n    /// Truncate relation\n    pub async fn put_rel_truncation(\n        &mut self,\n        rel: RelTag,\n        nblocks: BlockNumber,\n        ctx: &RequestContext,\n    ) -> Result<(), WalIngestError> {\n        ensure_walingest!(rel.relnode != 0, RelationError::InvalidRelnode);\n        if self\n            .tline\n            .get_rel_exists(rel, Version::Modified(self), ctx)\n            .await?\n        {\n            let size_key = rel_size_to_key(rel);\n            // Fetch the old size first\n            let old_size = self.get(size_key, ctx).await?.get_u32_le();\n\n            // Update the entry with the new size.\n            let buf = nblocks.to_le_bytes();\n            self.put(size_key, Value::Image(Bytes::from(buf.to_vec())));\n\n            // Update relation size cache\n            self.tline.set_cached_rel_size(rel, self.lsn, nblocks);\n\n            // Update logical database size.\n            self.pending_nblocks -= old_size as i64 - nblocks as i64;\n        }\n        Ok(())\n    }\n\n    /// Extend relation\n    /// If new size is smaller, do nothing.\n    pub async fn put_rel_extend(\n        &mut self,\n        rel: RelTag,\n        nblocks: BlockNumber,\n        ctx: &RequestContext,\n    ) -> Result<(), WalIngestError> {\n        ensure_walingest!(rel.relnode != 0, RelationError::InvalidRelnode);\n\n        // Put size\n        let size_key = rel_size_to_key(rel);\n        let old_size = self.get(size_key, ctx).await?.get_u32_le();\n\n        // only extend relation here. 
never decrease the size\n        if nblocks > old_size {\n            let buf = nblocks.to_le_bytes();\n            self.put(size_key, Value::Image(Bytes::from(buf.to_vec())));\n\n            // Update relation size cache\n            self.tline.set_cached_rel_size(rel, self.lsn, nblocks);\n\n            self.pending_nblocks += nblocks as i64 - old_size as i64;\n        }\n        Ok(())\n    }\n\n    async fn put_rel_drop_v1(\n        &mut self,\n        drop_relations: HashMap<(u32, u32), Vec<RelTag>>,\n        ctx: &RequestContext,\n    ) -> Result<BTreeSet<RelTag>, WalIngestError> {\n        let mut dropped_rels = BTreeSet::new();\n        for ((spc_node, db_node), rel_tags) in drop_relations {\n            let dir_key = rel_dir_to_key(spc_node, db_node);\n            let buf = self.get(dir_key, ctx).await?;\n            let mut dir = RelDirectory::des(&buf)?;\n\n            let mut dirty = false;\n            for rel_tag in rel_tags {\n                let found = if dir.rels.remove(&(rel_tag.relnode, rel_tag.forknum)) {\n                    self.pending_directory_entries\n                        .push((DirectoryKind::Rel, MetricsUpdate::Sub(1)));\n                    dirty = true;\n                    dropped_rels.insert(rel_tag);\n                    true\n                } else {\n                    false\n                };\n\n                if found {\n                    // update logical size\n                    let size_key = rel_size_to_key(rel_tag);\n                    let old_size = self.get(size_key, ctx).await?.get_u32_le();\n                    self.pending_nblocks -= old_size as i64;\n\n                    // Remove entry from relation size cache\n                    self.tline.remove_cached_rel_size(&rel_tag);\n\n                    // Delete size entry, as well as all blocks; this is currently a no-op because we haven't implemented tombstones in storage.\n                    self.delete(rel_key_range(rel_tag));\n                }\n          
  }\n\n            if dirty {\n                self.put(dir_key, Value::Image(Bytes::from(RelDirectory::ser(&dir)?)));\n            }\n        }\n        Ok(dropped_rels)\n    }\n\n    async fn put_rel_drop_v2(\n        &mut self,\n        drop_relations: HashMap<(u32, u32), Vec<RelTag>>,\n        ctx: &RequestContext,\n    ) -> Result<BTreeSet<RelTag>, WalIngestError> {\n        let mut dropped_rels = BTreeSet::new();\n        for ((spc_node, db_node), rel_tags) in drop_relations {\n            for rel_tag in rel_tags {\n                let key = rel_tag_sparse_key(spc_node, db_node, rel_tag.relnode, rel_tag.forknum);\n                let val = RelDirExists::decode_option(self.sparse_get(key, ctx).await?)\n                    .map_err(|_| WalIngestErrorKind::InvalidKey(key, self.lsn))?;\n                if val == RelDirExists::Exists {\n                    dropped_rels.insert(rel_tag);\n                    self.pending_directory_entries\n                        .push((DirectoryKind::RelV2, MetricsUpdate::Sub(1)));\n                    // put tombstone\n                    self.put(key, Value::Image(RelDirExists::Removed.encode()));\n                }\n            }\n        }\n        Ok(dropped_rels)\n    }\n\n    /// Drop some relations\n    pub(crate) async fn put_rel_drops(\n        &mut self,\n        drop_relations: HashMap<(u32, u32), Vec<RelTag>>,\n        ctx: &RequestContext,\n    ) -> Result<(), WalIngestError> {\n        let v2_mode = self\n            .maybe_enable_rel_size_v2(false)\n            .map_err(WalIngestErrorKind::MaybeRelSizeV2Error)?;\n        match v2_mode.current_status {\n            RelSizeMigration::Legacy => {\n                self.put_rel_drop_v1(drop_relations, ctx).await?;\n            }\n            RelSizeMigration::Migrating => {\n                let dropped_rels_v1 = self.put_rel_drop_v1(drop_relations.clone(), ctx).await?;\n                let dropped_rels_v2_res = self.put_rel_drop_v2(drop_relations, ctx).await;\n           
     match dropped_rels_v2_res {\n                    Ok(dropped_rels_v2) => {\n                        if dropped_rels_v1 != dropped_rels_v2 {\n                            tracing::warn!(\n                                \"inconsistent v1/v2 rel drop: dropped_rels_v1.len()={}, dropped_rels_v2.len()={}\",\n                                dropped_rels_v1.len(),\n                                dropped_rels_v2.len()\n                            );\n                        }\n                    }\n                    Err(e) => {\n                        tracing::warn!(\"error dropping rels: {}\", e);\n                    }\n                }\n            }\n            RelSizeMigration::Migrated => {\n                self.put_rel_drop_v2(drop_relations, ctx).await?;\n            }\n        }\n        Ok(())\n    }\n\n    pub async fn put_slru_segment_creation(\n        &mut self,\n        kind: SlruKind,\n        segno: u32,\n        nblocks: BlockNumber,\n        ctx: &RequestContext,\n    ) -> Result<(), WalIngestError> {\n        assert!(self.tline.tenant_shard_id.is_shard_zero());\n\n        // Add it to the directory entry\n        let dir_key = slru_dir_to_key(kind);\n        let buf = self.get(dir_key, ctx).await?;\n        let mut dir = SlruSegmentDirectory::des(&buf)?;\n\n        if !dir.segments.insert(segno) {\n            Err(WalIngestErrorKind::SlruAlreadyExists(kind, segno))?;\n        }\n        self.pending_directory_entries.push((\n            DirectoryKind::SlruSegment(kind),\n            MetricsUpdate::Set(dir.segments.len() as u64),\n        ));\n        self.put(\n            dir_key,\n            Value::Image(Bytes::from(SlruSegmentDirectory::ser(&dir)?)),\n        );\n\n        // Put size\n        let size_key = slru_segment_size_to_key(kind, segno);\n        let buf = nblocks.to_le_bytes();\n        self.put(size_key, Value::Image(Bytes::from(buf.to_vec())));\n\n        // even if nblocks > 0, we don't insert any actual blocks here\n\n        
Ok(())\n    }\n\n    /// Extend SLRU segment\n    pub fn put_slru_extend(\n        &mut self,\n        kind: SlruKind,\n        segno: u32,\n        nblocks: BlockNumber,\n    ) -> Result<(), WalIngestError> {\n        assert!(self.tline.tenant_shard_id.is_shard_zero());\n\n        // Put size\n        let size_key = slru_segment_size_to_key(kind, segno);\n        let buf = nblocks.to_le_bytes();\n        self.put(size_key, Value::Image(Bytes::from(buf.to_vec())));\n        Ok(())\n    }\n\n    /// This method is used for marking truncated SLRU files\n    pub async fn drop_slru_segment(\n        &mut self,\n        kind: SlruKind,\n        segno: u32,\n        ctx: &RequestContext,\n    ) -> Result<(), WalIngestError> {\n        // Remove it from the directory entry\n        let dir_key = slru_dir_to_key(kind);\n        let buf = self.get(dir_key, ctx).await?;\n        let mut dir = SlruSegmentDirectory::des(&buf)?;\n\n        if !dir.segments.remove(&segno) {\n            warn!(\"slru segment {:?}/{} does not exist\", kind, segno);\n        }\n        self.pending_directory_entries.push((\n            DirectoryKind::SlruSegment(kind),\n            MetricsUpdate::Set(dir.segments.len() as u64),\n        ));\n        self.put(\n            dir_key,\n            Value::Image(Bytes::from(SlruSegmentDirectory::ser(&dir)?)),\n        );\n\n        // Delete size entry, as well as all blocks\n        self.delete(slru_segment_key_range(kind, segno));\n\n        Ok(())\n    }\n\n    /// Drop a relmapper file (pg_filenode.map)\n    pub fn drop_relmap_file(&mut self, _spcnode: Oid, _dbnode: Oid) -> Result<(), WalIngestError> {\n        // TODO\n        Ok(())\n    }\n\n    /// This method is used for marking truncated SLRU files\n    pub async fn drop_twophase_file(\n        &mut self,\n        xid: u64,\n        ctx: &RequestContext,\n    ) -> Result<(), WalIngestError> {\n        // Remove it from the directory entry\n        let buf = self.get(TWOPHASEDIR_KEY, 
ctx).await?;\n        let newdirbuf = if self.tline.pg_version >= PgMajorVersion::PG17 {\n            let mut dir = TwoPhaseDirectoryV17::des(&buf)?;\n\n            if !dir.xids.remove(&xid) {\n                warn!(\"twophase file for xid {} does not exist\", xid);\n            }\n            self.pending_directory_entries.push((\n                DirectoryKind::TwoPhase,\n                MetricsUpdate::Set(dir.xids.len() as u64),\n            ));\n            Bytes::from(TwoPhaseDirectoryV17::ser(&dir)?)\n        } else {\n            let xid: u32 = u32::try_from(xid)\n                .map_err(|e| WalIngestErrorKind::LogicalError(anyhow::Error::from(e)))?;\n            let mut dir = TwoPhaseDirectory::des(&buf)?;\n\n            if !dir.xids.remove(&xid) {\n                warn!(\"twophase file for xid {} does not exist\", xid);\n            }\n            self.pending_directory_entries.push((\n                DirectoryKind::TwoPhase,\n                MetricsUpdate::Set(dir.xids.len() as u64),\n            ));\n            Bytes::from(TwoPhaseDirectory::ser(&dir)?)\n        };\n        self.put(TWOPHASEDIR_KEY, Value::Image(newdirbuf));\n\n        // Delete it\n        self.delete(twophase_key_range(xid));\n\n        Ok(())\n    }\n\n    pub async fn put_file(\n        &mut self,\n        path: &str,\n        content: &[u8],\n        ctx: &RequestContext,\n    ) -> Result<(), WalIngestError> {\n        let key = aux_file::encode_aux_file_key(path);\n        // retrieve the key from the engine\n        let old_val = match self.get(key, ctx).await {\n            Ok(val) => Some(val),\n            Err(PageReconstructError::MissingKey(_)) => None,\n            Err(e) => return Err(e.into()),\n        };\n        let files: Vec<(&str, &[u8])> = if let Some(ref old_val) = old_val {\n            aux_file::decode_file_value(old_val).map_err(WalIngestErrorKind::EncodeAuxFileError)?\n        } else {\n            Vec::new()\n        };\n        let mut other_files = 
Vec::with_capacity(files.len());\n        let mut modifying_file = None;\n        for file @ (p, content) in files {\n            if path == p {\n                assert!(\n                    modifying_file.is_none(),\n                    \"duplicated entries found for {path}\"\n                );\n                modifying_file = Some(content);\n            } else {\n                other_files.push(file);\n            }\n        }\n        let mut new_files = other_files;\n        match (modifying_file, content.is_empty()) {\n            (Some(old_content), false) => {\n                self.tline\n                    .aux_file_size_estimator\n                    .on_update(old_content.len(), content.len());\n                new_files.push((path, content));\n            }\n            (Some(old_content), true) => {\n                self.tline\n                    .aux_file_size_estimator\n                    .on_remove(old_content.len());\n                // not adding the file key to the final `new_files` vec.\n            }\n            (None, false) => {\n                self.tline.aux_file_size_estimator.on_add(content.len());\n                new_files.push((path, content));\n            }\n            // Compute may request delete of old version of pgstat AUX file if new one exceeds size limit.\n            // Compute doesn't know if previous version of this file exists or not, so\n            // attempt to delete non-existing file can cause this message.\n            // To avoid false alarms, log it as info rather than warning.\n            (None, true) if path.starts_with(\"pg_stat/\") => {\n                info!(\"removing non-existing pg_stat file: {}\", path)\n            }\n            (None, true) => warn!(\"removing non-existing aux file: {}\", path),\n        }\n        let new_val = aux_file::encode_file_value(&new_files)\n            .map_err(WalIngestErrorKind::EncodeAuxFileError)?;\n        self.put(key, Value::Image(new_val.into()));\n\n        
Ok(())\n    }\n\n    ///\n    /// Flush changes accumulated so far to the underlying repository.\n    ///\n    /// Usually, changes made in DatadirModification are atomic, but this allows\n    /// you to flush them to the underlying repository before the final `commit`.\n    /// That allows to free up the memory used to hold the pending changes.\n    ///\n    /// Currently only used during bulk import of a data directory. In that\n    /// context, breaking the atomicity is OK. If the import is interrupted, the\n    /// whole import fails and the timeline will be deleted anyway.\n    /// (Or to be precise, it will be left behind for debugging purposes and\n    /// ignored, see <https://github.com/neondatabase/neon/pull/1809>)\n    ///\n    /// Note: A consequence of flushing the pending operations is that they\n    /// won't be visible to subsequent operations until `commit`. The function\n    /// retains all the metadata, but data pages are flushed. That's again OK\n    /// for bulk import, where you are just loading data pages and won't try to\n    /// modify the same pages twice.\n    pub(crate) async fn flush(&mut self, ctx: &RequestContext) -> anyhow::Result<()> {\n        // Unless we have accumulated a decent amount of changes, it's not worth it\n        // to scan through the pending_updates list.\n        let pending_nblocks = self.pending_nblocks;\n        if pending_nblocks < 10000 {\n            return Ok(());\n        }\n\n        let mut writer = self.tline.writer().await;\n\n        // Flush relation and  SLRU data blocks, keep metadata.\n        if let Some(batch) = self.pending_data_batch.take() {\n            tracing::debug!(\n                \"Flushing batch with max_lsn={}. 
Last record LSN is {}\",\n                batch.max_lsn,\n                self.tline.get_last_record_lsn()\n            );\n\n            // This bails out on first error without modifying pending_updates.\n            // That's Ok, cf this function's doc comment.\n            writer.put_batch(batch, ctx).await?;\n        }\n\n        if pending_nblocks != 0 {\n            writer.update_current_logical_size(pending_nblocks * i64::from(BLCKSZ));\n            self.pending_nblocks = 0;\n        }\n\n        for (kind, count) in std::mem::take(&mut self.pending_directory_entries) {\n            writer.update_directory_entries_count(kind, count);\n        }\n\n        Ok(())\n    }\n\n    ///\n    /// Finish this atomic update, writing all the updated keys to the\n    /// underlying timeline.\n    /// All the modifications in this atomic update are stamped by the specified LSN.\n    ///\n    pub async fn commit(&mut self, ctx: &RequestContext) -> anyhow::Result<()> {\n        let mut writer = self.tline.writer().await;\n\n        let pending_nblocks = self.pending_nblocks;\n        self.pending_nblocks = 0;\n\n        // Ordering: the items in this batch do not need to be in any global order, but values for\n        // a particular Key must be in Lsn order relative to one another.  InMemoryLayer relies on\n        // this to do efficient updates to its index.  
See [`wal_decoder::serialized_batch`] for\n        // more details.\n\n        let metadata_batch = {\n            let pending_meta = self\n                .pending_metadata_pages\n                .drain()\n                .flat_map(|(key, values)| {\n                    values\n                        .into_iter()\n                        .map(move |(lsn, value_size, value)| (key, lsn, value_size, value))\n                })\n                .collect::<Vec<_>>();\n\n            if pending_meta.is_empty() {\n                None\n            } else {\n                Some(SerializedValueBatch::from_values(pending_meta))\n            }\n        };\n\n        let data_batch = self.pending_data_batch.take();\n\n        let maybe_batch = match (data_batch, metadata_batch) {\n            (Some(mut data), Some(metadata)) => {\n                data.extend(metadata);\n                Some(data)\n            }\n            (Some(data), None) => Some(data),\n            (None, Some(metadata)) => Some(metadata),\n            (None, None) => None,\n        };\n\n        if let Some(batch) = maybe_batch {\n            tracing::debug!(\n                \"Flushing batch with max_lsn={}. Last record LSN is {}\",\n                batch.max_lsn,\n                self.tline.get_last_record_lsn()\n            );\n\n            // This bails out on first error without modifying pending_updates.\n            // That's Ok, cf this function's doc comment.\n            writer.put_batch(batch, ctx).await?;\n        }\n\n        if !self.pending_deletions.is_empty() {\n            writer.delete_batch(&self.pending_deletions, ctx).await?;\n            self.pending_deletions.clear();\n        }\n\n        self.pending_lsns.push(self.lsn);\n        for pending_lsn in self.pending_lsns.drain(..) 
{\n            // TODO(vlad): pretty sure the comment below is not valid anymore\n            // and we can call finish write with the latest LSN\n            //\n            // Ideally, we should be able to call writer.finish_write() only once\n            // with the highest LSN. However, the last_record_lsn variable in the\n            // timeline keeps track of the latest LSN and the immediate previous LSN\n            // so we need to record every LSN to not leave a gap between them.\n            writer.finish_write(pending_lsn);\n        }\n\n        if pending_nblocks != 0 {\n            writer.update_current_logical_size(pending_nblocks * i64::from(BLCKSZ));\n        }\n\n        for (kind, count) in std::mem::take(&mut self.pending_directory_entries) {\n            writer.update_directory_entries_count(kind, count);\n        }\n\n        self.pending_metadata_bytes = 0;\n\n        Ok(())\n    }\n\n    pub(crate) fn len(&self) -> usize {\n        self.pending_metadata_pages.len()\n            + self.pending_data_batch.as_ref().map_or(0, |b| b.len())\n            + self.pending_deletions.len()\n    }\n\n    /// Read a page from the Timeline we are writing to.  For metadata pages, this passes through\n    /// a cache in Self, which makes writes earlier in this modification visible to WAL records later\n    /// in the modification.\n    ///\n    /// For data pages, reads pass directly to the owning Timeline: any ingest code which reads a data\n    /// page must ensure that the pages they read are already committed in Timeline, for example\n    /// DB create operations are always preceded by a call to commit().  This is special cased because\n    /// it's rare: all the 'normal' WAL operations will only read metadata pages such as relation sizes,\n    /// and not data pages.\n    async fn get(&self, key: Key, ctx: &RequestContext) -> Result<Bytes, PageReconstructError> {\n        if !Self::is_data_key(&key) {\n            // Have we already updated the same key? 
Read the latest pending updated\n            // version in that case.\n            //\n            // Note: we don't check pending_deletions. It is an error to request a\n            // value that has been removed, deletion only avoids leaking storage.\n            if let Some(values) = self.pending_metadata_pages.get(&key.to_compact()) {\n                if let Some((_, _, value)) = values.last() {\n                    return if let Value::Image(img) = value {\n                        Ok(img.clone())\n                    } else {\n                        // Currently, we never need to read back a WAL record that we\n                        // inserted in the same \"transaction\". All the metadata updates\n                        // work directly with Images, and we never need to read actual\n                        // data pages. We could handle this if we had to, by calling\n                        // the walredo manager, but let's keep it simple for now.\n                        Err(PageReconstructError::Other(anyhow::anyhow!(\n                            \"unexpected pending WAL record\"\n                        )))\n                    };\n                }\n            }\n        } else {\n            // This is an expensive check, so we only do it in debug mode. If reading a data key,\n            // this key should never be present in pending_data_pages. 
We ensure this by committing\n            // modifications before ingesting DB create operations, which are the only kind that reads\n            // data pages during ingest.\n            if cfg!(debug_assertions) {\n                assert!(\n                    !self\n                        .pending_data_batch\n                        .as_ref()\n                        .is_some_and(|b| b.updates_key(&key))\n                );\n            }\n        }\n\n        // Metadata page cache miss, or we're reading a data page.\n        let lsn = Lsn::max(self.tline.get_last_record_lsn(), self.lsn);\n        self.tline.get(key, lsn, ctx).await\n    }\n\n    /// Get a key from the sparse keyspace. Automatically converts the missing key error\n    /// and the empty value into None.\n    async fn sparse_get(\n        &self,\n        key: Key,\n        ctx: &RequestContext,\n    ) -> Result<Option<Bytes>, PageReconstructError> {\n        let val = self.get(key, ctx).await;\n        match val {\n            Ok(val) if val.is_empty() => Ok(None),\n            Ok(val) => Ok(Some(val)),\n            Err(PageReconstructError::MissingKey(_)) => Ok(None),\n            Err(e) => Err(e),\n        }\n    }\n\n    #[cfg(test)]\n    pub fn put_for_unit_test(&mut self, key: Key, val: Value) {\n        self.put(key, val);\n    }\n\n    fn put(&mut self, key: Key, val: Value) {\n        if Self::is_data_key(&key) {\n            self.put_data(key.to_compact(), val)\n        } else {\n            self.put_metadata(key.to_compact(), val)\n        }\n    }\n\n    fn put_data(&mut self, key: CompactKey, val: Value) {\n        let batch = self\n            .pending_data_batch\n            .get_or_insert_with(SerializedValueBatch::default);\n        batch.put(key, val, self.lsn);\n    }\n\n    fn put_metadata(&mut self, key: CompactKey, val: Value) {\n        let values = self.pending_metadata_pages.entry(key).or_default();\n        // Replace the previous value if it exists at the same lsn\n     
   if let Some((last_lsn, last_value_ser_size, last_value)) = values.last_mut() {\n            if *last_lsn == self.lsn {\n                // Update the pending_metadata_bytes contribution from this entry, and update the serialized size in place\n                self.pending_metadata_bytes -= *last_value_ser_size;\n                *last_value_ser_size = val.serialized_size().unwrap() as usize;\n                self.pending_metadata_bytes += *last_value_ser_size;\n\n                // Use the latest value, this replaces any earlier write to the same (key,lsn), such as much\n                // have been generated by synthesized zero page writes prior to the first real write to a page.\n                *last_value = val;\n                return;\n            }\n        }\n\n        let val_serialized_size = val.serialized_size().unwrap() as usize;\n        self.pending_metadata_bytes += val_serialized_size;\n        values.push((self.lsn, val_serialized_size, val));\n\n        if key == CHECKPOINT_KEY.to_compact() {\n            tracing::debug!(\"Checkpoint key added to pending with size {val_serialized_size}\");\n        }\n    }\n\n    fn delete(&mut self, key_range: Range<Key>) {\n        trace!(\"DELETE {}-{}\", key_range.start, key_range.end);\n        self.pending_deletions.push((key_range, self.lsn));\n    }\n}\n\n/// Statistics for a DatadirModification.\n#[derive(Default)]\npub struct DatadirModificationStats {\n    pub metadata_images: u64,\n    pub metadata_deltas: u64,\n    pub data_images: u64,\n    pub data_deltas: u64,\n}\n\n/// This struct facilitates accessing either a committed key from the timeline at a\n/// specific LSN, or the latest uncommitted key from a pending modification.\n///\n/// During WAL ingestion, the records from multiple LSNs may be batched in the same\n/// modification before being flushed to the timeline. 
Hence, the routines in WalIngest\n/// need to look up the keys in the modification first before looking them up in the\n/// timeline to not miss the latest updates.\n#[derive(Clone, Copy)]\npub enum Version<'a> {\n    LsnRange(LsnRange),\n    Modified(&'a DatadirModification<'a>),\n}\n\nimpl Version<'_> {\n    async fn get(\n        &self,\n        timeline: &Timeline,\n        key: Key,\n        ctx: &RequestContext,\n    ) -> Result<Bytes, PageReconstructError> {\n        match self {\n            Version::LsnRange(lsns) => timeline.get(key, lsns.effective_lsn, ctx).await,\n            Version::Modified(modification) => modification.get(key, ctx).await,\n        }\n    }\n\n    /// Get a key from the sparse keyspace. Automatically converts the missing key error\n    /// and the empty value into None.\n    async fn sparse_get(\n        &self,\n        timeline: &Timeline,\n        key: Key,\n        ctx: &RequestContext,\n    ) -> Result<Option<Bytes>, PageReconstructError> {\n        let val = self.get(timeline, key, ctx).await;\n        match val {\n            Ok(val) if val.is_empty() => Ok(None),\n            Ok(val) => Ok(Some(val)),\n            Err(PageReconstructError::MissingKey(_)) => Ok(None),\n            Err(e) => Err(e),\n        }\n    }\n\n    pub fn is_latest(&self) -> bool {\n        match self {\n            Version::LsnRange(lsns) => lsns.is_latest(),\n            Version::Modified(_) => true,\n        }\n    }\n\n    pub fn get_lsn(&self) -> Lsn {\n        match self {\n            Version::LsnRange(lsns) => lsns.effective_lsn,\n            Version::Modified(modification) => modification.lsn,\n        }\n    }\n\n    pub fn at(lsn: Lsn) -> Self {\n        Version::LsnRange(LsnRange {\n            effective_lsn: lsn,\n            request_lsn: lsn,\n        })\n    }\n}\n\n//--- Metadata structs stored in key-value pairs in the repository.\n\n#[derive(Debug, Serialize, Deserialize)]\npub(crate) struct DbDirectory {\n    // (spcnode, dbnode) -> 
(do relmapper and PG_VERSION files exist)\n    pub(crate) dbdirs: HashMap<(Oid, Oid), bool>,\n}\n\n// The format of TwoPhaseDirectory changed in PostgreSQL v17, because the filenames of\n// pg_twophase files were expanded from 32-bit XIDs to 64-bit XIDs.  Previously, the files\n// were named like \"pg_twophase/000002E5\", now they're like\n// \"pg_twophase/0000000A000002E4\".\n\n#[derive(Debug, Serialize, Deserialize)]\npub(crate) struct TwoPhaseDirectory {\n    pub(crate) xids: HashSet<TransactionId>,\n}\n\n#[derive(Debug, Serialize, Deserialize)]\nstruct TwoPhaseDirectoryV17 {\n    xids: HashSet<u64>,\n}\n\n#[derive(Debug, Serialize, Deserialize, Default)]\npub(crate) struct RelDirectory {\n    // Set of relations that exist. (relfilenode, forknum)\n    //\n    // TODO: Store it as a btree or radix tree or something else that spans multiple\n    // key-value pairs, if you have a lot of relations\n    pub(crate) rels: HashSet<(Oid, u8)>,\n}\n\n#[derive(Debug, Serialize, Deserialize)]\nstruct RelSizeEntry {\n    nblocks: u32,\n}\n\n#[derive(Debug, Serialize, Deserialize, Default)]\npub(crate) struct SlruSegmentDirectory {\n    // Set of SLRU segments that exist.\n    pub(crate) segments: HashSet<u32>,\n}\n\n#[derive(Copy, Clone, PartialEq, Eq, Debug, enum_map::Enum)]\n#[repr(u8)]\npub(crate) enum DirectoryKind {\n    Db,\n    TwoPhase,\n    Rel,\n    AuxFiles,\n    SlruSegment(SlruKind),\n    RelV2,\n}\n\nimpl DirectoryKind {\n    pub(crate) const KINDS_NUM: usize = <DirectoryKind as Enum>::LENGTH;\n    pub(crate) fn offset(&self) -> usize {\n        self.into_usize()\n    }\n}\n\nstatic ZERO_PAGE: Bytes = Bytes::from_static(&[0u8; BLCKSZ as usize]);\n\n#[allow(clippy::bool_assert_comparison)]\n#[cfg(test)]\nmod tests {\n    use hex_literal::hex;\n    use pageserver_api::models::ShardParameters;\n    use utils::id::TimelineId;\n    use utils::shard::{ShardCount, ShardNumber, ShardStripeSize};\n\n    use super::*;\n    use crate::DEFAULT_PG_VERSION;\n    use 
crate::tenant::harness::TenantHarness;\n\n    /// Test a round trip of aux file updates, from DatadirModification to reading back from the Timeline\n    #[tokio::test]\n    async fn aux_files_round_trip() -> anyhow::Result<()> {\n        let name = \"aux_files_round_trip\";\n        let harness = TenantHarness::create(name).await?;\n\n        pub const TIMELINE_ID: TimelineId =\n            TimelineId::from_array(hex!(\"11223344556677881122334455667788\"));\n\n        let (tenant, ctx) = harness.load().await;\n        let (tline, ctx) = tenant\n            .create_empty_timeline(TIMELINE_ID, Lsn(0x10), DEFAULT_PG_VERSION, &ctx)\n            .await?;\n        let tline = tline.raw_timeline().unwrap();\n\n        // First modification: insert two keys\n        let mut modification = tline.begin_modification(Lsn(0x1000));\n        modification.put_file(\"foo/bar1\", b\"content1\", &ctx).await?;\n        modification.set_lsn(Lsn(0x1008))?;\n        modification.put_file(\"foo/bar2\", b\"content2\", &ctx).await?;\n        modification.commit(&ctx).await?;\n        let expect_1008 = HashMap::from([\n            (\"foo/bar1\".to_string(), Bytes::from_static(b\"content1\")),\n            (\"foo/bar2\".to_string(), Bytes::from_static(b\"content2\")),\n        ]);\n\n        let io_concurrency = IoConcurrency::spawn_for_test();\n\n        let readback = tline\n            .list_aux_files(Lsn(0x1008), &ctx, io_concurrency.clone())\n            .await?;\n        assert_eq!(readback, expect_1008);\n\n        // Second modification: update one key, remove the other\n        let mut modification = tline.begin_modification(Lsn(0x2000));\n        modification.put_file(\"foo/bar1\", b\"content3\", &ctx).await?;\n        modification.set_lsn(Lsn(0x2008))?;\n        modification.put_file(\"foo/bar2\", b\"\", &ctx).await?;\n        modification.commit(&ctx).await?;\n        let expect_2008 =\n            HashMap::from([(\"foo/bar1\".to_string(), 
Bytes::from_static(b\"content3\"))]);\n\n        let readback = tline\n            .list_aux_files(Lsn(0x2008), &ctx, io_concurrency.clone())\n            .await?;\n        assert_eq!(readback, expect_2008);\n\n        // Reading back in time works\n        let readback = tline\n            .list_aux_files(Lsn(0x1008), &ctx, io_concurrency.clone())\n            .await?;\n        assert_eq!(readback, expect_1008);\n\n        Ok(())\n    }\n\n    #[test]\n    fn gap_finding() {\n        let rel = RelTag {\n            spcnode: 1663,\n            dbnode: 208101,\n            relnode: 2620,\n            forknum: 0,\n        };\n        let base_blkno = 1;\n\n        let base_key = rel_block_to_key(rel, base_blkno);\n        let before_base_key = rel_block_to_key(rel, base_blkno - 1);\n\n        let shard = ShardIdentity::unsharded();\n\n        let mut previous_nblocks = 0;\n        for i in 0..10 {\n            let crnt_blkno = base_blkno + i;\n            let gaps = DatadirModification::find_gaps(rel, crnt_blkno, previous_nblocks, &shard);\n\n            previous_nblocks = crnt_blkno + 1;\n\n            if i == 0 {\n                // The first block we write is 1, so we should find the gap.\n                assert_eq!(gaps.unwrap(), KeySpace::single(before_base_key..base_key));\n            } else {\n                assert!(gaps.is_none());\n            }\n        }\n\n        // This is an update to an already existing block. 
No gaps here.\n        let update_blkno = 5;\n        let gaps = DatadirModification::find_gaps(rel, update_blkno, previous_nblocks, &shard);\n        assert!(gaps.is_none());\n\n        // This is an update past the current end block.\n        let after_gap_blkno = 20;\n        let gaps = DatadirModification::find_gaps(rel, after_gap_blkno, previous_nblocks, &shard);\n\n        let gap_start_key = rel_block_to_key(rel, previous_nblocks);\n        let after_gap_key = rel_block_to_key(rel, after_gap_blkno);\n        assert_eq!(\n            gaps.unwrap(),\n            KeySpace::single(gap_start_key..after_gap_key)\n        );\n    }\n\n    #[test]\n    fn sharded_gap_finding() {\n        let rel = RelTag {\n            spcnode: 1663,\n            dbnode: 208101,\n            relnode: 2620,\n            forknum: 0,\n        };\n\n        let first_blkno = 6;\n\n        // This shard will get the even blocks\n        let shard = ShardIdentity::from_params(\n            ShardNumber(0),\n            ShardParameters {\n                count: ShardCount(2),\n                stripe_size: ShardStripeSize(1),\n            },\n        );\n\n        // Only keys belonging to this shard are considered as gaps.\n        let mut previous_nblocks = 0;\n        let gaps =\n            DatadirModification::find_gaps(rel, first_blkno, previous_nblocks, &shard).unwrap();\n        assert!(!gaps.ranges.is_empty());\n        for gap_range in gaps.ranges {\n            let mut k = gap_range.start;\n            while k != gap_range.end {\n                assert_eq!(shard.get_shard_number(&k), shard.number);\n                k = k.next();\n            }\n        }\n\n        previous_nblocks = first_blkno;\n\n        let update_blkno = 2;\n        let gaps = DatadirModification::find_gaps(rel, update_blkno, previous_nblocks, &shard);\n        assert!(gaps.is_none());\n    }\n\n    /*\n        fn assert_current_logical_size<R: Repository>(timeline: &DatadirTimeline<R>, lsn: Lsn) {\n          
  let incremental = timeline.get_current_logical_size();\n            let non_incremental = timeline\n                .get_current_logical_size_non_incremental(lsn)\n                .unwrap();\n            assert_eq!(incremental, non_incremental);\n        }\n    */\n\n    /*\n    ///\n    /// Test list_rels() function, with branches and dropped relations\n    ///\n    #[test]\n    fn test_list_rels_drop() -> Result<()> {\n        let repo = RepoHarness::create(\"test_list_rels_drop\")?.load();\n        let tline = create_empty_timeline(repo, TIMELINE_ID)?;\n        const TESTDB: u32 = 111;\n\n        // Import initial dummy checkpoint record, otherwise the get_timeline() call\n        // after branching fails below\n        let mut writer = tline.begin_record(Lsn(0x10));\n        writer.put_checkpoint(ZERO_CHECKPOINT.clone())?;\n        writer.finish()?;\n\n        // Create a relation on the timeline\n        let mut writer = tline.begin_record(Lsn(0x20));\n        writer.put_rel_page_image(TESTREL_A, 0, TEST_IMG(\"foo blk 0 at 2\"))?;\n        writer.finish()?;\n\n        let writer = tline.begin_record(Lsn(0x00));\n        writer.finish()?;\n\n        // Check that list_rels() lists it after LSN 2, but no before it\n        assert!(!tline.list_rels(0, TESTDB, Lsn(0x10))?.contains(&TESTREL_A));\n        assert!(tline.list_rels(0, TESTDB, Lsn(0x20))?.contains(&TESTREL_A));\n        assert!(tline.list_rels(0, TESTDB, Lsn(0x30))?.contains(&TESTREL_A));\n\n        // Create a branch, check that the relation is visible there\n        repo.branch_timeline(&tline, NEW_TIMELINE_ID, Lsn(0x30))?;\n        let newtline = match repo.get_timeline(NEW_TIMELINE_ID)?.local_timeline() {\n            Some(timeline) => timeline,\n            None => panic!(\"Should have a local timeline\"),\n        };\n        let newtline = DatadirTimelineImpl::new(newtline);\n        assert!(newtline\n            .list_rels(0, TESTDB, Lsn(0x30))?\n            .contains(&TESTREL_A));\n\n        
// Drop it on the branch\n        let mut new_writer = newtline.begin_record(Lsn(0x40));\n        new_writer.drop_relation(TESTREL_A)?;\n        new_writer.finish()?;\n\n        // Check that it's no longer listed on the branch after the point where it was dropped\n        assert!(newtline\n            .list_rels(0, TESTDB, Lsn(0x30))?\n            .contains(&TESTREL_A));\n        assert!(!newtline\n            .list_rels(0, TESTDB, Lsn(0x40))?\n            .contains(&TESTREL_A));\n\n        // Run checkpoint and garbage collection and check that it's still not visible\n        newtline.checkpoint(CheckpointConfig::Forced)?;\n        repo.gc_iteration(Some(NEW_TIMELINE_ID), 0, true)?;\n\n        assert!(!newtline\n            .list_rels(0, TESTDB, Lsn(0x40))?\n            .contains(&TESTREL_A));\n\n        Ok(())\n    }\n     */\n\n    /*\n    #[test]\n    fn test_read_beyond_eof() -> Result<()> {\n        let repo = RepoHarness::create(\"test_read_beyond_eof\")?.load();\n        let tline = create_test_timeline(repo, TIMELINE_ID)?;\n\n        make_some_layers(&tline, Lsn(0x20))?;\n        let mut writer = tline.begin_record(Lsn(0x60));\n        walingest.put_rel_page_image(\n            &mut writer,\n            TESTREL_A,\n            0,\n            TEST_IMG(&format!(\"foo blk 0 at {}\", Lsn(0x60))),\n        )?;\n        writer.finish()?;\n\n        // Test read before rel creation. 
Should error out.\n        assert!(tline.get_rel_page_at_lsn(TESTREL_A, 1, Lsn(0x10), false).is_err());\n\n        // Read block beyond end of relation at different points in time.\n        // These reads should fall into different delta, image, and in-memory layers.\n        assert_eq!(tline.get_rel_page_at_lsn(TESTREL_A, 1, Lsn(0x20), false)?, ZERO_PAGE);\n        assert_eq!(tline.get_rel_page_at_lsn(TESTREL_A, 1, Lsn(0x25), false)?, ZERO_PAGE);\n        assert_eq!(tline.get_rel_page_at_lsn(TESTREL_A, 1, Lsn(0x30), false)?, ZERO_PAGE);\n        assert_eq!(tline.get_rel_page_at_lsn(TESTREL_A, 1, Lsn(0x35), false)?, ZERO_PAGE);\n        assert_eq!(tline.get_rel_page_at_lsn(TESTREL_A, 1, Lsn(0x40), false)?, ZERO_PAGE);\n        assert_eq!(tline.get_rel_page_at_lsn(TESTREL_A, 1, Lsn(0x45), false)?, ZERO_PAGE);\n        assert_eq!(tline.get_rel_page_at_lsn(TESTREL_A, 1, Lsn(0x50), false)?, ZERO_PAGE);\n        assert_eq!(tline.get_rel_page_at_lsn(TESTREL_A, 1, Lsn(0x55), false)?, ZERO_PAGE);\n        assert_eq!(tline.get_rel_page_at_lsn(TESTREL_A, 1, Lsn(0x60), false)?, ZERO_PAGE);\n\n        // Test on an in-memory layer with no preceding layer\n        let mut writer = tline.begin_record(Lsn(0x70));\n        walingest.put_rel_page_image(\n            &mut writer,\n            TESTREL_B,\n            0,\n            TEST_IMG(&format!(\"foo blk 0 at {}\", Lsn(0x70))),\n        )?;\n        writer.finish()?;\n\n        assert_eq!(tline.get_rel_page_at_lsn(TESTREL_B, 1, Lsn(0x70), false)?, ZERO_PAGE);\n\n        Ok(())\n    }\n     */\n}\n"
  },
  {
    "path": "pageserver/src/span.rs",
    "content": "use utils::tracing_span_assert::check_fields_present;\n\nmod extractors {\n    use utils::tracing_span_assert::ConstExtractor;\n\n    pub(super) const TENANT_ID: ConstExtractor = ConstExtractor::new(\"tenant_id\");\n    pub(super) const SHARD_ID: ConstExtractor = ConstExtractor::new(\"shard_id\");\n    pub(super) const TIMELINE_ID: ConstExtractor = ConstExtractor::new(\"timeline_id\");\n}\n\n#[track_caller]\npub(crate) fn debug_assert_current_span_has_tenant_id() {\n    if cfg!(debug_assertions) {\n        if let Err(missing) = check_fields_present!([&extractors::TENANT_ID, &extractors::SHARD_ID])\n        {\n            panic!(\"missing extractors: {missing:?}\")\n        }\n    }\n}\n\n#[track_caller]\npub(crate) fn debug_assert_current_span_has_tenant_and_timeline_id() {\n    if cfg!(debug_assertions) {\n        if let Err(missing) = check_fields_present!([\n            &extractors::TENANT_ID,\n            &extractors::SHARD_ID,\n            &extractors::TIMELINE_ID,\n        ]) {\n            panic!(\"missing extractors: {missing:?}\")\n        }\n    }\n}\n\n#[track_caller]\npub(crate) fn debug_assert_current_span_has_tenant_and_timeline_id_no_shard_id() {\n    if cfg!(debug_assertions) {\n        if let Err(missing) =\n            check_fields_present!([&extractors::TENANT_ID, &extractors::TIMELINE_ID,])\n        {\n            panic!(\"missing extractors: {missing:?}\")\n        }\n    }\n}\n"
  },
  {
    "path": "pageserver/src/statvfs.rs",
    "content": "//! Wrapper around nix::sys::statvfs::Statvfs that allows for mocking.\n\nuse camino::Utf8Path;\n\npub enum Statvfs {\n    Real(nix::sys::statvfs::Statvfs),\n    Mock(mock::Statvfs),\n}\n\n// NB: on macOS, the block count type of struct statvfs is u32.\n// The workaround seems to be to use the non-standard statfs64 call.\n// Sincce it should only be a problem on > 2TiB disks, let's ignore\n// the problem for now and upcast to u64.\nimpl Statvfs {\n    pub fn get(tenants_dir: &Utf8Path, mocked: Option<&mock::Behavior>) -> nix::Result<Self> {\n        if let Some(mocked) = mocked {\n            Ok(Statvfs::Mock(mock::get(tenants_dir, mocked)?))\n        } else {\n            Ok(Statvfs::Real(nix::sys::statvfs::statvfs(\n                tenants_dir.as_std_path(),\n            )?))\n        }\n    }\n\n    // NB: allow() because the block count type is u32 on macOS.\n    #[allow(clippy::useless_conversion, clippy::unnecessary_fallible_conversions)]\n    pub fn blocks(&self) -> u64 {\n        match self {\n            Statvfs::Real(stat) => u64::try_from(stat.blocks()).unwrap(),\n            Statvfs::Mock(stat) => stat.blocks,\n        }\n    }\n\n    // NB: allow() because the block count type is u32 on macOS.\n    #[allow(clippy::useless_conversion, clippy::unnecessary_fallible_conversions)]\n    pub fn blocks_available(&self) -> u64 {\n        match self {\n            Statvfs::Real(stat) => u64::try_from(stat.blocks_available()).unwrap(),\n            Statvfs::Mock(stat) => stat.blocks_available,\n        }\n    }\n\n    pub fn fragment_size(&self) -> u64 {\n        match self {\n            Statvfs::Real(stat) => stat.fragment_size(),\n            Statvfs::Mock(stat) => stat.fragment_size,\n        }\n    }\n\n    pub fn block_size(&self) -> u64 {\n        match self {\n            Statvfs::Real(stat) => stat.block_size(),\n            Statvfs::Mock(stat) => stat.block_size,\n        }\n    }\n\n    /// Get the available and total bytes on the 
filesystem.\n    pub fn get_avail_total_bytes(&self) -> (u64, u64) {\n        // https://unix.stackexchange.com/a/703650\n        let blocksize = if self.fragment_size() > 0 {\n            self.fragment_size()\n        } else {\n            self.block_size()\n        };\n\n        // use blocks_available (b_avail) since, pageserver runs as unprivileged user\n        let avail_bytes = self.blocks_available() * blocksize;\n        let total_bytes = self.blocks() * blocksize;\n\n        (avail_bytes, total_bytes)\n    }\n}\n\npub mod mock {\n    use camino::Utf8Path;\n    pub use pageserver_api::config::statvfs::mock::Behavior;\n    use regex::Regex;\n    use tracing::log::info;\n\n    pub fn get(tenants_dir: &Utf8Path, behavior: &Behavior) -> nix::Result<Statvfs> {\n        info!(\"running mocked statvfs\");\n\n        match behavior {\n            Behavior::Success {\n                blocksize,\n                total_blocks,\n                name_filter,\n            } => {\n                let used_bytes = walk_dir_disk_usage(tenants_dir, name_filter.as_deref()).unwrap();\n\n                // round it up to the nearest block multiple\n                let used_blocks = used_bytes.div_ceil(*blocksize);\n\n                if used_blocks > *total_blocks {\n                    panic!(\n                        \"mocking error: used_blocks > total_blocks: {used_blocks} > {total_blocks}\"\n                    );\n                }\n\n                let avail_blocks = total_blocks - used_blocks;\n\n                Ok(Statvfs {\n                    blocks: *total_blocks,\n                    blocks_available: avail_blocks,\n                    fragment_size: *blocksize,\n                    block_size: *blocksize,\n                })\n            }\n            #[cfg(feature = \"testing\")]\n            Behavior::Failure { mocked_error } => Err((*mocked_error).into()),\n        }\n    }\n\n    fn walk_dir_disk_usage(path: &Utf8Path, name_filter: Option<&Regex>) -> 
anyhow::Result<u64> {\n        let mut total = 0;\n        for entry in walkdir::WalkDir::new(path) {\n            let entry = entry?;\n            if !entry.file_type().is_file() {\n                continue;\n            }\n            if !name_filter\n                .as_ref()\n                .map(|filter| filter.is_match(entry.file_name().to_str().unwrap()))\n                .unwrap_or(true)\n            {\n                continue;\n            }\n            let m = match entry.metadata() {\n                Ok(m) => m,\n                Err(e) if is_not_found(&e) => {\n                    // some temp file which got removed right as we are walking\n                    continue;\n                }\n                Err(e) => {\n                    return Err(anyhow::Error::new(e)\n                        .context(format!(\"get metadata of {:?}\", entry.path())));\n                }\n            };\n            total += m.len();\n        }\n        Ok(total)\n    }\n\n    fn is_not_found(e: &walkdir::Error) -> bool {\n        let Some(io_error) = e.io_error() else {\n            return false;\n        };\n        let kind = io_error.kind();\n        matches!(kind, std::io::ErrorKind::NotFound)\n    }\n\n    pub struct Statvfs {\n        pub blocks: u64,\n        pub blocks_available: u64,\n        pub fragment_size: u64,\n        pub block_size: u64,\n    }\n}\n"
  },
  {
    "path": "pageserver/src/task_mgr.rs",
    "content": "//!\n//! This module provides centralized handling of tokio tasks in the Page Server.\n//!\n//! We provide a few basic facilities:\n//! - A global registry of tasks that lists what kind of tasks they are, and\n//!   which tenant or timeline they are working on\n//!\n//! - The ability to request a task to shut down.\n//!\n//!\n//! # How it works?\n//!\n//! There is a global hashmap of all the tasks (`TASKS`). Whenever a new\n//! task is spawned, a PageServerTask entry is added there, and when a\n//! task dies, it removes itself from the hashmap. If you want to kill a\n//! task, you can scan the hashmap to find it.\n//!\n//! # Task shutdown\n//!\n//! To kill a task, we rely on co-operation from the victim. Each task is\n//! expected to periodically call the `is_shutdown_requested()` function, and\n//! if it returns true, exit gracefully. In addition to that, when waiting for\n//! the network or other long-running operation, you can use\n//! `shutdown_watcher()` function to get a Future that will become ready if\n//! the current task has been requested to shut down. You can use that with\n//! Tokio select!().\n//!\n//! TODO: This would be a good place to also handle panics in a somewhat sane way.\n//! Depending on what task panics, we might want to kill the whole server, or\n//! 
only a single tenant or timeline.\n//!\n\nuse std::collections::HashMap;\nuse std::fmt;\nuse std::future::Future;\nuse std::num::NonZeroUsize;\nuse std::panic::AssertUnwindSafe;\nuse std::str::FromStr;\nuse std::sync::atomic::{AtomicU64, Ordering};\nuse std::sync::{Arc, Mutex};\nuse std::time::Duration;\n\nuse futures::FutureExt;\nuse once_cell::sync::Lazy;\nuse pageserver_api::shard::TenantShardId;\nuse tokio::task::JoinHandle;\nuse tokio::task_local;\nuse tokio_util::sync::CancellationToken;\nuse tracing::{debug, error, info, warn};\nuse utils::env;\nuse utils::id::TimelineId;\n\nuse crate::metrics::set_tokio_runtime_setup;\n\n//\n// There are four runtimes:\n//\n// Compute request runtime\n//  - used to handle connections from compute nodes. Any tasks related to satisfying\n//    GetPage requests, base backups, import, and other such compute node operations\n//    are handled by the Compute request runtime\n//  - page_service.rs\n//  - this includes layer downloads from remote storage, if a layer is needed to\n//    satisfy a GetPage request\n//\n// Management request runtime\n//  - used to handle HTTP API requests\n//\n// WAL receiver runtime:\n//  - used to handle WAL receiver connections.\n//  - and to receive updates from storage_broker\n//\n// Background runtime\n//  - layer flushing\n//  - garbage collection\n//  - compaction\n//  - remote storage uploads\n//  - initial tenant loading\n//\n// Everything runs in a tokio task. If you spawn new tasks, spawn them using the correct\n// runtime.\n//\n// There might be situations when one task needs to wait for a task running in another\n// Runtime to finish. For example, if a background operation needs a layer from remote\n// storage, it will start to download it. 
If a background operation needs a remote layer,\n// and the download was already initiated by a GetPage request, the background task\n// will wait for the download - running in the Page server runtime - to finish.\n// Another example: the initial tenant loading tasks are launched in the background ops\n// runtime. If a GetPage request comes in before the load of a tenant has finished, the\n// GetPage request will wait for the tenant load to finish.\n//\n// The core Timeline code is synchronous, and uses a bunch of std Mutexes and RWLocks to\n// protect data structures. Let's keep it that way. Synchronous code is easier to debug\n// and analyze, and there's a lot of hairy, low-level, performance critical code there.\n//\n// It's nice to have different runtimes, so that you can quickly eyeball how much CPU\n// time each class of operations is taking, with 'top -H' or similar.\n//\n// It's also good to avoid hogging all threads that would be needed to process\n// other operations, if the upload tasks e.g. get blocked on locks. 
It shouldn't\n// happen, but still.\n//\n\npub(crate) static TOKIO_WORKER_THREADS: Lazy<NonZeroUsize> = Lazy::new(|| {\n    // replicates tokio-1.28.1::loom::sys::num_cpus which is not available publicly\n    // tokio would have already panicked for parsing errors or NotUnicode\n    //\n    // this will be wrong if any of the runtimes gets their worker threads configured to something\n    // else, but that has not been needed in a long time.\n    NonZeroUsize::new(\n        std::env::var(\"TOKIO_WORKER_THREADS\")\n            .map(|s| s.parse::<usize>().unwrap())\n            .unwrap_or_else(|_e| usize::max(2, num_cpus::get())),\n    )\n    .expect(\"the max() ensures that this is not zero\")\n});\n\nenum TokioRuntimeMode {\n    SingleThreaded,\n    MultiThreaded { num_workers: NonZeroUsize },\n}\n\nimpl FromStr for TokioRuntimeMode {\n    type Err = String;\n\n    fn from_str(s: &str) -> Result<Self, Self::Err> {\n        match s {\n            \"current_thread\" => Ok(TokioRuntimeMode::SingleThreaded),\n            s => match s.strip_prefix(\"multi_thread:\") {\n                Some(\"default\") => Ok(TokioRuntimeMode::MultiThreaded {\n                    num_workers: *TOKIO_WORKER_THREADS,\n                }),\n                Some(suffix) => {\n                    let num_workers = suffix.parse::<NonZeroUsize>().map_err(|e| {\n                        format!(\n                            \"invalid number of multi-threaded runtime workers ({suffix:?}): {e}\",\n                        )\n                    })?;\n                    Ok(TokioRuntimeMode::MultiThreaded { num_workers })\n                }\n                None => Err(format!(\"invalid runtime config: {s:?}\")),\n            },\n        }\n    }\n}\n\nstatic TOKIO_THREAD_STACK_SIZE: Lazy<NonZeroUsize> = Lazy::new(|| {\n    env::var(\"NEON_PAGESERVER_TOKIO_THREAD_STACK_SIZE\")\n        // the default 2MiB are insufficient, especially in debug mode\n        .unwrap_or_else(|| NonZeroUsize::new(4 * 1024 * 
1024).unwrap())\n});\n\nstatic ONE_RUNTIME: Lazy<Option<tokio::runtime::Runtime>> = Lazy::new(|| {\n    let thread_name = \"pageserver-tokio\";\n    let Some(mode) = env::var(\"NEON_PAGESERVER_USE_ONE_RUNTIME\") else {\n        // If the env var is not set, leave this static as None.\n        set_tokio_runtime_setup(\n            \"multiple-runtimes\",\n            NUM_MULTIPLE_RUNTIMES\n                .checked_mul(*TOKIO_WORKER_THREADS)\n                .unwrap(),\n        );\n        return None;\n    };\n    Some(match mode {\n        TokioRuntimeMode::SingleThreaded => {\n            set_tokio_runtime_setup(\"one-runtime-single-threaded\", NonZeroUsize::new(1).unwrap());\n            tokio::runtime::Builder::new_current_thread()\n                .thread_name(thread_name)\n                .enable_all()\n                .thread_stack_size(TOKIO_THREAD_STACK_SIZE.get())\n                .build()\n                .expect(\"failed to create one single runtime\")\n        }\n        TokioRuntimeMode::MultiThreaded { num_workers } => {\n            set_tokio_runtime_setup(\"one-runtime-multi-threaded\", num_workers);\n            tokio::runtime::Builder::new_multi_thread()\n                .thread_name(thread_name)\n                .enable_all()\n                .worker_threads(num_workers.get())\n                .thread_stack_size(TOKIO_THREAD_STACK_SIZE.get())\n                .build()\n                .expect(\"failed to create one multi-threaded runtime\")\n        }\n    })\n});\n\n/// Declare a lazy static variable named `$varname` that will resolve\n/// to a tokio runtime handle. If the env var `NEON_PAGESERVER_USE_ONE_RUNTIME`\n/// is set, this will resolve to `ONE_RUNTIME`. 
Otherwise, the macro invocation\n/// declares a separate runtime and the lazy static variable `$varname`\n/// will resolve to that separate runtime.\n///\n/// The result is that `$varname.spawn()` will use `ONE_RUNTIME` if\n/// `NEON_PAGESERVER_USE_ONE_RUNTIME` is set, and will use the separate runtime\n/// otherwise.\nmacro_rules! pageserver_runtime {\n    ($varname:ident, $name:literal) => {\n        pub static $varname: Lazy<&'static tokio::runtime::Runtime> = Lazy::new(|| {\n            if let Some(runtime) = &*ONE_RUNTIME {\n                return runtime;\n            }\n            static RUNTIME: Lazy<tokio::runtime::Runtime> = Lazy::new(|| {\n                tokio::runtime::Builder::new_multi_thread()\n                    .thread_name($name)\n                    .worker_threads(TOKIO_WORKER_THREADS.get())\n                    .enable_all()\n                    .thread_stack_size(TOKIO_THREAD_STACK_SIZE.get())\n                    .build()\n                    .expect(std::concat!(\"Failed to create runtime \", $name))\n            });\n            &*RUNTIME\n        });\n    };\n}\n\npageserver_runtime!(COMPUTE_REQUEST_RUNTIME, \"compute request worker\");\npageserver_runtime!(MGMT_REQUEST_RUNTIME, \"mgmt request worker\");\npageserver_runtime!(WALRECEIVER_RUNTIME, \"walreceiver worker\");\npageserver_runtime!(BACKGROUND_RUNTIME, \"background op worker\");\n// Bump this number when adding a new pageserver_runtime!\nconst NUM_MULTIPLE_RUNTIMES: NonZeroUsize = NonZeroUsize::new(4).unwrap();\n\n#[derive(Debug, Clone, Copy)]\npub struct PageserverTaskId(u64);\n\nimpl fmt::Display for PageserverTaskId {\n    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {\n        self.0.fmt(f)\n    }\n}\n\n/// Each task that we track is associated with a \"task ID\". It's just an\n/// increasing number that we assign. 
Note that it is different from tokio::task::Id.\nstatic NEXT_TASK_ID: AtomicU64 = AtomicU64::new(1);\n\n/// Global registry of tasks\nstatic TASKS: Lazy<Mutex<HashMap<u64, Arc<PageServerTask>>>> =\n    Lazy::new(|| Mutex::new(HashMap::new()));\n\ntask_local! {\n    // This is a cancellation token which will be cancelled when a task needs to shut down. The\n    // root token is kept in the global registry, so that anyone can send the signal to request\n    // task shutdown.\n    static SHUTDOWN_TOKEN: CancellationToken;\n\n    // Each task holds reference to its own PageServerTask here.\n    static CURRENT_TASK: Arc<PageServerTask>;\n}\n\n///\n/// There are many kinds of tasks in the system. Some are associated with a particular\n/// tenant or timeline, while others are global.\n///\n/// Note that we don't try to limit how many task of a certain kind can be running\n/// at the same time.\n///\n#[derive(\n    Debug,\n    // NB: enumset::EnumSetType derives PartialEq, Eq, Clone, Copy\n    enumset::EnumSetType,\n    enum_map::Enum,\n    serde::Serialize,\n    serde::Deserialize,\n    strum_macros::IntoStaticStr,\n    strum_macros::EnumString,\n)]\npub enum TaskKind {\n    // Pageserver startup, i.e., `main`\n    Startup,\n\n    // libpq listener task. It just accepts connection and spawns a\n    // PageRequestHandler task for each connection.\n    LibpqEndpointListener,\n\n    // HTTP endpoint listener.\n    HttpEndpointListener,\n\n    /// Task that handles a single page service connection. 
A PageRequestHandler\n    /// task starts detached from any particular tenant or timeline, but it can\n    /// be associated with one later, after receiving a command from the client.\n    /// Also used for the gRPC page service API, including the main server task.\n    PageRequestHandler,\n\n    /// Manages the WAL receiver connection for one timeline.\n    /// It subscribes to events from storage_broker and decides which safekeeper to connect to.\n    /// Once the decision has been made, it establishes the connection using the `tokio-postgres` library.\n    /// There is at most one connection at any given time.\n    ///\n    /// That `tokio-postgres` library represents a connection as two objects: a `Client` and a `Connection`.\n    /// The `Client` object is what library users use to make requests & get responses.\n    /// Internally, `Client` hands over requests to the `Connection` object.\n    /// The `Connection` object is responsible for speaking the wire protocol.\n    ///\n    /// Walreceiver uses a legacy abstraction called `TaskHandle` to represent the activity of establishing and handling a connection.\n    /// The `WalReceiverManager` task ensures that this `TaskHandle` task does not outlive the `WalReceiverManager` task.\n    /// For the `RequestContext` that we hand to the TaskHandle, we use the [`WalReceiverConnectionHandler`] task kind.\n    ///\n    /// Once the connection is established, the `TaskHandle` task spawns a\n    /// [`WalReceiverConnectionPoller`] task that is responsible for polling\n    /// the `Connection` object.\n    /// A `CancellationToken` created by the `TaskHandle` task ensures\n    /// that the [`WalReceiverConnectionPoller`] task will cancel soon after as the `TaskHandle` is dropped.\n    ///\n    /// [`WalReceiverConnectionHandler`]: Self::WalReceiverConnectionHandler\n    /// [`WalReceiverConnectionPoller`]: Self::WalReceiverConnectionPoller\n    WalReceiverManager,\n\n    /// The `TaskHandle` task that executes 
`handle_walreceiver_connection`.\n    /// See the comment on [`WalReceiverManager`].\n    ///\n    /// [`WalReceiverManager`]: Self::WalReceiverManager\n    WalReceiverConnectionHandler,\n\n    /// The task that polls the `tokio-postgres::Connection` object.\n    /// Spawned by task [`WalReceiverConnectionHandler`](Self::WalReceiverConnectionHandler).\n    /// See the comment on [`WalReceiverManager`](Self::WalReceiverManager).\n    WalReceiverConnectionPoller,\n\n    // Garbage collection worker. One per tenant\n    GarbageCollector,\n\n    // Compaction. One per tenant.\n    Compaction,\n\n    // Eviction. One per timeline.\n    Eviction,\n\n    // Tenant housekeeping (flush idle ephemeral layers, shut down idle walredo, etc.).\n    TenantHousekeeping,\n\n    /// See [`crate::disk_usage_eviction_task`].\n    DiskUsageEviction,\n\n    /// See [`crate::tenant::secondary`].\n    SecondaryDownloads,\n\n    /// See [`crate::tenant::secondary`].\n    SecondaryUploads,\n\n    // Initial logical size calculation\n    InitialLogicalSizeCalculation,\n\n    OndemandLogicalSizeCalculation,\n\n    // Task that flushes frozen in-memory layers to disk\n    LayerFlushTask,\n\n    // Task that uploads a file to remote storage\n    RemoteUploadTask,\n\n    // task that handles the initial downloading of all tenants\n    InitialLoad,\n\n    // task that handles attaching a tenant\n    Attach,\n\n    // Used mostly for background deletion from s3\n    TimelineDeletionWorker,\n\n    // task that handles metrics collection\n    MetricsCollection,\n\n    // task that drives downloading layers\n    DownloadAllRemoteLayers,\n    // Task that calculates synthetic size for all active tenants\n    CalculateSyntheticSize,\n\n    // A request that comes in via the pageserver HTTP API.\n    MgmtRequest,\n\n    DebugTool,\n\n    EphemeralFilePreWarmPageCache,\n\n    LayerDownload,\n\n    #[cfg(test)]\n    UnitTest,\n\n    DetachAncestor,\n\n    ImportPgdata,\n\n    /// Background task of 
[`crate::basebackup_cache::BasebackupCache`].\n    /// Prepares basebackups and clears outdated entries.\n    BasebackupCache,\n}\n\n#[derive(Default)]\nstruct MutableTaskState {\n    /// Handle for waiting for the task to exit. It can be None, if the\n    /// the task has already exited.\n    join_handle: Option<JoinHandle<()>>,\n}\n\nstruct PageServerTask {\n    task_id: PageserverTaskId,\n\n    kind: TaskKind,\n\n    name: String,\n\n    // To request task shutdown, just cancel this token.\n    cancel: CancellationToken,\n\n    /// Tasks may optionally be launched for a particular tenant/timeline, enabling\n    /// later cancelling tasks for that tenant/timeline in [`shutdown_tasks`]\n    tenant_shard_id: TenantShardId,\n    timeline_id: Option<TimelineId>,\n\n    mutable: Mutex<MutableTaskState>,\n}\n\n/// Launch a new task\n/// Note: if shutdown_process_on_error is set to true failure\n///   of the task will lead to shutdown of entire process\npub fn spawn<F>(\n    runtime: &tokio::runtime::Handle,\n    kind: TaskKind,\n    tenant_shard_id: TenantShardId,\n    timeline_id: Option<TimelineId>,\n    name: &str,\n    future: F,\n) -> PageserverTaskId\nwhere\n    F: Future<Output = anyhow::Result<()>> + Send + 'static,\n{\n    let cancel = CancellationToken::new();\n    let task_id = NEXT_TASK_ID.fetch_add(1, Ordering::Relaxed);\n    let task = Arc::new(PageServerTask {\n        task_id: PageserverTaskId(task_id),\n        kind,\n        name: name.to_string(),\n        cancel: cancel.clone(),\n        tenant_shard_id,\n        timeline_id,\n        mutable: Mutex::new(MutableTaskState { join_handle: None }),\n    });\n\n    TASKS.lock().unwrap().insert(task_id, Arc::clone(&task));\n\n    let mut task_mut = task.mutable.lock().unwrap();\n\n    let task_name = name.to_string();\n    let task_cloned = Arc::clone(&task);\n    let join_handle = runtime.spawn(task_wrapper(\n        task_name,\n        task_id,\n        task_cloned,\n        cancel,\n        future,\n   
 ));\n    task_mut.join_handle = Some(join_handle);\n    drop(task_mut);\n\n    // The task is now running. Nothing more to do here\n    PageserverTaskId(task_id)\n}\n\n/// This wrapper function runs in a newly-spawned task. It initializes the\n/// task-local variables and calls the payload function.\nasync fn task_wrapper<F>(\n    task_name: String,\n    task_id: u64,\n    task: Arc<PageServerTask>,\n    shutdown_token: CancellationToken,\n    future: F,\n) where\n    F: Future<Output = anyhow::Result<()>> + Send + 'static,\n{\n    debug!(\"Starting task '{}'\", task_name);\n\n    // wrap the future so we log panics and errors\n    let tenant_shard_id = task.tenant_shard_id;\n    let timeline_id = task.timeline_id;\n    let fut = async move {\n        // We use AssertUnwindSafe here so that the payload function\n        // doesn't need to be UnwindSafe. We don't do anything after the\n        // unwinding that would expose us to unwind-unsafe behavior.\n        let result = AssertUnwindSafe(future).catch_unwind().await;\n        match result {\n            Ok(Ok(())) => {\n                debug!(\"Task '{}' exited normally\", task_name);\n            }\n            Ok(Err(err)) => {\n                error!(\n                    \"Task '{}' tenant_shard_id: {:?}, timeline_id: {:?} exited with error: {:?}\",\n                    task_name, tenant_shard_id, timeline_id, err\n                );\n            }\n            Err(err) => {\n                error!(\n                    \"Task '{}' tenant_shard_id: {:?}, timeline_id: {:?} panicked: {:?}\",\n                    task_name, tenant_shard_id, timeline_id, err\n                );\n            }\n        }\n    };\n\n    // add the task-locals\n    let fut = CURRENT_TASK.scope(task, fut);\n    let fut = SHUTDOWN_TOKEN.scope(shutdown_token, fut);\n\n    // poll future to completion\n    fut.await;\n\n    // Remove our entry from the global hashmap.\n    TASKS\n        .lock()\n        .unwrap()\n        
.remove(&task_id)\n        .expect(\"no task in registry\");\n}\n\npub async fn exit_on_panic_or_error<T, E>(\n    task_name: &'static str,\n    future: impl Future<Output = Result<T, E>>,\n) -> T\nwhere\n    E: std::fmt::Debug,\n{\n    // We use AssertUnwindSafe here so that the payload function\n    // doesn't need to be UnwindSafe. We don't do anything after the\n    // unwinding that would expose us to unwind-unsafe behavior.\n    let result = AssertUnwindSafe(future).catch_unwind().await;\n    match result {\n        Ok(Ok(val)) => val,\n        Ok(Err(err)) => {\n            error!(\n                task_name,\n                \"Task exited with error, exiting process: {err:?}\"\n            );\n            std::process::exit(1);\n        }\n        Err(panic_obj) => {\n            error!(task_name, \"Task panicked, exiting process: {panic_obj:?}\");\n            std::process::exit(1);\n        }\n    }\n}\n\n/// Signal and wait for tasks to shut down.\n///\n///\n/// The arguments are used to select the tasks to kill. Any None arguments are\n/// ignored. 
For example, to shut down all WalReceiver tasks:\n///\n///   shutdown_tasks(Some(TaskKind::WalReceiver), None, None)\n///\n/// Or to shut down all tasks for given timeline:\n///\n///   shutdown_tasks(None, Some(tenant_shard_id), Some(timeline_id))\n///\npub async fn shutdown_tasks(\n    kind: Option<TaskKind>,\n    tenant_shard_id: Option<TenantShardId>,\n    timeline_id: Option<TimelineId>,\n) {\n    let mut victim_tasks = Vec::new();\n\n    {\n        let tasks = TASKS.lock().unwrap();\n        for task in tasks.values() {\n            if (kind.is_none() || Some(task.kind) == kind)\n                && (tenant_shard_id.is_none() || Some(task.tenant_shard_id) == tenant_shard_id)\n                && (timeline_id.is_none() || task.timeline_id == timeline_id)\n            {\n                task.cancel.cancel();\n                victim_tasks.push((\n                    Arc::clone(task),\n                    task.kind,\n                    task.tenant_shard_id,\n                    task.timeline_id,\n                ));\n            }\n        }\n    }\n\n    let log_all = kind.is_none() && tenant_shard_id.is_none() && timeline_id.is_none();\n\n    for (task, task_kind, tenant_shard_id, timeline_id) in victim_tasks {\n        let join_handle = {\n            let mut task_mut = task.mutable.lock().unwrap();\n            task_mut.join_handle.take()\n        };\n        if let Some(mut join_handle) = join_handle {\n            if log_all {\n                // warn to catch these in tests; there shouldn't be any\n                warn!(name = task.name, tenant_shard_id = ?tenant_shard_id, timeline_id = ?timeline_id, kind = ?task_kind, \"stopping left-over\");\n            }\n            const INITIAL_COMPLAIN_TIMEOUT: Duration = Duration::from_secs(1);\n            const PERIODIC_COMPLAIN_TIMEOUT: Duration = Duration::from_secs(60);\n            if tokio::time::timeout(INITIAL_COMPLAIN_TIMEOUT, &mut join_handle)\n                .await\n                .is_err()\n           
 {\n                // allow some time to elapse before logging to cut down the number of log\n                // lines.\n                info!(\"waiting for task {} to shut down\", task.name);\n                loop {\n                    tokio::select! {\n                        // we never handled this return value, but:\n                        // - we don't deschedule which would lead to is_cancelled\n                        // - panics are already logged (is_panicked)\n                        // - task errors are already logged in the wrapper\n                        _ = &mut join_handle => break,\n                        _ = tokio::time::sleep(PERIODIC_COMPLAIN_TIMEOUT) => info!(\"still waiting for task {} to shut down\", task.name),\n                    }\n                }\n                info!(\"task {} completed\", task.name);\n            }\n        } else {\n            // Possibly one of:\n            //  * The task had not even fully started yet.\n            //  * It was shut down concurrently and already exited\n        }\n    }\n}\n\npub fn current_task_kind() -> Option<TaskKind> {\n    CURRENT_TASK.try_with(|ct| ct.kind).ok()\n}\n\npub fn current_task_id() -> Option<PageserverTaskId> {\n    CURRENT_TASK.try_with(|ct| ct.task_id).ok()\n}\n\n/// A Future that can be used to check if the current task has been requested to\n/// shut down.\npub async fn shutdown_watcher() {\n    let token = SHUTDOWN_TOKEN\n        .try_with(|t| t.clone())\n        .expect(\"shutdown_watcher() called in an unexpected task or thread\");\n\n    token.cancelled().await;\n}\n\n/// Clone the current task's cancellation token, which can be moved across tasks.\n///\n/// When the task which is currently executing is shutdown, the cancellation token will be\n/// cancelled. 
It can however be moved to other tasks, such as `tokio::task::spawn_blocking` or\n/// `tokio::task::JoinSet::spawn`.\npub fn shutdown_token() -> CancellationToken {\n    let res = SHUTDOWN_TOKEN.try_with(|t| t.clone());\n\n    if cfg!(test) {\n        // in tests this method is called from non-taskmgr spawned tasks, and that is all ok.\n        res.unwrap_or_default()\n    } else {\n        res.expect(\"shutdown_token() called in an unexpected task or thread\")\n    }\n}\n\n/// Has the current task been requested to shut down?\npub fn is_shutdown_requested() -> bool {\n    if let Ok(true_or_false) = SHUTDOWN_TOKEN.try_with(|t| t.is_cancelled()) {\n        true_or_false\n    } else {\n        if !cfg!(test) {\n            warn!(\"is_shutdown_requested() called in an unexpected task or thread\");\n        }\n        false\n    }\n}\n"
  },
  {
    "path": "pageserver/src/tenant/blob_io.rs",
    "content": "//!\n//! Functions for reading and writing variable-sized \"blobs\".\n//!\n//! Each blob begins with a 1- or 4-byte length field, followed by the\n//! actual data. If the length is smaller than 128 bytes, the length\n//! is written as one byte. If it's larger than that, the length\n//! is written as a four-byte integer, in big-endian, with the high\n//! bit set. This way, we can detect whether it's 1- or 4-byte header\n//! by peeking at the first byte. For blobs of 128 bytes or more,\n//! we also specify three reserved bits, only one of the three bit\n//! patterns is currently in use (0b011) and signifies compression\n//! with zstd.\n//!\n//! len <  128: 0XXXXXXX\n//! len >= 128: 1CCCXXXX XXXXXXXX XXXXXXXX XXXXXXXX\n//!\nuse std::cmp::min;\n\nuse anyhow::Context;\nuse async_compression::Level;\nuse bytes::{BufMut, BytesMut};\nuse pageserver_api::models::ImageCompressionAlgorithm;\nuse tokio::io::AsyncWriteExt;\nuse tokio_epoll_uring::IoBuf;\nuse tokio_util::sync::CancellationToken;\nuse tracing::warn;\n\nuse crate::context::RequestContext;\nuse crate::page_cache::PAGE_SZ;\nuse crate::tenant::block_io::BlockCursor;\nuse crate::virtual_file::IoBufferMut;\nuse crate::virtual_file::owned_buffers_io::io_buf_ext::{FullSlice, IoBufExt};\nuse crate::virtual_file::owned_buffers_io::write::{BufferedWriter, FlushTaskError};\nuse crate::virtual_file::owned_buffers_io::write::{BufferedWriterShutdownMode, OwnedAsyncWriter};\n\n#[derive(Copy, Clone, Debug)]\npub struct CompressionInfo {\n    pub written_compressed: bool,\n    pub compressed_size: Option<usize>,\n}\n\n/// A blob header, with header+data length and compression info.\n///\n/// TODO: use this more widely, and add an encode() method too.\n/// TODO: document the header format.\n#[derive(Clone, Copy, Default)]\npub struct Header {\n    pub header_len: usize,\n    pub data_len: usize,\n    pub compression_bits: u8,\n}\n\nimpl Header {\n    /// Decodes a header from a byte slice.\n    pub fn 
decode(bytes: &[u8]) -> anyhow::Result<Self> {\n        let Some(&first_header_byte) = bytes.first() else {\n            anyhow::bail!(\"zero-length blob header\");\n        };\n\n        // If the first bit is 0, this is just a 1-byte length prefix up to 128 bytes.\n        if first_header_byte < 0x80 {\n            return Ok(Self {\n                header_len: 1, // by definition\n                data_len: first_header_byte as usize,\n                compression_bits: BYTE_UNCOMPRESSED,\n            });\n        }\n\n        // Otherwise, this is a 4-byte header containing compression information and length.\n        const HEADER_LEN: usize = 4;\n        let mut header_buf: [u8; HEADER_LEN] = bytes[0..HEADER_LEN]\n            .try_into()\n            .map_err(|_| anyhow::anyhow!(\"blob header too short: {bytes:?}\"))?;\n\n        // TODO: verify the compression bits and convert to an enum.\n        let compression_bits = header_buf[0] & LEN_COMPRESSION_BIT_MASK;\n        header_buf[0] &= !LEN_COMPRESSION_BIT_MASK;\n        let data_len = u32::from_be_bytes(header_buf) as usize;\n\n        Ok(Self {\n            header_len: HEADER_LEN,\n            data_len,\n            compression_bits,\n        })\n    }\n\n    /// Returns the total header+data length.\n    pub fn total_len(&self) -> usize {\n        self.header_len + self.data_len\n    }\n}\n\n#[derive(Debug, thiserror::Error)]\npub enum WriteBlobError {\n    #[error(transparent)]\n    Flush(FlushTaskError),\n    #[error(transparent)]\n    Other(anyhow::Error),\n}\n\nimpl WriteBlobError {\n    pub fn is_cancel(&self) -> bool {\n        match self {\n            WriteBlobError::Flush(e) => e.is_cancel(),\n            WriteBlobError::Other(_) => false,\n        }\n    }\n    pub fn into_anyhow(self) -> anyhow::Error {\n        match self {\n            WriteBlobError::Flush(e) => e.into_anyhow(),\n            WriteBlobError::Other(e) => e,\n        }\n    }\n}\n\nimpl BlockCursor<'_> {\n    /// Read a blob into 
a new buffer.\n    pub async fn read_blob(\n        &self,\n        offset: u64,\n        ctx: &RequestContext,\n    ) -> Result<Vec<u8>, std::io::Error> {\n        let mut buf = Vec::new();\n        self.read_blob_into_buf(offset, &mut buf, ctx).await?;\n        Ok(buf)\n    }\n    /// Read blob into the given buffer. Any previous contents in the buffer\n    /// are overwritten.\n    pub async fn read_blob_into_buf(\n        &self,\n        offset: u64,\n        dstbuf: &mut Vec<u8>,\n        ctx: &RequestContext,\n    ) -> Result<(), std::io::Error> {\n        let mut blknum = (offset / PAGE_SZ as u64) as u32;\n        let mut off = (offset % PAGE_SZ as u64) as usize;\n\n        let mut buf = self.read_blk(blknum, ctx).await?;\n\n        // peek at the first byte, to determine if it's a 1- or 4-byte length\n        let first_len_byte = buf[off];\n        let len: usize = if first_len_byte < 0x80 {\n            // 1-byte length header\n            off += 1;\n            first_len_byte as usize\n        } else {\n            // 4-byte length header\n            let mut len_buf = [0u8; 4];\n            let thislen = PAGE_SZ - off;\n            if thislen < 4 {\n                // it is split across two pages\n                len_buf[..thislen].copy_from_slice(&buf[off..PAGE_SZ]);\n                blknum += 1;\n                buf = self.read_blk(blknum, ctx).await?;\n                len_buf[thislen..].copy_from_slice(&buf[0..4 - thislen]);\n                off = 4 - thislen;\n            } else {\n                len_buf.copy_from_slice(&buf[off..off + 4]);\n                off += 4;\n            }\n            let bit_mask = if self.read_compressed {\n                !LEN_COMPRESSION_BIT_MASK\n            } else {\n                0x7f\n            };\n            len_buf[0] &= bit_mask;\n            u32::from_be_bytes(len_buf) as usize\n        };\n        let compression_bits = first_len_byte & LEN_COMPRESSION_BIT_MASK;\n\n        let mut tmp_buf = Vec::new();\n  
      let buf_to_write;\n        let compression = if compression_bits <= BYTE_UNCOMPRESSED || !self.read_compressed {\n            if compression_bits > BYTE_UNCOMPRESSED {\n                warn!(\"reading key above future limit ({len} bytes)\");\n            }\n            buf_to_write = dstbuf;\n            None\n        } else if compression_bits == BYTE_ZSTD {\n            buf_to_write = &mut tmp_buf;\n            Some(dstbuf)\n        } else {\n            let error = std::io::Error::new(\n                std::io::ErrorKind::InvalidData,\n                format!(\"invalid compression byte {compression_bits:x}\"),\n            );\n            return Err(error);\n        };\n\n        buf_to_write.clear();\n        buf_to_write.reserve(len);\n\n        // Read the payload\n        let mut remain = len;\n        while remain > 0 {\n            let mut page_remain = PAGE_SZ - off;\n            if page_remain == 0 {\n                // continue on next page\n                blknum += 1;\n                buf = self.read_blk(blknum, ctx).await?;\n                off = 0;\n                page_remain = PAGE_SZ;\n            }\n            let this_blk_len = min(remain, page_remain);\n            buf_to_write.extend_from_slice(&buf[off..off + this_blk_len]);\n            remain -= this_blk_len;\n            off += this_blk_len;\n        }\n\n        if let Some(dstbuf) = compression {\n            if compression_bits == BYTE_ZSTD {\n                let mut decoder = async_compression::tokio::write::ZstdDecoder::new(dstbuf);\n                decoder.write_all(buf_to_write).await?;\n                decoder.flush().await?;\n            } else {\n                unreachable!(\"already checked above\")\n            }\n        }\n\n        Ok(())\n    }\n}\n\n/// Reserved bits for length and compression\npub(super) const LEN_COMPRESSION_BIT_MASK: u8 = 0xf0;\n\n/// The maximum size of blobs we support. 
The highest few bits\n/// are reserved for compression and other further uses.\npub(crate) const MAX_SUPPORTED_BLOB_LEN: usize = 0x0fff_ffff;\n\npub(super) const BYTE_UNCOMPRESSED: u8 = 0x80;\npub(super) const BYTE_ZSTD: u8 = BYTE_UNCOMPRESSED | 0x10;\n\n/// A wrapper of `VirtualFile` that allows users to write blobs.\npub struct BlobWriter<W> {\n    /// We do tiny writes for the length headers; they need to be in an owned buffer;\n    io_buf: Option<BytesMut>,\n    writer: BufferedWriter<IoBufferMut, W>,\n    offset: u64,\n}\n\nimpl<W> BlobWriter<W>\nwhere\n    W: OwnedAsyncWriter + std::fmt::Debug + Send + Sync + 'static,\n{\n    /// See [`BufferedWriter`] struct-level doc comment for semantics of `start_offset`.\n    pub fn new(\n        file: W,\n        start_offset: u64,\n        gate: &utils::sync::gate::Gate,\n        cancel: CancellationToken,\n        ctx: &RequestContext,\n        flush_task_span: tracing::Span,\n    ) -> anyhow::Result<Self> {\n        Ok(Self {\n            io_buf: Some(BytesMut::new()),\n            writer: BufferedWriter::new(\n                file,\n                start_offset,\n                || IoBufferMut::with_capacity(Self::CAPACITY),\n                gate.enter()?,\n                cancel,\n                ctx,\n                flush_task_span,\n            ),\n            offset: start_offset,\n        })\n    }\n\n    pub fn size(&self) -> u64 {\n        self.offset\n    }\n\n    const CAPACITY: usize = 64 * 1024;\n\n    /// Writes `src_buf` to the file at the current offset.\n    async fn write_all<Buf: IoBuf + Send>(\n        &mut self,\n        src_buf: FullSlice<Buf>,\n        ctx: &RequestContext,\n    ) -> (FullSlice<Buf>, Result<(), FlushTaskError>) {\n        let res = self\n            .writer\n            // TODO: why are we taking a FullSlice if we're going to pass a borrow downstack?\n            // Can remove all the complexity around owned buffers upstack\n            .write_buffered_borrowed(&src_buf, ctx)\n 
           .await\n            .map(|len| {\n                self.offset += len as u64;\n            });\n\n        (src_buf, res)\n    }\n\n    /// Write a blob of data. Returns the offset that it was written to,\n    /// which can be used to retrieve the data later.\n    pub async fn write_blob<Buf: IoBuf + Send>(\n        &mut self,\n        srcbuf: FullSlice<Buf>,\n        ctx: &RequestContext,\n    ) -> (FullSlice<Buf>, Result<u64, WriteBlobError>) {\n        let (buf, res) = self\n            .write_blob_maybe_compressed(srcbuf, ctx, ImageCompressionAlgorithm::Disabled)\n            .await;\n        (buf, res.map(|(off, _compression_info)| off))\n    }\n\n    /// Write a blob of data. Returns the offset that it was written to,\n    /// which can be used to retrieve the data later.\n    pub(crate) async fn write_blob_maybe_compressed<Buf: IoBuf + Send>(\n        &mut self,\n        srcbuf: FullSlice<Buf>,\n        ctx: &RequestContext,\n        algorithm: ImageCompressionAlgorithm,\n    ) -> (\n        FullSlice<Buf>,\n        Result<(u64, CompressionInfo), WriteBlobError>,\n    ) {\n        let offset = self.offset;\n        let mut compression_info = CompressionInfo {\n            written_compressed: false,\n            compressed_size: None,\n        };\n\n        let len = srcbuf.len();\n\n        let mut io_buf = self.io_buf.take().expect(\"we always put it back below\");\n        io_buf.clear();\n        let mut compressed_buf = None;\n        let ((io_buf_slice, hdr_res), srcbuf) = async {\n            if len < 128 {\n                // Short blob. 
Write a 1-byte length header\n                io_buf.put_u8(len as u8);\n                let (slice, res) = self.write_all(io_buf.slice_len(), ctx).await;\n                let res = res.map_err(WriteBlobError::Flush);\n                ((slice, res), srcbuf)\n            } else {\n                // Write a 4-byte length header\n                if len > MAX_SUPPORTED_BLOB_LEN {\n                    return (\n                        (\n                            io_buf.slice_len(),\n                            Err(WriteBlobError::Other(anyhow::anyhow!(\n                                \"blob too large ({len} bytes)\"\n                            ))),\n                        ),\n                        srcbuf,\n                    );\n                }\n                let (high_bit_mask, len_written, srcbuf) = match algorithm {\n                    ImageCompressionAlgorithm::Zstd { level } => {\n                        let mut encoder = if let Some(level) = level {\n                            async_compression::tokio::write::ZstdEncoder::with_quality(\n                                Vec::new(),\n                                Level::Precise(level.into()),\n                            )\n                        } else {\n                            async_compression::tokio::write::ZstdEncoder::new(Vec::new())\n                        };\n                        encoder.write_all(&srcbuf[..]).await.unwrap();\n                        encoder.shutdown().await.unwrap();\n                        let compressed = encoder.into_inner();\n                        compression_info.compressed_size = Some(compressed.len());\n                        if compressed.len() < len {\n                            compression_info.written_compressed = true;\n                            let compressed_len = compressed.len();\n                            compressed_buf = Some(compressed);\n                            (BYTE_ZSTD, compressed_len, srcbuf)\n                        } else {\n 
                           (BYTE_UNCOMPRESSED, len, srcbuf)\n                        }\n                    }\n                    ImageCompressionAlgorithm::Disabled => (BYTE_UNCOMPRESSED, len, srcbuf),\n                };\n                let mut len_buf = (len_written as u32).to_be_bytes();\n                assert_eq!(len_buf[0] & 0xf0, 0);\n                len_buf[0] |= high_bit_mask;\n                io_buf.extend_from_slice(&len_buf[..]);\n                let (slice, res) = self.write_all(io_buf.slice_len(), ctx).await;\n                let res = res.map_err(WriteBlobError::Flush);\n                ((slice, res), srcbuf)\n            }\n        }\n        .await;\n        self.io_buf = Some(io_buf_slice.into_raw_slice().into_inner());\n        match hdr_res {\n            Ok(_) => (),\n            Err(e) => return (srcbuf, Err(e)),\n        }\n        let (srcbuf, res) = if let Some(compressed_buf) = compressed_buf {\n            let (_buf, res) = self.write_all(compressed_buf.slice_len(), ctx).await;\n            (srcbuf, res)\n        } else {\n            self.write_all(srcbuf, ctx).await\n        };\n        let res = res.map_err(WriteBlobError::Flush);\n        (srcbuf, res.map(|_| (offset, compression_info)))\n    }\n\n    /// Writes a raw blob containing both header and data, returning its offset.\n    pub(crate) async fn write_blob_raw<Buf: IoBuf + Send>(\n        &mut self,\n        raw_with_header: FullSlice<Buf>,\n        ctx: &RequestContext,\n    ) -> (FullSlice<Buf>, Result<u64, WriteBlobError>) {\n        // Verify the header, to ensure we don't write invalid/corrupt data.\n        let header = match Header::decode(&raw_with_header)\n            .context(\"decoding blob header\")\n            .map_err(WriteBlobError::Other)\n        {\n            Ok(header) => header,\n            Err(err) => return (raw_with_header, Err(err)),\n        };\n        if raw_with_header.len() != header.total_len() {\n            let header_total_len = 
header.total_len();\n            let raw_len = raw_with_header.len();\n            return (\n                raw_with_header,\n                Err(WriteBlobError::Other(anyhow::anyhow!(\n                    \"header length mismatch: {header_total_len} != {raw_len}\"\n                ))),\n            );\n        }\n\n        let offset = self.offset;\n        let (raw_with_header, result) = self.write_all(raw_with_header, ctx).await;\n        let result = result.map_err(WriteBlobError::Flush);\n        (raw_with_header, result.map(|_| offset))\n    }\n\n    /// Finish this blob writer and return the underlying `W`.\n    pub async fn shutdown(\n        self,\n        mode: BufferedWriterShutdownMode,\n        ctx: &RequestContext,\n    ) -> Result<W, FlushTaskError> {\n        let (_, file) = self.writer.shutdown(mode, ctx).await?;\n        Ok(file)\n    }\n}\n\n#[cfg(test)]\npub(crate) mod tests {\n    use camino::Utf8PathBuf;\n    use camino_tempfile::Utf8TempDir;\n    use rand::{Rng, SeedableRng};\n    use tracing::info_span;\n\n    use super::*;\n    use crate::context::DownloadBehavior;\n    use crate::task_mgr::TaskKind;\n    use crate::tenant::block_io::BlockReaderRef;\n    use crate::virtual_file;\n    use crate::virtual_file::TempVirtualFile;\n    use crate::virtual_file::VirtualFile;\n\n    async fn round_trip_test(blobs: &[Vec<u8>]) -> anyhow::Result<()> {\n        round_trip_test_compressed(blobs, false).await\n    }\n\n    pub(crate) async fn write_maybe_compressed(\n        blobs: &[Vec<u8>],\n        compression: bool,\n        ctx: &RequestContext,\n    ) -> anyhow::Result<(Utf8TempDir, Utf8PathBuf, Vec<u64>)> {\n        let temp_dir = camino_tempfile::tempdir()?;\n        let pathbuf = temp_dir.path().join(\"file\");\n        let gate = utils::sync::gate::Gate::default();\n        let cancel = CancellationToken::new();\n\n        // Write part (in block to drop the file)\n        let mut offsets = Vec::new();\n        {\n            let file = 
TempVirtualFile::new(\n                VirtualFile::open_with_options_v2(\n                    pathbuf.as_path(),\n                    virtual_file::OpenOptions::new()\n                        .create_new(true)\n                        .write(true),\n                    ctx,\n                )\n                .await?,\n                gate.enter()?,\n            );\n            let mut wtr =\n                BlobWriter::new(file, 0, &gate, cancel.clone(), ctx, info_span!(\"test\")).unwrap();\n            for blob in blobs.iter() {\n                let (_, res) = if compression {\n                    let res = wtr\n                        .write_blob_maybe_compressed(\n                            blob.clone().slice_len(),\n                            ctx,\n                            ImageCompressionAlgorithm::Zstd { level: Some(1) },\n                        )\n                        .await;\n                    (res.0, res.1.map(|(off, _)| off))\n                } else {\n                    wtr.write_blob(blob.clone().slice_len(), ctx).await\n                };\n                let offs = res?;\n                offsets.push(offs);\n            }\n            let file = wtr\n                .shutdown(\n                    BufferedWriterShutdownMode::ZeroPadToNextMultiple(PAGE_SZ),\n                    ctx,\n                )\n                .await?;\n            file.disarm_into_inner()\n        };\n        Ok((temp_dir, pathbuf, offsets))\n    }\n\n    async fn round_trip_test_compressed(\n        blobs: &[Vec<u8>],\n        compression: bool,\n    ) -> anyhow::Result<()> {\n        let ctx =\n            RequestContext::new(TaskKind::UnitTest, DownloadBehavior::Error).with_scope_unit_test();\n        let (_temp_dir, pathbuf, offsets) =\n            write_maybe_compressed(blobs, compression, &ctx).await?;\n\n        println!(\"Done writing!\");\n        let file = VirtualFile::open_v2(pathbuf, &ctx).await?;\n        let rdr = 
BlockReaderRef::VirtualFile(&file);\n        let rdr = BlockCursor::new_with_compression(rdr, compression);\n        for (idx, (blob, offset)) in blobs.iter().zip(offsets.iter()).enumerate() {\n            let blob_read = rdr.read_blob(*offset, &ctx).await?;\n            assert_eq!(\n                blob, &blob_read,\n                \"mismatch for idx={idx} at offset={offset}\"\n            );\n        }\n        Ok(())\n    }\n\n    pub(crate) fn random_array(len: usize) -> Vec<u8> {\n        let mut rng = rand::rng();\n        (0..len).map(|_| rng.random()).collect::<_>()\n    }\n\n    #[tokio::test]\n    async fn test_one() -> anyhow::Result<()> {\n        let blobs = &[vec![12, 21, 22]];\n        round_trip_test(blobs).await?;\n        Ok(())\n    }\n\n    #[tokio::test]\n    async fn test_hello_simple() -> anyhow::Result<()> {\n        let blobs = &[\n            vec![0, 1, 2, 3],\n            b\"Hello, World!\".to_vec(),\n            Vec::new(),\n            b\"foobar\".to_vec(),\n        ];\n        round_trip_test(blobs).await?;\n        round_trip_test_compressed(blobs, true).await?;\n        Ok(())\n    }\n\n    #[tokio::test]\n    async fn test_really_big_array() -> anyhow::Result<()> {\n        let blobs = &[\n            b\"test\".to_vec(),\n            random_array(10 * PAGE_SZ),\n            b\"hello\".to_vec(),\n            random_array(66 * PAGE_SZ),\n            vec![0xf3; 24 * PAGE_SZ],\n            b\"foobar\".to_vec(),\n        ];\n        round_trip_test(blobs).await?;\n        round_trip_test_compressed(blobs, true).await?;\n        Ok(())\n    }\n\n    #[tokio::test]\n    async fn test_arrays_inc() -> anyhow::Result<()> {\n        let blobs = (0..PAGE_SZ / 8)\n            .map(|v| random_array(v * 16))\n            .collect::<Vec<_>>();\n        round_trip_test(&blobs).await?;\n        Ok(())\n    }\n\n    #[tokio::test]\n    async fn test_arrays_random_size() -> anyhow::Result<()> {\n        let mut rng = 
rand::rngs::StdRng::seed_from_u64(42);\n        let blobs = (0..1024)\n            .map(|_| {\n                let mut sz: u16 = rng.random();\n                // Make 50% of the arrays small\n                if rng.random() {\n                    sz &= 63;\n                }\n                random_array(sz.into())\n            })\n            .collect::<Vec<_>>();\n        round_trip_test(&blobs).await?;\n        Ok(())\n    }\n\n    #[tokio::test]\n    async fn test_arrays_page_boundary() -> anyhow::Result<()> {\n        let blobs = &[\n            random_array(PAGE_SZ - 4),\n            random_array(PAGE_SZ - 4),\n            random_array(PAGE_SZ - 4),\n        ];\n        round_trip_test(blobs).await?;\n        Ok(())\n    }\n}\n"
  },
  {
    "path": "pageserver/src/tenant/block_io.rs",
    "content": "//!\n//! Low-level Block-oriented I/O functions\n//!\n\nuse std::ops::Deref;\n\nuse super::storage_layer::delta_layer::{Adapter, DeltaLayerInner};\nuse crate::context::RequestContext;\nuse crate::page_cache::{self, FileId, PAGE_SZ, PageReadGuard, PageWriteGuard, ReadBufResult};\n#[cfg(test)]\nuse crate::virtual_file::IoBufferMut;\nuse crate::virtual_file::{IoBuffer, VirtualFile};\n\n/// This is implemented by anything that can read 8 kB (PAGE_SZ)\n/// blocks, using the page cache\n///\n/// There are currently two implementations: EphemeralFile, and FileBlockReader\n/// below.\npub trait BlockReader {\n    ///\n    /// Create a new \"cursor\" for reading from this reader.\n    ///\n    /// A cursor caches the last accessed page, allowing for faster\n    /// access if the same block is accessed repeatedly.\n    fn block_cursor(&self) -> BlockCursor<'_>;\n}\n\nimpl<B> BlockReader for &B\nwhere\n    B: BlockReader,\n{\n    fn block_cursor(&self) -> BlockCursor<'_> {\n        (*self).block_cursor()\n    }\n}\n\n/// Reference to an in-memory copy of an immutable on-disk block.\npub enum BlockLease<'a> {\n    PageReadGuard(PageReadGuard<'static>),\n    EphemeralFileMutableTail(&'a [u8; PAGE_SZ]),\n    Slice(&'a [u8; PAGE_SZ]),\n    #[cfg(test)]\n    Arc(std::sync::Arc<[u8; PAGE_SZ]>),\n    #[cfg(test)]\n    IoBufferMut(IoBufferMut),\n}\n\nimpl From<PageReadGuard<'static>> for BlockLease<'static> {\n    fn from(value: PageReadGuard<'static>) -> BlockLease<'static> {\n        BlockLease::PageReadGuard(value)\n    }\n}\n\n#[cfg(test)]\nimpl From<std::sync::Arc<[u8; PAGE_SZ]>> for BlockLease<'_> {\n    fn from(value: std::sync::Arc<[u8; PAGE_SZ]>) -> Self {\n        BlockLease::Arc(value)\n    }\n}\n\nimpl Deref for BlockLease<'_> {\n    type Target = [u8; PAGE_SZ];\n\n    fn deref(&self) -> &Self::Target {\n        match self {\n            BlockLease::PageReadGuard(v) => v.deref(),\n            BlockLease::EphemeralFileMutableTail(v) => v,\n            
BlockLease::Slice(v) => v,\n            #[cfg(test)]\n            BlockLease::Arc(v) => v.deref(),\n            #[cfg(test)]\n            BlockLease::IoBufferMut(v) => {\n                TryFrom::try_from(&v[..]).expect(\"caller must ensure that v has PAGE_SZ\")\n            }\n        }\n    }\n}\n\n/// Provides the ability to read blocks from different sources,\n/// similar to using traits for this purpose.\n///\n/// Unlike traits, we also support the read function to be async though.\npub(crate) enum BlockReaderRef<'a> {\n    FileBlockReader(&'a FileBlockReader<'a>),\n    Adapter(Adapter<&'a DeltaLayerInner>),\n    #[cfg(test)]\n    TestDisk(&'a super::disk_btree::tests::TestDisk),\n    #[cfg(test)]\n    VirtualFile(&'a VirtualFile),\n}\n\nimpl BlockReaderRef<'_> {\n    #[inline(always)]\n    async fn read_blk(\n        &self,\n        blknum: u32,\n        ctx: &RequestContext,\n    ) -> Result<BlockLease, std::io::Error> {\n        use BlockReaderRef::*;\n        match self {\n            FileBlockReader(r) => r.read_blk(blknum, ctx).await,\n            Adapter(r) => r.read_blk(blknum, ctx).await,\n            #[cfg(test)]\n            TestDisk(r) => r.read_blk(blknum),\n            #[cfg(test)]\n            VirtualFile(r) => r.read_blk(blknum, ctx).await,\n        }\n    }\n}\n\n///\n/// A \"cursor\" for efficiently reading multiple pages from a BlockReader\n///\n/// You can access the last page with `*cursor`. 'read_blk' returns 'self', so\n/// that in many cases you can use a BlockCursor as a drop-in replacement for\n/// the underlying BlockReader. 
For example:\n///\n/// ```no_run\n/// # use pageserver::tenant::block_io::{BlockReader, FileBlockReader};\n/// # use pageserver::context::RequestContext;\n/// # let reader: FileBlockReader = unimplemented!(\"stub\");\n/// # let ctx: RequestContext = unimplemented!(\"stub\");\n/// let cursor = reader.block_cursor();\n/// let buf = cursor.read_blk(1, &ctx);\n/// // do stuff with 'buf'\n/// let buf = cursor.read_blk(2, &ctx);\n/// // do stuff with 'buf'\n/// ```\n///\npub struct BlockCursor<'a> {\n    pub(super) read_compressed: bool,\n    reader: BlockReaderRef<'a>,\n}\n\nimpl<'a> BlockCursor<'a> {\n    pub(crate) fn new(reader: BlockReaderRef<'a>) -> Self {\n        Self::new_with_compression(reader, false)\n    }\n    pub(crate) fn new_with_compression(reader: BlockReaderRef<'a>, read_compressed: bool) -> Self {\n        BlockCursor {\n            read_compressed,\n            reader,\n        }\n    }\n    // Needed by cli\n    pub fn new_fileblockreader(reader: &'a FileBlockReader) -> Self {\n        BlockCursor {\n            read_compressed: false,\n            reader: BlockReaderRef::FileBlockReader(reader),\n        }\n    }\n\n    /// Read a block.\n    ///\n    /// Returns a \"lease\" object that can be used to\n    /// access to the contents of the page. (For the page cache, the\n    /// lease object represents a lock on the buffer.)\n    #[inline(always)]\n    pub async fn read_blk(\n        &self,\n        blknum: u32,\n        ctx: &RequestContext,\n    ) -> Result<BlockLease, std::io::Error> {\n        self.reader.read_blk(blknum, ctx).await\n    }\n}\n\n/// An adapter for reading a (virtual) file using the page cache.\n///\n/// The file is assumed to be immutable. 
This doesn't provide any functions\n/// for modifying the file, nor for invalidating the cache if it is modified.\n#[derive(Clone)]\npub struct FileBlockReader<'a> {\n    pub file: &'a VirtualFile,\n\n    /// Unique ID of this file, used as key in the page cache.\n    file_id: page_cache::FileId,\n\n    compressed_reads: bool,\n}\n\nimpl<'a> FileBlockReader<'a> {\n    pub fn new(file: &'a VirtualFile, file_id: FileId) -> Self {\n        FileBlockReader {\n            file_id,\n            file,\n            compressed_reads: true,\n        }\n    }\n\n    /// Read a page from the underlying file into given buffer.\n    async fn fill_buffer(\n        &self,\n        buf: PageWriteGuard<'static>,\n        blkno: u32,\n        ctx: &RequestContext,\n    ) -> Result<PageWriteGuard<'static>, std::io::Error> {\n        assert!(buf.len() == PAGE_SZ);\n        self.file\n            .read_exact_at_page(buf, blkno as u64 * PAGE_SZ as u64, ctx)\n            .await\n    }\n    /// Read a block.\n    ///\n    /// Returns a \"lease\" object that can be used to\n    /// access to the contents of the page. 
(For the page cache, the\n    /// lease object represents a lock on the buffer.)\n    pub async fn read_blk<'b>(\n        &self,\n        blknum: u32,\n        ctx: &RequestContext,\n    ) -> Result<BlockLease<'b>, std::io::Error> {\n        let cache = page_cache::get();\n        match cache\n            .read_immutable_buf(self.file_id, blknum, ctx)\n            .await\n            .map_err(|e| std::io::Error::other(format!(\"Failed to read immutable buf: {e:#}\")))?\n        {\n            ReadBufResult::Found(guard) => Ok(guard.into()),\n            ReadBufResult::NotFound(write_guard) => {\n                // Read the page from disk into the buffer\n                let write_guard = self.fill_buffer(write_guard, blknum, ctx).await?;\n                Ok(write_guard.mark_valid().into())\n            }\n        }\n    }\n}\n\nimpl BlockReader for FileBlockReader<'_> {\n    fn block_cursor(&self) -> BlockCursor<'_> {\n        BlockCursor::new_with_compression(\n            BlockReaderRef::FileBlockReader(self),\n            self.compressed_reads,\n        )\n    }\n}\n\n///\n/// Trait for block-oriented output\n///\npub trait BlockWriter {\n    ///\n    /// Write a page to the underlying storage.\n    ///\n    /// 'buf' must be of size PAGE_SZ. 
Returns the block number the page was\n    /// written to.\n    ///\n    fn write_blk(&mut self, buf: IoBuffer) -> Result<u32, std::io::Error>;\n}\n\n///\n/// A simple in-memory buffer of blocks.\n///\npub struct BlockBuf {\n    pub blocks: Vec<IoBuffer>,\n}\nimpl BlockWriter for BlockBuf {\n    fn write_blk(&mut self, buf: IoBuffer) -> Result<u32, std::io::Error> {\n        assert!(buf.len() == PAGE_SZ);\n        let blknum = self.blocks.len();\n        self.blocks.push(buf);\n        Ok(blknum as u32)\n    }\n}\n\nimpl BlockBuf {\n    pub fn new() -> Self {\n        BlockBuf { blocks: Vec::new() }\n    }\n\n    pub fn size(&self) -> u64 {\n        (self.blocks.len() * PAGE_SZ) as u64\n    }\n}\nimpl Default for BlockBuf {\n    fn default() -> Self {\n        Self::new()\n    }\n}\n"
  },
  {
    "path": "pageserver/src/tenant/checks.rs",
    "content": "use std::collections::BTreeSet;\n\nuse itertools::Itertools;\nuse pageserver_compaction::helpers::overlaps_with;\n\nuse super::storage_layer::LayerName;\n\n/// Checks whether a layer map is valid (i.e., is a valid result of the current compaction algorithm if nothing goes wrong).\n///\n/// The function implements a fast path check and a slow path check.\n///\n/// The fast path checks if we can split the LSN range of a delta layer only at the LSNs of the delta layers. For example,\n///\n/// ```plain\n/// |       |                 |       |\n/// |   1   |    |   2   |    |   3   |\n/// |       |    |       |    |       |\n/// ```\n///\n/// This is not a valid layer map because the LSN range of layer 1 intersects with the LSN range of layer 2. 1 and 2 should have\n/// the same LSN range.\n///\n/// The exception is that when layer 2 only contains a single key, it could be split over the LSN range. For example,\n///\n/// ```plain\n/// |       |    |   2   |    |       |\n/// |   1   |    |-------|    |   3   |\n/// |       |    |   4   |    |       |\n/// ```\n///\n/// If layer 2 and 4 contain the same single key, this is also a valid layer map.\n///\n/// However, if a partial compaction is still going on, it is possible that we get a layer map not satisfying the above condition.\n/// Therefore, we fallback to simply check if any of the two delta layers overlap. 
(See \"A slow path...\")\npub fn check_valid_layermap(metadata: &[LayerName]) -> Option<String> {\n    let mut lsn_split_point = BTreeSet::new(); // TODO: use a better data structure (range tree / range set?)\n    let mut all_delta_layers = Vec::new();\n    for name in metadata {\n        if let LayerName::Delta(layer) = name {\n            all_delta_layers.push(layer.clone());\n        }\n    }\n    for layer in &all_delta_layers {\n        if layer.key_range.start.next() != layer.key_range.end {\n            let lsn_range = &layer.lsn_range;\n            lsn_split_point.insert(lsn_range.start);\n            lsn_split_point.insert(lsn_range.end);\n        }\n    }\n    for (idx, layer) in all_delta_layers.iter().enumerate() {\n        if layer.key_range.start.next() == layer.key_range.end {\n            continue;\n        }\n        let lsn_range = layer.lsn_range.clone();\n        let intersects = lsn_split_point.range(lsn_range).collect_vec();\n        if intersects.len() > 1 {\n            // A slow path to check if the layer intersects with any other delta layer.\n            for (other_idx, other_layer) in all_delta_layers.iter().enumerate() {\n                if other_idx == idx {\n                    // do not check self intersects with self\n                    continue;\n                }\n                if overlaps_with(&layer.lsn_range, &other_layer.lsn_range)\n                    && overlaps_with(&layer.key_range, &other_layer.key_range)\n                {\n                    let err = format!(\n                        \"layer violates the layer map LSN split assumption: layer {layer} intersects with layer {other_layer}\"\n                    );\n                    return Some(err);\n                }\n            }\n        }\n    }\n    None\n}\n"
  },
  {
    "path": "pageserver/src/tenant/config.rs",
    "content": "//! Functions for handling per-tenant configuration options\n//!\n//! If tenant is created with --config option,\n//! the tenant-specific config will be stored in tenant's directory.\n//! Otherwise, global pageserver's config is used.\n//!\n//! If the tenant config file is corrupted, the tenant will be disabled.\n//! We cannot use global or default config instead, because wrong settings\n//! may lead to a data loss.\n//!\n\nuse pageserver_api::models;\nuse pageserver_api::shard::{ShardCount, ShardIdentity, ShardNumber, ShardStripeSize};\nuse serde::{Deserialize, Serialize};\nuse utils::critical;\nuse utils::generation::Generation;\n\n#[derive(Debug, Copy, Clone, Serialize, Deserialize, PartialEq, Eq)]\npub(crate) enum AttachmentMode {\n    /// Our generation is current as far as we know, and as far as we know we are the only attached\n    /// pageserver.  This is the \"normal\" attachment mode.\n    Single,\n    /// Our generation number is current as far as we know, but we are advised that another\n    /// pageserver is still attached, and therefore to avoid executing deletions.   This is\n    /// the attachment mode of a pageserver that is the destination of a migration.\n    Multi,\n    /// Our generation number is superseded, or about to be superseded.  We are advised\n    /// to avoid remote storage writes if possible, and to avoid sending billing data.  This\n    /// is the attachment mode of a pageserver that is the origin of a migration.\n    Stale,\n}\n\n#[derive(Debug, Copy, Clone, Serialize, Deserialize, PartialEq, Eq)]\npub(crate) struct AttachedLocationConfig {\n    pub(crate) generation: Generation,\n    pub(crate) attach_mode: AttachmentMode,\n    // TODO: add a flag to override AttachmentMode's policies under\n    // disk pressure (i.e. 
unblock uploads under disk pressure in Stale\n    // state, unblock deletions after timeout in Multi state)\n}\n\n#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]\npub(crate) struct SecondaryLocationConfig {\n    /// If true, keep the local cache warm by polling remote storage\n    pub(crate) warm: bool,\n}\n\n#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]\npub(crate) enum LocationMode {\n    Attached(AttachedLocationConfig),\n    Secondary(SecondaryLocationConfig),\n}\n\n/// Per-tenant, per-pageserver configuration.  All pageservers use the same TenantConf,\n/// but have distinct LocationConf.\n#[derive(Clone, PartialEq, Eq, Serialize, Deserialize)]\npub(crate) struct LocationConf {\n    /// The location-specific part of the configuration, describes the operating\n    /// mode of this pageserver for this tenant.\n    pub(crate) mode: LocationMode,\n\n    /// The detailed shard identity.  This structure is already scoped within\n    /// a TenantShardId, but we need the full ShardIdentity to enable calculating\n    /// key->shard mappings.\n    ///\n    /// NB: we store this even for unsharded tenants, so that we agree with storcon on the intended\n    /// stripe size. 
Otherwise, a split request that does not specify a stripe size may use a\n    /// different default than storcon, which can lead to incorrect stripe sizes and corruption.\n    pub(crate) shard: ShardIdentity,\n\n    /// The pan-cluster tenant configuration, the same on all locations\n    pub(crate) tenant_conf: pageserver_api::models::TenantConfig,\n}\n\nimpl std::fmt::Debug for LocationConf {\n    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {\n        match &self.mode {\n            LocationMode::Attached(conf) => {\n                write!(\n                    f,\n                    \"Attached {:?}, gen={:?}\",\n                    conf.attach_mode, conf.generation\n                )\n            }\n            LocationMode::Secondary(conf) => {\n                write!(f, \"Secondary, warm={}\", conf.warm)\n            }\n        }\n    }\n}\n\nimpl AttachedLocationConfig {\n    /// Consult attachment mode to determine whether we are currently permitted\n    /// to delete layers.  
This is only advisory, not required for data safety.\n    /// See [`AttachmentMode`] for more context.\n    pub(crate) fn may_delete_layers_hint(&self) -> bool {\n        // TODO: add an override for disk pressure in AttachedLocationConfig,\n        // and respect it here.\n        match &self.attach_mode {\n            AttachmentMode::Single => true,\n            AttachmentMode::Multi | AttachmentMode::Stale => {\n                // In Multi mode we avoid doing deletions because some other\n                // attached pageserver might get 404 while trying to read\n                // a layer we delete which is still referenced in their metadata.\n                //\n                // In Stale mode, we avoid doing deletions because we expect\n                // that they would ultimately fail validation in the deletion\n                // queue due to our stale generation.\n                false\n            }\n        }\n    }\n\n    /// Whether we are currently hinted that it is worthwhile to upload layers.\n    /// This is only advisory, not required for data safety.\n    /// See [`AttachmentMode`] for more context.\n    pub(crate) fn may_upload_layers_hint(&self) -> bool {\n        // TODO: add an override for disk pressure in AttachedLocationConfig,\n        // and respect it here.\n        match &self.attach_mode {\n            AttachmentMode::Single | AttachmentMode::Multi => true,\n            AttachmentMode::Stale => {\n                // In Stale mode, we avoid doing uploads because we expect that\n                // our replacement pageserver will already have started its own\n                // IndexPart that will never reference layers we upload: it is\n                // wasteful.\n                false\n            }\n        }\n    }\n}\n\nimpl LocationConf {\n    /// For use when loading from a legacy configuration: presence of a tenant\n    /// implies it is in AttachmentMode::Single, which used to be the only\n    /// possible state.  
This function should eventually be removed.\n    pub(crate) fn attached_single(\n        tenant_conf: pageserver_api::models::TenantConfig,\n        generation: Generation,\n        shard_params: models::ShardParameters,\n    ) -> Self {\n        Self {\n            mode: LocationMode::Attached(AttachedLocationConfig {\n                generation,\n                attach_mode: AttachmentMode::Single,\n            }),\n            shard: ShardIdentity::from_params(ShardNumber(0), shard_params),\n            tenant_conf,\n        }\n    }\n\n    /// For use when attaching/re-attaching: update the generation stored in this\n    /// structure.  If we were in a secondary state, promote to attached (possession\n    /// of a fresh generation implies this).\n    pub(crate) fn attach_in_generation(\n        &mut self,\n        mode: AttachmentMode,\n        generation: Generation,\n        stripe_size: ShardStripeSize,\n    ) {\n        match &mut self.mode {\n            LocationMode::Attached(attach_conf) => {\n                attach_conf.generation = generation;\n                attach_conf.attach_mode = mode;\n            }\n            LocationMode::Secondary(_) => {\n                // We are promoted to attached by the control plane's re-attach response\n                self.mode = LocationMode::Attached(AttachedLocationConfig {\n                    generation,\n                    attach_mode: mode,\n                })\n            }\n        }\n\n        // This should never happen.\n        // TODO: turn this into a proper assertion.\n        if stripe_size != self.shard.stripe_size {\n            critical!(\n                \"stripe size mismatch: {} != {}\",\n                self.shard.stripe_size,\n                stripe_size,\n            );\n        }\n\n        self.shard.stripe_size = stripe_size;\n    }\n\n    pub(crate) fn try_from(conf: &'_ models::LocationConfig) -> anyhow::Result<Self> {\n        let tenant_conf = conf.tenant_conf.clone();\n\n        fn 
get_generation(conf: &'_ models::LocationConfig) -> Result<Generation, anyhow::Error> {\n            conf.generation\n                .map(Generation::new)\n                .ok_or_else(|| anyhow::anyhow!(\"Generation must be set when attaching\"))\n        }\n\n        let mode = match &conf.mode {\n            models::LocationConfigMode::AttachedMulti => {\n                LocationMode::Attached(AttachedLocationConfig {\n                    generation: get_generation(conf)?,\n                    attach_mode: AttachmentMode::Multi,\n                })\n            }\n            models::LocationConfigMode::AttachedSingle => {\n                LocationMode::Attached(AttachedLocationConfig {\n                    generation: get_generation(conf)?,\n                    attach_mode: AttachmentMode::Single,\n                })\n            }\n            models::LocationConfigMode::AttachedStale => {\n                LocationMode::Attached(AttachedLocationConfig {\n                    generation: get_generation(conf)?,\n                    attach_mode: AttachmentMode::Stale,\n                })\n            }\n            models::LocationConfigMode::Secondary => {\n                anyhow::ensure!(conf.generation.is_none());\n\n                let warm = conf\n                    .secondary_conf\n                    .as_ref()\n                    .map(|c| c.warm)\n                    .unwrap_or(false);\n                LocationMode::Secondary(SecondaryLocationConfig { warm })\n            }\n            models::LocationConfigMode::Detached => {\n                // Should not have been called: API code should translate this mode\n                // into a detach rather than trying to decode it as a LocationConf\n                return Err(anyhow::anyhow!(\"Cannot decode a Detached configuration\"));\n            }\n        };\n\n        let shard = if conf.shard_count == 0 {\n            // NB: carry over the persisted stripe size instead of using the default. 
This doesn't\n            // matter for most practical purposes, since unsharded tenants don't use the stripe\n            // size, but can cause inconsistencies between storcon and Pageserver and cause manual\n            // splits without `new_stripe_size` to use an unintended stripe size.\n            ShardIdentity::unsharded_with_stripe_size(ShardStripeSize(conf.shard_stripe_size))\n        } else {\n            ShardIdentity::new(\n                ShardNumber(conf.shard_number),\n                ShardCount::new(conf.shard_count),\n                ShardStripeSize(conf.shard_stripe_size),\n            )?\n        };\n\n        Ok(Self {\n            shard,\n            mode,\n            tenant_conf,\n        })\n    }\n}\n\nimpl Default for LocationConf {\n    // TODO: this should be removed once tenant loading can guarantee that we are never\n    // loading from a directory without a configuration.\n    // => tech debt since https://github.com/neondatabase/neon/issues/1555\n    fn default() -> Self {\n        Self {\n            mode: LocationMode::Attached(AttachedLocationConfig {\n                generation: Generation::none(),\n                attach_mode: AttachmentMode::Single,\n            }),\n            tenant_conf: pageserver_api::models::TenantConfig::default(),\n            shard: ShardIdentity::unsharded(),\n        }\n    }\n}\n\n#[cfg(test)]\nmod tests {\n    #[test]\n    fn serde_roundtrip_tenant_conf_opt() {\n        let small_conf = pageserver_api::models::TenantConfig {\n            gc_horizon: Some(42),\n            ..Default::default()\n        };\n\n        let toml_form = toml_edit::ser::to_string(&small_conf).unwrap();\n        assert_eq!(toml_form, \"gc_horizon = 42\\n\");\n        assert_eq!(small_conf, toml_edit::de::from_str(&toml_form).unwrap());\n\n        let json_form = serde_json::to_string(&small_conf).unwrap();\n        assert_eq!(json_form, \"{\\\"gc_horizon\\\":42}\");\n        assert_eq!(small_conf, 
serde_json::from_str(&json_form).unwrap());\n    }\n}\n"
  },
  {
    "path": "pageserver/src/tenant/debug.rs",
    "content": "use std::{ops::Range, str::FromStr, sync::Arc};\n\nuse crate::walredo::RedoAttemptType;\nuse base64::{Engine as _, engine::general_purpose::STANDARD};\nuse bytes::{Bytes, BytesMut};\nuse camino::Utf8PathBuf;\nuse clap::Parser;\nuse itertools::Itertools;\nuse pageserver_api::{\n    key::Key,\n    keyspace::KeySpace,\n    shard::{ShardIdentity, ShardStripeSize},\n};\nuse postgres_ffi::PgMajorVersion;\nuse postgres_ffi::{BLCKSZ, page_is_new, page_set_lsn};\nuse tracing::Instrument;\nuse utils::{\n    generation::Generation,\n    id::{TenantId, TimelineId},\n    lsn::Lsn,\n    shard::{ShardCount, ShardIndex, ShardNumber},\n};\nuse wal_decoder::models::record::NeonWalRecord;\n\nuse crate::{\n    context::{DownloadBehavior, RequestContext},\n    task_mgr::TaskKind,\n    tenant::storage_layer::ValueReconstructState,\n    walredo::harness::RedoHarness,\n};\n\nuse super::{\n    WalRedoManager, WalredoManagerId,\n    harness::TenantHarness,\n    remote_timeline_client::LayerFileMetadata,\n    storage_layer::{AsLayerDesc, IoConcurrency, Layer, LayerName, ValuesReconstructState},\n};\n\nfn process_page_image(next_record_lsn: Lsn, is_fpw: bool, img_bytes: Bytes) -> Bytes {\n    // To match the logic in libs/wal_decoder/src/serialized_batch.rs\n    let mut new_image: BytesMut = img_bytes.into();\n    if is_fpw && !page_is_new(&new_image) {\n        page_set_lsn(&mut new_image, next_record_lsn);\n    }\n    assert_eq!(new_image.len(), BLCKSZ as usize);\n    new_image.freeze()\n}\n\nasync fn redo_wals(input: &str, key: Key) -> anyhow::Result<()> {\n    let tenant_id = TenantId::generate();\n    let timeline_id = TimelineId::generate();\n    let redo_harness = RedoHarness::new()?;\n    let span = redo_harness.span();\n    let tenant_conf = pageserver_api::models::TenantConfig {\n        ..Default::default()\n    };\n\n    let ctx = RequestContext::new(TaskKind::DebugTool, DownloadBehavior::Error);\n    let tenant = TenantHarness::create_custom(\n        
\"search_key\",\n        tenant_conf,\n        tenant_id,\n        ShardIdentity::unsharded(),\n        Generation::new(1),\n    )\n    .await?\n    .do_try_load_with_redo(\n        Arc::new(WalRedoManager::Prod(\n            WalredoManagerId::next(),\n            redo_harness.manager,\n        )),\n        &ctx,\n    )\n    .await\n    .unwrap();\n    let timeline = tenant\n        .create_test_timeline(timeline_id, Lsn(0x10), PgMajorVersion::PG16, &ctx)\n        .await?;\n    let contents = tokio::fs::read_to_string(input)\n        .await\n        .map_err(|e| anyhow::Error::msg(format!(\"Failed to read input file {input}: {e}\")))\n        .unwrap();\n    let lines = contents.lines();\n    let mut last_wal_lsn: Option<Lsn> = None;\n    let state = {\n        let mut state = ValueReconstructState::default();\n        let mut is_fpw = false;\n        let mut is_first_line = true;\n        for line in lines {\n            if is_first_line {\n                is_first_line = false;\n                if line.trim() == \"FPW\" {\n                    is_fpw = true;\n                }\n                continue; // Skip the first line.\n            }\n            // Each input line is in the \"<next_record_lsn>,<base64>\" format.\n            let (lsn_str, payload_b64) = line\n                .split_once(',')\n                .expect(\"Invalid input format: expected '<lsn>,<base64>'\");\n\n            // Parse the LSN and decode the payload.\n            let lsn = Lsn::from_str(lsn_str.trim()).expect(\"Invalid LSN format\");\n            let bytes = Bytes::from(\n                STANDARD\n                    .decode(payload_b64.trim())\n                    .expect(\"Invalid base64 payload\"),\n            );\n\n            // The first line is considered the base image, the rest are WAL records.\n            if state.img.is_none() {\n                state.img = Some((lsn, process_page_image(lsn, is_fpw, bytes)));\n            } else {\n                let wal_record = 
NeonWalRecord::Postgres {\n                    will_init: false,\n                    rec: bytes,\n                };\n                state.records.push((lsn, wal_record));\n                last_wal_lsn.replace(lsn);\n            }\n        }\n        state\n    };\n\n    assert!(state.img.is_some(), \"No base image found\");\n    assert!(!state.records.is_empty(), \"No WAL records found\");\n    let result = timeline\n        .reconstruct_value(key, last_wal_lsn.unwrap(), state, RedoAttemptType::ReadPage)\n        .instrument(span.clone())\n        .await?;\n\n    eprintln!(\"final image: {:?}\", STANDARD.encode(result));\n\n    Ok(())\n}\n\nasync fn search_key(\n    tenant_id: TenantId,\n    timeline_id: TimelineId,\n    dir: String,\n    key: Key,\n    lsn: Lsn,\n) -> anyhow::Result<()> {\n    let shard_index = ShardIndex {\n        shard_number: ShardNumber(0),\n        shard_count: ShardCount(4),\n    };\n\n    let redo_harness = RedoHarness::new()?;\n    let span = redo_harness.span();\n    let tenant_conf = pageserver_api::models::TenantConfig {\n        ..Default::default()\n    };\n    let ctx = RequestContext::new(TaskKind::DebugTool, DownloadBehavior::Error);\n    let tenant = TenantHarness::create_custom(\n        \"search_key\",\n        tenant_conf,\n        tenant_id,\n        ShardIdentity::new(\n            shard_index.shard_number,\n            shard_index.shard_count,\n            ShardStripeSize(32768),\n        )\n        .unwrap(),\n        Generation::new(1),\n    )\n    .await?\n    .do_try_load_with_redo(\n        Arc::new(WalRedoManager::Prod(\n            WalredoManagerId::next(),\n            redo_harness.manager,\n        )),\n        &ctx,\n    )\n    .await\n    .unwrap();\n\n    let timeline = tenant\n        .create_test_timeline(timeline_id, Lsn(0x10), PgMajorVersion::PG16, &ctx)\n        .await?;\n\n    let mut delta_layers: Vec<Layer> = Vec::new();\n    let mut img_layer: Option<Layer> = Option::None;\n    let mut dir = 
tokio::fs::read_dir(dir).await?;\n    loop {\n        let entry = dir.next_entry().await?;\n        if entry.is_none() || !entry.as_ref().unwrap().file_type().await?.is_file() {\n            break;\n        }\n        let path = Utf8PathBuf::from_path_buf(entry.unwrap().path()).unwrap();\n        let layer_name = match LayerName::from_str(path.file_name().unwrap()) {\n            Ok(name) => name,\n            Err(_) => {\n                eprintln!(\"Skipped invalid layer: {path}\");\n                continue;\n            }\n        };\n        let layer = Layer::for_resident(\n            tenant.conf,\n            &timeline,\n            path.clone(),\n            layer_name,\n            LayerFileMetadata::new(\n                tokio::fs::metadata(path.clone()).await?.len(),\n                Generation::new(1),\n                shard_index,\n            ),\n        );\n        if layer.layer_desc().is_delta() {\n            delta_layers.push(layer.into());\n        } else if img_layer.is_none() {\n            img_layer = Some(layer.into());\n        } else {\n            anyhow::bail!(\"Found multiple image layers\");\n        }\n    }\n    // sort delta layers based on the descending order of LSN\n    delta_layers.sort_by(|a, b| {\n        b.layer_desc()\n            .get_lsn_range()\n            .start\n            .cmp(&a.layer_desc().get_lsn_range().start)\n    });\n\n    let mut state = ValuesReconstructState::new(IoConcurrency::Sequential);\n\n    let key_space = KeySpace::single(Range {\n        start: key,\n        end: key.next(),\n    });\n    let lsn_range = Range {\n        start: img_layer\n            .as_ref()\n            .map_or(Lsn(0x00), |img| img.layer_desc().image_layer_lsn()),\n        end: lsn,\n    };\n    for delta_layer in delta_layers.iter() {\n        delta_layer\n            .get_values_reconstruct_data(key_space.clone(), lsn_range.clone(), &mut state, &ctx)\n            .await?;\n    }\n\n    img_layer\n        .as_ref()\n        
.unwrap()\n        .get_values_reconstruct_data(key_space.clone(), lsn_range.clone(), &mut state, &ctx)\n        .await?;\n\n    for (_key, result) in std::mem::take(&mut state.keys) {\n        let state = result.collect_pending_ios().await?;\n        if state.img.is_some() {\n            eprintln!(\n                \"image: {}: {:x?}\",\n                state.img.as_ref().unwrap().0,\n                STANDARD.encode(state.img.as_ref().unwrap().1.clone())\n            );\n        }\n        for delta in state.records.iter() {\n            match &delta.1 {\n                NeonWalRecord::Postgres { will_init, rec } => {\n                    eprintln!(\n                        \"delta: {}: will_init: {}, {:x?}\",\n                        delta.0,\n                        will_init,\n                        STANDARD.encode(rec)\n                    );\n                }\n                _ => {\n                    eprintln!(\"delta: {}: {:x?}\", delta.0, delta.1);\n                }\n            }\n        }\n\n        let result = timeline\n            .reconstruct_value(key, lsn_range.end, state, RedoAttemptType::ReadPage)\n            .instrument(span.clone())\n            .await?;\n        eprintln!(\"final image: {lsn} : {result:?}\");\n    }\n\n    Ok(())\n}\n\n/// Redo all WALs against the base image in the input file. Return the base64 encoded final image.\n/// Each line in the input file must be in the form \"<lsn>,<base64>\" where:\n///   * `<lsn>` is a PostgreSQL LSN in hexadecimal notation, e.g. `0/16ABCDE`.\n///   * `<base64>` is the base64‐encoded page image (first line) or WAL record (subsequent lines).\n///\n/// The first line provides the base image of a page. The LSN is the LSN of \"next record\" following\n/// the record containing the FPI. 
For example, if the FPI was extracted from a WAL record occupying\n/// [0/1, 0/200) in the WAL stream, the LSN appearing alongside the page image here should be 0/200.\n///\n/// The subsequent lines are WAL records, ordered from the oldest to the newest. The LSN is the\n/// record LSN of the WAL record, not the \"next record\" LSN. For example, if the WAL record here\n/// occupies [0/1, 0/200) in the WAL stream, the LSN appearing alongside the WAL record here should\n/// be 0/1.\n#[derive(Parser)]\nstruct RedoWalsCmd {\n    #[clap(long)]\n    input: String,\n    #[clap(long)]\n    key: String,\n}\n\n#[tokio::test]\nasync fn test_redo_wals() -> anyhow::Result<()> {\n    let args = std::env::args().collect_vec();\n    let pos = args\n        .iter()\n        .position(|arg| arg == \"--\")\n        .unwrap_or(args.len());\n    let slice = &args[pos..args.len()];\n    let cmd = match RedoWalsCmd::try_parse_from(slice) {\n        Ok(cmd) => cmd,\n        Err(err) => {\n            eprintln!(\"{err}\");\n            return Ok(());\n        }\n    };\n\n    let key = Key::from_hex(&cmd.key).unwrap();\n    redo_wals(&cmd.input, key).await?;\n\n    Ok(())\n}\n\n/// Search for a page at the given LSN in all layers of the data_dir.\n/// Return the base64-encoded image and all WAL records, as well as the final reconstructed image.\n#[derive(Parser)]\nstruct SearchKeyCmd {\n    #[clap(long)]\n    tenant_id: String,\n    #[clap(long)]\n    timeline_id: String,\n    #[clap(long)]\n    data_dir: String,\n    #[clap(long)]\n    key: String,\n    #[clap(long)]\n    lsn: String,\n}\n\n#[tokio::test]\nasync fn test_search_key() -> anyhow::Result<()> {\n    let args = std::env::args().collect_vec();\n    let pos = args\n        .iter()\n        .position(|arg| arg == \"--\")\n        .unwrap_or(args.len());\n    let slice = &args[pos..args.len()];\n    let cmd = match SearchKeyCmd::try_parse_from(slice) {\n        Ok(cmd) => cmd,\n        Err(err) => {\n            
eprintln!(\"{err}\");\n            return Ok(());\n        }\n    };\n\n    let tenant_id = TenantId::from_str(&cmd.tenant_id).unwrap();\n    let timeline_id = TimelineId::from_str(&cmd.timeline_id).unwrap();\n    let key = Key::from_hex(&cmd.key).unwrap();\n    let lsn = Lsn::from_str(&cmd.lsn).unwrap();\n    search_key(tenant_id, timeline_id, cmd.data_dir, key, lsn).await?;\n\n    Ok(())\n}\n"
  },
  {
    "path": "pageserver/src/tenant/disk_btree.rs",
    "content": "//!\n//! Simple on-disk B-tree implementation\n//!\n//! This is used as the index structure within image and delta layers\n//!\n//! Features:\n//! - Fixed-width keys\n//! - Fixed-width values (VALUE_SZ)\n//! - The tree is created in a bulk operation. Insert/deletion after creation\n//!   is not supported\n//! - page-oriented\n//!\n//! TODO:\n//! - maybe something like an Adaptive Radix Tree would be more efficient?\n//! - the values stored by image and delta layers are offsets into the file,\n//!   and they are in monotonically increasing order. Prefix compression would\n//!   be very useful for them, too.\n//! - An Iterator interface would be more convenient for the callers than the\n//!   'visit' function\n//!\nuse std::cmp::Ordering;\nuse std::iter::Rev;\nuse std::ops::{Range, RangeInclusive};\nuse std::{io, result};\n\nuse async_stream::try_stream;\nuse byteorder::{BE, ReadBytesExt};\nuse bytes::BufMut;\nuse either::Either;\nuse futures::{Stream, StreamExt};\nuse hex;\nuse thiserror::Error;\nuse tracing::error;\n\nuse crate::context::RequestContext;\nuse crate::tenant::block_io::{BlockReader, BlockWriter};\nuse crate::virtual_file::{IoBuffer, IoBufferMut, owned_buffers_io::write::Buffer};\n\n// The maximum size of a value stored in the B-tree. 
5 bytes is enough currently.\npub const VALUE_SZ: usize = 5;\npub const MAX_VALUE: u64 = 0x007f_ffff_ffff;\n\npub const PAGE_SZ: usize = 8192;\n\n#[derive(Clone, Copy, Debug)]\nstruct Value([u8; VALUE_SZ]);\n\nimpl Value {\n    fn from_slice(slice: &[u8]) -> Value {\n        let mut b = [0u8; VALUE_SZ];\n        b.copy_from_slice(slice);\n        Value(b)\n    }\n\n    fn from_u64(x: u64) -> Value {\n        assert!(x <= 0x007f_ffff_ffff);\n        Value([\n            (x >> 32) as u8,\n            (x >> 24) as u8,\n            (x >> 16) as u8,\n            (x >> 8) as u8,\n            x as u8,\n        ])\n    }\n\n    fn from_blknum(x: u32) -> Value {\n        Value([\n            0x80,\n            (x >> 24) as u8,\n            (x >> 16) as u8,\n            (x >> 8) as u8,\n            x as u8,\n        ])\n    }\n\n    #[allow(dead_code)]\n    fn is_offset(self) -> bool {\n        self.0[0] & 0x80 != 0\n    }\n\n    fn to_u64(self) -> u64 {\n        let b = &self.0;\n        ((b[0] as u64) << 32)\n            | ((b[1] as u64) << 24)\n            | ((b[2] as u64) << 16)\n            | ((b[3] as u64) << 8)\n            | b[4] as u64\n    }\n\n    fn to_blknum(self) -> u32 {\n        let b = &self.0;\n        assert!(b[0] == 0x80);\n        ((b[1] as u32) << 24) | ((b[2] as u32) << 16) | ((b[3] as u32) << 8) | b[4] as u32\n    }\n}\n\n#[derive(Error, Debug)]\npub enum DiskBtreeError {\n    #[error(\"Attempt to append a value that is too large {0} > {}\", MAX_VALUE)]\n    AppendOverflow(u64),\n\n    #[error(\"Unsorted input: key {key:?} is <= last_key {last_key:?}\")]\n    UnsortedInput { key: Box<[u8]>, last_key: Box<[u8]> },\n\n    #[error(\"Could not push to new leaf node\")]\n    FailedToPushToNewLeafNode,\n\n    #[error(\"IoError: {0}\")]\n    Io(#[from] io::Error),\n}\n\npub type Result<T> = result::Result<T, DiskBtreeError>;\n\n/// This is the on-disk representation.\nstruct OnDiskNode<'a, const L: usize> {\n    // Fixed-width fields\n    num_children: 
u16,\n    level: u8,\n    prefix_len: u8,\n    suffix_len: u8,\n\n    // Variable-length fields. These are stored on-disk after the fixed-width\n    // fields, in this order. In the in-memory representation, these point to\n    // the right parts in the page buffer.\n    prefix: &'a [u8],\n    keys: &'a [u8],\n    values: &'a [u8],\n}\n\nimpl<const L: usize> OnDiskNode<'_, L> {\n    ///\n    /// Interpret a PAGE_SZ page as a node.\n    ///\n    fn deparse(buf: &[u8]) -> Result<OnDiskNode<L>> {\n        let mut cursor = std::io::Cursor::new(buf);\n        let num_children = cursor.read_u16::<BE>()?;\n        let level = cursor.read_u8()?;\n        let prefix_len = cursor.read_u8()?;\n        let suffix_len = cursor.read_u8()?;\n\n        let mut off = cursor.position();\n        let prefix_off = off as usize;\n        off += prefix_len as u64;\n\n        let keys_off = off as usize;\n        let keys_len = num_children as usize * suffix_len as usize;\n        off += keys_len as u64;\n\n        let values_off = off as usize;\n        let values_len = num_children as usize * VALUE_SZ;\n        //off += values_len as u64;\n\n        let prefix = &buf[prefix_off..prefix_off + prefix_len as usize];\n        let keys = &buf[keys_off..keys_off + keys_len];\n        let values = &buf[values_off..values_off + values_len];\n\n        Ok(OnDiskNode {\n            num_children,\n            level,\n            prefix_len,\n            suffix_len,\n            prefix,\n            keys,\n            values,\n        })\n    }\n\n    ///\n    /// Read a value at 'idx'\n    ///\n    fn value(&self, idx: usize) -> Value {\n        let value_off = idx * VALUE_SZ;\n        let value_slice = &self.values[value_off..value_off + VALUE_SZ];\n        Value::from_slice(value_slice)\n    }\n\n    fn binary_search(\n        &self,\n        search_key: &[u8; L],\n        keybuf: &mut [u8],\n    ) -> result::Result<usize, usize> {\n        let mut size = self.num_children as usize;\n        
let mut low = 0;\n        let mut high = size;\n        while low < high {\n            let mid = low + size / 2;\n\n            let key_off = mid * self.suffix_len as usize;\n            let suffix = &self.keys[key_off..key_off + self.suffix_len as usize];\n            // Does this match?\n            keybuf[self.prefix_len as usize..].copy_from_slice(suffix);\n\n            let cmp = keybuf[..].cmp(search_key);\n\n            if cmp == Ordering::Less {\n                low = mid + 1;\n            } else if cmp == Ordering::Greater {\n                high = mid;\n            } else {\n                return Ok(mid);\n            }\n            size = high - low;\n        }\n        Err(low)\n    }\n}\n\n///\n/// Public reader object, to search the tree.\n///\n#[derive(Clone)]\npub struct DiskBtreeReader<R, const L: usize>\nwhere\n    R: BlockReader,\n{\n    start_blk: u32,\n    root_blk: u32,\n    reader: R,\n}\n\n#[derive(Clone, Copy, Debug, PartialEq, Eq)]\npub enum VisitDirection {\n    Forwards,\n    Backwards,\n}\n\nimpl<R, const L: usize> DiskBtreeReader<R, L>\nwhere\n    R: BlockReader,\n{\n    pub fn new(start_blk: u32, root_blk: u32, reader: R) -> Self {\n        DiskBtreeReader {\n            start_blk,\n            root_blk,\n            reader,\n        }\n    }\n\n    ///\n    /// Read the value for given key. 
Returns the value, or None if it doesn't exist.\n    ///\n    pub async fn get(&self, search_key: &[u8; L], ctx: &RequestContext) -> Result<Option<u64>> {\n        let mut result: Option<u64> = None;\n        self.visit(\n            search_key,\n            VisitDirection::Forwards,\n            |key, value| {\n                if key == search_key {\n                    result = Some(value);\n                }\n                false\n            },\n            ctx,\n        )\n        .await?;\n        Ok(result)\n    }\n\n    pub fn iter<'a>(self, start_key: &'a [u8; L], ctx: &'a RequestContext) -> DiskBtreeIterator<'a>\n    where\n        R: 'a + Send,\n    {\n        DiskBtreeIterator {\n            stream: Box::pin(self.into_stream(start_key, ctx)),\n        }\n    }\n\n    /// Return a stream which yields all key, value pairs from the index\n    /// starting from the first key greater or equal to `start_key`.\n    ///\n    /// Note 1: that this is a copy of [`Self::visit`].\n    /// TODO: Once the sequential read path is removed this will become\n    /// the only index traversal method.\n    ///\n    /// Note 2: this function used to take `&self` but it now consumes `self`. This is due to\n    /// the lifetime constraints of the reader and the stream / iterator it creates. Using `&self`\n    /// requires the reader to be present when the stream is used, and this creates a lifetime\n    /// dependency between the reader and the stream. Now if we want to create an iterator that\n    /// holds the stream, someone will need to keep a reference to the reader, which is inconvenient\n    /// to use from the image/delta layer APIs.\n    ///\n    /// Feel free to add the `&self` variant back if it's necessary.\n    pub fn into_stream<'a>(\n        self,\n        start_key: &'a [u8; L],\n        ctx: &'a RequestContext,\n    ) -> impl Stream<Item = std::result::Result<(Vec<u8>, u64), DiskBtreeError>> + 'a\n    where\n        R: 'a,\n    {\n        try_stream! 
{\n            let mut stack = Vec::new();\n            stack.push((self.root_blk, None));\n            let block_cursor = self.reader.block_cursor();\n            let mut node_buf = [0_u8; PAGE_SZ];\n            while let Some((node_blknum, opt_iter)) = stack.pop() {\n                // Read the node, through the PS PageCache, into local variable `node_buf`.\n                // We could keep the page cache read guard alive, but, at the time of writing,\n                // we run quite small PS PageCache s => can't risk running out of\n                // PageCache space because this stream isn't consumed fast enough.\n                let page_read_guard = block_cursor\n                    .read_blk(self.start_blk + node_blknum, ctx)\n                    .await?;\n                node_buf.copy_from_slice(page_read_guard.as_ref());\n                drop(page_read_guard); // drop page cache read guard early\n\n                let node = OnDiskNode::deparse(&node_buf)?;\n                let prefix_len = node.prefix_len as usize;\n                let suffix_len = node.suffix_len as usize;\n\n                assert!(node.num_children > 0);\n\n                let mut keybuf = Vec::new();\n                keybuf.extend(node.prefix);\n                keybuf.resize(prefix_len + suffix_len, 0);\n\n                let mut iter: Either<Range<usize>, Rev<RangeInclusive<usize>>> = if let Some(iter) = opt_iter {\n                    iter\n                } else {\n                    // Locate the first match\n                    let idx = match node.binary_search(start_key, keybuf.as_mut_slice()) {\n                        Ok(idx) => idx,\n                        Err(idx) => {\n                            if node.level == 0 {\n                                // Imagine that the node contains the following keys:\n                                //\n                                // 1\n                                // 3  <-- idx\n                                // 5\n            
                    //\n                                // If the search key is '2' and there is no exact match,\n                                // the binary search would return the index of key\n                                // '3'. That's cool, '3' is the first key to return.\n                                idx\n                            } else {\n                                // This is an internal page, so each key represents a lower\n                                // bound for what's in the child page. If there is no exact\n                                // match, we have to return the *previous* entry.\n                                //\n                                // 1  <-- return this\n                                // 3  <-- idx\n                                // 5\n                                idx.saturating_sub(1)\n                            }\n                        }\n                    };\n                    Either::Left(idx..node.num_children.into())\n                };\n\n\n                // idx points to the first match now. Keep going from there\n                while let Some(idx) = iter.next() {\n                    let key_off = idx * suffix_len;\n                    let suffix = &node.keys[key_off..key_off + suffix_len];\n                    keybuf[prefix_len..].copy_from_slice(suffix);\n                    let value = node.value(idx);\n                    #[allow(clippy::collapsible_if)]\n                    if node.level == 0 {\n                        // leaf\n                        yield (keybuf.clone(), value.to_u64());\n                    } else {\n                        stack.push((node_blknum, Some(iter)));\n                        stack.push((value.to_blknum(), None));\n                        break;\n                    }\n                }\n            }\n        }\n    }\n\n    ///\n    /// Scan the tree, starting from 'search_key', in the given direction. 
'visitor'\n    /// will be called for every key >= 'search_key' (or <= 'search_key', if scanning\n    /// backwards)\n    ///\n    pub async fn visit<V>(\n        &self,\n        search_key: &[u8; L],\n        dir: VisitDirection,\n        mut visitor: V,\n        ctx: &RequestContext,\n    ) -> Result<bool>\n    where\n        V: FnMut(&[u8], u64) -> bool,\n    {\n        let mut stack = Vec::new();\n        stack.push((self.root_blk, None));\n        let block_cursor = self.reader.block_cursor();\n        while let Some((node_blknum, opt_iter)) = stack.pop() {\n            // Locate the node.\n            let node_buf = block_cursor\n                .read_blk(self.start_blk + node_blknum, ctx)\n                .await?;\n\n            let node = OnDiskNode::deparse(node_buf.as_ref())?;\n            let prefix_len = node.prefix_len as usize;\n            let suffix_len = node.suffix_len as usize;\n\n            assert!(node.num_children > 0);\n\n            let mut keybuf = Vec::new();\n            keybuf.extend(node.prefix);\n            keybuf.resize(prefix_len + suffix_len, 0);\n\n            let mut iter = if let Some(iter) = opt_iter {\n                iter\n            } else if dir == VisitDirection::Forwards {\n                // Locate the first match\n                let idx = match node.binary_search(search_key, keybuf.as_mut_slice()) {\n                    Ok(idx) => idx,\n                    Err(idx) => {\n                        if node.level == 0 {\n                            // Imagine that the node contains the following keys:\n                            //\n                            // 1\n                            // 3  <-- idx\n                            // 5\n                            //\n                            // If the search key is '2' and there is no exact match,\n                            // the binary search would return the index of key\n                            // '3'. 
That's cool, '3' is the first key to return.\n                            idx\n                        } else {\n                            // This is an internal page, so each key represents a lower\n                            // bound for what's in the child page. If there is no exact\n                            // match, we have to return the *previous* entry.\n                            //\n                            // 1  <-- return this\n                            // 3  <-- idx\n                            // 5\n                            idx.saturating_sub(1)\n                        }\n                    }\n                };\n                Either::Left(idx..node.num_children.into())\n            } else {\n                let idx = match node.binary_search(search_key, keybuf.as_mut_slice()) {\n                    Ok(idx) => {\n                        // Exact match. That's the first entry to return, and walk\n                        // backwards from there.\n                        idx\n                    }\n                    Err(idx) => {\n                        // No exact match. The binary search returned the index of the\n                        // first key that's > search_key. Back off by one, and walk\n                        // backwards from there.\n                        if let Some(idx) = idx.checked_sub(1) {\n                            idx\n                        } else {\n                            return Ok(false);\n                        }\n                    }\n                };\n                Either::Right((0..=idx).rev())\n            };\n\n            // idx points to the first match now. 
Keep going from there\n            while let Some(idx) = iter.next() {\n                let key_off = idx * suffix_len;\n                let suffix = &node.keys[key_off..key_off + suffix_len];\n                keybuf[prefix_len..].copy_from_slice(suffix);\n                let value = node.value(idx);\n                #[allow(clippy::collapsible_if)]\n                if node.level == 0 {\n                    // leaf\n                    if !visitor(&keybuf, value.to_u64()) {\n                        return Ok(false);\n                    }\n                } else {\n                    stack.push((node_blknum, Some(iter)));\n                    stack.push((value.to_blknum(), None));\n                    break;\n                }\n            }\n        }\n        Ok(true)\n    }\n\n    #[allow(dead_code)]\n    pub async fn dump(&self, ctx: &RequestContext) -> Result<()> {\n        let mut stack = Vec::new();\n\n        stack.push((self.root_blk, String::new(), 0, 0, 0));\n\n        let block_cursor = self.reader.block_cursor();\n\n        while let Some((blknum, path, depth, child_idx, key_off)) = stack.pop() {\n            let blk = block_cursor.read_blk(self.start_blk + blknum, ctx).await?;\n            let buf: &[u8] = blk.as_ref();\n            let node = OnDiskNode::<L>::deparse(buf)?;\n\n            if child_idx == 0 {\n                print!(\"{:indent$}\", \"\", indent = depth * 2);\n                let path_prefix = stack\n                    .iter()\n                    .map(|(_blknum, path, ..)| path.as_str())\n                    .collect::<String>();\n                println!(\n                    \"blk #{blknum}: path {path_prefix}{path}: prefix {}, suffix_len {}\",\n                    hex::encode(node.prefix),\n                    node.suffix_len\n                );\n            }\n\n            if child_idx + 1 < node.num_children {\n                let key_off = key_off + node.suffix_len as usize;\n                stack.push((blknum, path.clone(), 
depth, child_idx + 1, key_off));\n            }\n            let key = &node.keys[key_off..key_off + node.suffix_len as usize];\n            let val = node.value(child_idx as usize);\n\n            print!(\"{:indent$}\", \"\", indent = depth * 2 + 2);\n            println!(\"{}: {}\", hex::encode(key), hex::encode(val.0));\n\n            if node.level > 0 {\n                stack.push((val.to_blknum(), hex::encode(node.prefix), depth + 1, 0, 0));\n            }\n        }\n        Ok(())\n    }\n}\n\npub struct DiskBtreeIterator<'a> {\n    #[allow(clippy::type_complexity)]\n    stream: std::pin::Pin<\n        Box<dyn Stream<Item = std::result::Result<(Vec<u8>, u64), DiskBtreeError>> + 'a + Send>,\n    >,\n}\n\nimpl DiskBtreeIterator<'_> {\n    pub async fn next(&mut self) -> Option<std::result::Result<(Vec<u8>, u64), DiskBtreeError>> {\n        self.stream.next().await\n    }\n}\n\n///\n/// Public builder object, for creating a new tree.\n///\n/// Usage: Create a builder object by calling 'new', load all the data into the\n/// tree by calling 'append' for each key-value pair, and then call 'finish'\n///\n/// 'L' is the key length in bytes\npub struct DiskBtreeBuilder<W, const L: usize>\nwhere\n    W: BlockWriter,\n{\n    writer: W,\n\n    ///\n    /// `stack[0]` is the current root page, `stack.last()` is the leaf.\n    ///\n    /// We maintain the length of the stack to be always greater than zero.\n    /// Two exceptions are:\n    /// 1. `Self::flush_node`. The method will push the new node if it extracted the last one.\n    ///    So because other methods cannot see the intermediate state invariant still holds.\n    /// 2. `Self::finish`. It consumes self and does not return it back,\n    ///    which means that this is where the structure is destroyed.\n    ///    Thus stack of zero length cannot be observed by other methods.\n    stack: Vec<BuildNode<L>>,\n\n    /// Last key that was appended to the tree. 
Used to sanity check that append\n    /// is called in increasing key order.\n    last_key: Option<[u8; L]>,\n}\n\nimpl<W, const L: usize> DiskBtreeBuilder<W, L>\nwhere\n    W: BlockWriter,\n{\n    pub fn new(writer: W) -> Self {\n        DiskBtreeBuilder {\n            writer,\n            last_key: None,\n            stack: vec![BuildNode::new(0)],\n        }\n    }\n\n    pub fn append(&mut self, key: &[u8; L], value: u64) -> Result<()> {\n        if value > MAX_VALUE {\n            return Err(DiskBtreeError::AppendOverflow(value));\n        }\n        if let Some(last_key) = &self.last_key {\n            if key <= last_key {\n                return Err(DiskBtreeError::UnsortedInput {\n                    key: key.as_slice().into(),\n                    last_key: last_key.as_slice().into(),\n                });\n            }\n        }\n        self.last_key = Some(*key);\n\n        self.append_internal(key, Value::from_u64(value))\n    }\n\n    fn append_internal(&mut self, key: &[u8; L], value: Value) -> Result<()> {\n        // Try to append to the current leaf buffer\n        let last = self\n            .stack\n            .last_mut()\n            .expect(\"should always have at least one item\");\n        let level = last.level;\n        if last.push(key, value) {\n            return Ok(());\n        }\n\n        // It did not fit. Try to compress, and if it succeeds to make\n        // some room on the node, try appending to it again.\n        #[allow(clippy::collapsible_if)]\n        if last.compress() {\n            if last.push(key, value) {\n                return Ok(());\n            }\n        }\n\n        // Could not append to the current leaf. 
Flush it and create a new one.\n        self.flush_node()?;\n\n        // Replace the node we flushed with an empty one and append the new\n        // key to it.\n        let mut last = BuildNode::new(level);\n        if !last.push(key, value) {\n            return Err(DiskBtreeError::FailedToPushToNewLeafNode);\n        }\n\n        self.stack.push(last);\n\n        Ok(())\n    }\n\n    /// Flush the bottommost node in the stack to disk. Appends a downlink to its parent,\n    /// and recursively flushes the parent too, if it becomes full. If the root page becomes full,\n    /// creates a new root page, increasing the height of the tree.\n    fn flush_node(&mut self) -> Result<()> {\n        // Get the current bottommost node in the stack and flush it to disk.\n        let last = self\n            .stack\n            .pop()\n            .expect(\"should always have at least one item\");\n        let buf = last.pack();\n        let downlink_key = last.first_key();\n        let downlink_ptr = self.writer.write_blk(buf)?;\n\n        // Append the downlink to the parent. If there is no parent, ie. 
this was the root page,\n        // create a new root page, increasing the height of the tree.\n        if self.stack.is_empty() {\n            self.stack.push(BuildNode::new(last.level + 1));\n        }\n        self.append_internal(&downlink_key, Value::from_blknum(downlink_ptr))\n    }\n\n    ///\n    /// Flushes everything to disk, and returns the block number of the root page.\n    /// The caller must store the root block number \"out-of-band\", and pass it\n    /// to the DiskBtreeReader::new() when you want to read the tree again.\n    /// (In the image and delta layers, it is stored in the beginning of the file,\n    /// in the summary header)\n    ///\n    pub fn finish(mut self) -> Result<(u32, W)> {\n        // flush all levels, except the root.\n        while self.stack.len() > 1 {\n            self.flush_node()?;\n        }\n\n        let root = self\n            .stack\n            .first()\n            .expect(\"by the check above we left one item there\");\n        let buf = root.pack();\n        let root_blknum = self.writer.write_blk(buf)?;\n\n        Ok((root_blknum, self.writer))\n    }\n\n    pub fn borrow_writer(&self) -> &W {\n        &self.writer\n    }\n}\n\n///\n/// BuildNode represents an incomplete page that we are appending to.\n///\n#[derive(Clone, Debug)]\nstruct BuildNode<const L: usize> {\n    num_children: u16,\n    level: u8,\n    prefix: Vec<u8>,\n    suffix_len: usize,\n\n    keys: Vec<u8>,\n    values: Vec<u8>,\n\n    size: usize, // physical size of this node, if it was written to disk like this\n}\n\nconst NODE_SIZE: usize = PAGE_SZ;\n\nconst NODE_HDR_SIZE: usize = 2 + 1 + 1 + 1;\n\nimpl<const L: usize> BuildNode<L> {\n    fn new(level: u8) -> Self {\n        BuildNode {\n            num_children: 0,\n            level,\n            prefix: Vec::new(),\n            suffix_len: 0,\n            keys: Vec::new(),\n            values: Vec::new(),\n            size: NODE_HDR_SIZE,\n        }\n    }\n\n    /// Try to append a 
key-value pair to this node. Returns 'true' on\n    /// success, 'false' if the page was full or the key was\n    /// incompatible with the prefix of the existing keys.\n    fn push(&mut self, key: &[u8; L], value: Value) -> bool {\n        // If we have already performed prefix-compression on the page,\n        // check that the incoming key has the same prefix.\n        if self.num_children > 0 {\n            // does the prefix allow it?\n            if !key.starts_with(&self.prefix) {\n                return false;\n            }\n        } else {\n            self.suffix_len = key.len();\n        }\n\n        // Is the node too full?\n        if self.size + self.suffix_len + VALUE_SZ >= NODE_SIZE {\n            return false;\n        }\n\n        // All clear\n        self.num_children += 1;\n        self.keys.extend(&key[self.prefix.len()..]);\n        self.values.extend(value.0);\n\n        assert!(self.keys.len() == self.num_children as usize * self.suffix_len);\n        assert!(self.values.len() == self.num_children as usize * VALUE_SZ);\n\n        self.size += self.suffix_len + VALUE_SZ;\n\n        true\n    }\n\n    ///\n    /// Perform prefix-compression.\n    ///\n    /// Returns 'true' on success, 'false' if no compression was possible.\n    ///\n    fn compress(&mut self) -> bool {\n        let first_suffix = self.first_suffix();\n        let last_suffix = self.last_suffix();\n\n        // Find the common prefix among all keys\n        let mut prefix_len = 0;\n        while prefix_len < self.suffix_len {\n            if first_suffix[prefix_len] != last_suffix[prefix_len] {\n                break;\n            }\n            prefix_len += 1;\n        }\n        if prefix_len == 0 {\n            return false;\n        }\n\n        // Can compress. 
Rewrite the keys without the common prefix.\n        self.prefix.extend(&self.keys[..prefix_len]);\n\n        let mut new_keys = Vec::new();\n        let mut key_off = 0;\n        while key_off < self.keys.len() {\n            let next_key_off = key_off + self.suffix_len;\n            new_keys.extend(&self.keys[key_off + prefix_len..next_key_off]);\n            key_off = next_key_off;\n        }\n        self.keys = new_keys;\n        self.suffix_len -= prefix_len;\n\n        self.size -= prefix_len * self.num_children as usize;\n        self.size += prefix_len;\n\n        assert!(self.keys.len() == self.num_children as usize * self.suffix_len);\n        assert!(self.values.len() == self.num_children as usize * VALUE_SZ);\n\n        true\n    }\n\n    ///\n    /// Serialize the node to on-disk format.\n    ///\n    fn pack(&self) -> IoBuffer {\n        assert!(self.keys.len() == self.num_children as usize * self.suffix_len);\n        assert!(self.values.len() == self.num_children as usize * VALUE_SZ);\n        assert!(self.num_children > 0);\n\n        let mut buf = IoBufferMut::with_capacity(PAGE_SZ);\n\n        buf.put_u16(self.num_children);\n        buf.put_u8(self.level);\n        buf.put_u8(self.prefix.len() as u8);\n        buf.put_u8(self.suffix_len as u8);\n        buf.put(&self.prefix[..]);\n        buf.put(&self.keys[..]);\n        buf.put(&self.values[..]);\n\n        assert!(buf.len() == self.size);\n\n        assert!(buf.len() <= PAGE_SZ);\n        buf.extend_with(0, PAGE_SZ - buf.len());\n        buf.freeze()\n    }\n\n    fn first_suffix(&self) -> &[u8] {\n        &self.keys[..self.suffix_len]\n    }\n    fn last_suffix(&self) -> &[u8] {\n        &self.keys[self.keys.len() - self.suffix_len..]\n    }\n\n    /// Return the full first key of the page, including the prefix\n    fn first_key(&self) -> [u8; L] {\n        let mut key = [0u8; L];\n        key[..self.prefix.len()].copy_from_slice(&self.prefix);\n        
key[self.prefix.len()..].copy_from_slice(self.first_suffix());\n        key\n    }\n}\n\n#[cfg(test)]\npub(crate) mod tests {\n    use std::collections::BTreeMap;\n    use std::sync::atomic::{AtomicUsize, Ordering};\n\n    use rand::Rng;\n\n    use super::*;\n    use crate::context::DownloadBehavior;\n    use crate::task_mgr::TaskKind;\n    use crate::tenant::block_io::{BlockCursor, BlockLease, BlockReaderRef};\n\n    #[derive(Clone, Default)]\n    pub(crate) struct TestDisk {\n        blocks: Vec<IoBuffer>,\n    }\n    impl TestDisk {\n        fn new() -> Self {\n            Self::default()\n        }\n        pub(crate) fn read_blk(&self, blknum: u32) -> io::Result<BlockLease> {\n            let mut buf = [0u8; PAGE_SZ];\n            buf.copy_from_slice(&self.blocks[blknum as usize]);\n            Ok(std::sync::Arc::new(buf).into())\n        }\n    }\n    impl BlockReader for TestDisk {\n        fn block_cursor(&self) -> BlockCursor<'_> {\n            BlockCursor::new(BlockReaderRef::TestDisk(self))\n        }\n    }\n    impl BlockWriter for &mut TestDisk {\n        fn write_blk(&mut self, buf: IoBuffer) -> io::Result<u32> {\n            let blknum = self.blocks.len();\n            self.blocks.push(buf);\n            Ok(blknum as u32)\n        }\n    }\n\n    #[tokio::test]\n    async fn basic() -> Result<()> {\n        let mut disk = TestDisk::new();\n        let mut writer = DiskBtreeBuilder::<_, 6>::new(&mut disk);\n\n        let ctx =\n            RequestContext::new(TaskKind::UnitTest, DownloadBehavior::Error).with_scope_unit_test();\n\n        let all_keys: Vec<&[u8; 6]> = vec![\n            b\"xaaaaa\", b\"xaaaba\", b\"xaaaca\", b\"xabaaa\", b\"xababa\", b\"xabaca\", b\"xabada\", b\"xabadb\",\n        ];\n        let all_data: Vec<(&[u8; 6], u64)> = all_keys\n            .iter()\n            .enumerate()\n            .map(|(idx, key)| (*key, idx as u64))\n            .collect();\n        for (key, val) in all_data.iter() {\n            writer.append(key, 
*val)?;\n        }\n\n        let (root_offset, _writer) = writer.finish()?;\n\n        let reader = DiskBtreeReader::new(0, root_offset, disk);\n\n        reader.dump(&ctx).await?;\n\n        // Test the `get` function on all the keys.\n        for (key, val) in all_data.iter() {\n            assert_eq!(reader.get(key, &ctx).await?, Some(*val));\n        }\n        // And on some keys that don't exist\n        assert_eq!(reader.get(b\"aaaaaa\", &ctx).await?, None);\n        assert_eq!(reader.get(b\"zzzzzz\", &ctx).await?, None);\n        assert_eq!(reader.get(b\"xaaabx\", &ctx).await?, None);\n\n        // Test search with `visit` function\n        let search_key = b\"xabaaa\";\n        let expected: Vec<(Vec<u8>, u64)> = all_data\n            .iter()\n            .filter(|(key, _value)| key[..] >= search_key[..])\n            .map(|(key, value)| (key.to_vec(), *value))\n            .collect();\n\n        let mut data = Vec::new();\n        reader\n            .visit(\n                search_key,\n                VisitDirection::Forwards,\n                |key, value| {\n                    data.push((key.to_vec(), value));\n                    true\n                },\n                &ctx,\n            )\n            .await?;\n        assert_eq!(data, expected);\n\n        // Test a backwards scan\n        let mut expected: Vec<(Vec<u8>, u64)> = all_data\n            .iter()\n            .filter(|(key, _value)| key[..] 
<= search_key[..])\n            .map(|(key, value)| (key.to_vec(), *value))\n            .collect();\n        expected.reverse();\n        let mut data = Vec::new();\n        reader\n            .visit(\n                search_key,\n                VisitDirection::Backwards,\n                |key, value| {\n                    data.push((key.to_vec(), value));\n                    true\n                },\n                &ctx,\n            )\n            .await?;\n        assert_eq!(data, expected);\n\n        // Backward scan where nothing matches\n        reader\n            .visit(\n                b\"aaaaaa\",\n                VisitDirection::Backwards,\n                |key, value| {\n                    panic!(\"found unexpected key {}: {}\", hex::encode(key), value);\n                },\n                &ctx,\n            )\n            .await?;\n\n        // Full scan\n        let expected: Vec<(Vec<u8>, u64)> = all_data\n            .iter()\n            .map(|(key, value)| (key.to_vec(), *value))\n            .collect();\n        let mut data = Vec::new();\n        reader\n            .visit(\n                &[0u8; 6],\n                VisitDirection::Forwards,\n                |key, value| {\n                    data.push((key.to_vec(), value));\n                    true\n                },\n                &ctx,\n            )\n            .await?;\n        assert_eq!(data, expected);\n\n        Ok(())\n    }\n\n    #[tokio::test]\n    async fn lots_of_keys() -> Result<()> {\n        let mut disk = TestDisk::new();\n        let mut writer = DiskBtreeBuilder::<_, 8>::new(&mut disk);\n        let ctx =\n            RequestContext::new(TaskKind::UnitTest, DownloadBehavior::Error).with_scope_unit_test();\n\n        const NUM_KEYS: u64 = 1000;\n\n        let mut all_data: BTreeMap<u64, u64> = BTreeMap::new();\n\n        for idx in 0..NUM_KEYS {\n            let key_int: u64 = 1 + idx * 2;\n            let key = u64::to_be_bytes(key_int);\n            
writer.append(&key, idx)?;\n\n            all_data.insert(key_int, idx);\n        }\n\n        let (root_offset, _writer) = writer.finish()?;\n\n        let reader = DiskBtreeReader::new(0, root_offset, disk);\n\n        reader.dump(&ctx).await?;\n\n        use std::sync::Mutex;\n\n        let result = Mutex::new(Vec::new());\n        let limit: AtomicUsize = AtomicUsize::new(10);\n        let take_ten = |key: &[u8], value: u64| {\n            let mut keybuf = [0u8; 8];\n            keybuf.copy_from_slice(key);\n            let key_int = u64::from_be_bytes(keybuf);\n\n            let mut result = result.lock().unwrap();\n            result.push((key_int, value));\n\n            // keep going until we have 10 matches\n            result.len() < limit.load(Ordering::Relaxed)\n        };\n\n        for search_key_int in 0..(NUM_KEYS * 2 + 10) {\n            let search_key = u64::to_be_bytes(search_key_int);\n            assert_eq!(\n                reader.get(&search_key, &ctx).await?,\n                all_data.get(&search_key_int).cloned()\n            );\n\n            // Test a forward scan starting with this key\n            result.lock().unwrap().clear();\n            reader\n                .visit(&search_key, VisitDirection::Forwards, take_ten, &ctx)\n                .await?;\n            let expected = all_data\n                .range(search_key_int..)\n                .take(10)\n                .map(|(&key, &val)| (key, val))\n                .collect::<Vec<(u64, u64)>>();\n            assert_eq!(*result.lock().unwrap(), expected);\n\n            // And a backwards scan\n            result.lock().unwrap().clear();\n            reader\n                .visit(&search_key, VisitDirection::Backwards, take_ten, &ctx)\n                .await?;\n            let expected = all_data\n                .range(..=search_key_int)\n                .rev()\n                .take(10)\n                .map(|(&key, &val)| (key, val))\n                .collect::<Vec<(u64, 
u64)>>();\n            assert_eq!(*result.lock().unwrap(), expected);\n        }\n\n        // full scan, forwards\n        let search_key = u64::to_be_bytes(0);\n        limit.store(usize::MAX, Ordering::Relaxed);\n        result.lock().unwrap().clear();\n        reader\n            .visit(&search_key, VisitDirection::Forwards, take_ten, &ctx)\n            .await?;\n        let expected = all_data\n            .iter()\n            .map(|(&key, &val)| (key, val))\n            .collect::<Vec<(u64, u64)>>();\n        assert_eq!(*result.lock().unwrap(), expected);\n\n        // full scan, backwards\n        let search_key = u64::to_be_bytes(u64::MAX);\n        limit.store(usize::MAX, Ordering::Relaxed);\n        result.lock().unwrap().clear();\n        reader\n            .visit(&search_key, VisitDirection::Backwards, take_ten, &ctx)\n            .await?;\n        let expected = all_data\n            .iter()\n            .rev()\n            .map(|(&key, &val)| (key, val))\n            .collect::<Vec<(u64, u64)>>();\n        assert_eq!(*result.lock().unwrap(), expected);\n\n        Ok(())\n    }\n\n    #[tokio::test]\n    async fn random_data() -> Result<()> {\n        let ctx = RequestContext::new(TaskKind::UnitTest, DownloadBehavior::Error);\n\n        // Generate random keys with exponential distribution, to\n        // exercise the prefix compression\n        const NUM_KEYS: usize = 100000;\n        let mut all_data: BTreeMap<u128, u64> = BTreeMap::new();\n        for idx in 0..NUM_KEYS {\n            let u: f64 = rand::rng().random_range(0.0..1.0);\n            let t = -(f64::ln(u));\n            let key_int = (t * 1000000.0) as u128;\n\n            all_data.insert(key_int, idx as u64);\n        }\n\n        // Build a tree from it\n        let mut disk = TestDisk::new();\n        let mut writer = DiskBtreeBuilder::<_, 16>::new(&mut disk);\n\n        for (&key, &val) in all_data.iter() {\n            writer.append(&u128::to_be_bytes(key), val)?;\n        }\n        let (root_offset, 
_writer) = writer.finish()?;\n\n        let reader = DiskBtreeReader::new(0, root_offset, disk);\n\n        // Test get() operation on all the keys\n        for (&key, &val) in all_data.iter() {\n            let search_key = u128::to_be_bytes(key);\n            assert_eq!(reader.get(&search_key, &ctx).await?, Some(val));\n        }\n\n        // Test get() operations on random keys, most of which will not exist\n        for _ in 0..100000 {\n            let key_int = rand::rng().random::<u128>();\n            let search_key = u128::to_be_bytes(key_int);\n            assert!(reader.get(&search_key, &ctx).await? == all_data.get(&key_int).cloned());\n        }\n\n        // Test boundary cases\n        assert!(\n            reader.get(&u128::to_be_bytes(u128::MIN), &ctx).await?\n                == all_data.get(&u128::MIN).cloned()\n        );\n        assert!(\n            reader.get(&u128::to_be_bytes(u128::MAX), &ctx).await?\n                == all_data.get(&u128::MAX).cloned()\n        );\n\n        // Test iterator and get_stream API\n        let mut iter = reader.iter(&[0; 16], &ctx);\n        let mut cnt = 0;\n        while let Some(res) = iter.next().await {\n            let (key, val) = res?;\n            let key = u128::from_be_bytes(key.as_slice().try_into().unwrap());\n            assert_eq!(val, *all_data.get(&key).unwrap());\n            cnt += 1;\n        }\n        assert_eq!(cnt, all_data.len());\n\n        Ok(())\n    }\n\n    #[test]\n    fn unsorted_input() {\n        let mut disk = TestDisk::new();\n        let mut writer = DiskBtreeBuilder::<_, 2>::new(&mut disk);\n\n        let _ = writer.append(b\"ba\", 1);\n        let _ = writer.append(b\"bb\", 2);\n        let err = writer.append(b\"aa\", 3).expect_err(\"should've failed\");\n        match err {\n            DiskBtreeError::UnsortedInput { key, last_key } => {\n                assert_eq!(key.as_ref(), b\"aa\".as_slice());\n                assert_eq!(last_key.as_ref(), b\"bb\".as_slice());\n   
         }\n            _ => panic!(\"unexpected error variant, expected DiskBtreeError::UnsortedInput\"),\n        }\n    }\n\n    ///\n    /// This test contains a particular data set, see disk_btree_test_data.rs\n    ///\n    #[tokio::test]\n    async fn particular_data() -> Result<()> {\n        // Build a tree from it\n        let mut disk = TestDisk::new();\n        let mut writer = DiskBtreeBuilder::<_, 26>::new(&mut disk);\n        let ctx =\n            RequestContext::new(TaskKind::UnitTest, DownloadBehavior::Error).with_scope_unit_test();\n\n        for (key, val) in disk_btree_test_data::TEST_DATA {\n            writer.append(&key, val)?;\n        }\n        let (root_offset, writer) = writer.finish()?;\n\n        println!(\"SIZE: {} blocks\", writer.blocks.len());\n\n        let reader = DiskBtreeReader::new(0, root_offset, disk);\n\n        // Test get() operation on all the keys\n        for (key, val) in disk_btree_test_data::TEST_DATA {\n            assert_eq!(reader.get(&key, &ctx).await?, Some(val));\n        }\n\n        // Test full scan\n        let mut count = 0;\n        reader\n            .visit(\n                &[0u8; 26],\n                VisitDirection::Forwards,\n                |_key, _value| {\n                    count += 1;\n                    true\n                },\n                &ctx,\n            )\n            .await?;\n        assert_eq!(count, disk_btree_test_data::TEST_DATA.len());\n\n        reader.dump(&ctx).await?;\n\n        Ok(())\n    }\n}\n\n#[cfg(test)]\n#[path = \"disk_btree_test_data.rs\"]\nmod disk_btree_test_data;\n"
  },
  {
    "path": "pageserver/src/tenant/disk_btree_test_data.rs",
    "content": "use hex_literal::hex;\n\n/// Test data set for the 'particular_data' test in disk_btree.rs\n///\n/// This test contains a particular data set, representing all the keys\n/// generated by the 'test_random_updates' unit test. I extracted this while\n/// trying to debug a failure in that test. The bug turned out to be\n/// elsewhere, and I'm not sure if this is still useful, but keeping it for\n/// now...  Maybe it's a useful data set to show the typical key-values used\n/// by a delta layer, for evaluating how well the prefix compression works.\n#[rustfmt::skip]\npub static TEST_DATA:  [([u8; 26], u64); 2000] = [\n    (hex!(\"0100000000333333334444444455000000000000000000000010\"), 0x004001),\n    (hex!(\"0100000000333333334444444455000000000000000000007cb0\"), 0x0040a1),\n    (hex!(\"0100000000333333334444444455000000010000000000000020\"), 0x004141),\n    (hex!(\"0100000000333333334444444455000000020000000000000030\"), 0x0041e1),\n    (hex!(\"01000000003333333344444444550000000200000000000051a0\"), 0x004281),\n    (hex!(\"0100000000333333334444444455000000030000000000000040\"), 0x004321),\n    (hex!(\"0100000000333333334444444455000000030000000000006cf0\"), 0x0043c1),\n    (hex!(\"0100000000333333334444444455000000030000000000007140\"), 0x004461),\n    (hex!(\"0100000000333333334444444455000000040000000000000050\"), 0x004501),\n    (hex!(\"01000000003333333344444444550000000400000000000047f0\"), 0x0045a1),\n    (hex!(\"01000000003333333344444444550000000400000000000072b0\"), 0x004641),\n    (hex!(\"0100000000333333334444444455000000050000000000000060\"), 0x0046e1),\n    (hex!(\"0100000000333333334444444455000000050000000000005550\"), 0x004781),\n    (hex!(\"0100000000333333334444444455000000060000000000000070\"), 0x004821),\n    (hex!(\"01000000003333333344444444550000000600000000000044a0\"), 0x0048c1),\n    (hex!(\"0100000000333333334444444455000000060000000000006870\"), 0x004961),\n    
(hex!(\"0100000000333333334444444455000000070000000000000080\"), 0x004a01),\n    (hex!(\"0100000000333333334444444455000000080000000000000090\"), 0x004aa1),\n    (hex!(\"0100000000333333334444444455000000080000000000004150\"), 0x004b41),\n    (hex!(\"01000000003333333344444444550000000900000000000000a0\"), 0x004be1),\n    (hex!(\"01000000003333333344444444550000000a00000000000000b0\"), 0x004c81),\n    (hex!(\"01000000003333333344444444550000000a0000000000006680\"), 0x004d21),\n    (hex!(\"01000000003333333344444444550000000b00000000000000c0\"), 0x004dc1),\n    (hex!(\"01000000003333333344444444550000000b0000000000006230\"), 0x004e61),\n    (hex!(\"01000000003333333344444444550000000c00000000000000d0\"), 0x004f01),\n    (hex!(\"01000000003333333344444444550000000d00000000000000e0\"), 0x004fa1),\n    (hex!(\"01000000003333333344444444550000000e00000000000000f0\"), 0x005041),\n    (hex!(\"01000000003333333344444444550000000e0000000000006000\"), 0x0050e1),\n    (hex!(\"01000000003333333344444444550000000f0000000000000100\"), 0x005181),\n    (hex!(\"01000000003333333344444444550000000f00000000000053c0\"), 0x005221),\n    (hex!(\"01000000003333333344444444550000000f0000000000006580\"), 0x0052c1),\n    (hex!(\"0100000000333333334444444455000000100000000000000110\"), 0x005361),\n    (hex!(\"01000000003333333344444444550000001000000000000046c0\"), 0x005401),\n    (hex!(\"0100000000333333334444444455000000100000000000004e40\"), 0x0054a1),\n    (hex!(\"0100000000333333334444444455000000110000000000000120\"), 0x005541),\n    (hex!(\"0100000000333333334444444455000000120000000000000130\"), 0x0055e1),\n    (hex!(\"01000000003333333344444444550000001200000000000066d0\"), 0x005681),\n    (hex!(\"0100000000333333334444444455000000130000000000000140\"), 0x005721),\n    (hex!(\"0100000000333333334444444455000000130000000000007710\"), 0x0057c1),\n    (hex!(\"0100000000333333334444444455000000140000000000000150\"), 0x005861),\n    
(hex!(\"0100000000333333334444444455000000140000000000006c40\"), 0x005901),\n    (hex!(\"0100000000333333334444444455000000150000000000000160\"), 0x0059a1),\n    (hex!(\"0100000000333333334444444455000000150000000000005990\"), 0x005a41),\n    (hex!(\"0100000000333333334444444455000000160000000000000170\"), 0x005ae1),\n    (hex!(\"0100000000333333334444444455000000160000000000005530\"), 0x005b81),\n    (hex!(\"0100000000333333334444444455000000170000000000000180\"), 0x005c21),\n    (hex!(\"0100000000333333334444444455000000170000000000004290\"), 0x005cc1),\n    (hex!(\"0100000000333333334444444455000000180000000000000190\"), 0x005d61),\n    (hex!(\"01000000003333333344444444550000001800000000000051c0\"), 0x005e01),\n    (hex!(\"01000000003333333344444444550000001900000000000001a0\"), 0x005ea1),\n    (hex!(\"0100000000333333334444444455000000190000000000005420\"), 0x005f41),\n    (hex!(\"0100000000333333334444444455000000190000000000005770\"), 0x005fe1),\n    (hex!(\"01000000003333333344444444550000001900000000000079d0\"), 0x006081),\n    (hex!(\"01000000003333333344444444550000001a00000000000001b0\"), 0x006121),\n    (hex!(\"01000000003333333344444444550000001a0000000000006f70\"), 0x0061c1),\n    (hex!(\"01000000003333333344444444550000001a0000000000007150\"), 0x006261),\n    (hex!(\"01000000003333333344444444550000001b00000000000001c0\"), 0x006301),\n    (hex!(\"01000000003333333344444444550000001b0000000000005070\"), 0x0063a1),\n    (hex!(\"01000000003333333344444444550000001c00000000000001d0\"), 0x006441),\n    (hex!(\"01000000003333333344444444550000001d00000000000001e0\"), 0x0064e1),\n    (hex!(\"01000000003333333344444444550000001e00000000000001f0\"), 0x006581),\n    (hex!(\"01000000003333333344444444550000001e0000000000005650\"), 0x006621),\n    (hex!(\"01000000003333333344444444550000001f0000000000000200\"), 0x0066c1),\n    (hex!(\"01000000003333333344444444550000001f0000000000006ca0\"), 0x006761),\n    
(hex!(\"0100000000333333334444444455000000200000000000000210\"), 0x006801),\n    (hex!(\"0100000000333333334444444455000000200000000000005fc0\"), 0x0068a1),\n    (hex!(\"0100000000333333334444444455000000210000000000000220\"), 0x006941),\n    (hex!(\"0100000000333333334444444455000000210000000000006430\"), 0x0069e1),\n    (hex!(\"0100000000333333334444444455000000220000000000000230\"), 0x006a81),\n    (hex!(\"01000000003333333344444444550000002200000000000040e0\"), 0x006b21),\n    (hex!(\"0100000000333333334444444455000000230000000000000240\"), 0x006bc1),\n    (hex!(\"01000000003333333344444444550000002300000000000042d0\"), 0x006c61),\n    (hex!(\"0100000000333333334444444455000000240000000000000250\"), 0x006d01),\n    (hex!(\"0100000000333333334444444455000000250000000000000260\"), 0x006da1),\n    (hex!(\"01000000003333333344444444550000002500000000000058c0\"), 0x006e41),\n    (hex!(\"0100000000333333334444444455000000260000000000000270\"), 0x006ee1),\n    (hex!(\"0100000000333333334444444455000000260000000000004020\"), 0x006f81),\n    (hex!(\"0100000000333333334444444455000000270000000000000280\"), 0x007021),\n    (hex!(\"0100000000333333334444444455000000280000000000000290\"), 0x0070c1),\n    (hex!(\"0100000000333333334444444455000000280000000000007c00\"), 0x007161),\n    (hex!(\"01000000003333333344444444550000002900000000000002a0\"), 0x007201),\n    (hex!(\"01000000003333333344444444550000002a00000000000002b0\"), 0x0072a1),\n    (hex!(\"01000000003333333344444444550000002b00000000000002c0\"), 0x007341),\n    (hex!(\"01000000003333333344444444550000002c00000000000002d0\"), 0x0073e1),\n    (hex!(\"01000000003333333344444444550000002c00000000000041b0\"), 0x007481),\n    (hex!(\"01000000003333333344444444550000002c0000000000004c30\"), 0x007521),\n    (hex!(\"01000000003333333344444444550000002d00000000000002e0\"), 0x0075c1),\n    (hex!(\"01000000003333333344444444550000002d0000000000005e40\"), 0x007661),\n    
(hex!(\"01000000003333333344444444550000002d0000000000006990\"), 0x007701),\n    (hex!(\"01000000003333333344444444550000002e00000000000002f0\"), 0x0077a1),\n    (hex!(\"01000000003333333344444444550000002f0000000000000300\"), 0x007841),\n    (hex!(\"01000000003333333344444444550000002f0000000000004a70\"), 0x0078e1),\n    (hex!(\"01000000003333333344444444550000002f0000000000006b40\"), 0x007981),\n    (hex!(\"0100000000333333334444444455000000300000000000000310\"), 0x007a21),\n    (hex!(\"0100000000333333334444444455000000310000000000000320\"), 0x007ac1),\n    (hex!(\"0100000000333333334444444455000000320000000000000330\"), 0x007b61),\n    (hex!(\"01000000003333333344444444550000003200000000000041a0\"), 0x007c01),\n    (hex!(\"0100000000333333334444444455000000320000000000007340\"), 0x007ca1),\n    (hex!(\"0100000000333333334444444455000000320000000000007730\"), 0x007d41),\n    (hex!(\"0100000000333333334444444455000000330000000000000340\"), 0x007de1),\n    (hex!(\"01000000003333333344444444550000003300000000000055a0\"), 0x007e81),\n    (hex!(\"0100000000333333334444444455000000340000000000000350\"), 0x007f21),\n    (hex!(\"0100000000333333334444444455000000350000000000000360\"), 0x007fc1),\n    (hex!(\"01000000003333333344444444550000003500000000000077a0\"), 0x008061),\n    (hex!(\"0100000000333333334444444455000000360000000000000370\"), 0x008101),\n    (hex!(\"0100000000333333334444444455000000370000000000000380\"), 0x0081a1),\n    (hex!(\"0100000000333333334444444455000000380000000000000390\"), 0x008241),\n    (hex!(\"01000000003333333344444444550000003900000000000003a0\"), 0x0082e1),\n    (hex!(\"01000000003333333344444444550000003a00000000000003b0\"), 0x008381),\n    (hex!(\"01000000003333333344444444550000003a00000000000071c0\"), 0x008421),\n    (hex!(\"01000000003333333344444444550000003b00000000000003c0\"), 0x0084c1),\n    (hex!(\"01000000003333333344444444550000003c00000000000003d0\"), 0x008561),\n    
(hex!(\"01000000003333333344444444550000003d00000000000003e0\"), 0x008601),\n    (hex!(\"01000000003333333344444444550000003e00000000000003f0\"), 0x0086a1),\n    (hex!(\"01000000003333333344444444550000003e00000000000062e0\"), 0x008741),\n    (hex!(\"01000000003333333344444444550000003f0000000000000400\"), 0x0087e1),\n    (hex!(\"0100000000333333334444444455000000400000000000000410\"), 0x008881),\n    (hex!(\"0100000000333333334444444455000000400000000000004460\"), 0x008921),\n    (hex!(\"0100000000333333334444444455000000400000000000005b90\"), 0x0089c1),\n    (hex!(\"01000000003333333344444444550000004000000000000079b0\"), 0x008a61),\n    (hex!(\"0100000000333333334444444455000000410000000000000420\"), 0x008b01),\n    (hex!(\"0100000000333333334444444455000000420000000000000430\"), 0x008ba1),\n    (hex!(\"0100000000333333334444444455000000420000000000005640\"), 0x008c41),\n    (hex!(\"0100000000333333334444444455000000430000000000000440\"), 0x008ce1),\n    (hex!(\"01000000003333333344444444550000004300000000000072a0\"), 0x008d81),\n    (hex!(\"0100000000333333334444444455000000440000000000000450\"), 0x008e21),\n    (hex!(\"0100000000333333334444444455000000450000000000000460\"), 0x008ec1),\n    (hex!(\"0100000000333333334444444455000000450000000000005750\"), 0x008f61),\n    (hex!(\"01000000003333333344444444550000004500000000000077b0\"), 0x009001),\n    (hex!(\"0100000000333333334444444455000000460000000000000470\"), 0x0090a1),\n    (hex!(\"0100000000333333334444444455000000470000000000000480\"), 0x009141),\n    (hex!(\"0100000000333333334444444455000000480000000000000490\"), 0x0091e1),\n    (hex!(\"01000000003333333344444444550000004800000000000069e0\"), 0x009281),\n    (hex!(\"01000000003333333344444444550000004900000000000004a0\"), 0x009321),\n    (hex!(\"0100000000333333334444444455000000490000000000007370\"), 0x0093c1),\n    (hex!(\"01000000003333333344444444550000004a00000000000004b0\"), 0x009461),\n    
(hex!(\"01000000003333333344444444550000004a0000000000005cb0\"), 0x009501),\n    (hex!(\"01000000003333333344444444550000004b00000000000004c0\"), 0x0095a1),\n    (hex!(\"01000000003333333344444444550000004c00000000000004d0\"), 0x009641),\n    (hex!(\"01000000003333333344444444550000004c0000000000004880\"), 0x0096e1),\n    (hex!(\"01000000003333333344444444550000004c0000000000007a40\"), 0x009781),\n    (hex!(\"01000000003333333344444444550000004d00000000000004e0\"), 0x009821),\n    (hex!(\"01000000003333333344444444550000004d0000000000006390\"), 0x0098c1),\n    (hex!(\"01000000003333333344444444550000004e00000000000004f0\"), 0x009961),\n    (hex!(\"01000000003333333344444444550000004e0000000000004db0\"), 0x009a01),\n    (hex!(\"01000000003333333344444444550000004f0000000000000500\"), 0x009aa1),\n    (hex!(\"0100000000333333334444444455000000500000000000000510\"), 0x009b41),\n    (hex!(\"0100000000333333334444444455000000510000000000000520\"), 0x009be1),\n    (hex!(\"01000000003333333344444444550000005100000000000069c0\"), 0x009c81),\n    (hex!(\"0100000000333333334444444455000000520000000000000530\"), 0x009d21),\n    (hex!(\"0100000000333333334444444455000000520000000000006e60\"), 0x009dc1),\n    (hex!(\"01000000003333333344444444550000005200000000000070c0\"), 0x009e61),\n    (hex!(\"0100000000333333334444444455000000530000000000000540\"), 0x009f01),\n    (hex!(\"0100000000333333334444444455000000530000000000005840\"), 0x009fa1),\n    (hex!(\"0100000000333333334444444455000000540000000000000550\"), 0x00a041),\n    (hex!(\"01000000003333333344444444550000005400000000000043e0\"), 0x00a0e1),\n    (hex!(\"01000000003333333344444444550000005400000000000074e0\"), 0x00a181),\n    (hex!(\"0100000000333333334444444455000000550000000000000560\"), 0x00a221),\n    (hex!(\"0100000000333333334444444455000000550000000000003ee0\"), 0x00a2c1),\n    (hex!(\"0100000000333333334444444455000000560000000000000570\"), 0x00a361),\n    
(hex!(\"0100000000333333334444444455000000570000000000000580\"), 0x00a401),\n    (hex!(\"0100000000333333334444444455000000570000000000007030\"), 0x00a4a1),\n    (hex!(\"0100000000333333334444444455000000580000000000000590\"), 0x00a541),\n    (hex!(\"0100000000333333334444444455000000580000000000005340\"), 0x00a5e1),\n    (hex!(\"01000000003333333344444444550000005800000000000059f0\"), 0x00a681),\n    (hex!(\"0100000000333333334444444455000000580000000000006930\"), 0x00a721),\n    (hex!(\"01000000003333333344444444550000005900000000000005a0\"), 0x00a7c1),\n    (hex!(\"0100000000333333334444444455000000590000000000003f90\"), 0x00a861),\n    (hex!(\"01000000003333333344444444550000005a00000000000005b0\"), 0x00a901),\n    (hex!(\"01000000003333333344444444550000005b00000000000005c0\"), 0x00a9a1),\n    (hex!(\"01000000003333333344444444550000005b00000000000062c0\"), 0x00aa41),\n    (hex!(\"01000000003333333344444444550000005c00000000000005d0\"), 0x00aae1),\n    (hex!(\"01000000003333333344444444550000005c0000000000005a70\"), 0x00ab81),\n    (hex!(\"01000000003333333344444444550000005c0000000000005dd0\"), 0x00ac21),\n    (hex!(\"01000000003333333344444444550000005d00000000000005e0\"), 0x00acc1),\n    (hex!(\"01000000003333333344444444550000005d0000000000005730\"), 0x00ad61),\n    (hex!(\"01000000003333333344444444550000005e00000000000005f0\"), 0x00ae01),\n    (hex!(\"01000000003333333344444444550000005e0000000000004f40\"), 0x00aea1),\n    (hex!(\"01000000003333333344444444550000005f0000000000000600\"), 0x00af41),\n    (hex!(\"0100000000333333334444444455000000600000000000000610\"), 0x00afe1),\n    (hex!(\"0100000000333333334444444455000000600000000000007c40\"), 0x00b081),\n    (hex!(\"0100000000333333334444444455000000610000000000000620\"), 0x00b121),\n    (hex!(\"0100000000333333334444444455000000610000000000007860\"), 0x00b1c1),\n    (hex!(\"0100000000333333334444444455000000620000000000000630\"), 0x00b261),\n    
(hex!(\"0100000000333333334444444455000000620000000000005050\"), 0x00b301),\n    (hex!(\"0100000000333333334444444455000000630000000000000640\"), 0x00b3a1),\n    (hex!(\"0100000000333333334444444455000000640000000000000650\"), 0x00b441),\n    (hex!(\"0100000000333333334444444455000000650000000000000660\"), 0x00b4e1),\n    (hex!(\"0100000000333333334444444455000000650000000000005330\"), 0x00b581),\n    (hex!(\"0100000000333333334444444455000000660000000000000670\"), 0x00b621),\n    (hex!(\"0100000000333333334444444455000000660000000000004e20\"), 0x00b6c1),\n    (hex!(\"0100000000333333334444444455000000660000000000005ee0\"), 0x00b761),\n    (hex!(\"0100000000333333334444444455000000660000000000006360\"), 0x00b801),\n    (hex!(\"0100000000333333334444444455000000670000000000000680\"), 0x00b8a1),\n    (hex!(\"0100000000333333334444444455000000670000000000004040\"), 0x00b941),\n    (hex!(\"0100000000333333334444444455000000680000000000000690\"), 0x00b9e1),\n    (hex!(\"0100000000333333334444444455000000680000000000003f80\"), 0x00ba81),\n    (hex!(\"01000000003333333344444444550000006800000000000041e0\"), 0x00bb21),\n    (hex!(\"01000000003333333344444444550000006900000000000006a0\"), 0x00bbc1),\n    (hex!(\"0100000000333333334444444455000000690000000000006080\"), 0x00bc61),\n    (hex!(\"01000000003333333344444444550000006a00000000000006b0\"), 0x00bd01),\n    (hex!(\"01000000003333333344444444550000006a00000000000042f0\"), 0x00bda1),\n    (hex!(\"01000000003333333344444444550000006b00000000000006c0\"), 0x00be41),\n    (hex!(\"01000000003333333344444444550000006b00000000000052f0\"), 0x00bee1),\n    (hex!(\"01000000003333333344444444550000006b0000000000005980\"), 0x00bf81),\n    (hex!(\"01000000003333333344444444550000006b0000000000006170\"), 0x00c021),\n    (hex!(\"01000000003333333344444444550000006c00000000000006d0\"), 0x00c0c1),\n    (hex!(\"01000000003333333344444444550000006d00000000000006e0\"), 0x00c161),\n    
(hex!(\"01000000003333333344444444550000006d0000000000006fb0\"), 0x00c201),\n    (hex!(\"01000000003333333344444444550000006e00000000000006f0\"), 0x00c2a1),\n    (hex!(\"01000000003333333344444444550000006e00000000000065b0\"), 0x00c341),\n    (hex!(\"01000000003333333344444444550000006e0000000000007970\"), 0x00c3e1),\n    (hex!(\"01000000003333333344444444550000006f0000000000000700\"), 0x00c481),\n    (hex!(\"01000000003333333344444444550000006f0000000000005900\"), 0x00c521),\n    (hex!(\"01000000003333333344444444550000006f0000000000006d90\"), 0x00c5c1),\n    (hex!(\"0100000000333333334444444455000000700000000000000710\"), 0x00c661),\n    (hex!(\"01000000003333333344444444550000007000000000000045c0\"), 0x00c701),\n    (hex!(\"0100000000333333334444444455000000700000000000004d40\"), 0x00c7a1),\n    (hex!(\"0100000000333333334444444455000000710000000000000720\"), 0x00c841),\n    (hex!(\"0100000000333333334444444455000000710000000000004dc0\"), 0x00c8e1),\n    (hex!(\"0100000000333333334444444455000000710000000000007550\"), 0x00c981),\n    (hex!(\"0100000000333333334444444455000000720000000000000730\"), 0x00ca21),\n    (hex!(\"0100000000333333334444444455000000720000000000003ec0\"), 0x00cac1),\n    (hex!(\"01000000003333333344444444550000007200000000000045a0\"), 0x00cb61),\n    (hex!(\"0100000000333333334444444455000000720000000000006770\"), 0x00cc01),\n    (hex!(\"0100000000333333334444444455000000720000000000006bc0\"), 0x00cca1),\n    (hex!(\"0100000000333333334444444455000000730000000000000740\"), 0x00cd41),\n    (hex!(\"0100000000333333334444444455000000730000000000005250\"), 0x00cde1),\n    (hex!(\"01000000003333333344444444550000007300000000000075f0\"), 0x00ce81),\n    (hex!(\"0100000000333333334444444455000000740000000000000750\"), 0x00cf21),\n    (hex!(\"0100000000333333334444444455000000740000000000003ff0\"), 0x00cfc1),\n    (hex!(\"01000000003333333344444444550000007400000000000079e0\"), 0x00d061),\n    
(hex!(\"0100000000333333334444444455000000750000000000000760\"), 0x00d101),\n    (hex!(\"0100000000333333334444444455000000750000000000004310\"), 0x00d1a1),\n    (hex!(\"0100000000333333334444444455000000760000000000000770\"), 0x00d241),\n    (hex!(\"0100000000333333334444444455000000770000000000000780\"), 0x00d2e1),\n    (hex!(\"01000000003333333344444444550000007700000000000062f0\"), 0x00d381),\n    (hex!(\"0100000000333333334444444455000000770000000000006940\"), 0x00d421),\n    (hex!(\"0100000000333333334444444455000000780000000000000790\"), 0x00d4c1),\n    (hex!(\"01000000003333333344444444550000007900000000000007a0\"), 0x00d561),\n    (hex!(\"0100000000333333334444444455000000790000000000007af0\"), 0x00d601),\n    (hex!(\"01000000003333333344444444550000007a00000000000007b0\"), 0x00d6a1),\n    (hex!(\"01000000003333333344444444550000007b00000000000007c0\"), 0x00d741),\n    (hex!(\"01000000003333333344444444550000007b00000000000067e0\"), 0x00d7e1),\n    (hex!(\"01000000003333333344444444550000007b0000000000007890\"), 0x00d881),\n    (hex!(\"01000000003333333344444444550000007c00000000000007d0\"), 0x00d921),\n    (hex!(\"01000000003333333344444444550000007d00000000000007e0\"), 0x00d9c1),\n    (hex!(\"01000000003333333344444444550000007e00000000000007f0\"), 0x00da61),\n    (hex!(\"01000000003333333344444444550000007f0000000000000800\"), 0x00db01),\n    (hex!(\"01000000003333333344444444550000007f0000000000005be0\"), 0x00dba1),\n    (hex!(\"0100000000333333334444444455000000800000000000000810\"), 0x00dc41),\n    (hex!(\"0100000000333333334444444455000000810000000000000820\"), 0x00dce1),\n    (hex!(\"0100000000333333334444444455000000810000000000007190\"), 0x00dd81),\n    (hex!(\"0100000000333333334444444455000000820000000000000830\"), 0x00de21),\n    (hex!(\"0100000000333333334444444455000000820000000000004ab0\"), 0x00dec1),\n    (hex!(\"0100000000333333334444444455000000830000000000000840\"), 0x00df61),\n    
(hex!(\"0100000000333333334444444455000000830000000000006720\"), 0x00e001),\n    (hex!(\"0100000000333333334444444455000000840000000000000850\"), 0x00e0a1),\n    (hex!(\"0100000000333333334444444455000000850000000000000860\"), 0x00e141),\n    (hex!(\"01000000003333333344444444550000008500000000000054f0\"), 0x00e1e1),\n    (hex!(\"0100000000333333334444444455000000850000000000007920\"), 0x00e281),\n    (hex!(\"0100000000333333334444444455000000860000000000000870\"), 0x00e321),\n    (hex!(\"01000000003333333344444444550000008600000000000060e0\"), 0x00e3c1),\n    (hex!(\"0100000000333333334444444455000000860000000000006be0\"), 0x00e461),\n    (hex!(\"0100000000333333334444444455000000870000000000000880\"), 0x00e501),\n    (hex!(\"0100000000333333334444444455000000870000000000006820\"), 0x00e5a1),\n    (hex!(\"0100000000333333334444444455000000880000000000000890\"), 0x00e641),\n    (hex!(\"01000000003333333344444444550000008900000000000008a0\"), 0x00e6e1),\n    (hex!(\"0100000000333333334444444455000000890000000000007c30\"), 0x00e781),\n    (hex!(\"01000000003333333344444444550000008a00000000000008b0\"), 0x00e821),\n    (hex!(\"01000000003333333344444444550000008b00000000000008c0\"), 0x00e8c1),\n    (hex!(\"01000000003333333344444444550000008b0000000000005910\"), 0x00e961),\n    (hex!(\"01000000003333333344444444550000008b0000000000006fe0\"), 0x00ea01),\n    (hex!(\"01000000003333333344444444550000008c00000000000008d0\"), 0x00eaa1),\n    (hex!(\"01000000003333333344444444550000008c0000000000006800\"), 0x00eb41),\n    (hex!(\"01000000003333333344444444550000008d00000000000008e0\"), 0x00ebe1),\n    (hex!(\"01000000003333333344444444550000008d0000000000005810\"), 0x00ec81),\n    (hex!(\"01000000003333333344444444550000008d0000000000007c90\"), 0x00ed21),\n    (hex!(\"01000000003333333344444444550000008e00000000000008f0\"), 0x00edc1),\n    (hex!(\"01000000003333333344444444550000008e00000000000058f0\"), 0x00ee61),\n    
(hex!(\"01000000003333333344444444550000008f0000000000000900\"), 0x00ef01),\n    (hex!(\"01000000003333333344444444550000008f0000000000005a30\"), 0x00efa1),\n    (hex!(\"0100000000333333334444444455000000900000000000000910\"), 0x00f041),\n    (hex!(\"0100000000333333334444444455000000900000000000006130\"), 0x00f0e1),\n    (hex!(\"0100000000333333334444444455000000900000000000006550\"), 0x00f181),\n    (hex!(\"0100000000333333334444444455000000910000000000000920\"), 0x00f221),\n    (hex!(\"01000000003333333344444444550000009100000000000079f0\"), 0x00f2c1),\n    (hex!(\"0100000000333333334444444455000000920000000000000930\"), 0x00f361),\n    (hex!(\"0100000000333333334444444455000000920000000000005620\"), 0x00f401),\n    (hex!(\"0100000000333333334444444455000000920000000000005e90\"), 0x00f4a1),\n    (hex!(\"01000000003333333344444444550000009200000000000063d0\"), 0x00f541),\n    (hex!(\"01000000003333333344444444550000009200000000000076c0\"), 0x00f5e1),\n    (hex!(\"0100000000333333334444444455000000930000000000000940\"), 0x00f681),\n    (hex!(\"01000000003333333344444444550000009300000000000044e0\"), 0x00f721),\n    (hex!(\"0100000000333333334444444455000000940000000000000950\"), 0x00f7c1),\n    (hex!(\"0100000000333333334444444455000000940000000000007a30\"), 0x00f861),\n    (hex!(\"0100000000333333334444444455000000950000000000000960\"), 0x00f901),\n    (hex!(\"0100000000333333334444444455000000950000000000007a70\"), 0x00f9a1),\n    (hex!(\"0100000000333333334444444455000000960000000000000970\"), 0x00fa41),\n    (hex!(\"0100000000333333334444444455000000970000000000000980\"), 0x00fae1),\n    (hex!(\"0100000000333333334444444455000000970000000000007330\"), 0x00fb81),\n    (hex!(\"0100000000333333334444444455000000980000000000000990\"), 0x00fc21),\n    (hex!(\"0100000000333333334444444455000000980000000000005af0\"), 0x00fcc1),\n    (hex!(\"0100000000333333334444444455000000980000000000007ae0\"), 0x00fd61),\n    
(hex!(\"01000000003333333344444444550000009900000000000009a0\"), 0x00fe01),\n    (hex!(\"0100000000333333334444444455000000990000000000005160\"), 0x00fea1),\n    (hex!(\"0100000000333333334444444455000000990000000000006850\"), 0x00ff41),\n    (hex!(\"01000000003333333344444444550000009a00000000000009b0\"), 0x00ffe1),\n    (hex!(\"01000000003333333344444444550000009b00000000000009c0\"), 0x010081),\n    (hex!(\"01000000003333333344444444550000009b0000000000005010\"), 0x010121),\n    (hex!(\"01000000003333333344444444550000009c00000000000009d0\"), 0x0101c1),\n    (hex!(\"01000000003333333344444444550000009c00000000000042e0\"), 0x010261),\n    (hex!(\"01000000003333333344444444550000009d00000000000009e0\"), 0x010301),\n    (hex!(\"01000000003333333344444444550000009d00000000000057f0\"), 0x0103a1),\n    (hex!(\"01000000003333333344444444550000009e00000000000009f0\"), 0x010441),\n    (hex!(\"01000000003333333344444444550000009e0000000000004ef0\"), 0x0104e1),\n    (hex!(\"01000000003333333344444444550000009f0000000000000a00\"), 0x010581),\n    (hex!(\"01000000003333333344444444550000009f0000000000006110\"), 0x010621),\n    (hex!(\"0100000000333333334444444455000000a00000000000000a10\"), 0x0106c1),\n    (hex!(\"0100000000333333334444444455000000a10000000000000a20\"), 0x010761),\n    (hex!(\"0100000000333333334444444455000000a100000000000040d0\"), 0x010801),\n    (hex!(\"0100000000333333334444444455000000a10000000000007670\"), 0x0108a1),\n    (hex!(\"0100000000333333334444444455000000a20000000000000a30\"), 0x010941),\n    (hex!(\"0100000000333333334444444455000000a200000000000074d0\"), 0x0109e1),\n    (hex!(\"0100000000333333334444444455000000a30000000000000a40\"), 0x010a81),\n    (hex!(\"0100000000333333334444444455000000a30000000000004c90\"), 0x010b21),\n    (hex!(\"0100000000333333334444444455000000a40000000000000a50\"), 0x010bc1),\n    (hex!(\"0100000000333333334444444455000000a50000000000000a60\"), 0x010c61),\n    
(hex!(\"0100000000333333334444444455000000a60000000000000a70\"), 0x010d01),\n    (hex!(\"0100000000333333334444444455000000a60000000000006d80\"), 0x010da1),\n    (hex!(\"0100000000333333334444444455000000a60000000000007830\"), 0x010e41),\n    (hex!(\"0100000000333333334444444455000000a70000000000000a80\"), 0x010ee1),\n    (hex!(\"0100000000333333334444444455000000a700000000000064f0\"), 0x010f81),\n    (hex!(\"0100000000333333334444444455000000a80000000000000a90\"), 0x011021),\n    (hex!(\"0100000000333333334444444455000000a90000000000000aa0\"), 0x0110c1),\n    (hex!(\"0100000000333333334444444455000000a90000000000005e30\"), 0x011161),\n    (hex!(\"0100000000333333334444444455000000aa0000000000000ab0\"), 0x011201),\n    (hex!(\"0100000000333333334444444455000000ab0000000000000ac0\"), 0x0112a1),\n    (hex!(\"0100000000333333334444444455000000ac0000000000000ad0\"), 0x011341),\n    (hex!(\"0100000000333333334444444455000000ac0000000000006d20\"), 0x0113e1),\n    (hex!(\"0100000000333333334444444455000000ac0000000000007000\"), 0x011481),\n    (hex!(\"0100000000333333334444444455000000ad0000000000000ae0\"), 0x011521),\n    (hex!(\"0100000000333333334444444455000000ae0000000000000af0\"), 0x0115c1),\n    (hex!(\"0100000000333333334444444455000000ae0000000000004a10\"), 0x011661),\n    (hex!(\"0100000000333333334444444455000000af0000000000000b00\"), 0x011701),\n    (hex!(\"0100000000333333334444444455000000af0000000000004e10\"), 0x0117a1),\n    (hex!(\"0100000000333333334444444455000000b00000000000000b10\"), 0x011841),\n    (hex!(\"0100000000333333334444444455000000b00000000000004280\"), 0x0118e1),\n    (hex!(\"0100000000333333334444444455000000b000000000000077e0\"), 0x011981),\n    (hex!(\"0100000000333333334444444455000000b10000000000000b20\"), 0x011a21),\n    (hex!(\"0100000000333333334444444455000000b20000000000000b30\"), 0x011ac1),\n    (hex!(\"0100000000333333334444444455000000b30000000000000b40\"), 0x011b61),\n    
(hex!(\"0100000000333333334444444455000000b30000000000004bc0\"), 0x011c01),\n    (hex!(\"0100000000333333334444444455000000b40000000000000b50\"), 0x011ca1),\n    (hex!(\"0100000000333333334444444455000000b50000000000000b60\"), 0x011d41),\n    (hex!(\"0100000000333333334444444455000000b50000000000004fa0\"), 0x011de1),\n    (hex!(\"0100000000333333334444444455000000b50000000000006a60\"), 0x011e81),\n    (hex!(\"0100000000333333334444444455000000b60000000000000b70\"), 0x011f21),\n    (hex!(\"0100000000333333334444444455000000b60000000000005630\"), 0x011fc1),\n    (hex!(\"0100000000333333334444444455000000b70000000000000b80\"), 0x012061),\n    (hex!(\"0100000000333333334444444455000000b80000000000000b90\"), 0x012101),\n    (hex!(\"0100000000333333334444444455000000b80000000000006f80\"), 0x0121a1),\n    (hex!(\"0100000000333333334444444455000000b90000000000000ba0\"), 0x012241),\n    (hex!(\"0100000000333333334444444455000000ba0000000000000bb0\"), 0x0122e1),\n    (hex!(\"0100000000333333334444444455000000bb0000000000000bc0\"), 0x012381),\n    (hex!(\"0100000000333333334444444455000000bb00000000000047c0\"), 0x012421),\n    (hex!(\"0100000000333333334444444455000000bb0000000000006060\"), 0x0124c1),\n    (hex!(\"0100000000333333334444444455000000bc0000000000000bd0\"), 0x012561),\n    (hex!(\"0100000000333333334444444455000000bd0000000000000be0\"), 0x012601),\n    (hex!(\"0100000000333333334444444455000000bd0000000000004e80\"), 0x0126a1),\n    (hex!(\"0100000000333333334444444455000000be0000000000000bf0\"), 0x012741),\n    (hex!(\"0100000000333333334444444455000000bf0000000000000c00\"), 0x0127e1),\n    (hex!(\"0100000000333333334444444455000000bf00000000000047a0\"), 0x012881),\n    (hex!(\"0100000000333333334444444455000000bf0000000000006da0\"), 0x012921),\n    (hex!(\"0100000000333333334444444455000000c00000000000000c10\"), 0x0129c1),\n    (hex!(\"0100000000333333334444444455000000c10000000000000c20\"), 0x012a61),\n    
(hex!(\"0100000000333333334444444455000000c20000000000000c30\"), 0x012b01),\n    (hex!(\"0100000000333333334444444455000000c20000000000004bd0\"), 0x012ba1),\n    (hex!(\"0100000000333333334444444455000000c20000000000006ac0\"), 0x012c41),\n    (hex!(\"0100000000333333334444444455000000c30000000000000c40\"), 0x012ce1),\n    (hex!(\"0100000000333333334444444455000000c30000000000004660\"), 0x012d81),\n    (hex!(\"0100000000333333334444444455000000c40000000000000c50\"), 0x012e21),\n    (hex!(\"0100000000333333334444444455000000c50000000000000c60\"), 0x012ec1),\n    (hex!(\"0100000000333333334444444455000000c60000000000000c70\"), 0x012f61),\n    (hex!(\"0100000000333333334444444455000000c60000000000005880\"), 0x013001),\n    (hex!(\"0100000000333333334444444455000000c60000000000006b70\"), 0x0130a1),\n    (hex!(\"0100000000333333334444444455000000c70000000000000c80\"), 0x013141),\n    (hex!(\"0100000000333333334444444455000000c80000000000000c90\"), 0x0131e1),\n    (hex!(\"0100000000333333334444444455000000c80000000000005310\"), 0x013281),\n    (hex!(\"0100000000333333334444444455000000c80000000000005db0\"), 0x013321),\n    (hex!(\"0100000000333333334444444455000000c80000000000007040\"), 0x0133c1),\n    (hex!(\"0100000000333333334444444455000000c80000000000007290\"), 0x013461),\n    (hex!(\"0100000000333333334444444455000000c90000000000000ca0\"), 0x013501),\n    (hex!(\"0100000000333333334444444455000000c90000000000004fe0\"), 0x0135a1),\n    (hex!(\"0100000000333333334444444455000000ca0000000000000cb0\"), 0x013641),\n    (hex!(\"0100000000333333334444444455000000ca0000000000006140\"), 0x0136e1),\n    (hex!(\"0100000000333333334444444455000000ca0000000000007700\"), 0x013781),\n    (hex!(\"0100000000333333334444444455000000cb0000000000000cc0\"), 0x013821),\n    (hex!(\"0100000000333333334444444455000000cc0000000000000cd0\"), 0x0138c1),\n    (hex!(\"0100000000333333334444444455000000cd0000000000000ce0\"), 0x013961),\n    
(hex!(\"0100000000333333334444444455000000cd0000000000003f20\"), 0x013a01),\n    (hex!(\"0100000000333333334444444455000000cd00000000000040f0\"), 0x013aa1),\n    (hex!(\"0100000000333333334444444455000000cd0000000000004ec0\"), 0x013b41),\n    (hex!(\"0100000000333333334444444455000000ce0000000000000cf0\"), 0x013be1),\n    (hex!(\"0100000000333333334444444455000000ce0000000000007200\"), 0x013c81),\n    (hex!(\"0100000000333333334444444455000000cf0000000000000d00\"), 0x013d21),\n    (hex!(\"0100000000333333334444444455000000cf00000000000046a0\"), 0x013dc1),\n    (hex!(\"0100000000333333334444444455000000cf0000000000005960\"), 0x013e61),\n    (hex!(\"0100000000333333334444444455000000d00000000000000d10\"), 0x013f01),\n    (hex!(\"0100000000333333334444444455000000d00000000000005f30\"), 0x013fa1),\n    (hex!(\"0100000000333333334444444455000000d10000000000000d20\"), 0x014041),\n    (hex!(\"0100000000333333334444444455000000d10000000000007a00\"), 0x0140e1),\n    (hex!(\"0100000000333333334444444455000000d20000000000000d30\"), 0x014181),\n    (hex!(\"0100000000333333334444444455000000d30000000000000d40\"), 0x014221),\n    (hex!(\"0100000000333333334444444455000000d40000000000000d50\"), 0x0142c1),\n    (hex!(\"0100000000333333334444444455000000d50000000000000d60\"), 0x014361),\n    (hex!(\"0100000000333333334444444455000000d50000000000004960\"), 0x014401),\n    (hex!(\"0100000000333333334444444455000000d500000000000055d0\"), 0x0144a1),\n    (hex!(\"0100000000333333334444444455000000d500000000000067d0\"), 0x014541),\n    (hex!(\"0100000000333333334444444455000000d60000000000000d70\"), 0x0145e1),\n    (hex!(\"0100000000333333334444444455000000d70000000000000d80\"), 0x014681),\n    (hex!(\"0100000000333333334444444455000000d80000000000000d90\"), 0x014721),\n    (hex!(\"0100000000333333334444444455000000d800000000000065f0\"), 0x0147c1),\n    (hex!(\"0100000000333333334444444455000000d90000000000000da0\"), 0x014861),\n    
(hex!(\"0100000000333333334444444455000000d90000000000004980\"), 0x014901),\n    (hex!(\"0100000000333333334444444455000000da0000000000000db0\"), 0x0149a1),\n    (hex!(\"0100000000333333334444444455000000da00000000000048c0\"), 0x014a41),\n    (hex!(\"0100000000333333334444444455000000da00000000000072c0\"), 0x014ae1),\n    (hex!(\"0100000000333333334444444455000000da00000000000076b0\"), 0x014b81),\n    (hex!(\"0100000000333333334444444455000000db0000000000000dc0\"), 0x014c21),\n    (hex!(\"0100000000333333334444444455000000dc0000000000000dd0\"), 0x014cc1),\n    (hex!(\"0100000000333333334444444455000000dc00000000000040a0\"), 0x014d61),\n    (hex!(\"0100000000333333334444444455000000dc00000000000074c0\"), 0x014e01),\n    (hex!(\"0100000000333333334444444455000000dd0000000000000de0\"), 0x014ea1),\n    (hex!(\"0100000000333333334444444455000000dd0000000000004e50\"), 0x014f41),\n    (hex!(\"0100000000333333334444444455000000dd0000000000007270\"), 0x014fe1),\n    (hex!(\"0100000000333333334444444455000000de0000000000000df0\"), 0x015081),\n    (hex!(\"0100000000333333334444444455000000de00000000000078d0\"), 0x015121),\n    (hex!(\"0100000000333333334444444455000000df0000000000000e00\"), 0x0151c1),\n    (hex!(\"0100000000333333334444444455000000df0000000000004d30\"), 0x015261),\n    (hex!(\"0100000000333333334444444455000000df0000000000006c30\"), 0x015301),\n    (hex!(\"0100000000333333334444444455000000e00000000000000e10\"), 0x0153a1),\n    (hex!(\"0100000000333333334444444455000000e00000000000005d30\"), 0x015441),\n    (hex!(\"0100000000333333334444444455000000e10000000000000e20\"), 0x0154e1),\n    (hex!(\"0100000000333333334444444455000000e10000000000004610\"), 0x015581),\n    (hex!(\"0100000000333333334444444455000000e100000000000051d0\"), 0x015621),\n    (hex!(\"0100000000333333334444444455000000e10000000000005f10\"), 0x0156c1),\n    (hex!(\"0100000000333333334444444455000000e20000000000000e30\"), 0x015761),\n    
(hex!(\"0100000000333333334444444455000000e20000000000007a90\"), 0x015801),\n    (hex!(\"0100000000333333334444444455000000e30000000000000e40\"), 0x0158a1),\n    (hex!(\"0100000000333333334444444455000000e30000000000005ae0\"), 0x015941),\n    (hex!(\"0100000000333333334444444455000000e40000000000000e50\"), 0x0159e1),\n    (hex!(\"0100000000333333334444444455000000e50000000000000e60\"), 0x015a81),\n    (hex!(\"0100000000333333334444444455000000e50000000000004700\"), 0x015b21),\n    (hex!(\"0100000000333333334444444455000000e500000000000065d0\"), 0x015bc1),\n    (hex!(\"0100000000333333334444444455000000e60000000000000e70\"), 0x015c61),\n    (hex!(\"0100000000333333334444444455000000e60000000000004fd0\"), 0x015d01),\n    (hex!(\"0100000000333333334444444455000000e70000000000000e80\"), 0x015da1),\n    (hex!(\"0100000000333333334444444455000000e70000000000005150\"), 0x015e41),\n    (hex!(\"0100000000333333334444444455000000e70000000000005920\"), 0x015ee1),\n    (hex!(\"0100000000333333334444444455000000e80000000000000e90\"), 0x015f81),\n    (hex!(\"0100000000333333334444444455000000e80000000000004320\"), 0x016021),\n    (hex!(\"0100000000333333334444444455000000e80000000000005ec0\"), 0x0160c1),\n    (hex!(\"0100000000333333334444444455000000e90000000000000ea0\"), 0x016161),\n    (hex!(\"0100000000333333334444444455000000e900000000000043b0\"), 0x016201),\n    (hex!(\"0100000000333333334444444455000000ea0000000000000eb0\"), 0x0162a1),\n    (hex!(\"0100000000333333334444444455000000ea0000000000003ea0\"), 0x016341),\n    (hex!(\"0100000000333333334444444455000000ea0000000000004f50\"), 0x0163e1),\n    (hex!(\"0100000000333333334444444455000000ea0000000000007520\"), 0x016481),\n    (hex!(\"0100000000333333334444444455000000eb0000000000000ec0\"), 0x016521),\n    (hex!(\"0100000000333333334444444455000000ec0000000000000ed0\"), 0x0165c1),\n    (hex!(\"0100000000333333334444444455000000ec0000000000006670\"), 0x016661),\n    
(hex!(\"0100000000333333334444444455000000ed0000000000000ee0\"), 0x016701),\n    (hex!(\"0100000000333333334444444455000000ee0000000000000ef0\"), 0x0167a1),\n    (hex!(\"0100000000333333334444444455000000ee0000000000004d10\"), 0x016841),\n    (hex!(\"0100000000333333334444444455000000ef0000000000000f00\"), 0x0168e1),\n    (hex!(\"0100000000333333334444444455000000f00000000000000f10\"), 0x016981),\n    (hex!(\"0100000000333333334444444455000000f00000000000007220\"), 0x016a21),\n    (hex!(\"0100000000333333334444444455000000f00000000000007540\"), 0x016ac1),\n    (hex!(\"0100000000333333334444444455000000f10000000000000f20\"), 0x016b61),\n    (hex!(\"0100000000333333334444444455000000f100000000000066f0\"), 0x016c01),\n    (hex!(\"0100000000333333334444444455000000f20000000000000f30\"), 0x016ca1),\n    (hex!(\"0100000000333333334444444455000000f20000000000007810\"), 0x016d41),\n    (hex!(\"0100000000333333334444444455000000f30000000000000f40\"), 0x016de1),\n    (hex!(\"0100000000333333334444444455000000f30000000000007b70\"), 0x016e81),\n    (hex!(\"0100000000333333334444444455000000f40000000000000f50\"), 0x016f21),\n    (hex!(\"0100000000333333334444444455000000f400000000000059c0\"), 0x016fc1),\n    (hex!(\"0100000000333333334444444455000000f50000000000000f60\"), 0x017061),\n    (hex!(\"0100000000333333334444444455000000f50000000000003fb0\"), 0x017101),\n    (hex!(\"0100000000333333334444444455000000f50000000000005740\"), 0x0171a1),\n    (hex!(\"0100000000333333334444444455000000f500000000000064d0\"), 0x017241),\n    (hex!(\"0100000000333333334444444455000000f50000000000006960\"), 0x0172e1),\n    (hex!(\"0100000000333333334444444455000000f60000000000000f70\"), 0x017381),\n    (hex!(\"0100000000333333334444444455000000f60000000000006d00\"), 0x017421),\n    (hex!(\"0100000000333333334444444455000000f70000000000000f80\"), 0x0174c1),\n    (hex!(\"0100000000333333334444444455000000f80000000000000f90\"), 0x017561),\n    
(hex!(\"0100000000333333334444444455000000f90000000000000fa0\"), 0x017601),\n    (hex!(\"0100000000333333334444444455000000fa0000000000000fb0\"), 0x0176a1),\n    (hex!(\"0100000000333333334444444455000000fa00000000000067b0\"), 0x017741),\n    (hex!(\"0100000000333333334444444455000000fb0000000000000fc0\"), 0x0177e1),\n    (hex!(\"0100000000333333334444444455000000fb0000000000004eb0\"), 0x017881),\n    (hex!(\"0100000000333333334444444455000000fb0000000000006ef0\"), 0x017921),\n    (hex!(\"0100000000333333334444444455000000fc0000000000000fd0\"), 0x0179c1),\n    (hex!(\"0100000000333333334444444455000000fc0000000000004470\"), 0x017a61),\n    (hex!(\"0100000000333333334444444455000000fc0000000000005940\"), 0x017b01),\n    (hex!(\"0100000000333333334444444455000000fd0000000000000fe0\"), 0x017ba1),\n    (hex!(\"0100000000333333334444444455000000fe0000000000000ff0\"), 0x017c41),\n    (hex!(\"0100000000333333334444444455000000ff0000000000001000\"), 0x017ce1),\n    (hex!(\"0100000000333333334444444455000000ff0000000000005690\"), 0x017d81),\n    (hex!(\"0100000000333333334444444455000001000000000000001010\"), 0x017e21),\n    (hex!(\"0100000000333333334444444455000001000000000000005210\"), 0x017ec1),\n    (hex!(\"01000000003333333344444444550000010000000000000070a0\"), 0x017f61),\n    (hex!(\"0100000000333333334444444455000001010000000000001020\"), 0x018001),\n    (hex!(\"0100000000333333334444444455000001010000000000006b80\"), 0x0180a1),\n    (hex!(\"0100000000333333334444444455000001020000000000001030\"), 0x018141),\n    (hex!(\"0100000000333333334444444455000001030000000000001040\"), 0x0181e1),\n    (hex!(\"0100000000333333334444444455000001030000000000004c80\"), 0x018281),\n    (hex!(\"0100000000333333334444444455000001040000000000001050\"), 0x018321),\n    (hex!(\"0100000000333333334444444455000001040000000000004850\"), 0x0183c1),\n    (hex!(\"01000000003333333344444444550000010400000000000057b0\"), 0x018461),\n    
(hex!(\"0100000000333333334444444455000001050000000000001060\"), 0x018501),\n    (hex!(\"01000000003333333344444444550000010500000000000048d0\"), 0x0185a1),\n    (hex!(\"0100000000333333334444444455000001050000000000007870\"), 0x018641),\n    (hex!(\"0100000000333333334444444455000001060000000000001070\"), 0x0186e1),\n    (hex!(\"0100000000333333334444444455000001060000000000004f90\"), 0x018781),\n    (hex!(\"0100000000333333334444444455000001060000000000006270\"), 0x018821),\n    (hex!(\"0100000000333333334444444455000001070000000000001080\"), 0x0188c1),\n    (hex!(\"01000000003333333344444444550000010700000000000063b0\"), 0x018961),\n    (hex!(\"0100000000333333334444444455000001080000000000001090\"), 0x018a01),\n    (hex!(\"01000000003333333344444444550000010900000000000010a0\"), 0x018aa1),\n    (hex!(\"0100000000333333334444444455000001090000000000006f40\"), 0x018b41),\n    (hex!(\"01000000003333333344444444550000010a00000000000010b0\"), 0x018be1),\n    (hex!(\"01000000003333333344444444550000010a0000000000006640\"), 0x018c81),\n    (hex!(\"01000000003333333344444444550000010b00000000000010c0\"), 0x018d21),\n    (hex!(\"01000000003333333344444444550000010c00000000000010d0\"), 0x018dc1),\n    (hex!(\"01000000003333333344444444550000010d00000000000010e0\"), 0x018e61),\n    (hex!(\"01000000003333333344444444550000010e00000000000010f0\"), 0x018f01),\n    (hex!(\"01000000003333333344444444550000010e0000000000005c40\"), 0x018fa1),\n    (hex!(\"01000000003333333344444444550000010e0000000000007ba0\"), 0x019041),\n    (hex!(\"01000000003333333344444444550000010f0000000000001100\"), 0x0190e1),\n    (hex!(\"01000000003333333344444444550000010f0000000000005c30\"), 0x019181),\n    (hex!(\"0100000000333333334444444455000001100000000000001110\"), 0x019221),\n    (hex!(\"0100000000333333334444444455000001100000000000007640\"), 0x0192c1),\n    (hex!(\"0100000000333333334444444455000001110000000000001120\"), 0x019361),\n    
(hex!(\"01000000003333333344444444550000011100000000000052c0\"), 0x019401),\n    (hex!(\"0100000000333333334444444455000001110000000000005710\"), 0x0194a1),\n    (hex!(\"0100000000333333334444444455000001110000000000006a00\"), 0x019541),\n    (hex!(\"0100000000333333334444444455000001120000000000001130\"), 0x0195e1),\n    (hex!(\"0100000000333333334444444455000001130000000000001140\"), 0x019681),\n    (hex!(\"0100000000333333334444444455000001140000000000001150\"), 0x019721),\n    (hex!(\"0100000000333333334444444455000001140000000000003fa0\"), 0x0197c1),\n    (hex!(\"01000000003333333344444444550000011400000000000054b0\"), 0x019861),\n    (hex!(\"0100000000333333334444444455000001140000000000006070\"), 0x019901),\n    (hex!(\"0100000000333333334444444455000001150000000000001160\"), 0x0199a1),\n    (hex!(\"0100000000333333334444444455000001150000000000005320\"), 0x019a41),\n    (hex!(\"0100000000333333334444444455000001150000000000006600\"), 0x019ae1),\n    (hex!(\"0100000000333333334444444455000001150000000000006df0\"), 0x019b81),\n    (hex!(\"01000000003333333344444444550000011500000000000079c0\"), 0x019c21),\n    (hex!(\"0100000000333333334444444455000001160000000000001170\"), 0x019cc1),\n    (hex!(\"0100000000333333334444444455000001170000000000001180\"), 0x019d61),\n    (hex!(\"0100000000333333334444444455000001170000000000004a60\"), 0x019e01),\n    (hex!(\"01000000003333333344444444550000011700000000000063c0\"), 0x019ea1),\n    (hex!(\"0100000000333333334444444455000001180000000000001190\"), 0x019f41),\n    (hex!(\"0100000000333333334444444455000001180000000000004530\"), 0x019fe1),\n    (hex!(\"01000000003333333344444444550000011800000000000077c0\"), 0x01a081),\n    (hex!(\"01000000003333333344444444550000011900000000000011a0\"), 0x01a121),\n    (hex!(\"01000000003333333344444444550000011a00000000000011b0\"), 0x01a1c1),\n    (hex!(\"01000000003333333344444444550000011a00000000000041c0\"), 0x01a261),\n    
(hex!(\"01000000003333333344444444550000011a00000000000061e0\"), 0x01a301),\n    (hex!(\"01000000003333333344444444550000011b00000000000011c0\"), 0x01a3a1),\n    (hex!(\"01000000003333333344444444550000011c00000000000011d0\"), 0x01a441),\n    (hex!(\"01000000003333333344444444550000011c0000000000005f90\"), 0x01a4e1),\n    (hex!(\"01000000003333333344444444550000011d00000000000011e0\"), 0x01a581),\n    (hex!(\"01000000003333333344444444550000011d0000000000004160\"), 0x01a621),\n    (hex!(\"01000000003333333344444444550000011e00000000000011f0\"), 0x01a6c1),\n    (hex!(\"01000000003333333344444444550000011e00000000000056d0\"), 0x01a761),\n    (hex!(\"01000000003333333344444444550000011f0000000000001200\"), 0x01a801),\n    (hex!(\"01000000003333333344444444550000011f0000000000004510\"), 0x01a8a1),\n    (hex!(\"0100000000333333334444444455000001200000000000001210\"), 0x01a941),\n    (hex!(\"0100000000333333334444444455000001210000000000001220\"), 0x01a9e1),\n    (hex!(\"0100000000333333334444444455000001210000000000005140\"), 0x01aa81),\n    (hex!(\"0100000000333333334444444455000001210000000000006710\"), 0x01ab21),\n    (hex!(\"0100000000333333334444444455000001210000000000006f50\"), 0x01abc1),\n    (hex!(\"0100000000333333334444444455000001220000000000001230\"), 0x01ac61),\n    (hex!(\"0100000000333333334444444455000001220000000000005570\"), 0x01ad01),\n    (hex!(\"0100000000333333334444444455000001220000000000007ac0\"), 0x01ada1),\n    (hex!(\"0100000000333333334444444455000001230000000000001240\"), 0x01ae41),\n    (hex!(\"0100000000333333334444444455000001240000000000001250\"), 0x01aee1),\n    (hex!(\"0100000000333333334444444455000001240000000000006cd0\"), 0x01af81),\n    (hex!(\"0100000000333333334444444455000001250000000000001260\"), 0x01b021),\n    (hex!(\"01000000003333333344444444550000012500000000000046b0\"), 0x01b0c1),\n    (hex!(\"0100000000333333334444444455000001250000000000005eb0\"), 0x01b161),\n    
(hex!(\"0100000000333333334444444455000001260000000000001270\"), 0x01b201),\n    (hex!(\"0100000000333333334444444455000001260000000000004630\"), 0x01b2a1),\n    (hex!(\"0100000000333333334444444455000001270000000000001280\"), 0x01b341),\n    (hex!(\"0100000000333333334444444455000001270000000000004ff0\"), 0x01b3e1),\n    (hex!(\"0100000000333333334444444455000001270000000000006ec0\"), 0x01b481),\n    (hex!(\"0100000000333333334444444455000001280000000000001290\"), 0x01b521),\n    (hex!(\"01000000003333333344444444550000012900000000000012a0\"), 0x01b5c1),\n    (hex!(\"0100000000333333334444444455000001290000000000005f60\"), 0x01b661),\n    (hex!(\"01000000003333333344444444550000012a00000000000012b0\"), 0x01b701),\n    (hex!(\"01000000003333333344444444550000012a0000000000005480\"), 0x01b7a1),\n    (hex!(\"01000000003333333344444444550000012b00000000000012c0\"), 0x01b841),\n    (hex!(\"01000000003333333344444444550000012b00000000000065a0\"), 0x01b8e1),\n    (hex!(\"01000000003333333344444444550000012b00000000000066c0\"), 0x01b981),\n    (hex!(\"01000000003333333344444444550000012c00000000000012d0\"), 0x01ba21),\n    (hex!(\"01000000003333333344444444550000012c00000000000064b0\"), 0x01bac1),\n    (hex!(\"01000000003333333344444444550000012d00000000000012e0\"), 0x01bb61),\n    (hex!(\"01000000003333333344444444550000012d00000000000049c0\"), 0x01bc01),\n    (hex!(\"01000000003333333344444444550000012d0000000000004bf0\"), 0x01bca1),\n    (hex!(\"01000000003333333344444444550000012e00000000000012f0\"), 0x01bd41),\n    (hex!(\"01000000003333333344444444550000012e0000000000005ed0\"), 0x01bde1),\n    (hex!(\"01000000003333333344444444550000012f0000000000001300\"), 0x01be81),\n    (hex!(\"01000000003333333344444444550000012f00000000000049a0\"), 0x01bf21),\n    (hex!(\"0100000000333333334444444455000001300000000000001310\"), 0x01bfc1),\n    (hex!(\"0100000000333333334444444455000001300000000000007840\"), 0x01c061),\n    
(hex!(\"0100000000333333334444444455000001310000000000001320\"), 0x01c101),\n    (hex!(\"0100000000333333334444444455000001310000000000005f70\"), 0x01c1a1),\n    (hex!(\"0100000000333333334444444455000001320000000000001330\"), 0x01c241),\n    (hex!(\"0100000000333333334444444455000001320000000000005a00\"), 0x01c2e1),\n    (hex!(\"0100000000333333334444444455000001330000000000001340\"), 0x01c381),\n    (hex!(\"0100000000333333334444444455000001330000000000006c70\"), 0x01c421),\n    (hex!(\"0100000000333333334444444455000001340000000000001350\"), 0x01c4c1),\n    (hex!(\"0100000000333333334444444455000001340000000000005c60\"), 0x01c561),\n    (hex!(\"0100000000333333334444444455000001350000000000001360\"), 0x01c601),\n    (hex!(\"0100000000333333334444444455000001350000000000004f10\"), 0x01c6a1),\n    (hex!(\"0100000000333333334444444455000001360000000000001370\"), 0x01c741),\n    (hex!(\"0100000000333333334444444455000001360000000000004c60\"), 0x01c7e1),\n    (hex!(\"0100000000333333334444444455000001370000000000001380\"), 0x01c881),\n    (hex!(\"0100000000333333334444444455000001380000000000001390\"), 0x01c921),\n    (hex!(\"01000000003333333344444444550000013900000000000013a0\"), 0x01c9c1),\n    (hex!(\"0100000000333333334444444455000001390000000000004ea0\"), 0x01ca61),\n    (hex!(\"01000000003333333344444444550000013a00000000000013b0\"), 0x01cb01),\n    (hex!(\"01000000003333333344444444550000013a0000000000007350\"), 0x01cba1),\n    (hex!(\"01000000003333333344444444550000013b00000000000013c0\"), 0x01cc41),\n    (hex!(\"01000000003333333344444444550000013c00000000000013d0\"), 0x01cce1),\n    (hex!(\"01000000003333333344444444550000013c0000000000007050\"), 0x01cd81),\n    (hex!(\"01000000003333333344444444550000013d00000000000013e0\"), 0x01ce21),\n    (hex!(\"01000000003333333344444444550000013d0000000000006bd0\"), 0x01cec1),\n    (hex!(\"01000000003333333344444444550000013e00000000000013f0\"), 0x01cf61),\n    
(hex!(\"01000000003333333344444444550000013e00000000000058e0\"), 0x01d001),\n    (hex!(\"01000000003333333344444444550000013f0000000000001400\"), 0x01d0a1),\n    (hex!(\"01000000003333333344444444550000013f0000000000004740\"), 0x01d141),\n    (hex!(\"0100000000333333334444444455000001400000000000001410\"), 0x01d1e1),\n    (hex!(\"0100000000333333334444444455000001400000000000003f10\"), 0x01d281),\n    (hex!(\"0100000000333333334444444455000001400000000000006d40\"), 0x01d321),\n    (hex!(\"01000000003333333344444444550000014000000000000072d0\"), 0x01d3c1),\n    (hex!(\"0100000000333333334444444455000001410000000000001420\"), 0x01d461),\n    (hex!(\"0100000000333333334444444455000001420000000000001430\"), 0x01d501),\n    (hex!(\"0100000000333333334444444455000001430000000000001440\"), 0x01d5a1),\n    (hex!(\"0100000000333333334444444455000001440000000000001450\"), 0x01d641),\n    (hex!(\"0100000000333333334444444455000001450000000000001460\"), 0x01d6e1),\n    (hex!(\"0100000000333333334444444455000001460000000000001470\"), 0x01d781),\n    (hex!(\"01000000003333333344444444550000014600000000000055c0\"), 0x01d821),\n    (hex!(\"0100000000333333334444444455000001470000000000001480\"), 0x01d8c1),\n    (hex!(\"0100000000333333334444444455000001470000000000004570\"), 0x01d961),\n    (hex!(\"0100000000333333334444444455000001470000000000004be0\"), 0x01da01),\n    (hex!(\"0100000000333333334444444455000001480000000000001490\"), 0x01daa1),\n    (hex!(\"0100000000333333334444444455000001480000000000005360\"), 0x01db41),\n    (hex!(\"01000000003333333344444444550000014900000000000014a0\"), 0x01dbe1),\n    (hex!(\"01000000003333333344444444550000014a00000000000014b0\"), 0x01dc81),\n    (hex!(\"01000000003333333344444444550000014a00000000000053d0\"), 0x01dd21),\n    (hex!(\"01000000003333333344444444550000014b00000000000014c0\"), 0x01ddc1),\n    (hex!(\"01000000003333333344444444550000014b0000000000005950\"), 0x01de61),\n    
(hex!(\"01000000003333333344444444550000014c00000000000014d0\"), 0x01df01),\n    (hex!(\"01000000003333333344444444550000014c0000000000004f60\"), 0x01dfa1),\n    (hex!(\"01000000003333333344444444550000014d00000000000014e0\"), 0x01e041),\n    (hex!(\"01000000003333333344444444550000014d0000000000004520\"), 0x01e0e1),\n    (hex!(\"01000000003333333344444444550000014d0000000000005200\"), 0x01e181),\n    (hex!(\"01000000003333333344444444550000014e00000000000014f0\"), 0x01e221),\n    (hex!(\"01000000003333333344444444550000014e0000000000005bd0\"), 0x01e2c1),\n    (hex!(\"01000000003333333344444444550000014f0000000000001500\"), 0x01e361),\n    (hex!(\"01000000003333333344444444550000014f00000000000060d0\"), 0x01e401),\n    (hex!(\"0100000000333333334444444455000001500000000000001510\"), 0x01e4a1),\n    (hex!(\"01000000003333333344444444550000015000000000000075e0\"), 0x01e541),\n    (hex!(\"0100000000333333334444444455000001510000000000001520\"), 0x01e5e1),\n    (hex!(\"0100000000333333334444444455000001510000000000005c00\"), 0x01e681),\n    (hex!(\"0100000000333333334444444455000001510000000000006af0\"), 0x01e721),\n    (hex!(\"0100000000333333334444444455000001510000000000007b80\"), 0x01e7c1),\n    (hex!(\"0100000000333333334444444455000001520000000000001530\"), 0x01e861),\n    (hex!(\"0100000000333333334444444455000001520000000000004c70\"), 0x01e901),\n    (hex!(\"0100000000333333334444444455000001530000000000001540\"), 0x01e9a1),\n    (hex!(\"0100000000333333334444444455000001540000000000001550\"), 0x01ea41),\n    (hex!(\"0100000000333333334444444455000001540000000000007cd0\"), 0x01eae1),\n    (hex!(\"0100000000333333334444444455000001550000000000001560\"), 0x01eb81),\n    (hex!(\"0100000000333333334444444455000001550000000000004ae0\"), 0x01ec21),\n    (hex!(\"01000000003333333344444444550000015500000000000068c0\"), 0x01ecc1),\n    (hex!(\"0100000000333333334444444455000001560000000000001570\"), 0x01ed61),\n    
(hex!(\"01000000003333333344444444550000015600000000000064a0\"), 0x01ee01),\n    (hex!(\"0100000000333333334444444455000001570000000000001580\"), 0x01eea1),\n    (hex!(\"0100000000333333334444444455000001580000000000001590\"), 0x01ef41),\n    (hex!(\"0100000000333333334444444455000001580000000000006d30\"), 0x01efe1),\n    (hex!(\"01000000003333333344444444550000015800000000000074f0\"), 0x01f081),\n    (hex!(\"01000000003333333344444444550000015900000000000015a0\"), 0x01f121),\n    (hex!(\"01000000003333333344444444550000015900000000000053a0\"), 0x01f1c1),\n    (hex!(\"01000000003333333344444444550000015900000000000055e0\"), 0x01f261),\n    (hex!(\"0100000000333333334444444455000001590000000000006210\"), 0x01f301),\n    (hex!(\"01000000003333333344444444550000015900000000000067c0\"), 0x01f3a1),\n    (hex!(\"01000000003333333344444444550000015a00000000000015b0\"), 0x01f441),\n    (hex!(\"01000000003333333344444444550000015b00000000000015c0\"), 0x01f4e1),\n    (hex!(\"01000000003333333344444444550000015c00000000000015d0\"), 0x01f581),\n    (hex!(\"01000000003333333344444444550000015c0000000000004d80\"), 0x01f621),\n    (hex!(\"01000000003333333344444444550000015c00000000000073f0\"), 0x01f6c1),\n    (hex!(\"01000000003333333344444444550000015d00000000000015e0\"), 0x01f761),\n    (hex!(\"01000000003333333344444444550000015e00000000000015f0\"), 0x01f801),\n    (hex!(\"01000000003333333344444444550000015e0000000000004120\"), 0x01f8a1),\n    (hex!(\"01000000003333333344444444550000015e0000000000004350\"), 0x01f941),\n    (hex!(\"01000000003333333344444444550000015e0000000000007c50\"), 0x01f9e1),\n    (hex!(\"01000000003333333344444444550000015f0000000000001600\"), 0x01fa81),\n    (hex!(\"0100000000333333334444444455000001600000000000001610\"), 0x01fb21),\n    (hex!(\"0100000000333333334444444455000001600000000000004840\"), 0x01fbc1),\n    (hex!(\"0100000000333333334444444455000001600000000000004b10\"), 0x01fc61),\n    
(hex!(\"0100000000333333334444444455000001600000000000007060\"), 0x01fd01),\n    (hex!(\"0100000000333333334444444455000001610000000000001620\"), 0x01fda1),\n    (hex!(\"0100000000333333334444444455000001610000000000005300\"), 0x01fe41),\n    (hex!(\"0100000000333333334444444455000001620000000000001630\"), 0x01fee1),\n    (hex!(\"0100000000333333334444444455000001620000000000006530\"), 0x01ff81),\n    (hex!(\"0100000000333333334444444455000001630000000000001640\"), 0x020021),\n    (hex!(\"0100000000333333334444444455000001640000000000001650\"), 0x0200c1),\n    (hex!(\"0100000000333333334444444455000001650000000000001660\"), 0x020161),\n    (hex!(\"0100000000333333334444444455000001660000000000001670\"), 0x020201),\n    (hex!(\"0100000000333333334444444455000001670000000000001680\"), 0x0202a1),\n    (hex!(\"0100000000333333334444444455000001670000000000007310\"), 0x020341),\n    (hex!(\"0100000000333333334444444455000001680000000000001690\"), 0x0203e1),\n    (hex!(\"0100000000333333334444444455000001680000000000007b50\"), 0x020481),\n    (hex!(\"01000000003333333344444444550000016900000000000016a0\"), 0x020521),\n    (hex!(\"01000000003333333344444444550000016900000000000049d0\"), 0x0205c1),\n    (hex!(\"01000000003333333344444444550000016a00000000000016b0\"), 0x020661),\n    (hex!(\"01000000003333333344444444550000016a00000000000078b0\"), 0x020701),\n    (hex!(\"01000000003333333344444444550000016b00000000000016c0\"), 0x0207a1),\n    (hex!(\"01000000003333333344444444550000016b0000000000004100\"), 0x020841),\n    (hex!(\"01000000003333333344444444550000016c00000000000016d0\"), 0x0208e1),\n    (hex!(\"01000000003333333344444444550000016c0000000000006e00\"), 0x020981),\n    (hex!(\"01000000003333333344444444550000016d00000000000016e0\"), 0x020a21),\n    (hex!(\"01000000003333333344444444550000016e00000000000016f0\"), 0x020ac1),\n    (hex!(\"01000000003333333344444444550000016e0000000000004ac0\"), 0x020b61),\n    
(hex!(\"01000000003333333344444444550000016e0000000000007820\"), 0x020c01),\n    (hex!(\"01000000003333333344444444550000016f0000000000001700\"), 0x020ca1),\n    (hex!(\"0100000000333333334444444455000001700000000000001710\"), 0x020d41),\n    (hex!(\"0100000000333333334444444455000001700000000000005830\"), 0x020de1),\n    (hex!(\"0100000000333333334444444455000001710000000000001720\"), 0x020e81),\n    (hex!(\"01000000003333333344444444550000017100000000000072f0\"), 0x020f21),\n    (hex!(\"0100000000333333334444444455000001720000000000001730\"), 0x020fc1),\n    (hex!(\"0100000000333333334444444455000001720000000000004870\"), 0x021061),\n    (hex!(\"01000000003333333344444444550000017200000000000070b0\"), 0x021101),\n    (hex!(\"0100000000333333334444444455000001730000000000001740\"), 0x0211a1),\n    (hex!(\"0100000000333333334444444455000001740000000000001750\"), 0x021241),\n    (hex!(\"0100000000333333334444444455000001750000000000001760\"), 0x0212e1),\n    (hex!(\"0100000000333333334444444455000001750000000000005670\"), 0x021381),\n    (hex!(\"0100000000333333334444444455000001750000000000005870\"), 0x021421),\n    (hex!(\"0100000000333333334444444455000001760000000000001770\"), 0x0214c1),\n    (hex!(\"0100000000333333334444444455000001770000000000001780\"), 0x021561),\n    (hex!(\"0100000000333333334444444455000001770000000000005000\"), 0x021601),\n    (hex!(\"0100000000333333334444444455000001770000000000007090\"), 0x0216a1),\n    (hex!(\"0100000000333333334444444455000001780000000000001790\"), 0x021741),\n    (hex!(\"01000000003333333344444444550000017800000000000048a0\"), 0x0217e1),\n    (hex!(\"0100000000333333334444444455000001780000000000006bf0\"), 0x021881),\n    (hex!(\"01000000003333333344444444550000017900000000000017a0\"), 0x021921),\n    (hex!(\"01000000003333333344444444550000017900000000000057d0\"), 0x0219c1),\n    (hex!(\"0100000000333333334444444455000001790000000000006660\"), 0x021a61),\n    
(hex!(\"01000000003333333344444444550000017a00000000000017b0\"), 0x021b01),\n    (hex!(\"01000000003333333344444444550000017a0000000000004970\"), 0x021ba1),\n    (hex!(\"01000000003333333344444444550000017a0000000000005dc0\"), 0x021c41),\n    (hex!(\"01000000003333333344444444550000017b00000000000017c0\"), 0x021ce1),\n    (hex!(\"01000000003333333344444444550000017b0000000000004ee0\"), 0x021d81),\n    (hex!(\"01000000003333333344444444550000017b00000000000054c0\"), 0x021e21),\n    (hex!(\"01000000003333333344444444550000017c00000000000017d0\"), 0x021ec1),\n    (hex!(\"01000000003333333344444444550000017c0000000000003fc0\"), 0x021f61),\n    (hex!(\"01000000003333333344444444550000017c00000000000063e0\"), 0x022001),\n    (hex!(\"01000000003333333344444444550000017c0000000000006520\"), 0x0220a1),\n    (hex!(\"01000000003333333344444444550000017d00000000000017e0\"), 0x022141),\n    (hex!(\"01000000003333333344444444550000017d0000000000006220\"), 0x0221e1),\n    (hex!(\"01000000003333333344444444550000017d0000000000007120\"), 0x022281),\n    (hex!(\"01000000003333333344444444550000017e00000000000017f0\"), 0x022321),\n    (hex!(\"01000000003333333344444444550000017f0000000000001800\"), 0x0223c1),\n    (hex!(\"0100000000333333334444444455000001800000000000001810\"), 0x022461),\n    (hex!(\"0100000000333333334444444455000001810000000000001820\"), 0x022501),\n    (hex!(\"01000000003333333344444444550000018100000000000041f0\"), 0x0225a1),\n    (hex!(\"0100000000333333334444444455000001810000000000007590\"), 0x022641),\n    (hex!(\"0100000000333333334444444455000001820000000000001830\"), 0x0226e1),\n    (hex!(\"0100000000333333334444444455000001820000000000004ce0\"), 0x022781),\n    (hex!(\"0100000000333333334444444455000001830000000000001840\"), 0x022821),\n    (hex!(\"01000000003333333344444444550000018300000000000042c0\"), 0x0228c1),\n    (hex!(\"0100000000333333334444444455000001840000000000001850\"), 0x022961),\n    
(hex!(\"0100000000333333334444444455000001840000000000004f70\"), 0x022a01),\n    (hex!(\"0100000000333333334444444455000001850000000000001860\"), 0x022aa1),\n    (hex!(\"0100000000333333334444444455000001850000000000006470\"), 0x022b41),\n    (hex!(\"0100000000333333334444444455000001850000000000007500\"), 0x022be1),\n    (hex!(\"0100000000333333334444444455000001860000000000001870\"), 0x022c81),\n    (hex!(\"0100000000333333334444444455000001860000000000004770\"), 0x022d21),\n    (hex!(\"0100000000333333334444444455000001870000000000001880\"), 0x022dc1),\n    (hex!(\"0100000000333333334444444455000001870000000000006a30\"), 0x022e61),\n    (hex!(\"0100000000333333334444444455000001880000000000001890\"), 0x022f01),\n    (hex!(\"0100000000333333334444444455000001880000000000007410\"), 0x022fa1),\n    (hex!(\"01000000003333333344444444550000018900000000000018a0\"), 0x023041),\n    (hex!(\"01000000003333333344444444550000018900000000000044d0\"), 0x0230e1),\n    (hex!(\"0100000000333333334444444455000001890000000000005ac0\"), 0x023181),\n    (hex!(\"01000000003333333344444444550000018a00000000000018b0\"), 0x023221),\n    (hex!(\"01000000003333333344444444550000018a0000000000006260\"), 0x0232c1),\n    (hex!(\"01000000003333333344444444550000018a0000000000006d70\"), 0x023361),\n    (hex!(\"01000000003333333344444444550000018b00000000000018c0\"), 0x023401),\n    (hex!(\"01000000003333333344444444550000018b0000000000004aa0\"), 0x0234a1),\n    (hex!(\"01000000003333333344444444550000018b0000000000006fd0\"), 0x023541),\n    (hex!(\"01000000003333333344444444550000018c00000000000018d0\"), 0x0235e1),\n    (hex!(\"01000000003333333344444444550000018c00000000000051b0\"), 0x023681),\n    (hex!(\"01000000003333333344444444550000018c0000000000006650\"), 0x023721),\n    (hex!(\"01000000003333333344444444550000018d00000000000018e0\"), 0x0237c1),\n    (hex!(\"01000000003333333344444444550000018e00000000000018f0\"), 0x023861),\n    
(hex!(\"01000000003333333344444444550000018e00000000000041d0\"), 0x023901),\n    (hex!(\"01000000003333333344444444550000018f0000000000001900\"), 0x0239a1),\n    (hex!(\"01000000003333333344444444550000018f0000000000007600\"), 0x023a41),\n    (hex!(\"0100000000333333334444444455000001900000000000001910\"), 0x023ae1),\n    (hex!(\"0100000000333333334444444455000001900000000000005410\"), 0x023b81),\n    (hex!(\"0100000000333333334444444455000001900000000000006760\"), 0x023c21),\n    (hex!(\"0100000000333333334444444455000001910000000000001920\"), 0x023cc1),\n    (hex!(\"0100000000333333334444444455000001920000000000001930\"), 0x023d61),\n    (hex!(\"0100000000333333334444444455000001920000000000004ca0\"), 0x023e01),\n    (hex!(\"0100000000333333334444444455000001920000000000005d80\"), 0x023ea1),\n    (hex!(\"0100000000333333334444444455000001920000000000005fd0\"), 0x023f41),\n    (hex!(\"01000000003333333344444444550000019200000000000070d0\"), 0x023fe1),\n    (hex!(\"0100000000333333334444444455000001930000000000001940\"), 0x024081),\n    (hex!(\"0100000000333333334444444455000001930000000000004010\"), 0x024121),\n    (hex!(\"0100000000333333334444444455000001930000000000007ca0\"), 0x0241c1),\n    (hex!(\"0100000000333333334444444455000001940000000000001950\"), 0x024261),\n    (hex!(\"0100000000333333334444444455000001950000000000001960\"), 0x024301),\n    (hex!(\"0100000000333333334444444455000001950000000000005380\"), 0x0243a1),\n    (hex!(\"0100000000333333334444444455000001960000000000001970\"), 0x024441),\n    (hex!(\"0100000000333333334444444455000001960000000000006de0\"), 0x0244e1),\n    (hex!(\"0100000000333333334444444455000001970000000000001980\"), 0x024581),\n    (hex!(\"01000000003333333344444444550000019700000000000048f0\"), 0x024621),\n    (hex!(\"0100000000333333334444444455000001980000000000001990\"), 0x0246c1),\n    (hex!(\"0100000000333333334444444455000001980000000000006510\"), 0x024761),\n    
(hex!(\"01000000003333333344444444550000019900000000000019a0\"), 0x024801),\n    (hex!(\"0100000000333333334444444455000001990000000000007570\"), 0x0248a1),\n    (hex!(\"0100000000333333334444444455000001990000000000007580\"), 0x024941),\n    (hex!(\"01000000003333333344444444550000019a00000000000019b0\"), 0x0249e1),\n    (hex!(\"01000000003333333344444444550000019a0000000000004050\"), 0x024a81),\n    (hex!(\"01000000003333333344444444550000019a0000000000004ba0\"), 0x024b21),\n    (hex!(\"01000000003333333344444444550000019a0000000000005540\"), 0x024bc1),\n    (hex!(\"01000000003333333344444444550000019a00000000000061c0\"), 0x024c61),\n    (hex!(\"01000000003333333344444444550000019a0000000000007c60\"), 0x024d01),\n    (hex!(\"01000000003333333344444444550000019b00000000000019c0\"), 0x024da1),\n    (hex!(\"01000000003333333344444444550000019b0000000000006240\"), 0x024e41),\n    (hex!(\"01000000003333333344444444550000019c00000000000019d0\"), 0x024ee1),\n    (hex!(\"01000000003333333344444444550000019d00000000000019e0\"), 0x024f81),\n    (hex!(\"01000000003333333344444444550000019d0000000000004640\"), 0x025021),\n    (hex!(\"01000000003333333344444444550000019d00000000000052a0\"), 0x0250c1),\n    (hex!(\"01000000003333333344444444550000019d00000000000052b0\"), 0x025161),\n    (hex!(\"01000000003333333344444444550000019e00000000000019f0\"), 0x025201),\n    (hex!(\"01000000003333333344444444550000019f0000000000001a00\"), 0x0252a1),\n    (hex!(\"01000000003333333344444444550000019f0000000000006b20\"), 0x025341),\n    (hex!(\"0100000000333333334444444455000001a00000000000001a10\"), 0x0253e1),\n    (hex!(\"0100000000333333334444444455000001a10000000000001a20\"), 0x025481),\n    (hex!(\"0100000000333333334444444455000001a10000000000005460\"), 0x025521),\n    (hex!(\"0100000000333333334444444455000001a10000000000005d20\"), 0x0255c1),\n    (hex!(\"0100000000333333334444444455000001a100000000000068f0\"), 0x025661),\n    
(hex!(\"0100000000333333334444444455000001a20000000000001a30\"), 0x025701),\n    (hex!(\"0100000000333333334444444455000001a20000000000007170\"), 0x0257a1),\n    (hex!(\"0100000000333333334444444455000001a30000000000001a40\"), 0x025841),\n    (hex!(\"0100000000333333334444444455000001a40000000000001a50\"), 0x0258e1),\n    (hex!(\"0100000000333333334444444455000001a50000000000001a60\"), 0x025981),\n    (hex!(\"0100000000333333334444444455000001a60000000000001a70\"), 0x025a21),\n    (hex!(\"0100000000333333334444444455000001a70000000000001a80\"), 0x025ac1),\n    (hex!(\"0100000000333333334444444455000001a70000000000005a90\"), 0x025b61),\n    (hex!(\"0100000000333333334444444455000001a70000000000006440\"), 0x025c01),\n    (hex!(\"0100000000333333334444444455000001a80000000000001a90\"), 0x025ca1),\n    (hex!(\"0100000000333333334444444455000001a80000000000004800\"), 0x025d41),\n    (hex!(\"0100000000333333334444444455000001a90000000000001aa0\"), 0x025de1),\n    (hex!(\"0100000000333333334444444455000001aa0000000000001ab0\"), 0x025e81),\n    (hex!(\"0100000000333333334444444455000001aa0000000000005b60\"), 0x025f21),\n    (hex!(\"0100000000333333334444444455000001ab0000000000001ac0\"), 0x025fc1),\n    (hex!(\"0100000000333333334444444455000001ab0000000000006700\"), 0x026061),\n    (hex!(\"0100000000333333334444444455000001ab00000000000071d0\"), 0x026101),\n    (hex!(\"0100000000333333334444444455000001ac0000000000001ad0\"), 0x0261a1),\n    (hex!(\"0100000000333333334444444455000001ac0000000000007380\"), 0x026241),\n    (hex!(\"0100000000333333334444444455000001ad0000000000001ae0\"), 0x0262e1),\n    (hex!(\"0100000000333333334444444455000001ad0000000000006350\"), 0x026381),\n    (hex!(\"0100000000333333334444444455000001ae0000000000001af0\"), 0x026421),\n    (hex!(\"0100000000333333334444444455000001af0000000000001b00\"), 0x0264c1),\n    (hex!(\"0100000000333333334444444455000001af0000000000007390\"), 0x026561),\n    
(hex!(\"0100000000333333334444444455000001b00000000000001b10\"), 0x026601),\n    (hex!(\"0100000000333333334444444455000001b10000000000001b20\"), 0x0266a1),\n    (hex!(\"0100000000333333334444444455000001b10000000000005cc0\"), 0x026741),\n    (hex!(\"0100000000333333334444444455000001b20000000000001b30\"), 0x0267e1),\n    (hex!(\"0100000000333333334444444455000001b20000000000004fb0\"), 0x026881),\n    (hex!(\"0100000000333333334444444455000001b30000000000001b40\"), 0x026921),\n    (hex!(\"0100000000333333334444444455000001b40000000000001b50\"), 0x0269c1),\n    (hex!(\"0100000000333333334444444455000001b50000000000001b60\"), 0x026a61),\n    (hex!(\"0100000000333333334444444455000001b60000000000001b70\"), 0x026b01),\n    (hex!(\"0100000000333333334444444455000001b600000000000048e0\"), 0x026ba1),\n    (hex!(\"0100000000333333334444444455000001b70000000000001b80\"), 0x026c41),\n    (hex!(\"0100000000333333334444444455000001b70000000000005ca0\"), 0x026ce1),\n    (hex!(\"0100000000333333334444444455000001b70000000000007900\"), 0x026d81),\n    (hex!(\"0100000000333333334444444455000001b80000000000001b90\"), 0x026e21),\n    (hex!(\"0100000000333333334444444455000001b80000000000004d90\"), 0x026ec1),\n    (hex!(\"0100000000333333334444444455000001b90000000000001ba0\"), 0x026f61),\n    (hex!(\"0100000000333333334444444455000001b90000000000003f40\"), 0x027001),\n    (hex!(\"0100000000333333334444444455000001ba0000000000001bb0\"), 0x0270a1),\n    (hex!(\"0100000000333333334444444455000001ba00000000000042a0\"), 0x027141),\n    (hex!(\"0100000000333333334444444455000001ba00000000000067f0\"), 0x0271e1),\n    (hex!(\"0100000000333333334444444455000001ba00000000000073a0\"), 0x027281),\n    (hex!(\"0100000000333333334444444455000001bb0000000000001bc0\"), 0x027321),\n    (hex!(\"0100000000333333334444444455000001bb0000000000004a00\"), 0x0273c1),\n    (hex!(\"0100000000333333334444444455000001bb0000000000005e00\"), 0x027461),\n    
(hex!(\"0100000000333333334444444455000001bc0000000000001bd0\"), 0x027501),\n    (hex!(\"0100000000333333334444444455000001bc0000000000004230\"), 0x0275a1),\n    (hex!(\"0100000000333333334444444455000001bc0000000000005860\"), 0x027641),\n    (hex!(\"0100000000333333334444444455000001bd0000000000001be0\"), 0x0276e1),\n    (hex!(\"0100000000333333334444444455000001bd0000000000007c70\"), 0x027781),\n    (hex!(\"0100000000333333334444444455000001be0000000000001bf0\"), 0x027821),\n    (hex!(\"0100000000333333334444444455000001be0000000000007770\"), 0x0278c1),\n    (hex!(\"0100000000333333334444444455000001be0000000000007cf0\"), 0x027961),\n    (hex!(\"0100000000333333334444444455000001bf0000000000001c00\"), 0x027a01),\n    (hex!(\"0100000000333333334444444455000001bf0000000000006490\"), 0x027aa1),\n    (hex!(\"0100000000333333334444444455000001c00000000000001c10\"), 0x027b41),\n    (hex!(\"0100000000333333334444444455000001c10000000000001c20\"), 0x027be1),\n    (hex!(\"0100000000333333334444444455000001c10000000000004600\"), 0x027c81),\n    (hex!(\"0100000000333333334444444455000001c20000000000001c30\"), 0x027d21),\n    (hex!(\"0100000000333333334444444455000001c20000000000006e30\"), 0x027dc1),\n    (hex!(\"0100000000333333334444444455000001c30000000000001c40\"), 0x027e61),\n    (hex!(\"0100000000333333334444444455000001c40000000000001c50\"), 0x027f01),\n    (hex!(\"0100000000333333334444444455000001c50000000000001c60\"), 0x027fa1),\n    (hex!(\"0100000000333333334444444455000001c60000000000001c70\"), 0x028041),\n    (hex!(\"0100000000333333334444444455000001c60000000000004240\"), 0x0280e1),\n    (hex!(\"0100000000333333334444444455000001c60000000000005bb0\"), 0x028181),\n    (hex!(\"0100000000333333334444444455000001c70000000000001c80\"), 0x028221),\n    (hex!(\"0100000000333333334444444455000001c80000000000001c90\"), 0x0282c1),\n    (hex!(\"0100000000333333334444444455000001c90000000000001ca0\"), 0x028361),\n    
(hex!(\"0100000000333333334444444455000001c90000000000006730\"), 0x028401),\n    (hex!(\"0100000000333333334444444455000001ca0000000000001cb0\"), 0x0284a1),\n    (hex!(\"0100000000333333334444444455000001ca00000000000070f0\"), 0x028541),\n    (hex!(\"0100000000333333334444444455000001cb0000000000001cc0\"), 0x0285e1),\n    (hex!(\"0100000000333333334444444455000001cb00000000000071a0\"), 0x028681),\n    (hex!(\"0100000000333333334444444455000001cc0000000000001cd0\"), 0x028721),\n    (hex!(\"0100000000333333334444444455000001cc0000000000005280\"), 0x0287c1),\n    (hex!(\"0100000000333333334444444455000001cc0000000000005d90\"), 0x028861),\n    (hex!(\"0100000000333333334444444455000001cd0000000000001ce0\"), 0x028901),\n    (hex!(\"0100000000333333334444444455000001cd00000000000069b0\"), 0x0289a1),\n    (hex!(\"0100000000333333334444444455000001ce0000000000001cf0\"), 0x028a41),\n    (hex!(\"0100000000333333334444444455000001ce0000000000004540\"), 0x028ae1),\n    (hex!(\"0100000000333333334444444455000001cf0000000000001d00\"), 0x028b81),\n    (hex!(\"0100000000333333334444444455000001cf00000000000076a0\"), 0x028c21),\n    (hex!(\"0100000000333333334444444455000001d00000000000001d10\"), 0x028cc1),\n    (hex!(\"0100000000333333334444444455000001d000000000000060a0\"), 0x028d61),\n    (hex!(\"0100000000333333334444444455000001d10000000000001d20\"), 0x028e01),\n    (hex!(\"0100000000333333334444444455000001d20000000000001d30\"), 0x028ea1),\n    (hex!(\"0100000000333333334444444455000001d30000000000001d40\"), 0x028f41),\n    (hex!(\"0100000000333333334444444455000001d30000000000004000\"), 0x028fe1),\n    (hex!(\"0100000000333333334444444455000001d30000000000004140\"), 0x029081),\n    (hex!(\"0100000000333333334444444455000001d30000000000006790\"), 0x029121),\n    (hex!(\"0100000000333333334444444455000001d40000000000001d50\"), 0x0291c1),\n    (hex!(\"0100000000333333334444444455000001d50000000000001d60\"), 0x029261),\n    
(hex!(\"0100000000333333334444444455000001d60000000000001d70\"), 0x029301),\n    (hex!(\"0100000000333333334444444455000001d60000000000004b50\"), 0x0293a1),\n    (hex!(\"0100000000333333334444444455000001d60000000000007430\"), 0x029441),\n    (hex!(\"0100000000333333334444444455000001d70000000000001d80\"), 0x0294e1),\n    (hex!(\"0100000000333333334444444455000001d70000000000006920\"), 0x029581),\n    (hex!(\"0100000000333333334444444455000001d80000000000001d90\"), 0x029621),\n    (hex!(\"0100000000333333334444444455000001d80000000000005b30\"), 0x0296c1),\n    (hex!(\"0100000000333333334444444455000001d90000000000001da0\"), 0x029761),\n    (hex!(\"0100000000333333334444444455000001da0000000000001db0\"), 0x029801),\n    (hex!(\"0100000000333333334444444455000001da0000000000004af0\"), 0x0298a1),\n    (hex!(\"0100000000333333334444444455000001da0000000000007240\"), 0x029941),\n    (hex!(\"0100000000333333334444444455000001da0000000000007470\"), 0x0299e1),\n    (hex!(\"0100000000333333334444444455000001db0000000000001dc0\"), 0x029a81),\n    (hex!(\"0100000000333333334444444455000001db00000000000045d0\"), 0x029b21),\n    (hex!(\"0100000000333333334444444455000001dc0000000000001dd0\"), 0x029bc1),\n    (hex!(\"0100000000333333334444444455000001dd0000000000001de0\"), 0x029c61),\n    (hex!(\"0100000000333333334444444455000001dd0000000000004bb0\"), 0x029d01),\n    (hex!(\"0100000000333333334444444455000001dd0000000000004cd0\"), 0x029da1),\n    (hex!(\"0100000000333333334444444455000001dd0000000000006100\"), 0x029e41),\n    (hex!(\"0100000000333333334444444455000001dd0000000000007bb0\"), 0x029ee1),\n    (hex!(\"0100000000333333334444444455000001de0000000000001df0\"), 0x029f81),\n    (hex!(\"0100000000333333334444444455000001de0000000000004260\"), 0x02a021),\n    (hex!(\"0100000000333333334444444455000001de0000000000006040\"), 0x02a0c1),\n    (hex!(\"0100000000333333334444444455000001df0000000000001e00\"), 0x02a161),\n    
(hex!(\"0100000000333333334444444455000001df0000000000005fa0\"), 0x02a201),\n    (hex!(\"0100000000333333334444444455000001df0000000000006a70\"), 0x02a2a1),\n    (hex!(\"0100000000333333334444444455000001df0000000000006dc0\"), 0x02a341),\n    (hex!(\"0100000000333333334444444455000001e00000000000001e10\"), 0x02a3e1),\n    (hex!(\"0100000000333333334444444455000001e00000000000007010\"), 0x02a481),\n    (hex!(\"0100000000333333334444444455000001e10000000000001e20\"), 0x02a521),\n    (hex!(\"0100000000333333334444444455000001e10000000000005720\"), 0x02a5c1),\n    (hex!(\"0100000000333333334444444455000001e10000000000006830\"), 0x02a661),\n    (hex!(\"0100000000333333334444444455000001e20000000000001e30\"), 0x02a701),\n    (hex!(\"0100000000333333334444444455000001e20000000000005100\"), 0x02a7a1),\n    (hex!(\"0100000000333333334444444455000001e30000000000001e40\"), 0x02a841),\n    (hex!(\"0100000000333333334444444455000001e40000000000001e50\"), 0x02a8e1),\n    (hex!(\"0100000000333333334444444455000001e40000000000003f30\"), 0x02a981),\n    (hex!(\"0100000000333333334444444455000001e40000000000005220\"), 0x02aa21),\n    (hex!(\"0100000000333333334444444455000001e50000000000001e60\"), 0x02aac1),\n    (hex!(\"0100000000333333334444444455000001e50000000000006f60\"), 0x02ab61),\n    (hex!(\"0100000000333333334444444455000001e60000000000001e70\"), 0x02ac01),\n    (hex!(\"0100000000333333334444444455000001e60000000000006c80\"), 0x02aca1),\n    (hex!(\"0100000000333333334444444455000001e70000000000001e80\"), 0x02ad41),\n    (hex!(\"0100000000333333334444444455000001e80000000000001e90\"), 0x02ade1),\n    (hex!(\"0100000000333333334444444455000001e80000000000004e30\"), 0x02ae81),\n    (hex!(\"0100000000333333334444444455000001e90000000000001ea0\"), 0x02af21),\n    (hex!(\"0100000000333333334444444455000001e90000000000005470\"), 0x02afc1),\n    (hex!(\"0100000000333333334444444455000001ea0000000000001eb0\"), 0x02b061),\n    
(hex!(\"0100000000333333334444444455000001ea0000000000007980\"), 0x02b101),\n    (hex!(\"0100000000333333334444444455000001eb0000000000001ec0\"), 0x02b1a1),\n    (hex!(\"0100000000333333334444444455000001eb0000000000004390\"), 0x02b241),\n    (hex!(\"0100000000333333334444444455000001eb0000000000005970\"), 0x02b2e1),\n    (hex!(\"0100000000333333334444444455000001ec0000000000001ed0\"), 0x02b381),\n    (hex!(\"0100000000333333334444444455000001ec0000000000005d50\"), 0x02b421),\n    (hex!(\"0100000000333333334444444455000001ec00000000000076e0\"), 0x02b4c1),\n    (hex!(\"0100000000333333334444444455000001ed0000000000001ee0\"), 0x02b561),\n    (hex!(\"0100000000333333334444444455000001ed0000000000006190\"), 0x02b601),\n    (hex!(\"0100000000333333334444444455000001ee0000000000001ef0\"), 0x02b6a1),\n    (hex!(\"0100000000333333334444444455000001ee0000000000004900\"), 0x02b741),\n    (hex!(\"0100000000333333334444444455000001ef0000000000001f00\"), 0x02b7e1),\n    (hex!(\"0100000000333333334444444455000001ef0000000000006c60\"), 0x02b881),\n    (hex!(\"0100000000333333334444444455000001f00000000000001f10\"), 0x02b921),\n    (hex!(\"0100000000333333334444444455000001f00000000000006950\"), 0x02b9c1),\n    (hex!(\"0100000000333333334444444455000001f10000000000001f20\"), 0x02ba61),\n    (hex!(\"0100000000333333334444444455000001f10000000000006400\"), 0x02bb01),\n    (hex!(\"0100000000333333334444444455000001f20000000000001f30\"), 0x02bba1),\n    (hex!(\"0100000000333333334444444455000001f20000000000006f00\"), 0x02bc41),\n    (hex!(\"0100000000333333334444444455000001f20000000000007b10\"), 0x02bce1),\n    (hex!(\"0100000000333333334444444455000001f30000000000001f40\"), 0x02bd81),\n    (hex!(\"0100000000333333334444444455000001f40000000000001f50\"), 0x02be21),\n    (hex!(\"0100000000333333334444444455000001f50000000000001f60\"), 0x02bec1),\n    (hex!(\"0100000000333333334444444455000001f500000000000044f0\"), 0x02bf61),\n    
(hex!(\"0100000000333333334444444455000001f60000000000001f70\"), 0x02c001),\n    (hex!(\"0100000000333333334444444455000001f70000000000001f80\"), 0x02c0a1),\n    (hex!(\"0100000000333333334444444455000001f70000000000004ad0\"), 0x02c141),\n    (hex!(\"0100000000333333334444444455000001f80000000000001f90\"), 0x02c1e1),\n    (hex!(\"0100000000333333334444444455000001f90000000000001fa0\"), 0x02c281),\n    (hex!(\"0100000000333333334444444455000001f90000000000003f60\"), 0x02c321),\n    (hex!(\"0100000000333333334444444455000001f90000000000004a80\"), 0x02c3c1),\n    (hex!(\"0100000000333333334444444455000001fa0000000000001fb0\"), 0x02c461),\n    (hex!(\"0100000000333333334444444455000001fa0000000000006f90\"), 0x02c501),\n    (hex!(\"0100000000333333334444444455000001fb0000000000001fc0\"), 0x02c5a1),\n    (hex!(\"0100000000333333334444444455000001fc0000000000001fd0\"), 0x02c641),\n    (hex!(\"0100000000333333334444444455000001fc0000000000004a90\"), 0x02c6e1),\n    (hex!(\"0100000000333333334444444455000001fd0000000000001fe0\"), 0x02c781),\n    (hex!(\"0100000000333333334444444455000001fd0000000000005f50\"), 0x02c821),\n    (hex!(\"0100000000333333334444444455000001fe0000000000001ff0\"), 0x02c8c1),\n    (hex!(\"0100000000333333334444444455000001ff0000000000002000\"), 0x02c961),\n    (hex!(\"0100000000333333334444444455000002000000000000002010\"), 0x02ca01),\n    (hex!(\"0100000000333333334444444455000002000000000000005f00\"), 0x02caa1),\n    (hex!(\"0100000000333333334444444455000002000000000000006840\"), 0x02cb41),\n    (hex!(\"0100000000333333334444444455000002010000000000002020\"), 0x02cbe1),\n    (hex!(\"0100000000333333334444444455000002020000000000002030\"), 0x02cc81),\n    (hex!(\"0100000000333333334444444455000002030000000000002040\"), 0x02cd21),\n    (hex!(\"0100000000333333334444444455000002040000000000002050\"), 0x02cdc1),\n    (hex!(\"01000000003333333344444444550000020400000000000051f0\"), 0x02ce61),\n    
(hex!(\"0100000000333333334444444455000002050000000000002060\"), 0x02cf01),\n    (hex!(\"0100000000333333334444444455000002060000000000002070\"), 0x02cfa1),\n    (hex!(\"0100000000333333334444444455000002060000000000005c80\"), 0x02d041),\n    (hex!(\"01000000003333333344444444550000020600000000000061d0\"), 0x02d0e1),\n    (hex!(\"01000000003333333344444444550000020600000000000078c0\"), 0x02d181),\n    (hex!(\"0100000000333333334444444455000002070000000000002080\"), 0x02d221),\n    (hex!(\"0100000000333333334444444455000002070000000000006ba0\"), 0x02d2c1),\n    (hex!(\"0100000000333333334444444455000002080000000000002090\"), 0x02d361),\n    (hex!(\"01000000003333333344444444550000020900000000000020a0\"), 0x02d401),\n    (hex!(\"01000000003333333344444444550000020900000000000067a0\"), 0x02d4a1),\n    (hex!(\"01000000003333333344444444550000020a00000000000020b0\"), 0x02d541),\n    (hex!(\"01000000003333333344444444550000020a0000000000004950\"), 0x02d5e1),\n    (hex!(\"01000000003333333344444444550000020a0000000000004de0\"), 0x02d681),\n    (hex!(\"01000000003333333344444444550000020b00000000000020c0\"), 0x02d721),\n    (hex!(\"01000000003333333344444444550000020b0000000000004b00\"), 0x02d7c1),\n    (hex!(\"01000000003333333344444444550000020c00000000000020d0\"), 0x02d861),\n    (hex!(\"01000000003333333344444444550000020d00000000000020e0\"), 0x02d901),\n    (hex!(\"01000000003333333344444444550000020e00000000000020f0\"), 0x02d9a1),\n    (hex!(\"01000000003333333344444444550000020f0000000000002100\"), 0x02da41),\n    (hex!(\"0100000000333333334444444455000002100000000000002110\"), 0x02dae1),\n    (hex!(\"0100000000333333334444444455000002110000000000002120\"), 0x02db81),\n    (hex!(\"0100000000333333334444444455000002110000000000004490\"), 0x02dc21),\n    (hex!(\"0100000000333333334444444455000002120000000000002130\"), 0x02dcc1),\n    (hex!(\"0100000000333333334444444455000002130000000000002140\"), 0x02dd61),\n    
(hex!(\"01000000003333333344444444550000021300000000000046d0\"), 0x02de01),\n    (hex!(\"01000000003333333344444444550000021300000000000046e0\"), 0x02dea1),\n    (hex!(\"0100000000333333334444444455000002130000000000004b70\"), 0x02df41),\n    (hex!(\"0100000000333333334444444455000002140000000000002150\"), 0x02dfe1),\n    (hex!(\"0100000000333333334444444455000002140000000000006c50\"), 0x02e081),\n    (hex!(\"0100000000333333334444444455000002150000000000002160\"), 0x02e121),\n    (hex!(\"01000000003333333344444444550000021500000000000043c0\"), 0x02e1c1),\n    (hex!(\"0100000000333333334444444455000002160000000000002170\"), 0x02e261),\n    (hex!(\"01000000003333333344444444550000021600000000000055b0\"), 0x02e301),\n    (hex!(\"0100000000333333334444444455000002160000000000006150\"), 0x02e3a1),\n    (hex!(\"0100000000333333334444444455000002170000000000002180\"), 0x02e441),\n    (hex!(\"01000000003333333344444444550000021700000000000053b0\"), 0x02e4e1),\n    (hex!(\"0100000000333333334444444455000002170000000000007460\"), 0x02e581),\n    (hex!(\"0100000000333333334444444455000002180000000000002190\"), 0x02e621),\n    (hex!(\"01000000003333333344444444550000021900000000000021a0\"), 0x02e6c1),\n    (hex!(\"01000000003333333344444444550000021a00000000000021b0\"), 0x02e761),\n    (hex!(\"01000000003333333344444444550000021a0000000000007650\"), 0x02e801),\n    (hex!(\"01000000003333333344444444550000021b00000000000021c0\"), 0x02e8a1),\n    (hex!(\"01000000003333333344444444550000021b0000000000004b20\"), 0x02e941),\n    (hex!(\"01000000003333333344444444550000021c00000000000021d0\"), 0x02e9e1),\n    (hex!(\"01000000003333333344444444550000021c0000000000007610\"), 0x02ea81),\n    (hex!(\"01000000003333333344444444550000021d00000000000021e0\"), 0x02eb21),\n    (hex!(\"01000000003333333344444444550000021d0000000000005f40\"), 0x02ebc1),\n    (hex!(\"01000000003333333344444444550000021e00000000000021f0\"), 0x02ec61),\n    
(hex!(\"01000000003333333344444444550000021e0000000000005a50\"), 0x02ed01),\n    (hex!(\"01000000003333333344444444550000021e0000000000005ff0\"), 0x02eda1),\n    (hex!(\"01000000003333333344444444550000021f0000000000002200\"), 0x02ee41),\n    (hex!(\"01000000003333333344444444550000021f00000000000043a0\"), 0x02eee1),\n    (hex!(\"01000000003333333344444444550000021f0000000000004cb0\"), 0x02ef81),\n    (hex!(\"01000000003333333344444444550000021f0000000000004e00\"), 0x02f021),\n    (hex!(\"0100000000333333334444444455000002200000000000002210\"), 0x02f0c1),\n    (hex!(\"0100000000333333334444444455000002210000000000002220\"), 0x02f161),\n    (hex!(\"0100000000333333334444444455000002210000000000006290\"), 0x02f201),\n    (hex!(\"0100000000333333334444444455000002210000000000007230\"), 0x02f2a1),\n    (hex!(\"0100000000333333334444444455000002220000000000002230\"), 0x02f341),\n    (hex!(\"0100000000333333334444444455000002220000000000006ea0\"), 0x02f3e1),\n    (hex!(\"0100000000333333334444444455000002230000000000002240\"), 0x02f481),\n    (hex!(\"0100000000333333334444444455000002230000000000004710\"), 0x02f521),\n    (hex!(\"0100000000333333334444444455000002240000000000002250\"), 0x02f5c1),\n    (hex!(\"0100000000333333334444444455000002250000000000002260\"), 0x02f661),\n    (hex!(\"0100000000333333334444444455000002260000000000002270\"), 0x02f701),\n    (hex!(\"0100000000333333334444444455000002260000000000005b40\"), 0x02f7a1),\n    (hex!(\"0100000000333333334444444455000002260000000000006300\"), 0x02f841),\n    (hex!(\"0100000000333333334444444455000002270000000000002280\"), 0x02f8e1),\n    (hex!(\"0100000000333333334444444455000002270000000000005b80\"), 0x02f981),\n    (hex!(\"0100000000333333334444444455000002280000000000002290\"), 0x02fa21),\n    (hex!(\"0100000000333333334444444455000002280000000000003ed0\"), 0x02fac1),\n    (hex!(\"0100000000333333334444444455000002280000000000004550\"), 0x02fb61),\n    
(hex!(\"01000000003333333344444444550000022800000000000077d0\"), 0x02fc01),\n    (hex!(\"01000000003333333344444444550000022900000000000022a0\"), 0x02fca1),\n    (hex!(\"0100000000333333334444444455000002290000000000006480\"), 0x02fd41),\n    (hex!(\"01000000003333333344444444550000022a00000000000022b0\"), 0x02fde1),\n    (hex!(\"01000000003333333344444444550000022a0000000000005450\"), 0x02fe81),\n    (hex!(\"01000000003333333344444444550000022b00000000000022c0\"), 0x02ff21),\n    (hex!(\"01000000003333333344444444550000022b0000000000006dd0\"), 0x02ffc1),\n    (hex!(\"01000000003333333344444444550000022c00000000000022d0\"), 0x030061),\n    (hex!(\"01000000003333333344444444550000022c0000000000006890\"), 0x030101),\n    (hex!(\"01000000003333333344444444550000022d00000000000022e0\"), 0x0301a1),\n    (hex!(\"01000000003333333344444444550000022e00000000000022f0\"), 0x030241),\n    (hex!(\"01000000003333333344444444550000022e0000000000004f20\"), 0x0302e1),\n    (hex!(\"01000000003333333344444444550000022f0000000000002300\"), 0x030381),\n    (hex!(\"01000000003333333344444444550000022f0000000000005260\"), 0x030421),\n    (hex!(\"01000000003333333344444444550000022f00000000000053f0\"), 0x0304c1),\n    (hex!(\"0100000000333333334444444455000002300000000000002310\"), 0x030561),\n    (hex!(\"01000000003333333344444444550000023000000000000050e0\"), 0x030601),\n    (hex!(\"0100000000333333334444444455000002310000000000002320\"), 0x0306a1),\n    (hex!(\"0100000000333333334444444455000002310000000000007800\"), 0x030741),\n    (hex!(\"0100000000333333334444444455000002320000000000002330\"), 0x0307e1),\n    (hex!(\"0100000000333333334444444455000002330000000000002340\"), 0x030881),\n    (hex!(\"0100000000333333334444444455000002330000000000004d70\"), 0x030921),\n    (hex!(\"0100000000333333334444444455000002330000000000005cf0\"), 0x0309c1),\n    (hex!(\"0100000000333333334444444455000002340000000000002350\"), 0x030a61),\n    
(hex!(\"0100000000333333334444444455000002350000000000002360\"), 0x030b01),\n    (hex!(\"0100000000333333334444444455000002350000000000006970\"), 0x030ba1),\n    (hex!(\"0100000000333333334444444455000002360000000000002370\"), 0x030c41),\n    (hex!(\"0100000000333333334444444455000002360000000000005270\"), 0x030ce1),\n    (hex!(\"0100000000333333334444444455000002370000000000002380\"), 0x030d81),\n    (hex!(\"0100000000333333334444444455000002370000000000005d70\"), 0x030e21),\n    (hex!(\"0100000000333333334444444455000002380000000000002390\"), 0x030ec1),\n    (hex!(\"01000000003333333344444444550000023800000000000069a0\"), 0x030f61),\n    (hex!(\"01000000003333333344444444550000023900000000000023a0\"), 0x031001),\n    (hex!(\"01000000003333333344444444550000023900000000000052e0\"), 0x0310a1),\n    (hex!(\"0100000000333333334444444455000002390000000000005a10\"), 0x031141),\n    (hex!(\"0100000000333333334444444455000002390000000000007440\"), 0x0311e1),\n    (hex!(\"01000000003333333344444444550000023a00000000000023b0\"), 0x031281),\n    (hex!(\"01000000003333333344444444550000023a0000000000003f00\"), 0x031321),\n    (hex!(\"01000000003333333344444444550000023a0000000000004430\"), 0x0313c1),\n    (hex!(\"01000000003333333344444444550000023a0000000000007070\"), 0x031461),\n    (hex!(\"01000000003333333344444444550000023a00000000000074a0\"), 0x031501),\n    (hex!(\"01000000003333333344444444550000023b00000000000023c0\"), 0x0315a1),\n    (hex!(\"01000000003333333344444444550000023b0000000000004730\"), 0x031641),\n    (hex!(\"01000000003333333344444444550000023b00000000000068b0\"), 0x0316e1),\n    (hex!(\"01000000003333333344444444550000023c00000000000023d0\"), 0x031781),\n    (hex!(\"01000000003333333344444444550000023c0000000000004680\"), 0x031821),\n    (hex!(\"01000000003333333344444444550000023d00000000000023e0\"), 0x0318c1),\n    (hex!(\"01000000003333333344444444550000023d00000000000059a0\"), 0x031961),\n    
(hex!(\"01000000003333333344444444550000023e00000000000023f0\"), 0x031a01),\n    (hex!(\"01000000003333333344444444550000023f0000000000002400\"), 0x031aa1),\n    (hex!(\"0100000000333333334444444455000002400000000000002410\"), 0x031b41),\n    (hex!(\"0100000000333333334444444455000002400000000000004920\"), 0x031be1),\n    (hex!(\"01000000003333333344444444550000024000000000000066e0\"), 0x031c81),\n    (hex!(\"01000000003333333344444444550000024000000000000076f0\"), 0x031d21),\n    (hex!(\"01000000003333333344444444550000024000000000000078e0\"), 0x031dc1),\n    (hex!(\"0100000000333333334444444455000002410000000000002420\"), 0x031e61),\n    (hex!(\"0100000000333333334444444455000002420000000000002430\"), 0x031f01),\n    (hex!(\"0100000000333333334444444455000002420000000000006590\"), 0x031fa1),\n    (hex!(\"0100000000333333334444444455000002430000000000002440\"), 0x032041),\n    (hex!(\"0100000000333333334444444455000002430000000000004d00\"), 0x0320e1),\n    (hex!(\"0100000000333333334444444455000002440000000000002450\"), 0x032181),\n    (hex!(\"0100000000333333334444444455000002440000000000005f80\"), 0x032221),\n    (hex!(\"0100000000333333334444444455000002450000000000002460\"), 0x0322c1),\n    (hex!(\"0100000000333333334444444455000002450000000000004940\"), 0x032361),\n    (hex!(\"0100000000333333334444444455000002460000000000002470\"), 0x032401),\n    (hex!(\"0100000000333333334444444455000002470000000000002480\"), 0x0324a1),\n    (hex!(\"0100000000333333334444444455000002470000000000004dd0\"), 0x032541),\n    (hex!(\"0100000000333333334444444455000002470000000000005930\"), 0x0325e1),\n    (hex!(\"01000000003333333344444444550000024700000000000061b0\"), 0x032681),\n    (hex!(\"0100000000333333334444444455000002470000000000007740\"), 0x032721),\n    (hex!(\"0100000000333333334444444455000002480000000000002490\"), 0x0327c1),\n    (hex!(\"0100000000333333334444444455000002480000000000004890\"), 0x032861),\n    
(hex!(\"01000000003333333344444444550000024900000000000024a0\"), 0x032901),\n    (hex!(\"01000000003333333344444444550000024a00000000000024b0\"), 0x0329a1),\n    (hex!(\"01000000003333333344444444550000024b00000000000024c0\"), 0x032a41),\n    (hex!(\"01000000003333333344444444550000024c00000000000024d0\"), 0x032ae1),\n    (hex!(\"01000000003333333344444444550000024d00000000000024e0\"), 0x032b81),\n    (hex!(\"01000000003333333344444444550000024d0000000000004070\"), 0x032c21),\n    (hex!(\"01000000003333333344444444550000024e00000000000024f0\"), 0x032cc1),\n    (hex!(\"01000000003333333344444444550000024e00000000000066a0\"), 0x032d61),\n    (hex!(\"01000000003333333344444444550000024e0000000000006ab0\"), 0x032e01),\n    (hex!(\"01000000003333333344444444550000024f0000000000002500\"), 0x032ea1),\n    (hex!(\"0100000000333333334444444455000002500000000000002510\"), 0x032f41),\n    (hex!(\"0100000000333333334444444455000002510000000000002520\"), 0x032fe1),\n    (hex!(\"0100000000333333334444444455000002510000000000007320\"), 0x033081),\n    (hex!(\"0100000000333333334444444455000002520000000000002530\"), 0x033121),\n    (hex!(\"0100000000333333334444444455000002520000000000006410\"), 0x0331c1),\n    (hex!(\"0100000000333333334444444455000002530000000000002540\"), 0x033261),\n    (hex!(\"0100000000333333334444444455000002530000000000005110\"), 0x033301),\n    (hex!(\"0100000000333333334444444455000002540000000000002550\"), 0x0333a1),\n    (hex!(\"01000000003333333344444444550000025400000000000040c0\"), 0x033441),\n    (hex!(\"0100000000333333334444444455000002540000000000006a40\"), 0x0334e1),\n    (hex!(\"0100000000333333334444444455000002550000000000002560\"), 0x033581),\n    (hex!(\"0100000000333333334444444455000002550000000000005190\"), 0x033621),\n    (hex!(\"0100000000333333334444444455000002560000000000002570\"), 0x0336c1),\n    (hex!(\"01000000003333333344444444550000025600000000000061f0\"), 0x033761),\n    
(hex!(\"0100000000333333334444444455000002570000000000002580\"), 0x033801),\n    (hex!(\"0100000000333333334444444455000002580000000000002590\"), 0x0338a1),\n    (hex!(\"01000000003333333344444444550000025800000000000043d0\"), 0x033941),\n    (hex!(\"01000000003333333344444444550000025900000000000025a0\"), 0x0339e1),\n    (hex!(\"0100000000333333334444444455000002590000000000006bb0\"), 0x033a81),\n    (hex!(\"01000000003333333344444444550000025a00000000000025b0\"), 0x033b21),\n    (hex!(\"01000000003333333344444444550000025a0000000000005fb0\"), 0x033bc1),\n    (hex!(\"01000000003333333344444444550000025a00000000000064c0\"), 0x033c61),\n    (hex!(\"01000000003333333344444444550000025b00000000000025c0\"), 0x033d01),\n    (hex!(\"01000000003333333344444444550000025b0000000000005c10\"), 0x033da1),\n    (hex!(\"01000000003333333344444444550000025c00000000000025d0\"), 0x033e41),\n    (hex!(\"01000000003333333344444444550000025c0000000000007d00\"), 0x033ee1),\n    (hex!(\"01000000003333333344444444550000025d00000000000025e0\"), 0x033f81),\n    (hex!(\"01000000003333333344444444550000025e00000000000025f0\"), 0x034021),\n    (hex!(\"01000000003333333344444444550000025e00000000000045e0\"), 0x0340c1),\n    (hex!(\"01000000003333333344444444550000025e0000000000006ee0\"), 0x034161),\n    (hex!(\"01000000003333333344444444550000025f0000000000002600\"), 0x034201),\n    (hex!(\"01000000003333333344444444550000025f00000000000050b0\"), 0x0342a1),\n    (hex!(\"01000000003333333344444444550000025f0000000000007690\"), 0x034341),\n    (hex!(\"0100000000333333334444444455000002600000000000002610\"), 0x0343e1),\n    (hex!(\"0100000000333333334444444455000002600000000000007b60\"), 0x034481),\n    (hex!(\"0100000000333333334444444455000002610000000000002620\"), 0x034521),\n    (hex!(\"0100000000333333334444444455000002620000000000002630\"), 0x0345c1),\n    (hex!(\"0100000000333333334444444455000002630000000000002640\"), 0x034661),\n    
(hex!(\"0100000000333333334444444455000002640000000000002650\"), 0x034701),\n    (hex!(\"0100000000333333334444444455000002650000000000002660\"), 0x0347a1),\n    (hex!(\"0100000000333333334444444455000002650000000000006180\"), 0x034841),\n    (hex!(\"0100000000333333334444444455000002660000000000002670\"), 0x0348e1),\n    (hex!(\"0100000000333333334444444455000002660000000000005430\"), 0x034981),\n    (hex!(\"0100000000333333334444444455000002660000000000007a60\"), 0x034a21),\n    (hex!(\"0100000000333333334444444455000002670000000000002680\"), 0x034ac1),\n    (hex!(\"01000000003333333344444444550000026700000000000077f0\"), 0x034b61),\n    (hex!(\"0100000000333333334444444455000002680000000000002690\"), 0x034c01),\n    (hex!(\"01000000003333333344444444550000026900000000000026a0\"), 0x034ca1),\n    (hex!(\"01000000003333333344444444550000026a00000000000026b0\"), 0x034d41),\n    (hex!(\"01000000003333333344444444550000026a0000000000007530\"), 0x034de1),\n    (hex!(\"01000000003333333344444444550000026b00000000000026c0\"), 0x034e81),\n    (hex!(\"01000000003333333344444444550000026b00000000000058b0\"), 0x034f21),\n    (hex!(\"01000000003333333344444444550000026b00000000000066b0\"), 0x034fc1),\n    (hex!(\"01000000003333333344444444550000026b0000000000006b10\"), 0x035061),\n    (hex!(\"01000000003333333344444444550000026c00000000000026d0\"), 0x035101),\n    (hex!(\"01000000003333333344444444550000026d00000000000026e0\"), 0x0351a1),\n    (hex!(\"01000000003333333344444444550000026d0000000000004210\"), 0x035241),\n    (hex!(\"01000000003333333344444444550000026d0000000000005490\"), 0x0352e1),\n    (hex!(\"01000000003333333344444444550000026d0000000000005e60\"), 0x035381),\n    (hex!(\"01000000003333333344444444550000026d00000000000068e0\"), 0x035421),\n    (hex!(\"01000000003333333344444444550000026d0000000000007020\"), 0x0354c1),\n    (hex!(\"01000000003333333344444444550000026d0000000000007300\"), 0x035561),\n    
(hex!(\"01000000003333333344444444550000026e00000000000026f0\"), 0x035601),\n    (hex!(\"01000000003333333344444444550000026f0000000000002700\"), 0x0356a1),\n    (hex!(\"01000000003333333344444444550000026f0000000000004910\"), 0x035741),\n    (hex!(\"0100000000333333334444444455000002700000000000002710\"), 0x0357e1),\n    (hex!(\"0100000000333333334444444455000002710000000000002720\"), 0x035881),\n    (hex!(\"01000000003333333344444444550000027100000000000050c0\"), 0x035921),\n    (hex!(\"0100000000333333334444444455000002720000000000002730\"), 0x0359c1),\n    (hex!(\"0100000000333333334444444455000002730000000000002740\"), 0x035a61),\n    (hex!(\"0100000000333333334444444455000002740000000000002750\"), 0x035b01),\n    (hex!(\"0100000000333333334444444455000002740000000000007490\"), 0x035ba1),\n    (hex!(\"0100000000333333334444444455000002750000000000002760\"), 0x035c41),\n    (hex!(\"0100000000333333334444444455000002760000000000002770\"), 0x035ce1),\n    (hex!(\"0100000000333333334444444455000002760000000000004790\"), 0x035d81),\n    (hex!(\"0100000000333333334444444455000002770000000000002780\"), 0x035e21),\n    (hex!(\"01000000003333333344444444550000027700000000000050a0\"), 0x035ec1),\n    (hex!(\"0100000000333333334444444455000002780000000000002790\"), 0x035f61),\n    (hex!(\"0100000000333333334444444455000002780000000000004330\"), 0x036001),\n    (hex!(\"0100000000333333334444444455000002780000000000006b00\"), 0x0360a1),\n    (hex!(\"01000000003333333344444444550000027900000000000027a0\"), 0x036141),\n    (hex!(\"01000000003333333344444444550000027a00000000000027b0\"), 0x0361e1),\n    (hex!(\"01000000003333333344444444550000027b00000000000027c0\"), 0x036281),\n    (hex!(\"01000000003333333344444444550000027b0000000000004930\"), 0x036321),\n    (hex!(\"01000000003333333344444444550000027b0000000000006250\"), 0x0363c1),\n    (hex!(\"01000000003333333344444444550000027c00000000000027d0\"), 0x036461),\n    
(hex!(\"01000000003333333344444444550000027d00000000000027e0\"), 0x036501),\n    (hex!(\"01000000003333333344444444550000027d0000000000005ce0\"), 0x0365a1),\n    (hex!(\"01000000003333333344444444550000027d0000000000005fe0\"), 0x036641),\n    (hex!(\"01000000003333333344444444550000027e00000000000027f0\"), 0x0366e1),\n    (hex!(\"01000000003333333344444444550000027f0000000000002800\"), 0x036781),\n    (hex!(\"01000000003333333344444444550000027f0000000000003e90\"), 0x036821),\n    (hex!(\"01000000003333333344444444550000027f0000000000007910\"), 0x0368c1),\n    (hex!(\"0100000000333333334444444455000002800000000000002810\"), 0x036961),\n    (hex!(\"0100000000333333334444444455000002800000000000004990\"), 0x036a01),\n    (hex!(\"0100000000333333334444444455000002800000000000006160\"), 0x036aa1),\n    (hex!(\"0100000000333333334444444455000002800000000000006740\"), 0x036b41),\n    (hex!(\"0100000000333333334444444455000002810000000000002820\"), 0x036be1),\n    (hex!(\"0100000000333333334444444455000002820000000000002830\"), 0x036c81),\n    (hex!(\"0100000000333333334444444455000002820000000000005170\"), 0x036d21),\n    (hex!(\"0100000000333333334444444455000002830000000000002840\"), 0x036dc1),\n    (hex!(\"0100000000333333334444444455000002840000000000002850\"), 0x036e61),\n    (hex!(\"0100000000333333334444444455000002840000000000004810\"), 0x036f01),\n    (hex!(\"0100000000333333334444444455000002840000000000006aa0\"), 0x036fa1),\n    (hex!(\"0100000000333333334444444455000002850000000000002860\"), 0x037041),\n    (hex!(\"0100000000333333334444444455000002860000000000002870\"), 0x0370e1),\n    (hex!(\"0100000000333333334444444455000002860000000000005080\"), 0x037181),\n    (hex!(\"0100000000333333334444444455000002870000000000002880\"), 0x037221),\n    (hex!(\"0100000000333333334444444455000002870000000000004e60\"), 0x0372c1),\n    (hex!(\"0100000000333333334444444455000002880000000000002890\"), 0x037361),\n    
(hex!(\"0100000000333333334444444455000002880000000000005060\"), 0x037401),\n    (hex!(\"0100000000333333334444444455000002880000000000006f20\"), 0x0374a1),\n    (hex!(\"01000000003333333344444444550000028900000000000028a0\"), 0x037541),\n    (hex!(\"01000000003333333344444444550000028900000000000047e0\"), 0x0375e1),\n    (hex!(\"01000000003333333344444444550000028a00000000000028b0\"), 0x037681),\n    (hex!(\"01000000003333333344444444550000028a0000000000005ab0\"), 0x037721),\n    (hex!(\"01000000003333333344444444550000028a0000000000007130\"), 0x0377c1),\n    (hex!(\"01000000003333333344444444550000028a0000000000007660\"), 0x037861),\n    (hex!(\"01000000003333333344444444550000028b00000000000028c0\"), 0x037901),\n    (hex!(\"01000000003333333344444444550000028b00000000000054e0\"), 0x0379a1),\n    (hex!(\"01000000003333333344444444550000028c00000000000028d0\"), 0x037a41),\n    (hex!(\"01000000003333333344444444550000028c00000000000046f0\"), 0x037ae1),\n    (hex!(\"01000000003333333344444444550000028c00000000000061a0\"), 0x037b81),\n    (hex!(\"01000000003333333344444444550000028d00000000000028e0\"), 0x037c21),\n    (hex!(\"01000000003333333344444444550000028e00000000000028f0\"), 0x037cc1),\n    (hex!(\"01000000003333333344444444550000028e0000000000004130\"), 0x037d61),\n    (hex!(\"01000000003333333344444444550000028f0000000000002900\"), 0x037e01),\n    (hex!(\"01000000003333333344444444550000028f0000000000007510\"), 0x037ea1),\n    (hex!(\"0100000000333333334444444455000002900000000000002910\"), 0x037f41),\n    (hex!(\"0100000000333333334444444455000002900000000000004a40\"), 0x037fe1),\n    (hex!(\"0100000000333333334444444455000002910000000000002920\"), 0x038081),\n    (hex!(\"0100000000333333334444444455000002920000000000002930\"), 0x038121),\n    (hex!(\"0100000000333333334444444455000002920000000000004e90\"), 0x0381c1),\n    (hex!(\"0100000000333333334444444455000002930000000000002940\"), 0x038261),\n    
(hex!(\"0100000000333333334444444455000002930000000000006880\"), 0x038301),\n    (hex!(\"0100000000333333334444444455000002940000000000002950\"), 0x0383a1),\n    (hex!(\"0100000000333333334444444455000002940000000000007bc0\"), 0x038441),\n    (hex!(\"0100000000333333334444444455000002950000000000002960\"), 0x0384e1),\n    (hex!(\"0100000000333333334444444455000002960000000000002970\"), 0x038581),\n    (hex!(\"01000000003333333344444444550000029600000000000059d0\"), 0x038621),\n    (hex!(\"0100000000333333334444444455000002970000000000002980\"), 0x0386c1),\n    (hex!(\"0100000000333333334444444455000002970000000000004a50\"), 0x038761),\n    (hex!(\"0100000000333333334444444455000002970000000000005f20\"), 0x038801),\n    (hex!(\"01000000003333333344444444550000029700000000000068d0\"), 0x0388a1),\n    (hex!(\"0100000000333333334444444455000002980000000000002990\"), 0x038941),\n    (hex!(\"0100000000333333334444444455000002980000000000004370\"), 0x0389e1),\n    (hex!(\"0100000000333333334444444455000002980000000000004420\"), 0x038a81),\n    (hex!(\"01000000003333333344444444550000029900000000000029a0\"), 0x038b21),\n    (hex!(\"01000000003333333344444444550000029a00000000000029b0\"), 0x038bc1),\n    (hex!(\"01000000003333333344444444550000029a0000000000006010\"), 0x038c61),\n    (hex!(\"01000000003333333344444444550000029a0000000000006980\"), 0x038d01),\n    (hex!(\"01000000003333333344444444550000029b00000000000029c0\"), 0x038da1),\n    (hex!(\"01000000003333333344444444550000029c00000000000029d0\"), 0x038e41),\n    (hex!(\"01000000003333333344444444550000029c0000000000007480\"), 0x038ee1),\n    (hex!(\"01000000003333333344444444550000029d00000000000029e0\"), 0x038f81),\n    (hex!(\"01000000003333333344444444550000029d0000000000005030\"), 0x039021),\n    (hex!(\"01000000003333333344444444550000029d0000000000007780\"), 0x0390c1),\n    (hex!(\"01000000003333333344444444550000029d0000000000007a50\"), 0x039161),\n    
(hex!(\"01000000003333333344444444550000029e00000000000029f0\"), 0x039201),\n    (hex!(\"01000000003333333344444444550000029e00000000000074b0\"), 0x0392a1),\n    (hex!(\"01000000003333333344444444550000029f0000000000002a00\"), 0x039341),\n    (hex!(\"0100000000333333334444444455000002a00000000000002a10\"), 0x0393e1),\n    (hex!(\"0100000000333333334444444455000002a10000000000002a20\"), 0x039481),\n    (hex!(\"0100000000333333334444444455000002a20000000000002a30\"), 0x039521),\n    (hex!(\"0100000000333333334444444455000002a20000000000004c50\"), 0x0395c1),\n    (hex!(\"0100000000333333334444444455000002a20000000000006f10\"), 0x039661),\n    (hex!(\"0100000000333333334444444455000002a30000000000002a40\"), 0x039701),\n    (hex!(\"0100000000333333334444444455000002a40000000000002a50\"), 0x0397a1),\n    (hex!(\"0100000000333333334444444455000002a40000000000005d60\"), 0x039841),\n    (hex!(\"0100000000333333334444444455000002a50000000000002a60\"), 0x0398e1),\n    (hex!(\"0100000000333333334444444455000002a50000000000005440\"), 0x039981),\n    (hex!(\"0100000000333333334444444455000002a50000000000005890\"), 0x039a21),\n    (hex!(\"0100000000333333334444444455000002a60000000000002a70\"), 0x039ac1),\n    (hex!(\"0100000000333333334444444455000002a70000000000002a80\"), 0x039b61),\n    (hex!(\"0100000000333333334444444455000002a700000000000054a0\"), 0x039c01),\n    (hex!(\"0100000000333333334444444455000002a70000000000007280\"), 0x039ca1),\n    (hex!(\"0100000000333333334444444455000002a80000000000002a90\"), 0x039d41),\n    (hex!(\"0100000000333333334444444455000002a90000000000002aa0\"), 0x039de1),\n    (hex!(\"0100000000333333334444444455000002aa0000000000002ab0\"), 0x039e81),\n    (hex!(\"0100000000333333334444444455000002ab0000000000002ac0\"), 0x039f21),\n    (hex!(\"0100000000333333334444444455000002ab0000000000006c90\"), 0x039fc1),\n    (hex!(\"0100000000333333334444444455000002ac0000000000002ad0\"), 0x03a061),\n    
(hex!(\"0100000000333333334444444455000002ac0000000000006db0\"), 0x03a101),\n    (hex!(\"0100000000333333334444444455000002ad0000000000002ae0\"), 0x03a1a1),\n    (hex!(\"0100000000333333334444444455000002ad00000000000065e0\"), 0x03a241),\n    (hex!(\"0100000000333333334444444455000002ad0000000000007b40\"), 0x03a2e1),\n    (hex!(\"0100000000333333334444444455000002ae0000000000002af0\"), 0x03a381),\n    (hex!(\"0100000000333333334444444455000002ae0000000000004d20\"), 0x03a421),\n    (hex!(\"0100000000333333334444444455000002ae0000000000006f30\"), 0x03a4c1),\n    (hex!(\"0100000000333333334444444455000002af0000000000002b00\"), 0x03a561),\n    (hex!(\"0100000000333333334444444455000002b00000000000002b10\"), 0x03a601),\n    (hex!(\"0100000000333333334444444455000002b00000000000004560\"), 0x03a6a1),\n    (hex!(\"0100000000333333334444444455000002b00000000000005800\"), 0x03a741),\n    (hex!(\"0100000000333333334444444455000002b00000000000005a60\"), 0x03a7e1),\n    (hex!(\"0100000000333333334444444455000002b10000000000002b20\"), 0x03a881),\n    (hex!(\"0100000000333333334444444455000002b10000000000007b30\"), 0x03a921),\n    (hex!(\"0100000000333333334444444455000002b20000000000002b30\"), 0x03a9c1),\n    (hex!(\"0100000000333333334444444455000002b20000000000004440\"), 0x03aa61),\n    (hex!(\"0100000000333333334444444455000002b20000000000004f80\"), 0x03ab01),\n    (hex!(\"0100000000333333334444444455000002b20000000000005020\"), 0x03aba1),\n    (hex!(\"0100000000333333334444444455000002b30000000000002b40\"), 0x03ac41),\n    (hex!(\"0100000000333333334444444455000002b40000000000002b50\"), 0x03ace1),\n    (hex!(\"0100000000333333334444444455000002b50000000000002b60\"), 0x03ad81),\n    (hex!(\"0100000000333333334444444455000002b500000000000059e0\"), 0x03ae21),\n    (hex!(\"0100000000333333334444444455000002b60000000000002b70\"), 0x03aec1),\n    (hex!(\"0100000000333333334444444455000002b70000000000002b80\"), 0x03af61),\n    
(hex!(\"0100000000333333334444444455000002b80000000000002b90\"), 0x03b001),\n    (hex!(\"0100000000333333334444444455000002b80000000000004590\"), 0x03b0a1),\n    (hex!(\"0100000000333333334444444455000002b800000000000047d0\"), 0x03b141),\n    (hex!(\"0100000000333333334444444455000002b80000000000006030\"), 0x03b1e1),\n    (hex!(\"0100000000333333334444444455000002b80000000000006a20\"), 0x03b281),\n    (hex!(\"0100000000333333334444444455000002b80000000000006a90\"), 0x03b321),\n    (hex!(\"0100000000333333334444444455000002b90000000000002ba0\"), 0x03b3c1),\n    (hex!(\"0100000000333333334444444455000002ba0000000000002bb0\"), 0x03b461),\n    (hex!(\"0100000000333333334444444455000002ba0000000000006e80\"), 0x03b501),\n    (hex!(\"0100000000333333334444444455000002bb0000000000002bc0\"), 0x03b5a1),\n    (hex!(\"0100000000333333334444444455000002bc0000000000002bd0\"), 0x03b641),\n    (hex!(\"0100000000333333334444444455000002bc0000000000004b30\"), 0x03b6e1),\n    (hex!(\"0100000000333333334444444455000002bd0000000000002be0\"), 0x03b781),\n    (hex!(\"0100000000333333334444444455000002bd0000000000005e10\"), 0x03b821),\n    (hex!(\"0100000000333333334444444455000002be0000000000002bf0\"), 0x03b8c1),\n    (hex!(\"0100000000333333334444444455000002bf0000000000002c00\"), 0x03b961),\n    (hex!(\"0100000000333333334444444455000002c00000000000002c10\"), 0x03ba01),\n    (hex!(\"0100000000333333334444444455000002c10000000000002c20\"), 0x03baa1),\n    (hex!(\"0100000000333333334444444455000002c10000000000003ef0\"), 0x03bb41),\n    (hex!(\"0100000000333333334444444455000002c20000000000002c30\"), 0x03bbe1),\n    (hex!(\"0100000000333333334444444455000002c200000000000056e0\"), 0x03bc81),\n    (hex!(\"0100000000333333334444444455000002c30000000000002c40\"), 0x03bd21),\n    (hex!(\"0100000000333333334444444455000002c30000000000004b60\"), 0x03bdc1),\n    (hex!(\"0100000000333333334444444455000002c40000000000002c50\"), 0x03be61),\n    
(hex!(\"0100000000333333334444444455000002c400000000000045f0\"), 0x03bf01),\n    (hex!(\"0100000000333333334444444455000002c40000000000005290\"), 0x03bfa1),\n    (hex!(\"0100000000333333334444444455000002c50000000000002c60\"), 0x03c041),\n    (hex!(\"0100000000333333334444444455000002c60000000000002c70\"), 0x03c0e1),\n    (hex!(\"0100000000333333334444444455000002c60000000000006ae0\"), 0x03c181),\n    (hex!(\"0100000000333333334444444455000002c70000000000002c80\"), 0x03c221),\n    (hex!(\"0100000000333333334444444455000002c70000000000005680\"), 0x03c2c1),\n    (hex!(\"0100000000333333334444444455000002c70000000000006e10\"), 0x03c361),\n    (hex!(\"0100000000333333334444444455000002c80000000000002c90\"), 0x03c401),\n    (hex!(\"0100000000333333334444444455000002c90000000000002ca0\"), 0x03c4a1),\n    (hex!(\"0100000000333333334444444455000002ca0000000000002cb0\"), 0x03c541),\n    (hex!(\"0100000000333333334444444455000002cb0000000000002cc0\"), 0x03c5e1),\n    (hex!(\"0100000000333333334444444455000002cc0000000000002cd0\"), 0x03c681),\n    (hex!(\"0100000000333333334444444455000002cc0000000000005b50\"), 0x03c721),\n    (hex!(\"0100000000333333334444444455000002cd0000000000002ce0\"), 0x03c7c1),\n    (hex!(\"0100000000333333334444444455000002ce0000000000002cf0\"), 0x03c861),\n    (hex!(\"0100000000333333334444444455000002ce00000000000043f0\"), 0x03c901),\n    (hex!(\"0100000000333333334444444455000002ce0000000000006420\"), 0x03c9a1),\n    (hex!(\"0100000000333333334444444455000002cf0000000000002d00\"), 0x03ca41),\n    (hex!(\"0100000000333333334444444455000002d00000000000002d10\"), 0x03cae1),\n    (hex!(\"0100000000333333334444444455000002d10000000000002d20\"), 0x03cb81),\n    (hex!(\"0100000000333333334444444455000002d10000000000005370\"), 0x03cc21),\n    (hex!(\"0100000000333333334444444455000002d20000000000002d30\"), 0x03ccc1),\n    (hex!(\"0100000000333333334444444455000002d20000000000005ef0\"), 0x03cd61),\n    
(hex!(\"0100000000333333334444444455000002d20000000000006570\"), 0x03ce01),\n    (hex!(\"0100000000333333334444444455000002d30000000000002d40\"), 0x03cea1),\n    (hex!(\"0100000000333333334444444455000002d30000000000007360\"), 0x03cf41),\n    (hex!(\"0100000000333333334444444455000002d40000000000002d50\"), 0x03cfe1),\n    (hex!(\"0100000000333333334444444455000002d400000000000079a0\"), 0x03d081),\n    (hex!(\"0100000000333333334444444455000002d50000000000002d60\"), 0x03d121),\n    (hex!(\"0100000000333333334444444455000002d50000000000004250\"), 0x03d1c1),\n    (hex!(\"0100000000333333334444444455000002d50000000000006050\"), 0x03d261),\n    (hex!(\"0100000000333333334444444455000002d60000000000002d70\"), 0x03d301),\n    (hex!(\"0100000000333333334444444455000002d60000000000007080\"), 0x03d3a1),\n    (hex!(\"0100000000333333334444444455000002d70000000000002d80\"), 0x03d441),\n    (hex!(\"0100000000333333334444444455000002d80000000000002d90\"), 0x03d4e1),\n    (hex!(\"0100000000333333334444444455000002d80000000000007110\"), 0x03d581),\n    (hex!(\"0100000000333333334444444455000002d800000000000073c0\"), 0x03d621),\n    (hex!(\"0100000000333333334444444455000002d800000000000075a0\"), 0x03d6c1),\n    (hex!(\"0100000000333333334444444455000002d90000000000002da0\"), 0x03d761),\n    (hex!(\"0100000000333333334444444455000002d90000000000004860\"), 0x03d801),\n    (hex!(\"0100000000333333334444444455000002d90000000000006b60\"), 0x03d8a1),\n    (hex!(\"0100000000333333334444444455000002da0000000000002db0\"), 0x03d941),\n    (hex!(\"0100000000333333334444444455000002da0000000000006630\"), 0x03d9e1),\n    (hex!(\"0100000000333333334444444455000002db0000000000002dc0\"), 0x03da81),\n    (hex!(\"0100000000333333334444444455000002dc0000000000002dd0\"), 0x03db21),\n    (hex!(\"0100000000333333334444444455000002dc0000000000004830\"), 0x03dbc1),\n    (hex!(\"0100000000333333334444444455000002dd0000000000002de0\"), 0x03dc61),\n    
(hex!(\"0100000000333333334444444455000002de0000000000002df0\"), 0x03dd01),\n    (hex!(\"0100000000333333334444444455000002de0000000000004f00\"), 0x03dda1),\n    (hex!(\"0100000000333333334444444455000002df0000000000002e00\"), 0x03de41),\n    (hex!(\"0100000000333333334444444455000002e00000000000002e10\"), 0x03dee1),\n    (hex!(\"0100000000333333334444444455000002e10000000000002e20\"), 0x03df81),\n    (hex!(\"0100000000333333334444444455000002e10000000000006e90\"), 0x03e021),\n    (hex!(\"0100000000333333334444444455000002e20000000000002e30\"), 0x03e0c1),\n    (hex!(\"0100000000333333334444444455000002e200000000000053e0\"), 0x03e161),\n    (hex!(\"0100000000333333334444444455000002e30000000000002e40\"), 0x03e201),\n    (hex!(\"0100000000333333334444444455000002e30000000000006020\"), 0x03e2a1),\n    (hex!(\"0100000000333333334444444455000002e30000000000006540\"), 0x03e341),\n    (hex!(\"0100000000333333334444444455000002e40000000000002e50\"), 0x03e3e1),\n    (hex!(\"0100000000333333334444444455000002e50000000000002e60\"), 0x03e481),\n    (hex!(\"0100000000333333334444444455000002e50000000000005180\"), 0x03e521),\n    (hex!(\"0100000000333333334444444455000002e50000000000007bf0\"), 0x03e5c1),\n    (hex!(\"0100000000333333334444444455000002e60000000000002e70\"), 0x03e661),\n    (hex!(\"0100000000333333334444444455000002e60000000000005350\"), 0x03e701),\n    (hex!(\"0100000000333333334444444455000002e60000000000007960\"), 0x03e7a1),\n    (hex!(\"0100000000333333334444444455000002e70000000000002e80\"), 0x03e841),\n    (hex!(\"0100000000333333334444444455000002e80000000000002e90\"), 0x03e8e1),\n    (hex!(\"0100000000333333334444444455000002e90000000000002ea0\"), 0x03e981),\n    (hex!(\"0100000000333333334444444455000002ea0000000000002eb0\"), 0x03ea21),\n    (hex!(\"0100000000333333334444444455000002eb0000000000002ec0\"), 0x03eac1),\n    (hex!(\"0100000000333333334444444455000002ec0000000000002ed0\"), 0x03eb61),\n    
(hex!(\"0100000000333333334444444455000002ec0000000000006c10\"), 0x03ec01),\n    (hex!(\"0100000000333333334444444455000002ed0000000000002ee0\"), 0x03eca1),\n    (hex!(\"0100000000333333334444444455000002ed0000000000005590\"), 0x03ed41),\n    (hex!(\"0100000000333333334444444455000002ed0000000000005cd0\"), 0x03ede1),\n    (hex!(\"0100000000333333334444444455000002ed0000000000006910\"), 0x03ee81),\n    (hex!(\"0100000000333333334444444455000002ee0000000000002ef0\"), 0x03ef21),\n    (hex!(\"0100000000333333334444444455000002ef0000000000002f00\"), 0x03efc1),\n    (hex!(\"0100000000333333334444444455000002ef0000000000004ed0\"), 0x03f061),\n    (hex!(\"0100000000333333334444444455000002f00000000000002f10\"), 0x03f101),\n    (hex!(\"0100000000333333334444444455000002f00000000000004cf0\"), 0x03f1a1),\n    (hex!(\"0100000000333333334444444455000002f00000000000005d10\"), 0x03f241),\n    (hex!(\"0100000000333333334444444455000002f00000000000006860\"), 0x03f2e1),\n    (hex!(\"0100000000333333334444444455000002f00000000000006b50\"), 0x03f381),\n    (hex!(\"0100000000333333334444444455000002f00000000000007100\"), 0x03f421),\n    (hex!(\"0100000000333333334444444455000002f00000000000007aa0\"), 0x03f4c1),\n    (hex!(\"0100000000333333334444444455000002f10000000000002f20\"), 0x03f561),\n    (hex!(\"0100000000333333334444444455000002f20000000000002f30\"), 0x03f601),\n    (hex!(\"0100000000333333334444444455000002f200000000000044b0\"), 0x03f6a1),\n    (hex!(\"0100000000333333334444444455000002f30000000000002f40\"), 0x03f741),\n    (hex!(\"0100000000333333334444444455000002f300000000000075b0\"), 0x03f7e1),\n    (hex!(\"0100000000333333334444444455000002f40000000000002f50\"), 0x03f881),\n    (hex!(\"0100000000333333334444444455000002f400000000000060f0\"), 0x03f921),\n    (hex!(\"0100000000333333334444444455000002f50000000000002f60\"), 0x03f9c1),\n    (hex!(\"0100000000333333334444444455000002f50000000000007210\"), 0x03fa61),\n    
(hex!(\"0100000000333333334444444455000002f60000000000002f70\"), 0x03fb01),\n    (hex!(\"0100000000333333334444444455000002f60000000000006610\"), 0x03fba1),\n    (hex!(\"0100000000333333334444444455000002f70000000000002f80\"), 0x03fc41),\n    (hex!(\"0100000000333333334444444455000002f70000000000007560\"), 0x03fce1),\n    (hex!(\"0100000000333333334444444455000002f80000000000002f90\"), 0x03fd81),\n    (hex!(\"0100000000333333334444444455000002f80000000000006320\"), 0x03fe21),\n    (hex!(\"0100000000333333334444444455000002f90000000000002fa0\"), 0x03fec1),\n    (hex!(\"0100000000333333334444444455000002f90000000000006e50\"), 0x03ff61),\n    (hex!(\"0100000000333333334444444455000002fa0000000000002fb0\"), 0x040001),\n    (hex!(\"0100000000333333334444444455000002fb0000000000002fc0\"), 0x0400a1),\n    (hex!(\"0100000000333333334444444455000002fb0000000000004780\"), 0x040141),\n    (hex!(\"0100000000333333334444444455000002fc0000000000002fd0\"), 0x0401e1),\n    (hex!(\"0100000000333333334444444455000002fd0000000000002fe0\"), 0x040281),\n    (hex!(\"0100000000333333334444444455000002fd0000000000005600\"), 0x040321),\n    (hex!(\"0100000000333333334444444455000002fd0000000000006c00\"), 0x0403c1),\n    (hex!(\"0100000000333333334444444455000002fe0000000000002ff0\"), 0x040461),\n    (hex!(\"0100000000333333334444444455000002ff0000000000003000\"), 0x040501),\n    (hex!(\"0100000000333333334444444455000003000000000000003010\"), 0x0405a1),\n    (hex!(\"0100000000333333334444444455000003000000000000004080\"), 0x040641),\n    (hex!(\"0100000000333333334444444455000003010000000000003020\"), 0x0406e1),\n    (hex!(\"0100000000333333334444444455000003010000000000006340\"), 0x040781),\n    (hex!(\"0100000000333333334444444455000003020000000000003030\"), 0x040821),\n    (hex!(\"0100000000333333334444444455000003020000000000005b00\"), 0x0408c1),\n    (hex!(\"0100000000333333334444444455000003020000000000007b20\"), 0x040961),\n    
(hex!(\"0100000000333333334444444455000003030000000000003040\"), 0x040a01),\n    (hex!(\"01000000003333333344444444550000030300000000000056b0\"), 0x040aa1),\n    (hex!(\"0100000000333333334444444455000003030000000000006280\"), 0x040b41),\n    (hex!(\"0100000000333333334444444455000003030000000000007ad0\"), 0x040be1),\n    (hex!(\"0100000000333333334444444455000003040000000000003050\"), 0x040c81),\n    (hex!(\"0100000000333333334444444455000003040000000000005c50\"), 0x040d21),\n    (hex!(\"0100000000333333334444444455000003050000000000003060\"), 0x040dc1),\n    (hex!(\"01000000003333333344444444550000030500000000000072e0\"), 0x040e61),\n    (hex!(\"0100000000333333334444444455000003060000000000003070\"), 0x040f01),\n    (hex!(\"0100000000333333334444444455000003060000000000004360\"), 0x040fa1),\n    (hex!(\"0100000000333333334444444455000003060000000000004380\"), 0x041041),\n    (hex!(\"0100000000333333334444444455000003060000000000004820\"), 0x0410e1),\n    (hex!(\"0100000000333333334444444455000003060000000000006d10\"), 0x041181),\n    (hex!(\"0100000000333333334444444455000003070000000000003080\"), 0x041221),\n    (hex!(\"0100000000333333334444444455000003070000000000004450\"), 0x0412c1),\n    (hex!(\"0100000000333333334444444455000003080000000000003090\"), 0x041361),\n    (hex!(\"0100000000333333334444444455000003080000000000005ad0\"), 0x041401),\n    (hex!(\"01000000003333333344444444550000030900000000000030a0\"), 0x0414a1),\n    (hex!(\"01000000003333333344444444550000030a00000000000030b0\"), 0x041541),\n    (hex!(\"01000000003333333344444444550000030a0000000000007760\"), 0x0415e1),\n    (hex!(\"01000000003333333344444444550000030b00000000000030c0\"), 0x041681),\n    (hex!(\"01000000003333333344444444550000030b0000000000007a80\"), 0x041721),\n    (hex!(\"01000000003333333344444444550000030c00000000000030d0\"), 0x0417c1),\n    (hex!(\"01000000003333333344444444550000030d00000000000030e0\"), 0x041861),\n    
(hex!(\"01000000003333333344444444550000030d0000000000003eb0\"), 0x041901),\n    (hex!(\"01000000003333333344444444550000030e00000000000030f0\"), 0x0419a1),\n    (hex!(\"01000000003333333344444444550000030f0000000000003100\"), 0x041a41),\n    (hex!(\"01000000003333333344444444550000030f0000000000004690\"), 0x041ae1),\n    (hex!(\"01000000003333333344444444550000030f0000000000006900\"), 0x041b81),\n    (hex!(\"0100000000333333334444444455000003100000000000003110\"), 0x041c21),\n    (hex!(\"01000000003333333344444444550000031000000000000058a0\"), 0x041cc1),\n    (hex!(\"0100000000333333334444444455000003110000000000003120\"), 0x041d61),\n    (hex!(\"0100000000333333334444444455000003110000000000004200\"), 0x041e01),\n    (hex!(\"0100000000333333334444444455000003120000000000003130\"), 0x041ea1),\n    (hex!(\"0100000000333333334444444455000003130000000000003140\"), 0x041f41),\n    (hex!(\"0100000000333333334444444455000003130000000000004d50\"), 0x041fe1),\n    (hex!(\"0100000000333333334444444455000003130000000000005400\"), 0x042081),\n    (hex!(\"0100000000333333334444444455000003130000000000005520\"), 0x042121),\n    (hex!(\"0100000000333333334444444455000003140000000000003150\"), 0x0421c1),\n    (hex!(\"0100000000333333334444444455000003140000000000006450\"), 0x042261),\n    (hex!(\"0100000000333333334444444455000003150000000000003160\"), 0x042301),\n    (hex!(\"01000000003333333344444444550000031500000000000062d0\"), 0x0423a1),\n    (hex!(\"0100000000333333334444444455000003160000000000003170\"), 0x042441),\n    (hex!(\"0100000000333333334444444455000003160000000000004c40\"), 0x0424e1),\n    (hex!(\"0100000000333333334444444455000003160000000000007c80\"), 0x042581),\n    (hex!(\"0100000000333333334444444455000003170000000000003180\"), 0x042621),\n    (hex!(\"0100000000333333334444444455000003170000000000004400\"), 0x0426c1),\n    (hex!(\"0100000000333333334444444455000003170000000000005090\"), 0x042761),\n    
(hex!(\"0100000000333333334444444455000003170000000000006cb0\"), 0x042801),\n    (hex!(\"0100000000333333334444444455000003180000000000003190\"), 0x0428a1),\n    (hex!(\"0100000000333333334444444455000003180000000000006560\"), 0x042941),\n    (hex!(\"01000000003333333344444444550000031900000000000031a0\"), 0x0429e1),\n    (hex!(\"01000000003333333344444444550000031900000000000052d0\"), 0x042a81),\n    (hex!(\"01000000003333333344444444550000031900000000000057e0\"), 0x042b21),\n    (hex!(\"01000000003333333344444444550000031a00000000000031b0\"), 0x042bc1),\n    (hex!(\"01000000003333333344444444550000031a00000000000071e0\"), 0x042c61),\n    (hex!(\"01000000003333333344444444550000031b00000000000031c0\"), 0x042d01),\n    (hex!(\"01000000003333333344444444550000031c00000000000031d0\"), 0x042da1),\n    (hex!(\"01000000003333333344444444550000031c0000000000004480\"), 0x042e41),\n    (hex!(\"01000000003333333344444444550000031c0000000000005790\"), 0x042ee1),\n    (hex!(\"01000000003333333344444444550000031c0000000000007be0\"), 0x042f81),\n    (hex!(\"01000000003333333344444444550000031d00000000000031e0\"), 0x043021),\n    (hex!(\"01000000003333333344444444550000031d0000000000005560\"), 0x0430c1),\n    (hex!(\"01000000003333333344444444550000031e00000000000031f0\"), 0x043161),\n    (hex!(\"01000000003333333344444444550000031f0000000000003200\"), 0x043201),\n    (hex!(\"01000000003333333344444444550000031f0000000000004190\"), 0x0432a1),\n    (hex!(\"0100000000333333334444444455000003200000000000003210\"), 0x043341),\n    (hex!(\"0100000000333333334444444455000003210000000000003220\"), 0x0433e1),\n    (hex!(\"0100000000333333334444444455000003220000000000003230\"), 0x043481),\n    (hex!(\"0100000000333333334444444455000003230000000000003240\"), 0x043521),\n    (hex!(\"01000000003333333344444444550000032300000000000069d0\"), 0x0435c1),\n    (hex!(\"0100000000333333334444444455000003240000000000003250\"), 0x043661),\n    
(hex!(\"0100000000333333334444444455000003250000000000003260\"), 0x043701),\n    (hex!(\"01000000003333333344444444550000032500000000000042b0\"), 0x0437a1),\n    (hex!(\"01000000003333333344444444550000032500000000000064e0\"), 0x043841),\n    (hex!(\"0100000000333333334444444455000003260000000000003270\"), 0x0438e1),\n    (hex!(\"0100000000333333334444444455000003270000000000003280\"), 0x043981),\n    (hex!(\"0100000000333333334444444455000003270000000000005b20\"), 0x043a21),\n    (hex!(\"0100000000333333334444444455000003270000000000006330\"), 0x043ac1),\n    (hex!(\"0100000000333333334444444455000003270000000000006810\"), 0x043b61),\n    (hex!(\"0100000000333333334444444455000003280000000000003290\"), 0x043c01),\n    (hex!(\"01000000003333333344444444550000032900000000000032a0\"), 0x043ca1),\n    (hex!(\"01000000003333333344444444550000032900000000000056f0\"), 0x043d41),\n    (hex!(\"0100000000333333334444444455000003290000000000005e20\"), 0x043de1),\n    (hex!(\"0100000000333333334444444455000003290000000000005e70\"), 0x043e81),\n    (hex!(\"01000000003333333344444444550000032a00000000000032b0\"), 0x043f21),\n    (hex!(\"01000000003333333344444444550000032b00000000000032c0\"), 0x043fc1),\n    (hex!(\"01000000003333333344444444550000032b0000000000005500\"), 0x044061),\n    (hex!(\"01000000003333333344444444550000032b0000000000005a20\"), 0x044101),\n    (hex!(\"01000000003333333344444444550000032c00000000000032d0\"), 0x0441a1),\n    (hex!(\"01000000003333333344444444550000032c0000000000004060\"), 0x044241),\n    (hex!(\"01000000003333333344444444550000032c0000000000004760\"), 0x0442e1),\n    (hex!(\"01000000003333333344444444550000032d00000000000032e0\"), 0x044381),\n    (hex!(\"01000000003333333344444444550000032d00000000000068a0\"), 0x044421),\n    (hex!(\"01000000003333333344444444550000032e00000000000032f0\"), 0x0444c1),\n    (hex!(\"01000000003333333344444444550000032f0000000000003300\"), 0x044561),\n    
(hex!(\"0100000000333333334444444455000003300000000000003310\"), 0x044601),\n    (hex!(\"0100000000333333334444444455000003300000000000006e40\"), 0x0446a1),\n    (hex!(\"0100000000333333334444444455000003310000000000003320\"), 0x044741),\n    (hex!(\"0100000000333333334444444455000003310000000000004620\"), 0x0447e1),\n    (hex!(\"0100000000333333334444444455000003320000000000003330\"), 0x044881),\n    (hex!(\"0100000000333333334444444455000003330000000000003340\"), 0x044921),\n    (hex!(\"0100000000333333334444444455000003330000000000004b80\"), 0x0449c1),\n    (hex!(\"0100000000333333334444444455000003340000000000003350\"), 0x044a61),\n    (hex!(\"0100000000333333334444444455000003350000000000003360\"), 0x044b01),\n    (hex!(\"0100000000333333334444444455000003360000000000003370\"), 0x044ba1),\n    (hex!(\"0100000000333333334444444455000003370000000000003380\"), 0x044c41),\n    (hex!(\"0100000000333333334444444455000003380000000000003390\"), 0x044ce1),\n    (hex!(\"01000000003333333344444444550000033900000000000033a0\"), 0x044d81),\n    (hex!(\"0100000000333333334444444455000003390000000000006b90\"), 0x044e21),\n    (hex!(\"01000000003333333344444444550000033a00000000000033b0\"), 0x044ec1),\n    (hex!(\"01000000003333333344444444550000033a0000000000007420\"), 0x044f61),\n    (hex!(\"01000000003333333344444444550000033b00000000000033c0\"), 0x045001),\n    (hex!(\"01000000003333333344444444550000033b0000000000007620\"), 0x0450a1),\n    (hex!(\"01000000003333333344444444550000033c00000000000033d0\"), 0x045141),\n    (hex!(\"01000000003333333344444444550000033c0000000000006b30\"), 0x0451e1),\n    (hex!(\"01000000003333333344444444550000033d00000000000033e0\"), 0x045281),\n    (hex!(\"01000000003333333344444444550000033e00000000000033f0\"), 0x045321),\n    (hex!(\"01000000003333333344444444550000033e00000000000048b0\"), 0x0453c1),\n    (hex!(\"01000000003333333344444444550000033e0000000000004e70\"), 0x045461),\n    
(hex!(\"01000000003333333344444444550000033f0000000000003400\"), 0x045501),\n    (hex!(\"01000000003333333344444444550000033f0000000000006380\"), 0x0455a1),\n    (hex!(\"0100000000333333334444444455000003400000000000003410\"), 0x045641),\n    (hex!(\"0100000000333333334444444455000003410000000000003420\"), 0x0456e1),\n    (hex!(\"0100000000333333334444444455000003410000000000006090\"), 0x045781),\n    (hex!(\"0100000000333333334444444455000003420000000000003430\"), 0x045821),\n    (hex!(\"01000000003333333344444444550000034200000000000073d0\"), 0x0458c1),\n    (hex!(\"0100000000333333334444444455000003430000000000003440\"), 0x045961),\n    (hex!(\"0100000000333333334444444455000003430000000000006370\"), 0x045a01),\n    (hex!(\"01000000003333333344444444550000034300000000000075c0\"), 0x045aa1),\n    (hex!(\"0100000000333333334444444455000003440000000000003450\"), 0x045b41),\n    (hex!(\"0100000000333333334444444455000003450000000000003460\"), 0x045be1),\n    (hex!(\"0100000000333333334444444455000003460000000000003470\"), 0x045c81),\n    (hex!(\"01000000003333333344444444550000034600000000000055f0\"), 0x045d21),\n    (hex!(\"0100000000333333334444444455000003470000000000003480\"), 0x045dc1),\n    (hex!(\"0100000000333333334444444455000003470000000000003fe0\"), 0x045e61),\n    (hex!(\"0100000000333333334444444455000003480000000000003490\"), 0x045f01),\n    (hex!(\"0100000000333333334444444455000003480000000000007990\"), 0x045fa1),\n    (hex!(\"01000000003333333344444444550000034900000000000034a0\"), 0x046041),\n    (hex!(\"0100000000333333334444444455000003490000000000004410\"), 0x0460e1),\n    (hex!(\"01000000003333333344444444550000034a00000000000034b0\"), 0x046181),\n    (hex!(\"01000000003333333344444444550000034a00000000000062a0\"), 0x046221),\n    (hex!(\"01000000003333333344444444550000034a0000000000007260\"), 0x0462c1),\n    (hex!(\"01000000003333333344444444550000034b00000000000034c0\"), 0x046361),\n    
(hex!(\"01000000003333333344444444550000034b0000000000005760\"), 0x046401),\n    (hex!(\"01000000003333333344444444550000034b0000000000006200\"), 0x0464a1),\n    (hex!(\"01000000003333333344444444550000034c00000000000034d0\"), 0x046541),\n    (hex!(\"01000000003333333344444444550000034d00000000000034e0\"), 0x0465e1),\n    (hex!(\"01000000003333333344444444550000034e00000000000034f0\"), 0x046681),\n    (hex!(\"01000000003333333344444444550000034e0000000000007790\"), 0x046721),\n    (hex!(\"01000000003333333344444444550000034f0000000000003500\"), 0x0467c1),\n    (hex!(\"0100000000333333334444444455000003500000000000003510\"), 0x046861),\n    (hex!(\"0100000000333333334444444455000003510000000000003520\"), 0x046901),\n    (hex!(\"0100000000333333334444444455000003520000000000003530\"), 0x0469a1),\n    (hex!(\"01000000003333333344444444550000035200000000000056a0\"), 0x046a41),\n    (hex!(\"0100000000333333334444444455000003530000000000003540\"), 0x046ae1),\n    (hex!(\"0100000000333333334444444455000003540000000000003550\"), 0x046b81),\n    (hex!(\"01000000003333333344444444550000035400000000000047b0\"), 0x046c21),\n    (hex!(\"0100000000333333334444444455000003550000000000003560\"), 0x046cc1),\n    (hex!(\"0100000000333333334444444455000003550000000000004500\"), 0x046d61),\n    (hex!(\"0100000000333333334444444455000003560000000000003570\"), 0x046e01),\n    (hex!(\"0100000000333333334444444455000003560000000000004fc0\"), 0x046ea1),\n    (hex!(\"0100000000333333334444444455000003560000000000007160\"), 0x046f41),\n    (hex!(\"0100000000333333334444444455000003560000000000007400\"), 0x046fe1),\n    (hex!(\"0100000000333333334444444455000003570000000000003580\"), 0x047081),\n    (hex!(\"0100000000333333334444444455000003580000000000003590\"), 0x047121),\n    (hex!(\"0100000000333333334444444455000003580000000000005a80\"), 0x0471c1),\n    (hex!(\"01000000003333333344444444550000035900000000000035a0\"), 0x047261),\n    
(hex!(\"01000000003333333344444444550000035900000000000073b0\"), 0x047301),\n    (hex!(\"01000000003333333344444444550000035a00000000000035b0\"), 0x0473a1),\n    (hex!(\"01000000003333333344444444550000035a0000000000004c20\"), 0x047441),\n    (hex!(\"01000000003333333344444444550000035b00000000000035c0\"), 0x0474e1),\n    (hex!(\"01000000003333333344444444550000035b0000000000005120\"), 0x047581),\n    (hex!(\"01000000003333333344444444550000035c00000000000035d0\"), 0x047621),\n    (hex!(\"01000000003333333344444444550000035c0000000000004300\"), 0x0476c1),\n    (hex!(\"01000000003333333344444444550000035c0000000000005a40\"), 0x047761),\n    (hex!(\"01000000003333333344444444550000035c0000000000006620\"), 0x047801),\n    (hex!(\"01000000003333333344444444550000035c0000000000006ed0\"), 0x0478a1),\n    (hex!(\"01000000003333333344444444550000035d00000000000035e0\"), 0x047941),\n    (hex!(\"01000000003333333344444444550000035d0000000000005df0\"), 0x0479e1),\n    (hex!(\"01000000003333333344444444550000035e00000000000035f0\"), 0x047a81),\n    (hex!(\"01000000003333333344444444550000035f0000000000003600\"), 0x047b21),\n    (hex!(\"01000000003333333344444444550000035f00000000000058d0\"), 0x047bc1),\n    (hex!(\"0100000000333333334444444455000003600000000000003610\"), 0x047c61),\n    (hex!(\"0100000000333333334444444455000003600000000000007b90\"), 0x047d01),\n    (hex!(\"0100000000333333334444444455000003610000000000003620\"), 0x047da1),\n    (hex!(\"0100000000333333334444444455000003610000000000006ad0\"), 0x047e41),\n    (hex!(\"0100000000333333334444444455000003620000000000003630\"), 0x047ee1),\n    (hex!(\"01000000003333333344444444550000036200000000000063a0\"), 0x047f81),\n    (hex!(\"0100000000333333334444444455000003630000000000003640\"), 0x048021),\n    (hex!(\"0100000000333333334444444455000003630000000000007250\"), 0x0480c1),\n    (hex!(\"0100000000333333334444444455000003640000000000003650\"), 0x048161),\n    
(hex!(\"0100000000333333334444444455000003640000000000005510\"), 0x048201),\n    (hex!(\"0100000000333333334444444455000003640000000000007850\"), 0x0482a1),\n    (hex!(\"0100000000333333334444444455000003650000000000003660\"), 0x048341),\n    (hex!(\"0100000000333333334444444455000003660000000000003670\"), 0x0483e1),\n    (hex!(\"0100000000333333334444444455000003660000000000004650\"), 0x048481),\n    (hex!(\"01000000003333333344444444550000036600000000000050d0\"), 0x048521),\n    (hex!(\"0100000000333333334444444455000003660000000000006eb0\"), 0x0485c1),\n    (hex!(\"0100000000333333334444444455000003670000000000003680\"), 0x048661),\n    (hex!(\"01000000003333333344444444550000036700000000000071f0\"), 0x048701),\n    (hex!(\"0100000000333333334444444455000003680000000000003690\"), 0x0487a1),\n    (hex!(\"01000000003333333344444444550000036900000000000036a0\"), 0x048841),\n    (hex!(\"0100000000333333334444444455000003690000000000005c70\"), 0x0488e1),\n    (hex!(\"01000000003333333344444444550000036a00000000000036b0\"), 0x048981),\n    (hex!(\"01000000003333333344444444550000036a00000000000071b0\"), 0x048a21),\n    (hex!(\"01000000003333333344444444550000036b00000000000036c0\"), 0x048ac1),\n    (hex!(\"01000000003333333344444444550000036b0000000000004670\"), 0x048b61),\n    (hex!(\"01000000003333333344444444550000036c00000000000036d0\"), 0x048c01),\n    (hex!(\"01000000003333333344444444550000036c0000000000004750\"), 0x048ca1),\n    (hex!(\"01000000003333333344444444550000036c0000000000006fa0\"), 0x048d41),\n    (hex!(\"01000000003333333344444444550000036d00000000000036e0\"), 0x048de1),\n    (hex!(\"01000000003333333344444444550000036d0000000000003f70\"), 0x048e81),\n    (hex!(\"01000000003333333344444444550000036d0000000000004b90\"), 0x048f21),\n    (hex!(\"01000000003333333344444444550000036d00000000000057a0\"), 0x048fc1),\n    (hex!(\"01000000003333333344444444550000036e00000000000036f0\"), 0x049061),\n    
(hex!(\"01000000003333333344444444550000036e00000000000075d0\"), 0x049101),\n    (hex!(\"01000000003333333344444444550000036f0000000000003700\"), 0x0491a1),\n    (hex!(\"0100000000333333334444444455000003700000000000003710\"), 0x049241),\n    (hex!(\"0100000000333333334444444455000003700000000000005aa0\"), 0x0492e1),\n    (hex!(\"0100000000333333334444444455000003710000000000003720\"), 0x049381),\n    (hex!(\"0100000000333333334444444455000003710000000000005130\"), 0x049421),\n    (hex!(\"0100000000333333334444444455000003710000000000006fc0\"), 0x0494c1),\n    (hex!(\"0100000000333333334444444455000003710000000000007b00\"), 0x049561),\n    (hex!(\"0100000000333333334444444455000003720000000000003730\"), 0x049601),\n    (hex!(\"01000000003333333344444444550000037200000000000054d0\"), 0x0496a1),\n    (hex!(\"0100000000333333334444444455000003730000000000003740\"), 0x049741),\n    (hex!(\"0100000000333333334444444455000003730000000000004220\"), 0x0497e1),\n    (hex!(\"0100000000333333334444444455000003740000000000003750\"), 0x049881),\n    (hex!(\"0100000000333333334444444455000003740000000000004720\"), 0x049921),\n    (hex!(\"0100000000333333334444444455000003750000000000003760\"), 0x0499c1),\n    (hex!(\"0100000000333333334444444455000003750000000000004110\"), 0x049a61),\n    (hex!(\"0100000000333333334444444455000003760000000000003770\"), 0x049b01),\n    (hex!(\"0100000000333333334444444455000003770000000000003780\"), 0x049ba1),\n    (hex!(\"0100000000333333334444444455000003780000000000003790\"), 0x049c41),\n    (hex!(\"0100000000333333334444444455000003780000000000004b40\"), 0x049ce1),\n    (hex!(\"0100000000333333334444444455000003780000000000005660\"), 0x049d81),\n    (hex!(\"0100000000333333334444444455000003780000000000005ea0\"), 0x049e21),\n    (hex!(\"01000000003333333344444444550000037900000000000037a0\"), 0x049ec1),\n    (hex!(\"01000000003333333344444444550000037a00000000000037b0\"), 0x049f61),\n    
(hex!(\"01000000003333333344444444550000037b00000000000037c0\"), 0x04a001),\n    (hex!(\"01000000003333333344444444550000037c00000000000037d0\"), 0x04a0a1),\n    (hex!(\"01000000003333333344444444550000037c0000000000004340\"), 0x04a141),\n    (hex!(\"01000000003333333344444444550000037c0000000000005230\"), 0x04a1e1),\n    (hex!(\"01000000003333333344444444550000037d00000000000037e0\"), 0x04a281),\n    (hex!(\"01000000003333333344444444550000037d00000000000051e0\"), 0x04a321),\n    (hex!(\"01000000003333333344444444550000037e00000000000037f0\"), 0x04a3c1),\n    (hex!(\"01000000003333333344444444550000037e0000000000004090\"), 0x04a461),\n    (hex!(\"01000000003333333344444444550000037e0000000000005c20\"), 0x04a501),\n    (hex!(\"01000000003333333344444444550000037f0000000000003800\"), 0x04a5a1),\n    (hex!(\"0100000000333333334444444455000003800000000000003810\"), 0x04a641),\n    (hex!(\"0100000000333333334444444455000003800000000000007630\"), 0x04a6e1),\n    (hex!(\"0100000000333333334444444455000003810000000000003820\"), 0x04a781),\n    (hex!(\"0100000000333333334444444455000003820000000000003830\"), 0x04a821),\n    (hex!(\"0100000000333333334444444455000003820000000000004170\"), 0x04a8c1),\n    (hex!(\"0100000000333333334444444455000003830000000000003840\"), 0x04a961),\n    (hex!(\"0100000000333333334444444455000003840000000000003850\"), 0x04aa01),\n    (hex!(\"0100000000333333334444444455000003850000000000003860\"), 0x04aaa1),\n    (hex!(\"0100000000333333334444444455000003850000000000004180\"), 0x04ab41),\n    (hex!(\"0100000000333333334444444455000003850000000000005c90\"), 0x04abe1),\n    (hex!(\"0100000000333333334444444455000003850000000000005da0\"), 0x04ac81),\n    (hex!(\"0100000000333333334444444455000003850000000000006ff0\"), 0x04ad21),\n    (hex!(\"0100000000333333334444444455000003860000000000003870\"), 0x04adc1),\n    (hex!(\"01000000003333333344444444550000038600000000000065c0\"), 0x04ae61),\n    
(hex!(\"0100000000333333334444444455000003870000000000003880\"), 0x04af01),\n    (hex!(\"0100000000333333334444444455000003870000000000007cc0\"), 0x04afa1),\n    (hex!(\"0100000000333333334444444455000003880000000000003890\"), 0x04b041),\n    (hex!(\"01000000003333333344444444550000038900000000000038a0\"), 0x04b0e1),\n    (hex!(\"01000000003333333344444444550000038a00000000000038b0\"), 0x04b181),\n    (hex!(\"01000000003333333344444444550000038a00000000000073e0\"), 0x04b221),\n    (hex!(\"01000000003333333344444444550000038b00000000000038c0\"), 0x04b2c1),\n    (hex!(\"01000000003333333344444444550000038c00000000000038d0\"), 0x04b361),\n    (hex!(\"01000000003333333344444444550000038d00000000000038e0\"), 0x04b401),\n    (hex!(\"01000000003333333344444444550000038d00000000000069f0\"), 0x04b4a1),\n    (hex!(\"01000000003333333344444444550000038d0000000000007680\"), 0x04b541),\n    (hex!(\"01000000003333333344444444550000038e00000000000038f0\"), 0x04b5e1),\n    (hex!(\"01000000003333333344444444550000038f0000000000003900\"), 0x04b681),\n    (hex!(\"01000000003333333344444444550000038f00000000000045b0\"), 0x04b721),\n    (hex!(\"01000000003333333344444444550000038f0000000000007180\"), 0x04b7c1),\n    (hex!(\"0100000000333333334444444455000003900000000000003910\"), 0x04b861),\n    (hex!(\"0100000000333333334444444455000003910000000000003920\"), 0x04b901),\n    (hex!(\"0100000000333333334444444455000003910000000000004a20\"), 0x04b9a1),\n    (hex!(\"0100000000333333334444444455000003920000000000003930\"), 0x04ba41),\n    (hex!(\"01000000003333333344444444550000039200000000000059b0\"), 0x04bae1),\n    (hex!(\"0100000000333333334444444455000003930000000000003940\"), 0x04bb81),\n    (hex!(\"0100000000333333334444444455000003930000000000006cc0\"), 0x04bc21),\n    (hex!(\"0100000000333333334444444455000003940000000000003950\"), 0x04bcc1),\n    (hex!(\"01000000003333333344444444550000039400000000000056c0\"), 0x04bd61),\n    
(hex!(\"0100000000333333334444444455000003950000000000003960\"), 0x04be01),\n    (hex!(\"0100000000333333334444444455000003950000000000004cc0\"), 0x04bea1),\n    (hex!(\"0100000000333333334444444455000003950000000000007720\"), 0x04bf41),\n    (hex!(\"0100000000333333334444444455000003960000000000003970\"), 0x04bfe1),\n    (hex!(\"0100000000333333334444444455000003960000000000004da0\"), 0x04c081),\n    (hex!(\"0100000000333333334444444455000003960000000000004df0\"), 0x04c121),\n    (hex!(\"0100000000333333334444444455000003960000000000004f30\"), 0x04c1c1),\n    (hex!(\"01000000003333333344444444550000039600000000000050f0\"), 0x04c261),\n    (hex!(\"0100000000333333334444444455000003960000000000007940\"), 0x04c301),\n    (hex!(\"0100000000333333334444444455000003970000000000003980\"), 0x04c3a1),\n    (hex!(\"0100000000333333334444444455000003970000000000005850\"), 0x04c441),\n    (hex!(\"0100000000333333334444444455000003970000000000007bd0\"), 0x04c4e1),\n    (hex!(\"0100000000333333334444444455000003980000000000003990\"), 0x04c581),\n    (hex!(\"0100000000333333334444444455000003980000000000004c00\"), 0x04c621),\n    (hex!(\"0100000000333333334444444455000003980000000000005580\"), 0x04c6c1),\n    (hex!(\"01000000003333333344444444550000039900000000000039a0\"), 0x04c761),\n    (hex!(\"0100000000333333334444444455000003990000000000005820\"), 0x04c801),\n    (hex!(\"01000000003333333344444444550000039a00000000000039b0\"), 0x04c8a1),\n    (hex!(\"01000000003333333344444444550000039b00000000000039c0\"), 0x04c941),\n    (hex!(\"01000000003333333344444444550000039b0000000000004c10\"), 0x04c9e1),\n    (hex!(\"01000000003333333344444444550000039b0000000000006460\"), 0x04ca81),\n    (hex!(\"01000000003333333344444444550000039c00000000000039d0\"), 0x04cb21),\n    (hex!(\"01000000003333333344444444550000039d00000000000039e0\"), 0x04cbc1),\n    (hex!(\"01000000003333333344444444550000039d00000000000044c0\"), 0x04cc61),\n    
(hex!(\"01000000003333333344444444550000039d00000000000049e0\"), 0x04cd01),\n    (hex!(\"01000000003333333344444444550000039e00000000000039f0\"), 0x04cda1),\n    (hex!(\"01000000003333333344444444550000039f0000000000003a00\"), 0x04ce41),\n    (hex!(\"0100000000333333334444444455000003a00000000000003a10\"), 0x04cee1),\n    (hex!(\"0100000000333333334444444455000003a10000000000003a20\"), 0x04cf81),\n    (hex!(\"0100000000333333334444444455000003a10000000000006a80\"), 0x04d021),\n    (hex!(\"0100000000333333334444444455000003a20000000000003a30\"), 0x04d0c1),\n    (hex!(\"0100000000333333334444444455000003a200000000000062b0\"), 0x04d161),\n    (hex!(\"0100000000333333334444444455000003a30000000000003a40\"), 0x04d201),\n    (hex!(\"0100000000333333334444444455000003a30000000000006ce0\"), 0x04d2a1),\n    (hex!(\"0100000000333333334444444455000003a40000000000003a50\"), 0x04d341),\n    (hex!(\"0100000000333333334444444455000003a50000000000003a60\"), 0x04d3e1),\n    (hex!(\"0100000000333333334444444455000003a60000000000003a70\"), 0x04d481),\n    (hex!(\"0100000000333333334444444455000003a60000000000007750\"), 0x04d521),\n    (hex!(\"0100000000333333334444444455000003a70000000000003a80\"), 0x04d5c1),\n    (hex!(\"0100000000333333334444444455000003a70000000000005b10\"), 0x04d661),\n    (hex!(\"0100000000333333334444444455000003a80000000000003a90\"), 0x04d701),\n    (hex!(\"0100000000333333334444444455000003a80000000000006c20\"), 0x04d7a1),\n    (hex!(\"0100000000333333334444444455000003a90000000000003aa0\"), 0x04d841),\n    (hex!(\"0100000000333333334444444455000003a90000000000005b70\"), 0x04d8e1),\n    (hex!(\"0100000000333333334444444455000003a900000000000070e0\"), 0x04d981),\n    (hex!(\"0100000000333333334444444455000003aa0000000000003ab0\"), 0x04da21),\n    (hex!(\"0100000000333333334444444455000003aa00000000000049f0\"), 0x04dac1),\n    (hex!(\"0100000000333333334444444455000003aa0000000000004d60\"), 0x04db61),\n    
(hex!(\"0100000000333333334444444455000003ab0000000000003ac0\"), 0x04dc01),\n    (hex!(\"0100000000333333334444444455000003ac0000000000003ad0\"), 0x04dca1),\n    (hex!(\"0100000000333333334444444455000003ac0000000000004580\"), 0x04dd41),\n    (hex!(\"0100000000333333334444444455000003ad0000000000003ae0\"), 0x04dde1),\n    (hex!(\"0100000000333333334444444455000003ae0000000000003af0\"), 0x04de81),\n    (hex!(\"0100000000333333334444444455000003af0000000000003b00\"), 0x04df21),\n    (hex!(\"0100000000333333334444444455000003b00000000000003b10\"), 0x04dfc1),\n    (hex!(\"0100000000333333334444444455000003b10000000000003b20\"), 0x04e061),\n    (hex!(\"0100000000333333334444444455000003b10000000000003fd0\"), 0x04e101),\n    (hex!(\"0100000000333333334444444455000003b20000000000003b30\"), 0x04e1a1),\n    (hex!(\"0100000000333333334444444455000003b30000000000003b40\"), 0x04e241),\n    (hex!(\"0100000000333333334444444455000003b40000000000003b50\"), 0x04e2e1),\n    (hex!(\"0100000000333333334444444455000003b40000000000007450\"), 0x04e381),\n    (hex!(\"0100000000333333334444444455000003b50000000000003b60\"), 0x04e421),\n    (hex!(\"0100000000333333334444444455000003b60000000000003b70\"), 0x04e4c1),\n    (hex!(\"0100000000333333334444444455000003b70000000000003b80\"), 0x04e561),\n    (hex!(\"0100000000333333334444444455000003b70000000000006d50\"), 0x04e601),\n    (hex!(\"0100000000333333334444444455000003b80000000000003b90\"), 0x04e6a1),\n    (hex!(\"0100000000333333334444444455000003b800000000000057c0\"), 0x04e741),\n    (hex!(\"0100000000333333334444444455000003b800000000000078a0\"), 0x04e7e1),\n    (hex!(\"0100000000333333334444444455000003b90000000000003ba0\"), 0x04e881),\n    (hex!(\"0100000000333333334444444455000003b90000000000006750\"), 0x04e921),\n    (hex!(\"0100000000333333334444444455000003ba0000000000003bb0\"), 0x04e9c1),\n    (hex!(\"0100000000333333334444444455000003ba0000000000007a10\"), 0x04ea61),\n    
(hex!(\"0100000000333333334444444455000003ba0000000000007a20\"), 0x04eb01),\n    (hex!(\"0100000000333333334444444455000003bb0000000000003bc0\"), 0x04eba1),\n    (hex!(\"0100000000333333334444444455000003bb0000000000005bc0\"), 0x04ec41),\n    (hex!(\"0100000000333333334444444455000003bc0000000000003bd0\"), 0x04ece1),\n    (hex!(\"0100000000333333334444444455000003bc0000000000005e80\"), 0x04ed81),\n    (hex!(\"0100000000333333334444444455000003bc0000000000007ab0\"), 0x04ee21),\n    (hex!(\"0100000000333333334444444455000003bd0000000000003be0\"), 0x04eec1),\n    (hex!(\"0100000000333333334444444455000003bd00000000000049b0\"), 0x04ef61),\n    (hex!(\"0100000000333333334444444455000003be0000000000003bf0\"), 0x04f001),\n    (hex!(\"0100000000333333334444444455000003be0000000000005780\"), 0x04f0a1),\n    (hex!(\"0100000000333333334444444455000003be0000000000007930\"), 0x04f141),\n    (hex!(\"0100000000333333334444444455000003bf0000000000003c00\"), 0x04f1e1),\n    (hex!(\"0100000000333333334444444455000003bf0000000000005de0\"), 0x04f281),\n    (hex!(\"0100000000333333334444444455000003bf00000000000060b0\"), 0x04f321),\n    (hex!(\"0100000000333333334444444455000003bf00000000000060c0\"), 0x04f3c1),\n    (hex!(\"0100000000333333334444444455000003bf0000000000006a50\"), 0x04f461),\n    (hex!(\"0100000000333333334444444455000003c00000000000003c10\"), 0x04f501),\n    (hex!(\"0100000000333333334444444455000003c00000000000004030\"), 0x04f5a1),\n    (hex!(\"0100000000333333334444444455000003c10000000000003c20\"), 0x04f641),\n    (hex!(\"0100000000333333334444444455000003c20000000000003c30\"), 0x04f6e1),\n    (hex!(\"0100000000333333334444444455000003c200000000000040b0\"), 0x04f781),\n    (hex!(\"0100000000333333334444444455000003c30000000000003c40\"), 0x04f821),\n    (hex!(\"0100000000333333334444444455000003c40000000000003c50\"), 0x04f8c1),\n    (hex!(\"0100000000333333334444444455000003c40000000000005ba0\"), 0x04f961),\n    
(hex!(\"0100000000333333334444444455000003c50000000000003c60\"), 0x04fa01),\n    (hex!(\"0100000000333333334444444455000003c60000000000003c70\"), 0x04faa1),\n    (hex!(\"0100000000333333334444444455000003c70000000000003c80\"), 0x04fb41),\n    (hex!(\"0100000000333333334444444455000003c70000000000004270\"), 0x04fbe1),\n    (hex!(\"0100000000333333334444444455000003c80000000000003c90\"), 0x04fc81),\n    (hex!(\"0100000000333333334444444455000003c80000000000006e70\"), 0x04fd21),\n    (hex!(\"0100000000333333334444444455000003c90000000000003ca0\"), 0x04fdc1),\n    (hex!(\"0100000000333333334444444455000003ca0000000000003cb0\"), 0x04fe61),\n    (hex!(\"0100000000333333334444444455000003ca0000000000006e20\"), 0x04ff01),\n    (hex!(\"0100000000333333334444444455000003ca0000000000007c20\"), 0x04ffa1),\n    (hex!(\"0100000000333333334444444455000003cb0000000000003cc0\"), 0x050041),\n    (hex!(\"0100000000333333334444444455000003cc0000000000003cd0\"), 0x0500e1),\n    (hex!(\"0100000000333333334444444455000003cc0000000000006120\"), 0x050181),\n    (hex!(\"0100000000333333334444444455000003cc0000000000007950\"), 0x050221),\n    (hex!(\"0100000000333333334444444455000003cd0000000000003ce0\"), 0x0502c1),\n    (hex!(\"0100000000333333334444444455000003ce0000000000003cf0\"), 0x050361),\n    (hex!(\"0100000000333333334444444455000003cf0000000000003d00\"), 0x050401),\n    (hex!(\"0100000000333333334444444455000003d00000000000003d10\"), 0x0504a1),\n    (hex!(\"0100000000333333334444444455000003d10000000000003d20\"), 0x050541),\n    (hex!(\"0100000000333333334444444455000003d10000000000005e50\"), 0x0505e1),\n    (hex!(\"0100000000333333334444444455000003d10000000000007880\"), 0x050681),\n    (hex!(\"0100000000333333334444444455000003d20000000000003d30\"), 0x050721),\n    (hex!(\"0100000000333333334444444455000003d20000000000005d00\"), 0x0507c1),\n    (hex!(\"0100000000333333334444444455000003d30000000000003d40\"), 0x050861),\n    
(hex!(\"0100000000333333334444444455000003d30000000000005d40\"), 0x050901),\n    (hex!(\"0100000000333333334444444455000003d300000000000063f0\"), 0x0509a1),\n    (hex!(\"0100000000333333334444444455000003d40000000000003d50\"), 0x050a41),\n    (hex!(\"0100000000333333334444444455000003d40000000000005700\"), 0x050ae1),\n    (hex!(\"0100000000333333334444444455000003d400000000000078f0\"), 0x050b81),\n    (hex!(\"0100000000333333334444444455000003d50000000000003d60\"), 0x050c21),\n    (hex!(\"0100000000333333334444444455000003d60000000000003d70\"), 0x050cc1),\n    (hex!(\"0100000000333333334444444455000003d70000000000003d80\"), 0x050d61),\n    (hex!(\"0100000000333333334444444455000003d80000000000003d90\"), 0x050e01),\n    (hex!(\"0100000000333333334444444455000003d80000000000006690\"), 0x050ea1),\n    (hex!(\"0100000000333333334444444455000003d90000000000003da0\"), 0x050f41),\n    (hex!(\"0100000000333333334444444455000003d900000000000076d0\"), 0x050fe1),\n    (hex!(\"0100000000333333334444444455000003da0000000000003db0\"), 0x051081),\n    (hex!(\"0100000000333333334444444455000003db0000000000003dc0\"), 0x051121),\n    (hex!(\"0100000000333333334444444455000003db0000000000004a30\"), 0x0511c1),\n    (hex!(\"0100000000333333334444444455000003db0000000000005390\"), 0x051261),\n    (hex!(\"0100000000333333334444444455000003dc0000000000003dd0\"), 0x051301),\n    (hex!(\"0100000000333333334444444455000003dc0000000000006d60\"), 0x0513a1),\n    (hex!(\"0100000000333333334444444455000003dd0000000000003de0\"), 0x051441),\n    (hex!(\"0100000000333333334444444455000003de0000000000003df0\"), 0x0514e1),\n    (hex!(\"0100000000333333334444444455000003df0000000000003e00\"), 0x051581),\n    (hex!(\"0100000000333333334444444455000003df0000000000005240\"), 0x051621),\n    (hex!(\"0100000000333333334444444455000003df0000000000005610\"), 0x0516c1),\n    (hex!(\"0100000000333333334444444455000003e00000000000003e10\"), 0x051761),\n    
(hex!(\"0100000000333333334444444455000003e00000000000006500\"), 0x051801),\n    (hex!(\"0100000000333333334444444455000003e10000000000003e20\"), 0x0518a1),\n    (hex!(\"0100000000333333334444444455000003e10000000000006a10\"), 0x051941),\n    (hex!(\"0100000000333333334444444455000003e10000000000007c10\"), 0x0519e1),\n    (hex!(\"0100000000333333334444444455000003e20000000000003e30\"), 0x051a81),\n    (hex!(\"0100000000333333334444444455000003e20000000000006310\"), 0x051b21),\n    (hex!(\"0100000000333333334444444455000003e30000000000003e40\"), 0x051bc1),\n    (hex!(\"0100000000333333334444444455000003e40000000000003e50\"), 0x051c61),\n    (hex!(\"0100000000333333334444444455000003e40000000000006780\"), 0x051d01),\n    (hex!(\"0100000000333333334444444455000003e40000000000007ce0\"), 0x051da1),\n    (hex!(\"0100000000333333334444444455000003e50000000000003e60\"), 0x051e41),\n    (hex!(\"0100000000333333334444444455000003e60000000000003e70\"), 0x051ee1),\n    (hex!(\"0100000000333333334444444455000003e60000000000005040\"), 0x051f81),\n    (hex!(\"0100000000333333334444444455000003e60000000000005bf0\"), 0x052021),\n    (hex!(\"0100000000333333334444444455000003e70000000000003e80\"), 0x0520c1),\n    (hex!(\"0100000000333333334444444455000003e70000000000003f50\"), 0x052161),\n];\n"
  },
  {
    "path": "pageserver/src/tenant/ephemeral_file.rs",
    "content": "//! Implementation of append-only file data structure\n//! used to keep in-memory layers spilled on disk.\n\nuse std::io;\nuse std::sync::Arc;\nuse std::sync::atomic::{AtomicU64, Ordering};\n\nuse camino::Utf8PathBuf;\nuse num_traits::Num;\nuse pageserver_api::shard::TenantShardId;\nuse tokio_epoll_uring::{BoundedBuf, Slice};\nuse tokio_util::sync::CancellationToken;\nuse tracing::{error, info_span};\nuse utils::id::TimelineId;\nuse utils::sync::gate::GateGuard;\n\nuse crate::assert_u64_eq_usize::{U64IsUsize, UsizeIsU64};\nuse crate::config::PageServerConf;\nuse crate::context::RequestContext;\nuse crate::page_cache;\nuse crate::tenant::storage_layer::inmemory_layer::GlobalResourceUnits;\nuse crate::tenant::storage_layer::inmemory_layer::vectored_dio_read::File;\nuse crate::virtual_file::owned_buffers_io::io_buf_aligned::IoBufAlignedMut;\nuse crate::virtual_file::owned_buffers_io::slice::SliceMutExt;\nuse crate::virtual_file::owned_buffers_io::write::{Buffer, FlushTaskError};\nuse crate::virtual_file::{self, IoBufferMut, TempVirtualFile, VirtualFile, owned_buffers_io};\n\nuse self::owned_buffers_io::write::OwnedAsyncWriter;\n\npub struct EphemeralFile {\n    _tenant_shard_id: TenantShardId,\n    _timeline_id: TimelineId,\n    page_cache_file_id: page_cache::FileId,\n    file: TempVirtualFileCoOwnedByEphemeralFileAndBufferedWriter,\n\n    buffered_writer: tokio::sync::RwLock<BufferedWriter>,\n\n    bytes_written: AtomicU64,\n\n    resource_units: std::sync::Mutex<GlobalResourceUnits>,\n}\n\ntype BufferedWriter = owned_buffers_io::write::BufferedWriter<\n    IoBufferMut,\n    TempVirtualFileCoOwnedByEphemeralFileAndBufferedWriter,\n>;\n\n/// A TempVirtualFile that is co-owned by the [`EphemeralFile`]` and [`BufferedWriter`].\n///\n/// (Actually [`BufferedWriter`] internally is just a client to a background flush task.\n/// The co-ownership is between [`EphemeralFile`] and that flush task.)\n///\n/// Co-ownership allows us to serve reads for data that 
has already been flushed by the [`BufferedWriter`].\n#[derive(Debug, Clone)]\nstruct TempVirtualFileCoOwnedByEphemeralFileAndBufferedWriter {\n    inner: Arc<TempVirtualFile>,\n}\n\nconst TAIL_SZ: usize = 64 * 1024;\n\nimpl EphemeralFile {\n    pub async fn create(\n        conf: &PageServerConf,\n        tenant_shard_id: TenantShardId,\n        timeline_id: TimelineId,\n        gate: &utils::sync::gate::Gate,\n        cancel: &CancellationToken,\n        ctx: &RequestContext,\n    ) -> anyhow::Result<EphemeralFile> {\n        // TempVirtualFile requires us to never reuse a filename while an old\n        // instance of TempVirtualFile created with that filename is not done dropping yet.\n        // So, we use a monotonic counter to disambiguate the filenames.\n        static NEXT_TEMP_DISAMBIGUATOR: AtomicU64 = AtomicU64::new(1);\n        let filename_disambiguator =\n            NEXT_TEMP_DISAMBIGUATOR.fetch_add(1, std::sync::atomic::Ordering::Relaxed);\n\n        let filename = conf\n            .timeline_path(&tenant_shard_id, &timeline_id)\n            .join(Utf8PathBuf::from(format!(\n                \"ephemeral-{filename_disambiguator}\"\n            )));\n\n        let file = TempVirtualFileCoOwnedByEphemeralFileAndBufferedWriter::new(\n            VirtualFile::open_with_options_v2(\n                &filename,\n                virtual_file::OpenOptions::new()\n                    .create_new(true)\n                    .read(true)\n                    .write(true),\n                ctx,\n            )\n            .await?,\n            gate.enter()?,\n        );\n\n        let page_cache_file_id = page_cache::next_file_id(); // XXX get rid, we're not page-caching anymore\n\n        Ok(EphemeralFile {\n            _tenant_shard_id: tenant_shard_id,\n            _timeline_id: timeline_id,\n            page_cache_file_id,\n            file: file.clone(),\n            buffered_writer: tokio::sync::RwLock::new(BufferedWriter::new(\n                file,\n          
      0,\n                || IoBufferMut::with_capacity(TAIL_SZ),\n                gate.enter()?,\n                cancel.child_token(),\n                ctx,\n                info_span!(parent: None, \"ephemeral_file_buffered_writer\", tenant_id=%tenant_shard_id.tenant_id, shard_id=%tenant_shard_id.shard_slug(), timeline_id=%timeline_id, path = %filename),\n            )),\n            bytes_written: AtomicU64::new(0),\n            resource_units: std::sync::Mutex::new(GlobalResourceUnits::new()),\n        })\n    }\n}\n\nimpl TempVirtualFileCoOwnedByEphemeralFileAndBufferedWriter {\n    fn new(file: VirtualFile, gate_guard: GateGuard) -> Self {\n        Self {\n            inner: Arc::new(TempVirtualFile::new(file, gate_guard)),\n        }\n    }\n}\n\nimpl OwnedAsyncWriter for TempVirtualFileCoOwnedByEphemeralFileAndBufferedWriter {\n    fn write_all_at<Buf: owned_buffers_io::io_buf_aligned::IoBufAligned + Send>(\n        &self,\n        buf: owned_buffers_io::io_buf_ext::FullSlice<Buf>,\n        offset: u64,\n        ctx: &RequestContext,\n    ) -> impl std::future::Future<\n        Output = (\n            owned_buffers_io::io_buf_ext::FullSlice<Buf>,\n            std::io::Result<()>,\n        ),\n    > + Send {\n        self.inner.write_all_at(buf, offset, ctx)\n    }\n\n    fn set_len(\n        &self,\n        len: u64,\n        ctx: &RequestContext,\n    ) -> impl Future<Output = std::io::Result<()>> + Send {\n        self.inner.set_len(len, ctx)\n    }\n}\n\nimpl std::ops::Deref for TempVirtualFileCoOwnedByEphemeralFileAndBufferedWriter {\n    type Target = VirtualFile;\n\n    fn deref(&self) -> &Self::Target {\n        &self.inner\n    }\n}\n\n#[derive(Debug, thiserror::Error)]\npub(crate) enum EphemeralFileWriteError {\n    #[error(\"cancelled\")]\n    Cancelled,\n}\n\nimpl EphemeralFile {\n    pub(crate) fn len(&self) -> u64 {\n        // TODO(vlad): The value returned here is not always correct if\n        // we have more than one concurrent writer. 
Writes are always\n        // sequenced, but we could grab the buffered writer lock if we wanted\n        // to.\n        self.bytes_written.load(Ordering::Acquire)\n    }\n\n    pub(crate) fn page_cache_file_id(&self) -> page_cache::FileId {\n        self.page_cache_file_id\n    }\n\n    pub(crate) async fn load_to_io_buf(\n        &self,\n        ctx: &RequestContext,\n    ) -> Result<IoBufferMut, io::Error> {\n        let size = self.len().into_usize();\n        let buf = IoBufferMut::with_capacity(size);\n        let (slice, nread) = self.read_exact_at_eof_ok(0, buf.slice_full(), ctx).await?;\n        assert_eq!(nread, size);\n        let buf = slice.into_inner();\n        assert_eq!(buf.len(), nread);\n        assert_eq!(buf.capacity(), size, \"we shouldn't be reallocating\");\n        Ok(buf)\n    }\n\n    /// Returns the offset at which the first byte of the input was written, for use\n    /// in constructing indices over the written value.\n    ///\n    /// Panics if the write is short because there's no way we can recover from that.\n    /// TODO: make upstack handle this as an error.\n    pub(crate) async fn write_raw(\n        &self,\n        srcbuf: &[u8],\n        ctx: &RequestContext,\n    ) -> Result<u64, EphemeralFileWriteError> {\n        let (pos, control) = self.write_raw_controlled(srcbuf, ctx).await?;\n        if let Some(control) = control {\n            control.release().await;\n        }\n        Ok(pos)\n    }\n\n    async fn write_raw_controlled(\n        &self,\n        srcbuf: &[u8],\n        ctx: &RequestContext,\n    ) -> Result<(u64, Option<owned_buffers_io::write::FlushControl>), EphemeralFileWriteError> {\n        let mut writer = self.buffered_writer.write().await;\n\n        let (nwritten, control) = writer\n            .write_buffered_borrowed_controlled(srcbuf, ctx)\n            .await\n            .map_err(|e| match e {\n                FlushTaskError::Cancelled => EphemeralFileWriteError::Cancelled,\n            })?;\n        
assert_eq!(\n            nwritten,\n            srcbuf.len(),\n            \"buffered writer has no short writes\"\n        );\n\n        // There's no realistic risk of overflow here. We won't have exabytes sized files on disk.\n        let pos = self\n            .bytes_written\n            .fetch_add(srcbuf.len().into_u64(), Ordering::AcqRel);\n\n        let mut resource_units = self.resource_units.lock().unwrap();\n        resource_units.maybe_publish_size(self.bytes_written.load(Ordering::Relaxed));\n\n        Ok((pos, control))\n    }\n\n    pub(crate) fn tick(&self) -> Option<u64> {\n        let mut resource_units = self.resource_units.lock().unwrap();\n        let len = self.bytes_written.load(Ordering::Relaxed);\n        resource_units.publish_size(len)\n    }\n}\n\nimpl super::storage_layer::inmemory_layer::vectored_dio_read::File for EphemeralFile {\n    async fn read_exact_at_eof_ok<B: IoBufAlignedMut + Send>(\n        &self,\n        start: u64,\n        mut dst: tokio_epoll_uring::Slice<B>,\n        ctx: &RequestContext,\n    ) -> std::io::Result<(tokio_epoll_uring::Slice<B>, usize)> {\n        // We will fill the slice in back to front. Hence, we need\n        // the slice to be fully initialized.\n        // TODO(vlad): Is there a nicer way of doing this?\n        dst.as_mut_rust_slice_full_zeroed();\n\n        let writer = self.buffered_writer.read().await;\n\n        // Read bytes written while under lock. This is a hack to deal with concurrent\n        // writes updating the number of bytes written. 
`bytes_written` is not DIO aligned\n        // but we may end the read there.\n        //\n        // TODO(vlad): Feels like there's a nicer path where we align the end if it\n        // shoots over the end of the file.\n        let bytes_written = self.bytes_written.load(Ordering::Acquire);\n\n        let dst_cap = dst.bytes_total().into_u64();\n        let end = {\n            // saturating_add is correct here because the max file size is u64::MAX, so,\n            // if start + dst.len() > u64::MAX, then we know it will be a short read\n            let mut end: u64 = start.saturating_add(dst_cap);\n            if end > bytes_written {\n                end = bytes_written;\n            }\n            end\n        };\n\n        let submitted_offset = writer.bytes_submitted();\n        let maybe_flushed = writer.inspect_maybe_flushed();\n\n        let mutable = match writer.inspect_mutable() {\n            Some(mutable) => &mutable[0..mutable.pending()],\n            None => {\n                // Timeline::cancel and hence buffered writer flush was cancelled.\n                // Remain read-available while timeline is shutting down.\n                &[]\n            }\n        };\n\n        // inclusive, exclusive\n        #[derive(Debug)]\n        struct Range<N>(N, N);\n        impl<N: Num + Clone + Copy + PartialOrd + Ord> Range<N> {\n            fn len(&self) -> N {\n                if self.0 > self.1 {\n                    N::zero()\n                } else {\n                    self.1 - self.0\n                }\n            }\n        }\n\n        let (written_range, maybe_flushed_range) = {\n            if maybe_flushed.is_some() {\n                // [       written       ][ maybe_flushed ][    mutable    ]\n                //                                         ^\n                //                                 `submitted_offset`\n                // <++++++ on disk +++++++????????????????>\n                (\n                    Range(\n           
             start,\n                        std::cmp::min(end, submitted_offset.saturating_sub(TAIL_SZ as u64)),\n                    ),\n                    Range(\n                        std::cmp::max(start, submitted_offset.saturating_sub(TAIL_SZ as u64)),\n                        std::cmp::min(end, submitted_offset),\n                    ),\n                )\n            } else {\n                // [       written                        ][    mutable    ]\n                //                                         ^\n                //                                 `submitted_offset`\n                // <++++++ on disk +++++++++++++++++++++++>\n                (\n                    Range(start, std::cmp::min(end, submitted_offset)),\n                    // zero len\n                    Range(submitted_offset, u64::MIN),\n                )\n            }\n        };\n\n        let mutable_range = Range(std::cmp::max(start, submitted_offset), end);\n\n        // There are three sources from which we might have to read data:\n        // 1. The file itself\n        // 2. The buffer which contains changes currently being flushed\n        // 3. 
The buffer which contains changes yet to be flushed\n        //\n        // For better concurrency, we do them in reverse order: perform the in-memory\n        // reads while holding the writer lock, drop the writer lock and read from the\n        // file if required.\n\n        let dst = if mutable_range.len() > 0 {\n            let offset_in_buffer = mutable_range\n                .0\n                .checked_sub(submitted_offset)\n                .unwrap()\n                .into_usize();\n            let to_copy =\n                &mutable[offset_in_buffer..(offset_in_buffer + mutable_range.len().into_usize())];\n            let bounds = dst.bounds();\n            let mut view = dst.slice({\n                let start =\n                    written_range.len().into_usize() + maybe_flushed_range.len().into_usize();\n                let end = start.checked_add(mutable_range.len().into_usize()).unwrap();\n                start..end\n            });\n            view.as_mut_rust_slice_full_zeroed()\n                .copy_from_slice(to_copy);\n            Slice::from_buf_bounds(Slice::into_inner(view), bounds)\n        } else {\n            dst\n        };\n\n        let dst = if maybe_flushed_range.len() > 0 {\n            let offset_in_buffer = maybe_flushed_range\n                .0\n                .checked_sub(submitted_offset.saturating_sub(TAIL_SZ as u64))\n                .unwrap()\n                .into_usize();\n            // Checked previously the buffer is Some.\n            let maybe_flushed = maybe_flushed.unwrap();\n            let to_copy = &maybe_flushed\n                [offset_in_buffer..(offset_in_buffer + maybe_flushed_range.len().into_usize())];\n            let bounds = dst.bounds();\n            let mut view = dst.slice({\n                let start = written_range.len().into_usize();\n                let end = start\n                    .checked_add(maybe_flushed_range.len().into_usize())\n                    .unwrap();\n                
start..end\n            });\n            view.as_mut_rust_slice_full_zeroed()\n                .copy_from_slice(to_copy);\n            Slice::from_buf_bounds(Slice::into_inner(view), bounds)\n        } else {\n            dst\n        };\n\n        drop(writer);\n\n        let dst = if written_range.len() > 0 {\n            let bounds = dst.bounds();\n            let slice = self\n                .file\n                .read_exact_at(dst.slice(0..written_range.len().into_usize()), start, ctx)\n                .await?;\n            Slice::from_buf_bounds(Slice::into_inner(slice), bounds)\n        } else {\n            dst\n        };\n\n        // TODO: in debug mode, randomize the remaining bytes in `dst` to catch bugs\n\n        Ok((dst, (end - start).into_usize()))\n    }\n}\n\n/// Does the given filename look like an ephemeral file?\npub fn is_ephemeral_file(filename: &str) -> bool {\n    if let Some(rest) = filename.strip_prefix(\"ephemeral-\") {\n        rest.parse::<u32>().is_ok()\n    } else {\n        false\n    }\n}\n\n#[cfg(test)]\nmod tests {\n    use std::fs;\n    use std::str::FromStr;\n\n    use rand::Rng;\n\n    use super::*;\n    use crate::context::DownloadBehavior;\n    use crate::task_mgr::TaskKind;\n\n    fn harness(\n        test_name: &str,\n    ) -> Result<\n        (\n            &'static PageServerConf,\n            TenantShardId,\n            TimelineId,\n            RequestContext,\n        ),\n        io::Error,\n    > {\n        let repo_dir = PageServerConf::test_repo_dir(test_name);\n        let _ = fs::remove_dir_all(&repo_dir);\n        let conf = PageServerConf::dummy_conf(repo_dir);\n        // Make a static copy of the config. 
This can never be free'd, but that's\n        // OK in a test.\n        let conf: &'static PageServerConf = Box::leak(Box::new(conf));\n\n        let tenant_shard_id = TenantShardId::from_str(\"11000000000000000000000000000000\").unwrap();\n        let timeline_id = TimelineId::from_str(\"22000000000000000000000000000000\").unwrap();\n        fs::create_dir_all(conf.timeline_path(&tenant_shard_id, &timeline_id))?;\n\n        let ctx =\n            RequestContext::new(TaskKind::UnitTest, DownloadBehavior::Error).with_scope_unit_test();\n\n        Ok((conf, tenant_shard_id, timeline_id, ctx))\n    }\n\n    #[tokio::test]\n    async fn ephemeral_file_holds_gate_open() {\n        const FOREVER: std::time::Duration = std::time::Duration::from_secs(5);\n\n        let (conf, tenant_id, timeline_id, ctx) =\n            harness(\"ephemeral_file_holds_gate_open\").unwrap();\n\n        let gate = utils::sync::gate::Gate::default();\n        let cancel = CancellationToken::new();\n\n        let file = EphemeralFile::create(conf, tenant_id, timeline_id, &gate, &cancel, &ctx)\n            .await\n            .unwrap();\n\n        let mut closing = tokio::task::spawn(async move {\n            gate.close().await;\n        });\n\n        // gate is entered until the ephemeral file is dropped\n        // do not start paused tokio-epoll-uring has a sleep loop\n        tokio::time::pause();\n        tokio::time::timeout(FOREVER, &mut closing)\n            .await\n            .expect_err(\"closing cannot complete before dropping\");\n\n        // this is a requirement of the reset_tenant functionality: we have to be able to restart a\n        // tenant fast, and for that, we need all tenant_dir operations be guarded by entering a gate\n        drop(file);\n\n        tokio::time::timeout(FOREVER, &mut closing)\n            .await\n            .expect(\"closing completes right away\")\n            .expect(\"closing does not panic\");\n    }\n\n    #[tokio::test]\n    async fn 
test_ephemeral_file_basics() {\n        let (conf, tenant_id, timeline_id, ctx) = harness(\"test_ephemeral_file_basics\").unwrap();\n\n        let gate = utils::sync::gate::Gate::default();\n        let cancel = CancellationToken::new();\n\n        let file = EphemeralFile::create(conf, tenant_id, timeline_id, &gate, &cancel, &ctx)\n            .await\n            .unwrap();\n\n        let writer = file.buffered_writer.read().await;\n        let mutable = writer.mutable();\n        let cap = mutable.capacity();\n        let align = mutable.align();\n        drop(writer);\n\n        let write_nbytes = cap * 2 + cap / 2;\n\n        let content: Vec<u8> = rand::rng()\n            .sample_iter(rand::distr::StandardUniform)\n            .take(write_nbytes)\n            .collect();\n\n        let mut value_offsets = Vec::new();\n        for range in (0..write_nbytes)\n            .step_by(align)\n            .map(|start| start..(start + align).min(write_nbytes))\n        {\n            let off = file.write_raw(&content[range], &ctx).await.unwrap();\n            value_offsets.push(off);\n        }\n\n        assert_eq!(file.len() as usize, write_nbytes);\n        for (i, range) in (0..write_nbytes)\n            .step_by(align)\n            .map(|start| start..(start + align).min(write_nbytes))\n            .enumerate()\n        {\n            assert_eq!(value_offsets[i], range.start.into_u64());\n            let buf = IoBufferMut::with_capacity(range.len());\n            let (buf_slice, nread) = file\n                .read_exact_at_eof_ok(range.start.into_u64(), buf.slice_full(), &ctx)\n                .await\n                .unwrap();\n            let buf = buf_slice.into_inner();\n            assert_eq!(nread, range.len());\n            assert_eq!(&buf, &content[range]);\n        }\n\n        let file_contents = std::fs::read(file.file.path()).unwrap();\n        assert!(file_contents == content[0..cap * 2]);\n\n        let writer = file.buffered_writer.read().await;\n  
      let maybe_flushed_buffer_contents = writer.inspect_maybe_flushed().unwrap();\n        assert_eq!(&maybe_flushed_buffer_contents[..], &content[cap..cap * 2]);\n\n        let mutable_buffer_contents = writer.mutable();\n        assert_eq!(mutable_buffer_contents, &content[cap * 2..write_nbytes]);\n    }\n\n    #[tokio::test]\n    async fn test_flushes_do_happen() {\n        let (conf, tenant_id, timeline_id, ctx) = harness(\"test_flushes_do_happen\").unwrap();\n\n        let gate = utils::sync::gate::Gate::default();\n        let cancel = CancellationToken::new();\n        let file = EphemeralFile::create(conf, tenant_id, timeline_id, &gate, &cancel, &ctx)\n            .await\n            .unwrap();\n\n        // mutable buffer and maybe_flushed buffer each has `cap` bytes.\n        let writer = file.buffered_writer.read().await;\n        let cap = writer.mutable().capacity();\n        drop(writer);\n\n        let content: Vec<u8> = rand::rng()\n            .sample_iter(rand::distr::StandardUniform)\n            .take(cap * 2 + cap / 2)\n            .collect();\n\n        file.write_raw(&content, &ctx).await.unwrap();\n\n        // assert the state is as this test expects it to be\n        let load_io_buf_res = file.load_to_io_buf(&ctx).await.unwrap();\n        assert_eq!(&load_io_buf_res[..], &content[0..cap * 2 + cap / 2]);\n        let md = file.file.path().metadata().unwrap();\n        assert_eq!(\n            md.len(),\n            2 * cap.into_u64(),\n            \"buffered writer requires one write to be flushed if we write 2.5x buffer capacity\"\n        );\n        let writer = file.buffered_writer.read().await;\n        assert_eq!(\n            &writer.inspect_maybe_flushed().unwrap()[0..cap],\n            &content[cap..cap * 2]\n        );\n        assert_eq!(\n            &writer.mutable()[0..cap / 2],\n            &content[cap * 2..cap * 2 + cap / 2]\n        );\n    }\n\n    #[tokio::test]\n    async fn test_read_split_across_file_and_buffer() {\n 
       // This test exercises the logic on the read path that splits the logical read\n        // into a read from the flushed part (= the file) and a copy from the buffered writer's buffer.\n        //\n        // This test build on the assertions in test_flushes_do_happen\n\n        let (conf, tenant_id, timeline_id, ctx) =\n            harness(\"test_read_split_across_file_and_buffer\").unwrap();\n\n        let gate = utils::sync::gate::Gate::default();\n        let cancel = CancellationToken::new();\n\n        let file = EphemeralFile::create(conf, tenant_id, timeline_id, &gate, &cancel, &ctx)\n            .await\n            .unwrap();\n\n        let writer = file.buffered_writer.read().await;\n        let mutable = writer.mutable();\n        let cap = mutable.capacity();\n        let align = mutable.align();\n        drop(writer);\n        let content: Vec<u8> = rand::rng()\n            .sample_iter(rand::distr::StandardUniform)\n            .take(cap * 2 + cap / 2)\n            .collect();\n\n        let (_, control) = file.write_raw_controlled(&content, &ctx).await.unwrap();\n\n        let test_read = |start: usize, len: usize| {\n            let file = &file;\n            let ctx = &ctx;\n            let content = &content;\n            async move {\n                let (buf, nread) = file\n                    .read_exact_at_eof_ok(\n                        start.into_u64(),\n                        IoBufferMut::with_capacity(len).slice_full(),\n                        ctx,\n                    )\n                    .await\n                    .unwrap();\n                assert_eq!(nread, len);\n                assert_eq!(&buf.into_inner(), &content[start..(start + len)]);\n            }\n        };\n\n        let test_read_all_offset_combinations = || {\n            async move {\n                test_read(align, align).await;\n                // border onto edge of file\n                test_read(cap - align, align).await;\n                // read across 
file and buffer\n                test_read(cap - align, 2 * align).await;\n                // stay from start of maybe flushed buffer\n                test_read(cap, align).await;\n                // completely within maybe flushed buffer\n                test_read(cap + align, align).await;\n                // border onto edge of maybe flushed buffer.\n                test_read(cap * 2 - align, align).await;\n                // read across maybe flushed and mutable buffer\n                test_read(cap * 2 - align, 2 * align).await;\n                // read across three segments\n                test_read(cap - align, cap + 2 * align).await;\n                // completely within mutable buffer\n                test_read(cap * 2 + align, align).await;\n            }\n        };\n\n        // completely within the file range\n        assert!(align < cap, \"test assumption\");\n        assert!(cap % align == 0);\n\n        // test reads at different flush stages.\n        let not_started = control.unwrap().into_not_started();\n        test_read_all_offset_combinations().await;\n        let in_progress = not_started.ready_to_flush();\n        test_read_all_offset_combinations().await;\n        in_progress.wait_until_flush_is_done().await;\n        test_read_all_offset_combinations().await;\n    }\n}\n"
  },
  {
    "path": "pageserver/src/tenant/gc_block.rs",
    "content": "use std::collections::HashMap;\nuse std::sync::Arc;\n\nuse utils::id::TimelineId;\n\nuse super::remote_timeline_client::index::GcBlockingReason;\n\ntype Storage = HashMap<TimelineId, enumset::EnumSet<GcBlockingReason>>;\n\n/// GcBlock provides persistent (per-timeline) gc blocking.\n#[derive(Default)]\npub(crate) struct GcBlock {\n    /// The timelines which have current reasons to block gc.\n    ///\n    /// LOCK ORDER: this is held locked while scheduling the next index_part update. This is done\n    /// to keep this field up to date with RemoteTimelineClient `upload_queue.dirty`.\n    reasons: std::sync::Mutex<Storage>,\n\n    /// GC background task or manually run `Tenant::gc_iteration` holds a lock on this.\n    ///\n    /// Do not add any more features taking and forbidding taking this lock. It should be\n    /// `tokio::sync::Notify`, but that is rarely used. On the other side, [`GcBlock::insert`]\n    /// synchronizes with gc attempts by locking and unlocking this mutex.\n    blocking: Arc<tokio::sync::Mutex<()>>,\n}\n\nimpl GcBlock {\n    /// Start another gc iteration.\n    ///\n    /// Returns a guard to be held for the duration of gc iteration to allow synchronizing with\n    /// its ending, or if not currently possible, a value describing the reasons why not.\n    ///\n    /// Cancellation safe.\n    pub(super) async fn start(&self) -> Result<Guard, BlockingReasons> {\n        let reasons = {\n            let g = self.reasons.lock().unwrap();\n\n            // TODO: the assumption is that this method gets called periodically. in prod, we use 1h, in\n            // tests, we use everything. 
we should warn if the gc has been consecutively blocked\n            // for more than 1h (within single tenant session?).\n            BlockingReasons::clean_and_summarize(g)\n        };\n\n        if let Some(reasons) = reasons {\n            Err(reasons)\n        } else {\n            Ok(Guard {\n                _inner: self.blocking.clone().lock_owned().await,\n            })\n        }\n    }\n\n    /// Describe the current gc blocking reasons.\n    ///\n    /// TODO: make this json serializable.\n    pub(crate) fn summary(&self) -> Option<BlockingReasons> {\n        let g = self.reasons.lock().unwrap();\n\n        BlockingReasons::summarize(&g)\n    }\n\n    /// Start blocking gc for this one timeline for the given reason.\n    ///\n    /// This is not a guard based API but instead it mimics set API. The returned future will not\n    /// resolve until an existing gc round has completed.\n    ///\n    /// Returns true if this block was new, false if gc was already blocked for this reason.\n    ///\n    /// Cancellation safe: cancelling after first poll will keep the reason to block gc, but will\n    /// not wait for the scheduled upload or for an ongoing gc round to complete.\n    pub(crate) async fn insert(\n        &self,\n        timeline: &super::Timeline,\n        reason: GcBlockingReason,\n    ) -> anyhow::Result<bool> {\n        let (added, uploaded) = {\n            let mut g = self.reasons.lock().unwrap();\n            let set = g.entry(timeline.timeline_id).or_default();\n            let added = set.insert(reason);\n\n            // LOCK ORDER: intentionally hold the lock, see self.reasons.\n            let uploaded = timeline\n                .remote_client\n                .schedule_insert_gc_block_reason(reason)?;\n\n            (added, uploaded)\n        };\n\n        uploaded.await?;\n\n        // ensure that any ongoing gc iteration has completed\n        drop(self.blocking.lock().await);\n\n        Ok(added)\n    }\n\n    /// Remove blocking gc for this one timeline and the given 
reason.\n    pub(crate) async fn remove(\n        &self,\n        timeline: &super::Timeline,\n        reason: GcBlockingReason,\n    ) -> anyhow::Result<()> {\n        use std::collections::hash_map::Entry;\n\n        super::span::debug_assert_current_span_has_tenant_and_timeline_id();\n\n        let (remaining_blocks, uploaded) = {\n            let mut g = self.reasons.lock().unwrap();\n            match g.entry(timeline.timeline_id) {\n                Entry::Occupied(mut oe) => {\n                    let set = oe.get_mut();\n                    set.remove(reason);\n                    if set.is_empty() {\n                        oe.remove();\n                    }\n                }\n                Entry::Vacant(_) => {\n                    // we must still do the index_part.json update regardless, in case we had earlier\n                    // been cancelled\n                }\n            }\n\n            let remaining_blocks = g.len();\n\n            // LOCK ORDER: intentionally hold the lock while scheduling; see self.reasons\n            let uploaded = timeline\n                .remote_client\n                .schedule_remove_gc_block_reason(reason)?;\n\n            (remaining_blocks, uploaded)\n        };\n        uploaded.await?;\n\n        // no need to synchronize with gc iteration again\n\n        if remaining_blocks > 0 {\n            tracing::info!(remaining_blocks, removed=?reason, \"gc blocking removed, but gc remains blocked\");\n        } else {\n            tracing::info!(\"gc is now unblocked for the tenant\");\n        }\n\n        Ok(())\n    }\n\n    pub(crate) fn before_delete(&self, timeline_id: &super::TimelineId) {\n        let unblocked = {\n            let mut g = self.reasons.lock().unwrap();\n            if g.is_empty() {\n                return;\n            }\n\n            g.remove(timeline_id);\n\n            BlockingReasons::clean_and_summarize(g).is_none()\n        };\n\n        if unblocked {\n            tracing::info!(\"gc 
is now unblocked following deletion\");\n        }\n    }\n\n    /// Initialize with the non-deleted timelines of this tenant.\n    pub(crate) fn set_scanned(&self, scanned: Storage) {\n        let mut g = self.reasons.lock().unwrap();\n        assert!(g.is_empty());\n        g.extend(scanned.into_iter().filter(|(_, v)| !v.is_empty()));\n\n        if let Some(reasons) = BlockingReasons::clean_and_summarize(g) {\n            tracing::info!(summary=?reasons, \"initialized with gc blocked\");\n        }\n    }\n}\n\npub(crate) struct Guard {\n    _inner: tokio::sync::OwnedMutexGuard<()>,\n}\n\n#[derive(Debug)]\npub(crate) struct BlockingReasons {\n    timelines: usize,\n    reasons: enumset::EnumSet<GcBlockingReason>,\n}\n\nimpl std::fmt::Display for BlockingReasons {\n    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {\n        write!(\n            f,\n            \"{} timelines block for {:?}\",\n            self.timelines, self.reasons\n        )\n    }\n}\n\nimpl BlockingReasons {\n    fn clean_and_summarize(mut g: std::sync::MutexGuard<'_, Storage>) -> Option<Self> {\n        let mut reasons = enumset::EnumSet::empty();\n        g.retain(|_key, value| {\n            reasons = reasons.union(*value);\n            !value.is_empty()\n        });\n        if !g.is_empty() {\n            Some(BlockingReasons {\n                timelines: g.len(),\n                reasons,\n            })\n        } else {\n            None\n        }\n    }\n\n    fn summarize(g: &std::sync::MutexGuard<'_, Storage>) -> Option<Self> {\n        if g.is_empty() {\n            None\n        } else {\n            let reasons = g\n                .values()\n                .fold(enumset::EnumSet::empty(), |acc, next| acc.union(*next));\n            Some(BlockingReasons {\n                timelines: g.len(),\n                reasons,\n            })\n        }\n    }\n}\n"
  },
  {
    "path": "pageserver/src/tenant/gc_result.rs",
    "content": "use std::ops::AddAssign;\nuse std::time::Duration;\n\nuse anyhow::Result;\nuse serde::Serialize;\n\n///\n/// Result of performing GC\n///\n#[derive(Default, Serialize, Debug)]\npub struct GcResult {\n    pub layers_total: u64,\n    pub layers_needed_by_cutoff: u64,\n    pub layers_needed_by_pitr: u64,\n    pub layers_needed_by_branches: u64,\n    pub layers_needed_by_leases: u64,\n    pub layers_not_updated: u64,\n    pub layers_removed: u64, // # of layer files removed because they have been made obsolete by newer ondisk files.\n\n    #[serde(serialize_with = \"serialize_duration_as_millis\")]\n    pub elapsed: Duration,\n\n    /// The layers which were garbage collected.\n    ///\n    /// Used in `/v1/tenant/:tenant_id/timeline/:timeline_id/do_gc` to wait for the layers to be\n    /// dropped in tests.\n    #[cfg(feature = \"testing\")]\n    #[serde(skip)]\n    pub(crate) doomed_layers: Vec<crate::tenant::storage_layer::Layer>,\n}\n\n// helper function for `GcResult`, serializing a `Duration` as an integer number of milliseconds\nfn serialize_duration_as_millis<S>(d: &Duration, serializer: S) -> Result<S::Ok, S::Error>\nwhere\n    S: serde::Serializer,\n{\n    d.as_millis().serialize(serializer)\n}\n\nimpl AddAssign for GcResult {\n    fn add_assign(&mut self, other: Self) {\n        self.layers_total += other.layers_total;\n        self.layers_needed_by_pitr += other.layers_needed_by_pitr;\n        self.layers_needed_by_cutoff += other.layers_needed_by_cutoff;\n        self.layers_needed_by_branches += other.layers_needed_by_branches;\n        self.layers_needed_by_leases += other.layers_needed_by_leases;\n        self.layers_not_updated += other.layers_not_updated;\n        self.layers_removed += other.layers_removed;\n\n        self.elapsed += other.elapsed;\n\n        #[cfg(feature = \"testing\")]\n        {\n            let mut other = other;\n            self.doomed_layers.append(&mut other.doomed_layers);\n        }\n    }\n}\n"
  },
  {
    "path": "pageserver/src/tenant/layer_map/historic_layer_coverage.rs",
    "content": "use std::collections::BTreeMap;\nuse std::ops::Range;\n\nuse tracing::info;\n\nuse super::layer_coverage::LayerCoverageTuple;\nuse crate::tenant::storage_layer::PersistentLayerDesc;\n\n/// Layers in this module are identified and indexed by this data.\n///\n/// This is a helper struct to enable sorting layers by lsn.start.\n///\n/// These three values are enough to uniquely identify a layer, since\n/// a layer is obligated to contain all contents within range, so two\n/// deltas (or images) with the same range have identical content.\n#[derive(Debug, PartialEq, Eq, Clone)]\npub struct LayerKey {\n    // TODO I use i128 and u64 because it was easy for prototyping,\n    //      testing, and benchmarking. If we can use the Lsn and Key\n    //      types without overhead that would be preferable.\n    pub key: Range<i128>,\n    pub lsn: Range<u64>,\n    pub is_image: bool,\n}\n\nimpl PartialOrd for LayerKey {\n    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {\n        Some(self.cmp(other))\n    }\n}\n\nimpl Ord for LayerKey {\n    fn cmp(&self, other: &Self) -> std::cmp::Ordering {\n        // NOTE we really care about comparing by lsn.start first\n        self.lsn\n            .start\n            .cmp(&other.lsn.start)\n            .then(self.lsn.end.cmp(&other.lsn.end))\n            .then(self.key.start.cmp(&other.key.start))\n            .then(self.key.end.cmp(&other.key.end))\n            .then(self.is_image.cmp(&other.is_image))\n    }\n}\n\nimpl From<&PersistentLayerDesc> for LayerKey {\n    fn from(layer: &PersistentLayerDesc) -> Self {\n        let kr = layer.get_key_range();\n        let lr = layer.get_lsn_range();\n        LayerKey {\n            key: kr.start.to_i128()..kr.end.to_i128(),\n            lsn: lr.start.0..lr.end.0,\n            is_image: !layer.is_incremental(),\n        }\n    }\n}\n\n/// Efficiently queryable layer coverage for each LSN.\n///\n/// Allows answering layer map queries very efficiently,\n/// 
but doesn't allow retroactive insertion, which is\n/// sometimes necessary. See BufferedHistoricLayerCoverage.\npub struct HistoricLayerCoverage<Value> {\n    /// The latest state\n    head: LayerCoverageTuple<Value>,\n\n    /// TODO: this could be an ordered vec using binary search.\n    /// We push into this map everytime we add a layer, so might see some benefit\n    /// All previous states\n    historic: BTreeMap<u64, LayerCoverageTuple<Value>>,\n}\n\nimpl<T: Clone> Default for HistoricLayerCoverage<T> {\n    fn default() -> Self {\n        Self::new()\n    }\n}\n\nimpl<Value: Clone> HistoricLayerCoverage<Value> {\n    pub fn new() -> Self {\n        Self {\n            head: LayerCoverageTuple::default(),\n            historic: BTreeMap::default(),\n        }\n    }\n\n    /// Add a layer\n    ///\n    /// Panics if new layer has older lsn.start than an existing layer.\n    /// See BufferedHistoricLayerCoverage for a more general insertion method.\n    pub fn insert(&mut self, layer_key: LayerKey, value: Value) {\n        // It's only a persistent map, not a retroactive one\n        if let Some(last_entry) = self.historic.iter().next_back() {\n            let last_lsn = last_entry.0;\n            if layer_key.lsn.start < *last_lsn {\n                panic!(\"unexpected retroactive insert\");\n            }\n        }\n\n        // Insert into data structure\n        let target = if layer_key.is_image {\n            &mut self.head.image_coverage\n        } else {\n            &mut self.head.delta_coverage\n        };\n\n        target.insert(layer_key.key, layer_key.lsn.clone(), value);\n\n        // Remember history. 
Clone is O(1)\n        self.historic.insert(layer_key.lsn.start, self.head.clone());\n    }\n\n    /// Query at a particular LSN, inclusive\n    pub fn get_version(&self, lsn: u64) -> Option<&LayerCoverageTuple<Value>> {\n        match self.historic.range(..=lsn).next_back() {\n            Some((_, v)) => Some(v),\n            None => None,\n        }\n    }\n\n    /// Remove all entries after a certain LSN (inclusive)\n    pub fn trim(&mut self, begin: &u64) {\n        self.historic.split_off(begin);\n        self.head = self\n            .historic\n            .iter()\n            .next_back()\n            .map(|(_, v)| v.clone())\n            .unwrap_or_default();\n    }\n}\n\n/// This is the most basic test that demonstrates intended usage.\n/// All layers in this test have height 1.\n#[test]\nfn test_persistent_simple() {\n    let mut map = HistoricLayerCoverage::<String>::new();\n    map.insert(\n        LayerKey {\n            key: 0..5,\n            lsn: 100..101,\n            is_image: true,\n        },\n        \"Layer 1\".to_string(),\n    );\n    map.insert(\n        LayerKey {\n            key: 3..9,\n            lsn: 110..111,\n            is_image: true,\n        },\n        \"Layer 2\".to_string(),\n    );\n    map.insert(\n        LayerKey {\n            key: 5..6,\n            lsn: 120..121,\n            is_image: true,\n        },\n        \"Layer 3\".to_string(),\n    );\n\n    // After Layer 1 insertion\n    let version = map.get_version(105).unwrap();\n    assert_eq!(version.image_coverage.query(1), Some(\"Layer 1\".to_string()));\n    assert_eq!(version.image_coverage.query(4), Some(\"Layer 1\".to_string()));\n\n    // After Layer 2 insertion\n    let version = map.get_version(115).unwrap();\n    assert_eq!(version.image_coverage.query(4), Some(\"Layer 2\".to_string()));\n    assert_eq!(version.image_coverage.query(8), Some(\"Layer 2\".to_string()));\n    assert_eq!(version.image_coverage.query(11), None);\n\n    // After Layer 3 insertion\n  
  let version = map.get_version(125).unwrap();\n    assert_eq!(version.image_coverage.query(4), Some(\"Layer 2\".to_string()));\n    assert_eq!(version.image_coverage.query(5), Some(\"Layer 3\".to_string()));\n    assert_eq!(version.image_coverage.query(7), Some(\"Layer 2\".to_string()));\n}\n\n/// Cover simple off-by-one edge cases\n#[test]\nfn test_off_by_one() {\n    let mut map = HistoricLayerCoverage::<String>::new();\n    map.insert(\n        LayerKey {\n            key: 3..5,\n            lsn: 100..110,\n            is_image: true,\n        },\n        \"Layer 1\".to_string(),\n    );\n\n    // Check different LSNs\n    let version = map.get_version(99);\n    assert!(version.is_none());\n    let version = map.get_version(100).unwrap();\n    assert_eq!(version.image_coverage.query(4), Some(\"Layer 1\".to_string()));\n    let version = map.get_version(110).unwrap();\n    assert_eq!(version.image_coverage.query(4), Some(\"Layer 1\".to_string()));\n\n    // Check different keys\n    let version = map.get_version(105).unwrap();\n    assert_eq!(version.image_coverage.query(2), None);\n    assert_eq!(version.image_coverage.query(3), Some(\"Layer 1\".to_string()));\n    assert_eq!(version.image_coverage.query(4), Some(\"Layer 1\".to_string()));\n    assert_eq!(version.image_coverage.query(5), None);\n}\n\n/// White-box regression test, checking for incorrect removal of node at key.end\n#[test]\nfn test_regression() {\n    let mut map = HistoricLayerCoverage::<String>::new();\n    map.insert(\n        LayerKey {\n            key: 0..5,\n            lsn: 0..5,\n            is_image: false,\n        },\n        \"Layer 1\".to_string(),\n    );\n    map.insert(\n        LayerKey {\n            key: 0..5,\n            lsn: 1..2,\n            is_image: false,\n        },\n        \"Layer 2\".to_string(),\n    );\n\n    // If an insertion operation improperly deletes the endpoint of a previous layer\n    // (which is more likely to happen with layers that collide on 
key.end), we will\n    // end up with an infinite layer, covering the entire keyspace. Here we assert\n    // that there's no layer at key 100 because we didn't insert any layer there.\n    let version = map.get_version(100).unwrap();\n    assert_eq!(version.delta_coverage.query(100), None);\n}\n\n/// Cover edge cases where layers begin or end on the same key\n#[test]\nfn test_key_collision() {\n    let mut map = HistoricLayerCoverage::<String>::new();\n\n    map.insert(\n        LayerKey {\n            key: 3..5,\n            lsn: 100..110,\n            is_image: true,\n        },\n        \"Layer 10\".to_string(),\n    );\n    map.insert(\n        LayerKey {\n            key: 5..8,\n            lsn: 100..110,\n            is_image: true,\n        },\n        \"Layer 11\".to_string(),\n    );\n    map.insert(\n        LayerKey {\n            key: 3..4,\n            lsn: 200..210,\n            is_image: true,\n        },\n        \"Layer 20\".to_string(),\n    );\n\n    // Check after layer 11\n    let version = map.get_version(105).unwrap();\n    assert_eq!(version.image_coverage.query(2), None);\n    assert_eq!(\n        version.image_coverage.query(3),\n        Some(\"Layer 10\".to_string())\n    );\n    assert_eq!(\n        version.image_coverage.query(5),\n        Some(\"Layer 11\".to_string())\n    );\n    assert_eq!(\n        version.image_coverage.query(7),\n        Some(\"Layer 11\".to_string())\n    );\n    assert_eq!(version.image_coverage.query(8), None);\n\n    // Check after layer 20\n    let version = map.get_version(205).unwrap();\n    assert_eq!(version.image_coverage.query(2), None);\n    assert_eq!(\n        version.image_coverage.query(3),\n        Some(\"Layer 20\".to_string())\n    );\n    assert_eq!(\n        version.image_coverage.query(5),\n        Some(\"Layer 11\".to_string())\n    );\n    assert_eq!(\n        version.image_coverage.query(7),\n        Some(\"Layer 11\".to_string())\n    );\n    assert_eq!(version.image_coverage.query(8), 
None);\n}\n\n/// Test when rectangles have nontrivial height and possibly overlap\n#[test]\nfn test_persistent_overlapping() {\n    let mut map = HistoricLayerCoverage::<String>::new();\n\n    // Add 3 key-disjoint layers with varying LSN ranges\n    map.insert(\n        LayerKey {\n            key: 1..2,\n            lsn: 100..200,\n            is_image: true,\n        },\n        \"Layer 1\".to_string(),\n    );\n    map.insert(\n        LayerKey {\n            key: 4..5,\n            lsn: 110..200,\n            is_image: true,\n        },\n        \"Layer 2\".to_string(),\n    );\n    map.insert(\n        LayerKey {\n            key: 7..8,\n            lsn: 120..300,\n            is_image: true,\n        },\n        \"Layer 3\".to_string(),\n    );\n\n    // Add wide and short layer\n    map.insert(\n        LayerKey {\n            key: 0..9,\n            lsn: 130..199,\n            is_image: true,\n        },\n        \"Layer 4\".to_string(),\n    );\n\n    // Add wide layer taller than some\n    map.insert(\n        LayerKey {\n            key: 0..9,\n            lsn: 140..201,\n            is_image: true,\n        },\n        \"Layer 5\".to_string(),\n    );\n\n    // Add wide layer taller than all\n    map.insert(\n        LayerKey {\n            key: 0..9,\n            lsn: 150..301,\n            is_image: true,\n        },\n        \"Layer 6\".to_string(),\n    );\n\n    // After layer 4 insertion\n    let version = map.get_version(135).unwrap();\n    assert_eq!(version.image_coverage.query(0), Some(\"Layer 4\".to_string()));\n    assert_eq!(version.image_coverage.query(1), Some(\"Layer 1\".to_string()));\n    assert_eq!(version.image_coverage.query(2), Some(\"Layer 4\".to_string()));\n    assert_eq!(version.image_coverage.query(4), Some(\"Layer 2\".to_string()));\n    assert_eq!(version.image_coverage.query(5), Some(\"Layer 4\".to_string()));\n    assert_eq!(version.image_coverage.query(7), Some(\"Layer 3\".to_string()));\n    
assert_eq!(version.image_coverage.query(8), Some(\"Layer 4\".to_string()));\n\n    // After layer 5 insertion\n    let version = map.get_version(145).unwrap();\n    assert_eq!(version.image_coverage.query(0), Some(\"Layer 5\".to_string()));\n    assert_eq!(version.image_coverage.query(1), Some(\"Layer 5\".to_string()));\n    assert_eq!(version.image_coverage.query(2), Some(\"Layer 5\".to_string()));\n    assert_eq!(version.image_coverage.query(4), Some(\"Layer 5\".to_string()));\n    assert_eq!(version.image_coverage.query(5), Some(\"Layer 5\".to_string()));\n    assert_eq!(version.image_coverage.query(7), Some(\"Layer 3\".to_string()));\n    assert_eq!(version.image_coverage.query(8), Some(\"Layer 5\".to_string()));\n\n    // After layer 6 insertion\n    let version = map.get_version(155).unwrap();\n    assert_eq!(version.image_coverage.query(0), Some(\"Layer 6\".to_string()));\n    assert_eq!(version.image_coverage.query(1), Some(\"Layer 6\".to_string()));\n    assert_eq!(version.image_coverage.query(2), Some(\"Layer 6\".to_string()));\n    assert_eq!(version.image_coverage.query(4), Some(\"Layer 6\".to_string()));\n    assert_eq!(version.image_coverage.query(5), Some(\"Layer 6\".to_string()));\n    assert_eq!(version.image_coverage.query(7), Some(\"Layer 6\".to_string()));\n    assert_eq!(version.image_coverage.query(8), Some(\"Layer 6\".to_string()));\n}\n\n/// Wrapper for HistoricLayerCoverage that allows us to hack around the lack\n/// of support for retroactive insertion by rebuilding the map since the\n/// change.\n///\n/// Why is this needed? 
We most often insert new layers with newer LSNs,\n/// but during compaction we create layers with non-latest LSN, and during\n/// GC we delete historic layers.\n///\n/// Even though rebuilding is an expensive (N log N) solution to the problem,\n/// it's not critical since we do something equally expensive just to decide\n/// whether or not to create new image layers.\n/// TODO It's not expensive but it's not great to hold a layer map write lock\n///      for that long.\n///\n/// If this becomes an actual bottleneck, one solution would be to build a\n/// segment tree that holds PersistentLayerMaps. Though this would mean that\n/// we take an additional log(N) performance hit for queries, which will probably\n/// still be more critical.\n///\n/// See this for more on persistent and retroactive techniques:\n/// <https://www.youtube.com/watch?v=WqCWghETNDc&t=581s>\npub struct BufferedHistoricLayerCoverage<Value> {\n    /// A persistent layer map that we rebuild when we need to retroactively update\n    historic_coverage: HistoricLayerCoverage<Value>,\n\n    /// We buffer insertion into the PersistentLayerMap to decrease the number of rebuilds.\n    buffer: BTreeMap<LayerKey, Option<Value>>,\n\n    /// All current layers. This is not used for search. Only to make rebuilds easier.\n    // TODO: This map is never cleared. 
Rebuilds could use the post-trim last entry of\n    // [`Self::historic_coverage`] instead of doubling memory usage.\n    // [`Self::len`]: can require rebuild and serve from latest historic\n    // [`Self::iter`]: already requires rebuild => can serve from latest historic\n    layers: BTreeMap<LayerKey, Value>,\n}\n\nimpl<T: std::fmt::Debug> std::fmt::Debug for BufferedHistoricLayerCoverage<T> {\n    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {\n        f.debug_struct(\"RetroactiveLayerMap\")\n            .field(\"buffer\", &self.buffer)\n            .field(\"layers\", &self.layers)\n            .finish()\n    }\n}\n\nimpl<T: Clone> Default for BufferedHistoricLayerCoverage<T> {\n    fn default() -> Self {\n        Self::new()\n    }\n}\n\nimpl<Value: Clone> BufferedHistoricLayerCoverage<Value> {\n    pub fn new() -> Self {\n        Self {\n            historic_coverage: HistoricLayerCoverage::<Value>::new(),\n            buffer: BTreeMap::new(),\n            layers: BTreeMap::new(),\n        }\n    }\n\n    pub fn insert(&mut self, layer_key: LayerKey, value: Value) {\n        self.buffer.insert(layer_key, Some(value));\n    }\n\n    pub fn remove(&mut self, layer_key: LayerKey) {\n        self.buffer.insert(layer_key, None);\n    }\n\n    pub fn rebuild(&mut self) {\n        // Find the first LSN that needs to be rebuilt\n        let rebuild_since: u64 = match self.buffer.iter().next() {\n            Some((LayerKey { lsn, .. 
}, _)) => lsn.start,\n            None => return, // No need to rebuild if buffer is empty\n        };\n\n        // Apply buffered updates to self.layers\n        let num_updates = self.buffer.len();\n        self.buffer.retain(|layer_key, layer| {\n            match layer {\n                Some(l) => {\n                    self.layers.insert(layer_key.clone(), l.clone());\n                }\n                None => {\n                    self.layers.remove(layer_key);\n                }\n            };\n            false\n        });\n\n        // Rebuild\n        let mut num_inserted = 0;\n        self.historic_coverage.trim(&rebuild_since);\n        for (layer_key, layer) in self.layers.range(\n            LayerKey {\n                lsn: rebuild_since..0,\n                key: 0..0,\n                is_image: false,\n            }..,\n        ) {\n            self.historic_coverage\n                .insert(layer_key.clone(), layer.clone());\n            num_inserted += 1;\n        }\n\n        // TODO maybe only warn if ratio is at least 10\n        info!(\n            \"Rebuilt layer map. 
Did {} insertions to process a batch of {} updates.\",\n            num_inserted, num_updates,\n        )\n    }\n\n    /// Iterate all the layers\n    pub fn iter(&self) -> impl ExactSizeIterator<Item = Value> {\n        // NOTE we can actually perform this without rebuilding,\n        //      but it's not necessary for now.\n        if !self.buffer.is_empty() {\n            panic!(\"rebuild pls\")\n        }\n\n        self.layers.values().cloned()\n    }\n\n    /// Return a reference to a queryable map, assuming all updates\n    /// have already been processed using self.rebuild()\n    pub fn get(&self) -> anyhow::Result<&HistoricLayerCoverage<Value>> {\n        // NOTE we error here instead of implicitly rebuilding because\n        //      rebuilding is somewhat expensive.\n        // TODO maybe implicitly rebuild and log/sentry an error?\n        if !self.buffer.is_empty() {\n            anyhow::bail!(\"rebuild required\")\n        }\n\n        Ok(&self.historic_coverage)\n    }\n\n    pub(crate) fn len(&self) -> usize {\n        self.layers.len()\n    }\n}\n\n#[test]\nfn test_retroactive_regression_1() {\n    let mut map = BufferedHistoricLayerCoverage::new();\n\n    map.insert(\n        LayerKey {\n            key: 0..21267647932558653966460912964485513215,\n            lsn: 23761336..23761457,\n            is_image: false,\n        },\n        \"sdfsdfs\".to_string(),\n    );\n\n    map.rebuild();\n\n    let version = map.get().unwrap().get_version(23761457).unwrap();\n    assert_eq!(\n        version.delta_coverage.query(100),\n        Some(\"sdfsdfs\".to_string())\n    );\n}\n\n#[test]\nfn test_retroactive_simple() {\n    let mut map = BufferedHistoricLayerCoverage::new();\n\n    // Append some images in increasing LSN order\n    map.insert(\n        LayerKey {\n            key: 0..5,\n            lsn: 100..101,\n            is_image: true,\n        },\n        \"Image 1\".to_string(),\n    );\n    map.insert(\n        LayerKey {\n            key: 3..9,\n 
           lsn: 110..111,\n            is_image: true,\n        },\n        \"Image 2\".to_string(),\n    );\n    map.insert(\n        LayerKey {\n            key: 4..6,\n            lsn: 120..121,\n            is_image: true,\n        },\n        \"Image 3\".to_string(),\n    );\n    map.insert(\n        LayerKey {\n            key: 8..9,\n            lsn: 120..121,\n            is_image: true,\n        },\n        \"Image 4\".to_string(),\n    );\n\n    // Add a delta layer out of order\n    map.insert(\n        LayerKey {\n            key: 2..5,\n            lsn: 105..106,\n            is_image: false,\n        },\n        \"Delta 1\".to_string(),\n    );\n\n    // Rebuild so we can start querying\n    map.rebuild();\n\n    {\n        let map = map.get().expect(\"rebuilt\");\n\n        let version = map.get_version(90);\n        assert!(version.is_none());\n        let version = map.get_version(102).unwrap();\n        assert_eq!(version.image_coverage.query(4), Some(\"Image 1\".to_string()));\n\n        let version = map.get_version(107).unwrap();\n        assert_eq!(version.image_coverage.query(4), Some(\"Image 1\".to_string()));\n        assert_eq!(version.delta_coverage.query(4), Some(\"Delta 1\".to_string()));\n\n        let version = map.get_version(115).unwrap();\n        assert_eq!(version.image_coverage.query(4), Some(\"Image 2\".to_string()));\n\n        let version = map.get_version(125).unwrap();\n        assert_eq!(version.image_coverage.query(4), Some(\"Image 3\".to_string()));\n    }\n\n    // Remove Image 3\n    map.remove(LayerKey {\n        key: 4..6,\n        lsn: 120..121,\n        is_image: true,\n    });\n    map.rebuild();\n\n    {\n        // Check deletion worked\n        let map = map.get().expect(\"rebuilt\");\n        let version = map.get_version(125).unwrap();\n        assert_eq!(version.image_coverage.query(4), Some(\"Image 2\".to_string()));\n        assert_eq!(version.image_coverage.query(8), Some(\"Image 4\".to_string()));\n    
}\n}\n"
  },
  {
    "path": "pageserver/src/tenant/layer_map/layer_coverage.rs",
    "content": "use std::ops::Range;\n\n// NOTE the `im` crate has 20x more downloads and also has\n// persistent/immutable BTree. But it's bugged so rpds is a\n// better choice <https://github.com/neondatabase/neon/issues/3395>\nuse rpds::RedBlackTreeMapSync;\n\n/// Data structure that can efficiently:\n/// - find the latest layer by lsn.end at a given key\n/// - iterate the latest layers in a key range\n/// - insert layers in non-decreasing lsn.start order\n///\n/// For a detailed explanation and justification of this approach, see:\n/// <https://neon.tech/blog/persistent-structures-in-neons-wal-indexing>\n///\n/// NOTE The struct is parameterized over Value for easier\n///      testing, but in practice it's some sort of layer.\npub struct LayerCoverage<Value> {\n    /// For every change in coverage (as we sweep the key space)\n    /// we store (lsn.end, value).\n    ///\n    /// NOTE We use an immutable/persistent tree so that we can keep historic\n    ///      versions of this coverage without cloning the whole thing and\n    ///      incurring quadratic memory cost. See HistoricLayerCoverage.\n    ///\n    /// NOTE We use the Sync version of the map because we want Self to\n    ///      be Sync. Using nonsync might be faster, if we can work with\n    ///      that.\n    nodes: RedBlackTreeMapSync<i128, Option<(u64, Value)>>,\n}\n\nimpl<T: Clone> Default for LayerCoverage<T> {\n    fn default() -> Self {\n        Self::new()\n    }\n}\n\nimpl<Value: Clone> LayerCoverage<Value> {\n    pub fn new() -> Self {\n        Self {\n            nodes: RedBlackTreeMapSync::default(),\n        }\n    }\n\n    /// Helper function to subdivide the key range without changing any values\n    ///\n    /// This operation has no semantic effect by itself. It only helps us pin in\n    /// place the part of the coverage we don't want to change when inserting.\n    ///\n    /// As an analogy, think of a polygon. 
If you add a vertex along one of the\n    /// segments, the polygon is still the same, but it behaves differently when\n    /// we move or delete one of the other points.\n    ///\n    /// Complexity: O(log N)\n    fn add_node(&mut self, key: i128) {\n        let value = match self.nodes.range(..=key).next_back() {\n            Some((_, Some(v))) => Some(v.clone()),\n            Some((_, None)) => None,\n            None => None,\n        };\n        self.nodes.insert_mut(key, value);\n    }\n\n    /// Insert a layer.\n    ///\n    /// Complexity: worst case O(N), in practice O(log N). See NOTE in implementation.\n    pub fn insert(&mut self, key: Range<i128>, lsn: Range<u64>, value: Value) {\n        // Add nodes at endpoints\n        //\n        // NOTE The order of lines is important. We add nodes at the start\n        // and end of the key range **before updating any nodes** in order\n        // to pin down the current coverage outside of the relevant key range.\n        // Only the coverage inside the layer's key range should change.\n        self.add_node(key.start);\n        self.add_node(key.end);\n\n        // Raise the height where necessary\n        //\n        // NOTE This loop is worst case O(N), but amortized O(log N) in the special\n        // case when rectangles have no height. In practice I don't think we'll see\n        // the kind of layer intersections needed to trigger O(N) behavior. 
The worst\n        // case is N/2 horizontal layers overlapped with N/2 vertical layers in a\n        // grid pattern.\n        let mut to_update = Vec::new();\n        let mut to_remove = Vec::new();\n        let mut prev_covered = false;\n        for (k, node) in self.nodes.range(key) {\n            let needs_cover = match node {\n                None => true,\n                Some((h, _)) => h < &lsn.end,\n            };\n            if needs_cover {\n                match prev_covered {\n                    true => to_remove.push(*k),\n                    false => to_update.push(*k),\n                }\n            }\n            prev_covered = needs_cover;\n        }\n        // TODO check if the nodes inserted at key.start and key.end are safe\n        //      to remove. It's fine to keep them but they could be redundant.\n        for k in to_update {\n            self.nodes.insert_mut(k, Some((lsn.end, value.clone())));\n        }\n        for k in to_remove {\n            self.nodes.remove_mut(&k);\n        }\n    }\n\n    /// Get the latest (by lsn.end) layer at a given key\n    ///\n    /// Complexity: O(log N)\n    pub fn query(&self, key: i128) -> Option<Value> {\n        self.nodes\n            .range(..=key)\n            .next_back()?\n            .1\n            .as_ref()\n            .map(|(_, v)| v.clone())\n    }\n\n    /// Iterate the changes in layer coverage in a given range. 
You will likely\n    /// want to start with self.query(key.start), and then follow up with self.range\n    ///\n    /// Complexity: O(log N + result_size)\n    pub fn range(&self, key: Range<i128>) -> impl '_ + Iterator<Item = (i128, Option<Value>)> {\n        self.nodes\n            .range(key)\n            .map(|(k, v)| (*k, v.as_ref().map(|x| x.1.clone())))\n    }\n\n    /// Returns an iterator which includes all coverage changes for layers that intersect\n    /// with the provided range.\n    pub fn range_overlaps(\n        &self,\n        key_range: &Range<i128>,\n    ) -> impl Iterator<Item = (i128, Option<Value>)> + '_\n    where\n        Value: Eq,\n    {\n        let first_change = self.query(key_range.start);\n        match first_change {\n            Some(change) => {\n                // If the start of the range is covered, we have to deal with two cases:\n                // 1. Start of the range is aligned with the start of a layer.\n                // In this case the return of `self.range` will contain the layer which aligns with the start of the key range.\n                // We advance said iterator to avoid duplicating the first change.\n                // 2. 
Start of the range is not aligned with the start of a layer.\n                let range = key_range.start..key_range.end;\n                let mut range_coverage = self.range(range).peekable();\n                if range_coverage\n                    .peek()\n                    .is_some_and(|c| c.1.as_ref() == Some(&change))\n                {\n                    range_coverage.next();\n                }\n                itertools::Either::Left(\n                    std::iter::once((key_range.start, Some(change))).chain(range_coverage),\n                )\n            }\n            None => {\n                let range = key_range.start..key_range.end;\n                let coverage = self.range(range);\n                itertools::Either::Right(coverage)\n            }\n        }\n    }\n    /// O(1) clone\n    pub fn clone(&self) -> Self {\n        Self {\n            nodes: self.nodes.clone(),\n        }\n    }\n}\n\n/// Image and delta coverage at a specific LSN.\npub struct LayerCoverageTuple<Value> {\n    pub image_coverage: LayerCoverage<Value>,\n    pub delta_coverage: LayerCoverage<Value>,\n}\n\nimpl<T: Clone> Default for LayerCoverageTuple<T> {\n    fn default() -> Self {\n        Self {\n            image_coverage: LayerCoverage::default(),\n            delta_coverage: LayerCoverage::default(),\n        }\n    }\n}\n\nimpl<Value: Clone> LayerCoverageTuple<Value> {\n    pub fn clone(&self) -> Self {\n        Self {\n            image_coverage: self.image_coverage.clone(),\n            delta_coverage: self.delta_coverage.clone(),\n        }\n    }\n}\n"
  },
  {
    "path": "pageserver/src/tenant/layer_map.rs",
    "content": "//!\n//! The layer map tracks what layers exist in a timeline.\n//!\n//! When the timeline is first accessed, the server lists of all layer files\n//! in the timelines/<timeline_id> directory, and populates this map with\n//! ImageLayer and DeltaLayer structs corresponding to each file. When the first\n//! new WAL record is received, we create an InMemoryLayer to hold the incoming\n//! records. Now and then, in the checkpoint() function, the in-memory layer is\n//! are frozen, and it is split up into new image and delta layers and the\n//! corresponding files are written to disk.\n//!\n//! Design overview:\n//!\n//! The `search` method of the layer map is on the read critical path, so we've\n//! built an efficient data structure for fast reads, stored in `LayerMap::historic`.\n//! Other read methods are less critical but still impact performance of background tasks.\n//!\n//! This data structure relies on a persistent/immutable binary search tree. See the\n//! following lecture for an introduction <https://www.youtube.com/watch?v=WqCWghETNDc&t=581s>\n//! Summary: A persistent/immutable BST (and persistent data structures in general) allows\n//! you to modify the tree in such a way that each modification creates a new \"version\"\n//! of the tree. When you modify it, you get a new version, but all previous versions are\n//! still accessible too. So if someone is still holding a reference to an older version,\n//! they continue to see the tree as it was then. The persistent BST stores all the\n//! different versions in an efficient way.\n//!\n//! Our persistent BST maintains a map of which layer file \"covers\" each key. It has only\n//! one dimension, the key. See `layer_coverage.rs`. We use the persistent/immutable property\n//! to handle the LSN dimension.\n//!\n//! To build the layer map, we insert each layer to the persistent BST in LSN.start order,\n//! starting from the oldest one. 
After each insertion, we grab a reference to that \"version\"\n//! of the tree, and store it in another tree, a BtreeMap keyed by the LSN. See\n//! `historic_layer_coverage.rs`.\n//!\n//! To search for a particular key-LSN pair, you first look up the right \"version\" in the\n//! BTreeMap. Then you search that version of the BST with the key.\n//!\n//! The persistent BST keeps all the versions, but there is no way to change the old versions\n//! afterwards. We can add layers as long as they have larger LSNs than any previous layer in\n//! the map, but if we need to remove a layer, or insert anything with an older LSN, we need\n//! to throw away most of the persistent BST and build a new one, starting from the oldest\n//! LSN. See [`LayerMap::flush_updates()`].\n//!\n\nmod historic_layer_coverage;\nmod layer_coverage;\n\nuse std::collections::{BTreeMap, HashMap, VecDeque};\nuse std::iter::Peekable;\nuse std::ops::Range;\nuse std::sync::Arc;\nuse std::time::Instant;\n\nuse anyhow::Result;\nuse historic_layer_coverage::BufferedHistoricLayerCoverage;\npub use historic_layer_coverage::LayerKey;\nuse pageserver_api::key::Key;\nuse pageserver_api::keyspace::{KeySpace, KeySpaceAccum};\nuse range_set_blaze::{CheckSortedDisjoint, RangeSetBlaze};\nuse tokio::sync::watch;\nuse utils::lsn::Lsn;\n\nuse super::storage_layer::{LayerVisibilityHint, PersistentLayerDesc};\nuse crate::context::RequestContext;\nuse crate::tenant::storage_layer::{InMemoryLayer, ReadableLayerWeak};\n\n///\n/// LayerMap tracks what layers exist on a timeline.\n///\npub struct LayerMap {\n    //\n    // 'open_layer' holds the current InMemoryLayer that is accepting new\n    // records. If it is None, 'next_open_layer_at' will be set instead, indicating\n    // where the start LSN of the next InMemoryLayer that is to be created.\n    //\n    pub open_layer: Option<Arc<InMemoryLayer>>,\n    pub next_open_layer_at: Option<Lsn>,\n\n    ///\n    /// Frozen layers, if any. 
Frozen layers are in-memory layers that\n    /// are no longer added to, but haven't been written out to disk\n    /// yet. They contain WAL older than the current 'open_layer' or\n    /// 'next_open_layer_at', but newer than any historic layer.\n    /// The frozen layers are in order from oldest to newest, so that\n    /// the newest one is in the 'back' of the VecDeque, and the oldest\n    /// in the 'front'.\n    ///\n    pub frozen_layers: VecDeque<Arc<InMemoryLayer>>,\n\n    /// Index of the historic layers optimized for search\n    historic: BufferedHistoricLayerCoverage<Arc<PersistentLayerDesc>>,\n\n    /// L0 layers have key range Key::MIN..Key::MAX, and locating them using R-Tree search is very inefficient.\n    /// So L0 layers are held in l0_delta_layers vector, in addition to the R-tree.\n    ///\n    /// NB: make sure to notify `watch_l0_deltas` on changes.\n    l0_delta_layers: Vec<Arc<PersistentLayerDesc>>,\n\n    /// Notifies about L0 delta layer changes, sending the current number of L0 layers.\n    watch_l0_deltas: watch::Sender<usize>,\n}\n\nimpl Default for LayerMap {\n    fn default() -> Self {\n        Self {\n            open_layer: Default::default(),\n            next_open_layer_at: Default::default(),\n            frozen_layers: Default::default(),\n            historic: Default::default(),\n            l0_delta_layers: Default::default(),\n            watch_l0_deltas: watch::channel(0).0,\n        }\n    }\n}\n\n/// The primary update API for the layer map.\n///\n/// Batching historic layer insertions and removals is good for\n/// performance and this struct helps us do that correctly.\n#[must_use]\npub struct BatchedUpdates<'a> {\n    // While we hold this exclusive reference to the layer map the type checker\n    // will prevent us from accidentally reading any unflushed updates.\n    layer_map: &'a mut LayerMap,\n}\n\n/// Provide ability to batch more updates while hiding the read\n/// API so we don't accidentally read without 
flushing.\nimpl BatchedUpdates<'_> {\n    ///\n    /// Insert an on-disk layer.\n    ///\n    // TODO remove the `layer` argument when `mapping` is refactored out of `LayerMap`\n    pub fn insert_historic(&mut self, layer_desc: PersistentLayerDesc) {\n        self.layer_map.insert_historic_noflush(layer_desc)\n    }\n\n    ///\n    /// Remove an on-disk layer from the map.\n    ///\n    /// This should be called when the corresponding file on disk has been deleted.\n    ///\n    pub fn remove_historic(&mut self, layer_desc: &PersistentLayerDesc) {\n        self.layer_map.remove_historic_noflush(layer_desc)\n    }\n\n    // We will flush on drop anyway, but this method makes it\n    // more explicit that there is some work being done.\n    /// Apply all updates\n    pub fn flush(self) {\n        // Flush happens on drop\n    }\n}\n\n// Ideally the flush() method should be called explicitly for more\n// controlled execution. But if we forget we'd rather flush on drop\n// than panic later or read without flushing.\n//\n// TODO maybe warn if flush hasn't explicitly been called\nimpl Drop for BatchedUpdates<'_> {\n    fn drop(&mut self) {\n        self.layer_map.flush_updates();\n    }\n}\n\n/// Return value of LayerMap::search\n#[derive(Eq, PartialEq, Debug, Hash)]\npub struct SearchResult {\n    pub layer: ReadableLayerWeak,\n    pub lsn_floor: Lsn,\n}\n\n/// Return value of [`LayerMap::range_search`]\n///\n/// Contains a mapping from a layer description to a keyspace\n/// accumulator that contains all the keys which intersect the layer\n/// from the original search space.\n#[derive(Debug)]\npub struct RangeSearchResult {\n    pub found: HashMap<SearchResult, KeySpaceAccum>,\n}\n\nimpl RangeSearchResult {\n    fn new() -> Self {\n        Self {\n            found: HashMap::new(),\n        }\n    }\n\n    fn map_to_in_memory_layer(\n        in_memory_layer: Option<InMemoryLayerDesc>,\n        range: Range<Key>,\n    ) -> RangeSearchResult {\n        match 
in_memory_layer {\n            Some(inmem) => {\n                let search_result = SearchResult {\n                    lsn_floor: inmem.get_lsn_range().start,\n                    layer: ReadableLayerWeak::InMemoryLayer(inmem),\n                };\n\n                let mut accum = KeySpaceAccum::new();\n                accum.add_range(range);\n                RangeSearchResult {\n                    found: HashMap::from([(search_result, accum)]),\n                }\n            }\n            None => RangeSearchResult::new(),\n        }\n    }\n}\n\n/// Collector for results of range search queries on the LayerMap.\n/// It should be provided with two iterators for the delta and image coverage\n/// that contain all the changes for layers which intersect the range.\nstruct RangeSearchCollector<Iter>\nwhere\n    Iter: Iterator<Item = (i128, Option<Arc<PersistentLayerDesc>>)>,\n{\n    in_memory_layer: Option<InMemoryLayerDesc>,\n    delta_coverage: Peekable<Iter>,\n    image_coverage: Peekable<Iter>,\n    key_range: Range<Key>,\n    end_lsn: Lsn,\n\n    current_delta: Option<Arc<PersistentLayerDesc>>,\n    current_image: Option<Arc<PersistentLayerDesc>>,\n\n    result: RangeSearchResult,\n}\n\n#[derive(Debug)]\nenum NextLayerType {\n    Delta(i128),\n    Image(i128),\n    Both(i128),\n}\n\nimpl NextLayerType {\n    fn next_change_at_key(&self) -> Key {\n        match self {\n            NextLayerType::Delta(at) => Key::from_i128(*at),\n            NextLayerType::Image(at) => Key::from_i128(*at),\n            NextLayerType::Both(at) => Key::from_i128(*at),\n        }\n    }\n}\n\nimpl<Iter> RangeSearchCollector<Iter>\nwhere\n    Iter: Iterator<Item = (i128, Option<Arc<PersistentLayerDesc>>)>,\n{\n    fn new(\n        key_range: Range<Key>,\n        end_lsn: Lsn,\n        in_memory_layer: Option<InMemoryLayerDesc>,\n        delta_coverage: Iter,\n        image_coverage: Iter,\n    ) -> Self {\n        Self {\n            in_memory_layer,\n            delta_coverage: 
delta_coverage.peekable(),\n            image_coverage: image_coverage.peekable(),\n            key_range,\n            end_lsn,\n            current_delta: None,\n            current_image: None,\n            result: RangeSearchResult::new(),\n        }\n    }\n\n    /// Run the collector. Collection is implemented via a two pointer algorithm.\n    /// One pointer tracks the start of the current range and the other tracks\n    /// the beginning of the next range which will overlap with the next change\n    /// in coverage across both image and delta.\n    fn collect(mut self) -> RangeSearchResult {\n        let next_layer_type = self.choose_next_layer_type();\n        let mut current_range_start = match next_layer_type {\n            None => {\n                // No changes for the range\n                self.pad_range(self.key_range.clone());\n                return self.result;\n            }\n            Some(layer_type) if self.key_range.end <= layer_type.next_change_at_key() => {\n                // Changes only after the end of the range\n                self.pad_range(self.key_range.clone());\n                return self.result;\n            }\n            Some(layer_type) => {\n                // Changes for the range exist.\n                let coverage_start = layer_type.next_change_at_key();\n                let range_before = self.key_range.start..coverage_start;\n                self.pad_range(range_before);\n\n                self.advance(&layer_type);\n                coverage_start\n            }\n        };\n\n        while current_range_start < self.key_range.end {\n            let next_layer_type = self.choose_next_layer_type();\n            match next_layer_type {\n                Some(t) => {\n                    let current_range_end = t.next_change_at_key();\n                    self.add_range(current_range_start..current_range_end);\n                    current_range_start = current_range_end;\n\n                    self.advance(&t);\n      
          }\n                None => {\n                    self.add_range(current_range_start..self.key_range.end);\n                    current_range_start = self.key_range.end;\n                }\n            }\n        }\n\n        self.result\n    }\n\n    /// Map a range which does not intersect any persistent layers to\n    /// the in-memory layer candidate.\n    fn pad_range(&mut self, key_range: Range<Key>) {\n        if !key_range.is_empty() {\n            if let Some(ref inmem) = self.in_memory_layer {\n                let search_result = SearchResult {\n                    layer: ReadableLayerWeak::InMemoryLayer(inmem.clone()),\n                    lsn_floor: inmem.get_lsn_range().start,\n                };\n\n                self.result\n                    .found\n                    .entry(search_result)\n                    .or_default()\n                    .add_range(key_range);\n            }\n        }\n    }\n\n    /// Select the appropriate layer for the given range and update\n    /// the collector.\n    fn add_range(&mut self, covered_range: Range<Key>) {\n        let selected = LayerMap::select_layer(\n            self.current_delta.clone(),\n            self.current_image.clone(),\n            self.in_memory_layer.clone(),\n            self.end_lsn,\n        );\n\n        match selected {\n            Some(search_result) => self\n                .result\n                .found\n                .entry(search_result)\n                .or_default()\n                .add_range(covered_range),\n            None => self.pad_range(covered_range),\n        }\n    }\n\n    /// Move to the next coverage change.\n    fn advance(&mut self, layer_type: &NextLayerType) {\n        match layer_type {\n            NextLayerType::Delta(_) => {\n                let (_, layer) = self.delta_coverage.next().unwrap();\n                self.current_delta = layer;\n            }\n            NextLayerType::Image(_) => {\n                let (_, layer) = 
self.image_coverage.next().unwrap();\n                self.current_image = layer;\n            }\n            NextLayerType::Both(_) => {\n                let (_, image_layer) = self.image_coverage.next().unwrap();\n                let (_, delta_layer) = self.delta_coverage.next().unwrap();\n\n                self.current_image = image_layer;\n                self.current_delta = delta_layer;\n            }\n        }\n    }\n\n    /// Pick the next coverage change: the one at the lesser key or both if they're aligned.\n    fn choose_next_layer_type(&mut self) -> Option<NextLayerType> {\n        let next_delta_at = self.delta_coverage.peek().map(|(key, _)| key);\n        let next_image_at = self.image_coverage.peek().map(|(key, _)| key);\n\n        match (next_delta_at, next_image_at) {\n            (None, None) => None,\n            (Some(next_delta_at), None) => Some(NextLayerType::Delta(*next_delta_at)),\n            (None, Some(next_image_at)) => Some(NextLayerType::Image(*next_image_at)),\n            (Some(next_delta_at), Some(next_image_at)) if next_image_at < next_delta_at => {\n                Some(NextLayerType::Image(*next_image_at))\n            }\n            (Some(next_delta_at), Some(next_image_at)) if next_delta_at < next_image_at => {\n                Some(NextLayerType::Delta(*next_delta_at))\n            }\n            (Some(next_delta_at), Some(_)) => Some(NextLayerType::Both(*next_delta_at)),\n        }\n    }\n}\n\n#[derive(Debug, PartialEq, Eq, Clone, Hash)]\npub struct InMemoryLayerDesc {\n    handle: InMemoryLayerHandle,\n    lsn_range: Range<Lsn>,\n}\n\nimpl InMemoryLayerDesc {\n    pub(crate) fn get_lsn_range(&self) -> Range<Lsn> {\n        self.lsn_range.clone()\n    }\n}\n\n#[derive(Debug, PartialEq, Eq, Clone, Hash)]\nenum InMemoryLayerHandle {\n    Open,\n    Frozen(usize),\n}\n\nimpl LayerMap {\n    ///\n    /// Find the latest layer (by lsn.end) that covers the given\n    /// 'key', with lsn.start < 'end_lsn'.\n    ///\n    /// The 
caller of this function is the page reconstruction\n    /// algorithm looking for the next relevant delta layer, or\n    /// the terminal image layer. The caller will pass the lsn_floor\n    /// value as end_lsn in the next call to search.\n    ///\n    /// If there's an image layer exactly below the given end_lsn,\n    /// search should return that layer regardless if there are\n    /// overlapping deltas.\n    ///\n    /// If the latest layer is a delta and there is an overlapping\n    /// image with it below, the lsn_floor returned should be right\n    /// above that image so we don't skip it in the search. Otherwise\n    /// the lsn_floor returned should be the bottom of the delta layer\n    /// because we should make as much progress down the lsn axis\n    /// as possible. It's fine if this way we skip some overlapping\n    /// deltas, because the delta we returned would contain the same\n    /// wal content.\n    ///\n    /// TODO: This API is convoluted and inefficient. If the caller\n    /// makes N search calls, we'll end up finding the same latest\n    /// image layer N times. 
We should either cache the latest image\n    /// layer result, or simplify the api to `get_latest_image` and\n    /// `get_latest_delta`, and only call `get_latest_image` once.\n    ///\n    pub fn search(&self, key: Key, end_lsn: Lsn) -> Option<SearchResult> {\n        let in_memory_layer = self.search_in_memory_layer(end_lsn);\n\n        let version = match self.historic.get().unwrap().get_version(end_lsn.0 - 1) {\n            Some(version) => version,\n            None => {\n                return in_memory_layer.map(|desc| SearchResult {\n                    lsn_floor: desc.get_lsn_range().start,\n                    layer: ReadableLayerWeak::InMemoryLayer(desc),\n                });\n            }\n        };\n\n        let latest_delta = version.delta_coverage.query(key.to_i128());\n        let latest_image = version.image_coverage.query(key.to_i128());\n\n        Self::select_layer(latest_delta, latest_image, in_memory_layer, end_lsn)\n    }\n\n    /// Select a layer from three potential candidates (in-memory, delta and image layer).\n    /// The candidates represent the first layer of each type which intersect a key range.\n    ///\n    /// Layer types have an implicit priority (image > delta > in-memory). 
For instance,\n    /// if we have the option of reading an LSN range from both an image and a delta, we\n    /// should read from the image.\n    fn select_layer(\n        delta_layer: Option<Arc<PersistentLayerDesc>>,\n        image_layer: Option<Arc<PersistentLayerDesc>>,\n        in_memory_layer: Option<InMemoryLayerDesc>,\n        end_lsn: Lsn,\n    ) -> Option<SearchResult> {\n        assert!(delta_layer.as_ref().is_none_or(|l| l.is_delta()));\n        assert!(image_layer.as_ref().is_none_or(|l| !l.is_delta()));\n\n        match (delta_layer, image_layer, in_memory_layer) {\n            (None, None, None) => None,\n            (None, Some(image), None) => {\n                let lsn_floor = image.get_lsn_range().start;\n                Some(SearchResult {\n                    layer: ReadableLayerWeak::PersistentLayer(image),\n                    lsn_floor,\n                })\n            }\n            (Some(delta), None, None) => {\n                let lsn_floor = delta.get_lsn_range().start;\n                Some(SearchResult {\n                    layer: ReadableLayerWeak::PersistentLayer(delta),\n                    lsn_floor,\n                })\n            }\n            (Some(delta), Some(image), None) => {\n                let img_lsn = image.get_lsn_range().start;\n                let image_is_newer = image.get_lsn_range().end >= delta.get_lsn_range().end;\n                let image_exact_match = img_lsn + 1 == end_lsn;\n                if image_is_newer || image_exact_match {\n                    Some(SearchResult {\n                        layer: ReadableLayerWeak::PersistentLayer(image),\n                        lsn_floor: img_lsn,\n                    })\n                } else {\n                    // If the delta overlaps with the image in the LSN dimension, do a partial\n                    // up to the image layer.\n                    let lsn_floor =\n                        std::cmp::max(delta.get_lsn_range().start, 
image.get_lsn_range().start + 1);\n                    Some(SearchResult {\n                        layer: ReadableLayerWeak::PersistentLayer(delta),\n                        lsn_floor,\n                    })\n                }\n            }\n            (None, None, Some(inmem)) => {\n                let lsn_floor = inmem.get_lsn_range().start;\n                Some(SearchResult {\n                    layer: ReadableLayerWeak::InMemoryLayer(inmem),\n                    lsn_floor,\n                })\n            }\n            (None, Some(image), Some(inmem)) => {\n                // If the in-memory layer overlaps with the image in the LSN dimension, do a partial\n                // up to the image layer.\n                let img_lsn = image.get_lsn_range().start;\n                let image_is_newer = image.get_lsn_range().end >= inmem.get_lsn_range().end;\n                let image_exact_match = img_lsn + 1 == end_lsn;\n                if image_is_newer || image_exact_match {\n                    Some(SearchResult {\n                        layer: ReadableLayerWeak::PersistentLayer(image),\n                        lsn_floor: img_lsn,\n                    })\n                } else {\n                    let lsn_floor =\n                        std::cmp::max(inmem.get_lsn_range().start, image.get_lsn_range().start + 1);\n                    Some(SearchResult {\n                        layer: ReadableLayerWeak::InMemoryLayer(inmem),\n                        lsn_floor,\n                    })\n                }\n            }\n            (Some(delta), None, Some(inmem)) => {\n                // Overlaps between delta and in-memory layers are not a valid\n                // state, but we handle them here for completeness.\n                let delta_end = delta.get_lsn_range().end;\n                let delta_is_newer = delta_end >= inmem.get_lsn_range().end;\n                let delta_exact_match = delta_end == end_lsn;\n                if delta_is_newer || 
delta_exact_match {\n                    Some(SearchResult {\n                        lsn_floor: delta.get_lsn_range().start,\n                        layer: ReadableLayerWeak::PersistentLayer(delta),\n                    })\n                } else {\n                    // If the in-memory layer overlaps with the delta in the LSN dimension, do a partial\n                    // up to the delta layer.\n                    let lsn_floor =\n                        std::cmp::max(inmem.get_lsn_range().start, delta.get_lsn_range().end);\n                    Some(SearchResult {\n                        layer: ReadableLayerWeak::InMemoryLayer(inmem),\n                        lsn_floor,\n                    })\n                }\n            }\n            (Some(delta), Some(image), Some(inmem)) => {\n                // Determine the preferred persistent layer without taking the in-memory layer\n                // into consideration.\n                let persistent_res =\n                    Self::select_layer(Some(delta.clone()), Some(image.clone()), None, end_lsn)\n                        .unwrap();\n                let persistent_l = match persistent_res.layer {\n                    ReadableLayerWeak::PersistentLayer(l) => l,\n                    ReadableLayerWeak::InMemoryLayer(_) => unreachable!(),\n                };\n\n                // Now handle the in-memory layer overlaps.\n                let inmem_res = if persistent_l.is_delta() {\n                    Self::select_layer(Some(persistent_l), None, Some(inmem.clone()), end_lsn)\n                        .unwrap()\n                } else {\n                    Self::select_layer(None, Some(persistent_l), Some(inmem.clone()), end_lsn)\n                        .unwrap()\n                };\n\n                Some(SearchResult {\n                    layer: inmem_res.layer,\n                    // Use the more restrictive LSN floor\n                    lsn_floor: std::cmp::max(persistent_res.lsn_floor, 
inmem_res.lsn_floor),\n                })\n            }\n        }\n    }\n\n    pub fn range_search(&self, key_range: Range<Key>, end_lsn: Lsn) -> RangeSearchResult {\n        let in_memory_layer = self.search_in_memory_layer(end_lsn);\n\n        let version = match self.historic.get().unwrap().get_version(end_lsn.0 - 1) {\n            Some(version) => version,\n            None => {\n                return RangeSearchResult::map_to_in_memory_layer(in_memory_layer, key_range);\n            }\n        };\n\n        let raw_range = key_range.start.to_i128()..key_range.end.to_i128();\n        let delta_changes = version.delta_coverage.range_overlaps(&raw_range);\n        let image_changes = version.image_coverage.range_overlaps(&raw_range);\n\n        let collector = RangeSearchCollector::new(\n            key_range,\n            end_lsn,\n            in_memory_layer,\n            delta_changes,\n            image_changes,\n        );\n        collector.collect()\n    }\n\n    /// Start a batch of updates, applied on drop\n    pub fn batch_update(&mut self) -> BatchedUpdates<'_> {\n        BatchedUpdates { layer_map: self }\n    }\n\n    ///\n    /// Insert an on-disk layer\n    ///\n    /// Helper function for BatchedUpdates::insert_historic\n    ///\n    /// TODO(chi): remove L generic so that we do not need to pass layer object.\n    pub(self) fn insert_historic_noflush(&mut self, layer_desc: PersistentLayerDesc) {\n        // TODO: See #3869, resulting #4088, attempted fix and repro #4094\n\n        if Self::is_l0(&layer_desc.key_range, layer_desc.is_delta) {\n            self.l0_delta_layers.push(layer_desc.clone().into());\n            self.watch_l0_deltas\n                .send_replace(self.l0_delta_layers.len());\n        }\n\n        self.historic.insert(\n            historic_layer_coverage::LayerKey::from(&layer_desc),\n            layer_desc.into(),\n        );\n    }\n\n    ///\n    /// Remove an on-disk layer from the map.\n    ///\n    /// Helper 
function for BatchedUpdates::remove_historic\n    ///\n    pub fn remove_historic_noflush(&mut self, layer_desc: &PersistentLayerDesc) {\n        self.historic\n            .remove(historic_layer_coverage::LayerKey::from(layer_desc));\n        let layer_key = layer_desc.key();\n        if Self::is_l0(&layer_desc.key_range, layer_desc.is_delta) {\n            let len_before = self.l0_delta_layers.len();\n            let mut l0_delta_layers = std::mem::take(&mut self.l0_delta_layers);\n            l0_delta_layers.retain(|other| other.key() != layer_key);\n            self.l0_delta_layers = l0_delta_layers;\n            self.watch_l0_deltas\n                .send_replace(self.l0_delta_layers.len());\n            // this assertion is related to use of Arc::ptr_eq in Self::compare_arced_layers,\n            // there's a chance that the comparison fails at runtime due to it comparing (pointer,\n            // vtable) pairs.\n            assert_eq!(\n                self.l0_delta_layers.len(),\n                len_before - 1,\n                \"failed to locate removed historic layer from l0_delta_layers\"\n            );\n        }\n    }\n\n    /// Helper function for BatchedUpdates::drop.\n    pub(self) fn flush_updates(&mut self) {\n        self.historic.rebuild();\n    }\n\n    /// Is there a newer image layer for given key- and LSN-range? Or a set\n    /// of image layers within the specified lsn range that cover the entire\n    /// specified key range?\n    ///\n    /// This is used for garbage collection, to determine if an old layer can\n    /// be deleted.\n    pub fn image_layer_exists(&self, key: &Range<Key>, lsn: &Range<Lsn>) -> bool {\n        if key.is_empty() {\n            // Vacuously true. 
There's a newer image for all 0 of the keys in the range.\n            return true;\n        }\n\n        let version = match self.historic.get().unwrap().get_version(lsn.end.0 - 1) {\n            Some(v) => v,\n            None => return false,\n        };\n\n        let start = key.start.to_i128();\n        let end = key.end.to_i128();\n\n        let layer_covers = |layer: Option<Arc<PersistentLayerDesc>>| match layer {\n            Some(layer) => layer.get_lsn_range().start >= lsn.start,\n            None => false,\n        };\n\n        // Check the start is covered\n        if !layer_covers(version.image_coverage.query(start)) {\n            return false;\n        }\n\n        // Check after all changes of coverage\n        for (_, change_val) in version.image_coverage.range(start..end) {\n            if !layer_covers(change_val) {\n                return false;\n            }\n        }\n\n        true\n    }\n\n    pub fn iter_historic_layers(&self) -> impl ExactSizeIterator<Item = Arc<PersistentLayerDesc>> {\n        self.historic.iter()\n    }\n\n    /// Get a ref counted pointer for the first in memory layer that matches the provided predicate.\n    pub(crate) fn search_in_memory_layer(&self, below: Lsn) -> Option<InMemoryLayerDesc> {\n        let is_below = |l: &Arc<InMemoryLayer>| {\n            let start_lsn = l.get_lsn_range().start;\n            below > start_lsn\n        };\n\n        if let Some(open) = &self.open_layer {\n            if is_below(open) {\n                return Some(InMemoryLayerDesc {\n                    handle: InMemoryLayerHandle::Open,\n                    lsn_range: open.get_lsn_range(),\n                });\n            }\n        }\n\n        self.frozen_layers\n            .iter()\n            .enumerate()\n            .rfind(|(_idx, l)| is_below(l))\n            .map(|(idx, l)| InMemoryLayerDesc {\n                handle: InMemoryLayerHandle::Frozen(idx),\n                lsn_range: l.get_lsn_range(),\n            })\n   
 }\n\n    pub(crate) fn in_memory_layer(&self, desc: &InMemoryLayerDesc) -> Arc<InMemoryLayer> {\n        match desc.handle {\n            InMemoryLayerHandle::Open => self.open_layer.as_ref().unwrap().clone(),\n            InMemoryLayerHandle::Frozen(idx) => self.frozen_layers[idx].clone(),\n        }\n    }\n\n    ///\n    /// Divide the whole given range of keys into sub-ranges based on the latest\n    /// image layer that covers each range at the specified lsn (inclusive).\n    /// This is used when creating  new image layers.\n    pub fn image_coverage(\n        &self,\n        key_range: &Range<Key>,\n        lsn: Lsn,\n    ) -> Vec<(Range<Key>, Option<Arc<PersistentLayerDesc>>)> {\n        let version = match self.historic.get().unwrap().get_version(lsn.0) {\n            Some(v) => v,\n            None => return vec![],\n        };\n\n        let start = key_range.start.to_i128();\n        let end = key_range.end.to_i128();\n\n        // Initialize loop variables\n        let mut coverage: Vec<(Range<Key>, Option<Arc<PersistentLayerDesc>>)> = vec![];\n        let mut current_key = start;\n        let mut current_val = version.image_coverage.query(start);\n\n        // Loop through the change events and push intervals\n        for (change_key, change_val) in version.image_coverage.range(start..end) {\n            let kr = Key::from_i128(current_key)..Key::from_i128(change_key);\n            coverage.push((kr, current_val.take()));\n            current_key = change_key;\n            current_val.clone_from(&change_val);\n        }\n\n        // Add the final interval\n        let kr = Key::from_i128(current_key)..Key::from_i128(end);\n        coverage.push((kr, current_val.take()));\n\n        coverage\n    }\n\n    /// Check if the key range resembles that of an L0 layer.\n    pub fn is_l0(key_range: &Range<Key>, is_delta_layer: bool) -> bool {\n        is_delta_layer && key_range == &(Key::MIN..Key::MAX)\n    }\n\n    /// This function determines which layers 
are counted in `count_deltas`:\n    /// layers that should count towards deciding whether or not to reimage\n    /// a certain partition range.\n    ///\n    /// There are two kinds of layers we currently consider reimage-worthy:\n    ///\n    /// Case 1: Non-L0 layers are currently reimage-worthy by default.\n    /// TODO Some of these layers are very sparse and cover the entire key\n    ///      range. Replacing 256MB of data (or less!) with terabytes of\n    ///      images doesn't seem wise. We need a better heuristic, possibly\n    ///      based on some of these factors:\n    ///      a) whether this layer has any wal in this partition range\n    ///      b) the size of the layer\n    ///      c) the number of images needed to cover it\n    ///      d) the estimated time until we'll have to reimage over it for GC\n    ///\n    /// Case 2: Since L0 layers by definition cover the entire key space, we consider\n    /// them reimage-worthy only when the entire key space can be covered by very few\n    /// images (currently 1).\n    /// TODO The optimal number should probably be slightly higher than 1, but to\n    ///      implement that we need to plumb a lot more context into this function\n    ///      than just the current partition_range.\n    pub fn is_reimage_worthy(layer: &PersistentLayerDesc, partition_range: &Range<Key>) -> bool {\n        // Case 1\n        if !Self::is_l0(&layer.key_range, layer.is_delta) {\n            return true;\n        }\n\n        // Case 2\n        if partition_range == &(Key::MIN..Key::MAX) {\n            return true;\n        }\n\n        false\n    }\n\n    /// Count the height of the tallest stack of reimage-worthy deltas\n    /// in this 2d region.\n    ///\n    /// If `limit` is provided we don't try to count above that number.\n    ///\n    /// This number is used to compute the largest number of deltas that\n    /// we'll need to visit for any page reconstruction in this region.\n    /// We use this heuristic to decide 
whether to create an image layer.\n    pub fn count_deltas(&self, key: &Range<Key>, lsn: &Range<Lsn>, limit: Option<usize>) -> usize {\n        // We get the delta coverage of the region, and for each part of the coverage\n        // we recurse right underneath the delta. The recursion depth is limited by\n        // the largest result this function could return, which is in practice between\n        // 3 and 10 (since we usually try to create an image when the number gets larger).\n\n        if lsn.is_empty() || key.is_empty() || limit == Some(0) {\n            return 0;\n        }\n\n        let version = match self.historic.get().unwrap().get_version(lsn.end.0 - 1) {\n            Some(v) => v,\n            None => return 0,\n        };\n\n        let start = key.start.to_i128();\n        let end = key.end.to_i128();\n\n        // Initialize loop variables\n        let mut max_stacked_deltas = 0;\n        let mut current_key = start;\n        let mut current_val = version.delta_coverage.query(start);\n\n        // Loop through the delta coverage and recurse on each part\n        for (change_key, change_val) in version.delta_coverage.range(start..end) {\n            // If there's a relevant delta in this part, add 1 and recurse down\n            if let Some(val) = &current_val {\n                if val.get_lsn_range().end > lsn.start {\n                    let kr = Key::from_i128(current_key)..Key::from_i128(change_key);\n                    let lr = lsn.start..val.get_lsn_range().start;\n                    if !kr.is_empty() {\n                        let base_count = Self::is_reimage_worthy(val, key) as usize;\n                        let new_limit = limit.map(|l| l - base_count);\n                        let max_stacked_deltas_underneath = self.count_deltas(&kr, &lr, new_limit);\n                        max_stacked_deltas = std::cmp::max(\n                            max_stacked_deltas,\n                            base_count + max_stacked_deltas_underneath,\n  
                      );\n                    }\n                }\n            }\n\n            current_key = change_key;\n            current_val.clone_from(&change_val);\n        }\n\n        // Consider the last part\n        if let Some(val) = &current_val {\n            if val.get_lsn_range().end > lsn.start {\n                let kr = Key::from_i128(current_key)..Key::from_i128(end);\n                let lr = lsn.start..val.get_lsn_range().start;\n\n                if !kr.is_empty() {\n                    let base_count = Self::is_reimage_worthy(val, key) as usize;\n                    let new_limit = limit.map(|l| l - base_count);\n                    let max_stacked_deltas_underneath = self.count_deltas(&kr, &lr, new_limit);\n                    max_stacked_deltas = std::cmp::max(\n                        max_stacked_deltas,\n                        base_count + max_stacked_deltas_underneath,\n                    );\n                }\n            }\n        }\n\n        max_stacked_deltas\n    }\n\n    /* BEGIN_HADRON */\n    /**\n     * Compute the image consistent LSN, the largest LSN below which all pages have been redone successfully.\n     * It works by first finding the latest image layers and store them into a map. 
Then for each delta layer,\n     * find all overlapping image layers in order to potentially increase the image LSN in case there are gaps\n     * (e.g., if an image is created at LSN 100 but the delta layer spans LSN [150, 200], then we can increase\n     * image LSN to 150 because there is no WAL record in between).\n     * Finally, the image consistent LSN is computed by taking the minimum of all image layers.\n     */\n    pub fn compute_image_consistent_lsn(&self, disk_consistent_lsn: Lsn) -> Lsn {\n        struct ImageLayerInfo {\n            // creation LSN of the image layer\n            image_lsn: Lsn,\n            // the current minimum LSN of newer delta layers with overlapping key ranges\n            min_delta_lsn: Lsn,\n        }\n        let started_at = Instant::now();\n\n        let min_l0_deltas_lsn = {\n            let l0_deltas = self.level0_deltas();\n            l0_deltas\n                .iter()\n                .map(|layer| layer.get_lsn_range().start)\n                .min()\n                .unwrap_or(disk_consistent_lsn)\n        };\n        let global_key_range = Key::MIN..Key::MAX;\n\n        // step 1: collect all most recent image layers into a map\n        // map: end key to image_layer_info\n        let mut image_map: BTreeMap<Key, ImageLayerInfo> = BTreeMap::new();\n        for (img_range, img) in self.image_coverage(&global_key_range, disk_consistent_lsn) {\n            let img_lsn = img.map(|layer| layer.get_lsn_range().end).unwrap_or(Lsn(0));\n            image_map.insert(\n                img_range.end,\n                ImageLayerInfo {\n                    image_lsn: img_lsn,\n                    min_delta_lsn: min_l0_deltas_lsn,\n                },\n            );\n        }\n\n        // step 2: go through all delta layers, and update the image layer info with overlapping\n        // key ranges\n        for layer in self.historic.iter() {\n            if !layer.is_delta {\n                continue;\n            }\n            
let delta_key_range = layer.get_key_range();\n            let delta_lsn_range = layer.get_lsn_range();\n            for (img_end_key, img_info) in image_map.range_mut(delta_key_range.start..Key::MAX) {\n                debug_assert!(img_end_key >= &delta_key_range.start);\n                if delta_lsn_range.end > img_info.image_lsn {\n                    // the delta layer includes WAL records after the image\n                    // it's possible that the delta layer's start LSN < image LSN, which will be simply ignored by step 3\n                    img_info.min_delta_lsn =\n                        std::cmp::min(img_info.min_delta_lsn, delta_lsn_range.start);\n                }\n                if img_end_key >= &delta_key_range.end {\n                    // we have fully processed all overlapping image layers\n                    break;\n                }\n            }\n        }\n\n        // step 3, go through all image layers and find the image consistent LSN\n        let mut img_consistent_lsn = min_l0_deltas_lsn.checked_sub(Lsn(1)).unwrap();\n        let mut prev_key = Key::MIN;\n        for (img_key, img_info) in image_map {\n            tracing::debug!(\n                \"Image layer {:?}:{} has min delta lsn {}\",\n                Range {\n                    start: prev_key,\n                    end: img_key,\n                },\n                img_info.image_lsn,\n                img_info.min_delta_lsn,\n            );\n            let image_lsn = std::cmp::max(\n                img_info.image_lsn,\n                img_info.min_delta_lsn.checked_sub(Lsn(1)).unwrap_or(Lsn(0)),\n            );\n            img_consistent_lsn = std::cmp::min(img_consistent_lsn, image_lsn);\n            prev_key = img_key;\n        }\n        tracing::info!(\n            \"computed image_consistent_lsn {} for disk_consistent_lsn {} in {}ms. 
Processed {} layrs in total.\",\n            img_consistent_lsn,\n            disk_consistent_lsn,\n            started_at.elapsed().as_millis(),\n            self.historic.len()\n        );\n        img_consistent_lsn\n    }\n\n    /* END_HADRON */\n\n    /// Return all L0 delta layers\n    pub fn level0_deltas(&self) -> &Vec<Arc<PersistentLayerDesc>> {\n        &self.l0_delta_layers\n    }\n\n    /// Subscribes to L0 delta layer changes, sending the current number of L0 delta layers.\n    pub fn watch_level0_deltas(&self) -> watch::Receiver<usize> {\n        self.watch_l0_deltas.subscribe()\n    }\n\n    /// debugging function to print out the contents of the layer map\n    #[allow(unused)]\n    pub async fn dump(&self, verbose: bool, ctx: &RequestContext) -> Result<()> {\n        println!(\"Begin dump LayerMap\");\n\n        println!(\"open_layer:\");\n        if let Some(open_layer) = &self.open_layer {\n            open_layer.dump(verbose, ctx).await?;\n        }\n\n        println!(\"frozen_layers:\");\n        for frozen_layer in self.frozen_layers.iter() {\n            frozen_layer.dump(verbose, ctx).await?;\n        }\n\n        println!(\"historic_layers:\");\n        for desc in self.iter_historic_layers() {\n            desc.dump();\n        }\n        println!(\"End dump LayerMap\");\n        Ok(())\n    }\n\n    /// `read_points` represent the tip of a timeline and any branch points, i.e. the places\n    /// where we expect to serve reads.\n    ///\n    /// This function is O(N) and should be called infrequently.  
The caller is responsible for\n    /// looking up and updating the Layer objects for these layer descriptors.\n    pub fn get_visibility(\n        &self,\n        mut read_points: Vec<Lsn>,\n    ) -> (\n        Vec<(Arc<PersistentLayerDesc>, LayerVisibilityHint)>,\n        KeySpace,\n    ) {\n        // This is like a KeySpace, but this type is intended for efficient unions with image layer ranges, whereas\n        // KeySpace is intended to be composed statically and iterated over.\n        struct KeyShadow {\n            // Map of range start to range end\n            inner: RangeSetBlaze<i128>,\n        }\n\n        impl KeyShadow {\n            fn new() -> Self {\n                Self {\n                    inner: Default::default(),\n                }\n            }\n\n            fn contains(&self, range: Range<Key>) -> bool {\n                let range_incl = range.start.to_i128()..=range.end.to_i128() - 1;\n                self.inner.is_superset(&RangeSetBlaze::from_sorted_disjoint(\n                    CheckSortedDisjoint::from([range_incl]),\n                ))\n            }\n\n            /// Add the input range to the keys covered by self.\n            ///\n            /// Return true if inserting this range covered some keys that were previously not covered\n            fn cover(&mut self, insert: Range<Key>) -> bool {\n                let range_incl = insert.start.to_i128()..=insert.end.to_i128() - 1;\n                self.inner.ranges_insert(range_incl)\n            }\n\n            fn reset(&mut self) {\n                self.inner = Default::default();\n            }\n\n            fn to_keyspace(&self) -> KeySpace {\n                let mut accum = KeySpaceAccum::new();\n                for range_incl in self.inner.ranges() {\n                    let range = Range {\n                        start: Key::from_i128(*range_incl.start()),\n                        end: Key::from_i128(range_incl.end() + 1),\n                    };\n                    
accum.add_range(range)\n                }\n\n                accum.to_keyspace()\n            }\n        }\n\n        // The 'shadow' will be updated as we sweep through the layers: an image layer adds its key range to the shadow,\n        // and a ReadPoint resets it.\n        read_points.sort_by_key(|rp| rp.0);\n        let mut shadow = KeyShadow::new();\n\n        // We will interleave all our read points and layers into a sorted collection\n        enum Item {\n            ReadPoint { lsn: Lsn },\n            Layer(Arc<PersistentLayerDesc>),\n        }\n\n        let mut items = Vec::with_capacity(self.historic.len() + read_points.len());\n        items.extend(self.iter_historic_layers().map(Item::Layer));\n        items.extend(\n            read_points\n                .into_iter()\n                .map(|rp| Item::ReadPoint { lsn: rp }),\n        );\n\n        // Ordering: we want to iterate like this:\n        // 1. Highest LSNs first\n        // 2. Consider images before deltas if they end at the same LSNs (images cover deltas)\n        // 3. 
Consider ReadPoints before image layers if they're at the same LSN (readpoints make that image visible)\n        items.sort_by_key(|item| {\n            std::cmp::Reverse(match item {\n                Item::Layer(layer) => {\n                    if layer.is_delta() {\n                        (Lsn(layer.get_lsn_range().end.0 - 1), 0)\n                    } else {\n                        (layer.image_layer_lsn(), 1)\n                    }\n                }\n                Item::ReadPoint { lsn } => (*lsn, 2),\n            })\n        });\n\n        let mut results = Vec::with_capacity(self.historic.len());\n\n        let mut maybe_covered_deltas: Vec<Arc<PersistentLayerDesc>> = Vec::new();\n\n        for item in items {\n            let (reached_lsn, is_readpoint) = match &item {\n                Item::ReadPoint { lsn } => (lsn, true),\n                Item::Layer(layer) => (&layer.lsn_range.start, false),\n            };\n            maybe_covered_deltas.retain(|d| {\n                if *reached_lsn >= d.lsn_range.start && is_readpoint {\n                    // We encountered a readpoint within the delta layer: it is visible\n\n                    results.push((d.clone(), LayerVisibilityHint::Visible));\n                    false\n                } else if *reached_lsn < d.lsn_range.start {\n                    // We passed the layer's range without encountering a read point: it is not visible\n                    results.push((d.clone(), LayerVisibilityHint::Covered));\n                    false\n                } else {\n                    // We're still in the delta layer: continue iterating\n                    true\n                }\n            });\n\n            match item {\n                Item::ReadPoint { lsn: _lsn } => {\n                    // TODO: propagate the child timeline's shadow from their own run of this function, so that we don't have\n                    // to assume that the whole key range is visible at the branch point.\n              
      shadow.reset();\n                }\n                Item::Layer(layer) => {\n                    let visibility = if layer.is_delta() {\n                        if shadow.contains(layer.get_key_range()) {\n                            // If a layer isn't visible based on current state, we must defer deciding whether\n                            // it is truly not visible until we have advanced past the delta's range: we might\n                            // encounter another branch point within this delta layer's LSN range.\n                            maybe_covered_deltas.push(layer);\n                            continue;\n                        } else {\n                            LayerVisibilityHint::Visible\n                        }\n                    } else {\n                        let modified = shadow.cover(layer.get_key_range());\n                        if modified {\n                            // An image layer in a region which wasn't fully covered yet: this layer is visible, but layers below it will be covered\n                            LayerVisibilityHint::Visible\n                        } else {\n                            // An image layer in a region that was already covered\n                            LayerVisibilityHint::Covered\n                        }\n                    };\n\n                    results.push((layer, visibility));\n                }\n            }\n        }\n\n        // Drain any remaining maybe_covered deltas\n        results.extend(\n            maybe_covered_deltas\n                .into_iter()\n                .map(|d| (d, LayerVisibilityHint::Covered)),\n        );\n\n        (results, shadow.to_keyspace())\n    }\n}\n\n#[cfg(test)]\nmod tests {\n    use std::collections::HashMap;\n    use std::path::PathBuf;\n\n    use crate::{\n        DEFAULT_PG_VERSION,\n        tenant::{harness::TenantHarness, storage_layer::LayerName},\n    };\n    use pageserver_api::key::DBDIR_KEY;\n    use 
pageserver_api::keyspace::{KeySpace, KeySpaceRandomAccum};\n    use tokio_util::sync::CancellationToken;\n    use utils::id::{TenantId, TimelineId};\n    use utils::shard::TenantShardId;\n\n    use super::*;\n    use crate::tenant::IndexPart;\n\n    #[derive(Clone)]\n    struct LayerDesc {\n        key_range: Range<Key>,\n        lsn_range: Range<Lsn>,\n        is_delta: bool,\n    }\n\n    fn create_layer_map(layers: Vec<LayerDesc>) -> LayerMap {\n        let mut layer_map = LayerMap::default();\n\n        for layer in layers {\n            layer_map.insert_historic_noflush(PersistentLayerDesc::new_test(\n                layer.key_range,\n                layer.lsn_range,\n                layer.is_delta,\n            ));\n        }\n\n        layer_map.flush_updates();\n        layer_map\n    }\n\n    fn assert_range_search_result_eq(lhs: RangeSearchResult, rhs: RangeSearchResult) {\n        let lhs: HashMap<SearchResult, KeySpace> = lhs\n            .found\n            .into_iter()\n            .map(|(search_result, accum)| (search_result, accum.to_keyspace()))\n            .collect();\n        let rhs: HashMap<SearchResult, KeySpace> = rhs\n            .found\n            .into_iter()\n            .map(|(search_result, accum)| (search_result, accum.to_keyspace()))\n            .collect();\n\n        assert_eq!(lhs, rhs);\n    }\n\n    #[cfg(test)]\n    fn brute_force_range_search(\n        layer_map: &LayerMap,\n        key_range: Range<Key>,\n        end_lsn: Lsn,\n    ) -> RangeSearchResult {\n        let mut range_search_result = RangeSearchResult::new();\n\n        let mut key = key_range.start;\n        while key != key_range.end {\n            let res = layer_map.search(key, end_lsn);\n            if let Some(res) = res {\n                range_search_result\n                    .found\n                    .entry(res)\n                    .or_default()\n                    .add_key(key);\n            }\n\n            key = key.next();\n        }\n\n        
range_search_result\n    }\n\n    #[test]\n    fn ranged_search_on_empty_layer_map() {\n        let layer_map = LayerMap::default();\n        let range = Key::from_i128(100)..Key::from_i128(200);\n\n        let res = layer_map.range_search(range.clone(), Lsn(100));\n        assert_range_search_result_eq(res, RangeSearchResult::new());\n    }\n\n    #[tokio::test]\n    async fn ranged_search() {\n        let harness = TenantHarness::create(\"ranged_search\").await.unwrap();\n        let (tenant, ctx) = harness.load().await;\n        let cancel = CancellationToken::new();\n        let timeline_id = TimelineId::generate();\n        // Create the timeline such that the in-memory layers can be written\n        // to the timeline directory.\n        tenant\n            .create_test_timeline(timeline_id, Lsn(0x10), DEFAULT_PG_VERSION, &ctx)\n            .await\n            .unwrap();\n\n        let gate = utils::sync::gate::Gate::default();\n        let add_in_memory_layer = async |layer_map: &mut LayerMap, lsn_range: Range<Lsn>| {\n            let layer = InMemoryLayer::create(\n                harness.conf,\n                timeline_id,\n                harness.tenant_shard_id,\n                lsn_range.start,\n                &gate,\n                &cancel,\n                &ctx,\n            )\n            .await\n            .unwrap();\n\n            layer.freeze(lsn_range.end).await;\n\n            layer_map.frozen_layers.push_back(Arc::new(layer));\n        };\n\n        let in_memory_layer_configurations = [\n            vec![],\n            // Overlaps with the top-most image\n            vec![Lsn(35)..Lsn(50)],\n        ];\n\n        let layers = vec![\n            LayerDesc {\n                key_range: Key::from_i128(15)..Key::from_i128(50),\n                lsn_range: Lsn(5)..Lsn(6),\n                is_delta: false,\n            },\n            LayerDesc {\n                key_range: Key::from_i128(10)..Key::from_i128(20),\n                lsn_range: 
Lsn(5)..Lsn(20),\n                is_delta: true,\n            },\n            LayerDesc {\n                key_range: Key::from_i128(15)..Key::from_i128(25),\n                lsn_range: Lsn(20)..Lsn(30),\n                is_delta: true,\n            },\n            LayerDesc {\n                key_range: Key::from_i128(35)..Key::from_i128(40),\n                lsn_range: Lsn(25)..Lsn(35),\n                is_delta: true,\n            },\n            LayerDesc {\n                key_range: Key::from_i128(35)..Key::from_i128(40),\n                lsn_range: Lsn(40)..Lsn(41),\n                is_delta: false,\n            },\n        ];\n\n        let mut layer_map = create_layer_map(layers.clone());\n        for in_memory_layers in in_memory_layer_configurations {\n            for in_mem_layer_range in in_memory_layers {\n                add_in_memory_layer(&mut layer_map, in_mem_layer_range).await;\n            }\n\n            for start in 0..60 {\n                for end in (start + 1)..60 {\n                    let range = Key::from_i128(start)..Key::from_i128(end);\n                    let result = layer_map.range_search(range.clone(), Lsn(100));\n                    let expected = brute_force_range_search(&layer_map, range, Lsn(100));\n\n                    eprintln!(\"{start}..{end}: {result:?}\");\n\n                    assert_range_search_result_eq(result, expected);\n                }\n            }\n        }\n    }\n\n    #[test]\n    fn layer_visibility_basic() {\n        // A simple synthetic input, as a smoke test.\n        let tenant_shard_id = TenantShardId::unsharded(TenantId::generate());\n        let timeline_id = TimelineId::generate();\n        let mut layer_map = LayerMap::default();\n        let mut updates = layer_map.batch_update();\n\n        const FAKE_LAYER_SIZE: u64 = 1024;\n\n        let inject_delta = |updates: &mut BatchedUpdates,\n                            key_start: i128,\n                            key_end: i128,\n              
              lsn_start: u64,\n                            lsn_end: u64| {\n            let desc = PersistentLayerDesc::new_delta(\n                tenant_shard_id,\n                timeline_id,\n                Range {\n                    start: Key::from_i128(key_start),\n                    end: Key::from_i128(key_end),\n                },\n                Range {\n                    start: Lsn(lsn_start),\n                    end: Lsn(lsn_end),\n                },\n                1024,\n            );\n            updates.insert_historic(desc.clone());\n            desc\n        };\n\n        let inject_image =\n            |updates: &mut BatchedUpdates, key_start: i128, key_end: i128, lsn: u64| {\n                let desc = PersistentLayerDesc::new_img(\n                    tenant_shard_id,\n                    timeline_id,\n                    Range {\n                        start: Key::from_i128(key_start),\n                        end: Key::from_i128(key_end),\n                    },\n                    Lsn(lsn),\n                    FAKE_LAYER_SIZE,\n                );\n                updates.insert_historic(desc.clone());\n                desc\n            };\n\n        //\n        // Construct our scenario: the following lines go in backward-LSN order, constructing the various scenarios\n        // we expect to handle.  
You can follow these examples through in the same order as they would be processed\n        // by the function under test.\n        //\n\n        let mut read_points = vec![Lsn(1000)];\n\n        // A delta ahead of any image layer\n        let ahead_layer = inject_delta(&mut updates, 10, 20, 101, 110);\n\n        // An image layer is visible and covers some layers beneath itself\n        let visible_covering_img = inject_image(&mut updates, 5, 25, 99);\n\n        // A delta layer covered by the image layer: should be covered\n        let covered_delta = inject_delta(&mut updates, 10, 20, 90, 100);\n\n        // A delta layer partially covered by an image layer: should be visible\n        let partially_covered_delta = inject_delta(&mut updates, 1, 7, 90, 100);\n\n        // A delta layer not covered by an image layer: should be visible\n        let not_covered_delta = inject_delta(&mut updates, 1, 4, 90, 100);\n\n        // An image layer covered by the image layer above: should be covered\n        let covered_image = inject_image(&mut updates, 10, 20, 89);\n\n        // An image layer partially covered by an image layer: should be visible\n        let partially_covered_image = inject_image(&mut updates, 1, 7, 89);\n\n        // An image layer not covered by an image layer: should be visible\n        let not_covered_image = inject_image(&mut updates, 1, 4, 89);\n\n        // A read point: this will make subsequent layers below here visible, even if there are\n        // more recent layers covering them.\n        read_points.push(Lsn(80));\n\n        // A delta layer covered by an earlier image layer, but visible to a readpoint below that covering layer\n        let covered_delta_below_read_point = inject_delta(&mut updates, 10, 20, 70, 79);\n\n        // A delta layer whose end LSN is covered, but where a read point is present partway through its LSN range:\n        // the read point should make it visible, even though its end LSN is covered\n        let 
covering_img_between_read_points = inject_image(&mut updates, 10, 20, 69);\n        let covered_delta_between_read_points = inject_delta(&mut updates, 10, 15, 67, 69);\n        read_points.push(Lsn(65));\n        let covered_delta_intersects_read_point = inject_delta(&mut updates, 15, 20, 60, 69);\n\n        let visible_img_after_last_read_point = inject_image(&mut updates, 10, 20, 65);\n\n        updates.flush();\n\n        let (layer_visibilities, shadow) = layer_map.get_visibility(read_points);\n        let layer_visibilities = layer_visibilities.into_iter().collect::<HashMap<_, _>>();\n\n        assert_eq!(\n            layer_visibilities.get(&ahead_layer),\n            Some(&LayerVisibilityHint::Visible)\n        );\n        assert_eq!(\n            layer_visibilities.get(&visible_covering_img),\n            Some(&LayerVisibilityHint::Visible)\n        );\n        assert_eq!(\n            layer_visibilities.get(&covered_delta),\n            Some(&LayerVisibilityHint::Covered)\n        );\n        assert_eq!(\n            layer_visibilities.get(&partially_covered_delta),\n            Some(&LayerVisibilityHint::Visible)\n        );\n        assert_eq!(\n            layer_visibilities.get(&not_covered_delta),\n            Some(&LayerVisibilityHint::Visible)\n        );\n        assert_eq!(\n            layer_visibilities.get(&covered_image),\n            Some(&LayerVisibilityHint::Covered)\n        );\n        assert_eq!(\n            layer_visibilities.get(&partially_covered_image),\n            Some(&LayerVisibilityHint::Visible)\n        );\n        assert_eq!(\n            layer_visibilities.get(&not_covered_image),\n            Some(&LayerVisibilityHint::Visible)\n        );\n        assert_eq!(\n            layer_visibilities.get(&covered_delta_below_read_point),\n            Some(&LayerVisibilityHint::Visible)\n        );\n        assert_eq!(\n            layer_visibilities.get(&covering_img_between_read_points),\n            
Some(&LayerVisibilityHint::Visible)\n        );\n        assert_eq!(\n            layer_visibilities.get(&covered_delta_between_read_points),\n            Some(&LayerVisibilityHint::Covered)\n        );\n        assert_eq!(\n            layer_visibilities.get(&covered_delta_intersects_read_point),\n            Some(&LayerVisibilityHint::Visible)\n        );\n        assert_eq!(\n            layer_visibilities.get(&visible_img_after_last_read_point),\n            Some(&LayerVisibilityHint::Visible)\n        );\n\n        // Shadow should include all the images below the last read point\n        let expected_shadow = KeySpace {\n            ranges: vec![Key::from_i128(10)..Key::from_i128(20)],\n        };\n        assert_eq!(shadow, expected_shadow);\n    }\n\n    fn fixture_path(relative: &str) -> PathBuf {\n        PathBuf::from(env!(\"CARGO_MANIFEST_DIR\")).join(relative)\n    }\n\n    #[test]\n    fn layer_visibility_realistic() {\n        // Load a large example layermap\n        let index_raw = std::fs::read_to_string(fixture_path(\n            \"test_data/indices/mixed_workload/index_part.json\",\n        ))\n        .unwrap();\n        let index: IndexPart = serde_json::from_str::<IndexPart>(&index_raw).unwrap();\n\n        let tenant_id = TenantId::generate();\n        let tenant_shard_id = TenantShardId::unsharded(tenant_id);\n        let timeline_id = TimelineId::generate();\n\n        let mut layer_map = LayerMap::default();\n        let mut updates = layer_map.batch_update();\n        for (layer_name, layer_metadata) in index.layer_metadata {\n            let layer_desc = match layer_name {\n                LayerName::Image(layer_name) => PersistentLayerDesc {\n                    key_range: layer_name.key_range.clone(),\n                    lsn_range: layer_name.lsn_as_range(),\n                    tenant_shard_id,\n                    timeline_id,\n                    is_delta: false,\n                    file_size: layer_metadata.file_size,\n          
      },\n                LayerName::Delta(layer_name) => PersistentLayerDesc {\n                    key_range: layer_name.key_range,\n                    lsn_range: layer_name.lsn_range,\n                    tenant_shard_id,\n                    timeline_id,\n                    is_delta: true,\n                    file_size: layer_metadata.file_size,\n                },\n            };\n            updates.insert_historic(layer_desc);\n        }\n        updates.flush();\n\n        let read_points = vec![index.metadata.disk_consistent_lsn()];\n        let (layer_visibilities, shadow) = layer_map.get_visibility(read_points);\n        for (layer_desc, visibility) in &layer_visibilities {\n            tracing::info!(\"{layer_desc:?}: {visibility:?}\");\n            eprintln!(\"{layer_desc:?}: {visibility:?}\");\n        }\n\n        // The shadow should be non-empty, since there were some image layers\n        assert!(!shadow.ranges.is_empty());\n\n        // At least some layers should be marked covered\n        assert!(\n            layer_visibilities\n                .iter()\n                .any(|i| matches!(i.1, LayerVisibilityHint::Covered))\n        );\n\n        let layer_visibilities = layer_visibilities.into_iter().collect::<HashMap<_, _>>();\n\n        // Brute force validation: a layer should be marked covered if and only if there are image layers above it in LSN order which cover it\n        for (layer_desc, visible) in &layer_visibilities {\n            let mut coverage = KeySpaceRandomAccum::new();\n            let mut covered_by = Vec::new();\n\n            for other_layer in layer_map.iter_historic_layers() {\n                if &other_layer == layer_desc {\n                    continue;\n                }\n                if !other_layer.is_delta()\n                    && other_layer.image_layer_lsn() >= Lsn(layer_desc.get_lsn_range().end.0 - 1)\n                    && other_layer.key_range.start <= layer_desc.key_range.end\n                    && 
layer_desc.key_range.start <= other_layer.key_range.end\n                {\n                    coverage.add_range(other_layer.get_key_range());\n                    covered_by.push((*other_layer).clone());\n                }\n            }\n            let coverage = coverage.to_keyspace();\n\n            let expect_visible = if coverage.ranges.len() == 1\n                && coverage.contains(&layer_desc.key_range.start)\n                && coverage.contains(&Key::from_i128(layer_desc.key_range.end.to_i128() - 1))\n            {\n                LayerVisibilityHint::Covered\n            } else {\n                LayerVisibilityHint::Visible\n            };\n\n            if expect_visible != *visible {\n                eprintln!(\n                    \"Layer {}..{} @ {}..{} (delta={}) is {visible:?}, should be {expect_visible:?}\",\n                    layer_desc.key_range.start,\n                    layer_desc.key_range.end,\n                    layer_desc.lsn_range.start,\n                    layer_desc.lsn_range.end,\n                    layer_desc.is_delta()\n                );\n                if expect_visible == LayerVisibilityHint::Covered {\n                    eprintln!(\"Covered by:\");\n                    for other in covered_by {\n                        eprintln!(\n                            \"  {}..{} @ {}\",\n                            other.get_key_range().start,\n                            other.get_key_range().end,\n                            other.image_layer_lsn()\n                        );\n                    }\n                    if let Some(range) = coverage.ranges.first() {\n                        eprintln!(\n                            \"Total coverage from contributing layers: {}..{}\",\n                            range.start, range.end\n                        );\n                    } else {\n                        eprintln!(\n                            \"Total coverage from contributing layers: {:?}\",\n                    
        coverage.ranges\n                        );\n                    }\n                }\n            }\n            assert_eq!(expect_visible, *visible);\n        }\n\n        // Sanity: the layer that holds latest data for the DBDIR key should always be visible\n        // (just using this key as a key that will always exist for any layermap fixture)\n        let dbdir_layer = {\n            let readable_layer = layer_map\n                .search(DBDIR_KEY, index.metadata.disk_consistent_lsn())\n                .unwrap();\n\n            match readable_layer.layer {\n                ReadableLayerWeak::PersistentLayer(desc) => desc,\n                ReadableLayerWeak::InMemoryLayer(_) => unreachable!(\"\"),\n            }\n        };\n        assert!(matches!(\n            layer_visibilities.get(&dbdir_layer).unwrap(),\n            LayerVisibilityHint::Visible\n        ));\n    }\n\n    /* BEGIN_HADRON */\n    #[test]\n    fn test_compute_image_consistent_lsn() {\n        let mut layer_map = LayerMap::default();\n\n        let disk_consistent_lsn = Lsn(1000);\n        // case 1: empty layer map\n        let image_consistent_lsn = layer_map.compute_image_consistent_lsn(disk_consistent_lsn);\n        assert_eq!(\n            disk_consistent_lsn.checked_sub(Lsn(1)).unwrap(),\n            image_consistent_lsn\n        );\n\n        // case 2: only L0 delta layer\n        {\n            let mut updates = layer_map.batch_update();\n            updates.insert_historic(PersistentLayerDesc::new_test(\n                Key::from_i128(0)..Key::from_i128(100),\n                Lsn(900)..Lsn(990),\n                true,\n            ));\n\n            updates.insert_historic(PersistentLayerDesc::new_test(\n                Key::from_i128(0)..Key::from_i128(100),\n                Lsn(850)..Lsn(899),\n                true,\n            ));\n        }\n\n        // should use min L0 delta LSN - 1 as image consistent LSN\n        let image_consistent_lsn = 
layer_map.compute_image_consistent_lsn(disk_consistent_lsn);\n        assert_eq!(Lsn(849), image_consistent_lsn);\n\n        // case 3: 3 images, no L1 delta\n        {\n            let mut updates = layer_map.batch_update();\n            updates.insert_historic(PersistentLayerDesc::new_test(\n                Key::from_i128(0)..Key::from_i128(40),\n                Lsn(100)..Lsn(100),\n                false,\n            ));\n\n            updates.insert_historic(PersistentLayerDesc::new_test(\n                Key::from_i128(40)..Key::from_i128(70),\n                Lsn(200)..Lsn(200),\n                false,\n            ));\n\n            updates.insert_historic(PersistentLayerDesc::new_test(\n                Key::from_i128(70)..Key::from_i128(100),\n                Lsn(150)..Lsn(150),\n                false,\n            ));\n        }\n        // should use min L0 delta LSN - 1 as image consistent LSN\n        let image_consistent_lsn = layer_map.compute_image_consistent_lsn(disk_consistent_lsn);\n        assert_eq!(Lsn(849), image_consistent_lsn);\n\n        // case 4: 3 images with 1 L1 delta\n        {\n            let mut updates = layer_map.batch_update();\n            updates.insert_historic(PersistentLayerDesc::new_test(\n                Key::from_i128(0)..Key::from_i128(50),\n                Lsn(300)..Lsn(350),\n                true,\n            ));\n        }\n        let image_consistent_lsn = layer_map.compute_image_consistent_lsn(disk_consistent_lsn);\n        assert_eq!(Lsn(299), image_consistent_lsn);\n\n        // case 5: 3 images with 1 more L1 delta with smaller LSN\n        {\n            let mut updates = layer_map.batch_update();\n            updates.insert_historic(PersistentLayerDesc::new_test(\n                Key::from_i128(50)..Key::from_i128(72),\n                Lsn(200)..Lsn(300),\n                true,\n            ));\n        }\n        let image_consistent_lsn = layer_map.compute_image_consistent_lsn(disk_consistent_lsn);\n       
 assert_eq!(Lsn(199), image_consistent_lsn);\n\n        // case 6: 3 images with more newer L1 deltas (no impact on final results)\n        {\n            let mut updates = layer_map.batch_update();\n            updates.insert_historic(PersistentLayerDesc::new_test(\n                Key::from_i128(0)..Key::from_i128(30),\n                Lsn(400)..Lsn(500),\n                true,\n            ));\n            updates.insert_historic(PersistentLayerDesc::new_test(\n                Key::from_i128(35)..Key::from_i128(100),\n                Lsn(450)..Lsn(600),\n                true,\n            ));\n        }\n        let image_consistent_lsn = layer_map.compute_image_consistent_lsn(disk_consistent_lsn);\n        assert_eq!(Lsn(199), image_consistent_lsn);\n\n        // case 7: 3 images with more older L1 deltas (no impact on final results)\n        {\n            let mut updates = layer_map.batch_update();\n            updates.insert_historic(PersistentLayerDesc::new_test(\n                Key::from_i128(0)..Key::from_i128(40),\n                Lsn(0)..Lsn(50),\n                true,\n            ));\n\n            updates.insert_historic(PersistentLayerDesc::new_test(\n                Key::from_i128(50)..Key::from_i128(100),\n                Lsn(10)..Lsn(60),\n                true,\n            ));\n        }\n        let image_consistent_lsn = layer_map.compute_image_consistent_lsn(disk_consistent_lsn);\n        assert_eq!(Lsn(199), image_consistent_lsn);\n\n        // case 8: 3 images with one more L1 delta with overlapping LSN range\n        {\n            let mut updates = layer_map.batch_update();\n            updates.insert_historic(PersistentLayerDesc::new_test(\n                Key::from_i128(0)..Key::from_i128(50),\n                Lsn(50)..Lsn(250),\n                true,\n            ));\n        }\n        let image_consistent_lsn = layer_map.compute_image_consistent_lsn(disk_consistent_lsn);\n        assert_eq!(Lsn(100), image_consistent_lsn);\n    
}\n\n    /* END_HADRON */\n}\n\n#[cfg(test)]\nmod select_layer_tests {\n    use super::*;\n\n    fn create_persistent_layer(\n        start_lsn: u64,\n        end_lsn: u64,\n        is_delta: bool,\n    ) -> Arc<PersistentLayerDesc> {\n        if !is_delta {\n            assert_eq!(end_lsn, start_lsn + 1);\n        }\n\n        Arc::new(PersistentLayerDesc::new_test(\n            Key::MIN..Key::MAX,\n            Lsn(start_lsn)..Lsn(end_lsn),\n            is_delta,\n        ))\n    }\n\n    fn create_inmem_layer(start_lsn: u64, end_lsn: u64) -> InMemoryLayerDesc {\n        InMemoryLayerDesc {\n            handle: InMemoryLayerHandle::Open,\n            lsn_range: Lsn(start_lsn)..Lsn(end_lsn),\n        }\n    }\n\n    #[test]\n    fn test_select_layer_empty() {\n        assert!(LayerMap::select_layer(None, None, None, Lsn(100)).is_none());\n    }\n\n    #[test]\n    fn test_select_layer_only_delta() {\n        let delta = create_persistent_layer(10, 20, true);\n        let result = LayerMap::select_layer(Some(delta.clone()), None, None, Lsn(100)).unwrap();\n\n        assert_eq!(result.lsn_floor, Lsn(10));\n        assert!(\n            matches!(result.layer, ReadableLayerWeak::PersistentLayer(l) if Arc::ptr_eq(&l, &delta))\n        );\n    }\n\n    #[test]\n    fn test_select_layer_only_image() {\n        let image = create_persistent_layer(10, 11, false);\n        let result = LayerMap::select_layer(None, Some(image.clone()), None, Lsn(100)).unwrap();\n\n        assert_eq!(result.lsn_floor, Lsn(10));\n        assert!(\n            matches!(result.layer, ReadableLayerWeak::PersistentLayer(l) if Arc::ptr_eq(&l, &image))\n        );\n    }\n\n    #[test]\n    fn test_select_layer_only_inmem() {\n        let inmem = create_inmem_layer(10, 20);\n        let result = LayerMap::select_layer(None, None, Some(inmem.clone()), Lsn(100)).unwrap();\n\n        assert_eq!(result.lsn_floor, Lsn(10));\n        assert!(matches!(result.layer, ReadableLayerWeak::InMemoryLayer(l) if l 
== inmem));\n    }\n\n    #[test]\n    fn test_select_layer_image_inside_delta() {\n        let delta = create_persistent_layer(10, 20, true);\n        let image = create_persistent_layer(15, 16, false);\n\n        let result =\n            LayerMap::select_layer(Some(delta.clone()), Some(image.clone()), None, Lsn(100))\n                .unwrap();\n\n        assert_eq!(result.lsn_floor, Lsn(16));\n        assert!(\n            matches!(result.layer, ReadableLayerWeak::PersistentLayer(l) if Arc::ptr_eq(&l, &delta))\n        );\n\n        let result = LayerMap::select_layer(\n            Some(delta.clone()),\n            Some(image.clone()),\n            None,\n            result.lsn_floor,\n        )\n        .unwrap();\n\n        assert_eq!(result.lsn_floor, Lsn(15));\n        assert!(\n            matches!(result.layer, ReadableLayerWeak::PersistentLayer(l) if Arc::ptr_eq(&l, &image))\n        );\n    }\n\n    #[test]\n    fn test_select_layer_newer_image() {\n        let delta = create_persistent_layer(10, 20, true);\n        let image = create_persistent_layer(25, 26, false);\n\n        let result =\n            LayerMap::select_layer(Some(delta.clone()), Some(image.clone()), None, Lsn(30))\n                .unwrap();\n\n        assert_eq!(result.lsn_floor, Lsn(25));\n        assert!(\n            matches!(result.layer, ReadableLayerWeak::PersistentLayer(l) if Arc::ptr_eq(&l, &image))\n        );\n\n        let result =\n            LayerMap::select_layer(Some(delta.clone()), None, None, result.lsn_floor).unwrap();\n\n        assert_eq!(result.lsn_floor, Lsn(10));\n        assert!(\n            matches!(result.layer, ReadableLayerWeak::PersistentLayer(l) if Arc::ptr_eq(&l, &delta))\n        );\n    }\n\n    #[test]\n    fn test_select_layer_delta_with_older_image() {\n        let delta = create_persistent_layer(15, 25, true);\n        let image = create_persistent_layer(10, 11, false);\n\n        let result =\n            
LayerMap::select_layer(Some(delta.clone()), Some(image.clone()), None, Lsn(30))\n                .unwrap();\n\n        assert_eq!(result.lsn_floor, Lsn(15));\n        assert!(\n            matches!(result.layer, ReadableLayerWeak::PersistentLayer(l) if Arc::ptr_eq(&l, &delta))\n        );\n\n        let result =\n            LayerMap::select_layer(None, Some(image.clone()), None, result.lsn_floor).unwrap();\n\n        assert_eq!(result.lsn_floor, Lsn(10));\n        assert!(\n            matches!(result.layer, ReadableLayerWeak::PersistentLayer(l) if Arc::ptr_eq(&l, &image))\n        );\n    }\n\n    #[test]\n    fn test_select_layer_image_inside_inmem() {\n        let image = create_persistent_layer(15, 16, false);\n        let inmem = create_inmem_layer(10, 25);\n\n        let result =\n            LayerMap::select_layer(None, Some(image.clone()), Some(inmem.clone()), Lsn(30))\n                .unwrap();\n\n        assert_eq!(result.lsn_floor, Lsn(16));\n        assert!(matches!(result.layer, ReadableLayerWeak::InMemoryLayer(l) if l == inmem));\n\n        let result = LayerMap::select_layer(\n            None,\n            Some(image.clone()),\n            Some(inmem.clone()),\n            result.lsn_floor,\n        )\n        .unwrap();\n\n        assert_eq!(result.lsn_floor, Lsn(15));\n        assert!(\n            matches!(result.layer, ReadableLayerWeak::PersistentLayer(l) if Arc::ptr_eq(&l, &image))\n        );\n\n        let result =\n            LayerMap::select_layer(None, None, Some(inmem.clone()), result.lsn_floor).unwrap();\n        assert_eq!(result.lsn_floor, Lsn(10));\n        assert!(matches!(result.layer, ReadableLayerWeak::InMemoryLayer(l) if l == inmem));\n    }\n\n    #[test]\n    fn test_select_layer_delta_inside_inmem() {\n        let delta_top = create_persistent_layer(15, 20, true);\n        let delta_bottom = create_persistent_layer(10, 15, true);\n        let inmem = create_inmem_layer(15, 25);\n\n        let result =\n            
LayerMap::select_layer(Some(delta_top.clone()), None, Some(inmem.clone()), Lsn(30))\n                .unwrap();\n\n        assert_eq!(result.lsn_floor, Lsn(20));\n        assert!(matches!(result.layer, ReadableLayerWeak::InMemoryLayer(l) if l == inmem));\n\n        let result = LayerMap::select_layer(\n            Some(delta_top.clone()),\n            None,\n            Some(inmem.clone()),\n            result.lsn_floor,\n        )\n        .unwrap();\n\n        assert_eq!(result.lsn_floor, Lsn(15));\n        assert!(\n            matches!(result.layer, ReadableLayerWeak::PersistentLayer(l) if Arc::ptr_eq(&l, &delta_top))\n        );\n\n        let result = LayerMap::select_layer(\n            Some(delta_bottom.clone()),\n            None,\n            Some(inmem.clone()),\n            result.lsn_floor,\n        )\n        .unwrap();\n        assert_eq!(result.lsn_floor, Lsn(10));\n        assert!(\n            matches!(result.layer, ReadableLayerWeak::PersistentLayer(l) if Arc::ptr_eq(&l, &delta_bottom))\n        );\n    }\n\n    #[test]\n    fn test_select_layer_all_overlap_1() {\n        let inmem = create_inmem_layer(10, 30);\n        let delta = create_persistent_layer(15, 25, true);\n        let image = create_persistent_layer(20, 21, false);\n\n        let result = LayerMap::select_layer(\n            Some(delta.clone()),\n            Some(image.clone()),\n            Some(inmem.clone()),\n            Lsn(50),\n        )\n        .unwrap();\n\n        assert_eq!(result.lsn_floor, Lsn(25));\n        assert!(matches!(result.layer, ReadableLayerWeak::InMemoryLayer(l) if l == inmem));\n\n        let result = LayerMap::select_layer(\n            Some(delta.clone()),\n            Some(image.clone()),\n            Some(inmem.clone()),\n            result.lsn_floor,\n        )\n        .unwrap();\n\n        assert_eq!(result.lsn_floor, Lsn(21));\n        assert!(\n            matches!(result.layer, ReadableLayerWeak::PersistentLayer(l) if Arc::ptr_eq(&l, &delta))\n  
      );\n\n        let result = LayerMap::select_layer(\n            Some(delta.clone()),\n            Some(image.clone()),\n            Some(inmem.clone()),\n            result.lsn_floor,\n        )\n        .unwrap();\n\n        assert_eq!(result.lsn_floor, Lsn(20));\n        assert!(\n            matches!(result.layer, ReadableLayerWeak::PersistentLayer(l) if Arc::ptr_eq(&l, &image))\n        );\n    }\n\n    #[test]\n    fn test_select_layer_all_overlap_2() {\n        let inmem = create_inmem_layer(20, 30);\n        let delta = create_persistent_layer(10, 40, true);\n        let image = create_persistent_layer(25, 26, false);\n\n        let result = LayerMap::select_layer(\n            Some(delta.clone()),\n            Some(image.clone()),\n            Some(inmem.clone()),\n            Lsn(50),\n        )\n        .unwrap();\n\n        assert_eq!(result.lsn_floor, Lsn(26));\n        assert!(\n            matches!(result.layer, ReadableLayerWeak::PersistentLayer(l) if Arc::ptr_eq(&l, &delta))\n        );\n\n        let result = LayerMap::select_layer(\n            Some(delta.clone()),\n            Some(image.clone()),\n            Some(inmem.clone()),\n            result.lsn_floor,\n        )\n        .unwrap();\n\n        assert_eq!(result.lsn_floor, Lsn(25));\n        assert!(\n            matches!(result.layer, ReadableLayerWeak::PersistentLayer(l) if Arc::ptr_eq(&l, &image))\n        );\n    }\n\n    #[test]\n    fn test_select_layer_all_overlap_3() {\n        let inmem = create_inmem_layer(30, 40);\n        let delta = create_persistent_layer(10, 30, true);\n        let image = create_persistent_layer(20, 21, false);\n\n        let result = LayerMap::select_layer(\n            Some(delta.clone()),\n            Some(image.clone()),\n            Some(inmem.clone()),\n            Lsn(50),\n        )\n        .unwrap();\n\n        assert_eq!(result.lsn_floor, Lsn(30));\n        assert!(matches!(result.layer, ReadableLayerWeak::InMemoryLayer(l) if l == 
inmem));\n\n        let result = LayerMap::select_layer(\n            Some(delta.clone()),\n            Some(image.clone()),\n            None,\n            result.lsn_floor,\n        )\n        .unwrap();\n\n        assert_eq!(result.lsn_floor, Lsn(21));\n        assert!(\n            matches!(result.layer, ReadableLayerWeak::PersistentLayer(l) if Arc::ptr_eq(&l, &delta))\n        );\n\n        let result = LayerMap::select_layer(\n            Some(delta.clone()),\n            Some(image.clone()),\n            None,\n            result.lsn_floor,\n        )\n        .unwrap();\n\n        assert_eq!(result.lsn_floor, Lsn(20));\n        assert!(\n            matches!(result.layer, ReadableLayerWeak::PersistentLayer(l) if Arc::ptr_eq(&l, &image))\n        );\n    }\n}\n"
  },
  {
    "path": "pageserver/src/tenant/metadata.rs",
    "content": "//! Describes the legacy now hopefully no longer modified per-timeline metadata.\n//!\n//! It is stored in `index_part.json` managed by [`remote_timeline_client`]. For many tenants and\n//! their timelines, this struct and its original serialization format is still needed because\n//! they were written a long time ago.\n//!\n//! Instead of changing and adding versioning to this, just change [`IndexPart`] with soft json\n//! versioning.\n//!\n//! To clean up this module we need to migrate all index_part.json files to a later version.\n//! While doing this, we need to be mindful about s3 based recovery as well, so it might take\n//! however long we keep the old versions to be able to delete the old code. After that, we can\n//! remove everything else than [`TimelineMetadataBodyV2`], rename it as `TimelineMetadata` and\n//! move it to `index.rs`. Before doing all of this, we need to keep the structures for backwards\n//! compatibility.\n//!\n//! [`remote_timeline_client`]: super::remote_timeline_client\n//! 
[`IndexPart`]: super::remote_timeline_client::index::IndexPart\n\nuse anyhow::ensure;\nuse postgres_ffi::PgMajorVersion;\nuse serde::{Deserialize, Serialize};\nuse utils::bin_ser::{BeSer, SerializeError};\nuse utils::id::TimelineId;\nuse utils::lsn::Lsn;\n\n/// Use special format number to enable backward compatibility.\nconst METADATA_FORMAT_VERSION: u16 = 4;\n\n/// Previous supported format versions.\n///\n/// In practice, none of these should remain, all are [`METADATA_FORMAT_VERSION`], but confirming\n/// that requires a scrubber run which is yet to be done.\nconst METADATA_OLD_FORMAT_VERSION: u16 = 3;\n\n/// When the file existed on disk we assumed that a write of up to METADATA_MAX_SIZE bytes is atomic.\n///\n/// This is the same assumption that PostgreSQL makes with the control file,\n///\n/// see PG_CONTROL_MAX_SAFE_SIZE\nconst METADATA_MAX_SIZE: usize = 512;\n\n/// Legacy metadata stored as a component of `index_part.json` per timeline.\n///\n/// Do not make new changes to this type or the module. In production, we have two different kinds\n/// of serializations of this type: bincode and json. 
Bincode version reflects what used to be\n/// stored on disk in earlier versions and does internal crc32 checksumming.\n///\n/// This type should not implement `serde::Serialize` or `serde::Deserialize` because there would\n/// be a confusion whether you want the old version ([`TimelineMetadata::from_bytes`]) or the modern\n/// as-exists in `index_part.json` ([`self::modern_serde`]).\n///\n/// ```compile_fail\n/// #[derive(serde::Serialize)]\n/// struct DoNotDoThis(pageserver::tenant::metadata::TimelineMetadata);\n/// ```\n///\n/// ```compile_fail\n/// #[derive(serde::Deserialize)]\n/// struct NeitherDoThis(pageserver::tenant::metadata::TimelineMetadata);\n/// ```\n#[derive(Debug, Clone, PartialEq, Eq)]\npub struct TimelineMetadata {\n    hdr: TimelineMetadataHeader,\n    body: TimelineMetadataBodyV2,\n}\n\n#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]\nstruct TimelineMetadataHeader {\n    checksum: u32,       // CRC of serialized metadata body\n    size: u16,           // size of serialized metadata\n    format_version: u16, // metadata format version (used for compatibility checks)\n}\n\nimpl TryFrom<&TimelineMetadataBodyV2> for TimelineMetadataHeader {\n    type Error = Crc32CalculationFailed;\n\n    fn try_from(value: &TimelineMetadataBodyV2) -> Result<Self, Self::Error> {\n        #[derive(Default)]\n        struct Crc32Sink {\n            crc: u32,\n            count: usize,\n        }\n\n        impl std::io::Write for Crc32Sink {\n            fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {\n                self.crc = crc32c::crc32c_append(self.crc, buf);\n                self.count += buf.len();\n                Ok(buf.len())\n            }\n\n            fn flush(&mut self) -> std::io::Result<()> {\n                Ok(())\n            }\n        }\n\n        // jump through hoops to calculate the crc32 so that TimelineMetadata::ne works\n        // across serialization versions\n        let mut sink = Crc32Sink::default();\n    
    <TimelineMetadataBodyV2 as utils::bin_ser::BeSer>::ser_into(value, &mut sink)\n            .map_err(Crc32CalculationFailed)?;\n\n        let size = METADATA_HDR_SIZE + sink.count;\n\n        Ok(TimelineMetadataHeader {\n            checksum: sink.crc,\n            size: size as u16,\n            format_version: METADATA_FORMAT_VERSION,\n        })\n    }\n}\n\n#[derive(thiserror::Error, Debug)]\n#[error(\"re-serializing for crc32 failed\")]\nstruct Crc32CalculationFailed(#[source] utils::bin_ser::SerializeError);\n\nconst METADATA_HDR_SIZE: usize = size_of::<TimelineMetadataHeader>();\n\n#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]\nstruct TimelineMetadataBodyV2 {\n    disk_consistent_lsn: Lsn,\n    // This is only set if we know it. We track it in memory when the page\n    // server is running, but we only track the value corresponding to\n    // 'last_record_lsn', not 'disk_consistent_lsn' which can lag behind by a\n    // lot. We only store it in the metadata file when we flush *all* the\n    // in-memory data so that 'last_record_lsn' is the same as\n    // 'disk_consistent_lsn'.  That's OK, because after page server restart, as\n    // soon as we reprocess at least one record, we will have a valid\n    // 'prev_record_lsn' value in memory again. This is only really needed when\n    // doing a clean shutdown, so that there is no more WAL beyond\n    // 'disk_consistent_lsn'\n    prev_record_lsn: Option<Lsn>,\n    ancestor_timeline: Option<TimelineId>,\n    ancestor_lsn: Lsn,\n\n    // The LSN at which GC was last executed.  Synonym of [`Timeline::applied_gc_cutoff_lsn`].\n    latest_gc_cutoff_lsn: Lsn,\n\n    initdb_lsn: Lsn,\n    pg_version: PgMajorVersion,\n}\n\n#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]\nstruct TimelineMetadataBodyV1 {\n    disk_consistent_lsn: Lsn,\n    // This is only set if we know it. 
We track it in memory when the page\n    // server is running, but we only track the value corresponding to\n    // 'last_record_lsn', not 'disk_consistent_lsn' which can lag behind by a\n    // lot. We only store it in the metadata file when we flush *all* the\n    // in-memory data so that 'last_record_lsn' is the same as\n    // 'disk_consistent_lsn'.  That's OK, because after page server restart, as\n    // soon as we reprocess at least one record, we will have a valid\n    // 'prev_record_lsn' value in memory again. This is only really needed when\n    // doing a clean shutdown, so that there is no more WAL beyond\n    // 'disk_consistent_lsn'\n    prev_record_lsn: Option<Lsn>,\n    ancestor_timeline: Option<TimelineId>,\n    ancestor_lsn: Lsn,\n    latest_gc_cutoff_lsn: Lsn,\n    initdb_lsn: Lsn,\n}\n\nimpl TimelineMetadata {\n    pub fn new(\n        disk_consistent_lsn: Lsn,\n        prev_record_lsn: Option<Lsn>,\n        ancestor_timeline: Option<TimelineId>,\n        ancestor_lsn: Lsn,\n        latest_gc_cutoff_lsn: Lsn,\n        initdb_lsn: Lsn,\n        pg_version: PgMajorVersion,\n    ) -> Self {\n        Self {\n            hdr: TimelineMetadataHeader {\n                checksum: 0,\n                size: 0,\n                format_version: METADATA_FORMAT_VERSION,\n            },\n            body: TimelineMetadataBodyV2 {\n                disk_consistent_lsn,\n                prev_record_lsn,\n                ancestor_timeline,\n                ancestor_lsn,\n                latest_gc_cutoff_lsn,\n                initdb_lsn,\n                pg_version,\n            },\n        }\n    }\n\n    #[cfg(test)]\n    pub(crate) fn with_recalculated_checksum(mut self) -> anyhow::Result<Self> {\n        self.hdr = TimelineMetadataHeader::try_from(&self.body)?;\n        Ok(self)\n    }\n\n    fn upgrade_timeline_metadata(metadata_bytes: &[u8]) -> anyhow::Result<Self> {\n        let mut hdr = 
TimelineMetadataHeader::des(&metadata_bytes[0..METADATA_HDR_SIZE])?;\n\n        // backward compatible only up to this version\n        ensure!(\n            hdr.format_version == METADATA_OLD_FORMAT_VERSION,\n            \"unsupported metadata format version {}\",\n            hdr.format_version\n        );\n\n        let metadata_size = hdr.size as usize;\n\n        let body: TimelineMetadataBodyV1 =\n            TimelineMetadataBodyV1::des(&metadata_bytes[METADATA_HDR_SIZE..metadata_size])?;\n\n        let body = TimelineMetadataBodyV2 {\n            disk_consistent_lsn: body.disk_consistent_lsn,\n            prev_record_lsn: body.prev_record_lsn,\n            ancestor_timeline: body.ancestor_timeline,\n            ancestor_lsn: body.ancestor_lsn,\n            latest_gc_cutoff_lsn: body.latest_gc_cutoff_lsn,\n            initdb_lsn: body.initdb_lsn,\n            pg_version: PgMajorVersion::PG14, // All timelines created before this version had pg_version 14\n        };\n\n        hdr.format_version = METADATA_FORMAT_VERSION;\n\n        Ok(Self { hdr, body })\n    }\n\n    pub fn from_bytes(metadata_bytes: &[u8]) -> anyhow::Result<Self> {\n        ensure!(\n            metadata_bytes.len() == METADATA_MAX_SIZE,\n            \"metadata bytes size is wrong\"\n        );\n        let hdr = TimelineMetadataHeader::des(&metadata_bytes[0..METADATA_HDR_SIZE])?;\n\n        let metadata_size = hdr.size as usize;\n        ensure!(\n            metadata_size <= METADATA_MAX_SIZE,\n            \"corrupted metadata file\"\n        );\n        let calculated_checksum = crc32c::crc32c(&metadata_bytes[METADATA_HDR_SIZE..metadata_size]);\n        ensure!(\n            hdr.checksum == calculated_checksum,\n            \"metadata checksum mismatch\"\n        );\n\n        if hdr.format_version != METADATA_FORMAT_VERSION {\n            // If metadata has the old format,\n            // upgrade it and return the result\n            
TimelineMetadata::upgrade_timeline_metadata(metadata_bytes)\n        } else {\n            let body =\n                TimelineMetadataBodyV2::des(&metadata_bytes[METADATA_HDR_SIZE..metadata_size])?;\n            ensure!(\n                body.disk_consistent_lsn.is_aligned(),\n                \"disk_consistent_lsn is not aligned\"\n            );\n            Ok(TimelineMetadata { hdr, body })\n        }\n    }\n\n    pub fn to_bytes(&self) -> Result<Vec<u8>, SerializeError> {\n        let body_bytes = self.body.ser()?;\n        let metadata_size = METADATA_HDR_SIZE + body_bytes.len();\n        let hdr = TimelineMetadataHeader {\n            size: metadata_size as u16,\n            format_version: METADATA_FORMAT_VERSION,\n            checksum: crc32c::crc32c(&body_bytes),\n        };\n        let hdr_bytes = hdr.ser()?;\n        let mut metadata_bytes = vec![0u8; METADATA_MAX_SIZE];\n        metadata_bytes[0..METADATA_HDR_SIZE].copy_from_slice(&hdr_bytes);\n        metadata_bytes[METADATA_HDR_SIZE..metadata_size].copy_from_slice(&body_bytes);\n        Ok(metadata_bytes)\n    }\n\n    /// [`Lsn`] that corresponds to the corresponding timeline directory\n    /// contents, stored locally in the pageserver workdir.\n    pub fn disk_consistent_lsn(&self) -> Lsn {\n        self.body.disk_consistent_lsn\n    }\n\n    pub fn prev_record_lsn(&self) -> Option<Lsn> {\n        self.body.prev_record_lsn\n    }\n\n    pub fn ancestor_timeline(&self) -> Option<TimelineId> {\n        self.body.ancestor_timeline\n    }\n\n    pub fn ancestor_lsn(&self) -> Lsn {\n        self.body.ancestor_lsn\n    }\n\n    /// When reparenting, the `ancestor_lsn` does not change.\n    ///\n    /// Does not return a value. Panics if the timeline has no ancestor.\n    pub fn reparent(&mut self, timeline: &TimelineId) {\n        assert!(self.body.ancestor_timeline.is_some());\n        // no assertion for redoing this: it's fine, we may have to repeat this multiple times over\n        self.body.ancestor_timeline = 
Some(*timeline);\n    }\n\n    /// Clears the ancestor: `ancestor_timeline` becomes `None`, `ancestor_lsn` becomes `Lsn(0)`.\n    pub fn detach_from_ancestor(&mut self, branchpoint: &(TimelineId, Lsn)) {\n        // Detaching from ancestor now doesn't always detach directly to the direct ancestor, but we\n        // ensure the LSN is the same. So we don't check the timeline ID.\n        if self.body.ancestor_lsn != Lsn(0) {\n            assert_eq!(self.body.ancestor_lsn, branchpoint.1);\n        }\n        self.body.ancestor_timeline = None;\n        self.body.ancestor_lsn = Lsn(0);\n    }\n\n    pub fn latest_gc_cutoff_lsn(&self) -> Lsn {\n        self.body.latest_gc_cutoff_lsn\n    }\n\n    pub fn initdb_lsn(&self) -> Lsn {\n        self.body.initdb_lsn\n    }\n\n    pub fn pg_version(&self) -> PgMajorVersion {\n        self.body.pg_version\n    }\n\n    // Checksums make it awkward to build a valid instance by hand.  This helper\n    // provides a TimelineMetadata with a valid checksum in its header.\n    pub fn example() -> Self {\n        let instance = Self::new(\n            \"0/16960E8\".parse::<Lsn>().unwrap(),\n            None,\n            None,\n            Lsn::from_hex(\"00000000\").unwrap(),\n            Lsn::from_hex(\"00000000\").unwrap(),\n            Lsn::from_hex(\"00000000\").unwrap(),\n            PgMajorVersion::PG14,\n        );\n        let bytes = instance.to_bytes().unwrap();\n        Self::from_bytes(&bytes).unwrap()\n    }\n\n    pub(crate) fn apply(&mut self, update: &MetadataUpdate) {\n        self.body.disk_consistent_lsn = update.disk_consistent_lsn;\n        self.body.prev_record_lsn = update.prev_record_lsn;\n        self.body.latest_gc_cutoff_lsn = update.latest_gc_cutoff_lsn;\n    }\n}\n\npub(crate) mod modern_serde {\n    use serde::{Deserialize, Serialize};\n\n    use super::{TimelineMetadata, TimelineMetadataBodyV2, TimelineMetadataHeader};\n\n    pub(crate) fn deserialize<'de, D>(deserializer: D) -> Result<TimelineMetadata, D::Error>\n    where\n        D: 
serde::de::Deserializer<'de>,\n    {\n        // for legacy reasons versions 1-5 had TimelineMetadata serialized as a Vec<u8> field with\n        // BeSer.\n        struct Visitor;\n\n        impl<'d> serde::de::Visitor<'d> for Visitor {\n            type Value = TimelineMetadata;\n\n            fn expecting(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {\n                f.write_str(\"BeSer bytes or json structure\")\n            }\n\n            fn visit_seq<A>(self, seq: A) -> Result<Self::Value, A::Error>\n            where\n                A: serde::de::SeqAccess<'d>,\n            {\n                use serde::de::Error;\n                let de = serde::de::value::SeqAccessDeserializer::new(seq);\n                Vec::<u8>::deserialize(de)\n                    .map(|v| TimelineMetadata::from_bytes(&v).map_err(A::Error::custom))?\n            }\n\n            fn visit_map<A>(self, map: A) -> Result<Self::Value, A::Error>\n            where\n                A: serde::de::MapAccess<'d>,\n            {\n                use serde::de::Error;\n\n                let de = serde::de::value::MapAccessDeserializer::new(map);\n                let body = TimelineMetadataBodyV2::deserialize(de)?;\n                let hdr = TimelineMetadataHeader::try_from(&body).map_err(A::Error::custom)?;\n\n                Ok(TimelineMetadata { hdr, body })\n            }\n        }\n\n        deserializer.deserialize_any(Visitor)\n    }\n\n    pub(crate) fn serialize<S>(\n        metadata: &TimelineMetadata,\n        serializer: S,\n    ) -> Result<S::Ok, S::Error>\n    where\n        S: serde::Serializer,\n    {\n        // header is not needed, upon reading we've upgraded all v1 to v2\n        metadata.body.serialize(serializer)\n    }\n\n    #[test]\n    fn deserializes_bytes_as_well_as_equivalent_body_v2() {\n        #[derive(serde::Deserialize, serde::Serialize)]\n        struct Wrapper(\n            #[serde(deserialize_with = \"deserialize\", serialize_with = 
\"serialize\")]\n            TimelineMetadata,\n        );\n\n        let too_many_bytes = \"[216,111,252,208,0,54,0,4,0,0,0,0,1,73,253,144,1,0,0,0,0,1,73,253,24,0,0,0,0,0,0,0,0,0,0,0,0,0,1,73,253,24,0,0,0,0,1,73,253,24,0,0,0,15,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]\";\n\n        let wrapper_from_bytes = serde_json::from_str::<Wrapper>(too_many_bytes).unwrap();\n\n        let serialized = serde_json::to_value(&wrapper_from_bytes).unwrap();\n\n        assert_eq!(\n            serialized,\n            serde_json::json! 
{{\n                \"disk_consistent_lsn\": \"0/149FD90\",\n                \"prev_record_lsn\": \"0/149FD18\",\n                \"ancestor_timeline\": null,\n                \"ancestor_lsn\": \"0/0\",\n                \"latest_gc_cutoff_lsn\": \"0/149FD18\",\n                \"initdb_lsn\": \"0/149FD18\",\n                \"pg_version\": 15\n            }}\n        );\n\n        let wrapper_from_json = serde_json::value::from_value::<Wrapper>(serialized).unwrap();\n\n        assert_eq!(wrapper_from_bytes.0, wrapper_from_json.0);\n    }\n}\n\n/// Parts of the metadata which are regularly modified.\npub(crate) struct MetadataUpdate {\n    disk_consistent_lsn: Lsn,\n    prev_record_lsn: Option<Lsn>,\n    latest_gc_cutoff_lsn: Lsn,\n}\n\nimpl MetadataUpdate {\n    pub(crate) fn new(\n        disk_consistent_lsn: Lsn,\n        prev_record_lsn: Option<Lsn>,\n        latest_gc_cutoff_lsn: Lsn,\n    ) -> Self {\n        Self {\n            disk_consistent_lsn,\n            prev_record_lsn,\n            latest_gc_cutoff_lsn,\n        }\n    }\n}\n\n#[cfg(test)]\nmod tests {\n    use super::*;\n    use crate::tenant::harness::TIMELINE_ID;\n\n    #[test]\n    fn metadata_serializes_correctly() {\n        let original_metadata = TimelineMetadata::new(\n            Lsn(0x200),\n            Some(Lsn(0x100)),\n            Some(TIMELINE_ID),\n            Lsn(0),\n            Lsn(0),\n            Lsn(0),\n            // Any version will do here, so use the default\n            crate::DEFAULT_PG_VERSION,\n        );\n\n        let metadata_bytes = original_metadata\n            .to_bytes()\n            .expect(\"Should serialize correct metadata to bytes\");\n\n        let deserialized_metadata = TimelineMetadata::from_bytes(&metadata_bytes)\n            .expect(\"Should deserialize its own bytes\");\n\n        assert_eq!(\n            deserialized_metadata.body, original_metadata.body,\n            \"Metadata that was serialized to bytes and deserialized back should not 
change\"\n        );\n    }\n\n    // Generate old version metadata and read it with current code.\n    // Ensure that it is upgraded correctly\n    #[test]\n    fn test_metadata_upgrade() {\n        #[derive(Debug, Clone, PartialEq, Eq)]\n        struct TimelineMetadataV1 {\n            hdr: TimelineMetadataHeader,\n            body: TimelineMetadataBodyV1,\n        }\n\n        let metadata_v1 = TimelineMetadataV1 {\n            hdr: TimelineMetadataHeader {\n                checksum: 0,\n                size: 0,\n                format_version: METADATA_OLD_FORMAT_VERSION,\n            },\n            body: TimelineMetadataBodyV1 {\n                disk_consistent_lsn: Lsn(0x200),\n                prev_record_lsn: Some(Lsn(0x100)),\n                ancestor_timeline: Some(TIMELINE_ID),\n                ancestor_lsn: Lsn(0),\n                latest_gc_cutoff_lsn: Lsn(0),\n                initdb_lsn: Lsn(0),\n            },\n        };\n\n        impl TimelineMetadataV1 {\n            pub fn to_bytes(&self) -> anyhow::Result<Vec<u8>> {\n                let body_bytes = self.body.ser()?;\n                let metadata_size = METADATA_HDR_SIZE + body_bytes.len();\n                let hdr = TimelineMetadataHeader {\n                    size: metadata_size as u16,\n                    format_version: METADATA_OLD_FORMAT_VERSION,\n                    checksum: crc32c::crc32c(&body_bytes),\n                };\n                let hdr_bytes = hdr.ser()?;\n                let mut metadata_bytes = vec![0u8; METADATA_MAX_SIZE];\n                metadata_bytes[0..METADATA_HDR_SIZE].copy_from_slice(&hdr_bytes);\n                metadata_bytes[METADATA_HDR_SIZE..metadata_size].copy_from_slice(&body_bytes);\n                Ok(metadata_bytes)\n            }\n        }\n\n        let metadata_bytes = metadata_v1\n            .to_bytes()\n            .expect(\"Should serialize correct metadata to bytes\");\n\n        // This should deserialize to the latest version format\n        
let deserialized_metadata = TimelineMetadata::from_bytes(&metadata_bytes)\n            .expect(\"Should deserialize its own bytes\");\n\n        let expected_metadata = TimelineMetadata::new(\n            Lsn(0x200),\n            Some(Lsn(0x100)),\n            Some(TIMELINE_ID),\n            Lsn(0),\n            Lsn(0),\n            Lsn(0),\n            PgMajorVersion::PG14, // All timelines created before this version had pg_version 14\n        );\n\n        assert_eq!(\n            deserialized_metadata.body, expected_metadata.body,\n            \"Metadata of the old version {METADATA_OLD_FORMAT_VERSION} should be upgraded to the latest version {METADATA_FORMAT_VERSION}\"\n        );\n    }\n\n    #[test]\n    fn test_metadata_bincode_serde_ensure_roundtrip() {\n        let original_metadata = TimelineMetadata::new(\n            Lsn(0x200),\n            Some(Lsn(0x100)),\n            Some(TIMELINE_ID),\n            Lsn(0),\n            Lsn(0),\n            Lsn(0),\n            // Updating this version to 17 will cause the test to fail at the\n            // next assert_eq!().\n            PgMajorVersion::PG16,\n        );\n        let expected_bytes = vec![\n            /* TimelineMetadataHeader */\n            74, 104, 158, 105, 0, 70, 0, 4, // checksum, size, format_version (4 + 2 + 2)\n            /* TimelineMetadataBodyV2 */\n            0, 0, 0, 0, 0, 0, 2, 0, // disk_consistent_lsn (8 bytes)\n            1, 0, 0, 0, 0, 0, 0, 1, 0, // prev_record_lsn (9 bytes)\n            1, 17, 34, 51, 68, 85, 102, 119, 136, 17, 34, 51, 68, 85, 102, 119,\n            136, // ancestor_timeline (17 bytes)\n            0, 0, 0, 0, 0, 0, 0, 0, // ancestor_lsn (8 bytes)\n            0, 0, 0, 0, 0, 0, 0, 0, // latest_gc_cutoff_lsn (8 bytes)\n            0, 0, 0, 0, 0, 0, 0, 0, // initdb_lsn (8 bytes)\n            0, 0, 0, 16, // pg_version (4 bytes)\n            /* padding bytes */\n            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0,\n            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n            0, 0, 0, 0, 0, 0, 0,\n        ];\n        let metadata_ser_bytes = original_metadata.to_bytes().unwrap();\n        assert_eq!(metadata_ser_bytes, expected_bytes);\n\n        let expected_metadata = {\n            let mut temp_metadata = original_metadata;\n            let body_bytes = temp_metadata\n                .body\n                .ser()\n                .expect(\"Cannot serialize the metadata body\");\n            let metadata_size = METADATA_HDR_SIZE + body_bytes.len();\n            let hdr = TimelineMetadataHeader {\n                size: metadata_size as u16,\n      
          format_version: METADATA_FORMAT_VERSION,\n                checksum: crc32c::crc32c(&body_bytes),\n            };\n            temp_metadata.hdr = hdr;\n            temp_metadata\n        };\n        let des_metadata = TimelineMetadata::from_bytes(&metadata_ser_bytes).unwrap();\n        assert_eq!(des_metadata, expected_metadata);\n    }\n}\n"
  },
  {
    "path": "pageserver/src/tenant/mgr.rs",
    "content": "//! This module acts as a switchboard to access different repositories managed by this\n//! page server.\n\nuse std::borrow::Cow;\nuse std::cmp::Ordering;\nuse std::collections::{BTreeMap, HashMap, HashSet};\nuse std::ops::Deref;\nuse std::sync::Arc;\nuse std::time::Duration;\n\nuse anyhow::Context;\nuse camino::{Utf8DirEntry, Utf8Path, Utf8PathBuf};\nuse futures::StreamExt;\nuse itertools::Itertools;\nuse pageserver_api::key::Key;\nuse pageserver_api::models::{DetachBehavior, LocationConfigMode};\nuse pageserver_api::shard::{\n    ShardCount, ShardIdentity, ShardIndex, ShardNumber, ShardStripeSize, TenantShardId,\n};\nuse pageserver_api::upcall_api::ReAttachResponseTenant;\nuse rand::Rng;\nuse rand::distr::Alphanumeric;\nuse remote_storage::TimeoutOrCancel;\nuse sysinfo::SystemExt;\nuse tokio::fs;\nuse tokio::task::JoinSet;\nuse tokio_util::sync::CancellationToken;\nuse tracing::*;\nuse utils::crashsafe::path_with_suffix_extension;\nuse utils::fs_ext::PathExt;\nuse utils::generation::Generation;\nuse utils::id::{TenantId, TimelineId};\nuse utils::{backoff, completion, crashsafe};\n\nuse super::remote_timeline_client::remote_tenant_path;\nuse super::secondary::SecondaryTenant;\nuse super::timeline::detach_ancestor::{self, PreparedTimelineDetach};\nuse super::{GlobalShutDown, TenantSharedResources};\nuse crate::config::PageServerConf;\nuse crate::context::{DownloadBehavior, RequestContext};\nuse crate::controller_upcall_client::{\n    RetryForeverError, StorageControllerUpcallApi, StorageControllerUpcallClient,\n};\nuse crate::deletion_queue::DeletionQueueClient;\nuse crate::http::routes::ACTIVE_TENANT_TIMEOUT;\nuse crate::metrics::{LOCAL_DATA_LOSS_SUSPECTED, TENANT, TENANT_MANAGER as METRICS};\nuse crate::task_mgr::{BACKGROUND_RUNTIME, TaskKind};\nuse crate::tenant::config::{\n    AttachedLocationConfig, AttachmentMode, LocationConf, LocationMode, SecondaryLocationConfig,\n};\nuse crate::tenant::span::debug_assert_current_span_has_tenant_id;\nuse 
crate::tenant::storage_layer::inmemory_layer;\nuse crate::tenant::timeline::ShutdownMode;\nuse crate::tenant::timeline::layer_manager::LayerManagerLockHolder;\nuse crate::tenant::{\n    AttachedTenantConf, GcError, LoadConfigError, SpawnMode, TenantShard, TenantState,\n};\nuse crate::virtual_file::MaybeFatalIo;\nuse crate::{InitializationOrder, TEMP_FILE_SUFFIX};\n\n/// For a tenant that appears in TenantsMap, it may either be\n/// - `Attached`: has a full Tenant object, is eligible to service\n///   reads and ingest WAL.\n/// - `Secondary`: is only keeping a local cache warm.\n///\n/// Secondary is a totally distinct state rather than being a mode of a `Tenant`, because\n/// that way we avoid having to carefully switch a tenant's ingestion etc on and off during\n/// its lifetime, and we can preserve some important safety invariants like `Tenant` always\n/// having a properly acquired generation (Secondary doesn't need a generation)\n#[derive(Clone)]\npub(crate) enum TenantSlot {\n    Attached(Arc<TenantShard>),\n    Secondary(Arc<SecondaryTenant>),\n    /// In this state, other administrative operations acting on the TenantId should\n    /// block, or return a retry indicator equivalent to HTTP 503.\n    InProgress(utils::completion::Barrier),\n}\n\nimpl std::fmt::Debug for TenantSlot {\n    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {\n        match self {\n            Self::Attached(tenant) => write!(f, \"Attached({})\", tenant.current_state()),\n            Self::Secondary(_) => write!(f, \"Secondary\"),\n            Self::InProgress(_) => write!(f, \"InProgress\"),\n        }\n    }\n}\n\nimpl TenantSlot {\n    /// Return the `Tenant` in this slot if attached, else None\n    fn get_attached(&self) -> Option<&Arc<TenantShard>> {\n        match self {\n            Self::Attached(t) => Some(t),\n            Self::Secondary(_) => None,\n            Self::InProgress(_) => None,\n        }\n    }\n}\n\n/// The tenants known to the 
pageserver.\n/// The enum variants are used to distinguish the different states that the pageserver can be in.\npub(crate) enum TenantsMap {\n    /// [`init_tenant_mgr`] is not done yet.\n    Initializing,\n    /// [`init_tenant_mgr`] is done, all on-disk tenants have been loaded.\n    /// New tenants can be added using [`TenantManager::tenant_map_acquire_slot`].\n    Open(BTreeMap<TenantShardId, TenantSlot>),\n    /// The pageserver has entered shutdown mode via [`TenantManager::shutdown`].\n    /// Existing tenants are still accessible, but no new tenants can be created.\n    ShuttingDown(BTreeMap<TenantShardId, TenantSlot>),\n}\n\n/// When resolving a TenantId to a shard, we may be looking for the 0th\n/// shard, or we might be looking for whichever shard holds a particular page.\n#[derive(Copy, Clone)]\npub(crate) enum ShardSelector {\n    /// Only return the 0th shard, if it is present.  If a non-0th shard is present,\n    /// ignore it.\n    Zero,\n    /// Pick the shard that holds this key\n    Page(Key),\n    /// The shard ID is known: pick the given shard\n    Known(ShardIndex),\n}\n\n/// A convenience for use with the re_attach ControllerUpcallClient function: rather\n/// than the serializable struct, we build this enum that encapsulates\n/// the invariant that attached tenants always have generations.\n///\n/// This represents the subset of a LocationConfig that we receive during re-attach.\npub(crate) enum TenantStartupMode {\n    Attached((AttachmentMode, Generation, ShardStripeSize)),\n    Secondary,\n}\n\nimpl TenantStartupMode {\n    /// Return the generation & mode that should be used when starting\n    /// this tenant.\n    ///\n    /// If this returns None, the re-attach struct is in an invalid state and\n    /// should be ignored in the response.\n    fn from_reattach_tenant(rart: ReAttachResponseTenant) -> Option<Self> {\n        match (rart.mode, rart.r#gen) {\n            (LocationConfigMode::Detached, _) => None,\n            
(LocationConfigMode::Secondary, _) => Some(Self::Secondary),\n            (LocationConfigMode::AttachedMulti, Some(g)) => Some(Self::Attached((\n                AttachmentMode::Multi,\n                Generation::new(g),\n                rart.stripe_size,\n            ))),\n            (LocationConfigMode::AttachedSingle, Some(g)) => Some(Self::Attached((\n                AttachmentMode::Single,\n                Generation::new(g),\n                rart.stripe_size,\n            ))),\n            (LocationConfigMode::AttachedStale, Some(g)) => Some(Self::Attached((\n                AttachmentMode::Stale,\n                Generation::new(g),\n                rart.stripe_size,\n            ))),\n            _ => {\n                tracing::warn!(\n                    \"Received invalid re-attach state for tenant {}: {rart:?}\",\n                    rart.id\n                );\n                None\n            }\n        }\n    }\n}\n\n/// Result type for looking up a TenantId to a specific shard\npub(crate) enum ShardResolveResult {\n    NotFound,\n    Found(Arc<TenantShard>),\n    // Wait for this barrrier, then query again\n    InProgress(utils::completion::Barrier),\n}\n\nimpl TenantsMap {\n    /// Convenience function for typical usage, where we want to get a `Tenant` object, for\n    /// working with attached tenants.  
If the TenantId is in the map but in Secondary state,\n    /// None is returned.\n    pub(crate) fn get(&self, tenant_shard_id: &TenantShardId) -> Option<&Arc<TenantShard>> {\n        match self {\n            TenantsMap::Initializing => None,\n            TenantsMap::Open(m) | TenantsMap::ShuttingDown(m) => {\n                m.get(tenant_shard_id).and_then(|slot| slot.get_attached())\n            }\n        }\n    }\n\n    #[cfg(all(debug_assertions, not(test)))]\n    pub(crate) fn len(&self) -> usize {\n        match self {\n            TenantsMap::Initializing => 0,\n            TenantsMap::Open(m) | TenantsMap::ShuttingDown(m) => m.len(),\n        }\n    }\n}\n\n/// Precursor to deletion of a tenant dir: we do a fast rename to a tmp path, and then\n/// the slower actual deletion in the background.\n///\n/// This is \"safe\" in that that it won't leave behind a partially deleted directory\n/// at the original path, because we rename with TEMP_FILE_SUFFIX before starting deleting\n/// the contents.\n///\n/// This is pageserver-specific, as it relies on future processes after a crash to check\n/// for TEMP_FILE_SUFFIX when loading things.\nasync fn safe_rename_tenant_dir(path: impl AsRef<Utf8Path>) -> std::io::Result<Utf8PathBuf> {\n    let parent = path\n        .as_ref()\n        .parent()\n        // It is invalid to call this function with a relative path.  
Tenant directories\n        // should always have a parent.\n        .ok_or(std::io::Error::new(\n            std::io::ErrorKind::InvalidInput,\n            \"Path must be absolute\",\n        ))?;\n    let rand_suffix = rand::rng()\n        .sample_iter(&Alphanumeric)\n        .take(8)\n        .map(char::from)\n        .collect::<String>()\n        + TEMP_FILE_SUFFIX;\n    let tmp_path = path_with_suffix_extension(&path, &rand_suffix);\n    fs::rename(path.as_ref(), &tmp_path).await?;\n    fs::File::open(parent)\n        .await?\n        .sync_all()\n        .await\n        .maybe_fatal_err(\"safe_rename_tenant_dir\")?;\n    Ok(tmp_path)\n}\n\n/// See [`Self::spawn`].\n#[derive(Clone, Default)]\npub struct BackgroundPurges(tokio_util::task::TaskTracker);\n\nimpl BackgroundPurges {\n    /// When we have moved a tenant's content to a temporary directory, we may delete it lazily in\n    /// the background, and thereby avoid blocking any API requests on this deletion completing.\n    ///\n    /// Although we are cleaning up the tenant, this task is not meant to be bound by the lifetime of the tenant in memory.\n    /// Thus the [`BackgroundPurges`] type to keep track of these tasks.\n    pub fn spawn(&self, tmp_path: Utf8PathBuf) {\n        // because on shutdown we close and wait, we are misusing TaskTracker a bit.\n        //\n        // so first acquire a token, then check if the tracker has been closed. 
the tracker might get closed\n        // right after, but at least the shutdown will wait for what we are spawning next.\n        let token = self.0.token();\n\n        if self.0.is_closed() {\n            warn!(\n                %tmp_path,\n                \"trying to spawn background purge during shutdown, ignoring\"\n            );\n            return;\n        }\n\n        let span = info_span!(parent: None, \"background_purge\", %tmp_path);\n\n        let task = move || {\n            let _token = token;\n            let _entered = span.entered();\n            if let Err(error) = std::fs::remove_dir_all(tmp_path.as_path()) {\n                // should we fatal_io_error here?\n                warn!(%error, \"failed to purge tenant directory\");\n            }\n        };\n\n        BACKGROUND_RUNTIME.spawn_blocking(task);\n    }\n\n    /// When this future completes, all background purges have completed.\n    /// The first poll of the future will already lock out new background purges spawned via [`Self::spawn`].\n    ///\n    /// Concurrent calls will coalesce.\n    ///\n    /// # Cancellation-Safety\n    ///\n    /// If this future is dropped before polled to completion, concurrent and subsequent\n    /// instances of this future will continue to be correct.\n    #[instrument(skip_all)]\n    pub async fn shutdown(&self) {\n        // forbid new tasks (can be called many times)\n        self.0.close();\n        self.0.wait().await;\n    }\n}\n\n/// Responsible for storing and mutating the collection of all tenants\n/// that this pageserver has state for.\n///\n/// Every Tenant and SecondaryTenant instance lives inside the TenantManager.\n///\n/// The most important role of the TenantManager is to prevent conflicts: e.g. 
trying to attach\n/// the same tenant twice concurrently, or trying to configure the same tenant into secondary\n/// and attached modes concurrently.\npub struct TenantManager {\n    conf: &'static PageServerConf,\n    tenants: std::sync::RwLock<TenantsMap>,\n    resources: TenantSharedResources,\n\n    // Long-running operations that happen outside of a [`Tenant`] lifetime should respect this token.\n    // This is for edge cases like tenant deletion.  In normal cases (within a Tenant lifetime),\n    // tenants have their own cancellation tokens, which we fire individually in [`Self::shutdown`], or\n    // when the tenant detaches.\n    cancel: CancellationToken,\n\n    background_purges: BackgroundPurges,\n}\n\nfn emergency_generations(\n    tenant_confs: &HashMap<TenantShardId, Result<LocationConf, LoadConfigError>>,\n) -> HashMap<TenantShardId, TenantStartupMode> {\n    tenant_confs\n        .iter()\n        .filter_map(|(tid, lc)| {\n            let lc = match lc {\n                Ok(lc) => lc,\n                Err(_) => return None,\n            };\n            Some((\n                *tid,\n                match &lc.mode {\n                    LocationMode::Attached(alc) => TenantStartupMode::Attached((\n                        alc.attach_mode,\n                        alc.generation,\n                        lc.shard.stripe_size,\n                    )),\n                    LocationMode::Secondary(_) => TenantStartupMode::Secondary,\n                },\n            ))\n        })\n        .collect()\n}\n\nasync fn init_load_generations(\n    conf: &'static PageServerConf,\n    tenant_confs: &HashMap<TenantShardId, Result<LocationConf, LoadConfigError>>,\n    resources: &TenantSharedResources,\n    cancel: &CancellationToken,\n) -> anyhow::Result<Option<HashMap<TenantShardId, TenantStartupMode>>> {\n    let generations = if conf.control_plane_emergency_mode {\n        error!(\n            \"Emergency mode!  
Tenants will be attached unsafely using their last known generation\"\n        );\n        emergency_generations(tenant_confs)\n    } else {\n        let client = StorageControllerUpcallClient::new(conf, cancel);\n        info!(\"Calling {} API to re-attach tenants\", client.base_url());\n        // If we are configured to use the control plane API, then it is the source of truth for what tenants to load.\n        let empty_local_disk = tenant_confs.is_empty();\n        match client.re_attach(conf, empty_local_disk).await {\n            Ok(tenants) => tenants\n                .into_iter()\n                .flat_map(|(id, rart)| {\n                    TenantStartupMode::from_reattach_tenant(rart).map(|tsm| (id, tsm))\n                })\n                .collect(),\n            Err(RetryForeverError::ShuttingDown) => {\n                anyhow::bail!(\"Shut down while waiting for control plane re-attach response\")\n            }\n        }\n    };\n\n    // The deletion queue needs to know about the startup attachment state to decide which (if any) stored\n    // deletion list entries may still be valid.  We provide that by pushing a recovery operation into\n    // the queue. 
Sequential processing of te queue ensures that recovery is done before any new tenant deletions\n    // are processed, even though we don't block on recovery completing here.\n    let attached_tenants = generations\n        .iter()\n        .flat_map(|(id, start_mode)| {\n            match start_mode {\n                TenantStartupMode::Attached((_mode, generation, _stripe_size)) => Some(generation),\n                TenantStartupMode::Secondary => None,\n            }\n            .map(|gen_| (*id, *gen_))\n        })\n        .collect();\n    resources.deletion_queue_client.recover(attached_tenants)?;\n\n    Ok(Some(generations))\n}\n\n/// Given a directory discovered in the pageserver's tenants/ directory, attempt\n/// to load a tenant config from it.\n///\n/// If we cleaned up something expected (like an empty dir or a temp dir), return None.\nfn load_tenant_config(\n    conf: &'static PageServerConf,\n    tenant_shard_id: TenantShardId,\n    dentry: Utf8DirEntry,\n) -> Option<Result<LocationConf, LoadConfigError>> {\n    let tenant_dir_path = dentry.path().to_path_buf();\n    if crate::is_temporary(&tenant_dir_path) {\n        info!(\"Found temporary tenant directory, removing: {tenant_dir_path}\");\n        // No need to use safe_remove_tenant_dir_all because this is already\n        // a temporary path\n        std::fs::remove_dir_all(&tenant_dir_path).fatal_err(\"delete temporary tenant dir\");\n        return None;\n    }\n\n    // This case happens if we crash during attachment before writing a config into the dir\n    let is_empty = tenant_dir_path\n        .is_empty_dir()\n        .fatal_err(\"Checking for empty tenant dir\");\n    if is_empty {\n        info!(\"removing empty tenant directory {tenant_dir_path:?}\");\n        std::fs::remove_dir(&tenant_dir_path).fatal_err(\"delete empty tenant dir\");\n        return None;\n    }\n\n    Some(TenantShard::load_tenant_config(conf, &tenant_shard_id))\n}\n\n/// Initial stage of load: walk the local 
tenants directory, clean up any temp files,\n/// and load configurations for the tenants we found.\n///\n/// Do this in parallel, because we expect 10k+ tenants, so serial execution can take\n/// seconds even on reasonably fast drives.\nasync fn init_load_tenant_configs(\n    conf: &'static PageServerConf,\n) -> HashMap<TenantShardId, Result<LocationConf, LoadConfigError>> {\n    let tenants_dir = conf.tenants_path();\n\n    let dentries = tokio::task::spawn_blocking(move || -> Vec<Utf8DirEntry> {\n        let context = format!(\"read tenants dir {tenants_dir}\");\n        let dir_entries = tenants_dir.read_dir_utf8().fatal_err(&context);\n\n        dir_entries\n            .collect::<Result<Vec<_>, std::io::Error>>()\n            .fatal_err(&context)\n    })\n    .await\n    .expect(\"Config load task panicked\");\n\n    let mut configs = HashMap::new();\n\n    let mut join_set = JoinSet::new();\n    for dentry in dentries {\n        let tenant_shard_id = match dentry.file_name().parse::<TenantShardId>() {\n            Ok(id) => id,\n            Err(_) => {\n                warn!(\n                    \"Invalid tenant path (garbage in our repo directory?): '{}'\",\n                    dentry.file_name()\n                );\n                continue;\n            }\n        };\n\n        join_set.spawn_blocking(move || {\n            (\n                tenant_shard_id,\n                load_tenant_config(conf, tenant_shard_id, dentry),\n            )\n        });\n    }\n\n    while let Some(r) = join_set.join_next().await {\n        let (tenant_shard_id, tenant_config) = r.expect(\"Panic in config load task\");\n        if let Some(tenant_config) = tenant_config {\n            configs.insert(tenant_shard_id, tenant_config);\n        }\n    }\n\n    configs\n}\n\n#[derive(Debug, thiserror::Error)]\npub(crate) enum DeleteTenantError {\n    #[error(\"Tenant map slot error {0}\")]\n    SlotError(#[from] TenantSlotError),\n\n    #[error(\"Cancelled\")]\n    
Cancelled,\n\n    #[error(transparent)]\n    Other(#[from] anyhow::Error),\n}\n\n/// Initialize repositories at `Initializing` state.\npub fn init(\n    conf: &'static PageServerConf,\n    background_purges: BackgroundPurges,\n    resources: TenantSharedResources,\n    cancel: CancellationToken,\n) -> TenantManager {\n    TenantManager {\n        conf,\n        tenants: std::sync::RwLock::new(TenantsMap::Initializing),\n        resources,\n        cancel,\n        background_purges,\n    }\n}\n\n/// Transition repositories from `Initializing` state to `Open` state with locally available timelines.\n/// Timelines that are only partially available locally (remote storage has more data than this pageserver)\n/// are scheduled for download and added to the tenant once download is completed.\n#[instrument(skip_all)]\npub async fn init_tenant_mgr(\n    tenant_manager: Arc<TenantManager>,\n    init_order: InitializationOrder,\n) -> anyhow::Result<()> {\n    debug_assert!(matches!(\n        *tenant_manager.tenants.read().unwrap(),\n        TenantsMap::Initializing\n    ));\n    let mut tenants = BTreeMap::new();\n\n    let ctx = RequestContext::todo_child(TaskKind::Startup, DownloadBehavior::Warn);\n\n    let conf = tenant_manager.conf;\n    let resources = &tenant_manager.resources;\n    let cancel = &tenant_manager.cancel;\n    let background_purges = &tenant_manager.background_purges;\n\n    // Initialize dynamic limits that depend on system resources\n    let system_memory =\n        sysinfo::System::new_with_specifics(sysinfo::RefreshKind::new().with_memory())\n            .total_memory();\n    let max_ephemeral_layer_bytes =\n        conf.ephemeral_bytes_per_memory_kb as u64 * (system_memory / 1024);\n    tracing::info!(\n        \"Initialized ephemeral layer size limit to {max_ephemeral_layer_bytes}, for {system_memory} bytes of memory\"\n    );\n    inmemory_layer::GLOBAL_RESOURCES.max_dirty_bytes.store(\n        max_ephemeral_layer_bytes,\n        
std::sync::atomic::Ordering::Relaxed,\n    );\n\n    // Scan local filesystem for attached tenants\n    let tenant_configs = init_load_tenant_configs(conf).await;\n\n    // Determine which tenants are to be secondary or attached, and in which generation\n    let tenant_modes = init_load_generations(conf, &tenant_configs, resources, cancel).await?;\n\n    // Hadron local SSD check: Raise an alert if our local filesystem does not contain any tenants but the re-attach request returned tenants.\n    // This can happen if the PS suffered a Kubernetes node failure resulting in loss of all local data, but recovered quickly on another node\n    // so the Storage Controller has not had the time to move tenants out.\n    let data_loss_suspected = if let Some(tenant_modes) = &tenant_modes {\n        tenant_configs.is_empty() && !tenant_modes.is_empty()\n    } else {\n        false\n    };\n    if data_loss_suspected {\n        tracing::error!(\n            \"Local data loss suspected: no tenants found on local filesystem, but re-attach request returned tenants\"\n        );\n    }\n    LOCAL_DATA_LOSS_SUSPECTED.set(if data_loss_suspected { 1 } else { 0 });\n\n    tracing::info!(\n        \"Attaching {} tenants at startup, warming up {} at a time\",\n        tenant_configs.len(),\n        conf.concurrent_tenant_warmup.initial_permits()\n    );\n    TENANT.startup_scheduled.inc_by(tenant_configs.len() as u64);\n\n    // Accumulate futures for writing tenant configs, so that we can execute in parallel\n    let mut config_write_futs = Vec::new();\n\n    // Update the location configs according to the re-attach response and persist them to disk\n    tracing::info!(\"Updating {} location configs\", tenant_configs.len());\n    for (tenant_shard_id, location_conf) in tenant_configs {\n        let tenant_dir_path = conf.tenant_path(&tenant_shard_id);\n\n        let mut location_conf = match location_conf {\n            Ok(l) => l,\n            Err(e) => {\n                // This 
should only happen in the case of a serialization bug or critical local I/O error: we cannot load this tenant\n                error!(tenant_id=%tenant_shard_id.tenant_id, shard_id=%tenant_shard_id.shard_slug(), \"Failed to load tenant config, failed to {e:#}\");\n                continue;\n            }\n        };\n\n        // FIXME: if we were attached, and get demoted to secondary on re-attach, we\n        // don't have a place to get a config.\n        // (https://github.com/neondatabase/neon/issues/5377)\n        const DEFAULT_SECONDARY_CONF: SecondaryLocationConfig =\n            SecondaryLocationConfig { warm: true };\n\n        if let Some(tenant_modes) = &tenant_modes {\n            // We have a generation map: treat it as the authority for whether\n            // this tenant is really attached.\n            match tenant_modes.get(&tenant_shard_id) {\n                None => {\n                    info!(tenant_id=%tenant_shard_id.tenant_id, shard_id=%tenant_shard_id.shard_slug(), \"Detaching tenant, control plane omitted it in re-attach response\");\n\n                    match safe_rename_tenant_dir(&tenant_dir_path).await {\n                        Ok(tmp_path) => {\n                            background_purges.spawn(tmp_path);\n                        }\n                        Err(e) => {\n                            error!(tenant_id=%tenant_shard_id.tenant_id, shard_id=%tenant_shard_id.shard_slug(),\n                            \"Failed to move detached tenant directory '{tenant_dir_path}': {e:?}\");\n                        }\n                    };\n\n                    // We deleted local content: move on to next tenant, don't try and spawn this one.\n                    continue;\n                }\n                Some(TenantStartupMode::Secondary) => {\n                    if !matches!(location_conf.mode, LocationMode::Secondary(_)) {\n                        location_conf.mode = LocationMode::Secondary(DEFAULT_SECONDARY_CONF);\n             
       }\n                }\n                Some(TenantStartupMode::Attached((attach_mode, generation, stripe_size))) => {\n                    let old_gen_higher = match &location_conf.mode {\n                        LocationMode::Attached(AttachedLocationConfig {\n                            generation: old_generation,\n                            attach_mode: _attach_mode,\n                        }) => {\n                            if old_generation > generation {\n                                Some(old_generation)\n                            } else {\n                                None\n                            }\n                        }\n                        _ => None,\n                    };\n                    if let Some(old_generation) = old_gen_higher {\n                        tracing::error!(tenant_id=%tenant_shard_id.tenant_id, shard_id=%tenant_shard_id.shard_slug(),\n                            \"Control plane gave decreasing generation ({generation:?}) in re-attach response for tenant that was attached in generation {:?}, demoting to secondary\",\n                            old_generation\n                        );\n\n                        // We cannot safely attach this tenant given a bogus generation number, but let's avoid throwing away\n                        // local disk content: demote to secondary rather than detaching.\n                        location_conf.mode = LocationMode::Secondary(DEFAULT_SECONDARY_CONF);\n                    } else {\n                        location_conf.attach_in_generation(*attach_mode, *generation, *stripe_size);\n                    }\n                }\n            }\n        } else {\n            // Legacy mode: no generation information, any tenant present\n            // on local disk may activate\n            info!(tenant_id=%tenant_shard_id.tenant_id, shard_id=%tenant_shard_id.shard_slug(), \"Starting tenant in legacy mode, no generation\",);\n        };\n\n        // Presence of a 
generation number implies attachment: attach the tenant\n        // if it wasn't already, and apply the generation number.\n        config_write_futs.push(async move {\n            let r =\n                TenantShard::persist_tenant_config(conf, &tenant_shard_id, &location_conf).await;\n            (tenant_shard_id, location_conf, r)\n        });\n    }\n\n    // Execute config writes with concurrency, to avoid bottlenecking on local FS write latency\n    tracing::info!(\n        \"Writing {} location config files...\",\n        config_write_futs.len()\n    );\n    let config_write_results = futures::stream::iter(config_write_futs)\n        .buffer_unordered(16)\n        .collect::<Vec<_>>()\n        .await;\n\n    tracing::info!(\n        \"Spawning {} tenant shard locations...\",\n        config_write_results.len()\n    );\n    // For those shards that have live configurations, construct `Tenant` or `SecondaryTenant` objects and start them running\n    for (tenant_shard_id, location_conf, config_write_result) in config_write_results {\n        // Writing a config to local disk is foundational to startup up tenants: panic if we can't.\n        config_write_result.fatal_err(\"write tenant shard config file\");\n\n        let tenant_dir_path = conf.tenant_path(&tenant_shard_id);\n        let shard_identity = location_conf.shard;\n        let slot = match location_conf.mode {\n            LocationMode::Attached(attached_conf) => TenantSlot::Attached(\n                tenant_spawn(\n                    conf,\n                    tenant_shard_id,\n                    &tenant_dir_path,\n                    resources.clone(),\n                    AttachedTenantConf::new(conf, location_conf.tenant_conf, attached_conf),\n                    shard_identity,\n                    Some(init_order.clone()),\n                    SpawnMode::Lazy,\n                    &ctx,\n                )\n                .expect(\"global shutdown during init_tenant_mgr cannot happen\"),\n    
        ),\n            LocationMode::Secondary(secondary_conf) => {\n                info!(\n                    tenant_id = %tenant_shard_id.tenant_id,\n                    shard_id = %tenant_shard_id.shard_slug(),\n                    \"Starting secondary tenant\"\n                );\n                TenantSlot::Secondary(SecondaryTenant::new(\n                    tenant_shard_id,\n                    shard_identity,\n                    location_conf.tenant_conf,\n                    &secondary_conf,\n                ))\n            }\n        };\n\n        METRICS.slot_inserted(&slot);\n        tenants.insert(tenant_shard_id, slot);\n    }\n\n    info!(\"Processed {} local tenants at startup\", tenants.len());\n\n    let mut tenant_map = tenant_manager.tenants.write().unwrap();\n    *tenant_map = TenantsMap::Open(tenants);\n\n    Ok(())\n}\n\n/// Wrapper for Tenant::spawn that checks invariants before running\n#[allow(clippy::too_many_arguments)]\nfn tenant_spawn(\n    conf: &'static PageServerConf,\n    tenant_shard_id: TenantShardId,\n    tenant_path: &Utf8Path,\n    resources: TenantSharedResources,\n    location_conf: AttachedTenantConf,\n    shard_identity: ShardIdentity,\n    init_order: Option<InitializationOrder>,\n    mode: SpawnMode,\n    ctx: &RequestContext,\n) -> Result<Arc<TenantShard>, GlobalShutDown> {\n    // All these conditions should have been satisfied by our caller: the tenant dir exists, is a well formed\n    // path, and contains a configuration file.  
Assertions that do synchronous I/O are limited to debug mode\n    // to avoid impacting prod runtime performance.\n    assert!(!crate::is_temporary(tenant_path));\n    debug_assert!(tenant_path.is_dir());\n    debug_assert!(\n        conf.tenant_location_config_path(&tenant_shard_id)\n            .try_exists()\n            .unwrap()\n    );\n\n    TenantShard::spawn(\n        conf,\n        tenant_shard_id,\n        resources,\n        location_conf,\n        shard_identity,\n        init_order,\n        mode,\n        ctx,\n    )\n}\n\n#[derive(thiserror::Error, Debug)]\npub(crate) enum UpsertLocationError {\n    #[error(\"Bad config request: {0}\")]\n    BadRequest(anyhow::Error),\n\n    #[error(\"Cannot change config in this state: {0}\")]\n    Unavailable(#[from] TenantMapError),\n\n    #[error(\"Tenant is already being modified\")]\n    InProgress,\n\n    #[error(\"Failed to flush: {0}\")]\n    Flush(anyhow::Error),\n\n    /// This error variant is for unexpected situations (soft assertions) where the system is in an unexpected state.\n    #[error(\"Internal error: {0}\")]\n    InternalError(anyhow::Error),\n}\n\nimpl TenantManager {\n    /// Convenience function so that anyone with a TenantManager can get at the global configuration, without\n    /// having to pass it around everywhere as a separate object.\n    pub(crate) fn get_conf(&self) -> &'static PageServerConf {\n        self.conf\n    }\n\n    /// Gets the attached tenant from the in-memory data, erroring if it's absent, in secondary mode, or currently\n    /// undergoing a state change (i.e. 
slot is InProgress).\n    ///\n    /// The return TenantShard is not guaranteed to be active: check its status after obtaing it, or\n    /// use [`TenantShard::wait_to_become_active`] before using it if you will do I/O on it.\n    pub(crate) fn get_attached_tenant_shard(\n        &self,\n        tenant_shard_id: TenantShardId,\n    ) -> Result<Arc<TenantShard>, GetTenantError> {\n        let locked = self.tenants.read().unwrap();\n\n        let peek_slot = tenant_map_peek_slot(&locked, &tenant_shard_id, TenantSlotPeekMode::Read)?;\n\n        match peek_slot {\n            Some(TenantSlot::Attached(tenant)) => Ok(Arc::clone(tenant)),\n            Some(TenantSlot::InProgress(_)) => Err(GetTenantError::NotActive(tenant_shard_id)),\n            None | Some(TenantSlot::Secondary(_)) => {\n                Err(GetTenantError::ShardNotFound(tenant_shard_id))\n            }\n        }\n    }\n\n    pub(crate) fn get_secondary_tenant_shard(\n        &self,\n        tenant_shard_id: TenantShardId,\n    ) -> Option<Arc<SecondaryTenant>> {\n        let locked = self.tenants.read().unwrap();\n\n        let peek_slot = tenant_map_peek_slot(&locked, &tenant_shard_id, TenantSlotPeekMode::Read)\n            .ok()\n            .flatten();\n\n        match peek_slot {\n            Some(TenantSlot::Secondary(s)) => Some(s.clone()),\n            _ => None,\n        }\n    }\n\n    /// Whether the `TenantManager` is responsible for the tenant shard\n    pub(crate) fn manages_tenant_shard(&self, tenant_shard_id: TenantShardId) -> bool {\n        let locked = self.tenants.read().unwrap();\n\n        let peek_slot = tenant_map_peek_slot(&locked, &tenant_shard_id, TenantSlotPeekMode::Read)\n            .ok()\n            .flatten();\n\n        peek_slot.is_some()\n    }\n\n    /// Returns whether a local shard exists that's a child of the given tenant shard. 
Note that\n    /// this just checks for any shard with a larger shard count, and it may not be a direct child\n    /// of the given shard (their keyspace may not overlap).\n    pub(crate) fn has_child_shard(&self, tenant_id: TenantId, shard_index: ShardIndex) -> bool {\n        match &*self.tenants.read().unwrap() {\n            TenantsMap::Initializing => false,\n            TenantsMap::Open(slots) | TenantsMap::ShuttingDown(slots) => slots\n                .range(TenantShardId::tenant_range(tenant_id))\n                .any(|(tsid, _)| tsid.shard_count > shard_index.shard_count),\n        }\n    }\n\n    #[instrument(skip_all, fields(tenant_id=%tenant_shard_id.tenant_id, shard_id=%tenant_shard_id.shard_slug()))]\n    pub(crate) async fn upsert_location(\n        &self,\n        tenant_shard_id: TenantShardId,\n        new_location_config: LocationConf,\n        flush: Option<Duration>,\n        mut spawn_mode: SpawnMode,\n        ctx: &RequestContext,\n    ) -> Result<Option<Arc<TenantShard>>, UpsertLocationError> {\n        debug_assert_current_span_has_tenant_id();\n        info!(\"configuring tenant location to state {new_location_config:?}\");\n\n        enum FastPathModified {\n            Attached(Arc<TenantShard>),\n            Secondary(Arc<SecondaryTenant>),\n        }\n\n        // Special case fast-path for updates to existing slots: if our upsert is only updating configuration,\n        // then we do not need to set the slot to InProgress, we can just call into the\n        // existng tenant.\n        let fast_path_taken = {\n            let locked = self.tenants.read().unwrap();\n            let peek_slot =\n                tenant_map_peek_slot(&locked, &tenant_shard_id, TenantSlotPeekMode::Write)?;\n            match (&new_location_config.mode, peek_slot) {\n                (LocationMode::Attached(attach_conf), Some(TenantSlot::Attached(tenant))) => {\n                    match attach_conf.generation.cmp(&tenant.generation) {\n                       
 Ordering::Equal => {\n                            // A transition from Attached to Attached in the same generation, we may\n                            // take our fast path and just provide the updated configuration\n                            // to the tenant.\n                            tenant.set_new_location_config(\n                                AttachedTenantConf::try_from(\n                                    self.conf,\n                                    new_location_config.clone(),\n                                )\n                                .map_err(UpsertLocationError::BadRequest)?,\n                            );\n\n                            Some(FastPathModified::Attached(tenant.clone()))\n                        }\n                        Ordering::Less => {\n                            return Err(UpsertLocationError::BadRequest(anyhow::anyhow!(\n                                \"Generation {:?} is less than existing {:?}\",\n                                attach_conf.generation,\n                                tenant.generation\n                            )));\n                        }\n                        Ordering::Greater => {\n                            // Generation advanced, fall through to general case of replacing `Tenant` object\n                            None\n                        }\n                    }\n                }\n                (\n                    LocationMode::Secondary(secondary_conf),\n                    Some(TenantSlot::Secondary(secondary_tenant)),\n                ) => {\n                    secondary_tenant.set_config(secondary_conf);\n                    secondary_tenant.set_tenant_conf(&new_location_config.tenant_conf);\n                    Some(FastPathModified::Secondary(secondary_tenant.clone()))\n                }\n                _ => {\n                    // Not an Attached->Attached transition, fall through to general case\n                    None\n                }\n           
 }\n        };\n\n        // Fast-path continued: having dropped out of the self.tenants lock, do the async\n        // phase of writing config and/or waiting for flush, before returning.\n        match fast_path_taken {\n            Some(FastPathModified::Attached(tenant)) => {\n                tenant\n                    .shard_identity\n                    .assert_equal(new_location_config.shard);\n                TenantShard::persist_tenant_config(\n                    self.conf,\n                    &tenant_shard_id,\n                    &new_location_config,\n                )\n                .await\n                .fatal_err(\"write tenant shard config\");\n\n                // Transition to AttachedStale means we may well hold a valid generation\n                // still, and have been requested to go stale as part of a migration.  If\n                // the caller set `flush`, then flush to remote storage.\n                if let LocationMode::Attached(AttachedLocationConfig {\n                    generation: _,\n                    attach_mode: AttachmentMode::Stale,\n                }) = &new_location_config.mode\n                {\n                    if let Some(flush_timeout) = flush {\n                        match tokio::time::timeout(flush_timeout, tenant.flush_remote()).await {\n                            Ok(Err(e)) => {\n                                return Err(UpsertLocationError::Flush(e));\n                            }\n                            Ok(Ok(_)) => return Ok(Some(tenant)),\n                            Err(_) => {\n                                tracing::warn!(\n                                    timeout_ms = flush_timeout.as_millis(),\n                                    \"Timed out waiting for flush to remote storage, proceeding anyway.\"\n                                )\n                            }\n                        }\n                    }\n                }\n\n                return Ok(Some(tenant));\n        
    }\n            Some(FastPathModified::Secondary(secondary_tenant)) => {\n                secondary_tenant\n                    .shard_identity\n                    .assert_equal(new_location_config.shard);\n                TenantShard::persist_tenant_config(\n                    self.conf,\n                    &tenant_shard_id,\n                    &new_location_config,\n                )\n                .await\n                .fatal_err(\"write tenant shard config\");\n\n                return Ok(None);\n            }\n            None => {\n                // Proceed with the general case procedure, where we will shutdown & remove any existing\n                // slot contents and replace with a fresh one\n            }\n        };\n\n        // General case for upserts to TenantsMap, excluding the case above: we will substitute an\n        // InProgress value to the slot while we make whatever changes are required.  The state for\n        // the tenant is inaccessible to the outside world while we are doing this, but that is sensible:\n        // the state is ill-defined while we're in transition.  
Transitions are async, but fast: we do\n        // not do significant I/O, and shutdowns should be prompt via cancellation tokens.\n        let mut slot_guard = self\n            .tenant_map_acquire_slot(&tenant_shard_id, TenantSlotAcquireMode::Any)\n            .map_err(|e| match e {\n                TenantSlotError::NotFound(_) => {\n                    unreachable!(\"Called with mode Any\")\n                }\n                TenantSlotError::InProgress => UpsertLocationError::InProgress,\n                TenantSlotError::MapState(s) => UpsertLocationError::Unavailable(s),\n            })?;\n\n        match slot_guard.get_old_value() {\n            Some(TenantSlot::Attached(tenant)) => {\n                tenant\n                    .shard_identity\n                    .assert_equal(new_location_config.shard);\n\n                // The case where we keep a Tenant alive was covered above in the special case\n                // for Attached->Attached transitions in the same generation.  By this point,\n                // if we see an attached tenant we know it will be discarded and should be\n                // shut down.\n                let (_guard, progress) = utils::completion::channel();\n\n                match tenant.get_attach_mode() {\n                    AttachmentMode::Single | AttachmentMode::Multi => {\n                        // Before we leave our state as the presumed holder of the latest generation,\n                        // flush any outstanding deletions to reduce the risk of leaking objects.\n                        self.resources.deletion_queue_client.flush_advisory()\n                    }\n                    AttachmentMode::Stale => {\n                        // If we're stale there's no point trying to flush deletions\n                    }\n                };\n\n                info!(\"Shutting down attached tenant\");\n                match tenant.shutdown(progress, ShutdownMode::Hard).await {\n                    Ok(()) => {}\n        
            Err(barrier) => {\n                        info!(\"Shutdown already in progress, waiting for it to complete\");\n                        barrier.wait().await;\n                    }\n                }\n                slot_guard.drop_old_value().expect(\"We just shut it down\");\n\n                // Edge case: if we were called with SpawnMode::Create, but a Tenant already existed, then\n                // the caller thinks they're creating but the tenant already existed.  We must switch to\n                // Eager mode so that when starting this Tenant we properly probe remote storage for timelines,\n                // rather than assuming it to be empty.\n                spawn_mode = SpawnMode::Eager;\n            }\n            Some(TenantSlot::Secondary(secondary_tenant)) => {\n                secondary_tenant\n                    .shard_identity\n                    .assert_equal(new_location_config.shard);\n\n                info!(\"Shutting down secondary tenant\");\n                secondary_tenant.shutdown().await;\n            }\n            Some(TenantSlot::InProgress(_)) => {\n                // This should never happen: acquire_slot should error out\n                // if the contents of a slot were InProgress.\n                return Err(UpsertLocationError::InternalError(anyhow::anyhow!(\n                    \"Acquired an InProgress slot, this is a bug.\"\n                )));\n            }\n            None => {\n                // Slot was vacant, nothing needs shutting down.\n            }\n        }\n\n        let tenant_path = self.conf.tenant_path(&tenant_shard_id);\n        let timelines_path = self.conf.timelines_path(&tenant_shard_id);\n\n        // Directory structure is the same for attached and secondary modes:\n        // create it if it doesn't exist.  
Timeline load/creation expects the\n        // timelines/ subdir to already exist.\n        //\n        // Does not need to be fsync'd because local storage is just a cache.\n        tokio::fs::create_dir_all(&timelines_path)\n            .await\n            .fatal_err(\"create timelines/ dir\");\n\n        // Before activating either secondary or attached mode, persist the\n        // configuration, so that on restart we will re-attach (or re-start\n        // secondary) on the tenant.\n        TenantShard::persist_tenant_config(self.conf, &tenant_shard_id, &new_location_config)\n            .await\n            .fatal_err(\"write tenant shard config\");\n\n        let new_slot = match &new_location_config.mode {\n            LocationMode::Secondary(secondary_config) => {\n                let shard_identity = new_location_config.shard;\n                TenantSlot::Secondary(SecondaryTenant::new(\n                    tenant_shard_id,\n                    shard_identity,\n                    new_location_config.tenant_conf,\n                    secondary_config,\n                ))\n            }\n            LocationMode::Attached(_attach_config) => {\n                let shard_identity = new_location_config.shard;\n\n                // Testing hack: if we are configured with no control plane, then drop the generation\n                // from upserts.  
This enables creating generation-less tenants even though neon_local\n                // always uses generations when calling the location conf API.\n                let attached_conf = AttachedTenantConf::try_from(self.conf, new_location_config)\n                    .map_err(UpsertLocationError::BadRequest)?;\n\n                let tenant = tenant_spawn(\n                    self.conf,\n                    tenant_shard_id,\n                    &tenant_path,\n                    self.resources.clone(),\n                    attached_conf,\n                    shard_identity,\n                    None,\n                    spawn_mode,\n                    ctx,\n                )\n                .map_err(|_: GlobalShutDown| {\n                    UpsertLocationError::Unavailable(TenantMapError::ShuttingDown)\n                })?;\n\n                TenantSlot::Attached(tenant)\n            }\n        };\n\n        let attached_tenant = if let TenantSlot::Attached(tenant) = &new_slot {\n            Some(tenant.clone())\n        } else {\n            None\n        };\n\n        match slot_guard.upsert(new_slot) {\n            Err(TenantSlotUpsertError::InternalError(e)) => {\n                Err(UpsertLocationError::InternalError(anyhow::anyhow!(e)))\n            }\n            Err(TenantSlotUpsertError::MapState(e)) => Err(UpsertLocationError::Unavailable(e)),\n            Err(TenantSlotUpsertError::ShuttingDown((new_slot, _completion))) => {\n                // If we just called tenant_spawn() on a new tenant, and can't insert it into our map, then\n                // we must not leak it: this would violate the invariant that after shutdown_all_tenants, all tenants\n                // are shutdown.\n                //\n                // We must shut it down inline here.\n                match new_slot {\n                    TenantSlot::InProgress(_) => {\n                        // Unreachable because we never insert an InProgress\n                        
unreachable!()\n                    }\n                    TenantSlot::Attached(tenant) => {\n                        let (_guard, progress) = utils::completion::channel();\n                        info!(\n                            \"Shutting down just-spawned tenant, because tenant manager is shut down\"\n                        );\n                        match tenant.shutdown(progress, ShutdownMode::Hard).await {\n                            Ok(()) => {\n                                info!(\"Finished shutting down just-spawned tenant\");\n                            }\n                            Err(barrier) => {\n                                info!(\"Shutdown already in progress, waiting for it to complete\");\n                                barrier.wait().await;\n                            }\n                        }\n                    }\n                    TenantSlot::Secondary(secondary_tenant) => {\n                        secondary_tenant.shutdown().await;\n                    }\n                }\n\n                Err(UpsertLocationError::Unavailable(\n                    TenantMapError::ShuttingDown,\n                ))\n            }\n            Ok(()) => Ok(attached_tenant),\n        }\n    }\n\n    fn tenant_map_acquire_slot(\n        &self,\n        tenant_shard_id: &TenantShardId,\n        mode: TenantSlotAcquireMode,\n    ) -> Result<SlotGuard, TenantSlotError> {\n        use TenantSlotAcquireMode::*;\n        METRICS.tenant_slot_writes.inc();\n\n        let mut locked = self.tenants.write().unwrap();\n        let span = tracing::info_span!(\"acquire_slot\", tenant_id=%tenant_shard_id.tenant_id, shard_id = %tenant_shard_id.shard_slug());\n        let _guard = span.enter();\n\n        let m = match &mut *locked {\n            TenantsMap::Initializing => return Err(TenantMapError::StillInitializing.into()),\n            TenantsMap::ShuttingDown(_) => return Err(TenantMapError::ShuttingDown.into()),\n            TenantsMap::Open(m) => 
m,\n        };\n\n        use std::collections::btree_map::Entry;\n\n        let entry = m.entry(*tenant_shard_id);\n\n        match entry {\n            Entry::Vacant(v) => match mode {\n                MustExist => {\n                    tracing::debug!(\"Vacant && MustExist: return NotFound\");\n                    Err(TenantSlotError::NotFound(*tenant_shard_id))\n                }\n                _ => {\n                    let (completion, barrier) = utils::completion::channel();\n                    let inserting = TenantSlot::InProgress(barrier);\n                    METRICS.slot_inserted(&inserting);\n                    v.insert(inserting);\n                    tracing::debug!(\"Vacant, inserted InProgress\");\n                    Ok(SlotGuard::new(\n                        *tenant_shard_id,\n                        None,\n                        completion,\n                        &self.tenants,\n                    ))\n                }\n            },\n            Entry::Occupied(mut o) => {\n                // Apply mode-driven checks\n                match (o.get(), mode) {\n                    (TenantSlot::InProgress(_), _) => {\n                        tracing::debug!(\"Occupied, failing for InProgress\");\n                        Err(TenantSlotError::InProgress)\n                    }\n                    _ => {\n                        // Happy case: the slot was not in any state that violated our mode\n                        let (completion, barrier) = utils::completion::channel();\n                        let in_progress = TenantSlot::InProgress(barrier);\n                        METRICS.slot_inserted(&in_progress);\n                        let old_value = o.insert(in_progress);\n                        METRICS.slot_removed(&old_value);\n                        tracing::debug!(\"Occupied, replaced with InProgress\");\n                        Ok(SlotGuard::new(\n                            *tenant_shard_id,\n                            
Some(old_value),\n                            completion,\n                            &self.tenants,\n                        ))\n                    }\n                }\n            }\n        }\n    }\n\n    /// Resetting a tenant is equivalent to detaching it, then attaching it again with the same\n    /// LocationConf that was last used to attach it.  Optionally, the local file cache may be\n    /// dropped before re-attaching.\n    ///\n    /// This is not part of a tenant's normal lifecycle: it is used for debug/support, in situations\n    /// where an issue is identified that would go away with a restart of the tenant.\n    ///\n    /// This does not have any special \"force\" shutdown of a tenant: it relies on the tenant's tasks\n    /// to respect the cancellation tokens used in normal shutdown().\n    #[instrument(skip_all, fields(tenant_id=%tenant_shard_id.tenant_id, shard_id=%tenant_shard_id.shard_slug(), %drop_cache))]\n    pub(crate) async fn reset_tenant(\n        &self,\n        tenant_shard_id: TenantShardId,\n        drop_cache: bool,\n        ctx: &RequestContext,\n    ) -> anyhow::Result<()> {\n        let mut slot_guard =\n            self.tenant_map_acquire_slot(&tenant_shard_id, TenantSlotAcquireMode::Any)?;\n        let Some(old_slot) = slot_guard.get_old_value() else {\n            anyhow::bail!(\"Tenant not found when trying to reset\");\n        };\n\n        let Some(tenant) = old_slot.get_attached() else {\n            slot_guard.revert();\n            anyhow::bail!(\"Tenant is not in attached state\");\n        };\n\n        let (_guard, progress) = utils::completion::channel();\n        match tenant.shutdown(progress, ShutdownMode::Hard).await {\n            Ok(()) => {\n                slot_guard.drop_old_value()?;\n            }\n            Err(_barrier) => {\n                slot_guard.revert();\n                anyhow::bail!(\"Cannot reset Tenant, already shutting down\");\n            }\n        }\n\n        let tenant_path = 
self.conf.tenant_path(&tenant_shard_id);\n        let timelines_path = self.conf.timelines_path(&tenant_shard_id);\n        let config = TenantShard::load_tenant_config(self.conf, &tenant_shard_id)?;\n\n        if drop_cache {\n            tracing::info!(\"Dropping local file cache\");\n\n            match tokio::fs::read_dir(&timelines_path).await {\n                Err(e) => {\n                    tracing::warn!(\"Failed to list timelines while dropping cache: {}\", e);\n                }\n                Ok(mut entries) => {\n                    while let Some(entry) = entries.next_entry().await? {\n                        tokio::fs::remove_dir_all(entry.path()).await?;\n                    }\n                }\n            }\n        }\n\n        let shard_identity = config.shard;\n        let tenant = tenant_spawn(\n            self.conf,\n            tenant_shard_id,\n            &tenant_path,\n            self.resources.clone(),\n            AttachedTenantConf::try_from(self.conf, config)?,\n            shard_identity,\n            None,\n            SpawnMode::Eager,\n            ctx,\n        )?;\n\n        slot_guard.upsert(TenantSlot::Attached(tenant))?;\n\n        Ok(())\n    }\n\n    pub(crate) fn get_attached_active_tenant_shards(&self) -> Vec<Arc<TenantShard>> {\n        let locked = self.tenants.read().unwrap();\n        match &*locked {\n            TenantsMap::Initializing => Vec::new(),\n            TenantsMap::Open(map) | TenantsMap::ShuttingDown(map) => map\n                .values()\n                .filter_map(|slot| {\n                    slot.get_attached()\n                        .and_then(|t| if t.is_active() { Some(t.clone()) } else { None })\n                })\n                .collect(),\n        }\n    }\n    // Do some synchronous work for all tenant slots in Secondary state.  
The provided\n    // callback should be small and fast, as it will be called inside the global\n    // TenantsMap lock.\n    pub(crate) fn foreach_secondary_tenants<F>(&self, mut func: F)\n    where\n        // TODO: let the callback return a hint to drop out of the loop early\n        F: FnMut(&TenantShardId, &Arc<SecondaryTenant>),\n    {\n        let locked = self.tenants.read().unwrap();\n\n        let map = match &*locked {\n            TenantsMap::Initializing | TenantsMap::ShuttingDown(_) => return,\n            TenantsMap::Open(m) => m,\n        };\n\n        for (tenant_id, slot) in map {\n            if let TenantSlot::Secondary(state) = slot {\n                // Only expose secondary tenants that are not currently shutting down\n                if !state.cancel.is_cancelled() {\n                    func(tenant_id, state)\n                }\n            }\n        }\n    }\n\n    /// Total list of all tenant slots: this includes attached, secondary, and InProgress.\n    pub(crate) fn list(&self) -> Vec<(TenantShardId, TenantSlot)> {\n        let locked = self.tenants.read().unwrap();\n        match &*locked {\n            TenantsMap::Initializing => Vec::new(),\n            TenantsMap::Open(map) | TenantsMap::ShuttingDown(map) => {\n                map.iter().map(|(k, v)| (*k, v.clone())).collect()\n            }\n        }\n    }\n\n    pub(crate) fn get(&self, tenant_shard_id: TenantShardId) -> Option<TenantSlot> {\n        let locked = self.tenants.read().unwrap();\n        match &*locked {\n            TenantsMap::Initializing => None,\n            TenantsMap::Open(map) | TenantsMap::ShuttingDown(map) => {\n                map.get(&tenant_shard_id).cloned()\n            }\n        }\n    }\n\n    /// If a tenant is attached, detach it.  Then remove its data from remote storage.\n    ///\n    /// A tenant is considered deleted once it is gone from remote storage.  
It is the caller's\n    /// responsibility to avoid trying to attach the tenant again or use it in any way once deletion\n    /// has started: this operation is not atomic, and must be retried until it succeeds.\n    ///\n    /// As a special case, if an unsharded tenant ID is given for a sharded tenant, it will remove\n    /// all tenant shards in remote storage (removing all paths with the tenant prefix). The storage\n    /// controller uses this to purge all remote tenant data, including any stale parent shards that\n    /// may remain after splits. Ideally, this special case would be handled elsewhere. See:\n    /// <https://github.com/neondatabase/neon/pull/9394>.\n    pub(crate) async fn delete_tenant(\n        &self,\n        tenant_shard_id: TenantShardId,\n    ) -> Result<(), DeleteTenantError> {\n        super::span::debug_assert_current_span_has_tenant_id();\n\n        async fn delete_local(\n            conf: &PageServerConf,\n            background_purges: &BackgroundPurges,\n            tenant_shard_id: &TenantShardId,\n        ) -> anyhow::Result<()> {\n            let local_tenant_directory = conf.tenant_path(tenant_shard_id);\n            let tmp_dir = safe_rename_tenant_dir(&local_tenant_directory)\n                .await\n                .with_context(|| {\n                    format!(\"local tenant directory {local_tenant_directory:?} rename\")\n                })?;\n            background_purges.spawn(tmp_dir);\n            Ok(())\n        }\n\n        let slot_guard =\n            self.tenant_map_acquire_slot(&tenant_shard_id, TenantSlotAcquireMode::Any)?;\n        match &slot_guard.old_value {\n            Some(TenantSlot::Attached(tenant)) => {\n                // Legacy deletion flow: the tenant remains attached, goes to Stopping state, and\n                // deletion will be resumed across restarts.\n                let tenant = tenant.clone();\n                let (_guard, progress) = utils::completion::channel();\n                match 
tenant.shutdown(progress, ShutdownMode::Hard).await {\n                    Ok(()) => {}\n                    Err(barrier) => {\n                        info!(\"Shutdown already in progress, waiting for it to complete\");\n                        barrier.wait().await;\n                    }\n                }\n                delete_local(self.conf, &self.background_purges, &tenant_shard_id).await?;\n            }\n            Some(TenantSlot::Secondary(secondary_tenant)) => {\n                secondary_tenant.shutdown().await;\n\n                delete_local(self.conf, &self.background_purges, &tenant_shard_id).await?;\n            }\n            Some(TenantSlot::InProgress(_)) => unreachable!(),\n            None => {}\n        };\n\n        // Fall through: local state for this tenant is no longer present, proceed with remote delete.\n        // - We use a retry wrapper here so that common transient S3 errors (e.g. 503, 429) do not result\n        //   in 500 responses to delete requests.\n        // - We keep the `SlotGuard` during this I/O, so that if a concurrent delete request comes in, it will\n        //   503/retry, rather than kicking off a wasteful concurrent deletion.\n        // NB: this also deletes partial prefixes, i.e. a <tenant_id> path will delete all\n        // <tenant_id>_<shard_id>/* objects. 
See method comment for why.\n        backoff::retry(\n            || async move {\n                self.resources\n                    .remote_storage\n                    .delete_prefix(&remote_tenant_path(&tenant_shard_id), &self.cancel)\n                    .await\n            },\n            |_| false, // backoff::retry handles cancellation\n            1,\n            3,\n            &format!(\"delete_tenant[tenant_shard_id={tenant_shard_id}]\"),\n            &self.cancel,\n        )\n        .await\n        .unwrap_or(Err(TimeoutOrCancel::Cancel.into()))\n        .map_err(|err| {\n            if TimeoutOrCancel::caused_by_cancel(&err) {\n                return DeleteTenantError::Cancelled;\n            }\n            DeleteTenantError::Other(err)\n        })\n    }\n\n    #[instrument(skip_all, fields(tenant_id=%tenant.get_tenant_shard_id().tenant_id, shard_id=%tenant.get_tenant_shard_id().shard_slug(), new_shard_count=%new_shard_count.literal()))]\n    pub(crate) async fn shard_split(\n        &self,\n        tenant: Arc<TenantShard>,\n        new_shard_count: ShardCount,\n        new_stripe_size: Option<ShardStripeSize>,\n        ctx: &RequestContext,\n    ) -> anyhow::Result<Vec<TenantShardId>> {\n        let tenant_shard_id = *tenant.get_tenant_shard_id();\n        let r = self\n            .do_shard_split(tenant, new_shard_count, new_stripe_size, ctx)\n            .await;\n        if r.is_err() {\n            // Shard splitting might have left the original shard in a partially shut down state (it\n            // stops the shard's remote timeline client).  Reset it to ensure we leave things in\n            // a working state.\n            if self.get(tenant_shard_id).is_some() {\n                tracing::warn!(\"Resetting after shard split failure\");\n                if let Err(e) = self.reset_tenant(tenant_shard_id, false, ctx).await {\n                    // Log this error because our return value will still be the original error, not this one.  
This is\n                    // a severe error: if this happens, we might be leaving behind a tenant that is not fully functional\n                    // (e.g. has uploads disabled).  We can't do anything else: if reset fails then shutting the tenant down or\n                    // setting it broken probably won't help either.\n                    tracing::error!(\"Failed to reset: {e}\");\n                }\n            }\n        }\n\n        r\n    }\n\n    pub(crate) async fn do_shard_split(\n        &self,\n        tenant: Arc<TenantShard>,\n        new_shard_count: ShardCount,\n        new_stripe_size: Option<ShardStripeSize>,\n        ctx: &RequestContext,\n    ) -> anyhow::Result<Vec<TenantShardId>> {\n        let tenant_shard_id = *tenant.get_tenant_shard_id();\n\n        // Validate the incoming request\n        if new_shard_count.count() <= tenant_shard_id.shard_count.count() {\n            anyhow::bail!(\"Requested shard count is not an increase\");\n        }\n        let expansion_factor = new_shard_count.count() / tenant_shard_id.shard_count.count();\n        if !expansion_factor.is_power_of_two() {\n            anyhow::bail!(\"Requested split is not a power of two\");\n        }\n\n        if let Some(new_stripe_size) = new_stripe_size {\n            if tenant.get_shard_stripe_size() != new_stripe_size\n                && tenant_shard_id.shard_count.count() > 1\n            {\n                // This tenant already has multiple shards, it is illegal to try and change its stripe size\n                anyhow::bail!(\n                    \"Shard stripe size may not be modified once tenant has multiple shards\"\n                );\n            }\n        }\n\n        // Plan: identify what the new child shards will be\n        let child_shards = tenant_shard_id.split(new_shard_count);\n        tracing::info!(\n            \"Shard {} splits into: {}\",\n            tenant_shard_id.to_index(),\n            child_shards\n                .iter()\n           
     .map(|id| format!(\"{}\", id.to_index()))\n                .join(\",\")\n        );\n\n        fail::fail_point!(\"shard-split-pre-prepare\", |_| Err(anyhow::anyhow!(\n            \"failpoint\"\n        )));\n\n        let parent_shard_identity = tenant.shard_identity;\n        let parent_tenant_conf = tenant.get_tenant_conf();\n        let parent_generation = tenant.generation;\n\n        // Phase 1: Write out child shards' remote index files, in the parent tenant's current generation\n        if let Err(e) = tenant.split_prepare(&child_shards).await {\n            // If [`Tenant::split_prepare`] fails, we must reload the tenant, because it might\n            // have been left in a partially-shut-down state.\n            tracing::warn!(\"Failed to prepare for split: {e}, reloading Tenant before returning\");\n            return Err(e);\n        }\n\n        fail::fail_point!(\"shard-split-post-prepare\", |_| Err(anyhow::anyhow!(\n            \"failpoint\"\n        )));\n\n        self.resources.deletion_queue_client.flush_advisory();\n\n        // Phase 2: Put the parent shard to InProgress and grab a reference to the parent Tenant\n        //\n        // TODO: keeping the parent as InProgress while spawning the children causes read\n        // unavailability, as we can't acquire a new timeline handle for it (existing handles appear\n        // to still work though, even downgraded ones). The parent should be available for reads\n        // until the children are ready -- potentially until *all* subsplits across all parent\n        // shards are complete and the compute has been notified. 
See:\n        // <https://databricks.atlassian.net/browse/LKB-672>.\n        drop(tenant);\n        let mut parent_slot_guard =\n            self.tenant_map_acquire_slot(&tenant_shard_id, TenantSlotAcquireMode::Any)?;\n        let parent = match parent_slot_guard.get_old_value() {\n            Some(TenantSlot::Attached(t)) => t,\n            Some(TenantSlot::Secondary(_)) => anyhow::bail!(\"Tenant location in secondary mode\"),\n            Some(TenantSlot::InProgress(_)) => {\n                // tenant_map_acquire_slot never returns InProgress, if a slot was InProgress\n                // it would return an error.\n                unreachable!()\n            }\n            None => {\n                // We don't actually need the parent shard to still be attached to do our work, but it's\n                // a weird enough situation that the caller probably didn't want us to continue working\n                // if they had detached the tenant they requested the split on.\n                anyhow::bail!(\"Detached parent shard in the middle of split!\")\n            }\n        };\n        fail::fail_point!(\"shard-split-pre-hardlink\", |_| Err(anyhow::anyhow!(\n            \"failpoint\"\n        )));\n        // Optimization: hardlink layers from the parent into the children, so that they don't have to\n        // re-download & duplicate the data referenced in their initial IndexPart\n        self.shard_split_hardlink(parent, child_shards.clone())\n            .await?;\n        fail::fail_point!(\"shard-split-post-hardlink\", |_| Err(anyhow::anyhow!(\n            \"failpoint\"\n        )));\n\n        // Take a snapshot of where the parent's WAL ingest had got to: we will wait for\n        // child shards to reach this point.\n        let mut target_lsns = HashMap::new();\n        for timeline in parent.timelines.lock().unwrap().clone().values() {\n            target_lsns.insert(timeline.timeline_id, timeline.get_last_record_lsn());\n        }\n\n        // TODO: we 
should have the parent shard stop its WAL ingest here, it's a waste of resources\n        // and could slow down the children trying to catch up.\n\n        // Phase 3: Spawn the child shards\n        for child_shard in &child_shards {\n            let mut child_shard_identity = parent_shard_identity;\n            if let Some(new_stripe_size) = new_stripe_size {\n                child_shard_identity.stripe_size = new_stripe_size;\n            }\n            child_shard_identity.count = child_shard.shard_count;\n            child_shard_identity.number = child_shard.shard_number;\n\n            let child_location_conf = LocationConf {\n                mode: LocationMode::Attached(AttachedLocationConfig {\n                    generation: parent_generation,\n                    attach_mode: AttachmentMode::Single,\n                }),\n                shard: child_shard_identity,\n                tenant_conf: parent_tenant_conf.clone(),\n            };\n\n            self.upsert_location(\n                *child_shard,\n                child_location_conf,\n                None,\n                SpawnMode::Eager,\n                ctx,\n            )\n            .await?;\n        }\n\n        fail::fail_point!(\"shard-split-post-child-conf\", |_| Err(anyhow::anyhow!(\n            \"failpoint\"\n        )));\n\n        // Phase 4: wait for child shards' WAL ingest to catch up to target LSN\n        for child_shard_id in &child_shards {\n            let child_shard_id = *child_shard_id;\n            let child_shard = {\n                let locked = self.tenants.read().unwrap();\n                let peek_slot =\n                    tenant_map_peek_slot(&locked, &child_shard_id, TenantSlotPeekMode::Read)?;\n                peek_slot.and_then(|s| s.get_attached()).cloned()\n            };\n            if let Some(t) = child_shard {\n                // Wait for the child shard to become active: this should be very quick because it only\n                // has to download the 
index_part that we just uploaded when creating it.\n                if let Err(e) = t.wait_to_become_active(ACTIVE_TENANT_TIMEOUT).await {\n                    // This is not fatal: we have durably created the child shard.  It just makes the\n                    // split operation less seamless for clients, as we may detach the parent\n                    // shard before the child shards are fully ready to serve requests.\n                    tracing::warn!(\"Failed to wait for shard {child_shard_id} to activate: {e}\");\n                    continue;\n                }\n\n                let timelines = t.timelines.lock().unwrap().clone();\n                for timeline in timelines.values() {\n                    let Some(target_lsn) = target_lsns.get(&timeline.timeline_id) else {\n                        continue;\n                    };\n\n                    tracing::info!(\n                        \"Waiting for child shard {}/{} to reach target lsn {}...\",\n                        child_shard_id,\n                        timeline.timeline_id,\n                        target_lsn\n                    );\n\n                    fail::fail_point!(\"shard-split-lsn-wait\", |_| Err(anyhow::anyhow!(\n                        \"failpoint\"\n                    )));\n                    if let Err(e) = timeline\n                        .wait_lsn(\n                            *target_lsn,\n                            crate::tenant::timeline::WaitLsnWaiter::Tenant,\n                            crate::tenant::timeline::WaitLsnTimeout::Default,\n                            ctx,\n                        )\n                        .await\n                    {\n                        // Failure here might mean shutdown, in any case this part is an optimization\n                        // and we shouldn't hold up the split operation.\n                        tracing::warn!(\n                            \"Failed to wait for timeline {} to reach lsn {target_lsn}: {e}\",\n  
                          timeline.timeline_id\n                        );\n                    } else {\n                        tracing::info!(\n                            \"Child shard {}/{} reached target lsn {}\",\n                            child_shard_id,\n                            timeline.timeline_id,\n                            target_lsn\n                        );\n                    }\n                }\n            }\n        }\n\n        // Phase 5: Shut down the parent shard. We leave it on disk in case the split fails and we\n        // have to roll back to the parent shard, avoiding a cold start. It will be cleaned up once\n        // the storage controller commits the split, or if all else fails, on the next restart.\n        //\n        // TODO: We don't flush the ephemeral layer here, because the split is likely to succeed and\n        // catching up the parent should be reasonably quick. Consider using FreezeAndFlush instead.\n        let (_guard, progress) = completion::channel();\n        match parent.shutdown(progress, ShutdownMode::Hard).await {\n            Ok(()) => {}\n            Err(other) => {\n                other.wait().await;\n            }\n        }\n\n        fail::fail_point!(\"shard-split-pre-finish\", |_| Err(anyhow::anyhow!(\n            \"failpoint\"\n        )));\n\n        parent_slot_guard.drop_old_value()?;\n\n        // Phase 6: Release the InProgress on the parent shard\n        drop(parent_slot_guard);\n\n        utils::pausable_failpoint!(\"shard-split-post-finish-pause\");\n\n        Ok(child_shards)\n    }\n\n    /// Part of [`Self::shard_split`]: hard link parent shard layers into child shards, as an optimization\n    /// to avoid the children downloading them again.\n    ///\n    /// For each resident layer in the parent shard, we will hard link it into all of the child shards.\n    async fn shard_split_hardlink(\n        &self,\n        parent_shard: &TenantShard,\n        child_shards: 
Vec<TenantShardId>,\n    ) -> anyhow::Result<()> {\n        debug_assert_current_span_has_tenant_id();\n\n        let parent_path = self.conf.tenant_path(parent_shard.get_tenant_shard_id());\n        let (parent_timelines, parent_layers) = {\n            let mut parent_layers = Vec::new();\n            let timelines = parent_shard.timelines.lock().unwrap().clone();\n            let parent_timelines = timelines.keys().cloned().collect::<Vec<_>>();\n            for timeline in timelines.values() {\n                tracing::info!(timeline_id=%timeline.timeline_id, \"Loading list of layers to hardlink\");\n                let layers = timeline\n                    .layers\n                    .read(LayerManagerLockHolder::GetLayerMapInfo)\n                    .await;\n\n                for layer in layers.likely_resident_layers() {\n                    let relative_path = layer\n                        .local_path()\n                        .strip_prefix(&parent_path)\n                        .context(\"Removing prefix from parent layer path\")?;\n                    parent_layers.push(relative_path.to_owned());\n                }\n            }\n\n            if parent_layers.is_empty() {\n                tracing::info!(\"Ancestor shard has no resident layer to hard link\");\n            }\n\n            (parent_timelines, parent_layers)\n        };\n\n        let mut child_prefixes = Vec::new();\n        let mut create_dirs = Vec::new();\n\n        for child in child_shards {\n            let child_prefix = self.conf.tenant_path(&child);\n            create_dirs.push(child_prefix.clone());\n            create_dirs.extend(\n                parent_timelines\n                    .iter()\n                    .map(|t| self.conf.timeline_path(&child, t)),\n            );\n\n            child_prefixes.push(child_prefix);\n        }\n\n        // Since we will do a large number of small filesystem metadata operations, batch them into\n        // spawn_blocking calls rather 
than doing each one as a tokio::fs round-trip.\n        let span = tracing::Span::current();\n        let jh = tokio::task::spawn_blocking(move || -> anyhow::Result<usize> {\n            // Run this synchronous code in the same log context as the outer function that spawned it.\n            let _span = span.enter();\n\n            tracing::info!(\"Creating {} directories\", create_dirs.len());\n            for dir in &create_dirs {\n                if let Err(e) = std::fs::create_dir_all(dir) {\n                    // Ignore AlreadyExists errors, drop out on all other errors\n                    match e.kind() {\n                        std::io::ErrorKind::AlreadyExists => {}\n                        _ => {\n                            return Err(anyhow::anyhow!(e).context(format!(\"Creating {dir}\")));\n                        }\n                    }\n                }\n            }\n\n            for child_prefix in child_prefixes {\n                tracing::info!(\n                    \"Hard-linking {} parent layers into child path {}\",\n                    parent_layers.len(),\n                    child_prefix\n                );\n                for relative_layer in &parent_layers {\n                    let parent_path = parent_path.join(relative_layer);\n                    let child_path = child_prefix.join(relative_layer);\n                    if let Err(e) = std::fs::hard_link(&parent_path, &child_path) {\n                        match e.kind() {\n                            std::io::ErrorKind::AlreadyExists => {}\n                            std::io::ErrorKind::NotFound => {\n                                tracing::info!(\n                                    \"Layer {} not found during hard-linking, evicted during split?\",\n                                    relative_layer\n                                );\n                            }\n                            _ => {\n                                return 
Err(anyhow::anyhow!(e).context(format!(\n                                    \"Hard linking {relative_layer} into {child_prefix}\"\n                                )));\n                            }\n                        }\n                    }\n                }\n            }\n\n            // Durability is not required for correctness, but if we crashed during split and\n            // then came restarted with empty timeline dirs, it would be very inefficient to\n            // re-populate from remote storage.\n            tracing::info!(\"fsyncing {} directories\", create_dirs.len());\n            for dir in create_dirs {\n                if let Err(e) = crashsafe::fsync(&dir) {\n                    // Something removed a newly created timeline dir out from underneath us?  Extremely\n                    // unexpected, but not worth panic'ing over as this whole function is just an\n                    // optimization.\n                    tracing::warn!(\"Failed to fsync directory {dir}: {e}\")\n                }\n            }\n\n            Ok(parent_layers.len())\n        });\n\n        match jh.await {\n            Ok(Ok(layer_count)) => {\n                tracing::info!(count = layer_count, \"Hard linked layers into child shards\");\n            }\n            Ok(Err(e)) => {\n                // This is an optimization, so we tolerate failure.\n                tracing::warn!(\"Error hard-linking layers, proceeding anyway: {e}\")\n            }\n            Err(e) => {\n                // This is something totally unexpected like a panic, so bail out.\n                anyhow::bail!(\"Error joining hard linking task: {e}\");\n            }\n        }\n\n        Ok(())\n    }\n\n    ///\n    /// Shut down all tenants. This runs as part of pageserver shutdown.\n    ///\n    /// NB: We leave the tenants in the map, so that they remain accessible through\n    /// the management API until we shut it down. 
If we removed the shut-down tenants\n    /// from the tenants map, the management API would return 404 for these tenants,\n    /// because TenantsMap::get() now returns `None`.\n    /// That could be easily misinterpreted by control plane, the consumer of the\n    /// management API. For example, it could attach the tenant on a different pageserver.\n    /// We would then be in split-brain once this pageserver restarts.\n    #[instrument(skip_all)]\n    pub(crate) async fn shutdown(&self) {\n        self.cancel.cancel();\n\n        self.shutdown_all_tenants0().await\n    }\n\n    async fn shutdown_all_tenants0(&self) {\n        let mut join_set = JoinSet::new();\n\n        #[cfg(all(debug_assertions, not(test)))]\n        {\n            // Check that our metrics properly tracked the size of the tenants map.  This is a convenient location to check,\n            // as it happens implicitly at the end of tests etc.\n            let m = self.tenants.read().unwrap();\n            debug_assert_eq!(METRICS.slots_total(), m.len() as u64);\n        }\n\n        // Atomically, 1. create the shutdown tasks and 2. 
prevent creation of new tenants.\n        let (total_in_progress, total_attached) = {\n            let mut m = self.tenants.write().unwrap();\n            match &mut *m {\n                TenantsMap::Initializing => {\n                    *m = TenantsMap::ShuttingDown(BTreeMap::default());\n                    info!(\"tenants map is empty\");\n                    return;\n                }\n                TenantsMap::Open(tenants) => {\n                    let mut shutdown_state = BTreeMap::new();\n                    let mut total_in_progress = 0;\n                    let mut total_attached = 0;\n\n                    for (tenant_shard_id, v) in std::mem::take(tenants).into_iter() {\n                        match v {\n                            TenantSlot::Attached(t) => {\n                                shutdown_state\n                                    .insert(tenant_shard_id, TenantSlot::Attached(t.clone()));\n                                join_set.spawn(\n                                    async move {\n                                        let res = {\n                                            let (_guard, shutdown_progress) = completion::channel();\n                                            t.shutdown(shutdown_progress, ShutdownMode::FreezeAndFlush).await\n                                        };\n\n                                        if let Err(other_progress) = res {\n                                            // join the another shutdown in progress\n                                            other_progress.wait().await;\n                                        }\n\n                                        // we cannot afford per tenant logging here, because if s3 is degraded, we are\n                                        // going to log too many lines\n                                        debug!(\"tenant successfully stopped\");\n                                    }\n                                    
.instrument(info_span!(\"shutdown\", tenant_id=%tenant_shard_id.tenant_id, shard_id=%tenant_shard_id.shard_slug())),\n                                );\n\n                                total_attached += 1;\n                            }\n                            TenantSlot::Secondary(state) => {\n                                // We don't need to wait for this individually per-tenant: the\n                                // downloader task will be waited on eventually, this cancel\n                                // is just to encourage it to drop out if it is doing work\n                                // for this tenant right now.\n                                state.cancel.cancel();\n\n                                shutdown_state\n                                    .insert(tenant_shard_id, TenantSlot::Secondary(state));\n                            }\n                            TenantSlot::InProgress(notify) => {\n                                // InProgress tenants are not visible in TenantsMap::ShuttingDown: we will\n                                // wait for their notifications to fire in this function.\n                                join_set.spawn(async move {\n                                    notify.wait().await;\n                                });\n\n                                total_in_progress += 1;\n                            }\n                        }\n                    }\n                    *m = TenantsMap::ShuttingDown(shutdown_state);\n                    (total_in_progress, total_attached)\n                }\n                TenantsMap::ShuttingDown(_) => {\n                    error!(\n                        \"already shutting down, this function isn't supposed to be called more than once\"\n                    );\n                    return;\n                }\n            }\n        };\n\n        let started_at = std::time::Instant::now();\n\n        info!(\n            \"Waiting for {} InProgress tenants and {} 
Attached tenants to shut down\",\n            total_in_progress, total_attached\n        );\n\n        let total = join_set.len();\n        let mut panicked = 0;\n        let mut buffering = true;\n        const BUFFER_FOR: std::time::Duration = std::time::Duration::from_millis(500);\n        let mut buffered = std::pin::pin!(tokio::time::sleep(BUFFER_FOR));\n\n        while !join_set.is_empty() {\n            tokio::select! {\n                Some(joined) = join_set.join_next() => {\n                    match joined {\n                        Ok(()) => {},\n                        Err(join_error) if join_error.is_cancelled() => {\n                            unreachable!(\"we are not cancelling any of the tasks\");\n                        }\n                        Err(join_error) if join_error.is_panic() => {\n                            // cannot really do anything, as this panic is likely a bug\n                            panicked += 1;\n                        }\n                        Err(join_error) => {\n                            warn!(\"unknown kind of JoinError: {join_error}\");\n                        }\n                    }\n                    if !buffering {\n                        // buffer so that every 500ms since the first update (or starting) we'll log\n                        // how far away we are; this is because we will get SIGKILL'd at 10s, and we\n                        // are not able to log *then*.\n                        buffering = true;\n                        buffered.as_mut().reset(tokio::time::Instant::now() + BUFFER_FOR);\n                    }\n                },\n                _ = &mut buffered, if buffering => {\n                    buffering = false;\n                    info!(remaining = join_set.len(), total, elapsed_ms = started_at.elapsed().as_millis(), \"waiting for tenants to shutdown\");\n                }\n            }\n        }\n\n        if panicked > 0 {\n            warn!(\n                panicked,\n 
               total, \"observed panicks while shutting down tenants\"\n            );\n        }\n\n        // caller will log how long we took\n    }\n\n    /// Detaches a tenant, and removes its local files asynchronously.\n    ///\n    /// File removal is idempotent: even if the tenant has already been removed, this will still\n    /// remove any local files. This is used during shard splits, where we leave the parent shard's\n    /// files around in case we have to roll back the split.\n    pub(crate) async fn detach_tenant(\n        &self,\n        conf: &'static PageServerConf,\n        tenant_shard_id: TenantShardId,\n        deletion_queue_client: &DeletionQueueClient,\n    ) -> Result<(), TenantStateError> {\n        if let Some(tmp_path) = self\n            .detach_tenant0(conf, tenant_shard_id, deletion_queue_client)\n            .await?\n        {\n            self.background_purges.spawn(tmp_path);\n        }\n\n        Ok(())\n    }\n\n    /// Detaches a tenant. This renames the tenant directory to a temporary path and returns it,\n    /// allowing the caller to delete it asynchronously. Returns None if the dir is already removed.\n    async fn detach_tenant0(\n        &self,\n        conf: &'static PageServerConf,\n        tenant_shard_id: TenantShardId,\n        deletion_queue_client: &DeletionQueueClient,\n    ) -> Result<Option<Utf8PathBuf>, TenantStateError> {\n        let tenant_dir_rename_operation = |tenant_id_to_clean: TenantShardId| async move {\n            let local_tenant_directory = conf.tenant_path(&tenant_id_to_clean);\n            if !tokio::fs::try_exists(&local_tenant_directory).await? 
{\n                // If the tenant directory doesn't exist, it's already cleaned up.\n                return Ok(None);\n            }\n            safe_rename_tenant_dir(&local_tenant_directory)\n                .await\n                .with_context(|| {\n                    format!(\"local tenant directory {local_tenant_directory:?} rename\")\n                })\n                .map(Some)\n        };\n\n        let mut removal_result = self\n            .remove_tenant_from_memory(\n                tenant_shard_id,\n                tenant_dir_rename_operation(tenant_shard_id),\n            )\n            .await;\n\n        // If the tenant was not found, it was likely already removed. Attempt to remove the tenant\n        // directory on disk anyway. For example, during shard splits, we shut down and remove the\n        // parent shard, but leave its directory on disk in case we have to roll back the split.\n        //\n        // TODO: it would be better to leave the parent shard attached until the split is committed.\n        // This will be needed by the gRPC page service too, such that a compute can continue to\n        // read from the parent shard until it's notified about the new child shards. 
See:\n        // <https://github.com/neondatabase/neon/issues/11728>.\n        if let Err(TenantStateError::SlotError(TenantSlotError::NotFound(_))) = removal_result {\n            removal_result = tenant_dir_rename_operation(tenant_shard_id)\n                .await\n                .map_err(TenantStateError::Other);\n        }\n\n        // Flush pending deletions, so that they have a good chance of passing validation\n        // before this tenant is potentially re-attached elsewhere.\n        deletion_queue_client.flush_advisory();\n\n        removal_result\n    }\n\n    pub(crate) fn list_tenants(\n        &self,\n    ) -> Result<Vec<(TenantShardId, TenantState, Generation)>, TenantMapListError> {\n        let tenants = self.tenants.read().unwrap();\n        let m = match &*tenants {\n            TenantsMap::Initializing => return Err(TenantMapListError::Initializing),\n            TenantsMap::Open(m) | TenantsMap::ShuttingDown(m) => m,\n        };\n        Ok(m.iter()\n            .filter_map(|(id, tenant)| match tenant {\n                TenantSlot::Attached(tenant) => {\n                    Some((*id, tenant.current_state(), tenant.generation()))\n                }\n                TenantSlot::Secondary(_) => None,\n                TenantSlot::InProgress(_) => None,\n            })\n            .collect())\n    }\n\n    /// Completes an earlier prepared timeline detach ancestor.\n    pub(crate) async fn complete_detaching_timeline_ancestor(\n        &self,\n        tenant_shard_id: TenantShardId,\n        timeline_id: TimelineId,\n        prepared: PreparedTimelineDetach,\n        behavior: DetachBehavior,\n        mut attempt: detach_ancestor::Attempt,\n        ctx: &RequestContext,\n    ) -> Result<HashSet<TimelineId>, detach_ancestor::Error> {\n        use detach_ancestor::Error;\n\n        let slot_guard = self\n            .tenant_map_acquire_slot(&tenant_shard_id, TenantSlotAcquireMode::MustExist)\n            .map_err(|e| {\n                use 
TenantSlotError::*;\n\n                match e {\n                    MapState(TenantMapError::ShuttingDown) => Error::ShuttingDown,\n                    NotFound(_) | InProgress | MapState(_) => Error::DetachReparent(e.into()),\n                }\n            })?;\n\n        let tenant = {\n            let old_slot = slot_guard\n                .get_old_value()\n                .as_ref()\n                .expect(\"requested MustExist\");\n\n            let Some(tenant) = old_slot.get_attached() else {\n                return Err(Error::DetachReparent(anyhow::anyhow!(\n                    \"Tenant is not in attached state\"\n                )));\n            };\n\n            if !tenant.is_active() {\n                return Err(Error::DetachReparent(anyhow::anyhow!(\n                    \"Tenant is not active\"\n                )));\n            }\n\n            tenant.clone()\n        };\n\n        let timeline = tenant\n            .get_timeline(timeline_id, true)\n            .map_err(Error::NotFound)?;\n\n        let resp = timeline\n            .detach_from_ancestor_and_reparent(\n                &tenant,\n                prepared,\n                attempt.ancestor_timeline_id,\n                attempt.ancestor_lsn,\n                behavior,\n                ctx,\n            )\n            .await?;\n\n        let mut slot_guard = slot_guard;\n\n        let tenant = if resp.reset_tenant_required() {\n            attempt.before_reset_tenant();\n\n            let (_guard, progress) = utils::completion::channel();\n            match tenant.shutdown(progress, ShutdownMode::Reload).await {\n                Ok(()) => {\n                    slot_guard.drop_old_value().expect(\"it was just shutdown\");\n                }\n                Err(_barrier) => {\n                    slot_guard.revert();\n                    // this really should not happen, at all, unless a shutdown without acquiring\n                    // tenant slot was already going? 
regardless, on restart the attempt tracking\n                    // will reset to retryable.\n                    return Err(Error::ShuttingDown);\n                }\n            }\n\n            let tenant_path = self.conf.tenant_path(&tenant_shard_id);\n            let config = TenantShard::load_tenant_config(self.conf, &tenant_shard_id)\n                .map_err(|e| Error::DetachReparent(e.into()))?;\n\n            let shard_identity = config.shard;\n            let tenant = tenant_spawn(\n                self.conf,\n                tenant_shard_id,\n                &tenant_path,\n                self.resources.clone(),\n                AttachedTenantConf::try_from(self.conf, config).map_err(Error::DetachReparent)?,\n                shard_identity,\n                None,\n                SpawnMode::Eager,\n                ctx,\n            )\n            .map_err(|_| Error::ShuttingDown)?;\n\n            {\n                let mut g = tenant.ongoing_timeline_detach.lock().unwrap();\n                assert!(\n                    g.is_none(),\n                    \"there cannot be any new timeline detach ancestor on newly created tenant\"\n                );\n                *g = Some((attempt.timeline_id, attempt.new_barrier()));\n            }\n\n            // if we bail out here, we will not allow a new attempt, which should be fine.\n            // pageserver should be shutting down regardless? 
tenant_reset would help, unless it\n            // runs into the same problem.\n            slot_guard\n                .upsert(TenantSlot::Attached(tenant.clone()))\n                .map_err(|e| match e {\n                    TenantSlotUpsertError::ShuttingDown(_) => Error::ShuttingDown,\n                    other => Error::DetachReparent(other.into()),\n                })?;\n            tenant\n        } else {\n            tracing::info!(\"skipping tenant_reset as no changes made required it\");\n            tenant\n        };\n\n        if let Some(reparented) = resp.completed() {\n            // finally ask the restarted tenant to complete the detach\n            //\n            // rationale for 9999s: we don't really have a timetable here; if retried, the caller\n            // will get an 503.\n            tenant\n                .wait_to_become_active(std::time::Duration::from_secs(9999))\n                .await\n                .map_err(|e| {\n                    use GetActiveTenantError::{Cancelled, WillNotBecomeActive};\n                    use pageserver_api::models::TenantState;\n                    match e {\n                        Cancelled | WillNotBecomeActive(TenantState::Stopping { .. 
}) => {\n                            Error::ShuttingDown\n                        }\n                        other => Error::Complete(other.into()),\n                    }\n                })?;\n\n            utils::pausable_failpoint!(\n                \"timeline-detach-ancestor::after_activating_before_finding-pausable\"\n            );\n\n            let timeline = tenant\n                .get_timeline(attempt.timeline_id, true)\n                .map_err(Error::NotFound)?;\n\n            timeline\n                .complete_detaching_timeline_ancestor(&tenant, attempt, ctx)\n                .await\n                .map(|()| reparented)\n        } else {\n            // at least the latest versions have now been downloaded and refreshed; be ready to\n            // retry another time.\n            Err(Error::FailedToReparentAll)\n        }\n    }\n\n    /// A page service client sends a TenantId, and to look up the correct Tenant we must\n    /// resolve this to a fully qualified TenantShardId.\n    ///\n    /// During shard splits: we shall see parent shards in InProgress state and skip them, and\n    /// instead match on child shards which should appear in Attached state.  
Very early in a shard\n    /// split, or in other cases where a shard is InProgress, we will return our own InProgress result\n    /// to instruct the caller to wait for that to finish before querying again.\n    pub(crate) fn resolve_attached_shard(\n        &self,\n        tenant_id: &TenantId,\n        selector: ShardSelector,\n    ) -> ShardResolveResult {\n        let tenants = self.tenants.read().unwrap();\n        let mut want_shard: Option<ShardIndex> = None;\n        let mut any_in_progress = None;\n\n        match &*tenants {\n            TenantsMap::Initializing => ShardResolveResult::NotFound,\n            TenantsMap::Open(m) | TenantsMap::ShuttingDown(m) => {\n                for slot in m.range(TenantShardId::tenant_range(*tenant_id)) {\n                    // Ignore all slots that don't contain an attached tenant\n                    let tenant = match &slot.1 {\n                        TenantSlot::Attached(t) => t,\n                        TenantSlot::InProgress(barrier) => {\n                            // We might still find a usable shard, but in case we don't, remember that\n                            // we saw at least one InProgress slot, so that we can distinguish this case\n                            // from a simple NotFound in our return value.\n                            any_in_progress = Some(barrier.clone());\n                            continue;\n                        }\n                        _ => continue,\n                    };\n\n                    match selector {\n                        ShardSelector::Zero if slot.0.shard_number == ShardNumber(0) => {\n                            return ShardResolveResult::Found(tenant.clone());\n                        }\n                        ShardSelector::Page(key) => {\n                            // Each time we find an attached slot with a different shard count,\n                            // recompute the expected shard number: during shard splits we might\n                   
         // have multiple shards with the old shard count.\n                            if want_shard.is_none()\n                                || want_shard.unwrap().shard_count != tenant.shard_identity.count\n                            {\n                                want_shard = Some(ShardIndex {\n                                    shard_number: tenant.shard_identity.get_shard_number(&key),\n                                    shard_count: tenant.shard_identity.count,\n                                });\n                            }\n\n                            if Some(ShardIndex {\n                                shard_number: tenant.shard_identity.number,\n                                shard_count: tenant.shard_identity.count,\n                            }) == want_shard\n                            {\n                                return ShardResolveResult::Found(tenant.clone());\n                            }\n                        }\n                        ShardSelector::Known(shard)\n                            if tenant.shard_identity.shard_index() == shard =>\n                        {\n                            return ShardResolveResult::Found(tenant.clone());\n                        }\n                        _ => continue,\n                    }\n                }\n\n                // Fall through: we didn't find a slot that was in Attached state & matched our selector.  If\n                // we found one or more InProgress slot, indicate to caller that they should retry later.  Otherwise\n                // this requested shard simply isn't found.\n                if let Some(barrier) = any_in_progress {\n                    ShardResolveResult::InProgress(barrier)\n                } else {\n                    ShardResolveResult::NotFound\n                }\n            }\n        }\n    }\n\n    /// Calculate the tenant shards' contributions to this pageserver's utilization metrics.  
The\n    /// returned values are:\n    ///  - the number of bytes of local disk space this pageserver's shards are requesting, i.e.\n    ///    how much space they would use if not impacted by disk usage eviction.\n    ///  - the number of tenant shards currently on this pageserver, including attached\n    ///    and secondary.\n    ///\n    /// This function is quite expensive: callers are expected to cache the result and\n    /// limit how often they call it.\n    pub(crate) fn calculate_utilization(&self) -> Result<(u64, u32), TenantMapListError> {\n        let tenants = self.tenants.read().unwrap();\n        let m = match &*tenants {\n            TenantsMap::Initializing => return Err(TenantMapListError::Initializing),\n            TenantsMap::Open(m) | TenantsMap::ShuttingDown(m) => m,\n        };\n        let shard_count = m.len();\n        let mut wanted_bytes = 0;\n\n        for tenant_slot in m.values() {\n            match tenant_slot {\n                TenantSlot::InProgress(_barrier) => {\n                    // While a slot is being changed, we can't know how much storage it wants.  
This\n                    // means this function's output can fluctuate if a lot of changes are going on\n                    // (such as transitions from secondary to attached).\n                    //\n                    // We could wait for the barrier and retry, but it's important that the utilization\n                    // API is responsive, and the data quality impact is not very significant.\n                    continue;\n                }\n                TenantSlot::Attached(tenant) => {\n                    wanted_bytes += tenant.local_storage_wanted();\n                }\n                TenantSlot::Secondary(secondary) => {\n                    let progress = secondary.progress.lock().unwrap();\n                    wanted_bytes += if progress.heatmap_mtime.is_some() {\n                        // If we have heatmap info, then we will 'want' the sum\n                        // of the size of layers in the heatmap: this is how much space\n                        // we would use if not doing any eviction.\n                        progress.bytes_total\n                    } else {\n                        // In the absence of heatmap info, assume that the secondary location simply\n                        // needs as much space as it is currently using.\n                        secondary.resident_size_metric.get()\n                    }\n                }\n            }\n        }\n\n        Ok((wanted_bytes, shard_count as u32))\n    }\n\n    #[instrument(skip_all, fields(tenant_id=%tenant_shard_id.tenant_id, shard_id=%tenant_shard_id.shard_slug(), %timeline_id))]\n    pub(crate) async fn immediate_gc(\n        &self,\n        tenant_shard_id: TenantShardId,\n        timeline_id: TimelineId,\n        gc_req: TimelineGcRequest,\n        cancel: CancellationToken,\n        ctx: &RequestContext,\n    ) -> Result<GcResult, ApiError> {\n        let tenant = {\n            let guard = self.tenants.read().unwrap();\n            guard\n                
.get(&tenant_shard_id)\n                .cloned()\n                .with_context(|| format!(\"tenant {tenant_shard_id}\"))\n                .map_err(|e| ApiError::NotFound(e.into()))?\n        };\n\n        let gc_horizon = gc_req.gc_horizon.unwrap_or_else(|| tenant.get_gc_horizon());\n        // Use tenant's pitr setting\n        let pitr = tenant.get_pitr_interval();\n\n        tenant.wait_to_become_active(ACTIVE_TENANT_TIMEOUT).await?;\n\n        // Run in task_mgr to avoid race with tenant_detach operation\n        let ctx: RequestContext =\n            ctx.detached_child(TaskKind::GarbageCollector, DownloadBehavior::Download);\n\n        let _gate_guard = tenant.gate.enter().map_err(|_| ApiError::ShuttingDown)?;\n\n        fail::fail_point!(\"immediate_gc_task_pre\");\n\n        #[allow(unused_mut)]\n        let mut result = tenant\n            .gc_iteration(Some(timeline_id), gc_horizon, pitr, &cancel, &ctx)\n            .await;\n        // FIXME: `gc_iteration` can return an error for multiple reasons; we should handle it\n        // better once the types support it.\n\n        #[cfg(feature = \"testing\")]\n        {\n            // we need to synchronize with drop completion for python tests without polling for\n            // log messages\n            if let Ok(result) = result.as_mut() {\n                let mut js = tokio::task::JoinSet::new();\n                for layer in std::mem::take(&mut result.doomed_layers) {\n                    js.spawn(layer.wait_drop());\n                }\n                tracing::info!(\n                    total = js.len(),\n                    \"starting to wait for the gc'd layers to be dropped\"\n                );\n                while let Some(res) = js.join_next().await {\n                    res.expect(\"wait_drop should not panic\");\n                }\n            }\n\n            let timeline = tenant.get_timeline(timeline_id, false).ok();\n            let rtc = timeline.as_ref().map(|x| &x.remote_client);\n\n  
          if let Some(rtc) = rtc {\n                // layer drops schedule actions on remote timeline client to actually do the\n                // deletions; don't care about the shutdown error, just exit fast\n                drop(rtc.wait_completion().await);\n            }\n        }\n\n        result.map_err(|e| match e {\n            GcError::TenantCancelled | GcError::TimelineCancelled => ApiError::ShuttingDown,\n            GcError::TimelineNotFound => {\n                ApiError::NotFound(anyhow::anyhow!(\"Timeline not found\").into())\n            }\n            other => ApiError::InternalServerError(anyhow::anyhow!(other)),\n        })\n    }\n\n    /// Stops and removes the tenant from memory, if it's not [`TenantState::Stopping`] already, bails otherwise.\n    /// Allows to remove other tenant resources manually, via `tenant_cleanup`.\n    /// If the cleanup fails, tenant will stay in memory in [`TenantState::Broken`] state, and another removal\n    async fn remove_tenant_from_memory<V, F>(\n        &self,\n        tenant_shard_id: TenantShardId,\n        tenant_cleanup: F,\n    ) -> Result<V, TenantStateError>\n    where\n        F: std::future::Future<Output = anyhow::Result<V>>,\n    {\n        let mut slot_guard =\n            self.tenant_map_acquire_slot(&tenant_shard_id, TenantSlotAcquireMode::MustExist)?;\n\n        // allow pageserver shutdown to await for our completion\n        let (_guard, progress) = completion::channel();\n\n        // The SlotGuard allows us to manipulate the Tenant object without fear of some\n        // concurrent API request doing something else for the same tenant ID.\n        let attached_tenant = match slot_guard.get_old_value() {\n            Some(TenantSlot::Attached(tenant)) => {\n                // whenever we remove a tenant from memory, we don't want to flush and wait for upload\n                let shutdown_mode = ShutdownMode::Hard;\n\n                // shutdown is sure to transition tenant to stopping, 
and wait for all tasks to complete, so\n                // that we can continue safely to cleanup.\n                match tenant.shutdown(progress, shutdown_mode).await {\n                    Ok(()) => {}\n                    Err(_other) => {\n                        // if pageserver shutdown or other detach/ignore is already ongoing, we don't want to\n                        // wait for it but return an error right away because these are distinct requests.\n                        slot_guard.revert();\n                        return Err(TenantStateError::IsStopping(tenant_shard_id));\n                    }\n                }\n                Some(tenant)\n            }\n            Some(TenantSlot::Secondary(secondary_state)) => {\n                tracing::info!(\"Shutting down in secondary mode\");\n                secondary_state.shutdown().await;\n                None\n            }\n            Some(TenantSlot::InProgress(_)) => {\n                // Acquiring a slot guarantees its old value was not InProgress\n                unreachable!();\n            }\n            None => None,\n        };\n\n        match tenant_cleanup\n            .await\n            .with_context(|| format!(\"Failed to run cleanup for tenant {tenant_shard_id}\"))\n        {\n            Ok(hook_value) => {\n                // Success: drop the old TenantSlot::Attached.\n                slot_guard\n                    .drop_old_value()\n                    .expect(\"We just called shutdown\");\n\n                Ok(hook_value)\n            }\n            Err(e) => {\n                // If we had a Tenant, set it to Broken and put it back in the TenantsMap\n                if let Some(attached_tenant) = attached_tenant {\n                    attached_tenant.set_broken(e.to_string()).await;\n                }\n                // Leave the broken tenant in the map\n                slot_guard.revert();\n\n                Err(TenantStateError::Other(e))\n            }\n        }\n    
}\n}\n\n#[derive(Debug, thiserror::Error)]\npub(crate) enum GetTenantError {\n    /// NotFound is a TenantId rather than TenantShardId, because this error type is used from\n    /// getters that use a TenantId and a ShardSelector, not just getters that target a specific shard.\n    #[error(\"Tenant {0} not found\")]\n    NotFound(TenantId),\n\n    #[error(\"Tenant {0} not found\")]\n    ShardNotFound(TenantShardId),\n\n    #[error(\"Tenant {0} is not active\")]\n    NotActive(TenantShardId),\n\n    // Initializing or shutting down: cannot authoritatively say whether we have this tenant\n    #[error(\"Tenant map is not available: {0}\")]\n    MapState(#[from] TenantMapError),\n}\n\n#[derive(thiserror::Error, Debug)]\npub(crate) enum GetActiveTenantError {\n    /// We may time out either while TenantSlot is InProgress, or while the Tenant\n    /// is in a non-Active state\n    #[error(\n        \"Timed out waiting {wait_time:?} for tenant active state. Latest state: {latest_state:?}\"\n    )]\n    WaitForActiveTimeout {\n        latest_state: Option<TenantState>,\n        wait_time: Duration,\n    },\n\n    /// The TenantSlot is absent, or in secondary mode\n    #[error(transparent)]\n    NotFound(#[from] GetTenantError),\n\n    /// Cancellation token fired while we were waiting\n    #[error(\"cancelled\")]\n    Cancelled,\n\n    /// Tenant exists, but is in a state that cannot become active (e.g. Stopping, Broken)\n    #[error(\"will not become active.  
Current state: {0}\")]\n    WillNotBecomeActive(TenantState),\n\n    /// Broken is logically a subset of WillNotBecomeActive, but a distinct error is useful as\n    /// WillNotBecomeActive is a permitted error under some circumstances, whereas broken should\n    /// never happen.\n    #[error(\"Tenant is broken: {0}\")]\n    Broken(String),\n\n    #[error(\"reconnect to switch tenant id\")]\n    SwitchedTenant,\n}\n\n#[derive(Debug, thiserror::Error)]\npub(crate) enum DeleteTimelineError {\n    #[error(\"Tenant {0}\")]\n    Tenant(#[from] GetTenantError),\n\n    #[error(\"Timeline {0}\")]\n    Timeline(#[from] crate::tenant::DeleteTimelineError),\n}\n\n#[derive(Debug, thiserror::Error)]\npub(crate) enum TenantStateError {\n    #[error(\"Tenant {0} is stopping\")]\n    IsStopping(TenantShardId),\n    #[error(transparent)]\n    SlotError(#[from] TenantSlotError),\n    #[error(transparent)]\n    SlotUpsertError(#[from] TenantSlotUpsertError),\n    #[error(transparent)]\n    Other(#[from] anyhow::Error),\n}\n\n#[derive(Debug, thiserror::Error)]\npub(crate) enum TenantMapListError {\n    #[error(\"tenant map is still initiailizing\")]\n    Initializing,\n}\n\n#[derive(Debug, thiserror::Error)]\npub(crate) enum TenantMapInsertError {\n    #[error(transparent)]\n    SlotError(#[from] TenantSlotError),\n    #[error(transparent)]\n    SlotUpsertError(#[from] TenantSlotUpsertError),\n    #[error(transparent)]\n    Other(#[from] anyhow::Error),\n}\n\n/// Superset of TenantMapError: issues that can occur when acquiring a slot\n/// for a particular tenant ID.\n#[derive(Debug, thiserror::Error)]\npub(crate) enum TenantSlotError {\n    /// When acquiring a slot with the expectation that the tenant already exists.\n    #[error(\"Tenant {0} not found\")]\n    NotFound(TenantShardId),\n\n    // Tried to read a slot that is currently being mutated by another administrative\n    // operation.\n    #[error(\"tenant has a state change in progress, try again later\")]\n    
InProgress,\n\n    #[error(transparent)]\n    MapState(#[from] TenantMapError),\n}\n\n/// Superset of TenantMapError: issues that can occur when using a SlotGuard\n/// to insert a new value.\n#[derive(thiserror::Error)]\npub(crate) enum TenantSlotUpsertError {\n    /// An error where the slot is in an unexpected state, indicating a code bug\n    #[error(\"Internal error updating Tenant\")]\n    InternalError(Cow<'static, str>),\n\n    #[error(transparent)]\n    MapState(TenantMapError),\n\n    // If we encounter TenantManager shutdown during upsert, we must carry the Completion\n    // from the SlotGuard, so that the caller can hold it while they clean up: otherwise\n    // TenantManager shutdown might race ahead before we're done cleaning up any Tenant that\n    // was protected by the SlotGuard.\n    #[error(\"Shutting down\")]\n    ShuttingDown((TenantSlot, utils::completion::Completion)),\n}\n\nimpl std::fmt::Debug for TenantSlotUpsertError {\n    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {\n        match self {\n            Self::InternalError(reason) => write!(f, \"Internal Error {reason}\"),\n            Self::MapState(map_error) => write!(f, \"Tenant map state: {map_error:?}\"),\n            Self::ShuttingDown(_completion) => write!(f, \"Tenant map shutting down\"),\n        }\n    }\n}\n\n#[derive(Debug, thiserror::Error)]\nenum TenantSlotDropError {\n    /// It is only legal to drop a TenantSlot if its contents are fully shut down\n    #[error(\"Tenant was not shut down\")]\n    NotShutdown,\n}\n\n/// Errors that can happen any time we are walking the tenant map to try and acquire\n/// the TenantSlot for a particular tenant.\n#[derive(Debug, thiserror::Error)]\npub(crate) enum TenantMapError {\n    // Tried to read while initializing\n    #[error(\"tenant map is still initializing\")]\n    StillInitializing,\n\n    // Tried to read while shutting down\n    #[error(\"tenant map is shutting down\")]\n    ShuttingDown,\n}\n\n/// Guards a 
particular tenant_id's content in the TenantsMap.\n///\n/// While this structure exists, the TenantsMap will contain a [`TenantSlot::InProgress`]\n/// for this tenant, which acts as a marker for any operations targeting\n/// this tenant to retry later, or wait for the InProgress state to end.\n///\n/// This structure enforces the important invariant that we do not have overlapping\n/// tasks that will try to use local storage for the same tenant ID: we enforce that\n/// the previous contents of a slot have been shut down before the slot can be\n/// left empty or used for something else\n///\n/// Holders of a SlotGuard should explicitly dispose of it, using either `upsert`\n/// to provide a new value, or `revert` to put the slot back into its initial\n/// state.  If the SlotGuard is dropped without calling either of these, then\n/// we will leave the slot empty if our `old_value` is already shut down, else\n/// we will replace the slot with `old_value` (equivalent to doing a revert).\n///\n/// The `old_value` may be dropped before the SlotGuard is dropped, by calling\n/// `drop_old_value`.  
It is an error to call this without shutting down\n/// the contents of `old_value`.\npub(crate) struct SlotGuard<'a> {\n    tenant_shard_id: TenantShardId,\n    old_value: Option<TenantSlot>,\n    upserted: bool,\n\n    /// [`TenantSlot::InProgress`] carries the corresponding Barrier: it will\n    /// release any waiters as soon as this SlotGuard is dropped.\n    completion: utils::completion::Completion,\n\n    tenants: &'a std::sync::RwLock<TenantsMap>,\n}\n\nimpl<'a> SlotGuard<'a> {\n    fn new(\n        tenant_shard_id: TenantShardId,\n        old_value: Option<TenantSlot>,\n        completion: utils::completion::Completion,\n        tenants: &'a std::sync::RwLock<TenantsMap>,\n    ) -> Self {\n        Self {\n            tenant_shard_id,\n            old_value,\n            upserted: false,\n            completion,\n            tenants,\n        }\n    }\n\n    /// Get any value that was present in the slot before we acquired ownership\n    /// of it: in state transitions, this will be the old state.\n    ///\n    // FIXME: get_ prefix\n    // FIXME: this should be .as_ref() -- unsure why no clippy\n    fn get_old_value(&self) -> &Option<TenantSlot> {\n        &self.old_value\n    }\n\n    /// Emplace a new value in the slot.  
This consumes the guard, and after\n    /// returning, the slot is no longer protected from concurrent changes.\n    fn upsert(mut self, new_value: TenantSlot) -> Result<(), TenantSlotUpsertError> {\n        if !self.old_value_is_shutdown() {\n            // This is a bug: callers should never try to drop an old value without\n            // shutting it down\n            return Err(TenantSlotUpsertError::InternalError(\n                \"Old TenantSlot value not shut down\".into(),\n            ));\n        }\n\n        let replaced: Option<TenantSlot> = {\n            let mut locked = self.tenants.write().unwrap();\n\n            if let TenantSlot::InProgress(_) = new_value {\n                // It is never expected to try and upsert InProgress via this path: it should\n                // only be written via the tenant_map_acquire_slot path.  If we hit this it's a bug.\n                return Err(TenantSlotUpsertError::InternalError(\n                    \"Attempt to upsert an InProgress state\".into(),\n                ));\n            }\n\n            let m = match &mut *locked {\n                TenantsMap::Initializing => {\n                    return Err(TenantSlotUpsertError::MapState(\n                        TenantMapError::StillInitializing,\n                    ));\n                }\n                TenantsMap::ShuttingDown(_) => {\n                    return Err(TenantSlotUpsertError::ShuttingDown((\n                        new_value,\n                        self.completion.clone(),\n                    )));\n                }\n                TenantsMap::Open(m) => m,\n            };\n\n            METRICS.slot_inserted(&new_value);\n\n            let replaced = m.insert(self.tenant_shard_id, new_value);\n            self.upserted = true;\n            if let Some(replaced) = replaced.as_ref() {\n                METRICS.slot_removed(replaced);\n            }\n\n            replaced\n        };\n\n        // Sanity check: on an upsert we should always 
be replacing an InProgress marker\n        match replaced {\n            Some(TenantSlot::InProgress(_)) => {\n                // Expected case: we find our InProgress in the map: nothing should have\n                // replaced it because the code that acquires slots will not grant another\n                // one for the same TenantId.\n                Ok(())\n            }\n            None => {\n                METRICS.unexpected_errors.inc();\n                error!(\n                    tenant_shard_id = %self.tenant_shard_id,\n                    \"Missing InProgress marker during tenant upsert, this is a bug.\"\n                );\n                Err(TenantSlotUpsertError::InternalError(\n                    \"Missing InProgress marker during tenant upsert\".into(),\n                ))\n            }\n            Some(slot) => {\n                METRICS.unexpected_errors.inc();\n                error!(tenant_shard_id=%self.tenant_shard_id, \"Unexpected contents of TenantSlot during upsert, this is a bug.  
Contents: {:?}\", slot);\n                Err(TenantSlotUpsertError::InternalError(\n                    \"Unexpected contents of TenantSlot\".into(),\n                ))\n            }\n        }\n    }\n\n    /// Replace the InProgress slot with whatever was in the guard when we started\n    fn revert(mut self) {\n        if let Some(value) = self.old_value.take() {\n            match self.upsert(value) {\n                Err(TenantSlotUpsertError::InternalError(_)) => {\n                    // We already logged the error, nothing else we can do.\n                }\n                Err(\n                    TenantSlotUpsertError::MapState(_) | TenantSlotUpsertError::ShuttingDown(_),\n                ) => {\n                    // If the map is shutting down, we need not replace anything\n                }\n                Ok(()) => {}\n            }\n        }\n    }\n\n    /// We may never drop our old value until it is cleanly shut down: otherwise we might leave\n    /// rogue background tasks that would write to the local tenant directory that this guard\n    /// is responsible for protecting\n    fn old_value_is_shutdown(&self) -> bool {\n        match self.old_value.as_ref() {\n            Some(TenantSlot::Attached(tenant)) => tenant.gate.close_complete(),\n            Some(TenantSlot::Secondary(secondary_tenant)) => secondary_tenant.gate.close_complete(),\n            Some(TenantSlot::InProgress(_)) => {\n                // A SlotGuard cannot be constructed for a slot that was already InProgress\n                unreachable!()\n            }\n            None => true,\n        }\n    }\n\n    /// The guard holder is done with the old value of the slot: they are obliged to already\n    /// shut it down before we reach this point.\n    fn drop_old_value(&mut self) -> Result<(), TenantSlotDropError> {\n        if !self.old_value_is_shutdown() {\n            Err(TenantSlotDropError::NotShutdown)\n        } else {\n            self.old_value.take();\n            
Ok(())\n        }\n    }\n}\n\nimpl<'a> Drop for SlotGuard<'a> {\n    fn drop(&mut self) {\n        if self.upserted {\n            return;\n        }\n        // Our old value is already shutdown, or it never existed: it is safe\n        // for us to fully release the TenantSlot back into an empty state\n\n        let mut locked = self.tenants.write().unwrap();\n\n        let m = match &mut *locked {\n            TenantsMap::Initializing => {\n                // There is no map, this should never happen.\n                return;\n            }\n            TenantsMap::ShuttingDown(_) => {\n                // When we transition to shutdown, InProgress elements are removed\n                // from the map, so we do not need to clean up our Inprogress marker.\n                // See [`shutdown_all_tenants0`]\n                return;\n            }\n            TenantsMap::Open(m) => m,\n        };\n\n        use std::collections::btree_map::Entry;\n        match m.entry(self.tenant_shard_id) {\n            Entry::Occupied(mut entry) => {\n                if !matches!(entry.get(), TenantSlot::InProgress(_)) {\n                    METRICS.unexpected_errors.inc();\n                    error!(tenant_shard_id=%self.tenant_shard_id, \"Unexpected contents of TenantSlot during drop, this is a bug.  
Contents: {:?}\", entry.get());\n                }\n\n                if self.old_value_is_shutdown() {\n                    METRICS.slot_removed(entry.get());\n                    entry.remove();\n                } else {\n                    let inserting = self.old_value.take().unwrap();\n                    METRICS.slot_inserted(&inserting);\n                    let replaced = entry.insert(inserting);\n                    METRICS.slot_removed(&replaced);\n                }\n            }\n            Entry::Vacant(_) => {\n                METRICS.unexpected_errors.inc();\n                error!(\n                    tenant_shard_id = %self.tenant_shard_id,\n                    \"Missing InProgress marker during SlotGuard drop, this is a bug.\"\n                );\n            }\n        }\n    }\n}\n\nenum TenantSlotPeekMode {\n    /// In Read mode, peek will be permitted to see the slots even if the pageserver is shutting down\n    Read,\n    /// In Write mode, trying to peek at a slot while the pageserver is shutting down is an error\n    Write,\n}\n\nfn tenant_map_peek_slot<'a>(\n    tenants: &'a std::sync::RwLockReadGuard<'a, TenantsMap>,\n    tenant_shard_id: &TenantShardId,\n    mode: TenantSlotPeekMode,\n) -> Result<Option<&'a TenantSlot>, TenantMapError> {\n    match tenants.deref() {\n        TenantsMap::Initializing => Err(TenantMapError::StillInitializing),\n        TenantsMap::ShuttingDown(m) => match mode {\n            TenantSlotPeekMode::Read => Ok(Some(\n                // When reading in ShuttingDown state, we must translate None results\n                // into a ShuttingDown error, because absence of a tenant shard ID in the map\n                // isn't a reliable indicator of the tenant being gone: it might have been\n                // InProgress when shutdown started, and cleaned up from that state such\n                // that it's now no longer in the map.  
Callers will have to wait until\n                // we next start up to get a proper answer.  This avoids incorrect 404 API responses.\n                m.get(tenant_shard_id).ok_or(TenantMapError::ShuttingDown)?,\n            )),\n            TenantSlotPeekMode::Write => Err(TenantMapError::ShuttingDown),\n        },\n        TenantsMap::Open(m) => Ok(m.get(tenant_shard_id)),\n    }\n}\n\nenum TenantSlotAcquireMode {\n    /// Acquire the slot irrespective of current state, or whether it already exists\n    Any,\n    /// Return an error if trying to acquire a slot and it doesn't already exist\n    MustExist,\n}\n\nuse http_utils::error::ApiError;\nuse pageserver_api::models::TimelineGcRequest;\n\nuse crate::tenant::gc_result::GcResult;\n\n#[cfg(test)]\nmod tests {\n    use std::collections::BTreeMap;\n    use std::sync::Arc;\n\n    use camino::Utf8PathBuf;\n    use storage_broker::BrokerClientChannel;\n    use tracing::Instrument;\n\n    use super::super::harness::TenantHarness;\n    use super::TenantsMap;\n    use crate::{\n        basebackup_cache::BasebackupCache,\n        tenant::{\n            TenantSharedResources,\n            mgr::{BackgroundPurges, TenantManager, TenantSlot},\n        },\n    };\n\n    #[tokio::test(start_paused = true)]\n    async fn shutdown_awaits_in_progress_tenant() {\n        // Test that if an InProgress tenant is in the map during shutdown, the shutdown will gracefully\n        // wait for it to complete before proceeding.\n\n        let h = TenantHarness::create(\"shutdown_awaits_in_progress_tenant\")\n            .await\n            .unwrap();\n        let (t, _ctx) = h.load().await;\n\n        // harness loads it to active, which is forced and nothing is running on the tenant\n\n        let id = t.tenant_shard_id();\n\n        // tenant harness configures the logging and we cannot escape it\n        let span = h.span();\n        let _e = span.enter();\n\n        let tenants = BTreeMap::from([(id, 
TenantSlot::Attached(t.clone()))]);\n\n        // Invoke remove_tenant_from_memory with a cleanup hook that blocks until we manually\n        // permit it to proceed: that will stick the tenant in InProgress\n\n        let (basebackup_cache, _) = BasebackupCache::new(Utf8PathBuf::new(), None);\n\n        let tenant_manager = TenantManager {\n            tenants: std::sync::RwLock::new(TenantsMap::Open(tenants)),\n            conf: h.conf,\n            resources: TenantSharedResources {\n                broker_client: BrokerClientChannel::connect_lazy(\"foobar.com\")\n                    .await\n                    .unwrap(),\n                remote_storage: h.remote_storage.clone(),\n                deletion_queue_client: h.deletion_queue.new_client(),\n                l0_flush_global_state: crate::l0_flush::L0FlushGlobalState::new(\n                    h.conf.l0_flush.clone(),\n                ),\n                basebackup_cache,\n                feature_resolver: crate::feature_resolver::FeatureResolver::new_disabled(),\n            },\n            cancel: tokio_util::sync::CancellationToken::new(),\n            background_purges: BackgroundPurges::default(),\n        };\n\n        let tenant_manager = Arc::new(tenant_manager);\n\n        let (until_cleanup_completed, can_complete_cleanup) = utils::completion::channel();\n        let (until_cleanup_started, cleanup_started) = utils::completion::channel();\n        let mut remove_tenant_from_memory_task = {\n            let tenant_manager = tenant_manager.clone();\n            let jh = tokio::spawn({\n                async move {\n                    let cleanup = async move {\n                        drop(until_cleanup_started);\n                        can_complete_cleanup.wait().await;\n                        anyhow::Ok(())\n                    };\n                    tenant_manager.remove_tenant_from_memory(id, cleanup).await\n                }\n                .instrument(h.span())\n            });\n\n      
      // now the long cleanup should be in place, with the stopping state\n            cleanup_started.wait().await;\n            jh\n        };\n\n        let mut shutdown_task = {\n            let (until_shutdown_started, shutdown_started) = utils::completion::channel();\n\n            let tenant_manager = tenant_manager.clone();\n\n            let shutdown_task = tokio::spawn(async move {\n                drop(until_shutdown_started);\n                tenant_manager.shutdown_all_tenants0().await;\n            });\n\n            shutdown_started.wait().await;\n            shutdown_task\n        };\n\n        let long_time = std::time::Duration::from_secs(15);\n        tokio::select! {\n            _ = &mut shutdown_task => unreachable!(\"shutdown should block on remove_tenant_from_memory completing\"),\n            _ = &mut remove_tenant_from_memory_task => unreachable!(\"remove_tenant_from_memory_task should not complete until explicitly unblocked\"),\n            _ = tokio::time::sleep(long_time) => {},\n        }\n\n        drop(until_cleanup_completed);\n\n        // Now that we allow it to proceed, shutdown should complete immediately\n        remove_tenant_from_memory_task.await.unwrap().unwrap();\n        shutdown_task.await.unwrap();\n    }\n}\n"
  },
  {
    "path": "pageserver/src/tenant/remote_timeline_client/download.rs",
    "content": "//! Helper functions to download files from remote storage with a RemoteStorage\n//!\n//! The functions in this module retry failed operations automatically, according\n//! to the FAILED_DOWNLOAD_RETRIES constant.\n\nuse std::collections::HashSet;\nuse std::future::Future;\nuse std::str::FromStr;\nuse std::sync::atomic::AtomicU64;\nuse std::time::SystemTime;\n\nuse anyhow::{Context, anyhow};\nuse camino::{Utf8Path, Utf8PathBuf};\nuse pageserver_api::shard::TenantShardId;\nuse remote_storage::{\n    DownloadError, DownloadKind, DownloadOpts, GenericRemoteStorage, ListingMode, RemotePath,\n};\nuse tokio::fs::{self, File, OpenOptions};\nuse tokio::io::AsyncSeekExt;\nuse tokio_util::io::StreamReader;\nuse tokio_util::sync::CancellationToken;\nuse tracing::warn;\nuse utils::crashsafe::path_with_suffix_extension;\nuse utils::id::{TenantId, TimelineId};\nuse utils::{backoff, pausable_failpoint};\n\nuse super::index::{IndexPart, LayerFileMetadata};\nuse super::manifest::TenantManifest;\nuse super::{\n    FAILED_DOWNLOAD_WARN_THRESHOLD, FAILED_REMOTE_OP_RETRIES, INITDB_PATH, parse_remote_index_path,\n    parse_remote_tenant_manifest_path, remote_index_path, remote_initdb_archive_path,\n    remote_initdb_preserved_archive_path, remote_tenant_manifest_path,\n    remote_tenant_manifest_prefix, remote_tenant_path,\n};\nuse crate::TEMP_FILE_SUFFIX;\nuse crate::config::PageServerConf;\nuse crate::context::RequestContext;\nuse crate::span::{\n    debug_assert_current_span_has_tenant_and_timeline_id, debug_assert_current_span_has_tenant_id,\n};\nuse crate::tenant::Generation;\nuse crate::tenant::remote_timeline_client::{remote_layer_path, remote_timelines_path};\nuse crate::tenant::storage_layer::LayerName;\nuse crate::virtual_file;\nuse crate::virtual_file::owned_buffers_io::write::FlushTaskError;\nuse crate::virtual_file::{IoBufferMut, MaybeFatalIo, VirtualFile};\nuse crate::virtual_file::{TempVirtualFile, owned_buffers_io};\n\n///\n/// If 'metadata' is given, we 
will validate that the downloaded file's size matches that\n/// in the metadata. (In the future, we might do more cross-checks, like CRC validation)\n///\n/// Returns the size of the downloaded file.\n#[allow(clippy::too_many_arguments)]\npub async fn download_layer_file<'a>(\n    conf: &'static PageServerConf,\n    storage: &'a GenericRemoteStorage,\n    tenant_shard_id: TenantShardId,\n    timeline_id: TimelineId,\n    layer_file_name: &'a LayerName,\n    layer_metadata: &'a LayerFileMetadata,\n    local_path: &Utf8Path,\n    gate: &utils::sync::gate::Gate,\n    cancel: &CancellationToken,\n    ctx: &RequestContext,\n) -> Result<u64, DownloadError> {\n    debug_assert_current_span_has_tenant_and_timeline_id();\n\n    let timeline_path = conf.timeline_path(&tenant_shard_id, &timeline_id);\n\n    let remote_path = remote_layer_path(\n        &tenant_shard_id.tenant_id,\n        &timeline_id,\n        layer_metadata.shard,\n        layer_file_name,\n        layer_metadata.generation,\n    );\n\n    let (bytes_amount, temp_file) = download_retry(\n        || async {\n            // TempVirtualFile requires us to never reuse a filename while an old\n            // instance of TempVirtualFile created with that filename is not done dropping yet.\n            // So, we use a monotonic counter to disambiguate the filenames.\n            static NEXT_TEMP_DISAMBIGUATOR: AtomicU64 = AtomicU64::new(1);\n            let filename_disambiguator =\n                NEXT_TEMP_DISAMBIGUATOR.fetch_add(1, std::sync::atomic::Ordering::Relaxed);\n\n            let temp_file_path = path_with_suffix_extension(\n                local_path,\n                &format!(\"{filename_disambiguator:x}.{TEMP_DOWNLOAD_EXTENSION}\"),\n            );\n\n            let temp_file = TempVirtualFile::new(\n                VirtualFile::open_with_options_v2(\n                    &temp_file_path,\n                    virtual_file::OpenOptions::new()\n                        .create_new(true)\n               
         .write(true),\n                    ctx,\n                )\n                .await\n                .with_context(|| format!(\"create a temp file for layer download: {temp_file_path}\"))\n                .map_err(DownloadError::Other)?,\n                gate.enter().map_err(|_| DownloadError::Cancelled)?,\n            );\n            download_object(storage, &remote_path, temp_file, gate, cancel, ctx).await\n        },\n        &format!(\"download {remote_path:?}\"),\n        cancel,\n    )\n    .await?;\n\n    let expected = layer_metadata.file_size;\n    if expected != bytes_amount {\n        return Err(DownloadError::Other(anyhow!(\n            \"According to layer file metadata should have downloaded {expected} bytes but downloaded {bytes_amount} bytes into file {:?}\",\n            temp_file.path()\n        )));\n    }\n\n    fail::fail_point!(\"remote-storage-download-pre-rename\", |_| {\n        Err(DownloadError::Other(anyhow!(\n            \"remote-storage-download-pre-rename failpoint triggered\"\n        )))\n    });\n\n    // Try rename before disarming the temp file.\n    // That way, if rename fails for whatever reason, we clean up the temp file on the return path.\n\n    fs::rename(temp_file.path(), &local_path)\n        .await\n        .with_context(|| format!(\"rename download layer file to {local_path}\"))\n        .map_err(DownloadError::Other)?;\n\n    // The temp file's VirtualFile points to the temp_file_path which we moved above.\n    // Drop it immediately, it's invalid.\n    // This will get better in https://github.com/neondatabase/neon/issues/11692\n    let _: VirtualFile = temp_file.disarm_into_inner();\n    // NB: The gate guard that was stored in `temp_file` is dropped but we continue\n    // to operate on it and on the parent timeline directory.\n    // Those operations are safe to do because higher-level code is holding another gate guard:\n    // - attached mode: the download task spawned by struct Layer is holding the gate 
guard\n    // - secondary mode: The TenantDownloader::download holds the gate open\n\n    // The rename above is not durable yet.\n    // It doesn't matter for crash consistency because pageserver startup deletes temp\n    // files and we'll re-download on demand if necessary.\n\n    // We use fatal_err() below because the after the rename above,\n    // the in-memory state of the filesystem already has the layer file in its final place,\n    // and subsequent pageserver code could think it's durable while it really isn't.\n    let work = {\n        let ctx = ctx.detached_child(ctx.task_kind(), ctx.download_behavior());\n        async move {\n            let timeline_dir = VirtualFile::open(&timeline_path, &ctx)\n                .await\n                .fatal_err(\"VirtualFile::open for timeline dir fsync\");\n            timeline_dir\n                .sync_all()\n                .await\n                .fatal_err(\"VirtualFile::sync_all timeline dir\");\n        }\n    };\n    crate::virtual_file::io_engine::get()\n        .spawn_blocking_and_block_on_if_std(work)\n        .await;\n\n    tracing::debug!(\"download complete: {local_path}\");\n\n    Ok(bytes_amount)\n}\n\n/// Download the object `src_path` in the remote `storage` to local path `dst_path`.\n///\n/// If Ok() is returned, the download succeeded and the inode & data have been made durable.\n/// (Note that the directory entry for the inode is not made durable.)\n/// The file size in bytes is returned.\n///\n/// If Err() is returned, there was some error. 
The file at `dst_path` has been unlinked.\n/// The unlinking has _not_ been made durable.\nasync fn download_object(\n    storage: &GenericRemoteStorage,\n    src_path: &RemotePath,\n    destination_file: TempVirtualFile,\n    gate: &utils::sync::gate::Gate,\n    cancel: &CancellationToken,\n    ctx: &RequestContext,\n) -> Result<(u64, TempVirtualFile), DownloadError> {\n    let mut download = storage\n        .download(src_path, &DownloadOpts::default(), cancel)\n        .await?;\n\n    pausable_failpoint!(\"before-downloading-layer-stream-pausable\");\n\n    let dst_path = destination_file.path().to_owned();\n    let mut buffered = owned_buffers_io::write::BufferedWriter::<IoBufferMut, _>::new(\n        destination_file,\n        0,\n        || IoBufferMut::with_capacity(super::BUFFER_SIZE),\n        gate.enter().map_err(|_| DownloadError::Cancelled)?,\n        cancel.child_token(),\n        ctx,\n        tracing::info_span!(parent: None, \"download_object_buffered_writer\", %dst_path),\n    );\n\n    // TODO: use vectored write (writev) once supported by tokio-epoll-uring.\n    // There's chunks_vectored() on the stream.\n    let (bytes_amount, destination_file) = async {\n        while let Some(res) = futures::StreamExt::next(&mut download.download_stream).await {\n            let chunk = match res {\n                Ok(chunk) => chunk,\n                Err(e) => return Err(DownloadError::from(e)),\n            };\n            buffered\n                .write_buffered_borrowed(&chunk, ctx)\n                .await\n                .map_err(|e| match e {\n                    FlushTaskError::Cancelled => DownloadError::Cancelled,\n                })?;\n        }\n        buffered\n            .shutdown(\n                owned_buffers_io::write::BufferedWriterShutdownMode::PadThenTruncate,\n                ctx,\n            )\n            .await\n            .map_err(|e| match e {\n                FlushTaskError::Cancelled => DownloadError::Cancelled,\n            
})\n    }\n    .await?;\n\n    // not using sync_data because it can lose file size update\n    destination_file\n        .sync_all()\n        .await\n        .maybe_fatal_err(\"download_object sync_all\")\n        .with_context(|| format!(\"failed to fsync source file at {dst_path}\"))\n        .map_err(DownloadError::Other)?;\n\n    Ok((bytes_amount, destination_file))\n}\n\nconst TEMP_DOWNLOAD_EXTENSION: &str = \"temp_download\";\n\npub(crate) fn is_temp_download_file(path: &Utf8Path) -> bool {\n    let extension = path.extension();\n    match extension {\n        Some(TEMP_DOWNLOAD_EXTENSION) => true,\n        Some(_) => false,\n        None => false,\n    }\n}\n\nasync fn list_identifiers<T>(\n    storage: &GenericRemoteStorage,\n    prefix: RemotePath,\n    cancel: CancellationToken,\n) -> anyhow::Result<(HashSet<T>, HashSet<String>)>\nwhere\n    T: FromStr + Eq + std::hash::Hash,\n{\n    let listing = download_retry_forever(\n        || storage.list(Some(&prefix), ListingMode::WithDelimiter, None, &cancel),\n        &format!(\"list identifiers in prefix {prefix}\"),\n        &cancel,\n    )\n    .await?;\n\n    let mut parsed_ids = HashSet::new();\n    let mut other_prefixes = HashSet::new();\n\n    for id_remote_storage_key in listing.prefixes {\n        let object_name = id_remote_storage_key.object_name().ok_or_else(|| {\n            anyhow::anyhow!(\"failed to get object name for key {id_remote_storage_key}\")\n        })?;\n\n        match object_name.parse::<T>() {\n            Ok(t) => parsed_ids.insert(t),\n            Err(_) => other_prefixes.insert(object_name.to_string()),\n        };\n    }\n\n    for object in listing.keys {\n        let object_name = object\n            .key\n            .object_name()\n            .ok_or_else(|| anyhow::anyhow!(\"object name for key {}\", object.key))?;\n        other_prefixes.insert(object_name.to_string());\n    }\n\n    Ok((parsed_ids, other_prefixes))\n}\n\n/// List shards of given tenant in remote 
storage\npub(crate) async fn list_remote_tenant_shards(\n    storage: &GenericRemoteStorage,\n    tenant_id: TenantId,\n    cancel: CancellationToken,\n) -> anyhow::Result<(HashSet<TenantShardId>, HashSet<String>)> {\n    let remote_path = remote_tenant_path(&TenantShardId::unsharded(tenant_id));\n    list_identifiers::<TenantShardId>(storage, remote_path, cancel).await\n}\n\n/// List timelines of given tenant shard in remote storage\npub async fn list_remote_timelines(\n    storage: &GenericRemoteStorage,\n    tenant_shard_id: TenantShardId,\n    cancel: CancellationToken,\n) -> anyhow::Result<(HashSet<TimelineId>, HashSet<String>)> {\n    fail::fail_point!(\"storage-sync-list-remote-timelines\", |_| {\n        anyhow::bail!(\"storage-sync-list-remote-timelines\");\n    });\n\n    let remote_path = remote_timelines_path(&tenant_shard_id).add_trailing_slash();\n    list_identifiers::<TimelineId>(storage, remote_path, cancel).await\n}\n\nasync fn do_download_remote_path_retry_forever(\n    storage: &GenericRemoteStorage,\n    remote_path: &RemotePath,\n    download_opts: DownloadOpts,\n    cancel: &CancellationToken,\n) -> Result<(Vec<u8>, SystemTime), DownloadError> {\n    download_retry_forever(\n        || async {\n            let download = storage\n                .download(remote_path, &download_opts, cancel)\n                .await?;\n\n            let mut bytes = Vec::new();\n\n            let stream = download.download_stream;\n            let mut stream = StreamReader::new(stream);\n\n            tokio::io::copy_buf(&mut stream, &mut bytes).await?;\n\n            Ok((bytes, download.last_modified))\n        },\n        &format!(\"download {remote_path:?}\"),\n        cancel,\n    )\n    .await\n}\n\nasync fn do_download_tenant_manifest(\n    storage: &GenericRemoteStorage,\n    tenant_shard_id: &TenantShardId,\n    _timeline_id: Option<&TimelineId>,\n    generation: Generation,\n    cancel: &CancellationToken,\n) -> Result<(TenantManifest, Generation, 
SystemTime), DownloadError> {\n    let remote_path = remote_tenant_manifest_path(tenant_shard_id, generation);\n\n    let download_opts = DownloadOpts {\n        kind: DownloadKind::Small,\n        ..Default::default()\n    };\n\n    let (manifest_bytes, manifest_bytes_mtime) =\n        do_download_remote_path_retry_forever(storage, &remote_path, download_opts, cancel).await?;\n\n    let tenant_manifest = TenantManifest::from_json_bytes(&manifest_bytes)\n        .with_context(|| format!(\"deserialize tenant manifest file at {remote_path:?}\"))\n        .map_err(DownloadError::Other)?;\n\n    Ok((tenant_manifest, generation, manifest_bytes_mtime))\n}\n\nasync fn do_download_index_part(\n    storage: &GenericRemoteStorage,\n    tenant_shard_id: &TenantShardId,\n    timeline_id: Option<&TimelineId>,\n    index_generation: Generation,\n    cancel: &CancellationToken,\n) -> Result<(IndexPart, Generation, SystemTime), DownloadError> {\n    let timeline_id =\n        timeline_id.expect(\"A timeline ID is always provided when downloading an index\");\n    let remote_path = remote_index_path(tenant_shard_id, timeline_id, index_generation);\n\n    let download_opts = DownloadOpts {\n        kind: DownloadKind::Small,\n        ..Default::default()\n    };\n\n    let (index_part_bytes, index_part_mtime) =\n        do_download_remote_path_retry_forever(storage, &remote_path, download_opts, cancel).await?;\n\n    let index_part: IndexPart = serde_json::from_slice(&index_part_bytes)\n        .with_context(|| format!(\"deserialize index part file at {remote_path:?}\"))\n        .map_err(DownloadError::Other)?;\n\n    Ok((index_part, index_generation, index_part_mtime))\n}\n\n/// Metadata objects are \"generationed\", meaning that they include a generation suffix.  
This\n/// function downloads the object with the highest generation <= `my_generation`.\n///\n/// Data objects (layer files) also include a generation in their path, but there is no equivalent\n/// search process, because their reference from an index includes the generation.\n///\n/// An expensive object listing operation is only done if necessary: the typical fast path is to issue two\n/// GET operations, one to our own generation (stale attachment case), and one to the immediately preceding\n/// generation (normal case when migrating/restarting).  Only if both of these return 404 do we fall back\n/// to listing objects.\n///\n/// * `my_generation`: the value of `[crate::tenant::TenantShard::generation]`\n/// * `what`: for logging, what object are we downloading\n/// * `prefix`: when listing objects, use this prefix (i.e. the part of the object path before the generation)\n/// * `do_download`: a GET of the object in a particular generation, which should **retry indefinitely** unless\n///                  `cancel`` has fired.  
This function does not do its own retries of GET operations, and relies\n///                  on the function passed in to do so.\n/// * `parse_path`: parse a fully qualified remote storage path to get the generation of the object.\n#[allow(clippy::too_many_arguments)]\n#[tracing::instrument(skip_all, fields(generation=?my_generation))]\npub(crate) async fn download_generation_object<'a, T, DF, DFF, PF>(\n    storage: &'a GenericRemoteStorage,\n    tenant_shard_id: &'a TenantShardId,\n    timeline_id: Option<&'a TimelineId>,\n    my_generation: Generation,\n    what: &str,\n    prefix: RemotePath,\n    do_download: DF,\n    parse_path: PF,\n    cancel: &'a CancellationToken,\n) -> Result<(T, Generation, SystemTime), DownloadError>\nwhere\n    DF: Fn(\n        &'a GenericRemoteStorage,\n        &'a TenantShardId,\n        Option<&'a TimelineId>,\n        Generation,\n        &'a CancellationToken,\n    ) -> DFF,\n    DFF: Future<Output = Result<(T, Generation, SystemTime), DownloadError>>,\n    PF: Fn(RemotePath) -> Option<Generation>,\n    T: 'static,\n{\n    debug_assert_current_span_has_tenant_id();\n\n    if my_generation.is_none() {\n        // Operating without generations: just fetch the generation-less path\n        return do_download(storage, tenant_shard_id, timeline_id, my_generation, cancel).await;\n    }\n\n    // Stale case: If we were intentionally attached in a stale generation, the remote object may already\n    // exist in our generation.\n    //\n    // This is an optimization to avoid doing the listing for the general case below.\n    let res = do_download(storage, tenant_shard_id, timeline_id, my_generation, cancel).await;\n    match res {\n        Ok(decoded) => {\n            tracing::debug!(\"Found {what} from current generation (this is a stale attachment)\");\n            return Ok(decoded);\n        }\n        Err(DownloadError::NotFound) => {}\n        Err(e) => return Err(e),\n    };\n\n    // Typical case: the previous generation of 
this tenant was running healthily, and had uploaded the object\n    // we are seeking in that generation.  We may safely start from this index without doing a listing, because:\n    //  - We checked for current generation case above\n    //  - generations > my_generation are to be ignored\n    //  - any other objects that exist would have an older generation than `previous_gen`, and\n    //    we want to find the most recent object from a previous generation.\n    //\n    // This is an optimization to avoid doing the listing for the general case below.\n    let res = do_download(\n        storage,\n        tenant_shard_id,\n        timeline_id,\n        my_generation.previous(),\n        cancel,\n    )\n    .await;\n    match res {\n        Ok(decoded) => {\n            tracing::debug!(\"Found {what} from previous generation\");\n            return Ok(decoded);\n        }\n        Err(DownloadError::NotFound) => {\n            tracing::debug!(\"No {what} found from previous generation, falling back to listing\");\n        }\n        Err(e) => {\n            return Err(e);\n        }\n    }\n\n    // General case/fallback: if there is no index at my_generation or prev_generation, then list all index_part.json\n    // objects, and select the highest one with a generation <= my_generation.  Constructing the prefix is equivalent\n    // to constructing a full index path with no generation, because the generation is a suffix.\n    let paths = download_retry(\n        || async {\n            storage\n                .list(Some(&prefix), ListingMode::NoDelimiter, None, cancel)\n                .await\n        },\n        \"list index_part files\",\n        cancel,\n    )\n    .await?\n    .keys;\n\n    // General case logic for which index to use: the latest index whose generation\n    // is <= our own.  
See \"Finding the remote indices for timelines\" in docs/rfcs/025-generation-numbers.md\n    let max_previous_generation = paths\n        .into_iter()\n        .filter_map(|o| parse_path(o.key))\n        .filter(|g| g <= &my_generation)\n        .max();\n\n    match max_previous_generation {\n        Some(g) => {\n            tracing::debug!(\"Found {what} in generation {g:?}\");\n            do_download(storage, tenant_shard_id, timeline_id, g, cancel).await\n        }\n        None => {\n            // Migration from legacy pre-generation state: we have a generation but no prior\n            // attached pageservers did.  Try to load from a no-generation path.\n            tracing::debug!(\"No {what}* found\");\n            do_download(\n                storage,\n                tenant_shard_id,\n                timeline_id,\n                Generation::none(),\n                cancel,\n            )\n            .await\n        }\n    }\n}\n\n/// index_part.json objects are suffixed with a generation number, so we cannot\n/// directly GET the latest index part without doing some probing.\n///\n/// In this function we probe for the most recent index in a generation <= our current generation.\n/// See \"Finding the remote indices for timelines\" in docs/rfcs/025-generation-numbers.md\npub(crate) async fn download_index_part(\n    storage: &GenericRemoteStorage,\n    tenant_shard_id: &TenantShardId,\n    timeline_id: &TimelineId,\n    my_generation: Generation,\n    cancel: &CancellationToken,\n) -> Result<(IndexPart, Generation, SystemTime), DownloadError> {\n    debug_assert_current_span_has_tenant_and_timeline_id();\n\n    let index_prefix = remote_index_path(tenant_shard_id, timeline_id, Generation::none());\n    download_generation_object(\n        storage,\n        tenant_shard_id,\n        Some(timeline_id),\n        my_generation,\n        \"index_part\",\n        index_prefix,\n        do_download_index_part,\n        parse_remote_index_path,\n        
cancel,\n    )\n    .await\n}\n\npub(crate) async fn download_tenant_manifest(\n    storage: &GenericRemoteStorage,\n    tenant_shard_id: &TenantShardId,\n    my_generation: Generation,\n    cancel: &CancellationToken,\n) -> Result<(TenantManifest, Generation, SystemTime), DownloadError> {\n    let manifest_prefix = remote_tenant_manifest_prefix(tenant_shard_id);\n\n    download_generation_object(\n        storage,\n        tenant_shard_id,\n        None,\n        my_generation,\n        \"tenant-manifest\",\n        manifest_prefix,\n        do_download_tenant_manifest,\n        parse_remote_tenant_manifest_path,\n        cancel,\n    )\n    .await\n}\n\npub(crate) async fn download_initdb_tar_zst(\n    conf: &'static PageServerConf,\n    storage: &GenericRemoteStorage,\n    tenant_shard_id: &TenantShardId,\n    timeline_id: &TimelineId,\n    cancel: &CancellationToken,\n) -> Result<(Utf8PathBuf, File), DownloadError> {\n    debug_assert_current_span_has_tenant_and_timeline_id();\n\n    let remote_path = remote_initdb_archive_path(&tenant_shard_id.tenant_id, timeline_id);\n\n    let remote_preserved_path =\n        remote_initdb_preserved_archive_path(&tenant_shard_id.tenant_id, timeline_id);\n\n    let timeline_path = conf.timelines_path(tenant_shard_id);\n\n    if !timeline_path.exists() {\n        tokio::fs::create_dir_all(&timeline_path)\n            .await\n            .with_context(|| format!(\"timeline dir creation {timeline_path}\"))\n            .map_err(DownloadError::Other)?;\n    }\n    let temp_path = timeline_path.join(format!(\n        \"{INITDB_PATH}.download-{timeline_id}.{TEMP_FILE_SUFFIX}\"\n    ));\n\n    let file = download_retry(\n        || async {\n            let file = OpenOptions::new()\n                .create(true)\n                .truncate(true)\n                .read(true)\n                .write(true)\n                .open(&temp_path)\n                .await\n                .with_context(|| format!(\"tempfile creation 
{temp_path}\"))\n                .map_err(DownloadError::Other)?;\n\n            let download = match storage\n                .download(&remote_path, &DownloadOpts::default(), cancel)\n                .await\n            {\n                Ok(dl) => dl,\n                Err(DownloadError::NotFound) => {\n                    storage\n                        .download(&remote_preserved_path, &DownloadOpts::default(), cancel)\n                        .await?\n                }\n                Err(other) => Err(other)?,\n            };\n            let mut download = tokio_util::io::StreamReader::new(download.download_stream);\n            let mut writer = tokio::io::BufWriter::with_capacity(super::BUFFER_SIZE, file);\n\n            tokio::io::copy_buf(&mut download, &mut writer).await?;\n\n            let mut file = writer.into_inner();\n\n            file.seek(std::io::SeekFrom::Start(0))\n                .await\n                .with_context(|| format!(\"rewinding initdb.tar.zst at: {remote_path:?}\"))\n                .map_err(DownloadError::Other)?;\n\n            Ok(file)\n        },\n        &format!(\"download {remote_path}\"),\n        cancel,\n    )\n    .await\n    .inspect_err(|_e| {\n        // Do a best-effort attempt at deleting the temporary file upon encountering an error.\n        // We don't have async here nor do we want to pile on any extra errors.\n        if let Err(e) = std::fs::remove_file(&temp_path) {\n            if e.kind() != std::io::ErrorKind::NotFound {\n                warn!(\"error deleting temporary file {temp_path}: {e}\");\n            }\n        }\n    })?;\n\n    Ok((temp_path, file))\n}\n\n/// Helper function to handle retries for a download operation.\n///\n/// Remote operations can fail due to rate limits (S3), spurious network\n/// problems, or other external reasons. 
Retry FAILED_DOWNLOAD_RETRIES times,\n/// with backoff.\n///\n/// (See similar logic for uploads in `perform_upload_task`)\npub(super) async fn download_retry<T, O, F>(\n    op: O,\n    description: &str,\n    cancel: &CancellationToken,\n) -> Result<T, DownloadError>\nwhere\n    O: FnMut() -> F,\n    F: Future<Output = Result<T, DownloadError>>,\n{\n    backoff::retry(\n        op,\n        DownloadError::is_permanent,\n        FAILED_DOWNLOAD_WARN_THRESHOLD,\n        FAILED_REMOTE_OP_RETRIES,\n        description,\n        cancel,\n    )\n    .await\n    .ok_or_else(|| DownloadError::Cancelled)\n    .and_then(|x| x)\n}\n\npub(crate) async fn download_retry_forever<T, O, F>(\n    op: O,\n    description: &str,\n    cancel: &CancellationToken,\n) -> Result<T, DownloadError>\nwhere\n    O: FnMut() -> F,\n    F: Future<Output = Result<T, DownloadError>>,\n{\n    backoff::retry(\n        op,\n        DownloadError::is_permanent,\n        FAILED_DOWNLOAD_WARN_THRESHOLD,\n        u32::MAX,\n        description,\n        cancel,\n    )\n    .await\n    .ok_or_else(|| DownloadError::Cancelled)\n    .and_then(|x| x)\n}\n"
  },
  {
    "path": "pageserver/src/tenant/remote_timeline_client/index.rs",
    "content": "//! In-memory index to track the tenant files on the remote storage.\n//!\n//! Able to restore itself from the storage index parts, that are located in every timeline's remote directory and contain all data about\n//! remote timeline layers and its metadata.\n\nuse std::collections::HashMap;\n\nuse chrono::NaiveDateTime;\nuse pageserver_api::models::AuxFilePolicy;\nuse pageserver_api::models::RelSizeMigration;\nuse pageserver_api::shard::ShardIndex;\nuse serde::{Deserialize, Serialize};\nuse utils::id::TimelineId;\nuse utils::lsn::Lsn;\n\nuse super::is_same_remote_layer_path;\nuse crate::tenant::Generation;\nuse crate::tenant::metadata::TimelineMetadata;\nuse crate::tenant::storage_layer::LayerName;\nuse crate::tenant::timeline::import_pgdata;\n\n/// In-memory representation of an `index_part.json` file\n///\n/// Contains the data about all files in the timeline, present remotely and its metadata.\n///\n/// This type needs to be backwards and forwards compatible. When changing the fields,\n/// remember to add a test case for the changed version.\n#[derive(Debug, PartialEq, Eq, Clone, Serialize, Deserialize)]\npub struct IndexPart {\n    /// Debugging aid describing the version of this type.\n    #[serde(default)]\n    version: usize,\n\n    #[serde(default)]\n    #[serde(skip_serializing_if = \"Option::is_none\")]\n    pub deleted_at: Option<NaiveDateTime>,\n\n    #[serde(default)]\n    #[serde(skip_serializing_if = \"Option::is_none\")]\n    pub archived_at: Option<NaiveDateTime>,\n\n    /// This field supports import-from-pgdata (\"fast imports\" platform feature).\n    /// We don't currently use fast imports, so, this field is None for all production timelines.\n    /// See <https://github.com/neondatabase/neon/pull/9218> for more information.\n    #[serde(default)]\n    #[serde(skip_serializing_if = \"Option::is_none\")]\n    pub import_pgdata: Option<import_pgdata::index_part_format::Root>,\n\n    /// Layer filenames and metadata. 
For an index persisted in remote storage, all layers must\n    /// exist in remote storage.\n    pub layer_metadata: HashMap<LayerName, LayerFileMetadata>,\n\n    /// Because of the trouble of eyeballing the legacy \"metadata\" field, we copied the\n    /// \"disk_consistent_lsn\" out. After version 7 this is no longer needed, but the name cannot be\n    /// reused.\n    pub(super) disk_consistent_lsn: Lsn,\n\n    // TODO: rename as \"metadata\" next week, keep the alias = \"metadata_bytes\", bump version Adding\n    // the \"alias = metadata\" was forgotten in #7693, so we have to use \"rewrite = metadata_bytes\"\n    // for backwards compatibility.\n    #[serde(\n        rename = \"metadata_bytes\",\n        alias = \"metadata\",\n        with = \"crate::tenant::metadata::modern_serde\"\n    )]\n    pub metadata: TimelineMetadata,\n\n    #[serde(default)]\n    pub(crate) lineage: Lineage,\n\n    #[serde(skip_serializing_if = \"Option::is_none\", default)]\n    pub(crate) gc_blocking: Option<GcBlocking>,\n\n    /// Describes the kind of aux files stored in the timeline.\n    ///\n    /// The value is modified during file ingestion when the latest wanted value communicated via tenant config is applied if it is acceptable.\n    /// A V1 setting after V2 files have been committed is not accepted.\n    ///\n    /// None means no aux files have been written to the storage before the point\n    /// when this flag is introduced.\n    ///\n    /// This flag is not used any more as all tenants have been transitioned to the new aux file policy.\n    #[serde(skip_serializing_if = \"Option::is_none\", default)]\n    pub(crate) last_aux_file_policy: Option<AuxFilePolicy>,\n\n    #[serde(skip_serializing_if = \"Option::is_none\", default)]\n    pub(crate) rel_size_migration: Option<RelSizeMigration>,\n\n    /// Not used anymore -- kept here for backwards compatibility. 
Merged into the `gc_compaction` field.\n    #[serde(skip_serializing_if = \"Option::is_none\", default)]\n    l2_lsn: Option<Lsn>,\n\n    /// State for the garbage-collecting compaction pass.\n    ///\n    /// Garbage-collecting compaction (gc-compaction) prunes `Value`s that are outside\n    /// the PITR window and not needed by child timelines.\n    ///\n    /// A commonly used synonym for this compaction pass is\n    /// \"bottommost-compaction\"  because the affected LSN range\n    /// is the \"bottom\" of the (key,lsn) map.\n    ///\n    /// Gc-compaction is a quite expensive operation; that's why we use\n    /// trigger condition.\n    /// This field here holds the state pertaining to that trigger condition\n    /// and (in future) to the progress of the gc-compaction, so that it's\n    /// resumable across restarts & migrations.\n    ///\n    /// Note that the underlying algorithm is _also_ called `gc-compaction`\n    /// in most places & design docs; but in fact it is more flexible than\n    /// just the specific use case here; it needs a new name.\n    #[serde(skip_serializing_if = \"Option::is_none\", default)]\n    pub(crate) gc_compaction: Option<GcCompactionState>,\n\n    /// The timestamp when the timeline was marked invisible in synthetic size calculations.\n    #[serde(skip_serializing_if = \"Option::is_none\", default)]\n    pub(crate) marked_invisible_at: Option<NaiveDateTime>,\n\n    /// The LSN at which we started the rel size migration. Accesses below this LSN should be\n    /// processed with the v1 read path. Usually this LSN should be set together with `rel_size_migration`.\n    #[serde(skip_serializing_if = \"Option::is_none\", default)]\n    pub(crate) rel_size_migrated_at: Option<Lsn>,\n}\n\n#[derive(Debug, PartialEq, Eq, Clone, Serialize, Deserialize)]\npub struct GcCompactionState {\n    /// The upper bound of the last completed garbage-collecting compaction, aka. 
L2 LSN.\n    pub(crate) last_completed_lsn: Lsn,\n}\n\nimpl IndexPart {\n    /// When adding or modifying any parts of `IndexPart`, increment the version so that it can be\n    /// used to understand later versions.\n    ///\n    /// Version is currently informative only.\n    /// Version history\n    /// - 2: added `deleted_at`\n    /// - 3: no longer deserialize `timeline_layers` (serialized format is the same, but timeline_layers\n    ///   is always generated from the keys of `layer_metadata`)\n    /// - 4: timeline_layers is fully removed.\n    /// - 5: lineage was added\n    /// - 6: last_aux_file_policy is added.\n    /// - 7: metadata_bytes is no longer written, but still read\n    /// - 8: added `archived_at`\n    /// - 9: +gc_blocking\n    /// - 10: +import_pgdata\n    /// - 11: +rel_size_migration\n    /// - 12: +l2_lsn\n    /// - 13: +gc_compaction\n    /// - 14: +marked_invisible_at\n    /// - 15: +rel_size_migrated_at\n    const LATEST_VERSION: usize = 15;\n\n    // Versions we may see when reading from a bucket.\n    pub const KNOWN_VERSIONS: &'static [usize] =\n        &[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15];\n\n    pub const FILE_NAME: &'static str = \"index_part.json\";\n\n    pub fn empty(metadata: TimelineMetadata) -> Self {\n        IndexPart {\n            version: Self::LATEST_VERSION,\n            layer_metadata: Default::default(),\n            disk_consistent_lsn: metadata.disk_consistent_lsn(),\n            metadata,\n            deleted_at: None,\n            archived_at: None,\n            lineage: Default::default(),\n            gc_blocking: None,\n            last_aux_file_policy: None,\n            import_pgdata: None,\n            rel_size_migration: None,\n            l2_lsn: None,\n            gc_compaction: None,\n            marked_invisible_at: None,\n            rel_size_migrated_at: None,\n        }\n    }\n\n    pub fn version(&self) -> usize {\n        self.version\n    }\n\n    /// If you want this under 
normal operations, read it from self.metadata:\n    /// this method is just for the scrubber to use when validating an index.\n    pub fn duplicated_disk_consistent_lsn(&self) -> Lsn {\n        self.disk_consistent_lsn\n    }\n\n    pub fn from_json_bytes(bytes: &[u8]) -> Result<Self, serde_json::Error> {\n        serde_json::from_slice::<IndexPart>(bytes)\n    }\n\n    pub fn to_json_bytes(&self) -> serde_json::Result<Vec<u8>> {\n        serde_json::to_vec(self)\n    }\n\n    #[cfg(test)]\n    pub(crate) fn example() -> Self {\n        Self::empty(TimelineMetadata::example())\n    }\n\n    /// Returns true if the index contains a reference to the given layer (i.e. file path).\n    ///\n    /// TODO: there should be a variant of LayerName for the physical remote path that contains\n    /// information about the shard and generation, to avoid passing in metadata.\n    pub fn references(&self, name: &LayerName, metadata: &LayerFileMetadata) -> bool {\n        let Some(index_metadata) = self.layer_metadata.get(name) else {\n            return false;\n        };\n        is_same_remote_layer_path(name, metadata, name, index_metadata)\n    }\n\n    /// Check for invariants in the index: this is useful when uploading an index to ensure that if\n    /// we encounter a bug, we do not persist buggy metadata.\n    pub(crate) fn validate(&self) -> Result<(), String> {\n        if self.import_pgdata.is_none()\n            && self.metadata.ancestor_timeline().is_none()\n            && self.layer_metadata.is_empty()\n        {\n            // Unless we're in the middle of a raw pgdata import, or this is a child timeline,the index must\n            // always have at least one layer.\n            return Err(\"Index has no ancestor and no layers\".to_string());\n        }\n\n        Ok(())\n    }\n}\n\n/// Metadata gathered for each of the layer files.\n///\n/// Fields have to be `Option`s because remote [`IndexPart`]'s can be from different version, which\n/// might have less or 
more metadata depending if upgrading or rolling back an upgrade.\n#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize)]\npub struct LayerFileMetadata {\n    pub file_size: u64,\n\n    #[serde(default = \"Generation::none\")]\n    #[serde(skip_serializing_if = \"Generation::is_none\")]\n    pub generation: Generation,\n\n    #[serde(default = \"ShardIndex::unsharded\")]\n    #[serde(skip_serializing_if = \"ShardIndex::is_unsharded\")]\n    pub shard: ShardIndex,\n}\n\nimpl LayerFileMetadata {\n    pub fn new(file_size: u64, generation: Generation, shard: ShardIndex) -> Self {\n        LayerFileMetadata {\n            file_size,\n            generation,\n            shard,\n        }\n    }\n    /// Helper to get both generation and file size in a tuple\n    pub fn generation_file_size(&self) -> (Generation, u64) {\n        (self.generation, self.file_size)\n    }\n}\n\n/// Limited history of earlier ancestors.\n///\n/// A timeline can have more than 1 earlier ancestor, in the rare case that it was repeatedly\n/// reparented by having an later timeline be detached from it's ancestor.\n#[derive(Debug, PartialEq, Eq, Clone, Serialize, Deserialize, Default)]\npub(crate) struct Lineage {\n    /// Has the `reparenting_history` been truncated to [`Lineage::REMEMBER_AT_MOST`].\n    #[serde(skip_serializing_if = \"is_false\", default)]\n    reparenting_history_truncated: bool,\n\n    /// Earlier ancestors, truncated when [`Self::reparenting_history_truncated`]\n    ///\n    /// These are stored in case we want to support WAL based DR on the timeline. There can be many\n    /// of these and at most one [`Self::original_ancestor`]. 
There cannot be more reparentings\n    /// after [`Self::original_ancestor`] has been set.\n    #[serde(skip_serializing_if = \"Vec::is_empty\", default)]\n    reparenting_history: Vec<TimelineId>,\n\n    /// The ancestor from which this timeline has been detached, and when.\n    ///\n    /// If you are adding support for detaching from a hierarchy, consider changing the ancestry\n    /// into a `Vec<(TimelineId, Lsn)>` to be a path instead.\n    // FIXME: this is insufficient even for a path of two timelines for future wal recovery\n    // purposes:\n    //\n    // assuming an \"old main\" which has received most of the WAL, and has a branch \"new main\",\n    // starting a bit before \"old main\" last_record_lsn. the current version works fine,\n    // because we will know to replay wal and branch at the recorded Lsn to do wal recovery.\n    //\n    // then assuming \"new main\" would similarly receive a branch right before its last_record_lsn,\n    // \"new new main\". the current implementation would just store (\"new main\", ancestor_lsn, _)\n    // here. 
however, we cannot recover from WAL using only that information, we would need the\n    // whole ancestry here:\n    //\n    // ```json\n    // [\n    //   [\"old main\", ancestor_lsn(\"new main\"), _],\n    //   [\"new main\", ancestor_lsn(\"new new main\"), _]\n    // ]\n    // ```\n    #[serde(skip_serializing_if = \"Option::is_none\", default)]\n    original_ancestor: Option<(TimelineId, Lsn, NaiveDateTime)>,\n}\n\nfn is_false(b: &bool) -> bool {\n    !b\n}\n\nimpl Lineage {\n    const REMEMBER_AT_MOST: usize = 100;\n\n    pub(crate) fn record_previous_ancestor(&mut self, old_ancestor: &TimelineId) -> bool {\n        if self.reparenting_history.last() == Some(old_ancestor) {\n            // do not re-record it\n            false\n        } else {\n            #[cfg(feature = \"testing\")]\n            {\n                let existing = self\n                    .reparenting_history\n                    .iter()\n                    .position(|x| x == old_ancestor);\n                assert_eq!(\n                    existing, None,\n                    \"we cannot reparent onto and off and onto the same timeline twice\"\n                );\n            }\n            let drop_oldest = self.reparenting_history.len() + 1 >= Self::REMEMBER_AT_MOST;\n\n            self.reparenting_history_truncated |= drop_oldest;\n            if drop_oldest {\n                self.reparenting_history.remove(0);\n            }\n            self.reparenting_history.push(*old_ancestor);\n            true\n        }\n    }\n\n    /// Returns true if anything changed.\n    pub(crate) fn record_detaching(&mut self, branchpoint: &(TimelineId, Lsn)) -> bool {\n        if let Some((id, lsn, _)) = self.original_ancestor {\n            assert_eq!(\n                &(id, lsn),\n                branchpoint,\n                \"detaching attempt has to be for the same ancestor we are already detached from\"\n            );\n            false\n        } else {\n            self.original_ancestor =\n  
              Some((branchpoint.0, branchpoint.1, chrono::Utc::now().naive_utc()));\n            true\n        }\n    }\n\n    /// The queried lsn is most likely the basebackup lsn, and this answers question \"is it allowed\n    /// to start a read/write primary at this lsn\".\n    ///\n    /// Returns true if the Lsn was previously our branch point.\n    pub(crate) fn is_previous_ancestor_lsn(&self, lsn: Lsn) -> bool {\n        self.original_ancestor\n            .is_some_and(|(_, ancestor_lsn, _)| ancestor_lsn == lsn)\n    }\n\n    /// Returns true if the timeline originally had an ancestor, and no longer has one.\n    pub(crate) fn is_detached_from_ancestor(&self) -> bool {\n        self.original_ancestor.is_some()\n    }\n\n    /// Returns original ancestor timeline id and lsn that this timeline has been detached from.\n    pub(crate) fn detached_previous_ancestor(&self) -> Option<(TimelineId, Lsn)> {\n        self.original_ancestor.map(|(id, lsn, _)| (id, lsn))\n    }\n\n    pub(crate) fn is_reparented(&self) -> bool {\n        !self.reparenting_history.is_empty()\n    }\n}\n\n#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]\npub(crate) struct GcBlocking {\n    pub(crate) started_at: NaiveDateTime,\n    pub(crate) reasons: enumset::EnumSet<GcBlockingReason>,\n}\n\n#[derive(Debug, enumset::EnumSetType, serde::Serialize, serde::Deserialize)]\n#[enumset(serialize_repr = \"list\")]\npub(crate) enum GcBlockingReason {\n    Manual,\n    DetachAncestor,\n}\n\nimpl GcBlocking {\n    pub(super) fn started_now_for(reason: GcBlockingReason) -> Self {\n        GcBlocking {\n            started_at: chrono::Utc::now().naive_utc(),\n            reasons: enumset::EnumSet::only(reason),\n        }\n    }\n\n    /// Returns true if the given reason is one of the reasons why the gc is blocked.\n    pub(crate) fn blocked_by(&self, reason: GcBlockingReason) -> bool {\n        self.reasons.contains(reason)\n    }\n\n    /// Returns a version of self with the given 
reason.\n    pub(super) fn with_reason(&self, reason: GcBlockingReason) -> Self {\n        assert!(!self.blocked_by(reason));\n        let mut reasons = self.reasons;\n        reasons.insert(reason);\n\n        Self {\n            started_at: self.started_at,\n            reasons,\n        }\n    }\n\n    /// Returns a version of self without the given reason. Assumption is that if\n    /// there are no more reasons, we can unblock the gc by returning `None`.\n    pub(super) fn without_reason(&self, reason: GcBlockingReason) -> Option<Self> {\n        assert!(self.blocked_by(reason));\n\n        if self.reasons.len() == 1 {\n            None\n        } else {\n            let mut reasons = self.reasons;\n            assert!(reasons.remove(reason));\n            assert!(!reasons.is_empty());\n\n            Some(Self {\n                started_at: self.started_at,\n                reasons,\n            })\n        }\n    }\n}\n\n#[cfg(test)]\nmod tests {\n    use postgres_ffi::PgMajorVersion;\n    use std::str::FromStr;\n    use utils::id::TimelineId;\n\n    use super::*;\n\n    #[test]\n    fn v1_indexpart_is_parsed() {\n        let example = r#\"{\n            \"version\":1,\n            \"timeline_layers\":[\"000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__0000000001696070-00000000016960E9\"],\n            \"layer_metadata\":{\n                \"000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__0000000001696070-00000000016960E9\": { \"file_size\": 25600000 },\n                \"000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__00000000016B59D8-00000000016B5A51\": { \"file_size\": 9007199254741001 }\n            },\n            \"disk_consistent_lsn\":\"0/16960E8\",\n            
\"metadata_bytes\":[113,11,159,210,0,54,0,4,0,0,0,0,1,105,96,232,1,0,0,0,0,1,105,96,112,0,0,0,0,0,0,0,0,0,0,0,0,0,1,105,96,112,0,0,0,0,1,105,96,112,0,0,0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]\n        }\"#;\n\n        let expected = IndexPart {\n            // note this is not verified, could be anything, but exists for humans debugging.. 
could be the git version instead?\n            version: 1,\n            layer_metadata: HashMap::from([\n                (\"000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__0000000001696070-00000000016960E9\".parse().unwrap(), LayerFileMetadata {\n                    file_size: 25600000,\n                    generation: Generation::none(),\n                    shard: ShardIndex::unsharded()\n                }),\n                (\"000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__00000000016B59D8-00000000016B5A51\".parse().unwrap(), LayerFileMetadata {\n                    // serde_json should always parse this but this might be a double with jq for\n                    // example.\n                    file_size: 9007199254741001,\n                    generation: Generation::none(),\n                    shard: ShardIndex::unsharded()\n                })\n            ]),\n            disk_consistent_lsn: \"0/16960E8\".parse::<Lsn>().unwrap(),\n            metadata: 
TimelineMetadata::from_bytes(&[113,11,159,210,0,54,0,4,0,0,0,0,1,105,96,232,1,0,0,0,0,1,105,96,112,0,0,0,0,0,0,0,0,0,0,0,0,0,1,105,96,112,0,0,0,0,1,105,96,112,0,0,0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]).unwrap(),\n            deleted_at: None,\n            archived_at: None,\n            lineage: Lineage::default(),\n            gc_blocking: None,\n            last_aux_file_policy: None,\n            import_pgdata: None,\n            rel_size_migration: None,\n            l2_lsn: None,\n            gc_compaction: None,\n            marked_invisible_at: None,\n            rel_size_migrated_at: None,\n        };\n\n        let part = IndexPart::from_json_bytes(example.as_bytes()).unwrap();\n        assert_eq!(part, expected);\n    }\n\n    #[test]\n    fn v1_indexpart_is_parsed_with_optional_missing_layers() {\n        let example = r#\"{\n            \"version\":1,\n            \"timeline_layers\":[\"000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__0000000001696070-00000000016960E9\"],\n            \"missing_layers\":[\"This shouldn't fail deserialization\"],\n            
\"layer_metadata\":{\n                \"000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__0000000001696070-00000000016960E9\": { \"file_size\": 25600000 },\n                \"000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__00000000016B59D8-00000000016B5A51\": { \"file_size\": 9007199254741001 }\n            },\n            \"disk_consistent_lsn\":\"0/16960E8\",\n            \"metadata_bytes\":[113,11,159,210,0,54,0,4,0,0,0,0,1,105,96,232,1,0,0,0,0,1,105,96,112,0,0,0,0,0,0,0,0,0,0,0,0,0,1,105,96,112,0,0,0,0,1,105,96,112,0,0,0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]\n        }\"#;\n\n        let expected = IndexPart {\n            // note this is not verified, could be anything, but exists for humans debugging.. 
could be the git version instead?\n            version: 1,\n            layer_metadata: HashMap::from([\n                (\"000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__0000000001696070-00000000016960E9\".parse().unwrap(), LayerFileMetadata {\n                    file_size: 25600000,\n                    generation: Generation::none(),\n                    shard: ShardIndex::unsharded()\n                }),\n                (\"000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__00000000016B59D8-00000000016B5A51\".parse().unwrap(), LayerFileMetadata {\n                    // serde_json should always parse this but this might be a double with jq for\n                    // example.\n                    file_size: 9007199254741001,\n                    generation: Generation::none(),\n                    shard: ShardIndex::unsharded()\n                })\n            ]),\n            disk_consistent_lsn: \"0/16960E8\".parse::<Lsn>().unwrap(),\n            metadata: 
TimelineMetadata::from_bytes(&[113,11,159,210,0,54,0,4,0,0,0,0,1,105,96,232,1,0,0,0,0,1,105,96,112,0,0,0,0,0,0,0,0,0,0,0,0,0,1,105,96,112,0,0,0,0,1,105,96,112,0,0,0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]).unwrap(),\n            deleted_at: None,\n            archived_at: None,\n            lineage: Lineage::default(),\n            gc_blocking: None,\n            last_aux_file_policy: None,\n            import_pgdata: None,\n            rel_size_migration: None,\n            l2_lsn: None,\n            gc_compaction: None,\n            marked_invisible_at: None,\n            rel_size_migrated_at: None,\n        };\n\n        let part = IndexPart::from_json_bytes(example.as_bytes()).unwrap();\n        assert_eq!(part, expected);\n    }\n\n    #[test]\n    fn v2_indexpart_is_parsed_with_deleted_at() {\n        let example = r#\"{\n            \"version\":2,\n            \"timeline_layers\":[\"000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__0000000001696070-00000000016960E9\"],\n            \"missing_layers\":[\"This shouldn't fail deserialization\"],\n            
\"layer_metadata\":{\n                \"000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__0000000001696070-00000000016960E9\": { \"file_size\": 25600000 },\n                \"000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__00000000016B59D8-00000000016B5A51\": { \"file_size\": 9007199254741001 }\n            },\n            \"disk_consistent_lsn\":\"0/16960E8\",\n            \"metadata_bytes\":[113,11,159,210,0,54,0,4,0,0,0,0,1,105,96,232,1,0,0,0,0,1,105,96,112,0,0,0,0,0,0,0,0,0,0,0,0,0,1,105,96,112,0,0,0,0,1,105,96,112,0,0,0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],\n            \"deleted_at\": \"2023-07-31T09:00:00.123\"\n        }\"#;\n\n        let expected = IndexPart {\n            // note this is not verified, could be anything, but exists for humans debugging.. 
could be the git version instead?\n            version: 2,\n            layer_metadata: HashMap::from([\n                (\"000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__0000000001696070-00000000016960E9\".parse().unwrap(), LayerFileMetadata {\n                    file_size: 25600000,\n                    generation: Generation::none(),\n                    shard: ShardIndex::unsharded()\n                }),\n                (\"000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__00000000016B59D8-00000000016B5A51\".parse().unwrap(), LayerFileMetadata {\n                    // serde_json should always parse this but this might be a double with jq for\n                    // example.\n                    file_size: 9007199254741001,\n                    generation: Generation::none(),\n                    shard: ShardIndex::unsharded()\n                })\n            ]),\n            disk_consistent_lsn: \"0/16960E8\".parse::<Lsn>().unwrap(),\n            metadata: 
TimelineMetadata::from_bytes(&[113,11,159,210,0,54,0,4,0,0,0,0,1,105,96,232,1,0,0,0,0,1,105,96,112,0,0,0,0,0,0,0,0,0,0,0,0,0,1,105,96,112,0,0,0,0,1,105,96,112,0,0,0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]).unwrap(),\n            deleted_at: Some(parse_naive_datetime(\"2023-07-31T09:00:00.123000000\")),\n            archived_at: None,\n            lineage: Lineage::default(),\n            gc_blocking: None,\n            last_aux_file_policy: None,\n            import_pgdata: None,\n            rel_size_migration: None,\n            l2_lsn: None,\n            gc_compaction: None,\n            marked_invisible_at: None,\n            rel_size_migrated_at: None,\n        };\n\n        let part = IndexPart::from_json_bytes(example.as_bytes()).unwrap();\n        assert_eq!(part, expected);\n    }\n\n    #[test]\n    fn empty_layers_are_parsed() {\n        let empty_layers_json = r#\"{\n            \"version\":1,\n            \"timeline_layers\":[],\n            \"layer_metadata\":{},\n            \"disk_consistent_lsn\":\"0/2532648\",\n            
\"metadata_bytes\":[136,151,49,208,0,70,0,4,0,0,0,0,2,83,38,72,1,0,0,0,0,2,83,38,32,1,87,198,240,135,97,119,45,125,38,29,155,161,140,141,255,210,0,0,0,0,2,83,38,72,0,0,0,0,1,73,240,192,0,0,0,0,1,73,240,192,0,0,0,15,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]\n        }\"#;\n\n        let expected = IndexPart {\n            version: 1,\n            layer_metadata: HashMap::new(),\n            disk_consistent_lsn: \"0/2532648\".parse::<Lsn>().unwrap(),\n            metadata: TimelineMetadata::from_bytes(&[\n                136, 151, 49, 208, 0, 70, 0, 4, 0, 0, 0, 0, 2, 83, 38, 72, 1, 0, 0, 0, 0, 2, 83,\n                38, 32, 1, 87, 198, 240, 135, 97, 119, 45, 125, 38, 29, 155, 161, 140, 141, 255,\n                210, 0, 0, 0, 0, 2, 83, 38, 72, 0, 0, 0, 0, 1, 73, 240, 192, 0, 0, 0, 0, 1, 73,\n                240, 192, 0, 0, 0, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n                0, 0,\n            ])\n            .unwrap(),\n            deleted_at: None,\n            archived_at: None,\n            lineage: Lineage::default(),\n            gc_blocking: None,\n            last_aux_file_policy: None,\n            import_pgdata: None,\n            rel_size_migration: None,\n            l2_lsn: None,\n            gc_compaction: None,\n            marked_invisible_at: None,\n            rel_size_migrated_at: None,\n        };\n\n        let empty_layers_parsed = IndexPart::from_json_bytes(empty_layers_json.as_bytes()).unwrap();\n\n        assert_eq!(empty_layers_parsed, expected);\n    }\n\n    #[test]\n    fn v4_indexpart_is_parsed() {\n        let example = r#\"{\n            
\"version\":4,\n            \"layer_metadata\":{\n                \"000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__0000000001696070-00000000016960E9\": { \"file_size\": 25600000 },\n                \"000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__00000000016B59D8-00000000016B5A51\": { \"file_size\": 9007199254741001 }\n            },\n            \"disk_consistent_lsn\":\"0/16960E8\",\n            \"metadata_bytes\":[113,11,159,210,0,54,0,4,0,0,0,0,1,105,96,232,1,0,0,0,0,1,105,96,112,0,0,0,0,0,0,0,0,0,0,0,0,0,1,105,96,112,0,0,0,0,1,105,96,112,0,0,0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],\n            \"deleted_at\": \"2023-07-31T09:00:00.123\"\n        }\"#;\n\n        let expected = IndexPart {\n            version: 4,\n            layer_metadata: HashMap::from([\n                (\"000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__0000000001696070-00000000016960E9\".parse().unwrap(), LayerFileMetadata {\n                    file_size: 25600000,\n                    generation: Generation::none(),\n                    
shard: ShardIndex::unsharded()\n                }),\n                (\"000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__00000000016B59D8-00000000016B5A51\".parse().unwrap(), LayerFileMetadata {\n                    // serde_json should always parse this but this might be a double with jq for\n                    // example.\n                    file_size: 9007199254741001,\n                    generation: Generation::none(),\n                    shard: ShardIndex::unsharded()\n                })\n            ]),\n            disk_consistent_lsn: \"0/16960E8\".parse::<Lsn>().unwrap(),\n            metadata: TimelineMetadata::from_bytes(&[113,11,159,210,0,54,0,4,0,0,0,0,1,105,96,232,1,0,0,0,0,1,105,96,112,0,0,0,0,0,0,0,0,0,0,0,0,0,1,105,96,112,0,0,0,0,1,105,96,112,0,0,0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]).unwrap(),\n            deleted_at: Some(parse_naive_datetime(\"2023-07-31T09:00:00.123000000\")),\n            archived_at: None,\n            lineage: Lineage::default(),\n            gc_blocking: None,\n            last_aux_file_policy: None,\n            import_pgdata: 
None,\n            rel_size_migration: None,\n            l2_lsn: None,\n            gc_compaction: None,\n            marked_invisible_at: None,\n            rel_size_migrated_at: None,\n        };\n\n        let part = IndexPart::from_json_bytes(example.as_bytes()).unwrap();\n        assert_eq!(part, expected);\n    }\n\n    #[test]\n    fn v5_indexpart_is_parsed() {\n        let example = r#\"{\n            \"version\":5,\n            \"layer_metadata\":{\n                \"000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__00000000014EF420-00000000014EF499\":{\"file_size\":23289856,\"generation\":1},\n                \"000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__00000000014EF499-00000000015A7619\":{\"file_size\":1015808,\"generation\":1}},\n                \"disk_consistent_lsn\":\"0/15A7618\",\n                \"metadata_bytes\":[226,88,25,241,0,46,0,4,0,0,0,0,1,90,118,24,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,78,244,32,0,0,0,0,1,78,244,32,0,0,0,16,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],\n                \"lineage\":{\n                    
\"original_ancestor\":[\"e2bfd8c633d713d279e6fcd2bcc15b6d\",\"0/15A7618\",\"2024-05-07T18:52:36.322426563\"],\n                    \"reparenting_history\":[\"e1bfd8c633d713d279e6fcd2bcc15b6d\"]\n                }\n        }\"#;\n\n        let expected = IndexPart {\n            version: 5,\n            layer_metadata: HashMap::from([\n                (\"000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__00000000014EF420-00000000014EF499\".parse().unwrap(), LayerFileMetadata {\n                    file_size: 23289856,\n                    generation: Generation::new(1),\n                    shard: ShardIndex::unsharded(),\n                }),\n                (\"000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__00000000014EF499-00000000015A7619\".parse().unwrap(), LayerFileMetadata {\n                    file_size: 1015808,\n                    generation: Generation::new(1),\n                    shard: ShardIndex::unsharded(),\n                })\n            ]),\n            disk_consistent_lsn: Lsn::from_str(\"0/15A7618\").unwrap(),\n            metadata: 
TimelineMetadata::from_bytes(&[226,88,25,241,0,46,0,4,0,0,0,0,1,90,118,24,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,78,244,32,0,0,0,0,1,78,244,32,0,0,0,16,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]).unwrap(),\n            deleted_at: None,\n            archived_at: None,\n            lineage: Lineage {\n                reparenting_history_truncated: false,\n                reparenting_history: vec![TimelineId::from_str(\"e1bfd8c633d713d279e6fcd2bcc15b6d\").unwrap()],\n                original_ancestor: Some((TimelineId::from_str(\"e2bfd8c633d713d279e6fcd2bcc15b6d\").unwrap(), Lsn::from_str(\"0/15A7618\").unwrap(), parse_naive_datetime(\"2024-05-07T18:52:36.322426563\"))),\n            },\n            gc_blocking: None,\n            last_aux_file_policy: None,\n            import_pgdata: None,\n            rel_size_migration: None,\n            l2_lsn: None,\n            gc_compaction: None,\n            marked_invisible_at: None,\n            rel_size_migrated_at: None,\n        };\n\n        let part = IndexPart::from_json_bytes(example.as_bytes()).unwrap();\n        assert_eq!(part, expected);\n    
}\n\n    #[test]\n    fn v6_indexpart_is_parsed() {\n        let example = r#\"{\n            \"version\":6,\n            \"layer_metadata\":{\n                \"000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__0000000001696070-00000000016960E9\": { \"file_size\": 25600000 },\n                \"000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__00000000016B59D8-00000000016B5A51\": { \"file_size\": 9007199254741001 }\n            },\n            \"disk_consistent_lsn\":\"0/16960E8\",\n            \"metadata_bytes\":[113,11,159,210,0,54,0,4,0,0,0,0,1,105,96,232,1,0,0,0,0,1,105,96,112,0,0,0,0,0,0,0,0,0,0,0,0,0,1,105,96,112,0,0,0,0,1,105,96,112,0,0,0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],\n            \"deleted_at\": \"2023-07-31T09:00:00.123\",\n            \"lineage\":{\n                \"original_ancestor\":[\"e2bfd8c633d713d279e6fcd2bcc15b6d\",\"0/15A7618\",\"2024-05-07T18:52:36.322426563\"],\n                \"reparenting_history\":[\"e1bfd8c633d713d279e6fcd2bcc15b6d\"]\n            },\n            \"last_aux_file_policy\": \"V2\"\n        }\"#;\n\n  
      let expected = IndexPart {\n            version: 6,\n            layer_metadata: HashMap::from([\n                (\"000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__0000000001696070-00000000016960E9\".parse().unwrap(), LayerFileMetadata {\n                    file_size: 25600000,\n                    generation: Generation::none(),\n                    shard: ShardIndex::unsharded()\n                }),\n                (\"000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__00000000016B59D8-00000000016B5A51\".parse().unwrap(), LayerFileMetadata {\n                    // serde_json should always parse this but this might be a double with jq for\n                    // example.\n                    file_size: 9007199254741001,\n                    generation: Generation::none(),\n                    shard: ShardIndex::unsharded()\n                })\n            ]),\n            disk_consistent_lsn: \"0/16960E8\".parse::<Lsn>().unwrap(),\n            metadata: 
TimelineMetadata::from_bytes(&[113,11,159,210,0,54,0,4,0,0,0,0,1,105,96,232,1,0,0,0,0,1,105,96,112,0,0,0,0,0,0,0,0,0,0,0,0,0,1,105,96,112,0,0,0,0,1,105,96,112,0,0,0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]).unwrap(),\n            deleted_at: Some(parse_naive_datetime(\"2023-07-31T09:00:00.123000000\")),\n            archived_at: None,\n            lineage: Lineage {\n                reparenting_history_truncated: false,\n                reparenting_history: vec![TimelineId::from_str(\"e1bfd8c633d713d279e6fcd2bcc15b6d\").unwrap()],\n                original_ancestor: Some((TimelineId::from_str(\"e2bfd8c633d713d279e6fcd2bcc15b6d\").unwrap(), Lsn::from_str(\"0/15A7618\").unwrap(), parse_naive_datetime(\"2024-05-07T18:52:36.322426563\"))),\n            },\n            gc_blocking: None,\n            last_aux_file_policy: Some(AuxFilePolicy::V2),\n            import_pgdata: None,\n            rel_size_migration: None,\n            l2_lsn: None,\n            gc_compaction: None,\n            marked_invisible_at: None,\n            rel_size_migrated_at: None,\n        };\n\n        let part = 
IndexPart::from_json_bytes(example.as_bytes()).unwrap();\n        assert_eq!(part, expected);\n    }\n\n    #[test]\n    fn v7_indexpart_is_parsed() {\n        let example = r#\"{\n            \"version\": 7,\n            \"layer_metadata\":{\n                \"000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__0000000001696070-00000000016960E9\": { \"file_size\": 25600000 },\n                \"000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__00000000016B59D8-00000000016B5A51\": { \"file_size\": 9007199254741001 }\n            },\n            \"disk_consistent_lsn\":\"0/16960E8\",\n            \"metadata\": {\n                \"disk_consistent_lsn\": \"0/16960E8\",\n                \"prev_record_lsn\": \"0/1696070\",\n                \"ancestor_timeline\": \"e45a7f37d3ee2ff17dc14bf4f4e3f52e\",\n                \"ancestor_lsn\": \"0/0\",\n                \"latest_gc_cutoff_lsn\": \"0/1696070\",\n                \"initdb_lsn\": \"0/1696070\",\n                \"pg_version\": 14\n            },\n            \"deleted_at\": \"2023-07-31T09:00:00.123\"\n        }\"#;\n\n        let expected = IndexPart {\n            version: 7,\n            layer_metadata: HashMap::from([\n                (\"000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__0000000001696070-00000000016960E9\".parse().unwrap(), LayerFileMetadata {\n                    file_size: 25600000,\n                    generation: Generation::none(),\n                    shard: ShardIndex::unsharded()\n                }),\n                (\"000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__00000000016B59D8-00000000016B5A51\".parse().unwrap(), LayerFileMetadata {\n                    file_size: 9007199254741001,\n                    generation: Generation::none(),\n                    shard: ShardIndex::unsharded()\n                })\n            ]),\n            disk_consistent_lsn: 
\"0/16960E8\".parse::<Lsn>().unwrap(),\n            metadata: TimelineMetadata::new(\n                Lsn::from_str(\"0/16960E8\").unwrap(),\n                Some(Lsn::from_str(\"0/1696070\").unwrap()),\n                Some(TimelineId::from_str(\"e45a7f37d3ee2ff17dc14bf4f4e3f52e\").unwrap()),\n                Lsn::INVALID,\n                Lsn::from_str(\"0/1696070\").unwrap(),\n                Lsn::from_str(\"0/1696070\").unwrap(),\n                PgMajorVersion::PG14,\n            ).with_recalculated_checksum().unwrap(),\n            deleted_at: Some(parse_naive_datetime(\"2023-07-31T09:00:00.123000000\")),\n            archived_at: None,\n            lineage: Default::default(),\n            gc_blocking: None,\n            last_aux_file_policy: Default::default(),\n            import_pgdata: None,\n            rel_size_migration: None,\n            l2_lsn: None,\n            gc_compaction: None,\n            marked_invisible_at: None,\n            rel_size_migrated_at: None,\n        };\n\n        let part = IndexPart::from_json_bytes(example.as_bytes()).unwrap();\n        assert_eq!(part, expected);\n    }\n\n    #[test]\n    fn v8_indexpart_is_parsed() {\n        let example = r#\"{\n            \"version\": 8,\n            \"layer_metadata\":{\n                \"000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__0000000001696070-00000000016960E9\": { \"file_size\": 25600000 },\n                \"000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__00000000016B59D8-00000000016B5A51\": { \"file_size\": 9007199254741001 }\n            },\n            \"disk_consistent_lsn\":\"0/16960E8\",\n            \"metadata\": {\n                \"disk_consistent_lsn\": \"0/16960E8\",\n                \"prev_record_lsn\": \"0/1696070\",\n                \"ancestor_timeline\": \"e45a7f37d3ee2ff17dc14bf4f4e3f52e\",\n                \"ancestor_lsn\": \"0/0\",\n                \"latest_gc_cutoff_lsn\": \"0/1696070\",\n             
   \"initdb_lsn\": \"0/1696070\",\n                \"pg_version\": 14\n            },\n            \"deleted_at\": \"2023-07-31T09:00:00.123\",\n            \"archived_at\": \"2023-04-29T09:00:00.123\"\n        }\"#;\n\n        let expected = IndexPart {\n            version: 8,\n            layer_metadata: HashMap::from([\n                (\"000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__0000000001696070-00000000016960E9\".parse().unwrap(), LayerFileMetadata {\n                    file_size: 25600000,\n                    generation: Generation::none(),\n                    shard: ShardIndex::unsharded()\n                }),\n                (\"000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__00000000016B59D8-00000000016B5A51\".parse().unwrap(), LayerFileMetadata {\n                    file_size: 9007199254741001,\n                    generation: Generation::none(),\n                    shard: ShardIndex::unsharded()\n                })\n            ]),\n            disk_consistent_lsn: \"0/16960E8\".parse::<Lsn>().unwrap(),\n            metadata: TimelineMetadata::new(\n                Lsn::from_str(\"0/16960E8\").unwrap(),\n                Some(Lsn::from_str(\"0/1696070\").unwrap()),\n                Some(TimelineId::from_str(\"e45a7f37d3ee2ff17dc14bf4f4e3f52e\").unwrap()),\n                Lsn::INVALID,\n                Lsn::from_str(\"0/1696070\").unwrap(),\n                Lsn::from_str(\"0/1696070\").unwrap(),\n                PgMajorVersion::PG14,\n            ).with_recalculated_checksum().unwrap(),\n            deleted_at: Some(parse_naive_datetime(\"2023-07-31T09:00:00.123000000\")),\n            archived_at: Some(parse_naive_datetime(\"2023-04-29T09:00:00.123000000\")),\n            lineage: Default::default(),\n            gc_blocking: None,\n            last_aux_file_policy: Default::default(),\n            import_pgdata: None,\n            rel_size_migration: None,\n            l2_lsn: None,\n     
       gc_compaction: None,\n            marked_invisible_at: None,\n            rel_size_migrated_at: None,\n        };\n\n        let part = IndexPart::from_json_bytes(example.as_bytes()).unwrap();\n        assert_eq!(part, expected);\n    }\n\n    #[test]\n    fn v9_indexpart_is_parsed() {\n        let example = r#\"{\n            \"version\": 9,\n            \"layer_metadata\":{\n                \"000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__0000000001696070-00000000016960E9\": { \"file_size\": 25600000 },\n                \"000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__00000000016B59D8-00000000016B5A51\": { \"file_size\": 9007199254741001 }\n            },\n            \"disk_consistent_lsn\":\"0/16960E8\",\n            \"metadata\": {\n                \"disk_consistent_lsn\": \"0/16960E8\",\n                \"prev_record_lsn\": \"0/1696070\",\n                \"ancestor_timeline\": \"e45a7f37d3ee2ff17dc14bf4f4e3f52e\",\n                \"ancestor_lsn\": \"0/0\",\n                \"latest_gc_cutoff_lsn\": \"0/1696070\",\n                \"initdb_lsn\": \"0/1696070\",\n                \"pg_version\": 14\n            },\n            \"gc_blocking\": {\n                \"started_at\": \"2024-07-19T09:00:00.123\",\n                \"reasons\": [\"DetachAncestor\"]\n            }\n        }\"#;\n\n        let expected = IndexPart {\n            version: 9,\n            layer_metadata: HashMap::from([\n                (\"000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__0000000001696070-00000000016960E9\".parse().unwrap(), LayerFileMetadata {\n                    file_size: 25600000,\n                    generation: Generation::none(),\n                    shard: ShardIndex::unsharded()\n                }),\n                (\"000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__00000000016B59D8-00000000016B5A51\".parse().unwrap(), LayerFileMetadata {\n          
          file_size: 9007199254741001,\n                    generation: Generation::none(),\n                    shard: ShardIndex::unsharded()\n                })\n            ]),\n            disk_consistent_lsn: \"0/16960E8\".parse::<Lsn>().unwrap(),\n            metadata: TimelineMetadata::new(\n                Lsn::from_str(\"0/16960E8\").unwrap(),\n                Some(Lsn::from_str(\"0/1696070\").unwrap()),\n                Some(TimelineId::from_str(\"e45a7f37d3ee2ff17dc14bf4f4e3f52e\").unwrap()),\n                Lsn::INVALID,\n                Lsn::from_str(\"0/1696070\").unwrap(),\n                Lsn::from_str(\"0/1696070\").unwrap(),\n                PgMajorVersion::PG14,\n            ).with_recalculated_checksum().unwrap(),\n            deleted_at: None,\n            lineage: Default::default(),\n            gc_blocking: Some(GcBlocking {\n                started_at: parse_naive_datetime(\"2024-07-19T09:00:00.123000000\"),\n                reasons: enumset::EnumSet::from_iter([GcBlockingReason::DetachAncestor]),\n            }),\n            last_aux_file_policy: Default::default(),\n            archived_at: None,\n            import_pgdata: None,\n            rel_size_migration: None,\n            l2_lsn: None,\n            gc_compaction: None,\n            marked_invisible_at: None,\n            rel_size_migrated_at: None,\n        };\n\n        let part = IndexPart::from_json_bytes(example.as_bytes()).unwrap();\n        assert_eq!(part, expected);\n    }\n\n    #[test]\n    fn v10_importpgdata_is_parsed() {\n        let example = r#\"{\n            \"version\": 10,\n            \"layer_metadata\":{\n                \"000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__0000000001696070-00000000016960E9\": { \"file_size\": 25600000 },\n                \"000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__00000000016B59D8-00000000016B5A51\": { \"file_size\": 9007199254741001 }\n            },\n            
\"disk_consistent_lsn\":\"0/16960E8\",\n            \"metadata\": {\n                \"disk_consistent_lsn\": \"0/16960E8\",\n                \"prev_record_lsn\": \"0/1696070\",\n                \"ancestor_timeline\": \"e45a7f37d3ee2ff17dc14bf4f4e3f52e\",\n                \"ancestor_lsn\": \"0/0\",\n                \"latest_gc_cutoff_lsn\": \"0/1696070\",\n                \"initdb_lsn\": \"0/1696070\",\n                \"pg_version\": 14\n            },\n            \"gc_blocking\": {\n                \"started_at\": \"2024-07-19T09:00:00.123\",\n                \"reasons\": [\"DetachAncestor\"]\n            },\n            \"import_pgdata\": {\n                \"V1\": {\n                    \"Done\": {\n                        \"idempotency_key\": \"specified-by-client-218a5213-5044-4562-a28d-d024c5f057f5\",\n                        \"started_at\": \"2024-11-13T09:23:42.123\",\n                        \"finished_at\": \"2024-11-13T09:42:23.123\"\n                    }\n                }\n            }\n        }\"#;\n\n        let expected = IndexPart {\n            version: 10,\n            layer_metadata: HashMap::from([\n                (\"000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__0000000001696070-00000000016960E9\".parse().unwrap(), LayerFileMetadata {\n                    file_size: 25600000,\n                    generation: Generation::none(),\n                    shard: ShardIndex::unsharded()\n                }),\n                (\"000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__00000000016B59D8-00000000016B5A51\".parse().unwrap(), LayerFileMetadata {\n                    file_size: 9007199254741001,\n                    generation: Generation::none(),\n                    shard: ShardIndex::unsharded()\n                })\n            ]),\n            disk_consistent_lsn: \"0/16960E8\".parse::<Lsn>().unwrap(),\n            metadata: TimelineMetadata::new(\n                
Lsn::from_str(\"0/16960E8\").unwrap(),\n                Some(Lsn::from_str(\"0/1696070\").unwrap()),\n                Some(TimelineId::from_str(\"e45a7f37d3ee2ff17dc14bf4f4e3f52e\").unwrap()),\n                Lsn::INVALID,\n                Lsn::from_str(\"0/1696070\").unwrap(),\n                Lsn::from_str(\"0/1696070\").unwrap(),\n                PgMajorVersion::PG14,\n            ).with_recalculated_checksum().unwrap(),\n            deleted_at: None,\n            lineage: Default::default(),\n            gc_blocking: Some(GcBlocking {\n                started_at: parse_naive_datetime(\"2024-07-19T09:00:00.123000000\"),\n                reasons: enumset::EnumSet::from_iter([GcBlockingReason::DetachAncestor]),\n            }),\n            last_aux_file_policy: Default::default(),\n            archived_at: None,\n            import_pgdata: Some(import_pgdata::index_part_format::Root::V1(import_pgdata::index_part_format::V1::Done(import_pgdata::index_part_format::Done{\n                started_at: parse_naive_datetime(\"2024-11-13T09:23:42.123000000\"),\n                finished_at: parse_naive_datetime(\"2024-11-13T09:42:23.123000000\"),\n                idempotency_key: import_pgdata::index_part_format::IdempotencyKey::new(\"specified-by-client-218a5213-5044-4562-a28d-d024c5f057f5\".to_string()),\n            }))),\n            rel_size_migration: None,\n            l2_lsn: None,\n            gc_compaction: None,\n            marked_invisible_at: None,\n            rel_size_migrated_at: None,\n        };\n\n        let part = IndexPart::from_json_bytes(example.as_bytes()).unwrap();\n        assert_eq!(part, expected);\n    }\n\n    #[test]\n    fn v11_rel_size_migration_is_parsed() {\n        let example = r#\"{\n            \"version\": 11,\n            \"layer_metadata\":{\n                \"000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__0000000001696070-00000000016960E9\": { \"file_size\": 25600000 },\n                
\"000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__00000000016B59D8-00000000016B5A51\": { \"file_size\": 9007199254741001 }\n            },\n            \"disk_consistent_lsn\":\"0/16960E8\",\n            \"metadata\": {\n                \"disk_consistent_lsn\": \"0/16960E8\",\n                \"prev_record_lsn\": \"0/1696070\",\n                \"ancestor_timeline\": \"e45a7f37d3ee2ff17dc14bf4f4e3f52e\",\n                \"ancestor_lsn\": \"0/0\",\n                \"latest_gc_cutoff_lsn\": \"0/1696070\",\n                \"initdb_lsn\": \"0/1696070\",\n                \"pg_version\": 14\n            },\n            \"gc_blocking\": {\n                \"started_at\": \"2024-07-19T09:00:00.123\",\n                \"reasons\": [\"DetachAncestor\"]\n            },\n            \"import_pgdata\": {\n                \"V1\": {\n                    \"Done\": {\n                        \"idempotency_key\": \"specified-by-client-218a5213-5044-4562-a28d-d024c5f057f5\",\n                        \"started_at\": \"2024-11-13T09:23:42.123\",\n                        \"finished_at\": \"2024-11-13T09:42:23.123\"\n                    }\n                }\n            },\n            \"rel_size_migration\": \"legacy\"\n        }\"#;\n\n        let expected = IndexPart {\n            version: 11,\n            layer_metadata: HashMap::from([\n                (\"000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__0000000001696070-00000000016960E9\".parse().unwrap(), LayerFileMetadata {\n                    file_size: 25600000,\n                    generation: Generation::none(),\n                    shard: ShardIndex::unsharded()\n                }),\n                (\"000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__00000000016B59D8-00000000016B5A51\".parse().unwrap(), LayerFileMetadata {\n                    file_size: 9007199254741001,\n                    generation: Generation::none(),\n                    
shard: ShardIndex::unsharded()\n                })\n            ]),\n            disk_consistent_lsn: \"0/16960E8\".parse::<Lsn>().unwrap(),\n            metadata: TimelineMetadata::new(\n                Lsn::from_str(\"0/16960E8\").unwrap(),\n                Some(Lsn::from_str(\"0/1696070\").unwrap()),\n                Some(TimelineId::from_str(\"e45a7f37d3ee2ff17dc14bf4f4e3f52e\").unwrap()),\n                Lsn::INVALID,\n                Lsn::from_str(\"0/1696070\").unwrap(),\n                Lsn::from_str(\"0/1696070\").unwrap(),\n                PgMajorVersion::PG14,\n            ).with_recalculated_checksum().unwrap(),\n            deleted_at: None,\n            lineage: Default::default(),\n            gc_blocking: Some(GcBlocking {\n                started_at: parse_naive_datetime(\"2024-07-19T09:00:00.123000000\"),\n                reasons: enumset::EnumSet::from_iter([GcBlockingReason::DetachAncestor]),\n            }),\n            last_aux_file_policy: Default::default(),\n            archived_at: None,\n            import_pgdata: Some(import_pgdata::index_part_format::Root::V1(import_pgdata::index_part_format::V1::Done(import_pgdata::index_part_format::Done{\n                started_at: parse_naive_datetime(\"2024-11-13T09:23:42.123000000\"),\n                finished_at: parse_naive_datetime(\"2024-11-13T09:42:23.123000000\"),\n                idempotency_key: import_pgdata::index_part_format::IdempotencyKey::new(\"specified-by-client-218a5213-5044-4562-a28d-d024c5f057f5\".to_string()),\n            }))),\n            rel_size_migration: Some(RelSizeMigration::Legacy),\n            l2_lsn: None,\n            gc_compaction: None,\n            marked_invisible_at: None,\n            rel_size_migrated_at: None,\n        };\n\n        let part = IndexPart::from_json_bytes(example.as_bytes()).unwrap();\n        assert_eq!(part, expected);\n    }\n\n    #[test]\n    fn v12_v13_l2_gc_compaction_is_parsed() {\n        let example = r#\"{\n            
\"version\": 13,\n            \"layer_metadata\":{\n                \"000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__0000000001696070-00000000016960E9\": { \"file_size\": 25600000 },\n                \"000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__00000000016B59D8-00000000016B5A51\": { \"file_size\": 9007199254741001 }\n            },\n            \"disk_consistent_lsn\":\"0/16960E8\",\n            \"metadata\": {\n                \"disk_consistent_lsn\": \"0/16960E8\",\n                \"prev_record_lsn\": \"0/1696070\",\n                \"ancestor_timeline\": \"e45a7f37d3ee2ff17dc14bf4f4e3f52e\",\n                \"ancestor_lsn\": \"0/0\",\n                \"latest_gc_cutoff_lsn\": \"0/1696070\",\n                \"initdb_lsn\": \"0/1696070\",\n                \"pg_version\": 14\n            },\n            \"gc_blocking\": {\n                \"started_at\": \"2024-07-19T09:00:00.123\",\n                \"reasons\": [\"DetachAncestor\"]\n            },\n            \"import_pgdata\": {\n                \"V1\": {\n                    \"Done\": {\n                        \"idempotency_key\": \"specified-by-client-218a5213-5044-4562-a28d-d024c5f057f5\",\n                        \"started_at\": \"2024-11-13T09:23:42.123\",\n                        \"finished_at\": \"2024-11-13T09:42:23.123\"\n                    }\n                }\n            },\n            \"rel_size_migration\": \"legacy\",\n            \"l2_lsn\": \"0/16960E8\",\n            \"gc_compaction\": {\n                \"last_completed_lsn\": \"0/16960E8\"\n            }\n        }\"#;\n\n        let expected = IndexPart {\n            version: 13,\n            layer_metadata: HashMap::from([\n                (\"000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__0000000001696070-00000000016960E9\".parse().unwrap(), LayerFileMetadata {\n                    file_size: 25600000,\n                    generation: 
Generation::none(),\n                    shard: ShardIndex::unsharded()\n                }),\n                (\"000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__00000000016B59D8-00000000016B5A51\".parse().unwrap(), LayerFileMetadata {\n                    file_size: 9007199254741001,\n                    generation: Generation::none(),\n                    shard: ShardIndex::unsharded()\n                })\n            ]),\n            disk_consistent_lsn: \"0/16960E8\".parse::<Lsn>().unwrap(),\n            metadata: TimelineMetadata::new(\n                Lsn::from_str(\"0/16960E8\").unwrap(),\n                Some(Lsn::from_str(\"0/1696070\").unwrap()),\n                Some(TimelineId::from_str(\"e45a7f37d3ee2ff17dc14bf4f4e3f52e\").unwrap()),\n                Lsn::INVALID,\n                Lsn::from_str(\"0/1696070\").unwrap(),\n                Lsn::from_str(\"0/1696070\").unwrap(),\n                PgMajorVersion::PG14,\n            ).with_recalculated_checksum().unwrap(),\n            deleted_at: None,\n            lineage: Default::default(),\n            gc_blocking: Some(GcBlocking {\n                started_at: parse_naive_datetime(\"2024-07-19T09:00:00.123000000\"),\n                reasons: enumset::EnumSet::from_iter([GcBlockingReason::DetachAncestor]),\n            }),\n            last_aux_file_policy: Default::default(),\n            archived_at: None,\n            import_pgdata: Some(import_pgdata::index_part_format::Root::V1(import_pgdata::index_part_format::V1::Done(import_pgdata::index_part_format::Done{\n                started_at: parse_naive_datetime(\"2024-11-13T09:23:42.123000000\"),\n                finished_at: parse_naive_datetime(\"2024-11-13T09:42:23.123000000\"),\n                idempotency_key: import_pgdata::index_part_format::IdempotencyKey::new(\"specified-by-client-218a5213-5044-4562-a28d-d024c5f057f5\".to_string()),\n            }))),\n            rel_size_migration: Some(RelSizeMigration::Legacy),\n   
         l2_lsn: Some(\"0/16960E8\".parse::<Lsn>().unwrap()),\n            gc_compaction: Some(GcCompactionState {\n                last_completed_lsn: \"0/16960E8\".parse::<Lsn>().unwrap(),\n            }),\n            marked_invisible_at: None,\n            rel_size_migrated_at: None,\n        };\n\n        let part = IndexPart::from_json_bytes(example.as_bytes()).unwrap();\n        assert_eq!(part, expected);\n    }\n\n    #[test]\n    fn v14_marked_invisible_at_is_parsed() {\n        let example = r#\"{\n            \"version\": 14,\n            \"layer_metadata\":{\n                \"000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__0000000001696070-00000000016960E9\": { \"file_size\": 25600000 },\n                \"000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__00000000016B59D8-00000000016B5A51\": { \"file_size\": 9007199254741001 }\n            },\n            \"disk_consistent_lsn\":\"0/16960E8\",\n            \"metadata\": {\n                \"disk_consistent_lsn\": \"0/16960E8\",\n                \"prev_record_lsn\": \"0/1696070\",\n                \"ancestor_timeline\": \"e45a7f37d3ee2ff17dc14bf4f4e3f52e\",\n                \"ancestor_lsn\": \"0/0\",\n                \"latest_gc_cutoff_lsn\": \"0/1696070\",\n                \"initdb_lsn\": \"0/1696070\",\n                \"pg_version\": 14\n            },\n            \"gc_blocking\": {\n                \"started_at\": \"2024-07-19T09:00:00.123\",\n                \"reasons\": [\"DetachAncestor\"]\n            },\n            \"import_pgdata\": {\n                \"V1\": {\n                    \"Done\": {\n                        \"idempotency_key\": \"specified-by-client-218a5213-5044-4562-a28d-d024c5f057f5\",\n                        \"started_at\": \"2024-11-13T09:23:42.123\",\n                        \"finished_at\": \"2024-11-13T09:42:23.123\"\n                    }\n                }\n            },\n            \"rel_size_migration\": 
\"legacy\",\n            \"l2_lsn\": \"0/16960E8\",\n            \"gc_compaction\": {\n                \"last_completed_lsn\": \"0/16960E8\"\n            },\n            \"marked_invisible_at\": \"2023-07-31T09:00:00.123\"\n        }\"#;\n\n        let expected = IndexPart {\n            version: 14,\n            layer_metadata: HashMap::from([\n                (\"000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__0000000001696070-00000000016960E9\".parse().unwrap(), LayerFileMetadata {\n                    file_size: 25600000,\n                    generation: Generation::none(),\n                    shard: ShardIndex::unsharded()\n                }),\n                (\"000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__00000000016B59D8-00000000016B5A51\".parse().unwrap(), LayerFileMetadata {\n                    file_size: 9007199254741001,\n                    generation: Generation::none(),\n                    shard: ShardIndex::unsharded()\n                })\n            ]),\n            disk_consistent_lsn: \"0/16960E8\".parse::<Lsn>().unwrap(),\n            metadata: TimelineMetadata::new(\n                Lsn::from_str(\"0/16960E8\").unwrap(),\n                Some(Lsn::from_str(\"0/1696070\").unwrap()),\n                Some(TimelineId::from_str(\"e45a7f37d3ee2ff17dc14bf4f4e3f52e\").unwrap()),\n                Lsn::INVALID,\n                Lsn::from_str(\"0/1696070\").unwrap(),\n                Lsn::from_str(\"0/1696070\").unwrap(),\n                PgMajorVersion::PG14,\n            ).with_recalculated_checksum().unwrap(),\n            deleted_at: None,\n            lineage: Default::default(),\n            gc_blocking: Some(GcBlocking {\n                started_at: parse_naive_datetime(\"2024-07-19T09:00:00.123000000\"),\n                reasons: enumset::EnumSet::from_iter([GcBlockingReason::DetachAncestor]),\n            }),\n            last_aux_file_policy: Default::default(),\n            
archived_at: None,\n            import_pgdata: Some(import_pgdata::index_part_format::Root::V1(import_pgdata::index_part_format::V1::Done(import_pgdata::index_part_format::Done{\n                started_at: parse_naive_datetime(\"2024-11-13T09:23:42.123000000\"),\n                finished_at: parse_naive_datetime(\"2024-11-13T09:42:23.123000000\"),\n                idempotency_key: import_pgdata::index_part_format::IdempotencyKey::new(\"specified-by-client-218a5213-5044-4562-a28d-d024c5f057f5\".to_string()),\n            }))),\n            rel_size_migration: Some(RelSizeMigration::Legacy),\n            l2_lsn: Some(\"0/16960E8\".parse::<Lsn>().unwrap()),\n            gc_compaction: Some(GcCompactionState {\n                last_completed_lsn: \"0/16960E8\".parse::<Lsn>().unwrap(),\n            }),\n            marked_invisible_at: Some(parse_naive_datetime(\"2023-07-31T09:00:00.123000000\")),\n            rel_size_migrated_at: None,\n        };\n\n        let part = IndexPart::from_json_bytes(example.as_bytes()).unwrap();\n        assert_eq!(part, expected);\n    }\n\n    #[test]\n    fn v15_rel_size_migrated_at_is_parsed() {\n        let example = r#\"{\n            \"version\": 15,\n            \"layer_metadata\":{\n                \"000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__0000000001696070-00000000016960E9\": { \"file_size\": 25600000 },\n                \"000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__00000000016B59D8-00000000016B5A51\": { \"file_size\": 9007199254741001 }\n            },\n            \"disk_consistent_lsn\":\"0/16960E8\",\n            \"metadata\": {\n                \"disk_consistent_lsn\": \"0/16960E8\",\n                \"prev_record_lsn\": \"0/1696070\",\n                \"ancestor_timeline\": \"e45a7f37d3ee2ff17dc14bf4f4e3f52e\",\n                \"ancestor_lsn\": \"0/0\",\n                \"latest_gc_cutoff_lsn\": \"0/1696070\",\n                \"initdb_lsn\": 
\"0/1696070\",\n                \"pg_version\": 14\n            },\n            \"gc_blocking\": {\n                \"started_at\": \"2024-07-19T09:00:00.123\",\n                \"reasons\": [\"DetachAncestor\"]\n            },\n            \"import_pgdata\": {\n                \"V1\": {\n                    \"Done\": {\n                        \"idempotency_key\": \"specified-by-client-218a5213-5044-4562-a28d-d024c5f057f5\",\n                        \"started_at\": \"2024-11-13T09:23:42.123\",\n                        \"finished_at\": \"2024-11-13T09:42:23.123\"\n                    }\n                }\n            },\n            \"rel_size_migration\": \"legacy\",\n            \"l2_lsn\": \"0/16960E8\",\n            \"gc_compaction\": {\n                \"last_completed_lsn\": \"0/16960E8\"\n            },\n            \"marked_invisible_at\": \"2023-07-31T09:00:00.123\",\n            \"rel_size_migrated_at\": \"0/16960E8\"\n        }\"#;\n\n        let expected = IndexPart {\n            version: 15,\n            layer_metadata: HashMap::from([\n                (\"000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__0000000001696070-00000000016960E9\".parse().unwrap(), LayerFileMetadata {\n                    file_size: 25600000,\n                    generation: Generation::none(),\n                    shard: ShardIndex::unsharded()\n                }),\n                (\"000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__00000000016B59D8-00000000016B5A51\".parse().unwrap(), LayerFileMetadata {\n                    file_size: 9007199254741001,\n                    generation: Generation::none(),\n                    shard: ShardIndex::unsharded()\n                })\n            ]),\n            disk_consistent_lsn: \"0/16960E8\".parse::<Lsn>().unwrap(),\n            metadata: TimelineMetadata::new(\n                Lsn::from_str(\"0/16960E8\").unwrap(),\n                
Some(Lsn::from_str(\"0/1696070\").unwrap()),\n                Some(TimelineId::from_str(\"e45a7f37d3ee2ff17dc14bf4f4e3f52e\").unwrap()),\n                Lsn::INVALID,\n                Lsn::from_str(\"0/1696070\").unwrap(),\n                Lsn::from_str(\"0/1696070\").unwrap(),\n                PgMajorVersion::PG14,\n            ).with_recalculated_checksum().unwrap(),\n            deleted_at: None,\n            lineage: Default::default(),\n            gc_blocking: Some(GcBlocking {\n                started_at: parse_naive_datetime(\"2024-07-19T09:00:00.123000000\"),\n                reasons: enumset::EnumSet::from_iter([GcBlockingReason::DetachAncestor]),\n            }),\n            last_aux_file_policy: Default::default(),\n            archived_at: None,\n            import_pgdata: Some(import_pgdata::index_part_format::Root::V1(import_pgdata::index_part_format::V1::Done(import_pgdata::index_part_format::Done{\n                started_at: parse_naive_datetime(\"2024-11-13T09:23:42.123000000\"),\n                finished_at: parse_naive_datetime(\"2024-11-13T09:42:23.123000000\"),\n                idempotency_key: import_pgdata::index_part_format::IdempotencyKey::new(\"specified-by-client-218a5213-5044-4562-a28d-d024c5f057f5\".to_string()),\n            }))),\n            rel_size_migration: Some(RelSizeMigration::Legacy),\n            l2_lsn: Some(\"0/16960E8\".parse::<Lsn>().unwrap()),\n            gc_compaction: Some(GcCompactionState {\n                last_completed_lsn: \"0/16960E8\".parse::<Lsn>().unwrap(),\n            }),\n            marked_invisible_at: Some(parse_naive_datetime(\"2023-07-31T09:00:00.123000000\")),\n            rel_size_migrated_at: Some(\"0/16960E8\".parse::<Lsn>().unwrap()),\n        };\n\n        let part = IndexPart::from_json_bytes(example.as_bytes()).unwrap();\n        assert_eq!(part, expected);\n    }\n\n    fn parse_naive_datetime(s: &str) -> NaiveDateTime {\n        chrono::NaiveDateTime::parse_from_str(s, 
\"%Y-%m-%dT%H:%M:%S.%f\").unwrap()\n    }\n}\n"
  },
  {
    "path": "pageserver/src/tenant/remote_timeline_client/manifest.rs",
    "content": "use chrono::NaiveDateTime;\nuse serde::{Deserialize, Serialize};\nuse utils::id::TimelineId;\nuse utils::lsn::Lsn;\nuse utils::shard::ShardStripeSize;\n\n/// Tenant shard manifest, stored in remote storage. Contains offloaded timelines and other tenant\n/// shard-wide information that must be persisted in remote storage.\n///\n/// The manifest is always updated on tenant attach, and as needed.\n#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)]\npub struct TenantManifest {\n    /// The manifest version. Incremented on manifest format changes, even non-breaking ones.\n    /// Manifests must generally always be backwards and forwards compatible for one release, to\n    /// allow release rollbacks.\n    pub version: usize,\n\n    /// This tenant's stripe size. This is only advisory, and used to recover tenant data from\n    /// remote storage. The authoritative source is the storage controller. If None, assume the\n    /// original default value of 32768 blocks (256 MB).\n    #[serde(skip_serializing_if = \"Option::is_none\")]\n    pub stripe_size: Option<ShardStripeSize>,\n\n    /// The list of offloaded timelines together with enough information\n    /// to not have to actually load them.\n    ///\n    /// Note: the timelines mentioned in this list might be deleted, i.e.\n    /// we don't hold an invariant that the references aren't dangling.\n    /// Existence of index-part.json is the actual indicator of timeline existence.\n    #[serde(default)]\n    pub offloaded_timelines: Vec<OffloadedTimelineManifest>,\n}\n\n/// The remote level representation of an offloaded timeline.\n///\n/// Very similar to [`pageserver_api::models::OffloadedTimelineInfo`],\n/// but the two datastructures serve different needs, this is for a persistent disk format\n/// that must be backwards compatible, while the other is only for informative purposes.\n#[derive(Clone, Debug, Serialize, Deserialize, Copy, PartialEq, Eq)]\npub struct OffloadedTimelineManifest {\n 
   pub timeline_id: TimelineId,\n    /// Whether the timeline has a parent it has been branched off from or not\n    pub ancestor_timeline_id: Option<TimelineId>,\n    /// Whether to retain the branch lsn at the ancestor or not\n    pub ancestor_retain_lsn: Option<Lsn>,\n    /// The time point when the timeline was archived\n    pub archived_at: NaiveDateTime,\n}\n\n/// The newest manifest version. This should be incremented on changes, even non-breaking ones. We\n/// do not use deny_unknown_fields, so new fields are not breaking.\n///\n/// 1: initial version\n/// 2: +stripe_size\n///\n/// When adding new versions, also add a parse_vX test case below.\npub const LATEST_TENANT_MANIFEST_VERSION: usize = 2;\n\nimpl TenantManifest {\n    /// Returns true if the manifests are equal, ignoring the version number. This avoids\n    /// re-uploading all manifests just because the version number is bumped.\n    pub fn eq_ignoring_version(&self, other: &Self) -> bool {\n        // Fast path: if the version is equal, just compare directly.\n        if self.version == other.version {\n            return self == other;\n        }\n\n        // We could alternatively just clone and modify the version here.\n        let Self {\n            version: _, // ignore version\n            stripe_size,\n            offloaded_timelines,\n        } = self;\n\n        stripe_size == &other.stripe_size && offloaded_timelines == &other.offloaded_timelines\n    }\n\n    /// Decodes a manifest from JSON.\n    pub fn from_json_bytes(bytes: &[u8]) -> Result<Self, serde_json::Error> {\n        serde_json::from_slice(bytes)\n    }\n\n    /// Encodes a manifest as JSON.\n    pub fn to_json_bytes(&self) -> serde_json::Result<Vec<u8>> {\n        serde_json::to_vec(self)\n    }\n}\n\n#[cfg(test)]\nmod tests {\n    use std::str::FromStr;\n\n    use utils::id::TimelineId;\n\n    use super::*;\n\n    /// Empty manifests should be parsed. 
Version is required.\n    #[test]\n    fn parse_empty() -> anyhow::Result<()> {\n        let json = r#\"{\n             \"version\": 0\n         }\"#;\n        let expected = TenantManifest {\n            version: 0,\n            stripe_size: None,\n            offloaded_timelines: Vec::new(),\n        };\n        assert_eq!(expected, TenantManifest::from_json_bytes(json.as_bytes())?);\n        Ok(())\n    }\n\n    /// Unknown fields should be ignored, for forwards compatibility.\n    #[test]\n    fn parse_unknown_fields() -> anyhow::Result<()> {\n        let json = r#\"{\n             \"version\": 1,\n             \"foo\": \"bar\"\n         }\"#;\n        let expected = TenantManifest {\n            version: 1,\n            stripe_size: None,\n            offloaded_timelines: Vec::new(),\n        };\n        assert_eq!(expected, TenantManifest::from_json_bytes(json.as_bytes())?);\n        Ok(())\n    }\n\n    /// v1 manifests should be parsed, for backwards compatibility.\n    #[test]\n    fn parse_v1() -> anyhow::Result<()> {\n        let json = r#\"{\n             \"version\": 1,\n             \"offloaded_timelines\": [\n                 {\n                     \"timeline_id\": \"5c4df612fd159e63c1b7853fe94d97da\",\n                     \"archived_at\": \"2025-03-07T11:07:11.373105434\"\n                 },\n                 {\n                     \"timeline_id\": \"f3def5823ad7080d2ea538d8e12163fa\",\n                     \"ancestor_timeline_id\": \"5c4df612fd159e63c1b7853fe94d97da\",\n                     \"ancestor_retain_lsn\": \"0/1F79038\",\n                     \"archived_at\": \"2025-03-05T11:10:22.257901390\"\n                 }\n             ]\n         }\"#;\n        let expected = TenantManifest {\n            version: 1,\n            stripe_size: None,\n            offloaded_timelines: vec![\n                OffloadedTimelineManifest {\n                    timeline_id: TimelineId::from_str(\"5c4df612fd159e63c1b7853fe94d97da\")?,\n                   
 ancestor_timeline_id: None,\n                    ancestor_retain_lsn: None,\n                    archived_at: NaiveDateTime::from_str(\"2025-03-07T11:07:11.373105434\")?,\n                },\n                OffloadedTimelineManifest {\n                    timeline_id: TimelineId::from_str(\"f3def5823ad7080d2ea538d8e12163fa\")?,\n                    ancestor_timeline_id: Some(TimelineId::from_str(\n                        \"5c4df612fd159e63c1b7853fe94d97da\",\n                    )?),\n                    ancestor_retain_lsn: Some(Lsn::from_str(\"0/1F79038\")?),\n                    archived_at: NaiveDateTime::from_str(\"2025-03-05T11:10:22.257901390\")?,\n                },\n            ],\n        };\n        assert_eq!(expected, TenantManifest::from_json_bytes(json.as_bytes())?);\n        Ok(())\n    }\n\n    /// v2 manifests should be parsed, for backwards compatibility.\n    #[test]\n    fn parse_v2() -> anyhow::Result<()> {\n        let json = r#\"{\n             \"version\": 2,\n             \"stripe_size\": 32768,\n             \"offloaded_timelines\": [\n                 {\n                     \"timeline_id\": \"5c4df612fd159e63c1b7853fe94d97da\",\n                     \"archived_at\": \"2025-03-07T11:07:11.373105434\"\n                 },\n                 {\n                     \"timeline_id\": \"f3def5823ad7080d2ea538d8e12163fa\",\n                     \"ancestor_timeline_id\": \"5c4df612fd159e63c1b7853fe94d97da\",\n                     \"ancestor_retain_lsn\": \"0/1F79038\",\n                     \"archived_at\": \"2025-03-05T11:10:22.257901390\"\n                 }\n             ]\n         }\"#;\n        let expected = TenantManifest {\n            version: 2,\n            stripe_size: Some(ShardStripeSize(32768)),\n            offloaded_timelines: vec![\n                OffloadedTimelineManifest {\n                    timeline_id: TimelineId::from_str(\"5c4df612fd159e63c1b7853fe94d97da\")?,\n                    ancestor_timeline_id: None,\n       
             ancestor_retain_lsn: None,\n                    archived_at: NaiveDateTime::from_str(\"2025-03-07T11:07:11.373105434\")?,\n                },\n                OffloadedTimelineManifest {\n                    timeline_id: TimelineId::from_str(\"f3def5823ad7080d2ea538d8e12163fa\")?,\n                    ancestor_timeline_id: Some(TimelineId::from_str(\n                        \"5c4df612fd159e63c1b7853fe94d97da\",\n                    )?),\n                    ancestor_retain_lsn: Some(Lsn::from_str(\"0/1F79038\")?),\n                    archived_at: NaiveDateTime::from_str(\"2025-03-05T11:10:22.257901390\")?,\n                },\n            ],\n        };\n        assert_eq!(expected, TenantManifest::from_json_bytes(json.as_bytes())?);\n        Ok(())\n    }\n}\n"
  },
  {
    "path": "pageserver/src/tenant/remote_timeline_client/upload.rs",
    "content": "//! Helper functions to upload files to remote storage with a RemoteStorage\n\nuse std::io::{ErrorKind, SeekFrom};\nuse std::num::NonZeroU32;\nuse std::time::SystemTime;\n\nuse anyhow::{Context, bail};\nuse bytes::Bytes;\nuse camino::Utf8Path;\nuse fail::fail_point;\nuse pageserver_api::shard::TenantShardId;\nuse remote_storage::{GenericRemoteStorage, RemotePath, TimeTravelError};\nuse tokio::fs::{self, File};\nuse tokio::io::AsyncSeekExt;\nuse tokio_util::sync::CancellationToken;\nuse tracing::info;\nuse utils::id::{TenantId, TimelineId};\nuse utils::{backoff, pausable_failpoint};\n\nuse super::Generation;\nuse super::index::IndexPart;\nuse super::manifest::TenantManifest;\nuse crate::tenant::remote_timeline_client::{\n    remote_index_path, remote_initdb_archive_path, remote_initdb_preserved_archive_path,\n    remote_tenant_manifest_path,\n};\n\n/// Serializes and uploads the given index part data to the remote storage.\npub(crate) async fn upload_index_part(\n    storage: &GenericRemoteStorage,\n    tenant_shard_id: &TenantShardId,\n    timeline_id: &TimelineId,\n    generation: Generation,\n    index_part: &IndexPart,\n    cancel: &CancellationToken,\n) -> anyhow::Result<()> {\n    tracing::trace!(\"uploading new index part\");\n\n    fail_point!(\"before-upload-index\", |_| {\n        bail!(\"failpoint before-upload-index\")\n    });\n    pausable_failpoint!(\"before-upload-index-pausable\");\n\n    // Safety: refuse to persist invalid index metadata, to mitigate the impact of any bug that produces this\n    // (this should never happen)\n    index_part.validate().map_err(|e| anyhow::anyhow!(e))?;\n\n    // FIXME: this error comes too late\n    let serialized = index_part.to_json_bytes()?;\n    let serialized = Bytes::from(serialized);\n\n    let index_part_size = serialized.len();\n\n    let remote_path = remote_index_path(tenant_shard_id, timeline_id, generation);\n    storage\n        .upload_storage_object(\n            
futures::stream::once(futures::future::ready(Ok(serialized))),\n            index_part_size,\n            &remote_path,\n            cancel,\n        )\n        .await\n        .with_context(|| format!(\"upload index part for '{tenant_shard_id} / {timeline_id}'\"))\n}\n\n/// Serializes and uploads the given tenant manifest data to the remote storage.\npub(crate) async fn upload_tenant_manifest(\n    storage: &GenericRemoteStorage,\n    tenant_shard_id: &TenantShardId,\n    generation: Generation,\n    tenant_manifest: &TenantManifest,\n    cancel: &CancellationToken,\n) -> anyhow::Result<()> {\n    tracing::trace!(\"uploading new tenant manifest\");\n\n    fail_point!(\"before-upload-manifest\", |_| {\n        bail!(\"failpoint before-upload-manifest\")\n    });\n    pausable_failpoint!(\"before-upload-manifest-pausable\");\n\n    let serialized = Bytes::from(tenant_manifest.to_json_bytes()?);\n    let tenant_manifest_size = serialized.len();\n    let remote_path = remote_tenant_manifest_path(tenant_shard_id, generation);\n\n    storage\n        .upload_storage_object(\n            futures::stream::once(futures::future::ready(Ok(serialized))),\n            tenant_manifest_size,\n            &remote_path,\n            cancel,\n        )\n        .await\n        .with_context(|| format!(\"upload tenant manifest for '{tenant_shard_id}'\"))\n}\n\n/// Attempts to upload given layer files.\n/// No extra checks for overlapping files is made and any files that are already present remotely will be overwritten, if submitted during the upload.\n///\n/// On an error, bumps the retries count and reschedules the entire task.\npub(super) async fn upload_timeline_layer<'a>(\n    storage: &'a GenericRemoteStorage,\n    local_path: &'a Utf8Path,\n    remote_path: &'a RemotePath,\n    metadata_size: u64,\n    cancel: &CancellationToken,\n) -> anyhow::Result<()> {\n    fail_point!(\"before-upload-layer\", |_| {\n        bail!(\"failpoint before-upload-layer\")\n    });\n\n    
pausable_failpoint!(\"before-upload-layer-pausable\");\n\n    let source_file_res = fs::File::open(&local_path).await;\n    let source_file = match source_file_res {\n        Ok(source_file) => source_file,\n        Err(e) if e.kind() == ErrorKind::NotFound => {\n            // If we encounter this arm, it wasn't intended, but it's also not\n            // a big problem, if it's because the file was deleted before an\n            // upload. However, a nonexistent file can also be indicative of\n            // something worse, like when a file is scheduled for upload before\n            // it has been written to disk yet.\n            //\n            // This is tested against `test_compaction_delete_before_upload`\n            info!(path = %local_path, \"File to upload doesn't exist. Likely the file has been deleted and an upload is not required any more.\");\n            return Ok(());\n        }\n        Err(e) => Err(e).with_context(|| format!(\"open a source file for layer {local_path:?}\"))?,\n    };\n\n    let fs_size = source_file\n        .metadata()\n        .await\n        .with_context(|| format!(\"get the source file metadata for layer {local_path:?}\"))?\n        .len();\n\n    if metadata_size != fs_size {\n        bail!(\n            \"File {local_path:?} has its current FS size {fs_size} diferent from initially determined {metadata_size}\"\n        );\n    }\n\n    let fs_size = usize::try_from(fs_size)\n        .with_context(|| format!(\"convert {local_path:?} size {fs_size} usize\"))?;\n    /* BEGIN_HADRON */\n    let mut metadata = None;\n    match storage {\n        // Pass the file path as a storage metadata to minimize changes to neon.\n        // Otherwise, we need to change the upload interface.\n        GenericRemoteStorage::AzureBlob(s) => {\n            let block_size_mb = s.put_block_size_mb.unwrap_or(0);\n            if block_size_mb > 0 && fs_size > block_size_mb * 1024 * 1024 {\n                metadata = 
Some(remote_storage::StorageMetadata::from([(\n                    \"databricks_azure_put_block\",\n                    local_path.as_str(),\n                )]));\n            }\n        }\n        GenericRemoteStorage::LocalFs(_) => {}\n        GenericRemoteStorage::AwsS3(_) => {}\n        GenericRemoteStorage::Unreliable(_) => {}\n        GenericRemoteStorage::GCS(_) => {}\n    };\n    /* END_HADRON */\n    let reader = tokio_util::io::ReaderStream::with_capacity(source_file, super::BUFFER_SIZE);\n\n    storage\n        .upload(reader, fs_size, remote_path, metadata, cancel)\n        .await\n        .with_context(|| format!(\"upload layer from local path '{local_path}'\"))\n}\n\npub(super) async fn copy_timeline_layer(\n    storage: &GenericRemoteStorage,\n    source_path: &RemotePath,\n    target_path: &RemotePath,\n    cancel: &CancellationToken,\n) -> anyhow::Result<()> {\n    fail_point!(\"before-copy-layer\", |_| {\n        bail!(\"failpoint before-copy-layer\")\n    });\n\n    pausable_failpoint!(\"before-copy-layer-pausable\");\n\n    storage\n        .copy_object(source_path, target_path, cancel)\n        .await\n        .with_context(|| format!(\"copy layer {source_path} to {target_path}\"))\n}\n\n/// Uploads the given `initdb` data to the remote storage.\npub(crate) async fn upload_initdb_dir(\n    storage: &GenericRemoteStorage,\n    tenant_id: &TenantId,\n    timeline_id: &TimelineId,\n    mut initdb_tar_zst: File,\n    size: u64,\n    cancel: &CancellationToken,\n) -> anyhow::Result<()> {\n    tracing::trace!(\"uploading initdb dir\");\n\n    // We might have read somewhat into the file already in the prior retry attempt\n    initdb_tar_zst.seek(SeekFrom::Start(0)).await?;\n\n    let file = tokio_util::io::ReaderStream::with_capacity(initdb_tar_zst, super::BUFFER_SIZE);\n\n    let remote_path = remote_initdb_archive_path(tenant_id, timeline_id);\n    storage\n        .upload_storage_object(file, size as usize, &remote_path, cancel)\n        .await\n 
       .with_context(|| format!(\"upload initdb dir for '{tenant_id} / {timeline_id}'\"))\n}\n\npub(crate) async fn preserve_initdb_archive(\n    storage: &GenericRemoteStorage,\n    tenant_id: &TenantId,\n    timeline_id: &TimelineId,\n    cancel: &CancellationToken,\n) -> anyhow::Result<()> {\n    let source_path = remote_initdb_archive_path(tenant_id, timeline_id);\n    let dest_path = remote_initdb_preserved_archive_path(tenant_id, timeline_id);\n    storage\n        .copy_object(&source_path, &dest_path, cancel)\n        .await\n        .with_context(|| format!(\"backing up initdb archive for '{tenant_id} / {timeline_id}'\"))\n}\n\npub(crate) async fn time_travel_recover_tenant(\n    storage: &GenericRemoteStorage,\n    tenant_shard_id: &TenantShardId,\n    timestamp: SystemTime,\n    done_if_after: SystemTime,\n    cancel: &CancellationToken,\n) -> Result<(), TimeTravelError> {\n    let warn_after = 3;\n    let max_attempts = 10;\n    let mut prefixes = Vec::with_capacity(2);\n    if tenant_shard_id.is_shard_zero() {\n        // Also recover the unsharded prefix for a shard of zero:\n        // - if the tenant is totally unsharded, the unsharded prefix contains all the data\n        // - if the tenant is sharded, we still want to recover the initdb data, but we only\n        //   want to do it once, so let's do it on the 0 shard\n        let timelines_path_unsharded =\n            super::remote_timelines_path_unsharded(&tenant_shard_id.tenant_id);\n        prefixes.push(timelines_path_unsharded);\n    }\n    if !tenant_shard_id.is_unsharded() {\n        // If the tenant is sharded, we need to recover the sharded prefix\n        let timelines_path = super::remote_timelines_path(tenant_shard_id);\n        prefixes.push(timelines_path);\n    }\n\n    // Limit the number of versions deletions, mostly so that we don't\n    // keep requesting forever if the list is too long, as we'd put the\n    // list in RAM.\n    // Building a list of 100k entries that reaches 
the limit roughly takes\n    // 40 seconds, and roughly corresponds to tenants of 2 TiB physical size.\n    const COMPLEXITY_LIMIT: Option<NonZeroU32> = NonZeroU32::new(100_000);\n\n    for prefix in &prefixes {\n        backoff::retry(\n            || async {\n                storage\n                    .time_travel_recover(\n                        Some(prefix),\n                        timestamp,\n                        done_if_after,\n                        cancel,\n                        COMPLEXITY_LIMIT,\n                    )\n                    .await\n            },\n            |e| !matches!(e, TimeTravelError::Other(_)),\n            warn_after,\n            max_attempts,\n            \"time travel recovery of tenant prefix\",\n            cancel,\n        )\n        .await\n        .ok_or_else(|| TimeTravelError::Cancelled)\n        .and_then(|x| x)?;\n    }\n    Ok(())\n}\n"
  },
  {
    "path": "pageserver/src/tenant/remote_timeline_client.rs",
    "content": "//! This module manages synchronizing local FS with remote storage.\n//!\n//! # Overview\n//!\n//! * [`RemoteTimelineClient`] provides functions related to upload/download of a particular timeline.\n//!   It contains a queue of pending uploads, and manages the queue, performing uploads in parallel\n//!   when it's safe to do so.\n//!\n//! * Stand-alone function, [`list_remote_timelines`], to get list of timelines of a tenant.\n//!\n//! These functions use the low-level remote storage client, [`remote_storage::RemoteStorage`].\n//!\n//! # APIs & How To Use Them\n//!\n//! There is a [RemoteTimelineClient] for each [Timeline][`crate::tenant::Timeline`] in the system,\n//! unless the pageserver is configured without remote storage.\n//!\n//! We allocate the client instance in [Timeline][`crate::tenant::Timeline`], i.e.,\n//! either in [`crate::tenant::mgr`] during startup or when creating a new\n//! timeline.\n//! However, the client does not become ready for use until we've initialized its upload queue:\n//!\n//! - For timelines that already have some state on the remote storage, we use\n//!   [`RemoteTimelineClient::init_upload_queue`] .\n//! - For newly created timelines, we use\n//!   [`RemoteTimelineClient::init_upload_queue_for_empty_remote`].\n//!\n//! The former takes the remote's [`IndexPart`] as an argument, possibly retrieved\n//! using [`list_remote_timelines`]. We'll elaborate on [`IndexPart`] in the next section.\n//!\n//! Whenever we've created/updated/deleted a file in a timeline directory, we schedule\n//! the corresponding remote operation with the timeline's [`RemoteTimelineClient`]:\n//!\n//! - [`RemoteTimelineClient::schedule_layer_file_upload`]  when we've created a new layer file.\n//! - [`RemoteTimelineClient::schedule_index_upload_for_metadata_update`] when we've updated the timeline metadata file.\n//! 
- [`RemoteTimelineClient::schedule_index_upload_for_file_changes`] to upload an updated index file, after we've scheduled file uploads\n//! - [`RemoteTimelineClient::schedule_layer_file_deletion`] when we've deleted one or more layer files.\n//!\n//! Internally, these functions create [`UploadOp`]s and put them in a queue.\n//!\n//! There are also APIs for downloading files.\n//! These are not part of the aforementioned queuing and will not be discussed\n//! further here, except in the section covering tenant attach.\n//!\n//! # Remote Storage Structure & [`IndexPart`] Index File\n//!\n//! The \"directory structure\" in the remote storage mirrors the local directory structure, with paths\n//! like `tenants/<tenant_id>/timelines/<timeline_id>/<layer filename>`.\n//! Yet instead of keeping the `metadata` file remotely, we wrap it with more\n//! data in an \"index file\" aka [`IndexPart`], containing the list of **all** remote\n//! files for a given timeline.\n//! If a file is not referenced from [`IndexPart`], it's not part of the remote storage state.\n//!\n//! Having the `IndexPart` also avoids expensive and slow `S3 list` commands.\n//!\n//! # Consistency\n//!\n//! To have a consistent remote structure, it's important that uploads and\n//! deletions are performed in the right order. For example, the index file\n//! contains a list of layer files, so it must not be uploaded until all the\n//! layer files that are in its list have been successfully uploaded.\n//!\n//! The contract between client and its user is that the user is responsible for\n//! scheduling operations in an order that keeps the remote consistent as\n//! described above.\n//!\n//! From the user's perspective, the operations are executed sequentially.\n//! Internally, the client knows which operations can be performed in parallel,\n//! and which operations act like a \"barrier\" that require preceding operations\n//! to finish. The calling code just needs to call the schedule-functions in the\n//! 
correct order, and the client will parallelize the operations in a way that\n//! is safe. For more details, see `UploadOp::can_bypass`.\n//!\n//! All of this relies on the following invariants:\n//!\n//! - We rely on read-after write consistency in the remote storage.\n//! - Layer files are immutable.\n//!\n//! NB: Pageserver assumes that it has exclusive write access to the tenant in remote\n//! storage. Different tenants can be attached to different pageservers, but if the\n//! same tenant is attached to two pageservers at the same time, they will overwrite\n//! each other's index file updates, and confusion will ensue. There's no interlock or\n//! mechanism to detect that in the pageserver, we rely on the control plane to ensure\n//! that that doesn't happen.\n//!\n//! ## Implementation Note\n//!\n//! The *actual* remote state lags behind the *desired* remote state while\n//! there are in-flight operations.\n//! We keep track of the desired remote state in [`UploadQueueInitialized::dirty`].\n//! It is initialized based on the [`IndexPart`] that was passed during init\n//! and updated with every `schedule_*` function call.\n//! All this is necessary to compute the future [`IndexPart`]s\n//! when scheduling an operation while other operations that also affect the\n//! remote [`IndexPart`] are in flight.\n//!\n//! # Retries & Error Handling\n//!\n//! The client retries operations indefinitely, using exponential back-off.\n//! There is no way to force a retry, i.e., interrupt the back-off.\n//! This could be built easily.\n//!\n//! # Cancellation\n//!\n//! The operations execute as plain [`task_mgr`] tasks, scoped to\n//! the client's tenant and timeline.\n//! Dropping the client will drop queued operations but not executing operations.\n//! These will complete unless the `task_mgr` tasks are cancelled using `task_mgr`\n//! APIs, e.g., during pageserver shutdown, timeline delete, or tenant detach.\n//!\n//! # Completion\n//!\n//! 
Once an operation has completed, we update [`UploadQueueInitialized::clean`] immediately,\n//! and submit a request through the DeletionQueue to update\n//! [`UploadQueueInitialized::visible_remote_consistent_lsn`] after it has\n//! validated that our generation is not stale.  It is this visible value\n//! that is advertized to safekeepers as a signal that they can\n//! delete the WAL up to that LSN.\n//!\n//! The [`RemoteTimelineClient::wait_completion`] method can be used to wait\n//! for all pending operations to complete. It does not prevent more\n//! operations from getting scheduled.\n//!\n//! # Crash Consistency\n//!\n//! We do not persist the upload queue state.\n//! If we drop the client, or crash, all unfinished operations are lost.\n//!\n//! To recover, the following steps need to be taken:\n//! - Retrieve the current remote [`IndexPart`]. This gives us a\n//!   consistent remote state, assuming the user scheduled the operations in\n//!   the correct order.\n//! - Initiate upload queue with that [`IndexPart`].\n//! - Reschedule all lost operations by comparing the local filesystem state\n//!   and remote state as per [`IndexPart`]. This is done in\n//!   [`TenantShard::timeline_init_and_sync`].\n//!\n//! Note that if we crash during file deletion between the index update\n//! that removes the file from the list of files, and deleting the remote file,\n//! the file is leaked in the remote storage. Similarly, if a new file is created\n//! and uploaded, but the pageserver dies permanently before updating the\n//! remote index file, the new file is leaked in remote storage. We accept and\n//! tolerate that for now.\n//! Note further that we cannot easily fix this by scheduling deletes for every\n//! file that is present only on the remote, because we cannot distinguish the\n//! following two cases:\n//! - (1) We had the file locally, deleted it locally, scheduled a remote delete,\n//!   but crashed before it finished remotely.\n//! 
- (2) We never had the file locally because we haven't on-demand downloaded\n//!   it yet.\n//!\n//! # Downloads\n//!\n//! In addition to the upload queue, [`RemoteTimelineClient`] has functions for\n//! downloading files from the remote storage. Downloads are performed immediately\n//! against the `RemoteStorage`, independently of the upload queue.\n//!\n//! When we attach a tenant, we perform the following steps:\n//! - create `Tenant` object in `TenantState::Attaching` state\n//! - List timelines that are present in remote storage, and for each:\n//!   - download their remote [`IndexPart`]s\n//!   - create `Timeline` struct and a `RemoteTimelineClient`\n//!   - initialize the client's upload queue with its `IndexPart`\n//!   - schedule uploads for layers that are only present locally.\n//! - After the above is done for each timeline, open the tenant for business by\n//!   transitioning it from `TenantState::Attaching` to `TenantState::Active` state.\n//!   This starts the timelines' WAL-receivers and the tenant's GC & Compaction loops.\n//!\n//! # Operating Without Remote Storage\n//!\n//! If no remote storage configuration is provided, the [`RemoteTimelineClient`] is\n//! not created and the uploads are skipped.\n//!\n//! [`TenantShard::timeline_init_and_sync`]: super::TenantShard::timeline_init_and_sync\n//! 
[`Timeline::load_layer_map`]: super::Timeline::load_layer_map\n\npub(crate) mod download;\npub mod index;\npub mod manifest;\npub(crate) mod upload;\n\nuse std::collections::{HashMap, HashSet, VecDeque};\nuse std::ops::DerefMut;\nuse std::sync::atomic::{AtomicU32, Ordering};\nuse std::sync::{Arc, Mutex, OnceLock};\nuse std::time::Duration;\n\nuse anyhow::Context;\nuse camino::Utf8Path;\nuse chrono::{NaiveDateTime, Utc};\npub(crate) use download::{\n    download_index_part, download_initdb_tar_zst, download_tenant_manifest, is_temp_download_file,\n    list_remote_tenant_shards, list_remote_timelines,\n};\nuse index::GcCompactionState;\npub(crate) use index::LayerFileMetadata;\nuse pageserver_api::models::{RelSizeMigration, TimelineArchivalState, TimelineVisibilityState};\nuse pageserver_api::shard::{ShardIndex, TenantShardId};\nuse regex::Regex;\nuse remote_storage::{\n    DownloadError, GenericRemoteStorage, ListingMode, RemotePath, TimeoutOrCancel,\n};\nuse scopeguard::ScopeGuard;\nuse tokio_util::sync::CancellationToken;\nuse tracing::{Instrument, debug, error, info, info_span, instrument, warn};\npub(crate) use upload::upload_initdb_dir;\nuse utils::backoff::{\n    self, DEFAULT_BASE_BACKOFF_SECONDS, DEFAULT_MAX_BACKOFF_SECONDS, exponential_backoff,\n};\nuse utils::id::{TenantId, TimelineId};\nuse utils::lsn::Lsn;\nuse utils::pausable_failpoint;\nuse utils::shard::ShardNumber;\n\nuse self::index::IndexPart;\nuse super::config::AttachedLocationConfig;\nuse super::metadata::MetadataUpdate;\nuse super::storage_layer::{Layer, LayerName, ResidentLayer};\nuse super::timeline::import_pgdata;\nuse super::upload_queue::{NotInitialized, SetDeletedFlagProgress};\nuse super::{DeleteTimelineError, Generation};\nuse crate::config::PageServerConf;\nuse crate::context::RequestContext;\nuse crate::deletion_queue::{DeletionQueueClient, DeletionQueueError};\nuse crate::metrics::{\n    MeasureRemoteOp, REMOTE_ONDEMAND_DOWNLOADED_BYTES, REMOTE_ONDEMAND_DOWNLOADED_LAYERS,\n    
RemoteOpFileKind, RemoteOpKind, RemoteTimelineClientMetrics,\n    RemoteTimelineClientMetricsCallTrackSize,\n};\nuse crate::task_mgr::{BACKGROUND_RUNTIME, TaskKind, shutdown_token};\nuse crate::tenant::metadata::TimelineMetadata;\nuse crate::tenant::remote_timeline_client::download::download_retry;\nuse crate::tenant::storage_layer::AsLayerDesc;\nuse crate::tenant::upload_queue::{\n    Delete, OpType, UploadOp, UploadQueue, UploadQueueInitialized, UploadQueueStopped,\n    UploadQueueStoppedDeletable, UploadTask,\n};\nuse crate::tenant::{TIMELINES_SEGMENT_NAME, debug_assert_current_span_has_tenant_and_timeline_id};\nuse crate::{TENANT_HEATMAP_BASENAME, task_mgr};\n\n// Occasional network issues and such can cause remote operations to fail, and\n// that's expected. If a download fails, we log it at info-level, and retry.\n// But after FAILED_DOWNLOAD_WARN_THRESHOLD retries, we start to log it at WARN\n// level instead, as repeated failures can mean a more serious problem. If it\n// fails more than FAILED_REMOTE_OP_RETRIES times, we give up.\npub(crate) const FAILED_DOWNLOAD_WARN_THRESHOLD: u32 = 3;\npub(crate) const FAILED_REMOTE_OP_RETRIES: u32 = 10;\n\n// Similarly log failed uploads and deletions at WARN level, after this many\n// retries. 
Uploads and deletions are retried forever, though.\npub(crate) const FAILED_UPLOAD_WARN_THRESHOLD: u32 = 3;\n\npub(crate) const INITDB_PATH: &str = \"initdb.tar.zst\";\n\npub(crate) const INITDB_PRESERVED_PATH: &str = \"initdb-preserved.tar.zst\";\n\n/// Default buffer size when interfacing with [`tokio::fs::File`].\npub(crate) const BUFFER_SIZE: usize = 32 * 1024;\n\n/// Doing non-essential flushes of deletion queue is subject to this timeout, after\n/// which we warn and skip.\nconst DELETION_QUEUE_FLUSH_TIMEOUT: Duration = Duration::from_secs(10);\n\npub enum MaybeDeletedIndexPart {\n    IndexPart(IndexPart),\n    Deleted(IndexPart),\n}\n\n#[derive(Debug, thiserror::Error)]\npub enum PersistIndexPartWithDeletedFlagError {\n    #[error(\"another task is already setting the deleted_flag, started at {0:?}\")]\n    AlreadyInProgress(NaiveDateTime),\n    #[error(\"the deleted_flag was already set, value is {0:?}\")]\n    AlreadyDeleted(NaiveDateTime),\n    #[error(transparent)]\n    Other(#[from] anyhow::Error),\n}\n\n#[derive(Debug, thiserror::Error)]\npub enum WaitCompletionError {\n    #[error(transparent)]\n    NotInitialized(NotInitialized),\n    #[error(\"wait_completion aborted because upload queue was stopped\")]\n    UploadQueueShutDownOrStopped,\n}\n\n#[derive(Debug, thiserror::Error)]\n#[error(\"Upload queue either in unexpected state or hasn't downloaded manifest yet\")]\npub struct UploadQueueNotReadyError;\n\n#[derive(Debug, thiserror::Error)]\npub enum ShutdownIfArchivedError {\n    #[error(transparent)]\n    NotInitialized(NotInitialized),\n    #[error(\"timeline is not archived\")]\n    NotArchived,\n}\n\n/// Behavioral modes that enable seamless live migration.\n///\n/// See docs/rfcs/028-pageserver-migration.md to understand how these fit in.\nstruct RemoteTimelineClientConfig {\n    /// If this is false, then update to remote_consistent_lsn are dropped rather\n    /// than being submitted to DeletionQueue for validation.  
This behavior is\n    /// used when a tenant attachment is known to have a stale generation number,\n    /// such that validation attempts will always fail.  This is not necessary\n    /// for correctness, but avoids spamming error statistics with failed validations\n    /// when doing migrations of tenants.\n    process_remote_consistent_lsn_updates: bool,\n\n    /// If this is true, then object deletions are held in a buffer in RemoteTimelineClient\n    /// rather than being submitted to the DeletionQueue.  This behavior is used when a tenant\n    /// is known to be multi-attached, in order to avoid disrupting other attached tenants\n    /// whose generations' metadata refers to the deleted objects.\n    block_deletions: bool,\n}\n\n/// RemoteTimelineClientConfig's state is entirely driven by LocationConf, but we do\n/// not carry the entire LocationConf structure: it's much more than we need.  The From\n/// impl extracts the subset of the LocationConf that is interesting to RemoteTimelineClient.\nimpl From<&AttachedLocationConfig> for RemoteTimelineClientConfig {\n    fn from(lc: &AttachedLocationConfig) -> Self {\n        Self {\n            block_deletions: !lc.may_delete_layers_hint(),\n            process_remote_consistent_lsn_updates: lc.may_upload_layers_hint(),\n        }\n    }\n}\n\n/// A client for accessing a timeline's data in remote storage.\n///\n/// This takes care of managing the number of connections, and balancing them\n/// across tenants. This also handles retries of failed uploads.\n///\n/// Upload and delete requests are ordered so that before a deletion is\n/// performed, we wait for all preceding uploads to finish. This ensures\n/// that if you perform a compaction operation that reshuffles data in layer\n/// files, we don't have a transient state where the old files have already been\n/// deleted, but new files have not yet been uploaded.\n///\n/// Similarly, this enforces an order between index-file uploads, and layer\n/// uploads.  
Before an index-file upload is performed, all preceding layer\n/// uploads must be finished.\n///\n/// This also maintains a list of remote files, and automatically includes that\n/// in the index part file, whenever timeline metadata is uploaded.\n///\n/// Downloads are not queued, they are performed immediately.\npub(crate) struct RemoteTimelineClient {\n    conf: &'static PageServerConf,\n\n    runtime: tokio::runtime::Handle,\n\n    tenant_shard_id: TenantShardId,\n    timeline_id: TimelineId,\n    generation: Generation,\n\n    upload_queue: Mutex<UploadQueue>,\n\n    pub(crate) metrics: Arc<RemoteTimelineClientMetrics>,\n\n    storage_impl: GenericRemoteStorage,\n\n    deletion_queue_client: DeletionQueueClient,\n\n    /// Subset of tenant configuration used to control upload behaviors during migrations\n    config: std::sync::RwLock<RemoteTimelineClientConfig>,\n\n    cancel: CancellationToken,\n}\n\nimpl Drop for RemoteTimelineClient {\n    fn drop(&mut self) {\n        debug!(\"dropping RemoteTimelineClient\");\n    }\n}\n\nimpl RemoteTimelineClient {\n    ///\n    /// Create a remote storage client for given timeline\n    ///\n    /// Note: the caller must initialize the upload queue before any uploads can be scheduled,\n    /// by calling init_upload_queue.\n    ///\n    pub(crate) fn new(\n        remote_storage: GenericRemoteStorage,\n        deletion_queue_client: DeletionQueueClient,\n        conf: &'static PageServerConf,\n        tenant_shard_id: TenantShardId,\n        timeline_id: TimelineId,\n        generation: Generation,\n        location_conf: &AttachedLocationConfig,\n    ) -> RemoteTimelineClient {\n        RemoteTimelineClient {\n            conf,\n            runtime: if cfg!(test) {\n                // remote_timeline_client.rs tests rely on current-thread runtime\n                tokio::runtime::Handle::current()\n            } else {\n                BACKGROUND_RUNTIME.handle().clone()\n            },\n            tenant_shard_id,\n   
         timeline_id,\n            generation,\n            storage_impl: remote_storage,\n            deletion_queue_client,\n            upload_queue: Mutex::new(UploadQueue::Uninitialized),\n            metrics: Arc::new(RemoteTimelineClientMetrics::new(\n                &tenant_shard_id,\n                &timeline_id,\n            )),\n            config: std::sync::RwLock::new(RemoteTimelineClientConfig::from(location_conf)),\n            cancel: CancellationToken::new(),\n        }\n    }\n\n    /// Initialize the upload queue for a remote storage that already received\n    /// an index file upload, i.e., it's not empty.\n    /// The given `index_part` must be the one on the remote.\n    pub fn init_upload_queue(&self, index_part: &IndexPart) -> anyhow::Result<()> {\n        // Set the maximum number of inprogress tasks to the remote storage concurrency. There's\n        // certainly no point in starting more upload tasks than this.\n        let inprogress_limit = self\n            .conf\n            .remote_storage_config\n            .as_ref()\n            .map_or(0, |r| r.concurrency_limit());\n        let mut upload_queue = self.upload_queue.lock().unwrap();\n        upload_queue.initialize_with_current_remote_index_part(index_part, inprogress_limit)?;\n        self.update_remote_physical_size_gauge(Some(index_part));\n        info!(\n            \"initialized upload queue from remote index with {} layer files\",\n            index_part.layer_metadata.len()\n        );\n        Ok(())\n    }\n\n    /// Initialize the upload queue for the case where the remote storage is empty,\n    /// i.e., it doesn't have an `IndexPart`.\n    ///\n    /// `rel_size_v2_status` needs to be carried over during branching, and that's why\n    /// it's passed in here.\n    pub fn init_upload_queue_for_empty_remote(\n        &self,\n        local_metadata: &TimelineMetadata,\n        rel_size_v2_migration: Option<RelSizeMigration>,\n        rel_size_migrated_at: Option<Lsn>,\n 
   ) -> anyhow::Result<()> {\n        // Set the maximum number of inprogress tasks to the remote storage concurrency. There's\n        // certainly no point in starting more upload tasks than this.\n        let inprogress_limit = self\n            .conf\n            .remote_storage_config\n            .as_ref()\n            .map_or(0, |r| r.concurrency_limit());\n        let mut upload_queue = self.upload_queue.lock().unwrap();\n        let initialized_queue =\n            upload_queue.initialize_empty_remote(local_metadata, inprogress_limit)?;\n        initialized_queue.dirty.rel_size_migration = rel_size_v2_migration;\n        initialized_queue.dirty.rel_size_migrated_at = rel_size_migrated_at;\n        self.update_remote_physical_size_gauge(None);\n        info!(\"initialized upload queue as empty\");\n        Ok(())\n    }\n\n    /// Initialize the queue in stopped state. Used in startup path\n    /// to continue deletion operation interrupted by pageserver crash or restart.\n    pub fn init_upload_queue_stopped_to_continue_deletion(\n        &self,\n        index_part: &IndexPart,\n    ) -> anyhow::Result<()> {\n        // FIXME: consider newtype for DeletedIndexPart.\n        let deleted_at = index_part.deleted_at.ok_or(anyhow::anyhow!(\n            \"bug: it is responsibility of the caller to provide index part from MaybeDeletedIndexPart::Deleted\"\n        ))?;\n        let inprogress_limit = self\n            .conf\n            .remote_storage_config\n            .as_ref()\n            .map_or(0, |r| r.concurrency_limit());\n\n        let mut upload_queue = self.upload_queue.lock().unwrap();\n        upload_queue.initialize_with_current_remote_index_part(index_part, inprogress_limit)?;\n        self.update_remote_physical_size_gauge(Some(index_part));\n        self.stop_impl(&mut upload_queue);\n\n        upload_queue\n            .stopped_mut()\n            .expect(\"stopped above\")\n            .deleted_at = 
SetDeletedFlagProgress::Successful(deleted_at);\n\n        Ok(())\n    }\n\n    /// Notify this client of a change to its parent tenant's config, as this may cause us to\n    /// take action (unblocking deletions when transitioning from AttachedMulti to AttachedSingle)\n    pub(super) fn update_config(&self, location_conf: &AttachedLocationConfig) {\n        let new_conf = RemoteTimelineClientConfig::from(location_conf);\n        let unblocked = !new_conf.block_deletions;\n\n        // Update config before draining deletions, so that we don't race with more being\n        // inserted.  This can result in deletions happening out of order, but that does not\n        // violate any invariants: deletions only need to be ordered relative to upload of the index\n        // that dereferences the deleted objects, and we are not changing that order.\n        *self.config.write().unwrap() = new_conf;\n\n        if unblocked {\n            // If we may now delete layers, drain any that were blocked in our old\n            // configuration state\n            let mut queue_locked = self.upload_queue.lock().unwrap();\n\n            if let Ok(queue) = queue_locked.initialized_mut() {\n                let blocked_deletions = std::mem::take(&mut queue.blocked_deletions);\n                for d in blocked_deletions {\n                    if let Err(e) = self.deletion_queue_client.push_layers(\n                        self.tenant_shard_id,\n                        self.timeline_id,\n                        self.generation,\n                        d.layers,\n                    ) {\n                        // This could happen if the pageserver is shut down while a tenant\n                        // is transitioning from a deletion-blocked state: we will leak some\n                        // S3 objects in this case.\n                        warn!(\"Failed to drain blocked deletions: {}\", e);\n                        break;\n                    }\n                }\n            }\n   
     }\n    }\n\n    /// Returns `None` if nothing is yet uploaded, `Some(disk_consistent_lsn)` otherwise.\n    pub fn remote_consistent_lsn_projected(&self) -> Option<Lsn> {\n        match &mut *self.upload_queue.lock().unwrap() {\n            UploadQueue::Uninitialized => None,\n            UploadQueue::Initialized(q) => q.get_last_remote_consistent_lsn_projected(),\n            UploadQueue::Stopped(UploadQueueStopped::Uninitialized) => None,\n            UploadQueue::Stopped(UploadQueueStopped::Deletable(q)) => q\n                .upload_queue_for_deletion\n                .get_last_remote_consistent_lsn_projected(),\n        }\n    }\n\n    pub fn remote_consistent_lsn_visible(&self) -> Option<Lsn> {\n        match &mut *self.upload_queue.lock().unwrap() {\n            UploadQueue::Uninitialized => None,\n            UploadQueue::Initialized(q) => Some(q.get_last_remote_consistent_lsn_visible()),\n            UploadQueue::Stopped(UploadQueueStopped::Uninitialized) => None,\n            UploadQueue::Stopped(UploadQueueStopped::Deletable(q)) => Some(\n                q.upload_queue_for_deletion\n                    .get_last_remote_consistent_lsn_visible(),\n            ),\n        }\n    }\n\n    /// Returns true if this timeline was previously detached at this Lsn and the remote timeline\n    /// client is currently initialized.\n    pub(crate) fn is_previous_ancestor_lsn(&self, lsn: Lsn) -> bool {\n        self.upload_queue\n            .lock()\n            .unwrap()\n            .initialized_mut()\n            .map(|uq| uq.clean.0.lineage.is_previous_ancestor_lsn(lsn))\n            .unwrap_or(false)\n    }\n\n    /// Returns whether the timeline is archived.\n    /// Return None if the remote index_part hasn't been downloaded yet.\n    pub(crate) fn is_archived(&self) -> Option<bool> {\n        self.upload_queue\n            .lock()\n            .unwrap()\n            .initialized_mut()\n            .map(|q| q.clean.0.archived_at.is_some())\n            
.ok()\n    }\n\n    /// Returns true if the timeline is invisible in synthetic size calculations.\n    pub(crate) fn is_invisible(&self) -> Option<bool> {\n        self.upload_queue\n            .lock()\n            .unwrap()\n            .initialized_mut()\n            .map(|q| q.clean.0.marked_invisible_at.is_some())\n            .ok()\n    }\n\n    /// Returns `Ok(Some(timestamp))` if the timeline has been archived, `Ok(None)` if the timeline hasn't been archived.\n    ///\n    /// Return Err(_) if the remote index_part hasn't been downloaded yet, or the timeline hasn't been stopped yet.\n    pub(crate) fn archived_at_stopped_queue(\n        &self,\n    ) -> Result<Option<NaiveDateTime>, UploadQueueNotReadyError> {\n        self.upload_queue\n            .lock()\n            .unwrap()\n            .stopped_mut()\n            .map(|q| q.upload_queue_for_deletion.clean.0.archived_at)\n            .map_err(|_| UploadQueueNotReadyError)\n    }\n\n    fn update_remote_physical_size_gauge(&self, current_remote_index_part: Option<&IndexPart>) {\n        let size: u64 = if let Some(current_remote_index_part) = current_remote_index_part {\n            current_remote_index_part\n                .layer_metadata\n                .values()\n                .map(|ilmd| ilmd.file_size)\n                .sum()\n        } else {\n            0\n        };\n        self.metrics.remote_physical_size_gauge.set(size);\n    }\n\n    pub fn get_remote_physical_size(&self) -> u64 {\n        self.metrics.remote_physical_size_gauge.get()\n    }\n\n    //\n    // Download operations.\n    //\n    // These don't use the per-timeline queue. 
They do use the global semaphore in\n    // S3Bucket, to limit the total number of concurrent operations, though.\n    //\n\n    /// Download index file\n    pub async fn download_index_file(\n        &self,\n        cancel: &CancellationToken,\n    ) -> Result<MaybeDeletedIndexPart, DownloadError> {\n        let _unfinished_gauge_guard = self.metrics.call_begin(\n            &RemoteOpFileKind::Index,\n            &RemoteOpKind::Download,\n            crate::metrics::RemoteTimelineClientMetricsCallTrackSize::DontTrackSize {\n                reason: \"no need for a downloads gauge\",\n            },\n        );\n\n        let (index_part, index_generation, index_last_modified) = download::download_index_part(\n            &self.storage_impl,\n            &self.tenant_shard_id,\n            &self.timeline_id,\n            self.generation,\n            cancel,\n        )\n        .measure_remote_op(\n            Option::<TaskKind>::None,\n            RemoteOpFileKind::Index,\n            RemoteOpKind::Download,\n            Arc::clone(&self.metrics),\n        )\n        .await?;\n\n        // Defense in depth: monotonicity of generation numbers is an important correctness guarantee, so when we see a very\n        // old index, we do extra checks in case this is the result of backward time-travel of the generation number (e.g.\n        // in case of a bug in the service that issues generation numbers). 
Indices are allowed to be old, but we expect that\n        // when we load an old index we are loading the _latest_ index: if we are asked to load an old index and there is\n        // also a newer index available, that is surprising.\n        const INDEX_AGE_CHECKS_THRESHOLD: Duration = Duration::from_secs(14 * 24 * 3600);\n        let index_age = index_last_modified.elapsed().unwrap_or_else(|e| {\n            if e.duration() > Duration::from_secs(5) {\n                // We only warn if the S3 clock and our local clock are >5s out: because this is a low resolution\n                // timestamp, it is common to be out by at least 1 second.\n                tracing::warn!(\"Index has modification time in the future: {e}\");\n            }\n            Duration::ZERO\n        });\n        if index_age > INDEX_AGE_CHECKS_THRESHOLD {\n            tracing::info!(\n                ?index_generation,\n                age = index_age.as_secs_f64(),\n                \"Loaded an old index, checking for other indices...\"\n            );\n\n            // Find the highest-generation index\n            let (_latest_index_part, latest_index_generation, latest_index_mtime) =\n                download::download_index_part(\n                    &self.storage_impl,\n                    &self.tenant_shard_id,\n                    &self.timeline_id,\n                    Generation::MAX,\n                    cancel,\n                )\n                .await?;\n\n            if latest_index_generation > index_generation {\n                // Unexpected!  
Why are we loading such an old index if a more recent one exists?\n                // We will refuse to proceed, as there is no reasonable scenario where this should happen, but\n                // there _is_ a clear bug/corruption scenario where it would happen (controller sets the generation\n                // backwards).\n                tracing::error!(\n                    ?index_generation,\n                    ?latest_index_generation,\n                    ?latest_index_mtime,\n                    \"Found a newer index while loading an old one\"\n                );\n                return Err(DownloadError::Fatal(\n                    \"Index age exceeds threshold and a newer index exists\".into(),\n                ));\n            }\n        }\n\n        if index_part.deleted_at.is_some() {\n            Ok(MaybeDeletedIndexPart::Deleted(index_part))\n        } else {\n            Ok(MaybeDeletedIndexPart::IndexPart(index_part))\n        }\n    }\n\n    /// Download a (layer) file from `path`, into local filesystem.\n    ///\n    /// 'layer_metadata' is the metadata from the remote index file.\n    ///\n    /// On success, returns the size of the downloaded file.\n    pub async fn download_layer_file(\n        &self,\n        layer_file_name: &LayerName,\n        layer_metadata: &LayerFileMetadata,\n        local_path: &Utf8Path,\n        gate: &utils::sync::gate::Gate,\n        cancel: &CancellationToken,\n        ctx: &RequestContext,\n    ) -> Result<u64, DownloadError> {\n        let downloaded_size = {\n            let _unfinished_gauge_guard = self.metrics.call_begin(\n                &RemoteOpFileKind::Layer,\n                &RemoteOpKind::Download,\n                crate::metrics::RemoteTimelineClientMetricsCallTrackSize::DontTrackSize {\n                    reason: \"no need for a downloads gauge\",\n                },\n            );\n            download::download_layer_file(\n                self.conf,\n                &self.storage_impl,\n     
           self.tenant_shard_id,\n                self.timeline_id,\n                layer_file_name,\n                layer_metadata,\n                local_path,\n                gate,\n                cancel,\n                ctx,\n            )\n            .measure_remote_op(\n                Some(ctx.task_kind()),\n                RemoteOpFileKind::Layer,\n                RemoteOpKind::Download,\n                Arc::clone(&self.metrics),\n            )\n            .await?\n        };\n\n        REMOTE_ONDEMAND_DOWNLOADED_LAYERS.inc();\n        REMOTE_ONDEMAND_DOWNLOADED_BYTES.inc_by(downloaded_size);\n\n        Ok(downloaded_size)\n    }\n\n    //\n    // Upload operations.\n    //\n\n    /// Launch an index-file upload operation in the background, with\n    /// fully updated metadata.\n    ///\n    /// This should only be used to upload initial metadata to remote storage.\n    ///\n    /// The upload will be added to the queue immediately, but it\n    /// won't be performed until all previously scheduled layer file\n    /// upload operations have completed successfully.  This is to\n    /// ensure that when the index file claims that layers X, Y and Z\n    /// exist in remote storage, they really do. To wait for the upload\n    /// to complete, use `wait_completion`.\n    ///\n    /// If there were any changes to the list of files, i.e. 
if any\n    /// layer file uploads were scheduled, since the last index file\n    /// upload, those will be included too.\n    pub fn schedule_index_upload_for_full_metadata_update(\n        self: &Arc<Self>,\n        metadata: &TimelineMetadata,\n    ) -> anyhow::Result<()> {\n        let mut guard = self.upload_queue.lock().unwrap();\n        let upload_queue = guard.initialized_mut()?;\n\n        // As documented in the struct definition, it's ok for latest_metadata to be\n        // ahead of what's _actually_ on the remote during index upload.\n        upload_queue.dirty.metadata = metadata.clone();\n\n        self.schedule_index_upload(upload_queue);\n\n        Ok(())\n    }\n\n    /// Launch an index-file upload operation in the background, with only parts of the metadata\n    /// updated.\n    ///\n    /// This is the regular way of updating metadata on layer flushes or Gc.\n    ///\n    /// Using this lighter update mechanism allows for reparenting and detaching without changes to\n    /// `index_part.json`, while being more clear on what values update regularly.\n    pub(crate) fn schedule_index_upload_for_metadata_update(\n        self: &Arc<Self>,\n        update: &MetadataUpdate,\n    ) -> anyhow::Result<()> {\n        let mut guard = self.upload_queue.lock().unwrap();\n        let upload_queue = guard.initialized_mut()?;\n\n        upload_queue.dirty.metadata.apply(update);\n\n        // Defense in depth: if we somehow generated invalid metadata, do not persist it.\n        upload_queue\n            .dirty\n            .validate()\n            .map_err(|e| anyhow::anyhow!(e))?;\n\n        self.schedule_index_upload(upload_queue);\n\n        Ok(())\n    }\n\n    /// Launch an index-file upload operation in the background, with only the `archived_at` field updated.\n    ///\n    /// Returns whether it is required to wait for the queue to be empty to ensure that the change is uploaded,\n    /// so either if the change is already sitting in the queue, but 
not committed yet, or the change has not\n    /// been in the queue yet.\n    pub(crate) fn schedule_index_upload_for_timeline_archival_state(\n        self: &Arc<Self>,\n        state: TimelineArchivalState,\n    ) -> anyhow::Result<bool> {\n        let mut guard = self.upload_queue.lock().unwrap();\n        let upload_queue = guard.initialized_mut()?;\n\n        /// Returns Some(_) if a change is needed, and Some(true) if it's a\n        /// change needed to set archived_at.\n        fn need_change(\n            archived_at: &Option<NaiveDateTime>,\n            state: TimelineArchivalState,\n        ) -> Option<bool> {\n            match (archived_at, state) {\n                (Some(_), TimelineArchivalState::Archived)\n                | (None, TimelineArchivalState::Unarchived) => {\n                    // Nothing to do\n                    tracing::info!(\"intended state matches present state\");\n                    None\n                }\n                (None, TimelineArchivalState::Archived) => Some(true),\n                (Some(_), TimelineArchivalState::Unarchived) => Some(false),\n            }\n        }\n        let need_upload_scheduled = need_change(&upload_queue.dirty.archived_at, state);\n\n        if let Some(archived_at_set) = need_upload_scheduled {\n            let intended_archived_at = archived_at_set.then(|| Utc::now().naive_utc());\n            upload_queue.dirty.archived_at = intended_archived_at;\n            self.schedule_index_upload(upload_queue);\n        }\n\n        let need_wait = need_change(&upload_queue.clean.0.archived_at, state).is_some();\n        Ok(need_wait)\n    }\n\n    pub(crate) fn schedule_index_upload_for_timeline_invisible_state(\n        self: &Arc<Self>,\n        state: TimelineVisibilityState,\n    ) -> anyhow::Result<()> {\n        let mut guard = self.upload_queue.lock().unwrap();\n        let upload_queue = guard.initialized_mut()?;\n\n        fn need_change(\n            marked_invisible_at: 
&Option<NaiveDateTime>,\n            state: TimelineVisibilityState,\n        ) -> Option<bool> {\n            match (marked_invisible_at, state) {\n                (Some(_), TimelineVisibilityState::Invisible) => Some(false),\n                (None, TimelineVisibilityState::Invisible) => Some(true),\n                (Some(_), TimelineVisibilityState::Visible) => Some(false),\n                (None, TimelineVisibilityState::Visible) => Some(true),\n            }\n        }\n\n        let need_upload_scheduled = need_change(&upload_queue.dirty.marked_invisible_at, state);\n\n        if let Some(marked_invisible_at_set) = need_upload_scheduled {\n            let intended_marked_invisible_at =\n                marked_invisible_at_set.then(|| Utc::now().naive_utc());\n            upload_queue.dirty.marked_invisible_at = intended_marked_invisible_at;\n            self.schedule_index_upload(upload_queue);\n        }\n\n        Ok(())\n    }\n\n    /// Shuts the timeline client down, but only if the timeline is archived.\n    ///\n    /// This function and [`Self::schedule_index_upload_for_timeline_archival_state`] use the\n    /// same lock to prevent races between unarchival and offloading: unarchival requires the\n    /// upload queue to be initialized, and leaves behind an upload queue where either dirty\n    /// or clean has archived_at of `None`. 
offloading leaves behind an uninitialized upload\n    /// queue.\n    pub(crate) async fn shutdown_if_archived(\n        self: &Arc<Self>,\n    ) -> Result<(), ShutdownIfArchivedError> {\n        {\n            let mut guard = self.upload_queue.lock().unwrap();\n            let upload_queue = guard\n                .initialized_mut()\n                .map_err(ShutdownIfArchivedError::NotInitialized)?;\n\n            match (\n                upload_queue.dirty.archived_at.is_none(),\n                upload_queue.clean.0.archived_at.is_none(),\n            ) {\n                // The expected case: the timeline is archived and we don't want to unarchive\n                (false, false) => {}\n                (true, false) => {\n                    tracing::info!(\"can't shut down timeline: timeline slated for unarchival\");\n                    return Err(ShutdownIfArchivedError::NotArchived);\n                }\n                (dirty_archived, true) => {\n                    tracing::info!(%dirty_archived, \"can't shut down timeline: timeline not archived in remote storage\");\n                    return Err(ShutdownIfArchivedError::NotArchived);\n                }\n            }\n\n            // Set the shutting_down flag while the guard from the archival check is held.\n            // This prevents a race with unarchival, as initialized_mut will not return\n            // an upload queue from this point.\n            // Also launch the queued tasks like shutdown() does.\n            if !upload_queue.shutting_down {\n                upload_queue.shutting_down = true;\n                upload_queue.queued_operations.push_back(UploadOp::Shutdown);\n                // this operation is not counted similar to Barrier\n                self.launch_queued_tasks(upload_queue);\n            }\n        }\n\n        self.shutdown().await;\n\n        Ok(())\n    }\n\n    /// Launch an index-file upload operation in the background, setting `import_pgdata` field.\n    pub(crate) 
fn schedule_index_upload_for_import_pgdata_state_update(\n        self: &Arc<Self>,\n        state: Option<import_pgdata::index_part_format::Root>,\n    ) -> anyhow::Result<()> {\n        let mut guard = self.upload_queue.lock().unwrap();\n        let upload_queue = guard.initialized_mut()?;\n        upload_queue.dirty.import_pgdata = state;\n        self.schedule_index_upload(upload_queue);\n        Ok(())\n    }\n\n    /// If the `import_pgdata` field marks the timeline as having an import in progress,\n    /// launch an index-file upload operation that transitions it to done in the background\n    pub(crate) fn schedule_index_upload_for_import_pgdata_finalize(\n        self: &Arc<Self>,\n    ) -> anyhow::Result<()> {\n        use import_pgdata::index_part_format;\n\n        let mut guard = self.upload_queue.lock().unwrap();\n        let upload_queue = guard.initialized_mut()?;\n        let to_update = match &upload_queue.dirty.import_pgdata {\n            Some(import) if !import.is_done() => Some(import),\n            Some(_) | None => None,\n        };\n\n        if let Some(old) = to_update {\n            let new =\n                index_part_format::Root::V1(index_part_format::V1::Done(index_part_format::Done {\n                    idempotency_key: old.idempotency_key().clone(),\n                    started_at: *old.started_at(),\n                    finished_at: chrono::Utc::now().naive_utc(),\n                }));\n\n            upload_queue.dirty.import_pgdata = Some(new);\n            self.schedule_index_upload(upload_queue);\n        }\n\n        Ok(())\n    }\n\n    /// Launch an index-file upload operation in the background, setting `gc_compaction_state` field.\n    pub(crate) fn schedule_index_upload_for_gc_compaction_state_update(\n        self: &Arc<Self>,\n        gc_compaction_state: GcCompactionState,\n    ) -> anyhow::Result<()> {\n        let mut guard = self.upload_queue.lock().unwrap();\n        let upload_queue = guard.initialized_mut()?;\n  
      upload_queue.dirty.gc_compaction = Some(gc_compaction_state);\n        self.schedule_index_upload(upload_queue);\n        Ok(())\n    }\n\n    /// Launch an index-file upload operation in the background, setting `rel_size_v2_status` field.\n    pub(crate) fn schedule_index_upload_for_rel_size_v2_status_update(\n        self: &Arc<Self>,\n        rel_size_v2_status: RelSizeMigration,\n        rel_size_migrated_at: Option<Lsn>,\n    ) -> anyhow::Result<()> {\n        let mut guard = self.upload_queue.lock().unwrap();\n        let upload_queue = guard.initialized_mut()?;\n        upload_queue.dirty.rel_size_migration = Some(rel_size_v2_status);\n        upload_queue.dirty.rel_size_migrated_at = rel_size_migrated_at;\n        // TODO: allow this operation to bypass the validation check because we might upload the index part\n        // with no layers but the flag updated. For now, we just modify the index part in memory and the next\n        // upload will include the flag.\n        // self.schedule_index_upload(upload_queue);\n        Ok(())\n    }\n\n    ///\n    /// Launch an index-file upload operation in the background, if necessary.\n    ///\n    /// Use this function to schedule the update of the index file after\n    /// scheduling file uploads or deletions. 
If no file uploads or deletions\n    /// have been scheduled since the last index file upload, this does\n    /// nothing.\n    ///\n    /// Like schedule_index_upload_for_metadata_update(), this merely adds\n    /// the upload to the upload queue and returns quickly.\n    pub fn schedule_index_upload_for_file_changes(self: &Arc<Self>) -> Result<(), NotInitialized> {\n        let mut guard = self.upload_queue.lock().unwrap();\n        let upload_queue = guard.initialized_mut()?;\n\n        if upload_queue.latest_files_changes_since_metadata_upload_scheduled > 0 {\n            self.schedule_index_upload(upload_queue);\n        }\n\n        Ok(())\n    }\n\n    /// Only used in the `patch_index_part` HTTP API to force trigger an index upload.\n    pub fn force_schedule_index_upload(self: &Arc<Self>) -> Result<(), NotInitialized> {\n        let mut guard = self.upload_queue.lock().unwrap();\n        let upload_queue = guard.initialized_mut()?;\n        self.schedule_index_upload(upload_queue);\n        Ok(())\n    }\n\n    /// Launch an index-file upload operation in the background (internal function)\n    fn schedule_index_upload(self: &Arc<Self>, upload_queue: &mut UploadQueueInitialized) {\n        let disk_consistent_lsn = upload_queue.dirty.metadata.disk_consistent_lsn();\n        // fix up the duplicated field\n        upload_queue.dirty.disk_consistent_lsn = disk_consistent_lsn;\n\n        // make sure it serializes before doing it in perform_upload_task so that it doesn't\n        // look like a retryable error\n        let void = std::io::sink();\n        serde_json::to_writer(void, &upload_queue.dirty).expect(\"serialize index_part.json\");\n\n        let index_part = &upload_queue.dirty;\n\n        info!(\n            \"scheduling metadata upload up to consistent LSN {disk_consistent_lsn} with {} files ({} changed)\",\n            index_part.layer_metadata.len(),\n            upload_queue.latest_files_changes_since_metadata_upload_scheduled,\n        );\n\n 
       let op = UploadOp::UploadMetadata {\n            uploaded: Box::new(index_part.clone()),\n        };\n        self.metric_begin(&op);\n        upload_queue.queued_operations.push_back(op);\n        upload_queue.latest_files_changes_since_metadata_upload_scheduled = 0;\n\n        // Launch the task immediately, if possible\n        self.launch_queued_tasks(upload_queue);\n    }\n\n    /// Reparent this timeline to a new parent.\n    ///\n    /// A retryable step of timeline ancestor detach.\n    pub(crate) async fn schedule_reparenting_and_wait(\n        self: &Arc<Self>,\n        new_parent: &TimelineId,\n    ) -> anyhow::Result<()> {\n        let receiver = {\n            let mut guard = self.upload_queue.lock().unwrap();\n            let upload_queue = guard.initialized_mut()?;\n\n            let Some(prev) = upload_queue.dirty.metadata.ancestor_timeline() else {\n                return Err(anyhow::anyhow!(\n                    \"cannot reparent without a current ancestor\"\n                ));\n            };\n\n            let uploaded = &upload_queue.clean.0.metadata;\n\n            if uploaded.ancestor_timeline().is_none() && !uploaded.ancestor_lsn().is_valid() {\n                // nothing to do\n                None\n            } else {\n                upload_queue.dirty.metadata.reparent(new_parent);\n                upload_queue.dirty.lineage.record_previous_ancestor(&prev);\n\n                self.schedule_index_upload(upload_queue);\n\n                Some(self.schedule_barrier0(upload_queue))\n            }\n        };\n\n        if let Some(receiver) = receiver {\n            Self::wait_completion0(receiver).await?;\n        }\n        Ok(())\n    }\n\n    /// Schedules uploading a new version of `index_part.json` with the given layers added,\n    /// detaching from ancestor and waits for it to complete.\n    ///\n    /// This is used with `Timeline::detach_ancestor` functionality.\n    pub(crate) async fn 
schedule_adding_existing_layers_to_index_detach_and_wait(\n        self: &Arc<Self>,\n        layers: &[Layer],\n        adopted: (TimelineId, Lsn),\n    ) -> anyhow::Result<()> {\n        let barrier = {\n            let mut guard = self.upload_queue.lock().unwrap();\n            let upload_queue = guard.initialized_mut()?;\n\n            if upload_queue.clean.0.lineage.detached_previous_ancestor() == Some(adopted) {\n                None\n            } else {\n                upload_queue.dirty.metadata.detach_from_ancestor(&adopted);\n                upload_queue.dirty.lineage.record_detaching(&adopted);\n\n                for layer in layers {\n                    let prev = upload_queue\n                        .dirty\n                        .layer_metadata\n                        .insert(layer.layer_desc().layer_name(), layer.metadata());\n                    assert!(prev.is_none(), \"copied layer existed already {layer}\");\n                }\n\n                self.schedule_index_upload(upload_queue);\n\n                Some(self.schedule_barrier0(upload_queue))\n            }\n        };\n\n        if let Some(barrier) = barrier {\n            Self::wait_completion0(barrier).await?;\n        }\n        Ok(())\n    }\n\n    /// Adds a gc blocking reason for this timeline if one does not exist already.\n    ///\n    /// A retryable step of timeline detach ancestor.\n    ///\n    /// Returns a future which waits until the completion of the upload.\n    pub(crate) fn schedule_insert_gc_block_reason(\n        self: &Arc<Self>,\n        reason: index::GcBlockingReason,\n    ) -> Result<impl std::future::Future<Output = Result<(), WaitCompletionError>>, NotInitialized>\n    {\n        let maybe_barrier = {\n            let mut guard = self.upload_queue.lock().unwrap();\n            let upload_queue = guard.initialized_mut()?;\n\n            if let index::GcBlockingReason::DetachAncestor = reason {\n                if 
upload_queue.dirty.metadata.ancestor_timeline().is_none() {\n                    drop(guard);\n                    panic!(\"cannot start detach ancestor if there is nothing to detach from\");\n                }\n            }\n\n            let wanted = |x: Option<&index::GcBlocking>| x.is_some_and(|x| x.blocked_by(reason));\n\n            let current = upload_queue.dirty.gc_blocking.as_ref();\n            let uploaded = upload_queue.clean.0.gc_blocking.as_ref();\n\n            match (current, uploaded) {\n                (x, y) if wanted(x) && wanted(y) => None,\n                (x, y) if wanted(x) && !wanted(y) => Some(self.schedule_barrier0(upload_queue)),\n                // Usual case: !wanted(x) && !wanted(y)\n                //\n                // Unusual: !wanted(x) && wanted(y) which means we have two processes waiting to\n                // turn on and off some reason.\n                (x, y) => {\n                    if !wanted(x) && wanted(y) {\n                        // this could be avoided by having external in-memory synchronization, like\n                        // timeline detach ancestor\n                        warn!(\n                            ?reason,\n                            op = \"insert\",\n                            \"unexpected: two racing processes to enable and disable a gc blocking reason\"\n                        );\n                    }\n\n                    // at this point, the metadata must always show that there is a parent\n                    upload_queue.dirty.gc_blocking = current\n                        .map(|x| x.with_reason(reason))\n                        .or_else(|| Some(index::GcBlocking::started_now_for(reason)));\n                    self.schedule_index_upload(upload_queue);\n                    Some(self.schedule_barrier0(upload_queue))\n                }\n            }\n        };\n\n        Ok(async move {\n            if let Some(barrier) = maybe_barrier {\n                
Self::wait_completion0(barrier).await?;\n            }\n            Ok(())\n        })\n    }\n\n    /// Removes a gc blocking reason for this timeline if one exists.\n    ///\n    /// A retryable step of timeline detach ancestor.\n    ///\n    /// Returns a future which waits until the completion of the upload.\n    pub(crate) fn schedule_remove_gc_block_reason(\n        self: &Arc<Self>,\n        reason: index::GcBlockingReason,\n    ) -> Result<impl std::future::Future<Output = Result<(), WaitCompletionError>>, NotInitialized>\n    {\n        let maybe_barrier = {\n            let mut guard = self.upload_queue.lock().unwrap();\n            let upload_queue = guard.initialized_mut()?;\n\n            if let index::GcBlockingReason::DetachAncestor = reason {\n                if !upload_queue.clean.0.lineage.is_detached_from_ancestor() {\n                    drop(guard);\n                    panic!(\"cannot complete timeline_ancestor_detach while not detached\");\n                }\n            }\n\n            let wanted = |x: Option<&index::GcBlocking>| {\n                x.is_none() || x.is_some_and(|b| !b.blocked_by(reason))\n            };\n\n            let current = upload_queue.dirty.gc_blocking.as_ref();\n            let uploaded = upload_queue.clean.0.gc_blocking.as_ref();\n\n            match (current, uploaded) {\n                (x, y) if wanted(x) && wanted(y) => None,\n                (x, y) if wanted(x) && !wanted(y) => Some(self.schedule_barrier0(upload_queue)),\n                (x, y) => {\n                    if !wanted(x) && wanted(y) {\n                        warn!(\n                            ?reason,\n                            op = \"remove\",\n                            \"unexpected: two racing processes to enable and disable a gc blocking reason (remove)\"\n                        );\n                    }\n\n                    upload_queue.dirty.gc_blocking =\n                        current.as_ref().and_then(|x| 
x.without_reason(reason));\n                    assert!(wanted(upload_queue.dirty.gc_blocking.as_ref()));\n                    self.schedule_index_upload(upload_queue);\n                    Some(self.schedule_barrier0(upload_queue))\n                }\n            }\n        };\n\n        Ok(async move {\n            if let Some(barrier) = maybe_barrier {\n                Self::wait_completion0(barrier).await?;\n            }\n            Ok(())\n        })\n    }\n\n    /// Launch an upload operation in the background; the file is added to be included in next\n    /// `index_part.json` upload.\n    pub(crate) fn schedule_layer_file_upload(\n        self: &Arc<Self>,\n        layer: ResidentLayer,\n    ) -> Result<(), NotInitialized> {\n        let mut guard = self.upload_queue.lock().unwrap();\n        let upload_queue = guard.initialized_mut()?;\n\n        self.schedule_layer_file_upload0(upload_queue, layer);\n        self.launch_queued_tasks(upload_queue);\n        Ok(())\n    }\n\n    fn schedule_layer_file_upload0(\n        self: &Arc<Self>,\n        upload_queue: &mut UploadQueueInitialized,\n        layer: ResidentLayer,\n    ) {\n        let metadata = layer.metadata();\n\n        upload_queue\n            .dirty\n            .layer_metadata\n            .insert(layer.layer_desc().layer_name(), metadata.clone());\n        upload_queue.latest_files_changes_since_metadata_upload_scheduled += 1;\n\n        info!(\n            gen=?metadata.generation,\n            shard=?metadata.shard,\n            \"scheduled layer file upload {layer}\",\n        );\n\n        let op = UploadOp::UploadLayer(layer, metadata, None);\n        self.metric_begin(&op);\n        upload_queue.queued_operations.push_back(op);\n    }\n\n    /// Launch a delete operation in the background.\n    ///\n    /// The operation does not modify local filesystem state.\n    ///\n    /// Note: This schedules an index file upload before the deletions.  
The\n    /// deletion won't actually be performed, until all previously scheduled\n    /// upload operations, and the index file upload, have completed\n    /// successfully.\n    pub fn schedule_layer_file_deletion(\n        self: &Arc<Self>,\n        names: &[LayerName],\n    ) -> anyhow::Result<()> {\n        let mut guard = self.upload_queue.lock().unwrap();\n        let upload_queue = guard.initialized_mut()?;\n\n        let with_metadata =\n            self.schedule_unlinking_of_layers_from_index_part0(upload_queue, names.iter().cloned());\n\n        self.schedule_deletion_of_unlinked0(upload_queue, with_metadata);\n\n        // Launch the tasks immediately, if possible\n        self.launch_queued_tasks(upload_queue);\n        Ok(())\n    }\n\n    /// Unlinks the layer files from `index_part.json` but does not yet schedule deletion for the\n    /// layer files, leaving them dangling.\n    ///\n    /// The files will be leaked in remote storage unless [`Self::schedule_deletion_of_unlinked`]\n    /// is invoked on them.\n    pub(crate) fn schedule_gc_update(\n        self: &Arc<Self>,\n        gc_layers: &[Layer],\n    ) -> Result<(), NotInitialized> {\n        let mut guard = self.upload_queue.lock().unwrap();\n        let upload_queue = guard.initialized_mut()?;\n\n        // just forget the return value; after uploading the next index_part.json, we can consider\n        // the layer files as \"dangling\". 
this is fine, at worst case we create work for the\n        // scrubber.\n\n        let names = gc_layers.iter().map(|x| x.layer_desc().layer_name());\n\n        self.schedule_unlinking_of_layers_from_index_part0(upload_queue, names);\n\n        self.launch_queued_tasks(upload_queue);\n\n        Ok(())\n    }\n\n    pub(crate) fn schedule_unlinking_of_layers_from_index_part<I>(\n        self: &Arc<Self>,\n        names: I,\n    ) -> Result<(), NotInitialized>\n    where\n        I: IntoIterator<Item = LayerName>,\n    {\n        let mut guard = self.upload_queue.lock().unwrap();\n        let upload_queue = guard.initialized_mut()?;\n\n        self.schedule_unlinking_of_layers_from_index_part0(upload_queue, names);\n\n        Ok(())\n    }\n\n    /// Update the remote index file, removing the to-be-deleted files from the index,\n    /// allowing scheduling of actual deletions later.\n    fn schedule_unlinking_of_layers_from_index_part0<I>(\n        self: &Arc<Self>,\n        upload_queue: &mut UploadQueueInitialized,\n        names: I,\n    ) -> Vec<(LayerName, LayerFileMetadata)>\n    where\n        I: IntoIterator<Item = LayerName>,\n    {\n        // Decorate our list of names with each name's metadata, dropping\n        // names that are unexpectedly missing from our metadata.  This metadata\n        // is later used when physically deleting layers, to construct key paths.\n        let with_metadata: Vec<_> = names\n            .into_iter()\n            .filter_map(|name| {\n                let meta = upload_queue.dirty.layer_metadata.remove(&name);\n\n                if let Some(meta) = meta {\n                    upload_queue.latest_files_changes_since_metadata_upload_scheduled += 1;\n                    Some((name, meta))\n                } else {\n                    // This can only happen if we forgot to schedule the file upload\n                    // before scheduling the delete. 
Log it because it is a rare/strange\n                    // situation, and in case something is misbehaving, we'd like to know which\n                    // layers experienced this.\n                    info!(\"Deleting layer {name} not found in latest_files list, never uploaded?\");\n                    None\n                }\n            })\n            .collect();\n\n        #[cfg(feature = \"testing\")]\n        for (name, metadata) in &with_metadata {\n            let gen_ = metadata.generation;\n            if let Some(unexpected) = upload_queue.dangling_files.insert(name.to_owned(), gen_) {\n                if unexpected == gen_ {\n                    tracing::error!(\"{name} was unlinked twice with same generation\");\n                } else {\n                    tracing::error!(\n                        \"{name} was unlinked twice with different generations {gen_:?} and {unexpected:?}\"\n                    );\n                }\n            }\n        }\n\n        // after unlinking files from the upload_queue.latest_files we must always schedule an\n        // index_part update, because that needs to be uploaded before we can actually delete the\n        // files.\n        if upload_queue.latest_files_changes_since_metadata_upload_scheduled > 0 {\n            self.schedule_index_upload(upload_queue);\n        }\n\n        with_metadata\n    }\n\n    /// Schedules deletion for layer files which have previously been unlinked from the\n    /// `index_part.json` with [`Self::schedule_gc_update`] or [`Self::schedule_compaction_update`].\n    pub(crate) fn schedule_deletion_of_unlinked(\n        self: &Arc<Self>,\n        layers: Vec<(LayerName, LayerFileMetadata)>,\n    ) -> anyhow::Result<()> {\n        let mut guard = self.upload_queue.lock().unwrap();\n        let upload_queue = guard.initialized_mut()?;\n\n        self.schedule_deletion_of_unlinked0(upload_queue, layers);\n        self.launch_queued_tasks(upload_queue);\n        Ok(())\n    }\n\n    fn 
schedule_deletion_of_unlinked0(\n        self: &Arc<Self>,\n        upload_queue: &mut UploadQueueInitialized,\n        mut with_metadata: Vec<(LayerName, LayerFileMetadata)>,\n    ) {\n        // Filter out any layers which were not created by this tenant shard.  These are\n        // layers that originate from some ancestor shard after a split, and may still\n        // be referenced by other shards. We are free to delete them locally and remove\n        // them from our index (and would have already done so when we reach this point\n        // in the code), but we may not delete them remotely.\n        with_metadata.retain(|(name, meta)| {\n            let retain = meta.shard.shard_number == self.tenant_shard_id.shard_number\n                && meta.shard.shard_count == self.tenant_shard_id.shard_count;\n            if !retain {\n                tracing::debug!(\n                    \"Skipping deletion of ancestor-shard layer {name}, from shard {}\",\n                    meta.shard\n                );\n            }\n            retain\n        });\n\n        for (name, meta) in &with_metadata {\n            info!(\n                \"scheduling deletion of layer {}{} (shard {})\",\n                name,\n                meta.generation.get_suffix(),\n                meta.shard\n            );\n        }\n\n        #[cfg(feature = \"testing\")]\n        for (name, meta) in &with_metadata {\n            let gen_ = meta.generation;\n            match upload_queue.dangling_files.remove(name) {\n                Some(same) if same == gen_ => { /* expected */ }\n                Some(other) => {\n                    tracing::error!(\"{name} was unlinked with {other:?} but deleted with {gen_:?}\");\n                }\n                None => {\n                    tracing::error!(\"{name} was unlinked but was not dangling\");\n                }\n            }\n        }\n\n        // schedule the actual deletions\n        if with_metadata.is_empty() {\n            // 
avoid scheduling the op & bumping the metric\n            return;\n        }\n        let op = UploadOp::Delete(Delete {\n            layers: with_metadata,\n        });\n        self.metric_begin(&op);\n        upload_queue.queued_operations.push_back(op);\n    }\n\n    /// Schedules a compaction update to the remote `index_part.json`.\n    ///\n    /// `compacted_from` represent the L0 names which have been `compacted_to` L1 layers.\n    pub(crate) fn schedule_compaction_update(\n        self: &Arc<Self>,\n        compacted_from: &[Layer],\n        compacted_to: &[ResidentLayer],\n    ) -> Result<(), NotInitialized> {\n        let mut guard = self.upload_queue.lock().unwrap();\n        let upload_queue = guard.initialized_mut()?;\n\n        for layer in compacted_to {\n            self.schedule_layer_file_upload0(upload_queue, layer.clone());\n        }\n\n        let names = compacted_from.iter().map(|x| x.layer_desc().layer_name());\n\n        self.schedule_unlinking_of_layers_from_index_part0(upload_queue, names);\n        self.launch_queued_tasks(upload_queue);\n\n        Ok(())\n    }\n\n    /// Wait for all previously scheduled uploads/deletions to complete\n    pub(crate) async fn wait_completion(self: &Arc<Self>) -> Result<(), WaitCompletionError> {\n        let receiver = {\n            let mut guard = self.upload_queue.lock().unwrap();\n            let upload_queue = guard\n                .initialized_mut()\n                .map_err(WaitCompletionError::NotInitialized)?;\n            self.schedule_barrier0(upload_queue)\n        };\n\n        Self::wait_completion0(receiver).await\n    }\n\n    async fn wait_completion0(\n        mut receiver: tokio::sync::watch::Receiver<()>,\n    ) -> Result<(), WaitCompletionError> {\n        if receiver.changed().await.is_err() {\n            return Err(WaitCompletionError::UploadQueueShutDownOrStopped);\n        }\n\n        Ok(())\n    }\n\n    pub(crate) fn schedule_barrier(self: &Arc<Self>) -> 
anyhow::Result<()> {\n        let mut guard = self.upload_queue.lock().unwrap();\n        let upload_queue = guard.initialized_mut()?;\n        self.schedule_barrier0(upload_queue);\n        Ok(())\n    }\n\n    fn schedule_barrier0(\n        self: &Arc<Self>,\n        upload_queue: &mut UploadQueueInitialized,\n    ) -> tokio::sync::watch::Receiver<()> {\n        let (sender, receiver) = tokio::sync::watch::channel(());\n        let barrier_op = UploadOp::Barrier(sender);\n\n        upload_queue.queued_operations.push_back(barrier_op);\n        // Don't count this kind of operation!\n\n        // Launch the task immediately, if possible\n        self.launch_queued_tasks(upload_queue);\n\n        receiver\n    }\n\n    /// Wait for all previously scheduled operations to complete, and then stop.\n    ///\n    /// Not cancellation safe\n    pub(crate) async fn shutdown(self: &Arc<Self>) {\n        // On cancellation the queue is left in an awkward state of refusing new operations but\n        // proper stop is yet to be called. 
On cancel the original or some later task must call\n        // `stop` or `shutdown`.\n        let sg = scopeguard::guard((), |_| {\n            tracing::error!(\n                \"RemoteTimelineClient::shutdown was cancelled; this should not happen, do not make this into an allowed_error\"\n            )\n        });\n\n        let fut = {\n            let mut guard = self.upload_queue.lock().unwrap();\n            let upload_queue = match &mut *guard {\n                UploadQueue::Stopped(_) => {\n                    scopeguard::ScopeGuard::into_inner(sg);\n                    return;\n                }\n                UploadQueue::Uninitialized => {\n                    // transition into Stopped state\n                    self.stop_impl(&mut guard);\n                    scopeguard::ScopeGuard::into_inner(sg);\n                    return;\n                }\n                UploadQueue::Initialized(init) => init,\n            };\n\n            // if the queue is already stuck due to a shutdown operation which was cancelled, then\n            // just don't add more of these as they would never complete.\n            //\n            // TODO: if launch_queued_tasks were to be refactored to accept a &mut UploadQueue\n            // in every place we would not have to jump through this hoop, and this method could be\n            // made cancellable.\n            if !upload_queue.shutting_down {\n                upload_queue.shutting_down = true;\n                upload_queue.queued_operations.push_back(UploadOp::Shutdown);\n                // this operation is not counted similar to Barrier\n\n                self.launch_queued_tasks(upload_queue);\n            }\n\n            upload_queue.shutdown_ready.clone().acquire_owned()\n        };\n\n        let res = fut.await;\n\n        scopeguard::ScopeGuard::into_inner(sg);\n\n        match res {\n            Ok(_permit) => unreachable!(\"shutdown_ready should not have been added permits\"),\n            Err(_closed) 
=> {\n                // expected\n            }\n        }\n\n        self.stop();\n    }\n\n    /// Set the deleted_at field in the remote index file.\n    ///\n    /// This fails if the upload queue has not been `stop()`ed.\n    ///\n    /// The caller is responsible for calling `stop()` AND for waiting\n    /// for any ongoing upload tasks to finish after `stop()` has succeeded.\n    /// Check method [`RemoteTimelineClient::stop`] for details.\n    #[instrument(skip_all)]\n    pub(crate) async fn persist_index_part_with_deleted_flag(\n        self: &Arc<Self>,\n    ) -> Result<(), PersistIndexPartWithDeletedFlagError> {\n        let index_part_with_deleted_at = {\n            let mut locked = self.upload_queue.lock().unwrap();\n\n            // We must be in stopped state because otherwise\n            // we can have in-progress index part upload that can overwrite the file\n            // with missing is_deleted flag that we are going to set below\n            let stopped = locked.stopped_mut()?;\n\n            match stopped.deleted_at {\n                SetDeletedFlagProgress::NotRunning => (), // proceed\n                SetDeletedFlagProgress::InProgress(at) => {\n                    return Err(PersistIndexPartWithDeletedFlagError::AlreadyInProgress(at));\n                }\n                SetDeletedFlagProgress::Successful(at) => {\n                    return Err(PersistIndexPartWithDeletedFlagError::AlreadyDeleted(at));\n                }\n            };\n            let deleted_at = Utc::now().naive_utc();\n            stopped.deleted_at = SetDeletedFlagProgress::InProgress(deleted_at);\n\n            let mut index_part = stopped.upload_queue_for_deletion.dirty.clone();\n            index_part.deleted_at = Some(deleted_at);\n            index_part\n        };\n\n        let undo_deleted_at = scopeguard::guard(Arc::clone(self), |self_clone| {\n            let mut locked = self_clone.upload_queue.lock().unwrap();\n            let stopped = locked\n             
   .stopped_mut()\n                .expect(\"there's no way out of Stopping, and we checked it's Stopping above\");\n            stopped.deleted_at = SetDeletedFlagProgress::NotRunning;\n        });\n\n        pausable_failpoint!(\"persist_deleted_index_part\");\n\n        backoff::retry(\n            || {\n                upload::upload_index_part(\n                    &self.storage_impl,\n                    &self.tenant_shard_id,\n                    &self.timeline_id,\n                    self.generation,\n                    &index_part_with_deleted_at,\n                    &self.cancel,\n                )\n            },\n            |_e| false,\n            1,\n            // have just a couple of attempts\n            // when executed as part of timeline deletion this happens in context of api call\n            // when executed as part of tenant deletion this happens in the background\n            2,\n            \"persist_index_part_with_deleted_flag\",\n            &self.cancel,\n        )\n        .await\n        .ok_or_else(|| anyhow::Error::new(TimeoutOrCancel::Cancel))\n        .and_then(|x| x)?;\n\n        // all good, disarm the guard and mark as success\n        ScopeGuard::into_inner(undo_deleted_at);\n        {\n            let mut locked = self.upload_queue.lock().unwrap();\n\n            let stopped = locked\n                .stopped_mut()\n                .expect(\"there's no way out of Stopping, and we checked it's Stopping above\");\n            stopped.deleted_at = SetDeletedFlagProgress::Successful(\n                index_part_with_deleted_at\n                    .deleted_at\n                    .expect(\"we set it above\"),\n            );\n        }\n\n        Ok(())\n    }\n\n    pub(crate) fn is_deleting(&self) -> bool {\n        let mut locked = self.upload_queue.lock().unwrap();\n        locked.stopped_mut().is_ok()\n    }\n\n    pub(crate) async fn preserve_initdb_archive(\n        self: &Arc<Self>,\n        tenant_id: &TenantId,\n  
      timeline_id: &TimelineId,\n        cancel: &CancellationToken,\n    ) -> anyhow::Result<()> {\n        backoff::retry(\n            || async {\n                upload::preserve_initdb_archive(&self.storage_impl, tenant_id, timeline_id, cancel)\n                    .await\n            },\n            TimeoutOrCancel::caused_by_cancel,\n            FAILED_DOWNLOAD_WARN_THRESHOLD,\n            FAILED_REMOTE_OP_RETRIES,\n            \"preserve_initdb_tar_zst\",\n            &cancel.clone(),\n        )\n        .await\n        .ok_or_else(|| anyhow::Error::new(TimeoutOrCancel::Cancel))\n        .and_then(|x| x)\n        .context(\"backing up initdb archive\")?;\n        Ok(())\n    }\n\n    /// Uploads the given layer **without** adding it to be part of a future `index_part.json` upload.\n    ///\n    /// This is not normally needed.\n    pub(crate) async fn upload_layer_file(\n        self: &Arc<Self>,\n        uploaded: &ResidentLayer,\n        cancel: &CancellationToken,\n    ) -> anyhow::Result<()> {\n        let remote_path = remote_layer_path(\n            &self.tenant_shard_id.tenant_id,\n            &self.timeline_id,\n            uploaded.metadata().shard,\n            &uploaded.layer_desc().layer_name(),\n            uploaded.metadata().generation,\n        );\n\n        backoff::retry(\n            || async {\n                upload::upload_timeline_layer(\n                    &self.storage_impl,\n                    uploaded.local_path(),\n                    &remote_path,\n                    uploaded.metadata().file_size,\n                    cancel,\n                )\n                .await\n            },\n            TimeoutOrCancel::caused_by_cancel,\n            FAILED_UPLOAD_WARN_THRESHOLD,\n            FAILED_REMOTE_OP_RETRIES,\n            \"upload a layer without adding it to latest files\",\n            cancel,\n        )\n        .await\n        .ok_or_else(|| anyhow::Error::new(TimeoutOrCancel::Cancel))\n        .and_then(|x| x)\n        
.context(\"upload a layer without adding it to latest files\")\n    }\n\n    /// Copies the `adopted` remote existing layer to the remote path of `adopted_as`. The layer is\n    /// not added to be part of a future `index_part.json` upload.\n    pub(crate) async fn copy_timeline_layer(\n        self: &Arc<Self>,\n        adopted: &Layer,\n        adopted_as: &Layer,\n        cancel: &CancellationToken,\n    ) -> anyhow::Result<()> {\n        let source_remote_path = remote_layer_path(\n            &self.tenant_shard_id.tenant_id,\n            &adopted\n                .get_timeline_id()\n                .expect(\"Source timeline should be alive\"),\n            adopted.metadata().shard,\n            &adopted.layer_desc().layer_name(),\n            adopted.metadata().generation,\n        );\n\n        let target_remote_path = remote_layer_path(\n            &self.tenant_shard_id.tenant_id,\n            &self.timeline_id,\n            adopted_as.metadata().shard,\n            &adopted_as.layer_desc().layer_name(),\n            adopted_as.metadata().generation,\n        );\n\n        backoff::retry(\n            || async {\n                upload::copy_timeline_layer(\n                    &self.storage_impl,\n                    &source_remote_path,\n                    &target_remote_path,\n                    cancel,\n                )\n                .await\n            },\n            TimeoutOrCancel::caused_by_cancel,\n            FAILED_UPLOAD_WARN_THRESHOLD,\n            FAILED_REMOTE_OP_RETRIES,\n            \"copy timeline layer\",\n            cancel,\n        )\n        .await\n        .ok_or_else(|| anyhow::Error::new(TimeoutOrCancel::Cancel))\n        .and_then(|x| x)\n        .context(\"remote copy timeline layer\")\n    }\n\n    async fn flush_deletion_queue(&self) -> Result<(), DeletionQueueError> {\n        match tokio::time::timeout(\n            DELETION_QUEUE_FLUSH_TIMEOUT,\n            self.deletion_queue_client.flush_immediate(),\n        )\n    
    .await\n        {\n            Ok(result) => result,\n            Err(_timeout) => {\n                // Flushing remote deletions is not mandatory: we flush here to make the system easier to test, and\n                // to ensure that _usually_ objects are really gone after a DELETE is acked.  However, in case of deletion\n                // queue issues (https://github.com/neondatabase/neon/issues/6440), we don't want to wait indefinitely here.\n                tracing::warn!(\n                    \"Timed out waiting for deletion queue flush, acking deletion anyway\"\n                );\n                Ok(())\n            }\n        }\n    }\n\n    /// Prerequisites: UploadQueue should be in stopped state and deleted_at should be successfully set.\n    /// The function deletes layer files one by one, then lists the prefix to see if we leaked something,\n    /// deletes leaked files if any, and proceeds with deletion of index file at the end.\n    pub(crate) async fn delete_all(self: &Arc<Self>) -> Result<(), DeleteTimelineError> {\n        debug_assert_current_span_has_tenant_and_timeline_id();\n\n        let layers: Vec<RemotePath> = {\n            let mut locked = self.upload_queue.lock().unwrap();\n            let stopped = locked.stopped_mut().map_err(DeleteTimelineError::Other)?;\n\n            if !matches!(stopped.deleted_at, SetDeletedFlagProgress::Successful(_)) {\n                return Err(DeleteTimelineError::Other(anyhow::anyhow!(\n                    \"deleted_at is not set\"\n                )));\n            }\n\n            debug_assert!(stopped.upload_queue_for_deletion.no_pending_work());\n\n            stopped\n                .upload_queue_for_deletion\n                .dirty\n                .layer_metadata\n                .drain()\n                .filter(|(_file_name, meta)| {\n                    // Filter out layers that belonged to an ancestor shard.  
Since we are deleting the whole timeline from\n                    // all shards anyway, we _could_ delete these, but\n                    // - it creates a potential race if other shards are still\n                    //   using the layers while this shard deletes them.\n                    // - it means that if we rolled back the shard split, the ancestor shards would be in a state where\n                    //   these timelines are present but corrupt (their index exists but some layers don't)\n                    //\n                    // These layers will eventually be cleaned up by the scrubber when it does physical GC.\n                    meta.shard.shard_number == self.tenant_shard_id.shard_number\n                        && meta.shard.shard_count == self.tenant_shard_id.shard_count\n                })\n                .map(|(file_name, meta)| {\n                    remote_layer_path(\n                        &self.tenant_shard_id.tenant_id,\n                        &self.timeline_id,\n                        meta.shard,\n                        &file_name,\n                        meta.generation,\n                    )\n                })\n                .collect()\n        };\n\n        let layer_deletion_count = layers.len();\n        self.deletion_queue_client\n            .push_immediate(layers)\n            .await\n            .map_err(|_| DeleteTimelineError::Cancelled)?;\n\n        // Delete the initdb.tar.zst, which is not always present, but deletion attempts of\n        // nonexistent objects are not considered errors.\n        let initdb_path =\n            remote_initdb_archive_path(&self.tenant_shard_id.tenant_id, &self.timeline_id);\n        self.deletion_queue_client\n            .push_immediate(vec![initdb_path])\n            .await\n            .map_err(|_| DeleteTimelineError::Cancelled)?;\n\n        // Do not delete index part yet, it is needed for possible retry. 
If we remove it first\n        // and the retry arrives at a different pageserver, there won't be any traces of it on remote storage\n        let timeline_storage_path = remote_timeline_path(&self.tenant_shard_id, &self.timeline_id);\n\n        // Execute all pending deletions, so that when we proceed to do a listing below, we aren't\n        // taking the burden of listing all the layers that we already know we should delete.\n        self.flush_deletion_queue()\n            .await\n            .map_err(|_| DeleteTimelineError::Cancelled)?;\n\n        let cancel = shutdown_token();\n\n        let remaining = download_retry(\n            || async {\n                self.storage_impl\n                    .list(\n                        Some(&timeline_storage_path),\n                        ListingMode::NoDelimiter,\n                        None,\n                        &cancel,\n                    )\n                    .await\n            },\n            \"list remaining files\",\n            &cancel,\n        )\n        .await\n        .context(\"list files remaining files\")?\n        .keys;\n\n        // We will delete the current index_part object last, since it acts as a deletion\n        // marker via its deleted_at attribute\n        let latest_index = remaining\n            .iter()\n            .filter(|o| {\n                o.key\n                    .object_name()\n                    .map(|n| n.starts_with(IndexPart::FILE_NAME))\n                    .unwrap_or(false)\n            })\n            .filter_map(|o| {\n                parse_remote_index_path(o.key.clone()).map(|gen_| (o.key.clone(), gen_))\n            })\n            .max_by_key(|i| i.1)\n            .map(|i| i.0.clone())\n            .unwrap_or(\n                // No generation-suffixed indices, assume we are dealing with\n                // a legacy index.\n                remote_index_path(&self.tenant_shard_id, &self.timeline_id, Generation::none()),\n            );\n\n        let 
remaining_layers: Vec<RemotePath> = remaining\n            .into_iter()\n            .filter_map(|o| {\n                if o.key == latest_index || o.key.object_name() == Some(INITDB_PRESERVED_PATH) {\n                    None\n                } else {\n                    Some(o.key)\n                }\n            })\n            .inspect(|path| {\n                if let Some(name) = path.object_name() {\n                    info!(%name, \"deleting a file not referenced from index_part.json\");\n                } else {\n                    warn!(%path, \"deleting a nameless or non-utf8 object not referenced from index_part.json\");\n                }\n            })\n            .collect();\n\n        let not_referenced_count = remaining_layers.len();\n        if !remaining_layers.is_empty() {\n            self.deletion_queue_client\n                .push_immediate(remaining_layers)\n                .await\n                .map_err(|_| DeleteTimelineError::Cancelled)?;\n        }\n\n        fail::fail_point!(\"timeline-delete-before-index-delete\", |_| {\n            Err(DeleteTimelineError::Other(anyhow::anyhow!(\n                \"failpoint: timeline-delete-before-index-delete\"\n            )))?\n        });\n\n        debug!(\"enqueuing index part deletion\");\n        self.deletion_queue_client\n            .push_immediate([latest_index].to_vec())\n            .await\n            .map_err(|_| DeleteTimelineError::Cancelled)?;\n\n        // Timeline deletion is rare and we have probably emitted a reasonable number of objects: wait\n        // for a flush to a persistent deletion list so that we may be sure deletion will occur.\n        self.flush_deletion_queue()\n            .await\n            .map_err(|_| DeleteTimelineError::Cancelled)?;\n\n        fail::fail_point!(\"timeline-delete-after-index-delete\", |_| {\n            Err(DeleteTimelineError::Other(anyhow::anyhow!(\n                \"failpoint: timeline-delete-after-index-delete\"\n            
)))?\n        });\n\n        info!(prefix=%timeline_storage_path, referenced=layer_deletion_count, not_referenced=%not_referenced_count, \"done deleting in timeline prefix, including index_part.json\");\n\n        Ok(())\n    }\n\n    /// Pick next tasks from the queue, and start as many of them as possible without violating\n    /// the ordering constraints.\n    ///\n    /// The number of inprogress tasks is limited by `Self::inprogress_tasks`, see `next_ready`.\n    fn launch_queued_tasks(self: &Arc<Self>, upload_queue: &mut UploadQueueInitialized) {\n        while let Some((mut next_op, coalesced_ops)) = upload_queue.next_ready() {\n            debug!(\"starting op: {next_op}\");\n\n            // Prepare upload.\n            match &mut next_op {\n                UploadOp::UploadLayer(layer, meta, mode) => {\n                    if upload_queue\n                        .recently_deleted\n                        .remove(&(layer.layer_desc().layer_name().clone(), meta.generation))\n                    {\n                        *mode = Some(OpType::FlushDeletion);\n                    } else {\n                        *mode = Some(OpType::MayReorder)\n                    }\n                }\n                UploadOp::UploadMetadata { .. 
} => {}\n                UploadOp::Delete(Delete { layers }) => {\n                    for (name, meta) in layers {\n                        upload_queue\n                            .recently_deleted\n                            .insert((name.clone(), meta.generation));\n                    }\n                }\n                UploadOp::Barrier(sender) => {\n                    sender.send_replace(());\n                    continue;\n                }\n                UploadOp::Shutdown => unreachable!(\"shutdown is intentionally never popped off\"),\n            };\n\n            // Assign unique ID to this task\n            upload_queue.task_counter += 1;\n            let upload_task_id = upload_queue.task_counter;\n\n            // Add it to the in-progress map\n            let task = Arc::new(UploadTask {\n                task_id: upload_task_id,\n                op: next_op,\n                coalesced_ops,\n                retries: AtomicU32::new(0),\n            });\n            upload_queue\n                .inprogress_tasks\n                .insert(task.task_id, Arc::clone(&task));\n\n            // Spawn task to perform the task\n            let self_rc = Arc::clone(self);\n            let tenant_shard_id = self.tenant_shard_id;\n            let timeline_id = self.timeline_id;\n            task_mgr::spawn(\n                &self.runtime,\n                TaskKind::RemoteUploadTask,\n                self.tenant_shard_id,\n                Some(self.timeline_id),\n                \"remote upload\",\n                async move {\n                    self_rc.perform_upload_task(task).await;\n                    Ok(())\n                }\n                .instrument(info_span!(parent: None, \"remote_upload\", tenant_id=%tenant_shard_id.tenant_id, shard_id=%tenant_shard_id.shard_slug(), %timeline_id, %upload_task_id)),\n            );\n\n            // Loop back to process next task\n        }\n    }\n\n    ///\n    /// Perform an upload task.\n    ///\n    /// 
The task is in the `inprogress_tasks` list. This function will try to\n    /// execute it, retrying forever. On successful completion, the task is\n    /// removed from the `inprogress_tasks` list, and any next task(s) in the\n    /// queue that were waiting for its completion are launched.\n    ///\n    /// The task can be shut down, however. That leads to stopping the whole\n    /// queue.\n    ///\n    async fn perform_upload_task(self: &Arc<Self>, task: Arc<UploadTask>) {\n        let cancel = shutdown_token();\n        // Loop to retry until it completes.\n        loop {\n            // If we're requested to shut down, close up shop and exit.\n            //\n            // Note: We only check for the shutdown requests between retries, so\n            // if a shutdown request arrives while we're busy uploading, in the\n            // upload::upload:*() call below, we will wait and not exit until it has\n            // finished. We probably could cancel the upload by simply dropping\n            // the Future, but we're not 100% sure if the remote storage library\n            // is cancellation safe, so we don't dare to do that. 
Hopefully, the\n            // upload finishes or times out soon enough.\n            if cancel.is_cancelled() {\n                info!(\"upload task cancelled by shutdown request\");\n                self.stop();\n                return;\n            }\n\n            // Assert that we don't modify a layer that's referenced by the current index.\n            if cfg!(debug_assertions) {\n                let modified = match &task.op {\n                    UploadOp::UploadLayer(layer, layer_metadata, _) => {\n                        vec![(layer.layer_desc().layer_name(), layer_metadata)]\n                    }\n                    UploadOp::Delete(delete) => {\n                        delete.layers.iter().map(|(n, m)| (n.clone(), m)).collect()\n                    }\n                    // These don't modify layers.\n                    UploadOp::UploadMetadata { .. } => Vec::new(),\n                    UploadOp::Barrier(_) => Vec::new(),\n                    UploadOp::Shutdown => Vec::new(),\n                };\n                if let Ok(queue) = self.upload_queue.lock().unwrap().initialized_mut() {\n                    for (ref name, metadata) in modified {\n                        debug_assert!(\n                            !queue.clean.0.references(name, metadata),\n                            \"layer {name} modified while referenced by index\",\n                        );\n                    }\n                }\n            }\n\n            let upload_result: anyhow::Result<()> = match &task.op {\n                UploadOp::UploadLayer(layer, layer_metadata, mode) => {\n                    // TODO: check if this mechanism can be removed now that can_bypass() performs\n                    // conflict checks during scheduling.\n                    if let Some(OpType::FlushDeletion) = mode {\n                        if self.config.read().unwrap().block_deletions {\n                            // Of course, this is not efficient... 
but usually the queue should be empty.\n                            let mut queue_locked = self.upload_queue.lock().unwrap();\n                            let mut detected = false;\n                            if let Ok(queue) = queue_locked.initialized_mut() {\n                                for list in queue.blocked_deletions.iter_mut() {\n                                    list.layers.retain(|(name, meta)| {\n                                        if name == &layer.layer_desc().layer_name()\n                                            && meta.generation == layer_metadata.generation\n                                        {\n                                            detected = true;\n                                            // remove the layer from deletion queue\n                                            false\n                                        } else {\n                                            // keep the layer\n                                            true\n                                        }\n                                    });\n                                }\n                            }\n                            if detected {\n                                info!(\n                                    \"cancelled blocked deletion of layer {} at gen {:?}\",\n                                    layer.layer_desc().layer_name(),\n                                    layer_metadata.generation\n                                );\n                            }\n                        } else {\n                            // TODO: we did not guarantee that upload task starts after deletion task, so there could be possibly race conditions\n                            // that we still get the layer deleted. 
But this only happens if someone creates a layer immediately after it's deleted,\n                            // which is not possible in the current system.\n                            info!(\n                                \"waiting for deletion queue flush to complete before uploading layer {} at gen {:?}\",\n                                layer.layer_desc().layer_name(),\n                                layer_metadata.generation\n                            );\n                            {\n                                // We are going to flush, we can clean up the recently deleted list.\n                                let mut queue_locked = self.upload_queue.lock().unwrap();\n                                if let Ok(queue) = queue_locked.initialized_mut() {\n                                    queue.recently_deleted.clear();\n                                }\n                            }\n                            if let Err(e) = self.deletion_queue_client.flush_execute().await {\n                                warn!(\n                                    \"failed to flush the deletion queue before uploading layer {} at gen {:?}, still proceeding to upload: {e:#} \",\n                                    layer.layer_desc().layer_name(),\n                                    layer_metadata.generation\n                                );\n                            } else {\n                                info!(\n                                    \"done flushing deletion queue before uploading layer {} at gen {:?}\",\n                                    layer.layer_desc().layer_name(),\n                                    layer_metadata.generation\n                                );\n                            }\n                        }\n                    }\n                    let local_path = layer.local_path();\n\n                    // We should only be uploading layers created by this `Tenant`'s lifetime, so\n                    // the 
metadata in the upload should always match our current generation.\n                    assert_eq!(layer_metadata.generation, self.generation);\n\n                    let remote_path = remote_layer_path(\n                        &self.tenant_shard_id.tenant_id,\n                        &self.timeline_id,\n                        layer_metadata.shard,\n                        &layer.layer_desc().layer_name(),\n                        layer_metadata.generation,\n                    );\n\n                    upload::upload_timeline_layer(\n                        &self.storage_impl,\n                        local_path,\n                        &remote_path,\n                        layer_metadata.file_size,\n                        &self.cancel,\n                    )\n                    .measure_remote_op(\n                        Some(TaskKind::RemoteUploadTask),\n                        RemoteOpFileKind::Layer,\n                        RemoteOpKind::Upload,\n                        Arc::clone(&self.metrics),\n                    )\n                    .await\n                }\n                UploadOp::UploadMetadata { uploaded } => {\n                    let res = upload::upload_index_part(\n                        &self.storage_impl,\n                        &self.tenant_shard_id,\n                        &self.timeline_id,\n                        self.generation,\n                        uploaded,\n                        &self.cancel,\n                    )\n                    .measure_remote_op(\n                        Some(TaskKind::RemoteUploadTask),\n                        RemoteOpFileKind::Index,\n                        RemoteOpKind::Upload,\n                        Arc::clone(&self.metrics),\n                    )\n                    .await;\n                    if res.is_ok() {\n                        self.update_remote_physical_size_gauge(Some(uploaded));\n                        let mention_having_future_layers = if cfg!(feature = \"testing\") 
{\n                            uploaded\n                                .layer_metadata\n                                .keys()\n                                .any(|x| x.is_in_future(uploaded.metadata.disk_consistent_lsn()))\n                        } else {\n                            false\n                        };\n                        if mention_having_future_layers {\n                            // find rationale near crate::tenant::timeline::init::cleanup_future_layer\n                            tracing::info!(\n                                disk_consistent_lsn = %uploaded.metadata.disk_consistent_lsn(),\n                                \"uploaded an index_part.json with future layers -- this is ok! if shutdown now, expect future layer cleanup\"\n                            );\n                        }\n                    }\n                    res\n                }\n                // TODO: this should wait for the deletion to be executed by the deletion queue.\n                // Otherwise, the deletion may race with an upload and wrongfully delete a newer\n                // file. Some of the above logic attempts to work around this, it should be replaced\n                // by the upload queue ordering guarantees (see `can_bypass`). 
See:\n                // <https://github.com/neondatabase/neon/issues/10283>.\n                UploadOp::Delete(delete) => {\n                    if self.config.read().unwrap().block_deletions {\n                        let mut queue_locked = self.upload_queue.lock().unwrap();\n                        if let Ok(queue) = queue_locked.initialized_mut() {\n                            queue.blocked_deletions.push(delete.clone());\n                        }\n                        Ok(())\n                    } else {\n                        pausable_failpoint!(\"before-delete-layer-pausable\");\n                        self.deletion_queue_client\n                            .push_layers(\n                                self.tenant_shard_id,\n                                self.timeline_id,\n                                self.generation,\n                                delete.layers.clone(),\n                            )\n                            .map_err(|e| anyhow::anyhow!(e))\n                    }\n                }\n                unexpected @ UploadOp::Barrier(_) | unexpected @ UploadOp::Shutdown => {\n                    // unreachable. Barrier operations are handled synchronously in\n                    // launch_queued_tasks\n                    warn!(\"unexpected {unexpected:?} operation in perform_upload_task\");\n                    break;\n                }\n            };\n\n            match upload_result {\n                Ok(()) => {\n                    break;\n                }\n                Err(e) if TimeoutOrCancel::caused_by_cancel(&e) => {\n                    // loop around to do the proper stopping\n                    continue;\n                }\n                Err(e) => {\n                    let retries = task.retries.fetch_add(1, Ordering::SeqCst);\n\n                    // Uploads can fail due to rate limits (IAM, S3), spurious network problems,\n                    // or other external reasons. 
Such issues are relatively regular, so log them\n                    // at info level at first, and only WARN if the operation fails repeatedly.\n                    //\n                    // (See similar logic for downloads in `download::download_retry`)\n                    if retries < FAILED_UPLOAD_WARN_THRESHOLD {\n                        info!(\n                            \"failed to perform remote task {}, will retry (attempt {}): {:#}\",\n                            task.op, retries, e\n                        );\n                    } else {\n                        warn!(\n                            \"failed to perform remote task {}, will retry (attempt {}): {:?}\",\n                            task.op, retries, e\n                        );\n                    }\n\n                    // sleep until it's time to retry, or we're cancelled\n                    exponential_backoff(\n                        retries,\n                        DEFAULT_BASE_BACKOFF_SECONDS,\n                        DEFAULT_MAX_BACKOFF_SECONDS,\n                        &cancel,\n                    )\n                    .await;\n                }\n            }\n        }\n\n        let retries = task.retries.load(Ordering::SeqCst);\n        if retries > 0 {\n            info!(\n                \"remote task {} completed successfully after {} retries\",\n                task.op, retries\n            );\n        } else {\n            debug!(\"remote task {} completed successfully\", task.op);\n        }\n\n        // The task has completed successfully. 
Remove it from the in-progress list.\n        let lsn_update = {\n            let mut upload_queue_guard = self.upload_queue.lock().unwrap();\n            let upload_queue = match upload_queue_guard.deref_mut() {\n                UploadQueue::Uninitialized => panic!(\n                    \"callers are responsible for ensuring this is only called on an initialized queue\"\n                ),\n                UploadQueue::Stopped(_stopped) => None,\n                UploadQueue::Initialized(qi) => Some(qi),\n            };\n\n            let upload_queue = match upload_queue {\n                Some(upload_queue) => upload_queue,\n                None => {\n                    info!(\"another concurrent task already stopped the queue\");\n                    return;\n                }\n            };\n\n            upload_queue.inprogress_tasks.remove(&task.task_id);\n\n            let lsn_update = match task.op {\n                UploadOp::UploadLayer(_, _, _) => None,\n                UploadOp::UploadMetadata { ref uploaded } => {\n                    // the task id is reused as a monotonicity check for storing the \"clean\"\n                    // IndexPart.\n                    let last_updater = upload_queue.clean.1;\n                    let is_later = last_updater.is_some_and(|task_id| task_id < task.task_id);\n                    let monotone = is_later || last_updater.is_none();\n\n                    assert!(\n                        monotone,\n                        \"no two index uploads should be completing at the same time, prev={last_updater:?}, task.task_id={}\",\n                        task.task_id\n                    );\n\n                    // not taking ownership is wasteful\n                    upload_queue.clean.0.clone_from(uploaded);\n                    upload_queue.clean.1 = Some(task.task_id);\n\n                    let lsn = upload_queue.clean.0.metadata.disk_consistent_lsn();\n                    self.metrics\n                        
.projected_remote_consistent_lsn_gauge\n                        .set(lsn.0);\n\n                    if self.generation.is_none() {\n                        // Legacy mode: skip validating generation\n                        upload_queue.visible_remote_consistent_lsn.store(lsn);\n                        None\n                    } else if self\n                        .config\n                        .read()\n                        .unwrap()\n                        .process_remote_consistent_lsn_updates\n                    {\n                        Some((lsn, upload_queue.visible_remote_consistent_lsn.clone()))\n                    } else {\n                        // Our config disables remote_consistent_lsn updates: drop it.\n                        None\n                    }\n                }\n                UploadOp::Delete(_) => None,\n                UploadOp::Barrier(..) | UploadOp::Shutdown => unreachable!(),\n            };\n\n            // Launch any queued tasks that were unblocked by this one.\n            self.launch_queued_tasks(upload_queue);\n            lsn_update\n        };\n\n        if let Some((lsn, slot)) = lsn_update {\n            // Updates to the remote_consistent_lsn we advertise to pageservers\n            // are all routed through the DeletionQueue, to enforce important\n            // data safety guarantees (see docs/rfcs/025-generation-numbers.md)\n            self.deletion_queue_client\n                .update_remote_consistent_lsn(\n                    self.tenant_shard_id,\n                    self.timeline_id,\n                    self.generation,\n                    lsn,\n                    slot,\n                )\n                .await;\n        }\n\n        self.metric_end(&task.op);\n        for coalesced_op in &task.coalesced_ops {\n            self.metric_end(coalesced_op);\n        }\n    }\n\n    fn metric_impl(\n        &self,\n        op: &UploadOp,\n    ) -> Option<(\n        RemoteOpFileKind,\n        
RemoteOpKind,\n        RemoteTimelineClientMetricsCallTrackSize,\n    )> {\n        use RemoteTimelineClientMetricsCallTrackSize::DontTrackSize;\n        let res = match op {\n            UploadOp::UploadLayer(_, m, _) => (\n                RemoteOpFileKind::Layer,\n                RemoteOpKind::Upload,\n                RemoteTimelineClientMetricsCallTrackSize::Bytes(m.file_size),\n            ),\n            UploadOp::UploadMetadata { .. } => (\n                RemoteOpFileKind::Index,\n                RemoteOpKind::Upload,\n                DontTrackSize {\n                    reason: \"metadata uploads are tiny\",\n                },\n            ),\n            UploadOp::Delete(_delete) => (\n                RemoteOpFileKind::Layer,\n                RemoteOpKind::Delete,\n                DontTrackSize {\n                    reason: \"should we track deletes? positive or negative sign?\",\n                },\n            ),\n            UploadOp::Barrier(..) | UploadOp::Shutdown => {\n                // we do not account these\n                return None;\n            }\n        };\n        Some(res)\n    }\n\n    fn metric_begin(&self, op: &UploadOp) {\n        let (file_kind, op_kind, track_bytes) = match self.metric_impl(op) {\n            Some(x) => x,\n            None => return,\n        };\n        let guard = self.metrics.call_begin(&file_kind, &op_kind, track_bytes);\n        guard.will_decrement_manually(); // in metric_end(), see right below\n    }\n\n    fn metric_end(&self, op: &UploadOp) {\n        let (file_kind, op_kind, track_bytes) = match self.metric_impl(op) {\n            Some(x) => x,\n            None => return,\n        };\n        self.metrics.call_end(&file_kind, &op_kind, track_bytes);\n    }\n\n    /// Close the upload queue for new operations and cancel queued operations.\n    ///\n    /// Use [`RemoteTimelineClient::shutdown`] for graceful stop.\n    ///\n    /// In-progress operations will still be running after this function 
returns.\n    /// Use `task_mgr::shutdown_tasks(Some(TaskKind::RemoteUploadTask), Some(self.tenant_shard_id), Some(timeline_id))`\n    /// to wait for them to complete, after calling this function.\n    pub(crate) fn stop(&self) {\n        // Whichever *task* for this RemoteTimelineClient grabs the mutex first will transition the queue\n        // into stopped state, thereby dropping all off the queued *ops* which haven't become *tasks* yet.\n        // The other *tasks* will come here and observe an already shut down queue and hence simply wrap up their business.\n        let mut guard = self.upload_queue.lock().unwrap();\n        self.stop_impl(&mut guard);\n    }\n\n    fn stop_impl(&self, guard: &mut std::sync::MutexGuard<UploadQueue>) {\n        match &mut **guard {\n            UploadQueue::Uninitialized => {\n                info!(\"UploadQueue is in state Uninitialized, nothing to do\");\n                **guard = UploadQueue::Stopped(UploadQueueStopped::Uninitialized);\n            }\n            UploadQueue::Stopped(_) => {\n                // nothing to do\n                info!(\"another concurrent task already shut down the queue\");\n            }\n            UploadQueue::Initialized(initialized) => {\n                info!(\"shutting down upload queue\");\n\n                // Replace the queue with the Stopped state, taking ownership of the old\n                // Initialized queue. 
We will do some checks on it, and then drop it.\n                let qi = {\n                    // Here we preserve working version of the upload queue for possible use during deletions.\n                    // In-place replace of Initialized to Stopped can be done with the help of https://github.com/Sgeo/take_mut\n                    // but for this use case it doesnt really makes sense to bring unsafe code only for this usage point.\n                    // Deletion is not really perf sensitive so there shouldnt be any problems with cloning a fraction of it.\n                    let upload_queue_for_deletion = UploadQueueInitialized {\n                        inprogress_limit: initialized.inprogress_limit,\n                        task_counter: 0,\n                        dirty: initialized.dirty.clone(),\n                        clean: initialized.clean.clone(),\n                        latest_files_changes_since_metadata_upload_scheduled: 0,\n                        visible_remote_consistent_lsn: initialized\n                            .visible_remote_consistent_lsn\n                            .clone(),\n                        inprogress_tasks: HashMap::default(),\n                        queued_operations: VecDeque::default(),\n                        #[cfg(feature = \"testing\")]\n                        dangling_files: HashMap::default(),\n                        blocked_deletions: Vec::new(),\n                        shutting_down: false,\n                        shutdown_ready: Arc::new(tokio::sync::Semaphore::new(0)),\n                        recently_deleted: HashSet::new(),\n                    };\n\n                    let upload_queue = std::mem::replace(\n                        &mut **guard,\n                        UploadQueue::Stopped(UploadQueueStopped::Deletable(\n                            UploadQueueStoppedDeletable {\n                                upload_queue_for_deletion,\n                                deleted_at: 
SetDeletedFlagProgress::NotRunning,\n                            },\n                        )),\n                    );\n                    if let UploadQueue::Initialized(qi) = upload_queue {\n                        qi\n                    } else {\n                        unreachable!(\"we checked in the match above that it is Initialized\");\n                    }\n                };\n\n                // We don't need to do anything here for in-progress tasks. They will finish\n                // on their own, decrement the unfinished-task counter themselves, and observe\n                // that the queue is Stopped.\n                drop(qi.inprogress_tasks);\n\n                // Tear down queued ops\n                for op in qi.queued_operations.into_iter() {\n                    self.metric_end(&op);\n                    // Dropping UploadOp::Barrier() here will make wait_completion() return with an Err()\n                    // which is exactly what we want to happen.\n                    drop(op);\n                }\n            }\n        }\n    }\n\n    /// Returns an accessor which will hold the UploadQueue mutex for accessing the upload queue\n    /// externally to RemoteTimelineClient.\n    pub(crate) fn initialized_upload_queue(\n        &self,\n    ) -> Result<UploadQueueAccessor<'_>, NotInitialized> {\n        let mut inner = self.upload_queue.lock().unwrap();\n        inner.initialized_mut()?;\n        Ok(UploadQueueAccessor { inner })\n    }\n\n    pub(crate) fn no_pending_work(&self) -> bool {\n        let inner = self.upload_queue.lock().unwrap();\n        match &*inner {\n            UploadQueue::Uninitialized\n            | UploadQueue::Stopped(UploadQueueStopped::Uninitialized) => true,\n            UploadQueue::Stopped(UploadQueueStopped::Deletable(x)) => {\n                x.upload_queue_for_deletion.no_pending_work()\n            }\n            UploadQueue::Initialized(x) => x.no_pending_work(),\n        }\n    }\n\n    /// 'foreign' 
in the sense that it does not belong to this tenant shard.  This method\n    /// is used during GC for other shards to get the index of shard zero.\n    pub(crate) async fn download_foreign_index(\n        &self,\n        shard_number: ShardNumber,\n        cancel: &CancellationToken,\n    ) -> Result<(IndexPart, Generation, std::time::SystemTime), DownloadError> {\n        let foreign_shard_id = TenantShardId {\n            shard_number,\n            shard_count: self.tenant_shard_id.shard_count,\n            tenant_id: self.tenant_shard_id.tenant_id,\n        };\n        download_index_part(\n            &self.storage_impl,\n            &foreign_shard_id,\n            &self.timeline_id,\n            Generation::MAX,\n            cancel,\n        )\n        .await\n    }\n}\n\npub(crate) struct UploadQueueAccessor<'a> {\n    inner: std::sync::MutexGuard<'a, UploadQueue>,\n}\n\nimpl UploadQueueAccessor<'_> {\n    pub(crate) fn latest_uploaded_index_part(&self) -> &IndexPart {\n        match &*self.inner {\n            UploadQueue::Initialized(x) => &x.clean.0,\n            UploadQueue::Uninitialized | UploadQueue::Stopped(_) => {\n                unreachable!(\"checked before constructing\")\n            }\n        }\n    }\n}\n\npub fn remote_tenant_path(tenant_shard_id: &TenantShardId) -> RemotePath {\n    let path = format!(\"tenants/{tenant_shard_id}\");\n    RemotePath::from_string(&path).expect(\"Failed to construct path\")\n}\n\npub fn remote_tenant_manifest_path(\n    tenant_shard_id: &TenantShardId,\n    generation: Generation,\n) -> RemotePath {\n    let path = format!(\n        \"tenants/{tenant_shard_id}/tenant-manifest{}.json\",\n        generation.get_suffix()\n    );\n    RemotePath::from_string(&path).expect(\"Failed to construct path\")\n}\n\n/// Prefix to all generations' manifest objects in a tenant shard\npub fn remote_tenant_manifest_prefix(tenant_shard_id: &TenantShardId) -> RemotePath {\n    let path = 
format!(\"tenants/{tenant_shard_id}/tenant-manifest\",);\n    RemotePath::from_string(&path).expect(\"Failed to construct path\")\n}\n\npub fn remote_timelines_path(tenant_shard_id: &TenantShardId) -> RemotePath {\n    let path = format!(\"tenants/{tenant_shard_id}/{TIMELINES_SEGMENT_NAME}\");\n    RemotePath::from_string(&path).expect(\"Failed to construct path\")\n}\n\nfn remote_timelines_path_unsharded(tenant_id: &TenantId) -> RemotePath {\n    let path = format!(\"tenants/{tenant_id}/{TIMELINES_SEGMENT_NAME}\");\n    RemotePath::from_string(&path).expect(\"Failed to construct path\")\n}\n\npub fn remote_timeline_path(\n    tenant_shard_id: &TenantShardId,\n    timeline_id: &TimelineId,\n) -> RemotePath {\n    remote_timelines_path(tenant_shard_id).join(Utf8Path::new(&timeline_id.to_string()))\n}\n\n/// Obtains the path of the given Layer in the remote\n///\n/// Note that the shard component of a remote layer path is _not_ always the same\n/// as in the TenantShardId of the caller: tenants may reference layers from a different\n/// ShardIndex.  Use the ShardIndex from the layer's metadata.\npub fn remote_layer_path(\n    tenant_id: &TenantId,\n    timeline_id: &TimelineId,\n    shard: ShardIndex,\n    layer_file_name: &LayerName,\n    generation: Generation,\n) -> RemotePath {\n    // Generation-aware key format\n    let path = format!(\n        \"tenants/{tenant_id}{0}/{TIMELINES_SEGMENT_NAME}/{timeline_id}/{1}{2}\",\n        shard.get_suffix(),\n        layer_file_name,\n        generation.get_suffix()\n    );\n\n    RemotePath::from_string(&path).expect(\"Failed to construct path\")\n}\n\n/// Returns true if a and b have the same layer path within a tenant/timeline. 
This is essentially\n/// remote_layer_path(a) == remote_layer_path(b) without the string allocations.\n///\n/// TODO: there should be a variant of LayerName for the physical path that contains information\n/// about the shard and generation, such that this could be replaced by a simple comparison.\npub fn is_same_remote_layer_path(\n    aname: &LayerName,\n    ameta: &LayerFileMetadata,\n    bname: &LayerName,\n    bmeta: &LayerFileMetadata,\n) -> bool {\n    // NB: don't assert remote_layer_path(a) == remote_layer_path(b); too expensive even for debug.\n    aname == bname && ameta.shard == bmeta.shard && ameta.generation == bmeta.generation\n}\n\npub fn remote_initdb_archive_path(tenant_id: &TenantId, timeline_id: &TimelineId) -> RemotePath {\n    RemotePath::from_string(&format!(\n        \"tenants/{tenant_id}/{TIMELINES_SEGMENT_NAME}/{timeline_id}/{INITDB_PATH}\"\n    ))\n    .expect(\"Failed to construct path\")\n}\n\npub fn remote_initdb_preserved_archive_path(\n    tenant_id: &TenantId,\n    timeline_id: &TimelineId,\n) -> RemotePath {\n    RemotePath::from_string(&format!(\n        \"tenants/{tenant_id}/{TIMELINES_SEGMENT_NAME}/{timeline_id}/{INITDB_PRESERVED_PATH}\"\n    ))\n    .expect(\"Failed to construct path\")\n}\n\npub fn remote_index_path(\n    tenant_shard_id: &TenantShardId,\n    timeline_id: &TimelineId,\n    generation: Generation,\n) -> RemotePath {\n    RemotePath::from_string(&format!(\n        \"tenants/{tenant_shard_id}/{TIMELINES_SEGMENT_NAME}/{timeline_id}/{0}{1}\",\n        IndexPart::FILE_NAME,\n        generation.get_suffix()\n    ))\n    .expect(\"Failed to construct path\")\n}\n\npub(crate) fn remote_heatmap_path(tenant_shard_id: &TenantShardId) -> RemotePath {\n    RemotePath::from_string(&format!(\n        \"tenants/{tenant_shard_id}/{TENANT_HEATMAP_BASENAME}\"\n    ))\n    .expect(\"Failed to construct path\")\n}\n\n/// Given the key of an index, parse out the generation part of the name\npub fn parse_remote_index_path(path: 
RemotePath) -> Option<Generation> {\n    let file_name = match path.get_path().file_name() {\n        Some(f) => f,\n        None => {\n            // Unexpected: we should be seeing index_part.json paths only\n            tracing::warn!(\"Malformed index key {}\", path);\n            return None;\n        }\n    };\n\n    match file_name.split_once('-') {\n        Some((_, gen_suffix)) => Generation::parse_suffix(gen_suffix),\n        None => None,\n    }\n}\n\n/// Given the key of a tenant manifest, parse out the generation number\npub fn parse_remote_tenant_manifest_path(path: RemotePath) -> Option<Generation> {\n    static RE: OnceLock<Regex> = OnceLock::new();\n    let re = RE.get_or_init(|| Regex::new(r\".*tenant-manifest-([0-9a-f]{8}).json\").unwrap());\n    re.captures(path.get_path().as_str())\n        .and_then(|c| c.get(1))\n        .and_then(|m| Generation::parse_suffix(m.as_str()))\n}\n\n#[cfg(test)]\nmod tests {\n    use std::collections::HashSet;\n\n    use super::*;\n    use crate::DEFAULT_PG_VERSION;\n    use crate::context::RequestContext;\n    use crate::tenant::config::AttachmentMode;\n    use crate::tenant::harness::{TIMELINE_ID, TenantHarness};\n    use crate::tenant::storage_layer::layer::local_layer_path;\n    use crate::tenant::{TenantShard, Timeline};\n\n    pub(super) fn dummy_contents(name: &str) -> Vec<u8> {\n        format!(\"contents for {name}\").into()\n    }\n\n    pub(super) fn dummy_metadata(disk_consistent_lsn: Lsn) -> TimelineMetadata {\n        let metadata = TimelineMetadata::new(\n            disk_consistent_lsn,\n            None,\n            None,\n            Lsn(0),\n            Lsn(0),\n            Lsn(0),\n            // Any version will do\n            // but it should be consistent with the one in the tests\n            crate::DEFAULT_PG_VERSION,\n        );\n\n        // go through serialize + deserialize to fix the header, including checksum\n        
TimelineMetadata::from_bytes(&metadata.to_bytes().unwrap()).unwrap()\n    }\n\n    fn assert_file_list(a: &HashSet<LayerName>, b: &[&str]) {\n        let mut avec: Vec<String> = a.iter().map(|x| x.to_string()).collect();\n        avec.sort();\n\n        let mut bvec = b.to_vec();\n        bvec.sort_unstable();\n\n        assert_eq!(avec, bvec);\n    }\n\n    fn assert_remote_files(expected: &[&str], remote_path: &Utf8Path, generation: Generation) {\n        let mut expected: Vec<String> = expected\n            .iter()\n            .map(|x| format!(\"{}{}\", x, generation.get_suffix()))\n            .collect();\n        expected.sort();\n\n        let mut found: Vec<String> = Vec::new();\n        for entry in std::fs::read_dir(remote_path).unwrap().flatten() {\n            let entry_name = entry.file_name();\n            let fname = entry_name.to_str().unwrap();\n            found.push(String::from(fname));\n        }\n        found.sort();\n\n        assert_eq!(found, expected);\n    }\n\n    struct TestSetup {\n        harness: TenantHarness,\n        tenant: Arc<TenantShard>,\n        timeline: Arc<Timeline>,\n        tenant_ctx: RequestContext,\n    }\n\n    impl TestSetup {\n        async fn new(test_name: &str) -> anyhow::Result<Self> {\n            let test_name = Box::leak(Box::new(format!(\"remote_timeline_client__{test_name}\")));\n            let harness = TenantHarness::create(test_name).await?;\n            let (tenant, ctx) = harness.load().await;\n\n            let timeline = tenant\n                .create_test_timeline(TIMELINE_ID, Lsn(8), DEFAULT_PG_VERSION, &ctx)\n                .await?;\n\n            Ok(Self {\n                harness,\n                tenant,\n                timeline,\n                tenant_ctx: ctx,\n            })\n        }\n\n        /// Construct a RemoteTimelineClient in an arbitrary generation\n        fn build_client(&self, generation: Generation) -> Arc<RemoteTimelineClient> {\n            let location_conf = 
AttachedLocationConfig {\n                generation,\n                attach_mode: AttachmentMode::Single,\n            };\n            Arc::new(RemoteTimelineClient {\n                conf: self.harness.conf,\n                runtime: tokio::runtime::Handle::current(),\n                tenant_shard_id: self.harness.tenant_shard_id,\n                timeline_id: TIMELINE_ID,\n                generation,\n                storage_impl: self.harness.remote_storage.clone(),\n                deletion_queue_client: self.harness.deletion_queue.new_client(),\n                upload_queue: Mutex::new(UploadQueue::Uninitialized),\n                metrics: Arc::new(RemoteTimelineClientMetrics::new(\n                    &self.harness.tenant_shard_id,\n                    &TIMELINE_ID,\n                )),\n                config: std::sync::RwLock::new(RemoteTimelineClientConfig::from(&location_conf)),\n                cancel: CancellationToken::new(),\n            })\n        }\n\n        /// A tracing::Span that satisfies remote_timeline_client methods that assert tenant_id\n        /// and timeline_id are present.\n        fn span(&self) -> tracing::Span {\n            tracing::info_span!(\n                \"test\",\n                tenant_id = %self.harness.tenant_shard_id.tenant_id,\n                shard_id = %self.harness.tenant_shard_id.shard_slug(),\n                timeline_id = %TIMELINE_ID\n            )\n        }\n    }\n\n    // Test scheduling\n    #[tokio::test]\n    async fn upload_scheduling() {\n        // Test outline:\n        //\n        // Schedule upload of a bunch of layers. Check that they are started immediately, not queued\n        // Schedule upload of index. Check that it is queued\n        // let the layer file uploads finish. Check that the index-upload is now started\n        // let the index-upload finish.\n        //\n        // Download back the index.json. Check that the list of files is correct\n        //\n        // Schedule upload. 
Schedule deletion. Check that the deletion is queued\n        // let upload finish. Check that deletion is now started\n        // Schedule another deletion. Check that it's launched immediately.\n        // Schedule index upload. Check that it's queued\n\n        let test_setup = TestSetup::new(\"upload_scheduling\").await.unwrap();\n        let span = test_setup.span();\n        let _guard = span.enter();\n\n        let TestSetup {\n            harness,\n            tenant: _tenant,\n            timeline,\n            tenant_ctx: _tenant_ctx,\n        } = test_setup;\n\n        let client = &timeline.remote_client;\n\n        // Download back the index.json, and check that the list of files is correct\n        let initial_index_part = match client\n            .download_index_file(&CancellationToken::new())\n            .await\n            .unwrap()\n        {\n            MaybeDeletedIndexPart::IndexPart(index_part) => index_part,\n            MaybeDeletedIndexPart::Deleted(_) => panic!(\"unexpectedly got deleted index part\"),\n        };\n        let initial_layers = initial_index_part\n            .layer_metadata\n            .keys()\n            .map(|f| f.to_owned())\n            .collect::<HashSet<LayerName>>();\n        let initial_layer = {\n            assert!(initial_layers.len() == 1);\n            initial_layers.into_iter().next().unwrap()\n        };\n\n        let timeline_path = harness.timeline_path(&TIMELINE_ID);\n\n        println!(\"workdir: {}\", harness.conf.workdir);\n\n        let remote_timeline_dir = harness\n            .remote_fs_dir\n            .join(timeline_path.strip_prefix(&harness.conf.workdir).unwrap());\n        println!(\"remote_timeline_dir: {remote_timeline_dir}\");\n\n        let generation = harness.generation;\n        let shard = harness.shard;\n\n        // Create a couple of dummy files,  schedule upload for them\n\n        let layers = [\n            
(\"000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__00000000016B59D8-00000000016B5A51\".parse().unwrap(), dummy_contents(\"foo\")),\n            (\"000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__00000000016B59D9-00000000016B5A52\".parse().unwrap(), dummy_contents(\"bar\")),\n            (\"000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__00000000016B59DA-00000000016B5A53\".parse().unwrap(), dummy_contents(\"baz\"))\n        ]\n        .into_iter()\n        .map(|(name, contents): (LayerName, Vec<u8>)| {\n\n            let local_path = local_layer_path(\n                harness.conf,\n                &timeline.tenant_shard_id,\n                &timeline.timeline_id,\n                &name,\n                &generation,\n            );\n            std::fs::write(&local_path, &contents).unwrap();\n\n            Layer::for_resident(\n                harness.conf,\n                &timeline,\n                local_path,\n                name,\n                LayerFileMetadata::new(contents.len() as u64, generation, shard),\n            )\n        }).collect::<Vec<_>>();\n\n        client\n            .schedule_layer_file_upload(layers[0].clone())\n            .unwrap();\n        client\n            .schedule_layer_file_upload(layers[1].clone())\n            .unwrap();\n\n        // Check that they are started immediately, not queued\n        //\n        // this works because we running within block_on, so any futures are now queued up until\n        // our next await point.\n        {\n            let mut guard = client.upload_queue.lock().unwrap();\n            let upload_queue = guard.initialized_mut().unwrap();\n            assert!(upload_queue.queued_operations.is_empty());\n            assert_eq!(upload_queue.inprogress_tasks.len(), 2);\n            assert_eq!(upload_queue.num_inprogress_layer_uploads(), 2);\n\n            // also check that `latest_file_changes` was updated\n         
   assert!(upload_queue.latest_files_changes_since_metadata_upload_scheduled == 2);\n        }\n\n        // Schedule upload of index. Check that it is queued\n        let metadata = dummy_metadata(Lsn(0x20));\n        client\n            .schedule_index_upload_for_full_metadata_update(&metadata)\n            .unwrap();\n        {\n            let mut guard = client.upload_queue.lock().unwrap();\n            let upload_queue = guard.initialized_mut().unwrap();\n            assert!(upload_queue.queued_operations.len() == 1);\n            assert!(upload_queue.latest_files_changes_since_metadata_upload_scheduled == 0);\n        }\n\n        // Wait for the uploads to finish\n        client.wait_completion().await.unwrap();\n        {\n            let mut guard = client.upload_queue.lock().unwrap();\n            let upload_queue = guard.initialized_mut().unwrap();\n\n            assert!(upload_queue.queued_operations.is_empty());\n            assert!(upload_queue.inprogress_tasks.is_empty());\n        }\n\n        // Download back the index.json, and check that the list of files is correct\n        let index_part = match client\n            .download_index_file(&CancellationToken::new())\n            .await\n            .unwrap()\n        {\n            MaybeDeletedIndexPart::IndexPart(index_part) => index_part,\n            MaybeDeletedIndexPart::Deleted(_) => panic!(\"unexpectedly got deleted index part\"),\n        };\n\n        assert_file_list(\n            &index_part\n                .layer_metadata\n                .keys()\n                .map(|f| f.to_owned())\n                .collect(),\n            &[\n                &initial_layer.to_string(),\n                &layers[0].layer_desc().layer_name().to_string(),\n                &layers[1].layer_desc().layer_name().to_string(),\n            ],\n        );\n        assert_eq!(index_part.metadata, metadata);\n\n        // Schedule upload and then a deletion. 
Check that the deletion is queued\n        client\n            .schedule_layer_file_upload(layers[2].clone())\n            .unwrap();\n\n        // this is no longer consistent with how deletion works with Layer::drop, but in this test\n        // keep using schedule_layer_file_deletion because we don't have a way to wait for the\n        // spawn_blocking started by the drop.\n        client\n            .schedule_layer_file_deletion(&[layers[0].layer_desc().layer_name()])\n            .unwrap();\n        {\n            let mut guard = client.upload_queue.lock().unwrap();\n            let upload_queue = guard.initialized_mut().unwrap();\n\n            // Deletion schedules upload of the index file, and the file deletion itself\n            assert_eq!(upload_queue.queued_operations.len(), 2);\n            assert_eq!(upload_queue.inprogress_tasks.len(), 1);\n            assert_eq!(upload_queue.num_inprogress_layer_uploads(), 1);\n            assert_eq!(upload_queue.num_inprogress_deletions(), 0);\n            assert_eq!(\n                upload_queue.latest_files_changes_since_metadata_upload_scheduled,\n                0\n            );\n        }\n        assert_remote_files(\n            &[\n                &initial_layer.to_string(),\n                &layers[0].layer_desc().layer_name().to_string(),\n                &layers[1].layer_desc().layer_name().to_string(),\n                \"index_part.json\",\n            ],\n            &remote_timeline_dir,\n            generation,\n        );\n\n        // Finish them\n        client.wait_completion().await.unwrap();\n        harness.deletion_queue.pump().await;\n\n        assert_remote_files(\n            &[\n                &initial_layer.to_string(),\n                &layers[1].layer_desc().layer_name().to_string(),\n                &layers[2].layer_desc().layer_name().to_string(),\n                \"index_part.json\",\n            ],\n            &remote_timeline_dir,\n            generation,\n        );\n    
}\n\n    #[tokio::test]\n    async fn bytes_unfinished_gauge_for_layer_file_uploads() {\n        // Setup\n\n        let TestSetup {\n            harness,\n            tenant: _tenant,\n            timeline,\n            ..\n        } = TestSetup::new(\"metrics\").await.unwrap();\n        let client = &timeline.remote_client;\n\n        let layer_file_name_1: LayerName = \"000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__00000000016B59D8-00000000016B5A51\".parse().unwrap();\n        let local_path = local_layer_path(\n            harness.conf,\n            &timeline.tenant_shard_id,\n            &timeline.timeline_id,\n            &layer_file_name_1,\n            &harness.generation,\n        );\n        let content_1 = dummy_contents(\"foo\");\n        std::fs::write(&local_path, &content_1).unwrap();\n\n        let layer_file_1 = Layer::for_resident(\n            harness.conf,\n            &timeline,\n            local_path,\n            layer_file_name_1.clone(),\n            LayerFileMetadata::new(content_1.len() as u64, harness.generation, harness.shard),\n        );\n\n        #[derive(Debug, PartialEq, Clone, Copy)]\n        struct BytesStartedFinished {\n            started: Option<usize>,\n            finished: Option<usize>,\n        }\n        impl std::ops::Add for BytesStartedFinished {\n            type Output = Self;\n            fn add(self, rhs: Self) -> Self::Output {\n                Self {\n                    started: self.started.map(|v| v + rhs.started.unwrap_or(0)),\n                    finished: self.finished.map(|v| v + rhs.finished.unwrap_or(0)),\n                }\n            }\n        }\n        let get_bytes_started_stopped = || {\n            let started = client\n                .metrics\n                .get_bytes_started_counter_value(&RemoteOpFileKind::Layer, &RemoteOpKind::Upload)\n                .map(|v| v.try_into().unwrap());\n            let stopped = client\n                .metrics\n             
   .get_bytes_finished_counter_value(&RemoteOpFileKind::Layer, &RemoteOpKind::Upload)\n                .map(|v| v.try_into().unwrap());\n            BytesStartedFinished {\n                started,\n                finished: stopped,\n            }\n        };\n\n        // Test\n        tracing::info!(\"now doing actual test\");\n\n        let actual_a = get_bytes_started_stopped();\n\n        client\n            .schedule_layer_file_upload(layer_file_1.clone())\n            .unwrap();\n\n        let actual_b = get_bytes_started_stopped();\n\n        client.wait_completion().await.unwrap();\n\n        let actual_c = get_bytes_started_stopped();\n\n        // Validate\n\n        let expected_b = actual_a\n            + BytesStartedFinished {\n                started: Some(content_1.len()),\n                // assert that the _finished metric is created eagerly so that subtractions work on first sample\n                finished: Some(0),\n            };\n        assert_eq!(actual_b, expected_b);\n\n        let expected_c = actual_a\n            + BytesStartedFinished {\n                started: Some(content_1.len()),\n                finished: Some(content_1.len()),\n            };\n        assert_eq!(actual_c, expected_c);\n    }\n\n    async fn inject_index_part(test_state: &TestSetup, generation: Generation) -> IndexPart {\n        // An empty IndexPart, just sufficient to ensure deserialization will succeed\n        let example_index_part = IndexPart::example();\n\n        let index_part_bytes = serde_json::to_vec(&example_index_part).unwrap();\n\n        let index_path = test_state.harness.remote_fs_dir.join(\n            remote_index_path(\n                &test_state.harness.tenant_shard_id,\n                &TIMELINE_ID,\n                generation,\n            )\n            .get_path(),\n        );\n\n        std::fs::create_dir_all(index_path.parent().unwrap())\n            .expect(\"creating test dir should work\");\n\n        eprintln!(\"Writing 
{index_path}\");\n        std::fs::write(&index_path, index_part_bytes).unwrap();\n        example_index_part\n    }\n\n    /// Assert that when a RemoteTimelineclient in generation `get_generation` fetches its\n    /// index, the IndexPart returned is equal to `expected`\n    async fn assert_got_index_part(\n        test_state: &TestSetup,\n        get_generation: Generation,\n        expected: &IndexPart,\n    ) {\n        let client = test_state.build_client(get_generation);\n\n        let download_r = client\n            .download_index_file(&CancellationToken::new())\n            .await\n            .expect(\"download should always succeed\");\n        assert!(matches!(download_r, MaybeDeletedIndexPart::IndexPart(_)));\n        match download_r {\n            MaybeDeletedIndexPart::IndexPart(index_part) => {\n                assert_eq!(&index_part, expected);\n            }\n            MaybeDeletedIndexPart::Deleted(_index_part) => panic!(\"Test doesn't set deleted_at\"),\n        }\n    }\n\n    #[tokio::test]\n    async fn index_part_download_simple() -> anyhow::Result<()> {\n        let test_state = TestSetup::new(\"index_part_download_simple\").await.unwrap();\n        let span = test_state.span();\n        let _guard = span.enter();\n\n        // Simple case: we are in generation N, load the index from generation N - 1\n        let generation_n = 5;\n        let injected = inject_index_part(&test_state, Generation::new(generation_n - 1)).await;\n\n        assert_got_index_part(&test_state, Generation::new(generation_n), &injected).await;\n\n        Ok(())\n    }\n\n    #[tokio::test]\n    async fn index_part_download_ordering() -> anyhow::Result<()> {\n        let test_state = TestSetup::new(\"index_part_download_ordering\")\n            .await\n            .unwrap();\n\n        let span = test_state.span();\n        let _guard = span.enter();\n\n        // A generation-less IndexPart exists in the bucket, we should find it\n        let generation_n = 
5;\n        let injected_none = inject_index_part(&test_state, Generation::none()).await;\n        assert_got_index_part(&test_state, Generation::new(generation_n), &injected_none).await;\n\n        // If a more recent-than-none generation exists, we should prefer to load that\n        let injected_1 = inject_index_part(&test_state, Generation::new(1)).await;\n        assert_got_index_part(&test_state, Generation::new(generation_n), &injected_1).await;\n\n        // If a more-recent-than-me generation exists, we should ignore it.\n        let _injected_10 = inject_index_part(&test_state, Generation::new(10)).await;\n        assert_got_index_part(&test_state, Generation::new(generation_n), &injected_1).await;\n\n        // If a directly previous generation exists, _and_ an index exists in my own\n        // generation, I should prefer my own generation.\n        let _injected_prev =\n            inject_index_part(&test_state, Generation::new(generation_n - 1)).await;\n        let injected_current = inject_index_part(&test_state, Generation::new(generation_n)).await;\n        assert_got_index_part(\n            &test_state,\n            Generation::new(generation_n),\n            &injected_current,\n        )\n        .await;\n\n        Ok(())\n    }\n}\n"
  },
  {
    "path": "pageserver/src/tenant/secondary/downloader.rs",
    "content": "use std::collections::{HashMap, HashSet};\nuse std::pin::Pin;\nuse std::str::FromStr;\nuse std::sync::Arc;\nuse std::time::{Duration, Instant, SystemTime};\n\nuse crate::metrics::{STORAGE_IO_SIZE, StorageIoSizeOperation};\nuse camino::Utf8PathBuf;\nuse chrono::format::{DelayedFormat, StrftimeItems};\nuse futures::Future;\nuse metrics::UIntGauge;\nuse pageserver_api::models::SecondaryProgress;\nuse pageserver_api::shard::TenantShardId;\nuse remote_storage::{DownloadError, DownloadKind, DownloadOpts, Etag, GenericRemoteStorage};\nuse tokio_util::sync::CancellationToken;\nuse tracing::{Instrument, info_span, instrument, warn};\nuse utils::completion::Barrier;\nuse utils::crashsafe::path_with_suffix_extension;\nuse utils::id::TimelineId;\nuse utils::{backoff, failpoint_support, fs_ext, pausable_failpoint, serde_system_time};\n\nuse super::heatmap::{HeatMapLayer, HeatMapTenant, HeatMapTimeline};\nuse super::scheduler::{\n    self, Completion, JobGenerator, SchedulingResult, TenantBackgroundJobs, period_jitter,\n    period_warmup,\n};\nuse super::{\n    CommandRequest, DownloadCommand, GetTenantError, SecondaryTenant, SecondaryTenantError,\n};\nuse crate::TEMP_FILE_SUFFIX;\nuse crate::config::PageServerConf;\nuse crate::context::RequestContext;\nuse crate::disk_usage_eviction_task::{\n    DiskUsageEvictionInfo, EvictionCandidate, EvictionLayer, EvictionSecondaryLayer, finite_f32,\n};\nuse crate::metrics::SECONDARY_MODE;\nuse crate::tenant::config::SecondaryLocationConfig;\nuse crate::tenant::debug_assert_current_span_has_tenant_and_timeline_id;\nuse crate::tenant::ephemeral_file::is_ephemeral_file;\nuse crate::tenant::mgr::TenantManager;\nuse crate::tenant::remote_timeline_client::download::download_layer_file;\nuse crate::tenant::remote_timeline_client::index::LayerFileMetadata;\nuse crate::tenant::remote_timeline_client::{\n    FAILED_DOWNLOAD_WARN_THRESHOLD, FAILED_REMOTE_OP_RETRIES, is_temp_download_file,\n    remote_heatmap_path,\n};\nuse 
crate::tenant::span::debug_assert_current_span_has_tenant_id;\nuse crate::tenant::storage_layer::layer::local_layer_path;\nuse crate::tenant::storage_layer::{LayerName, LayerVisibilityHint};\nuse crate::tenant::tasks::{BackgroundLoopKind, warn_when_period_overrun};\nuse crate::virtual_file::{MaybeFatalIo, VirtualFile, on_fatal_io_error};\n\n/// For each tenant, default period for how long must have passed since the last download_tenant call before\n/// calling it again.  This default is replaced with the value of [`HeatMapTenant::upload_period_ms`] after first\n/// download, if the uploader populated it.\nconst DEFAULT_DOWNLOAD_INTERVAL: Duration = Duration::from_millis(60000);\n\npub(super) async fn downloader_task(\n    tenant_manager: Arc<TenantManager>,\n    remote_storage: GenericRemoteStorage,\n    command_queue: tokio::sync::mpsc::Receiver<CommandRequest<DownloadCommand>>,\n    background_jobs_can_start: Barrier,\n    cancel: CancellationToken,\n    root_ctx: RequestContext,\n) {\n    let concurrency = tenant_manager.get_conf().secondary_download_concurrency;\n\n    let generator = SecondaryDownloader {\n        tenant_manager,\n        remote_storage,\n        root_ctx,\n    };\n    let mut scheduler = Scheduler::new(generator, concurrency);\n\n    scheduler\n        .run(command_queue, background_jobs_can_start, cancel)\n        .instrument(info_span!(\"secondary_download_scheduler\"))\n        .await\n}\n\nstruct SecondaryDownloader {\n    tenant_manager: Arc<TenantManager>,\n    remote_storage: GenericRemoteStorage,\n    root_ctx: RequestContext,\n}\n\n#[derive(Debug, Clone)]\npub(super) struct OnDiskState {\n    metadata: LayerFileMetadata,\n    access_time: SystemTime,\n    local_path: Utf8PathBuf,\n}\n\nimpl OnDiskState {\n    fn new(\n        _conf: &'static PageServerConf,\n        _tenant_shard_id: &TenantShardId,\n        _imeline_id: &TimelineId,\n        _ame: LayerName,\n        metadata: LayerFileMetadata,\n        access_time: SystemTime,\n   
     local_path: Utf8PathBuf,\n    ) -> Self {\n        Self {\n            metadata,\n            access_time,\n            local_path,\n        }\n    }\n\n    // This is infallible, because all errors are either acceptable (ENOENT), or totally\n    // unexpected (fatal).\n    pub(super) fn remove_blocking(&self) {\n        // We tolerate ENOENT, because between planning eviction and executing\n        // it, the secondary downloader could have seen an updated heatmap that\n        // resulted in a layer being deleted.\n        // Other local I/O errors are process-fatal: these should never happen.\n        std::fs::remove_file(&self.local_path)\n            .or_else(fs_ext::ignore_not_found)\n            .fatal_err(\"Deleting secondary layer\")\n    }\n\n    pub(crate) fn file_size(&self) -> u64 {\n        self.metadata.file_size\n    }\n}\n\npub(super) struct SecondaryDetailTimeline {\n    on_disk_layers: HashMap<LayerName, OnDiskState>,\n\n    /// We remember when layers were evicted, to prevent re-downloading them.\n    pub(super) evicted_at: HashMap<LayerName, SystemTime>,\n\n    ctx: RequestContext,\n}\n\nimpl Clone for SecondaryDetailTimeline {\n    fn clone(&self) -> Self {\n        Self {\n            on_disk_layers: self.on_disk_layers.clone(),\n            evicted_at: self.evicted_at.clone(),\n            // This is a bit awkward. 
The downloader code operates on a snapshot\n            // of the secondary list to avoid locking it for extended periods of time.\n            // No particularly strong reason to choose [`RequestContext::detached_child`],\n            // but makes more sense than [`RequestContext::attached_child`].\n            ctx: self\n                .ctx\n                .detached_child(self.ctx.task_kind(), self.ctx.download_behavior()),\n        }\n    }\n}\n\nimpl std::fmt::Debug for SecondaryDetailTimeline {\n    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {\n        f.debug_struct(\"SecondaryDetailTimeline\")\n            .field(\"on_disk_layers\", &self.on_disk_layers)\n            .field(\"evicted_at\", &self.evicted_at)\n            .finish()\n    }\n}\n\nimpl SecondaryDetailTimeline {\n    pub(super) fn empty(ctx: RequestContext) -> Self {\n        SecondaryDetailTimeline {\n            on_disk_layers: Default::default(),\n            evicted_at: Default::default(),\n            ctx,\n        }\n    }\n\n    pub(super) fn context(&self) -> &RequestContext {\n        &self.ctx\n    }\n\n    pub(super) fn remove_layer(\n        &mut self,\n        name: &LayerName,\n        resident_metric: &UIntGauge,\n    ) -> Option<OnDiskState> {\n        let removed = self.on_disk_layers.remove(name);\n        if let Some(removed) = &removed {\n            resident_metric.sub(removed.file_size());\n        }\n        removed\n    }\n\n    /// `local_path`\n    fn touch_layer<F>(\n        &mut self,\n        conf: &'static PageServerConf,\n        tenant_shard_id: &TenantShardId,\n        timeline_id: &TimelineId,\n        touched: &HeatMapLayer,\n        resident_metric: &UIntGauge,\n        local_path: F,\n    ) where\n        F: FnOnce() -> Utf8PathBuf,\n    {\n        use std::collections::hash_map::Entry;\n        match self.on_disk_layers.entry(touched.name.clone()) {\n            Entry::Occupied(mut v) => {\n                v.get_mut().access_time = 
touched.access_time;\n            }\n            Entry::Vacant(e) => {\n                e.insert(OnDiskState::new(\n                    conf,\n                    tenant_shard_id,\n                    timeline_id,\n                    touched.name.clone(),\n                    touched.metadata.clone(),\n                    touched.access_time,\n                    local_path(),\n                ));\n                resident_metric.add(touched.metadata.file_size);\n            }\n        }\n    }\n}\n\n// Aspects of a heatmap that we remember after downloading it\n#[derive(Clone, Debug)]\nstruct DownloadSummary {\n    etag: Etag,\n    #[allow(unused)]\n    mtime: SystemTime,\n    upload_period: Duration,\n}\n\n/// This state is written by the secondary downloader, it is opaque\n/// to TenantManager\n#[derive(Debug)]\npub(super) struct SecondaryDetail {\n    pub(super) config: SecondaryLocationConfig,\n\n    last_download: Option<DownloadSummary>,\n    next_download: Option<Instant>,\n    timelines: HashMap<TimelineId, SecondaryDetailTimeline>,\n}\n\n/// Helper for logging SystemTime\nfn strftime(t: &'_ SystemTime) -> DelayedFormat<StrftimeItems<'_>> {\n    let datetime: chrono::DateTime<chrono::Utc> = (*t).into();\n    datetime.format(\"%d/%m/%Y %T\")\n}\n\n/// Information returned from download function when it detects the heatmap has changed\nstruct HeatMapModified {\n    etag: Etag,\n    last_modified: SystemTime,\n    bytes: Vec<u8>,\n}\n\nenum HeatMapDownload {\n    // The heatmap's etag has changed: return the new etag, mtime and the body bytes\n    Modified(HeatMapModified),\n    // The heatmap's etag is unchanged\n    Unmodified,\n}\n\nimpl SecondaryDetail {\n    pub(super) fn new(config: SecondaryLocationConfig) -> Self {\n        Self {\n            config,\n            last_download: None,\n            next_download: None,\n            timelines: HashMap::new(),\n        }\n    }\n\n    #[cfg(feature = \"testing\")]\n    pub(crate) fn 
total_resident_size(&self) -> u64 {\n        self.timelines\n            .values()\n            .map(|tl| {\n                tl.on_disk_layers\n                    .values()\n                    .map(|v| v.metadata.file_size)\n                    .sum::<u64>()\n            })\n            .sum::<u64>()\n    }\n\n    pub(super) fn evict_layer(\n        &mut self,\n        name: LayerName,\n        timeline_id: &TimelineId,\n        now: SystemTime,\n        resident_metric: &UIntGauge,\n    ) -> Option<OnDiskState> {\n        let timeline = self.timelines.get_mut(timeline_id)?;\n        let removed = timeline.remove_layer(&name, resident_metric);\n        if removed.is_some() {\n            timeline.evicted_at.insert(name, now);\n        }\n        removed\n    }\n\n    pub(super) fn remove_timeline(\n        &mut self,\n        tenant_shard_id: &TenantShardId,\n        timeline_id: &TimelineId,\n        resident_metric: &UIntGauge,\n    ) {\n        let removed = self.timelines.remove(timeline_id);\n        if let Some(removed) = removed {\n            Self::clear_timeline_metrics(tenant_shard_id, timeline_id, removed, resident_metric);\n        }\n    }\n\n    pub(super) fn drain_timelines(\n        &mut self,\n        tenant_shard_id: &TenantShardId,\n        resident_metric: &UIntGauge,\n    ) {\n        for (timeline_id, removed) in self.timelines.drain() {\n            Self::clear_timeline_metrics(tenant_shard_id, &timeline_id, removed, resident_metric);\n        }\n    }\n\n    fn clear_timeline_metrics(\n        tenant_shard_id: &TenantShardId,\n        timeline_id: &TimelineId,\n        detail: SecondaryDetailTimeline,\n        resident_metric: &UIntGauge,\n    ) {\n        resident_metric.sub(\n            detail\n                .on_disk_layers\n                .values()\n                .map(|l| l.metadata.file_size)\n                .sum(),\n        );\n\n        let shard_id = format!(\"{}\", tenant_shard_id.shard_slug());\n        let tenant_id = 
tenant_shard_id.tenant_id.to_string();\n        let timeline_id = timeline_id.to_string();\n        for op in StorageIoSizeOperation::VARIANTS {\n            let _ = STORAGE_IO_SIZE.remove_label_values(&[\n                op,\n                tenant_id.as_str(),\n                shard_id.as_str(),\n                timeline_id.as_str(),\n            ]);\n        }\n    }\n\n    /// Additionally returns the total number of layers, used for more stable relative access time\n    /// based eviction.\n    pub(super) fn get_layers_for_eviction(\n        &self,\n        parent: &Arc<SecondaryTenant>,\n    ) -> (DiskUsageEvictionInfo, usize) {\n        let mut result = DiskUsageEvictionInfo::default();\n        let mut total_layers = 0;\n\n        for (timeline_id, timeline_detail) in &self.timelines {\n            result\n                .resident_layers\n                .extend(timeline_detail.on_disk_layers.iter().map(|(name, ods)| {\n                    EvictionCandidate {\n                        layer: EvictionLayer::Secondary(EvictionSecondaryLayer {\n                            secondary_tenant: parent.clone(),\n                            timeline_id: *timeline_id,\n                            name: name.clone(),\n                            metadata: ods.metadata.clone(),\n                        }),\n                        last_activity_ts: ods.access_time,\n                        relative_last_activity: finite_f32::FiniteF32::ZERO,\n                        // Secondary location layers are presumed visible, because Covered layers\n                        // are excluded from the heatmap\n                        visibility: LayerVisibilityHint::Visible,\n                    }\n                }));\n\n            // total might be missing currently downloading layers, but as a lower than actual\n            // value it is good enough approximation.\n            total_layers += timeline_detail.on_disk_layers.len() + timeline_detail.evicted_at.len();\n        }\n   
     result.max_layer_size = result\n            .resident_layers\n            .iter()\n            .map(|l| l.layer.get_file_size())\n            .max();\n\n        tracing::debug!(\n            \"eviction: secondary tenant {} found {} timelines, {} layers\",\n            parent.get_tenant_shard_id(),\n            self.timelines.len(),\n            result.resident_layers.len()\n        );\n\n        (result, total_layers)\n    }\n}\n\nstruct PendingDownload {\n    secondary_state: Arc<SecondaryTenant>,\n    last_download: Option<DownloadSummary>,\n    target_time: Option<Instant>,\n}\n\nimpl scheduler::PendingJob for PendingDownload {\n    fn get_tenant_shard_id(&self) -> &TenantShardId {\n        self.secondary_state.get_tenant_shard_id()\n    }\n}\n\nstruct RunningDownload {\n    barrier: Barrier,\n}\n\nimpl scheduler::RunningJob for RunningDownload {\n    fn get_barrier(&self) -> Barrier {\n        self.barrier.clone()\n    }\n}\n\nstruct CompleteDownload {\n    secondary_state: Arc<SecondaryTenant>,\n    completed_at: Instant,\n    result: Result<(), UpdateError>,\n}\n\nimpl scheduler::Completion for CompleteDownload {\n    fn get_tenant_shard_id(&self) -> &TenantShardId {\n        self.secondary_state.get_tenant_shard_id()\n    }\n}\n\ntype Scheduler = TenantBackgroundJobs<\n    SecondaryDownloader,\n    PendingDownload,\n    RunningDownload,\n    CompleteDownload,\n    DownloadCommand,\n>;\n\nimpl JobGenerator<PendingDownload, RunningDownload, CompleteDownload, DownloadCommand>\n    for SecondaryDownloader\n{\n    #[instrument(skip_all, fields(tenant_id=%completion.get_tenant_shard_id().tenant_id, shard_id=%completion.get_tenant_shard_id().shard_slug()))]\n    fn on_completion(&mut self, completion: CompleteDownload) {\n        let CompleteDownload {\n            secondary_state,\n            completed_at: _completed_at,\n            result,\n        } = completion;\n\n        tracing::debug!(\"Secondary tenant download completed\");\n\n        let mut 
detail = secondary_state.detail.lock().unwrap();\n\n        match result {\n            Err(UpdateError::Restart) => {\n                // Start downloading again as soon as we can.  This will involve waiting for the scheduler's\n                // scheduling interval.  This slightly reduces the peak download speed of tenants that hit their\n                // deadline and keep restarting, but that also helps give other tenants a chance to execute rather\n                // than letting one big tenant dominate for a long time.\n                detail.next_download = Some(Instant::now());\n            }\n            _ => {\n                let period = detail\n                    .last_download\n                    .as_ref()\n                    .map(|d| d.upload_period)\n                    .unwrap_or(DEFAULT_DOWNLOAD_INTERVAL);\n\n                // We advance next_download irrespective of errors: we don't want error cases to result in\n                // expensive busy-polling.\n                detail.next_download = Some(Instant::now() + period_jitter(period, 5));\n            }\n        }\n    }\n\n    async fn schedule(&mut self) -> SchedulingResult<PendingDownload> {\n        let mut result = SchedulingResult {\n            jobs: Vec::new(),\n            want_interval: None,\n        };\n\n        // Step 1: identify some tenants that we may work on\n        let mut tenants: Vec<Arc<SecondaryTenant>> = Vec::new();\n        self.tenant_manager\n            .foreach_secondary_tenants(|_id, secondary_state| {\n                tenants.push(secondary_state.clone());\n            });\n\n        // Step 2: filter out tenants which are not yet eligible to run\n        let now = Instant::now();\n        result.jobs = tenants\n            .into_iter()\n            .filter_map(|secondary_tenant| {\n                let (last_download, next_download) = {\n                    let mut detail = secondary_tenant.detail.lock().unwrap();\n\n                    if 
!detail.config.warm {\n                        // Downloads are disabled for this tenant\n                        detail.next_download = None;\n                        return None;\n                    }\n\n                    if detail.next_download.is_none() {\n                        // Initialize randomly in the range from 0 to our interval: this uniformly spreads the start times.  Subsequent\n                        // rounds will use a smaller jitter to avoid accidentally synchronizing later.\n                        detail.next_download = Some(now.checked_add(period_warmup(DEFAULT_DOWNLOAD_INTERVAL)).expect(\n                        \"Using our constant, which is known to be small compared with clock range\",\n                    ));\n                    }\n                    (detail.last_download.clone(), detail.next_download.unwrap())\n                };\n\n                if now > next_download {\n                    Some(PendingDownload {\n                        secondary_state: secondary_tenant,\n                        last_download,\n                        target_time: Some(next_download),\n                    })\n                } else {\n                    None\n                }\n            })\n            .collect();\n\n        // Step 3: sort by target execution time to run most urgent first.\n        result.jobs.sort_by_key(|j| j.target_time);\n\n        result\n    }\n\n    fn on_command(\n        &mut self,\n        command: DownloadCommand,\n    ) -> Result<PendingDownload, SecondaryTenantError> {\n        let tenant_shard_id = command.get_tenant_shard_id();\n\n        let tenant = self\n            .tenant_manager\n            .get_secondary_tenant_shard(*tenant_shard_id)\n            .ok_or(GetTenantError::ShardNotFound(*tenant_shard_id))?;\n\n        Ok(PendingDownload {\n            target_time: None,\n            last_download: None,\n            secondary_state: tenant,\n        })\n    }\n\n    fn spawn(\n        &mut self,\n      
  job: PendingDownload,\n    ) -> (\n        RunningDownload,\n        Pin<Box<dyn Future<Output = CompleteDownload> + Send>>,\n    ) {\n        let PendingDownload {\n            secondary_state,\n            last_download,\n            target_time,\n        } = job;\n\n        let (completion, barrier) = utils::completion::channel();\n        let remote_storage = self.remote_storage.clone();\n        let conf = self.tenant_manager.get_conf();\n        let tenant_shard_id = *secondary_state.get_tenant_shard_id();\n        let download_ctx = self\n            .root_ctx\n            .attached_child()\n            .with_scope_secondary_tenant(&tenant_shard_id);\n        (RunningDownload { barrier }, Box::pin(async move {\n            let _completion = completion;\n\n            let result = TenantDownloader::new(conf, &remote_storage, &secondary_state)\n                .download(&download_ctx)\n                .await;\n            match &result\n            {\n                Err(UpdateError::NoData) => {\n                    tracing::info!(\"No heatmap found for tenant.  This is fine if it is new.\");\n                },\n                Err(UpdateError::NoSpace) => {\n                    tracing::warn!(\"Insufficient space while downloading.  
Will retry later.\");\n                }\n                Err(UpdateError::Cancelled) => {\n                    tracing::info!(\"Shut down while downloading\");\n                },\n                Err(UpdateError::Deserialize(e)) => {\n                    tracing::error!(\"Corrupt content while downloading tenant: {e}\");\n                },\n                Err(e @ (UpdateError::DownloadError(_) | UpdateError::Other(_))) => {\n                    tracing::error!(\"Error while downloading tenant: {e}\");\n                },\n                Err(UpdateError::Restart) => {\n                    tracing::info!(\"Download reached deadline & will restart to update heatmap\")\n                }\n                Ok(()) => {}\n            };\n\n            // Irrespective of the result, we will reschedule ourselves to run after our usual period.\n\n            // If the job had a target execution time, we may check our final execution\n            // time against that for observability purposes.\n            if let (Some(target_time), Some(last_download)) = (target_time, last_download) {\n                // Elapsed time includes any scheduling lag as well as the execution of the job\n                let elapsed = Instant::now().duration_since(target_time);\n\n                warn_when_period_overrun(\n                    elapsed,\n                    last_download.upload_period,\n                    BackgroundLoopKind::SecondaryDownload,\n                );\n            }\n\n            CompleteDownload {\n                secondary_state,\n                completed_at: Instant::now(),\n                result\n            }\n        }.instrument(info_span!(parent: None, \"secondary_download\", tenant_id=%tenant_shard_id.tenant_id, shard_id=%tenant_shard_id.shard_slug()))))\n    }\n}\n\nenum LayerAction {\n    Download,\n    NoAction,\n    Skip,\n    Touch,\n}\n\n/// This type is a convenience to group together the various functions involved in\n/// freshening a secondary 
tenant.\nstruct TenantDownloader<'a> {\n    conf: &'static PageServerConf,\n    remote_storage: &'a GenericRemoteStorage,\n    secondary_state: &'a SecondaryTenant,\n}\n\n/// Errors that may be encountered while updating a tenant\n#[derive(thiserror::Error, Debug)]\nenum UpdateError {\n    /// This is not a true failure, but it's how a download indicates that it would like to be restarted by\n    /// the scheduler, to pick up the latest heatmap\n    #[error(\"Reached deadline, restarting downloads\")]\n    Restart,\n\n    #[error(\"No remote data found\")]\n    NoData,\n    #[error(\"Insufficient local storage space\")]\n    NoSpace,\n    #[error(\"Failed to download: {0}\")]\n    DownloadError(DownloadError),\n    #[error(transparent)]\n    Deserialize(#[from] serde_json::Error),\n    #[error(\"Cancelled\")]\n    Cancelled,\n    #[error(transparent)]\n    Other(#[from] anyhow::Error),\n}\n\nimpl From<DownloadError> for UpdateError {\n    fn from(value: DownloadError) -> Self {\n        match &value {\n            DownloadError::Cancelled => Self::Cancelled,\n            DownloadError::NotFound => Self::NoData,\n            _ => Self::DownloadError(value),\n        }\n    }\n}\n\nimpl From<std::io::Error> for UpdateError {\n    fn from(value: std::io::Error) -> Self {\n        if let Some(nix::errno::Errno::ENOSPC) =\n            value.raw_os_error().map(nix::errno::Errno::from_raw)\n        {\n            UpdateError::NoSpace\n        } else if value\n            .get_ref()\n            .and_then(|x| x.downcast_ref::<DownloadError>())\n            .is_some()\n        {\n            UpdateError::from(DownloadError::from(value))\n        } else {\n            // An I/O error from e.g. 
tokio::io::copy_buf is most likely a remote storage issue\n            UpdateError::Other(anyhow::anyhow!(value))\n        }\n    }\n}\n\nimpl<'a> TenantDownloader<'a> {\n    fn new(\n        conf: &'static PageServerConf,\n        remote_storage: &'a GenericRemoteStorage,\n        secondary_state: &'a SecondaryTenant,\n    ) -> Self {\n        Self {\n            conf,\n            remote_storage,\n            secondary_state,\n        }\n    }\n\n    async fn download(&self, ctx: &RequestContext) -> Result<(), UpdateError> {\n        debug_assert_current_span_has_tenant_id();\n\n        // For the duration of a download, we must hold the SecondaryTenant::gate, to ensure\n        // cover our access to local storage.\n        let Ok(_guard) = self.secondary_state.gate.enter() else {\n            // Shutting down\n            return Err(UpdateError::Cancelled);\n        };\n\n        let tenant_shard_id = self.secondary_state.get_tenant_shard_id();\n\n        // We will use the etag from last successful download to make the download conditional on changes\n        let last_download = self\n            .secondary_state\n            .detail\n            .lock()\n            .unwrap()\n            .last_download\n            .clone();\n\n        // Download the tenant's heatmap\n        let HeatMapModified {\n            last_modified: heatmap_mtime,\n            etag: heatmap_etag,\n            bytes: heatmap_bytes,\n        } = match tokio::select!(\n            bytes = self.download_heatmap(last_download.as_ref().map(|d| &d.etag)) => {bytes?},\n            _ = self.secondary_state.cancel.cancelled() => return Ok(())\n        ) {\n            HeatMapDownload::Unmodified => {\n                tracing::info!(\"Heatmap unchanged since last successful download\");\n                return Ok(());\n            }\n            HeatMapDownload::Modified(m) => m,\n        };\n\n        // Heatmap storage location\n        let heatmap_path = 
self.conf.tenant_heatmap_path(tenant_shard_id);\n\n        let last_heatmap = if last_download.is_none() {\n            match load_heatmap(&heatmap_path, ctx).await {\n                Ok(htm) => htm,\n                Err(e) => {\n                    tracing::warn!(\"Couldn't load heatmap from {heatmap_path}: {e:?}\");\n                    None\n                }\n            }\n        } else {\n            None\n        };\n\n        let last_heatmap_timelines = last_heatmap.as_ref().map(|htm| {\n            htm.timelines\n                .iter()\n                .map(|tl| (tl.timeline_id, tl))\n                .collect::<HashMap<_, _>>()\n        });\n\n        let heatmap = serde_json::from_slice::<HeatMapTenant>(&heatmap_bytes)?;\n\n        let temp_path = path_with_suffix_extension(&heatmap_path, TEMP_FILE_SUFFIX);\n        let context_msg = format!(\"write tenant {tenant_shard_id} heatmap to {heatmap_path}\");\n        let heatmap_path_bg = heatmap_path.clone();\n        VirtualFile::crashsafe_overwrite(heatmap_path_bg, temp_path, heatmap_bytes)\n            .await\n            .maybe_fatal_err(&context_msg)?;\n\n        tracing::debug!(\n            \"Wrote local heatmap to {}, with {} timelines\",\n            heatmap_path,\n            heatmap.timelines.len()\n        );\n\n        // Get or initialize the local disk state for the timelines we will update\n        let mut timeline_states = HashMap::new();\n        for timeline in &heatmap.timelines {\n            let timeline_state = self\n                .secondary_state\n                .detail\n                .lock()\n                .unwrap()\n                .timelines\n                .get(&timeline.timeline_id)\n                .cloned();\n\n            let timeline_state = match timeline_state {\n                Some(t) => t,\n                None => {\n                    let last_heatmap =\n                        last_heatmap_timelines\n                            .as_ref()\n                    
        .and_then(|last_heatmap_timelines| {\n                                last_heatmap_timelines.get(&timeline.timeline_id).copied()\n                            });\n                    // We have no existing state: need to scan local disk for layers first.\n                    let timeline_state = init_timeline_state(\n                        self.conf,\n                        tenant_shard_id,\n                        last_heatmap,\n                        timeline,\n                        &self.secondary_state.resident_size_metric,\n                        ctx,\n                    )\n                    .await;\n\n                    // Re-acquire detail lock now that we're done with async load from local FS\n                    self.secondary_state\n                        .detail\n                        .lock()\n                        .unwrap()\n                        .timelines\n                        .insert(timeline.timeline_id, timeline_state.clone());\n                    timeline_state\n                }\n            };\n\n            timeline_states.insert(timeline.timeline_id, timeline_state);\n        }\n\n        // Clean up any local layers that aren't in the heatmap.  We do this first for all timelines, on the general\n        // principle that deletions should be done before writes wherever possible, and so that we can use this\n        // phase to initialize our SecondaryProgress.\n        {\n            *self.secondary_state.progress.lock().unwrap() =\n                self.prepare_timelines(&heatmap, heatmap_mtime).await?;\n        }\n\n        // Calculate a deadline for downloads: if downloading takes longer than this, it is useful to drop out and start again,\n        // so that we are always using reasonably a fresh heatmap.  
Otherwise, if we had really huge content to download, we might\n        // spend 10s of minutes downloading layers we don't need.\n        // (see https://github.com/neondatabase/neon/issues/8182)\n        let deadline = {\n            let period = self\n                .secondary_state\n                .detail\n                .lock()\n                .unwrap()\n                .last_download\n                .as_ref()\n                .map(|d| d.upload_period)\n                .unwrap_or(DEFAULT_DOWNLOAD_INTERVAL);\n\n            // Use double the period: we are not promising to complete within the period, this is just a heuristic\n            // to keep using a \"reasonably fresh\" heatmap.\n            Instant::now() + period * 2\n        };\n\n        // Download the layers in the heatmap\n        for timeline in heatmap.timelines {\n            let timeline_state = timeline_states\n                .remove(&timeline.timeline_id)\n                .expect(\"Just populated above\");\n\n            if self.secondary_state.cancel.is_cancelled() {\n                tracing::debug!(\n                    \"Cancelled before downloading timeline {}\",\n                    timeline.timeline_id\n                );\n                return Ok(());\n            }\n\n            let timeline_id = timeline.timeline_id;\n            self.download_timeline(timeline, timeline_state, deadline, ctx)\n                .instrument(tracing::info_span!(\n                    \"secondary_download_timeline\",\n                    tenant_id=%tenant_shard_id.tenant_id,\n                    shard_id=%tenant_shard_id.shard_slug(),\n                    %timeline_id\n                ))\n                .await?;\n        }\n\n        // Metrics consistency check in testing builds\n        self.secondary_state.validate_metrics();\n        // Only update last_etag after a full successful download: this way we will not skip\n        // the next download, even if the heatmap's actual etag is unchanged.\n 
       self.secondary_state.detail.lock().unwrap().last_download = Some(DownloadSummary {\n            etag: heatmap_etag,\n            mtime: heatmap_mtime,\n            upload_period: heatmap\n                .upload_period_ms\n                .map(|ms| Duration::from_millis(ms as u64))\n                .unwrap_or(DEFAULT_DOWNLOAD_INTERVAL),\n        });\n\n        // Robustness: we should have updated progress properly, but in case we didn't, make sure\n        // we don't leave the tenant in a state where we claim to have successfully downloaded\n        // everything, but our progress is incomplete.  The invariant here should be that if\n        // we have set `last_download` to this heatmap's etag, then the next time we see that\n        // etag we can safely do no work (i.e. we must be complete).\n        let mut progress = self.secondary_state.progress.lock().unwrap();\n        debug_assert!(progress.layers_downloaded == progress.layers_total);\n        debug_assert!(progress.bytes_downloaded == progress.bytes_total);\n        if progress.layers_downloaded != progress.layers_total\n            || progress.bytes_downloaded != progress.bytes_total\n        {\n            tracing::warn!(\"Correcting drift in progress stats ({progress:?})\");\n            progress.layers_downloaded = progress.layers_total;\n            progress.bytes_downloaded = progress.bytes_total;\n        }\n\n        Ok(())\n    }\n\n    /// Do any fast local cleanup that comes before the much slower process of downloading\n    /// layers from remote storage.  
In the process, initialize the SecondaryProgress object\n    /// that will later be updated incrementally as we download layers.\n    async fn prepare_timelines(\n        &self,\n        heatmap: &HeatMapTenant,\n        heatmap_mtime: SystemTime,\n    ) -> Result<SecondaryProgress, UpdateError> {\n        let heatmap_stats = heatmap.get_stats();\n        // We will construct a progress object, and then populate its initial \"downloaded\" numbers\n        // while iterating through local layer state in [`Self::prepare_timelines`]\n        let mut progress = SecondaryProgress {\n            layers_total: heatmap_stats.layers,\n            bytes_total: heatmap_stats.bytes,\n            heatmap_mtime: Some(serde_system_time::SystemTime(heatmap_mtime)),\n            layers_downloaded: 0,\n            bytes_downloaded: 0,\n        };\n\n        // Also expose heatmap bytes_total as a metric\n        self.secondary_state\n            .heatmap_total_size_metric\n            .set(heatmap_stats.bytes);\n\n        // Accumulate list of things to delete while holding the detail lock, for execution after dropping the lock\n        let mut delete_layers = Vec::new();\n        let mut delete_timelines = Vec::new();\n        {\n            let mut detail = self.secondary_state.detail.lock().unwrap();\n            for (timeline_id, timeline_state) in &mut detail.timelines {\n                let Some(heatmap_timeline_index) = heatmap\n                    .timelines\n                    .iter()\n                    .position(|t| t.timeline_id == *timeline_id)\n                else {\n                    // This timeline is no longer referenced in the heatmap: delete it locally\n                    delete_timelines.push(*timeline_id);\n                    continue;\n                };\n\n                let heatmap_timeline = heatmap.timelines.get(heatmap_timeline_index).unwrap();\n\n                let layers_in_heatmap = heatmap_timeline\n                    .hot_layers()\n         
           .map(|l| (&l.name, l.metadata.generation))\n                    .collect::<HashSet<_>>();\n                let layers_on_disk = timeline_state\n                    .on_disk_layers\n                    .iter()\n                    .map(|l| (l.0, l.1.metadata.generation))\n                    .collect::<HashSet<_>>();\n\n                let mut layer_count = layers_on_disk.len();\n                let mut layer_byte_count: u64 = timeline_state\n                    .on_disk_layers\n                    .values()\n                    .map(|l| l.metadata.file_size)\n                    .sum();\n\n                // Remove on-disk layers that are no longer present in heatmap\n                for (layer_file_name, generation) in layers_on_disk.difference(&layers_in_heatmap) {\n                    layer_count -= 1;\n                    layer_byte_count -= timeline_state\n                        .on_disk_layers\n                        .get(layer_file_name)\n                        .unwrap()\n                        .metadata\n                        .file_size;\n\n                    let local_path = local_layer_path(\n                        self.conf,\n                        self.secondary_state.get_tenant_shard_id(),\n                        timeline_id,\n                        layer_file_name,\n                        generation,\n                    );\n\n                    delete_layers.push((*timeline_id, (*layer_file_name).clone(), local_path));\n                }\n\n                progress.bytes_downloaded += layer_byte_count;\n                progress.layers_downloaded += layer_count;\n            }\n\n            for delete_timeline in &delete_timelines {\n                // We haven't removed from disk yet, but optimistically remove from in-memory state: if removal\n                // from disk fails that will be a fatal error.\n                detail.remove_timeline(\n                    self.secondary_state.get_tenant_shard_id(),\n                
    delete_timeline,\n                    &self.secondary_state.resident_size_metric,\n                );\n            }\n        }\n\n        // Execute accumulated deletions\n        for (timeline_id, layer_name, local_path) in delete_layers {\n            tracing::info!(timeline_id=%timeline_id, \"Removing secondary local layer {layer_name} because it's absent in heatmap\",);\n\n            tokio::fs::remove_file(&local_path)\n                .await\n                .or_else(fs_ext::ignore_not_found)\n                .maybe_fatal_err(\"Removing secondary layer\")?;\n\n            // Update in-memory housekeeping to reflect the absence of the deleted layer\n            let mut detail = self.secondary_state.detail.lock().unwrap();\n            let Some(timeline_state) = detail.timelines.get_mut(&timeline_id) else {\n                continue;\n            };\n            timeline_state.remove_layer(&layer_name, &self.secondary_state.resident_size_metric);\n        }\n\n        for timeline_id in delete_timelines {\n            let timeline_path = self\n                .conf\n                .timeline_path(self.secondary_state.get_tenant_shard_id(), &timeline_id);\n            tracing::info!(timeline_id=%timeline_id,\n                \"Timeline no longer in heatmap, removing from secondary location\"\n            );\n            tokio::fs::remove_dir_all(&timeline_path)\n                .await\n                .or_else(fs_ext::ignore_not_found)\n                .maybe_fatal_err(\"Removing secondary timeline\")?;\n        }\n\n        Ok(progress)\n    }\n\n    /// Returns downloaded bytes if the etag differs from `prev_etag`, or None if the object\n    /// still matches `prev_etag`.\n    async fn download_heatmap(\n        &self,\n        prev_etag: Option<&Etag>,\n    ) -> Result<HeatMapDownload, UpdateError> {\n        debug_assert_current_span_has_tenant_id();\n        let tenant_shard_id = self.secondary_state.get_tenant_shard_id();\n        
tracing::debug!(\"Downloading heatmap for secondary tenant\",);\n\n        let heatmap_path = remote_heatmap_path(tenant_shard_id);\n        let cancel = &self.secondary_state.cancel;\n        let opts = DownloadOpts {\n            etag: prev_etag.cloned(),\n            kind: DownloadKind::Small,\n            ..Default::default()\n        };\n\n        backoff::retry(\n            || async {\n                let download = match self\n                    .remote_storage\n                    .download(&heatmap_path, &opts, cancel)\n                    .await\n                {\n                    Ok(download) => download,\n                    Err(DownloadError::Unmodified) => return Ok(HeatMapDownload::Unmodified),\n                    Err(err) => return Err(err.into()),\n                };\n\n                let mut heatmap_bytes = Vec::new();\n                let mut body = tokio_util::io::StreamReader::new(download.download_stream);\n                let _size = tokio::io::copy_buf(&mut body, &mut heatmap_bytes).await?;\n                Ok(HeatMapDownload::Modified(HeatMapModified {\n                    etag: download.etag,\n                    last_modified: download.last_modified,\n                    bytes: heatmap_bytes,\n                }))\n            },\n            |e| matches!(e, UpdateError::NoData | UpdateError::Cancelled),\n            FAILED_DOWNLOAD_WARN_THRESHOLD,\n            FAILED_REMOTE_OP_RETRIES,\n            \"download heatmap\",\n            cancel,\n        )\n        .await\n        .ok_or_else(|| UpdateError::Cancelled)\n        .and_then(|x| x)\n        .inspect(|_| SECONDARY_MODE.download_heatmap.inc())\n    }\n\n    /// Download heatmap layers that are not present on local disk, or update their\n    /// access time if they are already present.\n    async fn download_timeline_layers(\n        &self,\n        tenant_shard_id: &TenantShardId,\n        timeline: HeatMapTimeline,\n        timeline_state: SecondaryDetailTimeline,\n        
deadline: Instant,\n    ) -> (Result<(), UpdateError>, Vec<HeatMapLayer>) {\n        // Accumulate updates to the state\n        let mut touched = Vec::new();\n\n        let timeline_id = timeline.timeline_id;\n        for layer in timeline.into_hot_layers() {\n            if self.secondary_state.cancel.is_cancelled() {\n                tracing::debug!(\"Cancelled -- dropping out of layer loop\");\n                return (Err(UpdateError::Cancelled), touched);\n            }\n\n            if Instant::now() > deadline {\n                // We've been running downloads for a while, restart to download latest heatmap.\n                return (Err(UpdateError::Restart), touched);\n            }\n\n            match self.layer_action(&timeline_state, &layer).await {\n                LayerAction::Download => (),\n                LayerAction::NoAction => continue,\n                LayerAction::Skip => {\n                    self.skip_layer(layer);\n                    continue;\n                }\n                LayerAction::Touch => {\n                    touched.push(layer);\n                    continue;\n                }\n            }\n\n            match self\n                .download_layer(\n                    tenant_shard_id,\n                    &timeline_id,\n                    layer,\n                    timeline_state.context(),\n                )\n                .await\n            {\n                Ok(Some(layer)) => touched.push(layer),\n                Ok(None) => {\n                    // Not an error but we didn't download it: remote layer is missing.  
Don't add it to the list of\n                    // things to consider touched.\n                }\n                Err(e) => {\n                    return (Err(e), touched);\n                }\n            }\n        }\n\n        (Ok(()), touched)\n    }\n\n    async fn layer_action(\n        &self,\n        timeline_state: &SecondaryDetailTimeline,\n        layer: &HeatMapLayer,\n    ) -> LayerAction {\n        // Existing on-disk layers: just update their access time.\n        if let Some(on_disk) = timeline_state.on_disk_layers.get(&layer.name) {\n            tracing::debug!(\"Layer {} is already on disk\", layer.name);\n\n            if cfg!(debug_assertions) {\n                // Debug for https://github.com/neondatabase/neon/issues/6966: check that the files we think\n                // are already present on disk are really there.\n                match tokio::fs::metadata(&on_disk.local_path).await {\n                    Ok(meta) => {\n                        tracing::debug!(\n                            \"Layer {} present at {}, size {}\",\n                            layer.name,\n                            on_disk.local_path,\n                            meta.len(),\n                        );\n                    }\n                    Err(e) => {\n                        tracing::warn!(\n                            \"Layer {} not found at {} ({})\",\n                            layer.name,\n                            on_disk.local_path,\n                            e\n                        );\n                        debug_assert!(false);\n                    }\n                }\n            }\n\n            if on_disk.metadata.generation_file_size() != layer.metadata.generation_file_size() {\n                tracing::info!(\n                    \"Re-downloading layer {} with changed size or generation: {:?}->{:?}\",\n                    layer.name,\n                    on_disk.metadata.generation_file_size(),\n                    
layer.metadata.generation_file_size()\n                );\n                return LayerAction::Download;\n            }\n            if on_disk.metadata != layer.metadata || on_disk.access_time != layer.access_time {\n                // We already have this layer on disk.  Update its access time.\n                tracing::debug!(\n                    \"Access time updated for layer {}: {} -> {}\",\n                    layer.name,\n                    strftime(&on_disk.access_time),\n                    strftime(&layer.access_time)\n                );\n                return LayerAction::Touch;\n            }\n            return LayerAction::NoAction;\n        } else {\n            tracing::debug!(\"Layer {} not present on disk yet\", layer.name);\n        }\n\n        // Eviction: if we evicted a layer, then do not re-download it unless it was accessed more\n        // recently than it was evicted.\n        if let Some(evicted_at) = timeline_state.evicted_at.get(&layer.name) {\n            if &layer.access_time > evicted_at {\n                tracing::info!(\n                    \"Re-downloading evicted layer {}, accessed at {}, evicted at {}\",\n                    layer.name,\n                    strftime(&layer.access_time),\n                    strftime(evicted_at)\n                );\n            } else {\n                tracing::trace!(\n                    \"Not re-downloading evicted layer {}, accessed at {}, evicted at {}\",\n                    layer.name,\n                    strftime(&layer.access_time),\n                    strftime(evicted_at)\n                );\n                return LayerAction::Skip;\n            }\n        }\n        LayerAction::Download\n    }\n\n    async fn download_timeline(\n        &self,\n        timeline: HeatMapTimeline,\n        timeline_state: SecondaryDetailTimeline,\n        deadline: Instant,\n        ctx: &RequestContext,\n    ) -> Result<(), UpdateError> {\n        
debug_assert_current_span_has_tenant_and_timeline_id();\n        let tenant_shard_id = self.secondary_state.get_tenant_shard_id();\n        let timeline_id = timeline.timeline_id;\n\n        tracing::debug!(timeline_id=%timeline_id, \"Downloading layers, {} in heatmap\", timeline.hot_layers().count());\n\n        let (result, touched) = self\n            .download_timeline_layers(tenant_shard_id, timeline, timeline_state, deadline)\n            .await;\n\n        // Write updates to state to record layers we just downloaded or touched, irrespective of whether the overall result was successful\n        {\n            let mut detail = self.secondary_state.detail.lock().unwrap();\n            let timeline_detail = detail.timelines.entry(timeline_id).or_insert_with(|| {\n                let ctx = ctx.with_scope_secondary_timeline(tenant_shard_id, &timeline_id);\n                SecondaryDetailTimeline::empty(ctx)\n            });\n\n            tracing::info!(\"Wrote timeline_detail for {} touched layers\", touched.len());\n            touched.into_iter().for_each(|t| {\n                timeline_detail.touch_layer(\n                    self.conf,\n                    tenant_shard_id,\n                    &timeline_id,\n                    &t,\n                    &self.secondary_state.resident_size_metric,\n                    || {\n                        local_layer_path(\n                            self.conf,\n                            tenant_shard_id,\n                            &timeline_id,\n                            &t.name,\n                            &t.metadata.generation,\n                        )\n                    },\n                )\n            });\n        }\n\n        result\n    }\n\n    /// Call this during timeline download if a layer will _not_ be downloaded, to update progress statistics\n    fn skip_layer(&self, layer: HeatMapLayer) {\n        let mut progress = self.secondary_state.progress.lock().unwrap();\n        
progress.layers_total = progress.layers_total.saturating_sub(1);\n        progress.bytes_total = progress\n            .bytes_total\n            .saturating_sub(layer.metadata.file_size);\n    }\n\n    async fn download_layer(\n        &self,\n        tenant_shard_id: &TenantShardId,\n        timeline_id: &TimelineId,\n        layer: HeatMapLayer,\n        ctx: &RequestContext,\n    ) -> Result<Option<HeatMapLayer>, UpdateError> {\n        // Failpoints for simulating slow remote storage\n        failpoint_support::sleep_millis_async!(\n            \"secondary-layer-download-sleep\",\n            &self.secondary_state.cancel\n        );\n\n        pausable_failpoint!(\"secondary-layer-download-pausable\");\n\n        let local_path = local_layer_path(\n            self.conf,\n            tenant_shard_id,\n            timeline_id,\n            &layer.name,\n            &layer.metadata.generation,\n        );\n\n        // Note: no backoff::retry wrapper here because download_layer_file does its own retries internally\n        tracing::info!(\n            \"Starting download of layer {}, size {}\",\n            layer.name,\n            layer.metadata.file_size\n        );\n        let downloaded_bytes = download_layer_file(\n            self.conf,\n            self.remote_storage,\n            *tenant_shard_id,\n            *timeline_id,\n            &layer.name,\n            &layer.metadata,\n            &local_path,\n            &self.secondary_state.gate,\n            &self.secondary_state.cancel,\n            ctx,\n        )\n        .await;\n\n        let downloaded_bytes = match downloaded_bytes {\n            Ok(bytes) => bytes,\n            Err(DownloadError::NotFound) => {\n                // A heatmap might be out of date and refer to a layer that doesn't exist any more.\n                // This is harmless: continue to download the next layer. 
It is expected during compaction\n                // GC.\n                tracing::debug!(\n                    \"Skipped downloading missing layer {}, raced with compaction/gc?\",\n                    layer.name\n                );\n                self.skip_layer(layer);\n\n                return Ok(None);\n            }\n            Err(e) => return Err(e.into()),\n        };\n\n        if downloaded_bytes != layer.metadata.file_size {\n            let local_path = local_layer_path(\n                self.conf,\n                tenant_shard_id,\n                timeline_id,\n                &layer.name,\n                &layer.metadata.generation,\n            );\n\n            tracing::warn!(\n                \"Downloaded layer {} with unexpected size {} != {}.  Removing download.\",\n                layer.name,\n                downloaded_bytes,\n                layer.metadata.file_size\n            );\n\n            tokio::fs::remove_file(&local_path)\n                .await\n                .or_else(fs_ext::ignore_not_found)?;\n        } else {\n            tracing::info!(\"Downloaded layer {}, size {}\", layer.name, downloaded_bytes);\n            let mut progress = self.secondary_state.progress.lock().unwrap();\n            progress.bytes_downloaded += downloaded_bytes;\n            progress.layers_downloaded += 1;\n        }\n\n        SECONDARY_MODE.download_layer.inc();\n\n        Ok(Some(layer))\n    }\n}\n\n/// Scan local storage and build up Layer objects based on the metadata in a HeatMapTimeline\nasync fn init_timeline_state(\n    conf: &'static PageServerConf,\n    tenant_shard_id: &TenantShardId,\n    last_heatmap: Option<&HeatMapTimeline>,\n    heatmap: &HeatMapTimeline,\n    resident_metric: &UIntGauge,\n    ctx: &RequestContext,\n) -> SecondaryDetailTimeline {\n    let ctx = ctx.with_scope_secondary_timeline(tenant_shard_id, &heatmap.timeline_id);\n    let mut detail = SecondaryDetailTimeline::empty(ctx);\n\n    let timeline_path = 
conf.timeline_path(tenant_shard_id, &heatmap.timeline_id);\n    let mut dir = match tokio::fs::read_dir(&timeline_path).await {\n        Ok(d) => d,\n        Err(e) => {\n            if e.kind() == std::io::ErrorKind::NotFound {\n                let context = format!(\"Creating timeline directory {timeline_path}\");\n                tracing::info!(\"{}\", context);\n                tokio::fs::create_dir_all(&timeline_path)\n                    .await\n                    .fatal_err(&context);\n\n                // No entries to report: drop out.\n                return detail;\n            } else {\n                on_fatal_io_error(&e, &format!(\"Reading timeline dir {timeline_path}\"));\n            }\n        }\n    };\n\n    // As we iterate through layers found on disk, we will look up their metadata from this map.\n    // Layers not present in metadata will be discarded.\n    let heatmap_metadata: HashMap<&LayerName, &HeatMapLayer> =\n        heatmap.hot_layers().map(|l| (&l.name, l)).collect();\n\n    let last_heatmap_metadata: HashMap<&LayerName, &HeatMapLayer> =\n        if let Some(last_heatmap) = last_heatmap {\n            last_heatmap.hot_layers().map(|l| (&l.name, l)).collect()\n        } else {\n            HashMap::new()\n        };\n\n    while let Some(dentry) = dir\n        .next_entry()\n        .await\n        .fatal_err(&format!(\"Listing {timeline_path}\"))\n    {\n        let Ok(file_path) = Utf8PathBuf::from_path_buf(dentry.path()) else {\n            tracing::warn!(\"Malformed filename at {}\", dentry.path().to_string_lossy());\n            continue;\n        };\n        let local_meta = dentry\n            .metadata()\n            .await\n            .fatal_err(&format!(\"Read metadata on {file_path}\"));\n\n        let file_name = file_path.file_name().expect(\"created it from the dentry\");\n        if crate::is_temporary(&file_path)\n            || is_temp_download_file(&file_path)\n            || is_ephemeral_file(file_name)\n        
{\n            // Temporary files are frequently left behind from restarting during downloads\n            tracing::info!(\"Cleaning up temporary file {file_path}\");\n            if let Err(e) = tokio::fs::remove_file(&file_path)\n                .await\n                .or_else(fs_ext::ignore_not_found)\n            {\n                tracing::error!(\"Failed to remove temporary file {file_path}: {e}\");\n            }\n            continue;\n        }\n\n        match LayerName::from_str(file_name) {\n            Ok(name) => {\n                let remote_meta = heatmap_metadata.get(&name);\n                let last_meta = last_heatmap_metadata.get(&name);\n                let mut remove = false;\n                match remote_meta {\n                    Some(remote_meta) => {\n                        let last_meta_generation_file_size = last_meta\n                            .map(|m| m.metadata.generation_file_size())\n                            .unwrap_or(remote_meta.metadata.generation_file_size());\n                        // TODO: checksums for layers (https://github.com/neondatabase/neon/issues/2784)\n                        if remote_meta.metadata.generation_file_size()\n                            != last_meta_generation_file_size\n                        {\n                            tracing::info!(\n                                \"Removing local layer {name} as on-disk json metadata has different generation or file size from remote: {:?} -> {:?}\",\n                                last_meta_generation_file_size,\n                                remote_meta.metadata.generation_file_size()\n                            );\n                            remove = true;\n                        } else if local_meta.len() != remote_meta.metadata.file_size {\n                            // This can happen in the presence of race conditions: the remote and on-disk metadata have changed, but we haven't had\n                            // the chance yet to 
download the new layer to disk, before the process restarted.\n                            tracing::info!(\n                                \"Removing local layer {name} with unexpected local size {} != {}\",\n                                local_meta.len(),\n                                remote_meta.metadata.file_size\n                            );\n                            remove = true;\n                        } else {\n                            // We expect the access time to be initialized immediately afterwards, when\n                            // the latest heatmap is applied to the state.\n                            detail.touch_layer(\n                                conf,\n                                tenant_shard_id,\n                                &heatmap.timeline_id,\n                                remote_meta,\n                                resident_metric,\n                                || file_path,\n                            );\n                        }\n                    }\n                    None => {\n                        // FIXME: consider some optimization when transitioning from attached to secondary: maybe\n                        // wait until we have seen a heatmap that is more recent than the most recent on-disk state?  
Otherwise\n                        // we will end up deleting any layers which were created+uploaded more recently than the heatmap.\n                        tracing::info!(\n                            \"Removing secondary local layer {} because it's absent in heatmap\",\n                            name\n                        );\n                        remove = true;\n                    }\n                }\n                if remove {\n                    tokio::fs::remove_file(&dentry.path())\n                        .await\n                        .or_else(fs_ext::ignore_not_found)\n                        .fatal_err(&format!(\n                            \"Removing layer {}\",\n                            dentry.path().to_string_lossy()\n                        ));\n                }\n            }\n            Err(_) => {\n                // Ignore it.\n                tracing::warn!(\"Unexpected file in timeline directory: {file_name}\");\n            }\n        }\n    }\n\n    detail\n}\n\n/// Loads a json-encoded heatmap file from the provided on-disk path\nasync fn load_heatmap(\n    path: &Utf8PathBuf,\n    ctx: &RequestContext,\n) -> Result<Option<HeatMapTenant>, anyhow::Error> {\n    let st = match VirtualFile::read_to_string(path, ctx).await {\n        Ok(st) => st,\n        Err(e) if e.kind() == std::io::ErrorKind::NotFound => return Ok(None),\n        Err(e) => Err(e)?,\n    };\n    let htm = serde_json::from_str(&st)?;\n    Ok(Some(htm))\n}\n"
  },
  {
    "path": "pageserver/src/tenant/secondary/heatmap.rs",
    "content": "use std::collections::HashMap;\nuse std::time::SystemTime;\n\nuse serde::{Deserialize, Serialize};\nuse serde_with::{DisplayFromStr, TimestampSeconds, serde_as};\nuse utils::generation::Generation;\nuse utils::id::TimelineId;\n\nuse crate::tenant::remote_timeline_client::index::LayerFileMetadata;\nuse crate::tenant::storage_layer::LayerName;\n\n#[derive(Serialize, Deserialize)]\npub(crate) struct HeatMapTenant {\n    /// Generation of the attached location that uploaded the heatmap: this is not required\n    /// for correctness, but acts as a hint to secondary locations in order to detect thrashing\n    /// in the unlikely event that two attached locations are both uploading conflicting heatmaps.\n    pub(super) generation: Generation,\n\n    pub(super) timelines: Vec<HeatMapTimeline>,\n\n    /// Uploaders provide their own upload period in the heatmap, as a hint to downloaders\n    /// of how frequently it is worthwhile to check for updates.\n    ///\n    /// This is optional for backward compat, and because we sometimes might upload\n    /// a heatmap explicitly via API for a tenant that has no periodic upload configured.\n    #[serde(default)]\n    pub(super) upload_period_ms: Option<u128>,\n}\n\nimpl HeatMapTenant {\n    pub(crate) fn into_timelines_index(self) -> HashMap<TimelineId, HeatMapTimeline> {\n        self.timelines\n            .into_iter()\n            .map(|htl| (htl.timeline_id, htl))\n            .collect()\n    }\n}\n\n#[serde_as]\n#[derive(Serialize, Deserialize, Clone)]\npub(crate) struct HeatMapTimeline {\n    #[serde_as(as = \"DisplayFromStr\")]\n    pub(crate) timeline_id: TimelineId,\n\n    layers: Vec<HeatMapLayer>,\n}\n\n#[serde_as]\n#[derive(Serialize, Deserialize, Clone)]\npub(crate) struct HeatMapLayer {\n    pub(crate) name: LayerName,\n    pub(crate) metadata: LayerFileMetadata,\n\n    #[serde_as(as = \"TimestampSeconds<i64>\")]\n    pub(crate) access_time: SystemTime,\n\n    #[serde(default)]\n    pub(crate) cold: 
bool, // TODO: an actual 'heat' score that would let secondary locations prioritize downloading\n                           // the hottest layers, rather than trying to simply mirror whatever layers are on-disk on the primary.\n}\n\nimpl HeatMapLayer {\n    pub(crate) fn new(\n        name: LayerName,\n        metadata: LayerFileMetadata,\n        access_time: SystemTime,\n        cold: bool,\n    ) -> Self {\n        Self {\n            name,\n            metadata,\n            access_time,\n            cold,\n        }\n    }\n}\n\nimpl HeatMapTimeline {\n    pub(crate) fn new(timeline_id: TimelineId, layers: Vec<HeatMapLayer>) -> Self {\n        Self {\n            timeline_id,\n            layers,\n        }\n    }\n\n    pub(crate) fn into_hot_layers(self) -> impl Iterator<Item = HeatMapLayer> {\n        self.layers.into_iter().filter(|l| !l.cold)\n    }\n\n    pub(crate) fn hot_layers(&self) -> impl Iterator<Item = &HeatMapLayer> {\n        self.layers.iter().filter(|l| !l.cold)\n    }\n\n    pub(crate) fn all_layers(&self) -> impl Iterator<Item = &HeatMapLayer> {\n        self.layers.iter()\n    }\n}\n\npub(crate) struct HeatMapStats {\n    pub(crate) bytes: u64,\n    pub(crate) layers: usize,\n}\n\nimpl HeatMapTenant {\n    pub(crate) fn get_stats(&self) -> HeatMapStats {\n        let mut stats = HeatMapStats {\n            bytes: 0,\n            layers: 0,\n        };\n        for timeline in &self.timelines {\n            for layer in timeline.hot_layers() {\n                stats.layers += 1;\n                stats.bytes += layer.metadata.file_size;\n            }\n        }\n\n        stats\n    }\n\n    pub(crate) fn strip_atimes(self) -> Self {\n        Self {\n            timelines: self\n                .timelines\n                .into_iter()\n                .map(|mut tl| {\n                    for layer in &mut tl.layers {\n                        layer.access_time = SystemTime::UNIX_EPOCH;\n                    }\n                    tl\n         
       })\n                .collect(),\n            generation: self.generation,\n            upload_period_ms: self.upload_period_ms,\n        }\n    }\n}\n"
  },
  {
    "path": "pageserver/src/tenant/secondary/heatmap_uploader.rs",
    "content": "use std::collections::HashMap;\nuse std::pin::Pin;\nuse std::sync::{Arc, Weak};\nuse std::time::{Duration, Instant};\n\nuse futures::Future;\nuse pageserver_api::shard::TenantShardId;\nuse remote_storage::{GenericRemoteStorage, TimeoutOrCancel};\nuse tokio_util::sync::CancellationToken;\nuse tracing::{Instrument, info_span, instrument};\nuse utils::backoff;\nuse utils::completion::Barrier;\nuse utils::crashsafe::path_with_suffix_extension;\nuse utils::yielding_loop::yielding_loop;\n\nuse super::heatmap::HeatMapTenant;\nuse super::scheduler::{\n    self, JobGenerator, RunningJob, SchedulingResult, TenantBackgroundJobs, period_jitter,\n    period_warmup,\n};\nuse super::{CommandRequest, SecondaryTenantError, UploadCommand};\nuse crate::TEMP_FILE_SUFFIX;\nuse crate::metrics::SECONDARY_MODE;\nuse crate::tenant::TenantShard;\nuse crate::tenant::config::AttachmentMode;\nuse crate::tenant::mgr::{GetTenantError, TenantManager};\nuse crate::tenant::remote_timeline_client::remote_heatmap_path;\nuse crate::tenant::span::debug_assert_current_span_has_tenant_id;\nuse crate::tenant::tasks::{BackgroundLoopKind, warn_when_period_overrun};\nuse crate::virtual_file::VirtualFile;\n\npub(super) async fn heatmap_uploader_task(\n    tenant_manager: Arc<TenantManager>,\n    remote_storage: GenericRemoteStorage,\n    command_queue: tokio::sync::mpsc::Receiver<CommandRequest<UploadCommand>>,\n    background_jobs_can_start: Barrier,\n    cancel: CancellationToken,\n) {\n    let concurrency = tenant_manager.get_conf().heatmap_upload_concurrency;\n\n    let generator = HeatmapUploader {\n        tenant_manager,\n        remote_storage,\n        cancel: cancel.clone(),\n        tenants: HashMap::new(),\n    };\n    let mut scheduler = Scheduler::new(generator, concurrency);\n\n    scheduler\n        .run(command_queue, background_jobs_can_start, cancel)\n        .instrument(info_span!(\"heatmap_upload_scheduler\"))\n        .await\n}\n\n/// This type is owned by a single task 
([`heatmap_uploader_task`]) which runs an event\n/// handling loop and mutates it as needed: there are no locks here, because that event loop\n/// can hold &mut references to this type throughout.\nstruct HeatmapUploader {\n    tenant_manager: Arc<TenantManager>,\n    remote_storage: GenericRemoteStorage,\n    cancel: CancellationToken,\n\n    tenants: HashMap<TenantShardId, UploaderTenantState>,\n}\n\nstruct WriteInProgress {\n    barrier: Barrier,\n}\n\nimpl RunningJob for WriteInProgress {\n    fn get_barrier(&self) -> Barrier {\n        self.barrier.clone()\n    }\n}\n\nstruct UploadPending {\n    tenant: Arc<TenantShard>,\n    last_upload: Option<LastUploadState>,\n    target_time: Option<Instant>,\n    period: Option<Duration>,\n}\n\nimpl scheduler::PendingJob for UploadPending {\n    fn get_tenant_shard_id(&self) -> &TenantShardId {\n        self.tenant.get_tenant_shard_id()\n    }\n}\n\nstruct WriteComplete {\n    tenant_shard_id: TenantShardId,\n    completed_at: Instant,\n    uploaded: Option<LastUploadState>,\n    next_upload: Option<Instant>,\n}\n\nimpl scheduler::Completion for WriteComplete {\n    fn get_tenant_shard_id(&self) -> &TenantShardId {\n        &self.tenant_shard_id\n    }\n}\n\n/// The heatmap uploader keeps a little bit of per-tenant state, mainly to remember\n/// when we last did a write.  We only populate this after doing at least one\n/// write for a tenant -- this avoids holding state for tenants that have\n/// uploads disabled.\nstruct UploaderTenantState {\n    // This Weak only exists to enable culling idle instances of this type\n    // when the Tenant has been deallocated.\n    tenant: Weak<TenantShard>,\n\n    /// Digest of the serialized heatmap that we last successfully uploaded\n    last_upload_state: Option<LastUploadState>,\n\n    /// When the last upload attempt completed (may have been successful or failed)\n    last_upload: Option<Instant>,\n\n    /// When should we next do an upload?  
None means never.\n    next_upload: Option<Instant>,\n}\n\ntype Scheduler = TenantBackgroundJobs<\n    HeatmapUploader,\n    UploadPending,\n    WriteInProgress,\n    WriteComplete,\n    UploadCommand,\n>;\n\nimpl JobGenerator<UploadPending, WriteInProgress, WriteComplete, UploadCommand>\n    for HeatmapUploader\n{\n    async fn schedule(&mut self) -> SchedulingResult<UploadPending> {\n        // Cull any entries in self.tenants whose Arc<Tenant> is gone\n        self.tenants\n            .retain(|_k, v| v.tenant.upgrade().is_some() && v.next_upload.is_some());\n\n        let now = Instant::now();\n\n        let mut result = SchedulingResult {\n            jobs: Vec::new(),\n            want_interval: None,\n        };\n\n        let tenants = self.tenant_manager.get_attached_active_tenant_shards();\n\n        yielding_loop(1000, &self.cancel, tenants.into_iter(), |tenant| {\n            let period = match tenant.get_heatmap_period() {\n                None => {\n                    // Heatmaps are disabled for this tenant\n                    return;\n                }\n                Some(period) => {\n                    // If any tenant has asked for uploads more frequent than our scheduling interval,\n                    // reduce it to match so that we can keep up.  
This is mainly useful in testing, where\n                    // we may set rather short intervals.\n                    result.want_interval = match result.want_interval {\n                        None => Some(period),\n                        Some(existing) => Some(std::cmp::min(period, existing)),\n                    };\n\n                    period\n                }\n            };\n\n            // Stale attachments do not upload anything: if we are in this state, there is probably some\n            // other attachment in mode Single or Multi running on another pageserver, and we don't\n            // want to thrash and overwrite their heatmap uploads.\n            if tenant.get_attach_mode() == AttachmentMode::Stale {\n                return;\n            }\n\n            // Create an entry in self.tenants if one doesn't already exist: this will later be updated\n            // with the completion time in on_completion.\n            let state = self\n                .tenants\n                .entry(*tenant.get_tenant_shard_id())\n                .or_insert_with(|| UploaderTenantState {\n                    tenant: Arc::downgrade(&tenant),\n                    last_upload: None,\n                    next_upload: Some(now.checked_add(period_warmup(period)).unwrap_or(now)),\n                    last_upload_state: None,\n                });\n\n            // Decline to do the upload if insufficient time has passed\n            if state.next_upload.map(|nu| nu > now).unwrap_or(false) {\n                return;\n            }\n\n            let last_upload = state.last_upload_state.clone();\n            result.jobs.push(UploadPending {\n                tenant,\n                last_upload,\n                target_time: state.next_upload,\n                period: Some(period),\n            });\n        })\n        .await\n        .ok();\n\n        result\n    }\n\n    fn spawn(\n        &mut self,\n        job: UploadPending,\n    ) -> (\n        WriteInProgress,\n 
       Pin<Box<dyn Future<Output = WriteComplete> + Send>>,\n    ) {\n        let UploadPending {\n            tenant,\n            last_upload,\n            target_time,\n            period,\n        } = job;\n\n        let remote_storage = self.remote_storage.clone();\n        let (completion, barrier) = utils::completion::channel();\n        let tenant_shard_id = *tenant.get_tenant_shard_id();\n        (WriteInProgress { barrier }, Box::pin(async move {\n            // Guard for the barrier in [`WriteInProgress`]\n            let _completion = completion;\n\n            let started_at = Instant::now();\n            let uploaded = match upload_tenant_heatmap(remote_storage, &tenant, last_upload.clone()).await {\n                Ok(UploadHeatmapOutcome::Uploaded(uploaded)) => {\n                    let duration = Instant::now().duration_since(started_at);\n                    SECONDARY_MODE\n                        .upload_heatmap_duration\n                        .observe(duration.as_secs_f64());\n                    SECONDARY_MODE.upload_heatmap.inc();\n                    Some(uploaded)\n                }\n                Ok(UploadHeatmapOutcome::NoChange | UploadHeatmapOutcome::Skipped) => last_upload,\n                Err(UploadHeatmapError::Upload(e)) => {\n                    tracing::warn!(\n                        \"Failed to upload heatmap for tenant {}: {e:#}\",\n                        tenant.get_tenant_shard_id(),\n                    );\n                    let duration = Instant::now().duration_since(started_at);\n                    SECONDARY_MODE\n                        .upload_heatmap_duration\n                        .observe(duration.as_secs_f64());\n                    SECONDARY_MODE.upload_heatmap_errors.inc();\n                    last_upload\n                }\n                Err(UploadHeatmapError::Cancelled) => {\n                    tracing::info!(\"Cancelled heatmap upload, shutting down\");\n                    last_upload\n          
      }\n            };\n\n            let now = Instant::now();\n\n            // If the job had a target execution time, we may check our final execution\n            // time against that for observability purposes.\n            if let (Some(target_time), Some(period)) = (target_time, period) {\n                // Elapsed time includes any scheduling lag as well as the execution of the job\n                let elapsed = now.duration_since(target_time);\n\n                warn_when_period_overrun(elapsed, period, BackgroundLoopKind::HeatmapUpload);\n            }\n\n            let next_upload = tenant\n                .get_heatmap_period()\n                .and_then(|period| now.checked_add(period_jitter(period, 5)));\n\n            WriteComplete {\n                    tenant_shard_id: *tenant.get_tenant_shard_id(),\n                    completed_at: now,\n                    uploaded,\n                    next_upload,\n                }\n        }.instrument(info_span!(parent: None, \"heatmap_upload\", tenant_id=%tenant_shard_id.tenant_id, shard_id=%tenant_shard_id.shard_slug()))))\n    }\n\n    fn on_command(\n        &mut self,\n        command: UploadCommand,\n    ) -> Result<UploadPending, SecondaryTenantError> {\n        let tenant_shard_id = command.get_tenant_shard_id();\n\n        tracing::info!(\n            tenant_id=%tenant_shard_id.tenant_id, shard_id=%tenant_shard_id.shard_slug(),\n            \"Starting heatmap write on command\");\n        let tenant = self\n            .tenant_manager\n            .get_attached_tenant_shard(*tenant_shard_id)?;\n        if !tenant.is_active() {\n            return Err(GetTenantError::NotActive(*tenant_shard_id).into());\n        }\n\n        Ok(UploadPending {\n            // Ignore our state for last digest: this forces an upload even if nothing has changed\n            last_upload: None,\n            tenant,\n            target_time: None,\n            period: None,\n        })\n    }\n\n    
#[instrument(skip_all, fields(tenant_id=%completion.tenant_shard_id.tenant_id, shard_id=%completion.tenant_shard_id.shard_slug()))]\n    fn on_completion(&mut self, completion: WriteComplete) {\n        tracing::debug!(\"Heatmap upload completed\");\n        let WriteComplete {\n            tenant_shard_id,\n            completed_at,\n            uploaded,\n            next_upload,\n        } = completion;\n        use std::collections::hash_map::Entry;\n        match self.tenants.entry(tenant_shard_id) {\n            Entry::Vacant(_) => {\n                // Tenant state was dropped, nothing to update.\n            }\n            Entry::Occupied(mut entry) => {\n                entry.get_mut().last_upload = Some(completed_at);\n                entry.get_mut().last_upload_state = uploaded;\n                entry.get_mut().next_upload = next_upload\n            }\n        }\n    }\n}\n\nenum UploadHeatmapOutcome {\n    /// We successfully wrote to remote storage, with this digest.\n    Uploaded(LastUploadState),\n    /// We did not upload because the heatmap digest was unchanged since the last upload\n    NoChange,\n    /// We skipped the upload for some reason, such as tenant/timeline not ready\n    Skipped,\n}\n\n#[derive(thiserror::Error, Debug)]\nenum UploadHeatmapError {\n    #[error(\"Cancelled\")]\n    Cancelled,\n\n    #[error(transparent)]\n    Upload(#[from] anyhow::Error),\n}\n\n/// Digests describing the heatmap we most recently uploaded successfully.\n///\n/// md5 is generally a bad hash.  We use it because it's convenient for interop with AWS S3's ETag,\n/// which is also an md5sum.\n#[derive(Clone)]\nstruct LastUploadState {\n    // Digest of json-encoded HeatMapTenant\n    uploaded_digest: md5::Digest,\n\n    // Digest without atimes set.\n    layers_only_digest: md5::Digest,\n}\n\n/// The inner upload operation.  
This will skip if `last_upload` is Some and its digest matches the digest\n/// of the object we would have uploaded.\nasync fn upload_tenant_heatmap(\n    remote_storage: GenericRemoteStorage,\n    tenant: &Arc<TenantShard>,\n    last_upload: Option<LastUploadState>,\n) -> Result<UploadHeatmapOutcome, UploadHeatmapError> {\n    debug_assert_current_span_has_tenant_id();\n\n    let generation = tenant.get_generation();\n    debug_assert!(!generation.is_none());\n    if generation.is_none() {\n        // We do not expect this: None generations should only appear in historic layer metadata, not in running Tenants\n        tracing::warn!(\"Skipping heatmap upload for tenant with generation==None\");\n        return Ok(UploadHeatmapOutcome::Skipped);\n    }\n\n    let mut heatmap = HeatMapTenant {\n        timelines: Vec::new(),\n        generation,\n        upload_period_ms: tenant.get_heatmap_period().map(|p| p.as_millis()),\n    };\n    let timelines = tenant.timelines.lock().unwrap().clone();\n\n    // Ensure that Tenant::shutdown waits for any upload in flight: this is needed because otherwise\n    // when we delete a tenant, we might race with an upload in flight and end up leaving a heatmap behind\n    // in remote storage.\n    let Ok(_guard) = tenant.gate.enter() else {\n        tracing::info!(\"Skipping heatmap upload for tenant which is shutting down\");\n        return Err(UploadHeatmapError::Cancelled);\n    };\n\n    for (timeline_id, timeline) in timelines {\n        let heatmap_timeline = timeline.generate_heatmap().await;\n        match heatmap_timeline {\n            None => {\n                tracing::debug!(\n                    \"Skipping heatmap upload because timeline {timeline_id} is not ready\"\n                );\n                return Ok(UploadHeatmapOutcome::Skipped);\n            }\n            Some(heatmap_timeline) => {\n                heatmap.timelines.push(heatmap_timeline);\n            }\n        }\n    }\n\n    // Serialize the heatmap\n    
let bytes = serde_json::to_vec(&heatmap).map_err(|e| anyhow::anyhow!(e))?;\n\n    // Drop out early if nothing changed since our last upload\n    let digest = md5::compute(&bytes);\n    if Some(&digest) == last_upload.as_ref().map(|d| &d.uploaded_digest) {\n        return Ok(UploadHeatmapOutcome::NoChange);\n    }\n\n    // Calculate a digest that omits atimes, so that we can distinguish actual changes in\n    // layers from changes only in atimes.\n    let heatmap_size_bytes = heatmap.get_stats().bytes;\n    let layers_only_bytes =\n        serde_json::to_vec(&heatmap.strip_atimes()).map_err(|e| anyhow::anyhow!(e))?;\n    let layers_only_digest = md5::compute(&layers_only_bytes);\n    if heatmap_size_bytes < tenant.get_checkpoint_distance() {\n        // For small tenants, skip upload if only atimes changed. This avoids doing frequent\n        // uploads from long-idle tenants whose atimes are just incremented by periodic\n        // size calculations.\n        if Some(&layers_only_digest) == last_upload.as_ref().map(|d| &d.layers_only_digest) {\n            return Ok(UploadHeatmapOutcome::NoChange);\n        }\n    }\n\n    let bytes = bytes::Bytes::from(bytes);\n    let size = bytes.len();\n\n    let path = remote_heatmap_path(tenant.get_tenant_shard_id());\n\n    let cancel = &tenant.cancel;\n\n    tracing::debug!(\"Uploading {size} byte heatmap to {path}\");\n    if let Err(e) = backoff::retry(\n        || async {\n            let bytes = futures::stream::once(futures::future::ready(Ok(bytes.clone())));\n            remote_storage\n                .upload_storage_object(bytes, size, &path, cancel)\n                .await\n        },\n        TimeoutOrCancel::caused_by_cancel,\n        3,\n        u32::MAX,\n        \"Uploading heatmap\",\n        cancel,\n    )\n    .await\n    .ok_or_else(|| anyhow::anyhow!(\"Shutting down\"))\n    .and_then(|x| x)\n    {\n        if cancel.is_cancelled() {\n            return Err(UploadHeatmapError::Cancelled);\n        } 
else {\n            return Err(e.into());\n        }\n    }\n\n    // After a successful upload persist the fresh heatmap to disk.\n    // When restarting, the tenant will read the heatmap from disk\n    // and additively generate a new heatmap (see [`Timeline::generate_heatmap`]).\n    // If the heatmap is stale, the additive generation can lead to keeping previously\n    // evicted timelines on the secondary's disk.\n    let tenant_shard_id = tenant.get_tenant_shard_id();\n    let heatmap_path = tenant.conf.tenant_heatmap_path(tenant_shard_id);\n    let temp_path = path_with_suffix_extension(&heatmap_path, TEMP_FILE_SUFFIX);\n    if let Err(err) = VirtualFile::crashsafe_overwrite(heatmap_path, temp_path, bytes).await {\n        tracing::warn!(\"Non fatal IO error writing to disk after heatmap upload: {err}\");\n    }\n\n    tracing::info!(\"Successfully uploaded {size} byte heatmap to {path}\");\n\n    Ok(UploadHeatmapOutcome::Uploaded(LastUploadState {\n        uploaded_digest: digest,\n        layers_only_digest,\n    }))\n}\n"
  },
  {
    "path": "pageserver/src/tenant/secondary/scheduler.rs",
    "content": "use std::collections::HashMap;\nuse std::marker::PhantomData;\nuse std::pin::Pin;\nuse std::time::{Duration, Instant};\n\nuse futures::Future;\nuse pageserver_api::shard::TenantShardId;\nuse rand::Rng;\nuse tokio::task::JoinSet;\nuse tokio_util::sync::CancellationToken;\nuse utils::completion::Barrier;\nuse utils::yielding_loop::yielding_loop;\n\nuse super::{CommandRequest, CommandResponse, SecondaryTenantError};\n\n/// Scheduling interval is the time between calls to JobGenerator::schedule.\n/// When we schedule jobs, the job generator may provide a hint of its preferred\n/// interval, which we will respect within these intervals.\nconst MAX_SCHEDULING_INTERVAL: Duration = Duration::from_secs(10);\nconst MIN_SCHEDULING_INTERVAL: Duration = Duration::from_secs(1);\n\n/// Jitter a Duration by an integer percentage.  Returned values are uniform\n/// in the range 100-pct..100+pct (i.e. a 5% jitter is 5% either way: a ~10% range)\npub(super) fn period_jitter(d: Duration, pct: u32) -> Duration {\n    if d == Duration::ZERO {\n        d\n    } else {\n        rand::rng().random_range((d * (100 - pct)) / 100..(d * (100 + pct)) / 100)\n    }\n}\n\n/// When a periodic task first starts, it should wait for some time in the range 0..period, so\n/// that starting many such tasks at the same time spreads them across the time range.\npub(super) fn period_warmup(period: Duration) -> Duration {\n    if period == Duration::ZERO {\n        period\n    } else {\n        rand::rng().random_range(Duration::ZERO..period)\n    }\n}\n\n/// Scheduling helper for background work across many tenants.\n///\n/// Systems that need to run background work across many tenants may use this type\n/// to schedule jobs within a concurrency limit, along with their own [`JobGenerator`]\n/// implementation to provide the work to execute.  
This is a simple scheduler that just\n/// polls the generator for outstanding work, replacing its queue of pending work with\n/// what the generator yields on each call: the job generator can change its mind about\n/// the order of jobs between calls.  The job generator is notified when jobs complete,\n/// and additionally may expose a command hook to generate jobs on-demand (e.g. to implement\n/// admin APIs).\n///\n/// For an example see [`crate::tenant::secondary::heatmap_uploader`]\n///\n/// G: A JobGenerator that this scheduler will poll to find pending jobs\n/// PJ: 'Pending Job': type for job descriptors that are ready to run\n/// RJ: 'Running Job' type' for jobs that have been spawned\n/// C : 'Completion' type that spawned jobs will send when they finish\n/// CMD: 'Command' type that the job generator will accept to create jobs on-demand\npub(super) struct TenantBackgroundJobs<G, PJ, RJ, C, CMD>\nwhere\n    G: JobGenerator<PJ, RJ, C, CMD>,\n    C: Completion,\n    PJ: PendingJob,\n    RJ: RunningJob,\n{\n    generator: G,\n\n    /// Ready to run.  Will progress to `running` once concurrent limit is satisfied, or\n    /// be removed on next scheduling pass.\n    pending: std::collections::VecDeque<PJ>,\n\n    /// Tasks currently running in Self::tasks for these tenants.  Check this map\n    /// before pushing more work into pending for the same tenant.\n    running: HashMap<TenantShardId, RJ>,\n\n    tasks: JoinSet<C>,\n\n    concurrency: usize,\n\n    /// How often we would like schedule_interval to be called.\n    pub(super) scheduling_interval: Duration,\n\n    _phantom: PhantomData<(PJ, RJ, C, CMD)>,\n}\n\npub(crate) trait JobGenerator<PJ, RJ, C, CMD>\nwhere\n    C: Completion,\n    PJ: PendingJob,\n    RJ: RunningJob,\n{\n    /// Called at each scheduling interval.  Return a list of jobs to run, most urgent first.\n    ///\n    /// This function may be expensive (e.g. 
walk all tenants), but should not do any I/O.\n    /// Implementations should take care to yield the executor periodically if running\n    /// very long loops.\n    ///\n    /// Yielding a job here does _not_ guarantee that it will run: if the queue of pending\n    /// jobs is not drained by the next scheduling interval, pending jobs will be cleared\n    /// and re-generated.\n    async fn schedule(&mut self) -> SchedulingResult<PJ>;\n\n    /// Called when a pending job is ready to be run.\n    ///\n    /// The job generation provides a future, and a RJ (Running Job) descriptor that tracks it.\n    fn spawn(&mut self, pending_job: PJ) -> (RJ, Pin<Box<dyn Future<Output = C> + Send>>);\n\n    /// Called when a job previously spawned with spawn() transmits its completion\n    fn on_completion(&mut self, completion: C);\n\n    /// Called when a command is received.  A job will be spawned immediately if the return\n    /// value is Some, ignoring concurrency limits and the pending queue.\n    fn on_command(&mut self, cmd: CMD) -> Result<PJ, SecondaryTenantError>;\n}\n\n/// [`JobGenerator`] returns this to provide pending jobs, and hints about scheduling\npub(super) struct SchedulingResult<PJ> {\n    pub(super) jobs: Vec<PJ>,\n    /// The job generator would like to be called again this soon\n    pub(super) want_interval: Option<Duration>,\n}\n\n/// See [`TenantBackgroundJobs`].\npub(super) trait PendingJob {\n    fn get_tenant_shard_id(&self) -> &TenantShardId;\n}\n\n/// See [`TenantBackgroundJobs`].\npub(super) trait Completion: Send + 'static {\n    fn get_tenant_shard_id(&self) -> &TenantShardId;\n}\n\n/// See [`TenantBackgroundJobs`].\npub(super) trait RunningJob {\n    fn get_barrier(&self) -> Barrier;\n}\n\nimpl<G, PJ, RJ, C, CMD> TenantBackgroundJobs<G, PJ, RJ, C, CMD>\nwhere\n    C: Completion,\n    PJ: PendingJob,\n    RJ: RunningJob,\n    G: JobGenerator<PJ, RJ, C, CMD>,\n{\n    pub(super) fn new(generator: G, concurrency: usize) -> Self {\n        Self {\n    
        generator,\n            pending: std::collections::VecDeque::new(),\n            running: HashMap::new(),\n            tasks: JoinSet::new(),\n            concurrency,\n            scheduling_interval: MAX_SCHEDULING_INTERVAL,\n            _phantom: PhantomData,\n        }\n    }\n\n    pub(super) async fn run(\n        &mut self,\n        mut command_queue: tokio::sync::mpsc::Receiver<CommandRequest<CMD>>,\n        background_jobs_can_start: Barrier,\n        cancel: CancellationToken,\n    ) {\n        tracing::info!(\"Waiting for background_jobs_can start...\");\n        background_jobs_can_start.wait().await;\n        tracing::info!(\"background_jobs_can is ready, proceeding.\");\n\n        while !cancel.is_cancelled() {\n            // Look for new work: this is relatively expensive because we have to go acquire the lock on\n            // the tenant manager to retrieve tenants, and then iterate over them to figure out which ones\n            // require an upload.\n            self.schedule_iteration(&cancel).await;\n\n            if cancel.is_cancelled() {\n                return;\n            }\n\n            // Schedule some work, if concurrency limit permits it\n            self.spawn_pending();\n\n            // This message is printed every scheduling iteration as proof of liveness when looking at logs\n            tracing::info!(\n                \"Status: {} tasks running, {} pending\",\n                self.running.len(),\n                self.pending.len()\n            );\n\n            // Between scheduling iterations, we will:\n            //  - Drain any complete tasks and spawn pending tasks\n            //  - Handle incoming administrative commands\n            //  - Check our cancellation token\n            let next_scheduling_iteration = Instant::now()\n                .checked_add(self.scheduling_interval)\n                .unwrap_or_else(|| {\n                    tracing::warn!(\n                        \"Scheduling interval invalid 
({}s)\",\n                        self.scheduling_interval.as_secs_f64()\n                    );\n                    // unwrap(): this constant is small, cannot fail to add to time unless\n                    // we are close to the end of the universe.\n                    Instant::now().checked_add(MIN_SCHEDULING_INTERVAL).unwrap()\n                });\n            loop {\n                tokio::select! {\n                    _ = cancel.cancelled() => {\n                        tracing::info!(\"joining tasks\");\n                        // We do not simply drop the JoinSet, in order to have an orderly shutdown without cancellation.\n                        // It is the callers responsibility to make sure that the tasks they scheduled\n                        // respect an appropriate cancellation token, to shut down promptly.  It is only\n                        // safe to wait on joining these tasks because we can see the cancellation token\n                        // has been set.\n                        while let Some(_r) = self.tasks.join_next().await {}\n                        tracing::info!(\"terminating on cancellation token.\");\n\n                        break;\n                    },\n                    _ = tokio::time::sleep(next_scheduling_iteration.duration_since(Instant::now())) => {\n                        tracing::debug!(\"woke for scheduling interval\");\n                        break;},\n                    cmd = command_queue.recv() => {\n                        tracing::debug!(\"woke for command queue\");\n                        let cmd = match cmd {\n                            Some(c) =>c,\n                            None => {\n                                // SecondaryController was destroyed, and this has raced with\n                                // our CancellationToken\n                                tracing::info!(\"terminating on command queue destruction\");\n                                cancel.cancel();\n                
                break;\n                            }\n                        };\n\n                        let CommandRequest{\n                            response_tx,\n                            payload\n                        } = cmd;\n                        self.handle_command(payload, response_tx);\n                    },\n                    _ = async {\n                        let completion = self.process_next_completion().await;\n                        match completion {\n                            Some(c) => {\n                                self.generator.on_completion(c);\n                                if !cancel.is_cancelled() {\n                                    self.spawn_pending();\n                                }\n                            },\n                            None => {\n                                // Nothing is running, so just wait: expect that this future\n                                // will be dropped when something in the outer select! 
fires.\n                                cancel.cancelled().await;\n                            }\n                        }\n\n                     } => {}\n                }\n            }\n        }\n    }\n\n    fn do_spawn(&mut self, job: PJ) {\n        let tenant_shard_id = *job.get_tenant_shard_id();\n        let (in_progress, fut) = self.generator.spawn(job);\n\n        self.tasks.spawn(fut);\n\n        let replaced = self.running.insert(tenant_shard_id, in_progress);\n        debug_assert!(replaced.is_none());\n        if replaced.is_some() {\n            tracing::warn!(%tenant_shard_id, \"Unexpectedly spawned a task when one was already running\")\n        }\n    }\n\n    /// For all pending tenants that are eligible for execution, spawn their task.\n    ///\n    /// Caller provides the spawn operation, we track the resulting execution.\n    fn spawn_pending(&mut self) {\n        while !self.pending.is_empty() && self.running.len() < self.concurrency {\n            // unwrap: loop condition includes !is_empty()\n            let pending = self.pending.pop_front().unwrap();\n            if !self.running.contains_key(pending.get_tenant_shard_id()) {\n                self.do_spawn(pending);\n            }\n        }\n    }\n\n    /// For administrative commands: skip the pending queue, ignore concurrency limits\n    fn spawn_now(&mut self, job: PJ) -> &RJ {\n        let tenant_shard_id = *job.get_tenant_shard_id();\n        self.do_spawn(job);\n        self.running\n            .get(&tenant_shard_id)\n            .expect(\"We just inserted this\")\n    }\n\n    /// Wait until the next task completes, and handle its completion\n    ///\n    /// Cancellation: this method is cancel-safe.\n    async fn process_next_completion(&mut self) -> Option<C> {\n        match self.tasks.join_next().await {\n            Some(r) => {\n                // We use a channel to drive completions, but also\n                // need to drain the JoinSet to avoid completed tasks\n     
           // accumulating.  These calls are 1:1 because every task\n                // we spawn into this joinset submits its result to the channel.\n                let completion = r.expect(\"Panic in background task\");\n\n                self.running.remove(completion.get_tenant_shard_id());\n                Some(completion)\n            }\n            None => {\n                // Nothing is running, so we have nothing to wait for.  We may drop out: the\n                // main event loop will call us again after the next time it has run something.\n                None\n            }\n        }\n    }\n\n    /// Convert the command into a pending job, spawn it, and when the spawned\n    /// job completes, send the result down `response_tx`.\n    fn handle_command(\n        &mut self,\n        cmd: CMD,\n        response_tx: tokio::sync::oneshot::Sender<CommandResponse>,\n    ) {\n        let job = match self.generator.on_command(cmd) {\n            Ok(j) => j,\n            Err(e) => {\n                response_tx.send(CommandResponse { result: Err(e) }).ok();\n                return;\n            }\n        };\n\n        let tenant_shard_id = job.get_tenant_shard_id();\n        let barrier = if let Some(barrier) = self.get_running(tenant_shard_id) {\n            tracing::info!(\n                tenant_id=%tenant_shard_id.tenant_id,\n                shard_id=%tenant_shard_id.shard_slug(),\n                \"Command already running, waiting for it\"\n            );\n            barrier\n        } else {\n            let running = self.spawn_now(job);\n            running.get_barrier().clone()\n        };\n\n        // This task does no I/O: it only listens for a barrier's completion and then\n        // sends to the command response channel.  
It is therefore safe to spawn this without\n        // any gates/task_mgr hooks.\n        tokio::task::spawn(async move {\n            barrier.wait().await;\n\n            response_tx.send(CommandResponse { result: Ok(()) }).ok();\n        });\n    }\n\n    fn get_running(&self, tenant_shard_id: &TenantShardId) -> Option<Barrier> {\n        self.running.get(tenant_shard_id).map(|r| r.get_barrier())\n    }\n\n    /// Periodic execution phase: inspect all attached tenants and schedule any work they require.\n    ///\n    /// The type in `tenants` should be a tenant-like structure, e.g. [`crate::tenant::TenantShard`] or [`crate::tenant::secondary::SecondaryTenant`]\n    ///\n    /// This function resets the pending list: it is assumed that the caller may change their mind about\n    /// which tenants need work between calls to schedule_iteration.\n    async fn schedule_iteration(&mut self, cancel: &CancellationToken) {\n        let SchedulingResult {\n            jobs,\n            want_interval,\n        } = self.generator.schedule().await;\n\n        // Adjust interval based on feedback from the job generator\n        if let Some(want_interval) = want_interval {\n            // Calculation uses second granularity: this scheduler is not intended for high frequency tasks\n            self.scheduling_interval = Duration::from_secs(std::cmp::min(\n                std::cmp::max(MIN_SCHEDULING_INTERVAL.as_secs(), want_interval.as_secs()),\n                MAX_SCHEDULING_INTERVAL.as_secs(),\n            ));\n        }\n\n        // The priority order of previously scheduled work may be invalidated by current state: drop\n        // all pending work (it will be re-scheduled if still needed)\n        self.pending.clear();\n\n        // While iterating over the potentially-long list of tenants, we will periodically yield\n        // to avoid blocking executor.\n        yielding_loop(1000, cancel, jobs.into_iter(), |job| {\n            // Skip tenants that already have a write 
in flight\n            if !self.running.contains_key(job.get_tenant_shard_id()) {\n                self.pending.push_back(job);\n            }\n        })\n        .await\n        .ok();\n    }\n}\n"
  },
  {
    "path": "pageserver/src/tenant/secondary.rs",
    "content": "mod downloader;\npub mod heatmap;\nmod heatmap_uploader;\nmod scheduler;\n\nuse std::sync::Arc;\nuse std::time::SystemTime;\n\nuse metrics::UIntGauge;\nuse pageserver_api::models;\nuse pageserver_api::shard::{ShardIdentity, TenantShardId};\nuse remote_storage::GenericRemoteStorage;\nuse tokio::task::JoinHandle;\nuse tokio_util::sync::CancellationToken;\nuse tracing::instrument;\nuse utils::completion::Barrier;\nuse utils::id::TimelineId;\nuse utils::sync::gate::Gate;\n\nuse self::downloader::{SecondaryDetail, downloader_task};\nuse self::heatmap_uploader::heatmap_uploader_task;\nuse super::GetTenantError;\nuse super::config::SecondaryLocationConfig;\nuse super::mgr::TenantManager;\nuse super::span::debug_assert_current_span_has_tenant_id;\nuse super::storage_layer::LayerName;\nuse crate::context::RequestContext;\nuse crate::disk_usage_eviction_task::DiskUsageEvictionInfo;\nuse crate::metrics::{SECONDARY_HEATMAP_TOTAL_SIZE, SECONDARY_RESIDENT_PHYSICAL_SIZE};\nuse crate::task_mgr::{self, BACKGROUND_RUNTIME, TaskKind};\n\nenum DownloadCommand {\n    Download(TenantShardId),\n}\nenum UploadCommand {\n    Upload(TenantShardId),\n}\n\nimpl UploadCommand {\n    fn get_tenant_shard_id(&self) -> &TenantShardId {\n        match self {\n            Self::Upload(id) => id,\n        }\n    }\n}\n\nimpl DownloadCommand {\n    fn get_tenant_shard_id(&self) -> &TenantShardId {\n        match self {\n            Self::Download(id) => id,\n        }\n    }\n}\n\nstruct CommandRequest<T> {\n    payload: T,\n    response_tx: tokio::sync::oneshot::Sender<CommandResponse>,\n}\n\nstruct CommandResponse {\n    result: Result<(), SecondaryTenantError>,\n}\n\n#[derive(thiserror::Error, Debug)]\npub(crate) enum SecondaryTenantError {\n    #[error(\"{0}\")]\n    GetTenant(GetTenantError),\n    #[error(\"shutting down\")]\n    ShuttingDown,\n}\n\nimpl From<GetTenantError> for SecondaryTenantError {\n    fn from(gte: GetTenantError) -> Self {\n        Self::GetTenant(gte)\n    
}\n}\n\n// Whereas [`Tenant`] represents an attached tenant, this type represents the work\n// we do for secondary tenant locations: where we are not serving clients or\n// ingesting WAL, but we are maintaining a warm cache of layer files.\n//\n// This type is all about the _download_ path for secondary mode.  The upload path\n// runs separately (see [`heatmap_uploader`]) while a regular attached `Tenant` exists.\n//\n// This structure coordinates TenantManager and SecondaryDownloader,\n// so that the downloader can indicate which tenants it is currently\n// operating on, and the manager can indicate when a particular\n// secondary tenant should cancel any work in flight.\n#[derive(Debug)]\npub(crate) struct SecondaryTenant {\n    /// Carrying a tenant shard ID simplifies callers such as the downloader\n    /// which need to organize many of these objects by ID.\n    tenant_shard_id: TenantShardId,\n\n    /// Cancellation token indicates to SecondaryDownloader that it should stop doing\n    /// any work for this tenant at the next opportunity.\n    pub(crate) cancel: CancellationToken,\n\n    pub(crate) gate: Gate,\n\n    // Secondary mode does not need the full shard identity or the pageserver_api::models::TenantConfig.  
However,\n    // storing these enables us to report our full LocationConf, enabling convenient reconciliation\n    // by the control plane (see [`Self::get_location_conf`])\n    pub(crate) shard_identity: ShardIdentity,\n    tenant_conf: std::sync::Mutex<pageserver_api::models::TenantConfig>,\n\n    // Internal state used by the Downloader.\n    detail: std::sync::Mutex<SecondaryDetail>,\n\n    // Public state indicating overall progress of downloads relative to the last heatmap seen\n    pub(crate) progress: std::sync::Mutex<models::SecondaryProgress>,\n\n    // Sum of layer sizes on local disk\n    pub(super) resident_size_metric: UIntGauge,\n\n    // Sum of layer sizes in the most recently downloaded heatmap\n    pub(super) heatmap_total_size_metric: UIntGauge,\n}\n\nimpl SecondaryTenant {\n    pub(crate) fn new(\n        tenant_shard_id: TenantShardId,\n        shard_identity: ShardIdentity,\n        tenant_conf: pageserver_api::models::TenantConfig,\n        config: &SecondaryLocationConfig,\n    ) -> Arc<Self> {\n        let tenant_id = tenant_shard_id.tenant_id.to_string();\n        let shard_id = format!(\"{}\", tenant_shard_id.shard_slug());\n        let resident_size_metric = SECONDARY_RESIDENT_PHYSICAL_SIZE\n            .get_metric_with_label_values(&[&tenant_id, &shard_id])\n            .unwrap();\n\n        let heatmap_total_size_metric = SECONDARY_HEATMAP_TOTAL_SIZE\n            .get_metric_with_label_values(&[&tenant_id, &shard_id])\n            .unwrap();\n\n        Arc::new(Self {\n            tenant_shard_id,\n            // todo: shall we make this a descendent of the\n            // main cancellation token, or is it sufficient that\n            // on shutdown we walk the tenants and fire their\n            // individual cancellations?\n            cancel: CancellationToken::new(),\n            gate: Gate::default(),\n\n            shard_identity,\n            tenant_conf: std::sync::Mutex::new(tenant_conf),\n\n            detail: 
std::sync::Mutex::new(SecondaryDetail::new(config.clone())),\n\n            progress: std::sync::Mutex::default(),\n\n            resident_size_metric,\n            heatmap_total_size_metric,\n        })\n    }\n\n    pub(crate) fn tenant_shard_id(&self) -> TenantShardId {\n        self.tenant_shard_id\n    }\n\n    pub(crate) async fn shutdown(&self) {\n        self.cancel.cancel();\n\n        // Wait for any secondary downloader work to complete\n        self.gate.close().await;\n\n        self.validate_metrics();\n\n        // Metrics are subtracted from and/or removed eagerly.\n        // Deletions are done in the background via [`BackgroundPurges::spawn`].\n        let tenant_id = self.tenant_shard_id.tenant_id.to_string();\n        let shard_id = format!(\"{}\", self.tenant_shard_id.shard_slug());\n        let _ = SECONDARY_RESIDENT_PHYSICAL_SIZE.remove_label_values(&[&tenant_id, &shard_id]);\n        let _ = SECONDARY_HEATMAP_TOTAL_SIZE.remove_label_values(&[&tenant_id, &shard_id]);\n\n        self.detail\n            .lock()\n            .unwrap()\n            .drain_timelines(&self.tenant_shard_id, &self.resident_size_metric);\n    }\n\n    pub(crate) fn set_config(&self, config: &SecondaryLocationConfig) {\n        self.detail.lock().unwrap().config = config.clone();\n    }\n\n    pub(crate) fn set_tenant_conf(&self, config: &pageserver_api::models::TenantConfig) {\n        *(self.tenant_conf.lock().unwrap()) = config.clone();\n    }\n\n    /// For API access: generate a LocationConfig equivalent to the one that would be used to\n    /// create a Tenant in the same state.  
Do not use this in hot paths: it's for relatively\n    /// rare external API calls, like a reconciliation at startup.\n    pub(crate) fn get_location_conf(&self) -> models::LocationConfig {\n        let conf = self.detail.lock().unwrap().config.clone();\n\n        let conf = models::LocationConfigSecondary { warm: conf.warm };\n\n        let tenant_conf = self.tenant_conf.lock().unwrap().clone();\n        models::LocationConfig {\n            mode: models::LocationConfigMode::Secondary,\n            generation: None,\n            secondary_conf: Some(conf),\n            shard_number: self.tenant_shard_id.shard_number.0,\n            shard_count: self.tenant_shard_id.shard_count.literal(),\n            shard_stripe_size: self.shard_identity.stripe_size.0,\n            tenant_conf,\n        }\n    }\n\n    pub(crate) fn get_tenant_shard_id(&self) -> &TenantShardId {\n        &self.tenant_shard_id\n    }\n\n    pub(crate) fn get_layers_for_eviction(self: &Arc<Self>) -> (DiskUsageEvictionInfo, usize) {\n        self.detail.lock().unwrap().get_layers_for_eviction(self)\n    }\n\n    /// Cancellation safe, but on cancellation the eviction will go through\n    #[instrument(skip_all, fields(tenant_id=%self.tenant_shard_id.tenant_id, shard_id=%self.tenant_shard_id.shard_slug(), timeline_id=%timeline_id, name=%name))]\n    pub(crate) async fn evict_layer(self: &Arc<Self>, timeline_id: TimelineId, name: LayerName) {\n        debug_assert_current_span_has_tenant_id();\n\n        let guard = match self.gate.enter() {\n            Ok(g) => g,\n            Err(_) => {\n                tracing::debug!(\"Dropping layer evictions, secondary tenant shutting down\",);\n                return;\n            }\n        };\n\n        let now = SystemTime::now();\n        tracing::info!(\"Evicting secondary layer\");\n\n        let this = self.clone();\n\n        // spawn it to be cancellation safe\n        tokio::task::spawn_blocking(move || {\n            let _guard = guard;\n\n          
  // Update the timeline's state.  This does not have to be synchronized with\n            // the download process, because:\n            // - If downloader is racing with us to remove a file (e.g. because it is\n            //   removed from heatmap), then our mutual .remove() operations will both\n            //   succeed.\n            // - If downloader is racing with us to download the object (this would require\n            //   multiple eviction iterations to race with multiple download iterations), then\n            //   if we remove it from the state, the worst that happens is the downloader\n            //   downloads it again before re-inserting, or we delete the file but it remains\n            //   in the state map (in which case it will be downloaded if this secondary\n            //   tenant transitions to attached and tries to access it)\n            //\n            // The important assumption here is that the secondary timeline state does not\n            // have to 100% match what is on disk, because it's a best-effort warming\n            // of the cache.\n            let mut detail = this.detail.lock().unwrap();\n            if let Some(removed) =\n                detail.evict_layer(name, &timeline_id, now, &this.resident_size_metric)\n            {\n                // We might race with removal of the same layer during downloads, so finding the layer we\n                // were trying to remove is optional.  
Only issue the disk I/O to remove it if we found it.\n                removed.remove_blocking();\n            }\n        })\n        .await\n        .expect(\"secondary eviction should not have panicked\");\n    }\n\n    /// Exhaustive check that incrementally updated metrics match the actual state.\n    #[cfg(feature = \"testing\")]\n    fn validate_metrics(&self) {\n        let detail = self.detail.lock().unwrap();\n        let resident_size = detail.total_resident_size();\n\n        assert_eq!(resident_size, self.resident_size_metric.get());\n    }\n\n    #[cfg(not(feature = \"testing\"))]\n    fn validate_metrics(&self) {\n        // No-op in non-testing builds\n    }\n}\n\n/// The SecondaryController is a pseudo-rpc client for administrative control of secondary mode downloads,\n/// and heatmap uploads.  This is not a hot data path: it's used for:\n/// - Live migrations, where we want to ensure a migration destination has the freshest possible\n///   content before trying to cut over.\n/// - Tests, where we want to immediately upload/download for a particular tenant.\n///\n/// In normal operations, outside of migrations, uploads & downloads are autonomous and not driven by this interface.\npub struct SecondaryController {\n    upload_req_tx: tokio::sync::mpsc::Sender<CommandRequest<UploadCommand>>,\n    download_req_tx: tokio::sync::mpsc::Sender<CommandRequest<DownloadCommand>>,\n}\n\nimpl SecondaryController {\n    async fn dispatch<T>(\n        &self,\n        queue: &tokio::sync::mpsc::Sender<CommandRequest<T>>,\n        payload: T,\n    ) -> Result<(), SecondaryTenantError> {\n        let (response_tx, response_rx) = tokio::sync::oneshot::channel();\n\n        queue\n            .send(CommandRequest {\n                payload,\n                response_tx,\n            })\n            .await\n            .map_err(|_| SecondaryTenantError::ShuttingDown)?;\n\n        let response = response_rx\n            .await\n            .map_err(|_| 
SecondaryTenantError::ShuttingDown)?;\n\n        response.result\n    }\n\n    pub(crate) async fn upload_tenant(\n        &self,\n        tenant_shard_id: TenantShardId,\n    ) -> Result<(), SecondaryTenantError> {\n        self.dispatch(&self.upload_req_tx, UploadCommand::Upload(tenant_shard_id))\n            .await\n    }\n    pub(crate) async fn download_tenant(\n        &self,\n        tenant_shard_id: TenantShardId,\n    ) -> Result<(), SecondaryTenantError> {\n        self.dispatch(\n            &self.download_req_tx,\n            DownloadCommand::Download(tenant_shard_id),\n        )\n        .await\n    }\n}\n\npub struct GlobalTasks {\n    cancel: CancellationToken,\n    uploader: JoinHandle<()>,\n    downloader: JoinHandle<()>,\n}\n\nimpl GlobalTasks {\n    /// Caller is responsible for requesting shutdown via the cancellation token that was\n    /// passed to [`spawn_tasks`].\n    ///\n    /// # Panics\n    ///\n    /// This method panics if that token is not cancelled.\n    /// This is low-risk because we're calling this during process shutdown, so, a panic\n    /// will be informative but not cause undue downtime.\n    pub async fn wait(self) {\n        let Self {\n            cancel,\n            uploader,\n            downloader,\n        } = self;\n        assert!(\n            cancel.is_cancelled(),\n            \"must cancel cancellation token, otherwise the tasks will not shut down\"\n        );\n\n        let (uploader, downloader) = futures::future::join(uploader, downloader).await;\n        uploader.expect(\n            \"unreachable: exit_on_panic_or_error would catch the panic and exit the process\",\n        );\n        downloader.expect(\n            \"unreachable: exit_on_panic_or_error would catch the panic and exit the process\",\n        );\n    }\n}\n\npub fn spawn_tasks(\n    tenant_manager: Arc<TenantManager>,\n    remote_storage: GenericRemoteStorage,\n    background_jobs_can_start: Barrier,\n    cancel: CancellationToken,\n) -> 
(SecondaryController, GlobalTasks) {\n    let mgr_clone = tenant_manager.clone();\n    let storage_clone = remote_storage.clone();\n    let bg_jobs_clone = background_jobs_can_start.clone();\n\n    let (download_req_tx, download_req_rx) =\n        tokio::sync::mpsc::channel::<CommandRequest<DownloadCommand>>(16);\n    let (upload_req_tx, upload_req_rx) =\n        tokio::sync::mpsc::channel::<CommandRequest<UploadCommand>>(16);\n\n    let cancel_clone = cancel.clone();\n    let downloader = BACKGROUND_RUNTIME.spawn(task_mgr::exit_on_panic_or_error(\n        \"secondary tenant downloads\",\n        async move {\n            downloader_task(\n                mgr_clone,\n                storage_clone,\n                download_req_rx,\n                bg_jobs_clone,\n                cancel_clone,\n                RequestContext::new(\n                    TaskKind::SecondaryDownloads,\n                    crate::context::DownloadBehavior::Download,\n                ),\n            )\n            .await;\n            anyhow::Ok(())\n        },\n    ));\n\n    let cancel_clone = cancel.clone();\n    let uploader = BACKGROUND_RUNTIME.spawn(task_mgr::exit_on_panic_or_error(\n        \"heatmap uploads\",\n        async move {\n            heatmap_uploader_task(\n                tenant_manager,\n                remote_storage,\n                upload_req_rx,\n                background_jobs_can_start,\n                cancel_clone,\n            )\n            .await;\n            anyhow::Ok(())\n        },\n    ));\n\n    (\n        SecondaryController {\n            upload_req_tx,\n            download_req_tx,\n        },\n        GlobalTasks {\n            cancel,\n            uploader,\n            downloader,\n        },\n    )\n}\n"
  },
  {
    "path": "pageserver/src/tenant/size.rs",
    "content": "use std::cmp;\nuse std::collections::hash_map::Entry;\nuse std::collections::{HashMap, HashSet};\nuse std::sync::Arc;\n\nuse tenant_size_model::svg::SvgBranchKind;\nuse tenant_size_model::{Segment, StorageModel};\nuse tokio::sync::Semaphore;\nuse tokio::sync::oneshot::error::RecvError;\nuse tokio_util::sync::CancellationToken;\nuse tracing::*;\nuse utils::id::TimelineId;\nuse utils::lsn::Lsn;\n\nuse super::{GcError, LogicalSizeCalculationCause, TenantShard};\nuse crate::context::RequestContext;\nuse crate::pgdatadir_mapping::CalculateLogicalSizeError;\nuse crate::tenant::{MaybeOffloaded, Timeline};\n\n/// Inputs to the actual tenant sizing model\n///\n/// Implements [`serde::Serialize`] but is not meant to be part of the public API, instead meant to\n/// be a transferrable format between execution environments and developer.\n///\n/// This tracks more information than the actual StorageModel that calculation\n/// needs. We will convert this into a StorageModel when it's time to perform\n/// the calculation.\n///\n#[derive(Debug, serde::Serialize, serde::Deserialize)]\npub struct ModelInputs {\n    pub segments: Vec<SegmentMeta>,\n    pub timeline_inputs: Vec<TimelineInputs>,\n}\n\n/// A [`Segment`], with some extra information for display purposes\n#[derive(Debug, serde::Serialize, serde::Deserialize, PartialEq, Eq)]\npub struct SegmentMeta {\n    pub segment: Segment,\n    pub timeline_id: TimelineId,\n    pub kind: LsnKind,\n}\n\n#[derive(thiserror::Error, Debug)]\npub(crate) enum CalculateSyntheticSizeError {\n    /// Something went wrong internally to the calculation of logical size at a particular branch point\n    #[error(\"Failed to calculated logical size on timeline {timeline_id} at {lsn}: {error}\")]\n    LogicalSize {\n        timeline_id: TimelineId,\n        lsn: Lsn,\n        error: CalculateLogicalSizeError,\n    },\n\n    /// Something went wrong internally when calculating GC parameters at start of size calculation\n    
#[error(transparent)]\n    GcInfo(GcError),\n\n    /// Totally unexpected errors, like panics joining a task\n    #[error(transparent)]\n    Fatal(anyhow::Error),\n\n    /// Tenant shut down while calculating size\n    #[error(\"Cancelled\")]\n    Cancelled,\n}\n\nimpl From<GcError> for CalculateSyntheticSizeError {\n    fn from(value: GcError) -> Self {\n        match value {\n            GcError::TenantCancelled | GcError::TimelineCancelled => {\n                CalculateSyntheticSizeError::Cancelled\n            }\n            other => CalculateSyntheticSizeError::GcInfo(other),\n        }\n    }\n}\n\nimpl SegmentMeta {\n    fn size_needed(&self) -> bool {\n        match self.kind {\n            LsnKind::BranchStart => {\n                // If we don't have a later GcCutoff point on this branch, and\n                // no ancestor, calculate size for the branch start point.\n                self.segment.needed && self.segment.parent.is_none()\n            }\n            LsnKind::BranchPoint => true,\n            LsnKind::GcCutOff => true,\n            LsnKind::BranchEnd => false,\n            LsnKind::LeasePoint => true,\n            LsnKind::LeaseStart => false,\n            LsnKind::LeaseEnd => false,\n        }\n    }\n}\n\n#[derive(\n    Debug, Clone, Copy, Eq, Ord, PartialEq, PartialOrd, serde::Serialize, serde::Deserialize,\n)]\npub enum LsnKind {\n    /// A timeline starting here\n    BranchStart,\n    /// A child timeline branches off from here\n    BranchPoint,\n    /// GC cutoff point\n    GcCutOff,\n    /// Last record LSN\n    BranchEnd,\n    /// A LSN lease is granted here.\n    LeasePoint,\n    /// A lease starts from here.\n    LeaseStart,\n    /// Last record LSN for the lease (should have the same LSN as the previous [`LsnKind::LeaseStart`]).\n    LeaseEnd,\n}\n\nimpl From<LsnKind> for SvgBranchKind {\n    fn from(kind: LsnKind) -> Self {\n        match kind {\n            LsnKind::LeasePoint | LsnKind::LeaseStart | LsnKind::LeaseEnd => 
SvgBranchKind::Lease,\n            _ => SvgBranchKind::Timeline,\n        }\n    }\n}\n\n/// Collect all relevant LSNs to the inputs. These will only be helpful in the serialized form as\n/// part of [`ModelInputs`] from the HTTP api, explaining the inputs.\n#[derive(Debug, serde::Serialize, serde::Deserialize)]\npub struct TimelineInputs {\n    pub timeline_id: TimelineId,\n\n    pub ancestor_id: Option<TimelineId>,\n\n    ancestor_lsn: Lsn,\n    last_record: Lsn,\n    latest_gc_cutoff: Lsn,\n\n    /// Cutoff point based on GC settings\n    next_pitr_cutoff: Lsn,\n\n    /// Cutoff point calculated from the user-supplied 'max_retention_period'\n    retention_param_cutoff: Option<Lsn>,\n\n    /// Lease points on the timeline\n    lease_points: Vec<Lsn>,\n}\n\n/// Gathers the inputs for the tenant sizing model.\n///\n/// Tenant size does not consider the latest state, but only the state until next_pitr_cutoff, which\n/// is updated on-demand, during the start of this calculation and separate from the\n/// [`TimelineInputs::latest_gc_cutoff`].\n///\n/// For timelines in general:\n///\n/// ```text\n/// 0-----|---------|----|------------| · · · · · |·> lsn\n///   initdb_lsn  branchpoints*  next_pitr_cutoff  latest\n/// ```\npub(super) async fn gather_inputs(\n    tenant: &TenantShard,\n    limit: &Arc<Semaphore>,\n    max_retention_period: Option<u64>,\n    logical_size_cache: &mut HashMap<(TimelineId, Lsn), u64>,\n    cause: LogicalSizeCalculationCause,\n    cancel: &CancellationToken,\n    ctx: &RequestContext,\n) -> Result<ModelInputs, CalculateSyntheticSizeError> {\n    // refresh is needed to update [`timeline::GcCutoffs`]\n    tenant.refresh_gc_info(cancel, ctx).await?;\n\n    // Collect information about all the timelines\n    let mut timelines = tenant.list_timelines();\n\n    if timelines.is_empty() {\n        // perhaps the tenant has just been created, and as such doesn't have any data yet\n        return Ok(ModelInputs {\n            segments: vec![],\n      
      timeline_inputs: Vec::new(),\n        });\n    }\n\n    // Filter out timelines that are not active\n    //\n    // There may be a race when a timeline is dropped,\n    // but it is unlikely to cause any issues. In the worst case,\n    // the calculation will error out.\n    timelines.retain(|t| t.is_active());\n    // Also filter out archived timelines.\n    timelines.retain(|t| t.is_archived() != Some(true));\n\n    // Build a map of branch points.\n    let mut branchpoints: HashMap<TimelineId, HashSet<Lsn>> = HashMap::new();\n    for timeline in timelines.iter() {\n        if let Some(ancestor_id) = timeline.get_ancestor_timeline_id() {\n            branchpoints\n                .entry(ancestor_id)\n                .or_default()\n                .insert(timeline.get_ancestor_lsn());\n        }\n    }\n\n    // These become the final result.\n    let mut timeline_inputs = Vec::with_capacity(timelines.len());\n    let mut segments: Vec<SegmentMeta> = Vec::new();\n\n    //\n    // Build Segments representing each timeline. 
As we do that, also remember\n    // the branchpoints and branch startpoints in 'branchpoint_segments' and\n    // 'branchstart_segments'\n    //\n\n    // BranchPoint segments of each timeline\n    // (timeline, branchpoint LSN) -> segment_id\n    let mut branchpoint_segments: HashMap<(TimelineId, Lsn), usize> = HashMap::new();\n\n    // timeline, Branchpoint seg id, (ancestor, ancestor LSN)\n    type BranchStartSegment = (TimelineId, usize, Option<(TimelineId, Lsn)>);\n    let mut branchstart_segments: Vec<BranchStartSegment> = Vec::new();\n\n    for timeline in timelines.iter() {\n        let timeline_id = timeline.timeline_id;\n        let last_record_lsn = timeline.get_last_record_lsn();\n        let ancestor_lsn = timeline.get_ancestor_lsn();\n\n        // there's a race between the update (holding tenant.gc_lock) and this read but it\n        // might not be an issue, because it's not for Timeline::gc\n        let gc_info = timeline.gc_info.read().unwrap();\n\n        // similar to gc, but Timeline::get_latest_gc_cutoff_lsn() will not be updated before a\n        // new gc run, which we have no control over. however differently from `Timeline::gc`\n        // we don't consider the `Timeline::disk_consistent_lsn` at all, because we are not\n        // actually removing files.\n        //\n        // We only consider [`timeline::GcCutoffs::time`], and not [`timeline::GcCutoffs::space`], because from\n        // a user's perspective they have only requested retention up to the time bound (pitr_cutoff), rather\n        // than our internal space cutoff.  
This means that if someone drops a database and waits for their\n        // PITR interval, they will see synthetic size decrease, even if we are still storing data inside\n        // the space cutoff.\n        let mut next_pitr_cutoff = gc_info.cutoffs.time.unwrap_or_default(); // TODO: handle None\n\n        // If the caller provided a shorter retention period, use that instead of the GC cutoff.\n        let retention_param_cutoff = if let Some(max_retention_period) = max_retention_period {\n            let param_cutoff = Lsn(last_record_lsn.0.saturating_sub(max_retention_period));\n            if next_pitr_cutoff < param_cutoff {\n                next_pitr_cutoff = param_cutoff;\n            }\n            Some(param_cutoff)\n        } else {\n            None\n        };\n\n        let branch_is_invisible = timeline.is_invisible() == Some(true);\n\n        let lease_points = gc_info\n            .leases\n            .keys()\n            .filter(|&&lsn| lsn > ancestor_lsn)\n            .copied()\n            .collect::<Vec<_>>();\n\n        // next_pitr_cutoff in parent branch are not of interest (right now at least), nor do we\n        // want to query any logical size before initdb_lsn.\n        let branch_start_lsn = cmp::max(ancestor_lsn, timeline.initdb_lsn);\n\n        // Build \"interesting LSNs\" on this timeline\n        let mut lsns: Vec<(Lsn, LsnKind)> = gc_info\n            .retain_lsns\n            .iter()\n            .filter(|(lsn, _child_id, is_offloaded)| {\n                lsn > &ancestor_lsn && *is_offloaded == MaybeOffloaded::No\n            })\n            .copied()\n            // this assumes there are no other retain_lsns than the branchpoints\n            .map(|(lsn, _child_id, _is_offloaded)| (lsn, LsnKind::BranchPoint))\n            .collect::<Vec<_>>();\n\n        if !branch_is_invisible {\n            // Do not count lease points for invisible branches.\n            lsns.extend(lease_points.iter().map(|&lsn| (lsn, 
LsnKind::LeasePoint)));\n        }\n\n        drop(gc_info);\n\n        // Add branch points we collected earlier, just in case there were any that were\n        // not present in retain_lsns. We will remove any duplicates below later.\n        if let Some(this_branchpoints) = branchpoints.get(&timeline_id) {\n            lsns.extend(\n                this_branchpoints\n                    .iter()\n                    .map(|lsn| (*lsn, LsnKind::BranchPoint)),\n            )\n        }\n\n        // Add a point for the PITR cutoff\n        let branch_start_needed = next_pitr_cutoff <= branch_start_lsn;\n        if !branch_start_needed && !branch_is_invisible {\n            // Only add the GcCutOff point when the timeline is visible; otherwise, do not compute the size for the LSN\n            // range from the last branch point to the latest data.\n            lsns.push((next_pitr_cutoff, LsnKind::GcCutOff));\n        }\n\n        lsns.sort_unstable();\n        lsns.dedup();\n\n        //\n        // Create Segments for the interesting points.\n        //\n\n        // Timeline start point\n        let ancestor = timeline\n            .get_ancestor_timeline_id()\n            .map(|ancestor_id| (ancestor_id, ancestor_lsn));\n        branchstart_segments.push((timeline_id, segments.len(), ancestor));\n        segments.push(SegmentMeta {\n            segment: Segment {\n                parent: None, // filled in later\n                lsn: branch_start_lsn.0,\n                size: None, // filled in later\n                needed: branch_start_needed,\n            },\n            timeline_id: timeline.timeline_id,\n            kind: LsnKind::BranchStart,\n        });\n\n        // GC cutoff point, and any branch points, i.e. 
points where\n        // other timelines branch off from this timeline.\n        let mut parent = segments.len() - 1;\n        for (lsn, kind) in lsns {\n            if kind == LsnKind::BranchPoint {\n                branchpoint_segments.insert((timeline_id, lsn), segments.len());\n            }\n\n            segments.push(SegmentMeta {\n                segment: Segment {\n                    parent: Some(parent),\n                    lsn: lsn.0,\n                    size: None,\n                    needed: lsn > next_pitr_cutoff,\n                },\n                timeline_id: timeline.timeline_id,\n                kind,\n            });\n\n            parent = segments.len() - 1;\n\n            if kind == LsnKind::LeasePoint {\n                // Needs `LeaseStart` and `LeaseEnd` as well to model lease as a read-only branch that never writes data\n                // (i.e. its lsn has not advanced from ancestor_lsn), and therefore the three segments have the same LSN\n                // value. Without the other two segments, the calculation code would not count the leased LSN as a point\n                // to be retained.\n                // Did not use `BranchStart` or `BranchEnd` so we can differentiate branches and leases during debug.\n                //\n                // Alt Design: rewrite the entire calculation code to be independent of timeline id. 
Both leases and\n                // branch points can be given a synthetic id so we can unite them.\n                let mut lease_parent = parent;\n\n                // Start of a lease.\n                segments.push(SegmentMeta {\n                    segment: Segment {\n                        parent: Some(lease_parent),\n                        lsn: lsn.0,\n                        size: None,                     // Filled in later, if necessary\n                        needed: lsn > next_pitr_cutoff, // only needed if the point is within rentention.\n                    },\n                    timeline_id: timeline.timeline_id,\n                    kind: LsnKind::LeaseStart,\n                });\n                lease_parent += 1;\n\n                // End of the lease.\n                segments.push(SegmentMeta {\n                    segment: Segment {\n                        parent: Some(lease_parent),\n                        lsn: lsn.0,\n                        size: None,   // Filled in later, if necessary\n                        needed: true, // everything at the lease LSN must be readable => is needed\n                    },\n                    timeline_id: timeline.timeline_id,\n                    kind: LsnKind::LeaseEnd,\n                });\n            }\n        }\n\n        let branch_end_lsn = if branch_is_invisible {\n            // If the branch is invisible, the branch end is the last requested LSN (likely a branch cutoff point).\n            segments.last().unwrap().segment.lsn\n        } else {\n            // Otherwise, the branch end is the last record LSN.\n            last_record_lsn.0\n        };\n\n        // Current end of the timeline\n        segments.push(SegmentMeta {\n            segment: Segment {\n                parent: Some(parent),\n                lsn: branch_end_lsn,\n                size: None, // Filled in later, if necessary\n                needed: true,\n            },\n            timeline_id: 
timeline.timeline_id,\n            kind: LsnKind::BranchEnd,\n        });\n\n        timeline_inputs.push(TimelineInputs {\n            timeline_id: timeline.timeline_id,\n            ancestor_id: timeline.get_ancestor_timeline_id(),\n            ancestor_lsn,\n            last_record: last_record_lsn,\n            // this is not used above, because it might not have updated recently enough\n            latest_gc_cutoff: *timeline.get_applied_gc_cutoff_lsn(),\n            next_pitr_cutoff,\n            retention_param_cutoff,\n            lease_points,\n        });\n    }\n\n    // We now have all segments from the timelines in 'segments'. The timelines\n    // haven't been linked to each other yet, though. Do that.\n    for (_timeline_id, seg_id, ancestor) in branchstart_segments {\n        // Look up the branch point\n        if let Some(ancestor) = ancestor {\n            let parent_id = *branchpoint_segments.get(&ancestor).unwrap();\n            segments[seg_id].segment.parent = Some(parent_id);\n        }\n    }\n\n    // We left the 'size' field empty in all of the Segments so far.\n    // Now find logical sizes for all of the points that might need or benefit from them.\n    fill_logical_sizes(\n        &timelines,\n        &mut segments,\n        limit,\n        logical_size_cache,\n        cause,\n        ctx,\n    )\n    .await?;\n\n    if tenant.cancel.is_cancelled() {\n        // If we're shutting down, return an error rather than a sparse result that might include some\n        // timelines from before we started shutting down\n        return Err(CalculateSyntheticSizeError::Cancelled);\n    }\n\n    Ok(ModelInputs {\n        segments,\n        timeline_inputs,\n    })\n}\n\n/// Augment 'segments' with logical sizes\n///\n/// This will leave segments' sizes as None if the Timeline associated with the segment is deleted concurrently\n/// (i.e. 
we cannot read its logical size at a particular LSN).\nasync fn fill_logical_sizes(\n    timelines: &[Arc<Timeline>],\n    segments: &mut [SegmentMeta],\n    limit: &Arc<Semaphore>,\n    logical_size_cache: &mut HashMap<(TimelineId, Lsn), u64>,\n    cause: LogicalSizeCalculationCause,\n    ctx: &RequestContext,\n) -> Result<(), CalculateSyntheticSizeError> {\n    let timeline_hash: HashMap<TimelineId, Arc<Timeline>> = HashMap::from_iter(\n        timelines\n            .iter()\n            .map(|timeline| (timeline.timeline_id, Arc::clone(timeline))),\n    );\n\n    // record the used/inserted cache keys here, to remove extras not to start leaking\n    // after initial run the cache should be quite stable, but live timelines will eventually\n    // require new lsns to be inspected.\n    let mut sizes_needed = HashMap::<(TimelineId, Lsn), Option<u64>>::new();\n\n    // with joinset, on drop, all of the tasks will just be de-scheduled, which we can use to\n    // our advantage with `?` error handling.\n    let mut joinset = tokio::task::JoinSet::new();\n\n    // For each point that would benefit from having a logical size available,\n    // spawn a Task to fetch it, unless we have it cached already.\n    for seg in segments.iter() {\n        if !seg.size_needed() {\n            continue;\n        }\n\n        let timeline_id = seg.timeline_id;\n        let lsn = Lsn(seg.segment.lsn);\n\n        if let Entry::Vacant(e) = sizes_needed.entry((timeline_id, lsn)) {\n            let cached_size = logical_size_cache.get(&(timeline_id, lsn)).cloned();\n            if cached_size.is_none() {\n                let timeline = Arc::clone(timeline_hash.get(&timeline_id).unwrap());\n                let parallel_size_calcs = Arc::clone(limit);\n                let ctx = ctx.attached_child().with_scope_timeline(&timeline);\n                joinset.spawn(\n                    calculate_logical_size(parallel_size_calcs, timeline, lsn, cause, ctx)\n                        
.in_current_span(),\n                );\n            }\n            e.insert(cached_size);\n        }\n    }\n\n    // Perform the size lookups\n    let mut have_any_error = None;\n    while let Some(res) = joinset.join_next().await {\n        // each of these come with Result<anyhow::Result<_>, JoinError>\n        // because of spawn + spawn_blocking\n        match res {\n            Err(join_error) if join_error.is_cancelled() => {\n                unreachable!(\"we are not cancelling any of the futures, nor should be\");\n            }\n            Err(join_error) => {\n                // cannot really do anything, as this panic is likely a bug\n                error!(\n                    \"task that calls spawn_ondemand_logical_size_calculation panicked: {join_error:#}\"\n                );\n\n                have_any_error = Some(CalculateSyntheticSizeError::Fatal(\n                    anyhow::anyhow!(join_error)\n                        .context(\"task that calls spawn_ondemand_logical_size_calculation\"),\n                ));\n            }\n            Ok(Err(recv_result_error)) => {\n                // cannot really do anything, as this panic is likely a bug\n                error!(\"failed to receive logical size query result: {recv_result_error:#}\");\n                have_any_error = Some(CalculateSyntheticSizeError::Fatal(\n                    anyhow::anyhow!(recv_result_error)\n                        .context(\"Receiving logical size query result\"),\n                ));\n            }\n            Ok(Ok(TimelineAtLsnSizeResult(timeline, lsn, Err(error)))) => {\n                if matches!(error, CalculateLogicalSizeError::Cancelled) {\n                    // Skip this: it's okay if one timeline among many is shutting down while we\n                    // calculate inputs for the overall tenant.\n                    continue;\n                } else {\n                    warn!(\n                        timeline_id=%timeline.timeline_id,\n           
             \"failed to calculate logical size at {lsn}: {error:#}\"\n                    );\n                    have_any_error = Some(CalculateSyntheticSizeError::LogicalSize {\n                        timeline_id: timeline.timeline_id,\n                        lsn,\n                        error,\n                    });\n                }\n            }\n            Ok(Ok(TimelineAtLsnSizeResult(timeline, lsn, Ok(size)))) => {\n                debug!(timeline_id=%timeline.timeline_id, %lsn, size, \"size calculated\");\n\n                logical_size_cache.insert((timeline.timeline_id, lsn), size);\n                sizes_needed.insert((timeline.timeline_id, lsn), Some(size));\n            }\n        }\n    }\n\n    // prune any keys not needed anymore; we record every used key and added key.\n    logical_size_cache.retain(|key, _| sizes_needed.contains_key(key));\n\n    if let Some(error) = have_any_error {\n        // we cannot complete this round, because we are missing data.\n        // we have however cached all we were able to request calculation on.\n        return Err(error);\n    }\n\n    // Insert the looked up sizes to the Segments\n    for seg in segments.iter_mut() {\n        if !seg.size_needed() {\n            continue;\n        }\n\n        let timeline_id = seg.timeline_id;\n        let lsn = Lsn(seg.segment.lsn);\n\n        if let Some(Some(size)) = sizes_needed.get(&(timeline_id, lsn)) {\n            seg.segment.size = Some(*size);\n        }\n    }\n    Ok(())\n}\n\nimpl ModelInputs {\n    pub fn calculate_model(&self) -> tenant_size_model::StorageModel {\n        // Convert SegmentMetas into plain Segments\n        StorageModel {\n            segments: self\n                .segments\n                .iter()\n                .map(|seg| seg.segment.clone())\n                .collect(),\n        }\n    }\n\n    // calculate total project size\n    pub fn calculate(&self) -> u64 {\n        let storage = self.calculate_model();\n        let 
sizes = storage.calculate();\n        sizes.total_size\n    }\n}\n\n/// Newtype around the tuple that carries the timeline at lsn logical size calculation.\nstruct TimelineAtLsnSizeResult(\n    Arc<crate::tenant::Timeline>,\n    utils::lsn::Lsn,\n    Result<u64, CalculateLogicalSizeError>,\n);\n\n#[instrument(skip_all, fields(timeline_id=%timeline.timeline_id, lsn=%lsn))]\nasync fn calculate_logical_size(\n    limit: Arc<tokio::sync::Semaphore>,\n    timeline: Arc<crate::tenant::Timeline>,\n    lsn: utils::lsn::Lsn,\n    cause: LogicalSizeCalculationCause,\n    ctx: RequestContext,\n) -> Result<TimelineAtLsnSizeResult, RecvError> {\n    let _permit = tokio::sync::Semaphore::acquire_owned(limit)\n        .await\n        .expect(\"global semaphore should not had been closed\");\n\n    let size_res = timeline\n        .spawn_ondemand_logical_size_calculation(lsn, cause, ctx)\n        .instrument(info_span!(\"spawn_ondemand_logical_size_calculation\"))\n        .await?;\n    Ok(TimelineAtLsnSizeResult(timeline, lsn, size_res))\n}\n\n#[cfg(test)]\n#[test]\nfn verify_size_for_multiple_branches() {\n    // this is generated from integration test test_tenant_size_with_multiple_branches, but this way\n    // it has the stable lsn's\n    //\n    // The timeline_inputs don't participate in the size calculation, and are here just to explain\n    // the inputs.\n    let doc = r#\"\n{\n  \"segments\": [\n    {\n      \"segment\": {\n        \"parent\": 9,\n        \"lsn\": 26033560,\n        \"size\": null,\n        \"needed\": false\n      },\n      \"timeline_id\": \"20b129c9b50cff7213e6503a31b2a5ce\",\n      \"kind\": \"BranchStart\"\n    },\n    {\n      \"segment\": {\n        \"parent\": 0,\n        \"lsn\": 35720400,\n        \"size\": 25206784,\n        \"needed\": false\n      },\n      \"timeline_id\": \"20b129c9b50cff7213e6503a31b2a5ce\",\n      \"kind\": \"GcCutOff\"\n    },\n    {\n      \"segment\": {\n        \"parent\": 1,\n        \"lsn\": 35851472,\n        
\"size\": null,\n        \"needed\": true\n      },\n      \"timeline_id\": \"20b129c9b50cff7213e6503a31b2a5ce\",\n      \"kind\": \"BranchEnd\"\n    },\n    {\n      \"segment\": {\n        \"parent\": 7,\n        \"lsn\": 24566168,\n        \"size\": null,\n        \"needed\": false\n      },\n      \"timeline_id\": \"454626700469f0a9914949b9d018e876\",\n      \"kind\": \"BranchStart\"\n    },\n    {\n      \"segment\": {\n        \"parent\": 3,\n        \"lsn\": 25261936,\n        \"size\": 26050560,\n        \"needed\": false\n      },\n      \"timeline_id\": \"454626700469f0a9914949b9d018e876\",\n      \"kind\": \"GcCutOff\"\n    },\n    {\n      \"segment\": {\n        \"parent\": 4,\n        \"lsn\": 25393008,\n        \"size\": null,\n        \"needed\": true\n      },\n      \"timeline_id\": \"454626700469f0a9914949b9d018e876\",\n      \"kind\": \"BranchEnd\"\n    },\n    {\n      \"segment\": {\n        \"parent\": null,\n        \"lsn\": 23694408,\n        \"size\": null,\n        \"needed\": false\n      },\n      \"timeline_id\": \"cb5e3cbe60a4afc00d01880e1a37047f\",\n      \"kind\": \"BranchStart\"\n    },\n    {\n      \"segment\": {\n        \"parent\": 6,\n        \"lsn\": 24566168,\n        \"size\": 25739264,\n        \"needed\": false\n      },\n      \"timeline_id\": \"cb5e3cbe60a4afc00d01880e1a37047f\",\n      \"kind\": \"BranchPoint\"\n    },\n    {\n      \"segment\": {\n        \"parent\": 7,\n        \"lsn\": 25902488,\n        \"size\": 26402816,\n        \"needed\": false\n      },\n      \"timeline_id\": \"cb5e3cbe60a4afc00d01880e1a37047f\",\n      \"kind\": \"GcCutOff\"\n    },\n    {\n      \"segment\": {\n        \"parent\": 8,\n        \"lsn\": 26033560,\n        \"size\": 26468352,\n        \"needed\": true\n      },\n      \"timeline_id\": \"cb5e3cbe60a4afc00d01880e1a37047f\",\n      \"kind\": \"BranchPoint\"\n    },\n    {\n      \"segment\": {\n        \"parent\": 9,\n        \"lsn\": 26033560,\n        \"size\": null,\n        
\"needed\": true\n      },\n      \"timeline_id\": \"cb5e3cbe60a4afc00d01880e1a37047f\",\n      \"kind\": \"BranchEnd\"\n    }\n  ],\n  \"timeline_inputs\": [\n    {\n      \"timeline_id\": \"20b129c9b50cff7213e6503a31b2a5ce\",\n      \"ancestor_lsn\": \"0/18D3D98\",\n      \"last_record\": \"0/2230CD0\",\n      \"latest_gc_cutoff\": \"0/1698C48\",\n      \"next_pitr_cutoff\": \"0/2210CD0\",\n      \"retention_param_cutoff\": null,\n      \"lease_points\": []\n    },\n    {\n      \"timeline_id\": \"454626700469f0a9914949b9d018e876\",\n      \"ancestor_lsn\": \"0/176D998\",\n      \"last_record\": \"0/1837770\",\n      \"latest_gc_cutoff\": \"0/1698C48\",\n      \"next_pitr_cutoff\": \"0/1817770\",\n      \"retention_param_cutoff\": null,\n      \"lease_points\": []\n    },\n    {\n      \"timeline_id\": \"cb5e3cbe60a4afc00d01880e1a37047f\",\n      \"ancestor_lsn\": \"0/0\",\n      \"last_record\": \"0/18D3D98\",\n      \"latest_gc_cutoff\": \"0/1698C48\",\n      \"next_pitr_cutoff\": \"0/18B3D98\",\n      \"retention_param_cutoff\": null,\n      \"lease_points\": []\n    }\n  ]\n}\n\"#;\n    let inputs: ModelInputs = serde_json::from_str(doc).unwrap();\n\n    assert_eq!(inputs.calculate(), 37_851_408);\n}\n\n#[cfg(test)]\n#[test]\nfn verify_size_for_one_branch() {\n    let doc = r#\"\n{\n  \"segments\": [\n    {\n      \"segment\": {\n        \"parent\": null,\n        \"lsn\": 0,\n        \"size\": null,\n        \"needed\": false\n      },\n      \"timeline_id\": \"f15ae0cf21cce2ba27e4d80c6709a6cd\",\n      \"kind\": \"BranchStart\"\n    },\n    {\n      \"segment\": {\n        \"parent\": 0,\n        \"lsn\": 305547335776,\n        \"size\": 220054675456,\n        \"needed\": false\n      },\n      \"timeline_id\": \"f15ae0cf21cce2ba27e4d80c6709a6cd\",\n      \"kind\": \"GcCutOff\"\n    },\n    {\n      \"segment\": {\n        \"parent\": 1,\n        \"lsn\": 305614444640,\n        \"size\": null,\n        \"needed\": true\n      },\n      \"timeline_id\": 
\"f15ae0cf21cce2ba27e4d80c6709a6cd\",\n      \"kind\": \"BranchEnd\"\n    }\n  ],\n  \"timeline_inputs\": [\n    {\n      \"timeline_id\": \"f15ae0cf21cce2ba27e4d80c6709a6cd\",\n      \"ancestor_lsn\": \"0/0\",\n      \"last_record\": \"47/280A5860\",\n      \"latest_gc_cutoff\": \"47/240A5860\",\n      \"next_pitr_cutoff\": \"47/240A5860\",\n      \"retention_param_cutoff\": \"0/0\",\n      \"lease_points\": []\n    }\n  ]\n}\"#;\n\n    let model: ModelInputs = serde_json::from_str(doc).unwrap();\n\n    let res = model.calculate_model().calculate();\n\n    println!(\"calculated synthetic size: {}\", res.total_size);\n    println!(\"result: {:?}\", serde_json::to_string(&res.segments));\n\n    use utils::lsn::Lsn;\n    let latest_gc_cutoff_lsn: Lsn = \"47/240A5860\".parse().unwrap();\n    let last_lsn: Lsn = \"47/280A5860\".parse().unwrap();\n    println!(\n        \"latest_gc_cutoff lsn 47/240A5860 is {}, last_lsn lsn 47/280A5860 is {}\",\n        u64::from(latest_gc_cutoff_lsn),\n        u64::from(last_lsn)\n    );\n    assert_eq!(res.total_size, 220121784320);\n}\n"
  },
  {
    "path": "pageserver/src/tenant/storage_layer/batch_split_writer.rs",
    "content": "use std::future::Future;\nuse std::ops::Range;\nuse std::sync::Arc;\n\nuse bytes::Bytes;\nuse pageserver_api::key::{KEY_SIZE, Key};\nuse tokio_util::sync::CancellationToken;\nuse utils::id::TimelineId;\nuse utils::lsn::Lsn;\nuse utils::shard::TenantShardId;\nuse wal_decoder::models::value::Value;\n\nuse super::errors::PutError;\nuse super::layer::S3_UPLOAD_LIMIT;\nuse super::{\n    DeltaLayerWriter, ImageLayerWriter, PersistentLayerDesc, PersistentLayerKey, ResidentLayer,\n};\nuse crate::config::PageServerConf;\nuse crate::context::RequestContext;\nuse crate::tenant::Timeline;\nuse crate::tenant::storage_layer::Layer;\n\npub(crate) enum BatchWriterResult {\n    Produced(ResidentLayer),\n    Discarded(PersistentLayerKey),\n}\n\n#[cfg(test)]\nimpl BatchWriterResult {\n    fn into_resident_layer(self) -> ResidentLayer {\n        match self {\n            BatchWriterResult::Produced(layer) => layer,\n            BatchWriterResult::Discarded(_) => panic!(\"unexpected discarded layer\"),\n        }\n    }\n\n    fn into_discarded_layer(self) -> PersistentLayerKey {\n        match self {\n            BatchWriterResult::Produced(_) => panic!(\"unexpected produced layer\"),\n            BatchWriterResult::Discarded(layer) => layer,\n        }\n    }\n}\n\nenum LayerWriterWrapper {\n    Image(ImageLayerWriter),\n    Delta(DeltaLayerWriter),\n}\n\n/// An layer writer that takes unfinished layers and finish them atomically.\n#[must_use]\npub struct BatchLayerWriter {\n    generated_layer_writers: Vec<(LayerWriterWrapper, PersistentLayerKey)>,\n    conf: &'static PageServerConf,\n}\n\nimpl BatchLayerWriter {\n    pub fn new(conf: &'static PageServerConf) -> Self {\n        Self {\n            generated_layer_writers: Vec::new(),\n            conf,\n        }\n    }\n\n    pub fn add_unfinished_image_writer(\n        &mut self,\n        writer: ImageLayerWriter,\n        key_range: Range<Key>,\n        lsn: Lsn,\n    ) {\n        
self.generated_layer_writers.push((\n            LayerWriterWrapper::Image(writer),\n            PersistentLayerKey {\n                key_range,\n                lsn_range: PersistentLayerDesc::image_layer_lsn_range(lsn),\n                is_delta: false,\n            },\n        ));\n    }\n\n    pub fn add_unfinished_delta_writer(\n        &mut self,\n        writer: DeltaLayerWriter,\n        key_range: Range<Key>,\n        lsn_range: Range<Lsn>,\n    ) {\n        self.generated_layer_writers.push((\n            LayerWriterWrapper::Delta(writer),\n            PersistentLayerKey {\n                key_range,\n                lsn_range,\n                is_delta: true,\n            },\n        ));\n    }\n\n    pub(crate) async fn finish(\n        self,\n        tline: &Arc<Timeline>,\n        ctx: &RequestContext,\n    ) -> anyhow::Result<Vec<ResidentLayer>> {\n        let res = self\n            .finish_with_discard_fn(tline, ctx, |_| async { false })\n            .await?;\n        let mut output = Vec::new();\n        for r in res {\n            if let BatchWriterResult::Produced(layer) = r {\n                output.push(layer);\n            }\n        }\n        Ok(output)\n    }\n\n    pub(crate) async fn finish_with_discard_fn<D, F>(\n        self,\n        tline: &Arc<Timeline>,\n        ctx: &RequestContext,\n        discard_fn: D,\n    ) -> anyhow::Result<Vec<BatchWriterResult>>\n    where\n        D: Fn(&PersistentLayerKey) -> F,\n        F: Future<Output = bool>,\n    {\n        let Self {\n            generated_layer_writers,\n            ..\n        } = self;\n        let clean_up_layers = |generated_layers: Vec<BatchWriterResult>| {\n            for produced_layer in generated_layers {\n                if let BatchWriterResult::Produced(resident_layer) = produced_layer {\n                    let layer: Layer = resident_layer.into();\n                    layer.delete_on_drop();\n                }\n            }\n        };\n        // BEGIN: catch 
every error and do the recovery in the below section\n        let mut generated_layers: Vec<BatchWriterResult> = Vec::new();\n        for (inner, layer_key) in generated_layer_writers {\n            if discard_fn(&layer_key).await {\n                generated_layers.push(BatchWriterResult::Discarded(layer_key));\n            } else {\n                let res = match inner {\n                    LayerWriterWrapper::Delta(writer) => {\n                        writer.finish(layer_key.key_range.end, ctx).await\n                    }\n                    LayerWriterWrapper::Image(writer) => {\n                        writer\n                            .finish_with_end_key(layer_key.key_range.end, ctx)\n                            .await\n                    }\n                };\n                let layer = match res {\n                    Ok((desc, path)) => {\n                        match Layer::finish_creating(self.conf, tline, desc, &path) {\n                            Ok(layer) => layer,\n                            Err(e) => {\n                                tokio::fs::remove_file(&path).await.ok();\n                                clean_up_layers(generated_layers);\n                                return Err(e);\n                            }\n                        }\n                    }\n                    Err(e) => {\n                        // Image/DeltaLayerWriter::finish will clean up the temporary layer if anything goes wrong,\n                        // so we don't need to remove the layer we just failed to create by ourselves.\n                        clean_up_layers(generated_layers);\n                        return Err(e);\n                    }\n                };\n                generated_layers.push(BatchWriterResult::Produced(layer));\n            }\n        }\n        // END: catch every error and do the recovery in the above section\n        Ok(generated_layers)\n    }\n\n    pub fn pending_layer_num(&self) -> usize {\n        
self.generated_layer_writers.len()\n    }\n}\n\n/// An image writer that takes images and produces multiple image layers.\n#[must_use]\npub struct SplitImageLayerWriter<'a> {\n    inner: Option<ImageLayerWriter>,\n    target_layer_size: u64,\n    lsn: Lsn,\n    conf: &'static PageServerConf,\n    timeline_id: TimelineId,\n    tenant_shard_id: TenantShardId,\n    batches: BatchLayerWriter,\n    start_key: Key,\n    gate: &'a utils::sync::gate::Gate,\n    cancel: CancellationToken,\n}\n\nimpl<'a> SplitImageLayerWriter<'a> {\n    #[allow(clippy::too_many_arguments)]\n    pub fn new(\n        conf: &'static PageServerConf,\n        timeline_id: TimelineId,\n        tenant_shard_id: TenantShardId,\n        start_key: Key,\n        lsn: Lsn,\n        target_layer_size: u64,\n        gate: &'a utils::sync::gate::Gate,\n        cancel: CancellationToken,\n    ) -> Self {\n        Self {\n            target_layer_size,\n            inner: None,\n            conf,\n            timeline_id,\n            tenant_shard_id,\n            batches: BatchLayerWriter::new(conf),\n            lsn,\n            start_key,\n            gate,\n            cancel,\n        }\n    }\n\n    pub async fn put_image(\n        &mut self,\n        key: Key,\n        img: Bytes,\n        ctx: &RequestContext,\n    ) -> Result<(), PutError> {\n        if self.inner.is_none() {\n            self.inner = Some(\n                ImageLayerWriter::new(\n                    self.conf,\n                    self.timeline_id,\n                    self.tenant_shard_id,\n                    &(self.start_key..Key::MAX),\n                    self.lsn,\n                    self.gate,\n                    self.cancel.clone(),\n                    ctx,\n                )\n                .await\n                .map_err(PutError::Other)?,\n            );\n        }\n\n        let inner = self.inner.as_mut().unwrap();\n\n        // The current estimation is an upper bound of the space that the key/image could 
take\n        // because we did not consider compression in this estimation. The resulting image layer\n        // could be smaller than the target size.\n        let addition_size_estimation = KEY_SIZE as u64 + img.len() as u64;\n        if inner.num_keys() >= 1\n            && inner.estimated_size() + addition_size_estimation >= self.target_layer_size\n        {\n            let next_image_writer = ImageLayerWriter::new(\n                self.conf,\n                self.timeline_id,\n                self.tenant_shard_id,\n                &(key..Key::MAX),\n                self.lsn,\n                self.gate,\n                self.cancel.clone(),\n                ctx,\n            )\n            .await\n            .map_err(PutError::Other)?;\n            let prev_image_writer = std::mem::replace(inner, next_image_writer);\n            self.batches.add_unfinished_image_writer(\n                prev_image_writer,\n                self.start_key..key,\n                self.lsn,\n            );\n            self.start_key = key;\n        }\n        inner.put_image(key, img, ctx).await\n    }\n\n    pub(crate) async fn finish_with_discard_fn<D, F>(\n        self,\n        tline: &Arc<Timeline>,\n        ctx: &RequestContext,\n        end_key: Key,\n        discard_fn: D,\n    ) -> anyhow::Result<Vec<BatchWriterResult>>\n    where\n        D: Fn(&PersistentLayerKey) -> F,\n        F: Future<Output = bool>,\n    {\n        let Self {\n            mut batches, inner, ..\n        } = self;\n        if let Some(inner) = inner {\n            if inner.num_keys() != 0 {\n                batches.add_unfinished_image_writer(inner, self.start_key..end_key, self.lsn);\n            }\n        }\n        batches.finish_with_discard_fn(tline, ctx, discard_fn).await\n    }\n\n    #[cfg(test)]\n    pub(crate) async fn finish(\n        self,\n        tline: &Arc<Timeline>,\n        ctx: &RequestContext,\n        end_key: Key,\n    ) -> anyhow::Result<Vec<BatchWriterResult>> {\n        
self.finish_with_discard_fn(tline, ctx, end_key, |_| async { false })\n            .await\n    }\n}\n\n/// A delta writer that takes key-lsn-values and produces multiple delta layers.\n///\n/// Note that if updates of a single key exceed the target size limit, all of the updates will be batched\n/// into a single file. This behavior might change in the future. For reference, the legacy compaction algorithm\n/// will split them into multiple files based on size.\n#[must_use]\npub struct SplitDeltaLayerWriter<'a> {\n    inner: Option<(Key, DeltaLayerWriter)>,\n    target_layer_size: u64,\n    conf: &'static PageServerConf,\n    timeline_id: TimelineId,\n    tenant_shard_id: TenantShardId,\n    lsn_range: Range<Lsn>,\n    last_key_written: Key,\n    batches: BatchLayerWriter,\n    gate: &'a utils::sync::gate::Gate,\n    cancel: CancellationToken,\n}\n\nimpl<'a> SplitDeltaLayerWriter<'a> {\n    pub fn new(\n        conf: &'static PageServerConf,\n        timeline_id: TimelineId,\n        tenant_shard_id: TenantShardId,\n        lsn_range: Range<Lsn>,\n        target_layer_size: u64,\n        gate: &'a utils::sync::gate::Gate,\n        cancel: CancellationToken,\n    ) -> Self {\n        Self {\n            target_layer_size,\n            inner: None,\n            conf,\n            timeline_id,\n            tenant_shard_id,\n            lsn_range,\n            last_key_written: Key::MIN,\n            batches: BatchLayerWriter::new(conf),\n            gate,\n            cancel,\n        }\n    }\n\n    pub async fn put_value(\n        &mut self,\n        key: Key,\n        lsn: Lsn,\n        val: Value,\n        ctx: &RequestContext,\n    ) -> Result<(), PutError> {\n        // The current estimation is key size plus LSN size plus value size estimation. This is not an accurate\n        // number, and therefore the final layer size could be a little bit larger or smaller than the target.\n        //\n        // Also, keep all updates of a single key in a single file. 
TODO: split them using the legacy compaction\n        // strategy. https://github.com/neondatabase/neon/issues/8837\n\n        if self.inner.is_none() {\n            self.inner = Some((\n                key,\n                DeltaLayerWriter::new(\n                    self.conf,\n                    self.timeline_id,\n                    self.tenant_shard_id,\n                    key,\n                    self.lsn_range.clone(),\n                    self.gate,\n                    self.cancel.clone(),\n                    ctx,\n                )\n                .await\n                .map_err(PutError::Other)?,\n            ));\n        }\n        let (_, inner) = self.inner.as_mut().unwrap();\n\n        let addition_size_estimation = KEY_SIZE as u64 + 8 /* LSN u64 size */ + 80 /* value size estimation */;\n        if inner.num_keys() >= 1\n            && inner.estimated_size() + addition_size_estimation >= self.target_layer_size\n        {\n            if key != self.last_key_written {\n                let next_delta_writer = DeltaLayerWriter::new(\n                    self.conf,\n                    self.timeline_id,\n                    self.tenant_shard_id,\n                    key,\n                    self.lsn_range.clone(),\n                    self.gate,\n                    self.cancel.clone(),\n                    ctx,\n                )\n                .await\n                .map_err(PutError::Other)?;\n                let (start_key, prev_delta_writer) =\n                    self.inner.replace((key, next_delta_writer)).unwrap();\n                self.batches.add_unfinished_delta_writer(\n                    prev_delta_writer,\n                    start_key..key,\n                    self.lsn_range.clone(),\n                );\n            } else if inner.estimated_size() >= S3_UPLOAD_LIMIT {\n                // We have to produce a very large file b/c a key is updated too often.\n                return Err(PutError::Other(anyhow::anyhow!(\n          
          \"a single key is updated too often: key={}, estimated_size={}, and the layer file cannot be produced\",\n                    key,\n                    inner.estimated_size()\n                )));\n            }\n        }\n        self.last_key_written = key;\n        let (_, inner) = self.inner.as_mut().unwrap();\n        inner.put_value(key, lsn, val, ctx).await\n    }\n\n    pub(crate) async fn finish_with_discard_fn<D, F>(\n        self,\n        tline: &Arc<Timeline>,\n        ctx: &RequestContext,\n        discard_fn: D,\n    ) -> anyhow::Result<Vec<BatchWriterResult>>\n    where\n        D: Fn(&PersistentLayerKey) -> F,\n        F: Future<Output = bool>,\n    {\n        let Self {\n            mut batches, inner, ..\n        } = self;\n        if let Some((start_key, writer)) = inner {\n            if writer.num_keys() != 0 {\n                let end_key = self.last_key_written.next();\n                batches.add_unfinished_delta_writer(\n                    writer,\n                    start_key..end_key,\n                    self.lsn_range.clone(),\n                );\n            }\n        }\n        batches.finish_with_discard_fn(tline, ctx, discard_fn).await\n    }\n\n    #[cfg(test)]\n    pub(crate) async fn finish(\n        self,\n        tline: &Arc<Timeline>,\n        ctx: &RequestContext,\n    ) -> anyhow::Result<Vec<BatchWriterResult>> {\n        self.finish_with_discard_fn(tline, ctx, |_| async { false })\n            .await\n    }\n}\n\n#[cfg(test)]\nmod tests {\n    use itertools::Itertools;\n    use rand::{RngCore, SeedableRng};\n\n    use super::*;\n    use crate::DEFAULT_PG_VERSION;\n    use crate::tenant::harness::{TIMELINE_ID, TenantHarness};\n    use crate::tenant::storage_layer::AsLayerDesc;\n\n    fn get_key(id: u32) -> Key {\n        let mut key = Key::from_hex(\"000000000033333333444444445500000000\").unwrap();\n        key.field6 = id;\n        key\n    }\n\n    fn get_img(id: u32) -> Bytes {\n        
format!(\"{id:064}\").into()\n    }\n\n    fn get_large_img() -> Bytes {\n        let mut rng = rand::rngs::SmallRng::seed_from_u64(42);\n        let mut data = vec![0; 8192];\n        rng.fill_bytes(&mut data);\n        data.into()\n    }\n\n    #[tokio::test]\n    async fn write_one_image() {\n        let harness = TenantHarness::create(\"split_writer_write_one_image\")\n            .await\n            .unwrap();\n        let (tenant, ctx) = harness.load().await;\n\n        let tline = tenant\n            .create_test_timeline(TIMELINE_ID, Lsn(0x10), DEFAULT_PG_VERSION, &ctx)\n            .await\n            .unwrap();\n\n        let mut image_writer = SplitImageLayerWriter::new(\n            tenant.conf,\n            tline.timeline_id,\n            tenant.tenant_shard_id,\n            get_key(0),\n            Lsn(0x18),\n            4 * 1024 * 1024,\n            &tline.gate,\n            tline.cancel.clone(),\n        );\n\n        let mut delta_writer = SplitDeltaLayerWriter::new(\n            tenant.conf,\n            tline.timeline_id,\n            tenant.tenant_shard_id,\n            Lsn(0x18)..Lsn(0x20),\n            4 * 1024 * 1024,\n            &tline.gate,\n            tline.cancel.clone(),\n        );\n\n        image_writer\n            .put_image(get_key(0), get_img(0), &ctx)\n            .await\n            .unwrap();\n        let layers = image_writer\n            .finish(&tline, &ctx, get_key(10))\n            .await\n            .unwrap();\n        assert_eq!(layers.len(), 1);\n\n        delta_writer\n            .put_value(get_key(0), Lsn(0x18), Value::Image(get_img(0)), &ctx)\n            .await\n            .unwrap();\n        let layers = delta_writer.finish(&tline, &ctx).await.unwrap();\n        assert_eq!(layers.len(), 1);\n        assert_eq!(\n            layers\n                .into_iter()\n                .next()\n                .unwrap()\n                .into_resident_layer()\n                .layer_desc()\n                .key(),\n   
         PersistentLayerKey {\n                key_range: get_key(0)..get_key(1),\n                lsn_range: Lsn(0x18)..Lsn(0x20),\n                is_delta: true\n            }\n        );\n    }\n\n    #[tokio::test]\n    async fn write_split() {\n        // Test the split writer with retaining all the layers we have produced (discard=false)\n        write_split_helper(\"split_writer_write_split\", false).await;\n    }\n\n    #[tokio::test]\n    async fn write_split_discard() {\n        // Test the split writer with discarding all the layers we have produced (discard=true)\n        write_split_helper(\"split_writer_write_split_discard\", true).await;\n    }\n\n    /// Test the image+delta writer by writing a large number of images and deltas. If discard is\n    /// set to true, all layers will be discarded.\n    async fn write_split_helper(harness_name: &'static str, discard: bool) {\n        let harness = TenantHarness::create(harness_name).await.unwrap();\n        let (tenant, ctx) = harness.load().await;\n\n        let tline = tenant\n            .create_test_timeline(TIMELINE_ID, Lsn(0x10), DEFAULT_PG_VERSION, &ctx)\n            .await\n            .unwrap();\n\n        let mut image_writer = SplitImageLayerWriter::new(\n            tenant.conf,\n            tline.timeline_id,\n            tenant.tenant_shard_id,\n            get_key(0),\n            Lsn(0x18),\n            4 * 1024 * 1024,\n            &tline.gate,\n            tline.cancel.clone(),\n        );\n        let mut delta_writer = SplitDeltaLayerWriter::new(\n            tenant.conf,\n            tline.timeline_id,\n            tenant.tenant_shard_id,\n            Lsn(0x18)..Lsn(0x20),\n            4 * 1024 * 1024,\n            &tline.gate,\n            tline.cancel.clone(),\n        );\n        const N: usize = 2000;\n        for i in 0..N {\n            let i = i as u32;\n            image_writer\n                .put_image(get_key(i), get_large_img(), &ctx)\n                .await\n           
     .unwrap();\n            delta_writer\n                .put_value(get_key(i), Lsn(0x20), Value::Image(get_large_img()), &ctx)\n                .await\n                .unwrap();\n        }\n        let image_layers = image_writer\n            .finish_with_discard_fn(&tline, &ctx, get_key(N as u32), |_| async { discard })\n            .await\n            .unwrap();\n        let delta_layers = delta_writer\n            .finish_with_discard_fn(&tline, &ctx, |_| async { discard })\n            .await\n            .unwrap();\n        let image_layers = image_layers\n            .into_iter()\n            .map(|x| {\n                if discard {\n                    x.into_discarded_layer()\n                } else {\n                    x.into_resident_layer().layer_desc().key()\n                }\n            })\n            .collect_vec();\n        let delta_layers = delta_layers\n            .into_iter()\n            .map(|x| {\n                if discard {\n                    x.into_discarded_layer()\n                } else {\n                    x.into_resident_layer().layer_desc().key()\n                }\n            })\n            .collect_vec();\n        assert_eq!(image_layers.len(), N / 512 + 1);\n        assert_eq!(delta_layers.len(), N / 512 + 1);\n        assert_eq!(delta_layers.first().unwrap().key_range.start, get_key(0));\n        assert_eq!(\n            delta_layers.last().unwrap().key_range.end,\n            get_key(N as u32)\n        );\n        for idx in 0..image_layers.len() {\n            assert_ne!(image_layers[idx].key_range.start, Key::MIN);\n            assert_ne!(image_layers[idx].key_range.end, Key::MAX);\n            assert_ne!(delta_layers[idx].key_range.start, Key::MIN);\n            assert_ne!(delta_layers[idx].key_range.end, Key::MAX);\n            if idx > 0 {\n                assert_eq!(\n                    image_layers[idx - 1].key_range.end,\n                    image_layers[idx].key_range.start\n                );\n          
      assert_eq!(\n                    delta_layers[idx - 1].key_range.end,\n                    delta_layers[idx].key_range.start\n                );\n            }\n        }\n    }\n\n    #[tokio::test]\n    async fn write_large_img() {\n        let harness = TenantHarness::create(\"split_writer_write_large_img\")\n            .await\n            .unwrap();\n        let (tenant, ctx) = harness.load().await;\n\n        let tline = tenant\n            .create_test_timeline(TIMELINE_ID, Lsn(0x10), DEFAULT_PG_VERSION, &ctx)\n            .await\n            .unwrap();\n\n        let mut image_writer = SplitImageLayerWriter::new(\n            tenant.conf,\n            tline.timeline_id,\n            tenant.tenant_shard_id,\n            get_key(0),\n            Lsn(0x18),\n            4 * 1024,\n            &tline.gate,\n            tline.cancel.clone(),\n        );\n\n        let mut delta_writer = SplitDeltaLayerWriter::new(\n            tenant.conf,\n            tline.timeline_id,\n            tenant.tenant_shard_id,\n            Lsn(0x18)..Lsn(0x20),\n            4 * 1024,\n            &tline.gate,\n            tline.cancel.clone(),\n        );\n\n        image_writer\n            .put_image(get_key(0), get_img(0), &ctx)\n            .await\n            .unwrap();\n        image_writer\n            .put_image(get_key(1), get_large_img(), &ctx)\n            .await\n            .unwrap();\n        let layers = image_writer\n            .finish(&tline, &ctx, get_key(10))\n            .await\n            .unwrap();\n        assert_eq!(layers.len(), 2);\n\n        delta_writer\n            .put_value(get_key(0), Lsn(0x18), Value::Image(get_img(0)), &ctx)\n            .await\n            .unwrap();\n        delta_writer\n            .put_value(get_key(1), Lsn(0x1A), Value::Image(get_large_img()), &ctx)\n            .await\n            .unwrap();\n        let layers = delta_writer.finish(&tline, &ctx).await.unwrap();\n        assert_eq!(layers.len(), 2);\n        let mut 
layers_iter = layers.into_iter();\n        assert_eq!(\n            layers_iter\n                .next()\n                .unwrap()\n                .into_resident_layer()\n                .layer_desc()\n                .key(),\n            PersistentLayerKey {\n                key_range: get_key(0)..get_key(1),\n                lsn_range: Lsn(0x18)..Lsn(0x20),\n                is_delta: true\n            }\n        );\n        assert_eq!(\n            layers_iter\n                .next()\n                .unwrap()\n                .into_resident_layer()\n                .layer_desc()\n                .key(),\n            PersistentLayerKey {\n                key_range: get_key(1)..get_key(2),\n                lsn_range: Lsn(0x18)..Lsn(0x20),\n                is_delta: true\n            }\n        );\n    }\n\n    #[tokio::test]\n    async fn write_split_single_key() {\n        let harness = TenantHarness::create(\"split_writer_write_split_single_key\")\n            .await\n            .unwrap();\n        let (tenant, ctx) = harness.load().await;\n\n        let tline = tenant\n            .create_test_timeline(TIMELINE_ID, Lsn(0x10), DEFAULT_PG_VERSION, &ctx)\n            .await\n            .unwrap();\n\n        const N: usize = 2000;\n        let mut delta_writer = SplitDeltaLayerWriter::new(\n            tenant.conf,\n            tline.timeline_id,\n            tenant.tenant_shard_id,\n            Lsn(0x10)..Lsn(N as u64 * 16 + 0x10),\n            4 * 1024 * 1024,\n            &tline.gate,\n            tline.cancel.clone(),\n        );\n\n        for i in 0..N {\n            let i = i as u32;\n            delta_writer\n                .put_value(\n                    get_key(0),\n                    Lsn(i as u64 * 16 + 0x10),\n                    Value::Image(get_large_img()),\n                    &ctx,\n                )\n                .await\n                .unwrap();\n        }\n        let delta_layers = delta_writer.finish(&tline, &ctx).await.unwrap();\n 
       assert_eq!(delta_layers.len(), 1);\n        let delta_layer = delta_layers\n            .into_iter()\n            .next()\n            .unwrap()\n            .into_resident_layer();\n        assert_eq!(\n            delta_layer.layer_desc().key(),\n            PersistentLayerKey {\n                key_range: get_key(0)..get_key(1),\n                lsn_range: Lsn(0x10)..Lsn(N as u64 * 16 + 0x10),\n                is_delta: true\n            }\n        );\n    }\n}\n"
  },
  {
    "path": "pageserver/src/tenant/storage_layer/delta_layer.rs",
    "content": "//! A DeltaLayer represents a collection of WAL records or page images in a range of\n//! LSNs, and in a range of Keys. It is stored on a file on disk.\n//!\n//! Usually a delta layer only contains differences, in the form of WAL records\n//! against a base LSN. However, if a relation extended or a whole new relation\n//! is created, there would be no base for the new pages. The entries for them\n//! must be page images or WAL records with the 'will_init' flag set, so that\n//! they can be replayed without referring to an older page version.\n//!\n//! The delta files are stored in `timelines/<timeline_id>` directory.  Currently,\n//! there are no subdirectories, and each delta file is named like this:\n//!\n//! ```text\n//!    <key start>-<key end>__<start LSN>-<end LSN>\n//! ```\n//!\n//! For example:\n//!\n//! ```text\n//!    000000067F000032BE0000400000000020B6-000000067F000032BE0000400000000030B6__000000578C6B29-0000000057A50051\n//! ```\n//!\n//! Every delta file consists of three parts: \"summary\", \"values\", and\n//! \"index\". The summary is a fixed size header at the beginning of the file,\n//! and it contains basic information about the layer, and offsets to the other\n//! parts. The \"index\" is a B-tree, mapping from Key and LSN to an offset in the\n//! \"values\" part.  The actual page images and WAL records are stored in the\n//! 
\"values\" part.\n//!\nuse std::collections::{HashMap, VecDeque};\nuse std::fs::File;\nuse std::ops::Range;\nuse std::os::unix::fs::FileExt;\nuse std::str::FromStr;\nuse std::sync::Arc;\nuse std::sync::atomic::AtomicU64;\n\nuse anyhow::{Context, Result, bail, ensure};\nuse camino::{Utf8Path, Utf8PathBuf};\nuse futures::StreamExt;\nuse itertools::Itertools;\nuse pageserver_api::config::MaxVectoredReadBytes;\nuse pageserver_api::key::{DBDIR_KEY, KEY_SIZE, Key};\nuse pageserver_api::keyspace::KeySpace;\nuse pageserver_api::models::ImageCompressionAlgorithm;\nuse pageserver_api::shard::TenantShardId;\nuse serde::{Deserialize, Serialize};\nuse tokio::sync::OnceCell;\nuse tokio_epoll_uring::IoBuf;\nuse tokio_util::sync::CancellationToken;\nuse tracing::*;\nuse utils::bin_ser::BeSer;\nuse utils::bin_ser::SerializeError;\nuse utils::id::{TenantId, TimelineId};\nuse utils::lsn::Lsn;\nuse wal_decoder::models::value::Value;\n\nuse super::errors::PutError;\nuse super::{\n    AsLayerDesc, LayerName, OnDiskValue, OnDiskValueIo, PersistentLayerDesc, ResidentLayer,\n    ValuesReconstructState,\n};\nuse crate::config::PageServerConf;\nuse crate::context::{PageContentKind, RequestContext, RequestContextBuilder};\nuse crate::page_cache::{self, FileId, PAGE_SZ};\nuse crate::tenant::blob_io::BlobWriter;\nuse crate::tenant::block_io::{BlockBuf, BlockCursor, BlockLease, BlockReader, FileBlockReader};\nuse crate::tenant::disk_btree::{\n    DiskBtreeBuilder, DiskBtreeIterator, DiskBtreeReader, VisitDirection,\n};\nuse crate::tenant::storage_layer::layer::S3_UPLOAD_LIMIT;\nuse crate::tenant::timeline::GetVectoredError;\nuse crate::tenant::vectored_blob_io::{\n    BlobFlag, BufView, StreamingVectoredReadPlanner, VectoredBlobReader, VectoredRead,\n    VectoredReadPlanner,\n};\nuse crate::virtual_file::TempVirtualFile;\nuse crate::virtual_file::owned_buffers_io::io_buf_ext::{FullSlice, IoBufExt};\nuse crate::virtual_file::owned_buffers_io::write::{Buffer, BufferedWriterShutdownMode};\nuse 
crate::virtual_file::{self, IoBuffer, IoBufferMut, MaybeFatalIo, VirtualFile};\nuse crate::{DELTA_FILE_MAGIC, STORAGE_FORMAT_VERSION, TEMP_FILE_SUFFIX};\n\n///\n/// Header stored in the beginning of the file\n///\n/// After this comes the 'values' part, starting on block 1. After that,\n/// the 'index' starts at the block indicated by 'index_start_blk'\n///\n#[derive(Debug, Serialize, Deserialize, PartialEq, Eq)]\npub struct Summary {\n    /// Magic value to identify this as a neon delta file. Always DELTA_FILE_MAGIC.\n    pub magic: u16,\n    pub format_version: u16,\n\n    pub tenant_id: TenantId,\n    pub timeline_id: TimelineId,\n    pub key_range: Range<Key>,\n    pub lsn_range: Range<Lsn>,\n\n    /// Block number where the 'index' part of the file begins.\n    pub index_start_blk: u32,\n    /// Block within the 'index', where the B-tree root page is stored\n    pub index_root_blk: u32,\n}\n\nimpl From<&DeltaLayer> for Summary {\n    fn from(layer: &DeltaLayer) -> Self {\n        Self::expected(\n            layer.desc.tenant_shard_id.tenant_id,\n            layer.desc.timeline_id,\n            layer.desc.key_range.clone(),\n            layer.desc.lsn_range.clone(),\n        )\n    }\n}\n\nimpl Summary {\n    /// Serializes the summary header into an aligned buffer of lenth `PAGE_SZ`.\n    pub fn ser_into_page(&self) -> Result<IoBuffer, SerializeError> {\n        let mut buf = IoBufferMut::with_capacity(PAGE_SZ);\n        Self::ser_into(self, &mut buf)?;\n        // Pad zeroes to the buffer so the length is a multiple of the alignment.\n        buf.extend_with(0, buf.capacity() - buf.len());\n        Ok(buf.freeze())\n    }\n\n    pub(super) fn expected(\n        tenant_id: TenantId,\n        timeline_id: TimelineId,\n        keys: Range<Key>,\n        lsns: Range<Lsn>,\n    ) -> Self {\n        Self {\n            magic: DELTA_FILE_MAGIC,\n            format_version: STORAGE_FORMAT_VERSION,\n\n            tenant_id,\n            timeline_id,\n            
key_range: keys,\n            lsn_range: lsns,\n\n            index_start_blk: 0,\n            index_root_blk: 0,\n        }\n    }\n}\n\n// Flag indicating that this version initialize the page\nconst WILL_INIT: u64 = 1;\n\n/// Struct representing reference to BLOB in layers.\n///\n/// Reference contains BLOB offset, and for WAL records it also contains\n/// `will_init` flag. The flag helps to determine the range of records\n/// that needs to be applied, without reading/deserializing records themselves.\n#[derive(Debug, Serialize, Deserialize, Copy, Clone)]\npub struct BlobRef(pub u64);\n\nimpl BlobRef {\n    pub fn will_init(&self) -> bool {\n        (self.0 & WILL_INIT) != 0\n    }\n\n    pub fn pos(&self) -> u64 {\n        self.0 >> 1\n    }\n\n    pub fn new(pos: u64, will_init: bool) -> BlobRef {\n        let mut blob_ref = pos << 1;\n        if will_init {\n            blob_ref |= WILL_INIT;\n        }\n        BlobRef(blob_ref)\n    }\n}\n\npub const DELTA_KEY_SIZE: usize = KEY_SIZE + 8;\nstruct DeltaKey([u8; DELTA_KEY_SIZE]);\n\n/// This is the key of the B-tree index stored in the delta layer. 
It consists\n/// of the serialized representation of a Key and LSN.\nimpl DeltaKey {\n    fn from_slice(buf: &[u8]) -> Self {\n        let mut bytes: [u8; DELTA_KEY_SIZE] = [0u8; DELTA_KEY_SIZE];\n        bytes.copy_from_slice(buf);\n        DeltaKey(bytes)\n    }\n\n    fn from_key_lsn(key: &Key, lsn: Lsn) -> Self {\n        let mut bytes: [u8; DELTA_KEY_SIZE] = [0u8; DELTA_KEY_SIZE];\n        key.write_to_byte_slice(&mut bytes[0..KEY_SIZE]);\n        bytes[KEY_SIZE..].copy_from_slice(&u64::to_be_bytes(lsn.0));\n        DeltaKey(bytes)\n    }\n\n    fn key(&self) -> Key {\n        Key::from_slice(&self.0)\n    }\n\n    fn lsn(&self) -> Lsn {\n        Lsn(u64::from_be_bytes(self.0[KEY_SIZE..].try_into().unwrap()))\n    }\n\n    fn extract_lsn_from_buf(buf: &[u8]) -> Lsn {\n        let mut lsn_buf = [0u8; 8];\n        lsn_buf.copy_from_slice(&buf[KEY_SIZE..]);\n        Lsn(u64::from_be_bytes(lsn_buf))\n    }\n}\n\n/// This is used only from `pagectl`. Within pageserver, all layers are\n/// [`crate::tenant::storage_layer::Layer`], which can hold a [`DeltaLayerInner`].\npub struct DeltaLayer {\n    path: Utf8PathBuf,\n    pub desc: PersistentLayerDesc,\n    inner: OnceCell<Arc<DeltaLayerInner>>,\n}\n\nimpl std::fmt::Debug for DeltaLayer {\n    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {\n        use super::RangeDisplayDebug;\n\n        f.debug_struct(\"DeltaLayer\")\n            .field(\"key_range\", &RangeDisplayDebug(&self.desc.key_range))\n            .field(\"lsn_range\", &self.desc.lsn_range)\n            .field(\"file_size\", &self.desc.file_size)\n            .field(\"inner\", &self.inner)\n            .finish()\n    }\n}\n\n/// `DeltaLayerInner` is the in-memory data structure associated with an on-disk delta\n/// file.\npub struct DeltaLayerInner {\n    // values copied from summary\n    index_start_blk: u32,\n    index_root_blk: u32,\n\n    file: Arc<VirtualFile>,\n    file_id: FileId,\n\n    layer_key_range: Range<Key>,\n    
layer_lsn_range: Range<Lsn>,\n\n    max_vectored_read_bytes: Option<MaxVectoredReadBytes>,\n}\n\nimpl DeltaLayerInner {\n    pub(crate) fn layer_dbg_info(&self) -> String {\n        format!(\n            \"delta {}..{} {}..{}\",\n            self.key_range().start,\n            self.key_range().end,\n            self.lsn_range().start,\n            self.lsn_range().end\n        )\n    }\n}\n\nimpl std::fmt::Debug for DeltaLayerInner {\n    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {\n        f.debug_struct(\"DeltaLayerInner\")\n            .field(\"index_start_blk\", &self.index_start_blk)\n            .field(\"index_root_blk\", &self.index_root_blk)\n            .finish()\n    }\n}\n\n/// Boilerplate to implement the Layer trait, always use layer_desc for persistent layers.\nimpl std::fmt::Display for DeltaLayer {\n    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {\n        write!(f, \"{}\", self.layer_desc().short_id())\n    }\n}\n\nimpl AsLayerDesc for DeltaLayer {\n    fn layer_desc(&self) -> &PersistentLayerDesc {\n        &self.desc\n    }\n}\n\nimpl DeltaLayer {\n    pub async fn dump(&self, verbose: bool, ctx: &RequestContext) -> Result<()> {\n        self.desc.dump();\n\n        if !verbose {\n            return Ok(());\n        }\n\n        let inner = self.load(ctx).await?;\n\n        inner.dump(ctx).await\n    }\n\n    fn temp_path_for(\n        conf: &PageServerConf,\n        tenant_shard_id: &TenantShardId,\n        timeline_id: &TimelineId,\n        key_start: Key,\n        lsn_range: &Range<Lsn>,\n    ) -> Utf8PathBuf {\n        // TempVirtualFile requires us to never reuse a filename while an old\n        // instance of TempVirtualFile created with that filename is not done dropping yet.\n        // So, we use a monotonic counter to disambiguate the filenames.\n        static NEXT_TEMP_DISAMBIGUATOR: AtomicU64 = AtomicU64::new(1);\n        let filename_disambiguator =\n            
NEXT_TEMP_DISAMBIGUATOR.fetch_add(1, std::sync::atomic::Ordering::Relaxed);\n\n        conf.timeline_path(tenant_shard_id, timeline_id)\n            .join(format!(\n                \"{}-XXX__{:016X}-{:016X}.{:x}.{}\",\n                key_start,\n                u64::from(lsn_range.start),\n                u64::from(lsn_range.end),\n                filename_disambiguator,\n                TEMP_FILE_SUFFIX,\n            ))\n    }\n\n    ///\n    /// Open the underlying file and read the metadata into memory, if it's\n    /// not loaded already.\n    ///\n    async fn load(&self, ctx: &RequestContext) -> Result<&Arc<DeltaLayerInner>> {\n        // Quick exit if already loaded\n        self.inner\n            .get_or_try_init(|| self.load_inner(ctx))\n            .await\n            .with_context(|| format!(\"Failed to load delta layer {}\", self.path()))\n    }\n\n    async fn load_inner(&self, ctx: &RequestContext) -> anyhow::Result<Arc<DeltaLayerInner>> {\n        let path = self.path();\n\n        let loaded = DeltaLayerInner::load(&path, None, None, ctx).await?;\n\n        // not production code\n        let actual_layer_name = LayerName::from_str(path.file_name().unwrap()).unwrap();\n        let expected_layer_name = self.layer_desc().layer_name();\n\n        if actual_layer_name != expected_layer_name {\n            println!(\"warning: filename does not match what is expected from in-file summary\");\n            println!(\"actual: {:?}\", actual_layer_name.to_string());\n            println!(\"expected: {:?}\", expected_layer_name.to_string());\n        }\n\n        Ok(Arc::new(loaded))\n    }\n\n    /// Create a DeltaLayer struct representing an existing file on disk.\n    ///\n    /// This variant is only used for debugging purposes, by the 'pagectl' binary.\n    pub fn new_for_path(path: &Utf8Path, file: File) -> Result<Self> {\n        let mut summary_buf = vec![0; PAGE_SZ];\n        file.read_exact_at(&mut summary_buf, 0)?;\n        let summary = 
Summary::des_prefix(&summary_buf)?;\n\n        let metadata = file\n            .metadata()\n            .context(\"get file metadata to determine size\")?;\n\n        // This function is never used for constructing layers in a running pageserver,\n        // so it does not need an accurate TenantShardId.\n        let tenant_shard_id = TenantShardId::unsharded(summary.tenant_id);\n\n        Ok(DeltaLayer {\n            path: path.to_path_buf(),\n            desc: PersistentLayerDesc::new_delta(\n                tenant_shard_id,\n                summary.timeline_id,\n                summary.key_range,\n                summary.lsn_range,\n                metadata.len(),\n            ),\n            inner: OnceCell::new(),\n        })\n    }\n\n    /// Path to the layer file in pageserver workdir.\n    fn path(&self) -> Utf8PathBuf {\n        self.path.clone()\n    }\n}\n\n/// A builder object for constructing a new delta layer.\n///\n/// Usage:\n///\n/// 1. Create the DeltaLayerWriter by calling DeltaLayerWriter::new(...)\n///\n/// 2. Write the contents by calling `put_value` for every page\n///    version to store in the layer.\n///\n/// 3. 
Call `finish`.\n///\nstruct DeltaLayerWriterInner {\n    pub path: Utf8PathBuf,\n    timeline_id: TimelineId,\n    tenant_shard_id: TenantShardId,\n\n    key_start: Key,\n    lsn_range: Range<Lsn>,\n\n    tree: DiskBtreeBuilder<BlockBuf, DELTA_KEY_SIZE>,\n\n    blob_writer: BlobWriter<TempVirtualFile>,\n\n    // Number of key-lsns in the layer.\n    num_keys: usize,\n}\n\nimpl DeltaLayerWriterInner {\n    ///\n    /// Start building a new delta layer.\n    ///\n    #[allow(clippy::too_many_arguments)]\n    async fn new(\n        conf: &'static PageServerConf,\n        timeline_id: TimelineId,\n        tenant_shard_id: TenantShardId,\n        key_start: Key,\n        lsn_range: Range<Lsn>,\n        gate: &utils::sync::gate::Gate,\n        cancel: CancellationToken,\n        ctx: &RequestContext,\n    ) -> anyhow::Result<Self> {\n        // Create the file initially with a temporary filename. We don't know\n        // the end key yet, so we cannot form the final filename yet. We will\n        // rename it when we're done.\n        let path =\n            DeltaLayer::temp_path_for(conf, &tenant_shard_id, &timeline_id, key_start, &lsn_range);\n        let file = TempVirtualFile::new(\n            VirtualFile::open_with_options_v2(\n                &path,\n                virtual_file::OpenOptions::new()\n                    .create_new(true)\n                    .write(true),\n                ctx,\n            )\n            .await?,\n            gate.enter()?,\n        );\n\n        // Start at PAGE_SZ, make room for the header block\n        let blob_writer = BlobWriter::new(\n            file,\n            PAGE_SZ as u64,\n            gate,\n            cancel,\n            ctx,\n            info_span!(parent: None, \"delta_layer_writer_flush_task\", tenant_id=%tenant_shard_id.tenant_id, shard_id=%tenant_shard_id.shard_slug(), timeline_id=%timeline_id, path = %path),\n        )?;\n\n        // Initialize the b-tree index builder\n        let block_buf = 
BlockBuf::new();\n        let tree_builder = DiskBtreeBuilder::new(block_buf);\n\n        Ok(Self {\n            path,\n            timeline_id,\n            tenant_shard_id,\n            key_start,\n            lsn_range,\n            tree: tree_builder,\n            blob_writer,\n            num_keys: 0,\n        })\n    }\n\n    ///\n    /// Append a key-value pair to the file.\n    ///\n    /// The values must be appended in key, lsn order.\n    ///\n    async fn put_value(\n        &mut self,\n        key: Key,\n        lsn: Lsn,\n        val: Value,\n        ctx: &RequestContext,\n    ) -> Result<(), PutError> {\n        let (_, res) = self\n            .put_value_bytes(\n                key,\n                lsn,\n                Value::ser(&val)\n                    .map_err(anyhow::Error::new)\n                    .map_err(PutError::Other)?\n                    .slice_len(),\n                val.will_init(),\n                ctx,\n            )\n            .await;\n        res\n    }\n\n    async fn put_value_bytes<Buf>(\n        &mut self,\n        key: Key,\n        lsn: Lsn,\n        val: FullSlice<Buf>,\n        will_init: bool,\n        ctx: &RequestContext,\n    ) -> (FullSlice<Buf>, Result<(), PutError>)\n    where\n        Buf: IoBuf + Send,\n    {\n        assert!(\n            self.lsn_range.start <= lsn,\n            \"lsn_start={}, lsn={}\",\n            self.lsn_range.start,\n            lsn\n        );\n        // We don't want to use compression in delta layer creation\n        let compression = ImageCompressionAlgorithm::Disabled;\n        let (val, res) = self\n            .blob_writer\n            .write_blob_maybe_compressed(val, ctx, compression)\n            .await;\n        let res = res.map_err(PutError::WriteBlob);\n        let off = match res {\n            Ok((off, _)) => off,\n            Err(e) => return (val, Err(e)),\n        };\n\n        let blob_ref = BlobRef::new(off, will_init);\n\n        let delta_key = 
DeltaKey::from_key_lsn(&key, lsn);\n        let res = self\n            .tree\n            .append(&delta_key.0, blob_ref.0)\n            .map_err(anyhow::Error::new)\n            .map_err(PutError::Other);\n\n        self.num_keys += 1;\n\n        (val, res)\n    }\n\n    fn size(&self) -> u64 {\n        self.blob_writer.size() + self.tree.borrow_writer().size()\n    }\n\n    ///\n    /// Finish writing the delta layer.\n    ///\n    async fn finish(\n        self,\n        key_end: Key,\n        ctx: &RequestContext,\n    ) -> anyhow::Result<(PersistentLayerDesc, Utf8PathBuf)> {\n        let index_start_blk = self.blob_writer.size().div_ceil(PAGE_SZ as u64) as u32;\n\n        let file = self\n            .blob_writer\n            .shutdown(\n                BufferedWriterShutdownMode::ZeroPadToNextMultiple(PAGE_SZ),\n                ctx,\n            )\n            .await?;\n\n        // Write out the index\n        let (index_root_blk, block_buf) = self.tree.finish()?;\n        let mut offset = index_start_blk as u64 * PAGE_SZ as u64;\n\n        // TODO(yuchen): https://github.com/neondatabase/neon/issues/10092\n        // Should we just replace BlockBuf::blocks with one big buffer\n        for buf in block_buf.blocks {\n            let (_buf, res) = file.write_all_at(buf.slice_len(), offset, ctx).await;\n            res?;\n            offset += PAGE_SZ as u64;\n        }\n        assert!(self.lsn_range.start < self.lsn_range.end);\n        // Fill in the summary on blk 0\n        let summary = Summary {\n            magic: DELTA_FILE_MAGIC,\n            format_version: STORAGE_FORMAT_VERSION,\n            tenant_id: self.tenant_shard_id.tenant_id,\n            timeline_id: self.timeline_id,\n            key_range: self.key_start..key_end,\n            lsn_range: self.lsn_range.clone(),\n            index_start_blk,\n            index_root_blk,\n        };\n\n        // Writes summary at the first block (offset 0).\n        let buf = summary.ser_into_page()?;\n  
      let (_buf, res) = file.write_all_at(buf.slice_len(), 0, ctx).await;\n        res?;\n\n        let metadata = file\n            .metadata()\n            .await\n            .context(\"get file metadata to determine size\")?;\n\n        // 5GB limit for objects without multipart upload (which we don't want to use)\n        // Make it a little bit below to account for differing GB units\n        // https://docs.aws.amazon.com/AmazonS3/latest/userguide/upload-objects.html\n        ensure!(\n            metadata.len() <= S3_UPLOAD_LIMIT,\n            \"Created delta layer file at {} of size {} above limit {S3_UPLOAD_LIMIT}!\",\n            file.path(),\n            metadata.len()\n        );\n\n        // Note: Because we opened the file in write-only mode, we cannot\n        // reuse the same VirtualFile for reading later. That's why we don't\n        // set inner.file here. The first read will have to re-open it.\n\n        let desc = PersistentLayerDesc::new_delta(\n            self.tenant_shard_id,\n            self.timeline_id,\n            self.key_start..key_end,\n            self.lsn_range.clone(),\n            metadata.len(),\n        );\n\n        // fsync the file\n        file.sync_all()\n            .await\n            .maybe_fatal_err(\"delta_layer sync_all\")?;\n\n        trace!(\"created delta layer {}\", self.path);\n\n        // The gate guard stored in `destination_file` is dropped. Callers (e.g.. flush loop or compaction)\n        // keep the gate open also, so that it's safe for them to rename the file to its final destination.\n        file.disarm_into_inner();\n\n        Ok((desc, self.path))\n    }\n}\n\n/// A builder object for constructing a new delta layer.\n///\n/// Usage:\n///\n/// 1. Create the DeltaLayerWriter by calling DeltaLayerWriter::new(...)\n///\n/// 2. Write the contents by calling `put_value` for every page\n///    version to store in the layer.\n///\n/// 3. 
Call `finish`.\n///\n/// # Note\n///\n/// As described in <https://github.com/neondatabase/neon/issues/2650>, it's\n/// possible for the writer to drop before `finish` is actually called. So this\n/// could lead to odd temporary files in the directory, exhausting file system.\n/// This structure wraps `DeltaLayerWriterInner` and also contains `Drop`\n/// implementation that cleans up the temporary file in failure. It's not\n/// possible to do this directly in `DeltaLayerWriterInner` since `finish` moves\n/// out some fields, making it impossible to implement `Drop`.\n///\n#[must_use]\npub struct DeltaLayerWriter {\n    inner: Option<DeltaLayerWriterInner>,\n}\n\nimpl DeltaLayerWriter {\n    ///\n    /// Start building a new delta layer.\n    ///\n    #[allow(clippy::too_many_arguments)]\n    pub async fn new(\n        conf: &'static PageServerConf,\n        timeline_id: TimelineId,\n        tenant_shard_id: TenantShardId,\n        key_start: Key,\n        lsn_range: Range<Lsn>,\n        gate: &utils::sync::gate::Gate,\n        cancel: CancellationToken,\n        ctx: &RequestContext,\n    ) -> anyhow::Result<Self> {\n        Ok(Self {\n            inner: Some(\n                DeltaLayerWriterInner::new(\n                    conf,\n                    timeline_id,\n                    tenant_shard_id,\n                    key_start,\n                    lsn_range,\n                    gate,\n                    cancel,\n                    ctx,\n                )\n                .await?,\n            ),\n        })\n    }\n\n    pub fn is_empty(&self) -> bool {\n        self.inner.as_ref().unwrap().num_keys == 0\n    }\n\n    ///\n    /// Append a key-value pair to the file.\n    ///\n    /// The values must be appended in key, lsn order.\n    ///\n    pub async fn put_value(\n        &mut self,\n        key: Key,\n        lsn: Lsn,\n        val: Value,\n        ctx: &RequestContext,\n    ) -> Result<(), PutError> {\n        self.inner\n            .as_mut()\n     
       .unwrap()\n            .put_value(key, lsn, val, ctx)\n            .await\n    }\n\n    pub async fn put_value_bytes<Buf>(\n        &mut self,\n        key: Key,\n        lsn: Lsn,\n        val: FullSlice<Buf>,\n        will_init: bool,\n        ctx: &RequestContext,\n    ) -> (FullSlice<Buf>, Result<(), PutError>)\n    where\n        Buf: IoBuf + Send,\n    {\n        self.inner\n            .as_mut()\n            .unwrap()\n            .put_value_bytes(key, lsn, val, will_init, ctx)\n            .await\n    }\n\n    pub fn size(&self) -> u64 {\n        self.inner.as_ref().unwrap().size()\n    }\n\n    ///\n    /// Finish writing the delta layer.\n    ///\n    pub(crate) async fn finish(\n        mut self,\n        key_end: Key,\n        ctx: &RequestContext,\n    ) -> anyhow::Result<(PersistentLayerDesc, Utf8PathBuf)> {\n        self.inner.take().unwrap().finish(key_end, ctx).await\n    }\n\n    pub(crate) fn num_keys(&self) -> usize {\n        self.inner.as_ref().unwrap().num_keys\n    }\n\n    pub(crate) fn estimated_size(&self) -> u64 {\n        let inner = self.inner.as_ref().unwrap();\n        inner.blob_writer.size() + inner.tree.borrow_writer().size() + PAGE_SZ as u64\n    }\n}\n\n#[derive(thiserror::Error, Debug)]\npub enum RewriteSummaryError {\n    #[error(\"magic mismatch\")]\n    MagicMismatch,\n    #[error(transparent)]\n    Other(#[from] anyhow::Error),\n}\n\nimpl From<std::io::Error> for RewriteSummaryError {\n    fn from(e: std::io::Error) -> Self {\n        Self::Other(anyhow::anyhow!(e))\n    }\n}\n\nimpl DeltaLayer {\n    pub async fn rewrite_summary<F>(\n        path: &Utf8Path,\n        rewrite: F,\n        ctx: &RequestContext,\n    ) -> Result<(), RewriteSummaryError>\n    where\n        F: Fn(Summary) -> Summary,\n    {\n        let file = VirtualFile::open_with_options_v2(\n            path,\n            virtual_file::OpenOptions::new().read(true).write(true),\n            ctx,\n        )\n        .await\n        .with_context(|| 
format!(\"Failed to open file '{path}'\"))?;\n        let file_id = page_cache::next_file_id();\n        let block_reader = FileBlockReader::new(&file, file_id);\n        let summary_blk = block_reader.read_blk(0, ctx).await?;\n        let actual_summary = Summary::des_prefix(summary_blk.as_ref()).context(\"deserialize\")?;\n        if actual_summary.magic != DELTA_FILE_MAGIC {\n            return Err(RewriteSummaryError::MagicMismatch);\n        }\n\n        let new_summary = rewrite(actual_summary);\n\n        let buf = new_summary.ser_into_page().context(\"serialize\")?;\n        let (_buf, res) = file.write_all_at(buf.slice_len(), 0, ctx).await;\n        res?;\n        Ok(())\n    }\n}\n\nimpl DeltaLayerInner {\n    pub(crate) fn key_range(&self) -> &Range<Key> {\n        &self.layer_key_range\n    }\n\n    pub(crate) fn lsn_range(&self) -> &Range<Lsn> {\n        &self.layer_lsn_range\n    }\n\n    pub(super) async fn load(\n        path: &Utf8Path,\n        summary: Option<Summary>,\n        max_vectored_read_bytes: Option<MaxVectoredReadBytes>,\n        ctx: &RequestContext,\n    ) -> anyhow::Result<Self> {\n        let file = Arc::new(\n            VirtualFile::open_v2(path, ctx)\n                .await\n                .context(\"open layer file\")?,\n        );\n\n        let file_id = page_cache::next_file_id();\n\n        let block_reader = FileBlockReader::new(&file, file_id);\n\n        let summary_blk = block_reader\n            .read_blk(0, ctx)\n            .await\n            .context(\"read first block\")?;\n\n        // TODO: this should be an assertion instead; see ImageLayerInner::load\n        let actual_summary =\n            Summary::des_prefix(summary_blk.as_ref()).context(\"deserialize first block\")?;\n\n        if let Some(mut expected_summary) = summary {\n            // production code path\n            expected_summary.index_start_blk = actual_summary.index_start_blk;\n            expected_summary.index_root_blk = 
actual_summary.index_root_blk;\n            // mask out the timeline_id, but still require the layers to be from the same tenant\n            expected_summary.timeline_id = actual_summary.timeline_id;\n\n            if actual_summary != expected_summary {\n                bail!(\n                    \"in-file summary does not match expected summary. actual = {:?} expected = {:?}\",\n                    actual_summary,\n                    expected_summary\n                );\n            }\n        }\n\n        Ok(DeltaLayerInner {\n            file,\n            file_id,\n            index_start_blk: actual_summary.index_start_blk,\n            index_root_blk: actual_summary.index_root_blk,\n            max_vectored_read_bytes,\n            layer_key_range: actual_summary.key_range,\n            layer_lsn_range: actual_summary.lsn_range,\n        })\n    }\n\n    // Look up the keys in the provided keyspace and update\n    // the reconstruct state with whatever is found.\n    //\n    // Currently, the index is visited for each range, but this\n    // can be further optimised to visit the index only once.\n    pub(super) async fn get_values_reconstruct_data(\n        &self,\n        this: ResidentLayer,\n        keyspace: KeySpace,\n        lsn_range: Range<Lsn>,\n        reconstruct_state: &mut ValuesReconstructState,\n        ctx: &RequestContext,\n    ) -> Result<(), GetVectoredError> {\n        let block_reader = FileBlockReader::new(&self.file, self.file_id);\n        let index_reader = DiskBtreeReader::<_, DELTA_KEY_SIZE>::new(\n            self.index_start_blk,\n            self.index_root_blk,\n            block_reader,\n        );\n\n        let planner = VectoredReadPlanner::new(\n            self.max_vectored_read_bytes\n                .expect(\"Layer is loaded with max vectored bytes config\")\n                .0\n                .into(),\n        );\n\n        let data_end_offset = self.index_start_offset();\n\n        let reads = Self::plan_reads(\n  
          &keyspace,\n            lsn_range.clone(),\n            data_end_offset,\n            index_reader,\n            planner,\n            ctx,\n        )\n        .await\n        .map_err(GetVectoredError::Other)?;\n\n        self.do_reads_and_update_state(this, reads, reconstruct_state, ctx)\n            .await;\n\n        Ok(())\n    }\n\n    async fn plan_reads<Reader>(\n        keyspace: &KeySpace,\n        lsn_range: Range<Lsn>,\n        data_end_offset: u64,\n        index_reader: DiskBtreeReader<Reader, DELTA_KEY_SIZE>,\n        mut planner: VectoredReadPlanner,\n        ctx: &RequestContext,\n    ) -> anyhow::Result<Vec<VectoredRead>>\n    where\n        Reader: BlockReader + Clone,\n    {\n        let ctx = RequestContextBuilder::from(ctx)\n            .page_content_kind(PageContentKind::DeltaLayerBtreeNode)\n            .attached_child();\n\n        for range in keyspace.ranges.iter() {\n            let mut range_end_handled = false;\n\n            let start_key = DeltaKey::from_key_lsn(&range.start, lsn_range.start);\n            let index_stream = index_reader.clone().into_stream(&start_key.0, &ctx);\n            let mut index_stream = std::pin::pin!(index_stream);\n\n            while let Some(index_entry) = index_stream.next().await {\n                let (raw_key, value) = index_entry?;\n                let key = Key::from_slice(&raw_key[..KEY_SIZE]);\n                let lsn = DeltaKey::extract_lsn_from_buf(&raw_key);\n                let blob_ref = BlobRef(value);\n\n                // Lsns are not monotonically increasing across keys, so we don't assert on them.\n                assert!(key >= range.start);\n\n                let outside_lsn_range = !lsn_range.contains(&lsn);\n\n                let flag = {\n                    if outside_lsn_range {\n                        BlobFlag::Ignore\n                    } else if blob_ref.will_init() {\n                        BlobFlag::ReplaceAll\n                    } else {\n                     
   // Usual path: add blob to the read\n                        BlobFlag::None\n                    }\n                };\n\n                if key >= range.end || (key.next() == range.end && lsn >= lsn_range.end) {\n                    planner.handle_range_end(blob_ref.pos());\n                    range_end_handled = true;\n                    break;\n                } else {\n                    planner.handle(key, lsn, blob_ref.pos(), flag);\n                }\n            }\n\n            if !range_end_handled {\n                tracing::debug!(\"Handling range end fallback at {}\", data_end_offset);\n                planner.handle_range_end(data_end_offset);\n            }\n        }\n\n        Ok(planner.finish())\n    }\n\n    fn get_min_read_buffer_size(\n        planned_reads: &[VectoredRead],\n        read_size_soft_max: usize,\n    ) -> usize {\n        let Some(largest_read) = planned_reads.iter().max_by_key(|read| read.size()) else {\n            return read_size_soft_max;\n        };\n\n        let largest_read_size = largest_read.size();\n        if largest_read_size > read_size_soft_max {\n            // If the read is oversized, it should only contain one key.\n            let offenders = largest_read\n                .blobs_at\n                .as_slice()\n                .iter()\n                .filter_map(|(_, blob_meta)| {\n                    if blob_meta.key.is_rel_dir_key()\n                        || blob_meta.key == DBDIR_KEY\n                        || blob_meta.key.is_aux_file_key()\n                    {\n                        // The size of values for these keys is unbounded and can\n                        // grow very large in pathological cases.\n                        None\n                    } else {\n                        Some(format!(\"{}@{}\", blob_meta.key, blob_meta.lsn))\n                    }\n                })\n                .join(\", \");\n\n            if !offenders.is_empty() {\n                
tracing::warn!(\n                    \"Oversized vectored read ({} > {}) for keys {}\",\n                    largest_read_size,\n                    read_size_soft_max,\n                    offenders\n                );\n            }\n        }\n\n        largest_read_size\n    }\n\n    async fn do_reads_and_update_state(\n        &self,\n        this: ResidentLayer,\n        reads: Vec<VectoredRead>,\n        reconstruct_state: &mut ValuesReconstructState,\n        ctx: &RequestContext,\n    ) {\n        let max_vectored_read_bytes = self\n            .max_vectored_read_bytes\n            .expect(\"Layer is loaded with max vectored bytes config\")\n            .0\n            .into();\n        let buf_size = Self::get_min_read_buffer_size(&reads, max_vectored_read_bytes);\n\n        // Note that reads are processed in reverse order (from highest key+lsn).\n        // This is the order that `ReconstructState` requires such that it can\n        // track when a key is done.\n        for read in reads.into_iter().rev() {\n            let mut ios: HashMap<(Key, Lsn), OnDiskValueIo> = Default::default();\n            for (_, blob_meta) in read.blobs_at.as_slice().iter().rev() {\n                let io = reconstruct_state.update_key(\n                    &blob_meta.key,\n                    blob_meta.lsn,\n                    blob_meta.will_init,\n                );\n                ios.insert((blob_meta.key, blob_meta.lsn), io);\n            }\n\n            let read_extend_residency = this.clone();\n            let read_from = self.file.clone();\n            let read_ctx = ctx.attached_child();\n            reconstruct_state\n                .spawn_io(async move {\n                    let vectored_blob_reader = VectoredBlobReader::new(&read_from);\n                    let buf = IoBufferMut::with_capacity(buf_size);\n\n                    let res = vectored_blob_reader.read_blobs(&read, buf, &read_ctx).await;\n                    match res {\n                        
Ok(blobs_buf) => {\n                            let view = BufView::new_slice(&blobs_buf.buf);\n                            for meta in blobs_buf.blobs.iter().rev() {\n                                let io = ios.remove(&(meta.meta.key, meta.meta.lsn)).unwrap();\n\n                                let blob_read = meta.read(&view).await;\n                                let blob_read = match blob_read {\n                                    Ok(buf) => buf,\n                                    Err(e) => {\n                                        io.complete(Err(e));\n                                        continue;\n                                    }\n                                };\n\n                                io.complete(Ok(OnDiskValue::WalRecordOrImage(\n                                    blob_read.into_bytes(),\n                                )));\n                            }\n\n                            assert!(ios.is_empty());\n                        }\n                        Err(err) => {\n                            for (_, sender) in ios {\n                                sender.complete(Err(std::io::Error::new(\n                                    err.kind(),\n                                    \"vec read failed\",\n                                )));\n                            }\n                        }\n                    }\n\n                    // keep layer resident until this IO is done; this spawned IO future generally outlives the\n                    // call to `self` / the `Arc<DownloadedLayer>` / the `ResidentLayer` that guarantees residency\n                    drop(read_extend_residency);\n                })\n                .await;\n        }\n    }\n\n    pub(crate) async fn index_entries<'a>(\n        &'a self,\n        ctx: &RequestContext,\n    ) -> Result<Vec<DeltaEntry<'a>>> {\n        let block_reader = FileBlockReader::new(&self.file, self.file_id);\n        let tree_reader = DiskBtreeReader::<_, 
DELTA_KEY_SIZE>::new(\n            self.index_start_blk,\n            self.index_root_blk,\n            block_reader,\n        );\n\n        let mut all_keys: Vec<DeltaEntry<'_>> = Vec::new();\n\n        tree_reader\n            .visit(\n                &[0u8; DELTA_KEY_SIZE],\n                VisitDirection::Forwards,\n                |key, value| {\n                    let delta_key = DeltaKey::from_slice(key);\n                    let val_ref = ValueRef {\n                        blob_ref: BlobRef(value),\n                        layer: self,\n                    };\n                    let pos = BlobRef(value).pos();\n                    if let Some(last) = all_keys.last_mut() {\n                        // subtract offset of the current and last entries to get the size\n                        // of the value associated with this (key, lsn) tuple\n                        let first_pos = last.size;\n                        last.size = pos - first_pos;\n                    }\n                    let entry = DeltaEntry {\n                        key: delta_key.key(),\n                        lsn: delta_key.lsn(),\n                        size: pos,\n                        val: val_ref,\n                    };\n                    all_keys.push(entry);\n                    true\n                },\n                &RequestContextBuilder::from(ctx)\n                    .page_content_kind(PageContentKind::DeltaLayerBtreeNode)\n                    .attached_child(),\n            )\n            .await?;\n        if let Some(last) = all_keys.last_mut() {\n            // Last key occupies all space till end of value storage,\n            // which corresponds to beginning of the index\n            last.size = self.index_start_offset() - last.size;\n        }\n        Ok(all_keys)\n    }\n\n    /// Using the given writer, write out a version which has the earlier Lsns than `until`.\n    ///\n    /// Return the amount of key value records pushed to the writer.\n    
pub(super) async fn copy_prefix(\n        &self,\n        writer: &mut DeltaLayerWriter,\n        until: Lsn,\n        ctx: &RequestContext,\n    ) -> anyhow::Result<usize> {\n        use futures::stream::TryStreamExt;\n\n        use crate::tenant::vectored_blob_io::{\n            BlobMeta, ChunkedVectoredReadBuilder, VectoredReadExtended,\n        };\n\n        #[derive(Debug)]\n        enum Item {\n            Actual(Key, Lsn, BlobRef),\n            Sentinel,\n        }\n\n        impl From<Item> for Option<(Key, Lsn, BlobRef)> {\n            fn from(value: Item) -> Self {\n                match value {\n                    Item::Actual(key, lsn, blob) => Some((key, lsn, blob)),\n                    Item::Sentinel => None,\n                }\n            }\n        }\n\n        impl Item {\n            fn offset(&self) -> Option<BlobRef> {\n                match self {\n                    Item::Actual(_, _, blob) => Some(*blob),\n                    Item::Sentinel => None,\n                }\n            }\n\n            fn is_last(&self) -> bool {\n                matches!(self, Item::Sentinel)\n            }\n        }\n\n        let block_reader = FileBlockReader::new(&self.file, self.file_id);\n        let tree_reader = DiskBtreeReader::<_, DELTA_KEY_SIZE>::new(\n            self.index_start_blk,\n            self.index_root_blk,\n            block_reader,\n        );\n\n        let stream = self.stream_index_forwards(tree_reader, &[0u8; DELTA_KEY_SIZE], ctx);\n        let stream = stream.map_ok(|(key, lsn, pos)| Item::Actual(key, lsn, pos));\n        // put in a sentinel value for getting the end offset for last item, and not having to\n        // repeat the whole read part\n        let stream = stream.chain(futures::stream::once(futures::future::ready(Ok(\n            Item::Sentinel,\n        ))));\n        let mut stream = std::pin::pin!(stream);\n\n        let mut prev: Option<(Key, Lsn, BlobRef)> = None;\n\n        let mut read_builder: 
Option<ChunkedVectoredReadBuilder> = None;\n\n        let max_read_size = self\n            .max_vectored_read_bytes\n            .map(|x| x.0.get())\n            .unwrap_or(8192);\n\n        let mut buffer = Some(IoBufferMut::with_capacity(max_read_size));\n\n        // FIXME: buffering of DeltaLayerWriter\n        let mut per_blob_copy = Vec::new();\n\n        let mut records = 0;\n\n        while let Some(item) = stream.try_next().await? {\n            tracing::debug!(?item, \"popped\");\n            let offset = item\n                .offset()\n                .unwrap_or(BlobRef::new(self.index_start_offset(), false));\n\n            let actionable = if let Some((key, lsn, start_offset)) = prev.take() {\n                let end_offset = offset;\n\n                Some((\n                    BlobMeta {\n                        key,\n                        lsn,\n                        will_init: false,\n                    },\n                    start_offset..end_offset,\n                ))\n            } else {\n                None\n            };\n\n            let is_last = item.is_last();\n\n            prev = Option::from(item);\n\n            let actionable = actionable.filter(|x| x.0.lsn < until);\n\n            let builder = if let Some((meta, offsets)) = actionable {\n                // extend or create a new builder\n                if read_builder\n                    .as_mut()\n                    .map(|x| x.extend(offsets.start.pos(), offsets.end.pos(), meta))\n                    .unwrap_or(VectoredReadExtended::No)\n                    == VectoredReadExtended::Yes\n                {\n                    None\n                } else {\n                    read_builder.replace(ChunkedVectoredReadBuilder::new(\n                        offsets.start.pos(),\n                        offsets.end.pos(),\n                        meta,\n                        max_read_size,\n                    ))\n                }\n            } else {\n               
 // nothing to do, except perhaps flush any existing for the last element\n                None\n            };\n\n            // flush the possible older builder and also the new one if the item was the last one\n            let builders = builder.into_iter();\n            let builders = if is_last {\n                builders.chain(read_builder.take())\n            } else {\n                builders.chain(None)\n            };\n\n            for builder in builders {\n                let read = builder.build();\n\n                let reader = VectoredBlobReader::new(&self.file);\n\n                let mut buf = buffer.take().unwrap();\n\n                buf.clear();\n                buf.reserve(read.size());\n                let res = reader.read_blobs(&read, buf, ctx).await?;\n\n                let view = BufView::new_slice(&res.buf);\n\n                for blob in res.blobs {\n                    let key = blob.meta.key;\n                    let lsn = blob.meta.lsn;\n\n                    let data = blob.read(&view).await?;\n\n                    #[cfg(debug_assertions)]\n                    Value::des(&data)\n                        .with_context(|| {\n                            format!(\n                                \"blob failed to deserialize for {}: {:?}\",\n                                blob,\n                                utils::Hex(&data)\n                            )\n                        })\n                        .unwrap();\n\n                    // is it an image or will_init walrecord?\n                    // FIXME: this could be handled by threading the BlobRef to the\n                    // VectoredReadBuilder\n                    let will_init = wal_decoder::models::value::ValueBytes::will_init(&data)\n                        .inspect_err(|_e| {\n                            #[cfg(feature = \"testing\")]\n                            tracing::error!(data=?utils::Hex(&data), err=?_e, %key, %lsn, \"failed to parse will_init out of 
serialized value\");\n                        })\n                        .unwrap_or(false);\n\n                    per_blob_copy.clear();\n                    per_blob_copy.extend_from_slice(&data);\n\n                    let (tmp, res) = writer\n                        .put_value_bytes(\n                            key,\n                            lsn,\n                            std::mem::take(&mut per_blob_copy).slice_len(),\n                            will_init,\n                            ctx,\n                        )\n                        .await;\n                    per_blob_copy = tmp.into_raw_slice().into_inner();\n\n                    res?;\n\n                    records += 1;\n                }\n\n                buffer = Some(res.buf);\n            }\n        }\n\n        assert!(\n            read_builder.is_none(),\n            \"with the sentinel above loop should had handled all\"\n        );\n\n        Ok(records)\n    }\n\n    pub(super) async fn dump(&self, ctx: &RequestContext) -> anyhow::Result<()> {\n        println!(\n            \"index_start_blk: {}, root {}\",\n            self.index_start_blk, self.index_root_blk\n        );\n\n        let block_reader = FileBlockReader::new(&self.file, self.file_id);\n        let tree_reader = DiskBtreeReader::<_, DELTA_KEY_SIZE>::new(\n            self.index_start_blk,\n            self.index_root_blk,\n            block_reader,\n        );\n\n        tree_reader.dump(ctx).await?;\n\n        let keys = self.index_entries(ctx).await?;\n\n        async fn dump_blob(val: &ValueRef<'_>, ctx: &RequestContext) -> anyhow::Result<String> {\n            let buf = val.load_raw(ctx).await?;\n            let val = Value::des(&buf)?;\n            let desc = match val {\n                Value::Image(img) => {\n                    format!(\" img {} bytes\", img.len())\n                }\n                Value::WalRecord(rec) => {\n                    let wal_desc = 
wal_decoder::models::record::describe_wal_record(&rec)?;\n                    format!(\n                        \" rec {} bytes will_init: {} {}\",\n                        buf.len(),\n                        rec.will_init(),\n                        wal_desc\n                    )\n                }\n            };\n            Ok(desc)\n        }\n\n        for entry in keys {\n            let DeltaEntry { key, lsn, val, .. } = entry;\n            let desc = match dump_blob(&val, ctx).await {\n                Ok(desc) => desc,\n                Err(err) => {\n                    format!(\"ERROR: {err}\")\n                }\n            };\n            println!(\"  key {key} at {lsn}: {desc}\");\n\n            // Print more details about CHECKPOINT records. Would be nice to print details\n            // of many other record types too, but these are particularly interesting, as\n            // have a lot of special processing for them in walingest.rs.\n            use pageserver_api::key::CHECKPOINT_KEY;\n            use postgres_ffi::CheckPoint;\n            if key == CHECKPOINT_KEY {\n                let val = val.load(ctx).await?;\n                match val {\n                    Value::Image(img) => {\n                        let checkpoint = CheckPoint::decode(&img)?;\n                        println!(\"   CHECKPOINT: {checkpoint:?}\");\n                    }\n                    Value::WalRecord(_rec) => {\n                        println!(\"   unexpected walrecord value for checkpoint key\");\n                    }\n                }\n            }\n        }\n\n        Ok(())\n    }\n\n    fn stream_index_forwards<'a, R>(\n        &'a self,\n        reader: DiskBtreeReader<R, DELTA_KEY_SIZE>,\n        start: &'a [u8; DELTA_KEY_SIZE],\n        ctx: &'a RequestContext,\n    ) -> impl futures::stream::Stream<\n        Item = Result<(Key, Lsn, BlobRef), crate::tenant::disk_btree::DiskBtreeError>,\n    > + 'a\n    where\n        R: BlockReader + 'a,\n    {\n      
  use futures::stream::TryStreamExt;\n        let stream = reader.into_stream(start, ctx);\n        stream.map_ok(|(key, value)| {\n            let key = DeltaKey::from_slice(&key);\n            let (key, lsn) = (key.key(), key.lsn());\n            let offset = BlobRef(value);\n\n            (key, lsn, offset)\n        })\n    }\n\n    /// The file offset to the first block of index.\n    ///\n    /// The file structure is summary, values, and index. We often need this for the size of last blob.\n    fn index_start_offset(&self) -> u64 {\n        let offset = self.index_start_blk as u64 * PAGE_SZ as u64;\n        let bref = BlobRef(offset);\n        tracing::debug!(\n            index_start_blk = self.index_start_blk,\n            offset,\n            pos = bref.pos(),\n            \"index_start_offset\"\n        );\n        offset\n    }\n\n    pub fn iter_with_options<'a>(\n        &'a self,\n        ctx: &'a RequestContext,\n        max_read_size: u64,\n        max_batch_size: usize,\n    ) -> DeltaLayerIterator<'a> {\n        let block_reader = FileBlockReader::new(&self.file, self.file_id);\n        let tree_reader =\n            DiskBtreeReader::new(self.index_start_blk, self.index_root_blk, block_reader);\n        DeltaLayerIterator {\n            delta_layer: self,\n            ctx,\n            index_iter: tree_reader.iter(&[0; DELTA_KEY_SIZE], ctx),\n            key_values_batch: std::collections::VecDeque::new(),\n            is_end: false,\n            planner: StreamingVectoredReadPlanner::new(max_read_size, max_batch_size),\n        }\n    }\n\n    /// NB: not super efficient, but not terrible either. 
Should prob be an iterator.\n    //\n    // We're reusing the index traversal logical in plan_reads; would be nice to\n    // factor that out.\n    pub(crate) async fn load_keys(&self, ctx: &RequestContext) -> anyhow::Result<Vec<Key>> {\n        self.index_entries(ctx)\n            .await\n            .map(|entries| entries.into_iter().map(|entry| entry.key).collect())\n    }\n}\n\n/// A set of data associated with a delta layer key and its value\npub struct DeltaEntry<'a> {\n    pub key: Key,\n    pub lsn: Lsn,\n    /// Size of the stored value\n    pub size: u64,\n    /// Reference to the on-disk value\n    pub val: ValueRef<'a>,\n}\n\n/// Reference to an on-disk value\npub struct ValueRef<'a> {\n    blob_ref: BlobRef,\n    layer: &'a DeltaLayerInner,\n}\n\nimpl ValueRef<'_> {\n    /// Loads the value from disk\n    pub async fn load(&self, ctx: &RequestContext) -> Result<Value> {\n        let buf = self.load_raw(ctx).await?;\n        let val = Value::des(&buf)?;\n        Ok(val)\n    }\n\n    async fn load_raw(&self, ctx: &RequestContext) -> Result<Vec<u8>> {\n        let reader = BlockCursor::new(crate::tenant::block_io::BlockReaderRef::Adapter(Adapter(\n            self.layer,\n        )));\n        let buf = reader.read_blob(self.blob_ref.pos(), ctx).await?;\n        Ok(buf)\n    }\n}\n\npub(crate) struct Adapter<T>(T);\n\nimpl<T: AsRef<DeltaLayerInner>> Adapter<T> {\n    pub(crate) async fn read_blk(\n        &self,\n        blknum: u32,\n        ctx: &RequestContext,\n    ) -> Result<BlockLease, std::io::Error> {\n        let block_reader = FileBlockReader::new(&self.0.as_ref().file, self.0.as_ref().file_id);\n        block_reader.read_blk(blknum, ctx).await\n    }\n}\n\nimpl AsRef<DeltaLayerInner> for DeltaLayerInner {\n    fn as_ref(&self) -> &DeltaLayerInner {\n        self\n    }\n}\n\nimpl<'a> pageserver_compaction::interface::CompactionDeltaEntry<'a, Key> for DeltaEntry<'a> {\n    fn key(&self) -> Key {\n        self.key\n    }\n    fn lsn(&self) -> 
Lsn {\n        self.lsn\n    }\n    fn size(&self) -> u64 {\n        self.size\n    }\n}\n\npub struct DeltaLayerIterator<'a> {\n    delta_layer: &'a DeltaLayerInner,\n    ctx: &'a RequestContext,\n    planner: StreamingVectoredReadPlanner,\n    index_iter: DiskBtreeIterator<'a>,\n    key_values_batch: VecDeque<(Key, Lsn, Value)>,\n    is_end: bool,\n}\n\nimpl DeltaLayerIterator<'_> {\n    pub(crate) fn layer_dbg_info(&self) -> String {\n        self.delta_layer.layer_dbg_info()\n    }\n\n    /// Retrieve a batch of key-value pairs into the iterator buffer.\n    async fn next_batch(&mut self) -> anyhow::Result<()> {\n        assert!(self.key_values_batch.is_empty());\n        assert!(!self.is_end);\n\n        let plan = loop {\n            if let Some(res) = self.index_iter.next().await {\n                let (raw_key, value) = res?;\n                let key = Key::from_slice(&raw_key[..KEY_SIZE]);\n                let lsn = DeltaKey::extract_lsn_from_buf(&raw_key);\n                let blob_ref = BlobRef(value);\n                let offset = blob_ref.pos();\n                if let Some(batch_plan) =\n                    self.planner.handle(key, lsn, offset, blob_ref.will_init())\n                {\n                    break batch_plan;\n                }\n            } else {\n                self.is_end = true;\n                let data_end_offset = self.delta_layer.index_start_offset();\n                if let Some(item) = self.planner.handle_range_end(data_end_offset) {\n                    break item;\n                } else {\n                    return Ok(()); // TODO: test empty iterator\n                }\n            }\n        };\n        let vectored_blob_reader = VectoredBlobReader::new(&self.delta_layer.file);\n        let mut next_batch = std::collections::VecDeque::new();\n        let buf_size = plan.size();\n        let buf = IoBufferMut::with_capacity(buf_size);\n        let blobs_buf = vectored_blob_reader\n            .read_blobs(&plan, buf, 
self.ctx)\n            .await?;\n        let view = BufView::new_slice(&blobs_buf.buf);\n        for meta in blobs_buf.blobs.iter() {\n            let blob_read = meta.read(&view).await?;\n            let value = Value::des(&blob_read)?;\n\n            next_batch.push_back((meta.meta.key, meta.meta.lsn, value));\n        }\n        self.key_values_batch = next_batch;\n        Ok(())\n    }\n\n    pub async fn next(&mut self) -> anyhow::Result<Option<(Key, Lsn, Value)>> {\n        if self.key_values_batch.is_empty() {\n            if self.is_end {\n                return Ok(None);\n            }\n            self.next_batch().await?;\n        }\n        Ok(Some(\n            self.key_values_batch\n                .pop_front()\n                .expect(\"should not be empty\"),\n        ))\n    }\n}\n\n#[cfg(test)]\npub(crate) mod test {\n    use std::collections::BTreeMap;\n\n    use super::*;\n    use crate::DEFAULT_PG_VERSION;\n    use crate::context::DownloadBehavior;\n    use crate::task_mgr::TaskKind;\n    use crate::tenant::disk_btree::tests::TestDisk;\n    use crate::tenant::harness::{TIMELINE_ID, TenantHarness};\n    use crate::tenant::storage_layer::{Layer, ResidentLayer};\n    use crate::tenant::timeline::layer_manager::LayerManagerLockHolder;\n    use crate::tenant::{TenantShard, Timeline};\n    use bytes::Bytes;\n    use itertools::MinMaxResult;\n    use postgres_ffi::PgMajorVersion;\n    use rand::prelude::{SeedableRng, StdRng};\n    use rand::seq::IndexedRandom;\n    use rand::{Rng, RngCore};\n\n    /// Construct an index for a fictional delta layer and then\n    /// traverse in order to plan vectored reads for a query. 
Finally,\n    /// verify that the traversal fed the right index key and value\n    /// pairs into the planner.\n    #[tokio::test]\n    async fn test_delta_layer_index_traversal() {\n        let base_key = Key {\n            field1: 0,\n            field2: 1663,\n            field3: 12972,\n            field4: 16396,\n            field5: 0,\n            field6: 246080,\n        };\n\n        // Populate the index with some entries\n        let entries: BTreeMap<Key, Vec<Lsn>> = BTreeMap::from([\n            (base_key, vec![Lsn(1), Lsn(5), Lsn(25), Lsn(26), Lsn(28)]),\n            (base_key.add(1), vec![Lsn(2), Lsn(5), Lsn(10), Lsn(50)]),\n            (base_key.add(2), vec![Lsn(2), Lsn(5), Lsn(10), Lsn(50)]),\n            (base_key.add(5), vec![Lsn(10), Lsn(15), Lsn(16), Lsn(20)]),\n        ]);\n\n        let mut disk = TestDisk::default();\n        let mut writer = DiskBtreeBuilder::<_, DELTA_KEY_SIZE>::new(&mut disk);\n\n        let mut disk_offset = 0;\n        for (key, lsns) in &entries {\n            for lsn in lsns {\n                let index_key = DeltaKey::from_key_lsn(key, *lsn);\n                let blob_ref = BlobRef::new(disk_offset, false);\n                writer\n                    .append(&index_key.0, blob_ref.0)\n                    .expect(\"In memory disk append should never fail\");\n\n                disk_offset += 1;\n            }\n        }\n\n        // Prepare all the arguments for the call into `plan_reads` below\n        let (root_offset, _writer) = writer\n            .finish()\n            .expect(\"In memory disk finish should never fail\");\n        let reader = DiskBtreeReader::<_, DELTA_KEY_SIZE>::new(0, root_offset, disk);\n        let planner = VectoredReadPlanner::new(100);\n        let ctx = RequestContext::new(TaskKind::UnitTest, DownloadBehavior::Error);\n\n        let keyspace = KeySpace {\n            ranges: vec![\n                base_key..base_key.add(3),\n                base_key.add(3)..base_key.add(100),\n          
  ],\n        };\n        let lsn_range = Lsn(2)..Lsn(40);\n\n        // Plan and validate\n        let vectored_reads = DeltaLayerInner::plan_reads(\n            &keyspace,\n            lsn_range.clone(),\n            disk_offset,\n            reader,\n            planner,\n            &ctx,\n        )\n        .await\n        .expect(\"Read planning should not fail\");\n\n        validate(keyspace, lsn_range, vectored_reads, entries);\n    }\n\n    fn validate(\n        keyspace: KeySpace,\n        lsn_range: Range<Lsn>,\n        vectored_reads: Vec<VectoredRead>,\n        index_entries: BTreeMap<Key, Vec<Lsn>>,\n    ) {\n        #[derive(Debug, PartialEq, Eq)]\n        struct BlobSpec {\n            key: Key,\n            lsn: Lsn,\n            at: u64,\n        }\n\n        let mut planned_blobs = Vec::new();\n        for read in vectored_reads {\n            for (at, meta) in read.blobs_at.as_slice() {\n                planned_blobs.push(BlobSpec {\n                    key: meta.key,\n                    lsn: meta.lsn,\n                    at: *at,\n                });\n            }\n        }\n\n        let mut expected_blobs = Vec::new();\n        let mut disk_offset = 0;\n        for (key, lsns) in index_entries {\n            for lsn in lsns {\n                let key_included = keyspace.ranges.iter().any(|range| range.contains(&key));\n                let lsn_included = lsn_range.contains(&lsn);\n\n                if key_included && lsn_included {\n                    expected_blobs.push(BlobSpec {\n                        key,\n                        lsn,\n                        at: disk_offset,\n                    });\n                }\n\n                disk_offset += 1;\n            }\n        }\n\n        assert_eq!(planned_blobs, expected_blobs);\n    }\n\n    mod constants {\n        use utils::lsn::Lsn;\n\n        /// Offset used by all lsns in this test\n        pub(super) const LSN_OFFSET: Lsn = Lsn(0x08);\n        /// Number of unique keys 
included in the test data\n        pub(super) const KEY_COUNT: u8 = 60;\n        /// Max number of different lsns for each key\n        pub(super) const MAX_ENTRIES_PER_KEY: u8 = 20;\n        /// Possible value sizes for each key along with a probability weight\n        pub(super) const VALUE_SIZES: [(usize, u8); 3] = [(100, 2), (1024, 2), (1024 * 1024, 1)];\n        /// Probability that there will be a gap between the current key and the next one (33.3%)\n        pub(super) const KEY_GAP_CHANGES: [(bool, u8); 2] = [(true, 1), (false, 2)];\n        /// The minimum size of a key range in all the generated reads\n        pub(super) const MIN_RANGE_SIZE: i128 = 10;\n        /// The number of ranges included in each vectored read\n        pub(super) const RANGES_COUNT: u8 = 2;\n        /// The number of vectored reads performed\n        pub(super) const READS_COUNT: u8 = 100;\n        /// Soft max size of a vectored read. Will be violated if we have to read keys\n        /// with values larger than the limit\n        pub(super) const MAX_VECTORED_READ_BYTES: usize = 64 * 1024;\n    }\n\n    struct Entry {\n        key: Key,\n        lsn: Lsn,\n        value: Vec<u8>,\n    }\n\n    fn generate_entries(rng: &mut StdRng) -> Vec<Entry> {\n        let mut current_key = Key::MIN;\n\n        let mut entries = Vec::new();\n        for _ in 0..constants::KEY_COUNT {\n            let count = rng.random_range(1..constants::MAX_ENTRIES_PER_KEY);\n            let mut lsns_iter =\n                std::iter::successors(Some(Lsn(constants::LSN_OFFSET.0 + 0x08)), |lsn| {\n                    Some(Lsn(lsn.0 + 0x08))\n                });\n            let mut lsns = Vec::new();\n            while lsns.len() < count as usize {\n                let take = rng.random_bool(0.5);\n                let lsn = lsns_iter.next().unwrap();\n                if take {\n                    lsns.push(lsn);\n                }\n            }\n\n            for lsn in lsns {\n                let size = 
constants::VALUE_SIZES\n                    .choose_weighted(rng, |item| item.1)\n                    .unwrap()\n                    .0;\n                let mut buf = vec![0; size];\n                rng.fill_bytes(&mut buf);\n\n                entries.push(Entry {\n                    key: current_key,\n                    lsn,\n                    value: buf,\n                })\n            }\n\n            let gap = constants::KEY_GAP_CHANGES\n                .choose_weighted(rng, |item| item.1)\n                .unwrap()\n                .0;\n            if gap {\n                current_key = current_key.add(2);\n            } else {\n                current_key = current_key.add(1);\n            }\n        }\n\n        entries\n    }\n\n    struct EntriesMeta {\n        key_range: Range<Key>,\n        lsn_range: Range<Lsn>,\n        index: BTreeMap<(Key, Lsn), Vec<u8>>,\n    }\n\n    fn get_entries_meta(entries: &[Entry]) -> EntriesMeta {\n        let key_range = match entries.iter().minmax_by_key(|e| e.key) {\n            MinMaxResult::MinMax(min, max) => min.key..max.key.next(),\n            _ => panic!(\"More than one entry is always expected\"),\n        };\n\n        let lsn_range = match entries.iter().minmax_by_key(|e| e.lsn) {\n            MinMaxResult::MinMax(min, max) => min.lsn..Lsn(max.lsn.0 + 1),\n            _ => panic!(\"More than one entry is always expected\"),\n        };\n\n        let mut index = BTreeMap::new();\n        for entry in entries.iter() {\n            index.insert((entry.key, entry.lsn), entry.value.clone());\n        }\n\n        EntriesMeta {\n            key_range,\n            lsn_range,\n            index,\n        }\n    }\n\n    fn pick_random_keyspace(rng: &mut StdRng, key_range: &Range<Key>) -> KeySpace {\n        let start = key_range.start.to_i128();\n        let end = key_range.end.to_i128();\n\n        let mut keyspace = KeySpace::default();\n\n        for _ in 0..constants::RANGES_COUNT {\n            let mut 
range: Option<Range<Key>> = Option::default();\n            while range.is_none() || keyspace.overlaps(range.as_ref().unwrap()) {\n                let range_start = rng.random_range(start..end);\n                let range_end_offset = range_start + constants::MIN_RANGE_SIZE;\n                if range_end_offset >= end {\n                    range = Some(Key::from_i128(range_start)..Key::from_i128(end));\n                } else {\n                    let range_end =\n                        rng.random_range((range_start + constants::MIN_RANGE_SIZE)..end);\n                    range = Some(Key::from_i128(range_start)..Key::from_i128(range_end));\n                }\n            }\n            keyspace.ranges.push(range.unwrap());\n        }\n\n        keyspace\n    }\n\n    #[tokio::test]\n    async fn test_delta_layer_vectored_read_end_to_end() -> anyhow::Result<()> {\n        let harness = TenantHarness::create(\"test_delta_layer_oversized_vectored_read\").await?;\n        let (tenant, ctx) = harness.load().await;\n\n        let timeline_id = TimelineId::generate();\n        let timeline = tenant\n            .create_test_timeline(timeline_id, constants::LSN_OFFSET, DEFAULT_PG_VERSION, &ctx)\n            .await?;\n\n        tracing::info!(\"Generating test data ...\");\n\n        let rng = &mut StdRng::seed_from_u64(0);\n        let entries = generate_entries(rng);\n        let entries_meta = get_entries_meta(&entries);\n\n        tracing::info!(\"Done generating {} entries\", entries.len());\n\n        tracing::info!(\"Writing test data to delta layer ...\");\n        let mut writer = DeltaLayerWriter::new(\n            harness.conf,\n            timeline_id,\n            harness.tenant_shard_id,\n            entries_meta.key_range.start,\n            entries_meta.lsn_range.clone(),\n            &timeline.gate,\n            timeline.cancel.clone(),\n            &ctx,\n        )\n        .await?;\n\n        for entry in entries {\n            let (_, res) = writer\n 
               .put_value_bytes(entry.key, entry.lsn, entry.value.slice_len(), false, &ctx)\n                .await;\n            res?;\n        }\n\n        let (desc, path) = writer.finish(entries_meta.key_range.end, &ctx).await?;\n        let resident = Layer::finish_creating(harness.conf, &timeline, desc, &path)?;\n\n        let inner = resident.get_as_delta(&ctx).await?;\n\n        let file_size = inner.file.metadata().await?.len();\n        tracing::info!(\n            \"Done writing test data to delta layer. Resulting file size is: {}\",\n            file_size\n        );\n\n        for i in 0..constants::READS_COUNT {\n            tracing::info!(\"Doing vectored read {}/{}\", i + 1, constants::READS_COUNT);\n\n            let block_reader = FileBlockReader::new(&inner.file, inner.file_id);\n            let index_reader = DiskBtreeReader::<_, DELTA_KEY_SIZE>::new(\n                inner.index_start_blk,\n                inner.index_root_blk,\n                block_reader,\n            );\n\n            let planner = VectoredReadPlanner::new(constants::MAX_VECTORED_READ_BYTES);\n            let keyspace = pick_random_keyspace(rng, &entries_meta.key_range);\n            let data_end_offset = inner.index_start_blk as u64 * PAGE_SZ as u64;\n\n            let vectored_reads = DeltaLayerInner::plan_reads(\n                &keyspace,\n                entries_meta.lsn_range.clone(),\n                data_end_offset,\n                index_reader,\n                planner,\n                &ctx,\n            )\n            .await?;\n\n            let vectored_blob_reader = VectoredBlobReader::new(&inner.file);\n            let buf_size = DeltaLayerInner::get_min_read_buffer_size(\n                &vectored_reads,\n                constants::MAX_VECTORED_READ_BYTES,\n            );\n            let mut buf = Some(IoBufferMut::with_capacity(buf_size));\n\n            for read in vectored_reads {\n                let blobs_buf = vectored_blob_reader\n                    
.read_blobs(&read, buf.take().expect(\"Should have a buffer\"), &ctx)\n                    .await?;\n                let view = BufView::new_slice(&blobs_buf.buf);\n                for meta in blobs_buf.blobs.iter() {\n                    let value = meta.read(&view).await?;\n                    assert_eq!(\n                        &value[..],\n                        &entries_meta.index[&(meta.meta.key, meta.meta.lsn)]\n                    );\n                }\n\n                buf = Some(blobs_buf.buf);\n            }\n        }\n\n        Ok(())\n    }\n\n    #[tokio::test]\n    async fn copy_delta_prefix_smoke() {\n        use bytes::Bytes;\n        use wal_decoder::models::record::NeonWalRecord;\n\n        let h = crate::tenant::harness::TenantHarness::create(\"truncate_delta_smoke\")\n            .await\n            .unwrap();\n        let (tenant, ctx) = h.load().await;\n        let ctx = &ctx;\n        let timeline = tenant\n            .create_test_timeline(TimelineId::generate(), Lsn(0x10), PgMajorVersion::PG14, ctx)\n            .await\n            .unwrap();\n        let ctx = &ctx.with_scope_timeline(&timeline);\n\n        let initdb_layer = timeline\n            .layers\n            .read(crate::tenant::timeline::layer_manager::LayerManagerLockHolder::Testing)\n            .await\n            .likely_resident_layers()\n            .next()\n            .cloned()\n            .unwrap();\n\n        {\n            let mut writer = timeline.writer().await;\n\n            let data = [\n                (0x20, 12, Value::Image(Bytes::from_static(b\"foobar\"))),\n                (\n                    0x30,\n                    12,\n                    Value::WalRecord(NeonWalRecord::Postgres {\n                        will_init: false,\n                        rec: Bytes::from_static(b\"1\"),\n                    }),\n                ),\n                (\n                    0x40,\n                    12,\n                    
Value::WalRecord(NeonWalRecord::Postgres {\n                        will_init: true,\n                        rec: Bytes::from_static(b\"2\"),\n                    }),\n                ),\n                // build an oversized value so we cannot extend an existing read over\n                // this\n                (\n                    0x50,\n                    12,\n                    Value::WalRecord(NeonWalRecord::Postgres {\n                        will_init: true,\n                        rec: {\n                            let mut buf =\n                                vec![0u8; tenant.conf.max_vectored_read_bytes.0.get() + 1024];\n                            buf.iter_mut()\n                                .enumerate()\n                                .for_each(|(i, slot)| *slot = (i % 256) as u8);\n                            Bytes::from(buf)\n                        },\n                    }),\n                ),\n                // because the oversized read cannot be extended further, we are sure to exercise the\n                // builder created on the last round with this:\n                (\n                    0x60,\n                    12,\n                    Value::WalRecord(NeonWalRecord::Postgres {\n                        will_init: true,\n                        rec: Bytes::from_static(b\"3\"),\n                    }),\n                ),\n                (\n                    0x60,\n                    9,\n                    Value::Image(Bytes::from_static(b\"something for a different key\")),\n                ),\n            ];\n\n            let mut last_lsn = None;\n\n            for (lsn, key, value) in data {\n                let key = Key::from_i128(key);\n                writer.put(key, Lsn(lsn), &value, ctx).await.unwrap();\n                last_lsn = Some(lsn);\n            }\n\n            writer.finish_write(Lsn(last_lsn.unwrap()));\n        }\n        timeline.freeze_and_flush().await.unwrap();\n\n        let new_layer = 
timeline\n            .layers\n            .read(LayerManagerLockHolder::Testing)\n            .await\n            .likely_resident_layers()\n            .find(|&x| x != &initdb_layer)\n            .cloned()\n            .unwrap();\n\n        // create a copy for the timeline, so we don't overwrite the file\n        let branch = tenant\n            .branch_timeline_test(&timeline, TimelineId::generate(), None, ctx)\n            .await\n            .unwrap();\n\n        assert_eq!(branch.get_ancestor_lsn(), Lsn(0x60));\n\n        // truncating at 0x61 gives us a full copy, otherwise just go backwards until there's just\n        // a single key\n\n        for truncate_at in [0x61, 0x51, 0x41, 0x31, 0x21] {\n            let truncate_at = Lsn(truncate_at);\n\n            let mut writer = DeltaLayerWriter::new(\n                tenant.conf,\n                branch.timeline_id,\n                tenant.tenant_shard_id,\n                Key::MIN,\n                Lsn(0x11)..truncate_at,\n                &branch.gate,\n                branch.cancel.clone(),\n                ctx,\n            )\n            .await\n            .unwrap();\n\n            let new_layer = new_layer.download_and_keep_resident(ctx).await.unwrap();\n\n            new_layer\n                .copy_delta_prefix(&mut writer, truncate_at, ctx)\n                .await\n                .unwrap();\n\n            let (desc, path) = writer.finish(Key::MAX, ctx).await.unwrap();\n            let copied_layer = Layer::finish_creating(tenant.conf, &branch, desc, &path).unwrap();\n\n            copied_layer.get_as_delta(ctx).await.unwrap();\n\n            assert_keys_and_values_eq(\n                new_layer.get_as_delta(ctx).await.unwrap(),\n                copied_layer.get_as_delta(ctx).await.unwrap(),\n                truncate_at,\n                ctx,\n            )\n            .await;\n        }\n    }\n\n    async fn assert_keys_and_values_eq(\n        source: &DeltaLayerInner,\n        truncated: 
&DeltaLayerInner,\n        truncated_at: Lsn,\n        ctx: &RequestContext,\n    ) {\n        use futures::future::ready;\n        use futures::stream::TryStreamExt;\n\n        let start_key = [0u8; DELTA_KEY_SIZE];\n\n        let source_reader = FileBlockReader::new(&source.file, source.file_id);\n        let source_tree = DiskBtreeReader::<_, DELTA_KEY_SIZE>::new(\n            source.index_start_blk,\n            source.index_root_blk,\n            &source_reader,\n        );\n        let source_stream = source.stream_index_forwards(source_tree, &start_key, ctx);\n        let source_stream = source_stream.filter(|res| match res {\n            Ok((_, lsn, _)) => ready(lsn < &truncated_at),\n            _ => ready(true),\n        });\n        let mut source_stream = std::pin::pin!(source_stream);\n\n        let truncated_reader = FileBlockReader::new(&truncated.file, truncated.file_id);\n        let truncated_tree = DiskBtreeReader::<_, DELTA_KEY_SIZE>::new(\n            truncated.index_start_blk,\n            truncated.index_root_blk,\n            &truncated_reader,\n        );\n        let truncated_stream = truncated.stream_index_forwards(truncated_tree, &start_key, ctx);\n        let mut truncated_stream = std::pin::pin!(truncated_stream);\n\n        let mut scratch_left = Vec::new();\n        let mut scratch_right = Vec::new();\n\n        loop {\n            let (src, truncated) = (source_stream.try_next(), truncated_stream.try_next());\n            let (src, truncated) = tokio::try_join!(src, truncated).unwrap();\n\n            if src.is_none() {\n                assert!(truncated.is_none());\n                break;\n            }\n\n            let (src, truncated) = (src.unwrap(), truncated.unwrap());\n\n            // because we've filtered the source with Lsn, we should always have the same keys from both.\n            assert_eq!(src.0, truncated.0);\n            assert_eq!(src.1, truncated.1);\n\n            // if this is needed for something else, just 
drop this assert.\n            assert!(\n                src.2.pos() >= truncated.2.pos(),\n                \"value position should not go backwards {} vs. {}\",\n                src.2.pos(),\n                truncated.2.pos()\n            );\n\n            scratch_left.clear();\n            let src_cursor = source_reader.block_cursor();\n            let left = src_cursor.read_blob_into_buf(src.2.pos(), &mut scratch_left, ctx);\n            scratch_right.clear();\n            let trunc_cursor = truncated_reader.block_cursor();\n            let right = trunc_cursor.read_blob_into_buf(truncated.2.pos(), &mut scratch_right, ctx);\n\n            tokio::try_join!(left, right).unwrap();\n\n            assert_eq!(utils::Hex(&scratch_left), utils::Hex(&scratch_right));\n        }\n    }\n\n    pub(crate) fn sort_delta(\n        (k1, l1, _): &(Key, Lsn, Value),\n        (k2, l2, _): &(Key, Lsn, Value),\n    ) -> std::cmp::Ordering {\n        (k1, l1).cmp(&(k2, l2))\n    }\n\n    #[cfg(feature = \"testing\")]\n    pub(crate) fn sort_delta_value(\n        (k1, l1, v1): &(Key, Lsn, Value),\n        (k2, l2, v2): &(Key, Lsn, Value),\n    ) -> std::cmp::Ordering {\n        let order_1 = if v1.is_image() { 0 } else { 1 };\n        let order_2 = if v2.is_image() { 0 } else { 1 };\n        (k1, l1, order_1).cmp(&(k2, l2, order_2))\n    }\n\n    pub(crate) async fn produce_delta_layer(\n        tenant: &TenantShard,\n        tline: &Arc<Timeline>,\n        mut deltas: Vec<(Key, Lsn, Value)>,\n        ctx: &RequestContext,\n    ) -> anyhow::Result<ResidentLayer> {\n        deltas.sort_by(sort_delta);\n        let (key_start, _, _) = deltas.first().unwrap();\n        let (key_max, _, _) = deltas.last().unwrap();\n        let lsn_min = deltas.iter().map(|(_, lsn, _)| lsn).min().unwrap();\n        let lsn_max = deltas.iter().map(|(_, lsn, _)| lsn).max().unwrap();\n        let lsn_end = Lsn(lsn_max.0 + 1);\n        let mut writer = DeltaLayerWriter::new(\n            tenant.conf,\n       
     tline.timeline_id,\n            tenant.tenant_shard_id,\n            *key_start,\n            (*lsn_min)..lsn_end,\n            &tline.gate,\n            tline.cancel.clone(),\n            ctx,\n        )\n        .await?;\n        let key_end = key_max.next();\n\n        for (key, lsn, value) in deltas {\n            writer.put_value(key, lsn, value, ctx).await?;\n        }\n\n        let (desc, path) = writer.finish(key_end, ctx).await?;\n        let delta_layer = Layer::finish_creating(tenant.conf, tline, desc, &path)?;\n\n        Ok::<_, anyhow::Error>(delta_layer)\n    }\n\n    async fn assert_delta_iter_equal(\n        delta_iter: &mut DeltaLayerIterator<'_>,\n        expect: &[(Key, Lsn, Value)],\n    ) {\n        let mut expect_iter = expect.iter();\n        loop {\n            let o1 = delta_iter.next().await.unwrap();\n            let o2 = expect_iter.next();\n            assert_eq!(o1.is_some(), o2.is_some());\n            if o1.is_none() && o2.is_none() {\n                break;\n            }\n            let (k1, l1, v1) = o1.unwrap();\n            let (k2, l2, v2) = o2.unwrap();\n            assert_eq!(&k1, k2);\n            assert_eq!(l1, *l2);\n            assert_eq!(&v1, v2);\n        }\n    }\n\n    #[tokio::test]\n    async fn delta_layer_iterator() {\n        let harness = TenantHarness::create(\"delta_layer_iterator\").await.unwrap();\n        let (tenant, ctx) = harness.load().await;\n\n        let tline = tenant\n            .create_test_timeline(TIMELINE_ID, Lsn(0x10), DEFAULT_PG_VERSION, &ctx)\n            .await\n            .unwrap();\n\n        fn get_key(id: u32) -> Key {\n            let mut key = Key::from_hex(\"000000000033333333444444445500000000\").unwrap();\n            key.field6 = id;\n            key\n        }\n        const N: usize = 1000;\n        let test_deltas = (0..N)\n            .map(|idx| {\n                (\n                    get_key(idx as u32 / 10),\n                    Lsn(0x10 * ((idx as u64) % 10 + 
1)),\n                    Value::Image(Bytes::from(format!(\"img{idx:05}\"))),\n                )\n            })\n            .collect_vec();\n        let resident_layer = produce_delta_layer(&tenant, &tline, test_deltas.clone(), &ctx)\n            .await\n            .unwrap();\n        let delta_layer = resident_layer.get_as_delta(&ctx).await.unwrap();\n        for max_read_size in [1, 1024] {\n            for batch_size in [1, 2, 4, 8, 3, 7, 13] {\n                println!(\"running with batch_size={batch_size} max_read_size={max_read_size}\");\n                // Test if the batch size is correctly determined\n                let mut iter = delta_layer.iter_with_options(&ctx, max_read_size, batch_size);\n                let mut num_items = 0;\n                for _ in 0..3 {\n                    iter.next_batch().await.unwrap();\n                    num_items += iter.key_values_batch.len();\n                    if max_read_size == 1 {\n                        // every key should be a batch b/c the value is larger than max_read_size\n                        assert_eq!(iter.key_values_batch.len(), 1);\n                    } else {\n                        assert!(iter.key_values_batch.len() <= batch_size);\n                    }\n                    if num_items >= N {\n                        break;\n                    }\n                    iter.key_values_batch.clear();\n                }\n                // Test if the result is correct\n                let mut iter = delta_layer.iter_with_options(&ctx, max_read_size, batch_size);\n                assert_delta_iter_equal(&mut iter, &test_deltas).await;\n            }\n        }\n    }\n}\n"
  },
  {
    "path": "pageserver/src/tenant/storage_layer/errors.rs",
    "content": "use crate::tenant::blob_io::WriteBlobError;\n\n#[derive(Debug, thiserror::Error)]\npub enum PutError {\n    #[error(transparent)]\n    WriteBlob(WriteBlobError),\n    #[error(transparent)]\n    Other(anyhow::Error),\n}\n\nimpl PutError {\n    pub fn is_cancel(&self) -> bool {\n        match self {\n            PutError::WriteBlob(e) => e.is_cancel(),\n            PutError::Other(_) => false,\n        }\n    }\n    pub fn into_anyhow(self) -> anyhow::Error {\n        match self {\n            PutError::WriteBlob(e) => e.into_anyhow(),\n            PutError::Other(e) => e,\n        }\n    }\n}\n"
  },
  {
    "path": "pageserver/src/tenant/storage_layer/filter_iterator.rs",
    "content": "use std::ops::Range;\nuse std::sync::Arc;\n\nuse anyhow::bail;\nuse pageserver_api::key::Key;\nuse pageserver_api::keyspace::{KeySpace, SparseKeySpace};\nuse utils::lsn::Lsn;\nuse wal_decoder::models::value::Value;\n\nuse super::PersistentLayerKey;\nuse super::merge_iterator::{MergeIterator, MergeIteratorItem};\n\n/// A filter iterator over merge iterators (and can be easily extended to other types of iterators).\n///\n/// The iterator will skip any keys not included in the keyspace filter. In other words, the keyspace filter contains the keys\n/// to be retained.\npub struct FilterIterator<'a> {\n    inner: MergeIterator<'a>,\n    retain_key_filters: Vec<Range<Key>>,\n    current_filter_idx: usize,\n}\n\nimpl<'a> FilterIterator<'a> {\n    pub fn create(\n        inner: MergeIterator<'a>,\n        dense_keyspace: KeySpace,\n        sparse_keyspace: SparseKeySpace,\n    ) -> anyhow::Result<Self> {\n        let mut retain_key_filters = Vec::new();\n        retain_key_filters.extend(dense_keyspace.ranges);\n        retain_key_filters.extend(sparse_keyspace.0.ranges);\n        retain_key_filters.sort_by(|a, b| a.start.cmp(&b.start));\n        // Verify key filters are non-overlapping and sorted\n        for window in retain_key_filters.windows(2) {\n            if window[0].end > window[1].start {\n                bail!(\n                    \"Key filters are overlapping: {:?} and {:?}\",\n                    window[0],\n                    window[1]\n                );\n            }\n        }\n        Ok(Self {\n            inner,\n            retain_key_filters,\n            current_filter_idx: 0,\n        })\n    }\n\n    async fn next_inner<R: MergeIteratorItem>(&mut self) -> anyhow::Result<Option<R>> {\n        while let Some(item) = self.inner.next_inner::<R>().await? 
{\n            while self.current_filter_idx < self.retain_key_filters.len()\n                && item.key_lsn_value().0 >= self.retain_key_filters[self.current_filter_idx].end\n            {\n                // [filter region]    [filter region]     [filter region]\n                //                                     ^ item\n                //                    ^ current filter\n                self.current_filter_idx += 1;\n                // [filter region]    [filter region]     [filter region]\n                //                                     ^ item\n                //                                        ^ current filter\n            }\n            if self.current_filter_idx >= self.retain_key_filters.len() {\n                // We already exhausted all filters, so we should return now\n                // [filter region] [filter region] [filter region]\n                //                                                    ^ item\n                //                                                 ^ current filter (nothing)\n                return Ok(None);\n            }\n            if self.retain_key_filters[self.current_filter_idx].contains(&item.key_lsn_value().0) {\n                // [filter region]    [filter region]     [filter region]\n                //                                              ^ item\n                //                                        ^ current filter\n                return Ok(Some(item));\n            }\n            // If the key is not contained in the key retaining filters, continue to the next item.\n            // [filter region]    [filter region]     [filter region]\n            //                                     ^ item\n            //                                        ^ current filter\n        }\n        Ok(None)\n    }\n\n    pub async fn next(&mut self) -> anyhow::Result<Option<(Key, Lsn, Value)>> {\n        self.next_inner().await\n    }\n\n    pub async fn next_with_trace(\n        &mut 
self,\n    ) -> anyhow::Result<Option<((Key, Lsn, Value), Arc<PersistentLayerKey>)>> {\n        self.next_inner().await\n    }\n}\n\n#[cfg(test)]\nmod tests {\n    use itertools::Itertools;\n    use pageserver_api::key::Key;\n    use utils::lsn::Lsn;\n\n    use super::*;\n    use crate::DEFAULT_PG_VERSION;\n    use crate::tenant::harness::{TIMELINE_ID, TenantHarness};\n    use crate::tenant::storage_layer::delta_layer::test::produce_delta_layer;\n\n    async fn assert_filter_iter_equal(\n        filter_iter: &mut FilterIterator<'_>,\n        expect: &[(Key, Lsn, Value)],\n    ) {\n        let mut expect_iter = expect.iter();\n        loop {\n            let o1 = filter_iter.next().await.unwrap();\n            let o2 = expect_iter.next();\n            assert_eq!(o1.is_some(), o2.is_some());\n            if o1.is_none() && o2.is_none() {\n                break;\n            }\n            let (k1, l1, v1) = o1.unwrap();\n            let (k2, l2, v2) = o2.unwrap();\n            assert_eq!(&k1, k2);\n            assert_eq!(l1, *l2);\n            assert_eq!(&v1, v2);\n        }\n    }\n\n    #[tokio::test]\n    async fn filter_keyspace_iterator() {\n        use bytes::Bytes;\n\n        let harness = TenantHarness::create(\"filter_iterator_filter_keyspace_iterator\")\n            .await\n            .unwrap();\n        let (tenant, ctx) = harness.load().await;\n\n        let tline = tenant\n            .create_test_timeline(TIMELINE_ID, Lsn(0x10), DEFAULT_PG_VERSION, &ctx)\n            .await\n            .unwrap();\n\n        fn get_key(id: u32) -> Key {\n            let mut key = Key::from_hex(\"000000000033333333444444445500000000\").unwrap();\n            key.field6 = id;\n            key\n        }\n        const N: usize = 100;\n        let test_deltas1 = (0..N)\n            .map(|idx| {\n                (\n                    get_key(idx as u32),\n                    Lsn(0x20 * ((idx as u64) % 10 + 1)),\n                    
Value::Image(Bytes::from(format!(\"img{idx:05}\"))),\n                )\n            })\n            .collect_vec();\n        let resident_layer_1 = produce_delta_layer(&tenant, &tline, test_deltas1.clone(), &ctx)\n            .await\n            .unwrap();\n\n        let merge_iter = MergeIterator::create_for_testing(\n            &[resident_layer_1.get_as_delta(&ctx).await.unwrap()],\n            &[],\n            &ctx,\n        );\n\n        let mut filter_iter = FilterIterator::create(\n            merge_iter,\n            KeySpace {\n                ranges: vec![\n                    get_key(5)..get_key(10),\n                    get_key(20)..get_key(30),\n                    get_key(90)..get_key(110),\n                    get_key(1000)..get_key(2000),\n                ],\n            },\n            SparseKeySpace(KeySpace::default()),\n        )\n        .unwrap();\n        let mut result = Vec::new();\n        result.extend(test_deltas1[5..10].iter().cloned());\n        result.extend(test_deltas1[20..30].iter().cloned());\n        result.extend(test_deltas1[90..100].iter().cloned());\n        assert_filter_iter_equal(&mut filter_iter, &result).await;\n\n        let merge_iter = MergeIterator::create_for_testing(\n            &[resident_layer_1.get_as_delta(&ctx).await.unwrap()],\n            &[],\n            &ctx,\n        );\n\n        let mut filter_iter = FilterIterator::create(\n            merge_iter,\n            KeySpace {\n                ranges: vec![\n                    get_key(0)..get_key(10),\n                    get_key(20)..get_key(30),\n                    get_key(90)..get_key(95),\n                ],\n            },\n            SparseKeySpace(KeySpace::default()),\n        )\n        .unwrap();\n        let mut result = Vec::new();\n        result.extend(test_deltas1[0..10].iter().cloned());\n        result.extend(test_deltas1[20..30].iter().cloned());\n        result.extend(test_deltas1[90..95].iter().cloned());\n        
assert_filter_iter_equal(&mut filter_iter, &result).await;\n    }\n}\n"
  },
  {
    "path": "pageserver/src/tenant/storage_layer/image_layer.rs",
    "content": "//! An ImageLayer represents an image or a snapshot of a key-range at\n//! one particular LSN.\n//!\n//! It contains an image of all key-value pairs in its key-range. Any key\n//! that falls into the image layer's range but does not exist in the layer,\n//! does not exist.\n//!\n//! An image layer is stored in a file on disk. The file is stored in\n//! timelines/<timeline_id> directory.  Currently, there are no\n//! subdirectories, and each image layer file is named like this:\n//!\n//! ```text\n//!    <key start>-<key end>__<LSN>\n//! ```\n//!\n//! For example:\n//!\n//! ```text\n//!    000000067F000032BE0000400000000070B6-000000067F000032BE0000400000000080B6__00000000346BC568\n//! ```\n//!\n//! Every image layer file consists of three parts: \"summary\",\n//! \"index\", and \"values\".  The summary is a fixed size header at the\n//! beginning of the file, and it contains basic information about the\n//! layer, and offsets to the other parts. The \"index\" is a B-tree,\n//! mapping from Key to an offset in the \"values\" part.  The\n//! 
actual page images are stored in the \"values\" part.\nuse std::collections::{HashMap, VecDeque};\nuse std::fs::File;\nuse std::ops::Range;\nuse std::os::unix::prelude::FileExt;\nuse std::str::FromStr;\nuse std::sync::Arc;\nuse std::sync::atomic::AtomicU64;\n\nuse anyhow::{Context, Result, bail, ensure};\nuse bytes::Bytes;\nuse camino::{Utf8Path, Utf8PathBuf};\nuse hex;\nuse itertools::Itertools;\nuse pageserver_api::config::MaxVectoredReadBytes;\nuse pageserver_api::key::{DBDIR_KEY, KEY_SIZE, Key};\nuse pageserver_api::keyspace::KeySpace;\nuse pageserver_api::shard::{ShardIdentity, TenantShardId};\nuse serde::{Deserialize, Serialize};\nuse tokio::sync::OnceCell;\nuse tokio_stream::StreamExt;\nuse tokio_util::sync::CancellationToken;\nuse tracing::*;\nuse utils::bin_ser::BeSer;\nuse utils::bin_ser::SerializeError;\nuse utils::id::{TenantId, TimelineId};\nuse utils::lsn::Lsn;\nuse wal_decoder::models::value::Value;\n\nuse super::errors::PutError;\nuse super::layer_name::ImageLayerName;\nuse super::{\n    AsLayerDesc, LayerName, OnDiskValue, OnDiskValueIo, PersistentLayerDesc, ResidentLayer,\n    ValuesReconstructState,\n};\nuse crate::config::PageServerConf;\nuse crate::context::{PageContentKind, RequestContext, RequestContextBuilder};\nuse crate::page_cache::{self, FileId, PAGE_SZ};\nuse crate::tenant::blob_io::BlobWriter;\nuse crate::tenant::block_io::{BlockBuf, FileBlockReader};\nuse crate::tenant::disk_btree::{\n    DiskBtreeBuilder, DiskBtreeIterator, DiskBtreeReader, VisitDirection,\n};\nuse crate::tenant::timeline::GetVectoredError;\nuse crate::tenant::vectored_blob_io::{\n    BlobFlag, BufView, StreamingVectoredReadPlanner, VectoredBlobReader, VectoredRead,\n    VectoredReadPlanner,\n};\nuse crate::virtual_file::TempVirtualFile;\nuse crate::virtual_file::owned_buffers_io::io_buf_ext::IoBufExt;\nuse crate::virtual_file::owned_buffers_io::write::{Buffer, BufferedWriterShutdownMode};\nuse crate::virtual_file::{self, IoBuffer, IoBufferMut, MaybeFatalIo, 
VirtualFile};\nuse crate::{IMAGE_FILE_MAGIC, STORAGE_FORMAT_VERSION, TEMP_FILE_SUFFIX};\n\n///\n/// Header stored in the beginning of the file\n///\n/// After this comes the 'values' part, starting on block 1. After that,\n/// the 'index' starts at the block indicated by 'index_start_blk'\n///\n#[derive(Debug, Serialize, Deserialize, PartialEq, Eq)]\npub struct Summary {\n    /// Magic value to identify this as a neon image file. Always IMAGE_FILE_MAGIC.\n    pub magic: u16,\n    pub format_version: u16,\n\n    pub tenant_id: TenantId,\n    pub timeline_id: TimelineId,\n    pub key_range: Range<Key>,\n    pub lsn: Lsn,\n\n    /// Block number where the 'index' part of the file begins.\n    pub index_start_blk: u32,\n    /// Block within the 'index', where the B-tree root page is stored\n    pub index_root_blk: u32,\n    // the 'values' part starts after the summary header, on block 1.\n}\n\nimpl From<&ImageLayer> for Summary {\n    fn from(layer: &ImageLayer) -> Self {\n        Self::expected(\n            layer.desc.tenant_shard_id.tenant_id,\n            layer.desc.timeline_id,\n            layer.desc.key_range.clone(),\n            layer.lsn,\n        )\n    }\n}\n\nimpl Summary {\n    /// Serializes the summary header into an aligned buffer of length `PAGE_SZ`.\n    pub fn ser_into_page(&self) -> Result<IoBuffer, SerializeError> {\n        let mut buf = IoBufferMut::with_capacity(PAGE_SZ);\n        Self::ser_into(self, &mut buf)?;\n        // Pad zeroes to the buffer so the length is a multiple of the alignment.\n        buf.extend_with(0, buf.capacity() - buf.len());\n        Ok(buf.freeze())\n    }\n\n    pub(super) fn expected(\n        tenant_id: TenantId,\n        timeline_id: TimelineId,\n        key_range: Range<Key>,\n        lsn: Lsn,\n    ) -> Self {\n        Self {\n            magic: IMAGE_FILE_MAGIC,\n            format_version: STORAGE_FORMAT_VERSION,\n            tenant_id,\n            timeline_id,\n            key_range,\n            lsn,\n\n    
        index_start_blk: 0,\n            index_root_blk: 0,\n        }\n    }\n}\n\n/// This is used only from `pagectl`. Within pageserver, all layers are\n/// [`crate::tenant::storage_layer::Layer`], which can hold an [`ImageLayerInner`].\npub struct ImageLayer {\n    path: Utf8PathBuf,\n    pub desc: PersistentLayerDesc,\n    // This entry contains an image of all pages as of this LSN, should be the same as desc.lsn\n    pub lsn: Lsn,\n    inner: OnceCell<ImageLayerInner>,\n}\n\nimpl std::fmt::Debug for ImageLayer {\n    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {\n        use super::RangeDisplayDebug;\n\n        f.debug_struct(\"ImageLayer\")\n            .field(\"key_range\", &RangeDisplayDebug(&self.desc.key_range))\n            .field(\"file_size\", &self.desc.file_size)\n            .field(\"lsn\", &self.lsn)\n            .field(\"inner\", &self.inner)\n            .finish()\n    }\n}\n\n/// ImageLayer is the in-memory data structure associated with an on-disk image\n/// file.\npub struct ImageLayerInner {\n    // values copied from summary\n    index_start_blk: u32,\n    index_root_blk: u32,\n\n    key_range: Range<Key>,\n    lsn: Lsn,\n\n    file: Arc<VirtualFile>,\n    file_id: FileId,\n\n    max_vectored_read_bytes: Option<MaxVectoredReadBytes>,\n}\n\nimpl ImageLayerInner {\n    pub(crate) fn layer_dbg_info(&self) -> String {\n        format!(\n            \"image {}..{} {}\",\n            self.key_range().start,\n            self.key_range().end,\n            self.lsn()\n        )\n    }\n}\n\nimpl std::fmt::Debug for ImageLayerInner {\n    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {\n        f.debug_struct(\"ImageLayerInner\")\n            .field(\"index_start_blk\", &self.index_start_blk)\n            .field(\"index_root_blk\", &self.index_root_blk)\n            .finish()\n    }\n}\n\nimpl ImageLayerInner {\n    pub(super) async fn dump(&self, ctx: &RequestContext) -> anyhow::Result<()> {\n        let 
block_reader = FileBlockReader::new(&self.file, self.file_id);\n        let tree_reader = DiskBtreeReader::<_, KEY_SIZE>::new(\n            self.index_start_blk,\n            self.index_root_blk,\n            block_reader,\n        );\n\n        tree_reader.dump(ctx).await?;\n\n        tree_reader\n            .visit(\n                &[0u8; KEY_SIZE],\n                VisitDirection::Forwards,\n                |key, value| {\n                    println!(\"key: {} offset {}\", hex::encode(key), value);\n                    true\n                },\n                ctx,\n            )\n            .await?;\n\n        Ok(())\n    }\n}\n\n/// Boilerplate to implement the Layer trait, always use layer_desc for persistent layers.\nimpl std::fmt::Display for ImageLayer {\n    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {\n        write!(f, \"{}\", self.layer_desc().short_id())\n    }\n}\n\nimpl AsLayerDesc for ImageLayer {\n    fn layer_desc(&self) -> &PersistentLayerDesc {\n        &self.desc\n    }\n}\n\nimpl ImageLayer {\n    pub async fn dump(&self, verbose: bool, ctx: &RequestContext) -> Result<()> {\n        self.desc.dump();\n\n        if !verbose {\n            return Ok(());\n        }\n\n        let inner = self.load(ctx).await?;\n\n        inner.dump(ctx).await?;\n\n        Ok(())\n    }\n\n    fn temp_path_for(\n        conf: &PageServerConf,\n        timeline_id: TimelineId,\n        tenant_shard_id: TenantShardId,\n        fname: &ImageLayerName,\n    ) -> Utf8PathBuf {\n        // TempVirtualFile requires us to never reuse a filename while an old\n        // instance of TempVirtualFile created with that filename is not done dropping yet.\n        // So, we use a monotonic counter to disambiguate the filenames.\n        static NEXT_TEMP_DISAMBIGUATOR: AtomicU64 = AtomicU64::new(1);\n        let filename_disambiguator =\n            NEXT_TEMP_DISAMBIGUATOR.fetch_add(1, std::sync::atomic::Ordering::Relaxed);\n\n        
conf.timeline_path(&tenant_shard_id, &timeline_id)\n            .join(format!(\n                \"{fname}.{filename_disambiguator:x}.{TEMP_FILE_SUFFIX}\"\n            ))\n    }\n\n    ///\n    /// Open the underlying file and read the metadata into memory, if it's\n    /// not loaded already.\n    ///\n    async fn load(&self, ctx: &RequestContext) -> Result<&ImageLayerInner> {\n        self.inner\n            .get_or_try_init(|| self.load_inner(ctx))\n            .await\n            .with_context(|| format!(\"Failed to load image layer {}\", self.path()))\n    }\n\n    async fn load_inner(&self, ctx: &RequestContext) -> Result<ImageLayerInner> {\n        let path = self.path();\n\n        let loaded =\n            ImageLayerInner::load(&path, self.desc.image_layer_lsn(), None, None, ctx).await?;\n\n        // not production code\n        let actual_layer_name = LayerName::from_str(path.file_name().unwrap()).unwrap();\n        let expected_layer_name = self.layer_desc().layer_name();\n\n        if actual_layer_name != expected_layer_name {\n            println!(\"warning: filename does not match what is expected from in-file summary\");\n            println!(\"actual: {:?}\", actual_layer_name.to_string());\n            println!(\"expected: {:?}\", expected_layer_name.to_string());\n        }\n\n        Ok(loaded)\n    }\n\n    /// Create an ImageLayer struct representing an existing file on disk.\n    ///\n    /// This variant is only used for debugging purposes, by the 'pagectl' binary.\n    pub fn new_for_path(path: &Utf8Path, file: File) -> Result<ImageLayer> {\n        let mut summary_buf = vec![0; PAGE_SZ];\n        file.read_exact_at(&mut summary_buf, 0)?;\n        let summary = Summary::des_prefix(&summary_buf)?;\n        let metadata = file\n            .metadata()\n            .context(\"get file metadata to determine size\")?;\n\n        // This function is never used for constructing layers in a running pageserver,\n        // so it does not need an 
accurate TenantShardId.\n        let tenant_shard_id = TenantShardId::unsharded(summary.tenant_id);\n\n        Ok(ImageLayer {\n            path: path.to_path_buf(),\n            desc: PersistentLayerDesc::new_img(\n                tenant_shard_id,\n                summary.timeline_id,\n                summary.key_range,\n                summary.lsn,\n                metadata.len(),\n            ), // Now we assume image layer ALWAYS covers the full range. This may change in the future.\n            lsn: summary.lsn,\n            inner: OnceCell::new(),\n        })\n    }\n\n    fn path(&self) -> Utf8PathBuf {\n        self.path.clone()\n    }\n}\n\n#[derive(thiserror::Error, Debug)]\npub enum RewriteSummaryError {\n    #[error(\"magic mismatch\")]\n    MagicMismatch,\n    #[error(transparent)]\n    Other(#[from] anyhow::Error),\n}\n\nimpl From<std::io::Error> for RewriteSummaryError {\n    fn from(e: std::io::Error) -> Self {\n        Self::Other(anyhow::anyhow!(e))\n    }\n}\n\nimpl ImageLayer {\n    pub async fn rewrite_summary<F>(\n        path: &Utf8Path,\n        rewrite: F,\n        ctx: &RequestContext,\n    ) -> Result<(), RewriteSummaryError>\n    where\n        F: Fn(Summary) -> Summary,\n    {\n        let file = VirtualFile::open_with_options_v2(\n            path,\n            virtual_file::OpenOptions::new().read(true).write(true),\n            ctx,\n        )\n        .await\n        .with_context(|| format!(\"Failed to open file '{path}'\"))?;\n        let file_id = page_cache::next_file_id();\n        let block_reader = FileBlockReader::new(&file, file_id);\n        let summary_blk = block_reader.read_blk(0, ctx).await?;\n        let actual_summary = Summary::des_prefix(summary_blk.as_ref()).context(\"deserialize\")?;\n        if actual_summary.magic != IMAGE_FILE_MAGIC {\n            return Err(RewriteSummaryError::MagicMismatch);\n        }\n\n        let new_summary = rewrite(actual_summary);\n\n        let buf = 
new_summary.ser_into_page().context(\"serialize\")?;\n        let (_buf, res) = file.write_all_at(buf.slice_len(), 0, ctx).await;\n        res?;\n        Ok(())\n    }\n}\n\nimpl ImageLayerInner {\n    pub(crate) fn key_range(&self) -> &Range<Key> {\n        &self.key_range\n    }\n\n    pub(crate) fn lsn(&self) -> Lsn {\n        self.lsn\n    }\n\n    pub(super) async fn load(\n        path: &Utf8Path,\n        lsn: Lsn,\n        summary: Option<Summary>,\n        max_vectored_read_bytes: Option<MaxVectoredReadBytes>,\n        ctx: &RequestContext,\n    ) -> anyhow::Result<Self> {\n        let file = Arc::new(\n            VirtualFile::open_v2(path, ctx)\n                .await\n                .context(\"open layer file\")?,\n        );\n        let file_id = page_cache::next_file_id();\n        let block_reader = FileBlockReader::new(&file, file_id);\n        let summary_blk = block_reader\n            .read_blk(0, ctx)\n            .await\n            .context(\"read first block\")?;\n\n        // length is the only way how this could fail, so it's not actually likely at all unless\n        // read_blk returns wrong sized block.\n        //\n        // TODO: confirm and make this into assertion\n        let actual_summary =\n            Summary::des_prefix(summary_blk.as_ref()).context(\"deserialize first block\")?;\n\n        if let Some(mut expected_summary) = summary {\n            // production code path\n            expected_summary.index_start_blk = actual_summary.index_start_blk;\n            expected_summary.index_root_blk = actual_summary.index_root_blk;\n            // mask out the timeline_id, but still require the layers to be from the same tenant\n            expected_summary.timeline_id = actual_summary.timeline_id;\n\n            if actual_summary != expected_summary {\n                bail!(\n                    \"in-file summary does not match expected summary. 
actual = {:?} expected = {:?}\",\n                    actual_summary,\n                    expected_summary\n                );\n            }\n        }\n\n        Ok(ImageLayerInner {\n            index_start_blk: actual_summary.index_start_blk,\n            index_root_blk: actual_summary.index_root_blk,\n            lsn,\n            file,\n            file_id,\n            max_vectored_read_bytes,\n            key_range: actual_summary.key_range,\n        })\n    }\n\n    // Look up the keys in the provided keyspace and update\n    // the reconstruct state with whatever is found.\n    pub(super) async fn get_values_reconstruct_data(\n        &self,\n        this: ResidentLayer,\n        keyspace: KeySpace,\n        reconstruct_state: &mut ValuesReconstructState,\n        ctx: &RequestContext,\n    ) -> Result<(), GetVectoredError> {\n        let reads = self\n            .plan_reads(keyspace, None, ctx)\n            .await\n            .map_err(GetVectoredError::Other)?;\n\n        self.do_reads_and_update_state(this, reads, reconstruct_state, ctx)\n            .await;\n\n        reconstruct_state.on_image_layer_visited(&self.key_range);\n\n        Ok(())\n    }\n\n    /// Traverse the layer's index to build read operations on the overlap of the input keyspace\n    /// and the keys in this layer.\n    ///\n    /// If shard_identity is provided, it will be used to filter keys down to those stored on\n    /// this shard.\n    async fn plan_reads(\n        &self,\n        keyspace: KeySpace,\n        shard_identity: Option<&ShardIdentity>,\n        ctx: &RequestContext,\n    ) -> anyhow::Result<Vec<VectoredRead>> {\n        let mut planner = VectoredReadPlanner::new(\n            self.max_vectored_read_bytes\n                .expect(\"Layer is loaded with max vectored bytes config\")\n                .0\n                .into(),\n        );\n\n        let block_reader = FileBlockReader::new(&self.file, self.file_id);\n        let tree_reader =\n            
DiskBtreeReader::new(self.index_start_blk, self.index_root_blk, block_reader);\n\n        let ctx = RequestContextBuilder::from(ctx)\n            .page_content_kind(PageContentKind::ImageLayerBtreeNode)\n            .attached_child();\n\n        for range in keyspace.ranges.iter() {\n            let mut range_end_handled = false;\n            let mut search_key: [u8; KEY_SIZE] = [0u8; KEY_SIZE];\n            range.start.write_to_byte_slice(&mut search_key);\n\n            let index_stream = tree_reader.clone().into_stream(&search_key, &ctx);\n            let mut index_stream = std::pin::pin!(index_stream);\n\n            while let Some(index_entry) = index_stream.next().await {\n                let (raw_key, offset) = index_entry?;\n\n                let key = Key::from_slice(&raw_key[..KEY_SIZE]);\n                assert!(key >= range.start);\n\n                let flag = if let Some(shard_identity) = shard_identity {\n                    if shard_identity.is_key_disposable(&key) {\n                        BlobFlag::Ignore\n                    } else {\n                        BlobFlag::None\n                    }\n                } else {\n                    BlobFlag::None\n                };\n\n                if key >= range.end {\n                    planner.handle_range_end(offset);\n                    range_end_handled = true;\n                    break;\n                } else {\n                    planner.handle(key, self.lsn, offset, flag);\n                }\n            }\n\n            if !range_end_handled {\n                let payload_end = self.index_start_blk as u64 * PAGE_SZ as u64;\n                planner.handle_range_end(payload_end);\n            }\n        }\n\n        Ok(planner.finish())\n    }\n\n    /// Given a key range, select the parts of that range that should be retained by the ShardIdentity,\n    /// then execute vectored GET operations, passing the results of all read keys into the writer.\n    pub(super) async fn filter(\n     
   &self,\n        shard_identity: &ShardIdentity,\n        writer: &mut ImageLayerWriter,\n        ctx: &RequestContext,\n    ) -> anyhow::Result<usize> {\n        // Fragment the range into the regions owned by this ShardIdentity\n        let plan = self\n            .plan_reads(\n                KeySpace {\n                    // If asked for the total key space, plan_reads will give us all the keys in the layer\n                    ranges: vec![Key::MIN..Key::MAX],\n                },\n                Some(shard_identity),\n                ctx,\n            )\n            .await?;\n\n        let vectored_blob_reader = VectoredBlobReader::new(&self.file);\n        let mut key_count = 0;\n        for read in plan.into_iter() {\n            let buf_size = read.size();\n\n            let buf = IoBufferMut::with_capacity(buf_size);\n            let blobs_buf = vectored_blob_reader.read_blobs(&read, buf, ctx).await?;\n\n            let view = BufView::new_slice(&blobs_buf.buf);\n\n            for meta in blobs_buf.blobs.iter() {\n                // Just read the raw header+data and pass it through to the target layer, without\n                // decoding and recompressing it.\n                let raw = meta.raw_with_header(&view);\n                key_count += 1;\n                writer\n                    .put_image_raw(meta.meta.key, raw.into_bytes(), ctx)\n                    .await\n                    .context(format!(\"Storing key {}\", meta.meta.key))?;\n            }\n        }\n\n        Ok(key_count)\n    }\n\n    async fn do_reads_and_update_state(\n        &self,\n        this: ResidentLayer,\n        reads: Vec<VectoredRead>,\n        reconstruct_state: &mut ValuesReconstructState,\n        ctx: &RequestContext,\n    ) {\n        let max_vectored_read_bytes = self\n            .max_vectored_read_bytes\n            .expect(\"Layer is loaded with max vectored bytes config\")\n            .0\n            .into();\n\n        for read in reads.into_iter() 
{\n            let mut ios: HashMap<(Key, Lsn), OnDiskValueIo> = Default::default();\n            for (_, blob_meta) in read.blobs_at.as_slice() {\n                let io = reconstruct_state.update_key(&blob_meta.key, blob_meta.lsn, true);\n                ios.insert((blob_meta.key, blob_meta.lsn), io);\n            }\n\n            let buf_size = read.size();\n\n            if buf_size > max_vectored_read_bytes {\n                // If the read is oversized, it should only contain one key.\n                let offenders = read\n                    .blobs_at\n                    .as_slice()\n                    .iter()\n                    .filter_map(|(_, blob_meta)| {\n                        if blob_meta.key.is_rel_dir_key()\n                            || blob_meta.key == DBDIR_KEY\n                            || blob_meta.key.is_aux_file_key()\n                        {\n                            // The size of values for these keys is unbounded and can\n                            // grow very large in pathological cases.\n                            None\n                        } else {\n                            Some(format!(\"{}@{}\", blob_meta.key, blob_meta.lsn))\n                        }\n                    })\n                    .join(\", \");\n\n                if !offenders.is_empty() {\n                    tracing::warn!(\n                        \"Oversized vectored read ({} > {}) for keys {}\",\n                        buf_size,\n                        max_vectored_read_bytes,\n                        offenders\n                    );\n                }\n            }\n\n            let read_extend_residency = this.clone();\n            let read_from = self.file.clone();\n            let read_ctx = ctx.attached_child();\n            reconstruct_state\n                .spawn_io(async move {\n                    let buf = IoBufferMut::with_capacity(buf_size);\n                    let vectored_blob_reader = 
VectoredBlobReader::new(&read_from);\n                    let res = vectored_blob_reader.read_blobs(&read, buf, &read_ctx).await;\n\n                    match res {\n                        Ok(blobs_buf) => {\n                            let view = BufView::new_slice(&blobs_buf.buf);\n                            for meta in blobs_buf.blobs.iter() {\n                                let io: OnDiskValueIo =\n                                    ios.remove(&(meta.meta.key, meta.meta.lsn)).unwrap();\n                                let img_buf = meta.read(&view).await;\n\n                                let img_buf = match img_buf {\n                                    Ok(img_buf) => img_buf,\n                                    Err(e) => {\n                                        io.complete(Err(e));\n                                        continue;\n                                    }\n                                };\n\n                                io.complete(Ok(OnDiskValue::RawImage(img_buf.into_bytes())));\n                            }\n\n                            assert!(ios.is_empty());\n                        }\n                        Err(err) => {\n                            for (_, io) in ios {\n                                io.complete(Err(std::io::Error::new(\n                                    err.kind(),\n                                    \"vec read failed\",\n                                )));\n                            }\n                        }\n                    }\n\n                    // keep layer resident until this IO is done; this spawned IO future generally outlives the\n                    // call to `self` / the `Arc<DownloadedLayer>` / the `ResidentLayer` that guarantees residency\n                    drop(read_extend_residency);\n                })\n                .await;\n        }\n    }\n\n    pub(crate) fn iter_with_options<'a>(\n        &'a self,\n        ctx: &'a RequestContext,\n        max_read_size: 
u64,\n        max_batch_size: usize,\n    ) -> ImageLayerIterator<'a> {\n        let block_reader = FileBlockReader::new(&self.file, self.file_id);\n        let tree_reader =\n            DiskBtreeReader::new(self.index_start_blk, self.index_root_blk, block_reader);\n        ImageLayerIterator {\n            image_layer: self,\n            ctx,\n            index_iter: tree_reader.iter(&[0; KEY_SIZE], ctx),\n            key_values_batch: VecDeque::new(),\n            is_end: false,\n            planner: StreamingVectoredReadPlanner::new(max_read_size, max_batch_size),\n        }\n    }\n\n    /// NB: not super efficient, but not terrible either. Should prob be an iterator.\n    //\n    // We're reusing the index traversal logic in plan_reads; would be nice to\n    // factor that out.\n    pub(crate) async fn load_keys(&self, ctx: &RequestContext) -> anyhow::Result<Vec<Key>> {\n        let plan = self\n            .plan_reads(KeySpace::single(self.key_range.clone()), None, ctx)\n            .await?;\n        Ok(plan\n            .into_iter()\n            .flat_map(|read| read.blobs_at)\n            .map(|(_, blob_meta)| blob_meta.key)\n            .collect())\n    }\n}\n\n/// A builder object for constructing a new image layer.\n///\n/// Usage:\n///\n/// 1. Create the ImageLayerWriter by calling ImageLayerWriter::new(...)\n///\n/// 2. Write the contents by calling `put_image` for every key-value\n///    pair in the key range.\n///\n/// 3. 
Call `finish`.\n///\nstruct ImageLayerWriterInner {\n    conf: &'static PageServerConf,\n    path: Utf8PathBuf,\n    timeline_id: TimelineId,\n    tenant_shard_id: TenantShardId,\n    key_range: Range<Key>,\n    lsn: Lsn,\n\n    // Total uncompressed bytes passed into put_image\n    uncompressed_bytes: u64,\n\n    // Like `uncompressed_bytes`,\n    // but only of images we might consider for compression\n    uncompressed_bytes_eligible: u64,\n\n    // Like `uncompressed_bytes`, but only of images\n    // where we have chosen their compressed form\n    uncompressed_bytes_chosen: u64,\n\n    // Number of keys in the layer.\n    num_keys: usize,\n\n    blob_writer: BlobWriter<TempVirtualFile>,\n    tree: DiskBtreeBuilder<BlockBuf, KEY_SIZE>,\n\n    #[cfg(feature = \"testing\")]\n    last_written_key: Key,\n}\n\nimpl ImageLayerWriterInner {\n    ///\n    /// Start building a new image layer.\n    ///\n    #[allow(clippy::too_many_arguments)]\n    async fn new(\n        conf: &'static PageServerConf,\n        timeline_id: TimelineId,\n        tenant_shard_id: TenantShardId,\n        key_range: &Range<Key>,\n        lsn: Lsn,\n        gate: &utils::sync::gate::Gate,\n        cancel: CancellationToken,\n        ctx: &RequestContext,\n    ) -> anyhow::Result<Self> {\n        // Create the file initially with a temporary filename.\n        // We'll atomically rename it to the final name when we're done.\n        let path = ImageLayer::temp_path_for(\n            conf,\n            timeline_id,\n            tenant_shard_id,\n            &ImageLayerName {\n                key_range: key_range.clone(),\n                lsn,\n            },\n        );\n        trace!(\"creating image layer {}\", path);\n        let file = TempVirtualFile::new(\n            VirtualFile::open_with_options_v2(\n                &path,\n                virtual_file::OpenOptions::new()\n                    .create_new(true)\n                    .write(true),\n                ctx,\n            )\n    
        .await?,\n            gate.enter()?,\n        );\n\n        // Start at `PAGE_SZ` to make room for the header block.\n        let blob_writer = BlobWriter::new(\n            file,\n            PAGE_SZ as u64,\n            gate,\n            cancel,\n            ctx,\n            info_span!(parent: None, \"image_layer_writer_flush_task\", tenant_id=%tenant_shard_id.tenant_id, shard_id=%tenant_shard_id.shard_slug(), timeline_id=%timeline_id, path = %path),\n        )?;\n\n        // Initialize the b-tree index builder\n        let block_buf = BlockBuf::new();\n        let tree_builder = DiskBtreeBuilder::new(block_buf);\n\n        let writer = Self {\n            conf,\n            path,\n            timeline_id,\n            tenant_shard_id,\n            key_range: key_range.clone(),\n            lsn,\n            tree: tree_builder,\n            blob_writer,\n            uncompressed_bytes: 0,\n            uncompressed_bytes_eligible: 0,\n            uncompressed_bytes_chosen: 0,\n            num_keys: 0,\n            #[cfg(feature = \"testing\")]\n            last_written_key: Key::MIN,\n        };\n\n        Ok(writer)\n    }\n\n    ///\n    /// Write next value to the file.\n    ///\n    /// The page versions must be appended in blknum order.\n    ///\n    async fn put_image(\n        &mut self,\n        key: Key,\n        img: Bytes,\n        ctx: &RequestContext,\n    ) -> Result<(), PutError> {\n        if !self.key_range.contains(&key) {\n            return Err(PutError::Other(anyhow::anyhow!(\n                \"key {:?} not in range {:?}\",\n                key,\n                self.key_range\n            )));\n        }\n        let compression = self.conf.image_compression;\n        let uncompressed_len = img.len() as u64;\n        self.uncompressed_bytes += uncompressed_len;\n        self.num_keys += 1;\n        let (_img, res) = self\n            .blob_writer\n            .write_blob_maybe_compressed(img.slice_len(), ctx, compression)\n         
   .await;\n        // TODO: re-use the buffer for `img` further upstack\n        let (off, compression_info) = res.map_err(PutError::WriteBlob)?;\n        if compression_info.compressed_size.is_some() {\n            // The image has been considered for compression at least\n            self.uncompressed_bytes_eligible += uncompressed_len;\n        }\n        if compression_info.written_compressed {\n            // The image has been compressed\n            self.uncompressed_bytes_chosen += uncompressed_len;\n        }\n\n        let mut keybuf: [u8; KEY_SIZE] = [0u8; KEY_SIZE];\n        key.write_to_byte_slice(&mut keybuf);\n        self.tree\n            .append(&keybuf, off)\n            .map_err(anyhow::Error::new)\n            .map_err(PutError::Other)?;\n\n        #[cfg(feature = \"testing\")]\n        {\n            self.last_written_key = key;\n        }\n\n        Ok(())\n    }\n\n    ///\n    /// Write the next image to the file, as a raw blob header and data.\n    ///\n    /// The page versions must be appended in blknum order.\n    ///\n    async fn put_image_raw(\n        &mut self,\n        key: Key,\n        raw_with_header: Bytes,\n        ctx: &RequestContext,\n    ) -> anyhow::Result<()> {\n        ensure!(self.key_range.contains(&key));\n\n        // NB: we don't update the (un)compressed metrics, since we can't determine them without\n        // decompressing the image. 
This seems okay.\n        self.num_keys += 1;\n\n        let (_, res) = self\n            .blob_writer\n            .write_blob_raw(raw_with_header.slice_len(), ctx)\n            .await;\n        let offset = res?;\n\n        let mut keybuf: [u8; KEY_SIZE] = [0u8; KEY_SIZE];\n        key.write_to_byte_slice(&mut keybuf);\n        self.tree.append(&keybuf, offset)?;\n\n        #[cfg(feature = \"testing\")]\n        {\n            self.last_written_key = key;\n        }\n\n        Ok(())\n    }\n\n    ///\n    /// Finish writing the image layer.\n    ///\n    async fn finish(\n        self,\n        ctx: &RequestContext,\n        end_key: Option<Key>,\n    ) -> anyhow::Result<(PersistentLayerDesc, Utf8PathBuf)> {\n        let index_start_blk = self.blob_writer.size().div_ceil(PAGE_SZ as u64) as u32;\n\n        // Calculate compression ratio\n        let compressed_size = self.blob_writer.size() - PAGE_SZ as u64; // Subtract PAGE_SZ for header\n        crate::metrics::COMPRESSION_IMAGE_INPUT_BYTES.inc_by(self.uncompressed_bytes);\n        crate::metrics::COMPRESSION_IMAGE_INPUT_BYTES_CONSIDERED\n            .inc_by(self.uncompressed_bytes_eligible);\n        crate::metrics::COMPRESSION_IMAGE_INPUT_BYTES_CHOSEN.inc_by(self.uncompressed_bytes_chosen);\n\n        // NB: filter() may pass through raw pages from a different layer, without looking at\n        // whether these are compressed or not. 
We don't track metrics for these, so avoid\n        // increasing `COMPRESSION_IMAGE_OUTPUT_BYTES` in this case too.\n        if self.uncompressed_bytes > 0 {\n            crate::metrics::COMPRESSION_IMAGE_OUTPUT_BYTES.inc_by(compressed_size);\n        };\n\n        let file = self\n            .blob_writer\n            .shutdown(\n                BufferedWriterShutdownMode::ZeroPadToNextMultiple(PAGE_SZ),\n                ctx,\n            )\n            .await?;\n\n        // Write out the index\n        let mut offset = index_start_blk as u64 * PAGE_SZ as u64;\n        let (index_root_blk, block_buf) = self.tree.finish()?;\n\n        // TODO(yuchen): https://github.com/neondatabase/neon/issues/10092\n        // Should we just replace BlockBuf::blocks with one big buffer?\n        for buf in block_buf.blocks {\n            let (_buf, res) = file.write_all_at(buf.slice_len(), offset, ctx).await;\n            res?;\n            offset += PAGE_SZ as u64;\n        }\n\n        let final_key_range = if let Some(end_key) = end_key {\n            self.key_range.start..end_key\n        } else {\n            self.key_range.clone()\n        };\n\n        // Fill in the summary on blk 0\n        let summary = Summary {\n            magic: IMAGE_FILE_MAGIC,\n            format_version: STORAGE_FORMAT_VERSION,\n            tenant_id: self.tenant_shard_id.tenant_id,\n            timeline_id: self.timeline_id,\n            key_range: final_key_range.clone(),\n            lsn: self.lsn,\n            index_start_blk,\n            index_root_blk,\n        };\n\n        // Writes summary at the first block (offset 0).\n        let buf = summary.ser_into_page()?;\n        let (_buf, res) = file.write_all_at(buf.slice_len(), 0, ctx).await;\n        res?;\n\n        let metadata = file\n            .metadata()\n            .await\n            .context(\"get metadata to determine file size\")?;\n\n        let desc = PersistentLayerDesc::new_img(\n            self.tenant_shard_id,\n     
       self.timeline_id,\n            final_key_range,\n            self.lsn,\n            metadata.len(),\n        );\n\n        #[cfg(feature = \"testing\")]\n        if let Some(end_key) = end_key {\n            assert!(\n                self.last_written_key < end_key,\n                \"written key violates end_key range\"\n            );\n        }\n\n        // Note: Because we open the file in write-only mode, we cannot\n        // reuse the same VirtualFile for reading later. That's why we don't\n        // set inner.file here. The first read will have to re-open it.\n\n        // fsync the file\n        file.sync_all()\n            .await\n            .maybe_fatal_err(\"image_layer sync_all\")?;\n\n        trace!(\"created image layer {}\", self.path);\n\n        // The gate guard stored in `destination_file` is dropped. Callers (e.g.. flush loop or compaction)\n        // keep the gate open also, so that it's safe for them to rename the file to its final destination.\n        file.disarm_into_inner();\n\n        Ok((desc, self.path))\n    }\n}\n\n/// A builder object for constructing a new image layer.\n///\n/// Usage:\n///\n/// 1. Create the ImageLayerWriter by calling ImageLayerWriter::new(...)\n///\n/// 2. Write the contents by calling `put_page_image` for every key-value\n///    pair in the key range.\n///\n/// 3. Call `finish`.\n///\n/// # Note\n///\n/// As described in <https://github.com/neondatabase/neon/issues/2650>, it's\n/// possible for the writer to drop before `finish` is actually called. So this\n/// could lead to odd temporary files in the directory, exhausting file system.\n/// This structure wraps `ImageLayerWriterInner` and also contains `Drop`\n/// implementation that cleans up the temporary file in failure. 
It's not\n/// possible to do this directly in `ImageLayerWriterInner` since `finish` moves\n/// out some fields, making it impossible to implement `Drop`.\n///\n#[must_use]\npub struct ImageLayerWriter {\n    inner: Option<ImageLayerWriterInner>,\n}\n\nimpl ImageLayerWriter {\n    ///\n    /// Start building a new image layer.\n    ///\n    #[allow(clippy::too_many_arguments)]\n    pub async fn new(\n        conf: &'static PageServerConf,\n        timeline_id: TimelineId,\n        tenant_shard_id: TenantShardId,\n        key_range: &Range<Key>,\n        lsn: Lsn,\n        gate: &utils::sync::gate::Gate,\n        cancel: CancellationToken,\n        ctx: &RequestContext,\n    ) -> anyhow::Result<ImageLayerWriter> {\n        Ok(Self {\n            inner: Some(\n                ImageLayerWriterInner::new(\n                    conf,\n                    timeline_id,\n                    tenant_shard_id,\n                    key_range,\n                    lsn,\n                    gate,\n                    cancel,\n                    ctx,\n                )\n                .await?,\n            ),\n        })\n    }\n\n    ///\n    /// Write next value to the file.\n    ///\n    /// The page versions must be appended in blknum order.\n    ///\n    pub async fn put_image(\n        &mut self,\n        key: Key,\n        img: Bytes,\n        ctx: &RequestContext,\n    ) -> Result<(), PutError> {\n        self.inner.as_mut().unwrap().put_image(key, img, ctx).await\n    }\n\n    ///\n    /// Write the next value to the file, as a raw header and data. 
This allows passing through a\n    /// raw, potentially compressed image from a different layer file without recompressing it.\n    ///\n    /// The page versions must be appended in blknum order.\n    ///\n    pub async fn put_image_raw(\n        &mut self,\n        key: Key,\n        raw_with_header: Bytes,\n        ctx: &RequestContext,\n    ) -> anyhow::Result<()> {\n        self.inner\n            .as_mut()\n            .unwrap()\n            .put_image_raw(key, raw_with_header, ctx)\n            .await\n    }\n\n    /// Estimated size of the image layer.\n    pub(crate) fn estimated_size(&self) -> u64 {\n        let inner = self.inner.as_ref().unwrap();\n        inner.blob_writer.size() + inner.tree.borrow_writer().size() + PAGE_SZ as u64\n    }\n\n    pub(crate) fn num_keys(&self) -> usize {\n        self.inner.as_ref().unwrap().num_keys\n    }\n\n    ///\n    /// Finish writing the image layer.\n    ///\n    pub(crate) async fn finish(\n        mut self,\n        ctx: &RequestContext,\n    ) -> anyhow::Result<(PersistentLayerDesc, Utf8PathBuf)> {\n        self.inner.take().unwrap().finish(ctx, None).await\n    }\n\n    /// Finish writing the image layer with an end key, used in [`super::batch_split_writer::SplitImageLayerWriter`]. 
The end key determines the end of the image layer's covered range and is exclusive.\n    pub(super) async fn finish_with_end_key(\n        mut self,\n        end_key: Key,\n        ctx: &RequestContext,\n    ) -> anyhow::Result<(PersistentLayerDesc, Utf8PathBuf)> {\n        self.inner.take().unwrap().finish(ctx, Some(end_key)).await\n    }\n}\n\npub struct ImageLayerIterator<'a> {\n    image_layer: &'a ImageLayerInner,\n    ctx: &'a RequestContext,\n    planner: StreamingVectoredReadPlanner,\n    index_iter: DiskBtreeIterator<'a>,\n    key_values_batch: VecDeque<(Key, Lsn, Value)>,\n    is_end: bool,\n}\n\nimpl ImageLayerIterator<'_> {\n    pub(crate) fn layer_dbg_info(&self) -> String {\n        self.image_layer.layer_dbg_info()\n    }\n\n    /// Retrieve a batch of key-value pairs into the iterator buffer.\n    async fn next_batch(&mut self) -> anyhow::Result<()> {\n        assert!(self.key_values_batch.is_empty());\n        assert!(!self.is_end);\n\n        let plan = loop {\n            if let Some(res) = self.index_iter.next().await {\n                let (raw_key, offset) = res?;\n                if let Some(batch_plan) = self.planner.handle(\n                    Key::from_slice(&raw_key[..KEY_SIZE]),\n                    self.image_layer.lsn,\n                    offset,\n                    true,\n                ) {\n                    break batch_plan;\n                }\n            } else {\n                self.is_end = true;\n                let payload_end = self.image_layer.index_start_blk as u64 * PAGE_SZ as u64;\n                if let Some(item) = self.planner.handle_range_end(payload_end) {\n                    break item;\n                } else {\n                    return Ok(()); // TODO: a test case on empty iterator\n                }\n            }\n        };\n        let vectored_blob_reader = VectoredBlobReader::new(&self.image_layer.file);\n        let mut next_batch = std::collections::VecDeque::new();\n        let buf_size = 
plan.size();\n        let buf = IoBufferMut::with_capacity(buf_size);\n        let blobs_buf = vectored_blob_reader\n            .read_blobs(&plan, buf, self.ctx)\n            .await?;\n        let view = BufView::new_slice(&blobs_buf.buf);\n        for meta in blobs_buf.blobs.iter() {\n            let img_buf = meta.read(&view).await?;\n            next_batch.push_back((\n                meta.meta.key,\n                self.image_layer.lsn,\n                Value::Image(img_buf.into_bytes()),\n            ));\n        }\n        self.key_values_batch = next_batch;\n        Ok(())\n    }\n\n    pub async fn next(&mut self) -> anyhow::Result<Option<(Key, Lsn, Value)>> {\n        if self.key_values_batch.is_empty() {\n            if self.is_end {\n                return Ok(None);\n            }\n            self.next_batch().await?;\n        }\n        Ok(Some(\n            self.key_values_batch\n                .pop_front()\n                .expect(\"should not be empty\"),\n        ))\n    }\n}\n\n#[cfg(test)]\nmod test {\n    use std::sync::Arc;\n    use std::time::Duration;\n\n    use bytes::Bytes;\n    use itertools::Itertools;\n    use pageserver_api::key::Key;\n    use pageserver_api::shard::{ShardCount, ShardIdentity, ShardNumber, ShardStripeSize};\n    use utils::generation::Generation;\n    use utils::id::{TenantId, TimelineId};\n    use utils::lsn::Lsn;\n    use wal_decoder::models::value::Value;\n\n    use super::{ImageLayerIterator, ImageLayerWriter};\n    use crate::DEFAULT_PG_VERSION;\n    use crate::context::RequestContext;\n    use crate::tenant::harness::{TIMELINE_ID, TenantHarness};\n    use crate::tenant::storage_layer::{Layer, ResidentLayer};\n    use crate::tenant::{TenantShard, Timeline};\n\n    #[tokio::test]\n    async fn image_layer_rewrite() {\n        let tenant_conf = pageserver_api::models::TenantConfig {\n            gc_period: Some(Duration::ZERO),\n            compaction_period: Some(Duration::ZERO),\n            
..Default::default()\n        };\n        let tenant_id = TenantId::generate();\n        let mut gen_ = Generation::new(0xdead0001);\n        let mut get_next_gen = || {\n            let ret = gen_;\n            gen_ = gen_.next();\n            ret\n        };\n        // The LSN at which we will create an image layer to filter\n        let lsn = Lsn(0xdeadbeef0000);\n        let timeline_id = TimelineId::generate();\n\n        //\n        // Create an unsharded parent with a layer.\n        //\n\n        let harness = TenantHarness::create_custom(\n            \"test_image_layer_rewrite--parent\",\n            tenant_conf.clone(),\n            tenant_id,\n            ShardIdentity::unsharded(),\n            get_next_gen(),\n        )\n        .await\n        .unwrap();\n        let (tenant, ctx) = harness.load().await;\n        let timeline = tenant\n            .create_test_timeline(timeline_id, lsn, DEFAULT_PG_VERSION, &ctx)\n            .await\n            .unwrap();\n\n        // This key range contains several 0x8000 page stripes, only one of which belongs to shard zero\n        let input_start = Key::from_hex(\"000000067f00000001000000ae0000000000\").unwrap();\n        let input_end = Key::from_hex(\"000000067f00000001000000ae0000002000\").unwrap();\n        let range = input_start..input_end;\n\n        // Build an image layer to filter\n        let resident = {\n            let mut writer = ImageLayerWriter::new(\n                harness.conf,\n                timeline_id,\n                harness.tenant_shard_id,\n                &range,\n                lsn,\n                &timeline.gate,\n                timeline.cancel.clone(),\n                &ctx,\n            )\n            .await\n            .unwrap();\n\n            let foo_img = Bytes::from_static(&[1, 2, 3, 4]);\n            let mut key = range.start;\n            while key < range.end {\n                writer.put_image(key, foo_img.clone(), &ctx).await.unwrap();\n\n                key = 
key.next();\n            }\n            let (desc, path) = writer.finish(&ctx).await.unwrap();\n            Layer::finish_creating(tenant.conf, &timeline, desc, &path).unwrap()\n        };\n        let original_size = resident.metadata().file_size;\n\n        //\n        // Create child shards and do the rewrite, exercising filter().\n        // TODO: abstraction in TenantHarness for splits.\n        //\n\n        // Filter for various shards: this exercises cases like values at start of key range, end of key\n        // range, middle of key range.\n        let shard_count = ShardCount::new(4);\n        for shard_number in 0..shard_count.count() {\n            //\n            // mimic the shard split\n            //\n            let shard_identity = ShardIdentity::new(\n                ShardNumber(shard_number),\n                shard_count,\n                ShardStripeSize(0x800),\n            )\n            .unwrap();\n            let harness = TenantHarness::create_custom(\n                Box::leak(Box::new(format!(\n                    \"test_image_layer_rewrite--child{}\",\n                    shard_identity.shard_slug()\n                ))),\n                tenant_conf.clone(),\n                tenant_id,\n                shard_identity,\n                // NB: in reality, the shards would each fork off their own gen number sequence from the parent.\n                // But here, all we care about is that the gen number is unique.\n                get_next_gen(),\n            )\n            .await\n            .unwrap();\n            let (tenant, ctx) = harness.load().await;\n            let timeline = tenant\n                .create_test_timeline(timeline_id, lsn, DEFAULT_PG_VERSION, &ctx)\n                .await\n                .unwrap();\n\n            //\n            // use filter() and make assertions\n            //\n\n            let mut filtered_writer = ImageLayerWriter::new(\n                harness.conf,\n                timeline_id,\n            
    harness.tenant_shard_id,\n                &range,\n                lsn,\n                &timeline.gate,\n                timeline.cancel.clone(),\n                &ctx,\n            )\n            .await\n            .unwrap();\n\n            let wrote_keys = resident\n                .filter(&shard_identity, &mut filtered_writer, &ctx)\n                .await\n                .unwrap();\n            let replacement = if wrote_keys > 0 {\n                let (desc, path) = filtered_writer.finish(&ctx).await.unwrap();\n                let resident = Layer::finish_creating(tenant.conf, &timeline, desc, &path).unwrap();\n                Some(resident)\n            } else {\n                None\n            };\n\n            // This exact size and those below will need updating as/when the layer encoding changes, but\n            // should be deterministic for a given version of the format, as we used no randomness generating the input.\n            assert_eq!(original_size, 122880);\n\n            match shard_number {\n                0 => {\n                    // We should have written out just one stripe for our shard identity\n                    assert_eq!(wrote_keys, 0x800);\n                    let replacement = replacement.unwrap();\n\n                    // We should have dropped some of the data\n                    assert!(replacement.metadata().file_size < original_size);\n                    assert!(replacement.metadata().file_size > 0);\n\n                    // Assert that we dropped ~3/4 of the data.\n                    assert_eq!(replacement.metadata().file_size, 49152);\n                }\n                1 => {\n                    // Shard 1 has no keys in our input range\n                    assert_eq!(wrote_keys, 0x0);\n                    assert!(replacement.is_none());\n                }\n                2 => {\n                    // Shard 2 has one stripes in the input range\n                    assert_eq!(wrote_keys, 0x800);\n         
           let replacement = replacement.unwrap();\n                    assert!(replacement.metadata().file_size < original_size);\n                    assert!(replacement.metadata().file_size > 0);\n                    assert_eq!(replacement.metadata().file_size, 49152);\n                }\n                3 => {\n                    // Shard 3 has two stripes in the input range\n                    assert_eq!(wrote_keys, 0x1000);\n                    let replacement = replacement.unwrap();\n                    assert!(replacement.metadata().file_size < original_size);\n                    assert!(replacement.metadata().file_size > 0);\n                    assert_eq!(replacement.metadata().file_size, 73728);\n                }\n                _ => unreachable!(),\n            }\n        }\n    }\n\n    async fn produce_image_layer(\n        tenant: &TenantShard,\n        tline: &Arc<Timeline>,\n        mut images: Vec<(Key, Bytes)>,\n        lsn: Lsn,\n        ctx: &RequestContext,\n    ) -> anyhow::Result<ResidentLayer> {\n        images.sort();\n        let (key_start, _) = images.first().unwrap();\n        let (key_last, _) = images.last().unwrap();\n        let key_end = key_last.next();\n        let key_range = *key_start..key_end;\n        let mut writer = ImageLayerWriter::new(\n            tenant.conf,\n            tline.timeline_id,\n            tenant.tenant_shard_id,\n            &key_range,\n            lsn,\n            &tline.gate,\n            tline.cancel.clone(),\n            ctx,\n        )\n        .await?;\n\n        for (key, img) in images {\n            writer.put_image(key, img, ctx).await?;\n        }\n        let (desc, path) = writer.finish(ctx).await?;\n        let img_layer = Layer::finish_creating(tenant.conf, tline, desc, &path)?;\n\n        Ok::<_, anyhow::Error>(img_layer)\n    }\n\n    async fn assert_img_iter_equal(\n        img_iter: &mut ImageLayerIterator<'_>,\n        expect: &[(Key, Bytes)],\n        expect_lsn: Lsn,\n    ) 
{\n        let mut expect_iter = expect.iter();\n        loop {\n            let o1 = img_iter.next().await.unwrap();\n            let o2 = expect_iter.next();\n            match (o1, o2) {\n                (None, None) => break,\n                (Some((k1, l1, v1)), Some((k2, i2))) => {\n                    let Value::Image(i1) = v1 else {\n                        panic!(\"expect Value::Image\")\n                    };\n                    assert_eq!(&k1, k2);\n                    assert_eq!(l1, expect_lsn);\n                    assert_eq!(&i1, i2);\n                }\n                (o1, o2) => panic!(\"iterators length mismatch: {o1:?}, {o2:?}\"),\n            }\n        }\n    }\n\n    #[tokio::test]\n    async fn image_layer_iterator() {\n        let harness = TenantHarness::create(\"image_layer_iterator\").await.unwrap();\n        let (tenant, ctx) = harness.load().await;\n\n        let tline = tenant\n            .create_test_timeline(TIMELINE_ID, Lsn(0x10), DEFAULT_PG_VERSION, &ctx)\n            .await\n            .unwrap();\n\n        fn get_key(id: u32) -> Key {\n            let mut key = Key::from_hex(\"000000000033333333444444445500000000\").unwrap();\n            key.field6 = id;\n            key\n        }\n        const N: usize = 1000;\n        let test_imgs = (0..N)\n            .map(|idx| (get_key(idx as u32), Bytes::from(format!(\"img{idx:05}\"))))\n            .collect_vec();\n        let resident_layer =\n            produce_image_layer(&tenant, &tline, test_imgs.clone(), Lsn(0x10), &ctx)\n                .await\n                .unwrap();\n        let img_layer = resident_layer.get_as_image(&ctx).await.unwrap();\n        for max_read_size in [1, 1024] {\n            for batch_size in [1, 2, 4, 8, 3, 7, 13] {\n                println!(\"running with batch_size={batch_size} max_read_size={max_read_size}\");\n                // Test if the batch size is correctly determined\n                let mut iter = img_layer.iter_with_options(&ctx, 
max_read_size, batch_size);\n                let mut num_items = 0;\n                for _ in 0..3 {\n                    iter.next_batch().await.unwrap();\n                    num_items += iter.key_values_batch.len();\n                    if max_read_size == 1 {\n                        // every key should be a batch b/c the value is larger than max_read_size\n                        assert_eq!(iter.key_values_batch.len(), 1);\n                    } else {\n                        assert!(iter.key_values_batch.len() <= batch_size);\n                    }\n                    if num_items >= N {\n                        break;\n                    }\n                    iter.key_values_batch.clear();\n                }\n                // Test if the result is correct\n                let mut iter = img_layer.iter_with_options(&ctx, max_read_size, batch_size);\n                assert_img_iter_equal(&mut iter, &test_imgs, Lsn(0x10)).await;\n            }\n        }\n    }\n}\n"
  },
  {
    "path": "pageserver/src/tenant/storage_layer/inmemory_layer/vectored_dio_read.rs",
    "content": "use std::collections::BTreeMap;\nuse std::sync::{Arc, RwLock};\n\nuse itertools::Itertools;\nuse tokio_epoll_uring::{BoundedBuf, IoBufMut, Slice};\n\nuse crate::assert_u64_eq_usize::{U64IsUsize, UsizeIsU64};\nuse crate::context::RequestContext;\nuse crate::virtual_file::IoBufferMut;\nuse crate::virtual_file::owned_buffers_io::io_buf_aligned::IoBufAlignedMut;\n\n/// The file interface we require. At runtime, this is a [`crate::tenant::ephemeral_file::EphemeralFile`].\npub trait File: Send {\n    /// Attempt to read the bytes in `self` in range `[start,start+dst.bytes_total())`\n    /// and return the number of bytes read (let's call it `nread`).\n    /// The bytes read are placed in `dst`, i.e., `&dst[..nread]` will contain the read bytes.\n    ///\n    /// The only reason why the read may be short (i.e., `nread != dst.bytes_total()`)\n    /// is if the file is shorter than `start+dst.len()`.\n    ///\n    /// This is unlike [`std::os::unix::fs::FileExt::read_exact_at`] which returns an\n    /// [`std::io::ErrorKind::UnexpectedEof`] error if the file is shorter than `start+dst.len()`.\n    ///\n    /// No guarantees are made about the remaining bytes in `dst` in case of a short read.\n    async fn read_exact_at_eof_ok<B: IoBufAlignedMut + Send>(\n        &self,\n        start: u64,\n        dst: Slice<B>,\n        ctx: &RequestContext,\n    ) -> std::io::Result<(Slice<B>, usize)>;\n}\n\n/// A logical read from [`File`]. 
See [`Self::new`].\npub struct LogicalRead<B: Buffer> {\n    pos: u64,\n    state: RwLockRefCell<LogicalReadState<B>>,\n}\n\nenum LogicalReadState<B: Buffer> {\n    NotStarted(B),\n    Ongoing(B),\n    Ok(B),\n    Error(Arc<std::io::Error>),\n    Undefined,\n}\n\nimpl<B: Buffer> LogicalRead<B> {\n    /// Create a new [`LogicalRead`] from [`File`] of the data in the file in range `[ pos, pos + buf.cap() )`.\n    pub fn new(pos: u64, buf: B) -> Self {\n        Self {\n            pos,\n            state: RwLockRefCell::new(LogicalReadState::NotStarted(buf)),\n        }\n    }\n    pub fn into_result(self) -> Option<Result<B, Arc<std::io::Error>>> {\n        match self.state.into_inner() {\n            LogicalReadState::Ok(buf) => Some(Ok(buf)),\n            LogicalReadState::Error(e) => Some(Err(e)),\n            LogicalReadState::NotStarted(_) | LogicalReadState::Ongoing(_) => None,\n            LogicalReadState::Undefined => unreachable!(),\n        }\n    }\n}\n\n/// The buffer into which a [`LogicalRead`] result is placed.\npub trait Buffer: std::ops::Deref<Target = [u8]> {\n    /// Immutable.\n    fn cap(&self) -> usize;\n    /// Changes only through [`Self::extend_from_slice`].\n    fn len(&self) -> usize;\n    /// Panics if the total length would exceed the initialized capacity.\n    fn extend_from_slice(&mut self, src: &[u8]);\n}\n\n/// The minimum alignment and size requirement for disk offsets and memory buffer size for direct IO.\nconst DIO_CHUNK_SIZE: usize = crate::virtual_file::get_io_buffer_alignment();\n\n/// If multiple chunks need to be read, merge adjacent chunk reads into batches of max size `MAX_CHUNK_BATCH_SIZE`.\n/// (The unit is the number of chunks.)\nconst MAX_CHUNK_BATCH_SIZE: usize = {\n    let desired = 128 * 1024; // 128k\n    if desired % DIO_CHUNK_SIZE != 0 {\n        panic!(\"MAX_CHUNK_BATCH_SIZE must be a multiple of DIO_CHUNK_SIZE\")\n        // compile-time error\n    }\n    desired / DIO_CHUNK_SIZE\n};\n\n/// Execute the given 
logical `reads` against `file`.\n/// The results are placed in the buffers of the [`LogicalRead`]s.\n/// Retrieve the results by calling [`LogicalRead::into_result`] on each [`LogicalRead`].\n///\n/// The [`LogicalRead`]s must be freshly created using [`LogicalRead::new`] when calling this function.\n/// Otherwise, this function panics.\npub async fn execute<'a, I, F, B>(file: &F, reads: I, ctx: &RequestContext)\nwhere\n    I: IntoIterator<Item = &'a LogicalRead<B>>,\n    F: File,\n    B: Buffer + IoBufMut + Send,\n{\n    // Terminology:\n    // logical read = a request to read an arbitrary range of bytes from `file`; byte-level granularity\n    // chunk = we conceptually divide up the byte range of `file` into DIO_CHUNK_SIZEs ranges\n    // interest = a range within a chunk that a logical read is interested in; one logical read gets turned into many interests\n    // physical read = the read request we're going to issue to the OS; covers a range of chunks; chunk-level granularity\n\n    // Preserve a copy of the logical reads for debug assertions at the end\n    #[cfg(debug_assertions)]\n    let (reads, assert_logical_reads) = {\n        let (reads, assert) = reads.into_iter().tee();\n        (reads, Some(Vec::from_iter(assert)))\n    };\n    #[cfg(not(debug_assertions))]\n    let (reads, assert_logical_reads): (_, Option<Vec<&'a LogicalRead<B>>>) = (reads, None);\n\n    // Plan which parts of which chunks need to be appended to which buffer\n    let mut by_chunk: BTreeMap<u64, Vec<Interest<B>>> = BTreeMap::new();\n    struct Interest<'a, B: Buffer> {\n        logical_read: &'a LogicalRead<B>,\n        offset_in_chunk: u64,\n        len: u64,\n    }\n    for logical_read in reads {\n        let LogicalRead { pos, state } = logical_read;\n        let mut state = state.borrow_mut();\n\n        // transition from NotStarted to Ongoing\n        let cur = std::mem::replace(&mut *state, LogicalReadState::Undefined);\n        let req_len = match cur {\n            
LogicalReadState::NotStarted(buf) => {\n                if buf.len() != 0 {\n                    panic!(\n                        \"The `LogicalRead`s that are passed in must be freshly created using `LogicalRead::new`\"\n                    );\n                }\n                // buf.cap() == 0 is ok\n\n                // transition into Ongoing state\n                let req_len = buf.cap();\n                *state = LogicalReadState::Ongoing(buf);\n                req_len\n            }\n            x => panic!(\n                \"must only call with fresh LogicalReads, got another state, leaving Undefined state behind state={x:?}\"\n            ),\n        };\n\n        // plan which chunks we need to read from\n        let mut remaining = req_len;\n        let mut chunk_no = *pos / (DIO_CHUNK_SIZE.into_u64());\n        let mut offset_in_chunk = pos.into_usize() % DIO_CHUNK_SIZE;\n        while remaining > 0 {\n            let remaining_in_chunk = std::cmp::min(remaining, DIO_CHUNK_SIZE - offset_in_chunk);\n            by_chunk.entry(chunk_no).or_default().push(Interest {\n                logical_read,\n                offset_in_chunk: offset_in_chunk.into_u64(),\n                len: remaining_in_chunk.into_u64(),\n            });\n            offset_in_chunk = 0;\n            chunk_no += 1;\n            remaining -= remaining_in_chunk;\n        }\n    }\n\n    // At this point, we could iterate over by_chunk, in chunk order,\n    // read each chunk from disk, and fill the buffers.\n    // However, we can merge adjacent chunks into batches of MAX_CHUNK_BATCH_SIZE\n    // so we issue fewer IOs = fewer roundtrips = lower overall latency.\n    struct PhysicalRead<'a, B: Buffer> {\n        start_chunk_no: u64,\n        nchunks: usize,\n        dsts: Vec<PhysicalInterest<'a, B>>,\n    }\n    struct PhysicalInterest<'a, B: Buffer> {\n        logical_read: &'a LogicalRead<B>,\n        offset_in_physical_read: u64,\n        len: u64,\n    }\n    let mut 
physical_reads: Vec<PhysicalRead<B>> = Vec::new();\n    let mut by_chunk = by_chunk.into_iter().peekable();\n    loop {\n        let mut last_chunk_no = None;\n        let to_merge: Vec<(u64, Vec<Interest<B>>)> = by_chunk\n            .peeking_take_while(|(chunk_no, _)| {\n                if let Some(last_chunk_no) = last_chunk_no {\n                    if *chunk_no != last_chunk_no + 1 {\n                        return false;\n                    }\n                }\n                last_chunk_no = Some(*chunk_no);\n                true\n            })\n            .take(MAX_CHUNK_BATCH_SIZE)\n            .collect(); // TODO: avoid this .collect()\n        let Some(start_chunk_no) = to_merge.first().map(|(chunk_no, _)| *chunk_no) else {\n            break;\n        };\n        let nchunks = to_merge.len();\n        let dsts = to_merge\n            .into_iter()\n            .enumerate()\n            .flat_map(|(i, (_, dsts))| {\n                dsts.into_iter().map(\n                    move |Interest {\n                              logical_read,\n                              offset_in_chunk,\n                              len,\n                          }| {\n                        PhysicalInterest {\n                            logical_read,\n                            offset_in_physical_read: i\n                                .checked_mul(DIO_CHUNK_SIZE)\n                                .unwrap()\n                                .into_u64()\n                                + offset_in_chunk,\n                            len,\n                        }\n                    },\n                )\n            })\n            .collect();\n        physical_reads.push(PhysicalRead {\n            start_chunk_no,\n            nchunks,\n            dsts,\n        });\n    }\n    drop(by_chunk);\n\n    // Execute physical reads and fill the logical read buffers\n    // TODO: pipelined reads; prefetch;\n    let get_io_buffer = |nchunks| 
IoBufferMut::with_capacity(nchunks * DIO_CHUNK_SIZE);\n    for PhysicalRead {\n        start_chunk_no,\n        nchunks,\n        dsts,\n    } in physical_reads\n    {\n        let all_done = dsts\n            .iter()\n            .all(|PhysicalInterest { logical_read, .. }| logical_read.state.borrow().is_terminal());\n        if all_done {\n            continue;\n        }\n        let read_offset = start_chunk_no\n            .checked_mul(DIO_CHUNK_SIZE.into_u64())\n            .expect(\"we produce chunk_nos by dividing by DIO_CHUNK_SIZE earlier\");\n        let io_buf = get_io_buffer(nchunks).slice_full();\n        let req_len = io_buf.len();\n        let (io_buf_slice, nread) = match file.read_exact_at_eof_ok(read_offset, io_buf, ctx).await\n        {\n            Ok(t) => t,\n            Err(e) => {\n                let e = Arc::new(e);\n                for PhysicalInterest { logical_read, .. } in dsts {\n                    *logical_read.state.borrow_mut() = LogicalReadState::Error(Arc::clone(&e));\n                    // this will make later reads for the given LogicalRead short-circuit, see top of loop body\n                }\n                continue;\n            }\n        };\n        let io_buf = io_buf_slice.into_inner();\n        assert!(\n            nread <= io_buf.len(),\n            \"the last chunk in the file can be a short read, so, no ==\"\n        );\n        let io_buf = &io_buf[..nread];\n        for PhysicalInterest {\n            logical_read,\n            offset_in_physical_read,\n            len,\n        } in dsts\n        {\n            let mut logical_read_state_borrow = logical_read.state.borrow_mut();\n            let logical_read_buf = match &mut *logical_read_state_borrow {\n                LogicalReadState::NotStarted(_) => {\n                    unreachable!(\"we transition it into Ongoing at function entry\")\n                }\n                LogicalReadState::Ongoing(buf) => buf,\n                LogicalReadState::Ok(_) | 
LogicalReadState::Error(_) => {\n                    continue;\n                }\n                LogicalReadState::Undefined => unreachable!(),\n            };\n            let range_in_io_buf = std::ops::Range {\n                start: offset_in_physical_read as usize,\n                end: offset_in_physical_read as usize + len as usize,\n            };\n            assert!(range_in_io_buf.end >= range_in_io_buf.start);\n            if range_in_io_buf.end > nread {\n                let msg = format!(\n                    \"physical read returned EOF where this logical read expected more data in the file: offset=0x{read_offset:x} req_len=0x{req_len:x} nread=0x{nread:x} {:?}\",\n                    &*logical_read_state_borrow\n                );\n                logical_read_state_borrow.transition_to_terminal(Err(std::io::Error::new(\n                    std::io::ErrorKind::UnexpectedEof,\n                    msg,\n                )));\n                continue;\n            }\n            let data = &io_buf[range_in_io_buf];\n\n            // Copy data from io buffer into the logical read buffer.\n            // (And in debug mode, validate that the buffer impl adheres to the Buffer trait spec.)\n            let pre = if cfg!(debug_assertions) {\n                Some((logical_read_buf.len(), logical_read_buf.cap()))\n            } else {\n                None\n            };\n            logical_read_buf.extend_from_slice(data);\n            let post = if cfg!(debug_assertions) {\n                Some((logical_read_buf.len(), logical_read_buf.cap()))\n            } else {\n                None\n            };\n            match (pre, post) {\n                (None, None) => {}\n                (Some(_), None) | (None, Some(_)) => unreachable!(),\n                (Some((pre_len, pre_cap)), Some((post_len, post_cap))) => {\n                    assert_eq!(pre_len + len as usize, post_len);\n                    assert_eq!(pre_cap, post_cap);\n                }\n    
        }\n\n            if logical_read_buf.len() == logical_read_buf.cap() {\n                logical_read_state_borrow.transition_to_terminal(Ok(()));\n            }\n        }\n    }\n\n    if let Some(assert_logical_reads) = assert_logical_reads {\n        for logical_read in assert_logical_reads {\n            assert!(logical_read.state.borrow().is_terminal());\n        }\n    }\n}\n\nimpl<B: Buffer> LogicalReadState<B> {\n    fn is_terminal(&self) -> bool {\n        match self {\n            LogicalReadState::NotStarted(_) | LogicalReadState::Ongoing(_) => false,\n            LogicalReadState::Ok(_) | LogicalReadState::Error(_) => true,\n            LogicalReadState::Undefined => unreachable!(),\n        }\n    }\n    fn transition_to_terminal(&mut self, err: std::io::Result<()>) {\n        let cur = std::mem::replace(self, LogicalReadState::Undefined);\n        let buf = match cur {\n            LogicalReadState::Ongoing(buf) => buf,\n            x => panic!(\"must only call in state Ongoing, got {x:?}\"),\n        };\n        *self = match err {\n            Ok(()) => LogicalReadState::Ok(buf),\n            Err(e) => LogicalReadState::Error(Arc::new(e)),\n        };\n    }\n}\n\nimpl<B: Buffer> std::fmt::Debug for LogicalReadState<B> {\n    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {\n        #[derive(Debug)]\n        #[allow(unused)]\n        struct BufferDebug {\n            len: usize,\n            cap: usize,\n        }\n        impl<'a> From<&'a dyn Buffer> for BufferDebug {\n            fn from(buf: &'a dyn Buffer) -> Self {\n                Self {\n                    len: buf.len(),\n                    cap: buf.cap(),\n                }\n            }\n        }\n        match self {\n            LogicalReadState::NotStarted(b) => {\n                write!(f, \"NotStarted({:?})\", BufferDebug::from(b as &dyn Buffer))\n            }\n            LogicalReadState::Ongoing(b) => {\n                write!(f, \"Ongoing({:?})\", 
BufferDebug::from(b as &dyn Buffer))\n            }\n            LogicalReadState::Ok(b) => write!(f, \"Ok({:?})\", BufferDebug::from(b as &dyn Buffer)),\n            LogicalReadState::Error(e) => write!(f, \"Error({e:?})\"),\n            LogicalReadState::Undefined => write!(f, \"Undefined\"),\n        }\n    }\n}\n\n#[derive(Debug)]\nstruct RwLockRefCell<T>(RwLock<T>);\nimpl<T> RwLockRefCell<T> {\n    fn new(value: T) -> Self {\n        Self(RwLock::new(value))\n    }\n    fn borrow(&self) -> impl std::ops::Deref<Target = T> + '_ {\n        self.0.try_read().unwrap()\n    }\n    fn borrow_mut(&self) -> impl std::ops::DerefMut<Target = T> + '_ {\n        self.0.try_write().unwrap()\n    }\n    fn into_inner(self) -> T {\n        self.0.into_inner().unwrap()\n    }\n}\n\nimpl Buffer for Vec<u8> {\n    fn cap(&self) -> usize {\n        self.capacity()\n    }\n\n    fn len(&self) -> usize {\n        self.len()\n    }\n\n    fn extend_from_slice(&mut self, src: &[u8]) {\n        if self.len() + src.len() > self.cap() {\n            panic!(\"Buffer capacity exceeded\");\n        }\n        Vec::extend_from_slice(self, src);\n    }\n}\n\n#[cfg(test)]\n#[allow(clippy::assertions_on_constants)]\nmod tests {\n    use std::cell::RefCell;\n    use std::collections::VecDeque;\n\n    use rand::Rng;\n\n    use super::*;\n    use crate::context::DownloadBehavior;\n    use crate::task_mgr::TaskKind;\n    use crate::virtual_file::owned_buffers_io::slice::SliceMutExt;\n\n    struct InMemoryFile {\n        content: Vec<u8>,\n    }\n\n    impl InMemoryFile {\n        fn new_random(len: usize) -> Self {\n            Self {\n                content: rand::rng()\n                    .sample_iter(rand::distr::StandardUniform)\n                    .take(len)\n                    .collect(),\n            }\n        }\n        fn test_logical_read(&self, pos: u64, len: usize) -> TestLogicalRead {\n            let expected_result = if pos as usize + len > self.content.len() {\n               
 Err(\"InMemoryFile short read\".to_string())\n            } else {\n                Ok(self.content[pos as usize..pos as usize + len].to_vec())\n            };\n            TestLogicalRead::new(pos, len, expected_result)\n        }\n    }\n\n    #[test]\n    fn test_in_memory_file() {\n        let ctx = RequestContext::new(TaskKind::UnitTest, DownloadBehavior::Error);\n        let file = InMemoryFile::new_random(10);\n        let test_read = |pos, len| {\n            let buf = IoBufferMut::with_capacity_zeroed(len);\n            let fut = file.read_exact_at_eof_ok(pos, buf.slice_full(), &ctx);\n            use futures::FutureExt;\n            let (slice, nread) = fut\n                .now_or_never()\n                .expect(\"impl never awaits\")\n                .expect(\"impl never errors\");\n            let mut buf = slice.into_inner();\n            buf.truncate(nread);\n            buf\n        };\n        assert_eq!(&test_read(0, 1), &file.content[0..1]);\n        assert_eq!(&test_read(1, 2), &file.content[1..3]);\n        assert_eq!(&test_read(9, 2), &file.content[9..]);\n        assert!(test_read(10, 2).is_empty());\n        assert!(test_read(11, 2).is_empty());\n    }\n\n    impl File for InMemoryFile {\n        async fn read_exact_at_eof_ok<B: IoBufMut + Send>(\n            &self,\n            start: u64,\n            mut dst: Slice<B>,\n            _ctx: &RequestContext,\n        ) -> std::io::Result<(Slice<B>, usize)> {\n            let dst_slice: &mut [u8] = dst.as_mut_rust_slice_full_zeroed();\n            let nread = {\n                let req_len = dst_slice.len();\n                let len = std::cmp::min(req_len, self.content.len().saturating_sub(start as usize));\n                if start as usize >= self.content.len() {\n                    0\n                } else {\n                    dst_slice[..len]\n                        .copy_from_slice(&self.content[start as usize..start as usize + len]);\n                    len\n                }\n  
          };\n            rand::Rng::fill(&mut rand::rng(), &mut dst_slice[nread..]); // to discover bugs\n            Ok((dst, nread))\n        }\n    }\n\n    #[derive(Clone)]\n    struct TestLogicalRead {\n        pos: u64,\n        len: usize,\n        expected_result: Result<Vec<u8>, String>,\n    }\n\n    impl TestLogicalRead {\n        fn new(pos: u64, len: usize, expected_result: Result<Vec<u8>, String>) -> Self {\n            Self {\n                pos,\n                len,\n                expected_result,\n            }\n        }\n        fn make_logical_read(&self) -> LogicalRead<Vec<u8>> {\n            LogicalRead::new(self.pos, Vec::with_capacity(self.len))\n        }\n    }\n\n    async fn execute_and_validate_test_logical_reads<I, F>(\n        file: &F,\n        test_logical_reads: I,\n        ctx: &RequestContext,\n    ) where\n        I: IntoIterator<Item = TestLogicalRead>,\n        F: File,\n    {\n        let (tmp, test_logical_reads) = test_logical_reads.into_iter().tee();\n        let logical_reads = tmp.map(|tr| tr.make_logical_read()).collect::<Vec<_>>();\n        execute(file, logical_reads.iter(), ctx).await;\n        for (logical_read, test_logical_read) in logical_reads.into_iter().zip(test_logical_reads) {\n            let actual = logical_read.into_result().expect(\"we call execute()\");\n            match (actual, test_logical_read.expected_result) {\n                (Ok(actual), Ok(expected)) if actual == expected => {}\n                (Err(actual), Err(expected)) => {\n                    assert_eq!(actual.to_string(), expected);\n                }\n                (actual, expected) => panic!(\"expected {expected:?}\\nactual {actual:?}\"),\n            }\n        }\n    }\n\n    #[tokio::test]\n    async fn test_blackbox() {\n        let ctx = RequestContext::new(TaskKind::UnitTest, DownloadBehavior::Error);\n        let cs = DIO_CHUNK_SIZE;\n        let cs_u64 = cs.into_u64();\n\n        let file = InMemoryFile::new_random(10 
* cs);\n\n        let test_logical_reads = vec![\n            file.test_logical_read(0, 1),\n            // adjacent to logical_read0\n            file.test_logical_read(1, 2),\n            // gap\n            // spans adjacent chunks\n            file.test_logical_read(cs_u64 - 1, 2),\n            // gap\n            //  tail of chunk 3, all of chunk 4, and 2 bytes of chunk 5\n            file.test_logical_read(3 * cs_u64 - 1, cs + 2),\n            // gap\n            file.test_logical_read(5 * cs_u64, 1),\n        ];\n        let num_test_logical_reads = test_logical_reads.len();\n        let test_logical_reads_perms = test_logical_reads\n            .into_iter()\n            .permutations(num_test_logical_reads);\n\n        // test all orderings of LogicalReads, the order shouldn't matter for the results\n        for test_logical_reads in test_logical_reads_perms {\n            execute_and_validate_test_logical_reads(&file, test_logical_reads, &ctx).await;\n        }\n    }\n\n    #[tokio::test]\n    #[should_panic]\n    async fn test_reusing_logical_reads_panics() {\n        let ctx = RequestContext::new(TaskKind::UnitTest, DownloadBehavior::Error);\n        let file = InMemoryFile::new_random(DIO_CHUNK_SIZE);\n        let a = file.test_logical_read(23, 10);\n        let logical_reads = vec![a.make_logical_read()];\n        execute(&file, &logical_reads, &ctx).await;\n        // reuse pancis\n        execute(&file, &logical_reads, &ctx).await;\n    }\n\n    struct RecorderFile<'a> {\n        recorded: RefCell<Vec<RecordedRead>>,\n        file: &'a InMemoryFile,\n    }\n\n    struct RecordedRead {\n        pos: u64,\n        req_len: usize,\n        res: Vec<u8>,\n    }\n\n    impl<'a> RecorderFile<'a> {\n        fn new(file: &'a InMemoryFile) -> RecorderFile<'a> {\n            Self {\n                recorded: Default::default(),\n                file,\n            }\n        }\n    }\n\n    impl File for RecorderFile<'_> {\n        async fn 
read_exact_at_eof_ok<B: IoBufAlignedMut + Send>(\n            &self,\n            start: u64,\n            dst: Slice<B>,\n            ctx: &RequestContext,\n        ) -> std::io::Result<(Slice<B>, usize)> {\n            let (dst, nread) = self.file.read_exact_at_eof_ok(start, dst, ctx).await?;\n            self.recorded.borrow_mut().push(RecordedRead {\n                pos: start,\n                req_len: dst.bytes_total(),\n                res: Vec::from(&dst[..nread]),\n            });\n            Ok((dst, nread))\n        }\n    }\n\n    #[tokio::test]\n    async fn test_logical_reads_to_same_chunk_are_merged_into_one_chunk_read() {\n        let ctx = RequestContext::new(TaskKind::UnitTest, DownloadBehavior::Error);\n\n        let file = InMemoryFile::new_random(2 * DIO_CHUNK_SIZE);\n\n        let a = file.test_logical_read(DIO_CHUNK_SIZE.into_u64(), 10);\n        let b = file.test_logical_read(DIO_CHUNK_SIZE.into_u64() + 30, 20);\n\n        let recorder = RecorderFile::new(&file);\n\n        execute_and_validate_test_logical_reads(&recorder, vec![a, b], &ctx).await;\n\n        let recorded = recorder.recorded.borrow();\n        assert_eq!(recorded.len(), 1);\n        let RecordedRead { pos, req_len, .. } = &recorded[0];\n        assert_eq!(*pos, DIO_CHUNK_SIZE.into_u64());\n        assert_eq!(*req_len, DIO_CHUNK_SIZE);\n    }\n\n    #[tokio::test]\n    async fn test_max_chunk_batch_size_is_respected() {\n        let ctx = RequestContext::new(TaskKind::UnitTest, DownloadBehavior::Error);\n\n        let file = InMemoryFile::new_random(4 * MAX_CHUNK_BATCH_SIZE * DIO_CHUNK_SIZE);\n\n        // read the 10th byte of each chunk 3 .. 
3+2*MAX_CHUNK_BATCH_SIZE\n        assert!(3 < MAX_CHUNK_BATCH_SIZE, \"test assumption\");\n        assert!(10 < DIO_CHUNK_SIZE, \"test assumption\");\n        let mut test_logical_reads = Vec::new();\n        for i in 3..3 + MAX_CHUNK_BATCH_SIZE + MAX_CHUNK_BATCH_SIZE / 2 {\n            test_logical_reads\n                .push(file.test_logical_read(i.into_u64() * DIO_CHUNK_SIZE.into_u64() + 10, 1));\n        }\n\n        let recorder = RecorderFile::new(&file);\n\n        execute_and_validate_test_logical_reads(&recorder, test_logical_reads, &ctx).await;\n\n        let recorded = recorder.recorded.borrow();\n        assert_eq!(recorded.len(), 2);\n        {\n            let RecordedRead { pos, req_len, .. } = &recorded[0];\n            assert_eq!(*pos as usize, 3 * DIO_CHUNK_SIZE);\n            assert_eq!(*req_len, MAX_CHUNK_BATCH_SIZE * DIO_CHUNK_SIZE);\n        }\n        {\n            let RecordedRead { pos, req_len, .. } = &recorded[1];\n            assert_eq!(*pos as usize, (3 + MAX_CHUNK_BATCH_SIZE) * DIO_CHUNK_SIZE);\n            assert_eq!(*req_len, MAX_CHUNK_BATCH_SIZE / 2 * DIO_CHUNK_SIZE);\n        }\n    }\n\n    #[tokio::test]\n    async fn test_batch_breaks_if_chunk_is_not_interesting() {\n        let ctx = RequestContext::new(TaskKind::UnitTest, DownloadBehavior::Error);\n\n        assert!(MAX_CHUNK_BATCH_SIZE > 10, \"test assumption\");\n        let file = InMemoryFile::new_random(3 * DIO_CHUNK_SIZE);\n\n        let a = file.test_logical_read(0, 1); // chunk 0\n        let b = file.test_logical_read(2 * DIO_CHUNK_SIZE.into_u64(), 1); // chunk 2\n\n        let recorder = RecorderFile::new(&file);\n\n        execute_and_validate_test_logical_reads(&recorder, vec![a, b], &ctx).await;\n\n        let recorded = recorder.recorded.borrow();\n\n        assert_eq!(recorded.len(), 2);\n        {\n            let RecordedRead { pos, req_len, .. 
} = &recorded[0];\n            assert_eq!(*pos, 0);\n            assert_eq!(*req_len, DIO_CHUNK_SIZE);\n        }\n        {\n            let RecordedRead { pos, req_len, .. } = &recorded[1];\n            assert_eq!(*pos, 2 * DIO_CHUNK_SIZE.into_u64());\n            assert_eq!(*req_len, DIO_CHUNK_SIZE);\n        }\n    }\n\n    struct ExpectedRead {\n        expect_pos: u64,\n        expect_len: usize,\n        respond: Result<Vec<u8>, String>,\n    }\n\n    struct MockFile {\n        expected: RefCell<VecDeque<ExpectedRead>>,\n    }\n\n    impl Drop for MockFile {\n        fn drop(&mut self) {\n            assert!(\n                self.expected.borrow().is_empty(),\n                \"expected reads not satisfied\"\n            );\n        }\n    }\n\n    macro_rules! mock_file {\n        ($($pos:expr , $len:expr => $respond:expr),* $(,)?) => {{\n            MockFile {\n                expected: RefCell::new(VecDeque::from(vec![$(ExpectedRead {\n                    expect_pos: $pos,\n                    expect_len: $len,\n                    respond: $respond,\n                }),*])),\n            }\n        }};\n    }\n\n    impl File for MockFile {\n        async fn read_exact_at_eof_ok<B: IoBufMut + Send>(\n            &self,\n            start: u64,\n            mut dst: Slice<B>,\n            _ctx: &RequestContext,\n        ) -> std::io::Result<(Slice<B>, usize)> {\n            let ExpectedRead {\n                expect_pos,\n                expect_len,\n                respond,\n            } = self\n                .expected\n                .borrow_mut()\n                .pop_front()\n                .expect(\"unexpected read\");\n            assert_eq!(start, expect_pos);\n            assert_eq!(dst.bytes_total(), expect_len);\n            match respond {\n                Ok(mocked_bytes) => {\n                    let len = std::cmp::min(dst.bytes_total(), mocked_bytes.len());\n                    let dst_slice: &mut [u8] = 
dst.as_mut_rust_slice_full_zeroed();\n                    dst_slice[..len].copy_from_slice(&mocked_bytes[..len]);\n                    rand::Rng::fill(&mut rand::rng(), &mut dst_slice[len..]); // to discover bugs\n                    Ok((dst, len))\n                }\n                Err(e) => Err(std::io::Error::other(e)),\n            }\n        }\n    }\n\n    #[tokio::test]\n    async fn test_mock_file() {\n        // Self-test to ensure the relevant features of mock file work as expected.\n\n        let ctx = RequestContext::new(TaskKind::UnitTest, DownloadBehavior::Error);\n\n        let mock_file = mock_file! {\n            0    , 512 => Ok(vec![0; 512]),\n            512  , 512 => Ok(vec![1; 512]),\n            1024 , 512 => Ok(vec![2; 10]),\n            2048,  1024 => Err(\"foo\".to_owned()),\n        };\n\n        let buf = IoBufferMut::with_capacity(512);\n        let (buf, nread) = mock_file\n            .read_exact_at_eof_ok(0, buf.slice_full(), &ctx)\n            .await\n            .unwrap();\n        assert_eq!(nread, 512);\n        assert_eq!(&buf.into_inner()[..nread], &[0; 512]);\n\n        let buf = IoBufferMut::with_capacity(512);\n        let (buf, nread) = mock_file\n            .read_exact_at_eof_ok(512, buf.slice_full(), &ctx)\n            .await\n            .unwrap();\n        assert_eq!(nread, 512);\n        assert_eq!(&buf.into_inner()[..nread], &[1; 512]);\n\n        let buf = IoBufferMut::with_capacity(512);\n        let (buf, nread) = mock_file\n            .read_exact_at_eof_ok(1024, buf.slice_full(), &ctx)\n            .await\n            .unwrap();\n        assert_eq!(nread, 10);\n        assert_eq!(&buf.into_inner()[..nread], &[2; 10]);\n\n        let buf = IoBufferMut::with_capacity(1024);\n        let err = mock_file\n            .read_exact_at_eof_ok(2048, buf.slice_full(), &ctx)\n            .await\n            .err()\n            .unwrap();\n        assert_eq!(err.to_string(), \"foo\");\n    }\n\n    #[tokio::test]\n    
async fn test_error_on_one_chunk_read_fails_only_dependent_logical_reads() {\n        let ctx = RequestContext::new(TaskKind::UnitTest, DownloadBehavior::Error);\n\n        let test_logical_reads = vec![\n            // read spanning two batches\n            TestLogicalRead::new(\n                DIO_CHUNK_SIZE.into_u64() / 2,\n                MAX_CHUNK_BATCH_SIZE * DIO_CHUNK_SIZE,\n                Err(\"foo\".to_owned()),\n            ),\n            // second read in failing chunk\n            TestLogicalRead::new(\n                (MAX_CHUNK_BATCH_SIZE * DIO_CHUNK_SIZE).into_u64() + DIO_CHUNK_SIZE.into_u64() - 10,\n                5,\n                Err(\"foo\".to_owned()),\n            ),\n            // read unaffected\n            TestLogicalRead::new(\n                (MAX_CHUNK_BATCH_SIZE * DIO_CHUNK_SIZE).into_u64()\n                    + 2 * DIO_CHUNK_SIZE.into_u64()\n                    + 10,\n                5,\n                Ok(vec![1; 5]),\n            ),\n        ];\n        let (tmp, test_logical_reads) = test_logical_reads.into_iter().tee();\n        let test_logical_read_perms = tmp.permutations(test_logical_reads.len());\n\n        for test_logical_reads in test_logical_read_perms {\n            let file = mock_file!(\n                0, MAX_CHUNK_BATCH_SIZE*DIO_CHUNK_SIZE => Ok(vec![0; MAX_CHUNK_BATCH_SIZE*DIO_CHUNK_SIZE]),\n                (MAX_CHUNK_BATCH_SIZE*DIO_CHUNK_SIZE).into_u64(), DIO_CHUNK_SIZE => Err(\"foo\".to_owned()),\n                (MAX_CHUNK_BATCH_SIZE*DIO_CHUNK_SIZE + 2*DIO_CHUNK_SIZE).into_u64(), DIO_CHUNK_SIZE => Ok(vec![1; DIO_CHUNK_SIZE]),\n            );\n            execute_and_validate_test_logical_reads(&file, test_logical_reads, &ctx).await;\n        }\n    }\n\n    struct TestShortReadsSetup {\n        ctx: RequestContext,\n        file: InMemoryFile,\n        written: u64,\n    }\n    fn setup_short_chunk_read_tests() -> TestShortReadsSetup {\n        let ctx = RequestContext::new(TaskKind::UnitTest, 
DownloadBehavior::Error);\n        assert!(DIO_CHUNK_SIZE > 20, \"test assumption\");\n        let written = (2 * DIO_CHUNK_SIZE - 10).into_u64();\n        let file = InMemoryFile::new_random(written as usize);\n        TestShortReadsSetup { ctx, file, written }\n    }\n\n    #[tokio::test]\n    async fn test_short_chunk_read_from_written_range() {\n        // Test what happens if there are logical reads\n        // that start within the last chunk, and\n        // the last chunk is not the full chunk length.\n        //\n        // The read should succeed despite the short chunk length.\n        let TestShortReadsSetup { ctx, file, written } = setup_short_chunk_read_tests();\n\n        let a = file.test_logical_read(written - 10, 5);\n        let recorder = RecorderFile::new(&file);\n\n        execute_and_validate_test_logical_reads(&recorder, vec![a], &ctx).await;\n\n        let recorded = recorder.recorded.borrow();\n        assert_eq!(recorded.len(), 1);\n        let RecordedRead { pos, req_len, res } = &recorded[0];\n        assert_eq!(*pos, DIO_CHUNK_SIZE.into_u64());\n        assert_eq!(*req_len, DIO_CHUNK_SIZE);\n        assert_eq!(res, &file.content[DIO_CHUNK_SIZE..(written as usize)]);\n    }\n\n    #[tokio::test]\n    async fn test_short_chunk_read_and_logical_read_from_unwritten_range() {\n        // Test what happens if there are logical reads\n        // that start within the last chunk, and\n        // the last chunk is not the full chunk length, and\n        // the logical reads end in the unwritten range.\n        //\n        // All should fail with UnexpectedEof and have the same IO pattern.\n        async fn the_impl(offset_delta: i64) {\n            let TestShortReadsSetup { ctx, file, written } = setup_short_chunk_read_tests();\n\n            let offset = u64::try_from(\n                i64::try_from(written)\n                    .unwrap()\n                    .checked_add(offset_delta)\n                    .unwrap(),\n            )\n           
 .unwrap();\n            let a = file.test_logical_read(offset, 5);\n            let recorder = RecorderFile::new(&file);\n            let a_vr = a.make_logical_read();\n            execute(&recorder, vec![&a_vr], &ctx).await;\n\n            // validate the LogicalRead result\n            let a_res = a_vr.into_result().unwrap();\n            let a_err = a_res.unwrap_err();\n            assert_eq!(a_err.kind(), std::io::ErrorKind::UnexpectedEof);\n\n            // validate the IO pattern\n            let recorded = recorder.recorded.borrow();\n            assert_eq!(recorded.len(), 1);\n            let RecordedRead { pos, req_len, res } = &recorded[0];\n            assert_eq!(*pos, DIO_CHUNK_SIZE.into_u64());\n            assert_eq!(*req_len, DIO_CHUNK_SIZE);\n            assert_eq!(res, &file.content[DIO_CHUNK_SIZE..(written as usize)]);\n        }\n\n        the_impl(-1).await; // start == length - 1\n        the_impl(0).await; // start == length\n        the_impl(1).await; // start == length + 1\n    }\n\n    // TODO: mixed: some valid, some UnexpectedEof\n\n    // TODO: same tests but with merges\n}\n"
  },
  {
    "path": "pageserver/src/tenant/storage_layer/inmemory_layer.rs",
    "content": "//! An in-memory layer stores recently received key-value pairs.\n//!\n//! The \"in-memory\" part of the name is a bit misleading: the actual page versions are\n//! held in an ephemeral file, not in memory. The metadata for each page version, i.e.\n//! its position in the file, is kept in memory, though.\n//!\nuse std::cmp::Ordering;\nuse std::collections::{BTreeMap, HashMap};\nuse std::fmt::Write;\nuse std::ops::Range;\nuse std::sync::atomic::{AtomicU64, AtomicUsize, Ordering as AtomicOrdering};\nuse std::sync::{Arc, OnceLock};\nuse std::time::Instant;\n\nuse anyhow::Result;\nuse camino::Utf8PathBuf;\nuse pageserver_api::key::{CompactKey, Key};\nuse pageserver_api::keyspace::KeySpace;\nuse pageserver_api::models::InMemoryLayerInfo;\nuse pageserver_api::shard::TenantShardId;\nuse tokio::sync::RwLock;\nuse tokio_util::sync::CancellationToken;\nuse tracing::*;\nuse utils::id::TimelineId;\nuse utils::lsn::Lsn;\nuse utils::vec_map::VecMap;\nuse wal_decoder::serialized_batch::{SerializedValueBatch, SerializedValueMeta, ValueMeta};\n\nuse super::{DeltaLayerWriter, PersistentLayerDesc, ValuesReconstructState};\nuse crate::assert_u64_eq_usize::{U64IsUsize, UsizeIsU64, u64_to_usize};\nuse crate::config::PageServerConf;\nuse crate::context::{PageContentKind, RequestContext, RequestContextBuilder};\n// avoid binding to Write (conflicts with std::io::Write)\n// while being able to use std::fmt::Write's methods\nuse crate::metrics::TIMELINE_EPHEMERAL_BYTES;\nuse crate::tenant::ephemeral_file::EphemeralFile;\nuse crate::tenant::storage_layer::{OnDiskValue, OnDiskValueIo};\nuse crate::tenant::timeline::GetVectoredError;\nuse crate::virtual_file::owned_buffers_io::io_buf_ext::IoBufExt;\nuse crate::{l0_flush, page_cache};\n\npub(crate) mod vectored_dio_read;\n\n#[derive(Debug, PartialEq, Eq, Clone, Copy, Hash)]\npub(crate) struct InMemoryLayerFileId(page_cache::FileId);\n\npub struct InMemoryLayer {\n    conf: &'static PageServerConf,\n    tenant_shard_id: 
TenantShardId,\n    timeline_id: TimelineId,\n    file_id: InMemoryLayerFileId,\n\n    /// This layer contains all the changes from 'start_lsn'. The\n    /// start is inclusive.\n    start_lsn: Lsn,\n\n    /// Frozen layers have an exclusive end LSN.\n    /// Writes are only allowed when this is `None`.\n    pub(crate) end_lsn: OnceLock<Lsn>,\n\n    /// Used for traversal path. Cached representation of the in-memory layer after frozen.\n    frozen_local_path_str: OnceLock<Arc<str>>,\n\n    opened_at: Instant,\n\n    /// All versions of all pages in the layer are kept here. Indexed\n    /// by block number and LSN. The [`IndexEntry`] is an offset into the\n    /// ephemeral file where the page version is stored.\n    ///\n    /// We use a separate lock for the index to reduce the critical section\n    /// during which reads cannot be planned.\n    ///\n    /// Note that the file backing [`InMemoryLayer::file`] is append-only,\n    /// so it is not necessary to hold a lock on the index while reading or writing from the file.\n    /// In particular:\n    /// 1. It is safe to read and release [`InMemoryLayer::index`] before reading from [`InMemoryLayer::file`].\n    /// 2. It is safe to write to [`InMemoryLayer::file`] before locking and updating [`InMemoryLayer::index`].\n    index: RwLock<BTreeMap<CompactKey, VecMap<Lsn, IndexEntry>>>,\n\n    /// Wrapper for the actual on-disk file. 
Uses interior mutability for concurrent reads/writes.\n    file: EphemeralFile,\n\n    estimated_in_mem_size: AtomicU64,\n}\n\nimpl std::fmt::Debug for InMemoryLayer {\n    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {\n        f.debug_struct(\"InMemoryLayer\")\n            .field(\"start_lsn\", &self.start_lsn)\n            .field(\"end_lsn\", &self.end_lsn)\n            .finish()\n    }\n}\n\n/// Support the same max blob length as blob_io, because ultimately\n/// all the InMemoryLayer contents end up being written into a delta layer,\n/// using the [`crate::tenant::blob_io`].\nconst MAX_SUPPORTED_BLOB_LEN: usize = crate::tenant::blob_io::MAX_SUPPORTED_BLOB_LEN;\nconst MAX_SUPPORTED_BLOB_LEN_BITS: usize = {\n    let trailing_ones = MAX_SUPPORTED_BLOB_LEN.trailing_ones() as usize;\n    let leading_zeroes = MAX_SUPPORTED_BLOB_LEN.leading_zeros() as usize;\n    assert!(trailing_ones + leading_zeroes == std::mem::size_of::<usize>() * 8);\n    trailing_ones\n};\n\n/// See [`InMemoryLayer::index`].\n///\n/// For memory efficiency, the data is packed into a u64.\n///\n/// Layout:\n/// - 1 bit: `will_init`\n/// - [`MAX_SUPPORTED_BLOB_LEN_BITS`][]: `len`\n/// - [`MAX_SUPPORTED_POS_BITS`](IndexEntry::MAX_SUPPORTED_POS_BITS): `pos`\n#[derive(Debug, Clone, Copy, PartialEq, Eq)]\npub struct IndexEntry(u64);\n\nimpl IndexEntry {\n    /// See [`Self::MAX_SUPPORTED_POS`].\n    const MAX_SUPPORTED_POS_BITS: usize = {\n        let remainder = 64 - 1 - MAX_SUPPORTED_BLOB_LEN_BITS;\n        if remainder < 32 {\n            panic!(\"pos can be u32 as per type system, support that\");\n        }\n        remainder\n    };\n    /// The maximum supported blob offset that can be represented by [`Self`].\n    /// See also [`Self::validate_checkpoint_distance`].\n    const MAX_SUPPORTED_POS: usize = (1 << Self::MAX_SUPPORTED_POS_BITS) - 1;\n\n    // Layout\n    const WILL_INIT_RANGE: Range<usize> = 0..1;\n    const LEN_RANGE: Range<usize> =\n        
Self::WILL_INIT_RANGE.end..Self::WILL_INIT_RANGE.end + MAX_SUPPORTED_BLOB_LEN_BITS;\n    const POS_RANGE: Range<usize> =\n        Self::LEN_RANGE.end..Self::LEN_RANGE.end + Self::MAX_SUPPORTED_POS_BITS;\n    const _ASSERT: () = {\n        if Self::POS_RANGE.end != 64 {\n            panic!(\"we don't want undefined bits for our own sanity\")\n        }\n    };\n\n    /// Fails if and only if the offset or length encoded in `arg` is too large to be represented by [`Self`].\n    ///\n    /// The only reason why that can happen in the system is if the [`InMemoryLayer`] grows too long.\n    /// The [`InMemoryLayer`] size is determined by the checkpoint distance, enforced by [`crate::tenant::Timeline::should_roll`].\n    ///\n    /// Thus, to avoid failure of this function, whenever we start up and/or change checkpoint distance,\n    /// call [`Self::validate_checkpoint_distance`] with the new checkpoint distance value.\n    ///\n    /// TODO: this check should happen ideally at config parsing time (and in the request handler when a change to checkpoint distance is requested)\n    /// When cleaning this up, also look into the s3 max file size check that is performed in delta layer writer.\n    #[inline(always)]\n    fn new(arg: IndexEntryNewArgs) -> anyhow::Result<Self> {\n        let IndexEntryNewArgs {\n            base_offset,\n            batch_offset,\n            len,\n            will_init,\n        } = arg;\n\n        let pos = base_offset\n            .checked_add(batch_offset)\n            .ok_or_else(|| anyhow::anyhow!(\"base_offset + batch_offset overflows u64: base_offset={base_offset} batch_offset={batch_offset}\"))?;\n\n        if pos.into_usize() > Self::MAX_SUPPORTED_POS {\n            anyhow::bail!(\n                \"base_offset+batch_offset exceeds the maximum supported value: base_offset={base_offset} batch_offset={batch_offset} (+)={pos} max={max}\",\n                max = Self::MAX_SUPPORTED_POS\n            );\n        }\n\n        if len > 
MAX_SUPPORTED_BLOB_LEN {\n            anyhow::bail!(\n                \"len exceeds the maximum supported length: len={len} max={MAX_SUPPORTED_BLOB_LEN}\",\n            );\n        }\n\n        let mut data: u64 = 0;\n        use bit_field::BitField;\n        data.set_bits(Self::WILL_INIT_RANGE, if will_init { 1 } else { 0 });\n        data.set_bits(Self::LEN_RANGE, len.into_u64());\n        data.set_bits(Self::POS_RANGE, pos);\n\n        Ok(Self(data))\n    }\n\n    #[inline(always)]\n    fn unpack(&self) -> IndexEntryUnpacked {\n        use bit_field::BitField;\n        IndexEntryUnpacked {\n            will_init: self.0.get_bits(Self::WILL_INIT_RANGE) != 0,\n            len: self.0.get_bits(Self::LEN_RANGE),\n            pos: self.0.get_bits(Self::POS_RANGE),\n        }\n    }\n\n    /// See [`Self::new`].\n    pub(crate) const fn validate_checkpoint_distance(\n        checkpoint_distance: u64,\n    ) -> Result<(), &'static str> {\n        if checkpoint_distance > Self::MAX_SUPPORTED_POS as u64 {\n            return Err(\"exceeds the maximum supported value\");\n        }\n        let res = u64_to_usize(checkpoint_distance).checked_add(MAX_SUPPORTED_BLOB_LEN);\n        if res.is_none() {\n            return Err(\n                \"checkpoint distance + max supported blob len overflows in-memory addition\",\n            );\n        }\n\n        // NB: it is ok for the result of the addition to be larger than MAX_SUPPORTED_POS\n\n        Ok(())\n    }\n\n    const _ASSERT_DEFAULT_CHECKPOINT_DISTANCE_IS_VALID: () = {\n        let res = Self::validate_checkpoint_distance(\n            pageserver_api::config::tenant_conf_defaults::DEFAULT_CHECKPOINT_DISTANCE,\n        );\n        if res.is_err() {\n            panic!(\"default checkpoint distance is valid\")\n        }\n    };\n}\n\n/// Args to [`IndexEntry::new`].\n#[derive(Clone, Copy)]\nstruct IndexEntryNewArgs {\n    base_offset: u64,\n    batch_offset: u64,\n    len: usize,\n    will_init: bool,\n}\n\n/// 
Unpacked representation of the bitfielded [`IndexEntry`].\n#[derive(Clone, Copy, PartialEq, Eq, Debug)]\nstruct IndexEntryUnpacked {\n    will_init: bool,\n    len: u64,\n    pos: u64,\n}\n\n/// State shared by all in-memory (ephemeral) layers.  Updated infrequently during background ticks in Timeline,\n/// to minimize contention.\n///\n/// This global state is used to implement behaviors that require a global view of the system, e.g.\n/// rolling layers proactively to limit the total amount of dirty data.\npub(crate) struct GlobalResources {\n    // Limit on how high dirty_bytes may grow before we start freezing layers to reduce it.\n    // Zero means unlimited.\n    pub(crate) max_dirty_bytes: AtomicU64,\n    // How many bytes are in all EphemeralFile objects\n    dirty_bytes: AtomicU64,\n    // How many layers are contributing to dirty_bytes\n    dirty_layers: AtomicUsize,\n}\n\n// Per-timeline RAII struct for its contribution to [`GlobalResources`]\npub(crate) struct GlobalResourceUnits {\n    // How many dirty bytes have I added to the global dirty_bytes: this guard object is responsible\n    // for decrementing the global counter by this many bytes when dropped.\n    dirty_bytes: u64,\n}\n\nimpl GlobalResourceUnits {\n    // Hint for the layer append path to update us when the layer size differs from the last\n    // call to update_size by this much.  If we don't reach this threshold, we'll still get\n    // updated when the Timeline \"ticks\" in the background.\n    const MAX_SIZE_DRIFT: u64 = 10 * 1024 * 1024;\n\n    pub(crate) fn new() -> Self {\n        GLOBAL_RESOURCES\n            .dirty_layers\n            .fetch_add(1, AtomicOrdering::Relaxed);\n        Self { dirty_bytes: 0 }\n    }\n\n    /// Do not call this frequently: all timelines will write to these same global atomics,\n    /// so this is a relatively expensive operation.  
Wait at least a few seconds between calls.\n    ///\n    /// Returns the effective layer size limit that should be applied, if any, to keep\n    /// the total number of dirty bytes below the configured maximum.\n    pub(crate) fn publish_size(&mut self, size: u64) -> Option<u64> {\n        let new_global_dirty_bytes = match size.cmp(&self.dirty_bytes) {\n            Ordering::Equal => GLOBAL_RESOURCES.dirty_bytes.load(AtomicOrdering::Relaxed),\n            Ordering::Greater => {\n                let delta = size - self.dirty_bytes;\n                let old = GLOBAL_RESOURCES\n                    .dirty_bytes\n                    .fetch_add(delta, AtomicOrdering::Relaxed);\n                old + delta\n            }\n            Ordering::Less => {\n                let delta = self.dirty_bytes - size;\n                let old = GLOBAL_RESOURCES\n                    .dirty_bytes\n                    .fetch_sub(delta, AtomicOrdering::Relaxed);\n                old - delta\n            }\n        };\n\n        // This is a sloppy update: concurrent updates to the counter will race, and the exact\n        // value of the metric might not be the exact latest value of GLOBAL_RESOURCES::dirty_bytes.\n        // That's okay: as long as the metric contains some recent value, it doesn't have to always\n        // be literally the last update.\n        TIMELINE_EPHEMERAL_BYTES.set(new_global_dirty_bytes);\n\n        self.dirty_bytes = size;\n\n        let max_dirty_bytes = GLOBAL_RESOURCES\n            .max_dirty_bytes\n            .load(AtomicOrdering::Relaxed);\n        if max_dirty_bytes > 0 && new_global_dirty_bytes > max_dirty_bytes {\n            // Set the layer file limit to the average layer size: this implies that all above-average\n            // sized layers will be eligible for freezing.  
They will be frozen in the order they\n            // next enter publish_size.\n            Some(\n                new_global_dirty_bytes\n                    / GLOBAL_RESOURCES.dirty_layers.load(AtomicOrdering::Relaxed) as u64,\n            )\n        } else {\n            None\n        }\n    }\n\n    // Call publish_size if the input size differs from last published size by more than\n    // the drift limit\n    pub(crate) fn maybe_publish_size(&mut self, size: u64) {\n        let publish = match size.cmp(&self.dirty_bytes) {\n            Ordering::Equal => false,\n            Ordering::Greater => size - self.dirty_bytes > Self::MAX_SIZE_DRIFT,\n            Ordering::Less => self.dirty_bytes - size > Self::MAX_SIZE_DRIFT,\n        };\n\n        if publish {\n            self.publish_size(size);\n        }\n    }\n}\n\nimpl Drop for GlobalResourceUnits {\n    fn drop(&mut self) {\n        GLOBAL_RESOURCES\n            .dirty_layers\n            .fetch_sub(1, AtomicOrdering::Relaxed);\n\n        // Subtract our contribution to the global total dirty bytes\n        self.publish_size(0);\n    }\n}\n\npub(crate) static GLOBAL_RESOURCES: GlobalResources = GlobalResources {\n    max_dirty_bytes: AtomicU64::new(0),\n    dirty_bytes: AtomicU64::new(0),\n    dirty_layers: AtomicUsize::new(0),\n};\n\nimpl InMemoryLayer {\n    pub(crate) fn file_id(&self) -> InMemoryLayerFileId {\n        self.file_id\n    }\n\n    pub(crate) fn get_timeline_id(&self) -> TimelineId {\n        self.timeline_id\n    }\n\n    pub(crate) fn info(&self) -> InMemoryLayerInfo {\n        let lsn_start = self.start_lsn;\n\n        if let Some(&lsn_end) = self.end_lsn.get() {\n            InMemoryLayerInfo::Frozen { lsn_start, lsn_end }\n        } else {\n            InMemoryLayerInfo::Open { lsn_start }\n        }\n    }\n\n    pub(crate) fn len(&self) -> u64 {\n        self.file.len()\n    }\n\n    pub(crate) fn assert_writable(&self) {\n        assert!(self.end_lsn.get().is_none());\n    }\n\n    
pub(crate) fn end_lsn_or_max(&self) -> Lsn {\n        self.end_lsn.get().copied().unwrap_or(Lsn::MAX)\n    }\n\n    pub(crate) fn get_lsn_range(&self) -> Range<Lsn> {\n        self.start_lsn..self.end_lsn_or_max()\n    }\n\n    /// debugging function to print out the contents of the layer\n    ///\n    /// this is likely completely unused\n    pub async fn dump(&self, _verbose: bool, _ctx: &RequestContext) -> Result<()> {\n        let end_str = self.end_lsn_or_max();\n\n        println!(\n            \"----- in-memory layer for tli {} LSNs {}-{} ----\",\n            self.timeline_id, self.start_lsn, end_str,\n        );\n\n        Ok(())\n    }\n\n    // Look up the keys in the provided keyspace and update\n    // the reconstruct state with whatever is found.\n    pub async fn get_values_reconstruct_data(\n        self: &Arc<InMemoryLayer>,\n        keyspace: KeySpace,\n        lsn_range: Range<Lsn>,\n        reconstruct_state: &mut ValuesReconstructState,\n        ctx: &RequestContext,\n    ) -> Result<(), GetVectoredError> {\n        let ctx = RequestContextBuilder::from(ctx)\n            .page_content_kind(PageContentKind::InMemoryLayer)\n            .attached_child();\n\n        let index = self.index.read().await;\n\n        struct ValueRead {\n            entry_lsn: Lsn,\n            read: vectored_dio_read::LogicalRead<Vec<u8>>,\n        }\n        let mut reads: HashMap<Key, Vec<ValueRead>> = HashMap::new();\n        let mut ios: HashMap<(Key, Lsn), OnDiskValueIo> = Default::default();\n\n        for range in keyspace.ranges.iter() {\n            for (key, vec_map) in index.range(range.start.to_compact()..range.end.to_compact()) {\n                let key = Key::from_compact(*key);\n                let slice = vec_map.slice_range(lsn_range.clone());\n\n                for (entry_lsn, index_entry) in slice.iter().rev() {\n                    let IndexEntryUnpacked {\n                        pos,\n                        len,\n                        
will_init,\n                    } = index_entry.unpack();\n\n                    reads.entry(key).or_default().push(ValueRead {\n                        entry_lsn: *entry_lsn,\n                        read: vectored_dio_read::LogicalRead::new(\n                            pos,\n                            Vec::with_capacity(len as usize),\n                        ),\n                    });\n\n                    let io = reconstruct_state.update_key(&key, *entry_lsn, will_init);\n                    ios.insert((key, *entry_lsn), io);\n\n                    if will_init {\n                        break;\n                    }\n                }\n            }\n        }\n        drop(index); // release the lock before we spawn the IO\n        let read_from = Arc::clone(self);\n        let read_ctx = ctx.attached_child();\n        reconstruct_state\n            .spawn_io(async move {\n                let f = vectored_dio_read::execute(\n                    &read_from.file,\n                    reads\n                        .iter()\n                        .flat_map(|(_, value_reads)| value_reads.iter().map(|v| &v.read)),\n                    &read_ctx,\n                );\n                send_future::SendFuture::send(f) // https://github.com/rust-lang/rust/issues/96865\n                    .await;\n\n                for (key, value_reads) in reads {\n                    for ValueRead { entry_lsn, read } in value_reads {\n                        let io = ios.remove(&(key, entry_lsn)).expect(\"sender must exist\");\n                        match read.into_result().expect(\"we run execute() above\") {\n                            Err(e) => {\n                                io.complete(Err(std::io::Error::new(\n                                    e.kind(),\n                                    \"dio vec read failed\",\n                                )));\n                            }\n                            Ok(value_buf) => {\n                                
io.complete(Ok(OnDiskValue::WalRecordOrImage(value_buf.into())));\n                            }\n                        }\n                    }\n                }\n\n                assert!(ios.is_empty());\n\n                // Keep layer existent until this IO is done;\n                // This is kinda forced for InMemoryLayer because we need to inner.read() anyway,\n                // but it's less obvious for DeltaLayer and ImageLayer. So, keep this explicit\n                // drop for consistency among all three layer types.\n                drop(read_from);\n            })\n            .await;\n\n        Ok(())\n    }\n}\n\nfn inmem_layer_display(mut f: impl Write, start_lsn: Lsn, end_lsn: Lsn) -> std::fmt::Result {\n    write!(f, \"inmem-{:016X}-{:016X}\", start_lsn.0, end_lsn.0)\n}\n\nfn inmem_layer_log_display(\n    mut f: impl Write,\n    timeline: TimelineId,\n    start_lsn: Lsn,\n    end_lsn: Lsn,\n) -> std::fmt::Result {\n    write!(f, \"timeline {timeline} in-memory \")?;\n    inmem_layer_display(f, start_lsn, end_lsn)\n}\n\nimpl std::fmt::Display for InMemoryLayer {\n    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {\n        let end_lsn = self.end_lsn_or_max();\n        inmem_layer_display(f, self.start_lsn, end_lsn)\n    }\n}\n\nimpl InMemoryLayer {\n    pub fn estimated_in_mem_size(&self) -> u64 {\n        self.estimated_in_mem_size.load(AtomicOrdering::Relaxed)\n    }\n\n    /// Create a new, empty, in-memory layer\n    pub async fn create(\n        conf: &'static PageServerConf,\n        timeline_id: TimelineId,\n        tenant_shard_id: TenantShardId,\n        start_lsn: Lsn,\n        gate: &utils::sync::gate::Gate,\n        cancel: &CancellationToken,\n        ctx: &RequestContext,\n    ) -> Result<InMemoryLayer> {\n        trace!(\n            \"initializing new empty InMemoryLayer for writing on timeline {timeline_id} at {start_lsn}\"\n        );\n\n        let file =\n            EphemeralFile::create(conf, 
tenant_shard_id, timeline_id, gate, cancel, ctx).await?;\n        let key = InMemoryLayerFileId(file.page_cache_file_id());\n\n        Ok(InMemoryLayer {\n            file_id: key,\n            frozen_local_path_str: OnceLock::new(),\n            conf,\n            timeline_id,\n            tenant_shard_id,\n            start_lsn,\n            end_lsn: OnceLock::new(),\n            opened_at: Instant::now(),\n            index: RwLock::new(BTreeMap::new()),\n            file,\n            estimated_in_mem_size: AtomicU64::new(0),\n        })\n    }\n\n    /// Write path.\n    ///\n    /// Errors are not retryable, the [`InMemoryLayer`] must be discarded, and not be read from.\n    /// The reason why it's not retryable is that the [`EphemeralFile`] writes are not retryable.\n    ///\n    /// This method shall not be called concurrently. We enforce this property via [`crate::tenant::Timeline::write_lock`].\n    ///\n    /// TODO: it can be made retryable if we aborted the process on EphemeralFile write errors.\n    pub async fn put_batch(\n        &self,\n        serialized_batch: SerializedValueBatch,\n        ctx: &RequestContext,\n    ) -> anyhow::Result<()> {\n        self.assert_writable();\n\n        let base_offset = self.file.len();\n\n        let SerializedValueBatch {\n            raw,\n            metadata,\n            max_lsn: _,\n            len: _,\n        } = serialized_batch;\n\n        // Write the batch to the file\n        self.file.write_raw(&raw, ctx).await?;\n        let new_size = self.file.len();\n\n        let expected_new_len = base_offset\n            .checked_add(raw.len().into_u64())\n            // write_raw would error if we were to overflow u64.\n            // also IndexEntry and higher levels in\n            //the code don't allow the file to grow that large\n            .unwrap();\n        assert_eq!(new_size, expected_new_len);\n\n        // Update the index with the new entries\n        let mut index = 
self.index.write().await;\n\n        for meta in metadata {\n            let SerializedValueMeta {\n                key,\n                lsn,\n                batch_offset,\n                len,\n                will_init,\n            } = match meta {\n                ValueMeta::Serialized(ser) => ser,\n                ValueMeta::Observed(_) => {\n                    continue;\n                }\n            };\n\n            // Add the base_offset to the batch's index entries which are relative to the batch start.\n            let index_entry = IndexEntry::new(IndexEntryNewArgs {\n                base_offset,\n                batch_offset,\n                len,\n                will_init,\n            })?;\n\n            let vec_map = index.entry(key).or_default();\n            let old = vec_map.append_or_update_last(lsn, index_entry).unwrap().0;\n            if old.is_some() {\n                // This should not break anything, but is unexpected: ingestion code aims to filter out\n                // multiple writes to the same key at the same LSN.  This happens in cases where our\n                // ingestion code generates some write like an empty page, and we see a write from postgres\n                // to the same key in the same wal record.  If one such write makes it through, we\n                // index the most recent write, implicitly ignoring the earlier write.  
We log a warning\n                // because this case is unexpected, and we would like tests to fail if this happens.\n                warn!(\"Key {} at {} written twice at same LSN\", key, lsn);\n            }\n            self.estimated_in_mem_size.fetch_add(\n                (std::mem::size_of::<CompactKey>()\n                    + std::mem::size_of::<Lsn>()\n                    + std::mem::size_of::<IndexEntry>()) as u64,\n                AtomicOrdering::Relaxed,\n            );\n        }\n\n        Ok(())\n    }\n\n    pub(crate) fn get_opened_at(&self) -> Instant {\n        self.opened_at\n    }\n\n    pub(crate) fn tick(&self) -> Option<u64> {\n        self.file.tick()\n    }\n\n    pub(crate) async fn put_tombstones(&self, _key_ranges: &[(Range<Key>, Lsn)]) -> Result<()> {\n        // TODO: Currently, we just leak the storage for any deleted keys\n        Ok(())\n    }\n\n    /// Records the end_lsn for non-dropped layers.\n    /// `end_lsn` is exclusive\n    ///\n    /// A note on locking:\n    /// The current API of [`InMemoryLayer`] does not ensure that there's no ongoing\n    /// writes while freezing the layer. This is enforced at a higher level via\n    /// [`crate::tenant::Timeline::write_lock`]. Freeze might be called via two code paths:\n    /// 1. Via the active [`crate::tenant::timeline::TimelineWriter`]. This holds the\n    ///    Timeline::write_lock for its lifetime. The rolling is handled in\n    ///    [`crate::tenant::timeline::TimelineWriter::put_batch`]. It's a &mut self function\n    ///    so can't be called from different threads.\n    /// 2. In the background via [`crate::tenant::Timeline::maybe_freeze_ephemeral_layer`].\n    ///    This only proceeds if try_lock on Timeline::write_lock succeeds (i.e. 
there's no active writer),\n    ///    hence there can be no concurrent writes\n    pub async fn freeze(&self, end_lsn: Lsn) {\n        assert!(\n            self.start_lsn < end_lsn,\n            \"{} >= {}\",\n            self.start_lsn,\n            end_lsn\n        );\n        self.end_lsn.set(end_lsn).expect(\"end_lsn set only once\");\n\n        self.frozen_local_path_str\n            .set({\n                let mut buf = String::new();\n                inmem_layer_log_display(&mut buf, self.get_timeline_id(), self.start_lsn, end_lsn)\n                    .unwrap();\n                buf.into()\n            })\n            .expect(\"frozen_local_path_str set only once\");\n\n        #[cfg(debug_assertions)]\n        {\n            let index = self.index.read().await;\n            for vec_map in index.values() {\n                for (lsn, _) in vec_map.as_slice() {\n                    assert!(*lsn < end_lsn);\n                }\n            }\n        }\n    }\n\n    /// Write this frozen in-memory layer to disk. If `key_range` is set, the delta\n    /// layer will only contain the key range the user specifies, and may return `None`\n    /// if there are no matching keys.\n    ///\n    /// Returns a new delta layer with all the same data as this in-memory layer\n    pub async fn write_to_disk(\n        &self,\n        ctx: &RequestContext,\n        key_range: Option<Range<Key>>,\n        l0_flush_global_state: &l0_flush::Inner,\n        gate: &utils::sync::gate::Gate,\n        cancel: CancellationToken,\n    ) -> Result<Option<(PersistentLayerDesc, Utf8PathBuf)>> {\n        let index = self.index.read().await;\n\n        use l0_flush::Inner;\n        let _concurrency_permit = match l0_flush_global_state {\n            Inner::Direct { semaphore, .. 
} => Some(semaphore.acquire().await),\n        };\n\n        let end_lsn = *self.end_lsn.get().unwrap();\n\n        let key_count = if let Some(key_range) = key_range {\n            let key_range = key_range.start.to_compact()..key_range.end.to_compact();\n\n            index.iter().filter(|(k, _)| key_range.contains(k)).count()\n        } else {\n            index.len()\n        };\n        if key_count == 0 {\n            return Ok(None);\n        }\n\n        let mut delta_layer_writer = DeltaLayerWriter::new(\n            self.conf,\n            self.timeline_id,\n            self.tenant_shard_id,\n            Key::MIN,\n            self.start_lsn..end_lsn,\n            gate,\n            cancel,\n            ctx,\n        )\n        .await?;\n\n        match l0_flush_global_state {\n            l0_flush::Inner::Direct { .. } => {\n                let file_contents = self.file.load_to_io_buf(ctx).await?;\n                let file_contents = file_contents.freeze();\n\n                for (key, vec_map) in index.iter() {\n                    // Write all page versions\n                    for (lsn, entry) in vec_map\n                        .as_slice()\n                        .iter()\n                        .map(|(lsn, entry)| (lsn, entry.unpack()))\n                    {\n                        let IndexEntryUnpacked {\n                            pos,\n                            len,\n                            will_init,\n                        } = entry;\n                        let buf = file_contents.slice(pos as usize..(pos + len) as usize);\n                        let (_buf, res) = delta_layer_writer\n                            .put_value_bytes(\n                                Key::from_compact(*key),\n                                *lsn,\n                                buf.slice_len(),\n                                will_init,\n                                ctx,\n                            )\n                            .await;\n          
              res?;\n                    }\n                }\n            }\n        }\n\n        // MAX is used here because we identify L0 layers by full key range\n        let (desc, path) = delta_layer_writer.finish(Key::MAX, ctx).await?;\n\n        // Hold the permit until all the IO is done, including the fsync in `delta_layer_writer.finish()`.\n        //\n        // If we didn't and our caller drops this future, tokio-epoll-uring would extend the lifetime of\n        // the `file_contents: Vec<u8>` until the IO is done, but not the permit's lifetime.\n        // Thus, we'd have more concurrent `Vec<u8>` in existence than the semaphore allows.\n        //\n        // We hold across the fsync so that on ext4 mounted with data=ordered, all the kernel page cache pages\n        // we dirtied when writing to the filesystem have been flushed and marked !dirty.\n        drop(_concurrency_permit);\n\n        Ok(Some((desc, path)))\n    }\n}\n\n#[cfg(test)]\nmod tests {\n    use super::*;\n\n    #[test]\n    fn test_index_entry() {\n        const MAX_SUPPORTED_POS: usize = IndexEntry::MAX_SUPPORTED_POS;\n        use {IndexEntryNewArgs as Args, IndexEntryUnpacked as Unpacked};\n\n        let roundtrip = |args, expect: Unpacked| {\n            let res = IndexEntry::new(args).expect(\"this tests expects no errors\");\n            let IndexEntryUnpacked {\n                will_init,\n                len,\n                pos,\n            } = res.unpack();\n            assert_eq!(will_init, expect.will_init);\n            assert_eq!(len, expect.len);\n            assert_eq!(pos, expect.pos);\n        };\n\n        // basic roundtrip\n        for pos in [0, MAX_SUPPORTED_POS] {\n            for len in [0, MAX_SUPPORTED_BLOB_LEN] {\n                for will_init in [true, false] {\n                    let expect = Unpacked {\n                        will_init,\n                        len: len.into_u64(),\n                        pos: pos.into_u64(),\n                   
 };\n                    roundtrip(\n                        Args {\n                            will_init,\n                            base_offset: pos.into_u64(),\n                            batch_offset: 0,\n                            len,\n                        },\n                        expect,\n                    );\n                    roundtrip(\n                        Args {\n                            will_init,\n                            base_offset: 0,\n                            batch_offset: pos.into_u64(),\n                            len,\n                        },\n                        expect,\n                    );\n                }\n            }\n        }\n\n        // too-large len\n        let too_large = Args {\n            will_init: false,\n            len: MAX_SUPPORTED_BLOB_LEN + 1,\n            base_offset: 0,\n            batch_offset: 0,\n        };\n        assert!(IndexEntry::new(too_large).is_err());\n\n        // too-large pos\n        {\n            let too_large = Args {\n                will_init: false,\n                len: 0,\n                base_offset: MAX_SUPPORTED_POS.into_u64() + 1,\n                batch_offset: 0,\n            };\n            assert!(IndexEntry::new(too_large).is_err());\n            let too_large = Args {\n                will_init: false,\n                len: 0,\n                base_offset: 0,\n                batch_offset: MAX_SUPPORTED_POS.into_u64() + 1,\n            };\n            assert!(IndexEntry::new(too_large).is_err());\n        }\n\n        // too large (base_offset + batch_offset)\n        {\n            let too_large = Args {\n                will_init: false,\n                len: 0,\n                base_offset: MAX_SUPPORTED_POS.into_u64(),\n                batch_offset: 1,\n            };\n            assert!(IndexEntry::new(too_large).is_err());\n            let too_large = Args {\n                will_init: false,\n                len: 0,\n                
base_offset: MAX_SUPPORTED_POS.into_u64() - 1,\n                batch_offset: MAX_SUPPORTED_POS.into_u64() - 1,\n            };\n            assert!(IndexEntry::new(too_large).is_err());\n        }\n\n        // valid special cases\n        // - area past the max supported pos that is accessible by len\n        for len in [1, MAX_SUPPORTED_BLOB_LEN] {\n            roundtrip(\n                Args {\n                    will_init: false,\n                    len,\n                    base_offset: MAX_SUPPORTED_POS.into_u64(),\n                    batch_offset: 0,\n                },\n                Unpacked {\n                    will_init: false,\n                    len: len as u64,\n                    pos: MAX_SUPPORTED_POS.into_u64(),\n                },\n            );\n            roundtrip(\n                Args {\n                    will_init: false,\n                    len,\n                    base_offset: 0,\n                    batch_offset: MAX_SUPPORTED_POS.into_u64(),\n                },\n                Unpacked {\n                    will_init: false,\n                    len: len as u64,\n                    pos: MAX_SUPPORTED_POS.into_u64(),\n                },\n            );\n        }\n    }\n}\n"
  },
  {
    "path": "pageserver/src/tenant/storage_layer/layer/failpoints.rs",
    "content": "//! failpoints for unit tests, implying `#[cfg(test)]`.\n//!\n//! These are not accessible over http.\n\nuse super::*;\n\nimpl Layer {\n    /// Enable a failpoint from a unit test.\n    pub(super) fn enable_failpoint(&self, failpoint: Failpoint) {\n        self.0.failpoints.lock().unwrap().push(failpoint);\n    }\n}\n\nimpl LayerInner {\n    /// Query if this failpoint is enabled, as in, arrive at a failpoint.\n    ///\n    /// Calls to this method need to be `#[cfg(test)]` guarded.\n    pub(super) async fn failpoint(&self, kind: FailpointKind) -> Result<(), FailpointHit> {\n        let fut = {\n            let mut fps = self.failpoints.lock().unwrap();\n            // find the *last* failpoint for cases in which we need to use multiple for the same\n            // thing (two blocked evictions)\n            let fp = fps.iter_mut().rfind(|x| x.kind() == kind);\n\n            let Some(fp) = fp else {\n                return Ok(());\n            };\n\n            fp.hit()\n        };\n\n        fut.await\n    }\n}\n\n#[derive(Debug, PartialEq, Eq)]\npub(crate) enum FailpointKind {\n    /// Failpoint acts as an accurate cancelled by drop here; see the only site of use.\n    AfterDeterminingLayerNeedsNoDownload,\n    /// Failpoint for stalling eviction starting\n    WaitBeforeStartingEvicting,\n    /// Failpoint hit in the spawned task\n    WaitBeforeDownloading,\n}\n\npub(crate) enum Failpoint {\n    AfterDeterminingLayerNeedsNoDownload,\n    WaitBeforeStartingEvicting(\n        Option<utils::completion::Completion>,\n        utils::completion::Barrier,\n    ),\n    WaitBeforeDownloading(\n        Option<utils::completion::Completion>,\n        utils::completion::Barrier,\n    ),\n}\n\nimpl Failpoint {\n    fn kind(&self) -> FailpointKind {\n        match self {\n            Failpoint::AfterDeterminingLayerNeedsNoDownload => {\n                FailpointKind::AfterDeterminingLayerNeedsNoDownload\n            }\n            
Failpoint::WaitBeforeStartingEvicting(..) => FailpointKind::WaitBeforeStartingEvicting,\n            Failpoint::WaitBeforeDownloading(..) => FailpointKind::WaitBeforeDownloading,\n        }\n    }\n\n    fn hit(&mut self) -> impl std::future::Future<Output = Result<(), FailpointHit>> + 'static {\n        use futures::future::FutureExt;\n\n        // use boxed futures to avoid Either hurdles\n        match self {\n            Failpoint::AfterDeterminingLayerNeedsNoDownload => {\n                let kind = self.kind();\n\n                async move { Err(FailpointHit(kind)) }.boxed()\n            }\n            Failpoint::WaitBeforeStartingEvicting(arrival, b)\n            | Failpoint::WaitBeforeDownloading(arrival, b) => {\n                // first one signals arrival\n                drop(arrival.take());\n\n                let b = b.clone();\n\n                async move {\n                    tracing::trace!(\"waiting on a failpoint barrier\");\n                    b.wait().await;\n                    tracing::trace!(\"done waiting on a failpoint barrier\");\n                    Ok(())\n                }\n                .boxed()\n            }\n        }\n    }\n}\n\nimpl std::fmt::Display for FailpointKind {\n    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {\n        std::fmt::Debug::fmt(self, f)\n    }\n}\n\n#[derive(Debug)]\npub(crate) struct FailpointHit(FailpointKind);\n\nimpl std::fmt::Display for FailpointHit {\n    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {\n        std::fmt::Debug::fmt(self, f)\n    }\n}\n\nimpl std::error::Error for FailpointHit {}\n\nimpl From<FailpointHit> for DownloadError {\n    fn from(value: FailpointHit) -> Self {\n        DownloadError::Failpoint(value.0)\n    }\n}\n"
  },
  {
    "path": "pageserver/src/tenant/storage_layer/layer/tests.rs",
    "content": "use std::time::UNIX_EPOCH;\n\nuse pageserver_api::key::{CONTROLFILE_KEY, Key};\nuse postgres_ffi::PgMajorVersion;\nuse tokio::task::JoinSet;\nuse utils::completion::{self, Completion};\nuse utils::id::TimelineId;\n\nuse super::failpoints::{Failpoint, FailpointKind};\nuse super::*;\nuse crate::context::DownloadBehavior;\nuse crate::tenant::harness::{TenantHarness, test_img};\nuse crate::tenant::storage_layer::{IoConcurrency, LayerVisibilityHint};\nuse crate::tenant::timeline::layer_manager::LayerManagerLockHolder;\n\n/// Used in tests to advance a future to wanted await point, and not futher.\nconst ADVANCE: std::time::Duration = std::time::Duration::from_secs(3600);\n\n/// Used in tests to indicate forever long timeout; has to be longer than the amount of ADVANCE\n/// timeout uses to advance futures.\nconst FOREVER: std::time::Duration = std::time::Duration::from_secs(ADVANCE.as_secs() * 24 * 7);\n\n/// Demonstrate the API and resident -> evicted -> resident -> deleted transitions.\n#[tokio::test]\nasync fn smoke_test() {\n    let handle = tokio::runtime::Handle::current();\n\n    let h = TenantHarness::create(\"smoke_test\").await.unwrap();\n    let span = h.span();\n    let download_span = span.in_scope(|| tracing::info_span!(\"downloading\", timeline_id = 1));\n    let (tenant, ctx) = h.load().await;\n    let io_concurrency = IoConcurrency::spawn_for_test();\n\n    let image_layers = vec![(\n        Lsn(0x40),\n        vec![(\n            Key::from_hex(\"620000000033333333444444445500000000\").unwrap(),\n            test_img(\"foo\"),\n        )],\n    )];\n\n    // Create a test timeline with one real layer, and one synthetic test layer.  
The synthetic\n    // one is only there so that we can GC the real one without leaving the timeline's metadata\n    // empty, which is an illegal state (see [`IndexPart::validate`]).\n    let timeline = tenant\n        .create_test_timeline_with_layers(\n            TimelineId::generate(),\n            Lsn(0x10),\n            PgMajorVersion::PG14,\n            &ctx,\n            Default::default(), // in-memory layers\n            Default::default(),\n            image_layers,\n            Lsn(0x100),\n        )\n        .await\n        .unwrap();\n    let ctx = &ctx.with_scope_timeline(&timeline);\n\n    // Grab one of the timeline's layers to exercise in the test, and the other layer that is just\n    // there to avoid the timeline being illegally empty\n    let (layer, dummy_layer) = {\n        let mut layers = {\n            let layers = timeline.layers.read(LayerManagerLockHolder::Testing).await;\n            layers.likely_resident_layers().cloned().collect::<Vec<_>>()\n        };\n\n        assert_eq!(layers.len(), 2);\n\n        layers.sort_by_key(|l| l.layer_desc().get_key_range().start);\n        let synthetic_layer = layers.pop().unwrap();\n        let real_layer = layers.pop().unwrap();\n        tracing::info!(\n            \"real_layer={:?} ({}), synthetic_layer={:?} ({})\",\n            real_layer,\n            real_layer.layer_desc().file_size,\n            synthetic_layer,\n            synthetic_layer.layer_desc().file_size\n        );\n        (real_layer, synthetic_layer)\n    };\n\n    // all layers created at pageserver are like `layer`, initialized with strong\n    // Arc<DownloadedLayer>.\n\n    let controlfile_keyspace = KeySpace {\n        ranges: vec![CONTROLFILE_KEY..CONTROLFILE_KEY.next()],\n    };\n\n    let img_before = {\n        let mut data = ValuesReconstructState::new(io_concurrency.clone());\n        layer\n            .get_values_reconstruct_data(\n                controlfile_keyspace.clone(),\n                
Lsn(0x10)..Lsn(0x11),\n                &mut data,\n                ctx,\n            )\n            .await\n            .unwrap();\n\n        data.keys\n            .remove(&CONTROLFILE_KEY)\n            .expect(\"must be present\")\n            .collect_pending_ios()\n            .await\n            .expect(\"must not error\")\n            .img\n            .take()\n            .expect(\"tenant harness writes the control file\")\n    };\n\n    // important part is evicting the layer, which can be done when there are no more ResidentLayer\n    // instances -- there currently are none, only two `Layer` values, one in the layermap and on\n    // in scope.\n    layer.evict_and_wait(FOREVER).await.unwrap();\n\n    // double-evict returns an error, which is valid if both eviction_task and disk usage based\n    // eviction would both evict the same layer at the same time.\n\n    let e = layer.evict_and_wait(FOREVER).await.unwrap_err();\n    assert!(matches!(e, EvictionError::NotFound));\n\n    let dl_ctx = RequestContextBuilder::from(ctx)\n        .download_behavior(DownloadBehavior::Download)\n        .attached_child();\n\n    // on accesses when the layer is evicted, it will automatically be downloaded.\n    let img_after = {\n        let mut data = ValuesReconstructState::new(io_concurrency.clone());\n        layer\n            .get_values_reconstruct_data(\n                controlfile_keyspace.clone(),\n                Lsn(0x10)..Lsn(0x11),\n                &mut data,\n                &dl_ctx,\n            )\n            .instrument(download_span.clone())\n            .await\n            .unwrap();\n        data.keys\n            .remove(&CONTROLFILE_KEY)\n            .expect(\"must be present\")\n            .collect_pending_ios()\n            .await\n            .expect(\"must not error\")\n            .img\n            .take()\n            .expect(\"tenant harness writes the control file\")\n    };\n\n    assert_eq!(img_before, img_after);\n\n    // evict_and_wait 
can timeout, but it doesn't cancel the evicting itself\n    //\n    // ZERO for timeout does not work reliably, so first take up all spawn_blocking slots to\n    // artificially slow it down.\n    let helper = SpawnBlockingPoolHelper::consume_all_spawn_blocking_threads(&handle).await;\n\n    match layer\n        .evict_and_wait(std::time::Duration::ZERO)\n        .await\n        .unwrap_err()\n    {\n        EvictionError::Timeout => {\n            // expected, but note that the eviction is \"still ongoing\"\n            helper.release().await;\n            // exhaust spawn_blocking pool to ensure it is now complete\n            SpawnBlockingPoolHelper::consume_and_release_all_of_spawn_blocking_threads(&handle)\n                .await;\n        }\n        other => unreachable!(\"{other:?}\"),\n    }\n\n    // only way to query if a layer is resident is to acquire a ResidentLayer instance.\n    // Layer::keep_resident never downloads, but it might initialize if the layer file is found\n    // downloaded locally.\n    let none = layer.keep_resident().await;\n    assert!(\n        none.is_none(),\n        \"Expected none, because eviction removed the local file, found: {none:?}\"\n    );\n\n    // plain downloading is rarely needed\n    layer\n        .download_and_keep_resident(&dl_ctx)\n        .instrument(download_span)\n        .await\n        .unwrap();\n\n    // last important part is deletion on drop: gc and compaction use it for compacted L0 layers\n    // or fully garbage collected layers. 
deletion means deleting the local file, and scheduling a\n    // deletion of the already unlinked from index_part.json remote file.\n    //\n    // marking a layer to be deleted on drop is irreversible; there is no technical reason against\n    // reversibility, but currently it is not needed so it is not provided.\n    layer.delete_on_drop();\n\n    let path = layer.local_path().to_owned();\n\n    // wait_drop produces an unconnected to Layer future which will resolve when the\n    // LayerInner::drop has completed.\n    let mut wait_drop = std::pin::pin!(layer.wait_drop());\n\n    // paused time doesn't really work well with timeouts and evict_and_wait, so delay pausing\n    // until here\n    tokio::time::pause();\n    tokio::time::timeout(ADVANCE, &mut wait_drop)\n        .await\n        .expect_err(\"should had timed out because two strong references exist\");\n\n    tokio::fs::metadata(&path)\n        .await\n        .expect(\"the local layer file still exists\");\n\n    let rtc = &timeline.remote_client;\n\n    // Simulate GC removing our test layer.\n    {\n        let mut g = timeline.layers.write(LayerManagerLockHolder::Testing).await;\n\n        let layers = &[layer];\n        g.open_mut().unwrap().finish_gc_timeline(layers);\n\n        // this just updates the remote_physical_size for demonstration purposes\n        rtc.schedule_gc_update(layers).unwrap();\n    }\n\n    // when strong references are dropped, the file is deleted and remote deletion is scheduled\n    wait_drop.await;\n\n    let e = tokio::fs::metadata(&path)\n        .await\n        .expect_err(\"the local file is deleted\");\n    assert_eq!(e.kind(), std::io::ErrorKind::NotFound);\n\n    rtc.wait_completion().await.unwrap();\n\n    assert_eq!(\n        rtc.get_remote_physical_size(),\n        dummy_layer.metadata().file_size\n    );\n    assert_eq!(0, LAYER_IMPL_METRICS.inits_cancelled.get())\n}\n\n/// This test demonstrates a previous hang when an eviction and deletion were requested at 
the same\n/// time. Now both of them complete per Arc drop semantics.\n#[tokio::test(start_paused = true)]\nasync fn evict_and_wait_on_wanted_deleted() {\n    // this is the runtime on which Layer spawns the blocking tasks on\n    let handle = tokio::runtime::Handle::current();\n\n    let h = TenantHarness::create(\"evict_and_wait_on_wanted_deleted\")\n        .await\n        .unwrap();\n    utils::logging::replace_panic_hook_with_tracing_panic_hook().forget();\n    let (tenant, ctx) = h.load().await;\n\n    let timeline = tenant\n        .create_test_timeline(\n            TimelineId::generate(),\n            Lsn(0x10),\n            PgMajorVersion::PG14,\n            &ctx,\n        )\n        .await\n        .unwrap();\n\n    let layer = {\n        let mut layers = {\n            let layers = timeline.layers.read(LayerManagerLockHolder::Testing).await;\n            layers.likely_resident_layers().cloned().collect::<Vec<_>>()\n        };\n\n        assert_eq!(layers.len(), 1);\n\n        layers.swap_remove(0)\n    };\n\n    // setup done\n\n    let resident = layer.keep_resident().await.unwrap();\n\n    {\n        let mut evict_and_wait = std::pin::pin!(layer.evict_and_wait(FOREVER));\n\n        // drive the future to await on the status channel\n        tokio::time::timeout(ADVANCE, &mut evict_and_wait)\n            .await\n            .expect_err(\"should had been a timeout since we are holding the layer resident\");\n\n        layer.delete_on_drop();\n\n        drop(resident);\n\n        // make sure the eviction task gets to run\n        SpawnBlockingPoolHelper::consume_and_release_all_of_spawn_blocking_threads(&handle).await;\n\n        let resident = layer.keep_resident().await;\n        assert!(\n            resident.is_none(),\n            \"keep_resident should not have re-initialized: {resident:?}\"\n        );\n\n        evict_and_wait\n            .await\n            .expect(\"evict_and_wait should had succeeded\");\n\n        // works as intended\n    
}\n\n    // assert that once we remove the `layer` from the layer map and drop our reference,\n    // the deletion of the layer in remote_storage happens.\n    {\n        let mut layers = timeline.layers.write(LayerManagerLockHolder::Testing).await;\n        layers.open_mut().unwrap().finish_gc_timeline(&[layer]);\n    }\n\n    SpawnBlockingPoolHelper::consume_and_release_all_of_spawn_blocking_threads(&handle).await;\n\n    assert_eq!(1, LAYER_IMPL_METRICS.started_deletes.get());\n    assert_eq!(1, LAYER_IMPL_METRICS.completed_deletes.get());\n    assert_eq!(1, LAYER_IMPL_METRICS.started_evictions.get());\n    assert_eq!(1, LAYER_IMPL_METRICS.completed_evictions.get());\n    assert_eq!(0, LAYER_IMPL_METRICS.inits_cancelled.get())\n}\n\n/// This test ensures we are able to read the layer while the layer eviction has been\n/// started but not completed.\n#[test]\nfn read_wins_pending_eviction() {\n    let rt = tokio::runtime::Builder::new_current_thread()\n        .max_blocking_threads(1)\n        .enable_all()\n        .start_paused(true)\n        .build()\n        .unwrap();\n\n    rt.block_on(async move {\n        // this is the runtime on which Layer spawns the blocking tasks on\n        let handle = tokio::runtime::Handle::current();\n        let h = TenantHarness::create(\"read_wins_pending_eviction\")\n            .await\n            .unwrap();\n        let (tenant, ctx) = h.load().await;\n        let span = h.span();\n        let download_span = span.in_scope(|| tracing::info_span!(\"downloading\", timeline_id = 1));\n\n        let timeline = tenant\n            .create_test_timeline(\n                TimelineId::generate(),\n                Lsn(0x10),\n                PgMajorVersion::PG14,\n                &ctx,\n            )\n            .await\n            .unwrap();\n        let ctx = ctx.with_scope_timeline(&timeline);\n\n        let layer = {\n            let mut layers = {\n                let layers = 
timeline.layers.read(LayerManagerLockHolder::Testing).await;\n                layers.likely_resident_layers().cloned().collect::<Vec<_>>()\n            };\n\n            assert_eq!(layers.len(), 1);\n\n            layers.swap_remove(0)\n        };\n\n        // setup done\n\n        let resident = layer.keep_resident().await.unwrap();\n\n        let mut evict_and_wait = std::pin::pin!(layer.evict_and_wait(FOREVER));\n\n        // drive the future to await on the status channel\n        tokio::time::timeout(ADVANCE, &mut evict_and_wait)\n            .await\n            .expect_err(\"should had been a timeout since we are holding the layer resident\");\n        assert_eq!(1, LAYER_IMPL_METRICS.started_evictions.get());\n\n        let (completion, barrier) = utils::completion::channel();\n        let (arrival, arrived_at_barrier) = utils::completion::channel();\n        layer.enable_failpoint(Failpoint::WaitBeforeStartingEvicting(\n            Some(arrival),\n            barrier,\n        ));\n\n        // now the eviction cannot proceed because the threads are consumed while completion exists\n        drop(resident);\n        arrived_at_barrier.wait().await;\n        assert!(!layer.is_likely_resident());\n\n        // because no actual eviction happened, we get to just reinitialize the DownloadedLayer\n        layer\n            .0\n            .get_or_maybe_download(false, &ctx)\n            .instrument(download_span)\n            .await\n            .expect(\"should had reinitialized without downloading\");\n\n        assert!(layer.is_likely_resident());\n\n        // reinitialization notifies of new resident status, which should error out all evict_and_wait\n        let e = tokio::time::timeout(ADVANCE, &mut evict_and_wait)\n            .await\n            .expect(\"no timeout, because get_or_maybe_download re-initialized\")\n            .expect_err(\"eviction should not have succeeded because re-initialized\");\n\n        // works as intended: evictions lose to 
\"downloads\"\n        assert!(matches!(e, EvictionError::Downloaded), \"{e:?}\");\n        assert_eq!(0, LAYER_IMPL_METRICS.completed_evictions.get());\n\n        // this is not wrong: the eviction is technically still \"on the way\" as it's still queued\n        // because of a failpoint\n        assert_eq!(\n            0,\n            LAYER_IMPL_METRICS\n                .cancelled_evictions\n                .values()\n                .map(|ctr| ctr.get())\n                .sum::<u64>()\n        );\n\n        drop(completion);\n\n        tokio::time::sleep(ADVANCE).await;\n        SpawnBlockingPoolHelper::consume_and_release_all_of_spawn_blocking_threads0(&handle, 1)\n            .await;\n\n        assert_eq!(0, LAYER_IMPL_METRICS.completed_evictions.get());\n\n        // now we finally can observe the original eviction failing\n        // it would had been possible to observe it earlier, but here it is guaranteed to have\n        // happened.\n        assert_eq!(\n            1,\n            LAYER_IMPL_METRICS\n                .cancelled_evictions\n                .values()\n                .map(|ctr| ctr.get())\n                .sum::<u64>()\n        );\n\n        assert_eq!(\n            1,\n            LAYER_IMPL_METRICS.cancelled_evictions[EvictionCancelled::AlreadyReinitialized].get()\n        );\n\n        assert_eq!(0, LAYER_IMPL_METRICS.inits_cancelled.get())\n    });\n}\n\n/// Use failpoint to delay an eviction starting to get a VersionCheckFailed.\n#[test]\nfn multiple_pending_evictions_in_order() {\n    let name = \"multiple_pending_evictions_in_order\";\n    let in_order = true;\n    multiple_pending_evictions_scenario(name, in_order);\n}\n\n/// Use failpoint to reorder later eviction before first to get a UnexpectedEvictedState.\n#[test]\nfn multiple_pending_evictions_out_of_order() {\n    let name = \"multiple_pending_evictions_out_of_order\";\n    let in_order = false;\n    multiple_pending_evictions_scenario(name, in_order);\n}\n\nfn 
multiple_pending_evictions_scenario(name: &'static str, in_order: bool) {\n    let rt = tokio::runtime::Builder::new_current_thread()\n        .max_blocking_threads(1)\n        .enable_all()\n        .start_paused(true)\n        .build()\n        .unwrap();\n\n    rt.block_on(async move {\n        // this is the runtime on which Layer spawns the blocking tasks on\n        let handle = tokio::runtime::Handle::current();\n        let h = TenantHarness::create(name).await.unwrap();\n        let (tenant, ctx) = h.load().await;\n        let span = h.span();\n        let download_span = span.in_scope(|| tracing::info_span!(\"downloading\", timeline_id = 1));\n\n        let timeline = tenant\n            .create_test_timeline(\n                TimelineId::generate(),\n                Lsn(0x10),\n                PgMajorVersion::PG14,\n                &ctx,\n            )\n            .await\n            .unwrap();\n        let ctx = ctx.with_scope_timeline(&timeline);\n\n        let layer = {\n            let mut layers = {\n                let layers = timeline.layers.read(LayerManagerLockHolder::Testing).await;\n                layers.likely_resident_layers().cloned().collect::<Vec<_>>()\n            };\n\n            assert_eq!(layers.len(), 1);\n\n            layers.swap_remove(0)\n        };\n\n        // setup done\n\n        let resident = layer.keep_resident().await.unwrap();\n\n        let mut evict_and_wait = std::pin::pin!(layer.evict_and_wait(FOREVER));\n\n        // drive the future to await on the status channel\n        tokio::time::timeout(ADVANCE, &mut evict_and_wait)\n            .await\n            .expect_err(\"should had been a timeout since we are holding the layer resident\");\n        assert_eq!(1, LAYER_IMPL_METRICS.started_evictions.get());\n\n        let (completion1, barrier) = utils::completion::channel();\n        let mut completion1 = Some(completion1);\n        let (arrival, arrived_at_barrier) = utils::completion::channel();\n        
layer.enable_failpoint(Failpoint::WaitBeforeStartingEvicting(\n            Some(arrival),\n            barrier,\n        ));\n\n        // now the eviction cannot proceed because we are simulating arbitrary long delay for the\n        // eviction task start.\n        drop(resident);\n        assert!(!layer.is_likely_resident());\n\n        arrived_at_barrier.wait().await;\n\n        // because no actual eviction happened, we get to just reinitialize the DownloadedLayer\n        layer\n            .0\n            .get_or_maybe_download(false, &ctx)\n            .instrument(download_span)\n            .await\n            .expect(\"should had reinitialized without downloading\");\n\n        assert!(layer.is_likely_resident());\n\n        // reinitialization notifies of new resident status, which should error out all evict_and_wait\n        let e = tokio::time::timeout(ADVANCE, &mut evict_and_wait)\n            .await\n            .expect(\"no timeout, because get_or_maybe_download re-initialized\")\n            .expect_err(\"eviction should not have succeeded because re-initialized\");\n\n        // works as intended: evictions lose to \"downloads\"\n        assert!(matches!(e, EvictionError::Downloaded), \"{e:?}\");\n        assert_eq!(0, LAYER_IMPL_METRICS.completed_evictions.get());\n\n        // this is not wrong: the eviction is technically still \"on the way\" as it's still queued\n        // because of a failpoint\n        assert_eq!(\n            0,\n            LAYER_IMPL_METRICS\n                .cancelled_evictions\n                .values()\n                .map(|ctr| ctr.get())\n                .sum::<u64>()\n        );\n\n        assert_eq!(0, LAYER_IMPL_METRICS.completed_evictions.get());\n\n        // configure another failpoint for the second eviction -- evictions are per initialization,\n        // so now that we've reinitialized the inner, we get to run two of them at the same time.\n        let (completion2, barrier) = 
utils::completion::channel();\n        let (arrival, arrived_at_barrier) = utils::completion::channel();\n        layer.enable_failpoint(Failpoint::WaitBeforeStartingEvicting(\n            Some(arrival),\n            barrier,\n        ));\n\n        let mut second_eviction = std::pin::pin!(layer.evict_and_wait(FOREVER));\n\n        // advance to the wait on the queue\n        tokio::time::timeout(ADVANCE, &mut second_eviction)\n            .await\n            .expect_err(\"timeout because failpoint is blocking\");\n\n        arrived_at_barrier.wait().await;\n\n        assert_eq!(2, LAYER_IMPL_METRICS.started_evictions.get());\n\n        let mut release_earlier_eviction = |expected_reason| {\n            assert_eq!(\n                0,\n                LAYER_IMPL_METRICS.cancelled_evictions[expected_reason].get(),\n            );\n\n            drop(completion1.take().unwrap());\n\n            let handle = &handle;\n\n            async move {\n                tokio::time::sleep(ADVANCE).await;\n                SpawnBlockingPoolHelper::consume_and_release_all_of_spawn_blocking_threads0(\n                    handle, 1,\n                )\n                .await;\n\n                assert_eq!(\n                    1,\n                    LAYER_IMPL_METRICS.cancelled_evictions[expected_reason].get(),\n                );\n            }\n        };\n\n        if in_order {\n            release_earlier_eviction(EvictionCancelled::VersionCheckFailed).await;\n        }\n\n        // release the later eviction which is for the current version\n        drop(completion2);\n        tokio::time::sleep(ADVANCE).await;\n        SpawnBlockingPoolHelper::consume_and_release_all_of_spawn_blocking_threads0(&handle, 1)\n            .await;\n\n        if !in_order {\n            release_earlier_eviction(EvictionCancelled::UnexpectedEvictedState).await;\n        }\n\n        tokio::time::timeout(ADVANCE, &mut second_eviction)\n            .await\n            .expect(\"eviction goes 
through now that spawn_blocking is unclogged\")\n            .expect(\"eviction should succeed, because version matches\");\n\n        assert_eq!(1, LAYER_IMPL_METRICS.completed_evictions.get());\n\n        // ensure the cancelled are unchanged\n        assert_eq!(\n            1,\n            LAYER_IMPL_METRICS\n                .cancelled_evictions\n                .values()\n                .map(|ctr| ctr.get())\n                .sum::<u64>()\n        );\n\n        assert_eq!(0, LAYER_IMPL_METRICS.inits_cancelled.get())\n    });\n}\n\n/// The test ensures with a failpoint that a pending eviction is not cancelled by what is currently\n/// a `Layer::keep_resident` call.\n///\n/// This matters because cancelling the eviction would leave us in a state where the file is on\n/// disk but the layer internal state says it has not been initialized. Furthermore, it allows us to\n/// have non-repairing `Layer::is_likely_resident`.\n#[tokio::test(start_paused = true)]\nasync fn cancelled_get_or_maybe_download_does_not_cancel_eviction() {\n    let handle = tokio::runtime::Handle::current();\n    let h = TenantHarness::create(\"cancelled_get_or_maybe_download_does_not_cancel_eviction\")\n        .await\n        .unwrap();\n    let (tenant, ctx) = h.load().await;\n\n    let timeline = tenant\n        .create_test_timeline(\n            TimelineId::generate(),\n            Lsn(0x10),\n            PgMajorVersion::PG14,\n            &ctx,\n        )\n        .await\n        .unwrap();\n    let ctx = ctx.with_scope_timeline(&timeline);\n\n    // This test does downloads\n    let ctx = RequestContextBuilder::from(&ctx)\n        .download_behavior(DownloadBehavior::Download)\n        .attached_child();\n\n    let layer = {\n        let mut layers = {\n            let layers = timeline.layers.read(LayerManagerLockHolder::Testing).await;\n            layers.likely_resident_layers().cloned().collect::<Vec<_>>()\n        };\n\n        assert_eq!(layers.len(), 1);\n\n        
layers.swap_remove(0)\n    };\n\n    // this failpoint will simulate the `get_or_maybe_download` becoming cancelled (by returning an\n    // Err) at the right time as in \"during\" the `LayerInner::needs_download`.\n    layer.enable_failpoint(Failpoint::AfterDeterminingLayerNeedsNoDownload);\n\n    let (completion, barrier) = utils::completion::channel();\n    let (arrival, arrived_at_barrier) = utils::completion::channel();\n\n    layer.enable_failpoint(Failpoint::WaitBeforeStartingEvicting(\n        Some(arrival),\n        barrier,\n    ));\n\n    tokio::time::timeout(ADVANCE, layer.evict_and_wait(FOREVER))\n        .await\n        .expect_err(\"should had advanced to waiting on channel\");\n\n    arrived_at_barrier.wait().await;\n\n    // simulate a cancelled read which is cancelled before it gets to re-initialize\n    let e = layer\n        .0\n        .get_or_maybe_download(false, &ctx)\n        .await\n        .unwrap_err();\n    assert!(\n        matches!(\n            e,\n            DownloadError::Failpoint(FailpointKind::AfterDeterminingLayerNeedsNoDownload)\n        ),\n        \"{e:?}\"\n    );\n\n    assert!(\n        layer.0.needs_download().await.unwrap().is_none(),\n        \"file is still on disk\"\n    );\n\n    // release the eviction task\n    drop(completion);\n    tokio::time::sleep(ADVANCE).await;\n    SpawnBlockingPoolHelper::consume_and_release_all_of_spawn_blocking_threads(&handle).await;\n\n    // failpoint is still enabled, but it is not hit\n    let e = layer\n        .0\n        .get_or_maybe_download(false, &ctx)\n        .await\n        .unwrap_err();\n    assert!(matches!(e, DownloadError::DownloadRequired), \"{e:?}\");\n\n    // failpoint is not counted as cancellation either\n    assert_eq!(0, LAYER_IMPL_METRICS.inits_cancelled.get())\n}\n\n#[tokio::test(start_paused = true)]\nasync fn evict_and_wait_does_not_wait_for_download() {\n    // let handle = tokio::runtime::Handle::current();\n    let h = 
TenantHarness::create(\"evict_and_wait_does_not_wait_for_download\")\n        .await\n        .unwrap();\n    let (tenant, ctx) = h.load().await;\n    let span = h.span();\n    let download_span = span.in_scope(|| tracing::info_span!(\"downloading\", timeline_id = 1));\n\n    let timeline = tenant\n        .create_test_timeline(\n            TimelineId::generate(),\n            Lsn(0x10),\n            PgMajorVersion::PG14,\n            &ctx,\n        )\n        .await\n        .unwrap();\n    let ctx = ctx.with_scope_timeline(&timeline);\n\n    // This test does downloads\n    let ctx = RequestContextBuilder::from(&ctx)\n        .download_behavior(DownloadBehavior::Download)\n        .attached_child();\n\n    let layer = {\n        let mut layers = {\n            let layers = timeline.layers.read(LayerManagerLockHolder::Testing).await;\n            layers.likely_resident_layers().cloned().collect::<Vec<_>>()\n        };\n\n        assert_eq!(layers.len(), 1);\n\n        layers.swap_remove(0)\n    };\n\n    // kind of forced setup: start an eviction but do not allow it progress until we are\n    // downloading\n    let (eviction_can_continue, barrier) = utils::completion::channel();\n    let (arrival, eviction_arrived) = utils::completion::channel();\n    layer.enable_failpoint(Failpoint::WaitBeforeStartingEvicting(\n        Some(arrival),\n        barrier,\n    ));\n\n    let mut evict_and_wait = std::pin::pin!(layer.evict_and_wait(FOREVER));\n\n    // use this once-awaited other_evict to synchronize with the eviction\n    let other_evict = layer.evict_and_wait(FOREVER);\n\n    tokio::time::timeout(ADVANCE, &mut evict_and_wait)\n        .await\n        .expect_err(\"should had advanced\");\n    eviction_arrived.wait().await;\n    drop(eviction_can_continue);\n    other_evict.await.unwrap();\n\n    // now the layer is evicted, and the \"evict_and_wait\" is waiting on the receiver\n    assert!(!layer.is_likely_resident());\n\n    // following new evict_and_wait will 
fail until we've completed the download\n    let e = layer.evict_and_wait(FOREVER).await.unwrap_err();\n    assert!(matches!(e, EvictionError::NotFound), \"{e:?}\");\n\n    let (download_can_continue, barrier) = utils::completion::channel();\n    let (arrival, _download_arrived) = utils::completion::channel();\n    layer.enable_failpoint(Failpoint::WaitBeforeDownloading(Some(arrival), barrier));\n\n    let mut download = std::pin::pin!(\n        layer\n            .0\n            .get_or_maybe_download(true, &ctx)\n            .instrument(download_span)\n    );\n\n    assert!(\n        !layer.is_likely_resident(),\n        \"during download layer is evicted\"\n    );\n\n    tokio::time::timeout(ADVANCE, &mut download)\n        .await\n        .expect_err(\"should had timed out because of failpoint\");\n\n    // now we finally get to continue, and because the latest state is downloading, we deduce that\n    // original eviction succeeded\n    evict_and_wait.await.unwrap();\n\n    // however a new evict_and_wait will fail\n    let e = layer.evict_and_wait(FOREVER).await.unwrap_err();\n    assert!(matches!(e, EvictionError::NotFound), \"{e:?}\");\n\n    assert!(!layer.is_likely_resident());\n\n    drop(download_can_continue);\n    download.await.expect(\"download should had succeeded\");\n    assert!(layer.is_likely_resident());\n\n    // only now can we evict\n    layer.evict_and_wait(FOREVER).await.unwrap();\n}\n\n/// Asserts that there is no miscalculation when Layer is dropped while it is being kept resident,\n/// which is the last value.\n///\n/// Also checks that the same does not happen on a non-evicted layer (regression test).\n#[tokio::test(start_paused = true)]\nasync fn eviction_cancellation_on_drop() {\n    use bytes::Bytes;\n    use wal_decoder::models::value::Value;\n\n    // this is the runtime on which Layer spawns the blocking tasks on\n    let handle = tokio::runtime::Handle::current();\n\n    let h = 
TenantHarness::create(\"eviction_cancellation_on_drop\")\n        .await\n        .unwrap();\n    utils::logging::replace_panic_hook_with_tracing_panic_hook().forget();\n    let (tenant, ctx) = h.load().await;\n\n    let timeline = tenant\n        .create_test_timeline(\n            TimelineId::generate(),\n            Lsn(0x10),\n            PgMajorVersion::PG14,\n            &ctx,\n        )\n        .await\n        .unwrap();\n\n    {\n        // create_test_timeline wrote us one layer, write another\n        let mut writer = timeline.writer().await;\n        writer\n            .put(\n                pageserver_api::key::Key::from_i128(5),\n                Lsn(0x20),\n                &Value::Image(Bytes::from_static(b\"this does not matter either\")),\n                &ctx,\n            )\n            .await\n            .unwrap();\n\n        writer.finish_write(Lsn(0x20));\n    }\n\n    timeline.freeze_and_flush().await.unwrap();\n\n    // wait for the upload to complete so our Arc::strong_count assertion holds\n    timeline.remote_client.wait_completion().await.unwrap();\n\n    let (evicted_layer, not_evicted) = {\n        let mut layers = {\n            let mut guard = timeline.layers.write(LayerManagerLockHolder::Testing).await;\n            let layers = guard.likely_resident_layers().cloned().collect::<Vec<_>>();\n            // remove the layers from layermap\n            guard.open_mut().unwrap().finish_gc_timeline(&layers);\n\n            layers\n        };\n\n        assert_eq!(layers.len(), 2);\n\n        (layers.pop().unwrap(), layers.pop().unwrap())\n    };\n\n    let victims = [(evicted_layer, true), (not_evicted, false)];\n\n    for (victim, evict) in victims {\n        let resident = victim.keep_resident().await.unwrap();\n        drop(victim);\n\n        assert_eq!(Arc::strong_count(&resident.owner.0), 1);\n\n        if evict {\n            let evict_and_wait = resident.owner.evict_and_wait(FOREVER);\n\n            // drive the future to await 
on the status channel, and then drop it\n            tokio::time::timeout(ADVANCE, evict_and_wait)\n                .await\n                .expect_err(\"should had been a timeout since we are holding the layer resident\");\n        }\n\n        // 1 == we only evict one of the layers\n        assert_eq!(1, LAYER_IMPL_METRICS.started_evictions.get());\n\n        drop(resident);\n\n        // run any spawned\n        tokio::time::sleep(ADVANCE).await;\n\n        SpawnBlockingPoolHelper::consume_and_release_all_of_spawn_blocking_threads(&handle).await;\n\n        assert_eq!(\n            1,\n            LAYER_IMPL_METRICS.cancelled_evictions[EvictionCancelled::LayerGone].get()\n        );\n    }\n}\n\n/// A test case to remind you the cost of these structures. You can bump the size limit\n/// below if it is really necessary to add more fields to the structures.\n#[test]\n#[cfg(target_arch = \"x86_64\")]\nfn layer_size() {\n    assert_eq!(size_of::<LayerAccessStats>(), 8);\n    assert_eq!(size_of::<PersistentLayerDesc>(), 104);\n    assert_eq!(size_of::<LayerInner>(), 296);\n    // it also has the utf8 path\n}\n\nstruct SpawnBlockingPoolHelper {\n    awaited_by_spawn_blocking_tasks: Completion,\n    blocking_tasks: JoinSet<()>,\n}\n\nimpl SpawnBlockingPoolHelper {\n    /// All `crate::task_mgr::BACKGROUND_RUNTIME` spawn_blocking threads will be consumed until\n    /// release is called.\n    ///\n    /// In the tests this can be used to ensure something cannot be started on the target runtimes\n    /// spawn_blocking pool.\n    ///\n    /// This should be no issue nowdays, because nextest runs each test in it's own process.\n    async fn consume_all_spawn_blocking_threads(handle: &tokio::runtime::Handle) -> Self {\n        let default_max_blocking_threads = 512;\n\n        Self::consume_all_spawn_blocking_threads0(handle, default_max_blocking_threads).await\n    }\n\n    async fn consume_all_spawn_blocking_threads0(\n        handle: &tokio::runtime::Handle,\n        
threads: usize,\n    ) -> Self {\n        assert_ne!(threads, 0);\n\n        let (completion, barrier) = completion::channel();\n        let (started, starts_completed) = completion::channel();\n\n        let mut blocking_tasks = JoinSet::new();\n\n        for _ in 0..threads {\n            let barrier = barrier.clone();\n            let started = started.clone();\n            blocking_tasks.spawn_blocking_on(\n                move || {\n                    drop(started);\n                    tokio::runtime::Handle::current().block_on(barrier.wait());\n                },\n                handle,\n            );\n        }\n\n        drop(started);\n\n        starts_completed.wait().await;\n\n        drop(barrier);\n\n        tracing::trace!(\"consumed all threads\");\n\n        SpawnBlockingPoolHelper {\n            awaited_by_spawn_blocking_tasks: completion,\n            blocking_tasks,\n        }\n    }\n\n    /// Release all previously blocked spawn_blocking threads\n    async fn release(self) {\n        let SpawnBlockingPoolHelper {\n            awaited_by_spawn_blocking_tasks,\n            mut blocking_tasks,\n        } = self;\n\n        drop(awaited_by_spawn_blocking_tasks);\n\n        while let Some(res) = blocking_tasks.join_next().await {\n            res.expect(\"none of the tasks should had panicked\");\n        }\n\n        tracing::trace!(\"released all threads\");\n    }\n\n    /// In the tests it is used as an easy way of making sure something scheduled on the target\n    /// runtimes `spawn_blocking` has completed, because it must've been scheduled and completed\n    /// before our tasks have a chance to schedule and complete.\n    async fn consume_and_release_all_of_spawn_blocking_threads(handle: &tokio::runtime::Handle) {\n        Self::consume_and_release_all_of_spawn_blocking_threads0(handle, 512).await\n    }\n\n    async fn consume_and_release_all_of_spawn_blocking_threads0(\n        handle: &tokio::runtime::Handle,\n        threads: 
usize,\n    ) {\n        Self::consume_all_spawn_blocking_threads0(handle, threads)\n            .await\n            .release()\n            .await\n    }\n}\n\n#[test]\nfn spawn_blocking_pool_helper_actually_works() {\n    // create a custom runtime for which we know and control how many blocking threads it has\n    //\n    // because the amount is not configurable for our helper, expect the same amount as\n    // BACKGROUND_RUNTIME using the tokio defaults would have.\n    let rt = tokio::runtime::Builder::new_current_thread()\n        .max_blocking_threads(1)\n        .enable_all()\n        .build()\n        .unwrap();\n\n    let handle = rt.handle();\n\n    rt.block_on(async move {\n        // this will not return until all threads are spun up and actually executing the code\n        // waiting on `consumed` to be `SpawnBlockingPoolHelper::release`'d.\n        let consumed =\n            SpawnBlockingPoolHelper::consume_all_spawn_blocking_threads0(handle, 1).await;\n\n        println!(\"consumed\");\n\n        let mut jh = std::pin::pin!(tokio::task::spawn_blocking(move || {\n            // this will not get to run before we release\n        }));\n\n        println!(\"spawned\");\n\n        tokio::time::timeout(std::time::Duration::from_secs(1), &mut jh)\n            .await\n            .expect_err(\"the task should not have gotten to run yet\");\n\n        println!(\"tried to join\");\n\n        consumed.release().await;\n\n        println!(\"released\");\n\n        tokio::time::timeout(std::time::Duration::from_secs(1), jh)\n            .await\n            .expect(\"no timeout\")\n            .expect(\"no join error\");\n\n        println!(\"joined\");\n    });\n}\n\n/// Drop the low bits from a time, to emulate the precision loss in LayerAccessStats\nfn lowres_time(hires: SystemTime) -> SystemTime {\n    let ts = hires.duration_since(UNIX_EPOCH).unwrap().as_secs();\n    UNIX_EPOCH + Duration::from_secs(ts)\n}\n\n#[test]\nfn access_stats() {\n    let 
access_stats = LayerAccessStats::default();\n    // Default is visible\n    assert_eq!(access_stats.visibility(), LayerVisibilityHint::Visible);\n\n    access_stats.set_visibility(LayerVisibilityHint::Covered);\n    assert_eq!(access_stats.visibility(), LayerVisibilityHint::Covered);\n    access_stats.set_visibility(LayerVisibilityHint::Visible);\n    assert_eq!(access_stats.visibility(), LayerVisibilityHint::Visible);\n\n    let rtime = UNIX_EPOCH + Duration::from_secs(2000000000);\n    access_stats.record_residence_event_at(rtime);\n    assert_eq!(access_stats.latest_activity(), lowres_time(rtime));\n\n    let atime = UNIX_EPOCH + Duration::from_secs(2100000000);\n    access_stats.record_access_at(atime);\n    assert_eq!(access_stats.latest_activity(), lowres_time(atime));\n\n    // Setting visibility doesn't clobber access time\n    access_stats.set_visibility(LayerVisibilityHint::Covered);\n    assert_eq!(access_stats.latest_activity(), lowres_time(atime));\n    access_stats.set_visibility(LayerVisibilityHint::Visible);\n    assert_eq!(access_stats.latest_activity(), lowres_time(atime));\n\n    // Recording access implicitly makes layer visible, if it wasn't already\n    let atime = UNIX_EPOCH + Duration::from_secs(2200000000);\n    access_stats.set_visibility(LayerVisibilityHint::Covered);\n    assert_eq!(access_stats.visibility(), LayerVisibilityHint::Covered);\n    assert!(access_stats.record_access_at(atime));\n    access_stats.set_visibility(LayerVisibilityHint::Visible);\n    assert!(!access_stats.record_access_at(atime));\n    access_stats.set_visibility(LayerVisibilityHint::Visible);\n}\n\n#[test]\nfn access_stats_2038() {\n    // The access stats structure uses a timestamp representation that will run out\n    // of bits in 2038.  
One year before that, this unit test will start failing.\n\n    let one_year_from_now = SystemTime::now().duration_since(UNIX_EPOCH).unwrap()\n        + Duration::from_secs(3600 * 24 * 365);\n\n    assert!(one_year_from_now.as_secs() < (2 << 31));\n}\n"
  },
  {
    "path": "pageserver/src/tenant/storage_layer/layer.rs",
    "content": "use std::ops::Range;\nuse std::sync::atomic::{AtomicBool, AtomicUsize, Ordering};\nuse std::sync::{Arc, Weak};\nuse std::time::{Duration, SystemTime};\n\nuse crate::PERF_TRACE_TARGET;\nuse crate::metrics::{ONDEMAND_DOWNLOAD_BYTES, ONDEMAND_DOWNLOAD_COUNT};\nuse anyhow::Context;\nuse camino::{Utf8Path, Utf8PathBuf};\nuse pageserver_api::keyspace::KeySpace;\nuse pageserver_api::models::HistoricLayerInfo;\nuse pageserver_api::shard::{ShardIdentity, ShardIndex, TenantShardId};\nuse tracing::{Instrument, info_span};\nuse utils::generation::Generation;\nuse utils::id::TimelineId;\nuse utils::lsn::Lsn;\nuse utils::sync::{gate, heavier_once_cell};\n\nuse super::delta_layer::{self};\nuse super::image_layer::{self};\nuse super::{\n    AsLayerDesc, ImageLayerWriter, LayerAccessStats, LayerAccessStatsReset, LayerName,\n    LayerVisibilityHint, PerfInstrumentFutureExt, PersistentLayerDesc, ValuesReconstructState,\n};\nuse crate::config::PageServerConf;\nuse crate::context::{RequestContext, RequestContextBuilder};\nuse crate::span::debug_assert_current_span_has_tenant_and_timeline_id;\nuse crate::task_mgr::TaskKind;\nuse crate::tenant::Timeline;\nuse crate::tenant::remote_timeline_client::LayerFileMetadata;\nuse crate::tenant::timeline::{CompactionError, GetVectoredError};\n\n#[cfg(test)]\nmod tests;\n\n#[cfg(test)]\nmod failpoints;\n\npub const S3_UPLOAD_LIMIT: u64 = 4_500_000_000;\n\n/// A Layer contains all data in a \"rectangle\" consisting of a range of keys and\n/// range of LSNs.\n///\n/// There are two kinds of layers, in-memory and on-disk layers. In-memory\n/// layers are used to ingest incoming WAL, and provide fast access to the\n/// recent page versions. On-disk layers are stored as files on disk, and are\n/// immutable. 
This type represents the on-disk kind while in-memory kind are represented by\n/// [`InMemoryLayer`].\n///\n/// Furthermore, there are two kinds of on-disk layers: delta and image layers.\n/// A delta layer contains all modifications within a range of LSNs and keys.\n/// An image layer is a snapshot of all the data in a key-range, at a single\n/// LSN.\n///\n/// This type models the on-disk layers, which can be evicted and on-demand downloaded. As a\n/// general goal, read accesses should always win eviction and eviction should not wait for\n/// download.\n///\n/// ### State transitions\n///\n/// The internal state of `Layer` is composed of most importantly the on-filesystem state and the\n/// [`ResidentOrWantedEvicted`] enum. On-filesystem state can be either present (fully downloaded,\n/// right size) or deleted.\n///\n/// Reads will always win requests to evict until `wait_for_turn_and_evict` has acquired the\n/// `heavier_once_cell::InitPermit` and has started to `evict_blocking`. Before the\n/// `heavier_once_cell::InitPermit` has been acquired, any read request\n/// (`get_or_maybe_download`) can \"re-initialize\" using the existing downloaded file and thus\n/// cancelling the eviction.\n///\n/// ```text\n///  +-----------------+   get_or_maybe_download    +--------------------------------+\n///  | not initialized |--------------------------->| Resident(Arc<DownloadedLayer>) |\n///  |     ENOENT      |                         /->|                                |\n///  +-----------------+                         |  +--------------------------------+\n///                  ^                           |                         |       ^\n///                  |    get_or_maybe_download  |                         |       | get_or_maybe_download, either:\n///   evict_blocking | /-------------------------/                         |       | - upgrade weak to strong\n///                  | |                                                   |       | - re-initialize 
without download\n///                  | |                                    evict_and_wait |       |\n///  +-----------------+                                                   v       |\n///  | not initialized |  on_downloaded_layer_drop  +--------------------------------------+\n///  | file is present |<---------------------------| WantedEvicted(Weak<DownloadedLayer>) |\n///  +-----------------+                            +--------------------------------------+\n/// ```\n///\n/// ### Unsupported\n///\n/// - Evicting by the operator deleting files from the filesystem\n///\n/// [`InMemoryLayer`]: super::inmemory_layer::InMemoryLayer\n#[derive(Clone)]\npub(crate) struct Layer(Arc<LayerInner>);\n\nimpl std::fmt::Display for Layer {\n    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {\n        write!(\n            f,\n            \"{}{}\",\n            self.layer_desc().short_id(),\n            self.0.generation.get_suffix()\n        )\n    }\n}\n\nimpl std::fmt::Debug for Layer {\n    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {\n        write!(f, \"{self}\")\n    }\n}\n\nimpl AsLayerDesc for Layer {\n    fn layer_desc(&self) -> &PersistentLayerDesc {\n        self.0.layer_desc()\n    }\n}\n\nimpl PartialEq for Layer {\n    fn eq(&self, other: &Self) -> bool {\n        Arc::as_ptr(&self.0) == Arc::as_ptr(&other.0)\n    }\n}\n\npub(crate) fn local_layer_path(\n    conf: &PageServerConf,\n    tenant_shard_id: &TenantShardId,\n    timeline_id: &TimelineId,\n    layer_file_name: &LayerName,\n    generation: &Generation,\n) -> Utf8PathBuf {\n    let timeline_path = conf.timeline_path(tenant_shard_id, timeline_id);\n\n    if generation.is_none() {\n        // Without a generation, we may only use legacy path style\n        timeline_path.join(layer_file_name.to_string())\n    } else {\n        timeline_path.join(format!(\"{}-v1{}\", layer_file_name, generation.get_suffix()))\n    }\n}\n\npub(crate) enum LastEviction {\n    
Never,\n    At(std::time::Instant),\n    Evicting,\n}\n\nimpl LastEviction {\n    pub(crate) fn happened_after(&self, timepoint: std::time::Instant) -> bool {\n        match self {\n            LastEviction::Never => false,\n            LastEviction::At(evicted_at) => evicted_at > &timepoint,\n            LastEviction::Evicting => true,\n        }\n    }\n}\n\nimpl Layer {\n    /// Creates a layer value for a file we know to not be resident.\n    pub(crate) fn for_evicted(\n        conf: &'static PageServerConf,\n        timeline: &Arc<Timeline>,\n        file_name: LayerName,\n        metadata: LayerFileMetadata,\n    ) -> Self {\n        let local_path = local_layer_path(\n            conf,\n            &timeline.tenant_shard_id,\n            &timeline.timeline_id,\n            &file_name,\n            &metadata.generation,\n        );\n\n        let desc = PersistentLayerDesc::from_filename(\n            timeline.tenant_shard_id,\n            timeline.timeline_id,\n            file_name,\n            metadata.file_size,\n        );\n\n        let owner = Layer(Arc::new(LayerInner::new(\n            conf,\n            timeline,\n            local_path,\n            desc,\n            None,\n            metadata.generation,\n            metadata.shard,\n        )));\n\n        debug_assert!(owner.0.needs_download_blocking().unwrap().is_some());\n\n        owner\n    }\n\n    /// Creates a Layer value for a file we know to be resident in timeline directory.\n    pub(crate) fn for_resident(\n        conf: &'static PageServerConf,\n        timeline: &Arc<Timeline>,\n        local_path: Utf8PathBuf,\n        file_name: LayerName,\n        metadata: LayerFileMetadata,\n    ) -> ResidentLayer {\n        let desc = PersistentLayerDesc::from_filename(\n            timeline.tenant_shard_id,\n            timeline.timeline_id,\n            file_name,\n            metadata.file_size,\n        );\n\n        let mut resident = None;\n\n        let owner = 
Layer(Arc::new_cyclic(|owner| {\n            let inner = Arc::new(DownloadedLayer {\n                owner: owner.clone(),\n                kind: tokio::sync::OnceCell::default(),\n                version: 0,\n            });\n            resident = Some(inner.clone());\n\n            LayerInner::new(\n                conf,\n                timeline,\n                local_path,\n                desc,\n                Some(inner),\n                metadata.generation,\n                metadata.shard,\n            )\n        }));\n\n        let downloaded = resident.expect(\"just initialized\");\n\n        debug_assert!(owner.0.needs_download_blocking().unwrap().is_none());\n\n        timeline\n            .metrics\n            .resident_physical_size_add(metadata.file_size);\n\n        ResidentLayer { downloaded, owner }\n    }\n\n    /// Creates a Layer value for freshly written out new layer file by renaming it from a\n    /// temporary path.\n    pub(crate) fn finish_creating(\n        conf: &'static PageServerConf,\n        timeline: &Arc<Timeline>,\n        desc: PersistentLayerDesc,\n        temp_path: &Utf8Path,\n    ) -> anyhow::Result<ResidentLayer> {\n        let mut resident = None;\n\n        let owner = Layer(Arc::new_cyclic(|owner| {\n            let inner = Arc::new(DownloadedLayer {\n                owner: owner.clone(),\n                kind: tokio::sync::OnceCell::default(),\n                version: 0,\n            });\n            resident = Some(inner.clone());\n\n            let local_path = local_layer_path(\n                conf,\n                &timeline.tenant_shard_id,\n                &timeline.timeline_id,\n                &desc.layer_name(),\n                &timeline.generation,\n            );\n\n            LayerInner::new(\n                conf,\n                timeline,\n                local_path,\n                desc,\n                Some(inner),\n                timeline.generation,\n                
timeline.get_shard_index(),\n            )\n        }));\n\n        let downloaded = resident.expect(\"just initialized\");\n\n        // We never want to overwrite an existing file, so we use `RENAME_NOREPLACE`.\n        // TODO: this leaves the temp file in place if the rename fails, risking us running\n        // out of space. Should we clean it up here or does the calling context deal with this?\n        utils::fs_ext::rename_noreplace(temp_path.as_std_path(), owner.local_path().as_std_path())\n            .with_context(|| format!(\"rename temporary file as correct path for {owner}\"))?;\n\n        Ok(ResidentLayer { downloaded, owner })\n    }\n\n    /// Requests the layer to be evicted and waits for this to be done.\n    ///\n    /// If the file is not resident, an [`EvictionError::NotFound`] is returned.\n    ///\n    /// If for a bad luck or blocking of the executor, we miss the actual eviction and the layer is\n    /// re-downloaded, [`EvictionError::Downloaded`] is returned.\n    ///\n    /// Timeout is mandatory, because waiting for eviction is only needed for our tests; eviction\n    /// will happen regardless the future returned by this method completing unless there is a\n    /// read access before eviction gets to complete.\n    ///\n    /// Technically cancellation safe, but cancelling might shift the viewpoint of what generation\n    /// of download-evict cycle on retry.\n    pub(crate) async fn evict_and_wait(&self, timeout: Duration) -> Result<(), EvictionError> {\n        self.0.evict_and_wait(timeout).await\n    }\n\n    /// Delete the layer file when the `self` gets dropped, also try to schedule a remote index upload\n    /// then.\n    ///\n    /// On drop, this will cause a call to [`crate::tenant::remote_timeline_client::RemoteTimelineClient::schedule_deletion_of_unlinked`].\n    /// This means that the unlinking by [gc] or [compaction] must have happened strictly before\n    /// the value this is called on gets dropped.\n    ///\n    /// 
This is ensured by both of those methods accepting references to Layer.\n    ///\n    /// [gc]: [`RemoteTimelineClient::schedule_gc_update`]\n    /// [compaction]: [`RemoteTimelineClient::schedule_compaction_update`]\n    pub(crate) fn delete_on_drop(&self) {\n        self.0.delete_on_drop();\n    }\n\n    pub(crate) async fn get_values_reconstruct_data(\n        &self,\n        keyspace: KeySpace,\n        lsn_range: Range<Lsn>,\n        reconstruct_data: &mut ValuesReconstructState,\n        ctx: &RequestContext,\n    ) -> Result<(), GetVectoredError> {\n        let downloaded = {\n            let ctx = RequestContextBuilder::from(ctx)\n                .perf_span(|crnt_perf_span| {\n                    info_span!(\n                        target: PERF_TRACE_TARGET,\n                        parent: crnt_perf_span,\n                        \"GET_LAYER\",\n                    )\n                })\n                .attached_child();\n\n            self.0\n                .get_or_maybe_download(true, &ctx)\n                .maybe_perf_instrument(&ctx, |crnt_perf_context| crnt_perf_context.clone())\n                .await\n                .map_err(|err| match err {\n                    DownloadError::TimelineShutdown | DownloadError::DownloadCancelled => {\n                        GetVectoredError::Cancelled\n                    }\n                    other => GetVectoredError::Other(anyhow::anyhow!(other)),\n                })?\n        };\n\n        let this = ResidentLayer {\n            downloaded: downloaded.clone(),\n            owner: self.clone(),\n        };\n\n        self.record_access(ctx);\n\n        let ctx = RequestContextBuilder::from(ctx)\n            .perf_span(|crnt_perf_span| {\n                info_span!(\n                    target: PERF_TRACE_TARGET,\n                    parent: crnt_perf_span,\n                    \"VISIT_LAYER\",\n                )\n            })\n            .attached_child();\n\n        downloaded\n            
.get_values_reconstruct_data(this, keyspace, lsn_range, reconstruct_data, &ctx)\n            .instrument(tracing::debug_span!(\"get_values_reconstruct_data\", layer=%self))\n            .maybe_perf_instrument(&ctx, |crnt_perf_span| crnt_perf_span.clone())\n            .await\n            .map_err(|err| match err {\n                GetVectoredError::Other(err) => GetVectoredError::Other(\n                    err.context(format!(\"get_values_reconstruct_data for layer {self}\")),\n                ),\n                err => err,\n            })\n    }\n\n    /// Download the layer if evicted.\n    ///\n    /// Will not error when the layer is already downloaded.\n    pub(crate) async fn download(&self, ctx: &RequestContext) -> Result<(), DownloadError> {\n        self.0.get_or_maybe_download(true, ctx).await?;\n        Ok(())\n    }\n\n    pub(crate) async fn needs_download(&self) -> Result<Option<NeedsDownload>, std::io::Error> {\n        self.0.needs_download().await\n    }\n\n    /// Assuming the layer is already downloaded, returns a guard which will prohibit eviction\n    /// while the guard exists.\n    ///\n    /// Returns None if the layer is currently evicted or becoming evicted.\n    pub(crate) async fn keep_resident(&self) -> Option<ResidentLayer> {\n        let downloaded = self.0.inner.get().and_then(|rowe| rowe.get())?;\n\n        Some(ResidentLayer {\n            downloaded,\n            owner: self.clone(),\n        })\n    }\n\n    /// Weak indicator of is the layer resident or not. 
Good enough for eviction, which can deal\n    /// with `EvictionError::NotFound`.\n    ///\n    /// Returns `true` if this layer might be resident, or `false`, if it most likely evicted or\n    /// will be unless a read happens soon.\n    pub(crate) fn is_likely_resident(&self) -> bool {\n        self.0\n            .inner\n            .get()\n            .map(|rowe| rowe.is_likely_resident())\n            .unwrap_or(false)\n    }\n\n    /// Downloads if necessary and creates a guard, which will keep this layer from being evicted.\n    pub(crate) async fn download_and_keep_resident(\n        &self,\n        ctx: &RequestContext,\n    ) -> Result<ResidentLayer, DownloadError> {\n        let downloaded = self.0.get_or_maybe_download(true, ctx).await?;\n\n        Ok(ResidentLayer {\n            downloaded,\n            owner: self.clone(),\n        })\n    }\n\n    pub(crate) fn info(&self, reset: LayerAccessStatsReset) -> HistoricLayerInfo {\n        self.0.info(reset)\n    }\n\n    pub(crate) fn latest_activity(&self) -> SystemTime {\n        self.0.access_stats.latest_activity()\n    }\n\n    pub(crate) fn visibility(&self) -> LayerVisibilityHint {\n        self.0.access_stats.visibility()\n    }\n\n    pub(crate) fn local_path(&self) -> &Utf8Path {\n        &self.0.path\n    }\n\n    pub(crate) fn metadata(&self) -> LayerFileMetadata {\n        self.0.metadata()\n    }\n\n    pub(crate) fn last_evicted_at(&self) -> LastEviction {\n        match self.0.last_evicted_at.try_lock() {\n            Ok(lock) => match *lock {\n                None => LastEviction::Never,\n                Some(at) => LastEviction::At(at),\n            },\n            Err(std::sync::TryLockError::WouldBlock) => LastEviction::Evicting,\n            Err(std::sync::TryLockError::Poisoned(p)) => panic!(\"Lock poisoned: {p}\"),\n        }\n    }\n\n    pub(crate) fn get_timeline_id(&self) -> Option<TimelineId> {\n        self.0\n            .timeline\n            .upgrade()\n            
.map(|timeline| timeline.timeline_id)\n    }\n\n    /// Traditional debug dumping facility\n    #[allow(unused)]\n    pub(crate) async fn dump(&self, verbose: bool, ctx: &RequestContext) -> anyhow::Result<()> {\n        self.0.desc.dump();\n\n        if verbose {\n            // for now, unconditionally download everything, even if that might not be wanted.\n            let l = self.0.get_or_maybe_download(true, ctx).await?;\n            l.dump(&self.0, ctx).await?\n        }\n\n        Ok(())\n    }\n\n    /// Waits until this layer has been dropped (and if needed, local file deletion and remote\n    /// deletion scheduling has completed).\n    ///\n    /// Does not start local deletion, use [`Self::delete_on_drop`] for that\n    /// separatedly.\n    #[cfg(any(feature = \"testing\", test))]\n    pub(crate) fn wait_drop(&self) -> impl std::future::Future<Output = ()> + 'static {\n        let mut rx = self.0.status.as_ref().unwrap().subscribe();\n\n        async move {\n            loop {\n                if rx.changed().await.is_err() {\n                    break;\n                }\n            }\n        }\n    }\n\n    fn record_access(&self, ctx: &RequestContext) {\n        if self.0.access_stats.record_access(ctx) {\n            // Visibility was modified to Visible: maybe log about this\n            match ctx.task_kind() {\n                TaskKind::CalculateSyntheticSize\n                | TaskKind::OndemandLogicalSizeCalculation\n                | TaskKind::GarbageCollector\n                | TaskKind::MgmtRequest => {\n                    // This situation is expected in code paths do binary searches of the LSN space to resolve\n                    // an LSN to a timestamp, which happens during GC, during GC cutoff calculations in synthetic size,\n                    // and on-demand for certain HTTP API requests. 
On-demand logical size calculation is also included\n                    // because it is run as a sub-task of synthetic size.\n                }\n                _ => {\n                    // In all other contexts, it is unusual to do I/O involving layers which are not visible at\n                    // some branch tip, so we log the fact that we are accessing something that the visibility\n                    // calculation thought should not be visible.\n                    //\n                    // This case is legal in brief time windows: for example an in-flight getpage request can hold on to a layer object\n                    // which was covered by a concurrent compaction.\n                    tracing::info!(\n                        layer=%self,\n                        \"became visible as a result of access\",\n                    );\n                }\n            }\n\n            // Update the timeline's visible bytes count\n            if let Some(tl) = self.0.timeline.upgrade() {\n                tl.metrics\n                    .visible_physical_size_gauge\n                    .add(self.0.desc.file_size)\n            }\n        }\n    }\n\n    pub(crate) fn set_visibility(&self, visibility: LayerVisibilityHint) {\n        let old_visibility = self.0.access_stats.set_visibility(visibility.clone());\n        use LayerVisibilityHint::*;\n        match (old_visibility, visibility) {\n            (Visible, Covered) => {\n                // Subtract this layer's contribution to the visible size metric\n                if let Some(tl) = self.0.timeline.upgrade() {\n                    debug_assert!(\n                        tl.metrics.visible_physical_size_gauge.get() >= self.0.desc.file_size\n                    );\n                    tl.metrics\n                        .visible_physical_size_gauge\n                        .sub(self.0.desc.file_size)\n                }\n            }\n            (Covered, Visible) => {\n                // Add this 
layer's contribution to the visible size metric\n                if let Some(tl) = self.0.timeline.upgrade() {\n                    tl.metrics\n                        .visible_physical_size_gauge\n                        .add(self.0.desc.file_size)\n                }\n            }\n            (Covered, Covered) | (Visible, Visible) => {\n                // no change\n            }\n        }\n    }\n}\n\n/// The download-ness ([`DownloadedLayer`]) can be either resident or wanted evicted.\n///\n/// However when we want something evicted, we cannot evict it right away as there might be current\n/// reads happening on it. For example: it has been searched from [`LayerMap::search`] but not yet\n/// read with [`Layer::get_values_reconstruct_data`].\n///\n/// [`LayerMap::search`]: crate::tenant::layer_map::LayerMap::search\n#[derive(Debug)]\nenum ResidentOrWantedEvicted {\n    Resident(Arc<DownloadedLayer>),\n    WantedEvicted(Weak<DownloadedLayer>, usize),\n}\n\nimpl ResidentOrWantedEvicted {\n    /// Non-mutating access to the a DownloadedLayer, if possible.\n    ///\n    /// This is not used on the read path (anything that calls\n    /// [`LayerInner::get_or_maybe_download`]) because it was decided that reads always win\n    /// evictions, and part of that winning is using [`ResidentOrWantedEvicted::get_and_upgrade`].\n    fn get(&self) -> Option<Arc<DownloadedLayer>> {\n        match self {\n            ResidentOrWantedEvicted::Resident(strong) => Some(strong.clone()),\n            ResidentOrWantedEvicted::WantedEvicted(weak, _) => weak.upgrade(),\n        }\n    }\n\n    /// Best-effort query for residency right now, not as strong guarantee as receiving a strong\n    /// reference from `ResidentOrWantedEvicted::get`.\n    fn is_likely_resident(&self) -> bool {\n        match self {\n            ResidentOrWantedEvicted::Resident(_) => true,\n            ResidentOrWantedEvicted::WantedEvicted(weak, _) => weak.strong_count() > 0,\n        }\n    }\n\n    /// 
Upgrades any weak to strong if possible.\n    ///\n    /// Returns a strong reference if possible, along with a boolean telling if an upgrade\n    /// happened.\n    fn get_and_upgrade(&mut self) -> Option<(Arc<DownloadedLayer>, bool)> {\n        match self {\n            ResidentOrWantedEvicted::Resident(strong) => Some((strong.clone(), false)),\n            ResidentOrWantedEvicted::WantedEvicted(weak, _) => match weak.upgrade() {\n                Some(strong) => {\n                    LAYER_IMPL_METRICS.inc_raced_wanted_evicted_accesses();\n\n                    *self = ResidentOrWantedEvicted::Resident(strong.clone());\n\n                    Some((strong, true))\n                }\n                None => None,\n            },\n        }\n    }\n\n    /// When eviction is first requested, drop down to holding a [`Weak`].\n    ///\n    /// Returns `Some` if this was the first time eviction was requested. Care should be taken to\n    /// drop the possibly last strong reference outside of the mutex of\n    /// [`heavier_once_cell::OnceCell`].\n    fn downgrade(&mut self) -> Option<Arc<DownloadedLayer>> {\n        match self {\n            ResidentOrWantedEvicted::Resident(strong) => {\n                let weak = Arc::downgrade(strong);\n                let mut temp = ResidentOrWantedEvicted::WantedEvicted(weak, strong.version);\n                std::mem::swap(self, &mut temp);\n                match temp {\n                    ResidentOrWantedEvicted::Resident(strong) => Some(strong),\n                    ResidentOrWantedEvicted::WantedEvicted(..) => unreachable!(\"just swapped\"),\n                }\n            }\n            ResidentOrWantedEvicted::WantedEvicted(..) 
=> None,\n        }\n    }\n}\n\nstruct LayerInner {\n    /// Only needed to check ondemand_download_behavior_treat_error_as_warn and creation of\n    /// [`Self::path`].\n    conf: &'static PageServerConf,\n\n    /// Full path to the file; unclear if this should exist anymore.\n    path: Utf8PathBuf,\n\n    desc: PersistentLayerDesc,\n\n    /// Timeline access is needed for remote timeline client and metrics.\n    ///\n    /// There should not be an access to timeline for any reason without entering the\n    /// [`Timeline::gate`] at the same time.\n    timeline: Weak<Timeline>,\n\n    access_stats: LayerAccessStats,\n\n    /// This custom OnceCell is backed by std mutex, but only held for short time periods.\n    ///\n    /// Filesystem changes (download, evict) are only done while holding a permit which the\n    /// `heavier_once_cell` provides.\n    ///\n    /// A number of fields in `Layer` are meant to only be updated when holding the InitPermit, but\n    /// possibly read while not holding it.\n    inner: heavier_once_cell::OnceCell<ResidentOrWantedEvicted>,\n\n    /// Do we want to delete locally and remotely this when `LayerInner` is dropped\n    wanted_deleted: AtomicBool,\n\n    /// Version is to make sure we will only evict a specific initialization of the downloaded file.\n    ///\n    /// Incremented for each initialization, stored in `DownloadedLayer::version` or\n    /// `ResidentOrWantedEvicted::WantedEvicted`.\n    version: AtomicUsize,\n\n    /// Allow subscribing to when the layer actually gets evicted, a non-cancellable download\n    /// starts, or completes.\n    ///\n    /// Updates must only be posted while holding the InitPermit or the heavier_once_cell::Guard.\n    /// Holding the InitPermit is the only time we can do state transitions, but we also need to\n    /// cancel a pending eviction on upgrading a [`ResidentOrWantedEvicted::WantedEvicted`] back to\n    /// [`ResidentOrWantedEvicted::Resident`] on access.\n    ///\n    /// The 
sender is wrapped in an Option to facilitate moving it out on [`LayerInner::drop`].\n    status: Option<tokio::sync::watch::Sender<Status>>,\n\n    /// Counter for exponential backoff with the download.\n    ///\n    /// This is atomic only for the purposes of having additional data only accessed while holding\n    /// the InitPermit.\n    consecutive_failures: AtomicUsize,\n\n    /// The generation of this Layer.\n    ///\n    /// For loaded layers (resident or evicted) this comes from [`LayerFileMetadata::generation`],\n    /// for created layers from [`Timeline::generation`].\n    generation: Generation,\n\n    /// The shard of this Layer.\n    ///\n    /// For layers created in this process, this will always be the [`ShardIndex`] of the\n    /// current `ShardIdentity`` (TODO: add link once it's introduced).\n    ///\n    /// For loaded layers, this may be some other value if the tenant has undergone\n    /// a shard split since the layer was originally written.\n    shard: ShardIndex,\n\n    /// When the Layer was last evicted but has not been downloaded since.\n    ///\n    /// This is used for skipping evicted layers from the previous heatmap (see\n    /// `[Timeline::generate_heatmap]`) and for updating metrics\n    /// (see [`LayerImplMetrics::redownload_after`]).\n    last_evicted_at: std::sync::Mutex<Option<std::time::Instant>>,\n\n    #[cfg(test)]\n    failpoints: std::sync::Mutex<Vec<failpoints::Failpoint>>,\n}\n\nimpl std::fmt::Display for LayerInner {\n    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {\n        write!(f, \"{}\", self.layer_desc().short_id())\n    }\n}\n\nimpl AsLayerDesc for LayerInner {\n    fn layer_desc(&self) -> &PersistentLayerDesc {\n        &self.desc\n    }\n}\n\n#[derive(Debug, Clone, Copy)]\nenum Status {\n    Resident,\n    Evicted,\n    Downloading,\n}\n\nimpl Drop for LayerInner {\n    fn drop(&mut self) {\n        // if there was a pending eviction, mark it cancelled here to balance metrics\n       
 if let Some((ResidentOrWantedEvicted::WantedEvicted(..), _)) = self.inner.take_and_deinit()\n        {\n            // eviction has already been started\n            LAYER_IMPL_METRICS.inc_eviction_cancelled(EvictionCancelled::LayerGone);\n\n            // eviction request is intentionally not honored as no one is present to wait for it\n            // and we could be delaying shutdown for nothing.\n        }\n\n        let timeline = self.timeline.upgrade();\n\n        if let Some(timeline) = timeline.as_ref() {\n            // Only need to decrement metrics if the timeline still exists: otherwise\n            // it will have already de-registered these metrics via TimelineMetrics::shutdown\n            timeline.metrics.dec_layer(&self.desc);\n\n            if matches!(self.access_stats.visibility(), LayerVisibilityHint::Visible) {\n                debug_assert!(\n                    timeline.metrics.visible_physical_size_gauge.get() >= self.desc.file_size\n                );\n                timeline\n                    .metrics\n                    .visible_physical_size_gauge\n                    .sub(self.desc.file_size);\n            }\n        }\n\n        if !*self.wanted_deleted.get_mut() {\n            return;\n        }\n\n        let span = tracing::info_span!(parent: None, \"layer_delete\", tenant_id = %self.layer_desc().tenant_shard_id.tenant_id, shard_id=%self.layer_desc().tenant_shard_id.shard_slug(), timeline_id = %self.layer_desc().timeline_id);\n\n        let path = std::mem::take(&mut self.path);\n        let file_name = self.layer_desc().layer_name();\n        let file_size = self.layer_desc().file_size;\n        let meta = self.metadata();\n        let status = self.status.take();\n\n        Self::spawn_blocking(move || {\n            let _g = span.entered();\n\n            // carry this until we are finished for [`Layer::wait_drop`] support\n            let _status = status;\n\n            let Some(timeline) = timeline else {\n              
  // no need to nag that timeline is gone: under normal situation on\n                // task_mgr::remove_tenant_from_memory the timeline is gone before we get dropped.\n                LAYER_IMPL_METRICS.inc_deletes_failed(DeleteFailed::TimelineGone);\n                return;\n            };\n\n            let Ok(_guard) = timeline.gate.enter() else {\n                LAYER_IMPL_METRICS.inc_deletes_failed(DeleteFailed::TimelineGone);\n                return;\n            };\n\n            let removed = match std::fs::remove_file(path) {\n                Ok(()) => true,\n                Err(e) if e.kind() == std::io::ErrorKind::NotFound => {\n                    // until we no longer do detaches by removing all local files before removing the\n                    // tenant from the global map, we will always get these errors even if we knew what\n                    // is the latest state.\n                    //\n                    // we currently do not track the latest state, so we'll also end up here on evicted\n                    // layers.\n                    false\n                }\n                Err(e) => {\n                    tracing::error!(\"failed to remove wanted deleted layer: {e}\");\n                    LAYER_IMPL_METRICS.inc_delete_removes_failed();\n                    false\n                }\n            };\n\n            if removed {\n                timeline.metrics.resident_physical_size_sub(file_size);\n            }\n            let res = timeline\n                .remote_client\n                .schedule_deletion_of_unlinked(vec![(file_name, meta)]);\n\n            if let Err(e) = res {\n                // test_timeline_deletion_with_files_stuck_in_upload_queue is good at\n                // demonstrating this deadlock (without spawn_blocking): stop will drop\n                // queued items, which will have ResidentLayer's, and those drops would try\n                // to re-entrantly lock the RemoteTimelineClient inner state.\n    
            if !timeline.is_active() {\n                    tracing::info!(\"scheduling deletion on drop failed: {e:#}\");\n                } else {\n                    tracing::warn!(\"scheduling deletion on drop failed: {e:#}\");\n                }\n                LAYER_IMPL_METRICS.inc_deletes_failed(DeleteFailed::DeleteSchedulingFailed);\n            } else {\n                LAYER_IMPL_METRICS.inc_completed_deletes();\n            }\n        });\n    }\n}\n\nimpl LayerInner {\n    #[allow(clippy::too_many_arguments)]\n    fn new(\n        conf: &'static PageServerConf,\n        timeline: &Arc<Timeline>,\n        local_path: Utf8PathBuf,\n        desc: PersistentLayerDesc,\n        downloaded: Option<Arc<DownloadedLayer>>,\n        generation: Generation,\n        shard: ShardIndex,\n    ) -> Self {\n        let (inner, version, init_status) = if let Some(inner) = downloaded {\n            let version = inner.version;\n            let resident = ResidentOrWantedEvicted::Resident(inner);\n            (\n                heavier_once_cell::OnceCell::new(resident),\n                version,\n                Status::Resident,\n            )\n        } else {\n            (heavier_once_cell::OnceCell::default(), 0, Status::Evicted)\n        };\n\n        // This object acts as a RAII guard on these metrics: increment on construction\n        timeline.metrics.inc_layer(&desc);\n\n        // New layers are visible by default. 
This metric is later updated on drop or in set_visibility\n        timeline\n            .metrics\n            .visible_physical_size_gauge\n            .add(desc.file_size);\n\n        LayerInner {\n            conf,\n            path: local_path,\n            desc,\n            timeline: Arc::downgrade(timeline),\n            access_stats: Default::default(),\n            wanted_deleted: AtomicBool::new(false),\n            inner,\n            version: AtomicUsize::new(version),\n            status: Some(tokio::sync::watch::channel(init_status).0),\n            consecutive_failures: AtomicUsize::new(0),\n            generation,\n            shard,\n            last_evicted_at: std::sync::Mutex::default(),\n            #[cfg(test)]\n            failpoints: Default::default(),\n        }\n    }\n\n    fn delete_on_drop(&self) {\n        let res =\n            self.wanted_deleted\n                .compare_exchange(false, true, Ordering::Release, Ordering::Relaxed);\n\n        if res.is_ok() {\n            LAYER_IMPL_METRICS.inc_started_deletes();\n        }\n    }\n\n    /// Cancellation safe, however dropping the future and calling this method again might result\n    /// in a new attempt to evict OR join the previously started attempt.\n    #[tracing::instrument(level = tracing::Level::DEBUG, skip_all, ret, err(level = tracing::Level::DEBUG), fields(layer=%self))]\n    pub(crate) async fn evict_and_wait(&self, timeout: Duration) -> Result<(), EvictionError> {\n        let mut rx = self.status.as_ref().unwrap().subscribe();\n\n        {\n            let current = rx.borrow_and_update();\n            match &*current {\n                Status::Resident => {\n                    // we might get lucky and evict this; continue\n                }\n                Status::Evicted | Status::Downloading => {\n                    // it is already evicted\n                    return Err(EvictionError::NotFound);\n                }\n            }\n        }\n\n        let 
strong = {\n            match self.inner.get() {\n                Some(mut either) => either.downgrade(),\n                None => {\n                    // we already have a scheduled eviction, which just has not gotten to run yet.\n                    // it might still race with a read access, but that could also get cancelled,\n                    // so let's say this is not evictable.\n                    return Err(EvictionError::NotFound);\n                }\n            }\n        };\n\n        if strong.is_some() {\n            // drop the DownloadedLayer outside of the holding the guard\n            drop(strong);\n\n            // idea here is that only one evicter should ever get to witness a strong reference,\n            // which means whenever get_or_maybe_download upgrades a weak, it must mark up a\n            // cancelled eviction and signal us, like it currently does.\n            //\n            // a second concurrent evict_and_wait will not see a strong reference.\n            LAYER_IMPL_METRICS.inc_started_evictions();\n        }\n\n        let changed = rx.changed();\n        let changed = tokio::time::timeout(timeout, changed).await;\n\n        let Ok(changed) = changed else {\n            return Err(EvictionError::Timeout);\n        };\n\n        let _: () = changed.expect(\"cannot be closed, because we are holding a strong reference\");\n\n        let current = rx.borrow_and_update();\n\n        match &*current {\n            // the easiest case\n            Status::Evicted => Ok(()),\n            // it surely was evicted in between, but then there was a new access now; we can't know\n            // if it'll succeed so lets just call it evicted\n            Status::Downloading => Ok(()),\n            // either the download which was started after eviction completed already, or it was\n            // never evicted\n            Status::Resident => Err(EvictionError::Downloaded),\n        }\n    }\n\n    /// Cancellation safe.\n    async fn 
get_or_maybe_download(\n        self: &Arc<Self>,\n        allow_download: bool,\n        ctx: &RequestContext,\n    ) -> Result<Arc<DownloadedLayer>, DownloadError> {\n        let mut wait_for_download_recorder =\n            scopeguard::guard(utils::elapsed_accum::ElapsedAccum::default(), |accum| {\n                ctx.ondemand_download_wait_observe(accum.get());\n            });\n        let (weak, permit) = {\n            // get_or_init_detached can:\n            // - be fast (mutex lock) OR uncontested semaphore permit acquire\n            // - be slow (wait for semaphore permit or closing)\n            let init_cancelled = scopeguard::guard((), |_| LAYER_IMPL_METRICS.inc_init_cancelled());\n\n            let locked = self\n                .inner\n                .get_or_init_detached_measured(Some(&mut wait_for_download_recorder))\n                .await\n                .map(|mut guard| guard.get_and_upgrade().ok_or(guard));\n\n            scopeguard::ScopeGuard::into_inner(init_cancelled);\n\n            match locked {\n                // this path could had been a RwLock::read\n                Ok(Ok((strong, upgraded))) if !upgraded => return Ok(strong),\n                Ok(Ok((strong, _))) => {\n                    // when upgraded back, the Arc<DownloadedLayer> is still available, but\n                    // previously a `evict_and_wait` was received. 
this is the only place when we\n                    // send out an update without holding the InitPermit.\n                    //\n                    // note that we also have dropped the Guard; this is fine, because we just made\n                    // a state change and are holding a strong reference to be returned.\n                    self.status.as_ref().unwrap().send_replace(Status::Resident);\n                    LAYER_IMPL_METRICS\n                        .inc_eviction_cancelled(EvictionCancelled::UpgradedBackOnAccess);\n\n                    return Ok(strong);\n                }\n                Ok(Err(guard)) => {\n                    // path to here: we won the eviction, the file should still be on the disk.\n                    let (weak, permit) = guard.take_and_deinit();\n                    (Some(weak), permit)\n                }\n                Err(permit) => (None, permit),\n            }\n        };\n        let _guard = wait_for_download_recorder.guard();\n\n        if let Some(weak) = weak {\n            // only drop the weak after dropping the heavier_once_cell guard\n            assert!(\n                matches!(weak, ResidentOrWantedEvicted::WantedEvicted(..)),\n                \"unexpected {weak:?}, ResidentOrWantedEvicted::get_and_upgrade has a bug\"\n            );\n        }\n\n        let timeline = self\n            .timeline\n            .upgrade()\n            .ok_or(DownloadError::TimelineShutdown)?;\n\n        // count cancellations, which currently remain largely unexpected\n        let init_cancelled = scopeguard::guard((), |_| LAYER_IMPL_METRICS.inc_init_cancelled());\n\n        // check if we really need to be downloaded: this can happen if a read access won the\n        // semaphore before eviction.\n        //\n        // if we are cancelled while doing this `stat` the `self.inner` will be uninitialized. 
a\n        // pending eviction will try to evict even upon finding an uninitialized `self.inner`.\n        let needs_download = self\n            .needs_download()\n            .await\n            .map_err(DownloadError::PreStatFailed);\n\n        scopeguard::ScopeGuard::into_inner(init_cancelled);\n\n        let needs_download = needs_download?;\n\n        let Some(reason) = needs_download else {\n            // the file is present locally because eviction has not had a chance to run yet\n\n            #[cfg(test)]\n            self.failpoint(failpoints::FailpointKind::AfterDeterminingLayerNeedsNoDownload)\n                .await?;\n\n            LAYER_IMPL_METRICS.inc_init_needed_no_download();\n\n            return Ok(self.initialize_after_layer_is_on_disk(permit));\n        };\n\n        // we must download; getting cancelled before spawning the download is not an issue as\n        // any still running eviction would not find anything to evict.\n\n        if let NeedsDownload::NotFile(ft) = reason {\n            return Err(DownloadError::NotFile(ft));\n        }\n\n        self.check_expected_download(ctx)?;\n\n        if !allow_download {\n            // this is only used from tests, but it is hard to test without the boolean\n            return Err(DownloadError::DownloadRequired);\n        }\n\n        let ctx = RequestContextBuilder::from(ctx)\n            .perf_span(|crnt_perf_span| {\n                info_span!(\n                    target: PERF_TRACE_TARGET,\n                    parent: crnt_perf_span,\n                    \"DOWNLOAD_LAYER\",\n                    layer = %self,\n                    reason = %reason,\n                )\n            })\n            .attached_child();\n\n        async move {\n            tracing::info!(%reason, \"downloading on-demand\");\n\n            let init_cancelled = scopeguard::guard((), |_| LAYER_IMPL_METRICS.inc_init_cancelled());\n            let res = self\n                .download_init_and_wait(timeline, 
permit, ctx.attached_child())\n                .maybe_perf_instrument(&ctx, |current_perf_span| current_perf_span.clone())\n                .await?;\n\n            scopeguard::ScopeGuard::into_inner(init_cancelled);\n            Ok(res)\n        }\n        .instrument(tracing::info_span!(\"get_or_maybe_download\", layer=%self))\n        .await\n    }\n\n    /// Nag or fail per RequestContext policy\n    fn check_expected_download(&self, ctx: &RequestContext) -> Result<(), DownloadError> {\n        use crate::context::DownloadBehavior::*;\n        let b = ctx.download_behavior();\n        match b {\n            Download => Ok(()),\n            Warn | Error => {\n                tracing::info!(\n                    \"unexpectedly on-demand downloading for task kind {:?}\",\n                    ctx.task_kind()\n                );\n                crate::metrics::UNEXPECTED_ONDEMAND_DOWNLOADS.inc();\n\n                let really_error =\n                    matches!(b, Error) && !self.conf.ondemand_download_behavior_treat_error_as_warn;\n\n                if really_error {\n                    // this check is only probablistic, seems like flakyness footgun\n                    Err(DownloadError::ContextAndConfigReallyDeniesDownloads)\n                } else {\n                    Ok(())\n                }\n            }\n        }\n    }\n\n    /// Actual download, at most one is executed at the time.\n    async fn download_init_and_wait(\n        self: &Arc<Self>,\n        timeline: Arc<Timeline>,\n        permit: heavier_once_cell::InitPermit,\n        ctx: RequestContext,\n    ) -> Result<Arc<DownloadedLayer>, DownloadError> {\n        debug_assert_current_span_has_tenant_and_timeline_id();\n\n        let (tx, rx) = tokio::sync::oneshot::channel();\n\n        let this: Arc<Self> = self.clone();\n\n        let guard = timeline\n            .gate\n            .enter()\n            .map_err(|_| DownloadError::DownloadCancelled)?;\n\n        Self::spawn(\n            
async move {\n                let _guard = guard;\n\n                // now that we have commited to downloading, send out an update to:\n                // - unhang any pending eviction\n                // - break out of evict_and_wait\n                this.status\n                    .as_ref()\n                    .unwrap()\n                    .send_replace(Status::Downloading);\n\n                #[cfg(test)]\n                this.failpoint(failpoints::FailpointKind::WaitBeforeDownloading)\n                    .await\n                    .unwrap();\n\n                let res = this.download_and_init(timeline, permit, &ctx).await;\n\n                if let Err(res) = tx.send(res) {\n                    match res {\n                        Ok(_res) => {\n                            tracing::debug!(\"layer initialized, but caller has been cancelled\");\n                            LAYER_IMPL_METRICS.inc_init_completed_without_requester();\n                        }\n                        Err(e) => {\n                            tracing::info!(\n                                \"layer file download failed, and caller has been cancelled: {e:?}\"\n                            );\n                            LAYER_IMPL_METRICS.inc_download_failed_without_requester();\n                        }\n                    }\n                }\n            }\n            .in_current_span(),\n        );\n\n        match rx.await {\n            Ok(Ok(res)) => Ok(res),\n            Ok(Err(remote_storage::DownloadError::Cancelled)) => {\n                Err(DownloadError::DownloadCancelled)\n            }\n            Ok(Err(_)) => Err(DownloadError::DownloadFailed),\n            Err(_gone) => Err(DownloadError::DownloadCancelled),\n        }\n    }\n\n    async fn download_and_init(\n        self: &Arc<LayerInner>,\n        timeline: Arc<Timeline>,\n        permit: heavier_once_cell::InitPermit,\n        ctx: &RequestContext,\n    ) -> Result<Arc<DownloadedLayer>, 
remote_storage::DownloadError> {\n        let start = std::time::Instant::now();\n        let result = timeline\n            .remote_client\n            .download_layer_file(\n                &self.desc.layer_name(),\n                &self.metadata(),\n                &self.path,\n                &timeline.gate,\n                &timeline.cancel,\n                ctx,\n            )\n            .await;\n        let latency = start.elapsed();\n        let latency_millis = u64::try_from(latency.as_millis()).unwrap();\n        match result {\n            Ok(size) => {\n                assert_eq!(size, self.desc.file_size);\n\n                match self.needs_download().await {\n                    Ok(Some(reason)) => {\n                        // this is really a bug in needs_download or remote timeline client\n                        panic!(\"post-condition failed: needs_download returned {reason:?}\");\n                    }\n                    Ok(None) => {\n                        // as expected\n                    }\n                    Err(e) => {\n                        panic!(\"post-condition failed: needs_download errored: {e:?}\");\n                    }\n                };\n                tracing::info!(size=%self.desc.file_size, %latency_millis, \"on-demand download successful\");\n                timeline\n                    .metrics\n                    .resident_physical_size_add(self.desc.file_size);\n                self.consecutive_failures.store(0, Ordering::Relaxed);\n\n                let since_last_eviction = self\n                    .last_evicted_at\n                    .lock()\n                    .unwrap()\n                    .take()\n                    .map(|ts| ts.elapsed());\n                if let Some(since_last_eviction) = since_last_eviction {\n                    LAYER_IMPL_METRICS.record_redownloaded_after(since_last_eviction);\n                }\n\n                self.access_stats.record_residence_event();\n\n               
 let task_kind: &'static str = ctx.task_kind().into();\n                ONDEMAND_DOWNLOAD_BYTES\n                    .with_label_values(&[task_kind])\n                    .inc_by(self.desc.file_size);\n                ONDEMAND_DOWNLOAD_COUNT\n                    .with_label_values(&[task_kind])\n                    .inc();\n\n                Ok(self.initialize_after_layer_is_on_disk(permit))\n            }\n            Err(e) => {\n                let consecutive_failures =\n                    1 + self.consecutive_failures.fetch_add(1, Ordering::Relaxed);\n\n                if timeline.cancel.is_cancelled() {\n                    // If we're shutting down, drop out before logging the error\n                    return Err(e);\n                }\n\n                tracing::error!(consecutive_failures, %latency_millis, \"layer file download failed: {e:#}\");\n\n                let backoff = utils::backoff::exponential_backoff_duration_seconds(\n                    consecutive_failures.min(u32::MAX as usize) as u32,\n                    1.5,\n                    60.0,\n                );\n\n                let backoff = std::time::Duration::from_secs_f64(backoff);\n\n                tokio::select! 
{\n                    _ = tokio::time::sleep(backoff) => {},\n                    _ = timeline.cancel.cancelled() => {},\n                };\n\n                Err(e)\n            }\n        }\n    }\n\n    /// Initializes the `Self::inner` to a \"resident\" state.\n    ///\n    /// Callers are assumed to ensure that the file is actually on disk with `Self::needs_download`\n    /// before calling this method.\n    ///\n    /// If this method is ever made async, it needs to be cancellation safe so that no state\n    /// changes are made before we can write to the OnceCell in non-cancellable fashion.\n    fn initialize_after_layer_is_on_disk(\n        self: &Arc<LayerInner>,\n        permit: heavier_once_cell::InitPermit,\n    ) -> Arc<DownloadedLayer> {\n        debug_assert_current_span_has_tenant_and_timeline_id();\n\n        // disable any scheduled but not yet running eviction deletions for this initialization\n        let next_version = 1 + self.version.fetch_add(1, Ordering::Relaxed);\n        self.status.as_ref().unwrap().send_replace(Status::Resident);\n\n        let res = Arc::new(DownloadedLayer {\n            owner: Arc::downgrade(self),\n            kind: tokio::sync::OnceCell::default(),\n            version: next_version,\n        });\n\n        let waiters = self.inner.initializer_count();\n        if waiters > 0 {\n            tracing::info!(waiters, \"completing layer init for other tasks\");\n        }\n\n        let value = ResidentOrWantedEvicted::Resident(res.clone());\n\n        self.inner.set(value, permit);\n\n        res\n    }\n\n    async fn needs_download(&self) -> Result<Option<NeedsDownload>, std::io::Error> {\n        match tokio::fs::metadata(&self.path).await {\n            Ok(m) => Ok(self.is_file_present_and_good_size(&m).err()),\n            Err(e) if e.kind() == std::io::ErrorKind::NotFound => Ok(Some(NeedsDownload::NotFound)),\n            Err(e) => Err(e),\n        }\n    }\n\n    fn needs_download_blocking(&self) -> 
Result<Option<NeedsDownload>, std::io::Error> {\n        match self.path.metadata() {\n            Ok(m) => Ok(self.is_file_present_and_good_size(&m).err()),\n            Err(e) if e.kind() == std::io::ErrorKind::NotFound => Ok(Some(NeedsDownload::NotFound)),\n            Err(e) => Err(e),\n        }\n    }\n\n    fn is_file_present_and_good_size(&self, m: &std::fs::Metadata) -> Result<(), NeedsDownload> {\n        // in future, this should include sha2-256 validation of the file.\n        if !m.is_file() {\n            Err(NeedsDownload::NotFile(m.file_type()))\n        } else if m.len() != self.desc.file_size {\n            Err(NeedsDownload::WrongSize {\n                actual: m.len(),\n                expected: self.desc.file_size,\n            })\n        } else {\n            Ok(())\n        }\n    }\n\n    fn info(&self, reset: LayerAccessStatsReset) -> HistoricLayerInfo {\n        let layer_name = self.desc.layer_name().to_string();\n\n        let resident = self\n            .inner\n            .get()\n            .map(|rowe| rowe.is_likely_resident())\n            .unwrap_or(false);\n\n        let access_stats = self.access_stats.as_api_model(reset);\n\n        if self.desc.is_delta {\n            let lsn_range = &self.desc.lsn_range;\n\n            HistoricLayerInfo::Delta {\n                layer_file_name: layer_name,\n                layer_file_size: self.desc.file_size,\n                lsn_start: lsn_range.start,\n                lsn_end: lsn_range.end,\n                remote: !resident,\n                access_stats,\n                l0: crate::tenant::layer_map::LayerMap::is_l0(\n                    &self.layer_desc().key_range,\n                    self.layer_desc().is_delta,\n                ),\n            }\n        } else {\n            let lsn = self.desc.image_layer_lsn();\n\n            HistoricLayerInfo::Image {\n                layer_file_name: layer_name,\n                layer_file_size: self.desc.file_size,\n                
lsn_start: lsn,\n                remote: !resident,\n                access_stats,\n            }\n        }\n    }\n\n    /// `DownloadedLayer` is being dropped, so it calls this method.\n    fn on_downloaded_layer_drop(self: Arc<LayerInner>, only_version: usize) {\n        // we cannot know without inspecting LayerInner::inner if we should evict or not, even\n        // though here it is very likely\n        let span = tracing::info_span!(parent: None, \"layer_evict\", tenant_id = %self.desc.tenant_shard_id.tenant_id, shard_id = %self.desc.tenant_shard_id.shard_slug(), timeline_id = %self.desc.timeline_id, layer=%self, version=%only_version);\n\n        // NOTE: this scope *must* never call `self.inner.get` because evict_and_wait might\n        // drop while the `self.inner` is being locked, leading to a deadlock.\n\n        let start_evicting = async move {\n            #[cfg(test)]\n            self.failpoint(failpoints::FailpointKind::WaitBeforeStartingEvicting)\n                .await\n                .expect(\"failpoint should not have errored\");\n\n            tracing::debug!(\"eviction started\");\n\n            let res = self.wait_for_turn_and_evict(only_version).await;\n            // metrics: ignore the Ok branch, it is not done yet\n            if let Err(e) = res {\n                tracing::debug!(res=?Err::<(), _>(&e), \"eviction completed\");\n                LAYER_IMPL_METRICS.inc_eviction_cancelled(e);\n            }\n        };\n\n        Self::spawn(start_evicting.instrument(span));\n    }\n\n    async fn wait_for_turn_and_evict(\n        self: Arc<LayerInner>,\n        only_version: usize,\n    ) -> Result<(), EvictionCancelled> {\n        fn is_good_to_continue(status: &Status) -> Result<(), EvictionCancelled> {\n            use Status::*;\n            match status {\n                Resident => Ok(()),\n                Evicted => Err(EvictionCancelled::UnexpectedEvictedState),\n                Downloading => 
Err(EvictionCancelled::LostToDownload),\n            }\n        }\n\n        let timeline = self\n            .timeline\n            .upgrade()\n            .ok_or(EvictionCancelled::TimelineGone)?;\n\n        let mut rx = self\n            .status\n            .as_ref()\n            .expect(\"LayerInner cannot be dropped, holding strong ref\")\n            .subscribe();\n\n        is_good_to_continue(&rx.borrow_and_update())?;\n\n        let Ok(gate) = timeline.gate.enter() else {\n            return Err(EvictionCancelled::TimelineGone);\n        };\n\n        let permit = {\n            // we cannot just `std::fs::remove_file` because there might already be an\n            // get_or_maybe_download which will inspect filesystem and reinitialize. filesystem\n            // operations must be done while holding the heavier_once_cell::InitPermit\n            let mut wait = std::pin::pin!(self.inner.get_or_init_detached());\n\n            let waited = loop {\n                // we must race to the Downloading starting, otherwise we would have to wait until the\n                // completion of the download. waiting for download could be long and hinder our\n                // efforts to alert on \"hanging\" evictions.\n                tokio::select! 
{\n                    res = &mut wait => break res,\n                    _ = rx.changed() => {\n                        is_good_to_continue(&rx.borrow_and_update())?;\n                        // two possibilities for Status::Resident:\n                        // - the layer was found locally from disk by a read\n                        // - we missed a bunch of updates and now the layer is\n                        // again downloaded -- assume we'll fail later on with\n                        // version check or AlreadyReinitialized\n                    }\n                }\n            };\n\n            // re-check now that we have the guard or permit; all updates should have happened\n            // while holding the permit.\n            is_good_to_continue(&rx.borrow_and_update())?;\n\n            // the term deinitialize is used here, because clearing out the Weak will eventually\n            // lead to deallocating the reference counted value, and the value we\n            // `Guard::take_and_deinit` is likely to be the last because the Weak is never cloned.\n            let (_weak, permit) = match waited {\n                Ok(guard) => {\n                    match &*guard {\n                        ResidentOrWantedEvicted::WantedEvicted(_weak, version)\n                            if *version == only_version =>\n                        {\n                            tracing::debug!(version, \"deinitializing matching WantedEvicted\");\n                            let (weak, permit) = guard.take_and_deinit();\n                            (Some(weak), permit)\n                        }\n                        ResidentOrWantedEvicted::WantedEvicted(_, version) => {\n                            // if we were not doing the version check, we would need to try to\n                            // upgrade the weak here to see if it really is dropped. 
version check\n                            // is done instead assuming that it is cheaper.\n                            tracing::debug!(\n                                version,\n                                only_version,\n                                \"version mismatch, not deinitializing\"\n                            );\n                            return Err(EvictionCancelled::VersionCheckFailed);\n                        }\n                        ResidentOrWantedEvicted::Resident(_) => {\n                            return Err(EvictionCancelled::AlreadyReinitialized);\n                        }\n                    }\n                }\n                Err(permit) => {\n                    tracing::debug!(\"continuing after cancelled get_or_maybe_download or eviction\");\n                    (None, permit)\n                }\n            };\n\n            permit\n        };\n\n        let span = tracing::Span::current();\n\n        let spawned_at = std::time::Instant::now();\n\n        // this is on purpose a detached spawn; we don't need to wait for it\n        //\n        // eviction completion reporting is the only thing hinging on this, and it can be just as\n        // well from a spawn_blocking thread.\n        //\n        // important to note that now that we've acquired the permit we have made sure the evicted\n        // file is either the exact `WantedEvicted` we wanted to evict, or uninitialized in case\n        // there are multiple evictions. The rest is not cancellable, and we've now committed to\n        // evicting.\n        //\n        // If spawn_blocking has a queue and maximum number of threads are in use, we could stall\n        // reads. 
We will need to add cancellation for that if necessary.\n        Self::spawn_blocking(move || {\n            let _span = span.entered();\n\n            let res = self.evict_blocking(&timeline, &gate, &permit);\n\n            let waiters = self.inner.initializer_count();\n\n            if waiters > 0 {\n                LAYER_IMPL_METRICS.inc_evicted_with_waiters();\n            }\n\n            let completed_in = spawned_at.elapsed();\n            LAYER_IMPL_METRICS.record_time_to_evict(completed_in);\n\n            match res {\n                Ok(()) => LAYER_IMPL_METRICS.inc_completed_evictions(),\n                Err(e) => LAYER_IMPL_METRICS.inc_eviction_cancelled(e),\n            }\n\n            tracing::debug!(?res, elapsed_ms=%completed_in.as_millis(), %waiters, \"eviction completed\");\n        });\n\n        Ok(())\n    }\n\n    /// This is blocking only to do just one spawn_blocking hop compared to multiple via tokio::fs.\n    fn evict_blocking(\n        &self,\n        timeline: &Timeline,\n        _gate: &gate::GateGuard,\n        _permit: &heavier_once_cell::InitPermit,\n    ) -> Result<(), EvictionCancelled> {\n        // now accesses to `self.inner.get_or_init*` wait on the semaphore or the `_permit`\n\n        match capture_mtime_and_remove(&self.path) {\n            Ok(local_layer_mtime) => {\n                let duration = SystemTime::now().duration_since(local_layer_mtime);\n                match duration {\n                    Ok(elapsed) => {\n                        let accessed_and_visible = self.access_stats.accessed()\n                            && self.access_stats.visibility() == LayerVisibilityHint::Visible;\n                        if accessed_and_visible {\n                            // Only layers used for reads contribute to our \"low residence\" metric that is used\n                            // to detect thrashing.  Layers promoted for other reasons (e.g. 
compaction) are allowed\n                            // to be rapidly evicted without contributing to this metric.\n                            timeline\n                                .metrics\n                                .evictions_with_low_residence_duration\n                                .read()\n                                .unwrap()\n                                .observe(elapsed);\n                        }\n\n                        tracing::info!(\n                            residence_millis = elapsed.as_millis(),\n                            accessed_and_visible,\n                            \"evicted layer after known residence period\"\n                        );\n                    }\n                    Err(_) => {\n                        tracing::info!(\"evicted layer after unknown residence period\");\n                    }\n                }\n                timeline.metrics.evictions.inc();\n                timeline\n                    .metrics\n                    .resident_physical_size_sub(self.desc.file_size);\n            }\n            Err(e) if e.kind() == std::io::ErrorKind::NotFound => {\n                tracing::error!(\n                    layer_size = %self.desc.file_size,\n                    \"failed to evict layer from disk, it was already gone\"\n                );\n                return Err(EvictionCancelled::FileNotFound);\n            }\n            Err(e) => {\n                // FIXME: this should probably be an abort\n                tracing::error!(\"failed to evict file from disk: {e:#}\");\n                return Err(EvictionCancelled::RemoveFailed);\n            }\n        }\n\n        self.access_stats.record_residence_event();\n\n        *self.last_evicted_at.lock().unwrap() = Some(std::time::Instant::now());\n\n        self.status.as_ref().unwrap().send_replace(Status::Evicted);\n\n        Ok(())\n    }\n\n    fn metadata(&self) -> LayerFileMetadata {\n        
LayerFileMetadata::new(self.desc.file_size, self.generation, self.shard)\n    }\n\n    /// Needed to use entered runtime in tests, but otherwise use BACKGROUND_RUNTIME.\n    ///\n    /// Synchronizing with spawned tasks is very complicated otherwise.\n    fn spawn<F>(fut: F)\n    where\n        F: std::future::Future<Output = ()> + Send + 'static,\n    {\n        #[cfg(test)]\n        tokio::task::spawn(fut);\n        #[cfg(not(test))]\n        crate::task_mgr::BACKGROUND_RUNTIME.spawn(fut);\n    }\n\n    /// Needed to use entered runtime in tests, but otherwise use BACKGROUND_RUNTIME.\n    fn spawn_blocking<F>(f: F)\n    where\n        F: FnOnce() + Send + 'static,\n    {\n        #[cfg(test)]\n        tokio::task::spawn_blocking(f);\n        #[cfg(not(test))]\n        crate::task_mgr::BACKGROUND_RUNTIME.spawn_blocking(f);\n    }\n}\n\nfn capture_mtime_and_remove(path: &Utf8Path) -> Result<SystemTime, std::io::Error> {\n    let m = path.metadata()?;\n    let local_layer_mtime = m.modified()?;\n    std::fs::remove_file(path)?;\n    Ok(local_layer_mtime)\n}\n\n#[derive(Debug, thiserror::Error)]\npub(crate) enum EvictionError {\n    #[error(\"layer was already evicted\")]\n    NotFound,\n\n    /// Evictions must always lose to downloads in races, and this time it happened.\n    #[error(\"layer was downloaded instead\")]\n    Downloaded,\n\n    #[error(\"eviction did not happen within timeout\")]\n    Timeout,\n}\n\n/// Error internal to the [`LayerInner::get_or_maybe_download`]\n#[derive(Debug, thiserror::Error)]\npub(crate) enum DownloadError {\n    #[error(\"timeline has already shutdown\")]\n    TimelineShutdown,\n    #[error(\"context denies downloading\")]\n    ContextAndConfigReallyDeniesDownloads,\n    #[error(\"downloading is really required but not allowed by this method\")]\n    DownloadRequired,\n    #[error(\"layer path exists, but it is not a file: {0:?}\")]\n    NotFile(std::fs::FileType),\n    /// Why no error here? 
Because it will be reported by page_service. We should had also done\n    /// retries already.\n    #[error(\"downloading evicted layer file failed\")]\n    DownloadFailed,\n    #[error(\"downloading failed, possibly for shutdown\")]\n    DownloadCancelled,\n    #[error(\"pre-condition: stat before download failed\")]\n    PreStatFailed(#[source] std::io::Error),\n\n    #[cfg(test)]\n    #[error(\"failpoint: {0:?}\")]\n    Failpoint(failpoints::FailpointKind),\n}\n\nimpl DownloadError {\n    pub(crate) fn is_cancelled(&self) -> bool {\n        matches!(self, DownloadError::DownloadCancelled)\n    }\n}\n\n#[derive(Debug, PartialEq, Copy, Clone)]\npub(crate) enum NeedsDownload {\n    NotFound,\n    NotFile(std::fs::FileType),\n    WrongSize { actual: u64, expected: u64 },\n}\n\nimpl std::fmt::Display for NeedsDownload {\n    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {\n        match self {\n            NeedsDownload::NotFound => write!(f, \"file was not found\"),\n            NeedsDownload::NotFile(ft) => write!(f, \"path is not a file; {ft:?}\"),\n            NeedsDownload::WrongSize { actual, expected } => {\n                write!(f, \"file size mismatch {actual} vs. 
{expected}\")\n            }\n        }\n    }\n}\n\n/// Existence of `DownloadedLayer` means that we have the file locally, and can later evict it.\npub(crate) struct DownloadedLayer {\n    owner: Weak<LayerInner>,\n    // Use tokio OnceCell as we do not need to deinitialize this, it'll just get dropped with the\n    // DownloadedLayer\n    kind: tokio::sync::OnceCell<anyhow::Result<LayerKind>>,\n    version: usize,\n}\n\nimpl std::fmt::Debug for DownloadedLayer {\n    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {\n        f.debug_struct(\"DownloadedLayer\")\n            // owner omitted because it is always \"Weak\"\n            .field(\"kind\", &self.kind)\n            .field(\"version\", &self.version)\n            .finish()\n    }\n}\n\nimpl Drop for DownloadedLayer {\n    fn drop(&mut self) {\n        if let Some(owner) = self.owner.upgrade() {\n            owner.on_downloaded_layer_drop(self.version);\n        } else {\n            // Layer::drop will handle cancelling the eviction; because of drop order and\n            // `DownloadedLayer` never leaking, we cannot know here if eviction was requested.\n        }\n    }\n}\n\nimpl DownloadedLayer {\n    /// Initializes the `DeltaLayerInner` or `ImageLayerInner` within [`LayerKind`].\n    /// Failure to load the layer is sticky, i.e., future `get()` calls will return\n    /// the initial load failure immediately.\n    ///\n    /// `owner` parameter is a strong reference at the same `LayerInner` as the\n    /// `DownloadedLayer::owner` would be when upgraded. 
Given how this method ends up called,\n    /// we will always have the LayerInner on the callstack, so we can just use it.\n    async fn get<'a>(\n        &'a self,\n        owner: &Arc<LayerInner>,\n        ctx: &RequestContext,\n    ) -> anyhow::Result<&'a LayerKind> {\n        let init = || async {\n            assert_eq!(\n                Weak::as_ptr(&self.owner),\n                Arc::as_ptr(owner),\n                \"these are the same, just avoiding the upgrade\"\n            );\n\n            let res = if owner.desc.is_delta {\n                let ctx = RequestContextBuilder::from(ctx)\n                    .page_content_kind(crate::context::PageContentKind::DeltaLayerSummary)\n                    .attached_child();\n                let summary = Some(delta_layer::Summary::expected(\n                    owner.desc.tenant_shard_id.tenant_id,\n                    owner.desc.timeline_id,\n                    owner.desc.key_range.clone(),\n                    owner.desc.lsn_range.clone(),\n                ));\n                delta_layer::DeltaLayerInner::load(\n                    &owner.path,\n                    summary,\n                    Some(owner.conf.max_vectored_read_bytes),\n                    &ctx,\n                )\n                .await\n                .map(LayerKind::Delta)\n            } else {\n                let ctx = RequestContextBuilder::from(ctx)\n                    .page_content_kind(crate::context::PageContentKind::ImageLayerSummary)\n                    .attached_child();\n                let lsn = owner.desc.image_layer_lsn();\n                let summary = Some(image_layer::Summary::expected(\n                    owner.desc.tenant_shard_id.tenant_id,\n                    owner.desc.timeline_id,\n                    owner.desc.key_range.clone(),\n                    lsn,\n                ));\n                image_layer::ImageLayerInner::load(\n                    &owner.path,\n                    lsn,\n                    
summary,\n                    Some(owner.conf.max_vectored_read_bytes),\n                    &ctx,\n                )\n                .await\n                .map(LayerKind::Image)\n            };\n\n            match res {\n                Ok(layer) => Ok(layer),\n                Err(err) => {\n                    LAYER_IMPL_METRICS.inc_permanent_loading_failures();\n                    // We log this message once over the lifetime of `Self`\n                    // => Ok and good to log backtrace and path here.\n                    tracing::error!(\n                        \"layer load failed, assuming permanent failure: {}: {err:?}\",\n                        owner.path\n                    );\n                    Err(err)\n                }\n            }\n        };\n        self.kind\n            .get_or_init(init)\n            .await\n            .as_ref()\n            // We already logged the full backtrace above, once. Don't repeat that here.\n            .map_err(|e| anyhow::anyhow!(\"layer load failed earlier: {e}\"))\n    }\n\n    async fn get_values_reconstruct_data(\n        &self,\n        this: ResidentLayer,\n        keyspace: KeySpace,\n        lsn_range: Range<Lsn>,\n        reconstruct_data: &mut ValuesReconstructState,\n        ctx: &RequestContext,\n    ) -> Result<(), GetVectoredError> {\n        use LayerKind::*;\n\n        match self\n            .get(&this.owner.0, ctx)\n            .await\n            .map_err(GetVectoredError::Other)?\n        {\n            Delta(d) => {\n                d.get_values_reconstruct_data(this, keyspace, lsn_range, reconstruct_data, ctx)\n                    .await\n            }\n            Image(i) => {\n                i.get_values_reconstruct_data(this, keyspace, reconstruct_data, ctx)\n                    .await\n            }\n        }\n    }\n\n    async fn dump(&self, owner: &Arc<LayerInner>, ctx: &RequestContext) -> anyhow::Result<()> {\n        use LayerKind::*;\n        match self.get(owner, 
ctx).await? {\n            Delta(d) => d.dump(ctx).await?,\n            Image(i) => i.dump(ctx).await?,\n        }\n\n        Ok(())\n    }\n}\n\n/// Wrapper around an actual layer implementation.\n#[derive(Debug)]\nenum LayerKind {\n    Delta(delta_layer::DeltaLayerInner),\n    Image(image_layer::ImageLayerInner),\n}\n\n/// Guard for forcing a layer be resident while it exists.\n#[derive(Clone)]\npub struct ResidentLayer {\n    owner: Layer,\n    downloaded: Arc<DownloadedLayer>,\n}\n\nimpl std::fmt::Display for ResidentLayer {\n    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {\n        write!(f, \"{}\", self.owner)\n    }\n}\n\nimpl std::fmt::Debug for ResidentLayer {\n    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {\n        write!(f, \"{}\", self.owner)\n    }\n}\n\nimpl ResidentLayer {\n    /// Release the eviction guard, converting back into a plain [`Layer`].\n    ///\n    /// You can access the [`Layer`] also by using `as_ref`.\n    pub(crate) fn drop_eviction_guard(self) -> Layer {\n        self.into()\n    }\n\n    /// Loads all keys stored in the layer. 
Returns the keys.\n    #[tracing::instrument(level = tracing::Level::DEBUG, skip_all, fields(layer=%self))]\n    pub(crate) async fn load_keys<'a>(\n        &'a self,\n        ctx: &RequestContext,\n    ) -> anyhow::Result<Vec<pageserver_api::key::Key>> {\n        use LayerKind::*;\n\n        let owner = &self.owner.0;\n        let inner = self.downloaded.get(owner, ctx).await?;\n\n        // this is valid because the DownloadedLayer::kind is a OnceCell, not a\n        // Mutex<OnceCell>, so we cannot go and deinitialize the value with OnceCell::take\n        // while it's being held.\n        self.owner.record_access(ctx);\n\n        let res = match inner {\n            Delta(d) => delta_layer::DeltaLayerInner::load_keys(d, ctx).await,\n            Image(i) => image_layer::ImageLayerInner::load_keys(i, ctx).await,\n        };\n        res.with_context(|| format!(\"Layer index is corrupted for {self}\"))\n    }\n\n    /// Read all the keys in this layer which match the ShardIdentity, and write them all to\n    /// the provided writer.  
Return the number of keys written.\n    #[tracing::instrument(level = tracing::Level::DEBUG, skip_all, fields(layer=%self))]\n    pub(crate) async fn filter(\n        &self,\n        shard_identity: &ShardIdentity,\n        writer: &mut ImageLayerWriter,\n        ctx: &RequestContext,\n    ) -> Result<usize, CompactionError> {\n        use LayerKind::*;\n\n        match self\n            .downloaded\n            .get(&self.owner.0, ctx)\n            .await\n            .map_err(CompactionError::Other)?\n        {\n            Delta(_) => {\n                return Err(CompactionError::Other(anyhow::anyhow!(format!(\n                    \"cannot filter() on a delta layer {self}\"\n                ))));\n            }\n            Image(i) => i\n                .filter(shard_identity, writer, ctx)\n                .await\n                .map_err(CompactionError::Other),\n        }\n    }\n\n    /// Returns the amount of keys and values written to the writer.\n    pub(crate) async fn copy_delta_prefix(\n        &self,\n        writer: &mut super::delta_layer::DeltaLayerWriter,\n        until: Lsn,\n        ctx: &RequestContext,\n    ) -> anyhow::Result<usize> {\n        use LayerKind::*;\n\n        let owner = &self.owner.0;\n\n        match self.downloaded.get(owner, ctx).await? 
{\n            Delta(d) => d\n                .copy_prefix(writer, until, ctx)\n                .await\n                .with_context(|| format!(\"copy_delta_prefix until {until} of {self}\")),\n            Image(_) => anyhow::bail!(format!(\"cannot copy_lsn_prefix of image layer {self}\")),\n        }\n    }\n\n    pub(crate) fn local_path(&self) -> &Utf8Path {\n        &self.owner.0.path\n    }\n\n    pub(crate) fn metadata(&self) -> LayerFileMetadata {\n        self.owner.metadata()\n    }\n\n    /// Cast the layer to a delta, return an error if it is an image layer.\n    pub(crate) async fn get_as_delta(\n        &self,\n        ctx: &RequestContext,\n    ) -> anyhow::Result<&delta_layer::DeltaLayerInner> {\n        use LayerKind::*;\n        match self.downloaded.get(&self.owner.0, ctx).await? {\n            Delta(d) => Ok(d),\n            Image(_) => Err(anyhow::anyhow!(\"image layer\")),\n        }\n    }\n\n    /// Cast the layer to an image, return an error if it is a delta layer.\n    pub(crate) async fn get_as_image(\n        &self,\n        ctx: &RequestContext,\n    ) -> anyhow::Result<&image_layer::ImageLayerInner> {\n        use LayerKind::*;\n        match self.downloaded.get(&self.owner.0, ctx).await? 
{\n            Image(d) => Ok(d),\n            Delta(_) => Err(anyhow::anyhow!(\"delta layer\")),\n        }\n    }\n}\n\nimpl AsLayerDesc for ResidentLayer {\n    fn layer_desc(&self) -> &PersistentLayerDesc {\n        self.owner.layer_desc()\n    }\n}\n\nimpl AsRef<Layer> for ResidentLayer {\n    fn as_ref(&self) -> &Layer {\n        &self.owner\n    }\n}\n\n/// Drop the eviction guard.\nimpl From<ResidentLayer> for Layer {\n    fn from(value: ResidentLayer) -> Self {\n        value.owner\n    }\n}\n\nuse metrics::IntCounter;\n\npub(crate) struct LayerImplMetrics {\n    started_evictions: IntCounter,\n    completed_evictions: IntCounter,\n    cancelled_evictions: enum_map::EnumMap<EvictionCancelled, IntCounter>,\n\n    started_deletes: IntCounter,\n    completed_deletes: IntCounter,\n    failed_deletes: enum_map::EnumMap<DeleteFailed, IntCounter>,\n\n    rare_counters: enum_map::EnumMap<RareEvent, IntCounter>,\n    inits_cancelled: metrics::core::GenericCounter<metrics::core::AtomicU64>,\n    redownload_after: metrics::Histogram,\n    time_to_evict: metrics::Histogram,\n}\n\nimpl Default for LayerImplMetrics {\n    fn default() -> Self {\n        use enum_map::Enum;\n\n        // reminder: these will be pageserver_layer_* with \"_total\" suffix\n\n        let started_evictions = metrics::register_int_counter!(\n            \"pageserver_layer_started_evictions\",\n            \"Evictions started in the Layer implementation\"\n        )\n        .unwrap();\n        let completed_evictions = metrics::register_int_counter!(\n            \"pageserver_layer_completed_evictions\",\n            \"Evictions completed in the Layer implementation\"\n        )\n        .unwrap();\n\n        let cancelled_evictions = metrics::register_int_counter_vec!(\n            \"pageserver_layer_cancelled_evictions_count\",\n            \"Different reasons for evictions to have been cancelled or failed\",\n            &[\"reason\"]\n        )\n        .unwrap();\n\n        let 
cancelled_evictions = enum_map::EnumMap::from_array(std::array::from_fn(|i| {\n            let reason = EvictionCancelled::from_usize(i);\n            let s = reason.as_str();\n            cancelled_evictions.with_label_values(&[s])\n        }));\n\n        let started_deletes = metrics::register_int_counter!(\n            \"pageserver_layer_started_deletes\",\n            \"Deletions on drop pending in the Layer implementation\"\n        )\n        .unwrap();\n        let completed_deletes = metrics::register_int_counter!(\n            \"pageserver_layer_completed_deletes\",\n            \"Deletions on drop completed in the Layer implementation\"\n        )\n        .unwrap();\n\n        let failed_deletes = metrics::register_int_counter_vec!(\n            \"pageserver_layer_failed_deletes_count\",\n            \"Different reasons for deletions on drop to have failed\",\n            &[\"reason\"]\n        )\n        .unwrap();\n\n        let failed_deletes = enum_map::EnumMap::from_array(std::array::from_fn(|i| {\n            let reason = DeleteFailed::from_usize(i);\n            let s = reason.as_str();\n            failed_deletes.with_label_values(&[s])\n        }));\n\n        let rare_counters = metrics::register_int_counter_vec!(\n            \"pageserver_layer_assumed_rare_count\",\n            \"Times unexpected or assumed rare event happened\",\n            &[\"event\"]\n        )\n        .unwrap();\n\n        let rare_counters = enum_map::EnumMap::from_array(std::array::from_fn(|i| {\n            let event = RareEvent::from_usize(i);\n            let s = event.as_str();\n            rare_counters.with_label_values(&[s])\n        }));\n\n        let inits_cancelled = metrics::register_int_counter!(\n            \"pageserver_layer_inits_cancelled_count\",\n            \"Times Layer initialization was cancelled\",\n        )\n        .unwrap();\n\n        let redownload_after = {\n            let minute = 60.0;\n            let hour = 60.0 * minute;\n       
     metrics::register_histogram!(\n                \"pageserver_layer_redownloaded_after\",\n                \"Time between evicting and re-downloading.\",\n                vec![\n                    10.0,\n                    30.0,\n                    minute,\n                    5.0 * minute,\n                    15.0 * minute,\n                    30.0 * minute,\n                    hour,\n                    12.0 * hour,\n                ]\n            )\n            .unwrap()\n        };\n\n        let time_to_evict = metrics::register_histogram!(\n            \"pageserver_layer_eviction_held_permit_seconds\",\n            \"Time eviction held the permit.\",\n            vec![0.001, 0.010, 0.100, 0.500, 1.000, 5.000]\n        )\n        .unwrap();\n\n        Self {\n            started_evictions,\n            completed_evictions,\n            cancelled_evictions,\n\n            started_deletes,\n            completed_deletes,\n            failed_deletes,\n\n            rare_counters,\n            inits_cancelled,\n            redownload_after,\n            time_to_evict,\n        }\n    }\n}\n\nimpl LayerImplMetrics {\n    fn inc_started_evictions(&self) {\n        self.started_evictions.inc();\n    }\n    fn inc_completed_evictions(&self) {\n        self.completed_evictions.inc();\n    }\n    fn inc_eviction_cancelled(&self, reason: EvictionCancelled) {\n        self.cancelled_evictions[reason].inc()\n    }\n\n    fn inc_started_deletes(&self) {\n        self.started_deletes.inc();\n    }\n    fn inc_completed_deletes(&self) {\n        self.completed_deletes.inc();\n    }\n    fn inc_deletes_failed(&self, reason: DeleteFailed) {\n        self.failed_deletes[reason].inc();\n    }\n\n    /// Counted separately from failed layer deletes because we will complete the layer deletion\n    /// attempt regardless of failure to delete local file.\n    fn inc_delete_removes_failed(&self) {\n        self.rare_counters[RareEvent::RemoveOnDropFailed].inc();\n    }\n\n   
 /// Expected rare just as cancellations are rare, but we could have cancellations separate from\n    /// the single caller which can start the download, so use this counter to separate them.\n    fn inc_init_completed_without_requester(&self) {\n        self.rare_counters[RareEvent::InitCompletedWithoutRequester].inc();\n    }\n\n    /// Expected rare because cancellations are unexpected, and failures are unexpected\n    fn inc_download_failed_without_requester(&self) {\n        self.rare_counters[RareEvent::DownloadFailedWithoutRequester].inc();\n    }\n\n    /// The Weak in ResidentOrWantedEvicted::WantedEvicted was successfully upgraded.\n    ///\n    /// If this counter is always zero, we should replace ResidentOrWantedEvicted type with an\n    /// Option.\n    fn inc_raced_wanted_evicted_accesses(&self) {\n        self.rare_counters[RareEvent::UpgradedWantedEvicted].inc();\n    }\n\n    /// These are only expected for [`Self::inc_init_cancelled`] amount when\n    /// running with remote storage.\n    fn inc_init_needed_no_download(&self) {\n        self.rare_counters[RareEvent::InitWithoutDownload].inc();\n    }\n\n    /// Expected rare because all layer files should be readable and good\n    fn inc_permanent_loading_failures(&self) {\n        self.rare_counters[RareEvent::PermanentLoadingFailure].inc();\n    }\n\n    fn inc_init_cancelled(&self) {\n        self.inits_cancelled.inc()\n    }\n\n    fn record_redownloaded_after(&self, duration: std::time::Duration) {\n        self.redownload_after.observe(duration.as_secs_f64())\n    }\n\n    /// This would be bad if it ever happened, or mean extreme disk pressure. We should probably\n    /// instead cancel eviction if we would have read waiters. 
We cannot however separate reads\n    /// from other evictions, so this could have noise as well.\n    fn inc_evicted_with_waiters(&self) {\n        self.rare_counters[RareEvent::EvictedWithWaiters].inc();\n    }\n\n    /// Recorded at least initially as the permit is now acquired in async context before\n    /// spawn_blocking action.\n    fn record_time_to_evict(&self, duration: std::time::Duration) {\n        self.time_to_evict.observe(duration.as_secs_f64())\n    }\n}\n\n#[derive(Debug, Clone, Copy, enum_map::Enum)]\nenum EvictionCancelled {\n    LayerGone,\n    TimelineGone,\n    VersionCheckFailed,\n    FileNotFound,\n    RemoveFailed,\n    AlreadyReinitialized,\n    /// Not evicted because of a pending reinitialization\n    LostToDownload,\n    /// After eviction, there was a new layer access which cancelled the eviction.\n    UpgradedBackOnAccess,\n    UnexpectedEvictedState,\n}\n\nimpl EvictionCancelled {\n    fn as_str(&self) -> &'static str {\n        match self {\n            EvictionCancelled::LayerGone => \"layer_gone\",\n            EvictionCancelled::TimelineGone => \"timeline_gone\",\n            EvictionCancelled::VersionCheckFailed => \"version_check_fail\",\n            EvictionCancelled::FileNotFound => \"file_not_found\",\n            EvictionCancelled::RemoveFailed => \"remove_failed\",\n            EvictionCancelled::AlreadyReinitialized => \"already_reinitialized\",\n            EvictionCancelled::LostToDownload => \"lost_to_download\",\n            EvictionCancelled::UpgradedBackOnAccess => \"upgraded_back_on_access\",\n            EvictionCancelled::UnexpectedEvictedState => \"unexpected_evicted_state\",\n        }\n    }\n}\n\n#[derive(enum_map::Enum)]\nenum DeleteFailed {\n    TimelineGone,\n    DeleteSchedulingFailed,\n}\n\nimpl DeleteFailed {\n    fn as_str(&self) -> &'static str {\n        match self {\n            DeleteFailed::TimelineGone => \"timeline_gone\",\n            DeleteFailed::DeleteSchedulingFailed => 
\"delete_scheduling_failed\",\n        }\n    }\n}\n\n#[derive(enum_map::Enum)]\nenum RareEvent {\n    RemoveOnDropFailed,\n    InitCompletedWithoutRequester,\n    DownloadFailedWithoutRequester,\n    UpgradedWantedEvicted,\n    InitWithoutDownload,\n    PermanentLoadingFailure,\n    EvictedWithWaiters,\n}\n\nimpl RareEvent {\n    fn as_str(&self) -> &'static str {\n        use RareEvent::*;\n\n        match self {\n            RemoveOnDropFailed => \"remove_on_drop_failed\",\n            InitCompletedWithoutRequester => \"init_completed_without\",\n            DownloadFailedWithoutRequester => \"download_failed_without\",\n            UpgradedWantedEvicted => \"raced_wanted_evicted\",\n            InitWithoutDownload => \"init_needed_no_download\",\n            PermanentLoadingFailure => \"permanent_loading_failure\",\n            EvictedWithWaiters => \"evicted_with_waiters\",\n        }\n    }\n}\n\npub(crate) static LAYER_IMPL_METRICS: once_cell::sync::Lazy<LayerImplMetrics> =\n    once_cell::sync::Lazy::new(LayerImplMetrics::default);\n"
  },
  {
    "path": "pageserver/src/tenant/storage_layer/layer_desc.rs",
    "content": "use core::fmt::Display;\nuse std::ops::Range;\n\nuse pageserver_api::key::Key;\nuse pageserver_api::shard::TenantShardId;\nuse serde::{Deserialize, Serialize};\n#[cfg(test)]\nuse utils::id::TenantId;\nuse utils::id::TimelineId;\nuse utils::lsn::Lsn;\n\nuse super::{DeltaLayerName, ImageLayerName, LayerName};\n\n/// A unique identifier of a persistent layer.\n///\n/// This is different from `LayerDescriptor`, which is only used in the benchmarks.\n/// This struct contains all necessary information to find the image / delta layer. It also provides\n/// a unified way to generate layer information like file name.\n#[derive(Debug, PartialEq, Eq, Clone, Serialize, Deserialize, Hash)]\npub struct PersistentLayerDesc {\n    pub tenant_shard_id: TenantShardId,\n    pub timeline_id: TimelineId,\n    /// Range of keys that this layer covers\n    pub key_range: Range<Key>,\n    /// Inclusive start, exclusive end of the LSN range that this layer holds.\n    ///\n    /// - For an open in-memory layer, the end bound is MAX_LSN\n    /// - For a frozen in-memory layer or a delta layer, the end bound is a valid lsn after the\n    ///   range start\n    /// - An image layer represents snapshot at one LSN, so end_lsn is always the snapshot LSN + 1\n    pub lsn_range: Range<Lsn>,\n    /// Whether this is a delta layer, and also, is this incremental.\n    pub is_delta: bool,\n    pub file_size: u64,\n}\n\n/// A unique identifier of a persistent layer within the context of one timeline.\n#[derive(Debug, PartialEq, Eq, Clone, Hash)]\npub struct PersistentLayerKey {\n    pub key_range: Range<Key>,\n    pub lsn_range: Range<Lsn>,\n    pub is_delta: bool,\n}\n\nimpl std::fmt::Display for PersistentLayerKey {\n    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {\n        write!(\n            f,\n            \"{}..{} {}..{} is_delta={}\",\n            self.key_range.start,\n            self.key_range.end,\n            self.lsn_range.start,\n            
self.lsn_range.end,\n            self.is_delta\n        )\n    }\n}\n\nimpl From<ImageLayerName> for PersistentLayerKey {\n    fn from(image_layer_name: ImageLayerName) -> Self {\n        Self {\n            key_range: image_layer_name.key_range,\n            lsn_range: PersistentLayerDesc::image_layer_lsn_range(image_layer_name.lsn),\n            is_delta: false,\n        }\n    }\n}\n\nimpl From<DeltaLayerName> for PersistentLayerKey {\n    fn from(delta_layer_name: DeltaLayerName) -> Self {\n        Self {\n            key_range: delta_layer_name.key_range,\n            lsn_range: delta_layer_name.lsn_range,\n            is_delta: true,\n        }\n    }\n}\n\nimpl From<LayerName> for PersistentLayerKey {\n    fn from(layer_name: LayerName) -> Self {\n        match layer_name {\n            LayerName::Image(i) => i.into(),\n            LayerName::Delta(d) => d.into(),\n        }\n    }\n}\nimpl PersistentLayerDesc {\n    pub fn key(&self) -> PersistentLayerKey {\n        PersistentLayerKey {\n            key_range: self.key_range.clone(),\n            lsn_range: self.lsn_range.clone(),\n            is_delta: self.is_delta,\n        }\n    }\n\n    pub fn short_id(&self) -> impl Display {\n        self.layer_name()\n    }\n\n    #[cfg(test)]\n    pub fn new_test(key_range: Range<Key>, lsn_range: Range<Lsn>, is_delta: bool) -> Self {\n        Self {\n            tenant_shard_id: TenantShardId::unsharded(TenantId::generate()),\n            timeline_id: TimelineId::generate(),\n            key_range,\n            lsn_range,\n            is_delta,\n            file_size: 0,\n        }\n    }\n\n    pub fn new_img(\n        tenant_shard_id: TenantShardId,\n        timeline_id: TimelineId,\n        key_range: Range<Key>,\n        lsn: Lsn,\n        file_size: u64,\n    ) -> Self {\n        Self {\n            tenant_shard_id,\n            timeline_id,\n            key_range,\n            lsn_range: Self::image_layer_lsn_range(lsn),\n            is_delta: false,\n       
     file_size,\n        }\n    }\n\n    pub fn new_delta(\n        tenant_shard_id: TenantShardId,\n        timeline_id: TimelineId,\n        key_range: Range<Key>,\n        lsn_range: Range<Lsn>,\n        file_size: u64,\n    ) -> Self {\n        Self {\n            tenant_shard_id,\n            timeline_id,\n            key_range,\n            lsn_range,\n            is_delta: true,\n            file_size,\n        }\n    }\n\n    pub fn from_filename(\n        tenant_shard_id: TenantShardId,\n        timeline_id: TimelineId,\n        filename: LayerName,\n        file_size: u64,\n    ) -> Self {\n        match filename {\n            LayerName::Image(i) => {\n                Self::new_img(tenant_shard_id, timeline_id, i.key_range, i.lsn, file_size)\n            }\n            LayerName::Delta(d) => Self::new_delta(\n                tenant_shard_id,\n                timeline_id,\n                d.key_range,\n                d.lsn_range,\n                file_size,\n            ),\n        }\n    }\n\n    /// Get the LSN that the image layer covers.\n    pub fn image_layer_lsn(&self) -> Lsn {\n        assert!(!self.is_delta);\n        assert!(self.lsn_range.start + 1 == self.lsn_range.end);\n        self.lsn_range.start\n    }\n\n    /// Get the LSN range corresponding to a single image layer LSN.\n    pub fn image_layer_lsn_range(lsn: Lsn) -> Range<Lsn> {\n        lsn..(lsn + 1)\n    }\n\n    /// Get a delta layer name for this layer.\n    ///\n    /// Panic: if this is not a delta layer.\n    pub fn delta_layer_name(&self) -> DeltaLayerName {\n        assert!(self.is_delta);\n        DeltaLayerName {\n            key_range: self.key_range.clone(),\n            lsn_range: self.lsn_range.clone(),\n        }\n    }\n\n    /// Get a image layer name for this layer.\n    ///\n    /// Panic: if this is not an image layer, or the lsn range is invalid\n    pub fn image_layer_name(&self) -> ImageLayerName {\n        assert!(!self.is_delta);\n        
assert!(self.lsn_range.start + 1 == self.lsn_range.end);\n        ImageLayerName {\n            key_range: self.key_range.clone(),\n            lsn: self.lsn_range.start,\n        }\n    }\n\n    pub fn layer_name(&self) -> LayerName {\n        if self.is_delta {\n            self.delta_layer_name().into()\n        } else {\n            self.image_layer_name().into()\n        }\n    }\n\n    // TODO: remove this in the future once we refactor timeline APIs.\n\n    pub fn get_lsn_range(&self) -> Range<Lsn> {\n        self.lsn_range.clone()\n    }\n\n    pub fn get_key_range(&self) -> Range<Key> {\n        self.key_range.clone()\n    }\n\n    pub fn get_timeline_id(&self) -> TimelineId {\n        self.timeline_id\n    }\n\n    /// Does this layer only contain some data for the key-range (incremental),\n    /// or does it contain a version of every page? This is important to know\n    /// for garbage collecting old layers: an incremental layer depends on\n    /// the previous non-incremental layer.\n    pub fn is_incremental(&self) -> bool {\n        self.is_delta\n    }\n\n    pub fn is_delta(&self) -> bool {\n        self.is_delta\n    }\n\n    pub fn dump(&self) {\n        if self.is_delta {\n            println!(\n                \"----- delta layer for ten {} tli {} keys {}-{} lsn {}-{} is_incremental {} size {} ----\",\n                self.tenant_shard_id,\n                self.timeline_id,\n                self.key_range.start,\n                self.key_range.end,\n                self.lsn_range.start,\n                self.lsn_range.end,\n                self.is_incremental(),\n                self.file_size,\n            );\n        } else {\n            println!(\n                \"----- image layer for ten {} tli {} key {}-{} at {} is_incremental {} size {} ----\",\n                self.tenant_shard_id,\n                self.timeline_id,\n                self.key_range.start,\n                self.key_range.end,\n                self.image_layer_lsn(),\n   
             self.is_incremental(),\n                self.file_size\n            );\n        }\n    }\n\n    pub fn file_size(&self) -> u64 {\n        self.file_size\n    }\n}\n"
  },
  {
    "path": "pageserver/src/tenant/storage_layer/layer_name.rs",
    "content": "//!\n//! Helper functions for dealing with filenames of the image and delta layer files.\n//!\nuse std::cmp::Ordering;\nuse std::fmt;\nuse std::ops::Range;\nuse std::str::FromStr;\n\nuse pageserver_api::key::Key;\nuse utils::lsn::Lsn;\n\nuse super::PersistentLayerDesc;\n\n// Note: Timeline::load_layer_map() relies on this sort order\n#[derive(PartialEq, Eq, Clone, Hash)]\npub struct DeltaLayerName {\n    pub key_range: Range<Key>,\n    pub lsn_range: Range<Lsn>,\n}\n\nimpl std::fmt::Debug for DeltaLayerName {\n    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {\n        use super::RangeDisplayDebug;\n\n        f.debug_struct(\"DeltaLayerName\")\n            .field(\"key_range\", &RangeDisplayDebug(&self.key_range))\n            .field(\"lsn_range\", &self.lsn_range)\n            .finish()\n    }\n}\n\nimpl PartialOrd for DeltaLayerName {\n    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {\n        Some(self.cmp(other))\n    }\n}\n\nimpl Ord for DeltaLayerName {\n    fn cmp(&self, other: &Self) -> Ordering {\n        let mut cmp = self.key_range.start.cmp(&other.key_range.start);\n        if cmp != Ordering::Equal {\n            return cmp;\n        }\n        cmp = self.key_range.end.cmp(&other.key_range.end);\n        if cmp != Ordering::Equal {\n            return cmp;\n        }\n        cmp = self.lsn_range.start.cmp(&other.lsn_range.start);\n        if cmp != Ordering::Equal {\n            return cmp;\n        }\n        cmp = self.lsn_range.end.cmp(&other.lsn_range.end);\n\n        cmp\n    }\n}\n\n/// Represents the region of the LSN-Key space covered by a DeltaLayer\n///\n/// ```text\n///    <key start>-<key end>__<LSN start>-<LSN end>-<generation>\n/// ```\nimpl DeltaLayerName {\n    /// Parse the part of a delta layer's file name that represents the LayerName. 
Returns None\n    /// if the filename does not match the expected pattern.\n    pub fn parse_str(fname: &str) -> Option<Self> {\n        let (key_parts, lsn_generation_parts) = fname.split_once(\"__\")?;\n        let (key_start_str, key_end_str) = key_parts.split_once('-')?;\n        let (lsn_start_str, lsn_end_generation_parts) = lsn_generation_parts.split_once('-')?;\n        let lsn_end_str = if let Some((lsn_end_str, maybe_generation)) =\n            lsn_end_generation_parts.split_once('-')\n        {\n            if maybe_generation.starts_with(\"v\") {\n                // vY-XXXXXXXX\n                lsn_end_str\n            } else if maybe_generation.len() == 8 {\n                // XXXXXXXX\n                lsn_end_str\n            } else {\n                // no idea what this is\n                return None;\n            }\n        } else {\n            lsn_end_generation_parts\n        };\n\n        let key_start = Key::from_hex(key_start_str).ok()?;\n        let key_end = Key::from_hex(key_end_str).ok()?;\n\n        let start_lsn = Lsn::from_hex(lsn_start_str).ok()?;\n        let end_lsn = Lsn::from_hex(lsn_end_str).ok()?;\n\n        if start_lsn >= end_lsn {\n            return None;\n            // or panic?\n        }\n\n        if key_start >= key_end {\n            return None;\n            // or panic?\n        }\n\n        Some(DeltaLayerName {\n            key_range: key_start..key_end,\n            lsn_range: start_lsn..end_lsn,\n        })\n    }\n}\n\nimpl fmt::Display for DeltaLayerName {\n    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {\n        write!(\n            f,\n            \"{}-{}__{:016X}-{:016X}\",\n            self.key_range.start,\n            self.key_range.end,\n            u64::from(self.lsn_range.start),\n            u64::from(self.lsn_range.end),\n        )\n    }\n}\n\n#[derive(PartialEq, Eq, Clone, Hash)]\npub struct ImageLayerName {\n    pub key_range: Range<Key>,\n    pub lsn: Lsn,\n}\n\nimpl 
std::fmt::Debug for ImageLayerName {\n    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {\n        use super::RangeDisplayDebug;\n\n        f.debug_struct(\"ImageLayerName\")\n            .field(\"key_range\", &RangeDisplayDebug(&self.key_range))\n            .field(\"lsn\", &self.lsn)\n            .finish()\n    }\n}\n\nimpl PartialOrd for ImageLayerName {\n    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {\n        Some(self.cmp(other))\n    }\n}\n\nimpl Ord for ImageLayerName {\n    fn cmp(&self, other: &Self) -> Ordering {\n        let mut cmp = self.key_range.start.cmp(&other.key_range.start);\n        if cmp != Ordering::Equal {\n            return cmp;\n        }\n        cmp = self.key_range.end.cmp(&other.key_range.end);\n        if cmp != Ordering::Equal {\n            return cmp;\n        }\n        cmp = self.lsn.cmp(&other.lsn);\n\n        cmp\n    }\n}\n\nimpl ImageLayerName {\n    pub fn lsn_as_range(&self) -> Range<Lsn> {\n        // Saves from having to copypaste this all over\n        PersistentLayerDesc::image_layer_lsn_range(self.lsn)\n    }\n}\n\n///\n/// Represents the part of the Key-LSN space covered by an ImageLayer\n///\n/// ```text\n///    <key start>-<key end>__<LSN>-<generation>\n/// ```\nimpl ImageLayerName {\n    /// Parse a string as the LayerName part of an image layer file name. 
Returns None if the\n    /// filename does not match the expected pattern.\n    pub fn parse_str(fname: &str) -> Option<Self> {\n        let (key_parts, lsn_generation_parts) = fname.split_once(\"__\")?;\n        let (key_start_str, key_end_str) = key_parts.split_once('-')?;\n        let lsn_str =\n            if let Some((lsn_str, maybe_generation)) = lsn_generation_parts.split_once('-') {\n                if maybe_generation.starts_with(\"v\") {\n                    // vY-XXXXXXXX\n                    lsn_str\n                } else if maybe_generation.len() == 8 {\n                    // XXXXXXXX\n                    lsn_str\n                } else {\n                    // likely a delta layer\n                    return None;\n                }\n            } else {\n                lsn_generation_parts\n            };\n\n        let key_start = Key::from_hex(key_start_str).ok()?;\n        let key_end = Key::from_hex(key_end_str).ok()?;\n\n        let lsn = Lsn::from_hex(lsn_str).ok()?;\n\n        Some(ImageLayerName {\n            key_range: key_start..key_end,\n            lsn,\n        })\n    }\n}\n\nimpl fmt::Display for ImageLayerName {\n    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {\n        write!(\n            f,\n            \"{}-{}__{:016X}\",\n            self.key_range.start,\n            self.key_range.end,\n            u64::from(self.lsn),\n        )\n    }\n}\n\n/// LayerName is the logical identity of a layer within a LayerMap at a moment in time.\n///\n/// The LayerName is not a unique filename, as the same LayerName may have multiple physical incarnations\n/// over time (e.g. across shard splits or compression). 
The physical filenames of layers in local\n/// storage and object names in remote storage consist of the LayerName plus some extra qualifiers\n/// that uniquely identify the physical incarnation of a layer (see [crate::tenant::remote_timeline_client::remote_layer_path])\n/// and [`crate::tenant::storage_layer::layer::local_layer_path`])\n#[derive(Debug, PartialEq, Eq, Hash, Clone, Ord, PartialOrd)]\npub enum LayerName {\n    Image(ImageLayerName),\n    Delta(DeltaLayerName),\n}\n\nimpl LayerName {\n    /// Determines if this layer file is considered to be in future meaning we will discard these\n    /// layers during timeline initialization from the given disk_consistent_lsn.\n    pub(crate) fn is_in_future(&self, disk_consistent_lsn: Lsn) -> bool {\n        use LayerName::*;\n        match self {\n            Image(file_name) if file_name.lsn > disk_consistent_lsn => true,\n            Delta(file_name) if file_name.lsn_range.end > disk_consistent_lsn + 1 => true,\n            _ => false,\n        }\n    }\n\n    pub(crate) fn kind(&self) -> &'static str {\n        use LayerName::*;\n        match self {\n            Delta(_) => \"delta\",\n            Image(_) => \"image\",\n        }\n    }\n\n    /// Gets the key range encoded in the layer name.\n    pub fn key_range(&self) -> &Range<Key> {\n        match &self {\n            LayerName::Image(layer) => &layer.key_range,\n            LayerName::Delta(layer) => &layer.key_range,\n        }\n    }\n\n    /// Gets the LSN range encoded in the layer name.\n    pub fn lsn_as_range(&self) -> Range<Lsn> {\n        match &self {\n            LayerName::Image(layer) => layer.lsn_as_range(),\n            LayerName::Delta(layer) => layer.lsn_range.clone(),\n        }\n    }\n\n    pub fn is_delta(&self) -> bool {\n        matches!(self, LayerName::Delta(_))\n    }\n}\n\nimpl fmt::Display for LayerName {\n    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {\n        match self {\n            Self::Image(fname) => 
write!(f, \"{fname}\"),\n            Self::Delta(fname) => write!(f, \"{fname}\"),\n        }\n    }\n}\n\nimpl From<ImageLayerName> for LayerName {\n    fn from(fname: ImageLayerName) -> Self {\n        Self::Image(fname)\n    }\n}\nimpl From<DeltaLayerName> for LayerName {\n    fn from(fname: DeltaLayerName) -> Self {\n        Self::Delta(fname)\n    }\n}\n\nimpl FromStr for LayerName {\n    type Err = String;\n\n    /// Conversion from either a physical layer filename, or the string-ization of\n    /// Self. When loading a physical layer filename, we drop any extra information\n    /// not needed to build Self.\n    fn from_str(value: &str) -> Result<Self, Self::Err> {\n        let delta = DeltaLayerName::parse_str(value);\n        let image = ImageLayerName::parse_str(value);\n        let ok = match (delta, image) {\n            (None, None) => {\n                return Err(format!(\n                    \"neither delta nor image layer file name: {value:?}\"\n                ));\n            }\n            (Some(delta), None) => Self::Delta(delta),\n            (None, Some(image)) => Self::Image(image),\n            (Some(_), Some(_)) => unreachable!(),\n        };\n        Ok(ok)\n    }\n}\n\nimpl serde::Serialize for LayerName {\n    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>\n    where\n        S: serde::Serializer,\n    {\n        match self {\n            Self::Image(fname) => serializer.collect_str(fname),\n            Self::Delta(fname) => serializer.collect_str(fname),\n        }\n    }\n}\n\nimpl<'de> serde::Deserialize<'de> for LayerName {\n    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>\n    where\n        D: serde::Deserializer<'de>,\n    {\n        deserializer.deserialize_string(LayerNameVisitor)\n    }\n}\n\nstruct LayerNameVisitor;\n\nimpl serde::de::Visitor<'_> for LayerNameVisitor {\n    type Value = LayerName;\n\n    fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {\n        write!(\n   
         formatter,\n            \"a string that is a valid image or delta layer file name\"\n        )\n    }\n    fn visit_str<E>(self, v: &str) -> Result<Self::Value, E>\n    where\n        E: serde::de::Error,\n    {\n        v.parse().map_err(|e| E::custom(e))\n    }\n}\n\n#[cfg(test)]\nmod test {\n    use super::*;\n    #[test]\n    fn image_layer_parse() {\n        let expected = LayerName::Image(ImageLayerName {\n            key_range: Key::from_i128(0)\n                ..Key::from_hex(\"000000067F00000001000004DF0000000006\").unwrap(),\n            lsn: Lsn::from_hex(\"00000000014FED58\").unwrap(),\n        });\n        let parsed = LayerName::from_str(\"000000000000000000000000000000000000-000000067F00000001000004DF0000000006__00000000014FED58-v1-00000001\").unwrap();\n        assert_eq!(parsed, expected);\n\n        let parsed = LayerName::from_str(\"000000000000000000000000000000000000-000000067F00000001000004DF0000000006__00000000014FED58-00000001\").unwrap();\n        assert_eq!(parsed, expected);\n\n        // Omitting generation suffix is valid\n        let parsed = LayerName::from_str(\"000000000000000000000000000000000000-000000067F00000001000004DF0000000006__00000000014FED58\").unwrap();\n        assert_eq!(parsed, expected);\n    }\n\n    #[test]\n    fn delta_layer_parse() {\n        let expected = LayerName::Delta(DeltaLayerName {\n            key_range: Key::from_i128(0)\n                ..Key::from_hex(\"000000067F00000001000004DF0000000006\").unwrap(),\n            lsn_range: Lsn::from_hex(\"00000000014FED58\").unwrap()\n                ..Lsn::from_hex(\"000000000154C481\").unwrap(),\n        });\n        let parsed = LayerName::from_str(\"000000000000000000000000000000000000-000000067F00000001000004DF0000000006__00000000014FED58-000000000154C481-v1-00000001\").unwrap();\n        assert_eq!(parsed, expected);\n\n        let parsed = 
LayerName::from_str(\"000000000000000000000000000000000000-000000067F00000001000004DF0000000006__00000000014FED58-000000000154C481-00000001\").unwrap();\n        assert_eq!(parsed, expected);\n\n        // Omitting generation suffix is valid\n        let parsed = LayerName::from_str(\"000000000000000000000000000000000000-000000067F00000001000004DF0000000006__00000000014FED58-000000000154C481\").unwrap();\n        assert_eq!(parsed, expected);\n    }\n}\n"
  },
  {
    "path": "pageserver/src/tenant/storage_layer/merge_iterator.rs",
    "content": "use std::cmp::Ordering;\nuse std::collections::{BinaryHeap, binary_heap};\nuse std::sync::Arc;\n\nuse anyhow::bail;\nuse pageserver_api::key::Key;\nuse utils::lsn::Lsn;\nuse wal_decoder::models::value::Value;\n\nuse super::delta_layer::{DeltaLayerInner, DeltaLayerIterator};\nuse super::image_layer::{ImageLayerInner, ImageLayerIterator};\nuse super::{PersistentLayerDesc, PersistentLayerKey};\nuse crate::context::RequestContext;\n\n#[derive(Clone, Copy)]\npub(crate) enum LayerRef<'a> {\n    Image(&'a ImageLayerInner),\n    Delta(&'a DeltaLayerInner),\n}\n\nimpl<'a> LayerRef<'a> {\n    fn iter_with_options(\n        self,\n        ctx: &'a RequestContext,\n        max_read_size: u64,\n        max_batch_size: usize,\n    ) -> LayerIterRef<'a> {\n        match self {\n            Self::Image(x) => {\n                LayerIterRef::Image(x.iter_with_options(ctx, max_read_size, max_batch_size))\n            }\n            Self::Delta(x) => {\n                LayerIterRef::Delta(x.iter_with_options(ctx, max_read_size, max_batch_size))\n            }\n        }\n    }\n\n    fn layer_dbg_info(&self) -> String {\n        match self {\n            Self::Image(x) => x.layer_dbg_info(),\n            Self::Delta(x) => x.layer_dbg_info(),\n        }\n    }\n}\n\nenum LayerIterRef<'a> {\n    Image(ImageLayerIterator<'a>),\n    Delta(DeltaLayerIterator<'a>),\n}\n\nimpl LayerIterRef<'_> {\n    async fn next(&mut self) -> anyhow::Result<Option<(Key, Lsn, Value)>> {\n        match self {\n            Self::Delta(x) => x.next().await,\n            Self::Image(x) => x.next().await,\n        }\n    }\n\n    fn layer_dbg_info(&self) -> String {\n        match self {\n            Self::Image(x) => x.layer_dbg_info(),\n            Self::Delta(x) => x.layer_dbg_info(),\n        }\n    }\n}\n\n/// This type plays several roles at once\n/// 1. Unified iterator for image and delta layers.\n/// 2. `Ord` for use in [`MergeIterator::heap`] (for the k-merge).\n/// 3. 
Lazy creation of the real delta/image iterator.\n#[allow(clippy::large_enum_variant, reason = \"TODO\")]\npub(crate) enum IteratorWrapper<'a> {\n    NotLoaded {\n        ctx: &'a RequestContext,\n        first_key_lower_bound: (Key, Lsn),\n        layer: LayerRef<'a>,\n        source_desc: Arc<PersistentLayerKey>,\n        max_read_size: u64,\n        max_batch_size: usize,\n    },\n    Loaded {\n        iter: PeekableLayerIterRef<'a>,\n        source_desc: Arc<PersistentLayerKey>,\n    },\n}\n\npub(crate) struct PeekableLayerIterRef<'a> {\n    iter: LayerIterRef<'a>,\n    peeked: Option<(Key, Lsn, Value)>, // None == end\n}\n\nimpl<'a> PeekableLayerIterRef<'a> {\n    async fn create(mut iter: LayerIterRef<'a>) -> anyhow::Result<Self> {\n        let peeked = iter.next().await?;\n        Ok(Self { iter, peeked })\n    }\n\n    fn peek(&self) -> &Option<(Key, Lsn, Value)> {\n        &self.peeked\n    }\n\n    async fn next(&mut self) -> anyhow::Result<Option<(Key, Lsn, Value)>> {\n        let result = self.peeked.take();\n        self.peeked = self.iter.next().await?;\n        if let (Some((k1, l1, _)), Some((k2, l2, _))) = (&self.peeked, &result) {\n            if (k1, l1) < (k2, l2) {\n                bail!(\"iterator is not ordered: {}\", self.iter.layer_dbg_info());\n            }\n        }\n        Ok(result)\n    }\n}\n\nimpl std::cmp::PartialEq for IteratorWrapper<'_> {\n    fn eq(&self, other: &Self) -> bool {\n        self.cmp(other) == Ordering::Equal\n    }\n}\n\nimpl std::cmp::Eq for IteratorWrapper<'_> {}\n\nimpl std::cmp::PartialOrd for IteratorWrapper<'_> {\n    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {\n        Some(self.cmp(other))\n    }\n}\n\nimpl std::cmp::Ord for IteratorWrapper<'_> {\n    fn cmp(&self, other: &Self) -> std::cmp::Ordering {\n        use std::cmp::Ordering;\n        let a = self.peek_next_key_lsn_value();\n        let b = other.peek_next_key_lsn_value();\n        match (a, b) {\n            (Some((k1, 
l1, v1)), Some((k2, l2, v2))) => {\n                fn map_value_to_num(val: &Option<&Value>) -> usize {\n                    match val {\n                        None => 0,\n                        Some(Value::Image(_)) => 1,\n                        Some(Value::WalRecord(_)) => 2,\n                    }\n                }\n                let order_1 = map_value_to_num(&v1);\n                let order_2 = map_value_to_num(&v2);\n                // When key_lsn are the same, the unloaded iter will always appear before the loaded one.\n                // And note that we do a reverse at the end of the comparison, so it works with the max heap.\n                (k1, l1, order_1).cmp(&(k2, l2, order_2))\n            }\n            (Some(_), None) => Ordering::Less,\n            (None, Some(_)) => Ordering::Greater,\n            (None, None) => Ordering::Equal,\n        }\n        .reverse()\n    }\n}\n\nimpl<'a> IteratorWrapper<'a> {\n    pub fn create_from_image_layer(\n        image_layer: &'a ImageLayerInner,\n        ctx: &'a RequestContext,\n        max_read_size: u64,\n        max_batch_size: usize,\n    ) -> Self {\n        Self::NotLoaded {\n            layer: LayerRef::Image(image_layer),\n            first_key_lower_bound: (image_layer.key_range().start, image_layer.lsn()),\n            ctx,\n            source_desc: PersistentLayerKey {\n                key_range: image_layer.key_range().clone(),\n                lsn_range: PersistentLayerDesc::image_layer_lsn_range(image_layer.lsn()),\n                is_delta: false,\n            }\n            .into(),\n            max_read_size,\n            max_batch_size,\n        }\n    }\n\n    pub fn create_from_delta_layer(\n        delta_layer: &'a DeltaLayerInner,\n        ctx: &'a RequestContext,\n        max_read_size: u64,\n        max_batch_size: usize,\n    ) -> Self {\n        Self::NotLoaded {\n            layer: LayerRef::Delta(delta_layer),\n            first_key_lower_bound: 
(delta_layer.key_range().start, delta_layer.lsn_range().start),\n            ctx,\n            source_desc: PersistentLayerKey {\n                key_range: delta_layer.key_range().clone(),\n                lsn_range: delta_layer.lsn_range().clone(),\n                is_delta: true,\n            }\n            .into(),\n            max_read_size,\n            max_batch_size,\n        }\n    }\n\n    fn peek_next_key_lsn_value(&self) -> Option<(&Key, Lsn, Option<&Value>)> {\n        match self {\n            Self::Loaded { iter, .. } => iter\n                .peek()\n                .as_ref()\n                .map(|(key, lsn, val)| (key, *lsn, Some(val))),\n            Self::NotLoaded {\n                first_key_lower_bound: (key, lsn),\n                ..\n            } => Some((key, *lsn, None)),\n        }\n    }\n\n    // CORRECTNESS: this function must always take `&mut self`, never `&self`.\n    //\n    // The reason is that `impl Ord for Self` evaluates differently after this function\n    // returns. We're called through a `PeekMut::deref_mut`, which causes heap repair when\n    // the PeekMut gets returned. 
So, it's critical that we actually run through `PeekMut::deref_mut`\n    // and not just `PeekMut::deref`.\n    // If we don't take `&mut self`, the heap is not repaired after loading.\n    async fn load(&mut self) -> anyhow::Result<()> {\n        assert!(!self.is_loaded());\n        let Self::NotLoaded {\n            ctx,\n            first_key_lower_bound,\n            layer,\n            source_desc,\n            max_read_size,\n            max_batch_size,\n        } = self\n        else {\n            unreachable!()\n        };\n        let iter = layer.iter_with_options(ctx, *max_read_size, *max_batch_size);\n        let iter = PeekableLayerIterRef::create(iter).await?;\n        if let Some((k1, l1, _)) = iter.peek() {\n            let (k2, l2) = first_key_lower_bound;\n            if (k1, l1) < (k2, l2) {\n                bail!(\n                    \"layer key range did not include the first key in the layer: {}\",\n                    layer.layer_dbg_info()\n                );\n            }\n        }\n        *self = Self::Loaded {\n            iter,\n            source_desc: source_desc.clone(),\n        };\n        Ok(())\n    }\n\n    fn is_loaded(&self) -> bool {\n        matches!(self, Self::Loaded { .. })\n    }\n\n    /// Correctness: must load the iterator before using.\n    ///\n    /// Given this iterator wrapper is private to the merge iterator, users won't be able to mis-use it.\n    /// The public interfaces to use are [`crate::tenant::storage_layer::delta_layer::DeltaLayerIterator`] and\n    /// [`crate::tenant::storage_layer::image_layer::ImageLayerIterator`].\n    async fn next(&mut self) -> anyhow::Result<Option<(Key, Lsn, Value)>> {\n        let Self::Loaded { iter, .. 
} = self else {\n            panic!(\"must load the iterator before using\")\n        };\n        iter.next().await\n    }\n\n    /// Get the persistent layer key corresponding to this iterator\n    fn trace_source(&self) -> Arc<PersistentLayerKey> {\n        match self {\n            Self::Loaded { source_desc, .. } => source_desc.clone(),\n            Self::NotLoaded { source_desc, .. } => source_desc.clone(),\n        }\n    }\n}\n\n/// A merge iterator over delta/image layer iterators.\n///\n/// When duplicated records are found, the iterator will not perform any\n/// deduplication, and the caller should handle these situations. By saying\n/// duplicated records, there are many possibilities:\n///\n/// * Two same delta at the same LSN.\n/// * Two same image at the same LSN.\n/// * Delta/image at the same LSN where the image has already applied the delta.\n///\n/// The iterator will always put the image before the delta.\npub struct MergeIterator<'a> {\n    heap: BinaryHeap<IteratorWrapper<'a>>,\n}\n\npub(crate) trait MergeIteratorItem {\n    fn new(item: (Key, Lsn, Value), iterator: &IteratorWrapper<'_>) -> Self;\n\n    fn key_lsn_value(&self) -> &(Key, Lsn, Value);\n}\n\nimpl MergeIteratorItem for (Key, Lsn, Value) {\n    fn new(item: (Key, Lsn, Value), _: &IteratorWrapper<'_>) -> Self {\n        item\n    }\n\n    fn key_lsn_value(&self) -> &(Key, Lsn, Value) {\n        self\n    }\n}\n\nimpl MergeIteratorItem for ((Key, Lsn, Value), Arc<PersistentLayerKey>) {\n    fn new(item: (Key, Lsn, Value), iter: &IteratorWrapper<'_>) -> Self {\n        (item, iter.trace_source().clone())\n    }\n\n    fn key_lsn_value(&self) -> &(Key, Lsn, Value) {\n        &self.0\n    }\n}\n\nimpl<'a> MergeIterator<'a> {\n    #[cfg(test)]\n    pub(crate) fn create_for_testing(\n        deltas: &[&'a DeltaLayerInner],\n        images: &[&'a ImageLayerInner],\n        ctx: &'a RequestContext,\n    ) -> Self {\n        Self::create_with_options(deltas, images, ctx, 1024 * 8192, 1024)\n   
 }\n\n    /// Create a new merge iterator with custom options.\n    ///\n    /// Adjust `max_read_size` and `max_batch_size` to trade memory usage for performance. The size should scale\n    /// with the number of layers to compact. If there are a lot of layers, consider reducing the values, so that\n    /// the buffer does not take too much memory.\n    ///\n    /// The default options for L0 compactions are:\n    /// - max_read_size: 1024 * 8192 (8MB)\n    /// - max_batch_size: 1024\n    ///\n    /// The default options for gc-compaction are:\n    /// - max_read_size: 128 * 8192 (1MB)\n    /// - max_batch_size: 128\n    pub fn create_with_options(\n        deltas: &[&'a DeltaLayerInner],\n        images: &[&'a ImageLayerInner],\n        ctx: &'a RequestContext,\n        max_read_size: u64,\n        max_batch_size: usize,\n    ) -> Self {\n        let mut heap = Vec::with_capacity(images.len() + deltas.len());\n        for image in images {\n            heap.push(IteratorWrapper::create_from_image_layer(\n                image,\n                ctx,\n                max_read_size,\n                max_batch_size,\n            ));\n        }\n        for delta in deltas {\n            heap.push(IteratorWrapper::create_from_delta_layer(\n                delta,\n                ctx,\n                max_read_size,\n                max_batch_size,\n            ));\n        }\n        Self {\n            heap: BinaryHeap::from(heap),\n        }\n    }\n\n    pub(crate) async fn next_inner<R: MergeIteratorItem>(&mut self) -> anyhow::Result<Option<R>> {\n        while let Some(mut iter) = self.heap.peek_mut() {\n            if !iter.is_loaded() {\n                // Once we load the iterator, we can know the real first key-value pair in the iterator.\n                // We put it back into the heap so that a potentially unloaded layer may have a key between\n                // [potential_first_key, loaded_first_key).\n                iter.load().await?;\n                
continue;\n            }\n            let Some(item) = iter.next().await? else {\n                // If the iterator returns None, we pop this iterator. Actually, in the current implementation,\n                // we order None > Some, and all the rest of the iterators should return None.\n                binary_heap::PeekMut::pop(iter);\n                continue;\n            };\n            return Ok(Some(R::new(item, &iter)));\n        }\n        Ok(None)\n    }\n\n    /// Get the next key-value pair from the iterator.\n    pub async fn next(&mut self) -> anyhow::Result<Option<(Key, Lsn, Value)>> {\n        self.next_inner().await\n    }\n\n    /// Get the next key-value pair from the iterator, and trace where the key comes from.\n    pub async fn next_with_trace(\n        &mut self,\n    ) -> anyhow::Result<Option<((Key, Lsn, Value), Arc<PersistentLayerKey>)>> {\n        self.next_inner().await\n    }\n}\n\n#[cfg(test)]\nmod tests {\n    use itertools::Itertools;\n    use pageserver_api::key::Key;\n    use utils::lsn::Lsn;\n    #[cfg(feature = \"testing\")]\n    use wal_decoder::models::record::NeonWalRecord;\n\n    use super::*;\n    use crate::DEFAULT_PG_VERSION;\n    use crate::tenant::harness::{TIMELINE_ID, TenantHarness};\n    #[cfg(feature = \"testing\")]\n    use crate::tenant::storage_layer::delta_layer::test::sort_delta_value;\n    use crate::tenant::storage_layer::delta_layer::test::{produce_delta_layer, sort_delta};\n\n    async fn assert_merge_iter_equal(\n        merge_iter: &mut MergeIterator<'_>,\n        expect: &[(Key, Lsn, Value)],\n    ) {\n        let mut expect_iter = expect.iter();\n        loop {\n            let o1 = merge_iter.next().await.unwrap();\n            let o2 = expect_iter.next();\n            assert_eq!(o1.is_some(), o2.is_some());\n            if o1.is_none() && o2.is_none() {\n                break;\n            }\n            let (k1, l1, v1) = o1.unwrap();\n            let (k2, l2, v2) = o2.unwrap();\n            
assert_eq!(&k1, k2);\n            assert_eq!(l1, *l2);\n            assert_eq!(&v1, v2);\n        }\n    }\n\n    #[tokio::test]\n    async fn merge_in_between() {\n        use bytes::Bytes;\n\n        let harness = TenantHarness::create(\"merge_iterator_merge_in_between\")\n            .await\n            .unwrap();\n        let (tenant, ctx) = harness.load().await;\n\n        let tline = tenant\n            .create_test_timeline(TIMELINE_ID, Lsn(0x10), DEFAULT_PG_VERSION, &ctx)\n            .await\n            .unwrap();\n\n        fn get_key(id: u32) -> Key {\n            let mut key = Key::from_hex(\"000000000033333333444444445500000000\").unwrap();\n            key.field6 = id;\n            key\n        }\n        let test_deltas1 = vec![\n            (\n                get_key(0),\n                Lsn(0x10),\n                Value::Image(Bytes::copy_from_slice(b\"test\")),\n            ),\n            (\n                get_key(5),\n                Lsn(0x10),\n                Value::Image(Bytes::copy_from_slice(b\"test\")),\n            ),\n        ];\n        let resident_layer_1 = produce_delta_layer(&tenant, &tline, test_deltas1.clone(), &ctx)\n            .await\n            .unwrap();\n        let test_deltas2 = vec![\n            (\n                get_key(3),\n                Lsn(0x10),\n                Value::Image(Bytes::copy_from_slice(b\"test\")),\n            ),\n            (\n                get_key(4),\n                Lsn(0x10),\n                Value::Image(Bytes::copy_from_slice(b\"test\")),\n            ),\n        ];\n        let resident_layer_2 = produce_delta_layer(&tenant, &tline, test_deltas2.clone(), &ctx)\n            .await\n            .unwrap();\n        let mut merge_iter = MergeIterator::create_for_testing(\n            &[\n                resident_layer_2.get_as_delta(&ctx).await.unwrap(),\n                resident_layer_1.get_as_delta(&ctx).await.unwrap(),\n            ],\n            &[],\n            &ctx,\n        );\n     
   let mut expect = Vec::new();\n        expect.extend(test_deltas1);\n        expect.extend(test_deltas2);\n        expect.sort_by(sort_delta);\n        assert_merge_iter_equal(&mut merge_iter, &expect).await;\n    }\n\n    #[tokio::test]\n    async fn delta_merge() {\n        use bytes::Bytes;\n\n        let harness = TenantHarness::create(\"merge_iterator_delta_merge\")\n            .await\n            .unwrap();\n        let (tenant, ctx) = harness.load().await;\n\n        let tline = tenant\n            .create_test_timeline(TIMELINE_ID, Lsn(0x10), DEFAULT_PG_VERSION, &ctx)\n            .await\n            .unwrap();\n\n        fn get_key(id: u32) -> Key {\n            let mut key = Key::from_hex(\"000000000033333333444444445500000000\").unwrap();\n            key.field6 = id;\n            key\n        }\n        const N: usize = 1000;\n        let test_deltas1 = (0..N)\n            .map(|idx| {\n                (\n                    get_key(idx as u32 / 10),\n                    Lsn(0x20 * ((idx as u64) % 10 + 1)),\n                    Value::Image(Bytes::from(format!(\"img{idx:05}\"))),\n                )\n            })\n            .collect_vec();\n        let resident_layer_1 = produce_delta_layer(&tenant, &tline, test_deltas1.clone(), &ctx)\n            .await\n            .unwrap();\n        let test_deltas2 = (0..N)\n            .map(|idx| {\n                (\n                    get_key(idx as u32 / 10),\n                    Lsn(0x20 * ((idx as u64) % 10 + 1) + 0x10),\n                    Value::Image(Bytes::from(format!(\"img{idx:05}\"))),\n                )\n            })\n            .collect_vec();\n        let resident_layer_2 = produce_delta_layer(&tenant, &tline, test_deltas2.clone(), &ctx)\n            .await\n            .unwrap();\n        let test_deltas3 = (0..N)\n            .map(|idx| {\n                (\n                    get_key(idx as u32 / 10 + N as u32),\n                    Lsn(0x10 * ((idx as u64) % 10 + 1)),\n               
     Value::Image(Bytes::from(format!(\"img{idx:05}\"))),\n                )\n            })\n            .collect_vec();\n        let resident_layer_3 = produce_delta_layer(&tenant, &tline, test_deltas3.clone(), &ctx)\n            .await\n            .unwrap();\n        let mut merge_iter = MergeIterator::create_for_testing(\n            &[\n                resident_layer_1.get_as_delta(&ctx).await.unwrap(),\n                resident_layer_2.get_as_delta(&ctx).await.unwrap(),\n                resident_layer_3.get_as_delta(&ctx).await.unwrap(),\n            ],\n            &[],\n            &ctx,\n        );\n        let mut expect = Vec::new();\n        expect.extend(test_deltas1);\n        expect.extend(test_deltas2);\n        expect.extend(test_deltas3);\n        expect.sort_by(sort_delta);\n        assert_merge_iter_equal(&mut merge_iter, &expect).await;\n\n        // TODO: test layers are loaded only when needed, reducing num of active iterators in k-merge\n    }\n\n    #[cfg(feature = \"testing\")]\n    #[tokio::test]\n    async fn delta_image_mixed_merge() {\n        use bytes::Bytes;\n\n        let harness = TenantHarness::create(\"merge_iterator_delta_image_mixed_merge\")\n            .await\n            .unwrap();\n        let (tenant, ctx) = harness.load().await;\n\n        let tline = tenant\n            .create_test_timeline(TIMELINE_ID, Lsn(0x10), DEFAULT_PG_VERSION, &ctx)\n            .await\n            .unwrap();\n\n        fn get_key(id: u32) -> Key {\n            let mut key = Key::from_hex(\"000000000033333333444444445500000000\").unwrap();\n            key.field6 = id;\n            key\n        }\n        // In this test case, we want to test if the iterator still works correctly with multiple copies\n        // of a delta+image at the same LSN, for example, the following sequence a@10=+a, a@10=+a, a@10=ab, a@10=ab.\n        // Duplicated deltas/images are possible for old tenants before the full L0 compaction file name fix.\n        // An 
incomplete compaction could produce multiple exactly-the-same delta layers. Force image generation\n        // could produce overlapping images. Apart from duplicated deltas/images, in the current storage implementation\n        // one key-lsn could have a delta in the delta layer and one image in the image layer. The iterator should\n        // correctly process these situations and return everything as-is, and the upper layer of the system\n        // will handle duplicated LSNs.\n        let test_deltas1 = vec![\n            (\n                get_key(0),\n                Lsn(0x10),\n                Value::WalRecord(NeonWalRecord::wal_init(\"\")),\n            ),\n            (\n                get_key(0),\n                Lsn(0x18),\n                Value::WalRecord(NeonWalRecord::wal_append(\"a\")),\n            ),\n            (\n                get_key(5),\n                Lsn(0x10),\n                Value::WalRecord(NeonWalRecord::wal_init(\"\")),\n            ),\n            (\n                get_key(5),\n                Lsn(0x18),\n                Value::WalRecord(NeonWalRecord::wal_append(\"b\")),\n            ),\n        ];\n        let resident_layer_1 = produce_delta_layer(&tenant, &tline, test_deltas1.clone(), &ctx)\n            .await\n            .unwrap();\n        let mut test_deltas2 = test_deltas1.clone();\n        test_deltas2.push((\n            get_key(10),\n            Lsn(0x20),\n            Value::Image(Bytes::copy_from_slice(b\"test\")),\n        ));\n        let resident_layer_2 = produce_delta_layer(&tenant, &tline, test_deltas2.clone(), &ctx)\n            .await\n            .unwrap();\n        let test_deltas3 = vec![\n            (\n                get_key(0),\n                Lsn(0x10),\n                Value::Image(Bytes::copy_from_slice(b\"\")),\n            ),\n            (\n                get_key(5),\n                Lsn(0x18),\n                Value::Image(Bytes::copy_from_slice(b\"b\")),\n            ),\n            (\n    
            get_key(15),\n                Lsn(0x20),\n                Value::Image(Bytes::copy_from_slice(b\"test\")),\n            ),\n        ];\n        let resident_layer_3 = produce_delta_layer(&tenant, &tline, test_deltas3.clone(), &ctx)\n            .await\n            .unwrap();\n        let mut test_deltas4 = test_deltas3.clone();\n        test_deltas4.push((\n            get_key(20),\n            Lsn(0x20),\n            Value::Image(Bytes::copy_from_slice(b\"test\")),\n        ));\n        let resident_layer_4 = produce_delta_layer(&tenant, &tline, test_deltas4.clone(), &ctx)\n            .await\n            .unwrap();\n        let mut expect = Vec::new();\n        expect.extend(test_deltas1);\n        expect.extend(test_deltas2);\n        expect.extend(test_deltas3);\n        expect.extend(test_deltas4);\n        expect.sort_by(sort_delta_value);\n\n        // Test with different layer order for MergeIterator::create to ensure the order\n        // is stable.\n\n        let mut merge_iter = MergeIterator::create_for_testing(\n            &[\n                resident_layer_4.get_as_delta(&ctx).await.unwrap(),\n                resident_layer_1.get_as_delta(&ctx).await.unwrap(),\n                resident_layer_3.get_as_delta(&ctx).await.unwrap(),\n                resident_layer_2.get_as_delta(&ctx).await.unwrap(),\n            ],\n            &[],\n            &ctx,\n        );\n        assert_merge_iter_equal(&mut merge_iter, &expect).await;\n\n        let mut merge_iter = MergeIterator::create_for_testing(\n            &[\n                resident_layer_1.get_as_delta(&ctx).await.unwrap(),\n                resident_layer_4.get_as_delta(&ctx).await.unwrap(),\n                resident_layer_3.get_as_delta(&ctx).await.unwrap(),\n                resident_layer_2.get_as_delta(&ctx).await.unwrap(),\n            ],\n            &[],\n            &ctx,\n        );\n        assert_merge_iter_equal(&mut merge_iter, &expect).await;\n\n        is_send(merge_iter);\n  
  }\n\n    #[cfg(feature = \"testing\")]\n    fn is_send(_: impl Send) {}\n}\n"
  },
  {
    "path": "pageserver/src/tenant/storage_layer.rs",
    "content": "//! Common traits and structs for layers\n\npub mod batch_split_writer;\npub mod delta_layer;\npub mod errors;\npub mod filter_iterator;\npub mod image_layer;\npub mod inmemory_layer;\npub(crate) mod layer;\nmod layer_desc;\nmod layer_name;\npub mod merge_iterator;\n\nuse std::cmp::Ordering;\nuse std::collections::hash_map::Entry;\nuse std::collections::{BinaryHeap, HashMap};\nuse std::ops::Range;\nuse std::pin::Pin;\nuse std::sync::Arc;\nuse std::sync::atomic::AtomicUsize;\nuse std::time::{Duration, SystemTime, UNIX_EPOCH};\n\nuse crate::PERF_TRACE_TARGET;\npub use batch_split_writer::{BatchLayerWriter, SplitDeltaLayerWriter, SplitImageLayerWriter};\nuse bytes::Bytes;\npub use delta_layer::{DeltaLayer, DeltaLayerWriter, ValueRef};\nuse futures::StreamExt;\nuse futures::stream::FuturesUnordered;\npub use image_layer::{ImageLayer, ImageLayerWriter};\npub use inmemory_layer::InMemoryLayer;\npub(crate) use layer::{EvictionError, Layer, ResidentLayer};\npub use layer_desc::{PersistentLayerDesc, PersistentLayerKey};\npub use layer_name::{DeltaLayerName, ImageLayerName, LayerName};\nuse pageserver_api::config::GetVectoredConcurrentIo;\nuse pageserver_api::key::Key;\nuse pageserver_api::keyspace::{KeySpace, KeySpaceRandomAccum};\nuse tracing::{Instrument, info_span, trace};\nuse utils::lsn::Lsn;\nuse utils::sync::gate::GateGuard;\nuse wal_decoder::models::record::NeonWalRecord;\nuse wal_decoder::models::value::Value;\n\nuse self::inmemory_layer::InMemoryLayerFileId;\nuse super::PageReconstructError;\nuse super::layer_map::InMemoryLayerDesc;\nuse super::timeline::{GetVectoredError, ReadPath};\nuse crate::context::{\n    AccessStatsBehavior, PerfInstrumentFutureExt, RequestContext, RequestContextBuilder,\n};\n\npub fn range_overlaps<T>(a: &Range<T>, b: &Range<T>) -> bool\nwhere\n    T: PartialOrd<T>,\n{\n    if a.start < b.start {\n        a.end > b.start\n    } else {\n        b.end > a.start\n    }\n}\n\n/// Struct used to communicate across calls to 
'get_value_reconstruct_data'.\n///\n/// Before first call, you can fill in 'page_img' if you have an older cached\n/// version of the page available. That can save work in\n/// 'get_value_reconstruct_data', as it can stop searching for page versions\n/// when all the WAL records going back to the cached image have been collected.\n///\n/// When get_value_reconstruct_data returns Complete, 'img' is set to an image\n/// of the page, or the oldest WAL record in 'records' is a will_init-type\n/// record that initializes the page without requiring a previous image.\n///\n/// If 'get_value_reconstruct_data' returns Continue, some 'records' may have\n/// been collected, but there are more records outside the current layer. Pass\n/// the same ValueReconstructState struct in the next 'get_value_reconstruct_data'\n/// call, to collect more records.\n///\n#[derive(Debug, Default, Clone)]\npub(crate) struct ValueReconstructState {\n    pub(crate) records: Vec<(Lsn, NeonWalRecord)>,\n    pub(crate) img: Option<(Lsn, Bytes)>,\n}\n\nimpl ValueReconstructState {\n    /// Returns the number of page deltas applied to the page image.\n    pub fn num_deltas(&self) -> usize {\n        match self.img {\n            Some(_) => self.records.len(),\n            None => self.records.len() - 1, // omit will_init record\n        }\n    }\n}\n\n#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]\npub(crate) enum ValueReconstructSituation {\n    Complete,\n    #[default]\n    Continue,\n}\n\n/// On disk representation of a value loaded in a buffer\n#[derive(Debug)]\npub(crate) enum OnDiskValue {\n    /// Unencoded [`Value::Image`]\n    RawImage(Bytes),\n    /// Encoded [`Value`]. 
Can deserialize into an image or a WAL record\n    WalRecordOrImage(Bytes),\n}\n\n/// Reconstruct data accumulated for a single key during a vectored get\n#[derive(Debug, Default)]\npub struct VectoredValueReconstructState {\n    pub(crate) on_disk_values: Vec<(Lsn, OnDiskValueIoWaiter)>,\n\n    pub(crate) situation: ValueReconstructSituation,\n}\n\n#[derive(Debug)]\npub(crate) struct OnDiskValueIoWaiter {\n    rx: tokio::sync::oneshot::Receiver<OnDiskValueIoResult>,\n}\n\n#[derive(Debug)]\n#[must_use]\npub(crate) enum OnDiskValueIo {\n    /// Traversal identified this IO as required to complete the vectored get.\n    Required {\n        num_active_ios: Arc<AtomicUsize>,\n        tx: tokio::sync::oneshot::Sender<OnDiskValueIoResult>,\n    },\n    /// Sparse keyspace reads always read all the values for a given key,\n    /// even though only the first value is needed.\n    ///\n    /// This variant represents the unnecessary IOs for those values at lower LSNs\n    /// that aren't needed, but are currently still being done.\n    ///\n    /// The execution of unnecessary IOs was a pre-existing behavior before concurrent IO.\n    /// We added this explicit representation here so that we can drop\n    /// unnecessary IO results immediately, instead of buffering them in\n    /// `oneshot` channels inside [`VectoredValueReconstructState`] until\n    /// [`VectoredValueReconstructState::collect_pending_ios`] gets called.\n    Unnecessary,\n}\n\ntype OnDiskValueIoResult = Result<OnDiskValue, std::io::Error>;\n\nimpl OnDiskValueIo {\n    pub(crate) fn complete(self, res: OnDiskValueIoResult) {\n        match self {\n            OnDiskValueIo::Required { num_active_ios, tx } => {\n                num_active_ios.fetch_sub(1, std::sync::atomic::Ordering::Release);\n                let _ = tx.send(res);\n            }\n            OnDiskValueIo::Unnecessary => {\n                // Nobody cared, see variant doc comment.\n            }\n        }\n    }\n}\n\n#[derive(Debug, 
thiserror::Error)]\npub(crate) enum WaitCompletionError {\n    #[error(\"OnDiskValueIo was dropped without completing, likely the sidecar task panicked\")]\n    IoDropped,\n}\n\nimpl OnDiskValueIoWaiter {\n    pub(crate) async fn wait_completion(self) -> Result<OnDiskValueIoResult, WaitCompletionError> {\n        // NB: for Unnecessary IOs, this method never gets called because we don't add them to `on_disk_values`.\n        self.rx.await.map_err(|_| WaitCompletionError::IoDropped)\n    }\n}\n\nimpl VectoredValueReconstructState {\n    /// # Cancel-Safety\n    ///\n    /// Technically fine to stop polling this future, but, the IOs will still\n    /// be executed to completion by the sidecar task and hold on to / consume resources.\n    /// Better not do it to make reasoning about the system easier.\n    pub(crate) async fn collect_pending_ios(\n        self,\n    ) -> Result<ValueReconstructState, PageReconstructError> {\n        use utils::bin_ser::BeSer;\n\n        let mut res = Ok(ValueReconstructState::default());\n\n        // We should try hard not to bail early, so that by the time we return from this\n        // function, all IO for this value is done. It's not required -- we could totally\n        // stop polling the IO futures in the sidecar task, they need to support that,\n        // but just stopping to poll doesn't reduce the IO load on the disk. It's easier\n        // to reason about the system if we just wait for all IO to complete, even if\n        // we're no longer interested in the result.\n        //\n        // Revisit this when IO futures are replaced with a more sophisticated IO system\n        // and an IO scheduler, where we know which IOs were submitted and which ones\n        // just queued. 
Cf the comment on IoConcurrency::spawn_io.\n        for (lsn, waiter) in self.on_disk_values {\n            let value_recv_res = waiter\n                .wait_completion()\n                // we rely on the caller to poll us to completion, so this is not a bail point\n                .await;\n            // Force not bailing early by wrapping the code into a closure.\n            #[allow(clippy::redundant_closure_call)]\n            let _: () = (|| {\n                match (&mut res, value_recv_res) {\n                    (Err(_), _) => {\n                        // We've already failed, no need to process more.\n                    }\n                    (Ok(_), Err(wait_err)) => {\n                        // This shouldn't happen - likely the sidecar task panicked.\n                        res = Err(PageReconstructError::Other(wait_err.into()));\n                    }\n                    (Ok(_), Ok(Err(err))) => {\n                        let err: std::io::Error = err;\n                        // TODO: returning IO error here will fail a compute query.\n                        // Probably not what we want, we're not doing `maybe_fatal_err`\n                        // in the IO futures.\n                        // But it's been like that for a long time, not changing it\n                        // as part of concurrent IO.\n                        // => https://github.com/neondatabase/neon/issues/10454\n                        res = Err(PageReconstructError::Other(err.into()));\n                    }\n                    (Ok(ok), Ok(Ok(OnDiskValue::RawImage(img)))) => {\n                        assert!(ok.img.is_none());\n                        ok.img = Some((lsn, img));\n                    }\n                    (Ok(ok), Ok(Ok(OnDiskValue::WalRecordOrImage(buf)))) => {\n                        match Value::des(&buf) {\n                            Ok(Value::WalRecord(rec)) => {\n                                ok.records.push((lsn, rec));\n                      
      }\n                            Ok(Value::Image(img)) => {\n                                assert!(ok.img.is_none());\n                                ok.img = Some((lsn, img));\n                            }\n                            Err(err) => {\n                                res = Err(PageReconstructError::Other(err.into()));\n                            }\n                        }\n                    }\n                }\n            })();\n        }\n\n        res\n    }\n\n    /// Benchmarking utility to await the completion of all pending IOs\n    ///\n    /// # Cancel-Safety\n    ///\n    /// Technically fine to stop polling this future, but, the IOs will still\n    /// be executed to completion by the sidecar task and hold on to / consume resources.\n    /// Better not do it to make reasoning about the system easier.\n    #[cfg(feature = \"benchmarking\")]\n    pub async fn sink_pending_ios(self) -> Result<(), std::io::Error> {\n        let mut res = Ok(());\n\n        // We should try hard not to bail early, so that by the time we return from this\n        // function, all IO for this value is done. It's not required -- we could totally\n        // stop polling the IO futures in the sidecar task, they need to support that,\n        // but just stopping to poll doesn't reduce the IO load on the disk. It's easier\n        // to reason about the system if we just wait for all IO to complete, even if\n        // we're no longer interested in the result.\n        //\n        // Revisit this when IO futures are replaced with a more sophisticated IO system\n        // and an IO scheduler, where we know which IOs were submitted and which ones\n        // just queued. 
Cf the comment on IoConcurrency::spawn_io.\n        for (_lsn, waiter) in self.on_disk_values {\n            let value_recv_res = waiter\n                .wait_completion()\n                // we rely on the caller to poll us to completion, so this is not a bail point\n                .await;\n\n            match (&mut res, value_recv_res) {\n                (Err(_), _) => {\n                    // We've already failed, no need to process more.\n                }\n                (Ok(_), Err(_wait_err)) => {\n                    // This shouldn't happen - likely the sidecar task panicked.\n                    unreachable!();\n                }\n                (Ok(_), Ok(Err(err))) => {\n                    let err: std::io::Error = err;\n                    res = Err(err);\n                }\n                (Ok(_ok), Ok(Ok(OnDiskValue::RawImage(_img)))) => {}\n                (Ok(_ok), Ok(Ok(OnDiskValue::WalRecordOrImage(_buf)))) => {}\n            }\n        }\n\n        res\n    }\n}\n\n/// Bag of data accumulated during a vectored get.\npub struct ValuesReconstructState {\n    /// The keys will be removed after `get_vectored` completes. 
The caller outside `Timeline`\n    /// should not expect to get anything from this hashmap.\n    pub keys: HashMap<Key, VectoredValueReconstructState>,\n    /// The keys which are already retrieved\n    keys_done: KeySpaceRandomAccum,\n\n    /// The keys covered by the image layers\n    keys_with_image_coverage: Option<Range<Key>>,\n\n    // Statistics that are still accessible as a caller of `get_vectored_impl`.\n    layers_visited: u32,\n    delta_layers_visited: u32,\n\n    pub(crate) enable_debug: bool,\n    pub(crate) debug_state: ValueReconstructState,\n\n    pub(crate) io_concurrency: IoConcurrency,\n    num_active_ios: Arc<AtomicUsize>,\n\n    pub(crate) read_path: Option<ReadPath>,\n}\n\n/// The level of IO concurrency to be used on the read path\n///\n/// The desired end state is that we always do parallel IO.\n/// This struct and the dispatching in the impl will be removed once\n/// we've built enough confidence.\npub enum IoConcurrency {\n    Sequential,\n    SidecarTask {\n        task_id: usize,\n        ios_tx: tokio::sync::mpsc::UnboundedSender<IoFuture>,\n    },\n}\n\ntype IoFuture = Pin<Box<dyn Send + Future<Output = ()>>>;\n\npub(crate) enum SelectedIoConcurrency {\n    Sequential,\n    SidecarTask(GateGuard),\n}\n\nimpl std::fmt::Debug for IoConcurrency {\n    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {\n        match self {\n            IoConcurrency::Sequential => write!(f, \"Sequential\"),\n            IoConcurrency::SidecarTask { .. } => write!(f, \"SidecarTask\"),\n        }\n    }\n}\n\nimpl std::fmt::Debug for SelectedIoConcurrency {\n    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {\n        match self {\n            SelectedIoConcurrency::Sequential => write!(f, \"Sequential\"),\n            SelectedIoConcurrency::SidecarTask(_) => write!(f, \"SidecarTask\"),\n        }\n    }\n}\n\nimpl IoConcurrency {\n    /// Force sequential IO. 
This is a temporary workaround until we have\n    /// moved plumbing-through-the-call-stack\n    /// of IoConcurrency into `RequestContext`.\n    ///\n    /// DO NOT USE for new code.\n    ///\n    /// Tracking issue: <https://github.com/neondatabase/neon/issues/10460>.\n    pub(crate) fn sequential() -> Self {\n        Self::spawn(SelectedIoConcurrency::Sequential)\n    }\n\n    pub fn spawn_from_conf(conf: GetVectoredConcurrentIo, gate_guard: GateGuard) -> IoConcurrency {\n        let selected = match conf {\n            GetVectoredConcurrentIo::Sequential => SelectedIoConcurrency::Sequential,\n            GetVectoredConcurrentIo::SidecarTask => SelectedIoConcurrency::SidecarTask(gate_guard),\n        };\n        Self::spawn(selected)\n    }\n\n    pub(crate) fn spawn(io_concurrency: SelectedIoConcurrency) -> Self {\n        match io_concurrency {\n            SelectedIoConcurrency::Sequential => IoConcurrency::Sequential,\n            SelectedIoConcurrency::SidecarTask(gate_guard) => {\n                let (ios_tx, ios_rx) = tokio::sync::mpsc::unbounded_channel();\n                static TASK_ID: AtomicUsize = AtomicUsize::new(0);\n                let task_id = TASK_ID.fetch_add(1, std::sync::atomic::Ordering::Relaxed);\n                // TODO: enrich the span with more context (tenant,shard,timeline) + (basebackup|pagestream|...)\n                let span =\n                    tracing::info_span!(parent: None, \"IoConcurrency_sidecar\", task_id = task_id);\n                trace!(task_id, \"spawning sidecar task\");\n                tokio::spawn(async move {\n                    trace!(\"start\");\n                    scopeguard::defer!{ trace!(\"end\") };\n                    type IosRx = tokio::sync::mpsc::UnboundedReceiver<IoFuture>;\n                    enum State {\n                        Waiting {\n                            // invariant: is_empty(), but we recycle the allocation\n                            empty_futures: 
FuturesUnordered<IoFuture>,\n                            ios_rx: IosRx,\n                        },\n                        Executing {\n                            futures: FuturesUnordered<IoFuture>,\n                            ios_rx: IosRx,\n                        },\n                        ShuttingDown {\n                            futures: FuturesUnordered<IoFuture>,\n                        },\n                    }\n                    let mut state = State::Waiting {\n                        empty_futures: FuturesUnordered::new(),\n                        ios_rx,\n                    };\n                    loop {\n                        match state {\n                            State::Waiting {\n                                empty_futures,\n                                mut ios_rx,\n                            } => {\n                                assert!(empty_futures.is_empty());\n                                tokio::select! {\n                                    fut = ios_rx.recv() => {\n                                        if let Some(fut) = fut {\n                                            trace!(\"received new io future\");\n                                            empty_futures.push(fut);\n                                            state = State::Executing { futures: empty_futures, ios_rx };\n                                        } else {\n                                            state = State::ShuttingDown { futures: empty_futures }\n                                        }\n                                    }\n                                }\n                            }\n                            State::Executing {\n                                mut futures,\n                                mut ios_rx,\n                            } => {\n                                tokio::select! 
{\n                                    res = futures.next() => {\n                                        trace!(\"io future completed\");\n                                        assert!(res.is_some());\n                                        if futures.is_empty() {\n                                            state = State::Waiting { empty_futures: futures, ios_rx};\n                                        } else {\n                                            state = State::Executing { futures, ios_rx };\n                                        }\n                                    }\n                                    fut = ios_rx.recv() => {\n                                        if let Some(fut) = fut {\n                                            trace!(\"received new io future\");\n                                            futures.push(fut);\n                                            state =  State::Executing { futures, ios_rx};\n                                        } else {\n                                            state = State::ShuttingDown { futures };\n                                        }\n                                    }\n                                }\n                            }\n                            State::ShuttingDown {\n                                mut futures,\n                            } => {\n                                trace!(\"shutting down\");\n                                while let Some(()) = futures.next().await {\n                                    trace!(\"io future completed (shutdown)\");\n                                    // drain\n                                }\n                                trace!(\"shutdown complete\");\n                                break;\n                            }\n                        }\n                    }\n                    drop(gate_guard); // drop it right before we exit\n                }.instrument(span));\n                
IoConcurrency::SidecarTask { task_id, ios_tx }\n            }\n        }\n    }\n\n    /// Submit an IO to be executed in the background. DEADLOCK RISK, read the full doc string.\n    ///\n    /// The IO is represented as an opaque future.\n    /// IO completion must be handled inside the future, e.g., through a oneshot channel.\n    ///\n    /// The API seems simple but there are multiple **pitfalls** involving\n    /// DEADLOCK RISK.\n    ///\n    /// First, there are no guarantees about the execution of the IO.\n    /// It may be `await`ed in-place before this function returns.\n    /// It may be polled partially by this task and handed off to another task to be finished.\n    /// It may be polled and then dropped before returning ready.\n    ///\n    /// This means that submitted IOs must not be interdependent.\n    /// Interdependence may be through shared limited resources, e.g.,\n    /// - VirtualFile file descriptor cache slot acquisition\n    /// - tokio-epoll-uring slot\n    ///\n    /// # Why current usage is safe from deadlocks\n    ///\n    /// Textbook condition for a deadlock is that _all_ of the following be given\n    /// - Mutual exclusion\n    /// - Hold and wait\n    /// - No preemption\n    /// - Circular wait\n    ///\n    /// The current usage is safe because:\n    /// - Mutual exclusion: IO futures definitely use mutexes, no way around that for now\n    /// - Hold and wait: IO futures currently hold two kinds of locks/resources while waiting\n    ///   for acquisition of other resources:\n    ///    - VirtualFile file descriptor cache slot tokio mutex\n    ///    - tokio-epoll-uring slot (uses tokio notify => wait queue, much like mutex)\n    /// - No preemption: there's no taking-away of acquired locks/resources => given\n    /// - Circular wait: this is the part of the condition that isn't met: all IO futures\n    ///   first acquire VirtualFile mutex, then tokio-epoll-uring slot.\n    ///   There is no IO future that acquires slot 
before VirtualFile.\n    ///   Hence there can be no circular waiting.\n    ///   Hence there cannot be a deadlock.\n    ///\n    /// This is a very fragile situation and must be revisited whenever any code called from\n    /// inside the IO futures is changed.\n    ///\n    /// We will move away from opaque IO futures towards well-defined IOs at some point in\n    /// the future when we have shipped this first version of concurrent IO to production\n    /// and are ready to retire the Sequential mode which runs the futures in place.\n    /// Right now, while brittle, the opaque IO approach allows us to ship the feature\n    /// with minimal changes to the code and minimal changes to existing behavior in Sequential mode.\n    ///\n    /// Also read the comment in `collect_pending_ios`.\n    pub(crate) async fn spawn_io<F>(&mut self, fut: F)\n    where\n        F: std::future::Future<Output = ()> + Send + 'static,\n    {\n        match self {\n            IoConcurrency::Sequential => fut.await,\n            IoConcurrency::SidecarTask { ios_tx, .. } => {\n                let fut = Box::pin(fut);\n                // NB: experiments showed that doing an opportunistic poll of `fut` here was bad for throughput\n                // while insignificant for latency.\n                // It would make sense to revisit the tokio-epoll-uring API in the future such that we can try\n                // a submission here, but never poll the future. 
That way, io_uring can make progress while\n                // the future sits in the ios_tx queue.\n                match ios_tx.send(fut) {\n                    Ok(()) => {}\n                    Err(_) => {\n                        unreachable!(\"the io task must have exited, likely it panicked\")\n                    }\n                }\n            }\n        }\n    }\n\n    #[cfg(test)]\n    pub(crate) fn spawn_for_test() -> impl std::ops::DerefMut<Target = Self> {\n        use std::ops::{Deref, DerefMut};\n\n        use tracing::info;\n        use utils::sync::gate::Gate;\n\n        // Spawn needs a Gate, give it one.\n        struct Wrapper {\n            inner: IoConcurrency,\n            #[allow(dead_code)]\n            gate: Box<Gate>,\n        }\n        impl Deref for Wrapper {\n            type Target = IoConcurrency;\n\n            fn deref(&self) -> &Self::Target {\n                &self.inner\n            }\n        }\n        impl DerefMut for Wrapper {\n            fn deref_mut(&mut self) -> &mut Self::Target {\n                &mut self.inner\n            }\n        }\n        let gate = Box::new(Gate::default());\n\n        // The default behavior when running Rust unit tests without any further\n        // flags is to use the new behavior.\n        // The CI uses the following environment variable to unit test both old\n        // and new behavior.\n        // NB: the Python regression & perf tests take the `else` branch\n        // below and have their own defaults management.\n        let selected = {\n            // The pageserver_api::config type is unsuitable because it's internally tagged.\n            #[derive(serde::Deserialize)]\n            #[serde(rename_all = \"kebab-case\")]\n            enum TestOverride {\n                Sequential,\n                SidecarTask,\n            }\n            use once_cell::sync::Lazy;\n            static TEST_OVERRIDE: Lazy<TestOverride> = Lazy::new(|| {\n                
utils::env::var_serde_json_string(\n                    \"NEON_PAGESERVER_UNIT_TEST_GET_VECTORED_CONCURRENT_IO\",\n                )\n                .unwrap_or(TestOverride::SidecarTask)\n            });\n\n            match *TEST_OVERRIDE {\n                TestOverride::Sequential => SelectedIoConcurrency::Sequential,\n                TestOverride::SidecarTask => {\n                    SelectedIoConcurrency::SidecarTask(gate.enter().expect(\"just created it\"))\n                }\n            }\n        };\n\n        info!(?selected, \"get_vectored_concurrent_io test\");\n\n        Wrapper {\n            inner: Self::spawn(selected),\n            gate,\n        }\n    }\n}\n\nimpl Clone for IoConcurrency {\n    fn clone(&self) -> Self {\n        match self {\n            IoConcurrency::Sequential => IoConcurrency::Sequential,\n            IoConcurrency::SidecarTask { task_id, ios_tx } => IoConcurrency::SidecarTask {\n                task_id: *task_id,\n                ios_tx: ios_tx.clone(),\n            },\n        }\n    }\n}\n\n/// Make noise in case the [`ValuesReconstructState`] gets dropped while\n/// there are still IOs in flight.\n/// Refer to `collect_pending_ios` for why we prefer not to do that.\n//\n/// We log from here instead of from the sidecar task because the [`ValuesReconstructState`]\n/// gets dropped in a tracing span with more context.\n/// We repeat the sidecar task's `task_id` so we can correlate what we emit here with\n/// the logs / panic handler logs from the sidecar task, which also logs the `task_id`.\nimpl Drop for ValuesReconstructState {\n    fn drop(&mut self) {\n        let num_active_ios = self\n            .num_active_ios\n            .load(std::sync::atomic::Ordering::Acquire);\n        if num_active_ios == 0 {\n            return;\n        }\n        let sidecar_task_id = match &self.io_concurrency {\n            IoConcurrency::Sequential => None,\n            IoConcurrency::SidecarTask { task_id, .. 
} => Some(*task_id),\n        };\n        tracing::warn!(\n            num_active_ios,\n            ?sidecar_task_id,\n            backtrace=%std::backtrace::Backtrace::force_capture(),\n            \"dropping ValuesReconstructState while some IOs have not been completed\",\n        );\n    }\n}\n\nimpl ValuesReconstructState {\n    pub fn new(io_concurrency: IoConcurrency) -> Self {\n        Self {\n            keys: HashMap::new(),\n            keys_done: KeySpaceRandomAccum::new(),\n            keys_with_image_coverage: None,\n            layers_visited: 0,\n            delta_layers_visited: 0,\n            io_concurrency,\n            enable_debug: false,\n            debug_state: ValueReconstructState::default(),\n            num_active_ios: Arc::new(AtomicUsize::new(0)),\n            read_path: None,\n        }\n    }\n\n    pub(crate) fn new_with_debug(io_concurrency: IoConcurrency) -> Self {\n        Self {\n            keys: HashMap::new(),\n            keys_done: KeySpaceRandomAccum::new(),\n            keys_with_image_coverage: None,\n            layers_visited: 0,\n            delta_layers_visited: 0,\n            io_concurrency,\n            enable_debug: true,\n            debug_state: ValueReconstructState::default(),\n            num_active_ios: Arc::new(AtomicUsize::new(0)),\n            read_path: None,\n        }\n    }\n\n    /// Absolutely read [`IoConcurrency::spawn_io`] to learn about assumptions & pitfalls.\n    pub(crate) async fn spawn_io<F>(&mut self, fut: F)\n    where\n        F: std::future::Future<Output = ()> + Send + 'static,\n    {\n        self.io_concurrency.spawn_io(fut).await;\n    }\n\n    pub(crate) fn set_debug_state(&mut self, debug_state: &ValueReconstructState) {\n        if self.enable_debug {\n            self.debug_state = debug_state.clone();\n        }\n    }\n\n    pub(crate) fn on_layer_visited(&mut self, layer: &ReadableLayer) {\n        self.layers_visited += 1;\n        if let 
ReadableLayer::PersistentLayer(layer) = layer {\n            if layer.layer_desc().is_delta() {\n                self.delta_layers_visited += 1;\n            }\n        }\n    }\n\n    pub(crate) fn get_delta_layers_visited(&self) -> u32 {\n        self.delta_layers_visited\n    }\n\n    pub(crate) fn get_layers_visited(&self) -> u32 {\n        self.layers_visited\n    }\n\n    /// On hitting image layer, we can mark all keys in this range as done, because\n    /// if the image layer does not contain a key, it is deleted/never added.\n    pub(crate) fn on_image_layer_visited(&mut self, key_range: &Range<Key>) {\n        let prev_val = self.keys_with_image_coverage.replace(key_range.clone());\n        assert_eq!(\n            prev_val, None,\n            \"should consume the keyspace before the next iteration\"\n        );\n    }\n\n    /// Update the state collected for a given key.\n    /// Returns true if this was the last value needed for the key and false otherwise.\n    ///\n    /// If the key is done after the update, mark it as such.\n    ///\n    /// If the key is in the sparse keyspace (i.e., aux files), we do not track them in\n    /// `key_done`.\n    // TODO: rename this method & update description.\n    pub(crate) fn update_key(&mut self, key: &Key, lsn: Lsn, completes: bool) -> OnDiskValueIo {\n        let state = self.keys.entry(*key).or_default();\n\n        let is_sparse_key = key.is_sparse();\n\n        let required_io = match state.situation {\n            ValueReconstructSituation::Complete => {\n                if is_sparse_key {\n                    // Sparse keyspace might be visited multiple times because\n                    // we don't track unmapped keyspaces.\n                    return OnDiskValueIo::Unnecessary;\n                } else {\n                    unreachable!()\n                }\n            }\n            ValueReconstructSituation::Continue => {\n                self.num_active_ios\n                    .fetch_add(1, 
std::sync::atomic::Ordering::Release);\n                let (tx, rx) = tokio::sync::oneshot::channel();\n                state.on_disk_values.push((lsn, OnDiskValueIoWaiter { rx }));\n                OnDiskValueIo::Required {\n                    tx,\n                    num_active_ios: Arc::clone(&self.num_active_ios),\n                }\n            }\n        };\n\n        if completes && state.situation == ValueReconstructSituation::Continue {\n            state.situation = ValueReconstructSituation::Complete;\n            if !is_sparse_key {\n                self.keys_done.add_key(*key);\n            }\n        }\n\n        required_io\n    }\n\n    /// Returns the key space describing the keys that have\n    /// been marked as completed since the last call to this function.\n    /// Returns individual keys done, and the image layer coverage.\n    pub(crate) fn consume_done_keys(&mut self) -> (KeySpace, Option<Range<Key>>) {\n        (\n            self.keys_done.consume_keyspace(),\n            self.keys_with_image_coverage.take(),\n        )\n    }\n}\n\n/// A key that uniquely identifies a layer in a timeline\n#[derive(Debug, PartialEq, Eq, Clone, Hash)]\npub(crate) enum LayerId {\n    PersitentLayerId(PersistentLayerKey),\n    InMemoryLayerId(InMemoryLayerFileId),\n}\n\n/// Uniquely identify a layer visit by the layer\n/// and LSN range of the reads. Note that the end of the range is exclusive.\n///\n/// The layer itself is not enough since we may have different LSN lower\n/// bounds for delta layer reads. Scenarios where this can happen are:\n///\n/// 1. Layer overlaps: imagine an image layer inside an in-memory layer\n///    and a query that only partially hits the image layer. Part of the query\n///    needs to read the whole in-memory layer and the other part needs to read\n///    only up to the image layer. Hence, they'll have different LSN floor values\n///    for the read.\n///\n/// 2. 
Scattered reads: the read path supports starting at different LSNs. Imagine\n///    The start LSN for one range is inside a layer and the start LSN for another range\n///    Is above the layer (includes all of it). Both ranges need to read the layer all the\n///    Way to the end but starting at different points. Hence, they'll have different LSN\n///    Ceil values.\n///\n/// The implication is that we might visit the same layer multiple times\n/// in order to read different LSN ranges from it. In practice, this isn't very concerning\n/// because:\n/// 1. Layer overlaps are rare and generally not intended\n/// 2. Scattered reads will stabilise after the first few layers provided their starting LSNs\n///    are grouped tightly enough (likely the case).\n#[derive(Debug, PartialEq, Eq, Clone, Hash)]\nstruct LayerToVisitId {\n    layer_id: LayerId,\n    lsn_floor: Lsn,\n    lsn_ceil: Lsn,\n}\n\n#[derive(Debug, PartialEq, Eq, Hash)]\npub enum ReadableLayerWeak {\n    PersistentLayer(Arc<PersistentLayerDesc>),\n    InMemoryLayer(InMemoryLayerDesc),\n}\n\n/// Layer wrapper for the read path. Note that it is valid\n/// to use these layers even after external operations have\n/// been performed on them (compaction, freeze, etc.).\n#[derive(Debug)]\npub(crate) enum ReadableLayer {\n    PersistentLayer(Layer),\n    InMemoryLayer(Arc<InMemoryLayer>),\n}\n\n/// A partial description of a read to be done.\n#[derive(Debug, Clone)]\nstruct LayerVisit {\n    /// An id used to resolve the readable layer within the fringe\n    layer_to_visit_id: LayerToVisitId,\n    /// Lsn range for the read, used for selecting the next read\n    lsn_range: Range<Lsn>,\n}\n\n/// Data structure which maintains a fringe of layers for the\n/// read path. The fringe is the set of layers which intersects\n/// the current keyspace that the search is descending on.\n/// Each layer tracks the keyspace that intersects it.\n///\n/// The fringe must appear sorted by Lsn. 
Hence, it uses\n/// a two layer indexing scheme.\n#[derive(Debug)]\npub(crate) struct LayerFringe {\n    planned_visits_by_lsn: BinaryHeap<LayerVisit>,\n    visit_reads: HashMap<LayerToVisitId, LayerVisitReads>,\n}\n\n#[derive(Debug)]\nstruct LayerVisitReads {\n    layer: ReadableLayer,\n    target_keyspace: KeySpaceRandomAccum,\n}\n\nimpl LayerFringe {\n    pub(crate) fn new() -> Self {\n        LayerFringe {\n            planned_visits_by_lsn: BinaryHeap::new(),\n            visit_reads: HashMap::new(),\n        }\n    }\n\n    pub(crate) fn next_layer(&mut self) -> Option<(ReadableLayer, KeySpace, Range<Lsn>)> {\n        let read_desc = self.planned_visits_by_lsn.pop()?;\n\n        let removed = self.visit_reads.remove_entry(&read_desc.layer_to_visit_id);\n\n        match removed {\n            Some((\n                _,\n                LayerVisitReads {\n                    layer,\n                    mut target_keyspace,\n                },\n            )) => Some((\n                layer,\n                target_keyspace.consume_keyspace(),\n                read_desc.lsn_range,\n            )),\n            None => unreachable!(\"fringe internals are always consistent\"),\n        }\n    }\n\n    pub(crate) fn update(\n        &mut self,\n        layer: ReadableLayer,\n        keyspace: KeySpace,\n        lsn_range: Range<Lsn>,\n    ) {\n        let layer_to_visit_id = LayerToVisitId {\n            layer_id: layer.id(),\n            lsn_floor: lsn_range.start,\n            lsn_ceil: lsn_range.end,\n        };\n\n        let entry = self.visit_reads.entry(layer_to_visit_id.clone());\n        match entry {\n            Entry::Occupied(mut entry) => {\n                entry.get_mut().target_keyspace.add_keyspace(keyspace);\n            }\n            Entry::Vacant(entry) => {\n                self.planned_visits_by_lsn.push(LayerVisit {\n                    lsn_range,\n                    layer_to_visit_id: layer_to_visit_id.clone(),\n                });\n      
          let mut accum = KeySpaceRandomAccum::new();\n                accum.add_keyspace(keyspace);\n                entry.insert(LayerVisitReads {\n                    layer,\n                    target_keyspace: accum,\n                });\n            }\n        }\n    }\n}\n\nimpl Default for LayerFringe {\n    fn default() -> Self {\n        Self::new()\n    }\n}\n\nimpl Ord for LayerVisit {\n    fn cmp(&self, other: &Self) -> Ordering {\n        let ord = self.lsn_range.end.cmp(&other.lsn_range.end);\n        if ord == std::cmp::Ordering::Equal {\n            self.lsn_range.start.cmp(&other.lsn_range.start).reverse()\n        } else {\n            ord\n        }\n    }\n}\n\nimpl PartialOrd for LayerVisit {\n    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {\n        Some(self.cmp(other))\n    }\n}\n\nimpl PartialEq for LayerVisit {\n    fn eq(&self, other: &Self) -> bool {\n        self.lsn_range == other.lsn_range\n    }\n}\n\nimpl Eq for LayerVisit {}\n\nimpl ReadableLayer {\n    pub(crate) fn id(&self) -> LayerId {\n        match self {\n            Self::PersistentLayer(layer) => LayerId::PersitentLayerId(layer.layer_desc().key()),\n            Self::InMemoryLayer(layer) => LayerId::InMemoryLayerId(layer.file_id()),\n        }\n    }\n\n    pub(crate) async fn get_values_reconstruct_data(\n        &self,\n        keyspace: KeySpace,\n        lsn_range: Range<Lsn>,\n        reconstruct_state: &mut ValuesReconstructState,\n        ctx: &RequestContext,\n    ) -> Result<(), GetVectoredError> {\n        match self {\n            ReadableLayer::PersistentLayer(layer) => {\n                let ctx = RequestContextBuilder::from(ctx)\n                    .perf_span(|crnt_perf_span| {\n                        info_span!(\n                            target: PERF_TRACE_TARGET,\n                            parent: crnt_perf_span,\n                            \"PLAN_LAYER\",\n                            layer = %layer\n                        )\n         
           })\n                    .attached_child();\n\n                layer\n                    .get_values_reconstruct_data(keyspace, lsn_range, reconstruct_state, &ctx)\n                    .maybe_perf_instrument(&ctx, |crnt_perf_span| crnt_perf_span.clone())\n                    .await\n            }\n            ReadableLayer::InMemoryLayer(layer) => {\n                let ctx = RequestContextBuilder::from(ctx)\n                    .perf_span(|crnt_perf_span| {\n                        info_span!(\n                            target: PERF_TRACE_TARGET,\n                            parent: crnt_perf_span,\n                            \"PLAN_LAYER\",\n                            layer = %layer\n                        )\n                    })\n                    .attached_child();\n\n                layer\n                    .get_values_reconstruct_data(keyspace, lsn_range, reconstruct_state, &ctx)\n                    .maybe_perf_instrument(&ctx, |crnt_perf_span| crnt_perf_span.clone())\n                    .await\n            }\n        }\n    }\n}\n\n/// Layers contain a hint indicating whether they are likely to be used for reads.\n///\n/// This is a hint rather than an authoritative value, so that we do not have to update it synchronously\n/// when changing the visibility of layers (for example when creating a branch that makes some previously\n/// covered layers visible).  
It should be used for cache management but not for correctness-critical checks.\n#[derive(Debug, Clone, PartialEq, Eq)]\npub enum LayerVisibilityHint {\n    /// A Visible layer might be read while serving a read, because there is not an image layer between it\n    /// and a readable LSN (the tip of the branch or a child's branch point)\n    Visible,\n    /// A Covered layer probably won't be read right now, but _can_ be read in future if someone creates\n    /// a branch or ephemeral endpoint at an LSN below the layer that covers this.\n    Covered,\n}\n\npub(crate) struct LayerAccessStats(std::sync::atomic::AtomicU64);\n\n#[derive(Clone, Copy, strum_macros::EnumString)]\npub(crate) enum LayerAccessStatsReset {\n    NoReset,\n    AllStats,\n}\n\nimpl Default for LayerAccessStats {\n    fn default() -> Self {\n        // Default value is to assume resident since creation time, and visible.\n        let (_mask, mut value) = Self::to_low_res_timestamp(Self::RTIME_SHIFT, SystemTime::now());\n        value |= 0x1 << Self::VISIBILITY_SHIFT;\n\n        Self(std::sync::atomic::AtomicU64::new(value))\n    }\n}\n\n// Efficient store of two very-low-resolution timestamps and some bits.  Used for storing last access time and\n// last residence change time.\nimpl LayerAccessStats {\n    // How many high bits to drop from a u32 timestamp?\n    // - Only storing up to a u32 timestamp will work fine until 2038 (if this code is still in use\n    //   after that, this software has been very successful!)\n    // - Dropping the top bit is implicitly safe because unix timestamps are meant to be\n    // stored in an i32, so they never used it.\n    // - Dropping the next two bits is safe because this code is only running on systems in\n    // years >= 2024, and these bits have been 1 since 2021\n    //\n    // Therefore we may store only 29 bits for a timestamp with one second resolution.  
We do\n    // this truncation to make space for some flags in the high bits of our u64.\n    const TS_DROP_HIGH_BITS: u32 = u32::count_ones(Self::TS_ONES) + 1;\n    const TS_MASK: u32 = 0x1f_ff_ff_ff;\n    const TS_ONES: u32 = 0x60_00_00_00;\n\n    const ATIME_SHIFT: u32 = 0;\n    const RTIME_SHIFT: u32 = 32 - Self::TS_DROP_HIGH_BITS;\n    const VISIBILITY_SHIFT: u32 = 64 - 2 * Self::TS_DROP_HIGH_BITS;\n\n    fn write_bits(&self, mask: u64, value: u64) -> u64 {\n        self.0\n            .fetch_update(\n                // TODO: decide what orderings are correct\n                std::sync::atomic::Ordering::Relaxed,\n                std::sync::atomic::Ordering::Relaxed,\n                |v| Some((v & !mask) | (value & mask)),\n            )\n            .expect(\"Inner function is infallible\")\n    }\n\n    fn to_low_res_timestamp(shift: u32, time: SystemTime) -> (u64, u64) {\n        // Drop the high bits of the timestamp (keep only the TS_MASK bits, at one second resolution);\n        // read_low_res_timestamp restores the dropped bits from TS_ONES.\n        let timestamp = time.duration_since(UNIX_EPOCH).unwrap().as_secs() & (Self::TS_MASK as u64);\n\n        ((Self::TS_MASK as u64) << shift, timestamp << shift)\n    }\n\n    fn read_low_res_timestamp(&self, shift: u32) -> Option<SystemTime> {\n        let read = self.0.load(std::sync::atomic::Ordering::Relaxed);\n\n        let ts_bits = (read & ((Self::TS_MASK as u64) << shift)) >> shift;\n        if ts_bits == 0 {\n            None\n        } else {\n            Some(UNIX_EPOCH + Duration::from_secs(ts_bits | (Self::TS_ONES as u64)))\n        }\n    }\n\n    /// Record a change in layer residency.\n    ///\n    /// Recording the event must happen while holding the layer map lock to\n    /// ensure that latest-activity-threshold-based layer eviction (eviction_task.rs)\n    /// can do an \"imitate access\" to this layer, before it observes `now-latest_activity() > threshold`.\n    ///\n    /// If we instead recorded the residence event with a timestamp from before grabbing the layer map lock,\n    /// the 
following race could happen:\n    ///\n    /// - Compact: Write out an L1 layer from several L0 layers. This records residence event LayerCreate with the current timestamp.\n    /// - Eviction: imitate access logical size calculation. This accesses the L0 layers because the L1 layer is not yet in the layer map.\n    /// - Compact: Grab layer map lock, add the new L1 to layer map and remove the L0s, release layer map lock.\n    /// - Eviction: observes the new L1 layer whose only activity timestamp is the LayerCreate event.\n    pub(crate) fn record_residence_event_at(&self, now: SystemTime) {\n        let (mask, value) = Self::to_low_res_timestamp(Self::RTIME_SHIFT, now);\n        self.write_bits(mask, value);\n    }\n\n    pub(crate) fn record_residence_event(&self) {\n        self.record_residence_event_at(SystemTime::now())\n    }\n\n    fn record_access_at(&self, now: SystemTime) -> bool {\n        let (mut mask, mut value) = Self::to_low_res_timestamp(Self::ATIME_SHIFT, now);\n\n        // A layer which is accessed must be visible.\n        mask |= 0x1 << Self::VISIBILITY_SHIFT;\n        value |= 0x1 << Self::VISIBILITY_SHIFT;\n\n        let old_bits = self.write_bits(mask, value);\n        !matches!(\n            self.decode_visibility(old_bits),\n            LayerVisibilityHint::Visible\n        )\n    }\n\n    /// Returns true if we modified the layer's visibility to set it to Visible implicitly\n    /// as a result of this access\n    pub(crate) fn record_access(&self, ctx: &RequestContext) -> bool {\n        if ctx.access_stats_behavior() == AccessStatsBehavior::Skip {\n            return false;\n        }\n\n        self.record_access_at(SystemTime::now())\n    }\n\n    fn as_api_model(\n        &self,\n        reset: LayerAccessStatsReset,\n    ) -> pageserver_api::models::LayerAccessStats {\n        let ret = pageserver_api::models::LayerAccessStats {\n            access_time: self\n                .read_low_res_timestamp(Self::ATIME_SHIFT)\n           
     .unwrap_or(UNIX_EPOCH),\n            residence_time: self\n                .read_low_res_timestamp(Self::RTIME_SHIFT)\n                .unwrap_or(UNIX_EPOCH),\n            visible: matches!(self.visibility(), LayerVisibilityHint::Visible),\n        };\n        match reset {\n            LayerAccessStatsReset::NoReset => {}\n            LayerAccessStatsReset::AllStats => {\n                self.write_bits((Self::TS_MASK as u64) << Self::ATIME_SHIFT, 0x0);\n                self.write_bits((Self::TS_MASK as u64) << Self::RTIME_SHIFT, 0x0);\n            }\n        }\n        ret\n    }\n\n    /// Get the latest access timestamp, falling back to latest residence event.  The latest residence event\n    /// will be this Layer's construction time, if its residence hasn't changed since then.\n    pub(crate) fn latest_activity(&self) -> SystemTime {\n        if let Some(t) = self.read_low_res_timestamp(Self::ATIME_SHIFT) {\n            t\n        } else {\n            self.read_low_res_timestamp(Self::RTIME_SHIFT)\n                .expect(\"Residence time is set on construction\")\n        }\n    }\n\n    /// Whether this layer has been accessed (excluding in [`AccessStatsBehavior::Skip`]).\n    ///\n    /// This indicates whether the layer has been used for some purpose that would motivate\n    /// us to keep it on disk, such as for serving a getpage request.\n    fn accessed(&self) -> bool {\n        // Consider it accessed if the most recent access is more recent than\n        // the most recent change in residence status.\n        match (\n            self.read_low_res_timestamp(Self::ATIME_SHIFT),\n            self.read_low_res_timestamp(Self::RTIME_SHIFT),\n        ) {\n            (None, _) => false,\n            (Some(_), None) => true,\n            (Some(a), Some(r)) => a >= r,\n        }\n    }\n\n    /// Helper for extracting the visibility hint from the literal value of our inner u64\n    fn decode_visibility(&self, bits: u64) -> LayerVisibilityHint {\n      
  match (bits >> Self::VISIBILITY_SHIFT) & 0x1 {\n            1 => LayerVisibilityHint::Visible,\n            0 => LayerVisibilityHint::Covered,\n            _ => unreachable!(),\n        }\n    }\n\n    /// Returns the old value which has been replaced\n    pub(crate) fn set_visibility(&self, visibility: LayerVisibilityHint) -> LayerVisibilityHint {\n        let value = match visibility {\n            LayerVisibilityHint::Visible => 0x1 << Self::VISIBILITY_SHIFT,\n            LayerVisibilityHint::Covered => 0x0,\n        };\n\n        let old_bits = self.write_bits(0x1 << Self::VISIBILITY_SHIFT, value);\n        self.decode_visibility(old_bits)\n    }\n\n    pub(crate) fn visibility(&self) -> LayerVisibilityHint {\n        let read = self.0.load(std::sync::atomic::Ordering::Relaxed);\n        self.decode_visibility(read)\n    }\n}\n\n/// Get a layer descriptor from a layer.\npub(crate) trait AsLayerDesc {\n    /// Get the layer descriptor.\n    fn layer_desc(&self) -> &PersistentLayerDesc;\n}\n\npub mod tests {\n    use pageserver_api::shard::TenantShardId;\n    use utils::id::TimelineId;\n\n    use super::*;\n\n    impl From<DeltaLayerName> for PersistentLayerDesc {\n        fn from(value: DeltaLayerName) -> Self {\n            PersistentLayerDesc::new_delta(\n                TenantShardId::from([0; 18]),\n                TimelineId::from_array([0; 16]),\n                value.key_range,\n                value.lsn_range,\n                233,\n            )\n        }\n    }\n\n    impl From<ImageLayerName> for PersistentLayerDesc {\n        fn from(value: ImageLayerName) -> Self {\n            PersistentLayerDesc::new_img(\n                TenantShardId::from([0; 18]),\n                TimelineId::from_array([0; 16]),\n                value.key_range,\n                value.lsn,\n                233,\n            )\n        }\n    }\n\n    impl From<LayerName> for PersistentLayerDesc {\n        fn from(value: LayerName) -> Self {\n            match value {\n     
           LayerName::Delta(d) => Self::from(d),\n                LayerName::Image(i) => Self::from(i),\n            }\n        }\n    }\n}\n\n/// Range wrapping newtype, which uses display to render Debug.\n///\n/// Useful with `Key`, which has too verbose `{:?}` for printing multiple layers.\nstruct RangeDisplayDebug<'a, T: std::fmt::Display>(&'a Range<T>);\n\nimpl<T: std::fmt::Display> std::fmt::Debug for RangeDisplayDebug<'_, T> {\n    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {\n        write!(f, \"{}..{}\", self.0.start, self.0.end)\n    }\n}\n\n#[cfg(test)]\nmod tests2 {\n    use pageserver_api::key::DBDIR_KEY;\n    use tracing::info;\n\n    use super::*;\n    use crate::tenant::storage_layer::IoConcurrency;\n\n    /// TODO: currently this test relies on manual visual inspection of the --no-capture output.\n    /// Should look like so:\n    /// ```text\n    /// RUST_LOG=trace cargo nextest run  --features testing  --no-capture test_io_concurrency_noise\n    /// running 1 test\n    /// 2025-01-21T17:42:01.335679Z  INFO get_vectored_concurrent_io test selected=SidecarTask\n    /// 2025-01-21T17:42:01.335680Z TRACE spawning sidecar task task_id=0\n    /// 2025-01-21T17:42:01.335937Z TRACE IoConcurrency_sidecar{task_id=0}: start\n    /// 2025-01-21T17:42:01.335972Z TRACE IoConcurrency_sidecar{task_id=0}: received new io future\n    /// 2025-01-21T17:42:01.335999Z  INFO IoConcurrency_sidecar{task_id=0}: waiting for signal to complete IO\n    /// 2025-01-21T17:42:01.336229Z  WARN dropping ValuesReconstructState while some IOs have not been completed num_active_ios=1 sidecar_task_id=Some(0) backtrace=   0: <pageserver::tenant::storage_layer::ValuesReconstructState as core::ops::drop::Drop>::drop\n    ///              at ./src/tenant/storage_layer.rs:553:24\n    ///    1: core::ptr::drop_in_place<pageserver::tenant::storage_layer::ValuesReconstructState>\n    ///              at 
/home/christian/.rustup/toolchains/1.84.0-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/core/src/ptr/mod.rs:521:1\n    ///    2: core::mem::drop\n    ///              at /home/christian/.rustup/toolchains/1.84.0-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/core/src/mem/mod.rs:942:24\n    ///    3: pageserver::tenant::storage_layer::tests2::test_io_concurrency_noise::{{closure}}\n    ///              at ./src/tenant/storage_layer.rs:1159:9\n    ///   ...\n    ///   49: <unknown>\n    /// 2025-01-21T17:42:01.452293Z  INFO IoConcurrency_sidecar{task_id=0}: completing IO\n    /// 2025-01-21T17:42:01.452357Z TRACE IoConcurrency_sidecar{task_id=0}: io future completed\n    /// 2025-01-21T17:42:01.452473Z TRACE IoConcurrency_sidecar{task_id=0}: end\n    /// test tenant::storage_layer::tests2::test_io_concurrency_noise ... ok\n    ///\n    /// ```\n    #[tokio::test]\n    async fn test_io_concurrency_noise() {\n        crate::tenant::harness::setup_logging();\n\n        let io_concurrency = IoConcurrency::spawn_for_test();\n        match *io_concurrency {\n            IoConcurrency::Sequential => {\n                // This test asserts behavior in sidecar mode, doesn't make sense in sequential mode.\n                return;\n            }\n            IoConcurrency::SidecarTask { .. 
} => {}\n        }\n        let mut reconstruct_state = ValuesReconstructState::new(io_concurrency.clone());\n\n        let (io_fut_is_waiting_tx, io_fut_is_waiting) = tokio::sync::oneshot::channel();\n        let (do_complete_io, should_complete_io) = tokio::sync::oneshot::channel();\n        let (io_fut_exiting_tx, io_fut_exiting) = tokio::sync::oneshot::channel();\n\n        let io = reconstruct_state.update_key(&DBDIR_KEY, Lsn(8), true);\n        reconstruct_state\n            .spawn_io(async move {\n                info!(\"waiting for signal to complete IO\");\n                io_fut_is_waiting_tx.send(()).unwrap();\n                should_complete_io.await.unwrap();\n                info!(\"completing IO\");\n                io.complete(Ok(OnDiskValue::RawImage(Bytes::new())));\n                io_fut_exiting_tx.send(()).unwrap();\n            })\n            .await;\n\n        io_fut_is_waiting.await.unwrap();\n\n        // this is what makes the noise\n        drop(reconstruct_state);\n\n        do_complete_io.send(()).unwrap();\n\n        io_fut_exiting.await.unwrap();\n    }\n}\n"
  },
  {
    "path": "pageserver/src/tenant/tasks.rs",
    "content": "//! This module contains per-tenant background processes, e.g. compaction and GC.\n\nuse std::cmp::max;\nuse std::future::Future;\nuse std::ops::{ControlFlow, RangeInclusive};\nuse std::pin::pin;\nuse std::sync::Arc;\nuse std::time::{Duration, Instant};\n\nuse once_cell::sync::Lazy;\nuse pageserver_api::config::tenant_conf_defaults::DEFAULT_COMPACTION_PERIOD;\nuse rand::Rng;\nuse scopeguard::defer;\nuse tokio::sync::{Semaphore, SemaphorePermit};\nuse tokio_util::sync::CancellationToken;\nuse tracing::*;\nuse utils::backoff::exponential_backoff_duration;\nuse utils::completion::Barrier;\nuse utils::pausable_failpoint;\n\nuse crate::context::{DownloadBehavior, RequestContext};\nuse crate::metrics::{self, BackgroundLoopSemaphoreMetricsRecorder, TENANT_TASK_EVENTS};\nuse crate::task_mgr::{self, BACKGROUND_RUNTIME, TOKIO_WORKER_THREADS, TaskKind};\nuse crate::tenant::throttle::Stats;\nuse crate::tenant::timeline::CompactionError;\nuse crate::tenant::timeline::compaction::CompactionOutcome;\nuse crate::tenant::{TenantShard, TenantState};\n\n/// Semaphore limiting concurrent background tasks (across all tenants).\n///\n/// We use 3/4 Tokio threads, to avoid blocking all threads in case we do any CPU-heavy work.\nstatic CONCURRENT_BACKGROUND_TASKS: Lazy<Semaphore> = Lazy::new(|| {\n    let total_threads = TOKIO_WORKER_THREADS.get();\n\n    /*BEGIN_HADRON*/\n    // ideally we should run at least one compaction task per tenant in order to (1) maximize\n    // compaction throughput (2) avoid head-of-line blocking of large compactions. However doing\n    // that may create too many compaction tasks with lots of memory overheads. 
So we limit the\n    // number of compaction tasks based on the available CPU core count.\n    // Need to revisit.\n    // let tasks_per_thread = std::env::var(\"BG_TASKS_PER_THREAD\")\n    //     .ok()\n    //     .and_then(|s| s.parse().ok())\n    //     .unwrap_or(4);\n    // let permits = usize::max(1, total_threads * tasks_per_thread);\n    // // assert!(permits < total_threads, \"need threads for other work\");\n    /*END_HADRON*/\n\n    let permits = max(1, (total_threads * 3).checked_div(4).unwrap_or(0));\n    assert_ne!(permits, 0, \"we will not be adding in permits later\");\n    assert!(permits < total_threads, \"need threads for other work\");\n    Semaphore::new(permits)\n});\n\n/// Semaphore limiting concurrent L0 compaction tasks (across all tenants). This is only used if\n/// both `compaction_l0_semaphore` and `compaction_l0_first` are enabled.\n///\n/// This is a separate semaphore from background tasks, because L0 compaction needs to be responsive\n/// to avoid high read amp during heavy write workloads. Regular image/GC compaction is less\n/// important (e.g. due to page images in delta layers) and can wait for other background tasks.\n///\n/// We use 3/4 Tokio threads, to avoid blocking all threads in case we do any CPU-heavy work. 
Note\n/// that this runs on the same Tokio runtime as `CONCURRENT_BACKGROUND_TASKS`, and shares the same\n/// thread pool.\nstatic CONCURRENT_L0_COMPACTION_TASKS: Lazy<Semaphore> = Lazy::new(|| {\n    let total_threads = TOKIO_WORKER_THREADS.get();\n    let permits = max(1, (total_threads * 3).checked_div(4).unwrap_or(0));\n    assert_ne!(permits, 0, \"we will not be adding in permits later\");\n    assert!(permits < total_threads, \"need threads for other work\");\n    Semaphore::new(permits)\n});\n\n/// Background jobs.\n///\n/// NB: not all of these acquire a CONCURRENT_BACKGROUND_TASKS semaphore permit, only the ones that\n/// do any significant IO or CPU work.\n#[derive(\n    Debug,\n    PartialEq,\n    Eq,\n    Clone,\n    Copy,\n    strum_macros::IntoStaticStr,\n    strum_macros::Display,\n    enum_map::Enum,\n)]\n#[strum(serialize_all = \"snake_case\")]\npub(crate) enum BackgroundLoopKind {\n    /// L0Compaction runs as a separate pass within the Compaction loop, not a separate loop. 
It is\n    /// used to request the `CONCURRENT_L0_COMPACTION_TASKS` semaphore and associated metrics.\n    L0Compaction,\n    Compaction,\n    Gc,\n    Eviction,\n    TenantHouseKeeping,\n    ConsumptionMetricsCollectMetrics,\n    ConsumptionMetricsSyntheticSizeWorker,\n    InitialLogicalSizeCalculation,\n    HeatmapUpload,\n    SecondaryDownload,\n}\n\npub struct BackgroundLoopSemaphorePermit<'a> {\n    _permit: SemaphorePermit<'static>,\n    _recorder: BackgroundLoopSemaphoreMetricsRecorder<'a>,\n}\n\n/// Acquires a semaphore permit, to limit concurrent background jobs.\npub(crate) async fn acquire_concurrency_permit(\n    loop_kind: BackgroundLoopKind,\n    _ctx: &RequestContext,\n) -> BackgroundLoopSemaphorePermit<'static> {\n    let mut recorder = metrics::BACKGROUND_LOOP_SEMAPHORE.record(loop_kind);\n\n    if loop_kind == BackgroundLoopKind::InitialLogicalSizeCalculation {\n        pausable_failpoint!(\"initial-size-calculation-permit-pause\");\n    }\n\n    // TODO: assert that we run on BACKGROUND_RUNTIME; requires tokio_unstable Handle::id();\n    let semaphore = match loop_kind {\n        BackgroundLoopKind::L0Compaction => &CONCURRENT_L0_COMPACTION_TASKS,\n        _ => &CONCURRENT_BACKGROUND_TASKS,\n    };\n    let permit = semaphore.acquire().await.expect(\"should never close\");\n\n    recorder.acquired();\n\n    BackgroundLoopSemaphorePermit {\n        _permit: permit,\n        _recorder: recorder,\n    }\n}\n\n/// Start per tenant background loops: compaction, GC, and ingest housekeeping.\npub fn start_background_loops(tenant: &Arc<TenantShard>, can_start: Option<&Barrier>) {\n    let tenant_shard_id = tenant.tenant_shard_id;\n\n    task_mgr::spawn(\n        BACKGROUND_RUNTIME.handle(),\n        TaskKind::Compaction,\n        tenant_shard_id,\n        None,\n        &format!(\"compactor for tenant {tenant_shard_id}\"),\n        {\n            let tenant = Arc::clone(tenant);\n            let can_start = can_start.cloned();\n            async move {\n 
               let cancel = task_mgr::shutdown_token(); // NB: must be in async context\n                tokio::select! {\n                    _ = cancel.cancelled() => return Ok(()),\n                    _ = Barrier::maybe_wait(can_start) => {}\n                };\n                TENANT_TASK_EVENTS.with_label_values(&[\"start\"]).inc();\n                defer!(TENANT_TASK_EVENTS.with_label_values(&[\"stop\"]).inc());\n                compaction_loop(tenant, cancel)\n                    // If you rename this span, change the RUST_LOG env variable in test_runner/performance/test_branch_creation.py\n                    .instrument(info_span!(\"compaction_loop\", tenant_id = %tenant_shard_id.tenant_id, shard_id = %tenant_shard_id.shard_slug()))\n                    .await;\n                Ok(())\n            }\n        },\n    );\n\n    task_mgr::spawn(\n        BACKGROUND_RUNTIME.handle(),\n        TaskKind::GarbageCollector,\n        tenant_shard_id,\n        None,\n        &format!(\"garbage collector for tenant {tenant_shard_id}\"),\n        {\n            let tenant = Arc::clone(tenant);\n            let can_start = can_start.cloned();\n            async move {\n                let cancel = task_mgr::shutdown_token(); // NB: must be in async context\n                tokio::select! 
{\n                    _ = cancel.cancelled() => return Ok(()),\n                    _ = Barrier::maybe_wait(can_start) => {}\n                };\n                TENANT_TASK_EVENTS.with_label_values(&[\"start\"]).inc();\n                defer!(TENANT_TASK_EVENTS.with_label_values(&[\"stop\"]).inc());\n                gc_loop(tenant, cancel)\n                    .instrument(info_span!(\"gc_loop\", tenant_id = %tenant_shard_id.tenant_id, shard_id = %tenant_shard_id.shard_slug()))\n                    .await;\n                Ok(())\n            }\n        },\n    );\n\n    task_mgr::spawn(\n        BACKGROUND_RUNTIME.handle(),\n        TaskKind::TenantHousekeeping,\n        tenant_shard_id,\n        None,\n        &format!(\"housekeeping for tenant {tenant_shard_id}\"),\n        {\n            let tenant = Arc::clone(tenant);\n            let can_start = can_start.cloned();\n            async move {\n                let cancel = task_mgr::shutdown_token(); // NB: must be in async context\n                tokio::select! 
{\n                    _ = cancel.cancelled() => return Ok(()),\n                    _ = Barrier::maybe_wait(can_start) => {}\n                };\n                TENANT_TASK_EVENTS.with_label_values(&[\"start\"]).inc();\n                defer!(TENANT_TASK_EVENTS.with_label_values(&[\"stop\"]).inc());\n                tenant_housekeeping_loop(tenant, cancel)\n                    .instrument(info_span!(\"tenant_housekeeping_loop\", tenant_id = %tenant_shard_id.tenant_id, shard_id = %tenant_shard_id.shard_slug()))\n                    .await;\n                Ok(())\n            }\n        },\n    );\n}\n\n/// Compaction task's main loop.\nasync fn compaction_loop(tenant: Arc<TenantShard>, cancel: CancellationToken) {\n    const BASE_BACKOFF_SECS: f64 = 1.0;\n    const MAX_BACKOFF_SECS: f64 = 300.0;\n    const RECHECK_CONFIG_INTERVAL: Duration = Duration::from_secs(10);\n\n    let ctx = RequestContext::todo_child(TaskKind::Compaction, DownloadBehavior::Download);\n    let mut period = tenant.get_compaction_period();\n    let mut error_run = 0; // consecutive errors\n\n    // Stagger the compaction loop across tenants.\n    if wait_for_active_tenant(&tenant, &cancel).await.is_break() {\n        return;\n    }\n    if sleep_random(period, &cancel).await.is_err() {\n        return;\n    }\n\n    loop {\n        // Recheck that we're still active.\n        if wait_for_active_tenant(&tenant, &cancel).await.is_break() {\n            return;\n        }\n\n        // Refresh the period. If compaction is disabled, check again in a bit.\n        period = tenant.get_compaction_period();\n        if period == Duration::ZERO {\n            #[cfg(not(feature = \"testing\"))]\n            info!(\"automatic compaction is disabled\");\n            tokio::select! 
{\n                _ = tokio::time::sleep(RECHECK_CONFIG_INTERVAL) => {},\n                _ = cancel.cancelled() => return,\n            }\n            continue;\n        }\n\n        // Wait for the next compaction run.\n        let backoff = exponential_backoff_duration(error_run, BASE_BACKOFF_SECS, MAX_BACKOFF_SECS);\n        tokio::select! {\n            _ = tokio::time::sleep(backoff), if error_run > 0 => {},\n            _ = tokio::time::sleep(period), if error_run == 0 => {},\n            _ = tenant.l0_compaction_trigger.notified(), if error_run == 0 => {},\n            _ = cancel.cancelled() => return,\n        }\n\n        // Run compaction.\n        let iteration = Iteration {\n            started_at: Instant::now(),\n            period,\n            kind: BackgroundLoopKind::Compaction,\n        };\n        let IterationResult { output, elapsed } = iteration\n            .run(tenant.compaction_iteration(&cancel, &ctx))\n            .await;\n\n        match output {\n            Ok(outcome) => {\n                error_run = 0;\n                // If there's more compaction work, L0 or not, schedule an immediate run.\n                match outcome {\n                    CompactionOutcome::Done => {}\n                    CompactionOutcome::Skipped => {}\n                    CompactionOutcome::YieldForL0 => tenant.l0_compaction_trigger.notify_one(),\n                    CompactionOutcome::Pending => tenant.l0_compaction_trigger.notify_one(),\n                }\n            }\n\n            Err(err) => {\n                error_run += 1;\n                let backoff =\n                    exponential_backoff_duration(error_run, BASE_BACKOFF_SECS, MAX_BACKOFF_SECS);\n                log_compaction_error(\n                    &err,\n                    Some((error_run, backoff)),\n                    cancel.is_cancelled(),\n                    false,\n                );\n                continue;\n            }\n        }\n\n        // NB: this log entry is 
recorded by performance tests.\n        debug!(\n            elapsed_ms = elapsed.as_millis(),\n            \"compaction iteration complete\"\n        );\n    }\n}\n\npub(crate) fn log_compaction_error(\n    err: &CompactionError,\n    retry_info: Option<(u32, Duration)>,\n    task_cancelled: bool,\n    degrade_to_warning: bool,\n) {\n    let is_cancel = err.is_cancel();\n\n    let level = if is_cancel || task_cancelled {\n        Level::INFO\n    } else {\n        Level::ERROR\n    };\n\n    if let Some((error_count, sleep_duration)) = retry_info {\n        match level {\n            Level::ERROR => {\n                error!(\n                    \"Compaction failed {error_count} times, retrying in {sleep_duration:?}: {err:#}\"\n                )\n            }\n            Level::INFO => {\n                info!(\n                    \"Compaction failed {error_count} times, retrying in {sleep_duration:?}: {err:#}\"\n                )\n            }\n            level => unimplemented!(\"unexpected level {level:?}\"),\n        }\n    } else {\n        match level {\n            Level::ERROR if degrade_to_warning => warn!(\"Compaction failed and discarded: {err:#}\"),\n            Level::ERROR => error!(\"Compaction failed: {err:?}\"),\n            Level::INFO => info!(\"Compaction failed: {err:#}\"),\n            level => unimplemented!(\"unexpected level {level:?}\"),\n        }\n    }\n}\n\n/// GC task's main loop.\nasync fn gc_loop(tenant: Arc<TenantShard>, cancel: CancellationToken) {\n    const MAX_BACKOFF_SECS: f64 = 300.0;\n    let mut error_run = 0; // consecutive errors\n\n    // GC might require downloading, to find the cutoff LSN that corresponds to the\n    // cutoff specified as time.\n    let ctx = RequestContext::todo_child(TaskKind::GarbageCollector, DownloadBehavior::Download);\n    let mut first = true;\n\n    loop {\n        if wait_for_active_tenant(&tenant, &cancel).await.is_break() {\n            return;\n        }\n\n        let period = 
tenant.get_gc_period();\n\n        if first {\n            first = false;\n            if sleep_random(period, &cancel).await.is_err() {\n                break;\n            }\n        }\n\n        let gc_horizon = tenant.get_gc_horizon();\n        let sleep_duration;\n        if period == Duration::ZERO || gc_horizon == 0 {\n            #[cfg(not(feature = \"testing\"))]\n            info!(\"automatic GC is disabled\");\n            // check again in 10 seconds, in case it's been enabled again.\n            sleep_duration = Duration::from_secs(10);\n        } else {\n            let iteration = Iteration {\n                started_at: Instant::now(),\n                period,\n                kind: BackgroundLoopKind::Gc,\n            };\n            // Run gc\n            let IterationResult { output, elapsed: _ } = iteration\n                .run(tenant.gc_iteration(\n                    None,\n                    gc_horizon,\n                    tenant.get_pitr_interval(),\n                    &cancel,\n                    &ctx,\n                ))\n                .await;\n            match output {\n                Ok(_) => {\n                    error_run = 0;\n                    sleep_duration = period;\n                }\n                Err(crate::tenant::GcError::TenantCancelled) => {\n                    return;\n                }\n                Err(e) => {\n                    error_run += 1;\n                    let wait_duration =\n                        exponential_backoff_duration(error_run, 1.0, MAX_BACKOFF_SECS);\n\n                    if matches!(e, crate::tenant::GcError::TimelineCancelled) {\n                        // Timeline was cancelled during gc. 
We might either be in an event\n                        // that affects the entire tenant (tenant deletion, pageserver shutdown),\n                        // or in one that affects the timeline only (timeline deletion).\n                        // Therefore, don't exit the loop.\n                        info!(\"Gc failed {error_run} times, retrying in {wait_duration:?}: {e:?}\");\n                    } else {\n                        error!(\"Gc failed {error_run} times, retrying in {wait_duration:?}: {e:?}\");\n                    }\n\n                    sleep_duration = wait_duration;\n                }\n            }\n        };\n\n        if tokio::time::timeout(sleep_duration, cancel.cancelled())\n            .await\n            .is_ok()\n        {\n            break;\n        }\n    }\n}\n\n/// Tenant housekeeping's main loop.\nasync fn tenant_housekeeping_loop(tenant: Arc<TenantShard>, cancel: CancellationToken) {\n    let mut last_throttle_flag_reset_at = Instant::now();\n    loop {\n        if wait_for_active_tenant(&tenant, &cancel).await.is_break() {\n            return;\n        }\n\n        // Use the same period as compaction; it's not worth a separate setting. But if it's set to\n        // zero (to disable compaction), then use a reasonable default. 
Jitter it by 5%.\n        let period = match tenant.get_compaction_period() {\n            Duration::ZERO => humantime::parse_duration(DEFAULT_COMPACTION_PERIOD).unwrap(),\n            period => period,\n        };\n\n        let Ok(period) = sleep_jitter(period, period * 5 / 100, &cancel).await else {\n            break;\n        };\n\n        // Do tenant housekeeping.\n        let iteration = Iteration {\n            started_at: Instant::now(),\n            period,\n            kind: BackgroundLoopKind::TenantHouseKeeping,\n        };\n        iteration.run(tenant.housekeeping()).await;\n\n        // Log any getpage throttling.\n        info_span!(parent: None, \"pagestream_throttle\", tenant_id=%tenant.tenant_shard_id, shard_id=%tenant.tenant_shard_id.shard_slug()).in_scope(|| {\n            let now = Instant::now();\n            let prev = std::mem::replace(&mut last_throttle_flag_reset_at, now);\n            let Stats { count_accounted_start, count_accounted_finish, count_throttled, sum_throttled_usecs} = tenant.pagestream_throttle.reset_stats();\n            if count_throttled == 0 {\n                return;\n            }\n            let allowed_rps = tenant.pagestream_throttle.steady_rps();\n            let delta = now - prev;\n            info!(\n                n_seconds=%format_args!(\"{:.3}\", delta.as_secs_f64()),\n                count_accounted = count_accounted_finish,  // don't break existing log scraping\n                count_throttled,\n                sum_throttled_usecs,\n                count_accounted_start, // log after pre-existing fields to not break existing log scraping\n                allowed_rps=%format_args!(\"{allowed_rps:.0}\"),\n                \"shard was throttled in the last n_seconds\"\n            );\n        });\n    }\n}\n\n/// Waits until the tenant becomes active, or returns `ControlFlow::Break()` to shut down.\nasync fn wait_for_active_tenant(\n    tenant: &Arc<TenantShard>,\n    cancel: &CancellationToken,\n) -> 
ControlFlow<()> {\n    if tenant.current_state() == TenantState::Active {\n        return ControlFlow::Continue(());\n    }\n\n    let mut update_rx = tenant.subscribe_for_state_updates();\n    tokio::select! {\n        result = update_rx.wait_for(|s| s == &TenantState::Active) => {\n            if result.is_err() {\n                return ControlFlow::Break(());\n            }\n            debug!(\"Tenant state changed to active, continuing the task loop\");\n            ControlFlow::Continue(())\n        },\n        _ = cancel.cancelled() => ControlFlow::Break(()),\n    }\n}\n\n#[derive(thiserror::Error, Debug)]\n#[error(\"cancelled\")]\npub(crate) struct Cancelled;\n\n/// Sleeps for a random interval up to the given max value.\n///\n/// This delay prevents a thundering herd of background tasks and will likely keep them running on\n/// different periods for more stable load.\npub(crate) async fn sleep_random(\n    max: Duration,\n    cancel: &CancellationToken,\n) -> Result<Duration, Cancelled> {\n    sleep_random_range(Duration::ZERO..=max, cancel).await\n}\n\n/// Sleeps for a random interval in the given range. Returns the duration.\npub(crate) async fn sleep_random_range(\n    interval: RangeInclusive<Duration>,\n    cancel: &CancellationToken,\n) -> Result<Duration, Cancelled> {\n    let delay = rand::rng().random_range(interval);\n    if delay == Duration::ZERO {\n        return Ok(delay);\n    }\n    tokio::select! 
{\n        _ = cancel.cancelled() => Err(Cancelled),\n        _ = tokio::time::sleep(delay) => Ok(delay),\n    }\n}\n\n/// Sleeps for an interval with a random jitter.\npub(crate) async fn sleep_jitter(\n    duration: Duration,\n    jitter: Duration,\n    cancel: &CancellationToken,\n) -> Result<Duration, Cancelled> {\n    let from = duration.saturating_sub(jitter);\n    let to = duration.saturating_add(jitter);\n    sleep_random_range(from..=to, cancel).await\n}\n\nstruct Iteration {\n    started_at: Instant,\n    period: Duration,\n    kind: BackgroundLoopKind,\n}\n\nstruct IterationResult<O> {\n    output: O,\n    elapsed: Duration,\n}\n\nimpl Iteration {\n    #[instrument(skip_all)]\n    pub(crate) async fn run<F: Future<Output = O>, O>(self, fut: F) -> IterationResult<O> {\n        let mut fut = pin!(fut);\n\n        // Wrap `fut` into a future that logs a message every `period` so that we get a\n        // very obvious breadcrumb in the logs _while_ a slow iteration is happening.\n        let output = loop {\n            match tokio::time::timeout(self.period, &mut fut).await {\n                Ok(r) => break r,\n                Err(_) => info!(\"still running\"),\n            }\n        };\n        let elapsed = self.started_at.elapsed();\n        warn_when_period_overrun(elapsed, self.period, self.kind);\n\n        IterationResult { output, elapsed }\n    }\n}\n\n// NB: the `task` and `period` are used for metrics labels.\npub(crate) fn warn_when_period_overrun(\n    elapsed: Duration,\n    period: Duration,\n    task: BackgroundLoopKind,\n) {\n    // Duration::ZERO will happen because it's the \"disable [bgtask]\" value.\n    if elapsed >= period && period != Duration::ZERO {\n        // humantime does no significant digits clamping whereas Duration's debug is a bit more\n        // intelligent. 
However, it makes sense to keep the \"configuration format\" for period, even\n        // though there's no way to output the actual config value.\n        info!(\n            ?elapsed,\n            period = %humantime::format_duration(period),\n            ?task,\n            \"task iteration took longer than the configured period\"\n        );\n        metrics::BACKGROUND_LOOP_PERIOD_OVERRUN_COUNT\n            .with_label_values(&[task.into(), &format!(\"{}\", period.as_secs())])\n            .inc();\n    }\n}\n"
  },
  {
    "path": "pageserver/src/tenant/throttle.rs",
    "content": "use std::sync::Arc;\nuse std::sync::atomic::{AtomicU64, Ordering};\nuse std::time::Instant;\n\nuse arc_swap::ArcSwap;\nuse utils::leaky_bucket::{LeakyBucketConfig, RateLimiter};\n\n/// Throttle for `async` functions.\n///\n/// Runtime reconfigurable.\n///\n/// To share a throttle among multiple entities, wrap it in an [`Arc`].\n///\n/// The intial use case for this is tenant-wide throttling of getpage@lsn requests.\npub struct Throttle {\n    inner: ArcSwap<Inner>,\n    /// will be turned into [`Stats::count_accounted_start`]\n    count_accounted_start: AtomicU64,\n    /// will be turned into [`Stats::count_accounted_finish`]\n    count_accounted_finish: AtomicU64,\n    /// will be turned into [`Stats::count_throttled`]\n    count_throttled: AtomicU64,\n    /// will be turned into [`Stats::sum_throttled_usecs`]\n    sum_throttled_usecs: AtomicU64,\n}\n\npub struct Inner {\n    enabled: bool,\n    rate_limiter: Arc<RateLimiter>,\n}\n\npub type Config = pageserver_api::models::ThrottleConfig;\n\n/// See [`Throttle::reset_stats`].\npub struct Stats {\n    /// Number of requests that started [`Throttle::throttle`] calls.\n    pub count_accounted_start: u64,\n    /// Number of requests that finished [`Throttle::throttle`] calls.\n    pub count_accounted_finish: u64,\n    /// Subset of the `accounted` requests that were actually throttled.\n    /// Note that the numbers are stored as two independent atomics, so, there might be a slight drift.\n    pub count_throttled: u64,\n    /// Sum of microseconds that throttled requests spent waiting for throttling.\n    pub sum_throttled_usecs: u64,\n}\n\npub enum ThrottleResult {\n    NotThrottled { end: Instant },\n    Throttled { end: Instant },\n}\n\nimpl Throttle {\n    pub fn new(config: Config) -> Self {\n        Self {\n            inner: ArcSwap::new(Arc::new(Self::new_inner(config))),\n            count_accounted_start: AtomicU64::new(0),\n            count_accounted_finish: AtomicU64::new(0),\n            
count_throttled: AtomicU64::new(0),\n            sum_throttled_usecs: AtomicU64::new(0),\n        }\n    }\n    fn new_inner(config: Config) -> Inner {\n        let Config {\n            enabled,\n            initial,\n            refill_interval,\n            refill_amount,\n            max,\n        } = config;\n\n        // steady rate, we expect `refill_amount` requests per `refill_interval`.\n        // dividing gives us the rps.\n        let rps = f64::from(refill_amount.get()) / refill_interval.as_secs_f64();\n        let config = LeakyBucketConfig::new(rps, f64::from(max));\n\n        // initial tracks how many tokens are available to put in the bucket\n        // we want how many tokens are currently in the bucket\n        let initial_tokens = max - initial;\n\n        let rate_limiter = RateLimiter::with_initial_tokens(config, f64::from(initial_tokens));\n\n        Inner {\n            enabled: enabled.is_enabled(),\n            rate_limiter: Arc::new(rate_limiter),\n        }\n    }\n    pub fn reconfigure(&self, config: Config) {\n        self.inner.store(Arc::new(Self::new_inner(config)));\n    }\n\n    /// The [`Throttle`] keeps internal counters of accounted and throttled requests.\n    /// This method returns the values accumulated since the previous call and resets each counter to zero.\n    /// Useful for periodic reporting.\n    pub fn reset_stats(&self) -> Stats {\n        let count_accounted_start = self.count_accounted_start.swap(0, Ordering::Relaxed);\n        let count_accounted_finish = self.count_accounted_finish.swap(0, Ordering::Relaxed);\n        let count_throttled = self.count_throttled.swap(0, Ordering::Relaxed);\n        let sum_throttled_usecs = self.sum_throttled_usecs.swap(0, Ordering::Relaxed);\n        Stats {\n            count_accounted_start,\n            count_accounted_finish,\n            count_throttled,\n            sum_throttled_usecs,\n        }\n    }\n\n    /// See [`Config::steady_rps`].\n    pub fn steady_rps(&self) -> f64 {\n        
self.inner.load().rate_limiter.steady_rps()\n    }\n\n    /// `start` must be [`Instant::now`] or earlier.\n    pub async fn throttle(&self, key_count: usize, start: Instant) -> ThrottleResult {\n        let inner = self.inner.load_full(); // clones the `Inner` Arc\n\n        if !inner.enabled {\n            return ThrottleResult::NotThrottled { end: start };\n        }\n\n        self.count_accounted_start.fetch_add(1, Ordering::Relaxed);\n        let did_throttle = inner.rate_limiter.acquire(key_count).await;\n        self.count_accounted_finish.fetch_add(1, Ordering::Relaxed);\n\n        if did_throttle {\n            self.count_throttled.fetch_add(1, Ordering::Relaxed);\n            let end = Instant::now();\n            let wait_time = end - start;\n            self.sum_throttled_usecs\n                .fetch_add(wait_time.as_micros() as u64, Ordering::Relaxed);\n            ThrottleResult::Throttled { end }\n        } else {\n            ThrottleResult::NotThrottled { end: start }\n        }\n    }\n}\n"
  },
  {
    "path": "pageserver/src/tenant/timeline/analysis.rs",
    "content": "use std::collections::BTreeSet;\nuse std::ops::Range;\n\nuse utils::lsn::Lsn;\n\nuse super::Timeline;\nuse crate::tenant::timeline::layer_manager::LayerManagerLockHolder;\n\n#[derive(serde::Serialize)]\npub(crate) struct RangeAnalysis {\n    start: String,\n    end: String,\n    has_image: bool,\n    num_of_deltas_above_image: usize,\n    total_num_of_deltas: usize,\n    num_of_l0: usize,\n}\n\nimpl Timeline {\n    pub(crate) async fn perf_info(&self) -> Vec<RangeAnalysis> {\n        // First, collect all split points of the layers.\n        let mut split_points = BTreeSet::new();\n        let mut delta_ranges = Vec::new();\n        let mut image_ranges = Vec::new();\n\n        let num_of_l0;\n        let all_layer_files = {\n            let guard = self\n                .layers\n                .read(LayerManagerLockHolder::GetLayerMapInfo)\n                .await;\n            num_of_l0 = guard.layer_map().unwrap().level0_deltas().len();\n            guard.all_persistent_layers()\n        };\n        let lsn = self.get_last_record_lsn();\n\n        for key in all_layer_files {\n            split_points.insert(key.key_range.start);\n            split_points.insert(key.key_range.end);\n            if key.is_delta {\n                delta_ranges.push((key.key_range.clone(), key.lsn_range.clone()));\n            } else {\n                image_ranges.push((key.key_range.clone(), key.lsn_range.start));\n            }\n        }\n\n        // For each split range, compute the estimated read amplification.\n        let split_points = split_points.into_iter().collect::<Vec<_>>();\n\n        let mut result = Vec::new();\n\n        for i in 0..(split_points.len() - 1) {\n            let start = split_points[i];\n            let end = split_points[i + 1];\n            // Find the latest image layer that contains the information.\n            let mut maybe_image_layers = image_ranges\n                .iter()\n                // We insert split points for all 
image layers, and therefore a `contains` check for the start point should be enough.\n                .filter(|(key_range, img_lsn)| key_range.contains(&start) && img_lsn <= &lsn)\n                .cloned()\n                .collect::<Vec<_>>();\n            maybe_image_layers.sort_by(|a, b| a.1.cmp(&b.1));\n            let image_layer = maybe_image_layers.last().cloned();\n            let lsn_filter_start = image_layer\n                .as_ref()\n                .map(|(_, lsn)| *lsn)\n                .unwrap_or(Lsn::INVALID);\n\n            fn overlaps_with(lsn_range_a: &Range<Lsn>, lsn_range_b: &Range<Lsn>) -> bool {\n                !(lsn_range_a.end <= lsn_range_b.start || lsn_range_a.start >= lsn_range_b.end)\n            }\n\n            let maybe_delta_layers = delta_ranges\n                .iter()\n                .filter(|(key_range, lsn_range)| {\n                    key_range.contains(&start) && overlaps_with(&(lsn_filter_start..lsn), lsn_range)\n                })\n                .cloned()\n                .collect::<Vec<_>>();\n\n            let pitr_delta_layers = delta_ranges\n                .iter()\n                .filter(|(key_range, _)| key_range.contains(&start))\n                .cloned()\n                .collect::<Vec<_>>();\n\n            result.push(RangeAnalysis {\n                start: start.to_string(),\n                end: end.to_string(),\n                has_image: image_layer.is_some(),\n                num_of_deltas_above_image: maybe_delta_layers.len(),\n                total_num_of_deltas: pitr_delta_layers.len(),\n                num_of_l0,\n            });\n        }\n\n        result\n    }\n}\n"
  },
  {
    "path": "pageserver/src/tenant/timeline/compaction.rs",
    "content": "//! New compaction implementation. The algorithm itself is implemented in the\n//! compaction crate. This file implements the callbacks and structs that allow\n//! the algorithm to drive the process.\n//!\n//! The old legacy algorithm is implemented directly in `timeline.rs`.\n\nuse std::cmp::min;\nuse std::collections::{BinaryHeap, HashMap, HashSet, VecDeque};\nuse std::ops::{Deref, Range};\nuse std::sync::Arc;\nuse std::time::{Duration, Instant};\n\nuse super::layer_manager::LayerManagerLockHolder;\nuse super::{\n    CompactFlags, CompactOptions, CompactionError, CreateImageLayersError, DurationRecorder,\n    GetVectoredError, ImageLayerCreationMode, LastImageLayerCreationStatus, RecordedDuration,\n    Timeline,\n};\n\nuse crate::pgdatadir_mapping::CollectKeySpaceError;\nuse crate::tenant::timeline::{DeltaEntry, RepartitionError};\nuse crate::walredo::RedoAttemptType;\nuse anyhow::{Context, anyhow};\nuse bytes::Bytes;\nuse enumset::EnumSet;\nuse fail::fail_point;\nuse futures::FutureExt;\nuse itertools::Itertools;\nuse once_cell::sync::Lazy;\nuse pageserver_api::config::tenant_conf_defaults::DEFAULT_CHECKPOINT_DISTANCE;\nuse pageserver_api::key::{KEY_SIZE, Key};\nuse pageserver_api::keyspace::{KeySpace, ShardedRange};\nuse pageserver_api::models::{CompactInfoResponse, CompactKeyRange};\nuse pageserver_api::shard::{ShardCount, ShardIdentity, TenantShardId};\nuse pageserver_compaction::helpers::{fully_contains, overlaps_with};\nuse pageserver_compaction::interface::*;\nuse serde::Serialize;\nuse tokio::sync::{OwnedSemaphorePermit, Semaphore};\nuse tokio_util::sync::CancellationToken;\nuse tracing::{Instrument, debug, error, info, info_span, trace, warn};\nuse utils::critical_timeline;\nuse utils::id::TimelineId;\nuse utils::lsn::Lsn;\nuse wal_decoder::models::record::NeonWalRecord;\nuse wal_decoder::models::value::Value;\n\nuse crate::context::{AccessStatsBehavior, RequestContext, RequestContextBuilder};\nuse crate::page_cache;\nuse 
crate::statvfs::Statvfs;\nuse crate::tenant::checks::check_valid_layermap;\nuse crate::tenant::gc_block::GcBlock;\nuse crate::tenant::layer_map::LayerMap;\nuse crate::tenant::remote_timeline_client::WaitCompletionError;\nuse crate::tenant::remote_timeline_client::index::GcCompactionState;\nuse crate::tenant::storage_layer::batch_split_writer::{\n    BatchWriterResult, SplitDeltaLayerWriter, SplitImageLayerWriter,\n};\nuse crate::tenant::storage_layer::filter_iterator::FilterIterator;\nuse crate::tenant::storage_layer::merge_iterator::MergeIterator;\nuse crate::tenant::storage_layer::{\n    AsLayerDesc, LayerVisibilityHint, PersistentLayerDesc, PersistentLayerKey,\n    ValueReconstructState,\n};\nuse crate::tenant::tasks::log_compaction_error;\nuse crate::tenant::timeline::{\n    DeltaLayerWriter, ImageLayerCreationOutcome, ImageLayerWriter, IoConcurrency, Layer,\n    ResidentLayer, drop_layer_manager_rlock,\n};\nuse crate::tenant::{DeltaLayer, MaybeOffloaded, PageReconstructError};\nuse crate::virtual_file::{MaybeFatalIo, VirtualFile};\n\n/// Maximum number of deltas before generating an image layer in bottom-most compaction.\nconst COMPACTION_DELTA_THRESHOLD: usize = 5;\n\n/// Ratio of shard-local pages below which we trigger shard ancestor layer rewrites. 
0.3 means that\n/// <= 30% of layer pages must belong to the descendant shard to rewrite the layer.\n///\n/// We choose a value < 0.5 to avoid rewriting all visible layers every time we do a power-of-two\n/// shard split, which gets expensive for large tenants.\nconst ANCESTOR_COMPACTION_REWRITE_THRESHOLD: f64 = 0.3;\n\n#[derive(Default, Debug, Clone, Copy, Hash, PartialEq, Eq, Serialize)]\npub struct GcCompactionJobId(pub usize);\n\nimpl std::fmt::Display for GcCompactionJobId {\n    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {\n        write!(f, \"{}\", self.0)\n    }\n}\n\npub struct GcCompactionCombinedSettings {\n    pub gc_compaction_enabled: bool,\n    pub gc_compaction_verification: bool,\n    pub gc_compaction_initial_threshold_kb: u64,\n    pub gc_compaction_ratio_percent: u64,\n}\n\n#[derive(Debug, Clone)]\npub enum GcCompactionQueueItem {\n    MetaJob {\n        /// Compaction options\n        options: CompactOptions,\n        /// Whether the compaction is triggered automatically (determines whether we need to update L2 LSN)\n        auto: bool,\n    },\n    SubCompactionJob {\n        i: usize,\n        total: usize,\n        options: CompactOptions,\n    },\n    Notify(GcCompactionJobId, Option<Lsn>),\n}\n\n/// Statistics for gc-compaction meta jobs, which contains several sub compaction jobs.\n#[derive(Debug, Clone, Serialize, Default)]\npub struct GcCompactionMetaStatistics {\n    /// The total number of sub compaction jobs.\n    pub total_sub_compaction_jobs: usize,\n    /// The total number of sub compaction jobs that failed.\n    pub failed_sub_compaction_jobs: usize,\n    /// The total number of sub compaction jobs that succeeded.\n    pub succeeded_sub_compaction_jobs: usize,\n    /// The layer size before compaction.\n    pub before_compaction_layer_size: u64,\n    /// The layer size after compaction.\n    pub after_compaction_layer_size: u64,\n    /// The start time of the meta job.\n    pub start_time: 
Option<chrono::DateTime<chrono::Utc>>,\n    /// The end time of the meta job.\n    pub end_time: Option<chrono::DateTime<chrono::Utc>>,\n    /// The duration of the meta job.\n    pub duration_secs: f64,\n    /// The id of the meta job.\n    pub meta_job_id: GcCompactionJobId,\n    /// The LSN below which the layers are compacted, used to compute the statistics.\n    pub below_lsn: Lsn,\n    /// The retention ratio of the meta job (after_compaction_layer_size / before_compaction_layer_size)\n    pub retention_ratio: f64,\n}\n\nimpl GcCompactionMetaStatistics {\n    fn finalize(&mut self) {\n        let end_time = chrono::Utc::now();\n        if let Some(start_time) = self.start_time {\n            if end_time > start_time {\n                let delta = end_time - start_time;\n                if let Ok(std_dur) = delta.to_std() {\n                    self.duration_secs = std_dur.as_secs_f64();\n                }\n            }\n        }\n        self.retention_ratio = self.after_compaction_layer_size as f64\n            / (self.before_compaction_layer_size as f64 + 1.0);\n        self.end_time = Some(end_time);\n    }\n}\n\nimpl GcCompactionQueueItem {\n    pub fn into_compact_info_resp(\n        self,\n        id: GcCompactionJobId,\n        running: bool,\n    ) -> Option<CompactInfoResponse> {\n        match self {\n            GcCompactionQueueItem::MetaJob { options, .. } => Some(CompactInfoResponse {\n                compact_key_range: options.compact_key_range,\n                compact_lsn_range: options.compact_lsn_range,\n                sub_compaction: options.sub_compaction,\n                running,\n                job_id: id.0,\n            }),\n            GcCompactionQueueItem::SubCompactionJob { options, .. 
} => Some(CompactInfoResponse {\n                compact_key_range: options.compact_key_range,\n                compact_lsn_range: options.compact_lsn_range,\n                sub_compaction: options.sub_compaction,\n                running,\n                job_id: id.0,\n            }),\n            GcCompactionQueueItem::Notify(_, _) => None,\n        }\n    }\n}\n\n#[derive(Default)]\nstruct GcCompactionGuardItems {\n    notify: Option<tokio::sync::oneshot::Sender<()>>,\n    permit: Option<OwnedSemaphorePermit>,\n}\n\nstruct GcCompactionQueueInner {\n    running: Option<(GcCompactionJobId, GcCompactionQueueItem)>,\n    queued: VecDeque<(GcCompactionJobId, GcCompactionQueueItem)>,\n    guards: HashMap<GcCompactionJobId, GcCompactionGuardItems>,\n    last_id: GcCompactionJobId,\n    meta_statistics: Option<GcCompactionMetaStatistics>,\n}\n\nimpl GcCompactionQueueInner {\n    fn next_id(&mut self) -> GcCompactionJobId {\n        let id = self.last_id;\n        self.last_id = GcCompactionJobId(id.0 + 1);\n        id\n    }\n}\n\n/// A structure to store gc_compaction jobs.\npub struct GcCompactionQueue {\n    /// All items in the queue, and the currently-running job.\n    inner: std::sync::Mutex<GcCompactionQueueInner>,\n    /// Ensure only one thread is consuming the queue.\n    consumer_lock: tokio::sync::Mutex<()>,\n}\n\nstatic CONCURRENT_GC_COMPACTION_TASKS: Lazy<Arc<Semaphore>> = Lazy::new(|| {\n    // Only allow one timeline on one pageserver to run gc compaction at a time.\n    Arc::new(Semaphore::new(1))\n});\n\nimpl GcCompactionQueue {\n    pub fn new() -> Self {\n        GcCompactionQueue {\n            inner: std::sync::Mutex::new(GcCompactionQueueInner {\n                running: None,\n                queued: VecDeque::new(),\n                guards: HashMap::new(),\n                last_id: GcCompactionJobId(0),\n                meta_statistics: None,\n            }),\n            consumer_lock: tokio::sync::Mutex::new(()),\n        }\n    }\n\n    pub 
fn cancel_scheduled(&self) {\n        let mut guard = self.inner.lock().unwrap();\n        guard.queued.clear();\n        // TODO: if there is a running job, we should keep the gc guard. However, currently, the cancel\n        // API is only used for testing purposes, so we can drop everything here.\n        guard.guards.clear();\n    }\n\n    /// Schedule a manual compaction job.\n    pub fn schedule_manual_compaction(\n        &self,\n        options: CompactOptions,\n        notify: Option<tokio::sync::oneshot::Sender<()>>,\n    ) -> GcCompactionJobId {\n        let mut guard = self.inner.lock().unwrap();\n        let id = guard.next_id();\n        guard.queued.push_back((\n            id,\n            GcCompactionQueueItem::MetaJob {\n                options,\n                auto: false,\n            },\n        ));\n        guard.guards.entry(id).or_default().notify = notify;\n        info!(\"scheduled compaction job id={}\", id);\n        id\n    }\n\n    /// Schedule an auto compaction job.\n    fn schedule_auto_compaction(\n        &self,\n        options: CompactOptions,\n        permit: OwnedSemaphorePermit,\n    ) -> GcCompactionJobId {\n        let mut guard = self.inner.lock().unwrap();\n        let id = guard.next_id();\n        guard.queued.push_back((\n            id,\n            GcCompactionQueueItem::MetaJob {\n                options,\n                auto: true,\n            },\n        ));\n        guard.guards.entry(id).or_default().permit = Some(permit);\n        id\n    }\n\n    /// Trigger an auto compaction.\n    pub async fn trigger_auto_compaction(\n        &self,\n        timeline: &Arc<Timeline>,\n    ) -> Result<(), CompactionError> {\n        let GcCompactionCombinedSettings {\n            gc_compaction_enabled,\n            gc_compaction_initial_threshold_kb,\n            gc_compaction_ratio_percent,\n            ..\n        } = timeline.get_gc_compaction_settings();\n        if !gc_compaction_enabled {\n            return 
Ok(());\n        }\n        if self.remaining_jobs_num() > 0 {\n            // Only schedule auto compaction when the queue is empty\n            return Ok(());\n        }\n        if timeline.ancestor_timeline().is_some() {\n            // Do not trigger auto compaction for child timelines. We haven't tested\n            // it enough in staging yet.\n            return Ok(());\n        }\n        if timeline.get_gc_compaction_watermark() == Lsn::INVALID {\n            // If the gc watermark is not set, we don't need to trigger auto compaction.\n            // This check is the same as in `gc_compaction_split_jobs` but we don't log\n            // here and we can also skip the computation of the trigger condition earlier.\n            return Ok(());\n        }\n\n        let Ok(permit) = CONCURRENT_GC_COMPACTION_TASKS.clone().try_acquire_owned() else {\n            // Only allow one compaction run at a time. TODO: As we do `try_acquire_owned`, we cannot ensure\n            // the fairness of the lock across timelines. 
We should listen for both `acquire` and `l0_compaction_trigger`\n            // to ensure the fairness while avoid starving other tasks.\n            return Ok(());\n        };\n\n        let gc_compaction_state = timeline.get_gc_compaction_state();\n        let l2_lsn = gc_compaction_state\n            .map(|x| x.last_completed_lsn)\n            .unwrap_or(Lsn::INVALID);\n\n        let layers = {\n            let guard = timeline\n                .layers\n                .read(LayerManagerLockHolder::GetLayerMapInfo)\n                .await;\n            let layer_map = guard.layer_map()?;\n            layer_map.iter_historic_layers().collect_vec()\n        };\n        let mut l2_size: u64 = 0;\n        let mut l1_size = 0;\n        let gc_cutoff = *timeline.get_applied_gc_cutoff_lsn();\n        for layer in layers {\n            if layer.lsn_range.start <= l2_lsn {\n                l2_size += layer.file_size();\n            } else if layer.lsn_range.start <= gc_cutoff {\n                l1_size += layer.file_size();\n            }\n        }\n\n        fn trigger_compaction(\n            l1_size: u64,\n            l2_size: u64,\n            gc_compaction_initial_threshold_kb: u64,\n            gc_compaction_ratio_percent: u64,\n        ) -> bool {\n            const AUTO_TRIGGER_LIMIT: u64 = 150 * 1024 * 1024 * 1024; // 150GB\n            if l1_size + l2_size >= AUTO_TRIGGER_LIMIT {\n                // Do not auto-trigger when physical size >= 150GB\n                return false;\n            }\n            // initial trigger\n            if l2_size == 0 && l1_size >= gc_compaction_initial_threshold_kb * 1024 {\n                info!(\n                    \"trigger auto-compaction because l1_size={} >= gc_compaction_initial_threshold_kb={}\",\n                    l1_size, gc_compaction_initial_threshold_kb\n                );\n                return true;\n            }\n            // size ratio trigger\n            if l2_size == 0 {\n                return 
false;\n            }\n            if l1_size as f64 / l2_size as f64 >= (gc_compaction_ratio_percent as f64 / 100.0) {\n                info!(\n                    \"trigger auto-compaction because l1_size={} / l2_size={} > gc_compaction_ratio_percent={}\",\n                    l1_size, l2_size, gc_compaction_ratio_percent\n                );\n                return true;\n            }\n            false\n        }\n\n        if trigger_compaction(\n            l1_size,\n            l2_size,\n            gc_compaction_initial_threshold_kb,\n            gc_compaction_ratio_percent,\n        ) {\n            self.schedule_auto_compaction(\n                CompactOptions {\n                    flags: {\n                        let mut flags = EnumSet::new();\n                        flags |= CompactFlags::EnhancedGcBottomMostCompaction;\n                        if timeline.get_compaction_l0_first() {\n                            flags |= CompactFlags::YieldForL0;\n                        }\n                        flags\n                    },\n                    sub_compaction: true,\n                    // Only auto-trigger gc-compaction over the data keyspace due to concerns in\n                    // https://github.com/neondatabase/neon/issues/11318.\n                    compact_key_range: Some(CompactKeyRange {\n                        start: Key::MIN,\n                        end: Key::metadata_key_range().start,\n                    }),\n                    compact_lsn_range: None,\n                    sub_compaction_max_job_size_mb: None,\n                    gc_compaction_do_metadata_compaction: false,\n                },\n                permit,\n            );\n            info!(\n                \"scheduled auto gc-compaction: l1_size={}, l2_size={}, l2_lsn={}, gc_cutoff={}\",\n                l1_size, l2_size, l2_lsn, gc_cutoff\n            );\n        } else {\n            debug!(\n                \"did not trigger auto gc-compaction: l1_size={}, 
l2_size={}, l2_lsn={}, gc_cutoff={}\",\n                l1_size, l2_size, l2_lsn, gc_cutoff\n            );\n        }\n        Ok(())\n    }\n\n    async fn collect_layer_below_lsn(\n        &self,\n        timeline: &Arc<Timeline>,\n        lsn: Lsn,\n    ) -> Result<u64, CompactionError> {\n        let guard = timeline\n            .layers\n            .read(LayerManagerLockHolder::GetLayerMapInfo)\n            .await;\n        let layer_map = guard.layer_map()?;\n        let layers = layer_map.iter_historic_layers().collect_vec();\n        let mut size = 0;\n        for layer in layers {\n            if layer.lsn_range.start <= lsn {\n                size += layer.file_size();\n            }\n        }\n        Ok(size)\n    }\n\n    /// Notify the caller the job has finished and unblock GC.\n    fn notify_and_unblock(&self, id: GcCompactionJobId) {\n        info!(\"compaction job id={} finished\", id);\n        let mut guard = self.inner.lock().unwrap();\n        if let Some(items) = guard.guards.remove(&id) {\n            if let Some(tx) = items.notify {\n                let _ = tx.send(());\n            }\n        }\n        if let Some(ref meta_statistics) = guard.meta_statistics {\n            if meta_statistics.meta_job_id == id {\n                if let Ok(stats) = serde_json::to_string(&meta_statistics) {\n                    info!(\n                        \"gc-compaction meta statistics for job id = {}: {}\",\n                        id, stats\n                    );\n                }\n            }\n        }\n    }\n\n    fn clear_running_job(&self) {\n        let mut guard = self.inner.lock().unwrap();\n        guard.running = None;\n    }\n\n    async fn handle_sub_compaction(\n        &self,\n        id: GcCompactionJobId,\n        options: CompactOptions,\n        timeline: &Arc<Timeline>,\n        auto: bool,\n    ) -> Result<(), CompactionError> {\n        info!(\n            \"running scheduled enhanced gc bottom-most compaction with 
sub-compaction, splitting compaction jobs\"\n        );\n        let res = timeline\n            .gc_compaction_split_jobs(\n                GcCompactJob::from_compact_options(options.clone()),\n                options.sub_compaction_max_job_size_mb,\n            )\n            .await;\n        let jobs = match res {\n            Ok(jobs) => jobs,\n            Err(err) => {\n                warn!(\"cannot split gc-compaction jobs: {}, unblocked gc\", err);\n                self.notify_and_unblock(id);\n                return Err(err);\n            }\n        };\n        if jobs.is_empty() {\n            info!(\"no jobs to run, skipping scheduled compaction task\");\n            self.notify_and_unblock(id);\n        } else {\n            let jobs_len = jobs.len();\n            let mut pending_tasks = Vec::new();\n            // gc-compaction might pick more layers or fewer layers to compact. The L2 LSN does not need to be accurate.\n            // And therefore, we simply assume the maximum LSN of all jobs is the expected L2 LSN.\n            let expected_l2_lsn = jobs\n                .iter()\n                .map(|job| job.compact_lsn_range.end)\n                .max()\n                .unwrap();\n            for (i, job) in jobs.into_iter().enumerate() {\n                // Unfortunately we need to convert the `GcCompactJob` back to `CompactionOptions`\n                // until we do further refactors to allow directly call `compact_with_gc`.\n                let mut flags: EnumSet<CompactFlags> = EnumSet::default();\n                flags |= CompactFlags::EnhancedGcBottomMostCompaction;\n                if job.dry_run {\n                    flags |= CompactFlags::DryRun;\n                }\n                if options.flags.contains(CompactFlags::YieldForL0) {\n                    flags |= CompactFlags::YieldForL0;\n                }\n                let options = CompactOptions {\n                    flags,\n                    sub_compaction: false,\n           
         compact_key_range: Some(job.compact_key_range.into()),\n                    compact_lsn_range: Some(job.compact_lsn_range.into()),\n                    sub_compaction_max_job_size_mb: None,\n                    gc_compaction_do_metadata_compaction: false,\n                };\n                pending_tasks.push(GcCompactionQueueItem::SubCompactionJob {\n                    options,\n                    i,\n                    total: jobs_len,\n                });\n            }\n\n            if !auto {\n                pending_tasks.push(GcCompactionQueueItem::Notify(id, None));\n            } else {\n                pending_tasks.push(GcCompactionQueueItem::Notify(id, Some(expected_l2_lsn)));\n            }\n\n            let layer_size = self\n                .collect_layer_below_lsn(timeline, expected_l2_lsn)\n                .await?;\n\n            {\n                let mut guard = self.inner.lock().unwrap();\n                let mut tasks = Vec::new();\n                for task in pending_tasks {\n                    let id = guard.next_id();\n                    tasks.push((id, task));\n                }\n                tasks.reverse();\n                for item in tasks {\n                    guard.queued.push_front(item);\n                }\n                guard.meta_statistics = Some(GcCompactionMetaStatistics {\n                    meta_job_id: id,\n                    start_time: Some(chrono::Utc::now()),\n                    before_compaction_layer_size: layer_size,\n                    below_lsn: expected_l2_lsn,\n                    total_sub_compaction_jobs: jobs_len,\n                    ..Default::default()\n                });\n            }\n\n            info!(\n                \"scheduled enhanced gc bottom-most compaction with sub-compaction, split into {} jobs\",\n                jobs_len\n            );\n        }\n        Ok(())\n    }\n\n    /// Take a job from the queue and process it. 
Returns if there are still pending tasks.\n    pub async fn iteration(\n        &self,\n        cancel: &CancellationToken,\n        ctx: &RequestContext,\n        gc_block: &GcBlock,\n        timeline: &Arc<Timeline>,\n    ) -> Result<CompactionOutcome, CompactionError> {\n        let res = self.iteration_inner(cancel, ctx, gc_block, timeline).await;\n        if let Err(err) = &res {\n            log_compaction_error(err, None, cancel.is_cancelled(), true);\n        }\n        match res {\n            Ok(res) => Ok(res),\n            Err(e) if e.is_cancel() => Err(e),\n            Err(_) => {\n                // There are some cases where traditional gc might collect some layer\n                // files causing gc-compaction cannot read the full history of the key.\n                // This needs to be resolved in the long-term by improving the compaction\n                // process. For now, let's simply avoid such errors triggering the\n                // circuit breaker.\n                Ok(CompactionOutcome::Skipped)\n            }\n        }\n    }\n\n    async fn iteration_inner(\n        &self,\n        cancel: &CancellationToken,\n        ctx: &RequestContext,\n        gc_block: &GcBlock,\n        timeline: &Arc<Timeline>,\n    ) -> Result<CompactionOutcome, CompactionError> {\n        let Ok(_one_op_at_a_time_guard) = self.consumer_lock.try_lock() else {\n            return Err(CompactionError::Other(anyhow::anyhow!(\n                \"cannot run gc-compaction because another gc-compaction is running. 
This should not happen because we only call this function from the gc-compaction queue.\"\n            )));\n        };\n        let has_pending_tasks;\n        let mut yield_for_l0 = false;\n        let Some((id, item)) = ({\n            let mut guard = self.inner.lock().unwrap();\n            if let Some((id, item)) = guard.queued.pop_front() {\n                guard.running = Some((id, item.clone()));\n                has_pending_tasks = !guard.queued.is_empty();\n                Some((id, item))\n            } else {\n                has_pending_tasks = false;\n                None\n            }\n        }) else {\n            self.trigger_auto_compaction(timeline).await?;\n            // Always yield after triggering auto-compaction. Gc-compaction is a low-priority task and we\n            // have not implemented preemption mechanism yet. We always want to yield it to more important\n            // tasks if there is one.\n            return Ok(CompactionOutcome::Done);\n        };\n        match item {\n            GcCompactionQueueItem::MetaJob { options, auto } => {\n                if !options\n                    .flags\n                    .contains(CompactFlags::EnhancedGcBottomMostCompaction)\n                {\n                    warn!(\n                        \"ignoring scheduled compaction task: scheduled task must be gc compaction: {:?}\",\n                        options\n                    );\n                } else if options.sub_compaction {\n                    info!(\n                        \"running scheduled enhanced gc bottom-most compaction with sub-compaction, splitting compaction jobs\"\n                    );\n                    self.handle_sub_compaction(id, options, timeline, auto)\n                        .await?;\n                } else {\n                    // Auto compaction always enables sub-compaction so we don't need to handle update_l2_lsn\n                    // in this branch.\n                    let _gc_guard = 
match gc_block.start().await {\n                        Ok(guard) => guard,\n                        Err(e) => {\n                            self.notify_and_unblock(id);\n                            self.clear_running_job();\n                            return Err(CompactionError::Other(anyhow!(\n                                \"cannot run gc-compaction because gc is blocked: {}\",\n                                e\n                            )));\n                        }\n                    };\n                    let res = timeline.compact_with_options(cancel, options, ctx).await;\n                    let compaction_result = match res {\n                        Ok(res) => res,\n                        Err(err) => {\n                            warn!(%err, \"failed to run gc-compaction\");\n                            self.notify_and_unblock(id);\n                            self.clear_running_job();\n                            return Err(err);\n                        }\n                    };\n                    if compaction_result == CompactionOutcome::YieldForL0 {\n                        yield_for_l0 = true;\n                    }\n                }\n            }\n            GcCompactionQueueItem::SubCompactionJob { options, i, total } => {\n                // TODO: error handling, clear the queue if any task fails?\n                let _gc_guard = match gc_block.start().await {\n                    Ok(guard) => guard,\n                    Err(e) => {\n                        self.clear_running_job();\n                        return Err(CompactionError::Other(anyhow!(\n                            \"cannot run gc-compaction because gc is blocked: {}\",\n                            e\n                        )));\n                    }\n                };\n                info!(\"running gc-compaction subcompaction job {}/{}\", i, total);\n                let res = timeline.compact_with_options(cancel, options, ctx).await;\n                let 
compaction_result = match res {\n                    Ok(res) => res,\n                    Err(err) => {\n                        warn!(%err, \"failed to run gc-compaction subcompaction job\");\n                        self.clear_running_job();\n                        let mut guard = self.inner.lock().unwrap();\n                        if let Some(ref mut meta_statistics) = guard.meta_statistics {\n                            meta_statistics.failed_sub_compaction_jobs += 1;\n                        }\n                        return Err(err);\n                    }\n                };\n                if compaction_result == CompactionOutcome::YieldForL0 {\n                    // We will permanently give up a task if we yield for L0 compaction: the preempted subcompaction job won't be running\n                    // again. This ensures that we don't keep doing duplicated work within gc-compaction. Not directly returning here because\n                    // we need to clean things up before returning from the function.\n                    yield_for_l0 = true;\n                }\n                {\n                    let mut guard = self.inner.lock().unwrap();\n                    if let Some(ref mut meta_statistics) = guard.meta_statistics {\n                        meta_statistics.succeeded_sub_compaction_jobs += 1;\n                    }\n                }\n            }\n            GcCompactionQueueItem::Notify(id, l2_lsn) => {\n                let below_lsn = {\n                    let mut guard = self.inner.lock().unwrap();\n                    if let Some(ref mut meta_statistics) = guard.meta_statistics {\n                        meta_statistics.below_lsn\n                    } else {\n                        Lsn::INVALID\n                    }\n                };\n                let layer_size = if below_lsn != Lsn::INVALID {\n                    self.collect_layer_below_lsn(timeline, below_lsn).await?\n                } else {\n                    0\n     
           };\n                {\n                    let mut guard = self.inner.lock().unwrap();\n                    if let Some(ref mut meta_statistics) = guard.meta_statistics {\n                        meta_statistics.after_compaction_layer_size = layer_size;\n                        meta_statistics.finalize();\n                    }\n                }\n                self.notify_and_unblock(id);\n                if let Some(l2_lsn) = l2_lsn {\n                    let current_l2_lsn = timeline\n                        .get_gc_compaction_state()\n                        .map(|x| x.last_completed_lsn)\n                        .unwrap_or(Lsn::INVALID);\n                    if l2_lsn >= current_l2_lsn {\n                        info!(\"l2_lsn updated to {}\", l2_lsn);\n                        timeline\n                            .update_gc_compaction_state(GcCompactionState {\n                                last_completed_lsn: l2_lsn,\n                            })\n                            .map_err(CompactionError::Other)?;\n                    } else {\n                        warn!(\n                            \"l2_lsn updated to {} but it is less than the current l2_lsn {}\",\n                            l2_lsn, current_l2_lsn\n                        );\n                    }\n                }\n            }\n        }\n        self.clear_running_job();\n        Ok(if yield_for_l0 {\n            tracing::info!(\"give up gc-compaction: yield for L0 compaction\");\n            CompactionOutcome::YieldForL0\n        } else if has_pending_tasks {\n            CompactionOutcome::Pending\n        } else {\n            CompactionOutcome::Done\n        })\n    }\n\n    #[allow(clippy::type_complexity)]\n    pub fn remaining_jobs(\n        &self,\n    ) -> (\n        Option<(GcCompactionJobId, GcCompactionQueueItem)>,\n        VecDeque<(GcCompactionJobId, GcCompactionQueueItem)>,\n    ) {\n        let guard = self.inner.lock().unwrap();\n        
(guard.running.clone(), guard.queued.clone())\n    }\n\n    pub fn remaining_jobs_num(&self) -> usize {\n        let guard = self.inner.lock().unwrap();\n        guard.queued.len() + if guard.running.is_some() { 1 } else { 0 }\n    }\n}\n\n/// A job description for the gc-compaction job. This structure describes the rectangle range that the job will\n/// process. The exact layers that need to be compacted/rewritten will be generated when `compact_with_gc` gets\n/// called.\n#[derive(Debug, Clone)]\npub(crate) struct GcCompactJob {\n    pub dry_run: bool,\n    /// The key range to be compacted. The compaction algorithm will only regenerate key-value pairs within this range\n    /// [left inclusive, right exclusive), and other pairs will be rewritten into new files if necessary.\n    pub compact_key_range: Range<Key>,\n    /// The LSN range to be compacted. The compaction algorithm will use this range to determine the layers to be\n    /// selected for the compaction, and it does not guarantee the generated layers will have exactly the same LSN range\n    /// as specified here. 
The true range being compacted is `min_lsn/max_lsn` in [`GcCompactionJobDescription`].\n    /// min_lsn will always <= the lower bound specified here, and max_lsn will always >= the upper bound specified here.\n    pub compact_lsn_range: Range<Lsn>,\n    /// See [`CompactOptions::gc_compaction_do_metadata_compaction`].\n    pub do_metadata_compaction: bool,\n}\n\nimpl GcCompactJob {\n    pub fn from_compact_options(options: CompactOptions) -> Self {\n        GcCompactJob {\n            dry_run: options.flags.contains(CompactFlags::DryRun),\n            compact_key_range: options\n                .compact_key_range\n                .map(|x| x.into())\n                .unwrap_or(Key::MIN..Key::MAX),\n            compact_lsn_range: options\n                .compact_lsn_range\n                .map(|x| x.into())\n                .unwrap_or(Lsn::INVALID..Lsn::MAX),\n            do_metadata_compaction: options.gc_compaction_do_metadata_compaction,\n        }\n    }\n}\n\n/// A job description for the gc-compaction job. This structure is generated when `compact_with_gc` is called\n/// and contains the exact layers we want to compact.\npub struct GcCompactionJobDescription {\n    /// All layers to read in the compaction job\n    selected_layers: Vec<Layer>,\n    /// GC cutoff of the job. This is the lowest LSN that will be accessed by the read/GC path and we need to\n    /// keep all deltas <= this LSN or generate an image == this LSN.\n    gc_cutoff: Lsn,\n    /// LSNs to retain for the job. Read path will use this LSN so we need to keep deltas <= this LSN or\n    /// generate an image == this LSN.\n    retain_lsns_below_horizon: Vec<Lsn>,\n    /// Maximum layer LSN processed in this compaction, that is max(end_lsn of layers). Exclusive. All data\n    /// \\>= this LSN will be kept and will not be rewritten.\n    max_layer_lsn: Lsn,\n    /// Minimum layer LSN processed in this compaction, that is min(start_lsn of layers). 
Inclusive.\n    /// All access below (strict lower than `<`) this LSN will be routed through the normal read path instead of\n    /// k-merge within gc-compaction.\n    min_layer_lsn: Lsn,\n    /// Only compact layers overlapping with this range.\n    compaction_key_range: Range<Key>,\n    /// When partial compaction is enabled, these layers need to be rewritten to ensure no overlap.\n    /// This field is here solely for debugging. The field will not be read once the compaction\n    /// description is generated.\n    rewrite_layers: Vec<Arc<PersistentLayerDesc>>,\n}\n\n/// The result of bottom-most compaction for a single key at each LSN.\n#[derive(Debug)]\n#[cfg_attr(test, derive(PartialEq))]\npub struct KeyLogAtLsn(pub Vec<(Lsn, Value)>);\n\n/// The result of bottom-most compaction.\n#[derive(Debug)]\n#[cfg_attr(test, derive(PartialEq))]\npub(crate) struct KeyHistoryRetention {\n    /// Stores logs to reconstruct the value at the given LSN, that is to say, logs <= LSN or image == LSN.\n    pub(crate) below_horizon: Vec<(Lsn, KeyLogAtLsn)>,\n    /// Stores logs to reconstruct the value at any LSN above the horizon, that is to say, log > LSN.\n    pub(crate) above_horizon: KeyLogAtLsn,\n}\n\nimpl KeyHistoryRetention {\n    /// Hack: skip delta layer if we need to produce a layer of a same key-lsn.\n    ///\n    /// This can happen if we have removed some deltas in \"the middle\" of some existing layer's key-lsn-range.\n    /// For example, consider the case where a single delta with range [0x10,0x50) exists.\n    /// And we have branches at LSN 0x10, 0x20, 0x30.\n    /// Then we delete branch @ 0x20.\n    /// Bottom-most compaction may now delete the delta [0x20,0x30).\n    /// And that wouldn't change the shape of the layer.\n    ///\n    /// Note that bottom-most-gc-compaction never _adds_ new data in that case, only removes.\n    ///\n    /// `discard_key` will only be called when the writer reaches its target (instead of for every key), so it's fine to grab a 
lock inside.\n    async fn discard_key(key: &PersistentLayerKey, tline: &Arc<Timeline>, dry_run: bool) -> bool {\n        if dry_run {\n            return true;\n        }\n        if LayerMap::is_l0(&key.key_range, key.is_delta) {\n            // gc-compaction should not produce L0 deltas, otherwise it will break the layer order.\n            // We should ignore such layers.\n            return true;\n        }\n        let layer_generation;\n        {\n            let guard = tline.layers.read(LayerManagerLockHolder::Compaction).await;\n            if !guard.contains_key(key) {\n                return false;\n            }\n            layer_generation = guard.get_from_key(key).metadata().generation;\n        }\n        if layer_generation == tline.generation {\n            info!(\n                key=%key,\n                ?layer_generation,\n                \"discard layer due to duplicated layer key in the same generation\",\n            );\n            true\n        } else {\n            false\n        }\n    }\n\n    /// Pipe a history of a single key to the writers.\n    ///\n    /// If `image_writer` is none, the images will be placed into the delta layers.\n    /// The delta writer will contain all images and deltas (below and above the horizon) except the bottom-most images.\n    #[allow(clippy::too_many_arguments)]\n    async fn pipe_to(\n        self,\n        key: Key,\n        delta_writer: &mut SplitDeltaLayerWriter<'_>,\n        mut image_writer: Option<&mut SplitImageLayerWriter<'_>>,\n        stat: &mut CompactionStatistics,\n        ctx: &RequestContext,\n    ) -> anyhow::Result<()> {\n        let mut first_batch = true;\n        for (cutoff_lsn, KeyLogAtLsn(logs)) in self.below_horizon {\n            if first_batch {\n                if logs.len() == 1 && logs[0].1.is_image() {\n                    let Value::Image(img) = &logs[0].1 else {\n                        unreachable!()\n                    };\n                    
stat.produce_image_key(img);\n                    if let Some(image_writer) = image_writer.as_mut() {\n                        image_writer.put_image(key, img.clone(), ctx).await?;\n                    } else {\n                        delta_writer\n                            .put_value(key, cutoff_lsn, Value::Image(img.clone()), ctx)\n                            .await?;\n                    }\n                } else {\n                    for (lsn, val) in logs {\n                        stat.produce_key(&val);\n                        delta_writer.put_value(key, lsn, val, ctx).await?;\n                    }\n                }\n                first_batch = false;\n            } else {\n                for (lsn, val) in logs {\n                    stat.produce_key(&val);\n                    delta_writer.put_value(key, lsn, val, ctx).await?;\n                }\n            }\n        }\n        let KeyLogAtLsn(above_horizon_logs) = self.above_horizon;\n        for (lsn, val) in above_horizon_logs {\n            stat.produce_key(&val);\n            delta_writer.put_value(key, lsn, val, ctx).await?;\n        }\n        Ok(())\n    }\n\n    /// Verify if every key in the retention is readable by replaying the logs.\n    async fn verify(\n        &self,\n        key: Key,\n        base_img_from_ancestor: &Option<(Key, Lsn, Bytes)>,\n        full_history: &[(Key, Lsn, Value)],\n        tline: &Arc<Timeline>,\n    ) -> anyhow::Result<()> {\n        // Usually the min_lsn should be the first record but we do a full iteration to be safe.\n        let Some(min_lsn) = full_history.iter().map(|(_, lsn, _)| *lsn).min() else {\n            // This should never happen b/c if we don't have any history of a key, we won't even do `generate_key_retention`.\n            return Ok(());\n        };\n        let Some(max_lsn) = full_history.iter().map(|(_, lsn, _)| *lsn).max() else {\n            // This should never happen b/c if we don't have any history of a key, we won't even do 
`generate_key_retention`.\n            return Ok(());\n        };\n        let mut base_img = base_img_from_ancestor\n            .as_ref()\n            .map(|(_, lsn, img)| (*lsn, img));\n        let mut history = Vec::new();\n\n        async fn collect_and_verify(\n            key: Key,\n            lsn: Lsn,\n            base_img: &Option<(Lsn, &Bytes)>,\n            history: &[(Lsn, &NeonWalRecord)],\n            tline: &Arc<Timeline>,\n            skip_empty: bool,\n        ) -> anyhow::Result<()> {\n            if base_img.is_none() && history.is_empty() {\n                if skip_empty {\n                    return Ok(());\n                }\n                anyhow::bail!(\"verification failed: key {} has no history at {}\", key, lsn);\n            };\n\n            let mut records = history\n                .iter()\n                .map(|(lsn, val)| (*lsn, (*val).clone()))\n                .collect::<Vec<_>>();\n\n            // WAL redo requires records in the reverse LSN order\n            records.reverse();\n            let data = ValueReconstructState {\n                img: base_img.as_ref().map(|(lsn, img)| (*lsn, (*img).clone())),\n                records,\n            };\n\n            tline\n                .reconstruct_value(key, lsn, data, RedoAttemptType::GcCompaction)\n                .await\n                .with_context(|| format!(\"verification failed for key {key} at lsn {lsn}\"))?;\n\n            Ok(())\n        }\n\n        for (retain_lsn, KeyLogAtLsn(logs)) in &self.below_horizon {\n            for (lsn, val) in logs {\n                match val {\n                    Value::Image(img) => {\n                        base_img = Some((*lsn, img));\n                        history.clear();\n                    }\n                    Value::WalRecord(rec) if val.will_init() => {\n                        base_img = None;\n                        history.clear();\n                        history.push((*lsn, rec));\n                    }\n      
              Value::WalRecord(rec) => {\n                        history.push((*lsn, rec));\n                    }\n                }\n            }\n            if *retain_lsn >= min_lsn {\n                // Only verify after the key appears in the full history for the first time.\n\n                // We don't modify history: in theory, we could replace the history with a single\n                // image as in `generate_key_retention` to make redos at later LSNs faster. But we\n                // want to verify everything as if they are read from the real layer map.\n                collect_and_verify(key, *retain_lsn, &base_img, &history, tline, false)\n                    .await\n                    .context(\"below horizon retain_lsn\")?;\n            }\n        }\n\n        for (lsn, val) in &self.above_horizon.0 {\n            match val {\n                Value::Image(img) => {\n                    // Above the GC horizon, we verify every time we see an image.\n                    collect_and_verify(key, *lsn, &base_img, &history, tline, true)\n                        .await\n                        .context(\"above horizon full image\")?;\n                    base_img = Some((*lsn, img));\n                    history.clear();\n                }\n                Value::WalRecord(rec) if val.will_init() => {\n                    // Above the GC horizon, we verify every time we see an init record.\n                    collect_and_verify(key, *lsn, &base_img, &history, tline, true)\n                        .await\n                        .context(\"above horizon init record\")?;\n                    base_img = None;\n                    history.clear();\n                    history.push((*lsn, rec));\n                }\n                Value::WalRecord(rec) => {\n                    history.push((*lsn, rec));\n                }\n            }\n        }\n        // Ensure the latest record is readable.\n        collect_and_verify(key, max_lsn, &base_img, 
&history, tline, false)\n            .await\n            .context(\"latest record\")?;\n        Ok(())\n    }\n}\n\n#[derive(Debug, Serialize, Default)]\nstruct CompactionStatisticsNumSize {\n    num: u64,\n    size: u64,\n}\n\n#[derive(Debug, Serialize, Default)]\npub struct CompactionStatistics {\n    /// Delta layer visited (maybe compressed, physical size)\n    delta_layer_visited: CompactionStatisticsNumSize,\n    /// Image layer visited (maybe compressed, physical size)\n    image_layer_visited: CompactionStatisticsNumSize,\n    /// Delta layer produced (maybe compressed, physical size)\n    delta_layer_produced: CompactionStatisticsNumSize,\n    /// Image layer produced (maybe compressed, physical size)\n    image_layer_produced: CompactionStatisticsNumSize,\n    /// Delta layer discarded (maybe compressed, physical size of the layer being discarded instead of the original layer)\n    delta_layer_discarded: CompactionStatisticsNumSize,\n    /// Image layer discarded (maybe compressed, physical size of the layer being discarded instead of the original layer)\n    image_layer_discarded: CompactionStatisticsNumSize,\n    num_unique_keys_visited: usize,\n    /// Delta visited (uncompressed, original size)\n    wal_keys_visited: CompactionStatisticsNumSize,\n    /// Image visited (uncompressed, original size)\n    image_keys_visited: CompactionStatisticsNumSize,\n    /// Delta produced (uncompressed, original size)\n    wal_produced: CompactionStatisticsNumSize,\n    /// Image produced (uncompressed, original size)\n    image_produced: CompactionStatisticsNumSize,\n\n    // Time spent in each phase\n    time_acquire_lock_secs: f64,\n    time_analyze_secs: f64,\n    time_download_layer_secs: f64,\n    time_to_first_kv_pair_secs: f64,\n    time_main_loop_secs: f64,\n    time_final_phase_secs: f64,\n    time_total_secs: f64,\n\n    // Summary\n    /// Ratio of the key-value size after/before gc-compaction.\n    uncompressed_retention_ratio: f64,\n    /// Ratio of 
the physical size after/before gc-compaction.\n    compressed_retention_ratio: f64,\n}\n\nimpl CompactionStatistics {\n    fn estimated_size_of_value(val: &Value) -> usize {\n        match val {\n            Value::Image(img) => img.len(),\n            Value::WalRecord(NeonWalRecord::Postgres { rec, .. }) => rec.len(),\n            _ => std::mem::size_of::<NeonWalRecord>(),\n        }\n    }\n    fn estimated_size_of_key() -> usize {\n        KEY_SIZE // TODO: distinguish image layer and delta layer (count LSN in delta layer)\n    }\n    fn visit_delta_layer(&mut self, size: u64) {\n        self.delta_layer_visited.num += 1;\n        self.delta_layer_visited.size += size;\n    }\n    fn visit_image_layer(&mut self, size: u64) {\n        self.image_layer_visited.num += 1;\n        self.image_layer_visited.size += size;\n    }\n    fn on_unique_key_visited(&mut self) {\n        self.num_unique_keys_visited += 1;\n    }\n    fn visit_wal_key(&mut self, val: &Value) {\n        self.wal_keys_visited.num += 1;\n        self.wal_keys_visited.size +=\n            Self::estimated_size_of_value(val) as u64 + Self::estimated_size_of_key() as u64;\n    }\n    fn visit_image_key(&mut self, val: &Value) {\n        self.image_keys_visited.num += 1;\n        self.image_keys_visited.size +=\n            Self::estimated_size_of_value(val) as u64 + Self::estimated_size_of_key() as u64;\n    }\n    fn produce_key(&mut self, val: &Value) {\n        match val {\n            Value::Image(img) => self.produce_image_key(img),\n            Value::WalRecord(_) => self.produce_wal_key(val),\n        }\n    }\n    fn produce_wal_key(&mut self, val: &Value) {\n        self.wal_produced.num += 1;\n        self.wal_produced.size +=\n            Self::estimated_size_of_value(val) as u64 + Self::estimated_size_of_key() as u64;\n    }\n    fn produce_image_key(&mut self, val: &Bytes) {\n        self.image_produced.num += 1;\n        self.image_produced.size += val.len() as u64 + 
Self::estimated_size_of_key() as u64;\n    }\n    fn discard_delta_layer(&mut self, original_size: u64) {\n        self.delta_layer_discarded.num += 1;\n        self.delta_layer_discarded.size += original_size;\n    }\n    fn discard_image_layer(&mut self, original_size: u64) {\n        self.image_layer_discarded.num += 1;\n        self.image_layer_discarded.size += original_size;\n    }\n    fn produce_delta_layer(&mut self, size: u64) {\n        self.delta_layer_produced.num += 1;\n        self.delta_layer_produced.size += size;\n    }\n    fn produce_image_layer(&mut self, size: u64) {\n        self.image_layer_produced.num += 1;\n        self.image_layer_produced.size += size;\n    }\n    fn finalize(&mut self) {\n        let original_key_value_size = self.image_keys_visited.size + self.wal_keys_visited.size;\n        let produced_key_value_size = self.image_produced.size + self.wal_produced.size;\n        self.uncompressed_retention_ratio =\n            produced_key_value_size as f64 / (original_key_value_size as f64 + 1.0); // avoid div by 0\n        let original_physical_size = self.image_layer_visited.size + self.delta_layer_visited.size;\n        let produced_physical_size = self.image_layer_produced.size\n            + self.delta_layer_produced.size\n            + self.image_layer_discarded.size\n            + self.delta_layer_discarded.size; // Also include the discarded layers to make the ratio accurate\n        self.compressed_retention_ratio =\n            produced_physical_size as f64 / (original_physical_size as f64 + 1.0); // avoid div by 0\n    }\n}\n\n#[derive(Default, Debug, Clone, Copy, PartialEq, Eq)]\npub enum CompactionOutcome {\n    #[default]\n    /// No layers need to be compacted after this round. Compaction doesn't need\n    /// to be immediately scheduled.\n    Done,\n    /// Still has pending layers to be compacted after this round. 
Ideally, the scheduler\n    /// should immediately schedule another compaction.\n    Pending,\n    /// A timeline needs L0 compaction. Yield and schedule an immediate L0 compaction pass (only\n    /// guaranteed when `compaction_l0_first` is enabled).\n    YieldForL0,\n    /// Compaction was skipped, because the timeline is ineligible for compaction.\n    Skipped,\n}\n\nimpl Timeline {\n    /// TODO: cancellation\n    ///\n    /// Returns whether the compaction has pending tasks.\n    pub(crate) async fn compact_legacy(\n        self: &Arc<Self>,\n        cancel: &CancellationToken,\n        options: CompactOptions,\n        ctx: &RequestContext,\n    ) -> Result<CompactionOutcome, CompactionError> {\n        if options\n            .flags\n            .contains(CompactFlags::EnhancedGcBottomMostCompaction)\n        {\n            self.compact_with_gc(cancel, options, ctx).await?;\n            return Ok(CompactionOutcome::Done);\n        }\n\n        if options.flags.contains(CompactFlags::DryRun) {\n            return Err(CompactionError::Other(anyhow!(\n                \"dry-run mode is not supported for legacy compaction for now\"\n            )));\n        }\n\n        if options.compact_key_range.is_some() || options.compact_lsn_range.is_some() {\n            // maybe useful in the future? could implement this at some point\n            return Err(CompactionError::Other(anyhow!(\n                \"compaction range is not supported for legacy compaction for now\"\n            )));\n        }\n\n        // High level strategy for compaction / image creation:\n        //\n        // 1. First, do a L0 compaction to ensure we move the L0\n        // layers into the historic layer map get flat levels of\n        // layers. If we did not compact all L0 layers, we will\n        // prioritize compacting the timeline again and not do\n        // any of the compactions below.\n        //\n        // 2. 
Then, calculate the desired \"partitioning\" of the\n        // currently in-use key space. The goal is to partition the\n        // key space into roughly fixed-size chunks, but also take into\n        // account any existing image layers, and try to align the\n        // chunk boundaries with the existing image layers to avoid\n        // too much churn. Also try to align chunk boundaries with\n        // relation boundaries.  In principle, we don't know about\n        // relation boundaries here, we just deal with key-value\n        // pairs, and the code in pgdatadir_mapping.rs knows how to\n        // map relations into key-value pairs. But in practice we know\n        // that 'field6' is the block number, and the fields 1-5\n        // identify a relation. This is just an optimization,\n        // though.\n        //\n        // 3. Once we know the partitioning, for each partition,\n        // decide if it's time to create a new image layer. The\n        // criteria is: there has been too much \"churn\" since the last\n        // image layer? The \"churn\" is a fuzzy concept, it's a\n        // combination of too many delta files, or too much WAL in\n        // total in the delta file. Or perhaps: if creating an image\n        // file would allow deleting some older files.\n        //\n        // 4. In the end, if the tenant gets auto-sharded, we will run\n        // a shard-ancestor compaction.\n\n        // Is the timeline being deleted?\n        if self.is_stopping() {\n            trace!(\"Dropping out of compaction on timeline shutdown\");\n            return Err(CompactionError::new_cancelled());\n        }\n\n        let target_file_size = self.get_checkpoint_distance();\n\n        // Define partitioning schema if needed\n\n        // HADRON\n        let force_image_creation_lsn = self.get_force_image_creation_lsn();\n\n        // 1. 
L0 Compact\n        let l0_outcome = {\n            let timer = self.metrics.compact_time_histo.start_timer();\n            let l0_outcome = self\n                .compact_level0(\n                    target_file_size,\n                    options.flags.contains(CompactFlags::ForceL0Compaction),\n                    force_image_creation_lsn,\n                    ctx,\n                )\n                .await?;\n            timer.stop_and_record();\n            l0_outcome\n        };\n\n        if options.flags.contains(CompactFlags::OnlyL0Compaction) {\n            return Ok(l0_outcome);\n        }\n\n        // Yield if we have pending L0 compaction. The scheduler will do another pass.\n        if (l0_outcome == CompactionOutcome::Pending || l0_outcome == CompactionOutcome::YieldForL0)\n            && options.flags.contains(CompactFlags::YieldForL0)\n        {\n            info!(\"image/ancestor compaction yielding for L0 compaction\");\n            return Ok(CompactionOutcome::YieldForL0);\n        }\n\n        let gc_cutoff = *self.applied_gc_cutoff_lsn.read();\n        let l0_l1_boundary_lsn = {\n            // We do the repartition on the L0-L1 boundary. 
All data below the boundary\n            // are compacted by L0 with low read amplification, thus making the `repartition`\n            // function run fast.\n            let guard = self\n                .layers\n                .read(LayerManagerLockHolder::GetLayerMapInfo)\n                .await;\n            guard\n                .all_persistent_layers()\n                .iter()\n                .map(|x| {\n                    // Use the end LSN of delta layers OR the start LSN of image layers.\n                    if x.is_delta {\n                        x.lsn_range.end\n                    } else {\n                        x.lsn_range.start\n                    }\n                })\n                .max()\n        };\n\n        let (partition_mode, partition_lsn) = {\n            let last_repartition_lsn = self.partitioning.read().1;\n            let lsn = match l0_l1_boundary_lsn {\n                Some(boundary) => gc_cutoff\n                    .max(boundary)\n                    .max(last_repartition_lsn)\n                    .max(self.initdb_lsn)\n                    .max(self.ancestor_lsn),\n                None => self.get_last_record_lsn(),\n            };\n            if lsn <= self.initdb_lsn || lsn <= self.ancestor_lsn {\n                // Do not attempt to create image layers below the initdb or ancestor LSN -- no data below it\n                (\"l0_l1_boundary\", self.get_last_record_lsn())\n            } else {\n                (\"l0_l1_boundary\", lsn)\n            }\n        };\n\n        // 2. 
Repartition and create image layers if necessary\n        match self\n            .repartition(\n                partition_lsn,\n                self.get_compaction_target_size(),\n                options.flags,\n                ctx,\n            )\n            .await\n        {\n            Ok(((dense_partitioning, sparse_partitioning), lsn)) if lsn >= gc_cutoff => {\n                // Disables access_stats updates, so that the files we read remain candidates for eviction after we're done with them\n                let image_ctx = RequestContextBuilder::from(ctx)\n                    .access_stats_behavior(AccessStatsBehavior::Skip)\n                    .attached_child();\n\n                let mut partitioning = dense_partitioning;\n                partitioning\n                    .parts\n                    .extend(sparse_partitioning.into_dense().parts);\n\n                // 3. Create new image layers for partitions that have been modified \"enough\".\n                let mode = if options\n                    .flags\n                    .contains(CompactFlags::ForceImageLayerCreation)\n                {\n                    ImageLayerCreationMode::Force\n                } else {\n                    ImageLayerCreationMode::Try\n                };\n                let (image_layers, outcome) = self\n                    .create_image_layers(\n                        &partitioning,\n                        lsn,\n                        force_image_creation_lsn,\n                        mode,\n                        &image_ctx,\n                        self.last_image_layer_creation_status\n                            .load()\n                            .as_ref()\n                            .clone(),\n                        options.flags.contains(CompactFlags::YieldForL0),\n                    )\n                    .instrument(info_span!(\"create_image_layers\", mode = %mode, partition_mode = %partition_mode, lsn = %lsn))\n                    .await\n      
              .inspect_err(|err| {\n                        if let CreateImageLayersError::GetVectoredError(\n                            GetVectoredError::MissingKey(_),\n                        ) = err\n                        {\n                            critical_timeline!(\n                                self.tenant_shard_id,\n                                self.timeline_id,\n                                Some(&self.corruption_detected),\n                                \"missing key during compaction: {err:?}\"\n                            );\n                        }\n                    })?;\n\n                self.last_image_layer_creation_status\n                    .store(Arc::new(outcome.clone()));\n\n                self.upload_new_image_layers(image_layers)?;\n                if let LastImageLayerCreationStatus::Incomplete { .. } = outcome {\n                    // Yield and do not do any other kind of compaction.\n                    info!(\n                        \"skipping shard ancestor compaction due to pending image layer generation tasks (preempted by L0 compaction).\"\n                    );\n                    return Ok(CompactionOutcome::YieldForL0);\n                }\n            }\n\n            Ok(_) => {\n                // This happens very frequently so we don't want to log it.\n                debug!(\"skipping repartitioning due to image compaction LSN being below GC cutoff\");\n            }\n\n            // Suppress errors when cancelled.\n            //\n            // Log other errors but continue. Failure to repartition is normal, if the timeline was just created\n            // as an empty timeline. Also in unit tests, when we use the timeline as a simple\n            // key-value store, ignoring the datadir layout. Log the error but continue.\n            //\n            // TODO:\n            // 1. shouldn't we return early here if we observe cancellation\n            // 2. 
Experiment: can we stop checking self.cancel here?\n            Err(_) if self.cancel.is_cancelled() => {} // TODO: try how we fare removing this branch\n            Err(err) if err.is_cancel() => {}\n            Err(RepartitionError::CollectKeyspace(\n                e @ CollectKeySpaceError::Decode(_)\n                | e @ CollectKeySpaceError::PageRead(\n                    PageReconstructError::MissingKey(_) | PageReconstructError::WalRedo(_),\n                ),\n            )) => {\n                // Alert on critical errors that indicate data corruption.\n                critical_timeline!(\n                    self.tenant_shard_id,\n                    self.timeline_id,\n                    Some(&self.corruption_detected),\n                    \"could not compact, repartitioning keyspace failed: {e:?}\"\n                );\n            }\n            Err(e) => error!(\n                \"could not compact, repartitioning keyspace failed: {:?}\",\n                e.into_anyhow()\n            ),\n        };\n\n        let partition_count = self.partitioning.read().0.0.parts.len();\n\n        // 4. 
Shard ancestor compaction\n        if self.get_compaction_shard_ancestor() && self.shard_identity.count >= ShardCount::new(2) {\n            // Limit the number of layer rewrites to the number of partitions: this means its\n            // runtime should be comparable to a full round of image layer creations, rather than\n            // being potentially much longer.\n            let rewrite_max = partition_count;\n\n            let outcome = self\n                .compact_shard_ancestors(\n                    rewrite_max,\n                    options.flags.contains(CompactFlags::YieldForL0),\n                    ctx,\n                )\n                .await?;\n            match outcome {\n                CompactionOutcome::Pending | CompactionOutcome::YieldForL0 => return Ok(outcome),\n                CompactionOutcome::Done | CompactionOutcome::Skipped => {}\n            }\n        }\n\n        Ok(CompactionOutcome::Done)\n    }\n\n    /* BEGIN_HADRON */\n    // Get the force image creation LSN based on gc_cutoff_lsn.\n    // Note that this is an estimation and the workload rate may suddenly change. 
When that happens,\n    // the force image creation may be too early or too late, but eventually it should be able to catch up.\n    pub(crate) fn get_force_image_creation_lsn(self: &Arc<Self>) -> Option<Lsn> {\n        let image_creation_period = self.get_image_layer_force_creation_period()?;\n        let current_lsn = self.get_last_record_lsn();\n        let pitr_lsn = self.gc_info.read().unwrap().cutoffs.time?;\n        let pitr_interval = self.get_pitr_interval();\n        if pitr_lsn == Lsn::INVALID || pitr_interval.is_zero() {\n            tracing::warn!(\n                \"pitr LSN/interval not found, skipping force image creation LSN calculation\"\n            );\n            return None;\n        }\n\n        let delta_lsn = current_lsn.checked_sub(pitr_lsn).unwrap().0\n            * image_creation_period.as_secs()\n            / pitr_interval.as_secs();\n        let force_image_creation_lsn = current_lsn.checked_sub(delta_lsn).unwrap_or(Lsn(0));\n\n        tracing::info!(\n            \"Tenant shard {} computed force_image_creation_lsn: {}. 
Current lsn: {}, image_layer_force_creation_period: {:?}, GC cutoff: {}, PITR interval: {:?}\",\n            self.tenant_shard_id,\n            force_image_creation_lsn,\n            current_lsn,\n            image_creation_period,\n            pitr_lsn,\n            pitr_interval\n        );\n\n        Some(force_image_creation_lsn)\n    }\n    /* END_HADRON */\n\n    /// Check for layers that are elegible to be rewritten:\n    /// - Shard splitting: After a shard split, ancestor layers beyond pitr_interval, so that\n    ///   we don't indefinitely retain keys in this shard that aren't needed.\n    /// - For future use: layers beyond pitr_interval that are in formats we would\n    ///   rather not maintain compatibility with indefinitely.\n    ///\n    /// Note: this phase may read and write many gigabytes of data: use rewrite_max to bound\n    /// how much work it will try to do in each compaction pass.\n    async fn compact_shard_ancestors(\n        self: &Arc<Self>,\n        rewrite_max: usize,\n        yield_for_l0: bool,\n        ctx: &RequestContext,\n    ) -> Result<CompactionOutcome, CompactionError> {\n        let mut outcome = CompactionOutcome::Done;\n        let mut drop_layers = Vec::new();\n        let mut layers_to_rewrite: Vec<Layer> = Vec::new();\n\n        // We will use the Lsn cutoff of the last GC as a threshold for rewriting layers: if a\n        // layer is behind this Lsn, it indicates that the layer is being retained beyond the\n        // pitr_interval, for example because a branchpoint references it.\n        //\n        // Holding this read guard also blocks [`Self::gc_timeline`] from entering while we\n        // are rewriting layers.\n        let latest_gc_cutoff = self.get_applied_gc_cutoff_lsn();\n        let pitr_cutoff = self.gc_info.read().unwrap().cutoffs.time;\n\n        let layers = self.layers.read(LayerManagerLockHolder::Compaction).await;\n        let layers_iter = layers.layer_map()?.iter_historic_layers();\n        let 
(layers_total, mut layers_checked) = (layers_iter.len(), 0);\n        for layer_desc in layers_iter {\n            layers_checked += 1;\n            let layer = layers.get_from_desc(&layer_desc);\n            if layer.metadata().shard.shard_count == self.shard_identity.count {\n                // This layer does not belong to a historic ancestor, no need to re-image it.\n                continue;\n            }\n\n            // This layer was created on an ancestor shard: check if it contains any data for this shard.\n            let sharded_range = ShardedRange::new(layer_desc.get_key_range(), &self.shard_identity);\n            let layer_local_page_count = sharded_range.page_count();\n            let layer_raw_page_count = ShardedRange::raw_size(&layer_desc.get_key_range());\n            if layer_local_page_count == 0 {\n                // This ancestral layer only covers keys that belong to other shards.\n                // We include the full metadata in the log: if we had some critical bug that caused\n                // us to incorrectly drop layers, this would simplify manually debugging + reinstating those layers.\n                debug!(%layer, old_metadata=?layer.metadata(),\n                    \"dropping layer after shard split, contains no keys for this shard\",\n                );\n\n                if cfg!(debug_assertions) {\n                    // Expensive, exhaustive check of keys in this layer: this guards against ShardedRange's calculations being\n                    // wrong.  
If ShardedRange claims the local page count is zero, then no keys in this layer\n                    // should be !is_key_disposable()\n                    // TODO: exclude sparse keyspace from this check, otherwise it will infinitely loop.\n                    let range = layer_desc.get_key_range();\n                    let mut key = range.start;\n                    while key < range.end {\n                        debug_assert!(self.shard_identity.is_key_disposable(&key));\n                        key = key.next();\n                    }\n                }\n\n                drop_layers.push(layer);\n                continue;\n            } else if layer_local_page_count != u32::MAX\n                && layer_local_page_count == layer_raw_page_count\n            {\n                debug!(%layer,\n                    \"layer is entirely shard local ({} keys), no need to filter it\",\n                    layer_local_page_count\n                );\n                continue;\n            }\n\n            // Only rewrite a layer if we can reclaim significant space.\n            if layer_local_page_count != u32::MAX\n                && layer_local_page_count as f64 / layer_raw_page_count as f64\n                    <= ANCESTOR_COMPACTION_REWRITE_THRESHOLD\n            {\n                debug!(%layer,\n                    \"layer has a large share of local pages \\\n                        ({layer_local_page_count}/{layer_raw_page_count} > \\\n                        {ANCESTOR_COMPACTION_REWRITE_THRESHOLD}), not rewriting\",\n                );\n            }\n\n            // Don't bother re-writing a layer if it is within the PITR window: it will age-out eventually\n            // without incurring the I/O cost of a rewrite.\n            if layer_desc.get_lsn_range().end >= *latest_gc_cutoff {\n                debug!(%layer, \"Skipping rewrite of layer still in GC window ({} >= {})\",\n                    layer_desc.get_lsn_range().end, *latest_gc_cutoff);\n           
     continue;\n            }\n\n            // We do not yet implement rewrite of delta layers.\n            if layer_desc.is_delta() {\n                debug!(%layer, \"Skipping rewrite of delta layer\");\n                continue;\n            }\n\n            // We don't bother rewriting layers that aren't visible, since these won't be needed by\n            // reads and will likely be garbage collected soon.\n            if layer.visibility() != LayerVisibilityHint::Visible {\n                debug!(%layer, \"Skipping rewrite of invisible layer\");\n                continue;\n            }\n\n            // Only rewrite layers if their generations differ.  This guarantees:\n            //  - that local rewrite is safe, as local layer paths will differ between existing layer and rewritten one\n            //  - that the layer is persistent in remote storage, as we only see old-generation'd layer via loading from remote storage\n            if layer.metadata().generation == self.generation {\n                debug!(%layer, \"Skipping rewrite, is not from old generation\");\n                continue;\n            }\n\n            if layers_to_rewrite.len() >= rewrite_max {\n                debug!(%layer, \"Will rewrite layer on a future compaction, already rewrote {}\",\n                    layers_to_rewrite.len()\n                );\n                outcome = CompactionOutcome::Pending;\n                break;\n            }\n\n            // Fall through: all our conditions for doing a rewrite passed.\n            layers_to_rewrite.push(layer);\n        }\n\n        // Drop read lock on layer map before we start doing time-consuming I/O.\n        drop(layers);\n\n        // Drop out early if there's nothing to do.\n        if layers_to_rewrite.is_empty() && drop_layers.is_empty() {\n            return Ok(CompactionOutcome::Done);\n        }\n\n        info!(\n            \"starting shard ancestor compaction, rewriting {} layers and dropping {} layers, \\\n      
          checked {layers_checked}/{layers_total} layers \\\n                (latest_gc_cutoff={} pitr_cutoff={:?})\",\n            layers_to_rewrite.len(),\n            drop_layers.len(),\n            *latest_gc_cutoff,\n            pitr_cutoff,\n        );\n        let started = Instant::now();\n\n        let mut replace_image_layers = Vec::new();\n        let total = layers_to_rewrite.len();\n\n        for (i, layer) in layers_to_rewrite.into_iter().enumerate() {\n            if self.cancel.is_cancelled() {\n                return Err(CompactionError::new_cancelled());\n            }\n\n            info!(layer=%layer, \"rewriting layer after shard split: {}/{}\", i, total);\n\n            let mut image_layer_writer = ImageLayerWriter::new(\n                self.conf,\n                self.timeline_id,\n                self.tenant_shard_id,\n                &layer.layer_desc().key_range,\n                layer.layer_desc().image_layer_lsn(),\n                &self.gate,\n                self.cancel.clone(),\n                ctx,\n            )\n            .await\n            .map_err(CompactionError::Other)?;\n\n            // Safety of layer rewrites:\n            // - We are writing to a different local file path than we are reading from, so the old Layer\n            //   cannot interfere with the new one.\n            // - In the page cache, contents for a particular VirtualFile are stored with a file_id that\n            //   is different for two layers with the same name (in `ImageLayerInner::new` we always\n            //   acquire a fresh id from [`crate::page_cache::next_file_id`].  So readers do not risk\n            //   reading the index from one layer file, and then data blocks from the rewritten layer file.\n            // - Any readers that have a reference to the old layer will keep it alive until they are done\n            //   with it. 
If they are trying to promote from remote storage, that will fail, but this is the same\n            //   as for compaction generally: compaction is allowed to delete layers that readers might be trying to use.\n            // - We do not run concurrently with other kinds of compaction, so the only layer map writes we race with are:\n            //    - GC, which at worst witnesses us \"undelete\" a layer that they just deleted.\n            //    - ingestion, which only inserts layers, therefore cannot collide with us.\n            let resident = layer.download_and_keep_resident(ctx).await?;\n\n            let keys_written = resident\n                .filter(&self.shard_identity, &mut image_layer_writer, ctx)\n                .await?;\n\n            if keys_written > 0 {\n                let (desc, path) = image_layer_writer\n                    .finish(ctx)\n                    .await\n                    .map_err(CompactionError::Other)?;\n                let new_layer = Layer::finish_creating(self.conf, self, desc, &path)\n                    .map_err(CompactionError::Other)?;\n                info!(layer=%new_layer, \"rewrote layer, {} -> {} bytes\",\n                    layer.metadata().file_size,\n                    new_layer.metadata().file_size);\n\n                replace_image_layers.push((layer, new_layer));\n            } else {\n                // Drop the old layer.  
Usually for this case we would already have noticed that\n                // the layer has no data for us with the ShardedRange check above, but\n                // filtering can still write zero keys, in which case we drop it here.\n                drop_layers.push(layer);\n            }\n\n            // Yield for L0 compaction if necessary, but make sure we update the layer map below\n            // with the work we've already done.\n            if yield_for_l0\n                && self\n                    .l0_compaction_trigger\n                    .notified()\n                    .now_or_never()\n                    .is_some()\n            {\n                info!(\"shard ancestor compaction yielding for L0 compaction\");\n                outcome = CompactionOutcome::YieldForL0;\n                break;\n            }\n        }\n\n        for layer in &drop_layers {\n            info!(%layer, old_metadata=?layer.metadata(),\n                \"dropping layer after shard split (no keys for this shard)\",\n            );\n        }\n\n        // At this point, we have replaced local layer files with their rewritten form, but not yet uploaded\n        // metadata to reflect that. If we restart here, the replaced layer files will look invalid (size mismatch\n        // to remote index) and be removed. This is inefficient but safe.\n        fail::fail_point!(\"compact-shard-ancestors-localonly\");\n\n        // Update the LayerMap so that readers will use the new layers, and enqueue it for writing to remote storage\n        self.rewrite_layers(replace_image_layers, drop_layers)\n            .await?;\n\n        fail::fail_point!(\"compact-shard-ancestors-enqueued\");\n\n        // We wait for all uploads to complete before finishing this compaction stage.  
This is not\n        // necessary for correctness, but it simplifies testing, and avoids proceeding with another\n        // Timeline's compaction while this timeline's uploads may be generating lots of disk I/O\n        // load.\n        if outcome != CompactionOutcome::YieldForL0 {\n            info!(\"shard ancestor compaction waiting for uploads\");\n            tokio::select! {\n                result = self.remote_client.wait_completion() => match result {\n                    Ok(()) => {},\n                    Err(WaitCompletionError::NotInitialized(ni)) => return Err(CompactionError::from(ni)),\n                    Err(WaitCompletionError::UploadQueueShutDownOrStopped) => {\n                        return Err(CompactionError::new_cancelled());\n                    }\n                },\n                // Don't wait if there's L0 compaction to do. We don't need to update the outcome\n                // here, because we've already done the actual work.\n                _ = self.l0_compaction_trigger.notified(), if yield_for_l0 => {},\n            }\n        }\n\n        info!(\n            \"shard ancestor compaction done in {:.3}s{}\",\n            started.elapsed().as_secs_f64(),\n            match outcome {\n                CompactionOutcome::Pending =>\n                    format!(\", with pending work (rewrite_max={rewrite_max})\"),\n                CompactionOutcome::YieldForL0 => String::from(\", yielding for L0 compaction\"),\n                CompactionOutcome::Skipped | CompactionOutcome::Done => String::new(),\n            }\n        );\n\n        fail::fail_point!(\"compact-shard-ancestors-persistent\");\n\n        Ok(outcome)\n    }\n\n    /// Update the LayerVisibilityHint of layers covered by image layers, based on whether there is\n    /// an image layer between them and the most recent readable LSN (branch point or tip of timeline).  
The\n    /// purpose of the visibility hint is to record which layers need to be available to service reads.\n    ///\n    /// The result may be used as an input to eviction and secondary downloads to de-prioritize layers\n    /// that we know won't be needed for reads.\n    pub(crate) async fn update_layer_visibility(\n        &self,\n    ) -> Result<(), super::layer_manager::Shutdown> {\n        let head_lsn = self.get_last_record_lsn();\n\n        // We will sweep through layers in reverse-LSN order.  We only do historic layers.  L0 deltas\n        // are implicitly left visible, because LayerVisibilityHint's default is Visible, and we never modify it here.\n        // Note that L0 deltas _can_ be covered by image layers, but we consider them 'visible' because we anticipate that\n        // they will be subject to L0->L1 compaction in the near future.\n        let layer_manager = self\n            .layers\n            .read(LayerManagerLockHolder::GetLayerMapInfo)\n            .await;\n        let layer_map = layer_manager.layer_map()?;\n\n        let readable_points = {\n            let children = self.gc_info.read().unwrap().retain_lsns.clone();\n\n            let mut readable_points = Vec::with_capacity(children.len() + 1);\n            for (child_lsn, _child_timeline_id, is_offloaded) in &children {\n                if *is_offloaded == MaybeOffloaded::Yes {\n                    continue;\n                }\n                readable_points.push(*child_lsn);\n            }\n            readable_points.push(head_lsn);\n            readable_points\n        };\n\n        let (layer_visibility, covered) = layer_map.get_visibility(readable_points);\n        for (layer_desc, visibility) in layer_visibility {\n            // FIXME: do a more efficient bulk zip() through the layers rather than NlogN getting each one\n            let layer = layer_manager.get_from_desc(&layer_desc);\n            layer.set_visibility(visibility);\n        }\n\n        // TODO: publish our 
covered KeySpace to our parent, so that when they update their visibility, they can\n        // avoid assuming that everything at a branch point is visible.\n        drop(covered);\n        Ok(())\n    }\n\n    /// Collect a bunch of Level 0 layer files, and compact and reshuffle them as\n    /// as Level 1 files. Returns whether the L0 layers are fully compacted.\n    async fn compact_level0(\n        self: &Arc<Self>,\n        target_file_size: u64,\n        force_compaction_ignore_threshold: bool,\n        force_compaction_lsn: Option<Lsn>,\n        ctx: &RequestContext,\n    ) -> Result<CompactionOutcome, CompactionError> {\n        let CompactLevel0Phase1Result {\n            new_layers,\n            deltas_to_compact,\n            outcome,\n        } = {\n            let phase1_span = info_span!(\"compact_level0_phase1\");\n            let ctx = ctx.attached_child();\n            let stats = CompactLevel0Phase1StatsBuilder {\n                version: Some(2),\n                tenant_id: Some(self.tenant_shard_id),\n                timeline_id: Some(self.timeline_id),\n                ..Default::default()\n            };\n\n            self.compact_level0_phase1(\n                stats,\n                target_file_size,\n                force_compaction_ignore_threshold,\n                force_compaction_lsn,\n                &ctx,\n            )\n            .instrument(phase1_span)\n            .await?\n        };\n\n        if new_layers.is_empty() && deltas_to_compact.is_empty() {\n            // nothing to do\n            return Ok(CompactionOutcome::Done);\n        }\n\n        self.finish_compact_batch(&new_layers, &Vec::new(), &deltas_to_compact)\n            .await?;\n        Ok(outcome)\n    }\n\n    /// Level0 files first phase of compaction, explained in the [`Self::compact_legacy`] comment.\n    async fn compact_level0_phase1(\n        self: &Arc<Self>,\n        mut stats: CompactLevel0Phase1StatsBuilder,\n        target_file_size: u64,\n        
force_compaction_ignore_threshold: bool,\n        force_compaction_lsn: Option<Lsn>,\n        ctx: &RequestContext,\n    ) -> Result<CompactLevel0Phase1Result, CompactionError> {\n        let begin = tokio::time::Instant::now();\n        let guard = self.layers.read(LayerManagerLockHolder::Compaction).await;\n        let now = tokio::time::Instant::now();\n        stats.read_lock_acquisition_micros =\n            DurationRecorder::Recorded(RecordedDuration(now - begin), now);\n\n        let layers = guard.layer_map()?;\n        let level0_deltas = layers.level0_deltas();\n        stats.level0_deltas_count = Some(level0_deltas.len());\n\n        // Only compact if enough layers have accumulated.\n        let threshold = self.get_compaction_threshold();\n        if level0_deltas.is_empty() || level0_deltas.len() < threshold {\n            if force_compaction_ignore_threshold {\n                if !level0_deltas.is_empty() {\n                    info!(\n                        level0_deltas = level0_deltas.len(),\n                        threshold, \"too few deltas to compact, but forcing compaction\"\n                    );\n                } else {\n                    info!(\n                        level0_deltas = level0_deltas.len(),\n                        threshold, \"too few deltas to compact, cannot force compaction\"\n                    );\n                    return Ok(CompactLevel0Phase1Result::default());\n                }\n            } else {\n                // HADRON\n                let min_lsn = level0_deltas\n                    .iter()\n                    .map(|a| a.get_lsn_range().start)\n                    .reduce(min);\n                if force_compaction_lsn.is_some()\n                    && min_lsn.is_some()\n                    && min_lsn.unwrap() < force_compaction_lsn.unwrap()\n                {\n                    info!(\n                        \"forcing L0 compaction of {} L0 deltas. 
Min lsn: {}, force compaction lsn: {}\",\n                        level0_deltas.len(),\n                        min_lsn.unwrap(),\n                        force_compaction_lsn.unwrap()\n                    );\n                } else {\n                    debug!(\n                        level0_deltas = level0_deltas.len(),\n                        threshold, \"too few deltas to compact\"\n                    );\n                    return Ok(CompactLevel0Phase1Result::default());\n                }\n            }\n        }\n\n        let mut level0_deltas = level0_deltas\n            .iter()\n            .map(|x| guard.get_from_desc(x))\n            .collect::<Vec<_>>();\n\n        drop_layer_manager_rlock(guard);\n\n        // This is the last LSN that we have seen for L0 compaction in the timeline. This LSN might be updated\n        // by the time we finish the compaction. So we need to get it here.\n        let l0_last_record_lsn = self.get_last_record_lsn();\n\n        // Gather the files to compact in this iteration.\n        //\n        // Start with the oldest Level 0 delta file, and collect any other\n        // level 0 files that form a contiguous sequence, such that the end\n        // LSN of previous file matches the start LSN of the next file.\n        //\n        // Note that if the files don't form such a sequence, we might\n        // \"compact\" just a single file. That's a bit pointless, but it allows\n        // us to get rid of the level 0 file, and compact the other files on\n        // the next iteration. 
This could probably made smarter, but such\n        // \"gaps\" in the sequence of level 0 files should only happen in case\n        // of a crash, partial download from cloud storage, or something like\n        // that, so it's not a big deal in practice.\n        level0_deltas.sort_by_key(|l| l.layer_desc().lsn_range.start);\n        let mut level0_deltas_iter = level0_deltas.iter();\n\n        let first_level0_delta = level0_deltas_iter.next().unwrap();\n        let mut prev_lsn_end = first_level0_delta.layer_desc().lsn_range.end;\n        let mut deltas_to_compact = Vec::with_capacity(level0_deltas.len());\n\n        // Accumulate the size of layers in `deltas_to_compact`\n        let mut deltas_to_compact_bytes = 0;\n\n        // Under normal circumstances, we will accumulate up to compaction_upper_limit L0s of size\n        // checkpoint_distance each.  To avoid edge cases using extra system resources, bound our\n        // work in this function to only operate on this much delta data at once.\n        //\n        // In general, compaction_threshold should be <= compaction_upper_limit, but in case that\n        // the constraint is not respected, we use the larger of the two.\n        let delta_size_limit = std::cmp::max(\n            self.get_compaction_upper_limit(),\n            self.get_compaction_threshold(),\n        ) as u64\n            * std::cmp::max(self.get_checkpoint_distance(), DEFAULT_CHECKPOINT_DISTANCE);\n\n        let mut fully_compacted = true;\n\n        deltas_to_compact.push(first_level0_delta.download_and_keep_resident(ctx).await?);\n        for l in level0_deltas_iter {\n            let lsn_range = &l.layer_desc().lsn_range;\n\n            if lsn_range.start != prev_lsn_end {\n                break;\n            }\n            deltas_to_compact.push(l.download_and_keep_resident(ctx).await?);\n            deltas_to_compact_bytes += l.metadata().file_size;\n            prev_lsn_end = lsn_range.end;\n\n            if 
deltas_to_compact_bytes >= delta_size_limit {\n                info!(\n                    l0_deltas_selected = deltas_to_compact.len(),\n                    l0_deltas_total = level0_deltas.len(),\n                    \"L0 compaction picker hit max delta layer size limit: {}\",\n                    delta_size_limit\n                );\n                fully_compacted = false;\n\n                // Proceed with compaction, but only a subset of L0s\n                break;\n            }\n        }\n        let lsn_range = Range {\n            start: deltas_to_compact\n                .first()\n                .unwrap()\n                .layer_desc()\n                .lsn_range\n                .start,\n            end: deltas_to_compact.last().unwrap().layer_desc().lsn_range.end,\n        };\n\n        info!(\n            \"Starting Level0 compaction in LSN range {}-{} for {} layers ({} deltas in total)\",\n            lsn_range.start,\n            lsn_range.end,\n            deltas_to_compact.len(),\n            level0_deltas.len()\n        );\n\n        for l in deltas_to_compact.iter() {\n            info!(\"compact includes {l}\");\n        }\n\n        // We don't need the original list of layers anymore. 
Drop it so that\n        // we don't accidentally use it later in the function.\n        drop(level0_deltas);\n\n        stats.compaction_prerequisites_micros = stats.read_lock_acquisition_micros.till_now();\n\n        // TODO: replace with streaming k-merge\n        let all_keys = {\n            let mut all_keys = Vec::new();\n            for l in deltas_to_compact.iter() {\n                if self.cancel.is_cancelled() {\n                    return Err(CompactionError::new_cancelled());\n                }\n                let delta = l.get_as_delta(ctx).await.map_err(CompactionError::Other)?;\n                let keys = delta\n                    .index_entries(ctx)\n                    .await\n                    .map_err(CompactionError::Other)?;\n                all_keys.extend(keys);\n            }\n            // The current stdlib sorting implementation is designed in a way where it is\n            // particularly fast where the slice is made up of sorted sub-ranges.\n            all_keys.sort_by_key(|DeltaEntry { key, lsn, .. }| (*key, *lsn));\n            all_keys\n        };\n\n        stats.read_lock_held_key_sort_micros = stats.compaction_prerequisites_micros.till_now();\n\n        // Determine N largest holes where N is number of compacted layers. 
The vec is sorted by key range start.\n        //\n        // A hole is a key range for which this compaction doesn't have any WAL records.\n        // Our goal in this compaction iteration is to avoid creating L1s that, in terms of their key range,\n        // cover the hole, but actually don't contain any WAL records for that key range.\n        // The reason is that the mere stack of L1s (`count_deltas`) triggers image layer creation (`create_image_layers`).\n        // That image layer creation would be useless for a hole range covered by L1s that don't contain any WAL records.\n        //\n        // The algorithm chooses holes as follows.\n        // - Slide a 2-window over the keys in key order to get the hole range (=distance between two keys).\n        // - Filter: min threshold on range length\n        // - Rank: by coverage size (=number of image layers required to reconstruct each key in the range for which we have any data)\n        //\n        // For more details, intuition, and some ASCII art see https://github.com/neondatabase/neon/pull/3597#discussion_r1112704451\n        #[derive(PartialEq, Eq)]\n        struct Hole {\n            key_range: Range<Key>,\n            coverage_size: usize,\n        }\n        let holes: Vec<Hole> = {\n            use std::cmp::Ordering;\n            impl Ord for Hole {\n                fn cmp(&self, other: &Self) -> Ordering {\n                    self.coverage_size.cmp(&other.coverage_size).reverse()\n                }\n            }\n            impl PartialOrd for Hole {\n                fn partial_cmp(&self, other: &Self) -> Option<Ordering> {\n                    Some(self.cmp(other))\n                }\n            }\n            let max_holes = deltas_to_compact.len();\n            let min_hole_range = (target_file_size / page_cache::PAGE_SZ as u64) as i128;\n            let min_hole_coverage_size = 3; // TODO: something more flexible?\n            // min-heap (reserve space for one more element added before 
eviction)\n            let mut heap: BinaryHeap<Hole> = BinaryHeap::with_capacity(max_holes + 1);\n            let mut prev: Option<Key> = None;\n\n            for &DeltaEntry { key: next_key, .. } in all_keys.iter() {\n                if let Some(prev_key) = prev {\n                    // just first fast filter, do not create hole entries for metadata keys. The last hole in the\n                    // compaction is the gap between data key and metadata keys.\n                    if next_key.to_i128() - prev_key.to_i128() >= min_hole_range\n                        && !Key::is_metadata_key(&prev_key)\n                    {\n                        let key_range = prev_key..next_key;\n                        // Measuring hole by just subtraction of i128 representation of key range boundaries\n                        // has not so much sense, because largest holes will corresponds field1/field2 changes.\n                        // But we are mostly interested to eliminate holes which cause generation of excessive image layers.\n                        // That is why it is better to measure size of hole as number of covering image layers.\n                        let coverage_size = {\n                            // TODO: optimize this with copy-on-write layer map.\n                            let guard = self.layers.read(LayerManagerLockHolder::Compaction).await;\n                            let layers = guard.layer_map()?;\n                            layers.image_coverage(&key_range, l0_last_record_lsn).len()\n                        };\n                        if coverage_size >= min_hole_coverage_size {\n                            heap.push(Hole {\n                                key_range,\n                                coverage_size,\n                            });\n                            if heap.len() > max_holes {\n                                heap.pop(); // remove smallest hole\n                            }\n                        }\n            
        }\n                }\n                prev = Some(next_key.next());\n            }\n            let mut holes = heap.into_vec();\n            holes.sort_unstable_by_key(|hole| hole.key_range.start);\n            holes\n        };\n        stats.read_lock_held_compute_holes_micros = stats.read_lock_held_key_sort_micros.till_now();\n\n        if self.cancel.is_cancelled() {\n            return Err(CompactionError::new_cancelled());\n        }\n\n        stats.read_lock_drop_micros = stats.read_lock_held_compute_holes_micros.till_now();\n\n        // This iterator walks through all key-value pairs from all the layers\n        // we're compacting, in key, LSN order.\n        // If there's both a Value::Image and Value::WalRecord for the same (key,lsn),\n        // then the Value::Image is ordered before Value::WalRecord.\n        let mut all_values_iter = {\n            let mut deltas = Vec::with_capacity(deltas_to_compact.len());\n            for l in deltas_to_compact.iter() {\n                let l = l.get_as_delta(ctx).await.map_err(CompactionError::Other)?;\n                deltas.push(l);\n            }\n            MergeIterator::create_with_options(\n                &deltas,\n                &[],\n                ctx,\n                1024 * 8192, /* 8 MiB buffer per layer iterator */\n                1024,\n            )\n        };\n\n        // This iterator walks through all keys and is needed to calculate size used by each key\n        let mut all_keys_iter = all_keys\n            .iter()\n            .map(|DeltaEntry { key, lsn, size, .. 
}| (*key, *lsn, *size))\n            .coalesce(|mut prev, cur| {\n                // Coalesce keys that belong to the same key pair.\n                // This ensures that compaction doesn't put them\n                // into different layer files.\n                // Still limit this by the target file size,\n                // so that we keep the size of the files in\n                // check.\n                if prev.0 == cur.0 && prev.2 < target_file_size {\n                    prev.2 += cur.2;\n                    Ok(prev)\n                } else {\n                    Err((prev, cur))\n                }\n            });\n\n        // Merge the contents of all the input delta layers into a new set\n        // of delta layers, based on the current partitioning.\n        //\n        // We split the new delta layers on the key dimension. We iterate through the key space, and for each key, check if including the next key to the current output layer we're building would cause the layer to become too large. If so, dump the current output layer and start new one.\n        // It's possible that there is a single key with so many page versions that storing all of them in a single layer file\n        // would be too large. 
In that case, we also split on the LSN dimension.\n        //\n        // LSN\n        //  ^\n        //  |\n        //  | +-----------+            +--+--+--+--+\n        //  | |           |            |  |  |  |  |\n        //  | +-----------+            |  |  |  |  |\n        //  | |           |            |  |  |  |  |\n        //  | +-----------+     ==>    |  |  |  |  |\n        //  | |           |            |  |  |  |  |\n        //  | +-----------+            |  |  |  |  |\n        //  | |           |            |  |  |  |  |\n        //  | +-----------+            +--+--+--+--+\n        //  |\n        //  +--------------> key\n        //\n        //\n        // If one key (X) has a lot of page versions:\n        //\n        // LSN\n        //  ^\n        //  |                                 (X)\n        //  | +-----------+            +--+--+--+--+\n        //  | |           |            |  |  |  |  |\n        //  | +-----------+            |  |  +--+  |\n        //  | |           |            |  |  |  |  |\n        //  | +-----------+     ==>    |  |  |  |  |\n        //  | |           |            |  |  +--+  |\n        //  | +-----------+            |  |  |  |  |\n        //  | |           |            |  |  |  |  |\n        //  | +-----------+            +--+--+--+--+\n        //  |\n        //  +--------------> key\n        // TODO: this actually divides the layers into fixed-size chunks, not\n        // based on the partitioning.\n        //\n        // TODO: we should also opportunistically materialize and\n        // garbage collect what we can.\n        let mut new_layers = Vec::new();\n        let mut prev_key: Option<Key> = None;\n        let mut writer: Option<DeltaLayerWriter> = None;\n        let mut key_values_total_size = 0u64;\n        let mut dup_start_lsn: Lsn = Lsn::INVALID; // start LSN of layer containing values of the single key\n        let mut dup_end_lsn: Lsn = Lsn::INVALID; // end LSN of layer containing values of the single 
key\n        let mut next_hole = 0; // index of next hole in holes vector\n\n        let mut keys = 0;\n\n        while let Some((key, lsn, value)) = all_values_iter\n            .next()\n            .await\n            .map_err(CompactionError::Other)?\n        {\n            keys += 1;\n\n            if keys % 32_768 == 0 && self.cancel.is_cancelled() {\n                // avoid hitting the cancellation token on every key. in benches, we end up\n                // shuffling an order of million keys per layer, this means we'll check it\n                // around tens of times per layer.\n                return Err(CompactionError::new_cancelled());\n            }\n\n            let same_key = prev_key == Some(key);\n            // We need to check key boundaries once we reach next key or end of layer with the same key\n            if !same_key || lsn == dup_end_lsn {\n                let mut next_key_size = 0u64;\n                let is_dup_layer = dup_end_lsn.is_valid();\n                dup_start_lsn = Lsn::INVALID;\n                if !same_key {\n                    dup_end_lsn = Lsn::INVALID;\n                }\n                // Determine size occupied by this key. 
We stop at next key or when size becomes larger than target_file_size\n                for (next_key, next_lsn, next_size) in all_keys_iter.by_ref() {\n                    next_key_size = next_size;\n                    if key != next_key {\n                        if dup_end_lsn.is_valid() {\n                            // We are writing segment with duplicates:\n                            // place all remaining values of this key in separate segment\n                            dup_start_lsn = dup_end_lsn; // new segment starts where old one stops\n                            dup_end_lsn = lsn_range.end; // there are no more values of this key till end of LSN range\n                        }\n                        break;\n                    }\n                    key_values_total_size += next_size;\n                    // Check if it is time to split segment: if total keys size is larger than target file size.\n                    // We need to avoid generation of empty segments if next_size > target_file_size.\n                    if key_values_total_size > target_file_size && lsn != next_lsn {\n                        // Split key between multiple layers: such layer can contain only single key\n                        dup_start_lsn = if dup_end_lsn.is_valid() {\n                            dup_end_lsn // new segment with duplicates starts where old one stops\n                        } else {\n                            lsn // start with the first LSN for this key\n                        };\n                        dup_end_lsn = next_lsn; // upper LSN boundary is exclusive\n                        break;\n                    }\n                }\n                // handle case when loop reaches last key: in this case dup_end is non-zero but dup_start is not set.\n                if dup_end_lsn.is_valid() && !dup_start_lsn.is_valid() {\n                    dup_start_lsn = dup_end_lsn;\n                    dup_end_lsn = lsn_range.end;\n                }\n      
          if writer.is_some() {\n                    let written_size = writer.as_mut().unwrap().size();\n                    let contains_hole =\n                        next_hole < holes.len() && key >= holes[next_hole].key_range.end;\n                    // check if key cause layer overflow or contains hole...\n                    if is_dup_layer\n                        || dup_end_lsn.is_valid()\n                        || written_size + key_values_total_size > target_file_size\n                        || contains_hole\n                    {\n                        // ... if so, flush previous layer and prepare to write new one\n                        let (desc, path) = writer\n                            .take()\n                            .unwrap()\n                            .finish(prev_key.unwrap().next(), ctx)\n                            .await\n                            .map_err(CompactionError::Other)?;\n                        let new_delta = Layer::finish_creating(self.conf, self, desc, &path)\n                            .map_err(CompactionError::Other)?;\n\n                        new_layers.push(new_delta);\n                        writer = None;\n\n                        if contains_hole {\n                            // skip hole\n                            next_hole += 1;\n                        }\n                    }\n                }\n                // Remember size of key value because at next iteration we will access next item\n                key_values_total_size = next_key_size;\n            }\n            fail_point!(\"delta-layer-writer-fail-before-finish\", |_| {\n                Err(CompactionError::Other(anyhow::anyhow!(\n                    \"failpoint delta-layer-writer-fail-before-finish\"\n                )))\n            });\n\n            if !self.shard_identity.is_key_disposable(&key) {\n                if writer.is_none() {\n                    if self.cancel.is_cancelled() {\n                        // to be 
somewhat responsive to cancellation, check for each new layer\n                        return Err(CompactionError::new_cancelled());\n                    }\n                    // Create writer if not initialized yet\n                    writer = Some(\n                        DeltaLayerWriter::new(\n                            self.conf,\n                            self.timeline_id,\n                            self.tenant_shard_id,\n                            key,\n                            if dup_end_lsn.is_valid() {\n                                // this is a layer containing slice of values of the same key\n                                debug!(\"Create new dup layer {}..{}\", dup_start_lsn, dup_end_lsn);\n                                dup_start_lsn..dup_end_lsn\n                            } else {\n                                debug!(\"Create new layer {}..{}\", lsn_range.start, lsn_range.end);\n                                lsn_range.clone()\n                            },\n                            &self.gate,\n                            self.cancel.clone(),\n                            ctx,\n                        )\n                        .await\n                        .map_err(CompactionError::Other)?,\n                    );\n\n                    keys = 0;\n                }\n\n                writer\n                    .as_mut()\n                    .unwrap()\n                    .put_value(key, lsn, value, ctx)\n                    .await?;\n            } else {\n                let owner = self.shard_identity.get_shard_number(&key);\n\n                // This happens after a shard split, when we're compacting an L0 created by our parent shard\n                debug!(\"dropping key {key} during compaction (it belongs on shard {owner})\");\n            }\n\n            if !new_layers.is_empty() {\n                fail_point!(\"after-timeline-compacted-first-L1\");\n            }\n\n            prev_key = Some(key);\n        }\n    
    if let Some(writer) = writer {\n            let (desc, path) = writer\n                .finish(prev_key.unwrap().next(), ctx)\n                .await\n                .map_err(CompactionError::Other)?;\n            let new_delta = Layer::finish_creating(self.conf, self, desc, &path)\n                .map_err(CompactionError::Other)?;\n            new_layers.push(new_delta);\n        }\n\n        // Sync layers\n        if !new_layers.is_empty() {\n            // Print a warning if the created layer is larger than double the target size\n            // Add two pages for potential overhead. This should in theory be already\n            // accounted for in the target calculation, but for very small targets,\n            // we still might easily hit the limit otherwise.\n            let warn_limit = target_file_size * 2 + page_cache::PAGE_SZ as u64 * 2;\n            for layer in new_layers.iter() {\n                if layer.layer_desc().file_size > warn_limit {\n                    warn!(\n                        %layer,\n                        \"created delta file of size {} larger than double of target of {target_file_size}\", layer.layer_desc().file_size\n                    );\n                }\n            }\n\n            // The writer.finish() above already did the fsync of the inodes.\n            // We just need to fsync the directory in which these inodes are linked,\n            // which we know to be the timeline directory.\n            //\n            // We use fatal_err() below because the after writer.finish() returns with success,\n            // the in-memory state of the filesystem already has the layer file in its final place,\n            // and subsequent pageserver code could think it's durable while it really isn't.\n            let timeline_dir = VirtualFile::open(\n                &self\n                    .conf\n                    .timeline_path(&self.tenant_shard_id, &self.timeline_id),\n                ctx,\n            )\n           
 .await\n            .fatal_err(\"VirtualFile::open for timeline dir fsync\");\n            timeline_dir\n                .sync_all()\n                .await\n                .fatal_err(\"VirtualFile::sync_all timeline dir\");\n        }\n\n        stats.write_layer_files_micros = stats.read_lock_drop_micros.till_now();\n        stats.new_deltas_count = Some(new_layers.len());\n        stats.new_deltas_size = Some(new_layers.iter().map(|l| l.layer_desc().file_size).sum());\n\n        match TryInto::<CompactLevel0Phase1Stats>::try_into(stats)\n            .and_then(|stats| serde_json::to_string(&stats).context(\"serde_json::to_string\"))\n        {\n            Ok(stats_json) => {\n                info!(\n                    stats_json = stats_json.as_str(),\n                    \"compact_level0_phase1 stats available\"\n                )\n            }\n            Err(e) => {\n                warn!(\"compact_level0_phase1 stats failed to serialize: {:#}\", e);\n            }\n        }\n\n        // Without this, rustc complains about deltas_to_compact still\n        // being borrowed when we `.into_iter()` below.\n        drop(all_values_iter);\n\n        Ok(CompactLevel0Phase1Result {\n            new_layers,\n            deltas_to_compact: deltas_to_compact\n                .into_iter()\n                .map(|x| x.drop_eviction_guard())\n                .collect::<Vec<_>>(),\n            outcome: if fully_compacted {\n                CompactionOutcome::Done\n            } else {\n                CompactionOutcome::Pending\n            },\n        })\n    }\n}\n\n#[derive(Default)]\nstruct CompactLevel0Phase1Result {\n    new_layers: Vec<ResidentLayer>,\n    deltas_to_compact: Vec<Layer>,\n    // Whether we have included all L0 layers, or selected only part of them due to the\n    // L0 compaction size limit.\n    outcome: CompactionOutcome,\n}\n\n#[derive(Default)]\nstruct CompactLevel0Phase1StatsBuilder {\n    version: Option<u64>,\n    tenant_id: 
Option<TenantShardId>,\n    timeline_id: Option<TimelineId>,\n    read_lock_acquisition_micros: DurationRecorder,\n    read_lock_held_key_sort_micros: DurationRecorder,\n    compaction_prerequisites_micros: DurationRecorder,\n    read_lock_held_compute_holes_micros: DurationRecorder,\n    read_lock_drop_micros: DurationRecorder,\n    write_layer_files_micros: DurationRecorder,\n    level0_deltas_count: Option<usize>,\n    new_deltas_count: Option<usize>,\n    new_deltas_size: Option<u64>,\n}\n\n#[derive(serde::Serialize)]\nstruct CompactLevel0Phase1Stats {\n    version: u64,\n    tenant_id: TenantShardId,\n    timeline_id: TimelineId,\n    read_lock_acquisition_micros: RecordedDuration,\n    read_lock_held_key_sort_micros: RecordedDuration,\n    compaction_prerequisites_micros: RecordedDuration,\n    read_lock_held_compute_holes_micros: RecordedDuration,\n    read_lock_drop_micros: RecordedDuration,\n    write_layer_files_micros: RecordedDuration,\n    level0_deltas_count: usize,\n    new_deltas_count: usize,\n    new_deltas_size: u64,\n}\n\nimpl TryFrom<CompactLevel0Phase1StatsBuilder> for CompactLevel0Phase1Stats {\n    type Error = anyhow::Error;\n\n    fn try_from(value: CompactLevel0Phase1StatsBuilder) -> Result<Self, Self::Error> {\n        Ok(Self {\n            version: value.version.ok_or_else(|| anyhow!(\"version not set\"))?,\n            tenant_id: value\n                .tenant_id\n                .ok_or_else(|| anyhow!(\"tenant_id not set\"))?,\n            timeline_id: value\n                .timeline_id\n                .ok_or_else(|| anyhow!(\"timeline_id not set\"))?,\n            read_lock_acquisition_micros: value\n                .read_lock_acquisition_micros\n                .into_recorded()\n                .ok_or_else(|| anyhow!(\"read_lock_acquisition_micros not set\"))?,\n            read_lock_held_key_sort_micros: value\n                .read_lock_held_key_sort_micros\n                .into_recorded()\n                .ok_or_else(|| 
anyhow!(\"read_lock_held_key_sort_micros not set\"))?,\n            compaction_prerequisites_micros: value\n                .compaction_prerequisites_micros\n                .into_recorded()\n                .ok_or_else(|| anyhow!(\"compaction_prerequisites_micros not set\"))?,\n            read_lock_held_compute_holes_micros: value\n                .read_lock_held_compute_holes_micros\n                .into_recorded()\n                .ok_or_else(|| anyhow!(\"read_lock_held_compute_holes_micros not set\"))?,\n            read_lock_drop_micros: value\n                .read_lock_drop_micros\n                .into_recorded()\n                .ok_or_else(|| anyhow!(\"read_lock_drop_micros not set\"))?,\n            write_layer_files_micros: value\n                .write_layer_files_micros\n                .into_recorded()\n                .ok_or_else(|| anyhow!(\"write_layer_files_micros not set\"))?,\n            level0_deltas_count: value\n                .level0_deltas_count\n                .ok_or_else(|| anyhow!(\"level0_deltas_count not set\"))?,\n            new_deltas_count: value\n                .new_deltas_count\n                .ok_or_else(|| anyhow!(\"new_deltas_count not set\"))?,\n            new_deltas_size: value\n                .new_deltas_size\n                .ok_or_else(|| anyhow!(\"new_deltas_size not set\"))?,\n        })\n    }\n}\n\nimpl Timeline {\n    /// Entry point for new tiered compaction algorithm.\n    ///\n    /// All the real work is in the implementation in the pageserver_compaction\n    /// crate. 
The code here would apply to any algorithm implemented by the\n    /// same interface, but tiered is the only one at the moment.\n    ///\n    /// TODO: cancellation\n    pub(crate) async fn compact_tiered(\n        self: &Arc<Self>,\n        _cancel: &CancellationToken,\n        ctx: &RequestContext,\n    ) -> Result<(), CompactionError> {\n        let fanout = self.get_compaction_threshold() as u64;\n        let target_file_size = self.get_checkpoint_distance();\n\n        // Find the top of the historical layers\n        let end_lsn = {\n            let guard = self.layers.read(LayerManagerLockHolder::Compaction).await;\n            let layers = guard.layer_map()?;\n\n            let l0_deltas = layers.level0_deltas();\n\n            // As an optimization, if we find that there are too few L0 layers,\n            // bail out early. We know that the compaction algorithm would do\n            // nothing in that case.\n            if l0_deltas.len() < fanout as usize {\n                // doesn't need compacting\n                return Ok(());\n            }\n            l0_deltas.iter().map(|l| l.lsn_range.end).max().unwrap()\n        };\n\n        // Is the timeline being deleted?\n        if self.is_stopping() {\n            trace!(\"Dropping out of compaction on timeline shutdown\");\n            return Err(CompactionError::new_cancelled());\n        }\n\n        let (dense_ks, _sparse_ks) = self\n            .collect_keyspace(end_lsn, ctx)\n            .await\n            .map_err(CompactionError::from_collect_keyspace)?;\n        // TODO(chi): ignore sparse_keyspace for now, compact it in the future.\n        let mut adaptor = TimelineAdaptor::new(self, (end_lsn, dense_ks));\n\n        pageserver_compaction::compact_tiered::compact_tiered(\n            &mut adaptor,\n            end_lsn,\n            target_file_size,\n            fanout,\n            ctx,\n        )\n        .await\n        // TODO: compact_tiered needs to return CompactionError\n        
.map_err(CompactionError::Other)?;\n\n        adaptor.flush_updates().await?;\n        Ok(())\n    }\n\n    /// Take a list of images and deltas, produce images and deltas according to GC horizon and retain_lsns.\n    ///\n    /// It takes a key, the values of the key within the compaction process, a GC horizon, and all retain_lsns below the horizon.\n    /// For now, it requires the `accumulated_values` contains the full history of the key (i.e., the key with the lowest LSN is\n    /// an image or a WAL not requiring a base image). This restriction will be removed once we implement gc-compaction on branch.\n    ///\n    /// The function returns the deltas and the base image that need to be placed at each of the retain LSN. For example, we have:\n    ///\n    /// A@0x10, +B@0x20, +C@0x30, +D@0x40, +E@0x50, +F@0x60\n    /// horizon = 0x50, retain_lsn = 0x20, 0x40, delta_threshold=3\n    ///\n    /// The function will produce:\n    ///\n    /// ```plain\n    /// 0x20(retain_lsn) -> img=AB@0x20                  always produce a single image below the lowest retain LSN\n    /// 0x40(retain_lsn) -> deltas=[+C@0x30, +D@0x40]    two deltas since the last base image, keeping the deltas\n    /// 0x50(horizon)    -> deltas=[ABCDE@0x50]          three deltas since the last base image, generate an image but put it in the delta\n    /// above_horizon    -> deltas=[+F@0x60]             full history above the horizon\n    /// ```\n    ///\n    /// Note that `accumulated_values` must be sorted by LSN and should belong to a single key.\n    #[allow(clippy::too_many_arguments)]\n    pub(crate) async fn generate_key_retention(\n        self: &Arc<Timeline>,\n        key: Key,\n        full_history: &[(Key, Lsn, Value)],\n        horizon: Lsn,\n        retain_lsn_below_horizon: &[Lsn],\n        delta_threshold_cnt: usize,\n        base_img_from_ancestor: Option<(Key, Lsn, Bytes)>,\n        verification: bool,\n    ) -> anyhow::Result<KeyHistoryRetention> {\n        // Pre-checks for 
the invariants\n\n        let debug_mode = cfg!(debug_assertions) || cfg!(feature = \"testing\");\n\n        if debug_mode {\n            for (log_key, _, _) in full_history {\n                assert_eq!(log_key, &key, \"mismatched key\");\n            }\n            for i in 1..full_history.len() {\n                assert!(full_history[i - 1].1 <= full_history[i].1, \"unordered LSN\");\n                if full_history[i - 1].1 == full_history[i].1 {\n                    assert!(\n                        matches!(full_history[i - 1].2, Value::Image(_)),\n                        \"unordered delta/image, or duplicated delta\"\n                    );\n                }\n            }\n            // There was an assertion for no base image that checks if the first\n            // record in the history is `will_init` before, but it was removed.\n            // This is explained in the test cases for generate_key_retention.\n            // Search \"incomplete history\" for more information.\n            for lsn in retain_lsn_below_horizon {\n                assert!(lsn < &horizon, \"retain lsn must be below horizon\")\n            }\n            for i in 1..retain_lsn_below_horizon.len() {\n                assert!(\n                    retain_lsn_below_horizon[i - 1] <= retain_lsn_below_horizon[i],\n                    \"unordered LSN\"\n                );\n            }\n        }\n        let has_ancestor = base_img_from_ancestor.is_some();\n        // Step 1: split history into len(retain_lsn_below_horizon) + 2 buckets, where the last bucket is for all deltas above the horizon,\n        // and the second-to-last bucket is for the horizon. 
Each bucket contains lsn_last_bucket < deltas <= lsn_this_bucket.\n        let (mut split_history, lsn_split_points) = {\n            let mut split_history = Vec::new();\n            split_history.resize_with(retain_lsn_below_horizon.len() + 2, Vec::new);\n            let mut lsn_split_points = Vec::with_capacity(retain_lsn_below_horizon.len() + 1);\n            for lsn in retain_lsn_below_horizon {\n                lsn_split_points.push(*lsn);\n            }\n            lsn_split_points.push(horizon);\n            let mut current_idx = 0;\n            for item @ (_, lsn, _) in full_history {\n                while current_idx < lsn_split_points.len() && *lsn > lsn_split_points[current_idx] {\n                    current_idx += 1;\n                }\n                split_history[current_idx].push(item);\n            }\n            (split_history, lsn_split_points)\n        };\n        // Step 2: filter out duplicated records due to the k-merge of image/delta layers\n        for split_for_lsn in &mut split_history {\n            let mut prev_lsn = None;\n            let mut new_split_for_lsn = Vec::with_capacity(split_for_lsn.len());\n            for record @ (_, lsn, _) in std::mem::take(split_for_lsn) {\n                if let Some(prev_lsn) = &prev_lsn {\n                    if *prev_lsn == lsn {\n                        // The case that we have an LSN with both data from the delta layer and the image layer. As\n                        // `ValueWrapper` ensures that an image is ordered before a delta at the same LSN, we simply\n                        // drop this delta and keep the image.\n                        //\n                        // For example, we have delta layer key1@0x10, key1@0x20, and image layer key1@0x10, we will\n                        // keep the image for key1@0x10 and the delta for key1@0x20. 
key1@0x10 delta will be simply\n                        // dropped.\n                        //\n                        // TODO: in case we have both delta + images for a given LSN and it does not exceed the delta\n                        // threshold, we could have kept delta instead to save space. This is an optimization for the future.\n                        continue;\n                    }\n                }\n                prev_lsn = Some(lsn);\n                new_split_for_lsn.push(record);\n            }\n            *split_for_lsn = new_split_for_lsn;\n        }\n        // Step 3: generate images when necessary\n        let mut retention = Vec::with_capacity(split_history.len());\n        let mut records_since_last_image = 0;\n        let batch_cnt = split_history.len();\n        assert!(\n            batch_cnt >= 2,\n            \"should have at least below + above horizon batches\"\n        );\n        let mut replay_history: Vec<(Key, Lsn, Value)> = Vec::new();\n        if let Some((key, lsn, ref img)) = base_img_from_ancestor {\n            replay_history.push((key, lsn, Value::Image(img.clone())));\n        }\n\n        /// Generate debug information for the replay history\n        fn generate_history_trace(replay_history: &[(Key, Lsn, Value)]) -> String {\n            use std::fmt::Write;\n            let mut output = String::new();\n            if let Some((key, _, _)) = replay_history.first() {\n                write!(output, \"key={key} \").unwrap();\n                let mut cnt = 0;\n                for (_, lsn, val) in replay_history {\n                    if val.is_image() {\n                        write!(output, \"i@{lsn} \").unwrap();\n                    } else if val.will_init() {\n                        write!(output, \"di@{lsn} \").unwrap();\n                    } else {\n                        write!(output, \"d@{lsn} \").unwrap();\n                    }\n                    cnt += 1;\n                    if cnt >= 128 {\n        
                write!(output, \"... and more\").unwrap();\n                        break;\n                    }\n                }\n            } else {\n                write!(output, \"<no history>\").unwrap();\n            }\n            output\n        }\n\n        fn generate_debug_trace(\n            replay_history: Option<&[(Key, Lsn, Value)]>,\n            full_history: &[(Key, Lsn, Value)],\n            lsns: &[Lsn],\n            horizon: Lsn,\n        ) -> String {\n            use std::fmt::Write;\n            let mut output = String::new();\n            if let Some(replay_history) = replay_history {\n                writeln!(\n                    output,\n                    \"replay_history: {}\",\n                    generate_history_trace(replay_history)\n                )\n                .unwrap();\n            } else {\n                writeln!(output, \"replay_history: <disabled>\",).unwrap();\n            }\n            writeln!(\n                output,\n                \"full_history: {}\",\n                generate_history_trace(full_history)\n            )\n            .unwrap();\n            writeln!(\n                output,\n                \"when processing: [{}] horizon={}\",\n                lsns.iter().map(|l| format!(\"{l}\")).join(\",\"),\n                horizon\n            )\n            .unwrap();\n            output\n        }\n\n        let mut key_exists = false;\n        for (i, split_for_lsn) in split_history.into_iter().enumerate() {\n            // TODO: there could be image keys inside the splits, and we can compute records_since_last_image accordingly.\n            records_since_last_image += split_for_lsn.len();\n            // Whether to produce an image into the final layer files\n            let produce_image = if i == 0 && !has_ancestor {\n                // We always generate images for the first batch (below horizon / lowest retain_lsn)\n                true\n            } else if i == batch_cnt - 1 {\n         
       // Do not generate images for the last batch (above horizon)\n                false\n            } else if records_since_last_image == 0 {\n                false\n            } else if records_since_last_image >= delta_threshold_cnt {\n                // Generate images when there are too many records\n                true\n            } else {\n                false\n            };\n            replay_history.extend(split_for_lsn.iter().map(|x| (*x).clone()));\n            // Only retain the items after the last image record\n            for idx in (0..replay_history.len()).rev() {\n                if replay_history[idx].2.will_init() {\n                    replay_history = replay_history[idx..].to_vec();\n                    break;\n                }\n            }\n            if replay_history.is_empty() && !key_exists {\n                // The key does not exist at earlier LSN, we can skip this iteration.\n                retention.push(Vec::new());\n                continue;\n            } else {\n                key_exists = true;\n            }\n            let Some((_, _, val)) = replay_history.first() else {\n                unreachable!(\"replay history should not be empty once it exists\")\n            };\n            if !val.will_init() {\n                return Err(anyhow::anyhow!(\"invalid history, no base image\")).with_context(|| {\n                    generate_debug_trace(\n                        Some(&replay_history),\n                        full_history,\n                        retain_lsn_below_horizon,\n                        horizon,\n                    )\n                });\n            }\n            // Whether to reconstruct the image. 
In debug mode, we will generate an image\n            // at every retain_lsn to ensure data is not corrupted, but we won't put the\n            // image into the final layer.\n            let img_and_lsn = if produce_image {\n                records_since_last_image = 0;\n                let replay_history_for_debug = if debug_mode {\n                    Some(replay_history.clone())\n                } else {\n                    None\n                };\n                let replay_history_for_debug_ref = replay_history_for_debug.as_deref();\n                let history = std::mem::take(&mut replay_history);\n                let mut img = None;\n                let mut records = Vec::with_capacity(history.len());\n                if let (_, lsn, Value::Image(val)) = history.first().as_ref().unwrap() {\n                    img = Some((*lsn, val.clone()));\n                    for (_, lsn, val) in history.into_iter().skip(1) {\n                        let Value::WalRecord(rec) = val else {\n                            return Err(anyhow::anyhow!(\n                                \"invalid record, first record is image, expect walrecords\"\n                            ))\n                            .with_context(|| {\n                                generate_debug_trace(\n                                    replay_history_for_debug_ref,\n                                    full_history,\n                                    retain_lsn_below_horizon,\n                                    horizon,\n                                )\n                            });\n                        };\n                        records.push((lsn, rec));\n                    }\n                } else {\n                    for (_, lsn, val) in history.into_iter() {\n                        let Value::WalRecord(rec) = val else {\n                            return Err(anyhow::anyhow!(\"invalid record, first record is walrecord, expect rest are walrecord\"))\n                             
   .with_context(|| generate_debug_trace(\n                                    replay_history_for_debug_ref,\n                                    full_history,\n                                    retain_lsn_below_horizon,\n                                    horizon,\n                                ));\n                        };\n                        records.push((lsn, rec));\n                    }\n                }\n                // WAL redo requires records in the reverse LSN order\n                records.reverse();\n                let state = ValueReconstructState { img, records };\n                // last batch does not generate image so i is always in range, unless we force generate\n                // an image during testing\n                let request_lsn = if i >= lsn_split_points.len() {\n                    Lsn::MAX\n                } else {\n                    lsn_split_points[i]\n                };\n                let img = self\n                    .reconstruct_value(key, request_lsn, state, RedoAttemptType::GcCompaction)\n                    .await?;\n                Some((request_lsn, img))\n            } else {\n                None\n            };\n            if produce_image {\n                let (request_lsn, img) = img_and_lsn.unwrap();\n                replay_history.push((key, request_lsn, Value::Image(img.clone())));\n                retention.push(vec![(request_lsn, Value::Image(img))]);\n            } else {\n                let deltas = split_for_lsn\n                    .iter()\n                    .map(|(_, lsn, value)| (*lsn, value.clone()))\n                    .collect_vec();\n                retention.push(deltas);\n            }\n        }\n        let mut result = Vec::with_capacity(retention.len());\n        assert_eq!(retention.len(), lsn_split_points.len() + 1);\n        for (idx, logs) in retention.into_iter().enumerate() {\n            if idx == lsn_split_points.len() {\n                let retention = 
KeyHistoryRetention {\n                    below_horizon: result,\n                    above_horizon: KeyLogAtLsn(logs),\n                };\n                if verification {\n                    retention\n                        .verify(key, &base_img_from_ancestor, full_history, self)\n                        .await?;\n                }\n                return Ok(retention);\n            } else {\n                result.push((lsn_split_points[idx], KeyLogAtLsn(logs)));\n            }\n        }\n        unreachable!(\"key retention is empty\")\n    }\n\n    /// Check how much space is left on the disk\n    async fn check_available_space(self: &Arc<Self>) -> anyhow::Result<u64> {\n        let tenants_dir = self.conf.tenants_path();\n\n        let stat = Statvfs::get(&tenants_dir, None)\n            .context(\"statvfs failed, presumably directory got unlinked\")?;\n\n        let (avail_bytes, _) = stat.get_avail_total_bytes();\n\n        Ok(avail_bytes)\n    }\n\n    /// Check if the compaction can proceed safely without running out of space. We assume the size\n    /// upper bound of the produced files of a compaction job is the same as all layers involved in\n    /// the compaction. 
Therefore, we need `2 * layers_to_be_compacted_size` at least to do a\n    /// compaction.\n    async fn check_compaction_space(\n        self: &Arc<Self>,\n        layer_selection: &[Layer],\n    ) -> Result<(), CompactionError> {\n        let available_space = self\n            .check_available_space()\n            .await\n            .map_err(CompactionError::Other)?;\n        let mut remote_layer_size = 0;\n        let mut all_layer_size = 0;\n        for layer in layer_selection {\n            let needs_download = layer\n                .needs_download()\n                .await\n                .context(\"failed to check if layer needs download\")\n                .map_err(CompactionError::Other)?;\n            if needs_download.is_some() {\n                remote_layer_size += layer.layer_desc().file_size;\n            }\n            all_layer_size += layer.layer_desc().file_size;\n        }\n        let allocated_space = (available_space as f64 * 0.8) as u64; /* reserve 20% space for other tasks */\n        if all_layer_size /* space needed for newly-generated file */ + remote_layer_size /* space for downloading layers */ > allocated_space\n        {\n            return Err(CompactionError::Other(anyhow!(\n                \"not enough space for compaction: available_space={}, allocated_space={}, all_layer_size={}, remote_layer_size={}, required_space={}\",\n                available_space,\n                allocated_space,\n                all_layer_size,\n                remote_layer_size,\n                all_layer_size + remote_layer_size\n            )));\n        }\n        Ok(())\n    }\n\n    /// Check to bail out of gc compaction early if it would use too much memory.\n    async fn check_memory_usage(\n        self: &Arc<Self>,\n        layer_selection: &[Layer],\n    ) -> Result<(), CompactionError> {\n        let mut estimated_memory_usage_mb = 0.0;\n        let mut num_image_layers = 0;\n        let mut num_delta_layers = 0;\n        let 
target_layer_size_bytes = 256 * 1024 * 1024;\n        for layer in layer_selection {\n            let layer_desc = layer.layer_desc();\n            if layer_desc.is_delta() {\n                // Delta layers at most have 1MB buffer; 3x to make it safe (there're deltas as large as 16KB).\n                // Scale it by target_layer_size_bytes so that tests can pass (some tests, e.g., `test_pageserver_gc_compaction_preempt\n                // use 3MB layer size and we need to account for that).\n                estimated_memory_usage_mb +=\n                    3.0 * (layer_desc.file_size / target_layer_size_bytes) as f64;\n                num_delta_layers += 1;\n            } else {\n                // Image layers at most have 1MB buffer but it might be compressed; assume 5x compression ratio.\n                estimated_memory_usage_mb +=\n                    5.0 * (layer_desc.file_size / target_layer_size_bytes) as f64;\n                num_image_layers += 1;\n            }\n        }\n        if estimated_memory_usage_mb > 1024.0 {\n            return Err(CompactionError::Other(anyhow!(\n                \"estimated memory usage is too high: {}MB, giving up compaction; num_image_layers={}, num_delta_layers={}\",\n                estimated_memory_usage_mb,\n                num_image_layers,\n                num_delta_layers\n            )));\n        }\n        Ok(())\n    }\n\n    /// Get a watermark for gc-compaction, that is the lowest LSN that we can use as the `gc_horizon` for\n    /// the compaction algorithm. 
It is min(space_cutoff, time_cutoff, latest_gc_cutoff, standby_horizon).\n    /// Leases and retain_lsns are considered in the gc-compaction job itself so we don't need to account for them\n    /// here.\n    pub(crate) fn get_gc_compaction_watermark(self: &Arc<Self>) -> Lsn {\n        let gc_cutoff_lsn = {\n            let gc_info = self.gc_info.read().unwrap();\n            gc_info.min_cutoff()\n        };\n\n        // TODO: standby horizon should use leases so we don't really need to consider it here.\n        // let watermark = watermark.min(self.standby_horizon.load());\n\n        // TODO: ensure the child branches will not use anything below the watermark, or consider\n        // them when computing the watermark.\n        gc_cutoff_lsn.min(*self.get_applied_gc_cutoff_lsn())\n    }\n\n    /// Split a gc-compaction job into multiple compaction jobs. The split is based on the key range and the estimated size of the compaction job.\n    /// The function returns a list of compaction jobs that can be executed separately. 
If the upper bound of the compact LSN\n    /// range is not specified, we will use the latest gc_cutoff as the upper bound, so that all jobs in the jobset acts\n    /// like a full compaction of the specified keyspace.\n    pub(crate) async fn gc_compaction_split_jobs(\n        self: &Arc<Self>,\n        job: GcCompactJob,\n        sub_compaction_max_job_size_mb: Option<u64>,\n    ) -> Result<Vec<GcCompactJob>, CompactionError> {\n        let compact_below_lsn = if job.compact_lsn_range.end != Lsn::MAX {\n            job.compact_lsn_range.end\n        } else {\n            self.get_gc_compaction_watermark()\n        };\n\n        if compact_below_lsn == Lsn::INVALID {\n            tracing::warn!(\n                \"no layers to compact with gc: gc_cutoff not generated yet, skipping gc bottom-most compaction\"\n            );\n            return Ok(vec![]);\n        }\n\n        // Split compaction job to about 4GB each\n        const GC_COMPACT_MAX_SIZE_MB: u64 = 4 * 1024;\n        let sub_compaction_max_job_size_mb =\n            sub_compaction_max_job_size_mb.unwrap_or(GC_COMPACT_MAX_SIZE_MB);\n\n        let mut compact_jobs = Vec::<GcCompactJob>::new();\n        // For now, we simply use the key partitioning information; we should do a more fine-grained partitioning\n        // by estimating the amount of files read for a compaction job. 
We should also partition on LSN.\n        let ((dense_ks, sparse_ks), _) = self.partitioning.read().as_ref().clone();\n        // Truncate the key range to be within user specified compaction range.\n        fn truncate_to(\n            source_start: &Key,\n            source_end: &Key,\n            target_start: &Key,\n            target_end: &Key,\n        ) -> Option<(Key, Key)> {\n            let start = source_start.max(target_start);\n            let end = source_end.min(target_end);\n            if start < end {\n                Some((*start, *end))\n            } else {\n                None\n            }\n        }\n        let mut split_key_ranges = Vec::new();\n        let ranges = dense_ks\n            .parts\n            .iter()\n            .map(|partition| partition.ranges.iter())\n            .chain(sparse_ks.parts.iter().map(|x| x.0.ranges.iter()))\n            .flatten()\n            .cloned()\n            .collect_vec();\n        for range in ranges.iter() {\n            let Some((start, end)) = truncate_to(\n                &range.start,\n                &range.end,\n                &job.compact_key_range.start,\n                &job.compact_key_range.end,\n            ) else {\n                continue;\n            };\n            split_key_ranges.push((start, end));\n        }\n        split_key_ranges.sort();\n        let all_layers = {\n            let guard = self.layers.read(LayerManagerLockHolder::Compaction).await;\n            let layer_map = guard.layer_map()?;\n            layer_map.iter_historic_layers().collect_vec()\n        };\n        let mut current_start = None;\n        let ranges_num = split_key_ranges.len();\n        for (idx, (start, end)) in split_key_ranges.into_iter().enumerate() {\n            if current_start.is_none() {\n                current_start = Some(start);\n            }\n            let start = current_start.unwrap();\n            if start >= end {\n                // We have already processed this 
partition.\n                continue;\n            }\n            let overlapping_layers = {\n                let mut desc = Vec::new();\n                for layer in all_layers.iter() {\n                    if overlaps_with(&layer.get_key_range(), &(start..end))\n                        && layer.get_lsn_range().start <= compact_below_lsn\n                    {\n                        desc.push(layer.clone());\n                    }\n                }\n                desc\n            };\n            let total_size = overlapping_layers.iter().map(|x| x.file_size).sum::<u64>();\n            if total_size > sub_compaction_max_job_size_mb * 1024 * 1024 || ranges_num == idx + 1 {\n                // Try to extend the compaction range so that we include at least one full layer file.\n                let extended_end = overlapping_layers\n                    .iter()\n                    .map(|layer| layer.key_range.end)\n                    .min();\n                // It is possible that the search range does not contain any layer files when we reach the end of the loop.\n                // In this case, we simply use the specified key range end.\n                let end = if let Some(extended_end) = extended_end {\n                    extended_end.max(end)\n                } else {\n                    end\n                };\n                let end = if ranges_num == idx + 1 {\n                    // extend the compaction range to the end of the key range if it's the last partition\n                    end.max(job.compact_key_range.end)\n                } else {\n                    end\n                };\n                if total_size == 0 && !compact_jobs.is_empty() {\n                    info!(\n                        \"splitting compaction job: {}..{}, estimated_size={}, extending the previous job\",\n                        start, end, total_size\n                    );\n                    compact_jobs.last_mut().unwrap().compact_key_range.end = end;\n       
             current_start = Some(end);\n                } else {\n                    info!(\n                        \"splitting compaction job: {}..{}, estimated_size={}\",\n                        start, end, total_size\n                    );\n                    compact_jobs.push(GcCompactJob {\n                        dry_run: job.dry_run,\n                        compact_key_range: start..end,\n                        compact_lsn_range: job.compact_lsn_range.start..compact_below_lsn,\n                        do_metadata_compaction: false,\n                    });\n                    current_start = Some(end);\n                }\n            }\n        }\n        Ok(compact_jobs)\n    }\n\n    /// An experimental compaction building block that combines compaction with garbage collection.\n    ///\n    /// The current implementation picks all delta + image layers that are below or intersecting with\n    /// the GC horizon without considering retain_lsns. Then, it does a full compaction over all these delta\n    /// layers and image layers, which generates image layers on the gc horizon, drops deltas below gc horizon,\n    /// and creates delta layers with all deltas >= gc horizon.\n    ///\n    /// If `options.compact_range` is provided, it will only compact the keys within the range, aka partial compaction.\n    /// Partial compaction will read and process all layers overlapping with the key range, even if it might\n    /// contain extra keys. After the gc-compaction phase completes, delta layers that are not fully contained\n    /// within the key range will be rewritten to ensure they do not overlap with the delta layers. Providing\n    /// `Key::MIN..Key::MAX` to the function indicates a full compaction, though technically, `Key::MAX` is not\n    /// part of the range.\n    ///\n    /// If `options.compact_lsn_range.end` is provided, the compaction will only compact layers below or intersecting with\n    /// the LSN. 
Otherwise, it will use the gc cutoff by default.\n    pub(crate) async fn compact_with_gc(\n        self: &Arc<Self>,\n        cancel: &CancellationToken,\n        options: CompactOptions,\n        ctx: &RequestContext,\n    ) -> Result<CompactionOutcome, CompactionError> {\n        let sub_compaction = options.sub_compaction;\n        let job = GcCompactJob::from_compact_options(options.clone());\n        let yield_for_l0 = options.flags.contains(CompactFlags::YieldForL0);\n        if sub_compaction {\n            info!(\n                \"running enhanced gc bottom-most compaction with sub-compaction, splitting compaction jobs\"\n            );\n            let jobs = self\n                .gc_compaction_split_jobs(job, options.sub_compaction_max_job_size_mb)\n                .await?;\n            let jobs_len = jobs.len();\n            for (idx, job) in jobs.into_iter().enumerate() {\n                let sub_compaction_progress = format!(\"{}/{}\", idx + 1, jobs_len);\n                self.compact_with_gc_inner(cancel, job, ctx, yield_for_l0)\n                    .instrument(info_span!(\n                        \"sub_compaction\",\n                        sub_compaction_progress = sub_compaction_progress\n                    ))\n                    .await?;\n            }\n            if jobs_len == 0 {\n                info!(\"no jobs to run, skipping gc bottom-most compaction\");\n            }\n            return Ok(CompactionOutcome::Done);\n        }\n        self.compact_with_gc_inner(cancel, job, ctx, yield_for_l0)\n            .await\n    }\n\n    async fn compact_with_gc_inner(\n        self: &Arc<Self>,\n        cancel: &CancellationToken,\n        mut job: GcCompactJob,\n        ctx: &RequestContext,\n        yield_for_l0: bool,\n    ) -> Result<CompactionOutcome, CompactionError> {\n        // Block other compaction/GC tasks from running for now. GC-compaction could run along\n        // with legacy compaction tasks in the future. 
Always ensure the lock order is compaction -> gc.\n        // Note that we already acquired the compaction lock when the outer `compact` function gets called.\n\n        // If the job is not configured to compact the metadata key range, shrink the key range\n        // to exclude the metadata key range. The check is done by checking if the end of the key range\n        // is larger than the start of the metadata key range. Note that metadata keys cover the entire\n        // second half of the keyspace, so it's enough to only check the end of the key range.\n        if !job.do_metadata_compaction\n            && job.compact_key_range.end > Key::metadata_key_range().start\n        {\n            tracing::info!(\n                \"compaction for metadata key range is not supported yet, overriding compact_key_range from {} to {}\",\n                job.compact_key_range.end,\n                Key::metadata_key_range().start\n            );\n            // Shrink the key range to exclude the metadata key range.\n            job.compact_key_range.end = Key::metadata_key_range().start;\n\n            // Skip the job if the key range completely lies within the metadata key range.\n            if job.compact_key_range.start >= job.compact_key_range.end {\n                tracing::info!(\"compact_key_range is empty, skipping compaction\");\n                return Ok(CompactionOutcome::Done);\n            }\n        }\n\n        let timer = Instant::now();\n        let begin_timer = timer;\n\n        let gc_lock = async {\n            tokio::select! 
{\n                guard = self.gc_lock.lock() => Ok(guard),\n                _ = cancel.cancelled() => Err(CompactionError::new_cancelled()),\n            }\n        };\n\n        let time_acquire_lock = timer.elapsed();\n        let timer = Instant::now();\n\n        let gc_lock = crate::timed(\n            gc_lock,\n            \"acquires gc lock\",\n            std::time::Duration::from_secs(5),\n        )\n        .await?;\n\n        let dry_run = job.dry_run;\n        let compact_key_range = job.compact_key_range;\n        let compact_lsn_range = job.compact_lsn_range;\n\n        let debug_mode = cfg!(debug_assertions) || cfg!(feature = \"testing\");\n\n        info!(\n            \"running enhanced gc bottom-most compaction, dry_run={dry_run}, compact_key_range={}..{}, compact_lsn_range={}..{}\",\n            compact_key_range.start,\n            compact_key_range.end,\n            compact_lsn_range.start,\n            compact_lsn_range.end\n        );\n\n        scopeguard::defer! {\n            info!(\"done enhanced gc bottom-most compaction\");\n        };\n\n        let mut stat = CompactionStatistics::default();\n\n        // Step 0: pick all delta layers + image layers below/intersect with the GC horizon.\n        // The layer selection has the following properties:\n        // 1. If a layer is in the selection, all layers below it are in the selection.\n        // 2. 
Inferred from (1), for each key in the layer selection, the value can be reconstructed only with the layers in the layer selection.\n        let job_desc = {\n            let guard = self\n                .layers\n                .read(LayerManagerLockHolder::GarbageCollection)\n                .await;\n            let layers = guard.layer_map()?;\n            let gc_info = self.gc_info.read().unwrap();\n            let mut retain_lsns_below_horizon = Vec::new();\n            let gc_cutoff = {\n                // Currently, gc-compaction only kicks in after the legacy gc has updated the gc_cutoff.\n                // Therefore, it can only clean up data that cannot be cleaned up with legacy gc, instead of\n                // cleaning everything that it theoretically could. In the future, it should use `self.gc_info`\n                // to get the truth data.\n                let real_gc_cutoff = self.get_gc_compaction_watermark();\n                // The compaction algorithm will keep all keys above the gc_cutoff while keeping only necessary keys below the gc_cutoff for\n                // each of the retain_lsn. 
Therefore, if the user-provided `compact_lsn_range.end` is larger than the real gc cutoff, we will use\n                // the real cutoff.\n                let mut gc_cutoff = if compact_lsn_range.end == Lsn::MAX {\n                    if real_gc_cutoff == Lsn::INVALID {\n                        // If the gc_cutoff is not generated yet, we should not compact anything.\n                        tracing::warn!(\n                            \"no layers to compact with gc: gc_cutoff not generated yet, skipping gc bottom-most compaction\"\n                        );\n                        return Ok(CompactionOutcome::Skipped);\n                    }\n                    real_gc_cutoff\n                } else {\n                    compact_lsn_range.end\n                };\n                if gc_cutoff > real_gc_cutoff {\n                    warn!(\n                        \"provided compact_lsn_range.end={} is larger than the real_gc_cutoff={}, using the real gc cutoff\",\n                        gc_cutoff, real_gc_cutoff\n                    );\n                    gc_cutoff = real_gc_cutoff;\n                }\n                gc_cutoff\n            };\n            for (lsn, _timeline_id, _is_offloaded) in &gc_info.retain_lsns {\n                if lsn < &gc_cutoff {\n                    retain_lsns_below_horizon.push(*lsn);\n                }\n            }\n            for lsn in gc_info.leases.keys() {\n                if lsn < &gc_cutoff {\n                    retain_lsns_below_horizon.push(*lsn);\n                }\n            }\n            let mut selected_layers: Vec<Layer> = Vec::new();\n            drop(gc_info);\n            // Firstly, pick all the layers intersect or below the gc_cutoff, get the largest LSN in the selected layers.\n            let Some(max_layer_lsn) = layers\n                .iter_historic_layers()\n                .filter(|desc| desc.get_lsn_range().start <= gc_cutoff)\n                .map(|desc| desc.get_lsn_range().end)\n          
      .max()\n            else {\n                info!(\n                    \"no layers to compact with gc: no historic layers below gc_cutoff, gc_cutoff={}\",\n                    gc_cutoff\n                );\n                return Ok(CompactionOutcome::Done);\n            };\n            // Next, if the user specifies compact_lsn_range.start, we need to filter some layers out. All the layers (strictly) below\n            // the min_layer_lsn computed as below will be filtered out and the data will be accessed using the normal read path, as if\n            // it is a branch.\n            let Some(min_layer_lsn) = layers\n                .iter_historic_layers()\n                .filter(|desc| {\n                    if compact_lsn_range.start == Lsn::INVALID {\n                        true // select all layers below if start == Lsn(0)\n                    } else {\n                        desc.get_lsn_range().end > compact_lsn_range.start // strictly larger than compact_above_lsn\n                    }\n                })\n                .map(|desc| desc.get_lsn_range().start)\n                .min()\n            else {\n                info!(\n                    \"no layers to compact with gc: no historic layers above compact_above_lsn, compact_above_lsn={}\",\n                    compact_lsn_range.end\n                );\n                return Ok(CompactionOutcome::Done);\n            };\n            // Then, pick all the layers that are below the max_layer_lsn. 
This is to ensure we can pick all single-key\n            // layers to compact.\n            let mut rewrite_layers = Vec::new();\n            for desc in layers.iter_historic_layers() {\n                if desc.get_lsn_range().end <= max_layer_lsn\n                    && desc.get_lsn_range().start >= min_layer_lsn\n                    && overlaps_with(&desc.get_key_range(), &compact_key_range)\n                {\n                    // If the layer overlaps with the compaction key range, we need to read it to obtain all keys within the range,\n                    // even if it might contain extra keys\n                    selected_layers.push(guard.get_from_desc(&desc));\n                    // If the layer is not fully contained within the key range, we need to rewrite it if it's a delta layer (it's fine\n                    // to overlap image layers)\n                    if desc.is_delta() && !fully_contains(&compact_key_range, &desc.get_key_range())\n                    {\n                        rewrite_layers.push(desc);\n                    }\n                }\n            }\n            if selected_layers.is_empty() {\n                info!(\n                    \"no layers to compact with gc: no layers within the key range, gc_cutoff={}, key_range={}..{}\",\n                    gc_cutoff, compact_key_range.start, compact_key_range.end\n                );\n                return Ok(CompactionOutcome::Done);\n            }\n            retain_lsns_below_horizon.sort();\n            GcCompactionJobDescription {\n                selected_layers,\n                gc_cutoff,\n                retain_lsns_below_horizon,\n                min_layer_lsn,\n                max_layer_lsn,\n                compaction_key_range: compact_key_range,\n                rewrite_layers,\n            }\n        };\n        let (has_data_below, lowest_retain_lsn) = if compact_lsn_range.start != Lsn::INVALID {\n            // If we only compact above some LSN, we should get the 
history from the current branch below the specified LSN.\n            // We use job_desc.min_layer_lsn as if it's the lowest branch point.\n            (true, job_desc.min_layer_lsn)\n        } else if self.ancestor_timeline.is_some() {\n            // In theory, we can also use min_layer_lsn here, but using ancestor LSN makes sure the delta layers cover the\n            // LSN ranges all the way to the ancestor timeline.\n            (true, self.ancestor_lsn)\n        } else {\n            let res = job_desc\n                .retain_lsns_below_horizon\n                .first()\n                .copied()\n                .unwrap_or(job_desc.gc_cutoff);\n            if debug_mode {\n                assert_eq!(\n                    res,\n                    job_desc\n                        .retain_lsns_below_horizon\n                        .iter()\n                        .min()\n                        .copied()\n                        .unwrap_or(job_desc.gc_cutoff)\n                );\n            }\n            (false, res)\n        };\n\n        let verification = self.get_gc_compaction_settings().gc_compaction_verification;\n\n        info!(\n            \"picked {} layers for compaction ({} layers need rewriting) with max_layer_lsn={} min_layer_lsn={} gc_cutoff={} lowest_retain_lsn={}, key_range={}..{}, has_data_below={}\",\n            job_desc.selected_layers.len(),\n            job_desc.rewrite_layers.len(),\n            job_desc.max_layer_lsn,\n            job_desc.min_layer_lsn,\n            job_desc.gc_cutoff,\n            lowest_retain_lsn,\n            job_desc.compaction_key_range.start,\n            job_desc.compaction_key_range.end,\n            has_data_below,\n        );\n\n        let time_analyze = timer.elapsed();\n        let timer = Instant::now();\n\n        for layer in &job_desc.selected_layers {\n            debug!(\"read layer: {}\", layer.layer_desc().key());\n        }\n        for layer in &job_desc.rewrite_layers {\n            
debug!(\"rewrite layer: {}\", layer.key());\n        }\n\n        self.check_compaction_space(&job_desc.selected_layers)\n            .await?;\n\n        self.check_memory_usage(&job_desc.selected_layers).await?;\n        if job_desc.selected_layers.len() > 100\n            && job_desc.rewrite_layers.len() as f64 >= job_desc.selected_layers.len() as f64 * 0.7\n        {\n            return Err(CompactionError::Other(anyhow!(\n                \"too many layers to rewrite: {} / {}, giving up compaction\",\n                job_desc.rewrite_layers.len(),\n                job_desc.selected_layers.len()\n            )));\n        }\n\n        // Generate statistics for the compaction\n        for layer in &job_desc.selected_layers {\n            let desc = layer.layer_desc();\n            if desc.is_delta() {\n                stat.visit_delta_layer(desc.file_size());\n            } else {\n                stat.visit_image_layer(desc.file_size());\n            }\n        }\n\n        // Step 1: construct a k-merge iterator over all layers.\n        // Also, verify if the layer map can be split by drawing a horizontal line at every LSN start/end split point.\n        let layer_names = job_desc\n            .selected_layers\n            .iter()\n            .map(|layer| layer.layer_desc().layer_name())\n            .collect_vec();\n        if let Some(err) = check_valid_layermap(&layer_names) {\n            return Err(CompactionError::Other(anyhow!(\n                \"gc-compaction layer map check failed because {}, cannot proceed with compaction due to potential data loss\",\n                err\n            )));\n        }\n        // The maximum LSN we are processing in this compaction loop\n        let end_lsn = job_desc\n            .selected_layers\n            .iter()\n            .map(|l| l.layer_desc().lsn_range.end)\n            .max()\n            .unwrap();\n        let mut delta_layers = Vec::new();\n        let mut image_layers = Vec::new();\n        let mut 
downloaded_layers = Vec::new();\n        let mut total_downloaded_size = 0;\n        let mut total_layer_size = 0;\n        for layer in &job_desc.selected_layers {\n            if layer\n                .needs_download()\n                .await\n                .context(\"failed to check if layer needs download\")\n                .map_err(CompactionError::Other)?\n                .is_some()\n            {\n                total_downloaded_size += layer.layer_desc().file_size;\n            }\n            total_layer_size += layer.layer_desc().file_size;\n            if cancel.is_cancelled() {\n                return Err(CompactionError::new_cancelled());\n            }\n            let should_yield = yield_for_l0\n                && self\n                    .l0_compaction_trigger\n                    .notified()\n                    .now_or_never()\n                    .is_some();\n            if should_yield {\n                tracing::info!(\"preempt gc-compaction when downloading layers: too many L0 layers\");\n                return Ok(CompactionOutcome::YieldForL0);\n            }\n            let resident_layer = layer\n                .download_and_keep_resident(ctx)\n                .await\n                .context(\"failed to download and keep resident layer\")\n                .map_err(CompactionError::Other)?;\n            downloaded_layers.push(resident_layer);\n        }\n        info!(\n            \"finish downloading layers, downloaded={}, total={}, ratio={:.2}\",\n            total_downloaded_size,\n            total_layer_size,\n            total_downloaded_size as f64 / total_layer_size as f64\n        );\n        for resident_layer in &downloaded_layers {\n            if resident_layer.layer_desc().is_delta() {\n                let layer = resident_layer\n                    .get_as_delta(ctx)\n                    .await\n                    .context(\"failed to get delta layer\")\n                    .map_err(CompactionError::Other)?;\n       
         delta_layers.push(layer);\n            } else {\n                let layer = resident_layer\n                    .get_as_image(ctx)\n                    .await\n                    .context(\"failed to get image layer\")\n                    .map_err(CompactionError::Other)?;\n                image_layers.push(layer);\n            }\n        }\n        let (dense_ks, sparse_ks) = self\n            .collect_gc_compaction_keyspace()\n            .await\n            .context(\"failed to collect gc compaction keyspace\")\n            .map_err(CompactionError::Other)?;\n        let mut merge_iter = FilterIterator::create(\n            MergeIterator::create_with_options(\n                &delta_layers,\n                &image_layers,\n                ctx,\n                128 * 8192, /* 1MB buffer for each of the inner iterators */\n                128,\n            ),\n            dense_ks,\n            sparse_ks,\n        )\n        .context(\"failed to create filter iterator\")\n        .map_err(CompactionError::Other)?;\n\n        let time_download_layer = timer.elapsed();\n        let mut timer = Instant::now();\n\n        // Step 2: Produce images+deltas.\n        let mut accumulated_values = Vec::new();\n        let mut accumulated_values_estimated_size = 0;\n        let mut last_key: Option<Key> = None;\n\n        // Only create image layers when there is no ancestor branches. 
TODO: create covering image layer\n        // when some condition is met.\n        let mut image_layer_writer = if !has_data_below {\n            Some(SplitImageLayerWriter::new(\n                self.conf,\n                self.timeline_id,\n                self.tenant_shard_id,\n                job_desc.compaction_key_range.start,\n                lowest_retain_lsn,\n                self.get_compaction_target_size(),\n                &self.gate,\n                self.cancel.clone(),\n            ))\n        } else {\n            None\n        };\n\n        let mut delta_layer_writer = SplitDeltaLayerWriter::new(\n            self.conf,\n            self.timeline_id,\n            self.tenant_shard_id,\n            lowest_retain_lsn..end_lsn,\n            self.get_compaction_target_size(),\n            &self.gate,\n            self.cancel.clone(),\n        );\n\n        #[derive(Default)]\n        struct RewritingLayers {\n            before: Option<DeltaLayerWriter>,\n            after: Option<DeltaLayerWriter>,\n        }\n        let mut delta_layer_rewriters = HashMap::<Arc<PersistentLayerKey>, RewritingLayers>::new();\n\n        /// When compacting not at a bottom range (=`[0,X)`) of the root branch, we \"have data below\" (`has_data_below=true`).\n        /// The two cases are compaction in ancestor branches and when `compact_lsn_range.start` is set.\n        /// In those cases, we need to pull up data from below the LSN range we're compacting.\n        ///\n        /// This function unifies the cases so that later code doesn't have to think about it.\n        ///\n        /// Currently, we always get the ancestor image for each key in the child branch no matter whether the image\n        /// is needed for reconstruction. 
This should be fixed in the future.\n        ///\n        /// Furthermore, we should do vectored get instead of a single get, or better, use k-merge for ancestor\n        /// images.\n        async fn get_ancestor_image(\n            this_tline: &Arc<Timeline>,\n            key: Key,\n            ctx: &RequestContext,\n            has_data_below: bool,\n            history_lsn_point: Lsn,\n        ) -> anyhow::Result<Option<(Key, Lsn, Bytes)>> {\n            if !has_data_below {\n                return Ok(None);\n            };\n            // This function is implemented as a get of the current timeline at ancestor LSN, therefore reusing\n            // as much existing code as possible.\n            let img = this_tline.get(key, history_lsn_point, ctx).await?;\n            Ok(Some((key, history_lsn_point, img)))\n        }\n\n        // Actually, we can decide not to write to the image layer at all at this point because\n        // the key and LSN range are determined. However, to keep things simple here, we still\n        // create this writer, and discard the writer in the end.\n        let mut time_to_first_kv_pair = None;\n\n        while let Some(((key, lsn, val), desc)) = merge_iter\n            .next_with_trace()\n            .await\n            .context(\"failed to get next key-value pair\")\n            .map_err(CompactionError::Other)?\n        {\n            if time_to_first_kv_pair.is_none() {\n                time_to_first_kv_pair = Some(timer.elapsed());\n                timer = Instant::now();\n            }\n\n            if cancel.is_cancelled() {\n                return Err(CompactionError::new_cancelled());\n            }\n\n            let should_yield = yield_for_l0\n                && self\n                    .l0_compaction_trigger\n                    .notified()\n                    .now_or_never()\n                    .is_some();\n            if should_yield {\n                tracing::info!(\"preempt gc-compaction in the main loop: too 
many L0 layers\");\n                return Ok(CompactionOutcome::YieldForL0);\n            }\n            if self.shard_identity.is_key_disposable(&key) {\n                // If this shard does not need to store this key, simply skip it.\n                //\n                // This is not handled in the filter iterator because shard is determined by hash.\n                // Therefore, it does not give us any performance benefit to do things like skip\n                // a whole layer file as handling key spaces (ranges).\n                if cfg!(debug_assertions) {\n                    let shard = self.shard_identity.shard_index();\n                    let owner = self.shard_identity.get_shard_number(&key);\n                    panic!(\"key {key} does not belong on shard {shard}, owned by {owner}\");\n                }\n                continue;\n            }\n            if !job_desc.compaction_key_range.contains(&key) {\n                if !desc.is_delta {\n                    continue;\n                }\n                let rewriter = delta_layer_rewriters.entry(desc.clone()).or_default();\n                let rewriter = if key < job_desc.compaction_key_range.start {\n                    if rewriter.before.is_none() {\n                        rewriter.before = Some(\n                            DeltaLayerWriter::new(\n                                self.conf,\n                                self.timeline_id,\n                                self.tenant_shard_id,\n                                desc.key_range.start,\n                                desc.lsn_range.clone(),\n                                &self.gate,\n                                self.cancel.clone(),\n                                ctx,\n                            )\n                            .await\n                            .context(\"failed to create delta layer writer\")\n                            .map_err(CompactionError::Other)?,\n                        );\n                 
   }\n                    rewriter.before.as_mut().unwrap()\n                } else if key >= job_desc.compaction_key_range.end {\n                    if rewriter.after.is_none() {\n                        rewriter.after = Some(\n                            DeltaLayerWriter::new(\n                                self.conf,\n                                self.timeline_id,\n                                self.tenant_shard_id,\n                                job_desc.compaction_key_range.end,\n                                desc.lsn_range.clone(),\n                                &self.gate,\n                                self.cancel.clone(),\n                                ctx,\n                            )\n                            .await\n                            .context(\"failed to create delta layer writer\")\n                            .map_err(CompactionError::Other)?,\n                        );\n                    }\n                    rewriter.after.as_mut().unwrap()\n                } else {\n                    unreachable!()\n                };\n                rewriter\n                    .put_value(key, lsn, val, ctx)\n                    .await\n                    .context(\"failed to put value\")\n                    .map_err(CompactionError::Other)?;\n                continue;\n            }\n            match val {\n                Value::Image(_) => stat.visit_image_key(&val),\n                Value::WalRecord(_) => stat.visit_wal_key(&val),\n            }\n            if last_key.is_none() || last_key.as_ref() == Some(&key) {\n                if last_key.is_none() {\n                    last_key = Some(key);\n                }\n                accumulated_values_estimated_size += val.estimated_size();\n                accumulated_values.push((key, lsn, val));\n\n                // Accumulated values should never exceed 512MB.\n                if accumulated_values_estimated_size >= 1024 * 1024 * 512 {\n                    
return Err(CompactionError::Other(anyhow!(\n                        \"too many values for a single key: {} for key {}, {} items\",\n                        accumulated_values_estimated_size,\n                        key,\n                        accumulated_values.len()\n                    )));\n                }\n            } else {\n                let last_key: &mut Key = last_key.as_mut().unwrap();\n                stat.on_unique_key_visited(); // TODO: adjust statistics for partial compaction\n                let retention = self\n                    .generate_key_retention(\n                        *last_key,\n                        &accumulated_values,\n                        job_desc.gc_cutoff,\n                        &job_desc.retain_lsns_below_horizon,\n                        COMPACTION_DELTA_THRESHOLD,\n                        get_ancestor_image(self, *last_key, ctx, has_data_below, lowest_retain_lsn)\n                            .await\n                            .context(\"failed to get ancestor image\")\n                            .map_err(CompactionError::Other)?,\n                        verification,\n                    )\n                    .await\n                    .context(\"failed to generate key retention\")\n                    .map_err(CompactionError::Other)?;\n                retention\n                    .pipe_to(\n                        *last_key,\n                        &mut delta_layer_writer,\n                        image_layer_writer.as_mut(),\n                        &mut stat,\n                        ctx,\n                    )\n                    .await\n                    .context(\"failed to pipe to delta layer writer\")\n                    .map_err(CompactionError::Other)?;\n                accumulated_values.clear();\n                *last_key = key;\n                accumulated_values_estimated_size = val.estimated_size();\n                accumulated_values.push((key, lsn, val));\n            }\n        
}\n\n        // TODO: move the below part to the loop body\n        let Some(last_key) = last_key else {\n            return Err(CompactionError::Other(anyhow!(\n                \"no keys produced during compaction\"\n            )));\n        };\n        stat.on_unique_key_visited();\n\n        let retention = self\n            .generate_key_retention(\n                last_key,\n                &accumulated_values,\n                job_desc.gc_cutoff,\n                &job_desc.retain_lsns_below_horizon,\n                COMPACTION_DELTA_THRESHOLD,\n                get_ancestor_image(self, last_key, ctx, has_data_below, lowest_retain_lsn)\n                    .await\n                    .context(\"failed to get ancestor image\")\n                    .map_err(CompactionError::Other)?,\n                verification,\n            )\n            .await\n            .context(\"failed to generate key retention\")\n            .map_err(CompactionError::Other)?;\n        retention\n            .pipe_to(\n                last_key,\n                &mut delta_layer_writer,\n                image_layer_writer.as_mut(),\n                &mut stat,\n                ctx,\n            )\n            .await\n            .context(\"failed to pipe to delta layer writer\")\n            .map_err(CompactionError::Other)?;\n        // end: move the above part to the loop body\n\n        let time_main_loop = timer.elapsed();\n        let timer = Instant::now();\n\n        let mut rewrote_delta_layers = Vec::new();\n        for (key, writers) in delta_layer_rewriters {\n            if let Some(delta_writer_before) = writers.before {\n                let (desc, path) = delta_writer_before\n                    .finish(job_desc.compaction_key_range.start, ctx)\n                    .await\n                    .context(\"failed to finish delta layer writer\")\n                    .map_err(CompactionError::Other)?;\n                let layer = Layer::finish_creating(self.conf, self, desc, 
&path)\n                    .context(\"failed to finish creating delta layer\")\n                    .map_err(CompactionError::Other)?;\n                rewrote_delta_layers.push(layer);\n            }\n            if let Some(delta_writer_after) = writers.after {\n                let (desc, path) = delta_writer_after\n                    .finish(key.key_range.end, ctx)\n                    .await\n                    .context(\"failed to finish delta layer writer\")\n                    .map_err(CompactionError::Other)?;\n                let layer = Layer::finish_creating(self.conf, self, desc, &path)\n                    .context(\"failed to finish creating delta layer\")\n                    .map_err(CompactionError::Other)?;\n                rewrote_delta_layers.push(layer);\n            }\n        }\n\n        let discard = |key: &PersistentLayerKey| {\n            let key = key.clone();\n            async move { KeyHistoryRetention::discard_key(&key, self, dry_run).await }\n        };\n\n        let produced_image_layers = if let Some(writer) = image_layer_writer {\n            if !dry_run {\n                let end_key = job_desc.compaction_key_range.end;\n                writer\n                    .finish_with_discard_fn(self, ctx, end_key, discard)\n                    .await\n                    .context(\"failed to finish image layer writer\")\n                    .map_err(CompactionError::Other)?\n            } else {\n                drop(writer);\n                Vec::new()\n            }\n        } else {\n            Vec::new()\n        };\n\n        let produced_delta_layers = if !dry_run {\n            delta_layer_writer\n                .finish_with_discard_fn(self, ctx, discard)\n                .await\n                .context(\"failed to finish delta layer writer\")\n                .map_err(CompactionError::Other)?\n        } else {\n            drop(delta_layer_writer);\n            Vec::new()\n        };\n\n        // TODO: make 
image/delta/rewrote_delta layers generation atomic. At this point, we already generated resident layers, and if\n        // compaction is cancelled at this point, we might have some layers that are not cleaned up.\n        let mut compact_to = Vec::new();\n        let mut keep_layers = HashSet::new();\n        let produced_delta_layers_len = produced_delta_layers.len();\n        let produced_image_layers_len = produced_image_layers.len();\n\n        let layer_selection_by_key = job_desc\n            .selected_layers\n            .iter()\n            .map(|l| (l.layer_desc().key(), l.layer_desc().clone()))\n            .collect::<HashMap<_, _>>();\n\n        for action in produced_delta_layers {\n            match action {\n                BatchWriterResult::Produced(layer) => {\n                    if cfg!(debug_assertions) {\n                        info!(\"produced delta layer: {}\", layer.layer_desc().key());\n                    }\n                    stat.produce_delta_layer(layer.layer_desc().file_size());\n                    compact_to.push(layer);\n                }\n                BatchWriterResult::Discarded(l) => {\n                    if cfg!(debug_assertions) {\n                        info!(\"discarded delta layer: {}\", l);\n                    }\n                    if let Some(layer_desc) = layer_selection_by_key.get(&l) {\n                        stat.discard_delta_layer(layer_desc.file_size());\n                    } else {\n                        tracing::warn!(\n                            \"discarded delta layer not in layer_selection: {}, produced a layer outside of the compaction key range?\",\n                            l\n                        );\n                        stat.discard_delta_layer(0);\n                    }\n                    keep_layers.insert(l);\n                }\n            }\n        }\n        for layer in &rewrote_delta_layers {\n            debug!(\n                \"produced rewritten delta layer: {}\",\n  
              layer.layer_desc().key()\n            );\n            // For now, we include rewritten delta layer size in the \"produce_delta_layer\". We could\n            // make it a separate statistics in the future.\n            stat.produce_delta_layer(layer.layer_desc().file_size());\n        }\n        compact_to.extend(rewrote_delta_layers);\n        for action in produced_image_layers {\n            match action {\n                BatchWriterResult::Produced(layer) => {\n                    debug!(\"produced image layer: {}\", layer.layer_desc().key());\n                    stat.produce_image_layer(layer.layer_desc().file_size());\n                    compact_to.push(layer);\n                }\n                BatchWriterResult::Discarded(l) => {\n                    debug!(\"discarded image layer: {}\", l);\n                    if let Some(layer_desc) = layer_selection_by_key.get(&l) {\n                        stat.discard_image_layer(layer_desc.file_size());\n                    } else {\n                        tracing::warn!(\n                            \"discarded image layer not in layer_selection: {}, produced a layer outside of the compaction key range?\",\n                            l\n                        );\n                        stat.discard_image_layer(0);\n                    }\n                    keep_layers.insert(l);\n                }\n            }\n        }\n\n        let mut layer_selection = job_desc.selected_layers;\n\n        // Partial compaction might select more data than it processes, e.g., if\n        // the compaction_key_range only partially overlaps:\n        //\n        //         [---compaction_key_range---]\n        //   [---A----][----B----][----C----][----D----]\n        //\n        // For delta layers, we will rewrite the layers so that it is cut exactly at\n        // the compaction key range, so we can always discard them. 
However, for image\n        // layers, as we do not rewrite them for now, we need to handle them differently.\n        // Assume image layers  A, B, C, D are all in the `layer_selection`.\n        //\n        // The created image layers contain whatever is needed from B, C, and from\n        // `----]` of A, and from  `[---` of D.\n        //\n        // In contrast, `[---A` and `D----]` have not been processed, so, we must\n        // keep that data.\n        //\n        // The solution for now is to keep A and D completely if they are image layers.\n        // (layer_selection is what we'll remove from the layer map, so, retain what\n        // is _not_ fully covered by compaction_key_range).\n        for layer in &layer_selection {\n            if !layer.layer_desc().is_delta() {\n                if !overlaps_with(\n                    &layer.layer_desc().key_range,\n                    &job_desc.compaction_key_range,\n                ) {\n                    return Err(CompactionError::Other(anyhow!(\n                        \"violated constraint: image layer outside of compaction key range\"\n                    )));\n                }\n                if !fully_contains(\n                    &job_desc.compaction_key_range,\n                    &layer.layer_desc().key_range,\n                ) {\n                    keep_layers.insert(layer.layer_desc().key());\n                }\n            }\n        }\n\n        layer_selection.retain(|x| !keep_layers.contains(&x.layer_desc().key()));\n\n        let time_final_phase = timer.elapsed();\n\n        stat.time_final_phase_secs = time_final_phase.as_secs_f64();\n        stat.time_to_first_kv_pair_secs = time_to_first_kv_pair\n            .unwrap_or(Duration::ZERO)\n            .as_secs_f64();\n        stat.time_main_loop_secs = time_main_loop.as_secs_f64();\n        stat.time_acquire_lock_secs = time_acquire_lock.as_secs_f64();\n        stat.time_download_layer_secs = time_download_layer.as_secs_f64();\n        
stat.time_analyze_secs = time_analyze.as_secs_f64();\n        stat.time_total_secs = begin_timer.elapsed().as_secs_f64();\n        stat.finalize();\n\n        info!(\n            \"gc-compaction statistics: {}\",\n            serde_json::to_string(&stat)\n                .context(\"failed to serialize gc-compaction statistics\")\n                .map_err(CompactionError::Other)?\n        );\n\n        if dry_run {\n            return Ok(CompactionOutcome::Done);\n        }\n\n        info!(\n            \"produced {} delta layers and {} image layers, {} layers are kept\",\n            produced_delta_layers_len,\n            produced_image_layers_len,\n            keep_layers.len()\n        );\n\n        // Step 3: Place back to the layer map.\n\n        // First, do a sanity check to ensure the newly-created layer map does not contain overlaps.\n        let all_layers = {\n            let guard = self\n                .layers\n                .read(LayerManagerLockHolder::GarbageCollection)\n                .await;\n            let layer_map = guard.layer_map()?;\n            layer_map.iter_historic_layers().collect_vec()\n        };\n\n        let mut final_layers = all_layers\n            .iter()\n            .map(|layer| layer.layer_name())\n            .collect::<HashSet<_>>();\n        for layer in &layer_selection {\n            final_layers.remove(&layer.layer_desc().layer_name());\n        }\n        for layer in &compact_to {\n            final_layers.insert(layer.layer_desc().layer_name());\n        }\n        let final_layers = final_layers.into_iter().collect_vec();\n\n        // TODO: move this check before we call `finish` on image layer writers. However, this will require us to get the layer name before we finish\n        // the writer, so potentially, we will need a function like `ImageLayerBatchWriter::get_all_pending_layer_keys` to get all the keys that are\n        // in the writer before finalizing the persistent layers. 
Now we would leave some dangling layers on the disk if the check fails.\n        if let Some(err) = check_valid_layermap(&final_layers) {\n            return Err(CompactionError::Other(anyhow!(\n                \"gc-compaction layer map check failed after compaction because {}, compaction result not applied to the layer map due to potential data loss\",\n                err\n            )));\n        }\n\n        // Between the sanity check and this compaction update, there could be new layers being flushed, but it should be fine because we only\n        // operate on L1 layers.\n        {\n            // Gc-compaction will rewrite the history of a key. This could happen in two ways:\n            //\n            // 1. We create an image layer to replace all the deltas below the compact LSN. In this case, assume\n            // we have 2 delta layers A and B, both below the compact LSN. We create an image layer I to replace\n            // A and B at the compact LSN. If the read path finishes reading A, yields, and now we update the layer\n            // map, the read path then cannot find any keys below A, reporting a missing key error, while the key\n            // now gets stored in I at the compact LSN.\n            //\n            // ---------------                                       ---------------\n            //   delta1@LSN20                                         image1@LSN20\n            // ---------------  (read path collects delta@LSN20,  => ---------------  (read path cannot find anything\n            //   delta1@LSN10    yields)                                               below LSN 20)\n            // ---------------\n            //\n            // 2. We create a delta layer to replace all the deltas below the compact LSN, and in the delta layers,\n            // we combine the history of a key into a single image. 
For example, we have deltas at LSN 1, 2, 3, 4.\n            // Assume one delta layer contains LSN 1, 2, 3 and the other contains LSN 4.\n            //\n            // We let gc-compaction combine delta 2, 3, 4 into an image at LSN 4, which produces a delta layer that\n            // contains the delta at LSN 1 and the image at LSN 4. If the read path finishes reading the original delta\n            // layer containing LSN 4, yields, and we then update the layer map to install the new delta layer, the\n            // read path continues into the new layer and collects the delta at LSN 1, producing an invalid history.\n            //\n            // ---------------                                      ---------------\n            //   delta1@LSN4                                          image1@LSN4\n            // ---------------  (read path collects delta@LSN4,  => ---------------  (read path collects LSN4 and LSN1,\n            //  delta1@LSN1-3    yields)                              delta1@LSN1     which is an invalid history)\n            // ---------------                                      ---------------\n            //\n            // Therefore, the gc-compaction layer update operation should wait for all ongoing reads, block all pending reads,\n            // and only allow reads to continue after the update is finished.\n\n            let update_guard = self.gc_compaction_layer_update_lock.write().await;\n            // Acquiring the update guard ensures current read operations end and new read operations are blocked.\n            // TODO: can we use `latest_gc_cutoff` Rcu to achieve the same effect?\n            let mut guard = self\n                .layers\n                .write(LayerManagerLockHolder::GarbageCollection)\n                .await;\n            guard\n                .open_mut()?\n                .finish_gc_compaction(&layer_selection, &compact_to, &self.metrics);\n            drop(update_guard); // Allow new reads to start ONLY after we finished updating the layer map.\n        };\n\n        // Schedule an index-only upload to update the `latest_gc_cutoff` in the 
index_part.json.\n        // Otherwise, after restart, the index_part only contains the old `latest_gc_cutoff` and\n        // find_gc_cutoffs will try accessing things below the cutoff. TODO: ideally, this should\n        // be batched into `schedule_compaction_update`.\n        let disk_consistent_lsn = self.disk_consistent_lsn.load();\n        self.schedule_uploads(disk_consistent_lsn, None)\n            .context(\"failed to schedule uploads\")\n            .map_err(CompactionError::Other)?;\n        // If a layer gets rewritten throughout gc-compaction, we need to keep that layer only in `compact_to` instead\n        // of `compact_from`.\n        let compact_from = {\n            let mut compact_from = Vec::new();\n            let mut compact_to_set = HashMap::new();\n            for layer in &compact_to {\n                compact_to_set.insert(layer.layer_desc().key(), layer);\n            }\n            for layer in &layer_selection {\n                if let Some(to) = compact_to_set.get(&layer.layer_desc().key()) {\n                    tracing::info!(\n                        \"skipping delete {} because found same layer key at different generation {}\",\n                        layer,\n                        to\n                    );\n                } else {\n                    compact_from.push(layer.clone());\n                }\n            }\n            compact_from\n        };\n        self.remote_client\n            .schedule_compaction_update(&compact_from, &compact_to)?;\n\n        drop(gc_lock);\n\n        Ok(CompactionOutcome::Done)\n    }\n}\n\nstruct TimelineAdaptor {\n    timeline: Arc<Timeline>,\n\n    keyspace: (Lsn, KeySpace),\n\n    new_deltas: Vec<ResidentLayer>,\n    new_images: Vec<ResidentLayer>,\n    layers_to_delete: Vec<Arc<PersistentLayerDesc>>,\n}\n\nimpl TimelineAdaptor {\n    pub fn new(timeline: &Arc<Timeline>, keyspace: (Lsn, KeySpace)) -> Self {\n        Self {\n            timeline: timeline.clone(),\n            
keyspace,\n            new_images: Vec::new(),\n            new_deltas: Vec::new(),\n            layers_to_delete: Vec::new(),\n        }\n    }\n\n    pub async fn flush_updates(&mut self) -> Result<(), CompactionError> {\n        let layers_to_delete = {\n            let guard = self\n                .timeline\n                .layers\n                .read(LayerManagerLockHolder::Compaction)\n                .await;\n            self.layers_to_delete\n                .iter()\n                .map(|x| guard.get_from_desc(x))\n                .collect::<Vec<Layer>>()\n        };\n        self.timeline\n            .finish_compact_batch(&self.new_deltas, &self.new_images, &layers_to_delete)\n            .await?;\n\n        self.timeline\n            .upload_new_image_layers(std::mem::take(&mut self.new_images))?;\n\n        self.new_deltas.clear();\n        self.layers_to_delete.clear();\n        Ok(())\n    }\n}\n\n#[derive(Clone)]\nstruct ResidentDeltaLayer(ResidentLayer);\n#[derive(Clone)]\nstruct ResidentImageLayer(ResidentLayer);\n\nimpl CompactionJobExecutor for TimelineAdaptor {\n    type Key = pageserver_api::key::Key;\n\n    type Layer = OwnArc<PersistentLayerDesc>;\n    type DeltaLayer = ResidentDeltaLayer;\n    type ImageLayer = ResidentImageLayer;\n\n    type RequestContext = crate::context::RequestContext;\n\n    fn get_shard_identity(&self) -> &ShardIdentity {\n        self.timeline.get_shard_identity()\n    }\n\n    async fn get_layers(\n        &mut self,\n        key_range: &Range<Key>,\n        lsn_range: &Range<Lsn>,\n        _ctx: &RequestContext,\n    ) -> anyhow::Result<Vec<OwnArc<PersistentLayerDesc>>> {\n        self.flush_updates().await?;\n\n        let guard = self\n            .timeline\n            .layers\n            .read(LayerManagerLockHolder::Compaction)\n            .await;\n        let layer_map = guard.layer_map()?;\n\n        let result = layer_map\n            .iter_historic_layers()\n            .filter(|l| {\n               
 overlaps_with(&l.lsn_range, lsn_range) && overlaps_with(&l.key_range, key_range)\n            })\n            .map(OwnArc)\n            .collect();\n        Ok(result)\n    }\n\n    async fn get_keyspace(\n        &mut self,\n        key_range: &Range<Key>,\n        lsn: Lsn,\n        _ctx: &RequestContext,\n    ) -> anyhow::Result<Vec<Range<Key>>> {\n        if lsn == self.keyspace.0 {\n            Ok(pageserver_compaction::helpers::intersect_keyspace(\n                &self.keyspace.1.ranges,\n                key_range,\n            ))\n        } else {\n            // The current compaction implementation only ever requests the key space\n            // at the compaction end LSN.\n            anyhow::bail!(\"keyspace not available for requested lsn\");\n        }\n    }\n\n    async fn downcast_delta_layer(\n        &self,\n        layer: &OwnArc<PersistentLayerDesc>,\n        ctx: &RequestContext,\n    ) -> anyhow::Result<Option<ResidentDeltaLayer>> {\n        // this is a lot more complex than a simple downcast...\n        if layer.is_delta() {\n            let l = {\n                let guard = self\n                    .timeline\n                    .layers\n                    .read(LayerManagerLockHolder::Compaction)\n                    .await;\n                guard.get_from_desc(layer)\n            };\n            let result = l.download_and_keep_resident(ctx).await?;\n\n            Ok(Some(ResidentDeltaLayer(result)))\n        } else {\n            Ok(None)\n        }\n    }\n\n    async fn create_image(\n        &mut self,\n        lsn: Lsn,\n        key_range: &Range<Key>,\n        ctx: &RequestContext,\n    ) -> anyhow::Result<()> {\n        Ok(self.create_image_impl(lsn, key_range, ctx).await?)\n    }\n\n    async fn create_delta(\n        &mut self,\n        lsn_range: &Range<Lsn>,\n        key_range: &Range<Key>,\n        input_layers: &[ResidentDeltaLayer],\n        ctx: &RequestContext,\n    ) -> anyhow::Result<()> {\n        debug!(\"Create 
new layer {}..{}\", lsn_range.start, lsn_range.end);\n\n        let mut all_entries = Vec::new();\n        for dl in input_layers.iter() {\n            all_entries.extend(dl.load_keys(ctx).await?);\n        }\n\n        // The current stdlib sorting implementation is designed in a way where it is\n        // particularly fast where the slice is made up of sorted sub-ranges.\n        all_entries.sort_by_key(|DeltaEntry { key, lsn, .. }| (*key, *lsn));\n\n        let mut writer = DeltaLayerWriter::new(\n            self.timeline.conf,\n            self.timeline.timeline_id,\n            self.timeline.tenant_shard_id,\n            key_range.start,\n            lsn_range.clone(),\n            &self.timeline.gate,\n            self.timeline.cancel.clone(),\n            ctx,\n        )\n        .await?;\n\n        let mut dup_values = 0;\n\n        // This iterator walks through all key-value pairs from all the layers\n        // we're compacting, in key, LSN order.\n        let mut prev: Option<(Key, Lsn)> = None;\n        for &DeltaEntry {\n            key, lsn, ref val, ..\n        } in all_entries.iter()\n        {\n            if prev == Some((key, lsn)) {\n                // This is a duplicate. 
Skip it.\n                //\n                // It can happen if compaction is interrupted after writing some\n                // layers but not all, and we are compacting the range again.\n                // The calculations in the algorithm assume that there are no\n                // duplicates, so the math on targeted file size is likely off,\n                // and we will create smaller files than expected.\n                dup_values += 1;\n                continue;\n            }\n\n            let value = val.load(ctx).await?;\n\n            writer.put_value(key, lsn, value, ctx).await?;\n\n            prev = Some((key, lsn));\n        }\n\n        if dup_values > 0 {\n            warn!(\"delta layer created with {} duplicate values\", dup_values);\n        }\n\n        fail_point!(\"delta-layer-writer-fail-before-finish\", |_| {\n            Err(anyhow::anyhow!(\n                \"failpoint delta-layer-writer-fail-before-finish\"\n            ))\n        });\n\n        let (desc, path) = writer.finish(prev.unwrap().0.next(), ctx).await?;\n        let new_delta_layer =\n            Layer::finish_creating(self.timeline.conf, &self.timeline, desc, &path)?;\n\n        self.new_deltas.push(new_delta_layer);\n        Ok(())\n    }\n\n    async fn delete_layer(\n        &mut self,\n        layer: &OwnArc<PersistentLayerDesc>,\n        _ctx: &RequestContext,\n    ) -> anyhow::Result<()> {\n        self.layers_to_delete.push(layer.clone().0);\n        Ok(())\n    }\n}\n\nimpl TimelineAdaptor {\n    async fn create_image_impl(\n        &mut self,\n        lsn: Lsn,\n        key_range: &Range<Key>,\n        ctx: &RequestContext,\n    ) -> Result<(), CreateImageLayersError> {\n        let timer = self.timeline.metrics.create_images_time_histo.start_timer();\n\n        let image_layer_writer = ImageLayerWriter::new(\n            self.timeline.conf,\n            self.timeline.timeline_id,\n            self.timeline.tenant_shard_id,\n            key_range,\n            
lsn,\n            &self.timeline.gate,\n            self.timeline.cancel.clone(),\n            ctx,\n        )\n        .await\n        .map_err(CreateImageLayersError::Other)?;\n\n        fail_point!(\"image-layer-writer-fail-before-finish\", |_| {\n            Err(CreateImageLayersError::Other(anyhow::anyhow!(\n                \"failpoint image-layer-writer-fail-before-finish\"\n            )))\n        });\n\n        let keyspace = KeySpace {\n            ranges: self\n                .get_keyspace(key_range, lsn, ctx)\n                .await\n                .map_err(CreateImageLayersError::Other)?,\n        };\n        // TODO set proper (stateful) start. The create_image_layer_for_rel_blocks function mostly\n        let outcome = self\n            .timeline\n            .create_image_layer_for_rel_blocks(\n                &keyspace,\n                image_layer_writer,\n                lsn,\n                ctx,\n                key_range.clone(),\n                IoConcurrency::sequential(),\n                None,\n            )\n            .await?;\n\n        if let ImageLayerCreationOutcome::Generated {\n            unfinished_image_layer,\n        } = outcome\n        {\n            let (desc, path) = unfinished_image_layer\n                .finish(ctx)\n                .await\n                .map_err(CreateImageLayersError::Other)?;\n            let image_layer =\n                Layer::finish_creating(self.timeline.conf, &self.timeline, desc, &path)\n                    .map_err(CreateImageLayersError::Other)?;\n            self.new_images.push(image_layer);\n        }\n\n        timer.stop_and_record();\n\n        Ok(())\n    }\n}\n\nimpl CompactionRequestContext for crate::context::RequestContext {}\n\n#[derive(Debug, Clone)]\npub struct OwnArc<T>(pub Arc<T>);\n\nimpl<T> Deref for OwnArc<T> {\n    type Target = <Arc<T> as Deref>::Target;\n    fn deref(&self) -> &Self::Target {\n        &self.0\n    }\n}\n\nimpl<T> AsRef<T> for OwnArc<T> {\n    fn 
as_ref(&self) -> &T {\n        self.0.as_ref()\n    }\n}\n\nimpl CompactionLayer<Key> for OwnArc<PersistentLayerDesc> {\n    fn key_range(&self) -> &Range<Key> {\n        &self.key_range\n    }\n    fn lsn_range(&self) -> &Range<Lsn> {\n        &self.lsn_range\n    }\n    fn file_size(&self) -> u64 {\n        self.file_size\n    }\n    fn short_id(&self) -> std::string::String {\n        self.as_ref().short_id().to_string()\n    }\n    fn is_delta(&self) -> bool {\n        self.as_ref().is_delta()\n    }\n}\n\nimpl CompactionLayer<Key> for OwnArc<DeltaLayer> {\n    fn key_range(&self) -> &Range<Key> {\n        &self.layer_desc().key_range\n    }\n    fn lsn_range(&self) -> &Range<Lsn> {\n        &self.layer_desc().lsn_range\n    }\n    fn file_size(&self) -> u64 {\n        self.layer_desc().file_size\n    }\n    fn short_id(&self) -> std::string::String {\n        self.layer_desc().short_id().to_string()\n    }\n    fn is_delta(&self) -> bool {\n        true\n    }\n}\n\nimpl CompactionLayer<Key> for ResidentDeltaLayer {\n    fn key_range(&self) -> &Range<Key> {\n        &self.0.layer_desc().key_range\n    }\n    fn lsn_range(&self) -> &Range<Lsn> {\n        &self.0.layer_desc().lsn_range\n    }\n    fn file_size(&self) -> u64 {\n        self.0.layer_desc().file_size\n    }\n    fn short_id(&self) -> std::string::String {\n        self.0.layer_desc().short_id().to_string()\n    }\n    fn is_delta(&self) -> bool {\n        true\n    }\n}\n\nimpl CompactionDeltaLayer<TimelineAdaptor> for ResidentDeltaLayer {\n    type DeltaEntry<'a> = DeltaEntry<'a>;\n\n    async fn load_keys(&self, ctx: &RequestContext) -> anyhow::Result<Vec<DeltaEntry<'_>>> {\n        self.0.get_as_delta(ctx).await?.index_entries(ctx).await\n    }\n}\n\nimpl CompactionLayer<Key> for ResidentImageLayer {\n    fn key_range(&self) -> &Range<Key> {\n        &self.0.layer_desc().key_range\n    }\n    fn lsn_range(&self) -> &Range<Lsn> {\n        &self.0.layer_desc().lsn_range\n    }\n    fn 
file_size(&self) -> u64 {\n        self.0.layer_desc().file_size\n    }\n    fn short_id(&self) -> std::string::String {\n        self.0.layer_desc().short_id().to_string()\n    }\n    fn is_delta(&self) -> bool {\n        false\n    }\n}\nimpl CompactionImageLayer<TimelineAdaptor> for ResidentImageLayer {}\n"
  },
  {
    "path": "pageserver/src/tenant/timeline/delete.rs",
    "content": "use std::ops::{Deref, DerefMut};\nuse std::sync::Arc;\n\nuse anyhow::Context;\nuse pageserver_api::models::TimelineState;\nuse pageserver_api::shard::TenantShardId;\nuse remote_storage::DownloadError;\nuse tokio::sync::OwnedMutexGuard;\nuse tracing::{Instrument, error, info, info_span, instrument};\nuse utils::id::TimelineId;\nuse utils::{crashsafe, fs_ext, pausable_failpoint};\n\nuse crate::config::PageServerConf;\nuse crate::context::RequestContext;\nuse crate::task_mgr::{self, TaskKind};\nuse crate::tenant::metadata::TimelineMetadata;\nuse crate::tenant::remote_timeline_client::{\n    PersistIndexPartWithDeletedFlagError, RemoteTimelineClient,\n};\nuse crate::tenant::{\n    CreateTimelineCause, DeleteTimelineError, MaybeDeletedIndexPart, TenantManifestError,\n    TenantShard, Timeline, TimelineOrOffloaded,\n};\nuse crate::virtual_file::MaybeFatalIo;\n\n/// Mark timeline as deleted in S3 so we won't pick it up next time\n/// during attach or pageserver restart.\n/// See comment in persist_index_part_with_deleted_flag.\nasync fn set_deleted_in_remote_index(\n    remote_client: &Arc<RemoteTimelineClient>,\n) -> Result<(), DeleteTimelineError> {\n    let res = remote_client.persist_index_part_with_deleted_flag().await;\n    match res {\n        // If we (now, or already) marked it successfully as deleted, we can proceed\n        Ok(()) | Err(PersistIndexPartWithDeletedFlagError::AlreadyDeleted(_)) => (),\n        // Bail out otherwise\n        //\n        // AlreadyInProgress shouldn't happen, because the 'delete_lock' prevents\n        // two tasks from performing the deletion at the same time. 
The first task\n        // that starts deletion should run it to completion.\n        Err(e @ PersistIndexPartWithDeletedFlagError::AlreadyInProgress(_))\n        | Err(e @ PersistIndexPartWithDeletedFlagError::Other(_)) => {\n            return Err(DeleteTimelineError::Other(anyhow::anyhow!(e)));\n        }\n    }\n    Ok(())\n}\n\n/// Grab the compaction and gc locks, and actually perform the deletion.\n///\n/// The locks prevent GC or compaction from running at the same time. The background tasks do not\n/// register themselves with the timeline it's operating on, so it might still be running even\n/// though we called `shutdown_tasks`.\n///\n/// Note that there are still other race conditions between\n/// GC, compaction and timeline deletion. See\n/// <https://github.com/neondatabase/neon/issues/2671>\n///\n/// No timeout here, GC & Compaction should be responsive to the\n/// `TimelineState::Stopping` change.\n// pub(super): documentation link\npub(super) async fn delete_local_timeline_directory(\n    conf: &PageServerConf,\n    tenant_shard_id: TenantShardId,\n    timeline: &Timeline,\n) {\n    // Always ensure the lock order is compaction -> gc.\n    let compaction_lock = timeline.compaction_lock.lock();\n    let _compaction_lock = crate::timed(\n        compaction_lock,\n        \"acquires compaction lock\",\n        std::time::Duration::from_secs(5),\n    )\n    .await;\n\n    let gc_lock = timeline.gc_lock.lock();\n    let _gc_lock = crate::timed(\n        gc_lock,\n        \"acquires gc lock\",\n        std::time::Duration::from_secs(5),\n    )\n    .await;\n\n    // NB: storage_sync upload tasks that reference these layers have been cancelled\n    //     by the caller.\n\n    let local_timeline_directory = conf.timeline_path(&tenant_shard_id, &timeline.timeline_id);\n\n    // NB: This need not be atomic because the deleted flag in the IndexPart\n    // will be observed during tenant/timeline load. 
The deletion will be resumed there.\n    //\n    // ErrorKind::NotFound can happen e.g. if we race with tenant detach, because,\n    // no locks are shared.\n    tokio::fs::remove_dir_all(local_timeline_directory)\n        .await\n        .or_else(fs_ext::ignore_not_found)\n        .fatal_err(\"removing timeline directory\");\n\n    // Make sure previous deletions are ordered before mark removal.\n    // Otherwise there is no guarantee that they reach the disk before mark deletion.\n    // So its possible for mark to reach disk first and for other deletions\n    // to be reordered later and thus missed if a crash occurs.\n    // Note that we dont need to sync after mark file is removed\n    // because we can tolerate the case when mark file reappears on startup.\n    let timeline_path = conf.timelines_path(&tenant_shard_id);\n    crashsafe::fsync_async(timeline_path)\n        .await\n        .fatal_err(\"fsync after removing timeline directory\");\n\n    info!(\"finished deleting layer files, releasing locks\");\n}\n\n/// It is important that this gets called when DeletionGuard is being held.\n/// For more context see comments in [`make_timeline_delete_guard`]\nasync fn remove_maybe_offloaded_timeline_from_tenant(\n    tenant: &TenantShard,\n    timeline: &TimelineOrOffloaded,\n    _: &DeletionGuard, // using it as a witness\n) -> anyhow::Result<()> {\n    // Remove the timeline from the map.\n    // This observes the locking order between timelines and timelines_offloaded\n    let mut timelines = tenant.timelines.lock().unwrap();\n    let mut timelines_offloaded = tenant.timelines_offloaded.lock().unwrap();\n    let mut timelines_importing = tenant.timelines_importing.lock().unwrap();\n    let offloaded_children_exist = timelines_offloaded\n        .iter()\n        .any(|(_, entry)| entry.ancestor_timeline_id == Some(timeline.timeline_id()));\n    let children_exist = timelines\n        .iter()\n        .any(|(_, entry)| entry.get_ancestor_timeline_id() == 
Some(timeline.timeline_id()));\n    // XXX this can happen because of race conditions with branch creation.\n    // We already deleted the remote layer files, so it's probably best to panic.\n    if children_exist || offloaded_children_exist {\n        panic!(\"Timeline grew children while we removed layer files\");\n    }\n\n    match timeline {\n        TimelineOrOffloaded::Timeline(timeline) => {\n            timelines.remove(&timeline.timeline_id).expect(\n                \"timeline that we were deleting was concurrently removed from 'timelines' map\",\n            );\n            tenant\n                .scheduled_compaction_tasks\n                .lock()\n                .unwrap()\n                .remove(&timeline.timeline_id);\n        }\n        TimelineOrOffloaded::Offloaded(timeline) => {\n            let offloaded_timeline = timelines_offloaded\n                .remove(&timeline.timeline_id)\n                .expect(\"timeline that we were deleting was concurrently removed from 'timelines_offloaded' map\");\n            offloaded_timeline.delete_from_ancestor_with_timelines(&timelines);\n        }\n        TimelineOrOffloaded::Importing(importing) => {\n            timelines_importing.remove(&importing.timeline.timeline_id);\n        }\n    }\n\n    drop(timelines_importing);\n    drop(timelines_offloaded);\n    drop(timelines);\n\n    Ok(())\n}\n\n/// Orchestrates timeline shut down of all timeline tasks, removes its in-memory structures,\n/// and deletes its data from both disk and s3.\n/// The sequence of steps:\n/// 1. Set deleted_at in remote index part.\n/// 2. Create local mark file.\n/// 3. Delete local files except metadata (it is simpler this way, to be able to reuse timeline initialization code that expects metadata)\n/// 4. Delete remote layers\n/// 5. Delete index part\n/// 6. Delete meta, timeline directory\n/// 7. 
Delete mark file\n///\n/// It is resumable from any step in case a crash/restart occurs.\n/// There are two entrypoints to the process:\n/// 1. [`DeleteTimelineFlow::run`] this is the main one called by a management api handler.\n/// 2. [`DeleteTimelineFlow::resume_deletion`] is called during restarts when local metadata is still present\n///    and we possibly need to continue deletion of remote files.\n///\n/// Note the only other place that messes around timeline delete mark is the logic that scans directory with timelines during tenant load.\n#[derive(Default)]\npub enum DeleteTimelineFlow {\n    #[default]\n    NotStarted,\n    InProgress,\n    Finished,\n}\n\nimpl DeleteTimelineFlow {\n    // These steps are run in the context of management api request handler.\n    // Long running steps are continued to run in the background.\n    // NB: If this fails half-way through, and is retried, the retry will go through\n    // all the same steps again. Make sure the code here is idempotent, and don't\n    // error out if some of the shutdown tasks have already been completed!\n    #[instrument(skip_all)]\n    pub async fn run(\n        tenant: &Arc<TenantShard>,\n        timeline_id: TimelineId,\n    ) -> Result<(), DeleteTimelineError> {\n        super::debug_assert_current_span_has_tenant_and_timeline_id();\n\n        let (timeline, mut guard) =\n            make_timeline_delete_guard(tenant, timeline_id, TimelineDeleteGuardKind::Delete)?;\n\n        guard.mark_in_progress()?;\n\n        // Now that the Timeline is in Stopping state, request all the related tasks to shut down.\n        // TODO(vlad): shut down imported timeline here\n        match &timeline {\n            TimelineOrOffloaded::Timeline(timeline) => {\n                timeline.shutdown(super::ShutdownMode::Hard).await;\n            }\n            TimelineOrOffloaded::Importing(importing) => {\n                importing.shutdown().await;\n            }\n            
TimelineOrOffloaded::Offloaded(_offloaded) => {\n                // Nothing to shut down in this case\n            }\n        }\n\n        tenant.gc_block.before_delete(&timeline.timeline_id());\n\n        fail::fail_point!(\"timeline-delete-before-index-deleted-at\", |_| {\n            Err(anyhow::anyhow!(\n                \"failpoint: timeline-delete-before-index-deleted-at\"\n            ))?\n        });\n\n        let remote_client = match timeline.maybe_remote_client() {\n            Some(remote_client) => remote_client,\n            None => {\n                let remote_client = tenant\n                    .build_timeline_client(timeline.timeline_id(), tenant.remote_storage.clone());\n                let result = match remote_client\n                    .download_index_file(&tenant.cancel)\n                    .instrument(info_span!(\"download_index_file\"))\n                    .await\n                {\n                    Ok(r) => r,\n                    Err(DownloadError::NotFound) => {\n                        // Deletion is already complete.\n                        // As we came here, we will need to remove the timeline from the tenant though.\n                        tracing::info!(\"Timeline already deleted in remote storage\");\n                        if let TimelineOrOffloaded::Offloaded(_) = &timeline {\n                            // We only support this for offloaded timelines, as we don't know which state non-offloaded timelines are in.\n                            tracing::info!(\n                                \"Timeline with gone index part is offloaded timeline. 
Removing from tenant.\"\n                            );\n                            remove_maybe_offloaded_timeline_from_tenant(tenant, &timeline, &guard)\n                                .await?;\n                        }\n                        return Ok(());\n                    }\n                    Err(e) => {\n                        return Err(DeleteTimelineError::Other(anyhow::anyhow!(\n                            \"error: {:?}\",\n                            e\n                        )));\n                    }\n                };\n                let index_part = match result {\n                    MaybeDeletedIndexPart::Deleted(p) => {\n                        tracing::info!(\"Timeline already set as deleted in remote index\");\n                        p\n                    }\n                    MaybeDeletedIndexPart::IndexPart(p) => p,\n                };\n                let remote_client = Arc::new(remote_client);\n\n                remote_client\n                    .init_upload_queue(&index_part)\n                    .map_err(DeleteTimelineError::Other)?;\n                remote_client.shutdown().await;\n                remote_client\n            }\n        };\n        set_deleted_in_remote_index(&remote_client).await?;\n\n        fail::fail_point!(\"timeline-delete-before-schedule\", |_| {\n            Err(anyhow::anyhow!(\n                \"failpoint: timeline-delete-before-schedule\"\n            ))?\n        });\n\n        Self::schedule_background(\n            guard,\n            tenant.conf,\n            Arc::clone(tenant),\n            timeline,\n            remote_client,\n        );\n\n        Ok(())\n    }\n\n    fn mark_in_progress(&mut self) -> anyhow::Result<()> {\n        match self {\n            Self::Finished => anyhow::bail!(\"Bug. Is in finished state\"),\n            Self::InProgress { .. 
} => { /* We're in a retry */ }\n            Self::NotStarted => { /* Fresh start */ }\n        }\n\n        *self = Self::InProgress;\n\n        Ok(())\n    }\n\n    /// Shortcut to create Timeline in stopping state and spawn deletion task.\n    #[instrument(skip_all, fields(%timeline_id))]\n    pub(crate) async fn resume_deletion(\n        tenant: Arc<TenantShard>,\n        timeline_id: TimelineId,\n        local_metadata: &TimelineMetadata,\n        remote_client: RemoteTimelineClient,\n        ctx: &RequestContext,\n    ) -> anyhow::Result<()> {\n        // Note: here we even skip populating layer map. Timeline is essentially uninitialized.\n        // RemoteTimelineClient is the only functioning part.\n        let (timeline, _timeline_ctx) = tenant\n            .create_timeline_struct(\n                timeline_id,\n                local_metadata,\n                None, // Ancestor is not needed for deletion.\n                None, // Previous heatmap is not needed for deletion\n                tenant.get_timeline_resources_for(remote_client),\n                // Important. 
We don't pass ancestor above because it can be missing.\n                // Thus we need to skip the validation here.\n                CreateTimelineCause::Delete,\n                crate::tenant::CreateTimelineIdempotency::FailWithConflict, // doesn't matter what we put here\n                None, // doesn't matter what we put here\n                None, // doesn't matter what we put here\n                None, // doesn't matter what we put here\n                ctx,\n            )\n            .context(\"create_timeline_struct\")?;\n\n        let mut guard = DeletionGuard(\n            Arc::clone(&timeline.delete_progress)\n                .try_lock_owned()\n                .expect(\"cannot happen because we're the only owner\"),\n        );\n\n        // We need to do this because when console retries delete request we shouldn't answer with 404\n        // because 404 means successful deletion.\n        {\n            let mut locked = tenant.timelines.lock().unwrap();\n            locked.insert(timeline_id, Arc::clone(&timeline));\n        }\n\n        guard.mark_in_progress()?;\n\n        let remote_client = timeline.remote_client.clone();\n        let timeline = TimelineOrOffloaded::Timeline(timeline);\n        Self::schedule_background(guard, tenant.conf, tenant, timeline, remote_client);\n\n        Ok(())\n    }\n\n    fn schedule_background(\n        guard: DeletionGuard,\n        conf: &'static PageServerConf,\n        tenant: Arc<TenantShard>,\n        timeline: TimelineOrOffloaded,\n        remote_client: Arc<RemoteTimelineClient>,\n    ) {\n        let tenant_shard_id = timeline.tenant_shard_id();\n        let timeline_id = timeline.timeline_id();\n\n        // Take a tenant gate guard, because timeline deletion needs access to the tenant to update its manifest.\n        let Ok(tenant_guard) = tenant.gate.enter() else {\n            // It is safe to simply skip here, because we only schedule background work once the timeline is durably marked for 
deletion.\n            info!(\"Tenant is shutting down, timeline deletion will be resumed when it next starts\");\n            return;\n        };\n\n        task_mgr::spawn(\n            task_mgr::BACKGROUND_RUNTIME.handle(),\n            TaskKind::TimelineDeletionWorker,\n            tenant_shard_id,\n            Some(timeline_id),\n            \"timeline_delete\",\n            async move {\n                let _guard = tenant_guard;\n\n                if let Err(err) = Self::background(guard, conf, &tenant, &timeline, remote_client).await {\n                    // Only log as an error if it's not a cancellation.\n                    if matches!(err, DeleteTimelineError::Cancelled) {\n                        info!(\"Shutdown during timeline deletion\");\n                    }else {\n                        error!(\"Error: {err:#}\");\n                    }\n                    if let TimelineOrOffloaded::Timeline(timeline) = timeline {\n                        timeline.set_broken(format!(\"{err:#}\"))\n                    }\n                };\n                Ok(())\n            }\n            .instrument(tracing::info_span!(parent: None, \"delete_timeline\", tenant_id=%tenant_shard_id.tenant_id, shard_id=%tenant_shard_id.shard_slug(),timeline_id=%timeline_id)),\n        );\n    }\n\n    async fn background(\n        mut guard: DeletionGuard,\n        conf: &PageServerConf,\n        tenant: &TenantShard,\n        timeline: &TimelineOrOffloaded,\n        remote_client: Arc<RemoteTimelineClient>,\n    ) -> Result<(), DeleteTimelineError> {\n        fail::fail_point!(\"timeline-delete-before-rm\", |_| {\n            Err(anyhow::anyhow!(\"failpoint: timeline-delete-before-rm\"))?\n        });\n\n        match timeline {\n            TimelineOrOffloaded::Timeline(timeline) => {\n                delete_local_timeline_directory(conf, tenant.tenant_shard_id, timeline).await;\n            }\n            TimelineOrOffloaded::Importing(importing) => {\n                
delete_local_timeline_directory(conf, tenant.tenant_shard_id, &importing.timeline)\n                    .await;\n            }\n            TimelineOrOffloaded::Offloaded(_offloaded) => {\n                // Offloaded timelines have no local state\n                // TODO: once we persist offloaded information, delete the timeline from there, too\n            }\n        }\n\n        fail::fail_point!(\"timeline-delete-after-rm\", |_| {\n            Err(anyhow::anyhow!(\"failpoint: timeline-delete-after-rm\"))?\n        });\n\n        remote_client.delete_all().await?;\n\n        pausable_failpoint!(\"in_progress_delete\");\n\n        remove_maybe_offloaded_timeline_from_tenant(tenant, timeline, &guard).await?;\n\n        // This is susceptible to race conditions, i.e. we won't continue deletions if there is a crash\n        // between the deletion of the index-part.json and reaching of this code.\n        // So indeed, the tenant manifest might refer to an offloaded timeline which has already been deleted.\n        // However, we handle this case in tenant loading code so the next time we attach, the issue is\n        // resolved.\n        tenant\n            .maybe_upload_tenant_manifest()\n            .await\n            .map_err(|err| match err {\n                TenantManifestError::Cancelled => DeleteTimelineError::Cancelled,\n                err => DeleteTimelineError::Other(err.into()),\n            })?;\n\n        *guard = Self::Finished;\n\n        Ok(())\n    }\n\n    pub(crate) fn is_not_started(&self) -> bool {\n        matches!(self, Self::NotStarted)\n    }\n}\n\n#[derive(Copy, Clone, PartialEq, Eq)]\npub(super) enum TimelineDeleteGuardKind {\n    Offload,\n    Delete,\n}\n\npub(super) fn make_timeline_delete_guard(\n    tenant: &TenantShard,\n    timeline_id: TimelineId,\n    guard_kind: TimelineDeleteGuardKind,\n) -> Result<(TimelineOrOffloaded, DeletionGuard), DeleteTimelineError> {\n    // Note the interaction between this guard and deletion 
guard.\n    // Here we attempt to lock deletion guard when we're holding a lock on timelines.\n    // This is important because when you take into account `remove_timeline_from_tenant`\n    // we remove timeline from memory when we still hold the deletion guard.\n    // So here when timeline deletion is finished timeline wont be present in timelines map at all\n    // which makes the following sequence impossible:\n    // T1: get preempted right before the try_lock on `Timeline::delete_progress`\n    // T2: do a full deletion, acquire and drop `Timeline::delete_progress`\n    // T1: acquire deletion lock, do another `DeleteTimelineFlow::run`\n    // For more context see this discussion: `https://github.com/neondatabase/neon/pull/4552#discussion_r1253437346`\n    let timelines = tenant.timelines.lock().unwrap();\n    let timelines_offloaded = tenant.timelines_offloaded.lock().unwrap();\n    let timelines_importing = tenant.timelines_importing.lock().unwrap();\n\n    let timeline = match timelines.get(&timeline_id) {\n        Some(t) => TimelineOrOffloaded::Timeline(Arc::clone(t)),\n        None => match timelines_offloaded.get(&timeline_id) {\n            Some(t) => TimelineOrOffloaded::Offloaded(Arc::clone(t)),\n            None => match timelines_importing.get(&timeline_id) {\n                Some(t) => TimelineOrOffloaded::Importing(Arc::clone(t)),\n                None => return Err(DeleteTimelineError::NotFound),\n            },\n        },\n    };\n\n    // Ensure that there are no child timelines, because we are about to remove files,\n    // which will break child branches\n    let mut children = Vec::new();\n    if guard_kind == TimelineDeleteGuardKind::Delete {\n        children.extend(timelines_offloaded.iter().filter_map(|(id, entry)| {\n            (entry.ancestor_timeline_id == Some(timeline_id)).then_some(*id)\n        }));\n    }\n    children.extend(timelines.iter().filter_map(|(id, entry)| {\n        (entry.get_ancestor_timeline_id() == 
Some(timeline_id)).then_some(*id)\n    }));\n\n    if !children.is_empty() {\n        return Err(DeleteTimelineError::HasChildren(children));\n    }\n\n    // Note that using try_lock here is important to avoid a deadlock.\n    // Here we take lock on timelines and then the deletion guard.\n    // At the end of the operation we're holding the guard and need to lock timelines map\n    // to remove the timeline from it.\n    // Always if you have two locks that are taken in different order this can result in a deadlock.\n\n    let delete_progress = Arc::clone(timeline.delete_progress());\n    let delete_lock_guard = match delete_progress.try_lock_owned() {\n        Ok(guard) => DeletionGuard(guard),\n        Err(_) => {\n            // Unfortunately if lock fails arc is consumed.\n            return Err(DeleteTimelineError::AlreadyInProgress(Arc::clone(\n                timeline.delete_progress(),\n            )));\n        }\n    };\n\n    if guard_kind == TimelineDeleteGuardKind::Delete {\n        if let TimelineOrOffloaded::Timeline(timeline) = &timeline {\n            timeline.set_state(TimelineState::Stopping);\n        }\n    }\n\n    Ok((timeline, delete_lock_guard))\n}\n\npub(super) struct DeletionGuard(OwnedMutexGuard<DeleteTimelineFlow>);\n\nimpl Deref for DeletionGuard {\n    type Target = DeleteTimelineFlow;\n\n    fn deref(&self) -> &Self::Target {\n        &self.0\n    }\n}\n\nimpl DerefMut for DeletionGuard {\n    fn deref_mut(&mut self) -> &mut Self::Target {\n        &mut self.0\n    }\n}\n"
  },
  {
    "path": "pageserver/src/tenant/timeline/detach_ancestor.rs",
    "content": "use std::collections::HashSet;\nuse std::sync::Arc;\n\nuse anyhow::Context;\nuse bytes::Bytes;\nuse http_utils::error::ApiError;\nuse pageserver_api::key::Key;\nuse pageserver_api::keyspace::KeySpace;\nuse pageserver_api::models::DetachBehavior;\nuse pageserver_api::models::detach_ancestor::AncestorDetached;\nuse pageserver_api::shard::ShardIdentity;\nuse pageserver_compaction::helpers::overlaps_with;\nuse tokio::sync::Semaphore;\nuse tokio_util::sync::CancellationToken;\nuse tracing::Instrument;\nuse utils::completion;\nuse utils::generation::Generation;\nuse utils::id::TimelineId;\nuse utils::lsn::Lsn;\nuse utils::sync::gate::GateError;\n\nuse super::layer_manager::{LayerManager, LayerManagerLockHolder};\nuse super::{FlushLayerError, Timeline};\nuse crate::context::{DownloadBehavior, RequestContext};\nuse crate::task_mgr::TaskKind;\nuse crate::tenant::TenantShard;\nuse crate::tenant::remote_timeline_client::index::GcBlockingReason::DetachAncestor;\nuse crate::tenant::storage_layer::layer::local_layer_path;\nuse crate::tenant::storage_layer::{\n    AsLayerDesc as _, DeltaLayerWriter, ImageLayerWriter, IoConcurrency, Layer, ResidentLayer,\n    ValuesReconstructState,\n};\nuse crate::tenant::timeline::VersionedKeySpaceQuery;\nuse crate::virtual_file::{MaybeFatalIo, VirtualFile};\n\n#[derive(Debug, thiserror::Error)]\npub(crate) enum Error {\n    #[error(\"no ancestors\")]\n    NoAncestor,\n\n    #[error(\"too many ancestors\")]\n    TooManyAncestors,\n\n    #[error(\"ancestor is not empty\")]\n    AncestorNotEmpty,\n\n    #[error(\"shutting down, please retry later\")]\n    ShuttingDown,\n\n    #[error(\"archived: {}\", .0)]\n    Archived(TimelineId),\n\n    #[error(transparent)]\n    NotFound(crate::tenant::GetTimelineError),\n\n    #[error(\"failed to reparent all candidate timelines, please retry\")]\n    FailedToReparentAll,\n\n    #[error(\"ancestor is already being detached by: {}\", .0)]\n    OtherTimelineDetachOngoing(TimelineId),\n\n    
#[error(\"preparing to timeline ancestor detach failed\")]\n    Prepare(#[source] anyhow::Error),\n\n    #[error(\"detaching and reparenting failed\")]\n    DetachReparent(#[source] anyhow::Error),\n\n    #[error(\"completing ancestor detach failed\")]\n    Complete(#[source] anyhow::Error),\n\n    #[error(\"failpoint: {}\", .0)]\n    Failpoint(&'static str),\n}\n\nimpl Error {\n    /// Try to catch cancellation from within the `anyhow::Error`, or wrap the anyhow as the given\n    /// variant or fancier `or_else`.\n    fn launder<F>(e: anyhow::Error, or_else: F) -> Error\n    where\n        F: Fn(anyhow::Error) -> Error,\n    {\n        use remote_storage::TimeoutOrCancel;\n\n        use crate::tenant::remote_timeline_client::WaitCompletionError;\n        use crate::tenant::upload_queue::NotInitialized;\n\n        if e.is::<NotInitialized>()\n            || TimeoutOrCancel::caused_by_cancel(&e)\n            || e.downcast_ref::<remote_storage::DownloadError>()\n                .is_some_and(|e| e.is_cancelled())\n            || e.is::<WaitCompletionError>()\n        {\n            Error::ShuttingDown\n        } else {\n            or_else(e)\n        }\n    }\n}\n\nimpl From<Error> for ApiError {\n    fn from(value: Error) -> Self {\n        match value {\n            Error::NoAncestor => ApiError::Conflict(value.to_string()),\n            Error::TooManyAncestors | Error::AncestorNotEmpty => {\n                ApiError::BadRequest(anyhow::anyhow!(\"{value}\"))\n            }\n            Error::ShuttingDown => ApiError::ShuttingDown,\n            Error::Archived(_) => ApiError::BadRequest(anyhow::anyhow!(\"{value}\")),\n            Error::OtherTimelineDetachOngoing(_) | Error::FailedToReparentAll => {\n                ApiError::ResourceUnavailable(value.to_string().into())\n            }\n            Error::NotFound(e) => ApiError::from(e),\n            // these variants should have no cancellation errors because of Error::launder\n            Error::Prepare(_)\n     
       | Error::DetachReparent(_)\n            | Error::Complete(_)\n            | Error::Failpoint(_) => ApiError::InternalServerError(value.into()),\n        }\n    }\n}\n\nimpl From<crate::tenant::upload_queue::NotInitialized> for Error {\n    fn from(_: crate::tenant::upload_queue::NotInitialized) -> Self {\n        // treat all as shutting down signals, even though that is not entirely correct\n        // (uninitialized state)\n        Error::ShuttingDown\n    }\n}\nimpl From<super::layer_manager::Shutdown> for Error {\n    fn from(_: super::layer_manager::Shutdown) -> Self {\n        Error::ShuttingDown\n    }\n}\n\npub(crate) enum Progress {\n    Prepared(Attempt, PreparedTimelineDetach),\n    Done(AncestorDetached),\n}\n\npub(crate) struct PreparedTimelineDetach {\n    layers: Vec<Layer>,\n}\n\n// TODO: this should be part of PageserverConf because we cannot easily modify cplane arguments.\n#[derive(Debug)]\npub(crate) struct Options {\n    pub(crate) rewrite_concurrency: std::num::NonZeroUsize,\n    pub(crate) copy_concurrency: std::num::NonZeroUsize,\n}\n\nimpl Default for Options {\n    fn default() -> Self {\n        Self {\n            rewrite_concurrency: std::num::NonZeroUsize::new(2).unwrap(),\n            copy_concurrency: std::num::NonZeroUsize::new(100).unwrap(),\n        }\n    }\n}\n\n/// Represents an across tenant reset exclusive single attempt to detach ancestor.\n#[derive(Debug)]\npub(crate) struct Attempt {\n    pub(crate) timeline_id: TimelineId,\n    pub(crate) ancestor_timeline_id: TimelineId,\n    pub(crate) ancestor_lsn: Lsn,\n    _guard: completion::Completion,\n    gate_entered: Option<utils::sync::gate::GateGuard>,\n}\n\nimpl Attempt {\n    pub(crate) fn before_reset_tenant(&mut self) {\n        let taken = self.gate_entered.take();\n        assert!(taken.is_some());\n    }\n\n    pub(crate) fn new_barrier(&self) -> completion::Barrier {\n        self._guard.barrier()\n    }\n}\n\npub(crate) async fn 
generate_tombstone_image_layer(\n    detached: &Arc<Timeline>,\n    ancestor: &Arc<Timeline>,\n    ancestor_lsn: Lsn,\n    historic_layers_to_copy: &Vec<Layer>,\n    ctx: &RequestContext,\n) -> Result<Option<ResidentLayer>, Error> {\n    tracing::info!(\n        \"removing non-inherited keys by writing an image layer with tombstones at the detach LSN\"\n    );\n    let io_concurrency = IoConcurrency::spawn_from_conf(\n        detached.conf.get_vectored_concurrent_io,\n        detached.gate.enter().map_err(|_| Error::ShuttingDown)?,\n    );\n    let mut reconstruct_state = ValuesReconstructState::new(io_concurrency);\n    // Directly use `get_vectored_impl` to skip the max_vectored_read_key limit check. Note that the keyspace should\n    // not contain too many keys, otherwise this takes a lot of memory. Currently we limit it to 10k keys in the compute.\n    let key_range = Key::sparse_non_inherited_keyspace();\n    // avoid generating a \"future layer\" which will then be removed\n    let image_lsn = ancestor_lsn;\n\n    {\n        for layer in historic_layers_to_copy {\n            let desc = layer.layer_desc();\n            if !desc.is_delta\n                && desc.lsn_range.start == image_lsn\n                && overlaps_with(&key_range, &desc.key_range)\n            {\n                tracing::info!(\n                    layer=%layer, \"will copy tombstone from ancestor instead of creating a new one\"\n                );\n\n                return Ok(None);\n            }\n        }\n\n        let layers = detached\n            .layers\n            .read(LayerManagerLockHolder::DetachAncestor)\n            .await;\n        for layer in layers.all_persistent_layers() {\n            if !layer.is_delta\n                && layer.lsn_range.start == image_lsn\n                && overlaps_with(&key_range, &layer.key_range)\n            {\n                tracing::warn!(\n                    layer=%layer, \"image layer at the detach LSN already exists, skipping 
removing aux files\"\n                );\n                return Ok(None);\n            }\n        }\n    }\n\n    let query = VersionedKeySpaceQuery::uniform(KeySpace::single(key_range.clone()), image_lsn);\n    let data = ancestor\n        .get_vectored_impl(query, &mut reconstruct_state, ctx)\n        .await\n        .context(\"failed to retrieve aux keys\")\n        .map_err(|e| Error::launder(e, Error::Prepare))?;\n    if !data.is_empty() {\n        // TODO: is it possible that we can have an image at `image_lsn`? Unlikely because image layers are only generated\n        // upon compaction but theoretically possible.\n        let mut image_layer_writer = ImageLayerWriter::new(\n            detached.conf,\n            detached.timeline_id,\n            detached.tenant_shard_id,\n            &key_range,\n            image_lsn,\n            &detached.gate,\n            detached.cancel.clone(),\n            ctx,\n        )\n        .await\n        .context(\"failed to create image layer writer\")\n        .map_err(Error::Prepare)?;\n        for key in data.keys() {\n            image_layer_writer\n                .put_image(*key, Bytes::new(), ctx)\n                .await\n                .context(\"failed to write key\")\n                .map_err(|e| Error::launder(e, Error::Prepare))?;\n        }\n        let (desc, path) = image_layer_writer\n            .finish(ctx)\n            .await\n            .context(\"failed to finish image layer writer for removing the metadata keys\")\n            .map_err(|e| Error::launder(e, Error::Prepare))?;\n        let generated = Layer::finish_creating(detached.conf, detached, desc, &path)\n            .map_err(|e| Error::launder(e, Error::Prepare))?;\n        detached\n            .remote_client\n            .upload_layer_file(&generated, &detached.cancel)\n            .await\n            .map_err(|e| Error::launder(e, Error::Prepare))?;\n        tracing::info!(layer=%generated, \"wrote image layer\");\n        
Ok(Some(generated))\n    } else {\n        tracing::info!(\"no aux keys found in ancestor\");\n        Ok(None)\n    }\n}\n\n/// See [`Timeline::prepare_to_detach_from_ancestor`]\npub(super) async fn prepare(\n    detached: &Arc<Timeline>,\n    tenant: &TenantShard,\n    behavior: DetachBehavior,\n    options: Options,\n    ctx: &RequestContext,\n) -> Result<Progress, Error> {\n    use Error::*;\n\n    let Some((mut ancestor, mut ancestor_lsn)) = detached\n        .ancestor_timeline\n        .as_ref()\n        .map(|tl| (tl.clone(), detached.ancestor_lsn))\n    else {\n        let ancestor_id;\n        let ancestor_lsn;\n        let still_in_progress = {\n            let accessor = detached.remote_client.initialized_upload_queue()?;\n\n            // we are safe to inspect the latest uploaded, because we can only witness this after\n            // restart is complete and ancestor is no more.\n            let latest = accessor.latest_uploaded_index_part();\n            let Some((id, lsn)) = latest.lineage.detached_previous_ancestor() else {\n                return Err(NoAncestor);\n            };\n            ancestor_id = id;\n            ancestor_lsn = lsn;\n\n            latest\n                .gc_blocking\n                .as_ref()\n                .is_some_and(|b| b.blocked_by(DetachAncestor))\n        };\n\n        if still_in_progress {\n            // gc is still blocked, we can still reparent and complete.\n            // we are safe to reparent remaining, because they were locked in in the beginning.\n            let attempt =\n                continue_with_blocked_gc(detached, tenant, ancestor_id, ancestor_lsn).await?;\n\n            // because the ancestor of detached is already set to none, we have published all\n            // of the layers, so we are still \"prepared.\"\n            return Ok(Progress::Prepared(\n                attempt,\n                PreparedTimelineDetach { layers: Vec::new() },\n            ));\n        }\n\n        let 
reparented_timelines = reparented_direct_children(detached, tenant)?;\n        return Ok(Progress::Done(AncestorDetached {\n            reparented_timelines,\n        }));\n    };\n\n    if detached.is_archived() != Some(false) {\n        return Err(Archived(detached.timeline_id));\n    }\n\n    if !ancestor_lsn.is_valid() {\n        // rare case, probably wouldn't even load\n        tracing::error!(\"ancestor is set, but ancestor_lsn is invalid, this timeline needs fixing\");\n        return Err(NoAncestor);\n    }\n\n    check_no_archived_children_of_ancestor(tenant, detached, &ancestor, ancestor_lsn, behavior)?;\n\n    if let DetachBehavior::MultiLevelAndNoReparent = behavior {\n        // If the ancestor has an ancestor, we might be able to fast-path detach it if the current ancestor does not have any data written/used by the detaching timeline.\n        while let Some(ancestor_of_ancestor) = ancestor.ancestor_timeline.clone() {\n            if ancestor_lsn != ancestor.ancestor_lsn {\n                // non-technical requirement; we could flatten still if ancestor LSN does not match but that needs\n                // us to copy and cut more layers.\n                return Err(AncestorNotEmpty);\n            }\n            // Use the ancestor of the ancestor as the new ancestor (only when the ancestor LSNs are the same)\n            ancestor_lsn = ancestor.ancestor_lsn; // Get the LSN first before resetting the `ancestor` variable\n            ancestor = ancestor_of_ancestor;\n            // TODO: do we still need to check if we don't want to reparent?\n            check_no_archived_children_of_ancestor(\n                tenant,\n                detached,\n                &ancestor,\n                ancestor_lsn,\n                behavior,\n            )?;\n        }\n    } else if ancestor.ancestor_timeline.is_some() {\n        // non-technical requirement; we could flatten N ancestors just as easily but we chose\n        // not to, at least initially\n        
return Err(TooManyAncestors);\n    }\n\n    tracing::info!(\n        \"attempt to detach the timeline from the ancestor: {}@{}, behavior={:?}\",\n        ancestor.timeline_id,\n        ancestor_lsn,\n        behavior\n    );\n\n    let attempt = start_new_attempt(detached, tenant, ancestor.timeline_id, ancestor_lsn).await?;\n\n    utils::pausable_failpoint!(\"timeline-detach-ancestor::before_starting_after_locking-pausable\");\n\n    fail::fail_point!(\n        \"timeline-detach-ancestor::before_starting_after_locking\",\n        |_| Err(Error::Failpoint(\n            \"timeline-detach-ancestor::before_starting_after_locking\"\n        ))\n    );\n\n    if ancestor_lsn >= ancestor.get_disk_consistent_lsn() {\n        let span =\n            tracing::info_span!(\"freeze_and_flush\", ancestor_timeline_id=%ancestor.timeline_id);\n        async {\n            let started_at = std::time::Instant::now();\n            let freeze_and_flush = ancestor.freeze_and_flush0();\n            let mut freeze_and_flush = std::pin::pin!(freeze_and_flush);\n\n            let res =\n                tokio::time::timeout(std::time::Duration::from_secs(1), &mut freeze_and_flush)\n                    .await;\n\n            let res = match res {\n                Ok(res) => res,\n                Err(_elapsed) => {\n                    tracing::info!(\"freezing and flushing ancestor is still ongoing\");\n                    freeze_and_flush.await\n                }\n            };\n\n            res.map_err(|e| {\n                use FlushLayerError::*;\n                match e {\n                    Cancelled | NotRunning(_) => {\n                        // FIXME(#6424): technically statically unreachable right now, given how we never\n                        // drop the sender\n                        Error::ShuttingDown\n                    }\n                    CreateImageLayersError(_) | Other(_) => Error::Prepare(e.into()),\n                }\n            })?;\n\n            // we do 
not need to wait for uploads to complete but we do need `struct Layer`,\n            // copying delta prefix is unsupported currently for `InMemoryLayer`.\n            tracing::info!(\n                elapsed_ms = started_at.elapsed().as_millis(),\n                \"froze and flushed the ancestor\"\n            );\n            Ok::<_, Error>(())\n        }\n        .instrument(span)\n        .await?;\n    }\n\n    let end_lsn = ancestor_lsn + 1;\n\n    let (filtered_layers, straddling_branchpoint, rest_of_historic) = {\n        // we do not need to start from our layers, because they can only be layers that come\n        // *after* ancestor_lsn\n        let layers = tokio::select! {\n            guard = ancestor.layers.read(LayerManagerLockHolder::DetachAncestor) => guard,\n            _ = detached.cancel.cancelled() => {\n                return Err(ShuttingDown);\n            }\n            _ = ancestor.cancel.cancelled() => {\n                return Err(ShuttingDown);\n            }\n        };\n\n        // between retries, these can change if compaction or gc ran in between. 
this will mean\n        // we have to redo work.\n        partition_work(ancestor_lsn, &layers)?\n    };\n\n    // TODO: layers are already sorted by something: use that to determine how much of remote\n    // copies are already done -- gc is blocked, but a compaction could have happened on ancestor,\n    // which is something to keep in mind if copy skipping is implemented.\n    tracing::info!(filtered=%filtered_layers, to_rewrite = straddling_branchpoint.len(), historic=%rest_of_historic.len(), \"collected layers\");\n\n    // TODO: copying and lsn prefix copying could be done at the same time with a single fsync after\n    let mut new_layers: Vec<Layer> =\n        Vec::with_capacity(straddling_branchpoint.len() + rest_of_historic.len() + 1);\n\n    if let Some(tombstone_layer) =\n        generate_tombstone_image_layer(detached, &ancestor, ancestor_lsn, &rest_of_historic, ctx)\n            .await?\n    {\n        new_layers.push(tombstone_layer.into());\n    }\n\n    {\n        tracing::info!(to_rewrite = %straddling_branchpoint.len(), \"copying prefix of delta layers\");\n\n        let mut tasks = tokio::task::JoinSet::new();\n\n        let mut wrote_any = false;\n\n        let limiter = Arc::new(Semaphore::new(options.rewrite_concurrency.get()));\n\n        for layer in straddling_branchpoint {\n            let limiter = limiter.clone();\n            let timeline = detached.clone();\n            let ctx = ctx.detached_child(TaskKind::DetachAncestor, DownloadBehavior::Download);\n\n            let span = tracing::info_span!(\"upload_rewritten_layer\", %layer);\n            tasks.spawn(\n                async move {\n                    let _permit = limiter.acquire().await;\n                    let copied =\n                        upload_rewritten_layer(end_lsn, &layer, &timeline, &timeline.cancel, &ctx)\n                            .await?;\n                    if let Some(copied) = copied.as_ref() {\n                        tracing::info!(%copied, \"rewrote 
and uploaded\");\n                    }\n                    Ok(copied)\n                }\n                .instrument(span),\n            );\n        }\n\n        while let Some(res) = tasks.join_next().await {\n            match res {\n                Ok(Ok(Some(copied))) => {\n                    wrote_any = true;\n                    new_layers.push(copied);\n                }\n                Ok(Ok(None)) => {}\n                Ok(Err(e)) => return Err(e),\n                Err(je) => return Err(Error::Prepare(je.into())),\n            }\n        }\n\n        // FIXME: the fsync should be mandatory, after both rewrites and copies\n        if wrote_any {\n            fsync_timeline_dir(detached, ctx).await;\n        }\n    }\n\n    let mut tasks = tokio::task::JoinSet::new();\n    let limiter = Arc::new(Semaphore::new(options.copy_concurrency.get()));\n    let cancel_eval = CancellationToken::new();\n\n    for adopted in rest_of_historic {\n        let limiter = limiter.clone();\n        let timeline = detached.clone();\n        let cancel_eval = cancel_eval.clone();\n\n        tasks.spawn(\n            async move {\n                let _permit = tokio::select! 
{\n                    permit = limiter.acquire() => {\n                        permit\n                    }\n                    // Wait for the cancellation here instead of letting the entire task be cancelled.\n                    // Cancellations are racy in that they might leave layers on disk.\n                    _ = cancel_eval.cancelled() => {\n                        Err(Error::ShuttingDown)?\n                    }\n                };\n                let (owned, did_hardlink) = remote_copy(\n                    &adopted,\n                    &timeline,\n                    timeline.generation,\n                    timeline.shard_identity,\n                    &timeline.cancel,\n                )\n                .await?;\n                tracing::info!(layer=%owned, did_hard_link=%did_hardlink, \"remote copied\");\n                Ok((owned, did_hardlink))\n            }\n            .in_current_span(),\n        );\n    }\n\n    fn delete_layers(timeline: &Timeline, layers: Vec<Layer>) -> Result<(), Error> {\n        // We are deleting layers, so we must hold the gate\n        let _gate = timeline.gate.enter().map_err(|e| match e {\n            GateError::GateClosed => Error::ShuttingDown,\n        })?;\n        {\n            layers.into_iter().for_each(|l: Layer| {\n                l.delete_on_drop();\n                std::mem::drop(l);\n            });\n        }\n        Ok(())\n    }\n\n    let mut should_fsync = false;\n    let mut first_err = None;\n    while let Some(res) = tasks.join_next().await {\n        match res {\n            Ok(Ok((owned, did_hardlink))) => {\n                if did_hardlink {\n                    should_fsync = true;\n                }\n                new_layers.push(owned);\n            }\n\n            // Don't stop the evaluation on errors, so that we get the full set of hardlinked layers to delete.\n            Ok(Err(failed)) => {\n                cancel_eval.cancel();\n                
first_err.get_or_insert(failed);\n            }\n            Err(je) => {\n                cancel_eval.cancel();\n                first_err.get_or_insert(Error::Prepare(je.into()));\n            }\n        }\n    }\n\n    if let Some(failed) = first_err {\n        delete_layers(detached, new_layers)?;\n        return Err(failed);\n    }\n\n    // fsync directory again if we hardlinked something\n    if should_fsync {\n        fsync_timeline_dir(detached, ctx).await;\n    }\n\n    let prepared = PreparedTimelineDetach { layers: new_layers };\n\n    Ok(Progress::Prepared(attempt, prepared))\n}\n\nasync fn start_new_attempt(\n    detached: &Timeline,\n    tenant: &TenantShard,\n    ancestor_timeline_id: TimelineId,\n    ancestor_lsn: Lsn,\n) -> Result<Attempt, Error> {\n    let attempt = obtain_exclusive_attempt(detached, tenant, ancestor_timeline_id, ancestor_lsn)?;\n\n    // insert the block in the index_part.json, if not already there.\n    let _dont_care = tenant\n        .gc_block\n        .insert(\n            detached,\n            crate::tenant::remote_timeline_client::index::GcBlockingReason::DetachAncestor,\n        )\n        .await\n        .map_err(|e| Error::launder(e, Error::Prepare))?;\n\n    Ok(attempt)\n}\n\nasync fn continue_with_blocked_gc(\n    detached: &Timeline,\n    tenant: &TenantShard,\n    ancestor_timeline_id: TimelineId,\n    ancestor_lsn: Lsn,\n) -> Result<Attempt, Error> {\n    // FIXME: it would be nice to confirm that there is an in-memory version, since we've just\n    // verified there is a persistent one?\n    obtain_exclusive_attempt(detached, tenant, ancestor_timeline_id, ancestor_lsn)\n}\n\nfn obtain_exclusive_attempt(\n    detached: &Timeline,\n    tenant: &TenantShard,\n    ancestor_timeline_id: TimelineId,\n    ancestor_lsn: Lsn,\n) -> Result<Attempt, Error> {\n    use Error::{OtherTimelineDetachOngoing, ShuttingDown};\n\n    // ensure we are the only active attempt for this tenant\n    let (guard, barrier) = 
completion::channel();\n    {\n        let mut guard = tenant.ongoing_timeline_detach.lock().unwrap();\n        if let Some((tl, other)) = guard.as_ref() {\n            if !other.is_ready() {\n                return Err(OtherTimelineDetachOngoing(*tl));\n            }\n            // FIXME: no test enters here\n        }\n        *guard = Some((detached.timeline_id, barrier));\n    }\n\n    // ensure the gate is still open\n    let _gate_entered = detached.gate.enter().map_err(|_| ShuttingDown)?;\n\n    Ok(Attempt {\n        timeline_id: detached.timeline_id,\n        ancestor_timeline_id,\n        ancestor_lsn,\n        _guard: guard,\n        gate_entered: Some(_gate_entered),\n    })\n}\n\nfn reparented_direct_children(\n    detached: &Arc<Timeline>,\n    tenant: &TenantShard,\n) -> Result<HashSet<TimelineId>, Error> {\n    let mut all_direct_children = tenant\n        .timelines\n        .lock()\n        .unwrap()\n        .values()\n        .filter_map(|tl| {\n            let is_direct_child = matches!(tl.ancestor_timeline.as_ref(), Some(ancestor) if Arc::ptr_eq(ancestor, detached));\n\n            if is_direct_child {\n                Some(tl.clone())\n            } else {\n                if let Some(timeline) = tl.ancestor_timeline.as_ref() {\n                    assert_ne!(timeline.timeline_id, detached.timeline_id, \"we cannot have two timelines with the same timeline_id live\");\n                }\n                None\n            }\n        })\n        // Collect to avoid lock taking order problem with Tenant::timelines and\n        // Timeline::remote_client\n        .collect::<Vec<_>>();\n\n    let mut any_shutdown = false;\n\n    all_direct_children.retain(|tl| match tl.remote_client.initialized_upload_queue() {\n        Ok(accessor) => accessor\n            .latest_uploaded_index_part()\n            .lineage\n            .is_reparented(),\n        Err(_shutdownalike) => {\n            // not 100% a shutdown, but let's bail early not to give 
inconsistent results in\n            // sharded environment.\n            any_shutdown = true;\n            true\n        }\n    });\n\n    if any_shutdown {\n        // it could be one or many being deleted; have client retry\n        return Err(Error::ShuttingDown);\n    }\n\n    Ok(all_direct_children\n        .into_iter()\n        .map(|tl| tl.timeline_id)\n        .collect())\n}\n\nfn partition_work(\n    ancestor_lsn: Lsn,\n    source: &LayerManager,\n) -> Result<(usize, Vec<Layer>, Vec<Layer>), Error> {\n    let mut straddling_branchpoint = vec![];\n    let mut rest_of_historic = vec![];\n\n    let mut later_by_lsn = 0;\n\n    for desc in source.layer_map()?.iter_historic_layers() {\n        // off by one chances here:\n        // - start is inclusive\n        // - end is exclusive\n        if desc.lsn_range.start > ancestor_lsn {\n            later_by_lsn += 1;\n            continue;\n        }\n\n        let target = if desc.lsn_range.start <= ancestor_lsn\n            && desc.lsn_range.end > ancestor_lsn\n            && desc.is_delta\n        {\n            // TODO: image layer at Lsn optimization\n            &mut straddling_branchpoint\n        } else {\n            &mut rest_of_historic\n        };\n\n        target.push(source.get_from_desc(&desc));\n    }\n\n    Ok((later_by_lsn, straddling_branchpoint, rest_of_historic))\n}\n\nasync fn upload_rewritten_layer(\n    end_lsn: Lsn,\n    layer: &Layer,\n    target: &Arc<Timeline>,\n    cancel: &CancellationToken,\n    ctx: &RequestContext,\n) -> Result<Option<Layer>, Error> {\n    let copied = copy_lsn_prefix(end_lsn, layer, target, ctx).await?;\n\n    let Some(copied) = copied else {\n        return Ok(None);\n    };\n\n    target\n        .remote_client\n        .upload_layer_file(&copied, cancel)\n        .await\n        .map_err(|e| Error::launder(e, Error::Prepare))?;\n\n    Ok(Some(copied.into()))\n}\n\nasync fn copy_lsn_prefix(\n    end_lsn: Lsn,\n    layer: &Layer,\n    target_timeline: 
&Arc<Timeline>,\n    ctx: &RequestContext,\n) -> Result<Option<ResidentLayer>, Error> {\n    if target_timeline.cancel.is_cancelled() {\n        return Err(Error::ShuttingDown);\n    }\n\n    tracing::debug!(%layer, %end_lsn, \"copying lsn prefix\");\n\n    let mut writer = DeltaLayerWriter::new(\n        target_timeline.conf,\n        target_timeline.timeline_id,\n        target_timeline.tenant_shard_id,\n        layer.layer_desc().key_range.start,\n        layer.layer_desc().lsn_range.start..end_lsn,\n        &target_timeline.gate,\n        target_timeline.cancel.clone(),\n        ctx,\n    )\n    .await\n    .with_context(|| format!(\"prepare to copy lsn prefix of ancestors {layer}\"))\n    .map_err(Error::Prepare)?;\n\n    let resident = layer.download_and_keep_resident(ctx).await.map_err(|e| {\n        if e.is_cancelled() {\n            Error::ShuttingDown\n        } else {\n            Error::Prepare(e.into())\n        }\n    })?;\n\n    let records = resident\n        .copy_delta_prefix(&mut writer, end_lsn, ctx)\n        .await\n        .with_context(|| format!(\"copy lsn prefix of ancestors {layer}\"))\n        .map_err(Error::Prepare)?;\n\n    drop(resident);\n\n    tracing::debug!(%layer, records, \"copied records\");\n\n    if records == 0 {\n        drop(writer);\n        // TODO: we might want to store an empty marker in remote storage for this\n        // layer so that we will not needlessly walk `layer` on repeated attempts.\n        Ok(None)\n    } else {\n        // reuse the key instead of adding more holes between layers by using the real\n        // highest key in the layer.\n        let reused_highest_key = layer.layer_desc().key_range.end;\n        let (desc, path) = writer\n            .finish(reused_highest_key, ctx)\n            .await\n            .map_err(Error::Prepare)?;\n        let copied = Layer::finish_creating(target_timeline.conf, target_timeline, desc, &path)\n            .map_err(Error::Prepare)?;\n\n        
tracing::debug!(%layer, %copied, \"new layer produced\");\n\n        Ok(Some(copied))\n    }\n}\n\n/// Creates a new Layer instance for the adopted layer, and ensures it is found in the remote\n/// storage on successful return, without the adopted layer being added to `index_part.json`.\n/// Returns (Layer, did hardlink)\nasync fn remote_copy(\n    adopted: &Layer,\n    adoptee: &Arc<Timeline>,\n    generation: Generation,\n    shard_identity: ShardIdentity,\n    cancel: &CancellationToken,\n) -> Result<(Layer, bool), Error> {\n    let mut metadata = adopted.metadata();\n    debug_assert!(metadata.generation <= generation);\n    metadata.generation = generation;\n    metadata.shard = shard_identity.shard_index();\n\n    let conf = adoptee.conf;\n    let file_name = adopted.layer_desc().layer_name();\n\n    // We don't want to shut the timeline down during this operation because we do `delete_on_drop` below\n    let _gate = adoptee.gate.enter().map_err(|e| match e {\n        GateError::GateClosed => Error::ShuttingDown,\n    })?;\n\n    // depending if Layer::keep_resident, do a hardlink\n    let did_hardlink;\n    let owned = if let Some(adopted_resident) = adopted.keep_resident().await {\n        let adopted_path = adopted_resident.local_path();\n        let adoptee_path = local_layer_path(\n            conf,\n            &adoptee.tenant_shard_id,\n            &adoptee.timeline_id,\n            &file_name,\n            &metadata.generation,\n        );\n\n        match std::fs::hard_link(adopted_path, &adoptee_path) {\n            Ok(()) => {}\n            Err(e) if e.kind() == std::io::ErrorKind::AlreadyExists => {\n                // In theory we should not get into this situation as we are doing cleanups of the layer file after errors.\n                // However, we don't do cleanups for errors past `prepare`, so there is the slight chance to get to this branch.\n\n                // Double check that the file is orphan (probably from an earlier attempt), then 
delete it\n                let key = file_name.clone().into();\n                if adoptee\n                    .layers\n                    .read(LayerManagerLockHolder::DetachAncestor)\n                    .await\n                    .contains_key(&key)\n                {\n                    // We are supposed to filter out such cases before coming to this function\n                    return Err(Error::Prepare(anyhow::anyhow!(\n                        \"layer file {file_name} already present and inside layer map\"\n                    )));\n                }\n                tracing::info!(\"Deleting orphan layer file to make way for hard linking\");\n                // Delete orphan layer file and try again, to ensure this layer has a well understood source\n                std::fs::remove_file(&adoptee_path)\n                    .map_err(|e| Error::launder(e.into(), Error::Prepare))?;\n                std::fs::hard_link(adopted_path, &adoptee_path)\n                    .map_err(|e| Error::launder(e.into(), Error::Prepare))?;\n            }\n            Err(e) => {\n                return Err(Error::launder(e.into(), Error::Prepare));\n            }\n        };\n        did_hardlink = true;\n        Layer::for_resident(conf, adoptee, adoptee_path, file_name, metadata).drop_eviction_guard()\n    } else {\n        did_hardlink = false;\n        Layer::for_evicted(conf, adoptee, file_name, metadata)\n    };\n\n    let layer = match adoptee\n        .remote_client\n        .copy_timeline_layer(adopted, &owned, cancel)\n        .await\n    {\n        Ok(()) => owned,\n        Err(e) => {\n            {\n                // Clean up the layer so that on a retry we don't get errors that the file already exists\n                owned.delete_on_drop();\n                std::mem::drop(owned);\n            }\n            return Err(Error::launder(e, Error::Prepare));\n        }\n    };\n\n    Ok((layer, did_hardlink))\n}\n\npub(crate) enum DetachingAndReparenting {\n    
/// All of the following timeline ids were reparented and the timeline ancestor detach must be\n    /// marked as completed.\n    Reparented(HashSet<TimelineId>),\n\n    /// Some of the reparentings failed. The timeline ancestor detach must **not** be marked as\n    /// completed.\n    ///\n    /// Nested `must_reset_tenant` is set to true when any restart requiring changes were made.\n    SomeReparentingFailed { must_reset_tenant: bool },\n\n    /// Detaching and reparentings were completed in a previous attempt. Timeline ancestor detach\n    /// must be marked as completed.\n    AlreadyDone(HashSet<TimelineId>),\n}\n\nimpl DetachingAndReparenting {\n    pub(crate) fn reset_tenant_required(&self) -> bool {\n        use DetachingAndReparenting::*;\n        match self {\n            Reparented(_) => true,\n            SomeReparentingFailed { must_reset_tenant } => *must_reset_tenant,\n            AlreadyDone(_) => false,\n        }\n    }\n\n    pub(crate) fn completed(self) -> Option<HashSet<TimelineId>> {\n        use DetachingAndReparenting::*;\n        match self {\n            Reparented(x) | AlreadyDone(x) => Some(x),\n            SomeReparentingFailed { .. 
} => None,\n        }\n    }\n}\n\n/// See [`Timeline::detach_from_ancestor_and_reparent`].\npub(super) async fn detach_and_reparent(\n    detached: &Arc<Timeline>,\n    tenant: &TenantShard,\n    prepared: PreparedTimelineDetach,\n    ancestor_timeline_id: TimelineId,\n    ancestor_lsn: Lsn,\n    behavior: DetachBehavior,\n    _ctx: &RequestContext,\n) -> Result<DetachingAndReparenting, Error> {\n    let PreparedTimelineDetach { layers } = prepared;\n\n    #[derive(Debug)]\n    enum Ancestor {\n        NotDetached(Arc<Timeline>, Lsn),\n        Detached(Arc<Timeline>, Lsn),\n    }\n\n    let (recorded_branchpoint, still_ongoing) = {\n        let access = detached.remote_client.initialized_upload_queue()?;\n        let latest = access.latest_uploaded_index_part();\n\n        (\n            latest.lineage.detached_previous_ancestor(),\n            latest\n                .gc_blocking\n                .as_ref()\n                .is_some_and(|b| b.blocked_by(DetachAncestor)),\n        )\n    };\n    assert!(\n        still_ongoing,\n        \"cannot (detach? reparent)? 
complete if the operation is not still ongoing\"\n    );\n\n    let ancestor_to_detach = match detached.ancestor_timeline.as_ref() {\n        Some(mut ancestor) => {\n            while ancestor.timeline_id != ancestor_timeline_id {\n                match ancestor.ancestor_timeline.as_ref() {\n                    Some(found) => {\n                        if ancestor_lsn != ancestor.ancestor_lsn {\n                            return Err(Error::DetachReparent(anyhow::anyhow!(\n                                \"cannot find the ancestor timeline to detach from: wrong ancestor lsn\"\n                            )));\n                        }\n                        ancestor = found;\n                    }\n                    None => {\n                        return Err(Error::DetachReparent(anyhow::anyhow!(\n                            \"cannot find the ancestor timeline to detach from\"\n                        )));\n                    }\n                }\n            }\n            Some(ancestor)\n        }\n        None => None,\n    };\n    let ancestor = match (ancestor_to_detach, recorded_branchpoint) {\n        (Some(ancestor), None) => {\n            assert!(\n                !layers.is_empty(),\n                \"there should always be at least one layer to inherit\"\n            );\n            Ancestor::NotDetached(ancestor.clone(), detached.ancestor_lsn)\n        }\n        (Some(_), Some(_)) => {\n            panic!(\n                \"it should be impossible to get to here without having gone through the tenant reset; if the tenant was reset, then the ancestor_timeline would be None\"\n            );\n        }\n        (None, Some((ancestor_id, ancestor_lsn))) => {\n            // it has been either:\n            // - detached but still exists => we can try reparenting\n            // - detached and deleted\n            //\n            // either way, we must complete\n            assert!(\n                layers.is_empty(),\n                \"no 
layers should had been copied as detach is done\"\n            );\n\n            let existing = tenant.timelines.lock().unwrap().get(&ancestor_id).cloned();\n\n            if let Some(ancestor) = existing {\n                Ancestor::Detached(ancestor, ancestor_lsn)\n            } else {\n                let direct_children = reparented_direct_children(detached, tenant)?;\n                return Ok(DetachingAndReparenting::AlreadyDone(direct_children));\n            }\n        }\n        (None, None) => {\n            // TODO: make sure there are no `?` before tenant_reset from after a questionmark from\n            // here.\n            panic!(\n                \"bug: detach_and_reparent called on a timeline which has not been detached or which has no live ancestor\"\n            );\n        }\n    };\n\n    // publish the prepared layers before we reparent any of the timelines, so that on restart\n    // reparented timelines find layers. also do the actual detaching.\n    //\n    // if we crash after this operation, a retry will allow reparenting the remaining timelines as\n    // gc is blocked.\n\n    let (ancestor, ancestor_lsn, was_detached) = match ancestor {\n        Ancestor::NotDetached(ancestor, ancestor_lsn) => {\n            // this has to complete before any reparentings because otherwise they would not have\n            // layers on the new parent.\n            detached\n                .remote_client\n                .schedule_adding_existing_layers_to_index_detach_and_wait(\n                    &layers,\n                    (ancestor.timeline_id, ancestor_lsn),\n                )\n                .await\n                .context(\"publish layers and detach ancestor\")\n                .map_err(|e| Error::launder(e, Error::DetachReparent))?;\n\n            tracing::info!(\n                ancestor=%ancestor.timeline_id,\n                %ancestor_lsn,\n                inherited_layers=%layers.len(),\n                \"detached from ancestor\"\n       
     );\n            (ancestor, ancestor_lsn, true)\n        }\n        Ancestor::Detached(ancestor, ancestor_lsn) => (ancestor, ancestor_lsn, false),\n    };\n\n    if let DetachBehavior::MultiLevelAndNoReparent = behavior {\n        // Do not reparent if the user requests to behave so.\n        return Ok(DetachingAndReparenting::Reparented(HashSet::new()));\n    }\n\n    let mut tasks = tokio::task::JoinSet::new();\n\n    // Returns a single permit semaphore which will be used to make one reparenting succeed,\n    // others will fail as if those timelines had been stopped for whatever reason.\n    #[cfg(feature = \"testing\")]\n    let failpoint_sem = || -> Option<Arc<Semaphore>> {\n        fail::fail_point!(\"timeline-detach-ancestor::allow_one_reparented\", |_| Some(\n            Arc::new(Semaphore::new(1))\n        ));\n        None\n    }();\n\n    // because we are now keeping the slot in progress, it is unlikely that there will be any\n    // timeline deletions during this time. 
if we raced one, then we'll just ignore it.\n    {\n        let g = tenant.timelines.lock().unwrap();\n        reparentable_timelines(g.values(), detached, &ancestor, ancestor_lsn)\n            .cloned()\n            .for_each(|timeline| {\n                // important in this scope: we are holding the Tenant::timelines lock\n                let span = tracing::info_span!(\"reparent\", reparented=%timeline.timeline_id);\n                let new_parent = detached.timeline_id;\n                #[cfg(feature = \"testing\")]\n                let failpoint_sem = failpoint_sem.clone();\n\n                tasks.spawn(\n                    async move {\n                        let res = async {\n                            #[cfg(feature = \"testing\")]\n                            if let Some(failpoint_sem) = failpoint_sem {\n                                let _permit = failpoint_sem.acquire().await.map_err(|_| {\n                                    anyhow::anyhow!(\n                                        \"failpoint: timeline-detach-ancestor::allow_one_reparented\",\n                                    )\n                                })?;\n                                failpoint_sem.close();\n                            }\n\n                            timeline\n                                .remote_client\n                                .schedule_reparenting_and_wait(&new_parent)\n                                .await\n                        }\n                        .await;\n\n                        match res {\n                            Ok(()) => {\n                                tracing::info!(\"reparented\");\n                                Some(timeline)\n                            }\n                            Err(e) => {\n                                // with the use of tenant slot, raced timeline deletion is the most\n                                // likely reason.\n                                tracing::warn!(\"reparenting failed: 
{e:#}\");\n                                None\n                            }\n                        }\n                    }\n                    .instrument(span),\n                );\n            });\n    }\n\n    let reparenting_candidates = tasks.len();\n    let mut reparented = HashSet::with_capacity(tasks.len());\n\n    while let Some(res) = tasks.join_next().await {\n        match res {\n            Ok(Some(timeline)) => {\n                assert!(\n                    reparented.insert(timeline.timeline_id),\n                    \"duplicate reparenting? timeline_id={}\",\n                    timeline.timeline_id\n                );\n            }\n            Err(je) if je.is_cancelled() => unreachable!(\"not used\"),\n            // just ignore failures now, we can retry\n            Ok(None) => {}\n            Err(je) if je.is_panic() => {}\n            Err(je) => tracing::error!(\"unexpected join error: {je:?}\"),\n        }\n    }\n\n    let reparented_all = reparenting_candidates == reparented.len();\n\n    if reparented_all {\n        Ok(DetachingAndReparenting::Reparented(reparented))\n    } else {\n        tracing::info!(\n            reparented = reparented.len(),\n            candidates = reparenting_candidates,\n            \"failed to reparent all candidates; they can be retried after the tenant_reset\",\n        );\n\n        let must_reset_tenant = !reparented.is_empty() || was_detached;\n        Ok(DetachingAndReparenting::SomeReparentingFailed { must_reset_tenant })\n    }\n}\n\npub(super) async fn complete(\n    detached: &Arc<Timeline>,\n    tenant: &TenantShard,\n    mut attempt: Attempt,\n    _ctx: &RequestContext,\n) -> Result<(), Error> {\n    assert_eq!(detached.timeline_id, attempt.timeline_id);\n\n    if attempt.gate_entered.is_none() {\n        let entered = detached.gate.enter().map_err(|_| Error::ShuttingDown)?;\n        attempt.gate_entered = Some(entered);\n    } else {\n        // Some(gate_entered) means the tenant was 
not restarted, as is not required\n    }\n\n    assert!(detached.ancestor_timeline.is_none());\n\n    // this should be an 503 at least...?\n    fail::fail_point!(\n        \"timeline-detach-ancestor::complete_before_uploading\",\n        |_| Err(Error::Failpoint(\n            \"timeline-detach-ancestor::complete_before_uploading\"\n        ))\n    );\n\n    tenant\n        .gc_block\n        .remove(\n            detached,\n            crate::tenant::remote_timeline_client::index::GcBlockingReason::DetachAncestor,\n        )\n        .await\n        .map_err(|e| Error::launder(e, Error::Complete))?;\n\n    Ok(())\n}\n\n/// Query against a locked `Tenant::timelines`.\n///\n/// A timeline is reparentable if:\n///\n/// - It is not the timeline being detached.\n/// - It has the same ancestor as the timeline being detached. Note that the ancestor might not be the direct ancestor.\nfn reparentable_timelines<'a, I>(\n    timelines: I,\n    detached: &'a Arc<Timeline>,\n    ancestor: &'a Arc<Timeline>,\n    ancestor_lsn: Lsn,\n) -> impl Iterator<Item = &'a Arc<Timeline>> + 'a\nwhere\n    I: Iterator<Item = &'a Arc<Timeline>> + 'a,\n{\n    timelines.filter_map(move |tl| {\n        if Arc::ptr_eq(tl, detached) {\n            return None;\n        }\n\n        let tl_ancestor = tl.ancestor_timeline.as_ref()?;\n        let is_same = Arc::ptr_eq(ancestor, tl_ancestor);\n        let is_earlier = tl.get_ancestor_lsn() <= ancestor_lsn;\n\n        let is_deleting = tl\n            .delete_progress\n            .try_lock()\n            .map(|flow| !flow.is_not_started())\n            .unwrap_or(true);\n\n        if is_same && is_earlier && !is_deleting {\n            Some(tl)\n        } else {\n            None\n        }\n    })\n}\n\nfn check_no_archived_children_of_ancestor(\n    tenant: &TenantShard,\n    detached: &Arc<Timeline>,\n    ancestor: &Arc<Timeline>,\n    ancestor_lsn: Lsn,\n    detach_behavior: DetachBehavior,\n) -> Result<(), Error> {\n    match detach_behavior {\n 
       DetachBehavior::NoAncestorAndReparent => {\n            let timelines = tenant.timelines.lock().unwrap();\n            let timelines_offloaded = tenant.timelines_offloaded.lock().unwrap();\n\n            for timeline in\n                reparentable_timelines(timelines.values(), detached, ancestor, ancestor_lsn)\n            {\n                if timeline.is_archived() == Some(true) {\n                    return Err(Error::Archived(timeline.timeline_id));\n                }\n            }\n\n            for timeline_offloaded in timelines_offloaded.values() {\n                if timeline_offloaded.ancestor_timeline_id != Some(ancestor.timeline_id) {\n                    continue;\n                }\n                // This forbids the detach ancestor feature if flattened timelines are present,\n                // even if the ancestor_lsn is from after the branchpoint of the detached timeline.\n                // But as per current design, we don't record the ancestor_lsn of flattened timelines.\n                // This is a bit unfortunate, but as of writing this we don't support flattening\n                // anyway. 
Maybe we can evolve the data model in the future.\n                if let Some(retain_lsn) = timeline_offloaded.ancestor_retain_lsn {\n                    let is_earlier = retain_lsn <= ancestor_lsn;\n                    if !is_earlier {\n                        continue;\n                    }\n                }\n                return Err(Error::Archived(timeline_offloaded.timeline_id));\n            }\n        }\n        DetachBehavior::MultiLevelAndNoReparent => {\n            // We don't need to check anything if the user requested to not reparent.\n        }\n    }\n\n    Ok(())\n}\n\nasync fn fsync_timeline_dir(timeline: &Timeline, ctx: &RequestContext) {\n    let path = &timeline\n        .conf\n        .timeline_path(&timeline.tenant_shard_id, &timeline.timeline_id);\n    let timeline_dir = VirtualFile::open(&path, ctx)\n        .await\n        .fatal_err(\"VirtualFile::open for timeline dir fsync\");\n    timeline_dir\n        .sync_all()\n        .await\n        .fatal_err(\"VirtualFile::sync_all timeline dir\");\n}\n"
  },
  {
    "path": "pageserver/src/tenant/timeline/eviction_task.rs",
    "content": "//! The per-timeline layer eviction task, which evicts data which has not been accessed for more\n//! than a given threshold.\n//!\n//! Data includes all kinds of caches, namely:\n//! - (in-memory layers)\n//! - on-demand downloaded layer files on disk\n//! - (cached layer file pages)\n//! - derived data from layer file contents, namely:\n//!     - initial logical size\n//!     - partitioning\n//!     - (other currently missing unknowns)\n//!\n//! Items with parentheses are not (yet) touched by this task.\n//!\n//! See write-up on restart on-demand download spike: <https://gist.github.com/problame/2265bf7b8dc398be834abfead36c76b5>\nuse std::collections::HashMap;\nuse std::ops::ControlFlow;\nuse std::sync::Arc;\nuse std::time::{Duration, SystemTime};\n\nuse pageserver_api::models::{EvictionPolicy, EvictionPolicyLayerAccessThreshold};\nuse tokio::time::Instant;\nuse tokio_util::sync::CancellationToken;\nuse tracing::{Instrument, debug, info, info_span, instrument, warn};\nuse utils::completion;\nuse utils::sync::gate::GateGuard;\n\nuse super::Timeline;\nuse crate::context::{DownloadBehavior, RequestContext};\nuse crate::pgdatadir_mapping::CollectKeySpaceError;\nuse crate::task_mgr::{self, BACKGROUND_RUNTIME, TaskKind};\nuse crate::tenant::size::CalculateSyntheticSizeError;\nuse crate::tenant::storage_layer::LayerVisibilityHint;\nuse crate::tenant::tasks::{BackgroundLoopKind, BackgroundLoopSemaphorePermit, sleep_random};\nuse crate::tenant::timeline::EvictionError;\nuse crate::tenant::timeline::layer_manager::LayerManagerLockHolder;\nuse crate::tenant::{LogicalSizeCalculationCause, TenantShard};\n\n#[derive(Default)]\npub struct EvictionTaskTimelineState {\n    last_layer_access_imitation: Option<tokio::time::Instant>,\n}\n\n#[derive(Default)]\npub struct EvictionTaskTenantState {\n    last_layer_access_imitation: Option<Instant>,\n}\n\nimpl Timeline {\n    pub(super) fn launch_eviction_task(\n        self: &Arc<Self>,\n        parent: 
Arc<TenantShard>,\n        background_tasks_can_start: Option<&completion::Barrier>,\n    ) {\n        let self_clone = Arc::clone(self);\n        let background_tasks_can_start = background_tasks_can_start.cloned();\n        task_mgr::spawn(\n            BACKGROUND_RUNTIME.handle(),\n            TaskKind::Eviction,\n            self.tenant_shard_id,\n            Some(self.timeline_id),\n            &format!(\n                \"layer eviction for {}/{}\",\n                self.tenant_shard_id, self.timeline_id\n            ),\n            async move {\n                tokio::select! {\n                    _ = self_clone.cancel.cancelled() => { return Ok(()); }\n                    _ = completion::Barrier::maybe_wait(background_tasks_can_start) => {}\n                };\n\n                self_clone.eviction_task(parent).await;\n                Ok(())\n            },\n        );\n    }\n\n    #[instrument(skip_all, fields(tenant_id = %self.tenant_shard_id.tenant_id, shard_id = %self.tenant_shard_id.shard_slug(), timeline_id = %self.timeline_id))]\n    async fn eviction_task(self: Arc<Self>, tenant: Arc<TenantShard>) {\n        // acquire the gate guard only once within a useful span\n        let Ok(guard) = self.gate.enter() else {\n            return;\n        };\n\n        {\n            let policy = self.get_eviction_policy();\n            let period = match policy {\n                EvictionPolicy::LayerAccessThreshold(lat) => lat.period,\n                EvictionPolicy::OnlyImitiate(lat) => lat.period,\n                EvictionPolicy::NoEviction => Duration::from_secs(10),\n            };\n            if sleep_random(period, &self.cancel).await.is_err() {\n                return;\n            }\n        }\n\n        let ctx = RequestContext::new(TaskKind::Eviction, DownloadBehavior::Warn)\n            .with_scope_timeline(&self);\n        loop {\n            let policy = self.get_eviction_policy();\n            let cf = self\n                
.eviction_iteration(&tenant, &policy, &self.cancel, &guard, &ctx)\n                .await;\n\n            match cf {\n                ControlFlow::Break(()) => break,\n                ControlFlow::Continue(sleep_until) => {\n                    if tokio::time::timeout_at(sleep_until, self.cancel.cancelled())\n                        .await\n                        .is_ok()\n                    {\n                        break;\n                    }\n                }\n            }\n        }\n    }\n\n    #[instrument(skip_all, fields(policy_kind = policy.discriminant_str()))]\n    async fn eviction_iteration(\n        self: &Arc<Self>,\n        tenant: &TenantShard,\n        policy: &EvictionPolicy,\n        cancel: &CancellationToken,\n        gate: &GateGuard,\n        ctx: &RequestContext,\n    ) -> ControlFlow<(), Instant> {\n        debug!(\"eviction iteration: {policy:?}\");\n        let start = Instant::now();\n        let (period, threshold) = match policy {\n            EvictionPolicy::NoEviction => {\n                // check again in 10 seconds; XXX config watch mechanism\n                return ControlFlow::Continue(Instant::now() + Duration::from_secs(10));\n            }\n            EvictionPolicy::LayerAccessThreshold(p) => {\n                match self\n                    .eviction_iteration_threshold(tenant, p, cancel, gate, ctx)\n                    .await\n                {\n                    ControlFlow::Break(()) => return ControlFlow::Break(()),\n                    ControlFlow::Continue(()) => (),\n                }\n                (p.period, p.threshold)\n            }\n            EvictionPolicy::OnlyImitiate(p) => {\n                if self\n                    .imitiate_only(tenant, p, cancel, gate, ctx)\n                    .await\n                    .is_break()\n                {\n                    return ControlFlow::Break(());\n                }\n                (p.period, p.threshold)\n            }\n        };\n\n        
let elapsed = start.elapsed();\n        crate::tenant::tasks::warn_when_period_overrun(\n            elapsed,\n            period,\n            BackgroundLoopKind::Eviction,\n        );\n        // FIXME: if we were to mix policies on a pageserver, we would have no way to sense this. I\n        // don't think that is a relevant fear however, and regardless the imitation should be the\n        // most costly part.\n        crate::metrics::EVICTION_ITERATION_DURATION\n            .get_metric_with_label_values(&[\n                &format!(\"{}\", period.as_secs()),\n                &format!(\"{}\", threshold.as_secs()),\n            ])\n            .unwrap()\n            .observe(elapsed.as_secs_f64());\n\n        ControlFlow::Continue(start + period)\n    }\n\n    async fn eviction_iteration_threshold(\n        self: &Arc<Self>,\n        tenant: &TenantShard,\n        p: &EvictionPolicyLayerAccessThreshold,\n        cancel: &CancellationToken,\n        gate: &GateGuard,\n        ctx: &RequestContext,\n    ) -> ControlFlow<()> {\n        let now = SystemTime::now();\n\n        let permit = self.acquire_imitation_permit(cancel, ctx).await?;\n\n        self.imitate_layer_accesses(tenant, p, cancel, gate, permit, ctx)\n            .await?;\n\n        #[derive(Debug, Default)]\n        struct EvictionStats {\n            candidates: usize,\n            evicted: usize,\n            errors: usize,\n            not_evictable: usize,\n            timeouts: usize,\n            #[allow(dead_code)]\n            skipped_for_shutdown: usize,\n        }\n\n        let mut stats = EvictionStats::default();\n        // Gather layers for eviction.\n        // NB: all the checks can be invalidated as soon as we release the layer map lock.\n        // We don't want to hold the layer map lock during eviction.\n\n        // So, we just need to deal with this.\n\n        let mut js = tokio::task::JoinSet::new();\n        {\n            let guard = 
self.layers.read(LayerManagerLockHolder::Eviction).await;\n\n            guard\n                .likely_resident_layers()\n                .filter(|layer| {\n                    let last_activity_ts = layer.latest_activity();\n\n                    let no_activity_for = match now.duration_since(last_activity_ts) {\n                        Ok(d) => d,\n                        Err(_e) => {\n                            // We reach here if `now` < `last_activity_ts`, which can legitimately\n                            // happen if there is an access between us getting `now`, and us getting\n                            // the access stats from the layer.\n                            //\n                            // The other reason why it can happen is system clock skew because\n                            // SystemTime::now() is not monotonic, so, even if there is no access\n                            // to the layer after we get `now` at the beginning of this function,\n                            // it could be that `now`  < `last_activity_ts`.\n                            //\n                            // To distinguish the cases, we would need to record `Instant`s in the\n                            // access stats (i.e., monotonic timestamps), but then, the timestamps\n                            // values in the access stats would need to be `Instant`'s, and hence\n                            // they would be meaningless outside of the pageserver process.\n                            // At the time of writing, the trade-off is that access stats are more\n                            // valuable than detecting clock skew.\n                            return false;\n                        }\n                    };\n\n                    match layer.visibility() {\n                        LayerVisibilityHint::Visible => {\n                            // Usual case: a visible layer might be read any time, and we will keep it\n                            // 
resident until it hits our configured TTL threshold.\n                            no_activity_for > p.threshold\n                        }\n                        LayerVisibilityHint::Covered => {\n                            // Covered layers: this is probably a layer that was recently covered by\n                            // an image layer during compaction.  We don't evict it immediately, but\n                            // it doesn't stay resident for the full `threshold`: we just keep it\n                            // for a shorter time in case\n                            // - it is used for Timestamp->LSN lookups\n                            // - a new branch is created in recent history which will read this layer\n                            no_activity_for > p.period\n                        }\n                    }\n                })\n                .cloned()\n                .for_each(|layer| {\n                    js.spawn(async move {\n                        layer\n                            .evict_and_wait(std::time::Duration::from_secs(5))\n                            .await\n                    });\n                    stats.candidates += 1;\n                });\n        };\n\n        let join_all = async move {\n            while let Some(next) = js.join_next().await {\n                match next {\n                    Ok(Ok(())) => stats.evicted += 1,\n                    Ok(Err(EvictionError::NotFound | EvictionError::Downloaded)) => {\n                        stats.not_evictable += 1;\n                    }\n                    Ok(Err(EvictionError::Timeout)) => {\n                        stats.timeouts += 1;\n                    }\n                    Err(je) if je.is_cancelled() => unreachable!(\"not used\"),\n                    Err(je) if je.is_panic() => {\n                        /* already logged */\n                        stats.errors += 1;\n                    }\n                    Err(je) => tracing::error!(\"unknown 
JoinError: {je:?}\"),\n                }\n            }\n            stats\n        };\n\n        tokio::select! {\n            stats = join_all => {\n                if stats.candidates == stats.not_evictable {\n                    debug!(stats=?stats, \"eviction iteration complete\");\n                } else if stats.errors > 0 || stats.not_evictable > 0 || stats.timeouts > 0 {\n                    // reminder: timeouts are not eviction cancellations\n                    warn!(stats=?stats, \"eviction iteration complete\");\n                } else {\n                    info!(stats=?stats, \"eviction iteration complete\");\n                }\n            }\n            _ = cancel.cancelled() => {\n                // just drop the joinset to \"abort\"\n            }\n        }\n\n        ControlFlow::Continue(())\n    }\n\n    /// Like `eviction_iteration_threshold`, but without any eviction. Eviction will be done by\n    /// disk usage based eviction task.\n    async fn imitiate_only(\n        self: &Arc<Self>,\n        tenant: &TenantShard,\n        p: &EvictionPolicyLayerAccessThreshold,\n        cancel: &CancellationToken,\n        gate: &GateGuard,\n        ctx: &RequestContext,\n    ) -> ControlFlow<()> {\n        let permit = self.acquire_imitation_permit(cancel, ctx).await?;\n\n        self.imitate_layer_accesses(tenant, p, cancel, gate, permit, ctx)\n            .await\n    }\n\n    async fn acquire_imitation_permit(\n        &self,\n        cancel: &CancellationToken,\n        ctx: &RequestContext,\n    ) -> ControlFlow<(), BackgroundLoopSemaphorePermit<'static>> {\n        let acquire_permit =\n            crate::tenant::tasks::acquire_concurrency_permit(BackgroundLoopKind::Eviction, ctx);\n\n        tokio::select! 
{\n            permit = acquire_permit => ControlFlow::Continue(permit),\n            _ = cancel.cancelled() => ControlFlow::Break(()),\n            _ = self.cancel.cancelled() => ControlFlow::Break(()),\n        }\n    }\n\n    /// If we evict layers but keep cached values derived from those layers, then\n    /// we face a storm of on-demand downloads after pageserver restart.\n    /// The reason is that the restart empties the caches, and so, the values\n    /// need to be re-computed by accessing layers, which we evicted while the\n    /// caches were filled.\n    ///\n    /// Solutions here would be one of the following:\n    /// 1. Have a persistent cache.\n    /// 2. Count every access to a cached value to the access stats of all layers\n    ///    that were accessed to compute the value in the first place.\n    /// 3. Invalidate the caches at a period of < p.threshold/2, so that the values\n    ///    get re-computed from layers, thereby counting towards layer access stats.\n    /// 4. 
Make the eviction task imitate the layer accesses that typically hit caches.\n    ///\n    /// We follow approach (4) here because in Neon prod deployment:\n    /// - page cache is quite small => high churn => low hit rate\n    ///   => eviction gets correct access stats\n    /// - value-level caches such as logical size & repatition have a high hit rate,\n    ///   especially for inactive tenants\n    ///   => eviction sees zero accesses for these\n    ///   => they cause the on-demand download storm on pageserver restart\n    ///\n    /// We should probably move to persistent caches in the future, or avoid\n    /// having inactive tenants attached to pageserver in the first place.\n    #[instrument(skip_all)]\n    async fn imitate_layer_accesses(\n        &self,\n        tenant: &TenantShard,\n        p: &EvictionPolicyLayerAccessThreshold,\n        cancel: &CancellationToken,\n        gate: &GateGuard,\n        permit: BackgroundLoopSemaphorePermit<'static>,\n        ctx: &RequestContext,\n    ) -> ControlFlow<()> {\n        if !self.tenant_shard_id.is_shard_zero() {\n            // Shards !=0 do not maintain accurate relation sizes, and do not need to calculate logical size\n            // for consumption metrics (consumption metrics are only sent from shard 0).  We may therefore\n            // skip imitating logical size accesses for eviction purposes.\n            return ControlFlow::Continue(());\n        }\n\n        let mut state = self.eviction_task_timeline_state.lock().await;\n\n        // Only do the imitate_layer accesses approximately as often as the threshold.  
A little\n        // more frequently, to avoid this period racing with the threshold/period-th eviction iteration.\n        let inter_imitate_period = p.threshold.checked_sub(p.period).unwrap_or(p.threshold);\n\n        match state.last_layer_access_imitation {\n            Some(ts) if ts.elapsed() < inter_imitate_period => { /* no need to run */ }\n            _ => {\n                self.imitate_timeline_cached_layer_accesses(gate, ctx).await;\n                state.last_layer_access_imitation = Some(tokio::time::Instant::now())\n            }\n        }\n        drop(state);\n\n        if cancel.is_cancelled() {\n            return ControlFlow::Break(());\n        }\n\n        // This task is timeline-scoped, but the synthetic size calculation is tenant-scoped.\n        // Make one of the tenant's timelines draw the short straw and run the calculation.\n        // The others wait until the calculation is done so that they take into account the\n        // imitated accesses that the winner made.\n        let (mut state, _permit) = {\n            if let Ok(locked) = tenant.eviction_task_tenant_state.try_lock() {\n                (locked, permit)\n            } else {\n                // we might need to wait for a long time here in case of pathological synthetic\n                // size calculation performance\n                drop(permit);\n                let locked = tokio::select! 
{\n                    locked = tenant.eviction_task_tenant_state.lock() => locked,\n                    _ = self.cancel.cancelled() => {\n                        return ControlFlow::Break(())\n                    },\n                    _ = cancel.cancelled() => {\n                        return ControlFlow::Break(())\n                    }\n                };\n                // then reacquire -- this will be bad if there is a lot of traffic, but because we\n                // released the permit, the overall latency will be much better.\n                let permit = self.acquire_imitation_permit(cancel, ctx).await?;\n                (locked, permit)\n            }\n        };\n        match state.last_layer_access_imitation {\n            Some(ts) if ts.elapsed() < inter_imitate_period => { /* no need to run */ }\n            _ => {\n                self.imitate_synthetic_size_calculation_worker(tenant, cancel, ctx)\n                    .await;\n                state.last_layer_access_imitation = Some(tokio::time::Instant::now());\n            }\n        }\n        drop(state);\n\n        if cancel.is_cancelled() {\n            return ControlFlow::Break(());\n        }\n\n        ControlFlow::Continue(())\n    }\n\n    /// Recompute the values which would cause on-demand downloads during restart.\n    #[instrument(skip_all)]\n    async fn imitate_timeline_cached_layer_accesses(\n        &self,\n        guard: &GateGuard,\n        ctx: &RequestContext,\n    ) {\n        let lsn = self.get_last_record_lsn();\n\n        // imitiate on-restart initial logical size\n        let size = self\n            .calculate_logical_size(\n                lsn,\n                LogicalSizeCalculationCause::EvictionTaskImitation,\n                guard,\n                ctx,\n            )\n            .instrument(info_span!(\"calculate_logical_size\"))\n            .await;\n\n        match &size {\n            Ok(_size) => {\n                // good, don't log it to avoid 
confusion\n            }\n            Err(_) => {\n                // we have known issues for which we already log this on consumption metrics,\n                // gc, and compaction. leave logging out for now.\n                //\n                // https://github.com/neondatabase/neon/issues/2539\n            }\n        }\n\n        // imitate repartitioning on first compaction\n        if let Err(e) = self\n            .collect_keyspace(lsn, ctx)\n            .instrument(info_span!(\"collect_keyspace\"))\n            .await\n        {\n            // if this failed, we probably failed logical size because these use the same keys\n            if size.is_err() {\n                // ignore, see above comment\n            } else {\n                match e {\n                    CollectKeySpaceError::Cancelled => {\n                        // Shutting down, ignore\n                    }\n                    err => {\n                        warn!(\n                            \"failed to collect keyspace but succeeded in calculating logical size: {err:#}\"\n                        );\n                    }\n                }\n            }\n        }\n    }\n\n    // Imitate the synthetic size calculation done by the consumption_metrics module.\n    #[instrument(skip_all)]\n    async fn imitate_synthetic_size_calculation_worker(\n        &self,\n        tenant: &TenantShard,\n        cancel: &CancellationToken,\n        ctx: &RequestContext,\n    ) {\n        if self.conf.metric_collection_endpoint.is_none() {\n            // We don't start the consumption metrics task if this is not set in the config.\n            // So, no need to imitate the accesses in that case.\n            return;\n        }\n\n        // The consumption metrics are collected on a per-tenant basis, by a single\n        // global background loop.\n        // It limits the number of synthetic size calculations using the global\n        // `concurrent_tenant_size_logical_size_queries` semaphore 
to not overload\n        // the pageserver. (size calculation is somewhat expensive in terms of CPU and IOs).\n        //\n        // If we used that same semaphore here, then we'd compete for the\n        // same permits, which may impact timeliness of consumption metrics.\n        // That is a no-go, as consumption metrics are much more important\n        // than what we do here.\n        //\n        // So, we have a separate semaphore, initialized to the same\n        // number of permits as the `concurrent_tenant_size_logical_size_queries`.\n        // In the worst case, we would have twice the number of concurrent size calculations.\n        // But in practice, the `p.threshold` >> `consumption metric interval`, and\n        // we spread out the eviction task using `random_init_delay`.\n        // So, the chance of the worst case is quite low in practice.\n        // It runs as a per-tenant task, but the eviction_task.rs is per-timeline.\n        // So, we must coordinate with other eviction tasks of this tenant.\n        let limit = self\n            .conf\n            .eviction_task_immitated_concurrent_logical_size_queries\n            .inner();\n\n        let mut throwaway_cache = HashMap::new();\n        let gather = crate::tenant::size::gather_inputs(\n            tenant,\n            limit,\n            None,\n            &mut throwaway_cache,\n            LogicalSizeCalculationCause::EvictionTaskImitation,\n            cancel,\n            ctx,\n        )\n        .instrument(info_span!(\"gather_inputs\"));\n\n        tokio::select! 
{\n            _ = cancel.cancelled() => {}\n            gather_result = gather => {\n                match gather_result {\n                    Ok(_) => {},\n                    // It can happen sometimes that we hit this instead of the cancellation token firing above\n                    Err(CalculateSyntheticSizeError::Cancelled) => {}\n                    Err(e) => {\n                        // We don't care about the result, but, if it failed, we should log it,\n                        // since consumption metric might be hitting the cached value and\n                        // thus not encountering this error.\n                        warn!(\"failed to imitate synthetic size calculation accesses: {e:#}\")\n                    }\n                }\n           }\n        }\n    }\n}\n"
  },
  {
    "path": "pageserver/src/tenant/timeline/handle.rs",
    "content": "//! A cache for [`crate::tenant::mgr`]+`Tenant::get_timeline`+`Timeline::gate.enter()`.\n//!\n//! # Motivation\n//!\n//! On a single page service connection, we're typically serving a single TenantTimelineId.\n//!\n//! Without sharding, there is a single Timeline object to which we dispatch\n//! all requests. For example, a getpage request gets dispatched to the\n//! Timeline::get method of the Timeline object that represents the\n//! (tenant,timeline) of that connection.\n//!\n//! With sharding, for each request that comes in on the connection,\n//! we first have to perform shard routing based on the requested key (=~ page number).\n//! The result of shard routing is a Timeline object.\n//! We then dispatch the request to that Timeline object.\n//!\n//! Regardless of whether the tenant is sharded or not, we want to ensure that\n//! we hold the Timeline gate open while we're invoking the method on the\n//! Timeline object.\n//!\n//! We want to avoid the overhead of doing, for each incoming request,\n//! - tenant manager lookup (global rwlock + btreemap lookup for shard routing)\n//! - cloning the `Arc<Timeline>` out of the tenant manager so we can\n//!   release the mgr rwlock before doing any request processing work\n//! - re-entering the Timeline gate for each Timeline method invocation.\n//!\n//! Regardless of how we accomplish the above, it should not\n//! prevent the Timeline from shutting down promptly.\n//!\n//!\n//! # Design\n//!\n//! ## Data Structures\n//!\n//! There are two concepts expressed as associated types in the `Types` trait:\n//! - `TenantManager`: the thing that performs the expensive work. It produces\n//!   a `Timeline` object, which is the other associated type.\n//! - `Timeline`: the item that we cache for fast (TenantTimelineId,ShardSelector) lookup.\n//!\n//! There are three user-facing data structures exposed by this module:\n//! - `PerTimelineState`: a struct embedded into each Timeline struct. 
Lifetime == Timeline lifetime.\n//! - `Cache`: a struct private to each connection handler; Lifetime == connection lifetime.\n//! - `Handle`: a smart pointer that derefs to the Types::Timeline.\n//! - `WeakHandle`: downgrade of a `Handle` that does not keep the gate open, but allows\n//!   trying to upgrade back to a `Handle`. If successful, a re-upgraded Handle will always\n//!   point to the same cached `Types::Timeline`. Upgrades never invoke the `TenantManager`.\n//!\n//! Internally, there is 0 or 1 `HandleInner` per `(Cache,Timeline)`.\n//! Since Cache:Connection is 1:1, there is 0 or 1 `HandleInner` per `(Connection,Timeline)`.\n//!\n//! The `HandleInner` is allocated as an `Arc<Mutex<HandleInner>>` and\n//! referenced weakly and strongly from various places which we are now illustrating.\n//! For brevity, we will omit the `Arc<Mutex<>>` part in the following and instead\n//! use `strong ref` and `weak ref` when referring to the `Arc<Mutex<HandleInner>>`\n//! or `Weak<Mutex<HandleInner>>`, respectively.\n//!\n//! - The `Handle` is a strong ref.\n//! - The `WeakHandle` is a weak ref.\n//! - The `PerTimelineState` contains a `HashMap<CacheId, strong ref>`.\n//! - The `Cache` is a `HashMap<unique identifier for the shard, weak ref>`.\n//!\n//! Lifetimes:\n//! - `WeakHandle` and `Handle`: single pagestream request.\n//! - `Cache`: single page service connection.\n//! - `PerTimelineState`: lifetime of the Timeline object (i.e., till `Timeline::shutdown`).\n//!\n//! ## Request Handling Flow (= filling and using the `Cache`)\n//!\n//! To dispatch a request, the page service connection calls `Cache::get`.\n//!\n//! A cache miss means we call Types::TenantManager::resolve for shard routing,\n//! cloning the `Arc<Timeline>` out of it, and entering the gate. The result of\n//! resolve() is the object we want to cache, and return `Handle`s to for subsequent `Cache::get` calls.\n//!\n//! 
We wrap the object returned from resolve() in an `Arc` and store that inside the\n//! `Arc<Mutex<HandleInner>>`. A weak ref to the HandleInner is stored in the `Cache`\n//! and a strong ref in the `PerTimelineState`.\n//! Another strong ref is returned wrapped in a `Handle`.\n//!\n//! For subsequent requests, `Cache::get` will perform a \"fast path\" shard routing\n//! and find the weak ref in the cache.\n//! We upgrade the weak ref to a strong ref and return it wrapped in a `Handle`.\n//!\n//! The pagestream processing is pipelined and involves a batching step.\n//! While a request is batching, the `Handle` is downgraded to a `WeakHandle`.\n//! When the batch is ready to be executed, the `WeakHandle` is upgraded back to a `Handle`\n//! and the request handler dispatches the request to the right `<Handle as Deref<Target = Timeline>>::$request_method`.\n//! It then drops the `Handle`, and thus the `Arc<Mutex<HandleInner>>` inside it.\n//!\n//! # Performance\n//!\n//! Remember from the introductory section:\n//!\n//! > We want to avoid the overhead of doing, for each incoming request,\n//! > - tenant manager lookup (global rwlock + btreemap lookup for shard routing)\n//! > - cloning the `Arc<Timeline>` out of the tenant manager so we can\n//! >   release the mgr rwlock before doing any request processing work\n//! > - re-entering the Timeline gate for each Timeline method invocation.\n//!\n//! All of these boil down to some state that is either globally shared among all shards\n//! or state shared among all tasks that serve a particular timeline.\n//! It is either protected by RwLock or manipulated via atomics.\n//! Even atomics are costly when shared across multiple cores.\n//! So, we want to avoid any permanent need for coordination between page_service tasks.\n//!\n//! The solution is to add indirection: we wrap the Types::Timeline object that is\n//! returned by Types::TenantManager into an Arc that is private to the `HandleInner`\n//! 
and hence to the single Cache / page_service connection.\n//! (Review the \"Data Structures\" section if that is unclear to you.)\n//!\n//!\n//! When upgrading a `WeakHandle`, we upgrade its weak to a strong ref (of the `Mutex<HandleInner>`),\n//! lock the mutex, take out a clone of the `Arc<Types::Timeline>`, and drop the Mutex.\n//! The Mutex is not contended because it is private to the connection.\n//! And again, the  `Arc<Types::Timeline>` clone is cheap because that wrapper\n//! Arc's refcounts are private to the connection.\n//!\n//! Downgrading drops these two Arcs, which again, manipulates refcounts that are private to the connection.\n//!\n//!\n//! # Shutdown\n//!\n//! The attentive reader may have noticed the following reference cycle around the `Arc<Timeline>`:\n//!\n//! ```text\n//! Timeline --owns--> PerTimelineState --strong--> HandleInner --strong--> Types::Timeline --strong--> Timeline\n//! ```\n//!\n//! Further, there is this cycle:\n//!\n//! ```text\n//! Timeline --owns--> PerTimelineState --strong--> HandleInner --strong--> Types::Timeline --strong--> GateGuard --keepalive--> Timeline\n//! ```\n//!\n//! The former cycle is a memory leak if not broken.\n//! The latter cycle further prevents the Timeline from shutting down\n//! because we certainly won't drop the Timeline while the GateGuard is alive.\n//! Preventing shutdown is the whole point of this handle/cache system,\n//! but when the Timeline needs to shut down, we need to break the cycle.\n//!\n//! The cycle is broken by either\n//! - Timeline shutdown (=> `PerTimelineState::shutdown`)\n//! - Connection shutdown (=> dropping the `Cache`).\n//!\n//! Both transition the `HandleInner` from [`HandleInner::Open`] to\n//! [`HandleInner::ShutDown`], which drops the only long-lived\n//! `Arc<Types::Timeline>`. Once the last short-lived Arc<Types::Timeline>\n//! is dropped, the `Types::Timeline` gets dropped and thereby\n//! the `GateGuard` and the `Arc<Timeline>` that it stores,\n//! 
thereby breaking both cycles.\n//!\n//! `PerTimelineState::shutdown` drops all the `HandleInner`s it contains,\n//! thereby breaking the cycle.\n//! It also initiates draining of already existing `Handle`s by\n//! poisoning things so that no new `HandleInner`s can be added\n//! to the `PerTimelineState`, which will make subsequent `Cache::get` fail.\n//!\n//! Concurrently existing / already upgraded `Handle`s will extend the\n//! lifetime of the `Arc<Mutex<HandleInner>>` and hence cycles.\n//! However, since `Handle`s are short-lived and new `Handle`s are not\n//! handed out from `Cache::get` or `WeakHandle::upgrade` after\n//! `PerTimelineState::shutdown`, that extension of the cycle is bounded.\n//!\n//! Concurrently existing `WeakHandle`s will fail to `upgrade()`:\n//! while they will succeed in upgrading `Weak<Mutex<HandleInner>>`,\n//! they will find the inner in the `HandleInner::ShutDown` state where the\n//! `Arc<GateGuard>` and Timeline have already been dropped.\n//!\n//! Dropping the `Cache` undoes the registration of this `Cache`'s\n//! `HandleInner`s from all the `PerTimelineState`s, i.e., it\n//! removes the strong ref to each of its `HandleInner`s\n//! from all the `PerTimelineState`s.\n//!\n//! # Locking Rules\n//!\n//! To prevent deadlocks we:\n//!\n//! 1. Only ever hold one of the locks at a time.\n//! 2. Don't add more than one Drop impl that locks on the\n//!    cycles above.\n//!\n//! As per (2), that impl is in `Drop for Cache`.\n//!\n//! # Fast Path for Shard Routing\n//!\n//! The `Cache` has a fast path for shard routing to avoid calling into\n//! the tenant manager for every request.\n//!\n//! The `Cache` maintains a hash map of `ShardTimelineId` to `WeakHandle`s.\n//!\n//! The current implementation uses the first entry in the hash map\n//! to determine the `ShardParameters` and derive the correct\n//! `ShardIndex` for the requested key.\n//!\n//! 
It then looks up the hash map for that `ShardTimelineId := {ShardIndex,TimelineId}`.\n//!\n//! If the lookup is successful and the `WeakHandle` can be upgraded,\n//! it's a hit.\n//!\n//! ## Cache invalidation\n//!\n//! The insight is that cache invalidation is sufficient and most efficiently if done lazily.\n//! The only reasons why an entry in the cache can become stale are:\n//! 1. The `PerTimelineState` / Timeline is shutting down e.g. because the shard is\n//!    being detached, timeline or shard deleted, or pageserver is shutting down.\n//! 2. We're doing a shard split and new traffic should be routed to the child shards.\n//!\n//! Regarding (1), we will eventually fail to upgrade the `WeakHandle` once the\n//! timeline has shut down, and when that happens, we remove the entry from the cache.\n//!\n//! Regarding (2), the insight is that it is toally fine to keep dispatching requests\n//! to the parent shard during a shard split. Eventually, the shard split task will\n//! shut down the parent => case (1).\n\nuse std::collections::HashMap;\nuse std::collections::hash_map;\nuse std::sync::Arc;\nuse std::sync::Mutex;\nuse std::sync::Weak;\nuse std::time::Duration;\n\nuse pageserver_api::shard::ShardIdentity;\nuse tracing::{instrument, trace};\nuse utils::id::TimelineId;\nuse utils::shard::{ShardIndex, ShardNumber};\n\nuse crate::page_service::GetActiveTimelineError;\nuse crate::tenant::GetTimelineError;\nuse crate::tenant::mgr::{GetActiveTenantError, ShardSelector};\n\npub(crate) trait Types: Sized {\n    type TenantManager: TenantManager<Self> + Sized;\n    type Timeline: Timeline<Self> + Sized;\n}\n\n/// Uniquely identifies a [`Cache`] instance over the lifetime of the process.\n/// Required so [`Cache::drop`] can take out the handles from the [`PerTimelineState`].\n/// Alternative to this would be to allocate [`Cache`] in a `Box` and identify it by the pointer.\n#[derive(Debug, Hash, PartialEq, Eq, Clone, Copy)]\nstruct CacheId(u64);\n\nimpl CacheId {\n    fn 
next() -> Self {\n        static NEXT_ID: std::sync::atomic::AtomicU64 = std::sync::atomic::AtomicU64::new(1);\n        let id = NEXT_ID.fetch_add(1, std::sync::atomic::Ordering::Relaxed);\n        if id == 0 {\n            panic!(\"CacheId::new() returned 0, overflow\");\n        }\n        Self(id)\n    }\n}\n\n/// See module-level comment.\npub(crate) struct Cache<T: Types> {\n    id: CacheId,\n    map: Map<T>,\n}\n\ntype Map<T> = HashMap<ShardTimelineId, WeakHandle<T>>;\n\nimpl<T: Types> Default for Cache<T> {\n    fn default() -> Self {\n        Self {\n            id: CacheId::next(),\n            map: Default::default(),\n        }\n    }\n}\n\n#[derive(PartialEq, Eq, Debug, Hash, Clone, Copy)]\npub(crate) struct ShardTimelineId {\n    pub(crate) shard_index: ShardIndex,\n    pub(crate) timeline_id: TimelineId,\n}\n\n/// See module-level comment.\npub(crate) struct Handle<T: Types> {\n    inner: Arc<Mutex<HandleInner<T>>>,\n    open: Arc<T::Timeline>,\n}\npub(crate) struct WeakHandle<T: Types> {\n    inner: Weak<Mutex<HandleInner<T>>>,\n}\n\nenum HandleInner<T: Types> {\n    Open(Arc<T::Timeline>),\n    ShutDown,\n}\n\n/// Embedded in each [`Types::Timeline`] as the anchor for the only long-lived strong ref to `HandleInner`.\n///\n/// See module-level comment for details.\npub struct PerTimelineState<T: Types> {\n    // None = shutting down\n    #[allow(clippy::type_complexity)]\n    handles: Mutex<Option<HashMap<CacheId, Arc<Mutex<HandleInner<T>>>>>>,\n}\n\nimpl<T: Types> Default for PerTimelineState<T> {\n    fn default() -> Self {\n        Self {\n            handles: Mutex::new(Some(Default::default())),\n        }\n    }\n}\n\n/// Abstract view of [`crate::tenant::mgr`], for testability.\npub(crate) trait TenantManager<T: Types> {\n    /// Invoked by [`Cache::get`] to resolve a [`ShardTimelineId`] to a [`Types::Timeline`].\n    async fn resolve(\n        &self,\n        timeline_id: TimelineId,\n        shard_selector: ShardSelector,\n    ) -> 
Result<T::Timeline, GetActiveTimelineError>;\n}\n\n/// Abstract view of an [`Arc<Timeline>`], for testability.\npub(crate) trait Timeline<T: Types> {\n    fn shard_timeline_id(&self) -> ShardTimelineId;\n    fn get_shard_identity(&self) -> &ShardIdentity;\n    fn per_timeline_state(&self) -> &PerTimelineState<T>;\n}\n\n/// Internal type used in [`Cache::get`].\nenum RoutingResult<T: Types> {\n    FastPath(Handle<T>),\n    SlowPath(ShardTimelineId),\n    NeedConsultTenantManager,\n}\n\nimpl<T: Types> Cache<T> {\n    /* BEGIN_HADRON */\n    /// A wrapper of do_get to resolve the tenant shard for a get page request.\n    #[instrument(level = \"trace\", skip_all)]\n    pub(crate) async fn get(\n        &mut self,\n        timeline_id: TimelineId,\n        shard_selector: ShardSelector,\n        tenant_manager: &T::TenantManager,\n    ) -> Result<Handle<T>, GetActiveTimelineError> {\n        const GET_MAX_RETRIES: usize = 10;\n        const RETRY_BACKOFF: Duration = Duration::from_millis(100);\n        let mut attempt = 0;\n        loop {\n            attempt += 1;\n            match self\n                .do_get(timeline_id, shard_selector, tenant_manager)\n                .await\n            {\n                Ok(handle) => return Ok(handle),\n                Err(\n                    e @ GetActiveTimelineError::Tenant(GetActiveTenantError::WaitForActiveTimeout {\n                        ..\n                    }),\n                ) => {\n                    // Retry on tenant manager error to handle tenant split more gracefully\n                    if attempt < GET_MAX_RETRIES {\n                        tokio::time::sleep(RETRY_BACKOFF).await;\n                        continue;\n                    } else {\n                        tracing::info!(\n                            \"Failed to resolve tenant shard after {} attempts: {:?}\",\n                            GET_MAX_RETRIES,\n                            e\n                        );\n                        
return Err(e);\n                    }\n                }\n                Err(err) => return Err(err),\n            }\n        }\n    }\n    /* END_HADRON */\n\n    /// See module-level comment for details.\n    ///\n    /// Does NOT check for the shutdown state of [`Types::Timeline`].\n    /// Instead, the methods of [`Types::Timeline`] that are invoked through\n    /// the [`Handle`] are responsible for checking these conditions\n    /// and if so, return an error that causes the page service to\n    /// close the connection.\n    #[instrument(level = \"trace\", skip_all)]\n    async fn do_get(\n        &mut self,\n        timeline_id: TimelineId,\n        shard_selector: ShardSelector,\n        tenant_manager: &T::TenantManager,\n    ) -> Result<Handle<T>, GetActiveTimelineError> {\n        // terminates because when every iteration we remove an element from the map\n        let miss: ShardSelector = loop {\n            let routing_state = self.shard_routing(timeline_id, shard_selector);\n            match routing_state {\n                RoutingResult::FastPath(handle) => return Ok(handle),\n                RoutingResult::SlowPath(key) => match self.map.get(&key) {\n                    Some(cached) => match cached.upgrade() {\n                        Ok(upgraded) => return Ok(upgraded),\n                        Err(HandleUpgradeError::ShutDown) => {\n                            // TODO: dedup with shard_routing()\n                            trace!(\"handle cache stale\");\n                            self.map.remove(&key).unwrap();\n                            continue;\n                        }\n                    },\n                    None => break ShardSelector::Known(key.shard_index),\n                },\n                RoutingResult::NeedConsultTenantManager => break shard_selector,\n            }\n        };\n        self.get_miss(timeline_id, miss, tenant_manager).await\n    }\n\n    #[inline(always)]\n    fn shard_routing(\n        &mut self,\n    
    timeline_id: TimelineId,\n        shard_selector: ShardSelector,\n    ) -> RoutingResult<T> {\n        loop {\n            // terminates because when every iteration we remove an element from the map\n            let Some((first_key, first_handle)) = self.map.iter().next() else {\n                return RoutingResult::NeedConsultTenantManager;\n            };\n            let Ok(first_handle) = first_handle.upgrade() else {\n                // TODO: dedup with get()\n                trace!(\"handle cache stale\");\n                let first_key_owned = *first_key;\n                self.map.remove(&first_key_owned).unwrap();\n                continue;\n            };\n\n            let first_handle_shard_identity = first_handle.get_shard_identity();\n            let make_shard_index = |shard_num: ShardNumber| ShardIndex {\n                shard_number: shard_num,\n                shard_count: first_handle_shard_identity.count,\n            };\n\n            let need_idx = match shard_selector {\n                ShardSelector::Page(key) => {\n                    make_shard_index(first_handle_shard_identity.get_shard_number(&key))\n                }\n                ShardSelector::Zero => make_shard_index(ShardNumber(0)),\n                ShardSelector::Known(shard_idx) => shard_idx,\n            };\n            let need_shard_timeline_id = ShardTimelineId {\n                shard_index: need_idx,\n                timeline_id,\n            };\n            let first_handle_shard_timeline_id = ShardTimelineId {\n                shard_index: first_handle_shard_identity.shard_index(),\n                timeline_id: first_handle.shard_timeline_id().timeline_id,\n            };\n\n            if need_shard_timeline_id == first_handle_shard_timeline_id {\n                return RoutingResult::FastPath(first_handle);\n            } else {\n                return RoutingResult::SlowPath(need_shard_timeline_id);\n            }\n        }\n    }\n\n    #[instrument(level = 
\"trace\", skip_all)]\n    #[inline(always)]\n    async fn get_miss(\n        &mut self,\n        timeline_id: TimelineId,\n        shard_selector: ShardSelector,\n        tenant_manager: &T::TenantManager,\n    ) -> Result<Handle<T>, GetActiveTimelineError> {\n        let timeline = tenant_manager.resolve(timeline_id, shard_selector).await?;\n        let key = timeline.shard_timeline_id();\n        match &shard_selector {\n            ShardSelector::Zero => assert_eq!(key.shard_index.shard_number, ShardNumber(0)),\n            ShardSelector::Page(_) => (), // gotta trust tenant_manager\n            ShardSelector::Known(idx) => assert_eq!(idx, &key.shard_index),\n        }\n\n        trace!(\"creating new HandleInner\");\n        let timeline = Arc::new(timeline);\n        let handle_inner_arc = Arc::new(Mutex::new(HandleInner::Open(Arc::clone(&timeline))));\n        let handle_weak = WeakHandle {\n            inner: Arc::downgrade(&handle_inner_arc),\n        };\n        let handle = handle_weak\n            .upgrade()\n            .ok()\n            .expect(\"we just created it and it's not linked anywhere yet\");\n        let mut lock_guard = timeline\n            .per_timeline_state()\n            .handles\n            .lock()\n            .expect(\"mutex poisoned\");\n        let Some(per_timeline_state) = &mut *lock_guard else {\n            return Err(GetActiveTimelineError::Timeline(\n                GetTimelineError::ShuttingDown,\n            ));\n        };\n        let replaced = per_timeline_state.insert(self.id, Arc::clone(&handle_inner_arc));\n        assert!(replaced.is_none(), \"some earlier code left a stale handle\");\n        match self.map.entry(key) {\n            hash_map::Entry::Occupied(_o) => {\n                // This cannot not happen because\n                // 1. we're the _miss_ handle, i.e., `self.map` didn't contain an entry and\n                // 2. 
we were holding &mut self during .resolve().await above, so, no other thread can have inserted a handle\n                //    while we were waiting for the tenant manager.\n                unreachable!()\n            }\n            hash_map::Entry::Vacant(v) => {\n                v.insert(handle_weak);\n            }\n        }\n        Ok(handle)\n    }\n}\n\npub(crate) enum HandleUpgradeError {\n    ShutDown,\n}\n\nimpl<T: Types> WeakHandle<T> {\n    pub(crate) fn upgrade(&self) -> Result<Handle<T>, HandleUpgradeError> {\n        let Some(inner) = Weak::upgrade(&self.inner) else {\n            return Err(HandleUpgradeError::ShutDown);\n        };\n        let lock_guard = inner.lock().expect(\"poisoned\");\n        match &*lock_guard {\n            HandleInner::Open(open) => {\n                let open = Arc::clone(open);\n                drop(lock_guard);\n                Ok(Handle { open, inner })\n            }\n            HandleInner::ShutDown => Err(HandleUpgradeError::ShutDown),\n        }\n    }\n\n    pub(crate) fn is_same_handle_as(&self, other: &WeakHandle<T>) -> bool {\n        Weak::ptr_eq(&self.inner, &other.inner)\n    }\n}\n\nimpl<T: Types> std::ops::Deref for Handle<T> {\n    type Target = T::Timeline;\n    fn deref(&self) -> &Self::Target {\n        &self.open\n    }\n}\n\nimpl<T: Types> Handle<T> {\n    pub(crate) fn downgrade(&self) -> WeakHandle<T> {\n        WeakHandle {\n            inner: Arc::downgrade(&self.inner),\n        }\n    }\n}\n\nimpl<T: Types> PerTimelineState<T> {\n    /// After this method returns, [`Cache::get`] will never again return a [`Handle`]\n    /// to the [`Types::Timeline`] that embeds this per-timeline state.\n    /// Even if [`TenantManager::resolve`] would still resolve to it.\n    ///\n    /// Already-alive [`Handle`]s will remain open and usable, and will keep the [`Types::Timeline`] alive.\n    /// That's ok because they're short-lived. 
See module-level comment for details.\n    #[instrument(level = \"trace\", skip_all)]\n    pub(super) fn shutdown(&self) {\n        let handles = self\n            .handles\n            .lock()\n            .expect(\"mutex poisoned\")\n            // NB: this .take() sets locked to None.\n            // That's what makes future `Cache::get` misses fail.\n            // Cache hits are taken care of below.\n            .take();\n        let Some(handles) = handles else {\n            trace!(\"already shut down\");\n            return;\n        };\n        for handle_inner_arc in handles.values() {\n            // Make hits fail.\n            let mut lock_guard = handle_inner_arc.lock().expect(\"poisoned\");\n            lock_guard.shutdown();\n        }\n        drop(handles);\n    }\n}\n\n// When dropping a [`Cache`], prune its handles in the [`PerTimelineState`] to break the reference cycle.\nimpl<T: Types> Drop for Cache<T> {\n    fn drop(&mut self) {\n        for (\n            _,\n            WeakHandle {\n                inner: handle_inner_weak,\n            },\n        ) in self.map.drain()\n        {\n            let Some(handle_inner_arc) = handle_inner_weak.upgrade() else {\n                continue;\n            };\n            let Some(handle_timeline) = handle_inner_arc\n                // locking rules: drop lock before acquiring other lock below\n                .lock()\n                .expect(\"poisoned\")\n                .shutdown()\n            else {\n                // Concurrent PerTimelineState::shutdown.\n                continue;\n            };\n            // Clean up per_timeline_state so the HandleInner allocation can be dropped.\n            let per_timeline_state = handle_timeline.per_timeline_state();\n            let mut handles_lock_guard = per_timeline_state.handles.lock().expect(\"mutex poisoned\");\n            let Some(handles) = &mut *handles_lock_guard else {\n                continue;\n            };\n            let 
Some(removed_handle_inner_arc) = handles.remove(&self.id) else {\n                // Concurrent PerTimelineState::shutdown.\n                continue;\n            };\n            drop(handles_lock_guard); // locking rules!\n            assert!(Arc::ptr_eq(&removed_handle_inner_arc, &handle_inner_arc));\n        }\n    }\n}\n\nimpl<T: Types> HandleInner<T> {\n    fn shutdown(&mut self) -> Option<Arc<T::Timeline>> {\n        match std::mem::replace(self, HandleInner::ShutDown) {\n            HandleInner::Open(timeline) => Some(timeline),\n            HandleInner::ShutDown => {\n                // Duplicate shutdowns are possible because both Cache::drop and PerTimelineState::shutdown\n                // may do it concurrently, but locking rules disallow holding per-timeline-state lock and\n                // the handle lock at the same time.\n                None\n            }\n        }\n    }\n}\n\n#[cfg(test)]\nmod tests {\n    use std::sync::Weak;\n\n    use pageserver_api::key::{DBDIR_KEY, Key, rel_block_to_key};\n    use pageserver_api::models::ShardParameters;\n    use pageserver_api::reltag::RelTag;\n    use pageserver_api::shard::DEFAULT_STRIPE_SIZE;\n    use utils::id::TenantId;\n    use utils::shard::{ShardCount, TenantShardId};\n    use utils::sync::gate::GateGuard;\n\n    use super::*;\n\n    const FOREVER: std::time::Duration = std::time::Duration::from_secs(u64::MAX);\n\n    #[derive(Debug)]\n    struct TestTypes;\n    impl Types for TestTypes {\n        type TenantManager = StubManager;\n        type Timeline = Entered;\n    }\n\n    struct StubManager {\n        shards: Vec<Arc<StubTimeline>>,\n    }\n\n    struct StubTimeline {\n        gate: utils::sync::gate::Gate,\n        id: TimelineId,\n        shard: ShardIdentity,\n        per_timeline_state: PerTimelineState<TestTypes>,\n        myself: Weak<StubTimeline>,\n    }\n\n    struct Entered {\n        timeline: Arc<StubTimeline>,\n        #[allow(dead_code)] // it's stored here to keep the gate 
open\n        gate_guard: Arc<GateGuard>,\n    }\n\n    impl StubTimeline {\n        fn getpage(&self) {\n            // do nothing\n        }\n    }\n\n    impl Timeline<TestTypes> for Entered {\n        fn shard_timeline_id(&self) -> ShardTimelineId {\n            ShardTimelineId {\n                shard_index: self.shard.shard_index(),\n                timeline_id: self.id,\n            }\n        }\n\n        fn get_shard_identity(&self) -> &ShardIdentity {\n            &self.shard\n        }\n\n        fn per_timeline_state(&self) -> &PerTimelineState<TestTypes> {\n            &self.per_timeline_state\n        }\n    }\n\n    impl TenantManager<TestTypes> for StubManager {\n        async fn resolve(\n            &self,\n            timeline_id: TimelineId,\n            shard_selector: ShardSelector,\n        ) -> Result<Entered, GetActiveTimelineError> {\n            fn enter_gate(\n                timeline: &StubTimeline,\n            ) -> Result<Arc<GateGuard>, GetActiveTimelineError> {\n                Ok(Arc::new(timeline.gate.enter().map_err(|_| {\n                    GetActiveTimelineError::Timeline(GetTimelineError::ShuttingDown)\n                })?))\n            }\n\n            for timeline in &self.shards {\n                if timeline.id == timeline_id {\n                    match &shard_selector {\n                        ShardSelector::Zero if timeline.shard.is_shard_zero() => {\n                            return Ok(Entered {\n                                timeline: Arc::clone(timeline),\n                                gate_guard: enter_gate(timeline)?,\n                            });\n                        }\n                        ShardSelector::Zero => continue,\n                        ShardSelector::Page(key) if timeline.shard.is_key_local(key) => {\n                            return Ok(Entered {\n                                timeline: Arc::clone(timeline),\n                                gate_guard: enter_gate(timeline)?,\n    
                        });\n                        }\n                        ShardSelector::Page(_) => continue,\n                        ShardSelector::Known(idx) if idx == &timeline.shard.shard_index() => {\n                            return Ok(Entered {\n                                timeline: Arc::clone(timeline),\n                                gate_guard: enter_gate(timeline)?,\n                            });\n                        }\n                        ShardSelector::Known(_) => continue,\n                    }\n                }\n            }\n            Err(GetActiveTimelineError::Timeline(\n                GetTimelineError::NotFound {\n                    tenant_id: TenantShardId::unsharded(TenantId::from([0; 16])),\n                    timeline_id,\n                },\n            ))\n        }\n    }\n\n    impl std::ops::Deref for Entered {\n        type Target = StubTimeline;\n        fn deref(&self) -> &Self::Target {\n            &self.timeline\n        }\n    }\n\n    #[tokio::test(start_paused = true)]\n    async fn test_timeline_shutdown() {\n        crate::tenant::harness::setup_logging();\n\n        let timeline_id = TimelineId::generate();\n        let shard0 = Arc::new_cyclic(|myself| StubTimeline {\n            gate: Default::default(),\n            id: timeline_id,\n            shard: ShardIdentity::unsharded(),\n            per_timeline_state: PerTimelineState::default(),\n            myself: myself.clone(),\n        });\n        let mgr = StubManager {\n            shards: vec![shard0.clone()],\n        };\n        let key = DBDIR_KEY;\n\n        let mut cache = Cache::<TestTypes>::default();\n\n        //\n        // fill the cache\n        //\n        let handle: Handle<_> = cache\n            .get(timeline_id, ShardSelector::Page(key), &mgr)\n            .await\n            .expect(\"we have the timeline\");\n        assert!(Weak::ptr_eq(&handle.myself, &shard0.myself));\n        assert_eq!(cache.map.len(), 1);\n       
 drop(handle);\n\n        //\n        // demonstrate that Handle holds up gate closure\n        // but shutdown prevents new handles from being handed out\n        //\n\n        tokio::select! {\n            _ = shard0.gate.close() => {\n                panic!(\"cache and per-timeline handler state keep cache open\");\n            }\n            _ = tokio::time::sleep(FOREVER) => {\n                // NB: first poll of close() makes it enter closing state\n            }\n        }\n\n        let handle = cache\n            .get(timeline_id, ShardSelector::Page(key), &mgr)\n            .await\n            .expect(\"we have the timeline\");\n        assert!(Weak::ptr_eq(&handle.myself, &shard0.myself));\n\n        // SHUTDOWN\n        shard0.per_timeline_state.shutdown(); // keeping handle alive across shutdown\n\n        assert_eq!(\n            cache.map.len(),\n            1,\n            \"this is an implementation detail but worth pointing out: we can't clear the cache from shutdown(), it's cleared on first access after\"\n        );\n\n        // this handle is perfectly usable\n        handle.getpage();\n\n        cache\n            .get(timeline_id, ShardSelector::Page(key), &mgr)\n            .await\n            .err()\n            .expect(\"documented behavior: can't get new handle after shutdown, even if there is an alive Handle\");\n        assert_eq!(\n            cache.map.len(),\n            0,\n            \"first access after shutdown cleans up the Weak's from the cache\"\n        );\n\n        tokio::select! {\n            _ = shard0.gate.close() => {\n                panic!(\"handle is keeping gate open\");\n            }\n            _ = tokio::time::sleep(FOREVER) => { }\n        }\n\n        drop(handle);\n\n        // closing gate succeeds after dropping handle\n        tokio::select! 
{\n            _ = shard0.gate.close() => { }\n            _ = tokio::time::sleep(FOREVER) => {\n                panic!(\"handle is dropped, no other gate holders exist\")\n            }\n        }\n\n        // map gets cleaned on next lookup\n        cache\n            .get(timeline_id, ShardSelector::Page(key), &mgr)\n            .await\n            .err()\n            .expect(\"documented behavior: can't get new handle after shutdown\");\n        assert_eq!(cache.map.len(), 0);\n\n        // ensure all refs to shard0 are gone and we're not leaking anything\n        drop(shard0);\n        drop(mgr);\n    }\n\n    #[tokio::test]\n    async fn test_multiple_timelines_and_deletion() {\n        crate::tenant::harness::setup_logging();\n\n        let timeline_a = TimelineId::generate();\n        let timeline_b = TimelineId::generate();\n        assert_ne!(timeline_a, timeline_b);\n        let timeline_a = Arc::new_cyclic(|myself| StubTimeline {\n            gate: Default::default(),\n            id: timeline_a,\n            shard: ShardIdentity::unsharded(),\n            per_timeline_state: PerTimelineState::default(),\n            myself: myself.clone(),\n        });\n        let timeline_b = Arc::new_cyclic(|myself| StubTimeline {\n            gate: Default::default(),\n            id: timeline_b,\n            shard: ShardIdentity::unsharded(),\n            per_timeline_state: PerTimelineState::default(),\n            myself: myself.clone(),\n        });\n        let mut mgr = StubManager {\n            shards: vec![timeline_a.clone(), timeline_b.clone()],\n        };\n        let key = DBDIR_KEY;\n\n        let mut cache = Cache::<TestTypes>::default();\n\n        cache\n            .get(timeline_a.id, ShardSelector::Page(key), &mgr)\n            .await\n            .expect(\"we have it\");\n        cache\n            .get(timeline_b.id, ShardSelector::Page(key), &mgr)\n            .await\n            .expect(\"we have it\");\n        assert_eq!(cache.map.len(), 
2);\n\n        // delete timeline A\n        timeline_a.per_timeline_state.shutdown();\n        mgr.shards.retain(|t| t.id != timeline_a.id);\n        assert!(\n            mgr.resolve(timeline_a.id, ShardSelector::Page(key))\n                .await\n                .is_err(),\n            \"broken StubManager implementation\"\n        );\n\n        assert_eq!(\n            cache.map.len(),\n            2,\n            \"cache still has a Weak handle to Timeline A\"\n        );\n        cache\n            .get(timeline_a.id, ShardSelector::Page(key), &mgr)\n            .await\n            .err()\n            .expect(\"documented behavior: can't get new handle after shutdown\");\n\n        assert_eq!(cache.map.len(), 1, \"next access cleans up the cache\");\n\n        cache\n            .get(timeline_b.id, ShardSelector::Page(key), &mgr)\n            .await\n            .expect(\"we still have it\");\n    }\n\n    fn make_relation_key_for_shard(shard: ShardNumber, params: ShardParameters) -> Key {\n        rel_block_to_key(\n            RelTag {\n                spcnode: 1663,\n                dbnode: 208101,\n                relnode: 2620,\n                forknum: 0,\n            },\n            shard.0 as u32 * params.stripe_size.0,\n        )\n    }\n\n    #[tokio::test(start_paused = true)]\n    async fn test_shard_split() {\n        crate::tenant::harness::setup_logging();\n        let timeline_id = TimelineId::generate();\n        let parent = Arc::new_cyclic(|myself| StubTimeline {\n            gate: Default::default(),\n            id: timeline_id,\n            shard: ShardIdentity::unsharded(),\n            per_timeline_state: PerTimelineState::default(),\n            myself: myself.clone(),\n        });\n        let child_params = ShardParameters {\n            count: ShardCount(2),\n            stripe_size: DEFAULT_STRIPE_SIZE,\n        };\n        let child0 = Arc::new_cyclic(|myself| StubTimeline {\n            gate: Default::default(),\n            
id: timeline_id,\n            shard: ShardIdentity::from_params(ShardNumber(0), child_params),\n            per_timeline_state: PerTimelineState::default(),\n            myself: myself.clone(),\n        });\n        let child1 = Arc::new_cyclic(|myself| StubTimeline {\n            gate: Default::default(),\n            id: timeline_id,\n            shard: ShardIdentity::from_params(ShardNumber(1), child_params),\n            per_timeline_state: PerTimelineState::default(),\n            myself: myself.clone(),\n        });\n        let child_shards_by_shard_number = [child0.clone(), child1.clone()];\n\n        let mut cache = Cache::<TestTypes>::default();\n\n        // fill the cache with the parent\n        for i in 0..2 {\n            let handle = cache\n                .get(\n                    timeline_id,\n                    ShardSelector::Page(make_relation_key_for_shard(ShardNumber(i), child_params)),\n                    &StubManager {\n                        shards: vec![parent.clone()],\n                    },\n                )\n                .await\n                .expect(\"we have it\");\n            assert!(\n                Weak::ptr_eq(&handle.myself, &parent.myself),\n                \"mgr returns parent first\"\n            );\n            drop(handle);\n        }\n\n        //\n        // SHARD SPLIT: tenant manager changes, but the cache isn't informed\n        //\n\n        // while we haven't shut down the parent, the cache will return the cached parent, even\n        // if the tenant manager returns the child\n        for i in 0..2 {\n            let handle = cache\n                .get(\n                    timeline_id,\n                    ShardSelector::Page(make_relation_key_for_shard(ShardNumber(i), child_params)),\n                    &StubManager {\n                        shards: vec![], // doesn't matter what's in here, the cache is fully loaded\n                    },\n                )\n                .await\n                
.expect(\"we have it\");\n            assert!(\n                Weak::ptr_eq(&handle.myself, &parent.myself),\n                \"mgr returns parent\"\n            );\n            drop(handle);\n        }\n\n        let parent_handle = cache\n            .get(\n                timeline_id,\n                ShardSelector::Page(make_relation_key_for_shard(ShardNumber(0), child_params)),\n                &StubManager {\n                    shards: vec![parent.clone()],\n                },\n            )\n            .await\n            .expect(\"we have it\");\n        assert!(Weak::ptr_eq(&parent_handle.myself, &parent.myself));\n\n        // invalidate the cache\n        parent.per_timeline_state.shutdown();\n\n        // the cache will now return the child, even though the parent handle still exists\n        for i in 0..2 {\n            let handle = cache\n                .get(\n                    timeline_id,\n                    ShardSelector::Page(make_relation_key_for_shard(ShardNumber(i), child_params)),\n                    &StubManager {\n                        shards: vec![child0.clone(), child1.clone()], // <====== this changed compared to previous loop\n                    },\n                )\n                .await\n                .expect(\"we have it\");\n            assert!(\n                Weak::ptr_eq(\n                    &handle.myself,\n                    &child_shards_by_shard_number[i as usize].myself\n                ),\n                \"mgr returns child\"\n            );\n            drop(handle);\n        }\n\n        // all the while the parent handle kept the parent gate open\n        tokio::select! {\n            _ = parent_handle.gate.close() => {\n                panic!(\"parent handle is keeping gate open\");\n            }\n            _ = tokio::time::sleep(FOREVER) => { }\n        }\n        drop(parent_handle);\n        tokio::select! 
{\n            _ = parent.gate.close() => { }\n            _ = tokio::time::sleep(FOREVER) => {\n                panic!(\"parent handle is dropped, no other gate holders exist\")\n            }\n        }\n    }\n\n    #[tokio::test(start_paused = true)]\n    async fn test_connection_handler_exit() {\n        crate::tenant::harness::setup_logging();\n        let timeline_id = TimelineId::generate();\n        let shard0 = Arc::new_cyclic(|myself| StubTimeline {\n            gate: Default::default(),\n            id: timeline_id,\n            shard: ShardIdentity::unsharded(),\n            per_timeline_state: PerTimelineState::default(),\n            myself: myself.clone(),\n        });\n        let mgr = StubManager {\n            shards: vec![shard0.clone()],\n        };\n        let key = DBDIR_KEY;\n\n        // Simulate 10 connections that are opened, used, and closed\n        for _ in 0..10 {\n            let mut cache = Cache::<TestTypes>::default();\n            let handle = {\n                let handle = cache\n                    .get(timeline_id, ShardSelector::Page(key), &mgr)\n                    .await\n                    .expect(\"we have the timeline\");\n                assert!(Weak::ptr_eq(&handle.myself, &shard0.myself));\n                handle\n            };\n            handle.getpage();\n        }\n\n        // No handles exist, thus gates are closed and don't require shutdown.\n        // Thus the gate should close immediately, even without shutdown.\n        tokio::select! 
{\n            _ = shard0.gate.close() => { }\n            _ = tokio::time::sleep(FOREVER) => {\n                panic!(\"handle is dropped, no other gate holders exist\")\n            }\n        }\n    }\n\n    #[tokio::test(start_paused = true)]\n    async fn test_weak_handles() {\n        crate::tenant::harness::setup_logging();\n        let timeline_id = TimelineId::generate();\n        let shard0 = Arc::new_cyclic(|myself| StubTimeline {\n            gate: Default::default(),\n            id: timeline_id,\n            shard: ShardIdentity::unsharded(),\n            per_timeline_state: PerTimelineState::default(),\n            myself: myself.clone(),\n        });\n        let mgr = StubManager {\n            shards: vec![shard0.clone()],\n        };\n\n        let refcount_start = Arc::strong_count(&shard0);\n\n        let key = DBDIR_KEY;\n\n        let mut cache = Cache::<TestTypes>::default();\n\n        let handle = cache\n            .get(timeline_id, ShardSelector::Page(key), &mgr)\n            .await\n            .expect(\"we have the timeline\");\n        assert!(Weak::ptr_eq(&handle.myself, &shard0.myself));\n\n        let weak_handle = handle.downgrade();\n\n        drop(handle);\n\n        let upgraded_handle = weak_handle.upgrade().ok().expect(\"we can upgrade it\");\n\n        // Start shutdown\n        shard0.per_timeline_state.shutdown();\n\n        // Upgrades during shutdown don't work, even if upgraded_handle exists.\n        weak_handle\n            .upgrade()\n            .err()\n            .expect(\"can't upgrade weak handle as soon as shutdown started\");\n\n        // But upgraded_handle is still alive, so the gate won't close.\n        tokio::select! 
{\n            _ = shard0.gate.close() => {\n                panic!(\"handle is keeping gate open\");\n            }\n            _ = tokio::time::sleep(FOREVER) => { }\n        }\n\n        // Drop the last handle.\n        drop(upgraded_handle);\n\n        // The gate should close now, despite there still being a weak_handle.\n        tokio::select! {\n            _ = shard0.gate.close() => { }\n            _ = tokio::time::sleep(FOREVER) => {\n                panic!(\"only strong handle is dropped and we shut down per-timeline-state\")\n            }\n        }\n\n        // The weak handle still can't be upgraded.\n        weak_handle\n            .upgrade()\n            .err()\n            .expect(\"still shouldn't be able to upgrade the weak handle\");\n\n        // There should be no strong references to the timeline object except the one on \"stack\".\n        assert_eq!(Arc::strong_count(&shard0), refcount_start);\n    }\n\n    #[tokio::test(start_paused = true)]\n    async fn test_reference_cycle_broken_when_cache_is_dropped() {\n        crate::tenant::harness::setup_logging();\n        let timeline_id = TimelineId::generate();\n        let shard0 = Arc::new_cyclic(|myself| StubTimeline {\n            gate: Default::default(),\n            id: timeline_id,\n            shard: ShardIdentity::unsharded(),\n            per_timeline_state: PerTimelineState::default(),\n            myself: myself.clone(),\n        });\n        let mgr = StubManager {\n            shards: vec![shard0.clone()],\n        };\n        let key = DBDIR_KEY;\n\n        let mut cache = Cache::<TestTypes>::default();\n\n        // helper to check if a handle is referenced by per_timeline_state\n        let per_timeline_state_refs_handle = |handle_weak: &Weak<Mutex<HandleInner<_>>>| {\n            let per_timeline_state = shard0.per_timeline_state.handles.lock().unwrap();\n            let per_timeline_state = per_timeline_state.as_ref().unwrap();\n            per_timeline_state\n         
       .values()\n                .any(|v| Weak::ptr_eq(&Arc::downgrade(v), handle_weak))\n        };\n\n        // Fill the cache.\n        let handle = cache\n            .get(timeline_id, ShardSelector::Page(key), &mgr)\n            .await\n            .expect(\"we have the timeline\");\n        assert!(Weak::ptr_eq(&handle.myself, &shard0.myself));\n        let handle_inner_weak = Arc::downgrade(&handle.inner);\n        assert!(\n            per_timeline_state_refs_handle(&handle_inner_weak),\n            \"we still hold `handle` _and_ haven't dropped `cache` yet\"\n        );\n\n        // Drop the cache.\n        drop(cache);\n\n        assert!(\n            !(per_timeline_state_refs_handle(&handle_inner_weak)),\n            \"nothing should reference the handle allocation anymore\"\n        );\n        assert!(\n            Weak::upgrade(&handle_inner_weak).is_some(),\n            \"the local `handle` still keeps the allocation alive\"\n        );\n        // but obviously the cache is gone so no new allocations can be handed out.\n\n        // Drop handle.\n        drop(handle);\n        assert!(\n            Weak::upgrade(&handle_inner_weak).is_none(),\n            \"the local `handle` is dropped, so the allocation should be dropped by now\"\n        );\n    }\n\n    #[tokio::test(start_paused = true)]\n    async fn test_reference_cycle_broken_when_per_timeline_state_shutdown() {\n        crate::tenant::harness::setup_logging();\n        let timeline_id = TimelineId::generate();\n        let shard0 = Arc::new_cyclic(|myself| StubTimeline {\n            gate: Default::default(),\n            id: timeline_id,\n            shard: ShardIdentity::unsharded(),\n            per_timeline_state: PerTimelineState::default(),\n            myself: myself.clone(),\n        });\n        let mgr = StubManager {\n            shards: vec![shard0.clone()],\n        };\n        let key = DBDIR_KEY;\n\n        let mut cache = Cache::<TestTypes>::default();\n        let handle 
= cache\n            .get(timeline_id, ShardSelector::Page(key), &mgr)\n            .await\n            .expect(\"we have the timeline\");\n        // grab a weak reference to the inner so we can later try to Weak::upgrade it and assert that fails\n        let handle_inner_weak = Arc::downgrade(&handle.inner);\n\n        // drop the handle, obviously the lifetime of `inner` is at least as long as each strong reference to it\n        drop(handle);\n        assert!(Weak::upgrade(&handle_inner_weak).is_some(), \"can still\");\n\n        // Shutdown the per_timeline_state.\n        shard0.per_timeline_state.shutdown();\n        assert!(Weak::upgrade(&handle_inner_weak).is_none(), \"can no longer\");\n\n        // cache only contains Weak's, so, it can outlive the per_timeline_state without\n        // keeping the handle allocation alive. Drop it explicitly solely to make this point.\n        drop(cache);\n    }\n}\n"
  },
  {
    "path": "pageserver/src/tenant/timeline/heatmap_layers_downloader.rs",
    "content": "//! Timeline utility module to hydrate everything from the current heatmap.\n//!\n//! Provides utilities to spawn and abort a background task where the downloads happen.\n//! See /v1/tenant/:tenant_shard_id/timeline/:timeline_id/download_heatmap_layers.\n\nuse std::sync::{Arc, Mutex};\n\nuse futures::StreamExt;\nuse http_utils::error::ApiError;\nuse tokio_util::sync::CancellationToken;\nuse utils::sync::gate::Gate;\n\nuse crate::context::RequestContext;\n\nuse super::Timeline;\n\n// This status is not strictly necessary now, but gives us a nice place\n// to store progress information if we ever wish to expose it.\npub(super) enum HeatmapLayersDownloadStatus {\n    InProgress,\n    Complete,\n}\n\npub(super) struct HeatmapLayersDownloader {\n    handle: tokio::task::JoinHandle<()>,\n    status: Arc<Mutex<HeatmapLayersDownloadStatus>>,\n    cancel: CancellationToken,\n    downloads_guard: Arc<Gate>,\n}\n\nimpl HeatmapLayersDownloader {\n    fn new(\n        timeline: Arc<Timeline>,\n        concurrency: usize,\n        recurse: bool,\n        ctx: RequestContext,\n    ) -> Result<HeatmapLayersDownloader, ApiError> {\n        let tl_guard = timeline.gate.enter().map_err(|_| ApiError::Cancelled)?;\n\n        let cancel = timeline.cancel.child_token();\n        let downloads_guard = Arc::new(Gate::default());\n\n        let status = Arc::new(Mutex::new(HeatmapLayersDownloadStatus::InProgress));\n\n        let handle = tokio::task::spawn({\n            let status = status.clone();\n            let downloads_guard = downloads_guard.clone();\n            let cancel = cancel.clone();\n\n            async move {\n                let _guard = tl_guard;\n\n                scopeguard::defer! 
{\n                    *status.lock().unwrap() = HeatmapLayersDownloadStatus::Complete;\n                }\n\n                let Some(heatmap) = timeline.generate_heatmap().await else {\n                    tracing::info!(\"Heatmap layers download failed to generate heatmap\");\n                    return;\n                };\n\n                tracing::info!(\n                    resident_size=%timeline.resident_physical_size(),\n                    heatmap_layers=%heatmap.all_layers().count(),\n                    \"Starting heatmap layers download\"\n                );\n\n                let stream = futures::stream::iter(heatmap.all_layers().cloned().filter_map(\n                    |layer| {\n                        let ctx = ctx.attached_child();\n                        let tl = timeline.clone();\n                        let dl_guard = match downloads_guard.enter() {\n                            Ok(g) => g,\n                            Err(_) => {\n                                // [`Self::shutdown`] was called. Don't spawn any more downloads.\n                                return None;\n                            }\n                        };\n\n                        Some(async move {\n                            let _dl_guard = dl_guard;\n\n                            let res = tl.download_layer(&layer.name, &ctx).await;\n                            if let Err(err) = res {\n                                if !err.is_cancelled() {\n                                    tracing::warn!(layer=%layer.name,\"Failed to download heatmap layer: {err}\")\n                                }\n                            }\n                        })\n                    }\n                )).buffered(concurrency);\n\n                tokio::select! 
{\n                    _ = stream.collect::<()>() => {\n                        tracing::info!(\n                            resident_size=%timeline.resident_physical_size(),\n                            \"Heatmap layers download completed\"\n                        );\n                    },\n                    _ = cancel.cancelled() => {\n                        tracing::info!(\"Heatmap layers download cancelled\");\n                        return;\n                    }\n                }\n\n                if recurse {\n                    if let Some(ancestor) = timeline.ancestor_timeline() {\n                        let ctx = ctx.attached_child();\n                        let res =\n                            ancestor.start_heatmap_layers_download(concurrency, recurse, &ctx);\n                        if let Err(err) = res {\n                            tracing::info!(\n                                \"Failed to start heatmap layers download for ancestor: {err}\"\n                            );\n                        }\n                    }\n                }\n            }\n        });\n\n        Ok(Self {\n            status,\n            handle,\n            cancel,\n            downloads_guard,\n        })\n    }\n\n    fn is_complete(&self) -> bool {\n        matches!(\n            *self.status.lock().unwrap(),\n            HeatmapLayersDownloadStatus::Complete\n        )\n    }\n\n    /// Drive any in-progress downloads to completion and stop spawning any new ones.\n    ///\n    /// This has two callers and they behave differently\n    /// 1. [`Timeline::shutdown`]: the drain will be immediate since downloads themselves\n    ///    are sensitive to timeline cancellation.\n    ///\n    /// 2. 
Endpoint handler in [`crate::http::routes`]: the drain will wait for any in-progress\n    ///    downloads to complete.\n    async fn stop_and_drain(self) {\n        // Counterintuitive: close the guard before cancelling.\n        // Something needs to poll the already created download futures to completion.\n        // If we cancel first, then the underlying task exits and we lose\n        // the poller.\n        self.downloads_guard.close().await;\n        self.cancel.cancel();\n        if let Err(err) = self.handle.await {\n            tracing::warn!(\"Failed to join heatmap layer downloader task: {err}\");\n        }\n    }\n}\n\nimpl Timeline {\n    pub(crate) fn start_heatmap_layers_download(\n        self: &Arc<Self>,\n        concurrency: usize,\n        recurse: bool,\n        ctx: &RequestContext,\n    ) -> Result<(), ApiError> {\n        let mut locked = self.heatmap_layers_downloader.lock().unwrap();\n        if locked.as_ref().map(|dl| dl.is_complete()).unwrap_or(true) {\n            let dl = HeatmapLayersDownloader::new(\n                self.clone(),\n                concurrency,\n                recurse,\n                ctx.attached_child(),\n            )?;\n            *locked = Some(dl);\n            Ok(())\n        } else {\n            Err(ApiError::Conflict(\"Already running\".to_string()))\n        }\n    }\n\n    pub(crate) async fn stop_and_drain_heatmap_layers_download(&self) {\n        // This can race with the start of a new downloader and lead to a situation\n        // where one downloader is shutting down and another one is in-flight.\n        // The only impact is that we'd end up using more remote storage semaphore\n        // units than expected.\n        let downloader = self.heatmap_layers_downloader.lock().unwrap().take();\n        if let Some(dl) = downloader {\n            dl.stop_and_drain().await;\n        }\n    }\n}\n"
  },
  {
    "path": "pageserver/src/tenant/timeline/import_pgdata/flow.rs",
    "content": "//! Import a PGDATA directory into an empty root timeline.\n//!\n//! This module is adapted hackathon code by Heikki and Stas.\n//! Other code in the parent module was written by Christian as part of a customer PoC.\n//!\n//! The hackathon code was producing image layer files as a free-standing program.\n//!\n//! It has been modified to\n//! - run inside a running Pageserver, within the proper lifecycles of Timeline -> Tenant(Shard)\n//! - => sharding-awareness: produce image layers with only the data relevant for this shard\n//! - => S3 as the source for the PGDATA instead of local filesystem\n//!\n//! TODOs before productionization:\n//! - ChunkProcessingJob should cut up an ImportJob to hit exactly target image layer size.\n//!\n//! An incomplete set of TODOs from the Hackathon:\n//! - version-specific CheckPointData (=> pgv abstraction, already exists for regular walingest)\n\nuse std::collections::HashSet;\nuse std::hash::{Hash, Hasher};\nuse std::num::NonZeroUsize;\nuse std::ops::Range;\nuse std::sync::Arc;\n\nuse anyhow::ensure;\nuse bytes::Bytes;\nuse futures::stream::FuturesOrdered;\nuse itertools::Itertools;\nuse pageserver_api::config::TimelineImportConfig;\nuse pageserver_api::key::{\n    CHECKPOINT_KEY, CONTROLFILE_KEY, DBDIR_KEY, Key, TWOPHASEDIR_KEY, rel_block_to_key,\n    rel_dir_to_key, rel_size_to_key, relmap_file_key, slru_block_to_key, slru_dir_to_key,\n    slru_segment_size_to_key,\n};\nuse pageserver_api::keyspace::{ShardedRange, singleton_range};\nuse pageserver_api::models::{ShardImportProgress, ShardImportProgressV1, ShardImportStatus};\nuse pageserver_api::reltag::{RelTag, SlruKind};\nuse pageserver_api::shard::ShardIdentity;\nuse postgres_ffi::BLCKSZ;\nuse postgres_ffi::relfile_utils::parse_relfilename;\nuse remote_storage::RemotePath;\nuse tokio::sync::Semaphore;\nuse tokio_stream::StreamExt;\nuse tracing::{debug, instrument};\nuse utils::bin_ser::BeSer;\nuse utils::lsn::Lsn;\nuse utils::pausable_failpoint;\n\nuse 
super::Timeline;\nuse super::importbucket_client::{ControlFile, RemoteStorageWrapper};\nuse crate::assert_u64_eq_usize::UsizeIsU64;\nuse crate::context::{DownloadBehavior, RequestContext};\nuse crate::controller_upcall_client::{StorageControllerUpcallApi, StorageControllerUpcallClient};\nuse crate::pgdatadir_mapping::{\n    DbDirectory, RelDirectory, SlruSegmentDirectory, TwoPhaseDirectory,\n};\nuse crate::task_mgr::TaskKind;\nuse crate::tenant::storage_layer::{AsLayerDesc, ImageLayerWriter, Layer};\nuse crate::tenant::timeline::layer_manager::LayerManagerLockHolder;\n\npub async fn run(\n    timeline: Arc<Timeline>,\n    control_file: ControlFile,\n    storage: RemoteStorageWrapper,\n    import_progress: Option<ShardImportProgress>,\n    ctx: &RequestContext,\n) -> anyhow::Result<()> {\n    // Match how we run the import based on the progress version.\n    // If there's no import progress, it means that this is a new import\n    // and we can use whichever version we want.\n    match import_progress {\n        Some(ShardImportProgress::V1(progress)) => {\n            run_v1(timeline, control_file, storage, Some(progress), ctx).await\n        }\n        None => run_v1(timeline, control_file, storage, None, ctx).await,\n    }\n}\n\nasync fn run_v1(\n    timeline: Arc<Timeline>,\n    control_file: ControlFile,\n    storage: RemoteStorageWrapper,\n    import_progress: Option<ShardImportProgressV1>,\n    ctx: &RequestContext,\n) -> anyhow::Result<()> {\n    let planner = Planner {\n        control_file,\n        storage: storage.clone(),\n        shard: timeline.shard_identity,\n        tasks: Vec::default(),\n    };\n\n    // Use the job size limit encoded in the progress if we are resuming an import.\n    // This ensures that imports have stable plans even if the pageserver config changes.\n    let import_config = {\n        match &import_progress {\n            Some(progress) => {\n                let base = &timeline.conf.timeline_import_config;\n                
TimelineImportConfig {\n                    import_job_soft_size_limit: NonZeroUsize::new(progress.job_soft_size_limit)\n                        .unwrap(),\n                    import_job_concurrency: base.import_job_concurrency,\n                    import_job_checkpoint_threshold: base.import_job_checkpoint_threshold,\n                    import_job_max_byte_range_size: base.import_job_max_byte_range_size,\n                }\n            }\n            None => timeline.conf.timeline_import_config.clone(),\n        }\n    };\n\n    let plan = planner.plan(&import_config).await?;\n\n    // Hash the plan and compare with the hash of the plan we got back from the storage controller.\n    // If the two match, it means that the planning stage had the same output.\n    //\n    // This is not intended to be a cryptographically secure hash.\n    const SEED: u64 = 42;\n    let mut hasher = twox_hash::XxHash64::with_seed(SEED);\n    plan.hash(&mut hasher);\n    let plan_hash = hasher.finish();\n\n    if let Some(progress) = &import_progress {\n        // Handle collisions on jobs of unequal length\n        if progress.jobs != plan.jobs.len() {\n            anyhow::bail!(\"Import plan job length does not match storcon metadata\")\n        }\n\n        if plan_hash != progress.import_plan_hash {\n            anyhow::bail!(\"Import plan does not match storcon metadata\");\n        }\n    }\n\n    pausable_failpoint!(\"import-timeline-pre-execute-pausable\");\n\n    let jobs_count = import_progress.as_ref().map(|p| p.jobs);\n    let start_from_job_idx = import_progress.map(|progress| progress.completed);\n\n    tracing::info!(\n        start_from_job_idx=?start_from_job_idx,\n        jobs=?jobs_count,\n        \"Executing import plan\"\n    );\n\n    plan.execute(timeline, start_from_job_idx, plan_hash, &import_config, ctx)\n        .await\n}\n\nstruct Planner {\n    control_file: ControlFile,\n    storage: RemoteStorageWrapper,\n    shard: ShardIdentity,\n    tasks: 
Vec<AnyImportTask>,\n}\n\n#[derive(Hash)]\nstruct Plan {\n    jobs: Vec<ChunkProcessingJob>,\n    // Included here such that it ends up in the hash for the plan\n    shard: ShardIdentity,\n}\n\nimpl Planner {\n    /// Creates an import plan\n    ///\n    /// This function is and must remain pure: given the same input, it will generate the same import plan.\n    async fn plan(mut self, import_config: &TimelineImportConfig) -> anyhow::Result<Plan> {\n        let pgdata_lsn = Lsn(self.control_file.control_file_data().checkPoint).align();\n        anyhow::ensure!(pgdata_lsn.is_valid());\n\n        let datadir = PgDataDir::new(&self.storage).await?;\n\n        // Import dbdir (00:00:00 keyspace)\n        // This is just constructed here, but will be written to the image layer in the first call to import_db()\n        let dbdir_buf = Bytes::from(DbDirectory::ser(&DbDirectory {\n            dbdirs: datadir\n                .dbs\n                .iter()\n                .map(|db| ((db.spcnode, db.dboid), true))\n                .collect(),\n        })?);\n        self.tasks\n            .push(ImportSingleKeyTask::new(DBDIR_KEY, dbdir_buf).into());\n\n        // Import databases (00:spcnode:dbnode keyspace for each db)\n        for db in datadir.dbs {\n            self.import_db(&db).await?;\n        }\n\n        // Import SLRUs\n        if self.shard.is_shard_zero() {\n            // pg_xact (01:00 keyspace)\n            self.import_slru(SlruKind::Clog, &self.storage.pgdata().join(\"pg_xact\"))\n                .await?;\n            // pg_multixact/members (01:01 keyspace)\n            self.import_slru(\n                SlruKind::MultiXactMembers,\n                &self.storage.pgdata().join(\"pg_multixact/members\"),\n            )\n            .await?;\n            // pg_multixact/offsets (01:02 keyspace)\n            self.import_slru(\n                SlruKind::MultiXactOffsets,\n                &self.storage.pgdata().join(\"pg_multixact/offsets\"),\n            )\n     
       .await?;\n        }\n\n        // Import pg_twophase.\n        // TODO: as empty\n        let twophasedir_buf = TwoPhaseDirectory::ser(&TwoPhaseDirectory {\n            xids: HashSet::new(),\n        })?;\n        self.tasks\n            .push(AnyImportTask::SingleKey(ImportSingleKeyTask::new(\n                TWOPHASEDIR_KEY,\n                Bytes::from(twophasedir_buf),\n            )));\n\n        // Controlfile, checkpoint\n        self.tasks\n            .push(AnyImportTask::SingleKey(ImportSingleKeyTask::new(\n                CONTROLFILE_KEY,\n                self.control_file.control_file_buf().clone(),\n            )));\n\n        let checkpoint_buf = self\n            .control_file\n            .control_file_data()\n            .checkPointCopy\n            .encode()?;\n        self.tasks\n            .push(AnyImportTask::SingleKey(ImportSingleKeyTask::new(\n                CHECKPOINT_KEY,\n                checkpoint_buf,\n            )));\n\n        // Sort the tasks by the key ranges they handle.\n        // The plan being generated here needs to be stable across invocations\n        // of this method.\n        self.tasks.sort_by_key(|task| match task {\n            AnyImportTask::SingleKey(key) => (key.key, key.key.next()),\n            AnyImportTask::RelBlocks(rel_blocks) => {\n                (rel_blocks.key_range.start, rel_blocks.key_range.end)\n            }\n            AnyImportTask::SlruBlocks(slru_blocks) => {\n                (slru_blocks.key_range.start, slru_blocks.key_range.end)\n            }\n        });\n\n        // Assigns parts of key space to later parallel jobs\n        // Note: The image layers produced here may have gaps, meaning,\n        //       there is not an image for each key in the layer's key range.\n        //       The read path stops traversal at the first image layer, regardless\n        //       of whether a base image has been found for a key or not.\n        //       (Concept of sparse image layers doesn't 
exist.)\n        //       This behavior is exactly right for the base image layers we're producing here.\n        //       But, since no other place in the code currently produces image layers with gaps,\n        //       it seems noteworthy.\n        let mut last_end_key = Key::MIN;\n        let mut current_chunk = Vec::new();\n        let mut current_chunk_size: usize = 0;\n        let mut jobs = Vec::new();\n        for task in std::mem::take(&mut self.tasks).into_iter() {\n            let task_size = task.total_size(&self.shard);\n            let projected_chunk_size = current_chunk_size.saturating_add(task_size);\n            if projected_chunk_size > import_config.import_job_soft_size_limit.into() {\n                let key_range = last_end_key..task.key_range().start;\n                jobs.push(ChunkProcessingJob::new(\n                    key_range.clone(),\n                    std::mem::take(&mut current_chunk),\n                    pgdata_lsn,\n                ));\n                last_end_key = key_range.end;\n                current_chunk_size = 0;\n            }\n            current_chunk_size = current_chunk_size.saturating_add(task_size);\n            current_chunk.push(task);\n        }\n        jobs.push(ChunkProcessingJob::new(\n            last_end_key..Key::MAX,\n            current_chunk,\n            pgdata_lsn,\n        ));\n\n        Ok(Plan {\n            jobs,\n            shard: self.shard,\n        })\n    }\n\n    #[instrument(level = tracing::Level::DEBUG, skip_all, fields(dboid=%db.dboid, tablespace=%db.spcnode, path=%db.path))]\n    async fn import_db(&mut self, db: &PgDataDirDb) -> anyhow::Result<()> {\n        debug!(\"start\");\n        scopeguard::defer! 
{\n            debug!(\"return\");\n        }\n\n        // Import relmap (00:spcnode:dbnode:00:*:00)\n        let relmap_key = relmap_file_key(db.spcnode, db.dboid);\n        debug!(\"Constructing relmap entry, key {relmap_key}\");\n        let relmap_path = db.path.join(\"pg_filenode.map\");\n        let relmap_buf = self.storage.get(&relmap_path).await?;\n        self.tasks\n            .push(AnyImportTask::SingleKey(ImportSingleKeyTask::new(\n                relmap_key, relmap_buf,\n            )));\n\n        // Import reldir (00:spcnode:dbnode:00:*:01)\n        let reldir_key = rel_dir_to_key(db.spcnode, db.dboid);\n        debug!(\"Constructing reldirs entry, key {reldir_key}\");\n        let reldir_buf = RelDirectory::ser(&RelDirectory {\n            rels: db\n                .files\n                .iter()\n                .map(|f| (f.rel_tag.relnode, f.rel_tag.forknum))\n                .collect(),\n        })?;\n        self.tasks\n            .push(AnyImportTask::SingleKey(ImportSingleKeyTask::new(\n                reldir_key,\n                Bytes::from(reldir_buf),\n            )));\n\n        // Import data (00:spcnode:dbnode:reloid:fork:blk) and set sizes for each last\n        // segment in a given relation (00:spcnode:dbnode:reloid:fork:ff)\n        for file in &db.files {\n            debug!(%file.path, %file.filesize, \"importing file\");\n            let len = file.filesize;\n            ensure!(len % 8192 == 0);\n            let start_blk: u32 = file.segno * (1024 * 1024 * 1024 / 8192);\n            let start_key = rel_block_to_key(file.rel_tag, start_blk);\n            let end_key = rel_block_to_key(file.rel_tag, start_blk + (len / 8192) as u32);\n            self.tasks\n                .push(AnyImportTask::RelBlocks(ImportRelBlocksTask::new(\n                    self.shard,\n                    start_key..end_key,\n                    &file.path,\n                    self.storage.clone(),\n                )));\n\n            // Set relsize 
for the last segment (00:spcnode:dbnode:reloid:fork:ff)\n            if let Some(nblocks) = file.nblocks {\n                let size_key = rel_size_to_key(file.rel_tag);\n                //debug!(\"Setting relation size (path={path}, rel_tag={rel_tag}, segno={segno}) to {nblocks}, key {size_key}\");\n                let buf = nblocks.to_le_bytes();\n                self.tasks\n                    .push(AnyImportTask::SingleKey(ImportSingleKeyTask::new(\n                        size_key,\n                        Bytes::from(buf.to_vec()),\n                    )));\n            }\n        }\n\n        Ok(())\n    }\n\n    async fn import_slru(&mut self, kind: SlruKind, path: &RemotePath) -> anyhow::Result<()> {\n        assert!(self.shard.is_shard_zero());\n\n        let segments = self.storage.listfilesindir(path).await?;\n        let segments: Vec<(String, u32, usize)> = segments\n            .into_iter()\n            .filter_map(|(path, size)| {\n                let filename = path.object_name()?;\n                let segno = u32::from_str_radix(filename, 16).ok()?;\n                Some((filename.to_string(), segno, size))\n            })\n            .collect();\n\n        // Write SlruDir\n        let slrudir_key = slru_dir_to_key(kind);\n        let segnos: HashSet<u32> = segments\n            .iter()\n            .map(|(_path, segno, _size)| *segno)\n            .collect();\n        let slrudir = SlruSegmentDirectory { segments: segnos };\n        let slrudir_buf = SlruSegmentDirectory::ser(&slrudir)?;\n        self.tasks\n            .push(AnyImportTask::SingleKey(ImportSingleKeyTask::new(\n                slrudir_key,\n                Bytes::from(slrudir_buf),\n            )));\n\n        for (segpath, segno, size) in segments {\n            // SlruSegBlocks for each segment\n            let p = path.join(&segpath);\n            let file_size = size;\n            ensure!(file_size % 8192 == 0);\n            let nblocks = u32::try_from(file_size / 8192)?;\n  
          let start_key = slru_block_to_key(kind, segno, 0);\n            let end_key = slru_block_to_key(kind, segno, nblocks);\n            debug!(%p, segno=%segno, %size, %start_key, %end_key, \"scheduling SLRU segment\");\n            self.tasks\n                .push(AnyImportTask::SlruBlocks(ImportSlruBlocksTask::new(\n                    start_key..end_key,\n                    &p,\n                    self.storage.clone(),\n                )));\n\n            // Followed by SlruSegSize\n            let segsize_key = slru_segment_size_to_key(kind, segno);\n            let segsize_buf = nblocks.to_le_bytes();\n            self.tasks\n                .push(AnyImportTask::SingleKey(ImportSingleKeyTask::new(\n                    segsize_key,\n                    Bytes::copy_from_slice(&segsize_buf),\n                )));\n        }\n        Ok(())\n    }\n}\n\nimpl Plan {\n    async fn execute(\n        self,\n        timeline: Arc<Timeline>,\n        start_after_job_idx: Option<usize>,\n        import_plan_hash: u64,\n        import_config: &TimelineImportConfig,\n        ctx: &RequestContext,\n    ) -> anyhow::Result<()> {\n        let storcon_client = StorageControllerUpcallClient::new(timeline.conf, &timeline.cancel);\n\n        let mut work = FuturesOrdered::new();\n        let semaphore = Arc::new(Semaphore::new(import_config.import_job_concurrency.into()));\n\n        let jobs_in_plan = self.jobs.len();\n\n        let mut jobs = self\n            .jobs\n            .into_iter()\n            .enumerate()\n            .map(|(idx, job)| (idx + 1, job))\n            .filter(|(idx, _job)| {\n                // Filter out any jobs that have been done already\n                if let Some(start_after) = start_after_job_idx {\n                    *idx > start_after\n                } else {\n                    true\n                }\n            })\n            .peekable();\n\n        let mut last_completed_job_idx = start_after_job_idx.unwrap_or(0);\n        
let checkpoint_every: usize = import_config.import_job_checkpoint_threshold.into();\n        let max_byte_range_size: usize = import_config.import_job_max_byte_range_size.into();\n\n        // Run import jobs concurrently up to the limit specified by the pageserver configuration.\n        // Note that we process completed futures in the order of insertion. This will be the\n        // building block for resuming imports across pageserver restarts or tenant migrations.\n        while last_completed_job_idx < jobs_in_plan {\n            tokio::select! {\n                permit = semaphore.clone().acquire_owned(), if jobs.peek().is_some() => {\n                    let permit = permit.expect(\"never closed\");\n                    let (job_idx, job) = jobs.next().expect(\"we peeked\");\n\n                    let job_timeline = timeline.clone();\n                    let ctx = ctx.detached_child(TaskKind::ImportPgdata, DownloadBehavior::Error);\n\n                    work.push_back(tokio::task::spawn(async move {\n                        let _permit = permit;\n                        let res = job.run(job_timeline, max_byte_range_size, &ctx).await;\n                        (job_idx, res)\n                    }));\n                },\n                maybe_complete_job_idx = work.next() => {\n                    pausable_failpoint!(\"import-task-complete-pausable\");\n\n                    match maybe_complete_job_idx {\n                        Some(Ok((job_idx, res))) => {\n                            assert!(last_completed_job_idx.checked_add(1).unwrap() == job_idx);\n\n                            res?;\n                            last_completed_job_idx = job_idx;\n\n                            if last_completed_job_idx % checkpoint_every == 0 {\n                                tracing::info!(last_completed_job_idx, jobs=%jobs_in_plan, \"Checkpointing import status\");\n\n                                let progress = ShardImportProgressV1 {\n                          
          jobs: jobs_in_plan,\n                                    completed: last_completed_job_idx,\n                                    import_plan_hash,\n                                    job_soft_size_limit: import_config.import_job_soft_size_limit.into(),\n                                };\n\n                                timeline.remote_client.schedule_index_upload_for_file_changes()?;\n                                timeline.remote_client.wait_completion().await?;\n\n                                storcon_client.put_timeline_import_status(\n                                    timeline.tenant_shard_id,\n                                    timeline.timeline_id,\n                                    timeline.generation,\n                                    ShardImportStatus::InProgress(Some(ShardImportProgress::V1(progress)))\n                                )\n                                .await\n                                .map_err(|_err| {\n                                    anyhow::anyhow!(\"Shut down while putting timeline import status\")\n                                })?;\n                            }\n                        },\n                        Some(Err(_)) => {\n                            anyhow::bail!(\n                                \"import job panicked or cancelled\"\n                            );\n                        }\n                        None => {}\n                    }\n                }\n            }\n        }\n\n        Ok(())\n    }\n}\n\n//\n// dbdir iteration tools\n//\n\nstruct PgDataDir {\n    pub dbs: Vec<PgDataDirDb>, // spcnode, dboid, path\n}\n\nstruct PgDataDirDb {\n    pub spcnode: u32,\n    pub dboid: u32,\n    pub path: RemotePath,\n    pub files: Vec<PgDataDirDbFile>,\n}\n\nstruct PgDataDirDbFile {\n    pub path: RemotePath,\n    pub rel_tag: RelTag,\n    pub segno: u32,\n    pub filesize: usize,\n    // Cumulative size of the given fork, set only for the last segment of that fork\n    
pub nblocks: Option<usize>,\n}\n\nimpl PgDataDir {\n    async fn new(storage: &RemoteStorageWrapper) -> anyhow::Result<Self> {\n        let datadir_path = storage.pgdata();\n        // Import ordinary databases, DEFAULTTABLESPACE_OID is smaller than GLOBALTABLESPACE_OID, so import them first\n        // Traverse database in increasing oid order\n\n        let basedir = &datadir_path.join(\"base\");\n        let db_oids: Vec<_> = storage\n            .listdir(basedir)\n            .await?\n            .into_iter()\n            .filter_map(|path| path.object_name().and_then(|name| name.parse::<u32>().ok()))\n            .sorted()\n            .collect();\n        debug!(?db_oids, \"found databases\");\n        let mut databases = Vec::new();\n        for dboid in db_oids {\n            databases.push(\n                PgDataDirDb::new(\n                    storage,\n                    &basedir.join(dboid.to_string()),\n                    postgres_ffi_types::constants::DEFAULTTABLESPACE_OID,\n                    dboid,\n                    &datadir_path,\n                )\n                .await?,\n            );\n        }\n\n        // special case for global catalogs\n        databases.push(\n            PgDataDirDb::new(\n                storage,\n                &datadir_path.join(\"global\"),\n                postgres_ffi_types::constants::GLOBALTABLESPACE_OID,\n                0,\n                &datadir_path,\n            )\n            .await?,\n        );\n\n        databases.sort_by_key(|db| (db.spcnode, db.dboid));\n\n        Ok(Self { dbs: databases })\n    }\n}\n\nimpl PgDataDirDb {\n    #[instrument(level = tracing::Level::DEBUG, skip_all, fields(%dboid, %db_path))]\n    async fn new(\n        storage: &RemoteStorageWrapper,\n        db_path: &RemotePath,\n        spcnode: u32,\n        dboid: u32,\n        datadir_path: &RemotePath,\n    ) -> anyhow::Result<Self> {\n        let mut files: Vec<PgDataDirDbFile> = storage\n            
.listfilesindir(db_path)\n            .await?\n            .into_iter()\n            .filter_map(|(path, size)| {\n                debug!(%path, %size, \"found file in dbdir\");\n                path.object_name().and_then(|name| {\n                    // returns (relnode, forknum, segno)\n                    parse_relfilename(name).ok().map(|x| (size, x))\n                })\n            })\n            .sorted_by_key(|(_, relfilename)| *relfilename)\n            .map(|(filesize, (relnode, forknum, segno))| {\n                let rel_tag = RelTag {\n                    spcnode,\n                    dbnode: dboid,\n                    relnode,\n                    forknum,\n                };\n\n                let path = datadir_path.join(rel_tag.to_segfile_name(segno));\n                anyhow::ensure!(filesize % BLCKSZ as usize == 0);\n                let nblocks = filesize / BLCKSZ as usize;\n\n                Ok(PgDataDirDbFile {\n                    path,\n                    filesize,\n                    rel_tag,\n                    segno,\n                    nblocks: Some(nblocks), // first non-cumulative sizes\n                })\n            })\n            .collect::<anyhow::Result<_, _>>()?;\n\n        // Set cumulative sizes. 
Do all of that math here, so that later we can more easily\n        // parallelize over segments and know for which segments we need to write a relsize\n        // entry.\n        let mut cumulative_nblocks: usize = 0;\n        let mut prev_rel_tag: Option<RelTag> = None;\n        for i in 0..files.len() {\n            if prev_rel_tag == Some(files[i].rel_tag) {\n                cumulative_nblocks += files[i].nblocks.unwrap();\n            } else {\n                cumulative_nblocks = files[i].nblocks.unwrap();\n            }\n\n            files[i].nblocks = if i == files.len() - 1 || files[i + 1].rel_tag != files[i].rel_tag {\n                Some(cumulative_nblocks)\n            } else {\n                None\n            };\n\n            prev_rel_tag = Some(files[i].rel_tag);\n        }\n\n        Ok(PgDataDirDb {\n            files,\n            path: db_path.clone(),\n            spcnode,\n            dboid,\n        })\n    }\n}\n\ntrait ImportTask {\n    fn key_range(&self) -> Range<Key>;\n\n    fn total_size(&self, shard_identity: &ShardIdentity) -> usize {\n        let range = ShardedRange::new(self.key_range(), shard_identity);\n        let page_count = range.page_count();\n        if page_count == u32::MAX {\n            tracing::warn!(\n                \"Import task has non contiguous key range: {}..{}\",\n                self.key_range().start,\n                self.key_range().end\n            );\n\n            // Tasks should operate on contiguous ranges. It is unexpected for\n            // ranges to violate this assumption. 
Calling code handles this by mapping\n            // any task on a non contiguous range to its own image layer.\n            usize::MAX\n        } else {\n            page_count as usize * 8192\n        }\n    }\n\n    async fn doit(\n        self,\n        layer_writer: &mut ImageLayerWriter,\n        max_byte_range_size: usize,\n        ctx: &RequestContext,\n    ) -> anyhow::Result<usize>;\n}\n\nstruct ImportSingleKeyTask {\n    key: Key,\n    buf: Bytes,\n}\n\nimpl Hash for ImportSingleKeyTask {\n    fn hash<H: Hasher>(&self, state: &mut H) {\n        let ImportSingleKeyTask { key, buf } = self;\n\n        key.hash(state);\n        // The key value might not have a stable binary representation.\n        // For instance, the db directory uses an unstable hash-map.\n        // To work around this we are a bit lax here and only hash the\n        // size of the buffer which must be consistent.\n        buf.len().hash(state);\n    }\n}\n\nimpl ImportSingleKeyTask {\n    fn new(key: Key, buf: Bytes) -> Self {\n        ImportSingleKeyTask { key, buf }\n    }\n}\n\nimpl ImportTask for ImportSingleKeyTask {\n    fn key_range(&self) -> Range<Key> {\n        singleton_range(self.key)\n    }\n\n    async fn doit(\n        self,\n        layer_writer: &mut ImageLayerWriter,\n        _max_byte_range_size: usize,\n        ctx: &RequestContext,\n    ) -> anyhow::Result<usize> {\n        layer_writer.put_image(self.key, self.buf, ctx).await?;\n        Ok(1)\n    }\n}\n\nstruct ImportRelBlocksTask {\n    shard_identity: ShardIdentity,\n    key_range: Range<Key>,\n    path: RemotePath,\n    storage: RemoteStorageWrapper,\n}\n\nimpl Hash for ImportRelBlocksTask {\n    fn hash<H: Hasher>(&self, state: &mut H) {\n        let ImportRelBlocksTask {\n            shard_identity: _,\n            key_range,\n            path,\n            storage: _,\n        } = self;\n\n        key_range.hash(state);\n        path.hash(state);\n    }\n}\n\nimpl ImportRelBlocksTask {\n    fn new(\n       
 shard_identity: ShardIdentity,\n        key_range: Range<Key>,\n        path: &RemotePath,\n        storage: RemoteStorageWrapper,\n    ) -> Self {\n        ImportRelBlocksTask {\n            shard_identity,\n            key_range,\n            path: path.clone(),\n            storage,\n        }\n    }\n}\n\nimpl ImportTask for ImportRelBlocksTask {\n    fn key_range(&self) -> Range<Key> {\n        self.key_range.clone()\n    }\n\n    #[instrument(level = tracing::Level::DEBUG, skip_all, fields(%self.path))]\n    async fn doit(\n        self,\n        layer_writer: &mut ImageLayerWriter,\n        max_byte_range_size: usize,\n        ctx: &RequestContext,\n    ) -> anyhow::Result<usize> {\n        debug!(\"Importing relation file\");\n\n        let (rel_tag, start_blk) = self.key_range.start.to_rel_block()?;\n        let (rel_tag_end, end_blk) = self.key_range.end.to_rel_block()?;\n        assert_eq!(rel_tag, rel_tag_end);\n\n        let ranges = (start_blk..end_blk)\n            .enumerate()\n            .filter_map(|(i, blknum)| {\n                let key = rel_block_to_key(rel_tag, blknum);\n                if self.shard_identity.is_key_disposable(&key) {\n                    return None;\n                }\n                let file_offset = i.checked_mul(8192).unwrap();\n                Some((\n                    vec![key],\n                    file_offset,\n                    file_offset.checked_add(8192).unwrap(),\n                ))\n            })\n            .coalesce(|(mut acc, acc_start, acc_end), (mut key, start, end)| {\n                assert_eq!(key.len(), 1);\n                assert!(!acc.is_empty());\n                assert!(acc_end > acc_start);\n                if acc_end == start && end - acc_start <= max_byte_range_size {\n                    acc.push(key.pop().unwrap());\n                    Ok((acc, acc_start, end))\n                } else {\n                    Err(((acc, acc_start, acc_end), (key, start, end)))\n                }\n      
      });\n\n        let mut nimages = 0;\n        for (keys, range_start, range_end) in ranges {\n            let range_buf = self\n                .storage\n                .get_range(&self.path, range_start.into_u64(), range_end.into_u64())\n                .await?;\n            let mut buf = Bytes::from(range_buf);\n            for key in keys {\n                // The writer buffers writes internally\n                let image = buf.split_to(8192);\n                layer_writer.put_image(key, image, ctx).await?;\n                nimages += 1;\n            }\n        }\n\n        Ok(nimages)\n    }\n}\n\nstruct ImportSlruBlocksTask {\n    key_range: Range<Key>,\n    path: RemotePath,\n    storage: RemoteStorageWrapper,\n}\n\nimpl Hash for ImportSlruBlocksTask {\n    fn hash<H: Hasher>(&self, state: &mut H) {\n        let ImportSlruBlocksTask {\n            key_range,\n            path,\n            storage: _,\n        } = self;\n\n        key_range.hash(state);\n        path.hash(state);\n    }\n}\n\nimpl ImportSlruBlocksTask {\n    fn new(key_range: Range<Key>, path: &RemotePath, storage: RemoteStorageWrapper) -> Self {\n        ImportSlruBlocksTask {\n            key_range,\n            path: path.clone(),\n            storage,\n        }\n    }\n}\n\nimpl ImportTask for ImportSlruBlocksTask {\n    fn key_range(&self) -> Range<Key> {\n        self.key_range.clone()\n    }\n\n    async fn doit(\n        self,\n        layer_writer: &mut ImageLayerWriter,\n        _max_byte_range_size: usize,\n        ctx: &RequestContext,\n    ) -> anyhow::Result<usize> {\n        debug!(\"Importing SLRU segment file {}\", self.path);\n        let buf = self.storage.get(&self.path).await?;\n\n        // TODO(vlad): Does timestamp to LSN work for imported timelines?\n        // Probably not since we don't append the `xact_time` to it as in\n        // [`WalIngest::ingest_xact_record`].\n        let (kind, segno, start_blk) = self.key_range.start.to_slru_block()?;\n        let 
(_kind, _segno, end_blk) = self.key_range.end.to_slru_block()?;\n        let mut blknum = start_blk;\n        let mut nimages = 0;\n        let mut file_offset = 0;\n        while blknum < end_blk {\n            let key = slru_block_to_key(kind, segno, blknum);\n            let buf = &buf[file_offset..(file_offset + 8192)];\n            file_offset += 8192;\n            layer_writer\n                .put_image(key, Bytes::copy_from_slice(buf), ctx)\n                .await?;\n            nimages += 1;\n            blknum += 1;\n        }\n        Ok(nimages)\n    }\n}\n\n#[derive(Hash)]\nenum AnyImportTask {\n    SingleKey(ImportSingleKeyTask),\n    RelBlocks(ImportRelBlocksTask),\n    SlruBlocks(ImportSlruBlocksTask),\n}\n\nimpl ImportTask for AnyImportTask {\n    fn key_range(&self) -> Range<Key> {\n        match self {\n            Self::SingleKey(t) => t.key_range(),\n            Self::RelBlocks(t) => t.key_range(),\n            Self::SlruBlocks(t) => t.key_range(),\n        }\n    }\n    /// returns the number of images put into the `layer_writer`\n    async fn doit(\n        self,\n        layer_writer: &mut ImageLayerWriter,\n        max_byte_range_size: usize,\n        ctx: &RequestContext,\n    ) -> anyhow::Result<usize> {\n        match self {\n            Self::SingleKey(t) => t.doit(layer_writer, max_byte_range_size, ctx).await,\n            Self::RelBlocks(t) => t.doit(layer_writer, max_byte_range_size, ctx).await,\n            Self::SlruBlocks(t) => t.doit(layer_writer, max_byte_range_size, ctx).await,\n        }\n    }\n}\n\nimpl From<ImportSingleKeyTask> for AnyImportTask {\n    fn from(t: ImportSingleKeyTask) -> Self {\n        Self::SingleKey(t)\n    }\n}\n\nimpl From<ImportRelBlocksTask> for AnyImportTask {\n    fn from(t: ImportRelBlocksTask) -> Self {\n        Self::RelBlocks(t)\n    }\n}\n\nimpl From<ImportSlruBlocksTask> for AnyImportTask {\n    fn from(t: ImportSlruBlocksTask) -> Self {\n        Self::SlruBlocks(t)\n    
}\n}\n\n#[derive(Hash)]\nstruct ChunkProcessingJob {\n    range: Range<Key>,\n    tasks: Vec<AnyImportTask>,\n\n    pgdata_lsn: Lsn,\n}\n\nimpl ChunkProcessingJob {\n    fn new(range: Range<Key>, tasks: Vec<AnyImportTask>, pgdata_lsn: Lsn) -> Self {\n        assert!(pgdata_lsn.is_valid());\n        Self {\n            range,\n            tasks,\n            pgdata_lsn,\n        }\n    }\n\n    async fn run(\n        self,\n        timeline: Arc<Timeline>,\n        max_byte_range_size: usize,\n        ctx: &RequestContext,\n    ) -> anyhow::Result<()> {\n        let mut writer = ImageLayerWriter::new(\n            timeline.conf,\n            timeline.timeline_id,\n            timeline.tenant_shard_id,\n            &self.range,\n            self.pgdata_lsn,\n            &timeline.gate,\n            timeline.cancel.clone(),\n            ctx,\n        )\n        .await?;\n\n        let mut nimages = 0;\n        for task in self.tasks {\n            nimages += task.doit(&mut writer, max_byte_range_size, ctx).await?;\n        }\n\n        let resident_layer = if nimages > 0 {\n            let (desc, path) = writer.finish(ctx).await?;\n\n            {\n                let guard = timeline\n                    .layers\n                    .read(LayerManagerLockHolder::ImportPgData)\n                    .await;\n                let existing_layer = guard.try_get_from_key(&desc.key());\n                if let Some(layer) = existing_layer {\n                    if layer.metadata().generation == timeline.generation {\n                        return Err(anyhow::anyhow!(\n                            \"Import attempted to rewrite layer file in the same generation: {}\",\n                            layer.local_path()\n                        ));\n                    }\n                }\n            }\n\n            Layer::finish_creating(timeline.conf, &timeline, desc, &path)?\n        } else {\n            // dropping the writer cleans up\n            return Ok(());\n        
};\n\n        // The same import job might run multiple times since not each job is checkpointed.\n        // Hence, we must support the cases where the layer already exists. We cannot be\n        // certain that the existing layer is identical to the new one, so in that case\n        // we replace the old layer with the one we just generated.\n\n        let mut guard = timeline\n            .layers\n            .write(LayerManagerLockHolder::ImportPgData)\n            .await;\n\n        let existing_layer = guard\n            .try_get_from_key(&resident_layer.layer_desc().key())\n            .cloned();\n        match existing_layer {\n            Some(existing) => {\n                // Unlink the remote layer from the index without scheduling its deletion.\n                // When `existing_layer` drops [`LayerInner::drop`] will schedule its deletion from\n                // remote storage, but that assumes that the layer was unlinked from the index first.\n                timeline\n                    .remote_client\n                    .schedule_unlinking_of_layers_from_index_part(std::iter::once(\n                        existing.layer_desc().layer_name(),\n                    ))?;\n\n                guard.open_mut()?.rewrite_layers(\n                    &[(existing.clone(), resident_layer.clone())],\n                    &[],\n                    &timeline.metrics,\n                );\n            }\n            None => {\n                guard\n                    .open_mut()?\n                    .track_new_image_layers(&[resident_layer.clone()], &timeline.metrics);\n            }\n        }\n\n        crate::tenant::timeline::drop_layer_manager_wlock(guard);\n\n        timeline\n            .remote_client\n            .schedule_layer_file_upload(resident_layer)?;\n\n        Ok(())\n    }\n}\n"
  },
  {
    "path": "pageserver/src/tenant/timeline/import_pgdata/importbucket_client.rs",
    "content": "use std::ops::Bound;\nuse std::sync::Arc;\n\nuse anyhow::Context;\nuse bytes::Bytes;\nuse postgres_ffi::{ControlFileData, PgMajorVersion};\nuse remote_storage::{\n    Download, DownloadError, DownloadKind, DownloadOpts, GenericRemoteStorage, Listing,\n    ListingObject, RemotePath, RemoteStorageConfig,\n};\nuse serde::de::DeserializeOwned;\nuse tokio_util::sync::CancellationToken;\nuse tracing::{debug, info, instrument};\nuse utils::lsn::Lsn;\n\nuse super::index_part_format;\nuse crate::assert_u64_eq_usize::U64IsUsize;\nuse crate::config::PageServerConf;\n\npub async fn new(\n    conf: &'static PageServerConf,\n    location: &index_part_format::Location,\n    cancel: CancellationToken,\n) -> Result<RemoteStorageWrapper, anyhow::Error> {\n    // Downloads should be reasonably sized. We do ranged reads for relblock raw data\n    // and full reads for SLRU segments which are bounded by Postgres.\n    let timeout = RemoteStorageConfig::DEFAULT_TIMEOUT;\n    let location_storage = match location {\n        #[cfg(feature = \"testing\")]\n        index_part_format::Location::LocalFs { path } => {\n            GenericRemoteStorage::LocalFs(remote_storage::LocalFs::new(path.clone(), timeout)?)\n        }\n        index_part_format::Location::AwsS3 {\n            region,\n            bucket,\n            key,\n        } => {\n            // TODO: think about security implications of letting the client specify the bucket & prefix.\n            // It's the most flexible right now, but, possibly we want to move bucket name into PS conf\n            // and force the timeline_id into the prefix?\n            GenericRemoteStorage::AwsS3(Arc::new(\n                remote_storage::S3Bucket::new(\n                    &remote_storage::S3Config {\n                        bucket_name: bucket.clone(),\n                        prefix_in_bucket: Some(key.clone()),\n                        bucket_region: region.clone(),\n                        endpoint: conf\n               
             .import_pgdata_aws_endpoint_url\n                            .clone()\n                            .map(|url| url.to_string()), //  by specifying None here, remote_storage/aws-sdk-rust will infer from env\n                        // This matches the default import job concurrency. This is managed\n                        // separately from the usual S3 client, but the concern here is bandwidth\n                        // usage.\n                        concurrency_limit: 128.try_into().unwrap(),\n                        max_keys_per_list_response: Some(1000),\n                        upload_storage_class: None, // irrelevant\n                    },\n                    timeout,\n                )\n                .await\n                .context(\"setup s3 bucket\")?,\n            ))\n        }\n    };\n    let storage_wrapper = RemoteStorageWrapper::new(location_storage, cancel);\n    Ok(storage_wrapper)\n}\n\n/// Wrap [`remote_storage`] APIs to make it look a bit more like a filesystem API\n/// such as [`tokio::fs`], which was used in the original implementation of the import code.\n#[derive(Clone)]\npub struct RemoteStorageWrapper {\n    storage: GenericRemoteStorage,\n    cancel: CancellationToken,\n}\n\nimpl RemoteStorageWrapper {\n    pub fn new(storage: GenericRemoteStorage, cancel: CancellationToken) -> Self {\n        Self { storage, cancel }\n    }\n\n    #[instrument(level = tracing::Level::DEBUG, skip_all, fields(%path))]\n    pub async fn listfilesindir(\n        &self,\n        path: &RemotePath,\n    ) -> Result<Vec<(RemotePath, usize)>, DownloadError> {\n        assert!(\n            path.object_name().is_some(),\n            \"must specify dirname, without trailing slash\"\n        );\n        let path = path.add_trailing_slash();\n\n        let res = crate::tenant::remote_timeline_client::download::download_retry_forever(\n            || async {\n                let Listing { keys, prefixes: _ } = self\n                    .storage\n  
                  .list(\n                        Some(&path),\n                        remote_storage::ListingMode::WithDelimiter,\n                        None,\n                        &self.cancel,\n                    )\n                    .await?;\n                let res = keys\n                    .into_iter()\n                    .map(|ListingObject { key, size, .. }| (key, size.into_usize()))\n                    .collect();\n                Ok(res)\n            },\n            &format!(\"listfilesindir {path:?}\"),\n            &self.cancel,\n        )\n        .await;\n        debug!(?res, \"returning\");\n        res\n    }\n\n    #[instrument(level = tracing::Level::DEBUG, skip_all, fields(%path))]\n    pub async fn listdir(&self, path: &RemotePath) -> Result<Vec<RemotePath>, DownloadError> {\n        assert!(\n            path.object_name().is_some(),\n            \"must specify dirname, without trailing slash\"\n        );\n        let path = path.add_trailing_slash();\n\n        let res = crate::tenant::remote_timeline_client::download::download_retry_forever(\n            || async {\n                let Listing { keys, prefixes } = self\n                    .storage\n                    .list(\n                        Some(&path),\n                        remote_storage::ListingMode::WithDelimiter,\n                        None,\n                        &self.cancel,\n                    )\n                    .await?;\n                let res = keys\n                    .into_iter()\n                    .map(|ListingObject { key, .. 
}| key)\n                    .chain(prefixes.into_iter())\n                    .collect();\n                Ok(res)\n            },\n            &format!(\"listdir {path:?}\"),\n            &self.cancel,\n        )\n        .await;\n        debug!(?res, \"returning\");\n        res\n    }\n\n    #[instrument(level = tracing::Level::DEBUG, skip_all, fields(%path))]\n    pub async fn get(&self, path: &RemotePath) -> Result<Bytes, DownloadError> {\n        let res = crate::tenant::remote_timeline_client::download::download_retry_forever(\n            || async {\n                let Download {\n                    download_stream, ..\n                } = self\n                    .storage\n                    .download(path, &DownloadOpts::default(), &self.cancel)\n                    .await?;\n                let mut reader = tokio_util::io::StreamReader::new(download_stream);\n\n                // XXX optimize this, can we get the capacity hint from somewhere?\n                let mut buf = Vec::new();\n                tokio::io::copy_buf(&mut reader, &mut buf).await?;\n                Ok(Bytes::from(buf))\n            },\n            &format!(\"download {path:?}\"),\n            &self.cancel,\n        )\n        .await;\n        debug!(len = res.as_ref().ok().map(|buf| buf.len()), \"done\");\n        res\n    }\n\n    #[instrument(level = tracing::Level::DEBUG, skip_all, fields(%path))]\n    pub async fn get_json<T: DeserializeOwned>(\n        &self,\n        path: &RemotePath,\n    ) -> Result<Option<T>, DownloadError> {\n        let buf = match self.get(path).await {\n            Ok(buf) => buf,\n            Err(DownloadError::NotFound) => return Ok(None),\n            Err(err) => return Err(err),\n        };\n        let res = serde_json::from_slice(&buf)\n            .context(\"serialize\")\n            // TODO: own error type\n            .map_err(DownloadError::Other)?;\n        Ok(Some(res))\n    }\n\n    #[instrument(level = tracing::Level::DEBUG, skip_all, 
fields(%path))]\n    pub async fn get_range(\n        &self,\n        path: &RemotePath,\n        start_inclusive: u64,\n        end_exclusive: u64,\n    ) -> Result<Vec<u8>, DownloadError> {\n        let len = end_exclusive\n            .checked_sub(start_inclusive)\n            .unwrap()\n            .into_usize();\n        let res = crate::tenant::remote_timeline_client::download::download_retry_forever(\n            || async {\n                let Download {\n                    download_stream, ..\n                } = self\n                    .storage\n                    .download(\n                        path,\n                        &DownloadOpts {\n                            kind: DownloadKind::Large,\n                            etag: None,\n                            byte_start: Bound::Included(start_inclusive),\n                            byte_end: Bound::Excluded(end_exclusive),\n                            version_id: None,\n                        },\n                        &self.cancel)\n                    .await?;\n                let mut reader = tokio_util::io::StreamReader::new(download_stream);\n\n                let mut buf = Vec::with_capacity(len);\n                tokio::io::copy_buf(&mut reader, &mut buf).await?;\n                Ok(buf)\n            },\n            &format!(\"download range len=0x{len:x} [0x{start_inclusive:x},0x{end_exclusive:x}) from {path:?}\"),\n            &self.cancel,\n        )\n        .await;\n        debug!(len = res.as_ref().ok().map(|buf| buf.len()), \"done\");\n        res\n    }\n\n    pub fn pgdata(&self) -> RemotePath {\n        RemotePath::from_string(\"pgdata\").unwrap()\n    }\n\n    pub async fn get_control_file(&self) -> Result<ControlFile, anyhow::Error> {\n        let control_file_path = self.pgdata().join(\"global/pg_control\");\n        info!(\"get control file from {control_file_path}\");\n        let control_file_buf = self.get(&control_file_path).await?;\n        
ControlFile::new(control_file_buf)\n    }\n}\n\npub struct ControlFile {\n    control_file_data: ControlFileData,\n    control_file_buf: Bytes,\n}\n\nimpl ControlFile {\n    pub(crate) fn new(control_file_buf: Bytes) -> Result<Self, anyhow::Error> {\n        // XXX ControlFileData is version-specific, we're always using v14 here. v17 had changes.\n        let control_file_data = ControlFileData::decode(&control_file_buf)?;\n        let control_file = ControlFile {\n            control_file_data,\n            control_file_buf,\n        };\n        control_file.try_pg_version()?; // so that we can offer infallible pg_version()\n        Ok(control_file)\n    }\n    pub(crate) fn base_lsn(&self) -> Lsn {\n        Lsn(self.control_file_data.checkPoint).align()\n    }\n    pub(crate) fn pg_version(&self) -> PgMajorVersion {\n        self.try_pg_version()\n            .expect(\"prepare() checks that try_pg_version doesn't error\")\n    }\n    pub(crate) fn control_file_data(&self) -> &ControlFileData {\n        &self.control_file_data\n    }\n    pub(crate) fn control_file_buf(&self) -> &Bytes {\n        &self.control_file_buf\n    }\n\n    fn try_pg_version(&self) -> anyhow::Result<PgMajorVersion> {\n        Ok(match self.control_file_data.catalog_version_no {\n            // these are from catversion.h\n            202107181 => PgMajorVersion::PG14,\n            202209061 => PgMajorVersion::PG15,\n            202307071 => PgMajorVersion::PG16,\n            202406281 => PgMajorVersion::PG17,\n            catversion => {\n                anyhow::bail!(\"unrecognized catalog version {catversion}\")\n            }\n        })\n    }\n}\n"
  },
  {
    "path": "pageserver/src/tenant/timeline/import_pgdata/importbucket_format.rs",
    "content": "use serde::{Deserialize, Serialize};\n\n#[derive(Deserialize, Serialize, Debug, Clone, PartialEq, Eq)]\npub struct PgdataStatus {\n    pub done: bool,\n    // TODO: remaining fields\n}\n"
  },
  {
    "path": "pageserver/src/tenant/timeline/import_pgdata/index_part_format.rs",
    "content": "#[cfg(feature = \"testing\")]\nuse camino::Utf8PathBuf;\nuse serde::{Deserialize, Serialize};\n\n#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)]\npub enum Root {\n    V1(V1),\n}\n#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)]\npub enum V1 {\n    InProgress(InProgress),\n    Done(Done),\n}\n\n#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)]\n#[serde(transparent)]\npub struct IdempotencyKey(String);\n\nimpl IdempotencyKey {\n    pub fn new(s: String) -> Self {\n        Self(s)\n    }\n}\n\n#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)]\npub struct InProgress {\n    pub idempotency_key: IdempotencyKey,\n    pub location: Location,\n    pub started_at: chrono::NaiveDateTime,\n}\n\n#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)]\npub struct Done {\n    pub idempotency_key: IdempotencyKey,\n    pub started_at: chrono::NaiveDateTime,\n    pub finished_at: chrono::NaiveDateTime,\n}\n\n#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)]\npub enum Location {\n    #[cfg(feature = \"testing\")]\n    LocalFs { path: Utf8PathBuf },\n    AwsS3 {\n        region: String,\n        bucket: String,\n        key: String,\n    },\n}\n\nimpl Root {\n    pub fn is_done(&self) -> bool {\n        match self {\n            Root::V1(v1) => match v1 {\n                V1::Done(_) => true,\n                V1::InProgress(_) => false,\n            },\n        }\n    }\n    pub fn idempotency_key(&self) -> &IdempotencyKey {\n        match self {\n            Root::V1(v1) => match v1 {\n                V1::InProgress(in_progress) => &in_progress.idempotency_key,\n                V1::Done(done) => &done.idempotency_key,\n            },\n        }\n    }\n    pub fn started_at(&self) -> &chrono::NaiveDateTime {\n        match self {\n            Root::V1(v1) => match v1 {\n                V1::InProgress(in_progress) => &in_progress.started_at,\n                V1::Done(done) => &done.started_at,\n 
           },\n        }\n    }\n}\n"
  },
  {
    "path": "pageserver/src/tenant/timeline/import_pgdata.rs",
    "content": "use std::sync::Arc;\n\nuse anyhow::{Context, bail};\nuse importbucket_client::{ControlFile, RemoteStorageWrapper};\nuse pageserver_api::models::ShardImportStatus;\nuse remote_storage::RemotePath;\nuse tokio::task::JoinHandle;\nuse tokio_util::sync::CancellationToken;\nuse tracing::info;\nuse utils::lsn::Lsn;\nuse utils::pausable_failpoint;\nuse utils::sync::gate::Gate;\n\nuse super::{Timeline, TimelineDeleteProgress};\nuse crate::context::RequestContext;\nuse crate::controller_upcall_client::{StorageControllerUpcallApi, StorageControllerUpcallClient};\nuse crate::tenant::metadata::TimelineMetadata;\nuse crate::tenant::timeline::layer_manager::LayerManagerLockHolder;\n\nmod flow;\nmod importbucket_client;\nmod importbucket_format;\npub(crate) mod index_part_format;\n\npub struct ImportingTimeline {\n    pub import_task_handle: JoinHandle<()>,\n    pub import_task_gate: Gate,\n    pub timeline: Arc<Timeline>,\n    pub delete_progress: TimelineDeleteProgress,\n}\n\nimpl std::fmt::Debug for ImportingTimeline {\n    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {\n        write!(f, \"ImportingTimeline<{}>\", self.timeline.timeline_id)\n    }\n}\n\nimpl ImportingTimeline {\n    pub async fn shutdown(&self) {\n        self.import_task_handle.abort();\n        self.import_task_gate.close().await;\n\n        self.timeline.remote_client.shutdown().await;\n    }\n}\n\npub async fn doit(\n    timeline: &Arc<Timeline>,\n    index_part: index_part_format::Root,\n    ctx: &RequestContext,\n    cancel: CancellationToken,\n) -> anyhow::Result<()> {\n    let index_part_format::Root::V1(v1) = index_part;\n    let index_part_format::InProgress {\n        location,\n        idempotency_key: _,\n        started_at: _,\n    } = match v1 {\n        index_part_format::V1::Done(_) => return Ok(()),\n        index_part_format::V1::InProgress(in_progress) => in_progress,\n    };\n\n    let storcon_client = StorageControllerUpcallClient::new(timeline.conf, 
&cancel);\n\n    let shard_status = storcon_client\n        .get_timeline_import_status(\n            timeline.tenant_shard_id,\n            timeline.timeline_id,\n            timeline.generation,\n        )\n        .await\n        .map_err(|_err| anyhow::anyhow!(\"Shut down while getting timeline import status\"))?;\n\n    info!(?shard_status, \"peeking shard status\");\n    match shard_status {\n        ShardImportStatus::InProgress(maybe_progress) => {\n            let storage =\n                importbucket_client::new(timeline.conf, &location, cancel.clone()).await?;\n\n            let control_file_res = if maybe_progress.is_none() {\n                // Only prepare the import once when there's no progress.\n                prepare_import(timeline, storage.clone(), &cancel).await\n            } else {\n                storage.get_control_file().await\n            };\n\n            let control_file = match control_file_res {\n                Ok(cf) => cf,\n                Err(err) => {\n                    return Err(\n                        terminate_flow_with_error(timeline, err, &storcon_client, &cancel).await,\n                    );\n                }\n            };\n\n            let res = flow::run(\n                timeline.clone(),\n                control_file,\n                storage.clone(),\n                maybe_progress,\n                ctx,\n            )\n            .await;\n            if let Err(err) = res {\n                return Err(\n                    terminate_flow_with_error(timeline, err, &storcon_client, &cancel).await,\n                );\n            }\n\n            tracing::info!(\"Import plan executed. 
Flushing remote changes and notifying storcon\");\n\n            timeline\n                .remote_client\n                .schedule_index_upload_for_file_changes()?;\n            timeline.remote_client.wait_completion().await?;\n\n            pausable_failpoint!(\"import-timeline-pre-success-notify-pausable\");\n\n            // Communicate that shard is done.\n            // Ensure at-least-once delivery of the upcall to storage controller\n            // before we mark the task as done and never come here again.\n            //\n            // Note that we do not mark the import complete in the index part now.\n            // This happens in [`Tenant::finalize_importing_timeline`] in response\n            // to the storage controller calling\n            // `/v1/tenant/:tenant_id/timeline/:timeline_id/activate_post_import`.\n            storcon_client\n                .put_timeline_import_status(\n                    timeline.tenant_shard_id,\n                    timeline.timeline_id,\n                    timeline.generation,\n                    ShardImportStatus::Done,\n                )\n                .await\n                .map_err(|_err| {\n                    anyhow::anyhow!(\"Shut down while putting timeline import status\")\n                })?;\n        }\n        ShardImportStatus::Error(err) => {\n            info!(\n                \"shard status indicates that the shard is done (error), skipping import {}\",\n                err\n            );\n        }\n        ShardImportStatus::Done => {\n            info!(\"shard status indicates that the shard is done (success), skipping import\");\n        }\n    }\n\n    Ok(())\n}\n\nasync fn prepare_import(\n    timeline: &Arc<Timeline>,\n    storage: RemoteStorageWrapper,\n    cancel: &CancellationToken,\n) -> anyhow::Result<ControlFile> {\n    // Wipe the slate clean before starting the import as a precaution.\n    // This method is only called when there's no recorded checkpoint for the import\n    
// in the storage controller.\n    //\n    // Note that this is split-brain safe (two imports for same timeline shards running in\n    // different generations) because we go through the usual deletion path, including deletion queue.\n    info!(\"wipe the slate clean\");\n    {\n        // TODO: do we need to hold GC lock for this?\n        let mut guard = timeline\n            .layers\n            .write(LayerManagerLockHolder::ImportPgData)\n            .await;\n        assert!(\n            guard.layer_map()?.open_layer.is_none(),\n            \"while importing, there should be no in-memory layer\" // this just seems like a good place to assert it\n        );\n        let all_layers_keys = guard.all_persistent_layers();\n        let all_layers: Vec<_> = all_layers_keys\n            .iter()\n            .map(|key| guard.get_from_key(key))\n            .collect();\n        let open = guard.open_mut().context(\"open_mut\")?;\n\n        timeline.remote_client.schedule_gc_update(&all_layers)?;\n        open.finish_gc_timeline(&all_layers);\n    }\n\n    //\n    // Wait for pgdata to finish uploading\n    //\n    info!(\"wait for pgdata to reach status 'done'\");\n    let status_prefix = RemotePath::from_string(\"status\").unwrap();\n    let pgdata_status_key = status_prefix.join(\"pgdata\");\n    loop {\n        let res = async {\n            let pgdata_status: Option<importbucket_format::PgdataStatus> = storage\n                .get_json(&pgdata_status_key)\n                .await\n                .context(\"get pgdata status\")?;\n            info!(?pgdata_status, \"peeking pgdata status\");\n            if pgdata_status.map(|st| st.done).unwrap_or(false) {\n                Ok(())\n            } else {\n                Err(anyhow::anyhow!(\"pgdata not done yet\"))\n            }\n        }\n        .await;\n        match res {\n            Ok(_) => break,\n            Err(_err) => {\n                info!(\"indefinitely waiting for pgdata to finish\");\n            
    if tokio::time::timeout(std::time::Duration::from_secs(10), cancel.cancelled())\n                    .await\n                    .is_ok()\n                {\n                    bail!(\"cancelled while waiting for pgdata\");\n                }\n            }\n        }\n    }\n\n    let control_file = storage.get_control_file().await?;\n    let base_lsn = control_file.base_lsn();\n\n    info!(\"update TimelineMetadata based on LSNs from control file\");\n    {\n        let pg_version = control_file.pg_version();\n        async move {\n            // FIXME: The 'disk_consistent_lsn' should be the LSN at the *end* of the\n            // checkpoint record, and prev_record_lsn should point to its beginning.\n            // We should read the real end of the record from the WAL, but here we\n            // just fake it.\n            let disk_consistent_lsn = Lsn(base_lsn.0 + 8);\n            let prev_record_lsn = base_lsn;\n            let metadata = TimelineMetadata::new(\n                disk_consistent_lsn,\n                Some(prev_record_lsn),\n                None,     // no ancestor\n                Lsn(0),   // no ancestor lsn\n                base_lsn, // latest_gc_cutoff_lsn\n                base_lsn, // initdb_lsn\n                pg_version,\n            );\n\n            let _start_lsn = disk_consistent_lsn + 1;\n\n            timeline\n                .remote_client\n                .schedule_index_upload_for_full_metadata_update(&metadata)?;\n\n            timeline.remote_client.wait_completion().await?;\n\n            anyhow::Ok(())\n        }\n    }\n    .await?;\n\n    Ok(control_file)\n}\n\nasync fn terminate_flow_with_error(\n    timeline: &Arc<Timeline>,\n    error: anyhow::Error,\n    storcon_client: &StorageControllerUpcallClient,\n    cancel: &CancellationToken,\n) -> anyhow::Error {\n    // The import task is aborted on tenant shutdown, so in principle, it should\n    // never be cancelled. 
To be on the safe side, check the cancellation tokens\n    // before marking the import as failed.\n    if !(cancel.is_cancelled() || timeline.cancel.is_cancelled()) {\n        let notify_res = storcon_client\n            .put_timeline_import_status(\n                timeline.tenant_shard_id,\n                timeline.timeline_id,\n                timeline.generation,\n                ShardImportStatus::Error(format!(\"{error:#}\")),\n            )\n            .await;\n\n        if let Err(_notify_error) = notify_res {\n            // The [`StorageControllerUpcallClient::put_timeline_import_status`] retries\n            // forever internally, so errors returned by it can only be due to cancellation.\n            info!(\"failed to notify storcon about permanent import error\");\n        }\n\n        // Will be logged by [`Tenant::create_timeline_import_pgdata_task`]\n        error\n    } else {\n        anyhow::anyhow!(\"Import task cancelled\")\n    }\n}\n"
  },
  {
    "path": "pageserver/src/tenant/timeline/init.rs",
    "content": "use std::collections::{HashMap, hash_map};\nuse std::str::FromStr;\n\nuse anyhow::Context;\nuse camino::{Utf8Path, Utf8PathBuf};\nuse utils::lsn::Lsn;\n\nuse crate::is_temporary;\nuse crate::tenant::ephemeral_file::is_ephemeral_file;\nuse crate::tenant::remote_timeline_client::index::{IndexPart, LayerFileMetadata};\nuse crate::tenant::remote_timeline_client::{self};\nuse crate::tenant::storage_layer::LayerName;\n\n/// Identified files in the timeline directory.\npub(super) enum Discovered {\n    /// The only one we care about\n    Layer(LayerName, LocalLayerFileMetadata),\n    /// Old ephmeral files from previous launches, should be removed\n    Ephemeral(String),\n    /// Old temporary timeline files, unsure what these really are, should be removed\n    Temporary(String),\n    /// Temporary on-demand download files, should be removed\n    TemporaryDownload(String),\n    /// Backup file from previously future layers\n    IgnoredBackup(Utf8PathBuf),\n    /// Unrecognized, warn about these\n    Unknown(String),\n}\n\n/// Scans the timeline directory for interesting files.\npub(super) fn scan_timeline_dir(path: &Utf8Path) -> anyhow::Result<Vec<Discovered>> {\n    let mut ret = Vec::new();\n\n    for direntry in path.read_dir_utf8()? 
{\n        let direntry = direntry?;\n        let file_name = direntry.file_name().to_string();\n\n        let discovered = match LayerName::from_str(&file_name) {\n            Ok(file_name) => {\n                let file_size = direntry.metadata()?.len();\n                Discovered::Layer(\n                    file_name,\n                    LocalLayerFileMetadata::new(direntry.path().to_owned(), file_size),\n                )\n            }\n            Err(_) => {\n                if file_name.ends_with(\".old\") {\n                    // ignore these\n                    Discovered::IgnoredBackup(direntry.path().to_owned())\n                } else if remote_timeline_client::is_temp_download_file(direntry.path()) {\n                    Discovered::TemporaryDownload(file_name)\n                } else if is_ephemeral_file(&file_name) {\n                    Discovered::Ephemeral(file_name)\n                } else if is_temporary(direntry.path()) {\n                    Discovered::Temporary(file_name)\n                } else {\n                    Discovered::Unknown(file_name)\n                }\n            }\n        };\n\n        ret.push(discovered);\n    }\n\n    Ok(ret)\n}\n\n/// Whereas `LayerFileMetadata` describes the metadata we would store in remote storage,\n/// this structure extends it with metadata describing the layer's presence in local storage.\n#[derive(Clone, Debug)]\npub(super) struct LocalLayerFileMetadata {\n    pub(super) file_size: u64,\n    pub(super) local_path: Utf8PathBuf,\n}\n\nimpl LocalLayerFileMetadata {\n    pub fn new(local_path: Utf8PathBuf, file_size: u64) -> Self {\n        Self {\n            local_path,\n            file_size,\n        }\n    }\n}\n\n/// For a layer that is present in remote metadata, this type describes how to handle\n/// it during startup: it is either Resident (and we have some metadata about a local file),\n/// or it is Evicted (and we only have remote metadata).\n#[derive(Clone, Debug)]\npub(super) enum 
Decision {\n    /// The layer is not present locally.\n    Evicted(LayerFileMetadata),\n    /// The layer is present locally, and metadata matches: we may hook up this layer to the\n    /// existing file in local storage.\n    Resident {\n        local: LocalLayerFileMetadata,\n        remote: LayerFileMetadata,\n    },\n}\n\n/// A layer needs to be left out of the layer map.\n#[derive(Debug)]\npub(super) enum DismissedLayer {\n    /// The related layer is in the future compared to disk_consistent_lsn, it must not be loaded.\n    Future {\n        /// `None` if the layer is only known through [`IndexPart`].\n        local: Option<LocalLayerFileMetadata>,\n    },\n    /// The layer only exists locally.\n    ///\n    /// In order to make crash safe updates to layer map, we must dismiss layers which are only\n    /// found locally or not yet included in the remote `index_part.json`.\n    LocalOnly(LocalLayerFileMetadata),\n\n    /// The layer exists in remote storage but the local layer's metadata (e.g. file size)\n    /// does not match it\n    BadMetadata(LocalLayerFileMetadata),\n}\n\n/// Merges local discoveries and remote [`IndexPart`] to a collection of decisions.\npub(super) fn reconcile(\n    local_layers: Vec<(LayerName, LocalLayerFileMetadata)>,\n    index_part: &IndexPart,\n    disk_consistent_lsn: Lsn,\n) -> Vec<(LayerName, Result<Decision, DismissedLayer>)> {\n    let mut result = Vec::new();\n\n    let mut remote_layers = HashMap::new();\n\n    // Construct Decisions for layers that are found locally, if they're in remote metadata.  
Otherwise\n    // construct DismissedLayers to get rid of them.\n    for (layer_name, local_metadata) in local_layers {\n        let Some(remote_metadata) = index_part.layer_metadata.get(&layer_name) else {\n            result.push((layer_name, Err(DismissedLayer::LocalOnly(local_metadata))));\n            continue;\n        };\n\n        if remote_metadata.file_size != local_metadata.file_size {\n            result.push((layer_name, Err(DismissedLayer::BadMetadata(local_metadata))));\n            continue;\n        }\n\n        remote_layers.insert(\n            layer_name,\n            Decision::Resident {\n                local: local_metadata,\n                remote: remote_metadata.clone(),\n            },\n        );\n    }\n\n    // Construct Decision for layers that were not found locally\n    index_part\n        .layer_metadata\n        .iter()\n        .for_each(|(name, metadata)| {\n            if let hash_map::Entry::Vacant(entry) = remote_layers.entry(name.clone()) {\n                entry.insert(Decision::Evicted(metadata.clone()));\n            }\n        });\n\n    // For layers that were found in authoritative remote metadata, apply a final check that they are within\n    // the disk_consistent_lsn.\n    result.extend(remote_layers.into_iter().map(|(name, decision)| {\n        if name.is_in_future(disk_consistent_lsn) {\n            match decision {\n                Decision::Evicted(_remote) => (name, Err(DismissedLayer::Future { local: None })),\n                Decision::Resident {\n                    local,\n                    remote: _remote,\n                } => (name, Err(DismissedLayer::Future { local: Some(local) })),\n            }\n        } else {\n            (name, Ok(decision))\n        }\n    }));\n\n    result\n}\n\npub(super) fn cleanup(path: &Utf8Path, kind: &str) -> anyhow::Result<()> {\n    let file_name = path.file_name().expect(\"must be file path\");\n    tracing::debug!(kind, ?file_name, \"cleaning up\");\n    
std::fs::remove_file(path).with_context(|| format!(\"failed to remove {kind} at {path}\"))\n}\n\npub(super) fn cleanup_local_file_for_remote(local: &LocalLayerFileMetadata) -> anyhow::Result<()> {\n    let local_size = local.file_size;\n    let path = &local.local_path;\n    let file_name = path.file_name().expect(\"must be file path\");\n    tracing::warn!(\n        \"removing local file {file_name:?} because it has unexpected length {local_size};\"\n    );\n\n    std::fs::remove_file(path).with_context(|| format!(\"failed to remove layer at {path}\"))\n}\n\npub(super) fn cleanup_future_layer(\n    path: &Utf8Path,\n    name: &LayerName,\n    disk_consistent_lsn: Lsn,\n) -> anyhow::Result<()> {\n    // future image layers are allowed to be produced always for not yet flushed to disk\n    // lsns stored in InMemoryLayer.\n    let kind = name.kind();\n    tracing::info!(\"found future {kind} layer {name} disk_consistent_lsn is {disk_consistent_lsn}\");\n    std::fs::remove_file(path)?;\n    Ok(())\n}\n\npub(super) fn cleanup_local_only_file(\n    name: &LayerName,\n    local: &LocalLayerFileMetadata,\n) -> anyhow::Result<()> {\n    let kind = name.kind();\n    tracing::info!(\n        \"found local-only {kind} layer {name} size {}\",\n        local.file_size\n    );\n    std::fs::remove_file(&local.local_path)?;\n    Ok(())\n}\n"
  },
  {
    "path": "pageserver/src/tenant/timeline/layer_manager.rs",
    "content": "use std::collections::HashMap;\nuse std::mem::ManuallyDrop;\nuse std::ops::{Deref, DerefMut};\nuse std::sync::Arc;\nuse std::time::Duration;\n\nuse anyhow::{Context, bail, ensure};\nuse itertools::Itertools;\nuse pageserver_api::keyspace::KeySpace;\nuse pageserver_api::shard::TenantShardId;\nuse tokio_util::sync::CancellationToken;\nuse tracing::trace;\nuse utils::id::TimelineId;\nuse utils::lsn::{AtomicLsn, Lsn};\n\nuse super::{LayerFringe, ReadableLayer, TimelineWriterState};\nuse crate::config::PageServerConf;\nuse crate::context::RequestContext;\nuse crate::metrics::TimelineMetrics;\nuse crate::tenant::layer_map::{BatchedUpdates, LayerMap, SearchResult};\nuse crate::tenant::storage_layer::{\n    AsLayerDesc, InMemoryLayer, Layer, LayerVisibilityHint, PersistentLayerDesc,\n    PersistentLayerKey, ReadableLayerWeak, ResidentLayer,\n};\n\n/// Warn if the lock was held for longer than this threshold.\n/// It's very generous and we should bring this value down over time.\nconst LAYER_MANAGER_LOCK_WARN_THRESHOLD: Duration = Duration::from_secs(5);\nconst LAYER_MANAGER_LOCK_READ_WARN_THRESHOLD: Duration = Duration::from_secs(30);\n\n/// Describes the operation that is holding the layer manager lock\n#[derive(Debug, Clone, Copy, strum_macros::Display)]\n#[strum(serialize_all = \"kebab_case\")]\npub(crate) enum LayerManagerLockHolder {\n    GetLayerMapInfo,\n    GenerateHeatmap,\n    GetPage,\n    Init,\n    LoadLayerMap,\n    GetLayerForWrite,\n    TryFreezeLayer,\n    FlushFrozenLayer,\n    FlushLoop,\n    Compaction,\n    GarbageCollection,\n    Shutdown,\n    ImportPgData,\n    DetachAncestor,\n    Eviction,\n    ComputeImageConsistentLsn,\n    #[cfg(test)]\n    Testing,\n}\n\n/// Wrapper for the layer manager that tracks the amount of time during which\n/// it was held under read or write lock\n#[derive(Default)]\npub(crate) struct LockedLayerManager {\n    locked: tokio::sync::RwLock<LayerManager>,\n}\n\npub(crate) struct LayerManagerReadGuard<'a> 
{\n    guard: ManuallyDrop<tokio::sync::RwLockReadGuard<'a, LayerManager>>,\n    acquired_at: std::time::Instant,\n    holder: LayerManagerLockHolder,\n}\n\npub(crate) struct LayerManagerWriteGuard<'a> {\n    guard: ManuallyDrop<tokio::sync::RwLockWriteGuard<'a, LayerManager>>,\n    acquired_at: std::time::Instant,\n    holder: LayerManagerLockHolder,\n}\n\nimpl Drop for LayerManagerReadGuard<'_> {\n    fn drop(&mut self) {\n        // Drop the lock first, before potentially warning if it was held for too long.\n        // SAFETY: ManuallyDrop in Drop implementation\n        unsafe { ManuallyDrop::drop(&mut self.guard) };\n\n        let held_for = self.acquired_at.elapsed();\n        if held_for >= LAYER_MANAGER_LOCK_READ_WARN_THRESHOLD {\n            tracing::warn!(\n                holder=%self.holder,\n                \"Layer manager read lock held for {}s\",\n                held_for.as_secs_f64(),\n            );\n        }\n    }\n}\n\nimpl Drop for LayerManagerWriteGuard<'_> {\n    fn drop(&mut self) {\n        // Drop the lock first, before potentially warning if it was held for too long.\n        // SAFETY: ManuallyDrop in Drop implementation\n        unsafe { ManuallyDrop::drop(&mut self.guard) };\n\n        let held_for = self.acquired_at.elapsed();\n        if held_for >= LAYER_MANAGER_LOCK_WARN_THRESHOLD {\n            tracing::warn!(\n                holder=%self.holder,\n                \"Layer manager write lock held for {}s\",\n                held_for.as_secs_f64(),\n            );\n        }\n    }\n}\n\nimpl Deref for LayerManagerReadGuard<'_> {\n    type Target = LayerManager;\n\n    fn deref(&self) -> &Self::Target {\n        self.guard.deref()\n    }\n}\n\nimpl Deref for LayerManagerWriteGuard<'_> {\n    type Target = LayerManager;\n\n    fn deref(&self) -> &Self::Target {\n        self.guard.deref()\n    }\n}\n\nimpl DerefMut for LayerManagerWriteGuard<'_> {\n    fn deref_mut(&mut self) -> &mut Self::Target {\n        
self.guard.deref_mut()\n    }\n}\n\nimpl LockedLayerManager {\n    pub(crate) async fn read(&self, holder: LayerManagerLockHolder) -> LayerManagerReadGuard {\n        let guard = ManuallyDrop::new(self.locked.read().await);\n        LayerManagerReadGuard {\n            guard,\n            acquired_at: std::time::Instant::now(),\n            holder,\n        }\n    }\n\n    pub(crate) fn try_read(\n        &self,\n        holder: LayerManagerLockHolder,\n    ) -> Result<LayerManagerReadGuard, tokio::sync::TryLockError> {\n        let guard = ManuallyDrop::new(self.locked.try_read()?);\n\n        Ok(LayerManagerReadGuard {\n            guard,\n            acquired_at: std::time::Instant::now(),\n            holder,\n        })\n    }\n\n    pub(crate) async fn write(&self, holder: LayerManagerLockHolder) -> LayerManagerWriteGuard {\n        let guard = ManuallyDrop::new(self.locked.write().await);\n        LayerManagerWriteGuard {\n            guard,\n            acquired_at: std::time::Instant::now(),\n            holder,\n        }\n    }\n\n    pub(crate) fn try_write(\n        &self,\n        holder: LayerManagerLockHolder,\n    ) -> Result<LayerManagerWriteGuard, tokio::sync::TryLockError> {\n        let guard = ManuallyDrop::new(self.locked.try_write()?);\n\n        Ok(LayerManagerWriteGuard {\n            guard,\n            acquired_at: std::time::Instant::now(),\n            holder,\n        })\n    }\n}\n\n/// Provides semantic APIs to manipulate the layer map.\npub(crate) enum LayerManager {\n    /// Open as in not shutdown layer manager; we still have in-memory layers and we can manipulate\n    /// the layers.\n    Open(OpenLayerManager),\n    /// Shutdown layer manager where there are no more in-memory layers and persistent layers are\n    /// read-only.\n    Closed {\n        layers: HashMap<PersistentLayerKey, Layer>,\n    },\n}\n\nimpl Default for LayerManager {\n    fn default() -> Self {\n        LayerManager::Open(OpenLayerManager::default())\n    
}\n}\n\nimpl LayerManager {\n    fn upgrade(&self, weak: ReadableLayerWeak) -> ReadableLayer {\n        match weak {\n            ReadableLayerWeak::PersistentLayer(desc) => {\n                ReadableLayer::PersistentLayer(self.get_from_desc(&desc))\n            }\n            ReadableLayerWeak::InMemoryLayer(desc) => {\n                let inmem = self\n                    .layer_map()\n                    .expect(\"no concurrent shutdown\")\n                    .in_memory_layer(&desc);\n                ReadableLayer::InMemoryLayer(inmem)\n            }\n        }\n    }\n\n    pub(crate) fn get_from_key(&self, key: &PersistentLayerKey) -> Layer {\n        // The assumption for the `expect()` is that all code maintains the following invariant:\n        // A layer's descriptor is present in the LayerMap => the LayerFileManager contains a layer for the descriptor.\n        self.try_get_from_key(key)\n            .with_context(|| format!(\"get layer from key: {key}\"))\n            .expect(\"not found\")\n            .clone()\n    }\n\n    pub(crate) fn try_get_from_key(&self, key: &PersistentLayerKey) -> Option<&Layer> {\n        self.layers().get(key)\n    }\n\n    pub(crate) fn get_from_desc(&self, desc: &PersistentLayerDesc) -> Layer {\n        self.get_from_key(&desc.key())\n    }\n\n    /// Get an immutable reference to the layer map.\n    ///\n    /// We expect users only to be able to get an immutable layer map. If users want to make modifications,\n    /// they should use the below semantic APIs. This design makes us step closer to immutable storage state.\n    pub(crate) fn layer_map(&self) -> Result<&LayerMap, Shutdown> {\n        use LayerManager::*;\n        match self {\n            Open(OpenLayerManager { layer_map, .. }) => Ok(layer_map),\n            Closed { .. 
} => Err(Shutdown),\n        }\n    }\n\n    pub(crate) fn open_mut(&mut self) -> Result<&mut OpenLayerManager, Shutdown> {\n        use LayerManager::*;\n\n        match self {\n            Open(open) => Ok(open),\n            Closed { .. } => Err(Shutdown),\n        }\n    }\n\n    /// LayerManager shutdown. The in-memory layers do cleanup on drop, so we must drop them in\n    /// order to allow shutdown to complete.\n    ///\n    /// If there was a want to flush in-memory layers, it must have happened earlier.\n    pub(crate) fn shutdown(&mut self, writer_state: &mut Option<TimelineWriterState>) {\n        use LayerManager::*;\n        match self {\n            Open(OpenLayerManager {\n                layer_map,\n                layer_fmgr: LayerFileManager(hashmap),\n            }) => {\n                // NB: no need to decrement layer metrics; metrics are removed on timeline shutdown.\n                let open = layer_map.open_layer.take();\n                let frozen = layer_map.frozen_layers.len();\n                let taken_writer_state = writer_state.take();\n                tracing::info!(open = open.is_some(), frozen, \"dropped inmemory layers\");\n                let layers = std::mem::take(hashmap);\n                *self = Closed { layers };\n                assert_eq!(open.is_some(), taken_writer_state.is_some());\n            }\n            Closed { .. 
} => {\n                tracing::debug!(\"ignoring multiple shutdowns on layer manager\")\n            }\n        }\n    }\n\n    /// Sum up the historic layer sizes\n    pub(crate) fn layer_size_sum(&self) -> u64 {\n        self.layers()\n            .values()\n            .map(|l| l.layer_desc().file_size)\n            .sum()\n    }\n\n    pub(crate) fn likely_resident_layers(&self) -> impl Iterator<Item = &'_ Layer> + '_ {\n        self.layers().values().filter(|l| l.is_likely_resident())\n    }\n\n    pub(crate) fn visible_layers(&self) -> impl Iterator<Item = &'_ Layer> + '_ {\n        self.layers()\n            .values()\n            .filter(|l| l.visibility() == LayerVisibilityHint::Visible)\n    }\n\n    pub(crate) fn contains(&self, layer: &Layer) -> bool {\n        self.contains_key(&layer.layer_desc().key())\n    }\n\n    pub(crate) fn contains_key(&self, key: &PersistentLayerKey) -> bool {\n        self.layers().contains_key(key)\n    }\n\n    pub(crate) fn all_persistent_layers(&self) -> Vec<PersistentLayerKey> {\n        self.layers().keys().cloned().collect_vec()\n    }\n\n    /// Update the [`LayerFringe`] of a read request\n    ///\n    /// Take a key space at a given LSN and query the layer map below each range\n    /// of the key space to find the next layers to visit.\n    pub(crate) fn update_search_fringe(\n        &self,\n        keyspace: &KeySpace,\n        cont_lsn: Lsn,\n        fringe: &mut LayerFringe,\n    ) -> Result<(), Shutdown> {\n        let map = self.layer_map()?;\n\n        for range in keyspace.ranges.iter() {\n            let results = map.range_search(range.clone(), cont_lsn);\n            results\n                .found\n                .into_iter()\n                .map(|(SearchResult { layer, lsn_floor }, keyspace_accum)| {\n                    (\n                        self.upgrade(layer),\n                        keyspace_accum.to_keyspace(),\n                        lsn_floor..cont_lsn,\n                    )\n        
        })\n                .for_each(|(layer, keyspace, lsn_range)| fringe.update(layer, keyspace, lsn_range));\n        }\n\n        Ok(())\n    }\n\n    fn layers(&self) -> &HashMap<PersistentLayerKey, Layer> {\n        use LayerManager::*;\n        match self {\n            Open(OpenLayerManager { layer_fmgr, .. }) => &layer_fmgr.0,\n            Closed { layers } => layers,\n        }\n    }\n}\n\n#[derive(Default)]\npub(crate) struct OpenLayerManager {\n    layer_map: LayerMap,\n    layer_fmgr: LayerFileManager<Layer>,\n}\n\nimpl std::fmt::Debug for OpenLayerManager {\n    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {\n        f.debug_struct(\"OpenLayerManager\")\n            .field(\"layer_count\", &self.layer_fmgr.0.len())\n            .finish()\n    }\n}\n\n#[derive(Debug, thiserror::Error)]\n#[error(\"layer manager has been shutdown\")]\npub(crate) struct Shutdown;\n\nimpl OpenLayerManager {\n    /// Called from `load_layer_map`. Initialize the layer manager with:\n    /// 1. all on-disk layers\n    /// 2. 
next open layer (with disk_consistent_lsn LSN)\n    pub(crate) fn initialize_local_layers(&mut self, layers: Vec<Layer>, next_open_layer_at: Lsn) {\n        let mut updates = self.layer_map.batch_update();\n        for layer in layers {\n            Self::insert_historic_layer(layer, &mut updates, &mut self.layer_fmgr);\n        }\n        updates.flush();\n        self.layer_map.next_open_layer_at = Some(next_open_layer_at);\n    }\n\n    /// Initialize when creating a new timeline, called in `init_empty_layer_map`.\n    pub(crate) fn initialize_empty(&mut self, next_open_layer_at: Lsn) {\n        self.layer_map.next_open_layer_at = Some(next_open_layer_at);\n    }\n\n    /// Open a new writable layer to append data if there is no open layer, otherwise return the\n    /// current open layer, called within `get_layer_for_write`.\n    #[allow(clippy::too_many_arguments)]\n    pub(crate) async fn get_layer_for_write(\n        &mut self,\n        lsn: Lsn,\n        conf: &'static PageServerConf,\n        timeline_id: TimelineId,\n        tenant_shard_id: TenantShardId,\n        gate: &utils::sync::gate::Gate,\n        cancel: &CancellationToken,\n        ctx: &RequestContext,\n    ) -> anyhow::Result<Arc<InMemoryLayer>> {\n        ensure!(lsn.is_aligned());\n\n        // Do we have a layer open for writing already?\n        let layer = if let Some(open_layer) = &self.layer_map.open_layer {\n            if open_layer.get_lsn_range().start > lsn {\n                bail!(\n                    \"unexpected open layer in the future: open layers starts at {}, write lsn {}\",\n                    open_layer.get_lsn_range().start,\n                    lsn\n                );\n            }\n\n            Arc::clone(open_layer)\n        } else {\n            // No writeable layer yet. 
Create one.\n            let start_lsn = self\n                .layer_map\n                .next_open_layer_at\n                .context(\"No next open layer found\")?;\n\n            trace!(\n                \"creating in-memory layer at {}/{} for record at {}\",\n                timeline_id, start_lsn, lsn\n            );\n\n            let new_layer = InMemoryLayer::create(\n                conf,\n                timeline_id,\n                tenant_shard_id,\n                start_lsn,\n                gate,\n                cancel,\n                ctx,\n            )\n            .await?;\n            let layer = Arc::new(new_layer);\n\n            self.layer_map.open_layer = Some(layer.clone());\n            self.layer_map.next_open_layer_at = None;\n\n            layer\n        };\n\n        Ok(layer)\n    }\n\n    /// Tries to freeze an open layer and also manages clearing the TimelineWriterState.\n    ///\n    /// Returns true if anything was frozen.\n    pub(super) async fn try_freeze_in_memory_layer(\n        &mut self,\n        lsn: Lsn,\n        last_freeze_at: &AtomicLsn,\n        write_lock: &mut tokio::sync::MutexGuard<'_, Option<TimelineWriterState>>,\n        metrics: &TimelineMetrics,\n    ) -> bool {\n        let Lsn(last_record_lsn) = lsn;\n        let end_lsn = Lsn(last_record_lsn + 1);\n\n        let froze = if let Some(open_layer) = &self.layer_map.open_layer {\n            let open_layer_rc = Arc::clone(open_layer);\n            open_layer.freeze(end_lsn).await;\n\n            // Increment the frozen layer metrics. This is decremented in `finish_flush_l0_layer()`.\n            // TODO: It would be nicer to do this via `InMemoryLayer::drop()`, but it requires a\n            // reference to the timeline metrics. 
Other methods use a metrics borrow as well.\n            metrics.inc_frozen_layer(open_layer);\n\n            // The layer is no longer open, update the layer map to reflect this.\n            // We will replace it with on-disk historics below.\n            self.layer_map.frozen_layers.push_back(open_layer_rc);\n            self.layer_map.open_layer = None;\n            self.layer_map.next_open_layer_at = Some(end_lsn);\n\n            true\n        } else {\n            false\n        };\n\n        // Even if there was no layer to freeze, advance last_freeze_at to last_record_lsn+1: this\n        // accounts for regions in the LSN range where we might have ingested no data due to sharding.\n        last_freeze_at.store(end_lsn);\n\n        // the writer state must no longer have a reference to the frozen layer\n        let taken = write_lock.take();\n        assert_eq!(\n            froze,\n            taken.is_some(),\n            \"should only had frozen a layer when TimelineWriterState existed\"\n        );\n\n        froze\n    }\n\n    /// Add image layers to the layer map, called from [`super::Timeline::create_image_layers`].\n    pub(crate) fn track_new_image_layers(\n        &mut self,\n        image_layers: &[ResidentLayer],\n        metrics: &TimelineMetrics,\n    ) {\n        let mut updates = self.layer_map.batch_update();\n        for layer in image_layers {\n            Self::insert_historic_layer(layer.as_ref().clone(), &mut updates, &mut self.layer_fmgr);\n\n            // record these here instead of Layer::finish_creating because otherwise partial\n            // failure with create_image_layers would balloon up the physical size gauge. 
downside\n            // is that all layers need to be created before metrics are updated.\n            metrics.record_new_file_metrics(layer.layer_desc().file_size);\n        }\n        updates.flush();\n    }\n\n    /// Flush a frozen layer and add the written delta layer to the layer map.\n    pub(crate) fn finish_flush_l0_layer(\n        &mut self,\n        delta_layer: Option<&ResidentLayer>,\n        frozen_layer_for_check: &Arc<InMemoryLayer>,\n        metrics: &TimelineMetrics,\n    ) {\n        let inmem = self\n            .layer_map\n            .frozen_layers\n            .pop_front()\n            .expect(\"there must be a inmem layer to flush\");\n        metrics.dec_frozen_layer(&inmem);\n\n        // Only one task may call this function at a time (for this\n        // timeline). If two tasks tried to flush the same frozen\n        // layer to disk at the same time, that would not work.\n        assert_eq!(Arc::as_ptr(&inmem), Arc::as_ptr(frozen_layer_for_check));\n\n        if let Some(l) = delta_layer {\n            let mut updates = self.layer_map.batch_update();\n            Self::insert_historic_layer(l.as_ref().clone(), &mut updates, &mut self.layer_fmgr);\n            metrics.record_new_file_metrics(l.layer_desc().file_size);\n            updates.flush();\n        }\n    }\n\n    /// Called when compaction is completed.\n    pub(crate) fn finish_compact_l0(\n        &mut self,\n        compact_from: &[Layer],\n        compact_to: &[ResidentLayer],\n        metrics: &TimelineMetrics,\n    ) {\n        let mut updates = self.layer_map.batch_update();\n        for l in compact_to {\n            Self::insert_historic_layer(l.as_ref().clone(), &mut updates, &mut self.layer_fmgr);\n            metrics.record_new_file_metrics(l.layer_desc().file_size);\n        }\n        for l in compact_from {\n            Self::delete_historic_layer(l, &mut updates, &mut self.layer_fmgr);\n        }\n        updates.flush();\n    }\n\n    /// Called when a 
GC-compaction is completed.\n    pub(crate) fn finish_gc_compaction(\n        &mut self,\n        compact_from: &[Layer],\n        compact_to: &[ResidentLayer],\n        metrics: &TimelineMetrics,\n    ) {\n        // gc-compaction could contain layer rewrites. We need to delete the old layers and insert the new ones.\n\n        // Match the old layers with the new layers\n        let mut add_layers = HashMap::new();\n        let mut rewrite_layers = HashMap::new();\n        let mut drop_layers = HashMap::new();\n        for layer in compact_from {\n            drop_layers.insert(layer.layer_desc().key(), layer.clone());\n        }\n        for layer in compact_to {\n            if let Some(old_layer) = drop_layers.remove(&layer.layer_desc().key()) {\n                rewrite_layers.insert(layer.layer_desc().key(), (old_layer.clone(), layer.clone()));\n            } else {\n                add_layers.insert(layer.layer_desc().key(), layer.clone());\n            }\n        }\n        let add_layers = add_layers.values().cloned().collect::<Vec<_>>();\n        let drop_layers = drop_layers.values().cloned().collect::<Vec<_>>();\n        let rewrite_layers = rewrite_layers.values().cloned().collect::<Vec<_>>();\n\n        self.rewrite_layers_inner(&rewrite_layers, &drop_layers, &add_layers, metrics);\n    }\n\n    /// Called post-compaction when some previous generation image layers were trimmed.\n    pub fn rewrite_layers(\n        &mut self,\n        rewrite_layers: &[(Layer, ResidentLayer)],\n        drop_layers: &[Layer],\n        metrics: &TimelineMetrics,\n    ) {\n        self.rewrite_layers_inner(rewrite_layers, drop_layers, &[], metrics);\n    }\n\n    fn rewrite_layers_inner(\n        &mut self,\n        rewrite_layers: &[(Layer, ResidentLayer)],\n        drop_layers: &[Layer],\n        add_layers: &[ResidentLayer],\n        metrics: &TimelineMetrics,\n    ) {\n        let mut updates = self.layer_map.batch_update();\n        for (old_layer, new_layer) in 
rewrite_layers {\n            debug_assert_eq!(\n                old_layer.layer_desc().key_range,\n                new_layer.layer_desc().key_range\n            );\n            debug_assert_eq!(\n                old_layer.layer_desc().lsn_range,\n                new_layer.layer_desc().lsn_range\n            );\n\n            // Transfer visibility hint from old to new layer, since the new layer covers the same key space.  This is not guaranteed to\n            // be accurate (as the new layer may cover a different subset of the key range), but is a sensible default, and prevents\n            // always marking rewritten layers as visible.\n            new_layer.as_ref().set_visibility(old_layer.visibility());\n\n            // Safety: we may never rewrite the same file in-place.  Callers are responsible\n            // for ensuring that they only rewrite layers after something changes the path,\n            // such as an increment in the generation number.\n            assert_ne!(old_layer.local_path(), new_layer.local_path());\n\n            Self::delete_historic_layer(old_layer, &mut updates, &mut self.layer_fmgr);\n\n            Self::insert_historic_layer(\n                new_layer.as_ref().clone(),\n                &mut updates,\n                &mut self.layer_fmgr,\n            );\n\n            metrics.record_new_file_metrics(new_layer.layer_desc().file_size);\n        }\n        for l in drop_layers {\n            Self::delete_historic_layer(l, &mut updates, &mut self.layer_fmgr);\n        }\n        for l in add_layers {\n            Self::insert_historic_layer(l.as_ref().clone(), &mut updates, &mut self.layer_fmgr);\n            metrics.record_new_file_metrics(l.layer_desc().file_size);\n        }\n        updates.flush();\n    }\n\n    /// Called when garbage collect has selected the layers to be removed.\n    pub(crate) fn finish_gc_timeline(&mut self, gc_layers: &[Layer]) {\n        let mut updates = self.layer_map.batch_update();\n        for 
doomed_layer in gc_layers {\n            Self::delete_historic_layer(doomed_layer, &mut updates, &mut self.layer_fmgr);\n        }\n        updates.flush()\n    }\n\n    #[cfg(test)]\n    pub(crate) fn force_insert_layer(&mut self, layer: ResidentLayer) {\n        let mut updates = self.layer_map.batch_update();\n        Self::insert_historic_layer(layer.as_ref().clone(), &mut updates, &mut self.layer_fmgr);\n        updates.flush()\n    }\n\n    /// Helper function to insert a layer into the layer map and file manager.\n    fn insert_historic_layer(\n        layer: Layer,\n        updates: &mut BatchedUpdates<'_>,\n        mapping: &mut LayerFileManager<Layer>,\n    ) {\n        updates.insert_historic(layer.layer_desc().clone());\n        mapping.insert(layer);\n    }\n\n    /// Removes the layer from local FS (if present) and from memory.\n    /// Remote storage is not affected by this operation.\n    fn delete_historic_layer(\n        // we cannot remove layers otherwise, since gc and compaction will race\n        layer: &Layer,\n        updates: &mut BatchedUpdates<'_>,\n        mapping: &mut LayerFileManager<Layer>,\n    ) {\n        let desc = layer.layer_desc();\n\n        // TODO Removing from the bottom of the layer map is expensive.\n        //      Maybe instead discard all layer map historic versions that\n        //      won't be needed for page reconstruction for this timeline,\n        //      and mark what we can't delete yet as deleted from the layer\n        //      map index without actually rebuilding the index.\n        updates.remove_historic(desc);\n        mapping.remove(layer);\n        layer.delete_on_drop();\n    }\n\n    #[cfg(test)]\n    pub(crate) fn force_insert_in_memory_layer(&mut self, layer: Arc<InMemoryLayer>) {\n        use pageserver_api::models::InMemoryLayerInfo;\n\n        match layer.info() {\n            InMemoryLayerInfo::Open { .. 
} => {\n                assert!(self.layer_map.open_layer.is_none());\n                self.layer_map.open_layer = Some(layer);\n            }\n            InMemoryLayerInfo::Frozen { lsn_start, .. } => {\n                if let Some(last) = self.layer_map.frozen_layers.back() {\n                    assert!(last.get_lsn_range().end <= lsn_start);\n                }\n\n                self.layer_map.frozen_layers.push_back(layer);\n            }\n        }\n    }\n}\n\npub(crate) struct LayerFileManager<T>(HashMap<PersistentLayerKey, T>);\n\nimpl<T> Default for LayerFileManager<T> {\n    fn default() -> Self {\n        Self(HashMap::default())\n    }\n}\n\nimpl<T: AsLayerDesc + Clone> LayerFileManager<T> {\n    pub(crate) fn insert(&mut self, layer: T) {\n        let present = self.0.insert(layer.layer_desc().key(), layer.clone());\n        if present.is_some() && cfg!(debug_assertions) {\n            panic!(\"overwriting a layer: {:?}\", layer.layer_desc())\n        }\n    }\n\n    pub(crate) fn remove(&mut self, layer: &T) {\n        let present = self.0.remove(&layer.layer_desc().key());\n        if present.is_none() && cfg!(debug_assertions) {\n            panic!(\n                \"removing layer that is not present in layer mapping: {:?}\",\n                layer.layer_desc()\n            )\n        }\n    }\n}\n"
  },
  {
    "path": "pageserver/src/tenant/timeline/logical_size.rs",
    "content": "use std::sync::atomic::{AtomicBool, AtomicI64, Ordering as AtomicOrdering};\n\nuse anyhow::Context;\nuse once_cell::sync::OnceCell;\nuse tokio_util::sync::CancellationToken;\nuse utils::lsn::Lsn;\n\n/// Internal structure to hold all data needed for logical size calculation.\n///\n/// Calculation consists of two stages:\n///\n/// 1. Initial size calculation. That might take a long time, because it requires\n///    reading all layers containing relation sizes at `initial_part_end`.\n///\n/// 2. Collecting an incremental part and adding that to the initial size.\n///    Increments are appended on walreceiver writing new timeline data,\n///    which result in increase or decrease of the logical size.\npub(super) struct LogicalSize {\n    /// Size, potentially slow to compute. Calculating this might require reading multiple\n    /// layers, and even ancestor's layers.\n    ///\n    /// NOTE: size at a given LSN is constant, but after a restart we will calculate\n    /// the initial size at a different LSN.\n    pub initial_logical_size: OnceCell<(\n        u64,\n        crate::metrics::initial_logical_size::FinishedCalculationGuard,\n    )>,\n\n    /// Cancellation for the best-effort logical size calculation.\n    ///\n    /// The token is kept in a once-cell so that we can error out if a higher priority\n    /// request comes in *before* we have started the normal logical size calculation.\n    pub(crate) cancel_wait_for_background_loop_concurrency_limit_semaphore:\n        OnceCell<CancellationToken>,\n\n    /// Once the initial logical size is initialized, this is notified.\n    pub(crate) initialized: tokio::sync::Semaphore,\n\n    /// Latest Lsn that has its size uncalculated, could be absent for freshly created timelines.\n    pub initial_part_end: Option<Lsn>,\n\n    /// All other size changes after startup, combined together.\n    ///\n    /// Size shouldn't ever be negative, but this is signed for two reasons:\n    ///\n    /// 1. 
If we initialized the \"baseline\" size lazily, while we already\n    ///    process incoming WAL, the incoming WAL records could decrement the\n    ///    variable and temporarily make it negative. (This is just future-proofing;\n    ///    the initialization is currently not done lazily.)\n    ///\n    /// 2. If there is a bug and we e.g. forget to increment it in some cases\n    ///    when size grows, but remember to decrement it when it shrinks again, the\n    ///    variable could go negative. In that case, it seems better to at least\n    ///    try to keep tracking it, rather than clamp or overflow it. Note that\n    ///    get_current_logical_size() will clamp the returned value to zero if it's\n    ///    negative, and log an error. Could set it permanently to zero or some\n    ///    special value to indicate \"broken\" instead, but this will do for now.\n    ///\n    /// Note that we also expose a copy of this value as a prometheus metric,\n    /// see `current_logical_size_gauge`. 
Use the `update_current_logical_size`\n    /// to modify this, it will also keep the prometheus metric in sync.\n    pub size_added_after_initial: AtomicI64,\n\n    /// For [`crate::metrics::initial_logical_size::TIMELINES_WHERE_WALRECEIVER_GOT_APPROXIMATE_SIZE`].\n    pub(super) did_return_approximate_to_walreceiver: AtomicBool,\n}\n\n/// Normalized current size, that the data in pageserver occupies.\n#[derive(Debug, Clone, Copy)]\npub(crate) enum CurrentLogicalSize {\n    /// The size is not yet calculated to the end, this is an intermediate result,\n    /// constructed from walreceiver increments and normalized: logical data could delete some objects, hence be negative,\n    /// yet total logical size cannot be below 0.\n    Approximate(Approximate),\n    // Fully calculated logical size, only other future walreceiver increments are changing it, and those changes are\n    // available for observation without any calculations.\n    Exact(Exact),\n}\n\n#[derive(Debug, Copy, Clone, PartialEq, Eq)]\npub(crate) enum Accuracy {\n    Approximate,\n    Exact,\n}\n\n#[derive(Debug, Clone, Copy)]\npub(crate) struct Approximate(u64);\n#[derive(Debug, Clone, Copy)]\npub(crate) struct Exact(u64);\n\nimpl From<&Approximate> for u64 {\n    fn from(value: &Approximate) -> Self {\n        value.0\n    }\n}\n\nimpl From<&Exact> for u64 {\n    fn from(val: &Exact) -> Self {\n        val.0\n    }\n}\n\nimpl Approximate {\n    /// For use in situations where we don't have a sane logical size value but need\n    /// to return something, e.g. 
in HTTP API on shard >0 of a sharded tenant.\n    pub(crate) fn zero() -> Self {\n        Self(0)\n    }\n}\n\nimpl CurrentLogicalSize {\n    pub(crate) fn size_dont_care_about_accuracy(&self) -> u64 {\n        match self {\n            Self::Approximate(size) => size.into(),\n            Self::Exact(size) => size.into(),\n        }\n    }\n    pub(crate) fn accuracy(&self) -> Accuracy {\n        match self {\n            Self::Approximate(_) => Accuracy::Approximate,\n            Self::Exact(_) => Accuracy::Exact,\n        }\n    }\n\n    pub(crate) fn is_exact(&self) -> bool {\n        matches!(self, Self::Exact(_))\n    }\n}\n\nimpl LogicalSize {\n    pub(super) fn empty_initial() -> Self {\n        Self {\n            initial_logical_size: OnceCell::with_value((0, {\n                crate::metrics::initial_logical_size::START_CALCULATION\n                    .first(crate::metrics::initial_logical_size::StartCircumstances::EmptyInitial)\n                    .calculation_result_saved()\n            })),\n            cancel_wait_for_background_loop_concurrency_limit_semaphore: OnceCell::new(),\n            initial_part_end: None,\n            size_added_after_initial: AtomicI64::new(0),\n            did_return_approximate_to_walreceiver: AtomicBool::new(false),\n            initialized: tokio::sync::Semaphore::new(0),\n        }\n    }\n\n    pub(super) fn deferred_initial(compute_to: Lsn) -> Self {\n        Self {\n            initial_logical_size: OnceCell::new(),\n            cancel_wait_for_background_loop_concurrency_limit_semaphore: OnceCell::new(),\n            initial_part_end: Some(compute_to),\n            size_added_after_initial: AtomicI64::new(0),\n            did_return_approximate_to_walreceiver: AtomicBool::new(false),\n            initialized: tokio::sync::Semaphore::new(0),\n        }\n    }\n\n    pub(super) fn current_size(&self) -> CurrentLogicalSize {\n        let size_increment: i64 = 
self.size_added_after_initial.load(AtomicOrdering::Acquire);\n        //                  ^^^ keep this type explicit so that the casts in this function break if\n        //                  we change the type.\n        match self.initial_logical_size.get() {\n            Some((initial_size, _)) => {\n                CurrentLogicalSize::Exact(Exact(initial_size.checked_add_signed(size_increment)\n                    .with_context(|| format!(\"Overflow during logical size calculation, initial_size: {initial_size}, size_increment: {size_increment}\"))\n                    .unwrap()))\n            }\n            None => {\n\n                let non_negative_size_increment = u64::try_from(size_increment).unwrap_or(0);\n                CurrentLogicalSize::Approximate(Approximate(non_negative_size_increment))\n            }\n        }\n    }\n\n    pub(super) fn increment_size(&self, delta: i64) {\n        self.size_added_after_initial\n            .fetch_add(delta, AtomicOrdering::SeqCst);\n    }\n\n    /// Make the value computed by initial logical size computation\n    /// available for re-use. This doesn't contain the incremental part.\n    pub(super) fn initialized_size(&self, lsn: Lsn) -> Option<u64> {\n        match self.initial_part_end {\n            Some(v) if v == lsn => self.initial_logical_size.get().map(|(s, _)| *s),\n            _ => None,\n        }\n    }\n}\n"
  },
  {
    "path": "pageserver/src/tenant/timeline/offload.rs",
    "content": "use std::sync::Arc;\n\nuse pageserver_api::models::{TenantState, TimelineState};\n\nuse super::Timeline;\nuse super::delete::{DeletionGuard, delete_local_timeline_directory};\nuse crate::span::debug_assert_current_span_has_tenant_and_timeline_id;\nuse crate::tenant::remote_timeline_client::ShutdownIfArchivedError;\nuse crate::tenant::timeline::delete::{TimelineDeleteGuardKind, make_timeline_delete_guard};\nuse crate::tenant::{\n    DeleteTimelineError, OffloadedTimeline, TenantManifestError, TenantShard, TimelineOrOffloaded,\n};\n\n#[derive(thiserror::Error, Debug)]\npub(crate) enum OffloadError {\n    #[error(\"Cancelled\")]\n    Cancelled,\n    #[error(\"Timeline is not archived\")]\n    NotArchived,\n    #[error(\"Offload or deletion already in progress\")]\n    AlreadyInProgress,\n    #[error(\"Unexpected offload error: {0}\")]\n    Other(anyhow::Error),\n}\n\nimpl From<TenantManifestError> for OffloadError {\n    fn from(e: TenantManifestError) -> Self {\n        match e {\n            TenantManifestError::Cancelled => Self::Cancelled,\n            TenantManifestError::RemoteStorage(e) => Self::Other(e),\n        }\n    }\n}\n\npub(crate) async fn offload_timeline(\n    tenant: &TenantShard,\n    timeline: &Arc<Timeline>,\n) -> Result<(), OffloadError> {\n    debug_assert_current_span_has_tenant_and_timeline_id();\n    tracing::info!(\"offloading archived timeline\");\n\n    let delete_guard_res = make_timeline_delete_guard(\n        tenant,\n        timeline.timeline_id,\n        TimelineDeleteGuardKind::Offload,\n    );\n    let (timeline, guard) = match delete_guard_res {\n        Ok(timeline_and_guard) => timeline_and_guard,\n        Err(DeleteTimelineError::HasChildren(children)) => {\n            let is_archived = timeline.is_archived();\n            if is_archived == Some(true) {\n                tracing::error!(\"timeline is archived but has non-archived children: {children:?}\");\n                return 
Err(OffloadError::NotArchived);\n            }\n            tracing::info!(\n                ?is_archived,\n                \"timeline is not archived and has unarchived children\"\n            );\n            return Err(OffloadError::NotArchived);\n        }\n        Err(DeleteTimelineError::AlreadyInProgress(_)) => {\n            tracing::info!(\"timeline offload or deletion already in progress\");\n            return Err(OffloadError::AlreadyInProgress);\n        }\n        Err(e) => return Err(OffloadError::Other(anyhow::anyhow!(e))),\n    };\n\n    let TimelineOrOffloaded::Timeline(timeline) = timeline else {\n        tracing::error!(\"timeline already offloaded, but given timeline object\");\n        return Ok(());\n    };\n\n    match timeline.remote_client.shutdown_if_archived().await {\n        Ok(()) => {}\n        Err(ShutdownIfArchivedError::NotInitialized(_)) => {\n            // Either the timeline is being deleted, the operation is being retried, or we are shutting down.\n            // Don't return cancelled here to keep it idempotent.\n        }\n        Err(ShutdownIfArchivedError::NotArchived) => return Err(OffloadError::NotArchived),\n    }\n    timeline.set_state(TimelineState::Stopping);\n\n    // Now that the Timeline is in Stopping state, request all the related tasks to shut down.\n    timeline.shutdown(super::ShutdownMode::Reload).await;\n\n    // TODO extend guard mechanism above with method\n    // to make deletions possible while offloading is in progress\n\n    let conf = &tenant.conf;\n    delete_local_timeline_directory(conf, tenant.tenant_shard_id, &timeline).await;\n\n    let remaining_refcount = remove_timeline_from_tenant(tenant, &timeline, &guard);\n\n    {\n        let mut offloaded_timelines = tenant.timelines_offloaded.lock().unwrap();\n        if matches!(\n            tenant.current_state(),\n            TenantState::Stopping { .. } | TenantState::Broken { .. 
}\n        ) {\n            // Cancel the operation if the tenant is shutting down. Do this while the\n            // timelines_offloaded lock is held to prevent a race with Tenant::shutdown\n            // for defusing the lock\n            return Err(OffloadError::Cancelled);\n        }\n        offloaded_timelines.insert(\n            timeline.timeline_id,\n            Arc::new(\n                OffloadedTimeline::from_timeline(&timeline)\n                    .expect(\"we checked above that timeline was ready\"),\n            ),\n        );\n    }\n\n    // Last step: mark timeline as offloaded in S3\n    // TODO: maybe move this step above, right above deletion of the local timeline directory,\n    // then there is no potential race condition where we partially offload a timeline, and\n    // at the next restart attach it again.\n    // For that to happen, we'd need to make the manifest reflect our *intended* state,\n    // not our actual state of offloaded timelines.\n    tenant.maybe_upload_tenant_manifest().await?;\n\n    tracing::info!(\"Timeline offload complete (remaining arc refcount: {remaining_refcount})\");\n\n    Ok(())\n}\n\n/// It is important that this gets called when DeletionGuard is being held.\n/// For more context see comments in [`make_timeline_delete_guard`]\n///\n/// Returns the strong count of the timeline `Arc`\nfn remove_timeline_from_tenant(\n    tenant: &TenantShard,\n    timeline: &Timeline,\n    _: &DeletionGuard, // using it as a witness\n) -> usize {\n    // Remove the timeline from the map.\n    let mut timelines = tenant.timelines.lock().unwrap();\n    let children_exist = timelines\n        .iter()\n        .any(|(_, entry)| entry.get_ancestor_timeline_id() == Some(timeline.timeline_id));\n    // XXX this can happen because `branch_timeline` doesn't check `TimelineState::Stopping`.\n    // We already deleted the layer files, so it's probably best to panic.\n    // (Ideally, above remove_dir_all is atomic so we don't see this 
timeline after a restart)\n    if children_exist {\n        panic!(\"Timeline grew children while we removed layer files\");\n    }\n\n    let timeline = timelines\n        .remove(&timeline.timeline_id)\n        .expect(\"timeline that we were deleting was concurrently removed from 'timelines' map\");\n\n    // Clear the compaction queue for this timeline\n    tenant\n        .scheduled_compaction_tasks\n        .lock()\n        .unwrap()\n        .remove(&timeline.timeline_id);\n\n    Arc::strong_count(&timeline)\n}\n"
  },
  {
    "path": "pageserver/src/tenant/timeline/span.rs",
    "content": "\n"
  },
  {
    "path": "pageserver/src/tenant/timeline/uninit.rs",
    "content": "use std::collections::hash_map::Entry;\nuse std::fs;\nuse std::future::Future;\nuse std::sync::Arc;\n\nuse anyhow::Context;\nuse camino::Utf8PathBuf;\nuse tracing::{error, info, info_span};\nuse utils::fs_ext;\nuse utils::id::TimelineId;\nuse utils::lsn::Lsn;\nuse utils::sync::gate::GateGuard;\n\nuse super::Timeline;\nuse crate::context::RequestContext;\nuse crate::import_datadir;\nuse crate::span::debug_assert_current_span_has_tenant_and_timeline_id;\nuse crate::tenant::{\n    CreateTimelineError, CreateTimelineIdempotency, TenantShard, TimelineOrOffloaded,\n};\n\n/// A timeline with some of its files on disk, being initialized.\n/// This struct ensures the atomicity of the timeline init: it's either properly created and inserted into pageserver's memory, or\n/// its local files are removed.  If we crash while this class exists, then the timeline's local\n/// state is cleaned up during [`TenantShard::clean_up_timelines`], because the timeline's content isn't in remote storage.\n///\n/// The caller is responsible for proper timeline data filling before the final init.\n#[must_use]\npub struct UninitializedTimeline<'t> {\n    pub(crate) owning_tenant: &'t TenantShard,\n    timeline_id: TimelineId,\n    raw_timeline: Option<(Arc<Timeline>, TimelineCreateGuard)>,\n    /// Whether we spawned the inner Timeline's tasks such that we must later shut it down\n    /// if aborting the timeline creation\n    needs_shutdown: bool,\n}\n\nimpl<'t> UninitializedTimeline<'t> {\n    pub(crate) fn new(\n        owning_tenant: &'t TenantShard,\n        timeline_id: TimelineId,\n        raw_timeline: Option<(Arc<Timeline>, TimelineCreateGuard)>,\n    ) -> Self {\n        Self {\n            owning_tenant,\n            timeline_id,\n            raw_timeline,\n            needs_shutdown: false,\n        }\n    }\n\n    /// When writing data to this timeline during creation, use this wrapper: it will take care of\n    /// setup of Timeline tasks required for I/O (flush 
loop) and making sure they are torn down\n    /// later.\n    pub(crate) async fn write<F, Fut>(&mut self, f: F) -> anyhow::Result<()>\n    where\n        F: FnOnce(Arc<Timeline>) -> Fut,\n        Fut: Future<Output = Result<(), CreateTimelineError>>,\n    {\n        debug_assert_current_span_has_tenant_and_timeline_id();\n\n        // Remember that we did I/O (spawned the flush loop), so that we can check we shut it down on drop\n        self.needs_shutdown = true;\n\n        let timeline = self.raw_timeline()?;\n\n        // Spawn flush loop so that the Timeline is ready to accept writes\n        timeline.maybe_spawn_flush_loop();\n\n        // Invoke the provided function, which will write some data into the new timeline\n        if let Err(e) = f(timeline.clone()).await {\n            self.abort().await;\n            return Err(e.into());\n        }\n\n        // Flush the underlying timeline's ephemeral layers to disk\n        if let Err(e) = timeline\n            .freeze_and_flush()\n            .await\n            .context(\"Failed to flush after timeline creation writes\")\n        {\n            self.abort().await;\n            return Err(e);\n        }\n\n        Ok(())\n    }\n\n    pub(crate) async fn abort(&self) {\n        if let Some((raw_timeline, _)) = self.raw_timeline.as_ref() {\n            raw_timeline.shutdown(super::ShutdownMode::Hard).await;\n        }\n    }\n\n    /// Finish timeline creation: insert it into the Tenant's timelines map\n    ///\n    /// This function launches the flush loop if not already done.\n    ///\n    /// The caller is responsible for activating the timeline (function `.activate()`).\n    pub(crate) async fn finish_creation(mut self) -> anyhow::Result<Arc<Timeline>> {\n        let timeline_id = self.timeline_id;\n        let tenant_shard_id = self.owning_tenant.tenant_shard_id;\n\n        if self.raw_timeline.is_none() {\n            self.abort().await;\n\n            return Err(anyhow::anyhow!(\n                \"No 
timeline for initialization found for {tenant_shard_id}/{timeline_id}\"\n            ));\n        }\n\n        // Check that the caller initialized disk_consistent_lsn\n        let new_disk_consistent_lsn = self\n            .raw_timeline\n            .as_ref()\n            .expect(\"checked above\")\n            .0\n            .get_disk_consistent_lsn();\n\n        if !new_disk_consistent_lsn.is_valid() {\n            self.abort().await;\n\n            return Err(anyhow::anyhow!(\n                \"new timeline {tenant_shard_id}/{timeline_id} has invalid disk_consistent_lsn\"\n            ));\n        }\n\n        let mut timelines = self.owning_tenant.timelines.lock().unwrap();\n        match timelines.entry(timeline_id) {\n            Entry::Occupied(_) => {\n                // Unexpected, bug in the caller.  Tenant is responsible for preventing concurrent creation of the same timeline.\n                //\n                // We do not call Self::abort here.  Because we don't cleanly shut down our Timeline, [`Self::drop`] should\n                // skip trying to delete the timeline directory too.\n                anyhow::bail!(\n                    \"Found freshly initialized timeline {tenant_shard_id}/{timeline_id} in the tenant map\"\n                )\n            }\n            Entry::Vacant(v) => {\n                // after taking here should be no fallible operations, because the drop guard will not\n                // cleanup after and would block for example the tenant deletion\n                let (new_timeline, _create_guard) =\n                    self.raw_timeline.take().expect(\"already checked\");\n\n                v.insert(Arc::clone(&new_timeline));\n\n                new_timeline.maybe_spawn_flush_loop();\n\n                Ok(new_timeline)\n            }\n        }\n    }\n\n    pub(crate) fn finish_creation_myself(&mut self) -> (Arc<Timeline>, TimelineCreateGuard) {\n        self.raw_timeline.take().expect(\"already checked\")\n    }\n\n    
/// Prepares timeline data by loading it from the basebackup archive.\n    pub(crate) async fn import_basebackup_from_tar(\n        mut self,\n        tenant: Arc<TenantShard>,\n        copyin_read: &mut (impl tokio::io::AsyncRead + Send + Sync + Unpin),\n        base_lsn: Lsn,\n        broker_client: storage_broker::BrokerClientChannel,\n        ctx: &RequestContext,\n    ) -> anyhow::Result<Arc<Timeline>> {\n        self.write(|raw_timeline| async move {\n            import_datadir::import_basebackup_from_tar(&raw_timeline, copyin_read, base_lsn, ctx)\n                .await\n                .context(\"Failed to import basebackup\")\n                .map_err(CreateTimelineError::Other)?;\n\n            fail::fail_point!(\"before-checkpoint-new-timeline\", |_| {\n                Err(CreateTimelineError::Other(anyhow::anyhow!(\n                    \"failpoint before-checkpoint-new-timeline\"\n                )))\n            });\n\n            Ok(())\n        })\n        .await?;\n\n        // All the data has been imported. 
Insert the Timeline into the tenant's timelines map\n        let tl = self.finish_creation().await?;\n        tl.activate(tenant, broker_client, None, ctx);\n        Ok(tl)\n    }\n\n    pub(crate) fn raw_timeline(&self) -> anyhow::Result<&Arc<Timeline>> {\n        Ok(&self\n            .raw_timeline\n            .as_ref()\n            .with_context(|| {\n                format!(\n                    \"No raw timeline {}/{} found\",\n                    self.owning_tenant.tenant_shard_id, self.timeline_id\n                )\n            })?\n            .0)\n    }\n}\n\nimpl Drop for UninitializedTimeline<'_> {\n    fn drop(&mut self) {\n        if let Some((timeline, create_guard)) = self.raw_timeline.take() {\n            let _entered = info_span!(\"drop_uninitialized_timeline\", tenant_id = %self.owning_tenant.tenant_shard_id.tenant_id, shard_id = %self.owning_tenant.tenant_shard_id.shard_slug(), timeline_id = %self.timeline_id).entered();\n            if self.needs_shutdown && !timeline.gate.close_complete() {\n                // This should not happen: caller should call [`Self::abort`] on failures\n                tracing::warn!(\n                    \"Timeline not shut down after initialization failure, cannot clean up files\"\n                );\n            } else {\n                // This is unusual, but can happen harmlessly if the pageserver is stopped while\n                // creating a timeline.\n                info!(\"Timeline got dropped without initializing, cleaning its files\");\n                cleanup_timeline_directory(create_guard);\n            }\n        }\n    }\n}\n\npub(crate) fn cleanup_timeline_directory(create_guard: TimelineCreateGuard) {\n    let timeline_path = &create_guard.timeline_path;\n    match fs_ext::ignore_absent_files(|| fs::remove_dir_all(timeline_path)) {\n        Ok(()) => {\n            info!(\"Timeline dir {timeline_path:?} removed successfully\")\n        }\n        Err(e) => {\n            error!(\"Failed to 
clean up uninitialized timeline directory {timeline_path:?}: {e:?}\")\n        }\n    }\n    // Having cleaned up, we can release this TimelineId in `[TenantShard::timelines_creating]` to allow other\n    // timeline creation attempts under this TimelineId to proceed\n    drop(create_guard);\n}\n\n/// A guard for timeline creations in process: as long as this object exists, the timeline ID\n/// is kept in `[TenantShard::timelines_creating]` to exclude concurrent attempts to create the same timeline.\n#[must_use]\npub(crate) struct TimelineCreateGuard {\n    pub(crate) _tenant_gate_guard: GateGuard,\n    pub(crate) owning_tenant: Arc<TenantShard>,\n    pub(crate) timeline_id: TimelineId,\n    pub(crate) timeline_path: Utf8PathBuf,\n    pub(crate) idempotency: CreateTimelineIdempotency,\n}\n\n/// Errors when acquiring exclusive access to a timeline ID for creation\n#[derive(thiserror::Error, Debug)]\npub(crate) enum TimelineExclusionError {\n    #[error(\"Already exists\")]\n    AlreadyExists {\n        existing: TimelineOrOffloaded,\n        arg: CreateTimelineIdempotency,\n    },\n    #[error(\"Already creating\")]\n    AlreadyCreating,\n    #[error(\"Shutting down\")]\n    ShuttingDown,\n\n    // e.g. I/O errors, or some failure deep in postgres initdb\n    #[error(transparent)]\n    Other(#[from] anyhow::Error),\n}\n\nimpl TimelineCreateGuard {\n    pub(crate) fn new(\n        owning_tenant: &Arc<TenantShard>,\n        timeline_id: TimelineId,\n        timeline_path: Utf8PathBuf,\n        idempotency: CreateTimelineIdempotency,\n        allow_offloaded: bool,\n    ) -> Result<Self, TimelineExclusionError> {\n        let _tenant_gate_guard = owning_tenant\n            .gate\n            .enter()\n            .map_err(|_| TimelineExclusionError::ShuttingDown)?;\n\n        // Lock order: this is the only place we take both locks.  
During drop() we only\n        // lock creating_timelines\n        let timelines = owning_tenant.timelines.lock().unwrap();\n        let timelines_offloaded = owning_tenant.timelines_offloaded.lock().unwrap();\n        let mut creating_timelines: std::sync::MutexGuard<\n            '_,\n            std::collections::HashSet<TimelineId>,\n        > = owning_tenant.timelines_creating.lock().unwrap();\n\n        if let Some(existing) = timelines.get(&timeline_id) {\n            return Err(TimelineExclusionError::AlreadyExists {\n                existing: TimelineOrOffloaded::Timeline(existing.clone()),\n                arg: idempotency,\n            });\n        }\n        if !allow_offloaded {\n            if let Some(existing) = timelines_offloaded.get(&timeline_id) {\n                return Err(TimelineExclusionError::AlreadyExists {\n                    existing: TimelineOrOffloaded::Offloaded(existing.clone()),\n                    arg: idempotency,\n                });\n            }\n        }\n        if creating_timelines.contains(&timeline_id) {\n            return Err(TimelineExclusionError::AlreadyCreating);\n        }\n        creating_timelines.insert(timeline_id);\n        drop(creating_timelines);\n        drop(timelines_offloaded);\n        drop(timelines);\n        Ok(Self {\n            _tenant_gate_guard,\n            owning_tenant: Arc::clone(owning_tenant),\n            timeline_id,\n            timeline_path,\n            idempotency,\n        })\n    }\n}\n\nimpl Drop for TimelineCreateGuard {\n    fn drop(&mut self) {\n        self.owning_tenant\n            .timelines_creating\n            .lock()\n            .unwrap()\n            .remove(&self.timeline_id);\n    }\n}\n"
  },
  {
    "path": "pageserver/src/tenant/timeline/walreceiver/connection_manager.rs",
    "content": "//! WAL receiver logic that ensures the pageserver gets connectected to safekeeper,\n//! that contains the latest WAL to stream and this connection does not go stale.\n//!\n//! To achieve that, a storage broker is used: safekepers propagate their timelines' state in it,\n//! the manager subscribes for changes and accumulates those to query the one with the biggest Lsn for connection.\n//! Current connection state is tracked too, to ensure it's not getting stale.\n//!\n//! After every connection or storage broker update fetched, the state gets updated correspondingly and rechecked for the new conneciton leader,\n//! then a (re)connection happens, if necessary.\n//! Only WAL streaming task expects to be finished, other loops (storage broker, connection management) never exit unless cancelled explicitly via the dedicated channel.\n\nuse std::collections::HashMap;\nuse std::num::NonZeroU64;\nuse std::ops::ControlFlow;\nuse std::sync::Arc;\nuse std::time::Duration;\n\nuse anyhow::Context;\nuse chrono::{NaiveDateTime, Utc};\nuse pageserver_api::models::TimelineState;\nuse postgres_connection::PgConnectionConfig;\nuse storage_broker::proto::{\n    FilterTenantTimelineId, MessageType, SafekeeperDiscoveryRequest, SafekeeperDiscoveryResponse,\n    SubscribeByFilterRequest, TenantTimelineId as ProtoTenantTimelineId, TypeSubscription,\n    TypedMessage,\n};\nuse storage_broker::{BrokerClientChannel, Code, Streaming};\nuse tokio_util::sync::CancellationToken;\nuse tracing::*;\nuse utils::backoff::{\n    DEFAULT_BASE_BACKOFF_SECONDS, DEFAULT_MAX_BACKOFF_SECONDS, exponential_backoff,\n};\nuse utils::id::{NodeId, TenantTimelineId};\nuse utils::lsn::Lsn;\nuse utils::postgres_client::{ConnectionConfigArgs, wal_stream_connection_config};\n\nuse super::walreceiver_connection::{WalConnectionStatus, WalReceiverError};\nuse super::{TaskEvent, TaskHandle, TaskStateUpdate, WalReceiverConf};\nuse crate::context::{DownloadBehavior, RequestContext};\nuse crate::metrics::{\n    
WALRECEIVER_ACTIVE_MANAGERS, WALRECEIVER_BROKER_UPDATES, WALRECEIVER_CANDIDATES_ADDED,\n    WALRECEIVER_CANDIDATES_REMOVED, WALRECEIVER_SWITCHES,\n};\nuse crate::task_mgr::TaskKind;\nuse crate::tenant::{Timeline, debug_assert_current_span_has_tenant_and_timeline_id};\n\npub(crate) struct Cancelled;\n\n/// Attempts to subscribe for timeline updates, pushed by safekeepers into the broker.\n/// Based on the updates, decides whether to start, keep or stop a WAL receiver task.\n/// If storage broker subscription is cancelled, exits.\n///\n/// # Cancel-Safety\n///\n/// Not cancellation-safe. Use `cancel` token to request cancellation.\npub(super) async fn connection_manager_loop_step(\n    broker_client: &mut BrokerClientChannel,\n    connection_manager_state: &mut ConnectionManagerState,\n    ctx: &RequestContext,\n    cancel: &CancellationToken,\n    manager_status: &std::sync::RwLock<Option<ConnectionManagerStatus>>,\n) -> Result<(), Cancelled> {\n    match tokio::select! {\n        _ = cancel.cancelled() => { return Err(Cancelled); },\n        st = connection_manager_state.timeline.wait_to_become_active(ctx) => { st }\n    } {\n        Ok(()) => {}\n        Err(new_state) => {\n            debug!(\n                ?new_state,\n                \"state changed, stopping wal connection manager loop\"\n            );\n            return Err(Cancelled);\n        }\n    }\n\n    WALRECEIVER_ACTIVE_MANAGERS.inc();\n    scopeguard::defer! 
{\n        WALRECEIVER_ACTIVE_MANAGERS.dec();\n    }\n\n    let id = TenantTimelineId {\n        tenant_id: connection_manager_state.timeline.tenant_shard_id.tenant_id,\n        timeline_id: connection_manager_state.timeline.timeline_id,\n    };\n\n    let mut timeline_state_updates = connection_manager_state\n        .timeline\n        .subscribe_for_state_updates();\n\n    let mut wait_lsn_status = connection_manager_state\n        .timeline\n        .subscribe_for_wait_lsn_updates();\n\n    // TODO: create a separate config option for discovery request interval\n    let discovery_request_interval = connection_manager_state.conf.lagging_wal_timeout;\n    let mut last_discovery_ts: Option<std::time::Instant> = None;\n\n    // Subscribe to the broker updates. Stream shares underlying TCP connection\n    // with other streams on this client (other connection managers). When\n    // object goes out of scope, stream finishes in drop() automatically.\n    let mut broker_subscription = subscribe_for_timeline_updates(broker_client, id, cancel).await?;\n    let mut broker_reset_interval = tokio::time::interval(tokio::time::Duration::from_secs(30));\n    debug!(\"Subscribed for broker timeline updates\");\n\n    loop {\n        let time_until_next_retry = connection_manager_state.time_until_next_retry();\n        let any_activity = connection_manager_state.wal_connection.is_some()\n            || !connection_manager_state.wal_stream_candidates.is_empty();\n\n        // These things are happening concurrently:\n        //\n        //  - cancellation request\n        //  - keep receiving WAL on the current connection\n        //      - if the shared state says we need to change connection, disconnect and return\n        //      - this runs in a separate task and we receive updates via a watch channel\n        //  - change connection if the rules decide so, or if the current connection dies\n        //  - receive updates from broker\n        //      - this might change the 
current desired connection\n        //  - timeline state changes to something that does not allow walreceiver to run concurrently\n        //  - if there's no connection and no candidates, try to send a discovery request\n\n        // NB: make sure each of the select expressions are cancellation-safe\n        // (no need for arms to be cancellation-safe).\n        tokio::select! {\n            _ = cancel.cancelled() => { return Err(Cancelled); }\n            Some(wal_connection_update) = async {\n                match connection_manager_state.wal_connection.as_mut() {\n                    Some(wal_connection) => Some(wal_connection.connection_task.next_task_event().await),\n                    None => None,\n                }\n            } => {\n                let wal_connection = connection_manager_state.wal_connection.as_mut()\n                    .expect(\"Should have a connection, as checked by the corresponding select! guard\");\n                match wal_connection_update {\n                    TaskEvent::Update(TaskStateUpdate::Started) => {},\n                    TaskEvent::Update(TaskStateUpdate::Progress(new_status)) => {\n                        if new_status.has_processed_wal {\n                            // We have advanced last_record_lsn by processing the WAL received\n                            // from this safekeeper. 
This is good enough to clean unsuccessful\n                            // retries history and allow reconnecting to this safekeeper without\n                            // sleeping for a long time.\n                            connection_manager_state.wal_connection_retries.remove(&wal_connection.sk_id);\n                        }\n                        wal_connection.status = new_status;\n                    }\n                    TaskEvent::End(walreceiver_task_result) => {\n                        match walreceiver_task_result {\n                            Ok(()) => debug!(\"WAL receiving task finished\"),\n                            Err(e) => error!(\"wal receiver task finished with an error: {e:?}\"),\n                        }\n                        connection_manager_state.drop_old_connection(false).await;\n                    },\n                }\n            },\n\n            // Got a new update from the broker\n            broker_update = broker_subscription.message() /* TODO: review cancellation-safety */ => {\n                match broker_update {\n                    Ok(Some(broker_update)) => {\n                        broker_reset_interval.reset();\n                        connection_manager_state.register_timeline_update(broker_update);\n                    },\n                    Err(status) => {\n                        match status.code() {\n                            Code::Unknown if status.message().contains(\"stream closed because of a broken pipe\") || status.message().contains(\"connection reset\") || status.message().contains(\"error reading a body from connection\") => {\n                                // tonic's error handling doesn't provide a clear code for disconnections: we get\n                                // \"h2 protocol error: error reading a body from connection: stream closed because of a broken pipe\"\n                                // => https://github.com/neondatabase/neon/issues/9562\n                            
    info!(\"broker disconnected: {status}\");\n                            },\n                            _ => {\n                                warn!(\"broker subscription failed: {status}\");\n                            }\n                        }\n                        return Ok(());\n                    }\n                    Ok(None) => {\n                        error!(\"broker subscription stream ended\"); // can't happen\n                        return Ok(());\n                    }\n                }\n            },\n\n            // If we've not received any updates from the broker from a while, are waiting for WAL\n            // and have no safekeeper connection or connection candidates, then it might be that\n            // the broker subscription is wedged. Drop the current subscription and re-subscribe\n            // with the goal of unblocking it.\n            _ = broker_reset_interval.tick() => {\n                let awaiting_lsn = wait_lsn_status.borrow().is_some();\n                let no_candidates = connection_manager_state.wal_stream_candidates.is_empty();\n                let no_connection = connection_manager_state.wal_connection.is_none();\n\n                if awaiting_lsn && no_candidates && no_connection {\n                    tracing::info!(\"No broker updates received for a while, but waiting for WAL. 
Re-setting stream ...\");\n                    broker_subscription = subscribe_for_timeline_updates(broker_client, id, cancel).await?;\n                }\n            },\n\n            new_event = async {\n                // Reminder: this match arm needs to be cancellation-safe.\n                loop {\n                    if connection_manager_state.timeline.current_state() == TimelineState::Loading {\n                        warn!(\"wal connection manager should only be launched after timeline has become active\");\n                    }\n                    match timeline_state_updates.changed().await {\n                        Ok(()) => {\n                            let new_state = connection_manager_state.timeline.current_state();\n                            match new_state {\n                                // we're already active as walreceiver, no need to reactivate\n                                TimelineState::Active => continue,\n                                TimelineState::Broken { .. 
} | TimelineState::Stopping => {\n                                    debug!(\"timeline entered terminal state {new_state:?}, stopping wal connection manager loop\");\n                                    return ControlFlow::Break(());\n                                }\n                                TimelineState::Loading => {\n                                    warn!(\"timeline transitioned back to Loading state, that should not happen\");\n                                    return ControlFlow::Continue(());\n                                }\n                            }\n                        }\n                        Err(_sender_dropped_error) => return ControlFlow::Break(()),\n                    }\n                }\n            } => match new_event {\n                ControlFlow::Continue(()) => {\n                    return Ok(());\n                }\n                ControlFlow::Break(()) => {\n                    debug!(\"Timeline is no longer active, stopping wal connection manager loop\");\n                    return Err(Cancelled);\n                }\n            },\n\n            Some(()) = async {\n                match time_until_next_retry {\n                    Some(sleep_time) => {\n                        tokio::time::sleep(sleep_time).await;\n                        Some(())\n                    },\n                    None => {\n                        debug!(\"No candidates to retry, waiting indefinitely for the broker events\");\n                        None\n                    }\n                }\n            } => debug!(\"Waking up for the next retry after waiting for {time_until_next_retry:?}\"),\n\n            Some(()) = async {\n                // Reminder: this match arm needs to be cancellation-safe.\n                // Calculating time needed to wait until sending the next discovery request.\n                // Current implementation is conservative and sends discovery requests only when there are no candidates.\n\n         
       if any_activity {\n                    // No need to send discovery requests if there is an active connection or candidates.\n                    return None;\n                }\n\n                // Waiting for an active wait_lsn request.\n                while wait_lsn_status.borrow().is_none() {\n                    if wait_lsn_status.changed().await.is_err() {\n                        // wait_lsn_status channel was closed, exiting\n                        warn!(\"wait_lsn_status channel was closed in connection_manager_loop_step\");\n                        return None;\n                    }\n                }\n\n                // All preconditions met, preparing to send a discovery request.\n                let now = std::time::Instant::now();\n                let next_discovery_ts = last_discovery_ts\n                    .map(|ts| ts + discovery_request_interval)\n                    .unwrap_or_else(|| now);\n\n                if next_discovery_ts > now {\n                    // Prevent sending discovery requests too frequently.\n                    tokio::time::sleep(next_discovery_ts - now).await;\n                }\n\n                let tenant_timeline_id = Some(ProtoTenantTimelineId {\n                    tenant_id: id.tenant_id.as_ref().to_owned(),\n                    timeline_id: id.timeline_id.as_ref().to_owned(),\n                });\n                let request = SafekeeperDiscoveryRequest { tenant_timeline_id };\n                let msg = TypedMessage {\n                    r#type: MessageType::SafekeeperDiscoveryRequest as i32,\n                    safekeeper_timeline_info: None,\n                    safekeeper_discovery_request: Some(request),\n                    safekeeper_discovery_response: None,\n                    };\n\n                last_discovery_ts = Some(std::time::Instant::now());\n                info!(\"No active connection and no candidates, sending discovery request to the broker\");\n\n                // Cancellation 
safety: we want to send a message to the broker, but publish_one()\n                // function can get cancelled by the other select! arm. This is absolutely fine, because\n                // we just want to receive broker updates and discovery is not important if we already\n                // receive updates.\n                //\n                // It is possible that `last_discovery_ts` will be updated, but the message will not be sent.\n                // This is totally fine because of the reason above.\n\n                // This is a fire-and-forget request, we don't care about the response\n                let _ = broker_client.publish_one(msg).await;\n                debug!(\"Discovery request sent to the broker\");\n                None\n            } => {}\n        }\n\n        if let Some(new_candidate) = connection_manager_state.next_connection_candidate() {\n            info!(\"Switching to new connection candidate: {new_candidate:?}\");\n            connection_manager_state\n                .change_connection(new_candidate, ctx)\n                .await\n        }\n        *manager_status.write().unwrap() = Some(connection_manager_state.manager_status());\n    }\n}\n\n/// Endlessly try to subscribe for broker updates for a given timeline.\nasync fn subscribe_for_timeline_updates(\n    broker_client: &mut BrokerClientChannel,\n    id: TenantTimelineId,\n    cancel: &CancellationToken,\n) -> Result<Streaming<TypedMessage>, Cancelled> {\n    let mut attempt = 0;\n    loop {\n        exponential_backoff(\n            attempt,\n            DEFAULT_BASE_BACKOFF_SECONDS,\n            DEFAULT_MAX_BACKOFF_SECONDS,\n            cancel,\n        )\n        .await;\n        attempt += 1;\n\n        // subscribe to the specific timeline\n        let request = SubscribeByFilterRequest {\n            types: vec![\n                TypeSubscription {\n                    r#type: MessageType::SafekeeperTimelineInfo as i32,\n                },\n                
TypeSubscription {\n                    r#type: MessageType::SafekeeperDiscoveryResponse as i32,\n                },\n            ],\n            tenant_timeline_id: Some(FilterTenantTimelineId {\n                enabled: true,\n                tenant_timeline_id: Some(ProtoTenantTimelineId {\n                    tenant_id: id.tenant_id.as_ref().to_owned(),\n                    timeline_id: id.timeline_id.as_ref().to_owned(),\n                }),\n            }),\n        };\n\n        match {\n            tokio::select! {\n                r = broker_client.subscribe_by_filter(request) => { r }\n                _ = cancel.cancelled() => { return Err(Cancelled); }\n            }\n        } {\n            Ok(resp) => {\n                return Ok(resp.into_inner());\n            }\n            Err(e) => {\n                // Safekeeper nodes can stop pushing timeline updates to the broker, when no new writes happen and\n                // entire WAL is streamed. Keep this noticeable with logging, but do not warn/error.\n                info!(\n                    \"Attempt #{attempt}, failed to subscribe for timeline {id} updates in broker: {e:#}\"\n                );\n                continue;\n            }\n        }\n    }\n}\n\nconst WALCONNECTION_RETRY_MIN_BACKOFF_SECONDS: f64 = 0.1;\nconst WALCONNECTION_RETRY_MAX_BACKOFF_SECONDS: f64 = 15.0;\nconst WALCONNECTION_RETRY_BACKOFF_MULTIPLIER: f64 = 1.5;\n\n/// All data that's needed to run endless broker loop and keep the WAL streaming connection alive, if possible.\npub(super) struct ConnectionManagerState {\n    id: TenantTimelineId,\n    /// Use pageserver data about the timeline to filter out some of the safekeepers.\n    timeline: Arc<Timeline>,\n    /// Child token of [`super::WalReceiver::cancel`], inherited to all tasks we spawn.\n    cancel: CancellationToken,\n    conf: WalReceiverConf,\n    /// Current connection to safekeeper for WAL streaming.\n    wal_connection: Option<WalConnection>,\n    /// Info 
about retries and unsuccessful attempts to connect to safekeepers.\n    wal_connection_retries: HashMap<NodeId, RetryInfo>,\n    /// Data about all timelines, available for connection, fetched from storage broker, grouped by their corresponding safekeeper node id.\n    wal_stream_candidates: HashMap<NodeId, BrokerSkTimeline>,\n}\n\n/// Information about the connection manager's current connection and connection candidates.\n#[derive(Debug, Clone)]\npub struct ConnectionManagerStatus {\n    existing_connection: Option<WalConnectionStatus>,\n    wal_stream_candidates: HashMap<NodeId, BrokerSkTimeline>,\n}\n\nimpl ConnectionManagerStatus {\n    /// Generates a string describing the current connection status in a form suitable for logging.\n    pub fn to_human_readable_string(&self) -> String {\n        let mut resulting_string = String::new();\n        match &self.existing_connection {\n            Some(connection) => {\n                if connection.has_processed_wal {\n                    resulting_string.push_str(&format!(\n                        \" (update {}): streaming WAL from node {}, \",\n                        connection.latest_wal_update.format(\"%Y-%m-%d %H:%M:%S\"),\n                        connection.node,\n                    ));\n\n                    match (connection.streaming_lsn, connection.commit_lsn) {\n                        (None, None) => resulting_string.push_str(\"no streaming data\"),\n                        (None, Some(commit_lsn)) => {\n                            resulting_string.push_str(&format!(\"commit Lsn: {commit_lsn}\"))\n                        }\n                        (Some(streaming_lsn), None) => {\n                            resulting_string.push_str(&format!(\"streaming Lsn: {streaming_lsn}\"))\n                        }\n                        (Some(streaming_lsn), Some(commit_lsn)) => resulting_string.push_str(\n                            &format!(\"commit|streaming Lsn: {commit_lsn}|{streaming_lsn}\"),\n             
           ),\n                    }\n                } else if connection.is_connected {\n                    resulting_string.push_str(&format!(\n                        \" (update {}): connecting to node {}\",\n                        connection\n                            .latest_connection_update\n                            .format(\"%Y-%m-%d %H:%M:%S\"),\n                        connection.node,\n                    ));\n                } else {\n                    resulting_string.push_str(&format!(\n                        \" (update {}): initializing node {} connection\",\n                        connection\n                            .latest_connection_update\n                            .format(\"%Y-%m-%d %H:%M:%S\"),\n                        connection.node,\n                    ));\n                }\n            }\n            None => resulting_string.push_str(\": disconnected\"),\n        }\n\n        resulting_string.push_str(\", safekeeper candidates (id|update_time|commit_lsn): [\");\n        let mut candidates = self.wal_stream_candidates.iter().peekable();\n        while let Some((node_id, candidate_info)) = candidates.next() {\n            resulting_string.push_str(&format!(\n                \"({}|{}|{})\",\n                node_id,\n                candidate_info.latest_update.format(\"%H:%M:%S\"),\n                Lsn(candidate_info.timeline.commit_lsn)\n            ));\n            if candidates.peek().is_some() {\n                resulting_string.push_str(\", \");\n            }\n        }\n        resulting_string.push(']');\n\n        resulting_string\n    }\n}\n\n/// Current connection data.\n#[derive(Debug)]\nstruct WalConnection {\n    /// Time when the connection was initiated.\n    started_at: NaiveDateTime,\n    /// Current safekeeper pageserver is connected to for WAL streaming.\n    sk_id: NodeId,\n    /// Availability zone of the safekeeper.\n    availability_zone: Option<String>,\n    /// Status of the connection.\n    
status: WalConnectionStatus,\n    /// WAL streaming task handle.\n    connection_task: TaskHandle<WalConnectionStatus>,\n    /// Have we discovered that other safekeeper has more recent WAL than we do?\n    discovered_new_wal: Option<NewCommittedWAL>,\n}\n\n/// Notion of a new committed WAL, which exists on other safekeeper.\n#[derive(Debug, Clone, Copy)]\nstruct NewCommittedWAL {\n    /// LSN of the new committed WAL.\n    lsn: Lsn,\n    /// When we discovered that the new committed WAL exists on other safekeeper.\n    discovered_at: NaiveDateTime,\n}\n\n#[derive(Debug, Clone, Copy)]\nstruct RetryInfo {\n    next_retry_at: Option<NaiveDateTime>,\n    retry_duration_seconds: f64,\n}\n\n/// Data about the timeline to connect to, received from the broker.\n#[derive(Debug, Clone)]\nstruct BrokerSkTimeline {\n    timeline: SafekeeperDiscoveryResponse,\n    /// Time at which the data was fetched from the broker last time, to track the stale data.\n    latest_update: NaiveDateTime,\n}\n\nimpl ConnectionManagerState {\n    pub(super) fn new(\n        timeline: Arc<Timeline>,\n        conf: WalReceiverConf,\n        cancel: CancellationToken,\n    ) -> Self {\n        let id = TenantTimelineId {\n            tenant_id: timeline.tenant_shard_id.tenant_id,\n            timeline_id: timeline.timeline_id,\n        };\n        Self {\n            id,\n            timeline,\n            cancel,\n            conf,\n            wal_connection: None,\n            wal_stream_candidates: HashMap::new(),\n            wal_connection_retries: HashMap::new(),\n        }\n    }\n\n    fn spawn<Fut>(\n        &self,\n        task: impl FnOnce(\n            tokio::sync::watch::Sender<TaskStateUpdate<WalConnectionStatus>>,\n            CancellationToken,\n        ) -> Fut\n        + Send\n        + 'static,\n    ) -> TaskHandle<WalConnectionStatus>\n    where\n        Fut: std::future::Future<Output = anyhow::Result<()>> + Send,\n    {\n        // TODO: get rid of TaskHandle\n        
super::TaskHandle::spawn(&self.cancel, task)\n    }\n\n    /// Shuts down the current connection (if any) and immediately starts another one with the given connection string.\n    async fn change_connection(&mut self, new_sk: NewWalConnectionCandidate, ctx: &RequestContext) {\n        WALRECEIVER_SWITCHES\n            .with_label_values(&[new_sk.reason.name()])\n            .inc();\n\n        self.drop_old_connection(true).await;\n\n        let node_id = new_sk.safekeeper_id;\n        let connect_timeout = self.conf.wal_connect_timeout;\n        let ingest_batch_size = self.conf.ingest_batch_size;\n        let protocol = self.conf.protocol;\n        let validate_wal_contiguity = self.conf.validate_wal_contiguity;\n        let timeline = Arc::clone(&self.timeline);\n        let ctx = ctx.detached_child(\n            TaskKind::WalReceiverConnectionHandler,\n            DownloadBehavior::Download,\n        );\n\n        let span = info_span!(\"connection\", %node_id);\n        let connection_handle = self.spawn(move |events_sender, cancellation| {\n            async move {\n                debug_assert_current_span_has_tenant_and_timeline_id();\n\n                let res = super::walreceiver_connection::handle_walreceiver_connection(\n                    timeline,\n                    protocol,\n                    new_sk.wal_source_connconf,\n                    events_sender,\n                    cancellation.clone(),\n                    connect_timeout,\n                    ctx,\n                    node_id,\n                    ingest_batch_size,\n                    validate_wal_contiguity,\n                )\n                .await;\n\n                match res {\n                    Ok(()) => Ok(()),\n                    Err(e) => {\n                        match e {\n                            WalReceiverError::SuccessfulCompletion(msg) => {\n                                info!(\"walreceiver connection handling ended with success: {msg}\");\n               
                 Ok(())\n                            }\n                            WalReceiverError::ExpectedSafekeeperError(e) => {\n                                info!(\"walreceiver connection handling ended: {e}\");\n                                Ok(())\n                            }\n                            WalReceiverError::ClosedGate => {\n                                info!(\n                                    \"walreceiver connection handling ended because of closed gate\"\n                                );\n                                Ok(())\n                            }\n                            WalReceiverError::Cancelled => Ok(()),\n                            WalReceiverError::Other(e) => {\n                                // give out an error to have task_mgr give it a really verbose logging\n                                if cancellation.is_cancelled() {\n                                    // Ideally we would learn about this via some path other than Other, but\n                                    // that requires refactoring all the intermediate layers of ingest code\n                                    // that only emit anyhow::Error\n                                    Ok(())\n                                } else {\n                                    Err(e).context(\"walreceiver connection handling failure\")\n                                }\n                            }\n                        }\n                    }\n                }\n            }\n            .instrument(span)\n        });\n\n        let now = Utc::now().naive_utc();\n        self.wal_connection = Some(WalConnection {\n            started_at: now,\n            sk_id: new_sk.safekeeper_id,\n            availability_zone: new_sk.availability_zone,\n            status: WalConnectionStatus {\n                is_connected: false,\n                has_processed_wal: false,\n                latest_connection_update: now,\n                
latest_wal_update: now,\n                streaming_lsn: None,\n                commit_lsn: None,\n                node: node_id,\n            },\n            connection_task: connection_handle,\n            discovered_new_wal: None,\n        });\n    }\n\n    /// Drops the current connection (if any) and updates retry timeout for the next\n    /// connection attempt to the same safekeeper.\n    ///\n    /// # Cancel-Safety\n    ///\n    /// Not cancellation-safe.\n    async fn drop_old_connection(&mut self, needs_shutdown: bool) {\n        let wal_connection = match self.wal_connection.take() {\n            Some(wal_connection) => wal_connection,\n            None => return,\n        };\n\n        if needs_shutdown {\n            wal_connection\n                .connection_task\n                .shutdown()\n                // This here is why this function isn't cancellation-safe.\n                // If we got cancelled here, then self.wal_connection is already None and we lose track of the task.\n                // Even if our caller diligently calls Self::shutdown(), it will find a self.wal_connection=None\n                // and thus be ineffective.\n                .await;\n        }\n\n        let retry = self\n            .wal_connection_retries\n            .entry(wal_connection.sk_id)\n            .or_insert(RetryInfo {\n                next_retry_at: None,\n                retry_duration_seconds: WALCONNECTION_RETRY_MIN_BACKOFF_SECONDS,\n            });\n\n        let now = Utc::now().naive_utc();\n\n        // Schedule the next retry attempt. We want to have exponential backoff for connection attempts,\n        // and we add backoff to the time when we started the connection attempt. 
If the connection\n        // was active for a long time, then next_retry_at will be in the past.\n        retry.next_retry_at =\n            wal_connection\n                .started_at\n                .checked_add_signed(chrono::Duration::milliseconds(\n                    (retry.retry_duration_seconds * 1000.0) as i64,\n                ));\n\n        if let Some(next) = &retry.next_retry_at {\n            if next > &now {\n                info!(\n                    \"Next connection retry to {:?} is at {}\",\n                    wal_connection.sk_id, next\n                );\n            }\n        }\n\n        let next_retry_duration =\n            retry.retry_duration_seconds * WALCONNECTION_RETRY_BACKOFF_MULTIPLIER;\n        // Clamp the next retry duration to the maximum allowed.\n        let next_retry_duration = next_retry_duration.min(WALCONNECTION_RETRY_MAX_BACKOFF_SECONDS);\n        // Clamp the next retry duration to the minimum allowed.\n        let next_retry_duration = next_retry_duration.max(WALCONNECTION_RETRY_MIN_BACKOFF_SECONDS);\n\n        retry.retry_duration_seconds = next_retry_duration;\n    }\n\n    /// Returns time needed to wait to have a new candidate for WAL streaming.\n    fn time_until_next_retry(&self) -> Option<Duration> {\n        let now = Utc::now().naive_utc();\n\n        let next_retry_at = self\n            .wal_connection_retries\n            .values()\n            .filter_map(|retry| retry.next_retry_at)\n            .filter(|next_retry_at| next_retry_at > &now)\n            .min()?;\n\n        (next_retry_at - now).to_std().ok()\n    }\n\n    /// Adds another broker timeline into the state, if its more recent than the one already added there for the same key.\n    fn register_timeline_update(&mut self, typed_msg: TypedMessage) {\n        let mut is_discovery = false;\n        let timeline_update = match typed_msg.r#type() {\n            MessageType::SafekeeperTimelineInfo => {\n                let info = match 
typed_msg.safekeeper_timeline_info {\n                    Some(info) => info,\n                    None => {\n                        warn!(\"bad proto message from broker: no safekeeper_timeline_info\");\n                        return;\n                    }\n                };\n                SafekeeperDiscoveryResponse {\n                    safekeeper_id: info.safekeeper_id,\n                    tenant_timeline_id: info.tenant_timeline_id,\n                    commit_lsn: info.commit_lsn,\n                    safekeeper_connstr: info.safekeeper_connstr,\n                    availability_zone: info.availability_zone,\n                    standby_horizon: info.standby_horizon,\n                }\n            }\n            MessageType::SafekeeperDiscoveryResponse => {\n                is_discovery = true;\n                match typed_msg.safekeeper_discovery_response {\n                    Some(response) => response,\n                    None => {\n                        warn!(\"bad proto message from broker: no safekeeper_discovery_response\");\n                        return;\n                    }\n                }\n            }\n            _ => {\n                // unexpected message\n                return;\n            }\n        };\n\n        WALRECEIVER_BROKER_UPDATES.inc();\n\n        trace!(\n            \"safekeeper info update: standby_horizon(cutoff)={}\",\n            timeline_update.standby_horizon\n        );\n        if timeline_update.standby_horizon != 0 {\n            // ignore reports from safekeepers not connected to replicas\n            self.timeline\n                .standby_horizon\n                .store(Lsn(timeline_update.standby_horizon));\n            self.timeline\n                .metrics\n                .standby_horizon_gauge\n                .set(timeline_update.standby_horizon as i64);\n        }\n\n        let new_safekeeper_id = NodeId(timeline_update.safekeeper_id);\n        let old_entry = 
self.wal_stream_candidates.insert(\n            new_safekeeper_id,\n            BrokerSkTimeline {\n                timeline: timeline_update,\n                latest_update: Utc::now().naive_utc(),\n            },\n        );\n\n        if old_entry.is_none() {\n            info!(\n                ?is_discovery,\n                %new_safekeeper_id,\n                \"New SK node was added\",\n            );\n            WALRECEIVER_CANDIDATES_ADDED.inc();\n        }\n    }\n\n    /// Cleans up stale broker records and checks the rest for the new connection candidate.\n    /// Returns a new candidate, if the current state is absent or somewhat lagging, `None` otherwise.\n    /// The current rules for approving new candidates:\n    /// * pick a candidate different from the connected safekeeper with biggest `commit_lsn` and lowest failed connection attempts\n    /// * if there's no such entry, no new candidate found, abort\n    /// * otherwise check if the candidate is much better than the current one\n    ///\n    /// To understand exact rules for determining if the candidate is better than the current one, refer to this function's implementation.\n    /// The general rules are as follows:\n    /// * if connected safekeeper is not present, pick the candidate\n    /// * if we haven't received any updates for some time, pick the candidate\n    /// * if the candidate commit_lsn is much higher than the current one, pick the candidate\n    /// * if the candidate commit_lsn is same, but candidate is located in the same AZ as the pageserver, pick the candidate\n    /// * if connected safekeeper stopped sending us new WAL which is available on other safekeeper, pick the candidate\n    ///\n    /// This way we ensure to keep up with the most up-to-date safekeeper and don't try to jump from one safekeeper to another too frequently.\n    /// Both thresholds are configured per tenant.\n    fn next_connection_candidate(&mut self) -> Option<NewWalConnectionCandidate> {\n        
self.cleanup_old_candidates();\n\n        match &self.wal_connection {\n            Some(existing_wal_connection) => {\n                let connected_sk_node = existing_wal_connection.sk_id;\n\n                let (new_sk_id, new_safekeeper_broker_data, new_wal_source_connconf) =\n                    self.select_connection_candidate(Some(connected_sk_node))?;\n                let new_availability_zone = new_safekeeper_broker_data.availability_zone.clone();\n\n                let now = Utc::now().naive_utc();\n                if let Ok(latest_interaciton) =\n                    (now - existing_wal_connection.status.latest_connection_update).to_std()\n                {\n                    // Drop connection if we haven't received keepalive message for a while.\n                    if latest_interaciton > self.conf.wal_connect_timeout {\n                        return Some(NewWalConnectionCandidate {\n                            safekeeper_id: new_sk_id,\n                            wal_source_connconf: new_wal_source_connconf,\n                            availability_zone: new_availability_zone,\n                            reason: ReconnectReason::NoKeepAlives {\n                                last_keep_alive: Some(\n                                    existing_wal_connection.status.latest_connection_update,\n                                ),\n                                check_time: now,\n                                threshold: self.conf.wal_connect_timeout,\n                            },\n                        });\n                    }\n                }\n\n                if !existing_wal_connection.status.is_connected {\n                    // We haven't connected yet and we shouldn't switch until connection timeout (condition above).\n                    return None;\n                }\n\n                if let Some(current_commit_lsn) = existing_wal_connection.status.commit_lsn {\n                    let new_commit_lsn = 
Lsn(new_safekeeper_broker_data.commit_lsn);\n                    // Check if the new candidate has much more WAL than the current one.\n                    match new_commit_lsn.0.checked_sub(current_commit_lsn.0) {\n                        Some(new_sk_lsn_advantage) => {\n                            if new_sk_lsn_advantage >= self.conf.max_lsn_wal_lag.get() {\n                                return Some(NewWalConnectionCandidate {\n                                    safekeeper_id: new_sk_id,\n                                    wal_source_connconf: new_wal_source_connconf,\n                                    availability_zone: new_availability_zone,\n                                    reason: ReconnectReason::LaggingWal {\n                                        current_commit_lsn,\n                                        new_commit_lsn,\n                                        threshold: self.conf.max_lsn_wal_lag,\n                                    },\n                                });\n                            }\n                            // If we have a candidate with the same commit_lsn as the current one, which is in the same AZ as pageserver,\n                            // and the current one is not, switch to the new one.\n                            if self.conf.availability_zone.is_some()\n                                && existing_wal_connection.availability_zone\n                                    != self.conf.availability_zone\n                                && self.conf.availability_zone == new_availability_zone\n                            {\n                                return Some(NewWalConnectionCandidate {\n                                    safekeeper_id: new_sk_id,\n                                    availability_zone: new_availability_zone,\n                                    wal_source_connconf: new_wal_source_connconf,\n                                    reason: ReconnectReason::SwitchAvailabilityZone,\n                 
               });\n                            }\n                        }\n                        None => debug!(\n                            \"Best SK candidate has its commit_lsn behind connected SK's commit_lsn\"\n                        ),\n                    }\n                }\n\n                let current_lsn = match existing_wal_connection.status.streaming_lsn {\n                    Some(lsn) => lsn,\n                    None => self.timeline.get_last_record_lsn(),\n                };\n                let current_commit_lsn = existing_wal_connection\n                    .status\n                    .commit_lsn\n                    .unwrap_or(current_lsn);\n                let candidate_commit_lsn = Lsn(new_safekeeper_broker_data.commit_lsn);\n\n                // Keep discovered_new_wal only if connected safekeeper has not caught up yet.\n                let mut discovered_new_wal = existing_wal_connection\n                    .discovered_new_wal\n                    .filter(|new_wal| new_wal.lsn > current_commit_lsn);\n\n                if discovered_new_wal.is_none() {\n                    // Check if the new candidate has more WAL than the current one.\n                    // If the new candidate has more WAL than the current one, we consider switching to the new candidate.\n                    discovered_new_wal = if candidate_commit_lsn > current_commit_lsn {\n                        trace!(\n                            \"New candidate has commit_lsn {}, higher than current_commit_lsn {}\",\n                            candidate_commit_lsn, current_commit_lsn\n                        );\n                        Some(NewCommittedWAL {\n                            lsn: candidate_commit_lsn,\n                            discovered_at: Utc::now().naive_utc(),\n                        })\n                    } else {\n                        None\n                    };\n                }\n\n                let waiting_for_new_lsn_since = if 
current_lsn < current_commit_lsn {\n                    // Connected safekeeper has more WAL, but we haven't received updates for some time.\n                    trace!(\n                        \"Connected safekeeper has more WAL, but we haven't received updates for {:?}. current_lsn: {}, current_commit_lsn: {}\",\n                        (now - existing_wal_connection.status.latest_wal_update).to_std(),\n                        current_lsn,\n                        current_commit_lsn\n                    );\n                    Some(existing_wal_connection.status.latest_wal_update)\n                } else {\n                    discovered_new_wal.as_ref().map(|new_wal| {\n                        // We know that new WAL is available on other safekeeper, but connected safekeeper don't have it.\n                        new_wal\n                            .discovered_at\n                            .max(existing_wal_connection.status.latest_wal_update)\n                    })\n                };\n\n                // If we haven't received any WAL updates for a while and candidate has more WAL, switch to it.\n                if let Some(waiting_for_new_lsn_since) = waiting_for_new_lsn_since {\n                    if let Ok(waiting_for_new_wal) = (now - waiting_for_new_lsn_since).to_std() {\n                        if candidate_commit_lsn > current_commit_lsn\n                            && waiting_for_new_wal > self.conf.lagging_wal_timeout\n                        {\n                            return Some(NewWalConnectionCandidate {\n                                safekeeper_id: new_sk_id,\n                                wal_source_connconf: new_wal_source_connconf,\n                                availability_zone: new_availability_zone,\n                                reason: ReconnectReason::NoWalTimeout {\n                                    current_lsn,\n                                    current_commit_lsn,\n                                    
candidate_commit_lsn,\n                                    last_wal_interaction: Some(\n                                        existing_wal_connection.status.latest_wal_update,\n                                    ),\n                                    check_time: now,\n                                    threshold: self.conf.lagging_wal_timeout,\n                                },\n                            });\n                        }\n                    }\n                }\n\n                self.wal_connection.as_mut().unwrap().discovered_new_wal = discovered_new_wal;\n            }\n            None => {\n                let (new_sk_id, new_safekeeper_broker_data, new_wal_source_connconf) =\n                    self.select_connection_candidate(None)?;\n                return Some(NewWalConnectionCandidate {\n                    safekeeper_id: new_sk_id,\n                    availability_zone: new_safekeeper_broker_data.availability_zone.clone(),\n                    wal_source_connconf: new_wal_source_connconf,\n                    reason: ReconnectReason::NoExistingConnection,\n                });\n            }\n        }\n\n        None\n    }\n\n    /// Selects the best possible candidate, based on the data collected from the broker updates about the safekeepers.\n    /// Optionally, omits the given node, to support gracefully switching from a healthy safekeeper to another.\n    ///\n    /// The candidate that is chosen:\n    /// * has no pending retry cooldown\n    /// * has greatest commit_lsn among the ones that are left\n    fn select_connection_candidate(\n        &self,\n        node_to_omit: Option<NodeId>,\n    ) -> Option<(NodeId, &SafekeeperDiscoveryResponse, PgConnectionConfig)> {\n        self.applicable_connection_candidates()\n            .filter(|&(sk_id, _, _)| Some(sk_id) != node_to_omit)\n            .max_by_key(|(_, info, _)| info.commit_lsn)\n    }\n\n    /// Returns a list of safekeepers that have valid info and ready for 
connection.\n    /// Some safekeepers are filtered by the retry cooldown.\n    fn applicable_connection_candidates(\n        &self,\n    ) -> impl Iterator<Item = (NodeId, &SafekeeperDiscoveryResponse, PgConnectionConfig)> {\n        let now = Utc::now().naive_utc();\n\n        self.wal_stream_candidates\n            .iter()\n            .filter(|(_, info)| Lsn(info.timeline.commit_lsn) != Lsn::INVALID)\n            .filter(move |(sk_id, _)| {\n                let next_retry_at = self\n                    .wal_connection_retries\n                    .get(sk_id)\n                    .and_then(|retry_info| {\n                        retry_info.next_retry_at\n                    });\n\n                next_retry_at.is_none() || next_retry_at.unwrap() <= now\n            }).filter_map(|(sk_id, broker_info)| {\n                let info = &broker_info.timeline;\n                if info.safekeeper_connstr.is_empty() {\n                    return None; // no connection string, ignore sk\n                }\n\n                let shard_identity = self.timeline.get_shard_identity();\n                let (shard_number, shard_count, shard_stripe_size) = (\n                    Some(shard_identity.number.0),\n                    Some(shard_identity.count.0),\n                    Some(shard_identity.stripe_size.0),\n                );\n\n                let connection_conf_args = ConnectionConfigArgs {\n                    protocol: self.conf.protocol,\n                    ttid: self.id,\n                    shard_number,\n                    shard_count,\n                    shard_stripe_size,\n                    listen_pg_addr_str: info.safekeeper_connstr.as_ref(),\n                    auth_token: self.conf.auth_token.as_ref().map(|t| t.as_str()),\n                    availability_zone: self.conf.availability_zone.as_deref()\n                };\n\n                match wal_stream_connection_config(connection_conf_args) {\n                    Ok(connstr) => Some((*sk_id, info, 
connstr)),\n                    Err(e) => {\n                        error!(\"Failed to create wal receiver connection string from broker data of safekeeper node {}: {e:#}\", sk_id);\n                        None\n                    }\n                }\n            })\n    }\n\n    /// Remove candidates which haven't sent broker updates for a while.\n    fn cleanup_old_candidates(&mut self) {\n        let mut node_ids_to_remove = Vec::with_capacity(self.wal_stream_candidates.len());\n        let lagging_wal_timeout = self.conf.lagging_wal_timeout;\n\n        self.wal_stream_candidates.retain(|node_id, broker_info| {\n            if let Ok(time_since_latest_broker_update) =\n                (Utc::now().naive_utc() - broker_info.latest_update).to_std()\n            {\n                let should_retain = time_since_latest_broker_update < lagging_wal_timeout;\n                if !should_retain {\n                    node_ids_to_remove.push(*node_id);\n                }\n                should_retain\n            } else {\n                true\n            }\n        });\n\n        if !node_ids_to_remove.is_empty() {\n            for node_id in node_ids_to_remove {\n                info!(\n                    \"Safekeeper node {node_id} did not send events for over {lagging_wal_timeout:?}, not retrying the connections\"\n                );\n                self.wal_connection_retries.remove(&node_id);\n                WALRECEIVER_CANDIDATES_REMOVED.inc();\n            }\n        }\n    }\n\n    /// # Cancel-Safety\n    ///\n    /// Not cancellation-safe.\n    pub(super) async fn shutdown(mut self) {\n        if let Some(wal_connection) = self.wal_connection.take() {\n            wal_connection.connection_task.shutdown().await;\n        }\n    }\n\n    fn manager_status(&self) -> ConnectionManagerStatus {\n        ConnectionManagerStatus {\n            existing_connection: self.wal_connection.as_ref().map(|conn| conn.status),\n            wal_stream_candidates: 
self.wal_stream_candidates.clone(),\n        }\n    }\n}\n\n#[derive(Debug)]\nstruct NewWalConnectionCandidate {\n    safekeeper_id: NodeId,\n    wal_source_connconf: PgConnectionConfig,\n    availability_zone: Option<String>,\n    reason: ReconnectReason,\n}\n\n/// Stores the reason why WAL connection was switched, for furter debugging purposes.\n#[derive(Debug, PartialEq, Eq)]\nenum ReconnectReason {\n    NoExistingConnection,\n    LaggingWal {\n        current_commit_lsn: Lsn,\n        new_commit_lsn: Lsn,\n        threshold: NonZeroU64,\n    },\n    SwitchAvailabilityZone,\n    NoWalTimeout {\n        current_lsn: Lsn,\n        current_commit_lsn: Lsn,\n        candidate_commit_lsn: Lsn,\n        last_wal_interaction: Option<NaiveDateTime>,\n        check_time: NaiveDateTime,\n        threshold: Duration,\n    },\n    NoKeepAlives {\n        last_keep_alive: Option<NaiveDateTime>,\n        check_time: NaiveDateTime,\n        threshold: Duration,\n    },\n}\n\nimpl ReconnectReason {\n    fn name(&self) -> &str {\n        match self {\n            ReconnectReason::NoExistingConnection => \"NoExistingConnection\",\n            ReconnectReason::LaggingWal { .. } => \"LaggingWal\",\n            ReconnectReason::SwitchAvailabilityZone => \"SwitchAvailabilityZone\",\n            ReconnectReason::NoWalTimeout { .. } => \"NoWalTimeout\",\n            ReconnectReason::NoKeepAlives { .. 
} => \"NoKeepAlives\",\n        }\n    }\n}\n\n#[cfg(test)]\nmod tests {\n    use url::Host;\n    use utils::postgres_client::PostgresClientProtocol;\n\n    use super::*;\n    use crate::tenant::harness::{TIMELINE_ID, TenantHarness};\n\n    fn dummy_broker_sk_timeline(\n        commit_lsn: u64,\n        safekeeper_connstr: &str,\n        latest_update: NaiveDateTime,\n    ) -> BrokerSkTimeline {\n        BrokerSkTimeline {\n            timeline: SafekeeperDiscoveryResponse {\n                safekeeper_id: 0,\n                tenant_timeline_id: None,\n                commit_lsn,\n                safekeeper_connstr: safekeeper_connstr.to_owned(),\n                availability_zone: None,\n                standby_horizon: 0,\n            },\n            latest_update,\n        }\n    }\n\n    #[tokio::test]\n    async fn no_connection_no_candidate() -> anyhow::Result<()> {\n        let harness = TenantHarness::create(\"no_connection_no_candidate\").await?;\n        let mut state = dummy_state(&harness).await;\n        let now = Utc::now().naive_utc();\n\n        let lagging_wal_timeout = chrono::Duration::from_std(state.conf.lagging_wal_timeout)?;\n        let delay_over_threshold = now - lagging_wal_timeout - lagging_wal_timeout;\n\n        state.wal_connection = None;\n        state.wal_stream_candidates = HashMap::from([\n            (NodeId(0), dummy_broker_sk_timeline(1, \"\", now)),\n            (NodeId(1), dummy_broker_sk_timeline(0, \"no_commit_lsn\", now)),\n            (NodeId(2), dummy_broker_sk_timeline(0, \"no_commit_lsn\", now)),\n            (\n                NodeId(3),\n                dummy_broker_sk_timeline(\n                    1 + state.conf.max_lsn_wal_lag.get(),\n                    \"delay_over_threshold\",\n                    delay_over_threshold,\n                ),\n            ),\n        ]);\n\n        let no_candidate = state.next_connection_candidate();\n        assert!(\n            no_candidate.is_none(),\n            \"Expected no 
candidate selected out of non full data options, but got {no_candidate:?}\"\n        );\n\n        Ok(())\n    }\n\n    #[tokio::test]\n    async fn connection_no_candidate() -> anyhow::Result<()> {\n        let harness = TenantHarness::create(\"connection_no_candidate\").await?;\n        let mut state = dummy_state(&harness).await;\n        let now = Utc::now().naive_utc();\n\n        let connected_sk_id = NodeId(0);\n        let current_lsn = 100_000;\n\n        let connection_status = WalConnectionStatus {\n            is_connected: true,\n            has_processed_wal: true,\n            latest_connection_update: now,\n            latest_wal_update: now,\n            commit_lsn: Some(Lsn(current_lsn)),\n            streaming_lsn: Some(Lsn(current_lsn)),\n            node: NodeId(1),\n        };\n\n        state.conf.max_lsn_wal_lag = NonZeroU64::new(100).unwrap();\n        state.wal_connection = Some(WalConnection {\n            started_at: now,\n            sk_id: connected_sk_id,\n            availability_zone: None,\n            status: connection_status,\n            connection_task: state.spawn(move |sender, _| async move {\n                sender\n                    .send(TaskStateUpdate::Progress(connection_status))\n                    .ok();\n                Ok(())\n            }),\n            discovered_new_wal: None,\n        });\n        state.wal_stream_candidates = HashMap::from([\n            (\n                connected_sk_id,\n                dummy_broker_sk_timeline(\n                    current_lsn + state.conf.max_lsn_wal_lag.get() * 2,\n                    DUMMY_SAFEKEEPER_HOST,\n                    now,\n                ),\n            ),\n            (\n                NodeId(1),\n                dummy_broker_sk_timeline(current_lsn, \"not_advanced_lsn\", now),\n            ),\n            (\n                NodeId(2),\n                dummy_broker_sk_timeline(\n                    current_lsn + state.conf.max_lsn_wal_lag.get() / 2,\n   
                 \"not_enough_advanced_lsn\",\n                    now,\n                ),\n            ),\n        ]);\n\n        let no_candidate = state.next_connection_candidate();\n        assert!(\n            no_candidate.is_none(),\n            \"Expected no candidate selected out of valid options since candidate Lsn data is ignored and others' was not advanced enough, but got {no_candidate:?}\"\n        );\n\n        Ok(())\n    }\n\n    #[tokio::test]\n    async fn no_connection_candidate() -> anyhow::Result<()> {\n        let harness = TenantHarness::create(\"no_connection_candidate\").await?;\n        let mut state = dummy_state(&harness).await;\n        let now = Utc::now().naive_utc();\n\n        state.wal_connection = None;\n        state.wal_stream_candidates = HashMap::from([(\n            NodeId(0),\n            dummy_broker_sk_timeline(\n                1 + state.conf.max_lsn_wal_lag.get(),\n                DUMMY_SAFEKEEPER_HOST,\n                now,\n            ),\n        )]);\n\n        let only_candidate = state\n            .next_connection_candidate()\n            .expect(\"Expected one candidate selected out of the only data option, but got none\");\n        assert_eq!(only_candidate.safekeeper_id, NodeId(0));\n        assert_eq!(\n            only_candidate.reason,\n            ReconnectReason::NoExistingConnection,\n            \"Should select new safekeeper due to missing connection, even if there's also a lag in the wal over the threshold\"\n        );\n        assert_eq!(\n            only_candidate.wal_source_connconf.host(),\n            &Host::Domain(DUMMY_SAFEKEEPER_HOST.to_owned())\n        );\n\n        let selected_lsn = 100_000;\n        state.wal_stream_candidates = HashMap::from([\n            (\n                NodeId(0),\n                dummy_broker_sk_timeline(selected_lsn - 100, \"smaller_commit_lsn\", now),\n            ),\n            (\n                NodeId(1),\n                
dummy_broker_sk_timeline(selected_lsn, DUMMY_SAFEKEEPER_HOST, now),\n            ),\n            (\n                NodeId(2),\n                dummy_broker_sk_timeline(selected_lsn + 100, \"\", now),\n            ),\n        ]);\n        let biggest_wal_candidate = state.next_connection_candidate().expect(\n            \"Expected one candidate selected out of multiple valid data options, but got none\",\n        );\n\n        assert_eq!(biggest_wal_candidate.safekeeper_id, NodeId(1));\n        assert_eq!(\n            biggest_wal_candidate.reason,\n            ReconnectReason::NoExistingConnection,\n            \"Should select new safekeeper due to missing connection, even if there's also a lag in the wal over the threshold\"\n        );\n        assert_eq!(\n            biggest_wal_candidate.wal_source_connconf.host(),\n            &Host::Domain(DUMMY_SAFEKEEPER_HOST.to_owned())\n        );\n\n        Ok(())\n    }\n\n    #[tokio::test]\n    async fn candidate_with_many_connection_failures() -> anyhow::Result<()> {\n        let harness = TenantHarness::create(\"candidate_with_many_connection_failures\").await?;\n        let mut state = dummy_state(&harness).await;\n        let now = Utc::now().naive_utc();\n\n        let current_lsn = Lsn(100_000).align();\n        let bigger_lsn = Lsn(current_lsn.0 + 100).align();\n\n        state.wal_connection = None;\n        state.wal_stream_candidates = HashMap::from([\n            (\n                NodeId(0),\n                dummy_broker_sk_timeline(bigger_lsn.0, DUMMY_SAFEKEEPER_HOST, now),\n            ),\n            (\n                NodeId(1),\n                dummy_broker_sk_timeline(current_lsn.0, DUMMY_SAFEKEEPER_HOST, now),\n            ),\n        ]);\n        state.wal_connection_retries = HashMap::from([(\n            NodeId(0),\n            RetryInfo {\n                next_retry_at: now.checked_add_signed(chrono::Duration::hours(1)),\n                retry_duration_seconds: 
WALCONNECTION_RETRY_MAX_BACKOFF_SECONDS,\n            },\n        )]);\n\n        let candidate_with_less_errors = state\n            .next_connection_candidate()\n            .expect(\"Expected one candidate selected, but got none\");\n        assert_eq!(\n            candidate_with_less_errors.safekeeper_id,\n            NodeId(1),\n            \"Should select the node with no pending retry cooldown\"\n        );\n\n        Ok(())\n    }\n\n    #[tokio::test]\n    async fn lsn_wal_over_threshold_current_candidate() -> anyhow::Result<()> {\n        let harness = TenantHarness::create(\"lsn_wal_over_threshcurrent_candidate\").await?;\n        let mut state = dummy_state(&harness).await;\n        let current_lsn = Lsn(100_000).align();\n        let now = Utc::now().naive_utc();\n\n        let connected_sk_id = NodeId(0);\n        let new_lsn = Lsn(current_lsn.0 + state.conf.max_lsn_wal_lag.get() + 1);\n\n        let connection_status = WalConnectionStatus {\n            is_connected: true,\n            has_processed_wal: true,\n            latest_connection_update: now,\n            latest_wal_update: now,\n            commit_lsn: Some(current_lsn),\n            streaming_lsn: Some(current_lsn),\n            node: connected_sk_id,\n        };\n\n        state.wal_connection = Some(WalConnection {\n            started_at: now,\n            sk_id: connected_sk_id,\n            availability_zone: None,\n            status: connection_status,\n            connection_task: state.spawn(move |sender, _| async move {\n                sender\n                    .send(TaskStateUpdate::Progress(connection_status))\n                    .ok();\n                Ok(())\n            }),\n            discovered_new_wal: None,\n        });\n        state.wal_stream_candidates = HashMap::from([\n            (\n                connected_sk_id,\n                dummy_broker_sk_timeline(current_lsn.0, DUMMY_SAFEKEEPER_HOST, now),\n            ),\n            (\n                
NodeId(1),\n                dummy_broker_sk_timeline(new_lsn.0, \"advanced_by_lsn_safekeeper\", now),\n            ),\n        ]);\n\n        let over_threshcurrent_candidate = state.next_connection_candidate().expect(\n            \"Expected one candidate selected out of multiple valid data options, but got none\",\n        );\n\n        assert_eq!(over_threshcurrent_candidate.safekeeper_id, NodeId(1));\n        assert_eq!(\n            over_threshcurrent_candidate.reason,\n            ReconnectReason::LaggingWal {\n                current_commit_lsn: current_lsn,\n                new_commit_lsn: new_lsn,\n                threshold: state.conf.max_lsn_wal_lag\n            },\n            \"Should select bigger WAL safekeeper if it starts to lag enough\"\n        );\n        assert_eq!(\n            over_threshcurrent_candidate.wal_source_connconf.host(),\n            &Host::Domain(\"advanced_by_lsn_safekeeper\".to_owned())\n        );\n\n        Ok(())\n    }\n\n    #[tokio::test]\n    async fn timeout_connection_threshold_current_candidate() -> anyhow::Result<()> {\n        let harness =\n            TenantHarness::create(\"timeout_connection_threshold_current_candidate\").await?;\n        let mut state = dummy_state(&harness).await;\n        let current_lsn = Lsn(100_000).align();\n        let now = Utc::now().naive_utc();\n\n        let wal_connect_timeout = chrono::Duration::from_std(state.conf.wal_connect_timeout)?;\n        let time_over_threshold =\n            Utc::now().naive_utc() - wal_connect_timeout - wal_connect_timeout;\n\n        let connection_status = WalConnectionStatus {\n            is_connected: true,\n            has_processed_wal: true,\n            latest_connection_update: time_over_threshold,\n            latest_wal_update: time_over_threshold,\n            commit_lsn: Some(current_lsn),\n            streaming_lsn: Some(current_lsn),\n            node: NodeId(1),\n        };\n\n        state.wal_connection = Some(WalConnection {\n        
    started_at: now,\n            sk_id: NodeId(1),\n            availability_zone: None,\n            status: connection_status,\n            connection_task: state.spawn(move |sender, _| async move {\n                sender\n                    .send(TaskStateUpdate::Progress(connection_status))\n                    .ok();\n                Ok(())\n            }),\n            discovered_new_wal: None,\n        });\n        state.wal_stream_candidates = HashMap::from([(\n            NodeId(0),\n            dummy_broker_sk_timeline(current_lsn.0, DUMMY_SAFEKEEPER_HOST, now),\n        )]);\n\n        let over_threshcurrent_candidate = state.next_connection_candidate().expect(\n            \"Expected one candidate selected out of multiple valid data options, but got none\",\n        );\n\n        assert_eq!(over_threshcurrent_candidate.safekeeper_id, NodeId(0));\n        match over_threshcurrent_candidate.reason {\n            ReconnectReason::NoKeepAlives {\n                last_keep_alive,\n                threshold,\n                ..\n            } => {\n                assert_eq!(last_keep_alive, Some(time_over_threshold));\n                assert_eq!(threshold, state.conf.lagging_wal_timeout);\n            }\n            unexpected => panic!(\"Unexpected reason: {unexpected:?}\"),\n        }\n        assert_eq!(\n            over_threshcurrent_candidate.wal_source_connconf.host(),\n            &Host::Domain(DUMMY_SAFEKEEPER_HOST.to_owned())\n        );\n\n        Ok(())\n    }\n\n    #[tokio::test]\n    async fn timeout_wal_over_threshold_current_candidate() -> anyhow::Result<()> {\n        let harness = TenantHarness::create(\"timeout_wal_over_threshold_current_candidate\").await?;\n        let mut state = dummy_state(&harness).await;\n        let current_lsn = Lsn(100_000).align();\n        let new_lsn = Lsn(100_100).align();\n        let now = Utc::now().naive_utc();\n\n        let lagging_wal_timeout = 
chrono::Duration::from_std(state.conf.lagging_wal_timeout)?;\n        let time_over_threshold =\n            Utc::now().naive_utc() - lagging_wal_timeout - lagging_wal_timeout;\n\n        let connection_status = WalConnectionStatus {\n            is_connected: true,\n            has_processed_wal: true,\n            latest_connection_update: now,\n            latest_wal_update: time_over_threshold,\n            commit_lsn: Some(current_lsn),\n            streaming_lsn: Some(current_lsn),\n            node: NodeId(1),\n        };\n\n        state.wal_connection = Some(WalConnection {\n            started_at: now,\n            sk_id: NodeId(1),\n            availability_zone: None,\n            status: connection_status,\n            connection_task: state.spawn(move |_, _| async move { Ok(()) }),\n            discovered_new_wal: Some(NewCommittedWAL {\n                discovered_at: time_over_threshold,\n                lsn: new_lsn,\n            }),\n        });\n        state.wal_stream_candidates = HashMap::from([(\n            NodeId(0),\n            dummy_broker_sk_timeline(new_lsn.0, DUMMY_SAFEKEEPER_HOST, now),\n        )]);\n\n        let over_threshcurrent_candidate = state.next_connection_candidate().expect(\n            \"Expected one candidate selected out of multiple valid data options, but got none\",\n        );\n\n        assert_eq!(over_threshcurrent_candidate.safekeeper_id, NodeId(0));\n        match over_threshcurrent_candidate.reason {\n            ReconnectReason::NoWalTimeout {\n                current_lsn,\n                current_commit_lsn,\n                candidate_commit_lsn,\n                last_wal_interaction,\n                threshold,\n                ..\n            } => {\n                assert_eq!(current_lsn, current_lsn);\n                assert_eq!(current_commit_lsn, current_lsn);\n                assert_eq!(candidate_commit_lsn, new_lsn);\n                assert_eq!(last_wal_interaction, Some(time_over_threshold));\n       
         assert_eq!(threshold, state.conf.lagging_wal_timeout);\n            }\n            unexpected => panic!(\"Unexpected reason: {unexpected:?}\"),\n        }\n        assert_eq!(\n            over_threshcurrent_candidate.wal_source_connconf.host(),\n            &Host::Domain(DUMMY_SAFEKEEPER_HOST.to_owned())\n        );\n\n        Ok(())\n    }\n\n    const DUMMY_SAFEKEEPER_HOST: &str = \"safekeeper_connstr\";\n\n    async fn dummy_state(harness: &TenantHarness) -> ConnectionManagerState {\n        let (tenant, ctx) = harness.load().await;\n        let timeline = tenant\n            .create_test_timeline(TIMELINE_ID, Lsn(0x8), crate::DEFAULT_PG_VERSION, &ctx)\n            .await\n            .expect(\"Failed to create an empty timeline for dummy wal connection manager\");\n\n        let protocol = PostgresClientProtocol::Interpreted {\n            format: utils::postgres_client::InterpretedFormat::Protobuf,\n            compression: Some(utils::postgres_client::Compression::Zstd { level: 1 }),\n        };\n\n        ConnectionManagerState {\n            id: TenantTimelineId {\n                tenant_id: harness.tenant_shard_id.tenant_id,\n                timeline_id: TIMELINE_ID,\n            },\n            timeline,\n            cancel: CancellationToken::new(),\n            conf: WalReceiverConf {\n                protocol,\n                wal_connect_timeout: Duration::from_secs(1),\n                lagging_wal_timeout: Duration::from_secs(1),\n                max_lsn_wal_lag: NonZeroU64::new(1024 * 1024).unwrap(),\n                auth_token: None,\n                availability_zone: None,\n                ingest_batch_size: 1,\n                validate_wal_contiguity: false,\n            },\n            wal_connection: None,\n            wal_stream_candidates: HashMap::new(),\n            wal_connection_retries: HashMap::new(),\n        }\n    }\n\n    #[tokio::test]\n    async fn switch_to_same_availability_zone() -> anyhow::Result<()> {\n        // 
Pageserver and one of safekeepers will be in the same availability zone\n        // and pageserver should prefer to connect to it.\n        let test_az = Some(\"test_az\".to_owned());\n\n        let harness = TenantHarness::create(\"switch_to_same_availability_zone\").await?;\n        let mut state = dummy_state(&harness).await;\n        state.conf.availability_zone.clone_from(&test_az);\n        let current_lsn = Lsn(100_000).align();\n        let now = Utc::now().naive_utc();\n\n        let connected_sk_id = NodeId(0);\n\n        let connection_status = WalConnectionStatus {\n            is_connected: true,\n            has_processed_wal: true,\n            latest_connection_update: now,\n            latest_wal_update: now,\n            commit_lsn: Some(current_lsn),\n            streaming_lsn: Some(current_lsn),\n            node: connected_sk_id,\n        };\n\n        state.wal_connection = Some(WalConnection {\n            started_at: now,\n            sk_id: connected_sk_id,\n            availability_zone: None,\n            status: connection_status,\n            connection_task: state.spawn(move |sender, _| async move {\n                sender\n                    .send(TaskStateUpdate::Progress(connection_status))\n                    .ok();\n                Ok(())\n            }),\n            discovered_new_wal: None,\n        });\n\n        // We have another safekeeper with the same commit_lsn, and it have the same availability zone as\n        // the current pageserver.\n        let mut same_az_sk = dummy_broker_sk_timeline(current_lsn.0, \"same_az\", now);\n        same_az_sk.timeline.availability_zone.clone_from(&test_az);\n\n        state.wal_stream_candidates = HashMap::from([\n            (\n                connected_sk_id,\n                dummy_broker_sk_timeline(current_lsn.0, DUMMY_SAFEKEEPER_HOST, now),\n            ),\n            (NodeId(1), same_az_sk),\n        ]);\n\n        // We expect that pageserver will switch to the safekeeper in 
the same availability zone,\n        // even if it has the same commit_lsn.\n        let next_candidate = state.next_connection_candidate().expect(\n            \"Expected one candidate selected out of multiple valid data options, but got none\",\n        );\n\n        assert_eq!(next_candidate.safekeeper_id, NodeId(1));\n        assert_eq!(\n            next_candidate.reason,\n            ReconnectReason::SwitchAvailabilityZone,\n            \"Should switch to the safekeeper in the same availability zone, if it has the same commit_lsn\"\n        );\n        assert_eq!(\n            next_candidate.wal_source_connconf.host(),\n            &Host::Domain(\"same_az\".to_owned())\n        );\n\n        Ok(())\n    }\n}\n"
  },
  {
    "path": "pageserver/src/tenant/timeline/walreceiver/walreceiver_connection.rs",
    "content": "//! Actual Postgres connection handler to stream WAL to the server.\n\nuse std::error::Error;\nuse std::pin::pin;\nuse std::str::FromStr;\nuse std::sync::Arc;\nuse std::time::{Duration, SystemTime};\n\nuse anyhow::{Context, anyhow};\nuse bytes::BytesMut;\nuse chrono::{NaiveDateTime, Utc};\nuse fail::fail_point;\nuse futures::StreamExt;\nuse postgres_backend::is_expected_io_error;\nuse postgres_connection::PgConnectionConfig;\nuse postgres_ffi::WAL_SEGMENT_SIZE;\nuse postgres_ffi::v14::xlog_utils::normalize_lsn;\nuse postgres_ffi::waldecoder::WalDecodeError;\nuse postgres_protocol::message::backend::ReplicationMessage;\nuse postgres_types::PgLsn;\nuse tokio::sync::watch;\nuse tokio::{select, time};\nuse tokio_postgres::error::SqlState;\nuse tokio_postgres::replication::ReplicationStream;\nuse tokio_postgres::{Client, SimpleQueryMessage, SimpleQueryRow};\nuse tokio_util::sync::CancellationToken;\nuse tracing::{Instrument, debug, error, info, trace, warn};\nuse utils::critical_timeline;\nuse utils::id::NodeId;\nuse utils::lsn::Lsn;\nuse utils::pageserver_feedback::PageserverFeedback;\nuse utils::postgres_client::PostgresClientProtocol;\nuse utils::sync::gate::GateError;\nuse wal_decoder::models::{FlushUncommittedRecords, InterpretedWalRecords};\nuse wal_decoder::wire_format::FromWireFormat;\n\nuse super::TaskStateUpdate;\nuse crate::context::RequestContext;\nuse crate::metrics::{LIVE_CONNECTIONS, WAL_INGEST, WALRECEIVER_STARTED_CONNECTIONS};\nuse crate::pgdatadir_mapping::DatadirModification;\nuse crate::task_mgr::{TaskKind, WALRECEIVER_RUNTIME};\nuse crate::tenant::{\n    Timeline, WalReceiverInfo, debug_assert_current_span_has_tenant_and_timeline_id,\n};\nuse crate::walingest::WalIngest;\n\n/// Status of the connection.\n#[derive(Debug, Clone, Copy)]\npub(super) struct WalConnectionStatus {\n    /// If we were able to initiate a postgres connection, this means that safekeeper process is at least running.\n    pub is_connected: bool,\n    /// Defines 
a healthy connection as one on which pageserver received WAL from safekeeper\n    /// and is able to process it in walingest without errors.\n    pub has_processed_wal: bool,\n    /// Connection establishment time or the timestamp of a latest connection message received.\n    pub latest_connection_update: NaiveDateTime,\n    /// Time of the latest WAL message received.\n    pub latest_wal_update: NaiveDateTime,\n    /// Latest WAL update contained WAL up to this LSN. Next WAL message will start from that LSN.\n    pub streaming_lsn: Option<Lsn>,\n    /// Latest commit_lsn received from the safekeeper. Can be zero if no message has been received yet.\n    pub commit_lsn: Option<Lsn>,\n    /// The node it is connected to\n    pub node: NodeId,\n}\n\npub(super) enum WalReceiverError {\n    /// An error of a type that does not indicate an issue, e.g. a connection closing\n    ExpectedSafekeeperError(tokio_postgres::Error),\n    /// An \"error\" message that carries a SUCCESSFUL_COMPLETION status code.  
Carries\n    /// the message part of the original postgres error\n    SuccessfulCompletion(String),\n    /// Generic error\n    Other(anyhow::Error),\n    ClosedGate,\n    Cancelled,\n}\n\nimpl From<tokio_postgres::Error> for WalReceiverError {\n    fn from(err: tokio_postgres::Error) -> Self {\n        if let Some(dberror) = err.as_db_error().filter(|db_error| {\n            db_error.code() == &SqlState::SUCCESSFUL_COMPLETION\n                && db_error.message().contains(\"ending streaming\")\n        }) {\n            // Strip the outer DbError, which carries a misleading \"error\" severity\n            Self::SuccessfulCompletion(dberror.message().to_string())\n        } else if err.is_closed()\n            || err\n                .source()\n                .and_then(|source| source.downcast_ref::<std::io::Error>())\n                .map(is_expected_io_error)\n                .unwrap_or(false)\n        {\n            Self::ExpectedSafekeeperError(err)\n        } else {\n            Self::Other(anyhow::Error::new(err))\n        }\n    }\n}\n\nimpl From<anyhow::Error> for WalReceiverError {\n    fn from(err: anyhow::Error) -> Self {\n        Self::Other(err)\n    }\n}\n\nimpl From<WalDecodeError> for WalReceiverError {\n    fn from(err: WalDecodeError) -> Self {\n        Self::Other(anyhow::Error::new(err))\n    }\n}\n\n/// Open a connection to the given safekeeper and receive WAL, sending back progress\n/// messages as we go.\n#[allow(clippy::too_many_arguments)]\npub(super) async fn handle_walreceiver_connection(\n    timeline: Arc<Timeline>,\n    protocol: PostgresClientProtocol,\n    wal_source_connconf: PgConnectionConfig,\n    events_sender: watch::Sender<TaskStateUpdate<WalConnectionStatus>>,\n    cancellation: CancellationToken,\n    connect_timeout: Duration,\n    ctx: RequestContext,\n    safekeeper_node: NodeId,\n    ingest_batch_size: u64,\n    validate_wal_contiguity: bool,\n) -> Result<(), WalReceiverError> {\n    
debug_assert_current_span_has_tenant_and_timeline_id();\n\n    // prevent timeline shutdown from finishing until we have exited\n    let _guard = timeline.gate.enter().map_err(|e| match e {\n        GateError::GateClosed => WalReceiverError::ClosedGate,\n    })?;\n    // This function spawns a side-car task (WalReceiverConnectionPoller).\n    // Get its gate guard now as well.\n    let poller_guard = timeline.gate.enter().map_err(|e| match e {\n        GateError::GateClosed => WalReceiverError::ClosedGate,\n    })?;\n\n    WALRECEIVER_STARTED_CONNECTIONS.inc();\n\n    // Connect to the database in replication mode.\n    info!(\"connecting to {wal_source_connconf:?}\");\n\n    let (replication_client, connection) = {\n        let mut config = wal_source_connconf.to_tokio_postgres_config();\n        config.application_name(format!(\"pageserver-{}\", timeline.conf.id.0).as_str());\n        config.replication_mode(tokio_postgres::config::ReplicationMode::Physical);\n        match time::timeout(connect_timeout, config.connect(tokio_postgres::NoTls)).await {\n            Ok(client_and_conn) => client_and_conn?,\n            Err(_elapsed) => {\n                // Timing out to connect to a safekeeper node could happen long time, due to\n                // many reasons that pageserver cannot control.\n                // Do not produce an error, but make it visible, that timeouts happen by logging the `event.\n                info!(\n                    \"Timed out while waiting {connect_timeout:?} for walreceiver connection to open\"\n                );\n                return Ok(());\n            }\n        }\n    };\n\n    debug!(\"connected!\");\n    let mut connection_status = WalConnectionStatus {\n        is_connected: true,\n        has_processed_wal: false,\n        latest_connection_update: Utc::now().naive_utc(),\n        latest_wal_update: Utc::now().naive_utc(),\n        streaming_lsn: None,\n        commit_lsn: None,\n        node: safekeeper_node,\n    };\n   
 if let Err(e) = events_sender.send(TaskStateUpdate::Progress(connection_status)) {\n        warn!(\n            \"Wal connection event listener dropped right after connection init, aborting the connection: {e}\"\n        );\n        return Ok(());\n    }\n\n    // The connection object performs the actual communication with the database,\n    // so spawn it off to run on its own. It shouldn't outlive this function, but,\n    // due to lack of async drop, we can't enforce that. However, we ensure that\n    // 1. it is sensitive to `cancellation` and\n    // 2. holds the Timeline gate open so that after timeline shutdown,\n    //    we know this task is gone.\n    let _connection_ctx = ctx.detached_child(\n        TaskKind::WalReceiverConnectionPoller,\n        ctx.download_behavior(),\n    );\n    let connection_cancellation = cancellation.clone();\n    WALRECEIVER_RUNTIME.spawn(\n        async move {\n            debug_assert_current_span_has_tenant_and_timeline_id();\n            select! 
{\n                connection_result = connection => match connection_result {\n                    Ok(()) => debug!(\"Walreceiver db connection closed\"),\n                    Err(connection_error) => {\n                        match WalReceiverError::from(connection_error) {\n                            WalReceiverError::ExpectedSafekeeperError(_) => {\n                                // silence, because most likely we've already exited the outer call\n                                // with a similar error.\n                            },\n                            WalReceiverError::SuccessfulCompletion(_) => {}\n                            WalReceiverError::Cancelled => {\n                                debug!(\"Connection cancelled\")\n                            }\n                            WalReceiverError::ClosedGate => {\n                                // doesn't happen at runtime\n                            }\n                            WalReceiverError::Other(err) => {\n                                warn!(\"Connection aborted: {err:#}\")\n                            }\n                        }\n                    }\n                },\n                _ = connection_cancellation.cancelled() => debug!(\"Connection cancelled\"),\n            }\n            drop(poller_guard);\n        }\n        // Enrich the log lines emitted by this closure with meaningful context.\n        // TODO: technically, this task outlives the surrounding function, so, the\n        // spans won't be properly nested.\n        .instrument(tracing::info_span!(\"poller\")),\n    );\n\n    let _guard = LIVE_CONNECTIONS\n        .with_label_values(&[\"wal_receiver\"])\n        .guard();\n\n    let identify = identify_system(&replication_client).await?;\n    info!(\"{identify:?}\");\n\n    let end_of_wal = Lsn::from(u64::from(identify.xlogpos));\n    let mut caught_up = false;\n\n    connection_status.latest_connection_update = Utc::now().naive_utc();\n    
connection_status.latest_wal_update = Utc::now().naive_utc();\n    connection_status.commit_lsn = Some(end_of_wal);\n    if let Err(e) = events_sender.send(TaskStateUpdate::Progress(connection_status)) {\n        warn!(\n            \"Wal connection event listener dropped after IDENTIFY_SYSTEM, aborting the connection: {e}\"\n        );\n        return Ok(());\n    }\n\n    //\n    // Start streaming the WAL, from where we left off previously.\n    //\n    // If we had previously received WAL up to some point in the middle of a WAL record, we\n    // better start from the end of last full WAL record, not in the middle of one.\n    let mut last_rec_lsn = timeline.get_last_record_lsn();\n    let mut startpoint = last_rec_lsn;\n\n    if startpoint == Lsn(0) {\n        return Err(WalReceiverError::Other(anyhow!(\"No previous WAL position\")));\n    }\n\n    // There might be some padding after the last full record, skip it.\n    startpoint += startpoint.calc_padding(8u32);\n\n    // If the starting point is at a WAL page boundary, skip past the page header. We don't need the page headers\n    // for anything, and in some corner cases, the compute node might have never generated the WAL for page headers\n    //. 
That happens if you create a branch at page boundary: the start point of the branch is at the page boundary,\n    // but when the compute node first starts on the branch, we normalize the first REDO position to just after the page\n    // header (see generate_pg_control()), so the WAL for the page header is never streamed from the compute node\n    //  to the safekeepers.\n    startpoint = normalize_lsn(startpoint, WAL_SEGMENT_SIZE);\n\n    info!(\n        \"last_record_lsn {last_rec_lsn} starting replication from {startpoint}, safekeeper is at {end_of_wal}...\"\n    );\n\n    let query = format!(\"START_REPLICATION PHYSICAL {startpoint}\");\n\n    let copy_stream = replication_client.copy_both_simple(&query).await?;\n    let mut physical_stream = pin!(ReplicationStream::new(copy_stream));\n\n    let mut walingest = WalIngest::new(timeline.as_ref(), startpoint, &ctx)\n        .await\n        .map_err(|e| match e.kind {\n            crate::walingest::WalIngestErrorKind::Cancelled => WalReceiverError::Cancelled,\n            _ => WalReceiverError::Other(e.into()),\n        })?;\n\n    let (format, compression) = match protocol {\n        PostgresClientProtocol::Interpreted {\n            format,\n            compression,\n        } => (format, compression),\n        PostgresClientProtocol::Vanilla => {\n            return Err(WalReceiverError::Other(anyhow!(\n                \"Vanilla WAL receiver protocol is no longer supported for ingest\"\n            )));\n        }\n    };\n\n    let mut expected_wal_start = startpoint;\n    while let Some(replication_message) = {\n        select! 
{\n            biased;\n            _ = cancellation.cancelled() => {\n                debug!(\"walreceiver interrupted\");\n                None\n            }\n            replication_message = physical_stream.next() => replication_message,\n        }\n    } {\n        let replication_message = replication_message?;\n\n        let now = Utc::now().naive_utc();\n        let last_rec_lsn_before_msg = last_rec_lsn;\n\n        // Update the connection status before processing the message. If the message processing\n        // fails (e.g. in walingest), we still want to know latests LSNs from the safekeeper.\n        match &replication_message {\n            ReplicationMessage::PrimaryKeepAlive(keepalive) => {\n                connection_status.latest_connection_update = now;\n                connection_status.commit_lsn = Some(Lsn::from(keepalive.wal_end()));\n            }\n            ReplicationMessage::RawInterpretedWalRecords(raw) => {\n                connection_status.latest_connection_update = now;\n                if !raw.data().is_empty() {\n                    connection_status.latest_wal_update = now;\n                }\n\n                connection_status.commit_lsn = Some(Lsn::from(raw.commit_lsn()));\n                connection_status.streaming_lsn = Some(Lsn::from(raw.streaming_lsn()));\n            }\n            &_ => {}\n        };\n        if let Err(e) = events_sender.send(TaskStateUpdate::Progress(connection_status)) {\n            warn!(\"Wal connection event listener dropped, aborting the connection: {e}\");\n            return Ok(());\n        }\n\n        let status_update = match replication_message {\n            ReplicationMessage::RawInterpretedWalRecords(raw) => {\n                WAL_INGEST.bytes_received.inc_by(raw.data().len() as u64);\n\n                let mut uncommitted_records = 0;\n\n                // This is the end LSN of the raw WAL from which the records\n                // were interpreted.\n                let 
streaming_lsn = Lsn::from(raw.streaming_lsn());\n\n                let batch = InterpretedWalRecords::from_wire(raw.data(), format, compression)\n                    .await\n                    .with_context(|| {\n                        anyhow::anyhow!(\n                        \"Failed to deserialize interpreted records ending at LSN {streaming_lsn}\"\n                    )\n                    })?;\n\n                // Guard against WAL gaps. If the start LSN of the PG WAL section\n                // from which the interpreted records were extracted, doesn't match\n                // the end of the previous batch (or the starting point for the first batch),\n                // then kill this WAL receiver connection and start a new one.\n                if validate_wal_contiguity {\n                    if let Some(raw_wal_start_lsn) = batch.raw_wal_start_lsn {\n                        match raw_wal_start_lsn.cmp(&expected_wal_start) {\n                            std::cmp::Ordering::Greater => {\n                                let msg = format!(\n                                    \"Gap in streamed WAL: [{expected_wal_start}, {raw_wal_start_lsn}\"\n                                );\n                                critical_timeline!(\n                                    timeline.tenant_shard_id,\n                                    timeline.timeline_id,\n                                    Some(&timeline.corruption_detected),\n                                    \"{msg}\"\n                                );\n                                return Err(WalReceiverError::Other(anyhow!(msg)));\n                            }\n                            std::cmp::Ordering::Less => {\n                                // Other shards are reading WAL behind us.\n                                // This is valid, but check that we received records\n                                // that we haven't seen before.\n                                if let Some(first_rec) = 
batch.records.first() {\n                                    if first_rec.next_record_lsn < last_rec_lsn {\n                                        let msg = format!(\n                                            \"Received record with next_record_lsn multiple times ({} < {})\",\n                                            first_rec.next_record_lsn, expected_wal_start\n                                        );\n                                        critical_timeline!(\n                                            timeline.tenant_shard_id,\n                                            timeline.timeline_id,\n                                            Some(&timeline.corruption_detected),\n                                            \"{msg}\"\n                                        );\n                                        return Err(WalReceiverError::Other(anyhow!(msg)));\n                                    }\n                                }\n                            }\n                            std::cmp::Ordering::Equal => {}\n                        }\n                    }\n                }\n\n                let InterpretedWalRecords {\n                    records,\n                    next_record_lsn,\n                    raw_wal_start_lsn: _,\n                } = batch;\n\n                tracing::debug!(\n                    \"Received WAL up to {} with next_record_lsn={}\",\n                    streaming_lsn,\n                    next_record_lsn\n                );\n\n                // We start the modification at 0 because each interpreted record\n                // advances it to its end LSN. 
0 is just an initialization placeholder.\n                let mut modification = timeline.begin_modification(Lsn(0));\n\n                async fn commit(\n                    modification: &mut DatadirModification<'_>,\n                    ctx: &RequestContext,\n                    uncommitted: &mut u64,\n                ) -> anyhow::Result<()> {\n                    let stats = modification.stats();\n                    modification.commit(ctx).await?;\n                    WAL_INGEST.records_committed.inc_by(*uncommitted);\n                    WAL_INGEST.inc_values_committed(&stats);\n                    *uncommitted = 0;\n                    Ok(())\n                }\n\n                if !records.is_empty() {\n                    timeline\n                        .metrics\n                        .wal_records_received\n                        .inc_by(records.len() as u64);\n                }\n\n                for interpreted in records {\n                    if matches!(interpreted.flush_uncommitted, FlushUncommittedRecords::Yes)\n                        && uncommitted_records > 0\n                    {\n                        commit(&mut modification, &ctx, &mut uncommitted_records).await?;\n                    }\n\n                    let local_next_record_lsn = interpreted.next_record_lsn;\n\n                    if interpreted.is_observed() {\n                        WAL_INGEST.records_observed.inc();\n                    }\n\n                    walingest\n                        .ingest_record(interpreted, &mut modification, &ctx)\n                        .await\n                        .with_context(|| {\n                            format!(\"could not ingest record at {local_next_record_lsn}\")\n                        })\n                        .inspect_err(|err| {\n                            // TODO: we can't differentiate cancellation errors with\n                            // anyhow::Error, so just ignore it if we're cancelled.\n                  
          if !cancellation.is_cancelled() && !timeline.is_stopping() {\n                                critical_timeline!(\n                                    timeline.tenant_shard_id,\n                                    timeline.timeline_id,\n                                    Some(&timeline.corruption_detected),\n                                    \"{err:?}\"\n                                );\n                            }\n                        })?;\n\n                    uncommitted_records += 1;\n\n                    // FIXME: this cannot be made pausable_failpoint without fixing the\n                    // failpoint library; in tests, the added amount of debugging will cause us\n                    // to timeout the tests.\n                    fail_point!(\"walreceiver-after-ingest\");\n\n                    // Commit every ingest_batch_size records. Even if we filtered out\n                    // all records, we still need to call commit to advance the LSN.\n                    if uncommitted_records >= ingest_batch_size\n                        || modification.approx_pending_bytes()\n                            > DatadirModification::MAX_PENDING_BYTES\n                    {\n                        commit(&mut modification, &ctx, &mut uncommitted_records).await?;\n                    }\n                }\n\n                // Records might have been filtered out on the safekeeper side, but we still\n                // need to advance last record LSN on all shards. If we've not ingested the latest\n                // record, then set the LSN of the modification past it. 
This way all shards\n                // advance their last record LSN at the same time.\n                let needs_last_record_lsn_advance = if next_record_lsn > modification.get_lsn() {\n                    modification.set_lsn(next_record_lsn).unwrap();\n                    true\n                } else {\n                    false\n                };\n\n                if uncommitted_records > 0 || needs_last_record_lsn_advance {\n                    // Commit any uncommitted records\n                    commit(&mut modification, &ctx, &mut uncommitted_records).await?;\n                }\n\n                if !caught_up && streaming_lsn >= end_of_wal {\n                    info!(\"caught up at LSN {streaming_lsn}\");\n                    caught_up = true;\n                }\n\n                tracing::debug!(\n                    \"Ingested WAL up to {streaming_lsn}. Last record LSN is {}\",\n                    timeline.get_last_record_lsn()\n                );\n\n                last_rec_lsn = next_record_lsn;\n                expected_wal_start = streaming_lsn;\n\n                Some(streaming_lsn)\n            }\n\n            ReplicationMessage::PrimaryKeepAlive(keepalive) => {\n                let wal_end = keepalive.wal_end();\n                let timestamp = keepalive.timestamp();\n                let reply_requested = keepalive.reply() != 0;\n\n                trace!(\n                    \"received PrimaryKeepAlive(wal_end: {wal_end}, timestamp: {timestamp:?} reply: {reply_requested})\"\n                );\n\n                if reply_requested {\n                    Some(last_rec_lsn)\n                } else {\n                    None\n                }\n            }\n\n            _ => None,\n        };\n\n        if !connection_status.has_processed_wal && last_rec_lsn > last_rec_lsn_before_msg {\n            // We have successfully processed at least one WAL record.\n            connection_status.has_processed_wal = true;\n            if let Err(e) 
= events_sender.send(TaskStateUpdate::Progress(connection_status)) {\n                warn!(\"Wal connection event listener dropped, aborting the connection: {e}\");\n                return Ok(());\n            }\n        }\n\n        if let Some(last_lsn) = status_update {\n            let timeline_remote_consistent_lsn = timeline\n                .get_remote_consistent_lsn_visible()\n                .unwrap_or(Lsn(0));\n\n            // The last LSN we processed. It is not guaranteed to survive pageserver crash.\n            let last_received_lsn = last_lsn;\n            // `disk_consistent_lsn` is the LSN at which page server guarantees local persistence of all received data\n            let disk_consistent_lsn = timeline.get_disk_consistent_lsn();\n            // The last LSN that is synced to remote storage and is guaranteed to survive pageserver crash\n            // Used by safekeepers to remove WAL preceding `remote_consistent_lsn`.\n            let remote_consistent_lsn = timeline_remote_consistent_lsn;\n            let ts = SystemTime::now();\n\n            // Update the status about what we just received. 
This is shown in the mgmt API.\n            let last_received_wal = WalReceiverInfo {\n                wal_source_connconf: wal_source_connconf.clone(),\n                last_received_msg_lsn: last_lsn,\n                last_received_msg_ts: ts\n                    .duration_since(SystemTime::UNIX_EPOCH)\n                    .expect(\"Received message time should be before UNIX EPOCH!\")\n                    .as_micros(),\n            };\n            *timeline.last_received_wal.lock().unwrap() = Some(last_received_wal);\n\n            // Send the replication feedback message.\n            // Regular standby_status_update fields are put into this message.\n            let current_timeline_size = if timeline.tenant_shard_id.is_shard_zero() {\n                timeline\n                    .get_current_logical_size(\n                        crate::tenant::timeline::GetLogicalSizePriority::User,\n                        &ctx,\n                    )\n                    // FIXME: https://github.com/neondatabase/neon/issues/5963\n                    .size_dont_care_about_accuracy()\n            } else {\n                // Non-zero shards send zero for logical size.  The safekeeper will ignore\n                // this number.  
This is because in a sharded tenant, only shard zero maintains\n                // accurate logical size.\n                0\n            };\n\n            let status_update = PageserverFeedback {\n                current_timeline_size,\n                last_received_lsn,\n                disk_consistent_lsn,\n                remote_consistent_lsn,\n                replytime: ts,\n                shard_number: timeline.tenant_shard_id.shard_number.0 as u32,\n                corruption_detected: timeline\n                    .corruption_detected\n                    .load(std::sync::atomic::Ordering::Relaxed),\n            };\n\n            debug!(\"neon_status_update {status_update:?}\");\n\n            let mut data = BytesMut::new();\n            status_update.serialize(&mut data);\n            physical_stream\n                .as_mut()\n                .zenith_status_update(data.len() as u64, &data)\n                .await?;\n        }\n    }\n\n    Ok(())\n}\n\n/// Data returned from the postgres `IDENTIFY_SYSTEM` command\n///\n/// See the [postgres docs] for more details.\n///\n/// [postgres docs]: https://www.postgresql.org/docs/current/protocol-replication.html\n#[derive(Debug)]\n// As of nightly 2021-09-11, fields that are only read by the type's `Debug` impl still count as\n// unused. 
Relevant issue: https://github.com/rust-lang/rust/issues/88900\n#[allow(dead_code)]\nstruct IdentifySystem {\n    systemid: u64,\n    timeline: u32,\n    xlogpos: PgLsn,\n    dbname: Option<String>,\n}\n\n/// There was a problem parsing the response to\n/// a postgres IDENTIFY_SYSTEM command.\n#[derive(Debug, thiserror::Error)]\n#[error(\"IDENTIFY_SYSTEM parse error\")]\nstruct IdentifyError;\n\n/// Run the postgres `IDENTIFY_SYSTEM` command\nasync fn identify_system(client: &Client) -> anyhow::Result<IdentifySystem> {\n    let query_str = \"IDENTIFY_SYSTEM\";\n    let response = client.simple_query(query_str).await?;\n\n    // get(N) from row, then parse it as some destination type.\n    fn get_parse<T>(row: &SimpleQueryRow, idx: usize) -> Result<T, IdentifyError>\n    where\n        T: FromStr,\n    {\n        let val = row.get(idx).ok_or(IdentifyError)?;\n        val.parse::<T>().or(Err(IdentifyError))\n    }\n\n    // Extract the first row into an IdentifySystem struct. A missing row or an unparsable\n    // required column yields IdentifyError; dbname is optional and falls back to None.\n    if let Some(SimpleQueryMessage::Row(first_row)) = response.first() {\n        Ok(IdentifySystem {\n            systemid: get_parse(first_row, 0)?,\n            timeline: get_parse(first_row, 1)?,\n            xlogpos: get_parse(first_row, 2)?,\n            dbname: get_parse(first_row, 3).ok(),\n        })\n    } else {\n        Err(IdentifyError.into())\n    }\n}\n"
  },
  {
    "path": "pageserver/src/tenant/timeline/walreceiver.rs",
    "content": "//! WAL receiver manages an open connection to safekeeper, to get the WAL it streams into.\n//! To do so, a current implementation needs to do the following:\n//!\n//! * acknowledge the timelines that it needs to stream WAL into.\n//!   Pageserver is able to dynamically (un)load tenants on attach and detach,\n//!   hence WAL receiver needs to react on such events.\n//!\n//! * get a broker subscription, stream data from it to determine that a timeline needs WAL streaming.\n//!   For that, it watches specific keys in storage_broker and pulls the relevant data periodically.\n//!   The data is produced by safekeepers, that push it periodically and pull it to synchronize between each other.\n//!   Without this data, no WAL streaming is possible currently.\n//!\n//! Only one active WAL streaming connection is allowed at a time.\n//! The connection is supposed to be updated periodically, based on safekeeper timeline data.\n//!\n//! * handle the actual connection and WAL streaming\n//!\n//! Handling happens dynamically, by portions of WAL being processed and registered in the server.\n//! Along with the registration, certain metadata is written to show WAL streaming progress and rely on that when considering safekeepers for connection.\n//!\n//! 
The current module contains high-level primitives used in the submodules; general synchronization, timeline acknowledgement and shutdown logic.\n\nmod connection_manager;\nmod walreceiver_connection;\n\nuse std::future::Future;\nuse std::num::NonZeroU64;\nuse std::sync::Arc;\nuse std::time::Duration;\n\nuse storage_broker::BrokerClientChannel;\nuse tokio::sync::watch;\nuse tokio_util::sync::CancellationToken;\nuse tracing::*;\nuse utils::postgres_client::PostgresClientProtocol;\n\nuse self::connection_manager::ConnectionManagerStatus;\nuse super::Timeline;\nuse crate::context::{DownloadBehavior, RequestContext};\nuse crate::task_mgr::{TaskKind, WALRECEIVER_RUNTIME};\nuse crate::tenant::debug_assert_current_span_has_tenant_and_timeline_id;\nuse crate::tenant::timeline::walreceiver::connection_manager::{\n    ConnectionManagerState, connection_manager_loop_step,\n};\n\n#[derive(Clone)]\npub struct WalReceiverConf {\n    pub protocol: PostgresClientProtocol,\n    /// The timeout on the connection to safekeeper for WAL streaming.\n    pub wal_connect_timeout: Duration,\n    /// The timeout to use to determine when the current connection is \"stale\" and reconnect to the other one.\n    pub lagging_wal_timeout: Duration,\n    /// The Lsn lag to use to determine when the current connection is lagging too much behind and reconnect to the other one.\n    pub max_lsn_wal_lag: NonZeroU64,\n    pub auth_token: Option<Arc<String>>,\n    pub availability_zone: Option<String>,\n    pub ingest_batch_size: u64,\n    pub validate_wal_contiguity: bool,\n}\n\npub struct WalReceiver {\n    manager_status: Arc<std::sync::RwLock<Option<ConnectionManagerStatus>>>,\n    /// All tasks spawned by [`WalReceiver::start`] and its children are sensitive to this token.\n    /// It's a child token of [`Timeline`] so that timeline shutdown can cancel WalReceiver tasks early for `freeze_and_flush=true`.\n    cancel: CancellationToken,\n}\n\nimpl WalReceiver {\n    pub fn start(\n        timeline: 
Arc<Timeline>,\n        conf: WalReceiverConf,\n        mut broker_client: BrokerClientChannel,\n        ctx: &RequestContext,\n    ) -> Self {\n        let tenant_shard_id = timeline.tenant_shard_id;\n        let timeline_id = timeline.timeline_id;\n        let walreceiver_ctx =\n            ctx.detached_child(TaskKind::WalReceiverManager, DownloadBehavior::Error);\n        let loop_status = Arc::new(std::sync::RwLock::new(None));\n        let manager_status = Arc::clone(&loop_status);\n        let cancel = timeline.cancel.child_token();\n        let _task = WALRECEIVER_RUNTIME.spawn({\n            let cancel = cancel.clone();\n            async move {\n                debug_assert_current_span_has_tenant_and_timeline_id();\n                // acquire timeline gate so we know the task doesn't outlive the Timeline\n                let Ok(_guard) = timeline.gate.enter() else {\n                    debug!(\"WAL receiver manager could not enter the gate timeline gate, it's closed already\");\n                    return;\n                };\n                debug!(\"WAL receiver manager started, connecting to broker\");\n                let mut connection_manager_state = ConnectionManagerState::new(\n                    timeline,\n                    conf,\n                    cancel.clone(),\n                );\n                while !cancel.is_cancelled() {\n                    let loop_step_result = connection_manager_loop_step(\n                        &mut broker_client,\n                        &mut connection_manager_state,\n                        &walreceiver_ctx,\n                        &cancel,\n                        &loop_status,\n                    ).await;\n                    match loop_step_result {\n                        Ok(()) => continue,\n                        Err(_cancelled) => {\n                            trace!(\"Connection manager loop ended, shutting down\");\n                            break;\n                        }\n             
       }\n                }\n                connection_manager_state.shutdown().await;\n                *loop_status.write().unwrap() = None;\n                info!(\"task exits\");\n            }\n            .instrument(info_span!(parent: None, \"wal_connection_manager\", tenant_id = %tenant_shard_id.tenant_id, shard_id = %tenant_shard_id.shard_slug(), timeline_id = %timeline_id))\n        });\n\n        Self {\n            manager_status,\n            cancel,\n        }\n    }\n\n    #[instrument(skip_all, level = tracing::Level::DEBUG)]\n    pub async fn cancel(self) {\n        debug_assert_current_span_has_tenant_and_timeline_id();\n        debug!(\"cancelling walreceiver tasks\");\n        self.cancel.cancel();\n    }\n\n    pub(crate) fn status(&self) -> Option<ConnectionManagerStatus> {\n        self.manager_status.read().unwrap().clone()\n    }\n}\n\n/// A handle of an asynchronous task.\n/// The task has a channel that it can use to communicate its lifecycle events in a certain form, see [`TaskEvent`]\n/// and a cancellation token that it can listen to for earlier interrupts.\n///\n/// Note that the communication happens via the `watch` channel, that does not accumulate the events, replacing the old one with the never one on submission.\n/// That may lead to certain events not being observed by the listener.\n#[derive(Debug)]\nstruct TaskHandle<E> {\n    join_handle: Option<tokio::task::JoinHandle<anyhow::Result<()>>>,\n    events_receiver: watch::Receiver<TaskStateUpdate<E>>,\n    cancellation: CancellationToken,\n}\n\nenum TaskEvent<E> {\n    Update(TaskStateUpdate<E>),\n    End(anyhow::Result<()>),\n}\n\n#[derive(Debug, Clone)]\nenum TaskStateUpdate<E> {\n    Started,\n    Progress(E),\n}\n\nimpl<E: Clone> TaskHandle<E> {\n    /// Initializes the task, starting it immediately after the creation.\n    ///\n    /// The second argument to `task` is a child token of `cancel_parent` ([`CancellationToken::child_token`]).\n    /// It being a child token 
enables us to provide a [`Self::shutdown`] method.\n    fn spawn<Fut>(\n        cancel_parent: &CancellationToken,\n        task: impl FnOnce(watch::Sender<TaskStateUpdate<E>>, CancellationToken) -> Fut + Send + 'static,\n    ) -> Self\n    where\n        Fut: Future<Output = anyhow::Result<()>> + Send,\n        E: Send + Sync + 'static,\n    {\n        let cancellation = cancel_parent.child_token();\n        let (events_sender, events_receiver) = watch::channel(TaskStateUpdate::Started);\n\n        let cancellation_clone = cancellation.clone();\n        let join_handle = WALRECEIVER_RUNTIME.spawn(async move {\n            events_sender.send(TaskStateUpdate::Started).ok();\n            task(events_sender, cancellation_clone).await\n            // events_sender is dropped at some point during the .await above.\n            // But the task is still running on WALRECEIVER_RUNTIME.\n            // That is the window when `!jh.is_finished()`\n            // is true inside `fn next_task_event()` below.\n        });\n\n        TaskHandle {\n            join_handle: Some(join_handle),\n            events_receiver,\n            cancellation,\n        }\n    }\n\n    /// # Cancel-Safety\n    ///\n    /// Cancellation-safe.\n    async fn next_task_event(&mut self) -> TaskEvent<E> {\n        match self.events_receiver.changed().await {\n            Ok(()) => TaskEvent::Update((self.events_receiver.borrow()).clone()),\n            Err(_task_channel_part_dropped) => {\n                TaskEvent::End(match self.join_handle.as_mut() {\n                    Some(jh) => {\n                        if !jh.is_finished() {\n                            // See: https://github.com/neondatabase/neon/issues/2885\n                            trace!(\"sender is dropped while join handle is still alive\");\n                        }\n\n                        let res = match jh.await {\n                            Ok(res) => res,\n                            Err(je) if je.is_cancelled() => 
unreachable!(\"not used\"),\n                            Err(je) if je.is_panic() => {\n                                // already logged\n                                Ok(())\n                            }\n                            Err(je) => Err(anyhow::Error::new(je).context(\"join walreceiver task\")),\n                        };\n\n                        // For cancellation-safety, drop join_handle only after successful .await.\n                        self.join_handle = None;\n\n                        res\n                    }\n                    None => {\n                        // Another option is to have an enum, join handle or result and give away the reference to it\n                        Err(anyhow::anyhow!(\"Task was joined more than once\"))\n                    }\n                })\n            }\n        }\n    }\n\n    /// Aborts current task, waiting for it to finish.\n    async fn shutdown(self) {\n        if let Some(jh) = self.join_handle {\n            self.cancellation.cancel();\n            match jh.await {\n                Ok(Ok(())) => debug!(\"Shutdown success\"),\n                Ok(Err(e)) => error!(\"Shutdown task error: {e:?}\"),\n                Err(je) if je.is_cancelled() => unreachable!(\"not used\"),\n                Err(je) if je.is_panic() => {\n                    // already logged\n                }\n                Err(je) => {\n                    error!(\"Shutdown task join error: {je}\")\n                }\n            }\n        }\n    }\n}\n"
  },
  {
    "path": "pageserver/src/tenant/timeline.rs",
    "content": "pub(crate) mod analysis;\npub(crate) mod compaction;\npub mod delete;\npub(crate) mod detach_ancestor;\nmod eviction_task;\npub(crate) mod handle;\nmod heatmap_layers_downloader;\npub(crate) mod import_pgdata;\nmod init;\npub mod layer_manager;\npub(crate) mod logical_size;\npub mod offload;\npub mod span;\npub mod uninit;\nmod walreceiver;\n\nuse hashlink::LruCache;\nuse std::array;\nuse std::cmp::{max, min};\nuse std::collections::btree_map::Entry;\nuse std::collections::{BTreeMap, HashMap, HashSet};\nuse std::ops::{ControlFlow, Deref, Range};\nuse std::sync::atomic::{AtomicBool, AtomicU64, Ordering as AtomicOrdering};\nuse std::sync::{Arc, Mutex, OnceLock, RwLock, Weak};\nuse std::time::{Duration, Instant, SystemTime};\n\nuse anyhow::{Context, Result, anyhow, bail, ensure};\nuse arc_swap::{ArcSwap, ArcSwapOption};\nuse bytes::Bytes;\nuse camino::Utf8Path;\nuse chrono::{DateTime, Utc};\nuse compaction::{CompactionOutcome, GcCompactionCombinedSettings};\nuse enumset::EnumSet;\nuse fail::fail_point;\nuse futures::stream::FuturesUnordered;\nuse futures::{FutureExt, StreamExt};\nuse handle::ShardTimelineId;\nuse layer_manager::{\n    LayerManagerLockHolder, LayerManagerReadGuard, LayerManagerWriteGuard, LockedLayerManager,\n    Shutdown,\n};\n\nuse once_cell::sync::Lazy;\nuse pageserver_api::config::tenant_conf_defaults::DEFAULT_PITR_INTERVAL;\nuse pageserver_api::key::{\n    KEY_SIZE, Key, METADATA_KEY_BEGIN_PREFIX, METADATA_KEY_END_PREFIX, NON_INHERITED_RANGE,\n    SPARSE_RANGE,\n};\nuse pageserver_api::keyspace::{KeySpaceAccum, KeySpaceRandomAccum, SparseKeyPartitioning};\nuse pageserver_api::models::{\n    CompactKeyRange, CompactLsnRange, CompactionAlgorithm, CompactionAlgorithmSettings,\n    DetachBehavior, DownloadRemoteLayersTaskInfo, DownloadRemoteLayersTaskSpawnRequest,\n    EvictionPolicy, InMemoryLayerInfo, LayerMapInfo, LsnLease, PageTraceEvent, RelSizeMigration,\n    TimelineState,\n};\nuse pageserver_api::reltag::{BlockNumber, 
RelTag};\nuse pageserver_api::shard::{ShardIdentity, ShardIndex, ShardNumber, TenantShardId};\nuse postgres_connection::PgConnectionConfig;\nuse postgres_ffi::v14::xlog_utils;\nuse postgres_ffi::{PgMajorVersion, WAL_SEGMENT_SIZE, to_pg_timestamp};\nuse rand::Rng;\nuse remote_storage::DownloadError;\nuse serde_with::serde_as;\nuse storage_broker::BrokerClientChannel;\nuse tokio::runtime::Handle;\nuse tokio::sync::mpsc::Sender;\nuse tokio::sync::{Notify, oneshot, watch};\nuse tokio_util::sync::CancellationToken;\nuse tracing::*;\nuse utils::generation::Generation;\nuse utils::guard_arc_swap::GuardArcSwap;\nuse utils::id::TimelineId;\nuse utils::logging::{MonitorSlowFutureCallback, log_slow, monitor_slow_future};\nuse utils::lsn::{AtomicLsn, Lsn, RecordLsn};\nuse utils::postgres_client::PostgresClientProtocol;\nuse utils::rate_limit::RateLimit;\nuse utils::seqwait::SeqWait;\nuse utils::simple_rcu::{Rcu, RcuReadGuard};\nuse utils::sync::gate::{Gate, GateGuard};\nuse utils::{completion, critical_timeline, fs_ext, pausable_failpoint};\n#[cfg(test)]\nuse wal_decoder::models::value::Value;\nuse wal_decoder::serialized_batch::{SerializedValueBatch, ValueMeta};\n\nuse self::delete::DeleteTimelineFlow;\npub(super) use self::eviction_task::EvictionTaskTenantState;\nuse self::eviction_task::EvictionTaskTimelineState;\nuse self::logical_size::LogicalSize;\nuse self::walreceiver::{WalReceiver, WalReceiverConf};\nuse super::remote_timeline_client::RemoteTimelineClient;\nuse super::remote_timeline_client::index::{GcCompactionState, IndexPart};\nuse super::secondary::heatmap::HeatMapLayer;\nuse super::storage_layer::{LayerFringe, LayerVisibilityHint, ReadableLayer};\nuse super::tasks::log_compaction_error;\nuse super::upload_queue::NotInitialized;\nuse super::{\n    AttachedTenantConf, GcError, HeatMapTimeline, MaybeOffloaded,\n    debug_assert_current_span_has_tenant_and_timeline_id,\n};\nuse crate::PERF_TRACE_TARGET;\nuse crate::aux_file::AuxFileSizeEstimator;\nuse 
crate::basebackup_cache::BasebackupCache;\nuse crate::config::PageServerConf;\nuse crate::context::{\n    DownloadBehavior, PerfInstrumentFutureExt, RequestContext, RequestContextBuilder,\n};\nuse crate::disk_usage_eviction_task::{DiskUsageEvictionInfo, EvictionCandidate, finite_f32};\nuse crate::feature_resolver::TenantFeatureResolver;\nuse crate::keyspace::{KeyPartitioning, KeySpace};\nuse crate::l0_flush::{self, L0FlushGlobalState};\nuse crate::metrics::{\n    DELTAS_PER_READ_GLOBAL, LAYERS_PER_READ_AMORTIZED_GLOBAL, LAYERS_PER_READ_BATCH_GLOBAL,\n    LAYERS_PER_READ_GLOBAL, ScanLatencyOngoingRecording, TimelineMetrics,\n};\nuse crate::page_service::TenantManagerTypes;\nuse crate::pgdatadir_mapping::{\n    CalculateLogicalSizeError, CollectKeySpaceError, DirectoryKind, LsnForTimestamp,\n    MAX_AUX_FILE_V2_DELTAS, MetricsUpdate,\n};\nuse crate::task_mgr::TaskKind;\nuse crate::tenant::gc_result::GcResult;\nuse crate::tenant::layer_map::LayerMap;\nuse crate::tenant::metadata::TimelineMetadata;\nuse crate::tenant::storage_layer::delta_layer::DeltaEntry;\nuse crate::tenant::storage_layer::inmemory_layer::IndexEntry;\nuse crate::tenant::storage_layer::{\n    AsLayerDesc, BatchLayerWriter, DeltaLayerWriter, EvictionError, ImageLayerName,\n    ImageLayerWriter, InMemoryLayer, IoConcurrency, Layer, LayerAccessStatsReset, LayerName,\n    PersistentLayerDesc, PersistentLayerKey, ResidentLayer, ValueReconstructSituation,\n    ValueReconstructState, ValuesReconstructState,\n};\nuse crate::tenant::tasks::BackgroundLoopKind;\nuse crate::tenant::timeline::logical_size::CurrentLogicalSize;\nuse crate::virtual_file::{MaybeFatalIo, VirtualFile};\nuse crate::walingest::WalLagCooldown;\nuse crate::walredo::RedoAttemptType;\nuse crate::{ZERO_PAGE, task_mgr, walredo};\n\n#[derive(Debug, PartialEq, Eq, Clone, Copy)]\npub(crate) enum FlushLoopState {\n    NotStarted,\n    Running {\n        #[cfg(test)]\n        expect_initdb_optimization: bool,\n        #[cfg(test)]\n        
initdb_optimization_count: usize,\n    },\n    Exited,\n}\n\n#[derive(Debug, Copy, Clone, PartialEq, Eq)]\npub enum ImageLayerCreationMode {\n    /// Try to create image layers based on `time_for_new_image_layer`. Used in compaction code path.\n    Try,\n    /// Force creating the image layers if possible. For now, no image layers will be created\n    /// for metadata keys. Used in compaction code path with force flag enabled.\n    Force,\n    /// Initial ingestion of the data, and no data should be dropped in this function. This\n    /// means that no metadata keys should be included in the partitions. Used in flush frozen layer\n    /// code path.\n    Initial,\n}\n\n#[derive(Clone, Debug, Default)]\npub enum LastImageLayerCreationStatus {\n    Incomplete {\n        /// The last key of the partition (exclusive) that was processed in the last\n        /// image layer creation attempt. We will continue from this key in the next\n        /// attempt.\n        last_key: Key,\n    },\n    Complete,\n    #[default]\n    Initial,\n}\n\nimpl std::fmt::Display for ImageLayerCreationMode {\n    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {\n        write!(f, \"{self:?}\")\n    }\n}\n\n/// Temporary function for immutable storage state refactor, ensures we are dropping mutex guard instead of other things.\n/// Can be removed after all refactors are done.\nfn drop_layer_manager_rlock(rlock: LayerManagerReadGuard<'_>) {\n    drop(rlock)\n}\n\n/// Temporary function for immutable storage state refactor, ensures we are dropping mutex guard instead of other things.\n/// Can be removed after all refactors are done.\nfn drop_layer_manager_wlock(rlock: LayerManagerWriteGuard<'_>) {\n    drop(rlock)\n}\n\n/// The outward-facing resources required to build a Timeline\npub struct TimelineResources {\n    pub remote_client: RemoteTimelineClient,\n    pub pagestream_throttle: Arc<crate::tenant::throttle::Throttle>,\n    pub pagestream_throttle_metrics: 
Arc<crate::metrics::tenant_throttling::Pagestream>,\n    pub l0_compaction_trigger: Arc<Notify>,\n    pub l0_flush_global_state: l0_flush::L0FlushGlobalState,\n    pub basebackup_cache: Arc<BasebackupCache>,\n    pub feature_resolver: Arc<TenantFeatureResolver>,\n}\n\npub struct Timeline {\n    pub(crate) conf: &'static PageServerConf,\n    tenant_conf: Arc<ArcSwap<AttachedTenantConf>>,\n\n    myself: Weak<Self>,\n\n    pub(crate) tenant_shard_id: TenantShardId,\n    pub timeline_id: TimelineId,\n\n    /// The generation of the tenant that instantiated us: this is used for safety when writing remote objects.\n    /// Never changes for the lifetime of this [`Timeline`] object.\n    ///\n    /// This duplicates the generation stored in LocationConf, but that structure is mutable:\n    /// this copy enforces the invariant that generatio doesn't change during a Tenant's lifetime.\n    pub(crate) generation: Generation,\n\n    /// The detailed sharding information from our parent Tenant.  This enables us to map keys\n    /// to shards, and is constant through the lifetime of this Timeline.\n    shard_identity: ShardIdentity,\n\n    pub pg_version: PgMajorVersion,\n\n    /// The tuple has two elements.\n    /// 1. `LayerFileManager` keeps track of the various physical representations of the layer files (inmem, local, remote).\n    /// 2. `LayerMap`, the acceleration data structure for `get_reconstruct_data`.\n    ///\n    /// `LayerMap` maps out the `(PAGE,LSN) / (KEY,LSN)` space, which is composed of `(KeyRange, LsnRange)` rectangles.\n    /// We describe these rectangles through the `PersistentLayerDesc` struct.\n    ///\n    /// When we want to reconstruct a page, we first find the `PersistentLayerDesc`'s that we need for page reconstruction,\n    /// using `LayerMap`. Then, we use `LayerFileManager` to get the `PersistentLayer`'s that correspond to the\n    /// `PersistentLayerDesc`'s.\n    ///\n    /// Hence, it's important to keep things coherent. 
The `LayerFileManager` must always have an entry for all\n    /// `PersistentLayerDesc`'s in the `LayerMap`. If it doesn't, `LayerFileManager::get_from_desc` will panic at\n    /// runtime, e.g., during page reconstruction.\n    ///\n    /// In the future, we'll be able to split up the tuple of LayerMap and `LayerFileManager`,\n    /// so that e.g. on-demand-download/eviction, and layer spreading, can operate just on `LayerFileManager`.\n    pub(crate) layers: LockedLayerManager,\n\n    last_freeze_at: AtomicLsn,\n    // Atomic would be more appropriate here.\n    last_freeze_ts: RwLock<Instant>,\n\n    pub(crate) standby_horizon: AtomicLsn,\n\n    // WAL redo manager. `None` only for broken tenants.\n    walredo_mgr: Option<Arc<super::WalRedoManager>>,\n\n    /// Remote storage client.\n    /// See [`remote_timeline_client`](super::remote_timeline_client) module comment for details.\n    pub(crate) remote_client: Arc<RemoteTimelineClient>,\n\n    // What page versions do we hold in the repository? If we get a\n    // request > last_record_lsn, we need to wait until we receive all\n    // the WAL up to the request. The SeqWait provides functions for\n    // that. TODO: If we get a request for an old LSN, such that the\n    // versions have already been garbage collected away, we should\n    // throw an error, but we don't track that currently.\n    //\n    // last_record_lsn.load().last points to the end of last processed WAL record.\n    //\n    // We also remember the starting point of the previous record in\n    // 'last_record_lsn.load().prev'. It's used to set the xl_prev pointer of the\n    // first WAL record when the node is started up. But here, we just\n    // keep track of it.\n    last_record_lsn: SeqWait<RecordLsn, Lsn>,\n\n    // All WAL records have been processed and stored durably on files on\n    // local disk, up to this LSN. 
On crash and restart, we need to re-process\n    // the WAL starting from this point.\n    //\n    // Some later WAL records might have been processed and also flushed to disk\n    // already, so don't be surprised to see some, but there's no guarantee on\n    // them yet.\n    disk_consistent_lsn: AtomicLsn,\n\n    // Parent timeline that this timeline was branched from, and the LSN\n    // of the branch point.\n    ancestor_timeline: Option<Arc<Timeline>>,\n    ancestor_lsn: Lsn,\n\n    // The LSN of gc-compaction that was last applied to this timeline.\n    gc_compaction_state: ArcSwapOption<GcCompactionState>,\n\n    pub(crate) metrics: Arc<TimelineMetrics>,\n\n    // `Timeline` doesn't write these metrics itself, but it manages the lifetime.  Code\n    // in `crate::page_service` writes these metrics.\n    pub(crate) query_metrics: crate::metrics::SmgrQueryTimePerTimeline,\n\n    directory_metrics_inited: [AtomicBool; DirectoryKind::KINDS_NUM],\n    directory_metrics: [AtomicU64; DirectoryKind::KINDS_NUM],\n\n    /// Ensures layers aren't frozen by checkpointer between\n    /// [`Timeline::get_layer_for_write`] and layer reads.\n    /// Locked automatically by [`TimelineWriter`] and checkpointer.\n    /// Must always be acquired before the layer map/individual layer lock\n    /// to avoid deadlock.\n    ///\n    /// The state is cleared upon freezing.\n    write_lock: tokio::sync::Mutex<Option<TimelineWriterState>>,\n\n    /// Used to avoid multiple `flush_loop` tasks running\n    pub(super) flush_loop_state: Mutex<FlushLoopState>,\n\n    /// layer_flush_start_tx can be used to wake up the layer-flushing task.\n    /// - The u64 value is a counter, incremented every time a new flush cycle is requested.\n    ///   The flush cycle counter is sent back on the layer_flush_done channel when\n    ///   the flush finishes. 
You can use that to wait for the flush to finish.\n    /// - The LSN is updated to max() of its current value and the latest disk_consistent_lsn\n    ///   read by whoever sends an update\n    layer_flush_start_tx: tokio::sync::watch::Sender<(u64, Lsn)>,\n    /// to be notified when layer flushing has finished, subscribe to the layer_flush_done channel\n    layer_flush_done_tx: tokio::sync::watch::Sender<(u64, Result<(), FlushLayerError>)>,\n\n    // The LSN at which we have executed GC: whereas [`Self::gc_info`] records the LSN at which\n    // we _intend_ to GC (i.e. the PITR cutoff), this LSN records where we actually last did it.\n    // Because PITR interval is mutable, it's possible for this LSN to be earlier or later than\n    // the planned GC cutoff.\n    pub applied_gc_cutoff_lsn: Rcu<Lsn>,\n\n    pub(crate) gc_compaction_layer_update_lock: tokio::sync::RwLock<()>,\n\n    // List of child timelines and their branch points. This is needed to avoid\n    // garbage collecting data that is still needed by the child timelines.\n    pub(crate) gc_info: std::sync::RwLock<GcInfo>,\n\n    pub(crate) last_image_layer_creation_status: ArcSwap<LastImageLayerCreationStatus>,\n\n    // It may change across major versions so for simplicity\n    // keep it after running initdb for a timeline.\n    // It is needed in checks when we want to error on some operations\n    // when they are requested for pre-initdb lsn.\n    // It can be unified with latest_gc_cutoff_lsn under some \"first_valid_lsn\",\n    // though let's keep them both for better error visibility.\n    pub initdb_lsn: Lsn,\n\n    /// The repartitioning result. 
Allows a single writer and multiple readers.\n    pub(crate) partitioning: GuardArcSwap<((KeyPartitioning, SparseKeyPartitioning), Lsn)>,\n\n    /// Configuration: how often should the partitioning be recalculated.\n    repartition_threshold: u64,\n\n    last_image_layer_creation_check_at: AtomicLsn,\n    last_image_layer_creation_check_instant: std::sync::Mutex<Option<Instant>>,\n\n    /// Current logical size of the \"datadir\", at the last LSN.\n    current_logical_size: LogicalSize,\n\n    /// Information about the last processed message by the WAL receiver,\n    /// or None if WAL receiver has not received anything for this timeline\n    /// yet.\n    pub last_received_wal: Mutex<Option<WalReceiverInfo>>,\n    pub walreceiver: Mutex<Option<WalReceiver>>,\n\n    /// Relation size cache\n    pub(crate) rel_size_latest_cache: RwLock<HashMap<RelTag, (Lsn, BlockNumber)>>,\n    pub(crate) rel_size_snapshot_cache: Mutex<LruCache<(Lsn, RelTag), BlockNumber>>,\n\n    download_all_remote_layers_task_info: RwLock<Option<DownloadRemoteLayersTaskInfo>>,\n\n    state: watch::Sender<TimelineState>,\n\n    /// Prevent two tasks from deleting the timeline at the same time. If held, the\n    /// timeline is being deleted. If 'true', the timeline has already been deleted.\n    pub delete_progress: TimelineDeleteProgress,\n\n    eviction_task_timeline_state: tokio::sync::Mutex<EvictionTaskTimelineState>,\n\n    /// Load or creation time information about the disk_consistent_lsn and when the loading\n    /// happened. 
Used for consumption metrics.\n    pub(crate) loaded_at: (Lsn, SystemTime),\n\n    /// Gate to prevent shutdown completing while I/O is still happening to this timeline's data\n    pub(crate) gate: Gate,\n\n    /// Cancellation token scoped to this timeline: anything doing long-running work relating\n    /// to the timeline should drop out when this token fires.\n    pub(crate) cancel: CancellationToken,\n\n    /// Make sure we only have one running compaction at a time in tests.\n    ///\n    /// Must only be taken in two places:\n    /// - [`Timeline::compact`] (this file)\n    /// - [`delete::delete_local_timeline_directory`]\n    ///\n    /// Timeline deletion will acquire both compaction and gc locks in whatever order.\n    compaction_lock: tokio::sync::Mutex<()>,\n\n    /// If true, the last compaction failed.\n    compaction_failed: AtomicBool,\n\n    /// Begin Hadron: If true, the pageserver has likely detected data corruption in the timeline.\n    /// We need to feed this information back to the Safekeeper and postgres for them to take the\n    /// appropriate action.\n    corruption_detected: AtomicBool,\n\n    /// Notifies the tenant compaction loop that there is pending L0 compaction work.\n    l0_compaction_trigger: Arc<Notify>,\n\n    /// Make sure we only have one running gc at a time.\n    ///\n    /// Must only be taken in two places:\n    /// - [`Timeline::gc`] (this file)\n    /// - [`delete::delete_local_timeline_directory`]\n    ///\n    /// Timeline deletion will acquire both compaction and gc locks in whatever order.\n    gc_lock: tokio::sync::Mutex<()>,\n\n    /// Cloned from [`super::TenantShard::pagestream_throttle`] on construction.\n    pub(crate) pagestream_throttle: Arc<crate::tenant::throttle::Throttle>,\n\n    /// Size estimator for aux file v2\n    pub(crate) aux_file_size_estimator: AuxFileSizeEstimator,\n\n    /// Some test cases directly place keys into the timeline without actually modifying the directory\n    /// keys (i.e., 
DB_DIR). The test cases creating such keys will put the keyspaces here, so that\n    /// these keys won't get garbage-collected during compaction/GC. This field only modifies the dense\n    /// keyspace return value of `collect_keyspace`. For sparse keyspaces, use AUX keys for testing, and\n    /// in the future, add `extra_test_sparse_keyspace` if necessary.\n    #[cfg(test)]\n    pub(crate) extra_test_dense_keyspace: ArcSwap<KeySpace>,\n\n    pub(crate) l0_flush_global_state: L0FlushGlobalState,\n\n    pub(crate) handles: handle::PerTimelineState<TenantManagerTypes>,\n\n    pub(crate) attach_wal_lag_cooldown: Arc<OnceLock<WalLagCooldown>>,\n\n    /// Cf. [`crate::tenant::CreateTimelineIdempotency`].\n    pub(crate) create_idempotency: crate::tenant::CreateTimelineIdempotency,\n\n    /// If Some, collects GetPage metadata for an ongoing PageTrace.\n    pub(crate) page_trace: ArcSwapOption<Sender<PageTraceEvent>>,\n\n    pub(super) previous_heatmap: ArcSwapOption<PreviousHeatmap>,\n\n    /// May host a background Tokio task which downloads all the layers from the current\n    /// heatmap on demand.\n    heatmap_layers_downloader: Mutex<Option<heatmap_layers_downloader::HeatmapLayersDownloader>>,\n\n    pub(crate) rel_size_v2_status: ArcSwap<(Option<RelSizeMigration>, Option<Lsn>)>,\n\n    wait_lsn_log_slow: tokio::sync::Semaphore,\n\n    /// A channel to send async requests to prepare a basebackup for the basebackup cache.\n    basebackup_cache: Arc<BasebackupCache>,\n\n    #[expect(dead_code)]\n    feature_resolver: Arc<TenantFeatureResolver>,\n\n    /// Basebackup will collect the count and store it here. 
Used for reldirv2 rollout.\n    pub(crate) db_rel_count: ArcSwapOption<(usize, usize)>,\n}\n\npub(crate) enum PreviousHeatmap {\n    Active {\n        heatmap: HeatMapTimeline,\n        read_at: std::time::Instant,\n        // End LSN covered by the heatmap if known\n        end_lsn: Option<Lsn>,\n    },\n    Obsolete,\n}\n\npub type TimelineDeleteProgress = Arc<tokio::sync::Mutex<DeleteTimelineFlow>>;\n\npub struct WalReceiverInfo {\n    pub wal_source_connconf: PgConnectionConfig,\n    pub last_received_msg_lsn: Lsn,\n    pub last_received_msg_ts: u128,\n}\n\n/// Information about how much history needs to be retained, needed by\n/// Garbage Collection.\n#[derive(Default)]\npub(crate) struct GcInfo {\n    /// Specific LSNs that are needed.\n    ///\n    /// Currently, this includes all points where child branches have\n    /// been forked off from. In the future, could also include\n    /// explicit user-defined snapshot points.\n    pub(crate) retain_lsns: Vec<(Lsn, TimelineId, MaybeOffloaded)>,\n\n    /// The cutoff coordinates, which are combined by selecting the minimum.\n    pub(crate) cutoffs: GcCutoffs,\n\n    /// Leases granted to particular LSNs.\n    pub(crate) leases: BTreeMap<Lsn, LsnLease>,\n\n    /// Whether our branch point is within our ancestor's PITR interval (for cost estimation)\n    pub(crate) within_ancestor_pitr: bool,\n}\n\nimpl GcInfo {\n    pub(crate) fn min_cutoff(&self) -> Lsn {\n        self.cutoffs.select_min()\n    }\n\n    pub(super) fn insert_child(\n        &mut self,\n        child_id: TimelineId,\n        child_lsn: Lsn,\n        is_offloaded: MaybeOffloaded,\n    ) {\n        self.retain_lsns.push((child_lsn, child_id, is_offloaded));\n        self.retain_lsns.sort_by_key(|i| i.0);\n    }\n\n    pub(super) fn remove_child_maybe_offloaded(\n        &mut self,\n        child_id: TimelineId,\n        maybe_offloaded: MaybeOffloaded,\n    ) -> bool {\n        // Remove at most one element. 
Needed for correctness if there is two live `Timeline` objects referencing\n        // the same timeline. Shouldn't but maybe can occur when Arc's live longer than intended.\n        let mut removed = false;\n        self.retain_lsns.retain(|i| {\n            if removed {\n                return true;\n            }\n            let remove = i.1 == child_id && i.2 == maybe_offloaded;\n            removed |= remove;\n            !remove\n        });\n        removed\n    }\n\n    pub(super) fn remove_child_not_offloaded(&mut self, child_id: TimelineId) -> bool {\n        self.remove_child_maybe_offloaded(child_id, MaybeOffloaded::No)\n    }\n\n    pub(super) fn remove_child_offloaded(&mut self, child_id: TimelineId) -> bool {\n        self.remove_child_maybe_offloaded(child_id, MaybeOffloaded::Yes)\n    }\n    pub(crate) fn lsn_covered_by_lease(&self, lsn: Lsn) -> bool {\n        self.leases.contains_key(&lsn)\n    }\n}\n\n/// The `GcInfo` component describing which Lsns need to be retained.  Functionally, this\n/// is a single number (the oldest LSN which we must retain), but it internally distinguishes\n/// between time-based and space-based retention for observability and consumption metrics purposes.\n#[derive(Clone, Debug, Default)]\npub(crate) struct GcCutoffs {\n    /// Calculated from the [`pageserver_api::models::TenantConfig::gc_horizon`], this LSN indicates how much\n    /// history we must keep to retain a specified number of bytes of WAL.\n    pub(crate) space: Lsn,\n\n    /// Calculated from [`pageserver_api::models::TenantConfig::pitr_interval`], this LSN indicates\n    /// how much history we must keep to enable reading back at least the PITR interval duration.\n    ///\n    /// None indicates that the PITR cutoff has not been computed. 
A PITR interval of 0 will yield\n    /// Some(last_record_lsn).\n    pub(crate) time: Option<Lsn>,\n}\n\nimpl GcCutoffs {\n    fn select_min(&self) -> Lsn {\n        // NB: if we haven't computed the PITR cutoff yet, we can't GC anything.\n        self.space.min(self.time.unwrap_or_default())\n    }\n}\n\npub(crate) struct TimelineVisitOutcome {\n    completed_keyspace: KeySpace,\n    image_covered_keyspace: KeySpace,\n}\n\n/// An error happened in a get() operation.\n#[derive(thiserror::Error, Debug)]\npub(crate) enum PageReconstructError {\n    #[error(transparent)]\n    Other(anyhow::Error),\n\n    #[error(\"Ancestor LSN wait error: {0}\")]\n    AncestorLsnTimeout(WaitLsnError),\n\n    #[error(\"timeline shutting down\")]\n    Cancelled,\n\n    /// An error happened replaying WAL records\n    #[error(transparent)]\n    WalRedo(anyhow::Error),\n\n    #[error(\"{0}\")]\n    MissingKey(Box<MissingKeyError>),\n}\n\nimpl PageReconstructError {\n    pub(crate) fn is_cancel(&self) -> bool {\n        match self {\n            PageReconstructError::Other(_) => false,\n            PageReconstructError::AncestorLsnTimeout(e) => e.is_cancel(),\n            PageReconstructError::Cancelled => true,\n            PageReconstructError::WalRedo(_) => false,\n            PageReconstructError::MissingKey(_) => false,\n        }\n    }\n    #[allow(dead_code)] // we use the is_cancel + into_anyhow pattern in quite a few places, this one will follow soon enough\n    pub(crate) fn into_anyhow(self) -> anyhow::Error {\n        match self {\n            PageReconstructError::Other(e) => e,\n            PageReconstructError::AncestorLsnTimeout(e) => e.into_anyhow(),\n            PageReconstructError::Cancelled => anyhow::Error::new(self),\n            PageReconstructError::WalRedo(e) => e,\n            PageReconstructError::MissingKey(_) => anyhow::Error::new(self),\n        }\n    }\n}\n\nimpl From<anyhow::Error> for PageReconstructError {\n    fn from(value: anyhow::Error) -> Self {\n  
      // with walingest.rs many PageReconstructError are wrapped in as anyhow::Error\n        match value.downcast::<PageReconstructError>() {\n            Ok(pre) => pre,\n            Err(other) => PageReconstructError::Other(other),\n        }\n    }\n}\n\nimpl From<utils::bin_ser::DeserializeError> for PageReconstructError {\n    fn from(value: utils::bin_ser::DeserializeError) -> Self {\n        PageReconstructError::Other(anyhow::Error::new(value).context(\"deserialization failure\"))\n    }\n}\n\nimpl From<layer_manager::Shutdown> for PageReconstructError {\n    fn from(_: layer_manager::Shutdown) -> Self {\n        PageReconstructError::Cancelled\n    }\n}\n\nimpl GetVectoredError {\n    #[cfg(test)]\n    pub(crate) fn is_missing_key_error(&self) -> bool {\n        matches!(self, Self::MissingKey(_))\n    }\n}\n\nimpl From<layer_manager::Shutdown> for GetVectoredError {\n    fn from(_: layer_manager::Shutdown) -> Self {\n        GetVectoredError::Cancelled\n    }\n}\n\n/// A layer identifier when used in the [`ReadPath`] structure. 
This enum is for observability purposes\n/// only and not used by the \"real read path\".\npub enum ReadPathLayerId {\n    PersistentLayer(PersistentLayerKey),\n    InMemoryLayer(Range<Lsn>),\n}\n\nimpl std::fmt::Display for ReadPathLayerId {\n    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {\n        match self {\n            ReadPathLayerId::PersistentLayer(key) => write!(f, \"{key}\"),\n            ReadPathLayerId::InMemoryLayer(range) => {\n                write!(f, \"in-mem {}..{}\", range.start, range.end)\n            }\n        }\n    }\n}\npub struct ReadPath {\n    keyspace: KeySpace,\n    lsn: Lsn,\n    path: Vec<(ReadPathLayerId, KeySpace, Range<Lsn>)>,\n}\n\nimpl ReadPath {\n    pub fn new(keyspace: KeySpace, lsn: Lsn) -> Self {\n        Self {\n            keyspace,\n            lsn,\n            path: Vec::new(),\n        }\n    }\n\n    pub fn record_layer_visit(\n        &mut self,\n        layer_to_read: &ReadableLayer,\n        keyspace_to_read: &KeySpace,\n        lsn_range: &Range<Lsn>,\n    ) {\n        let id = match layer_to_read {\n            ReadableLayer::PersistentLayer(layer) => {\n                ReadPathLayerId::PersistentLayer(layer.layer_desc().key())\n            }\n            ReadableLayer::InMemoryLayer(layer) => {\n                ReadPathLayerId::InMemoryLayer(layer.get_lsn_range())\n            }\n        };\n        self.path\n            .push((id, keyspace_to_read.clone(), lsn_range.clone()));\n    }\n}\n\nimpl std::fmt::Display for ReadPath {\n    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {\n        writeln!(f, \"Read path for {} at lsn {}:\", self.keyspace, self.lsn)?;\n        for (idx, (layer_id, keyspace, lsn_range)) in self.path.iter().enumerate() {\n            writeln!(\n                f,\n                \"{}: {} {}..{} {}\",\n                idx, layer_id, lsn_range.start, lsn_range.end, keyspace\n            )?;\n        }\n        Ok(())\n    
}\n}\n\n#[derive(thiserror::Error)]\npub struct MissingKeyError {\n    keyspace: KeySpace,\n    shard: ShardNumber,\n    query: Option<VersionedKeySpaceQuery>,\n    // This is the largest request LSN from the get page request batch\n    original_hwm_lsn: Lsn,\n    ancestor_lsn: Option<Lsn>,\n    /// Debug information about the read path if there's an error\n    read_path: Option<ReadPath>,\n    backtrace: Option<std::backtrace::Backtrace>,\n}\n\nimpl MissingKeyError {\n    fn enrich(&mut self, query: VersionedKeySpaceQuery) {\n        self.query = Some(query);\n    }\n}\n\nimpl std::fmt::Debug for MissingKeyError {\n    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {\n        write!(f, \"{self}\")\n    }\n}\n\nimpl std::fmt::Display for MissingKeyError {\n    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {\n        write!(\n            f,\n            \"could not find data for key {} (shard {:?}), original HWM LSN {}\",\n            self.keyspace, self.shard, self.original_hwm_lsn\n        )?;\n\n        if let Some(ref ancestor_lsn) = self.ancestor_lsn {\n            write!(f, \", ancestor {ancestor_lsn}\")?;\n        }\n\n        if let Some(ref query) = self.query {\n            write!(f, \", query {query}\")?;\n        }\n\n        if let Some(ref read_path) = self.read_path {\n            write!(f, \"\\n{read_path}\")?;\n        }\n\n        if let Some(ref backtrace) = self.backtrace {\n            write!(f, \"\\n{backtrace}\")?;\n        }\n\n        Ok(())\n    }\n}\n\n#[derive(thiserror::Error, Debug)]\npub(crate) enum CreateImageLayersError {\n    #[error(\"timeline shutting down\")]\n    Cancelled,\n\n    #[error(\"read failed\")]\n    GetVectoredError(#[source] GetVectoredError),\n\n    #[error(\"reconstruction failed\")]\n    PageReconstructError(#[source] PageReconstructError),\n\n    #[error(transparent)]\n    Other(anyhow::Error),\n}\n\nimpl From<layer_manager::Shutdown> for CreateImageLayersError {\n    fn 
from(_: layer_manager::Shutdown) -> Self {\n        CreateImageLayersError::Cancelled\n    }\n}\n\n#[derive(thiserror::Error, Debug, Clone)]\npub(crate) enum FlushLayerError {\n    /// Timeline cancellation token was cancelled\n    #[error(\"timeline shutting down\")]\n    Cancelled,\n\n    /// We tried to flush a layer while the Timeline is in an unexpected state\n    #[error(\"cannot flush frozen layers when flush_loop is not running, state is {0:?}\")]\n    NotRunning(FlushLoopState),\n\n    // Arc<> the following non-clonable error types: we must be Clone-able because the flush error is propagated from the flush\n    // loop via a watch channel, where we can only borrow it.\n    #[error(\"create image layers (shared)\")]\n    CreateImageLayersError(Arc<CreateImageLayersError>),\n\n    #[error(\"other (shared)\")]\n    Other(#[from] Arc<anyhow::Error>),\n}\n\nimpl FlushLayerError {\n    // When crossing from generic anyhow errors to this error type, we explicitly check\n    // for timeline cancellation to avoid logging inoffensive shutdown errors as warn/err.\n    fn from_anyhow(timeline: &Timeline, err: anyhow::Error) -> Self {\n        let cancelled = timeline.cancel.is_cancelled()\n            // The upload queue might have been shut down before the official cancellation of the timeline.\n            || err\n                .downcast_ref::<NotInitialized>()\n                .map(NotInitialized::is_stopping)\n                .unwrap_or_default();\n        if cancelled {\n            Self::Cancelled\n        } else {\n            Self::Other(Arc::new(err))\n        }\n    }\n}\n\nimpl From<layer_manager::Shutdown> for FlushLayerError {\n    fn from(_: layer_manager::Shutdown) -> Self {\n        FlushLayerError::Cancelled\n    }\n}\n\n#[derive(thiserror::Error, Debug)]\npub enum GetVectoredError {\n    #[error(\"timeline shutting down\")]\n    Cancelled,\n\n    #[error(\"requested too many keys: {0} > {1}\")]\n    Oversized(u64, u64),\n\n    #[error(\"requested 
at invalid LSN: {0}\")]\n    InvalidLsn(Lsn),\n\n    #[error(\"requested key not found: {0}\")]\n    MissingKey(Box<MissingKeyError>),\n\n    #[error(\"ancestry walk\")]\n    GetReadyAncestorError(#[source] GetReadyAncestorError),\n\n    #[error(transparent)]\n    Other(#[from] anyhow::Error),\n}\n\nimpl From<GetReadyAncestorError> for GetVectoredError {\n    fn from(value: GetReadyAncestorError) -> Self {\n        use GetReadyAncestorError::*;\n        match value {\n            Cancelled => GetVectoredError::Cancelled,\n            AncestorLsnTimeout(_) | BadState { .. } => {\n                GetVectoredError::GetReadyAncestorError(value)\n            }\n        }\n    }\n}\n\n#[derive(thiserror::Error, Debug)]\npub enum GetReadyAncestorError {\n    #[error(\"ancestor LSN wait error\")]\n    AncestorLsnTimeout(#[from] WaitLsnError),\n\n    #[error(\"bad state on timeline {timeline_id}: {state:?}\")]\n    BadState {\n        timeline_id: TimelineId,\n        state: TimelineState,\n    },\n\n    #[error(\"cancelled\")]\n    Cancelled,\n}\n\n#[derive(Clone, Copy)]\npub enum LogicalSizeCalculationCause {\n    Initial,\n    ConsumptionMetricsSyntheticSize,\n    EvictionTaskImitation,\n    TenantSizeHandler,\n}\n\npub enum GetLogicalSizePriority {\n    User,\n    Background,\n}\n\n#[derive(Debug, enumset::EnumSetType)]\npub(crate) enum CompactFlags {\n    ForceRepartition,\n    ForceImageLayerCreation,\n    ForceL0Compaction,\n    OnlyL0Compaction,\n    EnhancedGcBottomMostCompaction,\n    DryRun,\n    /// Makes image compaction yield if there's pending L0 compaction. 
This should always be used in\n    /// the background compaction task, since we want to aggressively compact down L0 to bound\n    /// read amplification.\n    ///\n    /// It only makes sense to use this when `compaction_l0_first` is enabled (such that we yield to\n    /// an L0 compaction pass), and without `OnlyL0Compaction` (L0 compaction shouldn't yield for L0\n    /// compaction).\n    YieldForL0,\n}\n\n#[serde_with::serde_as]\n#[derive(Debug, Clone, serde::Deserialize)]\npub(crate) struct CompactRequest {\n    pub compact_key_range: Option<CompactKeyRange>,\n    pub compact_lsn_range: Option<CompactLsnRange>,\n    /// Whether the compaction job should be scheduled.\n    #[serde(default)]\n    pub scheduled: bool,\n    /// Whether the compaction job should be split across key ranges.\n    #[serde(default)]\n    pub sub_compaction: bool,\n    /// Max job size for each subcompaction job.\n    pub sub_compaction_max_job_size_mb: Option<u64>,\n}\n\n#[derive(Debug, Clone, serde::Deserialize)]\npub(crate) struct MarkInvisibleRequest {\n    #[serde(default)]\n    pub is_visible: Option<bool>,\n}\n\n#[derive(Debug, Clone, Default)]\npub(crate) struct CompactOptions {\n    pub flags: EnumSet<CompactFlags>,\n    /// If set, the compaction will only compact the key range specified by this option.\n    /// This option is only used by GC compaction. For the full explanation, see [`compaction::GcCompactJob`].\n    pub compact_key_range: Option<CompactKeyRange>,\n    /// If set, the compaction will only compact the LSN within this value.\n    /// This option is only used by GC compaction. 
For the full explanation, see [`compaction::GcCompactJob`].\n    pub compact_lsn_range: Option<CompactLsnRange>,\n    /// Enable sub-compaction (split compaction job across key ranges).\n    /// This option is only used by GC compaction.\n    pub sub_compaction: bool,\n    /// Set job size for the GC compaction.\n    /// This option is only used by GC compaction.\n    pub sub_compaction_max_job_size_mb: Option<u64>,\n    /// Only for GC compaction.\n    /// If set, the compaction will compact the metadata layers. Should be only set to true in unit tests\n    /// because metadata compaction is not fully supported yet.\n    pub gc_compaction_do_metadata_compaction: bool,\n}\n\nimpl CompactOptions {\n    #[cfg(test)]\n    pub fn default_for_gc_compaction_unit_tests() -> Self {\n        Self {\n            gc_compaction_do_metadata_compaction: true,\n            ..Default::default()\n        }\n    }\n}\n\nimpl std::fmt::Debug for Timeline {\n    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {\n        write!(f, \"Timeline<{}>\", self.timeline_id)\n    }\n}\n\n#[derive(thiserror::Error, Debug, Clone)]\npub enum WaitLsnError {\n    // Called on a timeline which is shutting down\n    #[error(\"Shutdown\")]\n    Shutdown,\n\n    // Called on a timeline not in active state or shutting down\n    #[error(\"Bad timeline state: {0:?}\")]\n    BadState(TimelineState),\n\n    // Timeout expired while waiting for LSN to catch up with goal.\n    #[error(\"{0}\")]\n    Timeout(String),\n}\n\nimpl WaitLsnError {\n    pub(crate) fn is_cancel(&self) -> bool {\n        match self {\n            WaitLsnError::Shutdown => true,\n            WaitLsnError::BadState(timeline_state) => match timeline_state {\n                TimelineState::Loading => false,\n                TimelineState::Active => false,\n                TimelineState::Stopping => true,\n                TimelineState::Broken { .. 
} => false,\n            },\n            WaitLsnError::Timeout(_) => false,\n        }\n    }\n    pub(crate) fn into_anyhow(self) -> anyhow::Error {\n        match self {\n            WaitLsnError::Shutdown => anyhow::Error::new(self),\n            WaitLsnError::BadState(_) => anyhow::Error::new(self),\n            WaitLsnError::Timeout(_) => anyhow::Error::new(self),\n        }\n    }\n}\n\nimpl From<WaitLsnError> for tonic::Status {\n    fn from(err: WaitLsnError) -> Self {\n        use tonic::Code;\n        let code = if err.is_cancel() {\n            Code::Unavailable\n        } else {\n            Code::Internal\n        };\n        tonic::Status::new(code, err.to_string())\n    }\n}\n\n// The impls below achieve cancellation mapping for errors.\n// Perhaps there's a way of achieving this with less cruft.\n\nimpl From<CreateImageLayersError> for CompactionError {\n    fn from(e: CreateImageLayersError) -> Self {\n        match e {\n            CreateImageLayersError::Cancelled => CompactionError::new_cancelled(),\n            CreateImageLayersError::Other(e) => {\n                CompactionError::Other(e.context(\"create image layers\"))\n            }\n            _ => CompactionError::Other(e.into()),\n        }\n    }\n}\n\nimpl From<CreateImageLayersError> for FlushLayerError {\n    fn from(e: CreateImageLayersError) -> Self {\n        match e {\n            CreateImageLayersError::Cancelled => FlushLayerError::Cancelled,\n            any => FlushLayerError::CreateImageLayersError(Arc::new(any)),\n        }\n    }\n}\n\nimpl From<PageReconstructError> for CreateImageLayersError {\n    fn from(e: PageReconstructError) -> Self {\n        match e {\n            PageReconstructError::Cancelled => CreateImageLayersError::Cancelled,\n            _ => CreateImageLayersError::PageReconstructError(e),\n        }\n    }\n}\n\nimpl From<super::storage_layer::errors::PutError> for CreateImageLayersError {\n    fn from(e: super::storage_layer::errors::PutError) -> 
Self {\n        if e.is_cancel() {\n            CreateImageLayersError::Cancelled\n        } else {\n            CreateImageLayersError::Other(e.into_anyhow())\n        }\n    }\n}\n\nimpl From<GetVectoredError> for CreateImageLayersError {\n    fn from(e: GetVectoredError) -> Self {\n        match e {\n            GetVectoredError::Cancelled => CreateImageLayersError::Cancelled,\n            _ => CreateImageLayersError::GetVectoredError(e),\n        }\n    }\n}\n\nimpl From<GetVectoredError> for PageReconstructError {\n    fn from(e: GetVectoredError) -> Self {\n        match e {\n            GetVectoredError::Cancelled => PageReconstructError::Cancelled,\n            GetVectoredError::InvalidLsn(_) => PageReconstructError::Other(anyhow!(\"Invalid LSN\")),\n            err @ GetVectoredError::Oversized(_, _) => PageReconstructError::Other(err.into()),\n            GetVectoredError::MissingKey(err) => PageReconstructError::MissingKey(err),\n            GetVectoredError::GetReadyAncestorError(err) => PageReconstructError::from(err),\n            GetVectoredError::Other(err) => PageReconstructError::Other(err),\n        }\n    }\n}\n\nimpl From<GetReadyAncestorError> for PageReconstructError {\n    fn from(e: GetReadyAncestorError) -> Self {\n        use GetReadyAncestorError::*;\n        match e {\n            AncestorLsnTimeout(wait_err) => PageReconstructError::AncestorLsnTimeout(wait_err),\n            bad_state @ BadState { .. 
} => PageReconstructError::Other(anyhow::anyhow!(bad_state)),\n            Cancelled => PageReconstructError::Cancelled,\n        }\n    }\n}\n\npub(crate) enum WaitLsnTimeout {\n    Custom(Duration),\n    // Use the [`PageServerConf::wait_lsn_timeout`] default\n    Default,\n}\n\npub(crate) enum WaitLsnWaiter<'a> {\n    Timeline(&'a Timeline),\n    Tenant,\n    PageService,\n    HttpEndpoint,\n    BaseBackupCache,\n}\n\n/// Argument to [`Timeline::shutdown`].\n#[derive(Debug, Clone, Copy)]\npub(crate) enum ShutdownMode {\n    /// Graceful shutdown, may do a lot of I/O as we flush any open layers to disk. This method can\n    /// take multiple seconds for a busy timeline.\n    ///\n    /// While we are flushing, we continue to accept read I/O for LSNs ingested before\n    /// the call to [`Timeline::shutdown`].\n    FreezeAndFlush,\n    /// Only flush the layers to the remote storage without freezing any open layers. Flush the deletion\n    /// queue. This is the mode used by ancestor detach and any other operations that reloads a tenant\n    /// but not increasing the generation number. 
Note that this mode cannot be used at tenant shutdown,\n    /// as flushing the deletion queue at that time will cause shutdown-in-progress errors.\n    Reload,\n    /// Shut down immediately, without waiting for any open layers to flush.\n    Hard,\n}\n\n#[allow(clippy::large_enum_variant, reason = \"TODO\")]\nenum ImageLayerCreationOutcome {\n    /// We generated an image layer\n    Generated {\n        unfinished_image_layer: ImageLayerWriter,\n    },\n    /// The key range is empty\n    Empty,\n    /// (Only used in metadata image layer creation), after reading the metadata keys, we decide to skip\n    /// the image layer creation.\n    Skip,\n}\n\nenum RepartitionError {\n    Other(anyhow::Error),\n    CollectKeyspace(CollectKeySpaceError),\n}\n\nimpl RepartitionError {\n    fn is_cancel(&self) -> bool {\n        match self {\n            RepartitionError::Other(_) => false,\n            RepartitionError::CollectKeyspace(e) => e.is_cancel(),\n        }\n    }\n    fn into_anyhow(self) -> anyhow::Error {\n        match self {\n            RepartitionError::Other(e) => e,\n            RepartitionError::CollectKeyspace(e) => e.into_anyhow(),\n        }\n    }\n}\n\n/// Public interface functions\nimpl Timeline {\n    /// Get the LSN where this branch was created\n    pub(crate) fn get_ancestor_lsn(&self) -> Lsn {\n        self.ancestor_lsn\n    }\n\n    /// Get the ancestor's timeline id\n    pub(crate) fn get_ancestor_timeline_id(&self) -> Option<TimelineId> {\n        self.ancestor_timeline\n            .as_ref()\n            .map(|ancestor| ancestor.timeline_id)\n    }\n\n    /// Get the ancestor timeline\n    pub(crate) fn ancestor_timeline(&self) -> Option<&Arc<Timeline>> {\n        self.ancestor_timeline.as_ref()\n    }\n\n    /// Get the bytes written since the PITR cutoff on this branch, and\n    /// whether this branch's ancestor_lsn is within its parent's PITR.\n    pub(crate) fn get_pitr_history_stats(&self) -> (u64, bool) {\n        // TODO: for 
backwards compatibility, we return the full history back to 0 when the PITR\n        // cutoff has not yet been initialized. This should return None instead, but this is exposed\n        // in external HTTP APIs and callers may not handle a null value.\n        let gc_info = self.gc_info.read().unwrap();\n        let history = self\n            .get_last_record_lsn()\n            .checked_sub(gc_info.cutoffs.time.unwrap_or_default())\n            .unwrap_or_default()\n            .0;\n        (history, gc_info.within_ancestor_pitr)\n    }\n\n    /// Read timeline's GC cutoff: this is the LSN at which GC has started to happen\n    pub(crate) fn get_applied_gc_cutoff_lsn(&self) -> RcuReadGuard<Lsn> {\n        self.applied_gc_cutoff_lsn.read()\n    }\n\n    /// Read timeline's planned GC cutoff: this is the logical end of history that users are allowed\n    /// to read (based on configured PITR), even if physically we have more history. Returns None\n    /// if the PITR cutoff has not yet been initialized.\n    pub(crate) fn get_gc_cutoff_lsn(&self) -> Option<Lsn> {\n        self.gc_info.read().unwrap().cutoffs.time\n    }\n\n    /// Look up given page version.\n    ///\n    /// If a remote layer file is needed, it is downloaded as part of this\n    /// call.\n    ///\n    /// This method enforces [`Self::pagestream_throttle`] internally.\n    ///\n    /// NOTE: It is considered an error to 'get' a key that doesn't exist. The\n    /// abstraction above this needs to store suitable metadata to track what\n    /// data exists with what keys, in separate metadata entries. 
If a\n    /// non-existent key is requested, we may incorrectly return a value from\n    /// an ancestor branch, for example, or waste a lot of cycles chasing the\n    /// non-existing key.\n    ///\n    /// # Cancel-Safety\n    ///\n    /// This method is cancellation-safe.\n    #[inline(always)]\n    pub(crate) async fn get(\n        &self,\n        key: Key,\n        lsn: Lsn,\n        ctx: &RequestContext,\n    ) -> Result<Bytes, PageReconstructError> {\n        if !lsn.is_valid() {\n            return Err(PageReconstructError::Other(anyhow::anyhow!(\"Invalid LSN\")));\n        }\n\n        // This check is debug-only because of the cost of hashing, and because it's a double-check: we\n        // already checked the key against the shard_identity when looking up the Timeline from\n        // page_service.\n        debug_assert!(!self.shard_identity.is_key_disposable(&key));\n\n        let mut reconstruct_state = ValuesReconstructState::new(IoConcurrency::sequential());\n\n        let query = VersionedKeySpaceQuery::uniform(KeySpace::single(key..key.next()), lsn);\n\n        let vectored_res = self\n            .get_vectored_impl(query, &mut reconstruct_state, ctx)\n            .await;\n\n        let key_value = vectored_res?.pop_first();\n        match key_value {\n            Some((got_key, value)) => {\n                if got_key != key {\n                    error!(\n                        \"Expected {}, but singular vectored get returned {}\",\n                        key, got_key\n                    );\n                    Err(PageReconstructError::Other(anyhow!(\n                        \"Singular vectored get returned wrong key\"\n                    )))\n                } else {\n                    value\n                }\n            }\n            None => Err(PageReconstructError::MissingKey(Box::new(\n                MissingKeyError {\n                    keyspace: KeySpace::single(key..key.next()),\n                    shard: 
self.shard_identity.get_shard_number(&key),\n                    original_hwm_lsn: lsn,\n                    ancestor_lsn: None,\n                    backtrace: None,\n                    read_path: None,\n                    query: None,\n                },\n            ))),\n        }\n    }\n\n    #[inline(always)]\n    pub(crate) async fn debug_get(\n        &self,\n        key: Key,\n        lsn: Lsn,\n        ctx: &RequestContext,\n        reconstruct_state: &mut ValuesReconstructState,\n    ) -> Result<Bytes, PageReconstructError> {\n        if !lsn.is_valid() {\n            return Err(PageReconstructError::Other(anyhow::anyhow!(\"Invalid LSN\")));\n        }\n\n        // This check is debug-only because of the cost of hashing, and because it's a double-check: we\n        // already checked the key against the shard_identity when looking up the Timeline from\n        // page_service.\n        debug_assert!(!self.shard_identity.is_key_disposable(&key));\n\n        let query = VersionedKeySpaceQuery::uniform(KeySpace::single(key..key.next()), lsn);\n        let vectored_res = self\n            .debug_get_vectored_impl(query, reconstruct_state, ctx)\n            .await;\n\n        let key_value = vectored_res?.pop_first();\n        match key_value {\n            Some((got_key, value)) => {\n                if got_key != key {\n                    error!(\n                        \"Expected {}, but singular vectored get returned {}\",\n                        key, got_key\n                    );\n                    Err(PageReconstructError::Other(anyhow!(\n                        \"Singular vectored get returned wrong key\"\n                    )))\n                } else {\n                    value\n                }\n            }\n            None => Err(PageReconstructError::MissingKey(Box::new(\n                MissingKeyError {\n                    keyspace: KeySpace::single(key..key.next()),\n                    shard: 
self.shard_identity.get_shard_number(&key),\n                    original_hwm_lsn: lsn,\n                    ancestor_lsn: None,\n                    backtrace: None,\n                    read_path: None,\n                    query: None,\n                },\n            ))),\n        }\n    }\n\n    pub(crate) const LAYERS_VISITED_WARN_THRESHOLD: u32 = 100;\n\n    /// Look up multiple page versions at a given LSN\n    ///\n    /// This naive implementation will be replaced with a more efficient one\n    /// which actually vectorizes the read path.\n    ///\n    /// NB: the read path must be cancellation-safe. The Tonic gRPC service will drop the future\n    /// if the client goes away (e.g. due to timeout or cancellation).\n    pub(crate) async fn get_vectored(\n        &self,\n        query: VersionedKeySpaceQuery,\n        io_concurrency: super::storage_layer::IoConcurrency,\n        ctx: &RequestContext,\n    ) -> Result<BTreeMap<Key, Result<Bytes, PageReconstructError>>, GetVectoredError> {\n        let total_keyspace = query.total_keyspace();\n\n        let key_count = total_keyspace.total_raw_size();\n        if key_count > self.conf.max_get_vectored_keys.get() {\n            return Err(GetVectoredError::Oversized(\n                key_count as u64,\n                self.conf.max_get_vectored_keys.get() as u64,\n            ));\n        }\n\n        for range in &total_keyspace.ranges {\n            let mut key = range.start;\n            while key != range.end {\n                assert!(!self.shard_identity.is_key_disposable(&key));\n                key = key.next();\n            }\n        }\n\n        trace!(\n            \"get vectored query {} from task kind {:?}\",\n            query,\n            ctx.task_kind(),\n        );\n\n        let start = crate::metrics::GET_VECTORED_LATENCY\n            .for_task_kind(ctx.task_kind())\n            .map(|metric| (metric, Instant::now()));\n\n        let res = self\n            .get_vectored_impl(query, &mut 
ValuesReconstructState::new(io_concurrency), ctx)\n            .await;\n\n        if let Some((metric, start)) = start {\n            let elapsed = start.elapsed();\n            metric.observe(elapsed.as_secs_f64());\n        }\n\n        res\n    }\n\n    /// Scan the keyspace and return all existing key-values in the keyspace. This currently uses vectored\n    /// get under the hood. Normal vectored get would throw an error when a key in the keyspace is not found\n    /// during the search, but for the scan interface, it returns all existing key-value pairs, and does\n    /// not expect each single key in the key space will be found. The semantics is closer to the RocksDB\n    /// scan iterator interface. We could optimize this interface later to avoid some checks in the vectored\n    /// get path to maintain and split the probing and to-be-probed keyspace. We also need to ensure that\n    /// the scan operation will not cause OOM in the future.\n    pub(crate) async fn scan(\n        &self,\n        keyspace: KeySpace,\n        lsn: Lsn,\n        ctx: &RequestContext,\n        io_concurrency: super::storage_layer::IoConcurrency,\n    ) -> Result<BTreeMap<Key, Result<Bytes, PageReconstructError>>, GetVectoredError> {\n        if !lsn.is_valid() {\n            return Err(GetVectoredError::InvalidLsn(lsn));\n        }\n\n        trace!(\n            \"key-value scan request for {:?}@{} from task kind {:?}\",\n            keyspace,\n            lsn,\n            ctx.task_kind()\n        );\n\n        // We should generalize this into Keyspace::contains in the future.\n        for range in &keyspace.ranges {\n            if range.start.field1 < METADATA_KEY_BEGIN_PREFIX\n                || range.end.field1 > METADATA_KEY_END_PREFIX\n            {\n                return Err(GetVectoredError::Other(anyhow::anyhow!(\n                    \"only metadata keyspace can be scanned\"\n                )));\n            }\n        }\n\n        let start = 
crate::metrics::SCAN_LATENCY\n            .for_task_kind(ctx.task_kind())\n            .map(ScanLatencyOngoingRecording::start_recording);\n\n        let query = VersionedKeySpaceQuery::uniform(keyspace, lsn);\n\n        let vectored_res = self\n            .get_vectored_impl(query, &mut ValuesReconstructState::new(io_concurrency), ctx)\n            .await;\n\n        if let Some(recording) = start {\n            recording.observe();\n        }\n\n        vectored_res\n    }\n\n    pub(super) async fn get_vectored_impl(\n        &self,\n        query: VersionedKeySpaceQuery,\n        reconstruct_state: &mut ValuesReconstructState,\n        ctx: &RequestContext,\n    ) -> Result<BTreeMap<Key, Result<Bytes, PageReconstructError>>, GetVectoredError> {\n        if query.is_empty() {\n            return Ok(BTreeMap::default());\n        }\n\n        let read_path = if self.conf.enable_read_path_debugging || ctx.read_path_debug() {\n            Some(ReadPath::new(\n                query.total_keyspace(),\n                query.high_watermark_lsn()?,\n            ))\n        } else {\n            None\n        };\n\n        reconstruct_state.read_path = read_path;\n\n        let redo_attempt_type = if ctx.task_kind() == TaskKind::Compaction {\n            RedoAttemptType::LegacyCompaction\n        } else {\n            RedoAttemptType::ReadPage\n        };\n\n        let traversal_res: Result<(), _> = {\n            let ctx = RequestContextBuilder::from(ctx)\n                .perf_span(|crnt_perf_span| {\n                    info_span!(\n                        target: PERF_TRACE_TARGET,\n                        parent: crnt_perf_span,\n                        \"PLAN_IO\",\n                    )\n                })\n                .attached_child();\n\n            self.get_vectored_reconstruct_data(query.clone(), reconstruct_state, &ctx)\n                .maybe_perf_instrument(&ctx, |crnt_perf_span| crnt_perf_span.clone())\n                .await\n        };\n\n        
if let Err(err) = traversal_res {\n            // Wait for all the spawned IOs to complete.\n            // See comments on `spawn_io` inside `storage_layer` for more details.\n            let mut collect_futs = std::mem::take(&mut reconstruct_state.keys)\n                .into_values()\n                .map(|state| state.collect_pending_ios())\n                .collect::<FuturesUnordered<_>>();\n            while collect_futs.next().await.is_some() {}\n\n            // Enrich the missing key error with the original query.\n            if let GetVectoredError::MissingKey(mut missing_err) = err {\n                missing_err.enrich(query.clone());\n                return Err(GetVectoredError::MissingKey(missing_err));\n            }\n\n            return Err(err);\n        };\n\n        let layers_visited = reconstruct_state.get_layers_visited();\n\n        let ctx = RequestContextBuilder::from(ctx)\n            .perf_span(|crnt_perf_span| {\n                info_span!(\n                    target: PERF_TRACE_TARGET,\n                    parent: crnt_perf_span,\n                    \"RECONSTRUCT\",\n                )\n            })\n            .attached_child();\n\n        let futs = FuturesUnordered::new();\n        for (key, state) in std::mem::take(&mut reconstruct_state.keys) {\n            let req_lsn_for_key = query.map_key_to_lsn(&key);\n\n            futs.push({\n                let walredo_self = self.myself.upgrade().expect(\"&self method holds the arc\");\n                let ctx = RequestContextBuilder::from(&ctx)\n                    .perf_span(|crnt_perf_span| {\n                        info_span!(\n                            target: PERF_TRACE_TARGET,\n                            parent: crnt_perf_span,\n                            \"RECONSTRUCT_KEY\",\n                            key = %key,\n                        )\n                    })\n                    .attached_child();\n\n                async move {\n                    
assert_eq!(state.situation, ValueReconstructSituation::Complete);\n\n                    let res = state\n                        .collect_pending_ios()\n                        .maybe_perf_instrument(&ctx, |crnt_perf_span| {\n                            info_span!(\n                                target: PERF_TRACE_TARGET,\n                                parent: crnt_perf_span,\n                                \"WAIT_FOR_IO_COMPLETIONS\",\n                            )\n                        })\n                        .await;\n\n                    let converted = match res {\n                        Ok(ok) => ok,\n                        Err(err) => {\n                            return (key, Err(err));\n                        }\n                    };\n                    DELTAS_PER_READ_GLOBAL.observe(converted.num_deltas() as f64);\n\n                    // The walredo module expects the records to be descending in terms of Lsn.\n                    // And we submit the IOs in that order, so, there should be no need to sort here.\n                    debug_assert!(\n                        converted\n                            .records\n                            .is_sorted_by_key(|(lsn, _)| std::cmp::Reverse(*lsn)),\n                        \"{converted:?}\"\n                    );\n\n                    let walredo_deltas = converted.num_deltas();\n                    let walredo_res = walredo_self\n                        .reconstruct_value(key, req_lsn_for_key, converted, redo_attempt_type)\n                        .maybe_perf_instrument(&ctx, |crnt_perf_span| {\n                            info_span!(\n                                target: PERF_TRACE_TARGET,\n                                parent: crnt_perf_span,\n                                \"WALREDO\",\n                                deltas = %walredo_deltas,\n                            )\n                        })\n                        .await;\n\n                    (key, 
walredo_res)\n                }\n            });\n        }\n\n        let results = futs\n            .collect::<BTreeMap<Key, Result<Bytes, PageReconstructError>>>()\n            .maybe_perf_instrument(&ctx, |crnt_perf_span| crnt_perf_span.clone())\n            .await;\n\n        // For aux file keys (v1 or v2) the vectored read path does not return an error\n        // when they're missing. Instead they are omitted from the resulting btree\n        // (this is a requirement, not a bug). Skip updating the metric in these cases\n        // to avoid infinite results.\n        if !results.is_empty() {\n            if layers_visited >= Self::LAYERS_VISITED_WARN_THRESHOLD {\n                let total_keyspace = query.total_keyspace();\n                let max_request_lsn = query.high_watermark_lsn().expect(\"Validated previously\");\n\n                static LOG_PACER: Lazy<Mutex<RateLimit>> =\n                    Lazy::new(|| Mutex::new(RateLimit::new(Duration::from_secs(60))));\n                LOG_PACER.lock().unwrap().call(|| {\n                    let num_keys = total_keyspace.total_raw_size();\n                    let num_pages = results.len();\n                    tracing::info!(\n                      shard_id = %self.tenant_shard_id.shard_slug(),\n                      lsn = %max_request_lsn,\n                      \"Vectored read for {total_keyspace} visited {layers_visited} layers. 
Returned {num_pages}/{num_keys} pages.\",\n                    );\n                });\n            }\n\n            // Records the number of layers visited in a few different ways:\n            //\n            // * LAYERS_PER_READ: all layers count towards every read in the batch, because each\n            //   layer directly affects its observed latency.\n            //\n            // * LAYERS_PER_READ_BATCH: all layers count towards each batch, to get the per-batch\n            //   layer visits and access cost.\n            //\n            // * LAYERS_PER_READ_AMORTIZED: the average layer count per read, to get the amortized\n            //   read amplification after batching.\n            let layers_visited = layers_visited as f64;\n            let avg_layers_visited = layers_visited / results.len() as f64;\n            LAYERS_PER_READ_BATCH_GLOBAL.observe(layers_visited);\n            for _ in &results {\n                self.metrics.layers_per_read.observe(layers_visited);\n                LAYERS_PER_READ_GLOBAL.observe(layers_visited);\n                LAYERS_PER_READ_AMORTIZED_GLOBAL.observe(avg_layers_visited);\n            }\n        }\n\n        Ok(results)\n    }\n\n    // A copy of the get_vectored_impl method except that we store the image and wal records into `reconstruct_state`.\n    // This is only used in the http getpage call for debugging purpose.\n    pub(super) async fn debug_get_vectored_impl(\n        &self,\n        query: VersionedKeySpaceQuery,\n        reconstruct_state: &mut ValuesReconstructState,\n        ctx: &RequestContext,\n    ) -> Result<BTreeMap<Key, Result<Bytes, PageReconstructError>>, GetVectoredError> {\n        if query.is_empty() {\n            return Ok(BTreeMap::default());\n        }\n\n        let read_path = if self.conf.enable_read_path_debugging || ctx.read_path_debug() {\n            Some(ReadPath::new(\n                query.total_keyspace(),\n                query.high_watermark_lsn()?,\n            ))\n       
 } else {\n            None\n        };\n\n        reconstruct_state.read_path = read_path;\n\n        let traversal_res: Result<(), _> = self\n            .get_vectored_reconstruct_data(query.clone(), reconstruct_state, ctx)\n            .await;\n\n        if let Err(err) = traversal_res {\n            // Wait for all the spawned IOs to complete.\n            // See comments on `spawn_io` inside `storage_layer` for more details.\n            let mut collect_futs = std::mem::take(&mut reconstruct_state.keys)\n                .into_values()\n                .map(|state| state.collect_pending_ios())\n                .collect::<FuturesUnordered<_>>();\n            while collect_futs.next().await.is_some() {}\n            return Err(err);\n        };\n\n        let reconstruct_state = Arc::new(Mutex::new(reconstruct_state));\n        let futs = FuturesUnordered::new();\n\n        for (key, state) in std::mem::take(&mut reconstruct_state.lock().unwrap().keys) {\n            let req_lsn_for_key = query.map_key_to_lsn(&key);\n            futs.push({\n                let walredo_self = self.myself.upgrade().expect(\"&self method holds the arc\");\n                let rc_clone = Arc::clone(&reconstruct_state);\n\n                async move {\n                    assert_eq!(state.situation, ValueReconstructSituation::Complete);\n\n                    let converted = match state.collect_pending_ios().await {\n                        Ok(ok) => ok,\n                        Err(err) => {\n                            return (key, Err(err));\n                        }\n                    };\n                    DELTAS_PER_READ_GLOBAL.observe(converted.num_deltas() as f64);\n\n                    // The walredo module expects the records to be descending in terms of Lsn.\n                    // And we submit the IOs in that order, so, there should be no need to sort here.\n                    debug_assert!(\n                        converted\n                            .records\n 
                           .is_sorted_by_key(|(lsn, _)| std::cmp::Reverse(*lsn)),\n                        \"{converted:?}\"\n                    );\n                    {\n                        let mut guard = rc_clone.lock().unwrap();\n                        guard.set_debug_state(&converted);\n                    }\n                    (\n                        key,\n                        walredo_self\n                            .reconstruct_value(\n                                key,\n                                req_lsn_for_key,\n                                converted,\n                                RedoAttemptType::ReadPage,\n                            )\n                            .await,\n                    )\n                }\n            });\n        }\n\n        let results = futs\n            .collect::<BTreeMap<Key, Result<Bytes, PageReconstructError>>>()\n            .await;\n\n        Ok(results)\n    }\n\n    /// Get last or prev record separately. Same as get_last_record_rlsn().last/prev.\n    pub(crate) fn get_last_record_lsn(&self) -> Lsn {\n        self.last_record_lsn.load().last\n    }\n\n    pub(crate) fn get_prev_record_lsn(&self) -> Lsn {\n        self.last_record_lsn.load().prev\n    }\n\n    /// Atomically get both last and prev.\n    pub(crate) fn get_last_record_rlsn(&self) -> RecordLsn {\n        self.last_record_lsn.load()\n    }\n\n    /// Subscribe to callers of wait_lsn(). 
The value of the channel is None if there are no\n    /// wait_lsn() calls in progress, and Some(Lsn) if there is an active waiter for wait_lsn().\n    pub(crate) fn subscribe_for_wait_lsn_updates(&self) -> watch::Receiver<Option<Lsn>> {\n        self.last_record_lsn.status_receiver()\n    }\n\n    pub(crate) fn get_disk_consistent_lsn(&self) -> Lsn {\n        self.disk_consistent_lsn.load()\n    }\n\n    /// remote_consistent_lsn from the perspective of the tenant's current generation,\n    /// not validated with control plane yet.\n    /// See [`Self::get_remote_consistent_lsn_visible`].\n    pub(crate) fn get_remote_consistent_lsn_projected(&self) -> Option<Lsn> {\n        self.remote_client.remote_consistent_lsn_projected()\n    }\n\n    /// remote_consistent_lsn which the tenant is guaranteed not to go backward from,\n    /// i.e. a value of remote_consistent_lsn_projected which has undergone\n    /// generation validation in the deletion queue.\n    pub(crate) fn get_remote_consistent_lsn_visible(&self) -> Option<Lsn> {\n        self.remote_client.remote_consistent_lsn_visible()\n    }\n\n    /// The sum of the file size of all historic layers in the layer map.\n    /// This method makes no distinction between local and remote layers.\n    /// Hence, the result **does not represent local filesystem usage**.\n    pub(crate) async fn layer_size_sum(&self) -> u64 {\n        let guard = self\n            .layers\n            .read(LayerManagerLockHolder::GetLayerMapInfo)\n            .await;\n        guard.layer_size_sum()\n    }\n\n    pub(crate) fn resident_physical_size(&self) -> u64 {\n        self.metrics.resident_physical_size_get()\n    }\n\n    pub(crate) fn get_directory_metrics(&self) -> [u64; DirectoryKind::KINDS_NUM] {\n        array::from_fn(|idx| self.directory_metrics[idx].load(AtomicOrdering::Relaxed))\n    }\n\n    ///\n    /// Wait until WAL has been received and processed up to this LSN.\n    ///\n    /// You should call this before any of the 
other get_* or list_* functions. Calling\n    /// those functions with an LSN that has not been processed yet is an error.\n    ///\n    pub(crate) async fn wait_lsn(\n        &self,\n        lsn: Lsn,\n        who_is_waiting: WaitLsnWaiter<'_>,\n        timeout: WaitLsnTimeout,\n        ctx: &RequestContext, /* Prepare for use by cancellation */\n    ) -> Result<(), WaitLsnError> {\n        let state = self.current_state();\n        if self.cancel.is_cancelled() || matches!(state, TimelineState::Stopping) {\n            return Err(WaitLsnError::Shutdown);\n        } else if !matches!(state, TimelineState::Active) {\n            return Err(WaitLsnError::BadState(state));\n        }\n\n        if cfg!(debug_assertions) {\n            match ctx.task_kind() {\n                TaskKind::WalReceiverManager\n                | TaskKind::WalReceiverConnectionHandler\n                | TaskKind::WalReceiverConnectionPoller => {\n                    let is_myself = match who_is_waiting {\n                        WaitLsnWaiter::Timeline(waiter) => {\n                            Weak::ptr_eq(&waiter.myself, &self.myself)\n                        }\n                        WaitLsnWaiter::Tenant\n                        | WaitLsnWaiter::PageService\n                        | WaitLsnWaiter::HttpEndpoint\n                        | WaitLsnWaiter::BaseBackupCache => unreachable!(\n                            \"tenant or page_service context are not expected to have task kind {:?}\",\n                            ctx.task_kind()\n                        ),\n                    };\n                    if is_myself {\n                        if let Err(current) = self.last_record_lsn.would_wait_for(lsn) {\n                            // walingest is the only one that can advance last_record_lsn; it should make sure to never reach here\n                            panic!(\n                                \"this timeline's walingest task is calling wait_lsn({lsn}) but we only have 
last_record_lsn={current}; would deadlock\"\n                            );\n                        }\n                    } else {\n                        // if another  timeline's  is waiting for us, there's no deadlock risk because\n                        // our walreceiver task can make progress independent of theirs\n                    }\n                }\n                _ => {}\n            }\n        }\n\n        let timeout = match timeout {\n            WaitLsnTimeout::Custom(t) => t,\n            WaitLsnTimeout::Default => self.conf.wait_lsn_timeout,\n        };\n\n        let timer = crate::metrics::WAIT_LSN_TIME.start_timer();\n        let start_finish_counterpair_guard = self.metrics.wait_lsn_start_finish_counterpair.guard();\n\n        let wait_for_timeout = self.last_record_lsn.wait_for_timeout(lsn, timeout);\n        let wait_for_timeout = std::pin::pin!(wait_for_timeout);\n        // Use threshold of 1 because even 1 second of wait for ingest is very much abnormal.\n        let log_slow_threshold = Duration::from_secs(1);\n        // Use period of 10 to avoid flooding logs during an outage that affects all timelines.\n        let log_slow_period = Duration::from_secs(10);\n        let mut logging_permit = None;\n        let wait_for_timeout = monitor_slow_future(\n            log_slow_threshold,\n            log_slow_period,\n            wait_for_timeout,\n            |MonitorSlowFutureCallback {\n                 ready,\n                 is_slow,\n                 elapsed_total,\n                 elapsed_since_last_callback,\n             }| {\n                self.metrics\n                    .wait_lsn_in_progress_micros\n                    .inc_by(u64::try_from(elapsed_since_last_callback.as_micros()).unwrap());\n                if !is_slow {\n                    return;\n                }\n                // It's slow, see if we should log it.\n                // (We limit the logging to one per invocation per timeline to avoid 
excessive\n                // logging during an extended broker / networking outage that affects all timelines.)\n                if logging_permit.is_none() {\n                    logging_permit = self.wait_lsn_log_slow.try_acquire().ok();\n                }\n                if logging_permit.is_none() {\n                    return;\n                }\n                // We log it.\n                if ready {\n                    info!(\n                        \"slow wait_lsn completed after {:.3}s\",\n                        elapsed_total.as_secs_f64()\n                    );\n                } else {\n                    info!(\n                        \"slow wait_lsn still running for {:.3}s\",\n                        elapsed_total.as_secs_f64()\n                    );\n                }\n            },\n        );\n        let res = wait_for_timeout.await;\n        // don't count the time spent waiting for lock below, and also in walreceiver.status(), towards the wait_lsn_time_histo\n        drop(logging_permit);\n        drop(start_finish_counterpair_guard);\n        drop(timer);\n        match res {\n            Ok(()) => Ok(()),\n            Err(e) => {\n                use utils::seqwait::SeqWaitError::*;\n                match e {\n                    Shutdown => Err(WaitLsnError::Shutdown),\n                    Timeout => {\n                        let walreceiver_status = self.walreceiver_status();\n                        Err(WaitLsnError::Timeout(format!(\n                            \"Timed out while waiting for WAL record at LSN {} to arrive, last_record_lsn {} disk consistent LSN={}, WalReceiver status: {}\",\n                            lsn,\n                            self.get_last_record_lsn(),\n                            self.get_disk_consistent_lsn(),\n                            walreceiver_status,\n                        )))\n                    }\n                }\n            }\n        }\n    }\n\n    pub(crate) fn 
walreceiver_status(&self) -> String {\n        match &*self.walreceiver.lock().unwrap() {\n            None => \"stopping or stopped\".to_string(),\n            Some(walreceiver) => match walreceiver.status() {\n                Some(status) => status.to_human_readable_string(),\n                None => \"Not active\".to_string(),\n            },\n        }\n    }\n\n    /// Check that it is valid to request operations with that lsn.\n    pub(crate) fn check_lsn_is_in_scope(\n        &self,\n        lsn: Lsn,\n        latest_gc_cutoff_lsn: &RcuReadGuard<Lsn>,\n    ) -> anyhow::Result<()> {\n        ensure!(\n            lsn >= **latest_gc_cutoff_lsn,\n            \"LSN {} is earlier than latest GC cutoff {} (we might've already garbage collected needed data)\",\n            lsn,\n            **latest_gc_cutoff_lsn,\n        );\n        Ok(())\n    }\n\n    /// Initializes an LSN lease. The function will return an error if the requested LSN is less than the `latest_gc_cutoff_lsn`.\n    pub(crate) fn init_lsn_lease(\n        &self,\n        lsn: Lsn,\n        length: Duration,\n        ctx: &RequestContext,\n    ) -> anyhow::Result<LsnLease> {\n        self.make_lsn_lease(lsn, length, true, ctx)\n    }\n\n    /// Renews a lease at a particular LSN. 
The requested LSN is not validated against the `latest_gc_cutoff_lsn` when we are in the grace period.\n    pub(crate) fn renew_lsn_lease(\n        &self,\n        lsn: Lsn,\n        length: Duration,\n        ctx: &RequestContext,\n    ) -> anyhow::Result<LsnLease> {\n        self.make_lsn_lease(lsn, length, false, ctx)\n    }\n\n    /// Obtains a temporary lease blocking garbage collection for the given LSN.\n    ///\n    /// If we are in `AttachedSingle` mode and is not blocked by the lsn lease deadline, this function will error\n    /// if the requesting LSN is less than the `latest_gc_cutoff_lsn` and there is no existing request present.\n    ///\n    /// If there is an existing lease in the map, the lease will be renewed only if the request extends the lease.\n    /// The returned lease is therefore the maximum between the existing lease and the requesting lease.\n    fn make_lsn_lease(\n        &self,\n        lsn: Lsn,\n        length: Duration,\n        init: bool,\n        _ctx: &RequestContext,\n    ) -> anyhow::Result<LsnLease> {\n        let lease = {\n            // Normalize the requested LSN to be aligned, and move to the first record\n            // if it points to the beginning of the page (header).\n            let lsn = xlog_utils::normalize_lsn(lsn, WAL_SEGMENT_SIZE);\n\n            let mut gc_info = self.gc_info.write().unwrap();\n            let planned_cutoff = gc_info.min_cutoff();\n\n            let valid_until = SystemTime::now() + length;\n\n            let entry = gc_info.leases.entry(lsn);\n\n            match entry {\n                Entry::Occupied(mut occupied) => {\n                    let existing_lease = occupied.get_mut();\n                    if valid_until > existing_lease.valid_until {\n                        existing_lease.valid_until = valid_until;\n                        let dt: DateTime<Utc> = valid_until.into();\n                        info!(\"lease extended to {}\", dt);\n                    } else {\n                
        let dt: DateTime<Utc> = existing_lease.valid_until.into();\n                        info!(\"existing lease covers greater length, valid until {}\", dt);\n                    }\n\n                    existing_lease.clone()\n                }\n                Entry::Vacant(vacant) => {\n                    // Never allow a lease to be requested for an LSN below the applied GC cutoff. The data could have been deleted.\n                    let latest_gc_cutoff_lsn = self.get_applied_gc_cutoff_lsn();\n                    if lsn < *latest_gc_cutoff_lsn {\n                        bail!(\n                            \"tried to request an lsn lease for an lsn below the latest gc cutoff. requested at {} gc cutoff {}\",\n                            lsn,\n                            *latest_gc_cutoff_lsn\n                        );\n                    }\n\n                    // We allow create lease for those below the planned gc cutoff if we are still within the grace period\n                    // of GC blocking.\n                    let validate = {\n                        let conf = self.tenant_conf.load();\n                        !conf.is_gc_blocked_by_lsn_lease_deadline()\n                    };\n\n                    // Do not allow initial lease creation to be below the planned gc cutoff. The client (compute_ctl) determines\n                    // whether it is a initial lease creation or a renewal.\n                    if (init || validate) && lsn < planned_cutoff {\n                        bail!(\n                            \"tried to request an lsn lease for an lsn below the planned gc cutoff. 
requested at {} planned gc cutoff {}\",\n                            lsn,\n                            planned_cutoff\n                        );\n                    }\n\n                    let dt: DateTime<Utc> = valid_until.into();\n                    info!(\"lease created, valid until {}\", dt);\n                    vacant.insert(LsnLease { valid_until }).clone()\n                }\n            }\n        };\n\n        Ok(lease)\n    }\n\n    /// Freeze the current open in-memory layer. It will be written to disk on next iteration.\n    /// Returns the flush request ID which can be awaited with wait_flush_completion().\n    #[instrument(skip(self), fields(tenant_id=%self.tenant_shard_id.tenant_id, shard_id=%self.tenant_shard_id.shard_slug(), timeline_id=%self.timeline_id))]\n    pub(crate) async fn freeze(&self) -> Result<u64, FlushLayerError> {\n        self.freeze0().await\n    }\n\n    /// Freeze and flush the open in-memory layer, waiting for it to be written to disk.\n    #[instrument(skip(self), fields(tenant_id=%self.tenant_shard_id.tenant_id, shard_id=%self.tenant_shard_id.shard_slug(), timeline_id=%self.timeline_id))]\n    pub(crate) async fn freeze_and_flush(&self) -> Result<(), FlushLayerError> {\n        self.freeze_and_flush0().await\n    }\n\n    /// Freeze the current open in-memory layer. 
It will be written to disk on next iteration.\n    /// Returns the flush request ID which can be awaited with wait_flush_completion().\n    pub(crate) async fn freeze0(&self) -> Result<u64, FlushLayerError> {\n        let mut g = self.write_lock.lock().await;\n        let to_lsn = self.get_last_record_lsn();\n        self.freeze_inmem_layer_at(to_lsn, &mut g).await\n    }\n\n    // This exists to provide a non-span creating version of `freeze_and_flush` we can call without\n    // polluting the span hierarchy.\n    pub(crate) async fn freeze_and_flush0(&self) -> Result<(), FlushLayerError> {\n        let token = self.freeze0().await?;\n        self.wait_flush_completion(token).await\n    }\n\n    // Check if an open ephemeral layer should be closed: this provides\n    // background enforcement of checkpoint interval if there is no active WAL receiver, to avoid keeping\n    // an ephemeral layer open forever when idle.  It also freezes layers if the global limit on\n    // ephemeral layer bytes has been breached.\n    pub(super) async fn maybe_freeze_ephemeral_layer(&self) {\n        debug_assert_current_span_has_tenant_and_timeline_id();\n\n        let Ok(mut write_guard) = self.write_lock.try_lock() else {\n            // If the write lock is held, there is an active wal receiver: rolling open layers\n            // is their responsibility while they hold this lock.\n            return;\n        };\n\n        // FIXME: why not early exit? 
because before #7927 the state would have been cleared every\n        // time, and this was missed.\n        // if write_guard.is_none() { return; }\n\n        let Ok(layers_guard) = self.layers.try_read(LayerManagerLockHolder::TryFreezeLayer) else {\n            // Don't block if the layer lock is busy\n            return;\n        };\n\n        let Ok(lm) = layers_guard.layer_map() else {\n            return;\n        };\n\n        let Some(open_layer) = &lm.open_layer else {\n            // If there is no open layer, we have no layer freezing to do.  However, we might need to generate\n            // some updates to disk_consistent_lsn and remote_consistent_lsn, in case we ingested some WAL regions\n            // that didn't result in writes to this shard.\n\n            // Must not hold the layers lock while waiting for a flush.\n            drop(layers_guard);\n\n            let last_record_lsn = self.get_last_record_lsn();\n            let disk_consistent_lsn = self.get_disk_consistent_lsn();\n            if last_record_lsn > disk_consistent_lsn {\n                // We have no open layer, but disk_consistent_lsn is behind the last record: this indicates\n                // we are a sharded tenant and have skipped some WAL\n                let last_freeze_ts = *self.last_freeze_ts.read().unwrap();\n                if last_freeze_ts.elapsed() >= self.get_checkpoint_timeout() {\n                    // Only do this if have been layer-less longer than get_checkpoint_timeout, so that a shard\n                    // without any data ingested (yet) doesn't write a remote index as soon as it\n                    // sees its LSN advance: we only do this if we've been layer-less\n                    // for some time.\n                    tracing::debug!(\n                        \"Advancing disk_consistent_lsn past WAL ingest gap {} -> {}\",\n                        disk_consistent_lsn,\n                        last_record_lsn\n                    );\n\n                
    // The flush loop will update remote consistent LSN as well as disk consistent LSN.\n                    // We know there is no open layer, so we can request freezing without actually\n                    // freezing anything. This is true even if we have dropped the layers_guard, we\n                    // still hold the write_guard.\n                    let _ = async {\n                        let token = self\n                            .freeze_inmem_layer_at(last_record_lsn, &mut write_guard)\n                            .await?;\n                        self.wait_flush_completion(token).await\n                    }\n                    .await;\n                }\n            }\n\n            return;\n        };\n\n        let current_size = open_layer.len();\n\n        let current_lsn = self.get_last_record_lsn();\n\n        let checkpoint_distance_override = open_layer.tick();\n\n        if let Some(size_override) = checkpoint_distance_override {\n            if current_size > size_override {\n                // This is not harmful, but it only happens in relatively rare cases where\n                // time-based checkpoints are not happening fast enough to keep the amount of\n                // ephemeral data within configured limits.  
It's a sign of stress on the system.\n                tracing::info!(\n                    \"Early-rolling open layer at size {current_size} (limit {size_override}) due to dirty data pressure\"\n                );\n            }\n        }\n\n        let checkpoint_distance =\n            checkpoint_distance_override.unwrap_or(self.get_checkpoint_distance());\n\n        if self.should_roll(\n            current_size,\n            current_size,\n            checkpoint_distance,\n            self.get_last_record_lsn(),\n            self.last_freeze_at.load(),\n            open_layer.get_opened_at(),\n        ) {\n            match open_layer.info() {\n                InMemoryLayerInfo::Frozen { lsn_start, lsn_end } => {\n                    // We may reach this point if the layer was already frozen but not yet flushed: flushing\n                    // happens asynchronously in the background.\n                    tracing::debug!(\n                        \"Not freezing open layer, it's already frozen ({lsn_start}..{lsn_end})\"\n                    );\n                }\n                InMemoryLayerInfo::Open { .. 
} => {\n                    // Upgrade to a write lock and freeze the layer\n                    drop(layers_guard);\n                    let res = self\n                        .freeze_inmem_layer_at(current_lsn, &mut write_guard)\n                        .await;\n\n                    if let Err(e) = res {\n                        tracing::info!(\n                            \"failed to flush frozen layer after background freeze: {e:#}\"\n                        );\n                    }\n                }\n            }\n        }\n    }\n\n    /// Checks if the internal state of the timeline is consistent with it being able to be offloaded.\n    ///\n    /// This is necessary but not sufficient for offloading of the timeline as it might have\n    /// child timelines that are not offloaded yet.\n    pub(crate) fn can_offload(&self) -> (bool, &'static str) {\n        if self.remote_client.is_archived() != Some(true) {\n            return (false, \"the timeline is not archived\");\n        }\n        if !self.remote_client.no_pending_work() {\n            // if the remote client is still processing some work, we can't offload\n            return (false, \"the upload queue is not drained yet\");\n        }\n\n        (true, \"ok\")\n    }\n\n    /// Outermost timeline compaction operation; downloads needed layers. 
Returns whether we have pending\n    /// compaction tasks.\n    pub(crate) async fn compact(\n        self: &Arc<Self>,\n        cancel: &CancellationToken,\n        flags: EnumSet<CompactFlags>,\n        ctx: &RequestContext,\n    ) -> Result<CompactionOutcome, CompactionError> {\n        let res = self\n            .compact_with_options(\n                cancel,\n                CompactOptions {\n                    flags,\n                    compact_key_range: None,\n                    compact_lsn_range: None,\n                    sub_compaction: false,\n                    sub_compaction_max_job_size_mb: None,\n                    gc_compaction_do_metadata_compaction: false,\n                },\n                ctx,\n            )\n            .await;\n        if let Err(err) = &res {\n            log_compaction_error(err, None, cancel.is_cancelled(), false);\n        }\n        res\n    }\n\n    /// Outermost timeline compaction operation; downloads needed layers.\n    ///\n    /// NB: the cancellation token is usually from a background task, but can also come from a\n    /// request task.\n    pub(crate) async fn compact_with_options(\n        self: &Arc<Self>,\n        cancel: &CancellationToken,\n        options: CompactOptions,\n        ctx: &RequestContext,\n    ) -> Result<CompactionOutcome, CompactionError> {\n        // Acquire the compaction lock and task semaphore.\n        //\n        // L0-only compaction uses a separate semaphore (if enabled) to make sure it isn't starved\n        // out by other background tasks (including image compaction). 
We request this via\n        // `BackgroundLoopKind::L0Compaction`.\n        //\n        // Yield for pending L0 compaction while waiting for the semaphore.\n        let is_l0_only = options.flags.contains(CompactFlags::OnlyL0Compaction);\n        let semaphore_kind = match is_l0_only && self.get_compaction_l0_semaphore() {\n            true => BackgroundLoopKind::L0Compaction,\n            false => BackgroundLoopKind::Compaction,\n        };\n        let yield_for_l0 = options.flags.contains(CompactFlags::YieldForL0);\n        if yield_for_l0 {\n            // If this is an L0 pass, it doesn't make sense to yield for L0.\n            debug_assert!(!is_l0_only, \"YieldForL0 during L0 pass\");\n            // If `compaction_l0_first` is disabled, there's no point yielding.\n            debug_assert!(self.get_compaction_l0_first(), \"YieldForL0 without L0 pass\");\n        }\n\n        let acquire = async move {\n            let guard = self.compaction_lock.lock().await;\n            let permit = super::tasks::acquire_concurrency_permit(semaphore_kind, ctx).await;\n            (guard, permit)\n        };\n\n        let (_guard, _permit) = tokio::select! 
{\n            (guard, permit) = acquire => (guard, permit),\n            _ = self.l0_compaction_trigger.notified(), if yield_for_l0 => {\n                return Ok(CompactionOutcome::YieldForL0);\n            }\n            _ = self.cancel.cancelled() => return Ok(CompactionOutcome::Skipped),\n            _ = cancel.cancelled() => return Ok(CompactionOutcome::Skipped),\n        };\n\n        let last_record_lsn = self.get_last_record_lsn();\n\n        // Last record Lsn could be zero in case the timeline was just created\n        if !last_record_lsn.is_valid() {\n            warn!(\n                \"Skipping compaction for potentially just initialized timeline, it has invalid last record lsn: {last_record_lsn}\"\n            );\n            return Ok(CompactionOutcome::Skipped);\n        }\n\n        let result = match self.get_compaction_algorithm_settings().kind {\n            CompactionAlgorithm::Tiered => {\n                self.compact_tiered(cancel, ctx).await?;\n                Ok(CompactionOutcome::Done)\n            }\n            CompactionAlgorithm::Legacy => self.compact_legacy(cancel, options, ctx).await,\n        };\n\n        // Signal compaction failure to avoid L0 flush stalls when it's broken.\n        match &result {\n            Ok(_) => self.compaction_failed.store(false, AtomicOrdering::Relaxed),\n            Err(e) if e.is_cancel() => {}\n            Err(_) => self.compaction_failed.store(true, AtomicOrdering::Relaxed),\n        };\n\n        result\n    }\n\n    /// Mutate the timeline with a [`TimelineWriter`].\n    pub(crate) async fn writer(&self) -> TimelineWriter<'_> {\n        TimelineWriter {\n            tl: self,\n            write_guard: self.write_lock.lock().await,\n        }\n    }\n\n    pub(crate) fn activate(\n        self: &Arc<Self>,\n        parent: Arc<crate::tenant::TenantShard>,\n        broker_client: BrokerClientChannel,\n        background_jobs_can_start: Option<&completion::Barrier>,\n        ctx: 
&RequestContext,\n    ) {\n        if self.tenant_shard_id.is_shard_zero() {\n            // Logical size is only maintained accurately on shard zero.\n            self.spawn_initial_logical_size_computation_task(ctx);\n        }\n        self.launch_wal_receiver(ctx, broker_client);\n        self.set_state(TimelineState::Active);\n        self.launch_eviction_task(parent, background_jobs_can_start);\n    }\n\n    /// After this function returns, there are no timeline-scoped tasks left running.\n    ///\n    /// The preferred pattern is:\n    /// - in any spawned tasks, keep Timeline::guard open + Timeline::cancel / child token\n    /// - if early shutdown (not just cancellation) of a sub-tree of tasks is required,\n    ///   go the extra mile and keep track of JoinHandles\n    /// - Keep track of JoinHandles using a passed-down `Arc<Mutex<Option<JoinSet>>>` or similar,\n    ///   instead of spawning directly on a runtime. It is a more composable / testable pattern.\n    ///\n    /// For legacy reasons, we still have multiple tasks spawned using\n    /// `task_mgr::spawn(X, Some(tenant_id), Some(timeline_id))`.\n    /// We refer to these as \"timeline-scoped task_mgr tasks\".\n    /// Some of these tasks are already sensitive to Timeline::cancel while others are\n    /// not sensitive to Timeline::cancel and instead respect [`task_mgr::shutdown_token`]\n    /// or [`task_mgr::shutdown_watcher`].\n    /// We want to gradually convert the code base away from these.\n    ///\n    /// Here is an inventory of timeline-scoped task_mgr tasks that are still sensitive to\n    /// `task_mgr::shutdown_{token,watcher}` (there are also tenant-scoped and global-scoped\n    /// ones that aren't mentioned here):\n    /// - [`TaskKind::TimelineDeletionWorker`]\n    ///    - NB: also used for tenant deletion\n    /// - [`TaskKind::RemoteUploadTask`]\n    /// - [`TaskKind::InitialLogicalSizeCalculation`]\n    /// - [`TaskKind::DownloadAllRemoteLayers`] (can we get rid of 
it?)\n    ///\n    /// Inventory of timeline-scoped task_mgr tasks that use spawn but aren't sensitive:\n    /// - [`TaskKind::Eviction`]\n    /// - [`TaskKind::LayerFlushTask`]\n    /// - [`TaskKind::OndemandLogicalSizeCalculation`]\n    /// - [`TaskKind::GarbageCollector`] (immediate_gc is timeline-scoped)\n    pub(crate) async fn shutdown(&self, mode: ShutdownMode) {\n        debug_assert_current_span_has_tenant_and_timeline_id();\n\n        // Regardless of whether we're going to try_freeze_and_flush\n        // cancel walreceiver to stop ingesting more data asap.\n        //\n        // Note that we're accepting a race condition here where we may\n        // do the final flush below, before walreceiver observes the\n        // cancellation and exits.\n        // This means we may open a new InMemoryLayer after the final flush below.\n        // Flush loop is also still running for a short while, so, in theory, it\n        // could also make its way into the upload queue.\n        //\n        // If we wait for the shutdown of the walreceiver before moving on to the\n        // flush, then that would be avoided. But we don't do it because the\n        // walreceiver entertains reads internally, which means that it possibly\n        // depends on the download of layers. Layer download is only sensitive to\n        // the cancellation of the entire timeline, so cancelling the walreceiver\n        // will have no effect on the individual get requests.\n        // This would cause problems when there are a lot of ongoing downloads or\n        // S3 is unavailable, i.e. 
detach, deletion, etc would hang,\n        // and we can't deallocate resources of the timeline, etc.\n        let walreceiver = self.walreceiver.lock().unwrap().take();\n        tracing::debug!(\n            is_some = walreceiver.is_some(),\n            \"Waiting for WalReceiverManager...\"\n        );\n        if let Some(walreceiver) = walreceiver {\n            walreceiver.cancel().await;\n        }\n        // ... and inform any waiters for newer LSNs that there won't be any.\n        self.last_record_lsn.shutdown();\n\n        if let ShutdownMode::FreezeAndFlush = mode {\n            let do_flush = if let Some((open, frozen)) = self\n                .layers\n                .read(LayerManagerLockHolder::Shutdown)\n                .await\n                .layer_map()\n                .map(|lm| (lm.open_layer.is_some(), lm.frozen_layers.len()))\n                .ok()\n                .filter(|(open, frozen)| *open || *frozen > 0)\n            {\n                if self.remote_client.is_archived() == Some(true) {\n                    // No point flushing on shutdown for an archived timeline: it is not important\n                    // to have it nice and fresh after our restart, and trying to flush here might\n                    // race with trying to offload it (which also stops the flush loop)\n                    false\n                } else {\n                    tracing::info!(?open, frozen, \"flushing and freezing on shutdown\");\n                    true\n                }\n            } else {\n                // this is double-shutdown, it'll be a no-op\n                true\n            };\n\n            // we shut down walreceiver above, so, we won't add anything more\n            // to the InMemoryLayer; freeze it and wait for all frozen layers\n            // to reach the disk & upload queue, then shut the upload queue and\n            // wait for it to drain.\n            if do_flush {\n                match self.freeze_and_flush().await {\n      
              Ok(_) => {\n                        // drain the upload queue\n                        // if we did not wait for completion here, it might be our shutdown process\n                        // didn't wait for remote uploads to complete at all, as new tasks can forever\n                        // be spawned.\n                        //\n                        // what is problematic is the shutting down of RemoteTimelineClient, because\n                        // obviously it does not make sense to stop while we wait for it, but what\n                        // about corner cases like s3 suddenly hanging up?\n                        self.remote_client.shutdown().await;\n                    }\n                    Err(FlushLayerError::Cancelled) => {\n                        // this is likely the second shutdown, ignore silently.\n                        // TODO: this can be removed once https://github.com/neondatabase/neon/issues/5080\n                        debug_assert!(self.cancel.is_cancelled());\n                    }\n                    Err(e) => {\n                        // Non-fatal.  Shutdown is infallible.  
Failures to flush just mean that\n                        // we have some extra WAL replay to do next time the timeline starts.\n                        warn!(\"failed to freeze and flush: {e:#}\");\n                    }\n                }\n\n                // `self.remote_client.shutdown().await` above should have already flushed everything from the queue, but\n                // we also do a final check here to ensure that the queue is empty.\n                if !self.remote_client.no_pending_work() {\n                    warn!(\n                        \"still have pending work in remote upload queue, but continuing shutting down anyways\"\n                    );\n                }\n            }\n        }\n\n        if let ShutdownMode::Reload = mode {\n            // drain the upload queue\n            self.remote_client.shutdown().await;\n            if !self.remote_client.no_pending_work() {\n                warn!(\n                    \"still have pending work in remote upload queue, but continuing shutting down anyways\"\n                );\n            }\n        }\n\n        // Signal any subscribers to our cancellation token to drop out\n        tracing::debug!(\"Cancelling CancellationToken\");\n        self.cancel.cancel();\n\n        // If we have a background task downloading heatmap layers stop it.\n        // The background downloads are sensitive to timeline cancellation (done above),\n        // so the drain will be immediate.\n        self.stop_and_drain_heatmap_layers_download().await;\n\n        // Prevent new page service requests from starting.\n        self.handles.shutdown();\n\n        // Transition the remote_client into a state where it's only useful for timeline deletion.\n        // (The deletion use case is why we can't just hook up remote_client to Self::cancel.)\n        self.remote_client.stop();\n\n        // As documented in remote_client.stop()'s doc comment, it's our responsibility\n        // to shut down the 
upload queue tasks.\n        // TODO: fix that, task management should be encapsulated inside remote_client.\n        task_mgr::shutdown_tasks(\n            Some(TaskKind::RemoteUploadTask),\n            Some(self.tenant_shard_id),\n            Some(self.timeline_id),\n        )\n        .await;\n\n        // TODO: work toward making this a no-op. See this function's doc comment for more context.\n        tracing::debug!(\"Waiting for tasks...\");\n        task_mgr::shutdown_tasks(None, Some(self.tenant_shard_id), Some(self.timeline_id)).await;\n\n        {\n            // Allow any remaining in-memory layers to do cleanup -- until that, they hold the gate\n            // open.\n            let mut write_guard = self.write_lock.lock().await;\n            self.layers\n                .write(LayerManagerLockHolder::Shutdown)\n                .await\n                .shutdown(&mut write_guard);\n        }\n\n        // Finally wait until any gate-holders are complete.\n        //\n        // TODO: once above shutdown_tasks is a no-op, we can close the gate before calling shutdown_tasks\n        // and use a TBD variant of shutdown_tasks that asserts that there were no tasks left.\n        self.gate.close().await;\n\n        self.metrics.shutdown();\n    }\n\n    pub(crate) fn set_state(&self, new_state: TimelineState) {\n        match (self.current_state(), new_state) {\n            (equal_state_1, equal_state_2) if equal_state_1 == equal_state_2 => {\n                info!(\"Ignoring new state, equal to the existing one: {equal_state_2:?}\");\n            }\n            (st, TimelineState::Loading) => {\n                error!(\"ignoring transition from {st:?} into Loading state\");\n            }\n            (TimelineState::Broken { .. 
}, new_state) => {\n                error!(\"Ignoring state update {new_state:?} for broken timeline\");\n            }\n            (TimelineState::Stopping, TimelineState::Active) => {\n                error!(\"Not activating a Stopping timeline\");\n            }\n            (_, new_state) => {\n                self.state.send_replace(new_state);\n            }\n        }\n    }\n\n    pub(crate) fn set_broken(&self, reason: String) {\n        let backtrace_str: String = format!(\"{}\", std::backtrace::Backtrace::force_capture());\n        let broken_state = TimelineState::Broken {\n            reason,\n            backtrace: backtrace_str,\n        };\n        self.set_state(broken_state);\n\n        // Although the Broken state is not equivalent to shutdown() (shutdown will be called\n        // later when this tenant is detached or the process shuts down), firing the cancellation token\n        // here avoids the need for other tasks to watch for the Broken state explicitly.\n        self.cancel.cancel();\n    }\n\n    pub(crate) fn current_state(&self) -> TimelineState {\n        self.state.borrow().clone()\n    }\n\n    pub(crate) fn is_broken(&self) -> bool {\n        matches!(&*self.state.borrow(), TimelineState::Broken { .. 
})\n    }\n\n    pub(crate) fn is_active(&self) -> bool {\n        self.current_state() == TimelineState::Active\n    }\n\n    pub(crate) fn is_archived(&self) -> Option<bool> {\n        self.remote_client.is_archived()\n    }\n\n    pub(crate) fn is_invisible(&self) -> Option<bool> {\n        self.remote_client.is_invisible()\n    }\n\n    pub(crate) fn is_stopping(&self) -> bool {\n        self.current_state() == TimelineState::Stopping\n    }\n\n    pub(crate) fn subscribe_for_state_updates(&self) -> watch::Receiver<TimelineState> {\n        self.state.subscribe()\n    }\n\n    pub(crate) async fn wait_to_become_active(\n        &self,\n        _ctx: &RequestContext, // Prepare for use by cancellation\n    ) -> Result<(), TimelineState> {\n        let mut receiver = self.state.subscribe();\n        loop {\n            let current_state = receiver.borrow().clone();\n            match current_state {\n                TimelineState::Loading => {\n                    receiver\n                        .changed()\n                        .await\n                        .expect(\"holding a reference to self\");\n                }\n                TimelineState::Active => {\n                    return Ok(());\n                }\n                TimelineState::Broken { .. 
} | TimelineState::Stopping => {\n                    // There's no chance the timeline can transition back into ::Active\n                    return Err(current_state);\n                }\n            }\n        }\n    }\n\n    pub(crate) async fn layer_map_info(\n        &self,\n        reset: LayerAccessStatsReset,\n    ) -> Result<LayerMapInfo, layer_manager::Shutdown> {\n        let guard = self\n            .layers\n            .read(LayerManagerLockHolder::GetLayerMapInfo)\n            .await;\n        let layer_map = guard.layer_map()?;\n        let mut in_memory_layers = Vec::with_capacity(layer_map.frozen_layers.len() + 1);\n        if let Some(open_layer) = &layer_map.open_layer {\n            in_memory_layers.push(open_layer.info());\n        }\n        for frozen_layer in &layer_map.frozen_layers {\n            in_memory_layers.push(frozen_layer.info());\n        }\n\n        let historic_layers = layer_map\n            .iter_historic_layers()\n            .map(|desc| guard.get_from_desc(&desc).info(reset))\n            .collect();\n\n        Ok(LayerMapInfo {\n            in_memory_layers,\n            historic_layers,\n        })\n    }\n\n    #[instrument(skip_all, fields(tenant_id = %self.tenant_shard_id.tenant_id, shard_id = %self.tenant_shard_id.shard_slug(), timeline_id = %self.timeline_id))]\n    pub(crate) async fn download_layer(\n        &self,\n        layer_file_name: &LayerName,\n        ctx: &RequestContext,\n    ) -> Result<Option<bool>, super::storage_layer::layer::DownloadError> {\n        let Some(layer) = self\n            .find_layer(layer_file_name)\n            .await\n            .map_err(|e| match e {\n                layer_manager::Shutdown => {\n                    super::storage_layer::layer::DownloadError::TimelineShutdown\n                }\n            })?\n        else {\n            return Ok(None);\n        };\n\n        layer.download(ctx).await?;\n\n        Ok(Some(true))\n    }\n\n    /// Evict just one layer.\n    
///\n    /// Returns `Ok(None)` in the case where the layer could not be found by its `layer_file_name`.\n    pub(crate) async fn evict_layer(\n        &self,\n        layer_file_name: &LayerName,\n    ) -> anyhow::Result<Option<bool>> {\n        let _gate = self\n            .gate\n            .enter()\n            .map_err(|_| anyhow::anyhow!(\"Shutting down\"))?;\n\n        let Some(local_layer) = self.find_layer(layer_file_name).await? else {\n            return Ok(None);\n        };\n\n        // curl has this by default\n        let timeout = std::time::Duration::from_secs(120);\n\n        match local_layer.evict_and_wait(timeout).await {\n            Ok(()) => Ok(Some(true)),\n            Err(EvictionError::NotFound) => Ok(Some(false)),\n            Err(EvictionError::Downloaded) => Ok(Some(false)),\n            Err(EvictionError::Timeout) => Ok(Some(false)),\n        }\n    }\n\n    fn should_roll(\n        &self,\n        layer_size: u64,\n        projected_layer_size: u64,\n        checkpoint_distance: u64,\n        projected_lsn: Lsn,\n        last_freeze_at: Lsn,\n        opened_at: Instant,\n    ) -> bool {\n        let distance = projected_lsn.widening_sub(last_freeze_at);\n\n        // Rolling the open layer can be triggered by:\n        // 1. The distance from the last LSN we rolled at. This bounds the amount of WAL that\n        //    the safekeepers need to store.  For sharded tenants, we multiply by shard count to\n        //    account for how writes are distributed across shards: we expect each node to consume\n        //    1/count of the LSN on average.\n        // 2. The size of the currently open layer.\n        // 3. The time since the last roll. 
It helps safekeepers to regard pageserver as caught\n        //    up and suspend activity.\n        if distance >= checkpoint_distance as i128 * self.shard_identity.count.count() as i128 {\n            info!(\n                \"Will roll layer at {} with layer size {} due to LSN distance ({})\",\n                projected_lsn, layer_size, distance\n            );\n\n            true\n        } else if projected_layer_size >= checkpoint_distance {\n            // NB: this check is relied upon by:\n            let _ = IndexEntry::validate_checkpoint_distance;\n            info!(\n                \"Will roll layer at {} with layer size {} due to layer size ({})\",\n                projected_lsn, layer_size, projected_layer_size\n            );\n\n            true\n        } else if distance > 0 && opened_at.elapsed() >= self.get_checkpoint_timeout() {\n            info!(\n                \"Will roll layer at {} with layer size {} due to time since first write to the layer ({:?})\",\n                projected_lsn,\n                layer_size,\n                opened_at.elapsed()\n            );\n\n            true\n        } else {\n            false\n        }\n    }\n\n    pub(crate) fn is_basebackup_cache_enabled(&self) -> bool {\n        let tenant_conf = self.tenant_conf.load();\n        tenant_conf\n            .tenant_conf\n            .basebackup_cache_enabled\n            .unwrap_or(self.conf.default_tenant_conf.basebackup_cache_enabled)\n    }\n\n    /// Try to get a basebackup from the on-disk cache.\n    pub(crate) async fn get_cached_basebackup(&self, lsn: Lsn) -> Option<tokio::fs::File> {\n        self.basebackup_cache\n            .get(self.tenant_shard_id.tenant_id, self.timeline_id, lsn)\n            .await\n    }\n\n    /// Convenience method to attempt fetching a basebackup for the timeline if enabled and safe for\n    /// the given request parameters.\n    ///\n    /// TODO: consider moving this onto GrpcPageServiceHandler once the libpq handler is 
gone.\n    pub async fn get_cached_basebackup_if_enabled(\n        &self,\n        lsn: Option<Lsn>,\n        prev_lsn: Option<Lsn>,\n        full: bool,\n        replica: bool,\n        gzip: bool,\n    ) -> Option<tokio::fs::File> {\n        if !self.is_basebackup_cache_enabled() || !self.basebackup_cache.is_enabled() {\n            return None;\n        }\n        // We have to know which LSN to fetch the basebackup for.\n        let lsn = lsn?;\n        // We only cache gzipped, non-full basebackups for primary computes with automatic prev_lsn.\n        if prev_lsn.is_some() || full || replica || !gzip {\n            return None;\n        }\n        self.get_cached_basebackup(lsn).await\n    }\n\n    /// Prepare basebackup for the given LSN and store it in the basebackup cache.\n    /// The method is asynchronous and returns immediately.\n    /// The actual basebackup preparation is performed in the background\n    /// by the basebackup cache on a best-effort basis.\n    pub(crate) fn prepare_basebackup(&self, lsn: Lsn) {\n        if !self.is_basebackup_cache_enabled() {\n            return;\n        }\n        if !self.tenant_shard_id.is_shard_zero() {\n            // In theory we should never get here, but just in case check it.\n            // Preparing basebackup doesn't make sense for shards other than shard zero.\n            return;\n        }\n        if !self.is_active() {\n            // May happen during initial timeline creation.\n            // Such timeline is not in the global timeline map yet,\n            // so basebackup cache will not be able to find it.\n            // TODO(diko): We can prepare such timelines in finish_creation().\n            return;\n        }\n\n        self.basebackup_cache\n            .send_prepare(self.tenant_shard_id, self.timeline_id, lsn);\n    }\n}\n\n/// Number of times we will compute partition within a checkpoint distance.\nconst REPARTITION_FREQ_IN_CHECKPOINT_DISTANCE: u64 = 10;\n\n// Private functions\nimpl 
Timeline {\n    pub(crate) fn get_lsn_lease_length(&self) -> Duration {\n        let tenant_conf = self.tenant_conf.load();\n        tenant_conf\n            .tenant_conf\n            .lsn_lease_length\n            .unwrap_or(self.conf.default_tenant_conf.lsn_lease_length)\n    }\n\n    pub(crate) fn get_lsn_lease_length_for_ts(&self) -> Duration {\n        let tenant_conf = self.tenant_conf.load();\n        tenant_conf\n            .tenant_conf\n            .lsn_lease_length_for_ts\n            .unwrap_or(self.conf.default_tenant_conf.lsn_lease_length_for_ts)\n    }\n\n    pub(crate) fn is_gc_blocked_by_lsn_lease_deadline(&self) -> bool {\n        let tenant_conf = self.tenant_conf.load();\n        tenant_conf.is_gc_blocked_by_lsn_lease_deadline()\n    }\n\n    pub(crate) fn get_lazy_slru_download(&self) -> bool {\n        let tenant_conf = self.tenant_conf.load();\n        tenant_conf\n            .tenant_conf\n            .lazy_slru_download\n            .unwrap_or(self.conf.default_tenant_conf.lazy_slru_download)\n    }\n\n    /// Checks if a get page request should get perf tracing\n    ///\n    /// The configuration priority is: tenant config override, default tenant config,\n    /// pageserver config.\n    pub(crate) fn is_get_page_request_sampled(&self) -> bool {\n        let tenant_conf = self.tenant_conf.load();\n        let ratio = tenant_conf\n            .tenant_conf\n            .sampling_ratio\n            .flatten()\n            .or(self.conf.default_tenant_conf.sampling_ratio)\n            .or(self.conf.tracing.as_ref().map(|t| t.sampling_ratio));\n\n        match ratio {\n            Some(r) => {\n                if r.numerator == 0 {\n                    false\n                } else {\n                    rand::rng().random_range(0..r.denominator) < r.numerator\n                }\n            }\n            None => false,\n        }\n    }\n\n    fn get_checkpoint_distance(&self) -> u64 {\n        let tenant_conf = self.tenant_conf.load();\n     
   tenant_conf\n            .tenant_conf\n            .checkpoint_distance\n            .unwrap_or(self.conf.default_tenant_conf.checkpoint_distance)\n    }\n\n    fn get_checkpoint_timeout(&self) -> Duration {\n        let tenant_conf = self.tenant_conf.load();\n        tenant_conf\n            .tenant_conf\n            .checkpoint_timeout\n            .unwrap_or(self.conf.default_tenant_conf.checkpoint_timeout)\n    }\n\n    pub(crate) fn get_pitr_interval(&self) -> Duration {\n        let tenant_conf = &self.tenant_conf.load().tenant_conf;\n        tenant_conf\n            .pitr_interval\n            .unwrap_or(self.conf.default_tenant_conf.pitr_interval)\n    }\n\n    fn get_compaction_period(&self) -> Duration {\n        let tenant_conf = self.tenant_conf.load().tenant_conf.clone();\n        tenant_conf\n            .compaction_period\n            .unwrap_or(self.conf.default_tenant_conf.compaction_period)\n    }\n\n    fn get_compaction_target_size(&self) -> u64 {\n        let tenant_conf = self.tenant_conf.load();\n        tenant_conf\n            .tenant_conf\n            .compaction_target_size\n            .unwrap_or(self.conf.default_tenant_conf.compaction_target_size)\n    }\n\n    fn get_compaction_threshold(&self) -> usize {\n        let tenant_conf = self.tenant_conf.load();\n        tenant_conf\n            .tenant_conf\n            .compaction_threshold\n            .unwrap_or(self.conf.default_tenant_conf.compaction_threshold)\n    }\n\n    /// Returns `true` if the rel_size_v2 config is enabled. NOTE: the write path and read path\n    /// should look at `get_rel_size_v2_status()` to get the actual status of the timeline. 
It is\n    /// possible that the index part persists the state while the config doesn't get persisted.\n    pub(crate) fn get_rel_size_v2_enabled(&self) -> bool {\n        let tenant_conf = self.tenant_conf.load();\n        tenant_conf\n            .tenant_conf\n            .rel_size_v2_enabled\n            .unwrap_or(self.conf.default_tenant_conf.rel_size_v2_enabled)\n    }\n\n    pub(crate) fn get_rel_size_v2_status(&self) -> (RelSizeMigration, Option<Lsn>) {\n        let (status, migrated_at) = self.rel_size_v2_status.load().as_ref().clone();\n        (status.unwrap_or(RelSizeMigration::Legacy), migrated_at)\n    }\n\n    fn get_compaction_upper_limit(&self) -> usize {\n        let tenant_conf = self.tenant_conf.load();\n        tenant_conf\n            .tenant_conf\n            .compaction_upper_limit\n            .unwrap_or(self.conf.default_tenant_conf.compaction_upper_limit)\n    }\n\n    pub fn get_compaction_l0_first(&self) -> bool {\n        let tenant_conf = self.tenant_conf.load().tenant_conf.clone();\n        tenant_conf\n            .compaction_l0_first\n            .unwrap_or(self.conf.default_tenant_conf.compaction_l0_first)\n    }\n\n    pub fn get_compaction_l0_semaphore(&self) -> bool {\n        let tenant_conf = self.tenant_conf.load().tenant_conf.clone();\n        tenant_conf\n            .compaction_l0_semaphore\n            .unwrap_or(self.conf.default_tenant_conf.compaction_l0_semaphore)\n    }\n\n    fn get_l0_flush_delay_threshold(&self) -> Option<usize> {\n        // By default, delay L0 flushes at 3x the compaction threshold. 
The compaction threshold\n        // defaults to 10, and L0 compaction is generally able to keep L0 counts below 30.\n        const DEFAULT_L0_FLUSH_DELAY_FACTOR: usize = 3;\n\n        // If compaction is disabled, don't delay.\n        if self.get_compaction_period() == Duration::ZERO {\n            return None;\n        }\n\n        let compaction_threshold = self.get_compaction_threshold();\n        let tenant_conf = self.tenant_conf.load();\n        let l0_flush_delay_threshold = tenant_conf\n            .tenant_conf\n            .l0_flush_delay_threshold\n            .or(self.conf.default_tenant_conf.l0_flush_delay_threshold)\n            .unwrap_or(DEFAULT_L0_FLUSH_DELAY_FACTOR * compaction_threshold);\n\n        // 0 disables backpressure.\n        if l0_flush_delay_threshold == 0 {\n            return None;\n        }\n\n        // Clamp the flush delay threshold to the compaction threshold; it doesn't make sense to\n        // backpressure flushes below this.\n        // TODO: the tenant config should have validation to prevent this instead.\n        debug_assert!(l0_flush_delay_threshold >= compaction_threshold);\n        Some(max(l0_flush_delay_threshold, compaction_threshold))\n    }\n\n    fn get_l0_flush_stall_threshold(&self) -> Option<usize> {\n        // Disable L0 stalls by default. Stalling can cause unavailability if L0 compaction isn't\n        // responsive, and it can e.g. block on other compaction via the compaction semaphore or\n        // sibling timelines. We need more confidence before enabling this.\n        const DEFAULT_L0_FLUSH_STALL_FACTOR: usize = 0; // TODO: default to e.g. 5\n\n        // If compaction is disabled, don't stall.\n        if self.get_compaction_period() == Duration::ZERO {\n            return None;\n        }\n\n        // If compaction is failing, don't stall and try to keep the tenant alive. 
This may not be a\n        // good idea: read amp can grow unbounded, leading to terrible performance, and we may take\n        // on unbounded compaction debt that can take a long time to fix once compaction comes back\n        // online. At least we'll delay flushes, slowing down the growth and buying some time.\n        if self.compaction_failed.load(AtomicOrdering::Relaxed) {\n            return None;\n        }\n\n        let compaction_threshold = self.get_compaction_threshold();\n        let tenant_conf = self.tenant_conf.load();\n        let l0_flush_stall_threshold = tenant_conf\n            .tenant_conf\n            .l0_flush_stall_threshold\n            .or(self.conf.default_tenant_conf.l0_flush_stall_threshold);\n\n        // Tests sometimes set compaction_threshold=1 to generate lots of layer files, and don't\n        // handle the 20-second compaction delay. Some (e.g. `test_backward_compatibility`) can't\n        // easily adjust the L0 backpressure settings, so just disable stalls in this case.\n        if cfg!(feature = \"testing\")\n            && compaction_threshold == 1\n            && l0_flush_stall_threshold.is_none()\n        {\n            return None;\n        }\n\n        let l0_flush_stall_threshold = l0_flush_stall_threshold\n            .unwrap_or(DEFAULT_L0_FLUSH_STALL_FACTOR * compaction_threshold);\n\n        // 0 disables backpressure.\n        if l0_flush_stall_threshold == 0 {\n            return None;\n        }\n\n        // Clamp the flush stall threshold to the compaction threshold; it doesn't make sense to\n        // backpressure flushes below this.\n        // TODO: the tenant config should have validation to prevent this instead.\n        debug_assert!(l0_flush_stall_threshold >= compaction_threshold);\n        Some(max(l0_flush_stall_threshold, compaction_threshold))\n    }\n\n    fn get_image_creation_threshold(&self) -> usize {\n        let tenant_conf = self.tenant_conf.load();\n        tenant_conf\n            
.tenant_conf\n            .image_creation_threshold\n            .unwrap_or(self.conf.default_tenant_conf.image_creation_threshold)\n    }\n\n    // HADRON\n    fn get_image_layer_force_creation_period(&self) -> Option<Duration> {\n        let tenant_conf = self.tenant_conf.load();\n        tenant_conf\n            .tenant_conf\n            .image_layer_force_creation_period\n            .or(self\n                .conf\n                .default_tenant_conf\n                .image_layer_force_creation_period)\n    }\n\n    fn get_compaction_algorithm_settings(&self) -> CompactionAlgorithmSettings {\n        let tenant_conf = &self.tenant_conf.load();\n        tenant_conf\n            .tenant_conf\n            .compaction_algorithm\n            .as_ref()\n            .unwrap_or(&self.conf.default_tenant_conf.compaction_algorithm)\n            .clone()\n    }\n\n    pub fn get_compaction_shard_ancestor(&self) -> bool {\n        let tenant_conf = self.tenant_conf.load();\n        tenant_conf\n            .tenant_conf\n            .compaction_shard_ancestor\n            .unwrap_or(self.conf.default_tenant_conf.compaction_shard_ancestor)\n    }\n\n    fn get_eviction_policy(&self) -> EvictionPolicy {\n        let tenant_conf = self.tenant_conf.load();\n        tenant_conf\n            .tenant_conf\n            .eviction_policy\n            .unwrap_or(self.conf.default_tenant_conf.eviction_policy)\n    }\n\n    fn get_evictions_low_residence_duration_metric_threshold(\n        tenant_conf: &pageserver_api::models::TenantConfig,\n        default_tenant_conf: &pageserver_api::config::TenantConfigToml,\n    ) -> Duration {\n        tenant_conf\n            .evictions_low_residence_duration_metric_threshold\n            .unwrap_or(default_tenant_conf.evictions_low_residence_duration_metric_threshold)\n    }\n\n    fn get_image_layer_creation_check_threshold(&self) -> u8 {\n        let tenant_conf = self.tenant_conf.load();\n        tenant_conf\n            .tenant_conf\n      
      .image_layer_creation_check_threshold\n            .unwrap_or(\n                self.conf\n                    .default_tenant_conf\n                    .image_layer_creation_check_threshold,\n            )\n    }\n\n    fn get_gc_compaction_settings(&self) -> GcCompactionCombinedSettings {\n        let tenant_conf = &self.tenant_conf.load();\n        let gc_compaction_enabled = tenant_conf\n            .tenant_conf\n            .gc_compaction_enabled\n            .unwrap_or(self.conf.default_tenant_conf.gc_compaction_enabled);\n        let gc_compaction_verification = tenant_conf\n            .tenant_conf\n            .gc_compaction_verification\n            .unwrap_or(self.conf.default_tenant_conf.gc_compaction_verification);\n        let gc_compaction_initial_threshold_kb = tenant_conf\n            .tenant_conf\n            .gc_compaction_initial_threshold_kb\n            .unwrap_or(\n                self.conf\n                    .default_tenant_conf\n                    .gc_compaction_initial_threshold_kb,\n            );\n        let gc_compaction_ratio_percent = tenant_conf\n            .tenant_conf\n            .gc_compaction_ratio_percent\n            .unwrap_or(self.conf.default_tenant_conf.gc_compaction_ratio_percent);\n        GcCompactionCombinedSettings {\n            gc_compaction_enabled,\n            gc_compaction_verification,\n            gc_compaction_initial_threshold_kb,\n            gc_compaction_ratio_percent,\n        }\n    }\n\n    fn get_image_creation_preempt_threshold(&self) -> usize {\n        let tenant_conf = self.tenant_conf.load();\n        tenant_conf\n            .tenant_conf\n            .image_creation_preempt_threshold\n            .unwrap_or(\n                self.conf\n                    .default_tenant_conf\n                    .image_creation_preempt_threshold,\n            )\n    }\n\n    pub(super) fn tenant_conf_updated(&self, new_conf: &AttachedTenantConf) {\n        // NB: Most tenant conf options are read by 
background loops, so,\n        // changes will automatically be picked up.\n\n        // The threshold is embedded in the metric. So, we need to update it.\n        {\n            let new_threshold = Self::get_evictions_low_residence_duration_metric_threshold(\n                &new_conf.tenant_conf,\n                &self.conf.default_tenant_conf,\n            );\n\n            let tenant_id_str = self.tenant_shard_id.tenant_id.to_string();\n            let shard_id_str = format!(\"{}\", self.tenant_shard_id.shard_slug());\n\n            let timeline_id_str = self.timeline_id.to_string();\n\n            self.remote_client.update_config(&new_conf.location);\n\n            let mut rel_size_cache = self.rel_size_snapshot_cache.lock().unwrap();\n            if let Some(new_capacity) = new_conf.tenant_conf.relsize_snapshot_cache_capacity {\n                if new_capacity != rel_size_cache.capacity() {\n                    rel_size_cache.set_capacity(new_capacity);\n                }\n            }\n\n            self.metrics\n                .evictions_with_low_residence_duration\n                .write()\n                .unwrap()\n                .change_threshold(\n                    &tenant_id_str,\n                    &shard_id_str,\n                    &timeline_id_str,\n                    new_threshold,\n                );\n        }\n    }\n\n    /// Open a Timeline handle.\n    ///\n    /// Loads the metadata for the timeline into memory, but not the layer map.\n    #[allow(clippy::too_many_arguments)]\n    pub(super) fn new(\n        conf: &'static PageServerConf,\n        tenant_conf: Arc<ArcSwap<AttachedTenantConf>>,\n        metadata: &TimelineMetadata,\n        previous_heatmap: Option<PreviousHeatmap>,\n        ancestor: Option<Arc<Timeline>>,\n        timeline_id: TimelineId,\n        tenant_shard_id: TenantShardId,\n        generation: Generation,\n        shard_identity: ShardIdentity,\n        walredo_mgr: Option<Arc<super::WalRedoManager>>,\n      
  resources: TimelineResources,\n        pg_version: PgMajorVersion,\n        state: TimelineState,\n        attach_wal_lag_cooldown: Arc<OnceLock<WalLagCooldown>>,\n        create_idempotency: crate::tenant::CreateTimelineIdempotency,\n        gc_compaction_state: Option<GcCompactionState>,\n        rel_size_v2_status: Option<RelSizeMigration>,\n        rel_size_migrated_at: Option<Lsn>,\n        cancel: CancellationToken,\n    ) -> Arc<Self> {\n        let disk_consistent_lsn = metadata.disk_consistent_lsn();\n        let (state, _) = watch::channel(state);\n\n        let (layer_flush_start_tx, _) = tokio::sync::watch::channel((0, disk_consistent_lsn));\n        let (layer_flush_done_tx, _) = tokio::sync::watch::channel((0, Ok(())));\n\n        let evictions_low_residence_duration_metric_threshold = {\n            let loaded_tenant_conf = tenant_conf.load();\n            Self::get_evictions_low_residence_duration_metric_threshold(\n                &loaded_tenant_conf.tenant_conf,\n                &conf.default_tenant_conf,\n            )\n        };\n\n        if let Some(ancestor) = &ancestor {\n            let mut ancestor_gc_info = ancestor.gc_info.write().unwrap();\n            // If we construct an explicit timeline object, it's obviously not offloaded\n            let is_offloaded = MaybeOffloaded::No;\n            ancestor_gc_info.insert_child(timeline_id, metadata.ancestor_lsn(), is_offloaded);\n        }\n\n        let relsize_snapshot_cache_capacity = {\n            let loaded_tenant_conf = tenant_conf.load();\n            loaded_tenant_conf\n                .tenant_conf\n                .relsize_snapshot_cache_capacity\n                .unwrap_or(conf.default_tenant_conf.relsize_snapshot_cache_capacity)\n        };\n\n        Arc::new_cyclic(|myself| {\n            let metrics = Arc::new(TimelineMetrics::new(\n                &tenant_shard_id,\n                &timeline_id,\n                
crate::metrics::EvictionsWithLowResidenceDurationBuilder::new(\n                    \"mtime\",\n                    evictions_low_residence_duration_metric_threshold,\n                ),\n            ));\n            let aux_file_metrics = metrics.aux_file_size_gauge.clone();\n\n            let mut result = Timeline {\n                conf,\n                tenant_conf,\n                myself: myself.clone(),\n                timeline_id,\n                tenant_shard_id,\n                generation,\n                shard_identity,\n                pg_version,\n                layers: Default::default(),\n                gc_compaction_layer_update_lock: tokio::sync::RwLock::new(()),\n\n                walredo_mgr,\n                walreceiver: Mutex::new(None),\n\n                remote_client: Arc::new(resources.remote_client),\n\n                // initialize in-memory 'last_record_lsn' from 'disk_consistent_lsn'.\n                last_record_lsn: SeqWait::new(RecordLsn {\n                    last: disk_consistent_lsn,\n                    prev: metadata.prev_record_lsn().unwrap_or(Lsn(0)),\n                }),\n                disk_consistent_lsn: AtomicLsn::new(disk_consistent_lsn.0),\n\n                gc_compaction_state: ArcSwapOption::from_pointee(gc_compaction_state),\n\n                last_freeze_at: AtomicLsn::new(disk_consistent_lsn.0),\n                last_freeze_ts: RwLock::new(Instant::now()),\n\n                loaded_at: (disk_consistent_lsn, SystemTime::now()),\n\n                ancestor_timeline: ancestor,\n                ancestor_lsn: metadata.ancestor_lsn(),\n\n                metrics,\n\n                query_metrics: crate::metrics::SmgrQueryTimePerTimeline::new(\n                    &tenant_shard_id,\n                    &timeline_id,\n                    resources.pagestream_throttle_metrics,\n                ),\n\n                directory_metrics: array::from_fn(|_| AtomicU64::new(0)),\n                directory_metrics_inited: 
array::from_fn(|_| AtomicBool::new(false)),\n\n                flush_loop_state: Mutex::new(FlushLoopState::NotStarted),\n\n                layer_flush_start_tx,\n                layer_flush_done_tx,\n\n                write_lock: tokio::sync::Mutex::new(None),\n\n                gc_info: std::sync::RwLock::new(GcInfo::default()),\n\n                last_image_layer_creation_status: ArcSwap::new(Arc::new(\n                    LastImageLayerCreationStatus::default(),\n                )),\n\n                applied_gc_cutoff_lsn: Rcu::new(metadata.latest_gc_cutoff_lsn()),\n                initdb_lsn: metadata.initdb_lsn(),\n\n                current_logical_size: if disk_consistent_lsn.is_valid() {\n                    // we're creating timeline data with some layer files existing locally,\n                    // need to recalculate timeline's logical size based on data in the layers.\n                    LogicalSize::deferred_initial(disk_consistent_lsn)\n                } else {\n                    // we're creating timeline data without any layers existing locally,\n                    // initial logical size is 0.\n                    LogicalSize::empty_initial()\n                },\n\n                partitioning: GuardArcSwap::new((\n                    (KeyPartitioning::new(), KeyPartitioning::new().into_sparse()),\n                    Lsn(0),\n                )),\n                repartition_threshold: 0,\n                last_image_layer_creation_check_at: AtomicLsn::new(0),\n                last_image_layer_creation_check_instant: Mutex::new(None),\n                last_received_wal: Mutex::new(None),\n                rel_size_latest_cache: RwLock::new(HashMap::new()),\n                rel_size_snapshot_cache: Mutex::new(LruCache::new(relsize_snapshot_cache_capacity)),\n\n                download_all_remote_layers_task_info: RwLock::new(None),\n\n                state,\n\n                eviction_task_timeline_state: tokio::sync::Mutex::new(\n              
      EvictionTaskTimelineState::default(),\n                ),\n                delete_progress: TimelineDeleteProgress::default(),\n\n                cancel,\n                gate: Gate::default(),\n\n                compaction_lock: tokio::sync::Mutex::default(),\n                compaction_failed: AtomicBool::default(),\n                corruption_detected: AtomicBool::default(),\n                l0_compaction_trigger: resources.l0_compaction_trigger,\n                gc_lock: tokio::sync::Mutex::default(),\n\n                standby_horizon: AtomicLsn::new(0),\n\n                pagestream_throttle: resources.pagestream_throttle,\n\n                aux_file_size_estimator: AuxFileSizeEstimator::new(aux_file_metrics),\n\n                #[cfg(test)]\n                extra_test_dense_keyspace: ArcSwap::new(Arc::new(KeySpace::default())),\n\n                l0_flush_global_state: resources.l0_flush_global_state,\n\n                handles: Default::default(),\n\n                attach_wal_lag_cooldown,\n\n                create_idempotency,\n\n                page_trace: Default::default(),\n\n                previous_heatmap: ArcSwapOption::from_pointee(previous_heatmap),\n\n                heatmap_layers_downloader: Mutex::new(None),\n\n                rel_size_v2_status: ArcSwap::from_pointee((\n                    rel_size_v2_status,\n                    rel_size_migrated_at,\n                )),\n\n                wait_lsn_log_slow: tokio::sync::Semaphore::new(1),\n\n                basebackup_cache: resources.basebackup_cache,\n\n                feature_resolver: resources.feature_resolver.clone(),\n\n                db_rel_count: ArcSwapOption::from_pointee(None),\n            };\n\n            result.repartition_threshold =\n                result.get_checkpoint_distance() / REPARTITION_FREQ_IN_CHECKPOINT_DISTANCE;\n\n            result\n                .metrics\n                .last_record_lsn_gauge\n                .set(disk_consistent_lsn.0 as i64);\n 
           result\n        })\n    }\n\n    pub(super) fn maybe_spawn_flush_loop(self: &Arc<Self>) {\n        let Ok(guard) = self.gate.enter() else {\n            info!(\"cannot start flush loop when the timeline gate has already been closed\");\n            return;\n        };\n        let mut flush_loop_state = self.flush_loop_state.lock().unwrap();\n        match *flush_loop_state {\n            FlushLoopState::NotStarted => (),\n            FlushLoopState::Running { .. } => {\n                info!(\n                    \"skipping attempt to start flush_loop twice {}/{}\",\n                    self.tenant_shard_id, self.timeline_id\n                );\n                return;\n            }\n            FlushLoopState::Exited => {\n                info!(\n                    \"ignoring attempt to restart exited flush_loop {}/{}\",\n                    self.tenant_shard_id, self.timeline_id\n                );\n                return;\n            }\n        }\n\n        let layer_flush_start_rx = self.layer_flush_start_tx.subscribe();\n        let self_clone = Arc::clone(self);\n\n        debug!(\"spawning flush loop\");\n        *flush_loop_state = FlushLoopState::Running {\n            #[cfg(test)]\n            expect_initdb_optimization: false,\n            #[cfg(test)]\n            initdb_optimization_count: 0,\n        };\n        task_mgr::spawn(\n            task_mgr::BACKGROUND_RUNTIME.handle(),\n            task_mgr::TaskKind::LayerFlushTask,\n            self.tenant_shard_id,\n            Some(self.timeline_id),\n            \"layer flush task\",\n            async move {\n                let _guard = guard;\n                let background_ctx = RequestContext::todo_child(TaskKind::LayerFlushTask, DownloadBehavior::Error).with_scope_timeline(&self_clone);\n                self_clone.flush_loop(layer_flush_start_rx, &background_ctx).await;\n                let mut flush_loop_state = self_clone.flush_loop_state.lock().unwrap();\n                
assert!(matches!(*flush_loop_state, FlushLoopState::Running{..}));\n                *flush_loop_state  = FlushLoopState::Exited;\n                Ok(())\n            }\n            .instrument(info_span!(parent: None, \"layer flush task\", tenant_id = %self.tenant_shard_id.tenant_id, shard_id = %self.tenant_shard_id.shard_slug(), timeline_id = %self.timeline_id))\n        );\n    }\n\n    pub(crate) fn update_gc_compaction_state(\n        &self,\n        gc_compaction_state: GcCompactionState,\n    ) -> anyhow::Result<()> {\n        self.gc_compaction_state\n            .store(Some(Arc::new(gc_compaction_state.clone())));\n        self.remote_client\n            .schedule_index_upload_for_gc_compaction_state_update(gc_compaction_state)\n    }\n\n    pub(crate) fn update_rel_size_v2_status(\n        &self,\n        rel_size_v2_status: RelSizeMigration,\n        rel_size_migrated_at: Option<Lsn>,\n    ) -> anyhow::Result<()> {\n        self.rel_size_v2_status.store(Arc::new((\n            Some(rel_size_v2_status.clone()),\n            rel_size_migrated_at,\n        )));\n        self.remote_client\n            .schedule_index_upload_for_rel_size_v2_status_update(\n                rel_size_v2_status,\n                rel_size_migrated_at,\n            )\n    }\n\n    pub(crate) fn get_gc_compaction_state(&self) -> Option<GcCompactionState> {\n        self.gc_compaction_state\n            .load()\n            .as_ref()\n            .map(|x| x.as_ref().clone())\n    }\n\n    /// Creates and starts the wal receiver.\n    ///\n    /// This function is expected to be called at most once per Timeline's lifecycle\n    /// when the timeline is activated.\n    fn launch_wal_receiver(\n        self: &Arc<Self>,\n        ctx: &RequestContext,\n        broker_client: BrokerClientChannel,\n    ) {\n        info!(\n            \"launching WAL receiver for timeline {} of tenant {}\",\n            self.timeline_id, self.tenant_shard_id\n        );\n\n        let tenant_conf = 
self.tenant_conf.load();\n        let wal_connect_timeout = tenant_conf\n            .tenant_conf\n            .walreceiver_connect_timeout\n            .unwrap_or(self.conf.default_tenant_conf.walreceiver_connect_timeout);\n        let lagging_wal_timeout = tenant_conf\n            .tenant_conf\n            .lagging_wal_timeout\n            .unwrap_or(self.conf.default_tenant_conf.lagging_wal_timeout);\n        let max_lsn_wal_lag = tenant_conf\n            .tenant_conf\n            .max_lsn_wal_lag\n            .unwrap_or(self.conf.default_tenant_conf.max_lsn_wal_lag);\n\n        let mut guard = self.walreceiver.lock().unwrap();\n        assert!(\n            guard.is_none(),\n            \"multiple launches / re-launches of WAL receiver are not supported\"\n        );\n\n        let protocol = PostgresClientProtocol::Interpreted {\n            format: utils::postgres_client::InterpretedFormat::Protobuf,\n            compression: Some(utils::postgres_client::Compression::Zstd { level: 1 }),\n        };\n\n        *guard = Some(WalReceiver::start(\n            Arc::clone(self),\n            WalReceiverConf {\n                protocol,\n                wal_connect_timeout,\n                lagging_wal_timeout,\n                max_lsn_wal_lag,\n                auth_token: crate::config::SAFEKEEPER_AUTH_TOKEN.get().cloned(),\n                availability_zone: self.conf.availability_zone.clone(),\n                ingest_batch_size: self.conf.ingest_batch_size,\n                validate_wal_contiguity: self.conf.validate_wal_contiguity,\n            },\n            broker_client,\n            ctx,\n        ));\n    }\n\n    /// Initialize with an empty layer map. 
Used when creating a new timeline.\n    pub(super) fn init_empty_layer_map(&self, start_lsn: Lsn) {\n        let mut layers = self.layers.try_write(LayerManagerLockHolder::Init).expect(\n            \"in the context where we call this function, no other task has access to the object\",\n        );\n        layers\n            .open_mut()\n            .expect(\"in this context the LayerManager must still be open\")\n            .initialize_empty(Lsn(start_lsn.0));\n    }\n\n    /// Scan the timeline directory, cleanup, populate the layer map, and schedule uploads for local-only\n    /// files.\n    pub(super) async fn load_layer_map(\n        &self,\n        disk_consistent_lsn: Lsn,\n        index_part: IndexPart,\n    ) -> anyhow::Result<()> {\n        use LayerName::*;\n        use init::Decision::*;\n        use init::{Discovered, DismissedLayer};\n\n        let mut guard = self\n            .layers\n            .write(LayerManagerLockHolder::LoadLayerMap)\n            .await;\n\n        let timer = self.metrics.load_layer_map_histo.start_timer();\n\n        // Scan timeline directory and create ImageLayerName and DeltaFilename\n        // structs representing all files on disk\n        let timeline_path = self\n            .conf\n            .timeline_path(&self.tenant_shard_id, &self.timeline_id);\n        let conf = self.conf;\n        let span = tracing::Span::current();\n\n        // Copy to move into the task we're about to spawn\n        let this = self.myself.upgrade().expect(\"&self method holds the arc\");\n\n        let (loaded_layers, needs_cleanup, total_physical_size) = tokio::task::spawn_blocking({\n            move || {\n                let _g = span.entered();\n                let discovered = init::scan_timeline_dir(&timeline_path)?;\n                let mut discovered_layers = Vec::with_capacity(discovered.len());\n                let mut unrecognized_files = Vec::new();\n\n                let mut path = timeline_path;\n\n                for 
discovered in discovered {\n                    let (name, kind) = match discovered {\n                        Discovered::Layer(layer_file_name, local_metadata) => {\n                            discovered_layers.push((layer_file_name, local_metadata));\n                            continue;\n                        }\n                        Discovered::IgnoredBackup(path) => {\n                            std::fs::remove_file(path)\n                                .or_else(fs_ext::ignore_not_found)\n                                .fatal_err(\"Removing .old file\");\n                            continue;\n                        }\n                        Discovered::Unknown(file_name) => {\n                            // we will later error if there are any\n                            unrecognized_files.push(file_name);\n                            continue;\n                        }\n                        Discovered::Ephemeral(name) => (name, \"old ephemeral file\"),\n                        Discovered::Temporary(name) => (name, \"temporary timeline file\"),\n                        Discovered::TemporaryDownload(name) => (name, \"temporary download\"),\n                    };\n                    path.push(Utf8Path::new(&name));\n                    init::cleanup(&path, kind)?;\n                    path.pop();\n                }\n\n                if !unrecognized_files.is_empty() {\n                    // assume that if there are any there are many many.\n                    let n = unrecognized_files.len();\n                    let first = &unrecognized_files[..n.min(10)];\n                    anyhow::bail!(\n                        \"unrecognized files in timeline dir (total {n}), first 10: {first:?}\"\n                    );\n                }\n\n                let decided = init::reconcile(discovered_layers, &index_part, disk_consistent_lsn);\n\n                let mut loaded_layers = Vec::new();\n                let mut needs_cleanup = Vec::new();\n 
               let mut total_physical_size = 0;\n\n                for (name, decision) in decided {\n                    let decision = match decision {\n                        Ok(decision) => decision,\n                        Err(DismissedLayer::Future { local }) => {\n                            if let Some(local) = local {\n                                init::cleanup_future_layer(\n                                    &local.local_path,\n                                    &name,\n                                    disk_consistent_lsn,\n                                )?;\n                            }\n                            needs_cleanup.push(name);\n                            continue;\n                        }\n                        Err(DismissedLayer::LocalOnly(local)) => {\n                            init::cleanup_local_only_file(&name, &local)?;\n                            // this file never existed remotely, we will have to do rework\n                            continue;\n                        }\n                        Err(DismissedLayer::BadMetadata(local)) => {\n                            init::cleanup_local_file_for_remote(&local)?;\n                            // this file never existed remotely, we will have to do rework\n                            continue;\n                        }\n                    };\n\n                    match &name {\n                        Delta(d) => assert!(d.lsn_range.end <= disk_consistent_lsn + 1),\n                        Image(i) => assert!(i.lsn <= disk_consistent_lsn),\n                    }\n\n                    tracing::debug!(layer=%name, ?decision, \"applied\");\n\n                    let layer = match decision {\n                        Resident { local, remote } => {\n                            total_physical_size += local.file_size;\n                            Layer::for_resident(conf, &this, local.local_path, name, remote)\n                                
.drop_eviction_guard()\n                        }\n                        Evicted(remote) => Layer::for_evicted(conf, &this, name, remote),\n                    };\n\n                    loaded_layers.push(layer);\n                }\n                Ok((loaded_layers, needs_cleanup, total_physical_size))\n            }\n        })\n        .await\n        .map_err(anyhow::Error::new)\n        .and_then(|x| x)?;\n\n        let num_layers = loaded_layers.len();\n\n        guard\n            .open_mut()\n            .expect(\"layermanager must be open during init\")\n            .initialize_local_layers(loaded_layers, disk_consistent_lsn + 1);\n\n        self.remote_client\n            .schedule_layer_file_deletion(&needs_cleanup)?;\n        self.remote_client\n            .schedule_index_upload_for_file_changes()?;\n        // This barrier orders above DELETEs before any later operations.\n        // This is critical because code executing after the barrier might\n        // create again objects with the same key that we just scheduled for deletion.\n        // For example, if we just scheduled deletion of an image layer \"from the future\",\n        // later compaction might run again and re-create the same image layer.\n        // \"from the future\" here means an image layer whose LSN is > IndexPart::disk_consistent_lsn.\n        // \"same\" here means same key range and LSN.\n        //\n        // Without a barrier between above DELETEs and the re-creation's PUTs,\n        // the upload queue may execute the PUT first, then the DELETE.\n        // In our example, we will end up with an IndexPart referencing a non-existent object.\n        //\n        // 1. a future image layer is created and uploaded\n        // 2. ps restart\n        // 3. the future layer from (1) is deleted during load layer map\n        // 4. image layer is re-created and uploaded\n        // 5. deletion queue would like to delete (1) but actually deletes (4)\n        // 6. 
delete by name works as expected, but it now deletes the wrong (later) version\n        //\n        // See https://github.com/neondatabase/neon/issues/5878\n        //\n        // NB: generation numbers naturally protect against this because they disambiguate\n        //     (1) and (4)\n        // TODO: this is basically a no-op now, should we remove it?\n        self.remote_client.schedule_barrier()?;\n        // TenantShard::create_timeline will wait for these uploads to happen before returning, or\n        // on retry.\n\n        info!(\n            \"loaded layer map with {} layers at {}, total physical size: {}\",\n            num_layers, disk_consistent_lsn, total_physical_size\n        );\n\n        timer.stop_and_record();\n        Ok(())\n    }\n\n    /// Retrieve current logical size of the timeline.\n    ///\n    /// The size could be lagging behind the actual number, in case\n    /// the initial size calculation has not been run (gets triggered on the first size access).\n    ///\n    /// return size and boolean flag that shows if the size is exact\n    pub(crate) fn get_current_logical_size(\n        self: &Arc<Self>,\n        priority: GetLogicalSizePriority,\n        ctx: &RequestContext,\n    ) -> logical_size::CurrentLogicalSize {\n        if !self.tenant_shard_id.is_shard_zero() {\n            // Logical size is only accurately maintained on shard zero: when called elsewhere, for example\n            // when HTTP API is serving a GET for timeline zero, return zero\n            return logical_size::CurrentLogicalSize::Approximate(logical_size::Approximate::zero());\n        }\n\n        let current_size = self.current_logical_size.current_size();\n        debug!(\"Current size: {current_size:?}\");\n\n        match (current_size.accuracy(), priority) {\n            (logical_size::Accuracy::Exact, _) => (), // nothing to do\n            (logical_size::Accuracy::Approximate, GetLogicalSizePriority::Background) => {\n                // background 
task will eventually deliver an exact value, we're in no rush\n            }\n            (logical_size::Accuracy::Approximate, GetLogicalSizePriority::User) => {\n                // background task is not ready, but user is asking for it now;\n                // => make the background task skip the line\n                // (The alternative would be to calculate the size here, but,\n                //  it can actually take a long time if the user has a lot of rels.\n                //  And we'll inevitably need it again; So, let the background task do the work.)\n                match self\n                    .current_logical_size\n                    .cancel_wait_for_background_loop_concurrency_limit_semaphore\n                    .get()\n                {\n                    Some(cancel) => cancel.cancel(),\n                    None => {\n                        match self.current_state() {\n                            TimelineState::Broken { .. } | TimelineState::Stopping => {\n                                // Can happen when timeline detail endpoint is used when deletion is ongoing (or it's broken).\n                                // Don't make noise.\n                            }\n                            TimelineState::Loading => {\n                                // Import does not return an activated timeline.\n                                info!(\n                                    \"discarding priority boost for logical size calculation because timeline is not yet active\"\n                                );\n                            }\n                            TimelineState::Active => {\n                                // activation should be setting the once cell\n                                warn!(\n                                    \"unexpected: cancel_wait_for_background_loop_concurrency_limit_semaphore not set, priority-boosting of logical size calculation will not work\"\n                                );\n                    
            debug_assert!(false);\n                            }\n                        }\n                    }\n                }\n            }\n        }\n\n        if let CurrentLogicalSize::Approximate(_) = &current_size {\n            if ctx.task_kind() == TaskKind::WalReceiverConnectionHandler {\n                let first = self\n                    .current_logical_size\n                    .did_return_approximate_to_walreceiver\n                    .compare_exchange(\n                        false,\n                        true,\n                        AtomicOrdering::Relaxed,\n                        AtomicOrdering::Relaxed,\n                    )\n                    .is_ok();\n                if first {\n                    crate::metrics::initial_logical_size::TIMELINES_WHERE_WALRECEIVER_GOT_APPROXIMATE_SIZE.inc();\n                }\n            }\n        }\n\n        current_size\n    }\n\n    fn spawn_initial_logical_size_computation_task(self: &Arc<Self>, ctx: &RequestContext) {\n        let Some(initial_part_end) = self.current_logical_size.initial_part_end else {\n            // nothing to do for freshly created timelines;\n            assert_eq!(\n                self.current_logical_size.current_size().accuracy(),\n                logical_size::Accuracy::Exact,\n            );\n            self.current_logical_size.initialized.add_permits(1);\n            return;\n        };\n\n        let cancel_wait_for_background_loop_concurrency_limit_semaphore = CancellationToken::new();\n        let token = cancel_wait_for_background_loop_concurrency_limit_semaphore.clone();\n        self.current_logical_size\n            .cancel_wait_for_background_loop_concurrency_limit_semaphore.set(token)\n            .expect(\"initial logical size calculation task must be spawned exactly once per Timeline object\");\n\n        let self_clone = Arc::clone(self);\n        let background_ctx = ctx.detached_child(\n            
TaskKind::InitialLogicalSizeCalculation,\n            DownloadBehavior::Download,\n        );\n        task_mgr::spawn(\n            task_mgr::BACKGROUND_RUNTIME.handle(),\n            task_mgr::TaskKind::InitialLogicalSizeCalculation,\n            self.tenant_shard_id,\n            Some(self.timeline_id),\n            \"initial size calculation\",\n            // NB: don't log errors here, task_mgr will do that.\n            async move {\n                self_clone\n                    .initial_logical_size_calculation_task(\n                        initial_part_end,\n                        cancel_wait_for_background_loop_concurrency_limit_semaphore,\n                        background_ctx,\n                    )\n                    .await;\n                Ok(())\n            }\n            .instrument(info_span!(parent: None, \"initial_size_calculation\", tenant_id=%self.tenant_shard_id.tenant_id, shard_id=%self.tenant_shard_id.shard_slug(), timeline_id=%self.timeline_id)),\n        );\n    }\n\n    /// # Cancellation\n    ///\n    /// This method is sensitive to `Timeline::cancel`.\n    ///\n    /// It is _not_ sensitive to task_mgr::shutdown_token().\n    ///\n    /// # Cancel-Safety\n    ///\n    /// It does Timeline IO, hence this should be polled to completion because\n    /// we could be leaving in-flight IOs behind, which is safe, but annoying\n    /// to reason about.\n    async fn initial_logical_size_calculation_task(\n        self: Arc<Self>,\n        initial_part_end: Lsn,\n        skip_concurrency_limiter: CancellationToken,\n        background_ctx: RequestContext,\n    ) {\n        scopeguard::defer! 
{\n            // Irrespective of the outcome of this operation, we should unblock anyone waiting for it.\n            self.current_logical_size.initialized.add_permits(1);\n        }\n\n        let try_once = |attempt: usize| {\n            let background_ctx = &background_ctx;\n            let self_ref = &self;\n            let skip_concurrency_limiter = &skip_concurrency_limiter;\n            async move {\n                let wait_for_permit = super::tasks::acquire_concurrency_permit(\n                    BackgroundLoopKind::InitialLogicalSizeCalculation,\n                    background_ctx,\n                );\n\n                use crate::metrics::initial_logical_size::StartCircumstances;\n                let (_maybe_permit, circumstances) = tokio::select! {\n                    permit = wait_for_permit => {\n                        (Some(permit), StartCircumstances::AfterBackgroundTasksRateLimit)\n                    }\n                    _ = self_ref.cancel.cancelled() => {\n                        return Err(CalculateLogicalSizeError::Cancelled);\n                    }\n                    () = skip_concurrency_limiter.cancelled() => {\n                        // Some action that is part of a end user interaction requested logical size\n                        // => break out of the rate limit\n                        // TODO: ideally we'd not run on BackgroundRuntime but the requester's runtime;\n                        // but then again what happens if they cancel; also, we should just be using\n                        // one runtime across the entire process, so, let's leave this for now.\n                        (None, StartCircumstances::SkippedConcurrencyLimiter)\n                    }\n                };\n\n                let metrics_guard = if attempt == 1 {\n                    crate::metrics::initial_logical_size::START_CALCULATION.first(circumstances)\n                } else {\n                    
crate::metrics::initial_logical_size::START_CALCULATION.retry(circumstances)\n                };\n\n                let io_concurrency = IoConcurrency::spawn_from_conf(\n                    self_ref.conf.get_vectored_concurrent_io,\n                    self_ref\n                        .gate\n                        .enter()\n                        .map_err(|_| CalculateLogicalSizeError::Cancelled)?,\n                );\n\n                let calculated_size = self_ref\n                    .logical_size_calculation_task(\n                        initial_part_end,\n                        LogicalSizeCalculationCause::Initial,\n                        background_ctx,\n                    )\n                    .await?;\n\n                self_ref\n                    .trigger_aux_file_size_computation(\n                        initial_part_end,\n                        background_ctx,\n                        io_concurrency,\n                    )\n                    .await?;\n\n                // TODO: add aux file size to logical size\n\n                Ok((calculated_size, metrics_guard))\n            }\n        };\n\n        let retrying = async {\n            let mut attempt = 0;\n            loop {\n                attempt += 1;\n\n                match try_once(attempt).await {\n                    Ok(res) => return ControlFlow::Continue(res),\n                    Err(CalculateLogicalSizeError::Cancelled) => return ControlFlow::Break(()),\n                    Err(\n                        e @ (CalculateLogicalSizeError::Decode(_)\n                        | CalculateLogicalSizeError::PageRead(_)),\n                    ) => {\n                        warn!(attempt, \"initial size calculation failed: {e:?}\");\n                        // exponential back-off doesn't make sense at these long intervals;\n                        // use fixed retry interval with generous jitter instead\n                        let sleep_duration = Duration::from_secs(\n             
               u64::try_from(\n                                // 1hour base\n                                (60_i64 * 60_i64)\n                                    // 10min jitter\n                                    + rand::rng().random_range(-10 * 60..10 * 60),\n                            )\n                            .expect(\"10min < 1hour\"),\n                        );\n                        tokio::select! {\n                            _ = tokio::time::sleep(sleep_duration) => {}\n                            _ = self.cancel.cancelled() => return ControlFlow::Break(()),\n                        }\n                    }\n                }\n            }\n        };\n\n        let (calculated_size, metrics_guard) = match retrying.await {\n            ControlFlow::Continue(calculated_size) => calculated_size,\n            ControlFlow::Break(()) => return,\n        };\n\n        // we cannot query current_logical_size.current_size() to know the current\n        // *negative* value, only truncated to u64.\n        let added = self\n            .current_logical_size\n            .size_added_after_initial\n            .load(AtomicOrdering::Relaxed);\n\n        let sum = calculated_size.saturating_add_signed(added);\n\n        // set the gauge value before it can be set in `update_current_logical_size`.\n        self.metrics.current_logical_size_gauge.set(sum);\n\n        self.current_logical_size\n            .initial_logical_size\n            .set((calculated_size, metrics_guard.calculation_result_saved()))\n            .ok()\n            .expect(\"only this task sets it\");\n    }\n\n    pub(crate) fn spawn_ondemand_logical_size_calculation(\n        self: &Arc<Self>,\n        lsn: Lsn,\n        cause: LogicalSizeCalculationCause,\n        ctx: RequestContext,\n    ) -> oneshot::Receiver<Result<u64, CalculateLogicalSizeError>> {\n        let (sender, receiver) = oneshot::channel();\n        let self_clone = Arc::clone(self);\n        // XXX if our caller 
loses interest, i.e., ctx is cancelled,\n        // we should stop the size calculation work and return an error.\n        // That would require restructuring this function's API to\n        // return the result directly, instead of a Receiver for the result.\n        let ctx = ctx.detached_child(\n            TaskKind::OndemandLogicalSizeCalculation,\n            DownloadBehavior::Download,\n        );\n        task_mgr::spawn(\n            task_mgr::BACKGROUND_RUNTIME.handle(),\n            task_mgr::TaskKind::OndemandLogicalSizeCalculation,\n            self.tenant_shard_id,\n            Some(self.timeline_id),\n            \"ondemand logical size calculation\",\n            async move {\n                let res = self_clone\n                    .logical_size_calculation_task(lsn, cause, &ctx)\n                    .await;\n                let _ = sender.send(res).ok();\n                Ok(()) // Receiver is responsible for handling errors\n            }\n            .in_current_span(),\n        );\n        receiver\n    }\n\n    #[instrument(skip_all)]\n    async fn logical_size_calculation_task(\n        self: &Arc<Self>,\n        lsn: Lsn,\n        cause: LogicalSizeCalculationCause,\n        ctx: &RequestContext,\n    ) -> Result<u64, CalculateLogicalSizeError> {\n        crate::span::debug_assert_current_span_has_tenant_and_timeline_id();\n        // We should never be calculating logical sizes on shard !=0, because these shards do not have\n        // accurate relation sizes, and they do not emit consumption metrics.\n        debug_assert!(self.tenant_shard_id.is_shard_zero());\n\n        let guard = self\n            .gate\n            .enter()\n            .map_err(|_| CalculateLogicalSizeError::Cancelled)?;\n\n        self.calculate_logical_size(lsn, cause, &guard, ctx).await\n    }\n\n    /// Calculate the logical size of the database at the latest LSN.\n    ///\n    /// NOTE: counted incrementally, includes ancestors. 
This can be a slow operation,\n    /// especially if we need to download remote layers.\n    async fn calculate_logical_size(\n        &self,\n        up_to_lsn: Lsn,\n        cause: LogicalSizeCalculationCause,\n        _guard: &GateGuard,\n        ctx: &RequestContext,\n    ) -> Result<u64, CalculateLogicalSizeError> {\n        info!(\n            \"Calculating logical size for timeline {} at {}\",\n            self.timeline_id, up_to_lsn\n        );\n\n        if let Err(()) = pausable_failpoint!(\"timeline-calculate-logical-size-pause\", &self.cancel)\n        {\n            return Err(CalculateLogicalSizeError::Cancelled);\n        }\n\n        // See if we've already done the work for initial size calculation.\n        // This is a short-cut for timelines that are mostly unused.\n        if let Some(size) = self.current_logical_size.initialized_size(up_to_lsn) {\n            return Ok(size);\n        }\n        let storage_time_metrics = match cause {\n            LogicalSizeCalculationCause::Initial\n            | LogicalSizeCalculationCause::ConsumptionMetricsSyntheticSize\n            | LogicalSizeCalculationCause::TenantSizeHandler => &self.metrics.logical_size_histo,\n            LogicalSizeCalculationCause::EvictionTaskImitation => {\n                &self.metrics.imitate_logical_size_histo\n            }\n        };\n        let timer = storage_time_metrics.start_timer();\n        let logical_size = self\n            .get_current_logical_size_non_incremental(up_to_lsn, ctx)\n            .await?;\n        debug!(\"calculated logical size: {logical_size}\");\n        timer.stop_and_record();\n        Ok(logical_size)\n    }\n\n    /// Update current logical size, adding `delta' to the old value.\n    fn update_current_logical_size(&self, delta: i64) {\n        let logical_size = &self.current_logical_size;\n        logical_size.increment_size(delta);\n\n        // Also set the value in the prometheus gauge. 
Note that\n        // there is a race condition here: if this is called by two\n        // threads concurrently, the prometheus gauge might be set to\n        // one value while current_logical_size is set to the\n        // other.\n        match logical_size.current_size() {\n            CurrentLogicalSize::Exact(ref new_current_size) => self\n                .metrics\n                .current_logical_size_gauge\n                .set(new_current_size.into()),\n            CurrentLogicalSize::Approximate(_) => {\n                // don't update the gauge yet, this allows us not to update the gauge back and\n                // forth between the initial size calculation task.\n            }\n        }\n    }\n\n    pub(crate) fn update_directory_entries_count(&self, kind: DirectoryKind, count: MetricsUpdate) {\n        // TODO: this directory metrics is not correct -- we could have multiple reldirs in the system\n        // for each of the database, but we only store one value, and therefore each pgdirmodification\n        // would overwrite the previous value if they modify different databases.\n\n        match count {\n            MetricsUpdate::Set(count) => {\n                self.directory_metrics[kind.offset()].store(count, AtomicOrdering::Relaxed);\n                self.directory_metrics_inited[kind.offset()].store(true, AtomicOrdering::Relaxed);\n            }\n            MetricsUpdate::Add(count) => {\n                // TODO: these operations are not atomic; but we only have one writer to the metrics, so\n                // it's fine.\n                if self.directory_metrics_inited[kind.offset()].load(AtomicOrdering::Relaxed) {\n                    // The metrics has been initialized with `MetricsUpdate::Set` before, so we can add/sub\n                    // the value reliably.\n                    self.directory_metrics[kind.offset()].fetch_add(count, AtomicOrdering::Relaxed);\n                }\n                // Otherwise, ignore this update\n     
       }\n            MetricsUpdate::Sub(count) => {\n                // TODO: these operations are not atomic; but we only have one writer to the metrics, so\n                // it's fine.\n                if self.directory_metrics_inited[kind.offset()].load(AtomicOrdering::Relaxed) {\n                    // The metrics has been initialized with `MetricsUpdate::Set` before.\n                    // The operation could overflow so we need to normalize the value.\n                    let prev_val =\n                        self.directory_metrics[kind.offset()].load(AtomicOrdering::Relaxed);\n                    let res = prev_val.saturating_sub(count);\n                    self.directory_metrics[kind.offset()].store(res, AtomicOrdering::Relaxed);\n                }\n                // Otherwise, ignore this update\n            }\n        };\n\n        // TODO: remove this, there's no place in the code that updates this aux metrics.\n        let aux_metric =\n            self.directory_metrics[DirectoryKind::AuxFiles.offset()].load(AtomicOrdering::Relaxed);\n\n        let sum_of_entries = self\n            .directory_metrics\n            .iter()\n            .map(|v| v.load(AtomicOrdering::Relaxed))\n            .sum();\n        // Set a high general threshold and a lower threshold for the auxiliary files,\n        // as we can have large numbers of relations in the db directory.\n        const SUM_THRESHOLD: u64 = 5000;\n        const AUX_THRESHOLD: u64 = 1000;\n        if sum_of_entries >= SUM_THRESHOLD || aux_metric >= AUX_THRESHOLD {\n            self.metrics\n                .directory_entries_count_gauge\n                .set(sum_of_entries);\n        } else if let Some(metric) = Lazy::get(&self.metrics.directory_entries_count_gauge) {\n            metric.set(sum_of_entries);\n        }\n    }\n\n    async fn find_layer(\n        &self,\n        layer_name: &LayerName,\n    ) -> Result<Option<Layer>, layer_manager::Shutdown> {\n        let guard = self\n         
   .layers\n            .read(LayerManagerLockHolder::GetLayerMapInfo)\n            .await;\n        let layer = guard\n            .layer_map()?\n            .iter_historic_layers()\n            .find(|l| &l.layer_name() == layer_name)\n            .map(|found| guard.get_from_desc(&found));\n        Ok(layer)\n    }\n\n    pub(super) fn should_keep_previous_heatmap(&self, new_heatmap_end_lsn: Lsn) -> bool {\n        let crnt = self.previous_heatmap.load();\n        match crnt.as_deref() {\n            Some(PreviousHeatmap::Active { end_lsn, .. }) => match end_lsn {\n                Some(crnt_end_lsn) => *crnt_end_lsn > new_heatmap_end_lsn,\n                None => true,\n            },\n            Some(PreviousHeatmap::Obsolete) => false,\n            None => false,\n        }\n    }\n\n    /// The timeline heatmap is a hint to secondary locations from the primary location,\n    /// indicating which layers are currently on-disk on the primary.\n    ///\n    /// None is returned if the Timeline is in a state where uploading a heatmap\n    /// doesn't make sense, such as shutting down or initializing.  The caller\n    /// should treat this as a cue to simply skip doing any heatmap uploading\n    /// for this timeline.\n    pub(crate) async fn generate_heatmap(&self) -> Option<HeatMapTimeline> {\n        if !self.is_active() {\n            return None;\n        }\n\n        let guard = self\n            .layers\n            .read(LayerManagerLockHolder::GenerateHeatmap)\n            .await;\n\n        // Firstly, if there's any heatmap left over from when this location\n        // was a secondary, take that into account. 
Keep layers that are:\n        // * present in the layer map\n        // * visible\n        // * non-resident\n        // * not evicted since we read the heatmap\n        //\n        // Without this, a new cold, attached location would clobber the previous\n        // heatmap.\n        let previous_heatmap = self.previous_heatmap.load();\n        let visible_non_resident = match previous_heatmap.as_deref() {\n            Some(PreviousHeatmap::Active {\n                heatmap, read_at, ..\n            }) => Some(heatmap.all_layers().filter_map(|hl| {\n                let desc: PersistentLayerDesc = hl.name.clone().into();\n                let layer = guard.try_get_from_key(&desc.key())?;\n\n                if layer.visibility() == LayerVisibilityHint::Covered {\n                    return None;\n                }\n\n                if layer.is_likely_resident() {\n                    return None;\n                }\n\n                if layer.last_evicted_at().happened_after(*read_at) {\n                    return None;\n                }\n\n                Some((desc, hl.metadata.clone(), hl.access_time, hl.cold))\n            })),\n            Some(PreviousHeatmap::Obsolete) => None,\n            None => None,\n        };\n\n        // Secondly, all currently visible, resident layers are included.\n        let resident = guard.likely_resident_layers().filter_map(|layer| {\n            match layer.visibility() {\n                LayerVisibilityHint::Visible => {\n                    // Layer is visible to one or more read LSNs: eligible for inclusion in layer map\n                    let last_activity_ts = layer.latest_activity();\n                    Some((\n                        layer.layer_desc().clone(),\n                        layer.metadata(),\n                        last_activity_ts,\n                        false, // these layers are not cold\n                    ))\n                }\n                LayerVisibilityHint::Covered => {\n                 
   // Layer is resident but unlikely to be read: not eligible for inclusion in heatmap.\n                    None\n                }\n            }\n        });\n\n        let mut layers = match visible_non_resident {\n            Some(non_resident) => {\n                let mut non_resident = non_resident.peekable();\n                if non_resident.peek().is_none() {\n                    tracing::info!(timeline_id=%self.timeline_id, \"Previous heatmap now obsolete\");\n                    self.previous_heatmap\n                        .store(Some(PreviousHeatmap::Obsolete.into()));\n                }\n\n                non_resident.chain(resident).collect::<Vec<_>>()\n            }\n            None => resident.collect::<Vec<_>>(),\n        };\n\n        // Sort layers in order of which to download first.  For a large set of layers to download, we\n        // want to prioritize those layers which are most likely to still be in the resident many minutes\n        // or hours later:\n        // - Cold layers go last for convenience when a human inspects the heatmap.\n        // - Download L0s last, because they churn the fastest: L0s on a fast-writing tenant might\n        //   only exist for a few minutes before being compacted into L1s.\n        // - For L1 & image layers, download most recent LSNs first: the older the LSN, the sooner\n        //   the layer is likely to be covered by an image layer during compaction.\n        layers.sort_by_key(|(desc, _meta, _atime, cold)| {\n            std::cmp::Reverse((\n                *cold,\n                !LayerMap::is_l0(&desc.key_range, desc.is_delta),\n                desc.lsn_range.end,\n            ))\n        });\n\n        let layers = layers\n            .into_iter()\n            .map(|(desc, meta, atime, cold)| {\n                HeatMapLayer::new(desc.layer_name(), meta, atime, cold)\n            })\n            .collect();\n\n        Some(HeatMapTimeline::new(self.timeline_id, layers))\n    }\n\n    
pub(super) async fn generate_unarchival_heatmap(&self, end_lsn: Lsn) -> PreviousHeatmap {\n        let guard = self\n            .layers\n            .read(LayerManagerLockHolder::GenerateHeatmap)\n            .await;\n\n        let now = SystemTime::now();\n        let mut heatmap_layers = Vec::default();\n        for vl in guard.visible_layers() {\n            if vl.layer_desc().get_lsn_range().start >= end_lsn {\n                continue;\n            }\n\n            let hl = HeatMapLayer {\n                name: vl.layer_desc().layer_name(),\n                metadata: vl.metadata(),\n                access_time: now,\n                cold: true,\n            };\n            heatmap_layers.push(hl);\n        }\n\n        tracing::info!(\n            \"Generating unarchival heatmap with {} layers\",\n            heatmap_layers.len()\n        );\n\n        let heatmap = HeatMapTimeline::new(self.timeline_id, heatmap_layers);\n        PreviousHeatmap::Active {\n            heatmap,\n            read_at: Instant::now(),\n            end_lsn: Some(end_lsn),\n        }\n    }\n\n    /// Returns true if the given lsn is or was an ancestor branchpoint.\n    pub(crate) fn is_ancestor_lsn(&self, lsn: Lsn) -> bool {\n        // upon timeline detach, we set the ancestor_lsn to Lsn::INVALID and store the original\n        // branchpoint in the value in IndexPart::lineage\n        self.ancestor_lsn == lsn\n            || (self.ancestor_lsn == Lsn::INVALID\n                && self.remote_client.is_previous_ancestor_lsn(lsn))\n    }\n}\n\n#[derive(Clone)]\n/// Type representing a query in the ([`Lsn`], [`Key`]) space.\n/// In other words, a set of segments in a 2D space.\n///\n/// This representation has the advantage of avoiding hash map\n/// allocations for uniform queries.\npub(crate) enum VersionedKeySpaceQuery {\n    /// Variant for queries at a single [`Lsn`]\n    Uniform { keyspace: KeySpace, lsn: Lsn },\n    /// Variant for queries at multiple [`Lsn`]s\n    
Scattered {\n        keyspaces_at_lsn: Vec<(Lsn, KeySpace)>,\n    },\n}\n\nimpl VersionedKeySpaceQuery {\n    pub(crate) fn uniform(keyspace: KeySpace, lsn: Lsn) -> Self {\n        Self::Uniform { keyspace, lsn }\n    }\n\n    pub(crate) fn scattered(keyspaces_at_lsn: Vec<(Lsn, KeySpace)>) -> Self {\n        Self::Scattered { keyspaces_at_lsn }\n    }\n\n    /// Returns the most recent (largest) LSN included in the query.\n    /// If any of the LSNs included in the query are invalid, returns\n    /// an error instead.\n    fn high_watermark_lsn(&self) -> Result<Lsn, GetVectoredError> {\n        match self {\n            Self::Uniform { lsn, .. } => {\n                if !lsn.is_valid() {\n                    return Err(GetVectoredError::InvalidLsn(*lsn));\n                }\n\n                Ok(*lsn)\n            }\n            Self::Scattered { keyspaces_at_lsn } => {\n                let mut max_lsn = None;\n                for (lsn, _keyspace) in keyspaces_at_lsn.iter() {\n                    if !lsn.is_valid() {\n                        return Err(GetVectoredError::InvalidLsn(*lsn));\n                    }\n                    max_lsn = std::cmp::max(max_lsn, Some(lsn));\n                }\n\n                if let Some(computed) = max_lsn {\n                    Ok(*computed)\n                } else {\n                    Err(GetVectoredError::Other(anyhow!(\"empty input\")))\n                }\n            }\n        }\n    }\n\n    /// Returns the total keyspace being queried: the result of projecting\n    /// everything in the key dimensions onto the key axis.\n    fn total_keyspace(&self) -> KeySpace {\n        match self {\n            Self::Uniform { keyspace, .. 
} => keyspace.clone(),\n            Self::Scattered { keyspaces_at_lsn } => keyspaces_at_lsn\n                .iter()\n                .map(|(_lsn, keyspace)| keyspace)\n                .fold(KeySpace::default(), |mut acc, v| {\n                    acc.merge(v);\n                    acc\n                }),\n        }\n    }\n\n    /// Returns LSN for a specific key.\n    ///\n    /// Invariant: requested key must be part of [`Self::total_keyspace`]\n    pub(super) fn map_key_to_lsn(&self, key: &Key) -> Lsn {\n        match self {\n            Self::Uniform { lsn, .. } => *lsn,\n            Self::Scattered { keyspaces_at_lsn } => {\n                keyspaces_at_lsn\n                    .iter()\n                    .find(|(_lsn, keyspace)| keyspace.contains(key))\n                    .expect(\"Returned key was requested\")\n                    .0\n            }\n        }\n    }\n\n    /// Remove any parts of the query (segments) which overlap with the provided\n    /// key space (also segments).\n    fn remove_overlapping_with(&mut self, to_remove: &KeySpace) -> KeySpace {\n        match self {\n            Self::Uniform { keyspace, .. } => keyspace.remove_overlapping_with(to_remove),\n            Self::Scattered { keyspaces_at_lsn } => {\n                let mut removed_accum = KeySpaceRandomAccum::new();\n                keyspaces_at_lsn.iter_mut().for_each(|(_lsn, keyspace)| {\n                    let removed = keyspace.remove_overlapping_with(to_remove);\n                    removed_accum.add_keyspace(removed);\n                });\n\n                removed_accum.to_keyspace()\n            }\n        }\n    }\n\n    fn is_empty(&self) -> bool {\n        match self {\n            Self::Uniform { keyspace, .. 
} => keyspace.is_empty(),\n            Self::Scattered { keyspaces_at_lsn } => keyspaces_at_lsn\n                .iter()\n                .all(|(_lsn, keyspace)| keyspace.is_empty()),\n        }\n    }\n\n    /// \"Lower\" the query on the LSN dimension\n    fn lower(&mut self, to: Lsn) {\n        match self {\n            Self::Uniform { lsn, .. } => {\n                // If the originally requested LSN is smaller than the starting\n                // LSN of the ancestor we are descending into, we need to respect that.\n                // Hence the min.\n                *lsn = std::cmp::min(*lsn, to);\n            }\n            Self::Scattered { keyspaces_at_lsn } => {\n                keyspaces_at_lsn.iter_mut().for_each(|(lsn, _keyspace)| {\n                    *lsn = std::cmp::min(*lsn, to);\n                });\n            }\n        }\n    }\n}\n\nimpl std::fmt::Display for VersionedKeySpaceQuery {\n    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {\n        write!(f, \"[\")?;\n\n        match self {\n            VersionedKeySpaceQuery::Uniform { keyspace, lsn } => {\n                write!(f, \"{keyspace} @ {lsn}\")?;\n            }\n            VersionedKeySpaceQuery::Scattered { keyspaces_at_lsn } => {\n                for (lsn, keyspace) in keyspaces_at_lsn.iter() {\n                    write!(f, \"{keyspace} @ {lsn},\")?;\n                }\n            }\n        }\n\n        write!(f, \"]\")\n    }\n}\n\nimpl Timeline {\n    #[allow(clippy::doc_lazy_continuation)]\n    /// Get the data needed to reconstruct all keys in the provided keyspace\n    ///\n    /// The algorithm is as follows:\n    /// 1.   While some keys are still not done and there's a timeline to visit:\n    /// 2.   
Visit the timeline (see [`Timeline::get_vectored_reconstruct_data_timeline`]):\n    /// 2.1: Build the fringe for the current keyspace\n    /// 2.2  Visit the newest layer from the fringe to collect all values for the range it\n    ///      intersects\n    /// 2.3. Pop the timeline from the fringe\n    /// 2.4. If the fringe is empty, go back to 1\n    async fn get_vectored_reconstruct_data(\n        &self,\n        mut query: VersionedKeySpaceQuery,\n        reconstruct_state: &mut ValuesReconstructState,\n        ctx: &RequestContext,\n    ) -> Result<(), GetVectoredError> {\n        let original_hwm_lsn = query.high_watermark_lsn().unwrap();\n\n        let mut timeline_owned: Arc<Timeline>;\n        let mut timeline = self;\n\n        let missing_keyspace = loop {\n            if self.cancel.is_cancelled() {\n                return Err(GetVectoredError::Cancelled);\n            }\n\n            let TimelineVisitOutcome {\n                completed_keyspace: completed,\n                image_covered_keyspace,\n            } = {\n                let ctx = RequestContextBuilder::from(ctx)\n                    .perf_span(|crnt_perf_span| {\n                        info_span!(\n                            target: PERF_TRACE_TARGET,\n                            parent: crnt_perf_span,\n                            \"PLAN_IO_TIMELINE\",\n                            timeline = %timeline.timeline_id,\n                            high_watermark_lsn = %query.high_watermark_lsn().unwrap(),\n                        )\n                    })\n                    .attached_child();\n\n                Self::get_vectored_reconstruct_data_timeline(\n                    timeline,\n                    &query,\n                    reconstruct_state,\n                    &self.cancel,\n                    &ctx,\n                )\n                .maybe_perf_instrument(&ctx, |crnt_perf_span| crnt_perf_span.clone())\n                .await?\n            };\n\n            
query.remove_overlapping_with(&completed);\n\n            // Do not descend into the ancestor timeline for aux files.\n            // We don't return a blanket [`GetVectoredError::MissingKey`] to avoid\n            // stalling compaction.\n            query.remove_overlapping_with(&KeySpace {\n                ranges: vec![NON_INHERITED_RANGE, Key::sparse_non_inherited_keyspace()],\n            });\n\n            // Keyspace is fully retrieved\n            if query.is_empty() {\n                break None;\n            }\n\n            let Some(ancestor_timeline) = timeline.ancestor_timeline.as_ref() else {\n                // Not fully retrieved but no ancestor timeline.\n                break Some(query.total_keyspace());\n            };\n\n            // Now we see if there are keys covered by the image layer but does not exist in the\n            // image layer, which means that the key does not exist.\n\n            // The block below will stop the vectored search if any of the keys encountered an image layer\n            // which did not contain a snapshot for said key. Since we have already removed all completed\n            // keys from `keyspace`, we expect there to be no overlap between it and the image covered key\n            // space. If that's not the case, we had at least one key encounter a gap in the image layer\n            // and stop the search as a result of that.\n            let mut removed = query.remove_overlapping_with(&image_covered_keyspace);\n            // Do not fire missing key error and end early for sparse keys. 
Note that we have already removed\n            // non-inherited keyspaces before, so we can safely do a full `SPARSE_RANGE` remove instead of\n            // figuring out what is the inherited key range and do a fine-grained pruning.\n            removed.remove_overlapping_with(&KeySpace {\n                ranges: vec![SPARSE_RANGE],\n            });\n            if !removed.is_empty() {\n                break Some(removed);\n            }\n\n            // Each key range in the original query is at some point in the LSN space.\n            // When descending into the ancestor, lower all ranges in the LSN space\n            // such that new changes on the parent timeline are not visible.\n            query.lower(timeline.ancestor_lsn);\n\n            let ctx = RequestContextBuilder::from(ctx)\n                .perf_span(|crnt_perf_span| {\n                    info_span!(\n                        target: PERF_TRACE_TARGET,\n                        parent: crnt_perf_span,\n                        \"GET_ANCESTOR\",\n                        timeline = %timeline.timeline_id,\n                        ancestor = %ancestor_timeline.timeline_id,\n                        ancestor_lsn = %timeline.ancestor_lsn\n                    )\n                })\n                .attached_child();\n\n            timeline_owned = timeline\n                .get_ready_ancestor_timeline(ancestor_timeline, &ctx)\n                .maybe_perf_instrument(&ctx, |crnt_perf_span| crnt_perf_span.clone())\n                .await?;\n            timeline = &*timeline_owned;\n        };\n\n        // Remove sparse keys from the keyspace so that it doesn't fire errors.\n        let missing_keyspace = if let Some(missing_keyspace) = missing_keyspace {\n            let mut missing_keyspace = missing_keyspace;\n            missing_keyspace.remove_overlapping_with(&KeySpace {\n                ranges: vec![SPARSE_RANGE],\n            });\n            if missing_keyspace.is_empty() {\n                None\n   
         } else {\n                Some(missing_keyspace)\n            }\n        } else {\n            None\n        };\n\n        if let Some(missing_keyspace) = missing_keyspace {\n            return Err(GetVectoredError::MissingKey(Box::new(MissingKeyError {\n                keyspace: missing_keyspace, /* better if we can store the full keyspace */\n                shard: self.shard_identity.number,\n                original_hwm_lsn,\n                ancestor_lsn: Some(timeline.ancestor_lsn),\n                backtrace: None,\n                read_path: std::mem::take(&mut reconstruct_state.read_path),\n                query: None,\n            })));\n        }\n\n        Ok(())\n    }\n\n    async fn get_vectored_init_fringe(\n        &self,\n        query: &VersionedKeySpaceQuery,\n    ) -> Result<LayerFringe, GetVectoredError> {\n        let mut fringe = LayerFringe::new();\n        let guard = self.layers.read(LayerManagerLockHolder::GetPage).await;\n\n        match query {\n            VersionedKeySpaceQuery::Uniform { keyspace, lsn } => {\n                // LSNs requested by the compute or determined by the pageserver\n                // are inclusive. 
Queries to the layer map use exclusive LSNs.\n                // Hence, bump the value before the query - same in the other\n                // match arm.\n                let cont_lsn = Lsn(lsn.0 + 1);\n                guard.update_search_fringe(keyspace, cont_lsn, &mut fringe)?;\n            }\n            VersionedKeySpaceQuery::Scattered { keyspaces_at_lsn } => {\n                for (lsn, keyspace) in keyspaces_at_lsn.iter() {\n                    let cont_lsn_for_keyspace = Lsn(lsn.0 + 1);\n                    guard.update_search_fringe(keyspace, cont_lsn_for_keyspace, &mut fringe)?;\n                }\n            }\n        }\n\n        Ok(fringe)\n    }\n\n    /// Collect the reconstruct data for a keyspace from the specified timeline.\n    ///\n    /// Maintain a fringe [`LayerFringe`] which tracks all the layers that intersect\n    /// the current keyspace. The current keyspace of the search at any given timeline\n    /// is the original keyspace minus all the keys that have been completed minus\n    /// any keys for which we couldn't find an intersecting layer. It's not tracked explicitly,\n    /// but if you merge all the keyspaces in the fringe, you get the \"current keyspace\".\n    ///\n    /// This is basically a depth-first search visitor implementation where a vertex\n    /// is the (layer, lsn range, key space) tuple. The fringe acts as the stack.\n    ///\n    /// At each iteration pop the top of the fringe (the layer with the highest Lsn)\n    /// and get all the required reconstruct data from the layer in one go.\n    ///\n    /// Returns the completed keyspace and the keyspaces with image coverage. 
The caller\n    /// decides how to deal with these two keyspaces.\n    async fn get_vectored_reconstruct_data_timeline(\n        timeline: &Timeline,\n        query: &VersionedKeySpaceQuery,\n        reconstruct_state: &mut ValuesReconstructState,\n        cancel: &CancellationToken,\n        ctx: &RequestContext,\n    ) -> Result<TimelineVisitOutcome, GetVectoredError> {\n        // Prevent GC from progressing while visiting the current timeline.\n        // If we are GC-ing because a new image layer was added while traversing\n        // the timeline, then it will remove layers that are required for fulfilling\n        // the current get request (read-path cannot \"look back\" and notice the new\n        // image layer).\n        let _gc_cutoff_holder = timeline.get_applied_gc_cutoff_lsn();\n\n        // See `compaction::compact_with_gc` for why we need this.\n        let _guard = timeline.gc_compaction_layer_update_lock.read().await;\n\n        // Initialize the fringe\n        let mut fringe = timeline.get_vectored_init_fringe(query).await?;\n\n        let mut completed_keyspace = KeySpace::default();\n        let mut image_covered_keyspace = KeySpaceRandomAccum::new();\n\n        while let Some((layer_to_read, keyspace_to_read, lsn_range)) = fringe.next_layer() {\n            if cancel.is_cancelled() {\n                return Err(GetVectoredError::Cancelled);\n            }\n\n            if let Some(ref mut read_path) = reconstruct_state.read_path {\n                read_path.record_layer_visit(&layer_to_read, &keyspace_to_read, &lsn_range);\n            }\n\n            // Visit the layer and plan IOs for it\n            let next_cont_lsn = lsn_range.start;\n            layer_to_read\n                .get_values_reconstruct_data(\n                    keyspace_to_read.clone(),\n                    lsn_range,\n                    reconstruct_state,\n                    ctx,\n                )\n                .await?;\n\n            let mut unmapped_keyspace = 
keyspace_to_read;\n            let cont_lsn = next_cont_lsn;\n\n            reconstruct_state.on_layer_visited(&layer_to_read);\n\n            let (keys_done_last_step, keys_with_image_coverage) =\n                reconstruct_state.consume_done_keys();\n            unmapped_keyspace.remove_overlapping_with(&keys_done_last_step);\n            completed_keyspace.merge(&keys_done_last_step);\n            if let Some(keys_with_image_coverage) = keys_with_image_coverage {\n                unmapped_keyspace\n                    .remove_overlapping_with(&KeySpace::single(keys_with_image_coverage.clone()));\n                image_covered_keyspace.add_range(keys_with_image_coverage);\n            }\n\n            // Query the layer map for the next layers to read.\n            //\n            // Do not descent any further if the last layer we visited\n            // completed all keys in the keyspace it inspected. This is not\n            // required for correctness, but avoids visiting extra layers\n            // which turns out to be a perf bottleneck in some cases.\n            if !unmapped_keyspace.is_empty() {\n                let guard = timeline.layers.read(LayerManagerLockHolder::GetPage).await;\n                guard.update_search_fringe(&unmapped_keyspace, cont_lsn, &mut fringe)?;\n\n                // It's safe to drop the layer map lock after planning the next round of reads.\n                // The fringe keeps readable handles for the layers which are safe to read even\n                // if layers were compacted or flushed.\n                //\n                // The more interesting consideration is: \"Why is the read algorithm still correct\n                // if the layer map changes while it is operating?\". Doing a vectored read on a\n                // timeline boils down to pushing an imaginary lsn boundary downwards for each range\n                // covered by the read. 
The layer map tells us how to move the lsn downwards for a\n                // range at *a particular point in time*. It is fine for the answer to be different\n                // at two different time points.\n                drop(guard);\n            }\n        }\n\n        Ok(TimelineVisitOutcome {\n            completed_keyspace,\n            image_covered_keyspace: image_covered_keyspace.consume_keyspace(),\n        })\n    }\n\n    async fn get_ready_ancestor_timeline(\n        &self,\n        ancestor: &Arc<Timeline>,\n        ctx: &RequestContext,\n    ) -> Result<Arc<Timeline>, GetReadyAncestorError> {\n        // It's possible that the ancestor timeline isn't active yet, or\n        // is active but hasn't yet caught up to the branch point. Wait\n        // for it.\n        //\n        // This cannot happen while the pageserver is running normally,\n        // because you cannot create a branch from a point that isn't\n        // present in the pageserver yet. However, we don't wait for the\n        // branch point to be uploaded to cloud storage before creating\n        // a branch. I.e., the branch LSN need not be remote consistent\n        // for the branching operation to succeed.\n        //\n        // Hence, if we try to load a tenant in such a state where\n        // 1. the existence of the branch was persisted (in IndexPart and/or locally)\n        // 2. 
but the ancestor state is behind branch_lsn because it was not yet persisted\n        // then we will need to wait for the ancestor timeline to\n        // re-stream WAL up to branch_lsn before we access it.\n        //\n        // How can a tenant get in such a state?\n        // - ungraceful pageserver process exit\n        // - detach+attach => this is a bug, https://github.com/neondatabase/neon/issues/4219\n        //\n        // NB: this could be avoided by requiring\n        //   branch_lsn >= remote_consistent_lsn\n        // during branch creation.\n        match ancestor.wait_to_become_active(ctx).await {\n            Ok(()) => {}\n            Err(TimelineState::Stopping) => {\n                // If an ancestor is stopping, it means the tenant is stopping: handle this the same as if this timeline was stopping.\n                return Err(GetReadyAncestorError::Cancelled);\n            }\n            Err(state) => {\n                return Err(GetReadyAncestorError::BadState {\n                    timeline_id: ancestor.timeline_id,\n                    state,\n                });\n            }\n        }\n        ancestor\n            .wait_lsn(\n                self.ancestor_lsn,\n                WaitLsnWaiter::Timeline(self),\n                WaitLsnTimeout::Default,\n                ctx,\n            )\n            .await\n            .map_err(|e| match e {\n                e @ WaitLsnError::Timeout(_) => GetReadyAncestorError::AncestorLsnTimeout(e),\n                WaitLsnError::Shutdown => GetReadyAncestorError::Cancelled,\n                WaitLsnError::BadState(state) => GetReadyAncestorError::BadState {\n                    timeline_id: ancestor.timeline_id,\n                    state,\n                },\n            })?;\n\n        Ok(ancestor.clone())\n    }\n\n    pub(crate) fn get_shard_identity(&self) -> &ShardIdentity {\n        &self.shard_identity\n    }\n\n    #[inline(always)]\n    pub(crate) fn shard_timeline_id(&self) -> 
ShardTimelineId {\n        ShardTimelineId {\n            shard_index: ShardIndex {\n                shard_number: self.shard_identity.number,\n                shard_count: self.shard_identity.count,\n            },\n            timeline_id: self.timeline_id,\n        }\n    }\n\n    /// Returns a non-frozen open in-memory layer for ingestion.\n    ///\n    /// Takes a witness of timeline writer state lock being held, because it makes no sense to call\n    /// this function without holding the mutex.\n    async fn get_layer_for_write(\n        &self,\n        lsn: Lsn,\n        _guard: &tokio::sync::MutexGuard<'_, Option<TimelineWriterState>>,\n        ctx: &RequestContext,\n    ) -> anyhow::Result<Arc<InMemoryLayer>> {\n        let mut guard = self\n            .layers\n            .write(LayerManagerLockHolder::GetLayerForWrite)\n            .await;\n\n        let last_record_lsn = self.get_last_record_lsn();\n        ensure!(\n            lsn > last_record_lsn,\n            \"cannot modify relation after advancing last_record_lsn (incoming_lsn={}, last_record_lsn={})\",\n            lsn,\n            last_record_lsn,\n        );\n\n        let layer = guard\n            .open_mut()?\n            .get_layer_for_write(\n                lsn,\n                self.conf,\n                self.timeline_id,\n                self.tenant_shard_id,\n                &self.gate,\n                &self.cancel,\n                ctx,\n            )\n            .await?;\n        Ok(layer)\n    }\n\n    pub(crate) fn finish_write(&self, new_lsn: Lsn) {\n        assert!(new_lsn.is_aligned());\n\n        self.metrics.last_record_lsn_gauge.set(new_lsn.0 as i64);\n        self.last_record_lsn.advance(new_lsn);\n    }\n\n    /// Freeze any existing open in-memory layer and unconditionally notify the flush loop.\n    ///\n    /// Unconditional flush loop notification is given because in sharded cases we will want to\n    /// leave an Lsn gap. 
Unsharded tenants do not have Lsn gaps.\n    async fn freeze_inmem_layer_at(\n        &self,\n        at: Lsn,\n        write_lock: &mut tokio::sync::MutexGuard<'_, Option<TimelineWriterState>>,\n    ) -> Result<u64, FlushLayerError> {\n        let frozen = {\n            let mut guard = self\n                .layers\n                .write(LayerManagerLockHolder::TryFreezeLayer)\n                .await;\n            guard\n                .open_mut()?\n                .try_freeze_in_memory_layer(at, &self.last_freeze_at, write_lock, &self.metrics)\n                .await\n        };\n\n        if frozen {\n            let now = Instant::now();\n            *(self.last_freeze_ts.write().unwrap()) = now;\n        }\n\n        // Increment the flush cycle counter and wake up the flush task.\n        // Remember the new value, so that when we listen for the flush\n        // to finish, we know when the flush that we initiated has\n        // finished, instead of some other flush that was started earlier.\n        let mut my_flush_request = 0;\n\n        let flush_loop_state = { *self.flush_loop_state.lock().unwrap() };\n        if !matches!(flush_loop_state, FlushLoopState::Running { .. 
}) {\n            return Err(FlushLayerError::NotRunning(flush_loop_state));\n        }\n\n        self.layer_flush_start_tx.send_modify(|(counter, lsn)| {\n            my_flush_request = *counter + 1;\n            *counter = my_flush_request;\n            *lsn = std::cmp::max(at, *lsn);\n        });\n\n        assert_ne!(my_flush_request, 0);\n\n        Ok(my_flush_request)\n    }\n\n    /// Layer flusher task's main loop.\n    async fn flush_loop(\n        self: &Arc<Self>,\n        mut layer_flush_start_rx: tokio::sync::watch::Receiver<(u64, Lsn)>,\n        ctx: &RequestContext,\n    ) {\n        // Always notify waiters about the flush loop exiting since the loop might stop\n        // when the timeline hasn't been cancelled.\n        let scopeguard_rx = layer_flush_start_rx.clone();\n        scopeguard::defer! {\n            let (flush_counter, _) = *scopeguard_rx.borrow();\n            let _ = self\n                .layer_flush_done_tx\n                .send_replace((flush_counter, Err(FlushLayerError::Cancelled)));\n        }\n\n        // Subscribe to L0 delta layer updates, for compaction backpressure.\n        let mut watch_l0 = match self\n            .layers\n            .read(LayerManagerLockHolder::FlushLoop)\n            .await\n            .layer_map()\n        {\n            Ok(lm) => lm.watch_level0_deltas(),\n            Err(Shutdown) => return,\n        };\n\n        info!(\"started flush loop\");\n        loop {\n            tokio::select! 
{\n                _ = self.cancel.cancelled() => {\n                    info!(\"shutting down layer flush task due to Timeline::cancel\");\n                    break;\n                },\n                _ = layer_flush_start_rx.changed() => {}\n            }\n            trace!(\"waking up\");\n            let (flush_counter, frozen_to_lsn) = *layer_flush_start_rx.borrow();\n\n            // The highest LSN to which we flushed in the loop over frozen layers\n            let mut flushed_to_lsn = Lsn(0);\n\n            let result = loop {\n                if self.cancel.is_cancelled() {\n                    info!(\"dropping out of flush loop for timeline shutdown\");\n                    return;\n                }\n\n                // Break to notify potential waiters as soon as we've flushed the requested LSN. If\n                // more requests have arrived in the meanwhile, we'll resume flushing afterwards.\n                if flushed_to_lsn >= frozen_to_lsn {\n                    break Ok(());\n                }\n\n                // Fetch the next layer to flush, if any.\n                let (layer, l0_count, frozen_count, frozen_size, open_layer_size) = {\n                    let layers = self.layers.read(LayerManagerLockHolder::FlushLoop).await;\n                    let Ok(lm) = layers.layer_map() else {\n                        info!(\"dropping out of flush loop for timeline shutdown\");\n                        return;\n                    };\n                    let l0_count = lm.level0_deltas().len();\n                    let frozen_count = lm.frozen_layers.len();\n                    let frozen_size: u64 = lm\n                        .frozen_layers\n                        .iter()\n                        .map(|l| l.estimated_in_mem_size())\n                        .sum();\n                    let open_layer_size: u64 = lm\n                        .open_layer\n                        .as_ref()\n                        .map(|l| 
l.estimated_in_mem_size())\n                        .unwrap_or(0);\n                    let layer = lm.frozen_layers.front().cloned();\n                    (layer, l0_count, frozen_count, frozen_size, open_layer_size)\n                    // drop 'layers' lock\n                };\n                let Some(layer) = layer else {\n                    break Ok(());\n                };\n\n                // Stall flushes to backpressure if compaction can't keep up. This is propagated up\n                // to WAL ingestion by having ephemeral layer rolls wait for flushes.\n                if let Some(stall_threshold) = self.get_l0_flush_stall_threshold() {\n                    if l0_count >= stall_threshold {\n                        warn!(\n                            \"stalling layer flushes for compaction backpressure at {l0_count} \\\n                            L0 layers ({frozen_count} frozen layers with {frozen_size} bytes, {open_layer_size} bytes in open layer)\"\n                        );\n                        let stall_timer = self\n                            .metrics\n                            .flush_delay_histo\n                            .start_timer()\n                            .record_on_drop();\n                        tokio::select! 
{\n                            result = watch_l0.wait_for(|l0| *l0 < stall_threshold) => {\n                                if let Ok(l0) = result.as_deref() {\n                                    let delay = stall_timer.elapsed().as_secs_f64();\n                                    info!(\"resuming layer flushes at {l0} L0 layers after {delay:.3}s\");\n                                }\n                            },\n                            _ = self.cancel.cancelled() => {},\n                        }\n                        continue; // check again\n                    }\n                }\n\n                // Flush the layer.\n                let flush_timer = self.metrics.flush_time_histo.start_timer();\n                match self.flush_frozen_layer(layer, ctx).await {\n                    Ok(layer_lsn) => flushed_to_lsn = max(flushed_to_lsn, layer_lsn),\n                    Err(FlushLayerError::Cancelled) => {\n                        info!(\"dropping out of flush loop for timeline shutdown\");\n                        return;\n                    }\n                    err @ Err(\n                        FlushLayerError::NotRunning(_)\n                        | FlushLayerError::Other(_)\n                        | FlushLayerError::CreateImageLayersError(_),\n                    ) => {\n                        error!(\"could not flush frozen layer: {err:?}\");\n                        break err.map(|_| ());\n                    }\n                }\n                let flush_duration = flush_timer.stop_and_record();\n\n                // Notify the tenant compaction loop if L0 compaction is needed.\n                let l0_count = *watch_l0.borrow();\n                if l0_count >= self.get_compaction_threshold() {\n                    self.l0_compaction_trigger.notify_one();\n                }\n\n                // Delay the next flush to backpressure if compaction can't keep up. 
We delay by the\n                // flush duration such that the flush takes 2x as long. This is propagated up to WAL\n                // ingestion by having ephemeral layer rolls wait for flushes.\n                if let Some(delay_threshold) = self.get_l0_flush_delay_threshold() {\n                    if l0_count >= delay_threshold {\n                        let delay = flush_duration.as_secs_f64();\n                        info!(\n                            \"delaying layer flush by {delay:.3}s for compaction backpressure at \\\n                            {l0_count} L0 layers ({frozen_count} frozen layers with {frozen_size} bytes, {open_layer_size} bytes in open layer)\"\n                        );\n                        let _delay_timer = self\n                            .metrics\n                            .flush_delay_histo\n                            .start_timer()\n                            .record_on_drop();\n                        tokio::select! {\n                            _ = tokio::time::sleep(flush_duration) => {},\n                            _ = watch_l0.wait_for(|l0| *l0 < delay_threshold) => {},\n                            _ = self.cancel.cancelled() => {},\n                        }\n                    }\n                }\n            };\n\n            // Unsharded tenants should never advance their LSN beyond the end of the\n            // highest layer they write: such gaps between layer data and the frozen LSN\n            // are only legal on sharded tenants.\n            debug_assert!(\n                self.shard_identity.count.count() > 1\n                    || flushed_to_lsn >= frozen_to_lsn\n                    || !flushed_to_lsn.is_valid()\n            );\n\n            if flushed_to_lsn < frozen_to_lsn\n                && self.shard_identity.count.count() > 1\n                && result.is_ok()\n            {\n                // If our layer flushes didn't carry disk_consistent_lsn up to the `to_lsn` advertised\n          
      // to us via layer_flush_start_rx, then advance it here.\n                //\n                // This path is only taken for tenants with multiple shards: single sharded tenants should\n                // never encounter a gap in the wal.\n                let old_disk_consistent_lsn = self.disk_consistent_lsn.load();\n                tracing::debug!(\n                    \"Advancing disk_consistent_lsn across layer gap {old_disk_consistent_lsn}->{frozen_to_lsn}\"\n                );\n                if self.set_disk_consistent_lsn(frozen_to_lsn) {\n                    if let Err(e) = self.schedule_uploads(frozen_to_lsn, vec![]) {\n                        tracing::warn!(\n                            \"Failed to schedule metadata upload after updating disk_consistent_lsn: {e}\"\n                        );\n                    }\n                }\n            }\n\n            // Notify any listeners that we're done\n            let _ = self\n                .layer_flush_done_tx\n                .send_replace((flush_counter, result));\n        }\n    }\n\n    /// Waits any flush request created by [`Self::freeze_inmem_layer_at`] to complete.\n    async fn wait_flush_completion(&self, request: u64) -> Result<(), FlushLayerError> {\n        let mut rx = self.layer_flush_done_tx.subscribe();\n        loop {\n            {\n                let (last_result_counter, last_result) = &*rx.borrow();\n                if *last_result_counter >= request {\n                    if let Err(err) = last_result {\n                        // We already logged the original error in\n                        // flush_loop. We cannot propagate it to the caller\n                        // here, because it might not be Cloneable\n                        return Err(err.clone());\n                    } else {\n                        return Ok(());\n                    }\n                }\n            }\n            trace!(\"waiting for flush to complete\");\n            tokio::select! 
{\n                rx_e = rx.changed() => {\n                    rx_e.map_err(|_| FlushLayerError::NotRunning(*self.flush_loop_state.lock().unwrap()))?;\n                },\n                // Cancellation safety: we are not leaving an I/O in-flight for the flush, we're just ignoring\n                // the notification from [`flush_loop`] that it completed.\n                _ = self.cancel.cancelled() => {\n                    tracing::info!(\"Cancelled layer flush due on timeline shutdown\");\n                    return Ok(())\n                }\n            };\n            trace!(\"done\")\n        }\n    }\n\n    /// Flush one frozen in-memory layer to disk, as a new delta layer.\n    ///\n    /// Return value is the last lsn (inclusive) of the layer that was frozen.\n    #[instrument(skip_all, fields(layer=%frozen_layer))]\n    async fn flush_frozen_layer(\n        self: &Arc<Self>,\n        frozen_layer: Arc<InMemoryLayer>,\n        ctx: &RequestContext,\n    ) -> Result<Lsn, FlushLayerError> {\n        debug_assert_current_span_has_tenant_and_timeline_id();\n\n        // As a special case, when we have just imported an image into the repository,\n        // instead of writing out a L0 delta layer, we directly write out image layer\n        // files instead. 
This is possible as long as *all* the data imported into the\n        // repository have the same LSN.\n        let lsn_range = frozen_layer.get_lsn_range();\n\n        // Whether to directly create image layers for this flush, or flush them as delta layers\n        let create_image_layer =\n            lsn_range.start == self.initdb_lsn && lsn_range.end == Lsn(self.initdb_lsn.0 + 1);\n\n        #[cfg(test)]\n        {\n            match &mut *self.flush_loop_state.lock().unwrap() {\n                FlushLoopState::NotStarted | FlushLoopState::Exited => {\n                    panic!(\"flush loop not running\")\n                }\n                FlushLoopState::Running {\n                    expect_initdb_optimization,\n                    initdb_optimization_count,\n                    ..\n                } => {\n                    if create_image_layer {\n                        *initdb_optimization_count += 1;\n                    } else {\n                        assert!(!*expect_initdb_optimization, \"expected initdb optimization\");\n                    }\n                }\n            }\n        }\n\n        let (layers_to_upload, delta_layer_to_add) = if create_image_layer {\n            // Note: The 'ctx' in use here has DownloadBehavior::Error. 
We should not\n            // require downloading anything during initial import.\n            let ((rel_partition, metadata_partition), _lsn) = self\n                .repartition(\n                    self.initdb_lsn,\n                    self.get_compaction_target_size(),\n                    EnumSet::empty(),\n                    ctx,\n                )\n                .await\n                .map_err(|e| FlushLayerError::from_anyhow(self, e.into_anyhow()))?;\n\n            if self.cancel.is_cancelled() {\n                return Err(FlushLayerError::Cancelled);\n            }\n\n            // Ensure that we have a single call to `create_image_layers` with a combined dense keyspace.\n            // So that the key ranges don't overlap.\n            let mut partitions = KeyPartitioning::default();\n            partitions.parts.extend(rel_partition.parts);\n            if !metadata_partition.parts.is_empty() {\n                assert_eq!(\n                    metadata_partition.parts.len(),\n                    1,\n                    \"currently sparse keyspace should only contain a single metadata keyspace\"\n                );\n                // Safety: create_image_layers treat sparse keyspaces differently that it does not scan\n                // every single key within the keyspace, and therefore, it's safe to force converting it\n                // into a dense keyspace before calling this function.\n                partitions\n                    .parts\n                    .extend(metadata_partition.into_dense().parts);\n            }\n\n            let mut layers_to_upload = Vec::new();\n            let (generated_image_layers, is_complete) = self\n                .create_image_layers(\n                    &partitions,\n                    self.initdb_lsn,\n                    None,\n                    ImageLayerCreationMode::Initial,\n                    ctx,\n                    LastImageLayerCreationStatus::Initial,\n                    false, // 
don't yield for L0, we're flushing L0\n                )\n                .instrument(info_span!(\"create_image_layers\", mode = %ImageLayerCreationMode::Initial, partition_mode = \"initial\", lsn = %self.initdb_lsn))\n                .await?;\n            debug_assert!(\n                matches!(is_complete, LastImageLayerCreationStatus::Complete),\n                \"init image generation mode must fully cover the keyspace\"\n            );\n            layers_to_upload.extend(generated_image_layers);\n\n            (layers_to_upload, None)\n        } else {\n            // Normal case, write out a L0 delta layer file.\n            // `create_delta_layer` will not modify the layer map.\n            // We will remove frozen layer and add delta layer in one atomic operation later.\n            let Some(layer) = self\n                .create_delta_layer(&frozen_layer, None, ctx)\n                .await\n                .map_err(|e| FlushLayerError::from_anyhow(self, e))?\n            else {\n                panic!(\"delta layer cannot be empty if no filter is applied\");\n            };\n            (\n                // FIXME: even though we have a single image and single delta layer assumption\n                // we push them to vec\n                vec![layer.clone()],\n                Some(layer),\n            )\n        };\n\n        pausable_failpoint!(\"flush-layer-cancel-after-writing-layer-out-pausable\");\n\n        if self.cancel.is_cancelled() {\n            return Err(FlushLayerError::Cancelled);\n        }\n\n        fail_point!(\"flush-layer-before-update-remote-consistent-lsn\", |_| {\n            Err(FlushLayerError::Other(anyhow!(\"failpoint\").into()))\n        });\n\n        let disk_consistent_lsn = Lsn(lsn_range.end.0 - 1);\n\n        // The new on-disk layers are now in the layer map. We can remove the\n        // in-memory layer from the map now. 
The flushed layer is stored in\n        // the mapping in `create_delta_layer`.\n        {\n            let mut guard = self\n                .layers\n                .write(LayerManagerLockHolder::FlushFrozenLayer)\n                .await;\n\n            guard.open_mut()?.finish_flush_l0_layer(\n                delta_layer_to_add.as_ref(),\n                &frozen_layer,\n                &self.metrics,\n            );\n\n            if self.set_disk_consistent_lsn(disk_consistent_lsn) {\n                // Schedule remote uploads that will reflect our new disk_consistent_lsn\n                self.schedule_uploads(disk_consistent_lsn, layers_to_upload)\n                    .map_err(|e| FlushLayerError::from_anyhow(self, e))?;\n            }\n            // release lock on 'layers'\n        };\n\n        // FIXME: between create_delta_layer and the scheduling of the upload in `update_metadata_file`,\n        // a compaction can delete the file and then it won't be available for uploads any more.\n        // We still schedule the upload, resulting in an error, but ideally we'd somehow avoid this\n        // race situation.\n        // See https://github.com/neondatabase/neon/issues/4526\n        pausable_failpoint!(\"flush-frozen-pausable\");\n\n        // This failpoint is used by another test case `test_pageserver_recovery`.\n        fail_point!(\"flush-frozen-exit\");\n\n        Ok(Lsn(lsn_range.end.0 - 1))\n    }\n\n    /// Return true if the value changed\n    ///\n    /// This function must only be used from the layer flush task.\n    fn set_disk_consistent_lsn(&self, new_value: Lsn) -> bool {\n        let old_value = self.disk_consistent_lsn.fetch_max(new_value);\n        assert!(\n            new_value >= old_value,\n            \"disk_consistent_lsn must be growing monotonously at runtime; current {old_value}, offered {new_value}\"\n        );\n\n        self.metrics\n            .disk_consistent_lsn_gauge\n            .set(new_value.0 as i64);\n        
new_value != old_value\n    }\n\n    /// Update metadata file\n    fn schedule_uploads(\n        &self,\n        disk_consistent_lsn: Lsn,\n        layers_to_upload: impl IntoIterator<Item = ResidentLayer>,\n    ) -> anyhow::Result<()> {\n        // We can only save a valid 'prev_record_lsn' value on disk if we\n        // flushed *all* in-memory changes to disk. We only track\n        // 'prev_record_lsn' in memory for the latest processed record, so we\n        // don't remember what the correct value that corresponds to some old\n        // LSN is. But if we flush everything, then the value corresponding to the\n        // current 'last_record_lsn' is correct and we can store it on disk.\n        let RecordLsn {\n            last: last_record_lsn,\n            prev: prev_record_lsn,\n        } = self.last_record_lsn.load();\n        let ondisk_prev_record_lsn = if disk_consistent_lsn == last_record_lsn {\n            Some(prev_record_lsn)\n        } else {\n            None\n        };\n\n        let update = crate::tenant::metadata::MetadataUpdate::new(\n            disk_consistent_lsn,\n            ondisk_prev_record_lsn,\n            *self.applied_gc_cutoff_lsn.read(),\n        );\n\n        fail_point!(\"checkpoint-before-saving-metadata\", |x| bail!(\n            \"{}\",\n            x.unwrap()\n        ));\n\n        for layer in layers_to_upload {\n            self.remote_client.schedule_layer_file_upload(layer)?;\n        }\n        self.remote_client\n            .schedule_index_upload_for_metadata_update(&update)?;\n\n        Ok(())\n    }\n\n    pub(crate) async fn preserve_initdb_archive(&self) -> anyhow::Result<()> {\n        self.remote_client\n            .preserve_initdb_archive(\n                &self.tenant_shard_id.tenant_id,\n                &self.timeline_id,\n                &self.cancel,\n            )\n            .await\n    }\n\n    // Write out the given frozen in-memory layer as a new L0 delta file. 
This L0 file will not be tracked\n    // in layer map immediately. The caller is responsible to put it into the layer map.\n    async fn create_delta_layer(\n        self: &Arc<Self>,\n        frozen_layer: &Arc<InMemoryLayer>,\n        key_range: Option<Range<Key>>,\n        ctx: &RequestContext,\n    ) -> anyhow::Result<Option<ResidentLayer>> {\n        let self_clone = Arc::clone(self);\n        let frozen_layer = Arc::clone(frozen_layer);\n        let ctx = ctx.attached_child();\n        let work = async move {\n            let Some((desc, path)) = frozen_layer\n                .write_to_disk(\n                    &ctx,\n                    key_range,\n                    self_clone.l0_flush_global_state.inner(),\n                    &self_clone.gate,\n                    self_clone.cancel.clone(),\n                )\n                .await?\n            else {\n                return Ok(None);\n            };\n            let new_delta = Layer::finish_creating(self_clone.conf, &self_clone, desc, &path)?;\n\n            // The write_to_disk() above calls writer.finish() which already did the fsync of the inodes.\n            // We just need to fsync the directory in which these inodes are linked,\n            // which we know to be the timeline directory.\n            //\n            // We use fatal_err() below because the after write_to_disk returns with success,\n            // the in-memory state of the filesystem already has the layer file in its final place,\n            // and subsequent pageserver code could think it's durable while it really isn't.\n            let timeline_dir = VirtualFile::open(\n                &self_clone\n                    .conf\n                    .timeline_path(&self_clone.tenant_shard_id, &self_clone.timeline_id),\n                &ctx,\n            )\n            .await\n            .fatal_err(\"VirtualFile::open for timeline dir fsync\");\n            timeline_dir\n                .sync_all()\n                .await\n      
          .fatal_err(\"VirtualFile::sync_all timeline dir\");\n            anyhow::Ok(Some(new_delta))\n        };\n        // Before tokio-epoll-uring, we ran write_to_disk & the sync_all inside spawn_blocking.\n        // Preserve that behavior to maintain the same behavior for `virtual_file_io_engine=std-fs`.\n        use crate::virtual_file::io_engine::IoEngine;\n        match crate::virtual_file::io_engine::get() {\n            IoEngine::NotSet => panic!(\"io engine not set\"),\n            IoEngine::StdFs => {\n                let span = tracing::info_span!(\"blocking\");\n                tokio::task::spawn_blocking({\n                    move || Handle::current().block_on(work.instrument(span))\n                })\n                .await\n                .context(\"spawn_blocking\")\n                .and_then(|x| x)\n            }\n            #[cfg(target_os = \"linux\")]\n            IoEngine::TokioEpollUring => work.await,\n        }\n    }\n\n    async fn repartition(\n        &self,\n        lsn: Lsn,\n        partition_size: u64,\n        flags: EnumSet<CompactFlags>,\n        ctx: &RequestContext,\n    ) -> Result<((KeyPartitioning, SparseKeyPartitioning), Lsn), RepartitionError> {\n        let Ok(mut guard) = self.partitioning.try_write_guard() else {\n            // NB: there are two callers, one is the compaction task, of which there is only one per struct Tenant and hence Timeline.\n            // The other is the initdb optimization in flush_frozen_layer, used by `boostrap_timeline`, which runs before `.activate()`\n            // and hence before the compaction task starts.\n            return Err(RepartitionError::Other(anyhow!(\n                \"repartition() called concurrently\"\n            )));\n        };\n        let ((dense_partition, sparse_partition), partition_lsn) = &*guard.read();\n        if lsn < *partition_lsn {\n            return Err(RepartitionError::Other(anyhow!(\n                \"repartition() called with LSN going 
backwards, this should not happen\"\n            )));\n        }\n\n        let distance = lsn.0 - partition_lsn.0;\n        if *partition_lsn != Lsn(0)\n            && distance <= self.repartition_threshold\n            && !flags.contains(CompactFlags::ForceRepartition)\n        {\n            debug!(\n                distance,\n                threshold = self.repartition_threshold,\n                \"no repartitioning needed\"\n            );\n            return Ok((\n                (dense_partition.clone(), sparse_partition.clone()),\n                *partition_lsn,\n            ));\n        }\n\n        let (dense_ks, sparse_ks) = self\n            .collect_keyspace(lsn, ctx)\n            .await\n            .map_err(RepartitionError::CollectKeyspace)?;\n        let dense_partitioning = dense_ks.partition(\n            &self.shard_identity,\n            partition_size,\n            postgres_ffi::BLCKSZ as u64,\n        );\n        let sparse_partitioning = SparseKeyPartitioning {\n            parts: vec![sparse_ks],\n        }; // no partitioning for metadata keys for now\n        let result = ((dense_partitioning, sparse_partitioning), lsn);\n        guard.write(result.clone());\n        Ok(result)\n    }\n\n    // Is it time to create a new image layer for the given partition? 
True if we want to generate.\n    async fn time_for_new_image_layer(\n        &self,\n        partition: &KeySpace,\n        lsn: Lsn,\n        force_image_creation_lsn: Option<Lsn>,\n    ) -> bool {\n        let threshold = self.get_image_creation_threshold();\n\n        let guard = self.layers.read(LayerManagerLockHolder::Compaction).await;\n        let Ok(layers) = guard.layer_map() else {\n            return false;\n        };\n        let mut min_image_lsn: Lsn = Lsn::MAX;\n        let mut max_deltas = 0;\n        for part_range in &partition.ranges {\n            let image_coverage = layers.image_coverage(part_range, lsn);\n            for (img_range, last_img) in image_coverage {\n                let img_lsn = if let Some(last_img) = last_img {\n                    last_img.get_lsn_range().end\n                } else {\n                    Lsn(0)\n                };\n                // Let's consider an example:\n                //\n                // delta layer with LSN range 71-81\n                // delta layer with LSN range 81-91\n                // delta layer with LSN range 91-101\n                // image layer at LSN 100\n                //\n                // If 'lsn' is still 100, i.e. no new WAL has been processed since the last image layer,\n                // there's no need to create a new one. We check this case explicitly, to avoid passing\n                // a bogus range to count_deltas below, with start > end. 
It's even possible that there\n                // are some delta layers *later* than current 'lsn', if more WAL was processed and flushed\n                // after we read last_record_lsn, which is passed here in the 'lsn' argument.\n                if img_lsn < lsn {\n                    let num_deltas =\n                        layers.count_deltas(&img_range, &(img_lsn..lsn), Some(threshold));\n\n                    max_deltas = max_deltas.max(num_deltas);\n                    if num_deltas >= threshold {\n                        debug!(\n                            \"key range {}-{}, has {} deltas on this timeline in LSN range {}..{}\",\n                            img_range.start, img_range.end, num_deltas, img_lsn, lsn\n                        );\n                        return true;\n                    }\n                }\n                min_image_lsn = min(min_image_lsn, img_lsn);\n            }\n        }\n\n        // HADRON\n        // for child timelines, we consider all pages up to ancestor_LSN are redone successfully by the parent timeline\n        min_image_lsn = min_image_lsn.max(self.get_ancestor_lsn());\n        if min_image_lsn < force_image_creation_lsn.unwrap_or(Lsn(0)) && max_deltas > 0 {\n            info!(\n                \"forcing image creation for partitioned range {}-{}. Min image LSN: {}, force image creation LSN: {}, num deltas: {}\",\n                partition.ranges[0].start,\n                partition.ranges[0].end,\n                min_image_lsn,\n                force_image_creation_lsn.unwrap(),\n                max_deltas\n            );\n            return true;\n        }\n\n        debug!(\n            max_deltas,\n            \"none of the partitioned ranges had >= {threshold} deltas\"\n        );\n        false\n    }\n\n    /// Create image layers for Postgres data. 
Assumes the caller passes a partition that is not too large,\n    /// so that at most one image layer will be produced from this function.\n    #[allow(clippy::too_many_arguments)]\n    async fn create_image_layer_for_rel_blocks(\n        self: &Arc<Self>,\n        partition: &KeySpace,\n        mut image_layer_writer: ImageLayerWriter,\n        lsn: Lsn,\n        ctx: &RequestContext,\n        img_range: Range<Key>,\n        io_concurrency: IoConcurrency,\n        progress: Option<(usize, usize)>,\n    ) -> Result<ImageLayerCreationOutcome, CreateImageLayersError> {\n        let mut wrote_keys = false;\n\n        let mut key_request_accum = KeySpaceAccum::new();\n        for range in &partition.ranges {\n            let mut key = range.start;\n            while key < range.end {\n                // Decide whether to retain this key: usually we do, but sharded tenants may\n                // need to drop keys that don't belong to them.  If we retain the key, add it\n                // to `key_request_accum` for later issuing a vectored get\n                if self.shard_identity.is_key_disposable(&key) {\n                    debug!(\n                        \"Dropping key {} during compaction (it belongs on shard {:?})\",\n                        key,\n                        self.shard_identity.get_shard_number(&key)\n                    );\n                } else {\n                    key_request_accum.add_key(key);\n                }\n\n                let last_key_in_range = key.next() == range.end;\n                key = key.next();\n\n                // Maybe flush `key_request_accum`\n                if key_request_accum.raw_size() >= self.conf.max_get_vectored_keys.get() as u64\n                    || (last_key_in_range && key_request_accum.raw_size() > 0)\n                {\n                    let query =\n                        VersionedKeySpaceQuery::uniform(key_request_accum.consume_keyspace(), lsn);\n\n                    let results = self\n           
             .get_vectored(query, io_concurrency.clone(), ctx)\n                        .await?;\n\n                    if self.cancel.is_cancelled() {\n                        return Err(CreateImageLayersError::Cancelled);\n                    }\n\n                    for (img_key, img) in results {\n                        let img = match img {\n                            Ok(img) => img,\n                            Err(err) => {\n                                // If we fail to reconstruct a VM or FSM page, we can zero the\n                                // page without losing any actual user data. That seems better\n                                // than failing repeatedly and getting stuck.\n                                //\n                                // We had a bug at one point, where we truncated the FSM and VM\n                                // in the pageserver, but the Postgres didn't know about that\n                                // and continued to generate incremental WAL records for pages\n                                // that didn't exist in the pageserver. 
Trying to replay those\n                                // WAL records failed to find the previous image of the page.\n                                // This special case allows us to recover from that situation.\n                                // See https://github.com/neondatabase/neon/issues/2601.\n                                //\n                                // Unfortunately we cannot do this for the main fork, or for\n                                // any metadata keys, as that would lead to actual data\n                                // loss.\n                                if img_key.is_rel_fsm_block_key() || img_key.is_rel_vm_block_key() {\n                                    warn!(\n                                        \"could not reconstruct FSM or VM key {img_key}, filling with zeros: {err:?}\"\n                                    );\n                                    ZERO_PAGE.clone()\n                                } else {\n                                    return Err(CreateImageLayersError::from(err));\n                                }\n                            }\n                        };\n\n                        // Write all the keys we just read into our new image layer.\n                        image_layer_writer.put_image(img_key, img, ctx).await?;\n                        wrote_keys = true;\n                    }\n                }\n            }\n        }\n\n        let progress_report = progress\n            .map(|(idx, total)| format!(\"({idx}/{total}) \"))\n            .unwrap_or_default();\n        if wrote_keys {\n            // Normal path: we have written some data into the new image layer for this\n            // partition, so flush it to disk.\n            info!(\n                \"{} produced image layer for rel {}\",\n                progress_report,\n                ImageLayerName {\n                    key_range: img_range.clone(),\n                    lsn\n                },\n            );\n      
      Ok(ImageLayerCreationOutcome::Generated {\n                unfinished_image_layer: image_layer_writer,\n            })\n        } else {\n            tracing::debug!(\n                \"{} no data in range {}-{}\",\n                progress_report,\n                img_range.start,\n                img_range.end\n            );\n            Ok(ImageLayerCreationOutcome::Empty)\n        }\n    }\n\n    /// Create an image layer for metadata keys. This function produces one image layer for all metadata\n    /// keys for now. Because metadata keys cannot exceed basebackup size limit, the image layer for it\n    /// would not be too large to fit in a single image layer.\n    ///\n    /// Creating image layers for metadata keys are different from relational keys. Firstly, instead of\n    /// iterating each key and get an image for each of them, we do a `vectored_get` scan over the sparse\n    /// keyspace to get all images in one run. Secondly, we use a different image layer generation metrics\n    /// for metadata keys than relational keys, which is the number of delta files visited during the scan.\n    #[allow(clippy::too_many_arguments)]\n    async fn create_image_layer_for_metadata_keys(\n        self: &Arc<Self>,\n        partition: &KeySpace,\n        mut image_layer_writer: ImageLayerWriter,\n        lsn: Lsn,\n        ctx: &RequestContext,\n        img_range: Range<Key>,\n        mode: ImageLayerCreationMode,\n        io_concurrency: IoConcurrency,\n    ) -> Result<ImageLayerCreationOutcome, CreateImageLayersError> {\n        // Metadata keys image layer creation.\n        let mut reconstruct_state = ValuesReconstructState::new(io_concurrency);\n        let begin = Instant::now();\n        // Directly use `get_vectored_impl` to skip the max_vectored_read_key limit check. 
Note that the keyspace should\n        // not contain too many keys, otherwise this takes a lot of memory.\n        let data = self\n            .get_vectored_impl(\n                VersionedKeySpaceQuery::uniform(partition.clone(), lsn),\n                &mut reconstruct_state,\n                ctx,\n            )\n            .await?;\n        let (data, total_kb_retrieved, total_keys_retrieved) = {\n            let mut new_data = BTreeMap::new();\n            let mut total_kb_retrieved = 0;\n            let mut total_keys_retrieved = 0;\n            for (k, v) in data {\n                let v = v?;\n                total_kb_retrieved += KEY_SIZE + v.len();\n                total_keys_retrieved += 1;\n                new_data.insert(k, v);\n            }\n            (new_data, total_kb_retrieved / 1024, total_keys_retrieved)\n        };\n        let delta_files_accessed = reconstruct_state.get_delta_layers_visited();\n        let elapsed = begin.elapsed();\n\n        let trigger_generation = delta_files_accessed as usize >= MAX_AUX_FILE_V2_DELTAS;\n        info!(\n            \"metadata key compaction: trigger_generation={trigger_generation}, delta_files_accessed={delta_files_accessed}, total_kb_retrieved={total_kb_retrieved}, total_keys_retrieved={total_keys_retrieved}, read_time={}s\",\n            elapsed.as_secs_f64()\n        );\n\n        if !trigger_generation && mode == ImageLayerCreationMode::Try {\n            return Ok(ImageLayerCreationOutcome::Skip);\n        }\n        if self.cancel.is_cancelled() {\n            return Err(CreateImageLayersError::Cancelled);\n        }\n        let mut wrote_any_image = false;\n        for (k, v) in data {\n            if v.is_empty() {\n                // the key has been deleted, it does not need an image\n                // in metadata keyspace, an empty image == tombstone\n                continue;\n            }\n            wrote_any_image = true;\n\n            // No need to handle sharding b/c metadata 
keys are always on the 0-th shard.\n\n            // TODO: split image layers to avoid too large layer files. Too large image files are not handled\n            // on the normal data path either.\n            image_layer_writer.put_image(k, v, ctx).await?;\n        }\n\n        if wrote_any_image {\n            // Normal path: we have written some data into the new image layer for this\n            // partition, so flush it to disk.\n            info!(\n                \"created image layer for metadata {}\",\n                ImageLayerName {\n                    key_range: img_range.clone(),\n                    lsn\n                }\n            );\n            Ok(ImageLayerCreationOutcome::Generated {\n                unfinished_image_layer: image_layer_writer,\n            })\n        } else {\n            tracing::debug!(\"no data in range {}-{}\", img_range.start, img_range.end);\n            Ok(ImageLayerCreationOutcome::Empty)\n        }\n    }\n\n    /// Predicate function which indicates whether we should check if new image layers\n    /// are required. Since checking if new image layers are required is expensive in\n    /// terms of CPU, we only do it in the following cases:\n    /// 1. If the timeline has ingested sufficient WAL to justify the cost or ...\n    /// 2. If enough time has passed since the last check:\n    ///     1. For large tenants, we wish to perform the check more often since they\n    ///        suffer from the lack of image layers. Note that we assume sharded tenants\n    ///        to be large since non-zero shards do not track the logical size.\n    ///     2. 
For small tenants (that can mostly fit in RAM), we use a much longer interval\n    fn should_check_if_image_layers_required(self: &Arc<Timeline>, lsn: Lsn) -> bool {\n        let large_timeline_threshold = self.conf.image_layer_generation_large_timeline_threshold;\n\n        let last_checks_at = self.last_image_layer_creation_check_at.load();\n        let distance = lsn\n            .checked_sub(last_checks_at)\n            .expect(\"Attempt to compact with LSN going backwards\");\n        let min_distance =\n            self.get_image_layer_creation_check_threshold() as u64 * self.get_checkpoint_distance();\n\n        let distance_based_decision = distance.0 >= min_distance;\n\n        let mut last_check_instant = self.last_image_layer_creation_check_instant.lock().unwrap();\n        let check_required_after = (|| {\n            if self.shard_identity.is_unsharded() {\n                if let CurrentLogicalSize::Exact(logical_size) =\n                    self.current_logical_size.current_size()\n                {\n                    if Some(Into::<u64>::into(&logical_size)) < large_timeline_threshold {\n                        return Duration::from_secs(3600 * 48);\n                    }\n                }\n            }\n\n            self.get_checkpoint_timeout()\n        })();\n\n        let time_based_decision = match *last_check_instant {\n            Some(last_check) => {\n                let elapsed = last_check.elapsed();\n                elapsed >= check_required_after\n            }\n            None => true,\n        };\n\n        // Do the expensive delta layer counting only if this timeline has ingested sufficient\n        // WAL since the last check or a checkpoint timeout interval has elapsed since the last\n        // check.\n        let decision = distance_based_decision || time_based_decision;\n        tracing::info!(\n            \"Decided to check image layers: {}. 
Distance-based decision: {}, time-based decision: {}\",\n            decision,\n            distance_based_decision,\n            time_based_decision\n        );\n        if decision {\n            self.last_image_layer_creation_check_at.store(lsn);\n            *last_check_instant = Some(Instant::now());\n        }\n\n        decision\n    }\n\n    /// Returns the image layers generated and an enum indicating whether the process is fully completed.\n    /// true = we have generated all image layers, false = we preempt the process for L0 compaction.\n    ///\n    /// `partition_mode` is only for logging purposes and is not used anywhere in this function.\n    #[allow(clippy::too_many_arguments)]\n    async fn create_image_layers(\n        self: &Arc<Timeline>,\n        partitioning: &KeyPartitioning,\n        lsn: Lsn,\n        force_image_creation_lsn: Option<Lsn>,\n        mode: ImageLayerCreationMode,\n        ctx: &RequestContext,\n        last_status: LastImageLayerCreationStatus,\n        yield_for_l0: bool,\n    ) -> Result<(Vec<ResidentLayer>, LastImageLayerCreationStatus), CreateImageLayersError> {\n        let timer = self.metrics.create_images_time_histo.start_timer();\n\n        if partitioning.parts.is_empty() {\n            warn!(\"no partitions to create image layers for\");\n            return Ok((vec![], LastImageLayerCreationStatus::Complete));\n        }\n\n        // We need to avoid holes between generated image layers.\n        // Otherwise LayerMap::image_layer_exists will return false if key range of some layer is covered by more than one\n        // image layer with a hole between them. 
In this case, such a layer cannot be utilized by GC.\n        //\n        // How can such a hole between partitions appear?\n        // If we have a relation with relid=1 and size 100 and a relation with relid=2 and size 200, then the result of\n        // KeySpace::partition may contain partitions <100000000..100000099> and <200000000..200000199>.\n        // If there is a delta layer <100000000..300000000>, then it will never be garbage collected because\n        // image layers <100000000..100000099> and <200000000..200000199> do not completely cover it.\n        let mut start = Key::MIN;\n\n        let check_for_image_layers =\n            if let LastImageLayerCreationStatus::Incomplete { last_key } = last_status {\n                info!(\n                    \"resuming image layer creation: last_status=incomplete, continue from {}\",\n                    last_key\n                );\n                true\n            } else {\n                self.should_check_if_image_layers_required(lsn)\n            };\n\n        let mut batch_image_writer = BatchLayerWriter::new(self.conf);\n\n        let mut all_generated = true;\n\n        let mut partition_processed = 0;\n        let mut total_partitions = partitioning.parts.len();\n        let mut last_partition_processed = None;\n        let mut partition_parts = partitioning.parts.clone();\n\n        if let LastImageLayerCreationStatus::Incomplete { last_key } = last_status {\n            // We need to skip the partitions that have already been processed.\n            let mut found = false;\n            for (i, partition) in partition_parts.iter().enumerate() {\n                if last_key <= partition.end().unwrap() {\n                    // ```plain\n                    // |------|--------|----------|------|\n                    //              ^last_key\n                    //                    ^start from this partition\n                    // ```\n                    // Why `i+1` instead of `i`?\n                    // It is 
possible that the user did some writes after the previous image layer creation attempt so that\n                    // a relation grows in size, and the last_key is now in the middle of the partition. In this case, we\n                    // still want to skip this partition, so that we can make progress and avoid generating image layers over\n                    // the same partition. Doing a mod to ensure we don't end up with an empty vec.\n                    if i + 1 >= total_partitions {\n                        // In general, this case should not happen -- if last_key is on the last partition, the previous\n                        // iteration of image layer creation should return a complete status.\n                        break; // with found=false\n                    }\n                    partition_parts = partition_parts.split_off(i + 1); // Remove the first i + 1 elements\n                    total_partitions = partition_parts.len();\n                    // Update the start key to the partition start.\n                    start = partition_parts[0].start().unwrap();\n                    found = true;\n                    break;\n                }\n            }\n            if !found {\n                // Last key is within the last partition, or larger than all partitions.\n                return Ok((vec![], LastImageLayerCreationStatus::Complete));\n            }\n        }\n\n        let total = partition_parts.len();\n        for (idx, partition) in partition_parts.iter().enumerate() {\n            if self.cancel.is_cancelled() {\n                return Err(CreateImageLayersError::Cancelled);\n            }\n            partition_processed += 1;\n            let img_range = start..partition.ranges.last().unwrap().end;\n            let compact_metadata = partition.overlaps(&Key::metadata_key_range());\n            if compact_metadata {\n                for range in &partition.ranges {\n                    assert!(\n                        
range.start.field1 >= METADATA_KEY_BEGIN_PREFIX\n                            && range.end.field1 <= METADATA_KEY_END_PREFIX,\n                        \"metadata keys must be partitioned separately\"\n                    );\n                }\n                if mode == ImageLayerCreationMode::Try && !check_for_image_layers {\n                    // Skip compaction if there are not enough updates. Metadata compaction will do a scan and\n                    // might mess up with evictions.\n                    start = img_range.end;\n                    continue;\n                }\n                // For initial and force modes, we always generate image layers for metadata keys.\n            } else if let ImageLayerCreationMode::Try = mode {\n                // check_for_image_layers = false -> skip\n                // check_for_image_layers = true -> check time_for_new_image_layer -> skip/generate\n                if !check_for_image_layers\n                    || !self\n                        .time_for_new_image_layer(partition, lsn, force_image_creation_lsn)\n                        .await\n                {\n                    start = img_range.end;\n                    continue;\n                }\n            }\n            if let ImageLayerCreationMode::Force = mode {\n                // When forced to create image layers, we might try and create them where they already\n                // exist.  
This mode is only used in tests/debug.\n                let layers = self.layers.read(LayerManagerLockHolder::Compaction).await;\n                if layers.contains_key(&PersistentLayerKey {\n                    key_range: img_range.clone(),\n                    lsn_range: PersistentLayerDesc::image_layer_lsn_range(lsn),\n                    is_delta: false,\n                }) {\n                    // TODO: this can be processed with the BatchLayerWriter::finish_with_discard\n                    // in the future.\n                    tracing::info!(\n                        \"Skipping image layer at {lsn} {}..{}, already exists\",\n                        img_range.start,\n                        img_range.end\n                    );\n                    start = img_range.end;\n                    continue;\n                }\n            }\n\n            let image_layer_writer = ImageLayerWriter::new(\n                self.conf,\n                self.timeline_id,\n                self.tenant_shard_id,\n                &img_range,\n                lsn,\n                &self.gate,\n                self.cancel.clone(),\n                ctx,\n            )\n            .await\n            .map_err(CreateImageLayersError::Other)?;\n\n            fail_point!(\"image-layer-writer-fail-before-finish\", |_| {\n                Err(CreateImageLayersError::Other(anyhow::anyhow!(\n                    \"failpoint image-layer-writer-fail-before-finish\"\n                )))\n            });\n\n            // Begin Hadron\n            //\n            fail_point!(\"create-image-layer-fail-simulated-corruption\", |_| {\n                self.corruption_detected\n                    .store(true, std::sync::atomic::Ordering::Relaxed);\n                Err(CreateImageLayersError::Other(anyhow::anyhow!(\n                    \"failpoint create-image-layer-fail-simulated-corruption\"\n                )))\n            });\n            // End Hadron\n\n            let io_concurrency = 
IoConcurrency::spawn_from_conf(\n                self.conf.get_vectored_concurrent_io,\n                self.gate\n                    .enter()\n                    .map_err(|_| CreateImageLayersError::Cancelled)?,\n            );\n\n            let outcome = if !compact_metadata {\n                self.create_image_layer_for_rel_blocks(\n                    partition,\n                    image_layer_writer,\n                    lsn,\n                    ctx,\n                    img_range.clone(),\n                    io_concurrency,\n                    Some((idx, total)),\n                )\n                .await?\n            } else {\n                self.create_image_layer_for_metadata_keys(\n                    partition,\n                    image_layer_writer,\n                    lsn,\n                    ctx,\n                    img_range.clone(),\n                    mode,\n                    io_concurrency,\n                )\n                .await?\n            };\n            match outcome {\n                ImageLayerCreationOutcome::Empty => {\n                    // No data in this partition, so we don't need to create an image layer (for now).\n                    // The next image layer should cover this key range, so we don't advance the `start`\n                    // key.\n                }\n                ImageLayerCreationOutcome::Generated {\n                    unfinished_image_layer,\n                } => {\n                    batch_image_writer.add_unfinished_image_writer(\n                        unfinished_image_layer,\n                        img_range.clone(),\n                        lsn,\n                    );\n                    // The next image layer should be generated right after this one.\n                    start = img_range.end;\n                }\n                ImageLayerCreationOutcome::Skip => {\n                    // We don't need to create an image layer for this partition.\n                    // The 
next image layer should NOT cover this range, otherwise\n                    // the keyspace becomes empty (reads don't go past image layers).\n                    start = img_range.end;\n                }\n            }\n\n            if let ImageLayerCreationMode::Try = mode {\n                // We have at least made some progress\n                if yield_for_l0 && batch_image_writer.pending_layer_num() >= 1 {\n                    // The `Try` mode is currently only used on the compaction path. We want to avoid\n                    // image layer generation taking too long and blocking L0 compaction. So in this\n                    // mode, we also inspect the current number of L0 layers and skip image layer generation\n                    // if there are too many of them.\n                    let image_preempt_threshold = self.get_image_creation_preempt_threshold()\n                        * self.get_compaction_threshold();\n                    // TODO: currently we do not respect `get_image_creation_preempt_threshold` and always yield\n                    // when there is a single timeline with more than L0 threshold L0 layers. 
As long as the\n                    // `get_image_creation_preempt_threshold` is set to a value greater than 0, we will yield for L0 compaction.\n                    if image_preempt_threshold != 0 {\n                        let should_yield = self\n                            .l0_compaction_trigger\n                            .notified()\n                            .now_or_never()\n                            .is_some();\n                        if should_yield {\n                            tracing::info!(\n                                \"preempt image layer generation at {lsn} when processing partition {}..{}: too many L0 layers\",\n                                partition.start().unwrap(),\n                                partition.end().unwrap()\n                            );\n                            last_partition_processed = Some(partition.clone());\n                            all_generated = false;\n                            break;\n                        }\n                    }\n                }\n            }\n        }\n\n        let image_layers = batch_image_writer\n            .finish(self, ctx)\n            .await\n            .map_err(CreateImageLayersError::Other)?;\n\n        let mut guard = self.layers.write(LayerManagerLockHolder::Compaction).await;\n\n        // FIXME: we could add the images to be uploaded *before* returning from here, but right\n        // now they are being scheduled outside of write lock; current way is inconsistent with\n        // compaction lock order.\n        guard\n            .open_mut()?\n            .track_new_image_layers(&image_layers, &self.metrics);\n        drop_layer_manager_wlock(guard);\n        let duration = timer.stop_and_record();\n\n        // Creating image layers may have caused some previously visible layers to be covered\n        if !image_layers.is_empty() {\n            self.update_layer_visibility().await?;\n        }\n\n        let total_layer_size = image_layers\n            
.iter()\n            .map(|l| l.metadata().file_size)\n            .sum::<u64>();\n\n        if !image_layers.is_empty() {\n            info!(\n                \"created {} image layers ({} bytes) in {}s, processed {} out of {} partitions\",\n                image_layers.len(),\n                total_layer_size,\n                duration.as_secs_f64(),\n                partition_processed,\n                total_partitions\n            );\n        }\n\n        Ok((\n            image_layers,\n            if all_generated {\n                LastImageLayerCreationStatus::Complete\n            } else {\n                LastImageLayerCreationStatus::Incomplete {\n                    last_key: if let Some(last_partition_processed) = last_partition_processed {\n                        last_partition_processed.end().unwrap_or(Key::MIN)\n                    } else {\n                        // This branch should be unreachable, but in case it happens, we can just return the start key.\n                        Key::MIN\n                    },\n                }\n            },\n        ))\n    }\n\n    /// Wait until the background initial logical size calculation is complete, or\n    /// this Timeline is shut down.  
Calling this function will cause the initial\n    /// logical size calculation to skip waiting for the background jobs barrier.\n    pub(crate) async fn await_initial_logical_size(self: Arc<Self>) {\n        if !self.shard_identity.is_shard_zero() {\n            // We don't populate logical size on shard >0: skip waiting for it.\n            return;\n        }\n\n        if self.remote_client.is_deleting() {\n            // The timeline was created in a deletion-resume state, we don't expect logical size to be populated\n            return;\n        }\n\n        if self.current_logical_size.current_size().is_exact() {\n            // root timelines are initialized with exact count, but never start the background\n            // calculation\n            return;\n        }\n\n        if self.cancel.is_cancelled() {\n            // We already requested stopping the tenant, so we cannot wait for the logical size\n            // calculation to complete given the task might have been already cancelled.\n            return;\n        }\n\n        if let Some(await_bg_cancel) = self\n            .current_logical_size\n            .cancel_wait_for_background_loop_concurrency_limit_semaphore\n            .get()\n        {\n            await_bg_cancel.cancel();\n        } else {\n            // We should not wait if we were not able to explicitly instruct\n            // the logical size cancellation to skip the concurrency limit semaphore.\n            // TODO: this is an unexpected case.  
We should restructure so that it\n            // can't happen.\n            tracing::warn!(\n                \"await_initial_logical_size: can't get semaphore cancel token, skipping\"\n            );\n            debug_assert!(false);\n        }\n\n        tokio::select!(\n            _ = self.current_logical_size.initialized.acquire() => {},\n            _ = self.cancel.cancelled() => {}\n        )\n    }\n\n    /// Detach this timeline from its ancestor by copying all of the ancestor's layers as this\n    /// Timeline's layers up to the ancestor_lsn.\n    ///\n    /// Requires a timeline that:\n    /// - has an ancestor to detach from\n    /// - the ancestor does not have an ancestor -- follows from the original RFC limitations, not\n    ///   a technical requirement\n    ///\n    /// After the operation has been started, it cannot be canceled. Upon restart it needs to be\n    /// polled again until completion.\n    ///\n    /// During the operation all timelines sharing the data with this timeline will be reparented\n    /// from our ancestor to be branches of this timeline.\n    pub(crate) async fn prepare_to_detach_from_ancestor(\n        self: &Arc<Timeline>,\n        tenant: &crate::tenant::TenantShard,\n        options: detach_ancestor::Options,\n        behavior: DetachBehavior,\n        ctx: &RequestContext,\n    ) -> Result<detach_ancestor::Progress, detach_ancestor::Error> {\n        detach_ancestor::prepare(self, tenant, behavior, options, ctx).await\n    }\n\n    /// Second step of detach from ancestor; detaches `self` from its current ancestor and\n    /// reparents any reparentable children of the previous ancestor.\n    ///\n    /// This method is to be called while holding the TenantManager's tenant slot, so during this\n    /// method we cannot be deleted nor can any timeline be deleted. 
After this method returns\n    /// successfully, tenant must be reloaded.\n    ///\n    /// Final step will be to [`Self::complete_detaching_timeline_ancestor`] after optionally\n    /// resetting the tenant.\n    pub(crate) async fn detach_from_ancestor_and_reparent(\n        self: &Arc<Timeline>,\n        tenant: &crate::tenant::TenantShard,\n        prepared: detach_ancestor::PreparedTimelineDetach,\n        ancestor_timeline_id: TimelineId,\n        ancestor_lsn: Lsn,\n        behavior: DetachBehavior,\n        ctx: &RequestContext,\n    ) -> Result<detach_ancestor::DetachingAndReparenting, detach_ancestor::Error> {\n        detach_ancestor::detach_and_reparent(\n            self,\n            tenant,\n            prepared,\n            ancestor_timeline_id,\n            ancestor_lsn,\n            behavior,\n            ctx,\n        )\n        .await\n    }\n\n    /// Final step which unblocks the GC.\n    ///\n    /// The tenant must've been reset if ancestry was modified previously (in tenant manager).\n    pub(crate) async fn complete_detaching_timeline_ancestor(\n        self: &Arc<Timeline>,\n        tenant: &crate::tenant::TenantShard,\n        attempt: detach_ancestor::Attempt,\n        ctx: &RequestContext,\n    ) -> Result<(), detach_ancestor::Error> {\n        detach_ancestor::complete(self, tenant, attempt, ctx).await\n    }\n}\n\nimpl Drop for Timeline {\n    fn drop(&mut self) {\n        if let Some(ancestor) = &self.ancestor_timeline {\n            // This lock should never be poisoned, but in case it is we do a .map() instead of\n            // an unwrap(), to avoid panicking in a destructor and thereby aborting the process.\n            if let Ok(mut gc_info) = ancestor.gc_info.write() {\n                if !gc_info.remove_child_not_offloaded(self.timeline_id) {\n                    tracing::error!(tenant_id = %self.tenant_shard_id.tenant_id, shard_id = %self.tenant_shard_id.shard_slug(), timeline_id = %self.timeline_id,\n                       
 \"Couldn't remove retain_lsn entry from timeline's parent on drop: already removed\");\n                }\n            }\n        }\n        info!(\n            \"Timeline {} for tenant {} is being dropped\",\n            self.timeline_id, self.tenant_shard_id.tenant_id\n        );\n    }\n}\n\npub(crate) use compaction_error::CompactionError;\n/// In a private mod to enforce that [`CompactionError::is_cancel`] is used\n/// instead of `match`ing on [`CompactionError::ShuttingDown`].\nmod compaction_error {\n    use utils::sync::gate::GateError;\n\n    use crate::{\n        pgdatadir_mapping::CollectKeySpaceError,\n        tenant::{PageReconstructError, blob_io::WriteBlobError, upload_queue::NotInitialized},\n        virtual_file::owned_buffers_io::write::FlushTaskError,\n    };\n\n    /// Top-level failure to compact. Use [`Self::is_cancel`].\n    #[derive(Debug, thiserror::Error)]\n    pub(crate) enum CompactionError {\n        /// Use [`Self::is_cancel`] instead of checking for this variant.\n        #[error(\"The timeline or pageserver is shutting down\")]\n        #[allow(private_interfaces)]\n        ShuttingDown(ForbidMatching), // private ForbidMatching enforces use of [`Self::is_cancel`].\n        #[error(transparent)]\n        Other(anyhow::Error),\n    }\n\n    #[derive(Debug)]\n    struct ForbidMatching;\n\n    impl CompactionError {\n        pub fn new_cancelled() -> Self {\n            Self::ShuttingDown(ForbidMatching)\n        }\n        /// Errors that can be ignored, i.e., cancel and shutdown.\n        pub fn is_cancel(&self) -> bool {\n            let other = match self {\n                CompactionError::ShuttingDown(_) => return true,\n                CompactionError::Other(other) => other,\n            };\n\n            // The write path of compaction in particular often lacks differentiated\n            // handling errors stemming from cancellation from other errors.\n            // So, if requested, we also check the ::Other variant by 
downcasting.\n            // The list below has been found empirically from flaky tests and production logs.\n            // The process is simple: on ::Other(), compaction will print the enclosed\n            // anyhow::Error in debug mode, i.e., with backtrace. That backtrace contains the\n            // line where the write path / compaction code does undifferentiated error handling\n            // from a non-anyhow type to an anyhow type. Add the type to the list of downcasts\n            // below, following the same is_cancel() pattern.\n\n            let root_cause = other.root_cause();\n\n            let upload_queue = root_cause\n                .downcast_ref::<NotInitialized>()\n                .is_some_and(|e| e.is_stopping());\n            let timeline = root_cause\n                .downcast_ref::<PageReconstructError>()\n                .is_some_and(|e| e.is_cancel());\n            let buffered_writer_flush_task_canelled = root_cause\n                .downcast_ref::<FlushTaskError>()\n                .is_some_and(|e| e.is_cancel());\n            let write_blob_cancelled = root_cause\n                .downcast_ref::<WriteBlobError>()\n                .is_some_and(|e| e.is_cancel());\n            let gate_closed = root_cause\n                .downcast_ref::<GateError>()\n                .is_some_and(|e| e.is_cancel());\n            upload_queue\n                || timeline\n                || buffered_writer_flush_task_canelled\n                || write_blob_cancelled\n                || gate_closed\n        }\n        pub fn into_anyhow(self) -> anyhow::Error {\n            match self {\n                CompactionError::ShuttingDown(ForbidMatching) => anyhow::Error::new(self),\n                CompactionError::Other(e) => e,\n            }\n        }\n        pub fn from_collect_keyspace(err: CollectKeySpaceError) -> Self {\n            if err.is_cancel() {\n                Self::new_cancelled()\n            } else {\n                
Self::Other(err.into_anyhow())\n            }\n        }\n    }\n}\n\nimpl From<super::upload_queue::NotInitialized> for CompactionError {\n    fn from(value: super::upload_queue::NotInitialized) -> Self {\n        match value {\n            super::upload_queue::NotInitialized::Uninitialized => {\n                CompactionError::Other(anyhow::anyhow!(value))\n            }\n            super::upload_queue::NotInitialized::ShuttingDown\n            | super::upload_queue::NotInitialized::Stopped => CompactionError::new_cancelled(),\n        }\n    }\n}\n\nimpl From<super::storage_layer::layer::DownloadError> for CompactionError {\n    fn from(e: super::storage_layer::layer::DownloadError) -> Self {\n        match e {\n            super::storage_layer::layer::DownloadError::TimelineShutdown\n            | super::storage_layer::layer::DownloadError::DownloadCancelled => {\n                CompactionError::new_cancelled()\n            }\n            super::storage_layer::layer::DownloadError::ContextAndConfigReallyDeniesDownloads\n            | super::storage_layer::layer::DownloadError::DownloadRequired\n            | super::storage_layer::layer::DownloadError::NotFile(_)\n            | super::storage_layer::layer::DownloadError::DownloadFailed\n            | super::storage_layer::layer::DownloadError::PreStatFailed(_) => {\n                CompactionError::Other(anyhow::anyhow!(e))\n            }\n            #[cfg(test)]\n            super::storage_layer::layer::DownloadError::Failpoint(_) => {\n                CompactionError::Other(anyhow::anyhow!(e))\n            }\n        }\n    }\n}\n\nimpl From<layer_manager::Shutdown> for CompactionError {\n    fn from(_: layer_manager::Shutdown) -> Self {\n        CompactionError::new_cancelled()\n    }\n}\n\nimpl From<super::storage_layer::errors::PutError> for CompactionError {\n    fn from(e: super::storage_layer::errors::PutError) -> Self {\n        if e.is_cancel() {\n            CompactionError::new_cancelled()\n      
  } else {\n            CompactionError::Other(e.into_anyhow())\n        }\n    }\n}\n\n#[serde_as]\n#[derive(serde::Serialize)]\nstruct RecordedDuration(#[serde_as(as = \"serde_with::DurationMicroSeconds\")] Duration);\n\n#[derive(Default)]\nenum DurationRecorder {\n    #[default]\n    NotStarted,\n    Recorded(RecordedDuration, tokio::time::Instant),\n}\n\nimpl DurationRecorder {\n    fn till_now(&self) -> DurationRecorder {\n        match self {\n            DurationRecorder::NotStarted => {\n                panic!(\"must only call on recorded measurements\")\n            }\n            DurationRecorder::Recorded(_, ended) => {\n                let now = tokio::time::Instant::now();\n                DurationRecorder::Recorded(RecordedDuration(now - *ended), now)\n            }\n        }\n    }\n    fn into_recorded(self) -> Option<RecordedDuration> {\n        match self {\n            DurationRecorder::NotStarted => None,\n            DurationRecorder::Recorded(recorded, _) => Some(recorded),\n        }\n    }\n}\n\n/// Descriptor for a delta layer used in testing infra. The start/end key/lsn range of the\n/// delta layer might be different from the min/max key/lsn in the delta layer. 
Therefore,\n/// the layer descriptor requires the user to provide the ranges, which should cover all\n/// keys specified in the `data` field.\n#[cfg(test)]\n#[derive(Clone)]\npub struct DeltaLayerTestDesc {\n    pub lsn_range: Range<Lsn>,\n    pub key_range: Range<Key>,\n    pub data: Vec<(Key, Lsn, Value)>,\n}\n\n#[cfg(test)]\n#[derive(Clone)]\npub struct InMemoryLayerTestDesc {\n    pub lsn_range: Range<Lsn>,\n    pub data: Vec<(Key, Lsn, Value)>,\n    pub is_open: bool,\n}\n\n#[cfg(test)]\nimpl DeltaLayerTestDesc {\n    pub fn new(lsn_range: Range<Lsn>, key_range: Range<Key>, data: Vec<(Key, Lsn, Value)>) -> Self {\n        Self {\n            lsn_range,\n            key_range,\n            data,\n        }\n    }\n\n    pub fn new_with_inferred_key_range(\n        lsn_range: Range<Lsn>,\n        data: Vec<(Key, Lsn, Value)>,\n    ) -> Self {\n        let key_min = data.iter().map(|(key, _, _)| key).min().unwrap();\n        let key_max = data.iter().map(|(key, _, _)| key).max().unwrap();\n        Self {\n            key_range: (*key_min)..(key_max.next()),\n            lsn_range,\n            data,\n        }\n    }\n\n    pub(crate) fn layer_name(&self) -> LayerName {\n        LayerName::Delta(super::storage_layer::DeltaLayerName {\n            key_range: self.key_range.clone(),\n            lsn_range: self.lsn_range.clone(),\n        })\n    }\n}\n\nimpl Timeline {\n    async fn finish_compact_batch(\n        self: &Arc<Self>,\n        new_deltas: &[ResidentLayer],\n        new_images: &[ResidentLayer],\n        layers_to_remove: &[Layer],\n    ) -> Result<(), CompactionError> {\n        let mut guard = tokio::select! 
{\n            guard = self.layers.write(LayerManagerLockHolder::Compaction) => guard,\n            _ = self.cancel.cancelled() => {\n                return Err(CompactionError::new_cancelled());\n            }\n        };\n\n        let mut duplicated_layers = HashSet::new();\n\n        let mut insert_layers = Vec::with_capacity(new_deltas.len());\n\n        for l in new_deltas {\n            if guard.contains(l.as_ref()) {\n                // expected in tests\n                tracing::error!(layer=%l, \"duplicated L1 layer\");\n\n                // good ways to cause a duplicate: we repeatedly error after taking the writelock\n                // `guard`  on self.layers. as of writing this, there are no error returns except\n                // for compact_level0_phase1 creating an L0, which does not happen in practice\n                // because we have not implemented L0 => L0 compaction.\n                duplicated_layers.insert(l.layer_desc().key());\n            } else if LayerMap::is_l0(&l.layer_desc().key_range, l.layer_desc().is_delta) {\n                return Err(CompactionError::Other(anyhow::anyhow!(\n                    \"compaction generates a L0 layer file as output, which will cause infinite compaction.\"\n                )));\n            } else {\n                insert_layers.push(l.clone());\n            }\n        }\n\n        // only remove those inputs which were not outputs\n        let remove_layers: Vec<Layer> = layers_to_remove\n            .iter()\n            .filter(|l| !duplicated_layers.contains(&l.layer_desc().key()))\n            .cloned()\n            .collect();\n\n        if !new_images.is_empty() {\n            guard\n                .open_mut()?\n                .track_new_image_layers(new_images, &self.metrics);\n        }\n\n        guard\n            .open_mut()?\n            .finish_compact_l0(&remove_layers, &insert_layers, &self.metrics);\n\n        self.remote_client\n            
.schedule_compaction_update(&remove_layers, new_deltas)?;\n\n        drop_layer_manager_wlock(guard);\n\n        Ok(())\n    }\n\n    async fn rewrite_layers(\n        self: &Arc<Self>,\n        mut replace_layers: Vec<(Layer, ResidentLayer)>,\n        mut drop_layers: Vec<Layer>,\n    ) -> Result<(), CompactionError> {\n        let mut guard = self.layers.write(LayerManagerLockHolder::Compaction).await;\n\n        // Trim our lists in case our caller (compaction) raced with someone else (GC) removing layers: we want\n        // to avoid double-removing, and avoid rewriting something that was removed.\n        replace_layers.retain(|(l, _)| guard.contains(l));\n        drop_layers.retain(|l| guard.contains(l));\n\n        guard\n            .open_mut()?\n            .rewrite_layers(&replace_layers, &drop_layers, &self.metrics);\n\n        let upload_layers: Vec<_> = replace_layers.into_iter().map(|r| r.1).collect();\n\n        self.remote_client\n            .schedule_compaction_update(&drop_layers, &upload_layers)?;\n\n        Ok(())\n    }\n\n    /// Schedules the uploads of the given image layers\n    fn upload_new_image_layers(\n        self: &Arc<Self>,\n        new_images: impl IntoIterator<Item = ResidentLayer>,\n    ) -> Result<(), super::upload_queue::NotInitialized> {\n        for layer in new_images {\n            self.remote_client.schedule_layer_file_upload(layer)?;\n        }\n        // should any new image layer have been created, not uploading index_part will\n        // result in a mismatch between remote_physical_size and layermap calculated\n        // size, which will fail some tests, but should not be an issue otherwise.\n        self.remote_client\n            .schedule_index_upload_for_file_changes()?;\n        Ok(())\n    }\n\n    async fn find_gc_time_cutoff(\n        &self,\n        now: SystemTime,\n        pitr: Duration,\n        cancel: &CancellationToken,\n        ctx: &RequestContext,\n    ) -> Result<Option<Lsn>, PageReconstructError> 
{\n        debug_assert_current_span_has_tenant_and_timeline_id();\n        if self.shard_identity.is_shard_zero() {\n            // Shard Zero has SLRU data and can calculate the PITR time -> LSN mapping itself\n            let time_range = if pitr == Duration::ZERO {\n                humantime::parse_duration(DEFAULT_PITR_INTERVAL).expect(\"constant is invalid\")\n            } else {\n                pitr\n            };\n\n            // If PITR is so large or `now` is so small that this underflows, we will retain no history (highly unexpected case)\n            let time_cutoff = now.checked_sub(time_range).unwrap_or(now);\n            let timestamp = to_pg_timestamp(time_cutoff);\n\n            let time_cutoff = match self.find_lsn_for_timestamp(timestamp, cancel, ctx).await? {\n                LsnForTimestamp::Present(lsn) => Some(lsn),\n                LsnForTimestamp::Future(lsn) => {\n                    // The timestamp is in the future. That sounds impossible,\n                    // but what it really means is that there hasn't been\n                    // any commits since the cutoff timestamp.\n                    //\n                    // In this case we should use the LSN of the most recent commit,\n                    // which is implicitly the last LSN in the log.\n                    debug!(\"future({})\", lsn);\n                    Some(self.get_last_record_lsn())\n                }\n                LsnForTimestamp::Past(lsn) => {\n                    debug!(\"past({})\", lsn);\n                    None\n                }\n                LsnForTimestamp::NoData(lsn) => {\n                    debug!(\"nodata({})\", lsn);\n                    None\n                }\n            };\n            Ok(time_cutoff)\n        } else {\n            // Shards other than shard zero cannot do timestamp->lsn lookups, and must instead learn their GC cutoff\n            // from shard zero's index.  
The index doesn't explicitly tell us the time cutoff, but we may assume that\n            // the point up to which shard zero's last_gc_cutoff has advanced will either be the time cutoff, or a\n            // space cutoff that we would also have respected ourselves.\n            match self\n                .remote_client\n                .download_foreign_index(ShardNumber(0), cancel)\n                .await\n            {\n                Ok((index_part, index_generation, _index_mtime)) => {\n                    tracing::info!(\n                        \"GC loaded shard zero metadata (gen {index_generation:?}): latest_gc_cutoff_lsn: {}\",\n                        index_part.metadata.latest_gc_cutoff_lsn()\n                    );\n                    Ok(Some(index_part.metadata.latest_gc_cutoff_lsn()))\n                }\n                Err(DownloadError::NotFound) => {\n                    // This is unexpected, because during timeline creations shard zero persists to remote\n                    // storage before other shards are called, and during timeline deletion non-zeroth shards are\n                    // deleted before the zeroth one.  However, it should be harmless: if we somehow end up in this\n                    // state, then shard zero should _eventually_ write an index when it GCs.\n                    tracing::warn!(\"GC couldn't find shard zero's index for timeline\");\n                    Ok(None)\n                }\n                Err(e) => {\n                    // TODO: this function should return a different error type than page reconstruct error\n                    Err(PageReconstructError::Other(anyhow::anyhow!(e)))\n                }\n            }\n\n            // TODO: after reading shard zero's GC cutoff, we should validate its generation with the storage\n            // controller.  
Otherwise, it is possible that we see the GC cutoff go backwards while shard zero\n            // is going through a migration if we read the old location's index and it has GC'd ahead of the\n            // new location.  This is legal in principle, but problematic in practice because it might result\n            // in a timeline creation succeeding on shard zero ('s new location) but then failing on other shards\n            // because they have GC'd past the branch point.\n        }\n    }\n\n    /// Find the Lsns above which layer files need to be retained on\n    /// garbage collection.\n    ///\n    /// We calculate two cutoffs, one based on time and one based on WAL size.  `pitr`\n    /// controls the time cutoff (or ZERO to disable time-based retention), and `space_cutoff` controls\n    /// the space-based retention.\n    ///\n    /// This function doesn't simply calculate time & space based retention: it treats time-based\n    /// retention as authoritative if enabled, and falls back to space-based retention if calculating\n    /// the LSN for a time point isn't possible.  Therefore the GcCutoffs::space in the response might\n    /// be different to the `space_cutoff` input.  
Callers should treat the min() of the two cutoffs\n    /// in the response as the GC cutoff point for the timeline.\n    #[instrument(skip_all, fields(timeline_id=%self.timeline_id))]\n    pub(super) async fn find_gc_cutoffs(\n        &self,\n        now: SystemTime,\n        space_cutoff: Lsn,\n        pitr: Duration,\n        cancel: &CancellationToken,\n        ctx: &RequestContext,\n    ) -> Result<GcCutoffs, PageReconstructError> {\n        let _timer = self\n            .metrics\n            .find_gc_cutoffs_histo\n            .start_timer()\n            .record_on_drop();\n\n        pausable_failpoint!(\"Timeline::find_gc_cutoffs-pausable\");\n\n        if cfg!(test) && pitr == Duration::ZERO {\n            // Unit tests which specify zero PITR interval expect to avoid doing any I/O for timestamp lookup\n            return Ok(GcCutoffs {\n                time: Some(self.get_last_record_lsn()),\n                space: space_cutoff,\n            });\n        }\n\n        // Calculate a time-based limit on how much to retain:\n        // - if PITR interval is set, then this is our cutoff.\n        // - if PITR interval is not set, then we do a lookup\n        //   based on DEFAULT_PITR_INTERVAL, so that size-based retention does not result in keeping history around permanently on idle databases.\n        let time_cutoff = self.find_gc_time_cutoff(now, pitr, cancel, ctx).await?;\n\n        Ok(match (pitr, time_cutoff) {\n            (Duration::ZERO, Some(time_cutoff)) => {\n                // PITR is not set. 
Retain the size-based limit, or the default time retention,\n                // whichever requires less data.\n                GcCutoffs {\n                    time: Some(self.get_last_record_lsn()),\n                    space: std::cmp::max(time_cutoff, space_cutoff),\n                }\n            }\n            (Duration::ZERO, None) => {\n                // PITR is not set, and time lookup failed\n                GcCutoffs {\n                    time: Some(self.get_last_record_lsn()),\n                    space: space_cutoff,\n                }\n            }\n            (_, None) => {\n                // PITR interval is set & we didn't look up a timestamp successfully.  Conservatively assume PITR\n                // cannot advance beyond what was already GC'd, and respect space-based retention\n                GcCutoffs {\n                    time: Some(*self.get_applied_gc_cutoff_lsn()),\n                    space: space_cutoff,\n                }\n            }\n            (_, Some(time_cutoff)) => {\n                // PITR interval is set and we looked up timestamp successfully.  Ignore\n                // size based retention and make time cutoff authoritative\n                GcCutoffs {\n                    time: Some(time_cutoff),\n                    space: time_cutoff,\n                }\n            }\n        })\n    }\n\n    /// Garbage collect layer files on a timeline that are no longer needed.\n    ///\n    /// Currently, we don't make any attempt at removing unneeded page versions\n    /// within a layer file. We can only remove the whole file if it's fully\n    /// obsolete.\n    pub(super) async fn gc(&self) -> Result<GcResult, GcError> {\n        // this is most likely the background tasks, but it might be the spawned task from\n        // immediate_gc\n        let _g = tokio::select! 
{\n            guard = self.gc_lock.lock() => guard,\n            _ = self.cancel.cancelled() => return Ok(GcResult::default()),\n        };\n        let timer = self.metrics.garbage_collect_histo.start_timer();\n\n        fail_point!(\"before-timeline-gc\");\n\n        // Is the timeline being deleted?\n        if self.is_stopping() {\n            return Err(GcError::TimelineCancelled);\n        }\n\n        let (space_cutoff, time_cutoff, retain_lsns, max_lsn_with_valid_lease) = {\n            let gc_info = self.gc_info.read().unwrap();\n\n            let space_cutoff = min(gc_info.cutoffs.space, self.get_disk_consistent_lsn());\n            let time_cutoff = gc_info.cutoffs.time;\n            let retain_lsns = gc_info\n                .retain_lsns\n                .iter()\n                .map(|(lsn, _child_id, _is_offloaded)| *lsn)\n                .collect();\n\n            // Gets the maximum LSN that holds the valid lease.\n            //\n            // Caveat: `refresh_gc_info` is in charge of updating the lease map.\n            // Here, we do not check for stale leases again.\n            let max_lsn_with_valid_lease = gc_info.leases.last_key_value().map(|(lsn, _)| *lsn);\n\n            (\n                space_cutoff,\n                time_cutoff,\n                retain_lsns,\n                max_lsn_with_valid_lease,\n            )\n        };\n\n        let mut new_gc_cutoff = space_cutoff.min(time_cutoff.unwrap_or_default());\n        let standby_horizon = self.standby_horizon.load();\n        // Hold GC for the standby, but as a safety guard do it only within some\n        // reasonable lag.\n        if standby_horizon != Lsn::INVALID {\n            if let Some(standby_lag) = new_gc_cutoff.checked_sub(standby_horizon) {\n                const MAX_ALLOWED_STANDBY_LAG: u64 = 10u64 << 30; // 10 GB\n                if standby_lag.0 < MAX_ALLOWED_STANDBY_LAG {\n                    new_gc_cutoff = Lsn::min(standby_horizon, new_gc_cutoff);\n              
      trace!(\"holding off GC for standby apply LSN {}\", standby_horizon);\n                } else {\n                    warn!(\n                        \"standby is lagging for more than {}MB, not holding gc for it\",\n                        MAX_ALLOWED_STANDBY_LAG / 1024 / 1024\n                    )\n                }\n            }\n        }\n\n        // Reset standby horizon to ignore it if it is not updated till next GC.\n        // It is an easy way to unset it when standby disappears without adding\n        // more conf options.\n        self.standby_horizon.store(Lsn::INVALID);\n        self.metrics\n            .standby_horizon_gauge\n            .set(Lsn::INVALID.0 as i64);\n\n        let res = self\n            .gc_timeline(\n                space_cutoff,\n                time_cutoff,\n                retain_lsns,\n                max_lsn_with_valid_lease,\n                new_gc_cutoff,\n            )\n            .instrument(\n                info_span!(\"gc_timeline\", timeline_id = %self.timeline_id, cutoff = %new_gc_cutoff),\n            )\n            .await?;\n\n        // only record successes\n        timer.stop_and_record();\n\n        Ok(res)\n    }\n\n    async fn gc_timeline(\n        &self,\n        space_cutoff: Lsn,\n        time_cutoff: Option<Lsn>, // None if uninitialized\n        retain_lsns: Vec<Lsn>,\n        max_lsn_with_valid_lease: Option<Lsn>,\n        new_gc_cutoff: Lsn,\n    ) -> Result<GcResult, GcError> {\n        // FIXME: if there is an ongoing detach_from_ancestor, we should just skip gc\n\n        let now = SystemTime::now();\n        let mut result: GcResult = GcResult::default();\n\n        // Nothing to GC. 
Return early.\n        let latest_gc_cutoff = *self.get_applied_gc_cutoff_lsn();\n        if latest_gc_cutoff >= new_gc_cutoff {\n            info!(\n                \"Nothing to GC: new_gc_cutoff_lsn {new_gc_cutoff}, latest_gc_cutoff_lsn {latest_gc_cutoff}\",\n            );\n            return Ok(result);\n        }\n\n        let Some(time_cutoff) = time_cutoff else {\n            // The GC cutoff should have been computed by now, but let's be defensive.\n            info!(\"Nothing to GC: time_cutoff not yet computed\");\n            return Ok(result);\n        };\n\n        // We need to ensure that no one tries to read page versions or create\n        // branches at a point before latest_gc_cutoff_lsn. See branch_timeline()\n        // for details. This will block until the old value is no longer in use.\n        //\n        // The GC cutoff should only ever move forwards.\n        let waitlist = {\n            let write_guard = self.applied_gc_cutoff_lsn.lock_for_write();\n            if *write_guard > new_gc_cutoff {\n                return Err(GcError::BadLsn {\n                    why: format!(\n                        \"Cannot move GC cutoff LSN backwards (was {}, new {})\",\n                        *write_guard, new_gc_cutoff\n                    ),\n                });\n            }\n\n            write_guard.store_and_unlock(new_gc_cutoff)\n        };\n        let waitlist_wait_fut = std::pin::pin!(waitlist.wait());\n        log_slow(\n            \"applied_gc_cutoff waitlist wait\",\n            Duration::from_secs(30),\n            waitlist_wait_fut,\n        )\n        .await;\n\n        info!(\"GC starting\");\n\n        debug!(\"retain_lsns: {:?}\", retain_lsns);\n\n        let max_retain_lsn = retain_lsns.iter().max();\n\n        // Scan all layers in the timeline (remote or on-disk).\n        //\n        // Garbage collect the layer if all conditions are satisfied:\n        // 1. it is older than cutoff LSN;\n        // 2. 
it is older than PITR interval;\n        // 3. it doesn't need to be retained for 'retain_lsns';\n        // 4. it does not need to be kept for LSNs holding valid leases.\n        // 5. newer on-disk image layers cover the layer's whole key range\n        let layers_to_remove = {\n            let mut layers_to_remove = Vec::new();\n\n            let guard = self\n                .layers\n                .read(LayerManagerLockHolder::GarbageCollection)\n                .await;\n            let layers = guard.layer_map()?;\n            'outer: for l in layers.iter_historic_layers() {\n                result.layers_total += 1;\n\n                // 1. Is it newer than GC horizon cutoff point?\n                if l.get_lsn_range().end > space_cutoff {\n                    debug!(\n                        \"keeping {} because it's newer than space_cutoff {}\",\n                        l.layer_name(),\n                        space_cutoff,\n                    );\n                    result.layers_needed_by_cutoff += 1;\n                    continue 'outer;\n                }\n\n                // 2. Is it newer than PITR cutoff point?\n                if l.get_lsn_range().end > time_cutoff {\n                    debug!(\n                        \"keeping {} because it's newer than time_cutoff {}\",\n                        l.layer_name(),\n                        time_cutoff,\n                    );\n                    result.layers_needed_by_pitr += 1;\n                    continue 'outer;\n                }\n\n                // 3. Is it needed by a child branch?\n                // NOTE With that we would keep data that\n                // might be referenced by child branches forever.\n                // We can track this in child timeline GC and delete parent layers when\n                // they are no longer needed. 
This might be complicated with long inheritance chains.\n                if let Some(retain_lsn) = max_retain_lsn {\n                    // start_lsn is inclusive\n                    if &l.get_lsn_range().start <= retain_lsn {\n                        debug!(\n                            \"keeping {} because it's still might be referenced by child branch forked at {} is_dropped: xx is_incremental: {}\",\n                            l.layer_name(),\n                            retain_lsn,\n                            l.is_incremental(),\n                        );\n                        result.layers_needed_by_branches += 1;\n                        continue 'outer;\n                    }\n                }\n\n                // 4. Is there a valid lease that requires us to keep this layer?\n                if let Some(lsn) = &max_lsn_with_valid_lease {\n                    // keep if layer start <= any of the lease\n                    if &l.get_lsn_range().start <= lsn {\n                        debug!(\n                            \"keeping {} because there is a valid lease preventing GC at {}\",\n                            l.layer_name(),\n                            lsn,\n                        );\n                        result.layers_needed_by_leases += 1;\n                        continue 'outer;\n                    }\n                }\n\n                // 5. Is there a later on-disk layer for this relation?\n                //\n                // The end-LSN is exclusive, while disk_consistent_lsn is\n                // inclusive. 
For example, if disk_consistent_lsn is 100, it is\n                // OK for a delta layer to have end LSN 101, but if the end LSN\n                // is 102, then it might not have been fully flushed to disk\n                // before crash.\n                //\n                // For example, imagine that the following layers exist:\n                //\n                // 1000      - image (A)\n                // 1000-2000 - delta (B)\n                // 2000      - image (C)\n                // 2000-3000 - delta (D)\n                // 3000      - image (E)\n                //\n                // If GC horizon is at 2500, we can remove layers A and B, but\n                // we cannot remove C, even though it's older than 2500, because\n                // the delta layer 2000-3000 depends on it.\n                if !layers\n                    .image_layer_exists(&l.get_key_range(), &(l.get_lsn_range().end..new_gc_cutoff))\n                {\n                    debug!(\"keeping {} because it is the latest layer\", l.layer_name());\n                    result.layers_not_updated += 1;\n                    continue 'outer;\n                }\n\n                // We didn't find any reason to keep this file, so remove it.\n                info!(\n                    \"garbage collecting {} is_dropped: xx is_incremental: {}\",\n                    l.layer_name(),\n                    l.is_incremental(),\n                );\n                layers_to_remove.push(l);\n            }\n\n            layers_to_remove\n        };\n\n        if !layers_to_remove.is_empty() {\n            // Persist the new GC cutoff value before we actually remove anything.\n            // This unconditionally schedules also an index_part.json update, even though, we will\n            // be doing one a bit later with the unlinked gc'd layers.\n            let disk_consistent_lsn = self.disk_consistent_lsn.load();\n            self.schedule_uploads(disk_consistent_lsn, None)\n                
.map_err(|e| {\n                    if self.cancel.is_cancelled() {\n                        GcError::TimelineCancelled\n                    } else {\n                        GcError::Remote(e)\n                    }\n                })?;\n\n            let mut guard = self\n                .layers\n                .write(LayerManagerLockHolder::GarbageCollection)\n                .await;\n\n            let gc_layers = layers_to_remove\n                .iter()\n                .flat_map(|desc| guard.try_get_from_key(&desc.key()).cloned())\n                .collect::<Vec<Layer>>();\n\n            result.layers_removed = gc_layers.len() as u64;\n\n            self.remote_client.schedule_gc_update(&gc_layers)?;\n            guard.open_mut()?.finish_gc_timeline(&gc_layers);\n\n            #[cfg(feature = \"testing\")]\n            {\n                result.doomed_layers = gc_layers;\n            }\n        }\n\n        info!(\n            \"GC completed removing {} layers, cutoff {}\",\n            result.layers_removed, new_gc_cutoff\n        );\n\n        result.elapsed = now.elapsed().unwrap_or(Duration::ZERO);\n        Ok(result)\n    }\n\n    /// Reconstruct a value, using the given base image and WAL records in 'data'.\n    pub(crate) async fn reconstruct_value(\n        &self,\n        key: Key,\n        request_lsn: Lsn,\n        mut data: ValueReconstructState,\n        redo_attempt_type: RedoAttemptType,\n    ) -> Result<Bytes, PageReconstructError> {\n        // Perform WAL redo if needed\n        data.records.reverse();\n\n        let fire_critical_error = match redo_attempt_type {\n            RedoAttemptType::ReadPage => true,\n            RedoAttemptType::LegacyCompaction => true,\n            RedoAttemptType::GcCompaction => false,\n        };\n\n        // If we have a page image, and no WAL, we're all set\n        if data.records.is_empty() {\n            if let Some((img_lsn, img)) = &data.img {\n                trace!(\n                    \"found 
page image for key {} at {}, no WAL redo required, req LSN {}\",\n                    key, img_lsn, request_lsn,\n                );\n                Ok(img.clone())\n            } else {\n                Err(PageReconstructError::from(anyhow!(\n                    \"base image for {key} at {request_lsn} not found\"\n                )))\n            }\n        } else {\n            // We need to do WAL redo.\n            //\n            // If we don't have a base image, then the oldest WAL record better initialize\n            // the page\n            if data.img.is_none() && !data.records.first().unwrap().1.will_init() {\n                Err(PageReconstructError::from(anyhow!(\n                    \"Base image for {} at {} not found, but got {} WAL records\",\n                    key,\n                    request_lsn,\n                    data.records.len()\n                )))\n            } else {\n                if data.img.is_some() {\n                    trace!(\n                        \"found {} WAL records and a base image for {} at {}, performing WAL redo\",\n                        data.records.len(),\n                        key,\n                        request_lsn\n                    );\n                } else {\n                    trace!(\n                        \"found {} WAL records that will init the page for {} at {}, performing WAL redo\",\n                        data.records.len(),\n                        key,\n                        request_lsn\n                    );\n                };\n                let res = self\n                    .walredo_mgr\n                    .as_ref()\n                    .context(\"timeline has no walredo manager\")\n                    .map_err(PageReconstructError::WalRedo)?\n                    .request_redo(\n                        key,\n                        request_lsn,\n                        data.img,\n                        data.records,\n                        self.pg_version,\n           
             redo_attempt_type,\n                    )\n                    .await;\n                let img = match res {\n                    Ok(img) => img,\n                    Err(walredo::Error::Cancelled) => return Err(PageReconstructError::Cancelled),\n                    Err(walredo::Error::Other(err)) => {\n                        if fire_critical_error {\n                            critical_timeline!(\n                                self.tenant_shard_id,\n                                self.timeline_id,\n                                Some(&self.corruption_detected),\n                                \"walredo failure during page reconstruction: {err:?}\"\n                            );\n                        }\n                        return Err(PageReconstructError::WalRedo(\n                            err.context(\"reconstruct a page image\"),\n                        ));\n                    }\n                };\n                Ok(img)\n            }\n        }\n    }\n\n    pub(crate) async fn spawn_download_all_remote_layers(\n        self: Arc<Self>,\n        request: DownloadRemoteLayersTaskSpawnRequest,\n        ctx: &RequestContext,\n    ) -> Result<DownloadRemoteLayersTaskInfo, DownloadRemoteLayersTaskInfo> {\n        use pageserver_api::models::DownloadRemoteLayersTaskState;\n\n        // this is not really needed anymore; it has tests which really check the return value from\n        // http api. 
it would be better not to maintain this anymore.\n\n        let mut status_guard = self.download_all_remote_layers_task_info.write().unwrap();\n        if let Some(st) = &*status_guard {\n            match &st.state {\n                DownloadRemoteLayersTaskState::Running => {\n                    return Err(st.clone());\n                }\n                DownloadRemoteLayersTaskState::ShutDown\n                | DownloadRemoteLayersTaskState::Completed => {\n                    *status_guard = None;\n                }\n            }\n        }\n\n        let self_clone = Arc::clone(&self);\n        let task_ctx = ctx.detached_child(\n            TaskKind::DownloadAllRemoteLayers,\n            DownloadBehavior::Download,\n        );\n        let task_id = task_mgr::spawn(\n            task_mgr::BACKGROUND_RUNTIME.handle(),\n            task_mgr::TaskKind::DownloadAllRemoteLayers,\n            self.tenant_shard_id,\n            Some(self.timeline_id),\n            \"download all remote layers task\",\n            async move {\n                self_clone.download_all_remote_layers(request, &task_ctx).await;\n                let mut status_guard = self_clone.download_all_remote_layers_task_info.write().unwrap();\n                 match &mut *status_guard {\n                    None => {\n                        warn!(\"tasks status is supposed to be Some(), since we are running\");\n                    }\n                    Some(st) => {\n                        let exp_task_id = format!(\"{}\", task_mgr::current_task_id().unwrap());\n                        if st.task_id != exp_task_id {\n                            warn!(\"task id changed while we were still running, expecting {} but have {}\", exp_task_id, st.task_id);\n                        } else {\n                            st.state = DownloadRemoteLayersTaskState::Completed;\n                        }\n                    }\n                };\n                Ok(())\n            }\n            
.instrument(info_span!(parent: None, \"download_all_remote_layers\", tenant_id = %self.tenant_shard_id.tenant_id, shard_id = %self.tenant_shard_id.shard_slug(), timeline_id = %self.timeline_id))\n        );\n\n        let initial_info = DownloadRemoteLayersTaskInfo {\n            task_id: format!(\"{task_id}\"),\n            state: DownloadRemoteLayersTaskState::Running,\n            total_layer_count: 0,\n            successful_download_count: 0,\n            failed_download_count: 0,\n        };\n        *status_guard = Some(initial_info.clone());\n\n        Ok(initial_info)\n    }\n\n    async fn download_all_remote_layers(\n        self: &Arc<Self>,\n        request: DownloadRemoteLayersTaskSpawnRequest,\n        ctx: &RequestContext,\n    ) {\n        use pageserver_api::models::DownloadRemoteLayersTaskState;\n\n        let remaining = {\n            let guard = self\n                .layers\n                .read(LayerManagerLockHolder::GetLayerMapInfo)\n                .await;\n            let Ok(lm) = guard.layer_map() else {\n                // technically here we could look into iterating accessible layers, but downloading\n                // all layers of a shutdown timeline makes no sense regardless.\n                tracing::info!(\"attempted to download all layers of shutdown timeline\");\n                return;\n            };\n            lm.iter_historic_layers()\n                .map(|desc| guard.get_from_desc(&desc))\n                .collect::<Vec<_>>()\n        };\n        let total_layer_count = remaining.len();\n\n        macro_rules! 
lock_status {\n            ($st:ident) => {\n                let mut st = self.download_all_remote_layers_task_info.write().unwrap();\n                let st = st\n                    .as_mut()\n                    .expect(\"this function is only called after the task has been spawned\");\n                assert_eq!(\n                    st.task_id,\n                    format!(\n                        \"{}\",\n                        task_mgr::current_task_id().expect(\"we run inside a task_mgr task\")\n                    )\n                );\n                let $st = st;\n            };\n        }\n\n        {\n            lock_status!(st);\n            st.total_layer_count = total_layer_count as u64;\n        }\n\n        let mut remaining = remaining.into_iter();\n        let mut have_remaining = true;\n        let mut js = tokio::task::JoinSet::new();\n\n        let cancel = task_mgr::shutdown_token();\n\n        let limit = request.max_concurrent_downloads;\n\n        loop {\n            while js.len() < limit.get() && have_remaining && !cancel.is_cancelled() {\n                let Some(next) = remaining.next() else {\n                    have_remaining = false;\n                    break;\n                };\n\n                let span = tracing::info_span!(\"download\", layer = %next);\n\n                let ctx = ctx.attached_child();\n                js.spawn(\n                    async move {\n                        let res = next.download(&ctx).await;\n                        (next, res)\n                    }\n                    .instrument(span),\n                );\n            }\n\n            while let Some(res) = js.join_next().await {\n                match res {\n                    Ok((_, Ok(_))) => {\n                        lock_status!(st);\n                        st.successful_download_count += 1;\n                    }\n                    Ok((layer, Err(e))) => {\n                        tracing::error!(%layer, \"download failed: 
{e:#}\");\n                        lock_status!(st);\n                        st.failed_download_count += 1;\n                    }\n                    Err(je) if je.is_cancelled() => unreachable!(\"not used here\"),\n                    Err(je) if je.is_panic() => {\n                        lock_status!(st);\n                        st.failed_download_count += 1;\n                    }\n                    Err(je) => tracing::warn!(\"unknown joinerror: {je:?}\"),\n                }\n            }\n\n            if js.is_empty() && (!have_remaining || cancel.is_cancelled()) {\n                break;\n            }\n        }\n\n        {\n            lock_status!(st);\n            st.state = DownloadRemoteLayersTaskState::Completed;\n        }\n    }\n\n    pub(crate) fn get_download_all_remote_layers_task_info(\n        &self,\n    ) -> Option<DownloadRemoteLayersTaskInfo> {\n        self.download_all_remote_layers_task_info\n            .read()\n            .unwrap()\n            .clone()\n    }\n\n    /* BEGIN_HADRON */\n    pub(crate) async fn compute_image_consistent_lsn(&self) -> anyhow::Result<Lsn> {\n        let guard = self\n            .layers\n            .read(LayerManagerLockHolder::ComputeImageConsistentLsn)\n            .await;\n        let layer_map = guard.layer_map()?;\n        let disk_consistent_lsn = self.get_disk_consistent_lsn();\n\n        Ok(layer_map.compute_image_consistent_lsn(disk_consistent_lsn))\n    }\n    /* END_HADRON */\n}\n\nimpl Timeline {\n    /// Returns non-remote layers for eviction.\n    pub(crate) async fn get_local_layers_for_disk_usage_eviction(&self) -> DiskUsageEvictionInfo {\n        let guard = self.layers.read(LayerManagerLockHolder::Eviction).await;\n        let mut max_layer_size: Option<u64> = None;\n\n        let resident_layers = guard\n            .likely_resident_layers()\n            .map(|layer| {\n                let file_size = layer.layer_desc().file_size;\n                max_layer_size = 
max_layer_size.map_or(Some(file_size), |m| Some(m.max(file_size)));\n\n                let last_activity_ts = layer.latest_activity();\n\n                EvictionCandidate {\n                    layer: layer.to_owned().into(),\n                    last_activity_ts,\n                    relative_last_activity: finite_f32::FiniteF32::ZERO,\n                    visibility: layer.visibility(),\n                }\n            })\n            .collect();\n\n        DiskUsageEvictionInfo {\n            max_layer_size,\n            resident_layers,\n        }\n    }\n\n    pub(crate) fn get_shard_index(&self) -> ShardIndex {\n        ShardIndex {\n            shard_number: self.tenant_shard_id.shard_number,\n            shard_count: self.tenant_shard_id.shard_count,\n        }\n    }\n\n    /// Persistently blocks gc for `Manual` reason.\n    ///\n    /// Returns true if no such block existed before, false otherwise.\n    pub(crate) async fn block_gc(&self, tenant: &super::TenantShard) -> anyhow::Result<bool> {\n        use crate::tenant::remote_timeline_client::index::GcBlockingReason;\n        assert_eq!(self.tenant_shard_id, tenant.tenant_shard_id);\n        tenant.gc_block.insert(self, GcBlockingReason::Manual).await\n    }\n\n    /// Persistently unblocks gc for `Manual` reason.\n    pub(crate) async fn unblock_gc(&self, tenant: &super::TenantShard) -> anyhow::Result<()> {\n        use crate::tenant::remote_timeline_client::index::GcBlockingReason;\n        assert_eq!(self.tenant_shard_id, tenant.tenant_shard_id);\n        tenant.gc_block.remove(self, GcBlockingReason::Manual).await\n    }\n\n    #[cfg(test)]\n    pub(super) fn force_advance_lsn(self: &Arc<Timeline>, new_lsn: Lsn) {\n        self.last_record_lsn.advance(new_lsn);\n    }\n\n    #[cfg(test)]\n    pub(super) fn force_set_disk_consistent_lsn(&self, new_value: Lsn) {\n        self.disk_consistent_lsn.store(new_value);\n    }\n\n    /// Force create an image layer and place it into the layer map.\n    ///\n 
   /// DO NOT use this function directly. Use [`TenantShard::branch_timeline_test_with_layers`]\n    /// or [`TenantShard::create_test_timeline_with_layers`] to ensure all these layers are\n    /// placed into the layer map in one run AND be validated.\n    #[cfg(test)]\n    pub(super) async fn force_create_image_layer(\n        self: &Arc<Timeline>,\n        lsn: Lsn,\n        mut images: Vec<(Key, Bytes)>,\n        check_start_lsn: Option<Lsn>,\n        ctx: &RequestContext,\n    ) -> anyhow::Result<()> {\n        let last_record_lsn = self.get_last_record_lsn();\n        assert!(\n            lsn <= last_record_lsn,\n            \"advance last record lsn before inserting a layer, lsn={lsn}, last_record_lsn={last_record_lsn}\"\n        );\n        if let Some(check_start_lsn) = check_start_lsn {\n            assert!(lsn >= check_start_lsn);\n        }\n        images.sort_unstable_by(|(ka, _), (kb, _)| ka.cmp(kb));\n        let min_key = *images.first().map(|(k, _)| k).unwrap();\n        let end_key = images.last().map(|(k, _)| k).unwrap().next();\n        let mut image_layer_writer = ImageLayerWriter::new(\n            self.conf,\n            self.timeline_id,\n            self.tenant_shard_id,\n            &(min_key..end_key),\n            lsn,\n            &self.gate,\n            self.cancel.clone(),\n            ctx,\n        )\n        .await?;\n        for (key, img) in images {\n            image_layer_writer.put_image(key, img, ctx).await?;\n        }\n        let (desc, path) = image_layer_writer.finish(ctx).await?;\n        let image_layer = Layer::finish_creating(self.conf, self, desc, &path)?;\n        info!(\"force created image layer {}\", image_layer.local_path());\n        {\n            let mut guard = self.layers.write(LayerManagerLockHolder::Testing).await;\n            guard\n                .open_mut()\n                .unwrap()\n                .force_insert_layer(image_layer.clone());\n        }\n\n        // Update remote_timeline_client 
state to reflect existence of this layer\n        self.remote_client\n            .schedule_layer_file_upload(image_layer)\n            .unwrap();\n\n        Ok(())\n    }\n\n    /// Force create a delta layer and place it into the layer map.\n    ///\n    /// DO NOT use this function directly. Use [`TenantShard::branch_timeline_test_with_layers`]\n    /// or [`TenantShard::create_test_timeline_with_layers`] to ensure all these layers are\n    /// placed into the layer map in one run AND be validated.\n    #[cfg(test)]\n    pub(super) async fn force_create_delta_layer(\n        self: &Arc<Timeline>,\n        mut deltas: DeltaLayerTestDesc,\n        check_start_lsn: Option<Lsn>,\n        ctx: &RequestContext,\n    ) -> anyhow::Result<()> {\n        let last_record_lsn = self.get_last_record_lsn();\n        deltas\n            .data\n            .sort_unstable_by(|(ka, la, _), (kb, lb, _)| (ka, la).cmp(&(kb, lb)));\n        assert!(deltas.data.first().unwrap().0 >= deltas.key_range.start);\n        assert!(deltas.data.last().unwrap().0 < deltas.key_range.end);\n        for (_, lsn, _) in &deltas.data {\n            assert!(deltas.lsn_range.start <= *lsn && *lsn < deltas.lsn_range.end);\n        }\n        assert!(\n            deltas.lsn_range.end <= last_record_lsn,\n            \"advance last record lsn before inserting a layer, end_lsn={}, last_record_lsn={}\",\n            deltas.lsn_range.end,\n            last_record_lsn\n        );\n        if let Some(check_start_lsn) = check_start_lsn {\n            assert!(deltas.lsn_range.start >= check_start_lsn);\n        }\n        let mut delta_layer_writer = DeltaLayerWriter::new(\n            self.conf,\n            self.timeline_id,\n            self.tenant_shard_id,\n            deltas.key_range.start,\n            deltas.lsn_range,\n            &self.gate,\n            self.cancel.clone(),\n            ctx,\n        )\n        .await?;\n        for (key, lsn, val) in deltas.data {\n            
delta_layer_writer.put_value(key, lsn, val, ctx).await?;\n        }\n        let (desc, path) = delta_layer_writer.finish(deltas.key_range.end, ctx).await?;\n        let delta_layer = Layer::finish_creating(self.conf, self, desc, &path)?;\n        info!(\"force created delta layer {}\", delta_layer.local_path());\n        {\n            let mut guard = self.layers.write(LayerManagerLockHolder::Testing).await;\n            guard\n                .open_mut()\n                .unwrap()\n                .force_insert_layer(delta_layer.clone());\n        }\n\n        // Update remote_timeline_client state to reflect existence of this layer\n        self.remote_client\n            .schedule_layer_file_upload(delta_layer)\n            .unwrap();\n\n        Ok(())\n    }\n\n    /// Force create an in-memory layer and place it into the layer map.\n    #[cfg(test)]\n    pub(super) async fn force_create_in_memory_layer(\n        self: &Arc<Timeline>,\n        mut in_memory: InMemoryLayerTestDesc,\n        check_start_lsn: Option<Lsn>,\n        ctx: &RequestContext,\n    ) -> anyhow::Result<()> {\n        use utils::bin_ser::BeSer;\n\n        // Validate LSNs\n        if let Some(check_start_lsn) = check_start_lsn {\n            assert!(in_memory.lsn_range.start >= check_start_lsn);\n        }\n\n        let last_record_lsn = self.get_last_record_lsn();\n        let layer_end_lsn = if in_memory.is_open {\n            in_memory\n                .data\n                .iter()\n                .map(|(_key, lsn, _value)| lsn)\n                .max()\n                .cloned()\n        } else {\n            Some(in_memory.lsn_range.end)\n        };\n\n        if let Some(end) = layer_end_lsn {\n            assert!(\n                end <= last_record_lsn,\n                \"advance last record lsn before inserting a layer, end_lsn={end}, last_record_lsn={last_record_lsn}\",\n            );\n        }\n\n        in_memory.data.iter().for_each(|(_key, lsn, _value)| {\n            
assert!(*lsn >= in_memory.lsn_range.start);\n            assert!(*lsn < in_memory.lsn_range.end);\n        });\n\n        // Build the batch\n        in_memory\n            .data\n            .sort_unstable_by(|(ka, la, _), (kb, lb, _)| (ka, la).cmp(&(kb, lb)));\n\n        let data = in_memory\n            .data\n            .into_iter()\n            .map(|(key, lsn, value)| {\n                let value_size = value.serialized_size().unwrap() as usize;\n                (key.to_compact(), lsn, value_size, value)\n            })\n            .collect::<Vec<_>>();\n\n        let batch = SerializedValueBatch::from_values(data);\n\n        // Create the in-memory layer and write the batch into it\n        let layer = InMemoryLayer::create(\n            self.conf,\n            self.timeline_id,\n            self.tenant_shard_id,\n            in_memory.lsn_range.start,\n            &self.gate,\n            // TODO: if we ever use this function in production code, we need to pass the real cancellation token\n            &CancellationToken::new(),\n            ctx,\n        )\n        .await\n        .unwrap();\n\n        layer.put_batch(batch, ctx).await.unwrap();\n        if !in_memory.is_open {\n            layer.freeze(in_memory.lsn_range.end).await;\n        }\n\n        info!(\"force created in-memory layer {:?}\", in_memory.lsn_range);\n\n        // Link the layer to the layer map\n        {\n            let mut guard = self.layers.write(LayerManagerLockHolder::Testing).await;\n            let layer_map = guard.open_mut().unwrap();\n            layer_map.force_insert_in_memory_layer(Arc::new(layer));\n        }\n\n        Ok(())\n    }\n\n    /// Return all keys at the LSN in the image layers\n    #[cfg(test)]\n    pub(crate) async fn inspect_image_layers(\n        self: &Arc<Timeline>,\n        lsn: Lsn,\n        ctx: &RequestContext,\n        io_concurrency: IoConcurrency,\n    ) -> anyhow::Result<Vec<(Key, Bytes)>> {\n        let mut all_data = Vec::new();\n       
 let guard = self.layers.read(LayerManagerLockHolder::Testing).await;\n        for layer in guard.layer_map()?.iter_historic_layers() {\n            if !layer.is_delta() && layer.image_layer_lsn() == lsn {\n                let layer = guard.get_from_desc(&layer);\n                let mut reconstruct_data = ValuesReconstructState::new(io_concurrency.clone());\n                layer\n                    .get_values_reconstruct_data(\n                        KeySpace::single(Key::MIN..Key::MAX),\n                        lsn..Lsn(lsn.0 + 1),\n                        &mut reconstruct_data,\n                        ctx,\n                    )\n                    .await?;\n                for (k, v) in std::mem::take(&mut reconstruct_data.keys) {\n                    let v = v.collect_pending_ios().await?;\n                    all_data.push((k, v.img.unwrap().1));\n                }\n            }\n        }\n        all_data.sort();\n        Ok(all_data)\n    }\n\n    /// Get all historic layer descriptors in the layer map\n    #[cfg(test)]\n    pub(crate) async fn inspect_historic_layers(\n        self: &Arc<Timeline>,\n    ) -> anyhow::Result<Vec<super::storage_layer::PersistentLayerKey>> {\n        let mut layers = Vec::new();\n        let guard = self.layers.read(LayerManagerLockHolder::Testing).await;\n        for layer in guard.layer_map()?.iter_historic_layers() {\n            layers.push(layer.key());\n        }\n        Ok(layers)\n    }\n\n    #[cfg(test)]\n    pub(crate) fn add_extra_test_dense_keyspace(&self, ks: KeySpace) {\n        let mut keyspace = self.extra_test_dense_keyspace.load().as_ref().clone();\n        keyspace.merge(&ks);\n        self.extra_test_dense_keyspace.store(Arc::new(keyspace));\n    }\n}\n\n/// Tracking writes ingestion does to a particular in-memory layer.\n///\n/// Cleared upon freezing a layer.\npub(crate) struct TimelineWriterState {\n    open_layer: Arc<InMemoryLayer>,\n    current_size: u64,\n    // Previous Lsn which passed 
through\n    prev_lsn: Option<Lsn>,\n    // Largest Lsn which passed through the current writer\n    max_lsn: Option<Lsn>,\n    // Cached details of the last freeze. Avoids going through the atomic/lock on every put.\n    cached_last_freeze_at: Lsn,\n}\n\nimpl TimelineWriterState {\n    fn new(open_layer: Arc<InMemoryLayer>, current_size: u64, last_freeze_at: Lsn) -> Self {\n        Self {\n            open_layer,\n            current_size,\n            prev_lsn: None,\n            max_lsn: None,\n            cached_last_freeze_at: last_freeze_at,\n        }\n    }\n}\n\n/// Various functions to mutate the timeline.\n// TODO Currently, Deref is used to allow easy access to read methods from this trait.\n// This is probably considered a bad practice in Rust and should be fixed eventually,\n// but will cause large code changes.\npub(crate) struct TimelineWriter<'a> {\n    tl: &'a Timeline,\n    write_guard: tokio::sync::MutexGuard<'a, Option<TimelineWriterState>>,\n}\n\nimpl Deref for TimelineWriter<'_> {\n    type Target = Timeline;\n\n    fn deref(&self) -> &Self::Target {\n        self.tl\n    }\n}\n\n#[derive(PartialEq)]\nenum OpenLayerAction {\n    Roll,\n    Open,\n    None,\n}\n\nimpl TimelineWriter<'_> {\n    async fn handle_open_layer_action(\n        &mut self,\n        at: Lsn,\n        action: OpenLayerAction,\n        ctx: &RequestContext,\n    ) -> anyhow::Result<&Arc<InMemoryLayer>> {\n        match action {\n            OpenLayerAction::Roll => {\n                let freeze_at = self.write_guard.as_ref().unwrap().max_lsn.unwrap();\n                self.roll_layer(freeze_at).await?;\n                self.open_layer(at, ctx).await?;\n            }\n            OpenLayerAction::Open => self.open_layer(at, ctx).await?,\n            OpenLayerAction::None => {\n                assert!(self.write_guard.is_some());\n            }\n        }\n\n        Ok(&self.write_guard.as_ref().unwrap().open_layer)\n    }\n\n    async fn open_layer(&mut self, at: Lsn, ctx: 
&RequestContext) -> anyhow::Result<()> {\n        let layer = self\n            .tl\n            .get_layer_for_write(at, &self.write_guard, ctx)\n            .await?;\n        let initial_size = layer.len();\n\n        let last_freeze_at = self.last_freeze_at.load();\n        self.write_guard.replace(TimelineWriterState::new(\n            layer,\n            initial_size,\n            last_freeze_at,\n        ));\n\n        Ok(())\n    }\n\n    async fn roll_layer(&mut self, freeze_at: Lsn) -> Result<(), FlushLayerError> {\n        let current_size = self.write_guard.as_ref().unwrap().current_size;\n\n        // If layer flushes are backpressured due to compaction not keeping up, wait for the flush\n        // to propagate the backpressure up into WAL ingestion.\n        let l0_count = self\n            .tl\n            .layers\n            .read(LayerManagerLockHolder::GetLayerMapInfo)\n            .await\n            .layer_map()?\n            .level0_deltas()\n            .len();\n        let wait_thresholds = [\n            self.get_l0_flush_delay_threshold(),\n            self.get_l0_flush_stall_threshold(),\n        ];\n        let wait_threshold = wait_thresholds.into_iter().flatten().min();\n\n        // self.write_guard will be taken by the freezing\n        let flush_id = self\n            .tl\n            .freeze_inmem_layer_at(freeze_at, &mut self.write_guard)\n            .await?;\n\n        assert!(self.write_guard.is_none());\n\n        if let Some(wait_threshold) = wait_threshold {\n            if l0_count >= wait_threshold {\n                debug!(\n                    \"layer roll waiting for flush due to compaction backpressure at {l0_count} L0 layers\"\n                );\n                self.tl.wait_flush_completion(flush_id).await?;\n            }\n        }\n\n        if current_size >= self.get_checkpoint_distance() * 2 {\n            warn!(\"Flushed oversized open layer with size {}\", current_size)\n        }\n\n        Ok(())\n    
}\n\n    fn get_open_layer_action(&self, lsn: Lsn, new_value_size: u64) -> OpenLayerAction {\n        let state = &*self.write_guard;\n        let Some(state) = &state else {\n            return OpenLayerAction::Open;\n        };\n\n        #[cfg(feature = \"testing\")]\n        if state.cached_last_freeze_at < self.tl.last_freeze_at.load() {\n            // this check and assertion are not really needed because\n            // LayerManager::try_freeze_in_memory_layer will always clear out the\n            // TimelineWriterState if something is frozen. however, we can advance last_freeze_at when there\n            // is no TimelineWriterState.\n            assert!(\n                state.open_layer.end_lsn.get().is_some(),\n                \"our open_layer must be outdated\"\n            );\n\n            // this would be a memory leak waiting to happen because the in-memory layer always has\n            // an index\n            panic!(\"BUG: TimelineWriterState held on to frozen in-memory layer.\");\n        }\n\n        if state.prev_lsn == Some(lsn) {\n            // Rolling mid LSN is not supported by [downstream code].\n            // Hence, only roll at LSN boundaries.\n            //\n            // [downstream code]: https://github.com/neondatabase/neon/pull/7993#discussion_r1633345422\n            return OpenLayerAction::None;\n        }\n\n        if state.current_size == 0 {\n            // Don't roll empty layers\n            return OpenLayerAction::None;\n        }\n\n        if self.tl.should_roll(\n            state.current_size,\n            state.current_size + new_value_size,\n            self.get_checkpoint_distance(),\n            lsn,\n            state.cached_last_freeze_at,\n            state.open_layer.get_opened_at(),\n        ) {\n            OpenLayerAction::Roll\n        } else {\n            OpenLayerAction::None\n        }\n    }\n\n    /// Put a batch of keys at the specified Lsns.\n    pub(crate) async fn put_batch(\n        &mut 
self,\n        batch: SerializedValueBatch,\n        ctx: &RequestContext,\n    ) -> anyhow::Result<()> {\n        if !batch.has_data() {\n            return Ok(());\n        }\n\n        // In debug builds, assert that we don't write any keys that don't belong to this shard.\n        // We don't assert this in release builds, since key ownership policies may change over\n        // time. Stray keys will be removed during compaction.\n        if cfg!(debug_assertions) {\n            for metadata in &batch.metadata {\n                if let ValueMeta::Serialized(metadata) = metadata {\n                    let key = Key::from_compact(metadata.key);\n                    assert!(\n                        self.shard_identity.is_key_local(&key)\n                            || self.shard_identity.is_key_global(&key),\n                        \"key {key} does not belong on shard {}\",\n                        self.shard_identity.shard_index()\n                    );\n                }\n            }\n        }\n\n        let batch_max_lsn = batch.max_lsn;\n        let buf_size: u64 = batch.buffer_size() as u64;\n\n        let action = self.get_open_layer_action(batch_max_lsn, buf_size);\n        let layer = self\n            .handle_open_layer_action(batch_max_lsn, action, ctx)\n            .await?;\n\n        let res = layer.put_batch(batch, ctx).await;\n\n        if res.is_ok() {\n            // Update the current size only when the entire write was ok.\n            // In case of failures, we may have had partial writes which\n            // render the size tracking out of sync. 
That's ok because\n            // the checkpoint distance should be significantly smaller\n            // than the S3 single shot upload limit of 5GiB.\n            let state = self.write_guard.as_mut().unwrap();\n\n            state.current_size += buf_size;\n            state.prev_lsn = Some(batch_max_lsn);\n            state.max_lsn = std::cmp::max(state.max_lsn, Some(batch_max_lsn));\n        }\n\n        res\n    }\n\n    #[cfg(test)]\n    /// Test helper, for tests that would like to poke individual values without composing a batch\n    pub(crate) async fn put(\n        &mut self,\n        key: Key,\n        lsn: Lsn,\n        value: &Value,\n        ctx: &RequestContext,\n    ) -> anyhow::Result<()> {\n        use utils::bin_ser::BeSer;\n        if !key.is_valid_key_on_write_path() {\n            bail!(\n                \"the request contains data not supported by pageserver at TimelineWriter::put: {}\",\n                key\n            );\n        }\n        let val_ser_size = value.serialized_size().unwrap() as usize;\n        let batch = SerializedValueBatch::from_values(vec![(\n            key.to_compact(),\n            lsn,\n            val_ser_size,\n            value.clone(),\n        )]);\n\n        self.put_batch(batch, ctx).await\n    }\n\n    pub(crate) async fn delete_batch(\n        &mut self,\n        batch: &[(Range<Key>, Lsn)],\n        ctx: &RequestContext,\n    ) -> anyhow::Result<()> {\n        if let Some((_, lsn)) = batch.first() {\n            let action = self.get_open_layer_action(*lsn, 0);\n            let layer = self.handle_open_layer_action(*lsn, action, ctx).await?;\n            layer.put_tombstones(batch).await?;\n        }\n\n        Ok(())\n    }\n\n    /// Track the end of the latest digested WAL record.\n    /// Remember the (end of) last valid WAL record remembered in the timeline.\n    ///\n    /// Call this after you have finished writing all the WAL up to 'lsn'.\n    ///\n    /// 'lsn' must be aligned. 
This wakes up any wait_lsn() callers waiting for\n    /// the 'lsn' or anything older. The previous last record LSN is stored alongside\n    /// the latest and can be read.\n    pub(crate) fn finish_write(&self, new_lsn: Lsn) {\n        self.tl.finish_write(new_lsn);\n    }\n\n    pub(crate) fn update_current_logical_size(&self, delta: i64) {\n        self.tl.update_current_logical_size(delta)\n    }\n}\n\n// We need TimelineWriter to be send in upcoming conversion of\n// Timeline::layers to tokio::sync::RwLock.\n#[test]\nfn is_send() {\n    fn _assert_send<T: Send>() {}\n    _assert_send::<TimelineWriter<'_>>();\n}\n\n#[cfg(test)]\nmod tests {\n    use std::sync::Arc;\n\n    use pageserver_api::key::Key;\n    use postgres_ffi::PgMajorVersion;\n    use std::iter::Iterator;\n    use tracing::Instrument;\n    use utils::id::TimelineId;\n    use utils::lsn::Lsn;\n    use wal_decoder::models::value::Value;\n\n    use super::HeatMapTimeline;\n    use crate::context::RequestContextBuilder;\n    use crate::tenant::harness::{TenantHarness, test_img};\n    use crate::tenant::layer_map::LayerMap;\n    use crate::tenant::storage_layer::{Layer, LayerName, LayerVisibilityHint};\n    use crate::tenant::timeline::layer_manager::LayerManagerLockHolder;\n    use crate::tenant::timeline::{DeltaLayerTestDesc, EvictionError};\n    use crate::tenant::{PreviousHeatmap, Timeline};\n\n    fn assert_heatmaps_have_same_layers(lhs: &HeatMapTimeline, rhs: &HeatMapTimeline) {\n        assert_eq!(lhs.all_layers().count(), rhs.all_layers().count());\n        let lhs_rhs = lhs.all_layers().zip(rhs.all_layers());\n        for (l, r) in lhs_rhs {\n            assert_eq!(l.name, r.name);\n            assert_eq!(l.metadata, r.metadata);\n        }\n    }\n\n    #[tokio::test]\n    async fn test_heatmap_generation() {\n        let harness = TenantHarness::create(\"heatmap_generation\").await.unwrap();\n\n        let covered_delta = DeltaLayerTestDesc::new_with_inferred_key_range(\n            
Lsn(0x10)..Lsn(0x20),\n            vec![(\n                Key::from_hex(\"620000000033333333444444445500000000\").unwrap(),\n                Lsn(0x11),\n                Value::Image(test_img(\"foo\")),\n            )],\n        );\n        let visible_delta = DeltaLayerTestDesc::new_with_inferred_key_range(\n            Lsn(0x10)..Lsn(0x20),\n            vec![(\n                Key::from_hex(\"720000000033333333444444445500000000\").unwrap(),\n                Lsn(0x11),\n                Value::Image(test_img(\"foo\")),\n            )],\n        );\n        let l0_delta = DeltaLayerTestDesc::new(\n            Lsn(0x20)..Lsn(0x30),\n            Key::from_hex(\"000000000000000000000000000000000000\").unwrap()\n                ..Key::from_hex(\"FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF\").unwrap(),\n            vec![(\n                Key::from_hex(\"720000000033333333444444445500000000\").unwrap(),\n                Lsn(0x25),\n                Value::Image(test_img(\"foo\")),\n            )],\n        );\n        let delta_layers = vec![\n            covered_delta.clone(),\n            visible_delta.clone(),\n            l0_delta.clone(),\n        ];\n\n        let image_layer = (\n            Lsn(0x40),\n            vec![(\n                Key::from_hex(\"620000000033333333444444445500000000\").unwrap(),\n                test_img(\"bar\"),\n            )],\n        );\n        let image_layers = vec![image_layer];\n\n        let (tenant, ctx) = harness.load().await;\n        let timeline = tenant\n            .create_test_timeline_with_layers(\n                TimelineId::generate(),\n                Lsn(0x10),\n                PgMajorVersion::PG14,\n                &ctx,\n                Vec::new(), // in-memory layers\n                delta_layers,\n                image_layers,\n                Lsn(0x100),\n            )\n            .await\n            .unwrap();\n        let ctx = &ctx.with_scope_timeline(&timeline);\n\n        // Layer visibility is an input to 
heatmap generation, so refresh it first\n        timeline.update_layer_visibility().await.unwrap();\n\n        let heatmap = timeline\n            .generate_heatmap()\n            .await\n            .expect(\"Infallible while timeline is not shut down\");\n\n        assert_eq!(heatmap.timeline_id, timeline.timeline_id);\n\n        // L0 should come last\n        let heatmap_layers = heatmap.all_layers().collect::<Vec<_>>();\n        assert_eq!(heatmap_layers.last().unwrap().name, l0_delta.layer_name());\n\n        let mut last_lsn = Lsn::MAX;\n        for layer in heatmap_layers {\n            // Covered layer should be omitted\n            assert!(layer.name != covered_delta.layer_name());\n\n            let layer_lsn = match &layer.name {\n                LayerName::Delta(d) => d.lsn_range.end,\n                LayerName::Image(i) => i.lsn,\n            };\n\n            // Apart from L0s, newest Layers should come first\n            if !LayerMap::is_l0(layer.name.key_range(), layer.name.is_delta()) {\n                assert!(layer_lsn <= last_lsn);\n                last_lsn = layer_lsn;\n            }\n        }\n\n        // Evict all the layers and stash the old heatmap in the timeline.\n        // This simulates a migration to a cold secondary location.\n\n        let guard = timeline.layers.read(LayerManagerLockHolder::Testing).await;\n        let mut all_layers = Vec::new();\n        let forever = std::time::Duration::from_secs(120);\n        for layer in guard.likely_resident_layers() {\n            all_layers.push(layer.clone());\n            layer.evict_and_wait(forever).await.unwrap();\n        }\n        drop(guard);\n\n        timeline\n            .previous_heatmap\n            .store(Some(Arc::new(PreviousHeatmap::Active {\n                heatmap: heatmap.clone(),\n                read_at: std::time::Instant::now(),\n                end_lsn: None,\n            })));\n\n        // Generate a new heatmap and assert that it contains the same layers 
as the old one.\n        let post_migration_heatmap = timeline.generate_heatmap().await.unwrap();\n        assert_heatmaps_have_same_layers(&heatmap, &post_migration_heatmap);\n\n        // Download each layer one by one. Generate the heatmap at each step and check\n        // that it's stable.\n        for layer in all_layers {\n            if layer.visibility() == LayerVisibilityHint::Covered {\n                continue;\n            }\n\n            eprintln!(\"Downloading {layer} and re-generating heatmap\");\n\n            let ctx = &RequestContextBuilder::from(ctx)\n                .download_behavior(crate::context::DownloadBehavior::Download)\n                .attached_child();\n\n            let _resident = layer\n                .download_and_keep_resident(ctx)\n                .instrument(tracing::info_span!(\n                    parent: None,\n                    \"download_layer\",\n                    tenant_id = %timeline.tenant_shard_id.tenant_id,\n                    shard_id = %timeline.tenant_shard_id.shard_slug(),\n                    timeline_id = %timeline.timeline_id\n                ))\n                .await\n                .unwrap();\n\n            let post_download_heatmap = timeline.generate_heatmap().await.unwrap();\n            assert_heatmaps_have_same_layers(&heatmap, &post_download_heatmap);\n        }\n\n        // Everything from the post-migration heatmap is now resident.\n        // Check that we drop it from memory.\n        assert!(matches!(\n            timeline.previous_heatmap.load().as_deref(),\n            Some(PreviousHeatmap::Obsolete)\n        ));\n    }\n\n    #[tokio::test]\n    async fn test_previous_heatmap_obsoletion() {\n        let harness = TenantHarness::create(\"heatmap_previous_heatmap_obsoletion\")\n            .await\n            .unwrap();\n\n        let l0_delta = DeltaLayerTestDesc::new(\n            Lsn(0x20)..Lsn(0x30),\n            Key::from_hex(\"000000000000000000000000000000000000\").unwrap()\n    
            ..Key::from_hex(\"FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF\").unwrap(),\n            vec![(\n                Key::from_hex(\"720000000033333333444444445500000000\").unwrap(),\n                Lsn(0x25),\n                Value::Image(test_img(\"foo\")),\n            )],\n        );\n\n        let image_layer = (\n            Lsn(0x40),\n            vec![(\n                Key::from_hex(\"620000000033333333444444445500000000\").unwrap(),\n                test_img(\"bar\"),\n            )],\n        );\n\n        let delta_layers = vec![l0_delta];\n        let image_layers = vec![image_layer];\n\n        let (tenant, ctx) = harness.load().await;\n        let timeline = tenant\n            .create_test_timeline_with_layers(\n                TimelineId::generate(),\n                Lsn(0x10),\n                PgMajorVersion::PG14,\n                &ctx,\n                Vec::new(), // in-memory layers\n                delta_layers,\n                image_layers,\n                Lsn(0x100),\n            )\n            .await\n            .unwrap();\n\n        // Layer visibility is an input to heatmap generation, so refresh it first\n        timeline.update_layer_visibility().await.unwrap();\n\n        let heatmap = timeline\n            .generate_heatmap()\n            .await\n            .expect(\"Infallible while timeline is not shut down\");\n\n        // Both layers should be in the heatmap\n        assert!(heatmap.all_layers().count() > 0);\n\n        // Now simulate a migration.\n        timeline\n            .previous_heatmap\n            .store(Some(Arc::new(PreviousHeatmap::Active {\n                heatmap: heatmap.clone(),\n                read_at: std::time::Instant::now(),\n                end_lsn: None,\n            })));\n\n        // Evict all the layers in the previous heatmap\n        let guard = timeline.layers.read(LayerManagerLockHolder::Testing).await;\n        let forever = std::time::Duration::from_secs(120);\n        for layer in 
guard.likely_resident_layers() {\n            layer.evict_and_wait(forever).await.unwrap();\n        }\n        drop(guard);\n\n        // Generate a new heatmap and check that the previous heatmap\n        // has been marked obsolete.\n        let post_eviction_heatmap = timeline\n            .generate_heatmap()\n            .await\n            .expect(\"Infallible while timeline is not shut down\");\n\n        assert_eq!(post_eviction_heatmap.all_layers().count(), 0);\n        assert!(matches!(\n            timeline.previous_heatmap.load().as_deref(),\n            Some(PreviousHeatmap::Obsolete)\n        ));\n    }\n\n    #[tokio::test]\n    async fn two_layer_eviction_attempts_at_the_same_time() {\n        let harness = TenantHarness::create(\"two_layer_eviction_attempts_at_the_same_time\")\n            .await\n            .unwrap();\n\n        let (tenant, ctx) = harness.load().await;\n        let timeline = tenant\n            .create_test_timeline(\n                TimelineId::generate(),\n                Lsn(0x10),\n                PgMajorVersion::PG14,\n                &ctx,\n            )\n            .await\n            .unwrap();\n\n        let layer = find_some_layer(&timeline).await;\n        let layer = layer\n            .keep_resident()\n            .await\n            .expect(\"no download => no downloading errors\")\n            .drop_eviction_guard();\n\n        let forever = std::time::Duration::from_secs(120);\n\n        let first = layer.evict_and_wait(forever);\n        let second = layer.evict_and_wait(forever);\n\n        let (first, second) = tokio::join!(first, second);\n\n        let res = layer.keep_resident().await;\n        assert!(res.is_none(), \"{res:?}\");\n\n        match (first, second) {\n            (Ok(()), Ok(())) => {\n                // because there are no more timeline locks being taken on eviction path, we can\n                // witness all three outcomes here.\n            }\n            (Ok(()), 
Err(EvictionError::NotFound)) | (Err(EvictionError::NotFound), Ok(())) => {\n                // if one completes before the other, this is fine just as well.\n            }\n            other => unreachable!(\"unexpected {:?}\", other),\n        }\n    }\n\n    async fn find_some_layer(timeline: &Timeline) -> Layer {\n        let layers = timeline\n            .layers\n            .read(LayerManagerLockHolder::GetLayerMapInfo)\n            .await;\n        let desc = layers\n            .layer_map()\n            .unwrap()\n            .iter_historic_layers()\n            .next()\n            .expect(\"must find one layer to evict\");\n\n        layers.get_from_desc(&desc)\n    }\n}\n"
  },
  {
    "path": "pageserver/src/tenant/upload_queue.rs",
    "content": "use std::collections::{HashMap, HashSet, VecDeque};\nuse std::fmt::Debug;\nuse std::sync::Arc;\nuse std::sync::atomic::AtomicU32;\n\nuse chrono::NaiveDateTime;\nuse once_cell::sync::Lazy;\nuse tracing::info;\nuse utils::generation::Generation;\nuse utils::lsn::{AtomicLsn, Lsn};\n\nuse super::remote_timeline_client::is_same_remote_layer_path;\nuse super::storage_layer::{AsLayerDesc as _, LayerName, ResidentLayer};\nuse crate::tenant::metadata::TimelineMetadata;\nuse crate::tenant::remote_timeline_client::index::{IndexPart, LayerFileMetadata};\n\n/// Kill switch for upload queue reordering in case it causes problems.\n/// TODO: remove this once we have confidence in it.\nstatic DISABLE_UPLOAD_QUEUE_REORDERING: Lazy<bool> =\n    Lazy::new(|| std::env::var(\"DISABLE_UPLOAD_QUEUE_REORDERING\").as_deref() == Ok(\"true\"));\n\n/// Kill switch for index upload coalescing in case it causes problems.\n/// TODO: remove this once we have confidence in it.\nstatic DISABLE_UPLOAD_QUEUE_INDEX_COALESCING: Lazy<bool> =\n    Lazy::new(|| std::env::var(\"DISABLE_UPLOAD_QUEUE_INDEX_COALESCING\").as_deref() == Ok(\"true\"));\n\n// clippy warns that Uninitialized is much smaller than Initialized, which wastes\n// memory for Uninitialized variants. 
Doesn't matter in practice, there are not\n// that many upload queues in a running pageserver, and most of them are initialized\n// anyway.\n#[allow(clippy::large_enum_variant)]\npub enum UploadQueue {\n    Uninitialized,\n    Initialized(UploadQueueInitialized),\n    Stopped(UploadQueueStopped),\n}\n\nimpl UploadQueue {\n    pub fn as_str(&self) -> &'static str {\n        match self {\n            UploadQueue::Uninitialized => \"Uninitialized\",\n            UploadQueue::Initialized(_) => \"Initialized\",\n            UploadQueue::Stopped(_) => \"Stopped\",\n        }\n    }\n}\n\n#[derive(Copy, Clone, PartialEq, Eq, Hash, Debug)]\npub enum OpType {\n    MayReorder,\n    FlushDeletion,\n}\n\n/// This keeps track of queued and in-progress tasks.\npub struct UploadQueueInitialized {\n    /// Maximum number of inprogress tasks to schedule. 0 is no limit.\n    pub(crate) inprogress_limit: usize,\n\n    /// Counter to assign task IDs\n    pub(crate) task_counter: u64,\n\n    /// The next uploaded index_part.json; assumed to be dirty.\n    ///\n    /// Should not be read directly, except for layer file updates. Instead you should add a\n    /// projected field.\n    pub(crate) dirty: IndexPart,\n\n    /// The latest remote persisted IndexPart.\n    ///\n    /// Each completed metadata upload will update this. 
The second item is the task_id which last\n    /// updated the value, used to ensure we never store an older value over a newer one.\n    pub(crate) clean: (IndexPart, Option<u64>),\n\n    /// How many file uploads or deletions have been scheduled, since the\n    /// last (scheduling of) metadata index upload?\n    pub(crate) latest_files_changes_since_metadata_upload_scheduled: u64,\n\n    /// The Lsn is only updated after our generation has been validated with\n    /// the control plane (unless a timeline's generation is None, in which case\n    /// we skip validation)\n    pub(crate) visible_remote_consistent_lsn: Arc<AtomicLsn>,\n\n    /// Tasks that are currently in-progress. In-progress means that a tokio Task\n    /// has been launched for it. An in-progress task can be busy uploading, but it can\n    /// also be waiting on the `concurrency_limiter` Semaphore in S3Bucket, or it can\n    /// be waiting for retry in `exponential_backoff`.\n    pub inprogress_tasks: HashMap<u64, Arc<UploadTask>>,\n\n    /// Queued operations that have not been launched yet. They might depend on previous\n    /// tasks to finish. For example, metadata upload cannot be performed before all\n    /// preceding layer file uploads have completed.\n    pub queued_operations: VecDeque<UploadOp>,\n\n    /// Files which have been unlinked but have not yet had a deletion scheduled. 
Only kept around\n    /// for error logging.\n    ///\n    /// Putting this behind a testing feature to catch problems in tests, but assuming we could have a\n    /// bug causing leaks, then it's better to not leave this enabled for production builds.\n    #[cfg(feature = \"testing\")]\n    pub(crate) dangling_files: HashMap<LayerName, Generation>,\n\n    /// Ensure we order file operations correctly.\n    pub(crate) recently_deleted: HashSet<(LayerName, Generation)>,\n\n    /// Deletions that are blocked by the tenant configuration\n    pub(crate) blocked_deletions: Vec<Delete>,\n\n    /// Set to true when we have inserted the `UploadOp::Shutdown` into the `inprogress_tasks`.\n    pub(crate) shutting_down: bool,\n\n    /// Permitless semaphore on which any number of `RemoteTimelineClient::shutdown` futures can\n    /// wait on until one of them stops the queue. The semaphore is closed when\n    /// `RemoteTimelineClient::launch_queued_tasks` encounters `UploadOp::Shutdown`.\n    pub(crate) shutdown_ready: Arc<tokio::sync::Semaphore>,\n}\n\nimpl UploadQueueInitialized {\n    pub(super) fn no_pending_work(&self) -> bool {\n        self.inprogress_tasks.is_empty() && self.queued_operations.is_empty()\n    }\n\n    pub(super) fn get_last_remote_consistent_lsn_visible(&self) -> Lsn {\n        self.visible_remote_consistent_lsn.load()\n    }\n\n    pub(super) fn get_last_remote_consistent_lsn_projected(&self) -> Option<Lsn> {\n        let lsn = self.clean.0.metadata.disk_consistent_lsn();\n        self.clean.1.map(|_| lsn)\n    }\n\n    /// Returns and removes the next ready operation from the queue, if any. This isn't necessarily\n    /// the first operation in the queue, to avoid head-of-line blocking -- an operation can jump\n    /// the queue if it doesn't conflict with operations ahead of it.\n    ///\n    /// Also returns any operations that were coalesced into this one, e.g. 
multiple index uploads.\n    ///\n    /// None may be returned even if the queue isn't empty, if no operations are ready yet.\n    ///\n    /// NB: this is quadratic, but queues are expected to be small, and bounded by inprogress_limit.\n    pub fn next_ready(&mut self) -> Option<(UploadOp, Vec<UploadOp>)> {\n        // If inprogress_tasks is already at limit, don't schedule anything more.\n        if self.inprogress_limit > 0 && self.inprogress_tasks.len() >= self.inprogress_limit {\n            return None;\n        }\n\n        for (i, candidate) in self.queued_operations.iter().enumerate() {\n            // If this candidate is ready, go for it. Otherwise, try the next one.\n            if self.is_ready(i) {\n                // Shutdown operations are left at the head of the queue, to prevent further\n                // operations from starting. Signal that we're ready to shut down.\n                if matches!(candidate, UploadOp::Shutdown) {\n                    assert!(self.inprogress_tasks.is_empty(), \"shutdown with tasks\");\n                    assert_eq!(i, 0, \"shutdown not at head of queue\");\n                    self.shutdown_ready.close();\n                    return None;\n                }\n\n                let mut op = self.queued_operations.remove(i).expect(\"i can't disappear\");\n\n                // Coalesce any back-to-back index uploads by only uploading the newest one that's\n                // ready. This typically happens with layer/index/layer/index/... sequences, where\n                // the layers bypass the indexes, leaving the indexes queued.\n                //\n                // If other operations are interleaved between index uploads we don't try to\n                // coalesce them, since we may as well update the index concurrently with them.\n                // This keeps the index fresh and avoids starvation.\n                //\n                // NB: we assume that all uploaded indexes have the same remote path. 
This\n                // is true at the time of writing: the path only depends on the tenant,\n                // timeline and generation, all of which are static for a timeline instance.\n                // Otherwise, we must be careful not to coalesce different paths.\n                let mut coalesced_ops = Vec::new();\n                if matches!(op, UploadOp::UploadMetadata { .. }) {\n                    while let Some(UploadOp::UploadMetadata { .. }) = self.queued_operations.get(i)\n                    {\n                        if *DISABLE_UPLOAD_QUEUE_INDEX_COALESCING {\n                            break;\n                        }\n                        if !self.is_ready(i) {\n                            break;\n                        }\n                        coalesced_ops.push(op);\n                        op = self.queued_operations.remove(i).expect(\"i can't disappear\");\n                    }\n                }\n\n                return Some((op, coalesced_ops));\n            }\n\n            // Nothing can bypass a barrier or shutdown. If it wasn't scheduled above, give up.\n            if matches!(candidate, UploadOp::Barrier(_) | UploadOp::Shutdown) {\n                return None;\n            }\n\n            // If upload queue reordering is disabled, bail out after the first operation.\n            if *DISABLE_UPLOAD_QUEUE_REORDERING {\n                return None;\n            }\n        }\n        None\n    }\n\n    /// Returns true if the queued operation at the given position is ready to be uploaded, i.e. if\n    /// it doesn't conflict with any in-progress or queued operations ahead of it. 
Operations are\n    /// allowed to skip the queue when it's safe to do so, to increase parallelism.\n    ///\n    /// The position must be valid for the queue size.\n    fn is_ready(&self, pos: usize) -> bool {\n        let candidate = self.queued_operations.get(pos).expect(\"invalid position\");\n        self\n            // Look at in-progress operations, in random order.\n            .inprogress_tasks\n            .values()\n            .map(|task| &task.op)\n            // Then queued operations ahead of the candidate, front-to-back.\n            .chain(self.queued_operations.iter().take(pos))\n            // Keep track of the active index ahead of each operation. This is used to ensure that\n            // an upload doesn't skip the queue too far, such that it modifies a layer that's\n            // referenced by an active index.\n            //\n            // It's okay that in-progress operations are emitted in random order above, since at\n            // most one of them can be an index upload (enforced by can_bypass).\n            .scan(&self.clean.0, |next_active_index, op| {\n                let active_index = *next_active_index;\n                if let UploadOp::UploadMetadata { uploaded } = op {\n                    *next_active_index = uploaded; // stash index for next operation after this\n                }\n                Some((op, active_index))\n            })\n            // Check if the candidate can bypass all of them.\n            .all(|(op, active_index)| candidate.can_bypass(op, active_index))\n    }\n\n    /// Returns the number of in-progress deletion operations.\n    #[cfg(test)]\n    pub(crate) fn num_inprogress_deletions(&self) -> usize {\n        self.inprogress_tasks\n            .iter()\n            .filter(|(_, t)| matches!(t.op, UploadOp::Delete(_)))\n            .count()\n    }\n\n    /// Returns the number of in-progress layer uploads.\n    #[cfg(test)]\n    pub(crate) fn num_inprogress_layer_uploads(&self) -> usize {\n        
self.inprogress_tasks\n            .iter()\n            .filter(|(_, t)| matches!(t.op, UploadOp::UploadLayer(_, _, _)))\n            .count()\n    }\n\n    /// Test helper that schedules all ready operations into inprogress_tasks, and returns\n    /// references to them.\n    ///\n    /// TODO: the corresponding production logic should be moved from RemoteTimelineClient into\n    /// UploadQueue, so we can use the same code path.\n    #[cfg(test)]\n    fn schedule_ready(&mut self) -> Vec<Arc<UploadTask>> {\n        let mut tasks = Vec::new();\n        // NB: schedule operations one by one, to handle conflicts with inprogress_tasks.\n        while let Some((op, coalesced_ops)) = self.next_ready() {\n            self.task_counter += 1;\n            let task = Arc::new(UploadTask {\n                task_id: self.task_counter,\n                op,\n                coalesced_ops,\n                retries: 0.into(),\n            });\n            self.inprogress_tasks.insert(task.task_id, task.clone());\n            tasks.push(task);\n        }\n        tasks\n    }\n\n    /// Test helper that marks an operation as completed, removing it from inprogress_tasks.\n    ///\n    /// TODO: the corresponding production logic should be moved from RemoteTimelineClient into\n    /// UploadQueue, so we can use the same code path.\n    #[cfg(test)]\n    fn complete(&mut self, task_id: u64) {\n        let Some(task) = self.inprogress_tasks.remove(&task_id) else {\n            return;\n        };\n        // Update the clean index on uploads.\n        if let UploadOp::UploadMetadata { ref uploaded } = task.op {\n            if task.task_id > self.clean.1.unwrap_or_default() {\n                self.clean = (*uploaded.clone(), Some(task.task_id));\n            }\n        }\n    }\n}\n\n#[derive(Clone, Copy)]\npub(super) enum SetDeletedFlagProgress {\n    NotRunning,\n    InProgress(NaiveDateTime),\n    Successful(NaiveDateTime),\n}\n\npub struct UploadQueueStoppedDeletable {\n    
pub(super) upload_queue_for_deletion: UploadQueueInitialized,\n    pub(super) deleted_at: SetDeletedFlagProgress,\n}\n\n#[allow(clippy::large_enum_variant, reason = \"TODO\")]\npub enum UploadQueueStopped {\n    Deletable(UploadQueueStoppedDeletable),\n    Uninitialized,\n}\n\n#[derive(thiserror::Error, Debug)]\npub enum NotInitialized {\n    #[error(\"queue is in state Uninitialized\")]\n    Uninitialized,\n    #[error(\"queue is in state Stopped\")]\n    Stopped,\n    #[error(\"queue is shutting down\")]\n    ShuttingDown,\n}\n\nimpl NotInitialized {\n    pub(crate) fn is_stopping(&self) -> bool {\n        use NotInitialized::*;\n        match self {\n            Uninitialized => false,\n            Stopped => true,\n            ShuttingDown => true,\n        }\n    }\n}\n\nimpl UploadQueue {\n    pub fn initialize_empty_remote(\n        &mut self,\n        metadata: &TimelineMetadata,\n        inprogress_limit: usize,\n    ) -> anyhow::Result<&mut UploadQueueInitialized> {\n        match self {\n            UploadQueue::Uninitialized => (),\n            UploadQueue::Initialized(_) | UploadQueue::Stopped(_) => {\n                anyhow::bail!(\"already initialized, state {}\", self.as_str())\n            }\n        }\n\n        info!(\"initializing upload queue for empty remote\");\n\n        let index_part = IndexPart::empty(metadata.clone());\n\n        let state = UploadQueueInitialized {\n            inprogress_limit,\n            dirty: index_part.clone(),\n            clean: (index_part, None),\n            latest_files_changes_since_metadata_upload_scheduled: 0,\n            visible_remote_consistent_lsn: Arc::new(AtomicLsn::new(0)),\n            // what follows are boring default initializations\n            task_counter: 0,\n            inprogress_tasks: HashMap::new(),\n            queued_operations: VecDeque::new(),\n            #[cfg(feature = \"testing\")]\n            dangling_files: HashMap::new(),\n            recently_deleted: HashSet::new(),\n   
         blocked_deletions: Vec::new(),\n            shutting_down: false,\n            shutdown_ready: Arc::new(tokio::sync::Semaphore::new(0)),\n        };\n\n        *self = UploadQueue::Initialized(state);\n        Ok(self.initialized_mut().expect(\"we just set it\"))\n    }\n\n    pub fn initialize_with_current_remote_index_part(\n        &mut self,\n        index_part: &IndexPart,\n        inprogress_limit: usize,\n    ) -> anyhow::Result<&mut UploadQueueInitialized> {\n        match self {\n            UploadQueue::Uninitialized => (),\n            UploadQueue::Initialized(_) | UploadQueue::Stopped(_) => {\n                anyhow::bail!(\"already initialized, state {}\", self.as_str())\n            }\n        }\n\n        info!(\n            \"initializing upload queue with remote index_part.disk_consistent_lsn: {}\",\n            index_part.metadata.disk_consistent_lsn()\n        );\n\n        let state = UploadQueueInitialized {\n            inprogress_limit,\n            dirty: index_part.clone(),\n            clean: (index_part.clone(), None),\n            latest_files_changes_since_metadata_upload_scheduled: 0,\n            visible_remote_consistent_lsn: Arc::new(\n                index_part.metadata.disk_consistent_lsn().into(),\n            ),\n            // what follows are boring default initializations\n            task_counter: 0,\n            inprogress_tasks: HashMap::new(),\n            queued_operations: VecDeque::new(),\n            #[cfg(feature = \"testing\")]\n            dangling_files: HashMap::new(),\n            recently_deleted: HashSet::new(),\n            blocked_deletions: Vec::new(),\n            shutting_down: false,\n            shutdown_ready: Arc::new(tokio::sync::Semaphore::new(0)),\n        };\n\n        *self = UploadQueue::Initialized(state);\n        Ok(self.initialized_mut().expect(\"we just set it\"))\n    }\n\n    pub fn initialized_mut(&mut self) -> Result<&mut UploadQueueInitialized, NotInitialized> {\n        use 
UploadQueue::*;\n        match self {\n            Uninitialized => Err(NotInitialized::Uninitialized),\n            Initialized(x) => {\n                if x.shutting_down {\n                    Err(NotInitialized::ShuttingDown)\n                } else {\n                    Ok(x)\n                }\n            }\n            Stopped(_) => Err(NotInitialized::Stopped),\n        }\n    }\n\n    pub(crate) fn stopped_mut(&mut self) -> anyhow::Result<&mut UploadQueueStoppedDeletable> {\n        match self {\n            UploadQueue::Initialized(_) | UploadQueue::Uninitialized => {\n                anyhow::bail!(\"queue is in state {}\", self.as_str())\n            }\n            UploadQueue::Stopped(UploadQueueStopped::Uninitialized) => {\n                anyhow::bail!(\"queue is in state Stopped(Uninitialized)\")\n            }\n            UploadQueue::Stopped(UploadQueueStopped::Deletable(deletable)) => Ok(deletable),\n        }\n    }\n}\n\n/// An in-progress upload or delete task.\n#[derive(Debug)]\npub struct UploadTask {\n    /// Unique ID of this task. Used as the key in `inprogress_tasks` above.\n    pub task_id: u64,\n    /// Number of task retries.\n    pub retries: AtomicU32,\n    /// The upload operation.\n    pub op: UploadOp,\n    /// Any upload operations that were coalesced into this operation. This typically happens with\n    /// back-to-back index uploads, see `UploadQueueInitialized::next_ready()`.\n    pub coalesced_ops: Vec<UploadOp>,\n}\n\n/// A deletion of some layers within the lifetime of a timeline.  This is not used\n/// for timeline deletion, which skips this queue and goes directly to DeletionQueue.\n#[derive(Debug, Clone)]\npub struct Delete {\n    pub layers: Vec<(LayerName, LayerFileMetadata)>,\n}\n\n#[derive(Clone, Debug)]\npub enum UploadOp {\n    /// Upload a layer file. 
The last field indicates the last operation for this file.\n    UploadLayer(ResidentLayer, LayerFileMetadata, Option<OpType>),\n\n    /// Upload an index_part.json file\n    UploadMetadata {\n        /// The next [`UploadQueueInitialized::clean`] after this upload succeeds.\n        uploaded: Box<IndexPart>,\n    },\n\n    /// Delete layer files\n    Delete(Delete),\n\n    /// Barrier. When the barrier operation is reached, the channel is closed.\n    Barrier(tokio::sync::watch::Sender<()>),\n\n    /// Shutdown; upon encountering this operation no new operations will be spawned, otherwise\n    /// this is the same as a Barrier.\n    Shutdown,\n}\n\nimpl std::fmt::Display for UploadOp {\n    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {\n        match self {\n            UploadOp::UploadLayer(layer, metadata, mode) => {\n                write!(\n                    f,\n                    \"UploadLayer({}, size={:?}, gen={:?}, mode={:?})\",\n                    layer, metadata.file_size, metadata.generation, mode\n                )\n            }\n            UploadOp::UploadMetadata { uploaded, .. } => {\n                write!(\n                    f,\n                    \"UploadMetadata(lsn: {})\",\n                    uploaded.metadata.disk_consistent_lsn()\n                )\n            }\n            UploadOp::Delete(delete) => {\n                write!(f, \"Delete({} layers)\", delete.layers.len())\n            }\n            UploadOp::Barrier(_) => write!(f, \"Barrier\"),\n            UploadOp::Shutdown => write!(f, \"Shutdown\"),\n        }\n    }\n}\n\nimpl UploadOp {\n    /// Returns true if self can bypass other, i.e. if the operations don't conflict. 
index is the\n    /// active index when other would be uploaded -- if we allow self to bypass other, this would\n    /// be the active index when self is uploaded.\n    pub fn can_bypass(&self, other: &UploadOp, index: &IndexPart) -> bool {\n        match (self, other) {\n            // Nothing can bypass a barrier or shutdown, and it can't bypass anything.\n            (UploadOp::Barrier(_), _) | (_, UploadOp::Barrier(_)) => false,\n            (UploadOp::Shutdown, _) | (_, UploadOp::Shutdown) => false,\n\n            // Uploads and deletes can bypass each other unless they're for the same file.\n            (UploadOp::UploadLayer(a, ameta, _), UploadOp::UploadLayer(b, bmeta, _)) => {\n                let aname = &a.layer_desc().layer_name();\n                let bname = &b.layer_desc().layer_name();\n                !is_same_remote_layer_path(aname, ameta, bname, bmeta)\n            }\n            (UploadOp::UploadLayer(u, umeta, _), UploadOp::Delete(d))\n            | (UploadOp::Delete(d), UploadOp::UploadLayer(u, umeta, _)) => {\n                d.layers.iter().all(|(dname, dmeta)| {\n                    !is_same_remote_layer_path(&u.layer_desc().layer_name(), umeta, dname, dmeta)\n                })\n            }\n\n            // Deletes are idempotent and can always bypass each other.\n            (UploadOp::Delete(_), UploadOp::Delete(_)) => true,\n\n            // Uploads and deletes can bypass an index upload as long as neither the uploaded index\n            // nor the active index below it references the file. 
A layer can't be modified or\n            // deleted while referenced by an index.\n            //\n            // Similarly, index uploads can bypass uploads and deletes as long as neither the\n            // uploaded index nor the active index references the file (the latter would be\n            // incorrect use by the caller).\n            (UploadOp::UploadLayer(u, umeta, _), UploadOp::UploadMetadata { uploaded: i })\n            | (UploadOp::UploadMetadata { uploaded: i }, UploadOp::UploadLayer(u, umeta, _)) => {\n                let uname = u.layer_desc().layer_name();\n                !i.references(&uname, umeta) && !index.references(&uname, umeta)\n            }\n            (UploadOp::Delete(d), UploadOp::UploadMetadata { uploaded: i })\n            | (UploadOp::UploadMetadata { uploaded: i }, UploadOp::Delete(d)) => {\n                d.layers.iter().all(|(dname, dmeta)| {\n                    !i.references(dname, dmeta) && !index.references(dname, dmeta)\n                })\n            }\n\n            // Indexes can never bypass each other. They can coalesce though, and\n            // `UploadQueue::next_ready()` currently does this when possible.\n            (UploadOp::UploadMetadata { .. }, UploadOp::UploadMetadata { .. 
}) => false,\n        }\n    }\n}\n\n#[cfg(test)]\nmod tests {\n    use std::str::FromStr as _;\n\n    use itertools::Itertools as _;\n    use utils::shard::{ShardCount, ShardIndex, ShardNumber};\n\n    use super::*;\n    use crate::DEFAULT_PG_VERSION;\n    use crate::tenant::Timeline;\n    use crate::tenant::harness::{TIMELINE_ID, TenantHarness};\n    use crate::tenant::storage_layer::Layer;\n    use crate::tenant::storage_layer::layer::local_layer_path;\n\n    /// Test helper which asserts that two operations are the same, in lieu of UploadOp PartialEq.\n    #[track_caller]\n    fn assert_same_op(a: &UploadOp, b: &UploadOp) {\n        use UploadOp::*;\n        match (a, b) {\n            (UploadLayer(a, ameta, atype), UploadLayer(b, bmeta, btype)) => {\n                assert_eq!(a.layer_desc().layer_name(), b.layer_desc().layer_name());\n                assert_eq!(ameta, bmeta);\n                assert_eq!(atype, btype);\n            }\n            (Delete(a), Delete(b)) => assert_eq!(a.layers, b.layers),\n            (UploadMetadata { uploaded: a }, UploadMetadata { uploaded: b }) => assert_eq!(a, b),\n            (Barrier(_), Barrier(_)) => {}\n            (Shutdown, Shutdown) => {}\n            (a, b) => panic!(\"{a:?} != {b:?}\"),\n        }\n    }\n\n    /// Test helper which asserts that two sets of operations are the same.\n    #[track_caller]\n    fn assert_same_ops<'a>(\n        a: impl IntoIterator<Item = &'a UploadOp>,\n        b: impl IntoIterator<Item = &'a UploadOp>,\n    ) {\n        a.into_iter()\n            .zip_eq(b)\n            .for_each(|(a, b)| assert_same_op(a, b))\n    }\n\n    /// Test helper to construct a test timeline.\n    ///\n    /// TODO: it really shouldn't be necessary to construct an entire tenant and timeline just to\n    /// test the upload queue -- decouple ResidentLayer from Timeline.\n    ///\n    /// TODO: the upload queue uses TimelineMetadata::example() instead, because there's no way to\n    /// obtain a 
TimelineMetadata from a Timeline.\n    fn make_timeline() -> Arc<Timeline> {\n        // Grab the current test name from the current thread name.\n        // TODO: TenantHarness shouldn't take a &'static str, but just leak the test name for now.\n        let test_name = std::thread::current().name().unwrap().to_string();\n        let test_name = Box::leak(test_name.into_boxed_str());\n\n        let runtime = tokio::runtime::Builder::new_current_thread()\n            .enable_all()\n            .build()\n            .expect(\"failed to create runtime\");\n\n        runtime\n            .block_on(async {\n                let harness = TenantHarness::create(test_name).await?;\n                let (tenant, ctx) = harness.load().await;\n                tenant\n                    .create_test_timeline(TIMELINE_ID, Lsn(8), DEFAULT_PG_VERSION, &ctx)\n                    .await\n            })\n            .expect(\"failed to create timeline\")\n    }\n\n    /// Test helper to construct an (empty) resident layer.\n    fn make_layer(timeline: &Arc<Timeline>, name: &str) -> ResidentLayer {\n        make_layer_with_size(timeline, name, 0)\n    }\n\n    /// Test helper to construct a resident layer with the given size.\n    fn make_layer_with_size(timeline: &Arc<Timeline>, name: &str, size: usize) -> ResidentLayer {\n        let metadata = LayerFileMetadata {\n            generation: timeline.generation,\n            shard: timeline.get_shard_index(),\n            file_size: size as u64,\n        };\n        make_layer_with_metadata(timeline, name, metadata)\n    }\n\n    /// Test helper to construct a layer with the given metadata.\n    fn make_layer_with_metadata(\n        timeline: &Arc<Timeline>,\n        name: &str,\n        metadata: LayerFileMetadata,\n    ) -> ResidentLayer {\n        let name = LayerName::from_str(name).expect(\"invalid name\");\n        let local_path = local_layer_path(\n            timeline.conf,\n            &timeline.tenant_shard_id,\n            
&timeline.timeline_id,\n            &name,\n            &metadata.generation,\n        );\n        std::fs::write(&local_path, vec![0; metadata.file_size as usize])\n            .expect(\"failed to write file\");\n        Layer::for_resident(timeline.conf, timeline, local_path, name, metadata)\n    }\n\n    /// Test helper to add a layer to an index and return a new index.\n    fn index_with(index: &IndexPart, layer: &ResidentLayer) -> Box<IndexPart> {\n        let mut index = index.clone();\n        index\n            .layer_metadata\n            .insert(layer.layer_desc().layer_name(), layer.metadata());\n        Box::new(index)\n    }\n\n    /// Test helper to remove a layer from an index and return a new index.\n    fn index_without(index: &IndexPart, layer: &ResidentLayer) -> Box<IndexPart> {\n        let mut index = index.clone();\n        index\n            .layer_metadata\n            .remove(&layer.layer_desc().layer_name());\n        Box::new(index)\n    }\n\n    /// Nothing can bypass a barrier, and it can't bypass inprogress tasks.\n    #[test]\n    fn schedule_barrier() -> anyhow::Result<()> {\n        let mut queue = UploadQueue::Uninitialized;\n        let queue = queue.initialize_empty_remote(&TimelineMetadata::example(), 0)?;\n        let tli = make_timeline();\n\n        let index = Box::new(queue.clean.0.clone()); // empty, doesn't matter\n        let layer0 = make_layer(\n            &tli,\n            \"000000000000000000000000000000000000-100000000000000000000000000000000000__00000000016B59D8-00000000016B5A51\",\n        );\n        let layer1 = make_layer(\n            &tli,\n            \"100000000000000000000000000000000000-200000000000000000000000000000000000__00000000016B59D8-00000000016B5A51\",\n        );\n        let layer2 = make_layer(\n            &tli,\n            \"200000000000000000000000000000000000-300000000000000000000000000000000000__00000000016B59D8-00000000016B5A51\",\n        );\n        let layer3 = make_layer(\n         
   &tli,\n            \"300000000000000000000000000000000000-400000000000000000000000000000000000__00000000016B59D8-00000000016B5A51\",\n        );\n        let (barrier, _) = tokio::sync::watch::channel(());\n\n        // Enqueue non-conflicting upload, delete, and index before and after a barrier.\n        let ops = [\n            UploadOp::UploadLayer(layer0.clone(), layer0.metadata(), None),\n            UploadOp::Delete(Delete {\n                layers: vec![(layer1.layer_desc().layer_name(), layer1.metadata())],\n            }),\n            UploadOp::UploadMetadata {\n                uploaded: index.clone(),\n            },\n            UploadOp::Barrier(barrier),\n            UploadOp::UploadLayer(layer2.clone(), layer2.metadata(), None),\n            UploadOp::Delete(Delete {\n                layers: vec![(layer3.layer_desc().layer_name(), layer3.metadata())],\n            }),\n            UploadOp::UploadMetadata {\n                uploaded: index.clone(),\n            },\n        ];\n\n        queue.queued_operations.extend(ops.clone());\n\n        // Schedule the initial operations ahead of the barrier.\n        let tasks = queue.schedule_ready();\n\n        assert_same_ops(tasks.iter().map(|t| &t.op), &ops[0..3]);\n        assert!(matches!(\n            queue.queued_operations.front(),\n            Some(&UploadOp::Barrier(_))\n        ));\n\n        // Complete the initial operations. The barrier isn't scheduled while they're pending.\n        for task in tasks {\n            assert!(queue.schedule_ready().is_empty());\n            queue.complete(task.task_id);\n        }\n\n        // Schedule the barrier. The later tasks won't schedule until it completes.\n        let tasks = queue.schedule_ready();\n\n        assert_eq!(tasks.len(), 1);\n        assert!(matches!(tasks[0].op, UploadOp::Barrier(_)));\n        assert_eq!(queue.queued_operations.len(), 3);\n\n        // Complete the barrier. 
The rest of the tasks schedule immediately.\n        queue.complete(tasks[0].task_id);\n\n        let tasks = queue.schedule_ready();\n        assert_same_ops(tasks.iter().map(|t| &t.op), &ops[4..]);\n        assert!(queue.queued_operations.is_empty());\n\n        Ok(())\n    }\n\n    /// Deletes can be scheduled in parallel, even if they're for the same file.\n    #[test]\n    fn schedule_delete_parallel() -> anyhow::Result<()> {\n        let mut queue = UploadQueue::Uninitialized;\n        let queue = queue.initialize_empty_remote(&TimelineMetadata::example(), 0)?;\n        let tli = make_timeline();\n\n        // Enqueue a bunch of deletes, some with conflicting names.\n        let layer0 = make_layer(\n            &tli,\n            \"000000000000000000000000000000000000-100000000000000000000000000000000000__00000000016B59D8-00000000016B5A51\",\n        );\n        let layer1 = make_layer(\n            &tli,\n            \"100000000000000000000000000000000000-200000000000000000000000000000000000__00000000016B59D8-00000000016B5A51\",\n        );\n        let layer2 = make_layer(\n            &tli,\n            \"200000000000000000000000000000000000-300000000000000000000000000000000000__00000000016B59D8-00000000016B5A51\",\n        );\n        let layer3 = make_layer(\n            &tli,\n            \"300000000000000000000000000000000000-400000000000000000000000000000000000__00000000016B59D8-00000000016B5A51\",\n        );\n\n        let ops = [\n            UploadOp::Delete(Delete {\n                layers: vec![(layer0.layer_desc().layer_name(), layer0.metadata())],\n            }),\n            UploadOp::Delete(Delete {\n                layers: vec![(layer1.layer_desc().layer_name(), layer1.metadata())],\n            }),\n            UploadOp::Delete(Delete {\n                layers: vec![\n                    (layer1.layer_desc().layer_name(), layer1.metadata()),\n                    (layer2.layer_desc().layer_name(), layer2.metadata()),\n                ],\n 
           }),\n            UploadOp::Delete(Delete {\n                layers: vec![(layer2.layer_desc().layer_name(), layer2.metadata())],\n            }),\n            UploadOp::Delete(Delete {\n                layers: vec![(layer3.layer_desc().layer_name(), layer3.metadata())],\n            }),\n        ];\n\n        queue.queued_operations.extend(ops.clone());\n\n        // Schedule all ready operations. Since deletes don't conflict, they're all scheduled.\n        let tasks = queue.schedule_ready();\n\n        assert_same_ops(tasks.iter().map(|t| &t.op), &ops);\n        assert!(queue.queued_operations.is_empty());\n\n        Ok(())\n    }\n\n    /// Conflicting uploads are serialized.\n    #[test]\n    fn schedule_upload_conflicts() -> anyhow::Result<()> {\n        let mut queue = UploadQueue::Uninitialized;\n        let queue = queue.initialize_with_current_remote_index_part(&IndexPart::example(), 0)?;\n        let tli = make_timeline();\n\n        // Enqueue three versions of the same layer, with different file sizes.\n        let layer0a = make_layer_with_size(\n            &tli,\n            \"000000000000000000000000000000000000-100000000000000000000000000000000000__00000000016B59D8-00000000016B5A51\",\n            1,\n        );\n        let layer0b = make_layer_with_size(\n            &tli,\n            \"000000000000000000000000000000000000-100000000000000000000000000000000000__00000000016B59D8-00000000016B5A51\",\n            2,\n        );\n        let layer0c = make_layer_with_size(\n            &tli,\n            \"000000000000000000000000000000000000-100000000000000000000000000000000000__00000000016B59D8-00000000016B5A51\",\n            3,\n        );\n\n        let ops = [\n            UploadOp::UploadLayer(layer0a.clone(), layer0a.metadata(), None),\n            UploadOp::UploadLayer(layer0b.clone(), layer0b.metadata(), None),\n            UploadOp::UploadLayer(layer0c.clone(), layer0c.metadata(), None),\n        ];\n\n        
queue.queued_operations.extend(ops.clone());\n\n        // Only one version should be scheduled and uploaded at a time.\n        for op in ops {\n            let tasks = queue.schedule_ready();\n            assert_eq!(tasks.len(), 1);\n            assert_same_op(&tasks[0].op, &op);\n            queue.complete(tasks[0].task_id);\n        }\n        assert!(queue.schedule_ready().is_empty());\n        assert!(queue.queued_operations.is_empty());\n\n        Ok(())\n    }\n\n    /// Conflicting uploads and deletes are serialized.\n    #[test]\n    fn schedule_upload_delete_conflicts() -> anyhow::Result<()> {\n        let mut queue = UploadQueue::Uninitialized;\n        let queue = queue.initialize_with_current_remote_index_part(&IndexPart::example(), 0)?;\n        let tli = make_timeline();\n\n        // Enqueue two layer uploads, with a delete of both layers in between them. These should be\n        // scheduled one at a time, since deletes can't bypass uploads and vice versa.\n        let layer0 = make_layer(\n            &tli,\n            \"000000000000000000000000000000000000-100000000000000000000000000000000000__00000000016B59D8-00000000016B5A51\",\n        );\n        let layer1 = make_layer(\n            &tli,\n            \"100000000000000000000000000000000000-200000000000000000000000000000000000__00000000016B59D8-00000000016B5A51\",\n        );\n\n        let ops = [\n            UploadOp::UploadLayer(layer0.clone(), layer0.metadata(), None),\n            UploadOp::Delete(Delete {\n                layers: vec![\n                    (layer0.layer_desc().layer_name(), layer0.metadata()),\n                    (layer1.layer_desc().layer_name(), layer1.metadata()),\n                ],\n            }),\n            UploadOp::UploadLayer(layer1.clone(), layer1.metadata(), None),\n        ];\n\n        queue.queued_operations.extend(ops.clone());\n\n        // Only one version should be scheduled and uploaded at a time.\n        for op in ops {\n            let tasks 
= queue.schedule_ready();\n            assert_eq!(tasks.len(), 1);\n            assert_same_op(&tasks[0].op, &op);\n            queue.complete(tasks[0].task_id);\n        }\n        assert!(queue.schedule_ready().is_empty());\n        assert!(queue.queued_operations.is_empty());\n\n        Ok(())\n    }\n\n    /// Non-conflicting uploads and deletes can bypass the queue, avoiding the conflicting\n    /// delete/upload operations at the head of the queue.\n    #[test]\n    fn schedule_upload_delete_conflicts_bypass() -> anyhow::Result<()> {\n        let mut queue = UploadQueue::Uninitialized;\n        let queue = queue.initialize_with_current_remote_index_part(&IndexPart::example(), 0)?;\n        let tli = make_timeline();\n\n        // Enqueue two layer uploads, with a delete of both layers in between them. These should be\n        // scheduled one at a time, since deletes can't bypass uploads and vice versa.\n        //\n        // Also enqueue non-conflicting uploads and deletes at the end. 
These can bypass the queue\n        // and run immediately.\n        let layer0 = make_layer(\n            &tli,\n            \"000000000000000000000000000000000000-100000000000000000000000000000000000__00000000016B59D8-00000000016B5A51\",\n        );\n        let layer1 = make_layer(\n            &tli,\n            \"100000000000000000000000000000000000-200000000000000000000000000000000000__00000000016B59D8-00000000016B5A51\",\n        );\n        let layer2 = make_layer(\n            &tli,\n            \"200000000000000000000000000000000000-300000000000000000000000000000000000__00000000016B59D8-00000000016B5A51\",\n        );\n        let layer3 = make_layer(\n            &tli,\n            \"300000000000000000000000000000000000-400000000000000000000000000000000000__00000000016B59D8-00000000016B5A51\",\n        );\n\n        let ops = [\n            UploadOp::UploadLayer(layer0.clone(), layer0.metadata(), None),\n            UploadOp::Delete(Delete {\n                layers: vec![\n                    (layer0.layer_desc().layer_name(), layer0.metadata()),\n                    (layer1.layer_desc().layer_name(), layer1.metadata()),\n                ],\n            }),\n            UploadOp::UploadLayer(layer1.clone(), layer1.metadata(), None),\n            UploadOp::UploadLayer(layer2.clone(), layer2.metadata(), None),\n            UploadOp::Delete(Delete {\n                layers: vec![(layer3.layer_desc().layer_name(), layer3.metadata())],\n            }),\n        ];\n\n        queue.queued_operations.extend(ops.clone());\n\n        // Operations 0, 3, and 4 are scheduled immediately.\n        let tasks = queue.schedule_ready();\n        assert_same_ops(tasks.iter().map(|t| &t.op), [&ops[0], &ops[3], &ops[4]]);\n        assert_eq!(queue.queued_operations.len(), 2);\n\n        Ok(())\n    }\n\n    /// Non-conflicting uploads are parallelized.\n    #[test]\n    fn schedule_upload_parallel() -> anyhow::Result<()> {\n        let mut queue = 
UploadQueue::Uninitialized;\n        let queue = queue.initialize_with_current_remote_index_part(&IndexPart::example(), 0)?;\n        let tli = make_timeline();\n\n        // Enqueue three different layer uploads.\n        let layer0 = make_layer(\n            &tli,\n            \"000000000000000000000000000000000000-100000000000000000000000000000000000__00000000016B59D8-00000000016B5A51\",\n        );\n        let layer1 = make_layer(\n            &tli,\n            \"100000000000000000000000000000000000-200000000000000000000000000000000000__00000000016B59D8-00000000016B5A51\",\n        );\n        let layer2 = make_layer(\n            &tli,\n            \"200000000000000000000000000000000000-300000000000000000000000000000000000__00000000016B59D8-00000000016B5A51\",\n        );\n\n        let ops = [\n            UploadOp::UploadLayer(layer0.clone(), layer0.metadata(), None),\n            UploadOp::UploadLayer(layer1.clone(), layer1.metadata(), None),\n            UploadOp::UploadLayer(layer2.clone(), layer2.metadata(), None),\n        ];\n\n        queue.queued_operations.extend(ops.clone());\n\n        // All uploads should be scheduled concurrently.\n        let tasks = queue.schedule_ready();\n\n        assert_same_ops(tasks.iter().map(|t| &t.op), &ops);\n        assert!(queue.queued_operations.is_empty());\n\n        Ok(())\n    }\n\n    /// Index uploads are coalesced.\n    #[test]\n    fn schedule_index_coalesce() -> anyhow::Result<()> {\n        let mut queue = UploadQueue::Uninitialized;\n        let queue = queue.initialize_with_current_remote_index_part(&IndexPart::example(), 0)?;\n\n        // Enqueue three uploads of the current empty index.\n        let index = Box::new(queue.clean.0.clone());\n\n        let ops = [\n            UploadOp::UploadMetadata {\n                uploaded: index.clone(),\n            },\n            UploadOp::UploadMetadata {\n                uploaded: index.clone(),\n            },\n            UploadOp::UploadMetadata {\n  
              uploaded: index.clone(),\n            },\n        ];\n\n        queue.queued_operations.extend(ops.clone());\n\n        // The index uploads are coalesced into a single operation.\n        let tasks = queue.schedule_ready();\n        assert_eq!(tasks.len(), 1);\n        assert_same_op(&tasks[0].op, &ops[2]);\n        assert_same_ops(&tasks[0].coalesced_ops, &ops[0..2]);\n\n        assert!(queue.queued_operations.is_empty());\n\n        Ok(())\n    }\n\n    /// Chains of upload/index operations lead to parallel layer uploads and serial index uploads.\n    /// This is the common case with layer flushes.\n    #[test]\n    fn schedule_index_upload_chain() -> anyhow::Result<()> {\n        let mut queue = UploadQueue::Uninitialized;\n        let queue = queue.initialize_with_current_remote_index_part(&IndexPart::example(), 0)?;\n        let tli = make_timeline();\n\n        // Enqueue three uploads of the current empty index.\n        let index = Box::new(queue.clean.0.clone());\n        let layer0 = make_layer(\n            &tli,\n            \"000000000000000000000000000000000000-100000000000000000000000000000000000__00000000016B59D8-00000000016B5A51\",\n        );\n        let index0 = index_with(&index, &layer0);\n        let layer1 = make_layer(\n            &tli,\n            \"100000000000000000000000000000000000-200000000000000000000000000000000000__00000000016B59D8-00000000016B5A51\",\n        );\n        let index1 = index_with(&index0, &layer1);\n        let layer2 = make_layer(\n            &tli,\n            \"200000000000000000000000000000000000-300000000000000000000000000000000000__00000000016B59D8-00000000016B5A51\",\n        );\n        let index2 = index_with(&index1, &layer2);\n\n        let ops = [\n            UploadOp::UploadLayer(layer0.clone(), layer0.metadata(), None),\n            UploadOp::UploadMetadata {\n                uploaded: index0.clone(),\n            },\n            UploadOp::UploadLayer(layer1.clone(), 
layer1.metadata(), None),\n            UploadOp::UploadMetadata {\n                uploaded: index1.clone(),\n            },\n            UploadOp::UploadLayer(layer2.clone(), layer2.metadata(), None),\n            UploadOp::UploadMetadata {\n                uploaded: index2.clone(),\n            },\n        ];\n\n        queue.queued_operations.extend(ops.clone());\n\n        // The layer uploads should be scheduled immediately. The indexes must wait.\n        let upload_tasks = queue.schedule_ready();\n        assert_same_ops(\n            upload_tasks.iter().map(|t| &t.op),\n            [&ops[0], &ops[2], &ops[4]],\n        );\n\n        // layer2 completes first. None of the indexes can upload yet.\n        queue.complete(upload_tasks[2].task_id);\n        assert!(queue.schedule_ready().is_empty());\n\n        // layer0 completes. index0 can upload. It completes.\n        queue.complete(upload_tasks[0].task_id);\n        let index_tasks = queue.schedule_ready();\n        assert_eq!(index_tasks.len(), 1);\n        assert_same_op(&index_tasks[0].op, &ops[1]);\n        queue.complete(index_tasks[0].task_id);\n\n        // layer 1 completes. 
This unblocks index 1 and 2, which coalesce into\n        // a single upload for index 2.\n        queue.complete(upload_tasks[1].task_id);\n\n        let index_tasks = queue.schedule_ready();\n        assert_eq!(index_tasks.len(), 1);\n        assert_same_op(&index_tasks[0].op, &ops[5]);\n        assert_same_ops(&index_tasks[0].coalesced_ops, &ops[3..4]);\n\n        assert!(queue.queued_operations.is_empty());\n\n        Ok(())\n    }\n\n    /// A delete can't bypass an index upload if an index ahead of it still references it.\n    #[test]\n    fn schedule_index_delete_dereferenced() -> anyhow::Result<()> {\n        let mut queue = UploadQueue::Uninitialized;\n        let queue = queue.initialize_with_current_remote_index_part(&IndexPart::example(), 0)?;\n        let tli = make_timeline();\n\n        // Create a layer to upload.\n        let layer = make_layer(\n            &tli,\n            \"000000000000000000000000000000000000-100000000000000000000000000000000000__00000000016B59D8-00000000016B5A51\",\n        );\n        let index_upload = index_with(&queue.clean.0, &layer);\n\n        // Remove the layer reference in a new index, then delete the layer.\n        let index_deref = index_without(&index_upload, &layer);\n\n        let ops = [\n            // Initial upload, with a barrier to prevent index coalescing.\n            UploadOp::UploadLayer(layer.clone(), layer.metadata(), None),\n            UploadOp::UploadMetadata {\n                uploaded: index_upload.clone(),\n            },\n            UploadOp::Barrier(tokio::sync::watch::channel(()).0),\n            // Dereference the layer and delete it.\n            UploadOp::UploadMetadata {\n                uploaded: index_deref.clone(),\n            },\n            UploadOp::Delete(Delete {\n                layers: vec![(layer.layer_desc().layer_name(), layer.metadata())],\n            }),\n        ];\n\n        queue.queued_operations.extend(ops.clone());\n\n        // Operations are serialized.\n     
   for op in ops {\n            let tasks = queue.schedule_ready();\n            assert_eq!(tasks.len(), 1);\n            assert_same_op(&tasks[0].op, &op);\n            queue.complete(tasks[0].task_id);\n        }\n        assert!(queue.queued_operations.is_empty());\n\n        Ok(())\n    }\n\n    /// An upload with a reused layer name doesn't clobber the previous layer. Specifically, a\n    /// dereference/upload/reference cycle can't allow the upload to bypass the reference.\n    #[test]\n    fn schedule_index_upload_dereferenced() -> anyhow::Result<()> {\n        let mut queue = UploadQueue::Uninitialized;\n        let queue = queue.initialize_with_current_remote_index_part(&IndexPart::example(), 0)?;\n        let tli = make_timeline();\n\n        // Create a layer to upload.\n        let layer = make_layer(\n            &tli,\n            \"000000000000000000000000000000000000-100000000000000000000000000000000000__00000000016B59D8-00000000016B5A51\",\n        );\n\n        // Upload the layer. 
Then dereference the layer, and upload/reference it again.\n        let index_upload = index_with(&queue.clean.0, &layer);\n        let index_deref = index_without(&index_upload, &layer);\n        let index_ref = index_with(&index_deref, &layer);\n\n        let ops = [\n            // Initial upload, with a barrier to prevent index coalescing.\n            UploadOp::UploadLayer(layer.clone(), layer.metadata(), None),\n            UploadOp::UploadMetadata {\n                uploaded: index_upload.clone(),\n            },\n            UploadOp::Barrier(tokio::sync::watch::channel(()).0),\n            // Dereference the layer.\n            UploadOp::UploadMetadata {\n                uploaded: index_deref.clone(),\n            },\n            // Replace and reference the layer.\n            UploadOp::UploadLayer(layer.clone(), layer.metadata(), None),\n            UploadOp::UploadMetadata {\n                uploaded: index_ref.clone(),\n            },\n        ];\n\n        queue.queued_operations.extend(ops.clone());\n\n        // Operations are serialized.\n        for op in ops {\n            let tasks = queue.schedule_ready();\n            assert_eq!(tasks.len(), 1);\n            assert_same_op(&tasks[0].op, &op);\n            queue.complete(tasks[0].task_id);\n        }\n        assert!(queue.queued_operations.is_empty());\n\n        Ok(())\n    }\n\n    /// Nothing can bypass a shutdown, and it waits for inprogress tasks. 
It's never returned from\n    /// next_ready(), but is left at the head of the queue.\n    #[test]\n    fn schedule_shutdown() -> anyhow::Result<()> {\n        let mut queue = UploadQueue::Uninitialized;\n        let queue = queue.initialize_empty_remote(&TimelineMetadata::example(), 0)?;\n        let tli = make_timeline();\n\n        let index = Box::new(queue.clean.0.clone()); // empty, doesn't matter\n        let layer0 = make_layer(\n            &tli,\n            \"000000000000000000000000000000000000-100000000000000000000000000000000000__00000000016B59D8-00000000016B5A51\",\n        );\n        let layer1 = make_layer(\n            &tli,\n            \"100000000000000000000000000000000000-200000000000000000000000000000000000__00000000016B59D8-00000000016B5A51\",\n        );\n        let layer2 = make_layer(\n            &tli,\n            \"200000000000000000000000000000000000-300000000000000000000000000000000000__00000000016B59D8-00000000016B5A51\",\n        );\n        let layer3 = make_layer(\n            &tli,\n            \"300000000000000000000000000000000000-400000000000000000000000000000000000__00000000016B59D8-00000000016B5A51\",\n        );\n\n        // Enqueue non-conflicting upload, delete, and index before and after a shutdown.\n        let ops = [\n            UploadOp::UploadLayer(layer0.clone(), layer0.metadata(), None),\n            UploadOp::Delete(Delete {\n                layers: vec![(layer1.layer_desc().layer_name(), layer1.metadata())],\n            }),\n            UploadOp::UploadMetadata {\n                uploaded: index.clone(),\n            },\n            UploadOp::Shutdown,\n            UploadOp::UploadLayer(layer2.clone(), layer2.metadata(), None),\n            UploadOp::Delete(Delete {\n                layers: vec![(layer3.layer_desc().layer_name(), layer3.metadata())],\n            }),\n            UploadOp::UploadMetadata {\n                uploaded: index.clone(),\n            },\n        ];\n\n        
queue.queued_operations.extend(ops.clone());\n\n        // Schedule the initial operations ahead of the shutdown.\n        let tasks = queue.schedule_ready();\n\n        assert_same_ops(tasks.iter().map(|t| &t.op), &ops[0..3]);\n        assert!(matches!(\n            queue.queued_operations.front(),\n            Some(&UploadOp::Shutdown)\n        ));\n\n        // Complete the initial operations. The shutdown isn't triggered while they're pending.\n        for task in tasks {\n            assert!(queue.schedule_ready().is_empty());\n            queue.complete(task.task_id);\n        }\n\n        // The shutdown is triggered the next time we try to pull an operation. It isn't returned,\n        // but is left in the queue.\n        assert!(!queue.shutdown_ready.is_closed());\n        assert!(queue.next_ready().is_none());\n        assert!(queue.shutdown_ready.is_closed());\n\n        Ok(())\n    }\n\n    /// Scheduling respects inprogress_limit.\n    #[test]\n    fn schedule_inprogress_limit() -> anyhow::Result<()> {\n        // Create a queue with inprogress_limit=2.\n        let mut queue = UploadQueue::Uninitialized;\n        let queue = queue.initialize_empty_remote(&TimelineMetadata::example(), 2)?;\n        let tli = make_timeline();\n\n        // Enqueue a bunch of uploads.\n        let layer0 = make_layer(\n            &tli,\n            \"000000000000000000000000000000000000-100000000000000000000000000000000000__00000000016B59D8-00000000016B5A51\",\n        );\n        let layer1 = make_layer(\n            &tli,\n            \"100000000000000000000000000000000000-200000000000000000000000000000000000__00000000016B59D8-00000000016B5A51\",\n        );\n        let layer2 = make_layer(\n            &tli,\n            \"200000000000000000000000000000000000-300000000000000000000000000000000000__00000000016B59D8-00000000016B5A51\",\n        );\n        let layer3 = make_layer(\n            &tli,\n            
\"300000000000000000000000000000000000-400000000000000000000000000000000000__00000000016B59D8-00000000016B5A51\",\n        );\n\n        let ops = [\n            UploadOp::UploadLayer(layer0.clone(), layer0.metadata(), None),\n            UploadOp::UploadLayer(layer1.clone(), layer1.metadata(), None),\n            UploadOp::UploadLayer(layer2.clone(), layer2.metadata(), None),\n            UploadOp::UploadLayer(layer3.clone(), layer3.metadata(), None),\n        ];\n\n        queue.queued_operations.extend(ops.clone());\n\n        // Schedule all ready operations. Only 2 are scheduled.\n        let tasks = queue.schedule_ready();\n        assert_same_ops(tasks.iter().map(|t| &t.op), &ops[0..2]);\n        assert!(queue.next_ready().is_none());\n\n        // When one completes, another is scheduled.\n        queue.complete(tasks[0].task_id);\n        let tasks = queue.schedule_ready();\n        assert_same_ops(tasks.iter().map(|t| &t.op), &ops[2..3]);\n\n        Ok(())\n    }\n\n    /// Tests that can_bypass takes name, generation and shard index into account for all operations.\n    #[test]\n    fn can_bypass_path() -> anyhow::Result<()> {\n        let tli = make_timeline();\n\n        let name0 = &\"000000000000000000000000000000000000-100000000000000000000000000000000000__00000000016B59D8-00000000016B5A51\";\n        let name1 = &\"100000000000000000000000000000000000-200000000000000000000000000000000000__00000000016B59D8-00000000016B5A51\";\n\n        // Asserts that layers a and b either can or can't bypass each other, for all combinations\n        // of operations (except Delete and UploadMetadata which are special-cased).\n        #[track_caller]\n        fn assert_can_bypass(a: ResidentLayer, b: ResidentLayer, can_bypass: bool) {\n            let index = IndexPart::empty(TimelineMetadata::example());\n            for (a, b) in make_ops(a).into_iter().zip(make_ops(b)) {\n                match (&a, &b) {\n                    // Deletes can always bypass each 
other.\n                    (UploadOp::Delete(_), UploadOp::Delete(_)) => assert!(a.can_bypass(&b, &index)),\n                    // Indexes can never bypass each other.\n                    (UploadOp::UploadMetadata { .. }, UploadOp::UploadMetadata { .. }) => {\n                        assert!(!a.can_bypass(&b, &index))\n                    }\n                    // For other operations, assert as requested.\n                    (a, b) => assert_eq!(a.can_bypass(b, &index), can_bypass),\n                }\n            }\n        }\n\n        fn make_ops(layer: ResidentLayer) -> Vec<UploadOp> {\n            let mut index = IndexPart::empty(TimelineMetadata::example());\n            index\n                .layer_metadata\n                .insert(layer.layer_desc().layer_name(), layer.metadata());\n            vec![\n                UploadOp::UploadLayer(layer.clone(), layer.metadata(), None),\n                UploadOp::Delete(Delete {\n                    layers: vec![(layer.layer_desc().layer_name(), layer.metadata())],\n                }),\n                UploadOp::UploadMetadata {\n                    uploaded: Box::new(index),\n                },\n            ]\n        }\n\n        // Makes a ResidentLayer.\n        let layer = |name: &'static str, shard: Option<u8>, generation: u32| -> ResidentLayer {\n            let shard = shard\n                .map(|n| ShardIndex::new(ShardNumber(n), ShardCount(8)))\n                .unwrap_or(ShardIndex::unsharded());\n            let metadata = LayerFileMetadata {\n                shard,\n                generation: Generation::Valid(generation),\n                file_size: 0,\n            };\n            make_layer_with_metadata(&tli, name, metadata)\n        };\n\n        // Same name and metadata can't bypass. 
This goes both for unsharded and sharded, as well as\n        // 0 or >0 generation.\n        assert_can_bypass(layer(name0, None, 0), layer(name0, None, 0), false);\n        assert_can_bypass(layer(name0, Some(0), 0), layer(name0, Some(0), 0), false);\n        assert_can_bypass(layer(name0, None, 1), layer(name0, None, 1), false);\n\n        // Different names can bypass.\n        assert_can_bypass(layer(name0, None, 0), layer(name1, None, 0), true);\n\n        // Different shards can bypass. Shard 0 is different from unsharded.\n        assert_can_bypass(layer(name0, Some(0), 0), layer(name0, Some(1), 0), true);\n        assert_can_bypass(layer(name0, Some(0), 0), layer(name0, None, 0), true);\n\n        // Different generations can bypass, both sharded and unsharded.\n        assert_can_bypass(layer(name0, None, 0), layer(name0, None, 1), true);\n        assert_can_bypass(layer(name0, Some(1), 0), layer(name0, Some(1), 1), true);\n\n        Ok(())\n    }\n}\n"
  },
  {
    "path": "pageserver/src/tenant/vectored_blob_io.rs",
    "content": "//!\n//! Utilities for vectored reading of variable-sized \"blobs\".\n//!\n//! The \"blob\" api is an abstraction on top of the \"block\" api,\n//! with the main difference being that blobs do not have a fixed\n//! size (each blob is prefixed with 1 or 4 byte length field)\n//!\n//! The vectored apis provided in this module allow for planning\n//! and executing disk IO which covers multiple blobs.\n//!\n//! Reads are planned with [`VectoredReadPlanner`] which will coalesce\n//! adjacent blocks into a single disk IO request and exectuted by\n//! [`VectoredBlobReader`] which does all the required offset juggling\n//! and returns a buffer housing all the blobs and a list of offsets.\n//!\n//! Note that the vectored blob api does *not* go through the page cache.\n\nuse std::collections::BTreeMap;\nuse std::ops::Deref;\n\nuse bytes::Bytes;\nuse pageserver_api::key::Key;\nuse tokio::io::AsyncWriteExt;\nuse tokio_epoll_uring::BoundedBuf;\nuse utils::lsn::Lsn;\nuse utils::vec_map::VecMap;\n\nuse crate::context::RequestContext;\nuse crate::tenant::blob_io::{BYTE_UNCOMPRESSED, BYTE_ZSTD, Header};\nuse crate::virtual_file::{self, IoBufferMut, VirtualFile};\n\n/// Metadata bundled with the start and end offset of a blob.\n#[derive(Copy, Clone, Debug)]\npub struct BlobMeta {\n    pub key: Key,\n    pub lsn: Lsn,\n    pub will_init: bool,\n}\n\n/// A view into the vectored blobs read buffer.\n#[derive(Clone, Debug)]\npub(crate) enum BufView<'a> {\n    Slice(&'a [u8]),\n    Bytes(bytes::Bytes),\n}\n\nimpl<'a> BufView<'a> {\n    /// Creates a new slice-based view on the blob.\n    pub fn new_slice(slice: &'a [u8]) -> Self {\n        Self::Slice(slice)\n    }\n\n    /// Creates a new [`bytes::Bytes`]-based view on the blob.\n    pub fn new_bytes(bytes: bytes::Bytes) -> Self {\n        Self::Bytes(bytes)\n    }\n\n    /// Convert the view into `Bytes`.\n    ///\n    /// If using slice as the underlying storage, the copy will be an O(n) operation.\n    pub fn 
into_bytes(self) -> Bytes {\n        match self {\n            BufView::Slice(slice) => Bytes::copy_from_slice(slice),\n            BufView::Bytes(bytes) => bytes,\n        }\n    }\n\n    /// Creates a sub-view of the blob based on the range.\n    fn view(&self, range: std::ops::Range<usize>) -> Self {\n        match self {\n            BufView::Slice(slice) => BufView::Slice(&slice[range]),\n            BufView::Bytes(bytes) => BufView::Bytes(bytes.slice(range)),\n        }\n    }\n}\n\nimpl Deref for BufView<'_> {\n    type Target = [u8];\n\n    fn deref(&self) -> &Self::Target {\n        match self {\n            BufView::Slice(slice) => slice,\n            BufView::Bytes(bytes) => bytes,\n        }\n    }\n}\n\nimpl AsRef<[u8]> for BufView<'_> {\n    fn as_ref(&self) -> &[u8] {\n        match self {\n            BufView::Slice(slice) => slice,\n            BufView::Bytes(bytes) => bytes.as_ref(),\n        }\n    }\n}\n\nimpl<'a> From<&'a [u8]> for BufView<'a> {\n    fn from(value: &'a [u8]) -> Self {\n        Self::new_slice(value)\n    }\n}\n\nimpl From<Bytes> for BufView<'_> {\n    fn from(value: Bytes) -> Self {\n        Self::new_bytes(value)\n    }\n}\n\n/// Blob offsets into [`VectoredBlobsBuf::buf`]. 
The byte range is potentially compressed,\n/// subject to [`VectoredBlob::compression_bits`].\npub struct VectoredBlob {\n    /// Blob metadata.\n    pub meta: BlobMeta,\n    /// Header start offset.\n    header_start: usize,\n    /// Data start offset.\n    data_start: usize,\n    /// End offset.\n    end: usize,\n    /// Compression used on the data, extracted from the header.\n    compression_bits: u8,\n}\n\nimpl VectoredBlob {\n    /// Reads a decompressed view of the blob.\n    pub(crate) async fn read<'a>(&self, buf: &BufView<'a>) -> Result<BufView<'a>, std::io::Error> {\n        let view = buf.view(self.data_start..self.end);\n\n        match self.compression_bits {\n            BYTE_UNCOMPRESSED => Ok(view),\n            BYTE_ZSTD => {\n                let mut decompressed_vec = Vec::new();\n                let mut decoder =\n                    async_compression::tokio::write::ZstdDecoder::new(&mut decompressed_vec);\n                decoder.write_all(&view).await?;\n                decoder.flush().await?;\n                // Zero-copy conversion from `Vec` to `Bytes`\n                Ok(BufView::new_bytes(Bytes::from(decompressed_vec)))\n            }\n            bits => {\n                let error = std::io::Error::new(\n                    std::io::ErrorKind::InvalidData,\n                    format!(\n                        \"Failed to decompress blob for {}@{}, {}..{}: invalid compression byte {bits:x}\",\n                        self.meta.key, self.meta.lsn, self.data_start, self.end\n                    ),\n                );\n                Err(error)\n            }\n        }\n    }\n\n    /// Returns the raw blob including header.\n    pub(crate) fn raw_with_header<'a>(&self, buf: &BufView<'a>) -> BufView<'a> {\n        buf.view(self.header_start..self.end)\n    }\n}\n\nimpl std::fmt::Display for VectoredBlob {\n    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {\n        write!(\n            f,\n            \"{}@{}, 
{}..{}\",\n            self.meta.key, self.meta.lsn, self.data_start, self.end\n        )\n    }\n}\n\n/// Return type of [`VectoredBlobReader::read_blobs`]\npub struct VectoredBlobsBuf {\n    /// Buffer for all blobs in this read\n    pub buf: IoBufferMut,\n    /// Offsets into the buffer and metadata for all blobs in this read\n    pub blobs: Vec<VectoredBlob>,\n}\n\n/// Description of one disk read for multiple blobs.\n/// Used as the argument for [`VectoredBlobReader::read_blobs`]\n#[derive(Debug)]\npub struct VectoredRead {\n    pub start: u64,\n    pub end: u64,\n    /// Start offset and metadata for each blob in this read\n    pub blobs_at: VecMap<u64, BlobMeta>,\n}\n\nimpl VectoredRead {\n    pub(crate) fn size(&self) -> usize {\n        (self.end - self.start) as usize\n    }\n}\n\n#[derive(Eq, PartialEq, Debug)]\npub(crate) enum VectoredReadExtended {\n    Yes,\n    No,\n}\n\n/// A vectored read builder that tries to coalesce all reads that fits in a chunk.\npub(crate) struct ChunkedVectoredReadBuilder {\n    /// Start block number\n    start_blk_no: usize,\n    /// End block number (exclusive).\n    end_blk_no: usize,\n    /// Start offset and metadata for each blob in this read\n    blobs_at: VecMap<u64, BlobMeta>,\n    max_read_size: Option<usize>,\n}\n\nimpl ChunkedVectoredReadBuilder {\n    const CHUNK_SIZE: usize = virtual_file::get_io_buffer_alignment();\n    /// Start building a new vectored read.\n    ///\n    /// Note that by design, this does not check against reading more than `max_read_size` to\n    /// support reading larger blobs than the configuration value. 
The builder will be single use\n    /// however after that.\n    fn new_impl(\n        start_offset: u64,\n        end_offset: u64,\n        meta: BlobMeta,\n        max_read_size: Option<usize>,\n    ) -> Self {\n        let mut blobs_at = VecMap::default();\n        blobs_at\n            .append(start_offset, meta)\n            .expect(\"First insertion always succeeds\");\n\n        let start_blk_no = start_offset as usize / Self::CHUNK_SIZE;\n        let end_blk_no = (end_offset as usize).div_ceil(Self::CHUNK_SIZE);\n        Self {\n            start_blk_no,\n            end_blk_no,\n            blobs_at,\n            max_read_size,\n        }\n    }\n\n    pub(crate) fn new(\n        start_offset: u64,\n        end_offset: u64,\n        meta: BlobMeta,\n        max_read_size: usize,\n    ) -> Self {\n        Self::new_impl(start_offset, end_offset, meta, Some(max_read_size))\n    }\n\n    pub(crate) fn new_streaming(start_offset: u64, end_offset: u64, meta: BlobMeta) -> Self {\n        Self::new_impl(start_offset, end_offset, meta, None)\n    }\n\n    /// Attempts to extend the current read with a new blob if the new blob resides in the same or the immediate next chunk.\n    ///\n    /// The resulting size also must be below the max read size.\n    pub(crate) fn extend(&mut self, start: u64, end: u64, meta: BlobMeta) -> VectoredReadExtended {\n        tracing::trace!(start, end, \"trying to extend\");\n        let start_blk_no = start as usize / Self::CHUNK_SIZE;\n        let end_blk_no = (end as usize).div_ceil(Self::CHUNK_SIZE);\n\n        let not_limited_by_max_read_size = {\n            if let Some(max_read_size) = self.max_read_size {\n                let coalesced_size = (end_blk_no - self.start_blk_no) * Self::CHUNK_SIZE;\n                coalesced_size <= max_read_size\n            } else {\n                true\n            }\n        };\n\n        // True if the second block starts in the same block or the immediate next block where the first block 
ended.\n        //\n        // Note: This automatically handles the case where two blocks are adjacent to each other,\n        // whether they start on a chunk size boundary or not.\n        let is_adjacent_chunk_read = {\n            // 1. first.end & second.start are in the same block\n            self.end_blk_no == start_blk_no + 1 ||\n            // 2. first.end ends one block before second.start\n            self.end_blk_no == start_blk_no\n        };\n\n        if is_adjacent_chunk_read && not_limited_by_max_read_size {\n            self.end_blk_no = end_blk_no;\n            self.blobs_at\n                .append(start, meta)\n                .expect(\"LSNs are ordered within vectored reads\");\n\n            return VectoredReadExtended::Yes;\n        }\n\n        VectoredReadExtended::No\n    }\n\n    pub(crate) fn size(&self) -> usize {\n        (self.end_blk_no - self.start_blk_no) * Self::CHUNK_SIZE\n    }\n\n    pub(crate) fn build(self) -> VectoredRead {\n        let start = (self.start_blk_no * Self::CHUNK_SIZE) as u64;\n        let end = (self.end_blk_no * Self::CHUNK_SIZE) as u64;\n        VectoredRead {\n            start,\n            end,\n            blobs_at: self.blobs_at,\n        }\n    }\n}\n\n#[derive(Copy, Clone, Debug)]\npub enum BlobFlag {\n    None,\n    Ignore,\n    ReplaceAll,\n}\n\n/// Planner for vectored blob reads.\n///\n/// Blob offsets are received via [`VectoredReadPlanner::handle`]\n/// and coalesced into disk reads.\n///\n/// The implementation is very simple:\n/// * Collect all blob offsets in an ordered structure\n/// * Iterate over the collected blobs and coalesce them into reads at the end\npub struct VectoredReadPlanner {\n    // Track all the blob offsets. 
Start offsets must be ordered.\n    // Values in the value tuples are:\n    // (\n    //   lsn of the blob,\n    //   start offset of the blob in the underlying file,\n    //   end offset of the blob in the underlying file,\n    //   whether the blob initializes the page image or not\n    //   see [`pageserver_api::record::NeonWalRecord::will_init`]\n    // )\n    blobs: BTreeMap<Key, Vec<(Lsn, u64, u64, bool)>>,\n    // Arguments for previous blob passed into [`VectoredReadPlanner::handle`]\n    prev: Option<(Key, Lsn, u64, BlobFlag)>,\n\n    max_read_size: usize,\n}\n\nimpl VectoredReadPlanner {\n    pub fn new(max_read_size: usize) -> Self {\n        Self {\n            blobs: BTreeMap::new(),\n            prev: None,\n            max_read_size,\n        }\n    }\n\n    /// Include a new blob in the read plan.\n    ///\n    /// This function is called from a B-Tree index visitor (see `DeltaLayerInner::plan_reads`\n    /// and `ImageLayerInner::plan_reads`). Said visitor wants to collect blob offsets for all\n    /// keys in a given keyspace. This function must be called for each key in the desired\n    /// keyspace (monotonically continuous). [`Self::handle_range_end`] must\n    /// be called after every range in the offset.\n    ///\n    /// In the event that keys are skipped, the behaviour is undefined and can lead to an\n    /// incorrect read plan. We can end up asserting, erroring in wal redo or returning\n    /// incorrect data to the user.\n    ///\n    /// The `flag` argument has two interesting values:\n    /// * [`BlobFlag::ReplaceAll`]: The blob for this key should replace all existing blobs.\n    ///   This is used for WAL records that `will_init`.\n    /// * [`BlobFlag::Ignore`]: This blob should not be included in the read. 
This happens\n    ///   if the blob is cached.\n    pub fn handle(&mut self, key: Key, lsn: Lsn, offset: u64, flag: BlobFlag) {\n        // Implementation note: internally lag behind by one blob such that\n        // we have a start and end offset when initialising [`VectoredRead`]\n        let (prev_key, prev_lsn, prev_offset, prev_flag) = match self.prev {\n            None => {\n                self.prev = Some((key, lsn, offset, flag));\n                return;\n            }\n            Some(prev) => prev,\n        };\n\n        self.add_blob(prev_key, prev_lsn, prev_offset, offset, prev_flag);\n\n        self.prev = Some((key, lsn, offset, flag));\n    }\n\n    pub fn handle_range_end(&mut self, offset: u64) {\n        if let Some((prev_key, prev_lsn, prev_offset, prev_flag)) = self.prev {\n            self.add_blob(prev_key, prev_lsn, prev_offset, offset, prev_flag);\n        }\n\n        self.prev = None;\n    }\n\n    fn add_blob(&mut self, key: Key, lsn: Lsn, start_offset: u64, end_offset: u64, flag: BlobFlag) {\n        match flag {\n            BlobFlag::None => {\n                let blobs_for_key = self.blobs.entry(key).or_default();\n                blobs_for_key.push((lsn, start_offset, end_offset, false));\n            }\n            BlobFlag::ReplaceAll => {\n                let blobs_for_key = self.blobs.entry(key).or_default();\n                blobs_for_key.clear();\n                blobs_for_key.push((lsn, start_offset, end_offset, true));\n            }\n            BlobFlag::Ignore => {}\n        }\n    }\n\n    pub fn finish(self) -> Vec<VectoredRead> {\n        let mut current_read_builder: Option<ChunkedVectoredReadBuilder> = None;\n        let mut reads = Vec::new();\n\n        for (key, blobs_for_key) in self.blobs {\n            for (lsn, start_offset, end_offset, will_init) in blobs_for_key {\n                let extended = match &mut current_read_builder {\n                    Some(read_builder) => read_builder.extend(\n             
           start_offset,\n                        end_offset,\n                        BlobMeta {\n                            key,\n                            lsn,\n                            will_init,\n                        },\n                    ),\n                    None => VectoredReadExtended::No,\n                };\n\n                if extended == VectoredReadExtended::No {\n                    let next_read_builder = ChunkedVectoredReadBuilder::new(\n                        start_offset,\n                        end_offset,\n                        BlobMeta {\n                            key,\n                            lsn,\n                            will_init,\n                        },\n                        self.max_read_size,\n                    );\n\n                    let prev_read_builder = current_read_builder.replace(next_read_builder);\n\n                    // `current_read_builder` is None in the first iteration of the outer loop\n                    if let Some(read_builder) = prev_read_builder {\n                        reads.push(read_builder.build());\n                    }\n                }\n            }\n        }\n\n        if let Some(read_builder) = current_read_builder {\n            reads.push(read_builder.build());\n        }\n\n        reads\n    }\n}\n\n/// Disk reader for vectored blob spans (does not go through the page cache)\npub struct VectoredBlobReader<'a> {\n    file: &'a VirtualFile,\n}\n\nimpl<'a> VectoredBlobReader<'a> {\n    pub fn new(file: &'a VirtualFile) -> Self {\n        Self { file }\n    }\n\n    /// Read the requested blobs into the buffer.\n    ///\n    /// We have to deal with the fact that blobs are not fixed size.\n    /// Each blob is prefixed by a size header.\n    ///\n    /// The success return value is a struct which contains the buffer\n    /// filled from disk and a list of offsets at which each blob lies\n    /// in the buffer.\n    pub async fn read_blobs(\n        &self,\n     
   read: &VectoredRead,\n        buf: IoBufferMut,\n        ctx: &RequestContext,\n    ) -> Result<VectoredBlobsBuf, std::io::Error> {\n        assert!(read.size() > 0);\n        assert!(\n            read.size() <= buf.capacity(),\n            \"{} > {}\",\n            read.size(),\n            buf.capacity()\n        );\n\n        if cfg!(debug_assertions) {\n            const ALIGN: u64 = virtual_file::get_io_buffer_alignment() as u64;\n            debug_assert_eq!(\n                read.start % ALIGN,\n                0,\n                \"Read start at {} does not satisfy the required io buffer alignment ({} bytes)\",\n                read.start,\n                ALIGN\n            );\n        }\n\n        let buf = self\n            .file\n            .read_exact_at(buf.slice(0..read.size()), read.start, ctx)\n            .await?\n            .into_inner();\n\n        let blobs_at = read.blobs_at.as_slice();\n\n        let mut blobs = Vec::with_capacity(blobs_at.len());\n        // Blobs in `read` only provide their starting offset. 
The end offset\n        // of a blob is implicit: the start of the next blob if one exists\n        // or the end of the read.\n\n        for (blob_start, meta) in blobs_at.iter().copied() {\n            let header_start = (blob_start - read.start) as usize;\n            let header = Header::decode(&buf[header_start..]).map_err(|anyhow_err| {\n                std::io::Error::new(std::io::ErrorKind::InvalidData, anyhow_err)\n            })?;\n            let data_start = header_start + header.header_len;\n            let end = data_start + header.data_len;\n            let compression_bits = header.compression_bits;\n\n            blobs.push(VectoredBlob {\n                header_start,\n                data_start,\n                end,\n                meta,\n                compression_bits,\n            });\n        }\n\n        Ok(VectoredBlobsBuf { buf, blobs })\n    }\n}\n\n/// Read planner used in [`crate::tenant::storage_layer::image_layer::ImageLayerIterator`].\n///\n/// It provides a streaming API for getting read blobs. It returns a batch when\n/// `handle` gets called and when the current key would just exceed the read_size and\n/// max_cnt constraints.\npub struct StreamingVectoredReadPlanner {\n    read_builder: Option<ChunkedVectoredReadBuilder>,\n    // Arguments for previous blob passed into [`StreamingVectoredReadPlanner::handle`]\n    prev: Option<(Key, Lsn, u64, bool)>,\n    /// Max read size per batch. This is not a strict limit. 
If there are [0, 100) and [100, 200), while the `max_read_size` is 150,\n    /// we will produce a single batch instead of splitting them.\n    max_read_size: u64,\n    /// Max item count per batch\n    max_cnt: usize,\n    /// Number of items in the current batch\n    cnt: usize,\n}\n\nimpl StreamingVectoredReadPlanner {\n    pub fn new(max_read_size: u64, max_cnt: usize) -> Self {\n        assert!(max_cnt > 0);\n        assert!(max_read_size > 0);\n        Self {\n            read_builder: None,\n            prev: None,\n            max_cnt,\n            max_read_size,\n            cnt: 0,\n        }\n    }\n\n    pub fn handle(\n        &mut self,\n        key: Key,\n        lsn: Lsn,\n        offset: u64,\n        will_init: bool,\n    ) -> Option<VectoredRead> {\n        // Implementation note: internally lag behind by one blob such that\n        // we have a start and end offset when initialising [`VectoredRead`]\n        let (prev_key, prev_lsn, prev_offset, prev_will_init) = match self.prev {\n            None => {\n                self.prev = Some((key, lsn, offset, will_init));\n                return None;\n            }\n            Some(prev) => prev,\n        };\n\n        let res = self.add_blob(\n            prev_key,\n            prev_lsn,\n            prev_offset,\n            offset,\n            false,\n            prev_will_init,\n        );\n\n        self.prev = Some((key, lsn, offset, will_init));\n\n        res\n    }\n\n    pub fn handle_range_end(&mut self, offset: u64) -> Option<VectoredRead> {\n        let res = if let Some((prev_key, prev_lsn, prev_offset, prev_will_init)) = self.prev {\n            self.add_blob(\n                prev_key,\n                prev_lsn,\n                prev_offset,\n                offset,\n                true,\n                prev_will_init,\n            )\n        } else {\n            None\n        };\n\n        self.prev = None;\n\n        res\n    }\n\n    fn add_blob(\n        &mut self,\n        key: Key,\n    
    lsn: Lsn,\n        start_offset: u64,\n        end_offset: u64,\n        is_last_blob_in_read: bool,\n        will_init: bool,\n    ) -> Option<VectoredRead> {\n        match &mut self.read_builder {\n            Some(read_builder) => {\n                let extended = read_builder.extend(\n                    start_offset,\n                    end_offset,\n                    BlobMeta {\n                        key,\n                        lsn,\n                        will_init,\n                    },\n                );\n                assert_eq!(extended, VectoredReadExtended::Yes);\n            }\n            None => {\n                self.read_builder = {\n                    Some(ChunkedVectoredReadBuilder::new_streaming(\n                        start_offset,\n                        end_offset,\n                        BlobMeta {\n                            key,\n                            lsn,\n                            will_init,\n                        },\n                    ))\n                };\n            }\n        }\n        let read_builder = self.read_builder.as_mut().unwrap();\n        self.cnt += 1;\n        if is_last_blob_in_read\n            || read_builder.size() >= self.max_read_size as usize\n            || self.cnt >= self.max_cnt\n        {\n            let prev_read_builder = self.read_builder.take();\n            self.cnt = 0;\n\n            // `current_read_builder` is None in the first iteration\n            if let Some(read_builder) = prev_read_builder {\n                return Some(read_builder.build());\n            }\n        }\n        None\n    }\n}\n\n#[cfg(test)]\nmod tests {\n\n    use super::super::blob_io::tests::{random_array, write_maybe_compressed};\n    use super::*;\n    use crate::context::DownloadBehavior;\n    use crate::page_cache::PAGE_SZ;\n    use crate::task_mgr::TaskKind;\n\n    fn validate_read(read: &VectoredRead, offset_range: &[(Key, Lsn, u64, BlobFlag)]) {\n        const ALIGN: u64 = 
virtual_file::get_io_buffer_alignment() as u64;\n        assert_eq!(read.start % ALIGN, 0);\n        assert_eq!(read.start / ALIGN, offset_range.first().unwrap().2 / ALIGN);\n\n        let expected_offsets_in_read: Vec<_> = offset_range.iter().map(|o| o.2).collect();\n\n        let offsets_in_read: Vec<_> = read\n            .blobs_at\n            .as_slice()\n            .iter()\n            .map(|(offset, _)| *offset)\n            .collect();\n\n        assert_eq!(expected_offsets_in_read, offsets_in_read);\n    }\n\n    #[test]\n    fn planner_chunked_coalesce_all_test() {\n        use crate::virtual_file;\n\n        const CHUNK_SIZE: u64 = virtual_file::get_io_buffer_alignment() as u64;\n\n        let max_read_size = CHUNK_SIZE as usize * 8;\n        let key = Key::MIN;\n        let lsn = Lsn(0);\n\n        let blob_descriptions = [\n            (key, lsn, CHUNK_SIZE / 8, BlobFlag::None), // Read 1 BEGIN\n            (key, lsn, CHUNK_SIZE / 4, BlobFlag::Ignore), // Gap\n            (key, lsn, CHUNK_SIZE / 2, BlobFlag::None),\n            (key, lsn, CHUNK_SIZE - 2, BlobFlag::Ignore), // Gap\n            (key, lsn, CHUNK_SIZE, BlobFlag::None),\n            (key, lsn, CHUNK_SIZE * 2 - 1, BlobFlag::None),\n            (key, lsn, CHUNK_SIZE * 2 + 1, BlobFlag::Ignore), // Gap\n            (key, lsn, CHUNK_SIZE * 3 + 1, BlobFlag::None),\n            (key, lsn, CHUNK_SIZE * 5 + 1, BlobFlag::None),\n            (key, lsn, CHUNK_SIZE * 6 + 1, BlobFlag::Ignore), // skipped chunk size, but not a chunk: should coalesce.\n            (key, lsn, CHUNK_SIZE * 7 + 1, BlobFlag::None),\n            (key, lsn, CHUNK_SIZE * 8, BlobFlag::None), // Read 2 BEGIN (b/c max_read_size)\n            (key, lsn, CHUNK_SIZE * 9, BlobFlag::Ignore), // ==== skipped a chunk\n            (key, lsn, CHUNK_SIZE * 10, BlobFlag::None), // Read 3 BEGIN (cannot coalesce)\n        ];\n\n        let ranges = [\n            &[\n                blob_descriptions[0],\n                blob_descriptions[2],\n 
               blob_descriptions[4],\n                blob_descriptions[5],\n                blob_descriptions[7],\n                blob_descriptions[8],\n                blob_descriptions[10],\n            ],\n            &blob_descriptions[11..12],\n            &blob_descriptions[13..],\n        ];\n\n        let mut planner = VectoredReadPlanner::new(max_read_size);\n        for (key, lsn, offset, flag) in blob_descriptions {\n            planner.handle(key, lsn, offset, flag);\n        }\n\n        planner.handle_range_end(652 * 1024);\n\n        let reads = planner.finish();\n\n        assert_eq!(reads.len(), ranges.len());\n\n        for (idx, read) in reads.iter().enumerate() {\n            validate_read(read, ranges[idx]);\n        }\n    }\n\n    #[test]\n    fn planner_max_read_size_test() {\n        let max_read_size = 128 * 1024;\n        let key = Key::MIN;\n        let lsn = Lsn(0);\n\n        let blob_descriptions = vec![\n            (key, lsn, 0, BlobFlag::None),\n            (key, lsn, 32 * 1024, BlobFlag::None),\n            (key, lsn, 96 * 1024, BlobFlag::None), // Last in read 1\n            (key, lsn, 128 * 1024, BlobFlag::None), // Last in read 2\n            (key, lsn, 198 * 1024, BlobFlag::None), // Last in read 3\n            (key, lsn, 268 * 1024, BlobFlag::None), // Last in read 4\n            (key, lsn, 396 * 1024, BlobFlag::None), // Last in read 5\n            (key, lsn, 652 * 1024, BlobFlag::None), // Last in read 6\n        ];\n\n        let ranges = [\n            &blob_descriptions[0..3],\n            &blob_descriptions[3..4],\n            &blob_descriptions[4..5],\n            &blob_descriptions[5..6],\n            &blob_descriptions[6..7],\n            &blob_descriptions[7..],\n        ];\n\n        let mut planner = VectoredReadPlanner::new(max_read_size);\n        for (key, lsn, offset, flag) in blob_descriptions.clone() {\n            planner.handle(key, lsn, offset, flag);\n        }\n\n        planner.handle_range_end(652 * 
1024);\n\n        let reads = planner.finish();\n\n        assert_eq!(reads.len(), 6);\n\n        // TODO: could remove zero reads to produce 5 reads here\n\n        for (idx, read) in reads.iter().enumerate() {\n            validate_read(read, ranges[idx]);\n        }\n    }\n\n    #[test]\n    fn planner_replacement_test() {\n        const CHUNK_SIZE: u64 = virtual_file::get_io_buffer_alignment() as u64;\n        let max_read_size = 128 * CHUNK_SIZE as usize;\n        let first_key = Key::MIN;\n        let second_key = first_key.next();\n        let lsn = Lsn(0);\n\n        let blob_descriptions = vec![\n            (first_key, lsn, 0, BlobFlag::None),          // First in read 1\n            (first_key, lsn, CHUNK_SIZE, BlobFlag::None), // Last in read 1\n            (second_key, lsn, 2 * CHUNK_SIZE, BlobFlag::ReplaceAll),\n            (second_key, lsn, 3 * CHUNK_SIZE, BlobFlag::None),\n            (second_key, lsn, 4 * CHUNK_SIZE, BlobFlag::ReplaceAll), // First in read 2\n            (second_key, lsn, 5 * CHUNK_SIZE, BlobFlag::None),       // Last in read 2\n        ];\n\n        let ranges = [&blob_descriptions[0..2], &blob_descriptions[4..]];\n\n        let mut planner = VectoredReadPlanner::new(max_read_size);\n        for (key, lsn, offset, flag) in blob_descriptions.clone() {\n            planner.handle(key, lsn, offset, flag);\n        }\n\n        planner.handle_range_end(6 * CHUNK_SIZE);\n\n        let reads = planner.finish();\n        assert_eq!(reads.len(), 2);\n\n        for (idx, read) in reads.iter().enumerate() {\n            validate_read(read, ranges[idx]);\n        }\n    }\n\n    #[test]\n    fn streaming_planner_max_read_size_test() {\n        let max_read_size = 128 * 1024;\n        let key = Key::MIN;\n        let lsn = Lsn(0);\n\n        let blob_descriptions = vec![\n            (key, lsn, 0, BlobFlag::None),\n            (key, lsn, 32 * 1024, BlobFlag::None),\n            (key, lsn, 96 * 1024, BlobFlag::None),\n            (key, lsn, 
128 * 1024, BlobFlag::None),\n            (key, lsn, 198 * 1024, BlobFlag::None),\n            (key, lsn, 268 * 1024, BlobFlag::None),\n            (key, lsn, 396 * 1024, BlobFlag::None),\n            (key, lsn, 652 * 1024, BlobFlag::None),\n        ];\n\n        let ranges = [\n            &blob_descriptions[0..3],\n            &blob_descriptions[3..5],\n            &blob_descriptions[5..6],\n            &blob_descriptions[6..7],\n            &blob_descriptions[7..],\n        ];\n\n        let mut planner = StreamingVectoredReadPlanner::new(max_read_size, 1000);\n        let mut reads = Vec::new();\n        for (key, lsn, offset, _) in blob_descriptions.clone() {\n            reads.extend(planner.handle(key, lsn, offset, false));\n        }\n        reads.extend(planner.handle_range_end(652 * 1024));\n\n        assert_eq!(reads.len(), ranges.len());\n\n        for (idx, read) in reads.iter().enumerate() {\n            validate_read(read, ranges[idx]);\n        }\n    }\n\n    #[test]\n    fn streaming_planner_max_cnt_test() {\n        let max_read_size = 1024 * 1024;\n        let key = Key::MIN;\n        let lsn = Lsn(0);\n\n        let blob_descriptions = vec![\n            (key, lsn, 0, BlobFlag::None),\n            (key, lsn, 32 * 1024, BlobFlag::None),\n            (key, lsn, 96 * 1024, BlobFlag::None),\n            (key, lsn, 128 * 1024, BlobFlag::None),\n            (key, lsn, 198 * 1024, BlobFlag::None),\n            (key, lsn, 268 * 1024, BlobFlag::None),\n            (key, lsn, 396 * 1024, BlobFlag::None),\n            (key, lsn, 652 * 1024, BlobFlag::None),\n        ];\n\n        let ranges = [\n            &blob_descriptions[0..2],\n            &blob_descriptions[2..4],\n            &blob_descriptions[4..6],\n            &blob_descriptions[6..],\n        ];\n\n        let mut planner = StreamingVectoredReadPlanner::new(max_read_size, 2);\n        let mut reads = Vec::new();\n        for (key, lsn, offset, _) in blob_descriptions.clone() {\n            
reads.extend(planner.handle(key, lsn, offset, false));\n        }\n        reads.extend(planner.handle_range_end(652 * 1024));\n\n        assert_eq!(reads.len(), ranges.len());\n\n        for (idx, read) in reads.iter().enumerate() {\n            validate_read(read, ranges[idx]);\n        }\n    }\n\n    #[test]\n    fn streaming_planner_edge_test() {\n        let max_read_size = 1024 * 1024;\n        let key = Key::MIN;\n        let lsn = Lsn(0);\n        {\n            let mut planner = StreamingVectoredReadPlanner::new(max_read_size, 1);\n            let mut reads = Vec::new();\n            reads.extend(planner.handle_range_end(652 * 1024));\n            assert!(reads.is_empty());\n        }\n        {\n            let mut planner = StreamingVectoredReadPlanner::new(max_read_size, 1);\n            let mut reads = Vec::new();\n            reads.extend(planner.handle(key, lsn, 0, false));\n            reads.extend(planner.handle_range_end(652 * 1024));\n            assert_eq!(reads.len(), 1);\n            validate_read(&reads[0], &[(key, lsn, 0, BlobFlag::None)]);\n        }\n        {\n            let mut planner = StreamingVectoredReadPlanner::new(max_read_size, 1);\n            let mut reads = Vec::new();\n            reads.extend(planner.handle(key, lsn, 0, false));\n            reads.extend(planner.handle(key, lsn, 128 * 1024, false));\n            reads.extend(planner.handle_range_end(652 * 1024));\n            assert_eq!(reads.len(), 2);\n            validate_read(&reads[0], &[(key, lsn, 0, BlobFlag::None)]);\n            validate_read(&reads[1], &[(key, lsn, 128 * 1024, BlobFlag::None)]);\n        }\n        {\n            let mut planner = StreamingVectoredReadPlanner::new(max_read_size, 2);\n            let mut reads = Vec::new();\n            reads.extend(planner.handle(key, lsn, 0, false));\n            reads.extend(planner.handle(key, lsn, 128 * 1024, false));\n            reads.extend(planner.handle_range_end(652 * 1024));\n            
assert_eq!(reads.len(), 1);\n            validate_read(\n                &reads[0],\n                &[\n                    (key, lsn, 0, BlobFlag::None),\n                    (key, lsn, 128 * 1024, BlobFlag::None),\n                ],\n            );\n        }\n    }\n\n    async fn round_trip_test_compressed(\n        blobs: &[Vec<u8>],\n        compression: bool,\n    ) -> anyhow::Result<()> {\n        let ctx =\n            RequestContext::new(TaskKind::UnitTest, DownloadBehavior::Error).with_scope_unit_test();\n        let (_temp_dir, pathbuf, offsets) =\n            write_maybe_compressed(blobs, compression, &ctx).await?;\n\n        let file = VirtualFile::open_v2(&pathbuf, &ctx).await?;\n        let file_len = std::fs::metadata(&pathbuf)?.len();\n\n        // Multiply by two (compressed data might need more space), and add a few bytes for the header\n        let reserved_bytes = blobs.iter().map(|bl| bl.len()).max().unwrap() * 2 + 16;\n        let mut buf = IoBufferMut::with_capacity(reserved_bytes);\n\n        let vectored_blob_reader = VectoredBlobReader::new(&file);\n        let meta = BlobMeta {\n            key: Key::MIN,\n            lsn: Lsn(0),\n            will_init: false,\n        };\n\n        for (idx, (blob, offset)) in blobs.iter().zip(offsets.iter()).enumerate() {\n            let end = offsets.get(idx + 1).unwrap_or(&file_len);\n            if idx + 1 == offsets.len() {\n                continue;\n            }\n            let read_builder = ChunkedVectoredReadBuilder::new(*offset, *end, meta, 16 * 4096);\n            let read = read_builder.build();\n            let result = vectored_blob_reader.read_blobs(&read, buf, &ctx).await?;\n            assert_eq!(result.blobs.len(), 1);\n            let read_blob = &result.blobs[0];\n            let view = BufView::new_slice(&result.buf);\n            let read_buf = read_blob.read(&view).await?;\n            assert_eq!(\n                &blob[..],\n                &read_buf[..],\n                
\"mismatch for idx={idx} at offset={offset}\"\n            );\n\n            // Check that raw_with_header returns a valid header.\n            let raw = read_blob.raw_with_header(&view);\n            let header = Header::decode(&raw)?;\n            if !compression || header.header_len == 1 {\n                assert_eq!(header.compression_bits, BYTE_UNCOMPRESSED);\n            }\n            assert_eq!(raw.len(), header.total_len());\n\n            buf = result.buf;\n        }\n        Ok(())\n    }\n\n    #[tokio::test]\n    async fn test_really_big_array() -> anyhow::Result<()> {\n        let blobs = &[\n            b\"test\".to_vec(),\n            random_array(10 * PAGE_SZ),\n            b\"hello\".to_vec(),\n            random_array(66 * PAGE_SZ),\n            vec![0xf3; 24 * PAGE_SZ],\n            b\"foobar\".to_vec(),\n        ];\n        round_trip_test_compressed(blobs, false).await?;\n        round_trip_test_compressed(blobs, true).await?;\n        Ok(())\n    }\n\n    #[tokio::test]\n    async fn test_arrays_inc() -> anyhow::Result<()> {\n        let blobs = (0..PAGE_SZ / 8)\n            .map(|v| random_array(v * 16))\n            .collect::<Vec<_>>();\n        round_trip_test_compressed(&blobs, false).await?;\n        round_trip_test_compressed(&blobs, true).await?;\n        Ok(())\n    }\n}\n"
  },
  {
    "path": "pageserver/src/tenant.rs",
    "content": "//! Timeline repository implementation that keeps old data in layer files, and\n//! the recent changes in ephemeral files.\n//!\n//! See tenant/*_layer.rs files. The functions here are responsible for locating\n//! the correct layer for the get/put call, walking back the timeline branching\n//! history as needed.\n//!\n//! The files are stored in the .neon/tenants/<tenant_id>/timelines/<timeline_id>\n//! directory. See docs/pageserver-storage.md for how the files are managed.\n//! In addition to the layer files, there is a metadata file in the same\n//! directory that contains information about the timeline, in particular its\n//! parent timeline, and the last LSN that has been written to disk.\n//!\n\nuse std::collections::hash_map::Entry;\nuse std::collections::{BTreeMap, HashMap, HashSet};\nuse std::fmt::{Debug, Display};\nuse std::fs::File;\nuse std::future::Future;\nuse std::sync::atomic::{AtomicBool, AtomicU64, Ordering};\nuse std::sync::{Arc, Mutex, Weak};\nuse std::time::{Duration, Instant, SystemTime};\nuse std::{fmt, fs};\n\nuse anyhow::{Context, bail};\nuse arc_swap::ArcSwap;\nuse camino::{Utf8Path, Utf8PathBuf};\nuse chrono::NaiveDateTime;\nuse enumset::EnumSet;\nuse futures::StreamExt;\nuse futures::stream::FuturesUnordered;\nuse itertools::Itertools as _;\nuse once_cell::sync::Lazy;\npub use pageserver_api::models::TenantState;\nuse pageserver_api::models::{self, RelSizeMigration};\nuse pageserver_api::models::{\n    CompactInfoResponse, TimelineArchivalState, TimelineState, TopTenantShardItem,\n    WalRedoManagerStatus,\n};\nuse pageserver_api::shard::{ShardIdentity, ShardStripeSize, TenantShardId};\nuse postgres_ffi::PgMajorVersion;\nuse remote_storage::{DownloadError, GenericRemoteStorage, TimeoutOrCancel};\nuse remote_timeline_client::index::GcCompactionState;\nuse remote_timeline_client::manifest::{\n    LATEST_TENANT_MANIFEST_VERSION, OffloadedTimelineManifest, TenantManifest,\n};\nuse remote_timeline_client::{\n    
FAILED_REMOTE_OP_RETRIES, FAILED_UPLOAD_WARN_THRESHOLD, UploadQueueNotReadyError,\n    download_tenant_manifest,\n};\nuse secondary::heatmap::{HeatMapTenant, HeatMapTimeline};\nuse storage_broker::BrokerClientChannel;\nuse timeline::compaction::{CompactionOutcome, GcCompactionQueue};\nuse timeline::import_pgdata::ImportingTimeline;\nuse timeline::layer_manager::LayerManagerLockHolder;\nuse timeline::offload::{OffloadError, offload_timeline};\nuse timeline::{\n    CompactFlags, CompactOptions, CompactionError, PreviousHeatmap, ShutdownMode, import_pgdata,\n};\nuse tokio::io::BufReader;\nuse tokio::sync::{Notify, Semaphore, watch};\nuse tokio::task::JoinSet;\nuse tokio_util::sync::CancellationToken;\nuse tracing::*;\nuse upload_queue::NotInitialized;\nuse utils::circuit_breaker::CircuitBreaker;\nuse utils::crashsafe::path_with_suffix_extension;\nuse utils::sync::gate::{Gate, GateGuard};\nuse utils::timeout::{TimeoutCancellableError, timeout_cancellable};\nuse utils::try_rcu::ArcSwapExt;\nuse utils::zstd::{create_zst_tarball, extract_zst_tarball};\nuse utils::{backoff, completion, failpoint_support, fs_ext, pausable_failpoint};\n\nuse self::config::{AttachedLocationConfig, AttachmentMode, LocationConf};\nuse self::metadata::TimelineMetadata;\nuse self::mgr::{GetActiveTenantError, GetTenantError};\nuse self::remote_timeline_client::upload::{upload_index_part, upload_tenant_manifest};\nuse self::remote_timeline_client::{RemoteTimelineClient, WaitCompletionError};\nuse self::timeline::uninit::{TimelineCreateGuard, TimelineExclusionError, UninitializedTimeline};\nuse self::timeline::{\n    EvictionTaskTenantState, GcCutoffs, TimelineDeleteProgress, TimelineResources, WaitLsnError,\n};\nuse crate::basebackup_cache::BasebackupCache;\nuse crate::config::PageServerConf;\nuse crate::context;\nuse crate::context::RequestContextBuilder;\nuse crate::context::{DownloadBehavior, RequestContext};\nuse crate::deletion_queue::{DeletionQueueClient, DeletionQueueError};\nuse 
crate::feature_resolver::{FeatureResolver, TenantFeatureResolver};\nuse crate::l0_flush::L0FlushGlobalState;\nuse crate::metrics::{\n    BROKEN_TENANTS_SET, CIRCUIT_BREAKERS_BROKEN, CIRCUIT_BREAKERS_UNBROKEN, CONCURRENT_INITDBS,\n    INITDB_RUN_TIME, INITDB_SEMAPHORE_ACQUISITION_TIME, TENANT, TENANT_OFFLOADED_TIMELINES,\n    TENANT_STATE_METRIC, TENANT_SYNTHETIC_SIZE_METRIC, TIMELINE_STATE_METRIC,\n    remove_tenant_metrics,\n};\nuse crate::task_mgr::TaskKind;\nuse crate::tenant::config::LocationMode;\nuse crate::tenant::gc_result::GcResult;\npub use crate::tenant::remote_timeline_client::index::IndexPart;\nuse crate::tenant::remote_timeline_client::{\n    INITDB_PATH, MaybeDeletedIndexPart, remote_initdb_archive_path,\n};\nuse crate::tenant::storage_layer::{DeltaLayer, ImageLayer};\nuse crate::tenant::timeline::delete::DeleteTimelineFlow;\nuse crate::tenant::timeline::uninit::cleanup_timeline_directory;\nuse crate::virtual_file::VirtualFile;\nuse crate::walingest::WalLagCooldown;\nuse crate::walredo::{PostgresRedoManager, RedoAttemptType};\nuse crate::{InitializationOrder, TEMP_FILE_SUFFIX, import_datadir, span, task_mgr, walredo};\n\nstatic INIT_DB_SEMAPHORE: Lazy<Semaphore> = Lazy::new(|| Semaphore::new(8));\nuse utils::crashsafe;\nuse utils::generation::Generation;\nuse utils::id::TimelineId;\nuse utils::lsn::{Lsn, RecordLsn};\n\npub mod blob_io;\npub mod block_io;\npub mod vectored_blob_io;\n\npub mod disk_btree;\npub(crate) mod ephemeral_file;\npub mod layer_map;\n\npub mod metadata;\npub mod remote_timeline_client;\npub mod storage_layer;\n\npub mod checks;\npub mod config;\npub mod mgr;\npub mod secondary;\npub mod tasks;\npub mod upload_queue;\n\npub(crate) mod timeline;\n\npub mod size;\n\nmod gc_block;\nmod gc_result;\npub(crate) mod throttle;\n\n#[cfg(test)]\npub mod debug;\n\npub(crate) use timeline::{LogicalSizeCalculationCause, PageReconstructError, Timeline};\n\npub(crate) use crate::span::debug_assert_current_span_has_tenant_and_timeline_id;\n// 
re-export for use in walreceiver\npub use crate::tenant::timeline::WalReceiverInfo;\n\n/// The \"tenants\" part of `tenants/<tenant>/timelines...`\npub const TENANTS_SEGMENT_NAME: &str = \"tenants\";\n\n/// Parts of the `.neon/tenants/<tenant_id>/timelines/<timeline_id>` directory prefix.\npub const TIMELINES_SEGMENT_NAME: &str = \"timelines\";\n\n/// References to shared objects that are passed into each tenant, such\n/// as the shared remote storage client and process initialization state.\n#[derive(Clone)]\npub struct TenantSharedResources {\n    pub broker_client: storage_broker::BrokerClientChannel,\n    pub remote_storage: GenericRemoteStorage,\n    pub deletion_queue_client: DeletionQueueClient,\n    pub l0_flush_global_state: L0FlushGlobalState,\n    pub basebackup_cache: Arc<BasebackupCache>,\n    pub feature_resolver: FeatureResolver,\n}\n\n/// A [`TenantShard`] is really an _attached_ tenant.  The configuration\n/// for an attached tenant is a subset of the [`LocationConf`], represented\n/// in this struct.\n#[derive(Clone)]\npub(super) struct AttachedTenantConf {\n    tenant_conf: pageserver_api::models::TenantConfig,\n    location: AttachedLocationConfig,\n    /// The deadline before which we are blocked from GC so that\n    /// leases have a chance to be renewed.\n    lsn_lease_deadline: Option<tokio::time::Instant>,\n}\n\nimpl AttachedTenantConf {\n    fn new(\n        conf: &'static PageServerConf,\n        tenant_conf: pageserver_api::models::TenantConfig,\n        location: AttachedLocationConfig,\n    ) -> Self {\n        // Sets a deadline before which we cannot proceed to GC due to lsn lease.\n        //\n        // We do this as the leases mapping are not persisted to disk. 
By delaying GC by lease\n        // length, we guarantee that all the leases we granted before will have a chance to renew\n        // when we run GC for the first time after restart / transition from AttachedMulti to AttachedSingle.\n        let lsn_lease_deadline = if location.attach_mode == AttachmentMode::Single {\n            Some(\n                tokio::time::Instant::now()\n                    + TenantShard::get_lsn_lease_length_impl(conf, &tenant_conf),\n            )\n        } else {\n            // We don't use `lsn_lease_deadline` to delay GC in AttachedMulti and AttachedStale\n            // because we don't do GC in these modes.\n            None\n        };\n\n        Self {\n            tenant_conf,\n            location,\n            lsn_lease_deadline,\n        }\n    }\n\n    fn try_from(\n        conf: &'static PageServerConf,\n        location_conf: LocationConf,\n    ) -> anyhow::Result<Self> {\n        match &location_conf.mode {\n            LocationMode::Attached(attach_conf) => {\n                Ok(Self::new(conf, location_conf.tenant_conf, *attach_conf))\n            }\n            LocationMode::Secondary(_) => {\n                anyhow::bail!(\n                    \"Attempted to construct AttachedTenantConf from a LocationConf in secondary mode\"\n                )\n            }\n        }\n    }\n\n    fn is_gc_blocked_by_lsn_lease_deadline(&self) -> bool {\n        self.lsn_lease_deadline\n            .map(|d| tokio::time::Instant::now() < d)\n            .unwrap_or(false)\n    }\n}\nstruct TimelinePreload {\n    timeline_id: TimelineId,\n    client: RemoteTimelineClient,\n    index_part: Result<MaybeDeletedIndexPart, DownloadError>,\n    previous_heatmap: Option<PreviousHeatmap>,\n}\n\npub(crate) struct TenantPreload {\n    /// The tenant manifest from remote storage, or None if no manifest was found.\n    tenant_manifest: Option<TenantManifest>,\n    /// Map from timeline ID to a possible timeline preload. 
It is None iff the timeline is offloaded according to the manifest.\n    timelines: HashMap<TimelineId, Option<TimelinePreload>>,\n}\n\n/// When we spawn a tenant, there is a special mode for tenant creation that\n/// avoids trying to read anything from remote storage.\npub(crate) enum SpawnMode {\n    /// Activate as soon as possible\n    Eager,\n    /// Lazy activation in the background, with the option to skip the queue if the need comes up\n    Lazy,\n}\n\n///\n/// Tenant consists of multiple timelines. Keep them in a hash table.\n///\npub struct TenantShard {\n    // Global pageserver config parameters\n    pub conf: &'static PageServerConf,\n\n    /// The value creation timestamp, used to measure activation delay, see:\n    /// <https://github.com/neondatabase/neon/issues/4025>\n    constructed_at: Instant,\n\n    state: watch::Sender<TenantState>,\n\n    // Overridden tenant-specific config parameters.\n    // We keep pageserver_api::models::TenantConfig struct here to preserve the information\n    // about parameters that are not set.\n    // This is necessary to allow global config updates.\n    tenant_conf: Arc<ArcSwap<AttachedTenantConf>>,\n\n    tenant_shard_id: TenantShardId,\n\n    // The detailed sharding information, beyond the number/count in tenant_shard_id\n    shard_identity: ShardIdentity,\n\n    /// The remote storage generation, used to protect S3 objects from split-brain.\n    /// Does not change over the lifetime of the [`TenantShard`] object.\n    ///\n    /// This duplicates the generation stored in LocationConf, but that structure is mutable:\n    /// this copy enforces the invariant that generation doesn't change during a Tenant's lifetime.\n    generation: Generation,\n\n    timelines: Mutex<HashMap<TimelineId, Arc<Timeline>>>,\n\n    /// During timeline creation, we first insert the TimelineId to the\n    /// creating map, then `timelines`, then remove it from the creating map.\n    /// **Lock order**: if acquiring all (or a subset), 
acquire them in order `timelines`, `timelines_offloaded`, `timelines_creating`\n    timelines_creating: std::sync::Mutex<HashSet<TimelineId>>,\n\n    /// Possibly offloaded and archived timelines\n    /// **Lock order**: if acquiring all (or a subset), acquire them in order `timelines`, `timelines_offloaded`, `timelines_creating`\n    timelines_offloaded: Mutex<HashMap<TimelineId, Arc<OffloadedTimeline>>>,\n\n    /// Tracks the timelines that are currently importing into this tenant shard.\n    ///\n    /// Note that importing timelines are also present in [`Self::timelines_creating`].\n    /// Keep this in mind when ordering lock acquisition.\n    ///\n    /// Lifetime:\n    /// * An imported timeline is created while scanning the bucket on tenant attach\n    ///   if the index part contains an `import_pgdata` entry and said field marks the import\n    ///   as in progress.\n    /// * Imported timelines are removed when the storage controller calls the post timeline\n    ///   import activation endpoint.\n    timelines_importing: std::sync::Mutex<HashMap<TimelineId, Arc<ImportingTimeline>>>,\n\n    /// The last tenant manifest known to be in remote storage. None if the manifest has not yet\n    /// been either downloaded or uploaded. Always Some after tenant attach.\n    ///\n    /// Initially populated during tenant attach, updated via `maybe_upload_tenant_manifest`.\n    ///\n    /// Do not modify this directly. It is used to check whether a new manifest needs to be\n    /// uploaded. The manifest is constructed in `build_tenant_manifest`, and uploaded via\n    /// `maybe_upload_tenant_manifest`.\n    remote_tenant_manifest: tokio::sync::Mutex<Option<TenantManifest>>,\n\n    // This mutex prevents creation of new timelines during GC.\n    // Adding yet another mutex (in addition to `timelines`) is needed because holding\n    // `timelines` mutex during all GC iteration\n    // may block for a long time `get_timeline`, `get_timelines_state`,... 
and other operations\n    // with timelines, which in turn may cause dropping replication connection, expiration of wait_for_lsn\n    // timeout...\n    gc_cs: tokio::sync::Mutex<()>,\n    walredo_mgr: Option<Arc<WalRedoManager>>,\n\n    /// Provides access to timeline data sitting in the remote storage.\n    pub(crate) remote_storage: GenericRemoteStorage,\n\n    /// Access to global deletion queue for when this tenant wants to schedule a deletion.\n    deletion_queue_client: DeletionQueueClient,\n\n    /// A channel to send async requests to prepare a basebackup for the basebackup cache.\n    basebackup_cache: Arc<BasebackupCache>,\n\n    /// Cached logical sizes updated on each [`TenantShard::gather_size_inputs`].\n    cached_logical_sizes: tokio::sync::Mutex<HashMap<(TimelineId, Lsn), u64>>,\n    cached_synthetic_tenant_size: Arc<AtomicU64>,\n\n    eviction_task_tenant_state: tokio::sync::Mutex<EvictionTaskTenantState>,\n\n    /// Track repeated failures to compact, so that we can back off.\n    /// Overhead of mutex is acceptable because compaction is done with a multi-second period.\n    compaction_circuit_breaker: std::sync::Mutex<CircuitBreaker>,\n\n    /// Signals the tenant compaction loop that there is L0 compaction work to be done.\n    pub(crate) l0_compaction_trigger: Arc<Notify>,\n\n    /// Scheduled gc-compaction tasks.\n    scheduled_compaction_tasks: std::sync::Mutex<HashMap<TimelineId, Arc<GcCompactionQueue>>>,\n\n    /// If the tenant is in Activating state, notify this to encourage it\n    /// to proceed to Active as soon as possible, rather than waiting for lazy\n    /// background warmup.\n    pub(crate) activate_now_sem: tokio::sync::Semaphore,\n\n    /// Time it took for the tenant to activate. Zero if not active yet.\n    attach_wal_lag_cooldown: Arc<std::sync::OnceLock<WalLagCooldown>>,\n\n    // Cancellation token fires when we have entered shutdown().  
This is a parent of\n    // Timelines' cancellation token.\n    pub(crate) cancel: CancellationToken,\n\n    // Users of the TenantShard such as the page service must take this Gate to avoid\n    // trying to use a TenantShard which is shutting down.\n    pub(crate) gate: Gate,\n\n    /// Throttle applied at the top of [`Timeline::get`].\n    /// All [`TenantShard::timelines`] of a given [`TenantShard`] instance share the same [`throttle::Throttle`] instance.\n    pub(crate) pagestream_throttle: Arc<throttle::Throttle>,\n\n    pub(crate) pagestream_throttle_metrics: Arc<crate::metrics::tenant_throttling::Pagestream>,\n\n    /// An ongoing timeline detach concurrency limiter.\n    ///\n    /// As a tenant will likely be restarted as part of timeline detach ancestor it makes no sense\n    /// to have two running at the same time. A different one can be started if an earlier one\n    /// has failed for whatever reason.\n    ongoing_timeline_detach: std::sync::Mutex<Option<(TimelineId, utils::completion::Barrier)>>,\n\n    /// `index_part.json` based gc blocking reason tracking.\n    ///\n    /// New gc iterations must start a new iteration by acquiring `GcBlock::start` before\n    /// proceeding.\n    pub(crate) gc_block: gc_block::GcBlock,\n\n    l0_flush_global_state: L0FlushGlobalState,\n\n    pub(crate) feature_resolver: Arc<TenantFeatureResolver>,\n}\nimpl std::fmt::Debug for TenantShard {\n    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {\n        write!(f, \"{} ({})\", self.tenant_shard_id, self.current_state())\n    }\n}\n\npub(crate) enum WalRedoManager {\n    Prod(WalredoManagerId, PostgresRedoManager),\n    #[cfg(test)]\n    Test(harness::TestRedoManager),\n}\n\n#[derive(thiserror::Error, Debug)]\n#[error(\"pageserver is shutting down\")]\npub(crate) struct GlobalShutDown;\n\nimpl WalRedoManager {\n    pub(crate) fn new(mgr: PostgresRedoManager) -> Result<Arc<Self>, GlobalShutDown> {\n        let id = WalredoManagerId::next();\n       
 let arc = Arc::new(Self::Prod(id, mgr));\n        let mut guard = WALREDO_MANAGERS.lock().unwrap();\n        match &mut *guard {\n            Some(map) => {\n                map.insert(id, Arc::downgrade(&arc));\n                Ok(arc)\n            }\n            None => Err(GlobalShutDown),\n        }\n    }\n}\n\nimpl Drop for WalRedoManager {\n    fn drop(&mut self) {\n        match self {\n            Self::Prod(id, _) => {\n                let mut guard = WALREDO_MANAGERS.lock().unwrap();\n                if let Some(map) = &mut *guard {\n                    map.remove(id).expect(\"new() registers, drop() unregisters\");\n                }\n            }\n            #[cfg(test)]\n            Self::Test(_) => {\n                // Not applicable to test redo manager\n            }\n        }\n    }\n}\n\n/// Global registry of all walredo managers so that [`crate::shutdown_pageserver`] can shut down\n/// the walredo processes outside of the regular order.\n///\n/// This is necessary to work around a systemd bug where it freezes if there are\n/// walredo processes left => <https://github.com/neondatabase/cloud/issues/11387>\n#[allow(clippy::type_complexity)]\npub(crate) static WALREDO_MANAGERS: once_cell::sync::Lazy<\n    Mutex<Option<HashMap<WalredoManagerId, Weak<WalRedoManager>>>>,\n> = once_cell::sync::Lazy::new(|| Mutex::new(Some(HashMap::new())));\n#[derive(PartialEq, Eq, Hash, Clone, Copy, Debug)]\npub(crate) struct WalredoManagerId(u64);\nimpl WalredoManagerId {\n    pub fn next() -> Self {\n        static NEXT: std::sync::atomic::AtomicU64 = std::sync::atomic::AtomicU64::new(1);\n        let id = NEXT.fetch_add(1, std::sync::atomic::Ordering::Relaxed);\n        if id == 0 {\n            panic!(\n                \"WalredoManagerId::new() returned 0, indicating wraparound, risking it's no longer unique\"\n            );\n        }\n        Self(id)\n    }\n}\n\n#[cfg(test)]\nimpl From<harness::TestRedoManager> for WalRedoManager {\n    fn from(mgr: 
harness::TestRedoManager) -> Self {\n        Self::Test(mgr)\n    }\n}\n\nimpl WalRedoManager {\n    pub(crate) async fn shutdown(&self) -> bool {\n        match self {\n            Self::Prod(_, mgr) => mgr.shutdown().await,\n            #[cfg(test)]\n            Self::Test(_) => {\n                // Not applicable to test redo manager\n                true\n            }\n        }\n    }\n\n    pub(crate) fn maybe_quiesce(&self, idle_timeout: Duration) {\n        match self {\n            Self::Prod(_, mgr) => mgr.maybe_quiesce(idle_timeout),\n            #[cfg(test)]\n            Self::Test(_) => {\n                // Not applicable to test redo manager\n            }\n        }\n    }\n\n    /// # Cancel-Safety\n    ///\n    /// This method is cancellation-safe.\n    pub async fn request_redo(\n        &self,\n        key: pageserver_api::key::Key,\n        lsn: Lsn,\n        base_img: Option<(Lsn, bytes::Bytes)>,\n        records: Vec<(Lsn, wal_decoder::models::record::NeonWalRecord)>,\n        pg_version: PgMajorVersion,\n        redo_attempt_type: RedoAttemptType,\n    ) -> Result<bytes::Bytes, walredo::Error> {\n        match self {\n            Self::Prod(_, mgr) => {\n                mgr.request_redo(key, lsn, base_img, records, pg_version, redo_attempt_type)\n                    .await\n            }\n            #[cfg(test)]\n            Self::Test(mgr) => {\n                mgr.request_redo(key, lsn, base_img, records, pg_version, redo_attempt_type)\n                    .await\n            }\n        }\n    }\n\n    pub(crate) fn status(&self) -> Option<WalRedoManagerStatus> {\n        match self {\n            WalRedoManager::Prod(_, m) => Some(m.status()),\n            #[cfg(test)]\n            WalRedoManager::Test(_) => None,\n        }\n    }\n}\n\n/// A very lightweight memory representation of an offloaded timeline.\n///\n/// We need to store the list of offloaded timelines so that we can perform operations on them,\n/// like unoffloading them, 
or (at a later date), decide to perform flattening.\n/// This type has a much smaller memory impact than [`Timeline`], and thus we can store many\n/// more offloaded timelines than we can manage ones that aren't.\npub struct OffloadedTimeline {\n    pub tenant_shard_id: TenantShardId,\n    pub timeline_id: TimelineId,\n    pub ancestor_timeline_id: Option<TimelineId>,\n    /// Whether to retain the branch lsn at the ancestor or not\n    pub ancestor_retain_lsn: Option<Lsn>,\n\n    /// When the timeline was archived.\n    ///\n    /// Present for future flattening deliberations.\n    pub archived_at: NaiveDateTime,\n\n    /// Prevent two tasks from deleting the timeline at the same time. If held, the\n    /// timeline is being deleted. If 'true', the timeline has already been deleted.\n    pub delete_progress: TimelineDeleteProgress,\n\n    /// Part of the `OffloadedTimeline` object's lifecycle: this needs to be set before we drop it\n    pub deleted_from_ancestor: AtomicBool,\n\n    _metrics_guard: OffloadedTimelineMetricsGuard,\n}\n\n/// Increases the offloaded timeline count metric when created, and decreases when dropped.\nstruct OffloadedTimelineMetricsGuard;\n\nimpl OffloadedTimelineMetricsGuard {\n    fn new() -> Self {\n        TIMELINE_STATE_METRIC\n            .with_label_values(&[\"offloaded\"])\n            .inc();\n        Self\n    }\n}\n\nimpl Drop for OffloadedTimelineMetricsGuard {\n    fn drop(&mut self) {\n        TIMELINE_STATE_METRIC\n            .with_label_values(&[\"offloaded\"])\n            .dec();\n    }\n}\n\nimpl OffloadedTimeline {\n    /// Obtains an offloaded timeline from a given timeline object.\n    ///\n    /// Returns an error if the `archived_at` flag couldn't be obtained, i.e.\n    /// the timeline is not in a stopped state.\n    /// Panics if the timeline is not archived.\n    fn from_timeline(timeline: &Timeline) -> Result<Self, UploadQueueNotReadyError> {\n        let (ancestor_retain_lsn, ancestor_timeline_id) =\n            
if let Some(ancestor_timeline) = timeline.ancestor_timeline() {\n                let ancestor_lsn = timeline.get_ancestor_lsn();\n                let ancestor_timeline_id = ancestor_timeline.timeline_id;\n                let mut gc_info = ancestor_timeline.gc_info.write().unwrap();\n                gc_info.insert_child(timeline.timeline_id, ancestor_lsn, MaybeOffloaded::Yes);\n                (Some(ancestor_lsn), Some(ancestor_timeline_id))\n            } else {\n                (None, None)\n            };\n        let archived_at = timeline\n            .remote_client\n            .archived_at_stopped_queue()?\n            .expect(\"must be called on an archived timeline\");\n        Ok(Self {\n            tenant_shard_id: timeline.tenant_shard_id,\n            timeline_id: timeline.timeline_id,\n            ancestor_timeline_id,\n            ancestor_retain_lsn,\n            archived_at,\n\n            delete_progress: timeline.delete_progress.clone(),\n            deleted_from_ancestor: AtomicBool::new(false),\n\n            _metrics_guard: OffloadedTimelineMetricsGuard::new(),\n        })\n    }\n    fn from_manifest(tenant_shard_id: TenantShardId, manifest: &OffloadedTimelineManifest) -> Self {\n        // We expect to reach this case in tenant loading, where the `retain_lsn` is populated in the parent's `gc_info`\n        // by the `initialize_gc_info` function.\n        let OffloadedTimelineManifest {\n            timeline_id,\n            ancestor_timeline_id,\n            ancestor_retain_lsn,\n            archived_at,\n        } = *manifest;\n        Self {\n            tenant_shard_id,\n            timeline_id,\n            ancestor_timeline_id,\n            ancestor_retain_lsn,\n            archived_at,\n            delete_progress: TimelineDeleteProgress::default(),\n            deleted_from_ancestor: AtomicBool::new(false),\n            _metrics_guard: OffloadedTimelineMetricsGuard::new(),\n        }\n    }\n    fn manifest(&self) -> 
OffloadedTimelineManifest {\n        let Self {\n            timeline_id,\n            ancestor_timeline_id,\n            ancestor_retain_lsn,\n            archived_at,\n            ..\n        } = self;\n        OffloadedTimelineManifest {\n            timeline_id: *timeline_id,\n            ancestor_timeline_id: *ancestor_timeline_id,\n            ancestor_retain_lsn: *ancestor_retain_lsn,\n            archived_at: *archived_at,\n        }\n    }\n    /// Delete this timeline's retain_lsn from its ancestor, if present in the given tenant\n    fn delete_from_ancestor_with_timelines(\n        &self,\n        timelines: &std::sync::MutexGuard<'_, HashMap<TimelineId, Arc<Timeline>>>,\n    ) {\n        if let (Some(_retain_lsn), Some(ancestor_timeline_id)) =\n            (self.ancestor_retain_lsn, self.ancestor_timeline_id)\n        {\n            if let Some((_, ancestor_timeline)) = timelines\n                .iter()\n                .find(|(tid, _tl)| **tid == ancestor_timeline_id)\n            {\n                let removal_happened = ancestor_timeline\n                    .gc_info\n                    .write()\n                    .unwrap()\n                    .remove_child_offloaded(self.timeline_id);\n                if !removal_happened {\n                    tracing::error!(tenant_id = %self.tenant_shard_id.tenant_id, shard_id = %self.tenant_shard_id.shard_slug(), timeline_id = %self.timeline_id,\n                        \"Couldn't remove retain_lsn entry from offloaded timeline's parent: already removed\");\n                }\n            }\n        }\n        self.deleted_from_ancestor.store(true, Ordering::Release);\n    }\n    /// Call [`Self::delete_from_ancestor_with_timelines`] instead if possible.\n    ///\n    /// As the entire tenant is being dropped, don't bother deregistering the `retain_lsn` from the ancestor.\n    fn defuse_for_tenant_drop(&self) {\n        self.deleted_from_ancestor.store(true, Ordering::Release);\n    }\n}\n\nimpl fmt::Debug 
for OffloadedTimeline {\n    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {\n        write!(f, \"OffloadedTimeline<{}>\", self.timeline_id)\n    }\n}\n\nimpl Drop for OffloadedTimeline {\n    fn drop(&mut self) {\n        if !self.deleted_from_ancestor.load(Ordering::Acquire) {\n            tracing::warn!(\n                \"offloaded timeline {} was dropped without having cleaned it up at the ancestor\",\n                self.timeline_id\n            );\n        }\n    }\n}\n\n#[derive(Copy, Clone, PartialEq, Eq, Hash, Debug)]\npub enum MaybeOffloaded {\n    Yes,\n    No,\n}\n\n#[derive(Clone, Debug)]\npub enum TimelineOrOffloaded {\n    Timeline(Arc<Timeline>),\n    Offloaded(Arc<OffloadedTimeline>),\n    Importing(Arc<ImportingTimeline>),\n}\n\nimpl TimelineOrOffloaded {\n    pub fn arc_ref(&self) -> TimelineOrOffloadedArcRef<'_> {\n        match self {\n            TimelineOrOffloaded::Timeline(timeline) => {\n                TimelineOrOffloadedArcRef::Timeline(timeline)\n            }\n            TimelineOrOffloaded::Offloaded(offloaded) => {\n                TimelineOrOffloadedArcRef::Offloaded(offloaded)\n            }\n            TimelineOrOffloaded::Importing(importing) => {\n                TimelineOrOffloadedArcRef::Importing(importing)\n            }\n        }\n    }\n    pub fn tenant_shard_id(&self) -> TenantShardId {\n        self.arc_ref().tenant_shard_id()\n    }\n    pub fn timeline_id(&self) -> TimelineId {\n        self.arc_ref().timeline_id()\n    }\n    pub fn delete_progress(&self) -> &Arc<tokio::sync::Mutex<DeleteTimelineFlow>> {\n        match self {\n            TimelineOrOffloaded::Timeline(timeline) => &timeline.delete_progress,\n            TimelineOrOffloaded::Offloaded(offloaded) => &offloaded.delete_progress,\n            TimelineOrOffloaded::Importing(importing) => &importing.delete_progress,\n        }\n    }\n    fn maybe_remote_client(&self) -> Option<Arc<RemoteTimelineClient>> {\n        match self {\n            
TimelineOrOffloaded::Timeline(timeline) => Some(timeline.remote_client.clone()),\n            TimelineOrOffloaded::Offloaded(_offloaded) => None,\n            TimelineOrOffloaded::Importing(importing) => {\n                Some(importing.timeline.remote_client.clone())\n            }\n        }\n    }\n}\n\npub enum TimelineOrOffloadedArcRef<'a> {\n    Timeline(&'a Arc<Timeline>),\n    Offloaded(&'a Arc<OffloadedTimeline>),\n    Importing(&'a Arc<ImportingTimeline>),\n}\n\nimpl TimelineOrOffloadedArcRef<'_> {\n    pub fn tenant_shard_id(&self) -> TenantShardId {\n        match self {\n            TimelineOrOffloadedArcRef::Timeline(timeline) => timeline.tenant_shard_id,\n            TimelineOrOffloadedArcRef::Offloaded(offloaded) => offloaded.tenant_shard_id,\n            TimelineOrOffloadedArcRef::Importing(importing) => importing.timeline.tenant_shard_id,\n        }\n    }\n    pub fn timeline_id(&self) -> TimelineId {\n        match self {\n            TimelineOrOffloadedArcRef::Timeline(timeline) => timeline.timeline_id,\n            TimelineOrOffloadedArcRef::Offloaded(offloaded) => offloaded.timeline_id,\n            TimelineOrOffloadedArcRef::Importing(importing) => importing.timeline.timeline_id,\n        }\n    }\n}\n\nimpl<'a> From<&'a Arc<Timeline>> for TimelineOrOffloadedArcRef<'a> {\n    fn from(timeline: &'a Arc<Timeline>) -> Self {\n        Self::Timeline(timeline)\n    }\n}\n\nimpl<'a> From<&'a Arc<OffloadedTimeline>> for TimelineOrOffloadedArcRef<'a> {\n    fn from(timeline: &'a Arc<OffloadedTimeline>) -> Self {\n        Self::Offloaded(timeline)\n    }\n}\n\nimpl<'a> From<&'a Arc<ImportingTimeline>> for TimelineOrOffloadedArcRef<'a> {\n    fn from(timeline: &'a Arc<ImportingTimeline>) -> Self {\n        Self::Importing(timeline)\n    }\n}\n\n#[derive(Debug, thiserror::Error, PartialEq, Eq)]\npub enum GetTimelineError {\n    #[error(\"Timeline is shutting down\")]\n    ShuttingDown,\n    #[error(\"Timeline {tenant_id}/{timeline_id} is not active, 
state: {state:?}\")]\n    NotActive {\n        tenant_id: TenantShardId,\n        timeline_id: TimelineId,\n        state: TimelineState,\n    },\n    #[error(\"Timeline {tenant_id}/{timeline_id} was not found\")]\n    NotFound {\n        tenant_id: TenantShardId,\n        timeline_id: TimelineId,\n    },\n}\n\n#[derive(Debug, thiserror::Error)]\npub enum LoadLocalTimelineError {\n    #[error(\"FailedToLoad\")]\n    Load(#[source] anyhow::Error),\n    #[error(\"FailedToResumeDeletion\")]\n    ResumeDeletion(#[source] anyhow::Error),\n}\n\n#[derive(thiserror::Error)]\npub enum DeleteTimelineError {\n    #[error(\"NotFound\")]\n    NotFound,\n\n    #[error(\"HasChildren\")]\n    HasChildren(Vec<TimelineId>),\n\n    #[error(\"Timeline deletion is already in progress\")]\n    AlreadyInProgress(Arc<tokio::sync::Mutex<DeleteTimelineFlow>>),\n\n    #[error(\"Cancelled\")]\n    Cancelled,\n\n    #[error(transparent)]\n    Other(#[from] anyhow::Error),\n}\n\nimpl Debug for DeleteTimelineError {\n    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {\n        match self {\n            Self::NotFound => write!(f, \"NotFound\"),\n            Self::HasChildren(c) => f.debug_tuple(\"HasChildren\").field(c).finish(),\n            Self::AlreadyInProgress(_) => f.debug_tuple(\"AlreadyInProgress\").finish(),\n            Self::Cancelled => f.debug_tuple(\"Cancelled\").finish(),\n            Self::Other(e) => f.debug_tuple(\"Other\").field(e).finish(),\n        }\n    }\n}\n\n#[derive(thiserror::Error)]\npub enum TimelineArchivalError {\n    #[error(\"NotFound\")]\n    NotFound,\n\n    #[error(\"Timeout\")]\n    Timeout,\n\n    #[error(\"Cancelled\")]\n    Cancelled,\n\n    #[error(\"ancestor is archived: {}\", .0)]\n    HasArchivedParent(TimelineId),\n\n    #[error(\"HasUnarchivedChildren\")]\n    HasUnarchivedChildren(Vec<TimelineId>),\n\n    #[error(\"Timeline archival is already in progress\")]\n    AlreadyInProgress,\n\n    #[error(transparent)]\n    
Other(anyhow::Error),\n}\n\n#[derive(thiserror::Error, Debug)]\npub(crate) enum TenantManifestError {\n    #[error(\"Remote storage error: {0}\")]\n    RemoteStorage(anyhow::Error),\n\n    #[error(\"Cancelled\")]\n    Cancelled,\n}\n\nimpl From<TenantManifestError> for TimelineArchivalError {\n    fn from(e: TenantManifestError) -> Self {\n        match e {\n            TenantManifestError::RemoteStorage(e) => Self::Other(e),\n            TenantManifestError::Cancelled => Self::Cancelled,\n        }\n    }\n}\n\nimpl Debug for TimelineArchivalError {\n    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {\n        match self {\n            Self::NotFound => write!(f, \"NotFound\"),\n            Self::Timeout => write!(f, \"Timeout\"),\n            Self::Cancelled => write!(f, \"Cancelled\"),\n            Self::HasArchivedParent(p) => f.debug_tuple(\"HasArchivedParent\").field(p).finish(),\n            Self::HasUnarchivedChildren(c) => {\n                f.debug_tuple(\"HasUnarchivedChildren\").field(c).finish()\n            }\n            Self::AlreadyInProgress => f.debug_tuple(\"AlreadyInProgress\").finish(),\n            Self::Other(e) => f.debug_tuple(\"Other\").field(e).finish(),\n        }\n    }\n}\n\npub enum SetStoppingError {\n    AlreadyStopping(completion::Barrier),\n    Broken,\n}\n\nimpl Debug for SetStoppingError {\n    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {\n        match self {\n            Self::AlreadyStopping(_) => f.debug_tuple(\"AlreadyStopping\").finish(),\n            Self::Broken => write!(f, \"Broken\"),\n        }\n    }\n}\n\n#[derive(thiserror::Error, Debug)]\npub(crate) enum FinalizeTimelineImportError {\n    #[error(\"Import task not done yet\")]\n    ImportTaskStillRunning,\n    #[error(\"Shutting down\")]\n    ShuttingDown,\n}\n\n/// Arguments to [`TenantShard::create_timeline`].\n///\n/// Not usable as an idempotency key for timeline creation because if 
[`CreateTimelineParamsBranch::ancestor_start_lsn`]\n/// is `None`, the result of the timeline create call is not deterministic.\n///\n/// See [`CreateTimelineIdempotency`] for an idempotency key.\n#[derive(Debug)]\npub(crate) enum CreateTimelineParams {\n    Bootstrap(CreateTimelineParamsBootstrap),\n    Branch(CreateTimelineParamsBranch),\n    ImportPgdata(CreateTimelineParamsImportPgdata),\n}\n\n#[derive(Debug)]\npub(crate) struct CreateTimelineParamsBootstrap {\n    pub(crate) new_timeline_id: TimelineId,\n    pub(crate) existing_initdb_timeline_id: Option<TimelineId>,\n    pub(crate) pg_version: PgMajorVersion,\n}\n\n/// NB: See comment on [`CreateTimelineIdempotency::Branch`] for why there's no `pg_version` here.\n#[derive(Debug)]\npub(crate) struct CreateTimelineParamsBranch {\n    pub(crate) new_timeline_id: TimelineId,\n    pub(crate) ancestor_timeline_id: TimelineId,\n    pub(crate) ancestor_start_lsn: Option<Lsn>,\n}\n\n#[derive(Debug)]\npub(crate) struct CreateTimelineParamsImportPgdata {\n    pub(crate) new_timeline_id: TimelineId,\n    pub(crate) location: import_pgdata::index_part_format::Location,\n    pub(crate) idempotency_key: import_pgdata::index_part_format::IdempotencyKey,\n}\n\n/// What is used to determine idempotency of a [`TenantShard::create_timeline`] call in [`TenantShard::start_creating_timeline`].\n///\n/// Each [`Timeline`] object holds [`Self`] as an immutable property in [`Timeline::create_idempotency`].\n///\n/// We lower timeline creation requests to [`Self`], and then use [`PartialEq::eq`] to compare [`Timeline::create_idempotency`] with the request.\n/// If they are equal, we return a reference to the existing timeline, otherwise it's an idempotency conflict.\n///\n/// There is special treatment for [`Self::FailWithConflict`] to always return an idempotency conflict.\n/// It would be nice to have more advanced derive macros to make that special treatment declarative.\n///\n/// 
Notes:\n/// - Unlike [`CreateTimelineParams`], ancestor LSN is fixed, so, branching will be at a deterministic LSN.\n/// - We make some trade-offs though, e.g., [`CreateTimelineParamsBootstrap::existing_initdb_timeline_id`]\n///   is not considered for idempotency. We can improve on this over time if we deem it necessary.\n///\n#[derive(Debug, Clone, PartialEq, Eq)]\npub(crate) enum CreateTimelineIdempotency {\n    /// NB: special treatment, see comment in [`Self`].\n    FailWithConflict,\n    Bootstrap {\n        pg_version: PgMajorVersion,\n    },\n    /// NB: branches always have the same `pg_version` as their ancestor.\n    /// While [`pageserver_api::models::TimelineCreateRequestMode::Branch::pg_version`]\n    /// exists as a field, and is set by cplane, it has always been ignored by pageserver when\n    /// determining the child branch pg_version.\n    Branch {\n        ancestor_timeline_id: TimelineId,\n        ancestor_start_lsn: Lsn,\n    },\n    ImportPgdata(CreatingTimelineIdempotencyImportPgdata),\n}\n\n#[derive(Debug, Clone, PartialEq, Eq)]\npub(crate) struct CreatingTimelineIdempotencyImportPgdata {\n    idempotency_key: import_pgdata::index_part_format::IdempotencyKey,\n}\n\n/// What is returned by [`TenantShard::start_creating_timeline`].\n#[must_use]\nenum StartCreatingTimelineResult {\n    CreateGuard(TimelineCreateGuard),\n    Idempotent(Arc<Timeline>),\n}\n\n#[allow(clippy::large_enum_variant, reason = \"TODO\")]\nenum TimelineInitAndSyncResult {\n    ReadyToActivate,\n    NeedsSpawnImportPgdata(TimelineInitAndSyncNeedsSpawnImportPgdata),\n}\n\n#[must_use]\nstruct TimelineInitAndSyncNeedsSpawnImportPgdata {\n    timeline: Arc<Timeline>,\n    import_pgdata: import_pgdata::index_part_format::Root,\n    guard: TimelineCreateGuard,\n}\n\n/// What is returned by [`TenantShard::create_timeline`].\nenum CreateTimelineResult {\n    Created(Arc<Timeline>),\n    Idempotent(Arc<Timeline>),\n    /// IMPORTANT: This [`Arc<Timeline>`] object is not in 
[`TenantShard::timelines`] when\n    /// we return this result, nor will this concrete object ever be added there.\n    /// Cf method comment on [`TenantShard::create_timeline_import_pgdata`].\n    ImportSpawned(Arc<Timeline>),\n}\n\nimpl CreateTimelineResult {\n    fn discriminant(&self) -> &'static str {\n        match self {\n            Self::Created(_) => \"Created\",\n            Self::Idempotent(_) => \"Idempotent\",\n            Self::ImportSpawned(_) => \"ImportSpawned\",\n        }\n    }\n    fn timeline(&self) -> &Arc<Timeline> {\n        match self {\n            Self::Created(t) | Self::Idempotent(t) | Self::ImportSpawned(t) => t,\n        }\n    }\n    /// Unit test timelines aren't activated, test has to do it if it needs to.\n    #[cfg(test)]\n    fn into_timeline_for_test(self) -> Arc<Timeline> {\n        match self {\n            Self::Created(t) | Self::Idempotent(t) | Self::ImportSpawned(t) => t,\n        }\n    }\n}\n\n#[derive(thiserror::Error, Debug)]\npub enum CreateTimelineError {\n    #[error(\"creation of timeline with the given ID is in progress\")]\n    AlreadyCreating,\n    #[error(\"timeline already exists with different parameters\")]\n    Conflict,\n    #[error(transparent)]\n    AncestorLsn(anyhow::Error),\n    #[error(\"ancestor timeline is not active\")]\n    AncestorNotActive,\n    #[error(\"ancestor timeline is archived\")]\n    AncestorArchived,\n    #[error(\"tenant shutting down\")]\n    ShuttingDown,\n    #[error(transparent)]\n    Other(#[from] anyhow::Error),\n}\n\n#[derive(thiserror::Error, Debug)]\npub enum InitdbError {\n    #[error(\"Operation was cancelled\")]\n    Cancelled,\n    #[error(transparent)]\n    Other(anyhow::Error),\n    #[error(transparent)]\n    Inner(postgres_initdb::Error),\n}\n\nenum CreateTimelineCause {\n    Load,\n    Delete,\n}\n\n#[allow(clippy::large_enum_variant, reason = \"TODO\")]\nenum LoadTimelineCause {\n    Attach,\n    Unoffload,\n}\n\n#[derive(thiserror::Error, Debug)]\npub(crate) 
enum GcError {\n    // The tenant is shutting down\n    #[error(\"tenant shutting down\")]\n    TenantCancelled,\n\n    // The timeline is shutting down\n    #[error(\"timeline shutting down\")]\n    TimelineCancelled,\n\n    // The tenant is in a state ineligible to run GC\n    #[error(\"not active\")]\n    NotActive,\n\n    // A requested GC cutoff LSN was invalid, for example it tried to move backwards\n    #[error(\"not active\")]\n    BadLsn { why: String },\n\n    // A remote storage error while scheduling updates after compaction\n    #[error(transparent)]\n    Remote(anyhow::Error),\n\n    // An error reading while calculating GC cutoffs\n    #[error(transparent)]\n    GcCutoffs(PageReconstructError),\n\n    // If GC was invoked for a particular timeline, this error means it didn't exist\n    #[error(\"timeline not found\")]\n    TimelineNotFound,\n}\n\nimpl From<PageReconstructError> for GcError {\n    fn from(value: PageReconstructError) -> Self {\n        match value {\n            PageReconstructError::Cancelled => Self::TimelineCancelled,\n            other => Self::GcCutoffs(other),\n        }\n    }\n}\n\nimpl From<NotInitialized> for GcError {\n    fn from(value: NotInitialized) -> Self {\n        match value {\n            NotInitialized::Uninitialized => GcError::Remote(value.into()),\n            NotInitialized::Stopped | NotInitialized::ShuttingDown => GcError::TimelineCancelled,\n        }\n    }\n}\n\nimpl From<timeline::layer_manager::Shutdown> for GcError {\n    fn from(_: timeline::layer_manager::Shutdown) -> Self {\n        GcError::TimelineCancelled\n    }\n}\n\n#[derive(thiserror::Error, Debug)]\npub(crate) enum LoadConfigError {\n    #[error(\"TOML deserialization error: '{0}'\")]\n    DeserializeToml(#[from] toml_edit::de::Error),\n\n    #[error(\"Config not found at {0}\")]\n    NotFound(Utf8PathBuf),\n}\n\nimpl TenantShard {\n    /// Yet another helper for timeline initialization.\n    ///\n    /// - Initializes the Timeline struct and 
inserts it into the tenant's hash map\n    /// - Scans the local timeline directory for layer files and builds the layer map\n    /// - Downloads remote index file and adds remote files to the layer map\n    /// - Schedules remote upload tasks for any files that are present locally but missing from remote storage.\n    ///\n    /// If the operation fails, the timeline is left in the tenant's hash map in Broken state. On success,\n    /// it is marked as Active.\n    #[allow(clippy::too_many_arguments)]\n    async fn timeline_init_and_sync(\n        self: &Arc<Self>,\n        timeline_id: TimelineId,\n        resources: TimelineResources,\n        index_part: IndexPart,\n        metadata: TimelineMetadata,\n        previous_heatmap: Option<PreviousHeatmap>,\n        ancestor: Option<Arc<Timeline>>,\n        cause: LoadTimelineCause,\n        ctx: &RequestContext,\n    ) -> anyhow::Result<TimelineInitAndSyncResult> {\n        let tenant_id = self.tenant_shard_id;\n\n        let import_pgdata = index_part.import_pgdata.clone();\n        let idempotency = match &import_pgdata {\n            Some(import_pgdata) => {\n                CreateTimelineIdempotency::ImportPgdata(CreatingTimelineIdempotencyImportPgdata {\n                    idempotency_key: import_pgdata.idempotency_key().clone(),\n                })\n            }\n            None => {\n                if metadata.ancestor_timeline().is_none() {\n                    CreateTimelineIdempotency::Bootstrap {\n                        pg_version: metadata.pg_version(),\n                    }\n                } else {\n                    CreateTimelineIdempotency::Branch {\n                        ancestor_timeline_id: metadata.ancestor_timeline().unwrap(),\n                        ancestor_start_lsn: metadata.ancestor_lsn(),\n                    }\n                }\n            }\n        };\n\n        let (timeline, _timeline_ctx) = self.create_timeline_struct(\n            timeline_id,\n            
&metadata,\n            previous_heatmap,\n            ancestor.clone(),\n            resources,\n            CreateTimelineCause::Load,\n            idempotency.clone(),\n            index_part.gc_compaction.clone(),\n            index_part.rel_size_migration.clone(),\n            index_part.rel_size_migrated_at,\n            ctx,\n        )?;\n        let disk_consistent_lsn = timeline.get_disk_consistent_lsn();\n\n        if !disk_consistent_lsn.is_valid() {\n            // As opposed to normal timelines which get initialised with a disk consistent LSN\n            // via initdb, imported timelines start from 0. If the import task stops before\n            // it advances disk consistent LSN, allow it to resume.\n            let in_progress_import = import_pgdata\n                .as_ref()\n                .map(|import| !import.is_done())\n                .unwrap_or(false);\n            if !in_progress_import {\n                anyhow::bail!(\"Timeline {tenant_id}/{timeline_id} has invalid disk_consistent_lsn\");\n            }\n        }\n\n        assert_eq!(\n            disk_consistent_lsn,\n            metadata.disk_consistent_lsn(),\n            \"these are used interchangeably\"\n        );\n\n        timeline.remote_client.init_upload_queue(&index_part)?;\n\n        timeline\n            .load_layer_map(disk_consistent_lsn, index_part)\n            .await\n            .with_context(|| {\n                format!(\"Failed to load layermap for timeline {tenant_id}/{timeline_id}\")\n            })?;\n\n        // When unarchiving, we've most likely lost the heatmap generated prior\n        // to the archival operation. 
To allow warming this timeline up, generate\n        // a previous heatmap which contains all visible layers in the layer map.\n        // This previous heatmap will be used whenever a fresh heatmap is generated\n        // for the timeline.\n        if self.conf.generate_unarchival_heatmap && matches!(cause, LoadTimelineCause::Unoffload) {\n            let mut tline_ending_at = Some((&timeline, timeline.get_last_record_lsn()));\n            while let Some((tline, end_lsn)) = tline_ending_at {\n                let unarchival_heatmap = tline.generate_unarchival_heatmap(end_lsn).await;\n                // Another unarchived timeline might have generated a heatmap for this ancestor.\n                // If the current branch point is greater than the previous one, use the heatmap\n                // we just generated - it should include more layers.\n                if !tline.should_keep_previous_heatmap(end_lsn) {\n                    tline\n                        .previous_heatmap\n                        .store(Some(Arc::new(unarchival_heatmap)));\n                } else {\n                    tracing::info!(\"Previous heatmap preferred. 
Dropping unarchival heatmap.\")\n                }\n\n                match tline.ancestor_timeline() {\n                    Some(ancestor) => {\n                        if ancestor.update_layer_visibility().await.is_err() {\n                            // Ancestor timeline is shutting down.\n                            break;\n                        }\n\n                        tline_ending_at = Some((ancestor, tline.get_ancestor_lsn()));\n                    }\n                    None => {\n                        tline_ending_at = None;\n                    }\n                }\n            }\n        }\n\n        match import_pgdata {\n            Some(import_pgdata) if !import_pgdata.is_done() => {\n                let mut guard = self.timelines_creating.lock().unwrap();\n                if !guard.insert(timeline_id) {\n                    // We should never try and load the same timeline twice during startup\n                    unreachable!(\"Timeline {tenant_id}/{timeline_id} is already being created\")\n                }\n                let timeline_create_guard = TimelineCreateGuard {\n                    _tenant_gate_guard: self.gate.enter()?,\n                    owning_tenant: self.clone(),\n                    timeline_id,\n                    idempotency,\n                    // The users of this specific return value don't need the timeline_path in there.\n                    timeline_path: timeline\n                        .conf\n                        .timeline_path(&timeline.tenant_shard_id, &timeline.timeline_id),\n                };\n                Ok(TimelineInitAndSyncResult::NeedsSpawnImportPgdata(\n                    TimelineInitAndSyncNeedsSpawnImportPgdata {\n                        timeline,\n                        import_pgdata,\n                        guard: timeline_create_guard,\n                    },\n                ))\n            }\n            Some(_) | None => {\n                {\n                    let mut 
timelines_accessor = self.timelines.lock().unwrap();\n                    match timelines_accessor.entry(timeline_id) {\n                        // We should never try and load the same timeline twice during startup\n                        Entry::Occupied(_) => {\n                            unreachable!(\n                                \"Timeline {tenant_id}/{timeline_id} already exists in the tenant map\"\n                            );\n                        }\n                        Entry::Vacant(v) => {\n                            v.insert(Arc::clone(&timeline));\n                            timeline.maybe_spawn_flush_loop();\n                        }\n                    }\n                }\n\n                if disk_consistent_lsn.is_valid() {\n                    // Sanity check: a timeline should have some content.\n                    // Exception: importing timelines might not yet have any\n                    anyhow::ensure!(\n                        ancestor.is_some()\n                            || timeline\n                                .layers\n                                .read(LayerManagerLockHolder::LoadLayerMap)\n                                .await\n                                .layer_map()\n                                .expect(\n                                    \"currently loading, layer manager cannot be shutdown already\"\n                                )\n                                .iter_historic_layers()\n                                .next()\n                                .is_some(),\n                        \"Timeline has no ancestor and no layer files\"\n                    );\n                }\n\n                Ok(TimelineInitAndSyncResult::ReadyToActivate)\n            }\n        }\n    }\n\n    /// Attach a tenant that's available in cloud storage.\n    ///\n    /// This returns quickly, after just creating the in-memory object\n    /// Tenant struct and launching a background task to download\n    
/// the remote index files.  On return, the tenant is most likely still in\n    /// Attaching state, and it will become Active once the background task\n    /// finishes. You can use wait_until_active() to wait for the task to\n    /// complete.\n    ///\n    #[allow(clippy::too_many_arguments)]\n    pub(crate) fn spawn(\n        conf: &'static PageServerConf,\n        tenant_shard_id: TenantShardId,\n        resources: TenantSharedResources,\n        attached_conf: AttachedTenantConf,\n        shard_identity: ShardIdentity,\n        init_order: Option<InitializationOrder>,\n        mode: SpawnMode,\n        ctx: &RequestContext,\n    ) -> Result<Arc<TenantShard>, GlobalShutDown> {\n        let wal_redo_manager =\n            WalRedoManager::new(PostgresRedoManager::new(conf, tenant_shard_id))?;\n\n        let TenantSharedResources {\n            broker_client,\n            remote_storage,\n            deletion_queue_client,\n            l0_flush_global_state,\n            basebackup_cache,\n            feature_resolver,\n        } = resources;\n\n        let attach_mode = attached_conf.location.attach_mode;\n        let generation = attached_conf.location.generation;\n\n        let tenant = Arc::new(TenantShard::new(\n            TenantState::Attaching,\n            conf,\n            attached_conf,\n            shard_identity,\n            Some(wal_redo_manager),\n            tenant_shard_id,\n            remote_storage.clone(),\n            deletion_queue_client,\n            l0_flush_global_state,\n            basebackup_cache,\n            feature_resolver,\n        ));\n\n        // The attach task will carry a GateGuard, so that shutdown() reliably waits for it to drop out if\n        // we shut down while attaching.\n        let attach_gate_guard = tenant\n            .gate\n            .enter()\n            .expect(\"We just created the TenantShard: nothing else can have shut it down yet\");\n\n        // Do all the hard work in the background\n        let 
tenant_clone = Arc::clone(&tenant);\n        let ctx = ctx.detached_child(TaskKind::Attach, DownloadBehavior::Warn);\n        task_mgr::spawn(\n            &tokio::runtime::Handle::current(),\n            TaskKind::Attach,\n            tenant_shard_id,\n            None,\n            \"attach tenant\",\n            async move {\n\n                info!(\n                    ?attach_mode,\n                    \"Attaching tenant\"\n                );\n\n                let _gate_guard = attach_gate_guard;\n\n                // Is this tenant being spawned as part of process startup?\n                let starting_up = init_order.is_some();\n                scopeguard::defer! {\n                    if starting_up {\n                        TENANT.startup_complete.inc();\n                    }\n                }\n\n                fn make_broken_or_stopping(t: &TenantShard, err: anyhow::Error) {\n                    t.state.send_modify(|state| match state {\n                        // TODO: the old code alluded to DeleteTenantFlow sometimes setting\n                        // TenantState::Stopping before we get here, but this may be outdated.\n                        // Let's find out with a testing assertion. 
If this doesn't fire, and the\n                        // logs don't show this happening in production, remove the Stopping cases.\n                        TenantState::Stopping{..} if cfg!(any(test, feature = \"testing\")) => {\n                            panic!(\"unexpected TenantState::Stopping during attach\")\n                        }\n                        // If the tenant is cancelled, assume the error was caused by cancellation.\n                        TenantState::Attaching if t.cancel.is_cancelled() => {\n                            info!(\"attach cancelled, setting tenant state to Stopping: {err}\");\n                            // NB: progress None tells `set_stopping` that attach has cancelled.\n                            *state = TenantState::Stopping { progress: None };\n                        }\n                        // According to the old code, DeleteTenantFlow may already have set this to\n                        // Stopping. Retain its progress.\n                        // TODO: there is no DeleteTenantFlow. Is this still needed? See above.\n                        TenantState::Stopping { progress } if t.cancel.is_cancelled() => {\n                            assert!(progress.is_some(), \"concurrent attach cancellation\");\n                            info!(\"attach cancelled, already Stopping: {err}\");\n                        }\n                        // Mark the tenant as broken.\n                        TenantState::Attaching | TenantState::Stopping { .. 
} => {\n                            error!(\"attach failed, setting tenant state to Broken (was {state}): {err:?}\");\n                            *state = TenantState::broken_from_reason(err.to_string())\n                        }\n                        // The attach task owns the tenant state until activated.\n                        state => panic!(\"invalid tenant state {state} during attach: {err:?}\"),\n                    });\n                }\n\n                // TODO: should also be rejecting tenant conf changes that violate this check.\n                if let Err(e) = crate::tenant::storage_layer::inmemory_layer::IndexEntry::validate_checkpoint_distance(tenant_clone.get_checkpoint_distance()) {\n                    make_broken_or_stopping(&tenant_clone, anyhow::anyhow!(e));\n                    return Ok(());\n                }\n\n                let mut init_order = init_order;\n                // take the completion because initial tenant loading will complete when all of\n                // these tasks complete.\n                let _completion = init_order\n                    .as_mut()\n                    .and_then(|x| x.initial_tenant_load.take());\n                let remote_load_completion = init_order\n                    .as_mut()\n                    .and_then(|x| x.initial_tenant_load_remote.take());\n\n                enum AttachType<'a> {\n                    /// We are attaching this tenant lazily in the background.\n                    Warmup {\n                        _permit: tokio::sync::SemaphorePermit<'a>,\n                        during_startup: bool\n                    },\n                    /// We are attaching this tenant as soon as we can, because for example an\n                    /// endpoint tried to access it.\n                    OnDemand,\n                    /// During normal operations after startup, we are attaching a tenant, and\n                    /// eager attach was requested.\n                    Normal,\n  
              }\n\n                let attach_type = if matches!(mode, SpawnMode::Lazy) {\n                    // Before doing any I/O, wait for at least one of:\n                    // - A client attempting to access to this tenant (on-demand loading)\n                    // - A permit becoming available in the warmup semaphore (background warmup)\n\n                    tokio::select!(\n                        permit = tenant_clone.activate_now_sem.acquire() => {\n                            let _ = permit.expect(\"activate_now_sem is never closed\");\n                            tracing::info!(\"Activating tenant (on-demand)\");\n                            AttachType::OnDemand\n                        },\n                        permit = conf.concurrent_tenant_warmup.inner().acquire() => {\n                            let _permit = permit.expect(\"concurrent_tenant_warmup semaphore is never closed\");\n                            tracing::info!(\"Activating tenant (warmup)\");\n                            AttachType::Warmup {\n                                _permit,\n                                during_startup: init_order.is_some()\n                            }\n                        }\n                        _ = tenant_clone.cancel.cancelled() => {\n                            // This is safe, but should be pretty rare: it is interesting if a tenant\n                            // stayed in Activating for such a long time that shutdown found it in\n                            // that state.\n                            tracing::info!(state=%tenant_clone.current_state(), \"Tenant shut down before activation\");\n                            // Set the tenant to Stopping to signal `set_stopping` that we're done.\n                            make_broken_or_stopping(&tenant_clone, anyhow::anyhow!(\"Shut down while Attaching\"));\n                            return Ok(());\n                        },\n                    )\n                } else {\n          
          // SpawnMode::{Create,Eager} always cause jumping ahead of the\n                    // concurrent_tenant_warmup queue\n                    AttachType::Normal\n                };\n\n                let preload = match &mode {\n                    SpawnMode::Eager | SpawnMode::Lazy => {\n                        let _preload_timer = TENANT.preload.start_timer();\n                        let res = tenant_clone\n                            .preload(&remote_storage, task_mgr::shutdown_token())\n                            .await;\n                        match res {\n                            Ok(p) => Some(p),\n                            Err(e) => {\n                                make_broken_or_stopping(&tenant_clone, anyhow::anyhow!(e));\n                                return Ok(());\n                            }\n                        }\n                    }\n\n                };\n\n                // Remote preload is complete.\n                drop(remote_load_completion);\n\n\n                // We will time the duration of the attach phase unless this is a creation (attach will do no work)\n                let attach_start = std::time::Instant::now();\n                let attached = {\n                    let _attach_timer = Some(TENANT.attach.start_timer());\n                    tenant_clone.attach(preload, &ctx).await\n                };\n                let attach_duration = attach_start.elapsed();\n                _ = tenant_clone.attach_wal_lag_cooldown.set(WalLagCooldown::new(attach_start, attach_duration));\n\n                match attached {\n                    Ok(()) => {\n                        info!(\"attach finished, activating\");\n                        tenant_clone.activate(broker_client, None, &ctx);\n                    }\n                    Err(e) => make_broken_or_stopping(&tenant_clone, anyhow::anyhow!(e)),\n                }\n\n                // If we are doing an opportunistic warmup attachment at startup, initialize\n 
               // logical size at the same time.  This is better than starting a bunch of idle tenants\n                // with cold caches and then coming back later to initialize their logical sizes.\n                //\n                // It also prevents the warmup proccess competing with the concurrency limit on\n                // logical size calculations: if logical size calculation semaphore is saturated,\n                // then warmup will wait for that before proceeding to the next tenant.\n                if matches!(attach_type, AttachType::Warmup { during_startup: true, .. }) {\n                    let mut futs: FuturesUnordered<_> = tenant_clone.timelines.lock().unwrap().values().cloned().map(|t| t.await_initial_logical_size()).collect();\n                    tracing::info!(\"Waiting for initial logical sizes while warming up...\");\n                    while futs.next().await.is_some() {}\n                    tracing::info!(\"Warm-up complete\");\n                }\n\n                Ok(())\n            }\n            .instrument(tracing::info_span!(parent: None, \"attach\", tenant_id=%tenant_shard_id.tenant_id, shard_id=%tenant_shard_id.shard_slug(), gen=?generation)),\n        );\n        Ok(tenant)\n    }\n\n    #[instrument(skip_all)]\n    pub(crate) async fn preload(\n        self: &Arc<Self>,\n        remote_storage: &GenericRemoteStorage,\n        cancel: CancellationToken,\n    ) -> anyhow::Result<TenantPreload> {\n        span::debug_assert_current_span_has_tenant_id();\n        // Get list of remote timelines\n        // download index files for every tenant timeline\n        info!(\"listing remote timelines\");\n        let (mut remote_timeline_ids, other_keys) = remote_timeline_client::list_remote_timelines(\n            remote_storage,\n            self.tenant_shard_id,\n            cancel.clone(),\n        )\n        .await?;\n\n        let tenant_manifest = match download_tenant_manifest(\n            remote_storage,\n            
&self.tenant_shard_id,\n            self.generation,\n            &cancel,\n        )\n        .await\n        {\n            Ok((tenant_manifest, _, _)) => Some(tenant_manifest),\n            Err(DownloadError::NotFound) => None,\n            Err(err) => return Err(err.into()),\n        };\n\n        info!(\n            \"found {} timelines ({} offloaded timelines)\",\n            remote_timeline_ids.len(),\n            tenant_manifest\n                .as_ref()\n                .map(|m| m.offloaded_timelines.len())\n                .unwrap_or(0)\n        );\n\n        for k in other_keys {\n            warn!(\"Unexpected non timeline key {k}\");\n        }\n\n        // Avoid downloading IndexPart of offloaded timelines.\n        let mut offloaded_with_prefix = HashSet::new();\n        if let Some(tenant_manifest) = &tenant_manifest {\n            for offloaded in tenant_manifest.offloaded_timelines.iter() {\n                if remote_timeline_ids.remove(&offloaded.timeline_id) {\n                    offloaded_with_prefix.insert(offloaded.timeline_id);\n                } else {\n                    // We'll take care later of timelines in the manifest without a prefix\n                }\n            }\n        }\n\n        // TODO(vlad): Could go to S3 if the secondary is freezing cold and hasn't even\n        // pulled the first heatmap. 
Not entirely necessary since the storage controller\n        // will kick the secondary in any case and cause a download.\n        let maybe_heatmap_at = self.read_on_disk_heatmap().await;\n\n        let timelines = self\n            .load_timelines_metadata(\n                remote_timeline_ids,\n                remote_storage,\n                maybe_heatmap_at,\n                cancel,\n            )\n            .await?;\n\n        Ok(TenantPreload {\n            tenant_manifest,\n            timelines: timelines\n                .into_iter()\n                .map(|(id, tl)| (id, Some(tl)))\n                .chain(offloaded_with_prefix.into_iter().map(|id| (id, None)))\n                .collect(),\n        })\n    }\n\n    async fn read_on_disk_heatmap(&self) -> Option<(HeatMapTenant, std::time::Instant)> {\n        if !self.conf.load_previous_heatmap {\n            return None;\n        }\n\n        let on_disk_heatmap_path = self.conf.tenant_heatmap_path(&self.tenant_shard_id);\n        match tokio::fs::read_to_string(on_disk_heatmap_path).await {\n            Ok(heatmap) => match serde_json::from_str::<HeatMapTenant>(&heatmap) {\n                Ok(heatmap) => Some((heatmap, std::time::Instant::now())),\n                Err(err) => {\n                    error!(\"Failed to deserialize old heatmap: {err}\");\n                    None\n                }\n            },\n            Err(err) => match err.kind() {\n                std::io::ErrorKind::NotFound => None,\n                _ => {\n                    error!(\"Unexpected IO error reading old heatmap: {err}\");\n                    None\n                }\n            },\n        }\n    }\n\n    ///\n    /// Background task that downloads all data for a tenant and brings it to Active state.\n    ///\n    /// No background tasks are started as part of this routine.\n    ///\n    async fn attach(\n        self: &Arc<TenantShard>,\n        preload: Option<TenantPreload>,\n        ctx: &RequestContext,\n    
) -> anyhow::Result<()> {\n        span::debug_assert_current_span_has_tenant_id();\n\n        failpoint_support::sleep_millis_async!(\"before-attaching-tenant\");\n\n        let Some(preload) = preload else {\n            anyhow::bail!(\n                \"local-only deployment is no longer supported, https://github.com/neondatabase/neon/issues/5624\"\n            );\n        };\n\n        let mut offloaded_timeline_ids = HashSet::new();\n        let mut offloaded_timelines_list = Vec::new();\n        if let Some(tenant_manifest) = &preload.tenant_manifest {\n            for timeline_manifest in tenant_manifest.offloaded_timelines.iter() {\n                let timeline_id = timeline_manifest.timeline_id;\n                let offloaded_timeline =\n                    OffloadedTimeline::from_manifest(self.tenant_shard_id, timeline_manifest);\n                offloaded_timelines_list.push((timeline_id, Arc::new(offloaded_timeline)));\n                offloaded_timeline_ids.insert(timeline_id);\n            }\n        }\n        // Complete deletions for offloaded timeline id's from manifest.\n        // The manifest will be uploaded later in this function.\n        offloaded_timelines_list\n            .retain(|(offloaded_id, offloaded)| {\n                // Existence of a timeline is finally determined by the existence of an index-part.json in remote storage.\n                // If there is dangling references in another location, they need to be cleaned up.\n                let delete = !preload.timelines.contains_key(offloaded_id);\n                if delete {\n                    tracing::info!(\"Removing offloaded timeline {offloaded_id} from manifest as no remote prefix was found\");\n                    offloaded.defuse_for_tenant_drop();\n                }\n                !delete\n        });\n\n        let mut timelines_to_resume_deletions = vec![];\n\n        let mut remote_index_and_client = HashMap::new();\n        let mut timeline_ancestors = 
HashMap::new();\n        let mut existent_timelines = HashSet::new();\n        for (timeline_id, preload) in preload.timelines {\n            let Some(preload) = preload else { continue };\n            // This is an invariant of the `preload` function's API\n            assert!(!offloaded_timeline_ids.contains(&timeline_id));\n            let index_part = match preload.index_part {\n                Ok(i) => {\n                    debug!(\"remote index part exists for timeline {timeline_id}\");\n                    // We found index_part on the remote, this is the standard case.\n                    existent_timelines.insert(timeline_id);\n                    i\n                }\n                Err(DownloadError::NotFound) => {\n                    // There is no index_part on the remote. We only get here\n                    // if there is some prefix for the timeline in the remote storage.\n                    // This can e.g. be the initdb.tar.zst archive, maybe a\n                    // remnant from a prior incomplete creation or deletion attempt.\n                    // Delete the local directory as the deciding criterion for a\n                    // timeline's existence is presence of index_part.\n                    info!(%timeline_id, \"index_part not found on remote\");\n                    continue;\n                }\n                Err(DownloadError::Fatal(why)) => {\n                    // If, while loading one remote timeline, we saw an indication that our generation\n                    // number is likely invalid, then we should not load the whole tenant.\n                    error!(%timeline_id, \"Fatal error loading timeline: {why}\");\n                    anyhow::bail!(why.to_string());\n                }\n                Err(e) => {\n                    // Some (possibly ephemeral) error happened during index_part download.\n                    // Pretend the timeline exists to not delete the timeline directory,\n                    // as it 
might be a temporary issue and we don't want to re-download\n                    // everything after it resolves.\n                    warn!(%timeline_id, \"Failed to load index_part from remote storage, failed creation? ({e})\");\n\n                    existent_timelines.insert(timeline_id);\n                    continue;\n                }\n            };\n            match index_part {\n                MaybeDeletedIndexPart::IndexPart(index_part) => {\n                    timeline_ancestors.insert(timeline_id, index_part.metadata.clone());\n                    remote_index_and_client.insert(\n                        timeline_id,\n                        (index_part, preload.client, preload.previous_heatmap),\n                    );\n                }\n                MaybeDeletedIndexPart::Deleted(index_part) => {\n                    info!(\n                        \"timeline {} is deleted, picking to resume deletion\",\n                        timeline_id\n                    );\n                    timelines_to_resume_deletions.push((timeline_id, index_part, preload.client));\n                }\n            }\n        }\n\n        let mut gc_blocks = HashMap::new();\n\n        // For every timeline, download the metadata file, scan the local directory,\n        // and build a layer map that contains an entry for each remote and local\n        // layer file.\n        let sorted_timelines = tree_sort_timelines(timeline_ancestors, |m| m.ancestor_timeline())?;\n        for (timeline_id, remote_metadata) in sorted_timelines {\n            let (index_part, remote_client, previous_heatmap) = remote_index_and_client\n                .remove(&timeline_id)\n                .expect(\"just put it in above\");\n\n            if let Some(blocking) = index_part.gc_blocking.as_ref() {\n                // could just filter these away, but it helps while testing\n                anyhow::ensure!(\n                    !blocking.reasons.is_empty(),\n                    
\"index_part for {timeline_id} is malformed: it should not have gc blocking with zero reasons\"\n                );\n                let prev = gc_blocks.insert(timeline_id, blocking.reasons);\n                assert!(prev.is_none());\n            }\n\n            // TODO again handle early failure\n            let effect = self\n                .load_remote_timeline(\n                    timeline_id,\n                    index_part,\n                    remote_metadata,\n                    previous_heatmap,\n                    self.get_timeline_resources_for(remote_client),\n                    LoadTimelineCause::Attach,\n                    ctx,\n                )\n                .await\n                .with_context(|| {\n                    format!(\n                        \"failed to load remote timeline {} for tenant {}\",\n                        timeline_id, self.tenant_shard_id\n                    )\n                })?;\n\n            match effect {\n                TimelineInitAndSyncResult::ReadyToActivate => {\n                    // activation happens later, on Tenant::activate\n                }\n                TimelineInitAndSyncResult::NeedsSpawnImportPgdata(\n                    TimelineInitAndSyncNeedsSpawnImportPgdata {\n                        timeline,\n                        import_pgdata,\n                        guard,\n                    },\n                ) => {\n                    let timeline_id = timeline.timeline_id;\n                    let import_task_gate = Gate::default();\n                    let import_task_guard = import_task_gate.enter().unwrap();\n                    let import_task_handle =\n                        tokio::task::spawn(self.clone().create_timeline_import_pgdata_task(\n                            timeline.clone(),\n                            import_pgdata,\n                            guard,\n                            import_task_guard,\n                            
ctx.detached_child(TaskKind::ImportPgdata, DownloadBehavior::Warn),\n                        ));\n\n                    let prev = self.timelines_importing.lock().unwrap().insert(\n                        timeline_id,\n                        Arc::new(ImportingTimeline {\n                            timeline: timeline.clone(),\n                            import_task_handle,\n                            import_task_gate,\n                            delete_progress: TimelineDeleteProgress::default(),\n                        }),\n                    );\n\n                    assert!(prev.is_none());\n                }\n            }\n        }\n\n        // At this point we've initialized all timelines and are tracking them.\n        // Now compute the layer visibility for all (not offloaded) timelines.\n        let compute_visiblity_for = {\n            let timelines_accessor = self.timelines.lock().unwrap();\n            let mut timelines_offloaded_accessor = self.timelines_offloaded.lock().unwrap();\n\n            timelines_offloaded_accessor.extend(offloaded_timelines_list.into_iter());\n\n            // Before activation, populate each Timeline's GcInfo with information about its children\n            self.initialize_gc_info(&timelines_accessor, &timelines_offloaded_accessor, None);\n\n            timelines_accessor.values().cloned().collect::<Vec<_>>()\n        };\n\n        for tl in compute_visiblity_for {\n            tl.update_layer_visibility().await.with_context(|| {\n                format!(\n                    \"failed initial timeline visibility computation {} for tenant {}\",\n                    tl.timeline_id, self.tenant_shard_id\n                )\n            })?;\n        }\n\n        // Walk through deleted timelines, resume deletion\n        for (timeline_id, index_part, remote_timeline_client) in timelines_to_resume_deletions {\n            remote_timeline_client\n                
.init_upload_queue_stopped_to_continue_deletion(&index_part)\n                .context(\"init queue stopped\")\n                .map_err(LoadLocalTimelineError::ResumeDeletion)?;\n\n            DeleteTimelineFlow::resume_deletion(\n                Arc::clone(self),\n                timeline_id,\n                &index_part.metadata,\n                remote_timeline_client,\n                ctx,\n            )\n            .instrument(tracing::info_span!(\"timeline_delete\", %timeline_id))\n            .await\n            .context(\"resume_deletion\")\n            .map_err(LoadLocalTimelineError::ResumeDeletion)?;\n        }\n\n        // Stash the preloaded tenant manifest, and upload a new manifest if changed.\n        //\n        // NB: this must happen after the tenant is fully populated above. In particular the\n        // offloaded timelines, which are included in the manifest.\n        {\n            let mut guard = self.remote_tenant_manifest.lock().await;\n            assert!(guard.is_none(), \"tenant manifest set before preload\"); // first populated here\n            *guard = preload.tenant_manifest;\n        }\n        self.maybe_upload_tenant_manifest().await?;\n\n        // The local filesystem contents are a cache of what's in the remote IndexPart;\n        // IndexPart is the source of truth.\n        self.clean_up_timelines(&existent_timelines)?;\n\n        self.gc_block.set_scanned(gc_blocks);\n\n        fail::fail_point!(\"attach-before-activate\", |_| {\n            anyhow::bail!(\"attach-before-activate\");\n        });\n        failpoint_support::sleep_millis_async!(\"attach-before-activate-sleep\", &self.cancel);\n\n        info!(\"Done\");\n\n        Ok(())\n    }\n\n    /// Check for any local timeline directories that are temporary, or do not correspond to a\n    /// timeline that still exists: this can happen if we crashed during a deletion/creation, or\n    /// if a timeline was deleted while the tenant was attached to a different 
pageserver.\n    fn clean_up_timelines(&self, existent_timelines: &HashSet<TimelineId>) -> anyhow::Result<()> {\n        let timelines_dir = self.conf.timelines_path(&self.tenant_shard_id);\n\n        let entries = match timelines_dir.read_dir_utf8() {\n            Ok(d) => d,\n            Err(e) => {\n                if e.kind() == std::io::ErrorKind::NotFound {\n                    return Ok(());\n                } else {\n                    return Err(e).context(\"list timelines directory for tenant\");\n                }\n            }\n        };\n\n        for entry in entries {\n            let entry = entry.context(\"read timeline dir entry\")?;\n            let entry_path = entry.path();\n\n            let purge = if crate::is_temporary(entry_path) {\n                true\n            } else {\n                match TimelineId::try_from(entry_path.file_name()) {\n                    Ok(i) => {\n                        // Purge if the timeline ID does not exist in remote storage: remote storage is the authority.\n                        !existent_timelines.contains(&i)\n                    }\n                    Err(e) => {\n                        tracing::warn!(\n                            \"Unparseable directory in timelines directory: {entry_path}, ignoring ({e})\"\n                        );\n                        // Do not purge junk: if we don't recognize it, be cautious and leave it for a human.\n                        false\n                    }\n                }\n            };\n\n            if purge {\n                tracing::info!(\"Purging stale timeline dentry {entry_path}\");\n                if let Err(e) = match entry.file_type() {\n                    Ok(t) => if t.is_dir() {\n                        std::fs::remove_dir_all(entry_path)\n                    } else {\n                        std::fs::remove_file(entry_path)\n                    }\n                    .or_else(fs_ext::ignore_not_found),\n                    Err(e) => 
Err(e),\n                } {\n                    tracing::warn!(\"Failed to purge stale timeline dentry {entry_path}: {e}\");\n                }\n            }\n        }\n\n        Ok(())\n    }\n\n    /// Get sum of all remote timelines sizes\n    ///\n    /// This function relies on the index_part instead of listing the remote storage\n    pub fn remote_size(&self) -> u64 {\n        let mut size = 0;\n\n        for timeline in self.list_timelines() {\n            size += timeline.remote_client.get_remote_physical_size();\n        }\n\n        size\n    }\n\n    #[instrument(skip_all, fields(timeline_id=%timeline_id))]\n    #[allow(clippy::too_many_arguments)]\n    async fn load_remote_timeline(\n        self: &Arc<Self>,\n        timeline_id: TimelineId,\n        index_part: IndexPart,\n        remote_metadata: TimelineMetadata,\n        previous_heatmap: Option<PreviousHeatmap>,\n        resources: TimelineResources,\n        cause: LoadTimelineCause,\n        ctx: &RequestContext,\n    ) -> anyhow::Result<TimelineInitAndSyncResult> {\n        span::debug_assert_current_span_has_tenant_id();\n\n        info!(\"downloading index file for timeline {}\", timeline_id);\n        tokio::fs::create_dir_all(self.conf.timeline_path(&self.tenant_shard_id, &timeline_id))\n            .await\n            .context(\"Failed to create new timeline directory\")?;\n\n        let ancestor = if let Some(ancestor_id) = remote_metadata.ancestor_timeline() {\n            let timelines = self.timelines.lock().unwrap();\n            Some(Arc::clone(timelines.get(&ancestor_id).ok_or_else(\n                || {\n                    anyhow::anyhow!(\n                        \"cannot find ancestor timeline {ancestor_id} for timeline {timeline_id}\"\n                    )\n                },\n            )?))\n        } else {\n            None\n        };\n\n        self.timeline_init_and_sync(\n            timeline_id,\n            resources,\n            index_part,\n            
remote_metadata,\n            previous_heatmap,\n            ancestor,\n            cause,\n            ctx,\n        )\n        .await\n    }\n\n    async fn load_timelines_metadata(\n        self: &Arc<TenantShard>,\n        timeline_ids: HashSet<TimelineId>,\n        remote_storage: &GenericRemoteStorage,\n        heatmap: Option<(HeatMapTenant, std::time::Instant)>,\n        cancel: CancellationToken,\n    ) -> anyhow::Result<HashMap<TimelineId, TimelinePreload>> {\n        let mut timeline_heatmaps = heatmap.map(|h| (h.0.into_timelines_index(), h.1));\n\n        let mut part_downloads = JoinSet::new();\n        for timeline_id in timeline_ids {\n            let cancel_clone = cancel.clone();\n\n            let previous_timeline_heatmap = timeline_heatmaps.as_mut().and_then(|hs| {\n                hs.0.remove(&timeline_id).map(|h| PreviousHeatmap::Active {\n                    heatmap: h,\n                    read_at: hs.1,\n                    end_lsn: None,\n                })\n            });\n            part_downloads.spawn(\n                self.load_timeline_metadata(\n                    timeline_id,\n                    remote_storage.clone(),\n                    previous_timeline_heatmap,\n                    cancel_clone,\n                )\n                .instrument(info_span!(\"download_index_part\", %timeline_id)),\n            );\n        }\n\n        let mut timeline_preloads: HashMap<TimelineId, TimelinePreload> = HashMap::new();\n\n        loop {\n            tokio::select!(\n                next = part_downloads.join_next() => {\n                    match next {\n                        Some(result) => {\n                            let preload = result.context(\"join preload task\")?;\n                            timeline_preloads.insert(preload.timeline_id, preload);\n                        },\n                        None => {\n                            break;\n                        }\n                    }\n                },\n    
            _ = cancel.cancelled() => {\n                    anyhow::bail!(\"Cancelled while waiting for remote index download\")\n                }\n            )\n        }\n\n        Ok(timeline_preloads)\n    }\n\n    fn build_timeline_client(\n        &self,\n        timeline_id: TimelineId,\n        remote_storage: GenericRemoteStorage,\n    ) -> RemoteTimelineClient {\n        RemoteTimelineClient::new(\n            remote_storage.clone(),\n            self.deletion_queue_client.clone(),\n            self.conf,\n            self.tenant_shard_id,\n            timeline_id,\n            self.generation,\n            &self.tenant_conf.load().location,\n        )\n    }\n\n    fn load_timeline_metadata(\n        self: &Arc<TenantShard>,\n        timeline_id: TimelineId,\n        remote_storage: GenericRemoteStorage,\n        previous_heatmap: Option<PreviousHeatmap>,\n        cancel: CancellationToken,\n    ) -> impl Future<Output = TimelinePreload> + use<> {\n        let client = self.build_timeline_client(timeline_id, remote_storage);\n        async move {\n            debug_assert_current_span_has_tenant_and_timeline_id();\n            debug!(\"starting index part download\");\n\n            let index_part = client.download_index_file(&cancel).await;\n\n            debug!(\"finished index part download\");\n\n            TimelinePreload {\n                client,\n                timeline_id,\n                index_part,\n                previous_heatmap,\n            }\n        }\n    }\n\n    fn check_to_be_archived_has_no_unarchived_children(\n        timeline_id: TimelineId,\n        timelines: &std::sync::MutexGuard<'_, HashMap<TimelineId, Arc<Timeline>>>,\n    ) -> Result<(), TimelineArchivalError> {\n        let children: Vec<TimelineId> = timelines\n            .iter()\n            .filter_map(|(id, entry)| {\n                if entry.get_ancestor_timeline_id() != Some(timeline_id) {\n                    return None;\n                }\n                
if entry.is_archived() == Some(true) {\n                    return None;\n                }\n                Some(*id)\n            })\n            .collect();\n\n        if !children.is_empty() {\n            return Err(TimelineArchivalError::HasUnarchivedChildren(children));\n        }\n        Ok(())\n    }\n\n    fn check_ancestor_of_to_be_unarchived_is_not_archived(\n        ancestor_timeline_id: TimelineId,\n        timelines: &std::sync::MutexGuard<'_, HashMap<TimelineId, Arc<Timeline>>>,\n        offloaded_timelines: &std::sync::MutexGuard<\n            '_,\n            HashMap<TimelineId, Arc<OffloadedTimeline>>,\n        >,\n    ) -> Result<(), TimelineArchivalError> {\n        let has_archived_parent =\n            if let Some(ancestor_timeline) = timelines.get(&ancestor_timeline_id) {\n                ancestor_timeline.is_archived() == Some(true)\n            } else if offloaded_timelines.contains_key(&ancestor_timeline_id) {\n                true\n            } else {\n                error!(\"ancestor timeline {ancestor_timeline_id} not found\");\n                if cfg!(debug_assertions) {\n                    panic!(\"ancestor timeline {ancestor_timeline_id} not found\");\n                }\n                return Err(TimelineArchivalError::NotFound);\n            };\n        if has_archived_parent {\n            return Err(TimelineArchivalError::HasArchivedParent(\n                ancestor_timeline_id,\n            ));\n        }\n        Ok(())\n    }\n\n    fn check_to_be_unarchived_timeline_has_no_archived_parent(\n        timeline: &Arc<Timeline>,\n    ) -> Result<(), TimelineArchivalError> {\n        if let Some(ancestor_timeline) = timeline.ancestor_timeline() {\n            if ancestor_timeline.is_archived() == Some(true) {\n                return Err(TimelineArchivalError::HasArchivedParent(\n                    ancestor_timeline.timeline_id,\n                ));\n            }\n        }\n        Ok(())\n    }\n\n    /// Loads the 
specified (offloaded) timeline from S3 and attaches it as a loaded timeline\n    ///\n    /// Counterpart to [`offload_timeline`].\n    async fn unoffload_timeline(\n        self: &Arc<Self>,\n        timeline_id: TimelineId,\n        broker_client: storage_broker::BrokerClientChannel,\n        ctx: RequestContext,\n    ) -> Result<Arc<Timeline>, TimelineArchivalError> {\n        info!(\"unoffloading timeline\");\n\n        // We activate the timeline below manually, so this must be called on an active tenant.\n        // We expect callers of this function to ensure this.\n        match self.current_state() {\n            TenantState::Activating { .. }\n            | TenantState::Attaching\n            | TenantState::Broken { .. } => {\n                panic!(\"Timeline expected to be active\")\n            }\n            TenantState::Stopping { .. } => return Err(TimelineArchivalError::Cancelled),\n            TenantState::Active => {}\n        }\n        let cancel = self.cancel.clone();\n\n        // Protect against concurrent attempts to use this TimelineId\n        // We don't care much about idempotency, as it's ensured a layer above.\n        let allow_offloaded = true;\n        let _create_guard = self\n            .create_timeline_create_guard(\n                timeline_id,\n                CreateTimelineIdempotency::FailWithConflict,\n                allow_offloaded,\n            )\n            .map_err(|err| match err {\n                TimelineExclusionError::AlreadyCreating => TimelineArchivalError::AlreadyInProgress,\n                TimelineExclusionError::AlreadyExists { .. 
} => {\n                    TimelineArchivalError::Other(anyhow::anyhow!(\"Timeline already exists\"))\n                }\n                TimelineExclusionError::Other(e) => TimelineArchivalError::Other(e),\n                TimelineExclusionError::ShuttingDown => TimelineArchivalError::Cancelled,\n            })?;\n\n        let timeline_preload = self\n            .load_timeline_metadata(\n                timeline_id,\n                self.remote_storage.clone(),\n                None,\n                cancel.clone(),\n            )\n            .await;\n\n        let index_part = match timeline_preload.index_part {\n            Ok(index_part) => {\n                debug!(\"remote index part exists for timeline {timeline_id}\");\n                index_part\n            }\n            Err(DownloadError::NotFound) => {\n                error!(%timeline_id, \"index_part not found on remote\");\n                return Err(TimelineArchivalError::NotFound);\n            }\n            Err(DownloadError::Cancelled) => return Err(TimelineArchivalError::Cancelled),\n            Err(e) => {\n                // Some (possibly ephemeral) error happened during index_part download.\n                warn!(%timeline_id, \"Failed to load index_part from remote storage, failed creation? 
({e})\");\n                return Err(TimelineArchivalError::Other(\n                    anyhow::Error::new(e).context(\"downloading index_part from remote storage\"),\n                ));\n            }\n        };\n        let index_part = match index_part {\n            MaybeDeletedIndexPart::IndexPart(index_part) => index_part,\n            MaybeDeletedIndexPart::Deleted(_index_part) => {\n                info!(\"timeline is deleted according to index_part.json\");\n                return Err(TimelineArchivalError::NotFound);\n            }\n        };\n        let remote_metadata = index_part.metadata.clone();\n        let timeline_resources = self.build_timeline_resources(timeline_id);\n        self.load_remote_timeline(\n            timeline_id,\n            index_part,\n            remote_metadata,\n            None,\n            timeline_resources,\n            LoadTimelineCause::Unoffload,\n            &ctx,\n        )\n        .await\n        .with_context(|| {\n            format!(\n                \"failed to load remote timeline {} for tenant {}\",\n                timeline_id, self.tenant_shard_id\n            )\n        })\n        .map_err(TimelineArchivalError::Other)?;\n\n        let timeline = {\n            let timelines = self.timelines.lock().unwrap();\n            let Some(timeline) = timelines.get(&timeline_id) else {\n                warn!(\"timeline not available directly after attach\");\n                // This is not a panic because no locks are held between `load_remote_timeline`\n                // which puts the timeline into timelines, and our look into the timeline map.\n                return Err(TimelineArchivalError::Other(anyhow::anyhow!(\n                    \"timeline not available directly after attach\"\n                )));\n            };\n            let mut offloaded_timelines = self.timelines_offloaded.lock().unwrap();\n            match offloaded_timelines.remove(&timeline_id) {\n                Some(offloaded) => 
{\n                    offloaded.delete_from_ancestor_with_timelines(&timelines);\n                }\n                None => warn!(\"timeline already removed from offloaded timelines\"),\n            }\n\n            self.initialize_gc_info(&timelines, &offloaded_timelines, Some(timeline_id));\n\n            Arc::clone(timeline)\n        };\n\n        // Upload new list of offloaded timelines to S3\n        self.maybe_upload_tenant_manifest().await?;\n\n        // Activate the timeline (if it makes sense)\n        if !(timeline.is_broken() || timeline.is_stopping()) {\n            let background_jobs_can_start = None;\n            timeline.activate(\n                self.clone(),\n                broker_client.clone(),\n                background_jobs_can_start,\n                &ctx.with_scope_timeline(&timeline),\n            );\n        }\n\n        info!(\"timeline unoffloading complete\");\n        Ok(timeline)\n    }\n\n    pub(crate) async fn apply_timeline_archival_config(\n        self: &Arc<Self>,\n        timeline_id: TimelineId,\n        new_state: TimelineArchivalState,\n        broker_client: storage_broker::BrokerClientChannel,\n        ctx: RequestContext,\n    ) -> Result<(), TimelineArchivalError> {\n        info!(\"setting timeline archival config\");\n        // First part: figure out what is needed to do, and do validation\n        let timeline_or_unarchive_offloaded = 'outer: {\n            let timelines = self.timelines.lock().unwrap();\n\n            let Some(timeline) = timelines.get(&timeline_id) else {\n                let offloaded_timelines = self.timelines_offloaded.lock().unwrap();\n                let Some(offloaded) = offloaded_timelines.get(&timeline_id) else {\n                    return Err(TimelineArchivalError::NotFound);\n                };\n                if new_state == TimelineArchivalState::Archived {\n                    // It's offloaded already, so nothing to do\n                    return Ok(());\n                }\n 
               if let Some(ancestor_timeline_id) = offloaded.ancestor_timeline_id {\n                    Self::check_ancestor_of_to_be_unarchived_is_not_archived(\n                        ancestor_timeline_id,\n                        &timelines,\n                        &offloaded_timelines,\n                    )?;\n                }\n                break 'outer None;\n            };\n\n            // Do some validation. We release the timelines lock below, so there is potential\n            // for race conditions: these checks are more present to prevent misunderstandings of\n            // the API's capabilities, instead of serving as the sole way to defend their invariants.\n            match new_state {\n                TimelineArchivalState::Unarchived => {\n                    Self::check_to_be_unarchived_timeline_has_no_archived_parent(timeline)?\n                }\n                TimelineArchivalState::Archived => {\n                    Self::check_to_be_archived_has_no_unarchived_children(timeline_id, &timelines)?\n                }\n            }\n            Some(Arc::clone(timeline))\n        };\n\n        // Second part: unoffload timeline (if needed)\n        let timeline = if let Some(timeline) = timeline_or_unarchive_offloaded {\n            timeline\n        } else {\n            // Turn offloaded timeline into a non-offloaded one\n            self.unoffload_timeline(timeline_id, broker_client, ctx)\n                .await?\n        };\n\n        // Third part: upload new timeline archival state and block until it is present in S3\n        let upload_needed = match timeline\n            .remote_client\n            .schedule_index_upload_for_timeline_archival_state(new_state)\n        {\n            Ok(upload_needed) => upload_needed,\n            Err(e) => {\n                if timeline.cancel.is_cancelled() {\n                    return Err(TimelineArchivalError::Cancelled);\n                } else {\n                    return 
Err(TimelineArchivalError::Other(e));\n                }\n            }\n        };\n\n        if upload_needed {\n            info!(\"Uploading new state\");\n            const MAX_WAIT: Duration = Duration::from_secs(10);\n            let Ok(v) =\n                tokio::time::timeout(MAX_WAIT, timeline.remote_client.wait_completion()).await\n            else {\n                tracing::warn!(\"reached timeout for waiting on upload queue\");\n                return Err(TimelineArchivalError::Timeout);\n            };\n            v.map_err(|e| match e {\n                WaitCompletionError::NotInitialized(e) => {\n                    TimelineArchivalError::Other(anyhow::anyhow!(e))\n                }\n                WaitCompletionError::UploadQueueShutDownOrStopped => {\n                    TimelineArchivalError::Cancelled\n                }\n            })?;\n        }\n        Ok(())\n    }\n\n    pub fn get_offloaded_timeline(\n        &self,\n        timeline_id: TimelineId,\n    ) -> Result<Arc<OffloadedTimeline>, GetTimelineError> {\n        self.timelines_offloaded\n            .lock()\n            .unwrap()\n            .get(&timeline_id)\n            .map(Arc::clone)\n            .ok_or(GetTimelineError::NotFound {\n                tenant_id: self.tenant_shard_id,\n                timeline_id,\n            })\n    }\n\n    pub(crate) fn tenant_shard_id(&self) -> TenantShardId {\n        self.tenant_shard_id\n    }\n\n    /// Get Timeline handle for given Neon timeline ID.\n    /// This function is idempotent. 
It doesn't change internal state in any way.\n    pub fn get_timeline(\n        &self,\n        timeline_id: TimelineId,\n        active_only: bool,\n    ) -> Result<Arc<Timeline>, GetTimelineError> {\n        let timelines_accessor = self.timelines.lock().unwrap();\n        let timeline = timelines_accessor\n            .get(&timeline_id)\n            .ok_or(GetTimelineError::NotFound {\n                tenant_id: self.tenant_shard_id,\n                timeline_id,\n            })?;\n\n        if active_only && !timeline.is_active() {\n            Err(GetTimelineError::NotActive {\n                tenant_id: self.tenant_shard_id,\n                timeline_id,\n                state: timeline.current_state(),\n            })\n        } else {\n            Ok(Arc::clone(timeline))\n        }\n    }\n\n    /// Lists timelines the tenant contains.\n    /// It's up to callers to omit certain timelines that are not considered ready for use.\n    pub fn list_timelines(&self) -> Vec<Arc<Timeline>> {\n        self.timelines\n            .lock()\n            .unwrap()\n            .values()\n            .map(Arc::clone)\n            .collect()\n    }\n\n    /// Lists timelines the tenant contains.\n    /// It's up to callers to omit certain timelines that are not considered ready for use.\n    pub fn list_importing_timelines(&self) -> Vec<Arc<ImportingTimeline>> {\n        self.timelines_importing\n            .lock()\n            .unwrap()\n            .values()\n            .map(Arc::clone)\n            .collect()\n    }\n\n    /// Lists timelines the tenant manages, including offloaded ones.\n    ///\n    /// It's up to callers to omit certain timelines that are not considered ready for use.\n    pub fn list_timelines_and_offloaded(\n        &self,\n    ) -> (Vec<Arc<Timeline>>, Vec<Arc<OffloadedTimeline>>) {\n        let timelines = self\n            .timelines\n            .lock()\n            .unwrap()\n            .values()\n            .map(Arc::clone)\n            
.collect();\n        let offloaded = self\n            .timelines_offloaded\n            .lock()\n            .unwrap()\n            .values()\n            .map(Arc::clone)\n            .collect();\n        (timelines, offloaded)\n    }\n\n    pub fn list_timeline_ids(&self) -> Vec<TimelineId> {\n        self.timelines.lock().unwrap().keys().cloned().collect()\n    }\n\n    /// This is used by tests & import-from-basebackup.\n    ///\n    /// The returned [`UninitializedTimeline`] contains no data nor metadata and it is in\n    /// a state that will fail [`TenantShard::load_remote_timeline`] because `disk_consistent_lsn=Lsn(0)`.\n    ///\n    /// The caller is responsible for getting the timeline into a state that will be accepted\n    /// by [`TenantShard::load_remote_timeline`] / [`TenantShard::attach`].\n    /// Then they may call [`UninitializedTimeline::finish_creation`] to add the timeline\n    /// to the [`TenantShard::timelines`].\n    ///\n    /// Tests should use `TenantShard::create_test_timeline` to set up the minimum required metadata keys.\n    pub(crate) async fn create_empty_timeline(\n        self: &Arc<Self>,\n        new_timeline_id: TimelineId,\n        initdb_lsn: Lsn,\n        pg_version: PgMajorVersion,\n        ctx: &RequestContext,\n    ) -> anyhow::Result<(UninitializedTimeline, RequestContext)> {\n        anyhow::ensure!(\n            self.is_active(),\n            \"Cannot create empty timelines on inactive tenant\"\n        );\n\n        // Protect against concurrent attempts to use this TimelineId\n        let create_guard = match self\n            .start_creating_timeline(new_timeline_id, CreateTimelineIdempotency::FailWithConflict)\n            .await?\n        {\n            StartCreatingTimelineResult::CreateGuard(guard) => guard,\n            StartCreatingTimelineResult::Idempotent(_) => {\n                unreachable!(\"FailWithConflict implies we get an error instead\")\n            }\n        };\n\n        let new_metadata = 
TimelineMetadata::new(\n            // Initialize disk_consistent LSN to 0, The caller must import some data to\n            // make it valid, before calling finish_creation()\n            Lsn(0),\n            None,\n            None,\n            Lsn(0),\n            initdb_lsn,\n            initdb_lsn,\n            pg_version,\n        );\n        self.prepare_new_timeline(\n            new_timeline_id,\n            &new_metadata,\n            create_guard,\n            initdb_lsn,\n            None,\n            None,\n            None,\n            ctx,\n        )\n        .await\n    }\n\n    /// Helper for unit tests to create an empty timeline.\n    ///\n    /// The timeline is has state value `Active` but its background loops are not running.\n    // This makes the various functions which anyhow::ensure! for Active state work in tests.\n    // Our current tests don't need the background loops.\n    #[cfg(test)]\n    pub async fn create_test_timeline(\n        self: &Arc<Self>,\n        new_timeline_id: TimelineId,\n        initdb_lsn: Lsn,\n        pg_version: PgMajorVersion,\n        ctx: &RequestContext,\n    ) -> anyhow::Result<Arc<Timeline>> {\n        let (uninit_tl, ctx) = self\n            .create_empty_timeline(new_timeline_id, initdb_lsn, pg_version, ctx)\n            .await?;\n        let tline = uninit_tl.raw_timeline().expect(\"we just created it\");\n        assert_eq!(tline.get_last_record_lsn(), Lsn(0));\n\n        // Setup minimum keys required for the timeline to be usable.\n        let mut modification = tline.begin_modification(initdb_lsn);\n        modification\n            .init_empty_test_timeline()\n            .context(\"init_empty_test_timeline\")?;\n        modification\n            .commit(&ctx)\n            .await\n            .context(\"commit init_empty_test_timeline modification\")?;\n\n        // Flush to disk so that uninit_tl's check for valid disk_consistent_lsn passes.\n        tline.maybe_spawn_flush_loop();\n        
tline.freeze_and_flush().await.context(\"freeze_and_flush\")?;\n\n        // Make sure the freeze_and_flush reaches remote storage.\n        tline.remote_client.wait_completion().await.unwrap();\n\n        let tl = uninit_tl.finish_creation().await?;\n        // The non-test code would call tl.activate() here.\n        tl.set_state(TimelineState::Active);\n        Ok(tl)\n    }\n\n    /// Helper for unit tests to create a timeline with some pre-loaded states.\n    #[cfg(test)]\n    #[allow(clippy::too_many_arguments)]\n    pub async fn create_test_timeline_with_layers(\n        self: &Arc<Self>,\n        new_timeline_id: TimelineId,\n        initdb_lsn: Lsn,\n        pg_version: PgMajorVersion,\n        ctx: &RequestContext,\n        in_memory_layer_desc: Vec<timeline::InMemoryLayerTestDesc>,\n        delta_layer_desc: Vec<timeline::DeltaLayerTestDesc>,\n        image_layer_desc: Vec<(Lsn, Vec<(pageserver_api::key::Key, bytes::Bytes)>)>,\n        end_lsn: Lsn,\n    ) -> anyhow::Result<Arc<Timeline>> {\n        use checks::check_valid_layermap;\n        use itertools::Itertools;\n\n        let tline = self\n            .create_test_timeline(new_timeline_id, initdb_lsn, pg_version, ctx)\n            .await?;\n        tline.force_advance_lsn(end_lsn);\n        for deltas in delta_layer_desc {\n            tline\n                .force_create_delta_layer(deltas, Some(initdb_lsn), ctx)\n                .await?;\n        }\n        for (lsn, images) in image_layer_desc {\n            tline\n                .force_create_image_layer(lsn, images, Some(initdb_lsn), ctx)\n                .await?;\n        }\n        for in_memory in in_memory_layer_desc {\n            tline\n                .force_create_in_memory_layer(in_memory, Some(initdb_lsn), ctx)\n                .await?;\n        }\n        let layer_names = tline\n            .layers\n            .read(LayerManagerLockHolder::Testing)\n            .await\n            .layer_map()\n            .unwrap()\n            
.iter_historic_layers()\n            .map(|layer| layer.layer_name())\n            .collect_vec();\n        if let Some(err) = check_valid_layermap(&layer_names) {\n            bail!(\"invalid layermap: {err}\");\n        }\n        Ok(tline)\n    }\n\n    /// Create a new timeline.\n    ///\n    /// Returns the new timeline ID and reference to its Timeline object.\n    ///\n    /// If the caller specified the timeline ID to use (`new_timeline_id`), and timeline with\n    /// the same timeline ID already exists, returns CreateTimelineError::AlreadyExists.\n    #[allow(clippy::too_many_arguments)]\n    pub(crate) async fn create_timeline(\n        self: &Arc<TenantShard>,\n        params: CreateTimelineParams,\n        broker_client: storage_broker::BrokerClientChannel,\n        ctx: &RequestContext,\n    ) -> Result<Arc<Timeline>, CreateTimelineError> {\n        if !self.is_active() {\n            if matches!(self.current_state(), TenantState::Stopping { .. }) {\n                return Err(CreateTimelineError::ShuttingDown);\n            } else {\n                return Err(CreateTimelineError::Other(anyhow::anyhow!(\n                    \"Cannot create timelines on inactive tenant\"\n                )));\n            }\n        }\n\n        let _gate = self\n            .gate\n            .enter()\n            .map_err(|_| CreateTimelineError::ShuttingDown)?;\n\n        let result: CreateTimelineResult = match params {\n            CreateTimelineParams::Bootstrap(CreateTimelineParamsBootstrap {\n                new_timeline_id,\n                existing_initdb_timeline_id,\n                pg_version,\n            }) => {\n                self.bootstrap_timeline(\n                    new_timeline_id,\n                    pg_version,\n                    existing_initdb_timeline_id,\n                    ctx,\n                )\n                .await?\n            }\n            CreateTimelineParams::Branch(CreateTimelineParamsBranch {\n                
new_timeline_id,\n                ancestor_timeline_id,\n                mut ancestor_start_lsn,\n            }) => {\n                let ancestor_timeline = self\n                    .get_timeline(ancestor_timeline_id, false)\n                    .context(\"Cannot branch off the timeline that's not present in pageserver\")?;\n\n                // instead of waiting around, just deny the request because ancestor is not yet\n                // ready for other purposes either.\n                if !ancestor_timeline.is_active() {\n                    return Err(CreateTimelineError::AncestorNotActive);\n                }\n\n                if ancestor_timeline.is_archived() == Some(true) {\n                    info!(\"tried to branch archived timeline\");\n                    return Err(CreateTimelineError::AncestorArchived);\n                }\n\n                if let Some(lsn) = ancestor_start_lsn.as_mut() {\n                    *lsn = lsn.align();\n\n                    let ancestor_ancestor_lsn = ancestor_timeline.get_ancestor_lsn();\n                    if ancestor_ancestor_lsn > *lsn {\n                        // can we safely just branch from the ancestor instead?\n                        return Err(CreateTimelineError::AncestorLsn(anyhow::anyhow!(\n                            \"invalid start lsn {} for ancestor timeline {}: less than timeline ancestor lsn {}\",\n                            lsn,\n                            ancestor_timeline_id,\n                            ancestor_ancestor_lsn,\n                        )));\n                    }\n\n                    // Wait for the WAL to arrive and be processed on the parent branch up\n                    // to the requested branch point. The repository code itself doesn't\n                    // require it, but if we start to receive WAL on the new timeline,\n                    // decoding the new WAL might need to look up previous pages, relation\n                    // sizes etc. 
and that would get confused if the previous page versions\n                    // are not in the repository yet.\n                    ancestor_timeline\n                        .wait_lsn(\n                            *lsn,\n                            timeline::WaitLsnWaiter::Tenant,\n                            timeline::WaitLsnTimeout::Default,\n                            ctx,\n                        )\n                        .await\n                        .map_err(|e| match e {\n                            e @ (WaitLsnError::Timeout(_) | WaitLsnError::BadState { .. }) => {\n                                CreateTimelineError::AncestorLsn(anyhow::anyhow!(e))\n                            }\n                            WaitLsnError::Shutdown => CreateTimelineError::ShuttingDown,\n                        })?;\n                }\n\n                self.branch_timeline(&ancestor_timeline, new_timeline_id, ancestor_start_lsn, ctx)\n                    .await?\n            }\n            CreateTimelineParams::ImportPgdata(params) => {\n                self.create_timeline_import_pgdata(params, ctx).await?\n            }\n        };\n\n        // At this point we have dropped our guard on [`Self::timelines_creating`], and\n        // the timeline is visible in [`Self::timelines`], but it is _not_ durable yet.  We must\n        // not send a success to the caller until it is.  The same applies to idempotent retries.\n        //\n        // TODO: the timeline is already visible in [`Self::timelines`]; a caller could incorrectly\n        // assume that, because they can see the timeline via API, that the creation is done and\n        // that it is durable. Ideally, we would keep the timeline hidden (in [`Self::timelines_creating`])\n        // until it is durable, e.g., by extending the time we hold the creation guard. 
This also\n        // interacts with UninitializedTimeline and is generally a bit tricky.\n        //\n        // To re-emphasize: the only correct way to create a timeline is to repeat calling the\n        // creation API until it returns success. Only then is durability guaranteed.\n        info!(creation_result=%result.discriminant(), \"waiting for timeline to be durable\");\n        result\n            .timeline()\n            .remote_client\n            .wait_completion()\n            .await\n            .map_err(|e| match e {\n                WaitCompletionError::NotInitialized(\n                    e, // If the queue is already stopped, it's a shutdown error.\n                ) if e.is_stopping() => CreateTimelineError::ShuttingDown,\n                WaitCompletionError::NotInitialized(_) => {\n                    // This is a bug: we should never try to wait for uploads before initializing the timeline\n                    debug_assert!(false);\n                    CreateTimelineError::Other(anyhow::anyhow!(\"timeline not initialized\"))\n                }\n                WaitCompletionError::UploadQueueShutDownOrStopped => {\n                    CreateTimelineError::ShuttingDown\n                }\n            })?;\n\n        // The creating task is responsible for activating the timeline.\n        // We do this after `wait_completion()` so that we don't spin up tasks that start\n        // doing stuff before the IndexPart is durable in S3, which is done by the previous section.\n        let activated_timeline = match result {\n            CreateTimelineResult::Created(timeline) => {\n                timeline.activate(\n                    self.clone(),\n                    broker_client,\n                    None,\n                    &ctx.with_scope_timeline(&timeline),\n                );\n                timeline\n            }\n            CreateTimelineResult::Idempotent(timeline) => {\n                info!(\n                    \"request was 
deemed idempotent, activation will be done by the creating task\"\n                );\n                timeline\n            }\n            CreateTimelineResult::ImportSpawned(timeline) => {\n                info!(\n                    \"import task spawned, timeline will become visible and activated once the import is done\"\n                );\n                timeline\n            }\n        };\n\n        Ok(activated_timeline)\n    }\n\n    /// The returned [`Arc<Timeline>`] is NOT in the [`TenantShard::timelines`] map until the import\n    /// completes in the background. A DIFFERENT [`Arc<Timeline>`] will be inserted into the\n    /// [`TenantShard::timelines`] map when the import completes.\n    /// We only return an [`Arc<Timeline>`] here so the API handler can create a [`pageserver_api::models::TimelineInfo`]\n    /// for the response.\n    async fn create_timeline_import_pgdata(\n        self: &Arc<Self>,\n        params: CreateTimelineParamsImportPgdata,\n        ctx: &RequestContext,\n    ) -> Result<CreateTimelineResult, CreateTimelineError> {\n        let CreateTimelineParamsImportPgdata {\n            new_timeline_id,\n            location,\n            idempotency_key,\n        } = params;\n\n        let started_at = chrono::Utc::now().naive_utc();\n\n        //\n        // There's probably a simpler way to upload an index part, but, remote_timeline_client\n        // is the canonical way we do it.\n        // - create an empty timeline in-memory\n        // - use its remote_timeline_client to do the upload\n        // - dispose of the uninit timeline\n        // - keep the creation guard alive\n\n        let timeline_create_guard = match self\n            .start_creating_timeline(\n                new_timeline_id,\n                CreateTimelineIdempotency::ImportPgdata(CreatingTimelineIdempotencyImportPgdata {\n                    idempotency_key: idempotency_key.clone(),\n                }),\n            )\n            .await?\n        {\n        
    StartCreatingTimelineResult::CreateGuard(guard) => guard,\n            StartCreatingTimelineResult::Idempotent(timeline) => {\n                return Ok(CreateTimelineResult::Idempotent(timeline));\n            }\n        };\n\n        let (mut uninit_timeline, timeline_ctx) = {\n            let this = &self;\n            let initdb_lsn = Lsn(0);\n            async move {\n                let new_metadata = TimelineMetadata::new(\n                    // Initialize disk_consistent LSN to 0, The caller must import some data to\n                    // make it valid, before calling finish_creation()\n                    Lsn(0),\n                    None,\n                    None,\n                    Lsn(0),\n                    initdb_lsn,\n                    initdb_lsn,\n                    PgMajorVersion::PG15,\n                );\n                this.prepare_new_timeline(\n                    new_timeline_id,\n                    &new_metadata,\n                    timeline_create_guard,\n                    initdb_lsn,\n                    None,\n                    None,\n                    None,\n                    ctx,\n                )\n                .await\n            }\n        }\n        .await?;\n\n        let in_progress = import_pgdata::index_part_format::InProgress {\n            idempotency_key,\n            location,\n            started_at,\n        };\n        let index_part = import_pgdata::index_part_format::Root::V1(\n            import_pgdata::index_part_format::V1::InProgress(in_progress),\n        );\n        uninit_timeline\n            .raw_timeline()\n            .unwrap()\n            .remote_client\n            .schedule_index_upload_for_import_pgdata_state_update(Some(index_part.clone()))?;\n\n        // wait_completion happens in caller\n\n        let (timeline, timeline_create_guard) = uninit_timeline.finish_creation_myself();\n\n        let import_task_gate = Gate::default();\n        let import_task_guard = 
import_task_gate.enter().unwrap();\n\n        let import_task_handle = tokio::spawn(self.clone().create_timeline_import_pgdata_task(\n            timeline.clone(),\n            index_part,\n            timeline_create_guard,\n            import_task_guard,\n            timeline_ctx.detached_child(TaskKind::ImportPgdata, DownloadBehavior::Warn),\n        ));\n\n        let prev = self.timelines_importing.lock().unwrap().insert(\n            timeline.timeline_id,\n            Arc::new(ImportingTimeline {\n                timeline: timeline.clone(),\n                import_task_handle,\n                import_task_gate,\n                delete_progress: TimelineDeleteProgress::default(),\n            }),\n        );\n\n        // Idempotency is enforced higher up the stack\n        assert!(prev.is_none());\n\n        // NB: the timeline doesn't exist in self.timelines at this point\n        Ok(CreateTimelineResult::ImportSpawned(timeline))\n    }\n\n    /// Finalize the import of a timeline on this shard by marking it complete in\n    /// the index part. If the import task hasn't finished yet, returns an error.\n    ///\n    /// This method is idempotent. 
If the import was finalized once, the next call\n    /// will be a no-op.\n    pub(crate) async fn finalize_importing_timeline(\n        &self,\n        timeline_id: TimelineId,\n    ) -> Result<(), FinalizeTimelineImportError> {\n        let timeline = {\n            let locked = self.timelines_importing.lock().unwrap();\n            match locked.get(&timeline_id) {\n                Some(importing_timeline) => {\n                    if !importing_timeline.import_task_handle.is_finished() {\n                        return Err(FinalizeTimelineImportError::ImportTaskStillRunning);\n                    }\n\n                    importing_timeline.timeline.clone()\n                }\n                None => {\n                    return Ok(());\n                }\n            }\n        };\n\n        timeline\n            .remote_client\n            .schedule_index_upload_for_import_pgdata_finalize()\n            .map_err(|_err| FinalizeTimelineImportError::ShuttingDown)?;\n        timeline\n            .remote_client\n            .wait_completion()\n            .await\n            .map_err(|_err| FinalizeTimelineImportError::ShuttingDown)?;\n\n        self.timelines_importing\n            .lock()\n            .unwrap()\n            .remove(&timeline_id);\n\n        Ok(())\n    }\n\n    #[instrument(skip_all, fields(tenant_id=%self.tenant_shard_id.tenant_id, shard_id=%self.tenant_shard_id.shard_slug(), timeline_id=%timeline.timeline_id))]\n    async fn create_timeline_import_pgdata_task(\n        self: Arc<TenantShard>,\n        timeline: Arc<Timeline>,\n        index_part: import_pgdata::index_part_format::Root,\n        timeline_create_guard: TimelineCreateGuard,\n        _import_task_guard: GateGuard,\n        ctx: RequestContext,\n    ) {\n        debug_assert_current_span_has_tenant_and_timeline_id();\n        info!(\"starting\");\n        scopeguard::defer! 
{info!(\"exiting\")};\n\n        let res = self\n            .create_timeline_import_pgdata_task_impl(\n                timeline,\n                index_part,\n                timeline_create_guard,\n                ctx,\n            )\n            .await;\n        if let Err(err) = &res {\n            error!(?err, \"task failed\");\n            // TODO sleep & retry, sensitive to tenant shutdown\n            // TODO: allow timeline deletion requests => should cancel the task\n        }\n    }\n\n    async fn create_timeline_import_pgdata_task_impl(\n        self: Arc<TenantShard>,\n        timeline: Arc<Timeline>,\n        index_part: import_pgdata::index_part_format::Root,\n        _timeline_create_guard: TimelineCreateGuard,\n        ctx: RequestContext,\n    ) -> Result<(), anyhow::Error> {\n        info!(\"importing pgdata\");\n        let ctx = ctx.with_scope_timeline(&timeline);\n        import_pgdata::doit(&timeline, index_part, &ctx, self.cancel.clone())\n            .await\n            .context(\"import\")?;\n        info!(\"import done - waiting for activation\");\n\n        anyhow::Ok(())\n    }\n\n    pub(crate) async fn delete_timeline(\n        self: Arc<Self>,\n        timeline_id: TimelineId,\n    ) -> Result<(), DeleteTimelineError> {\n        DeleteTimelineFlow::run(&self, timeline_id).await?;\n\n        Ok(())\n    }\n\n    /// perform one garbage collection iteration, removing old data files from disk.\n    /// this function is periodically called by gc task.\n    /// also it can be explicitly requested through page server api 'do_gc' command.\n    ///\n    /// `target_timeline_id` specifies the timeline to GC, or None for all.\n    ///\n    /// The `horizon` and `pitr` parameters determine how much WAL history needs to be retained.\n    /// Also known as the retention period, or the GC cutoff point. 
`horizon` specifies\n    /// the amount of history, as LSN difference from current latest LSN on each timeline.\n    /// `pitr` specifies the same as a time difference from the current time. The effective\n    /// GC cutoff point is determined conservatively by either `horizon` or `pitr`, whichever\n    /// requires more history to be retained.\n    //\n    pub(crate) async fn gc_iteration(\n        &self,\n        target_timeline_id: Option<TimelineId>,\n        horizon: u64,\n        pitr: Duration,\n        cancel: &CancellationToken,\n        ctx: &RequestContext,\n    ) -> Result<GcResult, GcError> {\n        // Don't start doing work during shutdown\n        if let TenantState::Stopping { .. } = self.current_state() {\n            return Ok(GcResult::default());\n        }\n\n        // there is a global allowed_error for this\n        if !self.is_active() {\n            return Err(GcError::NotActive);\n        }\n\n        {\n            let conf = self.tenant_conf.load();\n\n            // If we may not delete layers, then simply skip GC.  
Even though a tenant\n            // in AttachedMulti state could do GC and just enqueue the blocked deletions,\n            // the only advantage to doing it is to perhaps shrink the LayerMap metadata\n            // a bit sooner than we would achieve by waiting for AttachedSingle status.\n            if !conf.location.may_delete_layers_hint() {\n                info!(\"Skipping GC in location state {:?}\", conf.location);\n                return Ok(GcResult::default());\n            }\n\n            if conf.is_gc_blocked_by_lsn_lease_deadline() {\n                info!(\"Skipping GC because lsn lease deadline is not reached\");\n                return Ok(GcResult::default());\n            }\n        }\n\n        let _guard = match self.gc_block.start().await {\n            Ok(guard) => guard,\n            Err(reasons) => {\n                info!(\"Skipping GC: {reasons}\");\n                return Ok(GcResult::default());\n            }\n        };\n\n        self.gc_iteration_internal(target_timeline_id, horizon, pitr, cancel, ctx)\n            .await\n    }\n\n    /// Performs one compaction iteration. Called periodically from the compaction loop. Returns\n    /// whether another compaction is needed, if we still have pending work or if we yield for\n    /// immediate L0 compaction.\n    ///\n    /// Compaction can also be explicitly requested for a timeline via the HTTP API.\n    async fn compaction_iteration(\n        self: &Arc<Self>,\n        cancel: &CancellationToken,\n        ctx: &RequestContext,\n    ) -> Result<CompactionOutcome, CompactionError> {\n        // Don't compact inactive tenants.\n        if !self.is_active() {\n            return Ok(CompactionOutcome::Skipped);\n        }\n\n        // Don't compact tenants that can't upload layers. 
We don't check `may_delete_layers_hint`,\n        // since we need to compact L0 even in AttachedMulti to bound read amplification.\n        let location = self.tenant_conf.load().location;\n        if !location.may_upload_layers_hint() {\n            info!(\"skipping compaction in location state {location:?}\");\n            return Ok(CompactionOutcome::Skipped);\n        }\n\n        // Don't compact if the circuit breaker is tripped.\n        if self.compaction_circuit_breaker.lock().unwrap().is_broken() {\n            info!(\"skipping compaction due to previous failures\");\n            return Ok(CompactionOutcome::Skipped);\n        }\n\n        // Collect all timelines to compact, along with offload instructions and L0 counts.\n        let mut compact: Vec<Arc<Timeline>> = Vec::new();\n        let mut offload: HashSet<TimelineId> = HashSet::new();\n        let mut l0_counts: HashMap<TimelineId, usize> = HashMap::new();\n\n        {\n            let offload_enabled = self.get_timeline_offloading_enabled();\n            let timelines = self.timelines.lock().unwrap();\n            for (&timeline_id, timeline) in timelines.iter() {\n                // Skip inactive timelines.\n                if !timeline.is_active() {\n                    continue;\n                }\n\n                // Schedule the timeline for compaction.\n                compact.push(timeline.clone());\n\n                // Schedule the timeline for offloading if eligible.\n                let can_offload = offload_enabled\n                    && timeline.can_offload().0\n                    && !timelines\n                        .iter()\n                        .any(|(_, tli)| tli.get_ancestor_timeline_id() == Some(timeline_id));\n                if can_offload {\n                    offload.insert(timeline_id);\n                }\n            }\n        } // release timelines lock\n\n        for timeline in &compact {\n            // Collect L0 counts. 
Can't await while holding lock above.\n            if let Ok(lm) = timeline\n                .layers\n                .read(LayerManagerLockHolder::Compaction)\n                .await\n                .layer_map()\n            {\n                l0_counts.insert(timeline.timeline_id, lm.level0_deltas().len());\n            }\n        }\n\n        // Pass 1: L0 compaction across all timelines, in order of L0 count. We prioritize this to\n        // bound read amplification.\n        //\n        // TODO: this may spin on one or more ingest-heavy timelines, starving out image/GC\n        // compaction and offloading. We leave that as a potential problem to solve later. Consider\n        // splitting L0 and image/GC compaction to separate background jobs.\n        if self.get_compaction_l0_first() {\n            let compaction_threshold = self.get_compaction_threshold();\n            let compact_l0 = compact\n                .iter()\n                .map(|tli| (tli, l0_counts.get(&tli.timeline_id).copied().unwrap_or(0)))\n                .filter(|&(_, l0)| l0 >= compaction_threshold)\n                .sorted_by_key(|&(_, l0)| l0)\n                .rev()\n                .map(|(tli, _)| tli.clone())\n                .collect_vec();\n\n            let mut has_pending_l0 = false;\n            for timeline in compact_l0 {\n                let ctx = &ctx.with_scope_timeline(&timeline);\n                // NB: don't set CompactFlags::YieldForL0, since this is an L0-only compaction pass.\n                let outcome = timeline\n                    .compact(cancel, CompactFlags::OnlyL0Compaction.into(), ctx)\n                    .instrument(info_span!(\"compact_timeline\", timeline_id = %timeline.timeline_id))\n                    .await\n                    .inspect_err(|err| self.maybe_trip_compaction_breaker(err))?;\n                match outcome {\n                    CompactionOutcome::Done => {}\n                    CompactionOutcome::Skipped => {}\n                    
CompactionOutcome::Pending => has_pending_l0 = true,\n                    CompactionOutcome::YieldForL0 => has_pending_l0 = true,\n                }\n            }\n            if has_pending_l0 {\n                return Ok(CompactionOutcome::YieldForL0); // do another pass\n            }\n        }\n\n        // Pass 2: image compaction and timeline offloading. If any timelines have accumulated more\n        // L0 layers, they may also be compacted here. Image compaction will yield if there is\n        // pending L0 compaction on any tenant timeline.\n        //\n        // TODO: consider ordering timelines by some priority, e.g. time since last full compaction,\n        // amount of L1 delta debt or garbage, offload-eligible timelines first, etc.\n        let mut has_pending = false;\n        for timeline in compact {\n            if !timeline.is_active() {\n                continue;\n            }\n            let ctx = &ctx.with_scope_timeline(&timeline);\n\n            // Yield for L0 if the separate L0 pass is enabled (otherwise there's no point).\n            let mut flags = EnumSet::default();\n            if self.get_compaction_l0_first() {\n                flags |= CompactFlags::YieldForL0;\n            }\n\n            let mut outcome = timeline\n                .compact(cancel, flags, ctx)\n                .instrument(info_span!(\"compact_timeline\", timeline_id = %timeline.timeline_id))\n                .await\n                .inspect_err(|err| self.maybe_trip_compaction_breaker(err))?;\n\n            // If we're done compacting, check the scheduled GC compaction queue for more work.\n            if outcome == CompactionOutcome::Done {\n                let queue = {\n                    let mut guard = self.scheduled_compaction_tasks.lock().unwrap();\n                    guard\n                        .entry(timeline.timeline_id)\n                        .or_insert_with(|| Arc::new(GcCompactionQueue::new()))\n                        .clone()\n         
       };\n                let gc_compaction_strategy = self\n                    .feature_resolver\n                    .evaluate_multivariate(\"gc-comapction-strategy\")\n                    .ok();\n                let span = if let Some(gc_compaction_strategy) = gc_compaction_strategy {\n                    info_span!(\"gc_compact_timeline\", timeline_id = %timeline.timeline_id, strategy = %gc_compaction_strategy)\n                } else {\n                    info_span!(\"gc_compact_timeline\", timeline_id = %timeline.timeline_id)\n                };\n                outcome = queue\n                    .iteration(cancel, ctx, &self.gc_block, &timeline)\n                    .instrument(span)\n                    .await?;\n            }\n\n            // If we're done compacting, offload the timeline if requested.\n            if outcome == CompactionOutcome::Done && offload.contains(&timeline.timeline_id) {\n                pausable_failpoint!(\"before-timeline-auto-offload\");\n                offload_timeline(self, &timeline)\n                    .instrument(info_span!(\"offload_timeline\", timeline_id = %timeline.timeline_id))\n                    .await\n                    .or_else(|err| match err {\n                        // Ignore this, we likely raced with unarchival.\n                        OffloadError::NotArchived => Ok(()),\n                        OffloadError::AlreadyInProgress => Ok(()),\n                        OffloadError::Cancelled => Err(CompactionError::new_cancelled()),\n                        // don't break the anyhow chain\n                        OffloadError::Other(err) => Err(CompactionError::Other(err)),\n                    })?;\n            }\n\n            match outcome {\n                CompactionOutcome::Done => {}\n                CompactionOutcome::Skipped => {}\n                CompactionOutcome::Pending => has_pending = true,\n                // This mostly makes sense when the L0-only pass above is enabled, since 
there's\n                // otherwise no guarantee that we'll start with the timeline that has high L0.\n                CompactionOutcome::YieldForL0 => return Ok(CompactionOutcome::YieldForL0),\n            }\n        }\n\n        // Success! Untrip the breaker if necessary.\n        self.compaction_circuit_breaker\n            .lock()\n            .unwrap()\n            .success(&CIRCUIT_BREAKERS_UNBROKEN);\n\n        match has_pending {\n            true => Ok(CompactionOutcome::Pending),\n            false => Ok(CompactionOutcome::Done),\n        }\n    }\n\n    /// Trips the compaction circuit breaker if appropriate.\n    pub(crate) fn maybe_trip_compaction_breaker(&self, err: &CompactionError) {\n        if err.is_cancel() {\n            return;\n        }\n        self.compaction_circuit_breaker\n            .lock()\n            .unwrap()\n            .fail(&CIRCUIT_BREAKERS_BROKEN, err);\n    }\n\n    /// Cancel scheduled compaction tasks\n    pub(crate) fn cancel_scheduled_compaction(&self, timeline_id: TimelineId) {\n        let mut guard = self.scheduled_compaction_tasks.lock().unwrap();\n        if let Some(q) = guard.get_mut(&timeline_id) {\n            q.cancel_scheduled();\n        }\n    }\n\n    pub(crate) fn get_scheduled_compaction_tasks(\n        &self,\n        timeline_id: TimelineId,\n    ) -> Vec<CompactInfoResponse> {\n        let res = {\n            let guard = self.scheduled_compaction_tasks.lock().unwrap();\n            guard.get(&timeline_id).map(|q| q.remaining_jobs())\n        };\n        let Some((running, remaining)) = res else {\n            return Vec::new();\n        };\n        let mut result = Vec::new();\n        if let Some((id, running)) = running {\n            result.extend(running.into_compact_info_resp(id, true));\n        }\n        for (id, job) in remaining {\n            result.extend(job.into_compact_info_resp(id, false));\n        }\n        result\n    }\n\n    /// Schedule a compaction task for a timeline.\n    
pub(crate) async fn schedule_compaction(\n        &self,\n        timeline_id: TimelineId,\n        options: CompactOptions,\n    ) -> anyhow::Result<tokio::sync::oneshot::Receiver<()>> {\n        let (tx, rx) = tokio::sync::oneshot::channel();\n        let mut guard = self.scheduled_compaction_tasks.lock().unwrap();\n        let q = guard\n            .entry(timeline_id)\n            .or_insert_with(|| Arc::new(GcCompactionQueue::new()));\n        q.schedule_manual_compaction(options, Some(tx));\n        Ok(rx)\n    }\n\n    /// Performs periodic housekeeping, via the tenant housekeeping background task.\n    async fn housekeeping(&self) {\n        // Call through to all timelines to freeze ephemeral layers as needed. This usually happens\n        // during ingest, but we don't want idle timelines to hold open layers for too long.\n        //\n        // We don't do this if the tenant can't upload layers (i.e. it's in stale attachment mode).\n        // We don't run compaction in this case either, and don't want to keep flushing tiny L0\n        // layers that won't be compacted down.\n        if self.tenant_conf.load().location.may_upload_layers_hint() {\n            let timelines = self\n                .timelines\n                .lock()\n                .unwrap()\n                .values()\n                .filter(|tli| tli.is_active())\n                .cloned()\n                .collect_vec();\n\n            for timeline in timelines {\n                // Include a span with the timeline ID. 
The parent span already has the tenant ID.\n                let span =\n                    info_span!(\"maybe_freeze_ephemeral_layer\", timeline_id = %timeline.timeline_id);\n                timeline\n                    .maybe_freeze_ephemeral_layer()\n                    .instrument(span)\n                    .await;\n            }\n        }\n\n        // Shut down walredo if idle.\n        const WALREDO_IDLE_TIMEOUT: Duration = Duration::from_secs(180);\n        if let Some(ref walredo_mgr) = self.walredo_mgr {\n            walredo_mgr.maybe_quiesce(WALREDO_IDLE_TIMEOUT);\n        }\n\n        // Update the feature resolver with the latest tenant-specific data.\n        self.feature_resolver.refresh_properties_and_flags(self);\n    }\n\n    pub fn timeline_has_no_attached_children(&self, timeline_id: TimelineId) -> bool {\n        let timelines = self.timelines.lock().unwrap();\n        !timelines\n            .iter()\n            .any(|(_id, tl)| tl.get_ancestor_timeline_id() == Some(timeline_id))\n    }\n\n    pub fn current_state(&self) -> TenantState {\n        self.state.borrow().clone()\n    }\n\n    pub fn is_active(&self) -> bool {\n        self.current_state() == TenantState::Active\n    }\n\n    pub fn generation(&self) -> Generation {\n        self.generation\n    }\n\n    pub(crate) fn wal_redo_manager_status(&self) -> Option<WalRedoManagerStatus> {\n        self.walredo_mgr.as_ref().and_then(|mgr| mgr.status())\n    }\n\n    /// Changes tenant status to active, unless shutdown was already requested.\n    ///\n    /// `background_jobs_can_start` is an optional barrier set to a value during pageserver startup\n    /// to delay background jobs. 
Background jobs can be started right away when None is given.\n    fn activate(\n        self: &Arc<Self>,\n        broker_client: BrokerClientChannel,\n        background_jobs_can_start: Option<&completion::Barrier>,\n        ctx: &RequestContext,\n    ) {\n        span::debug_assert_current_span_has_tenant_id();\n\n        let mut activating = false;\n        self.state.send_modify(|current_state| {\n            use pageserver_api::models::ActivatingFrom;\n            match &*current_state {\n                TenantState::Activating(_) | TenantState::Active | TenantState::Broken { .. } | TenantState::Stopping { .. } => {\n                    panic!(\"caller is responsible for calling activate() only on Loading / Attaching tenants, got {current_state:?}\");\n                }\n                TenantState::Attaching => {\n                    *current_state = TenantState::Activating(ActivatingFrom::Attaching);\n                }\n            }\n            debug!(tenant_id = %self.tenant_shard_id.tenant_id, shard_id = %self.tenant_shard_id.shard_slug(), \"Activating tenant\");\n            activating = true;\n            // Continue outside the closure. We need to grab timelines.lock()\n            // and we plan to turn it into a tokio::sync::Mutex in a future patch.\n        });\n\n        if activating {\n            let timelines_accessor = self.timelines.lock().unwrap();\n            let timelines_offloaded_accessor = self.timelines_offloaded.lock().unwrap();\n            let timelines_to_activate = timelines_accessor\n                .values()\n                .filter(|timeline| !(timeline.is_broken() || timeline.is_stopping()));\n\n            // Spawn gc and compaction loops. 
The loops will shut themselves\n            // down when they notice that the tenant is inactive.\n            tasks::start_background_loops(self, background_jobs_can_start);\n\n            let mut activated_timelines = 0;\n\n            for timeline in timelines_to_activate {\n                timeline.activate(\n                    self.clone(),\n                    broker_client.clone(),\n                    background_jobs_can_start,\n                    &ctx.with_scope_timeline(timeline),\n                );\n                activated_timelines += 1;\n            }\n\n            let tid = self.tenant_shard_id.tenant_id.to_string();\n            let shard_id = self.tenant_shard_id.shard_slug().to_string();\n            let offloaded_timeline_count = timelines_offloaded_accessor.len();\n            TENANT_OFFLOADED_TIMELINES\n                .with_label_values(&[&tid, &shard_id])\n                .set(offloaded_timeline_count as u64);\n\n            self.state.send_modify(move |current_state| {\n                assert!(\n                    matches!(current_state, TenantState::Activating(_)),\n                    \"set_stopping and set_broken wait for us to leave Activating state\",\n                );\n                *current_state = TenantState::Active;\n\n                let elapsed = self.constructed_at.elapsed();\n                let total_timelines = timelines_accessor.len();\n\n                // log a lot of stuff, because some tenants sometimes suffer from user-visible\n                // times to activate. 
see https://github.com/neondatabase/neon/issues/4025\n                info!(\n                    since_creation_millis = elapsed.as_millis(),\n                    tenant_id = %self.tenant_shard_id.tenant_id,\n                    shard_id = %self.tenant_shard_id.shard_slug(),\n                    activated_timelines,\n                    total_timelines,\n                    post_state = <&'static str>::from(&*current_state),\n                    \"activation attempt finished\"\n                );\n\n                TENANT.activation.observe(elapsed.as_secs_f64());\n            });\n        }\n    }\n\n    /// Shutdown the tenant and join all of the spawned tasks.\n    ///\n    /// The method caters for all use-cases:\n    /// - pageserver shutdown (freeze_and_flush == true)\n    /// - detach + ignore (freeze_and_flush == false)\n    ///\n    /// This will attempt to shutdown even if tenant is broken.\n    ///\n    /// `shutdown_progress` is a [`completion::Barrier`] for the shutdown initiated by this call.\n    /// If the tenant is already shutting down, we return a clone of the first shutdown call's\n    /// `Barrier` as an `Err`. This not-first caller can use the returned barrier to join with\n    /// the ongoing shutdown.\n    async fn shutdown(\n        &self,\n        shutdown_progress: completion::Barrier,\n        shutdown_mode: timeline::ShutdownMode,\n    ) -> Result<(), completion::Barrier> {\n        span::debug_assert_current_span_has_tenant_id();\n\n        // Set tenant (and its timelines) to Stopping state.\n        //\n        // Since we can only transition into Stopping state after activation is complete,\n        // run it in a JoinSet so all tenants have a chance to stop before we get SIGKILLed.\n        //\n        // Transitioning tenants to Stopping state has a couple of non-obvious side effects:\n        // 1. Lock out any new requests to the tenants.\n        // 2. Signal cancellation to WAL receivers (we wait on it below).\n        // 3. 
Signal cancellation for other tenant background loops.\n        // 4. ???\n        //\n        // The waiting for the cancellation is not done uniformly.\n        // We certainly wait for WAL receivers to shut down.\n        // That is necessary so that no new data comes in before the freeze_and_flush.\n        // But the tenant background loops are joined-on in our caller.\n        // It's messed up.\n        // we just ignore the failure to stop\n\n        // If we're still attaching, fire the cancellation token early to drop out: this\n        // will prevent us flushing, but ensures timely shutdown if some I/O during attach\n        // is very slow.\n        let shutdown_mode = if matches!(self.current_state(), TenantState::Attaching) {\n            self.cancel.cancel();\n\n            // Having fired our cancellation token, do not try and flush timelines: their cancellation tokens\n            // are children of ours, so their flush loops will have shut down already\n            timeline::ShutdownMode::Hard\n        } else {\n            shutdown_mode\n        };\n\n        match self.set_stopping(shutdown_progress).await {\n            Ok(()) => {}\n            Err(SetStoppingError::Broken) => {\n                // assume that this is acceptable\n            }\n            Err(SetStoppingError::AlreadyStopping(other)) => {\n                // give caller the option to wait for this shutdown\n                info!(\"Tenant::shutdown: AlreadyStopping\");\n                return Err(other);\n            }\n        };\n\n        let mut js = tokio::task::JoinSet::new();\n        {\n            let timelines = self.timelines.lock().unwrap();\n            timelines.values().for_each(|timeline| {\n                let timeline = Arc::clone(timeline);\n                let timeline_id = timeline.timeline_id;\n                let span = tracing::info_span!(\"timeline_shutdown\", %timeline_id, ?shutdown_mode);\n                js.spawn(async move { 
timeline.shutdown(shutdown_mode).instrument(span).await });\n            });\n        }\n        {\n            let timelines_offloaded = self.timelines_offloaded.lock().unwrap();\n            timelines_offloaded.values().for_each(|timeline| {\n                timeline.defuse_for_tenant_drop();\n            });\n        }\n        {\n            let mut timelines_importing = self.timelines_importing.lock().unwrap();\n            timelines_importing\n                .drain()\n                .for_each(|(timeline_id, importing_timeline)| {\n                    let span = tracing::info_span!(\"importing_timeline_shutdown\", %timeline_id);\n                    js.spawn(async move { importing_timeline.shutdown().instrument(span).await });\n                });\n        }\n        // test_long_timeline_create_then_tenant_delete is leaning on this message\n        tracing::info!(\"Waiting for timelines...\");\n        while let Some(res) = js.join_next().await {\n            match res {\n                Ok(()) => {}\n                Err(je) if je.is_cancelled() => unreachable!(\"no cancelling used\"),\n                Err(je) if je.is_panic() => { /* logged already */ }\n                Err(je) => warn!(\"unexpected JoinError: {je:?}\"),\n            }\n        }\n\n        if let ShutdownMode::Reload = shutdown_mode {\n            tracing::info!(\"Flushing deletion queue\");\n            if let Err(e) = self.deletion_queue_client.flush().await {\n                match e {\n                    DeletionQueueError::ShuttingDown => {\n                        // This is the only error we expect for now. In the future, if more error\n                        // variants are added, we should handle them here.\n                    }\n                }\n            }\n        }\n\n        // We cancel the Tenant's cancellation token _after_ the timelines have all shut down.  This permits\n        // them to continue to do work during their shutdown methods, e.g. 
flushing data.\n        tracing::debug!(\"Cancelling CancellationToken\");\n        self.cancel.cancel();\n\n        // shutdown all tenant and timeline tasks: gc, compaction, page service\n        // No new tasks will be started for this tenant because it's in `Stopping` state.\n        //\n        // this will additionally shutdown and await all timeline tasks.\n        tracing::debug!(\"Waiting for tasks...\");\n        task_mgr::shutdown_tasks(None, Some(self.tenant_shard_id), None).await;\n\n        if let Some(walredo_mgr) = self.walredo_mgr.as_ref() {\n            walredo_mgr.shutdown().await;\n        }\n\n        // Wait for any in-flight operations to complete\n        self.gate.close().await;\n\n        remove_tenant_metrics(&self.tenant_shard_id);\n\n        Ok(())\n    }\n\n    /// Change tenant status to Stopping, to mark that it is being shut down.\n    ///\n    /// This function waits for the tenant to become active if it isn't already, before transitioning it into Stopping state.\n    ///\n    /// This function is not cancel-safe!\n    async fn set_stopping(&self, progress: completion::Barrier) -> Result<(), SetStoppingError> {\n        let mut rx = self.state.subscribe();\n\n        // cannot stop before we're done activating, so wait out until we're done activating\n        rx.wait_for(|state| match state {\n            TenantState::Activating(_) | TenantState::Attaching => {\n                info!(\"waiting for {state} to turn Active|Broken|Stopping\");\n                false\n            }\n            TenantState::Active | TenantState::Broken { .. } | TenantState::Stopping { .. 
} => true,\n        })\n        .await\n        .expect(\"cannot drop self.state while on a &self method\");\n\n        // we now know we're done activating, let's see whether this task is the winner to transition into Stopping\n        let mut err = None;\n        let stopping = self.state.send_if_modified(|current_state| match current_state {\n            TenantState::Activating(_) | TenantState::Attaching => {\n                unreachable!(\"we ensured above that we're done with activation, and, there is no re-activation\")\n            }\n            TenantState::Active => {\n                // FIXME: due to time-of-check vs time-of-use issues, it can happen that new timelines\n                // are created after the transition to Stopping. That's harmless, as the Timelines\n                // won't be accessible to anyone afterwards, because the Tenant is in Stopping state.\n                *current_state = TenantState::Stopping { progress: Some(progress) };\n                // Continue stopping outside the closure. We need to grab timelines.lock()\n                // and we plan to turn it into a tokio::sync::Mutex in a future patch.\n                true\n            }\n            TenantState::Stopping { progress: None } => {\n                // An attach was cancelled, and the attach transitioned the tenant from Attaching to\n                // Stopping(None) to let us know it exited. Register our progress and continue.\n                *current_state = TenantState::Stopping { progress: Some(progress) };\n                true\n            }\n            TenantState::Broken { reason, .. 
} => {\n                info!(\n                    \"Cannot set tenant to Stopping state, it is in Broken state due to: {reason}\"\n                );\n                err = Some(SetStoppingError::Broken);\n                false\n            }\n            TenantState::Stopping { progress: Some(progress) } => {\n                info!(\"Tenant is already in Stopping state\");\n                err = Some(SetStoppingError::AlreadyStopping(progress.clone()));\n                false\n            }\n        });\n        match (stopping, err) {\n            (true, None) => {} // continue\n            (false, Some(err)) => return Err(err),\n            (true, Some(_)) => unreachable!(\n                \"send_if_modified closure must error out if not transitioning to Stopping\"\n            ),\n            (false, None) => unreachable!(\n                \"send_if_modified closure must return true if transitioning to Stopping\"\n            ),\n        }\n\n        let timelines_accessor = self.timelines.lock().unwrap();\n        let not_broken_timelines = timelines_accessor\n            .values()\n            .filter(|timeline| !timeline.is_broken());\n        for timeline in not_broken_timelines {\n            timeline.set_state(TimelineState::Stopping);\n        }\n        Ok(())\n    }\n\n    /// Method for tenant::mgr to transition us into Broken state in case of a late failure in\n    /// `remove_tenant_from_memory`\n    ///\n    /// This function waits for the tenant to become active if it isn't already, before transitioning it into Broken state.\n    ///\n    /// In tests, we also use this to set tenants to Broken state on purpose.\n    pub(crate) async fn set_broken(&self, reason: String) {\n        let mut rx = self.state.subscribe();\n\n        // The load & attach routines own the tenant state until it has reached `Active`.\n        // So, wait until it's done.\n        rx.wait_for(|state| match state {\n            TenantState::Activating(_) | 
TenantState::Attaching => {\n                info!(\n                    \"waiting for {} to turn Active|Broken|Stopping\",\n                    <&'static str>::from(state)\n                );\n                false\n            }\n            TenantState::Active | TenantState::Broken { .. } | TenantState::Stopping { .. } => true,\n        })\n        .await\n        .expect(\"cannot drop self.state while on a &self method\");\n\n        // we now know we're done activating, let's see whether this task is the winner to transition into Broken\n        self.set_broken_no_wait(reason)\n    }\n\n    pub(crate) fn set_broken_no_wait(&self, reason: impl Display) {\n        let reason = reason.to_string();\n        self.state.send_modify(|current_state| {\n            match *current_state {\n                TenantState::Activating(_) | TenantState::Attaching => {\n                    unreachable!(\"we ensured above that we're done with activation, and, there is no re-activation\")\n                }\n                TenantState::Active => {\n                    if cfg!(feature = \"testing\") {\n                        warn!(\"Changing Active tenant to Broken state, reason: {}\", reason);\n                        *current_state = TenantState::broken_from_reason(reason);\n                    } else {\n                        unreachable!(\"not allowed to call set_broken on Active tenants in non-testing builds\")\n                    }\n                }\n                TenantState::Broken { .. } => {\n                    warn!(\"Tenant is already in Broken state\");\n                }\n                // This is the only \"expected\" path, any other path is a bug.\n                TenantState::Stopping { .. 
} => {\n                    warn!(\n                        \"Marking Stopping tenant as Broken state, reason: {}\",\n                        reason\n                    );\n                    *current_state = TenantState::broken_from_reason(reason);\n                }\n           }\n        });\n    }\n\n    pub fn subscribe_for_state_updates(&self) -> watch::Receiver<TenantState> {\n        self.state.subscribe()\n    }\n\n    /// The activate_now semaphore is initialized with zero units.  As soon as\n    /// we add a unit, waiters will be able to acquire a unit and proceed.\n    pub(crate) fn activate_now(&self) {\n        self.activate_now_sem.add_permits(1);\n    }\n\n    pub(crate) async fn wait_to_become_active(\n        &self,\n        timeout: Duration,\n    ) -> Result<(), GetActiveTenantError> {\n        let mut receiver = self.state.subscribe();\n        loop {\n            let current_state = receiver.borrow_and_update().clone();\n            match current_state {\n                TenantState::Attaching | TenantState::Activating(_) => {\n                    // in these states, there's a chance that we can reach ::Active\n                    self.activate_now();\n                    match timeout_cancellable(timeout, &self.cancel, receiver.changed()).await {\n                        Ok(r) => {\n                            r.map_err(\n                            |_e: tokio::sync::watch::error::RecvError|\n                                // Tenant existed but was dropped: report it as non-existent\n                                GetActiveTenantError::NotFound(GetTenantError::ShardNotFound(self.tenant_shard_id))\n                        )?\n                        }\n                        Err(TimeoutCancellableError::Cancelled) => {\n                            return Err(GetActiveTenantError::Cancelled);\n                        }\n                        Err(TimeoutCancellableError::Timeout) => {\n                            return 
Err(GetActiveTenantError::WaitForActiveTimeout {\n                                latest_state: Some(self.current_state()),\n                                wait_time: timeout,\n                            });\n                        }\n                    }\n                }\n                TenantState::Active => {\n                    return Ok(());\n                }\n                TenantState::Broken { reason, .. } => {\n                    // This is fatal, and reported distinctly from the general case of \"will never be active\" because\n                    // it's logically a 500 to external API users (broken is always a bug).\n                    return Err(GetActiveTenantError::Broken(reason));\n                }\n                TenantState::Stopping { .. } => {\n                    // There's no chance the tenant can transition back into ::Active\n                    return Err(GetActiveTenantError::WillNotBecomeActive(current_state));\n                }\n            }\n        }\n    }\n\n    pub(crate) fn get_attach_mode(&self) -> AttachmentMode {\n        self.tenant_conf.load().location.attach_mode\n    }\n\n    /// For API access: generate a LocationConfig equivalent to the one that would be used to\n    /// create a Tenant in the same state.  
Do not use this in hot paths: it's for relatively\n    /// rare external API calls, like a reconciliation at startup.\n    pub(crate) fn get_location_conf(&self) -> models::LocationConfig {\n        let attached_tenant_conf = self.tenant_conf.load();\n\n        let location_config_mode = match attached_tenant_conf.location.attach_mode {\n            AttachmentMode::Single => models::LocationConfigMode::AttachedSingle,\n            AttachmentMode::Multi => models::LocationConfigMode::AttachedMulti,\n            AttachmentMode::Stale => models::LocationConfigMode::AttachedStale,\n        };\n\n        models::LocationConfig {\n            mode: location_config_mode,\n            generation: self.generation.into(),\n            secondary_conf: None,\n            shard_number: self.shard_identity.number.0,\n            shard_count: self.shard_identity.count.literal(),\n            shard_stripe_size: self.shard_identity.stripe_size.0,\n            tenant_conf: attached_tenant_conf.tenant_conf.clone(),\n        }\n    }\n\n    pub(crate) fn get_tenant_shard_id(&self) -> &TenantShardId {\n        &self.tenant_shard_id\n    }\n\n    pub(crate) fn get_shard_identity(&self) -> ShardIdentity {\n        self.shard_identity\n    }\n\n    pub(crate) fn get_shard_stripe_size(&self) -> ShardStripeSize {\n        self.shard_identity.stripe_size\n    }\n\n    pub(crate) fn get_generation(&self) -> Generation {\n        self.generation\n    }\n\n    /// This function partially shuts down the tenant (it shuts down the Timelines) and is fallible,\n    /// and can leave the tenant in a bad state if it fails.  
The caller is responsible for\n    /// resetting this tenant to a valid state if we fail.\n    pub(crate) async fn split_prepare(\n        &self,\n        child_shards: &Vec<TenantShardId>,\n    ) -> anyhow::Result<()> {\n        let (timelines, offloaded) = {\n            let timelines = self.timelines.lock().unwrap();\n            let offloaded = self.timelines_offloaded.lock().unwrap();\n            (timelines.clone(), offloaded.clone())\n        };\n        let timelines_iter = timelines\n            .values()\n            .map(TimelineOrOffloadedArcRef::<'_>::from)\n            .chain(\n                offloaded\n                    .values()\n                    .map(TimelineOrOffloadedArcRef::<'_>::from),\n            );\n        for timeline in timelines_iter {\n            // We do not block timeline creation/deletion during splits inside the pageserver: it is up to higher levels\n            // to ensure that they do not start a split if currently in the process of doing these.\n\n            let timeline_id = timeline.timeline_id();\n\n            if let TimelineOrOffloadedArcRef::Timeline(timeline) = timeline {\n                // Upload an index from the parent: this is partly to provide freshness for the\n                // child tenants that will copy it, and partly for general ease-of-debugging: there will\n                // always be a parent shard index in the same generation as we wrote the child shard index.\n                tracing::info!(%timeline_id, \"Uploading index\");\n                timeline\n                    .remote_client\n                    .schedule_index_upload_for_file_changes()?;\n                timeline.remote_client.wait_completion().await?;\n            }\n\n            let remote_client = match timeline {\n                TimelineOrOffloadedArcRef::Timeline(timeline) => timeline.remote_client.clone(),\n                TimelineOrOffloadedArcRef::Offloaded(offloaded) => {\n                    let remote_client = self\n    
                    .build_timeline_client(offloaded.timeline_id, self.remote_storage.clone());\n                    Arc::new(remote_client)\n                }\n                TimelineOrOffloadedArcRef::Importing(_) => {\n                    unreachable!(\"Importing timelines are not included in the iterator\")\n                }\n            };\n\n            // Shut down the timeline's remote client: this means that the indices we write\n            // for child shards will not be invalidated by the parent shard deleting layers.\n            tracing::info!(%timeline_id, \"Shutting down remote storage client\");\n            remote_client.shutdown().await;\n\n            // Download methods can still be used after shutdown, as they don't flow through the remote client's\n            // queue.  In principle the RemoteTimelineClient could provide this without downloading it, but this\n            // operation is rare, so it's simpler to just download it (and robustly guarantees that the index\n            // we use here really is the remotely persistent one).\n            tracing::info!(%timeline_id, \"Downloading index_part from parent\");\n            let result = remote_client\n                .download_index_file(&self.cancel)\n                .instrument(info_span!(\"download_index_file\", tenant_id=%self.tenant_shard_id.tenant_id, shard_id=%self.tenant_shard_id.shard_slug(), %timeline_id))\n                .await?;\n            let index_part = match result {\n                MaybeDeletedIndexPart::Deleted(_) => {\n                    anyhow::bail!(\"Timeline deletion happened concurrently with split\")\n                }\n                MaybeDeletedIndexPart::IndexPart(p) => p,\n            };\n\n            // A shard split may not take place while a timeline import is on-going\n            // for the tenant. 
Timeline imports run as part of each tenant shard\n            // and rely on the sharding scheme to split the work among pageservers.\n            // If we were to split in the middle of this process, we would have to\n            // either ensure that it's driven to completion on the old shard set\n            // or transfer it to the new shard set. It's technically possible, but complex.\n            match index_part.import_pgdata {\n                Some(ref import) if !import.is_done() => {\n                    anyhow::bail!(\n                        \"Cannot split due to import with idempotency key: {:?}\",\n                        import.idempotency_key()\n                    );\n                }\n                Some(_) | None => {\n                    // fallthrough\n                }\n            }\n\n            for child_shard in child_shards {\n                tracing::info!(%timeline_id, \"Uploading index_part for child {}\", child_shard.to_index());\n                upload_index_part(\n                    &self.remote_storage,\n                    child_shard,\n                    &timeline_id,\n                    self.generation,\n                    &index_part,\n                    &self.cancel,\n                )\n                .await?;\n            }\n        }\n\n        let tenant_manifest = self.build_tenant_manifest();\n        for child_shard in child_shards {\n            tracing::info!(\n                \"Uploading tenant manifest for child {}\",\n                child_shard.to_index()\n            );\n            upload_tenant_manifest(\n                &self.remote_storage,\n                child_shard,\n                self.generation,\n                &tenant_manifest,\n                &self.cancel,\n            )\n            .await?;\n        }\n\n        Ok(())\n    }\n\n    pub(crate) fn get_sizes(&self) -> TopTenantShardItem {\n        let mut result = TopTenantShardItem {\n            id: self.tenant_shard_id,\n            
resident_size: 0,\n            physical_size: 0,\n            max_logical_size: 0,\n            max_logical_size_per_shard: 0,\n        };\n\n        for timeline in self.timelines.lock().unwrap().values() {\n            result.resident_size += timeline.metrics.resident_physical_size_gauge.get();\n\n            result.physical_size += timeline\n                .remote_client\n                .metrics\n                .remote_physical_size_gauge\n                .get();\n            result.max_logical_size = std::cmp::max(\n                result.max_logical_size,\n                timeline.metrics.current_logical_size_gauge.get(),\n            );\n        }\n\n        result.max_logical_size_per_shard = result\n            .max_logical_size\n            .div_ceil(self.tenant_shard_id.shard_count.count() as u64);\n\n        result\n    }\n}\n\n/// Given a Vec of timelines and their ancestors (timeline_id, ancestor_id),\n/// perform a topological sort, so that the parent of each timeline comes\n/// before the children.\n/// E extracts the ancestor from T\n/// This allows for T to be different. 
It can be TimelineMetadata, can be Timeline itself, etc.\nfn tree_sort_timelines<T, E>(\n    timelines: HashMap<TimelineId, T>,\n    extractor: E,\n) -> anyhow::Result<Vec<(TimelineId, T)>>\nwhere\n    E: Fn(&T) -> Option<TimelineId>,\n{\n    let mut result = Vec::with_capacity(timelines.len());\n\n    let mut now = Vec::with_capacity(timelines.len());\n    // (ancestor, children)\n    let mut later: HashMap<TimelineId, Vec<(TimelineId, T)>> =\n        HashMap::with_capacity(timelines.len());\n\n    for (timeline_id, value) in timelines {\n        if let Some(ancestor_id) = extractor(&value) {\n            let children = later.entry(ancestor_id).or_default();\n            children.push((timeline_id, value));\n        } else {\n            now.push((timeline_id, value));\n        }\n    }\n\n    while let Some((timeline_id, metadata)) = now.pop() {\n        result.push((timeline_id, metadata));\n        // All children of this can be loaded now\n        if let Some(mut children) = later.remove(&timeline_id) {\n            now.append(&mut children);\n        }\n    }\n\n    // All timelines should be visited now. 
Unless there were timelines with missing ancestors.\n    if !later.is_empty() {\n        for (missing_id, orphan_ids) in later {\n            for (orphan_id, _) in orphan_ids {\n                error!(\n                    \"could not load timeline {orphan_id} because its ancestor timeline {missing_id} could not be loaded\"\n                );\n            }\n        }\n        bail!(\"could not load tenant because some timelines are missing ancestors\");\n    }\n\n    Ok(result)\n}\n\nimpl TenantShard {\n    pub fn tenant_specific_overrides(&self) -> pageserver_api::models::TenantConfig {\n        self.tenant_conf.load().tenant_conf.clone()\n    }\n\n    pub fn effective_config(&self) -> pageserver_api::config::TenantConfigToml {\n        self.tenant_specific_overrides()\n            .merge(self.conf.default_tenant_conf.clone())\n    }\n\n    pub fn get_checkpoint_distance(&self) -> u64 {\n        let tenant_conf = self.tenant_conf.load().tenant_conf.clone();\n        tenant_conf\n            .checkpoint_distance\n            .unwrap_or(self.conf.default_tenant_conf.checkpoint_distance)\n    }\n\n    pub fn get_checkpoint_timeout(&self) -> Duration {\n        let tenant_conf = self.tenant_conf.load().tenant_conf.clone();\n        tenant_conf\n            .checkpoint_timeout\n            .unwrap_or(self.conf.default_tenant_conf.checkpoint_timeout)\n    }\n\n    pub fn get_compaction_target_size(&self) -> u64 {\n        let tenant_conf = self.tenant_conf.load().tenant_conf.clone();\n        tenant_conf\n            .compaction_target_size\n            .unwrap_or(self.conf.default_tenant_conf.compaction_target_size)\n    }\n\n    pub fn get_compaction_period(&self) -> Duration {\n        let tenant_conf = self.tenant_conf.load().tenant_conf.clone();\n        tenant_conf\n            .compaction_period\n            .unwrap_or(self.conf.default_tenant_conf.compaction_period)\n    }\n\n    pub fn get_compaction_threshold(&self) -> usize {\n        let tenant_conf = 
self.tenant_conf.load().tenant_conf.clone();\n        tenant_conf\n            .compaction_threshold\n            .unwrap_or(self.conf.default_tenant_conf.compaction_threshold)\n    }\n\n    pub fn get_rel_size_v2_enabled(&self) -> bool {\n        let tenant_conf = self.tenant_conf.load().tenant_conf.clone();\n        tenant_conf\n            .rel_size_v2_enabled\n            .unwrap_or(self.conf.default_tenant_conf.rel_size_v2_enabled)\n    }\n\n    pub fn get_compaction_upper_limit(&self) -> usize {\n        let tenant_conf = self.tenant_conf.load().tenant_conf.clone();\n        tenant_conf\n            .compaction_upper_limit\n            .unwrap_or(self.conf.default_tenant_conf.compaction_upper_limit)\n    }\n\n    pub fn get_compaction_l0_first(&self) -> bool {\n        let tenant_conf = self.tenant_conf.load().tenant_conf.clone();\n        tenant_conf\n            .compaction_l0_first\n            .unwrap_or(self.conf.default_tenant_conf.compaction_l0_first)\n    }\n\n    pub fn get_gc_horizon(&self) -> u64 {\n        let tenant_conf = self.tenant_conf.load().tenant_conf.clone();\n        tenant_conf\n            .gc_horizon\n            .unwrap_or(self.conf.default_tenant_conf.gc_horizon)\n    }\n\n    pub fn get_gc_period(&self) -> Duration {\n        let tenant_conf = self.tenant_conf.load().tenant_conf.clone();\n        tenant_conf\n            .gc_period\n            .unwrap_or(self.conf.default_tenant_conf.gc_period)\n    }\n\n    pub fn get_image_creation_threshold(&self) -> usize {\n        let tenant_conf = self.tenant_conf.load().tenant_conf.clone();\n        tenant_conf\n            .image_creation_threshold\n            .unwrap_or(self.conf.default_tenant_conf.image_creation_threshold)\n    }\n\n    // HADRON\n    pub fn get_image_creation_timeout(&self) -> Option<Duration> {\n        let tenant_conf = self.tenant_conf.load().tenant_conf.clone();\n        tenant_conf.image_layer_force_creation_period.or(self\n            .conf\n            
.default_tenant_conf\n            .image_layer_force_creation_period)\n    }\n\n    pub fn get_pitr_interval(&self) -> Duration {\n        let tenant_conf = self.tenant_conf.load().tenant_conf.clone();\n        tenant_conf\n            .pitr_interval\n            .unwrap_or(self.conf.default_tenant_conf.pitr_interval)\n    }\n\n    pub fn get_min_resident_size_override(&self) -> Option<u64> {\n        let tenant_conf = self.tenant_conf.load().tenant_conf.clone();\n        tenant_conf\n            .min_resident_size_override\n            .or(self.conf.default_tenant_conf.min_resident_size_override)\n    }\n\n    pub fn get_heatmap_period(&self) -> Option<Duration> {\n        let tenant_conf = self.tenant_conf.load().tenant_conf.clone();\n        let heatmap_period = tenant_conf\n            .heatmap_period\n            .unwrap_or(self.conf.default_tenant_conf.heatmap_period);\n        if heatmap_period.is_zero() {\n            None\n        } else {\n            Some(heatmap_period)\n        }\n    }\n\n    pub fn get_lsn_lease_length(&self) -> Duration {\n        Self::get_lsn_lease_length_impl(self.conf, &self.tenant_conf.load().tenant_conf)\n    }\n\n    pub fn get_lsn_lease_length_impl(\n        conf: &'static PageServerConf,\n        tenant_conf: &pageserver_api::models::TenantConfig,\n    ) -> Duration {\n        tenant_conf\n            .lsn_lease_length\n            .unwrap_or(conf.default_tenant_conf.lsn_lease_length)\n    }\n\n    pub fn get_timeline_offloading_enabled(&self) -> bool {\n        if self.conf.timeline_offloading {\n            return true;\n        }\n        let tenant_conf = self.tenant_conf.load().tenant_conf.clone();\n        tenant_conf\n            .timeline_offloading\n            .unwrap_or(self.conf.default_tenant_conf.timeline_offloading)\n    }\n\n    /// Generate an up-to-date TenantManifest based on the state of this Tenant.\n    fn build_tenant_manifest(&self) -> TenantManifest {\n        // Collect the offloaded timelines, and 
sort them for deterministic output.\n        let offloaded_timelines = self\n            .timelines_offloaded\n            .lock()\n            .unwrap()\n            .values()\n            .map(|tli| tli.manifest())\n            .sorted_by_key(|m| m.timeline_id)\n            .collect_vec();\n\n        TenantManifest {\n            version: LATEST_TENANT_MANIFEST_VERSION,\n            stripe_size: Some(self.get_shard_stripe_size()),\n            offloaded_timelines,\n        }\n    }\n\n    pub fn update_tenant_config<\n        F: Fn(\n            pageserver_api::models::TenantConfig,\n        ) -> anyhow::Result<pageserver_api::models::TenantConfig>,\n    >(\n        &self,\n        update: F,\n    ) -> anyhow::Result<pageserver_api::models::TenantConfig> {\n        // Use read-copy-update in order to avoid overwriting the location config\n        // state if this races with [`TenantShard::set_new_location_config`]. Note that\n        // this race is not possible if both request types come from the storage\n        // controller (as they should!) 
because an exclusive op lock is required\n        // on the storage controller side.\n\n        self.tenant_conf\n            .try_rcu(|attached_conf| -> Result<_, anyhow::Error> {\n                Ok(Arc::new(AttachedTenantConf {\n                    tenant_conf: update(attached_conf.tenant_conf.clone())?,\n                    location: attached_conf.location,\n                    lsn_lease_deadline: attached_conf.lsn_lease_deadline,\n                }))\n            })?;\n\n        let updated = self.tenant_conf.load();\n\n        self.tenant_conf_updated(&updated.tenant_conf);\n        // Don't hold self.timelines.lock() during the notifies.\n        // There's no risk of deadlock right now, but there could be if we consolidate\n        // mutexes in struct Timeline in the future.\n        let timelines = self.list_timelines();\n        for timeline in timelines {\n            timeline.tenant_conf_updated(&updated);\n        }\n\n        Ok(updated.tenant_conf.clone())\n    }\n\n    pub(crate) fn set_new_location_config(&self, new_conf: AttachedTenantConf) {\n        let new_tenant_conf = new_conf.tenant_conf.clone();\n\n        self.tenant_conf.store(Arc::new(new_conf.clone()));\n\n        self.tenant_conf_updated(&new_tenant_conf);\n        // Don't hold self.timelines.lock() during the notifies.\n        // There's no risk of deadlock right now, but there could be if we consolidate\n        // mutexes in struct Timeline in the future.\n        let timelines = self.list_timelines();\n        for timeline in timelines {\n            timeline.tenant_conf_updated(&new_conf);\n        }\n    }\n\n    fn get_pagestream_throttle_config(\n        psconf: &'static PageServerConf,\n        overrides: &pageserver_api::models::TenantConfig,\n    ) -> throttle::Config {\n        overrides\n            .timeline_get_throttle\n            .clone()\n            .unwrap_or(psconf.default_tenant_conf.timeline_get_throttle.clone())\n    }\n\n    pub(crate) fn 
tenant_conf_updated(&self, new_conf: &pageserver_api::models::TenantConfig) {\n        let conf = Self::get_pagestream_throttle_config(self.conf, new_conf);\n        self.pagestream_throttle.reconfigure(conf)\n    }\n\n    /// Helper function to create a new Timeline struct.\n    ///\n    /// The returned Timeline is in Loading state. The caller is responsible for\n    /// initializing any on-disk state, and for inserting the Timeline to the 'timelines'\n    /// map.\n    ///\n    /// `validate_ancestor == false` is used when a timeline is created for deletion\n    /// and we might not have the ancestor present anymore which is fine for to be\n    /// deleted timelines.\n    #[allow(clippy::too_many_arguments)]\n    fn create_timeline_struct(\n        &self,\n        new_timeline_id: TimelineId,\n        new_metadata: &TimelineMetadata,\n        previous_heatmap: Option<PreviousHeatmap>,\n        ancestor: Option<Arc<Timeline>>,\n        resources: TimelineResources,\n        cause: CreateTimelineCause,\n        create_idempotency: CreateTimelineIdempotency,\n        gc_compaction_state: Option<GcCompactionState>,\n        rel_size_v2_status: Option<RelSizeMigration>,\n        rel_size_migrated_at: Option<Lsn>,\n        ctx: &RequestContext,\n    ) -> anyhow::Result<(Arc<Timeline>, RequestContext)> {\n        let state = match cause {\n            CreateTimelineCause::Load => {\n                let ancestor_id = new_metadata.ancestor_timeline();\n                anyhow::ensure!(\n                    ancestor_id == ancestor.as_ref().map(|t| t.timeline_id),\n                    \"Timeline's {new_timeline_id} ancestor {ancestor_id:?} was not found\"\n                );\n                TimelineState::Loading\n            }\n            CreateTimelineCause::Delete => TimelineState::Stopping,\n        };\n\n        let pg_version = new_metadata.pg_version();\n\n        let timeline = Timeline::new(\n            self.conf,\n            Arc::clone(&self.tenant_conf),\n    
        new_metadata,\n            previous_heatmap,\n            ancestor,\n            new_timeline_id,\n            self.tenant_shard_id,\n            self.generation,\n            self.shard_identity,\n            self.walredo_mgr.clone(),\n            resources,\n            pg_version,\n            state,\n            self.attach_wal_lag_cooldown.clone(),\n            create_idempotency,\n            gc_compaction_state,\n            rel_size_v2_status,\n            rel_size_migrated_at,\n            self.cancel.child_token(),\n        );\n\n        let timeline_ctx = RequestContextBuilder::from(ctx)\n            .scope(context::Scope::new_timeline(&timeline))\n            .detached_child();\n\n        Ok((timeline, timeline_ctx))\n    }\n\n    /// [`TenantShard::shutdown`] must be called before dropping the returned [`TenantShard`] object\n    /// to ensure proper cleanup of background tasks and metrics.\n    //\n    // Allow too_many_arguments because a constructor's argument list naturally grows with the\n    // number of attributes in the struct: breaking these out into a builder wouldn't be helpful.\n    #[allow(clippy::too_many_arguments)]\n    fn new(\n        state: TenantState,\n        conf: &'static PageServerConf,\n        attached_conf: AttachedTenantConf,\n        shard_identity: ShardIdentity,\n        walredo_mgr: Option<Arc<WalRedoManager>>,\n        tenant_shard_id: TenantShardId,\n        remote_storage: GenericRemoteStorage,\n        deletion_queue_client: DeletionQueueClient,\n        l0_flush_global_state: L0FlushGlobalState,\n        basebackup_cache: Arc<BasebackupCache>,\n        feature_resolver: FeatureResolver,\n    ) -> TenantShard {\n        assert!(!attached_conf.location.generation.is_none());\n\n        let (state, mut rx) = watch::channel(state);\n\n        tokio::spawn(async move {\n            // reflect tenant state in metrics:\n            // - global per tenant state: TENANT_STATE_METRIC\n            // - \"set\" of 
broken tenants: BROKEN_TENANTS_SET\n            //\n            // set of broken tenants should not have zero counts so that it remains accessible for\n            // alerting.\n\n            let tid = tenant_shard_id.to_string();\n            let shard_id = tenant_shard_id.shard_slug().to_string();\n            let set_key = &[tid.as_str(), shard_id.as_str()][..];\n\n            fn inspect_state(state: &TenantState) -> ([&'static str; 1], bool) {\n                ([state.into()], matches!(state, TenantState::Broken { .. }))\n            }\n\n            let mut tuple = inspect_state(&rx.borrow_and_update());\n\n            let is_broken = tuple.1;\n            let mut counted_broken = if is_broken {\n                // add the id to the set right away, there should not be any updates on the channel\n                // after before tenant is removed, if ever\n                BROKEN_TENANTS_SET.with_label_values(set_key).set(1);\n                true\n            } else {\n                false\n            };\n\n            loop {\n                let labels = &tuple.0;\n                let current = TENANT_STATE_METRIC.with_label_values(labels);\n                current.inc();\n\n                if rx.changed().await.is_err() {\n                    // tenant has been dropped\n                    current.dec();\n                    drop(BROKEN_TENANTS_SET.remove_label_values(set_key));\n                    break;\n                }\n\n                current.dec();\n                tuple = inspect_state(&rx.borrow_and_update());\n\n                let is_broken = tuple.1;\n                if is_broken && !counted_broken {\n                    counted_broken = true;\n                    // insert the tenant_id (back) into the set while avoiding needless counter\n                    // access\n                    BROKEN_TENANTS_SET.with_label_values(set_key).set(1);\n                }\n            }\n        });\n\n        TenantShard {\n            
tenant_shard_id,\n            shard_identity,\n            generation: attached_conf.location.generation,\n            conf,\n            // using now here is good enough approximation to catch tenants with really long\n            // activation times.\n            constructed_at: Instant::now(),\n            timelines: Mutex::new(HashMap::new()),\n            timelines_creating: Mutex::new(HashSet::new()),\n            timelines_offloaded: Mutex::new(HashMap::new()),\n            timelines_importing: Mutex::new(HashMap::new()),\n            remote_tenant_manifest: Default::default(),\n            gc_cs: tokio::sync::Mutex::new(()),\n            walredo_mgr,\n            remote_storage,\n            deletion_queue_client,\n            state,\n            cached_logical_sizes: tokio::sync::Mutex::new(HashMap::new()),\n            cached_synthetic_tenant_size: Arc::new(AtomicU64::new(0)),\n            eviction_task_tenant_state: tokio::sync::Mutex::new(EvictionTaskTenantState::default()),\n            compaction_circuit_breaker: std::sync::Mutex::new(CircuitBreaker::new(\n                format!(\"compaction-{tenant_shard_id}\"),\n                5,\n                // Compaction can be a very expensive operation, and might leak disk space.  It also ought\n                // to be infallible, as long as remote storage is available.  
So if it repeatedly fails,\n                // use an extremely long backoff.\n                Some(Duration::from_secs(3600 * 24)),\n            )),\n            l0_compaction_trigger: Arc::new(Notify::new()),\n            scheduled_compaction_tasks: Mutex::new(Default::default()),\n            activate_now_sem: tokio::sync::Semaphore::new(0),\n            attach_wal_lag_cooldown: Arc::new(std::sync::OnceLock::new()),\n            cancel: CancellationToken::default(),\n            gate: Gate::default(),\n            pagestream_throttle: Arc::new(throttle::Throttle::new(\n                TenantShard::get_pagestream_throttle_config(conf, &attached_conf.tenant_conf),\n            )),\n            pagestream_throttle_metrics: Arc::new(\n                crate::metrics::tenant_throttling::Pagestream::new(&tenant_shard_id),\n            ),\n            tenant_conf: Arc::new(ArcSwap::from_pointee(attached_conf)),\n            ongoing_timeline_detach: std::sync::Mutex::default(),\n            gc_block: Default::default(),\n            l0_flush_global_state,\n            basebackup_cache,\n            feature_resolver: Arc::new(TenantFeatureResolver::new(\n                feature_resolver,\n                tenant_shard_id.tenant_id,\n            )),\n        }\n    }\n\n    /// Locate and load config\n    pub(super) fn load_tenant_config(\n        conf: &'static PageServerConf,\n        tenant_shard_id: &TenantShardId,\n    ) -> Result<LocationConf, LoadConfigError> {\n        let config_path = conf.tenant_location_config_path(tenant_shard_id);\n\n        info!(\"loading tenant configuration from {config_path}\");\n\n        // load and parse file\n        let config = fs::read_to_string(&config_path).map_err(|e| {\n            match e.kind() {\n                std::io::ErrorKind::NotFound => {\n                    // The config should almost always exist for a tenant directory:\n                    //  - When attaching a tenant, the config is the first thing we write\n     
               //  - When detaching a tenant, we atomically move the directory to a tmp location\n                    //    before deleting contents.\n                    //\n                    // The very rare edge case that can result in a missing config is if we crash during attach\n                    // between creating directory and writing config.  Callers should handle that as if the\n                    // directory didn't exist.\n\n                    LoadConfigError::NotFound(config_path)\n                }\n                _ => {\n                    // No IO errors except NotFound are acceptable here: other kinds of error indicate local storage or permissions issues\n                    // that we cannot cleanly recover\n                    crate::virtual_file::on_fatal_io_error(&e, \"Reading tenant config file\")\n                }\n            }\n        })?;\n\n        Ok(toml_edit::de::from_str::<LocationConf>(&config)?)\n    }\n\n    /// Stores a tenant location config to disk.\n    ///\n    /// NB: make sure to call `ShardIdentity::assert_equal` before persisting a new config, to avoid\n    /// changes to shard parameters that may result in data corruption.\n    #[tracing::instrument(skip_all, fields(tenant_id=%tenant_shard_id.tenant_id, shard_id=%tenant_shard_id.shard_slug()))]\n    pub(super) async fn persist_tenant_config(\n        conf: &'static PageServerConf,\n        tenant_shard_id: &TenantShardId,\n        location_conf: &LocationConf,\n    ) -> std::io::Result<()> {\n        let config_path = conf.tenant_location_config_path(tenant_shard_id);\n\n        Self::persist_tenant_config_at(tenant_shard_id, &config_path, location_conf).await\n    }\n\n    #[tracing::instrument(skip_all, fields(tenant_id=%tenant_shard_id.tenant_id, shard_id=%tenant_shard_id.shard_slug()))]\n    pub(super) async fn persist_tenant_config_at(\n        tenant_shard_id: &TenantShardId,\n        config_path: &Utf8Path,\n        location_conf: &LocationConf,\n    ) 
-> std::io::Result<()> {\n        debug!(\"persisting tenantconf to {config_path}\");\n\n        let mut conf_content = r#\"# This file contains a specific per-tenant's config.\n#  It is read in case of pageserver restart.\n\"#\n        .to_string();\n\n        fail::fail_point!(\"tenant-config-before-write\", |_| {\n            Err(std::io::Error::other(\"tenant-config-before-write\"))\n        });\n\n        // Convert the config to a toml file.\n        conf_content +=\n            &toml_edit::ser::to_string_pretty(&location_conf).expect(\"Config serialization failed\");\n\n        let temp_path = path_with_suffix_extension(config_path, TEMP_FILE_SUFFIX);\n\n        let conf_content = conf_content.into_bytes();\n        VirtualFile::crashsafe_overwrite(config_path.to_owned(), temp_path, conf_content).await\n    }\n\n    //\n    // How garbage collection works:\n    //\n    //                    +--bar------------->\n    //                   /\n    //             +----+-----foo---------------->\n    //            /\n    // ----main--+-------------------------->\n    //                \\\n    //                 +-----baz-------->\n    //\n    //\n    // 1. Grab 'gc_cs' mutex to prevent new timelines from being created while Timeline's\n    //    `gc_infos` are being refreshed\n    // 2. Scan collected timelines, and on each timeline, make note of the\n    //    all the points where other timelines have been branched off.\n    //    We will refrain from removing page versions at those LSNs.\n    // 3. For each timeline, scan all layer files on the timeline.\n    //    Remove all files for which a newer file exists and which\n    //    don't cover any branch point LSNs.\n    //\n    // TODO:\n    // - if a relation has a non-incremental persistent layer on a child branch, then we\n    //   don't need to keep that in the parent anymore. 
But currently\n    //   we do.\n    async fn gc_iteration_internal(\n        &self,\n        target_timeline_id: Option<TimelineId>,\n        horizon: u64,\n        pitr: Duration,\n        cancel: &CancellationToken,\n        ctx: &RequestContext,\n    ) -> Result<GcResult, GcError> {\n        let mut totals: GcResult = Default::default();\n        let now = Instant::now();\n\n        let gc_timelines = self\n            .refresh_gc_info_internal(target_timeline_id, horizon, pitr, cancel, ctx)\n            .await?;\n\n        failpoint_support::sleep_millis_async!(\"gc_iteration_internal_after_getting_gc_timelines\");\n\n        // If there is nothing to GC, we don't want any messages in the INFO log.\n        if !gc_timelines.is_empty() {\n            info!(\"{} timelines need GC\", gc_timelines.len());\n        } else {\n            debug!(\"{} timelines need GC\", gc_timelines.len());\n        }\n\n        // Perform GC for each timeline.\n        //\n        // Note that we don't hold the `TenantShard::gc_cs` lock here because we don't want to delay the\n        // branch creation task, which requires the GC lock. A GC iteration can run concurrently\n        // with branch creation.\n        //\n        // See comments in [`TenantShard::branch_timeline`] for more information about why branch\n        // creation task can run concurrently with timeline's GC iteration.\n        for timeline in gc_timelines {\n            if cancel.is_cancelled() {\n                // We were requested to shut down. 
Stop and return with the progress we\n                // made.\n                break;\n            }\n            let result = match timeline.gc().await {\n                Err(GcError::TimelineCancelled) => {\n                    if target_timeline_id.is_some() {\n                        // If we were targetting this specific timeline, surface cancellation to caller\n                        return Err(GcError::TimelineCancelled);\n                    } else {\n                        // A timeline may be shutting down independently of the tenant's lifecycle: we should\n                        // skip past this and proceed to try GC on other timelines.\n                        continue;\n                    }\n                }\n                r => r?,\n            };\n            totals += result;\n        }\n\n        totals.elapsed = now.elapsed();\n        Ok(totals)\n    }\n\n    /// Refreshes the Timeline::gc_info for all timelines, returning the\n    /// vector of timelines which have [`Timeline::get_last_record_lsn`] past\n    /// [`TenantShard::get_gc_horizon`].\n    ///\n    /// This is usually executed as part of periodic gc, but can now be triggered more often.\n    pub(crate) async fn refresh_gc_info(\n        &self,\n        cancel: &CancellationToken,\n        ctx: &RequestContext,\n    ) -> Result<Vec<Arc<Timeline>>, GcError> {\n        // since this method can now be called at different rates than the configured gc loop, it\n        // might be that these configuration values get applied faster than what it was previously,\n        // since these were only read from the gc task.\n        let horizon = self.get_gc_horizon();\n        let pitr = self.get_pitr_interval();\n\n        // refresh all timelines\n        let target_timeline_id = None;\n\n        self.refresh_gc_info_internal(target_timeline_id, horizon, pitr, cancel, ctx)\n            .await\n    }\n\n    /// Populate all Timelines' `GcInfo` with information about their children.  
We do not set the\n    /// PITR cutoffs here, because that requires I/O: this is done later, before GC, by [`Self::refresh_gc_info_internal`]\n    ///\n    /// Subsequently, parent-child relationships are updated incrementally inside [`Timeline::new`] and [`Timeline::drop`].\n    fn initialize_gc_info(\n        &self,\n        timelines: &std::sync::MutexGuard<HashMap<TimelineId, Arc<Timeline>>>,\n        timelines_offloaded: &std::sync::MutexGuard<HashMap<TimelineId, Arc<OffloadedTimeline>>>,\n        restrict_to_timeline: Option<TimelineId>,\n    ) {\n        if restrict_to_timeline.is_none() {\n            // This function must be called before activation: after activation timeline create/delete operations\n            // might happen, and this function is not safe to run concurrently with those.\n            assert!(!self.is_active());\n        }\n\n        // Scan all timelines. For each timeline, remember the timeline ID and\n        // the branch point where it was created.\n        let mut all_branchpoints: BTreeMap<TimelineId, Vec<(Lsn, TimelineId, MaybeOffloaded)>> =\n            BTreeMap::new();\n        timelines.iter().for_each(|(timeline_id, timeline_entry)| {\n            if let Some(ancestor_timeline_id) = &timeline_entry.get_ancestor_timeline_id() {\n                let ancestor_children = all_branchpoints.entry(*ancestor_timeline_id).or_default();\n                ancestor_children.push((\n                    timeline_entry.get_ancestor_lsn(),\n                    *timeline_id,\n                    MaybeOffloaded::No,\n                ));\n            }\n        });\n        timelines_offloaded\n            .iter()\n            .for_each(|(timeline_id, timeline_entry)| {\n                let Some(ancestor_timeline_id) = &timeline_entry.ancestor_timeline_id else {\n                    return;\n                };\n                let Some(retain_lsn) = timeline_entry.ancestor_retain_lsn else {\n                    return;\n                };\n       
         let ancestor_children = all_branchpoints.entry(*ancestor_timeline_id).or_default();\n                ancestor_children.push((retain_lsn, *timeline_id, MaybeOffloaded::Yes));\n            });\n\n        // The number of bytes we always keep, irrespective of PITR: this is a constant across timelines\n        let horizon = self.get_gc_horizon();\n\n        // Populate each timeline's GcInfo with information about its child branches\n        let timelines_to_write = if let Some(timeline_id) = restrict_to_timeline {\n            itertools::Either::Left(timelines.get(&timeline_id).into_iter())\n        } else {\n            itertools::Either::Right(timelines.values())\n        };\n        for timeline in timelines_to_write {\n            let mut branchpoints: Vec<(Lsn, TimelineId, MaybeOffloaded)> = all_branchpoints\n                .remove(&timeline.timeline_id)\n                .unwrap_or_default();\n\n            branchpoints.sort_by_key(|b| b.0);\n\n            let mut target = timeline.gc_info.write().unwrap();\n\n            target.retain_lsns = branchpoints;\n\n            let space_cutoff = timeline\n                .get_last_record_lsn()\n                .checked_sub(horizon)\n                .unwrap_or(Lsn(0));\n\n            target.cutoffs = GcCutoffs {\n                space: space_cutoff,\n                time: None,\n            };\n        }\n    }\n\n    async fn refresh_gc_info_internal(\n        &self,\n        target_timeline_id: Option<TimelineId>,\n        horizon: u64,\n        pitr: Duration,\n        cancel: &CancellationToken,\n        ctx: &RequestContext,\n    ) -> Result<Vec<Arc<Timeline>>, GcError> {\n        // before taking the gc_cs lock, do the heavier weight finding of gc_cutoff points for\n        // currently visible timelines.\n        let timelines = self\n            .timelines\n            .lock()\n            .unwrap()\n            .values()\n            .filter(|tl| match target_timeline_id.as_ref() {\n                
Some(target) => &tl.timeline_id == target,\n                None => true,\n            })\n            .cloned()\n            .collect::<Vec<_>>();\n\n        if target_timeline_id.is_some() && timelines.is_empty() {\n            // We were to act on a particular timeline and it wasn't found\n            return Err(GcError::TimelineNotFound);\n        }\n\n        let mut gc_cutoffs: HashMap<TimelineId, GcCutoffs> =\n            HashMap::with_capacity(timelines.len());\n\n        // Ensures all timelines use the same start time when computing the time cutoff.\n        let now_ts_for_pitr_calc = SystemTime::now();\n        for timeline in timelines.iter() {\n            let ctx = &ctx.with_scope_timeline(timeline);\n            let cutoff = timeline\n                .get_last_record_lsn()\n                .checked_sub(horizon)\n                .unwrap_or(Lsn(0));\n\n            let cutoffs = timeline\n                .find_gc_cutoffs(now_ts_for_pitr_calc, cutoff, pitr, cancel, ctx)\n                .await?;\n            let old = gc_cutoffs.insert(timeline.timeline_id, cutoffs);\n            assert!(old.is_none());\n        }\n\n        if !self.is_active() || self.cancel.is_cancelled() {\n            return Err(GcError::TenantCancelled);\n        }\n\n        // grab mutex to prevent new timelines from being created here; avoid doing long operations\n        // because that will stall branch creation.\n        let gc_cs = self.gc_cs.lock().await;\n\n        // Ok, we now know all the branch points.\n        // Update the GC information for each timeline.\n        let mut gc_timelines = Vec::with_capacity(timelines.len());\n        for timeline in timelines {\n            // We filtered the timeline list above\n            if let Some(target_timeline_id) = target_timeline_id {\n                assert_eq!(target_timeline_id, timeline.timeline_id);\n            }\n\n            {\n                let mut target = timeline.gc_info.write().unwrap();\n\n                
// Cull any expired leases\n                let now = SystemTime::now();\n                target.leases.retain(|_, lease| !lease.is_expired(&now));\n\n                timeline\n                    .metrics\n                    .valid_lsn_lease_count_gauge\n                    .set(target.leases.len() as u64);\n\n                // Look up parent's PITR cutoff to update the child's knowledge of whether it is within parent's PITR\n                if let Some(ancestor_id) = timeline.get_ancestor_timeline_id() {\n                    if let Some(ancestor_gc_cutoffs) = gc_cutoffs.get(&ancestor_id) {\n                        target.within_ancestor_pitr =\n                            Some(timeline.get_ancestor_lsn()) >= ancestor_gc_cutoffs.time;\n                    }\n                }\n\n                // Update metrics that depend on GC state\n                timeline\n                    .metrics\n                    .archival_size\n                    .set(if target.within_ancestor_pitr {\n                        timeline.metrics.current_logical_size_gauge.get()\n                    } else {\n                        0\n                    });\n                if let Some(time_cutoff) = target.cutoffs.time {\n                    timeline.metrics.pitr_history_size.set(\n                        timeline\n                            .get_last_record_lsn()\n                            .checked_sub(time_cutoff)\n                            .unwrap_or_default()\n                            .0,\n                    );\n                }\n\n                // Apply the cutoffs we found to the Timeline's GcInfo.  
Why might we _not_ have cutoffs for a timeline?\n                // - this timeline was created while we were finding cutoffs\n                // - lsn for timestamp search fails for this timeline repeatedly\n                if let Some(cutoffs) = gc_cutoffs.get(&timeline.timeline_id) {\n                    let original_cutoffs = target.cutoffs.clone();\n                    // GC cutoffs should never go back\n                    target.cutoffs = GcCutoffs {\n                        space: cutoffs.space.max(original_cutoffs.space),\n                        time: cutoffs.time.max(original_cutoffs.time),\n                    }\n                }\n            }\n\n            gc_timelines.push(timeline);\n        }\n        drop(gc_cs);\n        Ok(gc_timelines)\n    }\n\n    /// A substitute for `branch_timeline` for use in unit tests.\n    /// The returned timeline will have state value `Active` to make various `anyhow::ensure!()`\n    /// calls pass, but, we do not actually call `.activate()` under the hood. 
So, none of the\n    /// timeline background tasks are launched, except the flush loop.\n    #[cfg(test)]\n    async fn branch_timeline_test(\n        self: &Arc<Self>,\n        src_timeline: &Arc<Timeline>,\n        dst_id: TimelineId,\n        ancestor_lsn: Option<Lsn>,\n        ctx: &RequestContext,\n    ) -> Result<Arc<Timeline>, CreateTimelineError> {\n        let tl = self\n            .branch_timeline_impl(src_timeline, dst_id, ancestor_lsn, ctx)\n            .await?\n            .into_timeline_for_test();\n        tl.set_state(TimelineState::Active);\n        Ok(tl)\n    }\n\n    /// Helper for unit tests to branch a timeline with some pre-loaded states.\n    #[cfg(test)]\n    #[allow(clippy::too_many_arguments)]\n    pub async fn branch_timeline_test_with_layers(\n        self: &Arc<Self>,\n        src_timeline: &Arc<Timeline>,\n        dst_id: TimelineId,\n        ancestor_lsn: Option<Lsn>,\n        ctx: &RequestContext,\n        delta_layer_desc: Vec<timeline::DeltaLayerTestDesc>,\n        image_layer_desc: Vec<(Lsn, Vec<(pageserver_api::key::Key, bytes::Bytes)>)>,\n        end_lsn: Lsn,\n    ) -> anyhow::Result<Arc<Timeline>> {\n        use checks::check_valid_layermap;\n        use itertools::Itertools;\n\n        let tline = self\n            .branch_timeline_test(src_timeline, dst_id, ancestor_lsn, ctx)\n            .await?;\n        let ancestor_lsn = if let Some(ancestor_lsn) = ancestor_lsn {\n            ancestor_lsn\n        } else {\n            tline.get_last_record_lsn()\n        };\n        assert!(end_lsn >= ancestor_lsn);\n        tline.force_advance_lsn(end_lsn);\n        for deltas in delta_layer_desc {\n            tline\n                .force_create_delta_layer(deltas, Some(ancestor_lsn), ctx)\n                .await?;\n        }\n        for (lsn, images) in image_layer_desc {\n            tline\n                .force_create_image_layer(lsn, images, Some(ancestor_lsn), ctx)\n                .await?;\n        }\n        let 
layer_names = tline\n            .layers\n            .read(LayerManagerLockHolder::Testing)\n            .await\n            .layer_map()\n            .unwrap()\n            .iter_historic_layers()\n            .map(|layer| layer.layer_name())\n            .collect_vec();\n        if let Some(err) = check_valid_layermap(&layer_names) {\n            bail!(\"invalid layermap: {err}\");\n        }\n        Ok(tline)\n    }\n\n    /// Branch an existing timeline.\n    async fn branch_timeline(\n        self: &Arc<Self>,\n        src_timeline: &Arc<Timeline>,\n        dst_id: TimelineId,\n        start_lsn: Option<Lsn>,\n        ctx: &RequestContext,\n    ) -> Result<CreateTimelineResult, CreateTimelineError> {\n        self.branch_timeline_impl(src_timeline, dst_id, start_lsn, ctx)\n            .await\n    }\n\n    async fn branch_timeline_impl(\n        self: &Arc<Self>,\n        src_timeline: &Arc<Timeline>,\n        dst_id: TimelineId,\n        start_lsn: Option<Lsn>,\n        ctx: &RequestContext,\n    ) -> Result<CreateTimelineResult, CreateTimelineError> {\n        let src_id = src_timeline.timeline_id;\n\n        // We will validate our ancestor LSN in this function.  
Acquire the GC lock so that\n        // this check cannot race with GC, and the ancestor LSN is guaranteed to remain\n        // valid while we are creating the branch.\n        let _gc_cs = self.gc_cs.lock().await;\n\n        // If no start LSN is specified, we branch the new timeline from the source timeline's last record LSN\n        let start_lsn = start_lsn.unwrap_or_else(|| {\n            let lsn = src_timeline.get_last_record_lsn();\n            info!(\"branching timeline {dst_id} from timeline {src_id} at last record LSN: {lsn}\");\n            lsn\n        });\n\n        // we finally have determined the ancestor_start_lsn, so we can get claim exclusivity now\n        let timeline_create_guard = match self\n            .start_creating_timeline(\n                dst_id,\n                CreateTimelineIdempotency::Branch {\n                    ancestor_timeline_id: src_timeline.timeline_id,\n                    ancestor_start_lsn: start_lsn,\n                },\n            )\n            .await?\n        {\n            StartCreatingTimelineResult::CreateGuard(guard) => guard,\n            StartCreatingTimelineResult::Idempotent(timeline) => {\n                return Ok(CreateTimelineResult::Idempotent(timeline));\n            }\n        };\n\n        // Ensure that `start_lsn` is valid, i.e. the LSN is within the PITR\n        // horizon on the source timeline\n        //\n        // We check it against both the planned GC cutoff stored in 'gc_info',\n        // and the 'latest_gc_cutoff' of the last GC that was performed.  
The\n        // planned GC cutoff in 'gc_info' is normally larger than\n        // 'applied_gc_cutoff_lsn', but beware of corner cases like if you just\n        // changed the GC settings for the tenant to make the PITR window\n        // larger, but some of the data was already removed by an earlier GC\n        // iteration.\n\n        // check against last actual 'latest_gc_cutoff' first\n        let applied_gc_cutoff_lsn = src_timeline.get_applied_gc_cutoff_lsn();\n        {\n            let gc_info = src_timeline.gc_info.read().unwrap();\n            let planned_cutoff = gc_info.min_cutoff();\n            if gc_info.lsn_covered_by_lease(start_lsn) {\n                tracing::info!(\n                    \"skipping comparison of {start_lsn} with gc cutoff {} and planned gc cutoff {planned_cutoff} due to lsn lease\",\n                    *applied_gc_cutoff_lsn\n                );\n            } else {\n                src_timeline\n                    .check_lsn_is_in_scope(start_lsn, &applied_gc_cutoff_lsn)\n                    .context(format!(\n                        \"invalid branch start lsn: less than latest GC cutoff {}\",\n                        *applied_gc_cutoff_lsn,\n                    ))\n                    .map_err(CreateTimelineError::AncestorLsn)?;\n\n                // and then the planned GC cutoff\n                if start_lsn < planned_cutoff {\n                    return Err(CreateTimelineError::AncestorLsn(anyhow::anyhow!(\n                        \"invalid branch start lsn: less than planned GC cutoff {planned_cutoff}\"\n                    )));\n                }\n            }\n        }\n\n        //\n        // The branch point is valid, and we are still holding the 'gc_cs' lock\n        // so that GC cannot advance the GC cutoff until we are finished.\n        // Proceed with the branch creation.\n        //\n\n        // Determine prev-LSN for the new timeline. 
We can only determine it if\n        // the timeline was branched at the current end of the source timeline.\n        let RecordLsn {\n            last: src_last,\n            prev: src_prev,\n        } = src_timeline.get_last_record_rlsn();\n        let dst_prev = if src_last == start_lsn {\n            Some(src_prev)\n        } else {\n            None\n        };\n\n        // Create the metadata file, noting the ancestor of the new timeline.\n        // There is initially no data in it, but all the read-calls know to look\n        // into the ancestor.\n        let metadata = TimelineMetadata::new(\n            start_lsn,\n            dst_prev,\n            Some(src_id),\n            start_lsn,\n            *src_timeline.applied_gc_cutoff_lsn.read(), // FIXME: should we hold onto this guard longer?\n            src_timeline.initdb_lsn,\n            src_timeline.pg_version,\n        );\n\n        let (rel_size_v2_status, rel_size_migrated_at) = src_timeline.get_rel_size_v2_status();\n        let (uninitialized_timeline, _timeline_ctx) = self\n            .prepare_new_timeline(\n                dst_id,\n                &metadata,\n                timeline_create_guard,\n                start_lsn + 1,\n                Some(Arc::clone(src_timeline)),\n                Some(rel_size_v2_status),\n                rel_size_migrated_at,\n                ctx,\n            )\n            .await?;\n\n        let new_timeline = uninitialized_timeline.finish_creation().await?;\n\n        // Root timeline gets its layers during creation and uploads them along with the metadata.\n        // A branch timeline though, when created, can get no writes for some time, hence won't get any layers created.\n        // We still need to upload its metadata eagerly: if other nodes `attach` the tenant and miss this timeline, their GC\n        // could get incorrect information and remove more layers, than needed.\n        // See also https://github.com/neondatabase/neon/issues/3865\n        
new_timeline\n            .remote_client\n            .schedule_index_upload_for_full_metadata_update(&metadata)\n            .context(\"branch initial metadata upload\")?;\n\n        // Callers are responsible to wait for uploads to complete and for activating the timeline.\n\n        Ok(CreateTimelineResult::Created(new_timeline))\n    }\n\n    /// For unit tests, make this visible so that other modules can directly create timelines\n    #[cfg(test)]\n    #[tracing::instrument(skip_all, fields(tenant_id=%self.tenant_shard_id.tenant_id, shard_id=%self.tenant_shard_id.shard_slug(), %timeline_id))]\n    pub(crate) async fn bootstrap_timeline_test(\n        self: &Arc<Self>,\n        timeline_id: TimelineId,\n        pg_version: PgMajorVersion,\n        load_existing_initdb: Option<TimelineId>,\n        ctx: &RequestContext,\n    ) -> anyhow::Result<Arc<Timeline>> {\n        self.bootstrap_timeline(timeline_id, pg_version, load_existing_initdb, ctx)\n            .await\n            .map_err(anyhow::Error::new)\n            .map(|r| r.into_timeline_for_test())\n    }\n\n    /// Get exclusive access to the timeline ID for creation.\n    ///\n    /// Timeline-creating code paths must use this function before making changes\n    /// to in-memory or persistent state.\n    ///\n    /// The `state` parameter is a description of the timeline creation operation\n    /// we intend to perform.\n    /// If the timeline was already created in the meantime, we check whether this\n    /// request conflicts or is idempotent , based on `state`.\n    async fn start_creating_timeline(\n        self: &Arc<Self>,\n        new_timeline_id: TimelineId,\n        idempotency: CreateTimelineIdempotency,\n    ) -> Result<StartCreatingTimelineResult, CreateTimelineError> {\n        let allow_offloaded = false;\n        match self.create_timeline_create_guard(new_timeline_id, idempotency, allow_offloaded) {\n            Ok(create_guard) => {\n                
pausable_failpoint!(\"timeline-creation-after-uninit\");\n                Ok(StartCreatingTimelineResult::CreateGuard(create_guard))\n            }\n            Err(TimelineExclusionError::ShuttingDown) => Err(CreateTimelineError::ShuttingDown),\n            Err(TimelineExclusionError::AlreadyCreating) => {\n                // Creation is in progress, we cannot create it again, and we cannot\n                // check if this request matches the existing one, so caller must try\n                // again later.\n                Err(CreateTimelineError::AlreadyCreating)\n            }\n            Err(TimelineExclusionError::Other(e)) => Err(CreateTimelineError::Other(e)),\n            Err(TimelineExclusionError::AlreadyExists {\n                existing: TimelineOrOffloaded::Offloaded(_existing),\n                ..\n            }) => {\n                info!(\"timeline already exists but is offloaded\");\n                Err(CreateTimelineError::Conflict)\n            }\n            Err(TimelineExclusionError::AlreadyExists {\n                existing: TimelineOrOffloaded::Importing(_existing),\n                ..\n            }) => {\n                // If there's a timeline already importing, then we would hit\n                // the [`TimelineExclusionError::AlreadyCreating`] branch above.\n                unreachable!(\"Importing timelines hold the creation guard\")\n            }\n            Err(TimelineExclusionError::AlreadyExists {\n                existing: TimelineOrOffloaded::Timeline(existing),\n                arg,\n            }) => {\n                {\n                    let existing = &existing.create_idempotency;\n                    let _span = info_span!(\"idempotency_check\", ?existing, ?arg).entered();\n                    debug!(\"timeline already exists\");\n\n                    match (existing, &arg) {\n                        // FailWithConflict => no idempotency check\n                        
(CreateTimelineIdempotency::FailWithConflict, _)\n                        | (_, CreateTimelineIdempotency::FailWithConflict) => {\n                            warn!(\"timeline already exists, failing request\");\n                            return Err(CreateTimelineError::Conflict);\n                        }\n                        // Idempotent <=> CreateTimelineIdempotency is identical\n                        (x, y) if x == y => {\n                            info!(\n                                \"timeline already exists and idempotency matches, succeeding request\"\n                            );\n                            // fallthrough\n                        }\n                        (_, _) => {\n                            warn!(\"idempotency conflict, failing request\");\n                            return Err(CreateTimelineError::Conflict);\n                        }\n                    }\n                }\n\n                Ok(StartCreatingTimelineResult::Idempotent(existing))\n            }\n        }\n    }\n\n    async fn upload_initdb(\n        &self,\n        timelines_path: &Utf8PathBuf,\n        pgdata_path: &Utf8PathBuf,\n        timeline_id: &TimelineId,\n    ) -> anyhow::Result<()> {\n        let temp_path = timelines_path.join(format!(\n            \"{INITDB_PATH}.upload-{timeline_id}.{TEMP_FILE_SUFFIX}\"\n        ));\n\n        scopeguard::defer! 
{\n            if let Err(e) = fs::remove_file(&temp_path) {\n                error!(\"Failed to remove temporary initdb archive '{temp_path}': {e}\");\n            }\n        }\n\n        let (pgdata_zstd, tar_zst_size) = create_zst_tarball(pgdata_path, &temp_path).await?;\n        const INITDB_TAR_ZST_WARN_LIMIT: u64 = 2 * 1024 * 1024;\n        if tar_zst_size > INITDB_TAR_ZST_WARN_LIMIT {\n            warn!(\n                \"compressed {temp_path} size of {tar_zst_size} is above limit {INITDB_TAR_ZST_WARN_LIMIT}.\"\n            );\n        }\n\n        pausable_failpoint!(\"before-initdb-upload\");\n\n        backoff::retry(\n            || async {\n                self::remote_timeline_client::upload_initdb_dir(\n                    &self.remote_storage,\n                    &self.tenant_shard_id.tenant_id,\n                    timeline_id,\n                    pgdata_zstd.try_clone().await?,\n                    tar_zst_size,\n                    &self.cancel,\n                )\n                .await\n            },\n            |_| false,\n            3,\n            u32::MAX,\n            \"persist_initdb_tar_zst\",\n            &self.cancel,\n        )\n        .await\n        .ok_or_else(|| anyhow::Error::new(TimeoutOrCancel::Cancel))\n        .and_then(|x| x)\n    }\n\n    /// - run initdb to init temporary instance and get bootstrap data\n    /// - after initialization completes, tar up the temp dir and upload it to S3.\n    async fn bootstrap_timeline(\n        self: &Arc<Self>,\n        timeline_id: TimelineId,\n        pg_version: PgMajorVersion,\n        load_existing_initdb: Option<TimelineId>,\n        ctx: &RequestContext,\n    ) -> Result<CreateTimelineResult, CreateTimelineError> {\n        let timeline_create_guard = match self\n            .start_creating_timeline(\n                timeline_id,\n                CreateTimelineIdempotency::Bootstrap { pg_version },\n            )\n            .await?\n        {\n            
StartCreatingTimelineResult::CreateGuard(guard) => guard,\n            StartCreatingTimelineResult::Idempotent(timeline) => {\n                return Ok(CreateTimelineResult::Idempotent(timeline));\n            }\n        };\n\n        // create a `tenant/{tenant_id}/timelines/basebackup-{timeline_id}.{TEMP_FILE_SUFFIX}/`\n        // temporary directory for basebackup files for the given timeline.\n\n        let timelines_path = self.conf.timelines_path(&self.tenant_shard_id);\n        let pgdata_path = path_with_suffix_extension(\n            timelines_path.join(format!(\"basebackup-{timeline_id}\")),\n            TEMP_FILE_SUFFIX,\n        );\n\n        // Remove whatever was left from the previous runs: safe because TimelineCreateGuard guarantees\n        // we won't race with other creations or existent timelines with the same path.\n        if pgdata_path.exists() {\n            fs::remove_dir_all(&pgdata_path).with_context(|| {\n                format!(\"Failed to remove already existing initdb directory: {pgdata_path}\")\n            })?;\n            tracing::info!(\"removed previous attempt's temporary initdb directory '{pgdata_path}'\");\n        }\n\n        // this new directory is very temporary, set to remove it immediately after bootstrap, we don't need it\n        let pgdata_path_deferred = pgdata_path.clone();\n        scopeguard::defer! 
{\n            if let Err(e) = fs::remove_dir_all(&pgdata_path_deferred).or_else(fs_ext::ignore_not_found) {\n                // this is unlikely, but we will remove the directory on pageserver restart or another bootstrap call\n                error!(\"Failed to remove temporary initdb directory '{pgdata_path_deferred}': {e}\");\n            } else {\n                tracing::info!(\"removed temporary initdb directory '{pgdata_path_deferred}'\");\n            }\n        }\n        if let Some(existing_initdb_timeline_id) = load_existing_initdb {\n            if existing_initdb_timeline_id != timeline_id {\n                let source_path = &remote_initdb_archive_path(\n                    &self.tenant_shard_id.tenant_id,\n                    &existing_initdb_timeline_id,\n                );\n                let dest_path =\n                    &remote_initdb_archive_path(&self.tenant_shard_id.tenant_id, &timeline_id);\n\n                // if this fails, it will get retried by retried control plane requests\n                self.remote_storage\n                    .copy_object(source_path, dest_path, &self.cancel)\n                    .await\n                    .context(\"copy initdb tar\")?;\n            }\n            let (initdb_tar_zst_path, initdb_tar_zst) =\n                self::remote_timeline_client::download_initdb_tar_zst(\n                    self.conf,\n                    &self.remote_storage,\n                    &self.tenant_shard_id,\n                    &existing_initdb_timeline_id,\n                    &self.cancel,\n                )\n                .await\n                .context(\"download initdb tar\")?;\n\n            scopeguard::defer! 
{\n                if let Err(e) = fs::remove_file(&initdb_tar_zst_path) {\n                    error!(\"Failed to remove temporary initdb archive '{initdb_tar_zst_path}': {e}\");\n                }\n            }\n\n            let buf_read =\n                BufReader::with_capacity(remote_timeline_client::BUFFER_SIZE, initdb_tar_zst);\n            extract_zst_tarball(&pgdata_path, buf_read)\n                .await\n                .context(\"extract initdb tar\")?;\n        } else {\n            // Init temporarily repo to get bootstrap data, this creates a directory in the `pgdata_path` path\n            run_initdb(self.conf, &pgdata_path, pg_version, &self.cancel)\n                .await\n                .context(\"run initdb\")?;\n\n            // Upload the created data dir to S3\n            if self.tenant_shard_id().is_shard_zero() {\n                self.upload_initdb(&timelines_path, &pgdata_path, &timeline_id)\n                    .await?;\n            }\n        }\n        let pgdata_lsn = import_datadir::get_lsn_from_controlfile(&pgdata_path)?.align();\n\n        // Import the contents of the data directory at the initial checkpoint\n        // LSN, and any WAL after that.\n        // Initdb lsn will be equal to last_record_lsn which will be set after import.\n        // Because we know it upfront avoid having an option or dummy zero value by passing it to the metadata.\n        let new_metadata = TimelineMetadata::new(\n            Lsn(0),\n            None,\n            None,\n            Lsn(0),\n            pgdata_lsn,\n            pgdata_lsn,\n            pg_version,\n        );\n        let (mut raw_timeline, timeline_ctx) = self\n            .prepare_new_timeline(\n                timeline_id,\n                &new_metadata,\n                timeline_create_guard,\n                pgdata_lsn,\n                None,\n                None,\n                None,\n                ctx,\n            )\n            .await?;\n\n        let 
tenant_shard_id = raw_timeline.owning_tenant.tenant_shard_id;\n        raw_timeline\n            .write(|unfinished_timeline| async move {\n                import_datadir::import_timeline_from_postgres_datadir(\n                    &unfinished_timeline,\n                    &pgdata_path,\n                    pgdata_lsn,\n                    &timeline_ctx,\n                )\n                .await\n                .with_context(|| {\n                    format!(\n                        \"Failed to import pgdatadir for timeline {tenant_shard_id}/{timeline_id}\"\n                    )\n                })?;\n\n                fail::fail_point!(\"before-checkpoint-new-timeline\", |_| {\n                    Err(CreateTimelineError::Other(anyhow::anyhow!(\n                        \"failpoint before-checkpoint-new-timeline\"\n                    )))\n                });\n\n                Ok(())\n            })\n            .await?;\n\n        // All done!\n        let timeline = raw_timeline.finish_creation().await?;\n\n        // Callers are responsible to wait for uploads to complete and for activating the timeline.\n\n        Ok(CreateTimelineResult::Created(timeline))\n    }\n\n    fn build_timeline_remote_client(&self, timeline_id: TimelineId) -> RemoteTimelineClient {\n        RemoteTimelineClient::new(\n            self.remote_storage.clone(),\n            self.deletion_queue_client.clone(),\n            self.conf,\n            self.tenant_shard_id,\n            timeline_id,\n            self.generation,\n            &self.tenant_conf.load().location,\n        )\n    }\n\n    /// Builds required resources for a new timeline.\n    fn build_timeline_resources(&self, timeline_id: TimelineId) -> TimelineResources {\n        let remote_client = self.build_timeline_remote_client(timeline_id);\n        self.get_timeline_resources_for(remote_client)\n    }\n\n    /// Builds timeline resources for the given remote client.\n    fn get_timeline_resources_for(&self, 
remote_client: RemoteTimelineClient) -> TimelineResources {\n        TimelineResources {\n            remote_client,\n            pagestream_throttle: self.pagestream_throttle.clone(),\n            pagestream_throttle_metrics: self.pagestream_throttle_metrics.clone(),\n            l0_compaction_trigger: self.l0_compaction_trigger.clone(),\n            l0_flush_global_state: self.l0_flush_global_state.clone(),\n            basebackup_cache: self.basebackup_cache.clone(),\n            feature_resolver: self.feature_resolver.clone(),\n        }\n    }\n\n    /// Creates intermediate timeline structure and its files.\n    ///\n    /// An empty layer map is initialized, and new data and WAL can be imported starting\n    /// at 'disk_consistent_lsn'. After any initial data has been imported, call\n    /// `finish_creation` to insert the Timeline into the timelines map.\n    #[allow(clippy::too_many_arguments)]\n    async fn prepare_new_timeline<'a>(\n        &'a self,\n        new_timeline_id: TimelineId,\n        new_metadata: &TimelineMetadata,\n        create_guard: TimelineCreateGuard,\n        start_lsn: Lsn,\n        ancestor: Option<Arc<Timeline>>,\n        rel_size_v2_status: Option<RelSizeMigration>,\n        rel_size_migrated_at: Option<Lsn>,\n        ctx: &RequestContext,\n    ) -> anyhow::Result<(UninitializedTimeline<'a>, RequestContext)> {\n        let tenant_shard_id = self.tenant_shard_id;\n\n        let resources = self.build_timeline_resources(new_timeline_id);\n        resources.remote_client.init_upload_queue_for_empty_remote(\n            new_metadata,\n            rel_size_v2_status.clone(),\n            rel_size_migrated_at,\n        )?;\n\n        let (timeline_struct, timeline_ctx) = self\n            .create_timeline_struct(\n                new_timeline_id,\n                new_metadata,\n                None,\n                ancestor,\n                resources,\n                CreateTimelineCause::Load,\n                
create_guard.idempotency.clone(),\n                None,\n                rel_size_v2_status,\n                rel_size_migrated_at,\n                ctx,\n            )\n            .context(\"Failed to create timeline data structure\")?;\n\n        timeline_struct.init_empty_layer_map(start_lsn);\n\n        if let Err(e) = self\n            .create_timeline_files(&create_guard.timeline_path)\n            .await\n        {\n            error!(\n                \"Failed to create initial files for timeline {tenant_shard_id}/{new_timeline_id}, cleaning up: {e:?}\"\n            );\n            cleanup_timeline_directory(create_guard);\n            return Err(e);\n        }\n\n        debug!(\n            \"Successfully created initial files for timeline {tenant_shard_id}/{new_timeline_id}\"\n        );\n\n        Ok((\n            UninitializedTimeline::new(\n                self,\n                new_timeline_id,\n                Some((timeline_struct, create_guard)),\n            ),\n            timeline_ctx,\n        ))\n    }\n\n    async fn create_timeline_files(&self, timeline_path: &Utf8Path) -> anyhow::Result<()> {\n        crashsafe::create_dir(timeline_path).context(\"Failed to create timeline directory\")?;\n\n        fail::fail_point!(\"after-timeline-dir-creation\", |_| {\n            anyhow::bail!(\"failpoint after-timeline-dir-creation\");\n        });\n\n        Ok(())\n    }\n\n    /// Get a guard that provides exclusive access to the timeline directory, preventing\n    /// concurrent attempts to create the same timeline.\n    ///\n    /// The `allow_offloaded` parameter controls whether to tolerate the existence of\n    /// offloaded timelines or not.\n    fn create_timeline_create_guard(\n        self: &Arc<Self>,\n        timeline_id: TimelineId,\n        idempotency: CreateTimelineIdempotency,\n        allow_offloaded: bool,\n    ) -> Result<TimelineCreateGuard, TimelineExclusionError> {\n        let tenant_shard_id = self.tenant_shard_id;\n\n    
    let timeline_path = self.conf.timeline_path(&tenant_shard_id, &timeline_id);\n\n        let create_guard = TimelineCreateGuard::new(\n            self,\n            timeline_id,\n            timeline_path.clone(),\n            idempotency,\n            allow_offloaded,\n        )?;\n\n        // At this stage, we have got exclusive access to in-memory state for this timeline ID\n        // for creation.\n        // A timeline directory should never exist on disk already:\n        // - a previous failed creation would have cleaned up after itself\n        // - a pageserver restart would clean up timeline directories that don't have valid remote state\n        //\n        // Therefore it is an unexpected internal error to encounter a timeline directory already existing here,\n        // this error may indicate a bug in cleanup on failed creations.\n        if timeline_path.exists() {\n            return Err(TimelineExclusionError::Other(anyhow::anyhow!(\n                \"Timeline directory already exists! This is a bug.\"\n            )));\n        }\n\n        Ok(create_guard)\n    }\n\n    /// Gathers inputs from all of the timelines to produce a sizing model input.\n    ///\n    /// Future is cancellation safe. 
Only one calculation can be running at once per tenant.\n    #[instrument(skip_all, fields(tenant_id=%self.tenant_shard_id.tenant_id, shard_id=%self.tenant_shard_id.shard_slug()))]\n    pub async fn gather_size_inputs(\n        &self,\n        // `max_retention_period` overrides the cutoff that is used to calculate the size\n        // (only if it is shorter than the real cutoff).\n        max_retention_period: Option<u64>,\n        cause: LogicalSizeCalculationCause,\n        cancel: &CancellationToken,\n        ctx: &RequestContext,\n    ) -> Result<size::ModelInputs, size::CalculateSyntheticSizeError> {\n        let logical_sizes_at_once = self\n            .conf\n            .concurrent_tenant_size_logical_size_queries\n            .inner();\n\n        // TODO: Having a single mutex block concurrent reads is not great for performance.\n        //\n        // But the only case where we need to run multiple of these at once is when we\n        // request a size for a tenant manually via API, while another background calculation\n        // is in progress (which is not a common case).\n        //\n        // See more for on the issue #2748 condenced out of the initial PR review.\n        let mut shared_cache = tokio::select! 
{\n            locked = self.cached_logical_sizes.lock() => locked,\n            _ = cancel.cancelled() => return Err(size::CalculateSyntheticSizeError::Cancelled),\n            _ = self.cancel.cancelled() => return Err(size::CalculateSyntheticSizeError::Cancelled),\n        };\n\n        size::gather_inputs(\n            self,\n            logical_sizes_at_once,\n            max_retention_period,\n            &mut shared_cache,\n            cause,\n            cancel,\n            ctx,\n        )\n        .await\n    }\n\n    /// Calculate synthetic tenant size and cache the result.\n    /// This is periodically called by background worker.\n    /// result is cached in tenant struct\n    #[instrument(skip_all, fields(tenant_id=%self.tenant_shard_id.tenant_id, shard_id=%self.tenant_shard_id.shard_slug()))]\n    pub async fn calculate_synthetic_size(\n        &self,\n        cause: LogicalSizeCalculationCause,\n        cancel: &CancellationToken,\n        ctx: &RequestContext,\n    ) -> Result<u64, size::CalculateSyntheticSizeError> {\n        let inputs = self.gather_size_inputs(None, cause, cancel, ctx).await?;\n\n        let size = inputs.calculate();\n\n        self.set_cached_synthetic_size(size);\n\n        Ok(size)\n    }\n\n    /// Cache given synthetic size and update the metric value\n    pub fn set_cached_synthetic_size(&self, size: u64) {\n        self.cached_synthetic_tenant_size\n            .store(size, Ordering::Relaxed);\n\n        // Only shard zero should be calculating synthetic sizes\n        debug_assert!(self.shard_identity.is_shard_zero());\n\n        TENANT_SYNTHETIC_SIZE_METRIC\n            .get_metric_with_label_values(&[&self.tenant_shard_id.tenant_id.to_string()])\n            .unwrap()\n            .set(size);\n    }\n\n    pub fn cached_synthetic_size(&self) -> u64 {\n        self.cached_synthetic_tenant_size.load(Ordering::Relaxed)\n    }\n\n    /// Flush any in-progress layers, schedule uploads, and wait for uploads to complete.\n    
///\n    /// This function can take a long time: callers should wrap it in a timeout if calling\n    /// from an external API handler.\n    ///\n    /// Cancel-safety: cancelling this function may leave I/O running, but such I/O is\n    /// still bounded by tenant/timeline shutdown.\n    #[tracing::instrument(skip_all)]\n    pub(crate) async fn flush_remote(&self) -> anyhow::Result<()> {\n        let timelines = self.timelines.lock().unwrap().clone();\n\n        async fn flush_timeline(_gate: GateGuard, timeline: Arc<Timeline>) -> anyhow::Result<()> {\n            tracing::info!(timeline_id=%timeline.timeline_id, \"Flushing...\");\n            timeline.freeze_and_flush().await?;\n            tracing::info!(timeline_id=%timeline.timeline_id, \"Waiting for uploads...\");\n            timeline.remote_client.wait_completion().await?;\n\n            Ok(())\n        }\n\n        // We do not use a JoinSet for these tasks, because we don't want them to be\n        // aborted when this function's future is cancelled: they should stay alive\n        // holding their GateGuard until they complete, to ensure their I/Os complete\n        // before Timeline shutdown completes.\n        let mut results = FuturesUnordered::new();\n\n        for (_timeline_id, timeline) in timelines {\n            // Run each timeline's flush in a task holding the timeline's gate: this\n            // means that if this function's future is cancelled, the Timeline shutdown\n            // will still wait for any I/O in here to complete.\n            let Ok(gate) = timeline.gate.enter() else {\n                continue;\n            };\n            let jh = tokio::task::spawn(async move { flush_timeline(gate, timeline).await });\n            results.push(jh);\n        }\n\n        while let Some(r) = results.next().await {\n            if let Err(e) = r {\n                if !e.is_cancelled() && !e.is_panic() {\n                    tracing::error!(\"unexpected join error: {e:?}\");\n                
}\n            }\n        }\n\n        // The flushes we did above were just writes, but the TenantShard might have had\n        // pending deletions as well from recent compaction/gc: we want to flush those\n        // as well.  This requires flushing the global delete queue.  This is cheap\n        // because it's typically a no-op.\n        match self.deletion_queue_client.flush_execute().await {\n            Ok(_) => {}\n            Err(DeletionQueueError::ShuttingDown) => {}\n        }\n\n        Ok(())\n    }\n\n    pub(crate) fn get_tenant_conf(&self) -> pageserver_api::models::TenantConfig {\n        self.tenant_conf.load().tenant_conf.clone()\n    }\n\n    /// How much local storage would this tenant like to have?  It can cope with\n    /// less than this (via eviction and on-demand downloads), but this function enables\n    /// the TenantShard to advertise how much storage it would prefer to have to provide fast I/O\n    /// by keeping important things on local disk.\n    ///\n    /// This is a heuristic, not a guarantee: tenants that are long-idle will actually use less\n    /// than they report here, due to layer eviction.  Tenants with many active branches may\n    /// actually use more than they report here.\n    pub(crate) fn local_storage_wanted(&self) -> u64 {\n        let timelines = self.timelines.lock().unwrap();\n\n        // Heuristic: we use the max() of the timelines' visible sizes, rather than the sum.  
This\n        // reflects the observation that on tenants with multiple large branches, typically only one\n        // of them is used actively enough to occupy space on disk.\n        timelines\n            .values()\n            .map(|t| t.metrics.visible_physical_size_gauge.get())\n            .max()\n            .unwrap_or(0)\n    }\n\n    /// HADRON\n    /// Return the visible size of all timelines in this tenant.\n    pub(crate) fn get_visible_size(&self) -> u64 {\n        let timelines = self.timelines.lock().unwrap();\n        timelines\n            .values()\n            .map(|t| t.metrics.visible_physical_size_gauge.get())\n            .sum()\n    }\n\n    /// Builds a new tenant manifest, and uploads it if it differs from the last-known tenant\n    /// manifest in `Self::remote_tenant_manifest`.\n    ///\n    /// TODO: instead of requiring callers to remember to call `maybe_upload_tenant_manifest` after\n    /// changing any `TenantShard` state that's included in the manifest, consider making the manifest\n    /// the authoritative source of data with an API that automatically uploads on changes. Revisit\n    /// this when the manifest is more widely used and we have a better idea of the data model.\n    pub(crate) async fn maybe_upload_tenant_manifest(&self) -> Result<(), TenantManifestError> {\n        // Multiple tasks may call this function concurrently after mutating the TenantShard runtime\n        // state, affecting the manifest generated by `build_tenant_manifest`. We use an async mutex\n        // to serialize these callers. `eq_ignoring_version` acts as a slightly inefficient but\n        // simple coalescing mechanism.\n        let mut guard = tokio::select! 
{\n            guard = self.remote_tenant_manifest.lock() => guard,\n            _ = self.cancel.cancelled() => return Err(TenantManifestError::Cancelled),\n        };\n\n        // Build a new manifest.\n        let manifest = self.build_tenant_manifest();\n\n        // Check if the manifest has changed. We ignore the version number here, to avoid\n        // uploading every manifest on version number bumps.\n        if let Some(old) = guard.as_ref() {\n            if manifest.eq_ignoring_version(old) {\n                return Ok(());\n            }\n        }\n\n        // Update metrics\n        let tid = self.tenant_shard_id.to_string();\n        let shard_id = self.tenant_shard_id.shard_slug().to_string();\n        let set_key = &[tid.as_str(), shard_id.as_str()][..];\n        TENANT_OFFLOADED_TIMELINES\n            .with_label_values(set_key)\n            .set(manifest.offloaded_timelines.len() as u64);\n\n        // Upload the manifest. Remote storage does no retries internally, so retry here.\n        match backoff::retry(\n            || async {\n                upload_tenant_manifest(\n                    &self.remote_storage,\n                    &self.tenant_shard_id,\n                    self.generation,\n                    &manifest,\n                    &self.cancel,\n                )\n                .await\n            },\n            |_| self.cancel.is_cancelled(),\n            FAILED_UPLOAD_WARN_THRESHOLD,\n            FAILED_REMOTE_OP_RETRIES,\n            \"uploading tenant manifest\",\n            &self.cancel,\n        )\n        .await\n        {\n            None => Err(TenantManifestError::Cancelled),\n            Some(Err(_)) if self.cancel.is_cancelled() => Err(TenantManifestError::Cancelled),\n            Some(Err(e)) => Err(TenantManifestError::RemoteStorage(e)),\n            Some(Ok(_)) => {\n                // Store the successfully uploaded manifest, so that future callers can avoid\n                // re-uploading the same 
thing.\n                *guard = Some(manifest);\n\n                Ok(())\n            }\n        }\n    }\n}\n\n/// Create the cluster temporarily in 'initdbpath' directory inside the repository\n/// to get bootstrap data for timeline initialization.\nasync fn run_initdb(\n    conf: &'static PageServerConf,\n    initdb_target_dir: &Utf8Path,\n    pg_version: PgMajorVersion,\n    cancel: &CancellationToken,\n) -> Result<(), InitdbError> {\n    let initdb_bin_path = conf\n        .pg_bin_dir(pg_version)\n        .map_err(InitdbError::Other)?\n        .join(\"initdb\");\n    let initdb_lib_dir = conf.pg_lib_dir(pg_version).map_err(InitdbError::Other)?;\n    info!(\n        \"running {} in {}, libdir: {}\",\n        initdb_bin_path, initdb_target_dir, initdb_lib_dir,\n    );\n\n    let _permit = {\n        let _timer = INITDB_SEMAPHORE_ACQUISITION_TIME.start_timer();\n        INIT_DB_SEMAPHORE.acquire().await\n    };\n\n    CONCURRENT_INITDBS.inc();\n    scopeguard::defer! {\n        CONCURRENT_INITDBS.dec();\n    }\n\n    let _timer = INITDB_RUN_TIME.start_timer();\n    let res = postgres_initdb::do_run_initdb(postgres_initdb::RunInitdbArgs {\n        superuser: &conf.superuser,\n        locale: &conf.locale,\n        initdb_bin: &initdb_bin_path,\n        pg_version,\n        library_search_path: &initdb_lib_dir,\n        pgdata: initdb_target_dir,\n    })\n    .await\n    .map_err(InitdbError::Inner);\n\n    // This isn't true cancellation support, see above. 
Still return an error to\n    // exercise the cancellation code path.\n    if cancel.is_cancelled() {\n        return Err(InitdbError::Cancelled);\n    }\n\n    res\n}\n\n/// Dump contents of a layer file to stdout.\npub async fn dump_layerfile_from_path(\n    path: &Utf8Path,\n    verbose: bool,\n    ctx: &RequestContext,\n) -> anyhow::Result<()> {\n    use std::os::unix::fs::FileExt;\n\n    // All layer files start with a two-byte \"magic\" value, to identify the kind of\n    // file.\n    let file = File::open(path)?;\n    let mut header_buf = [0u8; 2];\n    file.read_exact_at(&mut header_buf, 0)?;\n\n    match u16::from_be_bytes(header_buf) {\n        crate::IMAGE_FILE_MAGIC => {\n            ImageLayer::new_for_path(path, file)?\n                .dump(verbose, ctx)\n                .await?\n        }\n        crate::DELTA_FILE_MAGIC => {\n            DeltaLayer::new_for_path(path, file)?\n                .dump(verbose, ctx)\n                .await?\n        }\n        magic => bail!(\"unrecognized magic identifier: {:?}\", magic),\n    }\n\n    Ok(())\n}\n\n#[cfg(test)]\npub(crate) mod harness {\n    use bytes::{Bytes, BytesMut};\n    use hex_literal::hex;\n    use once_cell::sync::OnceCell;\n    use pageserver_api::key::Key;\n    use pageserver_api::models::ShardParameters;\n    use pageserver_api::shard::ShardIndex;\n    use utils::id::TenantId;\n    use utils::logging;\n    use wal_decoder::models::record::NeonWalRecord;\n\n    use super::*;\n    use crate::deletion_queue::mock::MockDeletionQueue;\n    use crate::l0_flush::L0FlushConfig;\n    use crate::walredo::apply_neon;\n\n    pub const TIMELINE_ID: TimelineId =\n        TimelineId::from_array(hex!(\"11223344556677881122334455667788\"));\n    pub const NEW_TIMELINE_ID: TimelineId =\n        TimelineId::from_array(hex!(\"AA223344556677881122334455667788\"));\n\n    /// Convenience function to create a page image with given string as the only content\n    pub fn test_img(s: &str) -> Bytes {\n        let 
mut buf = BytesMut::new();\n        buf.extend_from_slice(s.as_bytes());\n        buf.resize(64, 0);\n\n        buf.freeze()\n    }\n\n    pub struct TenantHarness {\n        pub conf: &'static PageServerConf,\n        pub tenant_conf: pageserver_api::models::TenantConfig,\n        pub tenant_shard_id: TenantShardId,\n        pub shard_identity: ShardIdentity,\n        pub generation: Generation,\n        pub shard: ShardIndex,\n        pub remote_storage: GenericRemoteStorage,\n        pub remote_fs_dir: Utf8PathBuf,\n        pub deletion_queue: MockDeletionQueue,\n    }\n\n    static LOG_HANDLE: OnceCell<()> = OnceCell::new();\n\n    pub(crate) fn setup_logging() {\n        LOG_HANDLE.get_or_init(|| {\n            logging::init(\n                logging::LogFormat::Test,\n                // enable it in case the tests exercise code paths that use\n                // debug_assert_current_span_has_tenant_and_timeline_id\n                logging::TracingErrorLayerEnablement::EnableWithRustLogFilter,\n                logging::Output::Stdout,\n            )\n            .expect(\"Failed to init test logging\");\n        });\n    }\n\n    impl TenantHarness {\n        pub async fn create_custom(\n            test_name: &'static str,\n            tenant_conf: pageserver_api::models::TenantConfig,\n            tenant_id: TenantId,\n            shard_identity: ShardIdentity,\n            generation: Generation,\n        ) -> anyhow::Result<Self> {\n            setup_logging();\n\n            let repo_dir = PageServerConf::test_repo_dir(test_name);\n            let _ = fs::remove_dir_all(&repo_dir);\n            fs::create_dir_all(&repo_dir)?;\n\n            let conf = PageServerConf::dummy_conf(repo_dir);\n            // Make a static copy of the config. 
This can never be free'd, but that's\n            // OK in a test.\n            let conf: &'static PageServerConf = Box::leak(Box::new(conf));\n\n            let shard = shard_identity.shard_index();\n            let tenant_shard_id = TenantShardId {\n                tenant_id,\n                shard_number: shard.shard_number,\n                shard_count: shard.shard_count,\n            };\n            fs::create_dir_all(conf.tenant_path(&tenant_shard_id))?;\n            fs::create_dir_all(conf.timelines_path(&tenant_shard_id))?;\n\n            use remote_storage::{RemoteStorageConfig, RemoteStorageKind};\n            let remote_fs_dir = conf.workdir.join(\"localfs\");\n            std::fs::create_dir_all(&remote_fs_dir).unwrap();\n            let config = RemoteStorageConfig {\n                storage: RemoteStorageKind::LocalFs {\n                    local_path: remote_fs_dir.clone(),\n                },\n                timeout: RemoteStorageConfig::DEFAULT_TIMEOUT,\n                small_timeout: RemoteStorageConfig::DEFAULT_SMALL_TIMEOUT,\n            };\n            let remote_storage = GenericRemoteStorage::from_config(&config).await.unwrap();\n            let deletion_queue = MockDeletionQueue::new(Some(remote_storage.clone()));\n\n            Ok(Self {\n                conf,\n                tenant_conf,\n                tenant_shard_id,\n                shard_identity,\n                generation,\n                shard,\n                remote_storage,\n                remote_fs_dir,\n                deletion_queue,\n            })\n        }\n\n        pub async fn create(test_name: &'static str) -> anyhow::Result<Self> {\n            // Disable automatic GC and compaction to make the unit tests more deterministic.\n            // The tests perform them manually if needed.\n            let tenant_conf = pageserver_api::models::TenantConfig {\n                gc_period: Some(Duration::ZERO),\n                compaction_period: Some(Duration::ZERO),\n   
             ..Default::default()\n            };\n            let tenant_id = TenantId::generate();\n            let shard = ShardIdentity::unsharded();\n            Self::create_custom(\n                test_name,\n                tenant_conf,\n                tenant_id,\n                shard,\n                Generation::new(0xdeadbeef),\n            )\n            .await\n        }\n\n        pub fn span(&self) -> tracing::Span {\n            info_span!(\"TenantHarness\", tenant_id=%self.tenant_shard_id.tenant_id, shard_id=%self.tenant_shard_id.shard_slug())\n        }\n\n        pub(crate) async fn load(&self) -> (Arc<TenantShard>, RequestContext) {\n            let ctx = RequestContext::new(TaskKind::UnitTest, DownloadBehavior::Error)\n                .with_scope_unit_test();\n            (\n                self.do_try_load(&ctx)\n                    .await\n                    .expect(\"failed to load test tenant\"),\n                ctx,\n            )\n        }\n\n        #[instrument(skip_all, fields(tenant_id=%self.tenant_shard_id.tenant_id, shard_id=%self.tenant_shard_id.shard_slug()))]\n        pub(crate) async fn do_try_load_with_redo(\n            &self,\n            walredo_mgr: Arc<WalRedoManager>,\n            ctx: &RequestContext,\n        ) -> anyhow::Result<Arc<TenantShard>> {\n            let (basebackup_cache, _) = BasebackupCache::new(Utf8PathBuf::new(), None);\n\n            let tenant = Arc::new(TenantShard::new(\n                TenantState::Attaching,\n                self.conf,\n                AttachedTenantConf::try_from(\n                    self.conf,\n                    LocationConf::attached_single(\n                        self.tenant_conf.clone(),\n                        self.generation,\n                        ShardParameters::default(),\n                    ),\n                )\n                .unwrap(),\n                self.shard_identity,\n                Some(walredo_mgr),\n                self.tenant_shard_id,\n    
            self.remote_storage.clone(),\n                self.deletion_queue.new_client(),\n                // TODO: ideally we should run all unit tests with both configs\n                L0FlushGlobalState::new(L0FlushConfig::default()),\n                basebackup_cache,\n                FeatureResolver::new_disabled(),\n            ));\n\n            let preload = tenant\n                .preload(&self.remote_storage, CancellationToken::new())\n                .await?;\n            tenant.attach(Some(preload), ctx).await?;\n\n            tenant.state.send_replace(TenantState::Active);\n            for timeline in tenant.timelines.lock().unwrap().values() {\n                timeline.set_state(TimelineState::Active);\n            }\n            Ok(tenant)\n        }\n\n        pub(crate) async fn do_try_load(\n            &self,\n            ctx: &RequestContext,\n        ) -> anyhow::Result<Arc<TenantShard>> {\n            let walredo_mgr = Arc::new(WalRedoManager::from(TestRedoManager));\n            self.do_try_load_with_redo(walredo_mgr, ctx).await\n        }\n\n        pub fn timeline_path(&self, timeline_id: &TimelineId) -> Utf8PathBuf {\n            self.conf.timeline_path(&self.tenant_shard_id, timeline_id)\n        }\n    }\n\n    // Mock WAL redo manager that doesn't do much\n    pub(crate) struct TestRedoManager;\n\n    impl TestRedoManager {\n        /// # Cancel-Safety\n        ///\n        /// This method is cancellation-safe.\n        pub async fn request_redo(\n            &self,\n            key: Key,\n            lsn: Lsn,\n            base_img: Option<(Lsn, Bytes)>,\n            records: Vec<(Lsn, NeonWalRecord)>,\n            _pg_version: PgMajorVersion,\n            _redo_attempt_type: RedoAttemptType,\n        ) -> Result<Bytes, walredo::Error> {\n            let records_neon = records.iter().all(|r| apply_neon::can_apply_in_neon(&r.1));\n            if records_neon {\n                // For Neon wal records, we can decode without spawning 
postgres, so do so.\n                let mut page = match (base_img, records.first()) {\n                    (Some((_lsn, img)), _) => {\n                        let mut page = BytesMut::new();\n                        page.extend_from_slice(&img);\n                        page\n                    }\n                    (_, Some((_lsn, rec))) if rec.will_init() => BytesMut::new(),\n                    _ => {\n                        panic!(\"Neon WAL redo requires base image or will init record\");\n                    }\n                };\n\n                for (record_lsn, record) in records {\n                    apply_neon::apply_in_neon(&record, record_lsn, key, &mut page)?;\n                }\n                Ok(page.freeze())\n            } else {\n                // We never spawn a postgres walredo process in unit tests: just log what we might have done.\n                let s = format!(\n                    \"redo for {} to get to {}, with {} and {} records\",\n                    key,\n                    lsn,\n                    if base_img.is_some() {\n                        \"base image\"\n                    } else {\n                        \"no base image\"\n                    },\n                    records.len()\n                );\n                println!(\"{s}\");\n\n                Ok(test_img(&s))\n            }\n        }\n    }\n}\n\n#[cfg(test)]\nmod tests {\n    use std::collections::{BTreeMap, BTreeSet};\n\n    use bytes::{Bytes, BytesMut};\n    use hex_literal::hex;\n    use itertools::Itertools;\n    #[cfg(feature = \"testing\")]\n    use models::CompactLsnRange;\n    use pageserver_api::key::{\n        AUX_KEY_PREFIX, Key, NON_INHERITED_RANGE, RELATION_SIZE_PREFIX, repl_origin_key,\n    };\n    use pageserver_api::keyspace::KeySpace;\n    #[cfg(feature = \"testing\")]\n    use pageserver_api::keyspace::KeySpaceRandomAccum;\n    use pageserver_api::models::{CompactionAlgorithm, CompactionAlgorithmSettings, LsnLease};\n    use 
pageserver_compaction::helpers::overlaps_with;\n    use rand::Rng;\n    #[cfg(feature = \"testing\")]\n    use rand::SeedableRng;\n    #[cfg(feature = \"testing\")]\n    use rand::rngs::StdRng;\n    #[cfg(feature = \"testing\")]\n    use std::ops::Range;\n    use storage_layer::{IoConcurrency, PersistentLayerKey};\n    use tests::storage_layer::ValuesReconstructState;\n    use tests::timeline::{GetVectoredError, ShutdownMode};\n    #[cfg(feature = \"testing\")]\n    use timeline::GcInfo;\n    #[cfg(feature = \"testing\")]\n    use timeline::InMemoryLayerTestDesc;\n    #[cfg(feature = \"testing\")]\n    use timeline::compaction::{KeyHistoryRetention, KeyLogAtLsn};\n    use timeline::{CompactOptions, DeltaLayerTestDesc, VersionedKeySpaceQuery};\n    use utils::id::TenantId;\n    use utils::shard::{ShardCount, ShardNumber};\n    #[cfg(feature = \"testing\")]\n    use wal_decoder::models::record::NeonWalRecord;\n    use wal_decoder::models::value::Value;\n\n    use super::*;\n    use crate::DEFAULT_PG_VERSION;\n    use crate::keyspace::KeySpaceAccum;\n    use crate::tenant::harness::*;\n    use crate::tenant::timeline::CompactFlags;\n\n    static TEST_KEY: Lazy<Key> =\n        Lazy::new(|| Key::from_slice(&hex!(\"010000000033333333444444445500000001\")));\n\n    #[cfg(feature = \"testing\")]\n    struct TestTimelineSpecification {\n        start_lsn: Lsn,\n        last_record_lsn: Lsn,\n\n        in_memory_layers_shape: Vec<(Range<Key>, Range<Lsn>)>,\n        delta_layers_shape: Vec<(Range<Key>, Range<Lsn>)>,\n        image_layers_shape: Vec<(Range<Key>, Lsn)>,\n\n        gap_chance: u8,\n        will_init_chance: u8,\n    }\n\n    #[cfg(feature = \"testing\")]\n    struct Storage {\n        storage: HashMap<(Key, Lsn), Value>,\n        start_lsn: Lsn,\n    }\n\n    #[cfg(feature = \"testing\")]\n    impl Storage {\n        fn get(&self, key: Key, lsn: Lsn) -> Bytes {\n            use bytes::BufMut;\n\n            let mut crnt_lsn = lsn;\n            let mut got_base = 
false;\n\n            let mut acc = Vec::new();\n\n            while crnt_lsn >= self.start_lsn {\n                if let Some(value) = self.storage.get(&(key, crnt_lsn)) {\n                    acc.push(value.clone());\n\n                    match value {\n                        Value::WalRecord(NeonWalRecord::Test { will_init, .. }) => {\n                            if *will_init {\n                                got_base = true;\n                                break;\n                            }\n                        }\n                        Value::Image(_) => {\n                            got_base = true;\n                            break;\n                        }\n                        _ => unreachable!(),\n                    }\n                }\n\n                crnt_lsn = crnt_lsn.checked_sub(1u64).unwrap();\n            }\n\n            assert!(\n                got_base,\n                \"Input data was incorrect. No base image for {key}@{lsn}\"\n            );\n\n            tracing::debug!(\"Wal redo depth for {key}@{lsn} is {}\", acc.len());\n\n            let mut blob = BytesMut::new();\n            for value in acc.into_iter().rev() {\n                match value {\n                    Value::WalRecord(NeonWalRecord::Test { append, .. 
}) => {\n                        blob.extend_from_slice(append.as_bytes());\n                    }\n                    Value::Image(img) => {\n                        blob.put(img);\n                    }\n                    _ => unreachable!(),\n                }\n            }\n\n            blob.into()\n        }\n    }\n\n    #[cfg(feature = \"testing\")]\n    #[allow(clippy::too_many_arguments)]\n    async fn randomize_timeline(\n        tenant: &Arc<TenantShard>,\n        new_timeline_id: TimelineId,\n        pg_version: PgMajorVersion,\n        spec: TestTimelineSpecification,\n        random: &mut rand::rngs::StdRng,\n        ctx: &RequestContext,\n    ) -> anyhow::Result<(Arc<Timeline>, Storage, Vec<Lsn>)> {\n        let mut storage: HashMap<(Key, Lsn), Value> = HashMap::default();\n        let mut interesting_lsns = vec![spec.last_record_lsn];\n\n        for (key_range, lsn_range) in spec.in_memory_layers_shape.iter() {\n            let mut lsn = lsn_range.start;\n            while lsn < lsn_range.end {\n                let mut key = key_range.start;\n                while key < key_range.end {\n                    let gap = random.random_range(1..=100) <= spec.gap_chance;\n                    let will_init = random.random_range(1..=100) <= spec.will_init_chance;\n\n                    if gap {\n                        continue;\n                    }\n\n                    let record = if will_init {\n                        Value::WalRecord(NeonWalRecord::wal_init(format!(\"[wil_init {key}@{lsn}]\")))\n                    } else {\n                        Value::WalRecord(NeonWalRecord::wal_append(format!(\"[delta {key}@{lsn}]\")))\n                    };\n\n                    storage.insert((key, lsn), record);\n\n                    key = key.next();\n                }\n                lsn = Lsn(lsn.0 + 1);\n            }\n\n            // Stash some interesting LSN for future use\n            for offset in [0, 5, 100].iter() {\n                if 
*offset == 0 {\n                    interesting_lsns.push(lsn_range.start);\n                } else {\n                    let below = lsn_range.start.checked_sub(*offset);\n                    match below {\n                        Some(v) if v >= spec.start_lsn => {\n                            interesting_lsns.push(v);\n                        }\n                        _ => {}\n                    }\n\n                    let above = Lsn(lsn_range.start.0 + offset);\n                    interesting_lsns.push(above);\n                }\n            }\n        }\n\n        for (key_range, lsn_range) in spec.delta_layers_shape.iter() {\n            let mut lsn = lsn_range.start;\n            while lsn < lsn_range.end {\n                let mut key = key_range.start;\n                while key < key_range.end {\n                    let gap = random.random_range(1..=100) <= spec.gap_chance;\n                    let will_init = random.random_range(1..=100) <= spec.will_init_chance;\n\n                    if gap {\n                        continue;\n                    }\n\n                    let record = if will_init {\n                        Value::WalRecord(NeonWalRecord::wal_init(format!(\"[wil_init {key}@{lsn}]\")))\n                    } else {\n                        Value::WalRecord(NeonWalRecord::wal_append(format!(\"[delta {key}@{lsn}]\")))\n                    };\n\n                    storage.insert((key, lsn), record);\n\n                    key = key.next();\n                }\n                lsn = Lsn(lsn.0 + 1);\n            }\n\n            // Stash some interesting LSN for future use\n            for offset in [0, 5, 100].iter() {\n                if *offset == 0 {\n                    interesting_lsns.push(lsn_range.start);\n                } else {\n                    let below = lsn_range.start.checked_sub(*offset);\n                    match below {\n                        Some(v) if v >= spec.start_lsn => {\n                            
interesting_lsns.push(v);\n                        }\n                        _ => {}\n                    }\n\n                    let above = Lsn(lsn_range.start.0 + offset);\n                    interesting_lsns.push(above);\n                }\n            }\n        }\n\n        for (key_range, lsn) in spec.image_layers_shape.iter() {\n            let mut key = key_range.start;\n            while key < key_range.end {\n                let blob = Bytes::from(format!(\"[image {key}@{lsn}]\"));\n                let record = Value::Image(blob.clone());\n                storage.insert((key, *lsn), record);\n\n                key = key.next();\n            }\n\n            // Stash some interesting LSN for future use\n            for offset in [0, 5, 100].iter() {\n                if *offset == 0 {\n                    interesting_lsns.push(*lsn);\n                } else {\n                    let below = lsn.checked_sub(*offset);\n                    match below {\n                        Some(v) if v >= spec.start_lsn => {\n                            interesting_lsns.push(v);\n                        }\n                        _ => {}\n                    }\n\n                    let above = Lsn(lsn.0 + offset);\n                    interesting_lsns.push(above);\n                }\n            }\n        }\n\n        let in_memory_test_layers = {\n            let mut acc = Vec::new();\n\n            for (key_range, lsn_range) in spec.in_memory_layers_shape.iter() {\n                let mut data = Vec::new();\n\n                let mut lsn = lsn_range.start;\n                while lsn < lsn_range.end {\n                    let mut key = key_range.start;\n                    while key < key_range.end {\n                        if let Some(record) = storage.get(&(key, lsn)) {\n                            data.push((key, lsn, record.clone()));\n                        }\n\n                        key = key.next();\n                    }\n                    lsn = 
Lsn(lsn.0 + 1);\n                }\n\n                acc.push(InMemoryLayerTestDesc {\n                    data,\n                    lsn_range: lsn_range.clone(),\n                    is_open: false,\n                })\n            }\n\n            acc\n        };\n\n        let delta_test_layers = {\n            let mut acc = Vec::new();\n\n            for (key_range, lsn_range) in spec.delta_layers_shape.iter() {\n                let mut data = Vec::new();\n\n                let mut lsn = lsn_range.start;\n                while lsn < lsn_range.end {\n                    let mut key = key_range.start;\n                    while key < key_range.end {\n                        if let Some(record) = storage.get(&(key, lsn)) {\n                            data.push((key, lsn, record.clone()));\n                        }\n\n                        key = key.next();\n                    }\n                    lsn = Lsn(lsn.0 + 1);\n                }\n\n                acc.push(DeltaLayerTestDesc {\n                    data,\n                    lsn_range: lsn_range.clone(),\n                    key_range: key_range.clone(),\n                })\n            }\n\n            acc\n        };\n\n        let image_test_layers = {\n            let mut acc = Vec::new();\n\n            for (key_range, lsn) in spec.image_layers_shape.iter() {\n                let mut data = Vec::new();\n\n                let mut key = key_range.start;\n                while key < key_range.end {\n                    if let Some(record) = storage.get(&(key, *lsn)) {\n                        let blob = match record {\n                            Value::Image(blob) => blob.clone(),\n                            _ => unreachable!(),\n                        };\n\n                        data.push((key, blob));\n                    }\n\n                    key = key.next();\n                }\n\n                acc.push((*lsn, data));\n            }\n\n            acc\n        };\n\n        let 
tline = tenant\n            .create_test_timeline_with_layers(\n                new_timeline_id,\n                spec.start_lsn,\n                pg_version,\n                ctx,\n                in_memory_test_layers,\n                delta_test_layers,\n                image_test_layers,\n                spec.last_record_lsn,\n            )\n            .await?;\n\n        Ok((\n            tline,\n            Storage {\n                storage,\n                start_lsn: spec.start_lsn,\n            },\n            interesting_lsns,\n        ))\n    }\n\n    #[tokio::test]\n    async fn test_basic() -> anyhow::Result<()> {\n        let (tenant, ctx) = TenantHarness::create(\"test_basic\").await?.load().await;\n        let tline = tenant\n            .create_test_timeline(TIMELINE_ID, Lsn(0x08), DEFAULT_PG_VERSION, &ctx)\n            .await?;\n\n        let mut writer = tline.writer().await;\n        writer\n            .put(\n                *TEST_KEY,\n                Lsn(0x10),\n                &Value::Image(test_img(\"foo at 0x10\")),\n                &ctx,\n            )\n            .await?;\n        writer.finish_write(Lsn(0x10));\n        drop(writer);\n\n        let mut writer = tline.writer().await;\n        writer\n            .put(\n                *TEST_KEY,\n                Lsn(0x20),\n                &Value::Image(test_img(\"foo at 0x20\")),\n                &ctx,\n            )\n            .await?;\n        writer.finish_write(Lsn(0x20));\n        drop(writer);\n\n        assert_eq!(\n            tline.get(*TEST_KEY, Lsn(0x10), &ctx).await?,\n            test_img(\"foo at 0x10\")\n        );\n        assert_eq!(\n            tline.get(*TEST_KEY, Lsn(0x1f), &ctx).await?,\n            test_img(\"foo at 0x10\")\n        );\n        assert_eq!(\n            tline.get(*TEST_KEY, Lsn(0x20), &ctx).await?,\n            test_img(\"foo at 0x20\")\n        );\n\n        Ok(())\n    }\n\n    #[tokio::test]\n    async fn no_duplicate_timelines() -> 
anyhow::Result<()> {\n        let (tenant, ctx) = TenantHarness::create(\"no_duplicate_timelines\")\n            .await?\n            .load()\n            .await;\n        let _ = tenant\n            .create_test_timeline(TIMELINE_ID, Lsn(0x10), DEFAULT_PG_VERSION, &ctx)\n            .await?;\n\n        match tenant\n            .create_empty_timeline(TIMELINE_ID, Lsn(0x10), DEFAULT_PG_VERSION, &ctx)\n            .await\n        {\n            Ok(_) => panic!(\"duplicate timeline creation should fail\"),\n            Err(e) => assert_eq!(\n                e.to_string(),\n                \"timeline already exists with different parameters\".to_string()\n            ),\n        }\n\n        Ok(())\n    }\n\n    /// Convenience function to create a page image with given string as the only content\n    pub fn test_value(s: &str) -> Value {\n        let mut buf = BytesMut::new();\n        buf.extend_from_slice(s.as_bytes());\n        Value::Image(buf.freeze())\n    }\n\n    ///\n    /// Test branch creation\n    ///\n    #[tokio::test]\n    async fn test_branch() -> anyhow::Result<()> {\n        use std::str::from_utf8;\n\n        let (tenant, ctx) = TenantHarness::create(\"test_branch\").await?.load().await;\n        let tline = tenant\n            .create_test_timeline(TIMELINE_ID, Lsn(0x10), DEFAULT_PG_VERSION, &ctx)\n            .await?;\n        let mut writer = tline.writer().await;\n\n        #[allow(non_snake_case)]\n        let TEST_KEY_A: Key = Key::from_hex(\"110000000033333333444444445500000001\").unwrap();\n        #[allow(non_snake_case)]\n        let TEST_KEY_B: Key = Key::from_hex(\"110000000033333333444444445500000002\").unwrap();\n\n        // Insert a value on the timeline\n        writer\n            .put(TEST_KEY_A, Lsn(0x20), &test_value(\"foo at 0x20\"), &ctx)\n            .await?;\n        writer\n            .put(TEST_KEY_B, Lsn(0x20), &test_value(\"foobar at 0x20\"), &ctx)\n            .await?;\n        writer.finish_write(Lsn(0x20));\n\n       
 writer\n            .put(TEST_KEY_A, Lsn(0x30), &test_value(\"foo at 0x30\"), &ctx)\n            .await?;\n        writer.finish_write(Lsn(0x30));\n        writer\n            .put(TEST_KEY_A, Lsn(0x40), &test_value(\"foo at 0x40\"), &ctx)\n            .await?;\n        writer.finish_write(Lsn(0x40));\n\n        //assert_current_logical_size(&tline, Lsn(0x40));\n\n        // Branch the history, modify relation differently on the new timeline\n        tenant\n            .branch_timeline_test(&tline, NEW_TIMELINE_ID, Some(Lsn(0x30)), &ctx)\n            .await?;\n        let newtline = tenant\n            .get_timeline(NEW_TIMELINE_ID, true)\n            .expect(\"Should have a local timeline\");\n        let mut new_writer = newtline.writer().await;\n        new_writer\n            .put(TEST_KEY_A, Lsn(0x40), &test_value(\"bar at 0x40\"), &ctx)\n            .await?;\n        new_writer.finish_write(Lsn(0x40));\n\n        // Check page contents on both branches\n        assert_eq!(\n            from_utf8(&tline.get(TEST_KEY_A, Lsn(0x40), &ctx).await?)?,\n            \"foo at 0x40\"\n        );\n        assert_eq!(\n            from_utf8(&newtline.get(TEST_KEY_A, Lsn(0x40), &ctx).await?)?,\n            \"bar at 0x40\"\n        );\n        assert_eq!(\n            from_utf8(&newtline.get(TEST_KEY_B, Lsn(0x40), &ctx).await?)?,\n            \"foobar at 0x20\"\n        );\n\n        //assert_current_logical_size(&tline, Lsn(0x40));\n\n        Ok(())\n    }\n\n    async fn make_some_layers(\n        tline: &Timeline,\n        start_lsn: Lsn,\n        ctx: &RequestContext,\n    ) -> anyhow::Result<()> {\n        let mut lsn = start_lsn;\n        {\n            let mut writer = tline.writer().await;\n            // Create a relation on the timeline\n            writer\n                .put(\n                    *TEST_KEY,\n                    lsn,\n                    &Value::Image(test_img(&format!(\"foo at {lsn}\"))),\n                    ctx,\n                )\n         
       .await?;\n            writer.finish_write(lsn);\n            lsn += 0x10;\n            writer\n                .put(\n                    *TEST_KEY,\n                    lsn,\n                    &Value::Image(test_img(&format!(\"foo at {lsn}\"))),\n                    ctx,\n                )\n                .await?;\n            writer.finish_write(lsn);\n            lsn += 0x10;\n        }\n        tline.freeze_and_flush().await?;\n        {\n            let mut writer = tline.writer().await;\n            writer\n                .put(\n                    *TEST_KEY,\n                    lsn,\n                    &Value::Image(test_img(&format!(\"foo at {lsn}\"))),\n                    ctx,\n                )\n                .await?;\n            writer.finish_write(lsn);\n            lsn += 0x10;\n            writer\n                .put(\n                    *TEST_KEY,\n                    lsn,\n                    &Value::Image(test_img(&format!(\"foo at {lsn}\"))),\n                    ctx,\n                )\n                .await?;\n            writer.finish_write(lsn);\n        }\n        tline.freeze_and_flush().await.map_err(|e| e.into())\n    }\n\n    #[tokio::test]\n    async fn test_prohibit_branch_creation_on_garbage_collected_data() -> anyhow::Result<()> {\n        let (tenant, ctx) =\n            TenantHarness::create(\"test_prohibit_branch_creation_on_garbage_collected_data\")\n                .await?\n                .load()\n                .await;\n        let tline = tenant\n            .create_test_timeline(TIMELINE_ID, Lsn(0x10), DEFAULT_PG_VERSION, &ctx)\n            .await?;\n        make_some_layers(tline.as_ref(), Lsn(0x20), &ctx).await?;\n\n        // this removes layers before lsn 40 (50 minus 10), so there are two remaining layers, image and delta for 31-50\n        // FIXME: this doesn't actually remove any layer currently, given how the flushing\n        // and compaction works. 
But it does set the 'cutoff' point so that the cross check\n        // below should fail.\n        tenant\n            .gc_iteration(\n                Some(TIMELINE_ID),\n                0x10,\n                Duration::ZERO,\n                &CancellationToken::new(),\n                &ctx,\n            )\n            .await?;\n\n        // try to branch at lsn 25, should fail because we already garbage collected the data\n        match tenant\n            .branch_timeline_test(&tline, NEW_TIMELINE_ID, Some(Lsn(0x25)), &ctx)\n            .await\n        {\n            Ok(_) => panic!(\"branching should have failed\"),\n            Err(err) => {\n                let CreateTimelineError::AncestorLsn(err) = err else {\n                    panic!(\"wrong error type\")\n                };\n                assert!(err.to_string().contains(\"invalid branch start lsn\"));\n                assert!(\n                    err.source()\n                        .unwrap()\n                        .to_string()\n                        .contains(\"we might've already garbage collected needed data\")\n                )\n            }\n        }\n\n        Ok(())\n    }\n\n    #[tokio::test]\n    async fn test_prohibit_branch_creation_on_pre_initdb_lsn() -> anyhow::Result<()> {\n        let (tenant, ctx) =\n            TenantHarness::create(\"test_prohibit_branch_creation_on_pre_initdb_lsn\")\n                .await?\n                .load()\n                .await;\n\n        let tline = tenant\n            .create_test_timeline(TIMELINE_ID, Lsn(0x50), DEFAULT_PG_VERSION, &ctx)\n            .await?;\n        // try to branch at lsn 0x25, should fail because initdb lsn is 0x50\n        match tenant\n            .branch_timeline_test(&tline, NEW_TIMELINE_ID, Some(Lsn(0x25)), &ctx)\n            .await\n        {\n            Ok(_) => panic!(\"branching should have failed\"),\n            Err(err) => {\n                let CreateTimelineError::AncestorLsn(err) = err else {\n            
        panic!(\"wrong error type\");\n                };\n                assert!(&err.to_string().contains(\"invalid branch start lsn\"));\n                assert!(\n                    &err.source()\n                        .unwrap()\n                        .to_string()\n                        .contains(\"is earlier than latest GC cutoff\")\n                );\n            }\n        }\n\n        Ok(())\n    }\n\n    /*\n    // FIXME: This currently fails to error out. Calling GC doesn't currently\n    // remove the old value, we'd need to work a little harder\n    #[tokio::test]\n    async fn test_prohibit_get_for_garbage_collected_data() -> anyhow::Result<()> {\n        let repo =\n            RepoHarness::create(\"test_prohibit_get_for_garbage_collected_data\")?\n            .load();\n\n        let tline = repo.create_empty_timeline(TIMELINE_ID, Lsn(0), DEFAULT_PG_VERSION)?;\n        make_some_layers(tline.as_ref(), Lsn(0x20), &ctx).await?;\n\n        repo.gc_iteration(Some(TIMELINE_ID), 0x10, Duration::ZERO)?;\n        let applied_gc_cutoff_lsn = tline.get_applied_gc_cutoff_lsn();\n        assert!(*applied_gc_cutoff_lsn > Lsn(0x25));\n        match tline.get(*TEST_KEY, Lsn(0x25)) {\n            Ok(_) => panic!(\"request for page should have failed\"),\n            Err(err) => assert!(err.to_string().contains(\"not found at\")),\n        }\n        Ok(())\n    }\n     */\n\n    #[tokio::test]\n    async fn test_get_branchpoints_from_an_inactive_timeline() -> anyhow::Result<()> {\n        let (tenant, ctx) =\n            TenantHarness::create(\"test_get_branchpoints_from_an_inactive_timeline\")\n                .await?\n                .load()\n                .await;\n        let tline = tenant\n            .create_test_timeline(TIMELINE_ID, Lsn(0x10), DEFAULT_PG_VERSION, &ctx)\n            .await?;\n        make_some_layers(tline.as_ref(), Lsn(0x20), &ctx).await?;\n\n        tenant\n            .branch_timeline_test(&tline, NEW_TIMELINE_ID, 
Some(Lsn(0x40)), &ctx)\n            .await?;\n        let newtline = tenant\n            .get_timeline(NEW_TIMELINE_ID, true)\n            .expect(\"Should have a local timeline\");\n\n        make_some_layers(newtline.as_ref(), Lsn(0x60), &ctx).await?;\n\n        tline.set_broken(\"test\".to_owned());\n\n        tenant\n            .gc_iteration(\n                Some(TIMELINE_ID),\n                0x10,\n                Duration::ZERO,\n                &CancellationToken::new(),\n                &ctx,\n            )\n            .await?;\n\n        // The branchpoints should contain all timelines, even ones marked\n        // as Broken.\n        {\n            let branchpoints = &tline.gc_info.read().unwrap().retain_lsns;\n            assert_eq!(branchpoints.len(), 1);\n            assert_eq!(\n                branchpoints[0],\n                (Lsn(0x40), NEW_TIMELINE_ID, MaybeOffloaded::No)\n            );\n        }\n\n        // You can read the key from the child branch even though the parent is\n        // Broken, as long as you don't need to access data from the parent.\n        assert_eq!(\n            newtline.get(*TEST_KEY, Lsn(0x70), &ctx).await?,\n            test_img(&format!(\"foo at {}\", Lsn(0x70)))\n        );\n\n        // This needs to traverse to the parent, and fails.\n        let err = newtline.get(*TEST_KEY, Lsn(0x50), &ctx).await.unwrap_err();\n        assert!(\n            err.to_string().starts_with(&format!(\n                \"bad state on timeline {}: Broken\",\n                tline.timeline_id\n            )),\n            \"{err}\"\n        );\n\n        Ok(())\n    }\n\n    #[tokio::test]\n    async fn test_retain_data_in_parent_which_is_needed_for_child() -> anyhow::Result<()> {\n        let (tenant, ctx) =\n            TenantHarness::create(\"test_retain_data_in_parent_which_is_needed_for_child\")\n                .await?\n                .load()\n                .await;\n        let tline = tenant\n            
.create_test_timeline(TIMELINE_ID, Lsn(0x10), DEFAULT_PG_VERSION, &ctx)\n            .await?;\n        make_some_layers(tline.as_ref(), Lsn(0x20), &ctx).await?;\n\n        tenant\n            .branch_timeline_test(&tline, NEW_TIMELINE_ID, Some(Lsn(0x40)), &ctx)\n            .await?;\n        let newtline = tenant\n            .get_timeline(NEW_TIMELINE_ID, true)\n            .expect(\"Should have a local timeline\");\n        // this removes layers before lsn 40 (50 minus 10), so there are two remaining layers, image and delta for 31-50\n        tenant\n            .gc_iteration(\n                Some(TIMELINE_ID),\n                0x10,\n                Duration::ZERO,\n                &CancellationToken::new(),\n                &ctx,\n            )\n            .await?;\n        assert!(newtline.get(*TEST_KEY, Lsn(0x25), &ctx).await.is_ok());\n\n        Ok(())\n    }\n    #[tokio::test]\n    async fn test_parent_keeps_data_forever_after_branching() -> anyhow::Result<()> {\n        let (tenant, ctx) = TenantHarness::create(\"test_parent_keeps_data_forever_after_branching\")\n            .await?\n            .load()\n            .await;\n        let tline = tenant\n            .create_test_timeline(TIMELINE_ID, Lsn(0x10), DEFAULT_PG_VERSION, &ctx)\n            .await?;\n        make_some_layers(tline.as_ref(), Lsn(0x20), &ctx).await?;\n\n        tenant\n            .branch_timeline_test(&tline, NEW_TIMELINE_ID, Some(Lsn(0x40)), &ctx)\n            .await?;\n        let newtline = tenant\n            .get_timeline(NEW_TIMELINE_ID, true)\n            .expect(\"Should have a local timeline\");\n\n        make_some_layers(newtline.as_ref(), Lsn(0x60), &ctx).await?;\n\n        // run gc on parent\n        tenant\n            .gc_iteration(\n                Some(TIMELINE_ID),\n                0x10,\n                Duration::ZERO,\n                &CancellationToken::new(),\n                &ctx,\n            )\n            .await?;\n\n        // Check that the data is 
still accessible on the branch.\n        assert_eq!(\n            newtline.get(*TEST_KEY, Lsn(0x50), &ctx).await?,\n            test_img(&format!(\"foo at {}\", Lsn(0x40)))\n        );\n\n        Ok(())\n    }\n\n    #[tokio::test]\n    async fn timeline_load() -> anyhow::Result<()> {\n        const TEST_NAME: &str = \"timeline_load\";\n        let harness = TenantHarness::create(TEST_NAME).await?;\n        {\n            let (tenant, ctx) = harness.load().await;\n            let tline = tenant\n                .create_test_timeline(TIMELINE_ID, Lsn(0x7000), DEFAULT_PG_VERSION, &ctx)\n                .await?;\n            make_some_layers(tline.as_ref(), Lsn(0x8000), &ctx).await?;\n            // so that all uploads finish & we can call harness.load() below again\n            tenant\n                .shutdown(Default::default(), ShutdownMode::FreezeAndFlush)\n                .instrument(harness.span())\n                .await\n                .ok()\n                .unwrap();\n        }\n\n        let (tenant, _ctx) = harness.load().await;\n        tenant\n            .get_timeline(TIMELINE_ID, true)\n            .expect(\"cannot load timeline\");\n\n        Ok(())\n    }\n\n    #[tokio::test]\n    async fn timeline_load_with_ancestor() -> anyhow::Result<()> {\n        const TEST_NAME: &str = \"timeline_load_with_ancestor\";\n        let harness = TenantHarness::create(TEST_NAME).await?;\n        // create two timelines\n        {\n            let (tenant, ctx) = harness.load().await;\n            let tline = tenant\n                .create_test_timeline(TIMELINE_ID, Lsn(0x10), DEFAULT_PG_VERSION, &ctx)\n                .await?;\n\n            make_some_layers(tline.as_ref(), Lsn(0x20), &ctx).await?;\n\n            let child_tline = tenant\n                .branch_timeline_test(&tline, NEW_TIMELINE_ID, Some(Lsn(0x40)), &ctx)\n                .await?;\n            child_tline.set_state(TimelineState::Active);\n\n            let newtline = tenant\n                
.get_timeline(NEW_TIMELINE_ID, true)\n                .expect(\"Should have a local timeline\");\n\n            make_some_layers(newtline.as_ref(), Lsn(0x60), &ctx).await?;\n\n            // so that all uploads finish & we can call harness.load() below again\n            tenant\n                .shutdown(Default::default(), ShutdownMode::FreezeAndFlush)\n                .instrument(harness.span())\n                .await\n                .ok()\n                .unwrap();\n        }\n\n        // check that both of them are initially unloaded\n        let (tenant, _ctx) = harness.load().await;\n\n        // check that both, child and ancestor are loaded\n        let _child_tline = tenant\n            .get_timeline(NEW_TIMELINE_ID, true)\n            .expect(\"cannot get child timeline loaded\");\n\n        let _ancestor_tline = tenant\n            .get_timeline(TIMELINE_ID, true)\n            .expect(\"cannot get ancestor timeline loaded\");\n\n        Ok(())\n    }\n\n    #[tokio::test]\n    async fn delta_layer_dumping() -> anyhow::Result<()> {\n        use storage_layer::AsLayerDesc;\n        let (tenant, ctx) = TenantHarness::create(\"test_layer_dumping\")\n            .await?\n            .load()\n            .await;\n        let tline = tenant\n            .create_test_timeline(TIMELINE_ID, Lsn(0x10), DEFAULT_PG_VERSION, &ctx)\n            .await?;\n        make_some_layers(tline.as_ref(), Lsn(0x20), &ctx).await?;\n\n        let layer_map = tline.layers.read(LayerManagerLockHolder::Testing).await;\n        let level0_deltas = layer_map\n            .layer_map()?\n            .level0_deltas()\n            .iter()\n            .map(|desc| layer_map.get_from_desc(desc))\n            .collect::<Vec<_>>();\n\n        assert!(!level0_deltas.is_empty());\n\n        for delta in level0_deltas {\n            // Ensure we are dumping a delta layer here\n            assert!(delta.layer_desc().is_delta);\n            delta.dump(true, &ctx).await.unwrap();\n        }\n\n   
     Ok(())\n    }\n\n    #[tokio::test]\n    async fn test_images() -> anyhow::Result<()> {\n        let (tenant, ctx) = TenantHarness::create(\"test_images\").await?.load().await;\n        let tline = tenant\n            .create_test_timeline(TIMELINE_ID, Lsn(0x08), DEFAULT_PG_VERSION, &ctx)\n            .await?;\n\n        let mut writer = tline.writer().await;\n        writer\n            .put(\n                *TEST_KEY,\n                Lsn(0x10),\n                &Value::Image(test_img(\"foo at 0x10\")),\n                &ctx,\n            )\n            .await?;\n        writer.finish_write(Lsn(0x10));\n        drop(writer);\n\n        tline.freeze_and_flush().await?;\n        tline\n            .compact(&CancellationToken::new(), EnumSet::default(), &ctx)\n            .await?;\n\n        let mut writer = tline.writer().await;\n        writer\n            .put(\n                *TEST_KEY,\n                Lsn(0x20),\n                &Value::Image(test_img(\"foo at 0x20\")),\n                &ctx,\n            )\n            .await?;\n        writer.finish_write(Lsn(0x20));\n        drop(writer);\n\n        tline.freeze_and_flush().await?;\n        tline\n            .compact(&CancellationToken::new(), EnumSet::default(), &ctx)\n            .await?;\n\n        let mut writer = tline.writer().await;\n        writer\n            .put(\n                *TEST_KEY,\n                Lsn(0x30),\n                &Value::Image(test_img(\"foo at 0x30\")),\n                &ctx,\n            )\n            .await?;\n        writer.finish_write(Lsn(0x30));\n        drop(writer);\n\n        tline.freeze_and_flush().await?;\n        tline\n            .compact(&CancellationToken::new(), EnumSet::default(), &ctx)\n            .await?;\n\n        let mut writer = tline.writer().await;\n        writer\n            .put(\n                *TEST_KEY,\n                Lsn(0x40),\n                &Value::Image(test_img(\"foo at 0x40\")),\n                &ctx,\n            )\n    
        .await?;\n        writer.finish_write(Lsn(0x40));\n        drop(writer);\n\n        tline.freeze_and_flush().await?;\n        tline\n            .compact(&CancellationToken::new(), EnumSet::default(), &ctx)\n            .await?;\n\n        assert_eq!(\n            tline.get(*TEST_KEY, Lsn(0x10), &ctx).await?,\n            test_img(\"foo at 0x10\")\n        );\n        assert_eq!(\n            tline.get(*TEST_KEY, Lsn(0x1f), &ctx).await?,\n            test_img(\"foo at 0x10\")\n        );\n        assert_eq!(\n            tline.get(*TEST_KEY, Lsn(0x20), &ctx).await?,\n            test_img(\"foo at 0x20\")\n        );\n        assert_eq!(\n            tline.get(*TEST_KEY, Lsn(0x30), &ctx).await?,\n            test_img(\"foo at 0x30\")\n        );\n        assert_eq!(\n            tline.get(*TEST_KEY, Lsn(0x40), &ctx).await?,\n            test_img(\"foo at 0x40\")\n        );\n\n        Ok(())\n    }\n\n    async fn bulk_insert_compact_gc(\n        tenant: &TenantShard,\n        timeline: &Arc<Timeline>,\n        ctx: &RequestContext,\n        lsn: Lsn,\n        repeat: usize,\n        key_count: usize,\n    ) -> anyhow::Result<HashMap<Key, BTreeSet<Lsn>>> {\n        let compact = true;\n        bulk_insert_maybe_compact_gc(tenant, timeline, ctx, lsn, repeat, key_count, compact).await\n    }\n\n    async fn bulk_insert_maybe_compact_gc(\n        tenant: &TenantShard,\n        timeline: &Arc<Timeline>,\n        ctx: &RequestContext,\n        mut lsn: Lsn,\n        repeat: usize,\n        key_count: usize,\n        compact: bool,\n    ) -> anyhow::Result<HashMap<Key, BTreeSet<Lsn>>> {\n        let mut inserted: HashMap<Key, BTreeSet<Lsn>> = Default::default();\n\n        let mut test_key = Key::from_hex(\"010000000033333333444444445500000000\").unwrap();\n        let mut blknum = 0;\n\n        // Enforce that key range is monotonously increasing\n        let mut keyspace = KeySpaceAccum::new();\n\n        let cancel = CancellationToken::new();\n\n        for _ 
in 0..repeat {\n            for _ in 0..key_count {\n                test_key.field6 = blknum;\n                let mut writer = timeline.writer().await;\n                writer\n                    .put(\n                        test_key,\n                        lsn,\n                        &Value::Image(test_img(&format!(\"{blknum} at {lsn}\"))),\n                        ctx,\n                    )\n                    .await?;\n                inserted.entry(test_key).or_default().insert(lsn);\n                writer.finish_write(lsn);\n                drop(writer);\n\n                keyspace.add_key(test_key);\n\n                lsn = Lsn(lsn.0 + 0x10);\n                blknum += 1;\n            }\n\n            timeline.freeze_and_flush().await?;\n            if compact {\n                // this requires timeline to be &Arc<Timeline>\n                timeline.compact(&cancel, EnumSet::default(), ctx).await?;\n            }\n\n            // this doesn't really need to use the timeline_id target, but it is closer to what it\n            // originally was.\n            let res = tenant\n                .gc_iteration(Some(timeline.timeline_id), 0, Duration::ZERO, &cancel, ctx)\n                .await?;\n\n            assert_eq!(res.layers_removed, 0, \"this never removes anything\");\n        }\n\n        Ok(inserted)\n    }\n\n    //\n    // Insert 10000 key-value pairs with increasing keys, flush, compact, GC.\n    // Repeat 50 times.\n    //\n    #[tokio::test]\n    async fn test_bulk_insert() -> anyhow::Result<()> {\n        let harness = TenantHarness::create(\"test_bulk_insert\").await?;\n        let (tenant, ctx) = harness.load().await;\n        let tline = tenant\n            .create_test_timeline(TIMELINE_ID, Lsn(0x08), DEFAULT_PG_VERSION, &ctx)\n            .await?;\n\n        let lsn = Lsn(0x10);\n        bulk_insert_compact_gc(&tenant, &tline, &ctx, lsn, 50, 10000).await?;\n\n        Ok(())\n    }\n\n    // Test the vectored get real implementation 
against a simple sequential implementation.\n    //\n    // The test generates a keyspace by repeatedly flushing the in-memory layer and compacting.\n    // Projected to 2D the key space looks like below. Lsn grows upwards on the Y axis and keys\n    // grow to the right on the X axis.\n    //                       [Delta]\n    //                 [Delta]\n    //           [Delta]\n    //    [Delta]\n    // ------------ Image ---------------\n    //\n    // After layer generation we pick the ranges to query as follows:\n    // 1. The beginning of each delta layer\n    // 2. At the seam between two adjacent delta layers\n    //\n    // There's one major downside to this test: delta layers only contain images,\n    // so the search can stop at the first delta layer and doesn't traverse any deeper.\n    #[tokio::test]\n    async fn test_get_vectored() -> anyhow::Result<()> {\n        let harness = TenantHarness::create(\"test_get_vectored\").await?;\n        let (tenant, ctx) = harness.load().await;\n        let io_concurrency = IoConcurrency::spawn_for_test();\n        let tline = tenant\n            .create_test_timeline(TIMELINE_ID, Lsn(0x08), DEFAULT_PG_VERSION, &ctx)\n            .await?;\n\n        let lsn = Lsn(0x10);\n        let inserted = bulk_insert_compact_gc(&tenant, &tline, &ctx, lsn, 50, 10000).await?;\n\n        let guard = tline.layers.read(LayerManagerLockHolder::Testing).await;\n        let lm = guard.layer_map()?;\n\n        lm.dump(true, &ctx).await?;\n\n        let mut reads = Vec::new();\n        let mut prev = None;\n        lm.iter_historic_layers().for_each(|desc| {\n            if !desc.is_delta() {\n                prev = Some(desc.clone());\n                return;\n            }\n\n            let start = desc.key_range.start;\n            let end = desc\n                .key_range\n                .start\n                .add(tenant.conf.max_get_vectored_keys.get() as u32);\n            reads.push(KeySpace {\n                ranges: 
vec![start..end],\n            });\n\n            if let Some(prev) = &prev {\n                if !prev.is_delta() {\n                    return;\n                }\n\n                let first_range = Key {\n                    field6: prev.key_range.end.field6 - 4,\n                    ..prev.key_range.end\n                }..prev.key_range.end;\n\n                let second_range = desc.key_range.start..Key {\n                    field6: desc.key_range.start.field6 + 4,\n                    ..desc.key_range.start\n                };\n\n                reads.push(KeySpace {\n                    ranges: vec![first_range, second_range],\n                });\n            };\n\n            prev = Some(desc.clone());\n        });\n\n        drop(guard);\n\n        // Pick a big LSN such that we query over all the changes.\n        let reads_lsn = Lsn(u64::MAX - 1);\n\n        for read in reads {\n            info!(\"Doing vectored read on {:?}\", read);\n\n            let query = VersionedKeySpaceQuery::uniform(read.clone(), reads_lsn);\n\n            let vectored_res = tline\n                .get_vectored_impl(\n                    query,\n                    &mut ValuesReconstructState::new(io_concurrency.clone()),\n                    &ctx,\n                )\n                .await;\n\n            let mut expected_lsns: HashMap<Key, Lsn> = Default::default();\n            let mut expect_missing = false;\n            let mut key = read.start().unwrap();\n            while key != read.end().unwrap() {\n                if let Some(lsns) = inserted.get(&key) {\n                    let expected_lsn = lsns.iter().rfind(|lsn| **lsn <= reads_lsn);\n                    match expected_lsn {\n                        Some(lsn) => {\n                            expected_lsns.insert(key, *lsn);\n                        }\n                        None => {\n                            expect_missing = true;\n                            break;\n                        }\n         
           }\n                } else {\n                    expect_missing = true;\n                    break;\n                }\n\n                key = key.next();\n            }\n\n            if expect_missing {\n                assert!(matches!(vectored_res, Err(GetVectoredError::MissingKey(_))));\n            } else {\n                for (key, image) in vectored_res? {\n                    let expected_lsn = expected_lsns.get(&key).expect(\"determined above\");\n                    let expected_image = test_img(&format!(\"{} at {}\", key.field6, expected_lsn));\n                    assert_eq!(image?, expected_image);\n                }\n            }\n        }\n\n        Ok(())\n    }\n\n    #[tokio::test]\n    async fn test_get_vectored_aux_files() -> anyhow::Result<()> {\n        let harness = TenantHarness::create(\"test_get_vectored_aux_files\").await?;\n\n        let (tenant, ctx) = harness.load().await;\n        let io_concurrency = IoConcurrency::spawn_for_test();\n        let (tline, ctx) = tenant\n            .create_empty_timeline(TIMELINE_ID, Lsn(0), DEFAULT_PG_VERSION, &ctx)\n            .await?;\n        let tline = tline.raw_timeline().unwrap();\n\n        let mut modification = tline.begin_modification(Lsn(0x1000));\n        modification.put_file(\"foo/bar1\", b\"content1\", &ctx).await?;\n        modification.set_lsn(Lsn(0x1008))?;\n        modification.put_file(\"foo/bar2\", b\"content2\", &ctx).await?;\n        modification.commit(&ctx).await?;\n\n        let child_timeline_id = TimelineId::generate();\n        tenant\n            .branch_timeline_test(\n                tline,\n                child_timeline_id,\n                Some(tline.get_last_record_lsn()),\n                &ctx,\n            )\n            .await?;\n\n        let child_timeline = tenant\n            .get_timeline(child_timeline_id, true)\n            .expect(\"Should have the branched timeline\");\n\n        let aux_keyspace = KeySpace {\n            ranges: 
vec![NON_INHERITED_RANGE],\n        };\n        let read_lsn = child_timeline.get_last_record_lsn();\n\n        let query = VersionedKeySpaceQuery::uniform(aux_keyspace.clone(), read_lsn);\n\n        let vectored_res = child_timeline\n            .get_vectored_impl(\n                query,\n                &mut ValuesReconstructState::new(io_concurrency.clone()),\n                &ctx,\n            )\n            .await;\n\n        let images = vectored_res?;\n        assert!(images.is_empty());\n        Ok(())\n    }\n\n    // Test that vectored get handles layer gaps correctly\n    // by advancing into the next ancestor timeline if required.\n    //\n    // The test generates timelines that look like the diagram below.\n    // We leave a gap in one of the L1 layers at `gap_at_key` (`/` in the diagram).\n    // The reconstruct data for that key lies in the ancestor timeline (`X` in the diagram).\n    //\n    // ```\n    //-------------------------------+\n    //                          ...  |\n    //               [   L1   ]      |\n    //     [ / L1   ]                | Child Timeline\n    // ...                           
|\n    // ------------------------------+\n    //     [ X L1   ]                | Parent Timeline\n    // ------------------------------+\n    // ```\n    #[tokio::test]\n    async fn test_get_vectored_key_gap() -> anyhow::Result<()> {\n        let tenant_conf = pageserver_api::models::TenantConfig {\n            // Make compaction deterministic\n            gc_period: Some(Duration::ZERO),\n            compaction_period: Some(Duration::ZERO),\n            // Encourage creation of L1 layers\n            checkpoint_distance: Some(16 * 1024),\n            compaction_target_size: Some(8 * 1024),\n            ..Default::default()\n        };\n\n        let harness = TenantHarness::create_custom(\n            \"test_get_vectored_key_gap\",\n            tenant_conf,\n            TenantId::generate(),\n            ShardIdentity::unsharded(),\n            Generation::new(0xdeadbeef),\n        )\n        .await?;\n        let (tenant, ctx) = harness.load().await;\n        let io_concurrency = IoConcurrency::spawn_for_test();\n\n        let mut current_key = Key::from_hex(\"010000000033333333444444445500000000\").unwrap();\n        let gap_at_key = current_key.add(100);\n        let mut current_lsn = Lsn(0x10);\n\n        const KEY_COUNT: usize = 10_000;\n\n        let timeline_id = TimelineId::generate();\n        let current_timeline = tenant\n            .create_test_timeline(timeline_id, current_lsn, DEFAULT_PG_VERSION, &ctx)\n            .await?;\n\n        current_lsn += 0x100;\n\n        let mut writer = current_timeline.writer().await;\n        writer\n            .put(\n                gap_at_key,\n                current_lsn,\n                &Value::Image(test_img(&format!(\"{gap_at_key} at {current_lsn}\"))),\n                &ctx,\n            )\n            .await?;\n        writer.finish_write(current_lsn);\n        drop(writer);\n\n        let mut latest_lsns = HashMap::new();\n        latest_lsns.insert(gap_at_key, current_lsn);\n\n        
current_timeline.freeze_and_flush().await?;\n\n        let child_timeline_id = TimelineId::generate();\n\n        tenant\n            .branch_timeline_test(\n                &current_timeline,\n                child_timeline_id,\n                Some(current_lsn),\n                &ctx,\n            )\n            .await?;\n        let child_timeline = tenant\n            .get_timeline(child_timeline_id, true)\n            .expect(\"Should have the branched timeline\");\n\n        for i in 0..KEY_COUNT {\n            if current_key == gap_at_key {\n                current_key = current_key.next();\n                continue;\n            }\n\n            current_lsn += 0x10;\n\n            let mut writer = child_timeline.writer().await;\n            writer\n                .put(\n                    current_key,\n                    current_lsn,\n                    &Value::Image(test_img(&format!(\"{current_key} at {current_lsn}\"))),\n                    &ctx,\n                )\n                .await?;\n            writer.finish_write(current_lsn);\n            drop(writer);\n\n            latest_lsns.insert(current_key, current_lsn);\n            current_key = current_key.next();\n\n            // Flush every now and then to encourage layer file creation.\n            if i % 500 == 0 {\n                child_timeline.freeze_and_flush().await?;\n            }\n        }\n\n        child_timeline.freeze_and_flush().await?;\n        let mut flags = EnumSet::new();\n        flags.insert(CompactFlags::ForceRepartition);\n        child_timeline\n            .compact(&CancellationToken::new(), flags, &ctx)\n            .await?;\n\n        let key_near_end = {\n            let mut tmp = current_key;\n            tmp.field6 -= 10;\n            tmp\n        };\n\n        let key_near_gap = {\n            let mut tmp = gap_at_key;\n            tmp.field6 -= 10;\n            tmp\n        };\n\n        let read = KeySpace {\n            ranges: 
vec![key_near_gap..gap_at_key.next(), key_near_end..current_key],\n        };\n\n        let query = VersionedKeySpaceQuery::uniform(read.clone(), current_lsn);\n\n        let results = child_timeline\n            .get_vectored_impl(\n                query,\n                &mut ValuesReconstructState::new(io_concurrency.clone()),\n                &ctx,\n            )\n            .await?;\n\n        for (key, img_res) in results {\n            let expected = test_img(&format!(\"{} at {}\", key, latest_lsns[&key]));\n            assert_eq!(img_res?, expected);\n        }\n\n        Ok(())\n    }\n\n    // Test that vectored get descends into ancestor timelines correctly and\n    // does not return an image that's newer than requested.\n    //\n    // The diagram below illustrates an interesting case. We have a parent timeline\n    // (top of the Lsn range) and a child timeline. The request key cannot be reconstructed\n    // from the child timeline, so the parent timeline must be visited. When advancing into\n    // the parent timeline, the read path needs to remember what the requested Lsn was in\n    // order to avoid returning an image that's too new. 
The test below constructs such\n    // a timeline setup and does a few queries around the Lsn of each page image.\n    // ```\n    //    LSN\n    //     ^\n    //     |\n    //     |\n    // 500 | --------------------------------------> branch point\n    // 400 |        X\n    // 300 |        X\n    // 200 | --------------------------------------> requested lsn\n    // 100 |        X\n    //     |---------------------------------------> Key\n    //              |\n    //              ------> requested key\n    //\n    // Legend:\n    // * X - page images\n    // ```\n    #[tokio::test]\n    async fn test_get_vectored_ancestor_descent() -> anyhow::Result<()> {\n        let harness = TenantHarness::create(\"test_get_vectored_on_lsn_axis\").await?;\n        let (tenant, ctx) = harness.load().await;\n        let io_concurrency = IoConcurrency::spawn_for_test();\n\n        let start_key = Key::from_hex(\"010000000033333333444444445500000000\").unwrap();\n        let end_key = start_key.add(1000);\n        let child_gap_at_key = start_key.add(500);\n        let mut parent_gap_lsns: BTreeMap<Lsn, String> = BTreeMap::new();\n\n        let mut current_lsn = Lsn(0x10);\n\n        let timeline_id = TimelineId::generate();\n        let parent_timeline = tenant\n            .create_test_timeline(timeline_id, current_lsn, DEFAULT_PG_VERSION, &ctx)\n            .await?;\n\n        current_lsn += 0x100;\n\n        for _ in 0..3 {\n            let mut key = start_key;\n            while key < end_key {\n                current_lsn += 0x10;\n\n                let image_value = format!(\"{child_gap_at_key} at {current_lsn}\");\n\n                let mut writer = parent_timeline.writer().await;\n                writer\n                    .put(\n                        key,\n                        current_lsn,\n                        &Value::Image(test_img(&image_value)),\n                        &ctx,\n                    )\n                    .await?;\n                
writer.finish_write(current_lsn);\n\n                if key == child_gap_at_key {\n                    parent_gap_lsns.insert(current_lsn, image_value);\n                }\n\n                key = key.next();\n            }\n\n            parent_timeline.freeze_and_flush().await?;\n        }\n\n        let child_timeline_id = TimelineId::generate();\n\n        let child_timeline = tenant\n            .branch_timeline_test(&parent_timeline, child_timeline_id, Some(current_lsn), &ctx)\n            .await?;\n\n        let mut key = start_key;\n        while key < end_key {\n            if key == child_gap_at_key {\n                key = key.next();\n                continue;\n            }\n\n            current_lsn += 0x10;\n\n            let mut writer = child_timeline.writer().await;\n            writer\n                .put(\n                    key,\n                    current_lsn,\n                    &Value::Image(test_img(&format!(\"{key} at {current_lsn}\"))),\n                    &ctx,\n                )\n                .await?;\n            writer.finish_write(current_lsn);\n\n            key = key.next();\n        }\n\n        child_timeline.freeze_and_flush().await?;\n\n        let lsn_offsets: [i64; 5] = [-10, -1, 0, 1, 10];\n        let mut query_lsns = Vec::new();\n        for image_lsn in parent_gap_lsns.keys().rev() {\n            for offset in lsn_offsets {\n                query_lsns.push(Lsn(image_lsn\n                    .0\n                    .checked_add_signed(offset)\n                    .expect(\"Shouldn't overflow\")));\n            }\n        }\n\n        for query_lsn in query_lsns {\n            let query = VersionedKeySpaceQuery::uniform(\n                KeySpace {\n                    ranges: vec![child_gap_at_key..child_gap_at_key.next()],\n                },\n                query_lsn,\n            );\n\n            let results = child_timeline\n                .get_vectored_impl(\n                    query,\n                    
&mut ValuesReconstructState::new(io_concurrency.clone()),\n                    &ctx,\n                )\n                .await;\n\n            let expected_item = parent_gap_lsns\n                .iter()\n                .rev()\n                .find(|(lsn, _)| **lsn <= query_lsn);\n\n            info!(\n                \"Doing vectored read at LSN {}. Expecting image to be: {:?}\",\n                query_lsn, expected_item\n            );\n\n            match expected_item {\n                Some((_, img_value)) => {\n                    let key_results = results.expect(\"No vectored get error expected\");\n                    let key_result = &key_results[&child_gap_at_key];\n                    let returned_img = key_result\n                        .as_ref()\n                        .expect(\"No page reconstruct error expected\");\n\n                    info!(\n                        \"Vectored read at LSN {} returned image {}\",\n                        query_lsn,\n                        std::str::from_utf8(returned_img)?\n                    );\n                    assert_eq!(*returned_img, test_img(img_value));\n                }\n                None => {\n                    assert!(matches!(results, Err(GetVectoredError::MissingKey(_))));\n                }\n            }\n        }\n\n        Ok(())\n    }\n\n    #[tokio::test]\n    async fn test_random_updates() -> anyhow::Result<()> {\n        let names_algorithms = [\n            (\"test_random_updates_legacy\", CompactionAlgorithm::Legacy),\n            (\"test_random_updates_tiered\", CompactionAlgorithm::Tiered),\n        ];\n        for (name, algorithm) in names_algorithms {\n            test_random_updates_algorithm(name, algorithm).await?;\n        }\n        Ok(())\n    }\n\n    async fn test_random_updates_algorithm(\n        name: &'static str,\n        compaction_algorithm: CompactionAlgorithm,\n    ) -> anyhow::Result<()> {\n        let mut harness = TenantHarness::create(name).await?;\n 
       harness.tenant_conf.compaction_algorithm = Some(CompactionAlgorithmSettings {\n            kind: compaction_algorithm,\n        });\n        let (tenant, ctx) = harness.load().await;\n        let tline = tenant\n            .create_test_timeline(TIMELINE_ID, Lsn(0x10), DEFAULT_PG_VERSION, &ctx)\n            .await?;\n\n        const NUM_KEYS: usize = 1000;\n        let cancel = CancellationToken::new();\n\n        let mut test_key = Key::from_hex(\"010000000033333333444444445500000000\").unwrap();\n        let mut test_key_end = test_key;\n        test_key_end.field6 = NUM_KEYS as u32;\n        tline.add_extra_test_dense_keyspace(KeySpace::single(test_key..test_key_end));\n\n        let mut keyspace = KeySpaceAccum::new();\n\n        // Track when each page was last modified. Used to assert that\n        // a read sees the latest page version.\n        let mut updated = [Lsn(0); NUM_KEYS];\n\n        let mut lsn = Lsn(0x10);\n        #[allow(clippy::needless_range_loop)]\n        for blknum in 0..NUM_KEYS {\n            lsn = Lsn(lsn.0 + 0x10);\n            test_key.field6 = blknum as u32;\n            let mut writer = tline.writer().await;\n            writer\n                .put(\n                    test_key,\n                    lsn,\n                    &Value::Image(test_img(&format!(\"{blknum} at {lsn}\"))),\n                    &ctx,\n                )\n                .await?;\n            writer.finish_write(lsn);\n            updated[blknum] = lsn;\n            drop(writer);\n\n            keyspace.add_key(test_key);\n        }\n\n        for _ in 0..50 {\n            for _ in 0..NUM_KEYS {\n                lsn = Lsn(lsn.0 + 0x10);\n                let blknum = rand::rng().random_range(0..NUM_KEYS);\n                test_key.field6 = blknum as u32;\n                let mut writer = tline.writer().await;\n                writer\n                    .put(\n                        test_key,\n                        lsn,\n                        
&Value::Image(test_img(&format!(\"{blknum} at {lsn}\"))),\n                        &ctx,\n                    )\n                    .await?;\n                writer.finish_write(lsn);\n                drop(writer);\n                updated[blknum] = lsn;\n            }\n\n            // Read all the blocks\n            for (blknum, last_lsn) in updated.iter().enumerate() {\n                test_key.field6 = blknum as u32;\n                assert_eq!(\n                    tline.get(test_key, lsn, &ctx).await?,\n                    test_img(&format!(\"{blknum} at {last_lsn}\"))\n                );\n            }\n\n            // Perform a cycle of flush, and GC\n            tline.freeze_and_flush().await?;\n            tenant\n                .gc_iteration(Some(tline.timeline_id), 0, Duration::ZERO, &cancel, &ctx)\n                .await?;\n        }\n\n        Ok(())\n    }\n\n    #[tokio::test]\n    async fn test_traverse_branches() -> anyhow::Result<()> {\n        let (tenant, ctx) = TenantHarness::create(\"test_traverse_branches\")\n            .await?\n            .load()\n            .await;\n        let mut tline = tenant\n            .create_test_timeline(TIMELINE_ID, Lsn(0x10), DEFAULT_PG_VERSION, &ctx)\n            .await?;\n\n        const NUM_KEYS: usize = 1000;\n\n        let mut test_key = Key::from_hex(\"010000000033333333444444445500000000\").unwrap();\n\n        let mut keyspace = KeySpaceAccum::new();\n\n        let cancel = CancellationToken::new();\n\n        // Track when each page was last modified. 
Used to assert that\n        // a read sees the latest page version.\n        let mut updated = [Lsn(0); NUM_KEYS];\n\n        let mut lsn = Lsn(0x10);\n        #[allow(clippy::needless_range_loop)]\n        for blknum in 0..NUM_KEYS {\n            lsn = Lsn(lsn.0 + 0x10);\n            test_key.field6 = blknum as u32;\n            let mut writer = tline.writer().await;\n            writer\n                .put(\n                    test_key,\n                    lsn,\n                    &Value::Image(test_img(&format!(\"{blknum} at {lsn}\"))),\n                    &ctx,\n                )\n                .await?;\n            writer.finish_write(lsn);\n            updated[blknum] = lsn;\n            drop(writer);\n\n            keyspace.add_key(test_key);\n        }\n\n        for _ in 0..50 {\n            let new_tline_id = TimelineId::generate();\n            tenant\n                .branch_timeline_test(&tline, new_tline_id, Some(lsn), &ctx)\n                .await?;\n            tline = tenant\n                .get_timeline(new_tline_id, true)\n                .expect(\"Should have the branched timeline\");\n\n            for _ in 0..NUM_KEYS {\n                lsn = Lsn(lsn.0 + 0x10);\n                let blknum = rand::rng().random_range(0..NUM_KEYS);\n                test_key.field6 = blknum as u32;\n                let mut writer = tline.writer().await;\n                writer\n                    .put(\n                        test_key,\n                        lsn,\n                        &Value::Image(test_img(&format!(\"{blknum} at {lsn}\"))),\n                        &ctx,\n                    )\n                    .await?;\n                println!(\"updating {blknum} at {lsn}\");\n                writer.finish_write(lsn);\n                drop(writer);\n                updated[blknum] = lsn;\n            }\n\n            // Read all the blocks\n            for (blknum, last_lsn) in updated.iter().enumerate() {\n                test_key.field6 = 
blknum as u32;\n                assert_eq!(\n                    tline.get(test_key, lsn, &ctx).await?,\n                    test_img(&format!(\"{blknum} at {last_lsn}\"))\n                );\n            }\n\n            // Perform a cycle of flush, compact, and GC\n            tline.freeze_and_flush().await?;\n            tline.compact(&cancel, EnumSet::default(), &ctx).await?;\n            tenant\n                .gc_iteration(Some(tline.timeline_id), 0, Duration::ZERO, &cancel, &ctx)\n                .await?;\n        }\n\n        Ok(())\n    }\n\n    #[tokio::test]\n    async fn test_traverse_ancestors() -> anyhow::Result<()> {\n        let (tenant, ctx) = TenantHarness::create(\"test_traverse_ancestors\")\n            .await?\n            .load()\n            .await;\n        let mut tline = tenant\n            .create_test_timeline(TIMELINE_ID, Lsn(0x10), DEFAULT_PG_VERSION, &ctx)\n            .await?;\n\n        const NUM_KEYS: usize = 100;\n        const NUM_TLINES: usize = 50;\n\n        let mut test_key = Key::from_hex(\"010000000033333333444444445500000000\").unwrap();\n        // Track page mutation lsns across different timelines.\n        let mut updated = [[Lsn(0); NUM_KEYS]; NUM_TLINES];\n\n        let mut lsn = Lsn(0x10);\n\n        #[allow(clippy::needless_range_loop)]\n        for idx in 0..NUM_TLINES {\n            let new_tline_id = TimelineId::generate();\n            tenant\n                .branch_timeline_test(&tline, new_tline_id, Some(lsn), &ctx)\n                .await?;\n            tline = tenant\n                .get_timeline(new_tline_id, true)\n                .expect(\"Should have the branched timeline\");\n\n            for _ in 0..NUM_KEYS {\n                lsn = Lsn(lsn.0 + 0x10);\n                let blknum = rand::rng().random_range(0..NUM_KEYS);\n                test_key.field6 = blknum as u32;\n                let mut writer = tline.writer().await;\n                writer\n                    .put(\n                        
test_key,\n                        lsn,\n                        &Value::Image(test_img(&format!(\"{idx} {blknum} at {lsn}\"))),\n                        &ctx,\n                    )\n                    .await?;\n                println!(\"updating [{idx}][{blknum}] at {lsn}\");\n                writer.finish_write(lsn);\n                drop(writer);\n                updated[idx][blknum] = lsn;\n            }\n        }\n\n        // Read pages from leaf timeline across all ancestors.\n        for (idx, lsns) in updated.iter().enumerate() {\n            for (blknum, lsn) in lsns.iter().enumerate() {\n                // Skip empty mutations.\n                if lsn.0 == 0 {\n                    continue;\n                }\n                println!(\"checking [{idx}][{blknum}] at {lsn}\");\n                test_key.field6 = blknum as u32;\n                assert_eq!(\n                    tline.get(test_key, *lsn, &ctx).await?,\n                    test_img(&format!(\"{idx} {blknum} at {lsn}\"))\n                );\n            }\n        }\n        Ok(())\n    }\n\n    #[tokio::test]\n    async fn test_write_at_initdb_lsn_takes_optimization_code_path() -> anyhow::Result<()> {\n        let (tenant, ctx) = TenantHarness::create(\"test_empty_test_timeline_is_usable\")\n            .await?\n            .load()\n            .await;\n\n        let initdb_lsn = Lsn(0x20);\n        let (utline, ctx) = tenant\n            .create_empty_timeline(TIMELINE_ID, initdb_lsn, DEFAULT_PG_VERSION, &ctx)\n            .await?;\n        let tline = utline.raw_timeline().unwrap();\n\n        // Spawn flush loop now so that we can set the `expect_initdb_optimization`\n        tline.maybe_spawn_flush_loop();\n\n        // Make sure the timeline has the minimum set of required keys for operation.\n        // The only operation you can always do on an empty timeline is to `put` new data.\n        // Except if you `put` at `initdb_lsn`.\n        // In that case, there's an optimization to 
directly create image layers instead of delta layers.\n        // It uses `repartition()`, which assumes some keys to be present.\n        // Let's make sure the test timeline can handle that case.\n        {\n            let mut state = tline.flush_loop_state.lock().unwrap();\n            assert_eq!(\n                timeline::FlushLoopState::Running {\n                    expect_initdb_optimization: false,\n                    initdb_optimization_count: 0,\n                },\n                *state\n            );\n            *state = timeline::FlushLoopState::Running {\n                expect_initdb_optimization: true,\n                initdb_optimization_count: 0,\n            };\n        }\n\n        // Make writes at the initdb_lsn. When we flush it below, it should be handled by the optimization.\n        // As explained above, the optimization requires some keys to be present.\n        // As per `create_empty_timeline` documentation, use init_empty to set them.\n        // This is what `create_test_timeline` does, by the way.\n        let mut modification = tline.begin_modification(initdb_lsn);\n        modification\n            .init_empty_test_timeline()\n            .context(\"init_empty_test_timeline\")?;\n        modification\n            .commit(&ctx)\n            .await\n            .context(\"commit init_empty_test_timeline modification\")?;\n\n        // Do the flush. 
The flush code will check the expectations that we set above.\n        tline.freeze_and_flush().await?;\n\n        // assert freeze_and_flush exercised the initdb optimization\n        {\n            let state = tline.flush_loop_state.lock().unwrap();\n            let timeline::FlushLoopState::Running {\n                expect_initdb_optimization,\n                initdb_optimization_count,\n            } = *state\n            else {\n                panic!(\"unexpected state: {:?}\", *state);\n            };\n            assert!(expect_initdb_optimization);\n            assert!(initdb_optimization_count > 0);\n        }\n        Ok(())\n    }\n\n    #[tokio::test]\n    async fn test_create_guard_crash() -> anyhow::Result<()> {\n        let name = \"test_create_guard_crash\";\n        let harness = TenantHarness::create(name).await?;\n        {\n            let (tenant, ctx) = harness.load().await;\n            let (tline, _ctx) = tenant\n                .create_empty_timeline(TIMELINE_ID, Lsn(0), DEFAULT_PG_VERSION, &ctx)\n                .await?;\n            // Leave the timeline ID in [`TenantShard::timelines_creating`] to exclude attempting to create it again\n            let raw_tline = tline.raw_timeline().unwrap();\n            raw_tline\n                .shutdown(super::timeline::ShutdownMode::Hard)\n                .instrument(info_span!(\"test_shutdown\", tenant_id=%raw_tline.tenant_shard_id, shard_id=%raw_tline.tenant_shard_id.shard_slug(), timeline_id=%TIMELINE_ID))\n                .await;\n            std::mem::forget(tline);\n        }\n\n        let (tenant, _) = harness.load().await;\n        match tenant.get_timeline(TIMELINE_ID, false) {\n            Ok(_) => panic!(\"timeline should've been removed during load\"),\n            Err(e) => {\n                assert_eq!(\n                    e,\n                    GetTimelineError::NotFound {\n                        tenant_id: tenant.tenant_shard_id,\n                        timeline_id: 
TIMELINE_ID,\n                    }\n                )\n            }\n        }\n\n        assert!(\n            !harness\n                .conf\n                .timeline_path(&tenant.tenant_shard_id, &TIMELINE_ID)\n                .exists()\n        );\n\n        Ok(())\n    }\n\n    #[tokio::test]\n    async fn test_read_at_max_lsn() -> anyhow::Result<()> {\n        let names_algorithms = [\n            (\"test_read_at_max_lsn_legacy\", CompactionAlgorithm::Legacy),\n            (\"test_read_at_max_lsn_tiered\", CompactionAlgorithm::Tiered),\n        ];\n        for (name, algorithm) in names_algorithms {\n            test_read_at_max_lsn_algorithm(name, algorithm).await?;\n        }\n        Ok(())\n    }\n\n    async fn test_read_at_max_lsn_algorithm(\n        name: &'static str,\n        compaction_algorithm: CompactionAlgorithm,\n    ) -> anyhow::Result<()> {\n        let mut harness = TenantHarness::create(name).await?;\n        harness.tenant_conf.compaction_algorithm = Some(CompactionAlgorithmSettings {\n            kind: compaction_algorithm,\n        });\n        let (tenant, ctx) = harness.load().await;\n        let tline = tenant\n            .create_test_timeline(TIMELINE_ID, Lsn(0x08), DEFAULT_PG_VERSION, &ctx)\n            .await?;\n\n        let lsn = Lsn(0x10);\n        let compact = false;\n        bulk_insert_maybe_compact_gc(&tenant, &tline, &ctx, lsn, 50, 10000, compact).await?;\n\n        let test_key = Key::from_hex(\"010000000033333333444444445500000000\").unwrap();\n        let read_lsn = Lsn(u64::MAX - 1);\n\n        let result = tline.get(test_key, read_lsn, &ctx).await;\n        assert!(result.is_ok(), \"result is not Ok: {}\", result.unwrap_err());\n\n        Ok(())\n    }\n\n    #[tokio::test]\n    async fn test_metadata_scan() -> anyhow::Result<()> {\n        let harness = TenantHarness::create(\"test_metadata_scan\").await?;\n        let (tenant, ctx) = harness.load().await;\n        let io_concurrency = 
IoConcurrency::spawn_for_test();\n        let tline = tenant\n            .create_test_timeline(TIMELINE_ID, Lsn(0x10), DEFAULT_PG_VERSION, &ctx)\n            .await?;\n\n        const NUM_KEYS: usize = 1000;\n        const STEP: usize = 10000; // random update + scan base_key + idx * STEP\n\n        let cancel = CancellationToken::new();\n\n        let mut base_key = Key::from_hex(\"000000000033333333444444445500000000\").unwrap();\n        base_key.field1 = AUX_KEY_PREFIX;\n        let mut test_key = base_key;\n\n        // Track when each page was last modified. Used to assert that\n        // a read sees the latest page version.\n        let mut updated = [Lsn(0); NUM_KEYS];\n\n        let mut lsn = Lsn(0x10);\n        #[allow(clippy::needless_range_loop)]\n        for blknum in 0..NUM_KEYS {\n            lsn = Lsn(lsn.0 + 0x10);\n            test_key.field6 = (blknum * STEP) as u32;\n            let mut writer = tline.writer().await;\n            writer\n                .put(\n                    test_key,\n                    lsn,\n                    &Value::Image(test_img(&format!(\"{blknum} at {lsn}\"))),\n                    &ctx,\n                )\n                .await?;\n            writer.finish_write(lsn);\n            updated[blknum] = lsn;\n            drop(writer);\n        }\n\n        let keyspace = KeySpace::single(base_key..base_key.add((NUM_KEYS * STEP) as u32));\n\n        for iter in 0..=10 {\n            // Read all the blocks\n            for (blknum, last_lsn) in updated.iter().enumerate() {\n                test_key.field6 = (blknum * STEP) as u32;\n                assert_eq!(\n                    tline.get(test_key, lsn, &ctx).await?,\n                    test_img(&format!(\"{blknum} at {last_lsn}\"))\n                );\n            }\n\n            let mut cnt = 0;\n            let query = VersionedKeySpaceQuery::uniform(keyspace.clone(), lsn);\n\n            for (key, value) in tline\n                .get_vectored_impl(\n          
          query,\n                    &mut ValuesReconstructState::new(io_concurrency.clone()),\n                    &ctx,\n                )\n                .await?\n            {\n                let blknum = key.field6 as usize;\n                let value = value?;\n                assert!(blknum % STEP == 0);\n                let blknum = blknum / STEP;\n                assert_eq!(\n                    value,\n                    test_img(&format!(\"{} at {}\", blknum, updated[blknum]))\n                );\n                cnt += 1;\n            }\n\n            assert_eq!(cnt, NUM_KEYS);\n\n            for _ in 0..NUM_KEYS {\n                lsn = Lsn(lsn.0 + 0x10);\n                let blknum = rand::rng().random_range(0..NUM_KEYS);\n                test_key.field6 = (blknum * STEP) as u32;\n                let mut writer = tline.writer().await;\n                writer\n                    .put(\n                        test_key,\n                        lsn,\n                        &Value::Image(test_img(&format!(\"{blknum} at {lsn}\"))),\n                        &ctx,\n                    )\n                    .await?;\n                writer.finish_write(lsn);\n                drop(writer);\n                updated[blknum] = lsn;\n            }\n\n            // Perform two cycles of flush, compact, and GC\n            for round in 0..2 {\n                tline.freeze_and_flush().await?;\n                tline\n                    .compact(\n                        &cancel,\n                        if iter % 5 == 0 && round == 0 {\n                            let mut flags = EnumSet::new();\n                            flags.insert(CompactFlags::ForceImageLayerCreation);\n                            flags.insert(CompactFlags::ForceRepartition);\n                            flags\n                        } else {\n                            EnumSet::empty()\n                        },\n                        &ctx,\n                    )\n               
     .await?;\n                tenant\n                    .gc_iteration(Some(tline.timeline_id), 0, Duration::ZERO, &cancel, &ctx)\n                    .await?;\n            }\n        }\n\n        Ok(())\n    }\n\n    #[tokio::test]\n    async fn test_metadata_compaction_trigger() -> anyhow::Result<()> {\n        let harness = TenantHarness::create(\"test_metadata_compaction_trigger\").await?;\n        let (tenant, ctx) = harness.load().await;\n        let tline = tenant\n            .create_test_timeline(TIMELINE_ID, Lsn(0x10), DEFAULT_PG_VERSION, &ctx)\n            .await?;\n\n        let cancel = CancellationToken::new();\n\n        let mut base_key = Key::from_hex(\"000000000033333333444444445500000000\").unwrap();\n        base_key.field1 = AUX_KEY_PREFIX;\n        let test_key = base_key;\n        let mut lsn = Lsn(0x10);\n\n        for _ in 0..20 {\n            lsn = Lsn(lsn.0 + 0x10);\n            let mut writer = tline.writer().await;\n            writer\n                .put(\n                    test_key,\n                    lsn,\n                    &Value::Image(test_img(&format!(\"{} at {}\", 0, lsn))),\n                    &ctx,\n                )\n                .await?;\n            writer.finish_write(lsn);\n            drop(writer);\n            tline.freeze_and_flush().await?; // force create a delta layer\n        }\n\n        let before_num_l0_delta_files = tline\n            .layers\n            .read(LayerManagerLockHolder::Testing)\n            .await\n            .layer_map()?\n            .level0_deltas()\n            .len();\n\n        tline.compact(&cancel, EnumSet::default(), &ctx).await?;\n\n        let after_num_l0_delta_files = tline\n            .layers\n            .read(LayerManagerLockHolder::Testing)\n            .await\n            .layer_map()?\n            .level0_deltas()\n            .len();\n\n        assert!(\n            after_num_l0_delta_files < before_num_l0_delta_files,\n            
\"after_num_l0_delta_files={after_num_l0_delta_files}, before_num_l0_delta_files={before_num_l0_delta_files}\"\n        );\n\n        assert_eq!(\n            tline.get(test_key, lsn, &ctx).await?,\n            test_img(&format!(\"{} at {}\", 0, lsn))\n        );\n\n        Ok(())\n    }\n\n    #[tokio::test]\n    async fn test_aux_file_e2e() {\n        let harness = TenantHarness::create(\"test_aux_file_e2e\").await.unwrap();\n\n        let (tenant, ctx) = harness.load().await;\n        let io_concurrency = IoConcurrency::spawn_for_test();\n\n        let mut lsn = Lsn(0x08);\n\n        let tline: Arc<Timeline> = tenant\n            .create_test_timeline(TIMELINE_ID, lsn, DEFAULT_PG_VERSION, &ctx)\n            .await\n            .unwrap();\n\n        {\n            lsn += 8;\n            let mut modification = tline.begin_modification(lsn);\n            modification\n                .put_file(\"pg_logical/mappings/test1\", b\"first\", &ctx)\n                .await\n                .unwrap();\n            modification.commit(&ctx).await.unwrap();\n        }\n\n        // we can read everything from the storage\n        let files = tline\n            .list_aux_files(lsn, &ctx, io_concurrency.clone())\n            .await\n            .unwrap();\n        assert_eq!(\n            files.get(\"pg_logical/mappings/test1\"),\n            Some(&bytes::Bytes::from_static(b\"first\"))\n        );\n\n        {\n            lsn += 8;\n            let mut modification = tline.begin_modification(lsn);\n            modification\n                .put_file(\"pg_logical/mappings/test2\", b\"second\", &ctx)\n                .await\n                .unwrap();\n            modification.commit(&ctx).await.unwrap();\n        }\n\n        let files = tline\n            .list_aux_files(lsn, &ctx, io_concurrency.clone())\n            .await\n            .unwrap();\n        assert_eq!(\n            files.get(\"pg_logical/mappings/test2\"),\n            
Some(&bytes::Bytes::from_static(b\"second\"))\n        );\n\n        let child = tenant\n            .branch_timeline_test(&tline, NEW_TIMELINE_ID, Some(lsn), &ctx)\n            .await\n            .unwrap();\n\n        let files = child\n            .list_aux_files(lsn, &ctx, io_concurrency.clone())\n            .await\n            .unwrap();\n        assert_eq!(files.get(\"pg_logical/mappings/test1\"), None);\n        assert_eq!(files.get(\"pg_logical/mappings/test2\"), None);\n    }\n\n    #[tokio::test]\n    async fn test_repl_origin_tombstones() {\n        let harness = TenantHarness::create(\"test_repl_origin_tombstones\")\n            .await\n            .unwrap();\n\n        let (tenant, ctx) = harness.load().await;\n        let io_concurrency = IoConcurrency::spawn_for_test();\n\n        let mut lsn = Lsn(0x08);\n\n        let tline: Arc<Timeline> = tenant\n            .create_test_timeline(TIMELINE_ID, lsn, DEFAULT_PG_VERSION, &ctx)\n            .await\n            .unwrap();\n\n        let repl_lsn = Lsn(0x10);\n        {\n            lsn += 8;\n            let mut modification = tline.begin_modification(lsn);\n            modification.put_for_unit_test(repl_origin_key(2), Value::Image(Bytes::new()));\n            modification.set_replorigin(1, repl_lsn).await.unwrap();\n            modification.commit(&ctx).await.unwrap();\n        }\n\n        // we can read everything from the storage\n        let repl_origins = tline\n            .get_replorigins(lsn, &ctx, io_concurrency.clone())\n            .await\n            .unwrap();\n        assert_eq!(repl_origins.len(), 1);\n        assert_eq!(repl_origins[&1], lsn);\n\n        {\n            lsn += 8;\n            let mut modification = tline.begin_modification(lsn);\n            modification.put_for_unit_test(\n                repl_origin_key(3),\n                Value::Image(Bytes::copy_from_slice(b\"cannot_decode_this\")),\n            );\n            modification.commit(&ctx).await.unwrap();\n        
}\n        let result = tline\n            .get_replorigins(lsn, &ctx, io_concurrency.clone())\n            .await;\n        assert!(result.is_err());\n    }\n\n    #[tokio::test]\n    async fn test_metadata_image_creation() -> anyhow::Result<()> {\n        let harness = TenantHarness::create(\"test_metadata_image_creation\").await?;\n        let (tenant, ctx) = harness.load().await;\n        let io_concurrency = IoConcurrency::spawn_for_test();\n        let tline = tenant\n            .create_test_timeline(TIMELINE_ID, Lsn(0x10), DEFAULT_PG_VERSION, &ctx)\n            .await?;\n\n        const NUM_KEYS: usize = 1000;\n        const STEP: usize = 10000; // random update + scan base_key + idx * STEP\n\n        let cancel = CancellationToken::new();\n\n        let base_key = Key::from_hex(\"620000000033333333444444445500000000\").unwrap();\n        assert_eq!(base_key.field1, AUX_KEY_PREFIX); // in case someone accidentally changed the prefix...\n        let mut test_key = base_key;\n        let mut lsn = Lsn(0x10);\n\n        async fn scan_with_statistics(\n            tline: &Timeline,\n            keyspace: &KeySpace,\n            lsn: Lsn,\n            ctx: &RequestContext,\n            io_concurrency: IoConcurrency,\n        ) -> anyhow::Result<(BTreeMap<Key, Result<Bytes, PageReconstructError>>, usize)> {\n            let mut reconstruct_state = ValuesReconstructState::new(io_concurrency);\n            let query = VersionedKeySpaceQuery::uniform(keyspace.clone(), lsn);\n            let res = tline\n                .get_vectored_impl(query, &mut reconstruct_state, ctx)\n                .await?;\n            Ok((res, reconstruct_state.get_delta_layers_visited() as usize))\n        }\n\n        for blknum in 0..NUM_KEYS {\n            lsn = Lsn(lsn.0 + 0x10);\n            test_key.field6 = (blknum * STEP) as u32;\n            let mut writer = tline.writer().await;\n            writer\n                .put(\n                    test_key,\n                    lsn,\n 
                   &Value::Image(test_img(&format!(\"{blknum} at {lsn}\"))),\n                    &ctx,\n                )\n                .await?;\n            writer.finish_write(lsn);\n            drop(writer);\n        }\n\n        let keyspace = KeySpace::single(base_key..base_key.add((NUM_KEYS * STEP) as u32));\n\n        for iter in 1..=10 {\n            for _ in 0..NUM_KEYS {\n                lsn = Lsn(lsn.0 + 0x10);\n                let blknum = rand::rng().random_range(0..NUM_KEYS);\n                test_key.field6 = (blknum * STEP) as u32;\n                let mut writer = tline.writer().await;\n                writer\n                    .put(\n                        test_key,\n                        lsn,\n                        &Value::Image(test_img(&format!(\"{blknum} at {lsn}\"))),\n                        &ctx,\n                    )\n                    .await?;\n                writer.finish_write(lsn);\n                drop(writer);\n            }\n\n            tline.freeze_and_flush().await?;\n            // Force layers to L1\n            tline\n                .compact(\n                    &cancel,\n                    {\n                        let mut flags = EnumSet::new();\n                        flags.insert(CompactFlags::ForceL0Compaction);\n                        flags\n                    },\n                    &ctx,\n                )\n                .await?;\n\n            if iter % 5 == 0 {\n                let scan_lsn = Lsn(lsn.0 + 1);\n                info!(\"scanning at {}\", scan_lsn);\n                let (_, before_delta_file_accessed) =\n                    scan_with_statistics(&tline, &keyspace, scan_lsn, &ctx, io_concurrency.clone())\n                        .await?;\n                tline\n                    .compact(\n                        &cancel,\n                        {\n                            let mut flags = EnumSet::new();\n                            
flags.insert(CompactFlags::ForceImageLayerCreation);\n                            flags.insert(CompactFlags::ForceRepartition);\n                            flags.insert(CompactFlags::ForceL0Compaction);\n                            flags\n                        },\n                        &ctx,\n                    )\n                    .await?;\n                let (_, after_delta_file_accessed) =\n                    scan_with_statistics(&tline, &keyspace, scan_lsn, &ctx, io_concurrency.clone())\n                        .await?;\n                assert!(\n                    after_delta_file_accessed < before_delta_file_accessed,\n                    \"after_delta_file_accessed={after_delta_file_accessed}, before_delta_file_accessed={before_delta_file_accessed}\"\n                );\n                // Given that we already produced an image layer, there should be no delta layer needed for the scan, but still setting a low threshold there for unforeseen circumstances.\n                assert!(\n                    after_delta_file_accessed <= 2,\n                    \"after_delta_file_accessed={after_delta_file_accessed}\"\n                );\n            }\n        }\n\n        Ok(())\n    }\n\n    #[tokio::test]\n    async fn test_vectored_missing_data_key_reads() -> anyhow::Result<()> {\n        let harness = TenantHarness::create(\"test_vectored_missing_data_key_reads\").await?;\n        let (tenant, ctx) = harness.load().await;\n\n        let base_key = Key::from_hex(\"000000000033333333444444445500000000\").unwrap();\n        let base_key_child = Key::from_hex(\"000000000033333333444444445500000001\").unwrap();\n        let base_key_nonexist = Key::from_hex(\"000000000033333333444444445500000002\").unwrap();\n\n        let tline = tenant\n            .create_test_timeline_with_layers(\n                TIMELINE_ID,\n                Lsn(0x10),\n                DEFAULT_PG_VERSION,\n                &ctx,\n                Vec::new(), // in-memory layers\n     
           Vec::new(), // delta layers\n                vec![(Lsn(0x20), vec![(base_key, test_img(\"data key 1\"))])], // image layers\n                Lsn(0x20), // it's fine to not advance LSN to 0x30 while using 0x30 to get below because `get_vectored_impl` does not wait for LSN\n            )\n            .await?;\n        tline.add_extra_test_dense_keyspace(KeySpace::single(base_key..(base_key_nonexist.next())));\n\n        let child = tenant\n            .branch_timeline_test_with_layers(\n                &tline,\n                NEW_TIMELINE_ID,\n                Some(Lsn(0x20)),\n                &ctx,\n                Vec::new(), // delta layers\n                vec![(Lsn(0x30), vec![(base_key_child, test_img(\"data key 2\"))])], // image layers\n                Lsn(0x30),\n            )\n            .await\n            .unwrap();\n\n        let lsn = Lsn(0x30);\n\n        // test vectored get on parent timeline\n        assert_eq!(\n            get_vectored_impl_wrapper(&tline, base_key, lsn, &ctx).await?,\n            Some(test_img(\"data key 1\"))\n        );\n        assert!(\n            get_vectored_impl_wrapper(&tline, base_key_child, lsn, &ctx)\n                .await\n                .unwrap_err()\n                .is_missing_key_error()\n        );\n        assert!(\n            get_vectored_impl_wrapper(&tline, base_key_nonexist, lsn, &ctx)\n                .await\n                .unwrap_err()\n                .is_missing_key_error()\n        );\n\n        // test vectored get on child timeline\n        assert_eq!(\n            get_vectored_impl_wrapper(&child, base_key, lsn, &ctx).await?,\n            Some(test_img(\"data key 1\"))\n        );\n        assert_eq!(\n            get_vectored_impl_wrapper(&child, base_key_child, lsn, &ctx).await?,\n            Some(test_img(\"data key 2\"))\n        );\n        assert!(\n            get_vectored_impl_wrapper(&child, base_key_nonexist, lsn, &ctx)\n                .await\n                
.unwrap_err()\n                .is_missing_key_error()\n        );\n\n        Ok(())\n    }\n\n    #[tokio::test]\n    async fn test_vectored_missing_metadata_key_reads() -> anyhow::Result<()> {\n        let harness = TenantHarness::create(\"test_vectored_missing_metadata_key_reads\").await?;\n        let (tenant, ctx) = harness.load().await;\n        let io_concurrency = IoConcurrency::spawn_for_test();\n\n        let base_key = Key::from_hex(\"620000000033333333444444445500000000\").unwrap();\n        let base_key_child = Key::from_hex(\"620000000033333333444444445500000001\").unwrap();\n        let base_key_nonexist = Key::from_hex(\"620000000033333333444444445500000002\").unwrap();\n        let base_key_overwrite = Key::from_hex(\"620000000033333333444444445500000003\").unwrap();\n\n        let base_inherited_key = Key::from_hex(\"610000000033333333444444445500000000\").unwrap();\n        let base_inherited_key_child =\n            Key::from_hex(\"610000000033333333444444445500000001\").unwrap();\n        let base_inherited_key_nonexist =\n            Key::from_hex(\"610000000033333333444444445500000002\").unwrap();\n        let base_inherited_key_overwrite =\n            Key::from_hex(\"610000000033333333444444445500000003\").unwrap();\n\n        assert_eq!(base_key.field1, AUX_KEY_PREFIX); // in case someone accidentally changed the prefix...\n        assert_eq!(base_inherited_key.field1, RELATION_SIZE_PREFIX);\n\n        let tline = tenant\n            .create_test_timeline_with_layers(\n                TIMELINE_ID,\n                Lsn(0x10),\n                DEFAULT_PG_VERSION,\n                &ctx,\n                Vec::new(), // in-memory layers\n                Vec::new(), // delta layers\n                vec![(\n                    Lsn(0x20),\n                    vec![\n                        (base_inherited_key, test_img(\"metadata inherited key 1\")),\n                        (\n                            base_inherited_key_overwrite,\n            
                test_img(\"metadata key overwrite 1a\"),\n                        ),\n                        (base_key, test_img(\"metadata key 1\")),\n                        (base_key_overwrite, test_img(\"metadata key overwrite 1b\")),\n                    ],\n                )], // image layers\n                Lsn(0x20), // it's fine to not advance LSN to 0x30 while using 0x30 to get below because `get_vectored_impl` does not wait for LSN\n            )\n            .await?;\n\n        let child = tenant\n            .branch_timeline_test_with_layers(\n                &tline,\n                NEW_TIMELINE_ID,\n                Some(Lsn(0x20)),\n                &ctx,\n                Vec::new(), // delta layers\n                vec![(\n                    Lsn(0x30),\n                    vec![\n                        (\n                            base_inherited_key_child,\n                            test_img(\"metadata inherited key 2\"),\n                        ),\n                        (\n                            base_inherited_key_overwrite,\n                            test_img(\"metadata key overwrite 2a\"),\n                        ),\n                        (base_key_child, test_img(\"metadata key 2\")),\n                        (base_key_overwrite, test_img(\"metadata key overwrite 2b\")),\n                    ],\n                )], // image layers\n                Lsn(0x30),\n            )\n            .await\n            .unwrap();\n\n        let lsn = Lsn(0x30);\n\n        // test vectored get on parent timeline\n        assert_eq!(\n            get_vectored_impl_wrapper(&tline, base_key, lsn, &ctx).await?,\n            Some(test_img(\"metadata key 1\"))\n        );\n        assert_eq!(\n            get_vectored_impl_wrapper(&tline, base_key_child, lsn, &ctx).await?,\n            None\n        );\n        assert_eq!(\n            get_vectored_impl_wrapper(&tline, base_key_nonexist, lsn, &ctx).await?,\n            None\n        );\n        
assert_eq!(\n            get_vectored_impl_wrapper(&tline, base_key_overwrite, lsn, &ctx).await?,\n            Some(test_img(\"metadata key overwrite 1b\"))\n        );\n        assert_eq!(\n            get_vectored_impl_wrapper(&tline, base_inherited_key, lsn, &ctx).await?,\n            Some(test_img(\"metadata inherited key 1\"))\n        );\n        assert_eq!(\n            get_vectored_impl_wrapper(&tline, base_inherited_key_child, lsn, &ctx).await?,\n            None\n        );\n        assert_eq!(\n            get_vectored_impl_wrapper(&tline, base_inherited_key_nonexist, lsn, &ctx).await?,\n            None\n        );\n        assert_eq!(\n            get_vectored_impl_wrapper(&tline, base_inherited_key_overwrite, lsn, &ctx).await?,\n            Some(test_img(\"metadata key overwrite 1a\"))\n        );\n\n        // test vectored get on child timeline\n        assert_eq!(\n            get_vectored_impl_wrapper(&child, base_key, lsn, &ctx).await?,\n            None\n        );\n        assert_eq!(\n            get_vectored_impl_wrapper(&child, base_key_child, lsn, &ctx).await?,\n            Some(test_img(\"metadata key 2\"))\n        );\n        assert_eq!(\n            get_vectored_impl_wrapper(&child, base_key_nonexist, lsn, &ctx).await?,\n            None\n        );\n        assert_eq!(\n            get_vectored_impl_wrapper(&child, base_inherited_key, lsn, &ctx).await?,\n            Some(test_img(\"metadata inherited key 1\"))\n        );\n        assert_eq!(\n            get_vectored_impl_wrapper(&child, base_inherited_key_child, lsn, &ctx).await?,\n            Some(test_img(\"metadata inherited key 2\"))\n        );\n        assert_eq!(\n            get_vectored_impl_wrapper(&child, base_inherited_key_nonexist, lsn, &ctx).await?,\n            None\n        );\n        assert_eq!(\n            get_vectored_impl_wrapper(&child, base_key_overwrite, lsn, &ctx).await?,\n            Some(test_img(\"metadata key overwrite 2b\"))\n        );\n        
assert_eq!(\n            get_vectored_impl_wrapper(&child, base_inherited_key_overwrite, lsn, &ctx).await?,\n            Some(test_img(\"metadata key overwrite 2a\"))\n        );\n\n        // test vectored scan on parent timeline\n        let mut reconstruct_state = ValuesReconstructState::new(io_concurrency.clone());\n        let query =\n            VersionedKeySpaceQuery::uniform(KeySpace::single(Key::metadata_key_range()), lsn);\n        let res = tline\n            .get_vectored_impl(query, &mut reconstruct_state, &ctx)\n            .await?;\n\n        assert_eq!(\n            res.into_iter()\n                .map(|(k, v)| (k, v.unwrap()))\n                .collect::<Vec<_>>(),\n            vec![\n                (base_inherited_key, test_img(\"metadata inherited key 1\")),\n                (\n                    base_inherited_key_overwrite,\n                    test_img(\"metadata key overwrite 1a\")\n                ),\n                (base_key, test_img(\"metadata key 1\")),\n                (base_key_overwrite, test_img(\"metadata key overwrite 1b\")),\n            ]\n        );\n\n        // test vectored scan on child timeline\n        let mut reconstruct_state = ValuesReconstructState::new(io_concurrency.clone());\n        let query =\n            VersionedKeySpaceQuery::uniform(KeySpace::single(Key::metadata_key_range()), lsn);\n        let res = child\n            .get_vectored_impl(query, &mut reconstruct_state, &ctx)\n            .await?;\n\n        assert_eq!(\n            res.into_iter()\n                .map(|(k, v)| (k, v.unwrap()))\n                .collect::<Vec<_>>(),\n            vec![\n                (base_inherited_key, test_img(\"metadata inherited key 1\")),\n                (\n                    base_inherited_key_child,\n                    test_img(\"metadata inherited key 2\")\n                ),\n                (\n                    base_inherited_key_overwrite,\n                    test_img(\"metadata key overwrite 2a\")\n   
             ),\n                (base_key_child, test_img(\"metadata key 2\")),\n                (base_key_overwrite, test_img(\"metadata key overwrite 2b\")),\n            ]\n        );\n\n        Ok(())\n    }\n\n    async fn get_vectored_impl_wrapper(\n        tline: &Arc<Timeline>,\n        key: Key,\n        lsn: Lsn,\n        ctx: &RequestContext,\n    ) -> Result<Option<Bytes>, GetVectoredError> {\n        let io_concurrency = IoConcurrency::spawn_from_conf(\n            tline.conf.get_vectored_concurrent_io,\n            tline.gate.enter().unwrap(),\n        );\n        let mut reconstruct_state = ValuesReconstructState::new(io_concurrency);\n        let query = VersionedKeySpaceQuery::uniform(KeySpace::single(key..key.next()), lsn);\n        let mut res = tline\n            .get_vectored_impl(query, &mut reconstruct_state, ctx)\n            .await?;\n        Ok(res.pop_last().map(|(k, v)| {\n            assert_eq!(k, key);\n            v.unwrap()\n        }))\n    }\n\n    #[tokio::test]\n    async fn test_metadata_tombstone_reads() -> anyhow::Result<()> {\n        let harness = TenantHarness::create(\"test_metadata_tombstone_reads\").await?;\n        let (tenant, ctx) = harness.load().await;\n        let key0 = Key::from_hex(\"620000000033333333444444445500000000\").unwrap();\n        let key1 = Key::from_hex(\"620000000033333333444444445500000001\").unwrap();\n        let key2 = Key::from_hex(\"620000000033333333444444445500000002\").unwrap();\n        let key3 = Key::from_hex(\"620000000033333333444444445500000003\").unwrap();\n\n        // We emulate the situation that the compaction algorithm creates an image layer that removes the tombstones\n        // Lsn 0x30 key0, key3, no key1+key2\n        // Lsn 0x20 key1+key2 tombstones\n        // Lsn 0x10 key1 in image, key2 in delta\n        let tline = tenant\n            .create_test_timeline_with_layers(\n                TIMELINE_ID,\n                Lsn(0x10),\n                DEFAULT_PG_VERSION,\n    
            &ctx,\n                Vec::new(), // in-memory layers\n                // delta layers\n                vec![\n                    DeltaLayerTestDesc::new_with_inferred_key_range(\n                        Lsn(0x10)..Lsn(0x20),\n                        vec![(key2, Lsn(0x10), Value::Image(test_img(\"metadata key 2\")))],\n                    ),\n                    DeltaLayerTestDesc::new_with_inferred_key_range(\n                        Lsn(0x20)..Lsn(0x30),\n                        vec![(key1, Lsn(0x20), Value::Image(Bytes::new()))],\n                    ),\n                    DeltaLayerTestDesc::new_with_inferred_key_range(\n                        Lsn(0x20)..Lsn(0x30),\n                        vec![(key2, Lsn(0x20), Value::Image(Bytes::new()))],\n                    ),\n                ],\n                // image layers\n                vec![\n                    (Lsn(0x10), vec![(key1, test_img(\"metadata key 1\"))]),\n                    (\n                        Lsn(0x30),\n                        vec![\n                            (key0, test_img(\"metadata key 0\")),\n                            (key3, test_img(\"metadata key 3\")),\n                        ],\n                    ),\n                ],\n                Lsn(0x30),\n            )\n            .await?;\n\n        let lsn = Lsn(0x30);\n        let old_lsn = Lsn(0x20);\n\n        assert_eq!(\n            get_vectored_impl_wrapper(&tline, key0, lsn, &ctx).await?,\n            Some(test_img(\"metadata key 0\"))\n        );\n        assert_eq!(\n            get_vectored_impl_wrapper(&tline, key1, lsn, &ctx).await?,\n            None,\n        );\n        assert_eq!(\n            get_vectored_impl_wrapper(&tline, key2, lsn, &ctx).await?,\n            None,\n        );\n        assert_eq!(\n            get_vectored_impl_wrapper(&tline, key1, old_lsn, &ctx).await?,\n            Some(Bytes::new()),\n        );\n        assert_eq!(\n            get_vectored_impl_wrapper(&tline, key2, 
old_lsn, &ctx).await?,\n            Some(Bytes::new()),\n        );\n        assert_eq!(\n            get_vectored_impl_wrapper(&tline, key3, lsn, &ctx).await?,\n            Some(test_img(\"metadata key 3\"))\n        );\n\n        Ok(())\n    }\n\n    #[tokio::test]\n    async fn test_metadata_tombstone_image_creation() {\n        let harness = TenantHarness::create(\"test_metadata_tombstone_image_creation\")\n            .await\n            .unwrap();\n        let (tenant, ctx) = harness.load().await;\n        let io_concurrency = IoConcurrency::spawn_for_test();\n\n        let key0 = Key::from_hex(\"620000000033333333444444445500000000\").unwrap();\n        let key1 = Key::from_hex(\"620000000033333333444444445500000001\").unwrap();\n        let key2 = Key::from_hex(\"620000000033333333444444445500000002\").unwrap();\n        let key3 = Key::from_hex(\"620000000033333333444444445500000003\").unwrap();\n\n        let tline = tenant\n            .create_test_timeline_with_layers(\n                TIMELINE_ID,\n                Lsn(0x10),\n                DEFAULT_PG_VERSION,\n                &ctx,\n                Vec::new(), // in-memory layers\n                // delta layers\n                vec![\n                    DeltaLayerTestDesc::new_with_inferred_key_range(\n                        Lsn(0x10)..Lsn(0x20),\n                        vec![(key2, Lsn(0x10), Value::Image(test_img(\"metadata key 2\")))],\n                    ),\n                    DeltaLayerTestDesc::new_with_inferred_key_range(\n                        Lsn(0x20)..Lsn(0x30),\n                        vec![(key1, Lsn(0x20), Value::Image(Bytes::new()))],\n                    ),\n                    DeltaLayerTestDesc::new_with_inferred_key_range(\n                        Lsn(0x20)..Lsn(0x30),\n                        vec![(key2, Lsn(0x20), Value::Image(Bytes::new()))],\n                    ),\n                    DeltaLayerTestDesc::new_with_inferred_key_range(\n                        
Lsn(0x30)..Lsn(0x40),\n                        vec![\n                            (key0, Lsn(0x30), Value::Image(test_img(\"metadata key 0\"))),\n                            (key3, Lsn(0x30), Value::Image(test_img(\"metadata key 3\"))),\n                        ],\n                    ),\n                ],\n                // image layers\n                vec![(Lsn(0x10), vec![(key1, test_img(\"metadata key 1\"))])],\n                Lsn(0x40),\n            )\n            .await\n            .unwrap();\n\n        let cancel = CancellationToken::new();\n\n        // Image layer creation happens on the disk_consistent_lsn so we need to force set it now.\n        tline.force_set_disk_consistent_lsn(Lsn(0x40));\n        tline\n            .compact(\n                &cancel,\n                {\n                    let mut flags = EnumSet::new();\n                    flags.insert(CompactFlags::ForceImageLayerCreation);\n                    flags.insert(CompactFlags::ForceRepartition);\n                    flags\n                },\n                &ctx,\n            )\n            .await\n            .unwrap();\n        // Image layers are created at repartition LSN\n        let images = tline\n            .inspect_image_layers(Lsn(0x40), &ctx, io_concurrency.clone())\n            .await\n            .unwrap()\n            .into_iter()\n            .filter(|(k, _)| k.is_metadata_key())\n            .collect::<Vec<_>>();\n        assert_eq!(images.len(), 2); // the image layer should only contain two existing keys, tombstones should be removed.\n    }\n\n    #[tokio::test]\n    async fn test_metadata_tombstone_empty_image_creation() {\n        let harness = TenantHarness::create(\"test_metadata_tombstone_empty_image_creation\")\n            .await\n            .unwrap();\n        let (tenant, ctx) = harness.load().await;\n        let io_concurrency = IoConcurrency::spawn_for_test();\n\n        let key1 = Key::from_hex(\"620000000033333333444444445500000001\").unwrap();\n 
       let key2 = Key::from_hex(\"620000000033333333444444445500000002\").unwrap();\n\n        let tline = tenant\n            .create_test_timeline_with_layers(\n                TIMELINE_ID,\n                Lsn(0x10),\n                DEFAULT_PG_VERSION,\n                &ctx,\n                Vec::new(), // in-memory layers\n                // delta layers\n                vec![\n                    DeltaLayerTestDesc::new_with_inferred_key_range(\n                        Lsn(0x10)..Lsn(0x20),\n                        vec![(key2, Lsn(0x10), Value::Image(test_img(\"metadata key 2\")))],\n                    ),\n                    DeltaLayerTestDesc::new_with_inferred_key_range(\n                        Lsn(0x20)..Lsn(0x30),\n                        vec![(key1, Lsn(0x20), Value::Image(Bytes::new()))],\n                    ),\n                    DeltaLayerTestDesc::new_with_inferred_key_range(\n                        Lsn(0x20)..Lsn(0x30),\n                        vec![(key2, Lsn(0x20), Value::Image(Bytes::new()))],\n                    ),\n                ],\n                // image layers\n                vec![(Lsn(0x10), vec![(key1, test_img(\"metadata key 1\"))])],\n                Lsn(0x30),\n            )\n            .await\n            .unwrap();\n\n        let cancel = CancellationToken::new();\n\n        tline\n            .compact(\n                &cancel,\n                {\n                    let mut flags = EnumSet::new();\n                    flags.insert(CompactFlags::ForceImageLayerCreation);\n                    flags.insert(CompactFlags::ForceRepartition);\n                    flags\n                },\n                &ctx,\n            )\n            .await\n            .unwrap();\n\n        // Image layers are created at last_record_lsn\n        let images = tline\n            .inspect_image_layers(Lsn(0x30), &ctx, io_concurrency.clone())\n            .await\n            .unwrap()\n            .into_iter()\n            .filter(|(k, _)| 
k.is_metadata_key())\n            .collect::<Vec<_>>();\n        assert_eq!(images.len(), 0); // the image layer should not contain tombstones, or it is not created\n    }\n\n    #[tokio::test]\n    async fn test_simple_bottom_most_compaction_images() -> anyhow::Result<()> {\n        let harness = TenantHarness::create(\"test_simple_bottom_most_compaction_images\").await?;\n        let (tenant, ctx) = harness.load().await;\n        let io_concurrency = IoConcurrency::spawn_for_test();\n\n        fn get_key(id: u32) -> Key {\n            // using aux key here b/c they are guaranteed to be inside `collect_keyspace`.\n            let mut key = Key::from_hex(\"620000000033333333444444445500000000\").unwrap();\n            key.field6 = id;\n            key\n        }\n\n        // We create\n        // - one bottom-most image layer,\n        // - a delta layer D1 crossing the GC horizon with data below and above the horizon,\n        // - a delta layer D2 crossing the GC horizon with data only below the horizon,\n        // - a delta layer D3 above the horizon.\n        //\n        //                             | D3 |\n        //  | D1 |\n        // -|    |-- gc horizon -----------------\n        //  |    |                | D2 |\n        // --------- img layer ------------------\n        //\n        // What we should expect from this compaction is:\n        //                             | D3 |\n        //  | Part of D1 |\n        // --------- img layer with D1+D2 at GC horizon------------------\n\n        // img layer at 0x10\n        let img_layer = (0..10)\n            .map(|id| (get_key(id), Bytes::from(format!(\"value {id}@0x10\"))))\n            .collect_vec();\n\n        let delta1 = vec![\n            (\n                get_key(1),\n                Lsn(0x20),\n                Value::Image(Bytes::from(\"value 1@0x20\")),\n            ),\n            (\n                get_key(2),\n                Lsn(0x30),\n                Value::Image(Bytes::from(\"value 
2@0x30\")),\n            ),\n            (\n                get_key(3),\n                Lsn(0x40),\n                Value::Image(Bytes::from(\"value 3@0x40\")),\n            ),\n        ];\n        let delta2 = vec![\n            (\n                get_key(5),\n                Lsn(0x20),\n                Value::Image(Bytes::from(\"value 5@0x20\")),\n            ),\n            (\n                get_key(6),\n                Lsn(0x20),\n                Value::Image(Bytes::from(\"value 6@0x20\")),\n            ),\n        ];\n        let delta3 = vec![\n            (\n                get_key(8),\n                Lsn(0x48),\n                Value::Image(Bytes::from(\"value 8@0x48\")),\n            ),\n            (\n                get_key(9),\n                Lsn(0x48),\n                Value::Image(Bytes::from(\"value 9@0x48\")),\n            ),\n        ];\n\n        let tline = tenant\n            .create_test_timeline_with_layers(\n                TIMELINE_ID,\n                Lsn(0x10),\n                DEFAULT_PG_VERSION,\n                &ctx,\n                Vec::new(), // in-memory layers\n                vec![\n                    DeltaLayerTestDesc::new_with_inferred_key_range(Lsn(0x20)..Lsn(0x48), delta1),\n                    DeltaLayerTestDesc::new_with_inferred_key_range(Lsn(0x20)..Lsn(0x48), delta2),\n                    DeltaLayerTestDesc::new_with_inferred_key_range(Lsn(0x48)..Lsn(0x50), delta3),\n                ], // delta layers\n                vec![(Lsn(0x10), img_layer)], // image layers\n                Lsn(0x50),\n            )\n            .await?;\n        {\n            tline\n                .applied_gc_cutoff_lsn\n                .lock_for_write()\n                .store_and_unlock(Lsn(0x30))\n                .wait()\n                .await;\n            // Update GC info\n            let mut guard = tline.gc_info.write().unwrap();\n            guard.cutoffs.time = Some(Lsn(0x30));\n            guard.cutoffs.space = Lsn(0x30);\n       
 }\n\n        let expected_result = [\n            Bytes::from_static(b\"value 0@0x10\"),\n            Bytes::from_static(b\"value 1@0x20\"),\n            Bytes::from_static(b\"value 2@0x30\"),\n            Bytes::from_static(b\"value 3@0x40\"),\n            Bytes::from_static(b\"value 4@0x10\"),\n            Bytes::from_static(b\"value 5@0x20\"),\n            Bytes::from_static(b\"value 6@0x20\"),\n            Bytes::from_static(b\"value 7@0x10\"),\n            Bytes::from_static(b\"value 8@0x48\"),\n            Bytes::from_static(b\"value 9@0x48\"),\n        ];\n\n        for (idx, expected) in expected_result.iter().enumerate() {\n            assert_eq!(\n                tline\n                    .get(get_key(idx as u32), Lsn(0x50), &ctx)\n                    .await\n                    .unwrap(),\n                expected\n            );\n        }\n\n        let cancel = CancellationToken::new();\n        tline\n            .compact_with_gc(\n                &cancel,\n                CompactOptions::default_for_gc_compaction_unit_tests(),\n                &ctx,\n            )\n            .await\n            .unwrap();\n\n        for (idx, expected) in expected_result.iter().enumerate() {\n            assert_eq!(\n                tline\n                    .get(get_key(idx as u32), Lsn(0x50), &ctx)\n                    .await\n                    .unwrap(),\n                expected\n            );\n        }\n\n        // Check if the image layer at the GC horizon contains exactly what we want\n        let image_at_gc_horizon = tline\n            .inspect_image_layers(Lsn(0x30), &ctx, io_concurrency.clone())\n            .await\n            .unwrap()\n            .into_iter()\n            .filter(|(k, _)| k.is_metadata_key())\n            .collect::<Vec<_>>();\n\n        assert_eq!(image_at_gc_horizon.len(), 10);\n        let expected_result = [\n            Bytes::from_static(b\"value 0@0x10\"),\n            Bytes::from_static(b\"value 1@0x20\"),\n          
  Bytes::from_static(b\"value 2@0x30\"),\n            Bytes::from_static(b\"value 3@0x10\"),\n            Bytes::from_static(b\"value 4@0x10\"),\n            Bytes::from_static(b\"value 5@0x20\"),\n            Bytes::from_static(b\"value 6@0x20\"),\n            Bytes::from_static(b\"value 7@0x10\"),\n            Bytes::from_static(b\"value 8@0x10\"),\n            Bytes::from_static(b\"value 9@0x10\"),\n        ];\n        for idx in 0..10 {\n            assert_eq!(\n                image_at_gc_horizon[idx],\n                (get_key(idx as u32), expected_result[idx].clone())\n            );\n        }\n\n        // Check if old layers are removed / new layers have the expected LSN\n        let all_layers = inspect_and_sort(&tline, None).await;\n        assert_eq!(\n            all_layers,\n            vec![\n                // Image layer at GC horizon\n                PersistentLayerKey {\n                    key_range: Key::MIN..Key::MAX,\n                    lsn_range: Lsn(0x30)..Lsn(0x31),\n                    is_delta: false\n                },\n                // The delta layer above the horizon\n                PersistentLayerKey {\n                    key_range: get_key(3)..get_key(4),\n                    lsn_range: Lsn(0x30)..Lsn(0x48),\n                    is_delta: true\n                },\n                // The delta3 layer that should not be picked for the compaction\n                PersistentLayerKey {\n                    key_range: get_key(8)..get_key(10),\n                    lsn_range: Lsn(0x48)..Lsn(0x50),\n                    is_delta: true\n                }\n            ]\n        );\n\n        // increase GC horizon and compact again\n        {\n            tline\n                .applied_gc_cutoff_lsn\n                .lock_for_write()\n                .store_and_unlock(Lsn(0x40))\n                .wait()\n                .await;\n            // Update GC info\n            let mut guard = tline.gc_info.write().unwrap();\n            
guard.cutoffs.time = Some(Lsn(0x40));\n            guard.cutoffs.space = Lsn(0x40);\n        }\n        tline\n            .compact_with_gc(\n                &cancel,\n                CompactOptions::default_for_gc_compaction_unit_tests(),\n                &ctx,\n            )\n            .await\n            .unwrap();\n\n        Ok(())\n    }\n\n    #[cfg(feature = \"testing\")]\n    #[tokio::test]\n    async fn test_neon_test_record() -> anyhow::Result<()> {\n        let harness = TenantHarness::create(\"test_neon_test_record\").await?;\n        let (tenant, ctx) = harness.load().await;\n\n        fn get_key(id: u32) -> Key {\n            // using aux key here b/c they are guaranteed to be inside `collect_keyspace`.\n            let mut key = Key::from_hex(\"620000000033333333444444445500000000\").unwrap();\n            key.field6 = id;\n            key\n        }\n\n        let delta1 = vec![\n            (\n                get_key(1),\n                Lsn(0x20),\n                Value::WalRecord(NeonWalRecord::wal_append(\",0x20\")),\n            ),\n            (\n                get_key(1),\n                Lsn(0x30),\n                Value::WalRecord(NeonWalRecord::wal_append(\",0x30\")),\n            ),\n            (get_key(2), Lsn(0x10), Value::Image(\"0x10\".into())),\n            (\n                get_key(2),\n                Lsn(0x20),\n                Value::WalRecord(NeonWalRecord::wal_append(\",0x20\")),\n            ),\n            (\n                get_key(2),\n                Lsn(0x30),\n                Value::WalRecord(NeonWalRecord::wal_append(\",0x30\")),\n            ),\n            (get_key(3), Lsn(0x10), Value::Image(\"0x10\".into())),\n            (\n                get_key(3),\n                Lsn(0x20),\n                Value::WalRecord(NeonWalRecord::wal_clear(\"c\")),\n            ),\n            (get_key(4), Lsn(0x10), Value::Image(\"0x10\".into())),\n            (\n                get_key(4),\n                Lsn(0x20),\n          
      Value::WalRecord(NeonWalRecord::wal_init(\"i\")),\n            ),\n            (\n                get_key(4),\n                Lsn(0x30),\n                Value::WalRecord(NeonWalRecord::wal_append_conditional(\"j\", \"i\")),\n            ),\n            (\n                get_key(5),\n                Lsn(0x20),\n                Value::WalRecord(NeonWalRecord::wal_init(\"1\")),\n            ),\n            (\n                get_key(5),\n                Lsn(0x30),\n                Value::WalRecord(NeonWalRecord::wal_append_conditional(\"j\", \"2\")),\n            ),\n        ];\n        let image1 = vec![(get_key(1), \"0x10\".into())];\n\n        let tline = tenant\n            .create_test_timeline_with_layers(\n                TIMELINE_ID,\n                Lsn(0x10),\n                DEFAULT_PG_VERSION,\n                &ctx,\n                Vec::new(), // in-memory layers\n                vec![DeltaLayerTestDesc::new_with_inferred_key_range(\n                    Lsn(0x10)..Lsn(0x40),\n                    delta1,\n                )], // delta layers\n                vec![(Lsn(0x10), image1)], // image layers\n                Lsn(0x50),\n            )\n            .await?;\n\n        assert_eq!(\n            tline.get(get_key(1), Lsn(0x50), &ctx).await?,\n            Bytes::from_static(b\"0x10,0x20,0x30\")\n        );\n        assert_eq!(\n            tline.get(get_key(2), Lsn(0x50), &ctx).await?,\n            Bytes::from_static(b\"0x10,0x20,0x30\")\n        );\n\n        // Need to remove the limit of \"Neon WAL redo requires base image\".\n\n        assert_eq!(\n            tline.get(get_key(3), Lsn(0x50), &ctx).await?,\n            Bytes::from_static(b\"c\")\n        );\n        assert_eq!(\n            tline.get(get_key(4), Lsn(0x50), &ctx).await?,\n            Bytes::from_static(b\"ij\")\n        );\n\n        // Manual testing required: currently, read errors will panic the process in debug mode. 
So we\n        // cannot enable this assertion in the unit test.\n        // assert!(tline.get(get_key(5), Lsn(0x50), &ctx).await.is_err());\n\n        Ok(())\n    }\n\n    #[tokio::test]\n    async fn test_lsn_lease() -> anyhow::Result<()> {\n        let (tenant, ctx) = TenantHarness::create(\"test_lsn_lease\")\n            .await\n            .unwrap()\n            .load()\n            .await;\n        // set a non-zero lease length to test the feature\n        tenant\n            .update_tenant_config(|mut conf| {\n                conf.lsn_lease_length = Some(LsnLease::DEFAULT_LENGTH);\n                Ok(conf)\n            })\n            .unwrap();\n\n        let key = Key::from_hex(\"010000000033333333444444445500000000\").unwrap();\n\n        let end_lsn = Lsn(0x100);\n        let image_layers = (0x20..=0x90)\n            .step_by(0x10)\n            .map(|n| (Lsn(n), vec![(key, test_img(&format!(\"data key at {n:x}\")))]))\n            .collect();\n\n        let timeline = tenant\n            .create_test_timeline_with_layers(\n                TIMELINE_ID,\n                Lsn(0x10),\n                DEFAULT_PG_VERSION,\n                &ctx,\n                Vec::new(), // in-memory layers\n                Vec::new(),\n                image_layers,\n                end_lsn,\n            )\n            .await?;\n\n        let leased_lsns = [0x30, 0x50, 0x70];\n        let mut leases = Vec::new();\n        leased_lsns.iter().for_each(|n| {\n            leases.push(\n                timeline\n                    .init_lsn_lease(Lsn(*n), timeline.get_lsn_lease_length(), &ctx)\n                    .expect(\"lease request should succeed\"),\n            );\n        });\n\n        let updated_lease_0 = timeline\n            .renew_lsn_lease(Lsn(leased_lsns[0]), Duration::from_secs(0), &ctx)\n            .expect(\"lease renewal should succeed\");\n        assert_eq!(\n            updated_lease_0.valid_until, leases[0].valid_until,\n            \" Renewing with 
shorter lease should not change the lease.\"\n        );\n\n        let updated_lease_1 = timeline\n            .renew_lsn_lease(\n                Lsn(leased_lsns[1]),\n                timeline.get_lsn_lease_length() * 2,\n                &ctx,\n            )\n            .expect(\"lease renewal should succeed\");\n        assert!(\n            updated_lease_1.valid_until > leases[1].valid_until,\n            \"Renewing with a long lease should renew lease with later expiration time.\"\n        );\n\n        // Force set disk consistent lsn so we can get the cutoff at `end_lsn`.\n        info!(\n            \"applied_gc_cutoff_lsn: {}\",\n            *timeline.get_applied_gc_cutoff_lsn()\n        );\n        timeline.force_set_disk_consistent_lsn(end_lsn);\n\n        let res = tenant\n            .gc_iteration(\n                Some(TIMELINE_ID),\n                0,\n                Duration::ZERO,\n                &CancellationToken::new(),\n                &ctx,\n            )\n            .await\n            .unwrap();\n\n        // Keeping everything < Lsn(0x80) b/c leases:\n        // 0/10: initdb layer\n        // (0/20..=0/70).step_by(0x10): image layers added when creating the timeline.\n        assert_eq!(res.layers_needed_by_leases, 7);\n        // Keeping 0/90 b/c it is the latest layer.\n        assert_eq!(res.layers_not_updated, 1);\n        // Removed 0/80.\n        assert_eq!(res.layers_removed, 1);\n\n        // Make lease on an already GC-ed LSN.\n        // 0/80 does not have a valid lease + is below latest_gc_cutoff\n        assert!(Lsn(0x80) < *timeline.get_applied_gc_cutoff_lsn());\n        timeline\n            .init_lsn_lease(Lsn(0x80), timeline.get_lsn_lease_length(), &ctx)\n            .expect_err(\"lease request on GC-ed LSN should fail\");\n\n        // Should still be able to renew a currently valid lease\n        // Assumption: original lease is still valid for 0/50.\n        // (use `Timeline::init_lsn_lease` for testing so it 
always does validation)\n        timeline\n            .init_lsn_lease(Lsn(leased_lsns[1]), timeline.get_lsn_lease_length(), &ctx)\n            .expect(\"lease renewal with validation should succeed\");\n\n        Ok(())\n    }\n\n    #[tokio::test]\n    async fn test_failed_flush_should_not_update_disk_consistent_lsn() -> anyhow::Result<()> {\n        //\n        // Setup\n        //\n        let harness = TenantHarness::create_custom(\n            \"test_failed_flush_should_not_upload_disk_consistent_lsn\",\n            pageserver_api::models::TenantConfig::default(),\n            TenantId::generate(),\n            ShardIdentity::new(ShardNumber(0), ShardCount(4), ShardStripeSize(128)).unwrap(),\n            Generation::new(1),\n        )\n        .await?;\n        let (tenant, ctx) = harness.load().await;\n\n        let timeline = tenant\n            .create_test_timeline(TIMELINE_ID, Lsn(0x10), DEFAULT_PG_VERSION, &ctx)\n            .await?;\n        assert_eq!(timeline.get_shard_identity().count, ShardCount(4));\n        let mut writer = timeline.writer().await;\n        writer\n            .put(\n                *TEST_KEY,\n                Lsn(0x20),\n                &Value::Image(test_img(\"foo at 0x20\")),\n                &ctx,\n            )\n            .await?;\n        writer.finish_write(Lsn(0x20));\n        drop(writer);\n        timeline.freeze_and_flush().await.unwrap();\n\n        timeline.remote_client.wait_completion().await.unwrap();\n        let disk_consistent_lsn = timeline.get_disk_consistent_lsn();\n        let remote_consistent_lsn = timeline.get_remote_consistent_lsn_projected();\n        assert_eq!(Some(disk_consistent_lsn), remote_consistent_lsn);\n\n        //\n        // Test\n        //\n\n        let mut writer = timeline.writer().await;\n        writer\n            .put(\n                *TEST_KEY,\n                Lsn(0x30),\n                &Value::Image(test_img(\"foo at 0x30\")),\n                &ctx,\n            )\n         
   .await?;\n        writer.finish_write(Lsn(0x30));\n        drop(writer);\n\n        fail::cfg(\n            \"flush-layer-before-update-remote-consistent-lsn\",\n            \"return()\",\n        )\n        .unwrap();\n\n        let flush_res = timeline.freeze_and_flush().await;\n        // if flush failed, the disk/remote consistent LSN should not be updated\n        assert!(flush_res.is_err());\n        assert_eq!(disk_consistent_lsn, timeline.get_disk_consistent_lsn());\n        assert_eq!(\n            remote_consistent_lsn,\n            timeline.get_remote_consistent_lsn_projected()\n        );\n\n        Ok(())\n    }\n\n    #[cfg(feature = \"testing\")]\n    #[tokio::test]\n    async fn test_simple_bottom_most_compaction_deltas_1() -> anyhow::Result<()> {\n        test_simple_bottom_most_compaction_deltas_helper(\n            \"test_simple_bottom_most_compaction_deltas_1\",\n            false,\n        )\n        .await\n    }\n\n    #[cfg(feature = \"testing\")]\n    #[tokio::test]\n    async fn test_simple_bottom_most_compaction_deltas_2() -> anyhow::Result<()> {\n        test_simple_bottom_most_compaction_deltas_helper(\n            \"test_simple_bottom_most_compaction_deltas_2\",\n            true,\n        )\n        .await\n    }\n\n    #[cfg(feature = \"testing\")]\n    async fn test_simple_bottom_most_compaction_deltas_helper(\n        test_name: &'static str,\n        use_delta_bottom_layer: bool,\n    ) -> anyhow::Result<()> {\n        let harness = TenantHarness::create(test_name).await?;\n        let (tenant, ctx) = harness.load().await;\n\n        fn get_key(id: u32) -> Key {\n            // using aux key here b/c they are guaranteed to be inside `collect_keyspace`.\n            let mut key = Key::from_hex(\"620000000033333333444444445500000000\").unwrap();\n            key.field6 = id;\n            key\n        }\n\n        // We create\n        // - one bottom-most image layer,\n        // - a delta layer D1 crossing the GC horizon with 
data below and above the horizon,\n        // - a delta layer D2 crossing the GC horizon with data only below the horizon,\n        // - a delta layer D3 above the horizon.\n        //\n        //                             | D3 |\n        //  | D1 |\n        // -|    |-- gc horizon -----------------\n        //  |    |                | D2 |\n        // --------- img layer ------------------\n        //\n        // What we should expect from this compaction is:\n        //                             | D3 |\n        //  | Part of D1 |\n        // --------- img layer with D1+D2 at GC horizon------------------\n\n        // img layer at 0x10\n        let img_layer = (0..10)\n            .map(|id| (get_key(id), Bytes::from(format!(\"value {id}@0x10\"))))\n            .collect_vec();\n        // or, delta layer at 0x10 if `use_delta_bottom_layer` is true\n        let delta4 = (0..10)\n            .map(|id| {\n                (\n                    get_key(id),\n                    Lsn(0x08),\n                    Value::WalRecord(NeonWalRecord::wal_init(format!(\"value {id}@0x10\"))),\n                )\n            })\n            .collect_vec();\n\n        let delta1 = vec![\n            (\n                get_key(1),\n                Lsn(0x20),\n                Value::WalRecord(NeonWalRecord::wal_append(\"@0x20\")),\n            ),\n            (\n                get_key(2),\n                Lsn(0x30),\n                Value::WalRecord(NeonWalRecord::wal_append(\"@0x30\")),\n            ),\n            (\n                get_key(3),\n                Lsn(0x28),\n                Value::WalRecord(NeonWalRecord::wal_append(\"@0x28\")),\n            ),\n            (\n                get_key(3),\n                Lsn(0x30),\n                Value::WalRecord(NeonWalRecord::wal_append(\"@0x30\")),\n            ),\n            (\n                get_key(3),\n                Lsn(0x40),\n                Value::WalRecord(NeonWalRecord::wal_append(\"@0x40\")),\n            ),\n  
      ];\n        let delta2 = vec![\n            (\n                get_key(5),\n                Lsn(0x20),\n                Value::WalRecord(NeonWalRecord::wal_append(\"@0x20\")),\n            ),\n            (\n                get_key(6),\n                Lsn(0x20),\n                Value::WalRecord(NeonWalRecord::wal_append(\"@0x20\")),\n            ),\n        ];\n        let delta3 = vec![\n            (\n                get_key(8),\n                Lsn(0x48),\n                Value::WalRecord(NeonWalRecord::wal_append(\"@0x48\")),\n            ),\n            (\n                get_key(9),\n                Lsn(0x48),\n                Value::WalRecord(NeonWalRecord::wal_append(\"@0x48\")),\n            ),\n        ];\n\n        let tline = if use_delta_bottom_layer {\n            tenant\n                .create_test_timeline_with_layers(\n                    TIMELINE_ID,\n                    Lsn(0x08),\n                    DEFAULT_PG_VERSION,\n                    &ctx,\n                    Vec::new(), // in-memory layers\n                    vec![\n                        DeltaLayerTestDesc::new_with_inferred_key_range(\n                            Lsn(0x08)..Lsn(0x10),\n                            delta4,\n                        ),\n                        DeltaLayerTestDesc::new_with_inferred_key_range(\n                            Lsn(0x20)..Lsn(0x48),\n                            delta1,\n                        ),\n                        DeltaLayerTestDesc::new_with_inferred_key_range(\n                            Lsn(0x20)..Lsn(0x48),\n                            delta2,\n                        ),\n                        DeltaLayerTestDesc::new_with_inferred_key_range(\n                            Lsn(0x48)..Lsn(0x50),\n                            delta3,\n                        ),\n                    ], // delta layers\n                    vec![],     // image layers\n                    Lsn(0x50),\n                )\n                .await?\n    
    } else {\n            tenant\n                .create_test_timeline_with_layers(\n                    TIMELINE_ID,\n                    Lsn(0x10),\n                    DEFAULT_PG_VERSION,\n                    &ctx,\n                    Vec::new(), // in-memory layers\n                    vec![\n                        DeltaLayerTestDesc::new_with_inferred_key_range(\n                            Lsn(0x10)..Lsn(0x48),\n                            delta1,\n                        ),\n                        DeltaLayerTestDesc::new_with_inferred_key_range(\n                            Lsn(0x10)..Lsn(0x48),\n                            delta2,\n                        ),\n                        DeltaLayerTestDesc::new_with_inferred_key_range(\n                            Lsn(0x48)..Lsn(0x50),\n                            delta3,\n                        ),\n                    ], // delta layers\n                    vec![(Lsn(0x10), img_layer)], // image layers\n                    Lsn(0x50),\n                )\n                .await?\n        };\n        {\n            tline\n                .applied_gc_cutoff_lsn\n                .lock_for_write()\n                .store_and_unlock(Lsn(0x30))\n                .wait()\n                .await;\n            // Update GC info\n            let mut guard = tline.gc_info.write().unwrap();\n            *guard = GcInfo {\n                retain_lsns: vec![],\n                cutoffs: GcCutoffs {\n                    time: Some(Lsn(0x30)),\n                    space: Lsn(0x30),\n                },\n                leases: Default::default(),\n                within_ancestor_pitr: false,\n            };\n        }\n\n        let expected_result = [\n            Bytes::from_static(b\"value 0@0x10\"),\n            Bytes::from_static(b\"value 1@0x10@0x20\"),\n            Bytes::from_static(b\"value 2@0x10@0x30\"),\n            Bytes::from_static(b\"value 3@0x10@0x28@0x30@0x40\"),\n            Bytes::from_static(b\"value 
4@0x10\"),\n            Bytes::from_static(b\"value 5@0x10@0x20\"),\n            Bytes::from_static(b\"value 6@0x10@0x20\"),\n            Bytes::from_static(b\"value 7@0x10\"),\n            Bytes::from_static(b\"value 8@0x10@0x48\"),\n            Bytes::from_static(b\"value 9@0x10@0x48\"),\n        ];\n\n        let expected_result_at_gc_horizon = [\n            Bytes::from_static(b\"value 0@0x10\"),\n            Bytes::from_static(b\"value 1@0x10@0x20\"),\n            Bytes::from_static(b\"value 2@0x10@0x30\"),\n            Bytes::from_static(b\"value 3@0x10@0x28@0x30\"),\n            Bytes::from_static(b\"value 4@0x10\"),\n            Bytes::from_static(b\"value 5@0x10@0x20\"),\n            Bytes::from_static(b\"value 6@0x10@0x20\"),\n            Bytes::from_static(b\"value 7@0x10\"),\n            Bytes::from_static(b\"value 8@0x10\"),\n            Bytes::from_static(b\"value 9@0x10\"),\n        ];\n\n        for idx in 0..10 {\n            assert_eq!(\n                tline\n                    .get(get_key(idx as u32), Lsn(0x50), &ctx)\n                    .await\n                    .unwrap(),\n                &expected_result[idx]\n            );\n            assert_eq!(\n                tline\n                    .get(get_key(idx as u32), Lsn(0x30), &ctx)\n                    .await\n                    .unwrap(),\n                &expected_result_at_gc_horizon[idx]\n            );\n        }\n\n        let cancel = CancellationToken::new();\n        tline\n            .compact_with_gc(\n                &cancel,\n                CompactOptions::default_for_gc_compaction_unit_tests(),\n                &ctx,\n            )\n            .await\n            .unwrap();\n\n        for idx in 0..10 {\n            assert_eq!(\n                tline\n                    .get(get_key(idx as u32), Lsn(0x50), &ctx)\n                    .await\n                    .unwrap(),\n                &expected_result[idx]\n            );\n            assert_eq!(\n                
tline\n                    .get(get_key(idx as u32), Lsn(0x30), &ctx)\n                    .await\n                    .unwrap(),\n                &expected_result_at_gc_horizon[idx]\n            );\n        }\n\n        // increase GC horizon and compact again\n        {\n            tline\n                .applied_gc_cutoff_lsn\n                .lock_for_write()\n                .store_and_unlock(Lsn(0x40))\n                .wait()\n                .await;\n            // Update GC info\n            let mut guard = tline.gc_info.write().unwrap();\n            guard.cutoffs.time = Some(Lsn(0x40));\n            guard.cutoffs.space = Lsn(0x40);\n        }\n        tline\n            .compact_with_gc(\n                &cancel,\n                CompactOptions::default_for_gc_compaction_unit_tests(),\n                &ctx,\n            )\n            .await\n            .unwrap();\n\n        Ok(())\n    }\n\n    #[cfg(feature = \"testing\")]\n    #[tokio::test]\n    async fn test_generate_key_retention() -> anyhow::Result<()> {\n        let harness = TenantHarness::create(\"test_generate_key_retention\").await?;\n        let (tenant, ctx) = harness.load().await;\n        let tline = tenant\n            .create_test_timeline(TIMELINE_ID, Lsn(0x10), DEFAULT_PG_VERSION, &ctx)\n            .await?;\n        tline.force_advance_lsn(Lsn(0x70));\n        let key = Key::from_hex(\"010000000033333333444444445500000000\").unwrap();\n        let history = vec![\n            (\n                key,\n                Lsn(0x10),\n                Value::WalRecord(NeonWalRecord::wal_init(\"0x10\")),\n            ),\n            (\n                key,\n                Lsn(0x20),\n                Value::WalRecord(NeonWalRecord::wal_append(\";0x20\")),\n            ),\n            (\n                key,\n                Lsn(0x30),\n                Value::WalRecord(NeonWalRecord::wal_append(\";0x30\")),\n            ),\n            (\n                key,\n                Lsn(0x40),\n    
            Value::WalRecord(NeonWalRecord::wal_append(\";0x40\")),\n            ),\n            (\n                key,\n                Lsn(0x50),\n                Value::WalRecord(NeonWalRecord::wal_append(\";0x50\")),\n            ),\n            (\n                key,\n                Lsn(0x60),\n                Value::WalRecord(NeonWalRecord::wal_append(\";0x60\")),\n            ),\n            (\n                key,\n                Lsn(0x70),\n                Value::WalRecord(NeonWalRecord::wal_append(\";0x70\")),\n            ),\n            (\n                key,\n                Lsn(0x80),\n                Value::Image(Bytes::copy_from_slice(\n                    b\"0x10;0x20;0x30;0x40;0x50;0x60;0x70;0x80\",\n                )),\n            ),\n            (\n                key,\n                Lsn(0x90),\n                Value::WalRecord(NeonWalRecord::wal_append(\";0x90\")),\n            ),\n        ];\n        let res = tline\n            .generate_key_retention(\n                key,\n                &history,\n                Lsn(0x60),\n                &[Lsn(0x20), Lsn(0x40), Lsn(0x50)],\n                3,\n                None,\n                true,\n            )\n            .await\n            .unwrap();\n        let expected_res = KeyHistoryRetention {\n            below_horizon: vec![\n                (\n                    Lsn(0x20),\n                    KeyLogAtLsn(vec![(\n                        Lsn(0x20),\n                        Value::Image(Bytes::from_static(b\"0x10;0x20\")),\n                    )]),\n                ),\n                (\n                    Lsn(0x40),\n                    KeyLogAtLsn(vec![\n                        (\n                            Lsn(0x30),\n                            Value::WalRecord(NeonWalRecord::wal_append(\";0x30\")),\n                        ),\n                        (\n                            Lsn(0x40),\n                            
Value::WalRecord(NeonWalRecord::wal_append(\";0x40\")),\n                        ),\n                    ]),\n                ),\n                (\n                    Lsn(0x50),\n                    KeyLogAtLsn(vec![(\n                        Lsn(0x50),\n                        Value::Image(Bytes::copy_from_slice(b\"0x10;0x20;0x30;0x40;0x50\")),\n                    )]),\n                ),\n                (\n                    Lsn(0x60),\n                    KeyLogAtLsn(vec![(\n                        Lsn(0x60),\n                        Value::WalRecord(NeonWalRecord::wal_append(\";0x60\")),\n                    )]),\n                ),\n            ],\n            above_horizon: KeyLogAtLsn(vec![\n                (\n                    Lsn(0x70),\n                    Value::WalRecord(NeonWalRecord::wal_append(\";0x70\")),\n                ),\n                (\n                    Lsn(0x80),\n                    Value::Image(Bytes::copy_from_slice(\n                        b\"0x10;0x20;0x30;0x40;0x50;0x60;0x70;0x80\",\n                    )),\n                ),\n                (\n                    Lsn(0x90),\n                    Value::WalRecord(NeonWalRecord::wal_append(\";0x90\")),\n                ),\n            ]),\n        };\n        assert_eq!(res, expected_res);\n\n        // We expect GC-compaction to run with the original GC. 
This would create a situation that\n        // the original GC algorithm removes some delta layers b/c there is full image coverage,\n        // therefore causing some keys to have an incomplete history below the lowest retain LSN.\n        // For example, we have\n        // ```plain\n        // init delta @ 0x10, image @ 0x20, delta @ 0x30 (gc_horizon), image @ 0x40.\n        // ```\n        // Now the GC horizon moves up, and we have\n        // ```plain\n        // init delta @ 0x10, image @ 0x20, delta @ 0x30, image @ 0x40 (gc_horizon)\n        // ```\n        // The original GC algorithm kicks in, and removes delta @ 0x10, image @ 0x20.\n        // We will end up with\n        // ```plain\n        // delta @ 0x30, image @ 0x40 (gc_horizon)\n        // ```\n        // Now we run the GC-compaction, and this key does not have a full history.\n        // We should be able to handle this partial history and drop everything before the\n        // gc_horizon image.\n\n        let history = vec![\n            (\n                key,\n                Lsn(0x20),\n                Value::WalRecord(NeonWalRecord::wal_append(\";0x20\")),\n            ),\n            (\n                key,\n                Lsn(0x30),\n                Value::WalRecord(NeonWalRecord::wal_append(\";0x30\")),\n            ),\n            (\n                key,\n                Lsn(0x40),\n                Value::Image(Bytes::copy_from_slice(b\"0x10;0x20;0x30;0x40\")),\n            ),\n            (\n                key,\n                Lsn(0x50),\n                Value::WalRecord(NeonWalRecord::wal_append(\";0x50\")),\n            ),\n            (\n                key,\n                Lsn(0x60),\n                Value::WalRecord(NeonWalRecord::wal_append(\";0x60\")),\n            ),\n            (\n                key,\n                Lsn(0x70),\n                Value::WalRecord(NeonWalRecord::wal_append(\";0x70\")),\n            ),\n            (\n                key,\n                
Lsn(0x80),\n                Value::Image(Bytes::copy_from_slice(\n                    b\"0x10;0x20;0x30;0x40;0x50;0x60;0x70;0x80\",\n                )),\n            ),\n            (\n                key,\n                Lsn(0x90),\n                Value::WalRecord(NeonWalRecord::wal_append(\";0x90\")),\n            ),\n        ];\n        let res = tline\n            .generate_key_retention(\n                key,\n                &history,\n                Lsn(0x60),\n                &[Lsn(0x40), Lsn(0x50)],\n                3,\n                None,\n                true,\n            )\n            .await\n            .unwrap();\n        let expected_res = KeyHistoryRetention {\n            below_horizon: vec![\n                (\n                    Lsn(0x40),\n                    KeyLogAtLsn(vec![(\n                        Lsn(0x40),\n                        Value::Image(Bytes::copy_from_slice(b\"0x10;0x20;0x30;0x40\")),\n                    )]),\n                ),\n                (\n                    Lsn(0x50),\n                    KeyLogAtLsn(vec![(\n                        Lsn(0x50),\n                        Value::WalRecord(NeonWalRecord::wal_append(\";0x50\")),\n                    )]),\n                ),\n                (\n                    Lsn(0x60),\n                    KeyLogAtLsn(vec![(\n                        Lsn(0x60),\n                        Value::WalRecord(NeonWalRecord::wal_append(\";0x60\")),\n                    )]),\n                ),\n            ],\n            above_horizon: KeyLogAtLsn(vec![\n                (\n                    Lsn(0x70),\n                    Value::WalRecord(NeonWalRecord::wal_append(\";0x70\")),\n                ),\n                (\n                    Lsn(0x80),\n                    Value::Image(Bytes::copy_from_slice(\n                        b\"0x10;0x20;0x30;0x40;0x50;0x60;0x70;0x80\",\n                    )),\n                ),\n                (\n                    Lsn(0x90),\n                 
   Value::WalRecord(NeonWalRecord::wal_append(\";0x90\")),\n                ),\n            ]),\n        };\n        assert_eq!(res, expected_res);\n\n        // In case of branch compaction, the branch itself does not have the full history, and we need to provide\n        // the ancestor image in the test case.\n\n        let history = vec![\n            (\n                key,\n                Lsn(0x20),\n                Value::WalRecord(NeonWalRecord::wal_append(\";0x20\")),\n            ),\n            (\n                key,\n                Lsn(0x30),\n                Value::WalRecord(NeonWalRecord::wal_append(\";0x30\")),\n            ),\n            (\n                key,\n                Lsn(0x40),\n                Value::WalRecord(NeonWalRecord::wal_append(\";0x40\")),\n            ),\n            (\n                key,\n                Lsn(0x70),\n                Value::WalRecord(NeonWalRecord::wal_append(\";0x70\")),\n            ),\n        ];\n        let res = tline\n            .generate_key_retention(\n                key,\n                &history,\n                Lsn(0x60),\n                &[],\n                3,\n                Some((key, Lsn(0x10), Bytes::copy_from_slice(b\"0x10\"))),\n                true,\n            )\n            .await\n            .unwrap();\n        let expected_res = KeyHistoryRetention {\n            below_horizon: vec![(\n                Lsn(0x60),\n                KeyLogAtLsn(vec![(\n                    Lsn(0x60),\n                    Value::Image(Bytes::copy_from_slice(b\"0x10;0x20;0x30;0x40\")), // use the ancestor image to reconstruct the page\n                )]),\n            )],\n            above_horizon: KeyLogAtLsn(vec![(\n                Lsn(0x70),\n                Value::WalRecord(NeonWalRecord::wal_append(\";0x70\")),\n            )]),\n        };\n        assert_eq!(res, expected_res);\n\n        let history = vec![\n            (\n                key,\n                Lsn(0x20),\n                
Value::WalRecord(NeonWalRecord::wal_append(\";0x20\")),\n            ),\n            (\n                key,\n                Lsn(0x40),\n                Value::WalRecord(NeonWalRecord::wal_append(\";0x40\")),\n            ),\n            (\n                key,\n                Lsn(0x60),\n                Value::WalRecord(NeonWalRecord::wal_append(\";0x60\")),\n            ),\n            (\n                key,\n                Lsn(0x70),\n                Value::WalRecord(NeonWalRecord::wal_append(\";0x70\")),\n            ),\n        ];\n        let res = tline\n            .generate_key_retention(\n                key,\n                &history,\n                Lsn(0x60),\n                &[Lsn(0x30)],\n                3,\n                Some((key, Lsn(0x10), Bytes::copy_from_slice(b\"0x10\"))),\n                true,\n            )\n            .await\n            .unwrap();\n        let expected_res = KeyHistoryRetention {\n            below_horizon: vec![\n                (\n                    Lsn(0x30),\n                    KeyLogAtLsn(vec![(\n                        Lsn(0x20),\n                        Value::WalRecord(NeonWalRecord::wal_append(\";0x20\")),\n                    )]),\n                ),\n                (\n                    Lsn(0x60),\n                    KeyLogAtLsn(vec![(\n                        Lsn(0x60),\n                        Value::Image(Bytes::copy_from_slice(b\"0x10;0x20;0x40;0x60\")),\n                    )]),\n                ),\n            ],\n            above_horizon: KeyLogAtLsn(vec![(\n                Lsn(0x70),\n                Value::WalRecord(NeonWalRecord::wal_append(\";0x70\")),\n            )]),\n        };\n        assert_eq!(res, expected_res);\n\n        Ok(())\n    }\n\n    #[cfg(feature = \"testing\")]\n    #[tokio::test]\n    async fn test_simple_bottom_most_compaction_with_retain_lsns() -> anyhow::Result<()> {\n        let harness =\n            
TenantHarness::create(\"test_simple_bottom_most_compaction_with_retain_lsns\").await?;\n        let (tenant, ctx) = harness.load().await;\n\n        fn get_key(id: u32) -> Key {\n            // using aux key here b/c they are guaranteed to be inside `collect_keyspace`.\n            let mut key = Key::from_hex(\"620000000033333333444444445500000000\").unwrap();\n            key.field6 = id;\n            key\n        }\n\n        let img_layer = (0..10)\n            .map(|id| (get_key(id), Bytes::from(format!(\"value {id}@0x10\"))))\n            .collect_vec();\n\n        let delta1 = vec![\n            (\n                get_key(1),\n                Lsn(0x20),\n                Value::WalRecord(NeonWalRecord::wal_append(\"@0x20\")),\n            ),\n            (\n                get_key(2),\n                Lsn(0x30),\n                Value::WalRecord(NeonWalRecord::wal_append(\"@0x30\")),\n            ),\n            (\n                get_key(3),\n                Lsn(0x28),\n                Value::WalRecord(NeonWalRecord::wal_append(\"@0x28\")),\n            ),\n            (\n                get_key(3),\n                Lsn(0x30),\n                Value::WalRecord(NeonWalRecord::wal_append(\"@0x30\")),\n            ),\n            (\n                get_key(3),\n                Lsn(0x40),\n                Value::WalRecord(NeonWalRecord::wal_append(\"@0x40\")),\n            ),\n        ];\n        let delta2 = vec![\n            (\n                get_key(5),\n                Lsn(0x20),\n                Value::WalRecord(NeonWalRecord::wal_append(\"@0x20\")),\n            ),\n            (\n                get_key(6),\n                Lsn(0x20),\n                Value::WalRecord(NeonWalRecord::wal_append(\"@0x20\")),\n            ),\n        ];\n        let delta3 = vec![\n            (\n                get_key(8),\n                Lsn(0x48),\n                Value::WalRecord(NeonWalRecord::wal_append(\"@0x48\")),\n            ),\n            (\n                
get_key(9),\n                Lsn(0x48),\n                Value::WalRecord(NeonWalRecord::wal_append(\"@0x48\")),\n            ),\n        ];\n\n        let tline = tenant\n            .create_test_timeline_with_layers(\n                TIMELINE_ID,\n                Lsn(0x10),\n                DEFAULT_PG_VERSION,\n                &ctx,\n                Vec::new(), // in-memory layers\n                vec![\n                    DeltaLayerTestDesc::new_with_inferred_key_range(Lsn(0x10)..Lsn(0x48), delta1),\n                    DeltaLayerTestDesc::new_with_inferred_key_range(Lsn(0x10)..Lsn(0x48), delta2),\n                    DeltaLayerTestDesc::new_with_inferred_key_range(Lsn(0x48)..Lsn(0x50), delta3),\n                ], // delta layers\n                vec![(Lsn(0x10), img_layer)], // image layers\n                Lsn(0x50),\n            )\n            .await?;\n        {\n            tline\n                .applied_gc_cutoff_lsn\n                .lock_for_write()\n                .store_and_unlock(Lsn(0x30))\n                .wait()\n                .await;\n            // Update GC info\n            let mut guard = tline.gc_info.write().unwrap();\n            *guard = GcInfo {\n                retain_lsns: vec![\n                    (Lsn(0x10), tline.timeline_id, MaybeOffloaded::No),\n                    (Lsn(0x20), tline.timeline_id, MaybeOffloaded::No),\n                ],\n                cutoffs: GcCutoffs {\n                    time: Some(Lsn(0x30)),\n                    space: Lsn(0x30),\n                },\n                leases: Default::default(),\n                within_ancestor_pitr: false,\n            };\n        }\n\n        let expected_result = [\n            Bytes::from_static(b\"value 0@0x10\"),\n            Bytes::from_static(b\"value 1@0x10@0x20\"),\n            Bytes::from_static(b\"value 2@0x10@0x30\"),\n            Bytes::from_static(b\"value 3@0x10@0x28@0x30@0x40\"),\n            Bytes::from_static(b\"value 4@0x10\"),\n            
Bytes::from_static(b\"value 5@0x10@0x20\"),\n            Bytes::from_static(b\"value 6@0x10@0x20\"),\n            Bytes::from_static(b\"value 7@0x10\"),\n            Bytes::from_static(b\"value 8@0x10@0x48\"),\n            Bytes::from_static(b\"value 9@0x10@0x48\"),\n        ];\n\n        let expected_result_at_gc_horizon = [\n            Bytes::from_static(b\"value 0@0x10\"),\n            Bytes::from_static(b\"value 1@0x10@0x20\"),\n            Bytes::from_static(b\"value 2@0x10@0x30\"),\n            Bytes::from_static(b\"value 3@0x10@0x28@0x30\"),\n            Bytes::from_static(b\"value 4@0x10\"),\n            Bytes::from_static(b\"value 5@0x10@0x20\"),\n            Bytes::from_static(b\"value 6@0x10@0x20\"),\n            Bytes::from_static(b\"value 7@0x10\"),\n            Bytes::from_static(b\"value 8@0x10\"),\n            Bytes::from_static(b\"value 9@0x10\"),\n        ];\n\n        let expected_result_at_lsn_20 = [\n            Bytes::from_static(b\"value 0@0x10\"),\n            Bytes::from_static(b\"value 1@0x10@0x20\"),\n            Bytes::from_static(b\"value 2@0x10\"),\n            Bytes::from_static(b\"value 3@0x10\"),\n            Bytes::from_static(b\"value 4@0x10\"),\n            Bytes::from_static(b\"value 5@0x10@0x20\"),\n            Bytes::from_static(b\"value 6@0x10@0x20\"),\n            Bytes::from_static(b\"value 7@0x10\"),\n            Bytes::from_static(b\"value 8@0x10\"),\n            Bytes::from_static(b\"value 9@0x10\"),\n        ];\n\n        let expected_result_at_lsn_10 = [\n            Bytes::from_static(b\"value 0@0x10\"),\n            Bytes::from_static(b\"value 1@0x10\"),\n            Bytes::from_static(b\"value 2@0x10\"),\n            Bytes::from_static(b\"value 3@0x10\"),\n            Bytes::from_static(b\"value 4@0x10\"),\n            Bytes::from_static(b\"value 5@0x10\"),\n            Bytes::from_static(b\"value 6@0x10\"),\n            Bytes::from_static(b\"value 7@0x10\"),\n            Bytes::from_static(b\"value 8@0x10\"),\n    
        Bytes::from_static(b\"value 9@0x10\"),\n        ];\n\n        let verify_result = || async {\n            let gc_horizon = {\n                let gc_info = tline.gc_info.read().unwrap();\n                gc_info.cutoffs.time.unwrap_or_default()\n            };\n            for idx in 0..10 {\n                assert_eq!(\n                    tline\n                        .get(get_key(idx as u32), Lsn(0x50), &ctx)\n                        .await\n                        .unwrap(),\n                    &expected_result[idx]\n                );\n                assert_eq!(\n                    tline\n                        .get(get_key(idx as u32), gc_horizon, &ctx)\n                        .await\n                        .unwrap(),\n                    &expected_result_at_gc_horizon[idx]\n                );\n                assert_eq!(\n                    tline\n                        .get(get_key(idx as u32), Lsn(0x20), &ctx)\n                        .await\n                        .unwrap(),\n                    &expected_result_at_lsn_20[idx]\n                );\n                assert_eq!(\n                    tline\n                        .get(get_key(idx as u32), Lsn(0x10), &ctx)\n                        .await\n                        .unwrap(),\n                    &expected_result_at_lsn_10[idx]\n                );\n            }\n        };\n\n        verify_result().await;\n\n        let cancel = CancellationToken::new();\n        let mut dryrun_flags = EnumSet::new();\n        dryrun_flags.insert(CompactFlags::DryRun);\n\n        tline\n            .compact_with_gc(\n                &cancel,\n                CompactOptions {\n                    flags: dryrun_flags,\n                    ..CompactOptions::default_for_gc_compaction_unit_tests()\n                },\n                &ctx,\n            )\n            .await\n            .unwrap();\n        // We expect layer map to be the same b/c the dry run flag, but we don't know whether there 
will be other background jobs\n        // cleaning things up, and therefore, we don't do sanity checks on the layer map during unit tests.\n        verify_result().await;\n\n        tline\n            .compact_with_gc(\n                &cancel,\n                CompactOptions::default_for_gc_compaction_unit_tests(),\n                &ctx,\n            )\n            .await\n            .unwrap();\n        verify_result().await;\n\n        // compact again\n        tline\n            .compact_with_gc(\n                &cancel,\n                CompactOptions::default_for_gc_compaction_unit_tests(),\n                &ctx,\n            )\n            .await\n            .unwrap();\n        verify_result().await;\n\n        // increase GC horizon and compact again\n        {\n            tline\n                .applied_gc_cutoff_lsn\n                .lock_for_write()\n                .store_and_unlock(Lsn(0x38))\n                .wait()\n                .await;\n            // Update GC info\n            let mut guard = tline.gc_info.write().unwrap();\n            guard.cutoffs.time = Some(Lsn(0x38));\n            guard.cutoffs.space = Lsn(0x38);\n        }\n        tline\n            .compact_with_gc(\n                &cancel,\n                CompactOptions::default_for_gc_compaction_unit_tests(),\n                &ctx,\n            )\n            .await\n            .unwrap();\n        verify_result().await; // no wals between 0x30 and 0x38, so we should obtain the same result\n\n        // not increasing the GC horizon and compact again\n        tline\n            .compact_with_gc(\n                &cancel,\n                CompactOptions::default_for_gc_compaction_unit_tests(),\n                &ctx,\n            )\n            .await\n            .unwrap();\n        verify_result().await;\n\n        Ok(())\n    }\n\n    #[cfg(feature = \"testing\")]\n    #[tokio::test]\n    async fn test_simple_bottom_most_compaction_with_retain_lsns_single_key() -> 
anyhow::Result<()>\n    {\n        let harness =\n            TenantHarness::create(\"test_simple_bottom_most_compaction_with_retain_lsns_single_key\")\n                .await?;\n        let (tenant, ctx) = harness.load().await;\n\n        fn get_key(id: u32) -> Key {\n            // using aux key here b/c they are guaranteed to be inside `collect_keyspace`.\n            let mut key = Key::from_hex(\"620000000033333333444444445500000000\").unwrap();\n            key.field6 = id;\n            key\n        }\n\n        let img_layer = (0..10)\n            .map(|id| (get_key(id), Bytes::from(format!(\"value {id}@0x10\"))))\n            .collect_vec();\n\n        let delta1 = vec![\n            (\n                get_key(1),\n                Lsn(0x20),\n                Value::WalRecord(NeonWalRecord::wal_append(\"@0x20\")),\n            ),\n            (\n                get_key(1),\n                Lsn(0x28),\n                Value::WalRecord(NeonWalRecord::wal_append(\"@0x28\")),\n            ),\n        ];\n        let delta2 = vec![\n            (\n                get_key(1),\n                Lsn(0x30),\n                Value::WalRecord(NeonWalRecord::wal_append(\"@0x30\")),\n            ),\n            (\n                get_key(1),\n                Lsn(0x38),\n                Value::WalRecord(NeonWalRecord::wal_append(\"@0x38\")),\n            ),\n        ];\n        let delta3 = vec![\n            (\n                get_key(8),\n                Lsn(0x48),\n                Value::WalRecord(NeonWalRecord::wal_append(\"@0x48\")),\n            ),\n            (\n                get_key(9),\n                Lsn(0x48),\n                Value::WalRecord(NeonWalRecord::wal_append(\"@0x48\")),\n            ),\n        ];\n\n        let tline = tenant\n            .create_test_timeline_with_layers(\n                TIMELINE_ID,\n                Lsn(0x10),\n                DEFAULT_PG_VERSION,\n                &ctx,\n                Vec::new(), // in-memory layers\n         
       vec![\n                    // delta1 and delta 2 only contain a single key but multiple updates\n                    DeltaLayerTestDesc::new_with_inferred_key_range(Lsn(0x10)..Lsn(0x30), delta1),\n                    DeltaLayerTestDesc::new_with_inferred_key_range(Lsn(0x30)..Lsn(0x50), delta2),\n                    DeltaLayerTestDesc::new_with_inferred_key_range(Lsn(0x10)..Lsn(0x50), delta3),\n                ], // delta layers\n                vec![(Lsn(0x10), img_layer)], // image layers\n                Lsn(0x50),\n            )\n            .await?;\n        {\n            tline\n                .applied_gc_cutoff_lsn\n                .lock_for_write()\n                .store_and_unlock(Lsn(0x30))\n                .wait()\n                .await;\n            // Update GC info\n            let mut guard = tline.gc_info.write().unwrap();\n            *guard = GcInfo {\n                retain_lsns: vec![\n                    (Lsn(0x10), tline.timeline_id, MaybeOffloaded::No),\n                    (Lsn(0x20), tline.timeline_id, MaybeOffloaded::No),\n                ],\n                cutoffs: GcCutoffs {\n                    time: Some(Lsn(0x30)),\n                    space: Lsn(0x30),\n                },\n                leases: Default::default(),\n                within_ancestor_pitr: false,\n            };\n        }\n\n        let expected_result = [\n            Bytes::from_static(b\"value 0@0x10\"),\n            Bytes::from_static(b\"value 1@0x10@0x20@0x28@0x30@0x38\"),\n            Bytes::from_static(b\"value 2@0x10\"),\n            Bytes::from_static(b\"value 3@0x10\"),\n            Bytes::from_static(b\"value 4@0x10\"),\n            Bytes::from_static(b\"value 5@0x10\"),\n            Bytes::from_static(b\"value 6@0x10\"),\n            Bytes::from_static(b\"value 7@0x10\"),\n            Bytes::from_static(b\"value 8@0x10@0x48\"),\n            Bytes::from_static(b\"value 9@0x10@0x48\"),\n        ];\n\n        let expected_result_at_gc_horizon = [\n 
           Bytes::from_static(b\"value 0@0x10\"),\n            Bytes::from_static(b\"value 1@0x10@0x20@0x28@0x30\"),\n            Bytes::from_static(b\"value 2@0x10\"),\n            Bytes::from_static(b\"value 3@0x10\"),\n            Bytes::from_static(b\"value 4@0x10\"),\n            Bytes::from_static(b\"value 5@0x10\"),\n            Bytes::from_static(b\"value 6@0x10\"),\n            Bytes::from_static(b\"value 7@0x10\"),\n            Bytes::from_static(b\"value 8@0x10\"),\n            Bytes::from_static(b\"value 9@0x10\"),\n        ];\n\n        let expected_result_at_lsn_20 = [\n            Bytes::from_static(b\"value 0@0x10\"),\n            Bytes::from_static(b\"value 1@0x10@0x20\"),\n            Bytes::from_static(b\"value 2@0x10\"),\n            Bytes::from_static(b\"value 3@0x10\"),\n            Bytes::from_static(b\"value 4@0x10\"),\n            Bytes::from_static(b\"value 5@0x10\"),\n            Bytes::from_static(b\"value 6@0x10\"),\n            Bytes::from_static(b\"value 7@0x10\"),\n            Bytes::from_static(b\"value 8@0x10\"),\n            Bytes::from_static(b\"value 9@0x10\"),\n        ];\n\n        let expected_result_at_lsn_10 = [\n            Bytes::from_static(b\"value 0@0x10\"),\n            Bytes::from_static(b\"value 1@0x10\"),\n            Bytes::from_static(b\"value 2@0x10\"),\n            Bytes::from_static(b\"value 3@0x10\"),\n            Bytes::from_static(b\"value 4@0x10\"),\n            Bytes::from_static(b\"value 5@0x10\"),\n            Bytes::from_static(b\"value 6@0x10\"),\n            Bytes::from_static(b\"value 7@0x10\"),\n            Bytes::from_static(b\"value 8@0x10\"),\n            Bytes::from_static(b\"value 9@0x10\"),\n        ];\n\n        let verify_result = || async {\n            let gc_horizon = {\n                let gc_info = tline.gc_info.read().unwrap();\n                gc_info.cutoffs.time.unwrap_or_default()\n            };\n            for idx in 0..10 {\n                assert_eq!(\n                    
tline\n                        .get(get_key(idx as u32), Lsn(0x50), &ctx)\n                        .await\n                        .unwrap(),\n                    &expected_result[idx]\n                );\n                assert_eq!(\n                    tline\n                        .get(get_key(idx as u32), gc_horizon, &ctx)\n                        .await\n                        .unwrap(),\n                    &expected_result_at_gc_horizon[idx]\n                );\n                assert_eq!(\n                    tline\n                        .get(get_key(idx as u32), Lsn(0x20), &ctx)\n                        .await\n                        .unwrap(),\n                    &expected_result_at_lsn_20[idx]\n                );\n                assert_eq!(\n                    tline\n                        .get(get_key(idx as u32), Lsn(0x10), &ctx)\n                        .await\n                        .unwrap(),\n                    &expected_result_at_lsn_10[idx]\n                );\n            }\n        };\n\n        verify_result().await;\n\n        let cancel = CancellationToken::new();\n        let mut dryrun_flags = EnumSet::new();\n        dryrun_flags.insert(CompactFlags::DryRun);\n\n        tline\n            .compact_with_gc(\n                &cancel,\n                CompactOptions {\n                    flags: dryrun_flags,\n                    ..CompactOptions::default_for_gc_compaction_unit_tests()\n                },\n                &ctx,\n            )\n            .await\n            .unwrap();\n        // We expect layer map to be the same b/c the dry run flag, but we don't know whether there will be other background jobs\n        // cleaning things up, and therefore, we don't do sanity checks on the layer map during unit tests.\n        verify_result().await;\n\n        tline\n            .compact_with_gc(\n                &cancel,\n                CompactOptions::default_for_gc_compaction_unit_tests(),\n                &ctx,\n           
 )\n            .await\n            .unwrap();\n        verify_result().await;\n\n        // compact again\n        tline\n            .compact_with_gc(\n                &cancel,\n                CompactOptions::default_for_gc_compaction_unit_tests(),\n                &ctx,\n            )\n            .await\n            .unwrap();\n        verify_result().await;\n\n        Ok(())\n    }\n\n    #[cfg(feature = \"testing\")]\n    #[tokio::test]\n    async fn test_simple_bottom_most_compaction_on_branch() -> anyhow::Result<()> {\n        use models::CompactLsnRange;\n\n        let harness = TenantHarness::create(\"test_simple_bottom_most_compaction_on_branch\").await?;\n        let (tenant, ctx) = harness.load().await;\n\n        fn get_key(id: u32) -> Key {\n            let mut key = Key::from_hex(\"000000000033333333444444445500000000\").unwrap();\n            key.field6 = id;\n            key\n        }\n\n        let img_layer = (0..10)\n            .map(|id| (get_key(id), Bytes::from(format!(\"value {id}@0x10\"))))\n            .collect_vec();\n\n        let delta1 = vec![\n            (\n                get_key(1),\n                Lsn(0x20),\n                Value::WalRecord(NeonWalRecord::wal_append(\"@0x20\")),\n            ),\n            (\n                get_key(2),\n                Lsn(0x30),\n                Value::WalRecord(NeonWalRecord::wal_append(\"@0x30\")),\n            ),\n            (\n                get_key(3),\n                Lsn(0x28),\n                Value::WalRecord(NeonWalRecord::wal_append(\"@0x28\")),\n            ),\n            (\n                get_key(3),\n                Lsn(0x30),\n                Value::WalRecord(NeonWalRecord::wal_append(\"@0x30\")),\n            ),\n            (\n                get_key(3),\n                Lsn(0x40),\n                Value::WalRecord(NeonWalRecord::wal_append(\"@0x40\")),\n            ),\n        ];\n        let delta2 = vec![\n            (\n                get_key(5),\n                
Lsn(0x20),\n                Value::WalRecord(NeonWalRecord::wal_append(\"@0x20\")),\n            ),\n            (\n                get_key(6),\n                Lsn(0x20),\n                Value::WalRecord(NeonWalRecord::wal_append(\"@0x20\")),\n            ),\n        ];\n        let delta3 = vec![\n            (\n                get_key(8),\n                Lsn(0x48),\n                Value::WalRecord(NeonWalRecord::wal_append(\"@0x48\")),\n            ),\n            (\n                get_key(9),\n                Lsn(0x48),\n                Value::WalRecord(NeonWalRecord::wal_append(\"@0x48\")),\n            ),\n        ];\n\n        let parent_tline = tenant\n            .create_test_timeline_with_layers(\n                TIMELINE_ID,\n                Lsn(0x10),\n                DEFAULT_PG_VERSION,\n                &ctx,\n                vec![],                       // in-memory layers\n                vec![],                       // delta layers\n                vec![(Lsn(0x18), img_layer)], // image layers\n                Lsn(0x18),\n            )\n            .await?;\n\n        parent_tline.add_extra_test_dense_keyspace(KeySpace::single(get_key(0)..get_key(10)));\n\n        let branch_tline = tenant\n            .branch_timeline_test_with_layers(\n                &parent_tline,\n                NEW_TIMELINE_ID,\n                Some(Lsn(0x18)),\n                &ctx,\n                vec![\n                    DeltaLayerTestDesc::new_with_inferred_key_range(Lsn(0x20)..Lsn(0x48), delta1),\n                    DeltaLayerTestDesc::new_with_inferred_key_range(Lsn(0x20)..Lsn(0x48), delta2),\n                    DeltaLayerTestDesc::new_with_inferred_key_range(Lsn(0x48)..Lsn(0x50), delta3),\n                ], // delta layers\n                vec![], // image layers\n                Lsn(0x50),\n            )\n            .await?;\n\n        branch_tline.add_extra_test_dense_keyspace(KeySpace::single(get_key(0)..get_key(10)));\n\n        {\n            
parent_tline\n                .applied_gc_cutoff_lsn\n                .lock_for_write()\n                .store_and_unlock(Lsn(0x10))\n                .wait()\n                .await;\n            // Update GC info\n            let mut guard = parent_tline.gc_info.write().unwrap();\n            *guard = GcInfo {\n                retain_lsns: vec![(Lsn(0x18), branch_tline.timeline_id, MaybeOffloaded::No)],\n                cutoffs: GcCutoffs {\n                    time: Some(Lsn(0x10)),\n                    space: Lsn(0x10),\n                },\n                leases: Default::default(),\n                within_ancestor_pitr: false,\n            };\n        }\n\n        {\n            branch_tline\n                .applied_gc_cutoff_lsn\n                .lock_for_write()\n                .store_and_unlock(Lsn(0x50))\n                .wait()\n                .await;\n            // Update GC info\n            let mut guard = branch_tline.gc_info.write().unwrap();\n            *guard = GcInfo {\n                retain_lsns: vec![(Lsn(0x40), branch_tline.timeline_id, MaybeOffloaded::No)],\n                cutoffs: GcCutoffs {\n                    time: Some(Lsn(0x50)),\n                    space: Lsn(0x50),\n                },\n                leases: Default::default(),\n                within_ancestor_pitr: false,\n            };\n        }\n\n        let expected_result_at_gc_horizon = [\n            Bytes::from_static(b\"value 0@0x10\"),\n            Bytes::from_static(b\"value 1@0x10@0x20\"),\n            Bytes::from_static(b\"value 2@0x10@0x30\"),\n            Bytes::from_static(b\"value 3@0x10@0x28@0x30@0x40\"),\n            Bytes::from_static(b\"value 4@0x10\"),\n            Bytes::from_static(b\"value 5@0x10@0x20\"),\n            Bytes::from_static(b\"value 6@0x10@0x20\"),\n            Bytes::from_static(b\"value 7@0x10\"),\n            Bytes::from_static(b\"value 8@0x10@0x48\"),\n            Bytes::from_static(b\"value 9@0x10@0x48\"),\n        ];\n\n        
let expected_result_at_lsn_40 = [\n            Bytes::from_static(b\"value 0@0x10\"),\n            Bytes::from_static(b\"value 1@0x10@0x20\"),\n            Bytes::from_static(b\"value 2@0x10@0x30\"),\n            Bytes::from_static(b\"value 3@0x10@0x28@0x30@0x40\"),\n            Bytes::from_static(b\"value 4@0x10\"),\n            Bytes::from_static(b\"value 5@0x10@0x20\"),\n            Bytes::from_static(b\"value 6@0x10@0x20\"),\n            Bytes::from_static(b\"value 7@0x10\"),\n            Bytes::from_static(b\"value 8@0x10\"),\n            Bytes::from_static(b\"value 9@0x10\"),\n        ];\n\n        let verify_result = || async {\n            for idx in 0..10 {\n                assert_eq!(\n                    branch_tline\n                        .get(get_key(idx as u32), Lsn(0x50), &ctx)\n                        .await\n                        .unwrap(),\n                    &expected_result_at_gc_horizon[idx]\n                );\n                assert_eq!(\n                    branch_tline\n                        .get(get_key(idx as u32), Lsn(0x40), &ctx)\n                        .await\n                        .unwrap(),\n                    &expected_result_at_lsn_40[idx]\n                );\n            }\n        };\n\n        verify_result().await;\n\n        let cancel = CancellationToken::new();\n        branch_tline\n            .compact_with_gc(\n                &cancel,\n                CompactOptions::default_for_gc_compaction_unit_tests(),\n                &ctx,\n            )\n            .await\n            .unwrap();\n\n        verify_result().await;\n\n        // Piggyback a compaction with above_lsn. 
Ensure it works correctly when the specified LSN intersects with the layer files.\n        // Now we already have a single large delta layer, so the compaction min_layer_lsn should be the same as ancestor LSN (0x18).\n        branch_tline\n            .compact_with_gc(\n                &cancel,\n                CompactOptions {\n                    compact_lsn_range: Some(CompactLsnRange::above(Lsn(0x40))),\n                    ..CompactOptions::default_for_gc_compaction_unit_tests()\n                },\n                &ctx,\n            )\n            .await\n            .unwrap();\n\n        verify_result().await;\n\n        Ok(())\n    }\n\n    // Regression test for https://github.com/neondatabase/neon/issues/9012\n    // Create an image arrangement where we have to read at different LSN ranges\n    // from a delta layer. This is achieved by overlapping an image layer on top of\n    // a delta layer. Like so:\n    //\n    //     A      B\n    // +----------------+ -> delta_layer\n    // |                |                           ^ lsn\n    // |       =========|-> nested_image_layer      |\n    // |       C        |                           |\n    // +----------------+                           |\n    // ======== -> baseline_image_layer             +-------> key\n    //\n    //\n    // When querying the key range [A, B) we need to read at different LSN ranges\n    // for [A, C) and [C, B). 
This test checks that the described edge case is handled correctly.\n    #[cfg(feature = \"testing\")]\n    #[tokio::test]\n    async fn test_vectored_read_with_nested_image_layer() -> anyhow::Result<()> {\n        let harness = TenantHarness::create(\"test_vectored_read_with_nested_image_layer\").await?;\n        let (tenant, ctx) = harness.load().await;\n\n        let will_init_keys = [2, 6];\n        fn get_key(id: u32) -> Key {\n            let mut key = Key::from_hex(\"110000000033333333444444445500000000\").unwrap();\n            key.field6 = id;\n            key\n        }\n\n        let mut expected_key_values = HashMap::new();\n\n        let baseline_image_layer_lsn = Lsn(0x10);\n        let mut baseline_img_layer = Vec::new();\n        for i in 0..5 {\n            let key = get_key(i);\n            let value = format!(\"value {i}@{baseline_image_layer_lsn}\");\n\n            let removed = expected_key_values.insert(key, value.clone());\n            assert!(removed.is_none());\n\n            baseline_img_layer.push((key, Bytes::from(value)));\n        }\n\n        let nested_image_layer_lsn = Lsn(0x50);\n        let mut nested_img_layer = Vec::new();\n        for i in 5..10 {\n            let key = get_key(i);\n            let value = format!(\"value {i}@{nested_image_layer_lsn}\");\n\n            let removed = expected_key_values.insert(key, value.clone());\n            assert!(removed.is_none());\n\n            nested_img_layer.push((key, Bytes::from(value)));\n        }\n\n        let mut delta_layer_spec = Vec::default();\n        let delta_layer_start_lsn = Lsn(0x20);\n        let mut delta_layer_end_lsn = delta_layer_start_lsn;\n\n        for i in 0..10 {\n            let key = get_key(i);\n            let key_in_nested = nested_img_layer\n                .iter()\n                .any(|(key_with_img, _)| *key_with_img == key);\n            let lsn = {\n                if key_in_nested {\n                    Lsn(nested_image_layer_lsn.0 + 0x10)\n      
          } else {\n                    delta_layer_start_lsn\n                }\n            };\n\n            let will_init = will_init_keys.contains(&i);\n            if will_init {\n                delta_layer_spec.push((key, lsn, Value::WalRecord(NeonWalRecord::wal_init(\"\"))));\n\n                expected_key_values.insert(key, \"\".to_string());\n            } else {\n                let delta = format!(\"@{lsn}\");\n                delta_layer_spec.push((\n                    key,\n                    lsn,\n                    Value::WalRecord(NeonWalRecord::wal_append(&delta)),\n                ));\n\n                expected_key_values\n                    .get_mut(&key)\n                    .expect(\"An image exists for each key\")\n                    .push_str(delta.as_str());\n            }\n            delta_layer_end_lsn = std::cmp::max(delta_layer_start_lsn, lsn);\n        }\n\n        delta_layer_end_lsn = Lsn(delta_layer_end_lsn.0 + 1);\n\n        assert!(\n            nested_image_layer_lsn > delta_layer_start_lsn\n                && nested_image_layer_lsn < delta_layer_end_lsn\n        );\n\n        let tline = tenant\n            .create_test_timeline_with_layers(\n                TIMELINE_ID,\n                baseline_image_layer_lsn,\n                DEFAULT_PG_VERSION,\n                &ctx,\n                vec![], // in-memory layers\n                vec![DeltaLayerTestDesc::new_with_inferred_key_range(\n                    delta_layer_start_lsn..delta_layer_end_lsn,\n                    delta_layer_spec,\n                )], // delta layers\n                vec![\n                    (baseline_image_layer_lsn, baseline_img_layer),\n                    (nested_image_layer_lsn, nested_img_layer),\n                ], // image layers\n                delta_layer_end_lsn,\n            )\n            .await?;\n\n        let query = VersionedKeySpaceQuery::uniform(\n            KeySpace::single(get_key(0)..get_key(10)),\n            
delta_layer_end_lsn,\n        );\n\n        let results = tline\n            .get_vectored(query, IoConcurrency::sequential(), &ctx)\n            .await\n            .expect(\"No vectored errors\");\n        for (key, res) in results {\n            let value = res.expect(\"No key errors\");\n            let expected_value = expected_key_values.remove(&key).expect(\"No unknown keys\");\n            assert_eq!(value, Bytes::from(expected_value));\n        }\n\n        Ok(())\n    }\n\n    #[cfg(feature = \"testing\")]\n    #[tokio::test]\n    async fn test_vectored_read_with_image_layer_inside_inmem() -> anyhow::Result<()> {\n        let harness =\n            TenantHarness::create(\"test_vectored_read_with_image_layer_inside_inmem\").await?;\n        let (tenant, ctx) = harness.load().await;\n\n        let will_init_keys = [2, 6];\n        fn get_key(id: u32) -> Key {\n            let mut key = Key::from_hex(\"110000000033333333444444445500000000\").unwrap();\n            key.field6 = id;\n            key\n        }\n\n        let mut expected_key_values = HashMap::new();\n\n        let baseline_image_layer_lsn = Lsn(0x10);\n        let mut baseline_img_layer = Vec::new();\n        for i in 0..5 {\n            let key = get_key(i);\n            let value = format!(\"value {i}@{baseline_image_layer_lsn}\");\n\n            let removed = expected_key_values.insert(key, value.clone());\n            assert!(removed.is_none());\n\n            baseline_img_layer.push((key, Bytes::from(value)));\n        }\n\n        let nested_image_layer_lsn = Lsn(0x50);\n        let mut nested_img_layer = Vec::new();\n        for i in 5..10 {\n            let key = get_key(i);\n            let value = format!(\"value {i}@{nested_image_layer_lsn}\");\n\n            let removed = expected_key_values.insert(key, value.clone());\n            assert!(removed.is_none());\n\n            nested_img_layer.push((key, Bytes::from(value)));\n        }\n\n        let frozen_layer = {\n            let 
lsn_range = Lsn(0x40)..Lsn(0x60);\n            let mut data = Vec::new();\n            for i in 0..10 {\n                let key = get_key(i);\n                let key_in_nested = nested_img_layer\n                    .iter()\n                    .any(|(key_with_img, _)| *key_with_img == key);\n                let lsn = {\n                    if key_in_nested {\n                        Lsn(nested_image_layer_lsn.0 + 5)\n                    } else {\n                        lsn_range.start\n                    }\n                };\n\n                let will_init = will_init_keys.contains(&i);\n                if will_init {\n                    data.push((key, lsn, Value::WalRecord(NeonWalRecord::wal_init(\"\"))));\n\n                    expected_key_values.insert(key, \"\".to_string());\n                } else {\n                    let delta = format!(\"@{lsn}\");\n                    data.push((\n                        key,\n                        lsn,\n                        Value::WalRecord(NeonWalRecord::wal_append(&delta)),\n                    ));\n\n                    expected_key_values\n                        .get_mut(&key)\n                        .expect(\"An image exists for each key\")\n                        .push_str(delta.as_str());\n                }\n            }\n\n            InMemoryLayerTestDesc {\n                lsn_range,\n                is_open: false,\n                data,\n            }\n        };\n\n        let (open_layer, last_record_lsn) = {\n            let start_lsn = Lsn(0x70);\n            let mut data = Vec::new();\n            let mut end_lsn = Lsn(0);\n            for i in 0..10 {\n                let key = get_key(i);\n                let lsn = Lsn(start_lsn.0 + i as u64);\n                let delta = format!(\"@{lsn}\");\n                data.push((\n                    key,\n                    lsn,\n                    Value::WalRecord(NeonWalRecord::wal_append(&delta)),\n                ));\n\n                
expected_key_values\n                    .get_mut(&key)\n                    .expect(\"An image exists for each key\")\n                    .push_str(delta.as_str());\n\n                end_lsn = std::cmp::max(end_lsn, lsn);\n            }\n\n            (\n                InMemoryLayerTestDesc {\n                    lsn_range: start_lsn..Lsn::MAX,\n                    is_open: true,\n                    data,\n                },\n                end_lsn,\n            )\n        };\n\n        assert!(\n            nested_image_layer_lsn > frozen_layer.lsn_range.start\n                && nested_image_layer_lsn < frozen_layer.lsn_range.end\n        );\n\n        let tline = tenant\n            .create_test_timeline_with_layers(\n                TIMELINE_ID,\n                baseline_image_layer_lsn,\n                DEFAULT_PG_VERSION,\n                &ctx,\n                vec![open_layer, frozen_layer], // in-memory layers\n                Vec::new(),                     // delta layers\n                vec![\n                    (baseline_image_layer_lsn, baseline_img_layer),\n                    (nested_image_layer_lsn, nested_img_layer),\n                ], // image layers\n                last_record_lsn,\n            )\n            .await?;\n\n        let query = VersionedKeySpaceQuery::uniform(\n            KeySpace::single(get_key(0)..get_key(10)),\n            last_record_lsn,\n        );\n\n        let results = tline\n            .get_vectored(query, IoConcurrency::sequential(), &ctx)\n            .await\n            .expect(\"No vectored errors\");\n        for (key, res) in results {\n            let value = res.expect(\"No key errors\");\n            let expected_value = expected_key_values.remove(&key).expect(\"No unknown keys\");\n            assert_eq!(value, Bytes::from(expected_value.clone()));\n\n            tracing::info!(\"key={key} value={expected_value}\");\n        }\n\n        Ok(())\n    }\n\n    // A randomized read path test. 
Generates a layer map according to a deterministic\n    // specification. Fills the (key, LSN) space in a random manner and then performs\n    // random scattered queries validating the results against in-memory storage.\n    //\n    // See this internal Notion page for a diagram of the layer map:\n    // https://www.notion.so/neondatabase/Read-Path-Unit-Testing-Fuzzing-1d1f189e0047806c8e5cd37781b0a350?pvs=4\n    //\n    // A fuzzing mode is also supported. In this mode, the test will use a random\n    // seed instead of a hardcoded one. Use it in conjunction with `cargo stress`\n    // to run multiple instances in parallel:\n    //\n    // $ RUST_BACKTRACE=1 RUST_LOG=INFO \\\n    //   cargo stress --package=pageserver --features=testing,fuzz-read-path --release -- test_read_path\n    #[cfg(feature = \"testing\")]\n    #[tokio::test]\n    async fn test_read_path() -> anyhow::Result<()> {\n        use rand::seq::IndexedRandom;\n\n        let seed = if cfg!(feature = \"fuzz-read-path\") {\n            let seed: u64 = rand::rng().random();\n            seed\n        } else {\n            // Use a hard-coded seed when not in fuzzing mode.\n            // Note that with the current approach results are not reproducible\n            // across platforms and Rust releases.\n            const SEED: u64 = 0;\n            SEED\n        };\n\n        let mut random = StdRng::seed_from_u64(seed);\n\n        let (queries, will_init_chance, gap_chance) = if cfg!(feature = \"fuzz-read-path\") {\n            const QUERIES: u64 = 5000;\n            let will_init_chance: u8 = random.random_range(0..=10);\n            let gap_chance: u8 = random.random_range(0..=50);\n\n            (QUERIES, will_init_chance, gap_chance)\n        } else {\n            const QUERIES: u64 = 1000;\n            const WILL_INIT_CHANCE: u8 = 1;\n            const GAP_CHANCE: u8 = 5;\n\n            (QUERIES, WILL_INIT_CHANCE, GAP_CHANCE)\n        };\n\n        let harness = 
TenantHarness::create(\"test_read_path\").await?;\n        let (tenant, ctx) = harness.load().await;\n\n        tracing::info!(\"Using random seed: {seed}\");\n        tracing::info!(%will_init_chance, %gap_chance, \"Fill params\");\n\n        // Define the layer map shape. Note that this part is not randomized.\n\n        const KEY_DIMENSION_SIZE: u32 = 99;\n        let start_key = Key::from_hex(\"110000000033333333444444445500000000\").unwrap();\n        let end_key = start_key.add(KEY_DIMENSION_SIZE);\n        let total_key_range = start_key..end_key;\n        let total_key_range_size = end_key.to_i128() - start_key.to_i128();\n        let total_start_lsn = Lsn(104);\n        let last_record_lsn = Lsn(504);\n\n        assert!(total_key_range_size % 3 == 0);\n\n        let in_memory_layers_shape = vec![\n            (total_key_range.clone(), Lsn(304)..Lsn(400)),\n            (total_key_range.clone(), Lsn(400)..last_record_lsn),\n        ];\n\n        let delta_layers_shape = vec![\n            (\n                start_key..(start_key.add((total_key_range_size / 3) as u32)),\n                Lsn(200)..Lsn(304),\n            ),\n            (\n                (start_key.add((total_key_range_size / 3) as u32))\n                    ..(start_key.add((total_key_range_size * 2 / 3) as u32)),\n                Lsn(200)..Lsn(304),\n            ),\n            (\n                (start_key.add((total_key_range_size * 2 / 3) as u32))\n                    ..(start_key.add(total_key_range_size as u32)),\n                Lsn(200)..Lsn(304),\n            ),\n        ];\n\n        let image_layers_shape = vec![\n            (\n                start_key.add((total_key_range_size * 2 / 3 - 10) as u32)\n                    ..start_key.add((total_key_range_size * 2 / 3 + 10) as u32),\n                Lsn(456),\n            ),\n            (\n                start_key.add((total_key_range_size / 3 - 10) as u32)\n                    ..start_key.add((total_key_range_size / 3 + 10) as 
u32),\n                Lsn(256),\n            ),\n            (total_key_range.clone(), total_start_lsn),\n        ];\n\n        let specification = TestTimelineSpecification {\n            start_lsn: total_start_lsn,\n            last_record_lsn,\n            in_memory_layers_shape,\n            delta_layers_shape,\n            image_layers_shape,\n            gap_chance,\n            will_init_chance,\n        };\n\n        // Create and randomly fill in the layers according to the specification\n        let (tline, storage, interesting_lsns) = randomize_timeline(\n            &tenant,\n            TIMELINE_ID,\n            DEFAULT_PG_VERSION,\n            specification,\n            &mut random,\n            &ctx,\n        )\n        .await?;\n\n        // Now generate queries based on the interesting lsns that we've collected.\n        //\n        // While there's still room in the query, pick an interesting LSN and a random\n        // key. Then roll the dice to see if the next key should also be included in\n        // the query. 
When the roll fails, break the \"batch\" and pick another point in the\n        // (key, LSN) space.\n\n        const PICK_NEXT_CHANCE: u8 = 50;\n        for _ in 0..queries {\n            let query = {\n                let mut keyspaces_at_lsn: HashMap<Lsn, KeySpaceRandomAccum> = HashMap::default();\n                let mut used_keys: HashSet<Key> = HashSet::default();\n\n                while used_keys.len() < tenant.conf.max_get_vectored_keys.get() {\n                    let selected_lsn = interesting_lsns.choose(&mut random).expect(\"not empty\");\n                    let mut selected_key =\n                        start_key.add(random.random_range(0..KEY_DIMENSION_SIZE));\n\n                    while used_keys.len() < tenant.conf.max_get_vectored_keys.get() {\n                        if used_keys.contains(&selected_key)\n                            || selected_key >= start_key.add(KEY_DIMENSION_SIZE)\n                        {\n                            break;\n                        }\n\n                        keyspaces_at_lsn\n                            .entry(*selected_lsn)\n                            .or_default()\n                            .add_key(selected_key);\n                        used_keys.insert(selected_key);\n\n                        let pick_next = random.random_range(0..=100) <= PICK_NEXT_CHANCE;\n                        if pick_next {\n                            selected_key = selected_key.next();\n                        } else {\n                            break;\n                        }\n                    }\n                }\n\n                VersionedKeySpaceQuery::scattered(\n                    keyspaces_at_lsn\n                        .into_iter()\n                        .map(|(lsn, acc)| (lsn, acc.to_keyspace()))\n                        .collect(),\n                )\n            };\n\n            // Run the query and validate the results\n\n            let results = tline\n                
.get_vectored(query.clone(), IoConcurrency::Sequential, &ctx)\n                .await;\n\n            let blobs = match results {\n                Ok(ok) => ok,\n                Err(err) => {\n                    panic!(\"seed={seed} Error returned for query {query}: {err}\");\n                }\n            };\n\n            for (key, key_res) in blobs.into_iter() {\n                match key_res {\n                    Ok(blob) => {\n                        let requested_at_lsn = query.map_key_to_lsn(&key);\n                        let expected = storage.get(key, requested_at_lsn);\n\n                        if blob != expected {\n                            tracing::error!(\n                                \"seed={seed} Mismatch for {key}@{requested_at_lsn} from query: {query}\"\n                            );\n                        }\n\n                        assert_eq!(blob, expected);\n                    }\n                    Err(err) => {\n                        let requested_at_lsn = query.map_key_to_lsn(&key);\n\n                        panic!(\n                            \"seed={seed} Error returned for {key}@{requested_at_lsn} from query {query}: {err}\"\n                        );\n                    }\n                }\n            }\n        }\n\n        Ok(())\n    }\n\n    fn sort_layer_key(k1: &PersistentLayerKey, k2: &PersistentLayerKey) -> std::cmp::Ordering {\n        (\n            k1.is_delta,\n            k1.key_range.start,\n            k1.key_range.end,\n            k1.lsn_range.start,\n            k1.lsn_range.end,\n        )\n            .cmp(&(\n                k2.is_delta,\n                k2.key_range.start,\n                k2.key_range.end,\n                k2.lsn_range.start,\n                k2.lsn_range.end,\n            ))\n    }\n\n    async fn inspect_and_sort(\n        tline: &Arc<Timeline>,\n        filter: Option<std::ops::Range<Key>>,\n    ) -> Vec<PersistentLayerKey> {\n        let mut all_layers = 
tline.inspect_historic_layers().await.unwrap();\n        if let Some(filter) = filter {\n            all_layers.retain(|layer| overlaps_with(&layer.key_range, &filter));\n        }\n        all_layers.sort_by(sort_layer_key);\n        all_layers\n    }\n\n    #[cfg(feature = \"testing\")]\n    fn check_layer_map_key_eq(\n        mut left: Vec<PersistentLayerKey>,\n        mut right: Vec<PersistentLayerKey>,\n    ) {\n        left.sort_by(sort_layer_key);\n        right.sort_by(sort_layer_key);\n        if left != right {\n            eprintln!(\"---LEFT---\");\n            for left in left.iter() {\n                eprintln!(\"{left}\");\n            }\n            eprintln!(\"---RIGHT---\");\n            for right in right.iter() {\n                eprintln!(\"{right}\");\n            }\n            assert_eq!(left, right);\n        }\n    }\n\n    #[cfg(feature = \"testing\")]\n    #[tokio::test]\n    async fn test_simple_partial_bottom_most_compaction() -> anyhow::Result<()> {\n        let harness = TenantHarness::create(\"test_simple_partial_bottom_most_compaction\").await?;\n        let (tenant, ctx) = harness.load().await;\n\n        fn get_key(id: u32) -> Key {\n            // using aux key here b/c they are guaranteed to be inside `collect_keyspace`.\n            let mut key = Key::from_hex(\"620000000033333333444444445500000000\").unwrap();\n            key.field6 = id;\n            key\n        }\n\n        // img layer at 0x10\n        let img_layer = (0..10)\n            .map(|id| (get_key(id), Bytes::from(format!(\"value {id}@0x10\"))))\n            .collect_vec();\n\n        let delta1 = vec![\n            (\n                get_key(1),\n                Lsn(0x20),\n                Value::Image(Bytes::from(\"value 1@0x20\")),\n            ),\n            (\n                get_key(2),\n                Lsn(0x30),\n                Value::Image(Bytes::from(\"value 2@0x30\")),\n            ),\n            (\n                get_key(3),\n                
Lsn(0x40),\n                Value::Image(Bytes::from(\"value 3@0x40\")),\n            ),\n        ];\n        let delta2 = vec![\n            (\n                get_key(5),\n                Lsn(0x20),\n                Value::Image(Bytes::from(\"value 5@0x20\")),\n            ),\n            (\n                get_key(6),\n                Lsn(0x20),\n                Value::Image(Bytes::from(\"value 6@0x20\")),\n            ),\n        ];\n        let delta3 = vec![\n            (\n                get_key(8),\n                Lsn(0x48),\n                Value::Image(Bytes::from(\"value 8@0x48\")),\n            ),\n            (\n                get_key(9),\n                Lsn(0x48),\n                Value::Image(Bytes::from(\"value 9@0x48\")),\n            ),\n        ];\n\n        let tline = tenant\n            .create_test_timeline_with_layers(\n                TIMELINE_ID,\n                Lsn(0x10),\n                DEFAULT_PG_VERSION,\n                &ctx,\n                vec![], // in-memory layers\n                vec![\n                    DeltaLayerTestDesc::new_with_inferred_key_range(Lsn(0x20)..Lsn(0x48), delta1),\n                    DeltaLayerTestDesc::new_with_inferred_key_range(Lsn(0x20)..Lsn(0x48), delta2),\n                    DeltaLayerTestDesc::new_with_inferred_key_range(Lsn(0x48)..Lsn(0x50), delta3),\n                ], // delta layers\n                vec![(Lsn(0x10), img_layer)], // image layers\n                Lsn(0x50),\n            )\n            .await?;\n\n        {\n            tline\n                .applied_gc_cutoff_lsn\n                .lock_for_write()\n                .store_and_unlock(Lsn(0x30))\n                .wait()\n                .await;\n            // Update GC info\n            let mut guard = tline.gc_info.write().unwrap();\n            *guard = GcInfo {\n                retain_lsns: vec![(Lsn(0x20), tline.timeline_id, MaybeOffloaded::No)],\n                cutoffs: GcCutoffs {\n                    time: 
Some(Lsn(0x30)),\n                    space: Lsn(0x30),\n                },\n                leases: Default::default(),\n                within_ancestor_pitr: false,\n            };\n        }\n\n        let cancel = CancellationToken::new();\n\n        // Do a partial compaction on key range 0..2\n        tline\n            .compact_with_gc(\n                &cancel,\n                CompactOptions {\n                    flags: EnumSet::new(),\n                    compact_key_range: Some((get_key(0)..get_key(2)).into()),\n                    ..CompactOptions::default_for_gc_compaction_unit_tests()\n                },\n                &ctx,\n            )\n            .await\n            .unwrap();\n        let all_layers = inspect_and_sort(&tline, Some(get_key(0)..get_key(10))).await;\n        check_layer_map_key_eq(\n            all_layers,\n            vec![\n                // newly-generated image layer for the partial compaction range 0-2\n                PersistentLayerKey {\n                    key_range: get_key(0)..get_key(2),\n                    lsn_range: Lsn(0x20)..Lsn(0x21),\n                    is_delta: false,\n                },\n                PersistentLayerKey {\n                    key_range: get_key(0)..get_key(10),\n                    lsn_range: Lsn(0x10)..Lsn(0x11),\n                    is_delta: false,\n                },\n                // delta1 is split and the second part is rewritten\n                PersistentLayerKey {\n                    key_range: get_key(2)..get_key(4),\n                    lsn_range: Lsn(0x20)..Lsn(0x48),\n                    is_delta: true,\n                },\n                PersistentLayerKey {\n                    key_range: get_key(5)..get_key(7),\n                    lsn_range: Lsn(0x20)..Lsn(0x48),\n                    is_delta: true,\n                },\n                PersistentLayerKey {\n                    key_range: get_key(8)..get_key(10),\n                    lsn_range: 
Lsn(0x48)..Lsn(0x50),\n                    is_delta: true,\n                },\n            ],\n        );\n\n        // Do a partial compaction on key range 2..4\n        tline\n            .compact_with_gc(\n                &cancel,\n                CompactOptions {\n                    flags: EnumSet::new(),\n                    compact_key_range: Some((get_key(2)..get_key(4)).into()),\n                    ..CompactOptions::default_for_gc_compaction_unit_tests()\n                },\n                &ctx,\n            )\n            .await\n            .unwrap();\n        let all_layers = inspect_and_sort(&tline, Some(get_key(0)..get_key(10))).await;\n        check_layer_map_key_eq(\n            all_layers,\n            vec![\n                PersistentLayerKey {\n                    key_range: get_key(0)..get_key(2),\n                    lsn_range: Lsn(0x20)..Lsn(0x21),\n                    is_delta: false,\n                },\n                PersistentLayerKey {\n                    key_range: get_key(0)..get_key(10),\n                    lsn_range: Lsn(0x10)..Lsn(0x11),\n                    is_delta: false,\n                },\n                // image layer generated for the compaction range 2-4\n                PersistentLayerKey {\n                    key_range: get_key(2)..get_key(4),\n                    lsn_range: Lsn(0x20)..Lsn(0x21),\n                    is_delta: false,\n                },\n                // we have key2/key3 above the retain_lsn, so we still need this delta layer\n                PersistentLayerKey {\n                    key_range: get_key(2)..get_key(4),\n                    lsn_range: Lsn(0x20)..Lsn(0x48),\n                    is_delta: true,\n                },\n                PersistentLayerKey {\n                    key_range: get_key(5)..get_key(7),\n                    lsn_range: Lsn(0x20)..Lsn(0x48),\n                    is_delta: true,\n                },\n                PersistentLayerKey {\n                    
key_range: get_key(8)..get_key(10),\n                    lsn_range: Lsn(0x48)..Lsn(0x50),\n                    is_delta: true,\n                },\n            ],\n        );\n\n        // Do a partial compaction on key range 4..9\n        tline\n            .compact_with_gc(\n                &cancel,\n                CompactOptions {\n                    flags: EnumSet::new(),\n                    compact_key_range: Some((get_key(4)..get_key(9)).into()),\n                    ..CompactOptions::default_for_gc_compaction_unit_tests()\n                },\n                &ctx,\n            )\n            .await\n            .unwrap();\n        let all_layers = inspect_and_sort(&tline, Some(get_key(0)..get_key(10))).await;\n        check_layer_map_key_eq(\n            all_layers,\n            vec![\n                PersistentLayerKey {\n                    key_range: get_key(0)..get_key(2),\n                    lsn_range: Lsn(0x20)..Lsn(0x21),\n                    is_delta: false,\n                },\n                PersistentLayerKey {\n                    key_range: get_key(0)..get_key(10),\n                    lsn_range: Lsn(0x10)..Lsn(0x11),\n                    is_delta: false,\n                },\n                PersistentLayerKey {\n                    key_range: get_key(2)..get_key(4),\n                    lsn_range: Lsn(0x20)..Lsn(0x21),\n                    is_delta: false,\n                },\n                PersistentLayerKey {\n                    key_range: get_key(2)..get_key(4),\n                    lsn_range: Lsn(0x20)..Lsn(0x48),\n                    is_delta: true,\n                },\n                // image layer generated for this compaction range\n                PersistentLayerKey {\n                    key_range: get_key(4)..get_key(9),\n                    lsn_range: Lsn(0x20)..Lsn(0x21),\n                    is_delta: false,\n                },\n                PersistentLayerKey {\n                    key_range: 
get_key(8)..get_key(10),\n                    lsn_range: Lsn(0x48)..Lsn(0x50),\n                    is_delta: true,\n                },\n            ],\n        );\n\n        // Do a partial compaction on key range 9..10\n        tline\n            .compact_with_gc(\n                &cancel,\n                CompactOptions {\n                    flags: EnumSet::new(),\n                    compact_key_range: Some((get_key(9)..get_key(10)).into()),\n                    ..CompactOptions::default_for_gc_compaction_unit_tests()\n                },\n                &ctx,\n            )\n            .await\n            .unwrap();\n        let all_layers = inspect_and_sort(&tline, Some(get_key(0)..get_key(10))).await;\n        check_layer_map_key_eq(\n            all_layers,\n            vec![\n                PersistentLayerKey {\n                    key_range: get_key(0)..get_key(2),\n                    lsn_range: Lsn(0x20)..Lsn(0x21),\n                    is_delta: false,\n                },\n                PersistentLayerKey {\n                    key_range: get_key(0)..get_key(10),\n                    lsn_range: Lsn(0x10)..Lsn(0x11),\n                    is_delta: false,\n                },\n                PersistentLayerKey {\n                    key_range: get_key(2)..get_key(4),\n                    lsn_range: Lsn(0x20)..Lsn(0x21),\n                    is_delta: false,\n                },\n                PersistentLayerKey {\n                    key_range: get_key(2)..get_key(4),\n                    lsn_range: Lsn(0x20)..Lsn(0x48),\n                    is_delta: true,\n                },\n                PersistentLayerKey {\n                    key_range: get_key(4)..get_key(9),\n                    lsn_range: Lsn(0x20)..Lsn(0x21),\n                    is_delta: false,\n                },\n                // image layer generated for the compaction range\n                PersistentLayerKey {\n                    key_range: get_key(9)..get_key(10),\n          
          lsn_range: Lsn(0x20)..Lsn(0x21),\n                    is_delta: false,\n                },\n                PersistentLayerKey {\n                    key_range: get_key(8)..get_key(10),\n                    lsn_range: Lsn(0x48)..Lsn(0x50),\n                    is_delta: true,\n                },\n            ],\n        );\n\n        // Do a partial compaction on key range 0..10, all image layers below LSN 20 can be replaced with new ones.\n        tline\n            .compact_with_gc(\n                &cancel,\n                CompactOptions {\n                    flags: EnumSet::new(),\n                    compact_key_range: Some((get_key(0)..get_key(10)).into()),\n                    ..CompactOptions::default_for_gc_compaction_unit_tests()\n                },\n                &ctx,\n            )\n            .await\n            .unwrap();\n        let all_layers = inspect_and_sort(&tline, Some(get_key(0)..get_key(10))).await;\n        check_layer_map_key_eq(\n            all_layers,\n            vec![\n                // aha, we removed all unnecessary image/delta layers and got a very clean layer map!\n                PersistentLayerKey {\n                    key_range: get_key(0)..get_key(10),\n                    lsn_range: Lsn(0x20)..Lsn(0x21),\n                    is_delta: false,\n                },\n                PersistentLayerKey {\n                    key_range: get_key(2)..get_key(4),\n                    lsn_range: Lsn(0x20)..Lsn(0x48),\n                    is_delta: true,\n                },\n                PersistentLayerKey {\n                    key_range: get_key(8)..get_key(10),\n                    lsn_range: Lsn(0x48)..Lsn(0x50),\n                    is_delta: true,\n                },\n            ],\n        );\n        Ok(())\n    }\n\n    #[cfg(feature = \"testing\")]\n    #[tokio::test]\n    async fn test_timeline_offload_retain_lsn() -> anyhow::Result<()> {\n        let harness = 
TenantHarness::create(\"test_timeline_offload_retain_lsn\")\n            .await\n            .unwrap();\n        let (tenant, ctx) = harness.load().await;\n        let tline_parent = tenant\n            .create_test_timeline(TIMELINE_ID, Lsn(0x10), DEFAULT_PG_VERSION, &ctx)\n            .await\n            .unwrap();\n        let tline_child = tenant\n            .branch_timeline_test(&tline_parent, NEW_TIMELINE_ID, Some(Lsn(0x20)), &ctx)\n            .await\n            .unwrap();\n        {\n            let gc_info_parent = tline_parent.gc_info.read().unwrap();\n            assert_eq!(\n                gc_info_parent.retain_lsns,\n                vec![(Lsn(0x20), tline_child.timeline_id, MaybeOffloaded::No)]\n            );\n        }\n        // We have to directly call the remote_client instead of using the archive function to avoid constructing broker client...\n        tline_child\n            .remote_client\n            .schedule_index_upload_for_timeline_archival_state(TimelineArchivalState::Archived)\n            .unwrap();\n        tline_child.remote_client.wait_completion().await.unwrap();\n        offload_timeline(&tenant, &tline_child)\n            .instrument(tracing::info_span!(parent: None, \"offload_test\", tenant_id=%\"test\", shard_id=%\"test\", timeline_id=%\"test\"))\n            .await.unwrap();\n        let child_timeline_id = tline_child.timeline_id;\n        Arc::try_unwrap(tline_child).unwrap();\n\n        {\n            let gc_info_parent = tline_parent.gc_info.read().unwrap();\n            assert_eq!(\n                gc_info_parent.retain_lsns,\n                vec![(Lsn(0x20), child_timeline_id, MaybeOffloaded::Yes)]\n            );\n        }\n\n        tenant\n            .get_offloaded_timeline(child_timeline_id)\n            .unwrap()\n            .defuse_for_tenant_drop();\n\n        Ok(())\n    }\n\n    #[cfg(feature = \"testing\")]\n    #[tokio::test]\n    async fn test_simple_bottom_most_compaction_above_lsn() -> 
anyhow::Result<()> {\n        let harness = TenantHarness::create(\"test_simple_bottom_most_compaction_above_lsn\").await?;\n        let (tenant, ctx) = harness.load().await;\n\n        fn get_key(id: u32) -> Key {\n            // using aux key here b/c they are guaranteed to be inside `collect_keyspace`.\n            let mut key = Key::from_hex(\"620000000033333333444444445500000000\").unwrap();\n            key.field6 = id;\n            key\n        }\n\n        let img_layer = (0..10)\n            .map(|id| (get_key(id), Bytes::from(format!(\"value {id}@0x10\"))))\n            .collect_vec();\n\n        let delta1 = vec![(\n            get_key(1),\n            Lsn(0x20),\n            Value::WalRecord(NeonWalRecord::wal_append(\"@0x20\")),\n        )];\n        let delta4 = vec![(\n            get_key(1),\n            Lsn(0x28),\n            Value::WalRecord(NeonWalRecord::wal_append(\"@0x28\")),\n        )];\n        let delta2 = vec![\n            (\n                get_key(1),\n                Lsn(0x30),\n                Value::WalRecord(NeonWalRecord::wal_append(\"@0x30\")),\n            ),\n            (\n                get_key(1),\n                Lsn(0x38),\n                Value::WalRecord(NeonWalRecord::wal_append(\"@0x38\")),\n            ),\n        ];\n        let delta3 = vec![\n            (\n                get_key(8),\n                Lsn(0x48),\n                Value::WalRecord(NeonWalRecord::wal_append(\"@0x48\")),\n            ),\n            (\n                get_key(9),\n                Lsn(0x48),\n                Value::WalRecord(NeonWalRecord::wal_append(\"@0x48\")),\n            ),\n        ];\n\n        let tline = tenant\n            .create_test_timeline_with_layers(\n                TIMELINE_ID,\n                Lsn(0x10),\n                DEFAULT_PG_VERSION,\n                &ctx,\n                vec![], // in-memory layers\n                vec![\n                    // delta1/2/4 only contain a single key but multiple updates\n    
                DeltaLayerTestDesc::new_with_inferred_key_range(Lsn(0x20)..Lsn(0x28), delta1),\n                    DeltaLayerTestDesc::new_with_inferred_key_range(Lsn(0x30)..Lsn(0x50), delta2),\n                    DeltaLayerTestDesc::new_with_inferred_key_range(Lsn(0x28)..Lsn(0x30), delta4),\n                    DeltaLayerTestDesc::new_with_inferred_key_range(Lsn(0x30)..Lsn(0x50), delta3),\n                ], // delta layers\n                vec![(Lsn(0x10), img_layer)], // image layers\n                Lsn(0x50),\n            )\n            .await?;\n        {\n            tline\n                .applied_gc_cutoff_lsn\n                .lock_for_write()\n                .store_and_unlock(Lsn(0x30))\n                .wait()\n                .await;\n            // Update GC info\n            let mut guard = tline.gc_info.write().unwrap();\n            *guard = GcInfo {\n                retain_lsns: vec![\n                    (Lsn(0x10), tline.timeline_id, MaybeOffloaded::No),\n                    (Lsn(0x20), tline.timeline_id, MaybeOffloaded::No),\n                ],\n                cutoffs: GcCutoffs {\n                    time: Some(Lsn(0x30)),\n                    space: Lsn(0x30),\n                },\n                leases: Default::default(),\n                within_ancestor_pitr: false,\n            };\n        }\n\n        let expected_result = [\n            Bytes::from_static(b\"value 0@0x10\"),\n            Bytes::from_static(b\"value 1@0x10@0x20@0x28@0x30@0x38\"),\n            Bytes::from_static(b\"value 2@0x10\"),\n            Bytes::from_static(b\"value 3@0x10\"),\n            Bytes::from_static(b\"value 4@0x10\"),\n            Bytes::from_static(b\"value 5@0x10\"),\n            Bytes::from_static(b\"value 6@0x10\"),\n            Bytes::from_static(b\"value 7@0x10\"),\n            Bytes::from_static(b\"value 8@0x10@0x48\"),\n            Bytes::from_static(b\"value 9@0x10@0x48\"),\n        ];\n\n        let expected_result_at_gc_horizon = [\n         
   Bytes::from_static(b\"value 0@0x10\"),\n            Bytes::from_static(b\"value 1@0x10@0x20@0x28@0x30\"),\n            Bytes::from_static(b\"value 2@0x10\"),\n            Bytes::from_static(b\"value 3@0x10\"),\n            Bytes::from_static(b\"value 4@0x10\"),\n            Bytes::from_static(b\"value 5@0x10\"),\n            Bytes::from_static(b\"value 6@0x10\"),\n            Bytes::from_static(b\"value 7@0x10\"),\n            Bytes::from_static(b\"value 8@0x10\"),\n            Bytes::from_static(b\"value 9@0x10\"),\n        ];\n\n        let expected_result_at_lsn_20 = [\n            Bytes::from_static(b\"value 0@0x10\"),\n            Bytes::from_static(b\"value 1@0x10@0x20\"),\n            Bytes::from_static(b\"value 2@0x10\"),\n            Bytes::from_static(b\"value 3@0x10\"),\n            Bytes::from_static(b\"value 4@0x10\"),\n            Bytes::from_static(b\"value 5@0x10\"),\n            Bytes::from_static(b\"value 6@0x10\"),\n            Bytes::from_static(b\"value 7@0x10\"),\n            Bytes::from_static(b\"value 8@0x10\"),\n            Bytes::from_static(b\"value 9@0x10\"),\n        ];\n\n        let expected_result_at_lsn_10 = [\n            Bytes::from_static(b\"value 0@0x10\"),\n            Bytes::from_static(b\"value 1@0x10\"),\n            Bytes::from_static(b\"value 2@0x10\"),\n            Bytes::from_static(b\"value 3@0x10\"),\n            Bytes::from_static(b\"value 4@0x10\"),\n            Bytes::from_static(b\"value 5@0x10\"),\n            Bytes::from_static(b\"value 6@0x10\"),\n            Bytes::from_static(b\"value 7@0x10\"),\n            Bytes::from_static(b\"value 8@0x10\"),\n            Bytes::from_static(b\"value 9@0x10\"),\n        ];\n\n        let verify_result = || async {\n            let gc_horizon = {\n                let gc_info = tline.gc_info.read().unwrap();\n                gc_info.cutoffs.time.unwrap_or_default()\n            };\n            for idx in 0..10 {\n                assert_eq!(\n                    tline\n     
                   .get(get_key(idx as u32), Lsn(0x50), &ctx)\n                        .await\n                        .unwrap(),\n                    &expected_result[idx]\n                );\n                assert_eq!(\n                    tline\n                        .get(get_key(idx as u32), gc_horizon, &ctx)\n                        .await\n                        .unwrap(),\n                    &expected_result_at_gc_horizon[idx]\n                );\n                assert_eq!(\n                    tline\n                        .get(get_key(idx as u32), Lsn(0x20), &ctx)\n                        .await\n                        .unwrap(),\n                    &expected_result_at_lsn_20[idx]\n                );\n                assert_eq!(\n                    tline\n                        .get(get_key(idx as u32), Lsn(0x10), &ctx)\n                        .await\n                        .unwrap(),\n                    &expected_result_at_lsn_10[idx]\n                );\n            }\n        };\n\n        verify_result().await;\n\n        let cancel = CancellationToken::new();\n        tline\n            .compact_with_gc(\n                &cancel,\n                CompactOptions {\n                    compact_lsn_range: Some(CompactLsnRange::above(Lsn(0x28))),\n                    ..CompactOptions::default_for_gc_compaction_unit_tests()\n                },\n                &ctx,\n            )\n            .await\n            .unwrap();\n        verify_result().await;\n\n        let all_layers = inspect_and_sort(&tline, Some(get_key(0)..get_key(10))).await;\n        check_layer_map_key_eq(\n            all_layers,\n            vec![\n                // The original image layer, not compacted\n                PersistentLayerKey {\n                    key_range: get_key(0)..get_key(10),\n                    lsn_range: Lsn(0x10)..Lsn(0x11),\n                    is_delta: false,\n                },\n                // Delta layer below the specified above_lsn 
not compacted\n                PersistentLayerKey {\n                    key_range: get_key(1)..get_key(2),\n                    lsn_range: Lsn(0x20)..Lsn(0x28),\n                    is_delta: true,\n                },\n                // Delta layer compacted above the LSN\n                PersistentLayerKey {\n                    key_range: get_key(1)..get_key(10),\n                    lsn_range: Lsn(0x28)..Lsn(0x50),\n                    is_delta: true,\n                },\n            ],\n        );\n\n        // compact again\n        tline\n            .compact_with_gc(\n                &cancel,\n                CompactOptions::default_for_gc_compaction_unit_tests(),\n                &ctx,\n            )\n            .await\n            .unwrap();\n        verify_result().await;\n\n        let all_layers = inspect_and_sort(&tline, Some(get_key(0)..get_key(10))).await;\n        check_layer_map_key_eq(\n            all_layers,\n            vec![\n                // The compacted image layer (full key range)\n                PersistentLayerKey {\n                    key_range: Key::MIN..Key::MAX,\n                    lsn_range: Lsn(0x10)..Lsn(0x11),\n                    is_delta: false,\n                },\n                // All other data in the delta layer\n                PersistentLayerKey {\n                    key_range: get_key(1)..get_key(10),\n                    lsn_range: Lsn(0x10)..Lsn(0x50),\n                    is_delta: true,\n                },\n            ],\n        );\n\n        Ok(())\n    }\n\n    #[cfg(feature = \"testing\")]\n    #[tokio::test]\n    async fn test_simple_bottom_most_compaction_rectangle() -> anyhow::Result<()> {\n        let harness = TenantHarness::create(\"test_simple_bottom_most_compaction_rectangle\").await?;\n        let (tenant, ctx) = harness.load().await;\n\n        fn get_key(id: u32) -> Key {\n            // using aux key here b/c they are guaranteed to be inside `collect_keyspace`.\n            let mut key = 
Key::from_hex(\"620000000033333333444444445500000000\").unwrap();\n            key.field6 = id;\n            key\n        }\n\n        let img_layer = (0..10)\n            .map(|id| (get_key(id), Bytes::from(format!(\"value {id}@0x10\"))))\n            .collect_vec();\n\n        let delta1 = vec![(\n            get_key(1),\n            Lsn(0x20),\n            Value::WalRecord(NeonWalRecord::wal_append(\"@0x20\")),\n        )];\n        let delta4 = vec![(\n            get_key(1),\n            Lsn(0x28),\n            Value::WalRecord(NeonWalRecord::wal_append(\"@0x28\")),\n        )];\n        let delta2 = vec![\n            (\n                get_key(1),\n                Lsn(0x30),\n                Value::WalRecord(NeonWalRecord::wal_append(\"@0x30\")),\n            ),\n            (\n                get_key(1),\n                Lsn(0x38),\n                Value::WalRecord(NeonWalRecord::wal_append(\"@0x38\")),\n            ),\n        ];\n        let delta3 = vec![\n            (\n                get_key(8),\n                Lsn(0x48),\n                Value::WalRecord(NeonWalRecord::wal_append(\"@0x48\")),\n            ),\n            (\n                get_key(9),\n                Lsn(0x48),\n                Value::WalRecord(NeonWalRecord::wal_append(\"@0x48\")),\n            ),\n        ];\n\n        let tline = tenant\n            .create_test_timeline_with_layers(\n                TIMELINE_ID,\n                Lsn(0x10),\n                DEFAULT_PG_VERSION,\n                &ctx,\n                vec![], // in-memory layers\n                vec![\n                    // delta1/2/4 only contain a single key but multiple updates\n                    DeltaLayerTestDesc::new_with_inferred_key_range(Lsn(0x20)..Lsn(0x28), delta1),\n                    DeltaLayerTestDesc::new_with_inferred_key_range(Lsn(0x30)..Lsn(0x50), delta2),\n                    DeltaLayerTestDesc::new_with_inferred_key_range(Lsn(0x28)..Lsn(0x30), delta4),\n                    
DeltaLayerTestDesc::new_with_inferred_key_range(Lsn(0x30)..Lsn(0x50), delta3),\n                ], // delta layers\n                vec![(Lsn(0x10), img_layer)], // image layers\n                Lsn(0x50),\n            )\n            .await?;\n        {\n            tline\n                .applied_gc_cutoff_lsn\n                .lock_for_write()\n                .store_and_unlock(Lsn(0x30))\n                .wait()\n                .await;\n            // Update GC info\n            let mut guard = tline.gc_info.write().unwrap();\n            *guard = GcInfo {\n                retain_lsns: vec![\n                    (Lsn(0x10), tline.timeline_id, MaybeOffloaded::No),\n                    (Lsn(0x20), tline.timeline_id, MaybeOffloaded::No),\n                ],\n                cutoffs: GcCutoffs {\n                    time: Some(Lsn(0x30)),\n                    space: Lsn(0x30),\n                },\n                leases: Default::default(),\n                within_ancestor_pitr: false,\n            };\n        }\n\n        let expected_result = [\n            Bytes::from_static(b\"value 0@0x10\"),\n            Bytes::from_static(b\"value 1@0x10@0x20@0x28@0x30@0x38\"),\n            Bytes::from_static(b\"value 2@0x10\"),\n            Bytes::from_static(b\"value 3@0x10\"),\n            Bytes::from_static(b\"value 4@0x10\"),\n            Bytes::from_static(b\"value 5@0x10\"),\n            Bytes::from_static(b\"value 6@0x10\"),\n            Bytes::from_static(b\"value 7@0x10\"),\n            Bytes::from_static(b\"value 8@0x10@0x48\"),\n            Bytes::from_static(b\"value 9@0x10@0x48\"),\n        ];\n\n        let expected_result_at_gc_horizon = [\n            Bytes::from_static(b\"value 0@0x10\"),\n            Bytes::from_static(b\"value 1@0x10@0x20@0x28@0x30\"),\n            Bytes::from_static(b\"value 2@0x10\"),\n            Bytes::from_static(b\"value 3@0x10\"),\n            Bytes::from_static(b\"value 4@0x10\"),\n            Bytes::from_static(b\"value 
5@0x10\"),\n            Bytes::from_static(b\"value 6@0x10\"),\n            Bytes::from_static(b\"value 7@0x10\"),\n            Bytes::from_static(b\"value 8@0x10\"),\n            Bytes::from_static(b\"value 9@0x10\"),\n        ];\n\n        let expected_result_at_lsn_20 = [\n            Bytes::from_static(b\"value 0@0x10\"),\n            Bytes::from_static(b\"value 1@0x10@0x20\"),\n            Bytes::from_static(b\"value 2@0x10\"),\n            Bytes::from_static(b\"value 3@0x10\"),\n            Bytes::from_static(b\"value 4@0x10\"),\n            Bytes::from_static(b\"value 5@0x10\"),\n            Bytes::from_static(b\"value 6@0x10\"),\n            Bytes::from_static(b\"value 7@0x10\"),\n            Bytes::from_static(b\"value 8@0x10\"),\n            Bytes::from_static(b\"value 9@0x10\"),\n        ];\n\n        let expected_result_at_lsn_10 = [\n            Bytes::from_static(b\"value 0@0x10\"),\n            Bytes::from_static(b\"value 1@0x10\"),\n            Bytes::from_static(b\"value 2@0x10\"),\n            Bytes::from_static(b\"value 3@0x10\"),\n            Bytes::from_static(b\"value 4@0x10\"),\n            Bytes::from_static(b\"value 5@0x10\"),\n            Bytes::from_static(b\"value 6@0x10\"),\n            Bytes::from_static(b\"value 7@0x10\"),\n            Bytes::from_static(b\"value 8@0x10\"),\n            Bytes::from_static(b\"value 9@0x10\"),\n        ];\n\n        let verify_result = || async {\n            let gc_horizon = {\n                let gc_info = tline.gc_info.read().unwrap();\n                gc_info.cutoffs.time.unwrap_or_default()\n            };\n            for idx in 0..10 {\n                assert_eq!(\n                    tline\n                        .get(get_key(idx as u32), Lsn(0x50), &ctx)\n                        .await\n                        .unwrap(),\n                    &expected_result[idx]\n                );\n                assert_eq!(\n                    tline\n                        .get(get_key(idx as u32), 
gc_horizon, &ctx)\n                        .await\n                        .unwrap(),\n                    &expected_result_at_gc_horizon[idx]\n                );\n                assert_eq!(\n                    tline\n                        .get(get_key(idx as u32), Lsn(0x20), &ctx)\n                        .await\n                        .unwrap(),\n                    &expected_result_at_lsn_20[idx]\n                );\n                assert_eq!(\n                    tline\n                        .get(get_key(idx as u32), Lsn(0x10), &ctx)\n                        .await\n                        .unwrap(),\n                    &expected_result_at_lsn_10[idx]\n                );\n            }\n        };\n\n        verify_result().await;\n\n        let cancel = CancellationToken::new();\n\n        tline\n            .compact_with_gc(\n                &cancel,\n                CompactOptions {\n                    compact_key_range: Some((get_key(0)..get_key(2)).into()),\n                    compact_lsn_range: Some((Lsn(0x20)..Lsn(0x28)).into()),\n                    ..CompactOptions::default_for_gc_compaction_unit_tests()\n                },\n                &ctx,\n            )\n            .await\n            .unwrap();\n        verify_result().await;\n\n        let all_layers = inspect_and_sort(&tline, Some(get_key(0)..get_key(10))).await;\n        check_layer_map_key_eq(\n            all_layers,\n            vec![\n                // The original image layer, not compacted\n                PersistentLayerKey {\n                    key_range: get_key(0)..get_key(10),\n                    lsn_range: Lsn(0x10)..Lsn(0x11),\n                    is_delta: false,\n                },\n                // According the selection logic, we select all layers with start key <= 0x28, so we would merge the layer 0x20-0x28 and\n                // the layer 0x28-0x30 into one.\n                PersistentLayerKey {\n                    key_range: get_key(1)..get_key(2),\n  
                  lsn_range: Lsn(0x20)..Lsn(0x30),\n                    is_delta: true,\n                },\n                // Above the upper bound and untouched\n                PersistentLayerKey {\n                    key_range: get_key(1)..get_key(2),\n                    lsn_range: Lsn(0x30)..Lsn(0x50),\n                    is_delta: true,\n                },\n                // This layer is untouched\n                PersistentLayerKey {\n                    key_range: get_key(8)..get_key(10),\n                    lsn_range: Lsn(0x30)..Lsn(0x50),\n                    is_delta: true,\n                },\n            ],\n        );\n\n        tline\n            .compact_with_gc(\n                &cancel,\n                CompactOptions {\n                    compact_key_range: Some((get_key(3)..get_key(8)).into()),\n                    compact_lsn_range: Some((Lsn(0x28)..Lsn(0x40)).into()),\n                    ..CompactOptions::default_for_gc_compaction_unit_tests()\n                },\n                &ctx,\n            )\n            .await\n            .unwrap();\n        verify_result().await;\n\n        let all_layers = inspect_and_sort(&tline, Some(get_key(0)..get_key(10))).await;\n        check_layer_map_key_eq(\n            all_layers,\n            vec![\n                // The original image layer, not compacted\n                PersistentLayerKey {\n                    key_range: get_key(0)..get_key(10),\n                    lsn_range: Lsn(0x10)..Lsn(0x11),\n                    is_delta: false,\n                },\n                // Not in the compaction key range, uncompacted\n                PersistentLayerKey {\n                    key_range: get_key(1)..get_key(2),\n                    lsn_range: Lsn(0x20)..Lsn(0x30),\n                    is_delta: true,\n                },\n                // Not in the compaction key range, uncompacted but need rewrite because the delta layer overlaps with the range\n                PersistentLayerKey {\n   
                 key_range: get_key(1)..get_key(2),\n                    lsn_range: Lsn(0x30)..Lsn(0x50),\n                    is_delta: true,\n                },\n                // Note that when we specify the LSN upper bound to be 0x40, the compaction algorithm will not try to cut the layer\n                // horizontally in half. Instead, it will include all LSNs that overlap with 0x40. So the real max_lsn of the compaction\n                // becomes 0x50.\n                PersistentLayerKey {\n                    key_range: get_key(8)..get_key(10),\n                    lsn_range: Lsn(0x30)..Lsn(0x50),\n                    is_delta: true,\n                },\n            ],\n        );\n\n        // compact again\n        tline\n            .compact_with_gc(\n                &cancel,\n                CompactOptions {\n                    compact_key_range: Some((get_key(0)..get_key(5)).into()),\n                    compact_lsn_range: Some((Lsn(0x20)..Lsn(0x50)).into()),\n                    ..CompactOptions::default_for_gc_compaction_unit_tests()\n                },\n                &ctx,\n            )\n            .await\n            .unwrap();\n        verify_result().await;\n\n        let all_layers = inspect_and_sort(&tline, Some(get_key(0)..get_key(10))).await;\n        check_layer_map_key_eq(\n            all_layers,\n            vec![\n                // The original image layer, not compacted\n                PersistentLayerKey {\n                    key_range: get_key(0)..get_key(10),\n                    lsn_range: Lsn(0x10)..Lsn(0x11),\n                    is_delta: false,\n                },\n                // The range gets compacted\n                PersistentLayerKey {\n                    key_range: get_key(1)..get_key(2),\n                    lsn_range: Lsn(0x20)..Lsn(0x50),\n                    is_delta: true,\n                },\n                // Not touched during this iteration of compaction\n                PersistentLayerKey {\n    
                key_range: get_key(8)..get_key(10),\n                    lsn_range: Lsn(0x30)..Lsn(0x50),\n                    is_delta: true,\n                },\n            ],\n        );\n\n        // final full compaction\n        tline\n            .compact_with_gc(\n                &cancel,\n                CompactOptions::default_for_gc_compaction_unit_tests(),\n                &ctx,\n            )\n            .await\n            .unwrap();\n        verify_result().await;\n\n        let all_layers = inspect_and_sort(&tline, Some(get_key(0)..get_key(10))).await;\n        check_layer_map_key_eq(\n            all_layers,\n            vec![\n                // The compacted image layer (full key range)\n                PersistentLayerKey {\n                    key_range: Key::MIN..Key::MAX,\n                    lsn_range: Lsn(0x10)..Lsn(0x11),\n                    is_delta: false,\n                },\n                // All other data in the delta layer\n                PersistentLayerKey {\n                    key_range: get_key(1)..get_key(10),\n                    lsn_range: Lsn(0x10)..Lsn(0x50),\n                    is_delta: true,\n                },\n            ],\n        );\n\n        Ok(())\n    }\n\n    #[cfg(feature = \"testing\")]\n    #[tokio::test]\n    async fn test_bottom_most_compation_redo_failure() -> anyhow::Result<()> {\n        let harness = TenantHarness::create(\"test_bottom_most_compation_redo_failure\").await?;\n        let (tenant, ctx) = harness.load().await;\n\n        fn get_key(id: u32) -> Key {\n            // using aux key here b/c they are guaranteed to be inside `collect_keyspace`.\n            let mut key = Key::from_hex(\"620000000033333333444444445500000000\").unwrap();\n            key.field6 = id;\n            key\n        }\n\n        let img_layer = (0..10)\n            .map(|id| (get_key(id), Bytes::from(format!(\"value {id}@0x10\"))))\n            .collect_vec();\n\n        let delta1 = vec![\n            (\n        
        get_key(1),\n                Lsn(0x20),\n                Value::WalRecord(NeonWalRecord::wal_append(\"@0x20\")),\n            ),\n            (\n                get_key(1),\n                Lsn(0x24),\n                Value::WalRecord(NeonWalRecord::wal_append(\"@0x24\")),\n            ),\n            (\n                get_key(1),\n                Lsn(0x28),\n                // This record will fail to redo\n                Value::WalRecord(NeonWalRecord::wal_append_conditional(\"@0x28\", \"???\")),\n            ),\n        ];\n\n        let tline = tenant\n            .create_test_timeline_with_layers(\n                TIMELINE_ID,\n                Lsn(0x10),\n                DEFAULT_PG_VERSION,\n                &ctx,\n                vec![], // in-memory layers\n                vec![DeltaLayerTestDesc::new_with_inferred_key_range(\n                    Lsn(0x20)..Lsn(0x30),\n                    delta1,\n                )], // delta layers\n                vec![(Lsn(0x10), img_layer)], // image layers\n                Lsn(0x50),\n            )\n            .await?;\n        {\n            tline\n                .applied_gc_cutoff_lsn\n                .lock_for_write()\n                .store_and_unlock(Lsn(0x30))\n                .wait()\n                .await;\n            // Update GC info\n            let mut guard = tline.gc_info.write().unwrap();\n            *guard = GcInfo {\n                retain_lsns: vec![],\n                cutoffs: GcCutoffs {\n                    time: Some(Lsn(0x30)),\n                    space: Lsn(0x30),\n                },\n                leases: Default::default(),\n                within_ancestor_pitr: false,\n            };\n        }\n\n        let cancel = CancellationToken::new();\n\n        // Compaction will fail, but should not fire any critical error.\n        // Gc-compaction currently cannot figure out what keys are not in the keyspace during the compaction\n        // process. 
It will always try to redo the logs it reads and if it doesn't work, fail the entire\n        // compaction job. Tracked in <https://github.com/neondatabase/neon/issues/10395>.\n        let res = tline\n            .compact_with_gc(\n                &cancel,\n                CompactOptions {\n                    compact_key_range: None,\n                    compact_lsn_range: None,\n                    ..CompactOptions::default_for_gc_compaction_unit_tests()\n                },\n                &ctx,\n            )\n            .await;\n        assert!(res.is_err());\n\n        Ok(())\n    }\n\n    #[cfg(feature = \"testing\")]\n    #[tokio::test]\n    async fn test_synthetic_size_calculation_with_invisible_branches() -> anyhow::Result<()> {\n        use pageserver_api::models::TimelineVisibilityState;\n\n        use crate::tenant::size::gather_inputs;\n\n        let tenant_conf = pageserver_api::models::TenantConfig {\n            // Ensure that we don't compute gc_cutoffs (which needs reading the layer files)\n            pitr_interval: Some(Duration::ZERO),\n            ..Default::default()\n        };\n        let harness = TenantHarness::create_custom(\n            \"test_synthetic_size_calculation_with_invisible_branches\",\n            tenant_conf,\n            TenantId::generate(),\n            ShardIdentity::unsharded(),\n            Generation::new(0xdeadbeef),\n        )\n        .await?;\n        let (tenant, ctx) = harness.load().await;\n        let main_tline = tenant\n            .create_test_timeline_with_layers(\n                TIMELINE_ID,\n                Lsn(0x10),\n                DEFAULT_PG_VERSION,\n                &ctx,\n                vec![],\n                vec![],\n                vec![],\n                Lsn(0x100),\n            )\n            .await?;\n\n        let snapshot1 = TimelineId::from_array(hex!(\"11223344556677881122334455667790\"));\n        tenant\n            .branch_timeline_test_with_layers(\n                
&main_tline,\n                snapshot1,\n                Some(Lsn(0x20)),\n                &ctx,\n                vec![],\n                vec![],\n                Lsn(0x50),\n            )\n            .await?;\n        let snapshot2 = TimelineId::from_array(hex!(\"11223344556677881122334455667791\"));\n        tenant\n            .branch_timeline_test_with_layers(\n                &main_tline,\n                snapshot2,\n                Some(Lsn(0x30)),\n                &ctx,\n                vec![],\n                vec![],\n                Lsn(0x50),\n            )\n            .await?;\n        let snapshot3 = TimelineId::from_array(hex!(\"11223344556677881122334455667792\"));\n        tenant\n            .branch_timeline_test_with_layers(\n                &main_tline,\n                snapshot3,\n                Some(Lsn(0x40)),\n                &ctx,\n                vec![],\n                vec![],\n                Lsn(0x50),\n            )\n            .await?;\n        let limit = Arc::new(Semaphore::new(1));\n        let max_retention_period = None;\n        let mut logical_size_cache = HashMap::new();\n        let cause = LogicalSizeCalculationCause::EvictionTaskImitation;\n        let cancel = CancellationToken::new();\n\n        let inputs = gather_inputs(\n            &tenant,\n            &limit,\n            max_retention_period,\n            &mut logical_size_cache,\n            cause,\n            &cancel,\n            &ctx,\n        )\n        .instrument(info_span!(\n            \"gather_inputs\",\n            tenant_id = \"unknown\",\n            shard_id = \"unknown\",\n        ))\n        .await?;\n        use crate::tenant::size::{LsnKind, ModelInputs, SegmentMeta};\n        use LsnKind::*;\n        use tenant_size_model::Segment;\n        let ModelInputs { mut segments, .. 
} = inputs;\n        segments.retain(|s| s.timeline_id == TIMELINE_ID);\n        for segment in segments.iter_mut() {\n            segment.segment.parent = None; // We don't care about the parent for the test\n            segment.segment.size = None; // We don't care about the size for the test\n        }\n        assert_eq!(\n            segments,\n            [\n                SegmentMeta {\n                    segment: Segment {\n                        parent: None,\n                        lsn: 0x10,\n                        size: None,\n                        needed: false,\n                    },\n                    timeline_id: TIMELINE_ID,\n                    kind: BranchStart,\n                },\n                SegmentMeta {\n                    segment: Segment {\n                        parent: None,\n                        lsn: 0x20,\n                        size: None,\n                        needed: false,\n                    },\n                    timeline_id: TIMELINE_ID,\n                    kind: BranchPoint,\n                },\n                SegmentMeta {\n                    segment: Segment {\n                        parent: None,\n                        lsn: 0x30,\n                        size: None,\n                        needed: false,\n                    },\n                    timeline_id: TIMELINE_ID,\n                    kind: BranchPoint,\n                },\n                SegmentMeta {\n                    segment: Segment {\n                        parent: None,\n                        lsn: 0x40,\n                        size: None,\n                        needed: false,\n                    },\n                    timeline_id: TIMELINE_ID,\n                    kind: BranchPoint,\n                },\n                SegmentMeta {\n                    segment: Segment {\n                        parent: None,\n                        lsn: 0x100,\n                        size: None,\n                        needed: 
false,\n                    },\n                    timeline_id: TIMELINE_ID,\n                    kind: GcCutOff,\n                }, // we need to retain everything above the last branch point\n                SegmentMeta {\n                    segment: Segment {\n                        parent: None,\n                        lsn: 0x100,\n                        size: None,\n                        needed: true,\n                    },\n                    timeline_id: TIMELINE_ID,\n                    kind: BranchEnd,\n                },\n            ]\n        );\n\n        main_tline\n            .remote_client\n            .schedule_index_upload_for_timeline_invisible_state(\n                TimelineVisibilityState::Invisible,\n            )?;\n        main_tline.remote_client.wait_completion().await?;\n        let inputs = gather_inputs(\n            &tenant,\n            &limit,\n            max_retention_period,\n            &mut logical_size_cache,\n            cause,\n            &cancel,\n            &ctx,\n        )\n        .instrument(info_span!(\n            \"gather_inputs\",\n            tenant_id = \"unknown\",\n            shard_id = \"unknown\",\n        ))\n        .await?;\n        let ModelInputs { mut segments, .. 
} = inputs;\n        segments.retain(|s| s.timeline_id == TIMELINE_ID);\n        for segment in segments.iter_mut() {\n            segment.segment.parent = None; // We don't care about the parent for the test\n            segment.segment.size = None; // We don't care about the size for the test\n        }\n        assert_eq!(\n            segments,\n            [\n                SegmentMeta {\n                    segment: Segment {\n                        parent: None,\n                        lsn: 0x10,\n                        size: None,\n                        needed: false,\n                    },\n                    timeline_id: TIMELINE_ID,\n                    kind: BranchStart,\n                },\n                SegmentMeta {\n                    segment: Segment {\n                        parent: None,\n                        lsn: 0x20,\n                        size: None,\n                        needed: false,\n                    },\n                    timeline_id: TIMELINE_ID,\n                    kind: BranchPoint,\n                },\n                SegmentMeta {\n                    segment: Segment {\n                        parent: None,\n                        lsn: 0x30,\n                        size: None,\n                        needed: false,\n                    },\n                    timeline_id: TIMELINE_ID,\n                    kind: BranchPoint,\n                },\n                SegmentMeta {\n                    segment: Segment {\n                        parent: None,\n                        lsn: 0x40,\n                        size: None,\n                        needed: false,\n                    },\n                    timeline_id: TIMELINE_ID,\n                    kind: BranchPoint,\n                },\n                SegmentMeta {\n                    segment: Segment {\n                        parent: None,\n                        lsn: 0x40, // Branch end LSN == last branch point LSN\n                        
size: None,\n                        needed: true,\n                    },\n                    timeline_id: TIMELINE_ID,\n                    kind: BranchEnd,\n                },\n            ]\n        );\n\n        Ok(())\n    }\n\n    #[tokio::test]\n    async fn test_get_force_image_creation_lsn() -> anyhow::Result<()> {\n        let tenant_conf = pageserver_api::models::TenantConfig {\n            pitr_interval: Some(Duration::from_secs(7 * 3600)),\n            image_layer_force_creation_period: Some(Duration::from_secs(3600)),\n            ..Default::default()\n        };\n\n        let tenant_id = TenantId::generate();\n\n        let harness = TenantHarness::create_custom(\n            \"test_get_force_image_creation_lsn\",\n            tenant_conf,\n            tenant_id,\n            ShardIdentity::unsharded(),\n            Generation::new(1),\n        )\n        .await?;\n        let (tenant, ctx) = harness.load().await;\n        let timeline = tenant\n            .create_test_timeline(TIMELINE_ID, Lsn(0x10), DEFAULT_PG_VERSION, &ctx)\n            .await?;\n        timeline.gc_info.write().unwrap().cutoffs.time = Some(Lsn(100));\n        {\n            let writer = timeline.writer().await;\n            writer.finish_write(Lsn(5000));\n        }\n\n        let image_creation_lsn = timeline.get_force_image_creation_lsn().unwrap();\n        assert_eq!(image_creation_lsn, Lsn(4300));\n        Ok(())\n    }\n}\n"
  },
  {
    "path": "pageserver/src/utilization.rs",
    "content": "//! An utilization metric which is used to decide on which pageserver to put next tenant.\n//!\n//! The metric is exposed via `GET /v1/utilization`. Refer and maintain its openapi spec as the\n//! truth.\n\nuse std::path::Path;\n\nuse anyhow::Context;\nuse pageserver_api::models::PageserverUtilization;\nuse utils::serde_percent::Percent;\n\nuse crate::config::PageServerConf;\nuse crate::metrics::NODE_UTILIZATION_SCORE;\nuse crate::tenant::mgr::TenantManager;\n\npub(crate) fn regenerate(\n    conf: &PageServerConf,\n    tenants_path: &Path,\n    tenant_manager: &TenantManager,\n) -> anyhow::Result<PageserverUtilization> {\n    let statvfs = nix::sys::statvfs::statvfs(tenants_path)\n        .map_err(std::io::Error::from)\n        .context(\"statvfs tenants directory\")?;\n\n    // https://unix.stackexchange.com/a/703650\n    let blocksz = if statvfs.fragment_size() > 0 {\n        statvfs.fragment_size()\n    } else {\n        statvfs.block_size()\n    };\n\n    #[cfg_attr(not(target_os = \"macos\"), allow(clippy::unnecessary_cast))]\n    let free = statvfs.blocks_available() as u64 * blocksz;\n\n    #[cfg_attr(not(target_os = \"macos\"), allow(clippy::unnecessary_cast))]\n    let used = statvfs\n        .blocks()\n        // use blocks_free instead of available here to match df in case someone compares\n        .saturating_sub(statvfs.blocks_free()) as u64\n        * blocksz;\n\n    let captured_at = std::time::SystemTime::now();\n\n    // Calculate aggregate utilization from tenants on this pageserver\n    let (disk_wanted_bytes, shard_count) = tenant_manager.calculate_utilization()?;\n\n    // Fetch the fraction of disk space which may be used\n    let disk_usable_pct = if conf.disk_usage_based_eviction.enabled {\n        conf.disk_usage_based_eviction.max_usage_pct\n    } else {\n        Percent::new(100).unwrap()\n    };\n\n    // Express a static value for how many shards we may schedule on one node\n    const MAX_SHARDS: u32 = 2500;\n\n    let 
mut doc = PageserverUtilization {\n        disk_usage_bytes: used,\n        free_space_bytes: free,\n        disk_wanted_bytes,\n        disk_usable_pct,\n        shard_count,\n        max_shard_count: MAX_SHARDS,\n        utilization_score: None,\n        captured_at: utils::serde_system_time::SystemTime(captured_at),\n    };\n\n    // Initialize `PageserverUtilization::utilization_score`\n    let score = doc.cached_score();\n    NODE_UTILIZATION_SCORE.set(score);\n\n    Ok(doc)\n}\n"
  },
  {
    "path": "pageserver/src/virtual_file/io_engine/tokio_epoll_uring_ext.rs",
    "content": "//! Like [`::tokio_epoll_uring::thread_local_system()`], but with pageserver-specific\n//! handling in case the instance can't launched.\n//!\n//! This is primarily necessary due to ENOMEM aka OutOfMemory errors during io_uring creation\n//! on older kernels, such as some (but not all) older kernels in the Linux 5.10 series.\n//! See <https://github.com/neondatabase/neon/issues/6373#issuecomment-1905814391> for more details.\n\nuse std::sync::Arc;\nuse std::sync::atomic::{AtomicU32, AtomicU64, Ordering};\n\nuse tokio_epoll_uring::{System, SystemHandle};\nuse tokio_util::sync::CancellationToken;\nuse tracing::{Instrument, error, info, info_span, warn};\nuse utils::backoff::{DEFAULT_BASE_BACKOFF_SECONDS, DEFAULT_MAX_BACKOFF_SECONDS};\n\nuse crate::metrics::tokio_epoll_uring::{self as metrics, THREAD_LOCAL_METRICS_STORAGE};\nuse crate::virtual_file::on_fatal_io_error;\n\n#[derive(Clone)]\nstruct ThreadLocalState(Arc<ThreadLocalStateInner>);\n\nstruct ThreadLocalStateInner {\n    cell: tokio::sync::OnceCell<SystemHandle<metrics::ThreadLocalMetrics>>,\n    launch_attempts: AtomicU32,\n    /// populated through fetch_add from [`THREAD_LOCAL_STATE_ID`]\n    thread_local_state_id: u64,\n}\n\nimpl Drop for ThreadLocalStateInner {\n    fn drop(&mut self) {\n        THREAD_LOCAL_METRICS_STORAGE.remove_system(self.thread_local_state_id);\n    }\n}\n\nimpl ThreadLocalState {\n    pub fn new() -> Self {\n        Self(Arc::new(ThreadLocalStateInner {\n            cell: tokio::sync::OnceCell::default(),\n            launch_attempts: AtomicU32::new(0),\n            thread_local_state_id: THREAD_LOCAL_STATE_ID.fetch_add(1, Ordering::Relaxed),\n        }))\n    }\n\n    pub fn make_id_string(&self) -> String {\n        format!(\"{}\", self.0.thread_local_state_id)\n    }\n}\n\nstatic THREAD_LOCAL_STATE_ID: AtomicU64 = AtomicU64::new(0);\n\nthread_local! 
{\n    static THREAD_LOCAL: ThreadLocalState = ThreadLocalState::new();\n}\n\n/// Panics if we cannot [`System::launch`].\npub async fn thread_local_system() -> Handle {\n    let fake_cancel = CancellationToken::new();\n    loop {\n        let thread_local_state = THREAD_LOCAL.with(|arc| arc.clone());\n        let inner = &thread_local_state.0;\n        let get_or_init_res = inner\n            .cell\n            .get_or_try_init(|| async {\n                let attempt_no = inner\n                    .launch_attempts\n                    .fetch_add(1, std::sync::atomic::Ordering::Relaxed);\n                let span = info_span!(\"tokio_epoll_uring_ext::thread_local_system\", thread_local=%thread_local_state.make_id_string(), %attempt_no);\n                async {\n                    // Rate-limit retries per thread-local.\n                    // NB: doesn't yield to executor at attempt_no=0.\n                    utils::backoff::exponential_backoff(\n                        attempt_no,\n                        DEFAULT_BASE_BACKOFF_SECONDS,\n                        DEFAULT_MAX_BACKOFF_SECONDS,\n                        &fake_cancel,\n                    )\n                    .await;\n                    let per_system_metrics = metrics::THREAD_LOCAL_METRICS_STORAGE.register_system(inner.thread_local_state_id);\n                    let res = System::launch_with_metrics(per_system_metrics)\n                    // this might move us to another executor thread => loop outside the get_or_try_init, not inside it\n                    .await;\n                    match res {\n                        Ok(system) => {\n                            info!(\"successfully launched system\");\n                            metrics::THREAD_LOCAL_LAUNCH_SUCCESSES.inc();\n                            Ok(system)\n                        }\n                        Err(tokio_epoll_uring::LaunchResult::IoUringBuild(e)) if e.kind() == std::io::ErrorKind::OutOfMemory => {\n                       
     warn!(\"not enough locked memory to tokio-epoll-uring, will retry\");\n                            info_span!(\"stats\").in_scope(|| {\n                                emit_launch_failure_process_stats();\n                            });\n                            metrics::THREAD_LOCAL_LAUNCH_FAILURES.inc();\n                            metrics::THREAD_LOCAL_METRICS_STORAGE.remove_system(inner.thread_local_state_id);\n                            Err(())\n                        }\n                        // abort the process instead of panicking because pageserver usually becomes half-broken if we panic somewhere.\n                        // This is equivalent to a fatal IO error.\n                        Err(ref e @ tokio_epoll_uring::LaunchResult::IoUringBuild(ref inner)) => {\n                            error!(error=%e, \"failed to launch thread-local tokio-epoll-uring, this should not happen, aborting process\");\n                            info_span!(\"stats\").in_scope(|| {\n                                emit_launch_failure_process_stats();\n                            });\n                            on_fatal_io_error(inner, \"launch thread-local tokio-epoll-uring\");\n                        },\n                    }\n                }\n                .instrument(span)\n                .await\n            })\n            .await;\n        if get_or_init_res.is_ok() {\n            return Handle(thread_local_state);\n        }\n    }\n}\n\nfn emit_launch_failure_process_stats() {\n    // tokio-epoll-uring stats\n    // vmlck + rlimit\n    // number of threads\n    // rss / system memory usage generally\n\n    let tokio_epoll_uring::metrics::GlobalMetrics {\n        systems_created,\n        systems_destroyed,\n    } = tokio_epoll_uring::metrics::global();\n    info!(systems_created, systems_destroyed, \"tokio-epoll-uring\");\n\n    match procfs::process::Process::myself() {\n        Ok(myself) => {\n            match myself.limits() {\n             
   Ok(limits) => {\n                    info!(?limits.max_locked_memory, \"/proc/self/limits\");\n                }\n                Err(error) => {\n                    info!(%error, \"no limit stats due to error\");\n                }\n            }\n\n            match myself.status() {\n                Ok(status) => {\n                    let procfs::process::Status {\n                        vmsize,\n                        vmlck,\n                        vmpin,\n                        vmrss,\n                        rssanon,\n                        rssfile,\n                        rssshmem,\n                        vmdata,\n                        vmstk,\n                        vmexe,\n                        vmlib,\n                        vmpte,\n                        threads,\n                        ..\n                    } = status;\n                    info!(\n                        vmsize,\n                        vmlck,\n                        vmpin,\n                        vmrss,\n                        rssanon,\n                        rssfile,\n                        rssshmem,\n                        vmdata,\n                        vmstk,\n                        vmexe,\n                        vmlib,\n                        vmpte,\n                        threads,\n                        \"/proc/self/status\"\n                    );\n                }\n                Err(error) => {\n                    info!(%error, \"no status status due to error\");\n                }\n            }\n        }\n        Err(error) => {\n            info!(%error, \"no process stats due to error\");\n        }\n    };\n}\n\n#[derive(Clone)]\npub struct Handle(ThreadLocalState);\n\nimpl std::ops::Deref for Handle {\n    type Target = SystemHandle<metrics::ThreadLocalMetrics>;\n\n    fn deref(&self) -> &Self::Target {\n        self.0\n            .0\n            .cell\n            .get()\n            .expect(\"must be already initialized when using 
this\")\n    }\n}\n"
  },
  {
    "path": "pageserver/src/virtual_file/io_engine.rs",
    "content": "//! [`super::VirtualFile`] supports different IO engines.\n//!\n//! The [`IoEngineKind`] enum identifies them.\n//!\n//! The choice of IO engine is global.\n//! Initialize using [`init`].\n//!\n//! Then use [`get`] and  [`super::OpenOptions`].\n//!\n//!\n\n#[cfg(target_os = \"linux\")]\npub(super) mod tokio_epoll_uring_ext;\n\nuse tokio_epoll_uring::IoBuf;\nuse tracing::Instrument;\n\npub(crate) use super::api::IoEngineKind;\n#[derive(Clone, Copy)]\n#[repr(u8)]\npub(crate) enum IoEngine {\n    NotSet,\n    StdFs,\n    #[cfg(target_os = \"linux\")]\n    TokioEpollUring,\n}\n\nimpl From<IoEngineKind> for IoEngine {\n    fn from(value: IoEngineKind) -> Self {\n        match value {\n            IoEngineKind::StdFs => IoEngine::StdFs,\n            #[cfg(target_os = \"linux\")]\n            IoEngineKind::TokioEpollUring => IoEngine::TokioEpollUring,\n        }\n    }\n}\n\nimpl TryFrom<u8> for IoEngine {\n    type Error = u8;\n\n    fn try_from(value: u8) -> Result<Self, Self::Error> {\n        Ok(match value {\n            v if v == (IoEngine::NotSet as u8) => IoEngine::NotSet,\n            v if v == (IoEngine::StdFs as u8) => IoEngine::StdFs,\n            #[cfg(target_os = \"linux\")]\n            v if v == (IoEngine::TokioEpollUring as u8) => IoEngine::TokioEpollUring,\n            x => return Err(x),\n        })\n    }\n}\n\nstatic IO_ENGINE: AtomicU8 = AtomicU8::new(IoEngine::NotSet as u8);\n\npub(crate) fn set(engine_kind: IoEngineKind) {\n    let engine: IoEngine = engine_kind.into();\n    IO_ENGINE.store(engine as u8, std::sync::atomic::Ordering::Relaxed);\n    #[cfg(not(test))]\n    {\n        let metric = &crate::metrics::virtual_file_io_engine::KIND;\n        metric.reset();\n        metric\n            .with_label_values(&[&format!(\"{engine_kind}\")])\n            .set(1);\n    }\n}\n\n#[cfg(not(test))]\npub(super) fn init(engine_kind: IoEngineKind) {\n    set(engine_kind);\n}\n\n/// Longer-term, this API should only be used by 
[`super::VirtualFile`].\npub(crate) fn get() -> IoEngine {\n    let cur = IoEngine::try_from(IO_ENGINE.load(Ordering::Relaxed)).unwrap();\n    if cfg!(test) {\n        let env_var_name = \"NEON_PAGESERVER_UNIT_TEST_VIRTUAL_FILE_IOENGINE\";\n        match cur {\n            IoEngine::NotSet => {\n                let kind = match std::env::var(env_var_name) {\n                    Ok(v) => match v.parse::<IoEngineKind>() {\n                        Ok(engine_kind) => engine_kind,\n                        Err(e) => {\n                            panic!(\n                                \"invalid VirtualFile io engine for env var {env_var_name}: {e:#}: {v:?}\"\n                            )\n                        }\n                    },\n                    Err(std::env::VarError::NotPresent) => {\n                        #[cfg(target_os = \"linux\")]\n                        {\n                            IoEngineKind::TokioEpollUring\n                        }\n                        #[cfg(not(target_os = \"linux\"))]\n                        {\n                            IoEngineKind::StdFs\n                        }\n                    }\n                    Err(std::env::VarError::NotUnicode(_)) => {\n                        panic!(\"env var {env_var_name} is not unicode\");\n                    }\n                };\n                self::set(kind);\n                self::get()\n            }\n            x => x,\n        }\n    } else {\n        cur\n    }\n}\n\nuse std::os::unix::prelude::FileExt;\nuse std::sync::atomic::{AtomicU8, Ordering};\n#[cfg(target_os = \"linux\")]\nuse {std::time::Duration, tracing::info};\n\nuse super::owned_buffers_io::io_buf_ext::FullSlice;\nuse super::owned_buffers_io::slice::SliceMutExt;\nuse super::{FileGuard, Metadata};\n\n#[cfg(target_os = \"linux\")]\npub(super) fn epoll_uring_error_to_std(\n    e: tokio_epoll_uring::Error<std::io::Error>,\n) -> std::io::Error {\n    match e {\n        tokio_epoll_uring::Error::Op(e) => 
e,\n        tokio_epoll_uring::Error::System(system) => std::io::Error::other(system),\n    }\n}\n\nimpl IoEngine {\n    pub(super) async fn read_at<Buf>(\n        &self,\n        file_guard: FileGuard,\n        offset: u64,\n        mut slice: tokio_epoll_uring::Slice<Buf>,\n    ) -> (\n        (FileGuard, tokio_epoll_uring::Slice<Buf>),\n        std::io::Result<usize>,\n    )\n    where\n        Buf: tokio_epoll_uring::IoBufMut + Send,\n    {\n        match self {\n            IoEngine::NotSet => panic!(\"not initialized\"),\n            IoEngine::StdFs => {\n                let rust_slice = slice.as_mut_rust_slice_full_zeroed();\n                let res = file_guard.with_std_file(|std_file| std_file.read_at(rust_slice, offset));\n                ((file_guard, slice), res)\n            }\n            #[cfg(target_os = \"linux\")]\n            IoEngine::TokioEpollUring => {\n                let system = tokio_epoll_uring_ext::thread_local_system().await;\n                let (resources, res) =\n                    retry_ecanceled_once((file_guard, slice), |(file_guard, slice)| async {\n                        system.read(file_guard, offset, slice).await\n                    })\n                    .await;\n                (resources, res.map_err(epoll_uring_error_to_std))\n            }\n        }\n    }\n    pub(super) async fn sync_all(&self, file_guard: FileGuard) -> (FileGuard, std::io::Result<()>) {\n        match self {\n            IoEngine::NotSet => panic!(\"not initialized\"),\n            IoEngine::StdFs => {\n                let res = file_guard.with_std_file(|std_file| std_file.sync_all());\n                (file_guard, res)\n            }\n            #[cfg(target_os = \"linux\")]\n            IoEngine::TokioEpollUring => {\n                let system = tokio_epoll_uring_ext::thread_local_system().await;\n                let (resources, res) = retry_ecanceled_once(file_guard, |file_guard| async {\n                    system.fsync(file_guard).await\n  
              })\n                .await;\n                (resources, res.map_err(epoll_uring_error_to_std))\n            }\n        }\n    }\n    pub(super) async fn sync_data(\n        &self,\n        file_guard: FileGuard,\n    ) -> (FileGuard, std::io::Result<()>) {\n        match self {\n            IoEngine::NotSet => panic!(\"not initialized\"),\n            IoEngine::StdFs => {\n                let res = file_guard.with_std_file(|std_file| std_file.sync_data());\n                (file_guard, res)\n            }\n            #[cfg(target_os = \"linux\")]\n            IoEngine::TokioEpollUring => {\n                let system = tokio_epoll_uring_ext::thread_local_system().await;\n                let (resources, res) = retry_ecanceled_once(file_guard, |file_guard| async {\n                    system.fdatasync(file_guard).await\n                })\n                .await;\n                (resources, res.map_err(epoll_uring_error_to_std))\n            }\n        }\n    }\n    pub(super) async fn metadata(\n        &self,\n        file_guard: FileGuard,\n    ) -> (FileGuard, std::io::Result<Metadata>) {\n        match self {\n            IoEngine::NotSet => panic!(\"not initialized\"),\n            IoEngine::StdFs => {\n                let res =\n                    file_guard.with_std_file(|std_file| std_file.metadata().map(Metadata::from));\n                (file_guard, res)\n            }\n            #[cfg(target_os = \"linux\")]\n            IoEngine::TokioEpollUring => {\n                let system = tokio_epoll_uring_ext::thread_local_system().await;\n                let (resources, res) = retry_ecanceled_once(file_guard, |file_guard| async {\n                    system.statx(file_guard).await\n                })\n                .await;\n                (\n                    resources,\n                    res.map_err(epoll_uring_error_to_std).map(Metadata::from),\n                )\n            }\n        }\n    }\n\n    pub(super) async fn set_len(\n 
       &self,\n        file_guard: FileGuard,\n        len: u64,\n    ) -> (FileGuard, std::io::Result<()>) {\n        match self {\n            IoEngine::NotSet => panic!(\"not initialized\"),\n            IoEngine::StdFs => {\n                let res = file_guard.with_std_file(|std_file| std_file.set_len(len));\n                (file_guard, res)\n            }\n            #[cfg(target_os = \"linux\")]\n            IoEngine::TokioEpollUring => {\n                // TODO: ftruncate op for tokio-epoll-uring\n                // Don't forget to use retry_ecanceled_once\n                let res = file_guard.with_std_file(|std_file| std_file.set_len(len));\n                (file_guard, res)\n            }\n        }\n    }\n\n    pub(super) async fn write_at<B: IoBuf + Send>(\n        &self,\n        file_guard: FileGuard,\n        offset: u64,\n        buf: FullSlice<B>,\n    ) -> ((FileGuard, FullSlice<B>), std::io::Result<usize>) {\n        match self {\n            IoEngine::NotSet => panic!(\"not initialized\"),\n            IoEngine::StdFs => {\n                let result = file_guard.with_std_file(|std_file| std_file.write_at(&buf, offset));\n                ((file_guard, buf), result)\n            }\n            #[cfg(target_os = \"linux\")]\n            IoEngine::TokioEpollUring => {\n                let system = tokio_epoll_uring_ext::thread_local_system().await;\n                let ((file_guard, slice), res) = retry_ecanceled_once(\n                    (file_guard, buf.into_raw_slice()),\n                    async |(file_guard, buf)| system.write(file_guard, offset, buf).await,\n                )\n                .await;\n                (\n                    (file_guard, FullSlice::must_new(slice)),\n                    res.map_err(epoll_uring_error_to_std),\n                )\n            }\n        }\n    }\n\n    /// If we switch a user of [`tokio::fs`] to use [`super::io_engine`],\n    /// they'd start blocking the executor thread if 
[`IoEngine::StdFs`] is configured\n    /// whereas before the switch to [`super::io_engine`], that wasn't the case.\n    /// This method helps avoid such a regression.\n    ///\n    /// Panics if the `spawn_blocking` fails, see [`tokio::task::JoinError`] for reasons why that can happen.\n    pub(crate) async fn spawn_blocking_and_block_on_if_std<Fut, R>(&self, work: Fut) -> R\n    where\n        Fut: 'static + Send + std::future::Future<Output = R>,\n        R: 'static + Send,\n    {\n        match self {\n            IoEngine::NotSet => panic!(\"not initialized\"),\n            IoEngine::StdFs => {\n                let span = tracing::info_span!(\"spawn_blocking_block_on_if_std\");\n                tokio::task::spawn_blocking({\n                    move || tokio::runtime::Handle::current().block_on(work.instrument(span))\n                })\n                .await\n                .expect(\"failed to join blocking code most likely it panicked, panicking as well\")\n            }\n            #[cfg(target_os = \"linux\")]\n            IoEngine::TokioEpollUring => work.await,\n        }\n    }\n}\n\n/// We observe in tests that stop pageserver with SIGTERM immediately after it was ingesting data,\n/// occasionally buffered writers fail (and get retried by BufferedWriter) with ECANCELED.\n/// The problem is believed to be a race condition in how io_uring handles punted async work (io-wq) and signals.\n/// Investigation ticket: <https://github.com/neondatabase/neon/issues/11446>\n///\n/// This function retries the operation once if it fails with ECANCELED.\n/// ONLY USE FOR IDEMPOTENT [`super::VirtualFile`] operations.\n#[cfg(target_os = \"linux\")]\npub(super) async fn retry_ecanceled_once<F, Fut, T, V>(\n    resources: T,\n    f: F,\n) -> (T, Result<V, tokio_epoll_uring::Error<std::io::Error>>)\nwhere\n    F: Fn(T) -> Fut,\n    Fut: std::future::Future<Output = (T, Result<V, tokio_epoll_uring::Error<std::io::Error>>)>,\n    T: Send,\n    V: Send,\n{\n    let 
(resources, res) = f(resources).await;\n    let Err(e) = res else {\n        return (resources, res);\n    };\n    let tokio_epoll_uring::Error::Op(err) = e else {\n        return (resources, Err(e));\n    };\n    if err.raw_os_error() != Some(nix::libc::ECANCELED) {\n        return (resources, Err(tokio_epoll_uring::Error::Op(err)));\n    }\n    {\n        static RATE_LIMIT: std::sync::Mutex<utils::rate_limit::RateLimit> =\n            std::sync::Mutex::new(utils::rate_limit::RateLimit::new(Duration::from_secs(1)));\n        let mut guard = RATE_LIMIT.lock().unwrap();\n        guard.call2(|rate_limit_stats| {\n            info!(\n                %rate_limit_stats, \"ECANCELED observed, assuming it is due to a signal being received by the submitting thread, retrying after a delay; this message is rate-limited\"\n            );\n        });\n        drop(guard);\n    }\n    tokio::time::sleep(Duration::from_millis(100)).await; // something big enough to beat even heavily overcommitted CI runners\n    let (resources, res) = f(resources).await;\n    (resources, res)\n}\n\npub(super) fn panic_operation_must_be_idempotent() {\n    panic!(\n        \"unsupported; io_engine may retry operations internally and thus needs them to be idempotent (retry_ecanceled_once)\"\n    )\n}\n\npub enum FeatureTestResult {\n    PlatformPreferred(IoEngineKind),\n    Worse {\n        engine: IoEngineKind,\n        remark: String,\n    },\n}\n\nimpl FeatureTestResult {\n    #[cfg(target_os = \"linux\")]\n    const PLATFORM_PREFERRED: IoEngineKind = IoEngineKind::TokioEpollUring;\n    #[cfg(not(target_os = \"linux\"))]\n    const PLATFORM_PREFERRED: IoEngineKind = IoEngineKind::StdFs;\n}\n\nimpl From<FeatureTestResult> for IoEngineKind {\n    fn from(val: FeatureTestResult) -> Self {\n        match val {\n            FeatureTestResult::PlatformPreferred(e) => e,\n            FeatureTestResult::Worse { engine, .. 
} => engine,\n        }\n    }\n}\n\n/// Somewhat costly under the hood, do only once.\n/// Panics if we can't set up the feature test.\npub fn feature_test() -> anyhow::Result<FeatureTestResult> {\n    std::thread::spawn(|| {\n\n        #[cfg(not(target_os = \"linux\"))]\n        {\n            Ok(FeatureTestResult::PlatformPreferred(\n                FeatureTestResult::PLATFORM_PREFERRED,\n            ))\n        }\n        #[cfg(target_os = \"linux\")]\n        {\n            let rt = tokio::runtime::Builder::new_current_thread()\n                .enable_all()\n                .build()\n                .unwrap();\n            Ok(match rt.block_on(tokio_epoll_uring::System::launch()) {\n                Ok(_) => FeatureTestResult::PlatformPreferred({\n                    assert!(matches!(\n                        IoEngineKind::TokioEpollUring,\n                        FeatureTestResult::PLATFORM_PREFERRED\n                    ));\n                    FeatureTestResult::PLATFORM_PREFERRED\n                }),\n                Err(tokio_epoll_uring::LaunchResult::IoUringBuild(e)) => {\n                    let remark = match e.raw_os_error() {\n                        Some(nix::libc::EPERM) => {\n                            // fall back\n                            \"creating tokio-epoll-uring fails with EPERM, assuming it's admin-disabled \"\n                                .to_string()\n                        }\n                    Some(nix::libc::EFAULT) => {\n                            // fail feature test\n                            anyhow::bail!(\n                                \"creating tokio-epoll-uring fails with EFAULT, might have corrupted memory\"\n                            );\n                        }\n                        Some(_) | None => {\n                            // fall back\n                            format!(\"creating tokio-epoll-uring fails with error: {e:#}\")\n                        }\n                };\n                    
FeatureTestResult::Worse {\n                        engine: IoEngineKind::StdFs,\n                        remark,\n                    }\n                }\n            })\n        }\n    })\n    .join()\n    .unwrap()\n}\n\n/// For use in benchmark binaries only.\n///\n/// Benchmarks which initialize `virtual_file` need to know what engine to use, but we also\n/// don't want to silently fall back to slower I/O engines in a benchmark: this could waste\n/// developer time trying to figure out why it's slow.\n///\n/// In practice, this method will either return IoEngineKind::TokioEpollUring, or panic.\npub fn io_engine_for_bench() -> IoEngineKind {\n    #[cfg(not(target_os = \"linux\"))]\n    {\n        panic!(\"This benchmark does I/O and can only give a representative result on Linux\");\n    }\n    #[cfg(target_os = \"linux\")]\n    {\n        match feature_test().unwrap() {\n            FeatureTestResult::PlatformPreferred(engine) => engine,\n            FeatureTestResult::Worse {\n                engine: _engine,\n                remark,\n            } => {\n                panic!(\"This benchmark does I/O can requires the preferred I/O engine: {remark}\");\n            }\n        }\n    }\n}\n"
  },
  {
    "path": "pageserver/src/virtual_file/metadata.rs",
    "content": "use std::fs;\n\npub enum Metadata {\n    StdFs(fs::Metadata),\n    #[cfg(target_os = \"linux\")]\n    TokioEpollUring(Box<tokio_epoll_uring::ops::statx::statx>),\n}\n\n#[cfg(target_os = \"linux\")]\nimpl From<Box<tokio_epoll_uring::ops::statx::statx>> for Metadata {\n    fn from(value: Box<tokio_epoll_uring::ops::statx::statx>) -> Self {\n        Metadata::TokioEpollUring(value)\n    }\n}\n\nimpl From<std::fs::Metadata> for Metadata {\n    fn from(value: std::fs::Metadata) -> Self {\n        Metadata::StdFs(value)\n    }\n}\n\nimpl Metadata {\n    pub fn len(&self) -> u64 {\n        match self {\n            Metadata::StdFs(metadata) => metadata.len(),\n            #[cfg(target_os = \"linux\")]\n            Metadata::TokioEpollUring(statx) => statx.stx_size,\n        }\n    }\n}\n"
  },
  {
    "path": "pageserver/src/virtual_file/open_options.rs",
    "content": "//! Enum-dispatch to the `OpenOptions` type of the respective [`super::IoEngineKind`].\n\nuse std::os::fd::OwnedFd;\nuse std::os::unix::fs::OpenOptionsExt;\nuse std::path::Path;\n\nuse super::io_engine::IoEngine;\n\n#[derive(Debug, Clone)]\npub struct OpenOptions {\n    /// We keep a copy of the write() flag we pass to the `inner` `OpenOptions`\n    /// to support [`Self::is_write`].\n    write: bool,\n    /// We don't expose + pass through a raw `custom_flags()` style API.\n    /// The only custom flag we support is `O_DIRECT`, which we track here\n    /// and map to `custom_flags()` in the [`Self::open`] method.\n    direct: bool,\n    inner: Inner,\n}\n#[derive(Debug, Clone)]\nenum Inner {\n    StdFs(std::fs::OpenOptions),\n    #[cfg(target_os = \"linux\")]\n    TokioEpollUring(tokio_epoll_uring::ops::open_at::OpenOptions),\n}\n\nimpl Default for OpenOptions {\n    fn default() -> Self {\n        let inner = match super::io_engine::get() {\n            IoEngine::NotSet => panic!(\"io engine not set\"),\n            IoEngine::StdFs => Inner::StdFs(std::fs::OpenOptions::new()),\n            #[cfg(target_os = \"linux\")]\n            IoEngine::TokioEpollUring => {\n                Inner::TokioEpollUring(tokio_epoll_uring::ops::open_at::OpenOptions::new())\n            }\n        };\n        Self {\n            write: false,\n            direct: false,\n            inner,\n        }\n    }\n}\n\nimpl OpenOptions {\n    pub fn new() -> OpenOptions {\n        Self::default()\n    }\n\n    pub(super) fn is_write(&self) -> bool {\n        self.write\n    }\n\n    pub(super) fn is_direct(&self) -> bool {\n        self.direct\n    }\n\n    pub fn read(mut self, read: bool) -> Self {\n        match &mut self.inner {\n            Inner::StdFs(x) => {\n                let _ = x.read(read);\n            }\n            #[cfg(target_os = \"linux\")]\n            Inner::TokioEpollUring(x) => {\n                let _ = x.read(read);\n            }\n        }\n  
      self\n    }\n\n    pub fn write(mut self, write: bool) -> Self {\n        self.write = write;\n        match &mut self.inner {\n            Inner::StdFs(x) => {\n                let _ = x.write(write);\n            }\n            #[cfg(target_os = \"linux\")]\n            Inner::TokioEpollUring(x) => {\n                let _ = x.write(write);\n            }\n        }\n        self\n    }\n\n    pub fn create(mut self, create: bool) -> Self {\n        match &mut self.inner {\n            Inner::StdFs(x) => {\n                let _ = x.create(create);\n            }\n            #[cfg(target_os = \"linux\")]\n            Inner::TokioEpollUring(x) => {\n                let _ = x.create(create);\n            }\n        }\n        self\n    }\n\n    pub fn create_new(mut self, create_new: bool) -> Self {\n        match &mut self.inner {\n            Inner::StdFs(x) => {\n                let _ = x.create_new(create_new);\n            }\n            #[cfg(target_os = \"linux\")]\n            Inner::TokioEpollUring(x) => {\n                let _ = x.create_new(create_new);\n            }\n        }\n        self\n    }\n\n    pub fn truncate(mut self, truncate: bool) -> Self {\n        match &mut self.inner {\n            Inner::StdFs(x) => {\n                let _ = x.truncate(truncate);\n            }\n            #[cfg(target_os = \"linux\")]\n            Inner::TokioEpollUring(x) => {\n                let _ = x.truncate(truncate);\n            }\n        }\n        self\n    }\n\n    /// Don't use, `O_APPEND` is not supported.\n    pub fn append(&mut self, _append: bool) {\n        super::io_engine::panic_operation_must_be_idempotent();\n    }\n\n    pub(in crate::virtual_file) async fn open(&self, path: &Path) -> std::io::Result<OwnedFd> {\n        #[cfg_attr(not(target_os = \"linux\"), allow(unused_mut))]\n        let mut custom_flags = 0;\n        if self.direct {\n            #[cfg(target_os = \"linux\")]\n            {\n                custom_flags |= 
nix::libc::O_DIRECT;\n            }\n            #[cfg(not(target_os = \"linux\"))]\n            {\n                // Other platforms may be used for development but don't necessarily have a 1:1 equivalent to Linux's O_DIRECT (macOS!).\n                // Just don't set the flag; to catch alignment bugs typical for O_DIRECT,\n                // we have a runtime validation layer inside `VirtualFile::write_at` and `VirtualFile::read_at`.\n                static WARNING: std::sync::Once = std::sync::Once::new();\n                WARNING.call_once(|| {\n                    let span = tracing::info_span!(parent: None, \"open_options\");\n                    let _enter = span.enter();\n                    tracing::warn!(\"your platform is not a supported production platform, ignoing request for O_DIRECT; this could hide alignment bugs; this warning is logged once per process\");\n                });\n            }\n        }\n\n        match self.inner.clone() {\n            Inner::StdFs(mut x) => x\n                .custom_flags(custom_flags)\n                .open(path)\n                .map(|file| file.into()),\n            #[cfg(target_os = \"linux\")]\n            Inner::TokioEpollUring(mut x) => {\n                x.custom_flags(custom_flags);\n                let system = super::io_engine::tokio_epoll_uring_ext::thread_local_system().await;\n                let (_, res) = super::io_engine::retry_ecanceled_once((), |()| async {\n                    let res = system.open(path, &x).await;\n                    ((), res)\n                })\n                .await;\n                res.map_err(super::io_engine::epoll_uring_error_to_std)\n            }\n        }\n    }\n\n    pub fn mode(mut self, mode: u32) -> Self {\n        match &mut self.inner {\n            Inner::StdFs(x) => {\n                let _ = x.mode(mode);\n            }\n            #[cfg(target_os = \"linux\")]\n            Inner::TokioEpollUring(x) => {\n                let _ = x.mode(mode);\n      
      }\n        }\n        self\n    }\n\n    pub fn direct(mut self, direct: bool) -> Self {\n        self.direct = direct;\n        self\n    }\n}\n"
  },
  {
    "path": "pageserver/src/virtual_file/owned_buffers_io/aligned_buffer/alignment.rs",
    "content": "pub trait Alignment: std::marker::Unpin + 'static {\n    /// Returns the required alignment.\n    fn align(&self) -> usize;\n}\n\n/// Alignment at compile time.\n#[derive(Debug, Clone, Copy)]\npub struct ConstAlign<const A: usize>;\n\nimpl<const A: usize> Alignment for ConstAlign<A> {\n    fn align(&self) -> usize {\n        A\n    }\n}\n\n/// Alignment at run time.\n#[derive(Debug, Clone, Copy)]\npub struct RuntimeAlign {\n    align: usize,\n}\n\nimpl Alignment for RuntimeAlign {\n    fn align(&self) -> usize {\n        self.align\n    }\n}\n"
  },
  {
    "path": "pageserver/src/virtual_file/owned_buffers_io/aligned_buffer/buffer.rs",
    "content": "use std::ops::{Deref, Range, RangeBounds};\nuse std::sync::Arc;\n\nuse super::alignment::Alignment;\nuse super::raw::RawAlignedBuffer;\nuse super::{AlignedBufferMut, ConstAlign};\n\n/// A shared, immutable aligned buffer type.\n#[derive(Clone, Debug)]\npub struct AlignedBuffer<A: Alignment> {\n    /// Shared raw buffer.\n    raw: Arc<RawAlignedBuffer<A>>,\n    /// Range that specifies the current slice.\n    range: Range<usize>,\n}\n\nimpl<A: Alignment> AlignedBuffer<A> {\n    /// Creates an immutable `IoBuffer` from the raw buffer\n    pub(super) fn from_raw(raw: RawAlignedBuffer<A>, range: Range<usize>) -> Self {\n        AlignedBuffer {\n            raw: Arc::new(raw),\n            range,\n        }\n    }\n\n    /// Returns the number of bytes in the buffer, also referred to as its 'length'.\n    #[inline]\n    pub fn len(&self) -> usize {\n        self.range.len()\n    }\n\n    /// Returns the alignment of the buffer.\n    #[inline]\n    pub fn align(&self) -> usize {\n        self.raw.align()\n    }\n\n    #[inline]\n    fn as_ptr(&self) -> *const u8 {\n        // SAFETY: `self.range.start` never exceeds the raw buffer's length (`slice` only\n        // produces in-bounds sub-ranges), so the offset stays within the allocation.\n        unsafe { self.raw.as_ptr().add(self.range.start) }\n    }\n\n    /// Extracts a slice containing the entire buffer.\n    ///\n    /// Equivalent to `&s[..]`.\n    #[inline]\n    fn as_slice(&self) -> &[u8] {\n        &self.raw.as_slice()[self.range.start..self.range.end]\n    }\n\n    /// Returns a slice of self for the index range `[begin..end)`.\n    pub fn slice(&self, range: impl RangeBounds<usize>) -> Self {\n        use core::ops::Bound;\n        let len = self.len();\n\n        let begin = match range.start_bound() {\n            Bound::Included(&n) => n,\n            Bound::Excluded(&n) => n.checked_add(1).expect(\"out of range\"),\n            Bound::Unbounded => 0,\n        };\n\n        let end = match range.end_bound() {\n            Bound::Included(&n) => n.checked_add(1).expect(\"out of 
range\"),\n            Bound::Excluded(&n) => n,\n            Bound::Unbounded => len,\n        };\n\n        assert!(\n            begin <= end,\n            \"range start must not be greater than end: {begin:?} <= {end:?}\",\n        );\n        assert!(end <= len, \"range end out of bounds: {end:?} <= {len:?}\",);\n\n        let begin = self.range.start + begin;\n        let end = self.range.start + end;\n\n        AlignedBuffer {\n            raw: Arc::clone(&self.raw),\n            range: begin..end,\n        }\n    }\n\n    /// Returns the mutable aligned buffer, if the immutable aligned buffer\n    /// has exactly one strong reference. Otherwise returns `None`.\n    ///\n    /// Any slicing is discarded: the returned buffer spans the whole underlying\n    /// raw buffer, not just this buffer's range.\n    pub fn into_mut(self) -> Option<AlignedBufferMut<A>> {\n        let raw = Arc::into_inner(self.raw)?;\n        Some(AlignedBufferMut::from_raw(raw))\n    }\n}\n\nimpl<A: Alignment> Deref for AlignedBuffer<A> {\n    type Target = [u8];\n\n    fn deref(&self) -> &Self::Target {\n        self.as_slice()\n    }\n}\n\nimpl<A: Alignment> AsRef<[u8]> for AlignedBuffer<A> {\n    fn as_ref(&self) -> &[u8] {\n        self.as_slice()\n    }\n}\n\nimpl<A: Alignment> PartialEq<[u8]> for AlignedBuffer<A> {\n    fn eq(&self, other: &[u8]) -> bool {\n        self.as_slice().eq(other)\n    }\n}\n\nimpl<const A: usize, const N: usize> From<&[u8; N]> for AlignedBuffer<ConstAlign<A>> {\n    fn from(value: &[u8; N]) -> Self {\n        let mut buf = AlignedBufferMut::with_capacity(N);\n        buf.extend_from_slice(value);\n        buf.freeze()\n    }\n}\n\n/// SAFETY: the underlying buffer references a stable memory region.\nunsafe impl<A: Alignment> tokio_epoll_uring::IoBuf for AlignedBuffer<A> {\n    fn stable_ptr(&self) -> *const u8 {\n        self.as_ptr()\n    }\n\n    fn bytes_init(&self) -> usize {\n        self.len()\n    }\n\n    fn bytes_total(&self) -> usize {\n        self.len()\n    }\n}\n"
  },
  {
    "path": "pageserver/src/virtual_file/owned_buffers_io/aligned_buffer/buffer_mut.rs",
    "content": "use std::mem::MaybeUninit;\nuse std::ops::{Deref, DerefMut};\n\nuse super::alignment::{Alignment, ConstAlign};\nuse super::buffer::AlignedBuffer;\nuse super::raw::RawAlignedBuffer;\n\n/// A mutable aligned buffer type.\n#[derive(Debug)]\npub struct AlignedBufferMut<A: Alignment> {\n    raw: RawAlignedBuffer<A>,\n}\n\nimpl<const A: usize> AlignedBufferMut<ConstAlign<A>> {\n    /// Constructs a new, empty `IoBufferMut` with at least the specified capacity and alignment.\n    ///\n    /// The buffer will be able to hold at most `capacity` elements and will never resize.\n    ///\n    ///\n    /// # Panics\n    ///\n    /// Panics if the new capacity exceeds `isize::MAX` _bytes_, or if the following alignment requirement is not met:\n    /// * `align` must not be zero,\n    ///\n    /// * `align` must be a power of two,\n    ///\n    /// * `capacity`, when rounded up to the nearest multiple of `align`,\n    ///   must not overflow isize (i.e., the rounded value must be\n    ///   less than or equal to `isize::MAX`).\n    pub fn with_capacity(capacity: usize) -> Self {\n        AlignedBufferMut {\n            raw: RawAlignedBuffer::with_capacity(capacity),\n        }\n    }\n\n    /// Constructs a new `IoBufferMut` with at least the specified capacity and alignment, filled with zeros.\n    pub fn with_capacity_zeroed(capacity: usize) -> Self {\n        use bytes::BufMut;\n        let mut buf = Self::with_capacity(capacity);\n        buf.put_bytes(0, capacity);\n        // SAFETY: `put_bytes` filled the entire buffer.\n        unsafe { buf.set_len(capacity) };\n        buf\n    }\n}\n\nimpl<A: Alignment> AlignedBufferMut<A> {\n    /// Constructs a mutable aligned buffer from raw.\n    pub(super) fn from_raw(raw: RawAlignedBuffer<A>) -> Self {\n        AlignedBufferMut { raw }\n    }\n\n    /// Returns the total number of bytes the buffer can hold.\n    #[inline]\n    pub fn capacity(&self) -> usize {\n        self.raw.capacity()\n    }\n\n    /// Returns 
the alignment of the buffer.\n    #[inline]\n    pub fn align(&self) -> usize {\n        self.raw.align()\n    }\n\n    /// Returns the number of bytes in the buffer, also referred to as its 'length'.\n    #[inline]\n    pub fn len(&self) -> usize {\n        self.raw.len()\n    }\n\n    /// Force the length of the buffer to `new_len`.\n    #[inline]\n    unsafe fn set_len(&mut self, new_len: usize) {\n        // SAFETY: the caller is unsafe\n        unsafe { self.raw.set_len(new_len) }\n    }\n\n    #[inline]\n    fn as_ptr(&self) -> *const u8 {\n        self.raw.as_ptr()\n    }\n\n    #[inline]\n    fn as_mut_ptr(&mut self) -> *mut u8 {\n        self.raw.as_mut_ptr()\n    }\n\n    /// Extracts a slice containing the entire buffer.\n    ///\n    /// Equivalent to `&s[..]`.\n    #[inline]\n    fn as_slice(&self) -> &[u8] {\n        self.raw.as_slice()\n    }\n\n    /// Extracts a mutable slice of the entire buffer.\n    ///\n    /// Equivalent to `&mut s[..]`.\n    fn as_mut_slice(&mut self) -> &mut [u8] {\n        self.raw.as_mut_slice()\n    }\n\n    /// Drops the all the contents of the buffer, setting its length to `0`.\n    #[inline]\n    pub fn clear(&mut self) {\n        self.raw.clear()\n    }\n\n    /// Reserves capacity for at least `additional` more bytes to be inserted\n    /// in the given `IoBufferMut`. The collection may reserve more space to\n    /// speculatively avoid frequent reallocations. 
After calling `reserve`,\n    /// capacity will be greater than or equal to `self.len() + additional`.\n    /// Does nothing if capacity is already sufficient.\n    ///\n    /// # Panics\n    ///\n    /// Panics if the new capacity exceeds `isize::MAX` _bytes_.\n    pub fn reserve(&mut self, additional: usize) {\n        self.raw.reserve(additional);\n    }\n\n    /// Shortens the buffer, keeping the first len bytes.\n    pub fn truncate(&mut self, len: usize) {\n        self.raw.truncate(len);\n    }\n\n    /// Consumes and leaks the `IoBufferMut`, returning a mutable reference to the contents, &'a mut [u8].\n    pub fn leak<'a>(self) -> &'a mut [u8] {\n        self.raw.leak()\n    }\n\n    pub fn freeze(self) -> AlignedBuffer<A> {\n        let len = self.len();\n        AlignedBuffer::from_raw(self.raw, 0..len)\n    }\n\n    /// Clones and appends all elements in a slice to the buffer. Reserves additional capacity as needed.\n    #[inline]\n    pub fn extend_from_slice(&mut self, extend: &[u8]) {\n        let cnt = extend.len();\n        self.reserve(cnt);\n\n        // SAFETY: we already reserved additional `cnt` bytes, safe to perform memcpy.\n        unsafe {\n            let dst = self.spare_capacity_mut();\n            // Reserved above\n            debug_assert!(dst.len() >= cnt);\n\n            core::ptr::copy_nonoverlapping(extend.as_ptr(), dst.as_mut_ptr().cast(), cnt);\n        }\n        // SAFETY: We do have at least `cnt` bytes remaining before advance.\n        unsafe {\n            bytes::BufMut::advance_mut(self, cnt);\n        }\n    }\n\n    /// Returns the remaining spare capacity of the vector as a slice of `MaybeUninit<u8>`.\n    #[inline]\n    fn spare_capacity_mut(&mut self) -> &mut [MaybeUninit<u8>] {\n        // SAFETY: we guarantees that the `Self::capacity()` bytes from\n        // `Self::as_mut_ptr()` are allocated.\n        unsafe {\n            let ptr = self.as_mut_ptr().add(self.len());\n            let len = self.capacity() - 
self.len();\n\n            core::slice::from_raw_parts_mut(ptr.cast(), len)\n        }\n    }\n}\n\nimpl<A: Alignment> Deref for AlignedBufferMut<A> {\n    type Target = [u8];\n\n    fn deref(&self) -> &Self::Target {\n        self.as_slice()\n    }\n}\n\nimpl<A: Alignment> DerefMut for AlignedBufferMut<A> {\n    fn deref_mut(&mut self) -> &mut Self::Target {\n        self.as_mut_slice()\n    }\n}\n\nimpl<A: Alignment> AsRef<[u8]> for AlignedBufferMut<A> {\n    fn as_ref(&self) -> &[u8] {\n        self.as_slice()\n    }\n}\n\nimpl<A: Alignment> AsMut<[u8]> for AlignedBufferMut<A> {\n    fn as_mut(&mut self) -> &mut [u8] {\n        self.as_mut_slice()\n    }\n}\n\nimpl<A: Alignment> PartialEq<[u8]> for AlignedBufferMut<A> {\n    fn eq(&self, other: &[u8]) -> bool {\n        self.as_slice().eq(other)\n    }\n}\n\n/// SAFETY: When advancing the internal cursor, the caller needs to make sure the bytes advanced past have been initialized.\nunsafe impl<A: Alignment> bytes::BufMut for AlignedBufferMut<A> {\n    #[inline]\n    fn remaining_mut(&self) -> usize {\n        // Although a `Vec` can have at most isize::MAX bytes, we never want to grow `IoBufferMut`.\n        // Thus, it can have at most `self.capacity` bytes.\n        self.capacity() - self.len()\n    }\n\n    // SAFETY: Caller needs to make sure the bytes being advanced past have been initialized.\n    #[inline]\n    unsafe fn advance_mut(&mut self, cnt: usize) {\n        let len = self.len();\n        let remaining = self.remaining_mut();\n\n        if remaining < cnt {\n            panic_advance(cnt, remaining);\n        }\n\n        // SAFETY: Addition will not overflow since the sum is at most the capacity.\n        unsafe {\n            self.set_len(len + cnt);\n        }\n    }\n\n    #[inline]\n    fn chunk_mut(&mut self) -> &mut bytes::buf::UninitSlice {\n        let cap = self.capacity();\n        let len = self.len();\n\n        // SAFETY: Since `self.ptr` is valid for `cap` bytes, 
`self.ptr.add(len)` must be\n        // valid for `cap - len` bytes. The subtraction will not underflow since\n        // `len <= cap`.\n        unsafe {\n            bytes::buf::UninitSlice::from_raw_parts_mut(self.as_mut_ptr().add(len), cap - len)\n        }\n    }\n}\n\n/// Panic with a nice error message.\n#[cold]\nfn panic_advance(idx: usize, len: usize) -> ! {\n    panic!(\"advance out of bounds: the len is {len} but advancing by {idx}\");\n}\n\n/// Safety: [`AlignedBufferMut`] has exclusive ownership of the io buffer,\n/// and the underlying pointer remains stable while io-uring is owning the buffer.\n/// The tokio-epoll-uring crate itself will not resize the buffer and will respect\n/// [`tokio_epoll_uring::IoBuf::bytes_total`].\nunsafe impl<A: Alignment> tokio_epoll_uring::IoBuf for AlignedBufferMut<A> {\n    fn stable_ptr(&self) -> *const u8 {\n        self.as_ptr()\n    }\n\n    fn bytes_init(&self) -> usize {\n        self.len()\n    }\n\n    fn bytes_total(&self) -> usize {\n        self.capacity()\n    }\n}\n\n// SAFETY: See above.\nunsafe impl<A: Alignment> tokio_epoll_uring::IoBufMut for AlignedBufferMut<A> {\n    fn stable_mut_ptr(&mut self) -> *mut u8 {\n        self.as_mut_ptr()\n    }\n\n    unsafe fn set_init(&mut self, init_len: usize) {\n        if self.len() < init_len {\n            // SAFETY: caller function is unsafe\n            unsafe {\n                self.set_len(init_len);\n            }\n        }\n    }\n}\n\nimpl<A: Alignment> std::io::Write for AlignedBufferMut<A> {\n    fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {\n        self.extend_from_slice(buf);\n        Ok(buf.len())\n    }\n\n    fn flush(&mut self) -> std::io::Result<()> {\n        Ok(())\n    }\n}\n\n#[cfg(test)]\nmod tests {\n\n    use super::*;\n\n    const ALIGN: usize = 4 * 1024;\n    type TestIoBufferMut = AlignedBufferMut<ConstAlign<ALIGN>>;\n\n    #[test]\n    fn test_with_capacity() {\n        let v = TestIoBufferMut::with_capacity(ALIGN * 4);\n 
       assert_eq!(v.len(), 0);\n        assert_eq!(v.capacity(), ALIGN * 4);\n        assert_eq!(v.align(), ALIGN);\n        assert_eq!(v.as_ptr().align_offset(ALIGN), 0);\n\n        let v = TestIoBufferMut::with_capacity(ALIGN / 2);\n        assert_eq!(v.len(), 0);\n        assert_eq!(v.capacity(), ALIGN / 2);\n        assert_eq!(v.align(), ALIGN);\n        assert_eq!(v.as_ptr().align_offset(ALIGN), 0);\n    }\n\n    #[test]\n    fn test_with_capacity_zeroed() {\n        let v = TestIoBufferMut::with_capacity_zeroed(ALIGN);\n        assert_eq!(v.len(), ALIGN);\n        assert_eq!(v.capacity(), ALIGN);\n        assert_eq!(v.align(), ALIGN);\n        assert_eq!(v.as_ptr().align_offset(ALIGN), 0);\n        assert_eq!(&v[..], &[0; ALIGN])\n    }\n\n    #[test]\n    fn test_reserve() {\n        use bytes::BufMut;\n        let mut v = TestIoBufferMut::with_capacity(ALIGN);\n        let capacity = v.capacity();\n        v.reserve(capacity);\n        assert_eq!(v.capacity(), capacity);\n        let data = [b'a'; ALIGN];\n        v.put(&data[..]);\n        v.reserve(capacity);\n        assert!(v.capacity() >= capacity * 2);\n        assert_eq!(&v[..], &data[..]);\n        let capacity = v.capacity();\n        v.clear();\n        v.reserve(capacity);\n        assert_eq!(capacity, v.capacity());\n    }\n\n    #[test]\n    fn test_bytes_put() {\n        use bytes::BufMut;\n        let mut v = TestIoBufferMut::with_capacity(ALIGN * 4);\n        let x = [b'a'; ALIGN];\n\n        for _ in 0..2 {\n            for _ in 0..4 {\n                v.put(&x[..]);\n            }\n            assert_eq!(v.len(), ALIGN * 4);\n            assert_eq!(v.capacity(), ALIGN * 4);\n            assert_eq!(v.align(), ALIGN);\n            assert_eq!(v.as_ptr().align_offset(ALIGN), 0);\n            v.clear()\n        }\n        assert_eq!(v.len(), 0);\n        assert_eq!(v.capacity(), ALIGN * 4);\n        assert_eq!(v.align(), ALIGN);\n        assert_eq!(v.as_ptr().align_offset(ALIGN), 0);\n    }\n\n 
   #[test]\n    #[should_panic]\n    fn test_bytes_put_panic() {\n        use bytes::BufMut;\n        const ALIGN: usize = 4 * 1024;\n        let mut v = TestIoBufferMut::with_capacity(ALIGN * 4);\n        let x = [b'a'; ALIGN];\n        for _ in 0..5 {\n            v.put_slice(&x[..]);\n        }\n    }\n\n    #[test]\n    fn test_io_buf_put_slice() {\n        use tokio_epoll_uring::BoundedBufMut;\n        const ALIGN: usize = 4 * 1024;\n        let mut v = TestIoBufferMut::with_capacity(ALIGN);\n        let x = [b'a'; ALIGN];\n\n        for _ in 0..2 {\n            v.put_slice(&x[..]);\n            assert_eq!(v.len(), ALIGN);\n            assert_eq!(v.capacity(), ALIGN);\n            assert_eq!(v.align(), ALIGN);\n            assert_eq!(v.as_ptr().align_offset(ALIGN), 0);\n            v.clear()\n        }\n        assert_eq!(v.len(), 0);\n        assert_eq!(v.capacity(), ALIGN);\n        assert_eq!(v.align(), ALIGN);\n        assert_eq!(v.as_ptr().align_offset(ALIGN), 0);\n    }\n}\n"
  },
  {
    "path": "pageserver/src/virtual_file/owned_buffers_io/aligned_buffer/raw.rs",
    "content": "use core::slice;\nuse std::alloc::{self, Layout};\nuse std::cmp;\nuse std::mem::ManuallyDrop;\n\nuse super::alignment::{Alignment, ConstAlign};\n\n#[derive(Debug)]\nstruct AlignedBufferPtr(*mut u8);\n\n// SAFETY: We gurantees no one besides `IoBufferPtr` itself has the raw pointer.\nunsafe impl Send for AlignedBufferPtr {}\n\n// SAFETY: We gurantees no one besides `IoBufferPtr` itself has the raw pointer.\nunsafe impl Sync for AlignedBufferPtr {}\n\n/// An aligned buffer type.\n#[derive(Debug)]\npub struct RawAlignedBuffer<A: Alignment> {\n    ptr: AlignedBufferPtr,\n    capacity: usize,\n    len: usize,\n    align: A,\n}\n\nimpl<const A: usize> RawAlignedBuffer<ConstAlign<A>> {\n    /// Constructs a new, empty `IoBufferMut` with at least the specified capacity and alignment.\n    ///\n    /// The buffer will be able to hold at most `capacity` elements and will never resize.\n    ///\n    ///\n    /// # Panics\n    ///\n    /// Panics if the new capacity exceeds `isize::MAX` _bytes_, or if the following alignment requirement is not met:\n    /// * `align` must not be zero,\n    ///\n    /// * `align` must be a power of two,\n    ///\n    /// * `capacity`, when rounded up to the nearest multiple of `align`,\n    ///   must not overflow isize (i.e., the rounded value must be\n    ///   less than or equal to `isize::MAX`).\n    pub fn with_capacity(capacity: usize) -> Self {\n        let align = ConstAlign::<A>;\n        let layout = Layout::from_size_align(capacity, align.align()).expect(\"Invalid layout\");\n\n        // SAFETY:  Making an allocation with a sized and aligned layout. 
The memory is manually freed with the same layout.\n        let ptr = unsafe {\n            let ptr = alloc::alloc(layout);\n            if ptr.is_null() {\n                alloc::handle_alloc_error(layout);\n            }\n            AlignedBufferPtr(ptr)\n        };\n\n        RawAlignedBuffer {\n            ptr,\n            capacity,\n            len: 0,\n            align,\n        }\n    }\n}\n\nimpl<A: Alignment> RawAlignedBuffer<A> {\n    /// Returns the total number of bytes the buffer can hold.\n    #[inline]\n    pub fn capacity(&self) -> usize {\n        self.capacity\n    }\n\n    /// Returns the alignment of the buffer.\n    #[inline]\n    pub fn align(&self) -> usize {\n        self.align.align()\n    }\n\n    /// Returns the number of bytes in the buffer, also referred to as its 'length'.\n    #[inline]\n    pub fn len(&self) -> usize {\n        self.len\n    }\n\n    /// Force the length of the buffer to `new_len`.\n    #[inline]\n    pub unsafe fn set_len(&mut self, new_len: usize) {\n        debug_assert!(new_len <= self.capacity());\n        self.len = new_len;\n    }\n\n    #[inline]\n    pub fn as_ptr(&self) -> *const u8 {\n        self.ptr.0\n    }\n\n    #[inline]\n    pub fn as_mut_ptr(&mut self) -> *mut u8 {\n        self.ptr.0\n    }\n\n    /// Extracts a slice containing the entire buffer.\n    ///\n    /// Equivalent to `&s[..]`.\n    #[inline]\n    pub fn as_slice(&self) -> &[u8] {\n        // SAFETY: The pointer is valid and `len` bytes are initialized.\n        unsafe { slice::from_raw_parts(self.as_ptr(), self.len) }\n    }\n\n    /// Extracts a mutable slice of the entire buffer.\n    ///\n    /// Equivalent to `&mut s[..]`.\n    pub fn as_mut_slice(&mut self) -> &mut [u8] {\n        // SAFETY: The pointer is valid and `len` bytes are initialized.\n        unsafe { slice::from_raw_parts_mut(self.as_mut_ptr(), self.len) }\n    }\n\n    /// Drops the all the contents of the buffer, setting its length to `0`.\n    #[inline]\n    pub 
fn clear(&mut self) {\n        self.len = 0;\n    }\n\n    /// Reserves capacity for at least `additional` more bytes to be inserted\n    /// in the given `IoBufferMut`. The collection may reserve more space to\n    /// speculatively avoid frequent reallocations. After calling `reserve`,\n    /// capacity will be greater than or equal to `self.len() + additional`.\n    /// Does nothing if capacity is already sufficient.\n    ///\n    /// # Panics\n    ///\n    /// Panics if the new capacity exceeds `isize::MAX` _bytes_.\n    pub fn reserve(&mut self, additional: usize) {\n        if additional > self.capacity() - self.len() {\n            self.reserve_inner(additional);\n        }\n    }\n\n    fn reserve_inner(&mut self, additional: usize) {\n        let Some(required_cap) = self.len().checked_add(additional) else {\n            capacity_overflow()\n        };\n\n        let old_capacity = self.capacity();\n        let align = self.align();\n        // This guarantees exponential growth. 
The doubling cannot overflow\n        // because `cap <= isize::MAX` and the type of `cap` is `usize`.\n        let cap = cmp::max(old_capacity * 2, required_cap);\n\n        if !is_valid_alloc(cap) {\n            capacity_overflow()\n        }\n        let new_layout = Layout::from_size_align(cap, self.align()).expect(\"Invalid layout\");\n\n        let old_ptr = self.as_mut_ptr();\n\n        // SAFETY: old allocation was allocated with std::alloc::alloc with the same layout,\n        // and we panics on null pointer.\n        let (ptr, cap) = unsafe {\n            let old_layout = Layout::from_size_align_unchecked(old_capacity, align);\n            let ptr = alloc::realloc(old_ptr, old_layout, new_layout.size());\n            if ptr.is_null() {\n                alloc::handle_alloc_error(new_layout);\n            }\n            (AlignedBufferPtr(ptr), cap)\n        };\n\n        self.ptr = ptr;\n        self.capacity = cap;\n    }\n\n    /// Shortens the buffer, keeping the first len bytes.\n    pub fn truncate(&mut self, len: usize) {\n        if len > self.len {\n            return;\n        }\n        self.len = len;\n    }\n\n    /// Consumes and leaks the `IoBufferMut`, returning a mutable reference to the contents, &'a mut [u8].\n    pub fn leak<'a>(self) -> &'a mut [u8] {\n        let mut buf = ManuallyDrop::new(self);\n        // SAFETY: leaking the buffer as intended.\n        unsafe { slice::from_raw_parts_mut(buf.as_mut_ptr(), buf.len) }\n    }\n}\n\nfn capacity_overflow() -> ! {\n    panic!(\"capacity overflow\")\n}\n\n// We need to guarantee the following:\n// * We don't ever allocate `> isize::MAX` byte-size objects.\n// * We don't overflow `usize::MAX` and actually allocate too little.\n//\n// On 64-bit we just need to check for overflow since trying to allocate\n// `> isize::MAX` bytes will surely fail. 
On 32-bit and 16-bit we need to add\n// an extra guard for this in case we're running on a platform which can use\n// all 4GB in user-space, e.g., PAE or x32.\n#[inline]\nfn is_valid_alloc(alloc_size: usize) -> bool {\n    !(usize::BITS < 64 && alloc_size > isize::MAX as usize)\n}\n\nimpl<A: Alignment> Drop for RawAlignedBuffer<A> {\n    fn drop(&mut self) {\n        // SAFETY: memory was allocated with std::alloc::alloc with the same layout.\n        unsafe {\n            alloc::dealloc(\n                self.as_mut_ptr(),\n                Layout::from_size_align_unchecked(self.capacity, self.align.align()),\n            )\n        }\n    }\n}\n"
  },
  {
    "path": "pageserver/src/virtual_file/owned_buffers_io/aligned_buffer/slice.rs",
    "content": "use std::ops::{Deref, DerefMut};\n\nuse super::alignment::{Alignment, ConstAlign};\n\n/// Newtype for an aligned slice.\npub struct AlignedSlice<'a, const N: usize, A: Alignment> {\n    /// underlying byte slice\n    buf: &'a mut [u8; N],\n    /// alignment marker\n    _align: A,\n}\n\nimpl<'a, const N: usize, const A: usize> AlignedSlice<'a, N, ConstAlign<A>> {\n    /// Create a new aligned slice from a mutable byte slice. The input must already satisfy the alignment.\n    pub unsafe fn new_unchecked(buf: &'a mut [u8; N]) -> Self {\n        let _align = ConstAlign::<A>;\n        assert_eq!(buf.as_ptr().align_offset(_align.align()), 0);\n        AlignedSlice { buf, _align }\n    }\n}\n\nimpl<const N: usize, A: Alignment> Deref for AlignedSlice<'_, N, A> {\n    type Target = [u8; N];\n\n    fn deref(&self) -> &Self::Target {\n        self.buf\n    }\n}\n\nimpl<const N: usize, A: Alignment> DerefMut for AlignedSlice<'_, N, A> {\n    fn deref_mut(&mut self) -> &mut Self::Target {\n        self.buf\n    }\n}\n\nimpl<const N: usize, A: Alignment> AsRef<[u8; N]> for AlignedSlice<'_, N, A> {\n    fn as_ref(&self) -> &[u8; N] {\n        self.buf\n    }\n}\n"
  },
  {
    "path": "pageserver/src/virtual_file/owned_buffers_io/aligned_buffer.rs",
    "content": "pub mod alignment;\npub mod buffer;\npub mod buffer_mut;\npub mod raw;\npub mod slice;\n\npub use alignment::*;\npub use buffer_mut::AlignedBufferMut;\npub use slice::AlignedSlice;\n"
  },
  {
    "path": "pageserver/src/virtual_file/owned_buffers_io/io_buf_aligned.rs",
    "content": "use tokio_epoll_uring::{IoBuf, IoBufMut};\n\nuse crate::virtual_file::{IoBuffer, IoBufferMut, PageWriteGuardBuf};\n\n/// A marker trait for a mutable aligned buffer type.\npub trait IoBufAlignedMut: IoBufMut {}\n\n/// A marker trait for an aligned buffer type.\npub trait IoBufAligned: IoBuf {}\n\nimpl IoBufAlignedMut for IoBufferMut {}\n\nimpl IoBufAligned for IoBuffer {}\n\nimpl IoBufAlignedMut for PageWriteGuardBuf {}\n"
  },
  {
    "path": "pageserver/src/virtual_file/owned_buffers_io/io_buf_ext.rs",
    "content": "//! See [`FullSlice`].\n\nuse std::ops::{Deref, Range};\n\nuse bytes::{Bytes, BytesMut};\nuse tokio_epoll_uring::{BoundedBuf, IoBuf, Slice};\n\nuse super::write::CheapCloneForRead;\nuse crate::virtual_file::{IoBuffer, IoBufferMut};\n\n/// The true owned equivalent for Rust [`slice`]. Use this for the write path.\n///\n/// Unlike [`tokio_epoll_uring::Slice`], which we unfortunately inherited from `tokio-uring`,\n/// [`FullSlice`] is guaranteed to have all its bytes initialized. This means that\n/// [`<FullSlice as Deref<Target = [u8]>>::len`] is equal to [`Slice::bytes_init`] and [`Slice::bytes_total`].\n///\npub struct FullSlice<B> {\n    slice: Slice<B>,\n}\n\nimpl<B> FullSlice<B>\nwhere\n    B: IoBuf,\n{\n    pub(crate) fn must_new(slice: Slice<B>) -> Self {\n        assert_eq!(slice.bytes_init(), slice.bytes_total());\n        FullSlice { slice }\n    }\n    pub(crate) fn into_raw_slice(self) -> Slice<B> {\n        let FullSlice { slice: s } = self;\n        s\n    }\n}\n\nimpl<B> Deref for FullSlice<B>\nwhere\n    B: IoBuf,\n{\n    type Target = [u8];\n\n    fn deref(&self) -> &[u8] {\n        let rust_slice = &self.slice[..];\n        assert_eq!(rust_slice.len(), self.slice.bytes_init());\n        assert_eq!(rust_slice.len(), self.slice.bytes_total());\n        rust_slice\n    }\n}\n\nimpl<B> CheapCloneForRead for FullSlice<B>\nwhere\n    B: IoBuf + CheapCloneForRead,\n{\n    fn cheap_clone(&self) -> Self {\n        let bounds = self.slice.bounds();\n        let clone = self.slice.get_ref().cheap_clone();\n        let slice = clone.slice(bounds);\n        Self { slice }\n    }\n}\n\npub(crate) trait IoBufExt {\n    /// Get a [`FullSlice`] for the entire buffer, i.e., `self[..]` or `self[0..self.len()]`.\n    fn slice_len(self) -> FullSlice<Self>\n    where\n        Self: Sized;\n}\n\nmacro_rules! 
impl_io_buf_ext {\n    ($T:ty) => {\n        impl IoBufExt for $T {\n            #[inline(always)]\n            fn slice_len(self) -> FullSlice<Self> {\n                let len = self.len();\n                let s = if len == 0 {\n                    // `BoundedBuf::slice(0..len)` or `BoundedBuf::slice(..)` has an incorrect assertion,\n                    // causing a panic if len == 0.\n                    // The Slice::from_buf_bounds has the correct assertion (<= instead of <).\n                    // => https://github.com/neondatabase/tokio-epoll-uring/issues/46\n                    let slice = self.slice_full();\n                    let mut bounds: Range<_> = slice.bounds();\n                    bounds.end = bounds.start;\n                    Slice::from_buf_bounds(slice.into_inner(), bounds)\n                } else {\n                    self.slice(0..len)\n                };\n                FullSlice::must_new(s)\n            }\n        }\n    };\n}\n\nimpl_io_buf_ext!(Bytes);\nimpl_io_buf_ext!(BytesMut);\nimpl_io_buf_ext!(Vec<u8>);\nimpl_io_buf_ext!(IoBufferMut);\nimpl_io_buf_ext!(IoBuffer);\n"
  },
  {
    "path": "pageserver/src/virtual_file/owned_buffers_io/slice.rs",
    "content": "use tokio_epoll_uring::{BoundedBuf, BoundedBufMut, IoBufMut, Slice};\n\npub(crate) trait SliceMutExt {\n    /// Get a `&mut[0..self.bytes_total()`] slice, for when you need to do borrow-based IO.\n    ///\n    /// See the test case `test_slice_full_zeroed` for the difference to just doing `&slice[..]`\n    fn as_mut_rust_slice_full_zeroed(&mut self) -> &mut [u8];\n}\n\nimpl<B> SliceMutExt for Slice<B>\nwhere\n    B: IoBufMut,\n{\n    #[inline(always)]\n    fn as_mut_rust_slice_full_zeroed(&mut self) -> &mut [u8] {\n        // zero-initialize the uninitialized parts of the buffer so we can create a Rust slice\n        //\n        // SAFETY: we own `slice`, don't write outside the bounds\n        unsafe {\n            let to_init = self.bytes_total() - self.bytes_init();\n            self.stable_mut_ptr()\n                .add(self.bytes_init())\n                .write_bytes(0, to_init);\n            self.set_init(self.bytes_total());\n        };\n        let bytes_total = self.bytes_total();\n        &mut self[0..bytes_total]\n    }\n}\n\n#[cfg(test)]\nmod tests {\n    use std::io::Read;\n\n    use bytes::Buf;\n    use tokio_epoll_uring::Slice;\n\n    use super::*;\n\n    #[test]\n    fn test_slice_full_zeroed() {\n        let make_fake_file = || bytes::BytesMut::from(&b\"12345\"[..]).reader();\n\n        // before we start the test, let's make sure we have a shared understanding of what slice_full does\n        {\n            let buf = Vec::with_capacity(3);\n            let slice: Slice<_> = buf.slice_full();\n            assert_eq!(slice.bytes_init(), 0);\n            assert_eq!(slice.bytes_total(), 3);\n            let rust_slice = &slice[..];\n            assert_eq!(\n                rust_slice.len(),\n                0,\n                \"Slice only derefs to a &[u8] of the initialized part\"\n            );\n        }\n\n        // and also let's establish a shared understanding of .slice()\n        {\n            let buf = 
Vec::with_capacity(3);\n            let slice: Slice<_> = buf.slice(0..2);\n            assert_eq!(slice.bytes_init(), 0);\n            assert_eq!(slice.bytes_total(), 2);\n            let rust_slice = &slice[..];\n            assert_eq!(\n                rust_slice.len(),\n                0,\n                \"Slice only derefs to a &[u8] of the initialized part\"\n            );\n        }\n\n        // the above leads to the easy mistake of using slice[..] for borrow-based IO like so:\n        {\n            let buf = Vec::with_capacity(3);\n            let mut slice: Slice<_> = buf.slice_full();\n            assert_eq!(slice[..].len(), 0);\n            let mut file = make_fake_file();\n            file.read_exact(&mut slice[..]).unwrap(); // one might think this reads 3 bytes but it reads 0\n            assert_eq!(&slice[..] as &[u8], &[][..] as &[u8]);\n        }\n\n        // With owned buffers IO like with VirtualFilem, you could totally\n        // pass in a `Slice` with bytes_init()=0 but bytes_total()=5\n        // and it will read 5 bytes into the slice, and return a slice that has bytes_init()=5.\n        {\n            // TODO: demo\n        }\n\n        //\n        // Ok, now that we have a shared understanding let's demo how to use the extension trait.\n        //\n\n        // slice_full()\n        {\n            let buf = Vec::with_capacity(3);\n            let mut slice: Slice<_> = buf.slice_full();\n            let rust_slice = slice.as_mut_rust_slice_full_zeroed();\n            assert_eq!(rust_slice.len(), 3);\n            assert_eq!(rust_slice, &[0, 0, 0]);\n            let mut file = make_fake_file();\n            file.read_exact(rust_slice).unwrap();\n            assert_eq!(rust_slice, b\"123\");\n            assert_eq!(&slice[..], b\"123\");\n        }\n\n        // .slice(..)\n        {\n            let buf = Vec::with_capacity(3);\n            let mut slice: Slice<_> = buf.slice(0..2);\n            let rust_slice = 
slice.as_mut_rust_slice_full_zeroed();\n            assert_eq!(rust_slice.len(), 2);\n            assert_eq!(rust_slice, &[0, 0]);\n            let mut file = make_fake_file();\n            file.read_exact(rust_slice).unwrap();\n            assert_eq!(rust_slice, b\"12\");\n            assert_eq!(&slice[..], b\"12\");\n        }\n    }\n}\n"
  },
  {
    "path": "pageserver/src/virtual_file/owned_buffers_io/write/flush.rs",
    "content": "use std::ops::ControlFlow;\n\nuse tokio_util::sync::CancellationToken;\nuse tracing::{Instrument, info_span, warn};\nuse utils::sync::duplex;\n\nuse super::{Buffer, CheapCloneForRead, OwnedAsyncWriter};\nuse crate::context::RequestContext;\nuse crate::virtual_file::MaybeFatalIo;\nuse crate::virtual_file::owned_buffers_io::io_buf_aligned::IoBufAligned;\nuse crate::virtual_file::owned_buffers_io::io_buf_ext::FullSlice;\n\n/// A handle to the flush task.\npub struct FlushHandle<Buf, W> {\n    inner: Option<FlushHandleInner<Buf, W>>,\n}\n\npub struct FlushHandleInner<Buf, W> {\n    /// A bi-directional channel that sends (buffer, offset) for writes,\n    /// and receives recyled buffer.\n    channel: duplex::mpsc::Duplex<Request<Buf>, FullSlice<Buf>>,\n    /// Join handle for the background flush task.\n    join_handle: tokio::task::JoinHandle<Result<W, FlushTaskError>>,\n}\n\nstruct FlushRequest<Buf> {\n    slice: FullSlice<Buf>,\n    offset: u64,\n    #[cfg(test)]\n    ready_to_flush_rx: Option<tokio::sync::oneshot::Receiver<()>>,\n    #[cfg(test)]\n    done_flush_tx: Option<tokio::sync::oneshot::Sender<()>>,\n}\n\npub struct ShutdownRequest {\n    pub set_len: Option<u64>,\n}\n\nenum Request<Buf> {\n    Flush(FlushRequest<Buf>),\n    Shutdown(ShutdownRequest),\n}\n\nimpl<Buf> Request<Buf> {\n    fn op_str(&self) -> &'static str {\n        match self {\n            Request::Flush(_) => \"flush\",\n            Request::Shutdown(_) => \"shutdown\",\n        }\n    }\n}\n\n/// Constructs a request and a control object for a new flush operation.\n#[cfg(not(test))]\nfn new_flush_op<Buf>(slice: FullSlice<Buf>, offset: u64) -> (FlushRequest<Buf>, FlushControl) {\n    let request = FlushRequest { slice, offset };\n    let control = FlushControl::untracked();\n\n    (request, control)\n}\n\n/// Constructs a request and a control object for a new flush operation.\n#[cfg(test)]\nfn new_flush_op<Buf>(slice: FullSlice<Buf>, offset: u64) -> (FlushRequest<Buf>, 
FlushControl) {\n    let (ready_to_flush_tx, ready_to_flush_rx) = tokio::sync::oneshot::channel();\n    let (done_flush_tx, done_flush_rx) = tokio::sync::oneshot::channel();\n    let control = FlushControl::not_started(ready_to_flush_tx, done_flush_rx);\n\n    let request = FlushRequest {\n        slice,\n        offset,\n        ready_to_flush_rx: Some(ready_to_flush_rx),\n        done_flush_tx: Some(done_flush_tx),\n    };\n    (request, control)\n}\n\n/// A handle to a `FlushRequest` that allows unit tests precise control over flush behavior.\n#[cfg(test)]\npub(crate) struct FlushControl {\n    not_started: FlushNotStarted,\n}\n\n#[cfg(not(test))]\npub(crate) struct FlushControl;\n\nimpl FlushControl {\n    #[cfg(test)]\n    fn not_started(\n        ready_to_flush_tx: tokio::sync::oneshot::Sender<()>,\n        done_flush_rx: tokio::sync::oneshot::Receiver<()>,\n    ) -> Self {\n        FlushControl {\n            not_started: FlushNotStarted {\n                ready_to_flush_tx,\n                done_flush_rx,\n            },\n        }\n    }\n\n    #[cfg(not(test))]\n    fn untracked() -> Self {\n        FlushControl\n    }\n\n    /// In tests, turn flush control into a not started state.\n    #[cfg(test)]\n    pub(crate) fn into_not_started(self) -> FlushNotStarted {\n        self.not_started\n    }\n\n    /// Release control to the submitted buffer.\n    ///\n    /// In `cfg(test)` environment, the buffer is guranteed to be flushed to disk after [`FlushControl::release`] is finishes execution.\n    pub async fn release(self) {\n        #[cfg(test)]\n        {\n            self.not_started\n                .ready_to_flush()\n                .wait_until_flush_is_done()\n                .await;\n        }\n    }\n}\n\nimpl<Buf, W> FlushHandle<Buf, W>\nwhere\n    Buf: IoBufAligned + Send + Sync + CheapCloneForRead,\n    W: OwnedAsyncWriter + Send + Sync + 'static + std::fmt::Debug,\n{\n    /// Spawns a new background flush task and obtains a handle.\n    ///\n   
 /// Handle and background task are connected through a duplex channel.\n    /// Dirty buffers are sent to the background task for flushing.\n    /// Clean buffers are sent back to the handle for reuse.\n    ///\n    /// The queue depth is 1, and the passed-in `buf` seeds the queue depth.\n    /// I.e., the passed-in buf is immediately available to the handle as a recycled buffer.\n    pub fn spawn_new<B>(\n        file: W,\n        buf: B,\n        gate_guard: utils::sync::gate::GateGuard,\n        cancel: CancellationToken,\n        ctx: RequestContext,\n        span: tracing::Span,\n    ) -> Self\n    where\n        B: Buffer<IoBuf = Buf> + Send + 'static,\n    {\n        let (front, back) = duplex::mpsc::channel(1);\n        back.try_send(buf.flush())\n            .expect(\"we just created it with capacity 1\");\n\n        let join_handle = tokio::spawn(\n            FlushBackgroundTask::new(back, file, gate_guard, cancel, ctx)\n                .run()\n                .instrument(span),\n        );\n\n        FlushHandle {\n            inner: Some(FlushHandleInner {\n                channel: front,\n                join_handle,\n            }),\n        }\n    }\n\n    /// Submits a buffer to be flushed in the background task.\n    /// Returns a buffer that completed flushing for re-use, length reset to 0, capacity unchanged.\n    /// If `save_buf_for_read` is true, then we save the buffer in `Self::maybe_flushed`, otherwise\n    /// clear `maybe_flushed`.\n    pub async fn flush(\n        &mut self,\n        slice: FullSlice<Buf>,\n        offset: u64,\n    ) -> Result<(FullSlice<Buf>, FlushControl), FlushTaskError> {\n        let (request, flush_control) = new_flush_op(slice, offset);\n\n        // Submits the buffer to the background task.\n        self.send(Request::Flush(request)).await?;\n\n        // Wait for an available buffer from the background flush task.\n        // This is the BACKPRESSURE mechanism: if the flush task can't keep up,\n        // 
then the write path will eventually wait for it here.\n        let Some(recycled) = self.inner_mut().channel.recv().await else {\n            return self.handle_error().await;\n        };\n\n        Ok((recycled, flush_control))\n    }\n\n    /// Sends poison pill to flush task and waits for it to exit.\n    pub async fn shutdown(&mut self, req: ShutdownRequest) -> Result<W, FlushTaskError> {\n        self.send(Request::Shutdown(req)).await?;\n        self.wait().await\n    }\n\n    async fn send(&mut self, request: Request<Buf>) -> Result<(), FlushTaskError> {\n        let submit = self.inner_mut().channel.send(request).await;\n        if submit.is_err() {\n            return self.handle_error().await;\n        }\n        Ok(())\n    }\n\n    async fn handle_error<T>(&mut self) -> Result<T, FlushTaskError> {\n        Err(self\n            .wait()\n            .await\n            .expect_err(\"flush task only disconnects duplex if it exits with an error\"))\n    }\n\n    async fn wait(&mut self) -> Result<W, FlushTaskError> {\n        let handle = self\n            .inner\n            .take()\n            .expect(\"must not use after we returned an error\");\n        drop(handle.channel.tx);\n        handle.join_handle.await.unwrap()\n    }\n\n    /// Gets a mutable reference to the inner handle. 
Panics if [`Self::inner`] is `None`.\n    /// This only happens if the handle is used after an error.\n    fn inner_mut(&mut self) -> &mut FlushHandleInner<Buf, W> {\n        self.inner\n            .as_mut()\n            .expect(\"must not use after we returned an error\")\n    }\n}\n\n/// A background task for flushing data to disk.\npub struct FlushBackgroundTask<Buf, W> {\n    /// A bi-directional channel that receives (buffer, offset) for writes,\n    /// and send back recycled buffer.\n    channel: duplex::mpsc::Duplex<FullSlice<Buf>, Request<Buf>>,\n    /// A writter for persisting data to disk.\n    writer: W,\n    ctx: RequestContext,\n    cancel: CancellationToken,\n    /// Prevent timeline from shuting down until the flush background task finishes flushing all remaining buffers to disk.\n    _gate_guard: utils::sync::gate::GateGuard,\n}\n\n#[derive(Debug, thiserror::Error)]\npub enum FlushTaskError {\n    #[error(\"flush task cancelled\")]\n    Cancelled,\n}\n\nimpl FlushTaskError {\n    pub fn is_cancel(&self) -> bool {\n        match self {\n            FlushTaskError::Cancelled => true,\n        }\n    }\n    pub fn into_anyhow(self) -> anyhow::Error {\n        match self {\n            FlushTaskError::Cancelled => anyhow::anyhow!(self),\n        }\n    }\n}\n\nimpl<Buf, W> FlushBackgroundTask<Buf, W>\nwhere\n    Buf: IoBufAligned + Send + Sync,\n    W: OwnedAsyncWriter + Sync + 'static,\n{\n    /// Creates a new background flush task.\n    fn new(\n        channel: duplex::mpsc::Duplex<FullSlice<Buf>, Request<Buf>>,\n        file: W,\n        gate_guard: utils::sync::gate::GateGuard,\n        cancel: CancellationToken,\n        ctx: RequestContext,\n    ) -> Self {\n        FlushBackgroundTask {\n            channel,\n            writer: file,\n            _gate_guard: gate_guard,\n            cancel,\n            ctx,\n        }\n    }\n\n    /// Runs the background flush task.\n    async fn run(mut self) -> Result<W, FlushTaskError> {\n        //  
Exit condition: channel is closed and there is no remaining buffer to be flushed\n        while let Some(request) = self.channel.recv().await {\n            let op_kind = request.op_str();\n\n            // Perform the requested operation.\n            //\n            // Error handling happens according to the current policy of crashing\n            // on fatal IO errors and retrying in place otherwise (deeming all other errors retryable).\n            // (The upper layers of the Pageserver write path are not equipped to retry write errors\n            //  becasuse they often deallocate the buffers that were already written).\n            //\n            // TODO: use utils::backoff::retry once async closures are actually usable\n            //\n            let mut request_storage = Some(request);\n            for attempt in 1.. {\n                if self.cancel.is_cancelled() {\n                    return Err(FlushTaskError::Cancelled);\n                }\n                let result = async {\n                    let request: Request<Buf> = request_storage .take().expect(\n                        \"likely previous invocation of this future didn't get polled to completion\",\n                    );\n                    match &request {\n                        Request::Shutdown(ShutdownRequest { set_len: None }) => {\n                            request_storage = Some(request);\n                            return ControlFlow::Break(());\n                        },\n                        Request::Flush(_) | Request::Shutdown(ShutdownRequest { set_len: Some(_) }) => {\n                        },\n                    }\n                    if attempt > 1 {\n                        warn!(op=%request.op_str(), \"retrying\");\n                    }\n                    // borrows so we can async move the requests into async block while not moving these borrows here\n                    let writer = &self.writer;\n                    let request_storage = &mut 
request_storage;\n                    let ctx = &self.ctx;\n                    let io_fut = match request {\n                        Request::Flush(FlushRequest { slice, offset, #[cfg(test)] ready_to_flush_rx, #[cfg(test)] done_flush_tx }) => futures::future::Either::Left(async move {\n                            #[cfg(test)]\n                            if let Some(ready_to_flush_rx) = ready_to_flush_rx {\n                                {\n                                    // In test, wait for control to signal that we are ready to flush.\n                                    if ready_to_flush_rx.await.is_err() {\n                                        tracing::debug!(\"control dropped\");\n                                    }\n                                }\n                            }\n                            let (slice, res) = writer.write_all_at(slice, offset, ctx).await;\n                            *request_storage = Some(Request::Flush(FlushRequest {\n                                slice,\n                                offset,\n                                #[cfg(test)]\n                                ready_to_flush_rx: None, // the contract is that we notify before first attempt\n                                #[cfg(test)]\n                                done_flush_tx\n                            }));\n                            res\n                        }),\n                        Request::Shutdown(ShutdownRequest { set_len }) => futures::future::Either::Right(async move {\n                            let set_len = set_len.expect(\"we filter out the None case above\");\n                            let res = writer.set_len(set_len, ctx).await;\n                            *request_storage = Some(Request::Shutdown(ShutdownRequest {\n                                set_len: Some(set_len),\n                            }));\n                            res\n                        }),\n                    };\n                    // 
Don't cancel the io_fut by doing tokio::select with self.cancel.cancelled().\n                    // The underlying tokio-epoll-uring slot / kernel operation is still ongoing and occupies resources.\n                    // If we retry indefinitely, we'll deplete those resources.\n                    // Future: teach tokio-epoll-uring io_uring operation cancellation, but still,\n                    // wait for cancelled ops to complete and discard their error.\n                    let res = io_fut.await;\n                    let res = res.maybe_fatal_err(\"owned_buffers_io flush\");\n                    let Err(err) = res else {\n                        if attempt > 1 {\n                            warn!(op=%op_kind, \"retry succeeded\");\n                        }\n                        return ControlFlow::Break(());\n                    };\n                    warn!(%err, \"error flushing buffered writer buffer to disk, retrying after backoff\");\n                    utils::backoff::exponential_backoff(attempt, 1.0, 10.0, &self.cancel).await;\n                    ControlFlow::Continue(())\n                }\n                .instrument(info_span!(\"attempt\", %attempt, %op_kind))\n                .await;\n                match result {\n                    ControlFlow::Break(()) => break,\n                    ControlFlow::Continue(()) => continue,\n                }\n            }\n            let request = request_storage.expect(\"loop must have run at least once\");\n\n            let slice = match request {\n                Request::Flush(FlushRequest {\n                    slice,\n                    #[cfg(test)]\n                    mut done_flush_tx,\n                    ..\n                }) => {\n                    #[cfg(test)]\n                    {\n                        // In test, tell control we are done flushing buffer.\n                        if done_flush_tx.take().expect(\"always Some\").send(()).is_err() {\n                            
tracing::debug!(\"control dropped\");\n                        }\n                    }\n                    slice\n                }\n                Request::Shutdown(_) => {\n                    // next iteration will observe recv() returning None\n                    continue;\n                }\n            };\n\n            // Sends the buffer back to the handle for reuse. The handle is in charge of cleaning the buffer.\n            let send_res = self.channel.send(slice).await;\n            if send_res.is_err() {\n                // Although the channel is closed, we still need to finish flushing the remaining buffers.\n                continue;\n            }\n        }\n\n        Ok(self.writer)\n    }\n}\n\n#[cfg(test)]\npub(crate) struct FlushNotStarted {\n    ready_to_flush_tx: tokio::sync::oneshot::Sender<()>,\n    done_flush_rx: tokio::sync::oneshot::Receiver<()>,\n}\n\n#[cfg(test)]\npub(crate) struct FlushInProgress {\n    done_flush_rx: tokio::sync::oneshot::Receiver<()>,\n}\n\n#[cfg(test)]\npub(crate) struct FlushDone;\n\n#[cfg(test)]\nimpl FlushNotStarted {\n    /// Signals the background task the buffer is ready to flush to disk.\n    pub fn ready_to_flush(self) -> FlushInProgress {\n        self.ready_to_flush_tx\n            .send(())\n            .map(|_| FlushInProgress {\n                done_flush_rx: self.done_flush_rx,\n            })\n            .unwrap()\n    }\n}\n\n#[cfg(test)]\nimpl FlushInProgress {\n    /// Waits until background flush is done.\n    pub async fn wait_until_flush_is_done(self) -> FlushDone {\n        self.done_flush_rx.await.unwrap();\n        FlushDone\n    }\n}\n"
  },
  {
    "path": "pageserver/src/virtual_file/owned_buffers_io/write.rs",
    "content": "mod flush;\n\nuse bytes::BufMut;\npub(crate) use flush::FlushControl;\nuse flush::FlushHandle;\npub(crate) use flush::FlushTaskError;\nuse flush::ShutdownRequest;\nuse tokio_epoll_uring::IoBuf;\nuse tokio_util::sync::CancellationToken;\nuse tracing::trace;\n\nuse super::io_buf_aligned::IoBufAligned;\nuse super::io_buf_aligned::IoBufAlignedMut;\nuse super::io_buf_ext::{FullSlice, IoBufExt};\nuse crate::context::RequestContext;\nuse crate::virtual_file::UsizeIsU64;\nuse crate::virtual_file::{IoBuffer, IoBufferMut};\n\npub(crate) trait CheapCloneForRead {\n    /// Returns a cheap clone of the buffer.\n    fn cheap_clone(&self) -> Self;\n}\n\nimpl CheapCloneForRead for IoBuffer {\n    fn cheap_clone(&self) -> Self {\n        // Cheap clone over an `Arc`.\n        self.clone()\n    }\n}\n\n/// A trait for doing owned-buffer write IO.\n/// Think [`tokio::io::AsyncWrite`] but with owned buffers.\n/// The owned buffers need to be aligned due to Direct IO requirements.\npub trait OwnedAsyncWriter {\n    fn write_all_at<Buf: IoBufAligned + Send>(\n        &self,\n        buf: FullSlice<Buf>,\n        offset: u64,\n        ctx: &RequestContext,\n    ) -> impl std::future::Future<Output = (FullSlice<Buf>, std::io::Result<()>)> + Send;\n    fn set_len(\n        &self,\n        len: u64,\n        ctx: &RequestContext,\n    ) -> impl Future<Output = std::io::Result<()>> + Send;\n}\n\n/// A wrapper aorund an [`OwnedAsyncWriter`] that uses a [`Buffer`] to batch\n/// small writes into larger writes of size [`Buffer::cap`].\n///\n/// The buffer is flushed if and only if it is full ([`Buffer::pending`] == [`Buffer::cap`]).\n/// This guarantees that writes to the filesystem happen\n/// - at offsets that are multiples of [`Buffer::cap`]\n/// - in lengths that are multiples of [`Buffer::cap`]\n///\n/// Above property is useful for Direct IO, where whatever the\n/// effectively dominating disk-sector/filesystem-block/memory-page size\n/// determines the requirements 
on\n/// - the alignment of the pointer passed to the read/write operation\n/// - the value of `count` (i.e., the length of the read/write operation)\n///   which must be a multiple of the dominating sector/block/page size.\n///\n/// See [`BufferedWriter::shutdown`] / [`BufferedWriterShutdownMode`] for different\n/// ways of dealing with the special case that the buffer is not full by the time\n/// we are done writing.\n///\n/// The first flush to the underlying `W` happens at offset `start_offset` (arg of [`BufferedWriter::new`]).\n/// The next flush is to offset `start_offset + Buffer::cap`. The one after at `start_offset + 2 * Buffer::cap` and so on.\n///\n/// TODO: decouple buffer capacity from alignment requirement.\n/// Right now we assume [`Buffer::cap`] is the alignment requirement,\n/// but actually [`Buffer::cap`] should only determine how often we flush\n/// while writing, while a separate alignment requirement argument should\n/// be passed to determine alignment requirement. This could be used by\n/// [`BufferedWriterShutdownMode::PadThenTruncate`] to avoid excessive\n/// padding of zeroes. 
For example, today, with a capacity of 64KiB, we\n/// would pad up to 64KiB-1 bytes of zeroes, then truncate off 64KiB-1.\n/// This is wasteful, e.g., if the alignment requirement is 4KiB, we only\n/// need to pad & truncate up to 4KiB-1 bytes of zeroes.\n///\n// TODO(yuchen): For large write, implementing buffer bypass for aligned parts of the write could be beneficial to throughput,\n// since we would avoid copying majority of the data into the internal buffer.\n// https://github.com/neondatabase/neon/issues/10101\npub struct BufferedWriter<B: Buffer, W> {\n    /// Clone of the buffer that was last submitted to the flush loop.\n    /// `None` if no flush request has been submitted, Some forever after.\n    pub(super) maybe_flushed: Option<FullSlice<B::IoBuf>>,\n    /// New writes are accumulated here.\n    /// `None` only during submission while we wait for flush loop to accept\n    /// the full dirty buffer in exchange for a clean buffer.\n    /// If that exchange fails with a [`FlushTaskError`], the write path\n    /// bails and leaves this as `None`.\n    /// Subsequent writes will panic if attempted.\n    /// The read path continues to work without error because [`Self::maybe_flushed`]\n    /// and [`Self::bytes_submitted`] are advanced before the flush loop exchange starts,\n    /// so, they will never try to read from [`Self::mutable`] anyway, because it's past\n    /// the [`Self::maybe_flushed`] point.\n    mutable: Option<B>,\n    /// A handle to the background flush task for writing data to disk.\n    flush_handle: FlushHandle<B::IoBuf, W>,\n    /// The number of bytes submitted to the background task.\n    bytes_submitted: u64,\n}\n\n/// How [`BufferedWriter::shutdown`] should deal with pending (=not-yet-flushed) data.\n///\n/// Cf the [`BufferedWriter`] comment's paragraph for context on why we need to think about this.\npub enum BufferedWriterShutdownMode {\n    /// Drop pending data, don't write back to file.\n    DropTail,\n    /// Pad the pending 
data with zeroes (cf [`usize::next_multiple_of`]).\n    ZeroPadToNextMultiple(usize),\n    /// Fill the IO buffer with zeroes, flush to disk, then `ftruncate` the\n    /// file to the exact number of bytes written to [`Self`].\n    ///\n    /// TODO: see in [`BufferedWriter`] comment about decoupling buffer capacity from alignment requirement.\n    PadThenTruncate,\n}\n\nimpl<B, Buf, W> BufferedWriter<B, W>\nwhere\n    B: IoBufAlignedMut + Buffer<IoBuf = Buf> + Send + 'static,\n    Buf: IoBufAligned + Send + Sync + CheapCloneForRead,\n    W: OwnedAsyncWriter + Send + Sync + 'static + std::fmt::Debug,\n{\n    /// Creates a new buffered writer.\n    ///\n    /// The `buf_new` function provides a way to initialize the owned buffers used by this writer.\n    pub fn new(\n        writer: W,\n        start_offset: u64,\n        buf_new: impl Fn() -> B,\n        gate_guard: utils::sync::gate::GateGuard,\n        cancel: CancellationToken,\n        ctx: &RequestContext,\n        flush_task_span: tracing::Span,\n    ) -> Self {\n        Self {\n            mutable: Some(buf_new()),\n            maybe_flushed: None,\n            flush_handle: FlushHandle::spawn_new(\n                writer,\n                buf_new(),\n                gate_guard,\n                cancel,\n                ctx.attached_child(),\n                flush_task_span,\n            ),\n            bytes_submitted: start_offset,\n        }\n    }\n\n    /// Returns the number of bytes submitted to the background flush task.\n    pub fn bytes_submitted(&self) -> u64 {\n        self.bytes_submitted\n    }\n\n    /// Returns `None` if used after any of the write paths returned an error.\n    pub fn inspect_mutable(&self) -> Option<&B> {\n        self.mutable.as_ref()\n    }\n\n    /// Gets a reference to the maybe flushed read-only buffer.\n    /// Returns `None` if the writer has not submitted any flush request.\n    pub fn inspect_maybe_flushed(&self) -> Option<&FullSlice<Buf>> {\n        
self.maybe_flushed.as_ref()\n    }\n\n    #[cfg_attr(target_os = \"macos\", allow(dead_code))]\n    pub async fn shutdown(\n        mut self,\n        mode: BufferedWriterShutdownMode,\n        ctx: &RequestContext,\n    ) -> Result<(u64, W), FlushTaskError> {\n        let mut mutable = self.mutable.take().expect(\"must not use after an error\");\n        let unpadded_pending = mutable.pending();\n        let final_len: u64;\n        let shutdown_req;\n        match mode {\n            BufferedWriterShutdownMode::DropTail => {\n                trace!(pending=%mutable.pending(), \"dropping pending data\");\n                drop(mutable);\n\n                final_len = self.bytes_submitted;\n                shutdown_req = ShutdownRequest { set_len: None };\n            }\n            BufferedWriterShutdownMode::ZeroPadToNextMultiple(next_multiple) => {\n                let len = mutable.pending();\n                let cap = mutable.cap();\n                assert!(\n                    len <= cap,\n                    \"buffer impl ensures this, but let's check because the extend_with below would panic if we go beyond\"\n                );\n                let padded_len = len.next_multiple_of(next_multiple);\n                assert!(\n                    padded_len <= cap,\n                    \"caller specified a multiple that is larger than the buffer capacity\"\n                );\n                let count = padded_len - len;\n                mutable.extend_with(0, count);\n                trace!(count, \"padding with zeros\");\n                self.mutable = Some(mutable);\n\n                final_len = self.bytes_submitted + padded_len.into_u64();\n                shutdown_req = ShutdownRequest { set_len: None };\n            }\n            BufferedWriterShutdownMode::PadThenTruncate => {\n                let len = mutable.pending();\n                let cap = mutable.cap();\n                // TODO: see struct comment TODO on decoupling buffer capacity from 
alignment requirement.\n                let alignment_requirement = cap;\n                assert!(len <= cap, \"buffer impl should ensure this\");\n                let padding_end_offset = len.next_multiple_of(alignment_requirement);\n                assert!(\n                    padding_end_offset <= cap,\n                    \"{padding_end_offset} <= {cap}  ({alignment_requirement})\"\n                );\n                let count = padding_end_offset - len;\n                mutable.extend_with(0, count);\n                trace!(count, \"padding with zeros\");\n                self.mutable = Some(mutable);\n\n                final_len = self.bytes_submitted + len.into_u64();\n                shutdown_req = ShutdownRequest {\n                    // Avoid set_len call if we didn't need to pad anything.\n                    set_len: if count > 0 { Some(final_len) } else { None },\n                };\n            }\n        };\n        let padded_pending = self.mutable.as_ref().map(|b| b.pending());\n        trace!(unpadded_pending, padded_pending, \"padding done\");\n        if self.mutable.is_some() {\n            self.flush(ctx).await?;\n        }\n        let Self {\n            mutable: _,\n            maybe_flushed: _,\n            mut flush_handle,\n            bytes_submitted: _,\n        } = self;\n        let writer = flush_handle.shutdown(shutdown_req).await?;\n\n        Ok((final_len, writer))\n    }\n\n    #[cfg(test)]\n    pub(crate) fn mutable(&self) -> &B {\n        self.mutable.as_ref().expect(\"must not use after an error\")\n    }\n\n    #[cfg_attr(target_os = \"macos\", allow(dead_code))]\n    pub async fn write_buffered_borrowed(\n        &mut self,\n        chunk: &[u8],\n        ctx: &RequestContext,\n    ) -> Result<usize, FlushTaskError> {\n        let (len, control) = self.write_buffered_borrowed_controlled(chunk, ctx).await?;\n        if let Some(control) = control {\n            control.release().await;\n        }\n        Ok(len)\n    
}\n\n    /// In addition to bytes submitted in this write, also returns a handle that can control the flush behavior.\n    pub(crate) async fn write_buffered_borrowed_controlled(\n        &mut self,\n        mut chunk: &[u8],\n        ctx: &RequestContext,\n    ) -> Result<(usize, Option<FlushControl>), FlushTaskError> {\n        let chunk_len = chunk.len();\n        let mut control: Option<FlushControl> = None;\n        while !chunk.is_empty() {\n            let buf = self.mutable.as_mut().expect(\"must not use after an error\");\n            let need = buf.cap() - buf.pending();\n            let have = chunk.len();\n            let n = std::cmp::min(need, have);\n            buf.extend_from_slice(&chunk[..n]);\n            chunk = &chunk[n..];\n            if buf.pending() >= buf.cap() {\n                assert_eq!(buf.pending(), buf.cap());\n                if let Some(control) = control.take() {\n                    control.release().await;\n                }\n                control = self.flush(ctx).await?;\n            }\n        }\n        Ok((chunk_len, control))\n    }\n\n    /// This function can only error if the flush task got cancelled.\n    /// In that case, we leave [`Self::mutable`] intentionally as `None`.\n    ///\n    /// The read path continues to function correctly; it can read up to the\n    /// point where it could read before, i.e., including what was in [`Self::mutable`]\n    /// before the call to this function, because that's now stored in [`Self::maybe_flushed`].\n    ///\n    /// The write path becomes unavailable and will panic if used.\n    /// The only correct solution to retry writes is to discard the entire [`BufferedWriter`],\n    /// which upper layers of pageserver write path currently do not support.\n    /// It is in fact quite hard to reason about what exactly happens in today's code.\n    /// Best case we accumulate junk in the EphemeralFile, worst case is data corruption.\n    #[must_use = \"caller must explcitly check the 
flush control\"]\n    async fn flush(\n        &mut self,\n        _ctx: &RequestContext,\n    ) -> Result<Option<FlushControl>, FlushTaskError> {\n        let buf = self.mutable.take().expect(\"must not use after an error\");\n        let buf_len = buf.pending();\n        if buf_len == 0 {\n            self.mutable = Some(buf);\n            return Ok(None);\n        }\n        // Prepare the buffer for read while flushing.\n        let slice = buf.flush();\n        // NB: this assignment also drops the reference to the old buffer, allowing us to re-own & make it mutable below.\n        self.maybe_flushed = Some(slice.cheap_clone());\n        let offset = self.bytes_submitted;\n        self.bytes_submitted += u64::try_from(buf_len).unwrap();\n\n        // If we return/panic here or later, we'll leave mutable = None, breaking further\n        // writers, but the read path should still work.\n        let (recycled, flush_control) = self.flush_handle.flush(slice, offset).await?;\n\n        // The only other place that could hold a reference to the recycled buffer\n        // is in `Self::maybe_flushed`, but we have already replaced it with the new buffer.\n        let recycled = Buffer::reuse_after_flush(recycled.into_raw_slice().into_inner());\n\n        // We got back some recycled buffer, can open up for more writes again.\n        self.mutable = Some(recycled);\n\n        Ok(Some(flush_control))\n    }\n}\n\n/// A [`Buffer`] is used by [`BufferedWriter`] to batch smaller writes into larger ones.\npub trait Buffer {\n    type IoBuf: IoBuf;\n\n    /// Capacity of the buffer. 
Must not change over the lifetime of `self`.\n    fn cap(&self) -> usize;\n\n    /// Add data to the buffer.\n    /// Panics if there is not enough room to accommodate `other`'s content, i.e.,\n    /// panics if `other.len() > self.cap() - self.pending()`.\n    fn extend_from_slice(&mut self, other: &[u8]);\n\n    /// Add `count` bytes `val` into `self`.\n    /// Panics if `count > self.cap() - self.pending()`.\n    fn extend_with(&mut self, val: u8, count: usize);\n\n    /// Number of bytes in the buffer.\n    fn pending(&self) -> usize;\n\n    /// Turns `self` into a [`FullSlice`] of the pending data\n    /// so we can use [`tokio_epoll_uring`] to write it to disk.\n    fn flush(self) -> FullSlice<Self::IoBuf>;\n\n    /// After the write to disk is done and we have gotten back the slice,\n    /// [`BufferedWriter`] uses this method to re-use the io buffer.\n    fn reuse_after_flush(iobuf: Self::IoBuf) -> Self;\n}\n\nimpl Buffer for IoBufferMut {\n    type IoBuf = IoBuffer;\n\n    fn cap(&self) -> usize {\n        self.capacity()\n    }\n\n    fn extend_from_slice(&mut self, other: &[u8]) {\n        if self.len() + other.len() > self.cap() {\n            panic!(\"Buffer capacity exceeded\");\n        }\n\n        IoBufferMut::extend_from_slice(self, other);\n    }\n\n    fn extend_with(&mut self, val: u8, count: usize) {\n        if self.len() + count > self.cap() {\n            panic!(\"Buffer capacity exceeded\");\n        }\n\n        IoBufferMut::put_bytes(self, val, count);\n    }\n\n    fn pending(&self) -> usize {\n        self.len()\n    }\n\n    fn flush(self) -> FullSlice<Self::IoBuf> {\n        self.freeze().slice_len()\n    }\n\n    /// Caller should make sure that `iobuf` only has one strong reference before invoking this method.\n    fn reuse_after_flush(iobuf: Self::IoBuf) -> Self {\n        let mut recycled = iobuf\n            .into_mut()\n            .expect(\"buffer should only have one strong reference\");\n        recycled.clear();\n        
recycled\n    }\n}\n\n#[cfg(test)]\nmod tests {\n    use std::sync::Mutex;\n\n    use rstest::rstest;\n\n    use super::*;\n    use crate::context::{DownloadBehavior, RequestContext};\n    use crate::task_mgr::TaskKind;\n\n    #[derive(Debug, PartialEq, Eq)]\n    enum Op {\n        Write { buf: Vec<u8>, offset: u64 },\n        SetLen { len: u64 },\n    }\n\n    #[derive(Default, Debug)]\n    struct RecorderWriter {\n        /// record bytes and write offsets.\n        recording: Mutex<Vec<Op>>,\n    }\n\n    impl OwnedAsyncWriter for RecorderWriter {\n        async fn write_all_at<Buf: IoBufAligned + Send>(\n            &self,\n            buf: FullSlice<Buf>,\n            offset: u64,\n            _: &RequestContext,\n        ) -> (FullSlice<Buf>, std::io::Result<()>) {\n            self.recording.lock().unwrap().push(Op::Write {\n                buf: Vec::from(&buf[..]),\n                offset,\n            });\n            (buf, Ok(()))\n        }\n        async fn set_len(&self, len: u64, _ctx: &RequestContext) -> std::io::Result<()> {\n            self.recording.lock().unwrap().push(Op::SetLen { len });\n            Ok(())\n        }\n    }\n\n    fn test_ctx() -> RequestContext {\n        RequestContext::new(TaskKind::UnitTest, DownloadBehavior::Error)\n    }\n\n    #[rstest]\n    #[tokio::test]\n    async fn test_write_all_borrowed_always_goes_through_buffer(\n        #[values(\n            BufferedWriterShutdownMode::DropTail,\n            BufferedWriterShutdownMode::ZeroPadToNextMultiple(2),\n            BufferedWriterShutdownMode::PadThenTruncate\n        )]\n        mode: BufferedWriterShutdownMode,\n    ) -> anyhow::Result<()> {\n        let ctx = test_ctx();\n        let ctx = &ctx;\n        let recorder = RecorderWriter::default();\n        let gate = utils::sync::gate::Gate::default();\n        let cancel = CancellationToken::new();\n        let cap = 4;\n        let mut writer = BufferedWriter::<_, RecorderWriter>::new(\n            recorder,\n     
       0,\n            || IoBufferMut::with_capacity(cap),\n            gate.enter()?,\n            cancel,\n            ctx,\n            tracing::Span::none(),\n        );\n\n        writer.write_buffered_borrowed(b\"abc\", ctx).await?;\n        writer.write_buffered_borrowed(b\"\", ctx).await?;\n        writer.write_buffered_borrowed(b\"d\", ctx).await?;\n        writer.write_buffered_borrowed(b\"efg\", ctx).await?;\n        writer.write_buffered_borrowed(b\"hijklm\", ctx).await?;\n\n        let mut expect = {\n            [(0, b\"abcd\"), (4, b\"efgh\"), (8, b\"ijkl\")]\n                .into_iter()\n                .map(|(offset, v)| Op::Write {\n                    offset,\n                    buf: v[..].to_vec(),\n                })\n                .collect::<Vec<_>>()\n        };\n        let expect_next_offset = 12;\n\n        match &mode {\n            BufferedWriterShutdownMode::DropTail => (),\n            // We test the case with padding to next multiple of 2 so that it's different\n            // from the alignment requirement of 4 inferred from buffer capacity.\n            // See TODOs in the `BufferedWriter` struct comment on decoupling buffer capacity from alignment requirement.\n            BufferedWriterShutdownMode::ZeroPadToNextMultiple(2) => {\n                expect.push(Op::Write {\n                    offset: expect_next_offset,\n                    // it's legitimate for pad-to-next multiple 2 to be < alignment requirement 4 inferred from buffer capacity\n                    buf: b\"m\\0\".to_vec(),\n                });\n            }\n            BufferedWriterShutdownMode::ZeroPadToNextMultiple(_) => unimplemented!(),\n            BufferedWriterShutdownMode::PadThenTruncate => {\n                expect.push(Op::Write {\n                    offset: expect_next_offset,\n                    buf: b\"m\\0\\0\\0\".to_vec(),\n                });\n                expect.push(Op::SetLen { len: 13 });\n            }\n        }\n\n        let (_, 
recorder) = writer.shutdown(mode, ctx).await?;\n        assert_eq!(&*recorder.recording.lock().unwrap(), &expect);\n        Ok(())\n    }\n\n    #[tokio::test]\n    async fn test_set_len_is_skipped_if_not_needed() -> anyhow::Result<()> {\n        let ctx = test_ctx();\n        let ctx = &ctx;\n        let recorder = RecorderWriter::default();\n        let gate = utils::sync::gate::Gate::default();\n        let cancel = CancellationToken::new();\n        let cap = 4;\n        let mut writer = BufferedWriter::<_, RecorderWriter>::new(\n            recorder,\n            0,\n            || IoBufferMut::with_capacity(cap),\n            gate.enter()?,\n            cancel,\n            ctx,\n            tracing::Span::none(),\n        );\n\n        // write a multiple of `cap`\n        writer.write_buffered_borrowed(b\"abc\", ctx).await?;\n        writer.write_buffered_borrowed(b\"defgh\", ctx).await?;\n\n        let (_, recorder) = writer\n            .shutdown(BufferedWriterShutdownMode::PadThenTruncate, ctx)\n            .await?;\n\n        let expect = {\n            [(0, b\"abcd\"), (4, b\"efgh\")]\n                .into_iter()\n                .map(|(offset, v)| Op::Write {\n                    offset,\n                    buf: v[..].to_vec(),\n                })\n                .collect::<Vec<_>>()\n        };\n\n        assert_eq!(\n            &*recorder.recording.lock().unwrap(),\n            &expect,\n            \"set_len should not be called if the buffer is already aligned\"\n        );\n\n        Ok(())\n    }\n}\n"
  },
  {
    "path": "pageserver/src/virtual_file/temporary.rs",
    "content": "use tracing::error;\nuse utils::sync::gate::GateGuard;\n\nuse crate::context::RequestContext;\n\nuse super::{\n    MaybeFatalIo, VirtualFile,\n    owned_buffers_io::{\n        io_buf_aligned::IoBufAligned, io_buf_ext::FullSlice, write::OwnedAsyncWriter,\n    },\n};\n\n/// A wrapper around [`super::VirtualFile`] that deletes the file on drop.\n/// For use as a [`OwnedAsyncWriter`] in [`super::owned_buffers_io::write::BufferedWriter`].\n#[derive(Debug)]\npub struct TempVirtualFile {\n    inner: Option<Inner>,\n}\n\n#[derive(Debug)]\nstruct Inner {\n    file: VirtualFile,\n    /// Gate guard is held on as long as we need to do operations in the path (delete on drop)\n    _gate_guard: GateGuard,\n}\n\nimpl OwnedAsyncWriter for TempVirtualFile {\n    fn write_all_at<Buf: IoBufAligned + Send>(\n        &self,\n        buf: FullSlice<Buf>,\n        offset: u64,\n        ctx: &RequestContext,\n    ) -> impl std::future::Future<Output = (FullSlice<Buf>, std::io::Result<()>)> + Send {\n        VirtualFile::write_all_at(self, buf, offset, ctx)\n    }\n\n    async fn set_len(&self, len: u64, ctx: &RequestContext) -> std::io::Result<()> {\n        VirtualFile::set_len(self, len, ctx).await\n    }\n}\n\nimpl Drop for TempVirtualFile {\n    fn drop(&mut self) {\n        let Some(Inner { file, _gate_guard }) = self.inner.take() else {\n            return;\n        };\n        let path = file.path();\n        if let Err(e) =\n            std::fs::remove_file(path).maybe_fatal_err(\"failed to remove the virtual file\")\n        {\n            error!(err=%e, path=%path, \"failed to remove\");\n        }\n        drop(_gate_guard);\n    }\n}\n\nimpl std::ops::Deref for TempVirtualFile {\n    type Target = VirtualFile;\n\n    fn deref(&self) -> &Self::Target {\n        &self\n            .inner\n            .as_ref()\n            .expect(\"only None after into_inner or drop\")\n            .file\n    }\n}\n\nimpl std::ops::DerefMut for TempVirtualFile {\n    fn 
deref_mut(&mut self) -> &mut Self::Target {\n        &mut self\n            .inner\n            .as_mut()\n            .expect(\"only None after into_inner or drop\")\n            .file\n    }\n}\n\nimpl TempVirtualFile {\n    /// The caller is responsible for ensuring that the path of `virtual_file` is not reused\n    /// until after this TempVirtualFile's `Drop` impl has completed.\n    /// Failure to do so will result in unlinking of the reused path by the original instance's Drop impl.\n    /// The best way to do so is by using a monotonic counter as a disambiguator.\n    /// TODO: centralize this disambiguator pattern inside this struct.\n    ///   => <https://github.com/neondatabase/neon/pull/11549#issuecomment-2824592831>\n    pub fn new(virtual_file: VirtualFile, gate_guard: GateGuard) -> Self {\n        Self {\n            inner: Some(Inner {\n                file: virtual_file,\n                _gate_guard: gate_guard,\n            }),\n        }\n    }\n\n    /// Dismantle this wrapper and return the underlying [`VirtualFile`].\n    /// This disables auto-unlinking functionality that is the essence of this wrapper.\n    ///\n    /// The gate guard is dropped as well; it is the caller's responsibility to ensure filesystem\n    /// operations after calls to this function are still gated by some other gate guard.\n    ///\n    /// TODO:\n    /// - centralize the common usage pattern of callers (sync_all(self), rename(self, dst), sync_all(dst.parent))\n    ///   => <https://github.com/neondatabase/neon/pull/11549#issuecomment-2824592831>\n    pub fn disarm_into_inner(mut self) -> VirtualFile {\n        self.inner\n            .take()\n            .expect(\"only None after into_inner or drop, and we are into_inner, and we consume\")\n            .file\n    }\n}\n"
  },
  {
    "path": "pageserver/src/virtual_file.rs",
    "content": "//! VirtualFile is like a normal File, but it's not bound directly to\n//! a file descriptor.\n//!\n//! Instead, the file is opened when it's read from,\n//! and if too many files are open globally in the system, least-recently\n//! used ones are closed.\n//!\n//! To track which files have been recently used, we use the clock algorithm\n//! with a 'recently_used' flag on each slot.\n//!\n//! This is similar to PostgreSQL's virtual file descriptor facility in\n//! src/backend/storage/file/fd.c\n//!\nuse std::fs::File;\nuse std::io::{Error, ErrorKind};\nuse std::os::fd::{AsRawFd, FromRawFd, IntoRawFd, OwnedFd, RawFd};\nuse std::sync::LazyLock;\nuse std::sync::atomic::{AtomicBool, AtomicU8, AtomicUsize, Ordering};\n\nuse camino::{Utf8Path, Utf8PathBuf};\nuse once_cell::sync::OnceCell;\nuse owned_buffers_io::aligned_buffer::buffer::AlignedBuffer;\nuse owned_buffers_io::aligned_buffer::{AlignedBufferMut, AlignedSlice, ConstAlign};\nuse owned_buffers_io::io_buf_aligned::{IoBufAligned, IoBufAlignedMut};\nuse owned_buffers_io::io_buf_ext::FullSlice;\nuse pageserver_api::config::defaults::DEFAULT_IO_BUFFER_ALIGNMENT;\nuse tokio::sync::{RwLock, RwLockReadGuard, RwLockWriteGuard};\nuse tokio::time::Instant;\nuse tokio_epoll_uring::{BoundedBuf, IoBuf, IoBufMut, Slice};\n\nuse self::owned_buffers_io::write::OwnedAsyncWriter;\nuse crate::assert_u64_eq_usize::UsizeIsU64;\nuse crate::context::RequestContext;\nuse crate::metrics::{STORAGE_IO_TIME_METRIC, StorageIoOperation};\nuse crate::page_cache::{PAGE_SZ, PageWriteGuard};\n\npub(crate) use api::IoMode;\npub(crate) use io_engine::IoEngineKind;\npub use io_engine::{\n    FeatureTestResult as IoEngineFeatureTestResult, feature_test as io_engine_feature_test,\n    io_engine_for_bench,\n};\npub(crate) use metadata::Metadata;\npub(crate) use open_options::*;\npub use pageserver_api::models::virtual_file as api;\npub use temporary::TempVirtualFile;\n\npub(crate) mod io_engine;\nmod metadata;\nmod open_options;\nmod 
temporary;\npub(crate) mod owned_buffers_io {\n    //! Abstractions for IO with owned buffers.\n    //!\n    //! Not actually tied to [`crate::virtual_file`] specifically, but, it's the primary\n    //! reason we need this abstraction.\n    //!\n    //! Over time, this could move into the `tokio-epoll-uring` crate, maybe `uring-common`,\n    //! but for the time being we're proving out the primitives in the neon.git repo\n    //! for faster iteration.\n\n    pub(crate) mod aligned_buffer;\n    pub(crate) mod io_buf_aligned;\n    pub(crate) mod io_buf_ext;\n    pub(crate) mod slice;\n    pub(crate) mod write;\n}\n\n#[derive(Debug)]\npub struct VirtualFile {\n    inner: VirtualFileInner,\n    _mode: IoMode,\n}\n\nimpl VirtualFile {\n    /// Open a file in read-only mode. Like File::open.\n    ///\n    /// Insensitive to `virtual_file_io_mode` setting.\n    pub async fn open<P: AsRef<Utf8Path>>(\n        path: P,\n        ctx: &RequestContext,\n    ) -> Result<Self, std::io::Error> {\n        let inner = VirtualFileInner::open(path, ctx).await?;\n        Ok(VirtualFile {\n            inner,\n            _mode: IoMode::Buffered,\n        })\n    }\n\n    /// Open a file in read-only mode. 
Like File::open.\n    ///\n    /// `O_DIRECT` will be enabled based on `virtual_file_io_mode`.\n    pub async fn open_v2<P: AsRef<Utf8Path>>(\n        path: P,\n        ctx: &RequestContext,\n    ) -> Result<Self, std::io::Error> {\n        Self::open_with_options_v2(path.as_ref(), OpenOptions::new().read(true), ctx).await\n    }\n\n    /// `O_DIRECT` will be enabled based on `virtual_file_io_mode`.\n    pub async fn open_with_options_v2<P: AsRef<Utf8Path>>(\n        path: P,\n        mut open_options: OpenOptions,\n        ctx: &RequestContext,\n    ) -> Result<Self, std::io::Error> {\n        let mode = get_io_mode();\n        let direct = match (mode, open_options.is_write()) {\n            (IoMode::Buffered, _) => false,\n            (IoMode::Direct, false) => true,\n            (IoMode::Direct, true) => false,\n            (IoMode::DirectRw, _) => true,\n        };\n        open_options = open_options.direct(direct);\n        let inner = VirtualFileInner::open_with_options(path, open_options, ctx).await?;\n        Ok(VirtualFile { inner, _mode: mode })\n    }\n\n    pub fn path(&self) -> &Utf8Path {\n        self.inner.path.as_path()\n    }\n\n    pub async fn crashsafe_overwrite<B: BoundedBuf<Buf = Buf> + Send, Buf: IoBuf + Send>(\n        final_path: Utf8PathBuf,\n        tmp_path: Utf8PathBuf,\n        content: B,\n    ) -> std::io::Result<()> {\n        VirtualFileInner::crashsafe_overwrite(final_path, tmp_path, content).await\n    }\n\n    pub async fn sync_all(&self) -> Result<(), Error> {\n        if SYNC_MODE.load(std::sync::atomic::Ordering::Relaxed) == SyncMode::UnsafeNoSync as u8 {\n            return Ok(());\n        }\n        self.inner.sync_all().await\n    }\n\n    pub async fn sync_data(&self) -> Result<(), Error> {\n        if SYNC_MODE.load(std::sync::atomic::Ordering::Relaxed) == SyncMode::UnsafeNoSync as u8 {\n            return Ok(());\n        }\n        self.inner.sync_data().await\n    }\n\n    pub async fn set_len(&self, len: u64, ctx: 
&RequestContext) -> Result<(), Error> {\n        self.inner.set_len(len, ctx).await\n    }\n\n    pub async fn metadata(&self) -> Result<Metadata, Error> {\n        self.inner.metadata().await\n    }\n\n    pub async fn read_exact_at<Buf>(\n        &self,\n        slice: Slice<Buf>,\n        offset: u64,\n        ctx: &RequestContext,\n    ) -> Result<Slice<Buf>, Error>\n    where\n        Buf: IoBufAlignedMut + Send,\n    {\n        self.inner.read_exact_at(slice, offset, ctx).await\n    }\n\n    pub async fn read_exact_at_page(\n        &self,\n        page: PageWriteGuard<'static>,\n        offset: u64,\n        ctx: &RequestContext,\n    ) -> Result<PageWriteGuard<'static>, Error> {\n        self.inner.read_exact_at_page(page, offset, ctx).await\n    }\n\n    pub async fn write_all_at<Buf: IoBufAligned + Send>(\n        &self,\n        buf: FullSlice<Buf>,\n        offset: u64,\n        ctx: &RequestContext,\n    ) -> (FullSlice<Buf>, Result<(), Error>) {\n        self.inner.write_all_at(buf, offset, ctx).await\n    }\n\n    pub(crate) async fn read_to_string<P: AsRef<Utf8Path>>(\n        path: P,\n        ctx: &RequestContext,\n    ) -> std::io::Result<String> {\n        let file = VirtualFile::open(path, ctx).await?; // TODO: open_v2\n        let mut buf = Vec::new();\n        let mut tmp = vec![0; 128];\n        let mut pos: u64 = 0;\n        loop {\n            let slice = tmp.slice(..128);\n            let (slice, res) = file.inner.read_at(slice, pos, ctx).await;\n            match res {\n                Ok(0) => break,\n                Ok(n) => {\n                    pos += n as u64;\n                    buf.extend_from_slice(&slice[..n]);\n                }\n                Err(ref e) if e.kind() == std::io::ErrorKind::Interrupted => {}\n                Err(e) => return Err(e),\n            }\n            tmp = slice.into_inner();\n        }\n        String::from_utf8(buf).map_err(|_| {\n            std::io::Error::new(ErrorKind::InvalidData, \"file 
contents are not valid UTF-8\")\n        })\n    }\n}\n\n/// Indicates whether to enable fsync, fdatasync, or O_SYNC/O_DSYNC when writing\n/// files. Switching this off is unsafe and only used for testing on machines\n/// with slow drives.\n#[repr(u8)]\npub enum SyncMode {\n    Sync,\n    UnsafeNoSync,\n}\n\nimpl TryFrom<u8> for SyncMode {\n    type Error = u8;\n\n    fn try_from(value: u8) -> Result<Self, Self::Error> {\n        Ok(match value {\n            v if v == (SyncMode::Sync as u8) => SyncMode::Sync,\n            v if v == (SyncMode::UnsafeNoSync as u8) => SyncMode::UnsafeNoSync,\n            x => return Err(x),\n        })\n    }\n}\n\n///\n/// A virtual file descriptor. You can use this just like std::fs::File, but internally\n/// the underlying file is closed if the system is low on file descriptors,\n/// and re-opened when it's accessed again.\n///\n/// Like with std::fs::File, multiple threads can read/write the file concurrently,\n/// holding just a shared reference to the same VirtualFile, using the read_at() / write_at()\n/// functions from the FileExt trait. But the functions from the Read/Write/Seek traits\n/// require a mutable reference, because they modify the \"current position\".\n///\n/// Each VirtualFile has a physical file descriptor in the global OPEN_FILES array, at the\n/// slot that 'handle' points to, if the underlying file is currently open. If it's not\n/// currently open, the 'handle' can still point to the slot where it was last kept. The\n/// 'tag' field is used to detect whether the handle still is valid or not.\n///\n#[derive(Debug)]\npub struct VirtualFileInner {\n    /// Lazy handle to the global file descriptor cache. 
The slot that this points to\n    /// might contain our File, or it may be empty, or it may contain a File that\n    /// belongs to a different VirtualFile.\n    handle: RwLock<SlotHandle>,\n\n    /// File path and options to use to open it.\n    ///\n    /// Note: this only contains the options needed to re-open it. For example,\n    /// if a new file is created, we only pass the create flag when it's initially\n    /// opened, in the VirtualFile::create() function, and strip the flag before\n    /// storing it here.\n    pub path: Utf8PathBuf,\n    open_options: OpenOptions,\n}\n\n#[derive(Debug, PartialEq, Clone, Copy)]\nstruct SlotHandle {\n    /// Index into OPEN_FILES.slots\n    index: usize,\n\n    /// Value of 'tag' in the slot. If slot's tag doesn't match, then the slot has\n    /// been recycled and no longer contains the FD for this virtual file.\n    tag: u64,\n}\n\n/// OPEN_FILES is the global array that holds the physical file descriptors that\n/// are currently open. Each slot in the array is protected by a separate lock,\n/// so that different files can be accessed independently. The lock must be held\n/// in write mode to replace the slot with a different file, but a read mode\n/// is enough to operate on the file, whether you're reading or writing to it.\n///\n/// OPEN_FILES starts in uninitialized state, and it's initialized by\n/// the virtual_file::init() function. 
It must be called exactly once at page\n/// server startup.\nstatic OPEN_FILES: OnceCell<OpenFiles> = OnceCell::new();\n\nstruct OpenFiles {\n    slots: &'static [Slot],\n\n    /// clock arm for the clock algorithm\n    next: AtomicUsize,\n}\n\nstruct Slot {\n    inner: RwLock<SlotInner>,\n\n    /// has this file been used since last clock sweep?\n    recently_used: AtomicBool,\n}\n\nstruct SlotInner {\n    /// Counter that's incremented every time a different file is stored here.\n    /// To avoid the ABA problem.\n    tag: u64,\n\n    /// the underlying file\n    file: Option<OwnedFd>,\n}\n\n/// Impl of [`tokio_epoll_uring::IoBuf`] and [`tokio_epoll_uring::IoBufMut`] for [`PageWriteGuard`].\nstruct PageWriteGuardBuf {\n    page: PageWriteGuard<'static>,\n}\n// Safety: the [`PageWriteGuard`] gives us exclusive ownership of the page cache slot,\n// and the location remains stable even if [`Self`] or the [`PageWriteGuard`] is moved.\n// Page cache pages are zero-initialized, so, wrt uninitialized memory we're good.\n// (Page cache tracks separately whether the contents are valid, see `PageWriteGuard::mark_valid`.)\nunsafe impl tokio_epoll_uring::IoBuf for PageWriteGuardBuf {\n    fn stable_ptr(&self) -> *const u8 {\n        self.page.as_ptr()\n    }\n    fn bytes_init(&self) -> usize {\n        self.page.len()\n    }\n    fn bytes_total(&self) -> usize {\n        self.page.len()\n    }\n}\n// Safety: see above, plus: the ownership of [`PageWriteGuard`] means exclusive access,\n// hence it's safe to hand out the `stable_mut_ptr()`.\nunsafe impl tokio_epoll_uring::IoBufMut for PageWriteGuardBuf {\n    fn stable_mut_ptr(&mut self) -> *mut u8 {\n        self.page.as_mut_ptr()\n    }\n\n    unsafe fn set_init(&mut self, pos: usize) {\n        // There shouldn't really be any reason to call this API since bytes_init() == bytes_total().\n        assert!(pos <= self.page.len());\n    }\n}\n\nimpl OpenFiles {\n    /// Find a slot to use, evicting an existing file descriptor 
if needed.\n    ///\n    /// On return, we hold a lock on the slot, and its 'tag' has been updated\n    /// recently_used has been set. It's all ready for reuse.\n    async fn find_victim_slot(&self) -> (SlotHandle, RwLockWriteGuard<SlotInner>) {\n        //\n        // Run the clock algorithm to find a slot to replace.\n        //\n        let num_slots = self.slots.len();\n        let mut retries = 0;\n        let mut slot;\n        let mut slot_guard;\n        let index;\n        loop {\n            let next = self.next.fetch_add(1, Ordering::AcqRel) % num_slots;\n            slot = &self.slots[next];\n\n            // If the recently_used flag on this slot is set, continue the clock\n            // sweep. Otherwise try to use this slot. If we cannot acquire the\n            // lock, also continue the clock sweep.\n            //\n            // We only continue in this manner for a while, though. If we loop\n            // through the array twice without finding a victim, just pick the\n            // next slot and wait until we can reuse it. This way, we avoid\n            // spinning in the extreme case that all the slots are busy with an\n            // I/O operation.\n            if retries < num_slots * 2 {\n                if !slot.recently_used.swap(false, Ordering::Release) {\n                    if let Ok(guard) = slot.inner.try_write() {\n                        slot_guard = guard;\n                        index = next;\n                        break;\n                    }\n                }\n                retries += 1;\n            } else {\n                slot_guard = slot.inner.write().await;\n                index = next;\n                break;\n            }\n        }\n\n        //\n        // We now have the victim slot locked. 
If it was in use previously, close the\n        // old file.\n        //\n        if let Some(old_file) = slot_guard.file.take() {\n            // the normal path of dropping VirtualFile uses \"close\", use \"close-by-replace\" here to\n            // distinguish the two.\n            STORAGE_IO_TIME_METRIC\n                .get(StorageIoOperation::CloseByReplace)\n                .observe_closure_duration(|| drop(old_file));\n        }\n\n        // Prepare the slot for reuse and return it\n        slot_guard.tag += 1;\n        slot.recently_used.store(true, Ordering::Relaxed);\n        (\n            SlotHandle {\n                index,\n                tag: slot_guard.tag,\n            },\n            slot_guard,\n        )\n    }\n}\n\n/// Identify error types that should always terminate the process.  Other\n/// error types may be eligible for retry.\npub(crate) fn is_fatal_io_error(e: &std::io::Error) -> bool {\n    use nix::errno::Errno::*;\n    match e.raw_os_error().map(nix::errno::Errno::from_raw) {\n        Some(EIO) => {\n            // Terminate on EIO because we no longer trust the device to store\n            // data safely, or to uphold persistence guarantees on fsync.\n            true\n        }\n        Some(EROFS) => {\n            // Terminate on EROFS because a filesystem is usually remounted\n            // readonly when it has experienced some critical issue, so the same\n            // logic as EIO applies.\n            true\n        }\n        Some(EACCES) => {\n            // Terminate on EACCES because we should always have permissions\n            // for our own data dir: if we don't, then we can't do our job and\n            // need administrative intervention to fix permissions.  
Terminating\n            // is the best way to make sure we stop cleanly rather than going\n            // into infinite retry loops, and will make it clear to the outside\n            // world that we need help.\n            true\n        }\n        _ => {\n            // Treat all other local file I/O errors as retryable.  This includes:\n            // - ENOSPC: we stay up and wait for eviction to free some space\n            // - EINVAL, EBADF, EBADFD: this is a code bug, not a filesystem/hardware issue\n            // - WriteZero, Interrupted: these are used internally by VirtualFile\n            false\n        }\n    }\n}\n\n/// Call this when the local filesystem gives us an error with an external\n/// cause: this includes EIO, EROFS, and EACCES: all these indicate either\n/// bad storage or bad configuration, and we can't fix that from inside\n/// a running process.\npub(crate) fn on_fatal_io_error(e: &std::io::Error, context: &str) -> ! {\n    let backtrace = std::backtrace::Backtrace::force_capture();\n    tracing::error!(\"Fatal I/O error: {e}: {context})\\n{backtrace}\");\n    std::process::abort();\n}\n\npub(crate) trait MaybeFatalIo<T> {\n    fn maybe_fatal_err(self, context: &str) -> std::io::Result<T>;\n    fn fatal_err(self, context: &str) -> T;\n}\n\nimpl<T> MaybeFatalIo<T> for std::io::Result<T> {\n    /// Terminate the process if the result is an error of a fatal type, else pass it through\n    ///\n    /// This is appropriate for writes, where we typically want to die on EIO/ACCES etc, but\n    /// not on ENOSPC.\n    fn maybe_fatal_err(self, context: &str) -> std::io::Result<T> {\n        if let Err(e) = &self {\n            if is_fatal_io_error(e) {\n                on_fatal_io_error(e, context);\n            }\n        }\n        self\n    }\n\n    /// Terminate the process on any I/O error.\n    ///\n    /// This is appropriate for reads on files that we know exist: they should always work.\n    fn fatal_err(self, context: &str) -> T {\n     
   match self {\n            Ok(v) => v,\n            Err(e) => {\n                on_fatal_io_error(&e, context);\n            }\n        }\n    }\n}\n\n/// Observe duration for the given storage I/O operation\n///\n/// Unlike `observe_closure_duration`, this supports async,\n/// where \"support\" means that we measure wall clock time.\nmacro_rules! observe_duration {\n    ($op:expr, $($body:tt)*) => {{\n        let instant = Instant::now();\n        let result = $($body)*;\n        let elapsed = instant.elapsed().as_secs_f64();\n        STORAGE_IO_TIME_METRIC\n            .get($op)\n            .observe(elapsed);\n        result\n    }}\n}\n\nmacro_rules! with_file {\n    ($this:expr, $op:expr, | $ident:ident | $($body:tt)*) => {{\n        let $ident = $this.lock_file().await?;\n        observe_duration!($op, $($body)*)\n    }};\n    ($this:expr, $op:expr, | mut $ident:ident | $($body:tt)*) => {{\n        let mut $ident = $this.lock_file().await?;\n        observe_duration!($op, $($body)*)\n    }};\n}\n\nimpl VirtualFileInner {\n    /// Open a file in read-only mode. Like File::open.\n    pub async fn open<P: AsRef<Utf8Path>>(\n        path: P,\n        ctx: &RequestContext,\n    ) -> Result<VirtualFileInner, std::io::Error> {\n        Self::open_with_options(path.as_ref(), OpenOptions::new().read(true), ctx).await\n    }\n\n    /// Open a file with given options.\n    ///\n    /// Note: If any custom flags were set in 'open_options' through OpenOptionsExt,\n    /// they will be applied also when the file is subsequently re-opened, not only\n    /// on the first time. 
Make sure that's sane!\n    pub async fn open_with_options<P: AsRef<Utf8Path>>(\n        path: P,\n        open_options: OpenOptions,\n        _ctx: &RequestContext,\n    ) -> Result<VirtualFileInner, std::io::Error> {\n        let path = path.as_ref();\n        let (handle, mut slot_guard) = get_open_files().find_victim_slot().await;\n\n        // NB: there is also StorageIoOperation::OpenAfterReplace which is for the case\n        // where our caller doesn't get to use the returned VirtualFile before its\n        // slot gets re-used by someone else.\n        let file = observe_duration!(StorageIoOperation::Open, {\n            open_options.open(path.as_std_path()).await?\n        });\n\n        // Strip all options other than read and write.\n        //\n        // It would perhaps be nicer to check just for the read and write flags\n        // explicitly, but OpenOptions doesn't contain any functions to read flags,\n        // only to set them.\n        let reopen_options = open_options\n            .clone()\n            .create(false)\n            .create_new(false)\n            .truncate(false);\n\n        let vfile = VirtualFileInner {\n            handle: RwLock::new(handle),\n            path: path.to_owned(),\n            open_options: reopen_options,\n        };\n\n        // TODO: Under pressure, it's likely the slot will get re-used and\n        // the underlying file closed before they get around to using it.\n        // => https://github.com/neondatabase/neon/issues/6065\n        slot_guard.file.replace(file);\n\n        Ok(vfile)\n    }\n\n    /// Async version of [`::utils::crashsafe::overwrite`].\n    ///\n    /// # NB:\n    ///\n    /// Doesn't actually use the [`VirtualFile`] file descriptor cache, but,\n    /// it did at an earlier time.\n    /// And it will use this module's [`io_engine`] in the near future, so, leaving it here.\n    pub async fn crashsafe_overwrite<B: BoundedBuf<Buf = Buf> + Send, Buf: IoBuf + Send>(\n        final_path: 
Utf8PathBuf,\n        tmp_path: Utf8PathBuf,\n        content: B,\n    ) -> std::io::Result<()> {\n        // TODO: use tokio_epoll_uring if configured as `io_engine`.\n        // See https://github.com/neondatabase/neon/issues/6663\n\n        tokio::task::spawn_blocking(move || {\n            let slice_storage;\n            let content_len = content.bytes_init();\n            let content = if content.bytes_init() > 0 {\n                slice_storage = Some(content.slice(0..content_len));\n                slice_storage.as_deref().expect(\"just set it to Some()\")\n            } else {\n                &[]\n            };\n            utils::crashsafe::overwrite(&final_path, &tmp_path, content)\n                .maybe_fatal_err(\"crashsafe_overwrite\")\n        })\n        .await\n        .expect(\"blocking task is never aborted\")\n    }\n\n    /// Call File::sync_all() on the underlying File.\n    pub async fn sync_all(&self) -> Result<(), Error> {\n        with_file!(self, StorageIoOperation::Fsync, |file_guard| {\n            let (_file_guard, res) = io_engine::get().sync_all(file_guard).await;\n            res.maybe_fatal_err(\"sync_all\")\n        })\n    }\n\n    /// Call File::sync_data() on the underlying File.\n    pub async fn sync_data(&self) -> Result<(), Error> {\n        with_file!(self, StorageIoOperation::Fsync, |file_guard| {\n            let (_file_guard, res) = io_engine::get().sync_data(file_guard).await;\n            res.maybe_fatal_err(\"sync_data\")\n        })\n    }\n\n    pub async fn metadata(&self) -> Result<Metadata, Error> {\n        with_file!(self, StorageIoOperation::Metadata, |file_guard| {\n            let (_file_guard, res) = io_engine::get().metadata(file_guard).await;\n            res\n        })\n    }\n\n    pub async fn set_len(&self, len: u64, _ctx: &RequestContext) -> Result<(), Error> {\n        with_file!(self, StorageIoOperation::SetLen, |file_guard| {\n            let (_file_guard, res) = 
io_engine::get().set_len(file_guard, len).await;\n            res.maybe_fatal_err(\"set_len\")\n        })\n    }\n\n    /// Helper function internal to `VirtualFile` that looks up the underlying File,\n    /// opens it and evicts some other File if necessary. The passed parameter is\n    /// assumed to be a function available for the physical `File`.\n    ///\n    /// We are doing it via a macro as Rust doesn't support async closures that\n    /// take on parameters with lifetimes.\n    async fn lock_file(&self) -> Result<FileGuard, Error> {\n        let open_files = get_open_files();\n\n        let mut handle_guard = {\n            // Read the cached slot handle, and see if the slot that it points to still\n            // contains our File.\n            //\n            // We only need to hold the handle lock while we read the current handle. If\n            // another thread closes the file and recycles the slot for a different file,\n            // we will notice that the handle we read is no longer valid and retry.\n            let mut handle = *self.handle.read().await;\n            loop {\n                // Check if the slot contains our File\n                {\n                    let slot = &open_files.slots[handle.index];\n                    let slot_guard = slot.inner.read().await;\n                    if slot_guard.tag == handle.tag && slot_guard.file.is_some() {\n                        // Found a cached file descriptor.\n                        slot.recently_used.store(true, Ordering::Relaxed);\n                        return Ok(FileGuard { slot_guard });\n                    }\n                }\n\n                // The slot didn't contain our File. 
We will have to open it ourselves,\n                // but before that, grab a write lock on handle in the VirtualFile, so\n                // that no other thread will try to concurrently open the same file.\n                let handle_guard = self.handle.write().await;\n\n                // If another thread changed the handle while we were not holding the lock,\n                // then the handle might now be valid again. Loop back to retry.\n                if *handle_guard != handle {\n                    handle = *handle_guard;\n                    continue;\n                }\n                break handle_guard;\n            }\n        };\n\n        // We need to open the file ourselves. The handle in the VirtualFile is\n        // now locked in write-mode. Find a free slot to put it in.\n        let (handle, mut slot_guard) = open_files.find_victim_slot().await;\n\n        // Re-open the physical file.\n        // NB: we use StorageIoOperation::OpenAfterReplace for this to distinguish this\n        // case from StorageIoOperation::Open. 
This helps with identifying thrashing\n        // of the virtual file descriptor cache.\n        let file = observe_duration!(StorageIoOperation::OpenAfterReplace, {\n            self.open_options.open(self.path.as_std_path()).await?\n        });\n\n        // Store the File in the slot and update the handle in the VirtualFile\n        // to point to it.\n        slot_guard.file.replace(file);\n\n        *handle_guard = handle;\n\n        Ok(FileGuard {\n            slot_guard: slot_guard.downgrade(),\n        })\n    }\n\n    /// Read the file contents in range `offset..(offset + slice.bytes_total())` into `slice[0..slice.bytes_total()]`.\n    ///\n    /// The returned `Slice<Buf>` is equivalent to the input `slice`, i.e., it's the same view into the same buffer.\n    pub async fn read_exact_at<Buf>(\n        &self,\n        slice: Slice<Buf>,\n        offset: u64,\n        ctx: &RequestContext,\n    ) -> Result<Slice<Buf>, Error>\n    where\n        Buf: IoBufAlignedMut + Send,\n    {\n        let assert_we_return_original_bounds = if cfg!(debug_assertions) {\n            Some((slice.stable_ptr() as usize, slice.bytes_total()))\n        } else {\n            None\n        };\n\n        let original_bounds = slice.bounds();\n        let (buf, res) =\n            read_exact_at_impl(slice, offset, |buf, offset| self.read_at(buf, offset, ctx)).await;\n        let res = res.map(|_| buf.slice(original_bounds));\n\n        if let Some(original_bounds) = assert_we_return_original_bounds {\n            if let Ok(slice) = &res {\n                let returned_bounds = (slice.stable_ptr() as usize, slice.bytes_total());\n                assert_eq!(original_bounds, returned_bounds);\n            }\n        }\n\n        res\n    }\n\n    /// Like [`Self::read_exact_at`] but for [`PageWriteGuard`].\n    pub async fn read_exact_at_page(\n        &self,\n        page: PageWriteGuard<'static>,\n        offset: u64,\n        ctx: &RequestContext,\n    ) -> 
Result<PageWriteGuard<'static>, Error> {\n        let buf = PageWriteGuardBuf { page }.slice_full();\n        debug_assert_eq!(buf.bytes_total(), PAGE_SZ);\n        self.read_exact_at(buf, offset, ctx)\n            .await\n            .map(|slice| slice.into_inner().page)\n    }\n\n    // Copied from https://doc.rust-lang.org/1.72.0/src/std/os/unix/fs.rs.html#219-235\n    pub async fn write_all_at<Buf: IoBuf + Send>(\n        &self,\n        buf: FullSlice<Buf>,\n        mut offset: u64,\n        ctx: &RequestContext,\n    ) -> (FullSlice<Buf>, Result<(), Error>) {\n        let buf = buf.into_raw_slice();\n        let bounds = buf.bounds();\n        let restore =\n            |buf: Slice<_>| FullSlice::must_new(Slice::from_buf_bounds(buf.into_inner(), bounds));\n        let mut buf = buf;\n        while !buf.is_empty() {\n            let (tmp, res) = self.write_at(FullSlice::must_new(buf), offset, ctx).await;\n            buf = tmp.into_raw_slice();\n            match res {\n                Ok(0) => {\n                    return (\n                        restore(buf),\n                        Err(Error::new(\n                            std::io::ErrorKind::WriteZero,\n                            \"failed to write whole buffer\",\n                        )),\n                    );\n                }\n                Ok(n) => {\n                    buf = buf.slice(n..);\n                    offset += n as u64;\n                }\n                Err(e) if e.kind() == std::io::ErrorKind::Interrupted => {}\n                Err(e) => return (restore(buf), Err(e)),\n            }\n        }\n        (restore(buf), Ok(()))\n    }\n\n    pub(super) async fn read_at<Buf>(\n        &self,\n        buf: tokio_epoll_uring::Slice<Buf>,\n        offset: u64,\n        ctx: &RequestContext,\n    ) -> (tokio_epoll_uring::Slice<Buf>, Result<usize, Error>)\n    where\n        Buf: tokio_epoll_uring::IoBufMut + Send,\n    {\n        self.validate_direct_io(\n            
Slice::stable_ptr(&buf).addr(),\n            Slice::bytes_total(&buf),\n            offset,\n        );\n\n        let file_guard = match self\n            .lock_file()\n            .await\n            .maybe_fatal_err(\"lock_file inside VirtualFileInner::read_at\")\n        {\n            Ok(file_guard) => file_guard,\n            Err(e) => return (buf, Err(e)),\n        };\n\n        observe_duration!(StorageIoOperation::Read, {\n            let ((_file_guard, buf), res) = io_engine::get().read_at(file_guard, offset, buf).await;\n            let res = res.maybe_fatal_err(\"io_engine read_at inside VirtualFileInner::read_at\");\n            if let Ok(size) = res {\n                ctx.io_size_metrics().read.add(size.into_u64());\n            }\n            (buf, res)\n        })\n    }\n\n    async fn write_at<B: IoBuf + Send>(\n        &self,\n        buf: FullSlice<B>,\n        offset: u64,\n        ctx: &RequestContext,\n    ) -> (FullSlice<B>, Result<usize, Error>) {\n        self.validate_direct_io(buf.as_ptr().addr(), buf.len(), offset);\n\n        let file_guard = match self.lock_file().await {\n            Ok(file_guard) => file_guard,\n            Err(e) => return (buf, Err(e)),\n        };\n        observe_duration!(StorageIoOperation::Write, {\n            let ((_file_guard, buf), result) =\n                io_engine::get().write_at(file_guard, offset, buf).await;\n            let result = result.maybe_fatal_err(\"write_at\");\n            if let Ok(size) = result {\n                ctx.io_size_metrics().write.add(size.into_u64());\n            }\n            (buf, result)\n        })\n    }\n\n    /// Validate all reads and writes to adhere to the O_DIRECT requirements of our production systems.\n    ///\n    /// Validating it in userspace sets a consistent bar, independent of what actual OS/filesystem/block device is in use.\n    fn validate_direct_io(&self, addr: usize, size: usize, offset: u64) {\n        // TODO: eventually enable validation in 
the builds we use in real environments like staging, preprod, and prod.\n        if !(cfg!(feature = \"testing\") || cfg!(test)) {\n            return;\n        }\n        if !self.open_options.is_direct() {\n            return;\n        }\n\n        // Validate buffer memory alignment.\n        //\n        // What practically matters as of Linux 6.1 is bdev_dma_alignment()\n        // which is practically between 512 and 4096.\n        // On our production systems, the value is 512.\n        // The IoBuffer/IoBufferMut hard-code that value.\n        //\n        // Because the allocator might return _more_ aligned addresses than requested,\n        // there is a chance that testing would not catch violations of a runtime requirement stricter than 512.\n        {\n            let requirement = get_io_buffer_alignment();\n            let remainder = addr % requirement;\n            assert!(\n                remainder == 0,\n                \"Direct I/O buffer must be aligned: buffer_addr=0x{addr:x} % 0x{requirement:x} = 0x{remainder:x}\"\n            );\n        }\n\n        // Validate offset alignment.\n        //\n        // We hard-code 512 throughout the code base.\n        // So enforce just that and not anything more restrictive.\n        // Even the shallowest testing will expose more restrictive requirements if those ever arise.\n        {\n            let requirement = get_io_buffer_alignment() as u64;\n            let remainder = offset % requirement;\n            assert!(\n                remainder == 0,\n                \"Direct I/O offset must be aligned: offset=0x{offset:x} % 0x{requirement:x} = 0x{remainder:x}\"\n            );\n        }\n\n        // Validate buffer size multiple requirement.\n        //\n        // The requirement in Linux 6.1 is bdev_logical_block_size().\n        // On our production systems, that is 512.\n        {\n            let requirement = get_io_buffer_alignment();\n            let remainder = size % requirement;\n         
   assert!(\n                remainder == 0,\n                \"Direct I/O buffer size must be a multiple of {requirement}: size=0x{size:x} % 0x{requirement:x} = 0x{remainder:x}\"\n            );\n        }\n    }\n}\n\n// Adapted from https://doc.rust-lang.org/1.72.0/src/std/os/unix/fs.rs.html#117-135\npub async fn read_exact_at_impl<Buf, F, Fut>(\n    mut buf: tokio_epoll_uring::Slice<Buf>,\n    mut offset: u64,\n    mut read_at: F,\n) -> (Buf, std::io::Result<()>)\nwhere\n    Buf: IoBufMut + Send,\n    F: FnMut(tokio_epoll_uring::Slice<Buf>, u64) -> Fut,\n    Fut: std::future::Future<Output = (tokio_epoll_uring::Slice<Buf>, std::io::Result<usize>)>,\n{\n    while buf.bytes_total() != 0 {\n        let res;\n        (buf, res) = read_at(buf, offset).await;\n        match res {\n            Ok(0) => break,\n            Ok(n) => {\n                buf = buf.slice(n..);\n                offset += n as u64;\n            }\n            Err(ref e) if e.kind() == std::io::ErrorKind::Interrupted => {}\n            Err(e) => return (buf.into_inner(), Err(e)),\n        }\n    }\n    // NB: don't use `buf.is_empty()` here; it is from the\n    // `impl Deref for Slice { Target = [u8] }`; the &[u8]\n    // returned by it only covers the initialized portion of `buf`.\n    // Whereas we're interested in ensuring that we filled the entire\n    // buffer that the user passed in.\n    if buf.bytes_total() != 0 {\n        (\n            buf.into_inner(),\n            Err(std::io::Error::new(\n                std::io::ErrorKind::UnexpectedEof,\n                \"failed to fill whole buffer\",\n            )),\n        )\n    } else {\n        assert_eq!(buf.len(), buf.bytes_total());\n        (buf.into_inner(), Ok(()))\n    }\n}\n\n#[cfg(test)]\nmod test_read_exact_at_impl {\n\n    use std::collections::VecDeque;\n    use std::sync::Arc;\n\n    use tokio_epoll_uring::{BoundedBuf, BoundedBufMut};\n\n    use super::read_exact_at_impl;\n\n    struct Expectation {\n        offset: u64,\n 
       bytes_total: usize,\n        result: std::io::Result<Vec<u8>>,\n    }\n    struct MockReadAt {\n        expectations: VecDeque<Expectation>,\n    }\n\n    impl MockReadAt {\n        async fn read_at(\n            &mut self,\n            mut buf: tokio_epoll_uring::Slice<Vec<u8>>,\n            offset: u64,\n        ) -> (tokio_epoll_uring::Slice<Vec<u8>>, std::io::Result<usize>) {\n            let exp = self\n                .expectations\n                .pop_front()\n                .expect(\"read_at called but we have no expectations left\");\n            assert_eq!(exp.offset, offset);\n            assert_eq!(exp.bytes_total, buf.bytes_total());\n            match exp.result {\n                Ok(bytes) => {\n                    assert!(bytes.len() <= buf.bytes_total());\n                    buf.put_slice(&bytes);\n                    (buf, Ok(bytes.len()))\n                }\n                Err(e) => (buf, Err(e)),\n            }\n        }\n    }\n\n    impl Drop for MockReadAt {\n        fn drop(&mut self) {\n            assert_eq!(self.expectations.len(), 0);\n        }\n    }\n\n    #[tokio::test]\n    async fn test_basic() {\n        let buf = Vec::with_capacity(5).slice_full();\n        let mock_read_at = Arc::new(tokio::sync::Mutex::new(MockReadAt {\n            expectations: VecDeque::from(vec![Expectation {\n                offset: 0,\n                bytes_total: 5,\n                result: Ok(vec![b'a', b'b', b'c', b'd', b'e']),\n            }]),\n        }));\n        let (buf, res) = read_exact_at_impl(buf, 0, |buf, offset| {\n            let mock_read_at = Arc::clone(&mock_read_at);\n            async move { mock_read_at.lock().await.read_at(buf, offset).await }\n        })\n        .await;\n        assert!(res.is_ok());\n        assert_eq!(buf, vec![b'a', b'b', b'c', b'd', b'e']);\n    }\n\n    #[tokio::test]\n    async fn test_empty_buf_issues_no_syscall() {\n        let buf = Vec::new().slice_full();\n        let mock_read_at = 
Arc::new(tokio::sync::Mutex::new(MockReadAt {\n            expectations: VecDeque::new(),\n        }));\n        let (_buf, res) = read_exact_at_impl(buf, 0, |buf, offset| {\n            let mock_read_at = Arc::clone(&mock_read_at);\n            async move { mock_read_at.lock().await.read_at(buf, offset).await }\n        })\n        .await;\n        assert!(res.is_ok());\n    }\n\n    #[tokio::test]\n    async fn test_two_read_at_calls_needed_until_buf_filled() {\n        let buf = Vec::with_capacity(4).slice_full();\n        let mock_read_at = Arc::new(tokio::sync::Mutex::new(MockReadAt {\n            expectations: VecDeque::from(vec![\n                Expectation {\n                    offset: 0,\n                    bytes_total: 4,\n                    result: Ok(vec![b'a', b'b']),\n                },\n                Expectation {\n                    offset: 2,\n                    bytes_total: 2,\n                    result: Ok(vec![b'c', b'd']),\n                },\n            ]),\n        }));\n        let (buf, res) = read_exact_at_impl(buf, 0, |buf, offset| {\n            let mock_read_at = Arc::clone(&mock_read_at);\n            async move { mock_read_at.lock().await.read_at(buf, offset).await }\n        })\n        .await;\n        assert!(res.is_ok());\n        assert_eq!(buf, vec![b'a', b'b', b'c', b'd']);\n    }\n\n    #[tokio::test]\n    async fn test_eof_before_buffer_full() {\n        let buf = Vec::with_capacity(3).slice_full();\n        let mock_read_at = Arc::new(tokio::sync::Mutex::new(MockReadAt {\n            expectations: VecDeque::from(vec![\n                Expectation {\n                    offset: 0,\n                    bytes_total: 3,\n                    result: Ok(vec![b'a']),\n                },\n                Expectation {\n                    offset: 1,\n                    bytes_total: 2,\n                    result: Ok(vec![b'b']),\n                },\n                Expectation {\n                    offset: 2,\n           
         bytes_total: 1,\n                    result: Ok(vec![]),\n                },\n            ]),\n        }));\n        let (_buf, res) = read_exact_at_impl(buf, 0, |buf, offset| {\n            let mock_read_at = Arc::clone(&mock_read_at);\n            async move { mock_read_at.lock().await.read_at(buf, offset).await }\n        })\n        .await;\n        let Err(err) = res else {\n            panic!(\"should return an error\");\n        };\n        assert_eq!(err.kind(), std::io::ErrorKind::UnexpectedEof);\n        assert_eq!(format!(\"{err}\"), \"failed to fill whole buffer\");\n        // buffer contents on error are unspecified\n    }\n}\n\nstruct FileGuard {\n    slot_guard: RwLockReadGuard<'static, SlotInner>,\n}\n\nimpl AsRef<OwnedFd> for FileGuard {\n    fn as_ref(&self) -> &OwnedFd {\n        // This unwrap is safe because we only create `FileGuard`s\n        // if we know that the file is Some.\n        self.slot_guard.file.as_ref().unwrap()\n    }\n}\n\nimpl FileGuard {\n    /// Soft deprecation: we'll move VirtualFile to async APIs and remove this function eventually.\n    fn with_std_file<F, R>(&self, with: F) -> R\n    where\n        F: FnOnce(&File) -> R,\n    {\n        // SAFETY:\n        // - lifetime of the fd: `file` doesn't outlive the OwnedFd stored in `self`.\n        // - `&` usage below: `self` is `&`, hence Rust typesystem guarantees there are is no `&mut`\n        let file = unsafe { File::from_raw_fd(self.as_ref().as_raw_fd()) };\n        let res = with(&file);\n        let _ = file.into_raw_fd();\n        res\n    }\n}\n\nimpl tokio_epoll_uring::IoFd for FileGuard {\n    unsafe fn as_fd(&self) -> RawFd {\n        let owned_fd: &OwnedFd = self.as_ref();\n        owned_fd.as_raw_fd()\n    }\n}\n\n#[cfg(test)]\nimpl VirtualFile {\n    pub(crate) async fn read_blk(\n        &self,\n        blknum: u32,\n        ctx: &RequestContext,\n    ) -> Result<crate::tenant::block_io::BlockLease<'_>, std::io::Error> {\n        
self.inner.read_blk(blknum, ctx).await\n    }\n}\n\n#[cfg(test)]\nimpl VirtualFileInner {\n    pub(crate) async fn read_blk(\n        &self,\n        blknum: u32,\n        ctx: &RequestContext,\n    ) -> Result<crate::tenant::block_io::BlockLease<'_>, std::io::Error> {\n        use crate::page_cache::PAGE_SZ;\n        let slice = IoBufferMut::with_capacity(PAGE_SZ).slice_full();\n        assert_eq!(slice.bytes_total(), PAGE_SZ);\n        let slice = self\n            .read_exact_at(slice, blknum as u64 * (PAGE_SZ as u64), ctx)\n            .await?;\n        Ok(crate::tenant::block_io::BlockLease::IoBufferMut(\n            slice.into_inner(),\n        ))\n    }\n}\n\nimpl Drop for VirtualFileInner {\n    /// If a VirtualFile is dropped, close the underlying file if it was open.\n    fn drop(&mut self) {\n        let handle = self.handle.get_mut();\n\n        fn clean_slot(slot: &Slot, mut slot_guard: RwLockWriteGuard<'_, SlotInner>, tag: u64) {\n            if slot_guard.tag == tag {\n                slot.recently_used.store(false, Ordering::Relaxed);\n                // there is also operation \"close-by-replace\" for closes done on eviction for\n                // comparison.\n                if let Some(fd) = slot_guard.file.take() {\n                    STORAGE_IO_TIME_METRIC\n                        .get(StorageIoOperation::Close)\n                        .observe_closure_duration(|| drop(fd));\n                }\n            }\n        }\n\n        // We don't have async drop so we cannot directly await the lock here.\n        // Instead, first do a best-effort attempt at closing the underlying\n        // file descriptor by using `try_write`, and if that fails, spawn\n        // a tokio task to do it asynchronously: we just want it to be\n        // cleaned up eventually.\n        // Most of the time, the `try_lock` should succeed though,\n        // as we have `&mut self` access. 
In other words, if the slot\n        // is still occupied by our file, there should be no access from\n        // other I/O operations; the only other possible place to lock\n        // the slot is the lock algorithm looking for free slots.\n        let slot = &get_open_files().slots[handle.index];\n        if let Ok(slot_guard) = slot.inner.try_write() {\n            clean_slot(slot, slot_guard, handle.tag);\n        } else {\n            let tag = handle.tag;\n            tokio::spawn(async move {\n                let slot_guard = slot.inner.write().await;\n                clean_slot(slot, slot_guard, tag);\n            });\n        };\n    }\n}\n\nimpl OwnedAsyncWriter for VirtualFile {\n    async fn write_all_at<Buf: IoBufAligned + Send>(\n        &self,\n        buf: FullSlice<Buf>,\n        offset: u64,\n        ctx: &RequestContext,\n    ) -> (FullSlice<Buf>, std::io::Result<()>) {\n        VirtualFile::write_all_at(self, buf, offset, ctx).await\n    }\n    async fn set_len(&self, len: u64, ctx: &RequestContext) -> std::io::Result<()> {\n        VirtualFile::set_len(self, len, ctx).await\n    }\n}\n\nimpl OpenFiles {\n    fn new(num_slots: usize) -> OpenFiles {\n        let mut slots = Box::new(Vec::with_capacity(num_slots));\n        for _ in 0..num_slots {\n            let slot = Slot {\n                recently_used: AtomicBool::new(false),\n                inner: RwLock::new(SlotInner { tag: 0, file: None }),\n            };\n            slots.push(slot);\n        }\n\n        OpenFiles {\n            next: AtomicUsize::new(0),\n            slots: Box::leak(slots),\n        }\n    }\n}\n\n///\n/// Initialize the virtual file module. 
This must be called once at page\n/// server startup.\n///\n#[cfg(not(test))]\npub fn init(num_slots: usize, engine: IoEngineKind, mode: IoMode, sync_mode: SyncMode) {\n    if OPEN_FILES.set(OpenFiles::new(num_slots)).is_err() {\n        panic!(\"virtual_file::init called twice\");\n    }\n    set_io_mode(mode);\n    io_engine::init(engine);\n    SYNC_MODE.store(sync_mode as u8, std::sync::atomic::Ordering::Relaxed);\n    crate::metrics::virtual_file_descriptor_cache::SIZE_MAX.set(num_slots as u64);\n}\n\nconst TEST_MAX_FILE_DESCRIPTORS: usize = 10;\n\n// Get a handle to the global slots array.\nfn get_open_files() -> &'static OpenFiles {\n    //\n    // In unit tests, page server startup doesn't happen and no one calls\n    // virtual_file::init(). Initialize it here, with a small array.\n    //\n    // This applies to the virtual file tests below, but all other unit\n    // tests too, so the virtual file facility is always usable in\n    // unit tests.\n    //\n    if cfg!(test) {\n        OPEN_FILES.get_or_init(|| OpenFiles::new(TEST_MAX_FILE_DESCRIPTORS))\n    } else {\n        OPEN_FILES.get().expect(\"virtual_file::init not called yet\")\n    }\n}\n\n/// Gets the io buffer alignment.\npub(crate) const fn get_io_buffer_alignment() -> usize {\n    DEFAULT_IO_BUFFER_ALIGNMENT\n}\n\npub(crate) type IoBufferMut = AlignedBufferMut<ConstAlign<{ get_io_buffer_alignment() }>>;\npub(crate) type IoBuffer = AlignedBuffer<ConstAlign<{ get_io_buffer_alignment() }>>;\npub(crate) type IoPageSlice<'a> =\n    AlignedSlice<'a, PAGE_SZ, ConstAlign<{ get_io_buffer_alignment() }>>;\n\nstatic IO_MODE: LazyLock<AtomicU8> = LazyLock::new(|| AtomicU8::new(IoMode::preferred() as u8));\n\npub fn set_io_mode(mode: IoMode) {\n    IO_MODE.store(mode as u8, std::sync::atomic::Ordering::Relaxed);\n}\n\npub(crate) fn get_io_mode() -> IoMode {\n    IoMode::try_from(IO_MODE.load(Ordering::Relaxed)).unwrap()\n}\n\nstatic SYNC_MODE: AtomicU8 = AtomicU8::new(SyncMode::Sync as 
u8);\n\n#[cfg(test)]\nmod tests {\n    use std::os::unix::fs::FileExt;\n    use std::sync::Arc;\n\n    use owned_buffers_io::io_buf_ext::IoBufExt;\n    use rand::Rng;\n    use rand::seq::SliceRandom;\n\n    use super::*;\n    use crate::context::DownloadBehavior;\n    use crate::task_mgr::TaskKind;\n\n    #[tokio::test]\n    async fn test_virtual_files() -> anyhow::Result<()> {\n        let ctx =\n            RequestContext::new(TaskKind::UnitTest, DownloadBehavior::Error).with_scope_unit_test();\n        let testdir = crate::config::PageServerConf::test_repo_dir(\"test_virtual_files\");\n        std::fs::create_dir_all(&testdir)?;\n\n        let zeropad512 = |content: &[u8]| {\n            let mut buf = IoBufferMut::with_capacity_zeroed(512);\n            buf[..content.len()].copy_from_slice(content);\n            buf.freeze().slice_len()\n        };\n\n        let path_a = testdir.join(\"file_a\");\n        let file_a = VirtualFile::open_with_options_v2(\n            path_a.clone(),\n            OpenOptions::new()\n                .read(true)\n                .write(true)\n                // set create & truncate flags to ensure when we trigger a reopen later in this test,\n                // the reopen_options must have masked out those flags; if they don't, then\n                // the after reopen we will fail to read the `content_a` that we write here.\n                .create(true)\n                .truncate(true),\n            &ctx,\n        )\n        .await?;\n        let (_, res) = file_a.write_all_at(zeropad512(b\"content_a\"), 0, &ctx).await;\n        res?;\n\n        let path_b = testdir.join(\"file_b\");\n        let file_b = VirtualFile::open_with_options_v2(\n            path_b.clone(),\n            OpenOptions::new()\n                .read(true)\n                .write(true)\n                .create(true)\n                .truncate(true),\n            &ctx,\n        )\n        .await?;\n        let (_, res) = 
file_b.write_all_at(zeropad512(b\"content_b\"), 0, &ctx).await;\n        res?;\n\n        let assert_first_512_eq = async |vfile: &VirtualFile, expect: &[u8]| {\n            let buf = vfile\n                .read_exact_at(IoBufferMut::with_capacity_zeroed(512).slice_full(), 0, &ctx)\n                .await\n                .unwrap();\n            assert_eq!(&buf[..], &zeropad512(expect)[..]);\n        };\n\n        // Open a lot of file descriptors / VirtualFile instances.\n        // Enough to cause some evictions in the fd cache.\n\n        let mut file_b_dupes = Vec::new();\n        for _ in 0..100 {\n            let vfile = VirtualFile::open_with_options_v2(\n                path_b.clone(),\n                OpenOptions::new().read(true),\n                &ctx,\n            )\n            .await?;\n            assert_first_512_eq(&vfile, b\"content_b\").await;\n            file_b_dupes.push(vfile);\n        }\n\n        // make sure we opened enough files to definitely cause evictions.\n        assert!(file_b_dupes.len() > TEST_MAX_FILE_DESCRIPTORS * 2);\n\n        // The underlying file descriptor for 'file_a' should be closed now. Try to read\n        // from it again. The VirtualFile reopens the file internally.\n        assert_first_512_eq(&file_a, b\"content_a\").await;\n\n        // Check that all the other FDs still work too. Use them in random order for\n        // good measure.\n        file_b_dupes.as_mut_slice().shuffle(&mut rand::rng());\n        for vfile in file_b_dupes.iter_mut() {\n            assert_first_512_eq(vfile, b\"content_b\").await;\n        }\n\n        Ok(())\n    }\n\n    /// Test using VirtualFiles from many threads concurrently. 
This tests both using\n    /// a lot of VirtualFiles concurrently, causing evictions, and also using the same\n    /// VirtualFile from multiple threads concurrently.\n    #[tokio::test]\n    async fn test_vfile_concurrency() -> Result<(), Error> {\n        const SIZE: usize = 8 * 1024;\n        const VIRTUAL_FILES: usize = 100;\n        const THREADS: usize = 100;\n        const SAMPLE: [u8; SIZE] = [0xADu8; SIZE];\n\n        let ctx =\n            RequestContext::new(TaskKind::UnitTest, DownloadBehavior::Error).with_scope_unit_test();\n        let testdir = crate::config::PageServerConf::test_repo_dir(\"vfile_concurrency\");\n        std::fs::create_dir_all(&testdir)?;\n\n        // Create a test file.\n        let test_file_path = testdir.join(\"concurrency_test_file\");\n        {\n            let file = File::create(&test_file_path)?;\n            file.write_all_at(&SAMPLE, 0)?;\n        }\n\n        // Open the file many times.\n        let mut files = Vec::new();\n        for _ in 0..VIRTUAL_FILES {\n            let f = VirtualFile::open_with_options_v2(\n                &test_file_path,\n                OpenOptions::new().read(true),\n                &ctx,\n            )\n            .await?;\n            files.push(f);\n        }\n        let files = Arc::new(files);\n\n        // Launch many threads, and use the virtual files concurrently in random order.\n        let rt = tokio::runtime::Builder::new_multi_thread()\n            .worker_threads(THREADS)\n            .thread_name(\"test_vfile_concurrency thread\")\n            .build()\n            .unwrap();\n        let mut hdls = Vec::new();\n        for _threadno in 0..THREADS {\n            let files = files.clone();\n            let ctx = ctx.detached_child(TaskKind::UnitTest, DownloadBehavior::Error);\n            let hdl = rt.spawn(async move {\n                let mut buf = IoBufferMut::with_capacity_zeroed(SIZE);\n                for _ in 1..1000 {\n                    let f = 
&files[rand::rng().random_range(0..files.len())];\n                    buf = f\n                        .read_exact_at(buf.slice_full(), 0, &ctx)\n                        .await\n                        .unwrap()\n                        .into_inner();\n                    assert!(buf[..] == SAMPLE);\n                }\n            });\n            hdls.push(hdl);\n        }\n        for hdl in hdls {\n            hdl.await?;\n        }\n        std::mem::forget(rt);\n\n        Ok(())\n    }\n\n    #[tokio::test]\n    async fn test_atomic_overwrite_basic() {\n        let testdir = crate::config::PageServerConf::test_repo_dir(\"test_atomic_overwrite_basic\");\n        std::fs::create_dir_all(&testdir).unwrap();\n\n        let path = testdir.join(\"myfile\");\n        let tmp_path = testdir.join(\"myfile.tmp\");\n\n        VirtualFileInner::crashsafe_overwrite(path.clone(), tmp_path.clone(), b\"foo\".to_vec())\n            .await\n            .unwrap();\n\n        let post = std::fs::read_to_string(&path).unwrap();\n        assert_eq!(post, \"foo\");\n        assert!(!tmp_path.exists());\n\n        VirtualFileInner::crashsafe_overwrite(path.clone(), tmp_path.clone(), b\"bar\".to_vec())\n            .await\n            .unwrap();\n\n        let post = std::fs::read_to_string(&path).unwrap();\n        assert_eq!(post, \"bar\");\n        assert!(!tmp_path.exists());\n    }\n\n    #[tokio::test]\n    async fn test_atomic_overwrite_preexisting_tmp() {\n        let testdir =\n            crate::config::PageServerConf::test_repo_dir(\"test_atomic_overwrite_preexisting_tmp\");\n        std::fs::create_dir_all(&testdir).unwrap();\n\n        let path = testdir.join(\"myfile\");\n        let tmp_path = testdir.join(\"myfile.tmp\");\n\n        std::fs::write(&tmp_path, \"some preexisting junk that should be removed\").unwrap();\n        assert!(tmp_path.exists());\n\n        VirtualFileInner::crashsafe_overwrite(path.clone(), tmp_path.clone(), b\"foo\".to_vec())\n            
.await\n            .unwrap();\n\n        let post = std::fs::read_to_string(&path).unwrap();\n        assert_eq!(post, \"foo\");\n        assert!(!tmp_path.exists());\n    }\n}\n"
  },
  {
    "path": "pageserver/src/walingest.rs",
    "content": "//!\n//! Parse PostgreSQL WAL records and store them in a neon Timeline.\n//!\n//! The pipeline for ingesting WAL looks like this:\n//!\n//! WAL receiver  -> [`wal_decoder`] ->  WalIngest  ->   Repository\n//!\n//! The WAL receiver receives a stream of WAL from the WAL safekeepers.\n//! Records get decoded and interpreted in the [`wal_decoder`] module\n//! and then stored to the Repository by WalIngest.\n//!\n//! The neon Repository can store page versions in two formats: as\n//! page images, or a WAL records. [`wal_decoder::models::InterpretedWalRecord::from_bytes_filtered`]\n//! extracts page images out of some WAL records, but mostly it's WAL\n//! records. If a WAL record modifies multiple pages, WalIngest\n//! will call Repository::put_rel_wal_record or put_rel_page_image functions\n//! separately for each modified page.\n//!\n//! To reconstruct a page using a WAL record, the Repository calls the\n//! code in walredo.rs. walredo.rs passes most WAL records to the WAL\n//! redo Postgres process, but some records it can handle directly with\n//! 
bespoken Rust code.\n\nuse std::backtrace::Backtrace;\nuse std::collections::HashMap;\nuse std::sync::atomic::AtomicBool;\nuse std::sync::{Arc, OnceLock};\nuse std::time::{Duration, Instant, SystemTime};\n\nuse bytes::{Buf, Bytes};\nuse pageserver_api::key::{Key, rel_block_to_key};\nuse pageserver_api::reltag::{BlockNumber, RelTag, SlruKind};\nuse pageserver_api::shard::ShardIdentity;\nuse postgres_ffi::walrecord::*;\nuse postgres_ffi::{\n    PgMajorVersion, TransactionId, dispatch_pgversion, enum_pgversion, enum_pgversion_dispatch,\n    fsm_logical_to_physical, pg_constants,\n};\nuse postgres_ffi_types::TimestampTz;\nuse postgres_ffi_types::forknum::{FSM_FORKNUM, INIT_FORKNUM, MAIN_FORKNUM, VISIBILITYMAP_FORKNUM};\nuse tracing::*;\nuse utils::bin_ser::{DeserializeError, SerializeError};\nuse utils::lsn::Lsn;\nuse utils::rate_limit::RateLimit;\nuse utils::{critical_timeline, failpoint_support};\nuse wal_decoder::models::record::NeonWalRecord;\nuse wal_decoder::models::*;\n\nuse crate::ZERO_PAGE;\nuse crate::context::RequestContext;\nuse crate::metrics::WAL_INGEST;\nuse crate::pgdatadir_mapping::{DatadirModification, Version};\nuse crate::span::debug_assert_current_span_has_tenant_and_timeline_id;\nuse crate::tenant::{PageReconstructError, Timeline};\n\nenum_pgversion! 
{CheckPoint, pgv::CheckPoint}\n\nimpl CheckPoint {\n    fn encode(&self) -> Result<Bytes, SerializeError> {\n        enum_pgversion_dispatch!(self, CheckPoint, cp, { cp.encode() })\n    }\n\n    fn update_next_xid(&mut self, xid: u32) -> bool {\n        enum_pgversion_dispatch!(self, CheckPoint, cp, { cp.update_next_xid(xid) })\n    }\n\n    pub fn update_next_multixid(&mut self, multi_xid: u32, multi_offset: u32) -> bool {\n        enum_pgversion_dispatch!(self, CheckPoint, cp, {\n            cp.update_next_multixid(multi_xid, multi_offset)\n        })\n    }\n}\n\n/// Temporary limitation of WAL lag warnings after attach\n///\n/// After tenant attach, we want to limit WAL lag warnings because\n/// we don't look at the WAL until the attach is complete, which\n/// might take a while.\npub struct WalLagCooldown {\n    /// Until when should this limitation apply at all\n    active_until: std::time::Instant,\n    /// The maximum lag to suppress. Lags above this limit get reported anyways.\n    max_lag: Duration,\n}\n\nimpl WalLagCooldown {\n    pub fn new(attach_start: Instant, attach_duration: Duration) -> Self {\n        Self {\n            active_until: attach_start + attach_duration * 3 + Duration::from_secs(120),\n            max_lag: attach_duration * 2 + Duration::from_secs(60),\n        }\n    }\n}\n\npub struct WalIngest {\n    attach_wal_lag_cooldown: Arc<OnceLock<WalLagCooldown>>,\n    shard: ShardIdentity,\n    checkpoint: CheckPoint,\n    checkpoint_modified: bool,\n    warn_ingest_lag: WarnIngestLag,\n}\n\nstruct WarnIngestLag {\n    lag_msg_ratelimit: RateLimit,\n    future_lsn_msg_ratelimit: RateLimit,\n    timestamp_invalid_msg_ratelimit: RateLimit,\n}\n\npub struct WalIngestError {\n    pub backtrace: std::backtrace::Backtrace,\n    pub kind: WalIngestErrorKind,\n}\n\n#[derive(thiserror::Error, Debug)]\npub enum WalIngestErrorKind {\n    #[error(transparent)]\n    #[allow(private_interfaces)]\n    PageReconstructError(#[from] PageReconstructError),\n 
   #[error(transparent)]\n    DeserializationFailure(#[from] DeserializeError),\n    #[error(transparent)]\n    SerializationFailure(#[from] SerializeError),\n    #[error(\"the request contains data not supported by pageserver: {0} @ {1}\")]\n    InvalidKey(Key, Lsn),\n    #[error(\"twophase file for xid {0} already exists\")]\n    FileAlreadyExists(u64),\n    #[error(\"slru segment {0:?}/{1} already exists\")]\n    SlruAlreadyExists(SlruKind, u32),\n    #[error(\"relation already exists\")]\n    RelationAlreadyExists(RelTag),\n    #[error(\"invalid reldir key {0}\")]\n    InvalidRelDirKey(Key),\n\n    #[error(transparent)]\n    LogicalError(anyhow::Error),\n    #[error(transparent)]\n    EncodeAuxFileError(anyhow::Error),\n    #[error(transparent)]\n    MaybeRelSizeV2Error(anyhow::Error),\n\n    #[error(\"timeline shutting down\")]\n    Cancelled,\n}\n\nimpl<T> From<T> for WalIngestError\nwhere\n    WalIngestErrorKind: From<T>,\n{\n    fn from(value: T) -> Self {\n        WalIngestError {\n            backtrace: Backtrace::capture(),\n            kind: WalIngestErrorKind::from(value),\n        }\n    }\n}\n\nimpl std::error::Error for WalIngestError {\n    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {\n        self.kind.source()\n    }\n}\n\nimpl core::fmt::Display for WalIngestError {\n    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {\n        self.kind.fmt(f)\n    }\n}\n\nimpl core::fmt::Debug for WalIngestError {\n    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {\n        if f.alternate() {\n            f.debug_map()\n                .key(&\"backtrace\")\n                .value(&self.backtrace)\n                .key(&\"kind\")\n                .value(&self.kind)\n                .finish()\n        } else {\n            writeln!(f, \"Error: {:?}\", self.kind)?;\n            if self.backtrace.status() == std::backtrace::BacktraceStatus::Captured {\n                writeln!(f, \"Stack backtrace: 
{:?}\", self.backtrace)?;\n            }\n            Ok(())\n        }\n    }\n}\n\n#[macro_export]\nmacro_rules! ensure_walingest {\n    ($($t:tt)*) => {\n        _ = || -> Result<(), anyhow::Error> {\n            anyhow::ensure!($($t)*);\n            Ok(())\n        }().map_err(WalIngestErrorKind::LogicalError)?;\n    };\n}\n\nimpl WalIngest {\n    pub async fn new(\n        timeline: &Timeline,\n        startpoint: Lsn,\n        ctx: &RequestContext,\n    ) -> Result<WalIngest, WalIngestError> {\n        // Fetch the latest checkpoint into memory, so that we can compare with it\n        // quickly in `ingest_record` and update it when it changes.\n        let checkpoint_bytes = timeline.get_checkpoint(startpoint, ctx).await?;\n        let pgversion = timeline.pg_version;\n\n        let checkpoint = dispatch_pgversion!(pgversion, {\n            let checkpoint = pgv::CheckPoint::decode(&checkpoint_bytes)?;\n            trace!(\"CheckPoint.nextXid = {}\", checkpoint.nextXid.value);\n            <pgv::CheckPoint as Into<CheckPoint>>::into(checkpoint)\n        });\n\n        Ok(WalIngest {\n            shard: *timeline.get_shard_identity(),\n            checkpoint,\n            checkpoint_modified: false,\n            attach_wal_lag_cooldown: timeline.attach_wal_lag_cooldown.clone(),\n            warn_ingest_lag: WarnIngestLag {\n                lag_msg_ratelimit: RateLimit::new(std::time::Duration::from_secs(10)),\n                future_lsn_msg_ratelimit: RateLimit::new(std::time::Duration::from_secs(10)),\n                timestamp_invalid_msg_ratelimit: RateLimit::new(std::time::Duration::from_secs(10)),\n            },\n        })\n    }\n\n    /// Ingest an interpreted PostgreSQL WAL record by doing writes to the underlying key value\n    /// storage of a given timeline.\n    ///\n    /// This function updates `lsn` field of `DatadirModification`\n    ///\n    /// This function returns `true` if the record was ingested, and `false` if it was filtered out\n    
pub async fn ingest_record(\n        &mut self,\n        interpreted: InterpretedWalRecord,\n        modification: &mut DatadirModification<'_>,\n        ctx: &RequestContext,\n    ) -> Result<bool, WalIngestError> {\n        WAL_INGEST.records_received.inc();\n        let prev_len = modification.len();\n\n        modification.set_lsn(interpreted.next_record_lsn)?;\n\n        if matches!(interpreted.flush_uncommitted, FlushUncommittedRecords::Yes) {\n            // Records of this type should always be preceded by a commit(), as they\n            // rely on reading data pages back from the Timeline.\n            assert!(!modification.has_dirty_data());\n        }\n\n        assert!(!self.checkpoint_modified);\n        if interpreted.xid != pg_constants::INVALID_TRANSACTION_ID\n            && self.checkpoint.update_next_xid(interpreted.xid)\n        {\n            self.checkpoint_modified = true;\n        }\n\n        failpoint_support::sleep_millis_async!(\"wal-ingest-record-sleep\");\n\n        match interpreted.metadata_record {\n            Some(MetadataRecord::Heapam(rec)) => match rec {\n                HeapamRecord::ClearVmBits(clear_vm_bits) => {\n                    self.ingest_clear_vm_bits(clear_vm_bits, modification, ctx)\n                        .await?;\n                }\n            },\n            Some(MetadataRecord::Neonrmgr(rec)) => match rec {\n                NeonrmgrRecord::ClearVmBits(clear_vm_bits) => {\n                    self.ingest_clear_vm_bits(clear_vm_bits, modification, ctx)\n                        .await?;\n                }\n            },\n            Some(MetadataRecord::Smgr(rec)) => match rec {\n                SmgrRecord::Create(create) => {\n                    self.ingest_xlog_smgr_create(create, modification, ctx)\n                        .await?;\n                }\n                SmgrRecord::Truncate(truncate) => {\n                    self.ingest_xlog_smgr_truncate(truncate, modification, ctx)\n                        
.await?;\n                }\n            },\n            Some(MetadataRecord::Dbase(rec)) => match rec {\n                DbaseRecord::Create(create) => {\n                    self.ingest_xlog_dbase_create(create, modification, ctx)\n                        .await?;\n                }\n                DbaseRecord::Drop(drop) => {\n                    self.ingest_xlog_dbase_drop(drop, modification, ctx).await?;\n                }\n            },\n            Some(MetadataRecord::Clog(rec)) => match rec {\n                ClogRecord::ZeroPage(zero_page) => {\n                    self.ingest_clog_zero_page(zero_page, modification, ctx)\n                        .await?;\n                }\n                ClogRecord::Truncate(truncate) => {\n                    self.ingest_clog_truncate(truncate, modification, ctx)\n                        .await?;\n                }\n            },\n            Some(MetadataRecord::Xact(rec)) => {\n                self.ingest_xact_record(rec, modification, ctx).await?;\n            }\n            Some(MetadataRecord::MultiXact(rec)) => match rec {\n                MultiXactRecord::ZeroPage(zero_page) => {\n                    self.ingest_multixact_zero_page(zero_page, modification, ctx)\n                        .await?;\n                }\n                MultiXactRecord::Create(create) => {\n                    self.ingest_multixact_create(modification, &create)?;\n                }\n                MultiXactRecord::Truncate(truncate) => {\n                    self.ingest_multixact_truncate(modification, &truncate, ctx)\n                        .await?;\n                }\n            },\n            Some(MetadataRecord::Relmap(rec)) => match rec {\n                RelmapRecord::Update(update) => {\n                    self.ingest_relmap_update(update, modification, ctx).await?;\n                }\n            },\n            Some(MetadataRecord::Xlog(rec)) => match rec {\n                XlogRecord::Raw(raw) => {\n                   
 self.ingest_raw_xlog_record(raw, modification, ctx).await?;\n                }\n            },\n            Some(MetadataRecord::LogicalMessage(rec)) => match rec {\n                LogicalMessageRecord::Put(put) => {\n                    self.ingest_logical_message_put(put, modification, ctx)\n                        .await?;\n                }\n                #[cfg(feature = \"testing\")]\n                LogicalMessageRecord::Failpoint => {\n                    // This is a convenient way to make the WAL ingestion pause at\n                    // particular point in the WAL. For more fine-grained control,\n                    // we could peek into the message and only pause if it contains\n                    // a particular string, for example, but this is enough for now.\n                    failpoint_support::sleep_millis_async!(\n                        \"pageserver-wal-ingest-logical-message-sleep\"\n                    );\n                }\n            },\n            Some(MetadataRecord::Standby(rec)) => {\n                self.ingest_standby_record(rec).unwrap();\n            }\n            Some(MetadataRecord::Replorigin(rec)) => {\n                self.ingest_replorigin_record(rec, modification).await?;\n            }\n            None => {\n                // There are two cases through which we end up here:\n                // 1. The resource manager for the original PG WAL record\n                //    is [`pg_constants::RM_TBLSPC_ID`]. This is not a supported\n                //    record type within Neon.\n                // 2. 
The resource manager id was unknown to\n                //    [`wal_decoder::decoder::MetadataRecord::from_decoded`].\n                // TODO(vlad): Tighten this up more once we build confidence\n                // that case (2) does not happen in the field.\n            }\n        }\n\n        modification\n            .ingest_batch(interpreted.batch, &self.shard, ctx)\n            .await?;\n\n        // If checkpoint data was updated, store the new version in the repository\n        if self.checkpoint_modified {\n            let new_checkpoint_bytes = self.checkpoint.encode()?;\n\n            modification.put_checkpoint(new_checkpoint_bytes)?;\n            self.checkpoint_modified = false;\n        }\n\n        // Note that at this point this record is only cached in the modification\n        // until commit() is called to flush the data into the repository and update\n        // the latest LSN.\n\n        Ok(modification.len() > prev_len)\n    }\n\n    /// This is the same as AdjustToFullTransactionId(xid) in PostgreSQL\n    fn adjust_to_full_transaction_id(&self, xid: TransactionId) -> Result<u64, WalIngestError> {\n        let next_full_xid =\n            enum_pgversion_dispatch!(&self.checkpoint, CheckPoint, cp, { cp.nextXid.value });\n\n        let next_xid = (next_full_xid) as u32;\n        let mut epoch = (next_full_xid >> 32) as u32;\n\n        if xid > next_xid {\n            // Wraparound occurred, must be from a prev epoch.\n            if epoch == 0 {\n                Err(WalIngestErrorKind::LogicalError(anyhow::anyhow!(\n                    \"apparent XID wraparound with prepared transaction XID {xid}, nextXid is {next_full_xid}\"\n                )))?;\n            }\n            epoch -= 1;\n        }\n\n        Ok(((epoch as u64) << 32) | xid as u64)\n    }\n\n    async fn ingest_clear_vm_bits(\n        &mut self,\n        clear_vm_bits: ClearVmBits,\n        modification: &mut DatadirModification<'_>,\n        ctx: &RequestContext,\n    ) -> 
Result<(), WalIngestError> {\n        let ClearVmBits {\n            new_heap_blkno,\n            old_heap_blkno,\n            flags,\n            vm_rel,\n        } = clear_vm_bits;\n        // Clear the VM bits if required.\n        let mut new_vm_blk = new_heap_blkno.map(pg_constants::HEAPBLK_TO_MAPBLOCK);\n        let mut old_vm_blk = old_heap_blkno.map(pg_constants::HEAPBLK_TO_MAPBLOCK);\n\n        // VM bits can only be cleared on the shard(s) owning the VM relation, and must be within\n        // its view of the VM relation size. Out of caution, error instead of failing WAL ingestion,\n        // as there has historically been cases where PostgreSQL has cleared spurious VM pages. See:\n        // https://github.com/neondatabase/neon/pull/10634.\n        let Some(vm_size) = get_relsize(modification, vm_rel, ctx).await? else {\n            critical_timeline!(\n                modification.tline.tenant_shard_id,\n                modification.tline.timeline_id,\n                // Hadron: No need to raise the corruption flag here; the caller of `ingest_record()` will do it.\n                None::<&AtomicBool>,\n                \"clear_vm_bits for unknown VM relation {vm_rel}\"\n            );\n            return Ok(());\n        };\n        if let Some(blknum) = new_vm_blk {\n            if blknum >= vm_size {\n                critical_timeline!(\n                    modification.tline.tenant_shard_id,\n                    modification.tline.timeline_id,\n                    // Hadron: No need to raise the corruption flag here; the caller of `ingest_record()` will do it.\n                    None::<&AtomicBool>,\n                    \"new_vm_blk {blknum} not in {vm_rel} of size {vm_size}\"\n                );\n                new_vm_blk = None;\n            }\n        }\n        if let Some(blknum) = old_vm_blk {\n            if blknum >= vm_size {\n                critical_timeline!(\n                    modification.tline.tenant_shard_id,\n                    
modification.tline.timeline_id,\n                    // Hadron: No need to raise the corruption flag here; the caller of `ingest_record()` will do it.\n                    None::<&AtomicBool>,\n                    \"old_vm_blk {blknum} not in {vm_rel} of size {vm_size}\"\n                );\n                old_vm_blk = None;\n            }\n        }\n\n        if new_vm_blk.is_none() && old_vm_blk.is_none() {\n            return Ok(());\n        } else if new_vm_blk == old_vm_blk {\n            // An UPDATE record that needs to clear the bits for both old and the new page, both of\n            // which reside on the same VM page.\n            self.put_rel_wal_record(\n                modification,\n                vm_rel,\n                new_vm_blk.unwrap(),\n                NeonWalRecord::ClearVisibilityMapFlags {\n                    new_heap_blkno,\n                    old_heap_blkno,\n                    flags,\n                },\n                ctx,\n            )\n            .await?;\n        } else {\n            // Clear VM bits for one heap page, or for two pages that reside on different VM pages.\n            if let Some(new_vm_blk) = new_vm_blk {\n                self.put_rel_wal_record(\n                    modification,\n                    vm_rel,\n                    new_vm_blk,\n                    NeonWalRecord::ClearVisibilityMapFlags {\n                        new_heap_blkno,\n                        old_heap_blkno: None,\n                        flags,\n                    },\n                    ctx,\n                )\n                .await?;\n            }\n            if let Some(old_vm_blk) = old_vm_blk {\n                self.put_rel_wal_record(\n                    modification,\n                    vm_rel,\n                    old_vm_blk,\n                    NeonWalRecord::ClearVisibilityMapFlags {\n                        new_heap_blkno: None,\n                        old_heap_blkno,\n                        flags,\n             
       },\n                    ctx,\n                )\n                .await?;\n            }\n        }\n        Ok(())\n    }\n\n    /// Subroutine of ingest_record(), to handle an XLOG_DBASE_CREATE record.\n    async fn ingest_xlog_dbase_create(\n        &mut self,\n        create: DbaseCreate,\n        modification: &mut DatadirModification<'_>,\n        ctx: &RequestContext,\n    ) -> Result<(), WalIngestError> {\n        let DbaseCreate {\n            db_id,\n            tablespace_id,\n            src_db_id,\n            src_tablespace_id,\n        } = create;\n\n        let rels = modification\n            .tline\n            .list_rels(\n                src_tablespace_id,\n                src_db_id,\n                Version::Modified(modification),\n                ctx,\n            )\n            .await?;\n\n        debug!(\"ingest_xlog_dbase_create: {} rels\", rels.len());\n\n        // Copy relfilemap\n        let filemap = modification\n            .tline\n            .get_relmap_file(\n                src_tablespace_id,\n                src_db_id,\n                Version::Modified(modification),\n                ctx,\n            )\n            .await?;\n        modification\n            .put_relmap_file(tablespace_id, db_id, filemap, ctx)\n            .await?;\n\n        let mut num_rels_copied = 0;\n        let mut num_blocks_copied = 0;\n        for src_rel in rels {\n            assert_eq!(src_rel.spcnode, src_tablespace_id);\n            assert_eq!(src_rel.dbnode, src_db_id);\n\n            let nblocks = modification\n                .tline\n                .get_rel_size(src_rel, Version::Modified(modification), ctx)\n                .await?;\n            let dst_rel = RelTag {\n                spcnode: tablespace_id,\n                dbnode: db_id,\n                relnode: src_rel.relnode,\n                forknum: src_rel.forknum,\n            };\n\n            modification.put_rel_creation(dst_rel, nblocks, ctx).await?;\n\n            // 
Copy content\n            debug!(\"copying rel {} to {}, {} blocks\", src_rel, dst_rel, nblocks);\n            for blknum in 0..nblocks {\n                // Sharding:\n                //  - src and dst are always on the same shard, because they differ only by dbNode, and\n                //    dbNode is not included in the hash inputs for sharding.\n                //  - This WAL command is replayed on all shards, but each shard only copies the blocks\n                //    that belong to it.\n                let src_key = rel_block_to_key(src_rel, blknum);\n                if !self.shard.is_key_local(&src_key) {\n                    debug!(\n                        \"Skipping non-local key {} during XLOG_DBASE_CREATE\",\n                        src_key\n                    );\n                    continue;\n                }\n                debug!(\n                    \"copying block {} from {} ({}) to {}\",\n                    blknum, src_rel, src_key, dst_rel\n                );\n\n                let content = modification\n                    .tline\n                    .get_rel_page_at_lsn(\n                        src_rel,\n                        blknum,\n                        Version::Modified(modification),\n                        ctx,\n                        crate::tenant::storage_layer::IoConcurrency::sequential(),\n                    )\n                    .await?;\n                modification.put_rel_page_image(dst_rel, blknum, content)?;\n                num_blocks_copied += 1;\n            }\n\n            num_rels_copied += 1;\n        }\n\n        info!(\n            \"Created database {}/{}, copied {} blocks in {} rels\",\n            tablespace_id, db_id, num_blocks_copied, num_rels_copied\n        );\n        Ok(())\n    }\n\n    async fn ingest_xlog_dbase_drop(\n        &mut self,\n        dbase_drop: DbaseDrop,\n        modification: &mut DatadirModification<'_>,\n        ctx: &RequestContext,\n    ) -> Result<(), WalIngestError> 
{\n        let DbaseDrop {\n            db_id,\n            tablespace_ids,\n        } = dbase_drop;\n        for tablespace_id in tablespace_ids {\n            trace!(\"Drop db {}, {}\", tablespace_id, db_id);\n            modification.drop_dbdir(tablespace_id, db_id, ctx).await?;\n        }\n\n        Ok(())\n    }\n\n    async fn ingest_xlog_smgr_create(\n        &mut self,\n        create: SmgrCreate,\n        modification: &mut DatadirModification<'_>,\n        ctx: &RequestContext,\n    ) -> Result<(), WalIngestError> {\n        let SmgrCreate { rel } = create;\n        self.put_rel_creation(modification, rel, ctx).await?;\n        Ok(())\n    }\n\n    /// Subroutine of ingest_record(), to handle an XLOG_SMGR_TRUNCATE record.\n    ///\n    /// This is the same logic as in PostgreSQL's smgr_redo() function.\n    async fn ingest_xlog_smgr_truncate(\n        &mut self,\n        truncate: XlSmgrTruncate,\n        modification: &mut DatadirModification<'_>,\n        ctx: &RequestContext,\n    ) -> Result<(), WalIngestError> {\n        let XlSmgrTruncate {\n            blkno,\n            rnode,\n            flags,\n        } = truncate;\n\n        let spcnode = rnode.spcnode;\n        let dbnode = rnode.dbnode;\n        let relnode = rnode.relnode;\n\n        if flags & pg_constants::SMGR_TRUNCATE_HEAP != 0 {\n            let rel = RelTag {\n                spcnode,\n                dbnode,\n                relnode,\n                forknum: MAIN_FORKNUM,\n            };\n\n            self.put_rel_truncation(modification, rel, blkno, ctx)\n                .await?;\n        }\n        if flags & pg_constants::SMGR_TRUNCATE_FSM != 0 {\n            let rel = RelTag {\n                spcnode,\n                dbnode,\n                relnode,\n                forknum: FSM_FORKNUM,\n            };\n\n            // Zero out the last remaining FSM page, if this shard owns it. 
We are not precise here,\n            // and instead of digging in the FSM bitmap format we just clear the whole page.\n            let fsm_logical_page_no = blkno / pg_constants::SLOTS_PER_FSM_PAGE;\n            let mut fsm_physical_page_no = fsm_logical_to_physical(fsm_logical_page_no);\n            if blkno % pg_constants::SLOTS_PER_FSM_PAGE != 0\n                && self\n                    .shard\n                    .is_key_local(&rel_block_to_key(rel, fsm_physical_page_no))\n            {\n                modification.put_rel_page_image_zero(rel, fsm_physical_page_no)?;\n                fsm_physical_page_no += 1;\n            }\n            // Truncate this shard's view of the FSM relation size, if it even has one.\n            let nblocks = get_relsize(modification, rel, ctx).await?.unwrap_or(0);\n            if nblocks > fsm_physical_page_no {\n                self.put_rel_truncation(modification, rel, fsm_physical_page_no, ctx)\n                    .await?;\n            }\n        }\n        if flags & pg_constants::SMGR_TRUNCATE_VM != 0 {\n            let rel = RelTag {\n                spcnode,\n                dbnode,\n                relnode,\n                forknum: VISIBILITYMAP_FORKNUM,\n            };\n\n            // last remaining block, byte, and bit\n            let mut vm_page_no = blkno / (pg_constants::VM_HEAPBLOCKS_PER_PAGE as u32);\n            let trunc_byte = blkno as usize % pg_constants::VM_HEAPBLOCKS_PER_PAGE\n                / pg_constants::VM_HEAPBLOCKS_PER_BYTE;\n            let trunc_offs = blkno as usize % pg_constants::VM_HEAPBLOCKS_PER_BYTE\n                * pg_constants::VM_BITS_PER_HEAPBLOCK;\n\n            // Unless the new size is exactly at a visibility map page boundary, the\n            // tail bits in the last remaining map page, representing truncated heap\n            // blocks, need to be cleared. 
This is not only tidy, but also necessary\n            // because we don't get a chance to clear the bits if the heap is extended\n            // again. Only do this on the shard that owns the page.\n            if (trunc_byte != 0 || trunc_offs != 0)\n                && self.shard.is_key_local(&rel_block_to_key(rel, vm_page_no))\n            {\n                modification.put_rel_wal_record(\n                    rel,\n                    vm_page_no,\n                    NeonWalRecord::TruncateVisibilityMap {\n                        trunc_byte,\n                        trunc_offs,\n                    },\n                )?;\n                vm_page_no += 1;\n            }\n            // Truncate this shard's view of the VM relation size, if it even has one.\n            let nblocks = get_relsize(modification, rel, ctx).await?.unwrap_or(0);\n            if nblocks > vm_page_no {\n                self.put_rel_truncation(modification, rel, vm_page_no, ctx)\n                    .await?;\n            }\n        }\n        Ok(())\n    }\n\n    fn warn_on_ingest_lag(\n        &mut self,\n        conf: &crate::config::PageServerConf,\n        wal_timestamp: TimestampTz,\n    ) {\n        debug_assert_current_span_has_tenant_and_timeline_id();\n        let now = SystemTime::now();\n        let rate_limits = &mut self.warn_ingest_lag;\n\n        let ts = enum_pgversion_dispatch!(&self.checkpoint, CheckPoint, _cp, {\n            pgv::xlog_utils::try_from_pg_timestamp(wal_timestamp)\n        });\n\n        match ts {\n            Ok(ts) => {\n                match now.duration_since(ts) {\n                    Ok(lag) => {\n                        if lag > conf.wait_lsn_timeout {\n                            rate_limits.lag_msg_ratelimit.call2(|rate_limit_stats| {\n                                if let Some(cooldown) = self.attach_wal_lag_cooldown.get() {\n                                    if std::time::Instant::now() < cooldown.active_until && lag <= cooldown.max_lag 
{\n                                        return;\n                                    }\n                                } else {\n                                    // Still loading? We shouldn't be here\n                                }\n                                let lag = humantime::format_duration(lag);\n                                warn!(%rate_limit_stats, %lag, \"ingesting record with timestamp lagging more than wait_lsn_timeout\");\n                            })\n                        }\n                    }\n                    Err(e) => {\n                        let delta_t = e.duration();\n                        // determined by prod victoriametrics query: 1000 * (timestamp(node_time_seconds{neon_service=\"pageserver\"}) - node_time_seconds)\n                        // => https://www.robustperception.io/time-metric-from-the-node-exporter/\n                        const IGNORED_DRIFT: Duration = Duration::from_millis(100);\n                        if delta_t > IGNORED_DRIFT {\n                            let delta_t = humantime::format_duration(delta_t);\n                            rate_limits.future_lsn_msg_ratelimit.call2(|rate_limit_stats| {\n                                warn!(%rate_limit_stats, %delta_t, \"ingesting record with timestamp from future\");\n                            })\n                        }\n                    }\n                };\n            }\n            Err(error) => {\n                rate_limits.timestamp_invalid_msg_ratelimit.call2(|rate_limit_stats| {\n                    warn!(%rate_limit_stats, %error, \"ingesting record with invalid timestamp, cannot calculate lag and will fail find-lsn-for-timestamp type queries\");\n                })\n            }\n        }\n    }\n\n    /// Subroutine of ingest_record(), to handle an XLOG_XACT_* records.\n    ///\n    async fn ingest_xact_record(\n        &mut self,\n        record: XactRecord,\n        modification: &mut DatadirModification<'_>,\n        
ctx: &RequestContext,\n    ) -> Result<(), WalIngestError> {\n        let (xact_common, is_commit, is_prepared) = match record {\n            XactRecord::Prepare(XactPrepare { xl_xid, data }) => {\n                let xid: u64 = if modification.tline.pg_version >= PgMajorVersion::PG17 {\n                    self.adjust_to_full_transaction_id(xl_xid)?\n                } else {\n                    xl_xid as u64\n                };\n                return modification.put_twophase_file(xid, data, ctx).await;\n            }\n            XactRecord::Commit(common) => (common, true, false),\n            XactRecord::Abort(common) => (common, false, false),\n            XactRecord::CommitPrepared(common) => (common, true, true),\n            XactRecord::AbortPrepared(common) => (common, false, true),\n        };\n\n        let XactCommon {\n            parsed,\n            origin_id,\n            xl_xid,\n            lsn,\n        } = xact_common;\n\n        // Record update of CLOG pages\n        let mut pageno = parsed.xid / pg_constants::CLOG_XACTS_PER_PAGE;\n        let mut segno = pageno / pg_constants::SLRU_PAGES_PER_SEGMENT;\n        let mut rpageno = pageno % pg_constants::SLRU_PAGES_PER_SEGMENT;\n        let mut page_xids: Vec<TransactionId> = vec![parsed.xid];\n\n        self.warn_on_ingest_lag(modification.tline.conf, parsed.xact_time);\n\n        for subxact in &parsed.subxacts {\n            let subxact_pageno = subxact / pg_constants::CLOG_XACTS_PER_PAGE;\n            if subxact_pageno != pageno {\n                // This subxact goes to different page. 
Write the record\n                // for all the XIDs on the previous page, and continue\n                // accumulating XIDs on this new page.\n                modification.put_slru_wal_record(\n                    SlruKind::Clog,\n                    segno,\n                    rpageno,\n                    if is_commit {\n                        NeonWalRecord::ClogSetCommitted {\n                            xids: page_xids,\n                            timestamp: parsed.xact_time,\n                        }\n                    } else {\n                        NeonWalRecord::ClogSetAborted { xids: page_xids }\n                    },\n                )?;\n                page_xids = Vec::new();\n            }\n            pageno = subxact_pageno;\n            segno = pageno / pg_constants::SLRU_PAGES_PER_SEGMENT;\n            rpageno = pageno % pg_constants::SLRU_PAGES_PER_SEGMENT;\n            page_xids.push(*subxact);\n        }\n        modification.put_slru_wal_record(\n            SlruKind::Clog,\n            segno,\n            rpageno,\n            if is_commit {\n                NeonWalRecord::ClogSetCommitted {\n                    xids: page_xids,\n                    timestamp: parsed.xact_time,\n                }\n            } else {\n                NeonWalRecord::ClogSetAborted { xids: page_xids }\n            },\n        )?;\n\n        // Group relations to drop by dbNode.  This map will contain all relations that _might_\n        // exist, we will reduce it to which ones really exist later.  
This map can be huge if\n        // the transaction touches a huge number of relations (there is no bound on this in\n        // postgres).\n        let mut drop_relations: HashMap<(u32, u32), Vec<RelTag>> = HashMap::new();\n\n        for xnode in &parsed.xnodes {\n            for forknum in MAIN_FORKNUM..=INIT_FORKNUM {\n                let rel = RelTag {\n                    forknum,\n                    spcnode: xnode.spcnode,\n                    dbnode: xnode.dbnode,\n                    relnode: xnode.relnode,\n                };\n                drop_relations\n                    .entry((xnode.spcnode, xnode.dbnode))\n                    .or_default()\n                    .push(rel);\n            }\n        }\n\n        // Execute relation drops in a batch: the number may be huge, so deleting individually is prohibitively expensive\n        modification.put_rel_drops(drop_relations, ctx).await?;\n\n        if origin_id != 0 {\n            modification\n                .set_replorigin(origin_id, parsed.origin_lsn)\n                .await?;\n        }\n\n        if is_prepared {\n            // Remove twophase file. 
see RemoveTwoPhaseFile() in postgres code\n            trace!(\n                \"Drop twophaseFile for xid {} parsed_xact.xid {} here at {}\",\n                xl_xid, parsed.xid, lsn,\n            );\n\n            let xid: u64 = if modification.tline.pg_version >= PgMajorVersion::PG17 {\n                self.adjust_to_full_transaction_id(parsed.xid)?\n            } else {\n                parsed.xid as u64\n            };\n            modification.drop_twophase_file(xid, ctx).await?;\n        }\n\n        Ok(())\n    }\n\n    async fn ingest_clog_truncate(\n        &mut self,\n        truncate: ClogTruncate,\n        modification: &mut DatadirModification<'_>,\n        ctx: &RequestContext,\n    ) -> Result<(), WalIngestError> {\n        let ClogTruncate {\n            pageno,\n            oldest_xid,\n            oldest_xid_db,\n        } = truncate;\n\n        info!(\n            \"RM_CLOG_ID truncate pageno {} oldestXid {} oldestXidDB {}\",\n            pageno, oldest_xid, oldest_xid_db\n        );\n\n        // In Postgres, oldestXid and oldestXidDB are updated in memory when the CLOG is\n        // truncated, but a checkpoint record with the updated values isn't written until\n        // later. 
In Neon, a server can start at any LSN, not just on a checkpoint record,\n        // so we keep the oldestXid and oldestXidDB up-to-date.\n        enum_pgversion_dispatch!(&mut self.checkpoint, CheckPoint, cp, {\n            cp.oldestXid = oldest_xid;\n            cp.oldestXidDB = oldest_xid_db;\n        });\n        self.checkpoint_modified = true;\n\n        // TODO Treat AdvanceOldestClogXid() or write a comment why we don't need it\n\n        let latest_page_number =\n            enum_pgversion_dispatch!(self.checkpoint, CheckPoint, cp, { cp.nextXid.value }) as u32\n                / pg_constants::CLOG_XACTS_PER_PAGE;\n\n        // Now delete all segments containing pages between xlrec.pageno\n        // and latest_page_number.\n\n        // First, make an important safety check:\n        // the current endpoint page must not be eligible for removal.\n        // See SimpleLruTruncate() in slru.c\n        if dispatch_pgversion!(modification.tline.pg_version, {\n            pgv::nonrelfile_utils::clogpage_precedes(latest_page_number, pageno)\n        }) {\n            info!(\"could not truncate directory pg_xact apparent wraparound\");\n            return Ok(());\n        }\n\n        // Iterate via SLRU CLOG segments and drop segments that we're ready to truncate\n        //\n        // We cannot pass 'lsn' to the Timeline.list_nonrels(), or it\n        // will block waiting for the last valid LSN to advance up to\n        // it. 
So we use the previous record's LSN in the get calls\n        // instead.\n        if modification.tline.get_shard_identity().is_shard_zero() {\n            for segno in modification\n                .tline\n                .list_slru_segments(SlruKind::Clog, Version::Modified(modification), ctx)\n                .await?\n            {\n                let segpage = segno * pg_constants::SLRU_PAGES_PER_SEGMENT;\n\n                let may_delete = dispatch_pgversion!(modification.tline.pg_version, {\n                    pgv::nonrelfile_utils::slru_may_delete_clogsegment(segpage, pageno)\n                });\n\n                if may_delete {\n                    modification\n                        .drop_slru_segment(SlruKind::Clog, segno, ctx)\n                        .await?;\n                    trace!(\"Drop CLOG segment {:>04X}\", segno);\n                }\n            }\n        }\n\n        Ok(())\n    }\n\n    async fn ingest_clog_zero_page(\n        &mut self,\n        zero_page: ClogZeroPage,\n        modification: &mut DatadirModification<'_>,\n        ctx: &RequestContext,\n    ) -> Result<(), WalIngestError> {\n        let ClogZeroPage { segno, rpageno } = zero_page;\n\n        self.put_slru_page_image(\n            modification,\n            SlruKind::Clog,\n            segno,\n            rpageno,\n            ZERO_PAGE.clone(),\n            ctx,\n        )\n        .await\n    }\n\n    fn ingest_multixact_create(\n        &mut self,\n        modification: &mut DatadirModification,\n        xlrec: &XlMultiXactCreate,\n    ) -> Result<(), WalIngestError> {\n        // Create WAL record for updating the multixact-offsets page\n        let pageno = xlrec.mid / pg_constants::MULTIXACT_OFFSETS_PER_PAGE as u32;\n        let segno = pageno / pg_constants::SLRU_PAGES_PER_SEGMENT;\n        let rpageno = pageno % pg_constants::SLRU_PAGES_PER_SEGMENT;\n\n        modification.put_slru_wal_record(\n            SlruKind::MultiXactOffsets,\n            segno,\n    
        rpageno,\n            NeonWalRecord::MultixactOffsetCreate {\n                mid: xlrec.mid,\n                moff: xlrec.moff,\n            },\n        )?;\n\n        // Create WAL records for the update of each affected multixact-members page\n        let mut members = xlrec.members.iter();\n        let mut offset = xlrec.moff;\n        loop {\n            let pageno = offset / pg_constants::MULTIXACT_MEMBERS_PER_PAGE as u32;\n\n            // How many members fit on this page?\n            let page_remain = pg_constants::MULTIXACT_MEMBERS_PER_PAGE as u32\n                - offset % pg_constants::MULTIXACT_MEMBERS_PER_PAGE as u32;\n\n            let mut this_page_members: Vec<MultiXactMember> = Vec::new();\n            for _ in 0..page_remain {\n                if let Some(m) = members.next() {\n                    this_page_members.push(m.clone());\n                } else {\n                    break;\n                }\n            }\n            if this_page_members.is_empty() {\n                // all done\n                break;\n            }\n            let n_this_page = this_page_members.len();\n\n            modification.put_slru_wal_record(\n                SlruKind::MultiXactMembers,\n                pageno / pg_constants::SLRU_PAGES_PER_SEGMENT,\n                pageno % pg_constants::SLRU_PAGES_PER_SEGMENT,\n                NeonWalRecord::MultixactMembersCreate {\n                    moff: offset,\n                    members: this_page_members,\n                },\n            )?;\n\n            // Note: The multixact members can wrap around, even within one WAL record.\n            offset = offset.wrapping_add(n_this_page as u32);\n        }\n        let next_offset = offset;\n        assert!(xlrec.moff.wrapping_add(xlrec.nmembers) == next_offset);\n\n        // Update next-multi-xid and next-offset\n        //\n        // NB: In PostgreSQL, the next-multi-xid stored in the control file is allowed to\n        // go to 0, and it's fixed up 
by skipping to FirstMultiXactId in functions that\n        // read it, like GetNewMultiXactId(). This is different from how nextXid is\n        // incremented! nextXid skips over < FirstNormalTransactionId when the value\n        // is stored, so it's never 0 in a checkpoint.\n        //\n        // I don't know why it's done that way, it seems less error-prone to skip over 0\n        // when the value is stored rather than when it's read. But let's do it the same\n        // way here.\n        let next_multi_xid = xlrec.mid.wrapping_add(1);\n\n        if self\n            .checkpoint\n            .update_next_multixid(next_multi_xid, next_offset)\n        {\n            self.checkpoint_modified = true;\n        }\n\n        // Also update the next-xid with the highest member. According to the comments in\n        // multixact_redo(), this shouldn't be necessary, but let's do the same here.\n        let max_mbr_xid = xlrec.members.iter().fold(None, |acc, mbr| {\n            if let Some(max_xid) = acc {\n                if mbr.xid.wrapping_sub(max_xid) as i32 > 0 {\n                    Some(mbr.xid)\n                } else {\n                    acc\n                }\n            } else {\n                Some(mbr.xid)\n            }\n        });\n\n        if let Some(max_xid) = max_mbr_xid {\n            if self.checkpoint.update_next_xid(max_xid) {\n                self.checkpoint_modified = true;\n            }\n        }\n        Ok(())\n    }\n\n    async fn ingest_multixact_truncate(\n        &mut self,\n        modification: &mut DatadirModification<'_>,\n        xlrec: &XlMultiXactTruncate,\n        ctx: &RequestContext,\n    ) -> Result<(), WalIngestError> {\n        let (maxsegment, startsegment, endsegment) =\n            enum_pgversion_dispatch!(&mut self.checkpoint, CheckPoint, cp, {\n                cp.oldestMulti = xlrec.end_trunc_off;\n                cp.oldestMultiDB = xlrec.oldest_multi_db;\n                let maxsegment: i32 = 
pgv::nonrelfile_utils::mx_offset_to_member_segment(\n                    pg_constants::MAX_MULTIXACT_OFFSET,\n                );\n                let startsegment: i32 =\n                    pgv::nonrelfile_utils::mx_offset_to_member_segment(xlrec.start_trunc_memb);\n                let endsegment: i32 =\n                    pgv::nonrelfile_utils::mx_offset_to_member_segment(xlrec.end_trunc_memb);\n                (maxsegment, startsegment, endsegment)\n            });\n\n        self.checkpoint_modified = true;\n\n        // PerformMembersTruncation\n        let mut segment: i32 = startsegment;\n\n        // Delete all the segments except the last one. The last segment can still\n        // contain, possibly partially, valid data.\n        if modification.tline.get_shard_identity().is_shard_zero() {\n            while segment != endsegment {\n                modification\n                    .drop_slru_segment(SlruKind::MultiXactMembers, segment as u32, ctx)\n                    .await?;\n\n                /* move to next segment, handling wraparound correctly */\n                if segment == maxsegment {\n                    segment = 0;\n                } else {\n                    segment += 1;\n                }\n            }\n        }\n\n        // Truncate offsets\n        // FIXME: this did not handle wraparound correctly\n\n        Ok(())\n    }\n\n    async fn ingest_multixact_zero_page(\n        &mut self,\n        zero_page: MultiXactZeroPage,\n        modification: &mut DatadirModification<'_>,\n        ctx: &RequestContext,\n    ) -> Result<(), WalIngestError> {\n        let MultiXactZeroPage {\n            slru_kind,\n            segno,\n            rpageno,\n        } = zero_page;\n        self.put_slru_page_image(\n            modification,\n            slru_kind,\n            segno,\n            rpageno,\n            ZERO_PAGE.clone(),\n            ctx,\n        )\n        .await\n    }\n\n    async fn ingest_relmap_update(\n        &mut 
self,\n        update: RelmapUpdate,\n        modification: &mut DatadirModification<'_>,\n        ctx: &RequestContext,\n    ) -> Result<(), WalIngestError> {\n        let RelmapUpdate { update, buf } = update;\n\n        modification\n            .put_relmap_file(update.tsid, update.dbid, buf, ctx)\n            .await\n    }\n\n    async fn ingest_raw_xlog_record(\n        &mut self,\n        raw_record: RawXlogRecord,\n        modification: &mut DatadirModification<'_>,\n        ctx: &RequestContext,\n    ) -> Result<(), WalIngestError> {\n        let RawXlogRecord { info, lsn, mut buf } = raw_record;\n        let pg_version = modification.tline.pg_version;\n\n        if info == pg_constants::XLOG_PARAMETER_CHANGE {\n            if let CheckPoint::V17(cp) = &mut self.checkpoint {\n                let rec = v17::XlParameterChange::decode(&mut buf);\n                cp.wal_level = rec.wal_level;\n                self.checkpoint_modified = true;\n            }\n        } else if info == pg_constants::XLOG_END_OF_RECOVERY {\n            if let CheckPoint::V17(cp) = &mut self.checkpoint {\n                let rec = v17::XlEndOfRecovery::decode(&mut buf);\n                cp.wal_level = rec.wal_level;\n                self.checkpoint_modified = true;\n            }\n        }\n\n        enum_pgversion_dispatch!(&mut self.checkpoint, CheckPoint, cp, {\n            if info == pg_constants::XLOG_NEXTOID {\n                let next_oid = buf.get_u32_le();\n                if cp.nextOid != next_oid {\n                    cp.nextOid = next_oid;\n                    self.checkpoint_modified = true;\n                }\n            } else if info == pg_constants::XLOG_CHECKPOINT_ONLINE\n                || info == pg_constants::XLOG_CHECKPOINT_SHUTDOWN\n            {\n                let mut checkpoint_bytes = [0u8; pgv::xlog_utils::SIZEOF_CHECKPOINT];\n                buf.copy_to_slice(&mut checkpoint_bytes);\n                let xlog_checkpoint = 
pgv::CheckPoint::decode(&checkpoint_bytes)?;\n                trace!(\n                    \"xlog_checkpoint.oldestXid={}, checkpoint.oldestXid={}\",\n                    xlog_checkpoint.oldestXid, cp.oldestXid\n                );\n                if (cp.oldestXid.wrapping_sub(xlog_checkpoint.oldestXid) as i32) < 0 {\n                    cp.oldestXid = xlog_checkpoint.oldestXid;\n                }\n                trace!(\n                    \"xlog_checkpoint.oldestActiveXid={}, checkpoint.oldestActiveXid={}\",\n                    xlog_checkpoint.oldestActiveXid, cp.oldestActiveXid\n                );\n\n                // A shutdown checkpoint has `oldestActiveXid == InvalidTransactionid`,\n                // because at shutdown, all in-progress transactions will implicitly\n                // end. Postgres startup code knows that, and allows hot standby to start\n                // immediately from a shutdown checkpoint.\n                //\n                // In Neon, Postgres hot standby startup always behaves as if starting from\n                // an online checkpoint. 
It needs a valid `oldestActiveXid` value, so\n                // instead of overwriting self.checkpoint.oldestActiveXid with\n                // InvalidTransactionid from the checkpoint WAL record, update it to a\n                // proper value, knowing that there are no in-progress transactions at this\n                // point, except for prepared transactions.\n                //\n                // See also the neon code changes in the InitWalRecovery() function.\n                if xlog_checkpoint.oldestActiveXid == pg_constants::INVALID_TRANSACTION_ID\n                    && info == pg_constants::XLOG_CHECKPOINT_SHUTDOWN\n                {\n                    let oldest_active_xid = if pg_version >= PgMajorVersion::PG17 {\n                        let mut oldest_active_full_xid = cp.nextXid.value;\n                        for xid in modification.tline.list_twophase_files(lsn, ctx).await? {\n                            if xid < oldest_active_full_xid {\n                                oldest_active_full_xid = xid;\n                            }\n                        }\n                        oldest_active_full_xid as u32\n                    } else {\n                        let mut oldest_active_xid = cp.nextXid.value as u32;\n                        for xid in modification.tline.list_twophase_files(lsn, ctx).await? 
{\n                            let narrow_xid = xid as u32;\n                            if (narrow_xid.wrapping_sub(oldest_active_xid) as i32) < 0 {\n                                oldest_active_xid = narrow_xid;\n                            }\n                        }\n                        oldest_active_xid\n                    };\n                    cp.oldestActiveXid = oldest_active_xid;\n                } else {\n                    cp.oldestActiveXid = xlog_checkpoint.oldestActiveXid;\n                }\n                // NB: We abuse the Checkpoint.redo field:\n                //\n                // - In PostgreSQL, the Checkpoint struct doesn't store the information\n                //   of whether this is an online checkpoint or a shutdown checkpoint. It's\n                //   stored in the XLOG info field of the WAL record, shutdown checkpoints\n                //   use record type XLOG_CHECKPOINT_SHUTDOWN and online checkpoints use\n                //   XLOG_CHECKPOINT_ONLINE. We don't store the original WAL record headers\n                //   in the pageserver, however.\n                //\n                // - In PostgreSQL, the Checkpoint.redo field stores the *start* of the\n                //   checkpoint record, if it's a shutdown checkpoint. But when we are\n                //   starting from a shutdown checkpoint, the basebackup LSN is the *end*\n                //   of the shutdown checkpoint WAL record. That makes it difficult to\n                //   correctly detect whether we're starting from a shutdown record or\n                //   not.\n                //\n                // To address both of those issues, we store 0 in the redo field if it's\n                // an online checkpoint record, and the record's *end* LSN if it's a\n                // shutdown checkpoint. 
We don't need the original redo pointer in neon,\n                // because we don't perform WAL replay at startup anyway, so we can get\n                // away with abusing the redo field like this.\n                //\n                // XXX: Ideally, we would persist the extra information in a more\n                // explicit format, rather than repurpose the fields of the Postgres\n                // struct like this. However, we already have persisted data like this,\n                // so we need to maintain backwards compatibility.\n                //\n                // NB: We didn't originally have this convention, so there are still old\n                // persisted records that didn't do this. Before, we didn't update the\n                // persisted redo field at all. That means that old records have a bogus\n                // redo pointer that points to some old value, from the checkpoint record\n                // that was originally imported from the data directory. If it was a\n                // project created in Neon, that means it points to the first checkpoint\n                // after initdb. That's OK for our purposes: all such old checkpoints are\n                // treated as old online checkpoints when the basebackup is created.\n                cp.redo = if info == pg_constants::XLOG_CHECKPOINT_SHUTDOWN {\n                    // Store the *end* LSN of the checkpoint record. Or to be precise,\n                    // the start LSN of the *next* record, i.e. if the record ends\n                    // exactly at page boundary, the redo LSN points to just after the\n                    // page header on the next page.\n                    lsn.into()\n                } else {\n                    Lsn::INVALID.into()\n                };\n\n                // Write a new checkpoint key-value pair on every checkpoint record, even\n                // if nothing really changed. 
Not strictly required, but it seems nice to\n                // have some trace of the checkpoint records in the layer files at the same\n                // LSNs.\n                self.checkpoint_modified = true;\n            }\n        });\n\n        if info == pg_constants::XLOG_CHECKPOINT_SHUTDOWN {\n            modification.tline.prepare_basebackup(lsn);\n        }\n\n        Ok(())\n    }\n\n    async fn ingest_logical_message_put(\n        &mut self,\n        put: PutLogicalMessage,\n        modification: &mut DatadirModification<'_>,\n        ctx: &RequestContext,\n    ) -> Result<(), WalIngestError> {\n        let PutLogicalMessage { path, buf } = put;\n        modification.put_file(path.as_str(), &buf, ctx).await\n    }\n\n    fn ingest_standby_record(&mut self, record: StandbyRecord) -> Result<(), WalIngestError> {\n        match record {\n            StandbyRecord::RunningXacts(running_xacts) => {\n                enum_pgversion_dispatch!(&mut self.checkpoint, CheckPoint, cp, {\n                    cp.oldestActiveXid = running_xacts.oldest_running_xid;\n                });\n\n                self.checkpoint_modified = true;\n            }\n        }\n\n        Ok(())\n    }\n\n    async fn ingest_replorigin_record(\n        &mut self,\n        record: ReploriginRecord,\n        modification: &mut DatadirModification<'_>,\n    ) -> Result<(), WalIngestError> {\n        match record {\n            ReploriginRecord::Set(set) => {\n                modification\n                    .set_replorigin(set.node_id, set.remote_lsn)\n                    .await?;\n            }\n            ReploriginRecord::Drop(drop) => {\n                modification.drop_replorigin(drop.node_id).await?;\n            }\n        }\n\n        Ok(())\n    }\n\n    async fn put_rel_creation(\n        &mut self,\n        modification: &mut DatadirModification<'_>,\n        rel: RelTag,\n        ctx: &RequestContext,\n    ) -> Result<(), WalIngestError> {\n        
modification.put_rel_creation(rel, 0, ctx).await?;\n        Ok(())\n    }\n\n    #[cfg(test)]\n    async fn put_rel_page_image(\n        &mut self,\n        modification: &mut DatadirModification<'_>,\n        rel: RelTag,\n        blknum: BlockNumber,\n        img: Bytes,\n        ctx: &RequestContext,\n    ) -> Result<(), WalIngestError> {\n        self.handle_rel_extend(modification, rel, blknum, ctx)\n            .await?;\n        modification.put_rel_page_image(rel, blknum, img)?;\n        Ok(())\n    }\n\n    async fn put_rel_wal_record(\n        &mut self,\n        modification: &mut DatadirModification<'_>,\n        rel: RelTag,\n        blknum: BlockNumber,\n        rec: NeonWalRecord,\n        ctx: &RequestContext,\n    ) -> Result<(), WalIngestError> {\n        self.handle_rel_extend(modification, rel, blknum, ctx)\n            .await?;\n        modification.put_rel_wal_record(rel, blknum, rec)?;\n        Ok(())\n    }\n\n    async fn put_rel_truncation(\n        &mut self,\n        modification: &mut DatadirModification<'_>,\n        rel: RelTag,\n        nblocks: BlockNumber,\n        ctx: &RequestContext,\n    ) -> Result<(), WalIngestError> {\n        modification.put_rel_truncation(rel, nblocks, ctx).await?;\n        Ok(())\n    }\n\n    async fn handle_rel_extend(\n        &mut self,\n        modification: &mut DatadirModification<'_>,\n        rel: RelTag,\n        blknum: BlockNumber,\n        ctx: &RequestContext,\n    ) -> Result<(), WalIngestError> {\n        let new_nblocks = blknum + 1;\n        // Check if the relation exists. 
We implicitly create relations on first\n        // record.\n        let old_nblocks = modification.create_relation_if_required(rel, ctx).await?;\n\n        if new_nblocks > old_nblocks {\n            //info!(\"extending {} {} to {}\", rel, old_nblocks, new_nblocks);\n            modification.put_rel_extend(rel, new_nblocks, ctx).await?;\n\n            let mut key = rel_block_to_key(rel, blknum);\n\n            // fill the gap with zeros\n            let mut gap_blocks_filled: u64 = 0;\n            for gap_blknum in old_nblocks..blknum {\n                key.field6 = gap_blknum;\n\n                if self.shard.get_shard_number(&key) != self.shard.number {\n                    continue;\n                }\n\n                modification.put_rel_page_image_zero(rel, gap_blknum)?;\n                gap_blocks_filled += 1;\n            }\n\n            WAL_INGEST\n                .gap_blocks_zeroed_on_rel_extend\n                .inc_by(gap_blocks_filled);\n\n            // Log something when relation extends cause us to fill gaps\n            // with zero pages. 
Logging is rate limited per pg version to\n            // avoid skewing.\n            if gap_blocks_filled > 0 {\n                use std::sync::Mutex;\n\n                use once_cell::sync::Lazy;\n                use utils::rate_limit::RateLimit;\n\n                struct RateLimitPerPgVersion {\n                    rate_limiters: [Lazy<Mutex<RateLimit>>; 4],\n                }\n\n                impl RateLimitPerPgVersion {\n                    const fn new() -> Self {\n                        Self {\n                            rate_limiters: [const {\n                                Lazy::new(|| Mutex::new(RateLimit::new(Duration::from_secs(30))))\n                            }; 4],\n                        }\n                    }\n\n                    const fn rate_limiter(\n                        &self,\n                        pg_version: PgMajorVersion,\n                    ) -> Option<&Lazy<Mutex<RateLimit>>> {\n                        const MIN_PG_VERSION: u32 = PgMajorVersion::PG14.major_version_num();\n                        const MAX_PG_VERSION: u32 = PgMajorVersion::PG17.major_version_num();\n                        let pg_version = pg_version.major_version_num();\n\n                        if pg_version < MIN_PG_VERSION || pg_version > MAX_PG_VERSION {\n                            return None;\n                        }\n\n                        Some(&self.rate_limiters[(pg_version - MIN_PG_VERSION) as usize])\n                    }\n                }\n\n                static LOGGED: RateLimitPerPgVersion = RateLimitPerPgVersion::new();\n                if let Some(rate_limiter) = LOGGED.rate_limiter(modification.tline.pg_version) {\n                    if let Ok(mut locked) = rate_limiter.try_lock() {\n                        locked.call(|| {\n                            info!(\n                                lsn=%modification.get_lsn(),\n                                pg_version=%modification.tline.pg_version,\n                             
   rel=%rel,\n                                \"Filled {} gap blocks on rel extend to {} from {}\",\n                                gap_blocks_filled,\n                                new_nblocks,\n                                old_nblocks);\n                        });\n                    }\n                }\n            }\n        }\n        Ok(())\n    }\n\n    async fn put_slru_page_image(\n        &mut self,\n        modification: &mut DatadirModification<'_>,\n        kind: SlruKind,\n        segno: u32,\n        blknum: BlockNumber,\n        img: Bytes,\n        ctx: &RequestContext,\n    ) -> Result<(), WalIngestError> {\n        if !self.shard.is_shard_zero() {\n            return Ok(());\n        }\n\n        self.handle_slru_extend(modification, kind, segno, blknum, ctx)\n            .await?;\n        modification.put_slru_page_image(kind, segno, blknum, img)?;\n        Ok(())\n    }\n\n    async fn handle_slru_extend(\n        &mut self,\n        modification: &mut DatadirModification<'_>,\n        kind: SlruKind,\n        segno: u32,\n        blknum: BlockNumber,\n        ctx: &RequestContext,\n    ) -> Result<(), WalIngestError> {\n        // we don't use a cache for this like we do for relations. SLRUS are explicitly\n        // extended with ZEROPAGE records, not with commit records, so it happens\n        // a lot less frequently.\n\n        let new_nblocks = blknum + 1;\n        // Check if the relation exists. 
We implicitly create relations on first\n        // record.\n        // TODO: would be nice to be more explicit about it\n        let old_nblocks = if !modification\n            .tline\n            .get_slru_segment_exists(kind, segno, Version::Modified(modification), ctx)\n            .await?\n        {\n            // create it with 0 size initially, the logic below will extend it\n            modification\n                .put_slru_segment_creation(kind, segno, 0, ctx)\n                .await?;\n            0\n        } else {\n            modification\n                .tline\n                .get_slru_segment_size(kind, segno, Version::Modified(modification), ctx)\n                .await?\n        };\n\n        if new_nblocks > old_nblocks {\n            trace!(\n                \"extending SLRU {:?} seg {} from {} to {} blocks\",\n                kind, segno, old_nblocks, new_nblocks\n            );\n            modification.put_slru_extend(kind, segno, new_nblocks)?;\n\n            // fill the gap with zeros\n            for gap_blknum in old_nblocks..blknum {\n                modification.put_slru_page_image_zero(kind, segno, gap_blknum)?;\n            }\n        }\n        Ok(())\n    }\n}\n\n/// Returns the size of the relation as of this modification, or None if the relation doesn't exist.\n///\n/// This is only accurate on shard 0. 
On other shards, it will return the size up to the highest\n/// page number stored in the shard, or None if the shard does not have any pages for it.\nasync fn get_relsize(\n    modification: &DatadirModification<'_>,\n    rel: RelTag,\n    ctx: &RequestContext,\n) -> Result<Option<BlockNumber>, PageReconstructError> {\n    if !modification\n        .tline\n        .get_rel_exists(rel, Version::Modified(modification), ctx)\n        .await?\n    {\n        return Ok(None);\n    }\n    modification\n        .tline\n        .get_rel_size(rel, Version::Modified(modification), ctx)\n        .await\n        .map(Some)\n}\n\n#[allow(clippy::bool_assert_comparison)]\n#[cfg(test)]\nmod tests {\n    use anyhow::Result;\n    use postgres_ffi::PgMajorVersion;\n    use postgres_ffi::RELSEG_SIZE;\n\n    use super::*;\n    use crate::DEFAULT_PG_VERSION;\n    use crate::tenant::harness::*;\n    use crate::tenant::remote_timeline_client::{INITDB_PATH, remote_initdb_archive_path};\n    use crate::tenant::storage_layer::IoConcurrency;\n\n    /// Arbitrary relation tag, for testing.\n    const TESTREL_A: RelTag = RelTag {\n        spcnode: 0,\n        dbnode: 111,\n        relnode: 1000,\n        forknum: 0,\n    };\n\n    fn assert_current_logical_size(_timeline: &Timeline, _lsn: Lsn) {\n        // TODO\n    }\n\n    #[tokio::test]\n    async fn test_zeroed_checkpoint_decodes_correctly() -> Result<(), anyhow::Error> {\n        for i in PgMajorVersion::ALL {\n            dispatch_pgversion!(i, {\n                pgv::CheckPoint::decode(&pgv::ZERO_CHECKPOINT)?;\n            });\n        }\n\n        Ok(())\n    }\n\n    async fn init_walingest_test(tline: &Timeline, ctx: &RequestContext) -> Result<WalIngest> {\n        let mut m = tline.begin_modification(Lsn(0x10));\n        m.put_checkpoint(dispatch_pgversion!(\n            tline.pg_version,\n            pgv::ZERO_CHECKPOINT.clone()\n        ))?;\n        m.put_relmap_file(0, 111, Bytes::from(\"\"), ctx).await?; // dummy relmapper 
file\n        m.commit(ctx).await?;\n        let walingest = WalIngest::new(tline, Lsn(0x10), ctx).await?;\n\n        Ok(walingest)\n    }\n\n    #[tokio::test]\n    async fn test_relsize() -> Result<()> {\n        let (tenant, ctx) = TenantHarness::create(\"test_relsize\").await?.load().await;\n        let io_concurrency = IoConcurrency::spawn_for_test();\n        let tline = tenant\n            .create_test_timeline(TIMELINE_ID, Lsn(8), DEFAULT_PG_VERSION, &ctx)\n            .await?;\n        let mut walingest = init_walingest_test(&tline, &ctx).await?;\n\n        let mut m = tline.begin_modification(Lsn(0x20));\n        walingest.put_rel_creation(&mut m, TESTREL_A, &ctx).await?;\n        walingest\n            .put_rel_page_image(&mut m, TESTREL_A, 0, test_img(\"foo blk 0 at 2\"), &ctx)\n            .await?;\n        m.commit(&ctx).await?;\n        let mut m = tline.begin_modification(Lsn(0x30));\n        walingest\n            .put_rel_page_image(&mut m, TESTREL_A, 0, test_img(\"foo blk 0 at 3\"), &ctx)\n            .await?;\n        m.commit(&ctx).await?;\n        let mut m = tline.begin_modification(Lsn(0x40));\n        walingest\n            .put_rel_page_image(&mut m, TESTREL_A, 1, test_img(\"foo blk 1 at 4\"), &ctx)\n            .await?;\n        m.commit(&ctx).await?;\n        let mut m = tline.begin_modification(Lsn(0x50));\n        walingest\n            .put_rel_page_image(&mut m, TESTREL_A, 2, test_img(\"foo blk 2 at 5\"), &ctx)\n            .await?;\n        m.commit(&ctx).await?;\n\n        assert_current_logical_size(&tline, Lsn(0x50));\n\n        let test_span = tracing::info_span!(parent: None, \"test\",\n                                            tenant_id=%tline.tenant_shard_id.tenant_id,\n                                            shard_id=%tline.tenant_shard_id.shard_slug(),\n                                            timeline_id=%tline.timeline_id);\n\n        // The relation was created at LSN 2, not visible at LSN 1 yet.\n        
assert_eq!(\n            tline\n                .get_rel_exists(TESTREL_A, Version::at(Lsn(0x10)), &ctx)\n                .await?,\n            false\n        );\n        assert!(\n            tline\n                .get_rel_size(TESTREL_A, Version::at(Lsn(0x10)), &ctx)\n                .await\n                .is_err()\n        );\n        assert_eq!(\n            tline\n                .get_rel_exists(TESTREL_A, Version::at(Lsn(0x20)), &ctx)\n                .await?,\n            true\n        );\n        assert_eq!(\n            tline\n                .get_rel_size(TESTREL_A, Version::at(Lsn(0x20)), &ctx)\n                .await?,\n            1\n        );\n        assert_eq!(\n            tline\n                .get_rel_size(TESTREL_A, Version::at(Lsn(0x50)), &ctx)\n                .await?,\n            3\n        );\n\n        // Check page contents at each LSN\n        assert_eq!(\n            tline\n                .get_rel_page_at_lsn(\n                    TESTREL_A,\n                    0,\n                    Version::at(Lsn(0x20)),\n                    &ctx,\n                    io_concurrency.clone()\n                )\n                .instrument(test_span.clone())\n                .await?,\n            test_img(\"foo blk 0 at 2\")\n        );\n\n        assert_eq!(\n            tline\n                .get_rel_page_at_lsn(\n                    TESTREL_A,\n                    0,\n                    Version::at(Lsn(0x30)),\n                    &ctx,\n                    io_concurrency.clone()\n                )\n                .instrument(test_span.clone())\n                .await?,\n            test_img(\"foo blk 0 at 3\")\n        );\n\n        assert_eq!(\n            tline\n                .get_rel_page_at_lsn(\n                    TESTREL_A,\n                    0,\n                    Version::at(Lsn(0x40)),\n                    &ctx,\n                    io_concurrency.clone()\n                )\n                .instrument(test_span.clone())\n 
               .await?,\n            test_img(\"foo blk 0 at 3\")\n        );\n        assert_eq!(\n            tline\n                .get_rel_page_at_lsn(\n                    TESTREL_A,\n                    1,\n                    Version::at(Lsn(0x40)),\n                    &ctx,\n                    io_concurrency.clone()\n                )\n                .instrument(test_span.clone())\n                .await?,\n            test_img(\"foo blk 1 at 4\")\n        );\n\n        assert_eq!(\n            tline\n                .get_rel_page_at_lsn(\n                    TESTREL_A,\n                    0,\n                    Version::at(Lsn(0x50)),\n                    &ctx,\n                    io_concurrency.clone()\n                )\n                .instrument(test_span.clone())\n                .await?,\n            test_img(\"foo blk 0 at 3\")\n        );\n        assert_eq!(\n            tline\n                .get_rel_page_at_lsn(\n                    TESTREL_A,\n                    1,\n                    Version::at(Lsn(0x50)),\n                    &ctx,\n                    io_concurrency.clone()\n                )\n                .instrument(test_span.clone())\n                .await?,\n            test_img(\"foo blk 1 at 4\")\n        );\n        assert_eq!(\n            tline\n                .get_rel_page_at_lsn(\n                    TESTREL_A,\n                    2,\n                    Version::at(Lsn(0x50)),\n                    &ctx,\n                    io_concurrency.clone()\n                )\n                .instrument(test_span.clone())\n                .await?,\n            test_img(\"foo blk 2 at 5\")\n        );\n\n        // Truncate last block\n        let mut m = tline.begin_modification(Lsn(0x60));\n        walingest\n            .put_rel_truncation(&mut m, TESTREL_A, 2, &ctx)\n            .await?;\n        m.commit(&ctx).await?;\n        assert_current_logical_size(&tline, Lsn(0x60));\n\n        // Check reported size and 
contents after truncation\n        assert_eq!(\n            tline\n                .get_rel_size(TESTREL_A, Version::at(Lsn(0x60)), &ctx)\n                .await?,\n            2\n        );\n        assert_eq!(\n            tline\n                .get_rel_page_at_lsn(\n                    TESTREL_A,\n                    0,\n                    Version::at(Lsn(0x60)),\n                    &ctx,\n                    io_concurrency.clone()\n                )\n                .instrument(test_span.clone())\n                .await?,\n            test_img(\"foo blk 0 at 3\")\n        );\n        assert_eq!(\n            tline\n                .get_rel_page_at_lsn(\n                    TESTREL_A,\n                    1,\n                    Version::at(Lsn(0x60)),\n                    &ctx,\n                    io_concurrency.clone()\n                )\n                .instrument(test_span.clone())\n                .await?,\n            test_img(\"foo blk 1 at 4\")\n        );\n\n        // should still see the truncated block with older LSN\n        assert_eq!(\n            tline\n                .get_rel_size(TESTREL_A, Version::at(Lsn(0x50)), &ctx)\n                .await?,\n            3\n        );\n        assert_eq!(\n            tline\n                .get_rel_page_at_lsn(\n                    TESTREL_A,\n                    2,\n                    Version::at(Lsn(0x50)),\n                    &ctx,\n                    io_concurrency.clone()\n                )\n                .instrument(test_span.clone())\n                .await?,\n            test_img(\"foo blk 2 at 5\")\n        );\n\n        // Truncate to zero length\n        let mut m = tline.begin_modification(Lsn(0x68));\n        walingest\n            .put_rel_truncation(&mut m, TESTREL_A, 0, &ctx)\n            .await?;\n        m.commit(&ctx).await?;\n        assert_eq!(\n            tline\n                .get_rel_size(TESTREL_A, Version::at(Lsn(0x68)), &ctx)\n                .await?,\n            
0\n        );\n\n        // Extend from 0 to 2 blocks, leaving a gap\n        let mut m = tline.begin_modification(Lsn(0x70));\n        walingest\n            .put_rel_page_image(&mut m, TESTREL_A, 1, test_img(\"foo blk 1\"), &ctx)\n            .await?;\n        m.commit(&ctx).await?;\n        assert_eq!(\n            tline\n                .get_rel_size(TESTREL_A, Version::at(Lsn(0x70)), &ctx)\n                .await?,\n            2\n        );\n        assert_eq!(\n            tline\n                .get_rel_page_at_lsn(\n                    TESTREL_A,\n                    0,\n                    Version::at(Lsn(0x70)),\n                    &ctx,\n                    io_concurrency.clone()\n                )\n                .instrument(test_span.clone())\n                .await?,\n            ZERO_PAGE\n        );\n        assert_eq!(\n            tline\n                .get_rel_page_at_lsn(\n                    TESTREL_A,\n                    1,\n                    Version::at(Lsn(0x70)),\n                    &ctx,\n                    io_concurrency.clone()\n                )\n                .instrument(test_span.clone())\n                .await?,\n            test_img(\"foo blk 1\")\n        );\n\n        // Extend a lot more, leaving a big gap that spans across segments\n        let mut m = tline.begin_modification(Lsn(0x80));\n        walingest\n            .put_rel_page_image(&mut m, TESTREL_A, 1500, test_img(\"foo blk 1500\"), &ctx)\n            .await?;\n        m.commit(&ctx).await?;\n        assert_eq!(\n            tline\n                .get_rel_size(TESTREL_A, Version::at(Lsn(0x80)), &ctx)\n                .await?,\n            1501\n        );\n        for blk in 2..1500 {\n            assert_eq!(\n                tline\n                    .get_rel_page_at_lsn(\n                        TESTREL_A,\n                        blk,\n                        Version::at(Lsn(0x80)),\n                        &ctx,\n                        
io_concurrency.clone()\n                    )\n                    .instrument(test_span.clone())\n                    .await?,\n                ZERO_PAGE\n            );\n        }\n        assert_eq!(\n            tline\n                .get_rel_page_at_lsn(\n                    TESTREL_A,\n                    1500,\n                    Version::at(Lsn(0x80)),\n                    &ctx,\n                    io_concurrency.clone()\n                )\n                .instrument(test_span.clone())\n                .await?,\n            test_img(\"foo blk 1500\")\n        );\n\n        Ok(())\n    }\n\n    // Test what happens if we dropped a relation\n    // and then created it again within the same layer.\n    #[tokio::test]\n    async fn test_drop_extend() -> Result<()> {\n        let (tenant, ctx) = TenantHarness::create(\"test_drop_extend\")\n            .await?\n            .load()\n            .await;\n        let tline = tenant\n            .create_test_timeline(TIMELINE_ID, Lsn(8), DEFAULT_PG_VERSION, &ctx)\n            .await?;\n        let mut walingest = init_walingest_test(&tline, &ctx).await?;\n\n        let mut m = tline.begin_modification(Lsn(0x20));\n        walingest\n            .put_rel_page_image(&mut m, TESTREL_A, 0, test_img(\"foo blk 0 at 2\"), &ctx)\n            .await?;\n        m.commit(&ctx).await?;\n\n        // Check that rel exists and size is correct\n        assert_eq!(\n            tline\n                .get_rel_exists(TESTREL_A, Version::at(Lsn(0x20)), &ctx)\n                .await?,\n            true\n        );\n        assert_eq!(\n            tline\n                .get_rel_size(TESTREL_A, Version::at(Lsn(0x20)), &ctx)\n                .await?,\n            1\n        );\n\n        // Drop rel\n        let mut m = tline.begin_modification(Lsn(0x30));\n        let mut rel_drops = HashMap::new();\n        rel_drops.insert((TESTREL_A.spcnode, TESTREL_A.dbnode), vec![TESTREL_A]);\n        m.put_rel_drops(rel_drops, &ctx).await?;\n 
       m.commit(&ctx).await?;\n\n        // Check that rel is not visible anymore\n        assert_eq!(\n            tline\n                .get_rel_exists(TESTREL_A, Version::at(Lsn(0x30)), &ctx)\n                .await?,\n            false\n        );\n\n        // FIXME: should fail\n        //assert!(tline.get_rel_size(TESTREL_A, Lsn(0x30), false)?.is_none());\n\n        // Re-create it\n        let mut m = tline.begin_modification(Lsn(0x40));\n        walingest\n            .put_rel_page_image(&mut m, TESTREL_A, 0, test_img(\"foo blk 0 at 4\"), &ctx)\n            .await?;\n        m.commit(&ctx).await?;\n\n        // Check that rel exists and size is correct\n        assert_eq!(\n            tline\n                .get_rel_exists(TESTREL_A, Version::at(Lsn(0x40)), &ctx)\n                .await?,\n            true\n        );\n        assert_eq!(\n            tline\n                .get_rel_size(TESTREL_A, Version::at(Lsn(0x40)), &ctx)\n                .await?,\n            1\n        );\n\n        Ok(())\n    }\n\n    // Test what happens if we truncated a relation\n    // so that one of its segments was dropped\n    // and then extended it again within the same layer.\n    #[tokio::test]\n    async fn test_truncate_extend() -> Result<()> {\n        let (tenant, ctx) = TenantHarness::create(\"test_truncate_extend\")\n            .await?\n            .load()\n            .await;\n        let io_concurrency = IoConcurrency::spawn_for_test();\n        let tline = tenant\n            .create_test_timeline(TIMELINE_ID, Lsn(8), DEFAULT_PG_VERSION, &ctx)\n            .await?;\n        let mut walingest = init_walingest_test(&tline, &ctx).await?;\n\n        // Create a 20 MB relation (the size is arbitrary)\n        let relsize = 20 * 1024 * 1024 / 8192;\n        let mut m = tline.begin_modification(Lsn(0x20));\n        for blkno in 0..relsize {\n            let data = format!(\"foo blk {} at {}\", blkno, Lsn(0x20));\n            walingest\n                
.put_rel_page_image(&mut m, TESTREL_A, blkno, test_img(&data), &ctx)\n                .await?;\n        }\n        m.commit(&ctx).await?;\n\n        let test_span = tracing::info_span!(parent: None, \"test\",\n                                            tenant_id=%tline.tenant_shard_id.tenant_id,\n                                            shard_id=%tline.tenant_shard_id.shard_slug(),\n                                            timeline_id=%tline.timeline_id);\n\n        // The relation was created at LSN 20, not visible at LSN 1 yet.\n        assert_eq!(\n            tline\n                .get_rel_exists(TESTREL_A, Version::at(Lsn(0x10)), &ctx)\n                .await?,\n            false\n        );\n        assert!(\n            tline\n                .get_rel_size(TESTREL_A, Version::at(Lsn(0x10)), &ctx)\n                .await\n                .is_err()\n        );\n\n        assert_eq!(\n            tline\n                .get_rel_exists(TESTREL_A, Version::at(Lsn(0x20)), &ctx)\n                .await?,\n            true\n        );\n        assert_eq!(\n            tline\n                .get_rel_size(TESTREL_A, Version::at(Lsn(0x20)), &ctx)\n                .await?,\n            relsize\n        );\n\n        // Check relation content\n        for blkno in 0..relsize {\n            let lsn = Lsn(0x20);\n            let data = format!(\"foo blk {blkno} at {lsn}\");\n            assert_eq!(\n                tline\n                    .get_rel_page_at_lsn(\n                        TESTREL_A,\n                        blkno,\n                        Version::at(lsn),\n                        &ctx,\n                        io_concurrency.clone()\n                    )\n                    .instrument(test_span.clone())\n                    .await?,\n                test_img(&data)\n            );\n        }\n\n        // Truncate relation so that second segment was dropped\n        // - only leave one page\n        let mut m = 
tline.begin_modification(Lsn(0x60));\n        walingest\n            .put_rel_truncation(&mut m, TESTREL_A, 1, &ctx)\n            .await?;\n        m.commit(&ctx).await?;\n\n        // Check reported size and contents after truncation\n        assert_eq!(\n            tline\n                .get_rel_size(TESTREL_A, Version::at(Lsn(0x60)), &ctx)\n                .await?,\n            1\n        );\n\n        for blkno in 0..1 {\n            let lsn = Lsn(0x20);\n            let data = format!(\"foo blk {blkno} at {lsn}\");\n            assert_eq!(\n                tline\n                    .get_rel_page_at_lsn(\n                        TESTREL_A,\n                        blkno,\n                        Version::at(Lsn(0x60)),\n                        &ctx,\n                        io_concurrency.clone()\n                    )\n                    .instrument(test_span.clone())\n                    .await?,\n                test_img(&data)\n            );\n        }\n\n        // should still see all blocks with older LSN\n        assert_eq!(\n            tline\n                .get_rel_size(TESTREL_A, Version::at(Lsn(0x50)), &ctx)\n                .await?,\n            relsize\n        );\n        for blkno in 0..relsize {\n            let lsn = Lsn(0x20);\n            let data = format!(\"foo blk {blkno} at {lsn}\");\n            assert_eq!(\n                tline\n                    .get_rel_page_at_lsn(\n                        TESTREL_A,\n                        blkno,\n                        Version::at(Lsn(0x50)),\n                        &ctx,\n                        io_concurrency.clone()\n                    )\n                    .instrument(test_span.clone())\n                    .await?,\n                test_img(&data)\n            );\n        }\n\n        // Extend relation again.\n        // Add enough blocks to create second segment\n        let lsn = Lsn(0x80);\n        let mut m = tline.begin_modification(lsn);\n        for blkno in 0..relsize 
{\n            let data = format!(\"foo blk {blkno} at {lsn}\");\n            walingest\n                .put_rel_page_image(&mut m, TESTREL_A, blkno, test_img(&data), &ctx)\n                .await?;\n        }\n        m.commit(&ctx).await?;\n\n        assert_eq!(\n            tline\n                .get_rel_exists(TESTREL_A, Version::at(Lsn(0x80)), &ctx)\n                .await?,\n            true\n        );\n        assert_eq!(\n            tline\n                .get_rel_size(TESTREL_A, Version::at(Lsn(0x80)), &ctx)\n                .await?,\n            relsize\n        );\n        // Check relation content\n        for blkno in 0..relsize {\n            let lsn = Lsn(0x80);\n            let data = format!(\"foo blk {blkno} at {lsn}\");\n            assert_eq!(\n                tline\n                    .get_rel_page_at_lsn(\n                        TESTREL_A,\n                        blkno,\n                        Version::at(Lsn(0x80)),\n                        &ctx,\n                        io_concurrency.clone()\n                    )\n                    .instrument(test_span.clone())\n                    .await?,\n                test_img(&data)\n            );\n        }\n\n        Ok(())\n    }\n\n    /// Test get_relsize() and truncation with a file larger than 1 GB, so that it's\n    /// split into multiple 1 GB segments in Postgres.\n    #[tokio::test]\n    async fn test_large_rel() -> Result<()> {\n        let (tenant, ctx) = TenantHarness::create(\"test_large_rel\").await?.load().await;\n        let tline = tenant\n            .create_test_timeline(TIMELINE_ID, Lsn(8), DEFAULT_PG_VERSION, &ctx)\n            .await?;\n        let mut walingest = init_walingest_test(&tline, &ctx).await?;\n\n        let mut lsn = 0x10;\n        for blknum in 0..RELSEG_SIZE + 1 {\n            lsn += 0x10;\n            let mut m = tline.begin_modification(Lsn(lsn));\n            let img = test_img(&format!(\"foo blk {} at {}\", blknum, Lsn(lsn)));\n            
walingest\n                .put_rel_page_image(&mut m, TESTREL_A, blknum as BlockNumber, img, &ctx)\n                .await?;\n            m.commit(&ctx).await?;\n        }\n\n        assert_current_logical_size(&tline, Lsn(lsn));\n\n        assert_eq!(\n            tline\n                .get_rel_size(TESTREL_A, Version::at(Lsn(lsn)), &ctx)\n                .await?,\n            RELSEG_SIZE + 1\n        );\n\n        // Truncate one block\n        lsn += 0x10;\n        let mut m = tline.begin_modification(Lsn(lsn));\n        walingest\n            .put_rel_truncation(&mut m, TESTREL_A, RELSEG_SIZE, &ctx)\n            .await?;\n        m.commit(&ctx).await?;\n        assert_eq!(\n            tline\n                .get_rel_size(TESTREL_A, Version::at(Lsn(lsn)), &ctx)\n                .await?,\n            RELSEG_SIZE\n        );\n        assert_current_logical_size(&tline, Lsn(lsn));\n\n        // Truncate another block\n        lsn += 0x10;\n        let mut m = tline.begin_modification(Lsn(lsn));\n        walingest\n            .put_rel_truncation(&mut m, TESTREL_A, RELSEG_SIZE - 1, &ctx)\n            .await?;\n        m.commit(&ctx).await?;\n        assert_eq!(\n            tline\n                .get_rel_size(TESTREL_A, Version::at(Lsn(lsn)), &ctx)\n                .await?,\n            RELSEG_SIZE - 1\n        );\n        assert_current_logical_size(&tline, Lsn(lsn));\n\n        // Truncate to 3000, and then truncate all the way down to 0, one block at a time\n        // This tests the behavior at segment boundaries\n        let mut size: i32 = 3000;\n        while size >= 0 {\n            lsn += 0x10;\n            let mut m = tline.begin_modification(Lsn(lsn));\n            walingest\n                .put_rel_truncation(&mut m, TESTREL_A, size as BlockNumber, &ctx)\n                .await?;\n            m.commit(&ctx).await?;\n            assert_eq!(\n                tline\n                    .get_rel_size(TESTREL_A, Version::at(Lsn(lsn)), &ctx)\n             
       .await?,\n                size as BlockNumber\n            );\n\n            size -= 1;\n        }\n        assert_current_logical_size(&tline, Lsn(lsn));\n\n        Ok(())\n    }\n\n    /// Replay a wal segment file taken directly from safekeepers.\n    ///\n    /// This test is useful for benchmarking since it allows us to profile only\n    /// the walingest code in a single-threaded executor, and iterate more quickly\n    /// without waiting for unrelated steps.\n    #[tokio::test]\n    async fn test_ingest_real_wal() {\n        use postgres_ffi::WAL_SEGMENT_SIZE;\n        use postgres_ffi::waldecoder::WalStreamDecoder;\n\n        use crate::tenant::harness::*;\n\n        // Define test data path and constants.\n        //\n        // Steps to reconstruct the data, if needed:\n        // 1. Run the pgbench python test\n        // 2. Take the first wal segment file from safekeeper\n        // 3. Compress it using `zstd --long input_file`\n        // 4. Copy initdb.tar.zst from local_fs_remote_storage\n        // 5. Grep sk logs for \"restart decoder\" to get startpoint\n        // 6. 
Run just the decoder from this test to get the endpoint.\n        //    It's the last LSN the decoder will output.\n        let pg_version = PgMajorVersion::PG15; // The test data was generated by pg15\n        let path = \"test_data/sk_wal_segment_from_pgbench\";\n        let wal_segment_path = format!(\"{path}/000000010000000000000001.zst\");\n        let source_initdb_path = format!(\"{path}/{INITDB_PATH}\");\n        let startpoint = Lsn::from_hex(\"14AEC08\").unwrap();\n        let _endpoint = Lsn::from_hex(\"1FFFF98\").unwrap();\n\n        let harness = TenantHarness::create(\"test_ingest_real_wal\").await.unwrap();\n        let span = harness\n            .span()\n            .in_scope(|| info_span!(\"timeline_span\", timeline_id=%TIMELINE_ID));\n        let (tenant, ctx) = harness.load().await;\n\n        let remote_initdb_path =\n            remote_initdb_archive_path(&tenant.tenant_shard_id().tenant_id, &TIMELINE_ID);\n        let initdb_path = harness.remote_fs_dir.join(remote_initdb_path.get_path());\n\n        std::fs::create_dir_all(initdb_path.parent().unwrap())\n            .expect(\"creating test dir should work\");\n        std::fs::copy(source_initdb_path, initdb_path).expect(\"copying the initdb.tar.zst works\");\n\n        // Bootstrap a real timeline. 
We can't use create_test_timeline because\n        // it doesn't create a real checkpoint, and Walingest::new tries to parse\n        // the garbage data.\n        let tline = tenant\n            .bootstrap_timeline_test(TIMELINE_ID, pg_version, Some(TIMELINE_ID), &ctx)\n            .await\n            .unwrap();\n\n        // We fully read and decompress this into memory before decoding\n        // to get a more accurate perf profile of the decoder.\n        let bytes = {\n            use async_compression::tokio::bufread::ZstdDecoder;\n            let file = tokio::fs::File::open(wal_segment_path).await.unwrap();\n            let reader = tokio::io::BufReader::new(file);\n            let decoder = ZstdDecoder::new(reader);\n            let mut reader = tokio::io::BufReader::new(decoder);\n            let mut buffer = Vec::new();\n            tokio::io::copy_buf(&mut reader, &mut buffer).await.unwrap();\n            buffer\n        };\n\n        // TODO start a profiler too\n        let started_at = std::time::Instant::now();\n\n        // Initialize walingest\n        let xlogoff: usize = startpoint.segment_offset(WAL_SEGMENT_SIZE);\n        let mut decoder = WalStreamDecoder::new(startpoint, pg_version);\n        let mut walingest = WalIngest::new(tline.as_ref(), startpoint, &ctx)\n            .await\n            .unwrap();\n        let mut modification = tline.begin_modification(startpoint);\n        println!(\"decoding {} bytes\", bytes.len() - xlogoff);\n\n        // Decode and ingest wal. 
We process the wal in chunks because\n        // that's what happens when we get bytes from safekeepers.\n        for chunk in bytes[xlogoff..].chunks(50) {\n            decoder.feed_bytes(chunk);\n            while let Some((lsn, recdata)) = decoder.poll_decode().unwrap() {\n                let interpreted = InterpretedWalRecord::from_bytes_filtered(\n                    recdata,\n                    &[*modification.tline.get_shard_identity()],\n                    lsn,\n                    modification.tline.pg_version,\n                )\n                .unwrap()\n                .remove(modification.tline.get_shard_identity())\n                .unwrap();\n\n                walingest\n                    .ingest_record(interpreted, &mut modification, &ctx)\n                    .instrument(span.clone())\n                    .await\n                    .unwrap();\n            }\n            modification.commit(&ctx).await.unwrap();\n        }\n\n        let duration = started_at.elapsed();\n        println!(\"done in {duration:?}\");\n    }\n}\n"
  },
  {
    "path": "pageserver/src/walredo/apply_neon.rs",
    "content": "use anyhow::Context;\nuse byteorder::{ByteOrder, LittleEndian};\nuse bytes::BytesMut;\nuse pageserver_api::key::Key;\nuse pageserver_api::reltag::SlruKind;\nuse postgres_ffi::v14::nonrelfile_utils::{\n    mx_offset_to_flags_bitshift, mx_offset_to_flags_offset, mx_offset_to_member_offset,\n    transaction_id_set_status,\n};\nuse postgres_ffi::{BLCKSZ, pg_constants};\nuse postgres_ffi_types::forknum::VISIBILITYMAP_FORKNUM;\nuse tracing::*;\nuse utils::lsn::Lsn;\nuse wal_decoder::models::record::NeonWalRecord;\n\n/// Can this request be served by neon redo functions\n/// or we need to pass it to wal-redo postgres process?\npub(crate) fn can_apply_in_neon(rec: &NeonWalRecord) -> bool {\n    // Currently, we don't have bespoken Rust code to replay any\n    // Postgres WAL records. But everything else is handled in neon.\n    #[allow(clippy::match_like_matches_macro)]\n    match rec {\n        NeonWalRecord::Postgres {\n            will_init: _,\n            rec: _,\n        } => false,\n        _ => true,\n    }\n}\n\npub(crate) fn apply_in_neon(\n    record: &NeonWalRecord,\n    lsn: Lsn,\n    key: Key,\n    page: &mut BytesMut,\n) -> Result<(), anyhow::Error> {\n    match record {\n        NeonWalRecord::Postgres {\n            will_init: _,\n            rec: _,\n        } => {\n            anyhow::bail!(\"tried to pass postgres wal record to neon WAL redo\");\n        }\n        //\n        // Code copied from PostgreSQL `visibilitymap_prepare_truncate` function in `visibilitymap.c`\n        //\n        NeonWalRecord::TruncateVisibilityMap {\n            trunc_byte,\n            trunc_offs,\n        } => {\n            // sanity check that this is modifying the correct relation\n            let (rel, _) = key.to_rel_block().context(\"invalid record\")?;\n            assert!(\n                rel.forknum == VISIBILITYMAP_FORKNUM,\n                \"TruncateVisibilityMap record on unexpected rel {rel}\"\n            );\n            let map = &mut 
page[pg_constants::MAXALIGN_SIZE_OF_PAGE_HEADER_DATA..];\n            map[*trunc_byte + 1..].fill(0u8);\n            /*----\n             * Mask out the unwanted bits of the last remaining byte.\n             *\n             * ((1 << 0) - 1) = 00000000\n             * ((1 << 1) - 1) = 00000001\n             * ...\n             * ((1 << 6) - 1) = 00111111\n             * ((1 << 7) - 1) = 01111111\n             *----\n             */\n            map[*trunc_byte] &= (1 << *trunc_offs) - 1;\n        }\n        NeonWalRecord::ClearVisibilityMapFlags {\n            new_heap_blkno,\n            old_heap_blkno,\n            flags,\n        } => {\n            // sanity check that this is modifying the correct relation\n            let (rel, blknum) = key.to_rel_block().context(\"invalid record\")?;\n            assert!(\n                rel.forknum == VISIBILITYMAP_FORKNUM,\n                \"ClearVisibilityMapFlags record on unexpected rel {rel}\"\n            );\n            if let Some(heap_blkno) = *new_heap_blkno {\n                // Calculate the VM block and offset that corresponds to the heap block.\n                let map_block = pg_constants::HEAPBLK_TO_MAPBLOCK(heap_blkno);\n                let map_byte = pg_constants::HEAPBLK_TO_MAPBYTE(heap_blkno);\n                let map_offset = pg_constants::HEAPBLK_TO_OFFSET(heap_blkno);\n\n                // Check that we're modifying the correct VM block.\n                assert!(map_block == blknum);\n\n                // equivalent to PageGetContents(page)\n                let map = &mut page[pg_constants::MAXALIGN_SIZE_OF_PAGE_HEADER_DATA..];\n\n                map[map_byte as usize] &= !(flags << map_offset);\n                // The page should never be empty, but we're checking it anyway as a precaution, so that if it is empty for some reason anyway, we don't make matters worse by setting the LSN on it.\n                if !postgres_ffi::page_is_new(page) {\n                    postgres_ffi::page_set_lsn(page, 
lsn);\n                }\n            }\n\n            // Repeat for 'old_heap_blkno', if any\n            if let Some(heap_blkno) = *old_heap_blkno {\n                let map_block = pg_constants::HEAPBLK_TO_MAPBLOCK(heap_blkno);\n                let map_byte = pg_constants::HEAPBLK_TO_MAPBYTE(heap_blkno);\n                let map_offset = pg_constants::HEAPBLK_TO_OFFSET(heap_blkno);\n\n                assert!(map_block == blknum);\n\n                let map = &mut page[pg_constants::MAXALIGN_SIZE_OF_PAGE_HEADER_DATA..];\n\n                map[map_byte as usize] &= !(flags << map_offset);\n                // The page should never be empty, but we're checking it anyway as a precaution, so that if it is empty for some reason anyway, we don't make matters worse by setting the LSN on it.\n                if !postgres_ffi::page_is_new(page) {\n                    postgres_ffi::page_set_lsn(page, lsn);\n                }\n            }\n        }\n        // Non-relational WAL records are handled here, with custom code that has the\n        // same effects as the corresponding Postgres WAL redo function.\n        NeonWalRecord::ClogSetCommitted { xids, timestamp } => {\n            let (slru_kind, segno, blknum) = key.to_slru_block().context(\"invalid record\")?;\n            assert_eq!(\n                slru_kind,\n                SlruKind::Clog,\n                \"ClogSetCommitted record with unexpected key {key}\"\n            );\n            for &xid in xids {\n                let pageno = xid / pg_constants::CLOG_XACTS_PER_PAGE;\n                let expected_segno = pageno / pg_constants::SLRU_PAGES_PER_SEGMENT;\n                let expected_blknum = pageno % pg_constants::SLRU_PAGES_PER_SEGMENT;\n\n                // Check that we're modifying the correct CLOG block.\n                assert!(\n                    segno == expected_segno,\n                    \"ClogSetCommitted record for XID {xid} with unexpected key {key}\"\n                );\n                
assert!(\n                    blknum == expected_blknum,\n                    \"ClogSetCommitted record for XID {xid} with unexpected key {key}\"\n                );\n\n                transaction_id_set_status(xid, pg_constants::TRANSACTION_STATUS_COMMITTED, page);\n            }\n\n            // Append the timestamp\n            if page.len() == BLCKSZ as usize + 8 {\n                page.truncate(BLCKSZ as usize);\n            }\n            if page.len() == BLCKSZ as usize {\n                page.extend_from_slice(&timestamp.to_be_bytes());\n            } else {\n                warn!(\n                    \"CLOG blk {} in seg {} has invalid size {}\",\n                    blknum,\n                    segno,\n                    page.len()\n                );\n            }\n        }\n        NeonWalRecord::ClogSetAborted { xids } => {\n            let (slru_kind, segno, blknum) = key.to_slru_block().context(\"invalid record\")?;\n            assert_eq!(\n                slru_kind,\n                SlruKind::Clog,\n                \"ClogSetAborted record with unexpected key {key}\"\n            );\n            for &xid in xids {\n                let pageno = xid / pg_constants::CLOG_XACTS_PER_PAGE;\n                let expected_segno = pageno / pg_constants::SLRU_PAGES_PER_SEGMENT;\n                let expected_blknum = pageno % pg_constants::SLRU_PAGES_PER_SEGMENT;\n\n                // Check that we're modifying the correct CLOG block.\n                assert!(\n                    segno == expected_segno,\n                    \"ClogSetAborted record for XID {xid} with unexpected key {key}\"\n                );\n                assert!(\n                    blknum == expected_blknum,\n                    \"ClogSetAborted record for XID {xid} with unexpected key {key}\"\n                );\n\n                transaction_id_set_status(xid, pg_constants::TRANSACTION_STATUS_ABORTED, page);\n            }\n        }\n        NeonWalRecord::MultixactOffsetCreate 
{ mid, moff } => {\n            let (slru_kind, segno, blknum) = key.to_slru_block().context(\"invalid record\")?;\n            assert_eq!(\n                slru_kind,\n                SlruKind::MultiXactOffsets,\n                \"MultixactOffsetCreate record with unexpected key {key}\"\n            );\n            // Compute the block and offset to modify.\n            // See RecordNewMultiXact in PostgreSQL sources.\n            let pageno = mid / pg_constants::MULTIXACT_OFFSETS_PER_PAGE as u32;\n            let entryno = mid % pg_constants::MULTIXACT_OFFSETS_PER_PAGE as u32;\n            let offset = (entryno * 4) as usize;\n\n            // Check that we're modifying the correct multixact-offsets block.\n            let expected_segno = pageno / pg_constants::SLRU_PAGES_PER_SEGMENT;\n            let expected_blknum = pageno % pg_constants::SLRU_PAGES_PER_SEGMENT;\n            assert!(\n                segno == expected_segno,\n                \"MultiXactOffsetsCreate record for multi-xid {mid} with unexpected key {key}\"\n            );\n            assert!(\n                blknum == expected_blknum,\n                \"MultiXactOffsetsCreate record for multi-xid {mid} with unexpected key {key}\"\n            );\n\n            LittleEndian::write_u32(&mut page[offset..offset + 4], *moff);\n        }\n        NeonWalRecord::MultixactMembersCreate { moff, members } => {\n            let (slru_kind, segno, blknum) = key.to_slru_block().context(\"invalid record\")?;\n            assert_eq!(\n                slru_kind,\n                SlruKind::MultiXactMembers,\n                \"MultixactMembersCreate record with unexpected key {key}\"\n            );\n            for (i, member) in members.iter().enumerate() {\n                let offset = moff + i as u32;\n\n                // Compute the block and offset to modify.\n                // See RecordNewMultiXact in PostgreSQL sources.\n                let pageno = offset / pg_constants::MULTIXACT_MEMBERS_PER_PAGE 
as u32;\n                let memberoff = mx_offset_to_member_offset(offset);\n                let flagsoff = mx_offset_to_flags_offset(offset);\n                let bshift = mx_offset_to_flags_bitshift(offset);\n\n                // Check that we're modifying the correct multixact-members block.\n                let expected_segno = pageno / pg_constants::SLRU_PAGES_PER_SEGMENT;\n                let expected_blknum = pageno % pg_constants::SLRU_PAGES_PER_SEGMENT;\n                assert!(\n                    segno == expected_segno,\n                    \"MultiXactMembersCreate record for offset {moff} with unexpected key {key}\"\n                );\n                assert!(\n                    blknum == expected_blknum,\n                    \"MultiXactMembersCreate record for offset {moff} with unexpected key {key}\"\n                );\n\n                let mut flagsval = LittleEndian::read_u32(&page[flagsoff..flagsoff + 4]);\n                flagsval &= !(((1 << pg_constants::MXACT_MEMBER_BITS_PER_XACT) - 1) << bshift);\n                flagsval |= member.status << bshift;\n                LittleEndian::write_u32(&mut page[flagsoff..flagsoff + 4], flagsval);\n                LittleEndian::write_u32(&mut page[memberoff..memberoff + 4], member.xid);\n            }\n        }\n        NeonWalRecord::AuxFile { .. 
} => {\n            // No-op: this record will never be created in aux v2.\n            warn!(\"AuxFile record should not be created in aux v2\");\n        }\n        #[cfg(feature = \"testing\")]\n        NeonWalRecord::Test {\n            append,\n            clear,\n            will_init,\n            only_if,\n        } => {\n            use bytes::BufMut;\n            if *will_init {\n                assert!(*clear, \"init record must be clear to ensure correctness\");\n                assert!(\n                    page.is_empty(),\n                    \"init record must be the first entry to ensure correctness\"\n                );\n            }\n            if *clear {\n                page.clear();\n            }\n            if let Some(only_if) = only_if {\n                if page != only_if.as_bytes() {\n                    return Err(anyhow::anyhow!(\n                        \"the current image does not match the expected image, cannot append\"\n                    ));\n                }\n            }\n            page.put_slice(append.as_bytes());\n        }\n    }\n    Ok(())\n}\n"
  },
  {
    "path": "pageserver/src/walredo/process/no_leak_child.rs",
    "content": "use std::io;\nuse std::ops::{Deref, DerefMut};\nuse std::process::{Child, Command};\n\nuse pageserver_api::shard::TenantShardId;\nuse tracing::{error, info, instrument};\n\nuse crate::metrics::{WAL_REDO_PROCESS_COUNTERS, WalRedoKillCause};\n\n/// Wrapper type around `std::process::Child` which guarantees that the child\n/// will be killed and waited-for by this process before being dropped.\npub(crate) struct NoLeakChild {\n    pub(crate) tenant_id: TenantShardId,\n    pub(crate) child: Option<Child>,\n}\n\nimpl Deref for NoLeakChild {\n    type Target = Child;\n\n    fn deref(&self) -> &Self::Target {\n        self.child.as_ref().expect(\"must not use from drop\")\n    }\n}\n\nimpl DerefMut for NoLeakChild {\n    fn deref_mut(&mut self) -> &mut Self::Target {\n        self.child.as_mut().expect(\"must not use from drop\")\n    }\n}\n\nimpl NoLeakChild {\n    pub(crate) fn spawn(tenant_id: TenantShardId, command: &mut Command) -> io::Result<Self> {\n        let child = command.spawn()?;\n        Ok(NoLeakChild {\n            tenant_id,\n            child: Some(child),\n        })\n    }\n\n    pub(crate) fn kill_and_wait(mut self, cause: WalRedoKillCause) {\n        let child = match self.child.take() {\n            Some(child) => child,\n            None => return,\n        };\n        Self::kill_and_wait_impl(child, cause);\n    }\n\n    #[instrument(skip_all, fields(pid=child.id(), ?cause))]\n    pub(crate) fn kill_and_wait_impl(mut child: Child, cause: WalRedoKillCause) {\n        scopeguard::defer! 
{\n            WAL_REDO_PROCESS_COUNTERS.killed_by_cause[cause].inc();\n        }\n        let res = child.kill();\n        if let Err(e) = res {\n            // This branch is very unlikely because:\n            // - We (= pageserver) spawned this process successfully, so, we're allowed to kill it.\n            // - This is the only place that calls .kill()\n            // - We consume `self`, so, .kill() can't be called twice.\n            // - If the process exited by itself or was killed by someone else,\n            //   .kill() will still succeed because we haven't wait()'ed yet.\n            //\n            // So, if we arrive here, we have really no idea what happened,\n            // whether the PID stored in self.child is still valid, etc.\n            // If this function were fallible, we'd return an error, but\n            // since it isn't, all we can do is log an error and proceed\n            // with the wait().\n            error!(error = %e, \"failed to SIGKILL; subsequent wait() might fail or wait for wrong process\");\n        }\n\n        match child.wait() {\n            Ok(exit_status) => {\n                info!(exit_status = %exit_status, \"wait successful\");\n            }\n            Err(e) => {\n                error!(error = %e, \"wait error; might leak the child process; it will show as zombie (defunct)\");\n            }\n        }\n    }\n}\n\nimpl Drop for NoLeakChild {\n    fn drop(&mut self) {\n        let child = match self.child.take() {\n            Some(child) => child,\n            None => return,\n        };\n        let tenant_shard_id = self.tenant_id;\n        // Offload the kill+wait of the child process into the background.\n        // If someone stops the runtime, we'll leak the child process.\n        // We can ignore that case because we only stop the runtime on pageserver exit.\n        tokio::runtime::Handle::current().spawn(async move {\n            tokio::task::spawn_blocking(move || {\n                // 
Intentionally don't inherit the tracing context from whoever is dropping us.\n                // This thread here is going to outlive our dropper.\n                let span = tracing::info_span!(\n                    \"walredo\",\n                    tenant_id = %tenant_shard_id.tenant_id,\n                    shard_id = %tenant_shard_id.shard_slug()\n                );\n                let _entered = span.enter();\n                Self::kill_and_wait_impl(child, WalRedoKillCause::NoLeakChildDrop);\n            })\n            .await\n        });\n    }\n}\n\npub(crate) trait NoLeakChildCommandExt {\n    fn spawn_no_leak_child(&mut self, tenant_id: TenantShardId) -> io::Result<NoLeakChild>;\n}\n\nimpl NoLeakChildCommandExt for Command {\n    fn spawn_no_leak_child(&mut self, tenant_id: TenantShardId) -> io::Result<NoLeakChild> {\n        NoLeakChild::spawn(tenant_id, self)\n    }\n}\n"
  },
  {
    "path": "pageserver/src/walredo/process/protocol.rs",
    "content": "use bytes::BufMut;\nuse pageserver_api::reltag::RelTag;\nuse serde::Serialize;\nuse utils::bin_ser::BeSer;\nuse utils::lsn::Lsn;\n\n///\n/// `RelTag` + block number (`blknum`) gives us a unique id of the page in the cluster.\n///\n/// In Postgres `BufferTag` structure is used for exactly the same purpose.\n/// [See more related comments here](https://github.com/postgres/postgres/blob/99c5852e20a0987eca1c38ba0c09329d4076b6a0/src/include/storage/buf_internals.h#L91).\n///\n#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Clone, Copy, Serialize)]\npub(crate) struct BufferTag {\n    pub rel: RelTag,\n    pub blknum: u32,\n}\n\npub(crate) fn build_begin_redo_for_block_msg(tag: BufferTag, buf: &mut Vec<u8>) {\n    let len = 4 + 1 + 4 * 4;\n\n    buf.put_u8(b'B');\n    buf.put_u32(len as u32);\n\n    tag.ser_into(buf)\n        .expect(\"serialize BufferTag should always succeed\");\n}\n\npub(crate) fn build_push_page_msg(tag: BufferTag, base_img: &[u8], buf: &mut Vec<u8>) {\n    assert!(base_img.len() == 8192);\n\n    let len = 4 + 1 + 4 * 4 + base_img.len();\n\n    buf.put_u8(b'P');\n    buf.put_u32(len as u32);\n    tag.ser_into(buf)\n        .expect(\"serialize BufferTag should always succeed\");\n    buf.put(base_img);\n}\n\npub(crate) fn build_apply_record_msg(endlsn: Lsn, rec: &[u8], buf: &mut Vec<u8>) {\n    let len = 4 + 8 + rec.len();\n\n    buf.put_u8(b'A');\n    buf.put_u32(len as u32);\n    buf.put_u64(endlsn.0);\n    buf.put(rec);\n}\n\npub(crate) fn build_get_page_msg(tag: BufferTag, buf: &mut Vec<u8>) {\n    let len = 4 + 1 + 4 * 4;\n\n    buf.put_u8(b'G');\n    buf.put_u32(len as u32);\n    tag.ser_into(buf)\n        .expect(\"serialize BufferTag should always succeed\");\n}\n\npub(crate) fn build_ping_msg(buf: &mut Vec<u8>) {\n    buf.put_u8(b'H');\n    buf.put_u32(4);\n}\n"
  },
  {
    "path": "pageserver/src/walredo/process.rs",
    "content": "mod no_leak_child;\n/// The IPC protocol that pageserver and walredo process speak over their shared pipe.\nmod protocol;\n\nuse std::collections::VecDeque;\nuse std::process::{Command, Stdio};\n#[cfg(feature = \"testing\")]\nuse std::sync::atomic::AtomicUsize;\nuse std::time::Duration;\n\nuse anyhow::Context;\nuse bytes::Bytes;\nuse pageserver_api::reltag::RelTag;\nuse pageserver_api::shard::TenantShardId;\nuse postgres_ffi::{BLCKSZ, PgMajorVersion};\nuse tokio::io::{AsyncReadExt, AsyncWriteExt};\nuse tracing::{Instrument, debug, error, instrument};\nuse utils::lsn::Lsn;\nuse utils::poison::Poison;\nuse wal_decoder::models::record::NeonWalRecord;\n\nuse self::no_leak_child::NoLeakChild;\nuse crate::config::PageServerConf;\nuse crate::metrics::{WAL_REDO_PROCESS_COUNTERS, WAL_REDO_RECORD_COUNTER, WalRedoKillCause};\nuse crate::page_cache::PAGE_SZ;\nuse crate::span::debug_assert_current_span_has_tenant_id;\n\npub struct WalRedoProcess {\n    #[allow(dead_code)]\n    conf: &'static PageServerConf,\n    #[cfg(feature = \"testing\")]\n    tenant_shard_id: TenantShardId,\n    // Some() on construction, only becomes None on Drop.\n    child: Option<NoLeakChild>,\n    stdout: tokio::sync::Mutex<Poison<ProcessOutput>>,\n    stdin: tokio::sync::Mutex<Poison<ProcessInput>>,\n    /// Counter to separate same sized walredo inputs failing at the same millisecond.\n    #[cfg(feature = \"testing\")]\n    dump_sequence: AtomicUsize,\n}\n\nstruct ProcessInput {\n    stdin: tokio::process::ChildStdin,\n    n_requests: usize,\n}\n\nstruct ProcessOutput {\n    stdout: tokio::process::ChildStdout,\n    pending_responses: VecDeque<Option<Bytes>>,\n    n_processed_responses: usize,\n}\n\nimpl WalRedoProcess {\n    //\n    // Start postgres binary in special WAL redo mode.\n    //\n    #[instrument(skip_all,fields(pg_version=pg_version.major_version_num()))]\n    pub(crate) fn launch(\n        conf: &'static PageServerConf,\n        tenant_shard_id: TenantShardId,\n        
pg_version: PgMajorVersion,\n    ) -> anyhow::Result<Self> {\n        crate::span::debug_assert_current_span_has_tenant_id();\n\n        let pg_bin_dir_path = conf.pg_bin_dir(pg_version).context(\"pg_bin_dir\")?; // TODO these should be infallible.\n        let pg_lib_dir_path = conf.pg_lib_dir(pg_version).context(\"pg_lib_dir\")?;\n\n        use no_leak_child::NoLeakChildCommandExt;\n        // Start postgres itself\n        let child = Command::new(pg_bin_dir_path.join(\"postgres\"))\n            // the first arg must be --wal-redo so the child process enters into walredo mode\n            .arg(\"--wal-redo\")\n            // the child doesn't process this arg, but, having it in the argv helps identify the\n            // walredo process for a particular tenant when debugging a pageserver\n            .args([\"--tenant-shard-id\", &format!(\"{tenant_shard_id}\")])\n            .stdin(Stdio::piped())\n            .stderr(Stdio::piped())\n            .stdout(Stdio::piped())\n            .env_clear()\n            .env(\"LD_LIBRARY_PATH\", &pg_lib_dir_path)\n            .env(\"DYLD_LIBRARY_PATH\", &pg_lib_dir_path)\n            .env(\n                \"ASAN_OPTIONS\",\n                std::env::var(\"ASAN_OPTIONS\").unwrap_or_default(),\n            )\n            .env(\n                \"UBSAN_OPTIONS\",\n                std::env::var(\"UBSAN_OPTIONS\").unwrap_or_default(),\n            )\n            // NB: The redo process is not trusted after we sent it the first\n            // walredo work. Before that, it is trusted. Specifically, we trust\n            // it to\n            // 1. close all file descriptors except stdin, stdout, stderr because\n            //    pageserver might not be 100% diligent in setting FD_CLOEXEC on all\n            //    the files it opens, and\n            // 2. 
to use seccomp to sandbox itself before processing the first\n            //    walredo request.\n            .spawn_no_leak_child(tenant_shard_id)\n            .context(\"spawn process\")?;\n        WAL_REDO_PROCESS_COUNTERS.started.inc();\n        let mut child = scopeguard::guard(child, |child| {\n            error!(\"killing wal-redo-postgres process due to a problem during launch\");\n            child.kill_and_wait(WalRedoKillCause::Startup);\n        });\n\n        let stdin = child.stdin.take().unwrap();\n        let stdout = child.stdout.take().unwrap();\n        let stderr = child.stderr.take().unwrap();\n        let stderr = tokio::process::ChildStderr::from_std(stderr)\n            .context(\"convert to tokio::ChildStderr\")?;\n        let stdin =\n            tokio::process::ChildStdin::from_std(stdin).context(\"convert to tokio::ChildStdin\")?;\n        let stdout = tokio::process::ChildStdout::from_std(stdout)\n            .context(\"convert to tokio::ChildStdout\")?;\n\n        // all fallible operations post-spawn are complete, so get rid of the guard\n        let child = scopeguard::ScopeGuard::into_inner(child);\n\n        tokio::spawn(\n            async move {\n                scopeguard::defer! 
{\n                    debug!(\"wal-redo-postgres stderr_logger_task finished\");\n                    crate::metrics::WAL_REDO_PROCESS_COUNTERS.active_stderr_logger_tasks_finished.inc();\n                }\n                debug!(\"wal-redo-postgres stderr_logger_task started\");\n                crate::metrics::WAL_REDO_PROCESS_COUNTERS.active_stderr_logger_tasks_started.inc();\n\n                use tokio::io::AsyncBufReadExt;\n                let mut stderr_lines = tokio::io::BufReader::new(stderr);\n                let mut buf = Vec::new();\n                let res = loop {\n                    buf.clear();\n                    // TODO we don't trust the process to cap its stderr length.\n                    // Currently it can do unbounded Vec allocation.\n                    match stderr_lines.read_until(b'\\n', &mut buf).await {\n                        Ok(0) => break Ok(()), // eof\n                        Ok(num_bytes) => {\n                            let output = String::from_utf8_lossy(&buf[..num_bytes]);\n                            if !output.contains(\"LOG:\") {\n                               error!(%output, \"received output\");\n                            }\n                        }\n                        Err(e) => {\n                            break Err(e);\n                        }\n                    }\n                };\n                match res {\n                    Ok(()) => (),\n                    Err(e) => {\n                        error!(error=?e, \"failed to read from walredo stderr\");\n                    }\n                }\n            }.instrument(tracing::info_span!(parent: None, \"wal-redo-postgres-stderr\", pid = child.id(), tenant_id = %tenant_shard_id.tenant_id, shard_id = %tenant_shard_id.shard_slug(), %pg_version))\n        );\n\n        Ok(Self {\n            conf,\n            #[cfg(feature = \"testing\")]\n            tenant_shard_id,\n            child: Some(child),\n            stdin: 
tokio::sync::Mutex::new(Poison::new(\n                \"stdin\",\n                ProcessInput {\n                    stdin,\n                    n_requests: 0,\n                },\n            )),\n            stdout: tokio::sync::Mutex::new(Poison::new(\n                \"stdout\",\n                ProcessOutput {\n                    stdout,\n                    pending_responses: VecDeque::new(),\n                    n_processed_responses: 0,\n                },\n            )),\n            #[cfg(feature = \"testing\")]\n            dump_sequence: AtomicUsize::default(),\n        })\n    }\n\n    pub(crate) fn id(&self) -> u32 {\n        self.child\n            .as_ref()\n            .expect(\"must not call this during Drop\")\n            .id()\n    }\n\n    /// Apply given WAL records ('records') over an old page image. Returns\n    /// new page image.\n    ///\n    /// # Cancel-Safety\n    ///\n    /// Cancellation safe.\n    #[instrument(skip_all, fields(pid=%self.id()))]\n    pub(crate) async fn apply_wal_records(\n        &self,\n        rel: RelTag,\n        blknum: u32,\n        base_img: &Option<Bytes>,\n        records: &[(Lsn, NeonWalRecord)],\n        wal_redo_timeout: Duration,\n    ) -> anyhow::Result<Bytes> {\n        debug_assert_current_span_has_tenant_id();\n\n        let tag = protocol::BufferTag { rel, blknum };\n\n        // Serialize all the messages to send the WAL redo process first.\n        //\n        // This could be problematic if there are millions of records to replay,\n        // but in practice the number of records is usually so small that it doesn't\n        // matter, and it's better to keep this code simple.\n        //\n        // Most requests start with a before-image with BLCKSZ bytes, followed by\n        // by some other WAL records. 
Start with a buffer that can hold that\n        // comfortably.\n        let mut writebuf: Vec<u8> = Vec::with_capacity((BLCKSZ as usize) * 3);\n        protocol::build_begin_redo_for_block_msg(tag, &mut writebuf);\n        if let Some(img) = base_img {\n            protocol::build_push_page_msg(tag, img, &mut writebuf);\n        }\n        for (lsn, rec) in records.iter() {\n            if let NeonWalRecord::Postgres {\n                will_init: _,\n                rec: postgres_rec,\n            } = rec\n            {\n                protocol::build_apply_record_msg(*lsn, postgres_rec, &mut writebuf);\n            } else {\n                anyhow::bail!(\"tried to pass neon wal record to postgres WAL redo\");\n            }\n        }\n        protocol::build_get_page_msg(tag, &mut writebuf);\n        WAL_REDO_RECORD_COUNTER.inc_by(records.len() as u64);\n\n        let Ok(res) =\n            tokio::time::timeout(wal_redo_timeout, self.apply_wal_records0(&writebuf)).await\n        else {\n            anyhow::bail!(\"WAL redo timed out\");\n        };\n\n        if res.is_err() {\n            // not all of these can be caused by this particular input, however these are so rare\n            // in tests so capture all.\n            self.record_and_log(&writebuf);\n        }\n\n        res\n    }\n\n    /// Do a ping request-response roundtrip.\n    ///\n    /// Not used in production, but by Rust benchmarks.\n    pub(crate) async fn ping(&self, timeout: Duration) -> anyhow::Result<()> {\n        let mut writebuf: Vec<u8> = Vec::with_capacity(4);\n        protocol::build_ping_msg(&mut writebuf);\n        let Ok(res) = tokio::time::timeout(timeout, self.apply_wal_records0(&writebuf)).await\n        else {\n            anyhow::bail!(\"WAL redo ping timed out\");\n        };\n        let response = res?;\n        if response.len() != PAGE_SZ {\n            anyhow::bail!(\n                \"WAL redo ping response should respond with page-sized response: {}\",\n          
      response.len()\n            );\n        }\n        Ok(())\n    }\n\n    /// # Cancel-Safety\n    ///\n    /// When not polled to completion (e.g. because in `tokio::select!` another\n    /// branch becomes ready before this future), concurrent and subsequent\n    /// calls may fail due to [`utils::poison::Poison::check_and_arm`] calls.\n    /// Dispose of this process instance and create a new one.\n    async fn apply_wal_records0(&self, writebuf: &[u8]) -> anyhow::Result<Bytes> {\n        let request_no = {\n            let mut lock_guard = self.stdin.lock().await;\n            let mut poison_guard = lock_guard.check_and_arm()?;\n            let input = poison_guard.data_mut();\n            input\n                .stdin\n                .write_all(writebuf)\n                .await\n                .context(\"write to walredo stdin\")?;\n            let request_no = input.n_requests;\n            input.n_requests += 1;\n            poison_guard.disarm();\n            request_no\n        };\n\n        // To improve walredo performance we separate sending requests and receiving\n        // responses. They are protected by different mutexes (output and input).\n        // If threads T1, T2, T3 send requests D1, D2, D3 to the walredo process\n        // then there is no guarantee that T1 will be granted the output mutex lock first.\n        // To address this issue we maintain the number of sent requests, the number of processed\n        // responses and a ring buffer with pending responses. After sending its request\n        // (under the input mutex), a thread remembers its request number. Then it releases\n        // the input mutex, locks the output mutex and fetches into the ring buffer all responses up to\n        // its stored request number. 
Then it takes the corresponding element from\n        // the pending responses ring buffer and truncates all empty elements from the front,\n        // advancing the processed responses number.\n\n        let mut lock_guard = self.stdout.lock().await;\n        let mut poison_guard = lock_guard.check_and_arm()?;\n        let output = poison_guard.data_mut();\n        let n_processed_responses = output.n_processed_responses;\n        while n_processed_responses + output.pending_responses.len() <= request_no {\n            // We expect the WAL redo process to respond with an 8k page image. We read it\n            // into this buffer.\n            let mut resultbuf = vec![0; BLCKSZ.into()];\n            output\n                .stdout\n                .read_exact(&mut resultbuf)\n                .await\n                .context(\"read walredo stdout\")?;\n            output\n                .pending_responses\n                .push_back(Some(Bytes::from(resultbuf)));\n        }\n        // Replace our request's response with None in `pending_responses`.\n        // Then make space in the ring buffer by clearing out any sequence of contiguous\n        // `None`'s from the front of `pending_responses`.\n        // NB: We can't simply pop_front() because other requests' responses may be in front of ours:\n        // another requester might have grabbed the output mutex before us:\n        // T1: grab input mutex\n        // T1: send request_no 23\n        // T1: release input mutex\n        // T2: grab input mutex\n        // T2: send request_no 24\n        // T2: release input mutex\n        // T2: grab output mutex\n        // T2: n_processed_responses + output.pending_responses.len() <= request_no\n        //            23                                0                   24\n        // T2: enters poll loop that reads stdout\n        // T2: put response for 23 into pending_responses\n        // T2: put response for 24 into pending_responses\n        // pending_responses now looks like this: Front 
Some(response_23) Some(response_24) Back\n        // T2: takes its response_24\n        // pending_responses now looks like this: Front Some(response_23) None Back\n        // T2: does the while loop below\n        // pending_responses now looks like this: Front Some(response_23) None Back\n        // T2: releases output mutex\n        // T1: grabs output mutex\n        // T1: n_processed_responses + output.pending_responses.len() > request_no\n        //            23                                2                   23\n        // T1: skips poll loop that reads stdout\n        // T1: takes its response_23\n        // pending_responses now looks like this: Front None None Back\n        // T2: does the while loop below\n        // pending_responses now looks like this: Front Back\n        // n_processed_responses now has value 25\n        let res = output.pending_responses[request_no - n_processed_responses]\n            .take()\n            .expect(\"we own this request_no, nobody else is supposed to take it\");\n        while let Some(front) = output.pending_responses.front() {\n            if front.is_none() {\n                output.pending_responses.pop_front();\n                output.n_processed_responses += 1;\n            } else {\n                break;\n            }\n        }\n        poison_guard.disarm();\n        Ok(res)\n    }\n\n    #[cfg(feature = \"testing\")]\n    fn record_and_log(&self, writebuf: &[u8]) {\n        use std::sync::atomic::Ordering;\n\n        let millis = std::time::SystemTime::now()\n            .duration_since(std::time::SystemTime::UNIX_EPOCH)\n            .unwrap()\n            .as_millis();\n\n        let seq = self.dump_sequence.fetch_add(1, Ordering::Relaxed);\n\n        // these files will be collected to an allure report\n        let filename = format!(\"walredo-{millis}-{}-{seq}.walredo\", writebuf.len());\n\n        let path = self.conf.tenant_path(&self.tenant_shard_id).join(&filename);\n\n        use 
std::io::Write;\n        let res = std::fs::OpenOptions::new()\n            .write(true)\n            .create_new(true)\n            .read(true)\n            .open(path)\n            .and_then(|mut f| f.write_all(writebuf));\n\n        // trip up allowed_errors\n        if let Err(e) = res {\n            tracing::error!(target=%filename, length=writebuf.len(), \"failed to write out the walredo errored input: {e}\");\n        } else {\n            tracing::error!(filename, \"erroring walredo input saved\");\n        }\n    }\n\n    #[cfg(not(feature = \"testing\"))]\n    fn record_and_log(&self, _: &[u8]) {}\n}\n\nimpl Drop for WalRedoProcess {\n    fn drop(&mut self) {\n        self.child\n            .take()\n            .expect(\"we only do this once\")\n            .kill_and_wait(WalRedoKillCause::WalRedoProcessDrop);\n        // no way to wait for stderr_logger_task from Drop because that is async only\n    }\n}\n"
  },
  {
    "path": "pageserver/src/walredo.rs",
    "content": "//!\n//! WAL redo. This service runs PostgreSQL in a special wal_redo mode\n//! to apply given WAL records over an old page image and return new\n//! page image.\n//!\n//! We rely on Postgres to perform WAL redo for us. We launch a\n//! postgres process in special \"wal redo\" mode that's similar to\n//! single-user mode. We then pass the previous page image, if any,\n//! and all the WAL records we want to apply, to the postgres\n//! process. Then we get the page image back. Communication with the\n//! postgres process happens via stdin/stdout\n//!\n//! See pgxn/neon_walredo/walredoproc.c for the other side of\n//! this communication.\n//!\n//! The Postgres process is assumed to be secure against malicious WAL\n//! records. It achieves it by dropping privileges before replaying\n//! any WAL records, so that even if an attacker hijacks the Postgres\n//! process, he cannot escape out of it.\n\n/// Process lifecycle and abstracction for the IPC protocol.\nmod process;\n\n/// Code to apply [`NeonWalRecord`]s.\npub(crate) mod apply_neon;\n\nuse std::future::Future;\nuse std::sync::Arc;\nuse std::time::{Duration, Instant};\n\nuse anyhow::Context;\nuse bytes::{Bytes, BytesMut};\nuse pageserver_api::key::Key;\nuse pageserver_api::models::{WalRedoManagerProcessStatus, WalRedoManagerStatus};\nuse pageserver_api::shard::TenantShardId;\nuse postgres_ffi::PgMajorVersion;\nuse tracing::*;\nuse utils::lsn::Lsn;\nuse utils::sync::gate::GateError;\nuse utils::sync::heavier_once_cell;\nuse wal_decoder::models::record::NeonWalRecord;\n\nuse crate::config::PageServerConf;\nuse crate::metrics::{\n    WAL_REDO_BYTES_HISTOGRAM, WAL_REDO_PROCESS_LAUNCH_DURATION_HISTOGRAM,\n    WAL_REDO_RECORDS_HISTOGRAM, WAL_REDO_TIME,\n};\n\n/// The real implementation that uses a Postgres process to\n/// perform WAL replay.\n///\n/// Only one thread can use the process at a time, that is controlled by the\n/// Mutex. 
In the future, we might want to launch a pool of processes to allow\n/// concurrent replay of multiple records.\npub struct PostgresRedoManager {\n    tenant_shard_id: TenantShardId,\n    conf: &'static PageServerConf,\n    last_redo_at: std::sync::Mutex<Option<Instant>>,\n    /// We use [`heavier_once_cell`] for\n    ///\n    /// 1. coalescing the lazy spawning of walredo processes ([`ProcessOnceCell::Spawned`])\n    /// 2. preventing new processes from being spawned on [`Self::shutdown`] (=> [`ProcessOnceCell::ManagerShutDown`]).\n    ///\n    /// # Spawning\n    ///\n    /// Redo requests use the once cell to coalesce onto one call to [`process::WalRedoProcess::launch`].\n    ///\n    /// Notably, requests don't use the [`heavier_once_cell::Guard`] to keep hold of\n    /// their process object; we use [`Arc::clone`] for that.\n    ///\n    /// This is primarily because earlier implementations that didn't use [`heavier_once_cell`]\n    /// had that behavior; it's probably unnecessary.\n    /// The only merit of it is that if one walredo process encounters an error,\n    /// it can take it out of rotation (= using [`heavier_once_cell::Guard::take_and_deinit`])\n    /// and retry redo, thereby starting the new process, while other redo tasks might\n    /// still be using the old redo process. 
But, those other tasks will most likely\n    /// encounter an error as well, and errors are an unexpected condition anyway.\n    /// So, probably we could get rid of the `Arc` in the future.\n    ///\n    /// # Shutdown\n    ///\n    /// See [`Self::launched_processes`].\n    redo_process: heavier_once_cell::OnceCell<ProcessOnceCell>,\n\n    /// Gate that is entered when launching a walredo process and held open\n    /// until the process has been `kill()`ed and `wait()`ed upon.\n    ///\n    /// Manager shutdown waits for this gate to close after setting the\n    /// [`ProcessOnceCell::ManagerShutDown`] state in [`Self::redo_process`].\n    ///\n    /// This type of usage is a bit unusual because gates usually keep track of\n    /// concurrent operations, e.g., every [`Self::request_redo`] that is inflight.\n    /// But we use it here to keep track of the _processes_ that we have launched,\n    /// which may outlive any individual redo request because\n    /// - we keep walredo process around until its quiesced to amortize spawn cost and\n    /// - the Arc may be held by multiple concurrent redo requests, so, just because\n    ///   you replace the [`Self::redo_process`] cell's content doesn't mean the\n    ///   process gets killed immediately.\n    ///\n    /// We could simplify this by getting rid of the [`Arc`].\n    /// See the comment on [`Self::redo_process`] for more details.\n    launched_processes: utils::sync::gate::Gate,\n}\n\n/// See [`PostgresRedoManager::redo_process`].\nenum ProcessOnceCell {\n    Spawned(Arc<Process>),\n    ManagerShutDown,\n}\n\nstruct Process {\n    process: process::WalRedoProcess,\n    /// This field is last in this struct so the guard gets dropped _after_ [`Self::process`].\n    /// (Reminder: dropping [`Self::process`] synchronously sends SIGKILL and then `wait()`s for it to exit).\n    _launched_processes_guard: utils::sync::gate::GateGuard,\n}\n\nimpl std::ops::Deref for Process {\n    type Target = 
process::WalRedoProcess;\n\n    fn deref(&self) -> &Self::Target {\n        &self.process\n    }\n}\n\n#[derive(Debug, thiserror::Error)]\npub enum Error {\n    #[error(\"cancelled\")]\n    Cancelled,\n    #[error(transparent)]\n    Other(#[from] anyhow::Error),\n}\n\nmacro_rules! bail {\n    ($($arg:tt)*) => {\n        return Err($crate::walredo::Error::Other(::anyhow::anyhow!($($arg)*)));\n    }\n}\n\n#[derive(Debug, Clone, Copy)]\npub enum RedoAttemptType {\n    /// Used for the read path. Will fire critical errors and retry twice if failure.\n    ReadPage,\n    // Used for legacy compaction (only used in image compaction). Will fire critical errors and retry once if failure.\n    LegacyCompaction,\n    // Used for gc compaction. Will not fire critical errors and not retry.\n    GcCompaction,\n}\n\nimpl std::fmt::Display for RedoAttemptType {\n    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {\n        match self {\n            RedoAttemptType::ReadPage => write!(f, \"read page\"),\n            RedoAttemptType::LegacyCompaction => write!(f, \"legacy compaction\"),\n            RedoAttemptType::GcCompaction => write!(f, \"gc compaction\"),\n        }\n    }\n}\n\n///\n/// Public interface of WAL redo manager\n///\nimpl PostgresRedoManager {\n    ///\n    /// Request the WAL redo manager to apply some WAL records\n    ///\n    /// The WAL redo is handled by a separate thread, so this just sends a request\n    /// to the thread and waits for response.\n    ///\n    /// # Cancel-Safety\n    ///\n    /// This method is cancellation-safe.\n    pub async fn request_redo(\n        &self,\n        key: Key,\n        lsn: Lsn,\n        base_img: Option<(Lsn, Bytes)>,\n        records: Vec<(Lsn, NeonWalRecord)>,\n        pg_version: PgMajorVersion,\n        redo_attempt_type: RedoAttemptType,\n    ) -> Result<Bytes, Error> {\n        if records.is_empty() {\n            bail!(\"invalid WAL redo request with no records\");\n        }\n\n        let 
max_retry_attempts = match redo_attempt_type {\n            RedoAttemptType::ReadPage => 2,\n            RedoAttemptType::LegacyCompaction => 1,\n            RedoAttemptType::GcCompaction => 0,\n        };\n\n        let base_img_lsn = base_img.as_ref().map(|p| p.0).unwrap_or(Lsn::INVALID);\n        let mut img = base_img.map(|p| p.1);\n        let mut batch_neon = apply_neon::can_apply_in_neon(&records[0].1);\n        let mut batch_start = 0;\n        for (i, record) in records.iter().enumerate().skip(1) {\n            let rec_neon = apply_neon::can_apply_in_neon(&record.1);\n\n            if rec_neon != batch_neon {\n                let result = if batch_neon {\n                    self.apply_batch_neon(key, lsn, img, &records[batch_start..i])\n                } else {\n                    self.apply_batch_postgres(\n                        key,\n                        lsn,\n                        img,\n                        base_img_lsn,\n                        &records[batch_start..i],\n                        self.conf.wal_redo_timeout,\n                        pg_version,\n                        max_retry_attempts,\n                        redo_attempt_type,\n                    )\n                    .await\n                };\n                img = Some(result?);\n\n                batch_neon = rec_neon;\n                batch_start = i;\n            }\n        }\n        // last batch\n        if batch_neon {\n            self.apply_batch_neon(key, lsn, img, &records[batch_start..])\n        } else {\n            self.apply_batch_postgres(\n                key,\n                lsn,\n                img,\n                base_img_lsn,\n                &records[batch_start..],\n                self.conf.wal_redo_timeout,\n                pg_version,\n                max_retry_attempts,\n                redo_attempt_type,\n            )\n            .await\n        }\n    }\n\n    /// Do a ping request-response roundtrip.\n    ///\n    /// Not used in 
production, but by Rust benchmarks.\n    ///\n    /// # Cancel-Safety\n    ///\n    /// This method is cancellation-safe.\n    pub async fn ping(&self, pg_version: PgMajorVersion) -> Result<(), Error> {\n        self.do_with_walredo_process(pg_version, |proc| async move {\n            proc.ping(Duration::from_secs(1))\n                .await\n                .map_err(Error::Other)\n        })\n        .await\n    }\n\n    pub fn status(&self) -> WalRedoManagerStatus {\n        WalRedoManagerStatus {\n            last_redo_at: {\n                let at = *self.last_redo_at.lock().unwrap();\n                at.and_then(|at| {\n                    let age = at.elapsed();\n                    // map any chrono errors silently to None here\n                    chrono::Utc::now().checked_sub_signed(chrono::Duration::from_std(age).ok()?)\n                })\n            },\n            process: self.redo_process.get().and_then(|p| match &*p {\n                ProcessOnceCell::Spawned(p) => Some(WalRedoManagerProcessStatus { pid: p.id() }),\n                ProcessOnceCell::ManagerShutDown => None,\n            }),\n        }\n    }\n}\n\nimpl PostgresRedoManager {\n    ///\n    /// Create a new PostgresRedoManager.\n    ///\n    pub fn new(\n        conf: &'static PageServerConf,\n        tenant_shard_id: TenantShardId,\n    ) -> PostgresRedoManager {\n        // The actual process is launched lazily, on first request.\n        PostgresRedoManager {\n            tenant_shard_id,\n            conf,\n            last_redo_at: std::sync::Mutex::default(),\n            redo_process: heavier_once_cell::OnceCell::default(),\n            launched_processes: utils::sync::gate::Gate::default(),\n        }\n    }\n\n    /// Shut down the WAL redo manager.\n    ///\n    /// Returns `true` if this call was the one that initiated shutdown.\n    /// `true` may be observed by no caller if the first caller stops polling.\n    ///\n    /// After this future completes\n    /// - no redo 
process is running\n    /// - no new redo process will be spawned\n    /// - redo requests that need walredo process will fail with [`Error::Cancelled`]\n    /// - [`apply_neon`]-only redo requests may still work, but this may change in the future\n    ///\n    /// # Cancel-Safety\n    ///\n    /// This method is cancellation-safe.\n    pub async fn shutdown(&self) -> bool {\n        // prevent new processes from being spawned\n        let maybe_permit = match self.redo_process.get_or_init_detached().await {\n            Ok(guard) => {\n                if matches!(&*guard, ProcessOnceCell::ManagerShutDown) {\n                    None\n                } else {\n                    let (proc, permit) = guard.take_and_deinit();\n                    drop(proc); // this just drops the Arc, its refcount may not be zero yet\n                    Some(permit)\n                }\n            }\n            Err(permit) => Some(permit),\n        };\n        let it_was_us = if let Some(permit) = maybe_permit {\n            self.redo_process\n                .set(ProcessOnceCell::ManagerShutDown, permit);\n            true\n        } else {\n            false\n        };\n        // wait for ongoing requests to drain and the refcounts of all Arc<WalRedoProcess> that\n        // we ever launched to drop to zero, which when it happens synchronously kill()s & wait()s\n        // for the underlying process.\n        self.launched_processes.close().await;\n        it_was_us\n    }\n\n    /// This type doesn't have its own background task to check for idleness: we\n    /// rely on our owner calling this function periodically in its own housekeeping\n    /// loops.\n    pub(crate) fn maybe_quiesce(&self, idle_timeout: Duration) {\n        if let Ok(g) = self.last_redo_at.try_lock() {\n            if let Some(last_redo_at) = *g {\n                if last_redo_at.elapsed() >= idle_timeout {\n                    drop(g);\n                    drop(self.redo_process.get().map(|guard| 
guard.take_and_deinit()));\n                }\n            }\n        }\n    }\n\n    /// # Cancel-Safety\n    ///\n    /// This method is cancel-safe iff `closure` is cancel-safe.\n    async fn do_with_walredo_process<\n        F: FnOnce(Arc<Process>) -> Fut,\n        Fut: Future<Output = Result<O, Error>>,\n        O,\n    >(\n        &self,\n        pg_version: PgMajorVersion,\n        closure: F,\n    ) -> Result<O, Error> {\n        let proc: Arc<Process> = match self.redo_process.get_or_init_detached().await {\n            Ok(guard) => match &*guard {\n                ProcessOnceCell::Spawned(proc) => Arc::clone(proc),\n                ProcessOnceCell::ManagerShutDown => {\n                    return Err(Error::Cancelled);\n                }\n            },\n            Err(permit) => {\n                let start = Instant::now();\n                // acquire guard before spawning process, so that we don't spawn new processes\n                // if the gate is already closed.\n                let _launched_processes_guard = match self.launched_processes.enter() {\n                    Ok(guard) => guard,\n                    Err(GateError::GateClosed) => unreachable!(\n                        \"shutdown sets the once cell to `ManagerShutDown` state before closing the gate\"\n                    ),\n                };\n                let proc = Arc::new(Process {\n                    process: process::WalRedoProcess::launch(\n                        self.conf,\n                        self.tenant_shard_id,\n                        pg_version,\n                    )\n                    .context(\"launch walredo process\")?,\n                    _launched_processes_guard,\n                });\n                let duration = start.elapsed();\n                WAL_REDO_PROCESS_LAUNCH_DURATION_HISTOGRAM.observe(duration.as_secs_f64());\n                info!(\n                    elapsed_ms = duration.as_millis(),\n                    pid = proc.id(),\n              
      \"launched walredo process\"\n                );\n                self.redo_process\n                    .set(ProcessOnceCell::Spawned(Arc::clone(&proc)), permit);\n                proc\n            }\n        };\n\n        // async closures are unstable, would support &Process\n        let result = closure(proc.clone()).await;\n\n        if result.is_err() {\n            // Avoid concurrent callers hitting the same issue by taking `proc` out of the rotation.\n            // Note that there may be other tasks concurrent with us that also hold `proc`.\n            // We have to deal with that here.\n            // Also read the doc comment on field `self.redo_process`.\n            //\n            // NB: there may still be other concurrent threads using `proc`.\n            // The last one will send SIGKILL when the underlying Arc reaches refcount 0.\n            //\n            // NB: the drop impl blocks the dropping thread with a wait() system call for\n            // the child process. In some ways the blocking is actually good: if we\n            // deferred the waiting into the background / to tokio if we used `tokio::process`,\n            // it could happen that if walredo always fails immediately, we spawn processes faster\n            // than we can SIGKILL & `wait` for them to exit. 
By doing it the way we do here,\n            // we limit this risk of run-away to at most $num_runtimes * $num_executor_threads.\n            // This probably needs revisiting at some later point.\n            match self.redo_process.get() {\n                None => (),\n                Some(guard) => {\n                    match &*guard {\n                        ProcessOnceCell::ManagerShutDown => {}\n                        ProcessOnceCell::Spawned(guard_proc) => {\n                            if Arc::ptr_eq(&proc, guard_proc) {\n                                // We're the first to observe an error from `proc`, it's our job to take it out of rotation.\n                                guard.take_and_deinit();\n                            } else {\n                                // Another task already spawned another redo process (further up in this method)\n                                // and put it into `redo_process`. Do nothing, our view of the world is behind.\n                            }\n                        }\n                    }\n                }\n            }\n            // The last task that does this `drop()` of `proc` will do a blocking `wait()` syscall.\n            drop(proc);\n        }\n\n        result\n    }\n\n    ///\n    /// Process one request for WAL redo using wal-redo postgres\n    ///\n    /// # Cancel-Safety\n    ///\n    /// Cancellation safe.\n    #[allow(clippy::too_many_arguments)]\n    async fn apply_batch_postgres(\n        &self,\n        key: Key,\n        lsn: Lsn,\n        base_img: Option<Bytes>,\n        base_img_lsn: Lsn,\n        records: &[(Lsn, NeonWalRecord)],\n        wal_redo_timeout: Duration,\n        pg_version: PgMajorVersion,\n        max_retry_attempts: u32,\n        redo_attempt_type: RedoAttemptType,\n    ) -> Result<Bytes, Error> {\n        *(self.last_redo_at.lock().unwrap()) = Some(Instant::now());\n\n        let (rel, blknum) = key.to_rel_block().context(\"invalid record\")?;\n        let 
mut n_attempts = 0u32;\n        loop {\n            let base_img = &base_img;\n            let closure = |proc: Arc<Process>| async move {\n                let started_at = std::time::Instant::now();\n\n                // Relational WAL records are applied using wal-redo-postgres\n                let result = proc\n                    .apply_wal_records(rel, blknum, base_img, records, wal_redo_timeout)\n                    .await\n                    .context(\"apply_wal_records\");\n\n                let duration = started_at.elapsed();\n\n                let len = records.len();\n                let nbytes = records.iter().fold(0, |acumulator, record| {\n                    acumulator\n                        + match &record.1 {\n                            NeonWalRecord::Postgres { rec, .. } => rec.len(),\n                            _ => unreachable!(\"Only PostgreSQL records are accepted in this batch\"),\n                        }\n                });\n\n                WAL_REDO_TIME.observe(duration.as_secs_f64());\n                WAL_REDO_RECORDS_HISTOGRAM.observe(len as f64);\n                WAL_REDO_BYTES_HISTOGRAM.observe(nbytes as f64);\n\n                debug!(\n                    \"postgres applied {} WAL records ({} bytes) in {} us to reconstruct page image at LSN {}\",\n                    len,\n                    nbytes,\n                    duration.as_micros(),\n                    lsn\n                );\n\n                if let Err(e) = result.as_ref() {\n                    macro_rules! 
message {\n                        ($level:tt) => {\n                            $level!(\n                                \"error applying {} WAL records {}..{} ({} bytes) to key {} during {}, from base image with LSN {} to reconstruct page image at LSN {} n_attempts={}: {:?}\",\n                                records.len(),\n                                records.first().map(|p| p.0).unwrap_or(Lsn(0)),\n                                records.last().map(|p| p.0).unwrap_or(Lsn(0)),\n                                nbytes,\n                                key,\n                                redo_attempt_type,\n                                base_img_lsn,\n                                lsn,\n                                n_attempts,\n                                e,\n                            )\n                        }\n                    }\n                    match redo_attempt_type {\n                        RedoAttemptType::ReadPage => message!(error),\n                        RedoAttemptType::LegacyCompaction => message!(error),\n                        RedoAttemptType::GcCompaction => message!(warn),\n                    }\n                }\n\n                result.map_err(Error::Other)\n            };\n            let result = self.do_with_walredo_process(pg_version, closure).await;\n\n            if result.is_ok() && n_attempts != 0 {\n                info!(n_attempts, \"retried walredo succeeded\");\n            }\n            n_attempts += 1;\n            if n_attempts > max_retry_attempts || result.is_ok() {\n                return result;\n            }\n        }\n    }\n\n    ///\n    /// Process a batch of WAL records using bespoken Neon code.\n    ///\n    fn apply_batch_neon(\n        &self,\n        key: Key,\n        lsn: Lsn,\n        base_img: Option<Bytes>,\n        records: &[(Lsn, NeonWalRecord)],\n    ) -> Result<Bytes, Error> {\n        let start_time = Instant::now();\n\n        let mut page = BytesMut::new();\n        if 
let Some(fpi) = base_img {\n            // If full-page image is provided, then use it...\n            page.extend_from_slice(&fpi[..]);\n        } else {\n            // All the current WAL record types that we can handle require a base image.\n            bail!(\"invalid neon WAL redo request with no base image\");\n        }\n\n        // Apply all the WAL records in the batch\n        for (record_lsn, record) in records.iter() {\n            self.apply_record_neon(key, &mut page, *record_lsn, record)?;\n        }\n        // Success!\n        let duration = start_time.elapsed();\n        // FIXME: using the same metric here creates a bimodal distribution by default, and because\n        // there could be multiple batch sizes this would be N+1 modal.\n        WAL_REDO_TIME.observe(duration.as_secs_f64());\n\n        debug!(\n            \"neon applied {} WAL records in {} us to reconstruct page image at LSN {}\",\n            records.len(),\n            duration.as_micros(),\n            lsn\n        );\n\n        Ok(page.freeze())\n    }\n\n    fn apply_record_neon(\n        &self,\n        key: Key,\n        page: &mut BytesMut,\n        record_lsn: Lsn,\n        record: &NeonWalRecord,\n    ) -> anyhow::Result<()> {\n        apply_neon::apply_in_neon(record, record_lsn, key, page)?;\n\n        Ok(())\n    }\n}\n\n#[cfg(test)]\npub(crate) mod harness {\n    use super::PostgresRedoManager;\n    use crate::config::PageServerConf;\n    use utils::{id::TenantId, shard::TenantShardId};\n\n    pub struct RedoHarness {\n        // underscored because unused, except for removal at drop\n        _repo_dir: camino_tempfile::Utf8TempDir,\n        pub manager: PostgresRedoManager,\n        tenant_shard_id: TenantShardId,\n    }\n\n    impl RedoHarness {\n        pub fn new() -> anyhow::Result<Self> {\n            crate::tenant::harness::setup_logging();\n\n            let repo_dir = camino_tempfile::tempdir()?;\n            let conf = 
PageServerConf::dummy_conf(repo_dir.path().to_path_buf());\n            let conf = Box::leak(Box::new(conf));\n            let tenant_shard_id = TenantShardId::unsharded(TenantId::generate());\n\n            let manager = PostgresRedoManager::new(conf, tenant_shard_id);\n\n            Ok(RedoHarness {\n                _repo_dir: repo_dir,\n                manager,\n                tenant_shard_id,\n            })\n        }\n        pub fn span(&self) -> tracing::Span {\n            tracing::info_span!(\"RedoHarness\", tenant_id=%self.tenant_shard_id.tenant_id, shard_id=%self.tenant_shard_id.shard_slug())\n        }\n    }\n}\n\n#[cfg(test)]\nmod tests {\n    use std::str::FromStr;\n\n    use bytes::Bytes;\n    use pageserver_api::key::Key;\n    use postgres_ffi::PgMajorVersion;\n    use tracing::Instrument;\n    use utils::lsn::Lsn;\n    use wal_decoder::models::record::NeonWalRecord;\n\n    use crate::walredo::RedoAttemptType;\n    use crate::walredo::harness::RedoHarness;\n\n    #[tokio::test]\n    async fn test_ping() {\n        let h = RedoHarness::new().unwrap();\n\n        h.manager\n            .ping(PgMajorVersion::PG14)\n            .instrument(h.span())\n            .await\n            .expect(\"ping should work\");\n    }\n\n    #[tokio::test]\n    async fn short_v14_redo() {\n        let expected = std::fs::read(\"test_data/short_v14_redo.page\").unwrap();\n\n        let h = RedoHarness::new().unwrap();\n\n        let page = h\n            .manager\n            .request_redo(\n                Key {\n                    field1: 0,\n                    field2: 1663,\n                    field3: 13010,\n                    field4: 1259,\n                    field5: 0,\n                    field6: 0,\n                },\n                Lsn::from_str(\"0/16E2408\").unwrap(),\n                None,\n                short_records(),\n                PgMajorVersion::PG14,\n                RedoAttemptType::ReadPage,\n            )\n            
.instrument(h.span())\n            .await\n            .unwrap();\n\n        assert_eq!(&expected, &*page);\n    }\n\n    #[tokio::test]\n    async fn short_v14_fails_for_wrong_key_but_returns_zero_page() {\n        let h = RedoHarness::new().unwrap();\n\n        let page = h\n            .manager\n            .request_redo(\n                Key {\n                    field1: 0,\n                    field2: 1663,\n                    // key should be 13010\n                    field3: 13130,\n                    field4: 1259,\n                    field5: 0,\n                    field6: 0,\n                },\n                Lsn::from_str(\"0/16E2408\").unwrap(),\n                None,\n                short_records(),\n                PgMajorVersion::PG14,\n                RedoAttemptType::ReadPage,\n            )\n            .instrument(h.span())\n            .await\n            .unwrap();\n\n        // TODO: there will be some stderr printout, which is forwarded to tracing that could\n        // perhaps be captured as long as it's in the same thread.\n        assert_eq!(page, crate::ZERO_PAGE);\n    }\n\n    #[tokio::test]\n    async fn test_stderr() {\n        let h = RedoHarness::new().unwrap();\n        h\n            .manager\n            .request_redo(\n                Key::from_i128(0),\n                Lsn::INVALID,\n                None,\n                short_records(),\n                PgMajorVersion::PG16, /* 16 currently produces stderr output on startup, which adds a nice extra edge */\n                RedoAttemptType::ReadPage,\n            )\n            .instrument(h.span())\n            .await\n            .unwrap_err();\n    }\n\n    #[allow(clippy::octal_escapes)]\n    fn short_records() -> Vec<(Lsn, NeonWalRecord)> {\n        vec![\n            (\n                Lsn::from_str(\"0/16A9388\").unwrap(),\n                NeonWalRecord::Postgres {\n                    will_init: true,\n                    rec: 
Bytes::from_static(b\"j\\x03\\0\\0\\0\\x04\\0\\0\\xe8\\x7fj\\x01\\0\\0\\0\\0\\0\\n\\0\\0\\xd0\\x16\\x13Y\\0\\x10\\0\\04\\x03\\xd4\\0\\x05\\x7f\\x06\\0\\0\\xd22\\0\\0\\xeb\\x04\\0\\0\\0\\0\\0\\0\\xff\\x03\\0\\0\\0\\0\\x80\\xeca\\x01\\0\\0\\x01\\0\\xd4\\0\\xa0\\x1d\\0 \\x04 \\0\\0\\0\\0/\\0\\x01\\0\\xa0\\x9dX\\x01\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0.\\0\\x01\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\00\\x9f\\x9a\\x01P\\x9e\\xb2\\x01\\0\\x04\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\x02\\0!\\0\\x01\\x08 \\xff\\xff\\xff?\\0\\0\\0\\0\\0\\0@\\0\\0another_table\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\x98\\x08\\0\\0\\x02@\\0\\0\\0\\0\\0\\0\\n\\0\\0\\0\\x02\\0\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\x80\\xbf\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0pr\\x01\\0\\0\\0\\0\\0\\0\\0\\0\\x01d\\0\\0\\0\\0\\0\\0\\x04\\0\\0\\x01\\0\\0\\0\\0\\0\\0\\0\\x0c\\x02\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0/\\0!\\x80\\x03+ \\xff\\xff\\xff\\x7f\\0\\0\\0\\0\\0\\xdf\\x04\\0\\0pg_type\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\x0b\\0\\0\\0G\\0\\0\\0\\0\\0\\0\\0\\n\\0\\0\\0\\x02\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\x0e\\0\\0\\0\\0@\\x16D\\x0e\\0\\0\\0K\\x10\\0\\0\\x01\\0pr 
\\0\\0\\0\\0\\0\\0\\0\\0\\x01n\\0\\0\\0\\0\\0\\xd6\\x02\\0\\0\\x01\\0\\0\\0[\\x01\\0\\0\\0\\0\\0\\0\\0\\t\\x04\\0\\0\\x02\\0\\0\\0\\x01\\0\\0\\0\\n\\0\\0\\0\\n\\0\\0\\0\\x7f\\0\\0\\0\\0\\0\\0\\0\\n\\0\\0\\0\\x02\\0\\0\\0\\0\\0\\0C\\x01\\0\\0\\x15\\x01\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0.\\0!\\x80\\x03+ \\xff\\xff\\xff\\x7f\\0\\0\\0\\0\\0;\\n\\0\\0pg_statistic\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\x0b\\0\\0\\0\\xfd.\\0\\0\\0\\0\\0\\0\\n\\0\\0\\0\\x02\\0\\0\\0;\\n\\0\\0\\0\\0\\0\\0\\x13\\0\\0\\0\\0\\0\\xcbC\\x13\\0\\0\\0\\x18\\x0b\\0\\0\\x01\\0pr\\x1f\\0\\0\\0\\0\\0\\0\\0\\0\\x01n\\0\\0\\0\\0\\0\\xd6\\x02\\0\\0\\x01\\0\\0\\0C\\x01\\0\\0\\0\\0\\0\\0\\0\\t\\x04\\0\\0\\x01\\0\\0\\0\\x01\\0\\0\\0\\n\\0\\0\\0\\n\\0\\0\\0\\x7f\\0\\0\\0\\0\\0\\0\\x02\\0\\x01\")\n                }\n            ),\n            (\n                Lsn::from_str(\"0/16D4080\").unwrap(),\n                NeonWalRecord::Postgres {\n                    will_init: false,\n                    rec: Bytes::from_static(b\"\\xbc\\0\\0\\0\\0\\0\\0\\0h?m\\x01\\0\\0\\0\\0p\\n\\0\\09\\x08\\xa3\\xea\\0 \\x8c\\0\\x7f\\x06\\0\\0\\xd22\\0\\0\\xeb\\x04\\0\\0\\0\\0\\0\\0\\xff\\x02\\0@\\0\\0another_table\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\x98\\x08\\0\\0\\x02@\\0\\0\\0\\0\\0\\0\\n\\0\\0\\0\\x02\\0\\0\\0\\0@\\0\\0\\0\\0\\0\\0\\x05\\0\\0\\0\\0@zD\\x05\\0\\0\\0\\0\\0\\0\\0\\0\\0pr\\x01\\0\\0\\0\\0\\0\\0\\0\\0\\x01d\\0\\0\\0\\0\\0\\0\\x04\\0\\0\\x01\\0\\0\\0\\x02\\0\")\n                }\n            )\n        ]\n    }\n}\n"
  },
  {
    "path": "pageserver/test_data/indices/mixed_workload/README.md",
    "content": "\n# This was captured from one shard of a large tenant in staging.\n\n# It has a mixture of deltas and image layers, >1000 layers in total.\n\n# This is suitable for general smoke tests that want an index which is not\n# trivially small, but doesn't contain weird/pathological cases.\n"
  },
  {
    "path": "pageserver/test_data/indices/mixed_workload/index_part.json",
    "content": "{\"version\":7,\"layer_metadata\":{\"000000067F00004005000060F300069883DB-000000067F00004005000060F300069D13FA__00000178B8B10551-0000017C9F5597E1\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F300039A4000-000000067F00004005000060F300039C0000__0000010D77B487A0\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F300039FC000-000000067F00004005000060F30003A0F066__0000010D77B487A0\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F000040050081DB43000082C0F1-000000067F000040050081DB43000086E169__000000A583FBFB91-000000A9EB8C4489\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30000478000-000000067F00004005000060F3000047C000__000000174479FC18\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F3000012C000-000000067F00004005000060F300001F0000__0000018624969468\":{\"file_size\":134422528,\"generation\":7,\"shard\":\"0008\"},\"000000067F00004005000060F700019E8000-000000067F00004005000060F700019EC000__0000014EDD256548\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F300018E0FE6-000000067F00004005000060F3000193A10B__00000075CC373F31-00000079F2A2F311\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005016E85370000004000-030000000000000000000000000000000002__0000018613F0A050\":{\"file_size\":14172160,\"generation\":3,\"shard\":\"0008\"},\"000000067F00004005000060F300034847BD-000000067F00004005000060F300034BD86C__000000EBC9213D59-000000EFA7EAA9E1\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F000040050081DB430000C80000-000000067F000040050081DB430000C84000__000000BDAFECFC00\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F100000CCBA0-000000067F00004005000060F20100000000__0000000D80565628\":{\"file_size\":24576,\"generation\":2,\"shard\":
\"0008\"},\"000000067F00004005016EA00C0000CA4000-000000067F00004005016EA00C0000CE0000__0000019E7001E460\":{\"file_size\":134422528,\"generation\":11,\"shard\":\"0008\"},\"000000067F00004005000060FB00013BC000-000000067F00004005000060FB0001400000__000000603CA8F2F0\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005016EA00C0001240000-000000067F00004005016EA00C0001244000__000001A95031E5B8\":{\"file_size\":134422528,\"generation\":11,\"shard\":\"0008\"},\"000000067F00004005000060F30004EC52E9-000000067F00004005000060F30004F1638A__000001440D3D0C69-0000014784964B91\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F000040050081DB430000E10000-000000067F000040050081DB430000E14000__000000C483D0D6B8\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F0000400500EB4A480000007F0F-000000067F0000400500EB4A480000037E20__000000F309FCDD19-000000F6661C9241\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30004FE8000-000000067F00004005000060F3000502905D__0000014784964B91-0000014B000D1821\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F000040050081DB43000072C000-000000067F000040050081DB430000768000__000000A5A3F27398\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30005E3B48F-000000067F00004005000060F30005EF454F__00000164DEE06671-0000016834A3FC91\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F0000400500E3A2A100000B7E04-030000000000000000000000000000000002__000000E7C2F1B249-000000EBC9213D59\":{\"file_size\":30146560,\"generation\":2,\"shard\":\"0008\"},\"000000067F0000400501025D90000009029B-000000067F0000400501025D950100000000__0000011B688FEDC8\":{\"file_size\":24576,\"generation\":2,\"shard\":\"0008\"},\"000000067F000040050081DB430000A10000-000000067F000040050081DB430000A14000__000000AFE87558B0\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F
00004005000060F30002F5105E-000000067F00004005000060F30002F9A0EB__000000D74E29AAD1-000000DBBFA87AE1\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060FB000187FE22-000000067F000040050081D80C0100000000__00000075E5D2A930\":{\"file_size\":59138048,\"generation\":2,\"shard\":\"0008\"},\"000000067F000040050081DB4300001E8000-000000067F000040050081DB4300001EC000__00000081AA3C40F0\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060FB000184C000-000000067F00004005000060FB000187FE22__00000075E5D2A930\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30005A16504-000000067F00004005000060F30005A57691__0000015DD1D3C809-0000016143292911\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F100005C0000-000000067F00004005000060F100005C821A__000000601F43CF09-000000636DE92159\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000000000000000000000000000000000-000000067F00004005000060F00300000000__000001BCB572A4E0\":{\"file_size\":2310144,\"generation\":17,\"shard\":\"0008\"},\"000000067F00004005000060F30002214000-000000067F00004005000060F30002264247__000000A5A3F27398\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F0000400500E3A2A10000110000-000000067F0000400500E3A2A10000114000__000000EFDE07FFD8\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30006864000-000000067F00004005000060F30006868000__00000178C5D5D3A8\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F0000400500DBCED500000D0000-000000067F0000400500DBCED500000D4000__000000E4D847F4E0\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F3000274C000-000000067F00004005000060F30002790000__000000BAC0041E18\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005016EA00C00009274AB-0300000000000000000000000000
00000002__000001935283F9B9-00000196C9018F59\":{\"file_size\":60104704,\"generation\":11,\"shard\":\"0008\"},\"000000067F0000400500C782E4000023D359-000000067F0000400500C782E400002A5E4B__000000D31E48D7C9-000000D74E29AAD1\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F70001780DB7-000000067F00004005000060F700017E1391__0000013C9C0E3339-0000013FEFA7D709\":{\"file_size\":268460032,\"generation\":2,\"shard\":\"0008\"},\"000000067F000040050081DB4300004E4000-000000067F000040050081DB4300004F8000__0000009A24DF6768\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005016EA00C00018C0000-000000067F00004005016EA00C00018C4000__000001B3F17FE4E0\":{\"file_size\":134422528,\"generation\":11,\"shard\":\"0008\"},\"000000067F00004005000060F300056DC000-000000067F00004005000060F300056E0000__00000159B010F6C0\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060FB0001F14230-000000067F000040050081D80C0100000000__0000018613F0A050\":{\"file_size\":59138048,\"generation\":3,\"shard\":\"0008\"},\"000000067F00004005010F9F120000004000-030000000000000000000000000000000002__0000012E77D3BF00\":{\"file_size\":105775104,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30002D80000-000000067F00004005000060F30002D84000__000000D037B2DBD0\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F70000122BBF-000000067F00004005000060F7000013B18E__000000114A805939-00000013FB921C81\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30002B10000-000000067F00004005000060F30002B88FF2__000000C462B3C2A9-000000C824C09619\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30006320C60-000000067F00004005000060F30006349DA2__0000016E1FBB7B99-000001715E483C79\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005016EA00C000079E393-000000067F00004005016EA00C0000
9BF728__00000196C9018F59-0000019A2EAFE7A9\":{\"file_size\":268451840,\"generation\":11,\"shard\":\"0008\"},\"000000067F0000400500F67839000005C000-000000067F0000400500F67839000006AEF4__0000010D77B487A0\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005016EA00C0001D7F71A-030000000000000000000000000000000002__000001BA93C39481-000001BCB572A4E1\":{\"file_size\":50880512,\"generation\":17,\"shard\":\"0008\"},\"000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__000001BCB572C481-000001BCB572C5D9\":{\"file_size\":24576,\"generation\":20,\"shard\":\"0008\"},\"000000067F00004005000060F70001570000-000000067F00004005000060F70001574000__0000012E77D3BF00\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F3000042C000-000000067F00004005000060F30000478000__000000174479FC18\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__000001BCB572C5D9-000001BCB572DFF9\":{\"file_size\":24576,\"generation\":22,\"shard\":\"0008\"},\"000000067F00004005000060FB00015FCD31-030000000000000000000000000000000002__000000698F2C3A38\":{\"file_size\":147456,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30005C841ED-000000067F00004005000060F30005C95225__0000016143292911-00000164DEE06671\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30001B4A119-000000067F00004005000060F30100000000__0000008196C976A1-0000008625CF2891\":{\"file_size\":200990720,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F300019790A2-000000067F00004005000060F300019C2056__00000079F2A2F311-0000007E3A9BFD29\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060FB0001838000-000000067F00004005000060FB000183C000__00000075E5D2A930\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30001C00FE1-000000067F0000400500006
0F30001C0A0A3__0000008625CF2891-00000089F4693119\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F300056E0000-000000067F00004005000060F300056E4000__00000159B010F6C0\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F70000BBD532-000000067F00004005000060F80100000000__000000AFD23C27B9-000000B2B5C4E8F9\":{\"file_size\":96477184,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30000F9B026-000000067F00004005000060F30100000000__00000047E31D98D1-0000004C49155071\":{\"file_size\":173834240,\"generation\":2,\"shard\":\"0008\"},\"000000067F000040050081DB430000500000-000000067F000040050081DB430000504000__0000009A24DF6768\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30004971675-000000067F00004005000060F300049B26A8__000001398B56A519-0000013C9C0E3339\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30003102107-000000067F00004005000060F300031130BC__000000DE2A8E4FC9-000000E1CD2FBBE9\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F300048A4000-000000067F00004005000060F30004900000__00000139CF156B58\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005016EA00C00004B8000-000000067F00004005016EA00C00004BC000__000001936E73D028\":{\"file_size\":134422528,\"generation\":11,\"shard\":\"0008\"},\"000000067F00004005000060FB0001A71688-000000067F00004005000060FB0001A8A1CD__0000007E3A9BFD29-0000008196C976A1\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30000E60000-000000067F00004005000060F30000E64000__00000047F1F2B800\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F300023B0FF7-000000067F00004005000060F300024020ED__000000A9EB8C4489-000000ACA44C8E99\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005016EA00C00003F8000-00000006
7F00004005016EA00C00003FC000__000001936E73D028\":{\"file_size\":134422528,\"generation\":11,\"shard\":\"0008\"},\"000000067F00004005000060F30004B2B250-000000067F00004005000060F30004B5431C__0000013C9C0E3339-0000013FEFA7D709\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F70000050000-000000067F00004005000060F700000885C5__000000044854EBD1-00000008B6B51879\":{\"file_size\":268460032,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060FB000097168A-030000000000000000000000000000000002__00000028C365FBE1-0000002D2A8E0B81\":{\"file_size\":120299520,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F3000625C000-000000067F00004005000060F30006270000__0000017171761D90\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060FB0001BA8000-000000067F00004005000060FB0001BC0B44__0000008625CF2891-00000089F4693119\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30003344134-000000067F00004005000060F3000336D193__000000E4C63CFA21-000000E7C2F1B249\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30006B10FFF-000000067F00004005000060F30006B22072__0000017C9F5597E1-0000018022640391\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30006E34000-000000067F00004005000060F30006E70000__000001848D082B20\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F6000008238C-000000067F00004005000060F60100000000__00000139CF156B58\":{\"file_size\":24576,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F70000A30000-000000067F00004005000060F70100000000__0000009DF02C1241-000000A173C00489\":{\"file_size\":269688832,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060FB0001CE16ED-000000067F000040050081D80C0100000000__0000008DDCD70B68\":{\"file_size\":59138048,\"generation\":2,\"shard\":\"0008\"},\"000000067F000040050081DB4300011B00
00-000000067F000040050081DB4300011B4000__000000DBD29DC248\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F0000400500F3A25C000010C0D1-000000067F0000400500F3A25C000011E137__000001048B25A8E9-0000010779A7F551\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F70000004000-000000067F00004005000060F70000029ED0__000000027AF9D7D0\":{\"file_size\":134422528,\"generation\":1,\"shard\":\"0008\"},\"000000067F00004005000060F60000058F73-000000067F00004005000060F60100000000__000000E4D847F4E0\":{\"file_size\":24576,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005016EA00C0001C3F636-000000067F00004005016EA00C0001CC74D7__000001B6FFE46BC9-000001BA93C39481\":{\"file_size\":268451840,\"generation\":11,\"shard\":\"0008\"},\"000000067F0000400500EB4A480000101089-000000067F0000400500EB4A48000012798C__000000F6661C9241-000000F901689359\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F000040050081DB4300007A8000-000000067F000040050081DB4300007AC000__000000A5A3F27398\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F1000010043F-000000067F00004005000060F20100000000__0000000D55A212C9-000000114A805939\":{\"file_size\":182878208,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060FB0001EAC000-000000067F00004005000060FB0001F14230__0000009A24DF6768\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F3000616F6B2-000000067F00004005000060F300061B8705__0000016B49A934C1-0000016E1FBB7B99\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30005C9E3C4-000000067F00004005000060F30005CCF3C5__0000016143292911-00000164DEE06671\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F70001AA0000-000000067F00004005000060F70001AB05CB__0000015304A396B9-0000015670D6AFD9\":{\"file_size\":268460032,\"generation\":2,\"shard\":\"0008\"},\"000000067F000040050000
60F3000073C000-000000067F00004005000060F30000775A02__0000002427BD8BD0\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F000040050081DB4300003AE21D-000000067F000040050081DB43000045029C__0000008DBE2855F9-000000923719A971\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F70001B04000-000000067F00004005000060F70001B18000__00000159B010F6C0\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30000E74000-000000067F00004005000060F30000E78000__00000047F1F2B800\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F7000182C000-000000067F00004005000060F700018871D6__000001444EB7FC10\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060FB0000DE8B45-000000067F00004005000060FB0000DF968A__000000417D21ACF9-00000044B4679349\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30000E78000-000000067F00004005000060F30000E7C000__00000047F1F2B800\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060FB000140C000-030000000000000000000000000000000002__000000603CA8F2F0\":{\"file_size\":89522176,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060FB00011CA1CD-000000067F00004005000060FB00011F2D11__0000005413AB3641-00000057593D8169\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005016EA00C000144FB4E-000000067F00004005016EA00C00014B79E7__000001A931C135B1-000001AC25760149\":{\"file_size\":268451840,\"generation\":11,\"shard\":\"0008\"},\"000000067F00004005000060F700015A195C-000000067F00004005000060F80100000000__0000012E77D3BF00\":{\"file_size\":24576,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F70000FC0000-000000067F00004005000060F70000FC4000__000000E4D847F4E0\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F0000400500EB4A48000012798C-000000067F0000400500EB4A4
8000013F89B__000000F6661C9241-000000F901689359\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005016EA00C0001CE4000-000000067F00004005016EA00C0001D18000__000001BCB572A4E0\":{\"file_size\":134422528,\"generation\":17,\"shard\":\"0008\"},\"000000067F00004005000060F30005FC519A-000000067F00004005000060F30005FE621A__0000016834A3FC91-0000016B49A934C1\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005016EA00C0000370000-000000067F00004005016EA00C0000374000__000001936E73D028\":{\"file_size\":134422528,\"generation\":11,\"shard\":\"0008\"},\"000000067F00004005016EA00C0001760000-000000067F00004005016EA00C0001764000__000001B3F17FE4E0\":{\"file_size\":134422528,\"generation\":11,\"shard\":\"0008\"},\"000000067F00004005000060F100003A0000-000000067F00004005000060F100003B8214__0000003D03FCCDB9-000000417D21ACF9\":{\"file_size\":268460032,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F300006B0000-000000067F00004005000060F300006B4000__0000002427BD8BD0\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060FB00004E1FF6-030000000000000000000000000000000002__000000174479FC18\":{\"file_size\":147456,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F3000502905D-000000067F00004005000060F300050321C0__0000014784964B91-0000014B000D1821\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F70001AB05CB-000000067F00004005000060F70001AB8B97__0000015304A396B9-0000015670D6AFD9\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005016EA00C000151F7C5-000000067F00004005016EA00C000158F667__000001AC25760149-000001AFC313C819\":{\"file_size\":268451840,\"generation\":11,\"shard\":\"0008\"},\"000000067F00004005000060F70000B9C000-000000067F00004005000060F80100000000__000000AFE87558B0\":{\"file_size\":83533824,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F7000141882A-000000067F00004005000060F80
100000000__00000122E1129DA0\":{\"file_size\":24576,\"generation\":2,\"shard\":\"0008\"},\"000000067F0000400500EB4A48000018F5CD-000000067F0000400500EB4A48000019F4DD__000000F6661C9241-000000F901689359\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F7000196C000-000000067F00004005000060F70001990000__0000014EDD256548\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F300029C623C-000000067F00004005000060F30100000000__000000BD9A7C56D9-000000C0C9EB88E1\":{\"file_size\":81313792,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F300027C0000-000000067F00004005000060F300027C4000__000000BAC0041E18\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F0000400500FB3D300000001487-000000067F0000400500FB3D300100000000__0000010FB1BE19B9-00000113456156F1\":{\"file_size\":24428544,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F300056D8000-000000067F00004005000060F300056DC000__00000159B010F6C0\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F700003C0000-000000067F00004005000060F700003C4000__0000003D2AB09B68\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F3000664E3CA-000000067F00004005000060F30100000000__000001715E483C79-000001751A7D7589\":{\"file_size\":288645120,\"generation\":2,\"shard\":\"0008\"},\"000000067F000040050100D04D000004B5AD-000000067F000040050100D04D00000634BB__0000010D5DC42EF9-0000010FB1BE19B9\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F0000400500DBCED5000002C000-000000067F0000400500DBCED50000078000__000000E4D847F4E0\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005016EA00C0000C20000-000000067F00004005016EA00C0000C24000__0000019E7001E460\":{\"file_size\":134422528,\"generation\":11,\"shard\":\"0008\"},\"000000067F00004005000060F70001B30000-000000067F00004005000060F70001B34000__00000159B010F
6C0\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F700009C035C-000000067F00004005000060F80100000000__0000009A1ABDE921-0000009DF02C1241\":{\"file_size\":264159232,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30003B33945-000000067F00004005000060F30100000000__0000010FB1BE19B9-00000113456156F1\":{\"file_size\":155344896,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005016EA00C000079FCFA-000000067F00004005016EA00C00007C7B9C__000001935283F9B9-00000196C9018F59\":{\"file_size\":268451840,\"generation\":11,\"shard\":\"0008\"},\"000000067F0000400500EB4A480000218000-000000067F0000400500EB4A48000021C000__000000FCD84FE628\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30005D1D0DC-000000067F00004005000060F30005D76250__00000164DEE06671-0000016834A3FC91\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060FB000149B774-000000067F00004005000060FB00014A42B8__000000601F43CF09-000000636DE92159\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30003D0B155-000000067F00004005000060F30003D14206__00000117EDA82C11-0000011B632CC319\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F300020FC052-000000067F00004005000060F300021050B0__0000009DF02C1241-000000A173C00489\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30002268000-000000067F00004005000060F300022B9050__000000A583FBFB91-000000A9EB8C4489\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F000040050081DB4300004FC000-000000067F000040050081DB430000500000__0000009A24DF6768\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F300060A93B5-000000067F00004005000060F300060C2210__0000016834A3FC91-0000016B49A934C1\":{\"file_size\":263479296,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F3000674C000-0
00000067F00004005000060F30006798000__00000178C5D5D3A8\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F000040050081DB4300007F913A-030000000000000000000000000000000002__000000A5A3F27398\":{\"file_size\":139264,\"generation\":2,\"shard\":\"0008\"},\"000000067F0000400500DBCED500000F4000-030000000000000000000000000000000002__000000E4D847F4E0\":{\"file_size\":103907328,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F70001348000-000000067F00004005000060F70100000000__0000011B632CC319-0000011F1A40FA69\":{\"file_size\":270753792,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F10000030000-000000067F00004005000060F20100000000__000000021DC73119-000000044854EBD1\":{\"file_size\":267771904,\"generation\":2,\"shard\":\"0008\"},\"000000067F000040050107B54701FFFFFFFF-000000067F000040050107B5470300000000__0000011F1A40FA69-00000122A7BB7B29\":{\"file_size\":24576,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30006674000-000000067F00004005000060F30006690000__00000178C5D5D3A8\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F000040050107B54701FFFFFFFF-000000067F000040050107B5470300000000__0000011B632CC319-0000011F1A40FA69\":{\"file_size\":24576,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30000298000-000000067F00004005000060F3000029C000__0000000D80565628\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F70000F185D4-000000067F00004005000060F80100000000__000000DBBFA87AE1-000000DE2A8E4FC9\":{\"file_size\":249135104,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F300049CB712-000000067F00004005000060F30004A048A8__000001398B56A519-0000013C9C0E3339\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F700004B1E77-000000067F00004005000060F80100000000__00000047F1F2B800\":{\"file_size\":24576,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30004B00000-000000067F00004005
000060F30004B1111A__0000013C9C0E3339-0000013FEFA7D709\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30006D14000-000000067F00004005000060F30006D30000__000001848D082B20\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005016EA00C00002D77AE-030000000000000000000000000000000002__000001880F984A29-0000018C496B6DB1\":{\"file_size\":81018880,\"generation\":11,\"shard\":\"0008\"},\"000000067F00004005000060F300002D0000-000000067F00004005000060F30000370FD1__0000000D55A212C9-000000114A805939\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F0000400500D69D790000028000-000000067F0000400500D69D79000002C000__000000EFDE07FFD8\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30002170000-000000067F00004005000060F30002174000__000000A5A3F27398\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30000F59017-000000067F00004005000060F30000F91FFF__00000047E31D98D1-0000004C49155071\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F6000006A37A-000000067F00004005000060F60100000000__000001180B3FF408\":{\"file_size\":24576,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F6000002F012-000000067F00004005000060F60100000000__00000081AA3C40F0\":{\"file_size\":24576,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30005614000-000000067F00004005000060F30005688000__00000159B010F6C0\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F300036C8000-000000067F00004005000060F300036F91FE__000000FCCD5238B1-000000FF8B261599\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005016EA00C0001ADF63C-030000000000000000000000000000000002__000001B3E1B95181-000001B6FFE46BC9\":{\"file_size\":64421888,\"generation\":11,\"shard\":\"0008\"},\"000000067F0000400500EB4A480000057D31-000000067F0000400500EB4A4800
0008FC41__000000F309FCDD19-000000F6661C9241\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005016EA00C0000F58000-000000067F00004005016EA00C0000F5C000__000001A95031E5B8\":{\"file_size\":134422528,\"generation\":11,\"shard\":\"0008\"},\"000000067F000040050081DB430000908000-000000067F000040050081DB43000094A076__000000A9EB8C4489-000000ACA44C8E99\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F3000471200E-000000067F00004005000060F3000474302B__000001334140FC21-00000137115BE4D9\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F000040050081DB4300000403DA-030000000000000000000000000000000002__00000075E5D2A930\":{\"file_size\":139264,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F60000079C4E-000000067F00004005000060F60100000000__0000012E77D3BF00\":{\"file_size\":24576,\"generation\":2,\"shard\":\"0008\"},\"000000067F0000400500F67839000003C000-000000067F0000400500F678390000058000__0000010D77B487A0\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060FB0001C80000-000000067F00004005000060FB0001C84000__0000008DDCD70B68\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F300047F5138-000000067F00004005000060F3000480620C__000001334140FC21-00000137115BE4D9\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30006B5C09E-000000067F00004005000060F30006BAD108__0000017C9F5597E1-0000018022640391\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F70001410F57-000000067F00004005000060F70001429534__00000122A7BB7B29-0000012694E36301\":{\"file_size\":268460032,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005016EA00C00006B4000-000000067F00004005016EA00C00006E0000__000001936E73D028\":{\"file_size\":134422528,\"generation\":11,\"shard\":\"0008\"},\"000000067F00004005000060F700009605D8-000000067F00004005000060F8010000000
0__000000923719A971-00000096262826C9\":{\"file_size\":251338752,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F70000C8CD0C-000000067F00004005000060F80100000000__000000BAC0041E18\":{\"file_size\":24576,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F700012B8000-000000067F00004005000060F80100000000__00000113456156F1-00000117EDA82C11\":{\"file_size\":265781248,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005016EA00C000049C000-000000067F00004005016EA00C00004A8000__000001936E73D028\":{\"file_size\":134422528,\"generation\":11,\"shard\":\"0008\"},\"000000067F00004005000060F70000C78000-000000067F00004005000060F70000C7C000__000000BAC0041E18\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30006B4B0BB-000000067F00004005000060F30006B5C09E__0000017C9F5597E1-0000018022640391\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060FB0001844000-000000067F00004005000060FB0001848000__00000075E5D2A930\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F300067F0000-000000067F00004005000060F300067F4000__00000178C5D5D3A8\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30004C80000-000000067F00004005000060F30004C84000__000001444EB7FC10\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30002A4C000-000000067F00004005000060F30002A98000__000000C483D0D6B8\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30002480000-000000067F00004005000060F30002484000__000000AFE87558B0\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F3000306A02D-000000067F00004005000060F30100000000__000000DBBFA87AE1-000000DE2A8E4FC9\":{\"file_size\":191299584,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F70001510000-000000067F00004005000060F70001514000__0000012E77D3BF00\":{\"file_size\":13
4422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30005BDB15B-000000067F00004005000060F30005C841ED__0000016143292911-00000164DEE06671\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060FB0001E98000-000000067F00004005000060FB0001E9C000__0000009A24DF6768\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F300057942F4-000000067F00004005000060F300057DD292__00000159A7EC8CB9-0000015DD1D3C809\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30005698000-000000067F00004005000060F3000569C000__00000159B010F6C0\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30002983166-000000067F00004005000060F3000299C28F__000000BD9A7C56D9-000000C0C9EB88E1\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005016EA00C0000C24000-000000067F00004005016EA00C0000CA0000__0000019E7001E460\":{\"file_size\":134422528,\"generation\":11,\"shard\":\"0008\"},\"000000067F00004005000060F300033D7D7C-000000067F00004005000060F30003458D42__000000E7C2F1B249-000000EBC9213D59\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F000040050081DB430000A1C000-000000067F000040050081DB430000A30379__000000AFE87558B0\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30002D93639-000000067F00004005000060F50100000000__000000D037B2DBD0\":{\"file_size\":24576,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005016EA00C000029C195-000000067F00004005016EA00C000029C196__000001BA93C39481-000001BCB572A4E1\":{\"file_size\":32768,\"generation\":17,\"shard\":\"0008\"},\"000000067F00004005000060F30000A5F9BB-000000067F00004005000060F60100000000__000000321AA80270\":{\"file_size\":81657856,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30002D84000-000000067F00004005000060F30002D93639__000000D037B2DBD0\":{\"file_size\":134422528,\"generation\":2,\
"shard\":\"0008\"},\"000000067F00004005000060F30005D1C000-000000067F00004005000060F30005D70000__000001684518AF20\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F000040050081DB4300010C8000-000000067F000040050081DB4300010E2072__000000D01F399709-000000D31E48D7C9\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F000040050081DB43000058AF5E-000000067F000040050081DB4300005BCFD7__0000009A1ABDE921-0000009DF02C1241\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F7000034611E-000000067F00004005000060F80100000000__000000321AA80270\":{\"file_size\":24576,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F300000C1095-000000067F00004005000060F60100000000__000000021DC73119-000000044854EBD1\":{\"file_size\":220635136,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060FB000183C000-000000067F00004005000060FB0001840000__00000075E5D2A930\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30006C8729E-000000067F00004005000060F30006C98340__0000017C9F5597E1-0000018022640391\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30005138000-000000067F00004005000060F3000513C000__0000014EDD256548\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F300053E30C3-000000067F00004005000060F300053F40CC__0000014EC58A4A79-0000015304A396B9\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F000040050081DB43000002C000-000000067F000040050081DB4300000403DA__00000075E5D2A930\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30004970000-000000067F00004005000060F30004974000__00000139CF156B58\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30003C08000-000000067F00004005000060F30003C0C000__000001180B3FF408\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"00
0000067F00004005000060FB000103AD12-000000067F00004005000060FB000104B856__0000004C49155071-0000004F31878919\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005016EA00C00004AC000-000000067F00004005016EA00C00004B8000__000001936E73D028\":{\"file_size\":134422528,\"generation\":11,\"shard\":\"0008\"},\"000000067F00004005016EA00C0000DB7D33-000000067F00004005016EA00C0000E47BD2__0000019E2C5DCEE1-000001A1DD8B4481\":{\"file_size\":268451840,\"generation\":11,\"shard\":\"0008\"},\"000000067F00004005000060F30001F30000-000000067F00004005000060F30001F34000__0000009A24DF6768\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F000040050109FFA2000000C000-030000000000000000000000000000000002__000001180B3FF408\":{\"file_size\":70516736,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F700017405D4-000000067F00004005000060F70001758B92__000001398B56A519-0000013C9C0E3339\":{\"file_size\":268460032,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F300030B0000-000000067F00004005000060F300030C0FE5__000000DE2A8E4FC9-000000E1CD2FBBE9\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005010660F501FFFFFFFF-000000067F00004005010660F50300000000__00000122A7BB7B29-0000012694E36301\":{\"file_size\":24576,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30002168000-000000067F00004005000060F3000216C000__000000A5A3F27398\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F60000046A83-000000067F00004005000060F60100000000__000000BAC0041E18\":{\"file_size\":24576,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060FB0001368000-000000067F00004005000060FB000136C000__000000603CA8F2F0\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F70000184000-000000067F00004005000060F80100000000__000000174479FC18\":{\"file_size\":93143040,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060FB000
12A8000-000000067F00004005000060FB0100000000__00000057593D8169-0000005C01565329\":{\"file_size\":273711104,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F700007B0000-000000067F00004005000060F700007D05C8__00000075CC373F31-00000079F2A2F311\":{\"file_size\":268468224,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060FB0001680B45-000000067F00004005000060FB000169968A__000000698AF6E809-0000006DDB29D589\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F300050CC000-000000067F00004005000060F300050E8000__0000014EDD256548\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000000000000000000000000000000000-000000067F00004005000060F00300000000__0000018613F0A050\":{\"file_size\":2310144,\"generation\":3,\"shard\":\"0008\"},\"000000067F00004005000060F70001B1C000-000000067F00004005000060F70001B30000__00000159B010F6C0\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F70000F50000-000000067F00004005000060F70000F705D6__000000DE2A8E4FC9-000000E1CD2FBBE9\":{\"file_size\":268460032,\"generation\":2,\"shard\":\"0008\"},\"000000067F000040050109CD330100000000-000000067F000040050109FFA2000000C000__000001180B3FF408\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F0000400500EB4A4800001FC000-000000067F0000400500EB4A480000200000__000000FCD84FE628\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F3000240B12A-000000067F00004005000060F300024440AE__000000A9EB8C4489-000000ACA44C8E99\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F3000008228D-000000067F00004005000060F60100000000__000000027AF9D7D0\":{\"file_size\":24576,\"generation\":1,\"shard\":\"0008\"},\"000000067F00004005016EA00C000042C000-000000067F00004005016EA00C0000478000__000001936E73D028\":{\"file_size\":134422528,\"generation\":11,\"shard\":\"0008\"},\"000000067F00004005000060FB0000FF8000-000000067F000040
05000060FB0001000B44__0000004C49155071-0000004F31878919\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060FB000169968A-000000067F00004005000060FB00016D21CF__000000698AF6E809-0000006DDB29D589\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F100005F821C-000000067F00004005000060F20100000000__000000636DE92159-000000663565F8C9\":{\"file_size\":149954560,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005016EA00C0001D7C000-000000067F00004005016EA00C0001E03DD8__000001BCB572A4E0\":{\"file_size\":134422528,\"generation\":17,\"shard\":\"0008\"},\"000000067F0000400500F678390000058000-000000067F0000400500F67839000005C000__0000010D77B487A0\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F0000400500EB4A4800003A7E20-000000067F0000400500EB4A4800003BFD31__000000FCCD5238B1-000000FF8B261599\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005016EA00C0001228000-000000067F00004005016EA00C000122C000__000001A95031E5B8\":{\"file_size\":134422528,\"generation\":11,\"shard\":\"0008\"},\"000000067F000040050081DB430000F0C0E9-000000067F000040050081DB430000F4E15B__000000C462B3C2A9-000000C824C09619\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F70000758000-000000067F00004005000060F80100000000__0000006DDB29D589-000000722F474369\":{\"file_size\":264781824,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F300068640AF-000000067F00004005000060F3000686D0DE__00000178B8B10551-0000017C9F5597E1\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005016EA00C000047C000-000000067F00004005016EA00C0000498000__000001936E73D028\":{\"file_size\":134422528,\"generation\":11,\"shard\":\"0008\"},\"000000067F00004005000060F30006166575-000000067F00004005000060F3000616F6B2__0000016B49A934C1-0000016E1FBB7B99\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005
000060F70001B18000-000000067F00004005000060F70001B1C000__00000159B010F6C0\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F700016EC000-000000067F00004005000060F70001708000__00000139CF156B58\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30005CCF3C5-000000067F00004005000060F30005D184F6__0000016143292911-00000164DEE06671\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30002848000-000000067F00004005000060F3000285901B__000000BAB1E56C91-000000BD9A7C56D9\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F300039C0000-000000067F00004005000060F300039C4000__0000010D77B487A0\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30002464000-000000067F00004005000060F30002480000__000000AFE87558B0\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005016EA00C00011D0000-000000067F00004005016EA00C00011D4000__000001A95031E5B8\":{\"file_size\":134422528,\"generation\":11,\"shard\":\"0008\"},\"000000067F00004005000060F30003D44283-000000067F00004005000060F30003D952B0__0000011B632CC319-0000011F1A40FA69\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F0000400500EB4A480100000000-000000067F0000400500EE16BC0000044000__000000F91FE84F08\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F3000533205E-000000067F00004005000060F300053E30C3__0000014EC58A4A79-0000015304A396B9\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F6000009A255-000000067F00004005000060F60300000000__0000017CC2FD7288\":{\"file_size\":24576,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F70001B00000-000000067F00004005000060F70001B04000__00000159B010F6C0\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30004958000-000000067F0000400500
0060F3000495C000__00000139CF156B58\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F70000518000-000000067F00004005000060F80100000000__0000004C49155071-0000004F31878919\":{\"file_size\":262373376,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F300064D8000-000000067F00004005000060F3000658113F__000001715E483C79-000001751A7D7589\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F0000400500FDA1F80000014000-000000067F0000400500FDA1F80000020D42__0000010D77B487A0\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060FB0000284000-000000067F00004005000060FB00002D4B6A__0000000D80565628\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F70000CDBB9C-000000067F00004005000060F80100000000__000000BD9A7C56D9-000000C0C9EB88E1\":{\"file_size\":148865024,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005016EA00C0001298000-000000067F00004005016EA00C000129C000__000001A95031E5B8\":{\"file_size\":134422528,\"generation\":11,\"shard\":\"0008\"},\"000000067F00004005000060FB0001DD8000-000000067F00004005000060FB0001DF0B43__000000923719A971-00000096262826C9\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F70001220000-000000067F00004005000060F70001224000__0000010D77B487A0\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30002908000-000000067F00004005000060F30002920FA0__000000BD9A7C56D9-000000C0C9EB88E1\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005016EA00C0000F5C000-000000067F00004005016EA00C0000F90000__000001A95031E5B8\":{\"file_size\":134422528,\"generation\":11,\"shard\":\"0008\"},\"000000067F00004005016EA00C0001E03DD8-030000000000000000000000000000000002__000001BCB572A4E0\":{\"file_size\":139264,\"generation\":17,\"shard\":\"0008\"},\"000000067F00004005000060F30003998000-000000067F00004005000060F3000399C000__0
000010D77B487A0\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005016EA00C00014E75C6-030000000000000000000000000000000002__000001A931C135B1-000001AC25760149\":{\"file_size\":51486720,\"generation\":11,\"shard\":\"0008\"},\"000000067F00004005010660F500000F44CB-000000067F00004005010660F70100000000__000001180B3FF408\":{\"file_size\":24576,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005016EA00C00003FC000-000000067F00004005016EA00C0000400000__000001936E73D028\":{\"file_size\":134422528,\"generation\":11,\"shard\":\"0008\"},\"000000067F00004005000060F30003810000-000000067F00004005000060F30003849093__000001048B25A8E9-0000010779A7F551\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30006B00000-000000067F00004005000060F30006B10FFF__0000017C9F5597E1-0000018022640391\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060FB0001541688-000000067F00004005000060FB000154A1CD__000000636DE92159-000000663565F8C9\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060FB0001098000-000000067F00004005000060FB000109C000__00000054161C34B8\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F700011912D4-000000067F00004005000060F80100000000__00000104BD37F348\":{\"file_size\":24576,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30002A40000-000000067F00004005000060F30002A44000__000000C483D0D6B8\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30001448000-000000067F00004005000060F300014B0F7B__000000601F43CF09-000000636DE92159\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060FB0001009688-000000067F00004005000060FB000102A1CE__0000004C49155071-0000004F31878919\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F0000400500EE16BC00001A4000-000000067F0000400500EE16BC00001E0000__00000104BD
37F348\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060FB0000B58B45-000000067F00004005000060FB0000B6168A__0000003579F03331-0000003959DA2DE9\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F0000400500D69D7900000AC000-000000067F0000400500D69D7900000BDAF5__000000EFDE07FFD8\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F3000193A10B-000000067F00004005000060F30100000000__00000075CC373F31-00000079F2A2F311\":{\"file_size\":198148096,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005016EA00C00005A0000-000000067F00004005016EA00C00005A4000__000001936E73D028\":{\"file_size\":134422528,\"generation\":11,\"shard\":\"0008\"},\"000000067F00004005000060F700000E0000-000000067F00004005000060F80100000000__0000000D80565628\":{\"file_size\":112009216,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F3000690F2FD-000000067F00004005000060F300069883DB__00000178B8B10551-0000017C9F5597E1\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F300004C6B83-000000067F00004005000060F60100000000__000000174479FC18\":{\"file_size\":24576,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30001E18000-000000067F00004005000060F30001E50FF3__000000923719A971-00000096262826C9\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F300043B4000-000000067F00004005000060F300043B8000__0000012E77D3BF00\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F100006C0000-000000067F00004005000060F20100000000__000000722F474369-00000075CC373F31\":{\"file_size\":267665408,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F70000A78000-000000067F00004005000060F70000A7C000__000000A5A3F27398\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060FB00011C1688-000000067F00004005000060FB00011CA1CD__0000005413AB3641-00000057593D816
9\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005016EA00C00004E8000-000000067F00004005016EA00C00004EC000__000001936E73D028\":{\"file_size\":134422528,\"generation\":11,\"shard\":\"0008\"},\"000000067F00004005016EA00C0000257A6F-000000067F00004005016EA00C000029F90B__000001880F984A29-0000018C496B6DB1\":{\"file_size\":268451840,\"generation\":11,\"shard\":\"0008\"},\"000000067F00004005000060FB0001590000-000000067F00004005000060FB0001594000__000000698F2C3A38\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005016EA00C000193189A-030000000000000000000000000000000002__000001B3F17FE4E0\":{\"file_size\":139264,\"generation\":11,\"shard\":\"0008\"},\"000000067F00004005000060F300027C4000-000000067F00004005000060F30002828000__000000BAC0041E18\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005016EA00C0000B40000-000000067F00004005016EA00C0000B44000__0000019E7001E460\":{\"file_size\":134422528,\"generation\":11,\"shard\":\"0008\"},\"000000067F00004005000060F30006694000-000000067F00004005000060F300066F0000__00000178C5D5D3A8\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060FB00015C8000-000000067F00004005000060FB00015CC000__000000698F2C3A38\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30003B84000-000000067F00004005000060F30003B90000__000001180B3FF408\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30006704000-000000067F00004005000060F30006748000__00000178C5D5D3A8\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060FB0000793506-030000000000000000000000000000000002__0000002427BD8BD0\":{\"file_size\":147456,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30004F1638A-000000067F00004005000060F30100000000__000001440D3D0C69-0000014784964B91\":{\"file_size\":93708288,\"generation\":2,\"shard\":\"0008\"},\"00000
0067F00004005000060F80100000000-000000067F00004005000060FB0000014000__000000027AF9D7D0\":{\"file_size\":134422528,\"generation\":1,\"shard\":\"0008\"},\"000000067F00004005000060F70000180000-000000067F00004005000060F70000184000__000000174479FC18\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30004A2693B-000000067F00004005000060F30004A7F98F__000001398B56A519-0000013C9C0E3339\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30002C71F27-000000067F00004005000060F30002C9AFB8__000000C824C09619-000000CC13D2E549\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F300038075AF-000000067F00004005000060F30100000000__000000FF8B261599-000001048B25A8E9\":{\"file_size\":49823744,\"generation\":2,\"shard\":\"0008\"},\"000000067F0000400500DBCED50000028000-000000067F0000400500DBCED5000002C000__000000E4D847F4E0\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30004188000-000000067F00004005000060F300041D9101__0000012694E36301-0000012A3F140591\":{\"file_size\":268460032,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30006868000-000000067F00004005000060F50100000000__00000178C5D5D3A8\":{\"file_size\":116645888,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30003A789A0-000000067F00004005000060F30003AB9907__0000010FB1BE19B9-00000113456156F1\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F0000400500EB4A480000368000-000000067F0000400500EB4A48000036FF11__000000FCCD5238B1-000000FF8B261599\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F300047EC0CA-000000067F00004005000060F300047F5138__000001334140FC21-00000137115BE4D9\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F70001AB8B97-000000067F00004005000060F70001AC115C__0000015304A396B9-0000015670D6AFD9\":{\"file_size\":268460032,\"generation\
":2,\"shard\":\"0008\"},\"000000067F00004005000060F70000D61283-000000067F00004005000060F70000D8985C__000000C462B3C2A9-000000C824C09619\":{\"file_size\":268460032,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F300011D1111-000000067F00004005000060F3000122A1D5__0000005413AB3641-00000057593D8169\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005016EA00C0001967D34-000000067F00004005016EA00C000197FBD0__000001B3E1B95181-000001B6FFE46BC9\":{\"file_size\":268451840,\"generation\":11,\"shard\":\"0008\"},\"000000067F0000400500FA2AD3000004D85C-000000067F0000400500FB3D300100000000__0000010D77B487A0\":{\"file_size\":31309824,\"generation\":2,\"shard\":\"0008\"},\"000000067F000040050081DB4300005BCFD7-000000067F000040050081DB4300005D704F__0000009A1ABDE921-0000009DF02C1241\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F10000004000-000000067F00004005000060F100000260F2__000000027AF9D7D0\":{\"file_size\":134422528,\"generation\":1,\"shard\":\"0008\"},\"000000067F0000400500EE16BC00000F8000-000000067F0000400500EE16BC000014158C__000000F901689359-000000FCCD5238B1\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30000921E8A-000000067F00004005000060F60100000000__00000028C365FBE1-0000002D2A8E0B81\":{\"file_size\":228564992,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060FB0001190000-000000067F00004005000060FB0001198B44__0000005413AB3641-00000057593D8169\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F300067A0000-000000067F00004005000060F300067A4000__00000178C5D5D3A8\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F10000200000-000000067F00004005000060F10000204000__0000002427BD8BD0\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30003FF0FBB-000000067F00004005000060F3000407201D__00000122A7BB7B29-0000012694E36301\":{\"fil
e_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F3000001C000-000000067F00004005000060F3000008228D__000000027AF9D7D0\":{\"file_size\":134422528,\"generation\":1,\"shard\":\"0008\"},\"000000067F00004005016EA00C0001CD7376-030000000000000000000000000000000002__000001B6FFE46BC9-000001BA93C39481\":{\"file_size\":70238208,\"generation\":11,\"shard\":\"0008\"},\"000000067F00004005000060FB0000EBC000-000000067F00004005000060FB0000EC8000__00000047F1F2B800\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F3000293210E-000000067F00004005000060F30002983166__000000BD9A7C56D9-000000C0C9EB88E1\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F3000151F271-000000067F00004005000060F30100000000__000000636DE92159-000000663565F8C9\":{\"file_size\":41271296,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30004880000-000000067F00004005000060F30004884000__00000139CF156B58\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F10000518222-000000067F00004005000060F20100000000__0000005413AB3641-00000057593D8169\":{\"file_size\":169492480,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005016EA00C00003E0000-000000067F00004005016EA00C00003E4000__000001936E73D028\":{\"file_size\":134422528,\"generation\":11,\"shard\":\"0008\"},\"000000067F00004005000060F30000775A02-000000067F00004005000060F60100000000__0000002427BD8BD0\":{\"file_size\":24576,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005016EA00C000197FBD0-000000067F00004005016EA00C00019C7A6A__000001B3E1B95181-000001B6FFE46BC9\":{\"file_size\":268451840,\"generation\":11,\"shard\":\"0008\"},\"000000067F00004005000060F3000067114B-000000067F00004005000060F60100000000__0000001B59EEB909-0000001FFBC01501\":{\"file_size\":232669184,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060FB0001408000-000000067F00004005000060FB000140C000__000000603CA8F2F0\":{\"file_siz
e\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F0000400500EB4A4800001F8000-000000067F0000400500EB4A4800001FC000__000000FCD84FE628\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F0000400500EB4A480000290000-000000067F0000400500EB4A480000294000__000000FCD84FE628\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30003061089-000000067F00004005000060F3000306A02D__000000DBBFA87AE1-000000DE2A8E4FC9\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30001CE4000-000000067F00004005000060F30001CF0197__0000008DDCD70B68\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F70000E20000-000000067F00004005000060F70000E24000__000000D037B2DBD0\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F000040050081DB4300001D0000-000000067F000040050081DB4300001D4000__00000081AA3C40F0\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30005D184F6-000000067F00004005000060F30100000000__0000016143292911-00000164DEE06671\":{\"file_size\":200163328,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F300066F4000-000000067F00004005000060F30006700000__00000178C5D5D3A8\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F000040050081DB430000A38000-000000067F000040050081DB430000A4A074__000000AFD23C27B9-000000B2B5C4E8F9\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30000F38000-000000067F00004005000060F30000F59017__00000047E31D98D1-0000004C49155071\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060FB0000C0C000-000000067F00004005000060FB0000C18000__0000003D2AB09B68\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30006D34000-000000067F00004005000060F30006D60000__000001848D082B20\":{\"file_size\":134422528,\"generation\":2,\"sha
rd\":\"0008\"},\"000000067F00004005010660F501FFFFFFFF-000000067F00004005010660F50300000000__0000011F1A40FA69-00000122A7BB7B29\":{\"file_size\":24576,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F700013E85D1-000000067F00004005000060F70001410BBC__0000011F1A40FA69-00000122A7BB7B29\":{\"file_size\":268460032,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060FB0000538B44-000000067F00004005000060FB0000551689__0000001737D88379-0000001B59EEB909\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F70001410000-000000067F00004005000060F70001414000__00000126C3C69FC0\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F300032F1113-000000067F00004005000060F3000330A1C8__000000E4C63CFA21-000000E7C2F1B249\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30004974000-000000067F00004005000060F3000498DC49__00000139CF156B58\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F3000625EB45-000000067F00004005000060F30006277C61__0000016E1FBB7B99-000001715E483C79\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F700019E8E81-000000067F00004005000060F80100000000__0000014EC58A4A79-0000015304A396B9\":{\"file_size\":246792192,\"generation\":2,\"shard\":\"0008\"},\"000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__000001BCB5730259-000001BCB5732691\":{\"file_size\":24576,\"generation\":187,\"shard\":\"0008\"},\"000000067F000040050081DB4300001CC000-000000067F000040050081DB4300001D0000__00000081AA3C40F0\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30002C00000-000000067F00004005000060F30002C18FAE__000000C824C09619-000000CC13D2E549\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F70000FC4000-000000067F00004005000060F70000FCD85E__000000E4D847F4E0\":{\"file_size\":134422
528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060FB000107C39B-030000000000000000000000000000000002__0000004C49155071-0000004F31878919\":{\"file_size\":133349376,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005016EA00C0000F90000-000000067F00004005016EA00C0000F94000__000001A95031E5B8\":{\"file_size\":134422528,\"generation\":11,\"shard\":\"0008\"},\"000000067F00004005016EA00C0000F98000-000000067F00004005016EA00C0000F9C000__000001A95031E5B8\":{\"file_size\":134422528,\"generation\":11,\"shard\":\"0008\"},\"000000067F00004005000060F700019EC000-000000067F00004005000060F80100000000__0000014EDD256548\":{\"file_size\":7421952,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F300069FA3F6-000000067F00004005000060F30006A0B44C__00000178B8B10551-0000017C9F5597E1\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F000040050081DB4300003AC000-000000067F000040050081DB4300003B27DA__0000008DDCD70B68\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30005A57691-000000067F00004005000060F30005B00697__0000015DD1D3C809-0000016143292911\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F300060CB2C8-000000067F00004005000060F300060D4415__0000016B49A934C1-0000016E1FBB7B99\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F3000495C000-000000067F00004005000060F30004970000__00000139CF156B58\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F0000400500D69D7900000D1C5F-000000067F0000400500D69D7900000F1B5B__000000EFA7EAA9E1-000000F309FCDD19\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005016EA00C0001358000-030000000000000000000000000000000002__000001A95031E5B8\":{\"file_size\":21110784,\"generation\":11,\"shard\":\"0008\"},\"000000067F00004005000060F3000430C000-000000067F00004005000060F30004370000__0000012E77D3BF00\":{\"file_size\":134422528,\"generation\":
2,\"shard\":\"0008\"},\"000000067F00004005000060F30004904000-000000067F00004005000060F30004958000__00000139CF156B58\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30000008000-000000067F00004005000060F30000378000__00000186146441F1-0000018624969469\":{\"file_size\":33357824,\"generation\":6,\"shard\":\"0008\"},\"000000067F00004005000060F700005C0000-000000067F00004005000060F700005C85CE__00000057593D8169-0000005C01565329\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005016EA00C0000B04000-000000067F00004005016EA00C0000B40000__0000019E7001E460\":{\"file_size\":134422528,\"generation\":11,\"shard\":\"0008\"},\"000000067F00004005000060F30002920FA0-000000067F00004005000060F3000293210E__000000BD9A7C56D9-000000C0C9EB88E1\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30002058000-000000067F00004005000060F30002070F71__0000009DF02C1241-000000A173C00489\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F3000686D0DE-000000067F00004005000060F3000689E295__00000178B8B10551-0000017C9F5597E1\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F0000400500FA2AD30000004000-000000067F0000400500FA2AD30000030000__0000010D77B487A0\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005016EA00C00009BF728-000000067F00004005016EA00C0000A575C7__00000196C9018F59-0000019A2EAFE7A9\":{\"file_size\":268451840,\"generation\":11,\"shard\":\"0008\"},\"000000067F00004005000060F30004374000-000000067F00004005000060F300043B0000__0000012E77D3BF00\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F300051F0000-000000067F00004005000060F300051F4000__0000014EDD256548\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30006B22072-000000067F00004005000060F30006B4B0BB__0000017C9F5597E1-0000018022640391\":{\"file_size\":268451840
,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F3000328FA4E-000000067F00004005000060F50100000000__000000E4D847F4E0\":{\"file_size\":24576,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005016EA00C000000FEA0-000000067F00004005016EA00C000001FD3E__0000018624969469-000001880F984A29\":{\"file_size\":268451840,\"generation\":11,\"shard\":\"0008\"},\"000000067F0000400500EB4A48000019F4DD-030000000000000000000000000000000002__000000F6661C9241-000000F901689359\":{\"file_size\":59498496,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005016EA00C00003EC000-000000067F00004005016EA00C00003F8000__000001936E73D028\":{\"file_size\":134422528,\"generation\":11,\"shard\":\"0008\"},\"000000067F00004005016EA00C000073C000-000000067F00004005016EA00C000074F43B__000001936E73D028\":{\"file_size\":134422528,\"generation\":11,\"shard\":\"0008\"},\"000000067F00004005000060F30003542BFF-000000067F00004005000060F50100000000__000000EFDE07FFD8\":{\"file_size\":24576,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F70001771169-000000067F00004005000060F80100000000__000001398B56A519-0000013C9C0E3339\":{\"file_size\":263454720,\"generation\":2,\"shard\":\"0008\"},\"000000067F000040050081DB4300003B27DA-030000000000000000000000000000000002__0000008DDCD70B68\":{\"file_size\":139264,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F3000542AFB0-000000067F00004005000060F30005474062__0000015304A396B9-0000015670D6AFD9\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F7000057C94F-000000067F00004005000060F80100000000__00000054161C34B8\":{\"file_size\":24576,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F300055861F2-000000067F00004005000060F30100000000__0000015304A396B9-0000015670D6AFD9\":{\"file_size\":127393792,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30001D79136-000000067F00004005000060F30100000000__0000008DBE2855F9-000000923719A971\":{\"file_size\":227958784,\"generation\"
:2,\"shard\":\"0008\"},\"000000067F00004005000060F10000218000-000000067F00004005000060F1000021C000__0000002427BD8BD0\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005016EA00C0001CD4000-000000067F00004005016EA00C0001CE0000__000001BCB572A4E0\":{\"file_size\":134422528,\"generation\":17,\"shard\":\"0008\"},\"000000067F00004005000060F300017EC000-000000067F00004005000060F30001886B2A__00000075E5D2A930\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30001188000-000000067F00004005000060F300011D1111__0000005413AB3641-00000057593D8169\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060FB0000ECC000-000000067F00004005000060FB0000F050F2__00000047F1F2B800\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F300018C0000-000000067F00004005000060F300018E0FE6__00000075CC373F31-00000079F2A2F311\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005016EA00C00006E4000-000000067F00004005016EA00C0000738000__000001936E73D028\":{\"file_size\":134422528,\"generation\":11,\"shard\":\"0008\"},\"000000067F00004005000060F30002790000-000000067F00004005000060F30002794000__000000BAC0041E18\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F0000400500F3A25C00001B850B-000000067F0000400500F56D510100000000__0000011B688FEDC8\":{\"file_size\":24576,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F100001F8000-000000067F00004005000060F100001FC000__0000002427BD8BD0\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F70000810000-000000067F00004005000060F80100000000__00000079F2A2F311-0000007E3A9BFD29\":{\"file_size\":263454720,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F100006CBF87-000000067F00004005000060F20100000000__000000A5A3F27398\":{\"file_size\":15851520,\"generation\":2,\"shard\":\"0008\"},\"000000067F0000400500F7D2DD01000
00000-000000067F0000400500F8E3A50000014000__0000010D77B487A0\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F700010AABC7-000000067F00004005000060F80100000000__000000EFDE07FFD8\":{\"file_size\":24576,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30003B80000-000000067F00004005000060F30003B84000__000001180B3FF408\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F000040050081DB430000078000-000000067F000040050081DB4300000AA080__00000075CC373F31-00000079F2A2F311\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30002618000-000000067F00004005000060F30002680F9D__000000B2B5C4E8F9-000000B768469051\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30002A48000-000000067F00004005000060F30002A4C000__000000C483D0D6B8\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F70001994000-000000067F00004005000060F700019E8000__0000014EDD256548\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060FB0000B6168A-000000067F00004005000060FB0000B6A1D0__0000003579F03331-0000003959DA2DE9\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060FB000147A0EC-000000067F00004005000060FB000148AC30__000000601F43CF09-000000636DE92159\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F0000400500EE16BC0000060000-000000067F0000400500EE16BC0000064000__000000F91FE84F08\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30003458D42-000000067F00004005000060F30003481DDB__000000E7C2F1B249-000000EBC9213D59\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30006E30000-000000067F00004005000060F30006E34000__000001848D082B20\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F700017F8000-000000067F0000400
5000060F700017FC000__000001444EB7FC10\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30004C50000-000000067F00004005000060F30004C54000__000001444EB7FC10\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F70001720000-000000067F00004005000060F80100000000__00000139CF156B58\":{\"file_size\":63463424,\"generation\":2,\"shard\":\"0008\"},\"000000067F000040050081DB430000A8E15E-000000067F000040050081DB430000A98000__000000AFD23C27B9-000000B2B5C4E8F9\":{\"file_size\":265404416,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30004BAE526-000000067F00004005000060F30004BE7584__0000013C9C0E3339-0000013FEFA7D709\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005016EA00C0001ADF97B-000000067F00004005016EA00C0001B0FD2A__000001B6FFE46BC9-000001BA93C39481\":{\"file_size\":268451840,\"generation\":11,\"shard\":\"0008\"},\"000000067F00004005000060F60000014000-000000067F00004005000060F60100000000__0000003D2AB09B68\":{\"file_size\":83329024,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060FB0000C1C000-000000067F00004005000060FB0000C70000__0000003D2AB09B68\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30005240000-000000067F00004005000060F30005244000__0000014EDD256548\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F000040050081DB43000077C000-000000067F000040050081DB430000790000__000000A5A3F27398\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30006D60000-000000067F00004005000060F30006D64000__000001848D082B20\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30004C54000-000000067F00004005000060F30004C60000__000001444EB7FC10\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000000000000000001-000000067F0000400500000A690000000002__00000186146441F1-0000018624969469
\":{\"file_size\":57344,\"generation\":6,\"shard\":\"0008\"},\"000000067F00004005000060F30005688000-000000067F00004005000060F3000568C000__00000159B010F6C0\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30004370000-000000067F00004005000060F30004374000__0000012E77D3BF00\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F300051F4000-000000067F00004005000060F30005210000__0000014EDD256548\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30004DD8000-000000067F00004005000060F30004DDC000__000001444EB7FC10\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F0000400500C782E400001AFD31-000000067F0000400500C782E400001B7C41__000000D01F399709-000000D31E48D7C9\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30000BB103B-000000067F00004005000060F60000014C3A__0000003579F03331-0000003959DA2DE9\":{\"file_size\":268460032,\"generation\":2,\"shard\":\"0008\"},\"000000067F0000400500D19D030100000000-000000067F0000400500D69D790000024000__000000EFDE07FFD8\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F000040050081DB43000028B253-030000000000000000000000000000000002__0000008196C976A1-0000008625CF2891\":{\"file_size\":151224320,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30004DD8000-000000067F00004005000060F30004E40FFC__000001440D3D0C69-0000014784964B91\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005010F44EB0100000000-000000067F00004005010F57CB000000C000__00000126C3C69FC0\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30003BCC000-000000067F00004005000060F30003C08000__000001180B3FF408\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30005B80000-000000067F00004005000060F30005B89170__0000016143292911-00000164DEE06671\":{\"file_size\":26845
1840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005016EA00C000135FCAD-000000067F00004005016EA00C000144FB4E__000001A931C135B1-000001AC25760149\":{\"file_size\":268451840,\"generation\":11,\"shard\":\"0008\"},\"000000067F00004005010660F500000B0000-000000067F00004005010660F500000B4000__000001180B3FF408\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30000D31030-000000067F00004005000060F30100000000__0000003D03FCCDB9-000000417D21ACF9\":{\"file_size\":233791488,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30002C18FAE-000000067F00004005000060F30002C71F27__000000C824C09619-000000CC13D2E549\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F0000400500EB4A48000041FB53-000000067F0000400500EB4A480000447A64__000000FCCD5238B1-000000FF8B261599\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F0000400500EE16BC0000048000-000000067F0000400500EE16BC000004C000__000000F91FE84F08\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060FB00009D0000-000000067F00004005000060FB00009D4000__000000321AA80270\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F100004365FE-000000067F00004005000060F20100000000__00000047F1F2B800\":{\"file_size\":24576,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30006BAD108-000000067F00004005000060F30006C0E146__0000017C9F5597E1-0000018022640391\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F300006B4000-000000067F00004005000060F300006E0000__0000002427BD8BD0\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F3000327C000-000000067F00004005000060F3000328FA4E__000000E4D847F4E0\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30003B94000-000000067F00004005000060F30003BC8000__000001180B3FF408\":{\"file_size\":134422528,\"generation\":2,
\"shard\":\"0008\"},\"000000067F00004005000060F30003CB8FCF-000000067F00004005000060F30003CCA0B9__00000117EDA82C11-0000011B632CC319\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30003EA902F-000000067F00004005000060F30003F72201__0000011F1A40FA69-00000122A7BB7B29\":{\"file_size\":268460032,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30004C64000-000000067F00004005000060F30004C80000__000001444EB7FC10\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F000040050081DB430000194000-000000067F000040050081DB4300001C8000__00000081AA3C40F0\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060FB01FFFFFFFF-000000067F00004005000060FB0300000000__0000018613A0DEA9-00000186146441F1\":{\"file_size\":73728,\"generation\":5,\"shard\":\"0008\"},\"000000067F00004005000060F300038B5F5B-000000067F00004005000060F300038FF04F__0000010779A7F551-0000010A5E65DF39\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F000040050081DB4300001C8000-000000067F000040050081DB4300001CC000__00000081AA3C40F0\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F0000400500C782E40000137F10-000000067F0000400500C782E40000177E20__000000D01F399709-000000D31E48D7C9\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060FB000139C000-000000067F00004005000060FB00013B8000__000000603CA8F2F0\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F0000400500EB4A480000447A64-000000067F0000400500EB4A480100000000__000000FCCD5238B1-000000FF8B261599\":{\"file_size\":40550400,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F70000418000-000000067F00004005000060F700004405CF__0000003D03FCCDB9-000000417D21ACF9\":{\"file_size\":268460032,\"generation\":2,\"shard\":\"0008\"},\"000000067F000040050081DB430000728000-000000067F000040050081DB43000072C000__000000A5A3F27398\":{\"file_size\":134422528,\"gener
ation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F300014B0F7B-000000067F00004005000060F30100000000__000000601F43CF09-000000636DE92159\":{\"file_size\":83951616,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30005F3303F-000000067F00004005000060F30005FA40AD__0000016834A3FC91-0000016B49A934C1\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F300012442A9-000000067F00004005000060F3000129D29A__00000057593D8169-0000005C01565329\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F000040050081DB4300010B14AB-000000067F000040050081DB430100000000__000000D037B2DBD0\":{\"file_size\":24576,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005016EA00C00014CF88D-000000067F00004005016EA00C00014D7727__000001A931C135B1-000001AC25760149\":{\"file_size\":268451840,\"generation\":11,\"shard\":\"0008\"},\"000000067F00004005000060F30006A0B44C-000000067F00004005000060F30006A7C566__00000178B8B10551-0000017C9F5597E1\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F1000062EE46-000000067F00004005000060F20100000000__000000698F2C3A38\":{\"file_size\":24576,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005016EA00C0001CE0000-000000067F00004005016EA00C0001CE4000__000001BCB572A4E0\":{\"file_size\":134422528,\"generation\":17,\"shard\":\"0008\"},\"000000067F00004005000060F30000250000-000000067F00004005000060F30000254000__0000000D80565628\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F300050E8000-000000067F00004005000060F300050EC000__0000014EDD256548\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F3000259F4A3-000000067F00004005000060F30100000000__000000AFD23C27B9-000000B2B5C4E8F9\":{\"file_size\":44433408,\"generation\":2,\"shard\":\"0008\"},\"000000067F000040050081DB430000A640EA-000000067F000040050081DB430000A8E15E__000000AFD23C27B9-000000B2B5C4E8F9\":{\"file_size\":268451840,
\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30003050000-000000067F00004005000060F30003061089__000000DBBFA87AE1-000000DE2A8E4FC9\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F0000400500F3A25C0000158000-000000067F0000400500F3A25C000016A065__0000010779A7F551-0000010A5E65DF39\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F000040050081DB4300010A4000-000000067F000040050081DB4300010B14AB__000000D037B2DBD0\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F0000400500EE16BC00001E0000-000000067F0000400500EE16BC00001E4000__00000104BD37F348\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F300055B8000-000000067F00004005000060F300055BC000__00000159B010F6C0\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005016EA00C0000CE4000-000000067F00004005016EA00C0000D30000__0000019E7001E460\":{\"file_size\":134422528,\"generation\":11,\"shard\":\"0008\"},\"000000067F00004005000060F30003640000-000000067F00004005000060F30003644000__000000F91FE84F08\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F0000400500EB4A48000014F7AC-000000067F0000400500EB4A4800001876BD__000000F6661C9241-000000F901689359\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005016EA00C0001CD338E-000000067F00004005016EA00C0001CE79E0__000001BA93C39481-000001BCB572A4E1\":{\"file_size\":268451840,\"generation\":17,\"shard\":\"0008\"},\"000000067F00004005000060FB0001530B44-000000067F00004005000060FB0001541688__000000636DE92159-000000663565F8C9\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F300031D516C-000000067F00004005000060F30100000000__000000DE2A8E4FC9-000000E1CD2FBBE9\":{\"file_size\":137863168,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005016EA00C00019C7A6A-000000067F00004005016EA00C00019F7907__000001B3E1B95181-000001B6FFE46BC9\":{\"file_
size\":268451840,\"generation\":11,\"shard\":\"0008\"},\"000000067F00004005016EA00C0000E7F7A7-000000067F00004005016EA00C0000F3F647__0000019E2C5DCEE1-000001A1DD8B4481\":{\"file_size\":268451840,\"generation\":11,\"shard\":\"0008\"},\"000000067F00004005000060F300032C0000-000000067F00004005000060F300032F1113__000000E4C63CFA21-000000E7C2F1B249\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005016EA00C00006E0000-000000067F00004005016EA00C00006E4000__000001936E73D028\":{\"file_size\":134422528,\"generation\":11,\"shard\":\"0008\"},\"000000067F00004005000060F7000019EA78-000000067F00004005000060F80100000000__0000001737D88379-0000001B59EEB909\":{\"file_size\":50946048,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005016EA00C0001B4FBC9-000000067F00004005016EA00C0001BBFA66__000001B6FFE46BC9-000001BA93C39481\":{\"file_size\":268451840,\"generation\":11,\"shard\":\"0008\"},\"000000067F00004005000060FB0001660000-000000067F00004005000060FB0001680B45__000000698AF6E809-0000006DDB29D589\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30002BAA1DD-000000067F00004005000060F30100000000__000000C462B3C2A9-000000C824C09619\":{\"file_size\":203554816,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F300049B26A8-000000067F00004005000060F300049CB712__000001398B56A519-0000013C9C0E3339\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F70000CCB5CD-000000067F00004005000060F70000CDBB9C__000000BD9A7C56D9-000000C0C9EB88E1\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F000040050081DB430000EEA075-000000067F000040050081DB430000F0C0E9__000000C462B3C2A9-000000C824C09619\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F300003E0000-000000067F00004005000060F300003E8FBC__000000114A805939-00000013FB921C81\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30006C9C
000-000000067F00004005000060F30006CA0000__000001848D082B20\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F70000C7C000-000000067F00004005000060F70000C8CD0C__000000BAC0041E18\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060FB0001148000-000000067F00004005000060FB000114C000__00000054161C34B8\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F70001232ACF-000000067F00004005000060F80100000000__0000010D77B487A0\":{\"file_size\":24576,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F70000FE8000-000000067F00004005000060F700010105DB__000000E4C63CFA21-000000E7C2F1B249\":{\"file_size\":268460032,\"generation\":2,\"shard\":\"0008\"},\"000000067F0000400500EB4A480000355928-000000067F0000400500EB4A480100000000__000000FCD84FE628\":{\"file_size\":24576,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F700003FE341-000000067F00004005000060F80100000000__0000003D2AB09B68\":{\"file_size\":24576,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F3000244D189-000000067F00004005000060F30100000000__000000A9EB8C4489-000000ACA44C8E99\":{\"file_size\":212566016,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F700003B85C7-000000067F00004005000060F80100000000__0000003579F03331-0000003959DA2DE9\":{\"file_size\":208945152,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F100005A2B80-000000067F00004005000060F20100000000__000000603CA8F2F0\":{\"file_size\":24576,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060FB000070C000-000000067F00004005000060FB0000718000__0000002427BD8BD0\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060FB01FFFFFFFF-000000067F00004005000060FB0300000000__00000186146441F1-0000018624969469\":{\"file_size\":24576,\"generation\":6,\"shard\":\"0008\"},\"000000067F00004005000060FB000180C000-000000067F00004005000060FB0001838000__00000075E5D2A9
30\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F0000400500EE16BC0000044000-000000067F0000400500EE16BC0000048000__000000F91FE84F08\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F10100000000-000000067F00004005000060F10300000000__000000A583FBFB91-000000A9EB8C4489\":{\"file_size\":483328,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30004EA41A5-000000067F00004005000060F30004EC52E9__000001440D3D0C69-0000014784964B91\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30003AB9907-000000067F00004005000060F30003AF28CB__0000010FB1BE19B9-00000113456156F1\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060FB0000974000-000000067F00004005000060FB00009D0000__000000321AA80270\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F300038720A2-000000067F00004005000060F300038A3082__000001048B25A8E9-0000010779A7F551\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F000040050081DB430000452BA1-000000067F000040050081DB4300004C4C1E__000000923719A971-00000096262826C9\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F300017AA0CE-000000067F00004005000060F30100000000__0000006DDB29D589-000000722F474369\":{\"file_size\":202719232,\"generation\":2,\"shard\":\"0008\"},\"000000067F000040050081DB430000504000-000000067F000040050081DB430000560000__0000009A24DF6768\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30004B5431C-000000067F00004005000060F30004B654F6__0000013C9C0E3339-0000013FEFA7D709\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30000C20000-000000067F00004005000060F30000C24000__0000003D2AB09B68\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F300028920E4-000000067F00004005000060F30100000000__00
0000BAB1E56C91-000000BD9A7C56D9\":{\"file_size\":200351744,\"generation\":2,\"shard\":\"0008\"},\"000000067F000040050081DB4300004C4C1E-030000000000000000000000000000000002__000000923719A971-00000096262826C9\":{\"file_size\":192356352,\"generation\":2,\"shard\":\"0008\"},\"000000067F000040050081DB430000190000-000000067F000040050081DB430000194000__00000081AA3C40F0\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F000040050081DB430000E88000-000000067F000040050081DB430000E8C000__000000C483D0D6B8\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005016EA00C0000738000-000000067F00004005016EA00C000073C000__000001936E73D028\":{\"file_size\":134422528,\"generation\":11,\"shard\":\"0008\"},\"000000067F000040050081DB430000578EE6-000000067F000040050081DB43000058AF5E__0000009A1ABDE921-0000009DF02C1241\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30001C38000-000000067F00004005000060F30001C3C000__0000008DDCD70B68\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F000040050081DB430000B7C0EA-030000000000000000000000000000000002__000000B2B5C4E8F9-000000B768469051\":{\"file_size\":133464064,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F3000625B8F0-000000067F00004005000060F30100000000__0000016B49A934C1-0000016E1FBB7B99\":{\"file_size\":139640832,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060FB000109C000-000000067F00004005000060FB0001110000__00000054161C34B8\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__000001BCB572DFF9-000001BCB5730259\":{\"file_size\":24576,\"generation\":41,\"shard\":\"0008\"},\"000000067F00004005000060FB0000AA8000-000000067F00004005000060FB0000AD0B45__0000003203FB5749-0000003579F03331\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F300043F8000-000000067F00004005000060F30
0043FC000__0000012E77D3BF00\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F0000400500EB4A4800003C7C42-000000067F0000400500EB4A48000041FB53__000000FCCD5238B1-000000FF8B261599\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30005BA213F-000000067F00004005000060F30005BDB15B__0000016143292911-00000164DEE06671\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F300063FE10E-000000067F00004005000060F30100000000__0000016E1FBB7B99-000001715E483C79\":{\"file_size\":111067136,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30000F91FFF-000000067F00004005000060F30000F9B026__00000047E31D98D1-0000004C49155071\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30003650000-000000067F00004005000060F30003654000__000000F91FE84F08\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F300050A412B-000000067F00004005000060F300050B5199__0000014784964B91-0000014B000D1821\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005016EA00C0001D78000-000000067F00004005016EA00C0001D7C000__000001BCB572A4E0\":{\"file_size\":134422528,\"generation\":17,\"shard\":\"0008\"},\"000000067F00004005016EA00C0001244000-000000067F00004005016EA00C0001298000__000001A95031E5B8\":{\"file_size\":134422528,\"generation\":11,\"shard\":\"0008\"},\"000000067F00004005000060F100001FC000-000000067F00004005000060F10000200000__0000002427BD8BD0\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005016EA00C0000CA0000-000000067F00004005016EA00C0000CA4000__0000019E7001E460\":{\"file_size\":134422528,\"generation\":11,\"shard\":\"0008\"},\"000000067F00004005000060F3000498DC49-000000067F00004005000060F50100000000__00000139CF156B58\":{\"file_size\":24576,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F60000036EA0-000000067F00004005000060F60100000000__0000009A2
4DF6768\":{\"file_size\":24576,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060FB0000928B45-000000067F00004005000060FB000097168A__00000028C365FBE1-0000002D2A8E0B81\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30006854000-000000067F00004005000060F30006858000__00000178C5D5D3A8\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F000040050109FFA2000000C3F5-030000000000000000000000000000000002__00000117EDA82C11-0000011B632CC319\":{\"file_size\":226066432,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30003A6D1B3-000000067F00004005000060F30100000000__0000010D5DC42EF9-0000010FB1BE19B9\":{\"file_size\":117620736,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30002D2C000-000000067F00004005000060F30002D80000__000000D037B2DBD0\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30003A31FB6-000000067F00004005000060F30003A3B020__0000010D5DC42EF9-0000010FB1BE19B9\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005016EA00C000160723E-000000067F00004005016EA00C00016570D9__000001AC25760149-000001AFC313C819\":{\"file_size\":268451840,\"generation\":11,\"shard\":\"0008\"},\"000000067F0000400500FB3D310000018000-000000067F0000400500FB3D31000001C000__0000010D77B487A0\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F70001708000-000000067F00004005000060F7000170C000__00000139CF156B58\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F3000283C3E7-000000067F00004005000060F50100000000__000000BAC0041E18\":{\"file_size\":24576,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060FB00018F0000-000000067F00004005000060FB0100000000__00000075CC373F31-00000079F2A2F311\":{\"file_size\":268959744,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060FB0000EC8000-000000067F00004005000060FB0000ECC000__00000047F1F2B800\"
:{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005016EA00C0000F9C000-000000067F00004005016EA00C0000FF0000__000001A95031E5B8\":{\"file_size\":134422528,\"generation\":11,\"shard\":\"0008\"},\"000000067F00004005000060F30002680F9D-000000067F00004005000060F3000274A080__000000B2B5C4E8F9-000000B768469051\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F3000679C000-000000067F00004005000060F300067A0000__00000178C5D5D3A8\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F3000428313F-000000067F00004005000060F300042CC1BD__0000012694E36301-0000012A3F140591\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005016EA00000FFFFFFFF-030000000000000000000000000000000002__00000186146441F1-0000018624969469\":{\"file_size\":24576,\"generation\":6,\"shard\":\"0008\"},\"000000067F00004005000060FB00017D8000-000000067F00004005000060FB00017DC000__00000075E5D2A930\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F700017FC000-000000067F00004005000060F70001828000__000001444EB7FC10\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30002FD317C-000000067F00004005000060F30002FF427D__000000D74E29AAD1-000000DBBFA87AE1\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060FB0001701588-000000067F00004005000060FB00017120CE__0000006DDB29D589-000000722F474369\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F0000400500000A3000FFFFFFFF-000000067F0000400500000A690000000002__000001BA93C39481-000001BCB572A4E1\":{\"file_size\":40960,\"generation\":17,\"shard\":\"0008\"},\"000000067F00004005000060FB0000638B45-030000000000000000000000000000000002__0000001B59EEB909-0000001FFBC01501\":{\"file_size\":252010496,\"generation\":2,\"shard\":\"0008\"},\"000000067F000040050081DB430000394000-000000067F000040050081DB4300003A8000__0000008DD
CD70B68\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30001CF0197-000000067F00004005000060F50100000000__0000008DDCD70B68\":{\"file_size\":24576,\"generation\":2,\"shard\":\"0008\"},\"000000067F0000400500EB4A4800000DFB51-000000067F0000400500EB4A4800000E7A62__000000F309FCDD19-000000F6661C9241\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F7000014C000-000000067F00004005000060F70000180000__000000174479FC18\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30005948000-000000067F00004005000060F300059790CD__0000015DD1D3C809-0000016143292911\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30000853115-000000067F00004005000060F60100000000__00000023FEF9F321-00000028C365FBE1\":{\"file_size\":176136192,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30004884000-000000067F00004005000060F30004888000__00000139CF156B58\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F3000513C000-000000067F00004005000060F30005160000__0000014EDD256548\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F0000400500F3A25C000017C000-000000067F0000400500F3A25C00001B850B__0000010D77B487A0\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30006382F14-000000067F00004005000060F3000638C06D__0000016E1FBB7B99-000001715E483C79\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F0000400500E3A2A10000017F02-000000067F0000400500E3A2A100000B7E04__000000E7C2F1B249-000000EBC9213D59\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060FB0001000B44-000000067F00004005000060FB0001009688__0000004C49155071-0000004F31878919\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F0000400500D69D790100000000-000000067F0000400500DBCED50000024000__000000E4D847F4E
0\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F000040050081DB4300010A0000-000000067F000040050081DB4300010A4000__000000D037B2DBD0\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060FB0000310000-000000067F00004005000060FB0000348B45__0000000D55A212C9-000000114A805939\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F60000060038-000000067F00004005000060F60100000000__000000F91FE84F08\":{\"file_size\":24576,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30001CE0000-000000067F00004005000060F30001CE4000__0000008DDCD70B68\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F000040050081DB4300000AA080-000000067F000040050081DB4300000D40FF__00000075CC373F31-00000079F2A2F311\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060FB0000551689-030000000000000000000000000000000002__0000001737D88379-0000001B59EEB909\":{\"file_size\":227418112,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060FB0000D90000-000000067F00004005000060FB0100000000__0000003D03FCCDB9-000000417D21ACF9\":{\"file_size\":272769024,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F300059CC403-000000067F00004005000060F300059F53C6__0000015DD1D3C809-0000016143292911\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30001F2C000-000000067F00004005000060F30001F30000__0000009A24DF6768\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060FB0000014000-000000067F00004005000060FB0000084772__000000027AF9D7D0\":{\"file_size\":134422528,\"generation\":1,\"shard\":\"0008\"},\"000000067F00004005000060F30004B654F6-000000067F00004005000060F30004BAE526__0000013C9C0E3339-0000013FEFA7D709\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30002450000-000000067F00004005000060F30002454000__000000AFE87558B0\":{\
"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30003A0F066-000000067F00004005000060F50100000000__0000010D77B487A0\":{\"file_size\":24576,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F60000032EBE-000000067F00004005000060F60100000000__0000008DDCD70B68\":{\"file_size\":24576,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060FB00001D8000-000000067F00004005000060FB00001DC000__0000000D80565628\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005016EA00C0000670000-000000067F00004005016EA00C0000674000__000001936E73D028\":{\"file_size\":134422528,\"generation\":11,\"shard\":\"0008\"},\"000000067F00004005016EA00C0001344000-000000067F00004005016EA00C0001358000__000001A95031E5B8\":{\"file_size\":134422528,\"generation\":11,\"shard\":\"0008\"},\"000000067F00004005016EA00C0000D30000-000000067F00004005016EA00C0000D34000__0000019E7001E460\":{\"file_size\":134422528,\"generation\":11,\"shard\":\"0008\"},\"000000067F00004005016EA00C000012FE9A-000000067F00004005016EA00C00001F7D38__000001880F984A29-0000018C496B6DB1\":{\"file_size\":268451840,\"generation\":11,\"shard\":\"0008\"},\"000000067F00004005000060F70000BF0000-000000067F00004005000060F70100000000__000000B2B5C4E8F9-000000B768469051\":{\"file_size\":273809408,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F300005A0000-000000067F00004005000060F3000067114B__0000001B59EEB909-0000001FFBC01501\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F0000400500EB4A48000021C000-000000067F0000400500EB4A480000290000__000000FCD84FE628\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005016EA00C0000F3C000-000000067F00004005016EA00C0000F58000__000001A95031E5B8\":{\"file_size\":134422528,\"generation\":11,\"shard\":\"0008\"},\"000000067F00004005016EA00C000074F43B-030000000000000000000000000000000002__000001936E73D028\":{\"file_size\":139264,\"generation\":11,\"shard\":\"0008\"
},\"000000067F00004005010F57CB000000C000-000000067F00004005010F99A50100000000__00000126C3C69FC0\":{\"file_size\":22978560,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F700017E1391-000000067F00004005000060F80100000000__0000013C9C0E3339-0000013FEFA7D709\":{\"file_size\":232677376,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005016EA00C0001CC74D7-000000067F00004005016EA00C0001CD7376__000001B6FFE46BC9-000001BA93C39481\":{\"file_size\":268451840,\"generation\":11,\"shard\":\"0008\"},\"000000067F00004005000060F700005C85CE-000000067F00004005000060F700005E8B9D__00000057593D8169-0000005C01565329\":{\"file_size\":268460032,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30003FCD352-000000067F00004005000060F30100000000__0000011F1A40FA69-00000122A7BB7B29\":{\"file_size\":124788736,\"generation\":2,\"shard\":\"0008\"},\"000000067F0000400500C782E400002A5E4B-000000067F0000400500C782E400002CDD5C__000000D31E48D7C9-000000D74E29AAD1\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F700018871D6-000000067F00004005000060F80100000000__000001444EB7FC10\":{\"file_size\":24576,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30003D252C8-000000067F00004005000060F30100000000__00000117EDA82C11-0000011B632CC319\":{\"file_size\":205963264,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060FB0001408A62-000000067F00004005000060FB00014195A7__000000601F43CF09-000000636DE92159\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F0000400500C782E400001B7C41-000000067F0000400500C782E400001C7B51__000000D01F399709-000000D31E48D7C9\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060FB0000110000-000000067F00004005000060FB0100000000__000000044854EBD1-00000008B6B51879\":{\"file_size\":272613376,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F300004E8000-000000067F00004005000060F60100000000__0000001737D88379-0000001B59EEB909\
":{\"file_size\":260579328,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30006DF4000-000000067F00004005000060F30006E30000__000001848D082B20\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F000040050081DB430000C84000-030000000000000000000000000000000002__000000BAC0041E18\":{\"file_size\":59998208,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30002B88FF2-000000067F00004005000060F30002BAA1DD__000000C462B3C2A9-000000C824C09619\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060FB0000434000-000000067F00004005000060FB00004A0000__000000174479FC18\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30004DA8000-000000067F00004005000060F30004DAC000__000001444EB7FC10\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F000040050081DB4300004E0000-000000067F000040050081DB4300004E4000__0000009A24DF6768\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F0000400500EE16BC00001E4000-000000067F0000400500EE16BC0000201716__00000104BD37F348\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F000040050081DB430000C440EA-000000067F000040050081DB430000C5E15B__000000B768469051-000000BAB1E56C91\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F0000400500D69D7900000BDAF5-000000067F0000400500D69D790100000000__000000EFDE07FFD8\":{\"file_size\":24576,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30002A9C000-000000067F00004005000060F30002AEED02__000000C483D0D6B8\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30004DAC000-000000067F00004005000060F30004DD8000__000001444EB7FC10\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F70000B94000-000000067F00004005000060F70000B98000__000000AFE87558B0\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067
F00004005000060F30002454000-000000067F00004005000060F30002460000__000000AFE87558B0\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F100001059CB-000000067F00004005000060F10000125BF2__000000114A805939-00000013FB921C81\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005016EA00C0000D362CA-000000067F00004005016EA00C0000DB7D33__0000019E2C5DCEE1-000001A1DD8B4481\":{\"file_size\":268451840,\"generation\":11,\"shard\":\"0008\"},\"000000067F00004005000060F30001C0A0A3-000000067F00004005000060F30100000000__0000008625CF2891-00000089F4693119\":{\"file_size\":203063296,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F300066F0000-000000067F00004005000060F300066F4000__00000178C5D5D3A8\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F70001414000-000000067F00004005000060F70001428000__00000126C3C69FC0\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F300014CC16D-000000067F00004005000060F300014D5280__000000636DE92159-000000663565F8C9\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060FB000172AC12-030000000000000000000000000000000002__0000006DDB29D589-000000722F474369\":{\"file_size\":186875904,\"generation\":2,\"shard\":\"0008\"},\"000000067F000040050081DB430000E4C000-000000067F000040050081DB430000E88000__000000C483D0D6B8\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F300063A50CD-000000067F00004005000060F300063FE10E__0000016E1FBB7B99-000001715E483C79\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30005419E9C-000000067F00004005000060F3000542AFB0__0000015304A396B9-0000015670D6AFD9\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F0000400500EE16BC000014158C-030000000000000000000000000000000002__000000F901689359-000000FCCD5238B1\":{\"file_size\":67854336,\"generation\":2
,\"shard\":\"0008\"},\"000000067F00004005016EA00C00015FF3A0-000000067F00004005016EA00C000160723E__000001AC25760149-000001AFC313C819\":{\"file_size\":268451840,\"generation\":11,\"shard\":\"0008\"},\"000000067F00004005016EA00C00008E760F-000000067F00004005016EA00C00009274AB__000001935283F9B9-00000196C9018F59\":{\"file_size\":268451840,\"generation\":11,\"shard\":\"0008\"},\"000000067F00004005000060F70000B98000-000000067F00004005000060F70000B9C000__000000AFE87558B0\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060FB00004A4000-000000067F00004005000060FB00004E1FF6__000000174479FC18\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30006670000-000000067F00004005000060F30006674000__00000178C5D5D3A8\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F70000185EE9-000000067F00004005000060F7000018E4B6__0000001737D88379-0000001B59EEB909\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F0000400500D19D030000067CA9-030000000000000000000000000000000002__000000DBBFA87AE1-000000DE2A8E4FC9\":{\"file_size\":29319168,\"generation\":2,\"shard\":\"0008\"},\"000000067F0000400500FF2A51000000BFFB-030000000000000000000000000000000002__0000010D77B487A0\":{\"file_size\":139264,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30004A048A8-000000067F00004005000060F30004A1D870__000001398B56A519-0000013C9C0E3339\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F300004BC000-000000067F00004005000060F300004C6B83__000000174479FC18\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30005290FC9-000000067F00004005000060F3000533205E__0000014EC58A4A79-0000015304A396B9\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F300031130BC-000000067F00004005000060F300031C40D1__000000DE2A8E4FC9-000000E1CD2FBBE9\":{\"file_size\":268451840,\"g
eneration\":2,\"shard\":\"0008\"},\"000000067F0000400500D19D030000047EE2-000000067F0000400500D19D03000004FDC6__000000DBBFA87AE1-000000DE2A8E4FC9\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30002A44000-000000067F00004005000060F30002A48000__000000C483D0D6B8\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30003DAE2DC-000000067F00004005000060F30003DD734C__0000011B632CC319-0000011F1A40FA69\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F0000400500F8E3A50000014000-000000067F0000400500F8E3A5000004A25C__0000010D77B487A0\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F100002F03E9-000000067F00004005000060F20100000000__000000321AA80270\":{\"file_size\":24576,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F70001138000-000000067F00004005000060F80100000000__000000FCCD5238B1-000000FF8B261599\":{\"file_size\":72695808,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F300056E4000-000000067F00004005000060F50100000000__00000159B010F6C0\":{\"file_size\":13393920,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F70000A7C000-000000067F00004005000060F70000ABD9C4__000000A5A3F27398\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060FB0000CC6E51-030000000000000000000000000000000002__0000003D2AB09B68\":{\"file_size\":147456,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F60000091EFF-000000067F00004005000060F60100000000__0000014EDD256548\":{\"file_size\":24576,\"generation\":2,\"shard\":\"0008\"},\"000000067F0000400500EB4A48000008FC41-000000067F0000400500EB4A4800000DFB51__000000F309FCDD19-000000F6661C9241\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30001F363B4-000000067F00004005000060F30001F574A6__0000009A1ABDE921-0000009DF02C1241\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"
},\"000000067F00004005016EA00C0001CD0000-000000067F00004005016EA00C0001CD4000__000001BCB572A4E0\":{\"file_size\":134422528,\"generation\":17,\"shard\":\"0008\"},\"000000067F00004005000060F300059B324D-000000067F00004005000060F300059CC403__0000015DD1D3C809-0000016143292911\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30002530000-000000067F00004005000060F30002534000__000000AFE87558B0\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F6000004B633-000000067F00004005000060F60100000000__000000C483D0D6B8\":{\"file_size\":24576,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F700011E0000-000000067F00004005000060F80100000000__0000010779A7F551-0000010A5E65DF39\":{\"file_size\":262922240,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30006690000-000000067F00004005000060F30006694000__00000178C5D5D3A8\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F70000100E18-000000067F00004005000060F700001213F2__0000000D55A212C9-000000114A805939\":{\"file_size\":268460032,\"generation\":2,\"shard\":\"0008\"},\"000000067F0000400500FF2A510000004000-000000067F0000400500FF2A51000000BFFB__0000010D77B487A0\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060FB0000EB8000-000000067F00004005000060FB0000EBC000__00000047F1F2B800\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005016EA00C0000674000-000000067F00004005016EA00C00006B0000__000001936E73D028\":{\"file_size\":134422528,\"generation\":11,\"shard\":\"0008\"},\"000000067F00004005000060F70000EF85D6-000000067F00004005000060F80100000000__000000D74E29AAD1-000000DBBFA87AE1\":{\"file_size\":262897664,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F700005E8B9D-000000067F00004005000060F700005F9158__00000057593D8169-0000005C01565329\":{\"file_size\":268460032,\"generation\":2,\"shard\":\"0008\"},\"000000067F0000400
5000060F30004E40FFC-000000067F00004005000060F30004E7A062__000001440D3D0C69-0000014784964B91\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F0000400500EB4A480000037E20-000000067F0000400500EB4A480000057D31__000000F309FCDD19-000000F6661C9241\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F0000400501101C0901FFFFFFFF-030000000000000000000000000000000002__0000012E71CF31F9-000001334140FC21\":{\"file_size\":65060864,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F70000B10000-000000067F00004005000060F70100000000__000000A583FBFB91-000000A9EB8C4489\":{\"file_size\":272646144,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F300056E104B-000000067F00004005000060F3000570A19E__00000159A7EC8CB9-0000015DD1D3C809\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F300059790CD-000000067F00004005000060F300059AA115__0000015DD1D3C809-0000016143292911\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F70000B54000-000000067F00004005000060F70000B90000__000000AFE87558B0\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F300041D9101-000000067F00004005000060F3000424A099__0000012694E36301-0000012A3F140591\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F700000E085E-000000067F00004005000060F70000100E18__0000000D55A212C9-000000114A805939\":{\"file_size\":268460032,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F300051B0000-000000067F00004005000060F300051B4000__0000014EDD256548\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__000001BCB572A4E1-000001BCB572C329\":{\"file_size\":24576,\"generation\":17,\"shard\":\"0008\"},\"000000067F00004005000060F30006D30000-000000067F00004005000060F30006D34000__000001848D082B20\":{\"file_size\":134422528,\"generatio
n\":2,\"shard\":\"0008\"},\"000000067F0000400500FDA1F80000020D42-000000067F0000400500FDA1F80100000000__0000010D77B487A0\":{\"file_size\":24576,\"generation\":2,\"shard\":\"0008\"},\"000000067F000040050081D80C0100000000-000000067F000040050081DB430000024000__00000075E5D2A930\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F600000235B4-000000067F00004005000060F60100000000__000000603CA8F2F0\":{\"file_size\":24576,\"generation\":2,\"shard\":\"0008\"},\"000000067F0000400500C782E400000A0000-000000067F0000400500C782E400000A4000__000000D037B2DBD0\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30002264247-000000067F00004005000060F50100000000__000000A5A3F27398\":{\"file_size\":24576,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F3000302C2D6-000000067F00004005000060F50100000000__000000DBD29DC248\":{\"file_size\":24576,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005016EA00C000129C000-000000067F00004005016EA00C0001340000__000001A95031E5B8\":{\"file_size\":134422528,\"generation\":11,\"shard\":\"0008\"},\"000000067F00004005000060F700016E8000-000000067F00004005000060F700016EC000__00000139CF156B58\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F300023A0000-000000067F00004005000060F300023B0FF7__000000A9EB8C4489-000000ACA44C8E99\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F20100000000-000000067F00004005000060F3000000C000__000000027AF9D7D0\":{\"file_size\":134422528,\"generation\":1,\"shard\":\"0008\"},\"000000067F00004005016EA00C0000374000-000000067F00004005016EA00C00003E0000__000001936E73D028\":{\"file_size\":134422528,\"generation\":11,\"shard\":\"0008\"},\"000000067F00004005000060F70000368000-000000067F00004005000060F80100000000__0000003203FB5749-0000003579F03331\":{\"file_size\":263249920,\"generation\":2,\"shard\":\"0008\"},\"000000067F000040050081DB4300006310C9-0300000000000000000
00000000000000002__0000009A1ABDE921-0000009DF02C1241\":{\"file_size\":208953344,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060FB0000DC8000-000000067F00004005000060FB0000DE8B45__000000417D21ACF9-00000044B4679349\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060FB0000530000-000000067F00004005000060FB0000538B44__0000001737D88379-0000001B59EEB909\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F000040050081DB430000024000-000000067F000040050081DB430000028000__00000075E5D2A930\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F3000488C000-000000067F00004005000060F30004898000__00000139CF156B58\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F300044D3639-000000067F00004005000060F50100000000__0000012E77D3BF00\":{\"file_size\":24576,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005010450640000000570-000000067F0000400501046F39000000BDD2__0000010FB1BE19B9-00000113456156F1\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F300021050B0-000000067F00004005000060F3000212E160__0000009DF02C1241-000000A173C00489\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F700010DD440-000000067F00004005000060F80100000000__000000F309FCDD19-000000F6661C9241\":{\"file_size\":91758592,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060FB0000AD0B45-000000067F00004005000060FB0000AE168A__0000003203FB5749-0000003579F03331\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F7000013B18E-000000067F00004005000060F7000014B73D__000000114A805939-00000013FB921C81\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005016EA00C0001938000-000000067F00004005016EA00C000193FE9D__000001B3E1B95181-000001B6FFE46BC9\":{\"file_size\":268451840,\"generation\":11,\"shard\":\"0008\"},\"000000067F0
000400500C782E400000A4000-000000067F0000400500C782E4000012A71E__000000D037B2DBD0\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30001A40000-000000067F00004005000060F30001A44000__00000081AA3C40F0\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005016EA00C00008578D4-000000067F00004005016EA00C00008CF772__000001935283F9B9-00000196C9018F59\":{\"file_size\":268451840,\"generation\":11,\"shard\":\"0008\"},\"000000067F00004005000060F30001CC0000-000000067F00004005000060F30001CC4000__0000008DDCD70B68\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30004D20000-000000067F00004005000060F30004D24000__000001444EB7FC10\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005016EA00C00003E8000-000000067F00004005016EA00C00003EC000__000001936E73D028\":{\"file_size\":134422528,\"generation\":11,\"shard\":\"0008\"},\"000000067F00004005000060F300039C4000-000000067F00004005000060F300039F8000__0000010D77B487A0\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30005164000-000000067F00004005000060F300051B0000__0000014EDD256548\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F300039F8000-000000067F00004005000060F300039FC000__0000010D77B487A0\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F000040050081DB4300010F46BD-000000067F000040050081DB430100000000__000000D31E48D7C9-000000D74E29AAD1\":{\"file_size\":113999872,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30002E630CF-000000067F00004005000060F30100000000__000000D31E48D7C9-000000D74E29AAD1\":{\"file_size\":171999232,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005016EA00C0000ACF305-000000067F00004005016EA00C0000ADF1AB__00000196C9018F59-0000019A2EAFE7A9\":{\"file_size\":268451840,\"generation\":11,\"shard\":\"0008\"},\"000000067F00004005000060F30006748000-0000000
67F00004005000060F3000674C000__00000178C5D5D3A8\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30003810000-000000067F00004005000060F50100000000__00000104BD37F348\":{\"file_size\":11739136,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F1000021C000-000000067F00004005000060F20100000000__0000002427BD8BD0\":{\"file_size\":132448256,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005016EA00C00017EC000-000000067F00004005016EA00C00018C0000__000001B3F17FE4E0\":{\"file_size\":134422528,\"generation\":11,\"shard\":\"0008\"},\"000000067F00004005000060F7000025DA3C-000000067F00004005000060F80100000000__0000002427BD8BD0\":{\"file_size\":24576,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060FB00007F0000-000000067F00004005000060FB0000860B45__00000023FEF9F321-00000028C365FBE1\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30003FF0000-000000067F00004005000060F30003FF4000__00000126C3C69FC0\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060FB0000E0AD15-000000067F00004005000060FB0000E1B859__000000417D21ACF9-00000044B4679349\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005010ADFA80000004000-000000067F00004005010F2BD40100000000__00000126C3C69FC0\":{\"file_size\":13369344,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30004898000-000000067F00004005000060F3000489C000__00000139CF156B58\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30003D2B1B0-000000067F00004005000060F30003D44283__0000011B632CC319-0000011F1A40FA69\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005016EA00C0000FF4000-000000067F00004005016EA00C0001188000__000001A95031E5B8\":{\"file_size\":134422528,\"generation\":11,\"shard\":\"0008\"},\"000000067F00004005010F99A50100000000-000000067F00004005010F9F120000004000__00000126C3C69FC0\":{\"file
_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30001F34000-000000067F00004005000060F30001F38F48__0000009A24DF6768\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F700018A0000-000000067F00004005000060F700018D85CA__000001440D3D0C69-0000014784964B91\":{\"file_size\":268460032,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F300029A526C-000000067F00004005000060F300029C623C__000000BD9A7C56D9-000000C0C9EB88E1\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060FB00017DC000-000000067F00004005000060FB0001808000__00000075E5D2A930\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F0000400500DBCED50000024000-000000067F0000400500DBCED50000028000__000000E4D847F4E0\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F0000400500EE16BC0000201716-000000067F0000400500EE16C40100000000__0000012A77C1B0B0\":{\"file_size\":32768,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30006D10000-000000067F00004005000060F30006D14000__000001848D082B20\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F000040050081DB430001064000-000000067F000040050081DB4300010A0000__000000D037B2DBD0\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F0000400500F3A25C01FFFFFFFF-000000067F0000400500F3A25C0300000000__0000011B632CC319-0000011F1A40FA69\":{\"file_size\":24576,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30001340000-000000067F00004005000060F30001344000__000000603CA8F2F0\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30003E98000-000000067F00004005000060F30003EA902F__0000011F1A40FA69-00000122A7BB7B29\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30006C0E146-000000067F00004005000060F30006C8729E__0000017C9F5597E1-0000018022640391\":{\"file_size\":268451840,\"generat
ion\":2,\"shard\":\"0008\"},\"000000067F00004005000060F600000166C4-000000067F00004005000060F60100000000__0000003D03FCCDB9-000000417D21ACF9\":{\"file_size\":54165504,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F10000180000-000000067F00004005000060F1000018821A__0000001737D88379-0000001B59EEB909\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005016EA00C000193FE9D-000000067F00004005016EA00C0001967D34__000001B3E1B95181-000001B6FFE46BC9\":{\"file_size\":268451840,\"generation\":11,\"shard\":\"0008\"},\"000000067F000040050081DB43000076C000-000000067F000040050081DB430000778000__000000A5A3F27398\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F300050321C0-000000067F00004005000060F30005063187__0000014784964B91-0000014B000D1821\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F0000400500DBCED500000D4000-000000067F0000400500DBCED500000F0000__000000E4D847F4E0\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F300004B8000-000000067F00004005000060F300004BC000__000000174479FC18\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060FB000022C000-000000067F00004005000060FB0000280000__0000000D80565628\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060FB0000DF968A-000000067F00004005000060FB0000E021D0__000000417D21ACF9-00000044B4679349\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060FB0000228000-000000067F00004005000060FB000022C000__0000000D80565628\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060FB00015D8000-000000067F00004005000060FB00015DC000__000000698F2C3A38\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30005B89170-000000067F00004005000060F30005BA213F__0000016143292911-00000164DEE06671\":{\"file_size\":268451840,\"generatio
n\":2,\"shard\":\"0008\"},\"000000067F00004005000060F300043B0000-000000067F00004005000060F300043B4000__0000012E77D3BF00\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F000040050081DB4300004F8000-000000067F000040050081DB4300004FC000__0000009A24DF6768\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30006860000-000000067F00004005000060F30006864000__00000178C5D5D3A8\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30000ADA0D0-000000067F00004005000060F30000B0300C__0000003203FB5749-0000003579F03331\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F0000400500FF2A510000000000-000000067F000040050100D04D000004369C__0000010D5DC42EF9-0000010FB1BE19B9\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F0000400500F3A25C00000BB439-030000000000000000000000000000000002__00000104BD37F348\":{\"file_size\":139264,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005016EA00C0001C078FA-000000067F00004005016EA00C0001C0F79A__000001B6FFE46BC9-000001BA93C39481\":{\"file_size\":268451840,\"generation\":11,\"shard\":\"0008\"},\"000000067F000040050081DB430000B4A075-000000067F000040050081DB430000B7C0EA__000000B2B5C4E8F9-000000B768469051\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F3000117C10C-000000067F00004005000060F50100000000__00000054161C34B8\":{\"file_size\":24576,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005016EA00C0000E47BD2-000000067F00004005016EA00C0000E67A6E__0000019E2C5DCEE1-000001A1DD8B4481\":{\"file_size\":268451840,\"generation\":11,\"shard\":\"0008\"},\"000000067F00004005000060F30005D23BB5-000000067F00004005000060F50100000000__00000164EA9EC9A8\":{\"file_size\":24576,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F3000336D193-000000067F00004005000060F3000337DCF3__000000E4C63CFA21-000000E7C2F1B249\":{\"file_size\":259473408,\"generation\":2,\"sha
rd\":\"0008\"},\"000000067F00004005000060F300001F0000-000000067F00004005000060F300001F4000__0000000D80565628\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060FB0000084772-030000000000000000000000000000000002__000000027AF9D7D0\":{\"file_size\":147456,\"generation\":1,\"shard\":\"0008\"},\"000000067F00004005016EA00C0001CE79E0-000000067F00004005016EA00C0001D1F87B__000001BA93C39481-000001BCB572A4E1\":{\"file_size\":268451840,\"generation\":17,\"shard\":\"0008\"},\"000000067F0000400500EB4A4800FFFFFFFF-000000067F0000400500EB4A480100000000__000000FF8B261599-000001048B25A8E9\":{\"file_size\":1318912,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F70000488000-000000067F00004005000060F7000048C000__00000047F1F2B800\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005016EA00C0000ADF1AB-000000067F00004005016EA00C0100000000__00000196C9018F59-0000019A2EAFE7A9\":{\"file_size\":282132480,\"generation\":11,\"shard\":\"0008\"},\"000000067F00004005000060FB000071C000-000000067F00004005000060FB0000793506__0000002427BD8BD0\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30006850000-000000067F00004005000060F30006854000__00000178C5D5D3A8\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F000040050081DB430000390000-000000067F000040050081DB430000394000__0000008DDCD70B68\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F3000020C000-000000067F00004005000060F30000250000__0000000D80565628\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060FB0001398000-000000067F00004005000060FB000139C000__000000603CA8F2F0\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30003648000-000000067F00004005000060F3000364C000__000000F91FE84F08\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F0000400500C782E400001C7B51-00
0000067F0000400500C782E4000023FA62__000000D01F399709-000000D31E48D7C9\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005016EA00C0001788000-000000067F00004005016EA00C000178C000__000001B3F17FE4E0\":{\"file_size\":134422528,\"generation\":11,\"shard\":\"0008\"},\"000000067F000040050081DB430000C3A075-000000067F000040050081DB430000C440EA__000000B768469051-000000BAB1E56C91\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F300036FE561-000000067F00004005000060F300038075AF__000000FF8B261599-000001048B25A8E9\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F0000400500D19D03000004FDC6-000000067F0000400500D19D030000067CA9__000000DBBFA87AE1-000000DE2A8E4FC9\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060FB0000C00000-000000067F00004005000060FB0000C04000__0000003D2AB09B68\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F3000282C000-000000067F00004005000060F3000283C3E7__000000BAC0041E18\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005016EA00C00006B0000-000000067F00004005016EA00C00006B4000__000001936E73D028\":{\"file_size\":134422528,\"generation\":11,\"shard\":\"0008\"},\"000000067F00004005000060F30001789027-000000067F00004005000060F300017AA0CE__0000006DDB29D589-000000722F474369\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30004558000-000000067F00004005000060F300045C1062__0000012E71CF31F9-000001334140FC21\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060FB0000C08000-000000067F00004005000060FB0000C0C000__0000003D2AB09B68\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30006DCC000-000000067F00004005000060F30006DF0000__000001848D082B20\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30004B221FE-00
0000067F00004005000060F30004B2B250__0000013C9C0E3339-0000013FEFA7D709\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005016EA00C00018C4000-000000067F00004005016EA00C00018E0000__000001B3F17FE4E0\":{\"file_size\":134422528,\"generation\":11,\"shard\":\"0008\"},\"000000067F000040050081DB430000564000-000000067F000040050081DB430000578000__0000009A24DF6768\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F3000274A080-000000067F00004005000060F30100000000__000000B2B5C4E8F9-000000B768469051\":{\"file_size\":199057408,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F300046D0EA8-000000067F00004005000060F3000471200E__000001334140FC21-00000137115BE4D9\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060FB0001114000-000000067F00004005000060FB0001120000__00000054161C34B8\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30003FEC000-000000067F00004005000060F30003FF0000__00000126C3C69FC0\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F10000368000-000000067F00004005000060F10100000000__0000003959DA2DE9-0000003D03FCCDB9\":{\"file_size\":269967360,\"generation\":2,\"shard\":\"0008\"},\"000000067F0000400500C782E4000012A71E-030000000000000000000000000000000002__000000D037B2DBD0\":{\"file_size\":139264,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30006C98000-000000067F00004005000060F30006C9C000__000001848D082B20\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F300055BC000-000000067F00004005000060F30005610000__00000159B010F6C0\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060FB0000F050F2-030000000000000000000000000000000002__00000047F1F2B800\":{\"file_size\":147456,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30002484000-000000067F00004005000060F300024D8000__00000
0AFE87558B0\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30003FE8000-000000067F00004005000060F30003FEC000__00000126C3C69FC0\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F0000400500DBCED500000A8000-000000067F0000400500DBCED500000AC000__000000E4D847F4E0\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F700006C3D76-000000067F00004005000060F80100000000__000000663565F8C9-000000698AF6E809\":{\"file_size\":139821056,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30002534000-000000067F00004005000060F3000253B7A3__000000AFE87558B0\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F3000412D27C-000000067F00004005000060F30004156457__00000122A7BB7B29-0000012694E36301\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F70000910000-000000067F00004005000060F700009385D4__0000008DBE2855F9-000000923719A971\":{\"file_size\":268460032,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30002510000-000000067F00004005000060F30002514000__000000AFE87558B0\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30002210000-000000067F00004005000060F30002214000__000000A5A3F27398\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30003FF4000-000000067F00004005000060F30004070000__00000126C3C69FC0\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005016EA00C0001BBFA66-000000067F00004005016EA00C0001C078FA__000001B6FFE46BC9-000001BA93C39481\":{\"file_size\":268451840,\"generation\":11,\"shard\":\"0008\"},\"000000067F00004005000060F3000424A099-000000067F00004005000060F3000428313F__0000012694E36301-0000012A3F140591\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F300036F91FE-000000067F00004005000060F30100000000__000000FCCD5238B1-000000
FF8B261599\":{\"file_size\":164118528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060FB0000718000-000000067F00004005000060FB000071C000__0000002427BD8BD0\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005010F44EB000000C000-000000067F00004005010F44EB0100000000__00000126C3C69FC0\":{\"file_size\":70696960,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30005214000-000000067F00004005000060F30005240000__0000014EDD256548\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060FB0000A7AF6E-030000000000000000000000000000000002__000000321AA80270\":{\"file_size\":147456,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30005063187-000000067F00004005000060F300050A412B__0000014784964B91-0000014B000D1821\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F100005E8000-000000067F00004005000060F100005F821C__000000636DE92159-000000663565F8C9\":{\"file_size\":268460032,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F300020830BE-000000067F00004005000060F300020FC052__0000009DF02C1241-000000A173C00489\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F300065BB235-000000067F00004005000060F300065F42B4__000001715E483C79-000001751A7D7589\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F0000400500FA2AD30000034000-000000067F0000400500FA2AD3000004D85C__0000010D77B487A0\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005016EA00C00017A8000-000000067F00004005016EA00C00017AC000__000001B3F17FE4E0\":{\"file_size\":134422528,\"generation\":11,\"shard\":\"0008\"},\"000000067F00004005000060FB00008D8000-000000067F00004005000060FB0000928B45__00000028C365FBE1-0000002D2A8E0B81\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30000798000-000000067F00004005000060F300007C1007__00000023FEF9F321-00000028C36
5FBE1\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F0000400500D19D030000040000-000000067F0000400500D19D030000047EE2__000000DBBFA87AE1-000000DE2A8E4FC9\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30001AB1583-000000067F00004005000060F50100000000__00000081AA3C40F0\":{\"file_size\":24576,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30001AD8000-000000067F00004005000060F30001B09104__0000008196C976A1-0000008625CF2891\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060FB0000E1B859-030000000000000000000000000000000002__000000417D21ACF9-00000044B4679349\":{\"file_size\":156844032,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060FB0001E9C000-000000067F00004005000060FB0001EA8000__0000009A24DF6768\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060FB0001374000-000000067F00004005000060FB0001398000__000000603CA8F2F0\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060FB000155C000-000000067F00004005000060FB0001590000__000000698F2C3A38\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F0000400500F3A25C00000EA069-000000067F0000400500F3A25C000010C0D1__000001048B25A8E9-0000010779A7F551\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F3000568C000-000000067F00004005000060F30005698000__00000159B010F6C0\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060FB0000C74000-000000067F00004005000060FB0000C98000__0000003D2AB09B68\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F700004F0000-000000067F00004005000060F80100000000__00000047E31D98D1-0000004C49155071\":{\"file_size\":264921088,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30005598000-000000067F00004005000060F3000559C000__00000159B010F6C0\":{\"file_size\":
134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F70001429534-000000067F00004005000060F80100000000__00000122A7BB7B29-0000012694E36301\":{\"file_size\":231964672,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F70000780000-000000067F00004005000060F80100000000__000000722F474369-00000075CC373F31\":{\"file_size\":263340032,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F300019F31AA-000000067F00004005000060F30100000000__00000079F2A2F311-0000007E3A9BFD29\":{\"file_size\":168484864,\"generation\":2,\"shard\":\"0008\"},\"000000067F000040050081DB430000822079-000000067F000040050081DB43000082C0F1__000000A583FBFB91-000000A9EB8C4489\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F000040050081DB4300007AC000-000000067F000040050081DB4300007F913A__000000A5A3F27398\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30005847319-000000067F00004005000060F300058C8000__00000159A7EC8CB9-0000015DD1D3C809\":{\"file_size\":261505024,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060FB0001E21687-000000067F00004005000060FB0100000000__000000923719A971-00000096262826C9\":{\"file_size\":224403456,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30003C98000-000000067F00004005000060F30003CB8FCF__00000117EDA82C11-0000011B632CC319\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F000040050081DB43000045029C-030000000000000000000000000000000002__0000008DBE2855F9-000000923719A971\":{\"file_size\":89505792,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F3000559C000-000000067F00004005000060F300055B8000__00000159B010F6C0\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F3000285901B-000000067F00004005000060F300028920E4__000000BAB1E56C91-000000BD9A7C56D9\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30000E64000-000000067F00004005000060
F30000E70000__00000047F1F2B800\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F300015FB022-000000067F00004005000060F3000160410C__000000698AF6E809-0000006DDB29D589\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30006FDA081-000000067F00004005000060F30100000000__00000184624E5741-000001860C80A151\":{\"file_size\":202276864,\"generation\":2,\"shard\":\"0008\"},\"000000067F0000400500EB4A480000107973-000000067F0000400500EE16BC0100000000__000000F309FCDD19-000000F6661C9241\":{\"file_size\":275456000,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F300031C40D1-000000067F00004005000060F300031D516C__000000DE2A8E4FC9-000000E1CD2FBBE9\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005016EA00C00001F7D38-000000067F00004005016EA00C000020FBCF__000001880F984A29-0000018C496B6DB1\":{\"file_size\":268451840,\"generation\":11,\"shard\":\"0008\"},\"000000067F0000400500FDA1F80100000000-000000067F0000400500FF2A510000004000__0000010D77B487A0\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F70001182EC9-000000067F00004005000060F80100000000__000000FF8B261599-000001048B25A8E9\":{\"file_size\":174284800,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F700011528FB-000000067F00004005000060F70001182EC9__000000FF8B261599-000001048B25A8E9\":{\"file_size\":268460032,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F300024DC000-000000067F00004005000060F30002510000__000000AFE87558B0\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060FB00000B0000-030000000000000000000000000000000002__000000021DC73119-000000044854EBD1\":{\"file_size\":259375104,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060FB0001DF0B43-000000067F00004005000060FB0001E21687__000000923719A971-00000096262826C9\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F0
0004005000060F10000088000-000000067F00004005000060F10000090000__00000008B6B51879-0000000D55A212C9\":{\"file_size\":264142848,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30003968000-000000067F00004005000060F3000396C000__0000010D77B487A0\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005016EA00C00017AC000-000000067F00004005016EA00C00017E8000__000001B3F17FE4E0\":{\"file_size\":134422528,\"generation\":11,\"shard\":\"0008\"},\"000000067F00004005000060F1000019C73D-000000067F00004005000060F20100000000__0000001B59EEB909-0000001FFBC01501\":{\"file_size\":124698624,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F700001F8000-000000067F00004005000060F700002005D2__0000001B59EEB909-0000001FFBC01501\":{\"file_size\":268460032,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060FB0001110000-000000067F00004005000060FB0001114000__00000054161C34B8\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F1000019842A-000000067F00004005000060F20100000000__0000001737D88379-0000001B59EEB909\":{\"file_size\":145137664,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F700003BC000-000000067F00004005000060F700003C0000__0000003D2AB09B68\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060FB0000280000-000000067F00004005000060FB0000284000__0000000D80565628\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F0000400500DBCED5000007C000-000000067F0000400500DBCED500000A8000__000000E4D847F4E0\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__000001BCB5732691-000001BCB5734CD9\":{\"file_size\":24576,\"generation\":239,\"shard\":\"0008\"},\"000000067F00004005010660F70100000000-000000067F000040050107B547000006C000__000001180B3FF408\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30000C
24000-000000067F00004005000060F30000CA0000__0000003D2AB09B68\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F3000569C000-000000067F00004005000060F300056D8000__00000159B010F6C0\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005016EA00C00000C7A73-030000000000000000000000000000000002__0000018624969469-000001880F984A29\":{\"file_size\":40566784,\"generation\":11,\"shard\":\"0008\"},\"000000067F00004005000060F30001344000-000000067F00004005000060F30001358000__000000603CA8F2F0\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30001F38F48-000000067F00004005000060F50100000000__0000009A24DF6768\":{\"file_size\":24576,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30001760000-000000067F00004005000060F30001789027__0000006DDB29D589-000000722F474369\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F1000018821A-000000067F00004005000060F1000019842A__0000001737D88379-0000001B59EEB909\":{\"file_size\":268460032,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F300059AA115-000000067F00004005000060F300059B324D__0000015DD1D3C809-0000016143292911\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060FB0001400000-000000067F00004005000060FB0001404000__000000603CA8F2F0\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F0000400500EB4A4800000E7A62-000000067F0000400500EB4A480000107973__000000F309FCDD19-000000F6661C9241\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30000498000-000000067F00004005000060F3000049C000__000000174479FC18\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F70000D24000-000000067F00004005000060F70000D38000__000000C483D0D6B8\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F000040050081DB43000120E409-000000067F0000400
50081DB430300000000__0000018613F0A050\":{\"file_size\":24576,\"generation\":3,\"shard\":\"0008\"},\"000000067F00004005000060FB0001A8A1CD-000000067F00004005000060FB0100000000__0000007E3A9BFD29-0000008196C976A1\":{\"file_size\":199622656,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30006270000-000000067F00004005000060F50100000000__0000016E41E03CA0\":{\"file_size\":71114752,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060FB0000BAAD15-030000000000000000000000000000000002__0000003579F03331-0000003959DA2DE9\":{\"file_size\":182321152,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F700016205B5-000000067F00004005000060F80100000000__0000012E71CF31F9-000001334140FC21\":{\"file_size\":266862592,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F300030C0FE5-000000067F00004005000060F30003102107__000000DE2A8E4FC9-000000E1CD2FBBE9\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005016EA00C00004BC000-000000067F00004005016EA00C00004E8000__000001936E73D028\":{\"file_size\":134422528,\"generation\":11,\"shard\":\"0008\"},\"000000067F00004005000060F10000440000-000000067F00004005000060F1000046821B__00000047E31D98D1-0000004C49155071\":{\"file_size\":268460032,\"generation\":2,\"shard\":\"0008\"},\"000000067F000040050081DB4300009C8000-000000067F000040050081DB4300009CC000__000000AFE87558B0\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F7000106C000-000000067F00004005000060F700010AABC7__000000EFDE07FFD8\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F3000367733F-000000067F00004005000060F50100000000__000000F91FE84F08\":{\"file_size\":24576,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005016EA00C0000478000-000000067F00004005016EA00C000047C000__000001936E73D028\":{\"file_size\":134422528,\"generation\":11,\"shard\":\"0008\"},\"000000067F00004005000060F30002E4104A-000000067F00004005000060F30002E4A157__00000
0D31E48D7C9-000000D74E29AAD1\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060FB0001370000-000000067F00004005000060FB0001374000__000000603CA8F2F0\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30004B1111A-000000067F00004005000060F30004B221FE__0000013C9C0E3339-0000013FEFA7D709\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005016EA00C000029C000-000000067F00004005016EA00C00002D0000__000001BCB572A4E0\":{\"file_size\":134422528,\"generation\":17,\"shard\":\"0008\"},\"000000067F00004005000060F30001C3C000-000000067F00004005000060F30001CC0000__0000008DDCD70B68\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060FB000136C000-000000067F00004005000060FB0001370000__000000603CA8F2F0\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F10000488000-000000067F00004005000060F10100000000__0000004C49155071-0000004F31878919\":{\"file_size\":268754944,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30000B0300C-000000067F00004005000060F60100000000__0000003203FB5749-0000003579F03331\":{\"file_size\":212885504,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005016EA00C0001C0F79A-000000067F00004005016EA00C0001C3F636__000001B6FFE46BC9-000001BA93C39481\":{\"file_size\":268451840,\"generation\":11,\"shard\":\"0008\"},\"000000067F00004005000060F3000399C000-000000067F00004005000060F300039A0000__0000010D77B487A0\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F70001574000-000000067F00004005000060F700015A195C__0000012E77D3BF00\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30005B00697-000000067F00004005000060F30100000000__0000015DD1D3C809-0000016143292911\":{\"file_size\":282025984,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F300050C8000-000000067F00004005000060F300050CC000__00000
14EDD256548\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F700000885C5-000000067F00004005000060F80100000000__000000044854EBD1-00000008B6B51879\":{\"file_size\":253878272,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30001407F7A-000000067F00004005000060F50100000000__000000603CA8F2F0\":{\"file_size\":24576,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F70000B90000-000000067F00004005000060F70000B94000__000000AFE87558B0\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F000040050081DB430000560000-000000067F000040050081DB430000564000__0000009A24DF6768\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F70001720000-000000067F00004005000060F700017405D4__000001398B56A519-0000013C9C0E3339\":{\"file_size\":268460032,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F300043CC000-000000067F00004005000060F300043F8000__0000012E77D3BF00\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F3000129D29A-000000067F00004005000060F30100000000__00000057593D8169-0000005C01565329\":{\"file_size\":110788608,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F300003F9F83-000000067F00004005000060F30000402F4A__000000114A805939-00000013FB921C81\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F70001940000-000000067F00004005000060F700019685CE__0000014784964B91-0000014B000D1821\":{\"file_size\":268460032,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F300043B8000-000000067F00004005000060F300043BC000__0000012E77D3BF00\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30000370FD1-000000067F00004005000060F60100000000__0000000D55A212C9-000000114A805939\":{\"file_size\":232144896,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30003849093-000000067F00004005000060F300038720A2__000001048B2
5A8E9-0000010779A7F551\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F100003C0432-000000067F00004005000060F20100000000__0000003D03FCCDB9-000000417D21ACF9\":{\"file_size\":262701056,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F700014F85DF-000000067F00004005000060F70001510BBE__0000012694E36301-0000012A3F140591\":{\"file_size\":268460032,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F3000253B7A3-000000067F00004005000060F50100000000__000000AFE87558B0\":{\"file_size\":24576,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060FB0001404000-000000067F00004005000060FB0001408000__000000603CA8F2F0\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30003F942CF-000000067F00004005000060F30003FCD352__0000011F1A40FA69-00000122A7BB7B29\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060FB0000B38000-000000067F00004005000060FB0000B58B45__0000003579F03331-0000003959DA2DE9\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F70000B505C8-000000067F00004005000060F80100000000__000000A9EB8C4489-000000ACA44C8E99\":{\"file_size\":226459648,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F3000612D506-000000067F00004005000060F30006166575__0000016B49A934C1-0000016E1FBB7B99\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F700000DC000-000000067F00004005000060F700000E0000__0000000D80565628\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F0000400500FB3D31000000C000-000000067F0000400500FB3D310000018000__0000010D77B487A0\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__000001BCB572C329-000001BCB572C481\":{\"file_size\":24576,\"generation\":19,\"shard\":\"0008\"},\"000000067F00004005000060F30002828000-000000067F00004005000060
F3000282C000__000000BAC0041E18\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F300015B0000-000000067F00004005000060F300015B4000__000000698F2C3A38\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F0000400500DBCED50000078000-000000067F0000400500DBCED5000007C000__000000E4D847F4E0\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F000040050081DB43000086E169-030000000000000000000000000000000002__000000A583FBFB91-000000A9EB8C4489\":{\"file_size\":77471744,\"generation\":2,\"shard\":\"0008\"},\"000000067F0000400501046F39000000BDD2-000000067F00004005010660F500000161F7__0000010FB1BE19B9-00000113456156F1\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F0000400500FB3D3101FFFFFFFF-000000067F0000400500FB3D310300000000__00000122A7BB7B29-0000012694E36301\":{\"file_size\":24576,\"generation\":2,\"shard\":\"0008\"},\"000000067F0000400500EE16BC00000F28ED-030000000000000000000000000000000002__000000F91FE84F08\":{\"file_size\":147456,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30004E9307A-000000067F00004005000060F30004EA41A5__000001440D3D0C69-0000014784964B91\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060FB00016D21CF-030000000000000000000000000000000002__000000698AF6E809-0000006DDB29D589\":{\"file_size\":226353152,\"generation\":2,\"shard\":\"0008\"},\"000000067F0000400500EB4A4800001876BD-000000067F0000400500EB4A48000018F5CD__000000F6661C9241-000000F901689359\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F0000400500C782E400002E5B84-030000000000000000000000000000000002__000000DBD29DC248\":{\"file_size\":139264,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F70000D8985C-000000067F00004005000060F70000DA1E38__000000C462B3C2A9-000000C824C09619\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F000040050081DB430000C28000-000000067F0000400500
81DB430000C3A075__000000B768469051-000000BAB1E56C91\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F3000407201D-000000067F00004005000060F300040E319D__00000122A7BB7B29-0000012694E36301\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F6000002B3CE-000000067F00004005000060F60100000000__00000075E5D2A930\":{\"file_size\":24576,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F70000D60000-000000067F00004005000060F80100000000__000000C483D0D6B8\":{\"file_size\":133947392,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F70000F705D6-000000067F00004005000060F80100000000__000000DE2A8E4FC9-000000E1CD2FBBE9\":{\"file_size\":259842048,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30004E7A062-000000067F00004005000060F30004E9307A__000001440D3D0C69-0000014784964B91\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30006810000-000000067F00004005000060F30006814000__00000178C5D5D3A8\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F700007D05C8-000000067F00004005000060F80100000000__00000075CC373F31-00000079F2A2F311\":{\"file_size\":251740160,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000000000000000001-000000067F0000400500000A690000000002__0000018624969469-000001880F984A29\":{\"file_size\":40960,\"generation\":11,\"shard\":\"0008\"},\"000000067F00004005000060FB00014D8000-000000067F00004005000060FB0001530B44__000000636DE92159-000000663565F8C9\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060FB0001EA8000-000000067F00004005000060FB0001EAC000__0000009A24DF6768\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F3000230A0C7-000000067F00004005000060F30100000000__000000A583FBFB91-000000A9EB8C4489\":{\"file_size\":213680128,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30000A9
8000-000000067F00004005000060F30000AC9024__0000003203FB5749-0000003579F03331\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30003F72201-000000067F00004005000060F30003F7B254__0000011F1A40FA69-00000122A7BB7B29\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005016EA00C0000498000-000000067F00004005016EA00C000049C000__000001936E73D028\":{\"file_size\":134422528,\"generation\":11,\"shard\":\"0008\"},\"000000067F00004005000060F30004CB8000-000000067F00004005000060F30004CBC000__000001444EB7FC10\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F300042CC1BD-000000067F00004005000060F300042D51D6__0000012694E36301-0000012A3F140591\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F0000400500FB3D310000028681-000000067F0000400500FB3D320100000000__0000010D77B487A0\":{\"file_size\":24576,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F3000474302B-000000067F00004005000060F300047EC0CA__000001334140FC21-00000137115BE4D9\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30003204000-000000067F00004005000060F30003278000__000000E4D847F4E0\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F300024020ED-000000067F00004005000060F3000240B12A__000000A9EB8C4489-000000ACA44C8E99\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F3000216C000-000000067F00004005000060F30002170000__000000A5A3F27398\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F6000005DD43-000000067F00004005000060F60100000000__000000EFDE07FFD8\":{\"file_size\":24576,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060FB0000348B45-000000067F00004005000060FB000037968A__0000000D55A212C9-000000114A805939\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F000040050081DB430000778000-0000
00067F000040050081DB43000077C000__000000A5A3F27398\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F000040050081DB4300011B4000-000000067F000040050081DB43000120E409__000000DBD29DC248\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30003CCA0B9-000000067F00004005000060F30003D0B155__00000117EDA82C11-0000011B632CC319\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060FB00009D4000-000000067F00004005000060FB0000A7AF6E__000000321AA80270\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F700008F0000-000000067F00004005000060F80100000000__00000089F4693119-0000008DBE2855F9\":{\"file_size\":262905856,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30006CA0000-000000067F00004005000060F30006CA4000__000001848D082B20\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060FB0000E021D0-000000067F00004005000060FB0000E0AD15__000000417D21ACF9-00000044B4679349\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30003654000-000000067F00004005000060F3000367733F__000000F91FE84F08\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F70000DC0000-000000067F00004005000060F70000DE05C8__000000C824C09619-000000CC13D2E549\":{\"file_size\":268460032,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F700018D85CA-000000067F00004005000060F80100000000__000001440D3D0C69-0000014784964B91\":{\"file_size\":260775936,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060FB0000EAC000-000000067F00004005000060FB0000EB8000__00000047F1F2B800\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30000E70000-000000067F00004005000060F30000E74000__00000047F1F2B800\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30005FE621A-000000067F0000400500006
0F30005FFF23F__0000016834A3FC91-0000016B49A934C1\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F70000D20000-000000067F00004005000060F70000D24000__000000C483D0D6B8\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30005244000-000000067F00004005000060F3000525C065__0000014EDD256548\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F0000400501025D9001FFFFFFFF-000000067F0000400501025D900300000000__0000011B632CC319-0000011F1A40FA69\":{\"file_size\":24576,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30001CD4000-000000067F00004005000060F30001CE0000__0000008DDCD70B68\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005016EA00C0000E77906-000000067F00004005016EA00C0000E7F7A7__0000019E2C5DCEE1-000001A1DD8B4481\":{\"file_size\":268451840,\"generation\":11,\"shard\":\"0008\"},\"000000067F00004005000060F300046B41AA-000000067F00004005000060F30100000000__0000012E71CF31F9-000001334140FC21\":{\"file_size\":199688192,\"generation\":2,\"shard\":\"0008\"},\"000000067F000040050100D04D00000634BB-030000000000000000000000000000000002__0000010D5DC42EF9-0000010FB1BE19B9\":{\"file_size\":173744128,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30000CA4000-000000067F00004005000060F30000CB16B6__0000003D2AB09B68\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30004DDC000-000000067F00004005000060F30004DF086C__000001444EB7FC10\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30005D7F2DE-000000067F00004005000060F30005DA03A8__00000164DEE06671-0000016834A3FC91\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F300048A0000-000000067F00004005000060F300048A4000__00000139CF156B58\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F100003954D3-000000067F00004005000060F201
00000000__0000003D2AB09B68\":{\"file_size\":24576,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F300043BC000-000000067F00004005000060F300043C8000__0000012E77D3BF00\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005016EA00C0001D1C000-000000067F00004005016EA00C0001D78000__000001BCB572A4E0\":{\"file_size\":134422528,\"generation\":17,\"shard\":\"0008\"},\"000000067F00004005000060F100000D8000-000000067F00004005000060F100000E021B__0000000D55A212C9-000000114A805939\":{\"file_size\":268460032,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F300060A0282-000000067F00004005000060F300060A93B5__0000016834A3FC91-0000016B49A934C1\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F1000021D8F8-000000067F00004005000060F20100000000__00000023FEF9F321-00000028C365FBE1\":{\"file_size\":88227840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30000018000-000000067F00004005000060F3000001C000__000000027AF9D7D0\":{\"file_size\":134422528,\"generation\":1,\"shard\":\"0008\"},\"000000067F000040050081DB430000E48000-000000067F000040050081DB430000E4C000__000000C483D0D6B8\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F300003E8FBC-000000067F00004005000060F300003F9F83__000000114A805939-00000013FB921C81\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30004868000-000000067F00004005000060F3000486C000__00000139CF156B58\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F700013D0000-000000067F00004005000060F700013E85D1__0000011F1A40FA69-00000122A7BB7B29\":{\"file_size\":268460032,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060FB0001203856-030000000000000000000000000000000002__0000005413AB3641-00000057593D8169\":{\"file_size\":157130752,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F3000029C000-000000067F00004005000060F300002C48
87__0000000D80565628\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30005160000-000000067F00004005000060F30005164000__0000014EDD256548\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F0000400500FB3D31000001C000-000000067F0000400500FB3D310000028681__0000010D77B487A0\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005016EA00C000029F90B-000000067F00004005016EA00C00002D77AE__000001880F984A29-0000018C496B6DB1\":{\"file_size\":268451840,\"generation\":11,\"shard\":\"0008\"},\"000000067F00004005000060F30003620000-000000067F00004005000060F30100000000__000000F309FCDD19-000000F6661C9241\":{\"file_size\":249372672,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30003B90000-000000067F00004005000060F30003B94000__000001180B3FF408\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F300001F4000-000000067F00004005000060F30000208000__0000000D80565628\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30001BB8000-000000067F00004005000060F30001C00FE1__0000008625CF2891-00000089F4693119\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30005210000-000000067F00004005000060F30005214000__0000014EDD256548\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30002070F71-000000067F00004005000060F30002079FDE__0000009DF02C1241-000000A173C00489\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30000B40000-000000067F00004005000060F30000BB103B__0000003579F03331-0000003959DA2DE9\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F10000290000-000000067F00004005000060F10000298000__00000028C365FBE1-0000002D2A8E0B81\":{\"file_size\":264134656,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005016EA00C00007C7B9C-000000067F00004005016EA00C0000807A3
4__000001935283F9B9-00000196C9018F59\":{\"file_size\":268451840,\"generation\":11,\"shard\":\"0008\"},\"000000067F00004005000060FB0001548000-000000067F00004005000060FB000154C000__000000698F2C3A38\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F100005FC000-000000067F00004005000060F1000062EE46__000000698F2C3A38\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F0000400500EE16BC00001A0000-000000067F0000400500EE16BC00001A4000__00000104BD37F348\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005016EA00C0000F94000-000000067F00004005016EA00C0000F98000__000001A95031E5B8\":{\"file_size\":134422528,\"generation\":11,\"shard\":\"0008\"},\"000000067F00004005000060F70000290000-000000067F00004005000060F80100000000__00000023FEF9F321-00000028C365FBE1\":{\"file_size\":265764864,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060FB0001BC0B44-000000067F00004005000060FB0001BD1689__0000008625CF2891-00000089F4693119\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F3000337DCF2-000000067F00004005000060F30003386D10__000000E7C2F1B249-000000EBC9213D59\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F300045C1062-000000067F00004005000060F3000460202F__0000012E71CF31F9-000001334140FC21\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30006814000-000000067F00004005000060F30006850000__00000178C5D5D3A8\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005016EA00C000073DFA8-000000067F00004005016EA00C000079FCFA__000001935283F9B9-00000196C9018F59\":{\"file_size\":268451840,\"generation\":11,\"shard\":\"0008\"},\"000000067F00004005016EA00C000178C000-000000067F00004005016EA00C00017A8000__000001B3F17FE4E0\":{\"file_size\":134422528,\"generation\":11,\"shard\":\"0008\"},\"000000067F00004005000060F1000051D1AE-000000067F00004005000060F20100000
000__00000057593D8169-0000005C01565329\":{\"file_size\":103145472,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F300034BD86C-000000067F00004005000060F30100000000__000000EBC9213D59-000000EFA7EAA9E1\":{\"file_size\":95617024,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005016EA00C0000008000-000000067F00004005016EA00C000000FEA0__0000018624969469-000001880F984A29\":{\"file_size\":268451840,\"generation\":11,\"shard\":\"0008\"},\"000000067F00004005000060F1000014C000-000000067F00004005000060F1000015F545__000000174479FC18\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F0000400500FB3D300000000EAB-000000067F0000400500FB3D300100000000__0000010D5DC42EF9-0000010FB1BE19B9\":{\"file_size\":12976128,\"generation\":2,\"shard\":\"0008\"},\"000000067F000040050081DB430000028000-000000067F000040050081DB43000002C000__00000075E5D2A930\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060FB0001BD1689-000000067F00004005000060FB0100000000__0000008625CF2891-00000089F4693119\":{\"file_size\":223690752,\"generation\":2,\"shard\":\"0008\"},\"000000067F0000400500EB4A480000000000-000000067F0000400500EB4A480000000001__000000FF8B261599-000001048B25A8E9\":{\"file_size\":32768,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30003D952B0-000000067F00004005000060F30003DAE2DC__0000011B632CC319-0000011F1A40FA69\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F70000B30000-000000067F00004005000060F70000B505C8__000000A9EB8C4489-000000ACA44C8E99\":{\"file_size\":268460032,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F3000549D0A6-000000067F00004005000060F300055861F2__0000015304A396B9-0000015670D6AFD9\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F1000046821B-000000067F00004005000060F20100000000__00000047E31D98D1-0000004C49155071\":{\"file_size\":266969088,\"generation\":2,\"shard\":\"0008\"},\"000000067
F00004005000060F300043C8000-000000067F00004005000060F300043CC000__0000012E77D3BF00\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30001E720A2-000000067F00004005000060F30100000000__000000923719A971-00000096262826C9\":{\"file_size\":141344768,\"generation\":2,\"shard\":\"0008\"},\"000000067F000040050081DB4300003A8000-000000067F000040050081DB4300003AC000__0000008DDCD70B68\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F700006AB7A6-000000067F00004005000060F700006C3D76__000000663565F8C9-000000698AF6E809\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F3000570A19E-000000067F00004005000060F3000573B206__00000159A7EC8CB9-0000015DD1D3C809\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30003AF28CB-000000067F00004005000060F30003B33945__0000010FB1BE19B9-00000113456156F1\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060FB00015CC000-000000067F00004005000060FB00015D8000__000000698F2C3A38\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F0000400500D69D7900000A9CFB-000000067F0000400500D69D7900000D1C5F__000000EFA7EAA9E1-000000F309FCDD19\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30002A30000-000000067F00004005000060F30002A34000__000000C483D0D6B8\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F3000047C000-000000067F00004005000060F30000498000__000000174479FC18\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30005FFF23F-000000067F00004005000060F300060A0282__0000016834A3FC91-0000016B49A934C1\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005016EA00C000029C194-000000067F00004005016EA00C00004EF809__0000018EC67807C9-000001935283F9B9\":{\"file_size\":268451840,\"generation\":11,\"shard\":\"000
8\"},\"000000067F00004005000060F30006D64000-000000067F00004005000060F30006DC8000__000001848D082B20\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005016EA00C0001340000-000000067F00004005016EA00C0001344000__000001A95031E5B8\":{\"file_size\":134422528,\"generation\":11,\"shard\":\"0008\"},\"000000067F00004005016EA00C0000BB0000-000000067F00004005016EA00C0000BB4000__0000019E7001E460\":{\"file_size\":134422528,\"generation\":11,\"shard\":\"0008\"},\"000000067F0000400500EB4A480000000000-000000067F0000400500EB4A480000007F0F__000000F309FCDD19-000000F6661C9241\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F0000400500E3A2A10000114000-000000067F0000400500E3A2A1000016321A__000000EFDE07FFD8\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F000040050081DB430000578000-030000000000000000000000000000000002__0000009A24DF6768\":{\"file_size\":107642880,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30006798000-000000067F00004005000060F3000679C000__00000178C5D5D3A8\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F100000E021B-000000067F00004005000060F1000010043F__0000000D55A212C9-000000114A805939\":{\"file_size\":268460032,\"generation\":2,\"shard\":\"0008\"},\"000000067F000040050081DB430000DA8000-030000000000000000000000000000000002__000000BD9A7C56D9-000000C0C9EB88E1\":{\"file_size\":233201664,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F100004EC079-000000067F00004005000060F20100000000__00000054161C34B8\":{\"file_size\":24576,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F7000170C000-000000067F00004005000060F70001720000__00000139CF156B58\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F70000FCD85E-000000067F00004005000060F80100000000__000000E4D847F4E0\":{\"file_size\":24576,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005016EA00C00015B74FF-000000067F00004
005016EA00C00015FF3A0__000001AC25760149-000001AFC313C819\":{\"file_size\":268451840,\"generation\":11,\"shard\":\"0008\"},\"000000067F00004005000060F30000AC9024-000000067F00004005000060F30000ADA0D0__0000003203FB5749-0000003579F03331\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F0000400500EE16C40100000000-000000067F0000400500F3A25C000006C000__00000104BD37F348\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F0000400500D69D7900000F1B5B-000000067F0000400500D69D790100000000__000000EFA7EAA9E1-000000F309FCDD19\":{\"file_size\":233275392,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30003C0C000-000000067F00004005000060F30003C257AD__000001180B3FF408\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30000E44000-000000067F00004005000060F30000E60000__00000047F1F2B800\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F7000018E4B6-000000067F00004005000060F7000019EA78__0000001737D88379-0000001B59EEB909\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005016EA00C00017E8000-000000067F00004005016EA00C00017EC000__000001B3F17FE4E0\":{\"file_size\":134422528,\"generation\":11,\"shard\":\"0008\"},\"000000067F00004005000060F30003A4C09C-000000067F00004005000060F30003A6D1B3__0000010D5DC42EF9-0000010FB1BE19B9\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F100000260F2-000000067F00004005000060F20100000000__000000027AF9D7D0\":{\"file_size\":24576,\"generation\":1,\"shard\":\"0008\"},\"000000067F00004005016EA00C0000097BDA-000000067F00004005016EA00C00000C7A73__0000018624969469-000001880F984A29\":{\"file_size\":268451840,\"generation\":11,\"shard\":\"0008\"},\"000000067F0000400500C782E400002CDD5C-030000000000000000000000000000000002__000000D31E48D7C9-000000D74E29AAD1\":{\"file_size\":90923008,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F3000685C000-00
0000067F00004005000060F30006860000__00000178C5D5D3A8\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060FB0001C84000-000000067F00004005000060FB0001CE16ED__0000008DDCD70B68\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F000040050081DB430000CC4BC2-000000067F000040050081DB430000CD6C36__000000BAB1E56C91-000000BD9A7C56D9\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30006349DA2-000000067F00004005000060F30006382F14__0000016E1FBB7B99-000001715E483C79\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F3000212E160-000000067F00004005000060F30100000000__0000009DF02C1241-000000A173C00489\":{\"file_size\":224731136,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30001FF8691-000000067F00004005000060F30100000000__0000009A1ABDE921-0000009DF02C1241\":{\"file_size\":256114688,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F300067F4000-000000067F00004005000060F30006810000__00000178C5D5D3A8\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F700015A8000-000000067F00004005000060F700016205B5__0000012E71CF31F9-000001334140FC21\":{\"file_size\":268460032,\"generation\":2,\"shard\":\"0008\"},\"000000067F0000400500D69D790000024000-000000067F0000400500D69D790000028000__000000EFDE07FFD8\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F700007AE010-000000067F00004005000060F80100000000__00000075E5D2A930\":{\"file_size\":24576,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005016EA00C0000428000-000000067F00004005016EA00C000042C000__000001936E73D028\":{\"file_size\":134422528,\"generation\":11,\"shard\":\"0008\"},\"000000067F00004005000060F30001E74000-000000067F00004005000060F30001F28000__0000009A24DF6768\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F300038FF04F-000000067F00004005000060
F30100000000__0000010779A7F551-0000010A5E65DF39\":{\"file_size\":45359104,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005016EA00C0001B0FD2A-000000067F00004005016EA00C0001B4FBC9__000001B6FFE46BC9-000001BA93C39481\":{\"file_size\":268451840,\"generation\":11,\"shard\":\"0008\"},\"000000067F00004005000060F30006858000-000000067F00004005000060F3000685C000__00000178C5D5D3A8\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30002F9A0EB-000000067F00004005000060F30002FD317C__000000D74E29AAD1-000000DBBFA87AE1\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F000040050081DB430000808000-000000067F000040050081DB430000822079__000000A583FBFB91-000000A9EB8C4489\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060FB00015DC000-000000067F00004005000060FB00015F0000__000000698F2C3A38\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F7000021C000-000000067F00004005000060F7000025DA3C__0000002427BD8BD0\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F0000400500D69D79000007C000-000000067F0000400500D69D7900000A8000__000000EFDE07FFD8\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F6000001EE3D-000000067F00004005000060F60100000000__00000054161C34B8\":{\"file_size\":24576,\"generation\":2,\"shard\":\"0008\"},\"000000067F000040050081DB430000F4E15B-030000000000000000000000000000000002__000000C462B3C2A9-000000C824C09619\":{\"file_size\":73662464,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30001F28000-000000067F00004005000060F30001F2C000__0000009A24DF6768\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F000040050081DB4300001F1DA6-030000000000000000000000000000000002__00000081AA3C40F0\":{\"file_size\":139264,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F70001758B92-000000067F00004005000060F70001771169__000001398B56A
519-0000013C9C0E3339\":{\"file_size\":268460032,\"generation\":2,\"shard\":\"0008\"},\"000000067F0000400500E3A2A10000010000-000000067F0000400500E3A2A10000017F02__000000E7C2F1B249-000000EBC9213D59\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30002A98000-000000067F00004005000060F30002A9C000__000000C483D0D6B8\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F3000573B206-000000067F00004005000060F300057942F4__00000159A7EC8CB9-0000015DD1D3C809\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060FB0000860B45-030000000000000000000000000000000002__00000023FEF9F321-00000028C365FBE1\":{\"file_size\":252788736,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F7000090B929-000000067F00004005000060F80100000000__0000008DDCD70B68\":{\"file_size\":24576,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F7000014B73D-000000067F00004005000060F80100000000__000000114A805939-00000013FB921C81\":{\"file_size\":146432000,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F70000D3C000-000000067F00004005000060F70000D60000__000000C483D0D6B8\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F70001514000-000000067F00004005000060F70001528000__0000012E77D3BF00\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005016EA00C0001764000-000000067F00004005016EA00C0001788000__000001B3F17FE4E0\":{\"file_size\":134422528,\"generation\":11,\"shard\":\"0008\"},\"000000067F00004005000060F30001358000-000000067F00004005000060F3000135C000__000000603CA8F2F0\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060FB0001594000-000000067F00004005000060FB00015C8000__000000698F2C3A38\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F300004AC000-000000067F00004005000060F300004B8000__000000174479FC18\":{\"file_size\":1
34422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30005610000-000000067F00004005000060F30005614000__00000159B010F6C0\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30002794000-000000067F00004005000060F300027C0000__000000BAC0041E18\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30004C60000-000000067F00004005000060F30004C64000__000001444EB7FC10\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F700003A0000-000000067F00004005000060F700003B85C7__0000003579F03331-0000003959DA2DE9\":{\"file_size\":268468224,\"generation\":2,\"shard\":\"0008\"},\"000000067F0000400500DBCED500000F1034-030000000000000000000000000000000002__000000E4C63CFA21-000000E7C2F1B249\":{\"file_size\":247480320,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F300051B4000-000000067F00004005000060F300051F0000__0000014EDD256548\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F6000003C77D-000000067F00004005000060F60100000000__000000A5A3F27398\":{\"file_size\":24576,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005010660F500000161F7-030000000000000000000000000000000002__0000010FB1BE19B9-00000113456156F1\":{\"file_size\":64757760,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30003F7B254-000000067F00004005000060F30003F942CF__0000011F1A40FA69-00000122A7BB7B29\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30004900000-000000067F00004005000060F30004904000__00000139CF156B58\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30006F18000-000000067F00004005000060F30006F1C000__000001848D082B20\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30003A21037-000000067F00004005000060F30003A31FB6__0000010D5DC42EF9-0000010FB1BE19B9\":{\"file_size\":268451840,\"generation\":
2,\"shard\":\"0008\"},\"000000067F00004005000060F30000DB0000-000000067F00004005000060F30000E40F86__000000417D21ACF9-00000044B4679349\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060FB0001A60B43-000000067F00004005000060FB0001A71688__0000007E3A9BFD29-0000008196C976A1\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30006DC8000-000000067F00004005000060F30006DCC000__000001848D082B20\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F700006E38F6-000000067F00004005000060F80100000000__000000698F2C3A38\":{\"file_size\":24576,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F3000122B1C9-000000067F00004005000060F300012442A9__00000057593D8169-0000005C01565329\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060FB0000EA8000-000000067F00004005000060FB0000EAC000__00000047F1F2B800\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F70001B5A072-000000067F00004005000060F80100000000__00000159B010F6C0\":{\"file_size\":24576,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005016EA00C000144DCA3-000000067F00004005016EA00C000151F7C5__000001AC25760149-000001AFC313C819\":{\"file_size\":268451840,\"generation\":11,\"shard\":\"0008\"},\"000000067F00004005000060F600000711FF-000000067F00004005000060F60100000000__00000122E1129DA0\":{\"file_size\":24576,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F300050EC000-000000067F00004005000060F30005138000__0000014EDD256548\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30005260000-000000067F00004005000060F30005290FC9__0000014EC58A4A79-0000015304A396B9\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F700012DE407-000000067F00004005000060F80100000000__000001180B3FF408\":{\"file_size\":24576,\"generation\":2,\"shard\":\"0008\"},\"000000067F
00004005000060F70000F10000-000000067F00004005000060F70000F185D4__000000DBBFA87AE1-000000DE2A8E4FC9\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F70000D38000-000000067F00004005000060F70000D3C000__000000C483D0D6B8\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F6000006671F-000000067F00004005000060F60100000000__0000010D77B487A0\":{\"file_size\":24576,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F300059F53C6-000000067F00004005000060F30005A16504__0000015DD1D3C809-0000016143292911\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F000040050081DB430000B08000-000000067F000040050081DB430000B4A075__000000B2B5C4E8F9-000000B768469051\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F7000152C000-000000067F00004005000060F70001570000__0000012E77D3BF00\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30000128000-000000067F00004005000060F3000012C000__0000018624969468\":{\"file_size\":134422528,\"generation\":7,\"shard\":\"0008\"},\"000000067F00004005000060F70000E24000-000000067F00004005000060F70000E387D6__000000D037B2DBD0\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F000040050081DB4300002791D8-000000067F000040050081DB43000028B253__0000008196C976A1-0000008625CF2891\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F600000500F7-000000067F00004005000060F60100000000__000000D037B2DBD0\":{\"file_size\":24576,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F70000ABD9C4-000000067F00004005000060F80100000000__000000A5A3F27398\":{\"file_size\":24576,\"generation\":2,\"shard\":\"0008\"},\"000000067F000040050081DB4300009CC000-000000067F000040050081DB430000A10000__000000AFE87558B0\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F700002005D2-000000067F00004005000
060F80100000000__0000001B59EEB909-0000001FFBC01501\":{\"file_size\":261169152,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060FB0001AA656E-000000067F000040050081D80C0100000000__00000081AA3C40F0\":{\"file_size\":59138048,\"generation\":2,\"shard\":\"0008\"},\"000000067F000040050081DB430000E14000-000000067F000040050081DB430000E48000__000000C483D0D6B8\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30003DD734C-000000067F00004005000060F30003E40000__0000011B632CC319-0000011F1A40FA69\":{\"file_size\":261046272,\"generation\":2,\"shard\":\"0008\"},\"000000067F0000400500D19D0300FFFFFFFF-030000000000000000000000000000000002__000000DE2A8E4FC9-000000E1CD2FBBE9\":{\"file_size\":5373952,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30001588000-000000067F00004005000060F3000158C000__000000698F2C3A38\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F0000400500DBCED500000AC000-000000067F0000400500DBCED500000D0000__000000E4D847F4E0\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F0000400500EB4A48000013F89B-000000067F0000400500EB4A48000014F7AC__000000F6661C9241-000000F901689359\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F000040050081DB4300005D704F-000000067F000040050081DB4300006310C9__0000009A1ABDE921-0000009DF02C1241\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F000040050081DB430000A14000-000000067F000040050081DB430000A18000__000000AFE87558B0\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30001F574A6-000000067F00004005000060F30001FF8691__0000009A1ABDE921-0000009DF02C1241\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F0000400500FB3D320100000000-000000067F0000400500FDA1F80000014000__0000010D77B487A0\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30001B09104-000000067F00004005000060F3
0001B4A119__0000008196C976A1-0000008625CF2891\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005011035750100000000-030000000000000000000000000000000002__00000159B010F6C0\":{\"file_size\":78626816,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F1000015F545-000000067F00004005000060F20100000000__000000174479FC18\":{\"file_size\":24576,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F3000638C06D-000000067F00004005000060F300063A50CD__0000016E1FBB7B99-000001715E483C79\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F3000299C28F-000000067F00004005000060F300029A526C__000000BD9A7C56D9-000000C0C9EB88E1\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F3000364C000-000000067F00004005000060F30003650000__000000F91FE84F08\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005016EA00C0000CE0000-000000067F00004005016EA00C0000CE4000__0000019E7001E460\":{\"file_size\":134422528,\"generation\":11,\"shard\":\"0008\"},\"000000067F000040050081DB430000794000-000000067F000040050081DB4300007A8000__000000A5A3F27398\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F000040050081DB430000A18000-000000067F000040050081DB430000A1C000__000000AFE87558B0\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F3000000C000-000000067F00004005000060F30000018000__000000027AF9D7D0\":{\"file_size\":134422528,\"generation\":1,\"shard\":\"0008\"},\"000000067F000040050081DB4300000D40FF-030000000000000000000000000000000002__00000075CC373F31-00000079F2A2F311\":{\"file_size\":78061568,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F60000099FD8-000000067F00004005000060F60100000000__00000159B010F6C0\":{\"file_size\":24576,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F3000330A1C8-000000067F00004005000060F3000332B1B6__000000E4C63CFA21-000000E7C2F1B249
\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30006FA900D-000000067F00004005000060F30006FDA081__00000184624E5741-000001860C80A151\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060FB000148AC30-000000067F00004005000060FB000149B774__000000601F43CF09-000000636DE92159\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F0000400500F3A25C01FFFFFFFF-000000067F0000400500F3A25C0300000000__0000011F1A40FA69-00000122A7BB7B29\":{\"file_size\":24576,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30000EF1FC3-000000067F00004005000060F50100000000__00000047F1F2B800\":{\"file_size\":24576,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30006A7C566-000000067F00004005000060F30100000000__00000178B8B10551-0000017C9F5597E1\":{\"file_size\":173072384,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060FB000104B856-000000067F00004005000060FB000107C39B__0000004C49155071-0000004F31878919\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F70000030000-000000067F00004005000060F80100000000__000000021DC73119-000000044854EBD1\":{\"file_size\":261341184,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30003580FD3-000000067F00004005000060F30100000000__000000EFA7EAA9E1-000000F309FCDD19\":{\"file_size\":228188160,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F70001224000-000000067F00004005000060F70001232ACF__0000010D77B487A0\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F300022B9050-000000067F00004005000060F3000230A0C7__000000A583FBFB91-000000A9EB8C4489\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30006654000-000000067F00004005000060F30006670000__00000178C5D5D3A8\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F700010D0000-000000067F00004005000060F70001
0D85CF__000000EFA7EAA9E1-000000F309FCDD19\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F000040050081DB430000FD8000-030000000000000000000000000000000002__000000C824C09619-000000CC13D2E549\":{\"file_size\":237559808,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060FB00015F0000-000000067F00004005000060FB00015F4000__000000698F2C3A38\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F60100000000-000000067F00004005000060F70000004000__000000027AF9D7D0\":{\"file_size\":134422528,\"generation\":1,\"shard\":\"0008\"},\"000000067F00004005000060F70000DA1E38-000000067F00004005000060F80100000000__000000C462B3C2A9-000000C824C09619\":{\"file_size\":209821696,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30005D76250-000000067F00004005000060F30005D7F2DE__00000164DEE06671-0000016834A3FC91\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F10000418000-000000067F00004005000060F10100000000__00000044B4679349-00000047E31D98D1\":{\"file_size\":269148160,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F70001B61000-000000067F00004005000060F80100000000__0000018613F0A050\":{\"file_size\":65150976,\"generation\":3,\"shard\":\"0008\"},\"000000067F00004005000060F300008C8000-000000067F00004005000060F300008E0F49__00000028C365FBE1-0000002D2A8E0B81\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F000040050081DB4300002D8000-030000000000000000000000000000000002__0000008625CF2891-00000089F4693119\":{\"file_size\":231907328,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060FB0000C04000-000000067F00004005000060FB0000C08000__0000003D2AB09B68\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060FB0001808000-000000067F00004005000060FB000180C000__00000075E5D2A930\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F000040050081DB430000A30379-0300000000000000
00000000000000000002__000000AFE87558B0\":{\"file_size\":139264,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F700010D85CF-000000067F00004005000060F80100000000__000000EFA7EAA9E1-000000F309FCDD19\":{\"file_size\":164970496,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060FB0000C70000-000000067F00004005000060FB0000C74000__0000003D2AB09B68\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005016EA00C0001188000-000000067F00004005016EA00C000118C000__000001A95031E5B8\":{\"file_size\":134422528,\"generation\":11,\"shard\":\"0008\"},\"000000067F00004005000060F70000CB85B3-000000067F00004005000060F70000CC8B74__000000BAB1E56C91-000000BD9A7C56D9\":{\"file_size\":268460032,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30004A1D870-000000067F00004005000060F30004A2693B__000001398B56A519-0000013C9C0E3339\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005016EA00C00008CF772-000000067F00004005016EA00C00008E760F__000001935283F9B9-00000196C9018F59\":{\"file_size\":268451840,\"generation\":11,\"shard\":\"0008\"},\"000000067F00004005016EA00C0000D34000-000000067F00004005016EA00C0000D5D1E9__0000019E7001E460\":{\"file_size\":134422528,\"generation\":11,\"shard\":\"0008\"},\"000000067F00004005016EA00C00014B79E7-000000067F00004005016EA00C00014CF88D__000001A931C135B1-000001AC25760149\":{\"file_size\":268451840,\"generation\":11,\"shard\":\"0008\"},\"000000067F00004005000060F300040E319D-000000067F00004005000060F300040F41F4__00000122A7BB7B29-0000012694E36301\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30002FF427D-000000067F00004005000060F30100000000__000000D74E29AAD1-000000DBBFA87AE1\":{\"file_size\":156073984,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30005E0A466-000000067F00004005000060F30005E3B48F__00000164DEE06671-0000016834A3FC91\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F0000400500
0060F700005F9158-000000067F00004005000060F80100000000__00000057593D8169-0000005C01565329\":{\"file_size\":230768640,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005016EA00C00018E4000-000000067F00004005016EA00C000193189A__000001B3F17FE4E0\":{\"file_size\":134422528,\"generation\":11,\"shard\":\"0008\"},\"000000067F00004005000060F30005F0202C-000000067F00004005000060F30005F3303F__0000016834A3FC91-0000016B49A934C1\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F10000148000-000000067F00004005000060F1000014C000__000000174479FC18\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F300060C0000-000000067F00004005000060F300060C4000__0000016E41E03CA0\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060FB0000C9C000-000000067F00004005000060FB0000CC6E51__0000003D2AB09B68\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F000040050107B54700000A0EB1-000000067F000040050109CD330100000000__000001180B3FF408\":{\"file_size\":24576,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005016EA00C00004EC000-000000067F00004005016EA00C00005A0000__000001936E73D028\":{\"file_size\":134422528,\"generation\":11,\"shard\":\"0008\"},\"000000067F00004005016EA00C0000A9F465-000000067F00004005016EA00C0000ACF305__00000196C9018F59-0000019A2EAFE7A9\":{\"file_size\":268451840,\"generation\":11,\"shard\":\"0008\"},\"000000067F00004005000060F30000208000-000000067F00004005000060F3000020C000__0000000D80565628\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F0000400500F3A25C000011E137-000000067F0000400500F67839000003E09B__000001048B25A8E9-0000010779A7F551\":{\"file_size\":268460032,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30000402F4A-000000067F00004005000060F60100000000__000000114A805939-00000013FB921C81\":{\"file_size\":166469632,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005016EA00C00004A8000-000
000067F00004005016EA00C00004AC000__000001936E73D028\":{\"file_size\":134422528,\"generation\":11,\"shard\":\"0008\"},\"000000067F00004005000060F70001968000-000000067F00004005000060F7000196C000__0000014EDD256548\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30006EF8000-000000067F00004005000060F30006EFC000__000001848D082B20\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005016EA00C0000BB4000-000000067F00004005016EA00C0000C20000__0000019E7001E460\":{\"file_size\":134422528,\"generation\":11,\"shard\":\"0008\"},\"000000067F00004005000060F700009C0000-000000067F00004005000060F80100000000__0000009A24DF6768\":{\"file_size\":37371904,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30004C84000-000000067F00004005000060F30004CB8000__000001444EB7FC10\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30002514000-000000067F00004005000060F30002530000__000000AFE87558B0\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F70000DE05C8-000000067F00004005000060F80100000000__000000C824C09619-000000CC13D2E549\":{\"file_size\":259473408,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F301FFFFFFFF-000000067F00004005000060F30300000000__00000186146441F1-0000018624969469\":{\"file_size\":57344,\"generation\":6,\"shard\":\"0008\"},\"000000067F00004005000060F30001886B2A-000000067F00004005000060F50100000000__00000075E5D2A930\":{\"file_size\":24576,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F700006A8000-000000067F00004005000060F80100000000__000000636DE92159-000000663565F8C9\":{\"file_size\":117022720,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060FB000154C000-000000067F00004005000060FB0001558000__000000698F2C3A38\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F300053F40CC-000000067F00004005000060F30100000000__0000014EC58A4A79-00000153
04A396B9\":{\"file_size\":223453184,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30005C95225-000000067F00004005000060F30005C9E3C4__0000016143292911-00000164DEE06671\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F3000558C000-000000067F00004005000060F30005598000__00000159B010F6C0\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30003FFA699-000000067F00004005000060F50100000000__00000122E1129DA0\":{\"file_size\":24576,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30006F1C000-000000067F00004005000060F50100000000__000001848D082B20\":{\"file_size\":24117248,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F3000486C000-000000067F00004005000060F30004878000__00000139CF156B58\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F300019C2056-000000067F00004005000060F300019F31AA__00000079F2A2F311-0000007E3A9BFD29\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F0000400500EE16BC000004C000-000000067F0000400500EE16BC0000060000__000000F91FE84F08\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F7000046EAB9-000000067F00004005000060F80100000000__000000417D21ACF9-00000044B4679349\":{\"file_size\":48717824,\"generation\":2,\"shard\":\"0008\"},\"000000067F000040050081DB430000790000-000000067F000040050081DB430000794000__000000A5A3F27398\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F0000400500D69D79000002C000-000000067F0000400500D69D790000078000__000000EFDE07FFD8\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F60000026C90-000000067F00004005000060F60100000000__000000698F2C3A38\":{\"file_size\":24576,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30000738000-000000067F00004005000060F3000073C000__0000002427BD8BD0\":{\"file_size\":134422528,\"generation\":2,\"shard\":
\"0008\"},\"000000067F00004005000060F10000204000-000000067F00004005000060F10000218000__0000002427BD8BD0\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F0000400500C782E40000177E20-000000067F0000400500C782E400001AFD31__000000D01F399709-000000D31E48D7C9\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F7000048C000-000000067F00004005000060F700004B1E77__00000047F1F2B800\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F300015F8000-000000067F00004005000060F50100000000__000000698F2C3A38\":{\"file_size\":131276800,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30000428000-000000067F00004005000060F3000042C000__000000174479FC18\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F000040050081DB43000038C000-000000067F000040050081DB430000390000__0000008DDCD70B68\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060FB000102A1CE-000000067F00004005000060FB000103AD12__0000004C49155071-0000004F31878919\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060FB0001848000-000000067F00004005000060FB000184C000__00000075E5D2A930\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060FB00001DC000-000000067F00004005000060FB0000228000__0000000D80565628\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005016EA00C00011D4000-000000067F00004005016EA00C0001228000__000001A95031E5B8\":{\"file_size\":134422528,\"generation\":11,\"shard\":\"0008\"},\"000000067F00004005016EA00C000011775B-030000000000000000000000000000000002__0000018820A34650\":{\"file_size\":139264,\"generation\":11,\"shard\":\"0008\"},\"000000067F00004005000060F700011B8000-000000067F00004005000060F80100000000__000001048B25A8E9-0000010779A7F551\":{\"file_size\":263897088,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F3000660D31F-00000
0067F00004005000060F3000664E3CA__000001715E483C79-000001751A7D7589\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F0000400500EE16BC0000064000-000000067F0000400500EE16BC00000F28ED__000000F91FE84F08\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F3000525C065-000000067F00004005000060F50100000000__0000014EDD256548\":{\"file_size\":24576,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30004A7F98F-000000067F00004005000060F30100000000__000001398B56A519-0000013C9C0E3339\":{\"file_size\":47595520,\"generation\":2,\"shard\":\"0008\"},\"000000067F000040050100D04D000004369C-000000067F000040050100D04D000004B5AD__0000010D5DC42EF9-0000010FB1BE19B9\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F6000001A6E2-000000067F00004005000060F60100000000__00000047F1F2B800\":{\"file_size\":24576,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F700004405CF-000000067F00004005000060F80100000000__0000003D03FCCDB9-000000417D21ACF9\":{\"file_size\":198836224,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30002D28000-000000067F00004005000060F30002D2C000__000000D037B2DBD0\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F0000400500F56D510100000000-000000067F0000400500F67839000003C000__0000010D77B487A0\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F70000E387D6-000000067F00004005000060F80100000000__000000D037B2DBD0\":{\"file_size\":24576,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F3000213C000-000000067F00004005000060F30002168000__000000A5A3F27398\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F300060D4415-000000067F00004005000060F3000612D506__0000016B49A934C1-0000016E1FBB7B99\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F0000400500FB3D3100000546CB-000000067F0000400500FB3D320100000000_
_00000122E1129DA0\":{\"file_size\":24576,\"generation\":2,\"shard\":\"0008\"},\"000000067F000040050081DB430000D18CA9-030000000000000000000000000000000002__000000BAB1E56C91-000000BD9A7C56D9\":{\"file_size\":210288640,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F60000062E4F-000000067F00004005000060F60100000000__00000104BD37F348\":{\"file_size\":24576,\"generation\":2,\"shard\":\"0008\"},\"000000067F0000400500F3A25C000016A065-000000067F0000400500F3A25C000017C0CB__0000010779A7F551-0000010A5E65DF39\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060FB0001AD0000-000000067F00004005000060FB0001B28B44__0000008196C976A1-0000008625CF2891\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30000254000-000000067F00004005000060F30000298000__0000000D80565628\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F000040050081DB430000E8C000-000000067F000040050081DB430000EA0000__000000C483D0D6B8\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F300040F41F4-000000067F00004005000060F3000412D27C__00000122A7BB7B29-0000012694E36301\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060FB00013B8000-000000067F00004005000060FB00013BC000__000000603CA8F2F0\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F700000D8000-000000067F00004005000060F700000DC000__0000000D80565628\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F70000958000-000000067F00004005000060F700009605D8__000000923719A971-00000096262826C9\":{\"file_size\":268460032,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060FB00004A0000-000000067F00004005000060FB00004A4000__000000174479FC18\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F700001213F2-000000067F00004005000060F80100000000__0000000D55A212C9-000000114
A805939\":{\"file_size\":55320576,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30004156457-000000067F00004005000060F30100000000__00000122A7BB7B29-0000012694E36301\":{\"file_size\":96927744,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30003278000-000000067F00004005000060F3000327C000__000000E4D847F4E0\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005016EA00C000158F667-000000067F00004005016EA00C00015B74FF__000001AC25760149-000001AFC313C819\":{\"file_size\":268451840,\"generation\":11,\"shard\":\"0008\"},\"000000067F00004005000060FB0001D50000-000000067F00004005000060FB0001D88B43__0000008DBE2855F9-000000923719A971\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F60000054AE8-000000067F00004005000060F60100000000__000000DBD29DC248\":{\"file_size\":24576,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F300002C4887-000000067F00004005000060F60100000000__0000000D80565628\":{\"file_size\":24576,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F70001B34000-000000067F00004005000060F70001B5A072__00000159B010F6C0\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F600000416A8-000000067F00004005000060F60100000000__000000AFE87558B0\":{\"file_size\":24576,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F10000050000-000000067F00004005000060F10000058000__000000044854EBD1-00000008B6B51879\":{\"file_size\":264011776,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F300043FC000-000000067F00004005000060F300044D3639__0000012E77D3BF00\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30004878000-000000067F00004005000060F3000487C000__00000139CF156B58\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F3000396C000-000000067F00004005000060F30003998000__0000010D77B487A0\":{\"file_size\":134422528,\"generation\"
:2,\"shard\":\"0008\"},\"000000067F00004005016EA00C00019F7907-000000067F00004005016EA00C0001A477A4__000001B3E1B95181-000001B6FFE46BC9\":{\"file_size\":268443648,\"generation\":11,\"shard\":\"0008\"},\"000000067F00004005016EA00C00014D7727-000000067F00004005016EA00C00014E75C6__000001A931C135B1-000001AC25760149\":{\"file_size\":268451840,\"generation\":11,\"shard\":\"0008\"},\"000000067F00004005016EA00C00016570D9-030000000000000000000000000000000002__000001AC25760149-000001AFC313C819\":{\"file_size\":86335488,\"generation\":11,\"shard\":\"0008\"},\"000000067F00004005000060F70001270000-000000067F00004005000060F80100000000__0000010FB1BE19B9-00000113456156F1\":{\"file_size\":265363456,\"generation\":2,\"shard\":\"0008\"},\"000000067F0000400500EB4A4800003BFD31-000000067F0000400500EB4A4800003C7C42__000000FCCD5238B1-000000FF8B261599\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F300014B31F8-000000067F00004005000060F300014CC16D__000000636DE92159-000000663565F8C9\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005016EA00C0000D5D1E9-030000000000000000000000000000000002__0000019E7001E460\":{\"file_size\":139264,\"generation\":11,\"shard\":\"0008\"},\"000000067F00004005000060F100003B8214-000000067F00004005000060F100003C0432__0000003D03FCCDB9-000000417D21ACF9\":{\"file_size\":268460032,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005016EA00C0001346854-000000067F00004005016EA00C000135FCAD__000001A931C135B1-000001AC25760149\":{\"file_size\":268451840,\"generation\":11,\"shard\":\"0008\"},\"000000067F00004005000060F3000160410C-000000067F00004005000060F3000165515A__000000698AF6E809-0000006DDB29D589\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060FB000118B12B-030000000000000000000000000000000002__00000054161C34B8\":{\"file_size\":147456,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30006DF0000-000000067F00004005000060F30006DF4000__000001848D0
82B20\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F700003C4000-000000067F00004005000060F700003FE341__0000003D2AB09B68\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30000FF0000-000000067F00004005000060F30100000000__0000004C49155071-0000004F31878919\":{\"file_size\":256286720,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060FB00015F4000-000000067F00004005000060FB00015FCD31__000000698F2C3A38\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30005816253-000000067F00004005000060F30005847319__00000159A7EC8CB9-0000015DD1D3C809\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30002460000-000000067F00004005000060F30002464000__000000AFE87558B0\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F7000113A337-000000067F00004005000060F700011528FB__000000FF8B261599-000001048B25A8E9\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060FB000037968A-030000000000000000000000000000000002__0000000D55A212C9-000000114A805939\":{\"file_size\":226426880,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005016EA00C0000128000-000000067F00004005016EA00C000012FE9A__000001880F984A29-0000018C496B6DB1\":{\"file_size\":268451840,\"generation\":11,\"shard\":\"0008\"},\"000000067F0000400500EB4A48000036FF11-000000067F0000400500EB4A4800003A7E20__000000FCCD5238B1-000000FF8B261599\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F3000658113F-000000067F00004005000060F3000659A203__000001715E483C79-000001751A7D7589\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005016EA00C0001D18000-000000067F00004005016EA00C0001D1C000__000001BCB572A4E0\":{\"file_size\":134422528,\"generation\":17,\"shard\":\"0008\"},\"000000067F00004005000060F30001A44000-000000067F00004005000060F30001AB
1583__00000081AA3C40F0\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F10000138000-000000067F00004005000060F1000013C000__000000174479FC18\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F300009BC000-000000067F00004005000060F30000A50000__000000321AA80270\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F7000110E30C-000000067F00004005000060F80100000000__000000F91FE84F08\":{\"file_size\":24576,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F50100000000-000000067F00004005000060F60000014000__0000003D2AB09B68\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30006F18000-000000067F00004005000060F30006FA900D__00000184624E5741-000001860C80A151\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060FB0001D88B43-000000067F00004005000060FB0100000000__0000008DBE2855F9-000000923719A971\":{\"file_size\":249028608,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F3000122A1D5-000000067F00004005000060F30100000000__0000005413AB3641-00000057593D8169\":{\"file_size\":48783360,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30006277C61-000000067F00004005000060F30006320C60__0000016E1FBB7B99-000001715E483C79\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F000040050081DB430000388000-000000067F000040050081DB43000038C000__0000008DDCD70B68\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005016EA00C0000E67A6E-000000067F00004005016EA00C0000E77906__0000019E2C5DCEE1-000001A1DD8B4481\":{\"file_size\":268451840,\"generation\":11,\"shard\":\"0008\"},\"000000067F00004005000060F300009B8000-000000067F00004005000060F300009BC000__000000321AA80270\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F0000400501025D900000068000-000000067F00004005010450640000000570__0000010FB1BE19B9-
00000113456156F1\":{\"file_size\":268460032,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060FB00002D4B6A-030000000000000000000000000000000002__0000000D80565628\":{\"file_size\":147456,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30001E50FF3-000000067F00004005000060F30001E720A2__000000923719A971-00000096262826C9\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005016EA00C00005A4000-000000067F00004005016EA00C0000670000__000001936E73D028\":{\"file_size\":134422528,\"generation\":11,\"shard\":\"0008\"},\"000000067F00004005000060FB0000C18000-000000067F00004005000060FB0000C1C000__0000003D2AB09B68\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F70000BA4F5B-000000067F00004005000060F70000BBD532__000000AFD23C27B9-000000B2B5C4E8F9\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F70001AC115C-000000067F00004005000060F80100000000__0000015304A396B9-0000015670D6AFD9\":{\"file_size\":237248512,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30004D24000-000000067F00004005000060F30004DA8000__000001444EB7FC10\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30006CA4000-000000067F00004005000060F30006D10000__000001848D082B20\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F0000400500EE16BC00001433D0-030000000000000000000000000000000002__000000FCCD5238B1-000000FF8B261599\":{\"file_size\":146407424,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F3000165515A-000000067F00004005000060F30100000000__000000698AF6E809-0000006DDB29D589\":{\"file_size\":112680960,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005016EA00C000118C000-000000067F00004005016EA00C00011D0000__000001A95031E5B8\":{\"file_size\":134422528,\"generation\":11,\"shard\":\"0008\"},\"000000067F000040050081DB43000094A076-030000000000000000000000000000000002__000000A9EB8C4489-000
000ACA44C8E99\":{\"file_size\":176054272,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F70001528000-000000067F00004005000060F7000152C000__0000012E77D3BF00\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F000040050081DB430000C82B50-000000067F000040050081DB430000CC4BC2__000000BAB1E56C91-000000BD9A7C56D9\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F000040050081DB4300001EF15A-000000067F000040050081DB4300002791D8__0000008196C976A1-0000008625CF2891\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F10000125BF2-000000067F00004005000060F20100000000__000000114A805939-00000013FB921C81\":{\"file_size\":78782464,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30000E40F86-000000067F00004005000060F30100000000__000000417D21ACF9-00000044B4679349\":{\"file_size\":111108096,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005016EA00C0000FF0000-000000067F00004005016EA00C0000FF4000__000001A95031E5B8\":{\"file_size\":134422528,\"generation\":11,\"shard\":\"0008\"},\"000000067F00004005000060F30000CB16B6-000000067F00004005000060F50100000000__0000003D2AB09B68\":{\"file_size\":24576,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F70001990000-000000067F00004005000060F70001994000__0000014EDD256548\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30000A54000-000000067F00004005000060F30000A5F9BB__000000321AA80270\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F300061B8705-000000067F00004005000060F300061D9774__0000016B49A934C1-0000016E1FBB7B99\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F7000084C000-000000067F00004005000060F70000858000__00000081AA3C40F0\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F70000848000-000000067F00004005000060F7000084C000__00000081AA3C40F0\":{\"file
_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30001D18000-000000067F00004005000060F30001D79136__0000008DBE2855F9-000000923719A971\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060FB0001558000-000000067F00004005000060FB000155C000__000000698F2C3A38\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F300024440AE-000000067F00004005000060F3000244D189__000000A9EB8C4489-000000ACA44C8E99\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30002CFC020-000000067F00004005000060F30100000000__000000C824C09619-000000CC13D2E549\":{\"file_size\":150708224,\"generation\":2,\"shard\":\"0008\"},\"000000067F000040050081DB430000A4A074-000000067F000040050081DB430000A640EA__000000AFD23C27B9-000000B2B5C4E8F9\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060FB0000C98000-000000067F00004005000060FB0000C9C000__0000003D2AB09B68\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060FB0001840000-000000067F00004005000060FB0001844000__00000075E5D2A930\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30000802123-000000067F00004005000060F30000853115__00000023FEF9F321-00000028C365FBE1\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F70000029ED0-000000067F00004005000060F80100000000__000000027AF9D7D0\":{\"file_size\":24576,\"generation\":1,\"shard\":\"0008\"},\"000000067F00004005016EA00C00003E4000-000000067F00004005016EA00C00003E8000__000001936E73D028\":{\"file_size\":134422528,\"generation\":11,\"shard\":\"0008\"},\"000000067F00004005000060F30004CBC000-000000067F00004005000060F30004D20000__000001444EB7FC10\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005016EA00C000122C000-000000067F00004005016EA00C0001240000__000001A95031E5B8\":{\"file_size\":134422528,\"ge
neration\":11,\"shard\":\"0008\"},\"000000067F00004005000060F30004DF086C-000000067F00004005000060F50100000000__000001444EB7FC10\":{\"file_size\":24576,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F300050B5199-000000067F00004005000060F30100000000__0000014784964B91-0000014B000D1821\":{\"file_size\":126124032,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005016EA00C0001A477A4-000000067F00004005016EA00C0001ADF63C__000001B3E1B95181-000001B6FFE46BC9\":{\"file_size\":268451840,\"generation\":11,\"shard\":\"0008\"},\"000000067F00004005000060F70001828000-000000067F00004005000060F7000182C000__000001444EB7FC10\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F100004F0000-000000067F00004005000060F10000518222__0000005413AB3641-00000057593D8169\":{\"file_size\":268460032,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30005EFD576-000000067F00004005000060F30100000000__00000164DEE06671-0000016834A3FC91\":{\"file_size\":193077248,\"generation\":2,\"shard\":\"0008\"},\"000000067F0000400500F8E3A50100000000-000000067F0000400500FA2AD30000004000__0000010D77B487A0\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F3000258E3A9-000000067F00004005000060F3000259F4A3__000000AFD23C27B9-000000B2B5C4E8F9\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F70000C90000-000000067F00004005000060F70000CB85B3__000000BAB1E56C91-000000BD9A7C56D9\":{\"file_size\":268460032,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060FB000114C000-000000067F00004005000060FB000118B12B__00000054161C34B8\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30003644000-000000067F00004005000060F30003648000__000000F91FE84F08\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060FB0001A50000-000000067F00004005000060FB0001A60B43__0000007E3A9BFD29-0000008196C976A1\":{\"file_size\":2
68451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30003C257AD-000000067F00004005000060F50100000000__000001180B3FF408\":{\"file_size\":24576,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30002DE8000-000000067F00004005000060F30002E4104A__000000D31E48D7C9-000000D74E29AAD1\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F0000400500F3A25C00000C8000-000000067F0000400500F3A25C00000EA069__000001048B25A8E9-0000010779A7F551\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30002174000-000000067F00004005000060F30002210000__000000A5A3F27398\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F300014D5280-000000067F00004005000060F300014E6333__000000636DE92159-000000663565F8C9\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F3000332B1B6-000000067F00004005000060F30003344134__000000E4C63CFA21-000000E7C2F1B249\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F300065F42B4-000000067F00004005000060F3000660D31F__000001715E483C79-000001751A7D7589\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F000040050081DB4300010E264A-000000067F000040050081DB4300010F46BD__000000D31E48D7C9-000000D74E29AAD1\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F300069D13FA-000000067F00004005000060F300069FA3F6__00000178B8B10551-0000017C9F5597E1\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F300061D9774-000000067F00004005000060F30006222843__0000016B49A934C1-0000016E1FBB7B99\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F100005C821A-000000067F00004005000060F20100000000__000000601F43CF09-000000636DE92159\":{\"file_size\":265183232,\"generation\":2,\"shard\":\"0008\"},\"000000067F0000400500EB4A480000200000-000000067F0000400500EB4A4800
00204000__000000FCD84FE628\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F70001690000-000000067F00004005000060F70100000000__000001334140FC21-00000137115BE4D9\":{\"file_size\":273965056,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005016EA00C0000A575C7-000000067F00004005016EA00C0000A9F465__00000196C9018F59-0000019A2EAFE7A9\":{\"file_size\":268451840,\"generation\":11,\"shard\":\"0008\"},\"000000067F00004005000060FB0001E6C000-000000067F00004005000060FB0001E98000__0000009A24DF6768\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060FB00014195A7-000000067F00004005000060FB000147A0EC__000000601F43CF09-000000636DE92159\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060FB0000AE168A-030000000000000000000000000000000002__0000003203FB5749-0000003579F03331\":{\"file_size\":223379456,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30000CA0000-000000067F00004005000060F30000CA4000__0000003D2AB09B68\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F300006E4000-000000067F00004005000060F30000738000__0000002427BD8BD0\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F300006E0000-000000067F00004005000060F300006E4000__0000002427BD8BD0\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060FB0001124000-000000067F00004005000060FB0001148000__00000054161C34B8\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F0000400500D69D7900000A8000-000000067F0000400500D69D7900000AC000__000000EFDE07FFD8\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F0000400500C782E40000130000-000000067F0000400500C782E40000137F10__000000D01F399709-000000D31E48D7C9\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005016EA00C000020FBCF-000000067F00004005016EA00C0000257A6F__00000188
0F984A29-0000018C496B6DB1\":{\"file_size\":268451840,\"generation\":11,\"shard\":\"0008\"},\"000000067F00004005000060FB0001B28B44-000000067F00004005000060FB0100000000__0000008196C976A1-0000008625CF2891\":{\"file_size\":249454592,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060FB0001120000-000000067F00004005000060FB0001124000__00000054161C34B8\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30005474062-000000067F00004005000060F3000549D0A6__0000015304A396B9-0000015670D6AFD9\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F0000400500C782E4000023FA62-030000000000000000000000000000000002__000000D01F399709-000000D31E48D7C9\":{\"file_size\":245366784,\"generation\":2,\"shard\":\"0008\"},\"000000067F000040050081DB430000160484-030000000000000000000000000000000002__00000079F2A2F311-0000007E3A9BFD29\":{\"file_size\":226582528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F300038A4FB4-000000067F00004005000060F300038B5F5B__0000010779A7F551-0000010A5E65DF39\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F300017E8000-000000067F00004005000060F300017EC000__00000075E5D2A930\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F0000400500FB3D300100000000-000000067F0000400500FB3D31000000C000__0000010D77B487A0\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F700010105DB-000000067F00004005000060F80100000000__000000E4C63CFA21-000000E7C2F1B249\":{\"file_size\":254935040,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F70000858570-000000067F00004005000060F80100000000__0000008196C976A1-0000008625CF2891\":{\"file_size\":252985344,\"generation\":2,\"shard\":\"0008\"},\"000000067F000040050081DB4300001D4000-000000067F000040050081DB4300001E8000__00000081AA3C40F0\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060FB00005E0000-000000067F000
04005000060FB0000638B45__0000001B59EEB909-0000001FFBC01501\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F000040050107B547000006C000-000000067F000040050107B54700000A0EB1__000001180B3FF408\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060FB0000430000-000000067F00004005000060FB0000434000__000000174479FC18\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F300014E6333-000000067F00004005000060F3000151F271__000000636DE92159-000000663565F8C9\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F0000400500FB3D300100000000-000000067F0000400500FB3D300300000000__00000117EDA82C11-0000011B632CC319\":{\"file_size\":65536,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30004BE7584-000000067F00004005000060F30100000000__0000013C9C0E3339-0000013FEFA7D709\":{\"file_size\":58204160,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F70001068000-000000067F00004005000060F80100000000__000000E7C2F1B249-000000EBC9213D59\":{\"file_size\":168730624,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F1000013C000-000000067F00004005000060F10000148000__000000174479FC18\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F3000659A203-000000067F00004005000060F300065BB235__000001715E483C79-000001751A7D7589\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F70000EC0000-000000067F00004005000060F70000EF85D6__000000D74E29AAD1-000000DBBFA87AE1\":{\"file_size\":268460032,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005010660F500000B4000-000000067F00004005010660F500000F44CB__000001180B3FF408\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F300067A4000-000000067F00004005000060F300067F0000__00000178C5D5D3A8\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F0000400500DBCED500000F0000-000
000067F0000400500DBCED500000F4000__000000E4D847F4E0\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F000040050081DB430000768000-000000067F000040050081DB43000076C000__000000A5A3F27398\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005016EA00C00018E0000-000000067F00004005016EA00C00018E4000__000001B3F17FE4E0\":{\"file_size\":134422528,\"generation\":11,\"shard\":\"0008\"},\"000000067F00004005000060F30000A50000-000000067F00004005000060F30000A54000__000000321AA80270\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060FB0001E68000-000000067F00004005000060FB0001E6C000__0000009A24DF6768\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30001960000-000000067F00004005000060F300019790A2__00000079F2A2F311-0000007E3A9BFD29\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060FB0000B6A1D0-000000067F00004005000060FB0000BAAD15__0000003579F03331-0000003959DA2DE9\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30002E4A157-000000067F00004005000060F30002E630CF__000000D31E48D7C9-000000D74E29AAD1\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30006E70000-000000067F00004005000060F30006E74000__000001848D082B20\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F700004464DD-000000067F00004005000060F7000046EAB9__000000417D21ACF9-00000044B4679349\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F0000400500EB4A480000204000-000000067F0000400500EB4A480000218000__000000FCD84FE628\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F300042D51D6-000000067F00004005000060F3000430E1E9__0000012694E36301-0000012A3F140591\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060FB0000F30000-000000067F00004005000
060FB0100000000__00000047E31D98D1-0000004C49155071\":{\"file_size\":272302080,\"generation\":2,\"shard\":\"0008\"},\"000000067F000040050081DB4300006F8000-030000000000000000000000000000000002__0000009DF02C1241-000000A173C00489\":{\"file_size\":235110400,\"generation\":2,\"shard\":\"0008\"},\"000000067F000040050081DB4300001EC000-000000067F000040050081DB4300001F1DA6__00000081AA3C40F0\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F300038A3082-000000067F00004005000060F30100000000__000001048B25A8E9-0000010779A7F551\":{\"file_size\":76644352,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005016EA00C0000400000-000000067F00004005016EA00C0000404000__000001936E73D028\":{\"file_size\":134422528,\"generation\":11,\"shard\":\"0008\"},\"000000067F00004005000060F30003481DDB-000000067F00004005000060F30100000000__000000E7C2F1B249-000000EBC9213D59\":{\"file_size\":107814912,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F3000489C000-000000067F00004005000060F300048A0000__00000139CF156B58\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F000040050081DB430000CD6C36-000000067F000040050081DB430000D18CA9__000000BAB1E56C91-000000BD9A7C56D9\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30004888000-000000067F00004005000060F3000488C000__00000139CF156B58\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F300008E0F49-000000067F00004005000060F30000921E8A__00000028C365FBE1-0000002D2A8E0B81\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F0000400500C782E40000074000-000000067F0000400500C782E400000A0000__000000D037B2DBD0\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060FB00011F2D11-000000067F00004005000060FB0001203856__0000005413AB3641-00000057593D8169\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F300046330B1-000000
067F00004005000060F300046B41AA__0000012E71CF31F9-000001334140FC21\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30003548000-000000067F00004005000060F30003580FD3__000000EFA7EAA9E1-000000F309FCDD19\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060FB0001198B44-000000067F00004005000060FB00011C1688__0000005413AB3641-00000057593D8169\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F3000049C000-000000067F00004005000060F300004A8000__000000174479FC18\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005016EA00C0000B44000-000000067F00004005016EA00C0000BB0000__0000019E7001E460\":{\"file_size\":134422528,\"generation\":11,\"shard\":\"0008\"},\"000000067F00004005000060F700014F0000-000000067F00004005000060F700014F85DF__0000012694E36301-0000012A3F140591\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F000040050081DB430000C5E15B-000000067F000040050081DB430000C801D1__000000B768469051-000000BAB1E56C91\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30003A10000-000000067F00004005000060F30003A21037__0000010D5DC42EF9-0000010FB1BE19B9\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30006EFC000-000000067F00004005000060F30006F18000__000001848D082B20\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005016EA00C0001D1F87B-000000067F00004005016EA00C0001D7F71A__000001BA93C39481-000001BCB572A4E1\":{\"file_size\":268451840,\"generation\":17,\"shard\":\"0008\"},\"000000067F00004005000060F30002A34000-000000067F00004005000060F30002A40000__000000C483D0D6B8\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F70000F0AA88-000000067F00004005000060F80100000000__000000DBD29DC248\":{\"file_size\":24576,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30006
700000-000000067F00004005000060F30006704000__00000178C5D5D3A8\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30001CC4000-000000067F00004005000060F30001CD0000__0000008DDCD70B68\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F70000858000-000000067F00004005000060F80100000000__00000081AA3C40F0\":{\"file_size\":48439296,\"generation\":2,\"shard\":\"0008\"},\"000000067F000040050081DB4300000D6407-000000067F000040050081DB430000160484__00000079F2A2F311-0000007E3A9BFD29\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F300057DD292-000000067F00004005000060F30005816253__00000159A7EC8CB9-0000015DD1D3C809\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30006222843-000000067F00004005000060F3000625B8F0__0000016B49A934C1-0000016E1FBB7B99\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060FB0000410000-000000067F00004005000060FB0000430B46__000000114A805939-00000013FB921C81\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F100006A8000-000000067F00004005000060F100006B0000__0000006DDB29D589-000000722F474369\":{\"file_size\":264110080,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F3000460202F-000000067F00004005000060F300046330B1__0000012E71CF31F9-000001334140FC21\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30006E74000-000000067F00004005000060F30006EF8000__000001848D082B20\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30003A3B020-000000067F00004005000060F30003A4C09C__0000010D5DC42EF9-0000010FB1BE19B9\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30002535462-000000067F00004005000060F3000258E3A9__000000AFD23C27B9-000000B2B5C4E8F9\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\
"000000067F0000400500EB4A480000294000-000000067F0000400500EB4A480000355928__000000FCD84FE628\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005016E85370000000000-030000000000000000000000000000000002__00000159A7EC8CB9-0000015DD1D3C809\":{\"file_size\":152190976,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F3000158C000-000000067F00004005000060F300015B0000__000000698F2C3A38\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30003386D10-000000067F00004005000060F300033D7D7C__000000E7C2F1B249-000000EBC9213D59\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30000E7C000-000000067F00004005000060F30000EF1FC3__00000047F1F2B800\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F0000400500FA2AD30000030000-000000067F0000400500FA2AD30000034000__0000010D77B487A0\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30005588000-000000067F00004005000060F3000558C000__00000159B010F6C0\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F300039A0000-000000067F00004005000060F300039A4000__0000010D77B487A0\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F6000008A13D-000000067F00004005000060F60100000000__000001444EB7FC10\":{\"file_size\":24576,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060FB00017120CE-000000067F00004005000060FB000172AC12__0000006DDB29D589-000000722F474369\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30003200000-000000067F00004005000060F30003204000__000000E4D847F4E0\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F300007C1007-000000067F00004005000060F30000802123__00000023FEF9F321-00000028C365FBE1\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F0000400500F3A25C000006C000-00
0000067F0000400500F3A25C00000BB439__00000104BD37F348\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F300015B4000-000000067F00004005000060F300015F8000__000000698F2C3A38\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F300060C220F-000000067F00004005000060F300060CB2C8__0000016B49A934C1-0000016E1FBB7B99\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F0000400500F8E3A5000004A25C-000000067F0000400500F8E3A50100000000__0000010D77B487A0\":{\"file_size\":24576,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30002C9AFB8-000000067F00004005000060F30002CFC020__000000C824C09619-000000CC13D2E549\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005010F2BD40100000000-000000067F00004005010F44EB000000C000__00000126C3C69FC0\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30002AEED02-000000067F00004005000060F50100000000__000000C483D0D6B8\":{\"file_size\":24576,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30002EB8000-000000067F00004005000060F30002F5105E__000000D74E29AAD1-000000DBBFA87AE1\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F0000400500E3A2A1000016321A-030000000000000000000000000000000002__000000EFDE07FFD8\":{\"file_size\":139264,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F3000135C000-000000067F00004005000060F30001407F7A__000000603CA8F2F0\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F0000400500F67839000006AEF4-000000067F0000400500F7D2DD0100000000__0000010D77B487A0\":{\"file_size\":24576,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30005DA03A8-000000067F00004005000060F30005DC93F1__00000164DEE06671-0000016834A3FC91\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F000040050081DB4300010E2072-000000067F000040050081DB430100000000__000000D01F39970
9-000000D31E48D7C9\":{\"file_size\":15392768,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F300004A8000-000000067F00004005000060F300004AC000__000000174479FC18\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060FB00016E0A44-000000067F00004005000060FB0001701588__0000006DDB29D589-000000722F474369\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F300024D8000-000000067F00004005000060F300024DC000__000000AFE87558B0\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30003BC8000-000000067F00004005000060F30003BCC000__000001180B3FF408\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F00100000000-000000067F00004005000060F10000004000__000000027AF9D7D0\":{\"file_size\":134422528,\"generation\":1,\"shard\":\"0008\"},\"000000067F000040050081DB430100000000-000000067F0000400500C782E40000074000__000000D037B2DBD0\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30003D14206-000000067F00004005000060F30003D252C8__00000117EDA82C11-0000011B632CC319\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F700006479E7-000000067F00004005000060F80100000000__000000603CA8F2F0\":{\"file_size\":24576,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F70000B9C988-000000067F00004005000060F70000BA4F5B__000000AFD23C27B9-000000B2B5C4E8F9\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F0000400500D69D790000078000-000000067F0000400500D69D79000007C000__000000EFDE07FFD8\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F70000CC8B74-000000067F00004005000060F80100000000__000000BAB1E56C91-000000BD9A7C56D9\":{\"file_size\":95657984,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060FB0000708000-000000067F00004005000060FB000070C000__0000002427BD8BD0\":{\"file_size\":134422
528,\"generation\":2,\"shard\":\"0008\"},\"000000067F000040050081DB430000EA0000-000000067F000040050081DB430000EEA075__000000C462B3C2A9-000000C824C09619\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005016EA00C000001FD3E-000000067F00004005016EA00C0000097BDA__0000018624969469-000001880F984A29\":{\"file_size\":268451840,\"generation\":11,\"shard\":\"0008\"},\"000000067F00004005000060F3000689E295-000000067F00004005000060F3000690F2FD__00000178B8B10551-0000017C9F5597E1\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30000CE0000-000000067F00004005000060F30000D31030__0000003D03FCCDB9-000000417D21ACF9\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F000040050081DB430000EA0000-030000000000000000000000000000000002__000000C483D0D6B8\":{\"file_size\":20307968,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005016EA00C0000807A34-000000067F00004005016EA00C00008578D4__000001935283F9B9-00000196C9018F59\":{\"file_size\":268451840,\"generation\":11,\"shard\":\"0008\"},\"000000067F000040050081DB430001060000-000000067F000040050081DB430001064000__000000D037B2DBD0\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F3000480F32C-000000067F00004005000060F3000486837F__000001334140FC21-00000137115BE4D9\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F700009385D4-000000067F00004005000060F80100000000__0000008DBE2855F9-000000923719A971\":{\"file_size\":252207104,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30000090000-000000067F00004005000060F300000C1095__000000021DC73119-000000044854EBD1\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F3000480620C-000000067F00004005000060F3000480F32C__000001334140FC21-00000137115BE4D9\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30005FA40AD-000000067F00004005000060F300
05FC519A__0000016834A3FC91-0000016B49A934C1\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060FB00014A42B8-030000000000000000000000000000000002__000000601F43CF09-000000636DE92159\":{\"file_size\":137322496,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30001CD0000-000000067F00004005000060F30001CD4000__0000008DDCD70B68\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005016EA00C0000404000-000000067F00004005016EA00C0000428000__000001936E73D028\":{\"file_size\":134422528,\"generation\":11,\"shard\":\"0008\"},\"000000067F00004005000060F30002079FDE-000000067F00004005000060F300020830BE__0000009DF02C1241-000000A173C00489\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F3000487C000-000000067F00004005000060F30004880000__00000139CF156B58\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005010A188401FFFFFFFF-000000067F00004005010A18840300000000__00000137115BE4D9-000001398B56A519\":{\"file_size\":24576,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F70000218000-000000067F00004005000060F7000021C000__0000002427BD8BD0\":{\"file_size\":134422528,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30005EF454F-000000067F00004005000060F30005EFD576__00000164DEE06671-0000016834A3FC91\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"},\"000000067F00004005000060F30005DC93F1-000000067F00004005000060F30005E0A466__00000164DEE06671-0000016834A3FC91\":{\"file_size\":268451840,\"generation\":2,\"shard\":\"0008\"}},\"disk_consistent_lsn\":\"1BC/B5734CD8\",\"metadata_bytes\":{\"disk_consistent_lsn\":\"1BC/B5734CD8\",\"prev_record_lsn\":\"1BC/B5734CB0\",\"ancestor_timeline\":null,\"ancestor_lsn\":\"0/0\",\"latest_gc_cutoff_lsn\":\"1BC/B5732690\",\"initdb_lsn\":\"0/14EE150\",\"pg_version\":16},\"lineage\":{}}\n"
  },
  {
    "path": "pgxn/.dir-locals.el",
    "content": ";; see also src/tools/editors/emacs.samples for more complete settings\n\n((c-mode . ((c-basic-offset . 4)\n            (c-file-style . \"bsd\")\n            (fill-column . 78)\n            (indent-tabs-mode . t)\n            (tab-width . 4)))\n (nxml-mode . ((fill-column . 78)\n               (indent-tabs-mode . nil)))\n (perl-mode . ((perl-indent-level . 4)\n               (perl-continued-statement-offset . 2)\n               (perl-continued-brace-offset . -2)\n               (perl-brace-offset . 0)\n               (perl-brace-imaginary-offset . 0)\n               (perl-label-offset . -2)\n               (indent-tabs-mode . t)\n               (tab-width . 4)))\n (sgml-mode . ((fill-column . 78)\n               (indent-tabs-mode . nil))))\n"
  },
  {
    "path": "pgxn/.editorconfig",
    "content": "root = true\n\n[*.{c,h,l,y,pl,pm}]\nindent_style = tab\nindent_size = tab\ntab_width = 4\n\n[*.{sgml,xml}]\nindent_style = space\nindent_size = 1\n\n[*.xsl]\nindent_style = space\nindent_size = 2\n"
  },
  {
    "path": "pgxn/Makefile",
    "content": "# This makefile assumes that 'pg_config' is in the path, or is passed in the\n# PG_CONFIG variable.\n#\n# This is used in two different ways:\n#\n# 1. The main makefile calls this, when you invoke the `make neon-pg-ext-%`\n#    target. It passes PG_CONFIG pointing to pg_install/%/bin/pg_config.\n#    This is a VPATH build; the current directory is build/pgxn-%, and\n#    the path to the Makefile is passed with the -f argument.\n#\n# 2. compute-node.Dockerfile invokes this to build the compute extensions\n#    for the specific Postgres version. It relies on pg_config already\n#    being in $(PATH).\n\nsrcdir = $(dir $(firstword $(MAKEFILE_LIST)))\n\nPG_CONFIG = pg_config\n\nsubdirs = neon neon_rmgr neon_walredo neon_utils neon_test_utils\n\n.PHONY: install install-compute install-storage $(subdirs)\ninstall: $(subdirs)\ninstall-compute: neon neon_utils neon_test_utils neon_rmgr\ninstall-storage: neon_rmgr neon_walredo\n\n$(subdirs): %:\n\tmkdir -p $*\n\t$(MAKE) PG_CONFIG=$(PG_CONFIG) -C $* -f $(abspath $(srcdir)/$@/Makefile) install\n"
  },
  {
    "path": "pgxn/neon/Makefile",
    "content": "# pgxs/neon/Makefile\n\n\nMODULE_big = neon\nOBJS = \\\n\t$(WIN32RES) \\\n\tcommunicator.o \\\n\tcommunicator_process.o \\\n\textension_server.o \\\n\tfile_cache.o \\\n\thll.o \\\n\tlibpagestore.o \\\n\tlogical_replication_monitor.o \\\n\tneon.o \\\n\tneon_lwlsncache.o \\\n\tneon_pgversioncompat.o \\\n\tneon_perf_counters.o \\\n\tneon_utils.o \\\n\tneon_walreader.o \\\n\tpagestore_smgr.o \\\n\trelsize_cache.o \\\n\tunstable_extensions.o \\\n\twalproposer.o \\\n\twalproposer_pg.o \\\n\tneon_ddl_handler.o \\\n\twalsender_hooks.o \\\n\t$(NEON_CARGO_ARTIFACT_TARGET_DIR)/libcommunicator.a\n\nPG_CPPFLAGS = -I$(libpq_srcdir)\nSHLIB_LINK_INTERNAL = $(libpq)\nSHLIB_LINK = -lcurl\n\nUNAME_S := $(shell uname -s)\nifeq ($(UNAME_S), Darwin)\n    SHLIB_LINK += -framework Security -framework CoreFoundation -framework SystemConfiguration\n\n    # Link against object files for the current macOS version, to avoid spurious linker warnings.\n    MACOSX_DEPLOYMENT_TARGET := $(shell xcrun --sdk macosx --show-sdk-version)\n    export MACOSX_DEPLOYMENT_TARGET\nendif\n\nEXTENSION = neon\nDATA = \\\n\tneon--1.0.sql \\\n\tneon--1.0--1.1.sql \\\n\tneon--1.1--1.2.sql \\\n\tneon--1.2--1.3.sql \\\n\tneon--1.3--1.4.sql \\\n\tneon--1.4--1.5.sql \\\n\tneon--1.5--1.6.sql \\\n\tneon--1.6--1.5.sql \\\n\tneon--1.5--1.4.sql \\\n\tneon--1.4--1.3.sql \\\n\tneon--1.3--1.2.sql \\\n\tneon--1.2--1.1.sql \\\n\tneon--1.1--1.0.sql\nPGFILEDESC = \"neon - cloud storage for PostgreSQL\"\n\nEXTRA_CLEAN = \\\n\tlibwalproposer.a\n\nWALPROP_OBJS = \\\n\t$(WIN32RES) \\\n\twalproposer.o \\\n\tneon_utils.o \\\n\twalproposer_compat.o\n\n# libcommunicator.a is built by cargo from the Rust sources under communicator/\n# subdirectory. 
`cargo build` also generates communicator_bindings.h.\ncommunicator_process.o: communicator/communicator_bindings.h\nfile_cache.o: communicator/communicator_bindings.h\n\n$(NEON_CARGO_ARTIFACT_TARGET_DIR)/libcommunicator.a communicator/communicator_bindings.h &:\n\t(cd $(srcdir)/communicator && cargo build $(CARGO_BUILD_FLAGS) $(CARGO_PROFILE))\n\n# Force `cargo build` every time. Some of the Rust sources might have\n# changed.\n.PHONY: $(NEON_CARGO_ARTIFACT_TARGET_DIR)/libcommunicator.a communicator/communicator_bindings.h\n\n.PHONY: walproposer-lib\nwalproposer-lib: CPPFLAGS += -DWALPROPOSER_LIB\nwalproposer-lib: libwalproposer.a;\n\n.PHONY: libwalproposer.a\nlibwalproposer.a: $(WALPROP_OBJS)\n\t$(RM) $@\n\t$(AR) $(AROPT) $@ $^\n\n# needs vars:\n# FIND_TYPEDEF pointing to find_typedef\n# INDENT pointing to pg_bsd_indent\n# PGINDENT_SCRIPT pointing to pgindent (be careful with PGINDENT var name:\n#   pgindent will pick it up as pg_bsd_indent path).\n.PHONY: pgindent\npgindent:\n\t+@ echo top_srcdir=$(top_srcdir) top_builddir=$(top_builddir) srcdir=$(srcdir)\n\t$(FIND_TYPEDEF) . > neon.typedefs\n\tINDENT=$(INDENT) $(PGINDENT_SCRIPT) --typedefs neon.typedefs $(srcdir)/*.c $(srcdir)/*.h\n\nPG_CONFIG = pg_config\nPGXS := $(shell $(PG_CONFIG) --pgxs)\ninclude $(PGXS)\n"
  },
  {
    "path": "pgxn/neon/README.md",
    "content": "neon extension consists of several parts:\n\n### shared preload library `neon.so`\n\n- implements storage manager API and network communications with remote page server.\n\n- walproposer: implements broadcast protocol between postgres and WAL safekeepers.\n\n- control plane connector:  Captures updates to roles/databases using ProcessUtility_hook and sends them to the control ProcessUtility_hook.\n\n- remote extension server: Request compute_ctl to download extension files.\n\n- file_cache: Local file cache is used to temporary store relations pages in local file system for better performance.\n\n- relsize_cache: Relation size cache for better neon performance.\n\n### SQL functions in `neon--*.sql`\n\nUtility functions to expose neon specific information to user and metrics collection.\nThis extension is created in all databases in the cluster by default.\n"
  },
  {
    "path": "pgxn/neon/bitmap.h",
    "content": "#ifndef NEON_BITMAP_H\n#define NEON_BITMAP_H\n\n/*\n * Utilities for manipulating bits8* as bitmaps.\n */\n\n#define BITMAP_ISSET(bm, bit) ((bm)[(bit) >> 3] & (1 << ((bit) & 7)))\n#define BITMAP_SET(bm, bit) (bm)[(bit) >> 3] |= (1 << ((bit) & 7))\n#define BITMAP_CLR(bm, bit) (bm)[(bit) >> 3] &= ~(1 << ((bit) & 7))\n\n#endif\t\t\t\t\t\t\t/* NEON_BITMAP_H */\n"
  },
  {
    "path": "pgxn/neon/communicator/.gitignore",
    "content": "# generated file (with cbindgen, see build.rs)\ncommunicator_bindings.h\n"
  },
  {
    "path": "pgxn/neon/communicator/Cargo.toml",
    "content": "[package]\nname = \"communicator\"\nversion = \"0.1.0\"\nlicense.workspace = true\nedition.workspace = true\n\n[lib]\ncrate-type = [\"staticlib\"]\n\n[features]\n# 'testing' feature is currently unused in the communicator, but we accept it for convenience of\n# calling build scripts, so that you can pass the same feature to all packages.\ntesting = []\n# 'rest_broker' feature is currently unused in the communicator, but we accept it for convenience of\n# calling build scripts, so that you can pass the same feature to all packages.\nrest_broker = []\n\n[dependencies]\naxum.workspace = true\nhttp.workspace = true\ntokio = { workspace = true, features = [\"macros\", \"net\", \"io-util\", \"rt\", \"rt-multi-thread\"] }\ntracing.workspace = true\ntracing-subscriber.workspace = true\n\nmeasured.workspace = true\nutils.workspace = true\nworkspace_hack = { version = \"0.1\", path = \"../../../workspace_hack\" }\n\n[build-dependencies]\ncbindgen.workspace = true\n"
  },
  {
    "path": "pgxn/neon/communicator/README.md",
    "content": "# Communicator\n\nThis package provides the so-called \"compute-pageserver communicator\",\nor just \"communicator\" in short. The communicator is a separate\nbackground worker process that runs in the PostgreSQL server. It's\npart of the neon extension. Currently, it only provides an HTTP\nendpoint for metrics, but in the future it will evolve to handle all\ncommunications with the pageservers.\n\n## Source code view\n\npgxn/neon/communicator_process.c\n    Contains code needed to start up the communicator process, and\n    the glue that interacts with PostgreSQL code and the Rust\n    code in the communicator process.\n\n\npgxn/neon/communicator/src/worker_process/\n    Worker process main loop and glue code\n\nAt compilation time, pgxn/neon/communicator/ produces a static\nlibrary, libcommunicator.a. It is linked to the neon.so extension\nlibrary.\n"
  },
  {
    "path": "pgxn/neon/communicator/build.rs",
    "content": "use std::env;\n\nfn main() -> Result<(), Box<dyn std::error::Error>> {\n    let crate_dir = env::var(\"CARGO_MANIFEST_DIR\").unwrap();\n\n    match cbindgen::generate(crate_dir) {\n        Ok(bindings) => {\n            bindings.write_to_file(\"communicator_bindings.h\");\n        }\n        Err(cbindgen::Error::ParseSyntaxError { .. }) => {\n            // This means there was a syntax error in the Rust sources. Don't panic, because\n            // we want the build to continue and the Rust compiler to hit the error. The\n            // Rust compiler produces a better error message than cbindgen.\n            eprintln!(\"Generating C bindings failed because of a Rust syntax error\");\n        }\n        Err(err) => panic!(\"Unable to generate C bindings: {err:?}\"),\n    };\n\n    Ok(())\n}\n"
  },
  {
    "path": "pgxn/neon/communicator/cbindgen.toml",
    "content": "language = \"C\"\n\n[enum]\nprefix_with_name = true\n"
  },
  {
    "path": "pgxn/neon/communicator/src/lib.rs",
    "content": "mod worker_process;\n\n/// Name of the Unix Domain Socket that serves the metrics, and other APIs in the\n/// future. This is within the Postgres data directory.\nconst NEON_COMMUNICATOR_SOCKET_NAME: &str = \"neon-communicator.socket\";\n"
  },
  {
    "path": "pgxn/neon/communicator/src/worker_process/callbacks.rs",
    "content": "//! C callbacks to PostgreSQL facilities that the neon extension needs to provide. These\n//! are implemented in `neon/pgxn/communicator_process.c`. The function signatures better\n//! match!\n//!\n//! These are called from the communicator threads! Careful what you do, most Postgres\n//! functions are not safe to call in that context.\n\n#[cfg(not(test))]\nunsafe extern \"C\" {\n    pub fn callback_set_my_latch_unsafe();\n    pub fn callback_get_lfc_metrics_unsafe() -> LfcMetrics;\n}\n\n// Compile unit tests with dummy versions of the functions. Unit tests cannot call back\n// into the C code. (As of this writing, no unit tests even exists in the communicator\n// package, but the code coverage build still builds these and tries to link with the\n// external C code.)\n#[cfg(test)]\nunsafe fn callback_set_my_latch_unsafe() {\n    panic!(\"not usable in unit tests\");\n}\n#[cfg(test)]\nunsafe fn callback_get_lfc_metrics_unsafe() -> LfcMetrics {\n    panic!(\"not usable in unit tests\");\n}\n\n// safe wrappers\n\npub(super) fn callback_set_my_latch() {\n    unsafe { callback_set_my_latch_unsafe() };\n}\n\npub(super) fn callback_get_lfc_metrics() -> LfcMetrics {\n    unsafe { callback_get_lfc_metrics_unsafe() }\n}\n\n/// Return type of the callback_get_lfc_metrics() function.\n#[repr(C)]\npub struct LfcMetrics {\n    pub lfc_cache_size_limit: i64,\n    pub lfc_hits: i64,\n    pub lfc_misses: i64,\n    pub lfc_used: i64,\n    pub lfc_writes: i64,\n\n    // working set size looking back 1..60 minutes.\n    //\n    // Index 0 is the size of the working set accessed within last 1 minute,\n    // index 59 is the size of the working set accessed within last 60 minutes.\n    pub lfc_approximate_working_set_size_windows: [i64; 60],\n}\n"
  },
  {
    "path": "pgxn/neon/communicator/src/worker_process/control_socket.rs",
    "content": "//! Communicator control socket.\n//!\n//! Currently, the control socket is used to provide information about the communicator\n//! process, file cache etc. as prometheus metrics. In the future, it can be used to\n//! expose more things.\n//!\n//! The exporter speaks HTTP, listens on a Unix Domain Socket under the Postgres\n//! data directory. For debugging, you can access it with curl:\n//!\n//! ```sh\n//! curl --unix-socket neon-communicator.socket http://localhost/metrics\n//! ```\n//!\nuse axum::Router;\nuse axum::body::Body;\nuse axum::extract::State;\nuse axum::response::Response;\nuse http::StatusCode;\nuse http::header::CONTENT_TYPE;\n\nuse measured::MetricGroup;\nuse measured::text::BufferedTextEncoder;\n\nuse std::io::ErrorKind;\n\nuse tokio::net::UnixListener;\n\nuse crate::NEON_COMMUNICATOR_SOCKET_NAME;\nuse crate::worker_process::main_loop::CommunicatorWorkerProcessStruct;\n\nimpl CommunicatorWorkerProcessStruct {\n    /// Launch the listener\n    pub(crate) async fn launch_control_socket_listener(\n        &'static self,\n    ) -> Result<(), std::io::Error> {\n        use axum::routing::get;\n        let app = Router::new()\n            .route(\"/metrics\", get(get_metrics))\n            .route(\"/autoscaling_metrics\", get(get_autoscaling_metrics))\n            .route(\"/debug/panic\", get(handle_debug_panic))\n            .with_state(self);\n\n        // If the server is restarted, there might be an old socket still\n        // lying around. Remove it first.\n        match std::fs::remove_file(NEON_COMMUNICATOR_SOCKET_NAME) {\n            Ok(()) => {\n                tracing::warn!(\"removed stale control socket\");\n            }\n            Err(e) if e.kind() == ErrorKind::NotFound => {}\n            Err(e) => {\n                tracing::error!(\"could not remove stale control socket: {e:#}\");\n                // Try to proceed anyway. 
It will likely fail below though.\n            }\n        };\n\n        // Create the unix domain socket and start listening on it\n        let listener = UnixListener::bind(NEON_COMMUNICATOR_SOCKET_NAME)?;\n\n        tokio::spawn(async {\n            tracing::info!(\"control socket listener spawned\");\n            axum::serve(listener, app)\n                .await\n                .expect(\"axum::serve never returns\")\n        });\n\n        Ok(())\n    }\n}\n\n/// Expose all Prometheus metrics.\nasync fn get_metrics(State(state): State<&CommunicatorWorkerProcessStruct>) -> Response {\n    tracing::trace!(\"/metrics requested\");\n    metrics_to_response(&state).await\n}\n\n/// Expose Prometheus metrics, for use by the autoscaling agent.\n///\n/// This is a subset of all the metrics.\nasync fn get_autoscaling_metrics(\n    State(state): State<&CommunicatorWorkerProcessStruct>,\n) -> Response {\n    tracing::trace!(\"/metrics requested\");\n    metrics_to_response(&state.lfc_metrics).await\n}\n\nasync fn handle_debug_panic(State(_state): State<&CommunicatorWorkerProcessStruct>) -> Response {\n    panic!(\"test HTTP handler task panic\");\n}\n\n/// Helper function to convert prometheus metrics to a text response\nasync fn metrics_to_response(metrics: &(dyn MetricGroup<BufferedTextEncoder> + Sync)) -> Response {\n    let mut enc = BufferedTextEncoder::new();\n    metrics\n        .collect_group_into(&mut enc)\n        .unwrap_or_else(|never| match never {});\n\n    Response::builder()\n        .status(StatusCode::OK)\n        .header(CONTENT_TYPE, \"application/text\")\n        .body(Body::from(enc.finish()))\n        .unwrap()\n}\n"
  },
  {
    "path": "pgxn/neon/communicator/src/worker_process/lfc_metrics.rs",
    "content": "use measured::{\n    FixedCardinalityLabel, Gauge, GaugeVec, LabelGroup, MetricGroup,\n    label::{LabelName, LabelValue, StaticLabelSet},\n    metric::{MetricEncoding, gauge::GaugeState, group::Encoding},\n};\n\nuse super::callbacks::callback_get_lfc_metrics;\n\npub(crate) struct LfcMetricsCollector;\n\n#[derive(MetricGroup)]\n#[metric(new())]\nstruct LfcMetricsGroup {\n    /// LFC cache size limit in bytes\n    lfc_cache_size_limit: Gauge,\n    /// LFC cache hits\n    lfc_hits: Gauge,\n    /// LFC cache misses\n    lfc_misses: Gauge,\n    /// LFC chunks used (chunk = 1MB)\n    lfc_used: Gauge,\n    /// LFC cache writes\n    lfc_writes: Gauge,\n    /// Approximate working set size in pages of 8192 bytes\n    #[metric(init = GaugeVec::dense())]\n    lfc_approximate_working_set_size_windows: GaugeVec<StaticLabelSet<MinuteAsSeconds>>,\n}\n\nimpl<T: Encoding> MetricGroup<T> for LfcMetricsCollector\nwhere\n    GaugeState: MetricEncoding<T>,\n{\n    fn collect_group_into(&self, enc: &mut T) -> Result<(), <T as Encoding>::Err> {\n        let g = LfcMetricsGroup::new();\n\n        let lfc_metrics = callback_get_lfc_metrics();\n\n        g.lfc_cache_size_limit.set(lfc_metrics.lfc_cache_size_limit);\n        g.lfc_hits.set(lfc_metrics.lfc_hits);\n        g.lfc_misses.set(lfc_metrics.lfc_misses);\n        g.lfc_used.set(lfc_metrics.lfc_used);\n        g.lfc_writes.set(lfc_metrics.lfc_writes);\n\n        for i in 0..60 {\n            let val = lfc_metrics.lfc_approximate_working_set_size_windows[i];\n            g.lfc_approximate_working_set_size_windows\n                .set(MinuteAsSeconds(i), val);\n        }\n\n        g.collect_group_into(enc)\n    }\n}\n\n/// This stores the values in range 0..60,\n/// encodes them as seconds (60, 120, 180, ..., 3600)\n#[derive(Clone, Copy)]\nstruct MinuteAsSeconds(usize);\n\nimpl FixedCardinalityLabel for MinuteAsSeconds {\n    fn cardinality() -> usize {\n        60\n    }\n\n    fn encode(&self) -> usize {\n        
self.0\n    }\n\n    fn decode(value: usize) -> Self {\n        Self(value)\n    }\n}\n\nimpl LabelValue for MinuteAsSeconds {\n    fn visit<V: measured::label::LabelVisitor>(&self, v: V) -> V::Output {\n        v.write_int((self.0 + 1) as i64 * 60)\n    }\n}\n\nimpl LabelGroup for MinuteAsSeconds {\n    fn visit_values(&self, v: &mut impl measured::label::LabelGroupVisitor) {\n        v.write_value(LabelName::from_str(\"duration_seconds\"), self);\n    }\n}\n"
  },
  {
    "path": "pgxn/neon/communicator/src/worker_process/logging.rs",
    "content": "//! Glue code to hook up Rust logging with the `tracing` crate to the PostgreSQL log\n//!\n//! In the Rust threads, the log messages are written to a mpsc Channel, and the Postgres\n//! process latch is raised. That wakes up the loop in the main thread, see\n//! `communicator_new_bgworker_main()`. It reads the message from the channel and\n//! ereport()s it. This ensures that only one thread, the main thread, calls the\n//! PostgreSQL logging routines at any time.\n\nuse std::ffi::c_char;\nuse std::sync::atomic::{AtomicU64, Ordering};\nuse std::sync::mpsc::sync_channel;\nuse std::sync::mpsc::{Receiver, SyncSender};\nuse std::sync::mpsc::{TryRecvError, TrySendError};\n\nuse tracing::info;\nuse tracing::{Event, Level, Metadata, Subscriber};\nuse tracing_subscriber::filter::LevelFilter;\nuse tracing_subscriber::fmt::format::Writer;\nuse tracing_subscriber::fmt::{FmtContext, FormatEvent, FormatFields, FormattedFields, MakeWriter};\nuse tracing_subscriber::registry::LookupSpan;\n\nuse crate::worker_process::callbacks::callback_set_my_latch;\n\n/// This handle is passed to the C code, and used by [`communicator_worker_poll_logging`]\npub struct LoggingReceiver {\n    receiver: Receiver<FormattedEventWithMeta>,\n}\n\n/// This is passed to `tracing`\nstruct LoggingSender {\n    sender: SyncSender<FormattedEventWithMeta>,\n}\n\nstatic DROPPED_EVENT_COUNT: AtomicU64 = AtomicU64::new(0);\n\n/// Called once, at worker process startup. The returned LoggingState is passed back\n/// in the subsequent calls to `pump_logging`. 
It is opaque to the C code.\n#[unsafe(no_mangle)]\npub extern \"C\" fn communicator_worker_configure_logging() -> Box<LoggingReceiver> {\n    let (sender, receiver) = sync_channel(1000);\n\n    let receiver = LoggingReceiver { receiver };\n    let sender = LoggingSender { sender };\n\n    use tracing_subscriber::prelude::*;\n    let r = tracing_subscriber::registry();\n\n    let r = r.with(\n        tracing_subscriber::fmt::layer()\n            .with_ansi(false)\n            .event_format(SimpleFormatter)\n            .with_writer(sender)\n            // TODO: derive this from log_min_messages? Currently the code in\n            // communicator_process.c forces log_min_messages='INFO'.\n            .with_filter(LevelFilter::from_level(Level::INFO)),\n    );\n    r.init();\n\n    info!(\"communicator process logging started\");\n\n    Box::new(receiver)\n}\n\n/// Read one message from the logging queue. This is essentially a wrapper to Receiver,\n/// with a C-friendly signature.\n///\n/// The message is copied into *errbuf, which is a caller-supplied buffer of size\n/// `errbuf_len`.  If the message doesn't fit in the buffer, it is truncated. It is always\n/// NULL-terminated.\n///\n/// The error level is returned *elevel_p. It's one of the PostgreSQL error levels, see\n/// elog.h\n///\n/// If there was a message, *dropped_event_count_p is also updated with a counter of how\n/// many log messages in total has been dropped. By comparing that with the value from\n/// previous call, you can tell how many were dropped since last call.\n///\n/// Returns:\n///\n///   0 if there were no messages\n///   1 if there was a message. The message and its level are returned in\n///     *errbuf and *elevel_p. 
*dropped_event_count_p is also updated.\n///  -1 on error, i.e the other end of the queue was disconnected\n#[unsafe(no_mangle)]\npub extern \"C\" fn communicator_worker_poll_logging(\n    state: &mut LoggingReceiver,\n    errbuf: *mut c_char,\n    errbuf_len: u32,\n    elevel_p: &mut i32,\n    dropped_event_count_p: &mut u64,\n) -> i32 {\n    let msg = match state.receiver.try_recv() {\n        Err(TryRecvError::Empty) => return 0,\n        Err(TryRecvError::Disconnected) => return -1,\n        Ok(msg) => msg,\n    };\n\n    let src: &[u8] = &msg.message;\n    let dst: *mut u8 = errbuf.cast();\n    let len = std::cmp::min(src.len(), errbuf_len as usize - 1);\n    unsafe {\n        std::ptr::copy_nonoverlapping(src.as_ptr(), dst, len);\n        *(dst.add(len)) = b'\\0'; // NULL terminator\n    }\n\n    // Map the tracing Level to PostgreSQL elevel.\n    //\n    // XXX: These levels are copied from PostgreSQL's elog.h. Introduce another enum to\n    // hide these?\n    *elevel_p = match msg.level {\n        Level::TRACE => 10, // DEBUG5\n        Level::DEBUG => 14, // DEBUG1\n        Level::INFO => 17,  // INFO\n        Level::WARN => 19,  // WARNING\n        Level::ERROR => 21, // ERROR\n    };\n\n    *dropped_event_count_p = DROPPED_EVENT_COUNT.load(Ordering::Relaxed);\n\n    1\n}\n\n//---- The following functions can be called from any thread ----\n\n#[derive(Clone)]\nstruct FormattedEventWithMeta {\n    message: Vec<u8>,\n    level: tracing::Level,\n}\n\nimpl Default for FormattedEventWithMeta {\n    fn default() -> Self {\n        FormattedEventWithMeta {\n            message: Vec::new(),\n            level: tracing::Level::DEBUG,\n        }\n    }\n}\n\nstruct EventBuilder<'a> {\n    event: FormattedEventWithMeta,\n\n    sender: &'a LoggingSender,\n}\n\nimpl std::io::Write for EventBuilder<'_> {\n    fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {\n        self.event.message.write(buf)\n    }\n    fn flush(&mut self) -> std::io::Result<()> {\n      
  self.sender.send_event(self.event.clone());\n        Ok(())\n    }\n}\n\nimpl Drop for EventBuilder<'_> {\n    fn drop(&mut self) {\n        let sender = self.sender;\n        let event = std::mem::take(&mut self.event);\n\n        sender.send_event(event);\n    }\n}\n\nimpl<'a> MakeWriter<'a> for LoggingSender {\n    type Writer = EventBuilder<'a>;\n\n    fn make_writer(&'a self) -> Self::Writer {\n        panic!(\"not expected to be called when make_writer_for is implemented\");\n    }\n\n    fn make_writer_for(&'a self, meta: &Metadata<'_>) -> Self::Writer {\n        EventBuilder {\n            event: FormattedEventWithMeta {\n                message: Vec::new(),\n                level: *meta.level(),\n            },\n            sender: self,\n        }\n    }\n}\n\nimpl LoggingSender {\n    fn send_event(&self, e: FormattedEventWithMeta) {\n        match self.sender.try_send(e) {\n            Ok(()) => {\n                // notify the main thread\n                callback_set_my_latch();\n            }\n            Err(TrySendError::Disconnected(_)) => {}\n            Err(TrySendError::Full(_)) => {\n                // The queue is full, cannot send any more. To avoid blocking the tokio\n                // thread, simply drop the message. Better to lose some logs than get\n                // stuck if there's a problem with the logging.\n                //\n                // Record the fact that a message was dropped by incrementing the\n                // counter.\n                DROPPED_EVENT_COUNT.fetch_add(1, Ordering::Relaxed);\n            }\n        }\n    }\n}\n\n/// Simple formatter implementation for tracing_subscriber, which prints the log spans and\n/// message part like the default formatter, but no timestamp or error level. The error\n/// level is captured separately by `FormattedEventWithMeta`, and when the error is\n/// printed by the main thread, with PostgreSQL ereport(), it gets a timestamp at that\n/// point. 
(The timestamp printed will therefore lag behind the timestamp on the event\n/// here, if the main thread doesn't process the log message promptly)\nstruct SimpleFormatter;\n\nimpl<S, N> FormatEvent<S, N> for SimpleFormatter\nwhere\n    S: Subscriber + for<'a> LookupSpan<'a>,\n    N: for<'a> FormatFields<'a> + 'static,\n{\n    fn format_event(\n        &self,\n        ctx: &FmtContext<'_, S, N>,\n        mut writer: Writer<'_>,\n        event: &Event<'_>,\n    ) -> std::fmt::Result {\n        // Format all the spans in the event's span context.\n        if let Some(scope) = ctx.event_scope() {\n            for span in scope.from_root() {\n                write!(writer, \"{}\", span.name())?;\n\n                // `FormattedFields` is a formatted representation of the span's fields,\n                // which is stored in its extensions by the `fmt` layer's `new_span`\n                // method. The fields will have been formatted by the same field formatter\n                // that's provided to the event formatter in the `FmtContext`.\n                let ext = span.extensions();\n                let fields = &ext\n                    .get::<FormattedFields<N>>()\n                    .expect(\"will never be `None`\");\n\n                // Skip formatting the fields if the span had no fields.\n                if !fields.is_empty() {\n                    write!(writer, \"{{{fields}}}\")?;\n                }\n                write!(writer, \": \")?;\n            }\n        }\n\n        // Write fields on the event\n        ctx.field_format().format_fields(writer.by_ref(), event)?;\n\n        Ok(())\n    }\n}\n"
  },
  {
    "path": "pgxn/neon/communicator/src/worker_process/main_loop.rs",
    "content": "use std::str::FromStr as _;\n\nuse crate::worker_process::lfc_metrics::LfcMetricsCollector;\n\nuse measured::MetricGroup;\nuse measured::metric::MetricEncoding;\nuse measured::metric::gauge::GaugeState;\nuse measured::metric::group::Encoding;\nuse utils::id::{TenantId, TimelineId};\n\npub struct CommunicatorWorkerProcessStruct {\n    runtime: tokio::runtime::Runtime,\n\n    /*** Metrics ***/\n    pub(crate) lfc_metrics: LfcMetricsCollector,\n}\n\n/// Launch the communicator process's Rust subsystems\npub(super) fn init(\n    tenant_id: Option<&str>,\n    timeline_id: Option<&str>,\n) -> Result<&'static CommunicatorWorkerProcessStruct, String> {\n    // The caller validated these already\n    let _tenant_id = tenant_id\n        .map(TenantId::from_str)\n        .transpose()\n        .map_err(|e| format!(\"invalid tenant ID: {e}\"))?;\n    let _timeline_id = timeline_id\n        .map(TimelineId::from_str)\n        .transpose()\n        .map_err(|e| format!(\"invalid timeline ID: {e}\"))?;\n\n    let runtime = tokio::runtime::Builder::new_multi_thread()\n        .enable_all()\n        .thread_name(\"communicator thread\")\n        .build()\n        .unwrap();\n\n    let worker_struct = CommunicatorWorkerProcessStruct {\n        // Note: it's important to not drop the runtime, or all the tasks are dropped\n        // too. 
Including it in the returned struct is one way to keep it around.\n        runtime,\n\n        // metrics\n        lfc_metrics: LfcMetricsCollector,\n    };\n    let worker_struct = Box::leak(Box::new(worker_struct));\n\n    // Start the listener on the control socket\n    worker_struct\n        .runtime\n        .block_on(worker_struct.launch_control_socket_listener())\n        .map_err(|e| e.to_string())?;\n\n    Ok(worker_struct)\n}\n\nimpl<T> MetricGroup<T> for CommunicatorWorkerProcessStruct\nwhere\n    T: Encoding,\n    GaugeState: MetricEncoding<T>,\n{\n    fn collect_group_into(&self, enc: &mut T) -> Result<(), T::Err> {\n        self.lfc_metrics.collect_group_into(enc)\n    }\n}\n"
  },
  {
    "path": "pgxn/neon/communicator/src/worker_process/mod.rs",
    "content": "//! This code runs in the communicator worker process. This provides\n//! the glue code to:\n//!\n//! - launch the main loop,\n//! - receive IO requests from backends and process them,\n//! - write results back to backends.\n\nmod callbacks;\nmod control_socket;\nmod lfc_metrics;\nmod logging;\nmod main_loop;\nmod worker_interface;\n"
  },
  {
    "path": "pgxn/neon/communicator/src/worker_process/worker_interface.rs",
    "content": "//! Functions called from the C code in the worker process\n\nuse std::ffi::{CStr, CString, c_char};\n\nuse crate::worker_process::main_loop;\nuse crate::worker_process::main_loop::CommunicatorWorkerProcessStruct;\n\n/// Launch the communicator's tokio tasks, which do most of the work.\n///\n/// The caller has initialized the process as a regular PostgreSQL background worker\n/// process.\n///\n/// Inputs:\n///   `tenant_id` and `timeline_id` can be NULL, if we're been launched in \"non-Neon\" mode,\n///   where we use local storage instead of connecting to remote neon storage. That's\n///   currently only used in some unit tests.\n///\n/// Result:\n///   Returns pointer to CommunicatorWorkerProcessStruct, which is a handle to running\n///   Rust tasks. The C code can use it to interact with the Rust parts. On failure, returns\n///   None/NULL, and an error message is returned in *error_p\n///\n/// This is called only once in the process, so the returned struct, and error message in\n/// case of failure, are simply leaked.\n#[unsafe(no_mangle)]\npub extern \"C\" fn communicator_worker_launch(\n    tenant_id: *const c_char,\n    timeline_id: *const c_char,\n    error_p: *mut *const c_char,\n) -> Option<&'static CommunicatorWorkerProcessStruct> {\n    // Convert the arguments into more convenient Rust types\n    let tenant_id = if tenant_id.is_null() {\n        None\n    } else {\n        let cstr = unsafe { CStr::from_ptr(tenant_id) };\n        Some(cstr.to_str().expect(\"assume UTF-8\"))\n    };\n    let timeline_id = if timeline_id.is_null() {\n        None\n    } else {\n        let cstr = unsafe { CStr::from_ptr(timeline_id) };\n        Some(cstr.to_str().expect(\"assume UTF-8\"))\n    };\n\n    // The `init` function does all the work.\n    let result = main_loop::init(tenant_id, timeline_id);\n\n    // On failure, return the error message to the C caller in *error_p.\n    match result {\n        Ok(worker_struct) => Some(worker_struct),\n       
 Err(errmsg) => {\n            let errmsg = CString::new(errmsg).expect(\"no nuls within error message\");\n            let errmsg = Box::leak(errmsg.into_boxed_c_str());\n            let p: *const c_char = errmsg.as_ptr();\n\n            unsafe { *error_p = p };\n            None\n        }\n    }\n}\n"
  },
  {
    "path": "pgxn/neon/communicator.c",
    "content": "/*-------------------------------------------------------------------------\n *\n * communicator.c\n *\t  Functions for communicating with remote pageservers.\n *\n * This is the so-called \"legacy\" communicator. It consists of functions that\n * are called from the smgr implementation, in pagestore_smgr.c. There are\n * plans to replace this with a different implementation, see RFC.\n *\n * The communicator is a collection of functions that are called in each\n * backend, when the backend needs to read a page or other information. It\n * does not spawn background threads or anything like that. To process\n * responses to prefetch requests in a timely fashion, however, it registers\n * a ProcessInterrupts hook that gets called periodically from any\n * CHECK_FOR_INTERRUPTS() point in the backend.\n *\n * By the time the functions in this file are called, the caller has already\n * established that a request to the pageserver is necessary. The functions\n * are only called for permanent relations (i.e. not temp or unlogged tables).\n * Before making a call to the communicator, the caller has already checked\n * the relation size or local file cache.\n *\n * However, when processing responses to getpage requests, the communicator\n * writes pages directly to the LFC.\n *\n * The communicator functions take request LSNs as arguments; the caller is\n * responsible for determining the correct LSNs to use. There's one exception\n * to that, in prefetch_do_request(); it sometimes calls back to\n * neon_get_request_lsns().  
That's because sometimes a suitable response is\n * found in the prefetch buffer and the request LSNs are not needed, and the\n * caller doesn't know whether it's needed or not.\n *\n * The main interface consists of the following \"synchronous\" calls:\n *\n * communicator_exists\t\t\t- Returns true if a relation file exists\n * communicator_nblocks\t\t\t- Returns a relation's size\n * communicator_dbsize\t\t\t- Returns a database's total size\n * communicator_read_at_lsnv\t- Read contents of one relation block\n * communicator_read_slru_segment - Read contents of one SLRU segment\n *\n * In addition, there are functions related to prefetching:\n * communicator_prefetch_register_bufferv - Start prefetching a page\n * communicator_prefetch_lookupv - Check if a page is already in prefetch queue\n *\n * Misc other functions:\n * - communicator_init\t\t\t- Initialize the module at startup\n * - communicator_prefetch_pump_state - Called periodically to advance the state\n *\n *\n * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group\n * Portions Copyright (c) 1994, Regents of the University of California\n *\n *-------------------------------------------------------------------------\n */\n#include \"postgres.h\"\n\n#include \"access/xlog.h\"\n#include \"access/xlogdefs.h\"\n#include \"access/xlog_internal.h\"\n#include \"access/xlogutils.h\"\n#include \"common/hashfn.h\"\n#include \"executor/instrument.h\"\n#include \"libpq/pqformat.h\"\n#include \"miscadmin.h\"\n#include \"port/pg_iovec.h\"\n#include \"postmaster/interrupt.h\"\n#include \"replication/walsender.h\"\n#include \"storage/ipc.h\"\n#include \"utils/timeout.h\"\n\n#include \"bitmap.h\"\n#include \"communicator.h\"\n#include \"file_cache.h\"\n#include \"neon.h\"\n#include \"neon_perf_counters.h\"\n#include \"pagestore_client.h\"\n\n#if PG_VERSION_NUM >= 150000\n#include \"access/xlogrecovery.h\"\n#endif\n\n#define NEON_PANIC_CONNECTION_STATE(shard_no, elvl, message, ...) 
\\\n\tneon_shard_log(shard_no, elvl, \"Broken connection state: \" message, \\\n\t\t\t\t   ##__VA_ARGS__)\n\npage_server_api *page_server;\n\n/*\n * Various settings related to prompt (fast) handling of PageStream responses\n * at any CHECK_FOR_INTERRUPTS point.\n */\nint\t\t\t\treadahead_getpage_pull_timeout_ms = 50;\nstatic int\t\tPS_TIMEOUT_ID = 0;\nstatic bool\t\ttimeout_set = false;\nstatic bool\t\ttimeout_signaled = false;\n\n/*\n * We have a CHECK_FOR_INTERRUPTS in page_server->receive(), and we don't want\n * that to handle any getpage responses if we're already working on the\n * backlog of those, as we'd hit issues with determining which prefetch slot\n * we just got a response for.\n *\n * To protect against that, we have this variable that's set whenever we start\n * receiving data for prefetch slots, so that we don't get confused.\n *\n * Note that in certain error cases during readpage we may leak r_r_g=true,\n * which results in a failure to pick up further responses until we first\n * actively try to receive new getpage responses.\n */\nstatic bool\t\treadpage_reentrant_guard = false;\n\nstatic void pagestore_timeout_handler(void);\n\n#define START_PREFETCH_RECEIVE_WORK() \\\n\tdo { \\\n\t\treadpage_reentrant_guard = true; \\\n\t} while (false)\n\n#define END_PREFETCH_RECEIVE_WORK() \\\n\tdo { \\\n\t\treadpage_reentrant_guard = false; \\\n\t\tif (unlikely(timeout_signaled && !InterruptPending)) \\\n\t\t\tInterruptPending = true; \\\n\t} while (false)\n\n/*\n * Prefetch implementation:\n *\n * Prefetch is performed locally by each backend.\n *\n * There can be up to readahead_buffer_size active IO requests registered at\n * any time. Requests using smgr_prefetch are sent to the pageserver, but we\n * don't wait on the response. 
Requests using smgr_read are either read from\n * the buffer, or (if that's not possible) we wait on the response to arrive -\n * this also will allow us to receive other prefetched pages.\n * Each request is immediately written to the output buffer of the pageserver\n * connection, but may not be flushed if smgr_prefetch is used: pageserver\n * flushes sent requests on manual flush, or every neon.flush_output_after\n * unflushed requests; which is not necessarily always and all the time.\n *\n * Once we have received a response, this value will be stored in the response\n * buffer, indexed in a hash table. This allows us to retain our buffered\n * prefetch responses even when we have cache misses.\n *\n * Reading of prefetch responses is delayed until they are actually needed\n * (smgr_read). In case of prefetch miss or any other SMGR request other than\n * smgr_read, all prefetch responses in the pipeline will need to be read from\n * the connection; the responses are stored for later use.\n *\n * NOTE: The current implementation of the prefetch system implements a ring\n * buffer of up to readahead_buffer_size requests. If there are more _read and\n * _prefetch requests between the initial _prefetch and the _read of a buffer,\n * the prefetch request will have been dropped from this prefetch buffer, and\n * your prefetch was wasted.\n */\n\n/*\n * State machine:\n *\n * not in hash : in hash\n *             :\n * UNUSED ------> REQUESTED --> RECEIVED\n *   ^         :      |            |\n *   |         :      v            |\n *   |         : TAG_REMAINS       |\n *   |         :      |            |\n *   +----------------+------------+\n *             :\n */\ntypedef enum PrefetchStatus\n{\n\tPRFS_UNUSED = 0,\t\t\t/* unused slot */\n\tPRFS_REQUESTED,\t\t\t\t/* request was written to the sendbuffer to\n\t\t\t\t\t\t\t\t * PS, but not necessarily flushed. 
all fields\n\t\t\t\t\t\t\t\t * except response valid */\n\tPRFS_RECEIVED,\t\t\t\t/* all fields valid */\n\tPRFS_TAG_REMAINS,\t\t\t/* only buftag and my_ring_index are still\n\t\t\t\t\t\t\t\t * valid */\n} PrefetchStatus;\n\n/* must fit in uint8; bits 0x1 are used */\ntypedef enum {\n\tPRFSF_NONE\t= 0x0,\n\tPRFSF_LFC\t= 0x1  /* received prefetch result is stored in LFC */\n} PrefetchRequestFlags;\n\ntypedef struct PrefetchRequest\n{\n\tBufferTag\tbuftag;\t\t\t/* must be first entry in the struct */\n\tshardno_t\tshard_no;\n\tuint8\t\tstatus;\t\t/* see PrefetchStatus for valid values */\n\tuint8\t\tflags;\t\t/* see PrefetchRequestFlags */\n\tneon_request_lsns request_lsns;\n\tNeonRequestId reqid;\n\tNeonResponse *response;\t\t/* may be null */\n\tuint64\t\tmy_ring_index;\n} PrefetchRequest;\n\n/* prefetch buffer lookup hash table */\n\ntypedef struct PrfHashEntry\n{\n\tPrefetchRequest *slot;\n\tuint32\t\tstatus;\n\tuint32\t\thash;\n} PrfHashEntry;\n\n#define SH_PREFIX\t\t\tprfh\n#define SH_ELEMENT_TYPE\t\tPrfHashEntry\n#define SH_KEY_TYPE\t\t\tPrefetchRequest *\n#define SH_KEY\t\t\t\tslot\n#define SH_STORE_HASH\n#define SH_GET_HASH(tb, a)\t((a)->hash)\n#define SH_HASH_KEY(tb, key) hash_bytes( \\\n\t((const unsigned char *) &(key)->buftag), \\\n\tsizeof(BufferTag) \\\n)\n\n#define SH_EQUAL(tb, a, b)\t(BufferTagsEqual(&(a)->buftag, &(b)->buftag))\n#define SH_SCOPE\t\t\tstatic inline\n#define SH_DEFINE\n#define SH_DECLARE\n#include \"lib/simplehash.h\"\n\n/*\n * PrefetchState maintains the state of (prefetch) getPage@LSN requests.\n * It maintains a (ring) buffer of in-flight requests and responses.\n *\n * We maintain several indexes into the ring buffer:\n * ring_unused >= ring_flush >= ring_receive >= ring_last >= 0\n *\n * ring_unused points to the first unused slot of the buffer\n * ring_receive is the next request that is to be received\n * ring_last is the oldest received entry in the buffer\n *\n * Apart from being an entry in the ring buffer of prefetch 
requests, each\n * PrefetchRequest that is not UNUSED is indexed in prf_hash by buftag.\n */\ntypedef struct PrefetchState\n{\n\tMemoryContext bufctx;\t\t/* context for prf_buffer[].response\n\t\t\t\t\t\t\t\t * allocations */\n\tMemoryContext errctx;\t\t/* context for prf_buffer[].response\n\t\t\t\t\t\t\t\t * allocations */\n\tMemoryContext hashctx;\t\t/* context for prf_buffer */\n\n\t/* buffer indexes */\n\tuint64\t\tring_unused;\t/* first unused slot */\n\tuint64\t\tring_flush;\t\t/* next request to flush */\n\tuint64\t\tring_receive;\t/* next slot that is to receive a response */\n\tuint64\t\tring_last;\t\t/* min slot with a response value */\n\n\t/* metrics / statistics  */\n\tint\t\t\tn_responses_buffered;\t/* count of PS responses not yet in\n\t\t\t\t\t\t\t\t\t\t * buffers */\n\tint\t\t\tn_requests_inflight;\t/* count of PS requests considered in\n\t\t\t\t\t\t\t\t\t\t * flight */\n\tint\t\t\tn_unused;\t\t/* count of buffers < unused, > last, that are\n\t\t\t\t\t\t\t\t * also unused */\n\n\t/* the buffers */\n\tprfh_hash\t*prf_hash;\n\tint\t\t\tmax_shard_no;\n\t/* Mark shards involved in prefetch */\n\tuint8\t\tshard_bitmap[(MAX_SHARDS + 7)/8];\n\tPrefetchRequest prf_buffer[];\t/* prefetch buffers */\n} PrefetchState;\n\nstatic PrefetchState *MyPState;\n\n#define GetPrfSlotNoCheck(ring_index) ( \\\n\t&MyPState->prf_buffer[((ring_index) % readahead_buffer_size)] \\\n)\n\n#define GetPrfSlot(ring_index) ( \\\n\t( \\\n\t\tAssertMacro((ring_index) < MyPState->ring_unused && \\\n\t\t\t\t\t(ring_index) >= MyPState->ring_last), \\\n\t\tGetPrfSlotNoCheck(ring_index) \\\n\t) \\\n)\n\n#define ReceiveBufferNeedsCompaction() (\\\n\t(MyPState->n_responses_buffered / 8) < ( \\\n\t\tMyPState->ring_receive - \\\n\t\t\tMyPState->ring_last - \\\n\t\t\tMyPState->n_responses_buffered \\\n\t) \\\n)\n\nstatic process_interrupts_callback_t prev_interrupt_cb;\n\nstatic bool compact_prefetch_buffers(void);\nstatic void consume_prefetch_responses(void);\nstatic uint64 
prefetch_register_bufferv(BufferTag tag, neon_request_lsns *frlsns,\n\t\t\t\t\t\t\t\t\t\tBlockNumber nblocks, const bits8 *mask,\n\t\t\t\t\t\t\t\t\t\tbool is_prefetch);\nstatic bool prefetch_read(PrefetchRequest *slot);\nstatic void prefetch_do_request(PrefetchRequest *slot, neon_request_lsns *force_request_lsns);\nstatic bool prefetch_wait_for(uint64 ring_index);\nstatic void prefetch_cleanup_trailing_unused(void);\nstatic inline void prefetch_set_unused(uint64 ring_index);\n\nstatic bool neon_prefetch_response_usable(neon_request_lsns *request_lsns,\n\t\t\t\t\t\t\t\t\t\t  PrefetchRequest *slot);\nstatic bool communicator_processinterrupts(void);\n\nvoid\npg_init_communicator(void)\n{\n\tprev_interrupt_cb = ProcessInterruptsCallback;\n\tProcessInterruptsCallback = communicator_processinterrupts;\n}\n\nstatic bool\ncompact_prefetch_buffers(void)\n{\n\tuint64\t\tempty_ring_index = MyPState->ring_last;\n\tuint64\t\tsearch_ring_index = MyPState->ring_receive;\n\tint\t\t\tn_moved = 0;\n\n\tif (MyPState->ring_receive == MyPState->ring_last)\n\t\treturn false;\n\n\twhile (search_ring_index > MyPState->ring_last)\n\t{\n\t\tsearch_ring_index--;\n\t\tif (GetPrfSlot(search_ring_index)->status == PRFS_UNUSED)\n\t\t{\n\t\t\tempty_ring_index = search_ring_index;\n\t\t\tbreak;\n\t\t}\n\t}\n\n\t/*\n\t * Here we have established: slots < search_ring_index have an unknown\n\t * state (not scanned) slots >= search_ring_index and <= empty_ring_index\n\t * are unused slots > empty_ring_index are in use, or outside our buffer's\n\t * range. ... 
unless search_ring_index <= ring_last\n\t *\n\t * Therefore, there is a gap of at least one unused items between\n\t * search_ring_index and empty_ring_index (both inclusive), which grows as\n\t * we hit more unused items while moving backwards through the array.\n\t */\n\n\twhile (search_ring_index > MyPState->ring_last)\n\t{\n\t\tPrefetchRequest *source_slot;\n\t\tPrefetchRequest *target_slot;\n\t\tbool\t\tfound;\n\n\t\t/* update search index to an unprocessed entry */\n\t\tsearch_ring_index--;\n\n\t\tsource_slot = GetPrfSlot(search_ring_index);\n\n\t\tif (source_slot->status == PRFS_UNUSED)\n\t\t\tcontinue;\n\n\t\t/* slot is used -- start moving slot */\n\t\ttarget_slot = GetPrfSlot(empty_ring_index);\n\n\t\tAssert(source_slot->status == PRFS_RECEIVED);\n\t\tAssert(target_slot->status == PRFS_UNUSED);\n\n\t\ttarget_slot->buftag = source_slot->buftag;\n\t\ttarget_slot->shard_no = source_slot->shard_no;\n\t\ttarget_slot->status = source_slot->status;\n\t\ttarget_slot->flags = source_slot->flags;\n\t\ttarget_slot->response = source_slot->response;\n\t\ttarget_slot->reqid = source_slot->reqid;\n\t\ttarget_slot->request_lsns = source_slot->request_lsns;\n\t\ttarget_slot->my_ring_index = empty_ring_index;\n\n\t\tprfh_delete(MyPState->prf_hash, source_slot);\n\t\tprfh_insert(MyPState->prf_hash, target_slot, &found);\n\n\t\tAssert(!found);\n\n\t\t/* Adjust the location of our known-empty slot */\n\t\tempty_ring_index--;\n\n\t\t/* empty the moved slot */\n\t\tsource_slot->status = PRFS_UNUSED;\n\t\tsource_slot->buftag = (BufferTag)\n\t\t{\n\t\t\t0\n\t\t};\n\t\tsource_slot->response = NULL;\n\t\tsource_slot->my_ring_index = 0;\n\t\tsource_slot->request_lsns = (neon_request_lsns) {\n\t\t\tInvalidXLogRecPtr, InvalidXLogRecPtr, InvalidXLogRecPtr\n\t\t};\n\n\t\t/* update bookkeeping */\n\t\tn_moved++;\n\t}\n\n\t/*\n\t * Only when we've moved slots we can expect trailing unused slots, so\n\t * only then we clean up trailing unused slots.\n\t */\n\tif (n_moved > 
0)\n\t{\n\t\tprefetch_cleanup_trailing_unused();\n\t\treturn true;\n\t}\n\n\treturn false;\n}\n\n/*\n * Check that prefetch response matches the slot\n */\nstatic void\ncheck_getpage_response(PrefetchRequest* slot, NeonResponse* resp)\n{\n\tif (resp->tag != T_NeonGetPageResponse && resp->tag != T_NeonErrorResponse)\n\t{\n\t\tneon_shard_log(slot->shard_no, PANIC, \"Unexpected prefetch response %d, ring_receive=\" UINT64_FORMAT \", ring_flush=\" UINT64_FORMAT \", ring_unused=\" UINT64_FORMAT \"\",\n\t\t\t\t\t   resp->tag, MyPState->ring_receive, MyPState->ring_flush, MyPState->ring_unused);\n\t}\n\tif (neon_protocol_version >= 3)\n\t{\n\t\tNRelFileInfo rinfo = BufTagGetNRelFileInfo(slot->buftag);\n\t\tif (resp->tag == T_NeonGetPageResponse)\n\t\t{\n\t\t\tNeonGetPageResponse * getpage_resp = (NeonGetPageResponse *)resp;\n\t\t\tif (resp->reqid != slot->reqid ||\n\t\t\t\tresp->lsn != slot->request_lsns.request_lsn ||\n\t\t\t\tresp->not_modified_since != slot->request_lsns.not_modified_since ||\n\t\t\t\t!RelFileInfoEquals(getpage_resp->req.rinfo, rinfo) ||\n\t\t\t\tgetpage_resp->req.forknum != slot->buftag.forkNum ||\n\t\t\t\tgetpage_resp->req.blkno != slot->buftag.blockNum)\n\t\t\t{\n\t\t\t\tNEON_PANIC_CONNECTION_STATE(slot->shard_no, PANIC,\n\t\t\t\t\t\t\t\t\t\t\t\"Receive unexpected getpage response {reqid=\" UINT64_HEX_FORMAT \",lsn=%X/%08X, since=%X/%08X, rel=%u/%u/%u.%u, block=%u} to get page request {reqid=\" UINT64_HEX_FORMAT \",lsn=%X/%08X, since=%X/%08X, rel=%u/%u/%u.%u, block=%u}\",\n\t\t\t\t\t\t\t\t\t\t\tresp->reqid, LSN_FORMAT_ARGS(resp->lsn), LSN_FORMAT_ARGS(resp->not_modified_since), RelFileInfoFmt(getpage_resp->req.rinfo), getpage_resp->req.forknum, getpage_resp->req.blkno,\n\t\t\t\t\t\t\t\t\t\t\tslot->reqid, LSN_FORMAT_ARGS(slot->request_lsns.request_lsn), LSN_FORMAT_ARGS(slot->request_lsns.not_modified_since), RelFileInfoFmt(rinfo), slot->buftag.forkNum, slot->buftag.blockNum);\n\t\t\t}\n\t\t}\n\t\telse if (resp->reqid != slot->reqid ||\n\t\t\t\t 
resp->lsn != slot->request_lsns.request_lsn ||\n\t\t\t\t resp->not_modified_since != slot->request_lsns.not_modified_since)\n\t\t{\n\t\t\telog(WARNING, NEON_TAG \"Error message {reqid=\" UINT64_HEX_FORMAT \",lsn=%X/%08X, since=%X/%08X} doesn't match exists request {reqid=\" UINT64_HEX_FORMAT \",lsn=%X/%08X, since=%X/%08X}\",\n\t\t\t\t resp->reqid, LSN_FORMAT_ARGS(resp->lsn), LSN_FORMAT_ARGS(resp->not_modified_since),\n\t\t\t\t slot->reqid, LSN_FORMAT_ARGS(slot->request_lsns.request_lsn), LSN_FORMAT_ARGS(slot->request_lsns.not_modified_since));\n\t\t}\n\t}\n}\n\n/*\n * If there might be responses still in the TCP buffer, then we should try to\n * use those, to reduce any TCP backpressure on the OS/PS side.\n *\n * This procedure handles that.\n *\n * Note that this works because we don't pipeline non-getPage requests.\n *\n * NOTE: This procedure is not allowed to throw errors that should be handled\n * by SMGR-related code, as this can be called from every CHECK_FOR_INTERRUPTS\n * point inside and outside PostgreSQL.\n *\n * This still does throw errors when it receives malformed responses from PS.\n */\nvoid\ncommunicator_prefetch_pump_state(void)\n{\n\tSTART_PREFETCH_RECEIVE_WORK();\n\n\twhile (MyPState->ring_receive != MyPState->ring_flush)\n\t{\n\t\tNeonResponse   *response;\n\t\tPrefetchRequest *slot;\n\t\tMemoryContext\told;\n\n\t\tslot = GetPrfSlot(MyPState->ring_receive);\n\n\t\told = MemoryContextSwitchTo(MyPState->errctx);\n\t\tresponse = page_server->try_receive(slot->shard_no);\n\t\tMemoryContextSwitchTo(old);\n\n\t\tif (response == NULL)\n\t\t\tbreak;\n\n\t\tcheck_getpage_response(slot, response);\n\n\t\t/* The slot should still be valid */\n\t\tif (slot->status != PRFS_REQUESTED ||\n\t\t\tslot->response != NULL ||\n\t\t\tslot->my_ring_index != MyPState->ring_receive)\n\t\t{\n\t\t\tneon_shard_log(slot->shard_no, PANIC,\n\t\t\t\t\t\t   \"Incorrect prefetch slot state after receive: status=%d response=%p my=\" UINT64_FORMAT \" receive=\" UINT64_FORMAT 
\"\",\n\t\t\t\t\t\t   slot->status, slot->response,\n\t\t\t\t\t\t   slot->my_ring_index, MyPState->ring_receive);\n\t\t}\n\t\t/* update prefetch state */\n\t\tMyPState->n_responses_buffered += 1;\n\t\tMyPState->n_requests_inflight -= 1;\n\t\tMyPState->ring_receive += 1;\n\t\tMyNeonCounters->getpage_prefetches_buffered =\n\t\t\tMyPState->n_responses_buffered;\n\n\t\t/* update slot state */\n\t\tslot->status = PRFS_RECEIVED;\n\t\tslot->response = response;\n\n\t\tif (response->tag == T_NeonGetPageResponse && !(slot->flags & PRFSF_LFC) && lfc_store_prefetch_result)\n\t\t{\n\t\t\t/*\n\t\t\t * Store prefetched result in LFC (please read comments to lfc_prefetch\n\t\t\t * explaining why it can be done without holding shared buffer lock\n\t\t\t */\n\t\t\tif (lfc_prefetch(BufTagGetNRelFileInfo(slot->buftag), slot->buftag.forkNum, slot->buftag.blockNum, ((NeonGetPageResponse*)response)->page, slot->request_lsns.not_modified_since))\n\t\t\t{\n\t\t\t\tslot->flags |= PRFSF_LFC;\n\t\t\t}\n\t\t}\n\t}\n\n\tEND_PREFETCH_RECEIVE_WORK();\n\n\tcommunicator_reconfigure_timeout_if_needed();\n}\n\nvoid\nreadahead_buffer_resize(int newsize, void *extra)\n{\n\tuint64\t\tend,\n\t\t\t\tnfree = newsize;\n\tPrefetchState *newPState;\n\tSize\t\tnewprfs_size = offsetof(PrefetchState, prf_buffer) +\n\t\t(sizeof(PrefetchRequest) * newsize);\n\n\t/* don't try to re-initialize if we haven't initialized yet */\n\tif (MyPState == NULL)\n\t\treturn;\n\n\t/*\n\t * Make sure that we don't lose track of active prefetch requests by\n\t * ensuring we have received all but the last n requests (n = newsize).\n\t */\n\tif (MyPState->n_requests_inflight > newsize)\n\t{\n\t\tprefetch_wait_for(MyPState->ring_unused - newsize - 1);\n\t\tAssert(MyPState->n_requests_inflight <= newsize);\n\t}\n\n\t/* construct the new PrefetchState, and copy over the memory contexts */\n\tnewPState = MemoryContextAllocZero(TopMemoryContext, newprfs_size);\n\n\tnewPState->bufctx = MyPState->bufctx;\n\tnewPState->errctx = 
MyPState->errctx;\n\tnewPState->hashctx = MyPState->hashctx;\n\tnewPState->prf_hash = prfh_create(MyPState->hashctx, newsize, NULL);\n\tnewPState->n_unused = newsize;\n\tnewPState->n_requests_inflight = 0;\n\tnewPState->n_responses_buffered = 0;\n\tnewPState->ring_last = newsize;\n\tnewPState->ring_unused = newsize;\n\tnewPState->ring_receive = newsize;\n\tnewPState->max_shard_no = MyPState->max_shard_no;\n\tmemcpy(newPState->shard_bitmap, MyPState->shard_bitmap, sizeof(MyPState->shard_bitmap));\n\n\t/*\n\t * Copy over the prefetches.\n\t *\n\t * We populate the prefetch array from the end; to retain the most recent\n\t * prefetches, but this has the benefit of only needing to do one\n\t * iteration on the dataset, and trivial compaction.\n\t */\n\tfor (end = MyPState->ring_unused - 1;\n\t\t end >= MyPState->ring_last && end != UINT64_MAX && nfree != 0;\n\t\t end -= 1)\n\t{\n\t\tPrefetchRequest *slot = GetPrfSlot(end);\n\t\tPrefetchRequest *newslot;\n\t\tbool\t\tfound;\n\n\t\tif (slot->status == PRFS_UNUSED)\n\t\t\tcontinue;\n\n\t\tnfree -= 1;\n\n\t\tnewslot = &newPState->prf_buffer[nfree];\n\t\t*newslot = *slot;\n\t\tnewslot->my_ring_index = nfree;\n\n\t\tprfh_insert(newPState->prf_hash, newslot, &found);\n\n\t\tAssert(!found);\n\n\t\tswitch (newslot->status)\n\t\t{\n\t\t\tcase PRFS_UNUSED:\n\t\t\t\tpg_unreachable();\n\t\t\tcase PRFS_REQUESTED:\n\t\t\t\tnewPState->n_requests_inflight += 1;\n\t\t\t\tnewPState->ring_receive -= 1;\n\t\t\t\tnewPState->ring_last -= 1;\n\t\t\t\tbreak;\n\t\t\tcase PRFS_RECEIVED:\n\t\t\t\tnewPState->n_responses_buffered += 1;\n\t\t\t\tnewPState->ring_last -= 1;\n\t\t\t\tbreak;\n\t\t\tcase PRFS_TAG_REMAINS:\n\t\t\t\tnewPState->ring_last -= 1;\n\t\t\t\tbreak;\n\t\t}\n\t\tnewPState->n_unused -= 1;\n\t}\n\tnewPState->ring_flush = newPState->ring_receive;\n\n\tMyNeonCounters->getpage_prefetches_buffered =\n\t\tMyPState->n_responses_buffered;\n\tMyNeonCounters->pageserver_open_requests =\n\t\tMyPState->n_requests_inflight;\n\n\tfor (; end >= 
MyPState->ring_last && end != UINT64_MAX; end -= 1)\n\t{\n\t\tPrefetchRequest *slot = GetPrfSlot(end);\n\t\tAssert(slot->status != PRFS_REQUESTED);\n\t\tif (slot->status == PRFS_RECEIVED)\n\t\t{\n\t\t\tpfree(slot->response);\n\t\t}\n\t}\n\n\tprfh_destroy(MyPState->prf_hash);\n\tpfree(MyPState);\n\tMyPState = newPState;\n}\n\n\n/*\n * Callback to be called on backend exit to ensure correct state of compute-PS communication\n * in case of backend cancel\n */\nstatic void\nprefetch_on_exit(int code, Datum arg)\n{\n\tif (code != 0) /* do disconnect only on abnormal backend termination */\n\t{\n\t\tshardno_t shard_no = DatumGetInt32(arg);\n\t\tprefetch_on_ps_disconnect();\n\t\tpage_server->disconnect(shard_no);\n\t}\n}\n\n\n/*\n * Make sure that there are no responses still in the buffer.\n *\n * This function may indirectly update MyPState->pfs_hash; which invalidates\n * any active pointers into the hash table.\n */\nstatic void\nconsume_prefetch_responses(void)\n{\n\tif (MyPState->ring_receive < MyPState->ring_unused)\n\t\tprefetch_wait_for(MyPState->ring_unused - 1);\n\t/*\n\t * We know for sure we're not working on any prefetch pages after\n\t * this.\n\t */\n\tEND_PREFETCH_RECEIVE_WORK();\n}\n\nstatic void\nprefetch_cleanup_trailing_unused(void)\n{\n\tuint64\t\tring_index;\n\tPrefetchRequest *slot;\n\n\twhile (MyPState->ring_last < MyPState->ring_receive)\n\t{\n\t\tring_index = MyPState->ring_last;\n\t\tslot = GetPrfSlot(ring_index);\n\n\t\tif (slot->status == PRFS_UNUSED)\n\t\t\tMyPState->ring_last += 1;\n\t\telse\n\t\t\tbreak;\n\t}\n}\n\n\nstatic bool\nprefetch_flush_requests(void)\n{\n\tfor (shardno_t shard_no = 0; shard_no < MyPState->max_shard_no; shard_no++)\n\t{\n\t\tif (BITMAP_ISSET(MyPState->shard_bitmap, shard_no))\n\t\t{\n\t\t\tif (!page_server->flush(shard_no))\n\t\t\t\treturn false;\n\t\t\tBITMAP_CLR(MyPState->shard_bitmap, shard_no);\n\t\t}\n\t}\n\tMyPState->max_shard_no = 0;\n\treturn true;\n}\n\n/*\n * Wait for slot of ring_index to have received 
its response.\n * The caller is responsible for making sure the request buffer is flushed.\n *\n * NOTE: this function may indirectly update MyPState->pfs_hash; which\n * invalidates any active pointers into the hash table.\n * NOTE: callers should make sure they can handle query cancellations in this\n * function's call path.\n */\nstatic bool\nprefetch_wait_for(uint64 ring_index)\n{\n\tPrefetchRequest *entry;\n\tbool\t\tresult = true;\n\n\tif (MyPState->ring_flush <= ring_index &&\n\t\tMyPState->ring_unused > MyPState->ring_flush)\n\t{\n\t\tif (!prefetch_flush_requests())\n\t\t\treturn false;\n\t\tMyPState->ring_flush = MyPState->ring_unused;\n\t}\n\n\tAssert(MyPState->ring_unused > ring_index);\n\n\tSTART_PREFETCH_RECEIVE_WORK();\n\n\twhile (MyPState->ring_receive <= ring_index)\n\t{\n\t\tentry = GetPrfSlot(MyPState->ring_receive);\n\n\t\tAssert(entry->status == PRFS_REQUESTED);\n\t\tif (!prefetch_read(entry))\n\t\t{\n\t\t\tresult = false;\n\t\t\tbreak;\n\t\t}\n\t\tCHECK_FOR_INTERRUPTS();\n\t}\n\n\tif (result)\n\t{\n\t\t/* Check that slot is actually received (srver can be disconnected in prefetch_pump_state called from CHECK_FOR_INTERRUPTS */\n\t\tPrefetchRequest *slot = GetPrfSlot(ring_index);\n\t\tresult = slot->status == PRFS_RECEIVED;\n\t}\n\tEND_PREFETCH_RECEIVE_WORK();\n\n\treturn result;\n;\n}\n\n/*\n * Read the response of a prefetch request into its slot.\n *\n * The caller is responsible for making sure that the request for this buffer\n * was flushed to the PageServer.\n *\n * NOTE: this function may indirectly update MyPState->pfs_hash; which\n * invalidates any active pointers into the hash table.\n *\n * NOTE: this does IO, and can get canceled out-of-line.\n */\nstatic bool\nprefetch_read(PrefetchRequest *slot)\n{\n\tNeonResponse *response;\n\tMemoryContext old;\n\tBufferTag\tbuftag;\n\tshardno_t\tshard_no;\n\tuint64\t\tmy_ring_index;\n\n\tAssert(slot->status == PRFS_REQUESTED);\n\tAssert(slot->response == NULL);\n\tAssert(slot->my_ring_index == 
MyPState->ring_receive);\n\tAssert(readpage_reentrant_guard || AmPrewarmWorker);\n\n\tif (slot->status != PRFS_REQUESTED ||\n\t\tslot->response != NULL ||\n\t\tslot->my_ring_index != MyPState->ring_receive)\n\t{\n\t\tneon_shard_log(slot->shard_no, PANIC,\n\t\t\t\t\t   \"Incorrect prefetch read: status=%d response=%p my=\" UINT64_FORMAT \" receive=\" UINT64_FORMAT \"\",\n\t\t\t\t\t   slot->status, slot->response,\n\t\t\t\t\t   slot->my_ring_index, MyPState->ring_receive);\n\t}\n\n\t/*\n\t * Copy the request info so that if an error happens and the prefetch\n\t * queue is flushed during the receive call, we can print the original\n\t * values in the error message\n\t */\n\tbuftag = slot->buftag;\n\tshard_no = slot->shard_no;\n\tmy_ring_index = slot->my_ring_index;\n\n\told = MemoryContextSwitchTo(MyPState->errctx);\n\tresponse = (NeonResponse *) page_server->receive(shard_no);\n\tMemoryContextSwitchTo(old);\n\tif (response)\n\t{\n\t\tcheck_getpage_response(slot, response);\n\n\t\t/* The slot should still be valid */\n\t\tif (slot->status != PRFS_REQUESTED ||\n\t\t\tslot->response != NULL ||\n\t\t\tslot->my_ring_index != MyPState->ring_receive)\n\t\t{\n\t\t\tneon_shard_log(shard_no, PANIC,\n\t\t\t\t\t\t   \"Incorrect prefetch slot state after receive: status=%d response=%p my=\" UINT64_FORMAT \" receive=\" UINT64_FORMAT \"\",\n\t\t\t\t\t\t   slot->status, slot->response,\n\t\t\t\t\t\t   slot->my_ring_index, MyPState->ring_receive);\n\t\t}\n\n\t\t/* update prefetch state */\n\t\tMyPState->n_responses_buffered += 1;\n\t\tMyPState->n_requests_inflight -= 1;\n\t\tMyPState->ring_receive += 1;\n\t\tMyNeonCounters->getpage_prefetches_buffered =\n\t\t\tMyPState->n_responses_buffered;\n\n\t\t/* update slot state */\n\t\tslot->status = PRFS_RECEIVED;\n\t\tslot->response = response;\n\n\t\tif (response->tag == T_NeonGetPageResponse && !(slot->flags & PRFSF_LFC) && lfc_store_prefetch_result)\n\t\t{\n\t\t\t/*\n\t\t\t * Store prefetched result in LFC (please read comments to 
lfc_prefetch\n\t\t\t * explaining why it can be done without holding shared buffer lock\n\t\t\t */\n\t\t\tif (lfc_prefetch(BufTagGetNRelFileInfo(buftag), buftag.forkNum, buftag.blockNum, ((NeonGetPageResponse*)response)->page, slot->request_lsns.not_modified_since))\n\t\t\t{\n\t\t\t\tslot->flags |= PRFSF_LFC;\n\t\t\t}\n\t\t}\n\t\treturn true;\n\t}\n\telse\n\t{\n\t\t/*\n\t\t * Note: The slot might no longer be valid, if the connection was lost\n\t\t * and the prefetch queue was flushed during the receive call\n\t\t */\n\t\tneon_shard_log(shard_no, LOG,\n\t\t\t\t\t   \"No response from reading prefetch entry \" UINT64_FORMAT \": %u/%u/%u.%u block %u. This can be caused by a concurrent disconnect\",\n\t\t\t\t\t   my_ring_index,\n\t\t\t\t\t   RelFileInfoFmt(BufTagGetNRelFileInfo(buftag)),\n\t\t\t\t\t   buftag.forkNum, buftag.blockNum);\n\t\treturn false;\n\t}\n}\n\n\n/*\n * Wait completion of previosly registered prefetch request.\n * Prefetch result should be placed in LFC by prefetch_wait_for.\n */\nbool\ncommunicator_prefetch_receive(BufferTag tag)\n{\n\tPrfHashEntry *entry;\n\tPrefetchRequest hashkey;\n\n\tAssert(readpage_reentrant_guard || AmPrewarmWorker); /* do not pump prefetch state in prewarm worker */\n\thashkey.buftag = tag;\n\tentry = prfh_lookup(MyPState->prf_hash, &hashkey);\n\tif (entry != NULL && prefetch_wait_for(entry->slot->my_ring_index))\n\t{\n\t\tprefetch_set_unused(entry->slot->my_ring_index);\n\t\treturn true;\n\t}\n\treturn false;\n}\n\n/*\n * Disconnect hook - drop prefetches when the connection drops\n *\n * If we don't remove the failed prefetches, we'd be serving incorrect\n * data to the smgr.\n */\nvoid\nprefetch_on_ps_disconnect(void)\n{\n\tMyPState->ring_flush = MyPState->ring_unused;\n\n\t/* Nothing should cancel disconnect: we should not leave connection in opaque state */\n\tHOLD_INTERRUPTS();\n\n\twhile (MyPState->ring_receive < MyPState->ring_unused)\n\t{\n\t\tPrefetchRequest *slot;\n\t\tuint64\t\tring_index = 
MyPState->ring_receive;\n\n\t\tslot = GetPrfSlot(ring_index);\n\n\t\tAssert(slot->status == PRFS_REQUESTED);\n\t\tAssert(slot->my_ring_index == ring_index);\n\n\t\t/*\n\t\t * Drop connection to all shards which have prefetch requests.\n\t\t * It is not a problem to call disconnect multiple times on the same connection\n\t\t * because disconnect implementation in libpagestore.c will check if connection\n\t\t * is alive and do nothing of connection was already dropped.\n\t\t */\n\t\tpage_server->disconnect(slot->shard_no);\n\n\t\t/* clean up the request */\n\t\tslot->status = PRFS_TAG_REMAINS;\n\t\tMyPState->n_requests_inflight -= 1;\n\t\tMyPState->ring_receive += 1;\n\n\t\tprefetch_set_unused(ring_index);\n\t\tpgBufferUsage.prefetch.expired += 1;\n\t\tMyNeonCounters->getpage_prefetch_discards_total += 1;\n\t}\n\n\t/*\n\t * We can have gone into retry due to network error, so update stats with\n\t * the latest available\n\t */\n\tMyNeonCounters->pageserver_open_requests =\n\t\tMyPState->n_requests_inflight;\n\tMyNeonCounters->getpage_prefetches_buffered =\n\t\tMyPState->n_responses_buffered;\n\n\tRESUME_INTERRUPTS();\n}\n\n/*\n * prefetch_set_unused() - clear a received prefetch slot\n *\n * The slot at ring_index must be a current member of the ring buffer,\n * and may not be in the PRFS_REQUESTED state.\n *\n * NOTE: this function will update MyPState->pfs_hash; which invalidates any\n * active pointers into the hash table.\n */\nstatic inline void\nprefetch_set_unused(uint64 ring_index)\n{\n\tPrefetchRequest *slot;\n\n\tif (ring_index < MyPState->ring_last)\n\t\treturn;\t\t\t\t\t/* Should already be unused */\n\n\tslot = GetPrfSlot(ring_index);\n\tif (slot->status == PRFS_UNUSED)\n\t\treturn;\n\n\tAssert(slot->status == PRFS_RECEIVED || slot->status == PRFS_TAG_REMAINS);\n\n\tif (slot->status == PRFS_RECEIVED)\n\t{\n\t\tpfree(slot->response);\n\t\tslot->response = NULL;\n\n\t\tMyPState->n_responses_buffered -= 1;\n\t\tMyPState->n_unused += 
1;\n\n\t\tMyNeonCounters->getpage_prefetches_buffered =\n\t\t\tMyPState->n_responses_buffered;\n\t}\n\telse\n\t{\n\t\tAssert(slot->response == NULL);\n\t}\n\n\tprfh_delete(MyPState->prf_hash, slot);\n\n\t/* clear all fields */\n\tMemSet(slot, 0, sizeof(PrefetchRequest));\n\tslot->status = PRFS_UNUSED;\n\n\t/* run cleanup if we're holding back ring_last */\n\tif (MyPState->ring_last == ring_index)\n\t\tprefetch_cleanup_trailing_unused();\n\n\t/*\n\t * ... and try to store the buffered responses more compactly if > 12.5%\n\t * of the buffer is gaps\n\t */\n\telse if (ReceiveBufferNeedsCompaction())\n\t\tcompact_prefetch_buffers();\n}\n\n/*\n * Send one prefetch request to the pageserver. To wait for the response, call\n * prefetch_wait_for().\n */\nstatic void\nprefetch_do_request(PrefetchRequest *slot, neon_request_lsns *force_request_lsns)\n{\n\tbool\t\tfound;\n\tuint64\t\tmySlotNo PG_USED_FOR_ASSERTS_ONLY = slot->my_ring_index;\n\n\tNeonGetPageRequest request = {\n\t\t.hdr.tag = T_NeonGetPageRequest,\n\t\t/* lsn and not_modified_since are filled in below */\n\t\t.rinfo = BufTagGetNRelFileInfo(slot->buftag),\n\t\t.forknum = slot->buftag.forkNum,\n\t\t.blkno = slot->buftag.blockNum,\n\t};\n\n\tAssert(mySlotNo == MyPState->ring_unused);\n\n\tif (force_request_lsns)\n\t\tslot->request_lsns = *force_request_lsns;\n\telse\n\t\tneon_get_request_lsns(BufTagGetNRelFileInfo(slot->buftag),\n\t\t\t\t\t\t\t  slot->buftag.forkNum, slot->buftag.blockNum,\n\t\t\t\t\t\t\t  &slot->request_lsns, 1);\n\trequest.hdr.lsn = slot->request_lsns.request_lsn;\n\trequest.hdr.not_modified_since = slot->request_lsns.not_modified_since;\n\n\tAssert(slot->response == NULL);\n\tAssert(slot->my_ring_index == MyPState->ring_unused);\n\n\twhile (!page_server->send(slot->shard_no, (NeonRequest *) &request))\n\t{\n\t\tAssert(mySlotNo == MyPState->ring_unused);\n\t\t/* loop */\n\t}\n\tslot->reqid = request.hdr.reqid;\n\n\t/* update prefetch state */\n\tMyPState->n_requests_inflight += 
1;\n\tMyPState->n_unused -= 1;\n\tMyPState->ring_unused += 1;\n\tBITMAP_SET(MyPState->shard_bitmap, slot->shard_no);\n\tMyPState->max_shard_no = Max(slot->shard_no+1, MyPState->max_shard_no);\n\n\t/* update slot state */\n\tslot->status = PRFS_REQUESTED;\n\tprfh_insert(MyPState->prf_hash, slot, &found);\n\tAssert(!found);\n}\n\n/*\n * Lookup of already received prefetch requests. Only already received responses matching required LSNs are accepted.\n * Present pages are marked in \"mask\" bitmap and total number of such pages is returned.\n */\nint\ncommunicator_prefetch_lookupv(NRelFileInfo rinfo, ForkNumber forknum, BlockNumber blocknum,\n\t\t\t\t\t\t\t  neon_request_lsns *lsns, BlockNumber nblocks,\n\t\t\t\t\t\t\t  void **buffers, bits8 *mask)\n{\n\tint hits = 0;\n\tPrefetchRequest hashkey;\n\n\t/*\n\t * Use an intermediate PrefetchRequest struct as the hash key to ensure\n\t * correct alignment and that the padding bytes are cleared.\n\t */\n\tmemset(&hashkey.buftag, 0, sizeof(BufferTag));\n\tCopyNRelFileInfoToBufTag(hashkey.buftag, rinfo);\n\thashkey.buftag.forkNum = forknum;\n\n\tfor (int i = 0; i < nblocks; i++)\n\t{\n\t\tPrfHashEntry *entry;\n\n\t\thashkey.buftag.blockNum = blocknum + i;\n\t\tentry = prfh_lookup(MyPState->prf_hash, &hashkey);\n\n\t\tif (entry != NULL)\n\t\t{\n\t\t\tPrefetchRequest *slot = entry->slot;\n\t\t\tuint64 ring_index = slot->my_ring_index;\n\t\t\tAssert(slot == GetPrfSlot(ring_index));\n\n\t\t\tAssert(slot->status != PRFS_UNUSED);\n\t\t\tAssert(MyPState->ring_last <= ring_index &&\n\t\t\t\t   ring_index < MyPState->ring_unused);\n\t\t\tAssert(BufferTagsEqual(&slot->buftag, &hashkey.buftag));\n\n\t\t\tif (slot->status != PRFS_RECEIVED)\n\t\t\t\tcontinue;\n\n\t\t\t/*\n\t\t\t * If the caller specified a request LSN to use, only accept\n\t\t\t * prefetch responses that satisfy that request.\n\t\t\t */\n\t\t\tif (!neon_prefetch_response_usable(&lsns[i], slot))\n\t\t\t\tcontinue;\n\n\t\t\t/*\n\t\t\t * Ignore errors\n\t\t\t */\n\t\t\tif 
(slot->response->tag == T_NeonErrorResponse)\n\t\t\t{\n\t\t\t\tcontinue;\n\t\t\t}\n\t\t\tAssert(slot->response->tag == T_NeonGetPageResponse); /* checked by check_getpage_response when response was assigned to the slot */\n\t\t\tmemcpy(buffers[i], ((NeonGetPageResponse*)slot->response)->page, BLCKSZ);\n\n\n\t\t\t/*\n\t\t\t * With lfc_store_prefetch_result=true prefetch result is stored in LFC in prefetch_pump_state when response is received\n\t\t\t * from page server. But if lfc_store_prefetch_result=false then it is not yet stored in LFC and we have to do it here\n\t\t\t * under buffer lock.\n\t\t\t */\n\t\t\tif (!lfc_store_prefetch_result)\n\t\t\t\tlfc_write(rinfo, forknum, blocknum + i, buffers[i]);\n\n\t\t\tprefetch_set_unused(ring_index);\n\t\t\tBITMAP_SET(mask, i);\n\n\t\t\thits += 1;\n\t\t\tinc_getpage_wait(0);\n\t\t}\n\t}\n\tpgBufferUsage.prefetch.hits += hits;\n\treturn hits;\n}\n\n/*\n * prefetch_register_bufferv() - register and prefetch buffers\n *\n * Register that we may want the contents of BufferTag in the near future.\n * This is used when issuing a speculative prefetch request, but also when\n * performing a synchronous request and need the buffer right now.\n *\n * If force_request_lsns is not NULL, those values are sent to the\n * pageserver. If NULL, we utilize the lastWrittenLsn -infrastructure\n * to calculate the LSNs to send.\n *\n * Bits set in *mask (if present) indicate pages already read; i.e. pages we\n * can skip in this process.\n *\n * When performing a prefetch rather than a synchronous request,\n * is_prefetch==true. 
Currently, it only affects how the request is accounted\n * in the perf counters.\n *\n * NOTE: this function may indirectly update MyPState->pfs_hash; which\n * invalidates any active pointers into the hash table.\n */\nvoid\ncommunicator_prefetch_register_bufferv(BufferTag tag, neon_request_lsns *frlsns,\n\t\t\t\t\t\t\t\t\t   BlockNumber nblocks, const bits8 *mask)\n{\n\tuint64\t\tring_index PG_USED_FOR_ASSERTS_ONLY;\n\n\tring_index = prefetch_register_bufferv(tag, frlsns, nblocks, mask, true);\n\n\tAssert(ring_index < MyPState->ring_unused &&\n\t\t   MyPState->ring_last <= ring_index);\n}\n\n/* Internal version. Returns the ring index of the last block (result of this function is used only\n*  when nblocks==1)\n*/\nstatic uint64\nprefetch_register_bufferv(BufferTag tag, neon_request_lsns *frlsns,\n\t\t\t\t\t\t  BlockNumber nblocks, const bits8 *mask,\n\t\t\t\t\t\t  bool is_prefetch)\n{\n\tuint64\t\tlast_ring_index;\n\tPrefetchRequest hashkey;\n#ifdef USE_ASSERT_CHECKING\n\tbool\t\tany_hits = false;\n#endif\n\t/* We will never read further ahead than our buffer can store. 
*/\n\tnblocks = Max(1, Min(nblocks, readahead_buffer_size));\n\n\t/*\n\t * Use an intermediate PrefetchRequest struct as the hash key to ensure\n\t * correct alignment and that the padding bytes are cleared.\n\t */\n\tmemset(&hashkey.buftag, 0, sizeof(BufferTag));\n\thashkey.buftag = tag;\n\nRetry:\n\t/*\n\t * We can have gone into retry due to network error, so update stats with\n\t * the latest available\n\t */\n\tMyNeonCounters->pageserver_open_requests =\n\t\tMyPState->ring_unused - MyPState->ring_receive;\n\tMyNeonCounters->getpage_prefetches_buffered =\n\t\tMyPState->n_responses_buffered;\n\tlast_ring_index = UINT64_MAX;\n\n\tfor (int i = 0; i < nblocks; i++)\n\t{\n\t\tPrefetchRequest *slot = NULL;\n\t\tPrfHashEntry *entry = NULL;\n\t\tneon_request_lsns *lsns;\n\n\t\tif (PointerIsValid(mask) && BITMAP_ISSET(mask, i))\n\t\t\tcontinue;\n\n\t\tif (frlsns)\n\t\t\tlsns = &frlsns[i];\n\t\telse\n\t\t\tlsns = NULL;\n\n#ifdef USE_ASSERT_CHECKING\n\t\tany_hits = true;\n#endif\n\n\t\tslot = NULL;\n\t\tentry = NULL;\n\n\t\thashkey.buftag.blockNum = tag.blockNum + i;\n\t\tentry = prfh_lookup(MyPState->prf_hash, &hashkey);\n\n\t\tif (entry != NULL)\n\t\t{\n\t\t\tslot = entry->slot;\n\t\t\tlast_ring_index = slot->my_ring_index;\n\t\t\tAssert(slot == GetPrfSlot(last_ring_index));\n\n\t\t\tAssert(slot->status != PRFS_UNUSED);\n\t\t\tAssert(MyPState->ring_last <= last_ring_index &&\n\t\t\t\t   last_ring_index < MyPState->ring_unused);\n\t\t\tAssert(BufferTagsEqual(&slot->buftag, &hashkey.buftag));\n\n\t\t\t/*\n\t\t\t * If the caller specified a request LSN to use, only accept\n\t\t\t * prefetch responses that satisfy that request.\n\t\t\t */\n\t\t\tif (!is_prefetch)\n\t\t\t{\n\t\t\t\tif (!neon_prefetch_response_usable(lsns, slot))\n\t\t\t\t{\n\t\t\t\t\t/* Wait for the old request to finish and discard it */\n\t\t\t\t\tif (!prefetch_wait_for(last_ring_index))\n\t\t\t\t\t\tgoto Retry;\n\t\t\t\t\tprefetch_set_unused(last_ring_index);\n\t\t\t\t\tentry = NULL;\n\t\t\t\t\tslot = 
NULL;\n\t\t\t\t\tpgBufferUsage.prefetch.expired += 1;\n\t\t\t\t\tMyNeonCounters->getpage_prefetch_discards_total += 1;\n\t\t\t\t}\n\t\t\t}\n\n\t\t\tif (entry != NULL)\n\t\t\t{\n\t\t\t\t/*\n\t\t\t\t * We received a prefetch for a page that was recently read\n\t\t\t\t * and removed from the buffers. Remove that request from the\n\t\t\t\t * buffers.\n\t\t\t\t */\n\t\t\t\tif (slot->status == PRFS_TAG_REMAINS)\n\t\t\t\t{\n\t\t\t\t\tprefetch_set_unused(last_ring_index);\n\t\t\t\t\tentry = NULL;\n\t\t\t\t\tslot = NULL;\n\t\t\t\t}\n\t\t\t\telse\n\t\t\t\t{\n\t\t\t\t\t/* The buffered request is good enough, return that index */\n\t\t\t\t\tif (is_prefetch)\n\t\t\t\t\t\tpgBufferUsage.prefetch.duplicates++;\n\t\t\t\t\tcontinue;\n\t\t\t\t}\n\t\t\t}\n\t\t}\n\t\telse if (!is_prefetch)\n\t\t{\n\t\t\tpgBufferUsage.prefetch.misses += 1;\n\t\t\tMyNeonCounters->getpage_prefetch_misses_total++;\n\t\t}\n\t\t/*\n\t\t * We can only leave the block above by finding that there's\n\t\t * no entry that can satisfy this request, either because there\n\t\t * was no entry, or because the entry was invalid or didn't satisfy\n\t\t * the LSNs provided.\n\t\t *\n\t\t * The code should've made sure to clear up the data.\n\t\t */\n\t\tAssert(entry == NULL);\n\t\tAssert(slot == NULL);\n\n\t\t/* There should be no buffer overflow */\n\t\tAssert(MyPState->ring_last + readahead_buffer_size >= MyPState->ring_unused);\n\n\t\t/*\n\t\t * If the prefetch queue is full, we need to make room by clearing the\n\t\t * oldest slot. If the oldest slot holds a buffer that was already\n\t\t * received, we can just throw it away; we fetched the page\n\t\t * unnecessarily in that case. If the oldest slot holds a request that\n\t\t * we haven't received a response for yet, we have to wait for the\n\t\t * response to that before we can continue. We might not have even\n\t\t * flushed the request to the pageserver yet, it might be just sitting\n\t\t * in the output buffer. 
In that case, we flush it and wait for the\n\t\t * response. (We could decide not to send it, but it's hard to abort\n\t\t * when the request is already in the output buffer, and 'not sending'\n\t\t * a prefetch request kind of goes against the principles of\n\t\t * prefetching)\n\t\t */\n\t\tif (MyPState->ring_last + readahead_buffer_size == MyPState->ring_unused)\n\t\t{\n\t\t\tuint64\t\tcleanup_index = MyPState->ring_last;\n\n\t\t\tslot = GetPrfSlot(cleanup_index);\n\n\t\t\tAssert(slot->status != PRFS_UNUSED);\n\n\t\t\t/*\n\t\t\t * If there is good reason to run compaction on the prefetch buffers,\n\t\t\t * try to do that.\n\t\t\t */\n\t\t\tif (ReceiveBufferNeedsCompaction() && compact_prefetch_buffers())\n\t\t\t{\n\t\t\t\tAssert(slot->status == PRFS_UNUSED);\n\t\t\t}\n\t\t\telse\n\t\t\t{\n\t\t\t\t/*\n\t\t\t\t * We have the slot for ring_last, so that must still be in\n\t\t\t\t * progress\n\t\t\t\t */\n\t\t\t\tswitch (slot->status)\n\t\t\t\t{\n\t\t\t\t\tcase PRFS_REQUESTED:\n\t\t\t\t\t\tAssert(MyPState->ring_receive == cleanup_index);\n\t\t\t\t\t\tif (!prefetch_wait_for(cleanup_index))\n\t\t\t\t\t\t\tgoto Retry;\n\t\t\t\t\t\tprefetch_set_unused(cleanup_index);\n\t\t\t\t\t\tpgBufferUsage.prefetch.expired += 1;\n\t\t\t\t\t\tMyNeonCounters->getpage_prefetch_discards_total += 1;\n\t\t\t\t\t\tbreak;\n\t\t\t\t\tcase PRFS_RECEIVED:\n\t\t\t\t\tcase PRFS_TAG_REMAINS:\n\t\t\t\t\t\tprefetch_set_unused(cleanup_index);\n\t\t\t\t\t\tpgBufferUsage.prefetch.expired += 1;\n\t\t\t\t\t\tMyNeonCounters->getpage_prefetch_discards_total += 1;\n\t\t\t\t\t\tbreak;\n\t\t\t\t\tdefault:\n\t\t\t\t\t\tpg_unreachable();\n\t\t\t\t}\n\t\t\t}\n\t\t}\n\n\t\t/*\n\t\t * The next buffer pointed to by `ring_unused` is now definitely empty, so\n\t\t * we can insert the new request to it.\n\t\t */\n\t\tlast_ring_index = MyPState->ring_unused;\n\n\t\tAssert(MyPState->ring_last <= last_ring_index &&\n\t\t\t   last_ring_index <= MyPState->ring_unused);\n\n\t\tslot = 
GetPrfSlotNoCheck(last_ring_index);\n\n\t\tAssert(slot->status == PRFS_UNUSED);\n\n\t\t/*\n\t\t * We must update the slot data before insertion, because the hash\n\t\t * function reads the buffer tag from the slot.\n\t\t */\n\t\tslot->buftag = hashkey.buftag;\n\t\tslot->shard_no = get_shard_number(&tag);\n\t\tslot->my_ring_index = last_ring_index;\n\t\tslot->flags = 0;\n\n\t\tif (is_prefetch)\n\t\t\tMyNeonCounters->getpage_prefetch_requests_total++;\n\t\telse\n\t\t\tMyNeonCounters->getpage_sync_requests_total++;\n\n\t\tprefetch_do_request(slot, lsns);\n\t}\n\n\tMyNeonCounters->pageserver_open_requests =\n\t\tMyPState->ring_unused - MyPState->ring_receive;\n\n\tAssert(any_hits);\n\tAssert(last_ring_index != UINT64_MAX);\n\n\tAssert(GetPrfSlot(last_ring_index)->status == PRFS_REQUESTED ||\n\t\t   GetPrfSlot(last_ring_index)->status == PRFS_RECEIVED);\n\tAssert(MyPState->ring_last <= last_ring_index &&\n\t\t   last_ring_index < MyPState->ring_unused);\n\n\tif (flush_every_n_requests > 0 &&\n\t\tMyPState->ring_unused - MyPState->ring_flush >= flush_every_n_requests)\n\t{\n\t\tif (!prefetch_flush_requests())\n\t\t{\n\t\t\t/*\n\t\t\t * Prefetch set is reset in case of error, so we should try to\n\t\t\t * register our request once again\n\t\t\t */\n\t\t\tgoto Retry;\n\t\t}\n\t\tMyPState->ring_flush = MyPState->ring_unused;\n\t}\n\n\treturn last_ring_index;\n}\n\nstatic bool\nequal_requests(NeonRequest* a, NeonRequest* b)\n{\n\treturn a->reqid == b->reqid && a->lsn == b->lsn && a->not_modified_since == b->not_modified_since;\n}\n\n\n/*\n * Note: this function can get canceled and use a long jump to the next catch\n * context. 
Take care.\n */\nstatic NeonResponse *\npage_server_request(void const *req)\n{\n\tNeonResponse *resp = NULL;\n\tBufferTag tag = {0};\n\tshardno_t shard_no;\n\n\tswitch (messageTag(req))\n\t{\n\t\tcase T_NeonExistsRequest:\n\t\t\tCopyNRelFileInfoToBufTag(tag, ((NeonExistsRequest *) req)->rinfo);\n\t\t\tbreak;\n\t\tcase T_NeonNblocksRequest:\n\t\t\tCopyNRelFileInfoToBufTag(tag, ((NeonNblocksRequest *) req)->rinfo);\n\t\t\tbreak;\n\t\tcase T_NeonDbSizeRequest:\n\t\t\tNInfoGetDbOid(BufTagGetNRelFileInfo(tag)) = ((NeonDbSizeRequest *) req)->dbNode;\n\t\t\tbreak;\n\t\tcase T_NeonGetPageRequest:\n\t\t\tCopyNRelFileInfoToBufTag(tag, ((NeonGetPageRequest *) req)->rinfo);\n\t\t\ttag.blockNum = ((NeonGetPageRequest *) req)->blkno;\n\t\t\tbreak;\n\t\tdefault:\n\t\t\tneon_log(PANIC, \"Unexpected request tag: %d\", messageTag(req));\n\t}\n\tshard_no = get_shard_number(&tag);\n\n\t/*\n\t * Current sharding model assumes that all metadata is present only at shard 0.\n\t * We still need to call get_shard_no() to check if shard map is up-to-date.\n\t */\n\tif (((NeonRequest *) req)->tag != T_NeonGetPageRequest)\n\t{\n\t\tshard_no = 0;\n\t}\n\n\tconsume_prefetch_responses();\n\n\tPG_TRY();\n\t{\n\t\tbefore_shmem_exit(prefetch_on_exit, Int32GetDatum(shard_no));\n\t\tdo\n\t\t{\n\t\t\twhile (!page_server->send(shard_no, (NeonRequest *) req)\n\t\t\t\t   || !page_server->flush(shard_no))\n\t\t\t{\n\t\t\t\t/* do nothing */\n\t\t\t}\n\t\t\tMyNeonCounters->pageserver_open_requests++;\n\t\t\tresp = page_server->receive(shard_no);\n\t\t\tMyNeonCounters->pageserver_open_requests--;\n\t\t} while (resp == NULL);\n\t\tcancel_before_shmem_exit(prefetch_on_exit, Int32GetDatum(shard_no));\n\t}\n\tPG_CATCH();\n\t{\n\t\tcancel_before_shmem_exit(prefetch_on_exit, Int32GetDatum(shard_no));\n\t\t/* Nothing should cancel disconnect: we should not leave connection in opaque state */\n\t\tHOLD_INTERRUPTS();\n\t\tpage_server->disconnect(shard_no);\n\t\tMyNeonCounters->pageserver_open_requests = 
0;\n\t\tRESUME_INTERRUPTS();\n\n\t\tPG_RE_THROW();\n\t}\n\tPG_END_TRY();\n\n\n\treturn resp;\n}\n\n\nStringInfoData\nnm_pack_request(NeonRequest *msg)\n{\n\tStringInfoData s;\n\n\tinitStringInfo(&s);\n\n\tpq_sendbyte(&s, msg->tag);\n\tif (neon_protocol_version >= 3)\n\t{\n\t\tpq_sendint64(&s, msg->reqid);\n\t}\n\tpq_sendint64(&s, msg->lsn);\n\tpq_sendint64(&s, msg->not_modified_since);\n\n\tswitch (messageTag(msg))\n\t{\n\t\t\t/* pagestore_client -> pagestore */\n\t\tcase T_NeonExistsRequest:\n\t\t\t{\n\t\t\t\tNeonExistsRequest *msg_req = (NeonExistsRequest *) msg;\n\n\t\t\t\tpq_sendint32(&s, NInfoGetSpcOid(msg_req->rinfo));\n\t\t\t\tpq_sendint32(&s, NInfoGetDbOid(msg_req->rinfo));\n\t\t\t\tpq_sendint32(&s, NInfoGetRelNumber(msg_req->rinfo));\n\t\t\t\tpq_sendbyte(&s, msg_req->forknum);\n\n\t\t\t\tbreak;\n\t\t\t}\n\t\tcase T_NeonNblocksRequest:\n\t\t\t{\n\t\t\t\tNeonNblocksRequest *msg_req = (NeonNblocksRequest *) msg;\n\n\t\t\t\tpq_sendint32(&s, NInfoGetSpcOid(msg_req->rinfo));\n\t\t\t\tpq_sendint32(&s, NInfoGetDbOid(msg_req->rinfo));\n\t\t\t\tpq_sendint32(&s, NInfoGetRelNumber(msg_req->rinfo));\n\t\t\t\tpq_sendbyte(&s, msg_req->forknum);\n\n\t\t\t\tbreak;\n\t\t\t}\n\t\tcase T_NeonDbSizeRequest:\n\t\t\t{\n\t\t\t\tNeonDbSizeRequest *msg_req = (NeonDbSizeRequest *) msg;\n\n\t\t\t\tpq_sendint32(&s, msg_req->dbNode);\n\n\t\t\t\tbreak;\n\t\t\t}\n\t\tcase T_NeonGetPageRequest:\n\t\t\t{\n\t\t\t\tNeonGetPageRequest *msg_req = (NeonGetPageRequest *) msg;\n\n\t\t\t\tpq_sendint32(&s, NInfoGetSpcOid(msg_req->rinfo));\n\t\t\t\tpq_sendint32(&s, NInfoGetDbOid(msg_req->rinfo));\n\t\t\t\tpq_sendint32(&s, NInfoGetRelNumber(msg_req->rinfo));\n\t\t\t\tpq_sendbyte(&s, msg_req->forknum);\n\t\t\t\tpq_sendint32(&s, msg_req->blkno);\n\n\t\t\t\tbreak;\n\t\t\t}\n\n\t\tcase T_NeonGetSlruSegmentRequest:\n\t\t\t{\n\t\t\t\tNeonGetSlruSegmentRequest *msg_req = (NeonGetSlruSegmentRequest *) msg;\n\n\t\t\t\tpq_sendbyte(&s, msg_req->kind);\n\t\t\t\tpq_sendint32(&s, 
msg_req->segno);\n\n\t\t\t\tbreak;\n\t\t\t}\n\n\t\t\t/* pagestore -> pagestore_client. We never need to create these. */\n\t\tcase T_NeonExistsResponse:\n\t\tcase T_NeonNblocksResponse:\n\t\tcase T_NeonGetPageResponse:\n\t\tcase T_NeonErrorResponse:\n\t\tcase T_NeonDbSizeResponse:\n\t\tcase T_NeonGetSlruSegmentResponse:\n\t\tdefault:\n\t\t\tneon_log(PANIC, \"unexpected neon message tag 0x%02x\", msg->tag);\n\t\t\tbreak;\n\t}\n\treturn s;\n}\n\nNeonResponse *\nnm_unpack_response(StringInfo s)\n{\n\tNeonMessageTag tag = pq_getmsgbyte(s);\n\tNeonResponse resp_hdr = {0}; /* make valgrind happy */\n\tNeonResponse *resp = NULL;\n\n\tresp_hdr.tag = tag;\n\tif (neon_protocol_version >= 3)\n\t{\n\t\tresp_hdr.reqid = pq_getmsgint64(s);\n\t\tresp_hdr.lsn = pq_getmsgint64(s);\n\t\tresp_hdr.not_modified_since = pq_getmsgint64(s);\n\t}\n\tswitch (tag)\n\t{\n\t\t\t/* pagestore -> pagestore_client */\n\t\tcase T_NeonExistsResponse:\n\t\t\t{\n\t\t\t\tNeonExistsResponse *msg_resp = palloc0(sizeof(NeonExistsResponse));\n\n\t\t\t\tif (neon_protocol_version >= 3)\n\t\t\t\t{\n\t\t\t\t\tNInfoGetSpcOid(msg_resp->req.rinfo) = pq_getmsgint(s, 4);\n\t\t\t\t\tNInfoGetDbOid(msg_resp->req.rinfo) = pq_getmsgint(s, 4);\n\t\t\t\t\tNInfoGetRelNumber(msg_resp->req.rinfo) = pq_getmsgint(s, 4);\n\t\t\t\t\tmsg_resp->req.forknum = pq_getmsgbyte(s);\n\t\t\t\t}\n\t\t\t\tmsg_resp->req.hdr = resp_hdr;\n\t\t\t\tmsg_resp->exists = pq_getmsgbyte(s);\n\t\t\t\tpq_getmsgend(s);\n\n\t\t\t\tresp = (NeonResponse *) msg_resp;\n\t\t\t\tbreak;\n\t\t\t}\n\n\t\tcase T_NeonNblocksResponse:\n\t\t\t{\n\t\t\t\tNeonNblocksResponse *msg_resp = palloc0(sizeof(NeonNblocksResponse));\n\n\t\t\t\tif (neon_protocol_version >= 3)\n\t\t\t\t{\n\t\t\t\t\tNInfoGetSpcOid(msg_resp->req.rinfo) = pq_getmsgint(s, 4);\n\t\t\t\t\tNInfoGetDbOid(msg_resp->req.rinfo) = pq_getmsgint(s, 4);\n\t\t\t\t\tNInfoGetRelNumber(msg_resp->req.rinfo) = pq_getmsgint(s, 4);\n\t\t\t\t\tmsg_resp->req.forknum = 
pq_getmsgbyte(s);\n\t\t\t\t}\n\t\t\t\tmsg_resp->req.hdr = resp_hdr;\n\t\t\t\tmsg_resp->n_blocks = pq_getmsgint(s, 4);\n\t\t\t\tpq_getmsgend(s);\n\n\t\t\t\tresp = (NeonResponse *) msg_resp;\n\t\t\t\tbreak;\n\t\t\t}\n\n\t\tcase T_NeonGetPageResponse:\n\t\t\t{\n\t\t\t\tNeonGetPageResponse *msg_resp;\n\n\t\t\t\tmsg_resp = MemoryContextAllocZero(MyPState->bufctx, PS_GETPAGERESPONSE_SIZE);\n\t\t\t\tif (neon_protocol_version >= 3)\n\t\t\t\t{\n\t\t\t\t\tNInfoGetSpcOid(msg_resp->req.rinfo) = pq_getmsgint(s, 4);\n\t\t\t\t\tNInfoGetDbOid(msg_resp->req.rinfo) = pq_getmsgint(s, 4);\n\t\t\t\t\tNInfoGetRelNumber(msg_resp->req.rinfo) = pq_getmsgint(s, 4);\n\t\t\t\t\tmsg_resp->req.forknum = pq_getmsgbyte(s);\n\t\t\t\t\tmsg_resp->req.blkno = pq_getmsgint(s, 4);\n\t\t\t\t}\n\t\t\t\tmsg_resp->req.hdr = resp_hdr;\n\t\t\t\t/* XXX:\tshould be varlena */\n\t\t\t\tmemcpy(msg_resp->page, pq_getmsgbytes(s, BLCKSZ), BLCKSZ);\n\t\t\t\tpq_getmsgend(s);\n\n\t\t\t\tAssert(msg_resp->req.hdr.tag == T_NeonGetPageResponse);\n\n\t\t\t\tresp = (NeonResponse *) msg_resp;\n\t\t\t\tbreak;\n\t\t\t}\n\n\t\tcase T_NeonDbSizeResponse:\n\t\t\t{\n\t\t\t\tNeonDbSizeResponse *msg_resp = palloc0(sizeof(NeonDbSizeResponse));\n\n\t\t\t\tif (neon_protocol_version >= 3)\n\t\t\t\t{\n\t\t\t\t\tmsg_resp->req.dbNode = pq_getmsgint(s, 4);\n\t\t\t\t}\n\t\t\t\tmsg_resp->req.hdr = resp_hdr;\n\t\t\t\tmsg_resp->db_size = pq_getmsgint64(s);\n\t\t\t\tpq_getmsgend(s);\n\n\t\t\t\tresp = (NeonResponse *) msg_resp;\n\t\t\t\tbreak;\n\t\t\t}\n\n\t\tcase T_NeonErrorResponse:\n\t\t\t{\n\t\t\t\tNeonErrorResponse *msg_resp;\n\t\t\t\tsize_t\t\tmsglen;\n\t\t\t\tconst char *msgtext;\n\n\t\t\t\tmsgtext = pq_getmsgrawstring(s);\n\t\t\t\tmsglen = strlen(msgtext);\n\n\t\t\t\tmsg_resp = palloc0(sizeof(NeonErrorResponse) + msglen + 1);\n\t\t\t\tmsg_resp->req = resp_hdr;\n\t\t\t\tmemcpy(msg_resp->message, msgtext, msglen + 1);\n\t\t\t\tpq_getmsgend(s);\n\n\t\t\t\tresp = (NeonResponse *) msg_resp;\n\t\t\t\tbreak;\n\t\t\t}\n\n\t\tcase 
T_NeonGetSlruSegmentResponse:\n\t\t    {\n\t\t\t\tNeonGetSlruSegmentResponse *msg_resp;\n\t\t\t\tint n_blocks;\n\t\t\t\tmsg_resp = palloc0(sizeof(NeonGetSlruSegmentResponse));\n\n\t\t\t\tif (neon_protocol_version >= 3)\n\t\t\t\t{\n\t\t\t\t\tmsg_resp->req.kind = pq_getmsgbyte(s);\n\t\t\t\t\tmsg_resp->req.segno = pq_getmsgint(s, 4);\n\t\t\t\t}\n\t\t\t\tmsg_resp->req.hdr = resp_hdr;\n\n\t\t\t\tn_blocks = pq_getmsgint(s, 4);\n\t\t\t\tmsg_resp->n_blocks = n_blocks;\n\t\t\t\tmemcpy(msg_resp->data, pq_getmsgbytes(s, n_blocks * BLCKSZ), n_blocks * BLCKSZ);\n\t\t\t\tpq_getmsgend(s);\n\n\t\t\t\tresp = (NeonResponse *) msg_resp;\n\t\t\t\tbreak;\n\t\t\t}\n\n\t\t\t/*\n\t\t\t * pagestore_client -> pagestore\n\t\t\t *\n\t\t\t * We create these ourselves, and don't need to decode them.\n\t\t\t */\n\t\tcase T_NeonExistsRequest:\n\t\tcase T_NeonNblocksRequest:\n\t\tcase T_NeonGetPageRequest:\n\t\tcase T_NeonDbSizeRequest:\n\t\tcase T_NeonGetSlruSegmentRequest:\n\t\tdefault:\n\t\t\tneon_log(PANIC, \"unexpected neon message tag 0x%02x\", tag);\n\t\t\tbreak;\n\t}\n\n\treturn resp;\n}\n\n/* dump to json for debugging / error reporting purposes */\nchar *\nnm_to_string(NeonMessage *msg)\n{\n\tStringInfoData s;\n\n\tinitStringInfo(&s);\n\n\tswitch (messageTag(msg))\n\t{\n\t\t\t/* pagestore_client -> pagestore */\n\t\tcase T_NeonExistsRequest:\n\t\t\t{\n\t\t\t\tNeonExistsRequest *msg_req = (NeonExistsRequest *) msg;\n\n\t\t\t\tappendStringInfoString(&s, \"{\\\"type\\\": \\\"NeonExistsRequest\\\"\");\n\t\t\t\tappendStringInfo(&s, \", \\\"rinfo\\\": \\\"%u/%u/%u\\\"\", RelFileInfoFmt(msg_req->rinfo));\n\t\t\t\tappendStringInfo(&s, \", \\\"forknum\\\": %d\", msg_req->forknum);\n\t\t\t\tappendStringInfo(&s, \", \\\"lsn\\\": \\\"%X/%X\\\"\", LSN_FORMAT_ARGS(msg_req->hdr.lsn));\n\t\t\t\tappendStringInfo(&s, \", \\\"not_modified_since\\\": \\\"%X/%X\\\"\", LSN_FORMAT_ARGS(msg_req->hdr.not_modified_since));\n\t\t\t\tappendStringInfoChar(&s, '}');\n\t\t\t\tbreak;\n\t\t\t}\n\n\t\tcase 
T_NeonNblocksRequest:\n\t\t\t{\n\t\t\t\tNeonNblocksRequest *msg_req = (NeonNblocksRequest *) msg;\n\n\t\t\t\tappendStringInfoString(&s, \"{\\\"type\\\": \\\"NeonNblocksRequest\\\"\");\n\t\t\t\tappendStringInfo(&s, \", \\\"rinfo\\\": \\\"%u/%u/%u\\\"\", RelFileInfoFmt(msg_req->rinfo));\n\t\t\t\tappendStringInfo(&s, \", \\\"forknum\\\": %d\", msg_req->forknum);\n\t\t\t\tappendStringInfo(&s, \", \\\"lsn\\\": \\\"%X/%X\\\"\", LSN_FORMAT_ARGS(msg_req->hdr.lsn));\n\t\t\t\tappendStringInfo(&s, \", \\\"not_modified_since\\\": \\\"%X/%X\\\"\", LSN_FORMAT_ARGS(msg_req->hdr.not_modified_since));\n\t\t\t\tappendStringInfoChar(&s, '}');\n\t\t\t\tbreak;\n\t\t\t}\n\n\t\tcase T_NeonGetPageRequest:\n\t\t\t{\n\t\t\t\tNeonGetPageRequest *msg_req = (NeonGetPageRequest *) msg;\n\n\t\t\t\tappendStringInfoString(&s, \"{\\\"type\\\": \\\"NeonGetPageRequest\\\"\");\n\t\t\t\tappendStringInfo(&s, \", \\\"rinfo\\\": \\\"%u/%u/%u\\\"\", RelFileInfoFmt(msg_req->rinfo));\n\t\t\t\tappendStringInfo(&s, \", \\\"forknum\\\": %d\", msg_req->forknum);\n\t\t\t\tappendStringInfo(&s, \", \\\"blkno\\\": %u\", msg_req->blkno);\n\t\t\t\tappendStringInfo(&s, \", \\\"lsn\\\": \\\"%X/%X\\\"\", LSN_FORMAT_ARGS(msg_req->hdr.lsn));\n\t\t\t\tappendStringInfo(&s, \", \\\"not_modified_since\\\": \\\"%X/%X\\\"\", LSN_FORMAT_ARGS(msg_req->hdr.not_modified_since));\n\t\t\t\tappendStringInfoChar(&s, '}');\n\t\t\t\tbreak;\n\t\t\t}\n\t\tcase T_NeonDbSizeRequest:\n\t\t\t{\n\t\t\t\tNeonDbSizeRequest *msg_req = (NeonDbSizeRequest *) msg;\n\n\t\t\t\tappendStringInfoString(&s, \"{\\\"type\\\": \\\"NeonDbSizeRequest\\\"\");\n\t\t\t\tappendStringInfo(&s, \", \\\"dbnode\\\": \\\"%u\\\"\", msg_req->dbNode);\n\t\t\t\tappendStringInfo(&s, \", \\\"lsn\\\": \\\"%X/%X\\\"\", LSN_FORMAT_ARGS(msg_req->hdr.lsn));\n\t\t\t\tappendStringInfo(&s, \", \\\"not_modified_since\\\": \\\"%X/%X\\\"\", LSN_FORMAT_ARGS(msg_req->hdr.not_modified_since));\n\t\t\t\tappendStringInfoChar(&s, '}');\n\t\t\t\tbreak;\n\t\t\t}\n\t\tcase 
T_NeonGetSlruSegmentRequest:\n\t\t\t{\n\t\t\t\tNeonGetSlruSegmentRequest *msg_req = (NeonGetSlruSegmentRequest *) msg;\n\n\t\t\t\tappendStringInfoString(&s, \"{\\\"type\\\": \\\"NeonGetSlruSegmentRequest\\\"\");\n\t\t\t\tappendStringInfo(&s, \", \\\"kind\\\": %u\", msg_req->kind);\n\t\t\t\tappendStringInfo(&s, \", \\\"segno\\\": %u\", msg_req->segno);\n\t\t\t\tappendStringInfo(&s, \", \\\"lsn\\\": \\\"%X/%X\\\"\", LSN_FORMAT_ARGS(msg_req->hdr.lsn));\n\t\t\t\tappendStringInfo(&s, \", \\\"not_modified_since\\\": \\\"%X/%X\\\"\", LSN_FORMAT_ARGS(msg_req->hdr.not_modified_since));\n\t\t\t\tappendStringInfoChar(&s, '}');\n\t\t\t\tbreak;\n\t\t\t}\n\t\t\t/* pagestore -> pagestore_client */\n\t\tcase T_NeonExistsResponse:\n\t\t\t{\n\t\t\t\tNeonExistsResponse *msg_resp = (NeonExistsResponse *) msg;\n\n\t\t\t\tappendStringInfoString(&s, \"{\\\"type\\\": \\\"NeonExistsResponse\\\"\");\n\t\t\t\tappendStringInfo(&s, \", \\\"exists\\\": %d}\",\n\t\t\t\t\t\t\t\t msg_resp->exists);\n\t\t\t\tappendStringInfoChar(&s, '}');\n\n\t\t\t\tbreak;\n\t\t\t}\n\t\tcase T_NeonNblocksResponse:\n\t\t\t{\n\t\t\t\tNeonNblocksResponse *msg_resp = (NeonNblocksResponse *) msg;\n\n\t\t\t\tappendStringInfoString(&s, \"{\\\"type\\\": \\\"NeonNblocksResponse\\\"\");\n\t\t\t\tappendStringInfo(&s, \", \\\"n_blocks\\\": %u}\",\n\t\t\t\t\t\t\t\t msg_resp->n_blocks);\n\t\t\t\tappendStringInfoChar(&s, '}');\n\n\t\t\t\tbreak;\n\t\t\t}\n\t\tcase T_NeonGetPageResponse:\n\t\t\t{\n\t\t\t\tNeonGetPageResponse *msg_resp = (NeonGetPageResponse *) msg;\n\n\t\t\t\tappendStringInfoString(&s, \"{\\\"type\\\": \\\"NeonGetPageResponse\\\"\");\n\t\t\t\tappendStringInfo(&s, \", \\\"rinfo\\\": %u/%u/%u\", RelFileInfoFmt(msg_resp->req.rinfo));\n\t\t\t\tappendStringInfo(&s, \", \\\"forknum\\\": %d\", msg_resp->req.forknum);\n\t\t\t\tappendStringInfo(&s, \", \\\"blkno\\\": %u\", msg_resp->req.blkno);\n\t\t\t\tappendStringInfoChar(&s, '}');\n\t\t\t\tbreak;\n\t\t\t}\n\t\tcase T_NeonErrorResponse:\n\t\t\t{\n\t\t\t\tNeonErrorResponse 
*msg_resp = (NeonErrorResponse *) msg;\n\n\t\t\t\t/* FIXME: escape double-quotes in the message */\n\t\t\t\tappendStringInfoString(&s, \"{\\\"type\\\": \\\"NeonErrorResponse\\\"\");\n\t\t\t\tappendStringInfo(&s, \", \\\"message\\\": \\\"%s\\\"}\", msg_resp->message);\n\t\t\t\tappendStringInfoChar(&s, '}');\n\t\t\t\tbreak;\n\t\t\t}\n\t\tcase T_NeonDbSizeResponse:\n\t\t\t{\n\t\t\t\tNeonDbSizeResponse *msg_resp = (NeonDbSizeResponse *) msg;\n\n\t\t\t\tappendStringInfoString(&s, \"{\\\"type\\\": \\\"NeonDbSizeResponse\\\"\");\n\t\t\t\tappendStringInfo(&s, \", \\\"db_size\\\": \" INT64_FORMAT \"}\",\n\t\t\t\t\t\t\t\t msg_resp->db_size);\n\t\t\t\tappendStringInfoChar(&s, '}');\n\n\t\t\t\tbreak;\n\t\t\t}\n\t\tcase T_NeonGetSlruSegmentResponse:\n\t\t\t{\n\t\t\t\tNeonGetSlruSegmentResponse *msg_resp = (NeonGetSlruSegmentResponse *) msg;\n\n\t\t\t\tappendStringInfoString(&s, \"{\\\"type\\\": \\\"NeonGetSlruSegmentResponse\\\"\");\n\t\t\t\tappendStringInfo(&s, \", \\\"n_blocks\\\": %u}\",\n\t\t\t\t\t\t\t\t msg_resp->n_blocks);\n\t\t\t\tappendStringInfoChar(&s, '}');\n\n\t\t\t\tbreak;\n\t\t\t}\n\n\t\tdefault:\n\t\t\tappendStringInfo(&s, \"{\\\"type\\\": \\\"unknown 0x%02x\\\"\", msg->tag);\n\t}\n\treturn s.data;\n}\n\n/*\n *\tcommunicator_init() -- Initialize per-backend private state\n */\nvoid\ncommunicator_init(void)\n{\n\tSize\t\tprfs_size;\n\n\tif (MyPState != NULL)\n\t\treturn;\n\n\t/*\n\t * Sanity check that theperf counters array is sized correctly. We got\n\t * this wrong once, and the formula for max number of backends and aux\n\t * processes might well change in the future, so better safe than sorry.\n\t * This is a very cheap check so we do it even without assertions.  On\n\t * v14, this gets called before initializing MyProc, so we cannot perform\n\t * the check here. 
That's OK, we don't expect the logic to change in old\n\t * releases.\n\t */\n#if PG_VERSION_NUM>=150000\n\tif (MyNeonCounters >= &neon_per_backend_counters_shared[NUM_NEON_PERF_COUNTER_SLOTS])\n\t\telog(ERROR, \"MyNeonCounters points past end of array\");\n#endif\n\n\tprfs_size = offsetof(PrefetchState, prf_buffer) +\n\t\tsizeof(PrefetchRequest) * readahead_buffer_size;\n\n\tMyPState = MemoryContextAllocZero(TopMemoryContext, prfs_size);\n\n\tMyPState->n_unused = readahead_buffer_size;\n\n\tMyPState->bufctx = SlabContextCreate(TopMemoryContext,\n\t\t\t\t\t\t\t\t\t\t \"NeonSMGR/prefetch\",\n\t\t\t\t\t\t\t\t\t\t SLAB_DEFAULT_BLOCK_SIZE * 17,\n\t\t\t\t\t\t\t\t\t\t PS_GETPAGERESPONSE_SIZE);\n\tMyPState->errctx = AllocSetContextCreate(TopMemoryContext,\n\t\t\t\t\t\t\t\t\t\t\t \"NeonSMGR/errors\",\n\t\t\t\t\t\t\t\t\t\t\t ALLOCSET_DEFAULT_SIZES);\n\tMyPState->hashctx = AllocSetContextCreate(TopMemoryContext,\n\t\t\t\t\t\t\t\t\t\t\t  \"NeonSMGR/prefetch\",\n\t\t\t\t\t\t\t\t\t\t\t  ALLOCSET_DEFAULT_SIZES);\n\n\tMyPState->prf_hash = prfh_create(MyPState->hashctx,\n\t\t\t\t\t\t\t\t\t readahead_buffer_size, NULL);\n}\n\n/*\n *  neon_prefetch_response_usable -- Can a new request be satisfied by old one?\n *\n * This is used to check if the response to a prefetch request can be used to\n * satisfy a page read now.\n */\nstatic bool\nneon_prefetch_response_usable(neon_request_lsns *request_lsns,\n\t\t\t\t\t\t\t  PrefetchRequest *slot)\n{\n\t/* sanity check the LSN's on the old and the new request */\n\tAssert(request_lsns->request_lsn >= request_lsns->not_modified_since);\n\tAssert(request_lsns->effective_request_lsn >= request_lsns->not_modified_since);\n\tAssert(request_lsns->effective_request_lsn <= request_lsns->request_lsn);\n\tAssert(slot->request_lsns.request_lsn >= slot->request_lsns.not_modified_since);\n\tAssert(slot->request_lsns.effective_request_lsn >= slot->request_lsns.not_modified_since);\n\tAssert(slot->request_lsns.effective_request_lsn <= 
slot->request_lsns.request_lsn);\n\tAssert(slot->status != PRFS_UNUSED);\n\n\t/*\n\t * The new request's LSN should never be older than the old one.  This\n\t * could be an Assert, except that for testing purposes, we do provide an\n\t * interface in neon_test_utils to fetch pages at arbitary LSNs, which\n\t * violates this.\n\t *\n\t * Similarly, the not_modified_since value calculated for a page should\n\t * never move backwards. This assumption is a bit fragile; if we updated\n\t * the last-written cache when we read in a page, for example, then it\n\t * might. But as the code stands, it should not.\n\t *\n\t * (If two backends issue a request at the same time, they might race and\n\t * calculate LSNs \"out of order\" with each other, but the prefetch queue\n\t * is backend-private at the moment.)\n\t */\n\tif (request_lsns->effective_request_lsn < slot->request_lsns.effective_request_lsn ||\n\t\trequest_lsns->not_modified_since < slot->request_lsns.not_modified_since)\n\t{\n\t\tereport(LOG,\n\t\t\t\t(errcode(ERRCODE_IO_ERROR),\n\t\t\t\t errmsg(NEON_TAG \"request with unexpected LSN after prefetch\"),\n\t\t\t\t errdetail(\"Request %X/%X not_modified_since %X/%X, prefetch %X/%X not_modified_since %X/%X)\",\n\t\t\t\t\t\t   LSN_FORMAT_ARGS(request_lsns->effective_request_lsn),\n\t\t\t\t\t\t   LSN_FORMAT_ARGS(request_lsns->not_modified_since),\n\t\t\t\t\t\t   LSN_FORMAT_ARGS(slot->request_lsns.effective_request_lsn),\n\t\t\t\t\t\t   LSN_FORMAT_ARGS(slot->request_lsns.not_modified_since))));\n\t\treturn false;\n\t}\n\n\t/*---\n\t * Each request to the pageserver has three LSN values associated with it:\n\t * `not_modified_since`, `request_lsn`, and 'effective_request_lsn'.\n\t * `not_modified_since` and `request_lsn` are sent to the pageserver, but\n\t * in the primary node, we always use UINT64_MAX as the `request_lsn`, so\n\t * we remember `effective_request_lsn` separately. 
In a primary,\n\t * `effective_request_lsn` is the same as  `not_modified_since`.\n\t * See comments in neon_get_request_lsns why we can not use last flush WAL position here.\n\t *\n\t * To determine whether a response to a GetPage request issued earlier is\n\t * still valid to satisfy a new page read, we look at the\n\t * (not_modified_since, effective_request_lsn] range of the request. It is\n\t * effectively a claim that the page has not been modified between those\n\t * LSNs.  If the range of the old request in the queue overlaps with the\n\t * new request, we know that the page hasn't been modified in the union of\n\t * the ranges. We can use the response to old request to satisfy the new\n\t * request in that case. For example:\n\t *\n\t *              100      500\n\t * Old request:  +--------+\n\t *\n\t *                     400      800\n\t * New request:         +--------+\n\t *\n\t * The old request claims that the page was not modified between LSNs 100\n\t * and 500, and the second claims that it was not modified between 400 and\n\t * 800. Together they mean that the page was not modified between 100 and\n\t * 800. 
Therefore the response to the old request is also valid for the\n\t * new request.\n\t *\n\t * This logic also holds at the boundary case that the old request's LSN\n\t * matches the new request's not_modified_since LSN exactly:\n\t *\n\t *              100      500\n\t * Old request:  +--------+\n\t *\n\t *                       500      900\n\t * New request:           +--------+\n\t *\n\t * The response to the old request is the page as it was at LSN 500, and\n\t * the page hasn't been changed in the range (500, 900], therefore the\n\t * response is valid also for the new request.\n\t */\n\n\t/* this follows from the checks above */\n\tAssert(request_lsns->effective_request_lsn >= slot->request_lsns.not_modified_since);\n\n\treturn request_lsns->not_modified_since <= slot->request_lsns.effective_request_lsn;\n}\n\n/*\n *\tDoes the physical file exist?\n */\nbool\ncommunicator_exists(NRelFileInfo rinfo, ForkNumber forkNum, neon_request_lsns *request_lsns)\n{\n\tbool\t\texists;\n\tNeonResponse *resp;\n\n\t{\n\t\tNeonExistsRequest request = {\n\t\t\t.hdr.tag = T_NeonExistsRequest,\n\t\t\t.hdr.lsn = request_lsns->request_lsn,\n\t\t\t.hdr.not_modified_since = request_lsns->not_modified_since,\n\t\t\t.rinfo = rinfo,\n\t\t\t.forknum = forkNum\n\t\t};\n\n\t\tresp = page_server_request(&request);\n\n\t\tswitch (resp->tag)\n\t\t{\n\t\t\tcase T_NeonExistsResponse:\n\t\t\t{\n\t\t\t\tNeonExistsResponse* exists_resp = (NeonExistsResponse *) resp;\n\t\t\t\tif (neon_protocol_version >= 3)\n\t\t\t\t{\n\t\t\t\t\tif (!equal_requests(resp, &request.hdr) ||\n\t\t\t\t\t\t!RelFileInfoEquals(exists_resp->req.rinfo, request.rinfo) ||\n\t\t\t\t\t\texists_resp->req.forknum != request.forknum)\n\t\t\t\t\t{\n\t\t\t\t\t\tNEON_PANIC_CONNECTION_STATE(0, PANIC,\n\t\t\t\t\t\t\t\t\t\t\t\t\t\"Unexpect response {reqid=\" UINT64_HEX_FORMAT \",lsn=%X/%08X, since=%X/%08X, rel=%u/%u/%u.%u} to exits request {reqid=\" UINT64_HEX_FORMAT \",lsn=%X/%08X, since=%X/%08X, 
rel=%u/%u/%u.%u}\",\n\t\t\t\t\t\t\t\t\t\t\t\t\tresp->reqid, LSN_FORMAT_ARGS(resp->lsn), LSN_FORMAT_ARGS(resp->not_modified_since), RelFileInfoFmt(exists_resp->req.rinfo), exists_resp->req.forknum,\n\t\t\t\t\t\t\t\t\t\t\t\t\trequest.hdr.reqid, LSN_FORMAT_ARGS(request.hdr.lsn), LSN_FORMAT_ARGS(request.hdr.not_modified_since), RelFileInfoFmt(request.rinfo), request.forknum);\n\t\t\t\t\t}\n\t\t\t\t}\n\t\t\t\texists = exists_resp->exists;\n\t\t\t\tbreak;\n\t\t\t}\n\t\t\tcase T_NeonErrorResponse:\n\t\t\t\tif (neon_protocol_version >= 3)\n\t\t\t\t{\n\t\t\t\t\tif (!equal_requests(resp, &request.hdr))\n\t\t\t\t\t{\n\t\t\t\t\t\telog(WARNING, NEON_TAG \"Error message {reqid=\" UINT64_HEX_FORMAT \",lsn=%X/%08X, since=%X/%08X} doesn't match exists request {reqid=\" UINT64_HEX_FORMAT \",lsn=%X/%08X, since=%X/%08X}\",\n\t\t\t\t\t\t\t resp->reqid, LSN_FORMAT_ARGS(resp->lsn), LSN_FORMAT_ARGS(resp->not_modified_since),\n\t\t\t\t\t\t\t request.hdr.reqid, LSN_FORMAT_ARGS(request.hdr.lsn), LSN_FORMAT_ARGS(request.hdr.not_modified_since));\n\t\t\t\t\t}\n\t\t\t\t}\n\t\t\t\tereport(ERROR,\n\t\t\t\t\t\t(errcode(ERRCODE_IO_ERROR),\n\t\t\t\t\t\t errmsg(NEON_TAG \"[reqid \" UINT64_HEX_FORMAT \"] could not read relation existence of rel %u/%u/%u.%u from page server at lsn %X/%08X\",\n\t\t\t\t\t\t\t\tresp->reqid,\n\t\t\t\t\t\t\t\tRelFileInfoFmt(rinfo),\n\t\t\t\t\t\t\t\tforkNum,\n\t\t\t\t\t\t\t\tLSN_FORMAT_ARGS(request_lsns->effective_request_lsn)),\n\t\t\t\t\t\t errdetail(\"page server returned error: %s\",\n\t\t\t\t\t\t\t\t   ((NeonErrorResponse *) resp)->message)));\n\t\t\t\tbreak;\n\n\t\t\tdefault:\n\t\t\t\tNEON_PANIC_CONNECTION_STATE(0, PANIC,\n\t\t\t\t\t\t\t\t\t\t\t\"Expected Exists (0x%02x) or Error (0x%02x) response to ExistsRequest, but got 0x%02x\",\n\t\t\t\t\t\t\t\t\t\t\tT_NeonExistsResponse, T_NeonErrorResponse, resp->tag);\n\t\t}\n\t\tpfree(resp);\n\t}\n\treturn exists;\n}\n\n/*\n * Read N pages at a specific LSN.\n *\n * *mask is set for pages read at a previous point in time, and 
which we\n * should not touch, nor overwrite.\n * New bits should be set in *mask for the pages we'successfully read.\n *\n * The offsets in request_lsns, buffers, and mask are linked.\n */\nvoid\ncommunicator_read_at_lsnv(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber base_blockno,\n\t\t\t\t\t\t  neon_request_lsns *request_lsns,\n\t\t\t\t\t\t  void **buffers, BlockNumber nblocks, const bits8 *mask)\n{\n\tNeonResponse *resp;\n\tuint64\t\tring_index;\n\tPrfHashEntry *entry;\n\tPrefetchRequest *slot;\n\tPrefetchRequest hashkey;\n\n\tAssert(PointerIsValid(request_lsns));\n\tAssert(nblocks >= 1);\n\n\t/*\n\t * Use an intermediate PrefetchRequest struct as the hash key to ensure\n\t * correct alignment and that the padding bytes are cleared.\n\t */\n\tmemset(&hashkey.buftag, 0, sizeof(BufferTag));\n\tCopyNRelFileInfoToBufTag(hashkey.buftag, rinfo);\n\thashkey.buftag.forkNum = forkNum;\n\thashkey.buftag.blockNum = base_blockno;\n\n\t/*\n\t * The redo process does not lock pages that it needs to replay but are\n\t * not in the shared buffers, so a concurrent process may request the page\n\t * after redo has decided it won't redo that page and updated the LwLSN\n\t * for that page. If we're in hot standby we need to take care that we\n\t * don't return until after REDO has finished replaying up to that LwLSN,\n\t * as the page should have been locked up to that point.\n\t *\n\t * See also the description on neon_redo_read_buffer_filter below.\n\t *\n\t * NOTE: It is possible that the WAL redo process will still do IO due to\n\t * concurrent failed read IOs. 
Those IOs should never have a request_lsn\n\t * that is as large as the WAL record we're currently replaying, if it\n\t * weren't for the behaviour of the LwLsn cache that uses the highest\n\t * value of the LwLsn cache when the entry is not found.\n\t */\n\t(void) prefetch_register_bufferv(hashkey.buftag, request_lsns, nblocks, mask, false);\n\n\tfor (int i = 0; i < nblocks; i++)\n\t{\n\t\tvoid\t   *buffer = buffers[i];\n\t\tBlockNumber blockno = base_blockno + i;\n\t\tneon_request_lsns *reqlsns = &request_lsns[i];\n\t\tTimestampTz\t\tstart_ts, end_ts;\n\n\t\tif (PointerIsValid(mask) && BITMAP_ISSET(mask, i))\n\t\t\tcontinue;\n\n\t\tstart_ts = GetCurrentTimestamp();\n\n\t\tif (RecoveryInProgress() && MyBackendType != B_STARTUP)\n\t\t\tXLogWaitForReplayOf(reqlsns->request_lsn);\n\n\t\t/*\n\t\t * Try to find prefetched page in the list of received pages.\n\t\t */\nRetry:\n\t\thashkey.buftag.blockNum = blockno;\n\t\tentry = prfh_lookup(MyPState->prf_hash, &hashkey);\n\n\t\tif (entry != NULL)\n\t\t{\n\t\t\tslot = entry->slot;\n\t\t\tif (neon_prefetch_response_usable(reqlsns, slot))\n\t\t\t{\n\t\t\t\tring_index = slot->my_ring_index;\n\t\t\t}\n\t\t\telse\n\t\t\t{\n\t\t\t\t/*\n\t\t\t\t * Cannot use this prefetch, discard it\n\t\t\t\t *\n\t\t\t\t * We can't drop cache for not-yet-received requested items. 
It is\n\t\t\t\t * unlikely this happens, but it can happen if prefetch distance\n\t\t\t\t * is large enough and a backend didn't consume all prefetch\n\t\t\t\t * requests.\n\t\t\t\t */\n\t\t\t\tif (slot->status == PRFS_REQUESTED)\n\t\t\t\t{\n\t\t\t\t\tif (!prefetch_wait_for(slot->my_ring_index))\n\t\t\t\t\t\tgoto Retry;\n\t\t\t\t}\n\t\t\t\t/* drop caches */\n\t\t\t\tprefetch_set_unused(slot->my_ring_index);\n\t\t\t\tpgBufferUsage.prefetch.expired += 1;\n\t\t\t\tMyNeonCounters->getpage_prefetch_discards_total++;\n\t\t\t\t/* make it look like a prefetch cache miss */\n\t\t\t\tentry = NULL;\n\t\t\t}\n\t\t}\n\n\t\tdo\n\t\t{\n\t\t\tif (entry == NULL)\n\t\t\t{\n\t\t\t\tring_index = prefetch_register_bufferv(hashkey.buftag, reqlsns, 1, NULL, false);\n\t\t\t\tAssert(ring_index != UINT64_MAX);\n\t\t\t\tslot = GetPrfSlot(ring_index);\n\t\t\t}\n\t\t\telse\n\t\t\t{\n\t\t\t\t/*\n\t\t\t\t * Empty our reference to the prefetch buffer's hash entry. When\n\t\t\t\t * we wait for prefetches, the entry reference is invalidated by\n\t\t\t\t * potential updates to the hash, and when we reconnect to the\n\t\t\t\t * pageserver the prefetch we're waiting for may be dropped, in\n\t\t\t\t * which case we need to retry and take the branch above.\n\t\t\t\t */\n\t\t\t\tentry = NULL;\n\t\t\t}\n\n\t\t\tAssert(slot->my_ring_index == ring_index);\n\t\t\tAssert(MyPState->ring_last <= ring_index &&\n\t\t\t\t   MyPState->ring_unused > ring_index);\n\t\t\tAssert(slot->status != PRFS_UNUSED);\n\t\t\tAssert(GetPrfSlot(ring_index) == slot);\n\n\t\t} while (!prefetch_wait_for(ring_index));\n\n\t\tAssert(slot->status == PRFS_RECEIVED);\n\t\tAssert(memcmp(&hashkey.buftag, &slot->buftag, sizeof(BufferTag)) == 0);\n\t\tAssert(hashkey.buftag.blockNum == base_blockno + i);\n\n\t\t/* We already checked that response match request when storing it in slot */\n\t\tresp = slot->response;\n\n\t\tswitch (resp->tag)\n\t\t{\n\t\t\tcase T_NeonGetPageResponse:\n\t\t\t{\n\t\t\t\tNeonGetPageResponse* getpage_resp = 
(NeonGetPageResponse *) resp;\n\t\t\t\tmemcpy(buffer, getpage_resp->page, BLCKSZ);\n\n\t\t\t\t/*\n\t\t\t\t * With lfc_store_prefetch_result=true prefetch result is stored in LFC in prefetch_pump_state when response is received\n\t\t\t\t * from page server. But if lfc_store_prefetch_result=false then it is not yet stored in LFC and we have to do it here\n\t\t\t\t * under buffer lock.\n\t\t\t\t */\n\t\t\t\tif (!lfc_store_prefetch_result)\n\t\t\t\t\tlfc_write(rinfo, forkNum, blockno, buffer);\n\t\t\t\tbreak;\n\t\t\t}\n\t\t\tcase T_NeonErrorResponse:\n\t\t\t\tereport(ERROR,\n\t\t\t\t\t\t(errcode(ERRCODE_IO_ERROR),\n\t\t\t\t\t\t errmsg(NEON_TAG \"[shard %d, reqid \" UINT64_HEX_FORMAT \"] could not read block %u in rel %u/%u/%u.%u from page server at lsn %X/%08X\",\n\t\t\t\t\t\t\t\tslot->shard_no, resp->reqid, blockno, RelFileInfoFmt(rinfo),\n\t\t\t\t\t\t\t\tforkNum, LSN_FORMAT_ARGS(reqlsns->effective_request_lsn)),\n\t\t\t\t\t\t errdetail(\"page server returned error: %s\",\n\t\t\t\t\t\t\t\t   ((NeonErrorResponse *) resp)->message)));\n\t\t\t\tbreak;\n\t\t\tdefault:\n\t\t\t\tNEON_PANIC_CONNECTION_STATE(slot->shard_no, PANIC,\n\t\t\t\t\t\t\t\t\t\t\t\"Expected GetPage (0x%02x) or Error (0x%02x) response to GetPageRequest, but got 0x%02x\",\n\t\t\t\t\t\t\t\t\t\t\tT_NeonGetPageResponse, T_NeonErrorResponse, resp->tag);\n\t\t}\n\n\t\t/* buffer was used, clean up for later reuse */\n\t\tprefetch_set_unused(ring_index);\n\t\tprefetch_cleanup_trailing_unused();\n\n\t\tend_ts = GetCurrentTimestamp();\n\t\tinc_getpage_wait(end_ts >= start_ts ? 
(end_ts - start_ts) : 0);\n\t}\n}\n\n/*\n *\tneon_nblocks() -- Get the number of blocks stored in a relation.\n */\nBlockNumber\ncommunicator_nblocks(NRelFileInfo rinfo, ForkNumber forknum, neon_request_lsns *request_lsns)\n{\n\tNeonResponse *resp;\n\tBlockNumber n_blocks;\n\n\t{\n\t\tNeonNblocksRequest request = {\n\t\t\t.hdr.tag = T_NeonNblocksRequest,\n\t\t\t.hdr.lsn = request_lsns->request_lsn,\n\t\t\t.hdr.not_modified_since = request_lsns->not_modified_since,\n\t\t\t.rinfo = rinfo,\n\t\t\t.forknum = forknum,\n\t\t};\n\n\t\tresp = page_server_request(&request);\n\n\t\tswitch (resp->tag)\n\t\t{\n\t\t\tcase T_NeonNblocksResponse:\n\t\t\t{\n\t\t\t\tNeonNblocksResponse * relsize_resp = (NeonNblocksResponse *) resp;\n\t\t\t\tif (neon_protocol_version >= 3)\n\t\t\t\t{\n\t\t\t\t\tif (!equal_requests(resp, &request.hdr) ||\n\t\t\t\t\t\t!RelFileInfoEquals(relsize_resp->req.rinfo, request.rinfo) ||\n\t\t\t\t\t\trelsize_resp->req.forknum != forknum)\n\t\t\t\t\t{\n\t\t\t\t\t\tNEON_PANIC_CONNECTION_STATE(0, PANIC,\n\t\t\t\t\t\t\t\t\t\t\t\t\t\"Unexpect response {reqid=\" UINT64_HEX_FORMAT \",lsn=%X/%08X, since=%X/%08X, rel=%u/%u/%u.%u} to get relsize request {reqid=\" UINT64_HEX_FORMAT \",lsn=%X/%08X, since=%X/%08X, rel=%u/%u/%u.%u}\",\n\t\t\t\t\t\t\t\t\t\t\t\t\tresp->reqid, LSN_FORMAT_ARGS(resp->lsn), LSN_FORMAT_ARGS(resp->not_modified_since), RelFileInfoFmt(relsize_resp->req.rinfo), relsize_resp->req.forknum,\n\t\t\t\t\t\t\t\t\t\t\t\t\trequest.hdr.reqid, LSN_FORMAT_ARGS(request.hdr.lsn), LSN_FORMAT_ARGS(request.hdr.not_modified_since), RelFileInfoFmt(request.rinfo), forknum);\n\t\t\t\t\t}\n\t\t\t\t}\n\t\t\t\tn_blocks = relsize_resp->n_blocks;\n\t\t\t\tbreak;\n\t\t\t}\n\t\t\tcase T_NeonErrorResponse:\n\t\t\t\tif (neon_protocol_version >= 3)\n\t\t\t\t{\n\t\t\t\t\tif (!equal_requests(resp, &request.hdr))\n\t\t\t\t\t{\n\t\t\t\t\t\telog(WARNING, NEON_TAG \"Error message {reqid=\" UINT64_HEX_FORMAT \",lsn=%X/%08X, since=%X/%08X} doesn't match get relsize request {reqid=\" 
UINT64_HEX_FORMAT \",lsn=%X/%08X, since=%X/%08X}\",\n\t\t\t\t\t\t\t resp->reqid, LSN_FORMAT_ARGS(resp->lsn), LSN_FORMAT_ARGS(resp->not_modified_since),\n\t\t\t\t\t\t\t request.hdr.reqid, LSN_FORMAT_ARGS(request.hdr.lsn), LSN_FORMAT_ARGS(request.hdr.not_modified_since));\n\t\t\t\t\t}\n\t\t\t\t}\n\t\t\t\tereport(ERROR,\n\t\t\t\t\t\t(errcode(ERRCODE_IO_ERROR),\n\t\t\t\t\t\t errmsg(NEON_TAG \"[reqid \" UINT64_HEX_FORMAT \"] could not read relation size of rel %u/%u/%u.%u from page server at lsn %X/%08X\",\n\t\t\t\t\t\t\t\tresp->reqid,\n\t\t\t\t\t\t\t\tRelFileInfoFmt(rinfo),\n\t\t\t\t\t\t\t\tforknum,\n\t\t\t\t\t\t\t\tLSN_FORMAT_ARGS(request_lsns->effective_request_lsn)),\n\t\t\t\t\t\t errdetail(\"page server returned error: %s\",\n\t\t\t\t\t\t\t\t   ((NeonErrorResponse *) resp)->message)));\n\t\t\t\tbreak;\n\n\t\t\tdefault:\n\t\t\t\tNEON_PANIC_CONNECTION_STATE(0, PANIC,\n\t\t\t\t\t\t\t\t\t\t\t\"Expected Nblocks (0x%02x) or Error (0x%02x) response to NblocksRequest, but got 0x%02x\",\n\t\t\t\t\t\t\t\t\t\t\tT_NeonNblocksResponse, T_NeonErrorResponse, resp->tag);\n\t\t}\n\n\t\tpfree(resp);\n\t}\n\treturn n_blocks;\n}\n\n/*\n *\tneon_db_size() -- Get the size of the database in bytes.\n */\nint64\ncommunicator_dbsize(Oid dbNode, neon_request_lsns *request_lsns)\n{\n\tNeonResponse *resp;\n\tint64\t\tdb_size;\n\n\t{\n\t\tNeonDbSizeRequest request = {\n\t\t\t.hdr.tag = T_NeonDbSizeRequest,\n\t\t\t.hdr.lsn = request_lsns->request_lsn,\n\t\t\t.hdr.not_modified_since = request_lsns->not_modified_since,\n\t\t\t.dbNode = dbNode,\n\t\t};\n\n\t\tresp = page_server_request(&request);\n\n\t\tswitch (resp->tag)\n\t\t{\n\t\t\tcase T_NeonDbSizeResponse:\n\t\t\t{\n\t\t\t\tNeonDbSizeResponse* dbsize_resp = (NeonDbSizeResponse *) resp;\n\t\t\t\tif (neon_protocol_version >= 3)\n\t\t\t\t{\n\t\t\t\t\tif (!equal_requests(resp, &request.hdr) ||\n\t\t\t\t\t\tdbsize_resp->req.dbNode != dbNode)\n\t\t\t\t\t{\n\t\t\t\t\t\tNEON_PANIC_CONNECTION_STATE(0, PANIC,\n\t\t\t\t\t\t\t\t\t\t\t\t\t\"Unexpect 
response {reqid=\" UINT64_HEX_FORMAT \",lsn=%X/%08X, since=%X/%08X, dbNode=%u} to get DB size request {reqid=\" UINT64_HEX_FORMAT \",lsn=%X/%08X, since=%X/%08X, dbNode=%u}\",\n\t\t\t\t\t\t\t\t\t\t\t\t\tresp->reqid, LSN_FORMAT_ARGS(resp->lsn), LSN_FORMAT_ARGS(resp->not_modified_since), dbsize_resp->req.dbNode,\n\t\t\t\t\t\t\t\t\t\t\t\t\trequest.hdr.reqid, LSN_FORMAT_ARGS(request.hdr.lsn), LSN_FORMAT_ARGS(request.hdr.not_modified_since), dbNode);\n\t\t\t\t\t}\n\t\t\t\t}\n\t\t\t\tdb_size = dbsize_resp->db_size;\n\t\t\t\tbreak;\n\t\t\t}\n\t\t\tcase T_NeonErrorResponse:\n\t\t\t\tif (neon_protocol_version >= 3)\n\t\t\t\t{\n\t\t\t\t\tif (!equal_requests(resp, &request.hdr))\n\t\t\t\t\t{\n\t\t\t\t\t\telog(WARNING, NEON_TAG \"Error message {reqid=\" UINT64_HEX_FORMAT \",lsn=%X/%08X, since=%X/%08X} doesn't match get DB size request {reqid=\" UINT64_HEX_FORMAT \",lsn=%X/%08X, since=%X/%08X}\",\n\t\t\t\t\t\t\t resp->reqid, LSN_FORMAT_ARGS(resp->lsn), LSN_FORMAT_ARGS(resp->not_modified_since),\n\t\t\t\t\t\t\t request.hdr.reqid, LSN_FORMAT_ARGS(request.hdr.lsn), LSN_FORMAT_ARGS(request.hdr.not_modified_since));\n\t\t\t\t\t}\n\t\t\t\t}\n\t\t\t\tereport(ERROR,\n\t\t\t\t\t\t(errcode(ERRCODE_IO_ERROR),\n\t\t\t\t\t\t errmsg(NEON_TAG \"[reqid \" UINT64_HEX_FORMAT \"] could not read db size of db %u from page server at lsn %X/%08X\",\n\t\t\t\t\t\t\t\tresp->reqid,\n\t\t\t\t\t\t\t\tdbNode, LSN_FORMAT_ARGS(request_lsns->effective_request_lsn)),\n\t\t\t\t\t\t errdetail(\"page server returned error: %s\",\n\t\t\t\t\t\t\t\t   ((NeonErrorResponse *) resp)->message)));\n\t\t\t\tbreak;\n\n\t\t\tdefault:\n\t\t\t\tNEON_PANIC_CONNECTION_STATE(0, PANIC,\n\t\t\t\t\t\t\t\t\t\t\t\"Expected DbSize (0x%02x) or Error (0x%02x) response to DbSizeRequest, but got 0x%02x\",\n\t\t\t\t\t\t\t\t\t\t\tT_NeonDbSizeResponse, T_NeonErrorResponse, resp->tag);\n\t\t}\n\n\t\tpfree(resp);\n\t}\n\treturn db_size;\n}\n\nint\ncommunicator_read_slru_segment(SlruKind kind, int64 segno, neon_request_lsns 
*request_lsns,\n\t\t\t\t\t\t\t   void *buffer)\n{\n\tint\t\t\tn_blocks;\n\tshardno_t\tshard_no = 0; /* All SLRUs are at shard 0 */\n\tNeonResponse *resp = NULL;\n\tNeonGetSlruSegmentRequest request;\n\n\trequest = (NeonGetSlruSegmentRequest) {\n\t\t.hdr.tag = T_NeonGetSlruSegmentRequest,\n\t\t.hdr.lsn = request_lsns->request_lsn,\n\t\t.hdr.not_modified_since = request_lsns->not_modified_since,\n\t\t.kind = kind,\n\t\t.segno = segno\n\t};\n\n\tconsume_prefetch_responses();\n\n\tPG_TRY();\n\t{\n\t\tbefore_shmem_exit(prefetch_on_exit, Int32GetDatum(shard_no));\n\t\tdo\n\t\t{\n\t\t\twhile (!page_server->send(shard_no, &request.hdr) || !page_server->flush(shard_no));\n\t\t\tresp = page_server->receive(shard_no);\n\t\t} while (resp == NULL);\n\t\tcancel_before_shmem_exit(prefetch_on_exit, Int32GetDatum(shard_no));\n\t}\n\tPG_CATCH();\n\t{\n\t\tcancel_before_shmem_exit(prefetch_on_exit, Int32GetDatum(shard_no));\n\t\t/* Nothing should cancel disconnect: we should not leave connection in opaque state */\n\t\tHOLD_INTERRUPTS();\n\t\tpage_server->disconnect(shard_no);\n\t\tRESUME_INTERRUPTS();\n\n\t\tPG_RE_THROW();\n\t}\n\tPG_END_TRY();\n\n\tswitch (resp->tag)\n\t{\n\t\tcase T_NeonGetSlruSegmentResponse:\n\t\t{\n\t\t\tNeonGetSlruSegmentResponse* slru_resp = (NeonGetSlruSegmentResponse *) resp;\n\t\t\tif (neon_protocol_version >= 3)\n\t\t\t{\n\t\t\t\tif (!equal_requests(resp, &request.hdr) ||\n\t\t\t\t\tslru_resp->req.kind != kind ||\n\t\t\t\t\tslru_resp->req.segno != segno)\n\t\t\t\t{\n\t\t\t\t\tNEON_PANIC_CONNECTION_STATE(0, PANIC,\n\t\t\t\t\t\t\t\t\t\t\t\t\"Unexpect response {reqid=\" UINT64_HEX_FORMAT \",lsn=%X/%08X, since=%X/%08X, kind=%u, segno=%u} to get SLRU segment request {reqid=\" UINT64_HEX_FORMAT \",lsn=%X/%08X, since=%X/%08X, kind=%u, segno=%lluu}\",\n\t\t\t\t\t\t\t\t\t\t\t\tresp->reqid, LSN_FORMAT_ARGS(resp->lsn), LSN_FORMAT_ARGS(resp->not_modified_since), slru_resp->req.kind, slru_resp->req.segno,\n\t\t\t\t\t\t\t\t\t\t\t\trequest.hdr.reqid, 
LSN_FORMAT_ARGS(request.hdr.lsn), LSN_FORMAT_ARGS(request.hdr.not_modified_since), kind, (unsigned long long) segno);\n\t\t\t\t}\n\t\t\t}\n\t\t\tn_blocks = slru_resp->n_blocks;\n\t\t\tmemcpy(buffer, slru_resp->data, n_blocks*BLCKSZ);\n\t\t\tbreak;\n\t\t}\n\t\tcase T_NeonErrorResponse:\n\t\t\tif (neon_protocol_version >= 3)\n\t\t\t{\n\t\t\t\tif (!equal_requests(resp, &request.hdr))\n\t\t\t\t{\n\t\t\t\t\telog(WARNING, NEON_TAG \"Error message {reqid=\" UINT64_HEX_FORMAT \",lsn=%X/%08X, since=%X/%08X} doesn't match get SLRU segment request {reqid=\" UINT64_HEX_FORMAT \",lsn=%X/%08X, since=%X/%08X}\",\n\t\t\t\t\t\t resp->reqid, LSN_FORMAT_ARGS(resp->lsn), LSN_FORMAT_ARGS(resp->not_modified_since),\n\t\t\t\t\t\t request.hdr.reqid, LSN_FORMAT_ARGS(request.hdr.lsn), LSN_FORMAT_ARGS(request.hdr.not_modified_since));\n\t\t\t\t}\n\t\t\t}\n\t\t\tereport(ERROR,\n\t\t\t\t\t(errcode(ERRCODE_IO_ERROR),\n\t\t\t\t\t errmsg(NEON_TAG \"[reqid \" UINT64_HEX_FORMAT \"] could not read SLRU %d segment %llu at lsn %X/%08X\",\n\t\t\t\t\t\t\tresp->reqid,\n\t\t\t\t\t\t\tkind,\n\t\t\t\t\t\t\t(unsigned long long) segno,\n\t\t\t\t\t\t\tLSN_FORMAT_ARGS(request_lsns->request_lsn)),\n\t\t\t\t\t errdetail(\"page server returned error: %s\",\n\t\t\t\t\t\t\t   ((NeonErrorResponse *) resp)->message)));\n\t\t\tbreak;\n\n\t\tdefault:\n\t\t\tNEON_PANIC_CONNECTION_STATE(0, PANIC,\n\t\t\t\t\t\t\t\t\t\t\"Expected GetSlruSegment (0x%02x) or Error (0x%02x) response to GetSlruSegmentRequest, but got 0x%02x\",\n\t\t\t\t\t\t\t\t\t\tT_NeonGetSlruSegmentResponse, T_NeonErrorResponse, resp->tag);\n\t}\n\tpfree(resp);\n\n\tcommunicator_reconfigure_timeout_if_needed();\n\treturn n_blocks;\n}\n\nvoid\ncommunicator_reconfigure_timeout_if_needed(void)\n{\n\tbool\tneeds_set = MyPState->ring_receive != MyPState->ring_unused &&\n\t\t\t\t\t\t!AmPrewarmWorker && /* do not pump prefetch state in prewarm worker */\n\t\t\t\t\t\treadahead_getpage_pull_timeout_ms > 0;\n\n\tif (needs_set != timeout_set)\n\t{\n\t\t/* The background 
writer doesn't (shouldn't) read any pages */\n\t\tAssert(!AmBackgroundWriterProcess());\n\t\t/* The checkpointer doesn't (shouldn't) read any pages */\n\t\tAssert(!AmCheckpointerProcess());\n\n\t\tif (unlikely(PS_TIMEOUT_ID == 0))\n\t\t{\n\t\t\tPS_TIMEOUT_ID = RegisterTimeout(USER_TIMEOUT, pagestore_timeout_handler);\n\t\t}\n\n\t\tif (needs_set)\n\t\t{\n#if PG_MAJORVERSION_NUM <= 14\n\t\t\tenable_timeout_after(PS_TIMEOUT_ID, readahead_getpage_pull_timeout_ms);\n#else\n\t\t\tenable_timeout_every(\n\t\t\t\tPS_TIMEOUT_ID,\n\t\t\t\tTimestampTzPlusMilliseconds(GetCurrentTimestamp(),\n\t\t\t\t\t\t\t\t\t\t\treadahead_getpage_pull_timeout_ms),\n\t\t\t\treadahead_getpage_pull_timeout_ms\n\t\t\t);\n#endif\n\t\t\ttimeout_set = true;\n\t\t}\n\t\telse\n\t\t{\n\t\t\tAssert(timeout_set);\n\t\t\tdisable_timeout(PS_TIMEOUT_ID, false);\n\t\t\ttimeout_set = false;\n\t\t}\n\t}\n}\n\nstatic void\npagestore_timeout_handler(void)\n{\n#if PG_MAJORVERSION_NUM <= 14\n\t/*\n\t * PG14: Setting a repeating timeout is not possible, so we signal here\n\t * that the timeout has already been reset, and by telling the system\n\t * that, the system will re-schedule it later if we need to.\n\t */\n\ttimeout_set = false;\n#endif\n\ttimeout_signaled = true;\n\tInterruptPending = true;\n}\n\n/*\n * Process new data received in our active PageStream sockets.\n *\n * This relies on the invariant that all pipelined yet-to-be-received requests\n * are getPage requests managed by MyPState. This is currently true, any\n * modification will probably require some stuff to make it work again.\n */\nstatic bool\ncommunicator_processinterrupts(void)\n{\n\tif (timeout_signaled)\n\t{\n\t\tif (!readpage_reentrant_guard && readahead_getpage_pull_timeout_ms > 0)\n\t\t\tcommunicator_prefetch_pump_state();\n\n\t\ttimeout_signaled = false;\n\t\tcommunicator_reconfigure_timeout_if_needed();\n\t}\n\n\tif (!prev_interrupt_cb)\n\t\treturn false;\n\n\treturn prev_interrupt_cb();\n}\n"
  },
  {
    "path": "pgxn/neon/communicator.h",
    "content": "/*-------------------------------------------------------------------------\n *\n * communicator.h\n *\t  internal interface for communicating with remote pageservers\n *\n *\n * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group\n * Portions Copyright (c) 1994, Regents of the University of California\n *\n *-------------------------------------------------------------------------\n */\n#ifndef COMMUNICATOR_h\n#define COMMUNICATOR_h\n\n#include \"neon_pgversioncompat.h\"\n\n#include \"storage/buf_internals.h\"\n\n#include \"pagestore_client.h\"\n\n/* initialization at postmaster startup */\nextern void pg_init_communicator(void);\n\n/* initialization at backend startup */\nextern void communicator_init(void);\n\nextern bool communicator_exists(NRelFileInfo rinfo, ForkNumber forkNum,\n\t\t\t\t\t\t\t\tneon_request_lsns *request_lsns);\nextern BlockNumber communicator_nblocks(NRelFileInfo rinfo, ForkNumber forknum,\n\t\t\t\t\t\t\t\t\t\tneon_request_lsns *request_lsns);\nextern int64 communicator_dbsize(Oid dbNode, neon_request_lsns *request_lsns);\nextern void communicator_read_at_lsnv(NRelFileInfo rinfo, ForkNumber forkNum,\n\t\t\t\t\t\t\t\t\t  BlockNumber base_blockno, neon_request_lsns *request_lsns,\n\t\t\t\t\t\t\t\t\t  void **buffers, BlockNumber nblocks, const bits8 *mask);\nextern int communicator_prefetch_lookupv(NRelFileInfo rinfo, ForkNumber forknum, BlockNumber blocknum,\n\t\t\t\t\t\t\t\t\t\t neon_request_lsns *lsns,\n\t\t\t\t\t\t\t\t\t\t BlockNumber nblocks, void **buffers, bits8 *mask);\nextern void communicator_prefetch_register_bufferv(BufferTag tag, neon_request_lsns *frlsns,\n\t\t\t\t\t\t\t\t\t\t\t\t   BlockNumber nblocks, const bits8 *mask);\nextern bool communicator_prefetch_receive(BufferTag tag);\n\nextern int communicator_read_slru_segment(SlruKind kind, int64 segno,\n\t\t\t\t\t\t\t\t\t\t  neon_request_lsns *request_lsns,\n\t\t\t\t\t\t\t\t\t\t  void *buffer);\n\nextern void 
communicator_reconfigure_timeout_if_needed(void);\nextern void communicator_prefetch_pump_state(void);\n\n\n#endif\n"
  },
  {
    "path": "pgxn/neon/communicator_process.c",
    "content": "/*-------------------------------------------------------------------------\n *\n * communicator_process.c\n *\t  Functions for starting up the communicator background worker process.\n *\n * Currently, the communicator process only functions as a metrics\n * exporter. It provides an HTTP endpoint for polling a limited set of\n * metrics. TODO: In the future, it will do much more, i.e. handle all\n * the communications with the pageservers.\n *\n * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group\n * Portions Copyright (c) 1994, Regents of the University of California\n *\n *-------------------------------------------------------------------------\n */\n#include \"postgres.h\"\n\n#include <unistd.h>\n\n#include \"miscadmin.h\"\n#include \"postmaster/bgworker.h\"\n#include \"postmaster/interrupt.h\"\n#include \"postmaster/postmaster.h\"\n#include \"replication/walsender.h\"\n#include \"storage/ipc.h\"\n#include \"storage/latch.h\"\n#include \"storage/pmsignal.h\"\n#include \"storage/procsignal.h\"\n#include \"tcop/tcopprot.h\"\n#include \"utils/timestamp.h\"\n\n#include \"communicator_process.h\"\n#include \"file_cache.h\"\n#include \"neon.h\"\n#include \"neon_perf_counters.h\"\n\n/* the rust bindings, generated by cbindgen */\n#include \"communicator/communicator_bindings.h\"\n\nstatic void pump_logging(struct LoggingReceiver *logging);\nPGDLLEXPORT void communicator_new_bgworker_main(Datum main_arg);\n\n/**** Initialization functions. 
These run in postmaster ****/\n\nvoid\npg_init_communicator_process(void)\n{\n\tBackgroundWorker bgw;\n\n\t/* Initialize the background worker process */\n\tmemset(&bgw, 0, sizeof(bgw));\n\tbgw.bgw_flags = BGWORKER_SHMEM_ACCESS;\n\tbgw.bgw_start_time = BgWorkerStart_PostmasterStart;\n\tsnprintf(bgw.bgw_library_name, BGW_MAXLEN, \"neon\");\n\tsnprintf(bgw.bgw_function_name, BGW_MAXLEN, \"communicator_new_bgworker_main\");\n\tsnprintf(bgw.bgw_name, BGW_MAXLEN, \"Storage communicator process\");\n\tsnprintf(bgw.bgw_type, BGW_MAXLEN, \"Storage communicator process\");\n\tbgw.bgw_restart_time = 5;\n\tbgw.bgw_notify_pid = 0;\n\tbgw.bgw_main_arg = (Datum) 0;\n\n\tRegisterBackgroundWorker(&bgw);\n}\n\n/**** Worker process functions. These run in the communicator worker process ****/\n\n/*\n * Entry point for the communicator bgworker process\n */\nvoid\ncommunicator_new_bgworker_main(Datum main_arg)\n{\n\tstruct LoggingReceiver *logging;\n\tconst char *errmsg = NULL;\n\tconst struct CommunicatorWorkerProcessStruct *proc_handle;\n\n\t/*\n\t * Pretend that this process is a WAL sender. That affects the shutdown\n\t * sequence: WAL senders are shut down last, after the final checkpoint\n\t * has been written. That's what we want for the communicator process too.\n\t */\n\tam_walsender = true;\n\tMarkPostmasterChildWalSender();\n\n\t/* Establish signal handlers. */\n\tpqsignal(SIGUSR1, procsignal_sigusr1_handler);\n\t/*\n\t * Postmaster sends us SIGUSR2 when all regular backends and bgworkers\n\t * have exited, and it's time for us to exit too\n\t */\n\tpqsignal(SIGUSR2, die);\n\tpqsignal(SIGHUP, SignalHandlerForConfigReload);\n\tpqsignal(SIGTERM, die);\n\n\tBackgroundWorkerUnblockSignals();\n\n\t/*\n\t * By default, INFO messages are not printed to the log. We want\n\t * `tracing::info!` messages emitted from the communicator to be printed,\n\t * however, so increase the log level.\n\t *\n\t * XXX: This overrides any user-set value from the config file. 
That's not\n\t * great, but on the other hand, there should be little reason for user to\n\t * control the verbosity of the communicator. It's not too verbose by\n\t * default.\n\t */\n\tSetConfigOption(\"log_min_messages\", \"INFO\", PGC_SUSET, PGC_S_OVERRIDE);\n\n\tlogging = communicator_worker_configure_logging();\n\n\tproc_handle = communicator_worker_launch(\n\t\tneon_tenant[0] == '\\0' ? NULL : neon_tenant,\n\t\tneon_timeline[0] == '\\0' ? NULL : neon_timeline,\n\t\t&errmsg\n\t\t);\n\tif (proc_handle == NULL)\n\t{\n\t\t/*\n\t\t * Something went wrong. Before exiting, forward any log messages that\n\t\t * might've been generated during the failed launch.\n\t\t */\n\t\tpump_logging(logging);\n\n\t\telog(PANIC, \"%s\", errmsg);\n\t}\n\n\t/*\n\t * The Rust tokio runtime has been launched, and it's running in the\n\t * background now. This loop in the main thread handles any interactions\n\t * we need with the rest of PostgreSQL.\n\t *\n\t * NB: This process is now multi-threaded! The Rust threads do not call\n\t * into any Postgres functions, but it's not entirely clear which Postgres\n\t * functions are safe to call from this main thread either. Be very\n\t * careful about adding anything non-trivial here.\n\t *\n\t * Also note that we try to react quickly to any log messages arriving\n\t * from the Rust thread. Be careful to not do anything too expensive here\n\t * that might cause delays.\n\t */\n\telog(LOG, \"communicator threads started\");\n\tfor (;;)\n\t{\n\t\tTimestampTz before;\n\t\tlong\t\tduration;\n\n\t\tResetLatch(MyLatch);\n\n\t\t/*\n\t\t * Forward any log messages from the Rust threads into the normal\n\t\t * Postgres logging facility.\n\t\t */\n\t\tpump_logging(logging);\n\n\t\t/*\n\t\t * Check interrupts like system shutdown or config reload\n\t\t *\n\t\t * We mustn't block for too long within this loop, or we risk the log\n\t\t * queue to fill up and messages to be lost. 
Also, even if we can keep\n\t\t * up, if there's a long delay between sending a message and printing\n\t\t * it to the log, the timestamps on the messages get skewed, which is\n\t\t * confusing.\n\t\t *\n\t\t * We expect processing interrupts to happen fast enough that it's OK,\n\t\t * but measure it just in case, and print a warning if it takes longer\n\t\t * than 100 ms.\n\t\t */\n#define LOG_SKEW_WARNING_MS\t\t\t100\n\t\tbefore = GetCurrentTimestamp();\n\n\t\tCHECK_FOR_INTERRUPTS();\n\t\tif (ConfigReloadPending)\n\t\t{\n\t\t\tConfigReloadPending = false;\n\t\t\tProcessConfigFile(PGC_SIGHUP);\n\t\t}\n\n\t\tduration = TimestampDifferenceMilliseconds(before, GetCurrentTimestamp());\n\t\tif (duration > LOG_SKEW_WARNING_MS)\n\t\t\telog(WARNING, \"handling interrupts took %ld ms, communicator log timestamps might be skewed\", duration);\n\n\t\t/*\n\t\t * Wait until we are woken up. The rust threads will set the latch\n\t\t * when there's a log message to forward.\n\t\t */\n\t\t(void) WaitLatch(MyLatch,\n\t\t\t\t\t\t WL_LATCH_SET | WL_EXIT_ON_PM_DEATH,\n\t\t\t\t\t\t 0,\n\t\t\t\t\t\t PG_WAIT_EXTENSION);\n\t}\n}\n\nstatic void\npump_logging(struct LoggingReceiver *logging)\n{\n\tchar\t\terrbuf[1000];\n\tint\t\t\televel;\n\tint32\t\trc;\n\tstatic uint64_t last_dropped_event_count = 0;\n\tuint64_t\t\tdropped_event_count;\n\tuint64_t\t\tdropped_now;\n\n\tfor (;;)\n\t{\n\t\trc = communicator_worker_poll_logging(logging,\n\t\t\t\t\t\t\t\t\t\t\t  errbuf,\n\t\t\t\t\t\t\t\t\t\t\t  sizeof(errbuf),\n\t\t\t\t\t\t\t\t\t\t\t  &elevel,\n\t\t\t\t\t\t\t\t\t\t\t  &dropped_event_count);\n\t\tif (rc == 0)\n\t\t{\n\t\t\t/* nothing to do */\n\t\t\tbreak;\n\t\t}\n\t\telse if (rc == 1)\n\t\t{\n\t\t\t/* Because we don't want to exit on error */\n\n\t\t\tif (message_level_is_interesting(elevel))\n\t\t\t{\n\t\t\t\t/*\n\t\t\t\t * Prevent interrupts while cleaning up.\n\t\t\t\t *\n\t\t\t\t * (Not sure if this is required, but all the error handlers\n\t\t\t\t * in Postgres that are installed as 
sigsetjmp() targets do\n\t\t\t\t * this, so let's follow the example)\n\t\t\t\t */\n\t\t\t\tHOLD_INTERRUPTS();\n\n\t\t\t\terrstart(elevel, TEXTDOMAIN);\n\t\t\t\terrmsg_internal(\"[COMMUNICATOR] %s\", errbuf);\n\t\t\t\tEmitErrorReport();\n\t\t\t\tFlushErrorState();\n\n\t\t\t\t/* Now we can allow interrupts again */\n\t\t\t\tRESUME_INTERRUPTS();\n\t\t\t}\n\t\t}\n\t\telse if (rc == -1)\n\t\t{\n\t\t\telog(ERROR, \"logging channel was closed unexpectedly\");\n\t\t}\n\t}\n\n\t/*\n\t * If the queue was full at any time since the last time we reported it,\n\t * report how many messages were lost. We do this outside the loop, so\n\t * that if the logging system is clogged, we don't exacerbate it by\n\t * printing lots of warnings about dropped messages.\n\t */\n\tdropped_now = dropped_event_count - last_dropped_event_count;\n\tif (dropped_now != 0)\n\t{\n\t\telog(WARNING, \"%lu communicator log messages were dropped because the log buffer was full\",\n\t\t\t (unsigned long) dropped_now);\n\t\tlast_dropped_event_count = dropped_event_count;\n\t}\n}\n\n/****\n * Callbacks from the rust code, in the communicator process.\n *\n * NOTE: These must be thread-safe! It's very limited which PostgreSQL\n * functions you can use!!!\n *\n * The signatures of these need to match those in the Rust code.\n */\n\nvoid\ncallback_set_my_latch_unsafe(void)\n{\n\tSetLatch(MyLatch);\n}\n"
  },
  {
    "path": "pgxn/neon/communicator_process.h",
    "content": "/*-------------------------------------------------------------------------\n *\n * communicator_process.h\n *\t\tCommunicator process\n *\n *\n * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group\n * Portions Copyright (c) 1994, Regents of the University of California\n *\n *-------------------------------------------------------------------------\n */\n#ifndef COMMUNICATOR_PROCESS_H\n#define COMMUNICATOR_PROCESS_H\n\nextern void pg_init_communicator_process(void);\n\n#endif\t\t\t/* COMMUNICATOR_PROCESS_H */\n"
  },
  {
    "path": "pgxn/neon/extension_server.c",
    "content": "/*-------------------------------------------------------------------------\n *\n * extension_server.c\n *\t  Request compute_ctl to download extension files.\n *\n *-------------------------------------------------------------------------\n */\n#include \"postgres.h\"\n\n#include <curl/curl.h>\n\n#include \"utils/guc.h\"\n\n#include \"extension_server.h\"\n#include \"neon_utils.h\"\n\nint\thadron_extension_server_port = 0;\nstatic int\textension_server_request_timeout = 60;\nstatic int\textension_server_connect_timeout = 60;\n\nstatic download_extension_file_hook_type prev_download_extension_file_hook = NULL;\n\n/*\n  * to download all SQL (and data) files for an extension:\n  * curl -X POST http://localhost:8080/extension_server/postgis\n  * it covers two possible extension files layouts:\n  * 1. extension_name--version--platform.sql\n  * 2. extension_name/extension_name--version.sql\n  *    extension_name/extra_files.csv\n  * to download specific library file:\n  * curl -X POST http://localhost:8080/extension_server/postgis-3.so?is_library=true\n  */\nstatic bool\nneon_download_extension_file_http(const char *filename, bool is_library)\n{\n\tCURLcode\tres;\n\tbool\t\tret = false;\n\tCURL\t   *handle = NULL;\n\tchar\t   *compute_ctl_url;\n\n\thandle = alloc_curl_handle();\n\n\tcurl_easy_setopt(handle, CURLOPT_CUSTOMREQUEST, \"POST\");\n\tif (extension_server_request_timeout > 0)\n\t\tcurl_easy_setopt(handle, CURLOPT_TIMEOUT, (long)extension_server_request_timeout /* seconds */ );\n\tif (extension_server_connect_timeout > 0)\n\t\tcurl_easy_setopt(handle, CURLOPT_CONNECTTIMEOUT, (long)extension_server_connect_timeout /* seconds */ );\n\n\tcompute_ctl_url = psprintf(\"http://localhost:%d/extension_server/%s%s\",\n\t\t\t\t\t\t\t   hadron_extension_server_port, filename, is_library ? 
\"?is_library=true\" : \"\");\n\n\telog(LOG, \"Sending request to compute_ctl: %s\", compute_ctl_url);\n\n\tcurl_easy_setopt(handle, CURLOPT_URL, compute_ctl_url);\n\n\t/* Perform the request, res will get the return code */\n\tres = curl_easy_perform(handle);\n\tcurl_easy_cleanup(handle);\n\n\t/* Check for errors */\n\tif (res == CURLE_OK)\n\t{\n\t\tret = true;\n\t}\n\telse\n\t{\n\t\t/*\n\t\t * Don't error here because postgres will try to find the file and will\n\t\t * fail with some proper error message if it's not found.\n\t\t */\n\t\telog(WARNING, \"neon_download_extension_file_http failed: %s\\n\", curl_easy_strerror(res));\n\t}\n\n\treturn ret;\n}\n\nvoid\npg_init_extension_server()\n{\n\t/* Port to connect to compute_ctl on localhost */\n\t/* to request extension files. */\n\tDefineCustomIntVariable(\"neon.extension_server_port\",\n\t\t\t\t\t\t\t\"connection string to the compute_ctl\",\n\t\t\t\t\t\t\tNULL,\n\t\t\t\t\t\t\t&hadron_extension_server_port,\n\t\t\t\t\t\t\t0, 0, INT_MAX,\n\t\t\t\t\t\t\tPGC_POSTMASTER,\n\t\t\t\t\t\t\t0,\t/* no flags required */\n\t\t\t\t\t\t\tNULL, NULL, NULL);\n\n\tDefineCustomIntVariable(\"neon.extension_server_request_timeout\",\n\t\t\t\t\t\t\t\"timeout for fetching extensions in seconds\",\n\t\t\t\t\t\t\tNULL,\n\t\t\t\t\t\t\t&extension_server_request_timeout,\n\t\t\t\t\t\t\t60, 0, INT_MAX,\n\t\t\t\t\t\t\tPGC_SUSET,\n\t\t\t\t\t\t\tGUC_UNIT_S,\n\t\t\t\t\t\t\tNULL, NULL, NULL);\n\n\tDefineCustomIntVariable(\"neon.extension_server_connect_timeout\",\n\t\t\t\t\t\t\t\"timeout for connecting to the extension server in seconds\",\n\t\t\t\t\t\t\tNULL,\n\t\t\t\t\t\t\t&extension_server_connect_timeout,\n\t\t\t\t\t\t\t60, 0, INT_MAX,\n\t\t\t\t\t\t\tPGC_SUSET,\n\t\t\t\t\t\t\tGUC_UNIT_S,\n\t\t\t\t\t\t\tNULL, NULL, NULL);\n\n\t/* set download_extension_file_hook */\n\tprev_download_extension_file_hook = download_extension_file_hook;\n\tdownload_extension_file_hook = neon_download_extension_file_http;\n}\n"
  },
  {
    "path": "pgxn/neon/extension_server.h",
    "content": "/*-------------------------------------------------------------------------\n *\n * extension_server.h\n *\t  Request compute_ctl to download extension files.\n *\n *-------------------------------------------------------------------------\n */\n\n#ifndef EXTENSION_SERVER_H\n#define EXTENSION_SERVER_H\n\nvoid pg_init_extension_server(void);\n\n#endif\t\t\t\t\t\t\t/* EXTENSION_SERVER_H */\n"
  },
  {
    "path": "pgxn/neon/file_cache.c",
    "content": "/*-------------------------------------------------------------------------\n *\n * file_cache.c\n *\n *\n * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group\n * Portions Copyright (c) 1994, Regents of the University of California\n *\n *-------------------------------------------------------------------------\n */\n\n#include \"postgres.h\"\n\n#include <sys/file.h>\n#include <unistd.h>\n#include <fcntl.h>\n\n#include \"neon_pgversioncompat.h\"\n\n#include \"access/parallel.h\"\n#include \"access/xlog.h\"\n#include \"funcapi.h\"\n#include \"miscadmin.h\"\n#include \"common/hashfn.h\"\n#include \"pgstat.h\"\n#include \"port/pg_iovec.h\"\n#include \"postmaster/bgworker.h\"\n#include \"postmaster/interrupt.h\"\n#include RELFILEINFO_HDR\n#include \"storage/buf_internals.h\"\n#include \"storage/fd.h\"\n#include \"storage/ipc.h\"\n#include \"storage/latch.h\"\n#include \"storage/lwlock.h\"\n#include \"storage/pg_shmem.h\"\n#include \"storage/procsignal.h\"\n#include \"tcop/tcopprot.h\"\n#include \"utils/builtins.h\"\n#include \"utils/dynahash.h\"\n#include \"utils/guc.h\"\n\n#if PG_VERSION_NUM >= 150000\n#include \"access/xlogrecovery.h\"\n#endif\n\n#include \"hll.h\"\n#include \"bitmap.h\"\n#include \"file_cache.h\"\n#include \"neon.h\"\n#include \"neon_lwlsncache.h\"\n#include \"neon_perf_counters.h\"\n#include \"neon_utils.h\"\n#include \"pagestore_client.h\"\n#include \"communicator.h\"\n\n#include \"communicator/communicator_bindings.h\"\n\n#define CriticalAssert(cond) do if (!(cond)) elog(PANIC, \"LFC: assertion %s failed at %s:%d: \", #cond, __FILE__, __LINE__); while (0)\n\n/*\n * Local file cache is used to temporary store relations pages in local file system.\n * All blocks of all relations are stored inside one file and addressed using shared hash map.\n * Currently LRU eviction policy based on L2 list is used as replacement algorithm.\n * As far as manipulation of L2-list requires global critical section, we are not using 
partitioned hash.\n * Also we are using exclusive lock even for read operation because LRU requires relinking element in L2 list.\n * If this lock become a bottleneck, we can consider other eviction strategies, for example clock algorithm.\n *\n * Cache is always reconstructed at node startup, so we do not need to save mapping somewhere and worry about\n * its consistency.\n\n *\n * ## Holes\n *\n * The LFC can be resized on the fly, up to a maximum size that's determined\n * at server startup (neon.max_file_cache_size). After server startup, we\n * expand the underlying file when needed, until it reaches the soft limit\n * (neon.file_cache_size_limit). If the soft limit is later reduced, we shrink\n * the LFC by punching holes in the underlying file with a\n * fallocate(FALLOC_FL_PUNCH_HOLE) call. The nominal size of the file doesn't\n * shrink, but the disk space it uses does.\n *\n * Each hole is tracked by a dummy FileCacheEntry, which are kept in the\n * 'holes' linked list. They are entered into the chunk hash table, with a\n * special key where the blockNumber is used to store the 'offset' of the\n * hole, and all other fields are zero. Holes are never looked up in the hash\n * table, we only enter them there to have a FileCacheEntry that we can keep\n * in the linked list. If the soft limit is raised again, we reuse the holes\n * before extending the nominal size of the file.\n */\n\n/* Local file storage allocation chunk.\n * Should be power of two. Using larger than page chunks can\n * 1. Reduce hash-map memory footprint: 8TB database contains billion pages\n *    and size of hash entry is 40 bytes, so we need 40Gb just for hash map.\n *    1Mb chunks can reduce hash map size to 320Mb.\n * 2. 
Improve access locality, subsequent pages will be allocated together improving seqscan speed\n */\n#define MAX_BLOCKS_PER_CHUNK_LOG  7 /* 1Mb chunk */\n#define MAX_BLOCKS_PER_CHUNK\t  (1 << MAX_BLOCKS_PER_CHUNK_LOG)\n\n#define MB\t\t\t\t\t((uint64)1024*1024)\n\n#define SIZE_MB_TO_CHUNKS(size) ((uint32)((size) * MB / BLCKSZ >> lfc_chunk_size_log))\n#define BLOCK_TO_CHUNK_OFF(blkno) ((blkno) & (lfc_blocks_per_chunk-1))\n\n/*\n * Blocks are read or written to LFC file outside LFC critical section.\n * To synchronize access to such block, writer set state of such block to PENDING.\n * If some other backend (read or writer) see PENDING status, it change it to REQUESTED and start\n * waiting until status is changed on conditional variable.\n * When writer completes is operation, it checks if status is REQUESTED and if so, broadcast conditional variable,\n * waking up all backend waiting for access to this block.\n */\ntypedef enum FileCacheBlockState\n{\n\tUNAVAILABLE, /* block is not present in cache */\n\tAVAILABLE,   /* block can be used */\n\tPENDING,     /* block is loaded */\n\tREQUESTED    /* some other backend is waiting for block to be loaded */\n} FileCacheBlockState;\n\n\ntypedef struct FileCacheEntry\n{\n\tBufferTag\tkey;\n\tuint32\t\thash;\n\tuint32\t\toffset;\n\tuint32\t\taccess_count;\n\tdlist_node\tlist_node;\t\t/* LRU/holes list node */\n\tuint32\t\tstate[FLEXIBLE_ARRAY_MEMBER]; /* two bits per block */\n} FileCacheEntry;\n\n#define FILE_CACHE_ENRTY_SIZE MAXALIGN(offsetof(FileCacheEntry, state) + (lfc_blocks_per_chunk*2+31)/32*4)\n#define GET_STATE(entry, i) (((entry)->state[(i) / 16] >> ((i) % 16 * 2)) & 3)\n#define SET_STATE(entry, i, new_state) (entry)->state[(i) / 16] = ((entry)->state[(i) / 16] & ~(3 << ((i) % 16 * 2))) | ((new_state) << ((i) % 16 * 2))\n\n#define N_COND_VARS \t64\n#define CV_WAIT_TIMEOUT\t10\n\n#define MAX_PREWARM_WORKERS 8\n\ntypedef struct 
PrewarmWorkerState\n{\n\tuint32\t\tprewarmed_pages;\n\tuint32\t\tskipped_pages;\n\tTimestampTz completed;\n} PrewarmWorkerState;\n\ntypedef struct FileCacheControl\n{\n\tuint64\t\tgeneration;\t\t/* generation is needed to handle correct hash\n\t\t\t\t\t\t\t\t * reenabling */\n\tuint32\t\tsize;\t\t\t/* size of cache file in chunks */\n\tuint32\t\tused;\t\t\t/* number of used chunks */\n\tuint32\t\tused_pages;\t\t/* number of used pages */\n\tuint32\t\tpinned;\t\t\t/* number of pinned chunks */\n\tuint32\t\tlimit;\t\t\t/* shared copy of lfc_size_limit */\n\tuint64\t\thits;\n\tuint64\t\tmisses;\n\tuint64\t\twrites;\t\t\t/* number of writes issued */\n\tuint64\t\ttime_read;\t\t/* time spent reading (us) */\n\tuint64\t\ttime_write;\t\t/* time spent writing (us) */\n\tuint64\t\tresizes;        /* number of LFC resizes   */\n\tuint64\t\tevicted_pages;\t/* number of evicted pages */\n\tdlist_head\tlru;\t\t\t/* double linked list for LRU replacement\n\t\t\t\t\t\t\t\t * algorithm */\n\tdlist_head  holes;          /* double linked list of punched holes */\n\n\tConditionVariable cv[N_COND_VARS]; /* turnstile of condition variables */\n\n\t/*\n\t * Estimation of working set size.\n\t *\n\t * This is not guarded by the lock. No locking is needed because all the\n\t * writes to the \"registers\" are simple 64-bit stores, to update a\n\t * timestamp. We assume that:\n\t *\n\t * - 64-bit stores are atomic. We could enforce that by using\n\t *   pg_atomic_uint64 instead of TimestampTz as the datatype in hll.h, but\n\t *   for now we just rely on it implicitly.\n\t *\n\t * - Even if they're not, and there is a race between two stores, it\n\t *   doesn't matter much which one wins because they're both updating the\n\t *   register with the current timestamp. 
Or you have a race between\n\t *   resetting the register and updating it, in which case it also doesn't\n\t *   matter much which one wins.\n\t *\n\t * - If they're not atomic, you might get an occasional \"torn write\" if\n\t *   you're really unlucky, but we tolerate that too. It just means that\n\t *   the estimate will be a little off, until the register is updated\n\t *   again.\n\t */\n\tHyperLogLogState wss_estimation;\n\n\t/* Prewarmer state */\n\tPrewarmWorkerState prewarm_workers[MAX_PREWARM_WORKERS];\n\tsize_t n_prewarm_workers;\n\tsize_t n_prewarm_entries;\n\tsize_t total_prewarm_pages;\n\tsize_t prewarm_batch;\n\tbool   prewarm_active;\n\tbool   prewarm_canceled;\n\tdsm_handle prewarm_lfc_state_handle;\n} FileCacheControl;\n\n#define FILE_CACHE_STATE_MAGIC 0xfcfcfcfc\n\n#define FILE_CACHE_STATE_BITMAP(fcs)\t((uint8*)&(fcs)->chunks[(fcs)->n_chunks])\n#define FILE_CACHE_STATE_SIZE_FOR_CHUNKS(n_chunks)\t(sizeof(FileCacheState) + (n_chunks)*sizeof(BufferTag) + (((n_chunks) * lfc_blocks_per_chunk)+7)/8)\n#define FILE_CACHE_STATE_SIZE(fcs)\t\t(sizeof(FileCacheState) + (fcs->n_chunks)*sizeof(BufferTag) + (((fcs->n_chunks) << fcs->chunk_size_log)+7)/8)\n\nstatic HTAB *lfc_hash;\nstatic int\tlfc_desc = -1;\nstatic LWLockId lfc_lock;\nstatic int\tlfc_max_size;\nstatic int\tlfc_size_limit;\nstatic int\tlfc_prewarm_limit;\nstatic int\tlfc_prewarm_batch;\nstatic int\tlfc_chunk_size_log = MAX_BLOCKS_PER_CHUNK_LOG;\nstatic int\tlfc_blocks_per_chunk = MAX_BLOCKS_PER_CHUNK;\nstatic char *lfc_path;\nstatic uint64 lfc_generation;\nstatic FileCacheControl *lfc_ctl;\nstatic bool lfc_do_prewarm;\n\nbool lfc_store_prefetch_result;\nbool lfc_prewarm_update_ws_estimation;\n\nbool AmPrewarmWorker;\n\n#define LFC_ENABLED() (lfc_ctl->limit != 0)\n\nPGDLLEXPORT void lfc_prewarm_main(Datum main_arg);\n\n/*\n * Close LFC file if opened.\n * All backends should close their LFC files once LFC is disabled.\n */\nstatic void\nlfc_close_file(void)\n{\n\tif (lfc_desc >= 
0)\n\t{\n\t\tclose(lfc_desc);\n\t\tlfc_desc = -1;\n\t}\n}\n\n/*\n * Local file cache is optional and Neon can work without it.\n * In case of any any errors with this cache, we should disable it but to not throw error.\n * Also we should allow  re-enable it if source of failure (lack of disk space, permissions,...) is fixed.\n * All cache content should be invalidated to avoid reading of stale or corrupted data\n */\nstatic void\nlfc_switch_off(void)\n{\n\tint\t\t\tfd;\n\n\tif (LFC_ENABLED())\n\t{\n\t\tHASH_SEQ_STATUS status;\n\t\tFileCacheEntry *entry;\n\n\t\t/* Invalidate hash */\n\t\thash_seq_init(&status, lfc_hash);\n\t\twhile ((entry = hash_seq_search(&status)) != NULL)\n\t\t{\n\t\t\thash_search_with_hash_value(lfc_hash, &entry->key, entry->hash, HASH_REMOVE, NULL);\n\t\t}\n\t\tlfc_ctl->generation += 1;\n\t\tlfc_ctl->size = 0;\n\t\tlfc_ctl->pinned = 0;\n\t\tlfc_ctl->used = 0;\n\t\tlfc_ctl->used_pages = 0;\n\t\tlfc_ctl->limit = 0;\n\t\tdlist_init(&lfc_ctl->lru);\n\t\tdlist_init(&lfc_ctl->holes);\n\n\t\t/*\n\t\t * We need to use unlink to to avoid races in LFC write, because it is not\n\t\t * protected by lock\n\t\t */\n\t\tunlink(lfc_path);\n\n\t\tfd = BasicOpenFile(lfc_path, O_RDWR | O_CREAT | O_TRUNC);\n\t\tif (fd < 0)\n\t\t\telog(WARNING, \"LFC: failed to recreate local file cache %s: %m\", lfc_path);\n\t\telse\n\t\t\tclose(fd);\n\n\t\t/* Wakeup waiting backends */\n\t\tfor (int i = 0; i < N_COND_VARS; i++)\n\t\t\tConditionVariableBroadcast(&lfc_ctl->cv[i]);\n\t}\n\tlfc_close_file();\n}\n\nstatic void\nlfc_disable(char const *op)\n{\n\telog(WARNING, \"LFC: failed to %s local file cache at %s: %m, disabling local file cache\", op, lfc_path);\n\n\tLWLockAcquire(lfc_lock, LW_EXCLUSIVE);\n\tlfc_switch_off();\n\tLWLockRelease(lfc_lock);\n}\n\n/*\n * This check is done without obtaining lfc_lock, so it is unreliable\n */\nstatic bool\nlfc_maybe_disabled(void)\n{\n\treturn !lfc_ctl || !LFC_ENABLED();\n}\n\n/*\n * Open LFC file if not opened yet or generation is 
changed.\n * Should be called under LFC lock.\n */\nstatic bool\nlfc_ensure_opened(void)\n{\n\tif (lfc_generation != lfc_ctl->generation)\n\t{\n\t\tlfc_close_file();\n\t\tlfc_generation = lfc_ctl->generation;\n\t}\n\t/* Open cache file if not done yet */\n\tif (lfc_desc < 0)\n\t{\n\t\tlfc_desc = BasicOpenFile(lfc_path, O_RDWR);\n\n\t\tif (lfc_desc < 0)\n\t\t{\n\t\t\tlfc_disable(\"open\");\n\t\t\treturn false;\n\t\t}\n\t}\n\treturn true;\n}\n\nvoid\nLfcShmemInit(void)\n{\n\tbool\t\tfound;\n\tstatic HASHCTL info;\n\n\tif (lfc_max_size <= 0)\n\t\treturn;\n\n\tlfc_ctl = (FileCacheControl *) ShmemInitStruct(\"lfc\", sizeof(FileCacheControl), &found);\n\tif (!found)\n\t{\n\t\tint\t\t\tfd;\n\t\tuint32\t\tn_chunks = SIZE_MB_TO_CHUNKS(lfc_max_size);\n\n\t\tlfc_lock = (LWLockId) GetNamedLWLockTranche(\"lfc_lock\");\n\t\tinfo.keysize = sizeof(BufferTag);\n\t\tinfo.entrysize = FILE_CACHE_ENRTY_SIZE;\n\n\t\t/*\n\t\t * n_chunks+1 because we add new element to hash table before eviction\n\t\t * of victim\n\t\t */\n\t\tlfc_hash = ShmemInitHash(\"lfc_hash\",\n\t\t\t\t\t\t\t\t n_chunks + 1, n_chunks + 1,\n\t\t\t\t\t\t\t\t &info,\n\t\t\t\t\t\t\t\t HASH_ELEM | HASH_BLOBS);\n\t\tmemset(lfc_ctl, 0, sizeof(FileCacheControl));\n\t\tdlist_init(&lfc_ctl->lru);\n\t\tdlist_init(&lfc_ctl->holes);\n\n\t\t/* Initialize hyper-log-log structure for estimating working set size */\n\t\tinitSHLL(&lfc_ctl->wss_estimation);\n\n\t\t/* Recreate file cache on restart */\n\t\tfd = BasicOpenFile(lfc_path, O_RDWR | O_CREAT | O_TRUNC);\n\t\tif (fd < 0)\n\t\t{\n\t\t\telog(WARNING, \"LFC: failed to create local file cache %s: %m\", lfc_path);\n\t\t\tlfc_ctl->limit = 0;\n\t\t}\n\t\telse\n\t\t{\n\t\t\tclose(fd);\n\t\t\tlfc_ctl->limit = SIZE_MB_TO_CHUNKS(lfc_size_limit);\n\t\t}\n\n\t\t/* Initialize turnstile of condition variables */\n\t\tfor (int i = 0; i < N_COND_VARS; i++)\n\t\t\tConditionVariableInit(&lfc_ctl->cv[i]);\n\n\t}\n}\n\nvoid\nLfcShmemRequest(void)\n{\n\tif (lfc_max_size > 
0)\n\t{\n\t\tRequestAddinShmemSpace(sizeof(FileCacheControl) + hash_estimate_size(SIZE_MB_TO_CHUNKS(lfc_max_size) + 1, FILE_CACHE_ENRTY_SIZE));\n\t\tRequestNamedLWLockTranche(\"lfc_lock\", 1);\n\t}\n}\n\nstatic bool\nis_normal_backend(void)\n{\n\t/*\n\t * Stats collector detach shared memory, so we should not try to access\n\t * shared memory here. Parallel workers first assign default value (0), so\n\t * not perform truncation in parallel workers. The Postmaster can handle\n\t * SIGHUP and it has access to shared memory (UsedShmemSegAddr != NULL),\n\t * but has no PGPROC.\n\t */\n\treturn lfc_ctl && MyProc && UsedShmemSegAddr && !IsParallelWorker();\n}\n\nstatic bool\nlfc_check_chunk_size(int *newval, void **extra, GucSource source)\n{\n\tif (*newval & (*newval - 1))\n\t{\n\t\telog(ERROR, \"LFC chunk size should be power of two\");\n\t\treturn false;\n\t}\n\treturn true;\n}\n\nstatic void\nlfc_change_chunk_size(int newval, void* extra)\n{\n\tlfc_chunk_size_log = pg_ceil_log2_32(newval);\n}\n\n\nstatic bool\nlfc_check_limit_hook(int *newval, void **extra, GucSource source)\n{\n\tif (*newval > lfc_max_size)\n\t{\n\t\telog(ERROR, \"LFC: neon.file_cache_size_limit can not be larger than neon.max_file_cache_size\");\n\t\treturn false;\n\t}\n\treturn true;\n}\n\nstatic void\nlfc_change_limit_hook(int newval, void *extra)\n{\n\tuint32\t\tnew_size = SIZE_MB_TO_CHUNKS(newval);\n\n\tif (!lfc_ctl || !is_normal_backend())\n\t\treturn;\n\n\tLWLockAcquire(lfc_lock, LW_EXCLUSIVE);\n\n\t/* Open LFC file only if LFC was enabled or we are going to reenable it */\n\tif (newval == 0 && !LFC_ENABLED())\n\t{\n\t\tLWLockRelease(lfc_lock);\n\t\t/* File should be reopened if LFC is reenabled */\n\t\tlfc_close_file();\n\t\treturn;\n\t}\n\n\tif (!lfc_ensure_opened())\n\t{\n\t\tLWLockRelease(lfc_lock);\n\t\treturn;\n\t}\n\n\tif (lfc_ctl->limit != new_size)\n\t{\n\t\tlfc_ctl->resizes += 1;\n\t}\n\n\twhile (new_size < lfc_ctl->used && !dlist_is_empty(&lfc_ctl->lru))\n\t{\n\t\t/*\n\t\t * Shrink 
cache by throwing away least recently accessed chunks and\n\t\t * returning their space to file system\n\t\t */\n\t\tFileCacheEntry *victim = dlist_container(FileCacheEntry, list_node, dlist_pop_head_node(&lfc_ctl->lru));\n\t\tFileCacheEntry *hole;\n\t\tuint32\t\toffset = victim->offset;\n\t\tuint32\t\thash;\n\t\tbool\t\tfound;\n\t\tBufferTag\tholetag;\n\n\t\tCriticalAssert(victim->access_count == 0);\n#ifdef FALLOC_FL_PUNCH_HOLE\n\t\tif (fallocate(lfc_desc, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, (off_t) victim->offset * lfc_blocks_per_chunk * BLCKSZ, lfc_blocks_per_chunk * BLCKSZ) < 0)\n\t\t\tneon_log(LOG, \"Failed to punch hole in file: %m\");\n#endif\n\t\t/* We remove the old entry, and re-enter a hole to the hash table */\n\t\tfor (int i = 0; i < lfc_blocks_per_chunk; i++)\n\t\t{\n\t\t\tbool is_page_cached = GET_STATE(victim, i) == AVAILABLE;\n\t\t\tlfc_ctl->used_pages -= is_page_cached;\n\t\t\tlfc_ctl->evicted_pages += is_page_cached;\n\t\t}\n\t\thash_search_with_hash_value(lfc_hash, &victim->key, victim->hash, HASH_REMOVE, NULL);\n\n\t\tmemset(&holetag, 0, sizeof(holetag));\n\t\tholetag.blockNum = offset;\n\t\thash = get_hash_value(lfc_hash, &holetag);\n\t\thole = hash_search_with_hash_value(lfc_hash, &holetag, hash, HASH_ENTER, &found);\n\t\thole->hash = hash;\n\t\thole->offset = offset;\n\t\thole->access_count = 0;\n\t\tCriticalAssert(!found);\n\t\tdlist_push_tail(&lfc_ctl->holes, &hole->list_node);\n\n\t\tlfc_ctl->used -= 1;\n\t}\n\tif (new_size == 0)\n\t\tlfc_switch_off();\n\telse\n\t\tlfc_ctl->limit = new_size;\n\n\tneon_log(DEBUG1, \"set local file cache limit to %d\", new_size);\n\n\tLWLockRelease(lfc_lock);\n}\n\nvoid\nlfc_init(void)\n{\n\t/*\n\t * In order to create our shared memory area, we have to be loaded via\n\t * shared_preload_libraries.\n\t */\n\tif (!process_shared_preload_libraries_in_progress)\n\t\tneon_log(ERROR, \"Neon module should be loaded via 
shared_preload_libraries\");\n\n\n\tDefineCustomBoolVariable(\"neon.store_prefetch_result_in_lfc\",\n\t\t\t\t\t\t\t\"Immediately store received prefetch result in LFC\",\n\t\t\t\t\t\t\tNULL,\n\t\t\t\t\t\t\t&lfc_store_prefetch_result,\n\t\t\t\t\t\t\tfalse,\n\t\t\t\t\t\t\tPGC_SUSET,\n\t\t\t\t\t\t\t0,\n\t\t\t\t\t\t\tNULL,\n\t\t\t\t\t\t\tNULL,\n\t\t\t\t\t\t\tNULL);\n\n\tDefineCustomBoolVariable(\"neon.prewarm_update_ws_estimation\",\n\t\t\t\t\t\t\t\"Consider prewarmed pages for working set estimation\",\n\t\t\t\t\t\t\tNULL,\n\t\t\t\t\t\t\t&lfc_prewarm_update_ws_estimation,\n\t\t\t\t\t\t\ttrue,\n\t\t\t\t\t\t\tPGC_SUSET,\n\t\t\t\t\t\t\t0,\n\t\t\t\t\t\t\tNULL,\n\t\t\t\t\t\t\tNULL,\n\t\t\t\t\t\t\tNULL);\n\n\tDefineCustomIntVariable(\"neon.max_file_cache_size\",\n\t\t\t\t\t\t\t\"Maximal size of Neon local file cache\",\n\t\t\t\t\t\t\tNULL,\n\t\t\t\t\t\t\t&lfc_max_size,\n\t\t\t\t\t\t\t0,\t/* disabled by default */\n\t\t\t\t\t\t\t0,\n\t\t\t\t\t\t\tINT_MAX,\n\t\t\t\t\t\t\tPGC_POSTMASTER,\n\t\t\t\t\t\t\tGUC_UNIT_MB,\n\t\t\t\t\t\t\tNULL,\n\t\t\t\t\t\t\tNULL,\n\t\t\t\t\t\t\tNULL);\n\n\tDefineCustomIntVariable(\"neon.file_cache_size_limit\",\n\t\t\t\t\t\t\t\"Current limit for size of Neon local file cache\",\n\t\t\t\t\t\t\tNULL,\n\t\t\t\t\t\t\t&lfc_size_limit,\n\t\t\t\t\t\t\t0,\t/* disabled by default */\n\t\t\t\t\t\t\t0,\n\t\t\t\t\t\t\tINT_MAX,\n\t\t\t\t\t\t\tPGC_SIGHUP,\n\t\t\t\t\t\t\tGUC_UNIT_MB,\n\t\t\t\t\t\t\tlfc_check_limit_hook,\n\t\t\t\t\t\t\tlfc_change_limit_hook,\n\t\t\t\t\t\t\tNULL);\n\n\tDefineCustomStringVariable(\"neon.file_cache_path\",\n\t\t\t\t\t\t\t   \"Path to local file cache (can be raw device)\",\n\t\t\t\t\t\t\t   NULL,\n\t\t\t\t\t\t\t   &lfc_path,\n\t\t\t\t\t\t\t   \"file.cache\",\n\t\t\t\t\t\t\t   PGC_POSTMASTER,\n\t\t\t\t\t\t\t   0,\n\t\t\t\t\t\t\t   NULL,\n\t\t\t\t\t\t\t   NULL,\n\t\t\t\t\t\t\t   NULL);\n\n\tDefineCustomIntVariable(\"neon.file_cache_chunk_size\",\n\t\t\t\t\t\t\t\"LFC chunk size in blocks (should be power of 
two)\",\n\t\t\t\t\t\t\tNULL,\n\t\t\t\t\t\t\t&lfc_blocks_per_chunk,\n\t\t\t\t\t\t\tMAX_BLOCKS_PER_CHUNK,\n\t\t\t\t\t\t\t1,\n\t\t\t\t\t\t\tMAX_BLOCKS_PER_CHUNK,\n\t\t\t\t\t\t\tPGC_POSTMASTER,\n\t\t\t\t\t\t\tGUC_UNIT_BLOCKS,\n\t\t\t\t\t\t\tlfc_check_chunk_size,\n\t\t\t\t\t\t\tlfc_change_chunk_size,\n\t\t\t\t\t\t\tNULL);\n\n\tDefineCustomIntVariable(\"neon.file_cache_prewarm_limit\",\n\t\t\t\t\t\t\t\"Maximal number of prewarmed chunks\",\n\t\t\t\t\t\t\tNULL,\n\t\t\t\t\t\t\t&lfc_prewarm_limit,\n\t\t\t\t\t\t\tINT_MAX,\t/* no limit by default */\n\t\t\t\t\t\t\t0,\n\t\t\t\t\t\t\tINT_MAX,\n\t\t\t\t\t\t\tPGC_SIGHUP,\n\t\t\t\t\t\t\t0,\n\t\t\t\t\t\t\tNULL,\n\t\t\t\t\t\t\tNULL,\n\t\t\t\t\t\t\tNULL);\n\n\tDefineCustomIntVariable(\"neon.file_cache_prewarm_batch\",\n\t\t\t\t\t\t\t\"Number of pages retrivied by prewarm from page server\",\n\t\t\t\t\t\t\tNULL,\n\t\t\t\t\t\t\t&lfc_prewarm_batch,\n\t\t\t\t\t\t\t64,\n\t\t\t\t\t\t\t1,\n\t\t\t\t\t\t\tINT_MAX,\n\t\t\t\t\t\t\tPGC_SIGHUP,\n\t\t\t\t\t\t\t0,\n\t\t\t\t\t\t\tNULL,\n\t\t\t\t\t\t\tNULL,\n\t\t\t\t\t\t\tNULL);\n}\n\n/*\n * Dump a list of pages that are currently in the LFC\n *\n * This is used to get a snapshot that can be used to prewarm the LFC later.\n */\nFileCacheState*\nlfc_get_state(size_t max_entries)\n{\n\tFileCacheState* fcs = NULL;\n\n\tif (lfc_maybe_disabled() || max_entries == 0)\t/* fast exit if file cache is disabled */\n\t\treturn NULL;\n\n\tLWLockAcquire(lfc_lock, LW_SHARED);\n\n\tif (LFC_ENABLED())\n\t{\n\t\tdlist_iter iter;\n\t\tsize_t i = 0;\n\t\tuint8* bitmap;\n\t\tsize_t n_pages = 0;\n\t\tsize_t n_entries = Min(max_entries, lfc_ctl->used - lfc_ctl->pinned);\n\t\tsize_t state_size = FILE_CACHE_STATE_SIZE_FOR_CHUNKS(n_entries);\n\t\tfcs = (FileCacheState*)palloc0(state_size);\n\t\tSET_VARSIZE(fcs, state_size);\n\t\tfcs->magic = FILE_CACHE_STATE_MAGIC;\n\t\tfcs->chunk_size_log = lfc_chunk_size_log;\n\t\tfcs->n_chunks = n_entries;\n\t\tbitmap = FILE_CACHE_STATE_BITMAP(fcs);\n\n\t\tdlist_reverse_foreach(iter, 
&lfc_ctl->lru)\n\t\t{\n\t\t\tFileCacheEntry *entry = dlist_container(FileCacheEntry, list_node, iter.cur);\n\t\t\tfcs->chunks[i] = entry->key;\n\t\t\tfor (int j = 0; j < lfc_blocks_per_chunk; j++)\n\t\t\t{\n\t\t\t\tif (GET_STATE(entry, j) != UNAVAILABLE)\n\t\t\t\t{\n\t\t\t\t\t/* Validate the buffer tag before including it */\n\t\t\t\t\tBufferTag test_tag = entry->key;\n\t\t\t\t\ttest_tag.blockNum += j;\n\n\t\t\t\t\tif (BufferTagIsValid(&test_tag))\n\t\t\t\t\t{\n\t\t\t\t\t\tBITMAP_SET(bitmap, i*lfc_blocks_per_chunk + j);\n\t\t\t\t\t\tn_pages += 1;\n\t\t\t\t\t}\n\t\t\t\t\telse\n\t\t\t\t\t{\n\t\t\t\t\t\telog(ERROR, \"LFC: Skipping invalid buffer tag during cache state capture: blockNum=%u\", test_tag.blockNum);\n\t\t\t\t\t}\n\t\t\t\t}\n\t\t\t}\n\t\t\tif (++i == n_entries)\n\t\t\t\tbreak;\n\t\t}\n\t\tAssert(i == n_entries);\n\t\tfcs->n_pages = n_pages;\n\t\tAssert(pg_popcount((char*)bitmap, ((n_entries << lfc_chunk_size_log) + 7)/8) == n_pages);\n\t\telog(LOG, \"LFC: save state of %d chunks %d pages (validated)\", (int)n_entries, (int)n_pages);\n\t}\n\n\tLWLockRelease(lfc_lock);\n\n\treturn fcs;\n}\n\n/*\n * Prewarm LFC cache to the specified state. 
It uses the lfc_prefetch function to load prewarmed pages without holding the shared buffer lock\n * and to avoid race conditions with other backends.\n */\nvoid\nlfc_prewarm(FileCacheState* fcs, uint32 n_workers)\n{\n\tsize_t fcs_chunk_size_log;\n\tsize_t n_entries;\n\tsize_t prewarm_batch = Min(lfc_prewarm_batch, readahead_buffer_size);\n\tsize_t fcs_size;\n\tuint32_t max_prefetch_pages;\n\tdsm_segment *seg;\n\tBackgroundWorkerHandle* bgw_handle[MAX_PREWARM_WORKERS];\n\n\n\tif (!lfc_ensure_opened())\n\t\treturn;\n\n\tif (prewarm_batch == 0 || lfc_prewarm_limit == 0 || n_workers == 0)\n\t{\n\t\telog(LOG, \"LFC: prewarm is disabled\");\n\t\treturn;\n\t}\n\n\tif (n_workers > MAX_PREWARM_WORKERS)\n\t{\n\t\telog(ERROR, \"LFC: Too much prewarm workers, maximum is %d\", MAX_PREWARM_WORKERS);\n\t}\n\n\tif (fcs == NULL || fcs->n_chunks == 0)\n\t{\n\t\telog(LOG, \"LFC: nothing to prewarm\");\n\t\treturn;\n\t}\n\n\tif (fcs->magic != FILE_CACHE_STATE_MAGIC)\n\t{\n\t\telog(ERROR, \"LFC: Invalid file cache state magic: %X\", fcs->magic);\n\t}\n\n\tfcs_size = VARSIZE(fcs);\n\tif (FILE_CACHE_STATE_SIZE(fcs) != fcs_size)\n\t{\n\t\telog(ERROR, \"LFC: Invalid file cache state size: %u vs. 
%u\", (unsigned)FILE_CACHE_STATE_SIZE(fcs), VARSIZE(fcs));\n\t}\n\n\tfcs_chunk_size_log = fcs->chunk_size_log;\n\tif (fcs_chunk_size_log > MAX_BLOCKS_PER_CHUNK_LOG)\n\t{\n\t\telog(ERROR, \"LFC: Invalid chunk size log: %u\", fcs->chunk_size_log);\n\t}\n\n\tn_entries = Min(fcs->n_chunks, lfc_prewarm_limit);\n\tAssert(n_entries != 0);\n\n\tmax_prefetch_pages = n_entries << fcs_chunk_size_log;\n\tif (fcs->n_pages > max_prefetch_pages) {\n\t\telog(ERROR, \"LFC: Number of pages in file cache state (%d) is more than the limit (%d)\", fcs->n_pages, max_prefetch_pages);\n\t}\n\n\tLWLockAcquire(lfc_lock, LW_EXCLUSIVE);\n\n\t/* Do not prewarm more entries than LFC limit */\n\tif (lfc_ctl->limit <= lfc_ctl->size)\n\t{\n\t\telog(LOG, \"LFC: skip prewarm because LFC is already filled\");\n\t\tLWLockRelease(lfc_lock);\n\t\treturn;\n\t}\n\n\tif (lfc_ctl->prewarm_active)\n\t{\n\t\tLWLockRelease(lfc_lock);\n\t\telog(ERROR, \"LFC: skip prewarm because another prewarm is still active\");\n\t}\n\tlfc_ctl->n_prewarm_entries = n_entries;\n\tlfc_ctl->n_prewarm_workers = n_workers;\n\tlfc_ctl->prewarm_active = true;\n\tlfc_ctl->prewarm_canceled = false;\n\tlfc_ctl->prewarm_batch = prewarm_batch;\n\tmemset(lfc_ctl->prewarm_workers, 0, n_workers*sizeof(PrewarmWorkerState));\n\n\tLWLockRelease(lfc_lock);\n\n\t/* Calculate total number of pages to be prewarmed */\n\tlfc_ctl->total_prewarm_pages = fcs->n_pages;\n\n\tseg = dsm_create(fcs_size, 0);\n\tmemcpy(dsm_segment_address(seg), fcs, fcs_size);\n\tlfc_ctl->prewarm_lfc_state_handle = dsm_segment_handle(seg);\n\n\t/* Spawn background workers */\n\tfor (uint32 i = 0; i < n_workers; i++)\n\t{\n\t\tBackgroundWorker worker = {0};\n\n\t\tworker.bgw_flags = BGWORKER_SHMEM_ACCESS;\n\t\tworker.bgw_start_time = BgWorkerStart_ConsistentState;\n\t\tworker.bgw_restart_time = BGW_NEVER_RESTART;\n\t\tstrcpy(worker.bgw_library_name, \"neon\");\n\t\tstrcpy(worker.bgw_function_name, \"lfc_prewarm_main\");\n\t\tsnprintf(worker.bgw_name, BGW_MAXLEN, \"LFC 
prewarm worker %d\", i+1);\n\t\tstrcpy(worker.bgw_type, \"LFC prewarm worker\");\n\t\tworker.bgw_main_arg = Int32GetDatum(i);\n\t\t/* must set notify PID to wait for shutdown */\n\t\tworker.bgw_notify_pid = MyProcPid;\n\n\t\tif (!RegisterDynamicBackgroundWorker(&worker, &bgw_handle[i]))\n\t\t{\n\t\t\tereport(LOG,\n\t\t\t\t\t(errcode(ERRCODE_INSUFFICIENT_RESOURCES),\n\t\t\t\t\t errmsg(\"LFC: registering dynamic bgworker prewarm failed\"),\n\t\t\t\t\t errhint(\"Consider increasing the configuration parameter \\\"%s\\\".\", \"max_worker_processes\")));\n\t\t\tn_workers = i;\n\t\t\tlfc_ctl->prewarm_canceled = true;\n\t\t\tbreak;\n\t\t}\n\t}\n\n\tfor (uint32 i = 0; i < n_workers; i++)\n\t{\n\t\tbool interrupted;\n\t\tdo\n\t\t{\n\t\t\tinterrupted = false;\n\t\t\tPG_TRY();\n\t\t\t{\n\t\t\t\tBgwHandleStatus status = WaitForBackgroundWorkerShutdown(bgw_handle[i]);\n\t\t\t\tif (status != BGWH_STOPPED && status != BGWH_POSTMASTER_DIED)\n\t\t\t\t{\n\t\t\t\t\telog(LOG, \"LFC: Unexpected status of prewarm worker termination: %d\", status);\n\t\t\t\t}\n\t\t\t}\n\t\t\tPG_CATCH();\n\t\t\t{\n\t\t\t\telog(LOG, \"LFC: cancel prewarm\");\n\t\t\t\tlfc_ctl->prewarm_canceled = true;\n\t\t\t\tinterrupted = true;\n\t\t\t}\n\t\t\tPG_END_TRY();\n\t\t} while (interrupted);\n\n\t\tif (!lfc_ctl->prewarm_workers[i].completed)\n\t\t{\n\t\t\t/* Background worker doesn't set completion time: it means that it was abnormally terminated */\n\t\t\telog(LOG, \"LFC: prewarm worker %d failed\", i+1);\n\t\t\t/* Set completion time to prevent get_prewarm_info from considering this worker as active */\n\t\t\tlfc_ctl->prewarm_workers[i].completed = GetCurrentTimestamp();\n\t\t}\n\t}\n\tdsm_detach(seg);\n\n\tLWLockAcquire(lfc_lock, LW_EXCLUSIVE);\n\tlfc_ctl->prewarm_active = false;\n\tLWLockRelease(lfc_lock);\n}\n\nvoid\nlfc_prewarm_main(Datum main_arg)\n{\n\tsize_t snd_idx = 0, rcv_idx = 0;\n\tsize_t n_sent = 0, n_received = 0;\n\tsize_t fcs_chunk_size_log;\n\tsize_t max_prefetch_pages;\n\tsize_t 
prewarm_batch;\n\tsize_t n_workers;\n\tdsm_segment *seg;\n\tFileCacheState* fcs;\n\tuint8* bitmap;\n\tBufferTag tag;\n\tPrewarmWorkerState* ws;\n\tuint32 worker_id = DatumGetInt32(main_arg);\n\n\tAmPrewarmWorker = true;\n\n\tpqsignal(SIGTERM, die);\n\tBackgroundWorkerUnblockSignals();\n\n\tseg = dsm_attach(lfc_ctl->prewarm_lfc_state_handle);\n\tif (seg == NULL)\n\t\tereport(ERROR,\n\t\t\t\t(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),\n\t\t\t\t errmsg(\"could not map dynamic shared memory segment\")));\n\n\tfcs = (FileCacheState*) dsm_segment_address(seg);\n\tprewarm_batch = lfc_ctl->prewarm_batch;\n\tfcs_chunk_size_log = fcs->chunk_size_log;\n\tn_workers = lfc_ctl->n_prewarm_workers;\n\tmax_prefetch_pages = lfc_ctl->n_prewarm_entries << fcs_chunk_size_log;\n\tws = &lfc_ctl->prewarm_workers[worker_id];\n\tbitmap = FILE_CACHE_STATE_BITMAP(fcs);\n\n\t/* enable prefetch in LFC */\n\tlfc_store_prefetch_result = true;\n\tlfc_do_prewarm = true; /* Flag for lfc_prefetch preventing replacement of existed entries if LFC cache is full */\n\n\telog(LOG, \"LFC: worker %d start prewarming\", worker_id);\n\twhile (!lfc_ctl->prewarm_canceled)\n\t{\n\t\tif (snd_idx < max_prefetch_pages)\n\t\t{\n\t\t\tif ((snd_idx >> fcs_chunk_size_log) % n_workers != worker_id)\n\t\t\t{\n\t\t\t\t/* If there are multiple workers, split chunks between them */\n\t\t\t\tsnd_idx += 1 << fcs_chunk_size_log;\n\t\t\t}\n\t\t\telse\n\t\t\t{\n\t\t\t\tif (BITMAP_ISSET(bitmap, snd_idx))\n\t\t\t\t{\n\t\t\t\t\ttag = fcs->chunks[snd_idx >> fcs_chunk_size_log];\n\t\t\t\t\ttag.blockNum += snd_idx & ((1 << fcs_chunk_size_log) - 1);\n\n\t\t\t\t\tif (!BufferTagIsValid(&tag)) {\n\t\t\t\t\t\telog(ERROR, \"LFC: Invalid buffer tag: %u\", tag.blockNum);\n\t\t\t\t\t}\n\n\t\t\t\t\tif (!lfc_cache_contains(BufTagGetNRelFileInfo(tag), tag.forkNum, tag.blockNum))\n\t\t\t\t\t{\n\t\t\t\t\t\t(void)communicator_prefetch_register_bufferv(tag, NULL, 1, NULL);\n\t\t\t\t\t\tn_sent += 
1;\n\t\t\t\t\t}\n\t\t\t\t\telse\n\t\t\t\t\t{\n\t\t\t\t\t\tws->skipped_pages += 1;\n\t\t\t\t\t\tBITMAP_CLR(bitmap, snd_idx);\n\t\t\t\t\t}\n\t\t\t\t}\n\t\t\t\tsnd_idx += 1;\n\t\t\t}\n\t\t}\n\t\tif (n_sent >= n_received + prewarm_batch || snd_idx == max_prefetch_pages)\n\t\t{\n\t\t\tif (n_received == n_sent && snd_idx == max_prefetch_pages)\n\t\t\t{\n\t\t\t\tbreak;\n\t\t\t}\n\t\t\tif ((rcv_idx >> fcs_chunk_size_log) % n_workers != worker_id)\n\t\t\t{\n\t\t\t\t/* Skip chunks processed by other workers */\n\t\t\t\trcv_idx += 1 << fcs_chunk_size_log;\n\t\t\t\tcontinue;\n\t\t\t}\n\n\t\t\t/* Locate next block to prefetch */\n\t\t\twhile (!BITMAP_ISSET(bitmap, rcv_idx))\n\t\t\t{\n\t\t\t\trcv_idx += 1;\n\t\t\t}\n\t\t\ttag = fcs->chunks[rcv_idx >> fcs_chunk_size_log];\n\t\t\ttag.blockNum += rcv_idx & ((1 << fcs_chunk_size_log) - 1);\n\t\t\tif (communicator_prefetch_receive(tag))\n\t\t\t{\n\t\t\t\tws->prewarmed_pages += 1;\n\t\t\t}\n\t\t\telse\n\t\t\t{\n\t\t\t\tws->skipped_pages += 1;\n\t\t\t}\n\t\t\trcv_idx += 1;\n\t\t\tn_received += 1;\n\t\t}\n\t}\n\t/* No need to perform prefetch cleanup here because prewarm worker will be terminated and\n\t * connection to PS dropped just after return from this function.\n\t */\n\tAssert(n_sent == n_received || lfc_ctl->prewarm_canceled);\n\telog(LOG, \"LFC: worker %d complete prewarming: loaded %ld pages\", worker_id, (long)n_received);\n\tlfc_ctl->prewarm_workers[worker_id].completed = GetCurrentTimestamp();\n}\n\nvoid\nlfc_invalidate(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber nblocks)\n{\n\tBufferTag\ttag;\n\tFileCacheEntry *entry;\n\tuint32\t\thash;\n\n\tif (lfc_maybe_disabled())\t/* fast exit if file cache is disabled */\n\t\treturn;\n\n\tCopyNRelFileInfoToBufTag(tag, rinfo);\n\ttag.forkNum = forkNum;\n\n\tCriticalAssert(BufTagGetRelNumber(&tag) != InvalidRelFileNumber);\n\n\tLWLockAcquire(lfc_lock, LW_EXCLUSIVE);\n\tif (LFC_ENABLED())\n\t{\n\t\tfor (BlockNumber blkno = 0; blkno < nblocks; blkno += 
lfc_blocks_per_chunk)\n\t\t{\n\t\t\ttag.blockNum = blkno;\n\t\t\thash = get_hash_value(lfc_hash, &tag);\n\t\t\tentry = hash_search_with_hash_value(lfc_hash, &tag, hash, HASH_FIND, NULL);\n\t\t\tif (entry != NULL)\n\t\t\t{\n\t\t\t\tfor (int i = 0; i < lfc_blocks_per_chunk; i++)\n\t\t\t\t{\n\t\t\t\t\tif (GET_STATE(entry, i) == AVAILABLE)\n\t\t\t\t\t{\n\t\t\t\t\t\tlfc_ctl->used_pages -= 1;\n\t\t\t\t\t\tSET_STATE(entry, i, UNAVAILABLE);\n\t\t\t\t\t}\n\t\t\t\t}\n\t\t\t}\n\t\t}\n\t}\n\tLWLockRelease(lfc_lock);\n}\n\n/*\n * Check if page is present in the cache.\n * Returns true if page is found in local cache.\n */\nbool\nlfc_cache_contains(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno)\n{\n\tBufferTag\ttag;\n\tFileCacheEntry *entry;\n\tint\t\t\tchunk_offs = BLOCK_TO_CHUNK_OFF(blkno);\n\tbool\t\tfound = false;\n\tuint32\t\thash;\n\n\tif (lfc_maybe_disabled())\t/* fast exit if file cache is disabled */\n\t\treturn false;\n\n\tCopyNRelFileInfoToBufTag(tag, rinfo);\n\ttag.forkNum = forkNum;\n\ttag.blockNum = blkno - chunk_offs;\n\n\tCriticalAssert(BufTagGetRelNumber(&tag) != InvalidRelFileNumber);\n\thash = get_hash_value(lfc_hash, &tag);\n\n\tLWLockAcquire(lfc_lock, LW_SHARED);\n\tif (LFC_ENABLED())\n\t{\n\t\tentry = hash_search_with_hash_value(lfc_hash, &tag, hash, HASH_FIND, NULL);\n\t\tfound = entry != NULL && GET_STATE(entry, chunk_offs) != UNAVAILABLE;\n\t}\n\tLWLockRelease(lfc_lock);\n\treturn found;\n}\n\n/*\n * Check if page is present in the cache.\n * Returns true if page is found in local cache.\n */\nint\nlfc_cache_containsv(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,\n\t\t\t\t\tint nblocks, bits8 *bitmap)\n{\n\tBufferTag\ttag;\n\tFileCacheEntry *entry;\n\tuint32\t\tchunk_offs;\n\tint\t\t\tfound = 0;\n\tuint32\t\thash;\n\tint\t\t\ti = 0;\n\n\tif (lfc_maybe_disabled())\t/* fast exit if file cache is disabled */\n\t\treturn 0;\n\n\tCopyNRelFileInfoToBufTag(tag, rinfo);\n\ttag.forkNum = 
forkNum;\n\n\tCriticalAssert(BufTagGetRelNumber(&tag) != InvalidRelFileNumber);\n\n\tchunk_offs = BLOCK_TO_CHUNK_OFF(blkno);\n\ttag.blockNum = blkno - chunk_offs;\n\thash = get_hash_value(lfc_hash, &tag);\n\n\tLWLockAcquire(lfc_lock, LW_SHARED);\n\n\tif (!LFC_ENABLED())\n\t{\n\t\tLWLockRelease(lfc_lock);\n\t\treturn 0;\n\t}\n\twhile (true)\n\t{\n\t\tint\t\tthis_chunk = Min(nblocks - i, lfc_blocks_per_chunk - chunk_offs);\n\t\tentry = hash_search_with_hash_value(lfc_hash, &tag, hash, HASH_FIND, NULL);\n\n\t\tif (entry != NULL)\n\t\t{\n\t\t\tfor (; chunk_offs < lfc_blocks_per_chunk && i < nblocks; chunk_offs++, i++)\n\t\t\t{\n\t\t\t\tif (GET_STATE(entry, chunk_offs) != UNAVAILABLE)\n\t\t\t\t{\n\t\t\t\t\tBITMAP_SET(bitmap, i);\n\t\t\t\t\tfound++;\n\t\t\t\t}\n\t\t\t}\n\t\t}\n\t\telse\n\t\t{\n\t\t\ti += this_chunk;\n\t\t}\n\t\t/*\n\t\t * Break out of the iteration before doing expensive stuff for\n\t\t * a next iteration\n\t\t */\n\t\tif (i >= nblocks)\n\t\t\tbreak;\n\n\t\t/*\n\t\t * Prepare for the next iteration. We don't unlock here, as that'd\n\t\t * probably be more expensive than the gains it'd get us.\n\t\t */\n\t\tchunk_offs = BLOCK_TO_CHUNK_OFF(blkno + i);\n\t\ttag.blockNum = (blkno + i) - chunk_offs;\n\t\thash = get_hash_value(lfc_hash, &tag);\n\t}\n\n\tLWLockRelease(lfc_lock);\n\n#ifdef USE_ASSERT_CHECKING\n\t{\n\t\tint count = 0;\n\n\t\tfor (int j = 0; j < nblocks; j++)\n\t\t{\n\t\t\tif (BITMAP_ISSET(bitmap, j))\n\t\t\t\tcount++;\n\t\t}\n\n\t\tAssert(count == found);\n\t}\n#endif\n\n\treturn found;\n}\n\n#if PG_MAJORVERSION_NUM >= 16\nstatic PGIOAlignedBlock voidblock = {0};\n#else\nstatic PGAlignedBlock voidblock = {0};\n#endif\n#define SCRIBBLEPAGE (&voidblock.data)\n\n/*\n * Try to read pages from local cache.\n * Returns the number of pages read from the local cache, and sets bits in\n * 'mask' for the pages which were read. 
This may scribble over buffers not\n * marked in 'mask', so be careful with operation ordering.\n *\n * In case of error local file cache is disabled (lfc->limit is set to zero),\n * and -1 is returned.\n *\n * If the mask argument is supplied, we'll only try to read those pages which\n * don't have their bits set on entry. At exit, pages which were successfully\n * read from LFC will have their bits set.\n */\nint\nlfc_readv_select(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,\n\t\t\t\t void **buffers, BlockNumber nblocks, bits8 *mask)\n{\n\tBufferTag\ttag;\n\tFileCacheEntry *entry;\n\tssize_t\t\trc;\n\tuint32\t\thash;\n\tuint64\t\tgeneration;\n\tuint32\t\tentry_offset;\n\tint\t\t\tblocks_read = 0;\n\tint\t\t\tbuf_offset = 0;\n\n\tif (lfc_maybe_disabled())\t/* fast exit if file cache is disabled */\n\t\treturn -1;\n\n\tCopyNRelFileInfoToBufTag(tag, rinfo);\n\ttag.forkNum = forkNum;\n\n\tCriticalAssert(BufTagGetRelNumber(&tag) != InvalidRelFileNumber);\n\n\t/* Update working set size estimate for the blocks */\n\tfor (int i = 0; i < nblocks; i++)\n\t{\n\t\ttag.blockNum = blkno + i;\n\t\taddSHLL(&lfc_ctl->wss_estimation, hash_bytes((uint8_t const*)&tag, sizeof(tag)));\n\t}\n\n\t/*\n\t * For every chunk that has blocks we're interested in, we\n\t * 1. get the chunk header\n\t * 2. Check if the chunk actually has the blocks we're interested in\n\t * 3. Read the blocks we're looking for (in one preadv), assuming they exist\n\t * 4. 
Update the statistics for the read call.\n\t *\n\t * If there is an error, we do an early return.\n\t */\n\twhile (nblocks > 0)\n\t{\n\t\tstruct iovec iov[PG_IOV_MAX];\n\t\tuint8\tchunk_mask[MAX_BLOCKS_PER_CHUNK / 8] = {0};\n\t\tint\t\tchunk_offs = BLOCK_TO_CHUNK_OFF(blkno);\n\t\tint\t\tblocks_in_chunk = Min(nblocks, lfc_blocks_per_chunk - chunk_offs);\n\t\tint\t\titeration_hits = 0;\n\t\tint\t\titeration_misses = 0;\n\t\tuint64\tio_time_us = 0;\n\t\tint\t\tn_blocks_to_read = 0;\n\t\tint\t\tiov_last_used = 0;\n\t\tint\t\tfirst_block_in_chunk_read = -1;\n\t\tConditionVariable* cv;\n\n\t\tAssert(blocks_in_chunk > 0);\n\n\t\tfor (int i = 0; i < blocks_in_chunk; i++)\n\t\t{\n\t\t\tiov[i].iov_len = BLCKSZ;\n\t\t\t/* mask not set = we must do work */\n\t\t\tif (!BITMAP_ISSET(mask, buf_offset + i))\n\t\t\t{\n\t\t\t\tiov[i].iov_base = buffers[buf_offset + i];\n\t\t\t\tn_blocks_to_read++;\n\t\t\t\tiov_last_used = i + 1;\n\n\t\t\t\tif (first_block_in_chunk_read == -1)\n\t\t\t\t{\n\t\t\t\t\tfirst_block_in_chunk_read = i;\n\t\t\t\t}\n\t\t\t}\n\t\t\t/* mask set = we must do no work */\n\t\t\telse\n\t\t\t{\n\t\t\t\t/* don't scribble on pages we weren't requested to write to */\n\t\t\t\tiov[i].iov_base = SCRIBBLEPAGE;\n\t\t\t}\n\t\t}\n\n\t\t/* shortcut IO */\n\t\tif (n_blocks_to_read == 0)\n\t\t{\n\t\t\tbuf_offset += blocks_in_chunk;\n\t\t\tnblocks -= blocks_in_chunk;\n\t\t\tblkno += blocks_in_chunk;\n\t\t\tcontinue;\n\t\t}\n\n\t\t/*\n\t\t * The effective iov size must be >= the number of blocks we're about\n\t\t * to read.\n\t\t */\n\t\tAssert(iov_last_used - first_block_in_chunk_read >= n_blocks_to_read);\n\n\t\ttag.blockNum = blkno - chunk_offs;\n\t\thash = get_hash_value(lfc_hash, &tag);\n\t\tcv = &lfc_ctl->cv[hash % N_COND_VARS];\n\n\t\tLWLockAcquire(lfc_lock, LW_EXCLUSIVE);\n\n\t\t/* We can return the blocks we've read before LFC got disabled;\n\t\t * assuming we read any. 
*/\n\t\tif (!LFC_ENABLED() || !lfc_ensure_opened())\n\t\t{\n\t\t\tLWLockRelease(lfc_lock);\n\t\t\treturn blocks_read;\n\t\t}\n\n\t\tentry = hash_search_with_hash_value(lfc_hash, &tag, hash, HASH_FIND, NULL);\n\t\tif (entry == NULL)\n\t\t{\n\t\t\t/* Pages are not cached */\n\t\t\tlfc_ctl->misses += blocks_in_chunk;\n\t\t\tpgBufferUsage.file_cache.misses += blocks_in_chunk;\n\t\t\tLWLockRelease(lfc_lock);\n\n\t\t\tbuf_offset += blocks_in_chunk;\n\t\t\tnblocks -= blocks_in_chunk;\n\t\t\tblkno += blocks_in_chunk;\n\n\t\t\tcontinue;\n\t\t}\n\n\t\t/* Unlink entry from LRU list to pin it for the duration of IO operation */\n\t\tif (entry->access_count++ == 0)\n\t\t{\n\t\t\tlfc_ctl->pinned += 1;\n\t\t\tdlist_delete(&entry->list_node);\n\t\t}\n\t\tgeneration = lfc_ctl->generation;\n\t\tentry_offset = entry->offset;\n\n\t\tfor (int i = first_block_in_chunk_read; i < iov_last_used; i++)\n\t\t{\n\t\t\tFileCacheBlockState state = UNAVAILABLE;\n\t\t\tbool sleeping = false;\n\n\t\t\t/* no need to work on something we're not interested in */\n\t\t\tif (BITMAP_ISSET(mask, buf_offset + i))\n\t\t\t\tcontinue;\n\n\t\t\twhile (lfc_ctl->generation == generation)\n\t\t\t{\n\t\t\t\tstate = GET_STATE(entry, chunk_offs + i);\n\t\t\t\tif (state == PENDING) {\n\t\t\t\t\tSET_STATE(entry, chunk_offs + i, REQUESTED);\n\t\t\t\t} else if (state != REQUESTED) {\n\t\t\t\t\tbreak;\n\t\t\t\t}\n\t\t\t\tif (!sleeping)\n\t\t\t\t{\n\t\t\t\t\tConditionVariablePrepareToSleep(cv);\n\t\t\t\t\tsleeping = true;\n\t\t\t\t}\n\t\t\t\tLWLockRelease(lfc_lock);\n\t\t\t\tConditionVariableTimedSleep(cv, CV_WAIT_TIMEOUT, WAIT_EVENT_NEON_LFC_CV_WAIT);\n\t\t\t\tLWLockAcquire(lfc_lock, LW_EXCLUSIVE);\n\t\t\t}\n\t\t\tif (sleeping)\n\t\t\t{\n\t\t\t\tConditionVariableCancelSleep();\n\t\t\t}\n\t\t\tif (state == AVAILABLE)\n\t\t\t{\n\t\t\t\tBITMAP_SET(chunk_mask, i);\n\t\t\t\titeration_hits++;\n\t\t\t}\n\t\t\telse\n\t\t\t\titeration_misses++;\n\t\t}\n\t\tLWLockRelease(lfc_lock);\n\n\t\tAssert(iteration_hits + iteration_misses > 
0);\n\n\t\tif (iteration_hits != 0)\n\t\t{\n\t\t\t/* chunk offset (#\n\t\t\t   of pages) into the LFC file */\n\t\t\toff_t\tfirst_read_offset = (off_t) entry_offset * lfc_blocks_per_chunk;\n\t\t\tint\t\tnwrite = iov_last_used - first_block_in_chunk_read;\n\t\t\t/* offset of first IOV */\n\t\t\tfirst_read_offset += chunk_offs + first_block_in_chunk_read;\n\n\t\t\tpgstat_report_wait_start(WAIT_EVENT_NEON_LFC_READ);\n\n\t\t\t/* Read only the blocks we're interested in, limiting */\n\t\t\trc = preadv(lfc_desc, &iov[first_block_in_chunk_read],\n\t\t\t\t\t\tnwrite, first_read_offset * BLCKSZ);\n\t\t\tpgstat_report_wait_end();\n\n\t\t\tif (rc != (BLCKSZ * nwrite))\n\t\t\t{\n\t\t\t\tlfc_disable(\"read\");\n\t\t\t\treturn -1;\n\t\t\t}\n\t\t}\n\n\t\t/* Place entry to the head of LRU list */\n\t\tLWLockAcquire(lfc_lock, LW_EXCLUSIVE);\n\n\t\tif (lfc_ctl->generation == generation)\n\t\t{\n\t\t\tCriticalAssert(LFC_ENABLED());\n\t\t\tlfc_ctl->hits += iteration_hits;\n\t\t\tlfc_ctl->misses += iteration_misses;\n\t\t\tpgBufferUsage.file_cache.hits += iteration_hits;\n\t\t\tpgBufferUsage.file_cache.misses += iteration_misses;\n\n\t\t\tif (iteration_hits)\n\t\t\t{\n\t\t\t\tlfc_ctl->time_read += io_time_us;\n\t\t\t\tinc_page_cache_read_wait(io_time_us);\n\t\t\t\t/*\n\t\t\t\t * We successfully read the pages we know were valid when we\n\t\t\t\t * started reading; now mark those pages as read\n\t\t\t\t */\n\t\t\t\tfor (int i = first_block_in_chunk_read; i < iov_last_used; i++)\n\t\t\t\t{\n\t\t\t\t\tif (BITMAP_ISSET(chunk_mask, i))\n\t\t\t\t\t\tBITMAP_SET(mask, buf_offset + i);\n\t\t\t\t}\n\t\t\t}\n\n\t\t\tCriticalAssert(entry->access_count > 0);\n\t\t\tif (--entry->access_count == 0)\n\t\t\t{\n\t\t\t\tlfc_ctl->pinned -= 1;\n\t\t\t\tdlist_push_tail(&lfc_ctl->lru, &entry->list_node);\n\t\t\t}\n\t\t}\n\t\telse\n\t\t{\n\t\t\t/* generation mismatch, assume error condition */\n\t\t\tlfc_close_file();\n\t\t\tLWLockRelease(lfc_lock);\n\t\t\treturn 
-1;\n\t\t}\n\n\t\tLWLockRelease(lfc_lock);\n\n\t\tbuf_offset += blocks_in_chunk;\n\t\tnblocks -= blocks_in_chunk;\n\t\tblkno += blocks_in_chunk;\n\t\tblocks_read += iteration_hits;\n\t}\n\n\treturn blocks_read;\n}\n\n/*\n * Initialize new LFC hash entry, perform eviction if needed.\n * Returns false if there are no unpinned entries and chunk can not be added.\n */\nstatic bool\nlfc_init_new_entry(FileCacheEntry* entry, uint32 hash)\n{\n\t/*-----------\n\t * If the chunk wasn't already in the LFC then we have these\n\t * options, in order of preference:\n\t *\n\t * Unless there is no space available, we can:\n\t *  1. Use an entry from the `holes` list, and\n\t *  2. Create a new entry.\n\t * We can always, regardless of space in the LFC:\n\t *  3. evict an entry from LRU, and\n\t *  4. ignore the write operation (the least favorite option)\n\t */\n\tif (lfc_ctl->used < lfc_ctl->limit)\n\t{\n\t\tif (!dlist_is_empty(&lfc_ctl->holes))\n\t\t{\n\t\t\t/* We can reuse a hole that was left behind when the LFC was shrunk previously */\n\t\t\tFileCacheEntry *hole = dlist_container(FileCacheEntry, list_node,\n\t\t\t\t\t\t\t\t\t\t\t\t   dlist_pop_head_node(&lfc_ctl->holes));\n\t\t\tuint32 offset = hole->offset;\n\t\t\tbool hole_found;\n\n\t\t\thash_search_with_hash_value(lfc_hash, &hole->key,\n\t\t\t\t\t\t\t\t\t\thole->hash, HASH_REMOVE, &hole_found);\n\t\t\tCriticalAssert(hole_found);\n\n\t\t\tlfc_ctl->used += 1;\n\t\t\tentry->offset = offset;\t\t\t/* reuse the hole */\n\t\t}\n\t\telse\n\t\t{\n\t\t\tlfc_ctl->used += 1;\n\t\t\tentry->offset = lfc_ctl->size++;/* allocate new chunk at end\n\t\t\t\t\t\t\t\t\t\t\t * of file */\n\t\t}\n\t}\n\t/*\n\t * We've already used up all allocated LFC entries.\n\t *\n\t * If we can clear an entry from the LRU, do that.\n\t * If we can't (e.g. 
because all other slots are being accessed)\n\t * then we will remove this entry from the hash and continue\n\t * on to the next chunk, as we may not exceed the limit.\n\t *\n\t * While prewarming LFC we do not want to replace existing entries,\n\t * so we just stop prewarm if the LFC cache is full.\n\t */\n\telse if (!dlist_is_empty(&lfc_ctl->lru) && !lfc_do_prewarm)\n\t{\n\t\t/* Cache overflow: evict least recently used chunk */\n\t\tFileCacheEntry *victim = dlist_container(FileCacheEntry, list_node,\n\t\t\t\t\t\t\t\t\t\t\t\t dlist_pop_head_node(&lfc_ctl->lru));\n\n\t\tfor (int i = 0; i < lfc_blocks_per_chunk; i++)\n\t\t{\n\t\t\tbool is_page_cached = GET_STATE(victim, i) == AVAILABLE;\n\t\t\tlfc_ctl->used_pages -= is_page_cached;\n\t\t\tlfc_ctl->evicted_pages += is_page_cached;\n\t\t}\n\n\t\tCriticalAssert(victim->access_count == 0);\n\t\tentry->offset = victim->offset; /* grab victim's chunk */\n\t\thash_search_with_hash_value(lfc_hash, &victim->key,\n\t\t\t\t\t\t\t\t\tvictim->hash, HASH_REMOVE, NULL);\n\t\tneon_log(DEBUG2, \"Swap file cache page\");\n\t}\n\telse\n\t{\n\t\t/* Can't add this chunk - we don't have the space for it */\n\t\thash_search_with_hash_value(lfc_hash, &entry->key, hash,\n\t\t\t\t\t\t\t\t\tHASH_REMOVE, NULL);\n\t\tlfc_ctl->prewarm_canceled = true; /* cancel prewarm if LFC limit is reached */\n\t\treturn false;\n\t}\n\n\tentry->access_count = 1;\n\tentry->hash = hash;\n\tlfc_ctl->pinned += 1;\n\n\tfor (int i = 0; i < lfc_blocks_per_chunk; i++)\n\t\tSET_STATE(entry, i, UNAVAILABLE);\n\n\treturn true;\n}\n\n/*\n * Store received prefetch result in LFC cache.\n * Unlike lfc_read/lfc_write this call is not protected by shared buffer lock.\n * So we should be ready that other backends will try to concurrently read or write this block.\n * We do not store prefetched block if it already exists in LFC or its not_modified_since LSN is smaller\n * than current last written LSN (LwLSN).\n *\n * We can enforce correctness of storing page in LFC by the 
following steps:\n * 1. Check under LFC lock that page is not present in LFC.\n * 2. Check under LFC lock that LwLSN is not changed since prefetch request time (not_modified_since).\n * 3. Change page state to \"Pending\" under LFC lock to prevent all other backends from reading or writing this\n *    page until this write is completed.\n * 4. Assume that some other backend creates new image of the page without reading it\n *    (because reads will be blocked because of 2). This version of the page is stored in shared buffer.\n *    Any attempt to throw away this page from shared buffer will be blocked, because Postgres first\n *    needs to save dirty page and write will be blocked because of 2.\n *    So any backend trying to access this page will take it from shared buffer without accessing\n *    SMGR and LFC.\n * 5. After write completion we once again obtain LFC lock and wake up all waiting backends.\n *    If there is some backend waiting to write new image of the page (4) then now it will be able to\n *    do it, overwriting old (prefetched) page image. 
As far as this write will be completed before\n *    shared buffer can be reassigned, not other backend can see old page image.\n*/\nbool\nlfc_prefetch(NRelFileInfo rinfo, ForkNumber forknum, BlockNumber blkno,\n\t\t\t const void* buffer, XLogRecPtr lsn)\n{\n\tBufferTag\ttag;\n\tFileCacheEntry *entry;\n\tssize_t\t\trc;\n\tbool\t\tfound;\n\tuint32\t\thash;\n\tuint64\t\tgeneration;\n\tuint32\t\tentry_offset;\n\tinstr_time io_start, io_end;\n\tConditionVariable* cv;\n\tFileCacheBlockState state;\n\tXLogRecPtr lwlsn;\n\n\tint\t\tchunk_offs = BLOCK_TO_CHUNK_OFF(blkno);\n\n\tif (lfc_maybe_disabled())\t/* fast exit if file cache is disabled */\n\t\treturn false;\n\n\tCopyNRelFileInfoToBufTag(tag, rinfo);\n\tCriticalAssert(BufTagGetRelNumber(&tag) != InvalidRelFileNumber);\n\ttag.forkNum = forknum;\n\n\t/* Update working set size estimate for the blocks */\n\tif (lfc_prewarm_update_ws_estimation)\n\t{\n\t\ttag.blockNum = blkno;\n\t\taddSHLL(&lfc_ctl->wss_estimation, hash_bytes((uint8_t const*)&tag, sizeof(tag)));\n\t}\n\n\ttag.blockNum = blkno - chunk_offs;\n\thash = get_hash_value(lfc_hash, &tag);\n\tcv = &lfc_ctl->cv[hash % N_COND_VARS];\n\n\tLWLockAcquire(lfc_lock, LW_EXCLUSIVE);\n\n\tif (!LFC_ENABLED() || !lfc_ensure_opened())\n\t{\n\t\tLWLockRelease(lfc_lock);\n\t\treturn false;\n\t}\n\n\tlwlsn = neon_get_lwlsn(rinfo, forknum, blkno);\n\n\tif (lwlsn > lsn)\n\t{\n\t\telog(DEBUG1, \"Skip LFC write for %u because LwLSN=%X/%X is greater than not_nodified_since LSN %X/%X\",\n\t\t\t blkno, LSN_FORMAT_ARGS(lwlsn), LSN_FORMAT_ARGS(lsn));\n\t\tLWLockRelease(lfc_lock);\n\t\treturn false;\n\t}\n\n\tentry = hash_search_with_hash_value(lfc_hash, &tag, hash, HASH_ENTER, &found);\n\tif (found)\n\t{\n\t\tstate = GET_STATE(entry, chunk_offs);\n\t\tif (state != UNAVAILABLE) {\n\t\t\t/* Do not rewrite existed LFC entry */\n\t\t\tLWLockRelease(lfc_lock);\n\t\t\treturn false;\n\t\t}\n\t\t/*\n\t\t * Unlink entry from LRU list to pin it for the duration of IO\n\t\t * operation\n\t\t 
*/\n\t\tif (entry->access_count++ == 0)\n\t\t{\n\t\t\tlfc_ctl->pinned += 1;\n\t\t\tdlist_delete(&entry->list_node);\n\t\t}\n\t}\n\telse\n\t{\n\t\tif (!lfc_init_new_entry(entry, hash))\n\t\t{\n\t\t\t/*\n\t\t\t * We can't process this chunk due to lack of space in LFC,\n\t\t\t * so skip to the next one\n\t\t\t */\n\t\t\tLWLockRelease(lfc_lock);\n\t\t\treturn false;\n\t\t}\n\t}\n\n\tgeneration = lfc_ctl->generation;\n\tentry_offset = entry->offset;\n\n\tSET_STATE(entry, chunk_offs, PENDING);\n\n\tLWLockRelease(lfc_lock);\n\n\tpgstat_report_wait_start(WAIT_EVENT_NEON_LFC_WRITE);\n\tINSTR_TIME_SET_CURRENT(io_start);\n\trc = pwrite(lfc_desc, buffer, BLCKSZ,\n\t\t\t\t((off_t) entry_offset * lfc_blocks_per_chunk + chunk_offs) * BLCKSZ);\n\tINSTR_TIME_SET_CURRENT(io_end);\n\tpgstat_report_wait_end();\n\n\tif (rc != BLCKSZ)\n\t{\n\t\tlfc_disable(\"write\");\n\t}\n\telse\n\t{\n\t\tLWLockAcquire(lfc_lock, LW_EXCLUSIVE);\n\n\t\tif (lfc_ctl->generation == generation)\n\t\t{\n\t\t\tuint64\ttime_spent_us;\n\t\t\tCriticalAssert(LFC_ENABLED());\n\t\t\t/* Place entry to the head of LRU list */\n\t\t\tCriticalAssert(entry->access_count > 0);\n\n\t\t\tlfc_ctl->writes += 1;\n\t\t\tINSTR_TIME_SUBTRACT(io_start, io_end);\n\t\t\ttime_spent_us = INSTR_TIME_GET_MICROSEC(io_start);\n\t\t\tlfc_ctl->time_write += time_spent_us;\n\t\t\tinc_page_cache_write_wait(time_spent_us);\n\n\t\t\tif (--entry->access_count == 0)\n\t\t\t{\n\t\t\t\tlfc_ctl->pinned -= 1;\n\t\t\t\tdlist_push_tail(&lfc_ctl->lru, &entry->list_node);\n\t\t\t}\n\n\t\t\tstate = GET_STATE(entry, chunk_offs);\n\t\t\tif (state == REQUESTED) {\n\t\t\t\tConditionVariableBroadcast(cv);\n\t\t\t}\n\t\t\tif (state != AVAILABLE)\n\t\t\t{\n\t\t\t\tlfc_ctl->used_pages += 1;\n\t\t\t\tSET_STATE(entry, chunk_offs, AVAILABLE);\n\t\t\t}\n\t\t}\n\t\telse\n\t\t{\n\t\t\tlfc_close_file();\n\t\t}\n\t\tLWLockRelease(lfc_lock);\n\t}\n\treturn true;\n}\n\n/*\n * Put page in local file cache.\n * If cache is full then evict some other page.\n 
*/\nvoid\nlfc_writev(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,\n\t\t   const void *const *buffers, BlockNumber nblocks)\n{\n\tBufferTag\ttag;\n\tFileCacheEntry *entry;\n\tssize_t\t\trc;\n\tbool\t\tfound;\n\tuint32\t\thash;\n\tuint64\t\tgeneration;\n\tuint32\t\tentry_offset;\n\tint\t\t\tbuf_offset = 0;\n\n\tif (lfc_maybe_disabled())\t/* fast exit if file cache is disabled */\n\t\treturn;\n\n\tCopyNRelFileInfoToBufTag(tag, rinfo);\n\tCriticalAssert(BufTagGetRelNumber(&tag) != InvalidRelFileNumber);\n\ttag.forkNum = forkNum;\n\n\t/* Update working set size estimate for the blocks */\n\tfor (int i = 0; i < nblocks; i++)\n\t{\n\t\ttag.blockNum = blkno + i;\n\t\taddSHLL(&lfc_ctl->wss_estimation, hash_bytes((uint8_t const*)&tag, sizeof(tag)));\n\t}\n\n\tLWLockAcquire(lfc_lock, LW_EXCLUSIVE);\n\n\tif (!LFC_ENABLED() || !lfc_ensure_opened())\n\t{\n\t\tLWLockRelease(lfc_lock);\n\t\treturn;\n\t}\n\tgeneration = lfc_ctl->generation;\n\n\t/*\n\t * For every chunk that has blocks we're interested in, we\n\t * 1. get the chunk header\n\t * 2. Check if the chunk actually has the blocks we're interested in\n\t * 3. Read the blocks we're looking for (in one preadv), assuming they exist\n\t * 4. 
Update the statistics for the read call.\n\t *\n\t * If there is an error, we do an early return.\n\t */\n\twhile (nblocks > 0)\n\t{\n\t\tstruct iovec iov[PG_IOV_MAX];\n\t\tint\t\tchunk_offs = BLOCK_TO_CHUNK_OFF(blkno);\n\t\tint\t\tblocks_in_chunk = Min(nblocks, lfc_blocks_per_chunk - chunk_offs);\n\t\tinstr_time io_start, io_end;\n\t\tConditionVariable* cv;\n\n\t\tAssert(blocks_in_chunk > 0);\n\n\t\tfor (int i = 0; i < blocks_in_chunk; i++)\n\t\t{\n\t\t\tiov[i].iov_base = unconstify(void *, buffers[buf_offset + i]);\n\t\t\tiov[i].iov_len = BLCKSZ;\n\t\t}\n\n\t\ttag.blockNum = blkno - chunk_offs;\n\t\thash = get_hash_value(lfc_hash, &tag);\n\t\tcv = &lfc_ctl->cv[hash % N_COND_VARS];\n\n\t\tentry = hash_search_with_hash_value(lfc_hash, &tag, hash, HASH_ENTER, &found);\n\t\tif (found)\n\t\t{\n\t\t\t/*\n\t\t\t * Unlink entry from LRU list to pin it for the duration of IO\n\t\t\t * operation\n\t\t\t */\n\t\t\tif (entry->access_count++ == 0)\n\t\t\t{\n\t\t\t\tlfc_ctl->pinned += 1;\n\t\t\t\tdlist_delete(&entry->list_node);\n\t\t\t}\n\t\t}\n\t\telse\n\t\t{\n\t\t\tif (!lfc_init_new_entry(entry, hash))\n\t\t\t{\n\t\t\t\t/*\n\t\t\t\t * We can't process this chunk due to lack of space in LFC,\n\t\t\t\t * so skip to the next one\n\t\t\t\t */\n\t\t\t\tblkno += blocks_in_chunk;\n\t\t\t\tbuf_offset += blocks_in_chunk;\n\t\t\t\tnblocks -= blocks_in_chunk;\n\t\t\t\tcontinue;\n\t\t\t}\n\t\t}\n\n\t\tentry_offset = entry->offset;\n\n\t\tfor (int i = 0; i < blocks_in_chunk; i++)\n\t\t{\n\t\t\tFileCacheBlockState state = UNAVAILABLE;\n\t\t\tbool sleeping = false;\n\t\t\twhile (lfc_ctl->generation == generation)\n\t\t\t{\n\t\t\t\tstate = GET_STATE(entry, chunk_offs + i);\n\t\t\t\tif (state == PENDING) {\n\t\t\t\t\tSET_STATE(entry, chunk_offs + i, REQUESTED);\n\t\t\t\t} else if (state == UNAVAILABLE) {\n\t\t\t\t\tSET_STATE(entry, chunk_offs + i, PENDING);\n\t\t\t\t\tbreak;\n\t\t\t\t} else if (state == AVAILABLE) {\n\t\t\t\t\tbreak;\n\t\t\t\t}\n\t\t\t\tif 
(!sleeping)\n\t\t\t\t{\n\t\t\t\t\tConditionVariablePrepareToSleep(cv);\n\t\t\t\t\tsleeping = true;\n\t\t\t\t}\n\t\t\t\tLWLockRelease(lfc_lock);\n\t\t\t\tConditionVariableTimedSleep(cv, CV_WAIT_TIMEOUT, WAIT_EVENT_NEON_LFC_CV_WAIT);\n\t\t\t\tLWLockAcquire(lfc_lock, LW_EXCLUSIVE);\n\t\t\t}\n\t\t\tif (sleeping)\n\t\t\t{\n\t\t\t\tConditionVariableCancelSleep();\n\t\t\t}\n\t\t}\n\t\tLWLockRelease(lfc_lock);\n\n\t\tpgstat_report_wait_start(WAIT_EVENT_NEON_LFC_WRITE);\n\t\tINSTR_TIME_SET_CURRENT(io_start);\n\t\trc = pwritev(lfc_desc, iov, blocks_in_chunk,\n\t\t\t\t\t ((off_t) entry_offset * lfc_blocks_per_chunk + chunk_offs) * BLCKSZ);\n\t\tINSTR_TIME_SET_CURRENT(io_end);\n\t\tpgstat_report_wait_end();\n\n\t\tif (rc != BLCKSZ * blocks_in_chunk)\n\t\t{\n\t\t\tlfc_disable(\"write\");\n\t\t\treturn;\n\t\t}\n\t\telse\n\t\t{\n\t\t\tLWLockAcquire(lfc_lock, LW_EXCLUSIVE);\n\n\t\t\tif (lfc_ctl->generation == generation)\n\t\t\t{\n\t\t\t\tuint64\ttime_spent_us;\n\t\t\t\tCriticalAssert(LFC_ENABLED());\n\t\t\t\t/* Place entry to the head of LRU list */\n\t\t\t\tCriticalAssert(entry->access_count > 0);\n\n\t\t\t\tlfc_ctl->writes += blocks_in_chunk;\n\t\t\t\tINSTR_TIME_SUBTRACT(io_start, io_end);\n\t\t\t\ttime_spent_us = INSTR_TIME_GET_MICROSEC(io_start);\n\t\t\t\tlfc_ctl->time_write += time_spent_us;\n\t\t\t\tinc_page_cache_write_wait(time_spent_us);\n\n\t\t\t\tif (--entry->access_count == 0)\n\t\t\t\t{\n\t\t\t\t\tlfc_ctl->pinned -= 1;\n\t\t\t\t\tdlist_push_tail(&lfc_ctl->lru, &entry->list_node);\n\t\t\t\t}\n\n\t\t\t\tfor (int i = 0; i < blocks_in_chunk; i++)\n\t\t\t\t{\n\t\t\t\t\tFileCacheBlockState state = GET_STATE(entry, chunk_offs + i);\n\t\t\t\t\tif (state == REQUESTED)\n\t\t\t\t\t{\n\t\t\t\t\t\tConditionVariableBroadcast(cv);\n\t\t\t\t\t}\n\t\t\t\t\tif (state != AVAILABLE)\n\t\t\t\t\t{\n\t\t\t\t\t\tlfc_ctl->used_pages += 1;\n\t\t\t\t\t\tSET_STATE(entry, chunk_offs + i, AVAILABLE);\n\t\t\t\t\t}\n\t\t\t\t}\n\t\t\t}\n\t\t\telse\n\t\t\t{\n\t\t\t\t/* stop iteration if LFC was 
disabled */\n\t\t\t\tlfc_close_file();\n\t\t\t\tbreak;\n\t\t\t}\n\t\t}\n\t\tblkno += blocks_in_chunk;\n\t\tbuf_offset += blocks_in_chunk;\n\t\tnblocks -= blocks_in_chunk;\n\t}\n\tLWLockRelease(lfc_lock);\n}\n\n/*\n * Return metrics about the LFC.\n *\n * The return format is a palloc'd array of LfcStatsEntrys. The size\n * of the returned array is returned in *num_entries.\n */\nLfcStatsEntry *\nlfc_get_stats(size_t *num_entries)\n{\n\tLfcStatsEntry *entries;\n\tsize_t\t\tn = 0;\n\n#define MAX_ENTRIES 10\n\tentries = palloc(sizeof(LfcStatsEntry) * MAX_ENTRIES);\n\n\tentries[n++] = (LfcStatsEntry) {\"file_cache_chunk_size_pages\", lfc_ctl == NULL,\n\t\t\t\t\t\t\t\t\tlfc_ctl ? lfc_blocks_per_chunk : 0 };\n\tentries[n++] = (LfcStatsEntry) {\"file_cache_misses\", lfc_ctl == NULL,\n\t\t\t\t\t\t\t\t\tlfc_ctl ? lfc_ctl->misses : 0};\n\tentries[n++] = (LfcStatsEntry) {\"file_cache_hits\", lfc_ctl == NULL,\n\t\t\t\t\t\t\t\t\tlfc_ctl ? lfc_ctl->hits : 0 };\n\tentries[n++] = (LfcStatsEntry) {\"file_cache_used\", lfc_ctl == NULL,\n\t\t\t\t\t\t\t\t\tlfc_ctl ? lfc_ctl->used : 0 };\n\tentries[n++] = (LfcStatsEntry) {\"file_cache_writes\", lfc_ctl == NULL,\n\t\t\t\t\t\t\t\t\tlfc_ctl ? lfc_ctl->writes : 0 };\n\tentries[n++] = (LfcStatsEntry) {\"file_cache_size\", lfc_ctl == NULL,\n\t\t\t\t\t\t\t\t\tlfc_ctl ? lfc_ctl->size : 0 };\n\tentries[n++] = (LfcStatsEntry) {\"file_cache_used_pages\", lfc_ctl == NULL,\n\t\t\t\t\t\t\t\t\tlfc_ctl ? lfc_ctl->used_pages : 0 };\n\tentries[n++] = (LfcStatsEntry) {\"file_cache_evicted_pages\", lfc_ctl == NULL,\n\t\t\t\t\t\t\t\t\tlfc_ctl ? lfc_ctl->evicted_pages : 0 };\n\tentries[n++] = (LfcStatsEntry) {\"file_cache_limit\", lfc_ctl == NULL,\n\t\t\t\t\t\t\t\t\tlfc_ctl ? lfc_ctl->limit : 0 };\n\tentries[n++] = (LfcStatsEntry) {\"file_cache_chunks_pinned\", lfc_ctl == NULL,\n\t\t\t\t\t\t\t\t\tlfc_ctl ? 
lfc_ctl->pinned : 0 };\n\tAssert(n <= MAX_ENTRIES);\n#undef MAX_ENTRIES\n\n\t*num_entries = n;\n\treturn entries;\n}\n\n\n/*\n * Function returning data from the local file cache\n * relation node/tablespace/database/blocknum and access_counter\n */\nLocalCachePagesRec *\nlfc_local_cache_pages(size_t *num_entries)\n{\n\tHASH_SEQ_STATUS status;\n\tFileCacheEntry *entry;\n\tsize_t\t\tn_pages;\n\tsize_t\t\tn;\n\tLocalCachePagesRec *result;\n\n\tif (!lfc_ctl)\n\t{\n\t\t*num_entries = 0;\n\t\treturn NULL;\n\t}\n\n\tLWLockAcquire(lfc_lock, LW_SHARED);\n\tif (!LFC_ENABLED())\n\t{\n\t\tLWLockRelease(lfc_lock);\n\t\t*num_entries = 0;\n\t\treturn NULL;\n\t}\n\n\t/* Count the pages first */\n\tn_pages = 0;\n\thash_seq_init(&status, lfc_hash);\n\twhile ((entry = hash_seq_search(&status)) != NULL)\n\t{\n\t\t/* Skip hole tags */\n\t\tif (NInfoGetRelNumber(BufTagGetNRelFileInfo(entry->key)) != 0)\n\t\t{\n\t\t\tfor (int i = 0; i < lfc_blocks_per_chunk; i++)\n\t\t\t\tn_pages += GET_STATE(entry, i) == AVAILABLE;\n\t\t}\n\t}\n\n\tif (n_pages == 0)\n\t{\n\t\tLWLockRelease(lfc_lock);\n\t\t*num_entries = 0;\n\t\treturn NULL;\n\t}\n\n\tresult = (LocalCachePagesRec *)\n\t\tMemoryContextAllocHuge(CurrentMemoryContext,\n\t\t\t\t\t\t\t   sizeof(LocalCachePagesRec) * n_pages);\n\n\t/*\n\t * Scan through all the cache entries, saving the relevant fields\n\t * in the result structure.\n\t */\n\tn = 0;\n\thash_seq_init(&status, lfc_hash);\n\twhile ((entry = hash_seq_search(&status)) != NULL)\n\t{\n\t\tfor (int i = 0; i < lfc_blocks_per_chunk; i++)\n\t\t{\n\t\t\tif (NInfoGetRelNumber(BufTagGetNRelFileInfo(entry->key)) != 0)\n\t\t\t{\n\t\t\t\tif (GET_STATE(entry, i) == AVAILABLE)\n\t\t\t\t{\n\t\t\t\t\tresult[n].pageoffs = entry->offset * lfc_blocks_per_chunk + i;\n\t\t\t\t\tresult[n].relfilenode = NInfoGetRelNumber(BufTagGetNRelFileInfo(entry->key));\n\t\t\t\t\tresult[n].reltablespace = NInfoGetSpcOid(BufTagGetNRelFileInfo(entry->key));\n\t\t\t\t\tresult[n].reldatabase = 
NInfoGetDbOid(BufTagGetNRelFileInfo(entry->key));\n\t\t\t\t\tresult[n].forknum = entry->key.forkNum;\n\t\t\t\t\tresult[n].blocknum = entry->key.blockNum + i;\n\t\t\t\t\tresult[n].accesscount = entry->access_count;\n\t\t\t\t\tn += 1;\n\t\t\t\t}\n\t\t\t}\n\t\t}\n\t}\n\tAssert(n_pages == n);\n\tLWLockRelease(lfc_lock);\n\n\t*num_entries = n_pages;\n\treturn result;\n}\n\n/*\n * Internal implementation of the approximate_working_set_size_seconds()\n * function.\n */\nint32\nlfc_approximate_working_set_size_seconds(time_t duration, bool reset)\n{\n\tint32\t\tdc;\n\n\tif (lfc_size_limit == 0)\n\t\treturn -1;\n\n\tdc = (int32) estimateSHLL(&lfc_ctl->wss_estimation, duration);\n\tif (reset)\n\t\tmemset(lfc_ctl->wss_estimation.regs, 0, sizeof lfc_ctl->wss_estimation.regs);\n\treturn dc;\n}\n\n/*\n * Get metrics, for the built-in metrics exporter that's part of the communicator\n * process.\n *\n * NB: This is called from a Rust tokio task inside the communicator process.\n * Acquiring lwlocks, elog(), allocating memory or anything else non-trivial\n * is strictly prohibited here!\n */\nstruct LfcMetrics\ncallback_get_lfc_metrics_unsafe(void)\n{\n\tstruct LfcMetrics result = {\n\t\t.lfc_cache_size_limit = (int64) lfc_size_limit * 1024 * 1024,\n\t\t.lfc_hits = lfc_ctl ? lfc_ctl->hits : 0,\n\t\t.lfc_misses = lfc_ctl ? lfc_ctl->misses : 0,\n\t\t.lfc_used = lfc_ctl ? lfc_ctl->used : 0,\n\t\t.lfc_writes = lfc_ctl ? lfc_ctl->writes : 0,\n\t};\n\n\tif (lfc_ctl)\n\t{\n\t\tfor (int minutes = 1; minutes <= 60; minutes++)\n\t\t{\n\t\t\tresult.lfc_approximate_working_set_size_windows[minutes - 1] =\n\t\t\t\tlfc_approximate_working_set_size_seconds(minutes * 60, false);\n\t\t}\n\t}\n\n\treturn result;\n}\n\n\nPG_FUNCTION_INFO_V1(get_local_cache_state);\n\nDatum\nget_local_cache_state(PG_FUNCTION_ARGS)\n{\n\tsize_t max_entries = PG_ARGISNULL(0) ? 
lfc_prewarm_limit : PG_GETARG_INT32(0);\n\tFileCacheState* fcs = lfc_get_state(max_entries);\n\tif (fcs != NULL)\n\t\tPG_RETURN_BYTEA_P((bytea*)fcs);\n\telse\n\t\tPG_RETURN_NULL();\n}\n\nPG_FUNCTION_INFO_V1(prewarm_local_cache);\n\nDatum\nprewarm_local_cache(PG_FUNCTION_ARGS)\n{\n\tbytea* state = PG_GETARG_BYTEA_PP(0);\n\tuint32 n_workers =  PG_GETARG_INT32(1);\n\tFileCacheState* fcs = (FileCacheState*)state;\n\n\tlfc_prewarm(fcs, n_workers);\n\n\tPG_RETURN_NULL();\n}\n\nPG_FUNCTION_INFO_V1(get_prewarm_info);\n\nDatum\nget_prewarm_info(PG_FUNCTION_ARGS)\n{\n\tDatum\t\tvalues[4];\n\tbool\t\tnulls[4];\n\tTupleDesc\ttupdesc;\n\tuint32 prewarmed_pages = 0;\n\tuint32 skipped_pages = 0;\n\tuint32 active_workers = 0;\n\tuint32 total_pages;\n\tsize_t n_workers;\n\n\tif (lfc_size_limit == 0)\n\t\tPG_RETURN_NULL();\n\n\tLWLockAcquire(lfc_lock, LW_SHARED);\n\tif (!lfc_ctl || lfc_ctl->n_prewarm_workers == 0)\n\t{\n\t\tLWLockRelease(lfc_lock);\n\t\tPG_RETURN_NULL();\n\t}\n\tn_workers = lfc_ctl->n_prewarm_workers;\n\ttotal_pages = lfc_ctl->total_prewarm_pages;\n\tfor (size_t i = 0; i < n_workers; i++)\n\t{\n\t\tPrewarmWorkerState* ws = &lfc_ctl->prewarm_workers[i];\n\t\tprewarmed_pages += ws->prewarmed_pages;\n\t\tskipped_pages += ws->skipped_pages;\n\t\tactive_workers += ws->completed != 0;\n\t}\n\tLWLockRelease(lfc_lock);\n\n\ttupdesc = CreateTemplateTupleDesc(4);\n\tTupleDescInitEntry(tupdesc, (AttrNumber) 1, \"total_pages\", INT4OID, -1, 0);\n\tTupleDescInitEntry(tupdesc, (AttrNumber) 2, \"prewarmed_pages\", INT4OID, -1, 0);\n\tTupleDescInitEntry(tupdesc, (AttrNumber) 3, \"skipped_pages\", INT4OID, -1, 0);\n\tTupleDescInitEntry(tupdesc, (AttrNumber) 4, \"active_workers\", INT4OID, -1, 0);\n\ttupdesc = BlessTupleDesc(tupdesc);\n\n\tMemSet(nulls, 0, sizeof(nulls));\n\n\tvalues[0] = Int32GetDatum(total_pages);\n\tvalues[1] = Int32GetDatum(prewarmed_pages);\n\tvalues[2] = Int32GetDatum(skipped_pages);\n\tvalues[3] = 
Int32GetDatum(active_workers);\n\n\tPG_RETURN_DATUM(HeapTupleGetDatum(heap_form_tuple(tupdesc, values, nulls)));\n}\n"
  },
  {
    "path": "pgxn/neon/file_cache.h",
    "content": "/*-------------------------------------------------------------------------\n *\n * file_cache.h\n *\t  Local File Cache definitions\n *\n * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group\n * Portions Copyright (c) 1994, Regents of the University of California\n *\n *-------------------------------------------------------------------------\n */\n#ifndef FILE_CACHE_h\n#define FILE_CACHE_h\n\n#include \"neon_pgversioncompat.h\"\n\ntypedef struct FileCacheState\n{\n\tint32\t\tvl_len_;\t\t/* varlena header (do not touch directly!) */\n\tuint32\t\tmagic;\n\tuint32\t\tn_chunks;\n\tuint32\t\tn_pages;\n\tuint16\t\tchunk_size_log;\n\tBufferTag\tchunks[FLEXIBLE_ARRAY_MEMBER];\n\t/* followed by bitmap */\n} FileCacheState;\n\n/* GUCs */\nextern bool lfc_store_prefetch_result;\n\n/* functions for local file cache */\nextern void lfc_invalidate(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber nblocks);\nextern void lfc_writev(NRelFileInfo rinfo, ForkNumber forkNum,\n\t\t\t\t\t   BlockNumber blkno, const void *const *buffers,\n\t\t\t\t\t   BlockNumber nblocks);\n/* returns number of blocks read, with one bit set in *read for each  */\nextern int lfc_readv_select(NRelFileInfo rinfo, ForkNumber forkNum,\n\t\t\t\t\t\t\tBlockNumber blkno, void **buffers,\n\t\t\t\t\t\t\tBlockNumber nblocks, bits8 *mask);\n\nextern bool lfc_cache_contains(NRelFileInfo rinfo, ForkNumber forkNum,\n\t\t\t\t\t\t\t   BlockNumber blkno);\nextern int lfc_cache_containsv(NRelFileInfo rinfo, ForkNumber forkNum,\n\t\t\t\t\t\t\t   BlockNumber blkno, int nblocks, bits8 *bitmap);\nextern void lfc_init(void);\nextern bool lfc_prefetch(NRelFileInfo rinfo, ForkNumber forknum, BlockNumber blkno,\n\t\t\t\t\t\t const void* buffer, XLogRecPtr lsn);\nextern FileCacheState* lfc_get_state(size_t max_entries);\nextern void lfc_prewarm(FileCacheState* fcs, uint32 n_workers);\n\ntypedef struct LfcStatsEntry\n{\n\tconst char *metric_name;\n\tbool\t\tisnull;\n\tuint64\t\tvalue;\n} 
LfcStatsEntry;\nextern LfcStatsEntry *lfc_get_stats(size_t *num_entries);\n\ntypedef struct\n{\n\tuint32\t\tpageoffs;\n\tOid\t\t\trelfilenode;\n\tOid\t\t\treltablespace;\n\tOid\t\t\treldatabase;\n\tForkNumber\tforknum;\n\tBlockNumber blocknum;\n\tuint16\t\taccesscount;\n} LocalCachePagesRec;\nextern LocalCachePagesRec *lfc_local_cache_pages(size_t *num_entries);\n\nextern int32 lfc_approximate_working_set_size_seconds(time_t duration, bool reset);\n\n\nstatic inline bool\nlfc_read(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,\n\t\t void *buffer)\n{\n\tbits8\t\trv = 0;\n\treturn lfc_readv_select(rinfo, forkNum, blkno, &buffer, 1, &rv) == 1;\n}\n\nstatic inline void\nlfc_write(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,\n\t\t  const void *buffer)\n{\n\treturn lfc_writev(rinfo, forkNum, blkno, &buffer, 1);\n}\n\n#endif\t\t\t\t\t\t\t/* FILE_CACHE_H */\n"
  },
  {
    "path": "pgxn/neon/hll.c",
    "content": "/*-------------------------------------------------------------------------\n *\n * hll.c\n *\t  Sliding HyperLogLog cardinality estimator\n *\n * Portions Copyright (c) 2014-2023, PostgreSQL Global Development Group\n *\n * Implements https://hal.science/hal-00465313/document\n * \n * Based on Hideaki Ohno's C++ implementation.  This is probably not ideally\n * suited to estimating the cardinality of very large sets;  in particular, we\n * have not attempted to further optimize the implementation as described in\n * the Heule, Nunkesser and Hall paper \"HyperLogLog in Practice: Algorithmic\n * Engineering of a State of The Art Cardinality Estimation Algorithm\".\n *\n * A sparse representation of HyperLogLog state is used, with fixed space\n * overhead.\n *\n * The copyright terms of Ohno's original version (the MIT license) follow.\n *\n * IDENTIFICATION\n *\t  src/backend/lib/hyperloglog.c\n *\n *-------------------------------------------------------------------------\n */\n\n/*\n * Copyright (c) 2013 Hideaki Ohno <hide.o.j55{at}gmail.com>\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the 'Software'), to\n * deal in the Software without restriction, including without limitation the\n * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or\n * sell copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING\n * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS\n * IN THE SOFTWARE.\n */\n\n#include <math.h>\n\n#include \"postgres.h\"\n#include \"funcapi.h\"\n#include \"port/pg_bitutils.h\"\n#include \"utils/timestamp.h\"\n#include \"hll.h\"\n\n\n#define POW_2_32\t\t\t(4294967296.0)\n#define NEG_POW_2_32\t\t(-4294967296.0)\n\n#define ALPHA_MM ((0.7213 / (1.0 + 1.079 / HLL_N_REGISTERS)) * HLL_N_REGISTERS * HLL_N_REGISTERS)\n\n/*\n * Worker for addHyperLogLog().\n *\n * Calculates the position of the first set bit in first b bits of x argument\n * starting from the first, reading from most significant to least significant\n * bits.\n *\n * Example (when considering fist 10 bits of x):\n *\n * rho(x = 0b1000000000)   returns 1\n * rho(x = 0b0010000000)   returns 3\n * rho(x = 0b0000000000)   returns b + 1\n *\n * \"The binary address determined by the first b bits of x\"\n *\n * Return value \"j\" used to index bit pattern to watch.\n */\nstatic inline uint8\nrho(uint32 x, uint8 b)\n{\n\tuint8\t\tj = 1;\n\n\tif (x == 0)\n\t\treturn b + 1;\n\n\tj = 32 - pg_leftmost_one_pos32(x);\n\n\tif (j > b)\n\t\treturn b + 1;\n\n\treturn j;\n}\n\n/*\n * Initialize HyperLogLog track state\n */\nvoid\ninitSHLL(HyperLogLogState *cState)\n{\n\tmemset(cState->regs, 0, sizeof(cState->regs));\n}\n\n/*\n * Adds element to the estimator, from caller-supplied hash.\n *\n * It is critical that the hash value passed be an actual hash value, typically\n * generated using hash_any().  The algorithm relies on a specific bit-pattern\n * observable in conjunction with stochastic averaging.  
There must be a\n * uniform distribution of bits in hash values for each distinct original value\n * observed.\n */\nvoid\naddSHLL(HyperLogLogState *cState, uint32 hash)\n{\n\tuint8\t\tcount;\n\tuint32\t\tindex;\n\n\tTimestampTz\tnow = GetCurrentTimestamp();\n\t/* Use the first \"k\" (registerWidth) bits as a zero based index */\n\tindex = hash >> HLL_C_BITS;\n\n\t/* Compute the rank of the remaining 32 - \"k\" (registerWidth) bits */\n\tcount = rho(hash << HLL_BIT_WIDTH, HLL_C_BITS) - 1;\n\tAssert(count <= HLL_C_BITS);\n\tcState->regs[index][count] = now;\n}\n\nstatic uint8\ngetMaximum(const TimestampTz* reg, TimestampTz since)\n{\n\tuint8 max = 0;\n\n\tfor (size_t i = 0; i < HLL_C_BITS + 1; i++)\n\t{\n\t\tif (reg[i] >= since)\n\t\t{\n\t\t\tmax = i + 1;\n\t\t}\n\t}\n\n\treturn max;\n}\n\n\n/*\n * Estimates cardinality, based on elements added so far\n */\ndouble\nestimateSHLL(HyperLogLogState *cState, time_t duration)\n{\n\tdouble\t\tresult;\n\tdouble\t\tsum = 0.0;\n\tsize_t\t\ti;\n\tuint8       R[HLL_N_REGISTERS];\n\t/* 0 indicates uninitialized timestamp, so if we need to cover the whole range than starts with 1 */\n\tTimestampTz since = duration == (time_t)-1 ? 
1 : GetCurrentTimestamp() - duration * USECS_PER_SEC;\n\n\tfor (i = 0; i < HLL_N_REGISTERS; i++)\n\t{\n\t\tR[i] = getMaximum(cState->regs[i], since);\n\t\tsum += 1.0 / pow(2.0, R[i]);\n\t}\n\n\t/* result set to \"raw\" HyperLogLog estimate (E in the HyperLogLog paper) */\n\tresult = ALPHA_MM / sum;\n\n\tif (result <= (5.0 / 2.0) * HLL_N_REGISTERS)\n\t{\n\t\t/* Small range correction */\n\t\tint\t\t\tzero_count = 0;\n\n\t\tfor (i = 0; i < HLL_N_REGISTERS; i++)\n\t\t{\n\t\t\tzero_count += R[i] == 0;\n\t\t}\n\n\t\tif (zero_count != 0)\n\t\t\tresult = HLL_N_REGISTERS * log((double) HLL_N_REGISTERS /\n\t\t\t\t\t\t\t\t\t\t   zero_count);\n\t}\n\telse if (result > (1.0 / 30.0) * POW_2_32)\n\t{\n\t\t/* Large range correction */\n\t\tresult = NEG_POW_2_32 * log(1.0 - (result / POW_2_32));\n\t}\n\n\treturn result;\n}\n\n"
  },
  {
    "path": "pgxn/neon/hll.h",
    "content": "/*-------------------------------------------------------------------------\n *\n * hll.h\n *\t  Sliding HyperLogLog cardinality estimator\n *\n * Portions Copyright (c) 2014-2023, PostgreSQL Global Development Group\n *\n * Implements https://hal.science/hal-00465313/document\n * \n * Based on Hideaki Ohno's C++ implementation.  This is probably not ideally\n * suited to estimating the cardinality of very large sets;  in particular, we\n * have not attempted to further optimize the implementation as described in\n * the Heule, Nunkesser and Hall paper \"HyperLogLog in Practice: Algorithmic\n * Engineering of a State of The Art Cardinality Estimation Algorithm\".\n *\n * A sparse representation of HyperLogLog state is used, with fixed space\n * overhead.\n *\n * The copyright terms of Ohno's original version (the MIT license) follow.\n *\n * IDENTIFICATION\n *\t  src/backend/lib/hyperloglog.c\n *\n *-------------------------------------------------------------------------\n */\n\n/*\n * Copyright (c) 2013 Hideaki Ohno <hide.o.j55{at}gmail.com>\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the 'Software'), to\n * deal in the Software without restriction, including without limitation the\n * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or\n * sell copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING\n * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS\n * IN THE SOFTWARE.\n */\n\n#ifndef HLL_H\n#define HLL_H\n\n#define HLL_BIT_WIDTH   10\n#define HLL_C_BITS      (32 - HLL_BIT_WIDTH)\n#define HLL_N_REGISTERS (1 << HLL_BIT_WIDTH)\n\n/*\n * HyperLogLog is an approximate technique for computing the number of distinct\n * entries in a set.  Importantly, it does this by using a fixed amount of\n * memory.  See the 2007 paper \"HyperLogLog: the analysis of a near-optimal\n * cardinality estimation algorithm\" for more.\n *\n * Instead of a single counter for every bits register, we have a timestamp\n * for every valid number of bits we can encounter. Every time we encounter\n * a certain number of bits, we update the timestamp in those registers to\n * the current timestamp.\n *\n * We can query the sketch's stored cardinality for the range of some timestamp\n * up to now: For each register, we return the highest bits bucket that has a\n * modified timestamp >= the query timestamp. This value is the number of bits\n * for this register in the normal HLL calculation.\n *\n * The memory usage is 2^B * (C + 1) * sizeof(TimestampTz), or 184 KiB.\n * Usage could be halved if we decide to reduce the required time dimension\n * precision; as 32 bits in second precision should be enough for statistics.\n * However, that is not yet implemented.\n */\ntypedef struct HyperLogLogState\n{\n\tTimestampTz regs[HLL_N_REGISTERS][HLL_C_BITS + 1];\n} HyperLogLogState;\n\nextern void   initSHLL(HyperLogLogState *cState);\nextern void   addSHLL(HyperLogLogState *cState, uint32 hash);\nextern double estimateSHLL(HyperLogLogState *cState, time_t dutration);\n\n#endif\n"
  },
  {
    "path": "pgxn/neon/libpagestore.c",
    "content": "/*-------------------------------------------------------------------------\n *\n * libpagestore.c\n *\t  Handles network communications with the remote pagestore.\n *\n * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group\n * Portions Copyright (c) 1994, Regents of the University of California\n *\n *-------------------------------------------------------------------------\n */\n#include \"postgres.h\"\n\n#include <math.h>\n#include <sys/socket.h>\n\n#include <curl/curl.h>\n\n#include \"libpq-int.h\"\n\n#include \"access/xlog.h\"\n#include \"common/hashfn.h\"\n#include \"fmgr.h\"\n#include \"libpq-fe.h\"\n#include \"libpq/libpq.h\"\n#include \"libpq/pqformat.h\"\n#include \"miscadmin.h\"\n#include \"pgstat.h\"\n#include \"portability/instr_time.h\"\n#include \"postmaster/interrupt.h\"\n#include \"storage/buf_internals.h\"\n#include \"storage/fd.h\"\n#include \"storage/ipc.h\"\n#include \"storage/lwlock.h\"\n#include \"storage/pg_shmem.h\"\n#include \"utils/guc.h\"\n#include \"utils/memutils.h\"\n\n#include \"neon.h\"\n#include \"neon_perf_counters.h\"\n#include \"neon_utils.h\"\n#include \"pagestore_client.h\"\n#include \"walproposer.h\"\n\n#ifdef __linux__\n#include <sys/ioctl.h>\n#include <linux/sockios.h>\n#endif\n\n#define PageStoreTrace DEBUG5\n\n#define MIN_RECONNECT_INTERVAL_USEC 1000\n#define MAX_RECONNECT_INTERVAL_USEC 1000000\n\nenum NeonComputeMode {\n\tCP_MODE_PRIMARY = 0,\n\tCP_MODE_REPLICA,\n\tCP_MODE_STATIC\n};\n\nstatic const struct config_enum_entry neon_compute_modes[] = {\n\t{\"primary\", CP_MODE_PRIMARY, false},\n\t{\"replica\", CP_MODE_REPLICA, false},\n\t{\"static\", CP_MODE_STATIC, false},\n\t{NULL, 0, false}\n};\n\n/* GUCs */\nchar\t   *neon_timeline;\nchar\t   *neon_tenant;\nchar\t   *neon_project_id;\nchar\t   *neon_branch_id;\nchar\t   *neon_endpoint_id;\nint32\t\tmax_cluster_size;\nchar\t   *pageserver_connstring;\nchar\t   *neon_auth_token;\n\nint\t\t\treadahead_buffer_size = 
128;\nint\t\t\tflush_every_n_requests = 8;\n\nint         neon_protocol_version = 3;\n\nstatic int\tneon_compute_mode = 0;\nstatic int\tmax_reconnect_attempts = 60;\nstatic int\tstripe_size;\nstatic int\tmax_sockets;\n\nstatic int pageserver_response_log_timeout = 10000;\n/* 2.5 minutes. A bit higher than highest default TCP retransmission timeout */\nstatic int pageserver_response_disconnect_timeout = 150000;\n\nstatic int\tconf_refresh_reconnect_attempt_threshold = 16;\n// Hadron: timeout for refresh errors (1 minute)\nstatic uint64 \tkRefreshErrorTimeoutUSec = 1 * USECS_PER_MINUTE;\n\ntypedef struct\n{\n\tchar\t\tconnstring[MAX_SHARDS][MAX_PAGESERVER_CONNSTRING_SIZE];\n\tsize_t\t\tnum_shards;\n\tsize_t\t\tstripe_size;\n} ShardMap;\n\n/*\n * PagestoreShmemState is kept in shared memory. It contains the connection\n * strings for each shard.\n *\n * The \"neon.pageserver_connstring\" GUC is marked with the PGC_SIGHUP option,\n * allowing it to be changed using pg_reload_conf(). The control plane can\n * update the connection string if the pageserver crashes, is relocated, or\n * new shards are added. A parsed copy of the current value of the GUC is kept\n * in shared memory, updated by the postmaster, because regular backends don't\n * reload the config during query execution, but we might need to re-establish\n * the pageserver connection with the new connection string even in the middle\n * of a query.\n *\n * The shared memory copy is protected by a lockless algorithm using two\n * atomic counters. The counters allow a backend to quickly check if the value\n * has changed since last access, and to detect and retry copying the value if\n * the postmaster changes the value concurrently. 
(Postmaster doesn't have a\n * PGPROC entry and therefore cannot use LWLocks.)\n *\n * stripe_size is now also part of ShardMap, although it is defined by a separate GUC.\n * Postgres doesn't provide any mechanism to enforce dependencies between GUCs,\n * that is, we have to rely on the order of GUC definitions in the config file.\n * \"neon.stripe_size\" should be defined prior to \"neon.pageserver_connstring\"\n */\ntypedef struct\n{\n\tpg_atomic_uint64 begin_update_counter;\n\tpg_atomic_uint64 end_update_counter;\n\tShardMap\tshard_map;\n} PagestoreShmemState;\n\nstatic PagestoreShmemState *pagestore_shared;\nstatic uint64 pagestore_local_counter = 0;\n\ntypedef enum PSConnectionState {\n\tPS_Disconnected,\t\t\t/* no connection yet */\n\tPS_Connecting_Startup,\t\t/* connection starting up */\n\tPS_Connecting_PageStream,\t/* negotiating pagestream */\n\tPS_Connected,\t\t\t\t/* connected, pagestream established */\n} PSConnectionState;\n\n/* This backend's per-shard connections */\ntypedef struct\n{\n\tTimestampTz\t\tlast_connect_time; /* read-only debug value */\n\tTimestampTz\t\tlast_reconnect_time;\n\tuint32\t\t\tdelay_us;\n\tint\t\t\t\tn_reconnect_attempts;\n\n\t/*---\n\t * Pageserver connection state, i.e.\n\t *\tdisconnected: conn == NULL, wes == NULL;\n\t *\tconn_startup: connection initiated, waiting for connection establishing\n\t *\tconn_ps:      PageStream query sent, waiting for confirmation\n\t *\tconnected:    PageStream established\n\t */\n\tPSConnectionState state;\n\tPGconn\t\t   *conn;\n\n\t/* request / response counters for debugging */\n\tuint64\t\t\tnrequests_sent;\n\tuint64\t\t\tnresponses_received;\n\n\t/* State for the receive timeout mechanism in call_PQgetCopyData() */\n\tinstr_time\t\treceive_start_time;\t\t\t/* when we started waiting */\n\tinstr_time\t\treceive_last_log_time;\t\t/* when we last printed a log message for the wait */\n\tbool\t\t\treceive_logged;\t\t\t\t/* has the wait been logged */\n\n\t/*---\n\t * WaitEventSet containing:\n\t 
*\t- WL_SOCKET_READABLE on 'conn'\n\t *\t- WL_LATCH_SET on MyLatch, and\n\t *\t- WL_EXIT_ON_PM_DEATH.\n\t */\n\tWaitEventSet   *wes_read;\n} PageServer;\n\nstatic uint32 local_request_counter;\n#define GENERATE_REQUEST_ID() (((NeonRequestId)MyProcPid << 32) | ++local_request_counter)\n\nstatic PageServer page_servers[MAX_SHARDS];\n\nstatic bool pageserver_flush(shardno_t shard_no);\nstatic void pageserver_disconnect(shardno_t shard_no);\nstatic void pageserver_disconnect_shard(shardno_t shard_no);\n// HADRON\nshardno_t get_num_shards(void);\n\nstatic bool\nPagestoreShmemIsValid(void)\n{\n\treturn pagestore_shared && UsedShmemSegAddr;\n}\n\n/*\n * Parse a comma-separated list of connection strings into a ShardMap.\n *\n * If 'result' is NULL, just checks that the input is valid. If the input is\n * not valid, returns false. The contents of *result are undefined in\n * that case, and must not be relied on.\n */\nstatic bool\nParseShardMap(const char *connstr, ShardMap *result)\n{\n\tconst char *p;\n\tint\t\t\tnshards = 0;\n\n\tif (result)\n\t\tmemset(result, 0, sizeof(ShardMap));\n\n\tp = connstr;\n\tnshards = 0;\n\tfor (;;)\n\t{\n\t\tconst char *sep;\n\t\tsize_t\t\tconnstr_len;\n\n\t\tsep = strchr(p, ',');\n\t\tconnstr_len = sep != NULL ? 
sep - p : strlen(p);\n\n\t\tif (connstr_len == 0 && sep == NULL)\n\t\t\tbreak;\t\t\t\t/* ignore trailing comma */\n\n\t\tif (nshards >= MAX_SHARDS)\n\t\t{\n\t\t\tneon_log(LOG, \"Too many shards\");\n\t\t\treturn false;\n\t\t}\n\t\tif (connstr_len >= MAX_PAGESERVER_CONNSTRING_SIZE)\n\t\t{\n\t\t\tneon_log(LOG, \"Connection string too long\");\n\t\t\treturn false;\n\t\t}\n\t\tif (result)\n\t\t{\n\t\t\tmemcpy(result->connstring[nshards], p, connstr_len);\n\t\t\tresult->connstring[nshards][connstr_len] = '\\0';\n\t\t}\n\t\tnshards++;\n\n\t\tif (sep == NULL)\n\t\t\tbreak;\n\t\tp = sep + 1;\n\t}\n\tif (result)\n\t{\n\t\tresult->num_shards = nshards;\n\t\tresult->stripe_size = stripe_size;\n\t}\n\n\treturn true;\n}\n\nstatic bool\nCheckPageserverConnstring(char **newval, void **extra, GucSource source)\n{\n\tchar\t   *p = *newval;\n\n\treturn ParseShardMap(p, NULL);\n}\n\nstatic void\nAssignPageserverConnstring(const char *newval, void *extra)\n{\n\tShardMap\tshard_map;\n\n\t/*\n\t * Only postmaster updates the copy in shared memory.\n\t */\n\tif (!PagestoreShmemIsValid() || IsUnderPostmaster)\n\t\treturn;\n\n\tif (!ParseShardMap(newval, &shard_map))\n\t{\n\t\t/*\n\t\t * shouldn't happen, because we already checked the value in\n\t\t * CheckPageserverConnstring\n\t\t */\n\t\telog(ERROR, \"could not parse shard map\");\n\t}\n\n\tif (memcmp(&pagestore_shared->shard_map, &shard_map, sizeof(ShardMap)) != 0)\n\t{\n\t\tpg_atomic_add_fetch_u64(&pagestore_shared->begin_update_counter, 1);\n\t\tpg_write_barrier();\n\t\tmemcpy(&pagestore_shared->shard_map, &shard_map, sizeof(ShardMap));\n\t\tpg_write_barrier();\n\t\tpg_atomic_add_fetch_u64(&pagestore_shared->end_update_counter, 1);\n\t}\n\telse\n\t{\n\t\t/* no change */\n\t}\n}\n\n/* BEGIN_HADRON */\n/**\n * Return the total number of shards seen in the shard map.\n */\nshardno_t get_num_shards(void)\n{\n\tconst ShardMap *shard_map;\n\n\tAssert(pagestore_shared);\n\tshard_map = &pagestore_shared->shard_map;\n\n\tAssert(shard_map != 
NULL);\n\treturn shard_map->num_shards;\n}\n/* END_HADRON */\n\n/*\n * Get the current number of shards, and/or the connection string for a\n * particular shard from the shard map in shared memory.\n *\n * If num_shards_p is not NULL, it is set to the current number of shards.\n *\n * If connstr_p is not NULL, the connection string for 'shard_no' is copied to\n * it. It must point to a buffer at least MAX_PAGESERVER_CONNSTRING_SIZE bytes\n * long.\n *\n * As a side-effect, if the shard map in shared memory had changed since the\n * last call, terminates all existing connections to all pageservers.\n */\nstatic void\nload_shard_map(shardno_t shard_no, char *connstr_p, shardno_t *num_shards_p, size_t* stripe_size_p)\n{\n\tuint64\t\tbegin_update_counter;\n\tuint64\t\tend_update_counter;\n\tShardMap   *shard_map = &pagestore_shared->shard_map;\n\tshardno_t\tnum_shards;\n\tsize_t\t\tstripe_size;\n\n\t/*\n\t * Postmaster can update the shared memory values concurrently, in which\n\t * case we would copy a garbled mix of the old and new values. We will\n\t * detect it because the counters won't match, and retry. 
But it's\n\t * important that we don't do anything within the retry-loop that would\n\t * depend on the string having valid contents.\n\t */\n\tdo\n\t{\n\t\tbegin_update_counter = pg_atomic_read_u64(&pagestore_shared->begin_update_counter);\n\t\tend_update_counter = pg_atomic_read_u64(&pagestore_shared->end_update_counter);\n\n\t\tnum_shards = shard_map->num_shards;\n\t\tstripe_size = shard_map->stripe_size;\n\t\tif (connstr_p && shard_no < MAX_SHARDS)\n\t\t\tstrlcpy(connstr_p, shard_map->connstring[shard_no], MAX_PAGESERVER_CONNSTRING_SIZE);\n\t\tpg_memory_barrier();\n\t}\n\twhile (begin_update_counter != end_update_counter\n\t\t   || begin_update_counter != pg_atomic_read_u64(&pagestore_shared->begin_update_counter)\n\t\t   || end_update_counter != pg_atomic_read_u64(&pagestore_shared->end_update_counter));\n\n\tif (connstr_p && shard_no >= num_shards)\n\t\tneon_log(ERROR, \"Shard %d is greater or equal than number of shards %d\",\n\t\t\t\t shard_no, num_shards);\n\n\t/*\n\t * If any of the connection strings changed, reset all connections.\n\t */\n\tif (pagestore_local_counter != end_update_counter)\n\t{\n\t\tfor (shardno_t i = 0; i < MAX_SHARDS; i++)\n\t\t{\n\t\t\tif (page_servers[i].conn)\n\t\t\t\tpageserver_disconnect(i);\n\t\t}\n\t\tpagestore_local_counter = end_update_counter;\n\n        /* Reserve file descriptors for sockets */\n\t\twhile (max_sockets < num_shards)\n\t\t{\n\t\t\tmax_sockets += 1;\n\t\t\tReserveExternalFD();\n\t\t}\n\t}\n\n\tif (num_shards_p)\n\t\t*num_shards_p = num_shards;\n\tif (stripe_size_p)\n\t\t*stripe_size_p = stripe_size;\n}\n\n#define MB (1024*1024)\n\nshardno_t\nget_shard_number(BufferTag *tag)\n{\n\tshardno_t\tn_shards;\n\tsize_t\t\tstripe_size;\n\tuint32\t\thash;\n\n\tload_shard_map(0, NULL, &n_shards, &stripe_size);\n\n#if PG_MAJORVERSION_NUM < 16\n\thash = murmurhash32(tag->rnode.relNode);\n\thash = hash_combine(hash, murmurhash32(tag->blockNum / stripe_size));\n#else\n\thash = murmurhash32(tag->relNumber);\n\thash = 
hash_combine(hash, murmurhash32(tag->blockNum / stripe_size));\n#endif\n\n\treturn hash % n_shards;\n}\n\nstatic inline void\nCLEANUP_AND_DISCONNECT(PageServer *shard)\n{\n\tif (shard->wes_read)\n\t{\n\t\tFreeWaitEventSet(shard->wes_read);\n\t\tshard->wes_read = NULL;\n\t}\n\tif (shard->conn)\n\t{\n\t\tMyNeonCounters->pageserver_disconnects_total++;\n\t\tPQfinish(shard->conn);\n\t\tshard->conn = NULL;\n\t}\n\n\tshard->state = PS_Disconnected;\n}\n\n/*\n * Connect to a pageserver, or continue to try to connect if we're yet to\n * complete the connection (e.g. due to receiving an earlier cancellation\n * during connection start).\n * Returns true if successfully connected; false if the connection failed.\n *\n * Throws errors in unrecoverable situations, or when this backend's query\n * is canceled.\n */\nstatic bool\npageserver_connect(shardno_t shard_no, int elevel)\n{\n\tPageServer *shard = &page_servers[shard_no];\n\tchar\t\tconnstr[MAX_PAGESERVER_CONNSTRING_SIZE];\n\n\t/*\n\t * Get the connection string for this shard. 
If the shard map has been\n\t * updated since we last looked, this will also disconnect any existing\n\t * pageserver connections as a side effect.\n\t * Note that connstr is used both during connection start, and when we\n\t * log the successful connection.\n\t */\n\tload_shard_map(shard_no, connstr, NULL, NULL);\n\n\tswitch (shard->state)\n\t{\n\tcase PS_Disconnected:\n\t{\n\t\tconst char *keywords[5];\n\t\tconst char *values[5];\n\t\tchar pid_str[16] = { 0 };\n\t\tchar endpoint_str[36] = { 0 };\n\t\tint\t\t\tn_pgsql_params;\n\t\tTimestampTz\tnow;\n\t\tint64\t\tus_since_last_attempt;\n\n\t\t/* Make sure we start with a clean slate */\n\t\tCLEANUP_AND_DISCONNECT(shard);\n\n\t\tneon_shard_log(shard_no, DEBUG5, \"Connection state: Disconnected\");\n\n\t\tnow = GetCurrentTimestamp();\n\t\tus_since_last_attempt = (int64) (now - shard->last_reconnect_time);\n\n\t\t/*\n\t\t * Make sure we don't do exponential backoff with a constant multiplier\n\t\t * of 0 us, as that doesn't really do much for timeouts...\n\t\t *\n\t\t * cf. https://github.com/neondatabase/neon/issues/7897\n\t\t */\n\t\tif (shard->delay_us == 0)\n\t\t\tshard->delay_us = MIN_RECONNECT_INTERVAL_USEC;\n\n\t\t/*\n\t\t * If we did other tasks between reconnect attempts, then we won't\n\t\t * need to wait as long as a full delay.\n\t\t *\n\t\t * This is a loop to protect against interrupted sleeps.\n\t\t */\n\t\twhile (us_since_last_attempt < shard->delay_us)\n\t\t{\n\t\t\tpg_usleep(shard->delay_us - us_since_last_attempt);\n\n\t\t\t/* At least we should handle cancellations here */\n\t\t\tCHECK_FOR_INTERRUPTS();\n\n\t\t\tnow = GetCurrentTimestamp();\n\t\t\tus_since_last_attempt = (int64) (now - shard->last_reconnect_time);\n\t\t}\n\n\t\t/* update the delay metric */\n\t\tshard->delay_us = Min(shard->delay_us * 2, MAX_RECONNECT_INTERVAL_USEC);\n\t\tshard->last_reconnect_time = now;\n\n\t\t/*\n\t\t * Connect using the connection string we got from the\n\t\t * neon.pageserver_connstring GUC. 
If the NEON_AUTH_TOKEN environment\n\t\t * variable was set, use that as the password.\n\t\t *\n\t\t * The connection options are parsed in the order they're given, so when\n\t\t * we set the password before the connection string, the connection string\n\t\t * can override the password from the env variable. Seems useful, although\n\t\t * we don't currently use that capability anywhere.\n\t\t */\n\t\tn_pgsql_params = 0;\n\n\t\t/*\n\t\t * Pageserver logs include this in the connection's tracing span.\n\t\t * This allows for easier log correlation between compute and pageserver.\n\t\t */\n\t\tkeywords[n_pgsql_params] = \"application_name\";\n\t\t{\n\t\t\tint ret = snprintf(pid_str, sizeof(pid_str), \"%d\", MyProcPid);\n\t\t\tif (ret < 0 || ret >= (int)(sizeof(pid_str)))\n\t\t\t\telog(FATAL, \"stack-allocated buffer too small to hold pid\");\n\t\t}\n\t\t/* lifetime: PQconnectStartParams strdups internally */\n\t\tvalues[n_pgsql_params] = (const char*) pid_str;\n\t\tn_pgsql_params++;\n\n\t\tkeywords[n_pgsql_params] = \"dbname\";\n\t\tvalues[n_pgsql_params] = connstr;\n\t\tn_pgsql_params++;\n\n\t\tif (neon_auth_token)\n\t\t{\n\t\t\tkeywords[n_pgsql_params] = \"password\";\n\t\t\tvalues[n_pgsql_params] = neon_auth_token;\n\t\t\tn_pgsql_params++;\n\t\t}\n\n\t\t{\n\t\t\tbool param_set = false;\n\t\t\tswitch (neon_compute_mode)\n\t\t\t{\n\t\t\t\tcase CP_MODE_PRIMARY:\n\t\t\t\t\tstrncpy(endpoint_str, \"-c neon.compute_mode=primary\", sizeof(endpoint_str));\n\t\t\t\t\tparam_set = true;\n\t\t\t\t\tbreak;\n\t\t\t\tcase CP_MODE_REPLICA:\n\t\t\t\t\tstrncpy(endpoint_str, \"-c neon.compute_mode=replica\", sizeof(endpoint_str));\n\t\t\t\t\tparam_set = true;\n\t\t\t\t\tbreak;\n\t\t\t\tcase CP_MODE_STATIC:\n\t\t\t\t\tstrncpy(endpoint_str, \"-c neon.compute_mode=static\", sizeof(endpoint_str));\n\t\t\t\t\tparam_set = true;\n\t\t\t\t\tbreak;\n\t\t\t}\n\t\t\tif (param_set)\n\t\t\t{\n\t\t\t\tkeywords[n_pgsql_params] = \"options\";\n\t\t\t\tvalues[n_pgsql_params] = 
endpoint_str;\n\t\t\t\tn_pgsql_params++;\n\t\t\t}\n\t\t}\n\n\t\tkeywords[n_pgsql_params] = NULL;\n\t\tvalues[n_pgsql_params] = NULL;\n\n\t\tshard->conn = PQconnectStartParams(keywords, values, 1);\n\t\tif (PQstatus(shard->conn) == CONNECTION_BAD)\n\t\t{\n\t\t\tchar\t   *msg = pchomp(PQerrorMessage(shard->conn));\n\t\t\tCLEANUP_AND_DISCONNECT(shard);\n\t\t\tereport(elevel,\n\t\t\t\t\t(errcode(ERRCODE_SQLCLIENT_UNABLE_TO_ESTABLISH_SQLCONNECTION),\n\t\t\t\t\t\terrmsg(NEON_TAG \"[shard %d] could not establish connection to pageserver\", shard_no),\n\t\t\t\t\t\terrdetail_internal(\"%s\", msg)));\n\t\t\tpfree(msg);\n\t\t\treturn false;\n\t\t}\n\t\tshard->state = PS_Connecting_Startup;\n\t}\n\t/* FALLTHROUGH */\n\tcase PS_Connecting_Startup:\n\t{\n\t\tchar\t   *pagestream_query;\n\t\tint\t\t\tps_send_query_ret;\n\t\tbool\t\tconnected = false;\n\t\tint poll_result = PGRES_POLLING_WRITING;\n\t\tneon_shard_log(shard_no, DEBUG5, \"Connection state: Connecting_Startup\");\n\n\t\tdo\n\t\t{\n\t\t\tswitch (poll_result)\n\t\t\t{\n\t\t\tdefault: /* unknown/unused states are handled as a failed connection */\n\t\t\tcase PGRES_POLLING_FAILED:\n\t\t\t\t{\n\t\t\t\t\tchar\t   *pqerr = PQerrorMessage(shard->conn);\n\t\t\t\t\tchar\t   *msg = NULL;\n\t\t\t\t\tneon_shard_log(shard_no, DEBUG5, \"POLLING_FAILED\");\n\n\t\t\t\t\tif (pqerr)\n\t\t\t\t\t\tmsg = pchomp(pqerr);\n\n\t\t\t\t\tCLEANUP_AND_DISCONNECT(shard);\n\n\t\t\t\t\tif (msg)\n\t\t\t\t\t{\n\t\t\t\t\t\tneon_shard_log(shard_no, elevel,\n\t\t\t\t\t\t\t\t\t   \"could not connect to pageserver: %s\",\n\t\t\t\t\t\t\t\t\t   msg);\n\t\t\t\t\t\tpfree(msg);\n\t\t\t\t\t}\n\t\t\t\t\telse\n\t\t\t\t\t\tneon_shard_log(shard_no, elevel,\n\t\t\t\t\t\t\t\t\t   \"could not connect to pageserver\");\n\n\t\t\t\t\treturn false;\n\t\t\t\t}\n\t\t\tcase PGRES_POLLING_READING:\n\t\t\t\t/* Sleep until there's something to do */\n\t\t\t\twhile (true)\n\t\t\t\t{\n\t\t\t\t\tint rc = WaitLatchOrSocket(MyLatch,\n\t\t\t\t\t\t\t\t\t\t\t   WL_EXIT_ON_PM_DEATH | 
WL_LATCH_SET | WL_SOCKET_READABLE,\n\t\t\t\t\t\t\t\t\t\t\t   PQsocket(shard->conn),\n\t\t\t\t\t\t\t\t\t\t\t   0,\n\t\t\t\t\t\t\t\t\t\t\t   WAIT_EVENT_NEON_PS_STARTING);\n\t\t\t\t\telog(DEBUG5, \"PGRES_POLLING_READING=>%d\", rc);\n\t\t\t\t\tif (rc & WL_LATCH_SET)\n\t\t\t\t\t{\n\t\t\t\t\t\tResetLatch(MyLatch);\n\t\t\t\t\t\t/* query cancellation, backend shutdown */\n\t\t\t\t\t\tCHECK_FOR_INTERRUPTS();\n\t\t\t\t\t}\n\t\t\t\t\tif (rc & WL_SOCKET_READABLE)\n\t\t\t\t\t\tbreak;\n\t\t\t\t}\n\t\t\t\t/* PQconnectPoll() handles the socket polling state updates */\n\n\t\t\t\tbreak;\n\t\t\tcase PGRES_POLLING_WRITING:\n\t\t\t\t/* Sleep until there's something to do */\n\t\t\t\twhile (true)\n\t\t\t\t{\n\t\t\t\t\tint rc = WaitLatchOrSocket(MyLatch,\n\t\t\t\t\t\t\t\t\t\t\t   WL_EXIT_ON_PM_DEATH | WL_LATCH_SET | WL_SOCKET_WRITEABLE,\n\t\t\t\t\t\t\t\t\t\t\t   PQsocket(shard->conn),\n\t\t\t\t\t\t\t\t\t\t\t   0,\n\t\t\t\t\t\t\t\t\t\t\t   WAIT_EVENT_NEON_PS_STARTING);\n\t\t\t\t\telog(DEBUG5, \"PGRES_POLLING_WRITING=>%d\", rc);\n\t\t\t\t\tif (rc & WL_LATCH_SET)\n\t\t\t\t\t{\n\t\t\t\t\t\tResetLatch(MyLatch);\n\t\t\t\t\t\t/* query cancellation, backend shutdown */\n\t\t\t\t\t\tCHECK_FOR_INTERRUPTS();\n\t\t\t\t\t}\n\t\t\t\t\tif (rc & WL_SOCKET_WRITEABLE)\n\t\t\t\t\t\tbreak;\n\t\t\t\t}\n\t\t\t\t/* PQconnectPoll() handles the socket polling state updates */\n\n\t\t\t\tbreak;\n\t\t\tcase PGRES_POLLING_OK:\n\t\t\t\tneon_shard_log(shard_no, DEBUG5, \"POLLING_OK\");\n\t\t\t\tconnected = true;\n\t\t\t\tbreak;\n\t\t\t}\n\t\t\tpoll_result = PQconnectPoll(shard->conn);\n\t\t\telog(DEBUG5, \"PQconnectPoll=>%d\", poll_result);\n\t\t}\n\t\twhile (!connected);\n\n\t\t/* No more polling needed; connection succeeded */\n\t\tshard->last_connect_time = GetCurrentTimestamp();\n\n#if PG_MAJORVERSION_NUM >= 17\n\t\tshard->wes_read = CreateWaitEventSet(NULL, 3);\n#else\n\t\tshard->wes_read = CreateWaitEventSet(TopMemoryContext, 3);\n#endif\n\t\tAddWaitEventToSet(shard->wes_read, WL_LATCH_SET, 
PGINVALID_SOCKET,\n\t\t\t\t\t\t  MyLatch, NULL);\n\t\tAddWaitEventToSet(shard->wes_read, WL_EXIT_ON_PM_DEATH, PGINVALID_SOCKET,\n\t\t\t\t\t\t  NULL, NULL);\n\t\tAddWaitEventToSet(shard->wes_read, WL_SOCKET_READABLE, PQsocket(shard->conn), NULL, NULL);\n\n\n\t\tswitch (neon_protocol_version)\n\t\t{\n\t\tcase 3:\n\t\t\tpagestream_query = psprintf(\"pagestream_v3 %s %s\", neon_tenant, neon_timeline);\n\t\t\tbreak;\n\t\tcase 2:\n\t\t\tpagestream_query = psprintf(\"pagestream_v2 %s %s\", neon_tenant, neon_timeline);\n\t\t\tbreak;\n\t\tdefault:\n\t\t\telog(ERROR, \"unexpected neon_protocol_version %d\", neon_protocol_version);\n\t\t}\n\n\t\tif (PQstatus(shard->conn) == CONNECTION_BAD)\n\t\t{\n\t\t\tchar\t   *msg = pchomp(PQerrorMessage(shard->conn));\n\n\t\t\tCLEANUP_AND_DISCONNECT(shard);\n\n\t\t\tereport(elevel,\n\t\t\t\t\t(errcode(ERRCODE_SQLCLIENT_UNABLE_TO_ESTABLISH_SQLCONNECTION),\n\t\t\t\t\t\terrmsg(NEON_TAG \"[shard %d] could not establish connection to pageserver\", shard_no),\n\t\t\t\t\t\terrdetail_internal(\"%s\", msg)));\n\t\t\tpfree(msg);\n\t\t\treturn false;\n\t\t}\n\n\t\tps_send_query_ret = PQsendQuery(shard->conn, pagestream_query);\n\t\tpfree(pagestream_query);\n\t\tif (ps_send_query_ret != 1)\n\t\t{\n\t\t\tCLEANUP_AND_DISCONNECT(shard);\n\n\t\t\tneon_shard_log(shard_no, elevel, \"could not send pagestream command to pageserver\");\n\t\t\treturn false;\n\t\t}\n\n\t\tshard->state = PS_Connecting_PageStream;\n\t}\n\t/* FALLTHROUGH */\n\tcase PS_Connecting_PageStream:\n\t{\n\t\tneon_shard_log(shard_no, DEBUG5, \"Connection state: Connecting_PageStream\");\n\n\t\tif (PQstatus(shard->conn) == CONNECTION_BAD)\n\t\t{\n\t\t\tchar\t   *msg = pchomp(PQerrorMessage(shard->conn));\n\t\t\tCLEANUP_AND_DISCONNECT(shard);\n\t\t\tereport(elevel,\n\t\t\t\t\t(errcode(ERRCODE_SQLCLIENT_UNABLE_TO_ESTABLISH_SQLCONNECTION),\n\t\t\t\t\t\terrmsg(NEON_TAG \"[shard %d] could not establish connection to pageserver\", shard_no),\n\t\t\t\t\t\terrdetail_internal(\"%s\", 
msg)));\n\t\t\tpfree(msg);\n\t\t\treturn false;\n\t\t}\n\n\t\twhile (PQisBusy(shard->conn))\n\t\t{\n\t\t\tWaitEvent\tevent;\n\n\t\t\t/* Sleep until there's something to do */\n\t\t\t(void) WaitEventSetWait(shard->wes_read, -1L, &event, 1,\n\t\t\t\t\t\t\t\t\tWAIT_EVENT_NEON_PS_CONFIGURING);\n\t\t\tResetLatch(MyLatch);\n\n\t\t\tCHECK_FOR_INTERRUPTS();\n\n\t\t\t/* Data available in socket? */\n\t\t\tif (event.events & WL_SOCKET_READABLE)\n\t\t\t{\n\t\t\t\tif (!PQconsumeInput(shard->conn))\n\t\t\t\t{\n\t\t\t\t\tchar\t   *msg = pchomp(PQerrorMessage(shard->conn));\n\n\t\t\t\t\tCLEANUP_AND_DISCONNECT(shard);\n\t\t\t\t\tneon_shard_log(shard_no, elevel, \"could not complete handshake with pageserver: %s\",\n\t\t\t\t\t\t\t\t   msg);\n\t\t\t\t\tpfree(msg);\n\t\t\t\t\treturn false;\n\t\t\t\t}\n\t\t\t}\n\t\t}\n\n\t\tshard->state = PS_Connected;\n\t\tshard->nrequests_sent = 0;\n\t\tshard->nresponses_received = 0;\n\t\tINSTR_TIME_SET_ZERO(shard->receive_start_time);\n\t\tINSTR_TIME_SET_ZERO(shard->receive_last_log_time);\n\t\tshard->receive_logged = false;\n\t}\n\t/* FALLTHROUGH */\n\tcase PS_Connected:\n\t\t/*\n\t\t * We successfully connected. 
Future connections to this PageServer\n\t\t * will do fast retries again, with exponential backoff.\n\t\t */\n\t\tshard->delay_us = MIN_RECONNECT_INTERVAL_USEC;\n\n\t\tneon_shard_log(shard_no, DEBUG5, \"Connection state: Connected\");\n\t\tneon_shard_log(shard_no, LOG, \"libpagestore: connected to '%s' with protocol version %d\", connstr, neon_protocol_version);\n\t\treturn true;\n\tdefault:\n\t\tneon_shard_log(shard_no, ERROR, \"libpagestore: invalid connection state %d\", shard->state);\n\t}\n\n\tpg_unreachable();\n}\n\nstatic void\nget_socket_stats(int socketfd, int *sndbuf, int *recvbuf)\n{\n\t*sndbuf = -1;\n\t*recvbuf = -1;\n\n#ifdef __linux__\n\t/*\n\t * get kernel's send and recv queue size via ioctl\n\t * https://elixir.bootlin.com/linux/v6.1.128/source/include/uapi/linux/sockios.h#L25-L27\n\t */\n\tif (socketfd != -1)\n\t{\n\t\tint\t\t\tioctl_err;\n\n\t\tioctl_err = ioctl(socketfd, SIOCOUTQ, sndbuf);\n\t\tif (ioctl_err!= 0) {\n\t\t\t*sndbuf = -errno;\n\t\t}\n\t\tioctl_err = ioctl(socketfd, FIONREAD, recvbuf);\n\t\tif (ioctl_err != 0) {\n\t\t\t*recvbuf = -errno;\n\t\t}\n\t}\n#endif\n}\n\n/*\n * Tries to get the local port of a socket. 
Sets 'port' to -1 on error.\n */\nstatic void\nget_local_port(int socketfd, int *port)\n{\n\tstruct sockaddr_in addr;\n\tsocklen_t addr_len = sizeof(addr);\n\n\tmemset(&addr, 0, addr_len);\n\tif (getsockname(socketfd, (struct sockaddr*) &addr, &addr_len) == 0)\n\t{\n\t\t*port = ntohs(addr.sin_port);\n\t} else {\n\t\t*port = -1;\n\t}\n}\n\n/*\n * A wrapper around PQgetCopyData that checks for interrupts while sleeping.\n */\nstatic int\ncall_PQgetCopyData(shardno_t shard_no, char **buffer)\n{\n\tint\t\t\tret;\n\tPageServer *shard = &page_servers[shard_no];\n\tPGconn\t   *pageserver_conn = shard->conn;\n\tinstr_time\tnow,\n\t\t\t\tsince_start,\n\t\t\t\tsince_last_log;\n\nretry:\n\tret = PQgetCopyData(pageserver_conn, buffer, 1 /* async */ );\n\n\tif (ret == 0)\n\t{\n\t\tWaitEvent\toccurred_event;\n\t\tint\t\t\tnoccurred;\n\t\tdouble\t\tlog_timeout,\n\t\t\t\t\tdisconnect_timeout;\n\t\tlong\t\ttimeout;\n\n\t\t/*\n\t\t * Calculate time elapsed since the start, and since the last progress\n\t\t * log message. On first call, remember the start time.\n\t\t */\n\t\tINSTR_TIME_SET_CURRENT(now);\n\t\tif (INSTR_TIME_IS_ZERO(shard->receive_start_time))\n\t\t{\n\t\t\tshard->receive_start_time = now;\n\t\t\tINSTR_TIME_SET_ZERO(since_start);\n\t\t\tshard->receive_last_log_time = now;\n\t\t\tINSTR_TIME_SET_ZERO(since_last_log);\n\t\t\tshard->receive_logged = false;\n\t\t}\n\t\telse\n\t\t{\n\t\t\tsince_start = now;\n\t\t\tINSTR_TIME_SUBTRACT(since_start, shard->receive_start_time);\n\t\t\tsince_last_log = now;\n\t\t\tINSTR_TIME_SUBTRACT(since_last_log, shard->receive_last_log_time);\n\t\t}\n\n\t\t/* Sleep until the log or disconnect timeout is reached. 
*/\n\t\tlog_timeout = Max(0, (double) pageserver_response_log_timeout - INSTR_TIME_GET_MILLISEC(since_last_log));\n\t\tdisconnect_timeout = Max(0, (double) pageserver_response_disconnect_timeout - INSTR_TIME_GET_MILLISEC(since_start));\n\t\ttimeout = (long) ceil(Min(log_timeout, disconnect_timeout));\n\n\t\tnoccurred = WaitEventSetWait(shard->wes_read, timeout, &occurred_event, 1,\n\t\t\t\t\t\t\t\t\t WAIT_EVENT_NEON_PS_READ);\n\t\tResetLatch(MyLatch);\n\n\t\tCHECK_FOR_INTERRUPTS();\n\n\t\t/* Data available in socket? */\n\t\tif (noccurred > 0 && (occurred_event.events & WL_SOCKET_READABLE) != 0)\n\t\t{\n\t\t\tif (!PQconsumeInput(pageserver_conn))\n\t\t\t{\n\t\t\t\tchar\t   *msg = pchomp(PQerrorMessage(pageserver_conn));\n\n\t\t\t\tneon_shard_log(shard_no, LOG, \"could not get response from pageserver: %s\", msg);\n\t\t\t\tpfree(msg);\n\t\t\t\treturn -1;\n\t\t\t}\n\t\t\tgoto retry;\n\t\t}\n\n\t\t/* Timeout was reached, or we were interrupted for some other reason */\n\t\tINSTR_TIME_SET_CURRENT(now);\n\t\tsince_last_log = now;\n\t\tINSTR_TIME_SUBTRACT(since_last_log, shard->receive_last_log_time);\n\t\tsince_start = now;\n\t\tINSTR_TIME_SUBTRACT(since_start, shard->receive_start_time);\n\n\t\t/*\n\t\t * As a debugging aid, if we don't get a response to a pageserver request\n\t\t * for a long time, print a log message.\n\t\t *\n\t\t * The default neon.pageserver_response_log_timeout value, 10 s, is\n\t\t * very generous. Normally we expect a response in a few\n\t\t * milliseconds. 
We have metrics to track latencies in normal ranges,\n\t\t * but in the cases that take exceptionally long, it's useful to log\n\t\t * the exact timestamps.\n\t\t */\n\t\tif (INSTR_TIME_GET_MILLISEC(since_last_log) >= pageserver_response_log_timeout)\n\t\t{\n\t\t\tint\t\t\tport;\n\t\t\tint\t\t\tsndbuf;\n\t\t\tint\t\t\trecvbuf;\n\t\t\tuint64*\t\tmax_wait;\n\n\t\t\tget_local_port(PQsocket(pageserver_conn), &port);\n\t\t\tget_socket_stats(PQsocket(pageserver_conn), &sndbuf, &recvbuf);\n\n\t\t\tneon_shard_log(shard_no, LOG,\n\t\t\t\t\t\t   \"no response received from pageserver for %0.3f s, still waiting (sent \" UINT64_FORMAT \" requests, received \" UINT64_FORMAT \" responses) (socket port=%d sndbuf=%d recvbuf=%d) (conn start=%d end=%d)\",\n\t\t\t\t\t\t   INSTR_TIME_GET_DOUBLE(since_start),\n\t\t\t\t\t\t   shard->nrequests_sent, shard->nresponses_received, port, sndbuf, recvbuf,\n\t\t\t\t           pageserver_conn->inStart, pageserver_conn->inEnd);\n\t\t\tshard->receive_last_log_time = now;\n\t\t\tMyNeonCounters->compute_getpage_stuck_requests_total += !shard->receive_logged;\n\t\t\tshard->receive_logged = true;\n\t\t\tmax_wait = &MyNeonCounters->compute_getpage_max_inflight_stuck_time_ms;\n\t\t\t*max_wait = Max(*max_wait, INSTR_TIME_GET_MILLISEC(since_start));\n\t\t}\n\n\t\t/*\n\t\t * If an even longer time has passed without receiving a response from\n\t\t * the pageserver, disconnect.  That triggers a reconnection attempt\n\t\t * in the caller.\n\t\t *\n\t\t * If this happens, the pageserver is likely dead and isn't coming\n\t\t * back, or there's some kind of a network glitch and the connection\n\t\t * is permanently gone. Without this, if the pageserver or the network\n\t\t * connection is dead, it could take a very long time (15 minutes or\n\t\t * more) until the TCP keepalive timeout notices that. 
Even if we\n\t\t * would in fact get a response if we just waited a little longer,\n\t\t * there's a good chance that we'll get the response sooner by\n\t\t * reconnecting.\n\t\t */\n\t\tif (INSTR_TIME_GET_MILLISEC(since_start) >= pageserver_response_disconnect_timeout)\n\t\t{\n\t\t\tint \t\tport;\n\t\t\tget_local_port(PQsocket(pageserver_conn), &port);\n\t\t\tneon_shard_log(shard_no, LOG, \"no response from pageserver for %0.3f s, disconnecting (socket port=%d)\",\n\t\t\t\t\t   INSTR_TIME_GET_DOUBLE(since_start), port);\n\t\t\tMyNeonCounters->compute_getpage_max_inflight_stuck_time_ms = 0;\n\t\t\tpageserver_disconnect(shard_no);\n\t\t\treturn -1;\n\t\t}\n\n\t\tgoto retry;\n\t}\n\n\t/*\n\t * If we logged earlier that the response is taking a long time, log\n\t * another message when the response is finally received.\n\t */\n\tif (shard->receive_logged)\n\t{\n\t\tINSTR_TIME_SET_CURRENT(now);\n\t\tsince_start = now;\n\t\tINSTR_TIME_SUBTRACT(since_start, shard->receive_start_time);\n\t\tneon_shard_log(shard_no, LOG,\n\t\t\t\t\t   \"received response from pageserver after %0.3f s\",\n\t\t\t\t\t   INSTR_TIME_GET_DOUBLE(since_start));\n\t}\n\tINSTR_TIME_SET_ZERO(shard->receive_start_time);\n\tINSTR_TIME_SET_ZERO(shard->receive_last_log_time);\n\tshard->receive_logged = false;\n\tMyNeonCounters->compute_getpage_max_inflight_stuck_time_ms = 0;\n\n\treturn ret;\n}\n\n/*\n * Reset prefetch and drop connection to the shard.\n * It also drops connection to all other shards involved in prefetch, through\n * prefetch_on_ps_disconnect().\n */\nstatic void\npageserver_disconnect(shardno_t shard_no)\n{\n\t/*\n\t * If the connection to any pageserver is lost, we throw away the\n\t * whole prefetch queue, even for other pageservers. 
It should not\n\t * cause big problems, because connection loss is supposed to be a\n\t * rare event.\n\t */\n\tprefetch_on_ps_disconnect();\n\n\tpageserver_disconnect_shard(shard_no);\n}\n\n/*\n * Disconnect from specified shard\n */\nstatic void\npageserver_disconnect_shard(shardno_t shard_no)\n{\n\tPageServer *shard = &page_servers[shard_no];\n\t/*\n\t * If anything goes wrong while we were sending a request, it's not clear\n\t * what state the connection is in. For example, if we sent the request\n\t * but didn't receive a response yet, we might receive the response some\n\t * time later after we have already sent a new unrelated request. Close\n\t * the connection to avoid getting confused.\n\t * Similarly, even when we're in PS_DISCONNECTED, we may have junk to\n\t * clean up: It is possible that we encountered an error allocating any\n\t * of the wait event sets or the psql connection, or failed when we tried\n\t * to attach wait events to the WaitEventSets.\n\t */\n\tCLEANUP_AND_DISCONNECT(shard);\n\n\tshard->state = PS_Disconnected;\n}\n\n// BEGIN HADRON\n/*\n * Nudge compute_ctl to refresh our configuration. Called when we suspect we may be\n * connecting to the wrong pageservers due to a stale configuration.\n *\n * This is a best-effort operation. If we couldn't send the local loopback HTTP request\n * to compute_ctl or if the request fails for any reason, we just log the error and move\n * on.\n */\n\nextern int hadron_extension_server_port;\n\n// The timestamp (usec) of the first error that occurred while trying to refresh the configuration.\n// Will be reset to 0 after a successful refresh.\nstatic uint64 first_recorded_refresh_error_usec = 0;\n\n// Request compute_ctl to refresh the configuration. This operation may fail, e.g., if the compute_ctl\n// is already in the configuration state. 
The function returns true if the caller needs to cancel the\n// current query to avoid dead/live lock.\nstatic bool\nhadron_request_configuration_refresh() {\n\tstatic CURL\t   *handle = NULL;\n\tCURLcode\tres;\n\tchar\t   *compute_ctl_url;\n\tbool cancel_query = false;\n\n\tif (!lakebase_mode)\n\t\treturn false;\n\n\tif (handle == NULL)\n\t{\n\t\thandle = alloc_curl_handle();\n\n\t\tcurl_easy_setopt(handle, CURLOPT_CUSTOMREQUEST, \"POST\");\n\t\tcurl_easy_setopt(handle, CURLOPT_TIMEOUT, 3L /* seconds */ );\n\t\tcurl_easy_setopt(handle, CURLOPT_POSTFIELDS, \"\");\n\t}\n\n\t// Set the URL\n\tcompute_ctl_url = psprintf(\"http://localhost:%d/refresh_configuration\", hadron_extension_server_port);\n\n\n\telog(LOG, \"Sending refresh configuration request to compute_ctl: %s\", compute_ctl_url);\n\n\tcurl_easy_setopt(handle, CURLOPT_URL, compute_ctl_url);\n\n\tres = curl_easy_perform(handle);\n\tif (res != CURLE_OK )\n\t{\n\t\telog(WARNING, \"refresh_configuration request failed: %s\\n\", curl_easy_strerror(res));\n\t}\n\telse\n\t{\n\t\tlong http_code = 0;\n\t\tcurl_easy_getinfo(handle, CURLINFO_RESPONSE_CODE, &http_code);\n\t\tif ( res != CURLE_OK )\n\t\t{\n\t\t\telog(WARNING, \"compute_ctl refresh_configuration request getinfo failed: %s\\n\", curl_easy_strerror(res));\n\t\t}\n\t\telse\n\t\t{\n\t\t\telog(LOG, \"compute_ctl refresh_configuration got HTTP response: %ld\\n\", http_code);\n\t\t\tif( http_code == 200 )\n\t\t\t{\n\t\t\t\tfirst_recorded_refresh_error_usec = 0;\n\t\t\t}\n\t\t\telse\n\t\t\t{\n\t\t\t\tif (first_recorded_refresh_error_usec == 0)\n\t\t\t\t{\n\t\t\t\t\tfirst_recorded_refresh_error_usec = GetCurrentTimestamp();\n\t\t\t\t}\n\t\t\t\telse if(GetCurrentTimestamp() - first_recorded_refresh_error_usec > kRefreshErrorTimeoutUSec)\n\t\t\t\t{\n\t\t\t\t\t{\n\t\t\t\t\t\tfirst_recorded_refresh_error_usec = 0;\n\t\t\t\t\t\tcancel_query = true;\n\t\t\t\t\t}\n\t\t\t\t}\n\t\t\t}\n\t\t}\n\t}\n\n\t// In regular Postgres usage, it is not necessary to manually free 
memory allocated by palloc (psprintf) because\n\t// it will be cleaned up after the \"memory context\" is reset (e.g. after the query or the transaction is finished).\n\t// However, the number of times this function gets called during a single query/transaction can be unbounded due to\n\t// the various retry loops around calls to pageservers. Therefore, we need to manually free this memory here.\n\tif (compute_ctl_url != NULL)\n\t{\n\t\tpfree(compute_ctl_url);\n\t}\n\treturn cancel_query;\n}\n// END HADRON\n\nstatic bool\npageserver_send(shardno_t shard_no, NeonRequest *request)\n{\n\tStringInfoData req_buff;\n\tPageServer *shard = &page_servers[shard_no];\n\tPGconn\t   *pageserver_conn;\n\n\tMyNeonCounters->pageserver_requests_sent_total++;\n\n\t/* If the connection was lost for some reason, reconnect */\n\tif (shard->state == PS_Connected && PQstatus(shard->conn) == CONNECTION_BAD)\n\t{\n\t\tneon_shard_log(shard_no, LOG, \"pageserver_send disconnect bad connection\");\n\t\tpageserver_disconnect(shard_no);\n\t\tpageserver_conn = NULL;\n\t}\n\n\trequest->reqid = GENERATE_REQUEST_ID();\n\treq_buff = nm_pack_request(request);\n\n\t/*\n\t * If pageserver is stopped, the connections from compute node are broken.\n\t * The compute node doesn't notice that immediately, but it will cause the\n\t * next request to fail, usually on the next query. That causes\n\t * user-visible errors if pageserver is restarted, or the tenant is moved\n\t * from one pageserver to another. See\n\t * https://github.com/neondatabase/neon/issues/1138 So try to reestablish\n\t * connection in case of failure.\n\t */\n\tif (shard->state != PS_Connected)\n\t{\n\t\twhile (!pageserver_connect(shard_no, shard->n_reconnect_attempts < max_reconnect_attempts ? 
LOG : ERROR))\n\t\t{\n\t\t\tshard->n_reconnect_attempts += 1;\n\t\t\tif (shard->n_reconnect_attempts > conf_refresh_reconnect_attempt_threshold\n\t\t\t\t&& hadron_request_configuration_refresh() )\n\t\t\t{\n\t\t\t\tneon_shard_log(shard_no, ERROR, \"request failed too many times, cancelling query\");\n\t\t\t}\n\t\t}\n\t\tshard->n_reconnect_attempts = 0;\n\t} else {\n\t\tAssert(shard->conn != NULL);\n\t}\n\n\tpageserver_conn = shard->conn;\n\n\t/*\n\t * Send request.\n\t *\n\t * In principle, this could block if the output buffer is full, and we\n\t * should use async mode and check for interrupts while waiting. In\n\t * practice, our requests are small enough to always fit in the output and\n\t * TCP buffer.\n\t *\n\t * Note that this also will fail when the connection is in the\n\t * PGRES_POLLING_WRITING state. It's kinda dirty to disconnect at this\n\t * point, but on the grand scheme of things it's only a small issue.\n\t */\n\tshard->nrequests_sent++;\n\tif (PQputCopyData(pageserver_conn, req_buff.data, req_buff.len) <= 0)\n\t{\n\t\tchar\t   *msg = pchomp(PQerrorMessage(pageserver_conn));\n\n\t\tpageserver_disconnect(shard_no);\n\t\tneon_shard_log(shard_no, LOG, \"pageserver_send disconnected: failed to send page request (try to reconnect): %s\", msg);\n\t\tpfree(msg);\n\t\tpfree(req_buff.data);\n\t\treturn false;\n\t}\n\n\tpfree(req_buff.data);\n\n\tif (message_level_is_interesting(PageStoreTrace))\n\t{\n\t\tchar\t   *msg = nm_to_string((NeonMessage *) request);\n\n\t\tneon_shard_log(shard_no, PageStoreTrace, \"sent request: %s\", msg);\n\t\tpfree(msg);\n\t}\n\n\treturn true;\n}\n\nstatic NeonResponse *\npageserver_receive(shardno_t shard_no)\n{\n\tStringInfoData resp_buff;\n\tNeonResponse *resp;\n\tPageServer *shard = &page_servers[shard_no];\n\tPGconn\t   *pageserver_conn = shard->conn;\n\t/* read response */\n\tint\t\t\trc;\n\n\tif (shard->state != PS_Connected)\n\t{\n\t\tneon_shard_log(shard_no, LOG,\n\t\t\t\t\t   \"pageserver_receive: returning NULL for 
non-connected pageserver connection: 0x%02x\",\n\t\t\t\t\t   shard->state);\n\t\treturn NULL;\n\t}\n\n\tAssert(pageserver_conn);\n\n\trc = call_PQgetCopyData(shard_no, &resp_buff.data);\n\tif (rc >= 0)\n\t{\n\t\t/* call_PQgetCopyData handles rc == 0 */\n\t\tAssert(rc > 0);\n\n\t\tPG_TRY();\n\t\t{\n\t\t\tresp_buff.len = rc;\n\t\t\tresp_buff.cursor = 0;\n\t\t\tresp = nm_unpack_response(&resp_buff);\n\t\t\tPQfreemem(resp_buff.data);\n\t\t}\n\t\tPG_CATCH();\n\t\t{\n\t\t\tneon_shard_log(shard_no, LOG, \"pageserver_receive: disconnect due to failure while parsing response\");\n\t\t\tpageserver_disconnect(shard_no);\n\t\t\tPG_RE_THROW();\n\t\t}\n\t\tPG_END_TRY();\n\n\t\tif (message_level_is_interesting(PageStoreTrace))\n\t\t{\n\t\t\tchar\t   *msg = nm_to_string((NeonMessage *) resp);\n\n\t\t\tneon_shard_log(shard_no, PageStoreTrace, \"got response: %s\", msg);\n\t\t\tpfree(msg);\n\t\t}\n\t}\n\telse if (rc == -1 && shard->state == PS_Disconnected)\n\t{\n\t\t/* If the state is 'Disconnected', the disconnection message was already logged */\n\t\tresp = NULL;\n\t}\n\telse if (rc == -1)\n\t{\n\t\tchar\t   *msg = pchomp(PQerrorMessage(pageserver_conn));\n\n\t\tneon_shard_log(shard_no, LOG, \"pageserver_receive disconnect: psql end of copy data: %s\", msg);\n\t\tpfree(msg);\n\t\tpageserver_disconnect(shard_no);\n\t\tresp = NULL;\n\n\t\t/*\n\t\t * Always poke compute_ctl to request a configuration refresh if we have issues receiving data from pageservers after\n\t\t * successfully connecting to it. It could be an indication that we are connecting to the wrong pageservers (e.g. 
PS\n\t\t * is in secondary mode or otherwise refuses to respond our request).\n\t\t */\n\t\thadron_request_configuration_refresh();\n\t}\n\telse if (rc == -2)\n\t{\n\t\tchar\t   *msg = pchomp(PQerrorMessage(pageserver_conn));\n\n\t\tpageserver_disconnect(shard_no);\n\t\thadron_request_configuration_refresh();\n\t\tneon_shard_log(shard_no, ERROR, \"pageserver_receive disconnect: could not read COPY data: %s\", msg);\n\t}\n\telse\n\t{\n\t\tpageserver_disconnect(shard_no);\n\t\thadron_request_configuration_refresh();\n\t\tneon_shard_log(shard_no, ERROR, \"pageserver_receive disconnect: unexpected PQgetCopyData return value: %d\", rc);\n\t}\n\n\tshard->nresponses_received++;\n\treturn (NeonResponse *) resp;\n}\n\nstatic NeonResponse *\npageserver_try_receive(shardno_t shard_no)\n{\n\tStringInfoData resp_buff;\n\tNeonResponse *resp;\n\tPageServer *shard = &page_servers[shard_no];\n\tPGconn\t   *pageserver_conn = shard->conn;\n\tint\trc;\n\n\tif (shard->state != PS_Connected)\n\t\treturn NULL;\n\n\tAssert(pageserver_conn);\n\n\trc = PQgetCopyData(shard->conn, &resp_buff.data, 1 /* async */);\n\tif (rc == 0)\n\t{\n\t\tif (!PQconsumeInput(shard->conn))\n\t\t{\n\t\t\treturn NULL;\n\t\t}\n\t\trc = PQgetCopyData(shard->conn, &resp_buff.data, 1 /* async */);\n\t}\n\n\tif (rc == 0)\n\t\treturn NULL;\n\telse if (rc > 0)\n\t{\n\t\tPG_TRY();\n\t\t{\n\t\t\tresp_buff.len = rc;\n\t\t\tresp_buff.cursor = 0;\n\t\t\tresp = nm_unpack_response(&resp_buff);\n\t\t\tPQfreemem(resp_buff.data);\n\t\t}\n\t\tPG_CATCH();\n\t\t{\n\t\t\tneon_shard_log(shard_no, LOG, \"pageserver_receive: disconnect due to failure while parsing response\");\n\t\t\tpageserver_disconnect(shard_no);\n\t\t\t/*\n\t\t\t * Malformed responses from PageServer are a reason to raise\n\t\t\t * errors and cancel transactions.\n\t\t\t */\n\t\t\tPG_RE_THROW();\n\t\t}\n\t\tPG_END_TRY();\n\n\t\tif (message_level_is_interesting(PageStoreTrace))\n\t\t{\n\t\t\tchar\t   *msg = nm_to_string((NeonMessage *) 
resp);\n\n\t\t\tneon_shard_log(shard_no, PageStoreTrace, \"got response: %s\", msg);\n\t\t\tpfree(msg);\n\t\t}\n\t}\n\telse if (rc == -1)\n\t{\n\t\tneon_shard_log(shard_no, LOG, \"pageserver_receive disconnect: psql end of copy data: %s\", pchomp(PQerrorMessage(pageserver_conn)));\n\t\tpageserver_disconnect(shard_no);\n\t\tresp = NULL;\n\t\thadron_request_configuration_refresh();\n\t}\n\telse if (rc == -2)\n\t{\n\t\tchar\t   *msg = pchomp(PQerrorMessage(pageserver_conn));\n\n\t\tpageserver_disconnect(shard_no);\n\t\thadron_request_configuration_refresh();\n\t\tneon_shard_log(shard_no, LOG, \"pageserver_receive disconnect: could not read COPY data: %s\", msg);\n\t\tresp = NULL;\n\t}\n\telse\n\t{\n\t\tpageserver_disconnect(shard_no);\n\t\thadron_request_configuration_refresh();\n\t\tneon_shard_log(shard_no, ERROR, \"pageserver_receive disconnect: unexpected PQgetCopyData return value: %d\", rc);\n\t}\n\n\t/*\n\t * Always poke compute_ctl to request a configuration refresh if we have issues receiving data from pageservers after\n\t * successfully connecting to it. It could be an indication that we are connecting to the wrong pageservers (e.g. 
PS\n\t * is in secondary mode or otherwise refuses to respond our request).\n\t */\n\tif ( rc < 0 && hadron_request_configuration_refresh() )\n\t{\n\t\tneon_shard_log(shard_no, ERROR, \"refresh_configuration request failed, cancelling query\");\n\t}\n\n\tshard->nresponses_received++;\n\treturn (NeonResponse *) resp;\n}\n\n\nstatic bool\npageserver_flush(shardno_t shard_no)\n{\n\tPGconn\t   *pageserver_conn = page_servers[shard_no].conn;\n\n\tif (page_servers[shard_no].state != PS_Connected)\n\t{\n\t\tneon_shard_log(shard_no, WARNING, \"Tried to flush while disconnected\");\n\t}\n\telse\n\t{\n\t\tMyNeonCounters->pageserver_send_flushes_total++;\n\t\tif (PQflush(pageserver_conn))\n\t\t{\n\t\t\tchar\t   *msg = pchomp(PQerrorMessage(pageserver_conn));\n\n\t\t\tpageserver_disconnect(shard_no);\n\t\t\tneon_shard_log(shard_no, LOG, \"pageserver_flush disconnect because failed to flush page requests: %s\", msg);\n\t\t\tpfree(msg);\n\t\t\treturn false;\n\t\t}\n\t}\n\n\treturn true;\n}\n\npage_server_api api =\n{\n\t.send = pageserver_send,\n\t.flush = pageserver_flush,\n\t.receive = pageserver_receive,\n\t.try_receive = pageserver_try_receive,\n\t.disconnect = pageserver_disconnect_shard\n};\n\nstatic bool\ncheck_neon_id(char **newval, void **extra, GucSource source)\n{\n\tuint8\t\tid[16];\n\n\treturn **newval == '\\0' || HexDecodeString(id, *newval, 16);\n}\n\nvoid\nPagestoreShmemInit(void)\n{\n\tbool\t\tfound;\n\n\tpagestore_shared = ShmemInitStruct(\"libpagestore shared state\",\n\t\t\t\t\t\t\t\t\t   sizeof(PagestoreShmemState),\n\t\t\t\t\t\t\t\t\t   &found);\n\tif (!found)\n\t{\n\t\tpg_atomic_init_u64(&pagestore_shared->begin_update_counter, 0);\n\t\tpg_atomic_init_u64(&pagestore_shared->end_update_counter, 0);\n\t\tmemset(&pagestore_shared->shard_map, 0, sizeof(ShardMap));\n\t\tAssignPageserverConnstring(pageserver_connstring, NULL);\n\t}\n}\n\nvoid\nPagestoreShmemRequest(void)\n{\n\tRequestAddinShmemSpace(sizeof(PagestoreShmemState));\n}\n\n/*\n * Module 
initialization function\n */\nvoid\npg_init_libpagestore(void)\n{\n\tDefineCustomStringVariable(\"neon.pageserver_connstring\",\n\t\t\t\t\t\t\t   \"connection string to the page server\",\n\t\t\t\t\t\t\t   NULL,\n\t\t\t\t\t\t\t   &pageserver_connstring,\n\t\t\t\t\t\t\t   \"\",\n\t\t\t\t\t\t\t   PGC_SIGHUP,\n\t\t\t\t\t\t\t   0,\t/* no flags required */\n\t\t\t\t\t\t\t   CheckPageserverConnstring, AssignPageserverConnstring, NULL);\n\n\tDefineCustomStringVariable(\"neon.timeline_id\",\n\t\t\t\t\t\t\t   \"Neon timeline_id the server is running on\",\n\t\t\t\t\t\t\t   NULL,\n\t\t\t\t\t\t\t   &neon_timeline,\n\t\t\t\t\t\t\t   \"\",\n\t\t\t\t\t\t\t   PGC_POSTMASTER,\n\t\t\t\t\t\t\t   0,\t/* no flags required */\n\t\t\t\t\t\t\t   check_neon_id, NULL, NULL);\n\n\tDefineCustomStringVariable(\"neon.tenant_id\",\n\t\t\t\t\t\t\t   \"Neon tenant_id the server is running on\",\n\t\t\t\t\t\t\t   NULL,\n\t\t\t\t\t\t\t   &neon_tenant,\n\t\t\t\t\t\t\t   \"\",\n\t\t\t\t\t\t\t   PGC_POSTMASTER,\n\t\t\t\t\t\t\t   0,\t/* no flags required */\n\t\t\t\t\t\t\t   check_neon_id, NULL, NULL);\n\n\tDefineCustomStringVariable(\"neon.project_id\",\n\t\t\t\t\t\t\t   \"Neon project_id the server is running on\",\n\t\t\t\t\t\t\t   NULL,\n\t\t\t\t\t\t\t   &neon_project_id,\n\t\t\t\t\t\t\t   \"\",\n\t\t\t\t\t\t\t   PGC_POSTMASTER,\n\t\t\t\t\t\t\t   0,\t/* no flags required */\n\t\t\t\t\t\t\t   NULL, NULL, NULL);\n\tDefineCustomStringVariable(\"neon.branch_id\",\n\t\t\t\t\t\t\t   \"Neon branch_id the server is running on\",\n\t\t\t\t\t\t\t   NULL,\n\t\t\t\t\t\t\t   &neon_branch_id,\n\t\t\t\t\t\t\t   \"\",\n\t\t\t\t\t\t\t   PGC_POSTMASTER,\n\t\t\t\t\t\t\t   0,\t/* no flags required */\n\t\t\t\t\t\t\t   NULL, NULL, NULL);\n\tDefineCustomStringVariable(\"neon.endpoint_id\",\n\t\t\t\t\t\t\t   \"Neon endpoint_id the server is running on\",\n\t\t\t\t\t\t\t   NULL,\n\t\t\t\t\t\t\t   &neon_endpoint_id,\n\t\t\t\t\t\t\t   \"\",\n\t\t\t\t\t\t\t   PGC_POSTMASTER,\n\t\t\t\t\t\t\t   0,\t/* no flags required 
*/\n\t\t\t\t\t\t\t   NULL, NULL, NULL);\n\n\tDefineCustomIntVariable(\"neon.stripe_size\",\n\t\t\t\t\t\t\t\"sharding stripe size\",\n\t\t\t\t\t\t\tNULL,\n\t\t\t\t\t\t\t&stripe_size,\n\t\t\t\t\t\t\t2048, 1, INT_MAX,\n\t\t\t\t\t\t\tPGC_SIGHUP,\n\t\t\t\t\t\t\tGUC_UNIT_BLOCKS,\n\t\t\t\t\t\t\tNULL, NULL, NULL);\n\n\tDefineCustomIntVariable(\"neon.max_cluster_size\",\n\t\t\t\t\t\t\t\"cluster size limit\",\n\t\t\t\t\t\t\tNULL,\n\t\t\t\t\t\t\t&max_cluster_size,\n\t\t\t\t\t\t\t-1, -1, INT_MAX,\n\t\t\t\t\t\t\tPGC_SIGHUP,\n\t\t\t\t\t\t\tGUC_UNIT_MB,\n\t\t\t\t\t\t\tNULL, NULL, NULL);\n\tDefineCustomIntVariable(\"neon.flush_output_after\",\n\t\t\t\t\t\t\t\"Flush the output buffer after every N unflushed requests\",\n\t\t\t\t\t\t\tNULL,\n\t\t\t\t\t\t\t&flush_every_n_requests,\n\t\t\t\t\t\t\t8, -1, INT_MAX,\n\t\t\t\t\t\t\tPGC_USERSET,\n\t\t\t\t\t\t\t0,\t/* no flags required */\n\t\t\t\t\t\t\tNULL, NULL, NULL);\n\tDefineCustomIntVariable(\"neon.max_reconnect_attempts\",\n\t\t\t\t\t\t\t\"Maximal attempts to reconnect to pages server (with 1 second timeout)\",\n\t\t\t\t\t\t\tNULL,\n\t\t\t\t\t\t\t&max_reconnect_attempts,\n\t\t\t\t\t\t\t60, 0, INT_MAX,\n\t\t\t\t\t\t\tPGC_USERSET,\n\t\t\t\t\t\t\t0,\n\t\t\t\t\t\t\tNULL, NULL, NULL);\n\tDefineCustomIntVariable(\"neon.readahead_buffer_size\",\n\t\t\t\t\t\t\t\"number of prefetches to buffer\",\n\t\t\t\t\t\t\t\"This buffer is used to hold and manage prefetched \"\n\t\t\t\t\t\t\t\"data; so it is important that this buffer is at \"\n\t\t\t\t\t\t\t\"least as large as the configured value of all \"\n\t\t\t\t\t\t\t\"tablespaces' effective_io_concurrency and \"\n\t\t\t\t\t\t\t\"maintenance_io_concurrency, and your sessions' \"\n\t\t\t\t\t\t\t\"values for these settings.\",\n\t\t\t\t\t\t\t&readahead_buffer_size,\n\t\t\t\t\t\t\t128, 16, 1024,\n\t\t\t\t\t\t\tPGC_USERSET,\n\t\t\t\t\t\t\t0,\t/* no flags required */\n\t\t\t\t\t\t\tNULL, (GucIntAssignHook) &readahead_buffer_resize, 
NULL);\n\tDefineCustomIntVariable(\"neon.readahead_getpage_pull_timeout\",\n\t\t\t\t\t\t\t\"readahead response pull timeout\",\n\t\t\t\t\t\t\t\"Time between active tries to pull data from the \"\n\t\t\t\t\t\t\t\"PageStream connection when we have pages which \"\n\t\t\t\t\t\t\t\"were read ahead but not yet received.\",\n\t\t\t\t\t\t\t&readahead_getpage_pull_timeout_ms,\n\t\t\t\t\t\t\t50, 0, 5 * 60 * 1000,\n\t\t\t\t\t\t\tPGC_USERSET,\n\t\t\t\t\t\t\tGUC_UNIT_MS,\n\t\t\t\t\t\t\tNULL, NULL, NULL);\n\tDefineCustomIntVariable(\"neon.protocol_version\",\n\t\t\t\t\t\t\t\"Version of compute<->page server protocol\",\n\t\t\t\t\t\t\tNULL,\n\t\t\t\t\t\t\t&neon_protocol_version,\n\t\t\t\t\t\t\t3,\t/* use protocol version 3 */\n\t\t\t\t\t\t\t2,\t/* min */\n\t\t\t\t\t\t\t3,\t/* max */\n\t\t\t\t\t\t\tPGC_SU_BACKEND,\n\t\t\t\t\t\t\t0,\t/* no flags required */\n\t\t\t\t\t\t\tNULL, NULL, NULL);\n\tDefineCustomIntVariable(\"hadron.conf_refresh_reconnect_attempt_threshold\",\n\t\t\t\t\t\t\t\"Threshold of the number of consecutive failed pageserver \"\n\t\t\t\t\t\t\t\"connection attempts (per shard) before signaling \"\n\t\t\t\t\t\t\t\"compute_ctl for a configuration refresh.\",\n\t\t\t\t\t\t\tNULL,\n\t\t\t\t\t\t\t&conf_refresh_reconnect_attempt_threshold,\n\t\t\t\t\t\t\t16, 0, INT_MAX,\n\t\t\t\t\t\t\tPGC_USERSET,\n\t\t\t\t\t\t\t0,\n\t\t\t\t\t\t\tNULL, NULL, NULL);\n\n\tDefineCustomIntVariable(\"neon.pageserver_response_log_timeout\",\n\t\t\t\t\t\t\t\"pageserver response log timeout\",\n\t\t\t\t\t\t\t\"If the pageserver doesn't respond to a request within this timeout, \"\n\t\t\t\t\t\t\t\"a message is printed to the log.\",\n\t\t\t\t\t\t\t&pageserver_response_log_timeout,\n\t\t\t\t\t\t\t10000, 100, INT_MAX,\n\t\t\t\t\t\t\tPGC_SUSET,\n\t\t\t\t\t\t\tGUC_UNIT_MS,\n\t\t\t\t\t\t\tNULL, NULL, NULL);\n\n\tDefineCustomIntVariable(\"neon.pageserver_response_disconnect_timeout\",\n\t\t\t\t\t\t\t\"pageserver response diconnect timeout\",\n\t\t\t\t\t\t\t\"If the pageserver doesn't respond to a 
request within this timeout, \"\n\t\t\t\t\t\t\t\"disconnect and reconnect.\",\n\t\t\t\t\t\t\t&pageserver_response_disconnect_timeout,\n\t\t\t\t\t\t\t150000, 100, INT_MAX,\n\t\t\t\t\t\t\tPGC_SUSET,\n\t\t\t\t\t\t\tGUC_UNIT_MS,\n\t\t\t\t\t\t\tNULL, NULL, NULL);\n\n\tDefineCustomEnumVariable(\n\t\t\t\t\t\t\t\"neon.compute_mode\",\n\t\t\t\t\t\t\t\"The compute endpoint node type\",\n\t\t\t\t\t\t\tNULL,\n\t\t\t\t\t\t\t&neon_compute_mode,\n\t\t\t\t\t\t\tCP_MODE_PRIMARY,\n\t\t\t\t\t\t\tneon_compute_modes,\n\t\t\t\t\t\t\tPGC_POSTMASTER,\n\t\t\t\t\t\t\t0,\n\t\t\t\t\t\t\tNULL, NULL, NULL);\n\n\tif (page_server != NULL)\n\t\tneon_log(ERROR, \"libpagestore already loaded\");\n\n\tneon_log(PageStoreTrace, \"libpagestore already loaded\");\n\tpage_server = &api;\n\n\t/*\n\t * Retrieve the auth token to use when connecting to pageserver and\n\t * safekeepers\n\t */\n\tneon_auth_token = getenv(\"NEON_AUTH_TOKEN\");\n\tif (neon_auth_token)\n\t\tneon_log(LOG, \"using storage auth token from NEON_AUTH_TOKEN environment variable\");\n\n\tif (pageserver_connstring[0])\n\t{\n\t\tneon_log(PageStoreTrace, \"set neon_smgr hook\");\n\t\tsmgr_hook = smgr_neon;\n\t\tsmgr_init_hook = smgr_init_neon;\n\t\tdbsize_hook = neon_dbsize;\n\t}\n\n\tmemset(page_servers, 0, sizeof(page_servers));\n}\n"
  },
  {
    "path": "pgxn/neon/libpqwalproposer.h",
    "content": "/*\n * Interface to set of libpq wrappers walproposer and neon_walreader need.\n * Similar to libpqwalreceiver, but it has blocking connection establishment and\n * pqexec which don't fit us. Implementation is at walproposer_pg.c.\n */\n#ifndef ___LIBPQWALPROPOSER_H__\n#define ___LIBPQWALPROPOSER_H__\n\n/* Re-exported and modified ExecStatusType */\ntypedef enum\n{\n\t/* We received a single CopyBoth result */\n\tWP_EXEC_SUCCESS_COPYBOTH,\n\n\t/*\n\t * Any success result other than a single CopyBoth was received. The\n\t * specifics of the result were already logged, but it may be useful to\n\t * provide an error message indicating which safekeeper messed up.\n\t *\n\t * Do not expect PQerrorMessage to be appropriately set.\n\t */\n\tWP_EXEC_UNEXPECTED_SUCCESS,\n\n\t/*\n\t * No result available at this time. Wait until read-ready, then call\n\t * again. Internally, this is returned when PQisBusy indicates that\n\t * PQgetResult would block.\n\t */\n\tWP_EXEC_NEEDS_INPUT,\n\t/* Catch-all failure. Check PQerrorMessage. */\n\tWP_EXEC_FAILED,\n} WalProposerExecStatusType;\n\n/* Possible return values from walprop_async_read */\ntypedef enum\n{\n\t/* The full read was successful. buf now points to the data */\n\tPG_ASYNC_READ_SUCCESS,\n\n\t/*\n\t * The read is ongoing. Wait until the connection is read-ready, then try\n\t * again.\n\t */\n\tPG_ASYNC_READ_TRY_AGAIN,\n\t/* Reading failed. Check PQerrorMessage(conn) */\n\tPG_ASYNC_READ_FAIL,\n} PGAsyncReadResult;\n\n/* Possible return values from walprop_async_write */\ntypedef enum\n{\n\t/* The write fully completed */\n\tPG_ASYNC_WRITE_SUCCESS,\n\n\t/*\n\t * The write started, but you'll need to call PQflush some more times to\n\t * finish it off. We just tried, so it's best to wait until the connection\n\t * is read- or write-ready to try again.\n\t *\n\t * If it becomes read-ready, call PQconsumeInput and flush again. 
If it\n\t * becomes write-ready, just call PQflush.\n\t */\n\tPG_ASYNC_WRITE_TRY_FLUSH,\n\t/* Writing failed. Check PQerrorMessage(conn) */\n\tPG_ASYNC_WRITE_FAIL,\n} PGAsyncWriteResult;\n\n/*\n * This header is included by walproposer.h to define walproposer_api; if we're\n * building walproposer without pg, ignore libpq part, leaving only interface\n * types.\n */\n#ifndef WALPROPOSER_LIB\n\n#include \"libpq-fe.h\"\n\n/*\n * Sometimes working directly with underlying PGconn is simpler, export the\n * whole thing for simplicity.\n */\ntypedef struct WalProposerConn\n{\n\tPGconn\t   *pg_conn;\n\tbool\t\tis_nonblocking; /* whether the connection is non-blocking */\n\tchar\t   *recvbuf;\t\t/* last received CopyData message from\n\t\t\t\t\t\t\t\t * walprop_async_read */\n} WalProposerConn;\n\nextern WalProposerConn *libpqwp_connect_start(char *conninfo);\nextern bool libpqwp_send_query(WalProposerConn *conn, char *query);\nextern WalProposerExecStatusType libpqwp_get_query_result(WalProposerConn *conn);\nextern PGAsyncReadResult libpqwp_async_read(WalProposerConn *conn, char **buf, int *amount);\nextern void libpqwp_disconnect(WalProposerConn *conn);\n\n#endif\t\t\t\t\t\t\t/* WALPROPOSER_LIB */\n#endif\t\t\t\t\t\t\t/* ___LIBPQWALPROPOSER_H__ */\n"
  },
  {
    "path": "pgxn/neon/logical_replication_monitor.c",
    "content": "#include \"postgres.h\"\n\n#include <dirent.h>\n#include <limits.h>\n#include <string.h>\n#include <signal.h>\n#include <sys/stat.h>\n\n#include \"miscadmin.h\"\n#include \"postmaster/bgworker.h\"\n#include \"postmaster/interrupt.h\"\n#include \"replication/slot.h\"\n#include \"storage/fd.h\"\n#include \"storage/procsignal.h\"\n#include \"tcop/tcopprot.h\"\n#include \"utils/guc.h\"\n#include \"utils/wait_event.h\"\n\n#include \"logical_replication_monitor.h\"\n\n#define LS_MONITOR_CHECK_INTERVAL 10000 /* ms */\n\nstatic int\tlogical_replication_max_snap_files = 10000;\n\n/*\n * According to Chi (shyzh), the pageserver _should_ be good with 10 MB worth of\n * snapshot files. Let's use 8 MB since 8 is a power of 2.\n */\nstatic int\tlogical_replication_max_logicalsnapdir_size = 8000;\n\n/*\n * A primitive description of a logical snapshot file including the LSN of the\n * file and its size.\n */\ntypedef struct SnapDesc {\n\tXLogRecPtr\tlsn;\n\toff_t\t\tsz;\n} SnapDesc;\n\nPGDLLEXPORT void LogicalSlotsMonitorMain(Datum main_arg);\n\n/*\n * Sorts an array of snapshot descriptors by their LSN.\n */\nstatic int\nSnapDescComparator(const void *a, const void *b)\n{\n\tconst SnapDesc\t*desc1 = a;\n\tconst SnapDesc\t*desc2 = b;\n\n\tif (desc1->lsn < desc2->lsn)\n\t\treturn 1;\n\telse if (desc1->lsn == desc2->lsn)\n\t\treturn 0;\n\telse\n\t\treturn -1;\n}\n\n/*\n * Look at .snap files and calculate minimum allowed restart_lsn of slot so that\n * next gc would leave not more than logical_replication_max_snap_files; all\n * slots having lower restart_lsn should be dropped.\n */\nstatic XLogRecPtr\nget_snapshots_cutoff_lsn(void)\n{\n/* PG 18 has a constant defined for this, PG_LOGICAL_SNAPSHOTS_DIR */\n#define SNAPDIR \"pg_logical/snapshots\"\n\n\tDIR\t\t   *dirdesc;\n\tint\t\t\tdirdesc_fd;\n\tstruct dirent *de;\n\tsize_t\t\tsnapshot_index = 0;\n\tSnapDesc   *snapshot_descriptors;\n\tsize_t\t\tdescriptors_allocated = 1024;\n\tXLogRecPtr\tcutoff = 
0;\n\toff_t\t\tlogicalsnapdir_size = 0;\n\tconst int\tlogical_replication_max_logicalsnapdir_size_bytes = logical_replication_max_logicalsnapdir_size * 1000;\n\n\tif (logical_replication_max_snap_files < 0 && logical_replication_max_logicalsnapdir_size < 0)\n\t\treturn 0;\n\n\tsnapshot_descriptors = palloc(sizeof(*snapshot_descriptors) * descriptors_allocated);\n\n\tdirdesc = AllocateDir(SNAPDIR);\n\tdirdesc_fd = dirfd(dirdesc);\n\tif (dirdesc_fd == -1)\n\t\tereport(ERROR, errmsg(\"failed to get a file descriptor for \" SNAPDIR \": %m\"));\n\n\t/* find all .snap files and get their lsns */\n\twhile ((de = ReadDir(dirdesc, SNAPDIR)) != NULL)\n\t{\n\t\tuint32\t\thi;\n\t\tuint32\t\tlo;\n\t\tstruct stat\tst;\n\t\tXLogRecPtr\tlsn;\n\t\tSnapDesc   *desc;\n\n\t\tif (strcmp(de->d_name, \".\") == 0 ||\n\t\t\tstrcmp(de->d_name, \"..\") == 0)\n\t\t\tcontinue;\n\n\t\tif (sscanf(de->d_name, \"%X-%X.snap\", &hi, &lo) != 2)\n\t\t{\n\t\t\tereport(LOG,\n\t\t\t\t\t(errmsg(\"could not parse file name as .snap file \\\"%s\\\"\", de->d_name)));\n\t\t\tcontinue;\n\t\t}\n\n\t\tlsn = ((uint64) hi) << 32 | lo;\n\t\telog(DEBUG5, \"found snap file %X/%X\", LSN_FORMAT_ARGS(lsn));\n\n\t\tif (fstatat(dirdesc_fd, de->d_name, &st, 0) == -1)\n\t\t\tereport(ERROR, errmsg(\"failed to get the size of \" SNAPDIR \"/%s: %m\", de->d_name));\n\n\t\tif (descriptors_allocated == snapshot_index)\n\t\t{\n\t\t\tdescriptors_allocated *= 2;\n\t\t\tsnapshot_descriptors = repalloc(snapshot_descriptors, sizeof(*snapshot_descriptors) * descriptors_allocated);\n\t\t}\n\n\t\tdesc = &snapshot_descriptors[snapshot_index++];\n\t\tdesc->lsn = lsn;\n\t\tdesc->sz = st.st_size;\n\t}\n\n\tqsort(snapshot_descriptors, snapshot_index, sizeof(*snapshot_descriptors), SnapDescComparator);\n\n\t/* Are there more snapshot files than specified? 
*/\n\tif (logical_replication_max_snap_files <= snapshot_index)\n\t{\n\t\tcutoff = snapshot_descriptors[logical_replication_max_snap_files - 1].lsn;\n\t\telog(LOG,\n\t\t\t\"ls_monitor: number of snapshot files, %zu, is larger than limit of %d\",\n\t\t\tsnapshot_index, logical_replication_max_snap_files);\n\t}\n\n\t/* Is the size of the logical snapshots directory larger than specified?\n\t *\n\t * It's possible we could hit both thresholds, so remove any extra files\n\t * first, and then truncate based on size of the remaining files.\n\t */\n\tif (logicalsnapdir_size > logical_replication_max_logicalsnapdir_size_bytes)\n\t{\n\t\t/* Unfortunately, iterating the directory does not guarantee any order\n\t\t * so we can't cache an index in the preceding loop.\n\t\t */\n\n\t\toff_t\t\tsz;\n\t\tconst XLogRecPtr original = cutoff;\n\n\t\tsz = snapshot_descriptors[0].sz;\n\t\tfor (size_t i = 1; i < logical_replication_max_snap_files; ++i)\n\t\t{\n\t\t\tif (sz > logical_replication_max_logicalsnapdir_size_bytes)\n\t\t\t{\n\t\t\t\tcutoff = snapshot_descriptors[i - 1].lsn;\n\t\t\t\tbreak;\n\t\t\t}\n\n\t\t\tsz += snapshot_descriptors[i].sz;\n\t\t}\n\n\t\tif (cutoff != original)\n\t\t\telog(LOG, \"ls_monitor: \" SNAPDIR \" is larger than %d KB\",\n\t\t\t\t logical_replication_max_logicalsnapdir_size);\n\t}\n\n\tpfree(snapshot_descriptors);\n\tFreeDir(dirdesc);\n\n\treturn cutoff;\n\n#undef SNAPDIR\n}\n\nvoid\nInitLogicalReplicationMonitor(void)\n{\n\tBackgroundWorker bgw;\n\n\tDefineCustomIntVariable(\n\t\t\t\t\t\t\t\"neon.logical_replication_max_snap_files\",\n\t\t\t\t\t\t\t\"Maximum allowed logical replication .snap files. When exceeded, slots are dropped until the limit is met. 
-1 disables the limit.\",\n\t\t\t\t\t\t\tNULL,\n\t\t\t\t\t\t\t&logical_replication_max_snap_files,\n\t\t\t\t\t\t\t10000, -1, INT_MAX,\n\t\t\t\t\t\t\tPGC_SIGHUP,\n\t\t\t\t\t\t\t0,\n\t\t\t\t\t\t\tNULL, NULL, NULL);\n\n\tDefineCustomIntVariable(\n\t\t\t\t\t\t\t\"neon.logical_replication_max_logicalsnapdir_size\",\n\t\t\t\t\t\t\t\"Maximum allowed size of the pg_logical/snapshots directory (KB). When exceeded, slots are dropped until the limit is met. -1 disables the limit.\",\n\t\t\t\t\t\t\tNULL,\n\t\t\t\t\t\t\t&logical_replication_max_logicalsnapdir_size,\n\t\t\t\t\t\t\t8000, -1, INT_MAX,\n\t\t\t\t\t\t\tPGC_SIGHUP,\n\t\t\t\t\t\t\tGUC_UNIT_KB,\n\t\t\t\t\t\t\tNULL, NULL, NULL);\n\n\tmemset(&bgw, 0, sizeof(bgw));\n\tbgw.bgw_flags = BGWORKER_SHMEM_ACCESS;\n\tbgw.bgw_start_time = BgWorkerStart_RecoveryFinished;\n\tsnprintf(bgw.bgw_library_name, BGW_MAXLEN, \"neon\");\n\tsnprintf(bgw.bgw_function_name, BGW_MAXLEN, \"LogicalSlotsMonitorMain\");\n\tsnprintf(bgw.bgw_name, BGW_MAXLEN, \"Logical replication monitor\");\n\tsnprintf(bgw.bgw_type, BGW_MAXLEN, \"Logical replication monitor\");\n\tbgw.bgw_restart_time = 5;\n\tbgw.bgw_notify_pid = 0;\n\tbgw.bgw_main_arg = (Datum) 0;\n\n\tRegisterBackgroundWorker(&bgw);\n}\n\n/*\n * Unused logical replication slots pins WAL and prevent deletion of snapshots.\n * WAL bloat is guarded by max_slot_wal_keep_size; this bgw removes slots which\n * need too many .snap files. These files are stored as AUX files, which are a\n * pageserver mechanism for storing non-relation data. AUX files are shipped in\n * in the basebackup which is requested by compute_ctl before Postgres starts.\n * The larger the time to retrieve the basebackup, the more likely it is the\n * compute will be killed by the control plane due to a timeout.\n */\nvoid\nLogicalSlotsMonitorMain(Datum main_arg)\n{\n\t/* Establish signal handlers. 
*/\n\tpqsignal(SIGUSR1, procsignal_sigusr1_handler);\n\tpqsignal(SIGHUP, SignalHandlerForConfigReload);\n\tpqsignal(SIGTERM, die);\n\n\tBackgroundWorkerUnblockSignals();\n\n\tfor (;;)\n\t{\n\t\tXLogRecPtr\tcutoff_lsn;\n\n\t\t/* In case of a SIGHUP, just reload the configuration. */\n\t\tif (ConfigReloadPending)\n\t\t{\n\t\t\tConfigReloadPending = false;\n\t\t\tProcessConfigFile(PGC_SIGHUP);\n\t\t}\n\n\t\t/* Get the cutoff LSN */\n\t\tcutoff_lsn = get_snapshots_cutoff_lsn();\n\t\tif (cutoff_lsn > 0)\n\t\t{\n\t\t\tfor (int i = 0; i < max_replication_slots; i++)\n\t\t\t{\n\t\t\t\tchar\t\tslot_name[NAMEDATALEN];\n\t\t\t\tReplicationSlot *s = &ReplicationSlotCtl->replication_slots[i];\n\t\t\t\tXLogRecPtr\trestart_lsn;\n\n\t\t\t\tLWLockAcquire(ReplicationSlotControlLock, LW_SHARED);\n\n\t\t\t\t/* Consider only active logical repliction slots */\n\t\t\t\tif (!s->in_use || !SlotIsLogical(s))\n\t\t\t\t{\n\t\t\t\t\tLWLockRelease(ReplicationSlotControlLock);\n\t\t\t\t\tcontinue;\n\t\t\t\t}\n\n\t\t\t\t/*\n\t\t\t\t * Retrieve the restart LSN to determine if we need to drop the\n\t\t\t\t * slot\n\t\t\t\t */\n\t\t\t\tSpinLockAcquire(&s->mutex);\n\t\t\t\trestart_lsn = s->data.restart_lsn;\n\t\t\t\tSpinLockRelease(&s->mutex);\n\n\t\t\t\tstrlcpy(slot_name, s->data.name.data, sizeof(slot_name));\n\t\t\t\tLWLockRelease(ReplicationSlotControlLock);\n\n\t\t\t\tif (restart_lsn >= cutoff_lsn)\n\t\t\t\t{\n\t\t\t\t\telog(LOG, \"ls_monitor: not dropping replication slot %s because restart LSN %X/%X is greater than cutoff LSN %X/%X\",\n\t\t\t\t\t\t slot_name, LSN_FORMAT_ARGS(restart_lsn), LSN_FORMAT_ARGS(cutoff_lsn));\n\t\t\t\t\tcontinue;\n\t\t\t\t}\n\n\t\t\t\telog(LOG, \"ls_monitor: dropping replication slot %s because restart LSN %X/%X lower than cutoff LSN %X/%X\",\n\t\t\t\t\t slot_name, LSN_FORMAT_ARGS(restart_lsn), LSN_FORMAT_ARGS(cutoff_lsn));\n\n\t\t\t\t/* now try to drop it, killing owner before, if any */\n\t\t\t\tfor 
(;;)\n\t\t\t\t{\n\t\t\t\t\tpid_t\t\tactive_pid;\n\n\t\t\t\t\tSpinLockAcquire(&s->mutex);\n\t\t\t\t\tactive_pid = s->active_pid;\n\t\t\t\t\tSpinLockRelease(&s->mutex);\n\n\t\t\t\t\tif (active_pid == 0)\n\t\t\t\t\t{\n\t\t\t\t\t\t/*\n\t\t\t\t\t\t * Slot is released, try to drop it. Though of course,\n\t\t\t\t\t\t * it could have been reacquired, so drop can ERROR\n\t\t\t\t\t\t * out. Similarly, it could have been dropped in the\n\t\t\t\t\t\t * meanwhile.\n\t\t\t\t\t\t *\n\t\t\t\t\t\t * In principle we could remove pg_try/pg_catch, that\n\t\t\t\t\t\t * would restart the whole bgworker.\n\t\t\t\t\t\t */\n\t\t\t\t\t\tConditionVariableCancelSleep();\n\t\t\t\t\t\tPG_TRY();\n\t\t\t\t\t\t{\n\t\t\t\t\t\t\tReplicationSlotDrop(slot_name, true);\n\t\t\t\t\t\t\telog(LOG, \"ls_monitor: replication slot %s dropped\", slot_name);\n\t\t\t\t\t\t}\n\t\t\t\t\t\tPG_CATCH();\n\t\t\t\t\t\t{\n\t\t\t\t\t\t\t/* log ERROR and reset elog stack */\n\t\t\t\t\t\t\tEmitErrorReport();\n\t\t\t\t\t\t\tFlushErrorState();\n\t\t\t\t\t\t\telog(LOG, \"ls_monitor: failed to drop replication slot %s\", slot_name);\n\t\t\t\t\t\t}\n\t\t\t\t\t\tPG_END_TRY();\n\t\t\t\t\t\tbreak;\n\t\t\t\t\t}\n\t\t\t\t\telse\n\t\t\t\t\t{\n\t\t\t\t\t\t/* kill the owner and wait for release */\n\t\t\t\t\t\telog(LOG, \"ls_monitor: killing replication slot %s owner %d\", slot_name, active_pid);\n\t\t\t\t\t\t(void) kill(active_pid, SIGTERM);\n\t\t\t\t\t\t/* We shouldn't get stuck, but to be safe add timeout. */\n\t\t\t\t\t\tConditionVariableTimedSleep(&s->active_cv, 1000, WAIT_EVENT_REPLICATION_SLOT_DROP);\n\t\t\t\t\t}\n\t\t\t\t}\n\t\t\t}\n\t\t}\n\n\t\t(void) WaitLatch(MyLatch,\n\t\t\t\t\t\t WL_LATCH_SET | WL_EXIT_ON_PM_DEATH | WL_TIMEOUT,\n\t\t\t\t\t\t LS_MONITOR_CHECK_INTERVAL,\n\t\t\t\t\t\t PG_WAIT_EXTENSION);\n\t\tResetLatch(MyLatch);\n\t\tCHECK_FOR_INTERRUPTS();\n\t}\n}\n"
  },
  {
    "path": "pgxn/neon/logical_replication_monitor.h",
    "content": "#ifndef __NEON_LOGICAL_REPLICATION_MONITOR_H__\n#define __NEON_LOGICAL_REPLICATION_MONITOR_H__\n\nvoid InitLogicalReplicationMonitor(void);\n\n#endif\n"
  },
  {
    "path": "pgxn/neon/neon--1.0--1.1.sql",
    "content": "\\echo Use \"ALTER EXTENSION neon UPDATE TO '1.1'\" to load this file. \\quit\n\nCREATE FUNCTION neon_get_lfc_stats()\nRETURNS SETOF RECORD\nAS 'MODULE_PATHNAME', 'neon_get_lfc_stats'\nLANGUAGE C PARALLEL SAFE;\n\n-- Create a view for convenient access.\nCREATE VIEW neon_lfc_stats AS\n\tSELECT P.* FROM neon_get_lfc_stats() AS P (lfc_key text, lfc_value bigint);\n"
  },
  {
    "path": "pgxn/neon/neon--1.0.sql",
    "content": "\\echo Use \"CREATE EXTENSION neon\" to load this file. \\quit\n\nCREATE FUNCTION pg_cluster_size()\nRETURNS bigint\nAS 'MODULE_PATHNAME', 'pg_cluster_size'\nLANGUAGE C STRICT\nPARALLEL UNSAFE;\n\nCREATE FUNCTION backpressure_lsns(\n    OUT received_lsn pg_lsn,\n    OUT disk_consistent_lsn pg_lsn,\n    OUT remote_consistent_lsn pg_lsn\n)\nRETURNS record\nAS 'MODULE_PATHNAME', 'backpressure_lsns'\nLANGUAGE C STRICT\nPARALLEL UNSAFE;\n\nCREATE FUNCTION backpressure_throttling_time()\nRETURNS bigint\nAS 'MODULE_PATHNAME', 'backpressure_throttling_time'\nLANGUAGE C STRICT\nPARALLEL UNSAFE;\n\nCREATE FUNCTION local_cache_pages()\nRETURNS SETOF RECORD\nAS 'MODULE_PATHNAME', 'local_cache_pages'\nLANGUAGE C PARALLEL SAFE;\n\n-- Create a view for convenient access.\nCREATE VIEW local_cache AS\n\tSELECT P.* FROM local_cache_pages() AS P\n\t(pageoffs int8, relfilenode oid, reltablespace oid, reldatabase oid,\n\t relforknumber int2, relblocknumber int8, accesscount int4);\n"
  },
  {
    "path": "pgxn/neon/neon--1.1--1.0.sql",
    "content": "-- the order of operations is important here\n-- because the view depends on the function\n\nDROP VIEW IF EXISTS neon_lfc_stats CASCADE;\n\nDROP FUNCTION IF EXISTS neon_get_lfc_stats CASCADE;\n"
  },
  {
    "path": "pgxn/neon/neon--1.1--1.2.sql",
    "content": "\\echo Use \"ALTER EXTENSION neon UPDATE TO '1.2'\" to load this file. \\quit\n\n-- Create a convenient view similar to pg_stat_database\n-- that exposes all lfc stat values in one row.\nCREATE OR REPLACE VIEW NEON_STAT_FILE_CACHE AS \n   WITH lfc_stats AS (\n   SELECT \n     stat_name, \n     count\n   FROM neon_get_lfc_stats() AS t(stat_name text, count bigint)\n   ),\n   lfc_values AS (\n   SELECT \n     MAX(CASE WHEN stat_name = 'file_cache_misses' THEN count ELSE NULL END) AS file_cache_misses,\n     MAX(CASE WHEN stat_name = 'file_cache_hits'   THEN count ELSE NULL END) AS file_cache_hits,\n     MAX(CASE WHEN stat_name = 'file_cache_used'   THEN count ELSE NULL END) AS file_cache_used,\n     MAX(CASE WHEN stat_name = 'file_cache_writes' THEN count ELSE NULL END) AS file_cache_writes,\n     -- Calculate the file_cache_hit_ratio within the same CTE for simplicity\n     CASE \n        WHEN MAX(CASE WHEN stat_name = 'file_cache_misses' THEN count ELSE 0 END) + MAX(CASE WHEN stat_name = 'file_cache_hits' THEN count ELSE 0 END) = 0 THEN NULL\n        ELSE ROUND((MAX(CASE WHEN stat_name = 'file_cache_hits' THEN count ELSE 0 END)::DECIMAL / \n        (MAX(CASE WHEN stat_name = 'file_cache_hits' THEN count ELSE 0 END) + MAX(CASE WHEN stat_name = 'file_cache_misses' THEN count ELSE 0 END))) * 100, 2)\n     END AS file_cache_hit_ratio\n   FROM lfc_stats\n   )\nSELECT file_cache_misses, file_cache_hits, file_cache_used, file_cache_writes, file_cache_hit_ratio from lfc_values;\n\n-- externalize the view to all users in role pg_monitor\nGRANT SELECT ON NEON_STAT_FILE_CACHE TO PG_MONITOR;"
  },
  {
    "path": "pgxn/neon/neon--1.2--1.1.sql",
    "content": "DROP VIEW IF EXISTS NEON_STAT_FILE_CACHE CASCADE;\n"
  },
  {
    "path": "pgxn/neon/neon--1.2--1.3.sql",
    "content": "\\echo Use \"ALTER EXTENSION neon UPDATE TO '1.3'\" to load this file. \\quit\n\nCREATE FUNCTION approximate_working_set_size(reset bool)\nRETURNS integer\nAS 'MODULE_PATHNAME', 'approximate_working_set_size'\nLANGUAGE C PARALLEL SAFE;\n\nGRANT EXECUTE ON FUNCTION approximate_working_set_size(bool) TO pg_monitor;\n\n"
  },
  {
    "path": "pgxn/neon/neon--1.3--1.2.sql",
    "content": "DROP FUNCTION IF EXISTS approximate_working_set_size(bool) CASCADE;\n"
  },
  {
    "path": "pgxn/neon/neon--1.3--1.4.sql",
    "content": "\\echo Use \"ALTER EXTENSION neon UPDATE TO '1.4'\" to load this file. \\quit\n\nCREATE FUNCTION approximate_working_set_size_seconds(duration integer default null)\nRETURNS integer\nAS 'MODULE_PATHNAME', 'approximate_working_set_size_seconds'\nLANGUAGE C PARALLEL SAFE;\n\nGRANT EXECUTE ON FUNCTION approximate_working_set_size_seconds(integer) TO pg_monitor;\n\n"
  },
  {
    "path": "pgxn/neon/neon--1.4--1.3.sql",
    "content": "DROP FUNCTION IF EXISTS approximate_working_set_size_seconds(integer) CASCADE;\n"
  },
  {
    "path": "pgxn/neon/neon--1.4--1.5.sql",
    "content": "\\echo Use \"ALTER EXTENSION neon UPDATE TO '1.5'\" to load this file. \\quit\n\n\nCREATE FUNCTION get_backend_perf_counters()\nRETURNS SETOF RECORD\nAS 'MODULE_PATHNAME', 'neon_get_backend_perf_counters'\nLANGUAGE C PARALLEL SAFE;\n\nCREATE FUNCTION get_perf_counters()\nRETURNS SETOF RECORD\nAS 'MODULE_PATHNAME', 'neon_get_perf_counters'\nLANGUAGE C PARALLEL SAFE;\n\n-- Show various metrics, for each backend. Note that the values are not reset\n-- when a backend exits. When a new backend starts with the backend ID, it will\n-- continue accumulating the values from where the old backend left. If you are\n-- only interested in the changes from your own session, store the values at the\n-- beginning of the session somewhere, and subtract them on subsequent calls.\n--\n-- For histograms, 'bucket_le' is the upper bound of the histogram bucket.\nCREATE VIEW neon_backend_perf_counters AS\n  SELECT P.procno, P.pid, P.metric, P.bucket_le, P.value\n  FROM get_backend_perf_counters() AS P (\n    procno integer,\n    pid integer,\n    metric text,\n    bucket_le float8,\n    value float8\n  );\n\n-- Summary across all backends. (This could also be implemented with\n-- an aggregate query over neon_backend_perf_counters view.)\nCREATE VIEW neon_perf_counters AS\n  SELECT P.metric, P.bucket_le, P.value\n  FROM get_perf_counters() AS P (\n    metric text,\n    bucket_le float8,\n    value float8\n  );\n"
  },
  {
    "path": "pgxn/neon/neon--1.5--1.4.sql",
    "content": "DROP VIEW IF EXISTS neon_perf_counters;\nDROP VIEW IF EXISTS neon_backend_perf_counters;\nDROP FUNCTION IF EXISTS get_perf_counters();\nDROP FUNCTION IF EXISTS get_backend_perf_counters();\n"
  },
  {
    "path": "pgxn/neon/neon--1.5--1.6.sql",
    "content": "\\echo Use \"ALTER EXTENSION neon UPDATE TO '1.6'\" to load this file. \\quit\n\nCREATE FUNCTION get_prewarm_info(out total_pages integer, out prewarmed_pages integer, out skipped_pages integer, out active_workers integer)\nRETURNS record\nAS 'MODULE_PATHNAME', 'get_prewarm_info'\nLANGUAGE C STRICT\nPARALLEL SAFE;\n\nCREATE FUNCTION get_local_cache_state(max_chunks integer default null)\nRETURNS bytea\nAS 'MODULE_PATHNAME', 'get_local_cache_state'\nLANGUAGE C\nPARALLEL UNSAFE;\n\nCREATE FUNCTION prewarm_local_cache(state bytea, n_workers integer default 1)\nRETURNS void\nAS 'MODULE_PATHNAME', 'prewarm_local_cache'\nLANGUAGE C STRICT\nPARALLEL UNSAFE;\n\n\n\n"
  },
  {
    "path": "pgxn/neon/neon--1.6--1.5.sql",
    "content": "DROP FUNCTION IF EXISTS get_prewarm_info(out total_pages integer, out prewarmed_pages integer, out skipped_pages integer, out active_workers integer);\n\nDROP FUNCTION IF EXISTS get_local_cache_state(max_chunks integer);\n\nDROP FUNCTION IF EXISTS prewarm_local_cache(state bytea, n_workers integer);\n\n\n"
  },
  {
    "path": "pgxn/neon/neon.c",
    "content": "/*-------------------------------------------------------------------------\n *\n * neon.c\n *\t  Main entry point into the neon extension\n *\n *-------------------------------------------------------------------------\n */\n#include \"postgres.h\"\n#include \"fmgr.h\"\n\n#include \"miscadmin.h\"\n#include \"pgstat.h\"\n#include \"access/subtrans.h\"\n#include \"access/twophase.h\"\n#include \"access/xlog.h\"\n#if PG_MAJORVERSION_NUM >= 15\n#include \"access/xlogrecovery.h\"\n#endif\n#include \"executor/instrument.h\"\n#include \"replication/logical.h\"\n#include \"replication/logicallauncher.h\"\n#include \"replication/slot.h\"\n#include \"replication/walsender.h\"\n#include \"storage/proc.h\"\n#include \"storage/ipc.h\"\n#include \"funcapi.h\"\n#include \"access/htup_details.h\"\n#include \"utils/builtins.h\"\n#include \"utils/pg_lsn.h\"\n#include \"utils/guc.h\"\n#include \"utils/guc_tables.h\"\n\n#include \"communicator.h\"\n#include \"communicator_process.h\"\n#include \"extension_server.h\"\n#include \"file_cache.h\"\n#include \"neon.h\"\n#include \"neon_ddl_handler.h\"\n#include \"neon_lwlsncache.h\"\n#include \"neon_perf_counters.h\"\n#include \"logical_replication_monitor.h\"\n#include \"unstable_extensions.h\"\n#include \"walsender_hooks.h\"\n#if PG_MAJORVERSION_NUM >= 16\n#include \"storage/ipc.h\"\n#endif\n\nPG_MODULE_MAGIC;\nvoid\t\t_PG_init(void);\n\nbool lakebase_mode = false;\n\nstatic int  running_xacts_overflow_policy;\nstatic emit_log_hook_type prev_emit_log_hook;\nstatic bool monitor_query_exec_time = false;\n\nstatic ExecutorStart_hook_type prev_ExecutorStart = NULL;\nstatic ExecutorEnd_hook_type prev_ExecutorEnd = NULL;\n\nstatic void neon_ExecutorStart(QueryDesc *queryDesc, int eflags);\nstatic void neon_ExecutorEnd(QueryDesc *queryDesc);\n\nstatic shmem_startup_hook_type prev_shmem_startup_hook;\nstatic void neon_shmem_startup_hook(void);\nstatic void neon_shmem_request_hook(void);\n\n#if PG_MAJORVERSION_NUM >= 15\nstatic 
shmem_request_hook_type prev_shmem_request_hook = NULL;\n#endif\n\n\n#if PG_MAJORVERSION_NUM >= 17\nuint32\t\tWAIT_EVENT_NEON_LFC_MAINTENANCE;\nuint32\t\tWAIT_EVENT_NEON_LFC_READ;\nuint32\t\tWAIT_EVENT_NEON_LFC_TRUNCATE;\nuint32\t\tWAIT_EVENT_NEON_LFC_WRITE;\nuint32\t\tWAIT_EVENT_NEON_LFC_CV_WAIT;\nuint32\t\tWAIT_EVENT_NEON_PS_STARTING;\nuint32\t\tWAIT_EVENT_NEON_PS_CONFIGURING;\nuint32\t\tWAIT_EVENT_NEON_PS_SEND;\nuint32\t\tWAIT_EVENT_NEON_PS_READ;\nuint32\t\tWAIT_EVENT_NEON_WAL_DL;\n#endif\n\nint databricks_test_hook = 0;\n\nenum RunningXactsOverflowPolicies {\n\tOP_IGNORE,\n\tOP_SKIP,\n\tOP_WAIT\n};\n\nstatic const struct config_enum_entry running_xacts_overflow_policies[] = {\n\t{\"ignore\", OP_IGNORE, false},\n\t{\"skip\", OP_SKIP, false},\n\t{\"wait\", OP_WAIT, false},\n\t{NULL, 0, false}\n};\n\nstatic const struct config_enum_entry debug_compare_local_modes[] = {\n\t{\"none\", DEBUG_COMPARE_LOCAL_NONE, false},\n\t{\"prefetch\", DEBUG_COMPARE_LOCAL_PREFETCH, false},\n\t{\"lfc\", DEBUG_COMPARE_LOCAL_LFC, false},\n\t{\"all\", DEBUG_COMPARE_LOCAL_ALL, false},\n\t{NULL, 0, false}\n};\n\n/*\n * XXX: These are private to procarray.c, but we need them here.\n */\n#define PROCARRAY_MAXPROCS\t(MaxBackends + max_prepared_xacts)\n#define TOTAL_MAX_CACHED_SUBXIDS \\\n\t((PGPROC_MAX_CACHED_SUBXIDS + 1) * PROCARRAY_MAXPROCS)\n\n/*\n * Restore running-xact information by scanning the CLOG at startup.\n *\n * In PostgreSQL, a standby always has to wait for a running-xacts WAL record\n * to arrive before it can start accepting queries. Furthermore, if there are\n * transactions with too many subxids (> 64) open to fit in the in-memory\n * subxids cache, the running-xacts record will be marked as \"suboverflowed\",\n * and the standby will need to also wait for the currently in-progress\n * transactions to finish.\n *\n * That's not great in PostgreSQL, because a hot standby does not necessarily\n * open up for queries immediately as you might expect. 
But it's worse in\n * Neon: A standby in Neon doesn't need to start WAL replay from a checkpoint\n * record; it can start at any LSN. Postgres arranges things so that there is\n * a running-xacts record soon after every checkpoint record, but when you\n * start from an arbitrary LSN, that doesn't help. If the primary is idle, or\n * not running at all, it might never write a new running-xacts record,\n * leaving the replica in a limbo where it can never start accepting queries.\n *\n * To mitigate that, we have an additional mechanism to find the running-xacts\n * information: we scan the CLOG, making note of any XIDs not marked as\n * committed or aborted. They are added to the Postgres known-assigned XIDs\n * array by calling ProcArrayApplyRecoveryInfo() in the caller of this\n * function.\n *\n * There is one big limitation with that mechanism: The size of the\n * known-assigned XIDs is limited, so if there are a lot of in-progress XIDs,\n * we have to give up. Furthermore, we don't know how many of the in-progress\n * XIDs are subtransactions, and if we use up all the space in the\n * known-assigned XIDs array for subtransactions, we might run out of space in\n * the array later during WAL replay, causing the replica to shut down with\n * \"ERROR: too many KnownAssignedXids\". The safe # of XIDs that we can add to\n * the known-assigned array without risking that error later is very low,\n * merely PGPROC_MAX_CACHED_SUBXIDS == 64, so we take our chances and use up\n * to half of the known-assigned XIDs array for the subtransactions, even\n * though that risks getting the error later.\n *\n * Note: It's OK if the recovered list of XIDs includes some transactions that\n * have crashed in the primary, and hence will never commit. They will be seen\n * as in-progress, until we see a new running-xacts record with an\n * oldestActiveXid that invalidates them. 
That's how the known-assigned XIDs\n * array always works.\n *\n * If scraping the CLOG doesn't succeed for some reason, like the subxid\n * overflow, Postgres will fall back to waiting for a running-xacts record\n * like usual.\n *\n * Returns true if a complete list of in-progress XIDs was scraped.\n */\nstatic bool\nRestoreRunningXactsFromClog(CheckPoint *checkpoint, TransactionId **xids, int *nxids)\n{\n\tTransactionId from;\n\tTransactionId till;\n\tint\t\t\tmax_xcnt;\n\tTransactionId *prepared_xids = NULL;\n\tint\t\t\tn_prepared_xids;\n\tTransactionId *restored_xids = NULL;\n\tint\t\t\tn_restored_xids;\n\tint\t\t\tnext_prepared_idx;\n\n\tAssert(*xids == NULL);\n\n\t/*\n\t * If the checkpoint doesn't have a valid oldestActiveXid, bail out. We\n\t * don't know where to start the scan.\n\t *\n\t * This shouldn't happen, because the pageserver always maintains a valid\n\t * oldestActiveXid nowadays. Except when starting at an old point in time\n\t * that was ingested before the pageserver was taught to do that.\n\t */\n\tif (!TransactionIdIsValid(checkpoint->oldestActiveXid))\n\t{\n\t\telog(LOG, \"cannot restore running-xacts from CLOG because oldestActiveXid is not set\");\n\t\tgoto fail;\n\t}\n\n\t/*\n\t * We will scan the CLOG starting from the oldest active XID.\n\t *\n\t * In some corner cases, the oldestActiveXid from the last checkpoint\n\t * might already have been truncated from the CLOG. That is,\n\t * oldestActiveXid might be older than oldestXid. That's possible because\n\t * oldestActiveXid is only updated at checkpoints. After the last\n\t * checkpoint, the oldest transaction might have committed, and the CLOG\n\t * might also have been already truncated. So if oldestActiveXid is older\n\t * than oldestXid, start at oldestXid instead. (Otherwise we'd try to\n\t * access CLOG segments that have already been truncated away.)\n\t */\n\tfrom = TransactionIdPrecedes(checkpoint->oldestXid, checkpoint->oldestActiveXid)\n\t\t? 
checkpoint->oldestActiveXid : checkpoint->oldestXid;\n\ttill = XidFromFullTransactionId(checkpoint->nextXid);\n\n\t/*\n\t * To avoid \"too many KnownAssignedXids\" error later during replay, we\n\t * limit the number of collected transactions. This is a tradeoff: if we are\n\t * willing to consume more of the KnownAssignedXids space for the XIDs\n\t * now, that allows us to start up, but we might run out of space later.\n\t *\n\t * The size of the KnownAssignedXids array is TOTAL_MAX_CACHED_SUBXIDS,\n\t * which is ((PGPROC_MAX_CACHED_SUBXIDS + 1) * PROCARRAY_MAXPROCS). In\n\t * PostgreSQL, that's always enough because the primary will always write\n\t * an XLOG_XACT_ASSIGNMENT record if a transaction has more than\n\t * PGPROC_MAX_CACHED_SUBXIDS subtransactions. Seeing that record allows\n\t * the standby to mark the XIDs in pg_subtrans and remove them from the\n\t * KnownAssignedXids array.\n\t *\n\t * Here, we don't know which XIDs belong to subtransactions that have\n\t * already been WAL-logged with an XLOG_XACT_ASSIGNMENT record. If we\n\t * wanted to be totally safe and avoid the possibility of getting a \"too\n\t * many KnownAssignedXids\" error later, we would have to limit ourselves\n\t * to PGPROC_MAX_CACHED_SUBXIDS, which is not much. And that includes top\n\t * transaction IDs too, because we cannot distinguish between top\n\t * transaction IDs and subtransactions here.\n\t *\n\t * Somewhat arbitrarily, we use up to half of KnownAssignedXids. That\n\t * strikes a sensible balance between being useful, and risking a \"too\n\t * many KnownAssignedXids\" error later.\n\t */\n\tmax_xcnt = TOTAL_MAX_CACHED_SUBXIDS / 2;\n\n\t/*\n\t * Collect XIDs of prepared transactions in an array. This includes only\n\t * their top-level XIDs. 
We assume that StandbyRecoverPreparedTransactions\n\t * has already been called, so we can find all the sub-transactions in\n\t * pg_subtrans.\n\t */\n\tPrescanPreparedTransactions(&prepared_xids, &n_prepared_xids);\n\tqsort(prepared_xids, n_prepared_xids, sizeof(TransactionId), xidLogicalComparator);\n\n\t/*\n\t * Scan the CLOG, collecting in-progress XIDs into 'restored_xids'.\n\t */\n\telog(DEBUG1, \"scanning CLOG between %u and %u for in-progress XIDs\", from, till);\n\trestored_xids = (TransactionId *) palloc(max_xcnt * sizeof(TransactionId));\n\tn_restored_xids = 0;\n\tnext_prepared_idx = 0;\n\n\tfor (TransactionId xid = from; xid != till;)\n\t{\n\t\tXLogRecPtr\txidlsn;\n\t\tXidStatus\txidstatus;\n\n\t\txidstatus = TransactionIdGetStatus(xid, &xidlsn);\n\n\t\t/*\n\t\t * \"Merge\" the prepared transactions into the restored_xids array as\n\t\t * we go.  The prepared transactions array is sorted. This is mostly\n\t\t * a sanity check to ensure that all the prepared transactions are\n\t\t * seen as in-progress. 
(There is a check after the loop that we didn't\n\t\t * miss any.)\n\t\t */\n\t\tif (next_prepared_idx < n_prepared_xids && xid == prepared_xids[next_prepared_idx])\n\t\t{\n\t\t\t/*\n\t\t\t * This is a top-level transaction ID of a prepared transaction.\n\t\t\t * Include it in the array.\n\t\t\t */\n\n\t\t\t/* sanity check */\n\t\t\tif (xidstatus != TRANSACTION_STATUS_IN_PROGRESS)\n\t\t\t{\n\t\t\t\telog(LOG, \"prepared transaction %u has unexpected status %X, cannot restore running-xacts from CLOG\",\n\t\t\t\t\t xid, xidstatus);\n\t\t\t\tAssert(false);\n\t\t\t\tgoto fail;\n\t\t\t}\n\n\t\t\telog(DEBUG1, \"XID %u: was next prepared xact (%d / %d)\", xid, next_prepared_idx, n_prepared_xids);\n\t\t\tnext_prepared_idx++;\n\t\t}\n\t\telse if (xidstatus == TRANSACTION_STATUS_COMMITTED)\n\t\t{\n\t\t\telog(DEBUG1, \"XID %u: was committed\", xid);\n\t\t\tgoto skip;\n\t\t}\n\t\telse if (xidstatus == TRANSACTION_STATUS_ABORTED)\n\t\t{\n\t\t\telog(DEBUG1, \"XID %u: was aborted\", xid);\n\t\t\tgoto skip;\n\t\t}\n\t\telse if (xidstatus == TRANSACTION_STATUS_IN_PROGRESS)\n\t\t{\n\t\t\t/*\n\t\t\t * In-progress transactions are included in the array.\n\t\t\t *\n\t\t\t * Except subtransactions of the prepared transactions. They are\n\t\t\t * already set in pg_subtrans, and hence don't need to be tracked\n\t\t\t * in the known-assigned XIDs array.\n\t\t\t */\n\t\t\tif (n_prepared_xids > 0)\n\t\t\t{\n\t\t\t\tTransactionId parent = SubTransGetParent(xid);\n\n\t\t\t\tif (TransactionIdIsValid(parent))\n\t\t\t\t{\n\t\t\t\t\t/*\n\t\t\t\t\t * This is a subtransaction belonging to a prepared\n\t\t\t\t\t * transaction.\n\t\t\t\t\t *\n\t\t\t\t\t * Sanity check that it is in the prepared XIDs array. It\n\t\t\t\t\t * should be, because StandbyRecoverPreparedTransactions\n\t\t\t\t\t * populated pg_subtrans, and no other XID should be set\n\t\t\t\t\t * in it yet. 
(This also relies on the fact that\n\t\t\t\t\t * StandbyRecoverPreparedTransactions sets the parent of\n\t\t\t\t\t * each subxid to point directly to the top-level XID,\n\t\t\t\t\t * rather than restoring the original subtransaction\n\t\t\t\t\t * hierarchy.)\n\t\t\t\t\t */\n\t\t\t\t\tif (bsearch(&parent, prepared_xids, next_prepared_idx,\n\t\t\t\t\t\t\t\tsizeof(TransactionId), xidLogicalComparator) == NULL)\n\t\t\t\t\t{\n\t\t\t\t\t\telog(LOG, \"sub-XID %u has unexpected parent %u, cannot restore running-xacts from CLOG\",\n\t\t\t\t\t\t\t xid, parent);\n\t\t\t\t\t\tAssert(false);\n\t\t\t\t\t\tgoto fail;\n\t\t\t\t\t}\n\t\t\t\t\telog(DEBUG1, \"XID %u: was a subtransaction of prepared xid %u\", xid, parent);\n\t\t\t\t\tgoto skip;\n\t\t\t\t}\n\t\t\t}\n\n\t\t\t/* include it in the array */\n\t\t\telog(DEBUG1, \"XID %u: is in progress\", xid);\n\t\t}\n\t\telse\n\t\t{\n\t\t\t/*\n\t\t\t * SUB_COMMITTED is a transient state used at commit. We don't\n\t\t\t * expect to see that here.\n\t\t\t */\n\t\t\telog(LOG, \"XID %u has unexpected status %X in pg_xact, cannot restore running-xacts from CLOG\",\n\t\t\t\t xid, xidstatus);\n\t\t\tAssert(false);\n\t\t\tgoto fail;\n\t\t}\n\n\t\tif (n_restored_xids >= max_xcnt)\n\t\t{\n\t\t\t/*\n\t\t\t * Overflowed. 
We won't be able to install the RunningTransactions\n\t\t\t * snapshot.\n\t\t\t */\n\t\t\telog(LOG, \"too many running xacts to restore from the CLOG; oldestXid=%u oldestActiveXid=%u nextXid %u\",\n\t\t\t\t checkpoint->oldestXid, checkpoint->oldestActiveXid,\n\t\t\t\t XidFromFullTransactionId(checkpoint->nextXid));\n\n\t\t\tswitch (running_xacts_overflow_policy)\n\t\t\t{\n\t\t\t\tcase OP_WAIT:\n\t\t\t\t\tgoto fail;\n\t\t\t\tcase OP_IGNORE:\n\t\t\t\t\tgoto success;\n\t\t\t\tcase OP_SKIP:\n\t\t\t\t\tn_restored_xids = 0;\n\t\t\t\t\tgoto success;\n\t\t\t}\n\t\t}\n\n\t\trestored_xids[n_restored_xids++] = xid;\n\n\tskip:\n\t\tTransactionIdAdvance(xid);\n\t}\n\n\t/* sanity check */\n\tif (next_prepared_idx != n_prepared_xids)\n\t{\n\t\telog(LOG, \"prepared transaction ID %u was not visited in the CLOG scan, cannot restore running-xacts from CLOG\",\n\t\t\t prepared_xids[next_prepared_idx]);\n\t\tAssert(false);\n\t\tgoto fail;\n\t}\n   success:\n\telog(LOG, \"restored %d running xacts by scanning the CLOG; oldestXid=%u oldestActiveXid=%u nextXid %u\",\n\t\t n_restored_xids, checkpoint->oldestXid, checkpoint->oldestActiveXid, XidFromFullTransactionId(checkpoint->nextXid));\n\t*nxids = n_restored_xids;\n\t*xids = restored_xids;\n\tif (prepared_xids)\n\t\tpfree(prepared_xids);\n\treturn true;\n\n fail:\n\t*nxids = 0;\n\t*xids = NULL;\n\tif (restored_xids)\n\t\tpfree(restored_xids);\n\tif (prepared_xids)\n\t\tpfree(prepared_xids);\n\treturn false;\n}\n\n\n/*\n * pgbouncer is able to track GUCs reported by Postgres.\n * But most parameters cannot be tracked this way. The only parameters that can be tracked are ones\n * that Postgres reports to the client. 
Unfortunately `search_path` is not reported by Postgres:\n * https://www.postgresql.org/message-id/flat/CAGECzQQ6xFcgrg%2Be0p9mCumtK362TiA6vTiiZKoYbS8OXggwuQ%40mail.gmail.com#be4bfd7a9cf1f0633bdb2d1790a0a1be\n * This code sets the GUC_REPORT flag for `search_path`, making it possible to include it in\n * pgbouncer's `track_extra_parameters` list.\n *\n * This code is inspired by how the Citus extension does this, see\n * https://github.com/citusdata/citus/blob/2a263fe69a707d16ef24378f7650742386b0968f/src/backend/distributed/shared_library_init.c#L2694\n */\nstatic void\nReportSearchPath(void)\n{\n#if PG_VERSION_NUM >= 160000\n\tint nGucs = 0;\n\tstruct config_generic **gucs = get_guc_variables(&nGucs);\n#else\n\tstruct config_generic **gucs = get_guc_variables();\n\tint nGucs = GetNumConfigOptions();\n#endif\n\n\tfor (int i = 0; i < nGucs; i++)\n\t{\n\t\tstruct config_generic *guc = (struct config_generic *) gucs[i];\n\n\t\tif (strcmp(guc->name, \"search_path\") == 0)\n\t\t{\n\t\t\tguc->flags |= GUC_REPORT;\n\t\t}\n\t}\n}\n\n#if PG_VERSION_NUM < 150000\n/*\n * PG14 uses a separate backend for the stats collector, which has no access to shared memory.\n * Because the AUX mechanism requires access to shared memory, persisting the pgstat.stat file\n * is not supported in PG14. 
And so there is no definition of neon_pgstat_file_size_limit\n * variable, so we have to declare it here.\n */\nstatic int neon_pgstat_file_size_limit;\n#endif\n\nstatic void DatabricksSqlErrorHookImpl(ErrorData *edata) {\n\tif (prev_emit_log_hook != NULL) {\n\t\tprev_emit_log_hook(edata);\n\t}\n\n\tif (edata->sqlerrcode == ERRCODE_DATA_CORRUPTED) {\n\t\tpg_atomic_fetch_add_u32(&databricks_metrics_shared->data_corruption_count, 1);\n\t} else if (edata->sqlerrcode == ERRCODE_INDEX_CORRUPTED) {\n\t\tpg_atomic_fetch_add_u32(&databricks_metrics_shared->index_corruption_count, 1);\n\t} else if (edata->sqlerrcode == ERRCODE_INTERNAL_ERROR) {\n\t\tpg_atomic_fetch_add_u32(&databricks_metrics_shared->internal_error_count, 1);\n\t}\n}\n\nvoid\n_PG_init(void)\n{\n\t/*\n\t * Also load 'neon_rmgr'. This makes it unnecessary to list both 'neon'\n\t * and 'neon_rmgr' in shared_preload_libraries.\n\t */\n#if PG_VERSION_NUM >= 160000\n\tload_file(\"$libdir/neon_rmgr\", false);\n#endif\n\n\tif (lakebase_mode) {\n\t\tprev_emit_log_hook = emit_log_hook;\n\t\temit_log_hook = DatabricksSqlErrorHookImpl;\n\t}\n\n\t/*\n\t * Initializing a pre-loaded Postgres extension happens in three stages:\n\t *\n\t * 1. _PG_init() is called early at postmaster startup. In this stage, no\n\t *    shared memory has been allocated yet. Core Postgres GUCs have been\n\t *    initialized from the config files, but notably, MaxBackends has not\n\t *    been calculated yet. In this stage, we must register any extension GUCs\n\t *    and can do other early initialization that doesn't depend on shared\n\t *    memory. In this stage we must also register \"shmem request\" and\n\t *    \"shmem startup\" hooks, to be called in stages 2 and 3.\n\t *\n\t * 2. After MaxBackends has been calculated, the \"shmem request\" hooks\n\t *    are called. The hooks can reserve shared memory by calling\n\t *    RequestAddinShmemSpace and RequestNamedLWLockTranche().  
The \"shmem\n\t *    request hooks\" are a new mechanism in Postgres v15. In v14 and\n\t *    below, you had to make those Requests in stage 1 already, which\n\t *    means they could not depend on MaxBackends. (See hack in\n\t *    NeonPerfCountersShmemRequest())\n\t *\n\t * 3. After some more runtime-computed GUCs that affect the amount of\n\t *    shared memory needed have been calculated, the \"shmem startup\" hooks\n\t *    are called. In this stage, we allocate any shared memory, LWLocks\n\t *    and other shared resources.\n\t *\n\t * Here, in the 'neon' extension, we register just one shmem request hook\n\t * and one startup hook, which call into functions in all the subsystems\n\t * that are part of the extension. On v14, the ShmemRequest functions are\n\t * called in stage 1, and on v15 onwards they are called in stage 2.\n\t */\n\n\t/* Stage 1: Define GUCs, and other early intialization */\n\tpg_init_libpagestore();\n\trelsize_hash_init();\n\tlfc_init();\n\tpg_init_walproposer();\n\tinit_lwlsncache();\n\n\tpg_init_communicator_process();\n\n\tpg_init_communicator();\n\tCustom_XLogReaderRoutines = NeonOnDemandXLogReaderRoutines;\n\n\tInitUnstableExtensionsSupport();\n\tInitLogicalReplicationMonitor();\n\tInitDDLHandler();\n\n\tpg_init_extension_server();\n\n\trestore_running_xacts_callback = RestoreRunningXactsFromClog;\n\n\tDefineCustomBoolVariable(\n\t\t\t\t\t\t\t\"neon.disable_logical_replication_subscribers\",\n\t\t\t\t\t\t\t\"Disable incoming logical replication\",\n\t\t\t\t\t\t\tNULL,\n\t\t\t\t\t\t\t&disable_logical_replication_subscribers,\n\t\t\t\t\t\t\tfalse,\n\t\t\t\t\t\t\tPGC_SIGHUP,\n\t\t\t\t\t\t\t0,\n\t\t\t\t\t\t\tNULL, NULL, NULL);\n\tDefineCustomBoolVariable(\n\t\t\t\t\t\t\t\"neon.disable_wal_prevlink_checks\",\n\t\t\t\t\t\t\t\"Disable validation of prev link in WAL records\",\n\t\t\t\t\t\t\tNULL,\n\t\t\t\t\t\t\t&disable_wal_prev_lsn_checks,\n\t\t\t\t\t\t\tfalse,\n\t\t\t\t\t\t\tPGC_SIGHUP,\n\t\t\t\t\t\t\t0,\n\t\t\t\t\t\t\tNULL, NULL, 
NULL);\n\n\tDefineCustomBoolVariable(\n\t\t\t\t\t\t\t\"neon.monitor_query_exec_time\",\n\t\t\t\t\t\t\t\"Collect infortmation about query execution time\",\n\t\t\t\t\t\t\tNULL,\n\t\t\t\t\t\t\t&monitor_query_exec_time,\n\t\t\t\t\t\t\tfalse,\n\t\t\t\t\t\t\tPGC_USERSET,\n\t\t\t\t\t\t\t0,\n\t\t\t\t\t\t\tNULL, NULL, NULL);\n\n\tDefineCustomBoolVariable(\n\t\t\t\t\t\t\t\"neon.allow_replica_misconfig\",\n\t\t\t\t\t\t\t\"Allow replica startup when some critical GUCs have smaller value than on primary node\",\n\t\t\t\t\t\t\tNULL,\n\t\t\t\t\t\t\t&allowReplicaMisconfig,\n\t\t\t\t\t\t\ttrue,\n\t\t\t\t\t\t\tPGC_POSTMASTER,\n\t\t\t\t\t\t\t0,\n\t\t\t\t\t\t\tNULL, NULL, NULL);\n\n\tDefineCustomEnumVariable(\n\t\t\t\t\t\t\t\"neon.running_xacts_overflow_policy\",\n\t\t\t\t\t\t\t\"Action performed on snapshot overflow when restoring runnings xacts from CLOG\",\n\t\t\t\t\t\t\tNULL,\n\t\t\t\t\t\t\t&running_xacts_overflow_policy,\n\t\t\t\t\t\t\tOP_IGNORE,\n\t\t\t\t\t\t\trunning_xacts_overflow_policies,\n\t\t\t\t\t\t\tPGC_POSTMASTER,\n\t\t\t\t\t\t\t0,\n\t\t\t\t\t\t\tNULL, NULL, NULL);\n\n\tDefineCustomIntVariable(\"neon.pgstat_file_size_limit\",\n\t\t\t\t\t\t\t\"Maximal size of pgstat.stat file saved in Neon storage\",\n\t\t\t\t\t\t\t\"Zero value disables persisting pgstat.stat file\",\n\t\t\t\t\t\t\t&neon_pgstat_file_size_limit,\n\t\t\t\t\t\t\t0, 0, 1000000, /* disabled by default */\n\t\t\t\t\t\t\tPGC_SIGHUP,\n\t\t\t\t\t\t\tGUC_UNIT_KB,\n\t\t\t\t\t\t\tNULL, NULL, NULL);\n\n\tDefineCustomEnumVariable(\n\t\t\t\t\t\t\t\"neon.debug_compare_local\",\n\t\t\t\t\t\t\t\"Debug mode for comparing content of pages in prefetch ring/LFC/PS and local disk\",\n\t\t\t\t\t\t\tNULL,\n\t\t\t\t\t\t\t&debug_compare_local,\n\t\t\t\t\t\t\tDEBUG_COMPARE_LOCAL_NONE,\n\t\t\t\t\t\t\tdebug_compare_local_modes,\n\t\t\t\t\t\t\tPGC_POSTMASTER,\n\t\t\t\t\t\t\t0,\n\t\t\t\t\t\t\tNULL, NULL, NULL);\n\n\tDefineCustomStringVariable(\n\t\t\t\t\t\t\t\"neon.privileged_role_name\",\n\t\t\t\t\t\t\t\"Name of the 'weak' superuser 
role, which we give to the users\",\n\t\t\t\t\t\t\tNULL,\n\t\t\t\t\t\t\t&privileged_role_name,\n\t\t\t\t\t\t\t\"neon_superuser\",\n\t\t\t\t\t\t\tPGC_POSTMASTER, 0, NULL, NULL, NULL);\n\n\tDefineCustomBoolVariable(\n\t\t\t\t\t\t\t\"neon.lakebase_mode\",\n\t\t\t\t\t\t\t\"Is neon running in Lakebase?\",\n\t\t\t\t\t\t\tNULL,\n\t\t\t\t\t\t\t&lakebase_mode,\n\t\t\t\t\t\t\tfalse,\n\t\t\t\t\t\t\tPGC_POSTMASTER,\n\t\t\t\t\t\t\t0,\n\t\t\t\t\t\t\tNULL, NULL, NULL);\n\n\t// A test hook used in sql regress to trigger specific behaviors\n\t// to test features easily.\n\tDefineCustomIntVariable(\n\t\t\t\t\t\t\t\"databricks.test_hook\",\n\t\t\t\t\t\t\t\"The test hook used in sql regress tests only\",\n\t\t\t\t\t\t\tNULL,\n\t\t\t\t\t\t\t&databricks_test_hook,\n\t\t\t\t\t\t\t0,\n\t\t\t\t\t\t\t0, INT32_MAX,\n\t\t\t\t\t\t\tPGC_SUSET,\n\t\t\t\t\t\t\t0,\n\t\t\t\t\t\t\tNULL, NULL, NULL);\n\n\t/*\n\t * Important: This must happen after other parts of the extension are\n\t * loaded, otherwise any settings to GUCs that were set before the\n\t * extension was loaded will be removed.\n\t */\n\tEmitWarningsOnPlaceholders(\"neon\");\n\n\tReportSearchPath();\n\n\t/*\n\t * Register initialization hooks for stage 2. 
(On v14, there's no \"shmem\n\t * request\" hooks, so call the ShmemRequest functions immediately.)\n\t */\n#if PG_VERSION_NUM >= 150000\n\tprev_shmem_request_hook = shmem_request_hook;\n\tshmem_request_hook = neon_shmem_request_hook;\n#else\n\tneon_shmem_request_hook();\n#endif\n\n\t/* Register hooks for stage 3 */\n\tprev_shmem_startup_hook = shmem_startup_hook;\n\tshmem_startup_hook = neon_shmem_startup_hook;\n\n\t/* Other misc initialization */\n\tprev_ExecutorStart = ExecutorStart_hook;\n\tExecutorStart_hook = neon_ExecutorStart;\n\tprev_ExecutorEnd = ExecutorEnd_hook;\n\tExecutorEnd_hook = neon_ExecutorEnd;\n}\n\n/* Various functions exposed at SQL level */\n\nPG_FUNCTION_INFO_V1(pg_cluster_size);\nPG_FUNCTION_INFO_V1(backpressure_lsns);\nPG_FUNCTION_INFO_V1(backpressure_throttling_time);\nPG_FUNCTION_INFO_V1(approximate_working_set_size_seconds);\nPG_FUNCTION_INFO_V1(approximate_working_set_size);\nPG_FUNCTION_INFO_V1(neon_get_lfc_stats);\nPG_FUNCTION_INFO_V1(local_cache_pages);\n\nDatum\npg_cluster_size(PG_FUNCTION_ARGS)\n{\n\tint64\t\tsize;\n\n\tsize = GetNeonCurrentClusterSize();\n\n\tif (size == 0)\n\t\tPG_RETURN_NULL();\n\n\tPG_RETURN_INT64(size);\n}\n\nDatum\nbackpressure_lsns(PG_FUNCTION_ARGS)\n{\n\tXLogRecPtr\twritePtr;\n\tXLogRecPtr\tflushPtr;\n\tXLogRecPtr\tapplyPtr;\n\tDatum\t\tvalues[3];\n\tbool\t\tnulls[3];\n\tTupleDesc\ttupdesc;\n\n\treplication_feedback_get_lsns(&writePtr, &flushPtr, &applyPtr);\n\n\ttupdesc = CreateTemplateTupleDesc(3);\n\tTupleDescInitEntry(tupdesc, (AttrNumber) 1, \"received_lsn\", PG_LSNOID, -1, 0);\n\tTupleDescInitEntry(tupdesc, (AttrNumber) 2, \"disk_consistent_lsn\", PG_LSNOID, -1, 0);\n\tTupleDescInitEntry(tupdesc, (AttrNumber) 3, \"remote_consistent_lsn\", PG_LSNOID, -1, 0);\n\ttupdesc = BlessTupleDesc(tupdesc);\n\n\tMemSet(nulls, 0, sizeof(nulls));\n\tvalues[0] = LSNGetDatum(writePtr);\n\tvalues[1] = LSNGetDatum(flushPtr);\n\tvalues[2] = 
LSNGetDatum(applyPtr);\n\n\tPG_RETURN_DATUM(HeapTupleGetDatum(heap_form_tuple(tupdesc, values, nulls)));\n}\n\nDatum\nbackpressure_throttling_time(PG_FUNCTION_ARGS)\n{\n\tPG_RETURN_UINT64(BackpressureThrottlingTime());\n}\n\nDatum\napproximate_working_set_size_seconds(PG_FUNCTION_ARGS)\n{\n\ttime_t\t\tduration;\n\tint32\t\tdc;\n\n\tduration = PG_ARGISNULL(0) ? (time_t) -1 : PG_GETARG_INT32(0);\n\n\tdc = lfc_approximate_working_set_size_seconds(duration, false);\n\tif (dc < 0)\n\t\tPG_RETURN_NULL();\n\telse\n\t\tPG_RETURN_INT32(dc);\n}\n\nDatum\napproximate_working_set_size(PG_FUNCTION_ARGS)\n{\n\tbool\t\treset = PG_GETARG_BOOL(0);\n\tint32\t\tdc;\n\n\tdc = lfc_approximate_working_set_size_seconds(-1, reset);\n\tif (dc < 0)\n\t\tPG_RETURN_NULL();\n\telse\n\t\tPG_RETURN_INT32(dc);\n}\n\nDatum\nneon_get_lfc_stats(PG_FUNCTION_ARGS)\n{\n#define NUM_NEON_GET_STATS_COLS        2\n\tReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;\n\tLfcStatsEntry *entries;\n\tsize_t\t\tnum_entries;\n\n\tInitMaterializedSRF(fcinfo, 0);\n\n\t/* lfc_get_stats() does all the heavy lifting */\n\tentries = lfc_get_stats(&num_entries);\n\n\t/* Convert the LfcStatsEntrys to a result set */\n\tfor (size_t i = 0; i < num_entries; i++)\n\t{\n\t\tLfcStatsEntry *entry = &entries[i];\n\t\tDatum\t\tvalues[NUM_NEON_GET_STATS_COLS];\n\t\tbool\t\tnulls[NUM_NEON_GET_STATS_COLS];\n\n\t\tvalues[0] = CStringGetTextDatum(entry->metric_name);\n\t\tnulls[0] = false;\n\t\tvalues[1] = Int64GetDatum(entry->isnull ? 
0 : entry->value);\n\t\tnulls[1] = entry->isnull;\n\t\ttuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc, values, nulls);\n\t}\n\tPG_RETURN_VOID();\n\n#undef NUM_NEON_GET_STATS_COLS\n}\n\nDatum\nlocal_cache_pages(PG_FUNCTION_ARGS)\n{\n#define NUM_LOCALCACHE_PAGES_COLS\t7\n\tReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;\n\tLocalCachePagesRec *entries;\n\tsize_t\t\tnum_entries;\n\n\tInitMaterializedSRF(fcinfo, 0);\n\n\t/* lfc_local_cache_pages() does all the heavy lifting */\n\tentries = lfc_local_cache_pages(&num_entries);\n\n\t/* Convert the LocalCachePagesRec structs to a result set */\n\tfor (size_t i = 0; i < num_entries; i++)\n\t{\n\t\tLocalCachePagesRec *entry = &entries[i];\n\t\tDatum\t\tvalues[NUM_LOCALCACHE_PAGES_COLS];\n\t\tbool\t\tnulls[NUM_LOCALCACHE_PAGES_COLS] = {\n\t\t\tfalse, false, false, false, false, false, false\n\t\t};\n\n\t\tvalues[0] = Int64GetDatum((int64) entry->pageoffs);\n\t\tvalues[1] = ObjectIdGetDatum(entry->relfilenode);\n\t\tvalues[2] = ObjectIdGetDatum(entry->reltablespace);\n\t\tvalues[3] = ObjectIdGetDatum(entry->reldatabase);\n\t\tvalues[4] = ObjectIdGetDatum(entry->forknum);\n\t\tvalues[5] = Int64GetDatum((int64) entry->blocknum);\n\t\tvalues[6] = Int32GetDatum(entry->accesscount);\n\n\t\t/* Build and return the tuple. 
*/\n\t\ttuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc, values, nulls);\n\t}\n\n\tPG_RETURN_VOID();\n\n#undef NUM_LOCALCACHE_PAGES_COLS\n}\n\n/*\n * Initialization stage 2: make requests for the amount of shared memory we\n * will need.\n *\n * For a high-level explanation of the initialization process, see _PG_init().\n */\nstatic void\nneon_shmem_request_hook(void)\n{\n#if PG_VERSION_NUM >= 150000\n\tif (prev_shmem_request_hook)\n\t\tprev_shmem_request_hook();\n#endif\n\n\tLfcShmemRequest();\n\tNeonPerfCountersShmemRequest();\n\tPagestoreShmemRequest();\n\tRelsizeCacheShmemRequest();\n\tWalproposerShmemRequest();\n\tLwLsnCacheShmemRequest();\n}\n\n\n/*\n * Initialization stage 3: Initialize shared memory.\n *\n * For a high-level explanation of the initialization process, see _PG_init().\n */\nstatic void\nneon_shmem_startup_hook(void)\n{\n\tif (prev_shmem_startup_hook)\n\t\tprev_shmem_startup_hook();\n\n\tLWLockAcquire(AddinShmemInitLock, LW_EXCLUSIVE);\n\n\tLfcShmemInit();\n\tNeonPerfCountersShmemInit();\n\tif (lakebase_mode) {\n\t\tDatabricksMetricsShmemInit();\n\t}\n\tPagestoreShmemInit();\n\tRelsizeCacheShmemInit();\n\tWalproposerShmemInit();\n\tLwLsnCacheShmemInit();\n\n#if PG_MAJORVERSION_NUM >= 17\n\tWAIT_EVENT_NEON_LFC_MAINTENANCE = WaitEventExtensionNew(\"Neon/FileCache_Maintenance\");\n\tWAIT_EVENT_NEON_LFC_READ = WaitEventExtensionNew(\"Neon/FileCache_Read\");\n\tWAIT_EVENT_NEON_LFC_TRUNCATE = WaitEventExtensionNew(\"Neon/FileCache_Truncate\");\n\tWAIT_EVENT_NEON_LFC_WRITE = WaitEventExtensionNew(\"Neon/FileCache_Write\");\n\tWAIT_EVENT_NEON_LFC_CV_WAIT = WaitEventExtensionNew(\"Neon/FileCache_CvWait\");\n\tWAIT_EVENT_NEON_PS_STARTING = WaitEventExtensionNew(\"Neon/PS_Starting\");\n\tWAIT_EVENT_NEON_PS_CONFIGURING = WaitEventExtensionNew(\"Neon/PS_Configuring\");\n\tWAIT_EVENT_NEON_PS_SEND = WaitEventExtensionNew(\"Neon/PS_SendIO\");\n\tWAIT_EVENT_NEON_PS_READ = WaitEventExtensionNew(\"Neon/PS_ReadIO\");\n\tWAIT_EVENT_NEON_WAL_DL = 
WaitEventExtensionNew(\"Neon/WAL_Download\");\n#endif\n\n\tLWLockRelease(AddinShmemInitLock);\n}\n\n/*\n * ExecutorStart hook: start up tracking if needed\n */\nstatic void\nneon_ExecutorStart(QueryDesc *queryDesc, int eflags)\n{\n\tif (prev_ExecutorStart)\n\t\tprev_ExecutorStart(queryDesc, eflags);\n\telse\n\t\tstandard_ExecutorStart(queryDesc, eflags);\n\n\tif (monitor_query_exec_time)\n\t{\n\t\t/*\n\t\t * Set up to track total elapsed time in ExecutorRun.  Make sure the\n\t\t * space is allocated in the per-query context so it will go away at\n\t\t * ExecutorEnd.\n\t\t */\n\t\tif (queryDesc->totaltime == NULL)\n\t\t{\n\t\t\tMemoryContext oldcxt;\n\n\t\t\toldcxt = MemoryContextSwitchTo(queryDesc->estate->es_query_cxt);\n\t\t\tqueryDesc->totaltime = InstrAlloc(1, INSTRUMENT_TIMER, false);\n\t\t\tMemoryContextSwitchTo(oldcxt);\n\t\t}\n\t}\n}\n\n/*\n * ExecutorEnd hook: store results if needed\n */\nstatic void\nneon_ExecutorEnd(QueryDesc *queryDesc)\n{\n\tif (monitor_query_exec_time && queryDesc->totaltime)\n\t{\n\t\t/*\n\t\t * Make sure stats accumulation is done.  (Note: it's okay if several\n\t\t * levels of hook all do this.)\n\t\t */\n\t\tInstrEndLoop(queryDesc->totaltime);\n\n\t\tinc_query_time(queryDesc->totaltime->total*1000000); /* convert to usec */\n\t}\n\n\tif (prev_ExecutorEnd)\n\t\tprev_ExecutorEnd(queryDesc);\n\telse\n\t\tstandard_ExecutorEnd(queryDesc);\n}\n"
  },
  {
    "path": "pgxn/neon/neon.control",
    "content": "# neon extension\ncomment = 'cloud storage for PostgreSQL'\ndefault_version = '1.6'\nmodule_pathname = '$libdir/neon'\nrelocatable = true\ntrusted = true\n"
  },
  {
    "path": "pgxn/neon/neon.h",
    "content": "/*-------------------------------------------------------------------------\n *\n * neon.h\n *\t  Functions used in the initialization of this extension.\n *\n *-------------------------------------------------------------------------\n */\n\n#ifndef NEON_H\n#define NEON_H\n\n#include \"access/xlogdefs.h\"\n#include \"utils/wait_event.h\"\n\n/* GUCs */\nextern char *neon_auth_token;\nextern char *neon_timeline;\nextern char *neon_tenant;\nextern char *wal_acceptors_list;\nextern int\twal_acceptor_reconnect_timeout;\nextern int\twal_acceptor_connection_timeout;\nextern int\treadahead_getpage_pull_timeout_ms;\nextern bool\tdisable_wal_prev_lsn_checks;\nextern bool\tlakebase_mode;\n\nextern bool AmPrewarmWorker;\n\n#if PG_MAJORVERSION_NUM >= 17\nextern uint32\t\tWAIT_EVENT_NEON_LFC_MAINTENANCE;\nextern uint32\t\tWAIT_EVENT_NEON_LFC_READ;\nextern uint32\t\tWAIT_EVENT_NEON_LFC_TRUNCATE;\nextern uint32\t\tWAIT_EVENT_NEON_LFC_WRITE;\nextern uint32\t\tWAIT_EVENT_NEON_LFC_CV_WAIT;\nextern uint32\t\tWAIT_EVENT_NEON_PS_STARTING;\nextern uint32\t\tWAIT_EVENT_NEON_PS_CONFIGURING;\nextern uint32\t\tWAIT_EVENT_NEON_PS_SEND;\nextern uint32\t\tWAIT_EVENT_NEON_PS_READ;\nextern uint32\t\tWAIT_EVENT_NEON_WAL_DL;\n#else\n#define WAIT_EVENT_NEON_LFC_MAINTENANCE\tPG_WAIT_EXTENSION\n#define WAIT_EVENT_NEON_LFC_READ\t\tWAIT_EVENT_BUFFILE_READ\n#define WAIT_EVENT_NEON_LFC_TRUNCATE\tWAIT_EVENT_BUFFILE_TRUNCATE\n#define WAIT_EVENT_NEON_LFC_WRITE\t\tWAIT_EVENT_BUFFILE_WRITE\n#define WAIT_EVENT_NEON_LFC_CV_WAIT \tWAIT_EVENT_BUFFILE_READ\n#define WAIT_EVENT_NEON_PS_STARTING\t\tPG_WAIT_EXTENSION\n#define WAIT_EVENT_NEON_PS_CONFIGURING\tPG_WAIT_EXTENSION\n#define WAIT_EVENT_NEON_PS_SEND\t\t\tPG_WAIT_EXTENSION\n#define WAIT_EVENT_NEON_PS_READ\t\t\tPG_WAIT_EXTENSION\n#define WAIT_EVENT_NEON_WAL_DL\t\t\tWAIT_EVENT_WAL_READ\n#endif\n\n\n#define NEON_TAG \"[NEON_SMGR] \"\n#define neon_log(tag, fmt, ...) 
ereport(tag,                                  \\\n\t\t\t\t\t\t\t\t\t\t(errmsg(NEON_TAG fmt, ##__VA_ARGS__), \\\n\t\t\t\t\t\t\t\t\t\t errhidestmt(true), errhidecontext(true), errposition(0), internalerrposition(0)))\n#define neon_shard_log(shard_no, tag, fmt, ...) ereport(tag,\t\\\n\t\t\t\t\t\t\t\t\t\t\t\t\t\t(errmsg(NEON_TAG \"[shard %d] \" fmt, shard_no, ##__VA_ARGS__), \\\n\t\t\t\t\t\t\t\t\t\t\t\t\t\t errhidestmt(true), errhidecontext(true), errposition(0), internalerrposition(0)))\n\n\nextern void pg_init_libpagestore(void);\nextern void pg_init_walproposer(void);\n\nextern uint64 BackpressureThrottlingTime(void);\nextern void SetNeonCurrentClusterSize(uint64 size);\nextern uint64 GetNeonCurrentClusterSize(void);\nextern void replication_feedback_get_lsns(XLogRecPtr *writeLsn, XLogRecPtr *flushLsn, XLogRecPtr *applyLsn);\n\nextern PGDLLEXPORT void WalProposerSync(int argc, char *argv[]);\nextern PGDLLEXPORT void WalProposerMain(Datum main_arg);\nextern PGDLLEXPORT void LogicalSlotsMonitorMain(Datum main_arg);\n\nextern void LfcShmemRequest(void);\nextern void PagestoreShmemRequest(void);\nextern void RelsizeCacheShmemRequest(void);\nextern void WalproposerShmemRequest(void);\nextern void LwLsnCacheShmemRequest(void);\nextern void NeonPerfCountersShmemRequest(void);\n\nextern void LfcShmemInit(void);\nextern void PagestoreShmemInit(void);\nextern void RelsizeCacheShmemInit(void);\nextern void WalproposerShmemInit(void);\nextern void LwLsnCacheShmemInit(void);\nextern void NeonPerfCountersShmemInit(void);\n\n\n#endif\t\t\t\t\t\t\t/* NEON_H */\n"
  },
  {
    "path": "pgxn/neon/neon_ddl_handler.c",
    "content": "/*-------------------------------------------------------------------------\n *\n * neon_ddl_handler.c\n *\t  Captures updates to roles/databases using ProcessUtility_hook and\n *        sends them to the control ProcessUtility_hook. The changes are sent\n *        via HTTP to the URL specified by the GUC neon.console_url when the\n *        transaction commits. Forwarding may be disabled temporarily by\n *        setting neon.forward_ddl to false.\n *\n *        Currently, the transaction may abort AFTER\n *        changes have already been forwarded, and that case is not handled.\n *        Subtransactions are handled using a stack of hash tables, which\n *        accumulate changes. On subtransaction commit, the top of the stack\n *        is merged with the table below it.\n *\n *    Support event triggers for {privileged_role_name}\n *\n * IDENTIFICATION\n *\t contrib/neon/neon_dll_handler.c\n *\n *-------------------------------------------------------------------------\n */\n\n#include \"postgres.h\"\n\n#include <curl/curl.h>\n#include <unistd.h>\n\n#include \"access/xact.h\"\n#include \"catalog/pg_authid.h\"\n#include \"catalog/pg_proc.h\"\n#include \"commands/defrem.h\"\n#include \"commands/event_trigger.h\"\n#include \"commands/user.h\"\n#include \"fmgr.h\"\n#include \"libpq/crypt.h\"\n#include \"miscadmin.h\"\n#include \"nodes/makefuncs.h\"\n#include \"parser/parse_func.h\"\n#include \"tcop/pquery.h\"\n#include \"tcop/utility.h\"\n#include \"utils/acl.h\"\n#include \"utils/guc.h\"\n#include \"utils/hsearch.h\"\n#include \"utils/memutils.h\"\n#include \"utils/jsonb.h\"\n#include <utils/lsyscache.h>\n#include <utils/syscache.h>\n\n#include \"neon_ddl_handler.h\"\n#include \"neon_utils.h\"\n#include \"neon.h\"\n\nstatic ProcessUtility_hook_type PreviousProcessUtilityHook = NULL;\nstatic fmgr_hook_type next_fmgr_hook = NULL;\nstatic needs_fmgr_hook_type next_needs_fmgr_hook = NULL;\nstatic bool neon_event_triggers = true;\n\nstatic const char 
*jwt_token = NULL;\n\n/* GUCs */\nstatic char *ConsoleURL = NULL;\nstatic bool ForwardDDL = true;\nstatic bool RegressTestMode = false;\n\n/*\n * CURL docs say that this buffer must exist until we call curl_easy_cleanup\n * (which we never do), so we make this a static\n */\nstatic char CurlErrorBuf[CURL_ERROR_SIZE];\n\ntypedef enum\n{\n\tOp_Set,\t\t\t\t\t\t/* An upsert: Either a creation or an alter */\n\tOp_Delete,\n} OpType;\n\ntypedef struct\n{\n\tchar\t\tname[NAMEDATALEN];\n\tOid\t\t\towner;\n\tchar\t\told_name[NAMEDATALEN];\n\tOpType\t\ttype;\n} DbEntry;\n\ntypedef struct\n{\n\tchar\t\tname[NAMEDATALEN];\n\tchar\t\told_name[NAMEDATALEN];\n\tconst char *password;\n\tOpType\t\ttype;\n} RoleEntry;\n\n/*\n * We keep one of these for each subtransaction in a stack. When a subtransaction\n * commits, we merge the top of the stack into the table below it. It is allocated in the\n * subtransaction's context.\n */\ntypedef struct DdlHashTable\n{\n\tstruct DdlHashTable *prev_table;\n\tsize_t\t\tsubtrans_level;\n\tHTAB\t   *db_table;\n\tHTAB\t   *role_table;\n} DdlHashTable;\n\nstatic DdlHashTable RootTable;\nstatic DdlHashTable *CurrentDdlTable = &RootTable;\nstatic int SubtransLevel; /* current nesting level of subtransactions */\n\nstatic void\nPushKeyValue(JsonbParseState **state, char *key, char *value)\n{\n\tJsonbValue\tk,\n\t\t\t\tv;\n\n\tk.type = jbvString;\n\tk.val.string.len = strlen(key);\n\tk.val.string.val = key;\n\tv.type = jbvString;\n\tv.val.string.len = strlen(value);\n\tv.val.string.val = value;\n\tpushJsonbValue(state, WJB_KEY, &k);\n\tpushJsonbValue(state, WJB_VALUE, &v);\n}\n\nstatic char *\nConstructDeltaMessage()\n{\n\tJsonbParseState *state = NULL;\n\n\tpushJsonbValue(&state, WJB_BEGIN_OBJECT, NULL);\n\tif (RootTable.db_table)\n\t{\n\t\tJsonbValue\tdbs;\n\t\tHASH_SEQ_STATUS status;\n\t\tDbEntry    *entry;\n\n\t\tdbs.type = jbvString;\n\t\tdbs.val.string.val = \"dbs\";\n\t\tdbs.val.string.len = 
strlen(dbs.val.string.val);\n\t\tpushJsonbValue(&state, WJB_KEY, &dbs);\n\t\tpushJsonbValue(&state, WJB_BEGIN_ARRAY, NULL);\n\n\t\thash_seq_init(&status, RootTable.db_table);\n\t\twhile ((entry = hash_seq_search(&status)) != NULL)\n\t\t{\n\t\t\tpushJsonbValue(&state, WJB_BEGIN_OBJECT, NULL);\n\t\t\tPushKeyValue(&state, \"op\", entry->type == Op_Set ? \"set\" : \"del\");\n\t\t\tPushKeyValue(&state, \"name\", entry->name);\n\t\t\tif (entry->owner != InvalidOid)\n\t\t\t{\n\t\t\t\tPushKeyValue(&state, \"owner\", GetUserNameFromId(entry->owner, false));\n\t\t\t}\n\t\t\tif (entry->old_name[0] != '\\0')\n\t\t\t{\n\t\t\t\tPushKeyValue(&state, \"old_name\", entry->old_name);\n\t\t\t}\n\t\t\tpushJsonbValue(&state, WJB_END_OBJECT, NULL);\n\t\t}\n\t\tpushJsonbValue(&state, WJB_END_ARRAY, NULL);\n\t}\n\n\tif (RootTable.role_table)\n\t{\n\t\tJsonbValue\troles;\n\t\tHASH_SEQ_STATUS status;\n\t\tRoleEntry  *entry;\n\n\t\troles.type = jbvString;\n\t\troles.val.string.val = \"roles\";\n\t\troles.val.string.len = strlen(roles.val.string.val);\n\t\tpushJsonbValue(&state, WJB_KEY, &roles);\n\t\tpushJsonbValue(&state, WJB_BEGIN_ARRAY, NULL);\n\n\t\thash_seq_init(&status, RootTable.role_table);\n\t\twhile ((entry = hash_seq_search(&status)) != NULL)\n\t\t{\n\t\t\tpushJsonbValue(&state, WJB_BEGIN_OBJECT, NULL);\n\t\t\tPushKeyValue(&state, \"op\", entry->type == Op_Set ? 
\"set\" : \"del\");\n\t\t\tPushKeyValue(&state, \"name\", entry->name);\n\t\t\tif (entry->password)\n\t\t\t{\n#if PG_MAJORVERSION_NUM == 14\n\t\t\t\tchar\t   *logdetail;\n#else\n\t\t\t\tconst char *logdetail;\n#endif\n\t\t\t\tchar\t   *encrypted_password;\n\t\t\t\tPushKeyValue(&state, \"password\", (char *) entry->password);\n\t\t\t\tencrypted_password = get_role_password(entry->name, &logdetail);\n\n\t\t\t\tif (encrypted_password)\n\t\t\t\t{\n\t\t\t\t\tPushKeyValue(&state, \"encrypted_password\", encrypted_password);\n\t\t\t\t}\n\t\t\t\telse\n\t\t\t\t{\n\t\t\t\t\telog(ERROR, \"Failed to get encrypted password: %s\", logdetail);\n\t\t\t\t}\n\t\t\t}\n\t\t\tif (entry->old_name[0] != '\\0')\n\t\t\t{\n\t\t\t\tPushKeyValue(&state, \"old_name\", entry->old_name);\n\t\t\t}\n\t\t\tpushJsonbValue(&state, WJB_END_OBJECT, NULL);\n\t\t}\n\t\tpushJsonbValue(&state, WJB_END_ARRAY, NULL);\n\t}\n\t{\n\t\tJsonbValue *result = pushJsonbValue(&state, WJB_END_OBJECT, NULL);\n\t\tJsonb\t   *jsonb = JsonbValueToJsonb(result);\n\n\t\treturn JsonbToCString(NULL, &jsonb->root, 0 /* estimated_len */ );\n\t}\n}\n\n#define ERROR_SIZE 1024\n\ntypedef struct\n{\n\tchar\t\tstr[ERROR_SIZE];\n\tsize_t\t\tsize;\n} ErrorString;\n\nstatic size_t\nErrorWriteCallback(char *ptr, size_t size, size_t nmemb, void *userdata)\n{\n\t/* Docs say size is always 1 */\n\tErrorString *str = userdata;\n\n\tsize_t\t\tto_write = nmemb;\n\n\t/* +1 for null terminator */\n\tif (str->size + nmemb + 1 >= ERROR_SIZE)\n\t\tto_write = ERROR_SIZE - str->size - 1;\n\n\t/* Ignore everyrthing past the first ERROR_SIZE bytes */\n\tif (to_write == 0)\n\t\treturn nmemb;\n\tmemcpy(str->str + str->size, ptr, to_write);\n\tstr->size += to_write;\n\tstr->str[str->size] = '\\0';\n\treturn nmemb;\n}\n\nstatic void\nSendDeltasToControlPlane()\n{\n\tstatic CURL\t\t*handle = NULL;\n\n\tif (!RootTable.db_table && !RootTable.role_table)\n\t\treturn;\n\tif (!ConsoleURL)\n\t{\n\t\telog(LOG, \"ConsoleURL not set, skipping 
forwarding\");\n\t\treturn;\n\t}\n\tif (!ForwardDDL)\n\t\treturn;\n\n\tif (handle == NULL)\n\t{\n\t\tstruct curl_slist *headers = NULL;\n\n\t\theaders = curl_slist_append(headers, \"Content-Type: application/json\");\n\t\tif (headers == NULL)\n\t\t{\n\t\t\telog(ERROR, \"Failed to set Content-Type header\");\n\t\t}\n\n\t\tif (jwt_token)\n\t\t{\n\t\t\tchar\t\tauth_header[8192];\n\n\t\t\tsnprintf(auth_header, sizeof(auth_header), \"Authorization: Bearer %s\", jwt_token);\n\t\t\theaders = curl_slist_append(headers, auth_header);\n\t\t\tif (headers == NULL)\n\t\t\t{\n\t\t\t\telog(ERROR, \"Failed to set Authorization header\");\n\t\t\t}\n\t\t}\n\n\t\thandle = alloc_curl_handle();\n\n\t\tcurl_easy_setopt(handle, CURLOPT_CUSTOMREQUEST, \"PATCH\");\n\t\tcurl_easy_setopt(handle, CURLOPT_HTTPHEADER, headers);\n\t\tcurl_easy_setopt(handle, CURLOPT_URL, ConsoleURL);\n\t\tcurl_easy_setopt(handle, CURLOPT_ERRORBUFFER, CurlErrorBuf);\n\t\tcurl_easy_setopt(handle, CURLOPT_TIMEOUT, 3L /* seconds */ );\n\t\tcurl_easy_setopt(handle, CURLOPT_WRITEFUNCTION, ErrorWriteCallback);\n\t}\n\n\t{\n\t\tchar\t   *message = ConstructDeltaMessage();\n\t\tErrorString str;\n\t\tconst int\tnum_retries = 5;\n\t\tCURLcode\tcurl_status;\n\t\tlong\t\tresponse_code;\n\n\t\tstr.size = 0;\n\n\t\tcurl_easy_setopt(handle, CURLOPT_POSTFIELDS, message);\n\t\tcurl_easy_setopt(handle, CURLOPT_WRITEDATA, &str);\n\n\t\tfor (int i = 0; i < num_retries; i++)\n\t\t{\n\t\t\tif ((curl_status = curl_easy_perform(handle)) == 0)\n\t\t\t\tbreak;\n\t\t\telog(LOG, \"Curl request failed on attempt %d: %s\", i, CurlErrorBuf);\n\t\t\tpg_usleep(1000 * 1000);\n\t\t}\n\t\tif (curl_status != CURLE_OK)\n\t\t\telog(ERROR, \"Failed to perform curl request: %s\", CurlErrorBuf);\n\n\t\tif (curl_easy_getinfo(handle, CURLINFO_RESPONSE_CODE, &response_code) != CURLE_UNKNOWN_OPTION)\n\t\t{\n\t\t\tif (response_code != 200)\n\t\t\t{\n\t\t\t\tif (str.size != 0)\n\t\t\t\t{\n\t\t\t\t\telog(ERROR,\n\t\t\t\t\t\t \"Received HTTP code %ld from 
control plane: %s\",\n\t\t\t\t\t\t response_code,\n\t\t\t\t\t\t str.str);\n\t\t\t\t}\n\t\t\t\telse\n\t\t\t\t{\n\t\t\t\t\telog(ERROR,\n\t\t\t\t\t\t \"Received HTTP code %ld from control plane\",\n\t\t\t\t\t\t response_code);\n\t\t\t\t}\n\t\t\t}\n\t\t}\n\t}\n}\n\nstatic void\nInitCurrentDdlTableIfNeeded()\n{\n\t/* Lazy construction of DllHashTable chain */\n\tif (SubtransLevel > CurrentDdlTable->subtrans_level)\n\t{\n\t\tDdlHashTable *new_table = MemoryContextAlloc(CurTransactionContext, sizeof(DdlHashTable));\n\t\tnew_table->prev_table = CurrentDdlTable;\n\t\tnew_table->subtrans_level = SubtransLevel;\n\t\tnew_table->role_table = NULL;\n\t\tnew_table->db_table = NULL;\n\t\tCurrentDdlTable = new_table;\n\t}\n}\n\nstatic void\nInitDbTableIfNeeded()\n{\n\tInitCurrentDdlTableIfNeeded();\n\tif (!CurrentDdlTable->db_table)\n\t{\n\t\tHASHCTL\t\tdb_ctl = {};\n\n\t\tdb_ctl.keysize = NAMEDATALEN;\n\t\tdb_ctl.entrysize = sizeof(DbEntry);\n\t\tdb_ctl.hcxt = CurTransactionContext;\n\t\tCurrentDdlTable->db_table = hash_create(\n\t\t\t\t\t\t\t\t\t\t\t\t\"Dbs Created\",\n\t\t\t\t\t\t\t\t\t\t\t\t4,\n\t\t\t\t\t\t\t\t\t\t\t\t&db_ctl,\n\t\t\t\t\t\t\t\t\t\t\t\tHASH_ELEM | HASH_STRINGS | HASH_CONTEXT);\n\t}\n}\n\nstatic void\nInitRoleTableIfNeeded()\n{\n\tInitCurrentDdlTableIfNeeded();\n\tif (!CurrentDdlTable->role_table)\n\t{\n\t\tHASHCTL\t\trole_ctl = {};\n\n\t\trole_ctl.keysize = NAMEDATALEN;\n\t\trole_ctl.entrysize = sizeof(RoleEntry);\n\t\trole_ctl.hcxt = CurTransactionContext;\n\t\tCurrentDdlTable->role_table = hash_create(\n\t\t\t\t\t\t\t\t\t\t\t\t  \"Roles Created\",\n\t\t\t\t\t\t\t\t\t\t\t\t  4,\n\t\t\t\t\t\t\t\t\t\t\t\t  &role_ctl,\n\t\t\t\t\t\t\t\t\t\t\t\t  HASH_ELEM | HASH_STRINGS | HASH_CONTEXT);\n\t}\n}\n\nstatic void\nPushTable()\n{\n\tSubtransLevel += 1;\n}\n\nstatic void\nMergeTable()\n{\n\tDdlHashTable *old_table;\n\n\tAssert(SubtransLevel >= CurrentDdlTable->subtrans_level);\n\tif (--SubtransLevel >= 
CurrentDdlTable->subtrans_level)\n\t{\n\t\treturn;\n\t}\n\n\told_table = CurrentDdlTable;\n\tCurrentDdlTable = old_table->prev_table;\n\n\tif (old_table->db_table)\n\t{\n\t\tDbEntry    *entry;\n\t\tHASH_SEQ_STATUS status;\n\n\t\tInitDbTableIfNeeded();\n\n\t\thash_seq_init(&status, old_table->db_table);\n\t\twhile ((entry = hash_seq_search(&status)) != NULL)\n\t\t{\n\t\t\tDbEntry    *to_write = hash_search(\n\t\t\t\t\t\t\t\t\t\t\t   CurrentDdlTable->db_table,\n\t\t\t\t\t\t\t\t\t\t\t   entry->name,\n\t\t\t\t\t\t\t\t\t\t\t   HASH_ENTER,\n\t\t\t\t\t\t\t\t\t\t\t   NULL);\n\n\t\t\tto_write->type = entry->type;\n\t\t\tif (entry->owner != InvalidOid)\n\t\t\t\tto_write->owner = entry->owner;\n\t\t\tstrlcpy(to_write->old_name, entry->old_name, NAMEDATALEN);\n\t\t\tif (entry->old_name[0] != '\\0')\n\t\t\t{\n\t\t\t\tbool\t\tfound_old = false;\n\t\t\t\tDbEntry    *old = hash_search(\n\t\t\t\t\t\t\t\t\t\t\t  CurrentDdlTable->db_table,\n\t\t\t\t\t\t\t\t\t\t\t  entry->old_name,\n\t\t\t\t\t\t\t\t\t\t\t  HASH_FIND,\n\t\t\t\t\t\t\t\t\t\t\t  &found_old);\n\n\t\t\t\tif (found_old)\n\t\t\t\t{\n\t\t\t\t\tif (old->old_name[0] != '\\0')\n\t\t\t\t\t\tstrlcpy(to_write->old_name, old->old_name, NAMEDATALEN);\n\t\t\t\t\telse\n\t\t\t\t\t\tstrlcpy(to_write->old_name, entry->old_name, NAMEDATALEN);\n\t\t\t\t\thash_search(\n\t\t\t\t\t\t\t\tCurrentDdlTable->db_table,\n\t\t\t\t\t\t\t\tentry->old_name,\n\t\t\t\t\t\t\t\tHASH_REMOVE,\n\t\t\t\t\t\t\t\tNULL);\n\t\t\t\t}\n\t\t\t}\n\t\t}\n\t\thash_destroy(old_table->db_table);\n\t}\n\n\tif (old_table->role_table)\n\t{\n\t\tRoleEntry  *entry;\n\t\tHASH_SEQ_STATUS status;\n\n\t\tInitRoleTableIfNeeded();\n\n\t\thash_seq_init(&status, old_table->role_table);\n\t\twhile ((entry = hash_seq_search(&status)) != NULL)\n\t\t{\n\t\t\tRoleEntry * old;\n\t\t\tbool found_old = false;\n\t\t\tRoleEntry  *to_write = hash_search(\n\t\t\t\t\t\t\t\t\t\t\t   CurrentDdlTable->role_table,\n\t\t\t\t\t\t\t\t\t\t\t   entry->name,\n\t\t\t\t\t\t\t\t\t\t\t   
HASH_ENTER,\n\t\t\t\t\t\t\t\t\t\t\t   NULL);\n\n\t\t\tto_write->type = entry->type;\n\t\t\tto_write->password = entry->password;\n\t\t\tstrlcpy(to_write->old_name, entry->old_name, NAMEDATALEN);\n\t\t\tif (entry->old_name[0] == '\\0')\n\t\t\t\tcontinue;\n\n\t\t\told = hash_search(\n\t\t\t\t\t\t\t  CurrentDdlTable->role_table,\n\t\t\t\t\t\t\t  entry->old_name,\n\t\t\t\t\t\t\t  HASH_FIND,\n\t\t\t\t\t\t\t  &found_old);\n\t\t\tif (!found_old)\n\t\t\t\tcontinue;\n\t\t\tstrlcpy(to_write->old_name, old->old_name, NAMEDATALEN);\n\t\t\thash_search(CurrentDdlTable->role_table,\n\t\t\t\t\t\tentry->old_name,\n\t\t\t\t\t\tHASH_REMOVE,\n\t\t\t\t\t\tNULL);\n\t\t}\n\t\thash_destroy(old_table->role_table);\n\t}\n}\n\nstatic void\nPopTable()\n{\n\tAssert(SubtransLevel >= CurrentDdlTable->subtrans_level);\n\tif (--SubtransLevel < CurrentDdlTable->subtrans_level)\n\t{\n\t\t/*\n\t\t * Current table gets freed because it is allocated in aborted\n\t\t * subtransaction's memory context.\n\t\t */\n\t\tCurrentDdlTable = CurrentDdlTable->prev_table;\n\t}\n}\n\nstatic void\nNeonSubXactCallback(\n\t\t\t\t\tSubXactEvent event,\n\t\t\t\t\tSubTransactionId mySubid,\n\t\t\t\t\tSubTransactionId parentSubid,\n\t\t\t\t\tvoid *arg)\n{\n\tswitch (event)\n\t{\n\t\tcase SUBXACT_EVENT_START_SUB:\n\t\t\treturn PushTable();\n\t\tcase SUBXACT_EVENT_COMMIT_SUB:\n\t\t\treturn MergeTable();\n\t\tcase SUBXACT_EVENT_ABORT_SUB:\n\t\t\treturn PopTable();\n\t\tdefault:\n\t\t\treturn;\n\t}\n}\n\nstatic void\nNeonXactCallback(XactEvent event, void *arg)\n{\n\tif (event == XACT_EVENT_PRE_COMMIT || event == XACT_EVENT_PARALLEL_PRE_COMMIT)\n\t{\n\t\tSendDeltasToControlPlane();\n\t}\n\tRootTable.role_table = NULL;\n\tRootTable.db_table = NULL;\n\tAssert(CurrentDdlTable == &RootTable);\n}\n\nstatic bool\nIsPrivilegedRole(const char *role_name)\n{\n\tAssert(role_name);\n\n\treturn strcmp(role_name, privileged_role_name) == 0;\n}\n\nstatic void\nHandleCreateDb(CreatedbStmt *stmt)\n{\n\tDefElem    *downer = NULL;\n\tListCell  
 *option;\n\tbool\t\tfound = false;\n\tDbEntry    *entry;\n\n\tInitDbTableIfNeeded();\n\n\tforeach(option, stmt->options)\n\t{\n\t\tDefElem    *defel = lfirst(option);\n\n\t\tif (strcmp(defel->defname, \"owner\") == 0)\n\t\t\tdowner = defel;\n\t}\n\n\tentry = hash_search(CurrentDdlTable->db_table,\n\t\t\t\t\t\tstmt->dbname,\n\t\t\t\t\t\tHASH_ENTER,\n\t\t\t\t\t\t&found);\n\tif (!found)\n\t\tmemset(entry->old_name, 0, sizeof(entry->old_name));\n\n\tentry->type = Op_Set;\n\tif (downer && downer->arg)\n\t{\n\t\tconst char *owner_name = defGetString(downer);\n\n\t\tif (IsPrivilegedRole(owner_name))\n\t\t\telog(ERROR, \"could not create a database with owner %s\", privileged_role_name);\n\n\t\tentry->owner = get_role_oid(owner_name, false);\n\t}\n\telse\n\t{\n\t\tentry->owner = GetUserId();\n\t}\n}\n\nstatic void\nHandleAlterOwner(AlterOwnerStmt *stmt)\n{\n\tconst char *name;\n\tbool\t\tfound = false;\n\tDbEntry    *entry;\n\tconst char *new_owner;\n\n\tif (stmt->objectType != OBJECT_DATABASE)\n\t\treturn;\n\tInitDbTableIfNeeded();\n\n\tname = strVal(stmt->object);\n\tentry = hash_search(CurrentDdlTable->db_table,\n\t\t\t\t\t\tname,\n\t\t\t\t\t\tHASH_ENTER,\n\t\t\t\t\t\t&found);\n\tif (!found)\n\t\tmemset(entry->old_name, 0, sizeof(entry->old_name));\n\n\tnew_owner = get_rolespec_name(stmt->newowner);\n\tif (IsPrivilegedRole(new_owner))\n\t\telog(ERROR, \"could not alter owner to %s\", privileged_role_name);\n\n\tentry->owner = get_role_oid(new_owner, false);\n\tentry->type = Op_Set;\n}\n\nstatic void\nHandleDbRename(RenameStmt *stmt)\n{\n\tbool\t\tfound = false;\n\tDbEntry    *entry;\n\tDbEntry    *entry_for_new_name;\n\n\tAssert(stmt->renameType == OBJECT_DATABASE);\n\tInitDbTableIfNeeded();\n\tentry = hash_search(CurrentDdlTable->db_table,\n\t\t\t\t\t\tstmt->subname,\n\t\t\t\t\t\tHASH_FIND,\n\t\t\t\t\t\t&found);\n\n\tentry_for_new_name = hash_search(CurrentDdlTable->db_table,\n\t\t\t\t\t\t\t\t\t stmt->newname,\n\t\t\t\t\t\t\t\t\t HASH_ENTER,\n\t\t\t\t\t\t\t\t\t 
NULL);\n\tentry_for_new_name->type = Op_Set;\n\n\tif (found)\n\t{\n\t\tif (entry->old_name[0] != '\\0')\n\t\t\tstrlcpy(entry_for_new_name->old_name, entry->old_name, NAMEDATALEN);\n\t\telse\n\t\t\tstrlcpy(entry_for_new_name->old_name, entry->name, NAMEDATALEN);\n\t\tentry_for_new_name->owner = entry->owner;\n\t\thash_search(CurrentDdlTable->db_table,\n\t\t\t\t\tstmt->subname,\n\t\t\t\t\tHASH_REMOVE,\n\t\t\t\t\tNULL);\n\t}\n\telse\n\t{\n\t\tstrlcpy(entry_for_new_name->old_name, stmt->subname, NAMEDATALEN);\n\t\tentry_for_new_name->owner = InvalidOid;\n\t}\n}\n\nstatic void\nHandleDropDb(DropdbStmt *stmt)\n{\n\tbool\t\tfound = false;\n\tDbEntry    *entry;\n\n\tInitDbTableIfNeeded();\n\n\tentry = hash_search(CurrentDdlTable->db_table,\n\t\t\t\t\t\tstmt->dbname,\n\t\t\t\t\t\tHASH_ENTER,\n\t\t\t\t\t\t&found);\n\tentry->type = Op_Delete;\n\tentry->owner = InvalidOid;\n\tif (!found)\n\t\tmemset(entry->old_name, 0, sizeof(entry->old_name));\n}\n\nstatic void\nHandleCreateRole(CreateRoleStmt *stmt)\n{\n\tbool\t\tfound = false;\n\tRoleEntry  *entry;\n\tDefElem    *dpass;\n\tListCell   *option;\n\n\tInitRoleTableIfNeeded();\n\n\tdpass = NULL;\n\tforeach(option, stmt->options)\n\t{\n\t\tDefElem    *defel = lfirst(option);\n\n\t\tif (strcmp(defel->defname, \"password\") == 0)\n\t\t\tdpass = defel;\n\t}\n\n\tentry = hash_search(CurrentDdlTable->role_table,\n\t\t\t\t\t\tstmt->role,\n\t\t\t\t\t\tHASH_ENTER,\n\t\t\t\t\t\t&found);\n\tif (!found)\n\t\tmemset(entry->old_name, 0, sizeof(entry->old_name));\n\tif (dpass && dpass->arg)\n\t\tentry->password = MemoryContextStrdup(CurTransactionContext, strVal(dpass->arg));\n\telse\n\t\tentry->password = NULL;\n\tentry->type = Op_Set;\n}\n\nstatic void\nHandleAlterRole(AlterRoleStmt *stmt)\n{\n\tchar\t   *role_name;\n\tDefElem    *dpass;\n\tListCell   *option;\n\tbool\t\tfound = false;\n\tRoleEntry  *entry;\n\n\tInitRoleTableIfNeeded();\n\n\trole_name = get_rolespec_name(stmt->role);\n\tif (IsPrivilegedRole(role_name) && 
!superuser())\n\t\telog(ERROR, \"could not ALTER %s\", privileged_role_name);\n\n\tdpass = NULL;\n\tforeach(option, stmt->options)\n\t{\n\t\tDefElem    *defel = lfirst(option);\n\n\t\tif (strcmp(defel->defname, \"password\") == 0)\n\t\t\tdpass = defel;\n\t}\n\n\t/* We only care about updates to the password */\n\tif (!dpass)\n\t{\n\t\tpfree(role_name);\n\t\treturn;\n\t}\n\n\tentry = hash_search(CurrentDdlTable->role_table,\n\t\t\t\t\t\trole_name,\n\t\t\t\t\t\tHASH_ENTER,\n\t\t\t\t\t\t&found);\n\tif (!found)\n\t\tmemset(entry->old_name, 0, sizeof(entry->old_name));\n\tif (dpass->arg)\n\t\tentry->password = MemoryContextStrdup(CurTransactionContext, strVal(dpass->arg));\n\telse\n\t\tentry->password = NULL;\n\tentry->type = Op_Set;\n\n\tpfree(role_name);\n}\n\nstatic void\nHandleRoleRename(RenameStmt *stmt)\n{\n\tbool\t\tfound = false;\n\tRoleEntry  *entry;\n\tRoleEntry  *entry_for_new_name;\n\n\tAssert(stmt->renameType == OBJECT_ROLE);\n\tInitRoleTableIfNeeded();\n\n\tentry = hash_search(CurrentDdlTable->role_table,\n\t\t\t\t\t\tstmt->subname,\n\t\t\t\t\t\tHASH_FIND,\n\t\t\t\t\t\t&found);\n\n\tentry_for_new_name = hash_search(CurrentDdlTable->role_table,\n\t\t\t\t\t\t\t\t\t stmt->newname,\n\t\t\t\t\t\t\t\t\t HASH_ENTER,\n\t\t\t\t\t\t\t\t\t NULL);\n\n\tentry_for_new_name->type = Op_Set;\n\tif (found)\n\t{\n\t\tif (entry->old_name[0] != '\\0')\n\t\t\tstrlcpy(entry_for_new_name->old_name, entry->old_name, NAMEDATALEN);\n\t\telse\n\t\t\tstrlcpy(entry_for_new_name->old_name, entry->name, NAMEDATALEN);\n\t\tentry_for_new_name->password = entry->password;\n\t\thash_search(\n\t\t\t\t\tCurrentDdlTable->role_table,\n\t\t\t\t\tentry->name,\n\t\t\t\t\tHASH_REMOVE,\n\t\t\t\t\tNULL);\n\t}\n\telse\n\t{\n\t\tstrlcpy(entry_for_new_name->old_name, stmt->subname, NAMEDATALEN);\n\t\tentry_for_new_name->password = NULL;\n\t}\n}\n\nstatic void\nHandleDropRole(DropRoleStmt *stmt)\n{\n\tListCell   *item;\n\n\tInitRoleTableIfNeeded();\n\n\tforeach(item, stmt->roles)\n\t{\n\t\tRoleSpec   
*spec = lfirst(item);\n\t\tbool\t\tfound = false;\n\t\tRoleEntry  *entry = hash_search(\n\t\t\t\t\t\t\t\t\t\tCurrentDdlTable->role_table,\n\t\t\t\t\t\t\t\t\t\tspec->rolename,\n\t\t\t\t\t\t\t\t\t\tHASH_ENTER,\n\t\t\t\t\t\t\t\t\t\t&found);\n\n\t\tentry->type = Op_Delete;\n\t\tentry->password = NULL;\n\t\tif (!found)\n\t\t\tmemset(entry->old_name, 0, sizeof(entry->old_name));\n\t}\n}\n\n\nstatic void\nHandleRename(RenameStmt *stmt)\n{\n\tif (stmt->renameType == OBJECT_DATABASE)\n\t\treturn HandleDbRename(stmt);\n\telse if (stmt->renameType == OBJECT_ROLE)\n\t\treturn HandleRoleRename(stmt);\n}\n\n\n/*\n * Support for Event Triggers.\n *\n * In vanilla only superuser can create Event Triggers.\n *\n * We allow it for {privileged_role_name} by temporary switching to superuser. But as\n * far as event trigger can fire in superuser context we should protect\n * superuser from execution of arbitrary user's code.\n *\n * The idea was taken from Supabase PR series starting at\n *   https://github.com/supabase/supautils/pull/98\n */\n\nstatic bool\nneon_needs_fmgr_hook(Oid functionId) {\n\n\treturn (next_needs_fmgr_hook && (*next_needs_fmgr_hook) (functionId))\n\t\t|| get_func_rettype(functionId) == EVENT_TRIGGEROID;\n}\n\nstatic void\nLookupFuncOwnerSecDef(Oid functionId, Oid *funcOwner, bool *is_secdef)\n{\n\tForm_pg_proc procForm;\n\tHeapTuple proc_tup = SearchSysCache1(PROCOID, ObjectIdGetDatum(functionId));\n\n\tif (!HeapTupleIsValid(proc_tup))\n\t\tereport(ERROR,\n\t\t\t\t(errmsg(\"cache lookup failed for function %u\", functionId)));\n\n\tprocForm = (Form_pg_proc) GETSTRUCT(proc_tup);\n\n\t*funcOwner = procForm->proowner;\n\t*is_secdef = procForm->prosecdef;\n\n\tReleaseSysCache(proc_tup);\n}\n\n\nPG_FUNCTION_INFO_V1(noop);\nDatum noop(__attribute__ ((unused)) PG_FUNCTION_ARGS) { PG_RETURN_VOID();}\n\nstatic void\nforce_noop(FmgrInfo *finfo)\n{\n    finfo->fn_addr   = (PGFunction) noop;\n    finfo->fn_oid    = InvalidOid;           /* not a known function OID anymore 
*/\n    finfo->fn_nargs  = 0;                    /* no arguments for noop */\n    finfo->fn_strict = false;\n    finfo->fn_retset = false;\n    finfo->fn_stats  = 0;                    /* no stats collection */\n    finfo->fn_extra  = NULL;                 /* clear out old context data */\n    finfo->fn_mcxt   = CurrentMemoryContext;\n    finfo->fn_expr   = NULL;                 /* no parse tree */\n}\n\n\n/*\n * Skip executing Event Triggers execution for superusers, because Event\n * Triggers are SECURITY DEFINER and user provided code could then attempt\n * privilege escalation.\n *\n * Also skip executing Event Triggers when GUC neon.event_triggers has been\n * set to false. This might be necessary to be able to connect again after a\n * LOGIN Event Trigger has been installed that would prevent connections as\n * {privileged_role_name}.\n */\nstatic void\nneon_fmgr_hook(FmgrHookEventType event, FmgrInfo *flinfo, Datum *private)\n{\n\t/*\n\t * It can be other needs_fmgr_hook which cause our hook to be invoked for\n\t * non-trigger function, so recheck that is is trigger function.\n\t */\n\tif (flinfo->fn_oid != InvalidOid &&\n\t\tget_func_rettype(flinfo->fn_oid) != EVENT_TRIGGEROID)\n\t{\n\t\tif (next_fmgr_hook)\n\t\t\t(*next_fmgr_hook) (event, flinfo, private);\n\n\t\treturn;\n\t}\n\n\t/*\n\t * The {privileged_role_name} role can use the GUC neon.event_triggers to disable\n\t * firing Event Trigger.\n\t *\n\t *   SET neon.event_triggers TO false;\n\t *\n\t * This only applies to the {privileged_role_name} role though, and only allows\n\t * skipping Event Triggers owned by {privileged_role_name}, which we check by\n\t * proxy of the Event Trigger function being owned by {privileged_role_name}.\n\t *\n\t * A role that is created in role {privileged_role_name} should be allowed to also\n\t * benefit from the neon_event_triggers GUC, and will be considered the\n\t * same as the {privileged_role_name} role.\n\t */\n\tif (event == FHET_START\n\t\t&& 
!neon_event_triggers\n\t\t&& is_privileged_role())\n\t{\n\t\tOid weak_superuser_oid = get_role_oid(privileged_role_name, false);\n\n\t\t/* Find the Function Attributes (owner Oid, security definer) */\n\t\tconst char *fun_owner_name = NULL;\n\t\tOid fun_owner = InvalidOid;\n\t\tbool fun_is_secdef = false;\n\n\t\tLookupFuncOwnerSecDef(flinfo->fn_oid, &fun_owner, &fun_is_secdef);\n\t\tfun_owner_name = GetUserNameFromId(fun_owner, false);\n\n\t\tif (IsPrivilegedRole(fun_owner_name)\n\t\t\t|| has_privs_of_role(fun_owner, weak_superuser_oid))\n\t\t{\n\t\t\telog(WARNING,\n\t\t\t\t \"Skipping Event Trigger: neon.event_triggers is false\");\n\n\t\t\t/*\n\t\t\t * we can't skip execution directly inside the fmgr_hook so instead we\n\t\t\t * change the event trigger function to a noop function.\n\t\t\t */\n\t\t\tforce_noop(flinfo);\n\t\t}\n\t}\n\n\t/*\n\t * Fire Event Trigger if both function owner and current user are\n\t * superuser. Allow executing Event Trigger function that belongs to a\n\t * superuser when connected as a non-superuser, even when the function is\n\t * SECURITY DEFINER.\n\t */\n    else if (event == FHET_START\n\t\t/* still enable it to pass pg_regress tests */\n\t\t&& !RegressTestMode)\n\t{\n\t\t/*\n\t\t * Get the current user oid as of before SECURITY DEFINER change of\n\t\t * CurrentUserId, and that would be SessionUserId.\n\t\t */\n\t\tOid current_role_oid = GetSessionUserId();\n\t\tbool role_is_super = superuser_arg(current_role_oid);\n\n\t\t/* Find the Function Attributes (owner Oid, security definer) */\n\t\tOid function_owner = InvalidOid;\n\t\tbool function_is_secdef = false;\n\t\tbool function_is_owned_by_super = false;\n\n\t\tLookupFuncOwnerSecDef(flinfo->fn_oid, &function_owner, &function_is_secdef);\n\n\t\tfunction_is_owned_by_super = superuser_arg(function_owner);\n\n\t\t/*\n\t\t * Refuse to run functions that belongs to a non-superuser when the\n\t\t * current user is a superuser.\n\t\t *\n\t\t * We could run a SECURITY DEFINER 
user-function here and be safe with\n\t\t * privilege escalation risks, but superuser roles are only used for\n\t\t * infrastructure maintenance operations, where we prefer to skip\n\t\t * running user-defined code.\n\t\t */\n\t\tif (role_is_super && !function_is_owned_by_super)\n\t\t{\n\t\t\tchar *func_name = get_func_name(flinfo->fn_oid);\n\n\t\t\tereport(WARNING,\n\t\t\t\t\t(errmsg(\"Skipping Event Trigger\"),\n\t\t\t\t\t errdetail(\"Event Trigger function \\\"%s\\\" \"\n\t\t\t\t\t\t\t   \"is owned by non-superuser role \\\"%s\\\", \"\n\t\t\t\t\t\t\t   \"and current_user \\\"%s\\\" is superuser\",\n\t\t\t\t\t\t\t   func_name,\n\t\t\t\t\t\t\t   GetUserNameFromId(function_owner, false),\n\t\t\t\t\t\t\t   GetUserNameFromId(current_role_oid, false))));\n\n\t\t\t/*\n\t\t\t * we can't skip execution directly inside the fmgr_hook so\n\t\t\t * instead we change the event trigger function to a noop\n\t\t\t * function.\n\t\t\t */\n\t\t\tforce_noop(flinfo);\n\t\t}\n\n\t}\n\n\tif (next_fmgr_hook)\n\t\t(*next_fmgr_hook) (event, flinfo, private);\n}\n\nstatic Oid prev_role_oid = 0;\nstatic int prev_role_sec_context = 0;\nstatic bool switched_to_superuser = false;\n\n/*\n * Switch tp superuser if not yet superuser.\n * Returns false if already switched to superuser.\n */\nstatic bool\nswitch_to_superuser(void)\n{\n    Oid superuser_oid;\n\n\tif (switched_to_superuser)\n\t\treturn false;\n\tswitched_to_superuser = true;\n\n\tsuperuser_oid = get_role_oid(\"cloud_admin\", true /*missing_ok*/);\n\tif (superuser_oid == InvalidOid)\n\t\tsuperuser_oid = BOOTSTRAP_SUPERUSERID;\n\n    GetUserIdAndSecContext(&prev_role_oid, &prev_role_sec_context);\n    SetUserIdAndSecContext(superuser_oid, prev_role_sec_context |\n                                              SECURITY_LOCAL_USERID_CHANGE |\n                                              SECURITY_RESTRICTED_OPERATION);\n\treturn true;\n}\n\nstatic void\nswitch_to_original_role(void)\n{\n    SetUserIdAndSecContext(prev_role_oid, 
prev_role_sec_context);\n    switched_to_superuser = false;\n}\n\n/*\n * ALTER ROLE ... SUPERUSER;\n *\n * Used internally to give superuser to a non-privileged role to allow\n * ownership of superuser-only objects such as Event Trigger.\n *\n *   ALTER ROLE foo SUPERUSER;\n *   ALTER EVENT TRIGGER ... OWNED BY foo;\n *   ALTER ROLE foo NOSUPERUSER;\n *\n * Now the EVENT TRIGGER is owned by foo, who can DROP it without having to be\n * superuser again.\n */\nstatic void\nalter_role_super(const char* rolename, bool make_super)\n{\n\tAlterRoleStmt *alter_stmt = makeNode(AlterRoleStmt);\n\n\tDefElem *defel_superuser =\n#if PG_MAJORVERSION_NUM <= 14\n\t\tmakeDefElem(\"superuser\", (Node *) makeInteger(make_super), -1);\n#else\n\t\tmakeDefElem(\"superuser\", (Node *) makeBoolean(make_super), -1);\n#endif\n\n\tRoleSpec *rolespec   = makeNode(RoleSpec);\n\trolespec->roletype   = ROLESPEC_CSTRING;\n\trolespec->rolename   = pstrdup(rolename);\n\trolespec->location   = -1;\n\n\talter_stmt->role = rolespec;\n\talter_stmt->options = list_make1(defel_superuser);\n\n#if PG_MAJORVERSION_NUM < 15\n\tAlterRole(alter_stmt);\n#else\n\t/* ParseState *pstate, AlterRoleStmt *stmt */\n\tAlterRole(NULL, alter_stmt);\n#endif\n\n\tCommandCounterIncrement();\n}\n\n\n/*\n * Changes the OWNER of an Event Trigger.\n *\n * Event Triggers can only be owned by superusers, so this ALTER ROLE with\n * SUPERUSER and then removes the property.\n */\nstatic void\nalter_event_trigger_owner(const char *obj_name, Oid role_oid)\n{\n\tchar* role_name = GetUserNameFromId(role_oid, false);\n\n\talter_role_super(role_name, true);\n\n\tAlterEventTriggerOwner(obj_name, role_oid);\n\tCommandCounterIncrement();\n\n\talter_role_super(role_name, false);\n}\n\n\n/*\n * Neon processing of the CREATE EVENT TRIGGER requires special attention and\n * is worth having its own ProcessUtility_hook for that.\n */\nstatic void\nProcessCreateEventTrigger(\n\t\t\t\t   PlannedStmt *pstmt,\n\t\t\t\t   const char 
*queryString,\n\t\t\t\t   bool readOnlyTree,\n\t\t\t\t   ProcessUtilityContext context,\n\t\t\t\t   ParamListInfo params,\n\t\t\t\t   QueryEnvironment *queryEnv,\n\t\t\t\t   DestReceiver *dest,\n\t\t\t\t   QueryCompletion *qc)\n{\n\tNode\t   *parseTree = pstmt->utilityStmt;\n\tbool\t\tsudo = false;\n\n\t/* We double-check that after local variable declaration block */\n\tCreateEventTrigStmt *stmt = (CreateEventTrigStmt *) parseTree;\n\n\t/*\n\t * We are going to change the current user privileges (sudo) and might\n\t * need after execution cleanup. For that we want to capture the UserId\n\t * before changing it for our sudo implementation.\n\t */\n\tconst Oid current_user_id = GetUserId();\n\tbool current_user_is_super = superuser_arg(current_user_id);\n\n\tif (nodeTag(parseTree) != T_CreateEventTrigStmt)\n\t{\n\t\tereport(ERROR,\n\t\t\t\terrcode(ERRCODE_INTERNAL_ERROR),\n\t\t\t\terrmsg(\"ProcessCreateEventTrigger called for the wrong command\"));\n\t}\n\n\t/*\n\t * Allow {privileged_role_name} to create Event Trigger, while keeping the\n\t * ownership of the object.\n\t *\n\t * For that we give superuser membership to the role for the execution of\n\t * the command.\n\t */\n\tif (IsTransactionState() && is_privileged_role())\n\t{\n\t\t/* Find the Event Trigger function Oid */\n\t\tOid func_oid = LookupFuncName(stmt->funcname, 0, NULL, false);\n\n\t\t/* Find the Function Owner Oid */\n\t\tOid func_owner = InvalidOid;\n\t\tbool is_secdef = false;\n\t\tbool function_is_owned_by_super = false;\n\n\t\tLookupFuncOwnerSecDef(func_oid, &func_owner, &is_secdef);\n\n\t\tfunction_is_owned_by_super = superuser_arg(func_owner);\n\n\t\tif(!current_user_is_super && function_is_owned_by_super)\n\t\t{\n\t\t\tereport(ERROR,\n\t\t\t\t\t(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),\n\t\t\t\t\t errmsg(\"Permission denied to execute \"\n\t\t\t\t\t\t\t\"a function owned by a superuser role\"),\n\t\t\t\t\t errdetail(\"current user \\\"%s\\\" is not a superuser \"\n\t\t\t\t\t\t\t   \"and 
Event Trigger function \\\"%s\\\" \"\n\t\t\t\t\t\t\t   \"is owned by a superuser\",\n\t\t\t\t\t\t\t   GetUserNameFromId(current_user_id, false),\n\t\t\t\t\t\t\t   NameListToString(stmt->funcname))));\n\t\t}\n\n\t\tif(current_user_is_super && !function_is_owned_by_super)\n\t\t{\n\t\t\tereport(ERROR,\n\t\t\t\t\t(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),\n\t\t\t\t\t errmsg(\"Permission denied to execute \"\n\t\t\t\t\t\t\t\"a function owned by a non-superuser role\"),\n\t\t\t\t\t errdetail(\"current user \\\"%s\\\" is a superuser \"\n\t\t\t\t\t\t\t   \"and function \\\"%s\\\" is \"\n\t\t\t\t\t\t\t   \"owned by a non-superuser\",\n\t\t\t\t\t\t\t   GetUserNameFromId(current_user_id, false),\n\t\t\t\t\t\t\t   NameListToString(stmt->funcname))));\n\t\t}\n\n\t\tsudo = switch_to_superuser();\n\t}\n\n\tPG_TRY();\n\t{\n\t\tif (PreviousProcessUtilityHook)\n\t\t{\n\t\t\tPreviousProcessUtilityHook(\n\t\t\t\tpstmt,\n\t\t\t\tqueryString,\n\t\t\t\treadOnlyTree,\n\t\t\t\tcontext,\n\t\t\t\tparams,\n\t\t\t\tqueryEnv,\n\t\t\t\tdest,\n\t\t\t\tqc);\n\t\t}\n\t\telse\n\t\t{\n\t\t\tstandard_ProcessUtility(\n\t\t\t\tpstmt,\n\t\t\t\tqueryString,\n\t\t\t\treadOnlyTree,\n\t\t\t\tcontext,\n\t\t\t\tparams,\n\t\t\t\tqueryEnv,\n\t\t\t\tdest,\n\t\t\t\tqc);\n\t\t}\n\n\t\t/*\n\t\t * Now that the Event Trigger has been installed via our sudo\n\t\t * mechanism, if the original role was not a superuser then change\n\t\t * the event trigger ownership back to the original role.\n\t\t *\n\t\t * That way [ ALTER | DROP ] EVENT TRIGGER commands just work.\n\t\t */\n\t\tif (IsTransactionState() && is_privileged_role())\n\t\t{\n\t\t\tif (!current_user_is_super)\n\t\t\t{\n\t\t\t\t/*\n\t\t\t\t * Change event trigger owner to the current role (making\n\t\t\t\t * it a privileged role during the ALTER OWNER command).\n\t\t\t\t */\n\t\t\t\talter_event_trigger_owner(stmt->trigname, current_user_id);\n\t\t\t}\n\t\t}\n\t}\n\tPG_FINALLY();\n\t{\n\t\tif 
(sudo)\n\t\t\tswitch_to_original_role();\n\t}\n\tPG_END_TRY();\n}\n\n\n/*\n * Neon hooks for DDLs (handling privileges, limiting features, etc).\n */\nstatic void\nNeonProcessUtility(\n\t\t\t\t   PlannedStmt *pstmt,\n\t\t\t\t   const char *queryString,\n\t\t\t\t   bool readOnlyTree,\n\t\t\t\t   ProcessUtilityContext context,\n\t\t\t\t   ParamListInfo params,\n\t\t\t\t   QueryEnvironment *queryEnv,\n\t\t\t\t   DestReceiver *dest,\n\t\t\t\t   QueryCompletion *qc)\n{\n\tNode\t   *parseTree = pstmt->utilityStmt;\n\n\t/*\n\t * The process utility hook for CREATE EVENT TRIGGER is its own\n\t * implementation and warrant being addressed separately from here.\n\t */\n\tif (nodeTag(parseTree) == T_CreateEventTrigStmt)\n\t{\n\t\tProcessCreateEventTrigger(\n\t\t\t\tpstmt,\n\t\t\t\tqueryString,\n\t\t\t\treadOnlyTree,\n\t\t\t\tcontext,\n\t\t\t\tparams,\n\t\t\t\tqueryEnv,\n\t\t\t\tdest,\n\t\t\t\tqc);\n\t\treturn;\n\t}\n\n\t/*\n\t * Other commands that need Neon specific implementations are handled here:\n\t */\n\tswitch (nodeTag(parseTree))\n\t{\n\t\tcase T_CreatedbStmt:\n\t\t\tHandleCreateDb(castNode(CreatedbStmt, parseTree));\n\t\t\tbreak;\n\t\tcase T_AlterOwnerStmt:\n\t\t\tHandleAlterOwner(castNode(AlterOwnerStmt, parseTree));\n\t\t\tbreak;\n\t\tcase T_RenameStmt:\n\t\t\tHandleRename(castNode(RenameStmt, parseTree));\n\t\t\tbreak;\n\t\tcase T_DropdbStmt:\n\t\t\tHandleDropDb(castNode(DropdbStmt, parseTree));\n\t\t\tbreak;\n\t\tcase T_CreateRoleStmt:\n\t\t\tHandleCreateRole(castNode(CreateRoleStmt, parseTree));\n\t\t\tbreak;\n\t\tcase T_AlterRoleStmt:\n\t\t\tHandleAlterRole(castNode(AlterRoleStmt, parseTree));\n\t\t\tbreak;\n\t\tcase T_DropRoleStmt:\n\t\t\tHandleDropRole(castNode(DropRoleStmt, parseTree));\n\t\t\tbreak;\n\t\tcase T_CreateTableSpaceStmt:\n\t\t\tif (!RegressTestMode)\n\t\t\t{\n\t\t\t\tereport(ERROR,\n\t\t\t\t\t(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),\n\t\t\t\t\terrmsg(\"CREATE TABLESPACE is not supported on Neon\")));\n\t\t\t}\n   
\t\t\tbreak;\n\t\tdefault:\n\t\t\tbreak;\n\t}\n\n\tif (PreviousProcessUtilityHook)\n\t{\n\t\tPreviousProcessUtilityHook(\n\t\t\tpstmt,\n\t\t\tqueryString,\n\t\t\treadOnlyTree,\n\t\t\tcontext,\n\t\t\tparams,\n\t\t\tqueryEnv,\n\t\t\tdest,\n\t\t\tqc);\n\t}\n\telse\n\t{\n\t\tstandard_ProcessUtility(\n\t\t\tpstmt,\n\t\t\tqueryString,\n\t\t\treadOnlyTree,\n\t\t\tcontext,\n\t\t\tparams,\n\t\t\tqueryEnv,\n\t\t\tdest,\n\t\t\tqc);\n\t}\n}\n\n/*\n * Only {privileged_role_name} is granted privilege to edit neon.event_triggers GUC.\n */\nstatic void\nneon_event_triggers_assign_hook(bool newval, void *extra)\n{\n\tif (IsTransactionState() && !is_privileged_role())\n\t{\n\t\tereport(ERROR,\n\t\t\t\t(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),\n\t\t\t\t errmsg(\"permission denied to set neon.event_triggers\"),\n\t\t\t\t errdetail(\"Only \\\"%s\\\" is allowed to set the GUC\", privileged_role_name)));\n\t}\n}\n\n\nvoid\nInitDDLHandler()\n{\n\tPreviousProcessUtilityHook = ProcessUtility_hook;\n\tProcessUtility_hook = NeonProcessUtility;\n\n    next_needs_fmgr_hook = needs_fmgr_hook;\n\tneeds_fmgr_hook = neon_needs_fmgr_hook;\n\n\tnext_fmgr_hook = fmgr_hook;\n\tfmgr_hook = neon_fmgr_hook;\n\n\tRegisterXactCallback(NeonXactCallback, NULL);\n\tRegisterSubXactCallback(NeonSubXactCallback, NULL);\n\n\t/*\n\t * The GUC neon.event_triggers should provide the same effect as the\n\t * Postgres GUC event_triggers, but the neon one is PGC_USERSET.\n\t *\n\t * This allows using the GUC in the connection string and work out of a\n\t * LOGIN Event Trigger that would break database access, all without\n\t * having to edit and reload the Postgres configuration file.\n\t */\n\tDefineCustomBoolVariable(\n\t\t\t\t\t\t\t \"neon.event_triggers\",\n\t\t\t\t\t\t\t \"Enable firing of event triggers\",\n\t\t\t\t\t\t\t NULL,\n\t\t\t\t\t\t\t &neon_event_triggers,\n\t\t\t\t\t\t\t true,\n\t\t\t\t\t\t\t PGC_USERSET,\n\t\t\t\t\t\t\t 0,\n\t\t\t\t\t\t\t NULL,\n\t\t\t\t\t\t\t 
neon_event_triggers_assign_hook,\n\t\t\t\t\t\t\t NULL);\n\n\tDefineCustomStringVariable(\n\t\t\t\t\t\t\t   \"neon.console_url\",\n\t\t\t\t\t\t\t   \"URL of the Neon Console, which will be forwarded changes to dbs and roles\",\n\t\t\t\t\t\t\t   NULL,\n\t\t\t\t\t\t\t   &ConsoleURL,\n\t\t\t\t\t\t\t   NULL,\n\t\t\t\t\t\t\t   PGC_POSTMASTER,\n\t\t\t\t\t\t\t   0,\n\t\t\t\t\t\t\t   NULL,\n\t\t\t\t\t\t\t   NULL,\n\t\t\t\t\t\t\t   NULL);\n\n\tDefineCustomBoolVariable(\n\t\t\t\t\t\t\t \"neon.forward_ddl\",\n\t\t\t\t\t\t\t \"Controls whether to forward DDL to the control plane\",\n\t\t\t\t\t\t\t NULL,\n\t\t\t\t\t\t\t &ForwardDDL,\n\t\t\t\t\t\t\t true,\n\t\t\t\t\t\t\t PGC_SUSET,\n\t\t\t\t\t\t\t 0,\n\t\t\t\t\t\t\t NULL,\n\t\t\t\t\t\t\t NULL,\n\t\t\t\t\t\t\t NULL);\n\n\tDefineCustomBoolVariable(\n\t\t\t\t\t\t\t \"neon.regress_test_mode\",\n\t\t\t\t\t\t\t \"Controls whether we are running in the regression test mode\",\n\t\t\t\t\t\t\t NULL,\n\t\t\t\t\t\t\t &RegressTestMode,\n\t\t\t\t\t\t\t false,\n\t\t\t\t\t\t\t PGC_SUSET,\n\t\t\t\t\t\t\t 0,\n\t\t\t\t\t\t\t NULL,\n\t\t\t\t\t\t\t NULL,\n\t\t\t\t\t\t\t NULL);\n\n\tjwt_token = getenv(\"NEON_CONTROL_PLANE_TOKEN\");\n\tif (!jwt_token)\n\t{\n\t\telog(LOG, \"Missing NEON_CONTROL_PLANE_TOKEN environment variable, forwarding will not be authenticated\");\n\t}\n\n}\n"
  },
  {
    "path": "pgxn/neon/neon_ddl_handler.h",
    "content": "#ifndef CONTROL_DDL_HANDLER_H\n#define CONTROL_DDL_HANDLER_H\n\nvoid\t\tInitDDLHandler(void);\n\n#endif\n"
  },
  {
    "path": "pgxn/neon/neon_lwlsncache.c",
    "content": "#include \"postgres.h\"\n\n#include \"neon.h\"\n#include \"neon_lwlsncache.h\"\n\n#include \"miscadmin.h\"\n#include \"access/xlog.h\"\n#include \"access/xlog_internal.h\"\n#include \"storage/ipc.h\"\n#include \"storage/shmem.h\"\n#include \"storage/buf_internals.h\"\n#include \"utils/guc.h\"\n#include \"utils/hsearch.h\"\n\n\n\ntypedef struct LastWrittenLsnCacheEntry\n{\n\tBufferTag\tkey;\n\tXLogRecPtr\tlsn;\n\t/* double linked list for LRU replacement algorithm */\n\tdlist_node\tlru_node;\n} LastWrittenLsnCacheEntry;\n\ntypedef struct LwLsnCacheCtl {\n\tint lastWrittenLsnCacheSize;\n\t/*\n\t* Maximal last written LSN for pages not present in lastWrittenLsnCache\n\t*/\n\tXLogRecPtr  maxLastWrittenLsn;\n\n\t/*\n\t* Double linked list to implement LRU replacement policy for last written LSN cache.\n\t* Access to this list as well as to last written LSN cache is protected by 'LastWrittenLsnLock'.\n\t*/\n\tdlist_head lastWrittenLsnLRU;\n} LwLsnCacheCtl;\n\n\n/*\n * Cache of last written LSN for each relation page.\n * Also to provide request LSN for smgrnblocks, smgrexists there is pseudokey=InvalidBlockId which stores LSN of last\n * relation metadata update.\n * Size of the cache is limited by GUC variable lastWrittenLsnCacheSize (\"lsn_cache_size\"),\n * pages are replaced using LRU algorithm, based on L2-list.\n * Access to this cache is protected by 'LastWrittenLsnLock'.\n */\nstatic HTAB *lastWrittenLsnCache;\n\nLwLsnCacheCtl* LwLsnCache;\n\nstatic int lwlsn_cache_size = (128 * 1024); \n\n\nstatic void\nlwlc_register_gucs(void)\n{\n\tDefineCustomIntVariable(\"neon.last_written_lsn_cache_size\",\n\t\t\t\t\t\t\t\"Size of last written LSN cache used by Neon\",\n\t\t\t\t\t\t\tNULL,\n\t\t\t\t\t\t\t&lwlsn_cache_size,\n\t\t\t\t\t\t\t(128*1024), 1024, INT_MAX,\n\t\t\t\t\t\t\tPGC_POSTMASTER,\n\t\t\t\t\t\t\t0, /* plain units */\n\t\t\t\t\t\t\tNULL, NULL, NULL);\n}\n\nstatic XLogRecPtr SetLastWrittenLSNForBlockRangeInternal(XLogRecPtr 
lsn,\n\t\t\t\t\t\t\t\t\t\t\t\t\t\t NRelFileInfo rlocator,\n\t\t\t\t\t\t\t\t\t\t\t\t\t\t ForkNumber forknum,\n\t\t\t\t\t\t\t\t\t\t\t\t\t\t BlockNumber from,\n\t\t\t\t\t\t\t\t\t\t\t\t\t\t BlockNumber n_blocks);\n\n/* All the necessary hooks are defined here */\n\n\n/* These hold the set_lwlsn_* hooks which were installed before ours, if any */\nstatic set_lwlsn_block_range_hook_type prev_set_lwlsn_block_range_hook = NULL;\nstatic set_lwlsn_block_v_hook_type prev_set_lwlsn_block_v_hook = NULL;\nstatic set_lwlsn_block_hook_type prev_set_lwlsn_block_hook = NULL;\nstatic set_max_lwlsn_hook_type prev_set_max_lwlsn_hook = NULL;\nstatic set_lwlsn_relation_hook_type prev_set_lwlsn_relation_hook = NULL;\nstatic set_lwlsn_db_hook_type prev_set_lwlsn_db_hook = NULL;\n\nstatic void neon_set_max_lwlsn(XLogRecPtr lsn);\n\nvoid\ninit_lwlsncache(void)\n{\n\tif (!process_shared_preload_libraries_in_progress)\n\t\tereport(ERROR, errcode(ERRCODE_INTERNAL_ERROR), errmsg(\"Loading of shared preload libraries is not in progress. 
Exiting\"));\n\t\n\tlwlc_register_gucs();\n\n\tprev_set_lwlsn_block_range_hook = set_lwlsn_block_range_hook;\n\tset_lwlsn_block_range_hook = neon_set_lwlsn_block_range;\n\tprev_set_lwlsn_block_v_hook = set_lwlsn_block_v_hook;\n\tset_lwlsn_block_v_hook = neon_set_lwlsn_block_v;\n\tprev_set_lwlsn_block_hook = set_lwlsn_block_hook;\n\tset_lwlsn_block_hook = neon_set_lwlsn_block;\n\tprev_set_max_lwlsn_hook = set_max_lwlsn_hook;\n\tset_max_lwlsn_hook = neon_set_max_lwlsn;\n\tprev_set_lwlsn_relation_hook = set_lwlsn_relation_hook;\n\tset_lwlsn_relation_hook = neon_set_lwlsn_relation;\n\tprev_set_lwlsn_db_hook = set_lwlsn_db_hook;\n\tset_lwlsn_db_hook = neon_set_lwlsn_db;\n}\n\n\nvoid\nLwLsnCacheShmemRequest(void)\n{\n\tSize requested_size = sizeof(LwLsnCacheCtl);\n\n\trequested_size += hash_estimate_size(lwlsn_cache_size, sizeof(LastWrittenLsnCacheEntry));\n\n\tRequestAddinShmemSpace(requested_size);\n}\n\nvoid\nLwLsnCacheShmemInit(void)\n{\n\tstatic HASHCTL info;\n\tbool found;\n\tif (lwlsn_cache_size > 0)\n\t{\n\t\tinfo.keysize = sizeof(BufferTag);\n\t\tinfo.entrysize = sizeof(LastWrittenLsnCacheEntry);\n\t\tlastWrittenLsnCache = ShmemInitHash(\"last_written_lsn_cache\",\n\t\t\tlwlsn_cache_size, lwlsn_cache_size,\n\t\t\t\t\t\t\t\t\t\t&info,\n\t\t\t\t\t\t\t\t\t\tHASH_ELEM | HASH_BLOBS);\n\t\tLwLsnCache = ShmemInitStruct(\"neon/LwLsnCacheCtl\", sizeof(LwLsnCacheCtl), &found);\n\t\t// Now set the size in the struct\n\t\tLwLsnCache->lastWrittenLsnCacheSize = lwlsn_cache_size;\n\t\tif (found) {\n\t\t\treturn;\n\t\t}\n\t}\n\tdlist_init(&LwLsnCache->lastWrittenLsnLRU);\n    LwLsnCache->maxLastWrittenLsn = GetRedoRecPtr();\n}\n\n/*\n * neon_get_lwlsn -- Returns maximal LSN of written page.\n * It returns an upper bound for the last written LSN of a given page,\n * either from a cached last written LSN or a global maximum last written LSN.\n * If rnode is InvalidOid then we calculate maximum among all cached LSN and maxLastWrittenLsn.\n * If cache is large enough, iterating 
through all hash items may be rather expensive.\n * But neon_get_lwlsn(InvalidOid) is used only by neon_dbsize which is not performance critical.\n */\nXLogRecPtr\nneon_get_lwlsn(NRelFileInfo rlocator, ForkNumber forknum, BlockNumber blkno)\n{\n\tXLogRecPtr lsn;\n\tLastWrittenLsnCacheEntry* entry;\n\n\tAssert(LwLsnCache->lastWrittenLsnCacheSize != 0);\n\n\tLWLockAcquire(LastWrittenLsnLock, LW_SHARED);\n\n\t/* Maximal last written LSN among all non-cached pages */\n\tlsn = LwLsnCache->maxLastWrittenLsn;\n\n\tif (NInfoGetRelNumber(rlocator) != InvalidOid)\n\t{\n\t\tBufferTag key;\n\t\tOid spcOid = NInfoGetSpcOid(rlocator);\n\t\tOid dbOid = NInfoGetDbOid(rlocator);\n\t\tOid relNumber = NInfoGetRelNumber(rlocator);\n\t\tBufTagInit(key,  relNumber, forknum, blkno, spcOid, dbOid);\n\t\t\n\t\tentry = hash_search(lastWrittenLsnCache, &key, HASH_FIND, NULL);\n\t\tif (entry != NULL)\n\t\t\tlsn = entry->lsn;\n\t\telse\n\t\t{\n\t\t\tLWLockRelease(LastWrittenLsnLock);\n\t\t\tLWLockAcquire(LastWrittenLsnLock, LW_EXCLUSIVE);\n\t\t\t/*\n\t\t\t * In case of statements CREATE TABLE AS SELECT... or INSERT FROM SELECT... we are fetching data from source table\n\t\t\t * and storing it in destination table. It cause problems with prefetch last-written-lsn is known for the pages of\n\t\t\t * source table (which for example happens after compute restart). In this case we get get global value of\n\t\t\t * last-written-lsn which is changed frequently as far as we are writing pages of destination table.\n\t\t\t * As a result request-lsn for the prefetch and request-let when this page is actually needed are different\n\t\t\t * and we got exported prefetch request. 
So it actually disarms prefetch.\n\t\t\t * To prevent that, we re-insert the page with the latest LSN, so that it's\n\t\t\t * less likely the LSN for this page will get evicted from the LwLsnCache\n\t\t\t * before the page is read.\n\t\t\t */\n\t\t\t lsn = SetLastWrittenLSNForBlockRangeInternal(lsn, rlocator, forknum, blkno, 1);\n\t\t}\n\t}\n\telse\n\t{\n\t\tHASH_SEQ_STATUS seq;\n\t\t/* Find maximum of all cached LSNs */\n\t\thash_seq_init(&seq, lastWrittenLsnCache);\n\t\twhile ((entry = (LastWrittenLsnCacheEntry *) hash_seq_search(&seq)) != NULL)\n\t\t{\n\t\t\tif (entry->lsn > lsn)\n\t\t\t\tlsn = entry->lsn;\n\t\t}\n\t}\n\tLWLockRelease(LastWrittenLsnLock);\n\n\treturn lsn;\n}\n\nstatic void neon_set_max_lwlsn(XLogRecPtr lsn) {\n\tLWLockAcquire(LastWrittenLsnLock, LW_EXCLUSIVE);\n\tLwLsnCache->maxLastWrittenLsn = lsn;\n\tLWLockRelease(LastWrittenLsnLock);\n}\n\n/*\n * GetLastWrittenLSN -- Returns maximal LSN of written page.\n * It returns an upper bound for the last written LSN of a given page,\n * either from a cached last written LSN or a global maximum last written LSN.\n * If rnode is InvalidOid then we calculate maximum among all cached LSN and maxLastWrittenLsn.\n * If cache is large enough, iterating through all hash items may be rather expensive.\n * But GetLastWrittenLSN(InvalidOid) is used only by neon_dbsize which is not performance critical.\n */\nvoid\nneon_get_lwlsn_v(NRelFileInfo relfilenode, ForkNumber forknum,\n\t\t\t\t   BlockNumber blkno, int nblocks, XLogRecPtr *lsns)\n{\n\tLastWrittenLsnCacheEntry* entry;\n\tXLogRecPtr lsn;\n\n\tAssert(LwLsnCache->lastWrittenLsnCacheSize != 0);\n\tAssert(nblocks > 0);\n\tAssert(PointerIsValid(lsns));\n\n\tLWLockAcquire(LastWrittenLsnLock, LW_SHARED);\n\n\tif (NInfoGetRelNumber(relfilenode) != InvalidOid)\n\t{\n\t\tBufferTag key;\n\t\tbool missed_keys = false;\n\t\tOid spcOid = NInfoGetSpcOid(relfilenode);\n\t\tOid dbOid = NInfoGetDbOid(relfilenode);\n\t\tOid relNumber = 
NInfoGetRelNumber(relfilenode);\n\t\tBufTagInit(key,  relNumber, forknum, blkno, spcOid, dbOid);\n\n\t\tfor (int i = 0; i < nblocks; i++)\n\t\t{\n\t\t\t/* Maximal last written LSN among all non-cached pages */\n\t\t\tkey.blockNum = blkno + i;\n\n\t\t\tentry = hash_search(lastWrittenLsnCache, &key, HASH_FIND, NULL);\n\t\t\tif (entry != NULL)\n\t\t\t{\n \t\t\t\tlsns[i] = entry->lsn;\n\t\t\t}\n\t\t\telse\n\t\t\t{\n\t\t\t\t/* Mark this block's LSN as missing - we'll update the LwLSN for missing blocks in bulk later */\n\t\t\t\tlsns[i] = InvalidXLogRecPtr;\n\t\t\t\tmissed_keys = true;\n\t\t\t}\n\t\t}\n\n\t\t/*\n\t\t * If we had any missing LwLSN entries, we add the missing ones now.\n\t\t * By doing the insertions in one batch, we decrease lock contention.\n\t\t */\n\t\tif (missed_keys)\n\t\t{\n\t\t\tLWLockRelease(LastWrittenLsnLock);\n\t\t\tLWLockAcquire(LastWrittenLsnLock, LW_EXCLUSIVE);\n\n\t\t\tlsn = LwLsnCache->maxLastWrittenLsn;\n\n\t\t\tfor (int i = 0; i < nblocks; i++)\n\t\t\t{\n\t\t\t\tif (lsns[i] == InvalidXLogRecPtr)\n\t\t\t\t{\n\t\t\t\t\tlsns[i] = lsn;\n\t\t\t\t\tSetLastWrittenLSNForBlockRangeInternal(lsn, relfilenode, forknum, blkno + i, 1);\n\t\t\t\t}\n\t\t\t}\n\t\t}\n\t}\n\telse\n\t{\n\t\tHASH_SEQ_STATUS seq;\n\t\tlsn = LwLsnCache->maxLastWrittenLsn;\n\t\t/* Find maximum of all cached LSNs */\n\t\thash_seq_init(&seq, lastWrittenLsnCache);\n\t\twhile ((entry = (LastWrittenLsnCacheEntry *) hash_seq_search(&seq)) != NULL)\n\t\t{\n\t\t\tif (entry->lsn > lsn)\n\t\t\t\tlsn = entry->lsn;\n\t\t}\n\n\t\tfor (int i = 0; i < nblocks; i++)\n\t\t\tlsns[i] = lsn;\n\t}\n\tLWLockRelease(LastWrittenLsnLock);\n}\n\n/*\n * Guts for SetLastWrittenLSNForBlockRange.\n * Caller must ensure LastWrittenLsnLock is held in exclusive mode.\n */\nstatic XLogRecPtr\nSetLastWrittenLSNForBlockRangeInternal(XLogRecPtr lsn,\n\t\t\t\t\t\t\t\t\t   NRelFileInfo rlocator,\n\t\t\t\t\t\t\t\t\t   ForkNumber forknum,\n\t\t\t\t\t\t\t\t\t   BlockNumber from,\n\t\t\t\t\t\t\t\t\t   BlockNumber 
n_blocks)\n{\n\tif (NInfoGetRelNumber(rlocator) == InvalidOid)\n\t{\n\t\tif (lsn > LwLsnCache->maxLastWrittenLsn)\n\t\tLwLsnCache->maxLastWrittenLsn = lsn;\n\t\telse\n\t\t\tlsn = LwLsnCache->maxLastWrittenLsn;\n\t}\n\telse\n\t{\n\t\tLastWrittenLsnCacheEntry* entry;\n\t\tBufferTag key;\n\t\tbool found;\n\t\tBlockNumber i;\n\n\t\tOid spcOid = NInfoGetSpcOid(rlocator);\n\t\tOid dbOid = NInfoGetDbOid(rlocator);\n\t\tOid relNumber = NInfoGetRelNumber(rlocator);\n\t\tBufTagInit(key,  relNumber, forknum, from, spcOid, dbOid);\n\t\tfor (i = 0; i < n_blocks; i++)\n\t\t{\n\t\t\tkey.blockNum = from + i;\n\t\t\tentry = hash_search(lastWrittenLsnCache, &key, HASH_ENTER, &found);\n\t\t\tif (found)\n\t\t\t{\n\t\t\t\tif (lsn > entry->lsn)\n\t\t\t\t\tentry->lsn = lsn;\n\t\t\t\telse\n\t\t\t\t\tlsn = entry->lsn;\n\t\t\t\t/* Unlink from LRU list */\n\t\t\t\tdlist_delete(&entry->lru_node);\n\t\t\t}\n\t\t\telse\n\t\t\t{\n\t\t\t\tentry->lsn = lsn;\n\t\t\t\tif (hash_get_num_entries(lastWrittenLsnCache) > LwLsnCache->lastWrittenLsnCacheSize)\n\t\t\t\t{\n\t\t\t\t\t/* Replace least recently used entry */\n\t\t\t\t\tLastWrittenLsnCacheEntry* victim = dlist_container(LastWrittenLsnCacheEntry, lru_node, dlist_pop_head_node(&LwLsnCache->lastWrittenLsnLRU));\n\t\t\t\t\t/* Adjust max LSN for not cached relations/chunks if needed */\n\t\t\t\t\tif (victim->lsn > LwLsnCache->maxLastWrittenLsn)\n\t\t\t\t\tLwLsnCache->maxLastWrittenLsn = victim->lsn;\n\n\t\t\t\t\thash_search(lastWrittenLsnCache, victim, HASH_REMOVE, NULL);\n\t\t\t\t}\n\t\t\t}\n\t\t\t/* Link to the end of LRU list */\n\t\t\tdlist_push_tail(&LwLsnCache->lastWrittenLsnLRU, &entry->lru_node);\n\t\t}\n\t}\n\treturn lsn;\n}\n\n/*\n * SetLastWrittenLSNForBlockRange -- Set maximal LSN of written page range.\n * We maintain cache of last written LSNs with limited size and LRU replacement\n * policy. Keeping last written LSN for each page allows to use old LSN when\n * requesting pages of unchanged or appended relations. 
Also it is critical for\n * efficient work of prefetch in case massive update operations (like vacuum or remove).\n *\n * rlocator.relNumber can be InvalidOid, in this case maxLastWrittenLsn is updated.\n * SetLastWrittenLsn with dummy rlocator is used by createdb and dbase_redo functions.\n */\nXLogRecPtr\nneon_set_lwlsn_block_range(XLogRecPtr lsn, NRelFileInfo rlocator, ForkNumber forknum, BlockNumber from, BlockNumber n_blocks)\n{\n\tif (lsn == InvalidXLogRecPtr || n_blocks == 0 || LwLsnCache->lastWrittenLsnCacheSize == 0)\n\t\treturn lsn;\n\n\tAssert(lsn >= WalSegMinSize);\n\tLWLockAcquire(LastWrittenLsnLock, LW_EXCLUSIVE);\n\tlsn = SetLastWrittenLSNForBlockRangeInternal(lsn, rlocator, forknum, from, n_blocks);\n\tLWLockRelease(LastWrittenLsnLock);\n\n\treturn lsn;\n}\n\n/*\n * neon_set_lwlsn_block_v -- Set maximal LSN of pages to their respective\n * LSNs.\n *\n * We maintain cache of last written LSNs with limited size and LRU replacement\n * policy. Keeping last written LSN for each page allows to use old LSN when\n * requesting pages of unchanged or appended relations. 
Also it is critical for\n * efficient work of prefetch in case massive update operations (like vacuum or remove).\n *\n * Note: This is different from SetLastWrittenLSNForBlockRange[Internal], in that this\n * specifies per-block LSNs, rather than only a single LSN.\n */\nXLogRecPtr\nneon_set_lwlsn_block_v(const XLogRecPtr *lsns, NRelFileInfo relfilenode,\n\t\t\t\t\t\t   ForkNumber forknum, BlockNumber blockno,\n\t\t\t\t\t\t   int nblocks)\n{\n\tLastWrittenLsnCacheEntry* entry;\n\tBufferTag\tkey;\n\tbool\t\tfound;\n\tXLogRecPtr\tmax = InvalidXLogRecPtr;\n\tOid spcOid = NInfoGetSpcOid(relfilenode);\n\tOid dbOid = NInfoGetDbOid(relfilenode);\n\tOid relNumber = NInfoGetRelNumber(relfilenode);\n\n\tif (lsns == NULL || nblocks == 0 || LwLsnCache->lastWrittenLsnCacheSize == 0 ||\n\t\tNInfoGetRelNumber(relfilenode) == InvalidOid)\n\t\treturn InvalidXLogRecPtr;\n\n\tBufTagInit(key,  relNumber, forknum, blockno, spcOid, dbOid);\n\n\tLWLockAcquire(LastWrittenLsnLock, LW_EXCLUSIVE);\n\n\tfor (int i = 0; i < nblocks; i++)\n\t{\n\t\tXLogRecPtr\tlsn = lsns[i];\n\n\t\tif (lsn == InvalidXLogRecPtr)\n\t\t\tcontinue;\n\n\t\tAssert(lsn >= WalSegMinSize);\n\t\tkey.blockNum = blockno + i;\n\t\tentry = hash_search(lastWrittenLsnCache, &key, HASH_ENTER, &found);\n\t\tif (found)\n\t\t{\n\t\t\tif (lsn > entry->lsn)\n\t\t\t\tentry->lsn = lsn;\n\t\t\telse\n\t\t\t\tlsn = entry->lsn;\n\t\t\t/* Unlink from LRU list */\n\t\t\tdlist_delete(&entry->lru_node);\n\t\t}\n\t\telse\n\t\t{\n\t\t\tentry->lsn = lsn;\n\t\t\tif (hash_get_num_entries(lastWrittenLsnCache) > LwLsnCache->lastWrittenLsnCacheSize)\n\t\t\t{\n\t\t\t\t/* Replace least recently used entry */\n\t\t\t\tLastWrittenLsnCacheEntry* victim = dlist_container(LastWrittenLsnCacheEntry, lru_node, dlist_pop_head_node(&LwLsnCache->lastWrittenLsnLRU));\n\t\t\t\t/* Adjust max LSN for not cached relations/chunks if needed */\n\t\t\t\tif (victim->lsn > LwLsnCache->maxLastWrittenLsn)\n\t\t\t\t\tLwLsnCache->maxLastWrittenLsn = 
victim->lsn;\n\n\t\t\t\thash_search(lastWrittenLsnCache, victim, HASH_REMOVE, NULL);\n\t\t\t}\n\t\t}\n\t\t/* Link to the end of LRU list */\n\t\tdlist_push_tail(&LwLsnCache->lastWrittenLsnLRU, &entry->lru_node);\n\t\tmax = Max(max, lsn);\n\t}\n\n\tLWLockRelease(LastWrittenLsnLock);\n\n\treturn max;\n}\n\n/*\n * neon_set_lwlsn_block -- Set maximal LSN for block\n */\nXLogRecPtr\nneon_set_lwlsn_block(XLogRecPtr lsn, NRelFileInfo rlocator, ForkNumber forknum, BlockNumber blkno)\n{\n\treturn neon_set_lwlsn_block_range(lsn, rlocator, forknum, blkno, 1);\n}\n\n/*\n * neon_set_lwlsn_relation -- Set maximal LSN for relation metadata\n */\nXLogRecPtr\nneon_set_lwlsn_relation(XLogRecPtr lsn, NRelFileInfo rlocator, ForkNumber forknum)\n{\n\treturn neon_set_lwlsn_block(lsn, rlocator, forknum, REL_METADATA_PSEUDO_BLOCKNO);\n}\n\n/*\n * neon_set_lwlsn_db -- Set maximal LSN for the whole database\n */\nXLogRecPtr\nneon_set_lwlsn_db(XLogRecPtr lsn)\n{\n\tNRelFileInfo dummyNode = {InvalidOid, InvalidOid, InvalidOid};\n\treturn neon_set_lwlsn_block(lsn, dummyNode, MAIN_FORKNUM, 0);\n}\n\n"
  },
  {
    "path": "pgxn/neon/neon_lwlsncache.h",
    "content": "#ifndef NEON_LWLSNCACHE_H\n#define NEON_LWLSNCACHE_H\n\n#include \"neon_pgversioncompat.h\"\n\nvoid init_lwlsncache(void);\n\n/* Hooks */\nXLogRecPtr neon_get_lwlsn(NRelFileInfo rlocator, ForkNumber forknum, BlockNumber blkno);\nvoid neon_get_lwlsn_v(NRelFileInfo relfilenode, ForkNumber forknum, BlockNumber blkno, int nblocks, XLogRecPtr *lsns);\nXLogRecPtr neon_set_lwlsn_block_range(XLogRecPtr lsn, NRelFileInfo rlocator, ForkNumber forknum, BlockNumber from, BlockNumber n_blocks);\nXLogRecPtr neon_set_lwlsn_block_v(const XLogRecPtr *lsns, NRelFileInfo relfilenode, ForkNumber forknum, BlockNumber blockno, int nblocks);\nXLogRecPtr neon_set_lwlsn_block(XLogRecPtr lsn, NRelFileInfo rlocator, ForkNumber forknum, BlockNumber blkno);\nXLogRecPtr neon_set_lwlsn_relation(XLogRecPtr lsn, NRelFileInfo rlocator, ForkNumber forknum);\nXLogRecPtr neon_set_lwlsn_db(XLogRecPtr lsn);\n\n#endif /* NEON_LWLSNCACHE_H */"
  },
  {
    "path": "pgxn/neon/neon_perf_counters.c",
    "content": "/*-------------------------------------------------------------------------\n *\n * neon_perf_counters.c\n *\t  Collect statistics about Neon I/O\n *\n * Each backend has its own set of counters in shared memory.\n *\n *-------------------------------------------------------------------------\n */\n#include \"postgres.h\"\n\n#include <math.h>\n\n#include \"funcapi.h\"\n#include \"miscadmin.h\"\n#include \"storage/proc.h\"\n#include \"storage/shmem.h\"\n#include \"utils/builtins.h\"\n\n#include \"neon.h\"\n#include \"neon_perf_counters.h\"\n#include \"walproposer.h\"\n\n/* BEGIN_HADRON */\ndatabricks_metrics *databricks_metrics_shared;\n\nSize\nDatabricksMetricsShmemSize(void)\n{\n\treturn sizeof(databricks_metrics);\n}\n\nvoid\nDatabricksMetricsShmemInit(void)\n{\n\tbool\t\tfound;\n\n\tdatabricks_metrics_shared =\n\t\tShmemInitStruct(\"Databricks counters\",\n\t\t\t\t\t\tDatabricksMetricsShmemSize(),\n\t\t\t\t\t\t&found);\n\tAssert(found == IsUnderPostmaster);\n\tif (!found)\n\t{\n\t\tpg_atomic_init_u32(&databricks_metrics_shared->index_corruption_count, 0);\n\t\tpg_atomic_init_u32(&databricks_metrics_shared->data_corruption_count, 0);\n\t\tpg_atomic_init_u32(&databricks_metrics_shared->internal_error_count, 0);\n\t\tpg_atomic_init_u32(&databricks_metrics_shared->ps_corruption_detected, 0);\n\t}\n}\n/* END_HADRON */\n\nneon_per_backend_counters *neon_per_backend_counters_shared;\n\nvoid\nNeonPerfCountersShmemRequest(void)\n{\n\tSize size;\n#if PG_MAJORVERSION_NUM < 15\n\t/* Hack: in PG14 MaxBackends is not initialized at the time of calling NeonPerfCountersShmemRequest function.\n\t * Do it ourselves and then undo to prevent assertion failure\n\t */\n\tAssert(MaxBackends == 0); /* not initialized yet */\n\tInitializeMaxBackends();\n\tsize = mul_size(NUM_NEON_PERF_COUNTER_SLOTS, sizeof(neon_per_backend_counters));\n\tMaxBackends = 0;\n#else\n\tsize = mul_size(NUM_NEON_PERF_COUNTER_SLOTS, sizeof(neon_per_backend_counters));\n#endif\n\tif 
(lakebase_mode) {\n\t\tsize = add_size(size, DatabricksMetricsShmemSize());\n\t}\n\tRequestAddinShmemSpace(size);\n}\n\nvoid\nNeonPerfCountersShmemInit(void)\n{\n\tbool\t\tfound;\n\n\tneon_per_backend_counters_shared =\n\t\tShmemInitStruct(\"Neon perf counters\",\n\t\t\t\t\t\tmul_size(NUM_NEON_PERF_COUNTER_SLOTS,\n\t\t\t\t\t\t\t\t sizeof(neon_per_backend_counters)),\n\t\t\t\t\t\t&found);\n\tAssert(found == IsUnderPostmaster);\n\tif (!found)\n\t{\n\t\t/* shared memory is initialized to zeros, so nothing to do here */\n\t}\n}\n\nstatic inline void\ninc_iohist(IOHistogram hist, uint64 latency_us)\n{\n\tint\t\t\tlo = 0;\n\tint\t\t\thi = NUM_IO_WAIT_BUCKETS - 1;\n\n\t/* Find the right bucket with binary search */\n\twhile (lo < hi)\n\t{\n\t\tint\t\t\tmid = (lo + hi) / 2;\n\n\t\tif (latency_us < io_wait_bucket_thresholds[mid])\n\t\t\thi = mid;\n\t\telse\n\t\t\tlo = mid + 1;\n\t}\n\thist->wait_us_bucket[lo]++;\n\thist->wait_us_sum += latency_us;\n\thist->wait_us_count++;\n}\n\nstatic inline void\ninc_qthist(QTHistogram hist, uint64 elapsed_us)\n{\n\tint\t\t\tlo = 0;\n\tint\t\t\thi = NUM_QT_BUCKETS - 1;\n\n\t/* Find the right bucket with binary search */\n\twhile (lo < hi)\n\t{\n\t\tint\t\t\tmid = (lo + hi) / 2;\n\n\t\tif (elapsed_us < qt_bucket_thresholds[mid])\n\t\t\thi = mid;\n\t\telse\n\t\t\tlo = mid + 1;\n\t}\n\thist->elapsed_us_bucket[lo]++;\n\thist->elapsed_us_sum += elapsed_us;\n\thist->elapsed_us_count++;\n}\n\n/*\n * Count a GetPage wait operation.\n */\nvoid\ninc_getpage_wait(uint64 latency)\n{\n\tinc_iohist(&MyNeonCounters->getpage_hist, latency);\n}\n\n/*\n * Count an LFC read wait operation.\n */\nvoid\ninc_page_cache_read_wait(uint64 latency)\n{\n\tinc_iohist(&MyNeonCounters->file_cache_read_hist, latency);\n}\n\n/*\n * Count an LFC write wait operation.\n */\nvoid\ninc_page_cache_write_wait(uint64 latency)\n{\n\tinc_iohist(&MyNeonCounters->file_cache_write_hist, latency);\n}\n\n\nvoid\ninc_query_time(uint64 
elapsed)\n{\n\tinc_qthist(&MyNeonCounters->query_time_hist, elapsed);\n}\n\n/*\n * Support functions for the views, neon_backend_perf_counters and\n * neon_perf_counters.\n */\n\ntypedef struct\n{\n\tconst char *name;\n\tbool\t\tis_bucket;\n\tdouble\t\tbucket_le;\n\tdouble\t\tvalue;\n} metric_t;\n\nstatic int\nio_histogram_to_metrics(IOHistogram histogram,\n\t\t\t\t\t\tmetric_t *metrics,\n\t\t\t\t\t\tconst char *count,\n\t\t\t\t\t\tconst char *sum,\n\t\t\t\t\t\tconst char *bucket)\n{\n\tint\t\ti = 0;\n\tuint64\tbucket_accum = 0;\n\n\tmetrics[i].name = count;\n\tmetrics[i].is_bucket = false;\n\tmetrics[i].value = (double) histogram->wait_us_count;\n\ti++;\n\tmetrics[i].name = sum;\n\tmetrics[i].is_bucket = false;\n\tmetrics[i].value = (double) histogram->wait_us_sum / 1000000.0;\n\ti++;\n\tfor (int bucketno = 0; bucketno < NUM_IO_WAIT_BUCKETS; bucketno++)\n\t{\n\t\tuint64\t\tthreshold = io_wait_bucket_thresholds[bucketno];\n\n\t\tbucket_accum += histogram->wait_us_bucket[bucketno];\n\n\t\tmetrics[i].name = bucket;\n\t\tmetrics[i].is_bucket = true;\n\t\tmetrics[i].bucket_le = (threshold == UINT64_MAX) ? 
INFINITY : ((double) threshold) / 1000000.0;\n\t\tmetrics[i].value = (double) bucket_accum;\n\t\ti++;\n\t}\n\n\treturn i;\n}\n\nstatic int\nqt_histogram_to_metrics(QTHistogram histogram,\n\t\t\t\t\t\tmetric_t *metrics,\n\t\t\t\t\t\tconst char *count,\n\t\t\t\t\t\tconst char *sum,\n\t\t\t\t\t\tconst char *bucket)\n{\n\tint\t\ti = 0;\n\tuint64\tbucket_accum = 0;\n\n\tmetrics[i].name = count;\n\tmetrics[i].is_bucket = false;\n\tmetrics[i].value = (double) histogram->elapsed_us_count;\n\ti++;\n\tmetrics[i].name = sum;\n\tmetrics[i].is_bucket = false;\n\tmetrics[i].value = (double) histogram->elapsed_us_sum / 1000000.0;\n\ti++;\n\tfor (int bucketno = 0; bucketno < NUM_QT_BUCKETS; bucketno++)\n\t{\n\t\tuint64\t\tthreshold = qt_bucket_thresholds[bucketno];\n\n\t\tbucket_accum += histogram->elapsed_us_bucket[bucketno];\n\n\t\tmetrics[i].name = bucket;\n\t\tmetrics[i].is_bucket = true;\n\t\tmetrics[i].bucket_le = (threshold == UINT64_MAX) ? INFINITY : ((double) threshold) / 1000000.0;\n\t\tmetrics[i].value = (double) bucket_accum;\n\t\ti++;\n\t}\n\n\treturn i;\n}\n\nstatic metric_t *\nneon_perf_counters_to_metrics(neon_per_backend_counters *counters)\n{\n#define NUM_METRICS ((2 + NUM_IO_WAIT_BUCKETS) * 3 + (2 + NUM_QT_BUCKETS) + 12)\n\tmetric_t   *metrics = palloc((NUM_METRICS + 1) * sizeof(metric_t));\n\tint\t\t\ti = 0;\n\n#define APPEND_METRIC(_name) do { \\\n\t\tmetrics[i].name = #_name; \\\n\t\tmetrics[i].is_bucket = false; \\\n\t\tmetrics[i].value = (double) counters->_name; \\\n\t\ti++; \\\n\t} while (false)\n\n\ti += io_histogram_to_metrics(&counters->getpage_hist, &metrics[i],\n\t\t\t\t\t\t\t\t \"getpage_wait_seconds_count\",\n\t\t\t\t\t\t\t\t \"getpage_wait_seconds_sum\",\n\t\t\t\t\t\t\t\t 
\"getpage_wait_seconds_bucket\");\n\n\tAPPEND_METRIC(getpage_prefetch_requests_total);\n\tAPPEND_METRIC(getpage_sync_requests_total);\n\tAPPEND_METRIC(compute_getpage_stuck_requests_total);\n\tAPPEND_METRIC(compute_getpage_max_inflight_stuck_time_ms);\n\tAPPEND_METRIC(getpage_prefetch_misses_total);\n\tAPPEND_METRIC(getpage_prefetch_discards_total);\n\tAPPEND_METRIC(pageserver_requests_sent_total);\n\tAPPEND_METRIC(pageserver_disconnects_total);\n\tAPPEND_METRIC(pageserver_send_flushes_total);\n\tAPPEND_METRIC(pageserver_open_requests);\n\tAPPEND_METRIC(getpage_prefetches_buffered);\n\n\tAPPEND_METRIC(file_cache_hits_total);\n\n\ti += io_histogram_to_metrics(&counters->file_cache_read_hist, &metrics[i],\n\t\t\t\t\t\t\t\t \"file_cache_read_wait_seconds_count\",\n\t\t\t\t\t\t\t\t \"file_cache_read_wait_seconds_sum\",\n\t\t\t\t\t\t\t\t \"file_cache_read_wait_seconds_bucket\");\n\ti += io_histogram_to_metrics(&counters->file_cache_write_hist, &metrics[i],\n\t\t\t\t\t\t\t\t \"file_cache_write_wait_seconds_count\",\n\t\t\t\t\t\t\t\t \"file_cache_write_wait_seconds_sum\",\n\t\t\t\t\t\t\t\t \"file_cache_write_wait_seconds_bucket\");\n\n\ti += qt_histogram_to_metrics(&counters->query_time_hist, &metrics[i],\n\t\t\t\t\t\t\t\t \"query_time_seconds_count\",\n\t\t\t\t\t\t\t\t \"query_time_seconds_sum\",\n\t\t\t\t\t\t\t\t \"query_time_seconds_bucket\");\n\n\tAssert(i == NUM_METRICS);\n\n#undef APPEND_METRIC\n#undef NUM_METRICS\n\n\t/* NULL entry marks end of array */\n\tmetrics[i].name = NULL;\n\tmetrics[i].value = 0;\n\n\treturn metrics;\n}\n\n/*\n * Write metric to three output Datums\n */\nstatic void\nmetric_to_datums(metric_t *m, Datum *values, bool *nulls)\n{\n\tvalues[0] = CStringGetTextDatum(m->name);\n\tnulls[0] = false;\n\tif (m->is_bucket)\n\t{\n\t\tvalues[1] = Float8GetDatum(m->bucket_le);\n\t\tnulls[1] = false;\n\t}\n\telse\n\t{\n\t\tvalues[1] = (Datum) 0;\n\t\tnulls[1] = true;\n\t}\n\tvalues[2] = Float8GetDatum(m->value);\n\tnulls[2] = 
false;\n}\n\nPG_FUNCTION_INFO_V1(neon_get_backend_perf_counters);\nDatum\nneon_get_backend_perf_counters(PG_FUNCTION_ARGS)\n{\n\tReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;\n\tDatum\t\tvalues[5];\n\tbool\t\tnulls[5];\n\n\t/* We put all the tuples into a tuplestore in one go. */\n\tInitMaterializedSRF(fcinfo, 0);\n\n\tfor (int procno = 0; procno < NUM_NEON_PERF_COUNTER_SLOTS; procno++)\n\t{\n\t\tPGPROC\t   *proc = GetPGProcByNumber(procno);\n\t\tint\t\t\tpid = proc->pid;\n\t\tneon_per_backend_counters *counters = &neon_per_backend_counters_shared[procno];\n\t\tmetric_t   *metrics = neon_perf_counters_to_metrics(counters);\n\n\t\tvalues[0] = Int32GetDatum(procno);\n\t\tnulls[0] = false;\n\t\tvalues[1] = Int32GetDatum(pid);\n\t\tnulls[1] = false;\n\n\t\tfor (int i = 0; metrics[i].name != NULL; i++)\n\t\t{\n\t\t\tmetric_to_datums(&metrics[i], &values[2], &nulls[2]);\n\t\t\ttuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc, values, nulls);\n\t\t}\n\n\t\tpfree(metrics);\n\t}\n\n\treturn (Datum) 0;\n}\n\nstatic inline void\nio_histogram_merge_into(IOHistogram into, IOHistogram from)\n{\n\tinto->wait_us_count += from->wait_us_count;\n\tinto->wait_us_sum += from->wait_us_sum;\n\tfor (int bucketno = 0; bucketno < NUM_IO_WAIT_BUCKETS; bucketno++)\n\t\tinto->wait_us_bucket[bucketno] += from->wait_us_bucket[bucketno];\n}\n\nstatic inline void\nqt_histogram_merge_into(QTHistogram into, QTHistogram from)\n{\n\tinto->elapsed_us_count += from->elapsed_us_count;\n\tinto->elapsed_us_sum += from->elapsed_us_sum;\n\tfor (int bucketno = 0; bucketno < NUM_QT_BUCKETS; bucketno++)\n\t\tinto->elapsed_us_bucket[bucketno] += from->elapsed_us_bucket[bucketno];\n}\n\nPG_FUNCTION_INFO_V1(neon_get_perf_counters);\nDatum\nneon_get_perf_counters(PG_FUNCTION_ARGS)\n{\n\tReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;\n\tDatum\t\tvalues[3];\n\tbool\t\tnulls[3];\n\tneon_per_backend_counters totals = {0};\n\tmetric_t   *metrics;\n\n\t/* BEGIN_HADRON 
*/\n\tWalproposerShmemState *wp_shmem;\n\tuint32 num_safekeepers;\n\tuint32 num_active_safekeepers;\n\t/* END_HADRON */\n\n\t/* We put all the tuples into a tuplestore in one go. */\n\tInitMaterializedSRF(fcinfo, 0);\n\n\t/* Aggregate the counters across all backends */\n\tfor (int procno = 0; procno < NUM_NEON_PERF_COUNTER_SLOTS; procno++)\n\t{\n\t\tneon_per_backend_counters *counters = &neon_per_backend_counters_shared[procno];\n\n\t\tio_histogram_merge_into(&totals.getpage_hist, &counters->getpage_hist);\n\t\ttotals.getpage_prefetch_requests_total += counters->getpage_prefetch_requests_total;\n\t\ttotals.getpage_sync_requests_total += counters->getpage_sync_requests_total;\n\t\ttotals.getpage_prefetch_misses_total += counters->getpage_prefetch_misses_total;\n\t\ttotals.getpage_prefetch_discards_total += counters->getpage_prefetch_discards_total;\n\t\ttotals.pageserver_requests_sent_total += counters->pageserver_requests_sent_total;\n\t\ttotals.pageserver_disconnects_total += counters->pageserver_disconnects_total;\n\t\ttotals.pageserver_send_flushes_total += counters->pageserver_send_flushes_total;\n\t\ttotals.pageserver_open_requests += counters->pageserver_open_requests;\n\t\ttotals.getpage_prefetches_buffered += counters->getpage_prefetches_buffered;\n\t\ttotals.file_cache_hits_total += counters->file_cache_hits_total;\n\t\ttotals.compute_getpage_stuck_requests_total += counters->compute_getpage_stuck_requests_total;\n\t\ttotals.compute_getpage_max_inflight_stuck_time_ms = Max(\n\t\t\ttotals.compute_getpage_max_inflight_stuck_time_ms,\n\t\t\tcounters->compute_getpage_max_inflight_stuck_time_ms);\n\t\tio_histogram_merge_into(&totals.file_cache_read_hist, &counters->file_cache_read_hist);\n\t\tio_histogram_merge_into(&totals.file_cache_write_hist, &counters->file_cache_write_hist);\n\t\tqt_histogram_merge_into(&totals.query_time_hist, &counters->query_time_hist);\n\t}\n\n\tmetrics = neon_perf_counters_to_metrics(&totals);\n\tfor (int i = 0; metrics[i].name != 
NULL; i++)\n\t{\n\t\tmetric_to_datums(&metrics[i], &values[0], &nulls[0]);\n\t\ttuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc, values, nulls);\n\t}\n\n\tif (lakebase_mode) {\n\n\t\tif (databricks_test_hook == TestHookCorruption) {\n\t\t\tereport(ERROR,\n\t\t\t\t\t\t(errcode(ERRCODE_DATA_CORRUPTED),\n\t\t\t\t\t\terrmsg(\"test corruption\")));\n\t\t}\n\n\t\t// Not ideal but piggyback our databricks counters into the neon perf counters view\n\t\t// so that we don't need to introduce neon--1.x+1.sql to add a new view.\n\t\t{\n\t\t// Keeping this code in its own block to work around the C90 \"don't mix declarations and code\" rule when we define\n\t\t// the `databricks_metrics` array in the next block. Yes, we are seriously dealing with C90 rules in 2025.\n\n\t\t// Read safekeeper status from wal proposer shared memory first.\n\t\t// Note that we are taking a mutex when reading from walproposer shared memory so that the total safekeeper count is\n\t\t// consistent with the active wal acceptors count. 
Assuming that we don't query this view too often the mutex should\n\t\t// not be a huge deal.\n\t\twp_shmem = GetWalpropShmemState();\n\t\tSpinLockAcquire(&wp_shmem->mutex);\n\t\tnum_safekeepers = wp_shmem->num_safekeepers;\n\t\tnum_active_safekeepers = 0;\n\t\tfor (int i = 0; i < num_safekeepers; i++) {\n\t\t\tif (wp_shmem->safekeeper_status[i] == 1) {\n\t\t\t\tnum_active_safekeepers++;\n\t\t\t}\n\t\t}\n\t\tSpinLockRelease(&wp_shmem->mutex);\n\t}\n\t{\n\t\t\tmetric_t databricks_metrics[] = {\n\t\t\t\t{\"sql_index_corruption_count\", false, 0, (double) pg_atomic_read_u32(&databricks_metrics_shared->index_corruption_count)},\n\t\t\t\t{\"sql_data_corruption_count\", false, 0, (double) pg_atomic_read_u32(&databricks_metrics_shared->data_corruption_count)},\n\t\t\t\t{\"sql_internal_error_count\", false, 0, (double) pg_atomic_read_u32(&databricks_metrics_shared->internal_error_count)},\n\t\t\t\t{\"ps_corruption_detected\", false, 0, (double) pg_atomic_read_u32(&databricks_metrics_shared->ps_corruption_detected)},\n\t\t\t\t{\"num_active_safekeepers\", false, 0.0, (double) num_active_safekeepers},\n\t\t\t\t{\"num_configured_safekeepers\", false, 0.0, (double) num_safekeepers},\n\t\t\t\t{NULL, false, 0, 0},\n\t\t\t};\n\t\t\tfor (int i = 0; databricks_metrics[i].name != NULL; i++)\n\t\t\t{\n\t\t\t\tmetric_to_datums(&databricks_metrics[i], &values[0], &nulls[0]);\n\t\t\t\ttuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc, values, nulls);\n\t\t\t}\n\t\t}\n\t\t/* END_HADRON */\n\t}\n\n\tpfree(metrics);\n\n\treturn (Datum) 0;\n}\n"
  },
  {
    "path": "pgxn/neon/neon_perf_counters.h",
    "content": "/*-------------------------------------------------------------------------\n *\n * neon_perf_counters.h\n *\t  Performance counters for neon storage requests\n *-------------------------------------------------------------------------\n */\n\n#ifndef NEON_PERF_COUNTERS_H\n#define NEON_PERF_COUNTERS_H\n\n#if PG_VERSION_NUM >= 170000\n#include \"storage/procnumber.h\"\n#else\n#include \"storage/backendid.h\"\n#endif\n#include \"storage/proc.h\"\n\nstatic const uint64 io_wait_bucket_thresholds[] = {\n\t       2,        3,        6,        10,  /* 0 us   - 10 us */\n\t      20,       30,       60,       100,  /* 10 us  - 100 us */\n\t     200,      300,      600,\t   1000,  /* 100 us - 1 ms */\n\t    2000,     3000,     6000,     10000,  /* 1 ms   - 10 ms */\n\t   20000,    30000,    60000,    100000,  /* 10 ms  - 100 ms */\n\t  200000,   300000,   600000,   1000000,  /* 100 ms - 1 s */\n\t 2000000,  3000000,  6000000,  10000000,  /* 1 s - 10 s */\n\tUINT64_MAX,\n};\n#define NUM_IO_WAIT_BUCKETS (lengthof(io_wait_bucket_thresholds))\n\ntypedef struct IOHistogramData\n{\n\tuint64\t\twait_us_count;\n\tuint64\t\twait_us_sum;\n\tuint64\t\twait_us_bucket[NUM_IO_WAIT_BUCKETS];\n} IOHistogramData;\n\ntypedef IOHistogramData *IOHistogram;\n\nstatic const uint64 qt_bucket_thresholds[] = {\n\t       2,        3,        6,        10,  /* 0 us   - 10 us */\n\t      20,       30,       60,       100,  /* 10 us  - 100 us */\n\t     200,      300,      600,\t   1000,  /* 100 us - 1 ms */\n\t    2000,     3000,     6000,     10000,  /* 1 ms   - 10 ms */\n\t   20000,    30000,    60000,    100000,  /* 10 ms  - 100 ms */\n\t  200000,   300000,   600000,   1000000,  /* 100 ms - 1 s */\n\t 2000000,  3000000,  6000000,  10000000,  /* 1 s - 10 s */\n\t20000000, 30000000, 60000000, 100000000,  /* 10 s - 100 s */\n\tUINT64_MAX,\n};\n#define NUM_QT_BUCKETS (lengthof(qt_bucket_thresholds))\n\ntypedef struct 
QTHistogramData\n{\n\tuint64\t\telapsed_us_count;\n\tuint64\t\telapsed_us_sum;\n\tuint64\t\telapsed_us_bucket[NUM_QT_BUCKETS];\n} QTHistogramData;\n\ntypedef QTHistogramData *QTHistogram;\n\ntypedef struct\n{\n\t/*\n\t * Histogram for how long an smgrread() request needs to wait for response\n\t * from pageserver. When prefetching is effective, these wait times can be\n\t * lower than the network latency to the pageserver, even zero, if the\n\t * page is already readily prefetched whenever we need to read a page.\n\t *\n\t * Note: we accumulate these in microseconds, because that's convenient in\n\t * the backend, but the 'neon_backend_perf_counters' view will convert\n\t * them to seconds, to make them more idiomatic as prometheus metrics.\n\t */\n\tIOHistogramData getpage_hist;\n\n\t/*\n\t * Total number of speculative prefetch Getpage requests and synchronous\n\t * GetPage requests sent.\n\t */\n\tuint64\t\tgetpage_prefetch_requests_total;\n\tuint64\t\tgetpage_sync_requests_total;\n\n\t/* \n\t * Total number of Getpage requests left without an answer for more than\n\t * pageserver_response_log_timeout but less than pageserver_response_disconnect_timeout\n\t */\n\tuint64 compute_getpage_stuck_requests_total;\n\n\t/* \n\t * Longest waiting time for active stuck requests. If a stuck request gets a\n\t * response or disconnects, this metric is updated\n\t */\n\tuint64 compute_getpage_max_inflight_stuck_time_ms;\n\n\t/*\n\t * Total number of readahead misses; consisting of either prefetches that\n\t * don't satisfy the LSN bounds, or cases where no readahead was issued\n\t * for the read.\n\t */\n\tuint64\t\tgetpage_prefetch_misses_total;\n\n\t/*\n\t * Number of prefetched responses that were discarded because the\n\t * prefetched page was not needed or because it was concurrently fetched /\n\t * modified by another backend.\n\t */\n\tuint64\t\tgetpage_prefetch_discards_total;\n\n\t/*\n\t * Total number of requests sent to pageserver. 
(prefetch_requests_total\n\t * and sync_request_total count only GetPage requests, this counts all\n\t * request types.)\n\t */\n\tuint64\t\tpageserver_requests_sent_total;\n\n\t/*\n\t * Number of times the connection to the pageserver was lost and the\n\t * backend had to reconnect. Note that this doesn't count the first\n\t * connection in each backend, only reconnects.\n\t */\n\tuint64\t\tpageserver_disconnects_total;\n\n\t/*\n\t * Number of network flushes to the pageserver. Synchronous requests are\n\t * flushed immediately, but when prefetching requests are sent in batches,\n\t * this can be smaller than pageserver_requests_sent_total.\n\t */\n\tuint64\t\tpageserver_send_flushes_total;\n\t\n\t/*\n\t * Number of open requests to PageServer.\n\t */\n\tuint64\t\tpageserver_open_requests;\n\n\t/*\n\t * Number of unused prefetches currently cached in this backend.\n\t */\n\tuint64\t\tgetpage_prefetches_buffered;\n\n\t/*\n\t * Number of requests satisfied from the LFC.\n\t *\n\t * This is redundant with the server-wide file_cache_hits, but this gives\n\t * per-backend granularity, and it's handy to have this in the same place\n\t * as counters for requests that went to the pageserver. Maybe move all\n\t * the LFC stats to this struct in the future?\n\t */\n\tuint64\t\tfile_cache_hits_total;\n\n\t/* LFC I/O time buckets */\n\tIOHistogramData file_cache_read_hist;\n\tIOHistogramData file_cache_write_hist;\n\n\t/*\n\t * Histogram of query execution time.\n\t */\n\tQTHistogramData query_time_hist;\n} neon_per_backend_counters;\n\n/* Pointer to the shared memory array of neon_per_backend_counters structs */\nextern neon_per_backend_counters *neon_per_backend_counters_shared;\n\n/*\n * Size of the perf counters array in shared memory. One slot for each backend\n * and aux process. 
IOW one for each PGPROC slot, except for slots reserved\n * for prepared transactions, because they're not real processes and cannot do\n * I/O.\n */\n#define NUM_NEON_PERF_COUNTER_SLOTS (MaxBackends + NUM_AUXILIARY_PROCS)\n\n#define MyNeonCounters (&neon_per_backend_counters_shared[MyProcNumber])\n\nextern void inc_getpage_wait(uint64 latency);\nextern void inc_page_cache_read_wait(uint64 latency);\nextern void inc_page_cache_write_wait(uint64 latency);\nextern void inc_query_time(uint64 elapsed);\n\nextern Size NeonPerfCountersShmemSize(void);\nextern void NeonPerfCountersShmemInit(void);\n\n/* BEGIN_HADRON */\ntypedef struct\n{\n\tpg_atomic_uint32 index_corruption_count;\n\tpg_atomic_uint32 data_corruption_count;\n\tpg_atomic_uint32 internal_error_count;\n\tpg_atomic_uint32 ps_corruption_detected;\n} databricks_metrics;\n\nextern databricks_metrics *databricks_metrics_shared;\n\nextern Size DatabricksMetricsShmemSize(void);\nextern void DatabricksMetricsShmemInit(void);\n\nextern int databricks_test_hook;\n\nstatic const int TestHookCorruption = 1;\n/* END_HADRON */\n\n\n#endif\t\t\t\t\t\t\t/* NEON_PERF_COUNTERS_H */\n"
  },
  {
    "path": "pgxn/neon/neon_pgversioncompat.c",
    "content": "/*\n * Support functions for the compatibility macros in neon_pgversioncompat.h\n */\n#include \"postgres.h\"\n\n#include \"funcapi.h\"\n#include \"miscadmin.h\"\n#include \"access/xlog.h\"\n#include \"utils/tuplestore.h\"\n\n#include \"neon_pgversioncompat.h\"\n\n#if PG_MAJORVERSION_NUM < 15\nvoid\nInitMaterializedSRF(FunctionCallInfo fcinfo, bits32 flags)\n{\n\tReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;\n\tTuplestorestate *tupstore;\n\tMemoryContext old_context,\n\t\t\t\tper_query_ctx;\n\tTupleDesc\tstored_tupdesc;\n\n\t/* check to see if caller supports returning a tuplestore */\n\tif (rsinfo == NULL || !IsA(rsinfo, ReturnSetInfo))\n\t\tereport(ERROR,\n\t\t\t\t(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),\n\t\t\t\t errmsg(\"set-valued function called in context that cannot accept a set\")));\n\n\t/*\n\t * Store the tuplestore and the tuple descriptor in ReturnSetInfo.  This\n\t * must be done in the per-query memory context.\n\t */\n\tper_query_ctx = rsinfo->econtext->ecxt_per_query_memory;\n\told_context = MemoryContextSwitchTo(per_query_ctx);\n\n\tif (get_call_result_type(fcinfo, NULL, &stored_tupdesc) != TYPEFUNC_COMPOSITE)\n\t\telog(ERROR, \"return type must be a row type\");\n\n\ttupstore = tuplestore_begin_heap(false, false, work_mem);\n\trsinfo->returnMode = SFRM_Materialize;\n\trsinfo->setResult = tupstore;\n\trsinfo->setDesc = stored_tupdesc;\n\tMemoryContextSwitchTo(old_context);\n}\n\nTimeLineID GetWALInsertionTimeLine(void)\n{\n\treturn ThisTimeLineID + 1;\n}\n\n\n#endif\n\n"
  },
  {
    "path": "pgxn/neon/neon_pgversioncompat.h",
    "content": "/*\n * Compatibility macros to cover up differences between supported PostgreSQL versions,\n * to help with compiling the same sources for all of them.\n */\n\n#ifndef NEON_PGVERSIONCOMPAT_H\n#define NEON_PGVERSIONCOMPAT_H\n\n#include \"fmgr.h\"\n#include \"storage/buf_internals.h\"\n\n#if PG_MAJORVERSION_NUM < 16\ntypedef PGAlignedBlock PGIOAlignedBlock;\n#endif\n\n#if PG_MAJORVERSION_NUM < 17\n#define NRelFileInfoBackendIsTemp(rinfo) (rinfo.backend != InvalidBackendId)\n#else\n#define NRelFileInfoBackendIsTemp(rinfo) (rinfo.backend != INVALID_PROC_NUMBER)\n#endif\n\n#define RelFileInfoEquals(a, b) ( \\\n\tNInfoGetSpcOid(a) == NInfoGetSpcOid(b) && \\\n\tNInfoGetDbOid(a) == NInfoGetDbOid(b) && \\\n\tNInfoGetRelNumber(a) == NInfoGetRelNumber(b) \\\n)\n\n/* These macros were turned into static inline functions in v16 */\n#if PG_MAJORVERSION_NUM < 16\nstatic inline bool\nBufferTagsEqual(const BufferTag *tag1, const BufferTag *tag2)\n{\n\treturn BUFFERTAGS_EQUAL(*tag1, *tag2);\n}\n\nstatic inline void\nInitBufferTag(BufferTag *tag, const RelFileNode *rnode,\n\t\t\t  ForkNumber forkNum, BlockNumber blockNum)\n{\n\tINIT_BUFFERTAG(*tag, *rnode, forkNum, blockNum);\n}\n#endif\n\n/* RelFileNode -> RelFileLocator rework */\n#if PG_MAJORVERSION_NUM < 16\n#define USE_RELFILENODE\n\n#define RELFILEINFO_HDR \"storage/relfilenode.h\"\n\n#define NRelFileInfo RelFileNode\n#define NRelFileInfoBackend RelFileNodeBackend\n#define NRelFileNumber Oid\n\n#define InfoFromRelation(rel) (rel)->rd_node\n#define InfoFromSMgrRel(srel) (srel)->smgr_rnode.node\n#define InfoBFromSMgrRel(srel) (srel)->smgr_rnode\n#define InfoFromNInfoB(ninfob) ninfob.node\n\n#define RelFileInfoFmt(rinfo) \\\n\t(rinfo).spcNode, \\\n\t(rinfo).dbNode, \\\n\t(rinfo).relNode\n\n#define RelFileInfoBackendFmt(ninfob) \\\n\t(ninfob).backend, \\\n\t(ninfob).node.spcNode, \\\n\t(ninfob).node.dbNode, \\\n\t(ninfob).node.relNode\n\n#define NInfoGetSpcOid(ninfo)\t\t(ninfo).spcNode\n#define 
NInfoGetDbOid(ninfo)\t\t(ninfo).dbNode\n#define NInfoGetRelNumber(ninfo)\t(ninfo).relNode\n\n#define CopyNRelFileInfoToBufTag(tag, rinfo) \\\n\tdo { \\\n\t\t(tag).rnode = (rinfo); \\\n\t} while (false)\n\n#define BufTagGetNRelFileInfo(tag) (tag).rnode\n\n#define BufTagGetRelNumber(tagp) ((tagp)->rnode.relNode)\n\n#define BufTagInit(tag, rel_number, fork_number, block_number, spc_oid, db_oid) \\\n\tdo { \\\n\t\tRelFileNode rnode = { .spcNode = (spc_oid), .dbNode = (db_oid), .relNode = (rel_number)}; \\\n\t\t(tag).forkNum = (fork_number);\t\t\t\t\t\t\t\t\\\n\t\t(tag).blockNum = (block_number);\t\t\t\t\t\t\t\\\n\t\t(tag).rnode = rnode;\t\t\t\t\t\t\t\t\t\t\\\n\t} while (false)\n\n#define InvalidRelFileNumber InvalidOid\n\n#define SMgrRelGetRelInfo(reln)\t   \t((reln)->smgr_rnode.node)\n\n#define DropRelationAllLocalBuffers DropRelFileNodeAllLocalBuffers\n\n#else\t\t\t\t\t\t\t/* major version >= 16 */\n\n#define USE_RELFILELOCATOR\n\n#define RELFILEINFO_HDR \"storage/relfilelocator.h\"\n\n#define NRelFileInfo RelFileLocator\n#define NRelFileInfoBackend RelFileLocatorBackend\n\n#define InfoFromRelation(rel) (rel)->rd_locator\n#define InfoFromSMgrRel(srel) (srel)->smgr_rlocator.locator\n#define InfoBFromSMgrRel(srel) (srel)->smgr_rlocator\n#define InfoFromNInfoB(ninfob) (ninfob).locator\n\n#define RelFileInfoFmt(rinfo) \\\n\t(rinfo).spcOid, \\\n\t(rinfo).dbOid, \\\n\t(rinfo).relNumber\n#define RelFileInfoBackendFmt(ninfob) \\\n\t(ninfob).backend, \\\n\t(ninfob).locator.spcOid, \\\n\t(ninfob).locator.dbOid, \\\n\t(ninfob).locator.relNumber\n\n#define NInfoGetSpcOid(ninfo)\t\t(ninfo).spcOid\n#define NInfoGetDbOid(ninfo)\t\t(ninfo).dbOid\n#define NInfoGetRelNumber(ninfo)\t(ninfo).relNumber\n\n#define CopyNRelFileInfoToBufTag(tag, rinfo) \\\n\tdo { \\\n\t\t(tag).spcOid = (rinfo).spcOid; \\\n\t\t(tag).dbOid = (rinfo).dbOid; \\\n\t\t(tag).relNumber = (rinfo).relNumber; \\\n\t} while (false)\n\n#define BufTagGetNRelFileInfo(tag) \\\n\t((RelFileLocator) { \\\n\t\t.spcOid = 
(tag).spcOid, \\\n\t\t.dbOid = (tag).dbOid, \\\n\t\t.relNumber = (tag).relNumber, \\\n\t})\n\n#define BufTagInit(tag, rel_number, fork_number, block_number, spc_oid, db_oid) \\\n\tdo { \\\n\t\t(tag).forkNum = (fork_number);\t\t\t\t\t\\\n\t\t(tag).blockNum = (block_number);\t\t\t\t\\\n\t\t(tag).spcOid = (spc_oid);\t\t\t\t\t\t\\\n\t\t(tag).dbOid = (db_oid);\t\t\t\t\t\t\t\\\n\t\t(tag).relNumber = (rel_number);\t\t\t\t\t\\\n\t} while (false)\n\n#define SMgrRelGetRelInfo(reln)\t   \t((reln)->smgr_rlocator)\n\n#define DropRelationAllLocalBuffers DropRelationAllLocalBuffers\n#endif\n\n#define NRelFileInfoInvalidate(rinfo) do { \\\n\t\tNInfoGetSpcOid(rinfo) = InvalidOid; \\\n\t\tNInfoGetDbOid(rinfo) = InvalidOid; \\\n\t\tNInfoGetRelNumber(rinfo) = InvalidRelFileNumber; \\\n\t} while (0)\n\n#if PG_MAJORVERSION_NUM < 17\n#define ProcNumber BackendId\n#define INVALID_PROC_NUMBER InvalidBackendId\n#define AmAutoVacuumWorkerProcess() (IsAutoVacuumWorkerProcess())\n#endif\n\n#if PG_MAJORVERSION_NUM < 17\n#define\tMyProcNumber (MyProc - &ProcGlobal->allProcs[0])\n#endif\n\n#if PG_MAJORVERSION_NUM < 15\nextern void InitMaterializedSRF(FunctionCallInfo fcinfo, bits32 flags);\nextern TimeLineID GetWALInsertionTimeLine(void);\n#endif\n\n/* format codes not present in PG17-; but available in PG18+ */\n#define INT64_HEX_FORMAT \"%\" INT64_MODIFIER \"x\"\n#define UINT64_HEX_FORMAT \"%\" INT64_MODIFIER \"x\"\n\n#endif\t\t\t\t\t\t\t/* NEON_PGVERSIONCOMPAT_H */\n"
  },
  {
    "path": "pgxn/neon/neon_utils.c",
    "content": "#include <sys/resource.h>\n\n#ifndef WALPROPOSER_LIB\n#include <curl/curl.h>\n#endif\n\n#include \"postgres.h\"\n\n#include \"neon_utils.h\"\n#include \"lib/stringinfo.h\"\n#include \"libpq/pqformat.h\"\n\n/*\n * Convert a character which represents a hexadecimal digit to an integer.\n *\n * Returns -1 if the character is not a hexadecimal digit.\n */\nstatic int\nHexDecodeChar(char c)\n{\n\tif (c >= '0' && c <= '9')\n\t\treturn c - '0';\n\tif (c >= 'a' && c <= 'f')\n\t\treturn c - 'a' + 10;\n\tif (c >= 'A' && c <= 'F')\n\t\treturn c - 'A' + 10;\n\n\treturn -1;\n}\n\n/*\n * Decode a hex string into a byte string, 2 hex chars per byte.\n *\n * Returns false if invalid characters are encountered; otherwise true.\n */\nbool\nHexDecodeString(uint8 *result, char *input, int nbytes)\n{\n\tint\t\t\ti;\n\n\tfor (i = 0; i < nbytes; ++i)\n\t{\n\t\tint\t\t\tn1 = HexDecodeChar(input[i * 2]);\n\t\tint\t\t\tn2 = HexDecodeChar(input[i * 2 + 1]);\n\n\t\tif (n1 < 0 || n2 < 0)\n\t\t\treturn false;\n\t\tresult[i] = n1 * 16 + n2;\n\t}\n\n\treturn true;\n}\n\n/* --------------------------------\n *\t\tpq_getmsgint16\t- get a binary 2-byte int from a message buffer\n * --------------------------------\n */\nuint16\npq_getmsgint16(StringInfo msg)\n{\n\treturn pq_getmsgint(msg, 2);\n}\n\n/* --------------------------------\n *\t\tpq_getmsgint32\t- get a binary 4-byte int from a message buffer\n * --------------------------------\n */\nuint32\npq_getmsgint32(StringInfo msg)\n{\n\treturn pq_getmsgint(msg, 4);\n}\n\n/* --------------------------------\n *\t\tpq_getmsgint32_le\t- get a binary 4-byte int from a message buffer in native (LE) order\n * --------------------------------\n */\nuint32\npq_getmsgint32_le(StringInfo msg)\n{\n\tuint32\t\tn32;\n\n\tpq_copymsgbytes(msg, (char *) &n32, sizeof(n32));\n\n\treturn n32;\n}\n\n/* --------------------------------\n *\t\tpq_getmsgint64\t- get a binary 8-byte int from a message buffer in native (LE) order\n * 
--------------------------------\n */\nuint64\npq_getmsgint64_le(StringInfo msg)\n{\n\tuint64\t\tn64;\n\n\tpq_copymsgbytes(msg, (char *) &n64, sizeof(n64));\n\n\treturn n64;\n}\n\n/* append a binary [u]int32 to a StringInfo buffer in native (LE) order */\nvoid\npq_sendint32_le(StringInfo buf, uint32 i)\n{\n\tenlargeStringInfo(buf, sizeof(uint32));\n\tmemcpy(buf->data + buf->len, &i, sizeof(uint32));\n\tbuf->len += sizeof(uint32);\n}\n\n/* append a binary [u]int64 to a StringInfo buffer in native (LE) order */\nvoid\npq_sendint64_le(StringInfo buf, uint64 i)\n{\n\tenlargeStringInfo(buf, sizeof(uint64));\n\tmemcpy(buf->data + buf->len, &i, sizeof(uint64));\n\tbuf->len += sizeof(uint64);\n}\n\n/*\n * Disables core dump for the current process.\n */\nvoid\ndisable_core_dump()\n{\n\tstruct rlimit rlim;\n\n#ifdef WALPROPOSER_LIB\t\t\t/* skip in simulation mode */\n\treturn;\n#endif\n\n\trlim.rlim_cur = 0;\n\trlim.rlim_max = 0;\n\tif (setrlimit(RLIMIT_CORE, &rlim))\n\t{\n\t\tint\t\t\tsave_errno = errno;\n\n\t\tfprintf(stderr, \"WARNING: disable cores setrlimit failed: %s\", strerror(save_errno));\n\t}\n}\n\n#ifndef WALPROPOSER_LIB\n\n/*\n * On macOS with a libcurl that has IPv6 support, curl_global_init() calls\n * SCDynamicStoreCopyProxies(), which makes the program multithreaded. An ideal\n * place to call curl_global_init() would be _PG_init(), but Neon has to be\n * added to shared_preload_libraries, which are loaded in the Postmaster\n * process. The Postmaster is not supposed to become multithreaded at any point\n * in its lifecycle. 
Postgres doesn't have any good hook that I know of to\n * initialize per-backend structures, so we have to check this on any\n * allocation of a CURL handle.\n *\n * Free the allocated CURL handle with curl_easy_cleanup(3).\n *\n * https://developer.apple.com/documentation/systemconfiguration/1517088-scdynamicstorecopyproxies\n */\nCURL *\nalloc_curl_handle(void)\n{\n\tstatic bool curl_initialized = false;\n\n\tCURL *handle;\n\n\tif (unlikely(!curl_initialized))\n\t{\n\t\t/* Protected by mutex internally */\n\t\tif (curl_global_init(CURL_GLOBAL_DEFAULT))\n\t\t{\n\t\t\telog(ERROR, \"Failed to initialize curl\");\n\t\t}\n\n\t\tcurl_initialized = true;\n\t}\n\n\thandle = curl_easy_init();\n\tif (handle == NULL)\n\t{\n\t\telog(ERROR, \"Failed to initialize curl handle\");\n\t}\n\n\treturn handle;\n}\n\n#endif\n\n/*\n * Check if a BufferTag is valid by verifying all its fields are not invalid.\n */\nbool\nBufferTagIsValid(const BufferTag *tag)\n{\n\t#if PG_MAJORVERSION_NUM >= 16\n\treturn (tag->spcOid != InvalidOid) &&\n\t\t(tag->relNumber != InvalidRelFileNumber) &&\n\t\t(tag->forkNum != InvalidForkNumber) &&\n\t\t(tag->blockNum != InvalidBlockNumber);\n\t#else\n\treturn (tag->rnode.spcNode != InvalidOid) &&\n\t\t(tag->rnode.relNode != InvalidOid) &&\n\t\t(tag->forkNum != InvalidForkNumber) &&\n\t\t(tag->blockNum != InvalidBlockNumber);\n\t#endif\n}\n"
  },
  {
    "path": "pgxn/neon/neon_utils.h",
    "content": "#ifndef __NEON_UTILS_H__\n#define __NEON_UTILS_H__\n\n#include \"lib/stringinfo.h\"\n#include \"storage/buf_internals.h\"\n\n#ifndef WALPROPOSER_LIB\n#include <curl/curl.h>\n#endif\n\nbool\t\tHexDecodeString(uint8 *result, char *input, int nbytes);\nuint16      pq_getmsgint16(StringInfo msg);\nuint32      pq_getmsgint32(StringInfo msg);\nuint32\t\tpq_getmsgint32_le(StringInfo msg);\nuint64\t\tpq_getmsgint64_le(StringInfo msg);\nvoid\t\tpq_sendint32_le(StringInfo buf, uint32 i);\nvoid\t\tpq_sendint64_le(StringInfo buf, uint64 i);\nvoid        disable_core_dump(void);\n\n/* Buffer tag validation function */\nbool\t\tBufferTagIsValid(const BufferTag *tag);\n\n#ifndef WALPROPOSER_LIB\n\nCURL *\t\talloc_curl_handle(void);\n\n#endif\n\n#endif\t\t\t\t\t\t\t/* __NEON_UTILS_H__ */\n"
  },
  {
    "path": "pgxn/neon/neon_walreader.c",
    "content": "/*\n * Like WALRead, but when WAL segment doesn't exist locally instead of throwing\n * ERROR asynchronously tries to fetch it from the most advanced safekeeper.\n *\n * We can't use libpqwalreceiver as it blocks during connection establishment\n * (and waiting for PQExec result), so use libpqwalproposer instead.\n *\n * TODO: keepalives are currently never sent, so the other side can close the\n * connection prematurely.\n *\n * TODO: close conn if reading takes too long to prevent stuck connections.\n */\n#include \"postgres.h\"\n\n#include <sys/stat.h>\n#include <unistd.h>\n\n#include \"access/xlog_internal.h\"\n#include \"access/xlogdefs.h\"\n#include \"access/xlogreader.h\"\n#include \"libpq/pqformat.h\"\n#include \"storage/fd.h\"\n#include \"utils/memutils.h\"\n#include \"utils/wait_event.h\"\n\n#include \"libpq-fe.h\"\n\n#include \"neon_walreader.h\"\n#include \"walproposer.h\"\n\n#define NEON_WALREADER_ERR_MSG_LEN 512\n\n/*\n * Can be called where NeonWALReader *state is available in the context, adds log_prefix.\n */\n#define nwr_log(elevel, fmt, ...) 
elog(elevel, \"%s\" fmt, state->log_prefix, ## __VA_ARGS__)\n\nstatic NeonWALReadResult NeonWALReadRemote(NeonWALReader *state, char *buf, XLogRecPtr startptr, Size count, TimeLineID tli);\nstatic NeonWALReadResult NeonWALReaderReadMsg(NeonWALReader *state);\nstatic bool NeonWALReadLocal(NeonWALReader *state, char *buf, XLogRecPtr startptr, Size count, TimeLineID tli);\nstatic bool is_wal_segment_exists(XLogSegNo segno, int segsize,\n\t\t\t\t\t\t\t\t  TimeLineID tli);\n\n/*\n * State of connection to donor safekeeper.\n */\ntypedef enum\n{\n\t/* no remote connection */\n\tRS_NONE,\n\t/* doing PQconnectPoll, need readable socket */\n\tRS_CONNECTING_READ,\n\t/* doing PQconnectPoll, need writable socket */\n\tRS_CONNECTING_WRITE,\n\t/* Waiting for START_REPLICATION result */\n\tRS_WAIT_EXEC_RESULT,\n\t/* replication stream established */\n\tRS_ESTABLISHED,\n} NeonWALReaderRemoteState;\n\nstruct NeonWALReader\n{\n\t/*\n\t * LSN before which we assume WAL is not available locally. Exists because\n\t * though first segment after startup always exists, part before\n\t * basebackup LSN is filled with zeros.\n\t */\n\tXLogRecPtr\tavailable_lsn;\n\tWALSegmentContext segcxt;\n\tWALOpenSegment seg;\n\tint\t\t\twre_errno;\n\tTimeLineID\tlocal_active_tlid;\n\t/* Explains failure to read, static for simplicity. */\n\tchar\t\terr_msg[NEON_WALREADER_ERR_MSG_LEN];\n\n\t/*\n\t * Saved info about request in progress, used to check validity of\n\t * arguments after resume and remember how far we accomplished it. 
req_lsn\n\t * is 0 if there is no request in progress.\n\t */\n\tXLogRecPtr\treq_lsn;\n\tSize\t\treq_len;\n\tSize\t\treq_progress;\n\tchar\t\tdonor_conninfo[MAXCONNINFO];\n\tchar\t\tdonor_name[64]; /* saved donor safekeeper name for logging */\n\tXLogRecPtr\tdonor_lsn;\n\t/* state of connection to safekeeper */\n\tNeonWALReaderRemoteState rem_state;\n\tWalProposerConn *wp_conn;\n\n\t/*\n\t * position in wp_conn recvbuf from which we'll copy WAL next time, or\n\t * NULL if there is no unprocessed message\n\t */\n\tchar\t   *wal_ptr;\n\tSize\t\twal_rem_len;\t/* how many unprocessed bytes left in recvbuf */\n\n\t/*\n\t * LSN of wal_ptr position according to walsender to cross check against\n\t * read request\n\t */\n\tXLogRecPtr\trem_lsn;\n\n\t/* prepended to lines logged by neon_walreader, if provided */\n\tchar\t\tlog_prefix[64];\n};\n\n/* palloc and initialize NeonWALReader */\nNeonWALReader *\nNeonWALReaderAllocate(int wal_segment_size, XLogRecPtr available_lsn, char *log_prefix, TimeLineID tlid)\n{\n\tNeonWALReader *reader;\n\n\t/*\n\t * Note: we allocate in TopMemoryContext, reusing the reader for all process\n\t * reads.\n\t */\n\treader = (NeonWALReader *)\n\t\tMemoryContextAllocZero(TopMemoryContext, sizeof(NeonWALReader));\n\n\treader->available_lsn = available_lsn;\n\treader->local_active_tlid = tlid;\n\treader->seg.ws_file = -1;\n\treader->seg.ws_segno = 0;\n\treader->seg.ws_tli = 0;\n\treader->segcxt.ws_segsize = wal_segment_size;\n\n\treader->rem_state = RS_NONE;\n\n\tif (log_prefix)\n\t\tstrlcpy(reader->log_prefix, log_prefix, sizeof(reader->log_prefix));\n\n\treturn reader;\n}\n\nvoid\nNeonWALReaderFree(NeonWALReader *state)\n{\n\tif (state->seg.ws_file != -1)\n\t\tneon_wal_segment_close(state);\n\tif (state->wp_conn)\n\t\tlibpqwp_disconnect(state->wp_conn);\n\tpfree(state);\n}\n\n/*\n * Like vanilla WALRead, but if requested position is before available_lsn or\n * WAL segment doesn't exist on disk, it tries to fetch needed segment from the\n * 
advanced safekeeper.\n *\n * Read 'count' bytes into 'buf', starting at location 'startptr', from WAL\n * fetched from timeline 'tli'.\n *\n * Returns NEON_WALREAD_SUCCESS if succeeded, NEON_WALREAD_ERROR if an error\n * occurs, in which case 'err' has the description. Error always closes remote\n * connection, if there was any, so socket subscription should be removed.\n *\n * NEON_WALREAD_WOULDBLOCK means caller should obtain socket to wait for with\n * NeonWALReaderSocket and call NeonWALRead again with exactly the same\n * arguments when NeonWALReaderEvents happen on the socket. Note that per libpq\n * docs during connection establishment (before first successful read) socket\n * underneath might change.\n *\n * Also, eventually walreader should switch from remote to local read; caller\n * should remove subscription to socket then by checking NeonWALReaderEvents\n * after successful read (otherwise next read might reopen the connection with\n * different socket).\n *\n * Reading not monotonically is not supported and will result in error.\n *\n * Caller should be sure that WAL up to requested LSN exists, otherwise\n * NEON_WALREAD_WOULDBLOCK might be always returned.\n */\nNeonWALReadResult\nNeonWALRead(NeonWALReader *state, char *buf, XLogRecPtr startptr, Size count, TimeLineID tli)\n{\n\t/*\n\t * If requested data is before known available basebackup lsn or there is\n\t * already active remote state, do remote read.\n\t */\n\tif (startptr < state->available_lsn || state->rem_state != RS_NONE)\n\t{\n\t\treturn NeonWALReadRemote(state, buf, startptr, count, tli);\n\t}\n\tif (NeonWALReadLocal(state, buf, startptr, count, tli))\n\t{\n\t\treturn NEON_WALREAD_SUCCESS;\n\t}\n\telse if (state->wre_errno == ENOENT)\n\t{\n\t\tnwr_log(LOG, \"local read at %X/%X len %zu failed as segment file doesn't exist, attempting remote\",\n\t\t\t\tLSN_FORMAT_ARGS(startptr), count);\n\t\treturn NeonWALReadRemote(state, buf, startptr, count, tli);\n\t}\n\telse\n\t{\n\t\treturn 
NEON_WALREAD_ERROR;\n\t}\n}\n\n/* Do the read from remote safekeeper. */\nstatic NeonWALReadResult\nNeonWALReadRemote(NeonWALReader *state, char *buf, XLogRecPtr startptr, Size count, TimeLineID tli)\n{\n\tif (state->rem_state == RS_NONE)\n\t{\n\t\tif (!NeonWALReaderUpdateDonor(state))\n\t\t{\n\t\t\tsnprintf(state->err_msg, sizeof(state->err_msg),\n\t\t\t\t\t \"failed to establish remote connection to fetch WAL: no donor available\");\n\t\t\treturn NEON_WALREAD_ERROR;\n\n\t\t}\n\t\t/* no connection yet; start one */\n\t\tnwr_log(LOG, \"establishing connection to %s, lsn=%X/%X to fetch WAL\", state->donor_name, LSN_FORMAT_ARGS(state->donor_lsn));\n\t\tstate->wp_conn = libpqwp_connect_start(state->donor_conninfo);\n\t\tif (PQstatus(state->wp_conn->pg_conn) == CONNECTION_BAD)\n\t\t{\n\t\t\tsnprintf(state->err_msg, sizeof(state->err_msg),\n\t\t\t\t\t \"failed to connect to %s to fetch WAL: immediately failed with %s\",\n\t\t\t\t\t state->donor_name, PQerrorMessage(state->wp_conn->pg_conn));\n\t\t\tNeonWALReaderResetRemote(state);\n\t\t\treturn NEON_WALREAD_ERROR;\n\t\t}\n\t\t/* we'll poll immediately */\n\t\tstate->rem_state = RS_CONNECTING_WRITE;\n\t\treturn NEON_WALREAD_WOULDBLOCK;\n\t}\n\n\tif (state->rem_state == RS_CONNECTING_READ || state->rem_state == RS_CONNECTING_WRITE)\n\t{\n\t\tswitch (PQconnectPoll(state->wp_conn->pg_conn))\n\t\t{\n\t\t\tcase PGRES_POLLING_FAILED:\n\t\t\t\tsnprintf(state->err_msg, sizeof(state->err_msg),\n\t\t\t\t\t\t \"failed to connect to %s to fetch WAL: poll error: %s\",\n\t\t\t\t\t\t state->donor_name, PQerrorMessage(state->wp_conn->pg_conn));\n\t\t\t\tNeonWALReaderResetRemote(state);\n\t\t\t\treturn NEON_WALREAD_ERROR;\n\t\t\tcase PGRES_POLLING_READING:\n\t\t\t\tstate->rem_state = RS_CONNECTING_READ;\n\t\t\t\treturn NEON_WALREAD_WOULDBLOCK;\n\t\t\tcase PGRES_POLLING_WRITING:\n\t\t\t\tstate->rem_state = RS_CONNECTING_WRITE;\n\t\t\t\treturn NEON_WALREAD_WOULDBLOCK;\n\t\t\tcase PGRES_POLLING_OK:\n\t\t\t\t{\n\t\t\t\t\t/* connection 
successfully established */\n\t\t\t\t\tchar\t\tstart_repl_query[128];\n\t\t\t\t\tterm_t\t\tterm = pg_atomic_read_u64(&GetWalpropShmemState()->mineLastElectedTerm);\n\n\t\t\t\t\t/*\n\t\t\t\t\t * Set elected walproposer's term to pull only data from\n\t\t\t\t\t * its history. Note: for logical walsender it means we\n\t\t\t\t\t * might stream WAL not yet committed by safekeepers. It\n\t\t\t\t\t * would be cleaner to fix this.\n\t\t\t\t\t *\n\t\t\t\t\t * mineLastElectedTerm shouldn't be 0 at this point\n\t\t\t\t\t * because we checked above that donor exists and it\n\t\t\t\t\t * appears only after successful election.\n\t\t\t\t\t */\n\t\t\t\t\tAssert(term > 0);\n\t\t\t\t\tsnprintf(start_repl_query, sizeof(start_repl_query),\n\t\t\t\t\t\t\t \"START_REPLICATION PHYSICAL %X/%X (term='\" UINT64_FORMAT \"')\",\n\t\t\t\t\t\t\t LSN_FORMAT_ARGS(startptr), term);\n\t\t\t\t\tnwr_log(LOG, \"connection to %s to fetch WAL succeeded, running %s\",\n\t\t\t\t\t\t\tstate->donor_name, start_repl_query);\n\t\t\t\t\tif (!libpqwp_send_query(state->wp_conn, start_repl_query))\n\t\t\t\t\t{\n\t\t\t\t\t\tsnprintf(state->err_msg, sizeof(state->err_msg),\n\t\t\t\t\t\t\t\t \"failed to send %s query to %s: %s\",\n\t\t\t\t\t\t\t\t start_repl_query, state->donor_name, PQerrorMessage(state->wp_conn->pg_conn));\n\t\t\t\t\t\tNeonWALReaderResetRemote(state);\n\t\t\t\t\t\treturn NEON_WALREAD_ERROR;\n\t\t\t\t\t}\n\t\t\t\t\tstate->rem_state = RS_WAIT_EXEC_RESULT;\n\t\t\t\t\tbreak;\n\t\t\t\t}\n\n\t\t\tdefault:\t\t\t/* there is unused PGRES_POLLING_ACTIVE */\n\t\t\t\tAssert(false);\n\t\t\t\treturn NEON_WALREAD_ERROR;\t/* keep the compiler quiet */\n\t\t}\n\t}\n\n\tif (state->rem_state == RS_WAIT_EXEC_RESULT)\n\t{\n\t\tswitch (libpqwp_get_query_result(state->wp_conn))\n\t\t{\n\t\t\tcase WP_EXEC_SUCCESS_COPYBOTH:\n\t\t\t\tstate->rem_state = RS_ESTABLISHED;\n\t\t\t\tbreak;\n\t\t\tcase WP_EXEC_NEEDS_INPUT:\n\t\t\t\treturn NEON_WALREAD_WOULDBLOCK;\n\t\t\tcase WP_EXEC_FAILED:\n\t\t\t\tsnprintf(state->err_msg, 
sizeof(state->err_msg),\n\t\t\t\t\t\t \"get START_REPLICATION result from %s failed: %s\",\n\t\t\t\t\t\t state->donor_name, PQerrorMessage(state->wp_conn->pg_conn));\n\t\t\t\tNeonWALReaderResetRemote(state);\n\t\t\t\treturn NEON_WALREAD_ERROR;\n\t\t\tdefault:\t\t\t/* can't happen */\n\t\t\t\tsnprintf(state->err_msg, sizeof(state->err_msg),\n\t\t\t\t\t\t \"get START_REPLICATION result from %s: unexpected result\",\n\t\t\t\t\t\t state->donor_name);\n\t\t\t\tNeonWALReaderResetRemote(state);\n\t\t\t\treturn NEON_WALREAD_ERROR;\n\t\t}\n\t}\n\n\tAssert(state->rem_state == RS_ESTABLISHED);\n\n\t/*\n\t * If we had the request before, verify args are the same and advance the\n\t * result ptr according to the progress; otherwise register the request.\n\t */\n\tif (state->req_lsn != InvalidXLogRecPtr)\n\t{\n\t\tif (state->req_lsn != startptr || state->req_len != count)\n\t\t{\n\t\t\tsnprintf(state->err_msg, sizeof(state->err_msg),\n\t\t\t\t\t \"args changed during request, was %X/%X %zu, now %X/%X %zu\",\n\t\t\t\t\t LSN_FORMAT_ARGS(state->req_lsn), state->req_len, LSN_FORMAT_ARGS(startptr), count);\n\t\t\tNeonWALReaderResetRemote(state);\n\t\t\treturn NEON_WALREAD_ERROR;\n\t\t}\n\t\tnwr_log(DEBUG5, \"continuing remote read at req_lsn=%X/%X len=%zu, req_progress=%zu\",\n\t\t\t\tLSN_FORMAT_ARGS(startptr),\n\t\t\t\tcount,\n\t\t\t\tstate->req_progress);\n\t\tbuf += state->req_progress;\n\t}\n\telse\n\t{\n\t\tstate->req_lsn = startptr;\n\t\tstate->req_len = count;\n\t\tstate->req_progress = 0;\n\t\tnwr_log(DEBUG5, \"starting remote read req_lsn=%X/%X len=%zu\",\n\t\t\t\tLSN_FORMAT_ARGS(startptr),\n\t\t\t\tcount);\n\t}\n\n\twhile (true)\n\t{\n\t\tSize\t\tto_copy;\n\n\t\t/*\n\t\t * If we have no ready data, receive new message.\n\t\t */\n\t\tif (state->wal_rem_len == 0 &&\n\n\t\t/*\n\t\t * check for the sake of 0 length reads; walproposer does these for\n\t\t * heartbeats, though generally they shouldn't hit remote source.\n\t\t */\n\t\t\tstate->req_len - state->req_progress > 
0)\n\t\t{\n\t\t\tNeonWALReadResult read_msg_res = NeonWALReaderReadMsg(state);\n\n\t\t\tif (read_msg_res != NEON_WALREAD_SUCCESS)\n\t\t\t\treturn read_msg_res;\n\t\t}\n\n\t\tif (state->req_lsn + state->req_progress != state->rem_lsn)\n\t\t{\n\t\t\tsnprintf(state->err_msg, sizeof(state->err_msg),\n\t\t\t\t\t \"expected remote WAL at %X/%X but got %X/%X. Non monotonic read requests could have caused this. req_lsn=%X/%X len=%zu\",\n\t\t\t\t\t LSN_FORMAT_ARGS(state->req_lsn + state->req_progress),\n\t\t\t\t\t LSN_FORMAT_ARGS(state->rem_lsn),\n\t\t\t\t\t LSN_FORMAT_ARGS(state->req_lsn),\n\t\t\t\t\t state->req_len);\n\t\t\tNeonWALReaderResetRemote(state);\n\t\t\treturn NEON_WALREAD_ERROR;\n\t\t}\n\n\t\t/* We can copy min of (available, requested) bytes. */\n\t\tto_copy =\n\t\t\tMin(state->req_len - state->req_progress, state->wal_rem_len);\n\t\tmemcpy(buf, state->wal_ptr, to_copy);\n\t\tstate->wal_ptr += to_copy;\n\t\tstate->wal_rem_len -= to_copy;\n\t\tstate->rem_lsn += to_copy;\n\t\tif (state->wal_rem_len == 0)\n\t\t\tstate->wal_ptr = NULL;\t/* freed by libpqwalproposer */\n\t\tbuf += to_copy;\n\t\tstate->req_progress += to_copy;\n\t\tif (state->req_progress == state->req_len)\n\t\t{\n\t\t\tXLogSegNo\tnext_segno;\n\t\t\tXLogSegNo\treq_segno;\n\n\t\t\tXLByteToSeg(state->req_lsn, req_segno, state->segcxt.ws_segsize);\n\t\t\tXLByteToSeg(state->rem_lsn, next_segno, state->segcxt.ws_segsize);\n\n\t\t\t/*\n\t\t\t * Request completed. 
If there is a chance of serving next one\n\t\t\t * locally, close the connection.\n\t\t\t */\n\t\t\tif (state->req_lsn < state->available_lsn &&\n\t\t\t\tstate->rem_lsn >= state->available_lsn)\n\t\t\t{\n\t\t\t\tnwr_log(LOG, \"closing remote connection as available_lsn %X/%X crossed and next read at %X/%X is likely to be served locally\",\n\t\t\t\t\t\tLSN_FORMAT_ARGS(state->available_lsn), LSN_FORMAT_ARGS(state->rem_lsn));\n\t\t\t\tNeonWALReaderResetRemote(state);\n\t\t\t}\n\t\t\telse if (state->rem_lsn >= state->available_lsn && next_segno > req_segno &&\n\t\t\t\t\t is_wal_segment_exists(next_segno, state->segcxt.ws_segsize, tli))\n\t\t\t{\n\t\t\t\tnwr_log(LOG, \"closing remote connection as WAL file at next lsn %X/%X exists\",\n\t\t\t\t\t\tLSN_FORMAT_ARGS(state->rem_lsn));\n\t\t\t\tNeonWALReaderResetRemote(state);\n\t\t\t}\n\t\t\tstate->req_lsn = InvalidXLogRecPtr;\n\t\t\tstate->req_len = 0;\n\t\t\tstate->req_progress = 0;\n\n\t\t\t/* Update the current segment info. */\n\t\t\tstate->seg.ws_tli = tli;\n\n\t\t\treturn NEON_WALREAD_SUCCESS;\n\t\t}\n\t}\n}\n\n/*\n * Read one WAL message from the stream, sets state->wal_ptr in case of success.\n * Resets remote state in case of failure.\n */\nstatic NeonWALReadResult\nNeonWALReaderReadMsg(NeonWALReader *state)\n{\n\twhile (true)\t\t\t\t/* loop until we get 'w' */\n\t{\n\t\tchar\t   *copydata_ptr;\n\t\tint\t\t\tcopydata_size;\n\t\tStringInfoData s;\n\t\tchar\t\tmsg_type;\n\t\tint\t\t\thdrlen;\n\n\t\tAssert(state->rem_state == RS_ESTABLISHED);\n\t\tAssert(state->wal_ptr == NULL && state->wal_rem_len == 0);\n\n\t\tswitch (libpqwp_async_read(state->wp_conn,\n\t\t\t\t\t\t\t\t   &copydata_ptr,\n\t\t\t\t\t\t\t\t   &copydata_size))\n\t\t{\n\t\t\tcase PG_ASYNC_READ_SUCCESS:\n\t\t\t\tbreak;\n\t\t\tcase PG_ASYNC_READ_TRY_AGAIN:\n\t\t\t\treturn NEON_WALREAD_WOULDBLOCK;\n\t\t\tcase PG_ASYNC_READ_FAIL:\n\t\t\t\tsnprintf(state->err_msg,\n\t\t\t\t\t\t sizeof(state->err_msg),\n\t\t\t\t\t\t \"req_lsn=%X/%X, req_len=%zu, 
req_progress=%zu, get copydata failed: %s\",\n\t\t\t\t\t\t LSN_FORMAT_ARGS(state->req_lsn),\n\t\t\t\t\t\t state->req_len,\n\t\t\t\t\t\t state->req_progress,\n\t\t\t\t\t\t PQerrorMessage(state->wp_conn->pg_conn));\n\t\t\t\tgoto err;\n\t\t}\n\n\t\t/* put data on StringInfo to parse */\n\t\ts.data = copydata_ptr;\n\t\ts.len = copydata_size;\n\t\ts.cursor = 0;\n\t\ts.maxlen = -1;\n\n\t\tif (copydata_size == 0)\n\t\t{\n\t\t\tsnprintf(state->err_msg,\n\t\t\t\t\t sizeof(state->err_msg),\n\t\t\t\t\t \"zero length copydata received\");\n\t\t\tgoto err;\n\t\t}\n\t\tmsg_type = pq_getmsgbyte(&s);\n\t\tswitch (msg_type)\n\t\t{\n\t\t\tcase 'w':\n\t\t\t\t{\n\t\t\t\t\tXLogRecPtr\tstart_lsn;\n\n\t\t\t\t\thdrlen = sizeof(int64) + sizeof(int64) + sizeof(int64);\n\t\t\t\t\tif (s.len - s.cursor < hdrlen)\n\t\t\t\t\t{\n\t\t\t\t\t\tsnprintf(state->err_msg,\n\t\t\t\t\t\t\t\t sizeof(state->err_msg),\n\t\t\t\t\t\t\t\t \"invalid WAL message received from primary\");\n\t\t\t\t\t\tgoto err;\n\t\t\t\t\t}\n\n\t\t\t\t\tstart_lsn = pq_getmsgint64(&s);\n\t\t\t\t\tpq_getmsgint64(&s); /* XLogRecPtr\tend_lsn; */\n\t\t\t\t\tpq_getmsgint64(&s); /* TimestampTz send_time */\n\n\t\t\t\t\tstate->rem_lsn = start_lsn;\n\t\t\t\t\tstate->wal_rem_len = (Size) (s.len - s.cursor);\n\t\t\t\t\tstate->wal_ptr = (char *) pq_getmsgbytes(&s, s.len - s.cursor);\n\t\t\t\t\tnwr_log(DEBUG5, \"received WAL msg at %X/%X len %zu\",\n\t\t\t\t\t\t\tLSN_FORMAT_ARGS(state->rem_lsn), state->wal_rem_len);\n\n\t\t\t\t\treturn NEON_WALREAD_SUCCESS;\n\t\t\t\t}\n\t\t\tcase 'k':\n\t\t\t\t{\n\t\t\t\t\tXLogRecPtr\tend_lsn;\n\t\t\t\t\tbool\t\treply_requested;\n\n\t\t\t\t\thdrlen = sizeof(int64) + sizeof(int64) + sizeof(char);\n\t\t\t\t\tif (s.len - s.cursor < hdrlen)\n\t\t\t\t\t{\n\t\t\t\t\t\tsnprintf(state->err_msg, sizeof(state->err_msg),\n\t\t\t\t\t\t\t\t \"invalid keepalive message received from primary\");\n\t\t\t\t\t\tgoto err;\n\t\t\t\t\t}\n\n\t\t\t\t\tend_lsn = pq_getmsgint64(&s);\n\t\t\t\t\tpq_getmsgint64(&s); /* TimestampTz 
timestamp; */\n\t\t\t\t\treply_requested = pq_getmsgbyte(&s);\n\t\t\t\t\tnwr_log(DEBUG5, \"received keepalive end_lsn=%X/%X reply_requested=%d\",\n\t\t\t\t\t\t\tLSN_FORMAT_ARGS(end_lsn),\n\t\t\t\t\t\t\treply_requested);\n\t\t\t\t\tif (end_lsn < state->req_lsn + state->req_len)\n\t\t\t\t\t{\n\t\t\t\t\t\tsnprintf(state->err_msg, sizeof(state->err_msg),\n\t\t\t\t\t\t\t\t \"closing remote connection: requested WAL up to %X/%X, but current donor %s has only up to %X/%X\",\n\t\t\t\t\t\t\t\t LSN_FORMAT_ARGS(state->req_lsn + state->req_len), state->donor_name, LSN_FORMAT_ARGS(end_lsn));\n\t\t\t\t\t\tgoto err;\n\t\t\t\t\t}\n\t\t\t\t\tcontinue;\n\t\t\t\t}\n\t\t\tdefault:\n\t\t\t\tnwr_log(WARNING, \"invalid replication message type %d\", msg_type);\n\t\t\t\tcontinue;\n\t\t}\n\t}\nerr:\n\tNeonWALReaderResetRemote(state);\n\treturn NEON_WALREAD_ERROR;\n}\n\n/* reset remote connection and request in progress */\nvoid\nNeonWALReaderResetRemote(NeonWALReader *state)\n{\n\tstate->req_lsn = InvalidXLogRecPtr;\n\tstate->req_len = 0;\n\tstate->req_progress = 0;\n\tstate->rem_state = RS_NONE;\n\tif (state->wp_conn)\n\t{\n\t\tlibpqwp_disconnect(state->wp_conn);\n\t\tstate->wp_conn = NULL;\n\t}\n\tstate->donor_name[0] = '\\0';\n\tstate->wal_ptr = NULL;\n\tstate->wal_rem_len = 0;\n\tstate->rem_lsn = InvalidXLogRecPtr;\n}\n\n/*\n * Return socket of connection to remote source. Must be called only when\n * connection exists (NeonWALReaderEvents returns non zero).\n */\npgsocket\nNeonWALReaderSocket(NeonWALReader *state)\n{\n\tif (!state->wp_conn)\n\t\tnwr_log(FATAL, \"NeonWALReaderSocket is called without active remote connection\");\n\treturn PQsocket(state->wp_conn->pg_conn);\n}\n\n/*\n * Whether remote connection is established. 
Once this is done, until successful\n * local read or error socket is stable and user can update socket events\n * instead of re-adding it each time.\n */\nbool\nNeonWALReaderIsRemConnEstablished(NeonWALReader *state)\n{\n\treturn state->rem_state == RS_ESTABLISHED;\n}\n\n/*\n * Returns the timeline ID saved in the reader as local_active_tlid (the 'tlid'\n * argument given to NeonWALReaderAllocate).\n */\nTimeLineID\nNeonWALReaderLocalActiveTimeLineID(NeonWALReader *state)\n{\n\treturn state->local_active_tlid;\n}\n\n/*\n * Returns events user should wait on connection socket or 0 if remote\n * connection is not active.\n */\nextern uint32\nNeonWALReaderEvents(NeonWALReader *state)\n{\n\tswitch (state->rem_state)\n\t{\n\t\tcase RS_NONE:\n\t\t\treturn 0;\n\t\tcase RS_CONNECTING_READ:\n\t\t\treturn WL_SOCKET_READABLE;\n\t\tcase RS_CONNECTING_WRITE:\n\t\t\treturn WL_SOCKET_WRITEABLE;\n\t\tcase RS_WAIT_EXEC_RESULT:\n\t\tcase RS_ESTABLISHED:\n\t\t\treturn WL_SOCKET_READABLE;\n\t\tdefault:\n\t\t\tAssert(false);\n\t\t\treturn 0;\t\t\t/* make compiler happy */\n\t}\n}\n\nstatic bool\nNeonWALReadLocal(NeonWALReader *state, char *buf, XLogRecPtr startptr, Size count, TimeLineID tli)\n{\n\tchar\t   *p;\n\tXLogRecPtr\trecptr;\n\tSize\t\tnbytes;\n\n\tp = buf;\n\trecptr = startptr;\n\tnbytes = count;\n\n/* Try to read directly from WAL buffers first. 
*/\n#if PG_MAJORVERSION_NUM >= 17\n\t{\n\t\tSize\trbytes;\n\t\trbytes = WALReadFromBuffers(p, recptr, nbytes, tli);\n\t\trecptr += rbytes;\n\t\tnbytes -= rbytes;\n\t\tp += rbytes;\n\t}\n#endif\n\n\twhile (nbytes > 0)\n\t{\n\t\tuint32\t\tstartoff;\n\t\tint\t\t\tsegbytes;\n\t\tint\t\t\treadbytes;\n\t\tXLogSegNo\tlastRemovedSegNo;\n\n\t\tstartoff = XLogSegmentOffset(recptr, state->segcxt.ws_segsize);\n\n\t\t/*\n\t\t * If the data we want is not in a segment we have open, close what we\n\t\t * have (if anything) and open the next one, using the caller's\n\t\t * provided openSegment callback.\n\t\t */\n\t\tif (state->seg.ws_file < 0 ||\n\t\t\t!XLByteInSeg(recptr, state->seg.ws_segno, state->segcxt.ws_segsize) ||\n\t\t\ttli != state->seg.ws_tli)\n\t\t{\n\t\t\tXLogSegNo\tnextSegNo;\n\n\t\t\tneon_wal_segment_close(state);\n\n\t\t\tXLByteToSeg(recptr, nextSegNo, state->segcxt.ws_segsize);\n\t\t\tif (!neon_wal_segment_open(state, nextSegNo, &tli))\n\t\t\t{\n\t\t\t\tchar\t\tfname[MAXFNAMELEN];\n\n\t\t\t\tstate->wre_errno = errno;\n\n\t\t\t\tXLogFileName(fname, tli, nextSegNo, state->segcxt.ws_segsize);\n\t\t\t\tsnprintf(state->err_msg, sizeof(state->err_msg), \"failed to open WAL segment %s while reading at %X/%X: %s\",\n\t\t\t\t\t\t fname, LSN_FORMAT_ARGS(recptr), strerror(state->wre_errno));\n\t\t\t\treturn false;\n\t\t\t}\n\n\t\t\t/* This shouldn't happen -- indicates a bug in segment_open */\n\t\t\tAssert(state->seg.ws_file >= 0);\n\n\t\t\t/* Update the current segment info. */\n\t\t\tstate->seg.ws_tli = tli;\n\t\t\tstate->seg.ws_segno = nextSegNo;\n\t\t}\n\n\t\t/* How many bytes are within this segment? 
*/\n\t\tif (nbytes > (state->segcxt.ws_segsize - startoff))\n\t\t\tsegbytes = state->segcxt.ws_segsize - startoff;\n\t\telse\n\t\t\tsegbytes = nbytes;\n\n#ifndef FRONTEND\n\t\tpgstat_report_wait_start(WAIT_EVENT_WAL_READ);\n#endif\n\n\t\t/* Reset errno first; eases reporting non-errno-affecting errors */\n\t\terrno = 0;\n\t\treadbytes = pg_pread(state->seg.ws_file, p, segbytes, (off_t) startoff);\n\n#ifndef FRONTEND\n\t\tpgstat_report_wait_end();\n#endif\n\n\t\tif (readbytes <= 0)\n\t\t{\n\t\t\tchar\t\tfname[MAXFNAMELEN];\n\n\t\t\tXLogFileName(fname, state->seg.ws_tli, state->seg.ws_segno, state->segcxt.ws_segsize);\n\n\t\t\tif (readbytes < 0)\n\t\t\t{\n\t\t\t\tstate->wre_errno = errno;\n\t\t\t\tsnprintf(state->err_msg, sizeof(state->err_msg), \"could not read from log segment %s, offset %d: %m: %s\",\n\t\t\t\t\t\t fname, startoff, strerror(state->wre_errno));\n\t\t\t}\n\t\t\telse\n\t\t\t{\n\t\t\t\tsnprintf(state->err_msg, sizeof(state->err_msg), \"could not read from log segment %s, offset %d: %m: unexpected EOF\",\n\t\t\t\t\t\t fname, startoff);\n\t\t\t}\n\t\t\treturn false;\n\t\t}\n\n\t\t/*\n\t\t * Recheck that the segment hasn't been removed while we were reading\n\t\t * it.\n\t\t */\n\t\tlastRemovedSegNo = XLogGetLastRemovedSegno();\n\t\tif (state->seg.ws_segno <= lastRemovedSegNo)\n\t\t{\n\t\t\tchar\t\tfname[MAXFNAMELEN];\n\n\t\t\tstate->wre_errno = ENOENT;\n\n\t\t\tXLogFileName(fname, tli, state->seg.ws_segno, state->segcxt.ws_segsize);\n\t\t\tsnprintf(state->err_msg, sizeof(state->err_msg), \"WAL segment %s has been removed during the read, lastRemovedSegNo \" UINT64_FORMAT,\n\t\t\t\t\t fname, lastRemovedSegNo);\n\t\t\treturn false;\n\t\t}\n\n\t\t/* Update state for read */\n\t\trecptr += readbytes;\n\t\tnbytes -= readbytes;\n\t\tp += readbytes;\n\t}\n\n\treturn true;\n}\n\nXLogRecPtr\nNeonWALReaderGetRemLsn(NeonWALReader *state)\n{\n\treturn state->rem_lsn;\n}\n\nconst WALOpenSegment *\nNeonWALReaderGetSegment(NeonWALReader *state)\n{\n\treturn 
&state->seg;\n}\n\n/*\n * Copy of vanilla wal_segment_open, but returns false in case of error instead\n * of ERROR, with errno set.\n *\n * XLogReaderRoutine->segment_open callback for local pg_wal files\n */\nbool\nneon_wal_segment_open(NeonWALReader *state, XLogSegNo nextSegNo,\n\t\t\t\t\t  TimeLineID *tli_p)\n{\n\tTimeLineID\ttli = *tli_p;\n\tchar\t\tpath[MAXPGPATH];\n\n\tXLogFilePath(path, tli, nextSegNo, state->segcxt.ws_segsize);\n\tnwr_log(DEBUG5, \"opening %s\", path);\n\tstate->seg.ws_file = BasicOpenFile(path, O_RDONLY | PG_BINARY);\n\tif (state->seg.ws_file >= 0)\n\t\treturn true;\n\n\treturn false;\n}\n\nstatic bool\nis_wal_segment_exists(XLogSegNo segno, int segsize, TimeLineID tli)\n{\n\tstruct stat stat_buffer;\n\tchar\t\tpath[MAXPGPATH];\n\n\tXLogFilePath(path, tli, segno, segsize);\n\treturn stat(path, &stat_buffer) == 0;\n}\n\n/* copy of vanilla wal_segment_close with NeonWALReader */\nvoid\nneon_wal_segment_close(NeonWALReader *state)\n{\n\tif (state->seg.ws_file >= 0)\n\t{\n\t\tclose(state->seg.ws_file);\n\t\t/* need to check errno? */\n\t\tstate->seg.ws_file = -1;\n\t}\n}\n\nchar *\nNeonWALReaderErrMsg(NeonWALReader *state)\n{\n\treturn state->err_msg;\n}\n\n/*\n * Returns true if there is a donor, and false otherwise\n */\nbool\nNeonWALReaderUpdateDonor(NeonWALReader *state)\n{\n\tWalproposerShmemState *wps = GetWalpropShmemState();\n\n\tSpinLockAcquire(&wps->mutex);\n\tmemcpy(state->donor_name, wps->donor_name, sizeof(state->donor_name));\n\tmemcpy(state->donor_conninfo, wps->donor_conninfo, sizeof(state->donor_conninfo));\n\tstate->donor_lsn = wps->donor_lsn;\n\tSpinLockRelease(&wps->mutex);\n\treturn state->donor_name[0] != '\\0';\n}\n"
  },
  {
    "path": "pgxn/neon/neon_walreader.h",
    "content": "#ifndef __NEON_WALREADER_H__\n#define __NEON_WALREADER_H__\n\n#include \"access/xlogdefs.h\"\n\n/* forward declare so we don't have to expose the struct to the public */\nstruct NeonWALReader;\ntypedef struct NeonWALReader NeonWALReader;\n\n/* avoid including walproposer.h as it includes us */\nstruct WalProposer;\ntypedef struct WalProposer WalProposer;\n\n/* NeonWALRead return value */\ntypedef enum\n{\n\tNEON_WALREAD_SUCCESS,\n\tNEON_WALREAD_WOULDBLOCK,\n\tNEON_WALREAD_ERROR,\n} NeonWALReadResult;\n\nextern NeonWALReader *NeonWALReaderAllocate(int wal_segment_size, XLogRecPtr available_lsn, char *log_prefix, TimeLineID tlid);\nextern void NeonWALReaderFree(NeonWALReader *state);\nextern void NeonWALReaderResetRemote(NeonWALReader *state);\nextern TimeLineID NeonWALReaderLocalActiveTimeLineID(NeonWALReader *state);\nextern NeonWALReadResult NeonWALRead(NeonWALReader *state, char *buf, XLogRecPtr startptr, Size count, TimeLineID tli);\nextern pgsocket NeonWALReaderSocket(NeonWALReader *state);\nextern uint32 NeonWALReaderEvents(NeonWALReader *state);\nextern bool NeonWALReaderIsRemConnEstablished(NeonWALReader *state);\nextern char *NeonWALReaderErrMsg(NeonWALReader *state);\nextern XLogRecPtr NeonWALReaderGetRemLsn(NeonWALReader *state);\nextern const WALOpenSegment *NeonWALReaderGetSegment(NeonWALReader *state);\nextern bool neon_wal_segment_open(NeonWALReader *state, XLogSegNo nextSegNo, TimeLineID *tli_p);\nextern void neon_wal_segment_close(NeonWALReader *state);\nextern bool NeonWALReaderUpdateDonor(NeonWALReader *state);\n\n\n#endif\t\t\t\t\t\t\t/* __NEON_WALREADER_H__ */\n"
  },
  {
    "path": "pgxn/neon/pagestore_client.h",
    "content": "/*-------------------------------------------------------------------------\n *\n * pagestore_client.h\n *\n *\n * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group\n * Portions Copyright (c) 1994, Regents of the University of California\n *\n *-------------------------------------------------------------------------\n */\n#ifndef PAGESTORE_CLIENT_h\n#define PAGESTORE_CLIENT_h\n\n#include \"neon_pgversioncompat.h\"\n\n#include \"access/slru.h\"\n#include \"access/xlogdefs.h\"\n#include RELFILEINFO_HDR\n#include \"lib/stringinfo.h\"\n#include \"storage/block.h\"\n#include \"storage/buf_internals.h\"\n\n#define MAX_SHARDS 128\n#define MAX_PAGESERVER_CONNSTRING_SIZE 256\n\ntypedef enum\n{\n\t/* pagestore_client -> pagestore */\n\tT_NeonExistsRequest = 0,\n\tT_NeonNblocksRequest,\n\tT_NeonGetPageRequest,\n\tT_NeonDbSizeRequest,\n\tT_NeonGetSlruSegmentRequest,\n\t/* future tags above this line */\n\tT_NeonTestRequest = 99, /* only in cfg(feature = \"testing\") */\n\n\t/* pagestore -> pagestore_client */\n\tT_NeonExistsResponse = 100,\n\tT_NeonNblocksResponse,\n\tT_NeonGetPageResponse,\n\tT_NeonErrorResponse,\n\tT_NeonDbSizeResponse,\n\tT_NeonGetSlruSegmentResponse,\n\t/* future tags above this line */\n\tT_NeonTestResponse = 199, /* only in cfg(feature = \"testing\") */\n} NeonMessageTag;\n\ntypedef uint64 NeonRequestId;\n\n/* base struct for c-style inheritance */\ntypedef struct\n{\n\tNeonMessageTag tag;\n\tNeonRequestId reqid;\n\tXLogRecPtr\tlsn;\n\tXLogRecPtr\tnot_modified_since;\n} NeonMessage;\n\n#define messageTag(m) (((const NeonMessage *)(m))->tag)\n\n/* SLRUs downloadable from page server */\ntypedef enum {\n\tSLRU_CLOG,\n\tSLRU_MULTIXACT_MEMBERS,\n\tSLRU_MULTIXACT_OFFSETS\n} SlruKind;\n\n/*--\n * supertype of all the Neon*Request structs below.\n *\n * All requests contain two LSNs:\n *\n * lsn:                request page (or relation size, etc) at this LSN\n * not_modified_since: Hint that the page hasn't been modified 
between\n *                     this LSN and the request LSN (`lsn`).\n *\n * To request the latest version of a page, you can use MAX_LSN as the request\n * LSN.\n *\n * If you don't know any better, you can always set 'not_modified_since' equal\n * to 'lsn', but providing a lower value can speed up processing the request\n * in the pageserver, as it doesn't need to wait for the WAL to arrive, and it\n * can skip traversing through recent layers which we know to not contain any\n * versions for the requested page.\n *\n * These structs describe the V2 of these requests. (The old now-defunct V1\n * protocol contained just one LSN and a boolean 'latest' flag.)\n *\n * V3 version of protocol adds request ID to all requests. This request ID is also included in response\n * as well as other fields from requests, which allows to verify that we receive response for our request.\n * We copy fields from request to response to make checking more reliable: request ID is formed from process ID\n * and local counter, so in principle there can be duplicated requests IDs if process PID is reused.\n */\ntypedef NeonMessage NeonRequest;\n\ntypedef struct\n{\n\tNeonRequest hdr;\n\tNRelFileInfo rinfo;\n\tForkNumber\tforknum;\n} NeonExistsRequest;\n\ntypedef struct\n{\n\tNeonRequest hdr;\n\tNRelFileInfo rinfo;\n\tForkNumber\tforknum;\n} NeonNblocksRequest;\n\ntypedef struct\n{\n\tNeonRequest hdr;\n\tOid\t\t\tdbNode;\n} NeonDbSizeRequest;\n\ntypedef struct\n{\n\tNeonRequest hdr;\n\tNRelFileInfo rinfo;\n\tForkNumber\tforknum;\n\tBlockNumber blkno;\n} NeonGetPageRequest;\n\ntypedef struct\n{\n\tNeonRequest hdr;\n\tSlruKind\tkind;\n\tint\t\t\tsegno;\n} NeonGetSlruSegmentRequest;\n\n\n/* supertype of all the Neon*Response structs below */\ntypedef NeonMessage NeonResponse;\n\ntypedef struct\n{\n\tNeonExistsRequest req;\n\tbool\t\texists;\n} NeonExistsResponse;\n\ntypedef struct\n{\n\tNeonNblocksRequest req;\n\tuint32\t\tn_blocks;\n} NeonNblocksResponse;\n\ntypedef 
struct\n{\n\tNeonGetPageRequest req;\n\tchar\t\tpage[FLEXIBLE_ARRAY_MEMBER];\n} NeonGetPageResponse;\n\n#define PS_GETPAGERESPONSE_SIZE (MAXALIGN(offsetof(NeonGetPageResponse, page) + BLCKSZ))\n\ntypedef struct\n{\n\tNeonDbSizeRequest req;\n\tint64\t\tdb_size;\n} NeonDbSizeResponse;\n\ntypedef struct\n{\n\tNeonResponse req;\n\tchar\t\tmessage[FLEXIBLE_ARRAY_MEMBER]; /* null-terminated error\n\t\t\t\t\t\t\t\t\t\t\t\t * message */\n} NeonErrorResponse;\n\ntypedef struct\n{\n\tNeonGetSlruSegmentRequest req;\n\tint\t\t\tn_blocks;\n\tchar\t\tdata[BLCKSZ * SLRU_PAGES_PER_SEGMENT];\n} NeonGetSlruSegmentResponse;\n\n\nextern StringInfoData nm_pack_request(NeonRequest *msg);\nextern NeonResponse *nm_unpack_response(StringInfo s);\nextern char *nm_to_string(NeonMessage *msg);\n\n/*\n * If debug_compare_local>DEBUG_COMPARE_LOCAL_NONE, we pass through all the SMGR API\n * calls to md.c, and *also* do the calls to the Page Server. On every\n * read, compare the versions we read from local disk and Page Server,\n * and Assert that they are identical.\n */\ntypedef enum\n{\n\tDEBUG_COMPARE_LOCAL_NONE,     /* normal mode - pages are stored locally only for unlogged relations */\n\tDEBUG_COMPARE_LOCAL_PREFETCH, /* if page is found in prefetch ring, then compare it with local and return */\n\tDEBUG_COMPARE_LOCAL_LFC,      /* if page is found in LFC or prefetch ring, then compare it with local and return */\n\tDEBUG_COMPARE_LOCAL_ALL       /* always fetch page from PS and compare it with local */\n} DebugCompareLocalMode;\n\nextern int debug_compare_local;\n\n/*\n * API\n */\n\ntypedef uint16 shardno_t;\n\ntypedef struct\n{\n\t/*\n\t * Send this request to the PageServer associated with this shard.\n\t * This function assigns request_id to the request which can be extracted by caller from request struct.\n\t */\n\tbool\t\t(*send) (shardno_t  shard_no, NeonRequest * request);\n\t/*\n\t * Blocking read for the next response of this shard.\n\t *\n\t * When a CANCEL signal is handled, 
the connection state will be\n\t * unmodified.\n\t */\n\tNeonResponse *(*receive) (shardno_t shard_no);\n\t/*\n\t * Try get the next response from the TCP buffers, if any.\n\t * Returns NULL when the data is not yet available.\n\t *\n\t * This will raise errors only for malformed responses (we can't put them\n\t * back into connection). All other error conditions are soft errors and\n\t * return NULL as \"no response available\".\n\t */\n\tNeonResponse *(*try_receive) (shardno_t shard_no);\n\t/*\n\t * Make sure all requests are sent to PageServer.\n\t */\n\tbool\t\t(*flush) (shardno_t shard_no);\n\t/*\n\t * Disconnect from this pageserver shard.\n\t */\n\tvoid        (*disconnect) (shardno_t shard_no);\n} page_server_api;\n\nextern void prefetch_on_ps_disconnect(void);\n\nextern page_server_api *page_server;\n\nextern char *pageserver_connstring;\nextern int\tflush_every_n_requests;\nextern int\treadahead_buffer_size;\nextern char *neon_timeline;\nextern char *neon_tenant;\nextern int32 max_cluster_size;\nextern int  neon_protocol_version;\n\nextern shardno_t get_shard_number(BufferTag* tag);\n\nextern const f_smgr *smgr_neon(ProcNumber backend, NRelFileInfo rinfo);\nextern void smgr_init_neon(void);\nextern void readahead_buffer_resize(int newsize, void *extra);\n\n\n/*\n * LSN values associated with each request to the pageserver\n */\ntypedef struct\n{\n\t/*\n\t * 'request_lsn' is the main value that determines which page version to\n\t * fetch.\n\t */\n\tXLogRecPtr request_lsn;\n\n\t/*\n\t * A hint to the pageserver that the requested page hasn't been modified\n\t * between this LSN and 'request_lsn'. 
That allows the pageserver to\n\t * return the page faster, without waiting for 'request_lsn' to arrive in\n\t * the pageserver, as long as 'not_modified_since' has arrived.\n\t */\n\tXLogRecPtr not_modified_since;\n\n\t/*\n\t * 'effective_request_lsn' is not included in the request that's sent to\n\t * the pageserver, but is used to keep track of the latest LSN of when the\n\t * request was made. In a standby server, this is always the same as the\n\t * 'request_lsn', but in the primary we use UINT64_MAX as the\n\t * 'request_lsn' to request the latest page version, so we need this\n\t * separate field to remember what the latest LSN was when the request was\n\t * made. It's needed to manage prefetch requests, to verify if the response\n\t * to a prefetched request is still valid.\n\t */\n\tXLogRecPtr effective_request_lsn;\n} neon_request_lsns;\n\nextern PGDLLEXPORT void neon_read_at_lsn(NRelFileInfo rnode, ForkNumber forkNum, BlockNumber blkno,\n\t\t\t\t\t\t\t\t\t\t neon_request_lsns request_lsns, void *buffer);\nextern int64 neon_dbsize(Oid dbNode);\n\nextern void neon_get_request_lsns(NRelFileInfo rinfo, ForkNumber forknum,\n\t\t\t\t\t\t\t\t  BlockNumber blkno, neon_request_lsns *output,\n\t\t\t\t\t\t\t\t  BlockNumber nblocks);\n\n/* utils for neon relsize cache */\nextern void relsize_hash_init(void);\nextern bool get_cached_relsize(NRelFileInfo rinfo, ForkNumber forknum, BlockNumber *size);\nextern void set_cached_relsize(NRelFileInfo rinfo, ForkNumber forknum, BlockNumber size);\nextern void update_cached_relsize(NRelFileInfo rinfo, ForkNumber forknum, BlockNumber size);\nextern void forget_cached_relsize(NRelFileInfo rinfo, ForkNumber forknum);\n\n#endif\t\t\t\t\t\t\t/* PAGESTORE_CLIENT_h */\n"
  },
  {
    "path": "pgxn/neon/pagestore_smgr.c",
    "content": "/*-------------------------------------------------------------------------\n *\n * pagestore_smgr.c\n *\n *\n *\n * Temporary and unlogged rels\n * ---------------------------\n *\n * Temporary and unlogged tables are stored locally, by md.c. The functions\n * here just pass the calls through to corresponding md.c functions.\n *\n * Index build operations that use the buffer cache are also handled locally,\n * just like unlogged tables. Such operations must be marked by calling\n * smgr_start_unlogged_build() and friends.\n *\n * In order to know what relations are permanent and which ones are not, we\n * have added a 'smgr_relpersistence' field to SmgrRelationData, and it is set\n * by smgropen() callers, when they have the relcache entry at hand.  However,\n * sometimes we need to open an SmgrRelation for a relation without the\n * relcache. That is needed when we evict a buffer; we might not have the\n * SmgrRelation for that relation open yet. To deal with that, the\n * 'relpersistence' can be left to zero, meaning we don't know if it's\n * permanent or not. Most operations are not allowed with relpersistence==0,\n * but smgrwrite() does work, which is what we need for buffer eviction.  and\n * smgrunlink() so that a backend doesn't need to have the relcache entry at\n * transaction commit, where relations that were dropped in the transaction\n * are unlinked.\n *\n * If smgrwrite() is called and smgr_relpersistence == 0, we check if the\n * relation file exists locally or not. If it does exist, we assume it's an\n * unlogged relation and write the page there. 
Otherwise it must be a\n * permanent relation, WAL-logged and stored on the page server, and we ignore\n * the write like we do for permanent relations.\n *\n *\n * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group\n * Portions Copyright (c) 1994, Regents of the University of California\n *\n *-------------------------------------------------------------------------\n */\n#include \"postgres.h\"\n\n#include \"access/parallel.h\"\n#include \"access/xact.h\"\n#include \"access/xlog.h\"\n#include \"access/xlogdefs.h\"\n#include \"access/xloginsert.h\"\n#include \"access/xlog_internal.h\"\n#include \"access/xlogutils.h\"\n#include \"catalog/pg_class.h\"\n#include \"pgstat.h\"\n#include \"postmaster/autovacuum.h\"\n#include \"postmaster/interrupt.h\"\n#include \"port/pg_iovec.h\"\n#include \"replication/walsender.h\"\n#include \"storage/bufmgr.h\"\n#include \"storage/buf_internals.h\"\n#include \"storage/fsm_internals.h\"\n#include \"storage/md.h\"\n#include \"storage/smgr.h\"\n\n#include \"bitmap.h\"\n#include \"communicator.h\"\n#include \"file_cache.h\"\n#include \"neon.h\"\n#include \"neon_lwlsncache.h\"\n#include \"neon_perf_counters.h\"\n#include \"pagestore_client.h\"\n\n#if PG_VERSION_NUM >= 150000\n#include \"access/xlogrecovery.h\"\n#endif\n\n#include \"access/nbtree.h\"\n#include \"storage/bufpage.h\"\n#include \"access/xlog_internal.h\"\n\nstatic char *hexdump_page(char *page);\n\n#define IS_LOCAL_REL(reln) (\\\n\tNInfoGetDbOid(InfoFromSMgrRel(reln)) != 0 && \\\n\t\tNInfoGetRelNumber(InfoFromSMgrRel(reln)) >= FirstNormalObjectId \\\n)\n\nconst int\tSmgrTrace = DEBUG5;\n\n/* unlogged relation build states */\ntypedef enum\n{\n\tUNLOGGED_BUILD_NOT_IN_PROGRESS = 0,\n\tUNLOGGED_BUILD_PHASE_1,\n\tUNLOGGED_BUILD_PHASE_2,\n\tUNLOGGED_BUILD_NOT_PERMANENT\n} UnloggedBuildPhase;\n\nint debug_compare_local;\n\nstatic NRelFileInfo unlogged_build_rel_info;\nstatic UnloggedBuildPhase unlogged_build_phase = UNLOGGED_BUILD_NOT_IN_PROGRESS;\n\nstatic bool 
neon_redo_read_buffer_filter(XLogReaderState *record, uint8 block_id);\nstatic bool (*old_redo_read_buffer_filter) (XLogReaderState *record, uint8 block_id) = NULL;\n\nstatic BlockNumber neon_nblocks(SMgrRelation reln, ForkNumber forknum);\n\n/*\n * Wrapper around log_newpage() that makes a temporary copy of the block and\n * WAL-logs that. This makes it safe to use while holding only a shared lock\n * on the page, see XLogSaveBufferForHint. We don't use XLogSaveBufferForHint\n * directly because it skips the logging if the LSN is new enough.\n */\nstatic XLogRecPtr\nlog_newpage_copy(NRelFileInfo * rinfo, ForkNumber forkNum, BlockNumber blkno,\n\t\t\t\t Page page, bool page_std)\n{\n\tPGIOAlignedBlock copied_buffer;\n\n\tmemcpy(copied_buffer.data, page, BLCKSZ);\n\treturn log_newpage(rinfo, forkNum, blkno, copied_buffer.data, page_std);\n}\n\n#if PG_MAJORVERSION_NUM >= 17\n/*\n * Wrapper around log_newpages() that makes a temporary copy of the block and\n * WAL-logs that. This makes it safe to use while holding only a shared lock\n * on the page, see XLogSaveBufferForHint. 
We don't use XLogSaveBufferForHint\n * directly because it skips the logging if the LSN is new enough.\n */\nstatic XLogRecPtr\nlog_newpages_copy(NRelFileInfo * rinfo, ForkNumber forkNum, BlockNumber blkno,\n\t\t\t\t  BlockNumber nblocks, Page *pages, bool page_std)\n{\n\tPGIOAlignedBlock copied_buffer[XLR_MAX_BLOCK_ID];\n\tBlockNumber\tblknos[XLR_MAX_BLOCK_ID];\n\tPage\t\tpageptrs[XLR_MAX_BLOCK_ID];\n\tint\t\t\tnregistered = 0;\n\n\tfor (int i = 0; i < nblocks; i++)\n\t{\n\t\tPage\tpage = copied_buffer[nregistered].data;\n\t\tmemcpy(page, pages[i], BLCKSZ);\n\t\tpageptrs[nregistered] = page;\n\t\tblknos[nregistered] = blkno + i;\n\n\t\t++nregistered;\n\n\t\tif (nregistered >= XLR_MAX_BLOCK_ID)\n\t\t{\n\t\t\tlog_newpages(rinfo, forkNum, nregistered, blknos, pageptrs,\n\t\t\t\t\t\t page_std);\n\t\t\tnregistered = 0;\n\t\t}\n\t}\n\n\tif (nregistered != 0)\n\t{\n\t\tlog_newpages(rinfo, forkNum, nregistered, blknos, pageptrs,\n\t\t\t\t\t page_std);\n\t}\n\n\treturn ProcLastRecPtr;\n}\n#endif /* PG_MAJORVERSION_NUM >= 17 */\n\n/*\n * Is 'buffer' identical to a freshly initialized empty heap page?\n */\nstatic bool\nPageIsEmptyHeapPage(char *buffer)\n{\n\tPGIOAlignedBlock empty_page;\n\n\tPageInit((Page) empty_page.data, BLCKSZ, 0);\n\n\treturn memcmp(buffer, empty_page.data, BLCKSZ) == 0;\n}\n\n#if PG_MAJORVERSION_NUM >= 17\nstatic void\nneon_wallog_pagev(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,\n\t\t\t\t  BlockNumber nblocks, const char **buffers, bool force)\n{\n#define BLOCK_BATCH_SIZE\t16\n\tbool\t\tlog_pages;\n\tBlockNumber\tbatch_blockno = blocknum;\n\tXLogRecPtr\tlsns[BLOCK_BATCH_SIZE];\n\tint\t\t\tbatch_size = 0;\n\n\t/*\n\t * Whenever a VM or FSM page is evicted, WAL-log it. FSM and (some) VM\n\t * changes are not WAL-logged when the changes are made, so this is our\n\t * last chance to log them, otherwise they're lost. That's OK for\n\t * correctness, the non-logged updates are not critical. 
But we want to\n\t * have a reasonably up-to-date VM and FSM in the page server.\n\t */\n\tlog_pages = false;\n\tif (force)\n\t{\n\t\tAssert(XLogInsertAllowed());\n\t\tlog_pages = true;\n\t}\n\telse if (XLogInsertAllowed() &&\n\t\t\t (forknum == FSM_FORKNUM || forknum == VISIBILITYMAP_FORKNUM))\n\t{\n\t\tlog_pages = true;\n\t}\n\n\tif (log_pages)\n\t{\n\t\tXLogRecPtr\trecptr;\n\t\trecptr = log_newpages_copy(&InfoFromSMgrRel(reln), forknum, blocknum,\n\t\t\t\t\t\t\t\t   nblocks, (Page *) buffers, false);\n\n\t\tfor (int i = 0; i < nblocks; i++)\n\t\t\tPageSetLSN(unconstify(char *, buffers[i]), recptr);\n\n\t\tereport(SmgrTrace,\n\t\t\t\t(errmsg(NEON_TAG \"Page %u through %u of relation %u/%u/%u.%u \"\n\t\t\t\t\t\t\t\t \"were force logged, lsn=%X/%X\",\n\t\t\t\t\t\tblocknum, blocknum + nblocks,\n\t\t\t\t\t\tRelFileInfoFmt(InfoFromSMgrRel(reln)),\n\t\t\t\t\t\tforknum, LSN_FORMAT_ARGS(recptr))));\n\t}\n\n\tfor (int i = 0; i < nblocks; i++)\n\t{\n\t\tPage\t\tpage = (Page) buffers[i];\n\t\tBlockNumber blkno = blocknum + i;\n\t\tXLogRecPtr\tlsn = PageGetLSN(page);\n\n\t\tif (lsn == InvalidXLogRecPtr)\n\t\t{\n\t\t\t/*\n\t\t\t * When PostgreSQL extends a relation, it calls smgrextend() with an\n\t\t\t * all-zeros pages, and we can just ignore that in Neon. We do need to\n\t\t\t * remember the new size, though, so that smgrnblocks() returns the\n\t\t\t * right answer after the rel has been extended. We rely on the\n\t\t\t * relsize cache for that.\n\t\t\t *\n\t\t\t * A completely empty heap page doesn't need to be WAL-logged, either.\n\t\t\t * The heapam can leave such a page behind, if e.g. an insert errors\n\t\t\t * out after initializing the page, but before it has inserted the\n\t\t\t * tuple and WAL-logged the change. When we read the page from the\n\t\t\t * page server, it will come back as all-zeros. 
That's OK, the heapam\n\t\t\t * will initialize an all-zeros page on first use.\n\t\t\t *\n\t\t\t * In other scenarios, evicting a dirty page with no LSN is a bad\n\t\t\t * sign: it implies that the page was not WAL-logged, and its contents\n\t\t\t * will be lost when it's evicted.\n\t\t\t */\n\t\t\tif (PageIsNew(page))\n\t\t\t{\n\t\t\t\tereport(SmgrTrace,\n\t\t\t\t\t\t(errmsg(NEON_TAG \"Page %u of relation %u/%u/%u.%u is all-zeros\",\n\t\t\t\t\t\t\t\tblkno,\n\t\t\t\t\t\t\t\tRelFileInfoFmt(InfoFromSMgrRel(reln)),\n\t\t\t\t\t\t\t\tforknum)));\n\t\t\t}\n\t\t\telse if (PageIsEmptyHeapPage(page))\n\t\t\t{\n\t\t\t\tereport(SmgrTrace,\n\t\t\t\t\t\t(errmsg(NEON_TAG \"Page %u of relation %u/%u/%u.%u is an empty heap page with no LSN\",\n\t\t\t\t\t\t\t\tblkno,\n\t\t\t\t\t\t\t\tRelFileInfoFmt(InfoFromSMgrRel(reln)),\n\t\t\t\t\t\t\t\tforknum)));\n\t\t\t}\n\t\t\telse if (forknum != FSM_FORKNUM && forknum != VISIBILITYMAP_FORKNUM)\n\t\t\t{\n\t\t\t\t/*\n\t\t\t\t * Its a bad sign if there is a page with zero LSN in the buffer\n\t\t\t\t * cache in a standby, too. However, PANICing seems like a cure\n\t\t\t\t * worse than the disease, as the damage has likely already been\n\t\t\t\t * done in the primary. So in a standby, make this an assertion,\n\t\t\t\t * and in a release build just LOG the error and soldier on. We\n\t\t\t\t * update the last-written LSN of the page with a conservative\n\t\t\t\t * value in that case, which is the last replayed LSN.\n\t\t\t\t */\n\t\t\t\tereport(RecoveryInProgress() ? 
LOG : PANIC,\n\t\t\t\t\t\t(errmsg(NEON_TAG \"Page %u of relation %u/%u/%u.%u is evicted with zero LSN\",\n\t\t\t\t\t\t\t\tblkno,\n\t\t\t\t\t\t\t\tRelFileInfoFmt(InfoFromSMgrRel(reln)),\n\t\t\t\t\t\t\t\tforknum)));\n\t\t\t\tAssert(false);\n\n\t\t\t\tlsn = GetXLogReplayRecPtr(NULL); /* in standby mode, soldier on */\n\t\t\t}\n\t\t}\n\t\telse\n\t\t{\n\t\t\tereport(SmgrTrace,\n\t\t\t\t\t(errmsg(NEON_TAG \"Evicting page %u of relation %u/%u/%u.%u with lsn=%X/%X\",\n\t\t\t\t\t\t\tblkno,\n\t\t\t\t\t\t\tRelFileInfoFmt(InfoFromSMgrRel(reln)),\n\t\t\t\t\t\t\tforknum, LSN_FORMAT_ARGS(lsn))));\n\t\t}\n\n\t\t/*\n\t\t * Remember the LSN on this page. When we read the page again, we must\n\t\t * read the same or newer version of it.\n\t\t */\n\t\tlsns[batch_size++] = lsn;\n\n\t\tif (batch_size >= BLOCK_BATCH_SIZE)\n\t\t{\n\t\t\tneon_set_lwlsn_block_v(lsns, InfoFromSMgrRel(reln), forknum,\n\t\t\t\t\t\t\t\t\t   batch_blockno,\n\t\t\t\t\t\t\t\t\t   batch_size);\n\t\t\tbatch_blockno += batch_size;\n\t\t\tbatch_size = 0;\n\t\t}\n\t}\n\n\tif (batch_size != 0)\n\t{\n\t\tneon_set_lwlsn_block_v(lsns, InfoFromSMgrRel(reln), forknum,\n\t\t\t\t\t\t\t\t   batch_blockno,\n\t\t\t\t\t\t\t\t   batch_size);\n\t}\n}\n#endif\n\n/*\n * A page is being evicted from the shared buffer cache. Update the\n * last-written LSN of the page, and WAL-log it if needed.\n */\n#if PG_MAJORVERSION_NUM < 16\nstatic void\nneon_wallog_page(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, char *buffer, bool force)\n#else\nstatic void\nneon_wallog_page(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, const char *buffer, bool force)\n#endif\n{\n\tXLogRecPtr\tlsn = PageGetLSN((Page) buffer);\n\tbool\t\tlog_page;\n\n\t/*\n\t * Whenever a VM or FSM page is evicted, WAL-log it. FSM and (some) VM\n\t * changes are not WAL-logged when the changes are made, so this is our\n\t * last chance to log them, otherwise they're lost. That's OK for\n\t * correctness, the non-logged updates are not critical. 
But we want to\n\t * have a reasonably up-to-date VM and FSM in the page server.\n\t */\n\tlog_page = false;\n\tif (force)\n\t{\n\t\tAssert(XLogInsertAllowed());\n\t\tlog_page = true;\n\t}\n\telse if (XLogInsertAllowed() &&\n\t\t\t !ShutdownRequestPending &&\n\t\t\t (forknum == FSM_FORKNUM || forknum == VISIBILITYMAP_FORKNUM))\n\t{\n\t\tlog_page = true;\n\t}\n\n\tif (log_page)\n\t{\n\t\tXLogRecPtr\trecptr;\n\n\t\trecptr = log_newpage_copy(&InfoFromSMgrRel(reln), forknum, blocknum,\n\t\t\t\t\t\t\t\t  (Page) buffer, false);\n\t\tXLogFlush(recptr);\n\t\tlsn = recptr;\n\t\tereport(SmgrTrace,\n\t\t\t\t(errmsg(NEON_TAG \"Page %u of relation %u/%u/%u.%u was force logged. Evicted at lsn=%X/%X\",\n\t\t\t\t\t\tblocknum,\n\t\t\t\t\t\tRelFileInfoFmt(InfoFromSMgrRel(reln)),\n\t\t\t\t\t\tforknum, LSN_FORMAT_ARGS(lsn))));\n\t}\n\n\tif (lsn == InvalidXLogRecPtr)\n\t{\n\t\t/*\n\t\t * When PostgreSQL extends a relation, it calls smgrextend() with an\n\t\t * all-zeros pages, and we can just ignore that in Neon. We do need to\n\t\t * remember the new size, though, so that smgrnblocks() returns the\n\t\t * right answer after the rel has been extended. We rely on the\n\t\t * relsize cache for that.\n\t\t *\n\t\t * A completely empty heap page doesn't need to be WAL-logged, either.\n\t\t * The heapam can leave such a page behind, if e.g. an insert errors\n\t\t * out after initializing the page, but before it has inserted the\n\t\t * tuple and WAL-logged the change. When we read the page from the\n\t\t * page server, it will come back as all-zeros. 
That's OK, the heapam\n\t\t * will initialize an all-zeros page on first use.\n\t\t *\n\t\t * In other scenarios, evicting a dirty page with no LSN is a bad\n\t\t * sign: it implies that the page was not WAL-logged, and its contents\n\t\t * will be lost when it's evicted.\n\t\t */\n\t\tif (PageIsNew((Page) buffer))\n\t\t{\n\t\t\tereport(SmgrTrace,\n\t\t\t\t\t(errmsg(NEON_TAG \"Page %u of relation %u/%u/%u.%u is all-zeros\",\n\t\t\t\t\t\t\tblocknum,\n\t\t\t\t\t\t\tRelFileInfoFmt(InfoFromSMgrRel(reln)),\n\t\t\t\t\t\t\tforknum)));\n\t\t}\n\t\telse if (PageIsEmptyHeapPage((Page) buffer))\n\t\t{\n\t\t\tereport(SmgrTrace,\n\t\t\t\t\t(errmsg(NEON_TAG \"Page %u of relation %u/%u/%u.%u is an empty heap page with no LSN\",\n\t\t\t\t\t\t\tblocknum,\n\t\t\t\t\t\t\tRelFileInfoFmt(InfoFromSMgrRel(reln)),\n\t\t\t\t\t\t\tforknum)));\n\t\t}\n\t\telse if (forknum != FSM_FORKNUM && forknum != VISIBILITYMAP_FORKNUM)\n\t\t{\n\t\t\t/*\n\t\t\t * Its a bad sign if there is a page with zero LSN in the buffer\n\t\t\t * cache in a standby, too. However, PANICing seems like a cure\n\t\t\t * worse than the disease, as the damage has likely already been\n\t\t\t * done in the primary. So in a standby, make this an assertion,\n\t\t\t * and in a release build just LOG the error and soldier on. We\n\t\t\t * update the last-written LSN of the page with a conservative\n\t\t\t * value in that case, which is the last replayed LSN.\n\t\t\t */\n\t\t\tereport(RecoveryInProgress() ? 
LOG : PANIC,\n\t\t\t\t\t(errmsg(NEON_TAG \"Page %u of relation %u/%u/%u.%u is evicted with zero LSN\",\n\t\t\t\t\t\t\tblocknum,\n\t\t\t\t\t\t\tRelFileInfoFmt(InfoFromSMgrRel(reln)),\n\t\t\t\t\t\t\tforknum)));\n\t\t\tAssert(false);\n\n\t\t\tlsn = GetXLogReplayRecPtr(NULL); /* in standby mode, soldier on */\n\t\t}\n\t}\n\telse\n\t{\n\t\tereport(SmgrTrace,\n\t\t\t\t(errmsg(NEON_TAG \"Evicting page %u of relation %u/%u/%u.%u with lsn=%X/%X\",\n\t\t\t\t\t\tblocknum,\n\t\t\t\t\t\tRelFileInfoFmt(InfoFromSMgrRel(reln)),\n\t\t\t\t\t\tforknum, LSN_FORMAT_ARGS(lsn))));\n\t}\n\n\t/*\n\t * Remember the LSN on this page. When we read the page again, we must\n\t * read the same or newer version of it.\n\t */\n\tneon_set_lwlsn_block(lsn, InfoFromSMgrRel(reln), forknum, blocknum);\n}\n\n/*\n *\tneon_init() -- Initialize private state\n */\nstatic void\nneon_init(void)\n{\n\t/*\n\t * Sanity check that the perf counters array is sized correctly. We got\n\t * this wrong once, and the formula for max number of backends and aux\n\t * processes might well change in the future, so better safe than sorry.\n\t * This is a very cheap check so we do it even without assertions.  On\n\t * v14, this gets called before initializing MyProc, so we cannot perform\n\t * the check here. That's OK, we don't expect the logic to change in old\n\t * releases.\n\t */\n#if PG_VERSION_NUM>=150000\n\tif (MyNeonCounters >= &neon_per_backend_counters_shared[NUM_NEON_PERF_COUNTER_SLOTS])\n\t\telog(ERROR, \"MyNeonCounters points past end of array\");\n#endif\n\n\told_redo_read_buffer_filter = redo_read_buffer_filter;\n\tredo_read_buffer_filter = neon_redo_read_buffer_filter;\n\n\tif (debug_compare_local)\n\t{\n\t\tmdinit();\n\t}\n}\n\n/*\n * GetXLogInsertRecPtr uses XLogBytePosToRecPtr to convert logical insert (reserved) position\n * to physical position in WAL. 
It always adds SizeOfXLogShortPHD:\n *\t\tseg_offset += fullpages * XLOG_BLCKSZ + bytesleft + SizeOfXLogShortPHD;\n * so even if there are no records on the page, offset will be SizeOfXLogShortPHD.\n * It may cause problems with XLogFlush. So return pointer backward to the origin of the page.\n */\nstatic XLogRecPtr\nnm_adjust_lsn(XLogRecPtr lsn)\n{\n\t/*\n\t * If lsn points to the beginning of the first record on page or segment, then\n\t * \"return\" it back to the page origin\n\t */\n\tif ((lsn & (XLOG_BLCKSZ - 1)) == SizeOfXLogShortPHD)\n\t{\n\t\tlsn -= SizeOfXLogShortPHD;\n\t}\n\telse if ((lsn & (wal_segment_size - 1)) == SizeOfXLogLongPHD)\n\t{\n\t\tlsn -= SizeOfXLogLongPHD;\n\t}\n\treturn lsn;\n}\n\n\n/*\n * Return LSN for requesting pages and number of blocks from page server\n *\n * XXX: exposed so that prefetch_do_request() can call back here.\n */\nvoid\nneon_get_request_lsns(NRelFileInfo rinfo, ForkNumber forknum, BlockNumber blkno,\n\t\t\t\t\t  neon_request_lsns *output, BlockNumber nblocks)\n{\n\tXLogRecPtr\tlast_written_lsns[PG_IOV_MAX];\n\n\tAssert(nblocks <= PG_IOV_MAX);\n\n\tneon_get_lwlsn_v(rinfo, forknum, blkno, (int) nblocks, last_written_lsns);\n\n\tfor (int i = 0; i < nblocks; i++)\n\t{\n\t\tlast_written_lsns[i] = nm_adjust_lsn(last_written_lsns[i]);\n\t\tAssert(last_written_lsns[i] != InvalidXLogRecPtr);\n\t}\n\n\tif (RecoveryInProgress())\n\t{\n\t\t/*---\n\t\t * In broad strokes, a replica always requests the page at the current\n\t\t * replay LSN. But looking closer, what exactly is the replay LSN? Is\n\t\t * it the last replayed record, or the record being replayed? And does\n\t\t * the startup process performing the replay need to do something\n\t\t * differently than backends running queries? Let's take a closer look\n\t\t * at the different scenarios:\n\t\t *\n\t\t * 1. Startup process reads a page, last_written_lsn is old.\n\t\t *\n\t\t * Read the old version of the page. 
We will apply the WAL record on\n\t\t * it to bring it up-to-date.\n\t\t *\n\t\t * We could read the new version, with the changes from this WAL\n\t\t * record already applied, to offload the work of replaying the record\n\t\t * to the pageserver. The pageserver might not have received the WAL\n\t\t * record yet, though, so a read of the old page version and applying\n\t\t * the record ourselves is likely faster. Also, the redo function\n\t\t * might be surprised if the changes have already applied. That's\n\t\t * normal during crash recovery, but not in hot standby.\n\t\t *\n\t\t * 2. Startup process reads a page, last_written_lsn == record we're\n\t\t *    replaying.\n\t\t *\n\t\t * Can this happen? There are a few theoretical cases when it might:\n\t\t *\n\t\t * A) The redo function reads the same page twice. We had already read\n\t\t *    and applied the changes once, and now we're reading it for the\n\t\t *    second time.  That would be a rather silly thing for a redo\n\t\t *    function to do, and I'm not aware of any that would do it.\n\t\t *\n\t\t * B) The redo function modifies multiple pages, and it already\n\t\t *    applied the changes to one of the pages, released the lock on\n\t\t *    it, and is now reading a second page.  Furthermore, the first\n\t\t *    page was already evicted from the buffer cache, and also from\n\t\t *    the last-written LSN cache, so that the per-relation or global\n\t\t *    last-written LSN was already updated. 
All the WAL redo functions\n\t\t *    hold the locks on pages that they modify, until all the changes\n\t\t *    have been modified (?), which would make that impossible.\n\t\t *    However, we skip the locking, if the page isn't currently in the\n\t\t *    page cache (see neon_redo_read_buffer_filter below).\n\t\t *\n\t\t * Even if the one of the above cases were possible in theory, they\n\t\t * would also require the pages being modified by the redo function to\n\t\t * be immediately evicted from the page cache.\n\t\t *\n\t\t * So this probably does not happen in practice. But if it does, we\n\t\t * request the new version, including the changes from the record\n\t\t * being replayed. That seems like the correct behavior in any case.\n\t\t *\n\t\t * 3. Backend process reads a page with old last-written LSN\n\t\t *\n\t\t * Nothing special here. Read the old version.\n\t\t *\n\t\t * 4. Backend process reads a page with last_written_lsn == record being replayed\n\t\t *\n\t\t * This can happen, if the redo function has started to run, and saw\n\t\t * that the page isn't present in the page cache (see\n\t\t * neon_redo_read_buffer_filter below).  Normally, in a normal\n\t\t * Postgres server, the redo function would hold a lock on the page,\n\t\t * so we would get blocked waiting the redo function to release the\n\t\t * lock. To emulate that, wait for the WAL replay of the record to\n\t\t * finish.\n\t\t */\n\t\t/* Request the page at the end of the last fully replayed LSN. */\n\t\tXLogRecPtr replay_lsn = GetXLogReplayRecPtr(NULL);\n\n\t\tfor (int i = 0; i < nblocks; i++)\n\t\t{\n\t\t\tneon_request_lsns *result = &output[i];\n\t\t\tXLogRecPtr\tlast_written_lsn = last_written_lsns[i];\n\n\t\t\tif (last_written_lsn > replay_lsn)\n\t\t\t{\n\t\t\t\t/* GetCurrentReplayRecPtr was introduced in v15 */\n#if PG_VERSION_NUM >= 150000\n\t\t\t\tAssert(last_written_lsn == GetCurrentReplayRecPtr(NULL));\n#endif\n\n\t\t\t\t/*\n\t\t\t\t * Cases 2 and 4. 
If this is a backend (case 4), the\n\t\t\t\t * neon_read_at_lsn() call later will wait for the WAL record to be\n\t\t\t\t * fully replayed.\n\t\t\t\t */\n\t\t\t\tresult->request_lsn = last_written_lsn;\n\t\t\t}\n\t\t\telse\n\t\t\t{\n\t\t\t\t/* cases 1 and 3 */\n\t\t\t\tresult->request_lsn = replay_lsn;\n\t\t\t}\n\n\t\t\tresult->not_modified_since = last_written_lsn;\n\t\t\tresult->effective_request_lsn = result->request_lsn;\n\t\t\tAssert(last_written_lsn <= result->request_lsn);\n\n\t\t\tneon_log(DEBUG1, \"neon_get_request_lsns request lsn %X/%X, not_modified_since %X/%X\",\n\t\t\t\t\t LSN_FORMAT_ARGS(result->request_lsn), LSN_FORMAT_ARGS(result->not_modified_since));\n\t\t}\n\t}\n\telse\n\t{\n\t\tXLogRecPtr\tflushlsn;\n#if PG_VERSION_NUM >= 150000\n\t\tflushlsn = GetFlushRecPtr(NULL);\n#else\n\t\tflushlsn = GetFlushRecPtr();\n#endif\n\n\t\tfor (int i = 0; i < nblocks; i++)\n\t\t{\n\t\t\tneon_request_lsns *result = &output[i];\n\t\t\tXLogRecPtr\tlast_written_lsn = last_written_lsns[i];\n\n\t\t\t/*\n\t\t\t * Use the latest LSN that was evicted from the buffer cache as the\n\t\t\t * 'not_modified_since' hint. Any pages modified by later WAL records\n\t\t\t * must still be in the buffer cache, so our request cannot concern\n\t\t\t * those.\n\t\t\t */\n\t\t\tneon_log(DEBUG1, \"neon_get_request_lsns GetLastWrittenLSN lsn %X/%X\",\n\t\t\t\t\t LSN_FORMAT_ARGS(last_written_lsn));\n\n\t\t\t/*\n\t\t\t * Is it possible that the last-written LSN is ahead of last flush\n\t\t\t * LSN? Generally not, we shouldn't evict a page from the buffer cache\n\t\t\t * before all its modifications have been safely flushed. That's the\n\t\t\t * \"WAL before data\" rule. 
However, such case does exist at index\n\t\t\t * building, _bt_blwritepage logs the full page without flushing WAL\n\t\t\t * before smgrextend (files are fsynced before build ends).\n\t\t\t */\n\t\t\tif (last_written_lsn > flushlsn)\n\t\t\t{\n\t\t\t\tneon_log(DEBUG5, \"last-written LSN %X/%X is ahead of last flushed LSN %X/%X\",\n\t\t\t\t\t\t LSN_FORMAT_ARGS(last_written_lsn),\n\t\t\t\t\t\t LSN_FORMAT_ARGS(flushlsn));\n\t\t\t\tXLogFlush(last_written_lsn);\n\t\t\t}\n\n\t\t\t/*\n\t\t\t * Request the very latest version of the page. In principle we\n\t\t\t * want to read the page at the current insert LSN, and we could\n\t\t\t * use that value in the request. However, there's a corner case\n\t\t\t * with pageserver's garbage collection. If the GC horizon is\n\t\t\t * set to a very small value, it's possible that by the time\n\t\t\t * that the pageserver processes our request, the GC horizon has\n\t\t\t * already moved past the LSN we calculate here. Standby servers\n\t\t\t * always have that problem as they can always lag behind the\n\t\t\t * primary, but for the primary we can avoid it by always\n\t\t\t * requesting the latest page, by setting request LSN to\n\t\t\t * UINT64_MAX.\n\t\t\t *\n\t\t\t * effective_request_lsn is used to check that received response is still valid.\n\t\t\t * In case of primary node it is last written LSN. Originally we used flush_lsn here,\n\t\t\t * but it is not correct. Consider the following scenario:\n\t\t\t * 1. Backend A wants to prefetch block X\n\t\t\t * 2. Backend A checks that block X is not present in the shared buffer cache\n\t\t\t * 3. Backend A calls prefetch_do_request, which calls neon_get_request_lsns\n\t\t\t * 4. neon_get_request_lsns obtains LwLSN=11 for the block\n\t\t\t * 5. Backend B downloads block X, updates and wallogs it with LSN=13\n\t\t\t * 6. Block X is once again evicted from shared buffers, its LwLSN is set to LSN=13\n\t\t\t * 7. Backend A is still executing in neon_get_request_lsns(). 
It calls 'flushlsn = GetFlushRecPtr();'.\n\t\t\t *    Let's say that it is LSN=14\n\t\t\t * 8. Backend A uses LSN=14 as effective_lsn in the prefetch slot. The request stored in the slot is\n\t\t\t *    [not_modified_since=11, effective_request_lsn=14]\n\t\t\t * 9. Backend A sends the prefetch request, pageserver processes it, and sends response.\n\t\t\t *    The last LSN that the pageserver had processed was LSN=12, so the page image in the response is valid at LSN=12.\n\t\t\t * 10. Backend A calls smgrread() for page X with LwLSN=13\n\t\t\t * 11. Backend A finds in prefetch ring the response for the prefetch request with [not_modified_since=11, effective_request_lsn=14],\n\t\t\t * so it satisfies neon_prefetch_response_usable condition.\n\t\t\t *\n\t\t\t * Things go wrong in step 7-8, when [not_modified_since=11, effective_request_lsn=14] is determined for the request.\n\t\t\t * That is incorrect, because the page has in fact been modified at LSN=13. The invariant is that for any request,\n\t\t\t * there should not be any modifications to a page between its not_modified_since and (effective_)request_lsn values.\n\t\t\t *\n\t\t\t * The problem can be fixed by calling GetFlushRecPtr() before checking if the page is in the buffer cache.\n\t\t\t * But you can't do that within smgrprefetch(), would need to modify the caller.\n\t\t\t */\n\t\t\tresult->request_lsn = UINT64_MAX;\n\t\t\tresult->not_modified_since = last_written_lsn;\n\t\t\tresult->effective_request_lsn = last_written_lsn;\n\t\t}\n\t}\n}\n\n/*\n *\tneon_exists() -- Does the physical file exist?\n */\nstatic bool\nneon_exists(SMgrRelation reln, ForkNumber forkNum)\n{\n\tBlockNumber n_blocks;\n\tneon_request_lsns request_lsns;\n\n\tswitch (reln->smgr_relpersistence)\n\t{\n\t\tcase 0:\n\n\t\t\t/*\n\t\t\t * We don't know if it's an unlogged rel stored locally, or\n\t\t\t * permanent rel stored in the page server. First check if it\n\t\t\t * exists locally. If it does, great. 
Otherwise check if it exists\n\t\t\t * in the page server.\n\t\t\t */\n\t\t\tif (mdexists(reln, forkNum))\n\t\t\t\treturn true;\n\t\t\tbreak;\n\n\t\tcase RELPERSISTENCE_PERMANENT:\n\t\t\tbreak;\n\n\t\tcase RELPERSISTENCE_TEMP:\n\t\tcase RELPERSISTENCE_UNLOGGED:\n\t\t\treturn mdexists(reln, forkNum);\n\n\t\tdefault:\n\t\t\tneon_log(ERROR, \"unknown relpersistence '%c'\", reln->smgr_relpersistence);\n\t}\n\n\tif (get_cached_relsize(InfoFromSMgrRel(reln), forkNum, &n_blocks))\n\t{\n\t\treturn true;\n\t}\n\n\t/*\n\t * \\d+ on a view calls smgrexists with 0/0/0 relfilenode. The page server\n\t * will error out if you check that, because the whole dbdir for\n\t * tablespace 0, db 0 doesn't exists. We possibly should change the page\n\t * server to accept that and return 'false', to be consistent with\n\t * mdexists(). But we probably also should fix pg_table_size() to not call\n\t * smgrexists() with bogus relfilenode.\n\t *\n\t * For now, handle that special case here.\n\t */\n#if PG_MAJORVERSION_NUM >= 16\n\tif (reln->smgr_rlocator.locator.spcOid == 0 &&\n\t\treln->smgr_rlocator.locator.dbOid == 0 &&\n\t\treln->smgr_rlocator.locator.relNumber == 0)\n#else\n\tif (reln->smgr_rnode.node.spcNode == 0 &&\n\t\treln->smgr_rnode.node.dbNode == 0 &&\n\t\treln->smgr_rnode.node.relNode == 0)\n#endif\n\t{\n\t\treturn false;\n\t}\n\n\tneon_get_request_lsns(InfoFromSMgrRel(reln), forkNum,\n\t\t\t\t\t\t  REL_METADATA_PSEUDO_BLOCKNO, &request_lsns, 1);\n\n\treturn communicator_exists(InfoFromSMgrRel(reln), forkNum, &request_lsns);\n}\n\n/*\n *\tneon_create() -- Create a new relation on neond storage\n *\n * If isRedo is true, it's okay for the relation to exist already.\n */\nstatic void\nneon_create(SMgrRelation reln, ForkNumber forkNum, bool isRedo)\n{\n\tswitch (reln->smgr_relpersistence)\n\t{\n\t\tcase 0:\n\t\t\tneon_log(ERROR, \"cannot call smgrcreate() on rel with unknown persistence\");\n\n\t\tcase RELPERSISTENCE_PERMANENT:\n\t\t\tbreak;\n\n\t\tcase 
RELPERSISTENCE_TEMP:\n\t\tcase RELPERSISTENCE_UNLOGGED:\n\t\t\tif (debug_compare_local)\n\t\t\t{\n\t\t\t\tmdcreate(reln, forkNum, forkNum == INIT_FORKNUM || isRedo);\n\t\t\t\tif (forkNum == MAIN_FORKNUM)\n\t\t\t\t\tmdcreate(reln, INIT_FORKNUM, true);\n\t\t\t}\n\t\t\telse\n\t\t\t{\n\t\t\t\tmdcreate(reln, forkNum, isRedo);\n\t\t\t}\n\t\t\treturn;\n\n\t\tdefault:\n\t\t\tneon_log(ERROR, \"unknown relpersistence '%c'\", reln->smgr_relpersistence);\n\t}\n\n\tneon_log(SmgrTrace, \"Create relation %u/%u/%u.%u\",\n\t\t RelFileInfoFmt(InfoFromSMgrRel(reln)),\n\t\t forkNum);\n\n\t/*\n\t * Newly created relation is empty, remember that in the relsize cache.\n\t *\n\t * Note that in REDO, this is called to make sure the relation fork\n\t * exists, but it does not truncate the relation. So, we can only update\n\t * the relsize if it didn't exist before.\n\t *\n\t * Also, in redo, we must make sure to update the cached size of the\n\t * relation, as that is the primary source of truth for REDO's file length\n\t * considerations, and as file extension isn't (perfectly) logged, we need\n\t * to take care of that before we hit file size checks.\n\t *\n\t * FIXME: This is currently not just an optimization, but required for\n\t * correctness. Postgres can call smgrnblocks() on the newly-created\n\t * relation. 
Currently, we don't call SetLastWrittenLSN() when a new\n\t * relation is created, so if we didn't remember the size in the relsize\n\t * cache, we might call smgrnblocks() on the newly-created relation before\n\t * the creation WAL record has been received by the page server.\n\t */\n\tif (isRedo)\n\t{\n\t\tupdate_cached_relsize(InfoFromSMgrRel(reln), forkNum, 0);\n\t\tget_cached_relsize(InfoFromSMgrRel(reln), forkNum,\n\t\t\t\t\t\t   &reln->smgr_cached_nblocks[forkNum]);\n\t}\n\telse\n\t\tset_cached_relsize(InfoFromSMgrRel(reln), forkNum, 0);\n\n\tif (debug_compare_local)\n\t{\n\t\tif (IS_LOCAL_REL(reln))\n\t\t\tmdcreate(reln, forkNum, isRedo);\n\t}\n}\n\n/*\n *\tneon_unlink() -- Unlink a relation.\n *\n * Note that we're passed a RelFileNodeBackend --- by the time this is called,\n * there won't be an SMgrRelation hashtable entry anymore.\n *\n * forkNum can be a fork number to delete a specific fork, or InvalidForkNumber\n * to delete all forks.\n *\n *\n * If isRedo is true, it's unsurprising for the relation to be already gone.\n * Also, we should remove the file immediately instead of queuing a request\n * for later, since during redo there's no possibility of creating a\n * conflicting relation.\n *\n * Note: any failure should be reported as WARNING not ERROR, because\n * we are usually not in a transaction anymore when this is called.\n */\nstatic void\nneon_unlink(NRelFileInfoBackend rinfo, ForkNumber forkNum, bool isRedo)\n{\n\t/*\n\t * Might or might not exist locally, depending on whether it's an unlogged\n\t * or permanent relation (or if debug_compare_local is set). 
Try to\n\t * unlink, it won't do any harm if the file doesn't exist.\n\t */\n\tmdunlink(rinfo, forkNum, isRedo);\n\tif (!NRelFileInfoBackendIsTemp(rinfo))\n\t{\n\t\tforget_cached_relsize(InfoFromNInfoB(rinfo), forkNum);\n\t}\n}\n\n/*\n *\tneon_extend() -- Add a block to the specified relation.\n *\n *\t\tThe semantics are nearly the same as mdwrite(): write at the\n *\t\tspecified position.  However, this is to be used for the case of\n *\t\textending a relation (i.e., blocknum is at or beyond the current\n *\t\tEOF).  Note that we assume writing a block beyond current EOF\n *\t\tcauses intervening file space to become filled with zeroes.\n */\nstatic void\n#if PG_MAJORVERSION_NUM < 16\nneon_extend(SMgrRelation reln, ForkNumber forkNum, BlockNumber blkno,\n\t\t\tchar *buffer, bool skipFsync)\n#else\nneon_extend(SMgrRelation reln, ForkNumber forkNum, BlockNumber blkno,\n\t\t\tconst void *buffer, bool skipFsync)\n#endif\n{\n\tXLogRecPtr\tlsn;\n\tBlockNumber n_blocks = 0;\n\n\tswitch (reln->smgr_relpersistence)\n\t{\n\t\tcase 0:\n\t\t\tneon_log(ERROR, \"cannot call smgrextend() on rel with unknown persistence\");\n\t\t\tbreak;\n\n\t\tcase RELPERSISTENCE_PERMANENT:\n\t\t\tif (RelFileInfoEquals(unlogged_build_rel_info, InfoFromSMgrRel(reln)))\n\t\t\t{\n\t\t\t\tmdextend(reln, forkNum, blkno, buffer, skipFsync);\n\t\t\t\treturn;\n\t\t\t}\n\t\t\tbreak;\n\n\t\tcase RELPERSISTENCE_TEMP:\n\t\tcase RELPERSISTENCE_UNLOGGED:\n\t\t\tmdextend(reln, forkNum, blkno, buffer, skipFsync);\n\t\t\treturn;\n\n\t\tdefault:\n\t\t\tneon_log(ERROR, \"unknown relpersistence '%c'\", reln->smgr_relpersistence);\n\t}\n\n\t/*\n\t * Check that the cluster size limit has not been exceeded.\n\t *\n\t * Temporary and unlogged relations are not included in the cluster size\n\t * measured by the page server, so ignore those. 
Autovacuum processes are\n\t * also exempt.\n\t */\n\tif (max_cluster_size > 0 &&\n\t\treln->smgr_relpersistence == RELPERSISTENCE_PERMANENT &&\n\t\t!AmAutoVacuumWorkerProcess())\n\t{\n\t\tuint64\t\tcurrent_size = GetNeonCurrentClusterSize();\n\n\t\tif (current_size >= ((uint64) max_cluster_size) * 1024 * 1024)\n\t\t\tereport(ERROR,\n\t\t\t\t\t(errcode(ERRCODE_DISK_FULL),\n\t\t\t\t\t errmsg(\"could not extend file because project size limit (%d MB) has been exceeded\",\n\t\t\t\t\t\t\tmax_cluster_size),\n\t\t\t\t\t errhint(\"This limit is defined externally by the project size limit, and internally by neon.max_cluster_size GUC\")));\n\t}\n\n\t/*\n\t * Usually Postgres doesn't extend relation on more than one page (leaving\n\t * holes). But this rule is violated in PG-15 where\n\t * CreateAndCopyRelationData call smgrextend for destination relation n\n\t * using size of source relation\n\t */\n\tn_blocks = neon_nblocks(reln, forkNum);\n\twhile (n_blocks < blkno)\n\t\tneon_wallog_page(reln, forkNum, n_blocks++, buffer, true);\n\n\tneon_wallog_page(reln, forkNum, blkno, buffer, false);\n\tset_cached_relsize(InfoFromSMgrRel(reln), forkNum, blkno + 1);\n\n\tlsn = PageGetLSN((Page) buffer);\n\tneon_log(SmgrTrace, \"smgrextend called for %u/%u/%u.%u blk %u, page LSN: %X/%08X\",\n\t\t RelFileInfoFmt(InfoFromSMgrRel(reln)),\n\t\t forkNum, blkno,\n\t\t (uint32) (lsn >> 32), (uint32) lsn);\n\n\tlfc_write(InfoFromSMgrRel(reln), forkNum, blkno, buffer);\n\n\tif (debug_compare_local)\n\t{\n\t\tif (IS_LOCAL_REL(reln))\n\t\t\tmdextend(reln, forkNum, blkno, buffer, skipFsync);\n\t}\n\n\t/*\n\t * smgr_extend is often called with an all-zeroes page, so\n\t * lsn==InvalidXLogRecPtr. An smgr_write() call will come for the buffer\n\t * later, after it has been initialized with the real page contents, and\n\t * it is eventually evicted from the buffer cache. 
But we need a valid LSN\n\t * to the relation metadata update now.\n\t */\n\tif (lsn == InvalidXLogRecPtr)\n\t{\n\t\tlsn = GetXLogInsertRecPtr();\n\t\tneon_set_lwlsn_block(lsn, InfoFromSMgrRel(reln), forkNum, blkno);\n\t}\n\tneon_set_lwlsn_relation(lsn, InfoFromSMgrRel(reln), forkNum);\n}\n\n#if PG_MAJORVERSION_NUM >= 16\nstatic void\nneon_zeroextend(SMgrRelation reln, ForkNumber forkNum, BlockNumber blocknum,\n\t\t\t\tint nblocks, bool skipFsync)\n{\n\tconst PGIOAlignedBlock buffer = {0};\n\tint\t\t\tremblocks = nblocks;\n\tXLogRecPtr\tlsn = 0;\n\n\tswitch (reln->smgr_relpersistence)\n\t{\n\t\tcase 0:\n\t\t\tneon_log(ERROR, \"cannot call smgrextend() on rel with unknown persistence\");\n\t\t\tbreak;\n\n\t\tcase RELPERSISTENCE_PERMANENT:\n\t\t\tif (RelFileInfoEquals(unlogged_build_rel_info, InfoFromSMgrRel(reln)))\n\t\t\t{\n\t\t\t\tmdzeroextend(reln, forkNum, blocknum, nblocks, skipFsync);\n\t\t\t\treturn;\n\t\t\t}\n\t\t\tbreak;\n\n\t\tcase RELPERSISTENCE_TEMP:\n\t\tcase RELPERSISTENCE_UNLOGGED:\n\t\t\tmdzeroextend(reln, forkNum, blocknum, nblocks, skipFsync);\n\t\t\treturn;\n\n\t\tdefault:\n\t\t\tneon_log(ERROR, \"unknown relpersistence '%c'\", reln->smgr_relpersistence);\n\t}\n\n\tif (max_cluster_size > 0 &&\n\t\treln->smgr_relpersistence == RELPERSISTENCE_PERMANENT &&\n\t\t!AmAutoVacuumWorkerProcess())\n\t{\n\t\tuint64\t\tcurrent_size = GetNeonCurrentClusterSize();\n\n\t\tif (current_size >= ((uint64) max_cluster_size) * 1024 * 1024)\n\t\t\tereport(ERROR,\n\t\t\t\t\t(errcode(ERRCODE_DISK_FULL),\n\t\t\t\t\t errmsg(\"could not extend file because project size limit (%d MB) has been exceeded\",\n\t\t\t\t\t\t\tmax_cluster_size),\n\t\t\t\t\t errhint(\"This limit is defined by neon.max_cluster_size GUC\")));\n\t}\n\n\t/*\n\t * If a relation manages to grow to 2^32-1 blocks, refuse to extend it any\n\t * more --- we mustn't create a block whose number actually is\n\t * InvalidBlockNumber or larger.\n\t */\n\tif ((uint64) blocknum + nblocks >= (uint64) 
InvalidBlockNumber)\n\t\tereport(ERROR,\n\t\t\t\t(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),\n\t\t\t\t errmsg(NEON_TAG \"cannot extend file \\\"%s\\\" beyond %u blocks\",\n\t\t\t\t\t\trelpath(reln->smgr_rlocator, forkNum),\n\t\t\t\t\t\tInvalidBlockNumber)));\n\n\tif (debug_compare_local)\n\t{\n\t\tif (IS_LOCAL_REL(reln))\n\t\t\tmdzeroextend(reln, forkNum, blocknum, nblocks, skipFsync);\n\t}\n\n\t/* Don't log any pages if we're not allowed to do so. */\n\tif (!XLogInsertAllowed())\n\t\treturn;\n\n\t/* ensure we have enough xlog buffers to log max-sized records */\n\tXLogEnsureRecordSpace(Min(remblocks, (XLR_MAX_BLOCK_ID - 1)), 0);\n\n\t/*\n\t * Iterate over all the pages. They are collected into batches of\n\t * XLR_MAX_BLOCK_ID pages, and a single WAL-record is written for each\n\t * batch.\n\t */\n\twhile (remblocks > 0)\n\t{\n\t\tint\t\t\tcount = Min(remblocks, XLR_MAX_BLOCK_ID);\n\n\t\tXLogBeginInsert();\n\n\t\tfor (int i = 0; i < count; i++)\n\t\t\tXLogRegisterBlock(i, &InfoFromSMgrRel(reln), forkNum, blocknum + i,\n\t\t\t\t\t\t\t  (char *) buffer.data, REGBUF_FORCE_IMAGE | REGBUF_STANDARD);\n\n\t\tlsn = XLogInsert(RM_XLOG_ID, XLOG_FPI);\n\n\t\tfor (int i = 0; i < count; i++)\n\t\t{\n\t\t\tlfc_write(InfoFromSMgrRel(reln), forkNum, blocknum + i, buffer.data);\n\t\t\tneon_set_lwlsn_block(lsn, InfoFromSMgrRel(reln), forkNum,\n\t\t\t\t\t\t\t\t\t  blocknum + i);\n\t\t}\n\n\t\tblocknum += count;\n\t\tremblocks -= count;\n\t}\n\n\tAssert(lsn != 0);\n\n\tneon_set_lwlsn_relation(lsn, InfoFromSMgrRel(reln), forkNum);\n\tset_cached_relsize(InfoFromSMgrRel(reln), forkNum, blocknum);\n}\n#endif\n\n/*\n *  neon_open() -- Initialize newly-opened relation.\n */\nstatic void\nneon_open(SMgrRelation reln)\n{\n\t/*\n\t * We don't have anything special to do here. Call mdopen() to let md.c\n\t * initialize itself. 
That's only needed for temporary or unlogged\n\t * relations, but it's dirt cheap so do it always to make sure the md\n\t * fields are initialized, for debugging purposes if nothing else.\n\t */\n\tmdopen(reln);\n\n\t/* no work */\n\tneon_log(SmgrTrace, \"open noop\");\n}\n\n/*\n *\tneon_close() -- Close the specified relation, if it isn't closed already.\n */\nstatic void\nneon_close(SMgrRelation reln, ForkNumber forknum)\n{\n\t/*\n\t * Let md.c close it, if it had it open. Doesn't hurt to do this even for\n\t * permanent relations that have no local storage.\n\t */\n\tmdclose(reln, forknum);\n}\n\n\n#if PG_MAJORVERSION_NUM >= 17\n/*\n *\tneon_prefetch() -- Initiate asynchronous read of the specified block of a relation\n */\nstatic bool\nneon_prefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,\n\t\t\t  int nblocks)\n{\n\tBufferTag\ttag;\n\n\tswitch (reln->smgr_relpersistence)\n\t{\n\t\tcase 0:\t\t\t\t\t/* probably shouldn't happen, but ignore it */\n\t\tcase RELPERSISTENCE_PERMANENT:\n\t\t\tbreak;\n\n\t\tcase RELPERSISTENCE_TEMP:\n\t\tcase RELPERSISTENCE_UNLOGGED:\n\t\t\treturn mdprefetch(reln, forknum, blocknum, nblocks);\n\n\t\tdefault:\n\t\t\tneon_log(ERROR, \"unknown relpersistence '%c'\", reln->smgr_relpersistence);\n\t}\n\n\ttag.spcOid = reln->smgr_rlocator.locator.spcOid;\n\ttag.dbOid = reln->smgr_rlocator.locator.dbOid;\n\ttag.relNumber = reln->smgr_rlocator.locator.relNumber;\n\ttag.forkNum = forknum;\n\n\twhile (nblocks > 0)\n\t{\n\t\tint\t\titerblocks = Min(nblocks, PG_IOV_MAX);\n\t\tbits8\tlfc_present[PG_IOV_MAX / 8] = {0};\n\n\t\tif (lfc_cache_containsv(InfoFromSMgrRel(reln), forknum, blocknum,\n\t\t\t\t\t\t\t\titerblocks, lfc_present) == iterblocks)\n\t\t{\n\t\t\tnblocks -= iterblocks;\n\t\t\tblocknum += iterblocks;\n\t\t\tcontinue;\n\t\t}\n\n\t\ttag.blockNum = blocknum;\n\n\t\tcommunicator_prefetch_register_bufferv(tag, NULL, iterblocks, lfc_present);\n\n\t\tnblocks -= iterblocks;\n\t\tblocknum += 
iterblocks;\n\t}\n\n\tcommunicator_prefetch_pump_state();\n\n\treturn false;\n}\n\n\n#else /* PG_MAJORVERSION_NUM >= 17 */\n/*\n *\tneon_prefetch() -- Initiate asynchronous read of the specified block of a relation\n */\nstatic bool\nneon_prefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum)\n{\n\tBufferTag\ttag;\n\n\tswitch (reln->smgr_relpersistence)\n\t{\n\t\tcase 0:\t\t\t\t\t/* probably shouldn't happen, but ignore it */\n\t\tcase RELPERSISTENCE_PERMANENT:\n\t\t\tbreak;\n\n\t\tcase RELPERSISTENCE_TEMP:\n\t\tcase RELPERSISTENCE_UNLOGGED:\n\t\t\treturn mdprefetch(reln, forknum, blocknum);\n\n\t\tdefault:\n\t\t\tneon_log(ERROR, \"unknown relpersistence '%c'\", reln->smgr_relpersistence);\n\t}\n\n\tif (lfc_cache_contains(InfoFromSMgrRel(reln), forknum, blocknum))\n\t\treturn false;\n\n\ttag.forkNum = forknum;\n\ttag.blockNum = blocknum;\n\n\tCopyNRelFileInfoToBufTag(tag, InfoFromSMgrRel(reln));\n\n\tcommunicator_prefetch_register_bufferv(tag, NULL, 1, NULL);\n\n\tcommunicator_prefetch_pump_state();\n\n\treturn false;\n}\n#endif /* PG_MAJORVERSION_NUM < 17 */\n\n\n/*\n * neon_writeback() -- Tell the kernel to write pages back to storage.\n *\n * This accepts a range of blocks because flushing several pages at once is\n * considerably more efficient than doing so individually.\n */\nstatic void\nneon_writeback(SMgrRelation reln, ForkNumber forknum,\n\t\t\t   BlockNumber blocknum, BlockNumber nblocks)\n{\n\tswitch (reln->smgr_relpersistence)\n\t{\n\t\tcase 0:\n\t\t\t/* mdwriteback() does nothing if the file doesn't exist */\n\t\t\tmdwriteback(reln, forknum, blocknum, nblocks);\n\t\t\tbreak;\n\n\t\tcase RELPERSISTENCE_PERMANENT:\n\t\t\tbreak;\n\n\t\tcase RELPERSISTENCE_TEMP:\n\t\tcase RELPERSISTENCE_UNLOGGED:\n\t\t\tmdwriteback(reln, forknum, blocknum, nblocks);\n\t\t\treturn;\n\n\t\tdefault:\n\t\t\tneon_log(ERROR, \"unknown relpersistence '%c'\", reln->smgr_relpersistence);\n\t}\n\n\t/*\n\t * TODO: WAL sync up to lwLsn for the indicated blocks\n\t * 
Without that sync, writeback doesn't actually guarantee the data is\n\t * persistently written, which does seem to be one of the assumed\n\t * properties of this smgr API call.\n\t */\n\tneon_log(SmgrTrace, \"writeback noop\");\n\n\tcommunicator_prefetch_pump_state();\n\n\tif (debug_compare_local)\n\t{\n\t\tif (IS_LOCAL_REL(reln))\n\t\t\tmdwriteback(reln, forknum, blocknum, nblocks);\n\t}\n}\n\n/*\n * While function is defined in the neon extension it's used within neon_test_utils directly.\n * To avoid breaking tests in the runtime please keep function signature in sync.\n */\nvoid\nneon_read_at_lsn(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,\n\t\t\t\t neon_request_lsns request_lsns, void *buffer)\n{\n\tcommunicator_read_at_lsnv(rinfo, forkNum, blkno, &request_lsns, &buffer, 1, NULL);\n}\n\nstatic void\ncompare_with_local(SMgrRelation reln, ForkNumber forkNum, BlockNumber blkno, void* buffer, XLogRecPtr request_lsn)\n{\n\tif (forkNum == MAIN_FORKNUM && IS_LOCAL_REL(reln))\n\t{\n\t\tchar\t\tpageserver_masked[BLCKSZ];\n\t\tPGIOAlignedBlock mdbuf;\n\t\tPGIOAlignedBlock mdbuf_masked;\n\n#if PG_MAJORVERSION_NUM >= 17\n\t\t{\n\t\t\tvoid* mdbuffers[1] = { mdbuf.data };\n\t\t\tmdreadv(reln, forkNum, blkno, mdbuffers, 1);\n\t\t}\n#else\n\t\tmdread(reln, forkNum, blkno, mdbuf.data);\n#endif\n\n\t\tmemcpy(pageserver_masked, buffer, BLCKSZ);\n\t\tmemcpy(mdbuf_masked.data, mdbuf.data, BLCKSZ);\n\n\t\tif (PageIsNew((Page) mdbuf.data))\n\t\t{\n\t\t\tif (!PageIsNew((Page) pageserver_masked))\n\t\t\t{\n\t\t\t\tneon_log(PANIC, \"page is new in MD but not in Page Server at blk %u in rel %u/%u/%u fork %u (request LSN %X/%08X):\\n%s\\n\",\n\t\t\t\t\t blkno,\n\t\t\t\t\t RelFileInfoFmt(InfoFromSMgrRel(reln)),\n\t\t\t\t\t forkNum,\n\t\t\t\t\t (uint32) (request_lsn >> 32), (uint32) request_lsn,\n\t\t\t\t\t hexdump_page(buffer));\n\t\t\t}\n\t\t}\n\t\telse if (PageIsNew((Page) buffer))\n\t\t{\n\t\t\tneon_log(PANIC, \"page is new in Page Server but not in MD at blk %u in rel 
%u/%u/%u fork %u (request LSN %X/%08X):\\n%s\\n\",\n\t\t\t\t blkno,\n\t\t\t\t RelFileInfoFmt(InfoFromSMgrRel(reln)),\n\t\t\t\t forkNum,\n\t\t\t\t (uint32) (request_lsn >> 32), (uint32) request_lsn,\n\t\t\t\t hexdump_page(mdbuf.data));\n\t\t}\n\t\telse if (PageGetSpecialSize(mdbuf.data) == 0)\n\t\t{\n\t\t\t/* assume heap */\n\t\t\tRmgrTable[RM_HEAP_ID].rm_mask(mdbuf_masked.data, blkno);\n\t\t\tRmgrTable[RM_HEAP_ID].rm_mask(pageserver_masked, blkno);\n\n\t\t\tif (memcmp(mdbuf_masked.data, pageserver_masked, BLCKSZ) != 0)\n\t\t\t{\n\t\t\t\tneon_log(PANIC, \"heap buffers differ at blk %u in rel %u/%u/%u fork %u (request LSN %X/%08X):\\n------ MD ------\\n%s\\n------ Page Server ------\\n%s\\n\",\n\t\t\t\t\t blkno,\n\t\t\t\t\t RelFileInfoFmt(InfoFromSMgrRel(reln)),\n\t\t\t\t\t forkNum,\n\t\t\t\t\t (uint32) (request_lsn >> 32), (uint32) request_lsn,\n\t\t\t\t\t hexdump_page(mdbuf_masked.data),\n\t\t\t\t\t hexdump_page(pageserver_masked));\n\t\t\t}\n\t\t}\n\t\telse if (PageGetSpecialSize(mdbuf.data) == MAXALIGN(sizeof(BTPageOpaqueData)))\n\t\t{\n\t\t\tif (((BTPageOpaqueData *) PageGetSpecialPointer(mdbuf.data))->btpo_cycleid < MAX_BT_CYCLE_ID)\n\t\t\t{\n\t\t\t\t/* assume btree */\n\t\t\t\tRmgrTable[RM_BTREE_ID].rm_mask(mdbuf_masked.data, blkno);\n\t\t\t\tRmgrTable[RM_BTREE_ID].rm_mask(pageserver_masked, blkno);\n\n\t\t\t\tif (memcmp(mdbuf_masked.data, pageserver_masked, BLCKSZ) != 0)\n\t\t\t\t{\n\t\t\t\t\tneon_log(PANIC, \"btree buffers differ at blk %u in rel %u/%u/%u fork %u (request LSN %X/%08X):\\n------ MD ------\\n%s\\n------ Page Server ------\\n%s\\n\",\n\t\t\t\t\t\t blkno,\n\t\t\t\t\t\t RelFileInfoFmt(InfoFromSMgrRel(reln)),\n\t\t\t\t\t\t forkNum,\n\t\t\t\t\t\t (uint32) (request_lsn >> 32), (uint32) request_lsn,\n\t\t\t\t\t\t hexdump_page(mdbuf_masked.data),\n\t\t\t\t\t\t hexdump_page(pageserver_masked));\n\t\t\t\t}\n\t\t\t}\n\t\t}\n\t}\n}\n\n\n#if PG_MAJORVERSION_NUM < 17\n\n/*\n *\tneon_read() -- Read the specified block from a relation.\n */\n#if 
PG_MAJORVERSION_NUM < 16\nstatic void\nneon_read(SMgrRelation reln, ForkNumber forkNum, BlockNumber blkno, char *buffer)\n#else\nstatic void\nneon_read(SMgrRelation reln, ForkNumber forkNum, BlockNumber blkno, void *buffer)\n#endif\n{\n\tneon_request_lsns request_lsns;\n\tbits8\t\tpresent;\n\tvoid\t   *bufferp;\n\n\tswitch (reln->smgr_relpersistence)\n\t{\n\t\tcase 0:\n\t\t\tneon_log(ERROR, \"cannot call smgrread() on rel with unknown persistence\");\n\t\t\tbreak;\n\n\t\tcase RELPERSISTENCE_PERMANENT:\n\t\t\tif (RelFileInfoEquals(unlogged_build_rel_info, InfoFromSMgrRel(reln)))\n\t\t\t{\n\t\t\t\tmdread(reln, forkNum, blkno, buffer);\n\t\t\t\treturn;\n\t\t\t}\n\t\t\tbreak;\n\n\t\tcase RELPERSISTENCE_TEMP:\n\t\tcase RELPERSISTENCE_UNLOGGED:\n\t\t\tmdread(reln, forkNum, blkno, buffer);\n\t\t\treturn;\n\n\t\tdefault:\n\t\t\tneon_log(ERROR, \"unknown relpersistence '%c'\", reln->smgr_relpersistence);\n\t}\n\n\t/* Try to read PS results if they are available */\n\tcommunicator_prefetch_pump_state();\n\n\tneon_get_request_lsns(InfoFromSMgrRel(reln), forkNum, blkno, &request_lsns, 1);\n\n\tpresent = 0;\n\tbufferp = buffer;\n\tif (communicator_prefetch_lookupv(InfoFromSMgrRel(reln), forkNum, blkno, &request_lsns, 1, &bufferp, &present))\n\t{\n\t\t/* Prefetch hit */\n\t\tif (debug_compare_local >= DEBUG_COMPARE_LOCAL_PREFETCH)\n\t\t{\n\t\t\tcompare_with_local(reln, forkNum, blkno, buffer, request_lsns.request_lsn);\n\t\t}\n\t\tif (debug_compare_local <= DEBUG_COMPARE_LOCAL_PREFETCH)\n\t\t{\n\t\t\treturn;\n\t\t}\n\t}\n\n\t/* Try to read from local file cache */\n\tif (lfc_read(InfoFromSMgrRel(reln), forkNum, blkno, buffer))\n\t{\n\t\tMyNeonCounters->file_cache_hits_total++;\n\t\tif (debug_compare_local >= DEBUG_COMPARE_LOCAL_LFC)\n\t\t{\n\t\t\tcompare_with_local(reln, forkNum, blkno, buffer, request_lsns.request_lsn);\n\t\t}\n\t\tif (debug_compare_local <= DEBUG_COMPARE_LOCAL_LFC)\n\t\t{\n\t\t\treturn;\n\t\t}\n\t}\n\n\tneon_read_at_lsn(InfoFromSMgrRel(reln), forkNum, blkno, 
request_lsns, buffer);\n\n\t/*\n\t * Try to receive prefetch results once again just to make sure we don't leave the smgr code while the OS might still have buffered bytes.\n\t */\n\tcommunicator_prefetch_pump_state();\n\n\tif (debug_compare_local)\n\t{\n\t\tcompare_with_local(reln, forkNum, blkno, buffer, request_lsns.request_lsn);\n\t}\n}\n#endif /* PG_MAJORVERSION_NUM <= 16 */\n\n#if PG_MAJORVERSION_NUM >= 17\n\nstatic void\ncompare_with_localv(SMgrRelation reln, ForkNumber forkNum, BlockNumber blkno, void** buffers, BlockNumber nblocks, neon_request_lsns* request_lsns, bits8* read_pages)\n{\n\tif (forkNum == MAIN_FORKNUM && IS_LOCAL_REL(reln))\n\t{\n\t\tfor (BlockNumber i = 0; i < nblocks; i++)\n\t\t{\n\t\t\tif (BITMAP_ISSET(read_pages, i))\n\t\t\t{\n\t\t\t\tcompare_with_local(reln, forkNum, blkno + i, buffers[i], request_lsns[i].request_lsn);\n\t\t\t}\n\t\t}\n\t}\n}\n\n\nstatic void\nneon_readv(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,\n\t\t   void **buffers, BlockNumber nblocks)\n{\n\tbits8\t\tread_pages[PG_IOV_MAX / 8];\n\tneon_request_lsns request_lsns[PG_IOV_MAX];\n\tint\t\t\tlfc_result;\n\tint\t\t\tprefetch_result;\n\n\tswitch (reln->smgr_relpersistence)\n\t{\n\t\tcase 0:\n\t\t\tneon_log(ERROR, \"cannot call smgrread() on rel with unknown persistence\");\n\t\t\tbreak;\n\n\t\tcase RELPERSISTENCE_PERMANENT:\n\t\t\tif (RelFileInfoEquals(unlogged_build_rel_info, InfoFromSMgrRel(reln)))\n\t\t\t{\n\t\t\t\tmdreadv(reln, forknum, blocknum, buffers, nblocks);\n\t\t\t\treturn;\n\t\t\t}\n\t\t\tbreak;\n\n\t\tcase RELPERSISTENCE_TEMP:\n\t\tcase RELPERSISTENCE_UNLOGGED:\n\t\t\tmdreadv(reln, forknum, blocknum, buffers, nblocks);\n\t\t\treturn;\n\n\t\tdefault:\n\t\t\tneon_log(ERROR, \"unknown relpersistence '%c'\", reln->smgr_relpersistence);\n\t}\n\n\tif (nblocks > PG_IOV_MAX)\n\t\tneon_log(ERROR, \"Read request too large: %d is larger than max %d\",\n\t\t\t\t nblocks, PG_IOV_MAX);\n\n\t/* Try to read PS results if they are available 
*/\n\tcommunicator_prefetch_pump_state();\n\n\tneon_get_request_lsns(InfoFromSMgrRel(reln), forknum, blocknum,\n\t\t\t\t\t\t  request_lsns, nblocks);\n\n\tmemset(read_pages, 0, sizeof(read_pages));\n\n\tprefetch_result = communicator_prefetch_lookupv(InfoFromSMgrRel(reln), forknum,\n\t\t\t\t\t\t\t\t\t\t\t\t\tblocknum, request_lsns, nblocks,\n\t\t\t\t\t\t\t\t\t\t\t\t\tbuffers, read_pages);\n\n\tif (debug_compare_local >= DEBUG_COMPARE_LOCAL_PREFETCH)\n\t{\n\t\tcompare_with_localv(reln, forknum, blocknum, buffers, nblocks, request_lsns, read_pages);\n\t}\n\tif (debug_compare_local <= DEBUG_COMPARE_LOCAL_PREFETCH && prefetch_result == nblocks)\n\t{\n\t\treturn;\n\t}\n\tif (debug_compare_local > DEBUG_COMPARE_LOCAL_PREFETCH)\n\t{\n\t\tmemset(read_pages, 0, sizeof(read_pages));\n\t}\n\n\n\t/* Try to read from local file cache */\n\tlfc_result = lfc_readv_select(InfoFromSMgrRel(reln), forknum, blocknum, buffers,\n\t\t\t\t\t\t\t\t  nblocks, read_pages);\n\n\tif (lfc_result > 0)\n\t\tMyNeonCounters->file_cache_hits_total += lfc_result;\n\n\tif (debug_compare_local >= DEBUG_COMPARE_LOCAL_LFC)\n\t{\n\t\tcompare_with_localv(reln, forknum, blocknum, buffers, nblocks, request_lsns, read_pages);\n\t}\n\tif (debug_compare_local <= DEBUG_COMPARE_LOCAL_LFC && prefetch_result + lfc_result == nblocks)\n\t{\n\t\t/* Read all blocks from LFC, so we're done */\n\t\treturn;\n\t}\n\tif (debug_compare_local > DEBUG_COMPARE_LOCAL_LFC)\n\t{\n\t\tmemset(read_pages, 0, sizeof(read_pages));\n\t}\n\n\tcommunicator_read_at_lsnv(InfoFromSMgrRel(reln), forknum, blocknum, request_lsns,\n\t\t\t\t\t\t\t  buffers, nblocks, read_pages);\n\n\t/*\n\t * Try to receive prefetch results once again just to make sure we don't leave the smgr code while the OS might still have buffered bytes.\n\t */\n\tcommunicator_prefetch_pump_state();\n\n\tif (debug_compare_local)\n\t{\n\t\tmemset(read_pages, 0xFF, sizeof(read_pages));\n\t\tcompare_with_localv(reln, forknum, blocknum, buffers, nblocks, request_lsns, 
read_pages);\n\t}\n}\n#endif\n\nstatic char *\nhexdump_page(char *page)\n{\n\tStringInfoData result;\n\n\tinitStringInfo(&result);\n\n\tfor (int i = 0; i < BLCKSZ; i++)\n\t{\n\t\tif (i % 8 == 0)\n\t\t\tappendStringInfo(&result, \" \");\n\t\tif (i % 40 == 0)\n\t\t\tappendStringInfo(&result, \"\\n\");\n\t\tappendStringInfo(&result, \"%02x\", (unsigned char) (page[i]));\n\t}\n\n\treturn result.data;\n}\n\n#if PG_MAJORVERSION_NUM < 17\n/*\n *\tneon_write() -- Write the supplied block at the appropriate location.\n *\n *\t\tThis is to be used only for updating already-existing blocks of a\n *\t\trelation (ie, those before the current EOF).  To extend a relation,\n *\t\tuse mdextend().\n */\nstatic void\n#if PG_MAJORVERSION_NUM < 16\nneon_write(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, char *buffer, bool skipFsync)\n#else\nneon_write(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, const void *buffer, bool skipFsync)\n#endif\n{\n\tXLogRecPtr\tlsn;\n\n\tswitch (reln->smgr_relpersistence)\n\t{\n\t\tcase 0:\n\t\t\t/* This is a bit tricky. Check if the relation exists locally */\n\t\t\tif (mdexists(reln, debug_compare_local ? INIT_FORKNUM : forknum))\n\t\t\t{\n\t\t\t\t/* It exists locally. Guess it's unlogged then. */\n#if PG_MAJORVERSION_NUM >= 17\n\t\t\t\tmdwritev(reln, forknum, blocknum, &buffer, 1, skipFsync);\n#else\n\t\t\t\tmdwrite(reln, forknum, blocknum, buffer, skipFsync);\n#endif\n\t\t\t\t/*\n\t\t\t\t * We could set relpersistence now that we have determined\n\t\t\t\t * that it's local. But we don't dare to do it, because that\n\t\t\t\t * would immediately allow reads as well, which shouldn't\n\t\t\t\t * happen. 
We could cache it with a different 'relpersistence'\n\t\t\t\t * value, but this isn't performance critical.\n\t\t\t\t */\n\t\t\t\treturn;\n\t\t\t}\n\t\t\tbreak;\n\n\t\tcase RELPERSISTENCE_PERMANENT:\n\t\t\tif (RelFileInfoEquals(unlogged_build_rel_info, InfoFromSMgrRel(reln)))\n\t\t\t{\n#if PG_MAJORVERSION_NUM >= 17\n\t\t\t\tmdwritev(reln, forknum, blocknum, &buffer, 1, skipFsync);\n#else\n\t\t\t\tmdwrite(reln, forknum, blocknum, buffer, skipFsync);\n#endif\n\t\t\t\treturn;\n\t\t\t}\n\t\t\tbreak;\n\n\t\tcase RELPERSISTENCE_TEMP:\n\t\tcase RELPERSISTENCE_UNLOGGED:\n\t\t\t#if PG_MAJORVERSION_NUM >= 17\n\t\t\tmdwritev(reln, forknum, blocknum, &buffer, 1, skipFsync);\n\t\t\t#else\n\t\t\tmdwrite(reln, forknum, blocknum, buffer, skipFsync);\n\t\t\t#endif\n\t\t\treturn;\n\t\tdefault:\n\t\t\tneon_log(ERROR, \"unknown relpersistence '%c'\", reln->smgr_relpersistence);\n\t}\n\n\tneon_wallog_page(reln, forknum, blocknum, buffer, false);\n\n\tlsn = PageGetLSN((Page) buffer);\n\tneon_log(SmgrTrace, \"smgrwrite called for %u/%u/%u.%u blk %u, page LSN: %X/%08X\",\n\t\t RelFileInfoFmt(InfoFromSMgrRel(reln)),\n\t\t forknum, blocknum,\n\t\t (uint32) (lsn >> 32), (uint32) lsn);\n\n\tlfc_write(InfoFromSMgrRel(reln), forknum, blocknum, buffer);\n\n\tcommunicator_prefetch_pump_state();\n\n\tif (debug_compare_local)\n\t{\n\t\tif (IS_LOCAL_REL(reln))\n\t\t{\n\t\t#if PG_MAJORVERSION_NUM >= 17\n\t\t\tmdwritev(reln, forknum, blocknum, &buffer, 1, skipFsync);\n\t\t#else\n\t\t\tmdwrite(reln, forknum, blocknum, buffer, skipFsync);\n\t\t#endif\n\t\t}\n\t}\n}\n#endif\n\n\n\n#if PG_MAJORVERSION_NUM >= 17\nstatic void\nneon_writev(SMgrRelation reln, ForkNumber forknum, BlockNumber blkno,\n\t\t\t const void **buffers, BlockNumber nblocks, bool skipFsync)\n{\n\tswitch (reln->smgr_relpersistence)\n\t{\n\t\tcase 0:\n\t\t\t/* This is a bit tricky. Check if the relation exists locally */\n\t\t\tif (mdexists(reln, debug_compare_local ? INIT_FORKNUM : forknum))\n\t\t\t{\n\t\t\t\t/* It exists locally. 
Guess it's unlogged then. */\n\t\t\t\tmdwritev(reln, forknum, blkno, buffers, nblocks, skipFsync);\n\n\t\t\t\t/*\n\t\t\t\t * We could set relpersistence now that we have determined\n\t\t\t\t * that it's local. But we don't dare to do it, because that\n\t\t\t\t * would immediately allow reads as well, which shouldn't\n\t\t\t\t * happen. We could cache it with a different 'relpersistence'\n\t\t\t\t * value, but this isn't performance critical.\n\t\t\t\t */\n\t\t\t\treturn;\n\t\t\t}\n\t\t\tbreak;\n\n\t\tcase RELPERSISTENCE_PERMANENT:\n\t\t\tif (RelFileInfoEquals(unlogged_build_rel_info, InfoFromSMgrRel(reln)))\n\t\t\t{\n\t\t\t\tmdwritev(reln, forknum, blkno, buffers, nblocks, skipFsync);\n\t\t\t\treturn;\n\t\t\t}\n\t\t\tbreak;\n\n\t\tcase RELPERSISTENCE_TEMP:\n\t\tcase RELPERSISTENCE_UNLOGGED:\n\t\t\tmdwritev(reln, forknum, blkno, buffers, nblocks, skipFsync);\n\t\t\treturn;\n\t\tdefault:\n\t\t\tneon_log(ERROR, \"unknown relpersistence '%c'\", reln->smgr_relpersistence);\n\t}\n\n\tneon_wallog_pagev(reln, forknum, blkno, nblocks, (const char **) buffers, false);\n\n\tlfc_writev(InfoFromSMgrRel(reln), forknum, blkno, buffers, nblocks);\n\n\tcommunicator_prefetch_pump_state();\n\n\tif (debug_compare_local)\n\t{\n\t\tif (IS_LOCAL_REL(reln))\n\t\t\tmdwritev(reln, forknum, blkno, buffers, nblocks, skipFsync);\n\t}\n}\n\n#endif\n\n/*\n *\tneon_nblocks() -- Get the number of blocks stored in a relation.\n */\nstatic BlockNumber\nneon_nblocks(SMgrRelation reln, ForkNumber forknum)\n{\n\tBlockNumber n_blocks;\n\tneon_request_lsns request_lsns;\n\n\tswitch (reln->smgr_relpersistence)\n\t{\n\t\tcase 0:\n\t\t\tneon_log(ERROR, \"cannot call smgrnblocks() on rel with unknown persistence\");\n\t\t\tbreak;\n\n\t\tcase RELPERSISTENCE_PERMANENT:\n\t\t\tif (RelFileInfoEquals(unlogged_build_rel_info, InfoFromSMgrRel(reln)))\n\t\t\t{\n\t\t\t\treturn mdnblocks(reln, forknum);\n\t\t\t}\n\t\t\tbreak;\n\n\t\tcase RELPERSISTENCE_TEMP:\n\t\tcase RELPERSISTENCE_UNLOGGED:\n\t\t\treturn 
mdnblocks(reln, forknum);\n\n\t\tdefault:\n\t\t\tneon_log(ERROR, \"unknown relpersistence '%c'\", reln->smgr_relpersistence);\n\t}\n\n\tif (get_cached_relsize(InfoFromSMgrRel(reln), forknum, &n_blocks))\n\t{\n\t\tneon_log(SmgrTrace, \"cached nblocks for %u/%u/%u.%u: %u blocks\",\n\t\t\t RelFileInfoFmt(InfoFromSMgrRel(reln)),\n\t\t\t forknum, n_blocks);\n\t\treturn n_blocks;\n\t}\n\n\tneon_get_request_lsns(InfoFromSMgrRel(reln), forknum,\n\t\t\t\t\t\t  REL_METADATA_PSEUDO_BLOCKNO, &request_lsns, 1);\n\n\tn_blocks = communicator_nblocks(InfoFromSMgrRel(reln), forknum, &request_lsns);\n\tupdate_cached_relsize(InfoFromSMgrRel(reln), forknum, n_blocks);\n\n\tneon_log(SmgrTrace, \"neon_nblocks: rel %u/%u/%u fork %u (request LSN %X/%08X): %u blocks\",\n\t\t\t RelFileInfoFmt(InfoFromSMgrRel(reln)),\n\t\t\t forknum,\n\t\t\t LSN_FORMAT_ARGS(request_lsns.effective_request_lsn),\n\t\t\t n_blocks);\n\n\treturn n_blocks;\n}\n\n/*\n *\tneon_db_size() -- Get the size of the database in bytes.\n */\nint64\nneon_dbsize(Oid dbNode)\n{\n\tint64\t\tdb_size;\n\tneon_request_lsns request_lsns;\n\tNRelFileInfo dummy_node = {0};\n\n\tneon_get_request_lsns(dummy_node, MAIN_FORKNUM,\n\t\t\t\t\t\t  REL_METADATA_PSEUDO_BLOCKNO, &request_lsns, 1);\n\n\tdb_size = communicator_dbsize(dbNode, &request_lsns);\n\n\tneon_log(SmgrTrace, \"neon_dbsize: db %u (request LSN %X/%08X): %ld bytes\",\n\t\t\t dbNode, LSN_FORMAT_ARGS(request_lsns.effective_request_lsn), db_size);\n\n\treturn db_size;\n}\n\n/*\n *\tneon_truncate() -- Truncate relation to specified number of blocks.\n */\nstatic void\nneon_truncate(SMgrRelation reln, ForkNumber forknum, BlockNumber old_blocks, BlockNumber nblocks)\n{\n\tXLogRecPtr\tlsn;\n\n\tswitch (reln->smgr_relpersistence)\n\t{\n\t\tcase 0:\n\t\t\tneon_log(ERROR, \"cannot call smgrtruncate() on rel with unknown persistence\");\n\t\t\tbreak;\n\n\t\tcase RELPERSISTENCE_PERMANENT:\n\t\t\tif (RelFileInfoEquals(unlogged_build_rel_info, 
InfoFromSMgrRel(reln)))\n\t\t\t{\n\t\t\t\tmdtruncate(reln, forknum, old_blocks, nblocks);\n\t\t\t\treturn;\n\t\t\t}\n\t\t\tbreak;\n\n\t\tcase RELPERSISTENCE_TEMP:\n\t\tcase RELPERSISTENCE_UNLOGGED:\n\t\t\tmdtruncate(reln, forknum, old_blocks, nblocks);\n\t\t\treturn;\n\n\t\tdefault:\n\t\t\tneon_log(ERROR, \"unknown relpersistence '%c'\", reln->smgr_relpersistence);\n\t}\n\n\tset_cached_relsize(InfoFromSMgrRel(reln), forknum, nblocks);\n\n\t/*\n\t * Truncating a relation drops all its buffers from the buffer cache\n\t * without calling smgrwrite() on them. But we must account for that in\n\t * our tracking of last-written-LSN all the same: any future smgrnblocks()\n\t * request must return the new size after the truncation. We don't know\n\t * what the LSN of the truncation record was, so be conservative and use\n\t * the most recently inserted WAL record's LSN.\n\t */\n\tlsn = GetXLogInsertRecPtr();\n\tlsn = nm_adjust_lsn(lsn);\n\n\t/*\n\t * Flush it, too. We don't actually care about it here, but let's uphold\n\t * the invariant that last-written LSN <= flush LSN.\n\t */\n\tXLogFlush(lsn);\n\n\t/*\n\t * Truncate may affect several chunks of relations. So we should either\n\t * update last written LSN for all of them, or update LSN for \"dummy\"\n\t * metadata block. Second approach seems more efficient. If the relation\n\t * is extended again later, the extension will update the last-written LSN\n\t * for the extended pages, so there's no harm in leaving behind obsolete\n\t * entries for the truncated chunks.\n\t */\n\tneon_set_lwlsn_relation(lsn, InfoFromSMgrRel(reln), forknum);\n\n\tif (debug_compare_local)\n\t{\n\t\tif (IS_LOCAL_REL(reln))\n\t\t\tmdtruncate(reln, forknum, old_blocks, nblocks);\n\t}\n}\n\n/*\n *\tneon_immedsync() -- Immediately sync a relation to stable storage.\n *\n * Note that only writes already issued are synced; this routine knows\n * nothing of dirty buffers that may exist inside the buffer manager.  
We\n * sync active and inactive segments; smgrDoPendingSyncs() relies on this.\n * Consider a relation skipping WAL.  Suppose a checkpoint syncs blocks of\n * some segment, then mdtruncate() renders that segment inactive.  If we\n * crash before the next checkpoint syncs the newly-inactive segment, that\n * segment may survive recovery, reintroducing unwanted data into the table.\n */\nstatic void\nneon_immedsync(SMgrRelation reln, ForkNumber forknum)\n{\n\tswitch (reln->smgr_relpersistence)\n\t{\n\t\tcase 0:\n\t\t\tneon_log(ERROR, \"cannot call smgrimmedsync() on rel with unknown persistence\");\n\t\t\tbreak;\n\n\t\tcase RELPERSISTENCE_PERMANENT:\n\t\t\tbreak;\n\n\t\tcase RELPERSISTENCE_TEMP:\n\t\tcase RELPERSISTENCE_UNLOGGED:\n\t\t\tmdimmedsync(reln, forknum);\n\t\t\treturn;\n\n\t\tdefault:\n\t\t\tneon_log(ERROR, \"unknown relpersistence '%c'\", reln->smgr_relpersistence);\n\t}\n\n\tneon_log(SmgrTrace, \"[NEON_SMGR] immedsync noop\");\n\n\tcommunicator_prefetch_pump_state();\n\n\tif (debug_compare_local)\n\t{\n\t\tif (IS_LOCAL_REL(reln))\n\t\t\tmdimmedsync(reln, forknum);\n\t}\n}\n\n#if PG_MAJORVERSION_NUM >= 17\nstatic void\nneon_registersync(SMgrRelation reln, ForkNumber forknum)\n{\n\tswitch (reln->smgr_relpersistence)\n\t{\n\t\tcase 0:\n\t\t\tneon_log(ERROR, \"cannot call smgrregistersync() on rel with unknown persistence\");\n\t\t\tbreak;\n\n\t\tcase RELPERSISTENCE_PERMANENT:\n\t\t\tbreak;\n\n\t\tcase RELPERSISTENCE_TEMP:\n\t\tcase RELPERSISTENCE_UNLOGGED:\n\t\t\tmdregistersync(reln, forknum);\n\t\t\treturn;\n\n\t\tdefault:\n\t\t\tneon_log(ERROR, \"unknown relpersistence '%c'\", reln->smgr_relpersistence);\n\t}\n\n\tneon_log(SmgrTrace, \"[NEON_SMGR] registersync noop\");\n\n\tif (debug_compare_local)\n\t{\n\t\tif (IS_LOCAL_REL(reln))\n\t\t\tmdimmedsync(reln, forknum);\n\t}\n}\n#endif\n\n\n/*\n * neon_start_unlogged_build() -- Starting build operation on a rel.\n *\n * Some indexes are built in two phases, by first populating the table with\n * regular 
inserts, using the shared buffer cache but skipping WAL-logging,\n * and WAL-logging the whole relation after it's done. Neon relies on the\n * WAL to reconstruct pages, so we cannot use the page server in the\n * first phase when the changes are not logged.\n */\nstatic void\nneon_start_unlogged_build(SMgrRelation reln)\n{\n\t/*\n\t * Currently, there can be only one unlogged relation build operation in\n\t * progress at a time. That's enough for the current usage.\n\t */\n\tif (unlogged_build_phase != UNLOGGED_BUILD_NOT_IN_PROGRESS)\n\t\tneon_log(ERROR, \"unlogged relation build is already in progress\");\n\n\tereport(SmgrTrace,\n\t\t\t(errmsg(NEON_TAG \"starting unlogged build of relation %u/%u/%u\",\n\t\t\t\t\tRelFileInfoFmt(InfoFromSMgrRel(reln)))));\n\n\tswitch (reln->smgr_relpersistence)\n\t{\n\t\tcase 0:\n\t\t\tneon_log(ERROR, \"cannot call smgr_start_unlogged_build() on rel with unknown persistence\");\n\t\t\tbreak;\n\n\t\tcase RELPERSISTENCE_PERMANENT:\n\t\t\tbreak;\n\n\t\tcase RELPERSISTENCE_TEMP:\n\t\tcase RELPERSISTENCE_UNLOGGED:\n\t\t\tunlogged_build_rel_info = InfoFromSMgrRel(reln);\n\t\t\tunlogged_build_phase = UNLOGGED_BUILD_NOT_PERMANENT;\n\t\t\tif (debug_compare_local)\n\t\t\t{\n\t\t\t\tif (!IsParallelWorker())\n\t\t\t\t\tmdcreate(reln, INIT_FORKNUM, true);\n\t\t\t}\n\t\t\treturn;\n\n\t\tdefault:\n\t\t\tneon_log(ERROR, \"unknown relpersistence '%c'\", reln->smgr_relpersistence);\n\t}\n\n#if PG_MAJORVERSION_NUM >= 17\n\t/*\n\t * We have to disable this check for pg14-16 because sorted build of GIST index requires\n\t * to perform unlogged build several times\n\t */\n\tif (smgrnblocks(reln, MAIN_FORKNUM) != 0)\n\t\tneon_log(ERROR, \"cannot perform unlogged index build, index is not empty \");\n#endif\n\n\tunlogged_build_rel_info = InfoFromSMgrRel(reln);\n\tunlogged_build_phase = UNLOGGED_BUILD_PHASE_1;\n\n\t/*\n\t * Create the local file. 
In a parallel build, the leader is expected to\n\t * call this first and do it.\n\t *\n\t * FIXME: should we pass isRedo true to create the tablespace dir if it\n\t * doesn't exist? Is it needed?\n\t */\n \tif (!IsParallelWorker())\n\t{\n\t\tmdcreate(reln, debug_compare_local ? INIT_FORKNUM : MAIN_FORKNUM, false);\n\t}\n}\n\n/*\n * neon_finish_unlogged_build_phase_1()\n *\n * Call this after you have finished populating a relation in unlogged mode,\n * before you start WAL-logging it.\n */\nstatic void\nneon_finish_unlogged_build_phase_1(SMgrRelation reln)\n{\n\tAssert(RelFileInfoEquals(unlogged_build_rel_info, InfoFromSMgrRel(reln)));\n\n\tereport(SmgrTrace,\n\t\t\t(errmsg(NEON_TAG \"finishing phase 1 of unlogged build of relation %u/%u/%u\",\n\t\t\t\t\tRelFileInfoFmt((unlogged_build_rel_info)))));\n\n\tif (unlogged_build_phase == UNLOGGED_BUILD_NOT_PERMANENT)\n\t\treturn;\n\n\tAssert(unlogged_build_phase == UNLOGGED_BUILD_PHASE_1);\n\n\t/*\n\t * In a parallel build, (only) the leader process performs the 2nd\n\t * phase.\n\t */\n\tif (IsParallelWorker())\n\t{\n\t\tNRelFileInfoInvalidate(unlogged_build_rel_info);\n\t\tunlogged_build_phase = UNLOGGED_BUILD_NOT_IN_PROGRESS;\n\t}\n\telse\n\t\tunlogged_build_phase = UNLOGGED_BUILD_PHASE_2;\n}\n\n/*\n * neon_end_unlogged_build() -- Finish an unlogged rel build.\n *\n * Call this after you have finished WAL-logging a relation that was\n * first populated without WAL-logging.\n *\n * This removes the local copy of the rel, since it's now been fully\n * WAL-logged and is present in the page server.\n */\nstatic void\nneon_end_unlogged_build(SMgrRelation reln)\n{\n\tNRelFileInfoBackend rinfob = InfoBFromSMgrRel(reln);\n\n\tAssert(RelFileInfoEquals(unlogged_build_rel_info, InfoFromSMgrRel(reln)));\n\n\tereport(SmgrTrace,\n\t\t\t(errmsg(NEON_TAG \"ending unlogged build of relation %u/%u/%u\",\n\t\t\t\t\tRelFileInfoFmt(unlogged_build_rel_info))));\n\n\tif (unlogged_build_phase != 
UNLOGGED_BUILD_NOT_PERMANENT)\n\t{\n\t\tXLogRecPtr recptr;\n\t\tBlockNumber nblocks;\n\n\t\tAssert(unlogged_build_phase == UNLOGGED_BUILD_PHASE_2);\n\n\t\t/*\n\t\t * Update the last-written LSN cache.\n\t\t *\n\t\t * The relation is still on local disk so we can get the size by\n\t\t * calling mdnblocks() directly. For the LSN, GetXLogInsertRecPtr() is\n\t\t * very conservative. If we could assume that this function is called\n\t\t * from the same backend that WAL-logged the contents, we could use\n\t\t * XactLastRecEnd here. But better safe than sorry.\n\t\t */\n\t\tnblocks = mdnblocks(reln, MAIN_FORKNUM);\n\t\trecptr = GetXLogInsertRecPtr();\n\n\t\tneon_set_lwlsn_block_range(recptr,\n\t\t\t\t\t\t\t\t   InfoFromNInfoB(rinfob),\n\t\t\t\t\t\t\t\t   MAIN_FORKNUM, 0, nblocks);\n\t\tneon_set_lwlsn_relation(recptr,\n\t\t\t\t\t\t\t\tInfoFromNInfoB(rinfob),\n\t\t\t\t\t\t\t\tMAIN_FORKNUM);\n\n\t\t/* Remove local copy */\n\t\tfor (int forknum = 0; forknum <= MAX_FORKNUM; forknum++)\n\t\t{\n\t\t\tneon_log(SmgrTrace, \"forgetting cached relsize for %u/%u/%u.%u\",\n\t\t\t\t RelFileInfoFmt(InfoFromNInfoB(rinfob)),\n\t\t\t\t forknum);\n\n\t\t\tforget_cached_relsize(InfoFromNInfoB(rinfob), forknum);\n\t\t\tlfc_invalidate(InfoFromNInfoB(rinfob), forknum, nblocks);\n\n\t\t\tmdclose(reln, forknum);\n\t\t\tif (!debug_compare_local)\n\t\t\t{\n\t\t\t\t/* use isRedo == true, so that we drop it immediately */\n\t\t\t\tmdunlink(rinfob, forknum, true);\n\t\t\t}\n\t\t}\n\t\tif (debug_compare_local)\n\t\t\tmdunlink(rinfob, INIT_FORKNUM, true);\n\t}\n\tNRelFileInfoInvalidate(unlogged_build_rel_info);\n\tunlogged_build_phase = UNLOGGED_BUILD_NOT_IN_PROGRESS;\n}\n\n#define STRPREFIX(str, prefix) (strncmp(str, prefix, strlen(prefix)) == 0)\n\nstatic int\nneon_read_slru_segment(SMgrRelation reln, const char* path, int segno, void* buffer)\n{\n\tXLogRecPtr\trequest_lsn,\n\t\t\t\tnot_modified_since;\n\tSlruKind\tkind;\n\tint\t\t\tn_blocks;\n\tneon_request_lsns request_lsns;\n\n\t/*\n\t * Compute a 
request LSN to use, similar to neon_get_request_lsns() but the\n\t * logic is a bit simpler.\n\t */\n\tif (RecoveryInProgress())\n\t{\n\t\trequest_lsn = GetXLogReplayRecPtr(NULL);\n\t\tif (request_lsn == InvalidXLogRecPtr)\n\t\t{\n\t\t\t/*\n\t\t\t * This happens in neon startup, we start up without replaying any\n\t\t\t * records.\n\t\t\t */\n\t\t\trequest_lsn = GetRedoStartLsn();\n\t\t}\n\t\trequest_lsn = nm_adjust_lsn(request_lsn);\n\t}\n\telse\n\t\trequest_lsn = UINT64_MAX;\n\n\t/*\n\t * GetRedoStartLsn() returns LSN of the basebackup. We know that the SLRU\n\t * segment has not changed since the basebackup, because in order to\n\t * modify it, we would have had to download it already. And once\n\t * downloaded, we never evict SLRU segments from local disk.\n\t */\n\tnot_modified_since = nm_adjust_lsn(GetRedoStartLsn());\n\n\tif (STRPREFIX(path, \"pg_xact\"))\n\t\tkind = SLRU_CLOG;\n\telse if (STRPREFIX(path, \"pg_multixact/members\"))\n\t\tkind = SLRU_MULTIXACT_MEMBERS;\n\telse if (STRPREFIX(path, \"pg_multixact/offsets\"))\n\t\tkind = SLRU_MULTIXACT_OFFSETS;\n\telse\n\t\treturn -1;\n\n\trequest_lsns.request_lsn = request_lsn;\n\trequest_lsns.not_modified_since = not_modified_since;\n\trequest_lsns.effective_request_lsn = request_lsn;\n\n\tn_blocks = communicator_read_slru_segment(kind, segno, &request_lsns, buffer);\n\n\treturn n_blocks;\n}\n\nstatic void\nAtEOXact_neon(XactEvent event, void *arg)\n{\n\tswitch (event)\n\t{\n\t\tcase XACT_EVENT_ABORT:\n\t\tcase XACT_EVENT_PARALLEL_ABORT:\n\n\t\t\t/*\n\t\t\t * Forget about any build we might have had in progress. 
The local\n\t\t\t * file will be unlinked by smgrDoPendingDeletes()\n\t\t\t */\n\t\t\tNRelFileInfoInvalidate(unlogged_build_rel_info);\n\t\t\tunlogged_build_phase = UNLOGGED_BUILD_NOT_IN_PROGRESS;\n\t\t\tbreak;\n\n\t\tcase XACT_EVENT_COMMIT:\n\t\tcase XACT_EVENT_PARALLEL_COMMIT:\n\t\tcase XACT_EVENT_PREPARE:\n\t\tcase XACT_EVENT_PRE_COMMIT:\n\t\tcase XACT_EVENT_PARALLEL_PRE_COMMIT:\n\t\tcase XACT_EVENT_PRE_PREPARE:\n\t\t\tif (unlogged_build_phase != UNLOGGED_BUILD_NOT_IN_PROGRESS)\n\t\t\t{\n\t\t\t\tNRelFileInfoInvalidate(unlogged_build_rel_info);\n\t\t\t\tunlogged_build_phase = UNLOGGED_BUILD_NOT_IN_PROGRESS;\n\t\t\t\tereport(ERROR,\n\t\t\t\t\t\t(errcode(ERRCODE_INTERNAL_ERROR),\n\t\t\t\t\t\t (errmsg(NEON_TAG \"unlogged index build was not properly finished\"))));\n\t\t\t}\n\t\t\tbreak;\n\t}\n\tcommunicator_reconfigure_timeout_if_needed();\n}\n\nstatic const struct f_smgr neon_smgr =\n{\n\t.smgr_init = neon_init,\n\t.smgr_shutdown = NULL,\n\t.smgr_open = neon_open,\n\t.smgr_close = neon_close,\n\t.smgr_create = neon_create,\n\t.smgr_exists = neon_exists,\n\t.smgr_unlink = neon_unlink,\n\t.smgr_extend = neon_extend,\n#if PG_MAJORVERSION_NUM >= 16\n\t.smgr_zeroextend = neon_zeroextend,\n#endif\n#if PG_MAJORVERSION_NUM >= 17\n\t.smgr_prefetch = neon_prefetch,\n\t.smgr_readv = neon_readv,\n\t.smgr_writev = neon_writev,\n#else\n\t.smgr_prefetch = neon_prefetch,\n\t.smgr_read = neon_read,\n\t.smgr_write = neon_write,\n#endif\n\n\t.smgr_writeback = neon_writeback,\n\t.smgr_nblocks = neon_nblocks,\n\t.smgr_truncate = neon_truncate,\n\t.smgr_immedsync = neon_immedsync,\n#if PG_MAJORVERSION_NUM >= 17\n\t.smgr_registersync = neon_registersync,\n#endif\n\t.smgr_start_unlogged_build = neon_start_unlogged_build,\n\t.smgr_finish_unlogged_build_phase_1 = neon_finish_unlogged_build_phase_1,\n\t.smgr_end_unlogged_build = neon_end_unlogged_build,\n\n\t.smgr_read_slru_segment = neon_read_slru_segment,\n};\n\nconst f_smgr *\nsmgr_neon(ProcNumber backend, NRelFileInfo rinfo)\n{\n\n\t/* 
Don't use page server for temp relations */\n\tif (backend != INVALID_PROC_NUMBER)\n\t\treturn smgr_standard(backend, rinfo);\n\telse\n\t\treturn &neon_smgr;\n}\n\nvoid\nsmgr_init_neon(void)\n{\n\tRegisterXactCallback(AtEOXact_neon, NULL);\n\n\tsmgr_init_standard();\n\tneon_init();\n\tcommunicator_init();\n}\n\n\nstatic void\nneon_extend_rel_size(NRelFileInfo rinfo, ForkNumber forknum, BlockNumber blkno, XLogRecPtr end_recptr)\n{\n\tBlockNumber relsize;\n\n\t/* This is only used in WAL replay */\n\tAssert(RecoveryInProgress());\n\n\t/* Extend the relation if we know its size */\n\tif (get_cached_relsize(rinfo, forknum, &relsize))\n\t{\n\t\tif (relsize < blkno + 1)\n\t\t{\n\t\t\tupdate_cached_relsize(rinfo, forknum, blkno + 1);\n\t\t\tneon_set_lwlsn_relation(end_recptr, rinfo, forknum);\n\t\t}\n\t}\n\telse\n\t{\n\t\t/*\n\t\t * Size was not cached. We populate the cache now, with the size of\n\t\t * the relation measured after this WAL record is applied.\n\t\t *\n\t\t * This length is later reused when we open the smgr to read the\n\t\t * block, which is fine and expected.\n\t\t */\n\t\tneon_request_lsns request_lsns;\n\n\t\tneon_get_request_lsns(rinfo, forknum,\n\t\t\t\t\t\t\t  REL_METADATA_PSEUDO_BLOCKNO, &request_lsns, 1);\n\n\t\trelsize = communicator_nblocks(rinfo, forknum, &request_lsns);\n\n\t\trelsize = Max(relsize, blkno + 1);\n\n\t\tset_cached_relsize(rinfo, forknum, relsize);\n\t\tneon_set_lwlsn_relation(end_recptr, rinfo, forknum);\n\n\t\tneon_log(SmgrTrace, \"Set length to %d\", relsize);\n\t}\n}\n\n#define FSM_TREE_DEPTH\t((SlotsPerFSMPage >= 1626) ? 
3 : 4)\n\n/*\n * TODO: Maybe it would be better to make the corresponding function in freespace.c public?\n */\nstatic BlockNumber\nget_fsm_physical_block(BlockNumber heapblk)\n{\n\tBlockNumber pages;\n\tint\t\t\tleafno;\n\tint\t\t\tl;\n\n\t/*\n\t * Calculate the logical page number of the first leaf page below the\n\t * given page.\n\t */\n\tleafno = heapblk / SlotsPerFSMPage;\n\n\t/* Count upper level nodes required to address the leaf page */\n\tpages = 0;\n\tfor (l = 0; l < FSM_TREE_DEPTH; l++)\n\t{\n\t\tpages += leafno + 1;\n\t\tleafno /= SlotsPerFSMPage;\n\t}\n\n\t/* Turn the page count into 0-based block number */\n\treturn pages - 1;\n}\n\n\n/*\n * Return whether we can skip the redo for this block.\n *\n * The conditions for skipping the IO are:\n *\n * - The block is not in the shared buffers, and\n * - The block is not in the local file cache\n *\n * ... because any subsequent read of the page requires us to read\n * the new version of the page from the PageServer. The local file\n * cache is checked only after the last-written LSN has been advanced;\n * if the page is cached there, redo is applied instead of skipped.\n *\n * We have one exception to the rules for skipping IO: We always apply\n * changes to shared catalogs' pages. 
Although this is mostly out of caution,\n * catalog updates usually result in backends rebuilding their catalog snapshot,\n * which means it's quite likely the modified page is going to be used soon.\n *\n * It is important to note that skipping WAL redo for a page also means\n * the page isn't locked by the redo process, as there is no Buffer\n * being returned, nor is there a buffer descriptor to lock.\n * This means that any IO that wants to read this block needs to wait\n * for the WAL REDO process to finish processing the WAL record before\n * it allows the system to start reading the block, as releasing the\n * block early could lead to phantom reads.\n *\n * For example, REDO for a WAL record that modifies 3 blocks could skip\n * the first block, wait for a lock on the second, and then modify the\n * third block. Without skipping, all blocks would be locked and phantom\n * reads would not occur, but with skipping, a concurrent process could\n * read block 1 with post-REDO contents and read block 3 with pre-REDO\n * contents, where with REDO locking it would wait on block 1 and see\n * block 3 with post-REDO contents only.\n */\nstatic bool\nneon_redo_read_buffer_filter(XLogReaderState *record, uint8 block_id)\n{\n\tXLogRecPtr\tend_recptr = record->EndRecPtr;\n\tNRelFileInfo rinfo;\n\tForkNumber\tforknum;\n\tBlockNumber blkno;\n\tBufferTag\ttag;\n\tuint32\t\thash;\n\tLWLock\t   *partitionLock;\n\tint\t\t\tbuf_id;\n\tbool\t\tno_redo_needed;\n\n\tif (old_redo_read_buffer_filter && old_redo_read_buffer_filter(record, block_id))\n\t\treturn true;\n\n#if PG_VERSION_NUM < 150000\n\tif (!XLogRecGetBlockTag(record, block_id, &rinfo, &forknum, &blkno))\n\t\tneon_log(PANIC, \"failed to locate backup block with ID %d\", block_id);\n#else\n\tXLogRecGetBlockTag(record, block_id, &rinfo, &forknum, &blkno);\n#endif\n\n\tCopyNRelFileInfoToBufTag(tag, rinfo);\n\ttag.forkNum = forknum;\n\ttag.blockNum = blkno;\n\n\thash = BufTableHashCode(&tag);\n\tpartitionLock = 
BufMappingPartitionLock(hash);\n\n\t/*\n\t * Lock the partition of shared_buffers so that it can't be updated\n\t * concurrently.\n\t */\n\tLWLockAcquire(partitionLock, LW_SHARED);\n\n\t/*\n\t * Out of an abundance of caution, we always run redo on shared catalogs,\n\t * regardless of whether the block is stored in shared buffers. See also\n\t * this function's top comment.\n\t */\n\tif (!OidIsValid(NInfoGetDbOid(rinfo)))\n\t{\n\t\tno_redo_needed = false;\n\t}\n\telse\n\t{\n\t\t/* Try to find the relevant buffer */\n\t\tbuf_id = BufTableLookup(&tag, hash);\n\n\t\tno_redo_needed = buf_id < 0;\n\t}\n\n\t/*\n\t * we don't have the buffer in memory, update lwLsn past this record, also\n\t * evict page from file cache\n\t */\n\tif (no_redo_needed)\n\t{\n\t\tneon_set_lwlsn_block(end_recptr, rinfo, forknum, blkno);\n\t\t/*\n\t\t * Redo changes if page exists in LFC.\n\t\t * We should perform this check after assigning LwLSN to prevent\n\t\t * prefetching of some older version of the page by some other backend.\n\t\t */\n\t\tno_redo_needed = !lfc_cache_contains(rinfo, forknum, blkno);\n\t}\n\n\tLWLockRelease(partitionLock);\n\n\tneon_extend_rel_size(rinfo, forknum, blkno, end_recptr);\n\tif (forknum == MAIN_FORKNUM)\n\t{\n\t\tneon_extend_rel_size(rinfo, FSM_FORKNUM, get_fsm_physical_block(blkno), end_recptr);\n\t}\n\treturn no_redo_needed;\n}\n"
  },
  {
    "path": "pgxn/neon/relsize_cache.c",
    "content": "/*-------------------------------------------------------------------------\n *\n * relsize_cache.c\n *      Relation size cache for better zentih performance.\n *\n * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group\n * Portions Copyright (c) 1994, Regents of the University of California\n *\n *-------------------------------------------------------------------------\n */\n#include \"postgres.h\"\n\n#include \"neon.h\"\n#include \"neon_pgversioncompat.h\"\n\n#include \"miscadmin.h\"\n#include \"pagestore_client.h\"\n#include RELFILEINFO_HDR\n#include \"storage/smgr.h\"\n#include \"storage/lwlock.h\"\n#include \"storage/ipc.h\"\n#include \"storage/shmem.h\"\n#include \"catalog/pg_tablespace_d.h\"\n#include \"utils/dynahash.h\"\n#include \"utils/guc.h\"\n\ntypedef struct\n{\n\tNRelFileInfo rinfo;\n\tForkNumber\tforknum;\n} RelTag;\n\ntypedef struct\n{\n\tRelTag\t\ttag;\n\tBlockNumber size;\n\tdlist_node\tlru_node;\t\t/* LRU list node */\n} RelSizeEntry;\n\ntypedef struct\n{\n\tsize_t      size;\n\tuint64\t\thits;\n\tuint64\t\tmisses;\n\tuint64\t\twrites;\n\tdlist_head\tlru;\t\t\t/* double linked list for LRU replacement\n\t\t\t\t\t\t\t\t * algorithm */\n} RelSizeHashControl;\n\n/*\n * Size of a cache entry is 36 bytes. 
So this default will take about 2.3 MB,\n * which seems reasonable.\n */\n#define DEFAULT_RELSIZE_HASH_SIZE (64 * 1024)\n\nstatic HTAB *relsize_hash;\nstatic LWLockId relsize_lock;\nstatic int\trelsize_hash_size = DEFAULT_RELSIZE_HASH_SIZE;\nstatic RelSizeHashControl* relsize_ctl;\n\nvoid\nRelsizeCacheShmemInit(void)\n{\n\tstatic HASHCTL info;\n\tbool found;\n\n\trelsize_ctl = (RelSizeHashControl *) ShmemInitStruct(\"relsize_hash\", sizeof(RelSizeHashControl), &found);\n\tif (!found)\n\t{\n\t\trelsize_lock = (LWLockId) GetNamedLWLockTranche(\"neon_relsize\");\n\t\tinfo.keysize = sizeof(RelTag);\n\t\tinfo.entrysize = sizeof(RelSizeEntry);\n\t\trelsize_hash = ShmemInitHash(\"neon_relsize\",\n\t\t\t\t\t\t\t\t\t relsize_hash_size, relsize_hash_size,\n\t\t\t\t\t\t\t\t\t &info,\n\t\t\t\t\t\t\t\t\t HASH_ELEM | HASH_BLOBS);\n\t\trelsize_ctl->size = 0;\n\t\trelsize_ctl->hits = 0;\n\t\trelsize_ctl->misses = 0;\n\t\trelsize_ctl->writes = 0;\n\t\tdlist_init(&relsize_ctl->lru);\n\t}\n}\n\nbool\nget_cached_relsize(NRelFileInfo rinfo, ForkNumber forknum, BlockNumber *size)\n{\n\tbool\t\tfound = false;\n\n\tif (relsize_hash_size > 0)\n\t{\n\t\tRelTag\t\ttag;\n\t\tRelSizeEntry *entry;\n\n\t\ttag.rinfo = rinfo;\n\t\ttag.forknum = forknum;\n\t\t/* We need exclusive lock here because of LRU list manipulation */\n\t\tLWLockAcquire(relsize_lock, LW_EXCLUSIVE);\n\t\tentry = hash_search(relsize_hash, &tag, HASH_FIND, NULL);\n\t\tif (entry != NULL)\n\t\t{\n\t\t\t*size = entry->size;\n\t\t\trelsize_ctl->hits += 1;\n\t\t\tfound = true;\n\t\t\t/* Move entry to the LRU list tail */\n\t\t\tdlist_delete(&entry->lru_node);\n\t\t\tdlist_push_tail(&relsize_ctl->lru, &entry->lru_node);\n\t\t}\n\t\telse\n\t\t{\n\t\t\trelsize_ctl->misses += 1;\n\t\t}\n\t\tLWLockRelease(relsize_lock);\n\t}\n\treturn found;\n}\n\nvoid\nset_cached_relsize(NRelFileInfo rinfo, ForkNumber forknum, BlockNumber size)\n{\n\tif (relsize_hash_size > 0)\n\t{\n\t\tRelTag\t\ttag;\n\t\tRelSizeEntry *entry;\n\t\tbool\t\tfound = 
false;\n\n\t\ttag.rinfo = rinfo;\n\t\ttag.forknum = forknum;\n\t\tLWLockAcquire(relsize_lock, LW_EXCLUSIVE);\n\t\t/*\n\t\t * This should actually never happen! Below we check if hash is full and delete least recently used item in this case.\n\t\t * But for further safety we also perform check here.\n\t\t */\n\t\twhile ((entry = hash_search(relsize_hash, &tag, HASH_ENTER_NULL, &found)) == NULL)\n\t\t{\n\t\t\tRelSizeEntry *victim = dlist_container(RelSizeEntry, lru_node, dlist_pop_head_node(&relsize_ctl->lru));\n\t\t\thash_search(relsize_hash, &victim->tag, HASH_REMOVE, NULL);\n\t\t\tAssert(relsize_ctl->size > 0);\n\t\t\trelsize_ctl->size -= 1;\n\t\t}\n\t\tentry->size = size;\n\t\tif (!found)\n\t\t{\n\t\t\tif (++relsize_ctl->size == relsize_hash_size)\n\t\t\t{\n\t\t\t\t/*\n\t\t\t\t * Remove least recently used element from the hash.\n\t\t\t\t * Hash size after that becomes `relsize_hash_size-1`.\n\t\t\t\t * But it is not considered to be a problem, because size of this hash is expected to be large enough and +-1 doesn't matter.\n\t\t\t\t */\n\t\t\t\tRelSizeEntry *victim = dlist_container(RelSizeEntry, lru_node, dlist_pop_head_node(&relsize_ctl->lru));\n\t\t\t\thash_search(relsize_hash, &victim->tag, HASH_REMOVE, NULL);\n\t\t\t\trelsize_ctl->size -= 1;\n\t\t\t}\n\t\t}\n\t\telse\n\t\t{\n\t\t\tdlist_delete(&entry->lru_node);\n\t\t}\n\t\tdlist_push_tail(&relsize_ctl->lru, &entry->lru_node);\n\t\trelsize_ctl->writes += 1;\n\t\tLWLockRelease(relsize_lock);\n\t}\n}\n\nvoid\nupdate_cached_relsize(NRelFileInfo rinfo, ForkNumber forknum, BlockNumber size)\n{\n\tif (relsize_hash_size > 0)\n\t{\n\t\tRelTag\t\ttag;\n\t\tRelSizeEntry *entry;\n\t\tbool\t\tfound;\n\n\t\ttag.rinfo = rinfo;\n\t\ttag.forknum = forknum;\n\t\tLWLockAcquire(relsize_lock, LW_EXCLUSIVE);\n\t\tentry = hash_search(relsize_hash, &tag, HASH_ENTER, &found);\n\t\tif (!found || entry->size < size)\n\t\t\tentry->size = size;\n\t\tif (!found)\n\t\t{\n\t\t\tif (++relsize_ctl->size == 
relsize_hash_size)\n\t\t\t{\n\t\t\t\tRelSizeEntry *victim = dlist_container(RelSizeEntry, lru_node, dlist_pop_head_node(&relsize_ctl->lru));\n\t\t\t\thash_search(relsize_hash, &victim->tag, HASH_REMOVE, NULL);\n\t\t\t\trelsize_ctl->size -= 1;\n\t\t\t}\n\t\t}\n\t\telse\n\t\t{\n\t\t\tdlist_delete(&entry->lru_node);\n\t\t}\n\t\trelsize_ctl->writes += 1;\n\t\tdlist_push_tail(&relsize_ctl->lru, &entry->lru_node);\n\t\tLWLockRelease(relsize_lock);\n\t}\n}\n\nvoid\nforget_cached_relsize(NRelFileInfo rinfo, ForkNumber forknum)\n{\n\tif (relsize_hash_size > 0)\n\t{\n\t\tRelTag\t\ttag;\n\t\tRelSizeEntry *entry;\n\t\ttag.rinfo = rinfo;\n\t\ttag.forknum = forknum;\n\t\tLWLockAcquire(relsize_lock, LW_EXCLUSIVE);\n\t\tentry = hash_search(relsize_hash, &tag, HASH_REMOVE, NULL);\n\t\tif (entry)\n\t\t{\n\t\t\tdlist_delete(&entry->lru_node);\n\t\t\trelsize_ctl->size -= 1;\n\t\t}\n\t\tLWLockRelease(relsize_lock);\n\t}\n}\n\nvoid\nrelsize_hash_init(void)\n{\n\tDefineCustomIntVariable(\"neon.relsize_hash_size\",\n\t\t\t\t\t\t\t\"Sets the maximum number of cached relation sizes for neon\",\n\t\t\t\t\t\t\tNULL,\n\t\t\t\t\t\t\t&relsize_hash_size,\n\t\t\t\t\t\t\tDEFAULT_RELSIZE_HASH_SIZE,\n\t\t\t\t\t\t\t0,\n\t\t\t\t\t\t\tINT_MAX,\n\t\t\t\t\t\t\tPGC_POSTMASTER,\n\t\t\t\t\t\t\t0,\n\t\t\t\t\t\t\tNULL, NULL, NULL);\n}\n\n/*\n * shmem_request hook: request additional shared resources.  We'll allocate or\n * attach to the shared resources in neon_smgr_shmem_startup().\n */\nvoid\nRelsizeCacheShmemRequest(void)\n{\n\tRequestAddinShmemSpace(sizeof(RelSizeHashControl) + hash_estimate_size(relsize_hash_size, sizeof(RelSizeEntry)));\n\tRequestNamedLWLockTranche(\"neon_relsize\", 1);\n}\n"
  },
  {
    "path": "pgxn/neon/unstable_extensions.c",
    "content": "#include <stdlib.h>\n#include <string.h>\n\n#include \"postgres.h\"\n\n#include \"nodes/plannodes.h\"\n#include \"nodes/parsenodes.h\"\n#include \"tcop/utility.h\"\n#include \"utils/errcodes.h\"\n#include \"utils/guc.h\"\n\n#include \"neon_pgversioncompat.h\"\n#include \"unstable_extensions.h\"\n\nstatic bool\t\t\t\t\tallow_unstable_extensions = false;\nstatic char\t\t\t\t   *unstable_extensions = NULL;\n\nstatic ProcessUtility_hook_type PreviousProcessUtilityHook = NULL;\n\nstatic bool\nlist_contains(char const* comma_separated_list, char const* val)\n{\n\tchar const* occ = comma_separated_list;\n\tsize_t val_len = strlen(val);\n\n\tif (val_len == 0)\n\t\treturn false;\n\n\twhile ((occ = strstr(occ, val)) != NULL)\n\t{\n\t\tif ((occ == comma_separated_list || occ[-1] == ',')\n\t\t\t&& (occ[val_len] == '\\0' || occ[val_len] == ','))\n\t\t{\n\t\t\treturn true;\n\t\t}\n\t\tocc += val_len;\n\t}\n\n\treturn false;\n}\n\n\nstatic void\nCheckUnstableExtension(\n\tPlannedStmt *pstmt,\n\tconst char *queryString,\n\tbool readOnlyTree,\n\tProcessUtilityContext context,\n\tParamListInfo params,\n\tQueryEnvironment *queryEnv,\n\tDestReceiver *dest,\n\tQueryCompletion *qc)\n{\n\tNode\t   *parseTree = pstmt->utilityStmt;\n\n\tif (allow_unstable_extensions || unstable_extensions == NULL)\n\t\tgoto process;\n\n\tswitch (nodeTag(parseTree))\n\t{\n\t\tcase T_CreateExtensionStmt:\n\t\t{\n\t\t\tCreateExtensionStmt *stmt = castNode(CreateExtensionStmt, parseTree);\n\t\t\tif (list_contains(unstable_extensions, stmt->extname))\n\t\t\t{\n\t\t\t\tereport(ERROR,\n\t\t\t\t\t\t(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),\n\t\t\t\t\t\t errmsg(\"%s extension is in beta and may be unstable or introduce backward-incompatible changes.\\nWe recommend testing it in a separate, dedicated Neon project.\", stmt->extname),\n\t\t\t\t\t\t errhint(\"to proceed with installation, run SET neon.allow_unstable_extensions='true'\")));\n\t\t\t}\n\t\t\tbreak;\n\t\t}\n\t\tdefault:\n\t\t\tgoto 
process;\n\t}\n\nprocess:\n\tif (PreviousProcessUtilityHook)\n\t{\n\t\tPreviousProcessUtilityHook(\n\t\t\tpstmt,\n\t\t\tqueryString,\n\t\t\treadOnlyTree,\n\t\t\tcontext,\n\t\t\tparams,\n\t\t\tqueryEnv,\n\t\t\tdest,\n\t\t\tqc);\n\t}\n\telse\n\t{\n\t\tstandard_ProcessUtility(\n\t\t\tpstmt,\n\t\t\tqueryString,\n\t\t\treadOnlyTree,\n\t\t\tcontext,\n\t\t\tparams,\n\t\t\tqueryEnv,\n\t\t\tdest,\n\t\t\tqc);\n\t}\n}\n\nvoid\nInitUnstableExtensionsSupport(void)\n{\n\tDefineCustomBoolVariable(\n\t\t\"neon.allow_unstable_extensions\",\n\t\t\"Allow unstable extensions to be installed and used\",\n\t\tNULL,\n\t\t&allow_unstable_extensions,\n\t\tfalse,\n\t\tPGC_USERSET,\n\t\t0,\n\t\tNULL, NULL, NULL);\n\n\tDefineCustomStringVariable(\n\t\t\"neon.unstable_extensions\",\n\t\t\"List of unstable extensions\",\n\t\tNULL,\n\t\t&unstable_extensions,\n\t\tNULL,\n\t\tPGC_SUSET,\n\t\t0,\n\t\tNULL, NULL, NULL);\n\n\tPreviousProcessUtilityHook = ProcessUtility_hook;\n\tProcessUtility_hook = CheckUnstableExtension;\n}\n"
  },
  {
    "path": "pgxn/neon/unstable_extensions.h",
    "content": "#ifndef __NEON_UNSTABLE_EXTENSIONS_H__\n#define __NEON_UNSTABLE_EXTENSIONS_H__\n\nvoid InitUnstableExtensionsSupport(void);\n\n#endif\n"
  },
  {
    "path": "pgxn/neon/walproposer.c",
    "content": "/*-------------------------------------------------------------------------\n *\n * walproposer.c\n *\n * Proposer/leader part of the total order broadcast protocol between postgres\n * and WAL safekeepers.\n *\n * We have two ways of launching WalProposer:\n *\n *   1. As a background worker which will pretend to be physical WalSender.\n * \t\tWalProposer will receive notifications about new available WAL and\n * \t\twill immediately broadcast it to alive safekeepers.\n *\n *   2. As a standalone utility by running `postgres --sync-safekeepers`. That\n *      is needed to create LSN from which it is safe to start postgres. More\n *      specifically it addresses following problems:\n *\n *      a) Chicken-or-the-egg problem: compute postgres needs data directory\n *         with non-rel files that are downloaded from pageserver by calling\n *         basebackup@LSN. This LSN is not arbitrary, it must include all\n *         previously committed transactions and defined through consensus\n *         voting, which happens... in walproposer, a part of compute node.\n *\n *      b) Just warranting such LSN is not enough, we must also actually commit\n *         it and make sure there is a safekeeper who knows this LSN is\n *         committed so WAL before it can be streamed to pageserver -- otherwise\n *         basebackup will hang waiting for WAL. Advancing commit_lsn without\n *         playing consensus game is impossible, so speculative 'let's just poll\n *         safekeepers, learn start LSN of future epoch and run basebackup'\n *         won't work.\n *\n * Both ways are implemented in walproposer_pg.c file. 
This file contains\n * generic part of walproposer which can be used in both cases, but can also\n * be used as an independent library.\n *\n *-------------------------------------------------------------------------\n */\n#include <sys/resource.h>\n\n#include \"postgres.h\"\n#include \"libpq/pqformat.h\"\n#include \"neon.h\"\n#include \"walproposer.h\"\n#include \"neon_utils.h\"\n\n/* Prototypes for private functions */\nstatic void WalProposerLoop(WalProposer *wp);\nstatic void ShutdownConnection(Safekeeper *sk);\nstatic void ResetConnection(Safekeeper *sk);\nstatic long TimeToReconnect(WalProposer *wp, TimestampTz now);\nstatic void ReconnectSafekeepers(WalProposer *wp);\nstatic void AdvancePollState(Safekeeper *sk, uint32 events);\nstatic void HandleConnectionEvent(Safekeeper *sk);\nstatic void SendStartWALPush(Safekeeper *sk);\nstatic void RecvStartWALPushResult(Safekeeper *sk);\nstatic void SendProposerGreeting(Safekeeper *sk);\nstatic void RecvAcceptorGreeting(Safekeeper *sk);\nstatic void SendVoteRequest(Safekeeper *sk);\nstatic void RecvVoteResponse(Safekeeper *sk);\nstatic bool VotesCollected(WalProposer *wp);\nstatic void HandleElectedProposer(WalProposer *wp);\nstatic term_t GetHighestTerm(TermHistory *th);\nstatic term_t GetLastLogTerm(Safekeeper *sk);\nstatic void ProcessPropStartPos(WalProposer *wp);\nstatic void SendProposerElected(Safekeeper *sk);\nstatic void StartStreaming(Safekeeper *sk);\nstatic void SendMessageToNode(Safekeeper *sk);\nstatic void BroadcastAppendRequest(WalProposer *wp);\nstatic void HandleActiveState(Safekeeper *sk, uint32 events);\nstatic bool SendAppendRequests(Safekeeper *sk);\nstatic bool RecvAppendResponses(Safekeeper *sk);\nstatic XLogRecPtr CalculateMinFlushLsn(WalProposer *wp);\nstatic XLogRecPtr GetAcknowledgedByQuorumWALPosition(WalProposer *wp);\nstatic void PAMessageSerialize(WalProposer *wp, ProposerAcceptorMessage *msg, StringInfo buf, int proto_version);\nstatic void HandleSafekeeperResponse(WalProposer *wp, 
Safekeeper *sk);\nstatic bool AsyncRead(Safekeeper *sk, char **buf, int *buf_size);\nstatic bool AsyncReadMessage(Safekeeper *sk, AcceptorProposerMessage *anymsg);\nstatic bool BlockingWrite(Safekeeper *sk, void *msg, size_t msg_size, SafekeeperState success_state);\nstatic bool AsyncWrite(Safekeeper *sk, void *msg, size_t msg_size, SafekeeperState flush_state);\nstatic bool AsyncFlush(Safekeeper *sk);\nstatic int\tCompareLsn(const void *a, const void *b);\nstatic char *FormatSafekeeperState(Safekeeper *sk);\nstatic void AssertEventsOkForState(uint32 events, Safekeeper *sk);\nstatic char *FormatEvents(WalProposer *wp, uint32 events);\nstatic void UpdateDonorShmem(WalProposer *wp);\nstatic char *MembershipConfigurationToString(MembershipConfiguration *mconf);\nstatic void MembershipConfigurationCopy(MembershipConfiguration *src, MembershipConfiguration *dst);\nstatic void MembershipConfigurationFree(MembershipConfiguration *mconf);\n\nWalProposer *\nWalProposerCreate(WalProposerConfig *config, walproposer_api api)\n{\n\tchar\t   *host;\n\tchar\t   *sep;\n\tchar\t   *port;\n\tWalProposer *wp;\n\n\twp = palloc0(sizeof(WalProposer));\n\twp->config = config;\n\twp->api = api;\n\twp->localTimeLineID = config->pgTimeline;\n\twp->state = WPS_COLLECTING_TERMS;\n\twp->mconf.generation = INVALID_GENERATION;\n\twp->mconf.members.len = 0;\n\twp->mconf.new_members.len = 0;\n\n\twp_log(LOG, \"neon.safekeepers=%s\", wp->config->safekeepers_list);\n\n\t/*\n\t * If safekeepers list starts with g# parse generation number followed by\n\t * :\n\t */\n\tif (strncmp(wp->config->safekeepers_list, \"g#\", 2) == 0)\n\t{\n\t\tchar\t   *endptr;\n\n\t\terrno = 0;\n\t\twp->safekeepers_generation = strtoul(wp->config->safekeepers_list + 2, &endptr, 10);\n\t\tif (errno != 0)\n\t\t{\n\t\t\twp_log(FATAL, \"failed to parse neon.safekeepers generation number: %m\");\n\t\t}\n\t\tif (*endptr != ':')\n\t\t{\n\t\t\twp_log(FATAL, \"failed to parse neon.safekeepers: no colon after 
generation\");\n\t\t}\n\t\t/* Skip past : to the first hostname. */\n\t\thost = endptr + 1;\n\t}\n\telse\n\t{\n\t\twp->safekeepers_generation = INVALID_GENERATION;\n\t\thost = wp->config->safekeepers_list;\n\t}\n\twp_log(LOG, \"safekeepers_generation=%u\", wp->safekeepers_generation);\n\n\tfor (; host != NULL && *host != '\\0'; host = sep)\n\t{\n\t\tport = strchr(host, ':');\n\t\tif (port == NULL)\n\t\t{\n\t\t\twp_log(FATAL, \"port is not specified\");\n\t\t}\n\t\t*port++ = '\\0';\n\t\tsep = strchr(port, ',');\n\t\tif (sep != NULL)\n\t\t\t*sep++ = '\\0';\n\t\tif (wp->n_safekeepers + 1 >= MAX_SAFEKEEPERS)\n\t\t{\n\t\t\twp_log(FATAL, \"too many safekeepers\");\n\t\t}\n\t\twp->safekeeper[wp->n_safekeepers].host = host;\n\t\twp->safekeeper[wp->n_safekeepers].port = port;\n\t\twp->safekeeper[wp->n_safekeepers].state = SS_OFFLINE;\n\t\twp->safekeeper[wp->n_safekeepers].active_state = SS_ACTIVE_SEND;\n\t\twp->safekeeper[wp->n_safekeepers].wp = wp;\n\t\t/* BEGIN_HADRON */\n\t\twp->safekeeper[wp->n_safekeepers].index = wp->n_safekeepers;\n\t\t/* END_HADRON */\n\t\t{\n\t\t\tSafekeeper *sk = &wp->safekeeper[wp->n_safekeepers];\n\t\t\tint\t\t\twritten = 0;\n\n\t\t\twritten = snprintf((char *) &sk->conninfo, MAXCONNINFO,\n\t\t\t\t\t\t\t   \"%s host=%s port=%s dbname=replication options='-c timeline_id=%s tenant_id=%s'\",\n\t\t\t\t\t\t\t   wp->config->safekeeper_conninfo_options, sk->host, sk->port,\n\t\t\t\t\t\t\t   wp->config->neon_timeline, wp->config->neon_tenant);\n\t\t\tif (written > MAXCONNINFO || written < 0)\n\t\t\t\twp_log(FATAL, \"could not create connection string for safekeeper %s:%s\", sk->host, sk->port);\n\t\t}\n\n\t\tinitStringInfo(&wp->safekeeper[wp->n_safekeepers].outbuf);\n\t\twp->safekeeper[wp->n_safekeepers].startStreamingAt = InvalidXLogRecPtr;\n\t\twp->safekeeper[wp->n_safekeepers].streamingAt = InvalidXLogRecPtr;\n\t\twp->n_safekeepers += 1;\n\t}\n\tif (wp->n_safekeepers < 1)\n\t{\n\t\twp_log(FATAL, \"safekeepers addresses are not 
specified\");\n\t}\n\twp->quorum = wp->n_safekeepers / 2 + 1;\n\n\tif (wp->config->proto_version != 2 && wp->config->proto_version != 3)\n\t\twp_log(FATAL, \"unsupported safekeeper protocol version %d\", wp->config->proto_version);\n\tif (wp->safekeepers_generation > INVALID_GENERATION && wp->config->proto_version < 3)\n\t\twp_log(FATAL, \"enabling generations requires protocol version 3\");\n\twp_log(LOG, \"using safekeeper protocol version %d\", wp->config->proto_version);\n\t\n\t/* BEGIN_HADRON */\n\twp->api.reset_safekeeper_statuses_for_metrics(wp, wp->n_safekeepers);\n\t/* END_HADRON */\n\n\t/* Fill the greeting package */\n\twp->greetRequest.pam.tag = 'g';\n\tif (!wp->config->neon_tenant)\n\t\twp_log(FATAL, \"neon.tenant_id is not provided\");\n\twp->greetRequest.tenant_id = wp->config->neon_tenant;\n\tif (!wp->config->neon_timeline)\n\t\twp_log(FATAL, \"neon.timeline_id is not provided\");\n\twp->greetRequest.timeline_id = wp->config->neon_timeline;\n\twp->greetRequest.pg_version = PG_VERSION_NUM;\n\twp->greetRequest.system_id = wp->config->systemId;\n\twp->greetRequest.wal_seg_size = wp->config->wal_segment_size;\n\n\twp->api.init_event_set(wp);\n\n\treturn wp;\n}\n\nvoid\nWalProposerFree(WalProposer *wp)\n{\n\tMembershipConfigurationFree(&wp->mconf);\n\tfor (int i = 0; i < wp->n_safekeepers; i++)\n\t{\n\t\tSafekeeper *sk = &wp->safekeeper[i];\n\n\t\tAssert(sk->outbuf.data != NULL);\n\t\tpfree(sk->outbuf.data);\n\t\tMembershipConfigurationFree(&sk->greetResponse.mconf);\n\t\tif (sk->voteResponse.termHistory.entries)\n\t\t\tpfree(sk->voteResponse.termHistory.entries);\n\t\tsk->voteResponse.termHistory.entries = NULL;\n\t}\n\tif (wp->propTermHistory.entries != NULL)\n\t\tpfree(wp->propTermHistory.entries);\n\twp->propTermHistory.entries = NULL;\n\n\tpfree(wp);\n}\n\nstatic bool\nWalProposerGenerationsEnabled(WalProposer *wp)\n{\n\treturn wp->safekeepers_generation != INVALID_GENERATION;\n}\n\n/*\n * Create new AppendRequest message and start sending it. 
This function is\n * called from walsender every time the new WAL is available.\n */\nvoid\nWalProposerBroadcast(WalProposer *wp, XLogRecPtr startpos, XLogRecPtr endpos)\n{\n\tAssert(startpos == wp->availableLsn && endpos >= wp->availableLsn);\n\twp->availableLsn = endpos;\n\tBroadcastAppendRequest(wp);\n}\n\n/*\n * Advance the WAL proposer state machine, waiting each time for events to occur.\n * Will exit only when latch is set, i.e. new WAL should be pushed from walsender\n * to walproposer.\n */\nvoid\nWalProposerPoll(WalProposer *wp)\n{\n\twhile (true)\n\t{\n\t\tSafekeeper *sk = NULL;\n\t\tint\t\t\trc = 0;\n\t\tuint32\t\tevents = 0;\n\t\tTimestampTz now = wp->api.get_current_timestamp(wp);\n\t\tlong\t\ttimeout = TimeToReconnect(wp, now);\n\n\t\trc = wp->api.wait_event_set(wp, timeout, &sk, &events);\n\n\t\t/* Exit loop if latch is set (we got new WAL) */\n\t\tif (rc == 1 && (events & WL_LATCH_SET))\n\t\t\tbreak;\n\n\t\t/*\n\t\t * If the event contains something that one of our safekeeper states\n\t\t * was waiting for, we'll advance its state.\n\t\t */\n\t\tif (rc == 1 && (events & WL_SOCKET_MASK))\n\t\t{\n\t\t\tAssert(sk != NULL);\n\t\t\tAdvancePollState(sk, events);\n\t\t}\n\n\t\t/*\n\t\t * If the timeout expired, attempt to reconnect to any safekeepers\n\t\t * that we dropped\n\t\t */\n\t\tReconnectSafekeepers(wp);\n\n\t\tif (rc == 0)\t\t\t/* timeout expired */\n\t\t{\n\t\t\t/*\n\t\t\t * Ensure flushrecptr is set to a recent value. 
This fixes a case\n\t\t\t * where we've not been notified of new WAL records when we were\n\t\t\t * planning on consuming them.\n\t\t\t */\n\t\t\tif (!wp->config->syncSafekeepers)\n\t\t\t{\n\t\t\t\tXLogRecPtr\tflushed = wp->api.get_flush_rec_ptr(wp);\n\n\t\t\t\tif (flushed > wp->availableLsn)\n\t\t\t\t\tbreak;\n\t\t\t}\n\t\t}\n\n\t\tnow = wp->api.get_current_timestamp(wp);\n\t\t/* timeout expired: poll state */\n\t\tif (rc == 0 || TimeToReconnect(wp, now) <= 0)\n\t\t{\n\t\t\t/*\n\t\t\t * If no WAL was generated during timeout (and we have already\n\t\t\t * collected the quorum), then send empty keepalive message\n\t\t\t */\n\t\t\tif (wp->availableLsn != InvalidXLogRecPtr)\n\t\t\t{\n\t\t\t\tBroadcastAppendRequest(wp);\n\t\t\t}\n\n\t\t\t/*\n\t\t\t * Abandon connection attempts which take too long.\n\t\t\t */\n\t\t\tnow = wp->api.get_current_timestamp(wp);\n\t\t\tfor (int i = 0; i < wp->n_safekeepers; i++)\n\t\t\t{\n\t\t\t\tsk = &wp->safekeeper[i];\n\t\t\t\tif (TimestampDifferenceExceeds(sk->latestMsgReceivedAt, now,\n\t\t\t\t\t\t\t\t\t\t\t   wp->config->safekeeper_connection_timeout))\n\t\t\t\t{\n\t\t\t\t\twp_log(WARNING, \"terminating connection to safekeeper '%s:%s' in '%s' state: no messages received during the last %dms or connection attempt took longer than that\",\n\t\t\t\t\t\t   sk->host, sk->port, FormatSafekeeperState(sk), wp->config->safekeeper_connection_timeout);\n\t\t\t\t\tShutdownConnection(sk);\n\t\t\t\t}\n\t\t\t}\n\t\t}\n\t}\n}\n\nvoid\nWalProposerStart(WalProposer *wp)\n{\n\n\t/* Initiate connections to all safekeeper nodes */\n\tfor (int i = 0; i < wp->n_safekeepers; i++)\n\t{\n\t\tResetConnection(&wp->safekeeper[i]);\n\t}\n\n\tWalProposerLoop(wp);\n}\n\nstatic void\nWalProposerLoop(WalProposer *wp)\n{\n\twhile (true)\n\t\tWalProposerPoll(wp);\n}\n\n\n/* Shuts down and cleans up the connection for a safekeeper. 
Sets its state to SS_OFFLINE */\nstatic void\nShutdownConnection(Safekeeper *sk)\n{\n\tsk->state = SS_OFFLINE;\n\tsk->streamingAt = InvalidXLogRecPtr;\n\n\t/* BEGIN_HADRON */\n\tsk->wp->api.update_safekeeper_status_for_metrics(sk->wp, sk->index, 0);\n\t/* END_HADRON */\n\n\tMembershipConfigurationFree(&sk->greetResponse.mconf);\n\tif (sk->voteResponse.termHistory.entries)\n\t\tpfree(sk->voteResponse.termHistory.entries);\n\tsk->voteResponse.termHistory.entries = NULL;\n\n\tsk->wp->api.conn_finish(sk);\n\tsk->wp->api.rm_safekeeper_event_set(sk);\n}\n\n/*\n * This function is called to establish new connection or to reestablish\n * connection in case of connection failure.\n *\n * On success, sets the state to SS_CONNECTING_WRITE.\n */\nstatic void\nResetConnection(Safekeeper *sk)\n{\n\tWalProposer *wp = sk->wp;\n\n\tif (sk->state != SS_OFFLINE)\n\t{\n\t\tShutdownConnection(sk);\n\t}\n\n\t/*\n\t * Try to establish new connection, it will update sk->conn.\n\t */\n\twp->api.conn_connect_start(sk);\n\n\t/*\n\t * PQconnectStart won't actually start connecting until we run\n\t * PQconnectPoll. Before we do that though, we need to check that it\n\t * didn't immediately fail.\n\t */\n\tif (wp->api.conn_status(sk) == WP_CONNECTION_BAD)\n\t{\n\t\t/*---\n\t\t * According to libpq docs:\n\t\t *   \"If the result is CONNECTION_BAD, the connection attempt has already failed,\n\t\t *    typically because of invalid connection parameters.\"\n\t\t * We should report this failure. Do not print the exact `conninfo` as it may\n\t\t * contain e.g. password. 
The error message should already provide enough information.\n\t\t *\n\t\t * https://www.postgresql.org/docs/devel/libpq-connect.html#LIBPQ-PQCONNECTSTARTPARAMS\n\t\t */\n\t\twp_log(WARNING, \"immediate failure to connect with node '%s:%s':\\n\\terror: %s\",\n\t\t\t   sk->host, sk->port, wp->api.conn_error_message(sk));\n\n\t\t/*\n\t\t * Even though the connection failed, we still need to clean up the\n\t\t * object\n\t\t */\n\t\twp->api.conn_finish(sk);\n\t\treturn;\n\t}\n\n\t/*\n\t * The documentation for PQconnectStart states that we should call\n\t * PQconnectPoll in a loop until it returns PGRES_POLLING_OK or\n\t * PGRES_POLLING_FAILED. The other two possible returns indicate whether\n\t * we should wait for reading or writing on the socket. For the first\n\t * iteration of the loop, we're expected to wait until the socket becomes\n\t * writable.\n\t *\n\t * The wording of the documentation is a little ambiguous; thankfully\n\t * there's an example in the postgres source itself showing this behavior.\n\t * (see libpqrcv_connect, defined in\n\t * src/backend/replication/libpqwalreceiver/libpqwalreceiver.c)\n\t */\n\twp_log(LOG, \"connecting with node %s:%s\", sk->host, sk->port);\n\n\tsk->state = SS_CONNECTING_WRITE;\n\tsk->latestMsgReceivedAt = wp->api.get_current_timestamp(wp);\n\n\twp->api.add_safekeeper_event_set(sk, WL_SOCKET_WRITEABLE);\n\treturn;\n}\n\n/*\n * How many milliseconds are left till we should attempt reconnection to\n * safekeepers? 
Returns 0 if it is already high time, -1 if we never reconnect\n * (do we actually need this?).\n */\nstatic long\nTimeToReconnect(WalProposer *wp, TimestampTz now)\n{\n\tTimestampTz passed;\n\tTimestampTz till_reconnect;\n\n\tif (wp->config->safekeeper_reconnect_timeout <= 0)\n\t\treturn -1;\n\n\tpassed = now - wp->last_reconnect_attempt;\n\ttill_reconnect = wp->config->safekeeper_reconnect_timeout * 1000 - passed;\n\tif (till_reconnect <= 0)\n\t\treturn 0;\n\treturn (long) (till_reconnect / 1000);\n}\n\n/* If the timeout has expired, attempt to reconnect to all offline safekeepers */\nstatic void\nReconnectSafekeepers(WalProposer *wp)\n{\n\tTimestampTz now = wp->api.get_current_timestamp(wp);\n\n\tif (TimeToReconnect(wp, now) == 0)\n\t{\n\t\twp->last_reconnect_attempt = now;\n\t\tfor (int i = 0; i < wp->n_safekeepers; i++)\n\t\t{\n\t\t\tif (wp->safekeeper[i].state == SS_OFFLINE)\n\t\t\t\tResetConnection(&wp->safekeeper[i]);\n\t\t}\n\t}\n}\n\n/*\n * Performs the logic for advancing the state machine of the specified safekeeper,\n * given that a certain set of events has occurred.\n */\nstatic void\nAdvancePollState(Safekeeper *sk, uint32 events)\n{\n#ifdef WALPROPOSER_LIB\t\t\t/* wp_log needs wp in lib build */\n\tWalProposer *wp = sk->wp;\n#endif\n\n\t/*\n\t * Sanity check. We assume further down that the operations don't block\n\t * because the socket is ready.\n\t */\n\tAssertEventsOkForState(events, sk);\n\n\t/* Execute the code corresponding to the current state */\n\tswitch (sk->state)\n\t{\n\t\t\t/*\n\t\t\t * safekeepers are only taken out of SS_OFFLINE by calls to\n\t\t\t * ResetConnection\n\t\t\t */\n\t\tcase SS_OFFLINE:\n\t\t\twp_log(FATAL, \"unexpected safekeeper %s:%s state advancement: is offline\",\n\t\t\t\t   sk->host, sk->port);\n\t\t\tbreak;\t\t\t\t/* actually unreachable, but prevents\n\t\t\t\t\t\t\t\t * -Wimplicit-fallthrough */\n\n\t\t\t/*\n\t\t\t * Both connecting states run the same logic. 
The only difference\n\t\t\t * is the events they're expecting\n\t\t\t */\n\t\tcase SS_CONNECTING_READ:\n\t\tcase SS_CONNECTING_WRITE:\n\t\t\tHandleConnectionEvent(sk);\n\t\t\tbreak;\n\n\t\t\t/*\n\t\t\t * Waiting for a successful CopyBoth response.\n\t\t\t */\n\t\tcase SS_WAIT_EXEC_RESULT:\n\t\t\tRecvStartWALPushResult(sk);\n\t\t\tbreak;\n\n\t\t\t/*\n\t\t\t * Finish handshake comms: receive information about the\n\t\t\t * safekeeper.\n\t\t\t */\n\t\tcase SS_HANDSHAKE_RECV:\n\t\t\tRecvAcceptorGreeting(sk);\n\t\t\tbreak;\n\n\t\t\t/*\n\t\t\t * Voting is an idle state - we don't expect any events to\n\t\t\t * trigger. Refer to the execution of SS_HANDSHAKE_RECV to see how\n\t\t\t * nodes are transferred from SS_WAIT_VOTING to sending actual vote\n\t\t\t * requests.\n\t\t\t */\n\t\tcase SS_WAIT_VOTING:\n\t\t\twp_log(WARNING, \"EOF from node %s:%s in %s state\", sk->host,\n\t\t\t\t   sk->port, FormatSafekeeperState(sk));\n\t\t\tResetConnection(sk);\n\t\t\treturn;\n\n\t\t\t/* Read the safekeeper response for our candidate */\n\t\tcase SS_WAIT_VERDICT:\n\t\t\tRecvVoteResponse(sk);\n\t\t\tbreak;\n\n\t\t\t/* Flush proposer announcement message */\n\t\tcase SS_SEND_ELECTED_FLUSH:\n\n\t\t\t/*\n\t\t\t * AsyncFlush ensures we only move on to SS_ACTIVE once the flush\n\t\t\t * completes. 
If we still have more to do, we'll wait until the\n\t\t\t * next poll comes along.\n\t\t\t */\n\t\t\tif (!AsyncFlush(sk))\n\t\t\t\treturn;\n\n\t\t\t/* flush is done, event set and state will be updated later */\n\t\t\tStartStreaming(sk);\n\t\t\tbreak;\n\n\t\t\t/*\n\t\t\t * Idle state for waiting votes from quorum.\n\t\t\t */\n\t\tcase SS_WAIT_ELECTED:\n\t\t\twp_log(WARNING, \"EOF from node %s:%s in %s state\", sk->host,\n\t\t\t\t   sk->port, FormatSafekeeperState(sk));\n\t\t\tResetConnection(sk);\n\t\t\treturn;\n\n\t\t\t/*\n\t\t\t * Active state is used for streaming WAL and receiving feedback.\n\t\t\t */\n\t\tcase SS_ACTIVE:\n\t\t\tHandleActiveState(sk, events);\n\t\t\tbreak;\n\t}\n}\n\nstatic void\nHandleConnectionEvent(Safekeeper *sk)\n{\n\tWalProposer *wp = sk->wp;\n\tWalProposerConnectPollStatusType result = wp->api.conn_connect_poll(sk);\n\n\t/* The new set of events we'll wait on, after updating */\n\tuint32\t\tnew_events = WL_NO_EVENTS;\n\n\tswitch (result)\n\t{\n\t\tcase WP_CONN_POLLING_OK:\n\t\t\twp_log(LOG, \"connected with node %s:%s\", sk->host,\n\t\t\t\t   sk->port);\n\t\t\tsk->latestMsgReceivedAt = wp->api.get_current_timestamp(wp);\n\n\t\t\t/*\n\t\t\t * We have to pick some event to update event set. 
We'll\n\t\t\t * eventually need the socket to be readable, so we go with that.\n\t\t\t */\n\t\t\tnew_events = WL_SOCKET_READABLE;\n\t\t\tbreak;\n\n\t\t\t/*\n\t\t\t * If we need to poll to finish connecting, continue doing that\n\t\t\t */\n\t\tcase WP_CONN_POLLING_READING:\n\t\t\tsk->state = SS_CONNECTING_READ;\n\t\t\tnew_events = WL_SOCKET_READABLE;\n\t\t\tbreak;\n\t\tcase WP_CONN_POLLING_WRITING:\n\t\t\tsk->state = SS_CONNECTING_WRITE;\n\t\t\tnew_events = WL_SOCKET_WRITEABLE;\n\t\t\tbreak;\n\n\t\tcase WP_CONN_POLLING_FAILED:\n\t\t\twp_log(WARNING, \"failed to connect to node '%s:%s': %s\",\n\t\t\t\t   sk->host, sk->port, wp->api.conn_error_message(sk));\n\n\t\t\t/*\n\t\t\t * If connecting failed, we don't want to restart the connection\n\t\t\t * because that might run us into a loop. Instead, shut it down --\n\t\t\t * it'll naturally restart at a slower interval on calls to\n\t\t\t * ReconnectSafekeepers.\n\t\t\t */\n\t\t\tShutdownConnection(sk);\n\t\t\treturn;\n\t}\n\n\t/*\n\t * Because PQconnectPoll can change the socket, we have to un-register the\n\t * old event and re-register an event on the new socket.\n\t */\n\twp->api.rm_safekeeper_event_set(sk);\n\twp->api.add_safekeeper_event_set(sk, new_events);\n\n\t/* If we successfully connected, send START_WAL_PUSH query */\n\tif (result == WP_CONN_POLLING_OK)\n\t\tSendStartWALPush(sk);\n}\n\n/*\n * Send \"START_WAL_PUSH\" message as an empty query to the safekeeper. Performs\n * a blocking send, then immediately moves to SS_WAIT_EXEC_RESULT. If something\n * goes wrong, change state to SS_OFFLINE and shutdown the connection.\n */\nstatic void\nSendStartWALPush(Safekeeper *sk)\n{\n\tWalProposer *wp = sk->wp;\n\n\t/* Forbid implicit timeline creation if generations are enabled. */\n\tchar\t   *allow_timeline_creation = WalProposerGenerationsEnabled(wp) ? 
\"false\" : \"true\";\n#define CMD_LEN 512\n\tchar\t\tcmd[CMD_LEN];\n\n\n\tsnprintf(cmd, CMD_LEN, \"START_WAL_PUSH (proto_version '%d', allow_timeline_creation '%s')\", wp->config->proto_version, allow_timeline_creation);\n\tif (!wp->api.conn_send_query(sk, cmd))\n\t{\n\t\twp_log(WARNING, \"failed to send '%s' query to safekeeper %s:%s: %s\",\n\t\t\t   cmd, sk->host, sk->port, wp->api.conn_error_message(sk));\n\t\tShutdownConnection(sk);\n\t\treturn;\n\t}\n\tsk->state = SS_WAIT_EXEC_RESULT;\n\twp->api.update_event_set(sk, WL_SOCKET_READABLE);\n}\n\nstatic void\nRecvStartWALPushResult(Safekeeper *sk)\n{\n\tWalProposer *wp = sk->wp;\n\n\tswitch (wp->api.conn_get_query_result(sk))\n\t{\n\t\t\t/*\n\t\t\t * Successful result, move on to starting the handshake\n\t\t\t */\n\t\tcase WP_EXEC_SUCCESS_COPYBOTH:\n\n\t\t\tSendProposerGreeting(sk);\n\t\t\tbreak;\n\n\t\t\t/*\n\t\t\t * Needs repeated calls to finish. Wait until the socket is\n\t\t\t * readable\n\t\t\t */\n\t\tcase WP_EXEC_NEEDS_INPUT:\n\n\t\t\t/*\n\t\t\t * SS_WAIT_EXEC_RESULT is always reached through an event, so we\n\t\t\t * don't need to update the event set\n\t\t\t */\n\t\t\tbreak;\n\n\t\tcase WP_EXEC_FAILED:\n\t\t\twp_log(WARNING, \"failed to send query to safekeeper %s:%s: %s\",\n\t\t\t\t   sk->host, sk->port, wp->api.conn_error_message(sk));\n\t\t\tShutdownConnection(sk);\n\t\t\treturn;\n\n\t\t\t/*\n\t\t\t * Unexpected result -- funamdentally an error, but we want to\n\t\t\t * produce a custom message, rather than a generic \"something went\n\t\t\t * wrong\"\n\t\t\t */\n\t\tcase WP_EXEC_UNEXPECTED_SUCCESS:\n\t\t\twp_log(WARNING, \"received bad response from safekeeper %s:%s query execution\",\n\t\t\t\t   sk->host, sk->port);\n\t\t\tShutdownConnection(sk);\n\t\t\treturn;\n\t}\n}\n\n/*\n * Start handshake: first of all send information about the\n * walproposer. 
After sending, we wait on SS_HANDSHAKE_RECV for\n * a response to finish the handshake.\n */\nstatic void\nSendProposerGreeting(Safekeeper *sk)\n{\n\tWalProposer *wp = sk->wp;\n\tchar\t   *mconf_toml = MembershipConfigurationToString(&wp->greetRequest.mconf);\n\n\twp_log(LOG, \"sending ProposerGreeting to safekeeper %s:%s with mconf = %s\", sk->host, sk->port, mconf_toml);\n\tpfree(mconf_toml);\n\n\tPAMessageSerialize(wp, (ProposerAcceptorMessage *) &wp->greetRequest,\n\t\t\t\t\t   &sk->outbuf, wp->config->proto_version);\n\n\t/*\n\t * On failure, logging & resetting the connection is handled. We just need\n\t * to handle the control flow.\n\t */\n\tBlockingWrite(sk, sk->outbuf.data, sk->outbuf.len, SS_HANDSHAKE_RECV);\n}\n\n/*\n * Assuming `sk` sent its node id, find such member(s) in wp->mconf and set ptr in\n * members_safekeepers & new_members_safekeepers to sk.\n */\nstatic void\nUpdateMemberSafekeeperPtr(WalProposer *wp, Safekeeper *sk)\n{\n\t/* members_safekeepers etc are fixed size, sanity check mconf size */\n\tif (wp->mconf.members.len > MAX_SAFEKEEPERS)\n\t\twp_log(FATAL, \"too many members %d in mconf\", wp->mconf.members.len);\n\tif (wp->mconf.new_members.len > MAX_SAFEKEEPERS)\n\t\twp_log(FATAL, \"too many new_members %d in mconf\", wp->mconf.new_members.len);\n\n\t/* node id is not known until greeting is received */\n\tif (sk->state < SS_WAIT_VOTING)\n\t\treturn;\n\n\t/* 0 is assumed to be invalid node id, should never happen */\n\tif (sk->greetResponse.nodeId == 0)\n\t{\n\t\twp_log(WARNING, \"safekeeper %s:%s sent zero node id\", sk->host, sk->port);\n\t\treturn;\n\t}\n\n\tfor (uint32 i = 0; i < wp->mconf.members.len; i++)\n\t{\n\t\tSafekeeperId *sk_id = &wp->mconf.members.m[i];\n\n\t\tif (sk_id->node_id == sk->greetResponse.nodeId)\n\t\t{\n\t\t\t/*\n\t\t\t * If mconf or list of safekeepers to connect to changed (the\n\t\t\t * latter always currently goes through restart though),\n\t\t\t * ResetMemberSafekeeperPtrs is expected to be called 
before\n\t\t\t * UpdateMemberSafekeeperPtr. So, other value suggests that we are\n\t\t\t * connected to the same sk under different host name, complain\n\t\t\t * about that.\n\t\t\t */\n\t\t\tif (wp->members_safekeepers[i] != NULL && wp->members_safekeepers[i] != sk)\n\t\t\t{\n\t\t\t\twp_log(WARNING, \"safekeeper {id = %lu, ep = %s:%u } in members[%u] is already mapped to connection slot %lu\",\n\t\t\t\t\t   sk_id->node_id, sk_id->host, sk_id->port, i, wp->members_safekeepers[i] - wp->safekeeper);\n\t\t\t}\n\t\t\twp_log(LOG, \"safekeeper {id = %lu, ep = %s:%u } in members[%u] mapped to connection slot %lu\",\n\t\t\t\t   sk_id->node_id, sk_id->host, sk_id->port, i, sk - wp->safekeeper);\n\t\t\twp->members_safekeepers[i] = sk;\n\t\t}\n\t}\n\t/* repeat for new_members */\n\tfor (uint32 i = 0; i < wp->mconf.new_members.len; i++)\n\t{\n\t\tSafekeeperId *sk_id = &wp->mconf.new_members.m[i];\n\n\t\tif (sk_id->node_id == sk->greetResponse.nodeId)\n\t\t{\n\t\t\tif (wp->new_members_safekeepers[i] != NULL && wp->new_members_safekeepers[i] != sk)\n\t\t\t{\n\t\t\t\twp_log(WARNING, \"safekeeper {id = %lu, ep = %s:%u } in new_members[%u] is already mapped to connection slot %lu\",\n\t\t\t\t\t   sk_id->node_id, sk_id->host, sk_id->port, i, wp->new_members_safekeepers[i] - wp->safekeeper);\n\t\t\t}\n\t\t\twp_log(LOG, \"safekeeper {id = %lu, ep = %s:%u } in new_members[%u] mapped to connection slot %lu\",\n\t\t\t\t   sk_id->node_id, sk_id->host, sk_id->port, i, sk - wp->safekeeper);\n\t\t\twp->new_members_safekeepers[i] = sk;\n\t\t}\n\t}\n}\n\n/*\n * Reset wp->members_safekeepers & new_members_safekeepers and refill them.\n * Called after wp changes mconf.\n */\nstatic void\nResetMemberSafekeeperPtrs(WalProposer *wp)\n{\n\tmemset(&wp->members_safekeepers, 0, sizeof(Safekeeper *) * MAX_SAFEKEEPERS);\n\tmemset(&wp->new_members_safekeepers, 0, sizeof(Safekeeper *) * MAX_SAFEKEEPERS);\n\tfor (int i = 0; i < wp->n_safekeepers; i++)\n\t{\n\t\tif (wp->safekeeper[i].state >= 
SS_WAIT_VOTING)\n\t\t\tUpdateMemberSafekeeperPtr(wp, &wp->safekeeper[i]);\n\t}\n}\n\nstatic uint32\nMsetQuorum(MemberSet *mset)\n{\n\tAssert(mset->len > 0);\n\treturn mset->len / 2 + 1;\n}\n\n/* Does n form a quorum in mset? */\nstatic bool\nMsetHasQuorum(MemberSet *mset, uint32 n)\n{\n\treturn n >= MsetQuorum(mset);\n}\n\n/*\n * TermsCollected helper for a single member set `mset`.\n *\n * `msk` is the member -> safekeeper mapping for mset, i.e. members_safekeepers\n * or new_members_safekeepers.\n */\nstatic bool\nTermsCollectedMset(WalProposer *wp, MemberSet *mset, Safekeeper **msk, StringInfo s)\n{\n\tuint32\t\tn_greeted = 0;\n\n\tfor (uint32 i = 0; i < mset->len; i++)\n\t{\n\t\tSafekeeper *sk = msk[i];\n\n\t\tif (sk != NULL && sk->state == SS_WAIT_VOTING)\n\t\t{\n\t\t\tif (n_greeted > 0)\n\t\t\t\tappendStringInfoString(s, \", \");\n\t\t\tappendStringInfo(s, \"{id = %lu, ep = %s:%s}\", sk->greetResponse.nodeId, sk->host, sk->port);\n\t\t\tn_greeted++;\n\t\t}\n\t}\n\tappendStringInfo(s, \", %u/%u total\", n_greeted, mset->len);\n\treturn MsetHasQuorum(mset, n_greeted);\n}\n\n/*\n * Have we received greeting from enough (quorum) safekeepers to start voting?\n */\nstatic bool\nTermsCollected(WalProposer *wp)\n{\n\tStringInfoData s;\t\t\t/* str for logging */\n\tbool\t\tcollected = false;\n\n\t/* legacy: generations disabled */\n\tif (!WalProposerGenerationsEnabled(wp) && wp->mconf.generation == INVALID_GENERATION)\n\t{\n\t\tcollected = wp->n_connected >= wp->quorum;\n\t\tif (collected)\n\t\t{\n\t\t\twp->propTerm++;\n\t\t\twp_log(LOG, \"walproposer connected to quorum (%d) safekeepers, propTerm=\" INT64_FORMAT \", starting voting\", wp->quorum, wp->propTerm);\n\t\t}\n\t\treturn collected;\n\t}\n\n\t/*\n\t * With generations enabled, we start campaign only when 1) some mconf is\n\t * actually received 2) we have greetings from majority of members as well\n\t * as from majority of new_members if it exists.\n\t */\n\tif (wp->mconf.generation == 
INVALID_GENERATION)\n\t\treturn false;\n\n\tinitStringInfo(&s);\n\tappendStringInfoString(&s, \"mset greeters: \");\n\tif (!TermsCollectedMset(wp, &wp->mconf.members, wp->members_safekeepers, &s))\n\t\tgoto res;\n\tif (wp->mconf.new_members.len > 0)\n\t{\n\t\tappendStringInfoString(&s, \", new_mset greeters: \");\n\t\tif (!TermsCollectedMset(wp, &wp->mconf.new_members, wp->new_members_safekeepers, &s))\n\t\t\tgoto res;\n\t}\n\twp->propTerm++;\n\twp_log(LOG, \"walproposer connected to quorum of safekeepers: %s, propTerm=\" INT64_FORMAT \", starting voting\", s.data, wp->propTerm);\n\tcollected = true;\n\nres:\n\tpfree(s.data);\n\treturn collected;\n}\n\nstatic void\nRecvAcceptorGreeting(Safekeeper *sk)\n{\n\tWalProposer *wp = sk->wp;\n\tchar\t   *mconf_toml;\n\n\t/*\n\t * If our reading doesn't immediately succeed, any necessary error\n\t * handling or state setting is taken care of. We can leave any other work\n\t * until later.\n\t */\n\tsk->greetResponse.apm.tag = 'g';\n\tif (!AsyncReadMessage(sk, (AcceptorProposerMessage *) &sk->greetResponse))\n\t\treturn;\n\n\tmconf_toml = MembershipConfigurationToString(&sk->greetResponse.mconf);\n\twp_log(LOG, \"received AcceptorGreeting from safekeeper %s:%s, node_id = %lu, mconf = %s, term=\" UINT64_FORMAT,\n\t\t   sk->host, sk->port, sk->greetResponse.nodeId, mconf_toml, sk->greetResponse.term);\n\tpfree(mconf_toml);\n\n\t/*\n\t * Adopt mconf of safekeepers if it is higher.\n\t */\n\tif (sk->greetResponse.mconf.generation > wp->mconf.generation)\n\t{\n\t\t/* sanity check before adopting, should never happen */\n\t\tif (sk->greetResponse.mconf.members.len == 0)\n\t\t{\n\t\t\twp_log(FATAL, \"mconf %u has zero members\", sk->greetResponse.mconf.generation);\n\t\t}\n\n\t\t/*\n\t\t * If we at least started campaign, restart wp to get elected in the\n\t\t * new mconf. 
Note: in principle once wp is already elected\n\t\t * re-election is not required, but being conservative here is not\n\t\t * bad.\n\t\t *\n\t\t * TODO: put mconf to shmem to immediately pick it up on start,\n\t\t * otherwise if some safekeeper(s) misses latest mconf and gets\n\t\t * connected the first, it may cause redundant restarts here.\n\t\t *\n\t\t * More generally, it would be nice to restart walproposer (wiping\n\t\t * election state) without restarting the process. In particular, that\n\t\t * would allow sync-safekeepers not to die here if it intersected with\n\t\t * sk migration (as well as remove 1s delay).\n\t\t *\n\t\t * Note that assign_neon_safekeepers also currently restarts the\n\t\t * process, so during normal migration walproposer may restart twice.\n\t\t */\n\t\tif (wp->state >= WPS_CAMPAIGN)\n\t\t{\n\t\t\twp_log(FATAL, \"restarting to adopt mconf generation %d\", sk->greetResponse.mconf.generation);\n\t\t}\n\t\tMembershipConfigurationFree(&wp->mconf);\n\t\tMembershipConfigurationCopy(&sk->greetResponse.mconf, &wp->mconf);\n\t\tResetMemberSafekeeperPtrs(wp);\n\t\t/* full conf was just logged above */\n\t\twp_log(LOG, \"changed mconf to generation %u\", wp->mconf.generation);\n\t}\n\n\t/* Protocol is all good, move to voting. */\n\tsk->state = SS_WAIT_VOTING;\n\n\t/* In greeting safekeeper sent its id; update mappings accordingly. */\n\tUpdateMemberSafekeeperPtr(wp, sk);\n\n\t/*\n\t * Note: it would be better to track the counter on per safekeeper basis,\n\t * but at worst walproposer would restart with 'term rejected', so leave\n\t * as is for now.\n\t */\n\t++wp->n_connected;\n\tif (wp->state == WPS_COLLECTING_TERMS)\n\t{\n\t\t/* We're still collecting terms from the majority. */\n\t\twp->propTerm = Max(sk->greetResponse.term, wp->propTerm);\n\n\t\t/* Quorum is acquired, prepare the vote request. 
*/\n\t\tif (TermsCollected(wp))\n\t\t{\n\t\t\twp->state = WPS_CAMPAIGN;\n\t\t\twp->voteRequest.pam.tag = 'v';\n\t\t\twp->voteRequest.generation = wp->mconf.generation;\n\t\t\twp->voteRequest.term = wp->propTerm;\n\t\t}\n\t}\n\telse if (sk->greetResponse.term > wp->propTerm)\n\t{\n\t\t/* Another compute with higher term is running. */\n\t\twp_log(FATAL, \"WAL acceptor %s:%s with term \" INT64_FORMAT \" rejects our connection request with term \" INT64_FORMAT \"\",\n\t\t\t   sk->host, sk->port,\n\t\t\t   sk->greetResponse.term, wp->propTerm);\n\t}\n\n\t/*\n\t * If we have quorum, start (or just send vote request to newly connected\n\t * node) election, otherwise wait until we have more greetings.\n\t */\n\tif (wp->state == WPS_COLLECTING_TERMS)\n\t{\n\t\t/*\n\t\t * SS_VOTING is an idle state; read-ready indicates the connection\n\t\t * closed.\n\t\t */\n\t\twp->api.update_event_set(sk, WL_SOCKET_READABLE);\n\t}\n\telse\n\t{\n\t\t/*\n\t\t * Now send voting request to the cohort and wait responses\n\t\t */\n\t\tfor (int j = 0; j < wp->n_safekeepers; j++)\n\t\t{\n\t\t\tif (wp->safekeeper[j].state == SS_WAIT_VOTING)\n\t\t\t\tSendVoteRequest(&wp->safekeeper[j]);\n\t\t}\n\t}\n}\n\nstatic void\nSendVoteRequest(Safekeeper *sk)\n{\n\tWalProposer *wp = sk->wp;\n\n\tPAMessageSerialize(wp, (ProposerAcceptorMessage *) &wp->voteRequest,\n\t\t\t\t\t   &sk->outbuf, wp->config->proto_version);\n\n\t/* We have quorum for voting, send our vote request */\n\twp_log(LOG, \"requesting vote from sk {id = %lu, ep = %s:%s} for generation %u term \" UINT64_FORMAT,\n\t\t   sk->greetResponse.nodeId, sk->host, sk->port, wp->voteRequest.generation, wp->voteRequest.term);\n\t/* On failure, logging & resetting is handled */\n\tBlockingWrite(sk, sk->outbuf.data, sk->outbuf.len, SS_WAIT_VERDICT);\n\t/* If successful, wait for read-ready with SS_WAIT_VERDICT */\n}\n\nstatic void\nRecvVoteResponse(Safekeeper *sk)\n{\n\tWalProposer *wp = sk->wp;\n\n\tAssert(wp->state >= 
WPS_CAMPAIGN);\n\n\tsk->voteResponse.apm.tag = 'v';\n\tif (!AsyncReadMessage(sk, (AcceptorProposerMessage *) &sk->voteResponse))\n\t\treturn;\n\n\twp_log(LOG,\n\t\t   \"got VoteResponse from sk {id = %lu, ep = %s:%s}, generation=%u, term=%lu, voteGiven=%u, last_log_term=\" UINT64_FORMAT \", flushLsn=%X/%X, truncateLsn=%X/%X\",\n\t\t   sk->greetResponse.nodeId, sk->host, sk->port, sk->voteResponse.generation, sk->voteResponse.term,\n\t\t   sk->voteResponse.voteGiven,\n\t\t   GetHighestTerm(&sk->voteResponse.termHistory),\n\t\t   LSN_FORMAT_ARGS(sk->voteResponse.flushLsn),\n\t\t   LSN_FORMAT_ARGS(sk->voteResponse.truncateLsn));\n\n\t/*\n\t * In case of acceptor rejecting our vote, bail out, but only if either it\n\t * already lives in strictly higher term (concurrent compute spotted) or\n\t * we are not elected yet and thus need the vote.\n\t */\n\tif ((!sk->voteResponse.voteGiven) &&\n\t\t(sk->voteResponse.term > wp->propTerm || wp->state == WPS_CAMPAIGN))\n\t{\n\t\twp_log(FATAL, \"WAL acceptor %s:%s with term \" INT64_FORMAT \" rejects our connection request with term \" INT64_FORMAT \"\",\n\t\t\t   sk->host, sk->port,\n\t\t\t   sk->voteResponse.term, wp->propTerm);\n\t}\n\tAssert(sk->voteResponse.term == wp->propTerm);\n\n\t/* ready for elected message */\n\tsk->state = SS_WAIT_ELECTED;\n\n\t/* Are we already elected? */\n\tif (wp->state == WPS_CAMPAIGN)\n\t{\n\t\t/* no; check if this vote makes us elected */\n\t\tif (VotesCollected(wp))\n\t\t{\n\t\t\twp->state = WPS_ELECTED;\n\t\t\tHandleElectedProposer(wp);\n\t\t}\n\t\telse\n\t\t{\n\t\t\t/* can't do much yet, no quorum */\n\t\t\treturn;\n\t\t}\n\t}\n\telse\n\t{\n\t\tAssert(wp->state == WPS_ELECTED);\n\t\t/* send elected only to this sk */\n\t\tSendProposerElected(sk);\n\t}\n}\n\n/*\n * VotesCollected helper for a single member set `mset`.\n *\n * `msk` is the member -> safekeeper mapping for mset, i.e. 
members_safekeepers\n * or new_members_safekeepers.\n */\nstatic bool\nVotesCollectedMset(WalProposer *wp, MemberSet *mset, Safekeeper **msk, StringInfo s)\n{\n\tuint32\t\tn_votes = 0;\n\n\tfor (uint32 i = 0; i < mset->len; i++)\n\t{\n\t\tSafekeeper *sk = msk[i];\n\n\t\tif (sk != NULL && sk->state == SS_WAIT_ELECTED)\n\t\t{\n\t\t\tAssert(sk->voteResponse.voteGiven);\n\n\t\t\t/*\n\t\t\t * Find the highest vote. NULL check is for the legacy case where\n\t\t\t * safekeeper might be not initialized with LSN at all and return\n\t\t\t * 0 LSN in the vote response; we still want to set donor to\n\t\t\t * something in this case.\n\t\t\t */\n\t\t\tif (GetLastLogTerm(sk) > wp->donorLastLogTerm ||\n\t\t\t\t(GetLastLogTerm(sk) == wp->donorLastLogTerm &&\n\t\t\t\t sk->voteResponse.flushLsn > wp->propTermStartLsn) ||\n\t\t\t\twp->donor == NULL)\n\t\t\t{\n\t\t\t\twp->donorLastLogTerm = GetLastLogTerm(sk);\n\t\t\t\twp->propTermStartLsn = sk->voteResponse.flushLsn;\n\t\t\t\twp->donor = sk;\n\t\t\t}\n\t\t\twp->truncateLsn = Max(sk->voteResponse.truncateLsn, wp->truncateLsn);\n\n\t\t\tif (n_votes > 0)\n\t\t\t\tappendStringInfoString(s, \", \");\n\t\t\tappendStringInfo(s, \"{id = %lu, ep = %s:%s}\", sk->greetResponse.nodeId, sk->host, sk->port);\n\t\t\tn_votes++;\n\t\t}\n\t}\n\tappendStringInfo(s, \", %u/%u total\", n_votes, mset->len);\n\treturn MsetHasQuorum(mset, n_votes);\n}\n\n\n/*\n * Checks if enough votes has been collected to get elected and if that's the\n * case finds the highest vote, setting donor, donorLastLogTerm,\n * propTermStartLsn fields. 
Also sets truncateLsn.\n */\nstatic bool\nVotesCollected(WalProposer *wp)\n{\n\tStringInfoData s;\t\t\t/* str for logging */\n\tbool\t\tcollected = false;\n\n\t/* assumed to be called only when not elected yet */\n\tAssert(wp->state == WPS_CAMPAIGN);\n\n\twp->propTermStartLsn = InvalidXLogRecPtr;\n\twp->donorLastLogTerm = 0;\n\twp->truncateLsn = InvalidXLogRecPtr;\n\n\t/* legacy: generations disabled */\n\tif (!WalProposerGenerationsEnabled(wp) && wp->mconf.generation == INVALID_GENERATION)\n\t{\n\t\tint\t\t\tn_ready = 0;\n\n\t\tfor (int i = 0; i < wp->n_safekeepers; i++)\n\t\t{\n\t\t\tif (wp->safekeeper[i].state == SS_WAIT_ELECTED)\n\t\t\t{\n\t\t\t\tn_ready++;\n\n\t\t\t\tif (GetLastLogTerm(&wp->safekeeper[i]) > wp->donorLastLogTerm ||\n\t\t\t\t\t(GetLastLogTerm(&wp->safekeeper[i]) == wp->donorLastLogTerm &&\n\t\t\t\t\t wp->safekeeper[i].voteResponse.flushLsn > wp->propTermStartLsn) ||\n\t\t\t\t\twp->donor == NULL)\n\t\t\t\t{\n\t\t\t\t\twp->donorLastLogTerm = GetLastLogTerm(&wp->safekeeper[i]);\n\t\t\t\t\twp->propTermStartLsn = wp->safekeeper[i].voteResponse.flushLsn;\n\t\t\t\t\twp->donor = &wp->safekeeper[i];\n\t\t\t\t}\n\t\t\t\twp->truncateLsn = Max(wp->safekeeper[i].voteResponse.truncateLsn, wp->truncateLsn);\n\t\t\t}\n\t\t}\n\t\tcollected = n_ready >= wp->quorum;\n\t\tif (collected)\n\t\t{\n\t\t\twp_log(LOG, \"walproposer elected with %d/%d votes\", n_ready, wp->n_safekeepers);\n\t\t}\n\t\treturn collected;\n\t}\n\n\t/*\n\t * if generations are enabled we're expected to get to voting only when\n\t * mconf is established.\n\t */\n\tAssert(wp->mconf.generation != INVALID_GENERATION);\n\n\t/*\n\t * We must get votes from both msets if both are present.\n\t */\n\tinitStringInfo(&s);\n\tappendStringInfoString(&s, \"mset voters: \");\n\tif (!VotesCollectedMset(wp, &wp->mconf.members, wp->members_safekeepers, &s))\n\t\tgoto res;\n\tif (wp->mconf.new_members.len > 0)\n\t{\n\t\tappendStringInfoString(&s, \", new_mset voters: \");\n\t\tif (!VotesCollectedMset(wp, 
&wp->mconf.new_members, wp->new_members_safekeepers, &s))\n\t\t\tgoto res;\n\t}\n\twp_log(LOG, \"walproposer elected, %s\", s.data);\n\tcollected = true;\n\nres:\n\tpfree(s.data);\n\treturn collected;\n}\n\n/*\n * Called once a majority of acceptors have voted for us and current proposer\n * has been elected.\n *\n * Sends ProposerElected message to all acceptors in SS_WAIT_ELECTED state and starts\n * replication from walsender.\n */\nstatic void\nHandleElectedProposer(WalProposer *wp)\n{\n\tProcessPropStartPos(wp);\n\tAssert(wp->propTermStartLsn != InvalidXLogRecPtr);\n\n\t/*\n\t * Synchronously download WAL from the most advanced safekeeper. We do\n\t * that only for logical replication (and switching logical walsenders to\n\t * neon_walreader is a todo.)\n\t */\n\tif (!wp->api.recovery_download(wp, wp->donor))\n\t{\n\t\twp_log(FATAL, \"failed to download WAL for logical replicaiton\");\n\t}\n\n\tif (wp->truncateLsn == wp->propTermStartLsn && wp->config->syncSafekeepers)\n\t{\n\t\t/* Sync is not needed: just exit */\n\t\twp->api.finish_sync_safekeepers(wp, wp->propTermStartLsn);\n\t\t/* unreachable */\n\t}\n\n\tfor (int i = 0; i < wp->n_safekeepers; i++)\n\t{\n\t\tif (wp->safekeeper[i].state == SS_WAIT_ELECTED)\n\t\t\tSendProposerElected(&wp->safekeeper[i]);\n\t}\n\n\t/*\n\t * The proposer has been elected, and there will be no quorum waiting\n\t * after this point. There will be no safekeeper with state\n\t * SS_WAIT_ELECTED also, because that state is used only for quorum\n\t * waiting.\n\t */\n\n\tif (wp->config->syncSafekeepers)\n\t{\n\t\t/*\n\t\t * Send empty message to enforce receiving feedback even from nodes\n\t\t * who are fully recovered; this is required to learn they switched\n\t\t * epoch which finishes sync-safeekepers who doesn't generate any real\n\t\t * new records. 
Will go away once we switch to async acks.\n\t\t */\n\t\tBroadcastAppendRequest(wp);\n\n\t\t/* keep polling until all safekeepers are synced */\n\t\treturn;\n\t}\n\n\twp->api.start_streaming(wp, wp->propTermStartLsn);\n\t/* Should not return here */\n}\n\n/* latest term in TermHistory, or 0 if there are no entries */\nstatic term_t\nGetHighestTerm(TermHistory *th)\n{\n\treturn th->n_entries > 0 ? th->entries[th->n_entries - 1].term : 0;\n}\n\n/* safekeeper's epoch is the term of the highest entry in the log */\nstatic term_t\nGetLastLogTerm(Safekeeper *sk)\n{\n\treturn GetHighestTerm(&sk->voteResponse.termHistory);\n}\n\n/* If LSN points to the page header, skip it */\nstatic XLogRecPtr\nSkipXLogPageHeader(WalProposer *wp, XLogRecPtr lsn)\n{\n\tif (XLogSegmentOffset(lsn, wp->config->wal_segment_size) == 0)\n\t{\n\t\tlsn += SizeOfXLogLongPHD;\n\t}\n\telse if (lsn % XLOG_BLCKSZ == 0)\n\t{\n\t\tlsn += SizeOfXLogShortPHD;\n\t}\n\treturn lsn;\n}\n\n/*\n * Called after quorum gave votes and proposer starting position (highest vote\n * term + flush LSN) -- is determined (VotesCollected true), this function\n * adopts it: pushes LSN to shmem, sets wp term history, verifies that the\n * basebackup matches.\n */\nstatic void\nProcessPropStartPos(WalProposer *wp)\n{\n\tTermHistory *dth;\n\tWalproposerShmemState *walprop_shared;\n\n\t/* must have collected votes */\n\tAssert(wp->state == WPS_ELECTED);\n\n\t/*\n\t * If propTermStartLsn is 0, it means flushLsn is 0 everywhere, we are\n\t * bootstrapping and nothing was committed yet. Start streaming from the\n\t * basebackup LSN then.\n\t *\n\t * In case of sync-safekeepers just exit: proceeding is not only pointless\n\t * but harmful, because we'd give safekeepers term history starting with\n\t * 0/0. 
These hacks will go away once we disable implicit timeline\n\t * creation on safekeepers and create it with non zero LSN from the start.\n\t */\n\tif (wp->propTermStartLsn == InvalidXLogRecPtr)\n\t{\n\t\tif (!wp->config->syncSafekeepers)\n\t\t{\n\t\t\twp->propTermStartLsn = wp->truncateLsn = wp->api.get_redo_start_lsn(wp);\n\t\t\twp_log(LOG, \"bumped epochStartLsn to the first record %X/%X\", LSN_FORMAT_ARGS(wp->propTermStartLsn));\n\t\t}\n\t\telse\n\t\t{\n\t\t\twp_log(LOG, \"elected with zero propTermStartLsn in sync-safekeepers, exiting\");\n\t\t\twp->api.finish_sync_safekeepers(wp, wp->propTermStartLsn);\n\t\t}\n\t}\n\tpg_atomic_write_u64(&wp->api.get_shmem_state(wp)->propEpochStartLsn, wp->propTermStartLsn);\n\n\tAssert(wp->truncateLsn != InvalidXLogRecPtr || wp->config->syncSafekeepers);\n\n\t/*\n\t * We will be generating WAL since propTermStartLsn, so we should set\n\t * availableLsn to mark this LSN as the latest available position.\n\t */\n\twp->availableLsn = wp->propTermStartLsn;\n\n\t/*\n\t * Proposer's term history is the donor's + its own entry.\n\t */\n\tdth = &wp->donor->voteResponse.termHistory;\n\twp->propTermHistory.n_entries = dth->n_entries + 1;\n\twp->propTermHistory.entries = palloc(sizeof(TermSwitchEntry) * wp->propTermHistory.n_entries);\n\tif (dth->n_entries > 0)\n\t\tmemcpy(wp->propTermHistory.entries, dth->entries, sizeof(TermSwitchEntry) * dth->n_entries);\n\twp->propTermHistory.entries[wp->propTermHistory.n_entries - 1].term = wp->propTerm;\n\twp->propTermHistory.entries[wp->propTermHistory.n_entries - 1].lsn = wp->propTermStartLsn;\n\n\twp_log(LOG, \"walproposer elected in term \" UINT64_FORMAT \", epochStartLsn %X/%X, donor %s:%s, truncate_lsn %X/%X\",\n\t\t   wp->propTerm,\n\t\t   LSN_FORMAT_ARGS(wp->propTermStartLsn),\n\t\t   wp->donor->host, wp->donor->port,\n\t\t   LSN_FORMAT_ARGS(wp->truncateLsn));\n\n\t/*\n\t * Ensure the basebackup we are running (at RedoStartLsn) matches LSN\n\t * since which we are going to write according 
to the consensus. If not,\n\t * we must bail out, as clog and other non rel data is inconsistent.\n\t */\n\twalprop_shared = wp->api.get_shmem_state(wp);\n\tif (!wp->config->syncSafekeepers && !walprop_shared->replica_promote)\n\t{\n\t\t/*\n\t\t * Basebackup LSN always points to the beginning of the record (not\n\t\t * the page), as StartupXLOG most probably wants it this way.\n\t\t * Safekeepers don't skip header as they need continuous stream of\n\t\t * data, so correct LSN for comparison.\n\t\t */\n\t\tif (SkipXLogPageHeader(wp, wp->propTermStartLsn) != wp->api.get_redo_start_lsn(wp))\n\t\t{\n\t\t\t/*\n\t\t\t * However, allow to proceed if last_log_term on the node which\n\t\t\t * gave the highest vote (i.e. point where we are going to start\n\t\t\t * writing) actually had been won by me; plain restart of\n\t\t\t * walproposer not intervened by concurrent compute which wrote\n\t\t\t * WAL is ok.\n\t\t\t *\n\t\t\t * This avoids compute crash after manual term_bump.\n\t\t\t */\n\t\t\tif (!((dth->n_entries >= 1) && (dth->entries[dth->n_entries - 1].term ==\n\t\t\t\t\t\t\t\t\t\t\tpg_atomic_read_u64(&walprop_shared->mineLastElectedTerm))))\n\t\t\t{\n\t\t\t\t/*\n\t\t\t\t * Panic to restart PG as we need to retake basebackup.\n\t\t\t\t * However, don't dump core as this is kinda expected\n\t\t\t\t * scenario.\n\t\t\t\t */\n\t\t\t\tdisable_core_dump();\n\t\t\t\twp_log(PANIC,\n\t\t\t\t\t   \"collected propTermStartLsn %X/%X, but basebackup LSN %X/%X\",\n\t\t\t\t\t   LSN_FORMAT_ARGS(wp->propTermStartLsn),\n\t\t\t\t\t   LSN_FORMAT_ARGS(wp->api.get_redo_start_lsn(wp)));\n\t\t\t}\n\t\t}\n\t}\n\tpg_atomic_write_u64(&walprop_shared->mineLastElectedTerm, wp->propTerm);\n}\n\n/*\n * Determine for sk the starting streaming point and send it message\n * 1) Announcing we are elected proposer (which immediately advances epoch if\n *    safekeeper is synced, being important for sync-safekeepers)\n * 2) Communicating starting streaming point -- safekeeper must truncate its WAL\n *    
beyond it -- and history of term switching.\n *\n * Sets sk->startStreamingAt.\n */\nstatic void\nSendProposerElected(Safekeeper *sk)\n{\n\tWalProposer *wp = sk->wp;\n\tProposerElected msg;\n\tTermHistory *th;\n\tterm_t\t\tlastCommonTerm;\n\tint\t\t\tidx;\n\n\t/* Now that we are ready to send it's a good moment to create WAL reader */\n\twp->api.wal_reader_allocate(sk);\n\n\t/*\n\t * Determine start LSN by comparing safekeeper's log term switch history\n\t * and proposer's, searching for the divergence point.\n\t *\n\t * Note: there is a vanishingly small chance of no common point even if\n\t * there is some WAL on safekeeper, if immediately after bootstrap compute\n\t * wrote some WAL on single sk and died; we stream since the beginning\n\t * then.\n\t */\n\tth = &sk->voteResponse.termHistory;\n\n\t/* We must start somewhere. */\n\tAssert(wp->propTermHistory.n_entries >= 1);\n\n\tfor (idx = 0; idx < Min(wp->propTermHistory.n_entries, th->n_entries); idx++)\n\t{\n\t\tif (wp->propTermHistory.entries[idx].term != th->entries[idx].term)\n\t\t\tbreak;\n\t\t/* term must begin everywhere at the same point */\n\t\tAssert(wp->propTermHistory.entries[idx].lsn == th->entries[idx].lsn);\n\t}\n\tidx--;\t\t\t\t\t\t/* step back to the last common term */\n\tif (idx < 0)\n\t{\n\t\t/* safekeeper is empty or no common point, start from the beginning */\n\t\tsk->startStreamingAt = wp->propTermHistory.entries[0].lsn;\n\t\twp_log(LOG, \"no common point with sk %s:%s, streaming since first term at %X/%X, termHistory.n_entries=%u\",\n\t\t\t   sk->host, sk->port, LSN_FORMAT_ARGS(sk->startStreamingAt), wp->propTermHistory.n_entries);\n\t}\n\telse\n\t{\n\t\t/*\n\t\t * End of (common) term is the start of the next except it is the last\n\t\t * one; there it is flush_lsn in case of safekeeper or, in case of\n\t\t * proposer, LSN it is currently writing, but then we just pick\n\t\t * safekeeper pos as it obviously can't be higher.\n\t\t */\n\t\tif (wp->propTermHistory.entries[idx].term == 
wp->propTerm)\n\t\t{\n\t\t\tsk->startStreamingAt = sk->voteResponse.flushLsn;\n\t\t}\n\t\telse\n\t\t{\n\t\t\tXLogRecPtr\tpropEndLsn = wp->propTermHistory.entries[idx + 1].lsn;\n\t\t\tXLogRecPtr\tskEndLsn = (idx + 1 < th->n_entries ? th->entries[idx + 1].lsn : sk->voteResponse.flushLsn);\n\n\t\t\tsk->startStreamingAt = Min(propEndLsn, skEndLsn);\n\t\t}\n\t}\n\n\tAssert(sk->startStreamingAt <= wp->availableLsn);\n\n\tmsg.apm.tag = 'e';\n\tmsg.generation = wp->mconf.generation;\n\tmsg.term = wp->propTerm;\n\tmsg.startStreamingAt = sk->startStreamingAt;\n\tmsg.termHistory = &wp->propTermHistory;\n\n\tlastCommonTerm = idx >= 0 ? wp->propTermHistory.entries[idx].term : 0;\n\twp_log(LOG,\n\t\t   \"sending elected msg to node \" UINT64_FORMAT \" generation=%u term=\" UINT64_FORMAT \", startStreamingAt=%X/%X (lastCommonTerm=\" UINT64_FORMAT \"), termHistory.n_entries=%u to %s:%s\",\n\t\t   sk->greetResponse.nodeId, msg.generation, msg.term, LSN_FORMAT_ARGS(msg.startStreamingAt),\n\t\t   lastCommonTerm, msg.termHistory->n_entries, sk->host, sk->port);\n\n\tPAMessageSerialize(wp, (ProposerAcceptorMessage *) &msg, &sk->outbuf, wp->config->proto_version);\n\tif (!AsyncWrite(sk, sk->outbuf.data, sk->outbuf.len, SS_SEND_ELECTED_FLUSH))\n\t\treturn;\n\n\tStartStreaming(sk);\n}\n\n/*\n * Start streaming to safekeeper sk, always updates state to SS_ACTIVE and sets\n * correct event set.\n */\nstatic void\nStartStreaming(Safekeeper *sk)\n{\n\t/*\n\t * This is the only entrypoint to state SS_ACTIVE. 
It's executed exactly\n\t * once for a connection.\n\t */\n\tsk->state = SS_ACTIVE;\n\tsk->active_state = SS_ACTIVE_SEND;\n\tsk->streamingAt = sk->startStreamingAt;\n\n\t/* BEGIN_HADRON */\n\tsk->wp->api.update_safekeeper_status_for_metrics(sk->wp, sk->index, 1);\n\t/* END_HADRON */\n\n\t/*\n\t * Donors can only be in SS_ACTIVE state, so we potentially update the\n\t * donor when we switch one to SS_ACTIVE.\n\t */\n\tUpdateDonorShmem(sk->wp);\n\n\t/* event set will be updated inside SendMessageToNode */\n\tSendMessageToNode(sk);\n}\n\n/*\n * Try to send message to the particular node. Always updates event set. Will\n * send at least one message, if socket is ready.\n *\n * Can be used only for safekeepers in SS_ACTIVE state. State can be changed\n * in case of errors.\n */\nstatic void\nSendMessageToNode(Safekeeper *sk)\n{\n\tAssert(sk->state == SS_ACTIVE);\n\n\t/*\n\t * Note: we always send everything to the safekeeper until WOULDBLOCK or\n\t * nothing left to send\n\t */\n\tHandleActiveState(sk, WL_SOCKET_WRITEABLE);\n}\n\n/*\n * Broadcast new message to all caught-up safekeepers\n */\nstatic void\nBroadcastAppendRequest(WalProposer *wp)\n{\n\tfor (int i = 0; i < wp->n_safekeepers; i++)\n\t\tif (wp->safekeeper[i].state == SS_ACTIVE)\n\t\t\tSendMessageToNode(&wp->safekeeper[i]);\n}\n\nstatic void\nPrepareAppendRequest(WalProposer *wp, AppendRequestHeader *req, XLogRecPtr beginLsn, XLogRecPtr endLsn)\n{\n\tAssert(endLsn >= beginLsn);\n\treq->apm.tag = 'a';\n\treq->generation = wp->mconf.generation;\n\treq->term = wp->propTerm;\n\treq->beginLsn = beginLsn;\n\treq->endLsn = endLsn;\n\treq->commitLsn = wp->commitLsn;\n\treq->truncateLsn = wp->truncateLsn;\n}\n\n/*\n * Process all events happened in SS_ACTIVE state, update event set after that.\n */\nstatic void\nHandleActiveState(Safekeeper *sk, uint32 events)\n{\n\tWalProposer *wp = sk->wp;\n\n\t/*\n\t * Note: we don't known which socket awoke us (sk or nwr). 
However, as\n\t * SendAppendRequests always tries to send at least one msg in\n\t * SS_ACTIVE_SEND be careful not to go there if are only after sk\n\t * response, otherwise it'd create busy loop of pings.\n\t */\n\tif (events & WL_SOCKET_WRITEABLE || sk->active_state == SS_ACTIVE_READ_WAL)\n\t\tif (!SendAppendRequests(sk))\n\t\t\treturn;\n\n\tif (events & WL_SOCKET_READABLE)\n\t\tif (!RecvAppendResponses(sk))\n\t\t\treturn;\n\n#if PG_VERSION_NUM >= 150000\n\t/* expected never to happen, c.f. walprop_pg_active_state_update_event_set */\n\tif (events & WL_SOCKET_CLOSED)\n\t{\n\t\twp_log(WARNING, \"connection to %s:%s in active state failed, got WL_SOCKET_CLOSED on neon_walreader socket\",\n\t\t\t   sk->host, sk->port);\n\t\tShutdownConnection(sk);\n\t\treturn;\n\t}\n#endif\n\n\t/* configures event set for yield whatever is the substate */\n\twp->api.active_state_update_event_set(sk);\n}\n\n/*\n * Send WAL messages starting from sk->streamingAt until the end or non-writable\n * socket or neon_walreader blocks, whichever comes first; active_state is\n * updated accordingly. Caller should take care of updating event set. 
Even if\n * no unsent WAL is available, at least one empty message will be sent as a\n * heartbeat, if socket is ready.\n *\n * Resets state and kills the connections if any error on them is encountered.\n * Returns false in this case, true otherwise.\n */\nstatic bool\nSendAppendRequests(Safekeeper *sk)\n{\n\tWalProposer *wp = sk->wp;\n\tXLogRecPtr\tendLsn;\n\tPGAsyncWriteResult writeResult;\n\tbool\t\tsentAnything = false;\n\tAppendRequestHeader *req;\n\n\tif (sk->active_state == SS_ACTIVE_FLUSH)\n\t{\n\t\tif (!AsyncFlush(sk))\n\n\t\t\t/*\n\t\t\t * AsyncFlush failed, that could happen if the socket is closed or\n\t\t\t * we have nothing to write and should wait for writeable socket.\n\t\t\t */\n\t\t\treturn sk->state == SS_ACTIVE;\n\n\t\t/* Event set will be updated in the end of HandleActiveState */\n\t\tsk->active_state = SS_ACTIVE_SEND;\n\t}\n\n\twhile (sk->streamingAt != wp->availableLsn || !sentAnything)\n\t{\n\t\tif (sk->active_state == SS_ACTIVE_SEND)\n\t\t{\n\t\t\tsentAnything = true;\n\n\t\t\tendLsn = sk->streamingAt;\n\t\t\tendLsn += MAX_SEND_SIZE;\n\n\t\t\t/* if we went beyond available WAL, back off */\n\t\t\tif (endLsn > wp->availableLsn)\n\t\t\t{\n\t\t\t\tendLsn = wp->availableLsn;\n\t\t\t}\n\n\t\t\treq = &sk->appendRequest;\n\t\t\tPrepareAppendRequest(sk->wp, &sk->appendRequest, sk->streamingAt, endLsn);\n\n\t\t\twp_log(DEBUG5, \"sending message len %ld beginLsn=%X/%X endLsn=%X/%X commitLsn=%X/%X truncateLsn=%X/%X to %s:%s\",\n\t\t\t\t   req->endLsn - req->beginLsn,\n\t\t\t\t   LSN_FORMAT_ARGS(req->beginLsn),\n\t\t\t\t   LSN_FORMAT_ARGS(req->endLsn),\n\t\t\t\t   LSN_FORMAT_ARGS(req->commitLsn),\n\t\t\t\t   LSN_FORMAT_ARGS(wp->truncateLsn), sk->host, sk->port);\n\n\t\t\tresetStringInfo(&sk->outbuf);\n\n\t\t\t/* write AppendRequest header */\n\t\t\tPAMessageSerialize(wp, (ProposerAcceptorMessage *) req, &sk->outbuf, wp->config->proto_version);\n\t\t\t/* prepare for reading WAL into the outbuf */\n\t\t\tenlargeStringInfo(&sk->outbuf, req->endLsn - 
req->beginLsn);\n\t\t\tsk->active_state = SS_ACTIVE_READ_WAL;\n\t\t}\n\n\t\tif (sk->active_state == SS_ACTIVE_READ_WAL)\n\t\t{\n\t\t\tchar\t   *errmsg;\n\t\t\tint\t\t\treq_len;\n\n\t\t\treq = &sk->appendRequest;\n\t\t\treq_len = req->endLsn - req->beginLsn;\n\n\t\t\t/*\n\t\t\t * We send zero sized AppendRequests as heartbeats; don't wal_read\n\t\t\t * for these.\n\t\t\t */\n\t\t\tif (req_len > 0)\n\t\t\t{\n\t\t\t\tswitch (wp->api.wal_read(sk,\n\t\t\t\t\t\t\t\t\t\t &sk->outbuf.data[sk->outbuf.len],\n\t\t\t\t\t\t\t\t\t\t req->beginLsn,\n\t\t\t\t\t\t\t\t\t\t req_len,\n\t\t\t\t\t\t\t\t\t\t &errmsg))\n\t\t\t\t{\n\t\t\t\t\tcase NEON_WALREAD_SUCCESS:\n\t\t\t\t\t\tbreak;\n\t\t\t\t\tcase NEON_WALREAD_WOULDBLOCK:\n\t\t\t\t\t\treturn true;\n\t\t\t\t\tcase NEON_WALREAD_ERROR:\n\t\t\t\t\t\twp_log(WARNING, \"WAL reading for node %s:%s failed: %s\",\n\t\t\t\t\t\t\t   sk->host, sk->port, errmsg);\n\t\t\t\t\t\tShutdownConnection(sk);\n\t\t\t\t\t\treturn false;\n\t\t\t\t\tdefault:\n\t\t\t\t\t\tAssert(false);\n\t\t\t\t}\n\t\t\t}\n\n\t\t\tsk->outbuf.len += req_len;\n\n\t\t\twriteResult = wp->api.conn_async_write(sk, sk->outbuf.data, sk->outbuf.len);\n\n\t\t\t/* Mark current message as sent, whatever the result is */\n\t\t\tsk->streamingAt = req->endLsn;\n\n\t\t\tswitch (writeResult)\n\t\t\t{\n\t\t\t\tcase PG_ASYNC_WRITE_SUCCESS:\n\t\t\t\t\t/* Continue writing the next message */\n\t\t\t\t\tsk->active_state = SS_ACTIVE_SEND;\n\t\t\t\t\tbreak;\n\n\t\t\t\tcase PG_ASYNC_WRITE_TRY_FLUSH:\n\n\t\t\t\t\t/*\n\t\t\t\t\t * We still need to call PQflush some more to finish the\n\t\t\t\t\t * job. 
Caller function will handle this by setting right\n\t\t\t\t\t * event set.\n\t\t\t\t\t */\n\t\t\t\t\tsk->active_state = SS_ACTIVE_FLUSH;\n\t\t\t\t\treturn true;\n\n\t\t\t\tcase PG_ASYNC_WRITE_FAIL:\n\t\t\t\t\twp_log(WARNING, \"failed to send to node %s:%s in %s state: %s\",\n\t\t\t\t\t\t   sk->host, sk->port, FormatSafekeeperState(sk),\n\t\t\t\t\t\t   wp->api.conn_error_message(sk));\n\t\t\t\t\tShutdownConnection(sk);\n\t\t\t\t\treturn false;\n\t\t\t\tdefault:\n\t\t\t\t\tAssert(false);\n\t\t\t\t\treturn false;\n\t\t\t}\n\t\t}\n\t}\n\n\treturn true;\n}\n\n/*\n * Receive and process all available feedback.\n *\n * Resets state and kills the connection if any error on it is encountered.\n * Returns false in this case, true otherwise.\n *\n * NB: This function can call SendMessageToNode and produce new messages.\n */\nstatic bool\nRecvAppendResponses(Safekeeper *sk)\n{\n\tWalProposer *wp = sk->wp;\n\tbool\t\treadAnything = false;\n\n\twhile (true)\n\t{\n\t\t/*\n\t\t * If our reading doesn't immediately succeed, any necessary error\n\t\t * handling or state setting is taken care of. 
We can leave any other\n\t\t * work until later.\n\t\t */\n\t\tsk->appendResponse.apm.tag = 'a';\n\t\tif (!AsyncReadMessage(sk, (AcceptorProposerMessage *) &sk->appendResponse))\n\t\t\tbreak;\n\n\t\twp_log(DEBUG2, \"received message term=\" INT64_FORMAT \" flushLsn=%X/%X commitLsn=%X/%X from %s:%s\",\n\t\t\t   sk->appendResponse.term,\n\t\t\t   LSN_FORMAT_ARGS(sk->appendResponse.flushLsn),\n\t\t\t   LSN_FORMAT_ARGS(sk->appendResponse.commitLsn),\n\t\t\t   sk->host, sk->port);\n\n\t\treadAnything = true;\n\n\t\t/* should never happen: sk is expected to send ERROR instead */\n\t\tif (sk->appendResponse.generation != wp->mconf.generation)\n\t\t{\n\t\t\twp_log(FATAL, \"safekeeper {id = %lu, ep = %s:%s} sent response with generation %u, expected %u\",\n\t\t\t\t   sk->greetResponse.nodeId, sk->host, sk->port,\n\t\t\t\t   sk->appendResponse.generation, wp->mconf.generation);\n\t\t}\n\n\t\tif (sk->appendResponse.term > wp->propTerm)\n\t\t{\n\t\t\t/*\n\t\t\t *\n\t\t\t * Term has changed to higher one, probably another compute is\n\t\t\t * running. If this is the case we could PANIC as well because\n\t\t\t * likely it inserted some data and our basebackup is unsuitable\n\t\t\t * anymore. However, we also bump term manually (term_bump\n\t\t\t * endpoint) on safekeepers for migration purposes, in this case\n\t\t\t * we do want compute to stay alive. So restart walproposer with\n\t\t\t * FATAL instead of panicking; if basebackup is spoiled next\n\t\t\t * election will notice this.\n\t\t\t */\n\t\t\twp_log(FATAL, \"WAL acceptor %s:%s with term \" INT64_FORMAT \" rejected our request, our term \" INT64_FORMAT \", meaning another compute is running at the same time, and it conflicts with us\",\n\t\t\t\t   sk->host, sk->port,\n\t\t\t\t   sk->appendResponse.term, wp->propTerm);\n\t\t}\n\n\t\tHandleSafekeeperResponse(wp, sk);\n\t}\n\n\tif (!readAnything)\n\t\treturn sk->state == SS_ACTIVE;\n\n\treturn sk->state == SS_ACTIVE;\n}\n\n#define psfeedback_log(fmt, key, ...) 
\\\n\twp_log(DEBUG2, \"ParsePageserverFeedbackMessage: %s \" fmt, key, __VA_ARGS__)\n\n/* Parse a PageserverFeedback message, or the PageserverFeedback part of an AppendResponse */\nstatic void\nParsePageserverFeedbackMessage(WalProposer *wp, StringInfo reply_message, PageserverFeedback *ps_feedback)\n{\n\tuint8\t\tnkeys;\n\tint\t\t\ti;\n\n\t/* initialize the struct before parsing */\n\tmemset(ps_feedback, 0, sizeof(PageserverFeedback));\n\tps_feedback->present = true;\n\n\t/* get number of custom keys */\n\tnkeys = pq_getmsgbyte(reply_message);\n\n\tfor (i = 0; i < nkeys; i++)\n\t{\n\t\tconst char *key = pq_getmsgrawstring(reply_message);\n\t\tunsigned int value_len = pq_getmsgint(reply_message, sizeof(int32));\n\n\t\tif (strcmp(key, \"current_timeline_size\") == 0)\n\t\t{\n\t\t\tAssert(value_len == sizeof(int64));\n\t\t\tps_feedback->currentClusterSize = pq_getmsgint64(reply_message);\n\t\t\tpsfeedback_log(UINT64_FORMAT, key, ps_feedback->currentClusterSize);\n\t\t}\n\t\telse if ((strcmp(key, \"ps_writelsn\") == 0) || (strcmp(key, \"last_received_lsn\") == 0))\n\t\t{\n\t\t\tAssert(value_len == sizeof(int64));\n\t\t\tps_feedback->last_received_lsn = pq_getmsgint64(reply_message);\n\t\t\tpsfeedback_log(\"%X/%X\", key, LSN_FORMAT_ARGS(ps_feedback->last_received_lsn));\n\t\t}\n\t\telse if ((strcmp(key, \"ps_flushlsn\") == 0) || (strcmp(key, \"disk_consistent_lsn\") == 0))\n\t\t{\n\t\t\tAssert(value_len == sizeof(int64));\n\t\t\tps_feedback->disk_consistent_lsn = pq_getmsgint64(reply_message);\n\t\t\tpsfeedback_log(\"%X/%X\", key, LSN_FORMAT_ARGS(ps_feedback->disk_consistent_lsn));\n\t\t}\n\t\telse if ((strcmp(key, \"ps_applylsn\") == 0) || (strcmp(key, \"remote_consistent_lsn\") == 0))\n\t\t{\n\t\t\tAssert(value_len == sizeof(int64));\n\t\t\tps_feedback->remote_consistent_lsn = pq_getmsgint64(reply_message);\n\t\t\tpsfeedback_log(\"%X/%X\", key, LSN_FORMAT_ARGS(ps_feedback->remote_consistent_lsn));\n\t\t}\n\t\telse if ((strcmp(key, \"ps_replytime\") == 0) || 
(strcmp(key, \"replytime\") == 0))\n\t\t{\n\t\t\tAssert(value_len == sizeof(int64));\n\t\t\tps_feedback->replytime = pq_getmsgint64(reply_message);\n\t\t\tpsfeedback_log(\"%s\", key, timestamptz_to_str(ps_feedback->replytime));\n\t\t}\n\t\telse if (strcmp(key, \"shard_number\") == 0)\n\t\t{\n\t\t\tAssert(value_len == sizeof(uint32));\n\t\t\tps_feedback->shard_number = pq_getmsgint(reply_message, sizeof(uint32));\n\t\t\tpsfeedback_log(\"%u\", key, ps_feedback->shard_number);\n\t\t}\n\t\telse if (strcmp(key, \"corruption_detected\") == 0)\n\t\t{\n\t\t\tAssert(value_len == 1);\n\t\t\tps_feedback->corruption_detected = pq_getmsgbyte(reply_message) != 0;\n\t\t\tpsfeedback_log(\"%s\", key, ps_feedback->corruption_detected ? \"true\" : \"false\");\n\t\t}\n\t\telse\n\t\t{\n\t\t\t/*\n\t\t\t * Skip unknown keys to support backward compatible protocol\n\t\t\t * changes\n\t\t\t */\n\t\t\twp_log(LOG, \"ParsePageserverFeedbackMessage: unknown key: %s len %d\", key, value_len);\n\t\t\tpq_getmsgbytes(reply_message, value_len);\n\t\t};\n\t}\n}\n\n/*\n * Get minimum of flushed LSNs of all safekeepers, which is the LSN of the\n * last WAL record that can be safely discarded.\n */\nstatic XLogRecPtr\nCalculateMinFlushLsn(WalProposer *wp)\n{\n\tXLogRecPtr\tlsn = wp->n_safekeepers > 0\n\t\t? wp->safekeeper[0].appendResponse.flushLsn\n\t\t: InvalidXLogRecPtr;\n\n\tfor (int i = 1; i < wp->n_safekeepers; i++)\n\t{\n\t\tlsn = Min(lsn, wp->safekeeper[i].appendResponse.flushLsn);\n\t}\n\treturn lsn;\n}\n\n/*\n * GetAcknowledgedByQuorumWALPosition for a single member set `mset`.\n *\n * `msk` is the member -> safekeeper mapping for mset, i.e. 
members_safekeepers\n * or new_members_safekeepers.\n */\nstatic XLogRecPtr\nGetCommittedMset(WalProposer *wp, MemberSet *mset, Safekeeper **msk)\n{\n\tXLogRecPtr\tresponses[MAX_SAFEKEEPERS];\n\n\t/*\n\t * Ascending sort acknowledged LSNs.\n\t */\n\tAssert(mset->len <= MAX_SAFEKEEPERS);\n\tfor (uint32 i = 0; i < mset->len; i++)\n\t{\n\t\tSafekeeper *sk = msk[i];\n\n\t\t/*\n\t\t * Like in Raft, we aren't allowed to commit entries from previous\n\t\t * terms, so ignore reported LSN until it gets to propTermStartLsn.\n\t\t *\n\t\t * Note: we ignore sk state, which is ok: before first ack flushLsn is\n\t\t * 0, and later we just preserve value across reconnections. It would\n\t\t * be ok to check for SS_ACTIVE as well.\n\t\t */\n\t\tif (sk != NULL && sk->appendResponse.flushLsn >= wp->propTermStartLsn)\n\t\t{\n\t\t\tresponses[i] = sk->appendResponse.flushLsn;\n\t\t}\n\t\telse\n\t\t{\n\t\t\tresponses[i] = 0;\n\t\t}\n\t}\n\tqsort(responses, mset->len, sizeof(XLogRecPtr), CompareLsn);\n\n\t/*\n\t * And get value committed by the quorum. A way to view this: to get the\n\t * highest value committed on the quorum, in the ordered array we skip n -\n\t * n_quorum elements to get to the first (lowest) value present on all sks\n\t * of the highest quorum.\n\t */\n\treturn responses[mset->len - MsetQuorum(mset)];\n}\n\n/*\n * Calculate WAL position acknowledged by quorum, i.e. 
which may be regarded\n * committed.\n *\n * Zero may be returned when there is no quorum of nodes recovered to term start\n * lsn which sent feedback yet.\n */\nstatic XLogRecPtr\nGetAcknowledgedByQuorumWALPosition(WalProposer *wp)\n{\n\tXLogRecPtr\tcommitted;\n\n\t/* legacy: generations disabled */\n\tif (!WalProposerGenerationsEnabled(wp) && wp->mconf.generation == INVALID_GENERATION)\n\t{\n\t\tXLogRecPtr\tresponses[MAX_SAFEKEEPERS];\n\n\t\t/*\n\t\t * Sort acknowledged LSNs\n\t\t */\n\t\tfor (int i = 0; i < wp->n_safekeepers; i++)\n\t\t{\n\t\t\t/*\n\t\t\t * Like in Raft, we aren't allowed to commit entries from previous\n\t\t\t * terms, so ignore reported LSN until it gets to\n\t\t\t * propTermStartLsn.\n\t\t\t *\n\t\t\t * Note: we ignore sk state, which is ok: before first ack\n\t\t\t * flushLsn is 0, and later we just preserve value across\n\t\t\t * reconnections. It would be ok to check for SS_ACTIVE as well.\n\t\t\t */\n\t\t\tresponses[i] = wp->safekeeper[i].appendResponse.flushLsn >= wp->propTermStartLsn ? wp->safekeeper[i].appendResponse.flushLsn : 0;\n\t\t}\n\t\tqsort(responses, wp->n_safekeepers, sizeof(XLogRecPtr), CompareLsn);\n\n\t\t/*\n\t\t * Get the smallest LSN committed by quorum\n\t\t */\n\t\treturn responses[wp->n_safekeepers - wp->quorum];\n\t}\n\n\tcommitted = GetCommittedMset(wp, &wp->mconf.members, wp->members_safekeepers);\n\tif (wp->mconf.new_members.len > 0)\n\t{\n\t\tXLogRecPtr\tnew_mset_committed = GetCommittedMset(wp, &wp->mconf.new_members, wp->new_members_safekeepers);\n\n\t\tcommitted = Min(committed, new_mset_committed);\n\t}\n\treturn committed;\n}\n\n/*\n * Return safekeeper with active connection from which WAL can be downloaded, or\n * none if it doesn't exist. 
donor_lsn is set to end position of the donor to\n * the best of our knowledge.\n */\nstatic void\nUpdateDonorShmem(WalProposer *wp)\n{\n\tSafekeeper *donor = NULL;\n\tint\t\t\ti;\n\tXLogRecPtr\tdonor_lsn = InvalidXLogRecPtr;\n\n\tif (wp->state < WPS_ELECTED)\n\t{\n\t\twp_log(WARNING, \"UpdateDonorShmem called before elections are won\");\n\t\treturn;\n\t}\n\n\t/*\n\t * First, consider node which had determined our term start LSN as we know\n\t * about its position immediately after election before any feedbacks are\n\t * sent.\n\t */\n\tif (wp->donor->state >= SS_WAIT_ELECTED)\n\t{\n\t\tdonor = wp->donor;\n\t\tdonor_lsn = wp->propTermStartLsn;\n\t}\n\n\t/*\n\t * But also check feedbacks from all nodes with live connections and take\n\t * the highest one. Note: if node sends feedbacks it already processed\n\t * elected message so its term is fine.\n\t */\n\tfor (i = 0; i < wp->n_safekeepers; i++)\n\t{\n\t\tSafekeeper *sk = &wp->safekeeper[i];\n\n\t\tif (sk->state == SS_ACTIVE && sk->appendResponse.flushLsn > donor_lsn)\n\t\t{\n\t\t\tdonor = sk;\n\t\t\tdonor_lsn = sk->appendResponse.flushLsn;\n\t\t}\n\t}\n\n\tif (donor == NULL)\n\t{\n\t\twp_log(WARNING, \"UpdateDonorShmem didn't find a suitable donor, skipping\");\n\t\treturn;\n\t}\n\twp->api.update_donor(wp, donor, donor_lsn);\n}\n\n/*\n * Process AppendResponse message from safekeeper.\n */\nstatic void\nHandleSafekeeperResponse(WalProposer *wp, Safekeeper *fromsk)\n{\n\tXLogRecPtr\tcandidateTruncateLsn;\n\tXLogRecPtr\tnewCommitLsn;\n\n\tnewCommitLsn = GetAcknowledgedByQuorumWALPosition(wp);\n\tif (newCommitLsn > wp->commitLsn)\n\t{\n\t\twp->commitLsn = newCommitLsn;\n\t\t/* Send new value to all safekeepers. */\n\t\tBroadcastAppendRequest(wp);\n\t}\n\n\t/*\n\t * Unlock syncrep waiters, update ps_feedback, CheckGracefulShutdown().\n\t * The last one will terminate the process if the shutdown is requested\n\t * and WAL is committed by the quorum. 
BroadcastAppendRequest() should be\n\t * called to notify safekeepers about the new commitLsn.\n\t */\n\twp->api.process_safekeeper_feedback(wp, fromsk);\n\n\t/*\n\t * Try to advance truncateLsn -- the last record flushed to all\n\t * safekeepers.\n\t *\n\t * Advanced truncateLsn should not be higher than commitLsn. This prevents\n\t * surprising violation of truncateLsn <= commitLsn invariant which might\n\t * occur because commitLsn generally can't be advanced based on feedback\n\t * from safekeeper who is still in the previous epoch (similar to 'leader\n\t * can't commit entries from previous term' in Raft).\n\t */\n\tcandidateTruncateLsn = CalculateMinFlushLsn(wp);\n\tcandidateTruncateLsn = Min(candidateTruncateLsn, wp->commitLsn);\n\tif (candidateTruncateLsn > wp->truncateLsn)\n\t{\n\t\twp->truncateLsn = candidateTruncateLsn;\n\t}\n\n\t/*\n\t * Generally sync is done when majority reached propTermStartLsn so we\n\t * committed it and made the majority aware of it, ensuring they are ready\n\t * to give all WAL to pageserver. It would mean whichever majority is\n\t * alive, there will be at least one safekeeper who is able to stream WAL\n\t * to pageserver to make basebackup possible. 
However, since at the moment\n\t * we don't have any good mechanism of defining the healthy and most\n\t * advanced safekeeper who should push the wal into pageserver and\n\t * basically the random one gets connected, to prevent hanging basebackup\n\t * (due to pageserver connecting to not-synced-safekeeper) we currently\n\t * wait for all seemingly alive safekeepers to get synced.\n\t */\n\tif (wp->config->syncSafekeepers)\n\t{\n\t\tfor (int i = 0; i < wp->n_safekeepers; i++)\n\t\t{\n\t\t\tSafekeeper *sk = &wp->safekeeper[i];\n\t\t\tbool\t\tsynced = sk->appendResponse.commitLsn >= wp->propTermStartLsn;\n\n\t\t\t/* alive safekeeper which is not synced yet; wait for it */\n\t\t\tif (sk->state != SS_OFFLINE && !synced)\n\t\t\t\treturn;\n\t\t}\n\n\t\tif (newCommitLsn >= wp->propTermStartLsn)\n\t\t{\n\t\t\t/* A quorum of safekeepers has been synced! */\n\n\t\t\t/*\n\t\t\t * Send empty message to broadcast latest truncateLsn to all\n\t\t\t * safekeepers. This helps to finish next sync-safekeepers\n\t\t\t * earlier, by skipping recovery step.\n\t\t\t *\n\t\t\t * We don't need to wait for response because it doesn't affect\n\t\t\t * correctness, and TCP should be able to deliver the message to\n\t\t\t * safekeepers in case of network working properly.\n\t\t\t */\n\t\t\tBroadcastAppendRequest(wp);\n\n\t\t\twp->api.finish_sync_safekeepers(wp, wp->propTermStartLsn);\n\t\t\t/* unreachable */\n\t\t}\n\t}\n}\n\n/* Serialize MembershipConfiguration into buf. 
*/\nstatic void\nMembershipConfigurationSerialize(MembershipConfiguration *mconf, StringInfo buf)\n{\n\tuint32\t\ti;\n\n\tpq_sendint32(buf, mconf->generation);\n\n\tpq_sendint32(buf, mconf->members.len);\n\tfor (i = 0; i < mconf->members.len; i++)\n\t{\n\t\tpq_sendint64(buf, mconf->members.m[i].node_id);\n\t\tpq_send_ascii_string(buf, mconf->members.m[i].host);\n\t\tpq_sendint16(buf, mconf->members.m[i].port);\n\t}\n\n\t/*\n\t * There is no special mark for absent new_members; zero members is\n\t * invalid, so zero len means absent.\n\t */\n\tpq_sendint32(buf, mconf->new_members.len);\n\tfor (i = 0; i < mconf->new_members.len; i++)\n\t{\n\t\tpq_sendint64(buf, mconf->new_members.m[i].node_id);\n\t\tpq_send_ascii_string(buf, mconf->new_members.m[i].host);\n\t\tpq_sendint16(buf, mconf->new_members.m[i].port);\n\t}\n}\n\n/* Serialize proposer -> acceptor message into buf using specified version */\nstatic void\nPAMessageSerialize(WalProposer *wp, ProposerAcceptorMessage *msg, StringInfo buf, int proto_version)\n{\n\t/* both versions are supported currently until we fully migrate to 3 */\n\tAssert(proto_version == 3 || proto_version == 2);\n\n\tresetStringInfo(buf);\n\n\tif (proto_version == 3)\n\t{\n\t\t/*\n\t\t * v2 sends structs for some messages as is, so commonly send tag only\n\t\t * for v3\n\t\t */\n\t\tpq_sendint8(buf, msg->tag);\n\n\t\tswitch (msg->tag)\n\t\t{\n\t\t\tcase 'g':\n\t\t\t\t{\n\t\t\t\t\tProposerGreeting *m = (ProposerGreeting *) msg;\n\n\t\t\t\t\tpq_send_ascii_string(buf, m->tenant_id);\n\t\t\t\t\tpq_send_ascii_string(buf, m->timeline_id);\n\t\t\t\t\tMembershipConfigurationSerialize(&m->mconf, buf);\n\t\t\t\t\tpq_sendint32(buf, m->pg_version);\n\t\t\t\t\tpq_sendint64(buf, m->system_id);\n\t\t\t\t\tpq_sendint32(buf, m->wal_seg_size);\n\t\t\t\t\tbreak;\n\t\t\t\t}\n\t\t\tcase 'v':\n\t\t\t\t{\n\t\t\t\t\tVoteRequest *m = (VoteRequest *) msg;\n\n\t\t\t\t\tpq_sendint32(buf, m->generation);\n\t\t\t\t\tpq_sendint64(buf, 
m->term);\n\t\t\t\t\tbreak;\n\n\t\t\t\t}\n\t\t\tcase 'e':\n\t\t\t\t{\n\t\t\t\t\tProposerElected *m = (ProposerElected *) msg;\n\n\t\t\t\t\tpq_sendint32(buf, m->generation);\n\t\t\t\t\tpq_sendint64(buf, m->term);\n\t\t\t\t\tpq_sendint64(buf, m->startStreamingAt);\n\t\t\t\t\tpq_sendint32(buf, m->termHistory->n_entries);\n\t\t\t\t\tfor (uint32 i = 0; i < m->termHistory->n_entries; i++)\n\t\t\t\t\t{\n\t\t\t\t\t\tpq_sendint64(buf, m->termHistory->entries[i].term);\n\t\t\t\t\t\tpq_sendint64(buf, m->termHistory->entries[i].lsn);\n\t\t\t\t\t}\n\t\t\t\t\tbreak;\n\t\t\t\t}\n\t\t\tcase 'a':\n\t\t\t\t{\n\t\t\t\t\t/*\n\t\t\t\t\t * Note: this serializes only AppendRequestHeader, caller\n\t\t\t\t\t * is expected to append WAL data later.\n\t\t\t\t\t */\n\t\t\t\t\tAppendRequestHeader *m = (AppendRequestHeader *) msg;\n\n\t\t\t\t\tpq_sendint32(buf, m->generation);\n\t\t\t\t\tpq_sendint64(buf, m->term);\n\t\t\t\t\tpq_sendint64(buf, m->beginLsn);\n\t\t\t\t\tpq_sendint64(buf, m->endLsn);\n\t\t\t\t\tpq_sendint64(buf, m->commitLsn);\n\t\t\t\t\tpq_sendint64(buf, m->truncateLsn);\n\t\t\t\t\tbreak;\n\t\t\t\t}\n\t\t\tdefault:\n\t\t\t\twp_log(FATAL, \"unexpected message type %c to serialize\", msg->tag);\n\t\t}\n\t\treturn;\n\t}\n\n\tif (proto_version == 2)\n\t{\n\t\tswitch (msg->tag)\n\t\t{\n\t\t\tcase 'g':\n\t\t\t\t{\n\t\t\t\t\t/* v2 sent struct as is */\n\t\t\t\t\tProposerGreeting *m = (ProposerGreeting *) msg;\n\t\t\t\t\tProposerGreetingV2 greetRequestV2;\n\n\t\t\t\t\t/* Fill also v2 struct. 
*/\n\t\t\t\t\tgreetRequestV2.tag = 'g';\n\t\t\t\t\tgreetRequestV2.protocolVersion = proto_version;\n\t\t\t\t\tgreetRequestV2.pgVersion = m->pg_version;\n\n\t\t\t\t\t/*\n\t\t\t\t\t * v3 removed this field because it's easier to pass as\n\t\t\t\t\t * libpq or START_WAL_PUSH options\n\t\t\t\t\t */\n\t\t\t\t\tmemset(&greetRequestV2.proposerId, 0, sizeof(greetRequestV2.proposerId));\n\t\t\t\t\tgreetRequestV2.systemId = wp->config->systemId;\n\t\t\t\t\tif (*m->timeline_id != '\\0' &&\n\t\t\t\t\t\t!HexDecodeString(greetRequestV2.timeline_id, m->timeline_id, 16))\n\t\t\t\t\t\twp_log(FATAL, \"could not parse neon.timeline_id, %s\", m->timeline_id);\n\t\t\t\t\tif (*m->tenant_id != '\\0' &&\n\t\t\t\t\t\t!HexDecodeString(greetRequestV2.tenant_id, m->tenant_id, 16))\n\t\t\t\t\t\twp_log(FATAL, \"could not parse neon.tenant_id, %s\", m->tenant_id);\n\n\t\t\t\t\tgreetRequestV2.timeline = wp->config->pgTimeline;\n\t\t\t\t\tgreetRequestV2.walSegSize = wp->config->wal_segment_size;\n\n\t\t\t\t\tpq_sendbytes(buf, (char *) &greetRequestV2, sizeof(greetRequestV2));\n\t\t\t\t\tbreak;\n\t\t\t\t}\n\t\t\tcase 'v':\n\t\t\t\t{\n\t\t\t\t\t/* v2 sent struct as is */\n\t\t\t\t\tVoteRequest *m = (VoteRequest *) msg;\n\t\t\t\t\tVoteRequestV2 voteRequestV2;\n\n\t\t\t\t\tvoteRequestV2.tag = m->pam.tag;\n\t\t\t\t\tvoteRequestV2.term = m->term;\n\t\t\t\t\t/* removed field */\n\t\t\t\t\tmemset(&voteRequestV2.proposerId, 0, sizeof(voteRequestV2.proposerId));\n\t\t\t\t\tpq_sendbytes(buf, (char *) &voteRequestV2, sizeof(voteRequestV2));\n\t\t\t\t\tbreak;\n\t\t\t\t}\n\t\t\tcase 'e':\n\t\t\t\t{\n\t\t\t\t\tProposerElected *m = (ProposerElected *) msg;\n\n\t\t\t\t\tpq_sendint64_le(buf, m->apm.tag);\n\t\t\t\t\tpq_sendint64_le(buf, m->term);\n\t\t\t\t\tpq_sendint64_le(buf, m->startStreamingAt);\n\t\t\t\t\tpq_sendint32_le(buf, m->termHistory->n_entries);\n\t\t\t\t\tfor (int i = 0; i < m->termHistory->n_entries; i++)\n\t\t\t\t\t{\n\t\t\t\t\t\tpq_sendint64_le(buf, 
m->termHistory->entries[i].term);\n\t\t\t\t\t\tpq_sendint64_le(buf, m->termHistory->entries[i].lsn);\n\t\t\t\t\t}\n\n\t\t\t\t\t/*\n\t\t\t\t\t * Removed timeline_start_lsn. Still send it as a valid\n\t\t\t\t\t * value until safekeepers taking it from term history are\n\t\t\t\t\t * deployed.\n\t\t\t\t\t */\n\t\t\t\t\tpq_sendint64_le(buf, m->termHistory->entries[0].lsn);\n\t\t\t\t\tbreak;\n\t\t\t\t}\n\t\t\tcase 'a':\n\n\t\t\t\t/*\n\t\t\t\t * Note: this serializes only AppendRequestHeader, caller is\n\t\t\t\t * expected to append WAL data later.\n\t\t\t\t */\n\t\t\t\t{\n\t\t\t\t\t/* v2 sent struct as is */\n\t\t\t\t\tAppendRequestHeader *m = (AppendRequestHeader *) msg;\n\t\t\t\t\tAppendRequestHeaderV2 appendRequestHeaderV2;\n\n\t\t\t\t\tappendRequestHeaderV2.tag = m->apm.tag;\n\t\t\t\t\tappendRequestHeaderV2.term = m->term;\n\t\t\t\t\tappendRequestHeaderV2.epochStartLsn = 0;\t/* removed field */\n\t\t\t\t\tappendRequestHeaderV2.beginLsn = m->beginLsn;\n\t\t\t\t\tappendRequestHeaderV2.endLsn = m->endLsn;\n\t\t\t\t\tappendRequestHeaderV2.commitLsn = m->commitLsn;\n\t\t\t\t\tappendRequestHeaderV2.truncateLsn = m->truncateLsn;\n\t\t\t\t\t/* removed field */\n\t\t\t\t\tmemset(&appendRequestHeaderV2.proposerId, 0, sizeof(appendRequestHeaderV2.proposerId));\n\n\t\t\t\t\tpq_sendbytes(buf, (char *) &appendRequestHeaderV2, sizeof(appendRequestHeaderV2));\n\t\t\t\t\tbreak;\n\t\t\t\t}\n\n\t\t\tdefault:\n\t\t\t\twp_log(FATAL, \"unexpected message type %c to serialize\", msg->tag);\n\t\t}\n\t\treturn;\n\t}\n\twp_log(FATAL, \"unexpected proto_version %d\", proto_version);\n}\n\n/*\n * Try to read CopyData message from i'th safekeeper, resetting connection on\n * failure.\n */\nstatic bool\nAsyncRead(Safekeeper *sk, char **buf, int *buf_size)\n{\n\tWalProposer *wp = sk->wp;\n\n\tswitch (wp->api.conn_async_read(sk, buf, buf_size))\n\t{\n\t\tcase PG_ASYNC_READ_SUCCESS:\n\t\t\treturn true;\n\n\t\tcase PG_ASYNC_READ_TRY_AGAIN:\n\t\t\t/* WL_SOCKET_READABLE is always set during copyboth 
*/\n\t\t\treturn false;\n\n\t\tcase PG_ASYNC_READ_FAIL:\n\t\t\twp_log(WARNING, \"failed to read from node %s:%s in %s state: %s\", sk->host,\n\t\t\t\t   sk->port, FormatSafekeeperState(sk),\n\t\t\t\t   wp->api.conn_error_message(sk));\n\t\t\tShutdownConnection(sk);\n\t\t\treturn false;\n\t}\n\tAssert(false);\n\treturn false;\n}\n\n/* Deserialize membership configuration from buf to mconf. */\nstatic void\nMembershipConfigurationDeserialize(MembershipConfiguration *mconf, StringInfo buf)\n{\n\tuint32\t\ti;\n\n\tmconf->generation = pq_getmsgint32(buf);\n\tmconf->members.len = pq_getmsgint32(buf);\n\tmconf->members.m = palloc0(sizeof(SafekeeperId) * mconf->members.len);\n\tfor (i = 0; i < mconf->members.len; i++)\n\t{\n\t\tconst char *buf_host;\n\n\t\tmconf->members.m[i].node_id = pq_getmsgint64(buf);\n\t\tbuf_host = pq_getmsgrawstring(buf);\n\t\tstrlcpy(mconf->members.m[i].host, buf_host, sizeof(mconf->members.m[i].host));\n\t\tmconf->members.m[i].port = pq_getmsgint16(buf);\n\t}\n\tmconf->new_members.len = pq_getmsgint32(buf);\n\tmconf->new_members.m = palloc0(sizeof(SafekeeperId) * mconf->new_members.len);\n\tfor (i = 0; i < mconf->new_members.len; i++)\n\t{\n\t\tconst char *buf_host;\n\n\t\tmconf->new_members.m[i].node_id = pq_getmsgint64(buf);\n\t\tbuf_host = pq_getmsgrawstring(buf);\n\t\tstrlcpy(mconf->new_members.m[i].host, buf_host, sizeof(mconf->new_members.m[i].host));\n\t\tmconf->new_members.m[i].port = pq_getmsgint16(buf);\n\t}\n}\n\n/*\n * Read next message with known type into provided struct, by reading a CopyData\n * block from the safekeeper's postgres connection, returning whether the read\n * was successful.\n *\n * If the read needs more polling, we return 'false' and keep the state\n * unmodified, waiting until it becomes read-ready to try again. If it fully\n * failed, a warning is emitted and the connection is reset.\n *\n * Note: it pallocs if needed, i.e. 
for AcceptorGreeting and VoteResponse fields.\n */\nstatic bool\nAsyncReadMessage(Safekeeper *sk, AcceptorProposerMessage *anymsg)\n{\n\tWalProposer *wp = sk->wp;\n\n\tchar\t   *buf;\n\tint\t\t\tbuf_size;\n\tuint8\t\ttag;\n\tStringInfoData s;\n\n\tif (!(AsyncRead(sk, &buf, &buf_size)))\n\t\treturn false;\n\tsk->latestMsgReceivedAt = wp->api.get_current_timestamp(wp);\n\n\t/* parse it */\n\ts.data = buf;\n\ts.len = buf_size;\n\ts.maxlen = buf_size;\n\ts.cursor = 0;\n\n\tif (wp->config->proto_version == 3)\n\t{\n\t\ttag = pq_getmsgbyte(&s);\n\t\tif (tag != anymsg->tag)\n\t\t{\n\t\t\twp_log(WARNING, \"unexpected message tag %c from node %s:%s in state %s\", (char) tag, sk->host,\n\t\t\t\t   sk->port, FormatSafekeeperState(sk));\n\t\t\tResetConnection(sk);\n\t\t\treturn false;\n\t\t}\n\t\tswitch (tag)\n\t\t{\n\t\t\tcase 'g':\n\t\t\t\t{\n\t\t\t\t\tAcceptorGreeting *msg = (AcceptorGreeting *) anymsg;\n\n\t\t\t\t\tmsg->nodeId = pq_getmsgint64(&s);\n\t\t\t\t\tMembershipConfigurationDeserialize(&msg->mconf, &s);\n\t\t\t\t\tmsg->term = pq_getmsgint64(&s);\n\t\t\t\t\tpq_getmsgend(&s);\n\t\t\t\t\treturn true;\n\t\t\t\t}\n\t\t\tcase 'v':\n\t\t\t\t{\n\t\t\t\t\tVoteResponse *msg = (VoteResponse *) anymsg;\n\n\t\t\t\t\tmsg->generation = pq_getmsgint32(&s);\n\t\t\t\t\tmsg->term = pq_getmsgint64(&s);\n\t\t\t\t\tmsg->voteGiven = pq_getmsgbyte(&s);\n\t\t\t\t\tmsg->flushLsn = pq_getmsgint64(&s);\n\t\t\t\t\tmsg->truncateLsn = pq_getmsgint64(&s);\n\t\t\t\t\tmsg->termHistory.n_entries = pq_getmsgint32(&s);\n\t\t\t\t\tmsg->termHistory.entries = palloc(sizeof(TermSwitchEntry) * msg->termHistory.n_entries);\n\t\t\t\t\tfor (uint32 i = 0; i < msg->termHistory.n_entries; i++)\n\t\t\t\t\t{\n\t\t\t\t\t\tmsg->termHistory.entries[i].term = pq_getmsgint64(&s);\n\t\t\t\t\t\tmsg->termHistory.entries[i].lsn = pq_getmsgint64(&s);\n\t\t\t\t\t}\n\t\t\t\t\tpq_getmsgend(&s);\n\t\t\t\t\treturn true;\n\t\t\t\t}\n\t\t\tcase 'a':\n\t\t\t\t{\n\t\t\t\t\tAppendResponse *msg = (AppendResponse *) 
anymsg;\n\n\t\t\t\t\tmsg->generation = pq_getmsgint32(&s);\n\t\t\t\t\tmsg->term = pq_getmsgint64(&s);\n\t\t\t\t\tmsg->flushLsn = pq_getmsgint64(&s);\n\t\t\t\t\tmsg->commitLsn = pq_getmsgint64(&s);\n\t\t\t\t\tmsg->hs.ts = pq_getmsgint64(&s);\n\t\t\t\t\tmsg->hs.xmin.value = pq_getmsgint64(&s);\n\t\t\t\t\tmsg->hs.catalog_xmin.value = pq_getmsgint64(&s);\n\t\t\t\t\tif (s.len > s.cursor)\n\t\t\t\t\t\tParsePageserverFeedbackMessage(wp, &s, &msg->ps_feedback);\n\t\t\t\t\telse\n\t\t\t\t\t\tmsg->ps_feedback.present = false;\n\t\t\t\t\tpq_getmsgend(&s);\n\t\t\t\t\treturn true;\n\t\t\t\t}\n\t\t\tdefault:\n\t\t\t\t{\n\t\t\t\t\twp_log(FATAL, \"unexpected message tag %c to read\", (char) tag);\n\t\t\t\t\treturn false;\n\t\t\t\t}\n\t\t}\n\t}\n\telse if (wp->config->proto_version == 2)\n\t{\n\t\ttag = pq_getmsgint64_le(&s);\n\t\tif (tag != anymsg->tag)\n\t\t{\n\t\t\twp_log(WARNING, \"unexpected message tag %c from node %s:%s in state %s\", (char) tag, sk->host,\n\t\t\t\t   sk->port, FormatSafekeeperState(sk));\n\t\t\tResetConnection(sk);\n\t\t\treturn false;\n\t\t}\n\t\tswitch (tag)\n\t\t{\n\t\t\tcase 'g':\n\t\t\t\t{\n\t\t\t\t\tAcceptorGreeting *msg = (AcceptorGreeting *) anymsg;\n\n\t\t\t\t\tmsg->term = pq_getmsgint64_le(&s);\n\t\t\t\t\tmsg->nodeId = pq_getmsgint64_le(&s);\n\t\t\t\t\tpq_getmsgend(&s);\n\t\t\t\t\treturn true;\n\t\t\t\t}\n\n\t\t\tcase 'v':\n\t\t\t\t{\n\t\t\t\t\tVoteResponse *msg = (VoteResponse *) anymsg;\n\n\t\t\t\t\tmsg->term = pq_getmsgint64_le(&s);\n\t\t\t\t\tmsg->voteGiven = pq_getmsgint64_le(&s);\n\t\t\t\t\tmsg->flushLsn = pq_getmsgint64_le(&s);\n\t\t\t\t\tmsg->truncateLsn = pq_getmsgint64_le(&s);\n\t\t\t\t\tmsg->termHistory.n_entries = pq_getmsgint32_le(&s);\n\t\t\t\t\tmsg->termHistory.entries = palloc(sizeof(TermSwitchEntry) * msg->termHistory.n_entries);\n\t\t\t\t\tfor (int i = 0; i < msg->termHistory.n_entries; i++)\n\t\t\t\t\t{\n\t\t\t\t\t\tmsg->termHistory.entries[i].term = pq_getmsgint64_le(&s);\n\t\t\t\t\t\tmsg->termHistory.entries[i].lsn = 
pq_getmsgint64_le(&s);\n\t\t\t\t\t}\n\t\t\t\t\tpq_getmsgint64_le(&s);\t/* timelineStartLsn */\n\t\t\t\t\tpq_getmsgend(&s);\n\t\t\t\t\treturn true;\n\t\t\t\t}\n\n\t\t\tcase 'a':\n\t\t\t\t{\n\t\t\t\t\tAppendResponse *msg = (AppendResponse *) anymsg;\n\n\t\t\t\t\tmsg->term = pq_getmsgint64_le(&s);\n\t\t\t\t\tmsg->flushLsn = pq_getmsgint64_le(&s);\n\t\t\t\t\tmsg->commitLsn = pq_getmsgint64_le(&s);\n\t\t\t\t\tmsg->hs.ts = pq_getmsgint64_le(&s);\n\t\t\t\t\tmsg->hs.xmin.value = pq_getmsgint64_le(&s);\n\t\t\t\t\tmsg->hs.catalog_xmin.value = pq_getmsgint64_le(&s);\n\t\t\t\t\tif (s.len > s.cursor)\n\t\t\t\t\t\tParsePageserverFeedbackMessage(wp, &s, &msg->ps_feedback);\n\t\t\t\t\telse\n\t\t\t\t\t\tmsg->ps_feedback.present = false;\n\t\t\t\t\tpq_getmsgend(&s);\n\t\t\t\t\treturn true;\n\t\t\t\t}\n\n\t\t\tdefault:\n\t\t\t\t{\n\t\t\t\t\twp_log(FATAL, \"unexpected message tag %c to read\", (char) tag);\n\t\t\t\t\treturn false;\n\t\t\t\t}\n\t\t}\n\t}\n\twp_log(FATAL, \"unsupported proto_version %d\", wp->config->proto_version);\n\treturn false;\t\t\t\t/* keep the compiler quiet */\n}\n\n/*\n * Blocking equivalent to AsyncWrite.\n *\n * We use this everywhere messages are small enough that they should fit in a\n * single packet.\n */\nstatic bool\nBlockingWrite(Safekeeper *sk, void *msg, size_t msg_size, SafekeeperState success_state)\n{\n\tWalProposer *wp = sk->wp;\n\tuint32\t\tsk_events;\n\tuint32\t\tnwr_events;\n\n\tif (!wp->api.conn_blocking_write(sk, msg, msg_size))\n\t{\n\t\twp_log(WARNING, \"failed to send to node %s:%s in %s state: %s\",\n\t\t\t   sk->host, sk->port, FormatSafekeeperState(sk),\n\t\t\t   wp->api.conn_error_message(sk));\n\t\tShutdownConnection(sk);\n\t\treturn false;\n\t}\n\n\tsk->state = success_state;\n\n\t/*\n\t * If the new state will be waiting for events to happen, update the event\n\t * set to wait for those\n\t */\n\tSafekeeperStateDesiredEvents(sk, &sk_events, &nwr_events);\n\n\t/*\n\t * nwr_events is relevant only during SS_ACTIVE which doesn't 
use\n\t * BlockingWrite\n\t */\n\tAssert(!nwr_events);\n\tif (sk_events)\n\t\twp->api.update_event_set(sk, sk_events);\n\n\treturn true;\n}\n\n/*\n * Starts a write into the 'i'th safekeeper's postgres connection, moving to\n * flush_state (adjusting eventset) if write still needs flushing.\n *\n * Returns false if sending is unfinished (requires flushing or conn failed).\n * Upon failure, a warning is emitted and the connection is reset.\n */\nstatic bool\nAsyncWrite(Safekeeper *sk, void *msg, size_t msg_size, SafekeeperState flush_state)\n{\n\tWalProposer *wp = sk->wp;\n\n\tswitch (wp->api.conn_async_write(sk, msg, msg_size))\n\t{\n\t\tcase PG_ASYNC_WRITE_SUCCESS:\n\t\t\treturn true;\n\t\tcase PG_ASYNC_WRITE_TRY_FLUSH:\n\n\t\t\t/*\n\t\t\t * We still need to call PQflush some more to finish the job; go\n\t\t\t * to the appropriate state. Update the event set at the bottom of\n\t\t\t * this function\n\t\t\t */\n\t\t\tsk->state = flush_state;\n\t\t\twp->api.update_event_set(sk, WL_SOCKET_READABLE | WL_SOCKET_WRITEABLE);\n\t\t\treturn false;\n\t\tcase PG_ASYNC_WRITE_FAIL:\n\t\t\twp_log(WARNING, \"failed to send to node %s:%s in %s state: %s\",\n\t\t\t\t   sk->host, sk->port, FormatSafekeeperState(sk),\n\t\t\t\t   wp->api.conn_error_message(sk));\n\t\t\tShutdownConnection(sk);\n\t\t\treturn false;\n\t\tdefault:\n\t\t\tAssert(false);\n\t\t\treturn false;\n\t}\n}\n\n/*\n * Flushes a previous call to AsyncWrite. This only needs to be called when the\n * socket becomes read or write ready *after* calling AsyncWrite.\n *\n * If flushing successfully completes returns true, otherwise false. 
Event set\n * is updated only if connection fails, otherwise caller should manually unset\n * WL_SOCKET_WRITEABLE.\n */\nstatic bool\nAsyncFlush(Safekeeper *sk)\n{\n\tWalProposer *wp = sk->wp;\n\n\t/*---\n\t * PQflush returns:\n\t *   0 if successful                    [we're good to move on]\n\t *   1 if unable to send everything yet [call PQflush again]\n\t *  -1 if it failed                     [emit an error]\n\t */\n\tswitch (wp->api.conn_flush(sk))\n\t{\n\t\tcase 0:\n\t\t\t/* flush is done */\n\t\t\treturn true;\n\t\tcase 1:\n\t\t\t/* Nothing to do; try again when the socket's ready */\n\t\t\treturn false;\n\t\tcase -1:\n\t\t\twp_log(WARNING, \"failed to flush write to node %s:%s in %s state: %s\",\n\t\t\t\t   sk->host, sk->port, FormatSafekeeperState(sk),\n\t\t\t\t   wp->api.conn_error_message(sk));\n\t\t\tResetConnection(sk);\n\t\t\treturn false;\n\t\tdefault:\n\t\t\tAssert(false);\n\t\t\treturn false;\n\t}\n}\n\nstatic int\nCompareLsn(const void *a, const void *b)\n{\n\tXLogRecPtr\tlsn1 = *((const XLogRecPtr *) a);\n\tXLogRecPtr\tlsn2 = *((const XLogRecPtr *) b);\n\n\tif (lsn1 < lsn2)\n\t\treturn -1;\n\telse if (lsn1 == lsn2)\n\t\treturn 0;\n\telse\n\t\treturn 1;\n}\n\n/* Returns a human-readable string corresponding to the SafekeeperState\n *\n * The string should not be freed.\n *\n * The strings are intended to be used as a prefix to \"state\", e.g.:\n *\n *   wp_log(LOG, \"currently in %s state\", FormatSafekeeperState(sk));\n *\n * If this sort of phrasing doesn't fit the message, instead use something like:\n *\n *   wp_log(LOG, \"currently in state [%s]\", FormatSafekeeperState(sk));\n */\nstatic char *\nFormatSafekeeperState(Safekeeper *sk)\n{\n\tchar\t   *return_val = NULL;\n\n\tswitch (sk->state)\n\t{\n\t\tcase SS_OFFLINE:\n\t\t\treturn_val = \"offline\";\n\t\t\tbreak;\n\t\tcase SS_CONNECTING_READ:\n\t\tcase SS_CONNECTING_WRITE:\n\t\t\treturn_val = \"connecting\";\n\t\t\tbreak;\n\t\tcase SS_WAIT_EXEC_RESULT:\n\t\t\treturn_val = \"receiving query 
result\";\n\t\t\tbreak;\n\t\tcase SS_HANDSHAKE_RECV:\n\t\t\treturn_val = \"handshake (receiving)\";\n\t\t\tbreak;\n\t\tcase SS_WAIT_VOTING:\n\t\t\treturn_val = \"voting\";\n\t\t\tbreak;\n\t\tcase SS_WAIT_VERDICT:\n\t\t\treturn_val = \"wait-for-verdict\";\n\t\t\tbreak;\n\t\tcase SS_SEND_ELECTED_FLUSH:\n\t\t\treturn_val = \"send-announcement-flush\";\n\t\t\tbreak;\n\t\tcase SS_WAIT_ELECTED:\n\t\t\treturn_val = \"idle\";\n\t\t\tbreak;\n\t\tcase SS_ACTIVE:\n\t\t\tswitch (sk->active_state)\n\t\t\t{\n\t\t\t\tcase SS_ACTIVE_SEND:\n\t\t\t\t\treturn_val = \"active send\";\n\t\t\t\t\tbreak;\n\t\t\t\tcase SS_ACTIVE_READ_WAL:\n\t\t\t\t\treturn_val = \"active read WAL\";\n\t\t\t\t\tbreak;\n\t\t\t\tcase SS_ACTIVE_FLUSH:\n\t\t\t\t\treturn_val = \"active flush\";\n\t\t\t\t\tbreak;\n\t\t\t}\n\t\t\tbreak;\n\t}\n\n\tAssert(return_val != NULL);\n\n\treturn return_val;\n}\n\n/* Asserts that the provided events are expected for given safekeeper's state */\nstatic void\nAssertEventsOkForState(uint32 events, Safekeeper *sk)\n{\n\tuint32\t\tsk_events;\n\tuint32\t\tnwr_events;\n\tuint32\t\texpected;\n\tbool\t\tevents_ok_for_state;\t/* long name so the `Assert` is more\n\t\t\t\t\t\t\t\t\t\t * clear later */\n\tWalProposer *wp = sk->wp;\n\n\tSafekeeperStateDesiredEvents(sk, &sk_events, &nwr_events);\n\n\t/*\n\t * Without one more level of notify target indirection we have no way to\n\t * distinguish which socket woke up us, so just union expected events.\n\t */\n\texpected = sk_events | nwr_events;\n\tevents_ok_for_state = ((events & expected) != 0);\n\n\tif (!events_ok_for_state)\n\t{\n\t\t/*\n\t\t * To give a descriptive message in the case of failure, we use elog\n\t\t * and then an assertion that's guaranteed to fail.\n\t\t */\n\t\twp_log(WARNING, \"events %s mismatched for safekeeper %s:%s in state [%s]\",\n\t\t\t   FormatEvents(wp, events), sk->host, sk->port, FormatSafekeeperState(sk));\n\t\tAssert(events_ok_for_state);\n\t}\n}\n\n/* Returns the set of events for both safekeeper 
(sk_events) and neon_walreader\n * (nwr_events) sockets a safekeeper in this state should be waiting on.\n *\n * This will return WL_NO_EVENTS (= 0) for some states. */\nvoid\nSafekeeperStateDesiredEvents(Safekeeper *sk, uint32 *sk_events, uint32 *nwr_events)\n{\n\tWalProposer *wp = sk->wp;\n\n\t*nwr_events = 0;\t\t\t/* nwr_events is empty for most states */\n\n\t/* If the state doesn't have a modifier, we can check the base state */\n\tswitch (sk->state)\n\t{\n\t\t\t/* Connecting states say what they want in the name */\n\t\tcase SS_CONNECTING_READ:\n\t\t\t*sk_events = WL_SOCKET_READABLE;\n\t\t\treturn;\n\t\tcase SS_CONNECTING_WRITE:\n\t\t\t*sk_events = WL_SOCKET_WRITEABLE;\n\t\t\treturn;\n\n\t\t\t/* Reading states need the socket to be read-ready to continue */\n\t\tcase SS_WAIT_EXEC_RESULT:\n\t\tcase SS_HANDSHAKE_RECV:\n\t\tcase SS_WAIT_VERDICT:\n\t\t\t*sk_events = WL_SOCKET_READABLE;\n\t\t\treturn;\n\n\t\t\t/*\n\t\t\t * Idle states use read-readiness as a sign that the connection\n\t\t\t * has been disconnected.\n\t\t\t */\n\t\tcase SS_WAIT_VOTING:\n\t\tcase SS_WAIT_ELECTED:\n\t\t\t*sk_events = WL_SOCKET_READABLE;\n\t\t\treturn;\n\n\t\tcase SS_SEND_ELECTED_FLUSH:\n\t\t\t*sk_events = WL_SOCKET_READABLE | WL_SOCKET_WRITEABLE;\n\t\t\treturn;\n\n\t\tcase SS_ACTIVE:\n\t\t\tswitch (sk->active_state)\n\t\t\t{\n\t\t\t\t\t/*\n\t\t\t\t\t * Everything is sent; we just wait for sk responses and\n\t\t\t\t\t * latch.\n\t\t\t\t\t *\n\t\t\t\t\t * Note: this assumes we send all available WAL to\n\t\t\t\t\t * safekeeper in one wakeup (unless it blocks). Otherwise\n\t\t\t\t\t * we would want WL_SOCKET_WRITEABLE here to finish the\n\t\t\t\t\t * work.\n\t\t\t\t\t */\n\t\t\t\tcase SS_ACTIVE_SEND:\n\t\t\t\t\t*sk_events = WL_SOCKET_READABLE;\n\t\t\t\t\t/* c.f. 
walprop_pg_active_state_update_event_set */\n#if PG_VERSION_NUM >= 150000\n\t\t\t\t\tif (wp->api.wal_reader_events(sk))\n\t\t\t\t\t\t*nwr_events = WL_SOCKET_CLOSED;\n#endif\t\t\t\t\t\t\t/* on PG 14 nwr_events remains 0 */\n\t\t\t\t\treturn;\n\n\t\t\t\t\t/*\n\t\t\t\t\t * Waiting for neon_walreader socket, but we still read\n\t\t\t\t\t * responses from sk socket.\n\t\t\t\t\t */\n\t\t\t\tcase SS_ACTIVE_READ_WAL:\n\t\t\t\t\t*sk_events = WL_SOCKET_READABLE;\n\t\t\t\t\t*nwr_events = wp->api.wal_reader_events(sk);\n\t\t\t\t\treturn;\n\n\t\t\t\t\t/*\n\t\t\t\t\t * Need to flush the sk socket, so ignore neon_walreader\n\t\t\t\t\t * one and set write interest on sk.\n\t\t\t\t\t */\n\t\t\t\tcase SS_ACTIVE_FLUSH:\n\t\t\t\t\t*sk_events = WL_SOCKET_READABLE | WL_SOCKET_WRITEABLE;\n#if PG_VERSION_NUM >= 150000\n\t\t\t\t\t/* c.f. walprop_pg_active_state_update_event_set */\n\t\t\t\t\tif (wp->api.wal_reader_events(sk))\n\t\t\t\t\t\t*nwr_events = WL_SOCKET_CLOSED;\n#endif\t\t\t\t\t\t\t/* on PG 14 nwr_events remains 0 */\n\t\t\t\t\treturn;\n\t\t\t}\n\t\t\treturn;\n\n\t\t\t/* The offline state expects no events. */\n\t\tcase SS_OFFLINE:\n\t\t\t*sk_events = 0;\n\t\t\treturn;\n\n\t\tdefault:\n\t\t\tAssert(false);\n\t}\n}\n\n/* Returns a human-readable string corresponding to the event set\n *\n * If the events do not correspond to something set as the `events` field of a `WaitEvent`, the\n * returned string may be meaningless.\n *\n * The string should not be freed. It should also not be expected to remain the same between\n * function calls. 
*/\nstatic char *\nFormatEvents(WalProposer *wp, uint32 events)\n{\n\tstatic char return_str[9];\n\n\t/* Helper variable to check if there's extra bits */\n\tuint32\t\tall_flags = WL_LATCH_SET\n\t\t| WL_SOCKET_READABLE\n\t\t| WL_SOCKET_WRITEABLE\n\t\t| WL_TIMEOUT\n\t\t| WL_POSTMASTER_DEATH\n\t\t| WL_EXIT_ON_PM_DEATH\n\t\t| WL_SOCKET_CONNECTED;\n\n\t/*\n\t * The formatting here isn't supposed to be *particularly* useful -- it's\n\t * just to give a sense of what events have been triggered without\n\t * needing to remember your powers of two.\n\t */\n\n\treturn_str[0] = (events & WL_LATCH_SET) ? 'L' : '_';\n\treturn_str[1] = (events & WL_SOCKET_READABLE) ? 'R' : '_';\n\treturn_str[2] = (events & WL_SOCKET_WRITEABLE) ? 'W' : '_';\n\treturn_str[3] = (events & WL_TIMEOUT) ? 'T' : '_';\n\treturn_str[4] = (events & WL_POSTMASTER_DEATH) ? 'D' : '_';\n\treturn_str[5] = (events & WL_EXIT_ON_PM_DEATH) ? 'E' : '_';\n\treturn_str[6] = (events & WL_SOCKET_CONNECTED) ? 'C' : '_';\n\n\tif (events & (~all_flags))\n\t{\n\t\twp_log(WARNING, \"event formatting found unexpected component %d\",\n\t\t\t   events & (~all_flags));\n\t\treturn_str[7] = '*';\n\t\treturn_str[8] = '\\0';\n\t}\n\telse\n\t\treturn_str[7] = '\\0';\n\n\treturn (char *) &return_str;\n}\n\n/* Dump mconf as toml for observability / debugging. Result is palloc'ed. 
*/\nstatic char *\nMembershipConfigurationToString(MembershipConfiguration *mconf)\n{\n\tStringInfoData s;\n\tuint32\t\ti;\n\n\tinitStringInfo(&s);\n\tappendStringInfo(&s, \"{gen = %u\", mconf->generation);\n\tappendStringInfoString(&s, \", members = [\");\n\tfor (i = 0; i < mconf->members.len; i++)\n\t{\n\t\tif (i > 0)\n\t\t\tappendStringInfoString(&s, \", \");\n\t\tappendStringInfo(&s, \"{node_id = %lu\", mconf->members.m[i].node_id);\n\t\tappendStringInfo(&s, \", host = %s\", mconf->members.m[i].host);\n\t\tappendStringInfo(&s, \", port = %u }\", mconf->members.m[i].port);\n\t}\n\tappendStringInfo(&s, \"], new_members = [\");\n\tfor (i = 0; i < mconf->new_members.len; i++)\n\t{\n\t\tif (i > 0)\n\t\t\tappendStringInfoString(&s, \", \");\n\t\tappendStringInfo(&s, \"{node_id = %lu\", mconf->new_members.m[i].node_id);\n\t\tappendStringInfo(&s, \", host = %s\", mconf->new_members.m[i].host);\n\t\tappendStringInfo(&s, \", port = %u }\", mconf->new_members.m[i].port);\n\t}\n\tappendStringInfoString(&s, \"]}\");\n\treturn s.data;\n}\n\nstatic void\nMembershipConfigurationCopy(MembershipConfiguration *src, MembershipConfiguration *dst)\n{\n\tdst->generation = src->generation;\n\tdst->members.len = src->members.len;\n\tdst->members.m = palloc0(sizeof(SafekeeperId) * dst->members.len);\n\tmemcpy(dst->members.m, src->members.m, sizeof(SafekeeperId) * dst->members.len);\n\tdst->new_members.len = src->new_members.len;\n\tdst->new_members.m = palloc0(sizeof(SafekeeperId) * dst->new_members.len);\n\tmemcpy(dst->new_members.m, src->new_members.m, sizeof(SafekeeperId) * dst->new_members.len);\n}\n\nstatic void\nMembershipConfigurationFree(MembershipConfiguration *mconf)\n{\n\tif (mconf->members.m)\n\t\tpfree(mconf->members.m);\n\tmconf->members.m = NULL;\n\tif (mconf->new_members.m)\n\t\tpfree(mconf->new_members.m);\n\tmconf->new_members.m = NULL;\n}\n"
  },
  {
    "path": "pgxn/neon/walproposer.h",
    "content": "#ifndef __NEON_WALPROPOSER_H__\n#define __NEON_WALPROPOSER_H__\n\n#include \"access/transam.h\"\n#include \"access/xlogdefs.h\"\n#include \"access/xlog_internal.h\"\n#include \"nodes/replnodes.h\"\n#include \"replication/walreceiver.h\"\n#include \"utils/uuid.h\"\n\n#include \"libpqwalproposer.h\"\n#include \"neon_walreader.h\"\n#include \"pagestore_client.h\"\n\n#define MAX_SAFEKEEPERS 32\n#define MAX_SEND_SIZE (XLOG_BLCKSZ * 16)\t/* max size of a single* WAL\n\t\t\t\t\t\t\t\t\t\t\t * message */\n/*\n * In the spirit of WL_SOCKET_READABLE and others, this corresponds to no events having occurred,\n * because all WL_* events are given flags equal to some (1 << i), starting from i = 0\n */\n#define WL_NO_EVENTS 0\n\nstruct WalProposerConn;\t\t\t/* Defined in libpqwalproposer.h */\ntypedef struct WalProposerConn WalProposerConn;\n\n/*\n * WAL safekeeper state, which is used to wait for some event.\n *\n * States are listed here in the order that they're executed.\n *\n * Most states, upon failure, will move back to SS_OFFLINE by calls to\n * ResetConnection or ShutdownConnection.\n */\ntypedef enum\n{\n\t/*\n\t * Does not have an active connection and will stay that way until further\n\t * notice.\n\t *\n\t * Moves to SS_CONNECTING_WRITE by calls to ResetConnection.\n\t */\n\tSS_OFFLINE,\n\n\t/*\n\t * Connecting states. \"_READ\" waits for the socket to be available for\n\t * reading, \"_WRITE\" waits for writing. There's no difference in the code\n\t * they execute when polled, but we have this distinction in order to\n\t * recreate the event set in HackyRemoveWalProposerEvent.\n\t *\n\t * After the connection is made, \"START_WAL_PUSH\" query is sent.\n\t */\n\tSS_CONNECTING_WRITE,\n\tSS_CONNECTING_READ,\n\n\t/*\n\t * Waiting for the result of the \"START_WAL_PUSH\" command.\n\t *\n\t * After we get a successful result, sends handshake to safekeeper.\n\t */\n\tSS_WAIT_EXEC_RESULT,\n\n\t/*\n\t * Executing the receiving half of the handshake. 
After receiving, moves\n\t * to SS_WAIT_VOTING.\n\t */\n\tSS_HANDSHAKE_RECV,\n\n\t/*\n\t * Waiting to participate in voting, but a quorum hasn't yet been reached.\n\t * This is an idle state - we do not expect AdvancePollState to be called.\n\t *\n\t * Moved externally by execution of SS_HANDSHAKE_RECV, when we received a\n\t * quorum of handshakes.\n\t */\n\tSS_WAIT_VOTING,\n\n\t/*\n\t * Already sent voting information, waiting to receive confirmation from\n\t * the node. After receiving, moves to SS_WAIT_ELECTED, if the quorum\n\t * isn't reached yet.\n\t */\n\tSS_WAIT_VERDICT,\n\n\t/* Need to flush ProposerElected message. */\n\tSS_SEND_ELECTED_FLUSH,\n\n\t/*\n\t * Waiting for quorum to send WAL. Idle state. If the socket becomes\n\t * read-ready, the connection has been closed.\n\t *\n\t * Moves to SS_ACTIVE only by call to StartStreaming.\n\t */\n\tSS_WAIT_ELECTED,\n\n\t/*\n\t * Active phase, when we acquired quorum and have WAL to send or feedback\n\t * to read.\n\t */\n\tSS_ACTIVE,\n} SafekeeperState;\n\n/*\n * Sending WAL substates of SS_ACTIVE.\n */\ntypedef enum\n{\n\t/*\n\t * We are ready to send more WAL, waiting for latch set to learn about\n\t * more WAL becoming available (or just a timeout to send heartbeat).\n\t */\n\tSS_ACTIVE_SEND,\n\n\t/*\n\t * Polling neon_walreader to receive chunk of WAL (probably remotely) to\n\t * send to this safekeeper.\n\t *\n\t * Note: socket management is done completely inside walproposer_pg for\n\t * simplicity, and thus simulation doesn't test it. Which is fine as\n\t * simulation is mainly aimed at consensus checks, not waiteventset\n\t * management.\n\t *\n\t * Also, while in this state we don't touch safekeeper socket, so in\n\t * theory it might close connection as inactive. This can be addressed if\n\t * needed; however, while fetching WAL we should regularly send it, so the\n\t * problem is unlikely. 
Vice versa is also true (SS_ACTIVE doesn't handle\n\t * walreader socket), but similarly shouldn't be a problem.\n\t */\n\tSS_ACTIVE_READ_WAL,\n\n\t/*\n\t * Waiting for write readiness to flush the socket.\n\t */\n\tSS_ACTIVE_FLUSH,\n} SafekeeperActiveState;\n\n/* Consensus logical timestamp. */\ntypedef uint64 term_t;\n\n/* neon storage node id */\ntypedef uint64 NNodeId;\n\n/*\n * Number uniquely identifying safekeeper membership configuration.\n * This and following structs pair ones in membership.rs.\n */\ntypedef uint32 Generation;\n#define INVALID_GENERATION 0\n\ntypedef struct SafekeeperId\n{\n\tNNodeId\t\tnode_id;\n\tchar\t\thost[MAXCONNINFO];\n\tuint16\t\tport;\n} SafekeeperId;\n\n/* Set of safekeepers. */\ntypedef struct MemberSet\n{\n\tuint32\t\tlen;\t\t\t/* number of members */\n\tSafekeeperId *m;\t\t\t/* ids themselves */\n} MemberSet;\n\n/*\n * Timeline safekeeper membership configuration as sent in the\n * protocol.\n */\ntypedef struct MembershipConfiguration\n{\n\tGeneration\tgeneration;\n\tMemberSet\tmembers;\n\t/* Has 0 n_members in non joint conf. */\n\tMemberSet\tnew_members;\n} MembershipConfiguration;\n\n/*\n * Proposer <-> Acceptor messaging.\n */\n\ntypedef struct ProposerAcceptorMessage\n{\n\tuint8\t\ttag;\n} ProposerAcceptorMessage;\n\n/* Initial Proposer -> Acceptor message */\ntypedef struct ProposerGreeting\n{\n\tProposerAcceptorMessage pam;\t/* message tag */\n\n\t/*\n\t * tenant/timeline ids as C strings with standard hex notation for ease of\n\t * printing. In principle they are not strictly needed as ttid is also\n\t * passed as libpq options.\n\t */\n\tchar\t   *tenant_id;\n\tchar\t   *timeline_id;\n\t/* Full conf is carried to allow safekeeper switch */\n\tMembershipConfiguration mconf;\n\n\t/*\n\t * pg_version and wal_seg_size are used for timeline creation until we\n\t * fully migrate to doing externally. 
systemId is only used as a sanity\n\t * cross check.\n\t */\n\tuint32\t\tpg_version;\t\t/* in PG_VERSION_NUM format */\n\tuint64\t\tsystem_id;\t\t/* Postgres system identifier. */\n\tuint32\t\twal_seg_size;\n} ProposerGreeting;\n\n/* protocol v2 variant, kept while wp supports it */\ntypedef struct ProposerGreetingV2\n{\n\tuint64\t\ttag;\t\t\t/* message tag */\n\tuint32\t\tprotocolVersion;\t/* proposer-safekeeper protocol version */\n\tuint32\t\tpgVersion;\n\tpg_uuid_t\tproposerId;\n\tuint64\t\tsystemId;\t\t/* Postgres system identifier */\n\tuint8\t\ttimeline_id[16];\t/* Neon timeline id */\n\tuint8\t\ttenant_id[16];\n\tTimeLineID\ttimeline;\n\tuint32\t\twalSegSize;\n} ProposerGreetingV2;\n\ntypedef struct AcceptorProposerMessage\n{\n\tuint8\t\ttag;\n} AcceptorProposerMessage;\n\n/*\n * Acceptor -> Proposer initial response: the highest term acceptor voted for,\n * its node id and configuration.\n */\ntypedef struct AcceptorGreeting\n{\n\tAcceptorProposerMessage apm;\n\tNNodeId\t\tnodeId;\n\tMembershipConfiguration mconf;\n\tterm_t\t\tterm;\n} AcceptorGreeting;\n\n/*\n * Proposer -> Acceptor vote request.\n */\ntypedef struct VoteRequest\n{\n\tProposerAcceptorMessage pam;\t/* message tag */\n\tGeneration\tgeneration;\t\t/* membership conf generation */\n\tterm_t\t\tterm;\n} VoteRequest;\n\n/* protocol v2 variant, kept while wp supports it */\ntypedef struct VoteRequestV2\n{\n\tuint64\t\ttag;\n\tterm_t\t\tterm;\n\tpg_uuid_t\tproposerId;\t\t/* for monitoring/debugging */\n} VoteRequestV2;\n\n/* Element of term switching chain. */\ntypedef struct TermSwitchEntry\n{\n\tterm_t\t\tterm;\n\tXLogRecPtr\tlsn;\n} TermSwitchEntry;\n\ntypedef struct TermHistory\n{\n\tuint32\t\tn_entries;\n\tTermSwitchEntry *entries;\n} TermHistory;\n\n/* Vote itself, sent from safekeeper to proposer */\ntypedef struct VoteResponse\n{\n\tAcceptorProposerMessage apm;\n\n\t/*\n\t * Membership conf generation. 
It's not strictly required because on\n\t * mismatch safekeeper is expected to ERROR the connection, but let's\n\t * sanity check it.\n\t */\n\tGeneration\tgeneration;\n\tterm_t\t\tterm;\n\tuint8\t\tvoteGiven;\n\n\t/*\n\t * Safekeeper flush_lsn (end of WAL) + history of term switches allow\n\t * proposer to choose the most advanced one.\n\t */\n\tXLogRecPtr\tflushLsn;\n\tXLogRecPtr\ttruncateLsn;\t/* minimal LSN which may be needed for*\n\t\t\t\t\t\t\t\t * recovery of some safekeeper */\n\tTermHistory termHistory;\n} VoteResponse;\n\n/*\n * Proposer -> Acceptor message announcing proposer is elected and communicating\n * epoch history to it.\n */\ntypedef struct ProposerElected\n{\n\tAcceptorProposerMessage apm;\n\tGeneration\tgeneration;\t\t/* membership conf generation */\n\tterm_t\t\tterm;\n\t/* proposer will send since this point */\n\tXLogRecPtr\tstartStreamingAt;\n\t/* history of term switches up to this proposer */\n\tTermHistory *termHistory;\n} ProposerElected;\n\n/*\n * Header of request with WAL message sent from proposer to safekeeper.\n */\ntypedef struct AppendRequestHeader\n{\n\tAcceptorProposerMessage apm;\n\tGeneration\tgeneration;\t\t/* membership conf generation */\n\tterm_t\t\tterm;\t\t\t/* term of the proposer */\n\tXLogRecPtr\tbeginLsn;\t\t/* start position of message in WAL */\n\tXLogRecPtr\tendLsn;\t\t\t/* end position of message in WAL */\n\tXLogRecPtr\tcommitLsn;\t\t/* LSN committed by quorum of safekeepers */\n\n\t/*\n\t * minimal LSN which may be needed for recovery of some safekeeper (end\n\t * lsn + 1 of last chunk streamed to everyone)\n\t */\n\tXLogRecPtr\ttruncateLsn;\n\t/* in the AppendRequest message, WAL data follows */\n} AppendRequestHeader;\n\n/* protocol v2 variant, kept while wp supports it */\ntypedef struct AppendRequestHeaderV2\n{\n\tuint64\t\ttag;\n\tterm_t\t\tterm;\t\t\t/* term of the proposer */\n\n\t/*\n\t * LSN since which current proposer appends WAL (begin_lsn of its first\n\t * record); determines epoch switch 
point.\n\t */\n\tXLogRecPtr\tepochStartLsn;\n\tXLogRecPtr\tbeginLsn;\t\t/* start position of message in WAL */\n\tXLogRecPtr\tendLsn;\t\t\t/* end position of message in WAL */\n\tXLogRecPtr\tcommitLsn;\t\t/* LSN committed by quorum of safekeepers */\n\n\t/*\n\t * minimal LSN which may be needed for recovery of some safekeeper (end\n\t * lsn + 1 of last chunk streamed to everyone)\n\t */\n\tXLogRecPtr\ttruncateLsn;\n\tpg_uuid_t\tproposerId;\t\t/* for monitoring/debugging */\n\t/* in the AppendRequest message, WAL data follows */\n} AppendRequestHeaderV2;\n\n/*\n * Hot standby feedback received from replica\n */\ntypedef struct HotStandbyFeedback\n{\n\tTimestampTz ts;\n\tFullTransactionId xmin;\n\tFullTransactionId catalog_xmin;\n} HotStandbyFeedback;\n\ntypedef struct PageserverFeedback\n{\n\t/* true if AppendResponse contains this feedback */\n\tbool\t\tpresent;\n\t/* current size of the timeline on pageserver */\n\tuint64\t\tcurrentClusterSize;\n\t/* standby_status_update fields that safekeeper received from pageserver */\n\tXLogRecPtr\tlast_received_lsn;\n\tXLogRecPtr\tdisk_consistent_lsn;\n\tXLogRecPtr\tremote_consistent_lsn;\n\tTimestampTz replytime;\n\tuint32\t\tshard_number;\n\t/* true if the pageserver has detected data corruption in the timeline */\n\tbool\t\tcorruption_detected;\n} PageserverFeedback;\n\n/* BEGIN_HADRON */\n/**\n * WAL proposer is the only backend that will update `sent_bytes` and `last_recorded_time_us`.\n * Once the `sent_bytes` reaches the limit, it puts backpressure on PG backends.\n *\n * A PG backend checks `should_limit` to see if it should hit backpressure.\n * - If yes, it also checks the `last_recorded_time_us` to see\n *   if it's time to push more WALs. This is because the WAL proposer\n *   only resets `should_limit` to 0 after it is notified about new WALs\n *   which might take a while.\n */\ntypedef struct WalRateLimiter\n{\n\t/* The effective wal write rate. 
Could be changed dynamically\n\tbased on whether PG has backpressure or not.*/\n\tpg_atomic_uint32 effective_max_wal_bytes_per_second;\n\t/* If the value is 1, PG backends will hit backpressure until the time has passed batch_end_time_us. */\n\tpg_atomic_uint32 should_limit;\n\t/* The number of bytes sent in the current second. */\n\tuint64\t\tsent_bytes;\n\t/* The timestamp when the write starts in the current batch. A batch is a time interval over which we\n\ttrack and throttle writes. Most times a batch is 1s, but it could become larger if the PG overwrites the WALs\n\tand we will adjust the batch accordingly to compensate (e.g., if PG writes 10MB at once and max WAL write rate\n\tis 1MB/s, then the current batch will become 10s). */\n\tpg_atomic_uint64 batch_start_time_us;\n\t/* The timestamp (in the future) that the current batch should end and accept more writes\n\t(after should_limit is set to 1). */\n\tpg_atomic_uint64 batch_end_time_us;\n} WalRateLimiter;\n/* END_HADRON */\n\ntypedef struct WalproposerShmemState\n{\n\tpg_atomic_uint64 propEpochStartLsn;\n\tchar\t\tdonor_name[64];\n\tchar\t\tdonor_conninfo[MAXCONNINFO];\n\tXLogRecPtr\tdonor_lsn;\n\n\tslock_t\t\tmutex;\n\tpg_atomic_uint64 mineLastElectedTerm;\n\tpg_atomic_uint64 backpressureThrottlingTime;\n\tpg_atomic_uint64 currentClusterSize;\n\n\t/* last feedback from each shard */\n\tPageserverFeedback shard_ps_feedback[MAX_SHARDS];\n\tint\t\t\tnum_shards;\n\tbool\t\treplica_promote;\n\n\t/* aggregated feedback with min LSNs across shards */\n\tPageserverFeedback min_ps_feedback;\n\n\t/* BEGIN_HADRON */\n\t/* The WAL rate limiter */\n\tWalRateLimiter wal_rate_limiter;\n\t/* Number of safekeepers in the config */\n\tuint32 num_safekeepers;\n\t/* Per-safekeeper status flags: 0=inactive, 1=active */\n\tuint8 safekeeper_status[MAX_SAFEKEEPERS];\n\t/* END_HADRON */\n} WalproposerShmemState;\n\n/*\n * Report safekeeper state to proposer\n */\ntypedef struct AppendResponse\n{\n\tAcceptorProposerMessage 
apm;\n\n\t/*\n\t * Membership conf generation. It's not strictly required because on\n\t * mismatch safekeeper is expected to ERROR the connection, but let's\n\t * sanity check it.\n\t */\n\tGeneration\tgeneration;\n\n\t/*\n\t * Current term of the safekeeper; if it is higher than proposer's, the\n\t * compute is out of date.\n\t */\n\tterm_t\t\tterm;\n\t/* TODO: add comment */\n\tXLogRecPtr\tflushLsn;\n\t/* Safekeeper reports back his awareness about which WAL is committed, as */\n\t/* this is a criterion for walproposer --sync mode exit */\n\tXLogRecPtr\tcommitLsn;\n\tHotStandbyFeedback hs;\n\t/* Feedback received from pageserver includes standby_status_update fields */\n\t/* and custom neon feedback. */\n\t/* This part of the message is extensible. */\n\tPageserverFeedback ps_feedback;\n} AppendResponse;\n\n/*  PageserverFeedback is extensible part of the message that is parsed separately */\n/*  Other fields are fixed part */\n#define APPENDRESPONSE_FIXEDPART_SIZE 56\n\nstruct WalProposer;\ntypedef struct WalProposer WalProposer;\n\n/*\n * Descriptor of safekeeper\n */\ntypedef struct Safekeeper\n{\n\tWalProposer *wp;\n\n\tchar const *host;\n\tchar const *port;\n\n\t/* BEGIN_HADRON */\n\t/* index of this safekeeper in the WalProposer array */\n\tuint32 index;\n\t/* END_HADRON */\n\n\t/*\n\t * connection string for connecting/reconnecting.\n\t *\n\t * May contain private information like password and should not be logged.\n\t */\n\tchar\t\tconninfo[MAXCONNINFO];\n\n\t/*\n\t * Temporary buffer for the message being sent to the safekeeper.\n\t */\n\tStringInfoData outbuf;\n\n\t/*\n\t * Streaming will start here; must be record boundary.\n\t */\n\tXLogRecPtr\tstartStreamingAt;\n\n\tXLogRecPtr\tstreamingAt;\t/* current streaming position */\n\tAppendRequestHeader appendRequest;\t/* request for sending to safekeeper */\n\n\tSafekeeperState state;\t\t/* safekeeper state machine state */\n\tSafekeeperActiveState active_state;\n\tTimestampTz latestMsgReceivedAt;\t/* 
when latest msg is received */\n\tAcceptorGreeting greetResponse; /* acceptor greeting */\n\tVoteResponse voteResponse;\t/* the vote */\n\tAppendResponse appendResponse;\t/* feedback for master */\n\n\n\t/* postgres-specific fields */\n#ifndef WALPROPOSER_LIB\n\n\t/*\n\t * postgres protocol connection to the WAL acceptor\n\t *\n\t * Equals NULL only when state = SS_OFFLINE. Nonblocking is set once we\n\t * reach SS_ACTIVE; not before.\n\t */\n\tWalProposerConn *conn;\n\n\t/*\n\t * WAL reader, allocated for each safekeeper.\n\t */\n\tNeonWALReader *xlogreader;\n\n\t/*\n\t * Position in wait event set. Equal to -1 if no event\n\t */\n\tint\t\t\teventPos;\n\n\t/*\n\t * Neon WAL reader position in wait event set, or -1 if no socket. Note\n\t * that event must be removed not only on error/failure, but also on\n\t * successful *local* read, as next read might again be remote, but with\n\t * different socket.\n\t */\n\tint\t\t\tnwrEventPos;\n\n\t/*\n\t * Per libpq docs, during connection establishment socket might change,\n\t * remember here if it is stable to avoid readding to the event set if\n\t * possible. Must be reset whenever nwr event is deleted.\n\t */\n\tbool\t\tnwrConnEstablished;\n#endif\n\n\n\t/* WalProposer library specifics */\n#ifdef WALPROPOSER_LIB\n\n\t/*\n\t * Buffer for incoming messages. 
Usually Rust vector is stored here.\n\t * Caller is responsible for freeing the buffer.\n\t */\n\tStringInfoData inbuf;\n#endif\n} Safekeeper;\n\n/* Re-exported PostgresPollingStatusType */\ntypedef enum\n{\n\tWP_CONN_POLLING_FAILED = 0,\n\tWP_CONN_POLLING_READING,\n\tWP_CONN_POLLING_WRITING,\n\tWP_CONN_POLLING_OK,\n\n\t/*\n\t * 'libpq-fe.h' still has PGRES_POLLING_ACTIVE, but says it's unused.\n\t * We've removed it here to avoid clutter.\n\t */\n} WalProposerConnectPollStatusType;\n\n/* Re-exported ConnStatusType */\ntypedef enum\n{\n\tWP_CONNECTION_OK,\n\tWP_CONNECTION_BAD,\n\n\t/*\n\t * The original ConnStatusType has many more tags, but requests that they\n\t * not be relied upon (except for displaying to the user). We don't need\n\t * that extra functionality, so we collect them into a single tag here.\n\t */\n\tWP_CONNECTION_IN_PROGRESS,\n} WalProposerConnStatusType;\n\n/*\n * Collection of hooks for walproposer, to call postgres functions,\n * read WAL and send it over the network.\n */\ntypedef struct walproposer_api\n{\n\t/*\n\t * Get WalproposerShmemState. This is used to store information about last\n\t * elected term.\n\t */\n\tWalproposerShmemState *(*get_shmem_state) (WalProposer *wp);\n\n\t/*\n\t * Start receiving notifications about new WAL. This is an infinite loop\n\t * which calls WalProposerBroadcast() and WalProposerPoll() to send the\n\t * WAL.\n\t */\n\tvoid\t\t(*start_streaming) (WalProposer *wp, XLogRecPtr startpos);\n\n\t/* Get pointer to the latest available WAL. */\n\tXLogRecPtr\t(*get_flush_rec_ptr) (WalProposer *wp);\n\n\t/* Update current donor info in WalProposer Shmem */\n\tvoid\t\t(*update_donor) (WalProposer *wp, Safekeeper *donor, XLogRecPtr donor_lsn);\n\n\t/* Get current time. */\n\tTimestampTz (*get_current_timestamp) (WalProposer *wp);\n\n\t/* Current error message, aka PQerrorMessage. */\n\tchar\t   *(*conn_error_message) (Safekeeper *sk);\n\n\t/* Connection status, aka PQstatus. 
*/\n\tWalProposerConnStatusType (*conn_status) (Safekeeper *sk);\n\n\t/* Start the connection, aka PQconnectStart. */\n\tvoid\t\t(*conn_connect_start) (Safekeeper *sk);\n\n\t/* Poll an asynchronous connection, aka PQconnectPoll. */\n\tWalProposerConnectPollStatusType (*conn_connect_poll) (Safekeeper *sk);\n\n\t/* Send a blocking SQL query, aka PQsendQuery. */\n\tbool\t\t(*conn_send_query) (Safekeeper *sk, char *query);\n\n\t/* Read the query result, aka PQgetResult. */\n\tWalProposerExecStatusType (*conn_get_query_result) (Safekeeper *sk);\n\n\t/* Flush buffer to the network, aka PQflush. */\n\tint\t\t\t(*conn_flush) (Safekeeper *sk);\n\n\t/* Reset sk state: close pq connection, deallocate xlogreader. */\n\tvoid\t\t(*conn_finish) (Safekeeper *sk);\n\n\t/*\n\t * Try to read CopyData message from the safekeeper, aka PQgetCopyData.\n\t *\n\t * On success, the data is placed in *buf. It is valid until the next call\n\t * to this function.\n\t *\n\t * Returns PG_ASYNC_READ_FAIL on closed connection.\n\t */\n\tPGAsyncReadResult (*conn_async_read) (Safekeeper *sk, char **buf, int *amount);\n\n\t/* Try to write CopyData message, aka PQputCopyData. */\n\tPGAsyncWriteResult (*conn_async_write) (Safekeeper *sk, void const *buf, size_t size);\n\n\t/* Blocking CopyData write, aka PQputCopyData + PQflush. */\n\tbool\t\t(*conn_blocking_write) (Safekeeper *sk, void const *buf, size_t size);\n\n\t/*\n\t * Download WAL before basebackup for logical walsenders from sk, if\n\t * needed\n\t */\n\tbool\t\t(*recovery_download) (WalProposer *wp, Safekeeper *sk);\n\n\t/* Allocate WAL reader. */\n\tvoid\t\t(*wal_reader_allocate) (Safekeeper *sk);\n\n\t/* Read WAL from disk to buf. */\n\tNeonWALReadResult (*wal_read) (Safekeeper *sk, char *buf, XLogRecPtr startptr, Size count, char **errmsg);\n\n\t/* Returns events to be awaited on WAL reader, if any. */\n\tuint32\t\t(*wal_reader_events) (Safekeeper *sk);\n\n\t/* Initialize event set. 
*/\n\tvoid\t\t(*init_event_set) (WalProposer *wp);\n\n\t/* Update events for an existing safekeeper connection. */\n\tvoid\t\t(*update_event_set) (Safekeeper *sk, uint32 events);\n\n\t/* Configure wait event set for yield in SS_ACTIVE. */\n\tvoid\t\t(*active_state_update_event_set) (Safekeeper *sk);\n\n\t/* Add a new safekeeper connection to the event set. */\n\tvoid\t\t(*add_safekeeper_event_set) (Safekeeper *sk, uint32 events);\n\n\t/* Remove safekeeper connection from event set */\n\tvoid\t\t(*rm_safekeeper_event_set) (Safekeeper *sk);\n\n\t/*\n\t * Wait until some event happens: - timeout is reached - socket event for\n\t * safekeeper connection - new WAL is available\n\t *\n\t * Returns 0 if timeout is reached, 1 if some event happened. Updates\n\t * events mask to indicate events and sets sk to the safekeeper which has\n\t * an event.\n\t *\n\t * On timeout, events is set to WL_NO_EVENTS. On socket event, events is\n\t * set to WL_SOCKET_READABLE and/or WL_SOCKET_WRITEABLE. When socket is\n\t * closed, events is set to WL_SOCKET_READABLE.\n\t *\n\t * WL_SOCKET_WRITEABLE is usually set only when we need to flush the\n\t * buffer. It can be returned only if caller asked for this event in the\n\t * last *_event_set call.\n\t */\n\tint\t\t\t(*wait_event_set) (WalProposer *wp, long timeout, Safekeeper **sk, uint32 *events);\n\n\t/* Read random bytes. */\n\tbool\t\t(*strong_random) (WalProposer *wp, void *buf, size_t len);\n\n\t/*\n\t * Get a basebackup LSN. Used to cross-validate with the latest available\n\t * LSN on the safekeepers.\n\t */\n\tXLogRecPtr\t(*get_redo_start_lsn) (WalProposer *wp);\n\n\t/*\n\t * Finish sync safekeepers with the given LSN. This function should not\n\t * return and should exit the program.\n\t */\n\tvoid\t\t(*finish_sync_safekeepers) (WalProposer *wp, XLogRecPtr lsn) __attribute__((noreturn)) ;\n\t/*\n\t * Called after every AppendResponse from the safekeeper. 
Used to\n\t * propagate backpressure feedback and to confirm WAL persistence (has\n\t * been committed on the quorum of safekeepers).\n\t */\n\tvoid\t\t(*process_safekeeper_feedback) (WalProposer *wp, Safekeeper *sk);\n\n\t/*\n\t * Write a log message to the internal log processor. This is used only\n\t * when walproposer is compiled as a library. Otherwise, all logging is\n\t * handled by elog().\n\t */\n\tvoid\t\t(*log_internal) (WalProposer *wp, int level, const char *line);\n\n\t/*\n\t * BEGIN_HADRON\n\t * APIs manipulating shared memory state used for Safekeeper quorum health metrics.\n\t */\n\n\t/*\n\t * Reset the safekeeper statuses in shared memory for metric purposes.\n\t */\n\tvoid\t\t(*reset_safekeeper_statuses_for_metrics) (WalProposer *wp, uint32 num_safekeepers);\n\n\t/*\n\t * Update the safekeeper status in shared memory for metric purposes.\n\t */\n\tvoid\t\t(*update_safekeeper_status_for_metrics) (WalProposer *wp, uint32 sk_index, uint8 status);\n\n\t/* END_HADRON */\n} walproposer_api;\n\n/*\n * Configuration of the WAL proposer.\n */\ntypedef struct WalProposerConfig\n{\n\t/* hex-encoded TenantId cstr */\n\tchar\t   *neon_tenant;\n\n\t/* hex-encoded TimelineId cstr */\n\tchar\t   *neon_timeline;\n\n\t/*\n\t * Comma-separated list of safekeepers, in the following format:\n\t * host1:port1,host2:port2,host3:port3\n\t *\n\t * This cstr should be editable.\n\t */\n\tchar\t   *safekeepers_list;\n\n\t/* libpq connection info options. */\n\tchar\t   *safekeeper_conninfo_options;\n\n\t/*\n\t * WalProposer reconnects to offline safekeepers once in this interval.\n\t * Time is in milliseconds.\n\t */\n\tint\t\t\tsafekeeper_reconnect_timeout;\n\n\t/*\n\t * WalProposer terminates the connection if it doesn't receive any message\n\t * from the safekeeper in this interval. Time is in milliseconds.\n\t */\n\tint\t\t\tsafekeeper_connection_timeout;\n\n\t/*\n\t * WAL segment size. Will be passed to safekeepers in greet request. 
Also\n\t * used to detect page headers.\n\t */\n\tint\t\t\twal_segment_size;\n\n\t/*\n\t * If safekeeper was started in sync mode, walproposer will not subscribe\n\t * for new WAL and will exit when quorum of safekeepers will be synced to\n\t * the latest available LSN.\n\t */\n\tbool\t\tsyncSafekeepers;\n\n\t/* Will be passed to safekeepers in greet request. */\n\tuint64\t\tsystemId;\n\n\t/* Will be passed to safekeepers in greet request. */\n\tTimeLineID\tpgTimeline;\n\n\tint\t\t\tproto_version;\n\n#ifdef WALPROPOSER_LIB\n\tvoid\t   *callback_data;\n#endif\n} WalProposerConfig;\n\ntypedef enum\n{\n\t/* collecting greetings to determine term to campaign for */\n\tWPS_COLLECTING_TERMS,\n\t/* campaign started, waiting for votes */\n\tWPS_CAMPAIGN,\n\t/* successfully elected */\n\tWPS_ELECTED,\n} WalProposerState;\n\n/*\n * WAL proposer state.\n */\ntypedef struct WalProposer\n{\n\tWalProposerConfig *config;\n\tWalProposerState state;\n\t/* Current walproposer membership configuration */\n\tMembershipConfiguration mconf;\n\n\t/*\n\t * Parallels mconf.members with pointers to the member's slot in\n\t * safekeepers array of connections, or NULL if such member is not\n\t * connected. Helps to avoid looking slot per id through all\n\t * .safekeepers[] when doing quorum checks.\n\t */\n\tSafekeeper *members_safekeepers[MAX_SAFEKEEPERS];\n\t/* As above, but for new_members. */\n\tSafekeeper *new_members_safekeepers[MAX_SAFEKEEPERS];\n\n\t/* (n_safekeepers / 2) + 1. Used for static pre-generations quorum checks. */\n\tint\t\t\tquorum;\n\n\t/*\n\t * Generation of the membership conf of which safekeepers[] are presumably\n\t * members. 
To make cplane life a bit easier and have more control in\n\t * tests with which sks walproposer gets connected neon.safekeepers GUC\n\t * doesn't provide full mconf, only the list of endpoints to connect to.\n\t * We still would like to know generation associated with it because 1) we\n\t * need some handle to enforce using generations in walproposer, and\n\t * non-zero value of this serves the purpose; 2) currently we don't do\n\t * that, but in theory walproposer can update list of safekeepers to\n\t * connect to upon receiving mconf from safekeepers, and generation number\n\t * must be checked to see which list is newer.\n\t */\n\tGeneration\tsafekeepers_generation;\n\t/* Number of occupied slots in safekeepers[] */\n\tint\t\t\tn_safekeepers;\n\t/* Safekeepers walproposer is connecting to. */\n\tSafekeeper\tsafekeeper[MAX_SAFEKEEPERS];\n\n\t/* Current local TimeLineId in use */\n\tTimeLineID\tlocalTimeLineID;\n\n\t/* WAL has been generated up to this point */\n\tXLogRecPtr\tavailableLsn;\n\n\t/* cached GetAcknowledgedByQuorumWALPosition result */\n\tXLogRecPtr\tcommitLsn;\n\n\tProposerGreeting greetRequest;\n\tProposerGreetingV2 greetRequestV2;\n\n\t/* Vote request for safekeeper */\n\tVoteRequest voteRequest;\n\n\t/*\n\t * Minimal LSN which may be needed for recovery of some safekeeper,\n\t * record-aligned (first record which might not yet received by someone).\n\t */\n\tXLogRecPtr\ttruncateLsn;\n\n\t/*\n\t * Term of the proposer. We want our term to be highest and unique, so we\n\t * collect terms from safekeepers quorum, choose max and +1. After that\n\t * our term is fixed and must not change. 
If we observe that some\n\t * safekeeper has higher term, it means that we have another running\n\t * compute, so we must stop immediately.\n\t */\n\tterm_t\t\tpropTerm;\n\n\t/* term history of the proposer */\n\tTermHistory propTermHistory;\n\n\t/* epoch start lsn of the proposer */\n\tXLogRecPtr\tpropTermStartLsn;\n\n\t/* Most advanced acceptor epoch */\n\tterm_t\t\tdonorLastLogTerm;\n\n\t/* Most advanced acceptor */\n\tSafekeeper *donor;\n\n\t/* timeline globally starts at this LSN */\n\tXLogRecPtr\ttimelineStartLsn;\n\n\t/* number of successful connections over the lifetime of walproposer */\n\tint\t\t\tn_connected;\n\n\t/*\n\t * Timestamp of the last reconnection attempt. Related to\n\t * config->safekeeper_reconnect_timeout\n\t */\n\tTimestampTz last_reconnect_attempt;\n\n\twalproposer_api api;\n} WalProposer;\n\nextern WalProposer *WalProposerCreate(WalProposerConfig *config, walproposer_api api);\nextern void WalProposerStart(WalProposer *wp);\nextern void WalProposerBroadcast(WalProposer *wp, XLogRecPtr startpos, XLogRecPtr endpos);\nextern void WalProposerPoll(WalProposer *wp);\nextern void WalProposerFree(WalProposer *wp);\n\nextern WalproposerShmemState *GetWalpropShmemState(void);\n\n/*\n * WaitEventSet API doesn't allow to remove socket, so walproposer_pg uses it to\n * recreate set from scratch, hence the export.\n */\nextern void SafekeeperStateDesiredEvents(Safekeeper *sk, uint32 *sk_events, uint32 *nwr_events);\nextern TimeLineID walprop_pg_get_timeline_id(void);\n\n\n#define WPEVENT\t\t1337\t\t/* special log level for walproposer internal\n\t\t\t\t\t\t\t\t * events */\n\n#define WP_LOG_PREFIX \"[WP] \"\n\n/*\n * wp_log is used in pure wp code (walproposer.c), allowing API callback to\n * catch logging.\n */\n#ifdef WALPROPOSER_LIB\nextern void WalProposerLibLog(WalProposer *wp, int elevel, char *fmt,...) pg_attribute_printf(3, 4);\n#define wp_log(elevel, fmt, ...) 
WalProposerLibLog(wp, elevel, fmt, ## __VA_ARGS__)\n#else\n#define wp_log(elevel, fmt, ...) elog(elevel, WP_LOG_PREFIX fmt, ## __VA_ARGS__)\n#endif\n\n/*\n * And wpg_log is used in all other (postgres specific) walproposer code, just\n * adding prefix.\n */\n#define wpg_log(elevel, fmt, ...) elog(elevel, WP_LOG_PREFIX fmt, ## __VA_ARGS__)\n\n#endif\t\t\t\t\t\t\t/* __NEON_WALPROPOSER_H__ */\n"
  },
  {
    "path": "pgxn/neon/walproposer_compat.c",
    "content": "/*\n * Contains copied/adapted functions from libpq and some internal postgres functions.\n * This is needed to avoid linking to full postgres server installation. This file\n * is compiled as a part of libwalproposer static library.\n */\n#include \"postgres.h\"\n\n#include <stdio.h>\n\n#include \"libpq/pqformat.h\"\n#include \"miscadmin.h\"\n#include \"utils/datetime.h\"\n#include \"walproposer.h\"\n\nvoid\nExceptionalCondition(const char *conditionName,\n\t\t\t\t\t const char *fileName, int lineNumber)\n{\n\tfprintf(stderr, \"ExceptionalCondition: %s:%d: %s\\n\",\n\t\t\tfileName, lineNumber, conditionName);\n\tfprintf(stderr, \"aborting...\\n\");\n\texit(1);\n}\n\nvoid\npq_copymsgbytes(StringInfo msg, char *buf, int datalen)\n{\n\tif (datalen < 0 || datalen > (msg->len - msg->cursor))\n\t\tExceptionalCondition(\"insufficient data left in message\", __FILE__, __LINE__);\n\tmemcpy(buf, &msg->data[msg->cursor], datalen);\n\tmsg->cursor += datalen;\n}\n\n/* --------------------------------\n *\t\tpq_getmsgint\t- get a binary integer from a message buffer\n *\n *\t\tValues are treated as unsigned.\n * --------------------------------\n */\nunsigned int\npq_getmsgint(StringInfo msg, int b)\n{\n\tunsigned int result;\n\tunsigned char n8;\n\tuint16\t\tn16;\n\tuint32\t\tn32;\n\n\tswitch (b)\n\t{\n\t\tcase 1:\n\t\t\tpq_copymsgbytes(msg, (char *) &n8, 1);\n\t\t\tresult = n8;\n\t\t\tbreak;\n\t\tcase 2:\n\t\t\tpq_copymsgbytes(msg, (char *) &n16, 2);\n\t\t\tresult = pg_ntoh16(n16);\n\t\t\tbreak;\n\t\tcase 4:\n\t\t\tpq_copymsgbytes(msg, (char *) &n32, 4);\n\t\t\tresult = pg_ntoh32(n32);\n\t\t\tbreak;\n\t\tdefault:\n\t\t\tfprintf(stderr, \"unsupported integer size %d\\n\", b);\n\t\t\tExceptionalCondition(\"unsupported integer size\", __FILE__, __LINE__);\n\t\t\tresult = 0;\t\t\t/* keep compiler quiet */\n\t\t\tbreak;\n\t}\n\treturn result;\n}\n\n/* --------------------------------\n *\t\tpq_getmsgint64\t- get a binary 8-byte int from a message buffer\n *\n * It 
is tempting to merge this with pq_getmsgint, but we'd have to make the\n * result int64 for all data widths --- that could be a big performance\n * hit on machines where int64 isn't efficient.\n * --------------------------------\n */\nint64\npq_getmsgint64(StringInfo msg)\n{\n\tuint64\t\tn64;\n\n\tpq_copymsgbytes(msg, (char *) &n64, sizeof(n64));\n\n\treturn pg_ntoh64(n64);\n}\n\n/* --------------------------------\n *\t\tpq_getmsgbyte\t- get a raw byte from a message buffer\n * --------------------------------\n */\nint\npq_getmsgbyte(StringInfo msg)\n{\n\tif (msg->cursor >= msg->len)\n\t\tExceptionalCondition(\"no data left in message\", __FILE__, __LINE__);\n\treturn (unsigned char) msg->data[msg->cursor++];\n}\n\n/* --------------------------------\n *\t\tpq_getmsgbytes\t- get raw data from a message buffer\n *\n *\t\tReturns a pointer directly into the message buffer; note this\n *\t\tmay not have any particular alignment.\n * --------------------------------\n */\nconst char *\npq_getmsgbytes(StringInfo msg, int datalen)\n{\n\tconst char *result;\n\n\tif (datalen < 0 || datalen > (msg->len - msg->cursor))\n\t\tExceptionalCondition(\"insufficient data left in message\", __FILE__, __LINE__);\n\tresult = &msg->data[msg->cursor];\n\tmsg->cursor += datalen;\n\treturn result;\n}\n\n/* --------------------------------\n *\t\tpq_getmsgrawstring - get a null-terminated text string - NO conversion\n *\n *\t\tReturns a pointer directly into the message buffer.\n * --------------------------------\n */\nconst char *\npq_getmsgrawstring(StringInfo msg)\n{\n\tchar\t   *str;\n\tint\t\t\tslen;\n\n\tstr = &msg->data[msg->cursor];\n\n\t/*\n\t * It's safe to use strlen() here because a StringInfo is guaranteed to\n\t * have a trailing null byte.  
But check we found a null inside the\n\t * message.\n\t */\n\tslen = strlen(str);\n\tif (msg->cursor + slen >= msg->len)\n\t\tExceptionalCondition(\"invalid string in message\", __FILE__, __LINE__);\n\tmsg->cursor += slen + 1;\n\n\treturn str;\n}\n\n/* --------------------------------\n *\t\tpq_getmsgend\t- verify message fully consumed\n * --------------------------------\n */\nvoid\npq_getmsgend(StringInfo msg)\n{\n\tif (msg->cursor != msg->len)\n\t\tExceptionalCondition(\"invalid msg format\", __FILE__, __LINE__);\n}\n\n/* --------------------------------\n *\t\tpq_sendbytes\t- append raw data to a StringInfo buffer\n * --------------------------------\n */\nvoid\npq_sendbytes(StringInfo buf, const void *data, int datalen)\n{\n\t/* use variant that maintains a trailing null-byte, out of caution */\n\tappendBinaryStringInfo(buf, data, datalen);\n}\n\n/* --------------------------------\n *\t\tpq_send_ascii_string\t- append a null-terminated text string (without conversion)\n *\n * This function intentionally bypasses encoding conversion, instead just\n * silently replacing any non-7-bit-ASCII characters with question marks.\n * It is used only when we are having trouble sending an error message to\n * the client with normal localization and encoding conversion.  
The caller\n * should already have taken measures to ensure the string is just ASCII;\n * the extra work here is just to make certain we don't send a badly encoded\n * string to the client (which might or might not be robust about that).\n *\n * NB: passed text string must be null-terminated, and so is the data\n * sent to the frontend.\n * --------------------------------\n */\nvoid\npq_send_ascii_string(StringInfo buf, const char *str)\n{\n\twhile (*str)\n\t{\n\t\tchar\t\tch = *str++;\n\n\t\tif (IS_HIGHBIT_SET(ch))\n\t\t\tch = '?';\n\t\tappendStringInfoCharMacro(buf, ch);\n\t}\n\tappendStringInfoChar(buf, '\\0');\n}\n\n/*\n * Produce a C-string representation of a TimestampTz.\n *\n * This is mostly for use in emitting messages.\n */\nconst char *\ntimestamptz_to_str(TimestampTz t)\n{\n\tstatic char buf[MAXDATELEN + 1];\n\n\tsnprintf(buf, sizeof(buf), \"TimestampTz(%ld)\", t);\n\treturn buf;\n}\n\nbool\nTimestampDifferenceExceeds(TimestampTz start_time,\n\t\t\t\t\t\t   TimestampTz stop_time,\n\t\t\t\t\t\t   int msec)\n{\n\tTimestampTz diff = stop_time - start_time;\n\n\treturn (diff >= msec * INT64CONST(1000));\n}\n\nvoid\nWalProposerLibLog(WalProposer *wp, int elevel, char *fmt,...)\n{\n\tchar\t\tbuf[1024];\n\tva_list\t\targs;\n\n\tfmt = _(fmt);\n\n\tva_start(args, fmt);\n\tvsnprintf(buf, sizeof(buf), fmt, args);\n\tva_end(args);\n\n\twp->api.log_internal(wp, elevel, buf);\n}\n"
  },
  {
    "path": "pgxn/neon/walproposer_pg.c",
    "content": "/*\n * Implementation of postgres based walproposer disk and IO routines, i.e. the\n * real ones. The reason this is separate from walproposer.c is ability to\n * replace them with mocks, allowing to do simulation testing.\n *\n * Also contains initialization of postgres based walproposer.\n */\n\n#include \"postgres.h\"\n\n#include <signal.h>\n#include <unistd.h>\n#include <sys/stat.h>\n#include \"access/xact.h\"\n#include \"access/xlog.h\"\n#include \"access/xlogdefs.h\"\n#include \"access/xlogutils.h\"\n#include \"access/xloginsert.h\"\n#if PG_VERSION_NUM >= 150000\n#include \"access/xlogrecovery.h\"\n#endif\n#include \"storage/fd.h\"\n#include \"storage/latch.h\"\n#include \"miscadmin.h\"\n#include \"pgstat.h\"\n#include \"access/xlog.h\"\n#include \"libpq/pqformat.h\"\n#include \"replication/slot.h\"\n#include \"replication/walreceiver.h\"\n#include \"replication/walsender_private.h\"\n#include \"postmaster/bgworker.h\"\n#include \"postmaster/interrupt.h\"\n#include \"postmaster/postmaster.h\"\n#include \"storage/pmsignal.h\"\n#include \"storage/proc.h\"\n#include \"storage/ipc.h\"\n#include \"storage/lwlock.h\"\n#include \"storage/pg_shmem.h\"\n#include \"storage/shmem.h\"\n#include \"storage/spin.h\"\n#include \"tcop/tcopprot.h\"\n#include \"utils/builtins.h\"\n#include \"utils/guc.h\"\n#include \"utils/memutils.h\"\n#include \"utils/ps_status.h\"\n#include \"utils/timestamp.h\"\n\n#include \"libpq-fe.h\"\n\n#include \"libpqwalproposer.h\"\n#include \"neon.h\"\n#include \"neon_perf_counters.h\"\n#include \"neon_walreader.h\"\n#include \"walproposer.h\"\n\n#define XLOG_HDR_SIZE (1 + 8 * 3)\t/* 'w' + startPos + walEnd + timestamp */\n#define XLOG_HDR_START_POS 1\t/* offset of start position in wal sender*\n\t\t\t\t\t\t\t\t * message header */\n\n#define MB ((XLogRecPtr)1024 * 1024)\n\n#define WAL_PROPOSER_SLOT_NAME \"wal_proposer_slot\"\n\n/* GUCs */\nchar\t   *wal_acceptors_list = \"\";\nint\t\t\twal_acceptor_reconnect_timeout = 
1000;\nint\t\t\twal_acceptor_connection_timeout = 10000;\nint\t\t\tsafekeeper_proto_version = 3;\nchar\t   *safekeeper_conninfo_options = \"\";\n/* BEGIN_HADRON */\nint         databricks_max_wal_mb_per_second = -1;\n// during throttling, we will limit the effective WAL write rate to 10KB.\n// PG can still push some WAL to SK, but at a very low rate.\nint \t\tdatabricks_throttled_max_wal_bytes_per_second = 10 * 1024;\n// The max sleep time of a batch. This is to make sure the rate limiter does not\n// overshoot too much and block PG for a very long time.\n// This is set as 5 minutes for now. PG can send as much as 10MB of WALs to SK in one batch,\n// so this effectively caps the write rate to ~30KB/s in the worst case.\nstatic uint64 kRateLimitMaxBatchUSecs = 300 * USECS_PER_SEC;\n/* END_HADRON */\n\n/* Set to true in the walproposer bgw. */\nstatic bool am_walproposer;\nstatic WalproposerShmemState *walprop_shared;\nstatic WalProposerConfig walprop_config;\nstatic XLogRecPtr sentPtr = InvalidXLogRecPtr;\nstatic const walproposer_api walprop_pg;\nstatic volatile sig_atomic_t got_SIGUSR2 = false;\nstatic bool reported_sigusr2 = false;\n\nstatic XLogRecPtr standby_flush_lsn = InvalidXLogRecPtr;\nstatic XLogRecPtr standby_apply_lsn = InvalidXLogRecPtr;\nstatic HotStandbyFeedback agg_hs_feedback;\n\nstatic void nwp_register_gucs(void);\nstatic void assign_neon_safekeepers(const char *newval, void *extra);\nstatic uint64 backpressure_lag_impl(void);\nstatic uint64 hadron_backpressure_lag_impl(void);\nstatic uint64 startup_backpressure_wrap(void);\nstatic bool backpressure_throttling_impl(void);\nstatic void walprop_register_bgworker(void);\n\nstatic void walprop_pg_init_standalone_sync_safekeepers(void);\nstatic void walprop_pg_init_walsender(void);\nstatic void walprop_pg_init_bgworker(void);\nstatic TimestampTz walprop_pg_get_current_timestamp(WalProposer *wp);\nstatic void walprop_pg_load_libpqwalreceiver(void);\n\nstatic process_interrupts_callback_t 
PrevProcessInterruptsCallback = NULL;\nstatic void WalproposerShmemInit_SyncSafekeeper(void);\n\n\nstatic void StartProposerReplication(WalProposer *wp, StartReplicationCmd *cmd);\nstatic void WalSndLoop(WalProposer *wp);\nstatic void XLogBroadcastWalProposer(WalProposer *wp);\n\nstatic void add_nwr_event_set(Safekeeper *sk, uint32 events);\nstatic void update_nwr_event_set(Safekeeper *sk, uint32 events);\nstatic void rm_safekeeper_event_set(Safekeeper *to_remove, bool is_sk);\n\nstatic void CheckGracefulShutdown(WalProposer *wp);\n\n/* BEGIN_HADRON */\nshardno_t get_num_shards(void);\n\nstatic int positive_mb_to_bytes(int mb)\n{\n\tif (mb <= 0)\n\t{\n\t\treturn mb;\n\t}\n\telse\n\t{\n\t\treturn mb * 1024 * 1024;\n\t}\n}\n/* END_HADRON */\n\nstatic void\ninit_walprop_config(bool syncSafekeepers)\n{\n\twalprop_config.neon_tenant = neon_tenant;\n\twalprop_config.neon_timeline = neon_timeline;\n\t/* WalProposerCreate scribbles directly on it, so pstrdup */\n\twalprop_config.safekeepers_list = pstrdup(wal_acceptors_list);\n\twalprop_config.safekeeper_conninfo_options = pstrdup(safekeeper_conninfo_options);\n\twalprop_config.safekeeper_reconnect_timeout = wal_acceptor_reconnect_timeout;\n\twalprop_config.safekeeper_connection_timeout = wal_acceptor_connection_timeout;\n\twalprop_config.wal_segment_size = wal_segment_size;\n\twalprop_config.syncSafekeepers = syncSafekeepers;\n\tif (!syncSafekeepers)\n\t\twalprop_config.systemId = GetSystemIdentifier();\n\telse\n\t\twalprop_config.systemId = 0;\n\twalprop_config.pgTimeline = walprop_pg_get_timeline_id();\n\twalprop_config.proto_version = safekeeper_proto_version;\n}\n\n/*\n * Entry point for `postgres --sync-safekeepers`.\n */\nPGDLLEXPORT void\nWalProposerSync(int argc, char *argv[])\n{\n\tWalProposer *wp;\n\n\tinit_walprop_config(true);\n\tWalproposerShmemInit_SyncSafekeeper();\n\twalprop_pg_init_standalone_sync_safekeepers();\n\twalprop_pg_load_libpqwalreceiver();\n\n\twp = WalProposerCreate(&walprop_config, 
walprop_pg);\n\n\tWalProposerStart(wp);\n}\n\n/*\n * WAL proposer bgworker entry point.\n */\nPGDLLEXPORT void\nWalProposerMain(Datum main_arg)\n{\n\tWalProposer *wp;\n\n\tif (*wal_acceptors_list == '\\0')\n\t{\n\t\twpg_log(WARNING, \"Safekeepers list is empty\");\n\t\treturn;\n\t}\n\n\tinit_walprop_config(false);\n\twalprop_pg_init_bgworker();\n\tam_walproposer = true;\n\twalprop_pg_load_libpqwalreceiver();\n\n\twp = WalProposerCreate(&walprop_config, walprop_pg);\n\twp->localTimeLineID = GetWALInsertionTimeLine();\n\twp->last_reconnect_attempt = walprop_pg_get_current_timestamp(wp);\n\n\twalprop_pg_init_walsender();\n\tWalProposerStart(wp);\n}\n\n/*\n * Initialize GUCs, bgworker, shmem and backpressure.\n */\nvoid\npg_init_walproposer(void)\n{\n\tif (!process_shared_preload_libraries_in_progress)\n\t\treturn;\n\n\tnwp_register_gucs();\n\n\tdelay_backend_us = &startup_backpressure_wrap;\n\tPrevProcessInterruptsCallback = ProcessInterruptsCallback;\n\tProcessInterruptsCallback = backpressure_throttling_impl;\n\n\twalprop_register_bgworker();\n}\n\nstatic void\nnwp_register_gucs(void)\n{\n\tDefineCustomStringVariable(\n\t\t\t\t\t\t\t   \"neon.safekeepers\",\n\t\t\t\t\t\t\t   \"List of Neon WAL acceptors (host:port)\",\n\t\t\t\t\t\t\t   NULL,\t/* long_desc */\n\t\t\t\t\t\t\t   &wal_acceptors_list, /* valueAddr */\n\t\t\t\t\t\t\t   \"\",\t/* bootValue */\n\t\t\t\t\t\t\t   PGC_SIGHUP,\n\t\t\t\t\t\t\t   GUC_LIST_INPUT,\t/* extensions can't use*\n\t\t\t\t\t\t\t\t\t\t\t\t * GUC_LIST_QUOTE */\n\t\t\t\t\t\t\t   NULL, assign_neon_safekeepers, NULL);\n\n\tDefineCustomStringVariable(\n\t\t\t\t\t\t\t   \"neon.safekeeper_conninfo_options\",\n\t\t\t\t\t\t\t   \"libpq keyword parameters and values to apply to safekeeper connections\",\n\t\t\t\t\t\t\t   NULL,\n\t\t\t\t\t\t\t   &safekeeper_conninfo_options,\n\t\t\t\t\t\t\t   \"\",\n\t\t\t\t\t\t\t   PGC_POSTMASTER,\n\t\t\t\t\t\t\t   0,\n\t\t\t\t\t\t\t   NULL, NULL, 
NULL);\n\n\tDefineCustomIntVariable(\n\t\t\t\t\t\t\t\"neon.safekeeper_reconnect_timeout\",\n\t\t\t\t\t\t\t\"Walproposer reconnects to offline safekeepers once in this interval.\",\n\t\t\t\t\t\t\tNULL,\n\t\t\t\t\t\t\t&wal_acceptor_reconnect_timeout,\n\t\t\t\t\t\t\t1000, 0, INT_MAX,\t/* default, min, max */\n\t\t\t\t\t\t\tPGC_SIGHUP, /* context */\n\t\t\t\t\t\t\tGUC_UNIT_MS,\t/* flags */\n\t\t\t\t\t\t\tNULL, NULL, NULL);\n\n\tDefineCustomIntVariable(\n\t\t\t\t\t\t\t\"neon.safekeeper_connect_timeout\",\n\t\t\t\t\t\t\t\"Connection or connection attempt to safekeeper is terminated if no message is received (or connection attempt doesn't finish) within this period.\",\n\t\t\t\t\t\t\tNULL,\n\t\t\t\t\t\t\t&wal_acceptor_connection_timeout,\n\t\t\t\t\t\t\t10000, 0, INT_MAX,\n\t\t\t\t\t\t\tPGC_SIGHUP,\n\t\t\t\t\t\t\tGUC_UNIT_MS,\n\t\t\t\t\t\t\tNULL, NULL, NULL);\n\n\tDefineCustomIntVariable(\n\t\t\t\t\t\t\t\"neon.safekeeper_proto_version\",\n\t\t\t\t\t\t\t\"Version of compute <-> safekeeper protocol.\",\n\t\t\t\t\t\t\t\"Used while migrating from 2 to 3.\",\n\t\t\t\t\t\t\t&safekeeper_proto_version,\n\t\t\t\t\t\t\t3, 0, INT_MAX,\n\t\t\t\t\t\t\tPGC_POSTMASTER,\n\t\t\t\t\t\t\t0,\n\t\t\t\t\t\t\tNULL, NULL, NULL);\n\n    /* BEGIN_HADRON */\n    DefineCustomIntVariable(\n                            \"databricks.max_wal_mb_per_second\",\n                            \"The maximum WAL MB per second allowed. If breached, sending WAL hit the backpressure. 
Setting to -1 disables the limit.\",\n                            NULL,\n                            &databricks_max_wal_mb_per_second,\n                            -1, -1, INT_MAX,\n                            PGC_SUSET,\n                            GUC_UNIT_MB,\n                            NULL, NULL, NULL);\n\n\tDefineCustomIntVariable(\n\t\t\t\t\t\t\t\"databricks.throttled_max_wal_bytes_per_second\",\n\t\t\t\t\t\t\t\"The maximum WAL bytes per second when PG is being throttled.\",\n\t\t\t\t\t\t\tNULL,\n\t\t\t\t\t\t\t&databricks_throttled_max_wal_bytes_per_second,\n\t\t\t\t\t\t\t10 * 1024, 0, INT_MAX,\n\t\t\t\t\t\t\tPGC_SUSET,\n\t\t\t\t\t\t\tGUC_UNIT_BYTE,\n\t\t\t\t\t\t\tNULL, NULL, NULL);\n    /* END_HADRON */\n}\n\n\nstatic int\nsplit_safekeepers_list(char *safekeepers_list, char *safekeepers[])\n{\n\tint\t\t\tn_safekeepers = 0;\n\tchar\t   *curr_sk = safekeepers_list;\n\n\tfor (char *coma = safekeepers_list; coma != NULL && *coma != '\\0'; curr_sk = coma)\n\t{\n\t\tif (++n_safekeepers >= MAX_SAFEKEEPERS)\n\t\t{\n\t\t\twpg_log(FATAL, \"too many safekeepers\");\n\t\t}\n\n\t\tcoma = strchr(coma, ',');\n\t\tsafekeepers[n_safekeepers - 1] = curr_sk;\n\n\t\tif (coma != NULL)\n\t\t{\n\t\t\t*coma++ = '\\0';\n\t\t}\n\t}\n\n\treturn n_safekeepers;\n}\n\nstatic char *split_off_safekeepers_generation(char *safekeepers_list, uint32 *generation)\n{\n\tchar\t   *endptr;\n\n\tif (strncmp(safekeepers_list, \"g#\", 2) != 0)\n\t{\n\t\treturn safekeepers_list;\n\t}\n\telse\n\t{\n\t\terrno = 0;\n\t\t*generation = strtoul(safekeepers_list + 2, &endptr, 10);\n\t\tif (errno != 0)\n\t\t{\n\t\t\twp_log(FATAL, \"failed to parse neon.safekeepers generation number: %m\");\n\t\t}\n\t\tif (*endptr != ':')\n\t\t{\n\t\t\twp_log(FATAL, \"failed to parse neon.safekeepers: no colon after generation\");\n\t\t}\n\t\treturn endptr + 1;\n\t}\n}\n\n/*\n * Accept two comma-separated strings with list of safekeeper host:port addresses.\n * Split them into arrays and return false if two sets do not 
match, ignoring the order.\n */\nstatic bool\nsafekeepers_cmp(char *old, char *new)\n{\n\tchar\t   *safekeepers_old[MAX_SAFEKEEPERS];\n\tchar\t   *safekeepers_new[MAX_SAFEKEEPERS];\n\tint\t\t\tlen_old = 0;\n\tint\t\t\tlen_new = 0;\n\tuint32\t\tgen_old = INVALID_GENERATION;\n\tuint32\t\tgen_new = INVALID_GENERATION;\n\n\told = split_off_safekeepers_generation(old, &gen_old);\n\tnew = split_off_safekeepers_generation(new, &gen_new);\n\n\tif (gen_old != gen_new)\n\t{\n\t\treturn false;\n\t}\n\n\tlen_old = split_safekeepers_list(old, safekeepers_old);\n\tlen_new = split_safekeepers_list(new, safekeepers_new);\n\n\tif (len_old != len_new)\n\t{\n\t\treturn false;\n\t}\n\n\tqsort(&safekeepers_old, len_old, sizeof(char *), pg_qsort_strcmp);\n\tqsort(&safekeepers_new, len_new, sizeof(char *), pg_qsort_strcmp);\n\n\tfor (int i = 0; i < len_new; i++)\n\t{\n\t\tif (strcmp(safekeepers_old[i], safekeepers_new[i]) != 0)\n\t\t{\n\t\t\treturn false;\n\t\t}\n\t}\n\n\treturn true;\n}\n\n/*\n * GUC assign_hook for neon.safekeepers. Restarts walproposer through FATAL if\n * the list changed.\n */\nstatic void\nassign_neon_safekeepers(const char *newval, void *extra)\n{\n\tchar\t   *newval_copy;\n\tchar\t   *oldval;\n\n\tif (newval && *newval != '\\0' && UsedShmemSegAddr && walprop_shared && RecoveryInProgress())\n\t\twalprop_shared->replica_promote = true;\n\n\tif (!am_walproposer)\n\t\treturn;\n\n\tif (!newval)\n\t{\n\t\t/* should never happen */\n\t\twpg_log(FATAL, \"neon.safekeepers is empty\");\n\t}\n\n\t/* Copy values because we will modify them in split_safekeepers_list() */\n\tnewval_copy = pstrdup(newval);\n\toldval = pstrdup(wal_acceptors_list);\n\n\t/*\n\t * TODO: restarting through FATAL is stupid and introduces 1s delay before\n\t * next bgw start. We should refactor walproposer to allow graceful exit\n\t * and thus remove this delay. 
XXX: If you change anything here, sync with\n\t * test_safekeepers_reconfigure_reorder.\n\t */\n\tif (!safekeepers_cmp(oldval, newval_copy))\n\t{\n\t\twpg_log(FATAL, \"restarting walproposer to change safekeeper list from %s to %s\",\n\t\t\t\twal_acceptors_list, newval);\n\t}\n\tpfree(newval_copy);\n\tpfree(oldval);\n}\n\n/* BEGIN_HADRON */\nstatic uint64 hadron_backpressure_lag_impl(void)\n{\n\tstruct WalproposerShmemState* state = NULL;\n\tuint64 lag = 0;\n\n\tif(max_cluster_size < 0){\n\t\t// if max cluster size is not set, then we don't apply backpressure because we're reconfiguring PG\n\t\treturn 0;\n\t}\n\n\tlag = backpressure_lag_impl();\n\tstate = GetWalpropShmemState();\n\tif ( state != NULL && databricks_max_wal_mb_per_second != -1 )\n\t{\n\t\tint old_limit = pg_atomic_read_u32(&state->wal_rate_limiter.effective_max_wal_bytes_per_second);\n\t\tint new_limit = (lag == 0)? positive_mb_to_bytes(databricks_max_wal_mb_per_second) : databricks_throttled_max_wal_bytes_per_second;\n\t\tif( old_limit != new_limit )\n\t\t{\n\t\t\tuint64 batch_start_time = pg_atomic_read_u64(&state->wal_rate_limiter.batch_start_time_us);\n\t\t\tuint64 batch_end_time = pg_atomic_read_u64(&state->wal_rate_limiter.batch_end_time_us);\n\t\t\t// the rate limit has changed, we need to reset the rate limiter's batch end time\n\t\t\tpg_atomic_write_u32(&state->wal_rate_limiter.effective_max_wal_bytes_per_second, new_limit);\n\t\t\tpg_atomic_write_u64(&state->wal_rate_limiter.batch_end_time_us, Min(batch_start_time + USECS_PER_SEC, batch_end_time));\n\t\t}\n\t\tif( new_limit == -1 )\n\t\t{\n\t\t\treturn 0;\n\t\t}\n\n\t\tif (pg_atomic_read_u32(&state->wal_rate_limiter.should_limit) == true)\n\t\t{\n\t\t\tTimestampTz now = GetCurrentTimestamp();\n\t\t\tstruct WalRateLimiter *limiter = &state->wal_rate_limiter;\n\t\t\tuint64 batch_end_time = pg_atomic_read_u64(&limiter->batch_end_time_us);\n\t\t\tif ( now >= batch_end_time )\n\t\t\t{\n\t\t\t\t/*\n\t\t\t\t* The backend has passed the batch end 
time and it's time to push more WALs.\n\t\t\t\t* If the backends are pushing WALs too fast, the wal proposer will rate limit them again.\n\t\t\t\t*/\n\t\t\t\tuint32 expected = true;\n\t\t\t\tpg_atomic_compare_exchange_u32(&state->wal_rate_limiter.should_limit, &expected, false);\n\t\t\t\treturn 0;\n\t\t\t}\n\t\t\treturn Max(lag, 1);\n\t\t}\n\t\t// rate limiter decides to not throttle, then return 0.\n\t\treturn 0;\n\t}\n\n\treturn lag;\n}\n/* END_HADRON */\n\n/* Check if we need to suspend inserts because of lagging replication. */\nstatic uint64\nbackpressure_lag_impl(void)\n{\n\tif (max_replication_apply_lag > 0 || max_replication_flush_lag > 0 || max_replication_write_lag > 0)\n\t{\n\t\tXLogRecPtr\twritePtr;\n\t\tXLogRecPtr\tflushPtr;\n\t\tXLogRecPtr\tapplyPtr;\n#if PG_VERSION_NUM >= 150000\n\t\tXLogRecPtr\tmyFlushLsn = GetFlushRecPtr(NULL);\n#else\n\t\tXLogRecPtr\tmyFlushLsn = GetFlushRecPtr();\n#endif\n\t\treplication_feedback_get_lsns(&writePtr, &flushPtr, &applyPtr);\n\n\t\telog(DEBUG2, \"current flushLsn %X/%X PageserverFeedback: write %X/%X flush %X/%X apply %X/%X\",\n\t\t\t LSN_FORMAT_ARGS(myFlushLsn),\n\t\t\t LSN_FORMAT_ARGS(writePtr),\n\t\t\t LSN_FORMAT_ARGS(flushPtr),\n\t\t\t LSN_FORMAT_ARGS(applyPtr));\n\n\t\tif (lakebase_mode)\n\t\t{\n\t\t\t// in case PG does not have shard map initialized, we assume PG always has 1 shard at minimum.\n\t\t\tshardno_t num_shards = Max(1, get_num_shards());\n\t\t\tint tenant_max_replication_apply_lag = num_shards * max_replication_apply_lag;\n\t\t\tint tenant_max_replication_flush_lag = num_shards * max_replication_flush_lag;\n\t\t\tint tenant_max_replication_write_lag = num_shards * max_replication_write_lag;\n\n\t\t\tif ((writePtr != InvalidXLogRecPtr && tenant_max_replication_write_lag > 0 && myFlushLsn > writePtr + tenant_max_replication_write_lag * MB))\n\t\t\t{\n\t\t\t\treturn (myFlushLsn - writePtr - tenant_max_replication_write_lag * MB);\n\t\t\t}\n\n\t\t\tif ((flushPtr != InvalidXLogRecPtr && 
tenant_max_replication_flush_lag > 0 && myFlushLsn > flushPtr + tenant_max_replication_flush_lag * MB))\n\t\t\t{\n\t\t\t\treturn (myFlushLsn - flushPtr - tenant_max_replication_flush_lag * MB);\n\t\t\t}\n\n\t\t\tif ((applyPtr != InvalidXLogRecPtr && tenant_max_replication_apply_lag > 0 && myFlushLsn > applyPtr + tenant_max_replication_apply_lag * MB))\n\t\t\t{\n\t\t\t\treturn (myFlushLsn - applyPtr - tenant_max_replication_apply_lag * MB);\n\t\t\t}\n\t\t}\n\t\telse\n\t\t{\n\t\t\tif ((writePtr != InvalidXLogRecPtr && max_replication_write_lag > 0 && myFlushLsn > writePtr + max_replication_write_lag * MB))\n\t\t\t{\n\t\t\t\treturn (myFlushLsn - writePtr - max_replication_write_lag * MB);\n\t\t\t}\n\n\t\t\tif ((flushPtr != InvalidXLogRecPtr && max_replication_flush_lag > 0 && myFlushLsn > flushPtr + max_replication_flush_lag * MB))\n\t\t\t{\n\t\t\t\treturn (myFlushLsn - flushPtr - max_replication_flush_lag * MB);\n\t\t\t}\n\n\t\t\tif ((applyPtr != InvalidXLogRecPtr && max_replication_apply_lag > 0 && myFlushLsn > applyPtr + max_replication_apply_lag * MB))\n\t\t\t{\n\t\t\t\treturn (myFlushLsn - applyPtr - max_replication_apply_lag * MB);\n\t\t\t}\n\t\t}\n\t}\n\treturn 0;\n}\n\n/*\n * We don't apply backpressure when we're the postmaster, or the startup\n * process, because in postmaster we can't apply backpressure, and in\n * the startup process we can't afford to slow down.\n */\nstatic uint64\nstartup_backpressure_wrap(void)\n{\n\tif (AmStartupProcess() || !IsUnderPostmaster)\n\t\treturn 0;\n\n\tdelay_backend_us = &hadron_backpressure_lag_impl;\n\n\treturn hadron_backpressure_lag_impl();\n}\n\n/*\n * WalproposerShmemSize --- report amount of shared memory space needed\n */\nstatic Size\nWalproposerShmemSize(void)\n{\n\treturn sizeof(WalproposerShmemState);\n}\n\nvoid\nWalproposerShmemInit(void)\n{\n\tbool\t\tfound;\n\n\twalprop_shared = ShmemInitStruct(\"Walproposer shared state\",\n\t\t\t\t\t\t\t\t\t sizeof(WalproposerShmemState),\n\t\t\t\t\t\t\t\t\t 
&found);\n\n\tif (!found)\n\t{\n\t\tmemset(walprop_shared, 0, WalproposerShmemSize());\n\t\tSpinLockInit(&walprop_shared->mutex);\n\t\tpg_atomic_init_u64(&walprop_shared->propEpochStartLsn, 0);\n\t\tpg_atomic_init_u64(&walprop_shared->mineLastElectedTerm, 0);\n\t\tpg_atomic_init_u64(&walprop_shared->backpressureThrottlingTime, 0);\n\t\tpg_atomic_init_u64(&walprop_shared->currentClusterSize, 0);\n\t\t/* BEGIN_HADRON */\n\t\tpg_atomic_init_u32(&walprop_shared->wal_rate_limiter.effective_max_wal_bytes_per_second, -1);\n\t\tpg_atomic_init_u32(&walprop_shared->wal_rate_limiter.should_limit, 0);\n\t\tpg_atomic_init_u64(&walprop_shared->wal_rate_limiter.batch_start_time_us, 0);\n\t\tpg_atomic_init_u64(&walprop_shared->wal_rate_limiter.batch_end_time_us, 0);\n\t\t/* END_HADRON */\n\t}\n}\n\nstatic void\nWalproposerShmemInit_SyncSafekeeper(void)\n{\n\twalprop_shared = palloc(WalproposerShmemSize());\n\tmemset(walprop_shared, 0, WalproposerShmemSize());\n\tSpinLockInit(&walprop_shared->mutex);\n\tpg_atomic_init_u64(&walprop_shared->propEpochStartLsn, 0);\n\tpg_atomic_init_u64(&walprop_shared->mineLastElectedTerm, 0);\n\tpg_atomic_init_u64(&walprop_shared->backpressureThrottlingTime, 0);\n\t/* BEGIN_HADRON */\n\tpg_atomic_init_u32(&walprop_shared->wal_rate_limiter.effective_max_wal_bytes_per_second, -1);\n\tpg_atomic_init_u32(&walprop_shared->wal_rate_limiter.should_limit, 0);\n\tpg_atomic_init_u64(&walprop_shared->wal_rate_limiter.batch_start_time_us, 0);\n\tpg_atomic_init_u64(&walprop_shared->wal_rate_limiter.batch_end_time_us, 0);\n\t/* END_HADRON */\n}\n\n#define BACK_PRESSURE_DELAY 10000L // 0.01 sec\n\nstatic bool\nbackpressure_throttling_impl(void)\n{\n\tuint64\t\tlag;\n\tTimestampTz start,\n\t\t\t\tstop;\n\tbool\t\tretry = false;\n\tchar\t   *new_status = NULL;\n\tconst char *old_status;\n\tint\t\t\tlen;\n\n\tif (PointerIsValid(PrevProcessInterruptsCallback))\n\t\tretry = PrevProcessInterruptsCallback();\n\n\t/*\n\t * Don't throttle read only transactions or wal 
sender. Do throttle CREATE\n\t * INDEX CONCURRENTLY, however. It performs some stages outside a\n\t * transaction, even though it writes a lot of WAL. Check PROC_IN_SAFE_IC\n\t * flag to cover that case.\n\t */\n\tif (am_walsender\n\t\t|| (!(MyProc->statusFlags & PROC_IN_SAFE_IC)\n\t\t\t&& !TransactionIdIsValid(GetCurrentTransactionIdIfAny())))\n\t\treturn retry;\n\n\t/* Calculate replicas lag */\n\tlag = hadron_backpressure_lag_impl();\n\tif (lag == 0)\n\t\treturn retry;\n\n\told_status = get_ps_display(&len);\n\tnew_status = (char *) palloc(len + 64 + 1);\n\tmemcpy(new_status, old_status, len);\n\tsnprintf(new_status + len, 64, \"backpressure throttling: lag %lu\", lag);\n\tset_ps_display(new_status);\n\tnew_status[len] = '\\0';\t\t/* truncate off \" backpressure ...\" to later\n\t\t\t\t\t\t\t\t * reset the ps */\n\n\telog(DEBUG2, \"backpressure throttling: lag %lu\", lag);\n\tstart = GetCurrentTimestamp();\n\tpg_usleep(BACK_PRESSURE_DELAY);\n\tstop = GetCurrentTimestamp();\n\tpg_atomic_add_fetch_u64(&walprop_shared->backpressureThrottlingTime, stop - start);\n\n\t/* Reset ps display */\n\tset_ps_display(new_status);\n\tpfree(new_status);\n\n\treturn true;\n}\n\nuint64\nBackpressureThrottlingTime(void)\n{\n\treturn pg_atomic_read_u64(&walprop_shared->backpressureThrottlingTime);\n}\n\n/*\n * Register a background worker proposing WAL to wal acceptors.\n * We start walproposer bgworker even for replicas in order to support possible replica promotion.\n * When pg_promote() function is called, then walproposer bgworker registered with BgWorkerStart_RecoveryFinished\n * is automatically launched when promotion is completed.\n */\nstatic void\nwalprop_register_bgworker(void)\n{\n\tBackgroundWorker bgw;\n\n\tmemset(&bgw, 0, sizeof(bgw));\n\tbgw.bgw_flags = BGWORKER_SHMEM_ACCESS;\n\tbgw.bgw_start_time = BgWorkerStart_RecoveryFinished;\n\tsnprintf(bgw.bgw_library_name, BGW_MAXLEN, \"neon\");\n\tsnprintf(bgw.bgw_function_name, BGW_MAXLEN, 
\"WalProposerMain\");\n\tsnprintf(bgw.bgw_name, BGW_MAXLEN, \"WAL proposer\");\n\tsnprintf(bgw.bgw_type, BGW_MAXLEN, \"WAL proposer\");\n\tbgw.bgw_restart_time = 1;\n\tbgw.bgw_notify_pid = 0;\n\tbgw.bgw_main_arg = (Datum) 0;\n\n\tRegisterBackgroundWorker(&bgw);\n}\n\n/* shmem handling */\n\n/*\n * shmem_request hook: request additional shared resources.  We'll allocate or\n * attach to the shared resources in WalproposerShmemInit().\n */\nvoid\nWalproposerShmemRequest(void)\n{\n\tRequestAddinShmemSpace(WalproposerShmemSize());\n}\n\nWalproposerShmemState *\nGetWalpropShmemState(void)\n{\n\tAssert(walprop_shared != NULL);\n\treturn walprop_shared;\n}\n\nstatic WalproposerShmemState *\nwalprop_pg_get_shmem_state(WalProposer *wp)\n{\n\tAssert(walprop_shared != NULL);\n\treturn walprop_shared;\n}\n\n/*\n * Record new ps_feedback in the array with shards and update min_feedback.\n */\nstatic PageserverFeedback\nrecord_pageserver_feedback(PageserverFeedback *ps_feedback, shardno_t num_shards)\n{\n\tPageserverFeedback min_feedback;\n\n\tAssert(ps_feedback->present);\n\tAssert(ps_feedback->shard_number < MAX_SHARDS);\n\tAssert(ps_feedback->shard_number < num_shards);\n\n\t// Begin Hadron: Record any corruption signal from the pageserver first.\n\tif (ps_feedback->corruption_detected) {\n\t\tpg_atomic_write_u32(&databricks_metrics_shared->ps_corruption_detected, 1);\n\t}\n\n\tSpinLockAcquire(&walprop_shared->mutex);\n\n\t// Hadron: Update the num_shards from the source-of-truth (shard map) lazily when we receive\n\t// a new pageserver feedback.\n\twalprop_shared->num_shards = Max(walprop_shared->num_shards, num_shards);\n\n\t/* Update the feedback */\n\tmemcpy(&walprop_shared->shard_ps_feedback[ps_feedback->shard_number], ps_feedback, sizeof(PageserverFeedback));\n\n\t/* Calculate min LSNs */\n\tmemcpy(&min_feedback, ps_feedback, sizeof(PageserverFeedback));\n\tfor (int i = 0; i < walprop_shared->num_shards; i++)\n\t{\n\t\tPageserverFeedback *feedback = 
&walprop_shared->shard_ps_feedback[i];\n\n\t\tif (feedback->present)\n\t\t{\n\t\t\tif (min_feedback.last_received_lsn == InvalidXLogRecPtr || feedback->last_received_lsn < min_feedback.last_received_lsn)\n\t\t\t\tmin_feedback.last_received_lsn = feedback->last_received_lsn;\n\n\t\t\tif (min_feedback.disk_consistent_lsn == InvalidXLogRecPtr || feedback->disk_consistent_lsn < min_feedback.disk_consistent_lsn)\n\t\t\t\tmin_feedback.disk_consistent_lsn = feedback->disk_consistent_lsn;\n\n\t\t\tif (min_feedback.remote_consistent_lsn == InvalidXLogRecPtr || feedback->remote_consistent_lsn < min_feedback.remote_consistent_lsn)\n\t\t\t\tmin_feedback.remote_consistent_lsn = feedback->remote_consistent_lsn;\n\t\t}\n\t}\n\t/* Copy min_feedback back to shmem */\n\tmemcpy(&walprop_shared->min_ps_feedback, &min_feedback, sizeof(PageserverFeedback));\n\n\tSpinLockRelease(&walprop_shared->mutex);\n\n\treturn min_feedback;\n}\n\nvoid\nreplication_feedback_get_lsns(XLogRecPtr *writeLsn, XLogRecPtr *flushLsn, XLogRecPtr *applyLsn)\n{\n\tSpinLockAcquire(&walprop_shared->mutex);\n\t*writeLsn = walprop_shared->min_ps_feedback.last_received_lsn;\n\t*flushLsn = walprop_shared->min_ps_feedback.disk_consistent_lsn;\n\t*applyLsn = walprop_shared->min_ps_feedback.remote_consistent_lsn;\n\tSpinLockRelease(&walprop_shared->mutex);\n}\n\n/*\n * Start walproposer streaming replication\n */\nstatic void\nwalprop_pg_start_streaming(WalProposer *wp, XLogRecPtr startpos)\n{\n\tStartReplicationCmd cmd;\n\n\twpg_log(LOG, \"WAL proposer starts streaming at %X/%X\",\n\t\t\tLSN_FORMAT_ARGS(startpos));\n\tcmd.slotname = WAL_PROPOSER_SLOT_NAME;\n\tcmd.timeline = wp->config->pgTimeline;\n\tcmd.startpoint = startpos;\n\tStartProposerReplication(wp, &cmd);\n}\n\nstatic void\nwalprop_pg_init_walsender(void)\n{\n\tam_walsender = true;\n\tInitWalSender();\n\tInitProcessPhase2();\n\n\t/* Create replication slot for WAL proposer if not exists */\n\tif (SearchNamedReplicationSlot(WAL_PROPOSER_SLOT_NAME, false) == 
NULL)\n\t{\n#if PG_MAJORVERSION_NUM >= 17\n\t\tReplicationSlotCreate(WAL_PROPOSER_SLOT_NAME, false, RS_PERSISTENT,\n\t\t\t\t\t\t\t  false, false, false);\n#else\n\t\tReplicationSlotCreate(WAL_PROPOSER_SLOT_NAME, false, RS_PERSISTENT, false);\n#endif\n\t\tReplicationSlotReserveWal();\n\t\t/* Write this slot to disk */\n\t\tReplicationSlotMarkDirty();\n\t\tReplicationSlotSave();\n\t\tReplicationSlotRelease();\n\t}\n}\n\nstatic void\nwalprop_pg_init_standalone_sync_safekeepers(void)\n{\n\tstruct stat stat_buf;\n\n#if PG_VERSION_NUM < 150000\n\tThisTimeLineID = 1;\n#endif\n\n\t/*\n\t * Initialize postmaster_alive_fds as WaitEventSet checks them.\n\t *\n\t * Copied from InitPostmasterDeathWatchHandle()\n\t */\n\tif (pipe(postmaster_alive_fds) < 0)\n\t\tereport(FATAL,\n\t\t\t\t(errcode_for_file_access(),\n\t\t\t\t errmsg_internal(\"could not create pipe to monitor postmaster death: %m\")));\n\tif (fcntl(postmaster_alive_fds[POSTMASTER_FD_WATCH], F_SETFL, O_NONBLOCK) == -1)\n\t\tereport(FATAL,\n\t\t\t\t(errcode_for_socket_access(),\n\t\t\t\t errmsg_internal(\"could not set postmaster death monitoring pipe to nonblocking mode: %m\")));\n\n\tChangeToDataDir();\n\n\t/* Create pg_wal directory, if it doesn't exist */\n\tif (stat(XLOGDIR, &stat_buf) != 0)\n\t{\n\t\tereport(LOG, (errmsg(\"creating missing WAL directory \\\"%s\\\"\", XLOGDIR)));\n\t\tif (MakePGDirectory(XLOGDIR) < 0)\n\t\t{\n\t\t\tereport(ERROR,\n\t\t\t\t\t(errcode_for_file_access(),\n\t\t\t\t\t errmsg(\"could not create directory \\\"%s\\\": %m\",\n\t\t\t\t\t\t\tXLOGDIR)));\n\t\t\texit(1);\n\t\t}\n\t}\n\tBackgroundWorkerUnblockSignals();\n}\n\n/*\n * We pretend to be a walsender process, and the lifecycle of a walsender is\n * slightly different than other processes. At shutdown, walsender processes\n * stay alive until the very end, after the checkpointer has written the\n * shutdown checkpoint. When the checkpointer exits, the postmaster sends all\n * remaining walsender processes SIGUSR2. 
On receiving SIGUSR2, we try to send\n * the remaining WAL, and then exit. This ensures that the checkpoint record\n * reaches durable storage (in safekeepers), before the server shuts down\n * completely.\n */\nstatic void\nwalprop_sigusr2(SIGNAL_ARGS)\n{\n\tint\t\t\tsave_errno = errno;\n\n\tgot_SIGUSR2 = true;\n\tSetLatch(MyLatch);\n\terrno = save_errno;\n}\n\nstatic void\nwalprop_pg_init_bgworker(void)\n{\n#if PG_VERSION_NUM >= 150000\n\tTimeLineID\ttli;\n#endif\n\n\t/* Establish signal handlers. */\n\tpqsignal(SIGUSR1, procsignal_sigusr1_handler);\n\tpqsignal(SIGHUP, SignalHandlerForConfigReload);\n\tpqsignal(SIGTERM, die);\n\tpqsignal(SIGUSR2, walprop_sigusr2);\n\n\tBackgroundWorkerUnblockSignals();\n\n\tapplication_name = (char *) \"walproposer\";\t/* for\n\t\t\t\t\t\t\t\t\t\t\t\t * synchronous_standby_names */\n\n#if PG_VERSION_NUM >= 150000\n\t/* FIXME pass proper tli to WalProposerInit ? */\n\tGetXLogReplayRecPtr(&tli);\n#else\n\tGetXLogReplayRecPtr(&ThisTimeLineID);\n#endif\n}\n\nstatic XLogRecPtr\nwalprop_pg_get_flush_rec_ptr(WalProposer *wp)\n{\n#if PG_MAJORVERSION_NUM < 15\n\treturn GetFlushRecPtr();\n#else\n\treturn GetFlushRecPtr(NULL);\n#endif\n}\n\nstatic TimestampTz\nwalprop_pg_get_current_timestamp(WalProposer *wp)\n{\n\treturn GetCurrentTimestamp();\n}\n\nTimeLineID\nwalprop_pg_get_timeline_id(void)\n{\n#if PG_VERSION_NUM >= 150000\n\t/* FIXME don't use hardcoded timeline id */\n\treturn 1;\n#else\n\treturn ThisTimeLineID;\n#endif\n}\n\nstatic void\nwalprop_pg_load_libpqwalreceiver(void)\n{\n\tload_file(\"libpqwalreceiver\", false);\n\tif (WalReceiverFunctions == NULL)\n\t\twpg_log(ERROR, \"libpqwalreceiver didn't initialize correctly\");\n}\n\nstatic void\nwalprop_pg_update_donor(WalProposer *wp, Safekeeper *donor, XLogRecPtr donor_lsn)\n{\n\tWalproposerShmemState *wps = wp->api.get_shmem_state(wp);\n\tchar\t\tdonor_name[64];\n\n\tpg_snprintf(donor_name, sizeof(donor_name), \"%s:%s\", donor->host, 
donor->port);\n\tSpinLockAcquire(&wps->mutex);\n\tmemcpy(wps->donor_name, donor_name, sizeof(donor_name));\n\tmemcpy(wps->donor_conninfo, donor->conninfo, sizeof(donor->conninfo));\n\twps->donor_lsn = donor_lsn;\n\tSpinLockRelease(&wps->mutex);\n}\n\n/* Helper function */\nstatic bool\nensure_nonblocking_status(WalProposerConn *conn, bool is_nonblocking)\n{\n\t/* If we're already correctly blocking or nonblocking, all good */\n\tif (is_nonblocking == conn->is_nonblocking)\n\t\treturn true;\n\n\t/* Otherwise, set it appropriately */\n\tif (PQsetnonblocking(conn->pg_conn, is_nonblocking) == -1)\n\t\treturn false;\n\n\tconn->is_nonblocking = is_nonblocking;\n\treturn true;\n}\n\n/* Exported function definitions */\nstatic char *\nwalprop_error_message(Safekeeper *sk)\n{\n\treturn PQerrorMessage(sk->conn->pg_conn);\n}\n\nstatic WalProposerConnStatusType\nwalprop_status(Safekeeper *sk)\n{\n\tswitch (PQstatus(sk->conn->pg_conn))\n\t{\n\t\tcase CONNECTION_OK:\n\t\t\treturn WP_CONNECTION_OK;\n\t\tcase CONNECTION_BAD:\n\t\t\treturn WP_CONNECTION_BAD;\n\t\tdefault:\n\t\t\treturn WP_CONNECTION_IN_PROGRESS;\n\t}\n}\n\nWalProposerConn *\nlibpqwp_connect_start(char *conninfo)\n{\n\n\tPGconn\t   *pg_conn;\n\tWalProposerConn *conn;\n\tconst char *keywords[3];\n\tconst char *values[3];\n\tint\t\t\tn;\n\tchar\t   *password = neon_auth_token;\n\n\n\t/*\n\t * Connect using the given connection string. If the NEON_AUTH_TOKEN\n\t * environment variable was set, use that as the password.\n\t *\n\t * The connection options are parsed in the order they're given, so when\n\t * we set the password before the connection string, the connection string\n\t * can override the password from the env variable. 
Seems useful, although\n\t * we don't currently use that capability anywhere.\n\t */\n\tn = 0;\n\tif (password)\n\t{\n\t\tkeywords[n] = \"password\";\n\t\tvalues[n] = password;\n\t\tn++;\n\t}\n\tkeywords[n] = \"dbname\";\n\tvalues[n] = conninfo;\n\tn++;\n\tkeywords[n] = NULL;\n\tvalues[n] = NULL;\n\tn++;\n\tpg_conn = PQconnectStartParams(keywords, values, 1);\n\n\t/*\n\t * \"If the result is null, then libpq has been unable to allocate a new\n\t * PGconn structure\"\n\t */\n\tif (!pg_conn)\n\t\twpg_log(FATAL, \"failed to allocate new PGconn object\");\n\n\t/*\n\t * And in theory this allocation can fail as well, but it's incredibly\n\t * unlikely if we just successfully allocated a PGconn.\n\t *\n\t * palloc will exit on failure though, so there's not much we could do if\n\t * it *did* fail.\n\t */\n\tconn = (WalProposerConn*)MemoryContextAllocZero(TopMemoryContext, sizeof(WalProposerConn));\n\tconn->pg_conn = pg_conn;\n\tconn->is_nonblocking = false;\t/* connections always start in blocking\n\t\t\t\t\t\t\t\t\t * mode */\n\tconn->recvbuf = NULL;\n\treturn conn;\n}\n\nstatic void\nwalprop_connect_start(Safekeeper *sk)\n{\n\tAssert(sk->conn == NULL);\n\tsk->conn = libpqwp_connect_start(sk->conninfo);\n}\n\nstatic WalProposerConnectPollStatusType\nwalprop_connect_poll(Safekeeper *sk)\n{\n\tWalProposerConnectPollStatusType return_val;\n\n\tswitch (PQconnectPoll(sk->conn->pg_conn))\n\t{\n\t\tcase PGRES_POLLING_FAILED:\n\t\t\treturn_val = WP_CONN_POLLING_FAILED;\n\t\t\tbreak;\n\t\tcase PGRES_POLLING_READING:\n\t\t\treturn_val = WP_CONN_POLLING_READING;\n\t\t\tbreak;\n\t\tcase PGRES_POLLING_WRITING:\n\t\t\treturn_val = WP_CONN_POLLING_WRITING;\n\t\t\tbreak;\n\t\tcase PGRES_POLLING_OK:\n\t\t\treturn_val = WP_CONN_POLLING_OK;\n\t\t\tbreak;\n\n\t\t\t/*\n\t\t\t * There's a comment at its source about this constant being\n\t\t\t * unused. 
We'll expect it's never returned.\n\t\t\t */\n\t\tcase PGRES_POLLING_ACTIVE:\n\t\t\twpg_log(FATAL, \"unexpected PGRES_POLLING_ACTIVE returned from PQconnectPoll\");\n\n\t\t\t/*\n\t\t\t * This return is never actually reached, but it's here to make\n\t\t\t * the compiler happy\n\t\t\t */\n\t\t\treturn WP_CONN_POLLING_FAILED;\n\n\t\tdefault:\n\t\t\tAssert(false);\n\t\t\treturn_val = WP_CONN_POLLING_FAILED;\t/* keep the compiler quiet */\n\t}\n\n\treturn return_val;\n}\n\nextern bool\nlibpqwp_send_query(WalProposerConn *conn, char *query)\n{\n\t/*\n\t * We need to be in blocking mode for sending the query to run without\n\t * requiring a call to PQflush\n\t */\n\tif (!ensure_nonblocking_status(conn, false))\n\t\treturn false;\n\n\t/* PQsendQuery returns 1 on success, 0 on failure */\n\tif (!PQsendQuery(conn->pg_conn, query))\n\t\treturn false;\n\n\treturn true;\n}\n\nstatic bool\nwalprop_send_query(Safekeeper *sk, char *query)\n{\n\treturn libpqwp_send_query(sk->conn, query);\n}\n\nWalProposerExecStatusType\nlibpqwp_get_query_result(WalProposerConn *conn)\n{\n\n\tPGresult   *result;\n\tWalProposerExecStatusType return_val;\n\n\t/* Marker variable if we need to log an unexpected success result */\n\tchar\t   *unexpected_success = NULL;\n\n\t/* Consume any input that we might be missing */\n\tif (!PQconsumeInput(conn->pg_conn))\n\t\treturn WP_EXEC_FAILED;\n\n\tif (PQisBusy(conn->pg_conn))\n\t\treturn WP_EXEC_NEEDS_INPUT;\n\n\n\tresult = PQgetResult(conn->pg_conn);\n\n\t/*\n\t * PQgetResult returns NULL only if getting the result was successful &\n\t * there's no more of the result to get.\n\t */\n\tif (!result)\n\t{\n\t\twpg_log(WARNING, \"[libpqwalproposer] Unexpected successful end of command results\");\n\t\treturn WP_EXEC_UNEXPECTED_SUCCESS;\n\t}\n\n\t/* Helper macro to reduce boilerplate */\n#define UNEXPECTED_SUCCESS(msg) \\\n\t\treturn_val = WP_EXEC_UNEXPECTED_SUCCESS; \\\n\t\tunexpected_success = msg; \\\n\t\tbreak;\n\n\n\tswitch 
(PQresultStatus(result))\n\t{\n\t\t\t/* \"true\" success case */\n\t\tcase PGRES_COPY_BOTH:\n\t\t\treturn_val = WP_EXEC_SUCCESS_COPYBOTH;\n\t\t\tbreak;\n\n\t\t\t/* Unexpected success case */\n\t\tcase PGRES_EMPTY_QUERY:\n\t\t\tUNEXPECTED_SUCCESS(\"empty query return\");\n\t\tcase PGRES_COMMAND_OK:\n\t\t\tUNEXPECTED_SUCCESS(\"data-less command end\");\n\t\tcase PGRES_TUPLES_OK:\n\t\t\tUNEXPECTED_SUCCESS(\"tuples return\");\n\t\tcase PGRES_COPY_OUT:\n\t\t\tUNEXPECTED_SUCCESS(\"'Copy Out' response\");\n\t\tcase PGRES_COPY_IN:\n\t\t\tUNEXPECTED_SUCCESS(\"'Copy In' response\");\n\t\tcase PGRES_SINGLE_TUPLE:\n\t\t\tUNEXPECTED_SUCCESS(\"single tuple return\");\n\t\tcase PGRES_PIPELINE_SYNC:\n\t\t\tUNEXPECTED_SUCCESS(\"pipeline sync point\");\n\n\t\t\t/* Failure cases */\n\t\tcase PGRES_BAD_RESPONSE:\n\t\tcase PGRES_NONFATAL_ERROR:\n\t\tcase PGRES_FATAL_ERROR:\n\t\tcase PGRES_PIPELINE_ABORTED:\n\t\t\treturn_val = WP_EXEC_FAILED;\n\t\t\tbreak;\n\n\t\tdefault:\n\t\t\tAssert(false);\n\t\t\treturn_val = WP_EXEC_FAILED;\t/* keep the compiler quiet */\n\t}\n\n\tif (unexpected_success)\n\t\twpg_log(WARNING, \"[libpqwalproposer] Unexpected successful %s\", unexpected_success);\n\n\treturn return_val;\n}\n\nstatic WalProposerExecStatusType\nwalprop_get_query_result(Safekeeper *sk)\n{\n\treturn libpqwp_get_query_result(sk->conn);\n}\n\nstatic pgsocket\nwalprop_socket(Safekeeper *sk)\n{\n\treturn PQsocket(sk->conn->pg_conn);\n}\n\nstatic int\nwalprop_flush(Safekeeper *sk)\n{\n\treturn (PQflush(sk->conn->pg_conn));\n}\n\n/* Like libpqrcv_receive. *buf is valid until the next call. */\nPGAsyncReadResult\nlibpqwp_async_read(WalProposerConn *conn, char **buf, int *amount)\n{\n\tint\t\t\trawlen;\n\n\tif (conn->recvbuf != NULL)\n\t{\n\t\tPQfreemem(conn->recvbuf);\n\t\tconn->recvbuf = NULL;\n\t}\n\n\t/* Try to receive a CopyData message */\n\trawlen = PQgetCopyData(conn->pg_conn, &conn->recvbuf, true);\n\tif (rawlen == 0)\n\t{\n\t\t/* Try consuming some data. 
*/\n\t\tif (!PQconsumeInput(conn->pg_conn))\n\t\t{\n\t\t\t*amount = 0;\n\t\t\t*buf = NULL;\n\t\t\treturn PG_ASYNC_READ_FAIL;\n\t\t}\n\t\t/* Now that we've consumed some input, try again */\n\t\trawlen = PQgetCopyData(conn->pg_conn, &conn->recvbuf, true);\n\t}\n\n\t/*\n\t * The docs for PQgetCopyData list the return values as: 0 if the copy is\n\t * still in progress, but no \"complete row\" is available -1 if the copy is\n\t * done -2 if an error occurred (> 0) if it was successful; that value is\n\t * the amount transferred.\n\t *\n\t * The protocol we use between walproposer and safekeeper means that we\n\t * *usually* wouldn't expect to see that the copy is done, but this can\n\t * sometimes be triggered by the server returning an ErrorResponse (which\n\t * also happens to have the effect that the copy is done).\n\t */\n\tswitch (rawlen)\n\t{\n\t\tcase 0:\n\t\t\t*amount = 0;\n\t\t\t*buf = NULL;\n\t\t\treturn PG_ASYNC_READ_TRY_AGAIN;\n\t\tcase -1:\n\t\t\t{\n\t\t\t\t/*\n\t\t\t\t * If we get -1, it's probably because of a server error; the\n\t\t\t\t * safekeeper won't normally send a CopyDone message.\n\t\t\t\t *\n\t\t\t\t * We can check PQgetResult to make sure that the server\n\t\t\t\t * failed; it'll always result in PGRES_FATAL_ERROR\n\t\t\t\t */\n\t\t\t\tExecStatusType status = PQresultStatus(PQgetResult(conn->pg_conn));\n\n\t\t\t\tif (status != PGRES_FATAL_ERROR)\n\t\t\t\t\twpg_log(FATAL, \"unexpected result status %d after failed PQgetCopyData\", status);\n\n\t\t\t\t/*\n\t\t\t\t * If there was actually an error, it'll be properly reported\n\t\t\t\t * by calls to PQerrorMessage -- we don't have to do anything\n\t\t\t\t * else\n\t\t\t\t */\n\t\t\t\t*amount = 0;\n\t\t\t\t*buf = NULL;\n\t\t\t\treturn PG_ASYNC_READ_FAIL;\n\t\t\t}\n\t\tcase -2:\n\t\t\t*amount = 0;\n\t\t\t*buf = NULL;\n\t\t\treturn PG_ASYNC_READ_FAIL;\n\t\tdefault:\n\t\t\t/* Positive values indicate the size of the returned result */\n\t\t\t*amount = rawlen;\n\t\t\t*buf = 
conn->recvbuf;\n\t\t\treturn PG_ASYNC_READ_SUCCESS;\n\t}\n}\n\n/*\n * Receive a message from the safekeeper.\n *\n * On success, the data is placed in *buf. It is valid until the next call\n * to this function.\n */\nstatic PGAsyncReadResult\nwalprop_async_read(Safekeeper *sk, char **buf, int *amount)\n{\n\treturn libpqwp_async_read(sk->conn, buf, amount);\n}\n\nstatic PGAsyncWriteResult\nwalprop_async_write(Safekeeper *sk, void const *buf, size_t size)\n{\n\tint\t\t\tresult;\n\n\t/* If we aren't in non-blocking mode, switch to it. */\n\tif (!ensure_nonblocking_status(sk->conn, true))\n\t\treturn PG_ASYNC_WRITE_FAIL;\n\n\t/*\n\t * The docs for PQputCopyData list the return values as: 1 if the data was\n\t * queued, 0 if it was not queued because of full buffers, or -1 if an\n\t * error occurred\n\t */\n\tresult = PQputCopyData(sk->conn->pg_conn, buf, size);\n\n\t/*\n\t * We won't get a result of zero because walproposer always empties the\n\t * connection's buffers before sending more\n\t */\n\tAssert(result != 0);\n\n\tswitch (result)\n\t{\n\t\tcase 1:\n\t\t\t/* good -- continue */\n\t\t\tbreak;\n\t\tcase -1:\n\t\t\treturn PG_ASYNC_WRITE_FAIL;\n\t\tdefault:\n\t\t\twpg_log(FATAL, \"invalid return %d from PQputCopyData\", result);\n\t}\n\n\t/*\n\t * After queueing the data, we still need to flush to get it to send. This\n\t * might take multiple tries, but we don't want to wait around until it's\n\t * done.\n\t *\n\t * PQflush has the following returns (directly quoting the docs): 0 if\n\t * successful, 1 if it was unable to send all the data in the send queue\n\t * yet -1 if it failed for some reason\n\t */\n\tswitch (result = PQflush(sk->conn->pg_conn))\n\t{\n\t\tcase 0:\n\t\t\treturn PG_ASYNC_WRITE_SUCCESS;\n\t\tcase 1:\n\t\t\treturn PG_ASYNC_WRITE_TRY_FLUSH;\n\t\tcase -1:\n\t\t\treturn PG_ASYNC_WRITE_FAIL;\n\t\tdefault:\n\t\t\twpg_log(FATAL, \"invalid return %d from PQflush\", result);\n\t}\n}\n\n/*\n * This function is very similar to walprop_async_write. 
For more\n * information, refer to the comments there.\n */\nstatic bool\nwalprop_blocking_write(Safekeeper *sk, void const *buf, size_t size)\n{\n\tint\t\t\tresult;\n\n\t/* If we are in non-blocking mode, switch out of it. */\n\tif (!ensure_nonblocking_status(sk->conn, false))\n\t\treturn false;\n\n\tif ((result = PQputCopyData(sk->conn->pg_conn, buf, size)) == -1)\n\t\treturn false;\n\n\tAssert(result == 1);\n\n\t/* Because the connection is non-blocking, flushing returns 0 or -1 */\n\n\tif ((result = PQflush(sk->conn->pg_conn)) == -1)\n\t\treturn false;\n\n\tAssert(result == 0);\n\treturn true;\n}\n\nvoid\nlibpqwp_disconnect(WalProposerConn *conn)\n{\n\tif (conn->recvbuf != NULL)\n\t\tPQfreemem(conn->recvbuf);\n\tPQfinish(conn->pg_conn);\n\tpfree(conn);\n}\n\nstatic void\nwalprop_finish(Safekeeper *sk)\n{\n\tif (sk->conn)\n\t{\n\t\tlibpqwp_disconnect(sk->conn);\n\t\tsk->conn = NULL;\n\t}\n\n\t/* free xlogreader */\n\tif (sk->xlogreader)\n\t{\n\t\tNeonWALReaderFree(sk->xlogreader);\n\t\tsk->xlogreader = NULL;\n\t}\n\trm_safekeeper_event_set(sk, false);\n}\n\n/*\n * Subscribe for new WAL and stream it in the loop to safekeepers.\n *\n * At the moment, this never returns, but an ereport(ERROR) will take us back\n * to the main loop.\n */\nstatic void\nStartProposerReplication(WalProposer *wp, StartReplicationCmd *cmd)\n{\n\tXLogRecPtr\tFlushPtr;\n\t__attribute__((unused)) TimeLineID currTLI;\n\n#if PG_VERSION_NUM < 150000\n\tif (ThisTimeLineID == 0)\n\t\tThisTimeLineID = 1;\n#endif\n\n\t/*\n\t * We assume here that we're logging enough information in the WAL for\n\t * log-shipping, since this is checked in PostmasterMain().\n\t *\n\t * NOTE: wal_level can only change at shutdown, so in most cases it is\n\t * difficult for there to be WAL data that we can still see that was\n\t * written at wal_level='minimal'.\n\t */\n\n\tif (cmd->slotname)\n\t{\n\t\tReplicationSlotAcquire(cmd->slotname, true);\n\t\tif 
(SlotIsLogical(MyReplicationSlot))\n\t\t\tereport(ERROR,\n\t\t\t\t\t(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),\n\t\t\t\t\t errmsg(\"cannot use a logical replication slot for physical replication\")));\n\n\t\t/*\n\t\t * We don't need to verify the slot's restart_lsn here; instead we\n\t\t * rely on the caller requesting the starting point to use.  If the\n\t\t * WAL segment doesn't exist, we'll fail later.\n\t\t */\n\t}\n\n\t/*\n\t * Select the timeline. If it was given explicitly by the client, use\n\t * that. Otherwise use the timeline of the last replayed record, which is\n\t * kept in ThisTimeLineID.\n\t *\n\t * Neon doesn't currently use PG Timelines, but it may in the future, so\n\t * we keep this code around to lighten the load for when we need it.\n\t */\n#if PG_VERSION_NUM >= 150000\n\tFlushPtr = GetFlushRecPtr(&currTLI);\n#else\n\tFlushPtr = GetFlushRecPtr();\n\tcurrTLI = ThisTimeLineID;\n#endif\n\n\t/*\n\t * XXX: Move straight to STOPPING state, skipping the STREAMING state.\n\t *\n\t * This is a bit weird. Normal walsenders stay in STREAMING state, until\n\t * the checkpointer signals them that it is about to start writing the\n\t * shutdown checkpoint. The walsenders acknowledge that they have received\n\t * that signal by switching to STOPPING state. That tells the walsenders\n\t * that they must not write any new WAL.\n\t *\n\t * However, we cannot easily intercept that signal from the checkpointer.\n\t * It's sent by WalSndInitStopping(), using\n\t * SendProcSignal(PROCSIGNAL_WALSND_INIT_STOPPING). It's received by\n\t * HandleWalSndInitStopping, which sets a process-local got_STOPPING flag.\n\t * However, that's all private to walsender.c.\n\t *\n\t * We don't need to do anything special upon receiving the signal, the\n\t * walproposer doesn't write any WAL anyway, so we skip the STREAMING\n\t * state and go directly to STOPPING mode. 
That way, the checkpointer\n\t * won't wait for us.\n\t */\n\tWalSndSetState(WALSNDSTATE_STOPPING);\n\n\t/*\n\t * Don't allow a request to stream from a future point in WAL that hasn't\n\t * been flushed to disk in this server yet.\n\t */\n\tif (FlushPtr < cmd->startpoint)\n\t{\n\t\tereport(ERROR,\n\t\t\t\t(errmsg(\"requested starting point %X/%X is ahead of the WAL flush position of this server %X/%X\",\n\t\t\t\t\t\tLSN_FORMAT_ARGS(cmd->startpoint),\n\t\t\t\t\t\tLSN_FORMAT_ARGS(FlushPtr))));\n\t}\n\n\t/* Start streaming from the requested point */\n\tsentPtr = cmd->startpoint;\n\n\t/* Initialize shared memory status, too */\n\tSpinLockAcquire(&MyWalSnd->mutex);\n\tMyWalSnd->sentPtr = sentPtr;\n\tSpinLockRelease(&MyWalSnd->mutex);\n\n\tSyncRepInitConfig();\n\n\t/* Infinite send loop, never returns */\n\tWalSndLoop(wp);\n\n\tWalSndSetState(WALSNDSTATE_STARTUP);\n\n\tif (cmd->slotname)\n\t\tReplicationSlotRelease();\n}\n\n/*\n * Main loop that waits for LSN updates and calls the walproposer.\n * Synchronous replication sets latch in WalSndWakeup at walsender.c\n */\nstatic void\nWalSndLoop(WalProposer *wp)\n{\n\t/* Clear any already-pending wakeups */\n\tResetLatch(MyLatch);\n\n\tfor (;;)\n\t{\n\t\tCHECK_FOR_INTERRUPTS();\n\n\t\tXLogBroadcastWalProposer(wp);\n\t\tWalProposerPoll(wp);\n\t}\n}\n\n/*\n * Notify walproposer about the new WAL position.\n */\nstatic void\nXLogBroadcastWalProposer(WalProposer *wp)\n{\n\tXLogRecPtr\tstartptr;\n\tXLogRecPtr\tendptr;\n\tstruct WalproposerShmemState *state = NULL;\n\tTimestampTz now = 0;\n\tint effective_max_wal_bytes_per_second = 0;\n\n\t/* Start from the last sent position */\n\tstartptr = sentPtr;\n\n\t/*\n\t * Streaming the current timeline on a primary.\n\t *\n\t * Attempt to send all data that's already been written out and fsync'd to\n\t * disk.  We cannot go further than what's been written out given the\n\t * current implementation of WALRead().  
And in any case it's unsafe to\n\t * send WAL that is not securely down to disk on the primary: if the\n\t * primary subsequently crashes and restarts, standbys must not have\n\t * applied any WAL that got lost on the primary.\n\t */\n#if PG_VERSION_NUM >= 150000\n\tendptr = GetFlushRecPtr(NULL);\n#else\n\tendptr = GetFlushRecPtr();\n#endif\n\n\t/*\n\t * Record the current system time as an approximation of the time at which\n\t * this WAL location was written for the purposes of lag tracking.\n\t *\n\t * In theory we could make XLogFlush() record a time in shmem whenever WAL\n\t * is flushed and we could get that time as well as the LSN when we call\n\t * GetFlushRecPtr() above (and likewise for the cascading standby\n\t * equivalent), but rather than putting any new code into the hot WAL path\n\t * it seems good enough to capture the time here.  We should reach this\n\t * after XLogFlush() runs WalSndWakeupProcessRequests(), and although that\n\t * may take some time, we read the WAL flush pointer and take the time\n\t * very close to together here so that we'll get a later position if it is\n\t * still moving.\n\t *\n\t * Because LagTrackerWrite ignores samples when the LSN hasn't advanced,\n\t * this gives us a cheap approximation for the WAL flush time for this\n\t * LSN.\n\t *\n\t * Note that the LSN is not necessarily the LSN for the data contained in\n\t * the present message; it's the end of the WAL, which might be further\n\t * ahead.  All the lag tracking machinery cares about is finding out when\n\t * that arbitrary LSN is eventually reported as written, flushed and\n\t * applied, so that it can measure the elapsed time.\n\t */\n\tnow = GetCurrentTimestamp();\n\tLagTrackerWrite(endptr, now);\n\n\t/* Do we have any work to do? 
*/\n\tAssert(startptr <= endptr);\n\tif (endptr <= startptr)\n\t\treturn;\n\n\t/* BEGIN_HADRON */\n\tstate = GetWalpropShmemState();\n\teffective_max_wal_bytes_per_second = pg_atomic_read_u32(&state->wal_rate_limiter.effective_max_wal_bytes_per_second);\n\tif (effective_max_wal_bytes_per_second != -1 && state != NULL)\n\t{\n\t\tstruct WalRateLimiter *limiter = &state->wal_rate_limiter;\n\t\tuint64 batch_end_time = pg_atomic_read_u64(&limiter->batch_end_time_us);\n\t\tif ( now >= batch_end_time )\n\t\t{\n\t\t\t// Reset the rate limiter to start a new batch\n\t\t\tlimiter->sent_bytes = 0;\n\t\t\tpg_atomic_write_u32(&limiter->should_limit, false);\n\t\t\tpg_atomic_write_u64(&limiter->batch_start_time_us, now);\n\t\t\t/* tentatively assign the batch end time as 1s from now. This could result in one of the following cases:\n\t\t\t1. If sent_bytes does not reach effective_max_wal_bytes_per_second in 1s,\n\t\t\tthen we will reset the current batch and clear sent_bytes. No throttling happens.\n\t\t\t2. Otherwise, we will recompute the end time (below) based on how many bytes are actually written,\n\t\t\tand throttle PG until the batch end time. 
*/\n\t\t\tpg_atomic_write_u64(&limiter->batch_end_time_us, now + USECS_PER_SEC);\n\t\t}\n\t\tlimiter->sent_bytes += (endptr - startptr);\n\t\tif (limiter->sent_bytes > effective_max_wal_bytes_per_second)\n\t\t{\n\t\t\tuint64_t batch_start_time = pg_atomic_read_u64(&limiter->batch_start_time_us);\n\t\t\tuint64 throttle_usecs = USECS_PER_SEC * limiter->sent_bytes / Max(effective_max_wal_bytes_per_second, 1);\n\t\t\tif (throttle_usecs > kRateLimitMaxBatchUSecs){\n\t\t\t\telog(LOG, \"throttle_usecs %lu is too large, limiting to %lu\", throttle_usecs, kRateLimitMaxBatchUSecs);\n\t\t\t\tthrottle_usecs = kRateLimitMaxBatchUSecs;\n\t\t\t}\n\n\t\t\tpg_atomic_write_u32(&limiter->should_limit, true);\n\t\t\tpg_atomic_write_u64(&limiter->batch_end_time_us, batch_start_time + throttle_usecs);\n\t\t}\n\t}\n\t/* END_HADRON */\n\n\tWalProposerBroadcast(wp, startptr, endptr);\n\tsentPtr = endptr;\n\n\t/* Update shared memory status */\n\t{\n\t\tWalSnd\t   *walsnd = MyWalSnd;\n\n\t\tSpinLockAcquire(&walsnd->mutex);\n\t\twalsnd->sentPtr = sentPtr;\n\t\tSpinLockRelease(&walsnd->mutex);\n\t}\n\n\t/* Report progress of XLOG streaming in PS display */\n\tif (update_process_title)\n\t{\n\t\tchar\t\tactivitymsg[50];\n\n\t\tsnprintf(activitymsg, sizeof(activitymsg), \"streaming %X/%X\",\n\t\t\t\t LSN_FORMAT_ARGS(sentPtr));\n\t\tset_ps_display(activitymsg);\n\t}\n}\n\n/*\n  Used to download WAL before basebackup for walproposer/logical walsenders. 
No\n  longer used, replaced by neon_walreader; but callback still exists because\n  simulation tests use it.\n */\nstatic bool\nWalProposerRecovery(WalProposer *wp, Safekeeper *sk)\n{\n\treturn true;\n}\n\nstatic void\nwalprop_pg_wal_reader_allocate(Safekeeper *sk)\n{\n\tchar\t\tlog_prefix[64];\n\n\tsnprintf(log_prefix, sizeof(log_prefix), WP_LOG_PREFIX \"sk %s:%s nwr: \", sk->host, sk->port);\n\tAssert(!sk->xlogreader);\n\tsk->xlogreader = NeonWALReaderAllocate(wal_segment_size, sk->wp->propTermStartLsn, log_prefix, sk->wp->localTimeLineID);\n\tif (sk->xlogreader == NULL)\n\t\twpg_log(FATAL, \"failed to allocate xlog reader\");\n}\n\nstatic NeonWALReadResult\nwalprop_pg_wal_read(Safekeeper *sk, char *buf, XLogRecPtr startptr, Size count, char **errmsg)\n{\n\tNeonWALReadResult res;\n\n\tres = NeonWALRead(sk->xlogreader,\n\t\t\t\t\t  buf,\n\t\t\t\t\t  startptr,\n\t\t\t\t\t  count,\n\t\t\t\t\t  sk->wp->localTimeLineID);\n\n\tif (res == NEON_WALREAD_SUCCESS)\n\t{\n\t\t/*\n\t\t * If we have the socket subscribed, but walreader doesn't need any\n\t\t * events, it must mean that remote connection just closed hoping to\n\t\t * do next read locally. Remove the socket then. 
It is important to do\n\t\t * as otherwise next read might open another connection and we won't\n\t\t * be able to distinguish whether we have correct socket added in wait\n\t\t * event set.\n\t\t */\n\t\tif (NeonWALReaderEvents(sk->xlogreader) == 0)\n\t\t\trm_safekeeper_event_set(sk, false);\n\t}\n\telse if (res == NEON_WALREAD_ERROR)\n\t{\n\t\t*errmsg = NeonWALReaderErrMsg(sk->xlogreader);\n\t}\n\n\treturn res;\n}\n\nstatic uint32\nwalprop_pg_wal_reader_events(Safekeeper *sk)\n{\n\treturn NeonWALReaderEvents(sk->xlogreader);\n}\n\nstatic WaitEventSet *waitEvents;\n\nstatic void\nwalprop_pg_free_event_set(WalProposer *wp)\n{\n\tif (waitEvents)\n\t{\n\t\tFreeWaitEventSet(waitEvents);\n\t\twaitEvents = NULL;\n\t}\n\n\tfor (int i = 0; i < wp->n_safekeepers; i++)\n\t{\n\t\twp->safekeeper[i].eventPos = -1;\n\t\twp->safekeeper[i].nwrEventPos = -1;\n\t\twp->safekeeper[i].nwrConnEstablished = false;\n\t}\n}\n\nstatic void\nwalprop_pg_init_event_set(WalProposer *wp)\n{\n\tif (waitEvents)\n\t\twpg_log(FATAL, \"double-initialization of event set\");\n\n\t/* for each sk, we have socket plus potentially socket for neon walreader */\n#if PG_MAJORVERSION_NUM >= 17\n\twaitEvents = CreateWaitEventSet(NULL, 2 + 2 * wp->n_safekeepers);\n#else\n\twaitEvents = CreateWaitEventSet(TopMemoryContext, 2 + 2 * wp->n_safekeepers);\n#endif\n\tAddWaitEventToSet(waitEvents, WL_LATCH_SET, PGINVALID_SOCKET,\n\t\t\t\t\t  MyLatch, NULL);\n\tAddWaitEventToSet(waitEvents, WL_EXIT_ON_PM_DEATH, PGINVALID_SOCKET,\n\t\t\t\t\t  NULL, NULL);\n\n\tfor (int i = 0; i < wp->n_safekeepers; i++)\n\t{\n\t\twp->safekeeper[i].eventPos = -1;\n\t\twp->safekeeper[i].nwrEventPos = -1;\n\t\twp->safekeeper[i].nwrConnEstablished = false;\n\t}\n}\n\n/* add safekeeper socket to wait event set */\nstatic void\nwalprop_pg_add_safekeeper_event_set(Safekeeper *sk, uint32 events)\n{\n\tAssert(sk->eventPos == -1);\n\tsk->eventPos = AddWaitEventToSet(waitEvents, events, walprop_socket(sk), NULL, sk);\n}\n\n/* add neon wal reader 
socket to wait event set */\nstatic void\nadd_nwr_event_set(Safekeeper *sk, uint32 events)\n{\n\tAssert(sk->nwrEventPos == -1);\n\tsk->nwrEventPos = AddWaitEventToSet(waitEvents, events, NeonWALReaderSocket(sk->xlogreader), NULL, sk);\n\tsk->nwrConnEstablished = NeonWALReaderIsRemConnEstablished(sk->xlogreader);\n\twpg_log(DEBUG5, \"sk %s:%s: added nwr socket events %d\", sk->host, sk->port, events);\n}\n\nstatic void\nwalprop_pg_update_event_set(Safekeeper *sk, uint32 events)\n{\n\t/* eventPos = -1 when we don't have an event */\n\tAssert(sk->eventPos != -1);\n\n\tModifyWaitEvent(waitEvents, sk->eventPos, events, NULL);\n}\n\n/*\n * Update neon_walreader event.\n * Can be called when nwr socket doesn't exist, does nothing in this case.\n */\nstatic void\nupdate_nwr_event_set(Safekeeper *sk, uint32 events)\n{\n\t/* eventPos = -1 when we don't have an event */\n\tif (sk->nwrEventPos != -1)\n\t\tModifyWaitEvent(waitEvents, sk->nwrEventPos, events, NULL);\n}\n\n\nstatic void\nwalprop_pg_active_state_update_event_set(Safekeeper *sk)\n{\n\tuint32\t\tsk_events;\n\tuint32\t\tnwr_events;\n\n\tAssert(sk->state == SS_ACTIVE);\n\tSafekeeperStateDesiredEvents(sk, &sk_events, &nwr_events);\n\n\t/*\n\t * If we need to wait for neon_walreader, ensure we have up to date socket\n\t * in the wait event set.\n\t */\n\tif (sk->active_state == SS_ACTIVE_READ_WAL)\n\t{\n\t\t/*\n\t\t * If conn is established and socket is thus stable, update the event\n\t\t * directly; otherwise re-add it.\n\t\t */\n\t\tif (sk->nwrConnEstablished)\n\t\t{\n\t\t\tAssert(sk->nwrEventPos != -1);\n\t\t\tupdate_nwr_event_set(sk, nwr_events);\n\t\t}\n\t\telse\n\t\t{\n\t\t\trm_safekeeper_event_set(sk, false);\n\t\t\tadd_nwr_event_set(sk, nwr_events);\n\t\t}\n\t}\n\telse\n\t{\n\t\t/*\n\t\t * Hack: we should always set 0 here, but for random reasons\n\t\t * WaitEventSet (WaitEventAdjustEpoll) asserts that there is at least\n\t\t * some event. 
Since there is also no way to remove socket except\n\t\t * reconstructing the whole set, SafekeeperStateDesiredEvents instead\n\t\t * gives WL_SOCKET_CLOSED if socket exists. We never expect it to\n\t\t * trigger.\n\t\t *\n\t\t * On PG 14 which doesn't have WL_SOCKET_CLOSED resort to event\n\t\t * removal.\n\t\t */\n#if PG_VERSION_NUM >= 150000\n\t\tAssert(nwr_events == WL_SOCKET_CLOSED || nwr_events == 0);\n\t\tupdate_nwr_event_set(sk, WL_SOCKET_CLOSED);\n#else\t\t\t\t\t\t\t/* pg 14 */\n\t\trm_safekeeper_event_set(sk, false);\n#endif\n\t}\n\twalprop_pg_update_event_set(sk, sk_events);\n}\n\nstatic void\nwalprop_pg_rm_safekeeper_event_set(Safekeeper *to_remove)\n{\n\trm_safekeeper_event_set(to_remove, true);\n}\n\n/*\n * A hacky way to remove single event from the event set. Can be called if event\n * doesn't exist, does nothing in this case.\n *\n * Note: Internally, this completely reconstructs the event set. It should be\n * avoided if possible.\n *\n * If is_sk is true, socket of connection to safekeeper is removed; otherwise\n * socket of neon_walreader.\n */\nstatic void\nrm_safekeeper_event_set(Safekeeper *to_remove, bool is_sk)\n{\n\tWalProposer *wp = to_remove->wp;\n\n\twpg_log(DEBUG5, \"sk %s:%s: removing event, is_sk %d\",\n\t\t\tto_remove->host, to_remove->port, is_sk);\n\n\t/*\n\t * Shortpath for exiting if have nothing to do. We never call this\n\t * function with safekeeper socket not existing, but do that with neon\n\t * walreader socket.\n\t */\n\tif ((is_sk && to_remove->eventPos == -1) ||\n\t\t(!is_sk && to_remove->nwrEventPos == -1))\n\t{\n\t\treturn;\n\t}\n\n\t/* Remove the existing event set, assign sk->eventPos = -1 */\n\twalprop_pg_free_event_set(wp);\n\n\t/* Re-initialize it without adding any safekeeper events */\n\twp->api.init_event_set(wp);\n\n\t/*\n\t * loop through the existing safekeepers. 
If they aren't the one we're\n\t * removing, and if they have a socket we can use, re-add the applicable\n\t * events.\n\t */\n\tfor (int i = 0; i < wp->n_safekeepers; i++)\n\t{\n\t\tSafekeeper *sk = &wp->safekeeper[i];\n\n\t\t/*\n\t\t * If this safekeeper isn't offline, add events for it, except for the\n\t\t * event requested to remove.\n\t\t */\n\t\tif (sk->state != SS_OFFLINE)\n\t\t{\n\t\t\tuint32\t\tsk_events;\n\t\t\tuint32\t\tnwr_events;\n\n\t\t\tSafekeeperStateDesiredEvents(sk, &sk_events, &nwr_events);\n\n\t\t\tif (sk != to_remove || !is_sk)\n\t\t\t{\n\t\t\t\t/* will set sk->eventPos */\n\t\t\t\twp->api.add_safekeeper_event_set(sk, sk_events);\n\t\t\t}\n\t\t\tif ((sk != to_remove || is_sk) && nwr_events)\n\t\t\t{\n\t\t\t\tadd_nwr_event_set(sk, nwr_events);\n\t\t\t}\n\t\t}\n\t}\n}\n\nstatic int\nwalprop_pg_wait_event_set(WalProposer *wp, long timeout, Safekeeper **sk, uint32 *events)\n{\n\tWaitEvent\tevent = {0};\n\tint\t\t\trc = 0;\n\tbool\t\tlate_cv_trigger = false;\n\n\t*sk = NULL;\n\t*events = 0;\n\n#if PG_MAJORVERSION_NUM >= 16\n\tif (WalSndCtl != NULL)\n\t\tConditionVariablePrepareToSleep(&WalSndCtl->wal_flush_cv);\n\n\t/*\n\t * Now that we prepared the condvar, check flush ptr again -- it might\n\t * have changed before we subscribed to cv so we missed the wakeup.\n\t *\n\t * Do that only when we're interested in new WAL: without sync-safekeepers\n\t * and if election already passed.\n\t */\n\tif (!wp->config->syncSafekeepers && wp->availableLsn != InvalidXLogRecPtr && GetFlushRecPtr(NULL) > wp->availableLsn)\n\t{\n\t\tConditionVariableCancelSleep();\n\t\tResetLatch(MyLatch);\n\n\t\tCheckGracefulShutdown(wp);\n\n\t\t*events = WL_LATCH_SET;\n\t\treturn 1;\n\t}\n#endif\n\n\t/*\n\t * Wait for a wait event to happen, or timeout: - Safekeeper socket can\n\t * become available for READ or WRITE - Our latch got set, because *\n\t * PG15-: We got woken up by a process triggering the WalSender * PG16+:\n\t * WalSndCtl->wal_flush_cv was triggered\n\t */\n\trc = 
WaitEventSetWait(waitEvents, timeout,\n\t\t\t\t\t\t  &event, 1, WAIT_EVENT_WAL_SENDER_MAIN);\n#if PG_MAJORVERSION_NUM >= 16\n\tif (WalSndCtl != NULL)\n\t\tlate_cv_trigger = ConditionVariableCancelSleep();\n#endif\n\n\t/*\n\t * Process config if requested. This restarts walproposer if safekeepers\n\t * list changed. Don't do that for sync-safekeepers because quite probably\n\t * it (re-reading config) won't work without some effort, and\n\t * sync-safekeepers should be quick to finish anyway.\n\t */\n\tif (!wp->config->syncSafekeepers && ConfigReloadPending)\n\t{\n\t\tConfigReloadPending = false;\n\t\tProcessConfigFile(PGC_SIGHUP);\n\t}\n\n\t/*\n\t * If wait is terminated by latch set (walsenders' latch is set on each\n\t * wal flush). (no need for pm death check due to WL_EXIT_ON_PM_DEATH)\n\t */\n\tif ((rc == 1 && (event.events & WL_LATCH_SET)) || late_cv_trigger)\n\t{\n\t\t/* Reset our latch */\n\t\tResetLatch(MyLatch);\n\t\t*events = WL_LATCH_SET;\n\t\treturn 1;\n\t}\n\n\t/*\n\t * If the event contains something about the socket, it means we got an\n\t * event from a safekeeper socket.\n\t */\n\tif (rc == 1 && (event.events & WL_SOCKET_MASK))\n\t{\n\t\t*sk = (Safekeeper *) event.user_data;\n\t\t*events = event.events;\n\t\treturn 1;\n\t}\n\n\t/* XXX: Can we have non-timeout event here? */\n\t*events = event.events;\n\treturn rc;\n}\n\nstatic void __attribute__((noreturn))\nwalprop_pg_finish_sync_safekeepers(WalProposer *wp, XLogRecPtr lsn)\n{\n\tfprintf(stdout, \"%X/%X\\n\", LSN_FORMAT_ARGS(lsn));\n\texit(0);\n}\n\n/*\n * Like vanilla walsender, on sigusr2 send all remaining WAL and exit.\n *\n * Note that unlike sync-safekeepers waiting here is not reliable: we\n * don't check that majority of safekeepers received and persisted\n * commit_lsn -- only that walproposer reached it (which immediately\n * broadcasts new value). Doing that without incurring redundant control\n * file syncing would need wp -> sk protocol change. 
OTOH unlike\n * sync-safekeepers which must bump commit_lsn or basebackup will fail,\n * this catchup is important only for tests where safekeepers/network\n * don't crash on their own.\n */\nstatic void\nCheckGracefulShutdown(WalProposer *wp)\n{\n\tif (got_SIGUSR2)\n\t{\n\t\tif (!reported_sigusr2)\n\t\t{\n\t\t\tXLogRecPtr\tflushPtr = walprop_pg_get_flush_rec_ptr(wp);\n\n\t\t\twpg_log(LOG, \"walproposer will send and wait for remaining WAL between %X/%X and %X/%X\",\n\t\t\t\t\tLSN_FORMAT_ARGS(wp->commitLsn), LSN_FORMAT_ARGS(flushPtr));\n\t\t\treported_sigusr2 = true;\n\t\t}\n\n\t\tif (wp->commitLsn >= walprop_pg_get_flush_rec_ptr(wp))\n\t\t{\n\t\t\twpg_log(LOG, \"walproposer sent all WAL up to %X/%X, exiting\",\n\t\t\t\t\tLSN_FORMAT_ARGS(wp->commitLsn));\n\t\t\tproc_exit(0);\n\t\t}\n\t}\n}\n\n/*\n * Combine hot standby feedbacks from all safekeepers.\n */\nstatic void\nCombineHotStanbyFeedbacks(HotStandbyFeedback *hs, WalProposer *wp)\n{\n\ths->ts = 0;\n\ths->xmin = InvalidFullTransactionId;\n\ths->catalog_xmin = InvalidFullTransactionId;\n\n\tfor (int i = 0; i < wp->n_safekeepers; i++)\n\t{\n\n\t\tif (wp->safekeeper[i].state == SS_ACTIVE)\n\t\t{\n\t\t\tHotStandbyFeedback *skhs = &wp->safekeeper[i].appendResponse.hs;\n\n\t\t\tif (FullTransactionIdIsNormal(skhs->xmin)\n\t\t\t\t&& (!FullTransactionIdIsValid(hs->xmin) || FullTransactionIdPrecedes(skhs->xmin, hs->xmin)))\n\t\t\t{\n\t\t\t\ths->xmin = skhs->xmin;\n\t\t\t\ths->ts = skhs->ts;\n\t\t\t}\n\t\t\tif (FullTransactionIdIsNormal(skhs->catalog_xmin)\n\t\t\t\t&& (!FullTransactionIdIsValid(hs->catalog_xmin) || FullTransactionIdPrecedes(skhs->catalog_xmin, hs->catalog_xmin)))\n\t\t\t{\n\t\t\t\ths->catalog_xmin = skhs->catalog_xmin;\n\t\t\t\ths->ts = skhs->ts;\n\t\t\t}\n\t\t}\n\t}\n}\n\n/*\n * Based on commitLsn and safekeeper responses including pageserver feedback,\n * 1) Propagate cluster size received from ps to ensure the limit.\n * 2) Propagate pageserver LSN positions to ensure backpressure limits.\n * 3) 
Advance walproposer slot to commitLsn (releasing WAL & waking up waiters).\n * 4) Propagate hot standby feedback.\n *\n * None of that is functional in sync-safekeepers.\n */\nstatic void\nwalprop_pg_process_safekeeper_feedback(WalProposer *wp, Safekeeper *sk)\n{\n\tHotStandbyFeedback hsFeedback;\n\tbool\t\tneedToAdvanceSlot = false;\n\n\tif (wp->config->syncSafekeepers)\n\t\treturn;\n\n\n\t/* handle fresh ps_feedback */\n\tif (sk->appendResponse.ps_feedback.present)\n\t{\n\t\tshardno_t num_shards = get_num_shards();\n\n\t\t// During shard split, we receive ps_feedback from child shards before\n\t\t// the split commits and our shard map GUC has been updated. We must\n\t\t// filter out such feedback here because record_pageserver_feedback()\n\t\t// doesn't do it.\n\t\t//\n\t\t// NB: what we would actually want to happen is that we only receive\n\t\t// ps_feedback from the parent shards when the split is committed, then\n\t\t// apply the split to our set of tracked feedback and from here on only\n\t\t// receive ps_feedback from child shards. This filter condition doesn't\n\t\t// do that: if we split from N parent to 2N child shards, the first N\n\t\t// child shards' feedback messages will pass this condition, even before\n\t\t// the split is committed. 
That's a bit sloppy, but OK for now.\n\t\tif (sk->appendResponse.ps_feedback.shard_number < num_shards)\n\t\t{\n\t\t\tPageserverFeedback min_feedback = record_pageserver_feedback(&sk->appendResponse.ps_feedback, num_shards);\n\n\t\t\t/* Only one main shard sends non-zero currentClusterSize */\n\t\t\tif (sk->appendResponse.ps_feedback.currentClusterSize > 0)\n\t\t\t\tSetNeonCurrentClusterSize(sk->appendResponse.ps_feedback.currentClusterSize);\n\n\t\t\tif (min_feedback.disk_consistent_lsn != standby_apply_lsn)\n\t\t\t{\n\t\t\t\tstandby_apply_lsn = min_feedback.disk_consistent_lsn;\n\t\t\t\tneedToAdvanceSlot = true;\n\t\t\t}\n\t\t}\n\t\telse\n\t\t{\n\t\t\t// HADRON\n\t\t\telog(DEBUG2, \"Ignoring pageserver feedback for unknown shard %d (current shard number %d)\",\n\t\t\t\tsk->appendResponse.ps_feedback.shard_number, num_shards);\n\t\t}\n\t}\n\n\tif (wp->commitLsn > standby_flush_lsn)\n\t{\n\t\tstandby_flush_lsn = wp->commitLsn;\n\t\tneedToAdvanceSlot = true;\n\t}\n\n\tif (needToAdvanceSlot)\n\t{\n\t\t/*\n\t\t * Advance the replication slot to commitLsn. WAL before it is\n\t\t * hardened and will be fetched from one of safekeepers by\n\t\t * neon_walreader if needed.\n\t\t *\n\t\t * Also wakes up syncrep waiters.\n\t\t */\n\t\tProcessStandbyReply(\n\t\t/* write_lsn -  This is what durably stored in safekeepers quorum. */\n\t\t\t\t\t\t\tstandby_flush_lsn,\n\t\t/* flush_lsn - This is what durably stored in safekeepers quorum. 
*/\n\t\t\t\t\t\t\tstandby_flush_lsn,\n\n\t\t/*\n\t\t * apply_lsn - This is what processed and durably saved at*\n\t\t * pageserver.\n\t\t */\n\t\t\t\t\t\t\tstandby_apply_lsn,\n\t\t\t\t\t\t\twalprop_pg_get_current_timestamp(wp), false);\n\t}\n\n\tCombineHotStanbyFeedbacks(&hsFeedback, wp);\n\tif (memcmp(&hsFeedback, &agg_hs_feedback, sizeof hsFeedback) != 0)\n\t{\n\t\tFullTransactionId xmin = hsFeedback.xmin;\n\t\tFullTransactionId catalog_xmin = hsFeedback.catalog_xmin;\n\t\tFullTransactionId next_xid = ReadNextFullTransactionId();\n\n\t\t/*\n\t\t * Page server is updating nextXid in checkpoint each 1024\n\t\t * transactions, so feedback xmin can be actually larger then nextXid\n\t\t * and function TransactionIdInRecentPast return false in this case,\n\t\t * preventing update of slot's xmin.\n\t\t */\n\t\tif (FullTransactionIdPrecedes(next_xid, xmin))\n\t\t\txmin = next_xid;\n\t\tif (FullTransactionIdPrecedes(next_xid, catalog_xmin))\n\t\t\tcatalog_xmin = next_xid;\n\t\tagg_hs_feedback = hsFeedback;\n\t\telog(DEBUG2, \"ProcessStandbyHSFeedback(xmin=%d, catalog_xmin=%d\", XidFromFullTransactionId(hsFeedback.xmin), XidFromFullTransactionId(hsFeedback.catalog_xmin));\n\t\tProcessStandbyHSFeedback(hsFeedback.ts,\n\t\t\t\t\t\t\t\t XidFromFullTransactionId(xmin),\n\t\t\t\t\t\t\t\t EpochFromFullTransactionId(xmin),\n\t\t\t\t\t\t\t\t XidFromFullTransactionId(catalog_xmin),\n\t\t\t\t\t\t\t\t EpochFromFullTransactionId(catalog_xmin));\n\t}\n\n\tCheckGracefulShutdown(wp);\n}\n\nstatic XLogRecPtr\nwalprop_pg_get_redo_start_lsn(WalProposer *wp)\n{\n\treturn GetRedoStartLsn();\n}\n\nstatic bool\nwalprop_pg_strong_random(WalProposer *wp, void *buf, size_t len)\n{\n\treturn pg_strong_random(buf, len);\n}\n\nstatic void\nwalprop_pg_log_internal(WalProposer *wp, int level, const char *line)\n{\n\telog(FATAL, \"unexpected log_internal message at level %d: %s\", level, line);\n}\n\nvoid\nSetNeonCurrentClusterSize(uint64 
size)\n{\n\tpg_atomic_write_u64(&walprop_shared->currentClusterSize, size);\n}\n\nuint64\nGetNeonCurrentClusterSize(void)\n{\n\treturn pg_atomic_read_u64(&walprop_shared->currentClusterSize);\n}\nuint64\t\tGetNeonCurrentClusterSize(void);\n\n/* BEGIN_HADRON */\nstatic void\nwalprop_pg_reset_safekeeper_statuses_for_metrics(WalProposer *wp, uint32 num_safekeepers)\n{\n\tWalproposerShmemState* shmem = wp->api.get_shmem_state(wp);\n\tSpinLockAcquire(&shmem->mutex);\n\tshmem->num_safekeepers = num_safekeepers;\n\tmemset(shmem->safekeeper_status, 0, sizeof(shmem->safekeeper_status));\n\tSpinLockRelease(&shmem->mutex);\n}\n\nstatic void\nwalprop_pg_update_safekeeper_status_for_metrics(WalProposer *wp, uint32 sk_index, uint8 status)\n{\n\tWalproposerShmemState* shmem = wp->api.get_shmem_state(wp);\n\tAssert(sk_index < MAX_SAFEKEEPERS);\n\tSpinLockAcquire(&shmem->mutex);\n\tshmem->safekeeper_status[sk_index] = status;\n\tSpinLockRelease(&shmem->mutex);\n}\n/* END_HADRON */\n\nstatic const walproposer_api walprop_pg = {\n\t.get_shmem_state = walprop_pg_get_shmem_state,\n\t.start_streaming = walprop_pg_start_streaming,\n\t.get_flush_rec_ptr = walprop_pg_get_flush_rec_ptr,\n\t.update_donor = walprop_pg_update_donor,\n\t.get_current_timestamp = walprop_pg_get_current_timestamp,\n\t.conn_error_message = walprop_error_message,\n\t.conn_status = walprop_status,\n\t.conn_connect_start = walprop_connect_start,\n\t.conn_connect_poll = walprop_connect_poll,\n\t.conn_send_query = walprop_send_query,\n\t.conn_get_query_result = walprop_get_query_result,\n\t.conn_flush = walprop_flush,\n\t.conn_finish = walprop_finish,\n\t.conn_async_read = walprop_async_read,\n\t.conn_async_write = walprop_async_write,\n\t.conn_blocking_write = walprop_blocking_write,\n\t.recovery_download = WalProposerRecovery,\n\t.wal_reader_allocate = walprop_pg_wal_reader_allocate,\n\t.wal_read = walprop_pg_wal_read,\n\t.wal_reader_events = walprop_pg_wal_reader_events,\n\t.init_event_set = 
walprop_pg_init_event_set,\n\t.update_event_set = walprop_pg_update_event_set,\n\t.active_state_update_event_set = walprop_pg_active_state_update_event_set,\n\t.add_safekeeper_event_set = walprop_pg_add_safekeeper_event_set,\n\t.rm_safekeeper_event_set = walprop_pg_rm_safekeeper_event_set,\n\t.wait_event_set = walprop_pg_wait_event_set,\n\t.strong_random = walprop_pg_strong_random,\n\t.get_redo_start_lsn = walprop_pg_get_redo_start_lsn,\n\t.finish_sync_safekeepers = walprop_pg_finish_sync_safekeepers,\n\t.process_safekeeper_feedback = walprop_pg_process_safekeeper_feedback,\n\t.log_internal = walprop_pg_log_internal,\n\t.reset_safekeeper_statuses_for_metrics = walprop_pg_reset_safekeeper_statuses_for_metrics,\n\t.update_safekeeper_status_for_metrics = walprop_pg_update_safekeeper_status_for_metrics,\n};\n"
  },
  {
    "path": "pgxn/neon/walsender_hooks.c",
    "content": "/*-------------------------------------------------------------------------\n *\n * walsender_hooks.c\n *\n * Implements XLogReaderRoutine in terms of NeonWALReader. Allows for\n * fetching WAL from safekeepers, which normal xlogreader can't do.\n *\n *-------------------------------------------------------------------------\n */\n#include \"walsender_hooks.h\"\n#include \"postgres.h\"\n#include \"fmgr.h\"\n#include \"access/xlogdefs.h\"\n#include \"replication/walsender.h\"\n#include \"access/xlog.h\"\n#include \"access/xlog_internal.h\"\n#include \"access/xlogreader.h\"\n#include \"miscadmin.h\"\n#include \"utils/wait_event.h\"\n#include \"utils/guc.h\"\n#include \"postmaster/interrupt.h\"\n\n#include \"neon.h\"\n#include \"neon_walreader.h\"\n#include \"walproposer.h\"\n\nstatic NeonWALReader *wal_reader = NULL;\n\nstruct WalSnd;\nextern struct WalSnd *MyWalSnd;\nextern XLogRecPtr WalSndWaitForWal(XLogRecPtr loc);\nextern bool GetDonorShmem(XLogRecPtr *donor_lsn);\nextern XLogRecPtr GetXLogReplayRecPtr(TimeLineID *replayTLI);\n\nbool disable_wal_prev_lsn_checks = false;\n\nstatic XLogRecPtr\nNeonWALReadWaitForWAL(XLogRecPtr loc)\n{\n\twhile (!NeonWALReaderUpdateDonor(wal_reader))\n\t{\n\t\tpg_usleep(1000);\n\t\tCHECK_FOR_INTERRUPTS();\n\t}\n\n\t// Walsender sends keepalives and stuff, so better use its normal wait\n\tif (MyWalSnd != NULL)\n\t\treturn WalSndWaitForWal(loc);\n\n\tfor (;;)\n\t{\n\t\tXLogRecPtr flush_ptr;\n\t\tif (!RecoveryInProgress())\n#if PG_VERSION_NUM >= 150000\n\t\t\tflush_ptr = GetFlushRecPtr(NULL);\n#else\n\t\t\tflush_ptr = GetFlushRecPtr();\n#endif\n\t\telse\n\t\t\tflush_ptr = GetXLogReplayRecPtr(NULL);\n\n\t\tif (loc <= flush_ptr)\n\t\t\treturn flush_ptr;\n\n\t\tCHECK_FOR_INTERRUPTS();\n\t\tpg_usleep(1000);\n\t}\n}\n\nstatic int\nNeonWALPageRead(\n\t\t\t\tXLogReaderState *xlogreader,\n\t\t\t\tXLogRecPtr targetPagePtr,\n\t\t\t\tint reqLen,\n\t\t\t\tXLogRecPtr targetRecPtr,\n\t\t\t\tchar 
*readBuf)\n{\n\tXLogRecPtr\trem_lsn;\n\n\t/* Wait for flush pointer to advance past our request */\n\tXLogRecPtr\tflushptr = NeonWALReadWaitForWAL(targetPagePtr + reqLen);\n\tint\t\t\tcount;\n\n\tif (flushptr < targetPagePtr + reqLen)\n\t\treturn -1;\n\n\txlogreader->skip_lsn_checks = disable_wal_prev_lsn_checks;\n\n\t/* Read at most XLOG_BLCKSZ bytes */\n\tif (targetPagePtr + XLOG_BLCKSZ <= flushptr)\n\t\tcount = XLOG_BLCKSZ;\n\telse\n\t\tcount = flushptr - targetPagePtr;\n\n\t/*\n\t * Sometimes walsender requests non-monotonic sequences of WAL. If that's\n\t * the case, we have to reset streaming from remote at the correct\n\t * position. For example, walsender may try to verify the segment header\n\t * when trying to read in the middle of it.\n\t */\n\trem_lsn = NeonWALReaderGetRemLsn(wal_reader);\n\tif (rem_lsn != InvalidXLogRecPtr && targetPagePtr != rem_lsn)\n\t{\n\t\tNeonWALReaderResetRemote(wal_reader);\n\t}\n\n\tfor (;;)\n\t{\n\t\tNeonWALReadResult res = NeonWALRead(\n\t\t\t\t\t\t\t\t\t\t\twal_reader,\n\t\t\t\t\t\t\t\t\t\t\treadBuf,\n\t\t\t\t\t\t\t\t\t\t\ttargetPagePtr,\n\t\t\t\t\t\t\t\t\t\t\tcount,\n\t\t\t\t\t\t\t\t\t\t\tNeonWALReaderLocalActiveTimeLineID(wal_reader));\n\n\t\tif (res == NEON_WALREAD_SUCCESS)\n\t\t{\n\t\t\t/*\n\t\t\t * Setting ws_tli is required by the XLogReaderRoutine, it is used\n\t\t\t * for segment name generation in error reports.\n\t\t\t *\n\t\t\t * ReadPageInternal updates ws_segno after calling cb on its own\n\t\t\t * and XLogReaderRoutine description doesn't require it, but\n\t\t\t * WALRead sets, let's follow it.\n\t\t\t */\n\t\t\txlogreader->seg.ws_tli = NeonWALReaderGetSegment(wal_reader)->ws_tli;\n\t\t\txlogreader->seg.ws_segno = NeonWALReaderGetSegment(wal_reader)->ws_segno;\n\n\t\t\t/*\n\t\t\t * ws_file doesn't exist in case of remote read, and isn't used by\n\t\t\t * xlogreader except by WALRead on which we don't rely anyway.\n\t\t\t */\n\t\t\treturn count;\n\t\t}\n\t\tif (res == 
NEON_WALREAD_ERROR)\n\t\t{\n\t\t\telog(ERROR, \"[walsender] Failed to read WAL (req_lsn=%X/%X, len=%d): %s\",\n\t\t\t\t LSN_FORMAT_ARGS(targetPagePtr),\n\t\t\t\t reqLen,\n\t\t\t\t NeonWALReaderErrMsg(wal_reader));\n\t\t\treturn -1;\n\t\t}\n\n\t\t/*\n\t\t * Res is WOULDBLOCK, so we wait on the socket, recreating event set\n\t\t * if necessary\n\t\t */\n\t\t{\n\n\t\t\tpgsocket\tsock = NeonWALReaderSocket(wal_reader);\n\t\t\tuint32_t\treader_events = NeonWALReaderEvents(wal_reader);\n\t\t\tlong\t\ttimeout_ms = 1000;\n\n\t\t\tResetLatch(MyLatch);\n\t\t\tCHECK_FOR_INTERRUPTS();\n\t\t\tif (ConfigReloadPending)\n\t\t\t{\n\t\t\t\tConfigReloadPending = false;\n\t\t\t\tProcessConfigFile(PGC_SIGHUP);\n\t\t\t}\n\n\t\t\tWaitLatchOrSocket(\n\t\t\t\t\t\t\t  MyLatch,\n\t\t\t\t\t\t\t  WL_LATCH_SET | WL_EXIT_ON_PM_DEATH | reader_events,\n\t\t\t\t\t\t\t  sock,\n\t\t\t\t\t\t\t  timeout_ms,\n\t\t\t\t\t\t\t  WAIT_EVENT_NEON_WAL_DL);\n\t\t}\n\t}\n}\n\nstatic void\nNeonWALReadSegmentOpen(XLogReaderState *xlogreader, XLogSegNo nextSegNo, TimeLineID *tli_p)\n{\n\tneon_wal_segment_open(wal_reader, nextSegNo, tli_p);\n\txlogreader->seg.ws_file = NeonWALReaderGetSegment(wal_reader)->ws_file;\n}\n\nstatic void\nNeonWALReadSegmentClose(XLogReaderState *xlogreader)\n{\n\tneon_wal_segment_close(wal_reader);\n\txlogreader->seg.ws_file = NeonWALReaderGetSegment(wal_reader)->ws_file;\n}\n\nvoid\nNeonOnDemandXLogReaderRoutines(XLogReaderRoutine *xlr)\n{\n\t/*\n\t * If safekeepers are not configured, assume we don't need neon_walreader,\n\t * i.e. 
running neon fork locally.\n\t */\n\tif (wal_acceptors_list[0] == '\\0')\n\t\treturn;\n\n\tif (!wal_reader)\n\t{\n\t\tXLogRecPtr\tbasebackupLsn = GetRedoStartLsn();\n\n\t\t/* should never happen */\n\t\tif (basebackupLsn == 0)\n\t\t{\n\t\t\telog(ERROR, \"unable to start walsender when basebackupLsn is 0\");\n\t\t}\n\t\twal_reader = NeonWALReaderAllocate(wal_segment_size, basebackupLsn, \"[walsender] \", 1);\n\t}\n\txlr->page_read = NeonWALPageRead;\n\txlr->segment_open = NeonWALReadSegmentOpen;\n\txlr->segment_close = NeonWALReadSegmentClose;\n}\n"
  },
  {
    "path": "pgxn/neon/walsender_hooks.h",
    "content": "#ifndef __WALSENDER_HOOKS_H__\n#define __WALSENDER_HOOKS_H__\n\nstruct XLogReaderRoutine;\nvoid\t\tNeonOnDemandXLogReaderRoutines(struct XLogReaderRoutine *xlr);\n\n#endif\n"
  },
  {
    "path": "pgxn/neon_rmgr/Makefile",
    "content": "# pgxs/neon/Makefile\n\n\nMODULE_big = neon_rmgr\nOBJS = \\\n\t$(WIN32RES) \\\n\tneon_rmgr.o \\\n\tneon_rmgr_decode.o \\\n\tneon_rmgr_desc.o\n\n\nEXTENSION = neon_rmgr\nDATA = \nPGFILEDESC = \"Neon WAL Resource Manager - custom WAL records used to make Neon work (since PG 16)\"\n\n\nPG_CONFIG = pg_config\nPGXS := $(shell $(PG_CONFIG) --pgxs)\ninclude $(PGXS)\n"
  },
  {
    "path": "pgxn/neon_rmgr/neon_rmgr.c",
    "content": "#include \"postgres.h\"\n#include \"fmgr.h\"\n\n#if PG_MAJORVERSION_NUM >= 16\n#include \"access/bufmask.h\"\n#include \"access/heapam_xlog.h\"\n#include \"access/htup_details.h\"\n#include \"access/neon_xlog.h\"\n#include \"access/rmgr.h\"\n#include \"access/visibilitymap.h\"\n#include \"access/xlog_internal.h\"\n#include \"access/xlogutils.h\"\n#include \"miscadmin.h\"\n#include \"storage/buf.h\"\n#include \"storage/bufmgr.h\"\n#include \"storage/bufpage.h\"\n#include \"storage/freespace.h\"\n#include \"neon_rmgr.h\"\n\nPG_MODULE_MAGIC;\nvoid\t\t_PG_init(void);\n\nstatic void neon_rm_redo(XLogReaderState *record);\nstatic void neon_rm_startup(void);\nstatic void neon_rm_cleanup(void);\nstatic void neon_rm_mask(char *pagedata, BlockNumber blkno);\n\nstatic void redo_neon_heap_insert(XLogReaderState *record);\nstatic void redo_neon_heap_delete(XLogReaderState *record);\nstatic void redo_neon_heap_update(XLogReaderState *record, bool hot_update);\nstatic void redo_neon_heap_lock(XLogReaderState *record);\nstatic void redo_neon_heap_multi_insert(XLogReaderState *record);\n\nconst static RmgrData NeonRmgr = {\n\t.rm_name = \"neon\",\n\t.rm_redo = neon_rm_redo,\n\t.rm_desc = neon_rm_desc,\n\t.rm_identify = neon_rm_identify,\n\t.rm_startup = neon_rm_startup,\n\t.rm_cleanup = neon_rm_cleanup,\n\t.rm_mask = neon_rm_mask,\n\t.rm_decode = neon_rm_decode,\n};\n\nvoid\n_PG_init(void)\n{\n\tif (!process_shared_preload_libraries_in_progress)\n\t\treturn;\n\n\tRegisterCustomRmgr(RM_NEON_ID, &NeonRmgr);\n}\n\nstatic void\nneon_rm_redo(XLogReaderState *record)\n{\n\tuint8\t\tinfo = XLogRecGetInfo(record) & ~XLR_INFO_MASK;\n\n\tswitch (info & XLOG_NEON_OPMASK)\n\t{\n\t\tcase XLOG_NEON_HEAP_INSERT:\n\t\t\tredo_neon_heap_insert(record);\n\t\t\tbreak;\n\t\tcase XLOG_NEON_HEAP_DELETE:\n\t\t\tredo_neon_heap_delete(record);\n\t\t\tbreak;\n\t\tcase XLOG_NEON_HEAP_UPDATE:\n\t\t\tredo_neon_heap_update(record, false);\n\t\t\tbreak;\n\t\tcase 
XLOG_NEON_HEAP_HOT_UPDATE:\n\t\t\tredo_neon_heap_update(record, true);\n\t\t\tbreak;\n\t\tcase XLOG_NEON_HEAP_LOCK:\n\t\t\tredo_neon_heap_lock(record);\n\t\t\tbreak;\n\t\tcase XLOG_NEON_HEAP_MULTI_INSERT:\n\t\t\tredo_neon_heap_multi_insert(record);\n\t\t\tbreak;\n\t\tdefault:\n\t\t\telog(PANIC, \"neon_rm_redo: unknown op code %u\", info);\n\t}\n}\n\nstatic void\nneon_rm_startup(void)\n{\n\t/* nothing to do here */\n}\n\nstatic void\nneon_rm_cleanup(void)\n{\n\t/* nothing to do here */\n}\n\nstatic void\nneon_rm_mask(char *pagedata, BlockNumber blkno)\n{\n\tPage\t\tpage = (Page) pagedata;\n\tOffsetNumber off;\n\n\tmask_page_lsn_and_checksum(page);\n\n\tmask_page_hint_bits(page);\n\tmask_unused_space(page);\n\n\tfor (off = 1; off <= PageGetMaxOffsetNumber(page); off++)\n\t{\n\t\tItemId\t\tiid = PageGetItemId(page, off);\n\t\tchar\t   *page_item;\n\n\t\tpage_item = (char *) (page + ItemIdGetOffset(iid));\n\n\t\tif (ItemIdIsNormal(iid))\n\t\t{\n\t\t\tHeapTupleHeader page_htup = (HeapTupleHeader) page_item;\n\n\t\t\t/*\n\t\t\t * If xmin of a tuple is not yet frozen, we should ignore\n\t\t\t * differences in hint bits, since they can be set without\n\t\t\t * emitting WAL.\n\t\t\t */\n\t\t\tif (!HeapTupleHeaderXminFrozen(page_htup))\n\t\t\t\tpage_htup->t_infomask &= ~HEAP_XACT_MASK;\n\t\t\telse\n\t\t\t{\n\t\t\t\t/* Still we need to mask xmax hint bits. */\n\t\t\t\tpage_htup->t_infomask &= ~HEAP_XMAX_INVALID;\n\t\t\t\tpage_htup->t_infomask &= ~HEAP_XMAX_COMMITTED;\n\t\t\t}\n\n\t\t\t/*\n\t\t\t * During replay, we set Command Id to FirstCommandId. Hence, mask\n\t\t\t * it. See heap_xlog_insert() for details.\n\t\t\t */\n\t\t\tpage_htup->t_choice.t_heap.t_field3.t_cid = MASK_MARKER;\n\n\t\t\t/*\n\t\t\t * For a speculative tuple, heap_insert() does not set ctid in the\n\t\t\t * caller-passed heap tuple itself, leaving the ctid field to\n\t\t\t * contain a speculative token value - a per-backend monotonically\n\t\t\t * increasing identifier. 
Besides, it does not WAL-log ctid under\n\t\t\t * any circumstances.\n\t\t\t *\n\t\t\t * During redo, heap_xlog_insert() sets t_ctid to current block\n\t\t\t * number and self offset number. It doesn't care about any\n\t\t\t * speculative insertions on the primary. Hence, we set t_ctid to\n\t\t\t * current block number and self offset number to ignore any\n\t\t\t * inconsistency.\n\t\t\t */\n\t\t\tif (HeapTupleHeaderIsSpeculative(page_htup))\n\t\t\t\tItemPointerSet(&page_htup->t_ctid, blkno, off);\n\n\t\t\t/*\n\t\t\t * NB: Not ignoring ctid changes due to the tuple having moved\n\t\t\t * (i.e. HeapTupleHeaderIndicatesMovedPartitions), because that's\n\t\t\t * important information that needs to be in-sync between primary\n\t\t\t * and standby, and thus is WAL logged.\n\t\t\t */\n\t\t}\n\n\t\t/*\n\t\t * Ignore any padding bytes after the tuple, when the length of the\n\t\t * item is not MAXALIGNed.\n\t\t */\n\t\tif (ItemIdHasStorage(iid))\n\t\t{\n\t\t\tint\t\t\tlen = ItemIdGetLength(iid);\n\t\t\tint\t\t\tpadlen = MAXALIGN(len) - len;\n\n\t\t\tif (padlen > 0)\n\t\t\t\tmemset(page_item + len, MASK_MARKER, padlen);\n\t\t}\n\t}\n}\n\n\n/*\n * COPIED FROM heapam.c\n * Given an \"infobits\" field from an XLog record, set the correct bits in the\n * given infomask and infomask2 for the tuple touched by the record.\n *\n * (This is the reverse of compute_infobits).\n */\nstatic void\nfix_infomask_from_infobits(uint8 infobits, uint16 *infomask, uint16 *infomask2)\n{\n\t*infomask &= ~(HEAP_XMAX_IS_MULTI | HEAP_XMAX_LOCK_ONLY |\n\t\t\t\t   HEAP_XMAX_KEYSHR_LOCK | HEAP_XMAX_EXCL_LOCK | HEAP_COMBOCID);\n\t*infomask2 &= ~HEAP_KEYS_UPDATED;\n\n\tif (infobits & XLHL_XMAX_IS_MULTI)\n\t\t*infomask |= HEAP_XMAX_IS_MULTI;\n\tif (infobits & XLHL_XMAX_LOCK_ONLY)\n\t\t*infomask |= HEAP_XMAX_LOCK_ONLY;\n\tif (infobits & XLHL_XMAX_EXCL_LOCK)\n\t\t*infomask |= HEAP_XMAX_EXCL_LOCK;\n\tif (infobits & XLHL_COMBOCID)\n\t\t*infomask |= HEAP_COMBOCID;\n\t/* note HEAP_XMAX_SHR_LOCK isn't considered 
here */\n\tif (infobits & XLHL_XMAX_KEYSHR_LOCK)\n\t\t*infomask |= HEAP_XMAX_KEYSHR_LOCK;\n\n\tif (infobits & XLHL_KEYS_UPDATED)\n\t\t*infomask2 |= HEAP_KEYS_UPDATED;\n}\n\nstatic void\nredo_neon_heap_insert(XLogReaderState *record)\n{\n\tXLogRecPtr\tlsn = record->EndRecPtr;\n\txl_neon_heap_insert *xlrec = (xl_neon_heap_insert *) XLogRecGetData(record);\n\tBuffer\t\tbuffer;\n\tPage\t\tpage;\n\tunion\n\t{\n\t\tHeapTupleHeaderData hdr;\n\t\tchar\t\tdata[MaxHeapTupleSize];\n\t}\t\t\ttbuf;\n\tHeapTupleHeader htup;\n\txl_neon_heap_header xlhdr;\n\tuint32\t\tnewlen;\n\tSize\t\tfreespace = 0;\n\tRelFileLocator target_locator;\n\tBlockNumber blkno;\n\tItemPointerData target_tid;\n\tXLogRedoAction action;\n\n\tXLogRecGetBlockTag(record, 0, &target_locator, NULL, &blkno);\n\tItemPointerSetBlockNumber(&target_tid, blkno);\n\tItemPointerSetOffsetNumber(&target_tid, xlrec->offnum);\n\n\t/*\n\t * The visibility map may need to be fixed even if the heap page is\n\t * already up-to-date.\n\t */\n\tif (xlrec->flags & XLH_INSERT_ALL_VISIBLE_CLEARED)\n\t{\n\t\tRelation\treln = CreateFakeRelcacheEntry(target_locator);\n\t\tBuffer\t\tvmbuffer = InvalidBuffer;\n\n\t\tvisibilitymap_pin(reln, blkno, &vmbuffer);\n\t\tvisibilitymap_clear(reln, blkno, vmbuffer, VISIBILITYMAP_VALID_BITS);\n\t\tReleaseBuffer(vmbuffer);\n\t\tFreeFakeRelcacheEntry(reln);\n\t}\n\n\t/*\n\t * If we inserted the first and only tuple on the page, re-initialize the\n\t * page from scratch.\n\t */\n\tif (XLogRecGetInfo(record) & XLOG_HEAP_INIT_PAGE)\n\t{\n\t\tbuffer = XLogInitBufferForRedo(record, 0);\n\t\tpage = BufferGetPage(buffer);\n\t\tPageInit(page, BufferGetPageSize(buffer), 0);\n\t\taction = BLK_NEEDS_REDO;\n\t}\n\telse\n\t\taction = XLogReadBufferForRedo(record, 0, &buffer);\n\tif (action == BLK_NEEDS_REDO)\n\t{\n\t\tSize\t\tdatalen;\n\t\tchar\t   *data;\n\n\t\tpage = BufferGetPage(buffer);\n\n\t\tif (PageGetMaxOffsetNumber(page) + 1 < xlrec->offnum)\n\t\t\telog(PANIC, \"neon_rm_redo: invalid max offset 
number\");\n\n\t\tdata = XLogRecGetBlockData(record, 0, &datalen);\n\n\t\tnewlen = datalen - SizeOfNeonHeapHeader;\n\t\tAssert(datalen > SizeOfNeonHeapHeader && newlen <= MaxHeapTupleSize);\n\t\tmemcpy((char *) &xlhdr, data, SizeOfNeonHeapHeader);\n\t\tdata += SizeOfNeonHeapHeader;\n\n\t\thtup = &tbuf.hdr;\n\t\tMemSet((char *) htup, 0, SizeofHeapTupleHeader);\n\t\t/* PG73FORMAT: get bitmap [+ padding] [+ oid] + data */\n\t\tmemcpy((char *) htup + SizeofHeapTupleHeader,\n\t\t\t   data,\n\t\t\t   newlen);\n\t\tnewlen += SizeofHeapTupleHeader;\n\t\thtup->t_infomask2 = xlhdr.t_infomask2;\n\t\thtup->t_infomask = xlhdr.t_infomask;\n\t\thtup->t_hoff = xlhdr.t_hoff;\n\t\tHeapTupleHeaderSetXmin(htup, XLogRecGetXid(record));\n\t\thtup->t_choice.t_heap.t_field3.t_cid = xlhdr.t_cid;\n\t\thtup->t_ctid = target_tid;\n\n\t\tif (PageAddItem(page, (Item) htup, newlen, xlrec->offnum,\n\t\t\t\t\t\ttrue, true) == InvalidOffsetNumber)\n\t\t\telog(PANIC, \"neon_rm_redo: failed to add tuple\");\n\n\t\tfreespace = PageGetHeapFreeSpace(page); /* needed to update FSM below */\n\n\t\tPageSetLSN(page, lsn);\n\n\t\tif (xlrec->flags & XLH_INSERT_ALL_VISIBLE_CLEARED)\n\t\t\tPageClearAllVisible(page);\n\n\t\t/* XLH_INSERT_ALL_FROZEN_SET implies that all tuples are visible */\n\t\tif (xlrec->flags & XLH_INSERT_ALL_FROZEN_SET)\n\t\t\tPageSetAllVisible(page);\n\n\t\tMarkBufferDirty(buffer);\n\t}\n\tif (BufferIsValid(buffer))\n\t\tUnlockReleaseBuffer(buffer);\n\n\t/*\n\t * If the page is running low on free space, update the FSM as well.\n\t * Arbitrarily, our definition of \"low\" is less than 20%. We can't do much\n\t * better than that without knowing the fill-factor for the table.\n\t *\n\t * XXX: Don't do this if the page was restored from full page image. 
We\n\t * don't bother to update the FSM in that case, it doesn't need to be\n\t * totally accurate anyway.\n\t */\n\tif (action == BLK_NEEDS_REDO && freespace < BLCKSZ / 5)\n\t\tXLogRecordPageWithFreeSpace(target_locator, blkno, freespace);\n}\n\nstatic void\nredo_neon_heap_delete(XLogReaderState *record)\n{\n\tXLogRecPtr\tlsn = record->EndRecPtr;\n\txl_neon_heap_delete *xlrec = (xl_neon_heap_delete *) XLogRecGetData(record);\n\tBuffer\t\tbuffer;\n\tPage\t\tpage;\n\tItemId\t\tlp = NULL;\n\tHeapTupleHeader htup;\n\tBlockNumber blkno;\n\tRelFileLocator target_locator;\n\tItemPointerData target_tid;\n\n\tXLogRecGetBlockTag(record, 0, &target_locator, NULL, &blkno);\n\tItemPointerSetBlockNumber(&target_tid, blkno);\n\tItemPointerSetOffsetNumber(&target_tid, xlrec->offnum);\n\n\t/*\n\t * The visibility map may need to be fixed even if the heap page is\n\t * already up-to-date.\n\t */\n\tif (xlrec->flags & XLH_DELETE_ALL_VISIBLE_CLEARED)\n\t{\n\t\tRelation\treln = CreateFakeRelcacheEntry(target_locator);\n\t\tBuffer\t\tvmbuffer = InvalidBuffer;\n\n\t\tvisibilitymap_pin(reln, blkno, &vmbuffer);\n\t\tvisibilitymap_clear(reln, blkno, vmbuffer, VISIBILITYMAP_VALID_BITS);\n\t\tReleaseBuffer(vmbuffer);\n\t\tFreeFakeRelcacheEntry(reln);\n\t}\n\n\tif (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)\n\t{\n\t\tpage = BufferGetPage(buffer);\n\n\t\tif (PageGetMaxOffsetNumber(page) >= xlrec->offnum)\n\t\t\tlp = PageGetItemId(page, xlrec->offnum);\n\n\t\tif (PageGetMaxOffsetNumber(page) < xlrec->offnum || !ItemIdIsNormal(lp))\n\t\t\telog(PANIC, \"neon_rm_redo: invalid lp\");\n\n\t\thtup = (HeapTupleHeader) PageGetItem(page, lp);\n\n\t\thtup->t_infomask &= ~(HEAP_XMAX_BITS | HEAP_MOVED);\n\t\thtup->t_infomask2 &= ~HEAP_KEYS_UPDATED;\n\t\tHeapTupleHeaderClearHotUpdated(htup);\n\t\tfix_infomask_from_infobits(xlrec->infobits_set,\n\t\t\t\t\t\t\t\t   &htup->t_infomask, &htup->t_infomask2);\n\t\tif (!(xlrec->flags & XLH_DELETE_IS_SUPER))\n\t\t\tHeapTupleHeaderSetXmax(htup, 
xlrec->xmax);\n\t\telse\n\t\t\tHeapTupleHeaderSetXmin(htup, InvalidTransactionId);\n\t\thtup->t_choice.t_heap.t_field3.t_cid = xlrec->t_cid;\n\n\t\t/* Mark the page as a candidate for pruning */\n\t\tPageSetPrunable(page, XLogRecGetXid(record));\n\n\t\tif (xlrec->flags & XLH_DELETE_ALL_VISIBLE_CLEARED)\n\t\t\tPageClearAllVisible(page);\n\n\t\t/* Make sure t_ctid is set correctly */\n\t\tif (xlrec->flags & XLH_DELETE_IS_PARTITION_MOVE)\n\t\t\tHeapTupleHeaderSetMovedPartitions(htup);\n\t\telse\n\t\t\thtup->t_ctid = target_tid;\n\t\tPageSetLSN(page, lsn);\n\t\tMarkBufferDirty(buffer);\n\t}\n\tif (BufferIsValid(buffer))\n\t\tUnlockReleaseBuffer(buffer);\n}\n\nstatic void\nredo_neon_heap_update(XLogReaderState *record, bool hot_update)\n{\n\tXLogRecPtr\tlsn = record->EndRecPtr;\n\txl_neon_heap_update *xlrec = (xl_neon_heap_update *) XLogRecGetData(record);\n\tRelFileLocator rlocator;\n\tBlockNumber oldblk;\n\tBlockNumber newblk;\n\tItemPointerData newtid;\n\tBuffer\t\tobuffer,\n\t\t\t\tnbuffer;\n\tPage\t\tpage;\n\tOffsetNumber offnum;\n\tItemId\t\tlp = NULL;\n\tHeapTupleData oldtup;\n\tHeapTupleHeader htup;\n\tuint16\t\tprefixlen = 0,\n\t\t\t\tsuffixlen = 0;\n\tchar\t   *newp;\n\tunion\n\t{\n\t\tHeapTupleHeaderData hdr;\n\t\tchar\t\tdata[MaxHeapTupleSize];\n\t}\t\t\ttbuf;\n\txl_neon_heap_header xlhdr;\n\tuint32\t\tnewlen;\n\tSize\t\tfreespace = 0;\n\tXLogRedoAction oldaction;\n\tXLogRedoAction newaction;\n\n\t/* initialize to keep the compiler quiet */\n\toldtup.t_data = NULL;\n\toldtup.t_len = 0;\n\n\tXLogRecGetBlockTag(record, 0, &rlocator, NULL, &newblk);\n\tif (XLogRecGetBlockTagExtended(record, 1, NULL, NULL, &oldblk, NULL))\n\t{\n\t\t/* HOT updates are never done across pages */\n\t\tAssert(!hot_update);\n\t}\n\telse\n\t\toldblk = newblk;\n\n\tItemPointerSet(&newtid, newblk, xlrec->new_offnum);\n\n\t/*\n\t * The visibility map may need to be fixed even if the heap page is\n\t * already up-to-date.\n\t */\n\tif (xlrec->flags & 
XLH_UPDATE_OLD_ALL_VISIBLE_CLEARED)\n\t{\n\t\tRelation\treln = CreateFakeRelcacheEntry(rlocator);\n\t\tBuffer\t\tvmbuffer = InvalidBuffer;\n\n\t\tvisibilitymap_pin(reln, oldblk, &vmbuffer);\n\t\tvisibilitymap_clear(reln, oldblk, vmbuffer, VISIBILITYMAP_VALID_BITS);\n\t\tReleaseBuffer(vmbuffer);\n\t\tFreeFakeRelcacheEntry(reln);\n\t}\n\n\t/*\n\t * In normal operation, it is important to lock the two pages in\n\t * page-number order, to avoid possible deadlocks against other update\n\t * operations going the other way.  However, during WAL replay there can\n\t * be no other update happening, so we don't need to worry about that. But\n\t * we *do* need to worry that we don't expose an inconsistent state to Hot\n\t * Standby queries --- so the original page can't be unlocked before we've\n\t * added the new tuple to the new page.\n\t */\n\n\t/* Deal with old tuple version */\n\toldaction = XLogReadBufferForRedo(record, (oldblk == newblk) ? 0 : 1,\n\t\t\t\t\t\t\t\t\t  &obuffer);\n\tif (oldaction == BLK_NEEDS_REDO)\n\t{\n\t\tpage = BufferGetPage(obuffer);\n\t\toffnum = xlrec->old_offnum;\n\t\tif (PageGetMaxOffsetNumber(page) >= offnum)\n\t\t\tlp = PageGetItemId(page, offnum);\n\n\t\tif (PageGetMaxOffsetNumber(page) < offnum || !ItemIdIsNormal(lp))\n\t\t\telog(PANIC, \"neon_rm_redo: invalid lp\");\n\n\t\thtup = (HeapTupleHeader) PageGetItem(page, lp);\n\n\t\toldtup.t_data = htup;\n\t\toldtup.t_len = ItemIdGetLength(lp);\n\n\t\thtup->t_infomask &= ~(HEAP_XMAX_BITS | HEAP_MOVED);\n\t\thtup->t_infomask2 &= ~HEAP_KEYS_UPDATED;\n\t\tif (hot_update)\n\t\t\tHeapTupleHeaderSetHotUpdated(htup);\n\t\telse\n\t\t\tHeapTupleHeaderClearHotUpdated(htup);\n\t\tfix_infomask_from_infobits(xlrec->old_infobits_set, &htup->t_infomask,\n\t\t\t\t\t\t\t\t   &htup->t_infomask2);\n\t\tHeapTupleHeaderSetXmax(htup, xlrec->old_xmax);\n\t\thtup->t_choice.t_heap.t_field3.t_cid = xlrec->t_cid;\n\t\t/* Set forward chain link in t_ctid */\n\t\thtup->t_ctid = newtid;\n\n\t\t/* Mark the page as a candidate 
for pruning */\n\t\tPageSetPrunable(page, XLogRecGetXid(record));\n\n\t\tif (xlrec->flags & XLH_UPDATE_OLD_ALL_VISIBLE_CLEARED)\n\t\t\tPageClearAllVisible(page);\n\n\t\tPageSetLSN(page, lsn);\n\t\tMarkBufferDirty(obuffer);\n\t}\n\n\t/*\n\t * Read the page the new tuple goes into, if different from old.\n\t */\n\tif (oldblk == newblk)\n\t{\n\t\tnbuffer = obuffer;\n\t\tnewaction = oldaction;\n\t}\n\telse if (XLogRecGetInfo(record) & XLOG_HEAP_INIT_PAGE)\n\t{\n\t\tnbuffer = XLogInitBufferForRedo(record, 0);\n\t\tpage = (Page) BufferGetPage(nbuffer);\n\t\tPageInit(page, BufferGetPageSize(nbuffer), 0);\n\t\tnewaction = BLK_NEEDS_REDO;\n\t}\n\telse\n\t\tnewaction = XLogReadBufferForRedo(record, 0, &nbuffer);\n\n\t/*\n\t * The visibility map may need to be fixed even if the heap page is\n\t * already up-to-date.\n\t */\n\tif (xlrec->flags & XLH_UPDATE_NEW_ALL_VISIBLE_CLEARED)\n\t{\n\t\tRelation\treln = CreateFakeRelcacheEntry(rlocator);\n\t\tBuffer\t\tvmbuffer = InvalidBuffer;\n\n\t\tvisibilitymap_pin(reln, newblk, &vmbuffer);\n\t\tvisibilitymap_clear(reln, newblk, vmbuffer, VISIBILITYMAP_VALID_BITS);\n\t\tReleaseBuffer(vmbuffer);\n\t\tFreeFakeRelcacheEntry(reln);\n\t}\n\n\t/* Deal with new tuple */\n\tif (newaction == BLK_NEEDS_REDO)\n\t{\n\t\tchar\t   *recdata;\n\t\tchar\t   *recdata_end;\n\t\tSize\t\tdatalen;\n\t\tSize\t\ttuplen;\n\n\t\trecdata = XLogRecGetBlockData(record, 0, &datalen);\n\t\trecdata_end = recdata + datalen;\n\n\t\tpage = BufferGetPage(nbuffer);\n\n\t\toffnum = xlrec->new_offnum;\n\t\tif (PageGetMaxOffsetNumber(page) + 1 < offnum)\n\t\t\telog(PANIC, \"neon_rm_redo: invalid max offset number\");\n\n\t\tif (xlrec->flags & XLH_UPDATE_PREFIX_FROM_OLD)\n\t\t{\n\t\t\tAssert(newblk == oldblk);\n\t\t\tmemcpy(&prefixlen, recdata, sizeof(uint16));\n\t\t\trecdata += sizeof(uint16);\n\t\t}\n\t\tif (xlrec->flags & XLH_UPDATE_SUFFIX_FROM_OLD)\n\t\t{\n\t\t\tAssert(newblk == oldblk);\n\t\t\tmemcpy(&suffixlen, recdata, sizeof(uint16));\n\t\t\trecdata += 
sizeof(uint16);\n\t\t}\n\n\t\tmemcpy((char *) &xlhdr, recdata, SizeOfNeonHeapHeader);\n\t\trecdata += SizeOfNeonHeapHeader;\n\n\t\ttuplen = recdata_end - recdata;\n\t\tAssert(tuplen <= MaxHeapTupleSize);\n\n\t\thtup = &tbuf.hdr;\n\t\tMemSet((char *) htup, 0, SizeofHeapTupleHeader);\n\n\t\t/*\n\t\t * Reconstruct the new tuple using the prefix and/or suffix from the\n\t\t * old tuple, and the data stored in the WAL record.\n\t\t */\n\t\tnewp = (char *) htup + SizeofHeapTupleHeader;\n\t\tif (prefixlen > 0)\n\t\t{\n\t\t\tint\t\t\tlen;\n\n\t\t\t/* copy bitmap [+ padding] [+ oid] from WAL record */\n\t\t\tlen = xlhdr.t_hoff - SizeofHeapTupleHeader;\n\t\t\tmemcpy(newp, recdata, len);\n\t\t\trecdata += len;\n\t\t\tnewp += len;\n\n\t\t\t/* copy prefix from old tuple */\n\t\t\tmemcpy(newp, (char *) oldtup.t_data + oldtup.t_data->t_hoff, prefixlen);\n\t\t\tnewp += prefixlen;\n\n\t\t\t/* copy new tuple data from WAL record */\n\t\t\tlen = tuplen - (xlhdr.t_hoff - SizeofHeapTupleHeader);\n\t\t\tmemcpy(newp, recdata, len);\n\t\t\trecdata += len;\n\t\t\tnewp += len;\n\t\t}\n\t\telse\n\t\t{\n\t\t\t/*\n\t\t\t * copy bitmap [+ padding] [+ oid] + data from record, all in one\n\t\t\t * go\n\t\t\t */\n\t\t\tmemcpy(newp, recdata, tuplen);\n\t\t\trecdata += tuplen;\n\t\t\tnewp += tuplen;\n\t\t}\n\t\tAssert(recdata == recdata_end);\n\n\t\t/* copy suffix from old tuple */\n\t\tif (suffixlen > 0)\n\t\t\tmemcpy(newp, (char *) oldtup.t_data + oldtup.t_len - suffixlen, suffixlen);\n\n\t\tnewlen = SizeofHeapTupleHeader + tuplen + prefixlen + suffixlen;\n\t\thtup->t_infomask2 = xlhdr.t_infomask2;\n\t\thtup->t_infomask = xlhdr.t_infomask;\n\t\thtup->t_hoff = xlhdr.t_hoff;\n\n\t\tHeapTupleHeaderSetXmin(htup, XLogRecGetXid(record));\n\t\thtup->t_choice.t_heap.t_field3.t_cid = xlhdr.t_cid;\n\t\tHeapTupleHeaderSetXmax(htup, xlrec->new_xmax);\n\t\t/* Make sure there is no forward chain link in t_ctid */\n\t\thtup->t_ctid = newtid;\n\n\t\toffnum = PageAddItem(page, (Item) htup, newlen, offnum, true, 
true);\n\t\tif (offnum == InvalidOffsetNumber)\n\t\t\telog(PANIC, \"neon_rm_redo: failed to add tuple\");\n\n\t\tif (xlrec->flags & XLH_UPDATE_NEW_ALL_VISIBLE_CLEARED)\n\t\t\tPageClearAllVisible(page);\n\n\t\tfreespace = PageGetHeapFreeSpace(page); /* needed to update FSM below */\n\n\t\tPageSetLSN(page, lsn);\n\t\tMarkBufferDirty(nbuffer);\n\t}\n\n\tif (BufferIsValid(nbuffer) && nbuffer != obuffer)\n\t\tUnlockReleaseBuffer(nbuffer);\n\tif (BufferIsValid(obuffer))\n\t\tUnlockReleaseBuffer(obuffer);\n\n\t/*\n\t * If the new page is running low on free space, update the FSM as well.\n\t * Arbitrarily, our definition of \"low\" is less than 20%. We can't do much\n\t * better than that without knowing the fill-factor for the table.\n\t *\n\t * However, don't update the FSM on HOT updates, because after crash\n\t * recovery, either the old or the new tuple will certainly be dead and\n\t * prunable. After pruning, the page will have roughly as much free space\n\t * as it did before the update, assuming the new tuple is about the same\n\t * size as the old one.\n\t *\n\t * XXX: Don't do this if the page was restored from full page image. 
We\n\t * don't bother to update the FSM in that case, it doesn't need to be\n\t * totally accurate anyway.\n\t */\n\tif (newaction == BLK_NEEDS_REDO && !hot_update && freespace < BLCKSZ / 5)\n\t\tXLogRecordPageWithFreeSpace(rlocator, newblk, freespace);\n}\n\nstatic void\nredo_neon_heap_lock(XLogReaderState *record)\n{\n\tXLogRecPtr\tlsn = record->EndRecPtr;\n\txl_neon_heap_lock *xlrec = (xl_neon_heap_lock *) XLogRecGetData(record);\n\tBuffer\t\tbuffer;\n\tPage\t\tpage;\n\tOffsetNumber offnum;\n\tItemId\t\tlp = NULL;\n\tHeapTupleHeader htup;\n\n\t/*\n\t * The visibility map may need to be fixed even if the heap page is\n\t * already up-to-date.\n\t */\n\tif (xlrec->flags & XLH_LOCK_ALL_FROZEN_CLEARED)\n\t{\n\t\tRelFileLocator rlocator;\n\t\tBuffer\t\tvmbuffer = InvalidBuffer;\n\t\tBlockNumber block;\n\t\tRelation\treln;\n\n\t\tXLogRecGetBlockTag(record, 0, &rlocator, NULL, &block);\n\t\treln = CreateFakeRelcacheEntry(rlocator);\n\n\t\tvisibilitymap_pin(reln, block, &vmbuffer);\n\t\tvisibilitymap_clear(reln, block, vmbuffer, VISIBILITYMAP_ALL_FROZEN);\n\n\t\tReleaseBuffer(vmbuffer);\n\t\tFreeFakeRelcacheEntry(reln);\n\t}\n\n\tif (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)\n\t{\n\t\tpage = (Page) BufferGetPage(buffer);\n\n\t\toffnum = xlrec->offnum;\n\t\tif (PageGetMaxOffsetNumber(page) >= offnum)\n\t\t\tlp = PageGetItemId(page, offnum);\n\n\t\tif (PageGetMaxOffsetNumber(page) < offnum || !ItemIdIsNormal(lp))\n\t\t\telog(PANIC, \"neon_rm_redo: invalid lp\");\n\n\t\thtup = (HeapTupleHeader) PageGetItem(page, lp);\n\n\t\thtup->t_infomask &= ~(HEAP_XMAX_BITS | HEAP_MOVED);\n\t\thtup->t_infomask2 &= ~HEAP_KEYS_UPDATED;\n\t\tfix_infomask_from_infobits(xlrec->infobits_set, &htup->t_infomask,\n\t\t\t\t\t\t\t\t   &htup->t_infomask2);\n\n\t\t/*\n\t\t * Clear relevant update flags, but only if the modified infomask says\n\t\t * there's no update.\n\t\t */\n\t\tif 
(HEAP_XMAX_IS_LOCKED_ONLY(htup->t_infomask))\n\t\t{\n\t\t\tHeapTupleHeaderClearHotUpdated(htup);\n\t\t\t/* Make sure there is no forward chain link in t_ctid */\n\t\t\tItemPointerSet(&htup->t_ctid,\n\t\t\t\t\t\t   BufferGetBlockNumber(buffer),\n\t\t\t\t\t\t   offnum);\n\t\t}\n\t\tHeapTupleHeaderSetXmax(htup, xlrec->xmax);\n\t\thtup->t_choice.t_heap.t_field3.t_cid = xlrec->t_cid;\n\t\tPageSetLSN(page, lsn);\n\t\tMarkBufferDirty(buffer);\n\t}\n\tif (BufferIsValid(buffer))\n\t\tUnlockReleaseBuffer(buffer);\n}\n\nstatic void\nredo_neon_heap_multi_insert(XLogReaderState *record)\n{\n\tXLogRecPtr\tlsn = record->EndRecPtr;\n\txl_neon_heap_multi_insert *xlrec;\n\tRelFileLocator rlocator;\n\tBlockNumber blkno;\n\tBuffer\t\tbuffer;\n\tPage\t\tpage;\n\tunion\n\t{\n\t\tHeapTupleHeaderData hdr;\n\t\tchar\t\tdata[MaxHeapTupleSize];\n\t}\t\t\ttbuf;\n\tHeapTupleHeader htup;\n\tuint32\t\tnewlen;\n\tSize\t\tfreespace = 0;\n\tint\t\t\ti;\n\tbool\t\tisinit = (XLogRecGetInfo(record) & XLOG_HEAP_INIT_PAGE) != 0;\n\tXLogRedoAction action;\n\n\t/*\n\t * Insertion doesn't overwrite MVCC data, so no conflict processing is\n\t * required.\n\t */\n\txlrec = (xl_neon_heap_multi_insert *) XLogRecGetData(record);\n\n\tXLogRecGetBlockTag(record, 0, &rlocator, NULL, &blkno);\n\n\t/* check that the mutually exclusive flags are not both set */\n\tAssert(!((xlrec->flags & XLH_INSERT_ALL_VISIBLE_CLEARED) &&\n\t\t\t (xlrec->flags & XLH_INSERT_ALL_FROZEN_SET)));\n\n\t/*\n\t * The visibility map may need to be fixed even if the heap page is\n\t * already up-to-date.\n\t */\n\tif (xlrec->flags & XLH_INSERT_ALL_VISIBLE_CLEARED)\n\t{\n\t\tRelation\treln = CreateFakeRelcacheEntry(rlocator);\n\t\tBuffer\t\tvmbuffer = InvalidBuffer;\n\n\t\tvisibilitymap_pin(reln, blkno, &vmbuffer);\n\t\tvisibilitymap_clear(reln, blkno, vmbuffer, VISIBILITYMAP_VALID_BITS);\n\t\tReleaseBuffer(vmbuffer);\n\t\tFreeFakeRelcacheEntry(reln);\n\t}\n\n\tif (isinit)\n\t{\n\t\tbuffer = XLogInitBufferForRedo(record, 0);\n\t\tpage = 
BufferGetPage(buffer);\n\t\tPageInit(page, BufferGetPageSize(buffer), 0);\n\t\taction = BLK_NEEDS_REDO;\n\t}\n\telse\n\t\taction = XLogReadBufferForRedo(record, 0, &buffer);\n\tif (action == BLK_NEEDS_REDO)\n\t{\n\t\tchar\t   *tupdata;\n\t\tchar\t   *endptr;\n\t\tSize\t\tlen;\n\n\t\t/* Tuples are stored as block data */\n\t\ttupdata = XLogRecGetBlockData(record, 0, &len);\n\t\tendptr = tupdata + len;\n\n\t\tpage = (Page) BufferGetPage(buffer);\n\n\t\tfor (i = 0; i < xlrec->ntuples; i++)\n\t\t{\n\t\t\tOffsetNumber offnum;\n\t\t\txl_neon_multi_insert_tuple *xlhdr;\n\n\t\t\t/*\n\t\t\t * If we're reinitializing the page, the tuples are stored in\n\t\t\t * order from FirstOffsetNumber. Otherwise there's an array of\n\t\t\t * offsets in the WAL record, and the tuples come after that.\n\t\t\t */\n\t\t\tif (isinit)\n\t\t\t\toffnum = FirstOffsetNumber + i;\n\t\t\telse\n\t\t\t\toffnum = xlrec->offsets[i];\n\t\t\tif (PageGetMaxOffsetNumber(page) + 1 < offnum)\n\t\t\t\telog(PANIC, \"neon_rm_redo: invalid max offset number\");\n\n\t\t\txlhdr = (xl_neon_multi_insert_tuple *) SHORTALIGN(tupdata);\n\t\t\ttupdata = ((char *) xlhdr) + SizeOfNeonMultiInsertTuple;\n\n\t\t\tnewlen = xlhdr->datalen;\n\t\t\tAssert(newlen <= MaxHeapTupleSize);\n\t\t\thtup = &tbuf.hdr;\n\t\t\tMemSet((char *) htup, 0, SizeofHeapTupleHeader);\n\t\t\t/* PG73FORMAT: get bitmap [+ padding] [+ oid] + data */\n\t\t\tmemcpy((char *) htup + SizeofHeapTupleHeader,\n\t\t\t\t   (char *) tupdata,\n\t\t\t\t   newlen);\n\t\t\ttupdata += newlen;\n\n\t\t\tnewlen += SizeofHeapTupleHeader;\n\t\t\thtup->t_infomask2 = xlhdr->t_infomask2;\n\t\t\thtup->t_infomask = xlhdr->t_infomask;\n\t\t\thtup->t_hoff = xlhdr->t_hoff;\n\t\t\tHeapTupleHeaderSetXmin(htup, XLogRecGetXid(record));\n\t\t\thtup->t_choice.t_heap.t_field3.t_cid = xlrec->t_cid;\n\t\t\tItemPointerSetBlockNumber(&htup->t_ctid, blkno);\n\t\t\tItemPointerSetOffsetNumber(&htup->t_ctid, offnum);\n\n\t\t\toffnum = PageAddItem(page, (Item) htup, newlen, offnum, true, 
true);\n\t\t\tif (offnum == InvalidOffsetNumber)\n\t\t\t\telog(PANIC, \"neon_rm_redo: failed to add tuple\");\n\t\t}\n\t\tif (tupdata != endptr)\n\t\t\telog(PANIC, \"neon_rm_redo: total tuple length mismatch\");\n\n\t\tfreespace = PageGetHeapFreeSpace(page); /* needed to update FSM below */\n\n\t\tPageSetLSN(page, lsn);\n\n\t\tif (xlrec->flags & XLH_INSERT_ALL_VISIBLE_CLEARED)\n\t\t\tPageClearAllVisible(page);\n\n\t\t/* XLH_INSERT_ALL_FROZEN_SET implies that all tuples are visible */\n\t\tif (xlrec->flags & XLH_INSERT_ALL_FROZEN_SET)\n\t\t\tPageSetAllVisible(page);\n\n\t\tMarkBufferDirty(buffer);\n\t}\n\tif (BufferIsValid(buffer))\n\t\tUnlockReleaseBuffer(buffer);\n\n\t/*\n\t * If the page is running low on free space, update the FSM as well.\n\t * Arbitrarily, our definition of \"low\" is less than 20%. We can't do much\n\t * better than that without knowing the fill-factor for the table.\n\t *\n\t * XXX: Don't do this if the page was restored from full page image. We\n\t * don't bother to update the FSM in that case, it doesn't need to be\n\t * totally accurate anyway.\n\t */\n\tif (action == BLK_NEEDS_REDO && freespace < BLCKSZ / 5)\n\t\tXLogRecordPageWithFreeSpace(rlocator, blkno, freespace);\n}\n\n#else\n/* safeguard for older PostgreSQL versions */\nPG_MODULE_MAGIC;\n#endif\n"
  },
  {
    "path": "pgxn/neon_rmgr/neon_rmgr.control",
    "content": "# neon_rmgr extension\ncomment = 'Neon WAL Resource Manager - custom WAL records used to make Neon work (since PG 16)'\ndefault_version = '1.0'\nmodule_pathname = '$libdir/neon_rmgr'\n"
  },
  {
    "path": "pgxn/neon_rmgr/neon_rmgr.h",
    "content": "#ifndef NEON_RMGR_H\n#define NEON_RMGR_H\n#if PG_MAJORVERSION_NUM >= 16\n#include \"access/xlog_internal.h\"\n#include \"replication/decode.h\"\n#include \"replication/logical.h\"\n\nextern void neon_rm_desc(StringInfo buf, XLogReaderState *record);\nextern void neon_rm_decode(LogicalDecodingContext *ctx, XLogRecordBuffer *buf);\nextern const char *neon_rm_identify(uint8 info);\n\n#endif\n#endif //NEON_RMGR_H\n"
  },
  {
    "path": "pgxn/neon_rmgr/neon_rmgr_decode.c",
    "content": "#include \"postgres.h\"\n\n#if PG_MAJORVERSION_NUM >= 16\n\n#include \"access/heapam_xlog.h\"\n#include \"access/neon_xlog.h\"\n#include \"replication/decode.h\"\n#include \"replication/logical.h\"\n#include \"replication/snapbuild.h\"\n\n#include \"neon_rmgr.h\"\n\n#endif /* PG >= 16 */\n\n#if PG_MAJORVERSION_NUM == 16\n\n/* individual record(group)'s handlers */\nstatic void DecodeNeonInsert(LogicalDecodingContext *ctx, XLogRecordBuffer *buf);\nstatic void DecodeNeonUpdate(LogicalDecodingContext *ctx, XLogRecordBuffer *buf);\nstatic void DecodeNeonDelete(LogicalDecodingContext *ctx, XLogRecordBuffer *buf);\nstatic void DecodeNeonMultiInsert(LogicalDecodingContext *ctx, XLogRecordBuffer *buf);\n\n/* common function to decode tuples */\nstatic void DecodeXLogTuple(char *data, Size len, ReorderBufferTupleBuf *tuple);\n\n\nvoid\nneon_rm_decode(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)\n{\n\tuint8\t\tinfo = XLogRecGetInfo(buf->record) & XLOG_NEON_OPMASK;\n\tTransactionId xid = XLogRecGetXid(buf->record);\n\tSnapBuild  *builder = ctx->snapshot_builder;\n\n\tReorderBufferProcessXid(ctx->reorder, xid, buf->origptr);\n\n\t/*\n\t * If we don't have snapshot or we are just fast-forwarding, there is no\n\t * point in decoding data changes.\n\t */\n\tif (SnapBuildCurrentState(builder) < SNAPBUILD_FULL_SNAPSHOT ||\n\t\tctx->fast_forward)\n\t\treturn;\n\n\tswitch (info)\n\t{\n\t\tcase XLOG_NEON_HEAP_INSERT:\n\t\t\tif (SnapBuildProcessChange(builder, xid, buf->origptr))\n\t\t\t\tDecodeNeonInsert(ctx, buf);\n\t\t\tbreak;\n\t\tcase XLOG_NEON_HEAP_DELETE:\n\t\t\tif (SnapBuildProcessChange(builder, xid, buf->origptr))\n\t\t\t\tDecodeNeonDelete(ctx, buf);\n\t\t\tbreak;\n\t\tcase XLOG_NEON_HEAP_UPDATE:\n\t\tcase XLOG_NEON_HEAP_HOT_UPDATE:\n\t\t\tif (SnapBuildProcessChange(builder, xid, buf->origptr))\n\t\t\t\tDecodeNeonUpdate(ctx, buf);\n\t\t\tbreak;\n\t\tcase XLOG_NEON_HEAP_LOCK:\n\t\t\tbreak;\n\t\tcase XLOG_NEON_HEAP_MULTI_INSERT:\n\t\t\tif 
(SnapBuildProcessChange(builder, xid, buf->origptr))\n\t\t\t\tDecodeNeonMultiInsert(ctx, buf);\n\t\t\tbreak;\n\t\tdefault:\n\t\t\telog(ERROR, \"unexpected RM_HEAP_ID record type: %u\", info);\n\t\t\tbreak;\n\t}\n}\n\nstatic inline bool\nFilterByOrigin(LogicalDecodingContext *ctx, RepOriginId origin_id)\n{\n\tif (ctx->callbacks.filter_by_origin_cb == NULL)\n\t\treturn false;\n\n\treturn filter_by_origin_cb_wrapper(ctx, origin_id);\n}\n\n/*\n * Parse XLOG_HEAP_INSERT (not MULTI_INSERT!) records into tuplebufs.\n *\n * Deletes can contain the new tuple.\n */\nstatic void\nDecodeNeonInsert(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)\n{\n\tSize\t\tdatalen;\n\tchar\t   *tupledata;\n\tSize\t\ttuplelen;\n\tXLogReaderState *r = buf->record;\n\txl_neon_heap_insert *xlrec;\n\tReorderBufferChange *change;\n\tRelFileLocator target_locator;\n\n\txlrec = (xl_neon_heap_insert *) XLogRecGetData(r);\n\n\t/*\n\t * Ignore insert records without new tuples (this does happen when\n\t * raw_heap_insert marks the TOAST record as HEAP_INSERT_NO_LOGICAL).\n\t */\n\tif (!(xlrec->flags & XLH_INSERT_CONTAINS_NEW_TUPLE))\n\t\treturn;\n\n\t/* only interested in our database */\n\tXLogRecGetBlockTag(r, 0, &target_locator, NULL, NULL);\n\tif (target_locator.dbOid != ctx->slot->data.database)\n\t\treturn;\n\n\t/* output plugin doesn't look for this origin, no need to queue */\n\tif (FilterByOrigin(ctx, XLogRecGetOrigin(r)))\n\t\treturn;\n\n\tchange = ReorderBufferGetChange(ctx->reorder);\n\tif (!(xlrec->flags & XLH_INSERT_IS_SPECULATIVE))\n\t\tchange->action = REORDER_BUFFER_CHANGE_INSERT;\n\telse\n\t\tchange->action = REORDER_BUFFER_CHANGE_INTERNAL_SPEC_INSERT;\n\tchange->origin_id = XLogRecGetOrigin(r);\n\n\tmemcpy(&change->data.tp.rlocator, &target_locator, sizeof(RelFileLocator));\n\n\ttupledata = XLogRecGetBlockData(r, 0, &datalen);\n\ttuplelen = datalen - SizeOfNeonHeapHeader;\n\n\tchange->data.tp.newtuple =\n\t\tReorderBufferGetTupleBuf(ctx->reorder, 
tuplelen);\n\n\tDecodeXLogTuple(tupledata, datalen, change->data.tp.newtuple);\n\n\tchange->data.tp.clear_toast_afterwards = true;\n\n\tReorderBufferQueueChange(ctx->reorder, XLogRecGetXid(r), buf->origptr,\n\t\t\t\t\t\t\t change,\n\t\t\t\t\t\t\t xlrec->flags & XLH_INSERT_ON_TOAST_RELATION);\n}\n\n/*\n * Parse XLOG_HEAP_DELETE from wal into proper tuplebufs.\n *\n * Deletes can possibly contain the old primary key.\n */\nstatic void\nDecodeNeonDelete(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)\n{\n\tXLogReaderState *r = buf->record;\n\txl_neon_heap_delete *xlrec;\n\tReorderBufferChange *change;\n\tRelFileLocator target_locator;\n\n\txlrec = (xl_neon_heap_delete *) XLogRecGetData(r);\n\n\t/* only interested in our database */\n\tXLogRecGetBlockTag(r, 0, &target_locator, NULL, NULL);\n\tif (target_locator.dbOid != ctx->slot->data.database)\n\t\treturn;\n\n\t/* output plugin doesn't look for this origin, no need to queue */\n\tif (FilterByOrigin(ctx, XLogRecGetOrigin(r)))\n\t\treturn;\n\n\tchange = ReorderBufferGetChange(ctx->reorder);\n\n\tif (xlrec->flags & XLH_DELETE_IS_SUPER)\n\t\tchange->action = REORDER_BUFFER_CHANGE_INTERNAL_SPEC_ABORT;\n\telse\n\t\tchange->action = REORDER_BUFFER_CHANGE_DELETE;\n\n\tchange->origin_id = XLogRecGetOrigin(r);\n\n\tmemcpy(&change->data.tp.rlocator, &target_locator, sizeof(RelFileLocator));\n\n\t/* old primary key stored */\n\tif (xlrec->flags & XLH_DELETE_CONTAINS_OLD)\n\t{\n\t\tSize\t\tdatalen = XLogRecGetDataLen(r) - SizeOfNeonHeapHeader;\n\t\tSize\t\ttuplelen = datalen - SizeOfNeonHeapHeader;\n\n\t\tAssert(XLogRecGetDataLen(r) > (SizeOfNeonHeapDelete + SizeOfNeonHeapHeader));\n\n\t\tchange->data.tp.oldtuple =\n\t\t\tReorderBufferGetTupleBuf(ctx->reorder, tuplelen);\n\n\t\tDecodeXLogTuple((char *) xlrec + SizeOfNeonHeapDelete,\n\t\t\t\t\t\tdatalen, change->data.tp.oldtuple);\n\t}\n\n\tchange->data.tp.clear_toast_afterwards = true;\n\n\tReorderBufferQueueChange(ctx->reorder, XLogRecGetXid(r), buf->origptr,\n\t\t\t\t\t\t\t 
change, false);\n}\n\n/*\n * Parse XLOG_HEAP_UPDATE and XLOG_HEAP_HOT_UPDATE, which have the same layout\n * in the record, from wal into proper tuplebufs.\n *\n * Updates can possibly contain a new tuple and the old primary key.\n */\nstatic void\nDecodeNeonUpdate(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)\n{\n\tXLogReaderState *r = buf->record;\n\txl_neon_heap_update *xlrec;\n\tReorderBufferChange *change;\n\tchar\t   *data;\n\tRelFileLocator target_locator;\n\n\txlrec = (xl_neon_heap_update *) XLogRecGetData(r);\n\n\t/* only interested in our database */\n\tXLogRecGetBlockTag(r, 0, &target_locator, NULL, NULL);\n\tif (target_locator.dbOid != ctx->slot->data.database)\n\t\treturn;\n\n\t/* output plugin doesn't look for this origin, no need to queue */\n\tif (FilterByOrigin(ctx, XLogRecGetOrigin(r)))\n\t\treturn;\n\n\tchange = ReorderBufferGetChange(ctx->reorder);\n\tchange->action = REORDER_BUFFER_CHANGE_UPDATE;\n\tchange->origin_id = XLogRecGetOrigin(r);\n\tmemcpy(&change->data.tp.rlocator, &target_locator, sizeof(RelFileLocator));\n\n\tif (xlrec->flags & XLH_UPDATE_CONTAINS_NEW_TUPLE)\n\t{\n\t\tSize\t\tdatalen;\n\t\tSize\t\ttuplelen;\n\n\t\tdata = XLogRecGetBlockData(r, 0, &datalen);\n\n\t\ttuplelen = datalen - SizeOfNeonHeapHeader;\n\n\t\tchange->data.tp.newtuple =\n\t\t\tReorderBufferGetTupleBuf(ctx->reorder, tuplelen);\n\n\t\tDecodeXLogTuple(data, datalen, change->data.tp.newtuple);\n\t}\n\n\tif (xlrec->flags & XLH_UPDATE_CONTAINS_OLD)\n\t{\n\t\tSize\t\tdatalen;\n\t\tSize\t\ttuplelen;\n\n\t\t/* caution, remaining data in record is not aligned */\n\t\tdata = XLogRecGetData(r) + SizeOfNeonHeapUpdate;\n\t\tdatalen = XLogRecGetDataLen(r) - SizeOfNeonHeapUpdate;\n\t\ttuplelen = datalen - SizeOfNeonHeapHeader;\n\n\t\tchange->data.tp.oldtuple =\n\t\t\tReorderBufferGetTupleBuf(ctx->reorder, tuplelen);\n\n\t\tDecodeXLogTuple(data, datalen, change->data.tp.oldtuple);\n\t}\n\n\tchange->data.tp.clear_toast_afterwards = 
true;\n\n\tReorderBufferQueueChange(ctx->reorder, XLogRecGetXid(r), buf->origptr,\n\t\t\t\t\t\t\t change, false);\n}\n\n/*\n * Decode XLOG_HEAP2_MULTI_INSERT_insert record into multiple tuplebufs.\n *\n * Currently MULTI_INSERT will always contain the full tuples.\n */\nstatic void\nDecodeNeonMultiInsert(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)\n{\n\tXLogReaderState *r = buf->record;\n\txl_neon_heap_multi_insert *xlrec;\n\tint\t\t\ti;\n\tchar\t   *data;\n\tchar\t   *tupledata;\n\tSize\t\ttuplelen;\n\tRelFileLocator rlocator;\n\n\txlrec = (xl_neon_heap_multi_insert *) XLogRecGetData(r);\n\n\t/*\n\t * Ignore insert records without new tuples.  This happens when a\n\t * multi_insert is done on a catalog or on a non-persistent relation.\n\t */\n\tif (!(xlrec->flags & XLH_INSERT_CONTAINS_NEW_TUPLE))\n\t\treturn;\n\n\t/* only interested in our database */\n\tXLogRecGetBlockTag(r, 0, &rlocator, NULL, NULL);\n\tif (rlocator.dbOid != ctx->slot->data.database)\n\t\treturn;\n\n\t/* output plugin doesn't look for this origin, no need to queue */\n\tif (FilterByOrigin(ctx, XLogRecGetOrigin(r)))\n\t\treturn;\n\n\t/*\n\t * We know that this multi_insert isn't for a catalog, so the block should\n\t * always have data even if a full-page write of it is taken.\n\t */\n\ttupledata = XLogRecGetBlockData(r, 0, &tuplelen);\n\tAssert(tupledata != NULL);\n\n\tdata = tupledata;\n\tfor (i = 0; i < xlrec->ntuples; i++)\n\t{\n\t\tReorderBufferChange *change;\n\t\txl_neon_multi_insert_tuple *xlhdr;\n\t\tint\t\t\tdatalen;\n\t\tReorderBufferTupleBuf *tuple;\n\t\tHeapTupleHeader header;\n\n\t\tchange = ReorderBufferGetChange(ctx->reorder);\n\t\tchange->action = REORDER_BUFFER_CHANGE_INSERT;\n\t\tchange->origin_id = XLogRecGetOrigin(r);\n\n\t\tmemcpy(&change->data.tp.rlocator, &rlocator, sizeof(RelFileLocator));\n\n\t\txlhdr = (xl_neon_multi_insert_tuple *) SHORTALIGN(data);\n\t\tdata = ((char *) xlhdr) + SizeOfNeonMultiInsertTuple;\n\t\tdatalen = 
xlhdr->datalen;\n\n\t\tchange->data.tp.newtuple =\n\t\t\tReorderBufferGetTupleBuf(ctx->reorder, datalen);\n\n\t\ttuple = change->data.tp.newtuple;\n\t\theader = tuple->tuple.t_data;\n\n\t\t/* not a disk based tuple */\n\t\tItemPointerSetInvalid(&tuple->tuple.t_self);\n\n\t\t/*\n\t\t * We can only figure this out after reassembling the transactions.\n\t\t */\n\t\ttuple->tuple.t_tableOid = InvalidOid;\n\n\t\ttuple->tuple.t_len = datalen + SizeofHeapTupleHeader;\n\n\t\tmemset(header, 0, SizeofHeapTupleHeader);\n\n\t\tmemcpy((char *) tuple->tuple.t_data + SizeofHeapTupleHeader,\n\t\t\t   (char *) data,\n\t\t\t   datalen);\n\t\theader->t_infomask = xlhdr->t_infomask;\n\t\theader->t_infomask2 = xlhdr->t_infomask2;\n\t\theader->t_hoff = xlhdr->t_hoff;\n\n\t\t/*\n\t\t * Reset toast reassembly state only after the last row in the last\n\t\t * xl_multi_insert_tuple record emitted by one heap_multi_insert()\n\t\t * call.\n\t\t */\n\t\tif (xlrec->flags & XLH_INSERT_LAST_IN_MULTI &&\n\t\t\t(i + 1) == xlrec->ntuples)\n\t\t\tchange->data.tp.clear_toast_afterwards = true;\n\t\telse\n\t\t\tchange->data.tp.clear_toast_afterwards = false;\n\n\t\tReorderBufferQueueChange(ctx->reorder, XLogRecGetXid(r),\n\t\t\t\t\t\t\t\t buf->origptr, change, false);\n\n\t\t/* move to the next xl_neon_multi_insert_tuple entry */\n\t\tdata += datalen;\n\t}\n\tAssert(data == tupledata + tuplelen);\n}\n\n/*\n * Read a HeapTuple as WAL logged by heap_insert, heap_update and heap_delete\n * (but not by heap_multi_insert) into a tuplebuf.\n *\n * The size 'len' and the pointer 'data' in the record need to be\n * computed outside as they are record specific.\n */\nstatic void\nDecodeXLogTuple(char *data, Size len, ReorderBufferTupleBuf *tuple)\n{\n\txl_neon_heap_header xlhdr;\n\tint\t\t\tdatalen = len - SizeOfNeonHeapHeader;\n\tHeapTupleHeader header;\n\n\tAssert(datalen >= 0);\n\n\ttuple->tuple.t_len = datalen + SizeofHeapTupleHeader;\n\theader = tuple->tuple.t_data;\n\n\t/* not a disk based tuple 
*/\n\tItemPointerSetInvalid(&tuple->tuple.t_self);\n\n\t/* we can only figure this out after reassembling the transactions */\n\ttuple->tuple.t_tableOid = InvalidOid;\n\n\t/* data is not stored aligned, copy to aligned storage */\n\tmemcpy((char *) &xlhdr,\n\t\t   data,\n\t\t   SizeOfNeonHeapHeader);\n\n\tmemset(header, 0, SizeofHeapTupleHeader);\n\n\tmemcpy(((char *) tuple->tuple.t_data) + SizeofHeapTupleHeader,\n\t\t   data + SizeOfNeonHeapHeader,\n\t\t   datalen);\n\n\theader->t_infomask = xlhdr.t_infomask;\n\theader->t_infomask2 = xlhdr.t_infomask2;\n\theader->t_hoff = xlhdr.t_hoff;\n}\n#endif\n\n#if PG_MAJORVERSION_NUM == 17\n\n/* individual record(group)'s handlers */\nstatic void DecodeNeonInsert(LogicalDecodingContext *ctx, XLogRecordBuffer *buf);\nstatic void DecodeNeonUpdate(LogicalDecodingContext *ctx, XLogRecordBuffer *buf);\nstatic void DecodeNeonDelete(LogicalDecodingContext *ctx, XLogRecordBuffer *buf);\nstatic void DecodeNeonMultiInsert(LogicalDecodingContext *ctx, XLogRecordBuffer *buf);\n\n/* common function to decode tuples */\nstatic void DecodeXLogTuple(char *data, Size len, HeapTuple tuple);\n\n\nvoid\nneon_rm_decode(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)\n{\n\tuint8\t\tinfo = XLogRecGetInfo(buf->record) & XLOG_NEON_OPMASK;\n\tTransactionId xid = XLogRecGetXid(buf->record);\n\tSnapBuild  *builder = ctx->snapshot_builder;\n\n\tReorderBufferProcessXid(ctx->reorder, xid, buf->origptr);\n\n\t/*\n\t * If we don't have snapshot or we are just fast-forwarding, there is no\n\t * point in decoding data changes.\n\t */\n\tif (SnapBuildCurrentState(builder) < SNAPBUILD_FULL_SNAPSHOT ||\n\t\tctx->fast_forward)\n\t\treturn;\n\n\tswitch (info)\n\t{\n\t\tcase XLOG_NEON_HEAP_INSERT:\n\t\t\tif (SnapBuildProcessChange(builder, xid, buf->origptr))\n\t\t\t\tDecodeNeonInsert(ctx, buf);\n\t\t\tbreak;\n\t\tcase XLOG_NEON_HEAP_DELETE:\n\t\t\tif (SnapBuildProcessChange(builder, xid, buf->origptr))\n\t\t\t\tDecodeNeonDelete(ctx, 
buf);\n\t\t\tbreak;\n\t\tcase XLOG_NEON_HEAP_UPDATE:\n\t\tcase XLOG_NEON_HEAP_HOT_UPDATE:\n\t\t\tif (SnapBuildProcessChange(builder, xid, buf->origptr))\n\t\t\t\tDecodeNeonUpdate(ctx, buf);\n\t\t\tbreak;\n\t\tcase XLOG_NEON_HEAP_LOCK:\n\t\t\tbreak;\n\t\tcase XLOG_NEON_HEAP_MULTI_INSERT:\n\t\t\tif (SnapBuildProcessChange(builder, xid, buf->origptr))\n\t\t\t\tDecodeNeonMultiInsert(ctx, buf);\n\t\t\tbreak;\n\t\tdefault:\n\t\t\telog(ERROR, \"unexpected RM_HEAP_ID record type: %u\", info);\n\t\t\tbreak;\n\t}\n}\n\nstatic inline bool\nFilterByOrigin(LogicalDecodingContext *ctx, RepOriginId origin_id)\n{\n\tif (ctx->callbacks.filter_by_origin_cb == NULL)\n\t\treturn false;\n\n\treturn filter_by_origin_cb_wrapper(ctx, origin_id);\n}\n\n/*\n * Parse XLOG_HEAP_INSERT (not MULTI_INSERT!) records into tuplebufs.\n *\n * Deletes can contain the new tuple.\n */\nstatic void\nDecodeNeonInsert(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)\n{\n\tSize\t\tdatalen;\n\tchar\t   *tupledata;\n\tSize\t\ttuplelen;\n\tXLogReaderState *r = buf->record;\n\txl_neon_heap_insert *xlrec;\n\tReorderBufferChange *change;\n\tRelFileLocator target_locator;\n\n\txlrec = (xl_neon_heap_insert *) XLogRecGetData(r);\n\n\t/*\n\t * Ignore insert records without new tuples (this does happen when\n\t * raw_heap_insert marks the TOAST record as HEAP_INSERT_NO_LOGICAL).\n\t */\n\tif (!(xlrec->flags & XLH_INSERT_CONTAINS_NEW_TUPLE))\n\t\treturn;\n\n\t/* only interested in our database */\n\tXLogRecGetBlockTag(r, 0, &target_locator, NULL, NULL);\n\tif (target_locator.dbOid != ctx->slot->data.database)\n\t\treturn;\n\n\t/* output plugin doesn't look for this origin, no need to queue */\n\tif (FilterByOrigin(ctx, XLogRecGetOrigin(r)))\n\t\treturn;\n\n\tchange = ReorderBufferGetChange(ctx->reorder);\n\tif (!(xlrec->flags & XLH_INSERT_IS_SPECULATIVE))\n\t\tchange->action = REORDER_BUFFER_CHANGE_INSERT;\n\telse\n\t\tchange->action = REORDER_BUFFER_CHANGE_INTERNAL_SPEC_INSERT;\n\tchange->origin_id = 
XLogRecGetOrigin(r);\n\n\tmemcpy(&change->data.tp.rlocator, &target_locator, sizeof(RelFileLocator));\n\n\ttupledata = XLogRecGetBlockData(r, 0, &datalen);\n\ttuplelen = datalen - SizeOfHeapHeader;\n\n\tchange->data.tp.newtuple =\n\t\tReorderBufferGetTupleBuf(ctx->reorder, tuplelen);\n\n\tDecodeXLogTuple(tupledata, datalen, change->data.tp.newtuple);\n\n\tchange->data.tp.clear_toast_afterwards = true;\n\n\tReorderBufferQueueChange(ctx->reorder, XLogRecGetXid(r), buf->origptr,\n\t\t\t\t\t\t\t change,\n\t\t\t\t\t\t\t xlrec->flags & XLH_INSERT_ON_TOAST_RELATION);\n}\n\n/*\n * Parse XLOG_HEAP_DELETE from wal into proper tuplebufs.\n *\n * Deletes can possibly contain the old primary key.\n */\nstatic void\nDecodeNeonDelete(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)\n{\n\tXLogReaderState *r = buf->record;\n\txl_neon_heap_delete *xlrec;\n\tReorderBufferChange *change;\n\tRelFileLocator target_locator;\n\n\txlrec = (xl_neon_heap_delete *) XLogRecGetData(r);\n\n\t/* only interested in our database */\n\tXLogRecGetBlockTag(r, 0, &target_locator, NULL, NULL);\n\tif (target_locator.dbOid != ctx->slot->data.database)\n\t\treturn;\n\n\t/* output plugin doesn't look for this origin, no need to queue */\n\tif (FilterByOrigin(ctx, XLogRecGetOrigin(r)))\n\t\treturn;\n\n\tchange = ReorderBufferGetChange(ctx->reorder);\n\n\tif (xlrec->flags & XLH_DELETE_IS_SUPER)\n\t\tchange->action = REORDER_BUFFER_CHANGE_INTERNAL_SPEC_ABORT;\n\telse\n\t\tchange->action = REORDER_BUFFER_CHANGE_DELETE;\n\n\tchange->origin_id = XLogRecGetOrigin(r);\n\n\tmemcpy(&change->data.tp.rlocator, &target_locator, sizeof(RelFileLocator));\n\n\t/* old primary key stored */\n\tif (xlrec->flags & XLH_DELETE_CONTAINS_OLD)\n\t{\n\t\tSize\t\tdatalen = XLogRecGetDataLen(r) - SizeOfNeonHeapHeader;\n\t\tSize\t\ttuplelen = datalen - SizeOfNeonHeapHeader;\n\n\t\tAssert(XLogRecGetDataLen(r) > (SizeOfNeonHeapDelete + SizeOfNeonHeapHeader));\n\n\t\tchange->data.tp.oldtuple 
=\n\t\t\tReorderBufferGetTupleBuf(ctx->reorder, tuplelen);\n\n\t\tDecodeXLogTuple((char *) xlrec + SizeOfNeonHeapDelete,\n\t\t\t\t\t\tdatalen, change->data.tp.oldtuple);\n\t}\n\n\tchange->data.tp.clear_toast_afterwards = true;\n\n\tReorderBufferQueueChange(ctx->reorder, XLogRecGetXid(r), buf->origptr,\n\t\t\t\t\t\t\t change, false);\n}\n\n/*\n * Parse XLOG_HEAP_UPDATE and XLOG_HEAP_HOT_UPDATE, which have the same layout\n * in the record, from wal into proper tuplebufs.\n *\n * Updates can possibly contain a new tuple and the old primary key.\n */\nstatic void\nDecodeNeonUpdate(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)\n{\n\tXLogReaderState *r = buf->record;\n\txl_neon_heap_update *xlrec;\n\tReorderBufferChange *change;\n\tchar\t   *data;\n\tRelFileLocator target_locator;\n\n\txlrec = (xl_neon_heap_update *) XLogRecGetData(r);\n\n\t/* only interested in our database */\n\tXLogRecGetBlockTag(r, 0, &target_locator, NULL, NULL);\n\tif (target_locator.dbOid != ctx->slot->data.database)\n\t\treturn;\n\n\t/* output plugin doesn't look for this origin, no need to queue */\n\tif (FilterByOrigin(ctx, XLogRecGetOrigin(r)))\n\t\treturn;\n\n\tchange = ReorderBufferGetChange(ctx->reorder);\n\tchange->action = REORDER_BUFFER_CHANGE_UPDATE;\n\tchange->origin_id = XLogRecGetOrigin(r);\n\tmemcpy(&change->data.tp.rlocator, &target_locator, sizeof(RelFileLocator));\n\n\tif (xlrec->flags & XLH_UPDATE_CONTAINS_NEW_TUPLE)\n\t{\n\t\tSize\t\tdatalen;\n\t\tSize\t\ttuplelen;\n\n\t\tdata = XLogRecGetBlockData(r, 0, &datalen);\n\n\t\ttuplelen = datalen - SizeOfNeonHeapHeader;\n\n\t\tchange->data.tp.newtuple =\n\t\t\tReorderBufferGetTupleBuf(ctx->reorder, tuplelen);\n\n\t\tDecodeXLogTuple(data, datalen, change->data.tp.newtuple);\n\t}\n\n\tif (xlrec->flags & XLH_UPDATE_CONTAINS_OLD)\n\t{\n\t\tSize\t\tdatalen;\n\t\tSize\t\ttuplelen;\n\n\t\t/* caution, remaining data in record is not aligned */\n\t\tdata = XLogRecGetData(r) + SizeOfNeonHeapUpdate;\n\t\tdatalen = XLogRecGetDataLen(r) - 
SizeOfNeonHeapUpdate;\n\t\ttuplelen = datalen - SizeOfNeonHeapHeader;\n\n\t\tchange->data.tp.oldtuple =\n\t\t\tReorderBufferGetTupleBuf(ctx->reorder, tuplelen);\n\n\t\tDecodeXLogTuple(data, datalen, change->data.tp.oldtuple);\n\t}\n\n\tchange->data.tp.clear_toast_afterwards = true;\n\n\tReorderBufferQueueChange(ctx->reorder, XLogRecGetXid(r), buf->origptr,\n\t\t\t\t\t\t\t change, false);\n}\n\n/*\n * Decode XLOG_HEAP2_MULTI_INSERT_insert record into multiple tuplebufs.\n *\n * Currently MULTI_INSERT will always contain the full tuples.\n */\nstatic void\nDecodeNeonMultiInsert(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)\n{\n\tXLogReaderState *r = buf->record;\n\txl_neon_heap_multi_insert *xlrec;\n\tint\t\t\ti;\n\tchar\t   *data;\n\tchar\t   *tupledata;\n\tSize\t\ttuplelen;\n\tRelFileLocator rlocator;\n\n\txlrec = (xl_neon_heap_multi_insert *) XLogRecGetData(r);\n\n\t/*\n\t * Ignore insert records without new tuples.  This happens when a\n\t * multi_insert is done on a catalog or on a non-persistent relation.\n\t */\n\tif (!(xlrec->flags & XLH_INSERT_CONTAINS_NEW_TUPLE))\n\t\treturn;\n\n\t/* only interested in our database */\n\tXLogRecGetBlockTag(r, 0, &rlocator, NULL, NULL);\n\tif (rlocator.dbOid != ctx->slot->data.database)\n\t\treturn;\n\n\t/* output plugin doesn't look for this origin, no need to queue */\n\tif (FilterByOrigin(ctx, XLogRecGetOrigin(r)))\n\t\treturn;\n\n\t/*\n\t * We know that this multi_insert isn't for a catalog, so the block should\n\t * always have data even if a full-page write of it is taken.\n\t */\n\ttupledata = XLogRecGetBlockData(r, 0, &tuplelen);\n\tAssert(tupledata != NULL);\n\n\tdata = tupledata;\n\tfor (i = 0; i < xlrec->ntuples; i++)\n\t{\n\t\tReorderBufferChange *change;\n\t\txl_neon_multi_insert_tuple *xlhdr;\n\t\tint\t\t\tdatalen;\n\t\tHeapTuple\ttuple;\n\t\tHeapTupleHeader header;\n\n\t\tchange = ReorderBufferGetChange(ctx->reorder);\n\t\tchange->action = REORDER_BUFFER_CHANGE_INSERT;\n\t\tchange->origin_id = 
XLogRecGetOrigin(r);\n\n\t\tmemcpy(&change->data.tp.rlocator, &rlocator, sizeof(RelFileLocator));\n\n\t\txlhdr = (xl_neon_multi_insert_tuple *) SHORTALIGN(data);\n\t\tdata = ((char *) xlhdr) + SizeOfNeonMultiInsertTuple;\n\t\tdatalen = xlhdr->datalen;\n\n\t\tchange->data.tp.newtuple =\n\t\t\tReorderBufferGetTupleBuf(ctx->reorder, datalen);\n\n\t\ttuple = change->data.tp.newtuple;\n\t\theader = tuple->t_data;\n\n\t\t/* not a disk based tuple */\n\t\tItemPointerSetInvalid(&tuple->t_self);\n\n\t\t/*\n\t\t * We can only figure this out after reassembling the transactions.\n\t\t */\n\t\ttuple->t_tableOid = InvalidOid;\n\n\t\ttuple->t_len = datalen + SizeofHeapTupleHeader;\n\n\t\tmemset(header, 0, SizeofHeapTupleHeader);\n\n\t\tmemcpy((char *) tuple->t_data + SizeofHeapTupleHeader,\n\t\t\t   (char *) data,\n\t\t\t   datalen);\n\t\theader->t_infomask = xlhdr->t_infomask;\n\t\theader->t_infomask2 = xlhdr->t_infomask2;\n\t\theader->t_hoff = xlhdr->t_hoff;\n\n\t\t/*\n\t\t * Reset toast reassembly state only after the last row in the last\n\t\t * xl_multi_insert_tuple record emitted by one heap_multi_insert()\n\t\t * call.\n\t\t */\n\t\tif (xlrec->flags & XLH_INSERT_LAST_IN_MULTI &&\n\t\t\t(i + 1) == xlrec->ntuples)\n\t\t\tchange->data.tp.clear_toast_afterwards = true;\n\t\telse\n\t\t\tchange->data.tp.clear_toast_afterwards = false;\n\n\t\tReorderBufferQueueChange(ctx->reorder, XLogRecGetXid(r),\n\t\t\t\t\t\t\t\t buf->origptr, change, false);\n\n\t\t/* move to the next xl_neon_multi_insert_tuple entry */\n\t\tdata += datalen;\n\t}\n\tAssert(data == tupledata + tuplelen);\n}\n\n/*\n * Read a HeapTuple as WAL logged by heap_insert, heap_update and heap_delete\n * (but not by heap_multi_insert) into a tuplebuf.\n *\n * The size 'len' and the pointer 'data' in the record need to be\n * computed outside as they are record specific.\n */\nstatic void\nDecodeXLogTuple(char *data, Size len, HeapTuple tuple)\n{\n\txl_neon_heap_header xlhdr;\n\tint\t\t\tdatalen = len - 
SizeOfNeonHeapHeader;\n\tHeapTupleHeader header;\n\n\tAssert(datalen >= 0);\n\n\ttuple->t_len = datalen + SizeofHeapTupleHeader;\n\theader = tuple->t_data;\n\n\t/* not a disk based tuple */\n\tItemPointerSetInvalid(&tuple->t_self);\n\n\t/* we can only figure this out after reassembling the transactions */\n\ttuple->t_tableOid = InvalidOid;\n\n\t/* data is not stored aligned, copy to aligned storage */\n\tmemcpy((char *) &xlhdr,\n\t\t   data,\n\t\t   SizeOfNeonHeapHeader);\n\n\tmemset(header, 0, SizeofHeapTupleHeader);\n\n\tmemcpy(((char *) tuple->t_data) + SizeofHeapTupleHeader,\n\t\t   data + SizeOfNeonHeapHeader,\n\t\t   datalen);\n\n\theader->t_infomask = xlhdr.t_infomask;\n\theader->t_infomask2 = xlhdr.t_infomask2;\n\theader->t_hoff = xlhdr.t_hoff;\n}\n#endif\n"
  },
  {
    "path": "pgxn/neon_rmgr/neon_rmgr_desc.c",
    "content": "#include \"postgres.h\"\n#if PG_MAJORVERSION_NUM >= 16\n#include \"access/heapam_xlog.h\"\n#include \"access/neon_xlog.h\"\n#include \"access/rmgr.h\"\n#include \"access/rmgrdesc_utils.h\"\n#include \"access/xlog_internal.h\"\n#include \"miscadmin.h\"\n#include \"storage/buf.h\"\n#include \"storage/bufpage.h\"\n\n#include \"neon_rmgr.h\"\n\n/*\n * NOTE: \"keyname\" argument cannot have trailing spaces or punctuation\n * characters\n */\nstatic void\ninfobits_desc(StringInfo buf, uint8 infobits, const char *keyname)\n{\n\tappendStringInfo(buf, \"%s: [\", keyname);\n\n\tAssert(buf->data[buf->len - 1] != ' ');\n\n\tif (infobits & XLHL_XMAX_IS_MULTI)\n\t\tappendStringInfoString(buf, \"IS_MULTI, \");\n\tif (infobits & XLHL_XMAX_LOCK_ONLY)\n\t\tappendStringInfoString(buf, \"LOCK_ONLY, \");\n\tif (infobits & XLHL_XMAX_EXCL_LOCK)\n\t\tappendStringInfoString(buf, \"EXCL_LOCK, \");\n\tif (infobits & XLHL_XMAX_KEYSHR_LOCK)\n\t\tappendStringInfoString(buf, \"KEYSHR_LOCK, \");\n\tif (infobits & XLHL_KEYS_UPDATED)\n\t\tappendStringInfoString(buf, \"KEYS_UPDATED, \");\n\n\tif (buf->data[buf->len - 1] == ' ')\n\t{\n\t\t/* Truncate-away final unneeded \", \"  */\n\t\tAssert(buf->data[buf->len - 2] == ',');\n\t\tbuf->len -= 2;\n\t\tbuf->data[buf->len] = '\\0';\n\t}\n\n\tappendStringInfoString(buf, \"]\");\n}\n\nvoid\nneon_rm_desc(StringInfo buf, XLogReaderState *record)\n{\n\tchar\t   *rec = XLogRecGetData(record);\n\tuint8\t\tinfo = XLogRecGetInfo(record) & ~XLR_INFO_MASK;\n\n\tinfo &= XLOG_NEON_OPMASK;\n\n\tif (info == XLOG_NEON_HEAP_INSERT)\n\t{\n\t\txl_neon_heap_insert *xlrec = (xl_neon_heap_insert *) rec;\n\n\t\tappendStringInfo(buf, \"off: %u, flags: 0x%02X\",\n\t\t\t\t\t\t xlrec->offnum,\n\t\t\t\t\t\t xlrec->flags);\n\t}\n\telse if (info == XLOG_NEON_HEAP_DELETE)\n\t{\n\t\txl_neon_heap_delete *xlrec = (xl_neon_heap_delete *) rec;\n\n\t\tappendStringInfo(buf, \"xmax: %u, off: %u, \",\n\t\t\t\t\t\t xlrec->xmax, xlrec->offnum);\n\t\tinfobits_desc(buf, 
xlrec->infobits_set, \"infobits\");\n\t\tappendStringInfo(buf, \", flags: 0x%02X\", xlrec->flags);\n\t}\n\telse if (info == XLOG_NEON_HEAP_UPDATE)\n\t{\n\t\txl_neon_heap_update *xlrec = (xl_neon_heap_update *) rec;\n\n\t\tappendStringInfo(buf, \"old_xmax: %u, old_off: %u, \",\n\t\t\t\t\t\t xlrec->old_xmax, xlrec->old_offnum);\n\t\tinfobits_desc(buf, xlrec->old_infobits_set, \"old_infobits\");\n\t\tappendStringInfo(buf, \", flags: 0x%02X, new_xmax: %u, new_off: %u\",\n\t\t\t\t\t\t xlrec->flags, xlrec->new_xmax, xlrec->new_offnum);\n\t}\n\telse if (info == XLOG_NEON_HEAP_HOT_UPDATE)\n\t{\n\t\txl_neon_heap_update *xlrec = (xl_neon_heap_update *) rec;\n\n\t\tappendStringInfo(buf, \"old_xmax: %u, old_off: %u, \",\n\t\t\t\t\t\t xlrec->old_xmax, xlrec->old_offnum);\n\t\tinfobits_desc(buf, xlrec->old_infobits_set, \"old_infobits\");\n\t\tappendStringInfo(buf, \", flags: 0x%02X, new_xmax: %u, new_off: %u\",\n\t\t\t\t\t\t xlrec->flags, xlrec->new_xmax, xlrec->new_offnum);\n\t}\n\telse if (info == XLOG_NEON_HEAP_LOCK)\n\t{\n\t\txl_neon_heap_lock *xlrec = (xl_neon_heap_lock *) rec;\n\n\t\tappendStringInfo(buf, \"xmax: %u, off: %u, \",\n\t\t\t\t\t\t xlrec->xmax, xlrec->offnum);\n\t\tinfobits_desc(buf, xlrec->infobits_set, \"infobits\");\n\t\tappendStringInfo(buf, \", flags: 0x%02X\", xlrec->flags);\n\t}\n\telse if (info == XLOG_NEON_HEAP_MULTI_INSERT)\n\t{\n\t\txl_neon_heap_multi_insert *xlrec = (xl_neon_heap_multi_insert *) rec;\n\t\tbool\t\tisinit = (XLogRecGetInfo(record) & XLOG_NEON_INIT_PAGE) != 0;\n\n\t\tappendStringInfo(buf, \"ntuples: %d, flags: 0x%02X\", xlrec->ntuples,\n\t\t\t\t\t\t xlrec->flags);\n\n\t\tif (XLogRecHasBlockData(record, 0) && !isinit)\n\t\t{\n\t\t\tappendStringInfoString(buf, \", offsets:\");\n\t\t\tarray_desc(buf, xlrec->offsets, sizeof(OffsetNumber),\n\t\t\t\t\t   xlrec->ntuples, &offset_elem_desc, NULL);\n\t\t}\n\t}\n}\n\nconst char *\nneon_rm_identify(uint8 info)\n{\n\tconst char *id = NULL;\n\n\tswitch (info & ~XLR_INFO_MASK)\n\t{\n\t\tcase 
XLOG_NEON_HEAP_INSERT:\n\t\t\tid = \"INSERT\";\n\t\t\tbreak;\n\t\tcase XLOG_NEON_HEAP_INSERT | XLOG_NEON_INIT_PAGE:\n\t\t\tid = \"INSERT+INIT\";\n\t\t\tbreak;\n\t\tcase XLOG_NEON_HEAP_DELETE:\n\t\t\tid = \"DELETE\";\n\t\t\tbreak;\n\t\tcase XLOG_NEON_HEAP_UPDATE:\n\t\t\tid = \"UPDATE\";\n\t\t\tbreak;\n\t\tcase XLOG_NEON_HEAP_UPDATE | XLOG_NEON_INIT_PAGE:\n\t\t\tid = \"UPDATE+INIT\";\n\t\t\tbreak;\n\t\tcase XLOG_NEON_HEAP_HOT_UPDATE:\n\t\t\tid = \"HOT_UPDATE\";\n\t\t\tbreak;\n\t\tcase XLOG_NEON_HEAP_HOT_UPDATE | XLOG_HEAP_INIT_PAGE:\n\t\t\tid = \"HOT_UPDATE+INIT\";\n\t\t\tbreak;\n\t\tcase XLOG_NEON_HEAP_LOCK:\n\t\t\tid = \"LOCK\";\n\t\t\tbreak;\n\t\tcase XLOG_NEON_HEAP_MULTI_INSERT:\n\t\t\tid = \"MULTI_INSERT\";\n\t\t\tbreak;\n\t\tcase XLOG_NEON_HEAP_MULTI_INSERT | XLOG_NEON_INIT_PAGE:\n\t\t\tid = \"MULTI_INSERT+INIT\";\n\t\t\tbreak;\n\t}\n\n\treturn id;\n}\n\n#endif\n"
  },
  {
    "path": "pgxn/neon_test_utils/Makefile",
    "content": "# pgxn/neon_test_utils/Makefile\n\n\nMODULE_big = neon_test_utils\nOBJS = \\\n\t$(WIN32RES) \\\n\tneontest.o\n\nEXTENSION = neon_test_utils\nDATA = neon_test_utils--1.3.sql\nPGFILEDESC = \"neon_test_utils - helpers for neon testing and debugging\"\n\nPG_CONFIG = pg_config\nPGXS := $(shell $(PG_CONFIG) --pgxs)\ninclude $(PGXS)\n"
  },
  {
    "path": "pgxn/neon_test_utils/neon_test_utils--1.3.sql",
    "content": "-- complain if script is sourced in psql, rather than via CREATE EXTENSION\n\\echo Use \"CREATE EXTENSION neon_test_utils\" to load this file. \\quit\n\nCREATE FUNCTION test_consume_xids(nxids int)\nRETURNS VOID\nAS 'MODULE_PATHNAME', 'test_consume_xids'\nLANGUAGE C STRICT\nPARALLEL UNSAFE;\n\nCREATE FUNCTION test_consume_oids(oid int)\nRETURNS VOID\nAS 'MODULE_PATHNAME', 'test_consume_oids'\nLANGUAGE C STRICT\nPARALLEL UNSAFE;\n\nCREATE FUNCTION test_consume_cpu(seconds int)\nRETURNS VOID\nAS 'MODULE_PATHNAME', 'test_consume_cpu'\nLANGUAGE C STRICT\nPARALLEL UNSAFE;\n\nCREATE FUNCTION test_consume_memory(megabytes int)\nRETURNS VOID\nAS 'MODULE_PATHNAME', 'test_consume_memory'\nLANGUAGE C STRICT\nPARALLEL UNSAFE;\n\nCREATE FUNCTION test_release_memory(megabytes int DEFAULT NULL)\nRETURNS VOID\nAS 'MODULE_PATHNAME', 'test_release_memory'\nLANGUAGE C\nPARALLEL UNSAFE;\n\nCREATE FUNCTION clear_buffer_cache()\nRETURNS VOID\nAS 'MODULE_PATHNAME', 'clear_buffer_cache'\nLANGUAGE C STRICT\nPARALLEL UNSAFE;\n\nCREATE FUNCTION get_raw_page_at_lsn(relname text, forkname text, blocknum int8, request_lsn pg_lsn, not_modified_since pg_lsn)\nRETURNS bytea\nAS 'MODULE_PATHNAME', 'get_raw_page_at_lsn'\nLANGUAGE C PARALLEL UNSAFE;\n\nCREATE FUNCTION get_raw_page_at_lsn(tbspc oid, db oid, relfilenode oid, forknum int8, blocknum int8, request_lsn pg_lsn, not_modified_since pg_lsn)\nRETURNS bytea\nAS 'MODULE_PATHNAME', 'get_raw_page_at_lsn_ex'\nLANGUAGE C PARALLEL UNSAFE;\n\nCREATE FUNCTION neon_xlogflush(lsn pg_lsn DEFAULT NULL)\nRETURNS VOID\nAS 'MODULE_PATHNAME', 'neon_xlogflush'\nLANGUAGE C PARALLEL UNSAFE;\n\nCREATE FUNCTION trigger_panic()\nRETURNS VOID\nAS 'MODULE_PATHNAME', 'trigger_panic'\nLANGUAGE C PARALLEL UNSAFE;\n\nCREATE FUNCTION trigger_segfault()\nRETURNS VOID\nAS 'MODULE_PATHNAME', 'trigger_segfault'\nLANGUAGE C PARALLEL UNSAFE;\n\n-- Alias for `trigger_segfault`, just because `SELECT 💣()` looks fun\nCREATE OR REPLACE FUNCTION 💣() RETURNS 
void\nLANGUAGE plpgsql AS $$\nBEGIN\n    PERFORM trigger_segfault();\nEND;\n$$;\n"
  },
  {
    "path": "pgxn/neon_test_utils/neon_test_utils.control",
    "content": "# neon_test_utils extension\ncomment = 'helpers for neon testing and debugging'\ndefault_version = '1.3'\nmodule_pathname = '$libdir/neon_test_utils'\nrelocatable = true\ntrusted = true\n"
  },
  {
    "path": "pgxn/neon_test_utils/neontest.c",
    "content": "/*-------------------------------------------------------------------------\n *\n * neontest.c\n *\t  Helpers for neon testing and debugging\n *\n * IDENTIFICATION\n *\t contrib/neon_test_utils/neontest.c\n *\n *-------------------------------------------------------------------------\n */\n#include \"postgres.h\"\n\n#include \"../neon/neon_pgversioncompat.h\"\n\n#include \"access/relation.h\"\n#include \"access/xact.h\"\n#include \"access/xlog.h\"\n#include \"access/xlog_internal.h\"\n#include \"catalog/namespace.h\"\n#include \"fmgr.h\"\n#include \"funcapi.h\"\n#include \"miscadmin.h\"\n#include \"storage/buf_internals.h\"\n#include \"storage/bufmgr.h\"\n#include \"storage/fd.h\"\n#include \"utils/builtins.h\"\n#include \"utils/pg_lsn.h\"\n#include \"utils/rel.h\"\n#include \"utils/varlena.h\"\n#include \"utils/wait_event.h\"\n#include \"../neon/pagestore_client.h\"\n\nPG_MODULE_MAGIC;\n\nextern void _PG_init(void);\n\nPG_FUNCTION_INFO_V1(test_consume_xids);\nPG_FUNCTION_INFO_V1(test_consume_oids);\nPG_FUNCTION_INFO_V1(test_consume_cpu);\nPG_FUNCTION_INFO_V1(test_consume_memory);\nPG_FUNCTION_INFO_V1(test_release_memory);\nPG_FUNCTION_INFO_V1(clear_buffer_cache);\nPG_FUNCTION_INFO_V1(get_raw_page_at_lsn);\nPG_FUNCTION_INFO_V1(get_raw_page_at_lsn_ex);\nPG_FUNCTION_INFO_V1(neon_xlogflush);\nPG_FUNCTION_INFO_V1(trigger_panic);\nPG_FUNCTION_INFO_V1(trigger_segfault);\n\n/*\n * Linkage to functions in neon module.\n * The signature here would need to be updated whenever function parameters change in pagestore_smgr.c\n */\ntypedef void (*neon_read_at_lsn_type) (NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,\n\t\t\t\t\t\t\t\t\t   neon_request_lsns request_lsns, void *buffer);\n\nstatic neon_read_at_lsn_type neon_read_at_lsn_ptr;\n\n/*\n * Module initialize function: fetch function pointers for cross-module calls.\n */\nvoid\n_PG_init(void)\n{\n\t/* Asserts verify that typedefs above match original declarations 
*/\n\tAssertVariableIsOfType(&neon_read_at_lsn, neon_read_at_lsn_type);\n\tneon_read_at_lsn_ptr = (neon_read_at_lsn_type)\n\t\tload_external_function(\"$libdir/neon\", \"neon_read_at_lsn\",\n\t\t\t\t\t\t\t   true, NULL);\n}\n\n#define neon_read_at_lsn neon_read_at_lsn_ptr\n\n/*\n * test_consume_oids(int4), for rapidly consuming OIDs, to test wraparound.\n * Unlike test_consume_xids which is passed number of xids to be consumed,\n * this function is given the target Oid.\n */\nDatum\ntest_consume_oids(PG_FUNCTION_ARGS)\n{\n\tint32 oid = PG_GETARG_INT32(0);\n\n\twhile (oid != GetNewObjectId());\n\n\tPG_RETURN_VOID();\n}\n\n/*\n * test_consume_xids(int4), for rapidly consuming XIDs, to test wraparound.\n */\nDatum\ntest_consume_xids(PG_FUNCTION_ARGS)\n{\n\tint32\t\tnxids = PG_GETARG_INT32(0);\n\tTransactionId topxid;\n\tFullTransactionId fullxid;\n\tTransactionId xid;\n\tTransactionId targetxid;\n\n\t/* make sure we have a top-XID first */\n\ttopxid = GetTopTransactionId();\n\n\txid = ReadNextTransactionId();\n\n\ttargetxid = xid + nxids;\n\twhile (targetxid < FirstNormalTransactionId)\n\t\ttargetxid++;\n\n\twhile (TransactionIdPrecedes(xid, targetxid))\n\t{\n\t\tfullxid = GetNewTransactionId(true);\n\t\txid = XidFromFullTransactionId(fullxid);\n\t\telog(DEBUG1, \"topxid: %u xid: %u\", topxid, xid);\n\t}\n\n\tPG_RETURN_VOID();\n}\n\n\n/*\n * test_consume_cpu(seconds int). 
Keeps one CPU busy for the given number of seconds.\n */\nDatum\ntest_consume_cpu(PG_FUNCTION_ARGS)\n{\n\tint32\t\tseconds = PG_GETARG_INT32(0);\n\tTimestampTz start;\n\tuint64\t\ttotal_iterations = 0;\n\n\tstart = GetCurrentTimestamp();\n\n\tfor (;;)\n\t{\n\t\tTimestampTz elapsed;\n\n\t\telapsed = GetCurrentTimestamp() - start;\n\t\tif (elapsed > (TimestampTz) seconds * USECS_PER_SEC)\n\t\t\tbreak;\n\n\t\t/* keep spinning */\n\t\tfor (int i = 0; i < 1000000; i++)\n\t\t\ttotal_iterations++;\n\t\telog(DEBUG2, \"test_consume_cpu(): %lu iterations in total\", total_iterations);\n\n\t\tCHECK_FOR_INTERRUPTS();\n\t}\n\n\tPG_RETURN_VOID();\n}\n\nstatic MemoryContext consume_cxt = NULL;\nstatic slist_head consumed_memory_chunks;\nstatic int64 num_memory_chunks;\n\n/*\n * test_consume_memory(megabytes int).\n *\n * Consume given amount of memory. The allocation is made in TopMemoryContext,\n * so it outlives the function, until you call test_release_memory to\n * explicitly release it, or close the session.\n */\nDatum\ntest_consume_memory(PG_FUNCTION_ARGS)\n{\n\tint32\t\tmegabytes = PG_GETARG_INT32(0);\n\n\t/*\n\t * Consume the memory in a new memory context, so that it's convenient to\n\t * release and to display it separately in a possible memory context dump.\n\t */\n\tif (consume_cxt == NULL)\n\t\tconsume_cxt = AllocSetContextCreate(TopMemoryContext,\n\t\t\t\t\t\t\t\t\t\t\t\"test_consume_memory\",\n\t\t\t\t\t\t\t\t\t\t\tALLOCSET_DEFAULT_SIZES);\n\n\tfor (int32 i = 0; i < megabytes; i++)\n\t{\n\t\tchar\t   *p;\n\n\t\tp = MemoryContextAllocZero(consume_cxt, 1024 * 1024);\n\n\t\t/* touch the memory, so that it's really allocated by the kernel */\n\t\tfor (int j = 0; j < 1024 * 1024; j += 1024)\n\t\t\tp[j] = j % 0xFF;\n\n\t\tslist_push_head(&consumed_memory_chunks, (slist_node *) p);\n\t\tnum_memory_chunks++;\n\t}\n\n\tPG_RETURN_VOID();\n}\n\n/*\n * test_release_memory(megabytes int). 
NULL releases all\n */\nDatum\ntest_release_memory(PG_FUNCTION_ARGS)\n{\n\tif (PG_ARGISNULL(0))\n\t{\n\t\tif (consume_cxt)\n\t\t{\n\t\t\tMemoryContextDelete(consume_cxt);\n\t\t\tconsume_cxt = NULL;\n\t\t\tnum_memory_chunks = 0;\n\t\t}\n\t}\n\telse\n\t{\n\t\tint32\t\tchunks_to_release = PG_GETARG_INT32(0);\n\n\t\tif (chunks_to_release > num_memory_chunks)\n\t\t{\n\t\t\telog(WARNING, \"only %lu MB is consumed, releasing it all\", num_memory_chunks);\n\t\t\tchunks_to_release = num_memory_chunks;\n\t\t}\n\n\t\tfor (int32 i = 0; i < chunks_to_release; i++)\n\t\t{\n\t\t\tslist_node *chunk = slist_pop_head_node(&consumed_memory_chunks);\n\n\t\t\tpfree(chunk);\n\t\t\tnum_memory_chunks--;\n\t\t}\n\t}\n\n\tPG_RETURN_VOID();\n}\n\n/*\n * Flush the buffer cache, evicting all pages that are not currently pinned.\n */\nDatum\nclear_buffer_cache(PG_FUNCTION_ARGS)\n{\n\tbool\t\tsave_neon_test_evict;\n\n\t/*\n\t * Temporarily set the neon_test_evict GUC, so that when we pin and\n\t * unpin a buffer, the buffer is evicted. We use that hack to evict all\n\t * buffers, as there is no explicit \"evict this buffer\" function in the\n\t * buffer manager.\n\t */\n\tsave_neon_test_evict = neon_test_evict;\n\tneon_test_evict = true;\n\tPG_TRY();\n\t{\n\t\t/* Scan through all the buffers */\n\t\tfor (int i = 0; i < NBuffers; i++)\n\t\t{\n\t\t\tBufferDesc *bufHdr;\n\t\t\tuint32\t\tbuf_state;\n\t\t\tBuffer\t\tbufferid;\n\t\t\tbool\t\tisvalid;\n\t\t\tNRelFileInfo rinfo;\n\t\t\tForkNumber\tforknum;\n\t\t\tBlockNumber blocknum;\n\n\t\t\t/* Peek into the buffer header to see what page it holds. 
*/\n\t\t\tbufHdr = GetBufferDescriptor(i);\n\t\t\tbuf_state = LockBufHdr(bufHdr);\n\n\t\t\tif ((buf_state & BM_VALID) && (buf_state & BM_TAG_VALID))\n\t\t\t\tisvalid = true;\n\t\t\telse\n\t\t\t\tisvalid = false;\n\t\t\tbufferid = BufferDescriptorGetBuffer(bufHdr);\n\t\t\trinfo = BufTagGetNRelFileInfo(bufHdr->tag);\n\t\t\tforknum = bufHdr->tag.forkNum;\n\t\t\tblocknum = bufHdr->tag.blockNum;\n\n\t\t\tUnlockBufHdr(bufHdr, buf_state);\n\n\t\t\t/*\n\t\t\t * Pin the buffer, and release it again. Because we have\n\t\t\t * neon_test_evict==true, this will evict the page from the\n\t\t\t * buffer cache if no one else is holding a pin on it.\n\t\t\t */\n\t\t\tif (isvalid)\n\t\t\t{\n\t\t\t\tif (ReadRecentBuffer(rinfo, forknum, blocknum, bufferid))\n\t\t\t\t\tReleaseBuffer(bufferid);\n\t\t\t}\n\t\t}\n\t}\n\tPG_FINALLY();\n\t{\n\t\t/* restore the GUC */\n\t\tneon_test_evict = save_neon_test_evict;\n\t}\n\tPG_END_TRY();\n\n\tPG_RETURN_VOID();\n}\n\n/*\n * Reads the page from page server without buffer cache\n * usage mimics get_raw_page() in pageinspect, but offers reading versions at specific LSN\n * NULL read lsn will result in reading the latest version.\n *\n * Note: reading latest version will result in waiting for latest changes to reach the page server,\n *       if this is undesirable, use pageinspect' get_raw_page that uses buffered access to the latest page\n */\nDatum\nget_raw_page_at_lsn(PG_FUNCTION_ARGS)\n{\n\tbytea\t   *raw_page;\n\tForkNumber\tforknum;\n\tRangeVar   *relrv;\n\tRelation\trel;\n\tchar\t   *raw_page_data;\n\ttext\t   *relname;\n\ttext\t   *forkname;\n\tuint32\t\tblkno;\n\tneon_request_lsns\trequest_lsns;\n\n\tif (PG_NARGS() != 5)\n\t\telog(ERROR, \"unexpected number of arguments in SQL function signature\");\n\n\tif (PG_ARGISNULL(0) || PG_ARGISNULL(1) || PG_ARGISNULL(2))\n\t\tPG_RETURN_NULL();\n\n\trelname = PG_GETARG_TEXT_PP(0);\n\tforkname = PG_GETARG_TEXT_PP(1);\n\tblkno = PG_GETARG_UINT32(2);\n\n\trequest_lsns.request_lsn = PG_ARGISNULL(3) ? 
GetXLogInsertRecPtr() : PG_GETARG_LSN(3);\n\trequest_lsns.not_modified_since = PG_ARGISNULL(4) ? request_lsns.request_lsn : PG_GETARG_LSN(4);\n\t/*\n\t * For the time being, use the same LSN for request and\n\t * effective request LSN. If any test needed to use UINT64_MAX\n\t * as the request LSN, we'd need to add effective_request_lsn\n\t * as a new argument.\n\t */\n\trequest_lsns.effective_request_lsn = request_lsns.request_lsn;\n\n\tif (!superuser())\n\t\tereport(ERROR,\n\t\t\t\t(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),\n\t\t\t\t errmsg(\"must be superuser to use raw page functions\")));\n\n\trelrv = makeRangeVarFromNameList(textToQualifiedNameList(relname));\n\trel = relation_openrv(relrv, AccessShareLock);\n\n\t/* Check that this relation has storage */\n\tif (rel->rd_rel->relkind == RELKIND_VIEW)\n\t\tereport(ERROR,\n\t\t\t\t(errcode(ERRCODE_WRONG_OBJECT_TYPE),\n\t\t\t\t errmsg(\"cannot get raw page from view \\\"%s\\\"\",\n\t\t\t\t\t\tRelationGetRelationName(rel))));\n\tif (rel->rd_rel->relkind == RELKIND_COMPOSITE_TYPE)\n\t\tereport(ERROR,\n\t\t\t\t(errcode(ERRCODE_WRONG_OBJECT_TYPE),\n\t\t\t\t errmsg(\"cannot get raw page from composite type \\\"%s\\\"\",\n\t\t\t\t\t\tRelationGetRelationName(rel))));\n\tif (rel->rd_rel->relkind == RELKIND_FOREIGN_TABLE)\n\t\tereport(ERROR,\n\t\t\t\t(errcode(ERRCODE_WRONG_OBJECT_TYPE),\n\t\t\t\t errmsg(\"cannot get raw page from foreign table \\\"%s\\\"\",\n\t\t\t\t\t\tRelationGetRelationName(rel))));\n\tif (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)\n\t\tereport(ERROR,\n\t\t\t\t(errcode(ERRCODE_WRONG_OBJECT_TYPE),\n\t\t\t\t errmsg(\"cannot get raw page from partitioned table \\\"%s\\\"\",\n\t\t\t\t\t\tRelationGetRelationName(rel))));\n\tif (rel->rd_rel->relkind == RELKIND_PARTITIONED_INDEX)\n\t\tereport(ERROR,\n\t\t\t\t(errcode(ERRCODE_WRONG_OBJECT_TYPE),\n\t\t\t\t errmsg(\"cannot get raw page from partitioned index \\\"%s\\\"\",\n\t\t\t\t\t\tRelationGetRelationName(rel))));\n\n\t/*\n\t * Reject attempts to read 
non-local temporary relations; we would be\n\t * likely to get wrong data since we have no visibility into the owning\n\t * session's local buffers.\n\t */\n\tif (RELATION_IS_OTHER_TEMP(rel))\n\t\tereport(ERROR,\n\t\t\t\t(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),\n\t\t\t\t errmsg(\"cannot access temporary tables of other sessions\")));\n\n\tforknum = forkname_to_number(text_to_cstring(forkname));\n\n\t/* Initialize buffer to copy to */\n\traw_page = (bytea *) palloc(BLCKSZ + VARHDRSZ);\n\tSET_VARSIZE(raw_page, BLCKSZ + VARHDRSZ);\n\traw_page_data = VARDATA(raw_page);\n\n\tneon_read_at_lsn(InfoFromRelation(rel), forknum, blkno, request_lsns,\n\t\t\t\t\t raw_page_data);\n\n\trelation_close(rel, AccessShareLock);\n\n\tPG_RETURN_BYTEA_P(raw_page);\n}\n\n/*\n * Another option to read a relation page from page server without cache\n * this version doesn't validate input and allows reading blocks of dropped relations\n *\n * Note: reading latest version will result in waiting for latest changes to reach the page server,\n *  if this is undesirable, use pageinspect' get_raw_page that uses buffered access to the latest page\n */\nDatum\nget_raw_page_at_lsn_ex(PG_FUNCTION_ARGS)\n{\n\tchar\t   *raw_page_data;\n\n\tif (PG_NARGS() != 7)\n\t\telog(ERROR, \"unexpected number of arguments in SQL function signature\");\n\n\tif (!superuser())\n\t\tereport(ERROR,\n\t\t\t\t(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),\n\t\t\t\t errmsg(\"must be superuser to use raw page functions\")));\n\n\tif (PG_ARGISNULL(0) || PG_ARGISNULL(1) || PG_ARGISNULL(2) ||\n\t\tPG_ARGISNULL(3) || PG_ARGISNULL(4))\n\t\tPG_RETURN_NULL();\n\n\t{\n\t\tNRelFileInfo rinfo = {\n#if PG_MAJORVERSION_NUM < 16\n\t\t\t.spcNode = PG_GETARG_OID(0),\n\t\t\t.dbNode = PG_GETARG_OID(1),\n\t\t\t.relNode = PG_GETARG_OID(2)\n#else\n\t\t\t.spcOid = PG_GETARG_OID(0),\n\t\t\t.dbOid = PG_GETARG_OID(1),\n\t\t\t.relNumber = PG_GETARG_OID(2)\n#endif\n\t\t};\n\n\t\tForkNumber\tforknum = PG_GETARG_UINT32(3);\n\t\tuint32\t\tblkno = 
PG_GETARG_UINT32(4);\n\t\tneon_request_lsns\trequest_lsns;\n\n\t\t/* Initialize buffer to copy to */\n\t\tbytea\t   *raw_page = (bytea *) palloc(BLCKSZ + VARHDRSZ);\n\n\t\trequest_lsns.request_lsn = PG_ARGISNULL(5) ? GetXLogInsertRecPtr() : PG_GETARG_LSN(5);\n\t\trequest_lsns.not_modified_since = PG_ARGISNULL(6) ? request_lsns.request_lsn : PG_GETARG_LSN(6);\n\t\t/*\n\t\t * For the time being, use the same LSN for request\n\t\t * and effective request LSN. If any test needed to\n\t\t * use UINT64_MAX as the request LSN, we'd need to add\n\t\t * effective_request_lsn as a new argument.\n\t\t */\n\t\trequest_lsns.effective_request_lsn = request_lsns.request_lsn;\n\n\t\tSET_VARSIZE(raw_page, BLCKSZ + VARHDRSZ);\n\t\traw_page_data = VARDATA(raw_page);\n\n\t\tneon_read_at_lsn(rinfo, forknum, blkno, request_lsns, raw_page_data);\n\t\tPG_RETURN_BYTEA_P(raw_page);\n\t}\n}\n\n/*\n * Directly calls XLogFlush(lsn) to flush WAL buffers.\n *\n * If 'lsn' is not specified (is NULL), flush all generated WAL.\n */\nDatum\nneon_xlogflush(PG_FUNCTION_ARGS)\n{\n\tXLogRecPtr\tlsn;\n\n\tif (RecoveryInProgress())\n\t\tereport(ERROR,\n\t\t\t\t(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),\n\t\t\t\t errmsg(\"recovery is in progress\"),\n\t\t\t\t errhint(\"cannot flush WAL during recovery.\")));\n\n\tif (!PG_ARGISNULL(0))\n\t\tlsn = PG_GETARG_LSN(0);\n\telse\n\t{\n\t\tlsn = GetXLogInsertRecPtr();\n\n\t\t/*---\n\t\t * The LSN returned by GetXLogInsertRecPtr() is the position where the\n\t\t * next inserted record would begin. If the last record ended just at\n\t\t * the page boundary, the next record will begin after the page header\n\t\t * on the next page, but the next page's page header has not been\n\t\t * written yet. 
If we tried to flush it, XLogFlush() would throw an\n\t\t * error:\n\t\t *\n\t\t * ERROR : xlog flush request %X/%X is not satisfied --- flushed only to %X/%X\n\t\t *\n\t\t * To avoid that, if the insert position points to just after the page\n\t\t * header, back off to page boundary.\n\t\t */\n\t\tif (lsn % XLOG_BLCKSZ == SizeOfXLogShortPHD &&\n\t\t\tXLogSegmentOffset(lsn, wal_segment_size) > XLOG_BLCKSZ)\n\t\t\tlsn -= SizeOfXLogShortPHD;\n\t\telse if (lsn % XLOG_BLCKSZ == SizeOfXLogLongPHD &&\n\t\t\t\t XLogSegmentOffset(lsn, wal_segment_size) < XLOG_BLCKSZ)\n\t\t\tlsn -= SizeOfXLogLongPHD;\n\t}\n\n\tXLogFlush(lsn);\n\tPG_RETURN_VOID();\n}\n\n/*\n * Function to trigger panic.\n */\nDatum\ntrigger_panic(PG_FUNCTION_ARGS)\n{\n    elog(PANIC, \"neon_test_utils: panic\");\n    PG_RETURN_VOID();\n}\n\n/*\n * Function to trigger a segfault.\n */\nDatum\ntrigger_segfault(PG_FUNCTION_ARGS)\n{\n    int *ptr = NULL;\n    *ptr = 42;\n    PG_RETURN_VOID();\n}\n"
  },
  {
    "path": "pgxn/neon_utils/Makefile",
    "content": "# pgxn/neon_utils/Makefile\n\n\nMODULE_big = neon_utils\nOBJS = \\\n\t$(WIN32RES) \\\n\tneon_utils.o\n\nEXTENSION = neon_utils\nDATA = neon_utils--1.0.sql\nPGFILEDESC = \"neon_utils - small useful functions\"\n\nPG_CONFIG = pg_config\nPGXS := $(shell $(PG_CONFIG) --pgxs)\ninclude $(PGXS)\n"
  },
  {
    "path": "pgxn/neon_utils/neon_utils--1.0.sql",
    "content": "CREATE FUNCTION num_cpus()\nRETURNS int\nAS 'MODULE_PATHNAME', 'num_cpus'\nLANGUAGE C STRICT\nPARALLEL UNSAFE\nVOLATILE;\n"
  },
  {
    "path": "pgxn/neon_utils/neon_utils.c",
    "content": "/*-------------------------------------------------------------------------\n *\n * neon_utils.c\n *\t  neon_utils - small useful functions\n *\n * IDENTIFICATION\n *\t pgxn/neon_utils/neon_utils.c\n *\n *-------------------------------------------------------------------------\n */\n#ifdef _WIN32\n#include <windows.h>\n#else\n#include <unistd.h>\n#endif\n\n#include \"postgres.h\"\n#include \"fmgr.h\"\n\nPG_MODULE_MAGIC;\n\nPG_FUNCTION_INFO_V1(num_cpus);\n\nDatum\nnum_cpus(PG_FUNCTION_ARGS)\n{\n#ifdef _WIN32\n\tSYSTEM_INFO sysinfo;\n\tGetSystemInfo(&sysinfo);\n\tuint32 num_cpus = (uint32) sysinfo.dwNumberOfProcessors;\n#else\n\tuint32 num_cpus = (uint32) sysconf(_SC_NPROCESSORS_ONLN);\n#endif\n\tPG_RETURN_UINT32(num_cpus);\n}\n"
  },
  {
    "path": "pgxn/neon_utils/neon_utils.control",
    "content": "# neon_utils extension\ncomment = 'neon_utils - small useful functions'\ndefault_version = '1.0'\nmodule_pathname = '$libdir/neon_utils'\nrelocatable = true\ntrusted = true\n"
  },
  {
    "path": "pgxn/neon_walredo/Makefile",
    "content": "# pgxn/neon_walredo/Makefile\n\nMODULE_big = neon_walredo\nOBJS = \\\n\t$(WIN32RES) \\\n\tinmem_smgr.o \\\n\twalredoproc.o \\\n\n# This really should be guarded by $(with_libseccomp), but I couldn't\n# make that work with pgxs. So we always compile it, but its contents\n# are wrapped in #ifdef HAVE_LIBSECCOMP instead.\nOBJS += seccomp.o\n\nPGFILEDESC = \"neon_walredo - helper process that runs in Neon pageserver\"\n\nPG_CONFIG = pg_config\nPGXS := $(shell $(PG_CONFIG) --pgxs)\ninclude $(PGXS)\n\nifeq ($(with_libseccomp),yes)\nSHLIB_LINK += -lseccomp\nendif\n"
  },
  {
    "path": "pgxn/neon_walredo/inmem_smgr.c",
    "content": "/*-------------------------------------------------------------------------\n *\n * inmem_smgr.c\n *\n * This is an implementation of the SMGR interface, used in the WAL redo\n * process. It has no persistent storage, the pages that are written out\n * are kept in a small number of in-memory buffers.\n *\n * Normally, replaying a WAL record only needs to access a handful of\n * buffers, which fit in the normal buffer cache, so this is just for\n * \"overflow\" storage when the buffer cache is not large enough.\n *\n *\n * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group\n * Portions Copyright (c) 1994, Regents of the University of California\n *\n *-------------------------------------------------------------------------\n */\n#include \"postgres.h\"\n\n#include \"../neon/neon_pgversioncompat.h\"\n\n#include \"access/xlog.h\"\n#include \"storage/block.h\"\n#include \"storage/buf_internals.h\"\n#include RELFILEINFO_HDR\n#include \"storage/smgr.h\"\n\n#if PG_VERSION_NUM >= 150000\n#include \"access/xlogutils.h\"\n#endif\n\n#include \"inmem_smgr.h\"\n\n/* Size of the in-memory smgr: XLR_MAX_BLOCK_ID is 32, so assume that 64 will be enough */\n#define MAX_PAGES 64\n\n/* If more than WARN_PAGES are used, print a warning in the log */\n#define WARN_PAGES 32\n\nstatic BufferTag page_tag[MAX_PAGES];\nstatic char page_body[MAX_PAGES][BLCKSZ];\nstatic int\tused_pages;\n\nstatic int\nlocate_page(SMgrRelation reln, ForkNumber forknum, BlockNumber blkno)\n{\n\tNRelFileInfo rinfo = InfoFromSMgrRel(reln);\n\n\t/* We only hold a small number of pages, so linear search */\n\tfor (int i = 0; i < used_pages; i++)\n\t{\n\t\tif (RelFileInfoEquals(rinfo, BufTagGetNRelFileInfo(page_tag[i]))\n\t\t\t&& forknum == page_tag[i].forkNum\n\t\t\t&& blkno == page_tag[i].blockNum)\n\t\t{\n\t\t\treturn i;\n\t\t}\n\t}\n\treturn -1;\n}\n\n\n/* neon wal-redo storage manager functionality */\nstatic void inmem_init(void);\nstatic void inmem_open(SMgrRelation 
reln);\nstatic void inmem_close(SMgrRelation reln, ForkNumber forknum);\nstatic void inmem_create(SMgrRelation reln, ForkNumber forknum, bool isRedo);\nstatic bool inmem_exists(SMgrRelation reln, ForkNumber forknum);\nstatic void inmem_unlink(NRelFileInfoBackend rinfo, ForkNumber forknum, bool isRedo);\n#if PG_MAJORVERSION_NUM >= 17\nstatic bool inmem_prefetch(SMgrRelation reln, ForkNumber forknum,\n\t\t\t\t\t\t   BlockNumber blocknum, int nblocks);\n#else\nstatic bool inmem_prefetch(SMgrRelation reln, ForkNumber forknum,\n\t\t\t\t\t\t   BlockNumber blocknum);\n#endif\n#if PG_MAJORVERSION_NUM < 16\nstatic void inmem_extend(SMgrRelation reln, ForkNumber forknum,\n\t\t\t\t\t\t BlockNumber blocknum, char *buffer, bool skipFsync);\nstatic void inmem_read(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,\n\t\t\t\t\t   char *buffer);\nstatic void inmem_write(SMgrRelation reln, ForkNumber forknum,\n\t\t\t\t\t\tBlockNumber blocknum, char *buffer, bool skipFsync);\n#else\nstatic void inmem_extend(SMgrRelation reln, ForkNumber forknum,\n\t\t\t\t\t\t BlockNumber blocknum, const void *buffer, bool skipFsync);\nstatic void inmem_zeroextend(SMgrRelation reln, ForkNumber forknum,\n\t\t\t\t\t\t\t BlockNumber blocknum, int nblocks, bool skipFsync);\nstatic void inmem_read(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,\n\t\t\t\t\t   void *buffer);\nstatic void inmem_write(SMgrRelation reln, ForkNumber forknum,\n\t\t\t\t\t\tBlockNumber blocknum, const void *buffer, bool skipFsync);\n#endif\nstatic void inmem_writeback(SMgrRelation reln, ForkNumber forknum,\n\t\t\t\t\t\t\tBlockNumber blocknum, BlockNumber nblocks);\nstatic BlockNumber inmem_nblocks(SMgrRelation reln, ForkNumber forknum);\nstatic void inmem_truncate(SMgrRelation reln, ForkNumber forknum,\n\t\t\t\t\t\t   BlockNumber old_blocks, BlockNumber nblocks);\nstatic void inmem_immedsync(SMgrRelation reln, ForkNumber forknum);\n#if PG_MAJORVERSION_NUM >= 17\nstatic void inmem_registersync(SMgrRelation 
reln, ForkNumber forknum);\n#endif\n\n/*\n *\tinmem_init() -- Initialize private state\n */\nstatic void\ninmem_init(void)\n{\n\tused_pages = 0;\n}\n\n/*\n *\tinmem_exists() -- Does the physical file exist?\n */\nstatic bool\ninmem_exists(SMgrRelation reln, ForkNumber forknum)\n{\n\tNRelFileInfo rinfo = InfoFromSMgrRel(reln);\n\n\tfor (int i = 0; i < used_pages; i++)\n\t{\n\t\tif (RelFileInfoEquals(rinfo, BufTagGetNRelFileInfo(page_tag[i]))\n\t\t\t&& forknum == page_tag[i].forkNum)\n\t\t{\n\t\t\treturn true;\n\t\t}\n\t}\n\treturn false;\n}\n\n/*\n *\tinmem_create() -- Create a new relation on neon storage\n *\n * If isRedo is true, it's okay for the relation to exist already.\n */\nstatic void\ninmem_create(SMgrRelation reln, ForkNumber forknum, bool isRedo)\n{\n}\n\n/*\n *\tinmem_unlink() -- Unlink a relation.\n */\nstatic void\ninmem_unlink(NRelFileInfoBackend rinfo, ForkNumber forknum, bool isRedo)\n{\n}\n\n/*\n *\tinmem_extend() -- Add a block to the specified relation.\n *\n *\t\tThe semantics are nearly the same as mdwrite(): write at the\n *\t\tspecified position.  However, this is to be used for the case of\n *\t\textending a relation (i.e., blocknum is at or beyond the current\n *\t\tEOF).  
Note that we assume writing a block beyond current EOF\n *\t\tcauses intervening file space to become filled with zeroes.\n */\nstatic void\ninmem_extend(SMgrRelation reln, ForkNumber forknum, BlockNumber blkno,\n#if PG_MAJORVERSION_NUM < 16\n\t\t\t char *buffer, bool skipFsync)\n#else\n\t\t\t const void *buffer, bool skipFsync)\n#endif\n{\n\t/* same as smgrwrite() for us */\n\tinmem_write(reln, forknum, blkno, buffer, skipFsync);\n}\n\n#if PG_MAJORVERSION_NUM >= 16\nstatic void\ninmem_zeroextend(SMgrRelation reln, ForkNumber forknum,\n\t\t\t\t BlockNumber blocknum, int nblocks, bool skipFsync)\n{\n\t/* Do nothing: inmem_read will return zero page in any case */\n}\n#endif\n\n/*\n *  inmem_open() -- Initialize newly-opened relation.\n */\nstatic void\ninmem_open(SMgrRelation reln)\n{\n}\n\n/*\n *\tinmem_close() -- Close the specified relation, if it isn't closed already.\n */\nstatic void\ninmem_close(SMgrRelation reln, ForkNumber forknum)\n{\n}\n\n#if PG_MAJORVERSION_NUM >= 17\nstatic bool\ninmem_prefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,\n\t\t\t   int nblocks)\n{\n\treturn true;\n}\n#else\n/*\n *\tinmem_prefetch() -- Initiate asynchronous read of the specified block of a relation\n */\nstatic bool\ninmem_prefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum)\n{\n\treturn true;\n}\n#endif\n\n/*\n * inmem_writeback() -- Tell the kernel to write pages back to storage.\n */\nstatic void\ninmem_writeback(SMgrRelation reln, ForkNumber forknum,\n\t\t\t\tBlockNumber blocknum, BlockNumber nblocks)\n{\n}\n\n/*\n *\tinmem_read() -- Read the specified block from a relation.\n */\n#if PG_MAJORVERSION_NUM < 16\nstatic void\ninmem_read(SMgrRelation reln, ForkNumber forknum, BlockNumber blkno,\n\t\t   char *buffer)\n#else\nstatic void\ninmem_read(SMgrRelation reln, ForkNumber forknum, BlockNumber blkno,\n\t\t   void *buffer)\n#endif\n{\n\tint\t\t\tpg;\n\n\tpg = locate_page(reln, forknum, blkno);\n\tif (pg < 0)\n\t\tmemset(buffer, 0, 
BLCKSZ);\n\telse\n\t\tmemcpy(buffer, page_body[pg], BLCKSZ);\n}\n\n#if PG_MAJORVERSION_NUM >= 17\nstatic void\ninmem_readv(SMgrRelation reln, ForkNumber forknum, BlockNumber blkno,\n\t\t\tvoid **buffers, BlockNumber nblocks)\n{\n\tfor (int i = 0; i < nblocks; i++)\n\t{\n\t\tinmem_read(reln, forknum, blkno, buffers[i]);\n\t}\n}\n#endif\n\n/*\n *\tinmem_write() -- Write the supplied block at the appropriate location.\n *\n *\t\tThis is to be used only for updating already-existing blocks of a\n *\t\trelation (ie, those before the current EOF).  To extend a relation,\n *\t\tuse mdextend().\n */\nstatic void\ninmem_write(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,\n#if PG_MAJORVERSION_NUM < 16\n\t\t\tchar *buffer, bool skipFsync)\n#else\n\t\t\tconst void *buffer, bool skipFsync)\n#endif\n{\n\tint\t\t\tpg;\n\n\tpg = locate_page(reln, forknum, blocknum);\n\tif (pg < 0)\n\t{\n\t\t/*\n\t\t * We assume the buffer cache is large enough to hold all the buffers\n\t\t * needed for most operations. Overflowing to this \"in-mem smgr\" in\n\t\t * rare cases is OK. 
But if we find that we're using more than\n\t\t * WARN_PAGES, print a warning so that we get alerted and get to\n\t\t * investigate why we're accessing so many buffers.\n\t\t */\n\t\tif (used_pages >= WARN_PAGES)\n\t\t\tereport(WARNING, (errmsg(\"inmem_write() called for %u/%u/%u.%u blk %u: used_pages %u\",\n\t\t\t\t\t\t\t\t   RelFileInfoFmt(InfoFromSMgrRel(reln)),\n\t\t\t\t\t\t\t\t   forknum,\n\t\t\t\t\t\t\t\t   blocknum,\n\t\t\t\t\t\t\t\t   used_pages), errbacktrace()));\n\t\tif (used_pages == MAX_PAGES)\n\t\t\telog(ERROR, \"Inmem storage overflow\");\n\n\t\tpg = used_pages;\n\t\tused_pages++;\n\n\t\tInitBufferTag(&page_tag[pg], &InfoFromSMgrRel(reln), forknum, blocknum);\n\t}\n\telse\n\t{\n\t\telog(DEBUG1, \"inmem_write() called for %u/%u/%u.%u blk %u: found at %u\",\n\t\t\t RelFileInfoFmt(InfoFromSMgrRel(reln)),\n\t\t\t forknum,\n\t\t\t blocknum,\n\t\t\t used_pages);\n\t}\n\tmemcpy(page_body[pg], buffer, BLCKSZ);\n}\n\n#if PG_MAJORVERSION_NUM >= 17\nstatic void\ninmem_writev(SMgrRelation reln, ForkNumber forknum, BlockNumber blkno,\n\t\t\t const void **buffers, BlockNumber nblocks, bool skipFsync)\n{\n\tfor (int i = 0; i < nblocks; i++)\n\t{\n\t\tinmem_write(reln, forknum, blkno, buffers[i], skipFsync);\n\t}\n}\n#endif\n\n/*\n *\tinmem_nblocks() -- Get the number of blocks stored in a relation.\n */\nstatic BlockNumber\ninmem_nblocks(SMgrRelation reln, ForkNumber forknum)\n{\n\t/*\n\t * It's not clear why a WAL redo function would call smgrnblocks(). During\n\t * recovery, at least before reaching consistency, the size of a relation\n\t * could be arbitrarily small, if it was truncated after the record being\n\t * replayed, or arbitrarily large if it was extended afterwards. But one\n\t * place where it's called is in XLogReadBufferExtended(): it extends the\n\t * relation, if it's smaller than the requested page. That's a waste of\n\t * time in the WAL redo process. 
Pretend that all relations are maximally\n\t * sized to avoid it.\n\t */\n\treturn MaxBlockNumber;\n}\n\n/*\n *\tinmem_truncate() -- Truncate relation to specified number of blocks.\n */\nstatic void\ninmem_truncate(SMgrRelation reln, ForkNumber forknum, BlockNumber old_blocks, BlockNumber nblocks)\n{\n}\n\n/*\n *\tinmem_immedsync() -- Immediately sync a relation to stable storage.\n */\nstatic void\ninmem_immedsync(SMgrRelation reln, ForkNumber forknum)\n{\n}\n\n#if PG_MAJORVERSION_NUM >= 17\nstatic void\ninmem_registersync(SMgrRelation reln, ForkNumber forknum)\n{\n}\n#endif\n\nstatic const struct f_smgr inmem_smgr =\n{\n\t.smgr_init = inmem_init,\n\t.smgr_shutdown = NULL,\n\t.smgr_open = inmem_open,\n\t.smgr_close = inmem_close,\n\t.smgr_create = inmem_create,\n\t.smgr_exists = inmem_exists,\n\t.smgr_unlink = inmem_unlink,\n\t.smgr_extend = inmem_extend,\n#if PG_MAJORVERSION_NUM >= 16\n\t.smgr_zeroextend = inmem_zeroextend,\n#endif\n#if PG_MAJORVERSION_NUM >= 17\n\t.smgr_prefetch = inmem_prefetch,\n\t.smgr_readv = inmem_readv,\n\t.smgr_writev = inmem_writev,\n#else\n\t.smgr_prefetch = inmem_prefetch,\n\t.smgr_read = inmem_read,\n\t.smgr_write = inmem_write,\n#endif\n\t.smgr_writeback = inmem_writeback,\n\t.smgr_nblocks = inmem_nblocks,\n\t.smgr_truncate = inmem_truncate,\n\t.smgr_immedsync = inmem_immedsync,\n\n#if PG_MAJORVERSION_NUM >= 17\n\t.smgr_registersync = inmem_registersync,\n#endif\n\n\t.smgr_start_unlogged_build = NULL,\n\t.smgr_finish_unlogged_build_phase_1 = NULL,\n\t.smgr_end_unlogged_build = NULL,\n\t.smgr_read_slru_segment = NULL,\n};\n\nconst f_smgr *\nsmgr_inmem(ProcNumber backend, NRelFileInfo rinfo)\n{\n\tAssert(InRecovery);\n\t// // What does this code do?\n\t// if (backend != INVALID_PROC_NUMBER)\n\t// \treturn smgr_standard(backend, rinfo);\n\t// else\n\treturn &inmem_smgr;\n}\n\nvoid\nsmgr_init_inmem()\n{\n\tinmem_init();\n}\n"
  },
  {
    "path": "pgxn/neon_walredo/inmem_smgr.h",
    "content": "/*-------------------------------------------------------------------------\n *\n * inmem_smgr.h\n *\n *\n * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group\n * Portions Copyright (c) 1994, Regents of the University of California\n *\n *-------------------------------------------------------------------------\n */\n#ifndef INMEM_SMGR_H\n#define INMEM_SMGR_H\n\nextern const f_smgr *smgr_inmem(ProcNumber backend, NRelFileInfo rinfo);\nextern void smgr_init_inmem(void);\n\n#endif /* INMEM_SMGR_H */\n"
  },
  {
    "path": "pgxn/neon_walredo/neon_seccomp.h",
    "content": "#ifndef NEON_SECCOMP_H\n#define NEON_SECCOMP_H\n\n#include <seccomp.h>\n\ntypedef struct {\n    int    psr_syscall; /* syscall number */\n    uint32 psr_action;  /* libseccomp action, e.g. SCMP_ACT_ALLOW */\n} PgSeccompRule;\n\n#define PG_SCMP(syscall, action)                \\\n    (PgSeccompRule) {                           \\\n        .psr_syscall = SCMP_SYS(syscall),       \\\n        .psr_action = (action),                 \\\n    }\n\n#define PG_SCMP_ALLOW(syscall) \\\n    PG_SCMP(syscall, SCMP_ACT_ALLOW)\n\nextern void seccomp_load_rules(PgSeccompRule *syscalls, int count);\n\n#endif /* NEON_SECCOMP_H */\n"
  },
  {
    "path": "pgxn/neon_walredo/seccomp.c",
    "content": "/*-------------------------------------------------------------------------\n *\n * seccomp.c\n *\t  Secure Computing BPF API wrapper.\n *\n * Pageserver delegates complex WAL decoding duties to postgres,\n * which means that the latter might fall victim to carefully designed\n * malicious WAL records and start doing harmful things to the system.\n * To prevent this, it has been decided to limit possible interactions\n * with the outside world using the Secure Computing BPF mode.\n *\n * This code is intended to support both x86_64 and aarch64. The latter\n * doesn't implement some syscalls like open and select. We allow both\n * select (absent on aarch64) and pselect6 (present on both architectures)\n * We call select(2) through libc, and the libc wrapper calls select or pselect6\n * depending on the architecture. You can check which syscalls are present on\n * different architectures with the `scmp_sys_resolver` tool from the\n * seccomp package.\n *\n * We use this mode to disable all syscalls not in the allowlist. This\n * approach has its pros & cons:\n *\n *  - We have to carefully handpick and maintain the set of syscalls\n *    required for the WAL redo process. Core dumps help with that.\n *    The method of trial and error seems to work reasonably well,\n *    but it would be nice to find a proper way to \"prove\" that\n *    the set in question is both necessary and sufficient.\n *\n *  - Once we enter the seccomp bpf mode, it's impossible to lift those\n *    restrictions (otherwise, what kind of \"protection\" would that be?).\n *    Thus, we have to either enable extra syscalls for the clean shutdown,\n *    or exit the process immediately via _exit() instead of proc_exit().\n *\n *  - Should we simply use SCMP_ACT_KILL_PROCESS, or implement a custom\n *    facility to deal with the forbidden syscalls? 
If we'd like to embed\n *    a startup security test, we should go with the latter; In that\n *    case, which one of the following options is preferable?\n *\n *      * Catch the denied syscalls with a signal handler using SCMP_ACT_TRAP.\n *        Provide a common signal handler with a static switch to override\n *        its behavior for the test case. This would undermine the whole\n *        purpose of such protection, so we'd have to go further and remap\n *        the memory backing the switch as readonly, then ban mprotect().\n *        Ugly and fragile, to say the least.\n *\n *      * Yet again, catch the denied syscalls using SCMP_ACT_TRAP.\n *        Provide 2 different signal handlers: one for a test case,\n *        another for the main processing loop. Install the first one,\n *        enable seccomp, perform the test, switch to the second one,\n *        finally ban sigaction(), presto!\n *\n *      * Spoof the result of a syscall using SECCOMP_RET_ERRNO for the\n *        test, then ban it altogether with another filter. 
The downside\n *        of this solution is that we don't actually check that\n *        SCMP_ACT_KILL_PROCESS/SCMP_ACT_TRAP works.\n *\n *    Either approach seems to require two eBPF filter programs,\n *    which is unfortunate: the man page tells this is uncommon.\n *    Maybe I (@funbringer) am missing something, though; I encourage\n *    any reader to get familiar with it and scrutinize my conclusions.\n *\n * TODOs and ideas in no particular order:\n *\n *  - Do something about mmap() in musl's malloc().\n *    Definitely not a priority if we don't care about musl.\n *\n *  - See if we can untangle PG's shutdown sequence (involving unlink()):\n *\n *      * Simplify (or rather get rid of) shmem setup in PG's WAL redo mode.\n *      * Investigate chroot() or mount namespaces for better FS isolation.\n *      * (Per Heikki) Simply call _exit(), no big deal.\n *      * Come up with a better idea?\n *\n *  - Make use of seccomp's argument inspection (for what?).\n *    Unfortunately, it views all syscall arguments as scalars,\n *    so it won't work for e.g. string comparison in unlink().\n *\n *  - Benchmark with bpf jit on/off, try seccomp_syscall_priority().\n *\n *  - Test against various linux distros & glibc versions.\n *    I suspect that certain libc functions might involve slightly\n *    different syscalls, e.g. 
select/pselect6/pselect6_time64/whatever.\n *\n *-------------------------------------------------------------------------\n */\n\n#include \"postgres.h\"\n\n/*\n * I couldn't find a good way to do a conditional OBJS += seccomp.o in\n * the Makefile, so this file is compiled even when seccomp is disabled,\n * it's just empty in that case.\n */\n#ifdef HAVE_LIBSECCOMP\n\n#include <fcntl.h>\n#include <unistd.h>\n\n#include \"miscadmin.h\"\n\n#include \"neon_seccomp.h\"\n\nstatic void die(int code, const char *str);\n\nstatic bool seccomp_test_sighandler_done = false;\nstatic void seccomp_test_sighandler(int signum, siginfo_t *info, void *cxt);\nstatic void seccomp_deny_sighandler(int signum, siginfo_t *info, void *cxt);\n\nstatic int do_seccomp_load_rules(PgSeccompRule *rules, int count, uint32 def_action);\n\nvoid\nseccomp_load_rules(PgSeccompRule *rules, int count)\n{\n\tstruct sigaction action = { .sa_flags = SA_SIGINFO };\n\tPgSeccompRule rule;\n\tlong fd;\n\n\t/*\n\t * Install a test signal handler.\n\t * XXX: pqsignal() is too restrictive for our purposes,\n\t * since we'd like to examine the contents of siginfo_t.\n\t */\n\taction.sa_sigaction = seccomp_test_sighandler;\n\tif (sigaction(SIGSYS, &action, NULL) != 0)\n\t\tereport(FATAL,\n\t\t\t\t(errcode(ERRCODE_SYSTEM_ERROR),\n\t\t\t\t errmsg(\"seccomp: could not install test SIGSYS handler\")));\n\n\t/*\n\t * First, check that open of a well-known file works.\n\t * XXX: We use raw syscall() to call the very openat() which is\n\t * present both on x86_64 and on aarch64.\n\t */\n\tfd = syscall(SCMP_SYS(openat), AT_FDCWD, \"/dev/null\", O_RDONLY, 0);\n\tif (seccomp_test_sighandler_done)\n\t\tereport(FATAL,\n\t\t\t\t(errcode(ERRCODE_SYSTEM_ERROR),\n\t\t\t\t errmsg(\"seccomp: signal handler test flag was set unexpectedly\")));\n\tif (fd < 0)\n\t\tereport(FATAL,\n\t\t\t\t(errcode(ERRCODE_SYSTEM_ERROR),\n\t\t\t\t errmsg(\"seccomp: could not open /dev/null for seccomp testing: %m\")));\n\tclose((int) fd);\n\n\t/* Set 
a trap on openat() to test seccomp bpf */\n\trule = PG_SCMP(openat, SCMP_ACT_TRAP);\n\tif (do_seccomp_load_rules(&rule, 1, SCMP_ACT_ALLOW) != 0)\n\t\tereport(FATAL,\n\t\t\t\t(errcode(ERRCODE_SYSTEM_ERROR),\n\t\t\t\t errmsg(\"seccomp: could not load test trap\")));\n\n\t/* Finally, check that openat() now raises SIGSYS */\n\t(void) syscall(SCMP_SYS(openat), AT_FDCWD, \"/dev/null\", O_RDONLY, 0);\n\tif (!seccomp_test_sighandler_done)\n\t\tereport(FATAL,\n\t\t\t\t(errcode(ERRCODE_SYSTEM_ERROR),\n\t\t\t\t errmsg(\"seccomp: SIGSYS handler doesn't seem to work\")));\n\n\t/* Now that everything seems to work, install a proper handler */\n\taction.sa_sigaction = seccomp_deny_sighandler;\n\tif (sigaction(SIGSYS, &action, NULL) != 0)\n\t\tereport(FATAL,\n\t\t\t\t(errcode(ERRCODE_SYSTEM_ERROR),\n\t\t\t\t errmsg(\"seccomp: could not install SIGSYS handler\")));\n\n\t/* If this succeeds, any syscall not in the list will crash the process */\n\tif (do_seccomp_load_rules(rules, count, SCMP_ACT_TRAP) != 0)\n\t\tereport(FATAL,\n\t\t\t\t(errcode(ERRCODE_SYSTEM_ERROR),\n\t\t\t\t errmsg(\"seccomp: could not enter seccomp mode\")));\n}\n\n/*\n * Enter seccomp mode with a BPF filter that will only allow\n * certain syscalls to proceed.\n */\nstatic int\ndo_seccomp_load_rules(PgSeccompRule *rules, int count, uint32 def_action)\n{\n\tscmp_filter_ctx ctx;\n\tint rc = -1;\n\n\t/* Create a context with a default action for syscalls not in the list */\n\tif ((ctx = seccomp_init(def_action)) == NULL)\n\t\tgoto cleanup;\n\n\tfor (int i = 0; i < count; i++)\n\t{\n\t\tPgSeccompRule *rule = &rules[i];\n\t\tif ((rc = seccomp_rule_add(ctx, rule->psr_action, rule->psr_syscall, 0)) != 0)\n\t\t\tgoto cleanup;\n\t}\n\n\t/* Try building & loading the program into the kernel */\n\tif ((rc = seccomp_load(ctx)) != 0)\n\t\tgoto cleanup;\n\ncleanup:\n\t/*\n\t * We don't need the context anymore regardless of the result,\n\t * since either we failed or the eBPF program has already been\n\t * loaded into the 
linux kernel.\n\t */\n\tseccomp_release(ctx);\n\treturn rc;\n}\n\nstatic void\ndie(int code, const char *str)\n{\n\t/* work around gcc ignoring that it shouldn't warn on (void) result being unused */\n\tssize_t _unused pg_attribute_unused();\n\t/* Best effort write to stderr */\n\t_unused = write(fileno(stderr), str, strlen(str));\n\n\t/* XXX: we don't want to run any atexit callbacks */\n\t_exit(code);\n}\n\nstatic void\nseccomp_test_sighandler(int signum, siginfo_t *info, void *cxt pg_attribute_unused())\n{\n#define DIE_PREFIX \"seccomp test signal handler: \"\n\n\t/* Check that this signal handler is used only for a single test case */\n\tif (seccomp_test_sighandler_done)\n\t\tdie(1, DIE_PREFIX \"test handler should only be used for 1 test\\n\");\n\tseccomp_test_sighandler_done = true;\n\n\tif (signum != SIGSYS)\n\t\tdie(1, DIE_PREFIX \"bad signal number\\n\");\n\n\t/* TODO: maybe somehow extract the hardcoded syscall number */\n\tif (info->si_syscall != SCMP_SYS(openat))\n\t\tdie(1, DIE_PREFIX \"bad syscall number\\n\");\n\n#undef DIE_PREFIX\n}\n\nstatic void\nseccomp_deny_sighandler(int signum, siginfo_t *info, void *cxt pg_attribute_unused())\n{\n\t/*\n\t * Unfortunately, we can't use seccomp_syscall_resolve_num_arch()\n\t * to resolve the syscall's name, since it calls strdup()\n\t * under the hood (wtf!).\n\t */\n\tchar buffer[128];\n\t(void)snprintf(buffer, lengthof(buffer),\n\t\t\t\"---------------------------------------\\n\"\n\t\t\t\"seccomp: bad syscall %d\\n\"\n\t\t\t\"---------------------------------------\\n\",\n\t\t\tinfo->si_syscall);\n\n\t/*\n\t * Instead of silently crashing the process with\n\t * a fake SIGSYS caused by SCMP_ACT_KILL_PROCESS,\n\t * we'd like to receive a real SIGSYS to print the\n\t * message and *then* immediately exit.\n\t */\n\tdie(1, buffer);\n}\n\n#endif\t\t/* HAVE_LIBSECCOMP */\n"
  },
  {
    "path": "pgxn/neon_walredo/walredoproc.c",
    "content": "/*-------------------------------------------------------------------------\n *\n * walredoproc.c\n *\t  Entry point for WAL redo helper\n *\n *\n * This file contains an alternative main() function for the 'postgres'\n * binary. In WAL redo mode, we go into a special mode that's similar\n * to the single user mode. We don't launch postmaster or any auxiliary\n * processes. Instead, we wait for commands from 'stdin', and respond to\n * 'stdout'.\n *\n * The protocol through stdin/stdout is loosely based on the libpq protocol.\n * The process accepts messages through stdin, and each message has the format:\n *\n * char   msgtype;\n * int32  length; // length of message including 'length' but excluding\n *                // 'msgtype', in network byte order\n * <payload>\n *\n * There are five message types:\n *\n * BeginRedoForBlock ('B'): Prepare for WAL replay for given block\n * PushPage ('P'): Copy a page image (in the payload) to buffer cache\n * ApplyRecord ('A'): Apply a WAL record (in the payload)\n * GetPage ('G'): Return a page image from buffer cache.\n * Ping ('H'): Return the input message.\n *\n * Currently, you only get a response to GetPage requests; the response is\n * simply an 8k page, without any headers. Errors are logged to stderr.\n *\n * FIXME:\n * - this currently requires a valid PGDATA, and creates a lock file there\n *   like a normal postmaster. 
There's no fundamental reason for that, though.\n * - should have EndRedoForBlock, and flush page cache, to allow using this\n *   mechanism for more than one block without restarting the process.\n *\n *\n * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group\n * Portions Copyright (c) 1994, Regents of the University of California\n *\n *-------------------------------------------------------------------------\n */\n\n#include \"postgres.h\"\n\n#include \"../neon/neon_pgversioncompat.h\"\n\n#include <fcntl.h>\n#include <limits.h>\n#include <signal.h>\n#include <unistd.h>\n#include <sys/socket.h>\n#ifdef HAVE_SYS_SELECT_H\n#include <sys/select.h>\n#endif\n#ifdef HAVE_SYS_RESOURCE_H\n#include <sys/time.h>\n#include <sys/resource.h>\n#endif\n\n#if defined(HAVE_LIBSECCOMP) && defined(__GLIBC__)\n#define MALLOC_NO_MMAP\n#include <malloc.h>\n#endif\n\n#if PG_MAJORVERSION_NUM < 16\n#ifndef HAVE_GETRUSAGE\n#include \"rusagestub.h\"\n#endif\n#endif\n\n#include \"access/clog.h\"\n#include \"access/commit_ts.h\"\n#include \"access/heapam.h\"\n#include \"access/multixact.h\"\n#include \"access/nbtree.h\"\n#include \"access/subtrans.h\"\n#include \"access/syncscan.h\"\n#include \"access/twophase.h\"\n#include \"access/xlog.h\"\n#include \"access/xlog_internal.h\"\n#if PG_VERSION_NUM >= 150000\n#include \"access/xlogrecovery.h\"\n#endif\n#include \"access/xlogutils.h\"\n#include \"catalog/pg_class.h\"\n#include \"commands/async.h\"\n#include \"libpq/pqformat.h\"\n#include \"miscadmin.h\"\n#include \"pgstat.h\"\n#include \"postmaster/autovacuum.h\"\n#include \"postmaster/bgworker_internals.h\"\n#include \"postmaster/bgwriter.h\"\n#include \"postmaster/postmaster.h\"\n#include \"replication/logicallauncher.h\"\n#include \"replication/origin.h\"\n#include \"replication/slot.h\"\n#include \"replication/walreceiver.h\"\n#include \"replication/walsender.h\"\n#include \"storage/buf_internals.h\"\n#include \"storage/bufmgr.h\"\n#include \"storage/dsm.h\"\n#if 
PG_MAJORVERSION_NUM >= 17\n#include \"storage/dsm_registry.h\"\n#endif\n#include \"storage/ipc.h\"\n#include \"storage/pg_shmem.h\"\n#include \"storage/pmsignal.h\"\n#include \"storage/predicate.h\"\n#include \"storage/proc.h\"\n#include \"storage/procarray.h\"\n#include \"storage/procsignal.h\"\n#include \"storage/sinvaladt.h\"\n#include \"storage/smgr.h\"\n#include \"storage/spin.h\"\n#include \"tcop/tcopprot.h\"\n#include \"utils/memutils.h\"\n#include \"utils/ps_status.h\"\n#include \"utils/snapmgr.h\"\n\n#include \"inmem_smgr.h\"\n\n#ifdef HAVE_LIBSECCOMP\n#include \"neon_seccomp.h\"\n#endif\n\nPG_MODULE_MAGIC;\n\nstatic int\tReadRedoCommand(StringInfo inBuf);\nstatic void BeginRedoForBlock(StringInfo input_message);\nstatic void PushPage(StringInfo input_message);\nstatic void ApplyRecord(StringInfo input_message);\nstatic void apply_error_callback(void *arg);\nstatic bool redo_block_filter(XLogReaderState *record, uint8 block_id);\nstatic void GetPage(StringInfo input_message);\nstatic void Ping(StringInfo input_message);\nstatic ssize_t buffered_read(void *buf, size_t count);\nstatic void CreateFakeSharedMemoryAndSemaphores(void);\n\nstatic BufferTag target_redo_tag;\n\nstatic XLogReaderState *reader_state;\n\n#define TRACE DEBUG1\n\n#ifdef HAVE_LIBSECCOMP\n\n\n/*\n * https://man7.org/linux/man-pages/man2/close_range.2.html\n *\n * The `close_range` syscall is available as of Linux 5.9.\n *\n * The `close_range` libc wrapper is only available in glibc >= 2.34.\n * Debian Bullseye ships a libc package based on glibc 2.31.\n * => write the wrapper ourselves, using the syscall number from the kernel headers.\n *\n * If the Linux uAPI headers don't define the system call number,\n * fail the build deliberately rather than ifdef'ing it to ENOSYS.\n * We prefer a compile time over a runtime error for walredo.\n */\n#include <unistd.h>\n#include <sys/syscall.h>\n#include <errno.h>\n\nstatic int\nclose_range_syscall(unsigned int start_fd, unsigned int count, 
unsigned int flags)\n{\n    return syscall(__NR_close_range, start_fd, count, flags);\n}\n\n\nstatic PgSeccompRule allowed_syscalls[] =\n{\n\t/* Hard requirements */\n\tPG_SCMP_ALLOW(exit_group),\n\tPG_SCMP_ALLOW(pselect6),\n\tPG_SCMP_ALLOW(read),\n\tPG_SCMP_ALLOW(select),\n\tPG_SCMP_ALLOW(write),\n\n\t/* Memory allocation */\n\tPG_SCMP_ALLOW(brk),\n#ifndef MALLOC_NO_MMAP\n\t/* TODO: musl doesn't have mallopt */\n\tPG_SCMP_ALLOW(mmap),\n\tPG_SCMP_ALLOW(munmap),\n#endif\n\t/*\n\t * getpid() is called on assertion failure, in ExceptionalCondition.\n\t * It's not really needed, but seems pointless to hide it either. The\n\t * system call unlikely to expose a kernel vulnerability, and the PID\n\t * is stored in MyProcPid anyway.\n\t */\n\tPG_SCMP_ALLOW(getpid),\n\tPG_SCMP_ALLOW(futex), /* needed for errbacktrace */\n\n\t/* Enable those for a proper shutdown. */\n#if 0\n\t   PG_SCMP_ALLOW(munmap),\n\t   PG_SCMP_ALLOW(shmctl),\n\t   PG_SCMP_ALLOW(shmdt),\n\t   PG_SCMP_ALLOW(unlink),\t/* shm_unlink */\n#endif\n};\n\nstatic void\nenter_seccomp_mode(void)\n{\n\t/*\n\t * The pageserver process relies on us to close all the file descriptors\n\t * it potentially leaked to us, _before_ we start processing potentially dangerous\n\t * wal records. See the comment in the Rust code that launches this process.\n\t */\n\tif (close_range_syscall(3, ~0U, 0) != 0)\n\t\tereport(FATAL,\n\t\t\t\t(errcode(ERRCODE_SYSTEM_ERROR),\n\t\t\t\t errmsg(\"seccomp: could not close files >= fd 3\")));\n\n#ifdef MALLOC_NO_MMAP\n\t/* Ask glibc not to use mmap() */\n\tmallopt(M_MMAP_MAX, 0);\n#endif\n\n\tseccomp_load_rules(allowed_syscalls, lengthof(allowed_syscalls));\n}\n#endif /* HAVE_LIBSECCOMP */\n\nPGDLLEXPORT void\nWalRedoMain(int argc, char *argv[]);\n\n/*\n * Entry point for the WAL redo process.\n *\n * Performs similar initialization as PostgresMain does for normal\n * backend processes. 
Some initialization was done in CallExtMain\n * already.\n */\nPGDLLEXPORT void\nWalRedoMain(int argc, char *argv[])\n{\n\tint\t\t\tfirstchar;\n\tStringInfoData input_message;\n#ifdef HAVE_LIBSECCOMP\n\tbool\t\tenable_seccomp;\n#endif\n\n\tam_wal_redo_postgres = true;\n\t/*\n\t * Pageserver treats any output to stderr as an ERROR, so we must\n\t * set the log level as early as possible to only log FATAL and \n\t * above during WAL redo (note that loglevel ERROR also logs LOG,\n\t * which is super strange but that's not something we can solve\n\t * for here. ¯\\_(-_-)_/¯\n\t */\n\tSetConfigOption(\"log_min_messages\", \"WARNING\", PGC_SUSET, PGC_S_OVERRIDE);\n\tSetConfigOption(\"client_min_messages\", \"ERROR\", PGC_SUSET,\n\t\t\t\t\tPGC_S_OVERRIDE);\n\n\t/*\n\t * WAL redo does not need a large number of buffers. And speed of\n\t * DropRelationAllLocalBuffers() is proportional to the number of\n\t * buffers. So let's keep it small (default value is 1024)\n\t */\n\tnum_temp_buffers = 4;\n\tNBuffers = 4;\n\n\t/*\n\t * install the simple in-memory smgr\n\t */\n\tsmgr_hook = smgr_inmem;\n\tsmgr_init_hook = smgr_init_inmem;\n\n#if PG_VERSION_NUM >= 160000\n\t/* make rmgr registry believe we can register the resource manager */\n\tprocess_shared_preload_libraries_in_progress = true;\n\tload_file(\"$libdir/neon_rmgr\", false);\n\tprocess_shared_preload_libraries_in_progress = false;\n#endif\n\n\t/* Initialize MaxBackends (if under postmaster, was done already) */\n\tMaxConnections = 1;\n\tmax_worker_processes = 0;\n\tmax_parallel_workers = 0;\n\tmax_wal_senders = 0;\n\tInitializeMaxBackends();\n\n#if PG_VERSION_NUM >= 150000\n\tprocess_shmem_requests();\n\tInitializeShmemGUCs();\n\n\t/*\n\t * This will try to access data directory which we do not set.\n\t * Seems to be pretty safe to disable.\n\t */\n\t/* InitializeWalConsistencyChecking(); */\n#endif\n\n\t/*\n\t * We have our own version of CreateSharedMemoryAndSemaphores() that\n\t * sets up local memory instead of 
shared one.\n\t */\n\tCreateFakeSharedMemoryAndSemaphores();\n\n\t/*\n\t * Remember stand-alone backend startup time,roughly at the same point\n\t * during startup that postmaster does so.\n\t */\n\tPgStartTime = GetCurrentTimestamp();\n\n\t/*\n\t * Create a per-backend PGPROC struct in shared memory. We must do\n\t * this before we can use LWLocks.\n\t */\n\tInitAuxiliaryProcess();\n\n\tSetProcessingMode(NormalProcessing);\n\n\t/* Redo routines won't work if we're not \"in recovery\" */\n\tInRecovery = true;\n\n\t/*\n\t * Create the memory context we will use in the main loop.\n\t *\n\t * MessageContext is reset once per iteration of the main loop, ie, upon\n\t * completion of processing of each command message from the client.\n\t */\n\tMessageContext = AllocSetContextCreate(TopMemoryContext,\n\t\t\t\t\t\t\t\t\t\t   \"MessageContext\",\n\t\t\t\t\t\t\t\t\t\t   ALLOCSET_DEFAULT_SIZES);\n\n\t/* we need a ResourceOwner to hold buffer pins */\n\tAssert(CurrentResourceOwner == NULL);\n\tCurrentResourceOwner = ResourceOwnerCreate(NULL, \"wal redo\");\n\n\t/* Initialize resource managers */\n\tfor (int rmid = 0; rmid <= RM_MAX_ID; rmid++)\n\t{\n\t\tif (RmgrTable[rmid].rm_startup != NULL)\n\t\t\tRmgrTable[rmid].rm_startup();\n\t}\n\treader_state = XLogReaderAllocate(wal_segment_size, NULL, XL_ROUTINE(), NULL);\n\n#ifdef HAVE_LIBSECCOMP\n\t/* We prefer opt-out to opt-in for greater security */\n\tenable_seccomp = true;\n\tfor (int i = 1; i < argc; i++)\n\t\tif (strcmp(argv[i], \"--disable-seccomp\") == 0)\n\t\t\tenable_seccomp = false;\n\n\t/*\n\t * We deliberately delay the transition to the seccomp mode\n\t * until it's time to enter the main processing loop;\n\t * else we'd have to add a lot more syscalls to the allowlist.\n\t */\n\tif (enable_seccomp)\n\t\tenter_seccomp_mode();\n#endif /* HAVE_LIBSECCOMP */\n\n\t/*\n\t * Main processing loop\n\t */\n\tMemoryContextSwitchTo(MessageContext);\n\tinitStringInfo(&input_message);\n#if PG_MAJORVERSION_NUM >= 
16\n\tMyBackendType = B_BACKEND;\n#endif\n\n\tfor (;;)\n\t{\n\t\t/* Release memory left over from prior query cycle. */\n\t\tresetStringInfo(&input_message);\n\n\t\tset_ps_display(\"idle\");\n\n\t\t/*\n\t\t * (3) read a command (loop blocks here)\n\t\t */\n\t\tfirstchar = ReadRedoCommand(&input_message);\n\t\tswitch (firstchar)\n\t\t{\n\t\t\tcase 'B':\t\t\t/* BeginRedoForBlock */\n\t\t\t\tBeginRedoForBlock(&input_message);\n\t\t\t\tbreak;\n\n\t\t\tcase 'P':\t\t\t/* PushPage */\n\t\t\t\tPushPage(&input_message);\n\t\t\t\tbreak;\n\n\t\t\tcase 'A':\t\t\t/* ApplyRecord */\n\t\t\t\tApplyRecord(&input_message);\n\t\t\t\tbreak;\n\n\t\t\tcase 'G':\t\t\t/* GetPage */\n\t\t\t\tGetPage(&input_message);\n\t\t\t\tbreak;\n\n\t\t\tcase 'H': \t\t\t/* Ping */\n\t\t\t\tPing(&input_message);\n\t\t\t\tbreak;\n\n\t\t\t\t/*\n\t\t\t\t * EOF means we're done. Perform normal shutdown.\n\t\t\t\t */\n\t\t\tcase EOF:\n\t\t\t\tereport(LOG,\n\t\t\t\t\t\t(errmsg(\"received EOF on stdin, shutting down\")));\n\n#ifdef HAVE_LIBSECCOMP\n\t\t\t\t/*\n\t\t\t\t * Skip the shutdown sequence, leaving some garbage behind.\n\t\t\t\t * Hopefully, postgres will clean it up in the next run.\n\t\t\t\t * This way we don't have to enable extra syscalls, which is nice.\n\t\t\t\t * See enter_seccomp_mode() above.\n\t\t\t\t */\n\t\t\t\tif (enable_seccomp)\n\t\t\t\t\t_exit(0);\n#endif /* HAVE_LIBSECCOMP */\n\t\t\t\t/*\n\t\t\t\t * NOTE: if you are tempted to add more code here, DON'T!\n\t\t\t\t * Whatever you had in mind to do should be set up as an\n\t\t\t\t * on_proc_exit or on_shmem_exit callback, instead. 
Otherwise\n\t\t\t\t * it will fail to be called during other backend-shutdown\n\t\t\t\t * scenarios.\n\t\t\t\t */\n\t\t\t\tproc_exit(0);\n\n\t\t\tdefault:\n\t\t\t\tereport(FATAL,\n\t\t\t\t\t\t(errcode(ERRCODE_PROTOCOL_VIOLATION),\n\t\t\t\t\t\t errmsg(\"invalid frontend message type %d\",\n\t\t\t\t\t\t\t\tfirstchar)));\n\t\t}\n\t}\t\t\t\t\t\t\t/* end of input-reading loop */\n}\n\n\n/*\n * Initialize dummy shmem.\n *\n * This code follows CreateSharedMemoryAndSemaphores() but manually sets up\n * the shmem header and skips a few initialization steps that are not needed for\n * WAL redo.\n *\n * I've also tried removing most of the initialization functions that request some\n * memory (like ApplyLauncherShmemInit and friends) but in reality it hasn't had\n * any sizeable effect on RSS, so such cleanup is probably not worth the risk of having\n * half-initialized postgres.\n */\nstatic void\nCreateFakeSharedMemoryAndSemaphores(void)\n{\n\tPGShmemHeader *hdr;\n\tSize\t\tsize;\n\tint\t\t\tnumSemas;\n\tchar\t\tcwd[MAXPGPATH];\n\n#if PG_VERSION_NUM >= 150000\n\tsize = CalculateShmemSize(&numSemas);\n#else\n\t/*\n\t * Postgres v14 doesn't have a separate CalculateShmemSize(). 
Use result of the\n\t * corresponding calculation in CreateSharedMemoryAndSemaphores()\n\t */\n\tsize = 1409024;\n\tnumSemas = 10;\n#endif\n\n\t/* Dummy implementation of PGSharedMemoryCreate() */\n\t{\n\t\thdr = (PGShmemHeader *) malloc(size);\n\t\tif (!hdr)\n\t\t\tereport(FATAL,\n\t\t\t\t\t(errcode(ERRCODE_OUT_OF_MEMORY),\n\t\t\t\t\t errmsg(\"[neon-wal-redo] can not allocate (pseudo-) shared memory\")));\n\n\t\thdr->creatorPID = getpid();\n\t\thdr->magic = PGShmemMagic;\n\t\thdr->dsm_control = 0;\n\t\thdr->device = 42; /* not relevant for non-shared memory */\n\t\thdr->inode = 43; /* not relevant for non-shared memory */\n\t\thdr->totalsize = size;\n\t\thdr->freeoffset = MAXALIGN(sizeof(PGShmemHeader));\n\n\t\tUsedShmemSegAddr = hdr;\n\t\tUsedShmemSegID = (unsigned long) 42; /* not relevant for non-shared memory */\n\t}\n\n\tInitShmemAccess(hdr);\n\n\t/*\n\t * Reserve semaphores uses dir name as a source of entropy. Set it to cwd(). Rest\n\t * of the code does not need DataDir access so nullify DataDir after\n\t * PGReserveSemaphores() to error out if something will try to access it.\n\t */\n\tif (!getcwd(cwd, MAXPGPATH))\n\t\tereport(FATAL,\n\t\t\t(errcode(ERRCODE_INTERNAL_ERROR),\n\t\t\t errmsg(\"[neon-wal-redo] can not read current directory name\")));\n\tDataDir = cwd;\n\tPGReserveSemaphores(numSemas);\n\tDataDir = NULL;\n\n\t/*\n\t * The rest of function follows CreateSharedMemoryAndSemaphores() closely,\n\t * skipped parts are marked with comments.\n\t */\n\tInitShmemAllocation();\n\n\t/*\n\t * Now initialize LWLocks, which do shared memory allocation and are\n\t * needed for InitShmemIndex.\n\t */\n\tCreateLWLocks();\n\n\t/*\n\t * Set up shmem.c index hashtable\n\t */\n\tInitShmemIndex();\n\n\t/*\n\t * Set up xlog, clog, and buffers\n\t */\n#if PG_MAJORVERSION_NUM >= 
17\n\tDSMRegistryShmemInit();\n\tVarsupShmemInit();\n#endif\n\tXLOGShmemInit();\n\tCLOGShmemInit();\n\tCommitTsShmemInit();\n\tSUBTRANSShmemInit();\n\tMultiXactShmemInit();\n\tInitBufferPool();\n\n\t/*\n\t * Set up lock manager\n\t */\n\tInitLocks();\n\n\t/*\n\t * Set up predicate lock manager\n\t */\n\tInitPredicateLocks();\n\n\t/*\n\t * Set up process table\n\t */\n\tif (!IsUnderPostmaster)\n\t\tInitProcGlobal();\n\tCreateSharedProcArray();\n\tCreateSharedBackendStatus();\n\tTwoPhaseShmemInit();\n\tBackgroundWorkerShmemInit();\n\n\t/*\n\t * Set up shared-inval messaging\n\t */\n\tCreateSharedInvalidationState();\n\n\t/*\n\t * Set up interprocess signaling mechanisms\n\t */\n\tPMSignalShmemInit();\n\tProcSignalShmemInit();\n\tCheckpointerShmemInit();\n\tAutoVacuumShmemInit();\n\tReplicationSlotsShmemInit();\n\tReplicationOriginShmemInit();\n\tWalSndShmemInit();\n\tWalRcvShmemInit();\n\tPgArchShmemInit();\n\tApplyLauncherShmemInit();\n\n\t/*\n\t * Set up other modules that need some shared memory space\n\t */\n#if PG_MAJORVERSION_NUM < 17\n\t/* \"snapshot too old\" was removed in PG17, and with it the SnapMgr */\n\tSnapMgrInit();\n#endif\n\tBTreeShmemInit();\n\tSyncScanShmemInit();\n\t/* Skip due to the 'pg_notify' directory check */\n\t/* AsyncShmemInit(); */\n\n#ifdef EXEC_BACKEND\n\n\t/*\n\t * Alloc the win32 shared backend array\n\t */\n\tif (!IsUnderPostmaster)\n\t\tShmemBackendArrayAllocation();\n#endif\n\n\t/*\n\t * Now give loadable modules a chance to set up their shmem allocations\n\t */\n\tif (shmem_startup_hook)\n\t\tshmem_startup_hook();\n}\n\n\n/* Version compatibility wrapper for ReadBufferWithoutRelcache */\nstatic inline Buffer\nNeonRedoReadBuffer(NRelFileInfo rinfo,\n\t\t   ForkNumber forkNum, BlockNumber blockNum,\n\t\t   ReadBufferMode mode)\n{\n#if PG_VERSION_NUM >= 150000\n\treturn ReadBufferWithoutRelcache(rinfo, forkNum, blockNum, mode,\n\t\t\t\t\t\t\t\t\t NULL, /* no strategy */\n\t\t\t\t\t\t\t\t\t true); /* WAL redo is only performed on 
permanent rels */\n#else\n\treturn ReadBufferWithoutRelcache(rinfo, forkNum, blockNum, mode,\n\t\t\t\t\t\t\t\t\t NULL); /* no strategy */\n#endif\n}\n\n\n/*\n * Some debug function that may be handy for now.\n */\npg_attribute_unused()\nstatic char *\npprint_buffer(char *data, int len)\n{\n\tStringInfoData s;\n\n\tinitStringInfo(&s);\n\tappendStringInfo(&s, \"\\n\");\n\tfor (int i = 0; i < len; i++) {\n\n\t\tappendStringInfo(&s, \"%02x \", (*(((char *) data) + i) & 0xff) );\n\t\tif (i % 32 == 31) {\n\t\t\tappendStringInfo(&s, \"\\n\");\n\t\t}\n\t}\n\tappendStringInfo(&s, \"\\n\");\n\n\treturn s.data;\n}\n\n/* ----------------------------------------------------------------\n *\t\troutines to obtain user input\n * ----------------------------------------------------------------\n */\n\n/*\n * Read next command from the client.\n *\n *\tthe string entered by the user is placed in its parameter inBuf,\n *\tand we act like a Q message was received.\n *\n *\tEOF is returned if end-of-file input is seen; time to shut down.\n * ----------------\n */\nstatic int\nReadRedoCommand(StringInfo inBuf)\n{\n\tssize_t\t\tret;\n\tchar\t\thdr[1 + sizeof(int32)];\n\tint\t\t\tqtype;\n\tint32\t\tlen;\n\n\t/* Read message type and message length */\n\tret = buffered_read(hdr, sizeof(hdr));\n\tif (ret != sizeof(hdr))\n\t{\n\t\tif (ret == 0)\n\t\t\treturn EOF;\n\t\telse if (ret < 0)\n\t\t\tereport(ERROR,\n\t\t\t\t\t(errcode(ERRCODE_CONNECTION_FAILURE),\n\t\t\t\t\t errmsg(\"could not read message header: %m\")));\n\t\telse\n\t\t\tereport(ERROR,\n\t\t\t\t\t(errcode(ERRCODE_PROTOCOL_VIOLATION),\n\t\t\t\t\t errmsg(\"unexpected EOF\")));\n\t}\n\n\tqtype = hdr[0];\n\tmemcpy(&len, &hdr[1], sizeof(int32));\n\tlen = pg_ntoh32(len);\n\n\tif (len < 4)\n\t\tereport(ERROR,\n\t\t\t\t(errcode(ERRCODE_PROTOCOL_VIOLATION),\n\t\t\t\t errmsg(\"invalid message length\")));\n\n\tlen -= 4;\t\t\t\t\t/* discount length itself */\n\n\t/* Read the message payload */\n\tenlargeStringInfo(inBuf, len);\n\tret = 
buffered_read(inBuf->data, len);\n\tif (ret != len)\n\t{\n\t\tif (ret < 0)\n\t\t\tereport(ERROR,\n\t\t\t\t\t(errcode(ERRCODE_CONNECTION_FAILURE),\n\t\t\t\t\t errmsg(\"could not read message: %m\")));\n\t\telse\n\t\t\tereport(ERROR,\n\t\t\t\t\t(errcode(ERRCODE_PROTOCOL_VIOLATION),\n\t\t\t\t\t errmsg(\"unexpected EOF\")));\n\t}\n\tinBuf->len = len;\n\tinBuf->data[len] = '\\0';\n\n\treturn qtype;\n}\n\n/*\n * Prepare for WAL replay on given block\n */\nstatic void\nBeginRedoForBlock(StringInfo input_message)\n{\n\tNRelFileInfo rinfo;\n\tForkNumber forknum;\n\tBlockNumber blknum;\n\tSMgrRelation reln;\n\n\t/*\n\t * message format:\n\t *\n\t * spcNode\n\t * dbNode\n\t * relNode\n\t * ForkNumber\n\t * BlockNumber\n\t */\n\tforknum = pq_getmsgbyte(input_message);\n#if PG_MAJORVERSION_NUM < 16\n\trinfo.spcNode = pq_getmsgint(input_message, 4);\n\trinfo.dbNode = pq_getmsgint(input_message, 4);\n\trinfo.relNode = pq_getmsgint(input_message, 4);\n#else\n\trinfo.spcOid = pq_getmsgint(input_message, 4);\n\trinfo.dbOid = pq_getmsgint(input_message, 4);\n\trinfo.relNumber = pq_getmsgint(input_message, 4);\n#endif\n\tblknum = pq_getmsgint(input_message, 4);\n\twal_redo_buffer = InvalidBuffer;\n\n\tInitBufferTag(&target_redo_tag, &rinfo, forknum, blknum);\n\n\telog(TRACE, \"BeginRedoForBlock %u/%u/%u.%d blk %u\",\n\t\t RelFileInfoFmt(rinfo),\n\t\t target_redo_tag.forkNum,\n\t\t target_redo_tag.blockNum);\n\n\treln = smgropen(rinfo, INVALID_PROC_NUMBER, RELPERSISTENCE_PERMANENT);\n\tif (reln->smgr_cached_nblocks[forknum] == InvalidBlockNumber ||\n\t\treln->smgr_cached_nblocks[forknum] < blknum + 1)\n\t{\n\t\treln->smgr_cached_nblocks[forknum] = blknum + 1;\n\t}\n}\n\n/*\n * Receive a page given by the client, and put it into buffer cache.\n */\nstatic void\nPushPage(StringInfo input_message)\n{\n\tNRelFileInfo rinfo;\n\tForkNumber forknum;\n\tBlockNumber blknum;\n\tconst char *content;\n\tBuffer\t\tbuf;\n\tPage\t\tpage;\n\n\t/*\n\t * message format:\n\t *\n\t * spcNode\n\t * 
dbNode\n\t * relNode\n\t * ForkNumber\n\t * BlockNumber\n\t * 8k page content\n\t */\n\tforknum = pq_getmsgbyte(input_message);\n#if PG_MAJORVERSION_NUM < 16\n\trinfo.spcNode = pq_getmsgint(input_message, 4);\n\trinfo.dbNode = pq_getmsgint(input_message, 4);\n\trinfo.relNode = pq_getmsgint(input_message, 4);\n#else\n\trinfo.spcOid = pq_getmsgint(input_message, 4);\n\trinfo.dbOid = pq_getmsgint(input_message, 4);\n\trinfo.relNumber = pq_getmsgint(input_message, 4);\n#endif\n\tblknum = pq_getmsgint(input_message, 4);\n\tcontent = pq_getmsgbytes(input_message, BLCKSZ);\n\n\tbuf = NeonRedoReadBuffer(rinfo, forknum, blknum, RBM_ZERO_AND_LOCK);\n\twal_redo_buffer = buf;\n\tpage = BufferGetPage(buf);\n\tmemcpy(page, content, BLCKSZ);\n\tMarkBufferDirty(buf); /* pro forma */\n\tUnlockReleaseBuffer(buf);\n}\n\n/*\n * Receive a WAL record, and apply it.\n *\n * All the pages should be loaded into the buffer cache by PushPage calls already.\n */\nstatic void\nApplyRecord(StringInfo input_message)\n{\n\tchar\t   *errormsg;\n\tXLogRecPtr\tlsn;\n\tXLogRecord *record;\n\tint\t\t\tnleft;\n\tErrorContextCallback errcallback;\n#if PG_VERSION_NUM >= 150000\n\tDecodedXLogRecord *decoded;\n#define STATIC_DECODEBUF_SIZE (64 * 1024)\n\tstatic char *static_decodebuf = NULL;\n\tsize_t\t\trequired_space;\n#endif\n\n\t/*\n\t * message format:\n\t *\n\t * LSN (the *end* of the record)\n\t * record\n\t */\n\tlsn = pq_getmsgint64(input_message);\n\n\tsmgrinit();\t\t\t\t\t/* reset inmem smgr state */\n\n\t/* note: the input must be aligned here */\n\trecord = (XLogRecord *) pq_getmsgbytes(input_message, sizeof(XLogRecord));\n\n\tnleft = input_message->len - input_message->cursor;\n\tif (record->xl_tot_len != sizeof(XLogRecord) + nleft)\n\t\telog(ERROR, \"mismatch between record (%d) and message size (%d)\",\n\t\t\t record->xl_tot_len, (int) sizeof(XLogRecord) + nleft);\n\n\t/* Setup error traceback support for ereport() */\n\terrcallback.callback = apply_error_callback;\n\terrcallback.arg = 
(void *) reader_state;\n\terrcallback.previous = error_context_stack;\n\terror_context_stack = &errcallback;\n\n\tXLogBeginRead(reader_state, lsn);\n\n#if PG_VERSION_NUM >= 150000\n\t/*\n\t * For reasonably small records, reuse a fixed size buffer to reduce\n\t * palloc overhead.\n\t */\n\trequired_space = DecodeXLogRecordRequiredSpace(record->xl_tot_len);\n\tif (required_space <= STATIC_DECODEBUF_SIZE)\n\t{\n\t\tif (static_decodebuf == NULL)\n\t\t\tstatic_decodebuf = MemoryContextAlloc(TopMemoryContext, STATIC_DECODEBUF_SIZE);\n\t\tdecoded = (DecodedXLogRecord *) static_decodebuf;\n\t}\n\telse\n\t\tdecoded = palloc(required_space);\n\n\tif (!DecodeXLogRecord(reader_state, decoded, record, lsn, &errormsg))\n\t\telog(ERROR, \"failed to decode WAL record: %s\", errormsg);\n\telse\n\t{\n\t\t/* Record the location of the next record. */\n\t\tdecoded->next_lsn = reader_state->NextRecPtr;\n\n\t\t/*\n\t\t * Update the pointers to the beginning and one-past-the-end of this\n\t\t * record, again for the benefit of historical code that expected the\n\t\t * decoder to track this rather than accessing these fields of the record\n\t\t * itself.\n\t\t */\n\t\treader_state->record = decoded;\n\t\treader_state->ReadRecPtr = decoded->lsn;\n\t\treader_state->EndRecPtr = decoded->next_lsn;\n\t}\n#else\n\t/*\n\t * In lieu of calling XLogReadRecord, store the record 'decoded_record'\n\t * buffer directly.\n\t */\n\treader_state->ReadRecPtr = lsn;\n\treader_state->decoded_record = record;\n\tif (!DecodeXLogRecord(reader_state, record, &errormsg))\n\t\telog(ERROR, \"failed to decode WAL record: %s\", errormsg);\n#endif\n\n\t/* Ignore any other blocks than the ones the caller is interested in */\n\tredo_read_buffer_filter = redo_block_filter;\n\n\tRmgrTable[record->xl_rmid].rm_redo(reader_state);\n\n\t/*\n\t * If no base image of the page was provided by PushPage, initialize\n\t * wal_redo_buffer here. 
The first WAL record must initialize the page\n\t * in that case.\n\t */\n\tif (BufferIsInvalid(wal_redo_buffer))\n\t{\n\t\twal_redo_buffer = NeonRedoReadBuffer(BufTagGetNRelFileInfo(target_redo_tag),\n\t\t\t\t\t\t\t\t\t\t\t target_redo_tag.forkNum,\n\t\t\t\t\t\t\t\t\t\t\t target_redo_tag.blockNum,\n\t\t\t\t\t\t\t\t\t\t\t RBM_NORMAL);\n\t\tAssert(!BufferIsInvalid(wal_redo_buffer));\n\t\tReleaseBuffer(wal_redo_buffer);\n\t}\n\n\tredo_read_buffer_filter = NULL;\n\n\t/* Pop the error context stack */\n\terror_context_stack = errcallback.previous;\n\n\telog(TRACE, \"applied WAL record with LSN %X/%X\",\n\t\t (uint32) (lsn >> 32), (uint32) lsn);\n\n#if PG_VERSION_NUM >= 150000\n\tif ((char *) decoded != static_decodebuf)\n\t\tpfree(decoded);\n#endif\n}\n\n/*\n * Error context callback for errors occurring during ApplyRecord\n */\nstatic void\napply_error_callback(void *arg)\n{\n\tXLogReaderState *record = (XLogReaderState *) arg;\n\tStringInfoData buf;\n\n\tinitStringInfo(&buf);\n#if PG_VERSION_NUM >= 150000\n\tif (record->record)\n#else\n\tif (record->decoded_record)\n#endif\n\t\txlog_outdesc(&buf, record);\n\n\t/* translator: %s is a WAL record description */\n\terrcontext(\"WAL redo at %X/%X for %s\",\n\t\t\t   LSN_FORMAT_ARGS(record->ReadRecPtr),\n\t\t\t   buf.data);\n\n\tpfree(buf.data);\n}\n\n\n\nstatic bool\nredo_block_filter(XLogReaderState *record, uint8 block_id)\n{\n\tBufferTag\ttarget_tag;\n\tNRelFileInfo rinfo;\n\n#if PG_VERSION_NUM >= 150000\n\tXLogRecGetBlockTag(record, block_id,\n\t\t\t\t\t   &rinfo, &target_tag.forkNum, &target_tag.blockNum);\n#else\n\tif (!XLogRecGetBlockTag(record, block_id,\n\t\t\t\t\t\t\t&rinfo, &target_tag.forkNum, &target_tag.blockNum))\n\t{\n\t\t/* Caller specified a bogus block_id */\n\t\telog(PANIC, \"failed to locate backup block with ID %d\", block_id);\n\t}\n#endif\n\tCopyNRelFileInfoToBufTag(target_tag, rinfo);\n\n\t/*\n\t * Can a WAL redo function ever access a relation other than the one that\n\t * it modifies? 
I don't see why it would.\n\t * Custom RMGRs may be affected by this.\n\t */\n\tif (!RelFileInfoEquals(rinfo, BufTagGetNRelFileInfo(target_redo_tag)))\n\t\telog(WARNING, \"REDO accessing unexpected page: %u/%u/%u.%u blk %u\",\n\t\t\t RelFileInfoFmt(rinfo), target_tag.forkNum, target_tag.blockNum);\n\n\t/*\n\t * If this block isn't one we are currently restoring, then return 'true'\n\t * so that this gets ignored\n\t */\n\treturn !BufferTagsEqual(&target_tag, &target_redo_tag);\n}\n\n/*\n * Get a page image back from buffer cache.\n *\n * After applying some records.\n */\nstatic void\nGetPage(StringInfo input_message)\n{\n\tNRelFileInfo rinfo;\n\tForkNumber forknum;\n\tBlockNumber blknum;\n\tBuffer\t\tbuf;\n\tPage\t\tpage;\n\tint\t\t\ttot_written;\n\n\t/*\n\t * message format:\n\t *\n\t * spcNode\n\t * dbNode\n\t * relNode\n\t * ForkNumber\n\t * BlockNumber\n\t */\n\tforknum = pq_getmsgbyte(input_message);\n#if PG_MAJORVERSION_NUM < 16\n\trinfo.spcNode = pq_getmsgint(input_message, 4);\n\trinfo.dbNode = pq_getmsgint(input_message, 4);\n\trinfo.relNode = pq_getmsgint(input_message, 4);\n#else\n\trinfo.spcOid = pq_getmsgint(input_message, 4);\n\trinfo.dbOid = pq_getmsgint(input_message, 4);\n\trinfo.relNumber = pq_getmsgint(input_message, 4);\n#endif\n\tblknum = pq_getmsgint(input_message, 4);\n\n\t/* FIXME: check that we got a BeginRedoForBlock message or this earlier */\n\n\tbuf = NeonRedoReadBuffer(rinfo, forknum, blknum, RBM_NORMAL);\n\tAssert(buf == wal_redo_buffer);\n\tpage = BufferGetPage(buf);\n\t/* single thread, so don't bother locking the page */\n\n\t/* Response: Page content */\n\ttot_written = 0;\n\tdo {\n\t\tssize_t\t\trc;\n\n\t\trc = write(STDOUT_FILENO, &page[tot_written], BLCKSZ - tot_written);\n\t\tif (rc < 0) {\n\t\t\t/* If interrupted by signal, just retry */\n\t\t\tif (errno == EINTR)\n\t\t\t\tcontinue;\n\t\t\tereport(ERROR,\n\t\t\t\t\t(errcode_for_file_access(),\n\t\t\t\t\t errmsg(\"could not write to stdout: %m\")));\n\t\t}\n\t\ttot_written += 
rc;\n\t} while (tot_written < BLCKSZ);\n\n\tReleaseBuffer(buf);\n\tDropRelationAllLocalBuffers(rinfo);\n\twal_redo_buffer = InvalidBuffer;\n\n\telog(TRACE, \"Page sent back for block %u\", blknum);\n}\n\n\nstatic void\nPing(StringInfo input_message)\n{\n\tint\t\t\ttot_written;\n\t/* Response: the input message */\n\ttot_written = 0;\n\tdo {\n\t\tssize_t\t\trc;\n\t\t/* We don't need alignment, but it's bad practice to use char[BLCKSZ] */\n#if PG_VERSION_NUM >= 160000\n\t\tstatic const PGIOAlignedBlock response;\n#else\n\t\tstatic const PGAlignedBlock response;\n#endif\n\t\trc = write(STDOUT_FILENO, &response.data[tot_written], BLCKSZ - tot_written);\n\t\tif (rc < 0) {\n\t\t\t/* If interrupted by signal, just retry */\n\t\t\tif (errno == EINTR)\n\t\t\t\tcontinue;\n\t\t\tereport(ERROR,\n\t\t\t\t\t(errcode_for_file_access(),\n\t\t\t\t\t errmsg(\"could not write to stdout: %m\")));\n\t\t}\n\t\ttot_written += rc;\n\t} while (tot_written < BLCKSZ);\n\n\telog(TRACE, \"Page sent back for ping\");\n}\n\n\n/* Buffer used by buffered_read() */\nstatic char stdin_buf[16 * 1024];\nstatic size_t stdin_len = 0;\t/* # of bytes in buffer */\nstatic size_t stdin_ptr = 0;\t/* # of bytes already consumed */\n\n/*\n * Like read() on stdin, but buffered.\n *\n * We cannot use libc's buffered fread(), because it uses syscalls that we\n * have disabled with seccomp(). Depending on the platform, it can call\n * 'fstat' or 'newfstatat'. 'fstat' is probably harmless, but 'newfstatat'\n * seems problematic because it allows interrogating files by path name.\n *\n * The return value is the number of bytes read. On error, -1 is returned, and\n * errno is set appropriately. 
Unlike read(), this fills the buffer completely\n * unless an error happens or EOF is reached.\n */\nstatic ssize_t\nbuffered_read(void *buf, size_t count)\n{\n\tchar\t   *dst = buf;\n\n\twhile (count > 0)\n\t{\n\t\tsize_t\t\tnthis;\n\n\t\tif (stdin_ptr == stdin_len)\n\t\t{\n\t\t\tssize_t\t\tret;\n\n\t\t\tret = read(STDIN_FILENO, stdin_buf, sizeof(stdin_buf));\n\t\t\tif (ret < 0)\n\t\t\t{\n\t\t\t\t/* don't do anything here that could set 'errno' */\n\t\t\t\treturn ret;\n\t\t\t}\n\t\t\tif (ret == 0)\n\t\t\t{\n\t\t\t\t/* EOF */\n\t\t\t\tbreak;\n\t\t\t}\n\t\t\tstdin_len = (size_t) ret;\n\t\t\tstdin_ptr = 0;\n\t\t}\n\t\tnthis = Min(stdin_len - stdin_ptr, count);\n\n\t\tmemcpy(dst, &stdin_buf[stdin_ptr], nthis);\n\n\t\tstdin_ptr += nthis;\n\t\tcount -= nthis;\n\t\tdst += nthis;\n\t}\n\n\treturn (dst - (char *) buf);\n}\n"
  },
  {
    "path": "postgres.mk",
    "content": "# Sub-makefile for compiling PostgreSQL as part of Neon. This is\n# included from the main Makefile, and is not meant to be called\n# directly.\n#\n# CI workflows and Dockerfiles can take advantage of the following\n# properties for caching:\n#\n# - Compiling the targets in this file only builds the PostgreSQL sources\n#   under the vendor/ subdirectory, nothing else from the repository.\n# - All outputs go to POSTGRES_INSTALL_DIR (by default 'pg_install',\n#   see parent Makefile)\n# - intermediate build artifacts go to BUILD_DIR\n#\n#\n# Variables passed from the parent Makefile that control what gets\n# installed and where:\n# - POSTGRES_VERSIONS\n# - POSTGRES_INSTALL_DIR\n# - BUILD_DIR\n#\n# Variables passed from the parent Makefile that affect the build\n# process and the resulting binaries:\n# - PG_CONFIGURE_OPTS\n# - PG_CFLAGS\n# - PG_LDFLAGS\n# - EXTRA_PATH_OVERRIDES\n\n###\n### Main targets\n###\n### These are called from the main Makefile, and can also be called\n### directly from command line\n\n# Compile and install a specific PostgreSQL version\npostgres-install-%: postgres-configure-% \\\n\t\t  postgres-headers-install-% # to prevent `make install` conflicts with neon's `postgres-headers`\n\n# Install the PostgreSQL header files into $(POSTGRES_INSTALL_DIR)/<version>/include\n#\n# This is implicitly part of the 'postgres-install-%' target, but this can be handy\n# if you want to install just the headers without building PostgreSQL, e.g. 
for building\n# extensions.\npostgres-headers-install-%: postgres-configure-%\n\t+@echo \"Installing PostgreSQL $* headers\"\n\t$(MAKE) -C $(BUILD_DIR)/$*/src/include MAKELEVEL=0 install\n\n# Run Postgres regression tests\npostgres-check-%: postgres-install-%\n\t$(MAKE) -C $(BUILD_DIR)/$* MAKELEVEL=0 check\n\n###\n### Shorthands for the main targets, for convenience\n###\n\n# Same as the above main targets, but for all supported PostgreSQL versions\n# For example, 'make postgres-install' is equivalent to\n# 'make postgres-install-v14 postgres-install-v15 postgres-install-v16 postgres-install-v17'\nall_version_targets=postgres-install postgres-headers-install postgres-check\n.PHONY: $(all_version_targets)\n$(all_version_targets): postgres-%: $(foreach pg_version,$(POSTGRES_VERSIONS),postgres-%-$(pg_version))\n\n.PHONY: postgres\npostgres: postgres-install\n\n.PHONY: postgres-headers\npostgres-headers: postgres-headers-install\n\n# 'postgres-v17' is an alias for 'postgres-install-v17' etc.\n$(foreach pg_version,$(POSTGRES_VERSIONS),postgres-$(pg_version)): postgres-%: postgres-install-%\n\n###\n### Intermediate targets\n###\n### These are not intended to be called directly, but are dependencies for the\n### main targets.\n\n# Run 'configure'\n$(BUILD_DIR)/%/config.status:\n\tmkdir -p $(BUILD_DIR)\n\ttest -e $(BUILD_DIR)/CACHEDIR.TAG || echo \"$(CACHEDIR_TAG_CONTENTS)\" > $(BUILD_DIR)/CACHEDIR.TAG\n\n\t+@echo \"Configuring Postgres $* build\"\n\t@test -s $(ROOT_PROJECT_DIR)/vendor/postgres-$*/configure || { \\\n\t\techo \"\\nPostgres submodule not found in $(ROOT_PROJECT_DIR)/vendor/postgres-$*/, execute \"; \\\n\t\techo \"'git submodule update --init --recursive --depth 2 --progress .' 
in project root.\\n\"; \\\n\t\texit 1; }\n\tmkdir -p $(BUILD_DIR)/$*\n\n\tVERSION=$*; \\\n\tEXTRA_VERSION=$$(cd $(ROOT_PROJECT_DIR)/vendor/postgres-$$VERSION && git rev-parse HEAD); \\\n\t(cd $(BUILD_DIR)/$$VERSION && \\\n\tenv PATH=\"$(EXTRA_PATH_OVERRIDES):$$PATH\" $(ROOT_PROJECT_DIR)/vendor/postgres-$$VERSION/configure \\\n\t\tCFLAGS='$(PG_CFLAGS)' LDFLAGS='$(PG_LDFLAGS)' \\\n\t\t$(PG_CONFIGURE_OPTS) --with-extra-version=\" ($$EXTRA_VERSION)\" \\\n\t\t--prefix=$(abspath $(POSTGRES_INSTALL_DIR))/$$VERSION > configure.log)\n\n# nicer alias to run 'configure'.\n#\n# This tries to accomplish this rule:\n#\n# postgres-configure-%: $(BUILD_DIR)/%/config.status\n#\n# XXX: I'm not sure why the above rule doesn't work directly. But this accomplishes\n# the same thing\n$(foreach pg_version,$(POSTGRES_VERSIONS),postgres-configure-$(pg_version)): postgres-configure-%: FORCE $(BUILD_DIR)/%/config.status\n\n# Compile and install PostgreSQL (and a few contrib modules used in tests)\npostgres-install-%: postgres-configure-% \\\n\t\t  postgres-headers-install-% # to prevent `make install` conflicts with neon's `postgres-headers-install`\n\t+@echo \"Compiling PostgreSQL $*\"\n\t$(MAKE) -C $(BUILD_DIR)/$* MAKELEVEL=0 install\n\t$(MAKE) -C $(BUILD_DIR)/$*/contrib/pg_prewarm install\n\t$(MAKE) -C $(BUILD_DIR)/$*/contrib/pg_buffercache install\n\t$(MAKE) -C $(BUILD_DIR)/$*/contrib/pg_visibility install\n\t$(MAKE) -C $(BUILD_DIR)/$*/contrib/pageinspect install\n\t$(MAKE) -C $(BUILD_DIR)/$*/contrib/pg_trgm install\n\t$(MAKE) -C $(BUILD_DIR)/$*/contrib/amcheck install\n\t$(MAKE) -C $(BUILD_DIR)/$*/contrib/test_decoding install\n\n.PHONY: FORCE\nFORCE:\n"
  },
  {
    "path": "pre-commit.py",
    "content": "#!/usr/bin/env python3\n\nfrom __future__ import annotations\n\nimport argparse\nimport enum\nimport os\nimport subprocess\nimport sys\n\n\n@enum.unique\nclass Color(enum.Enum):\n    RED = \"\\033[0;31m\"\n    GREEN = \"\\033[0;33m\"\n    CYAN = \"\\033[0;36m\"\n\n\nNC = \"\\033[0m\"  # No Color\n\n\ndef colorify(\n    s: str,\n    color: Color,\n    no_color: bool = False,\n):\n    if no_color:\n        return s\n    return f\"{color.value}{s}{NC}\"\n\n\ndef cargo_fmt(fix_inplace: bool = False, no_color: bool = False) -> str:\n    cmd = \"cargo fmt\"\n    if not fix_inplace:\n        cmd += \" --check\"\n    if no_color:\n        cmd += \" -- --color=never\"\n    return cmd\n\n\ndef ruff_check(fix_inplace: bool) -> str:\n    cmd = \"poetry run ruff check\"\n    if fix_inplace:\n        cmd += \" --fix\"\n    return cmd\n\n\ndef ruff_format(fix_inplace: bool) -> str:\n    cmd = \"poetry run ruff format\"\n    if not fix_inplace:\n        cmd += \" --diff --check\"\n    return cmd\n\n\ndef mypy() -> str:\n    return \"poetry run mypy\"\n\n\ndef get_commit_files() -> list[str]:\n    files = subprocess.check_output(\"git diff --cached --name-only --diff-filter=ACM\".split())\n    return files.decode().splitlines()\n\n\ndef check(\n    name: str,\n    suffix: str,\n    cmd: str,\n    changed_files: list[str],\n    no_color: bool = False,\n    append_files_to_cmd: bool = True,\n):\n    print(f\"Checking: {name} \", end=\"\")\n    applicable_files = list(filter(lambda fname: fname.strip().endswith(suffix), changed_files))\n    if not applicable_files:\n        print(colorify(\"[NOT APPLICABLE]\", Color.CYAN, no_color))\n        return\n\n    if append_files_to_cmd:\n        cmd = f\"{cmd} {' '.join(applicable_files)}\"\n\n    res = subprocess.run(cmd.split(), capture_output=True)\n    if res.returncode != 0:\n        print(colorify(\"[FAILED]\", Color.RED, no_color))\n        if name == \"mypy\":\n            print(\"Please inspect the output below and 
fix type mismatches.\")\n        else:\n            print(\"Please inspect the output below and run make fmt to fix automatically.\")\n        if suffix == \".py\":\n            print(\n                \"If the output is empty, ensure that you've installed Python tooling by\\n\"\n                \"running './scripts/pysync' in the current directory (no root needed)\"\n            )\n        print()\n        print(res.stdout.decode())\n        sys.exit(1)\n\n    print(colorify(\"[OK]\", Color.GREEN, no_color))\n\n\nif __name__ == \"__main__\":\n    parser = argparse.ArgumentParser()\n    parser.add_argument(\"--fix-inplace\", action=\"store_true\", help=\"apply fixes inplace\")\n    parser.add_argument(\n        \"--no-color\",\n        action=\"store_true\",\n        help=\"disable colored output\",\n        default=not sys.stdout.isatty() or os.getenv(\"TERM\") == \"dumb\",\n    )\n    args = parser.parse_args()\n\n    files = get_commit_files()\n    check(\n        name=\"cargo fmt\",\n        suffix=\".rs\",\n        cmd=cargo_fmt(fix_inplace=args.fix_inplace, no_color=args.no_color),\n        changed_files=files,\n        no_color=args.no_color,\n        append_files_to_cmd=False,\n    )\n    check(\n        name=\"ruff check\",\n        suffix=\".py\",\n        cmd=ruff_check(fix_inplace=args.fix_inplace),\n        changed_files=files,\n        no_color=args.no_color,\n    )\n    check(\n        name=\"ruff format\",\n        suffix=\".py\",\n        cmd=ruff_format(fix_inplace=args.fix_inplace),\n        changed_files=files,\n        no_color=args.no_color,\n    )\n    check(\n        name=\"mypy\",\n        suffix=\".py\",\n        cmd=mypy(),\n        changed_files=files,\n        no_color=args.no_color,\n    )\n"
  },
  {
    "path": "proxy/Cargo.toml",
    "content": "[package]\nname = \"proxy\"\nversion = \"0.1.0\"\nedition = \"2024\"\nlicense.workspace = true\n\n[features]\ndefault = []\ntesting = [\"dep:tokio-postgres\"]\nrest_broker = [\"dep:subzero-core\", \"dep:ouroboros\"]\n\n[dependencies]\nahash.workspace = true\nanyhow.workspace = true\narc-swap.workspace = true\nasync-compression.workspace = true\nasync-trait.workspace = true\natomic-take.workspace = true\naws-config.workspace = true\naws-credential-types.workspace = true\naws-sdk-iam.workspace = true\naws-sigv4.workspace = true\nbase64.workspace = true\nbstr.workspace = true\nbytes = { workspace = true, features = [\"serde\"] }\ncamino.workspace = true\nchrono.workspace = true\nclap = { workspace = true, features = [\"derive\", \"env\"] }\nclashmap.workspace = true\ncompute_api.workspace = true\nconsumption_metrics.workspace = true\nenv_logger.workspace = true\nframed-websockets.workspace = true\nfutures.workspace = true\nhashbrown.workspace = true\nhex.workspace = true\nhmac.workspace = true\nhostname.workspace = true\nhttp.workspace = true\nhttp-utils.workspace = true\nhumantime.workspace = true\nhumantime-serde.workspace = true\nhyper0.workspace = true\nhyper = { workspace = true, features = [\"server\", \"http1\", \"http2\"] }\nhyper-util = { version = \"0.1\", features = [\"server\", \"http1\", \"http2\", \"tokio\"] }\nhttp-body-util = { version = \"0.1\" }\ngettid = \"0.1.3\"\nindexmap = { workspace = true, features = [\"serde\"] }\nipnet.workspace = true\nitertools.workspace = true\nitoa.workspace = true\njson = { path = \"../libs/proxy/json\" }\nlasso = { workspace = true, features = [\"multi-threaded\"] }\nmeasured = { workspace = true, features = [\"lasso\"] }\nmetrics.workspace = true\nmoka.workspace = true\nonce_cell.workspace = true\nopentelemetry = { workspace = true, features = [\"trace\"] }\npapaya = \"0.2.0\"\nparking_lot.workspace = true\nparquet.workspace = true\nparquet_derive.workspace = true\npin-project-lite.workspace = 
true\npostgres_backend.workspace = true\npostgres-client = { package = \"tokio-postgres2\", path = \"../libs/proxy/tokio-postgres2\" }\npostgres-protocol = { package = \"postgres-protocol2\", path = \"../libs/proxy/postgres-protocol2\" }\npq_proto.workspace = true\nrand.workspace = true\nrand_core.workspace = true\nregex.workspace = true\nremote_storage = { version = \"0.1\", path = \"../libs/remote_storage/\" }\nreqwest = { workspace = true, features = [\"rustls-tls-native-roots\"] }\nreqwest-middleware = { workspace = true, features = [\"json\"] }\nreqwest-retry.workspace = true\nreqwest-tracing.workspace = true\nrustc-hash.workspace = true\nrustls.workspace = true\nrustls-native-certs.workspace = true\nrustls-pemfile.workspace = true\nscopeguard.workspace = true\nserde.workspace = true\nserde_json.workspace = true\nsha2 = { workspace = true, features = [\"asm\", \"oid\"] }\nsmol_str.workspace = true\nsmallvec.workspace = true\nsocket2.workspace = true\nstrum_macros.workspace = true\nsubtle.workspace = true\nthiserror.workspace = true\ntikv-jemallocator.workspace = true\ntikv-jemalloc-ctl = { workspace = true, features = [\"use_std\"] }\ntokio-postgres = { workspace = true, optional = true }\ntokio-rustls.workspace = true\ntokio-util.workspace = true\ntokio = { workspace = true, features = [\"signal\"] }\ntracing-subscriber.workspace = true\ntracing-utils.workspace = true\ntracing.workspace = true\ntracing-log.workspace = true\ntracing-opentelemetry.workspace = true\ntry-lock.workspace = true\ntyped-json.workspace = true\nurl.workspace = true\nurlencoding.workspace = true\nutils.workspace = true\nuuid.workspace = true\nx509-cert.workspace = true\nredis.workspace = true\nzerocopy.workspace = true\nzeroize.workspace = true\n# uncomment this to use the real subzero-core crate\n# subzero-core = { git = \"https://github.com/neondatabase/subzero\", rev = \"396264617e78e8be428682f87469bb25429af88a\", features = [\"postgresql\"], optional = true }\n# this is a stub for 
the subzero-core crate\nsubzero-core = { path = \"../libs/proxy/subzero_core\", features = [\"postgresql\"], optional = true}\nouroboros = { version = \"0.18\", optional = true }\n\n# jwt stuff\njose-jwa = \"0.1.2\"\njose-jwk = { version = \"0.1.2\", features = [\"p256\", \"p384\", \"rsa\"] }\nsignature = \"2\"\necdsa = \"0.16\"\np256 = { version = \"0.13\", features = [\"jwk\"] }\ned25519-dalek = { version = \"2\", default-features = false, features = [\"rand_core\"] }\nrsa = \"0.9\"\n\nworkspace_hack.workspace = true\n\n[dev-dependencies]\nassert-json-diff.workspace = true\ncamino-tempfile.workspace = true\nfallible-iterator.workspace = true\nflate2.workspace = true\ntokio-tungstenite.workspace = true\npbkdf2 = { workspace = true, features = [\"simple\", \"std\"] }\nrcgen.workspace = true\nrstest.workspace = true\nwalkdir.workspace = true\nrand_distr = \"0.5\"\ntokio-postgres.workspace = true\ntracing-test = \"0.2\"\n"
  },
  {
    "path": "proxy/README.md",
    "content": "# Proxy\n\nProxy binary accepts `--auth-backend` CLI option, which determines auth scheme and cluster routing method. Following routing backends are currently implemented:\n\n* console\n  new SCRAM-based console API; uses SNI info to select the destination project (endpoint soon)\n* postgres\n  uses postgres to select auth secrets of existing roles. Useful for local testing\n* web (or link)\n  sends login link for all usernames\n\nAlso proxy can expose following services to the external world:\n\n* postgres protocol over TCP -- usual postgres endpoint compatible with usual\n  postgres drivers\n* postgres protocol over WebSockets -- same protocol tunneled over websockets\n  for environments where TCP connection is not available. We have our own\n  implementation of a client that uses node-postgres and tunnels traffic through\n  websockets: https://github.com/neondatabase/serverless\n* SQL over HTTP -- service that accepts POST requests with SQL text over HTTP\n  and responds with JSON-serialised results.\n\n\n## SQL over HTTP\n\nContrary to the usual postgres proto over TCP and WebSockets using plain\none-shot HTTP request achieves smaller amortized latencies in edge setups due to\nfewer round trips and an enhanced open connection reuse by the v8 engine. 
Also\nsuch endpoint could be used directly without any driver.\n\nTo play with it locally one may start proxy over a local postgres installation\n(see end of this page on how to generate certs with openssl):\n\n```\nLOGFMT=text ./target/debug/proxy -c server.crt -k server.key --auth-backend=postgres --auth-endpoint=postgres://stas@127.0.0.1:5432/stas --wss 0.0.0.0:4444\n```\n\nIf both postgres and proxy are running you may send a SQL query:\n```console\ncurl -k -X POST 'https://proxy.local.neon.build:4444/sql' \\\n  -H 'Neon-Connection-String: postgres://stas:pass@proxy.local.neon.build:4444/postgres' \\\n  -H 'Content-Type: application/json' \\\n  --data '{\n    \"query\":\"SELECT $1::int[] as arr, $2::jsonb as obj, 42 as num\",\n    \"params\":[ \"{{1,2},{\\\"3\\\",4}}\", {\"key\":\"val\", \"ikey\":4242}]\n  }' | jq\n```\n```json\n{\n  \"command\": \"SELECT\",\n  \"fields\": [\n    { \"dataTypeID\": 1007, \"name\": \"arr\" },\n    { \"dataTypeID\": 3802, \"name\": \"obj\" },\n    { \"dataTypeID\": 23, \"name\": \"num\" }\n  ],\n  \"rowCount\": 1,\n  \"rows\": [\n    {\n      \"arr\": [[1,2],[3,4]],\n      \"num\": 42,\n      \"obj\": {\n        \"ikey\": 4242,\n        \"key\": \"val\"\n      }\n    }\n  ]\n}\n```\n\n\nWith the current approach we made the following design decisions:\n\n1. SQL injection protection: We employed the extended query protocol, modifying\n   the rust-postgres driver to send queries in one roundtrip using a text\n   protocol rather than binary, bypassing potential issues like those identified\n   in sfackler/rust-postgres#1030.\n\n2. Postgres type compatibility: As not all postgres types have binary\n   representations (e.g., acl's in pg_class), we adjusted rust-postgres to\n   respond with text protocol, simplifying serialization and fixing queries with\n   text-only types in response.\n\n3. 
Data type conversion: Considering JSON supports fewer data types than\n   Postgres, we perform conversions where possible, passing all other types as\n   strings. Key conversions include:\n   - postgres int2, int4, float4, float8 -> json number (NaN and Inf remain\n     text)\n   - postgres bool, null, text -> json bool, null, string\n   - postgres array -> json array\n   - postgres json and jsonb -> json object\n\n4. Alignment with node-postgres: To facilitate integration with js libraries,\n   we've matched the response structure of node-postgres, returning command tags\n   and column oids. Command tag capturing was added to the rust-postgres\n   functionality as part of this change.\n\n### Output options\n\nUser can pass several optional headers that will affect resulting json.\n\n1. `Neon-Raw-Text-Output: true`. Return postgres values as text, without parsing them. So numbers, objects, booleans, nulls and arrays will be returned as text. That can be useful in cases when client code wants to implement its own parsing or reuse parsing libraries from e.g. node-postgres.\n2. `Neon-Array-Mode: true`. Return postgres rows as arrays instead of objects. That is a more compact representation and also helps in some edge\ncases where it is hard to use rows represented as objects (e.g. when several fields have the same name).\n\n## Test proxy locally\n\nProxy determines project name from the subdomain, request to the `round-rice-566201.somedomain.tld` will be routed to the project named `round-rice-566201`. Unfortunately, `/etc/hosts` does not support domain wildcards, so we can use `*.local.neon.build` which resolves to `127.0.0.1`.\n\nWe will need to have a postgres instance. 
Assuming that we have set up docker we can set it up as follows:\n```sh\ndocker run \\\n  --detach \\\n  --name proxy-postgres \\\n  --env POSTGRES_PASSWORD=proxy-postgres \\\n  --publish 5432:5432 \\\n  postgres:17-bookworm\n```\n\nNext step is setting up auth table and schema as well as creating role (without the JWT table):\n```sh\ndocker exec -it proxy-postgres psql -U postgres -c \"CREATE SCHEMA IF NOT EXISTS neon_control_plane\"\ndocker exec -it proxy-postgres psql -U postgres -c \"CREATE TABLE neon_control_plane.endpoints (endpoint_id VARCHAR(255) PRIMARY KEY, allowed_ips VARCHAR(255))\"\ndocker exec -it proxy-postgres psql -U postgres -c \"CREATE ROLE proxy WITH SUPERUSER LOGIN PASSWORD 'password';\"\n```\n\nIf you want to test query cancellation, redis is also required:\n```sh\ndocker run --detach --name proxy-redis --publish 6379:6379 redis:7.0\n```\n\nLet's create self-signed certificate by running:\n```sh\nopenssl req -new -x509 -days 365 -nodes -text -out server.crt -keyout server.key -subj \"/CN=*.local.neon.build\"\n```\n\nThen we need to build proxy with 'testing' feature and run, e.g.:\n```sh\nRUST_LOG=proxy LOGFMT=text cargo run -p proxy --bin proxy --features testing -- \\\n  --auth-backend postgres --auth-endpoint 'postgresql://postgres:proxy-postgres@127.0.0.1:5432/postgres' \\\n  --redis-auth-type=\"plain\" --redis-plain=\"redis://127.0.0.1:6379\" \\\n  -c server.crt -k server.key\n```\n\nNow from client you can start a new session:\n\n```sh\nPGSSLROOTCERT=./server.crt psql  \"postgresql://proxy:password@endpoint.local.neon.build:4432/postgres?sslmode=verify-full\"\n```\n\n## auth broker setup:\n\nCreate a postgres instance:\n```sh\ndocker run \\\n  --detach \\\n  --name proxy-postgres \\\n  --env POSTGRES_HOST_AUTH_METHOD=trust \\\n  --env POSTGRES_USER=authenticated \\\n  --env POSTGRES_DB=database \\\n  --publish 5432:5432 \\\n  postgres:17-bookworm\n```\n\nCreate a configuration file called `local_proxy.json` in the root of the repo (used 
also by the auth broker to validate JWTs)\n```sh\n{\n    \"jwks\": [\n        {\n            \"id\": \"1\",\n            \"role_names\": [\"authenticator\", \"authenticated\", \"anon\"],\n            \"jwks_url\": \"https://climbing-minnow-11.clerk.accounts.dev/.well-known/jwks.json\",\n            \"provider_name\": \"foo\",\n            \"jwt_audience\": null\n        }\n    ]\n}\n```\n\nStart the local proxy:\n```sh\ncargo run --bin local_proxy --features testing -- \\\n  --disable-pg-session-jwt \\\n  --http 0.0.0.0:7432\n```\n\nStart the auth/rest broker:\n\nNote: to enable the rest broker you need to replace the stub subzero-core crate with the real one.\n\n```sh\ncargo add -p proxy subzero-core --git https://github.com/neondatabase/subzero --rev 396264617e78e8be428682f87469bb25429af88a\n```\n\n```sh\nLOGFMT=text OTEL_SDK_DISABLED=true cargo run --bin proxy --features testing,rest_broker -- \\\n  -c server.crt -k server.key \\\n  --is-auth-broker true \\\n  --is-rest-broker true \\\n  --wss 0.0.0.0:8080 \\\n  --http 0.0.0.0:7002 \\\n  --auth-backend local\n```\n\nCreate a JWT in your auth provider (e.g. Clerk) and set it in the `NEON_JWT` environment variable.\n```sh\nexport NEON_JWT=\"...\"\n```\n\nRun a query against the auth broker:\n```sh\ncurl -k \"https://foo.local.neon.build:8080/sql\" \\\n  -H \"Authorization: Bearer $NEON_JWT\" \\\n  -H \"neon-connection-string: postgresql://authenticator@foo.local.neon.build/database\" \\\n  -d '{\"query\":\"select 1\",\"params\":[]}'\n```\n\nMake a rest request against the auth broker (rest broker):\n```sh\ncurl -k \"https://foo.local.neon.build:8080/database/rest/v1/items?select=id,name&id=eq.1\" \\\n-H \"Authorization: Bearer $NEON_JWT\"\n```\n"
  },
  {
    "path": "proxy/src/auth/backend/classic.rs",
    "content": "use tokio::io::{AsyncRead, AsyncWrite};\nuse tracing::{debug, info, warn};\n\nuse super::{ComputeCredentials, ComputeUserInfo};\nuse crate::auth::backend::ComputeCredentialKeys;\nuse crate::auth::{self, AuthFlow};\nuse crate::config::AuthenticationConfig;\nuse crate::context::RequestContext;\nuse crate::control_plane::AuthSecret;\nuse crate::stream::{PqStream, Stream};\nuse crate::{compute, sasl};\n\npub(super) async fn authenticate(\n    ctx: &RequestContext,\n    creds: ComputeUserInfo,\n    client: &mut PqStream<Stream<impl AsyncRead + AsyncWrite + Unpin>>,\n    config: &'static AuthenticationConfig,\n    secret: AuthSecret,\n) -> auth::Result<ComputeCredentials> {\n    let scram_keys = match secret {\n        AuthSecret::Scram(secret) => {\n            debug!(\"auth endpoint chooses SCRAM\");\n\n            let auth_outcome = tokio::time::timeout(\n                config.scram_protocol_timeout,\n                AuthFlow::new(client, auth::Scram(&secret, ctx)).authenticate(),\n            )\n            .await\n            .inspect_err(|_| warn!(\"error processing scram messages error = authentication timed out, execution time exceeded {} seconds\", config.scram_protocol_timeout.as_secs()))\n            .map_err(auth::AuthError::user_timeout)?\n            .inspect_err(|error| warn!(?error, \"error processing scram messages\"))?;\n\n            let client_key = match auth_outcome {\n                sasl::Outcome::Success(key) => key,\n                sasl::Outcome::Failure(reason) => {\n                    // TODO: warnings?\n                    // TODO: should we get rid of this because double logging?\n                    info!(\"auth backend failed with an error: {reason}\");\n                    return Err(auth::AuthError::password_failed(&*creds.user));\n                }\n            };\n\n            compute::ScramKeys {\n                client_key: client_key.as_bytes(),\n                server_key: secret.server_key.as_bytes(),\n         
   }\n        }\n    };\n\n    Ok(ComputeCredentials {\n        info: creds,\n        keys: ComputeCredentialKeys::AuthKeys(postgres_client::config::AuthKeys::ScramSha256(\n            scram_keys,\n        )),\n    })\n}\n"
  },
  {
    "path": "proxy/src/auth/backend/console_redirect.rs",
    "content": "use std::fmt;\n\nuse async_trait::async_trait;\nuse postgres_client::config::SslMode;\nuse thiserror::Error;\nuse tokio::io::{AsyncRead, AsyncWrite};\nuse tracing::{info, info_span};\n\nuse crate::auth::backend::ComputeUserInfo;\nuse crate::cache::Cached;\nuse crate::cache::node_info::CachedNodeInfo;\nuse crate::compute::AuthInfo;\nuse crate::config::AuthenticationConfig;\nuse crate::context::RequestContext;\nuse crate::control_plane::client::cplane_proxy_v1;\nuse crate::control_plane::{self, NodeInfo};\nuse crate::error::{ReportableError, UserFacingError};\nuse crate::pqproto::BeMessage;\nuse crate::proxy::NeonOptions;\nuse crate::proxy::wake_compute::WakeComputeBackend;\nuse crate::stream::PqStream;\nuse crate::types::RoleName;\nuse crate::{auth, compute, waiters};\n\n#[derive(Debug, Error)]\npub(crate) enum ConsoleRedirectError {\n    #[error(transparent)]\n    WaiterRegister(#[from] waiters::RegisterError),\n\n    #[error(transparent)]\n    WaiterWait(#[from] waiters::WaitError),\n\n    #[error(transparent)]\n    Io(#[from] std::io::Error),\n}\n\n#[derive(Debug)]\npub struct ConsoleRedirectBackend {\n    console_uri: reqwest::Url,\n    api: cplane_proxy_v1::NeonControlPlaneClient,\n}\n\nimpl fmt::Debug for cplane_proxy_v1::NeonControlPlaneClient {\n    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {\n        write!(f, \"NeonControlPlaneClient\")\n    }\n}\n\nimpl UserFacingError for ConsoleRedirectError {\n    fn to_string_client(&self) -> String {\n        \"Internal error\".to_string()\n    }\n}\n\nimpl ReportableError for ConsoleRedirectError {\n    fn get_error_kind(&self) -> crate::error::ErrorKind {\n        match self {\n            Self::WaiterRegister(_) => crate::error::ErrorKind::Service,\n            Self::WaiterWait(_) => crate::error::ErrorKind::Service,\n            Self::Io(_) => crate::error::ErrorKind::ClientDisconnect,\n        }\n    }\n}\n\nfn hello_message(\n    redirect_uri: &reqwest::Url,\n    session_id: 
&str,\n    duration: std::time::Duration,\n) -> String {\n    let formatted_duration = humantime::format_duration(duration).to_string();\n    format!(\n        concat![\n            \"Welcome to Neon!\\n\",\n            \"Authenticate by visiting (will expire in {duration}):\\n\",\n            \"    {redirect_uri}{session_id}\\n\\n\",\n        ],\n        duration = formatted_duration,\n        redirect_uri = redirect_uri,\n        session_id = session_id,\n    )\n}\n\npub(crate) fn new_psql_session_id() -> String {\n    hex::encode(rand::random::<[u8; 8]>())\n}\n\nimpl ConsoleRedirectBackend {\n    pub fn new(console_uri: reqwest::Url, api: cplane_proxy_v1::NeonControlPlaneClient) -> Self {\n        Self { console_uri, api }\n    }\n\n    pub(crate) fn get_api(&self) -> &cplane_proxy_v1::NeonControlPlaneClient {\n        &self.api\n    }\n\n    pub(crate) async fn authenticate(\n        &self,\n        ctx: &RequestContext,\n        auth_config: &'static AuthenticationConfig,\n        client: &mut PqStream<impl AsyncRead + AsyncWrite + Unpin>,\n    ) -> auth::Result<(ConsoleRedirectNodeInfo, AuthInfo, ComputeUserInfo)> {\n        authenticate(ctx, auth_config, &self.console_uri, client)\n            .await\n            .map(|(node_info, auth_info, user_info)| {\n                (ConsoleRedirectNodeInfo(node_info), auth_info, user_info)\n            })\n    }\n}\n\npub struct ConsoleRedirectNodeInfo(pub(super) NodeInfo);\n\n#[async_trait]\nimpl WakeComputeBackend for ConsoleRedirectNodeInfo {\n    async fn wake_compute(\n        &self,\n        _ctx: &RequestContext,\n    ) -> Result<CachedNodeInfo, control_plane::errors::WakeComputeError> {\n        Ok(Cached::new_uncached(self.0.clone()))\n    }\n}\n\nasync fn authenticate(\n    ctx: &RequestContext,\n    auth_config: &'static AuthenticationConfig,\n    link_uri: &reqwest::Url,\n    client: &mut PqStream<impl AsyncRead + AsyncWrite + Unpin>,\n) -> auth::Result<(NodeInfo, AuthInfo, ComputeUserInfo)> {\n    
ctx.set_auth_method(crate::context::AuthMethod::ConsoleRedirect);\n\n    // registering waiter can fail if we get unlucky with rng.\n    // just try again.\n    let (psql_session_id, waiter) = loop {\n        let psql_session_id = new_psql_session_id();\n\n        if let Ok(waiter) = control_plane::mgmt::get_waiter(&psql_session_id) {\n            break (psql_session_id, waiter);\n        }\n    };\n\n    let span = info_span!(\"console_redirect\", psql_session_id = &psql_session_id);\n    let greeting = hello_message(\n        link_uri,\n        &psql_session_id,\n        auth_config.console_redirect_confirmation_timeout,\n    );\n\n    // Give user a URL to spawn a new database.\n    info!(parent: &span, \"sending the auth URL to the user\");\n    client.write_message(BeMessage::AuthenticationOk);\n    client.write_message(BeMessage::ParameterStatus {\n        name: b\"client_encoding\",\n        value: b\"UTF8\",\n    });\n    client.write_message(BeMessage::NoticeResponse(&greeting));\n    client.flush().await?;\n\n    // Wait for console response via control plane (see `mgmt`).\n    info!(parent: &span, \"waiting for console's reply...\");\n    let db_info = tokio::time::timeout(auth_config.console_redirect_confirmation_timeout, waiter)\n        .await\n        .map_err(|_elapsed| {\n            auth::AuthError::confirmation_timeout(\n                auth_config.console_redirect_confirmation_timeout.into(),\n            )\n        })?\n        .map_err(ConsoleRedirectError::from)?;\n\n    if auth_config.ip_allowlist_check_enabled\n        && let Some(allowed_ips) = &db_info.allowed_ips\n        && !auth::check_peer_addr_is_in_list(&ctx.peer_addr(), allowed_ips)\n    {\n        return Err(auth::AuthError::ip_address_not_allowed(ctx.peer_addr()));\n    }\n\n    // Check if the access over the public internet is allowed, otherwise block. 
Note that\n    // the console redirect is not behind the VPC service endpoint, so we don't need to check\n    // the VPC endpoint ID.\n    if let Some(public_access_allowed) = db_info.public_access_allowed\n        && !public_access_allowed\n    {\n        return Err(auth::AuthError::NetworkNotAllowed);\n    }\n\n    // Backwards compatibility. pg_sni_proxy uses \"--\" in domain names\n    // while direct connections do not. Once we migrate to pg_sni_proxy\n    // everywhere, we can remove this.\n    let ssl_mode = if db_info.host.contains(\"--\") {\n        // we need TLS connection with SNI info to properly route it\n        SslMode::Require\n    } else {\n        SslMode::Disable\n    };\n\n    let conn_info = compute::ConnectInfo {\n        host: db_info.host.into(),\n        port: db_info.port,\n        ssl_mode,\n        host_addr: None,\n    };\n    let auth_info =\n        AuthInfo::for_console_redirect(&db_info.dbname, &db_info.user, db_info.password.as_deref());\n\n    let user: RoleName = db_info.user.into();\n    let user_info = ComputeUserInfo {\n        endpoint: db_info.aux.endpoint_id.as_str().into(),\n        user: user.clone(),\n        options: NeonOptions::default(),\n    };\n\n    ctx.set_dbname(db_info.dbname.into());\n    ctx.set_user(user);\n    ctx.set_project(db_info.aux.clone());\n    info!(\"woken up a compute node\");\n\n    Ok((\n        NodeInfo {\n            conn_info,\n            aux: db_info.aux,\n        },\n        auth_info,\n        user_info,\n    ))\n}\n"
  },
  {
    "path": "proxy/src/auth/backend/hacks.rs",
    "content": "use tokio::io::{AsyncRead, AsyncWrite};\nuse tracing::{debug, info};\n\nuse super::{ComputeCredentials, ComputeUserInfo, ComputeUserInfoNoEndpoint};\nuse crate::auth::{self, AuthFlow};\nuse crate::config::AuthenticationConfig;\nuse crate::context::RequestContext;\nuse crate::control_plane::AuthSecret;\nuse crate::intern::{EndpointIdInt, RoleNameInt};\nuse crate::sasl;\nuse crate::stream::{self, Stream};\n\n/// Compared to [SCRAM](crate::scram), cleartext password auth saves\n/// one round trip and *expensive* computations (>= 4096 HMAC iterations).\n/// These properties are benefical for serverless JS workers, so we\n/// use this mechanism for websocket connections.\npub(crate) async fn authenticate_cleartext(\n    ctx: &RequestContext,\n    info: ComputeUserInfo,\n    client: &mut stream::PqStream<Stream<impl AsyncRead + AsyncWrite + Unpin>>,\n    secret: AuthSecret,\n    config: &'static AuthenticationConfig,\n) -> auth::Result<ComputeCredentials> {\n    debug!(\"cleartext auth flow override is enabled, proceeding\");\n    ctx.set_auth_method(crate::context::AuthMethod::Cleartext);\n\n    let ep = EndpointIdInt::from(&info.endpoint);\n    let role = RoleNameInt::from(&info.user);\n\n    let auth_flow = AuthFlow::new(\n        client,\n        auth::CleartextPassword {\n            secret,\n            endpoint: ep,\n            role,\n            pool: config.scram_thread_pool.clone(),\n        },\n    );\n    let auth_outcome = {\n        // pause the timer while we communicate with the client\n        let _paused = ctx.latency_timer_pause(crate::metrics::Waiting::Client);\n\n        // cleartext auth is only allowed to the ws/http protocol.\n        // If we're here, we already received the password in the first message.\n        // Scram protocol will be executed on the proxy side.\n        auth_flow.authenticate().await?\n    };\n\n    let keys = match auth_outcome {\n        sasl::Outcome::Success(key) => key,\n        
sasl::Outcome::Failure(reason) => {\n            info!(\"auth backend failed with an error: {reason}\");\n            return Err(auth::AuthError::password_failed(&*info.user));\n        }\n    };\n\n    Ok(ComputeCredentials { info, keys })\n}\n\n/// Workaround for clients which don't provide an endpoint (project) name.\n/// Similar to [`authenticate_cleartext`], but there's a specific password format,\n/// and passwords are not yet validated (we don't know how to validate them!)\npub(crate) async fn password_hack_no_authentication(\n    ctx: &RequestContext,\n    info: ComputeUserInfoNoEndpoint,\n    client: &mut stream::PqStream<Stream<impl AsyncRead + AsyncWrite + Unpin>>,\n) -> auth::Result<(ComputeUserInfo, Vec<u8>)> {\n    debug!(\"project not specified, resorting to the password hack auth flow\");\n    ctx.set_auth_method(crate::context::AuthMethod::Cleartext);\n\n    // pause the timer while we communicate with the client\n    let _paused = ctx.latency_timer_pause(crate::metrics::Waiting::Client);\n\n    let payload = AuthFlow::new(client, auth::PasswordHack)\n        .get_password()\n        .await?;\n\n    debug!(project = &*payload.endpoint, \"received missing parameter\");\n\n    // Report tentative success; compute node will check the password anyway.\n    Ok((\n        ComputeUserInfo {\n            user: info.user,\n            options: info.options,\n            endpoint: payload.endpoint,\n        },\n        payload.password,\n    ))\n}\n"
  },
  {
    "path": "proxy/src/auth/backend/jwt.rs",
    "content": "use std::borrow::Cow;\nuse std::future::Future;\nuse std::sync::Arc;\nuse std::time::{Duration, SystemTime};\n\nuse arc_swap::ArcSwapOption;\nuse base64::Engine as _;\nuse base64::prelude::BASE64_URL_SAFE_NO_PAD;\nuse clashmap::ClashMap;\nuse jose_jwk::crypto::KeyInfo;\nuse reqwest::{Client, redirect};\nuse reqwest_retry::RetryTransientMiddleware;\nuse reqwest_retry::policies::ExponentialBackoff;\nuse serde::de::Visitor;\nuse serde::{Deserialize, Deserializer};\nuse serde_json::value::RawValue;\nuse signature::Verifier;\nuse thiserror::Error;\nuse tokio::time::Instant;\n\nuse crate::auth::backend::ComputeCredentialKeys;\nuse crate::context::RequestContext;\nuse crate::control_plane::errors::GetEndpointJwksError;\nuse crate::http::read_body_with_limit;\nuse crate::intern::RoleNameInt;\nuse crate::types::{EndpointId, RoleName};\n\n// TODO(conrad): make these configurable.\nconst CLOCK_SKEW_LEEWAY: Duration = Duration::from_secs(30);\nconst MIN_RENEW: Duration = Duration::from_secs(30);\nconst AUTO_RENEW: Duration = Duration::from_secs(300);\nconst MAX_RENEW: Duration = Duration::from_secs(3600);\nconst MAX_JWK_BODY_SIZE: usize = 64 * 1024;\nconst JWKS_USER_AGENT: &str = \"neon-proxy\";\n\nconst JWKS_CONNECT_TIMEOUT: Duration = Duration::from_secs(2);\nconst JWKS_FETCH_TIMEOUT: Duration = Duration::from_secs(5);\nconst JWKS_FETCH_RETRIES: u32 = 3;\n\n/// How to get the JWT auth rules\npub(crate) trait FetchAuthRules: Clone + Send + Sync + 'static {\n    fn fetch_auth_rules(\n        &self,\n        ctx: &RequestContext,\n        endpoint: EndpointId,\n    ) -> impl Future<Output = Result<Vec<AuthRule>, FetchAuthRulesError>> + Send;\n}\n\n#[derive(Error, Debug)]\npub(crate) enum FetchAuthRulesError {\n    #[error(transparent)]\n    GetEndpointJwks(#[from] GetEndpointJwksError),\n\n    #[error(\"JWKs settings for this role were not configured\")]\n    RoleJwksNotConfigured,\n}\n\n#[derive(Clone)]\npub(crate) struct AuthRule {\n    pub(crate) id: 
String,\n    pub(crate) jwks_url: url::Url,\n    pub(crate) audience: Option<String>,\n    pub(crate) role_names: Vec<RoleNameInt>,\n}\n\npub struct JwkCache {\n    client: reqwest_middleware::ClientWithMiddleware,\n\n    map: ClashMap<(EndpointId, RoleName), Arc<JwkCacheEntryLock>>,\n}\n\npub(crate) struct JwkCacheEntry {\n    /// Should refetch at least every hour to verify when old keys have been removed.\n    /// Should refetch when new key IDs are seen only every 5 minutes or so\n    last_retrieved: Instant,\n\n    /// cplane will return multiple JWKs urls that we need to scrape.\n    key_sets: ahash::HashMap<String, KeySet>,\n}\n\nimpl JwkCacheEntry {\n    fn find_jwk_and_audience(\n        &self,\n        key_id: &str,\n        role_name: &RoleName,\n    ) -> Option<(&jose_jwk::Jwk, Option<&str>)> {\n        self.key_sets\n            .values()\n            // make sure our requested role has access to the key set\n            .filter(|key_set| key_set.role_names.iter().any(|role| **role == **role_name))\n            // try and find the requested key-id in the key set\n            .find_map(|key_set| {\n                key_set\n                    .find_key(key_id)\n                    .map(|jwk| (jwk, key_set.audience.as_deref()))\n            })\n    }\n}\n\nstruct KeySet {\n    jwks: jose_jwk::JwkSet,\n    audience: Option<String>,\n    role_names: Vec<RoleNameInt>,\n}\n\nimpl KeySet {\n    fn find_key(&self, key_id: &str) -> Option<&jose_jwk::Jwk> {\n        self.jwks\n            .keys\n            .iter()\n            .find(|jwk| jwk.prm.kid.as_deref() == Some(key_id))\n    }\n}\n\npub(crate) struct JwkCacheEntryLock {\n    cached: ArcSwapOption<JwkCacheEntry>,\n    lookup: tokio::sync::Semaphore,\n}\n\nimpl Default for JwkCacheEntryLock {\n    fn default() -> Self {\n        JwkCacheEntryLock {\n            cached: ArcSwapOption::empty(),\n            lookup: tokio::sync::Semaphore::new(1),\n        }\n    }\n}\n\n#[derive(Deserialize)]\nstruct 
JwkSet<'a> {\n    /// we parse into raw-value because not all keys in a JWKS are ones\n    /// we can parse directly, so we parse them lazily.\n    #[serde(borrow)]\n    keys: Vec<&'a RawValue>,\n}\n\n/// Given a jwks_url, fetch the JWKS and parse out all the signing JWKs.\n/// Returns `None` and logs a warning if there are any errors.\nasync fn fetch_jwks(\n    client: &reqwest_middleware::ClientWithMiddleware,\n    jwks_url: url::Url,\n) -> Option<jose_jwk::JwkSet> {\n    let req = client.get(jwks_url.clone());\n    // TODO(conrad): We need to filter out URLs that point to local resources. Public internet only.\n    let resp = req.send().await.and_then(|r| {\n        r.error_for_status()\n            .map_err(reqwest_middleware::Error::Reqwest)\n    });\n\n    let resp = match resp {\n        Ok(r) => r,\n        // TODO: should we re-insert JWKs if we want to keep this JWKs URL?\n        // I expect these failures would be quite sparse.\n        Err(e) => {\n            tracing::warn!(url=?jwks_url, error=?e, \"could not fetch JWKs\");\n            return None;\n        }\n    };\n\n    let resp: http::Response<reqwest::Body> = resp.into();\n\n    let bytes = match read_body_with_limit(resp.into_body(), MAX_JWK_BODY_SIZE).await {\n        Ok(bytes) => bytes,\n        Err(e) => {\n            tracing::warn!(url=?jwks_url, error=?e, \"could not decode JWKs\");\n            return None;\n        }\n    };\n\n    let jwks = match serde_json::from_slice::<JwkSet>(&bytes) {\n        Ok(jwks) => jwks,\n        Err(e) => {\n            tracing::warn!(url=?jwks_url, error=?e, \"could not decode JWKs\");\n            return None;\n        }\n    };\n\n    // `jose_jwk::Jwk` is quite large (288 bytes). Let's not pre-allocate for what we don't need.\n    //\n    // Even though we limit our responses to 64KiB, we could still receive a payload like\n    // `{\"keys\":[` + repeat(`0`).take(30000).join(`,`) + `]}`. 
Parsing this as `RawValue` uses 468KiB.\n    // Pre-allocating the corresponding `Vec::<jose_jwk::Jwk>::with_capacity(30000)` uses 8.2MiB.\n    let mut keys = vec![];\n\n    let mut failed = 0;\n    for key in jwks.keys {\n        let key = match serde_json::from_str::<jose_jwk::Jwk>(key.get()) {\n            Ok(key) => key,\n            Err(e) => {\n                tracing::debug!(url=?jwks_url, failed=?e, \"could not decode JWK\");\n                failed += 1;\n                continue;\n            }\n        };\n\n        // if `use` (called `cls` in rust) is specified to be something other than signing,\n        // we can skip storing it.\n        if key\n            .prm\n            .cls\n            .as_ref()\n            .is_some_and(|c| *c != jose_jwk::Class::Signing)\n        {\n            continue;\n        }\n\n        keys.push(key);\n    }\n\n    keys.shrink_to_fit();\n\n    if failed > 0 {\n        tracing::warn!(url=?jwks_url, failed, \"could not decode JWKs\");\n    }\n\n    if keys.is_empty() {\n        tracing::warn!(url=?jwks_url, \"no valid JWKs found inside the response body\");\n        return None;\n    }\n\n    Some(jose_jwk::JwkSet { keys })\n}\n\nimpl JwkCacheEntryLock {\n    async fn acquire_permit(self: &Arc<Self>) -> JwkRenewalPermit<'_> {\n        JwkRenewalPermit::acquire_permit(self).await\n    }\n\n    fn try_acquire_permit(self: &Arc<Self>) -> Option<JwkRenewalPermit<'_>> {\n        JwkRenewalPermit::try_acquire_permit(self)\n    }\n\n    async fn renew_jwks<F: FetchAuthRules>(\n        &self,\n        _permit: JwkRenewalPermit<'_>,\n        ctx: &RequestContext,\n        client: &reqwest_middleware::ClientWithMiddleware,\n        endpoint: EndpointId,\n        auth_rules: &F,\n    ) -> Result<Arc<JwkCacheEntry>, JwtError> {\n        // double check that no one beat us to updating the cache.\n        let now = Instant::now();\n        let guard = self.cached.load_full();\n        if let Some(cached) = guard {\n            let 
last_update = now.duration_since(cached.last_retrieved);\n            if last_update < Duration::from_secs(300) {\n                return Ok(cached);\n            }\n        }\n\n        let rules = auth_rules.fetch_auth_rules(ctx, endpoint).await?;\n        let mut key_sets =\n            ahash::HashMap::with_capacity_and_hasher(rules.len(), ahash::RandomState::new());\n\n        // TODO(conrad): run concurrently\n        // TODO(conrad): strip the JWKs urls (should be checked by cplane as well - cloud#16284)\n        for rule in rules {\n            if let Some(jwks) = fetch_jwks(client, rule.jwks_url).await {\n                key_sets.insert(\n                    rule.id,\n                    KeySet {\n                        jwks,\n                        audience: rule.audience,\n                        role_names: rule.role_names,\n                    },\n                );\n            }\n        }\n\n        let entry = Arc::new(JwkCacheEntry {\n            last_retrieved: now,\n            key_sets,\n        });\n        self.cached.swap(Some(Arc::clone(&entry)));\n\n        Ok(entry)\n    }\n\n    async fn get_or_update_jwk_cache<F: FetchAuthRules>(\n        self: &Arc<Self>,\n        ctx: &RequestContext,\n        client: &reqwest_middleware::ClientWithMiddleware,\n        endpoint: EndpointId,\n        fetch: &F,\n    ) -> Result<Arc<JwkCacheEntry>, JwtError> {\n        let now = Instant::now();\n        let guard = self.cached.load_full();\n\n        // if we have no cached JWKs, try and get some\n        let Some(cached) = guard else {\n            let _paused = ctx.latency_timer_pause(crate::metrics::Waiting::Compute);\n            let permit = self.acquire_permit().await;\n            return self.renew_jwks(permit, ctx, client, endpoint, fetch).await;\n        };\n\n        let last_update = now.duration_since(cached.last_retrieved);\n\n        // check if the cached JWKs need updating.\n        if last_update > MAX_RENEW {\n            let _paused 
= ctx.latency_timer_pause(crate::metrics::Waiting::Compute);\n            let permit = self.acquire_permit().await;\n\n            // it's been too long since we checked the keys. wait for them to update.\n            return self.renew_jwks(permit, ctx, client, endpoint, fetch).await;\n        }\n\n        // every 5 minutes we should spawn a job to eagerly update the token.\n        if last_update > AUTO_RENEW {\n            if let Some(permit) = self.try_acquire_permit() {\n                tracing::debug!(\"JWKs should be renewed. Renewal permit acquired\");\n                let permit = permit.into_owned();\n                let entry = self.clone();\n                let client = client.clone();\n                let fetch = fetch.clone();\n                let ctx = ctx.clone();\n                tokio::spawn(async move {\n                    if let Err(e) = entry\n                        .renew_jwks(permit, &ctx, &client, endpoint, &fetch)\n                        .await\n                    {\n                        tracing::warn!(error=?e, \"could not fetch JWKs in background job\");\n                    }\n                });\n            } else {\n                tracing::debug!(\"JWKs should be renewed. Renewal permit already taken, skipping\");\n            }\n        }\n\n        Ok(cached)\n    }\n\n    async fn check_jwt<F: FetchAuthRules>(\n        self: &Arc<Self>,\n        ctx: &RequestContext,\n        jwt: &str,\n        client: &reqwest_middleware::ClientWithMiddleware,\n        endpoint: EndpointId,\n        role_name: &RoleName,\n        fetch: &F,\n    ) -> Result<ComputeCredentialKeys, JwtError> {\n        // JWT compact form is defined to be\n        // <B64(Header)> || . || <B64(Payload)> || . || <B64(Signature)>\n        // where Signature = alg(<B64(Header)> || . 
|| <B64(Payload)>);\n\n        let (header_payload, signature) = jwt\n            .rsplit_once('.')\n            .ok_or(JwtEncodingError::InvalidCompactForm)?;\n        let (header, payload) = header_payload\n            .split_once('.')\n            .ok_or(JwtEncodingError::InvalidCompactForm)?;\n\n        let header = BASE64_URL_SAFE_NO_PAD.decode(header)?;\n        let header = serde_json::from_slice::<JwtHeader<'_>>(&header)?;\n\n        let payloadb = BASE64_URL_SAFE_NO_PAD.decode(payload)?;\n        let payload = serde_json::from_slice::<JwtPayload<'_>>(&payloadb)?;\n\n        if let Some(iss) = &payload.issuer {\n            ctx.set_jwt_issuer(iss.as_ref().to_owned());\n        }\n\n        let sig = BASE64_URL_SAFE_NO_PAD.decode(signature)?;\n\n        let kid = header.key_id.ok_or(JwtError::MissingKeyId)?;\n\n        let mut guard = self\n            .get_or_update_jwk_cache(ctx, client, endpoint.clone(), fetch)\n            .await?;\n\n        // get the key from the JWKs if possible. 
If not, wait for the keys to update.\n        let (jwk, expected_audience) = loop {\n            match guard.find_jwk_and_audience(&kid, role_name) {\n                Some(jwk) => break jwk,\n                None if guard.last_retrieved.elapsed() > MIN_RENEW => {\n                    let _paused = ctx.latency_timer_pause(crate::metrics::Waiting::Compute);\n\n                    let permit = self.acquire_permit().await;\n                    guard = self\n                        .renew_jwks(permit, ctx, client, endpoint.clone(), fetch)\n                        .await?;\n                }\n                _ => return Err(JwtError::JwkNotFound),\n            }\n        };\n\n        if !jwk.is_supported(&header.algorithm) {\n            return Err(JwtError::SignatureAlgorithmNotSupported);\n        }\n\n        match &jwk.key {\n            jose_jwk::Key::Ec(key) => {\n                verify_ec_signature(header_payload.as_bytes(), &sig, key)?;\n            }\n            jose_jwk::Key::Rsa(key) => {\n                verify_rsa_signature(header_payload.as_bytes(), &sig, key, &header.algorithm)?;\n            }\n            key => return Err(JwtError::UnsupportedKeyType(key.into())),\n        }\n\n        tracing::debug!(?payload, \"JWT signature valid with claims\");\n\n        if let Some(aud) = expected_audience\n            && payload.audience.0.iter().all(|s| s != aud)\n        {\n            return Err(JwtError::InvalidClaims(\n                JwtClaimsError::InvalidJwtTokenAudience,\n            ));\n        }\n\n        let now = SystemTime::now();\n\n        if let Some(exp) = payload.expiration\n            && now >= exp + CLOCK_SKEW_LEEWAY\n        {\n            return Err(JwtError::InvalidClaims(JwtClaimsError::JwtTokenHasExpired(\n                exp.duration_since(SystemTime::UNIX_EPOCH)\n                    .unwrap_or_default()\n                    .as_secs(),\n            )));\n        }\n\n        if let Some(nbf) = payload.not_before\n            && 
nbf >= now + CLOCK_SKEW_LEEWAY\n        {\n            return Err(JwtError::InvalidClaims(\n                JwtClaimsError::JwtTokenNotYetReadyToUse(\n                    nbf.duration_since(SystemTime::UNIX_EPOCH)\n                        .unwrap_or_default()\n                        .as_secs(),\n                ),\n            ));\n        }\n\n        Ok(ComputeCredentialKeys::JwtPayload(payloadb))\n    }\n}\n\nimpl JwkCache {\n    pub(crate) async fn check_jwt<F: FetchAuthRules>(\n        &self,\n        ctx: &RequestContext,\n        endpoint: EndpointId,\n        role_name: &RoleName,\n        fetch: &F,\n        jwt: &str,\n    ) -> Result<ComputeCredentialKeys, JwtError> {\n        // try with just a read lock first\n        let key = (endpoint.clone(), role_name.clone());\n        let entry = self.map.get(&key).as_deref().map(Arc::clone);\n        let entry = entry.unwrap_or_else(|| {\n            // acquire a write lock after to insert.\n            let entry = self.map.entry(key).or_default();\n            Arc::clone(&*entry)\n        });\n\n        entry\n            .check_jwt(ctx, jwt, &self.client, endpoint, role_name, fetch)\n            .await\n    }\n}\n\nimpl Default for JwkCache {\n    fn default() -> Self {\n        let client = Client::builder()\n            .user_agent(JWKS_USER_AGENT)\n            .redirect(redirect::Policy::none())\n            .tls_built_in_native_certs(true)\n            .connect_timeout(JWKS_CONNECT_TIMEOUT)\n            .timeout(JWKS_FETCH_TIMEOUT)\n            .build()\n            .expect(\"client config should be valid\");\n\n        // Retry up to 3 times with increasing intervals between attempts.\n        let retry_policy = ExponentialBackoff::builder().build_with_max_retries(JWKS_FETCH_RETRIES);\n\n        let client = reqwest_middleware::ClientBuilder::new(client)\n            .with(RetryTransientMiddleware::new_with_policy(retry_policy))\n            .build();\n\n        JwkCache {\n            client,\n         
   map: ClashMap::default(),\n        }\n    }\n}\n\nfn verify_ec_signature(data: &[u8], sig: &[u8], key: &jose_jwk::Ec) -> Result<(), JwtError> {\n    use ecdsa::Signature;\n    use signature::Verifier;\n\n    match key.crv {\n        jose_jwk::EcCurves::P256 => {\n            let pk = p256::PublicKey::try_from(key).map_err(JwtError::InvalidP256Key)?;\n            let key = p256::ecdsa::VerifyingKey::from(&pk);\n            let sig = Signature::from_slice(sig)?;\n            key.verify(data, &sig)?;\n        }\n        key => return Err(JwtError::UnsupportedEcKeyType(key)),\n    }\n\n    Ok(())\n}\n\nfn verify_rsa_signature(\n    data: &[u8],\n    sig: &[u8],\n    key: &jose_jwk::Rsa,\n    alg: &jose_jwa::Algorithm,\n) -> Result<(), JwtError> {\n    use jose_jwa::{Algorithm, Signing};\n    use rsa::RsaPublicKey;\n    use rsa::pkcs1v15::{Signature, VerifyingKey};\n\n    let key = RsaPublicKey::try_from(key).map_err(JwtError::InvalidRsaKey)?;\n\n    match alg {\n        Algorithm::Signing(Signing::Rs256) => {\n            let key = VerifyingKey::<sha2::Sha256>::new(key);\n            let sig = Signature::try_from(sig)?;\n            key.verify(data, &sig)?;\n        }\n        _ => return Err(JwtError::InvalidRsaSigningAlgorithm),\n    }\n\n    Ok(())\n}\n\n/// <https://datatracker.ietf.org/doc/html/rfc7515#section-4.1>\n#[derive(serde::Deserialize, serde::Serialize)]\nstruct JwtHeader<'a> {\n    /// must be a supported alg\n    #[serde(rename = \"alg\")]\n    algorithm: jose_jwa::Algorithm,\n    /// key id, must be provided for our usecase\n    #[serde(rename = \"kid\", borrow)]\n    key_id: Option<Cow<'a, str>>,\n}\n\n/// <https://datatracker.ietf.org/doc/html/rfc7519#section-4.1>\n#[derive(serde::Deserialize, Debug)]\n#[allow(dead_code)]\nstruct JwtPayload<'a> {\n    /// Audience - Recipient for which the JWT is intended\n    #[serde(rename = \"aud\", default)]\n    audience: OneOrMany,\n    /// Expiration - Time after which the JWT expires\n    #[serde(rename = 
\"exp\", deserialize_with = \"numeric_date_opt\", default)]\n    expiration: Option<SystemTime>,\n    /// Not before - Time before which the JWT is not valid\n    #[serde(rename = \"nbf\", deserialize_with = \"numeric_date_opt\", default)]\n    not_before: Option<SystemTime>,\n\n    // the following entries are only extracted for the sake of debug logging.\n    /// Issuer of the JWT\n    #[serde(rename = \"iss\", borrow)]\n    issuer: Option<Cow<'a, str>>,\n    /// Subject of the JWT (the user)\n    #[serde(rename = \"sub\", borrow)]\n    subject: Option<Cow<'a, str>>,\n    /// Unique token identifier\n    #[serde(rename = \"jti\", borrow)]\n    jwt_id: Option<Cow<'a, str>>,\n    /// Unique session identifier\n    #[serde(rename = \"sid\", borrow)]\n    session_id: Option<Cow<'a, str>>,\n}\n\n/// `OneOrMany` supports parsing either a single item or an array of items.\n///\n/// Needed for <https://datatracker.ietf.org/doc/html/rfc7519#section-4.1.3>\n///\n/// > The \"aud\" (audience) claim identifies the recipients that the JWT is\n/// > intended for.  Each principal intended to process the JWT MUST\n/// > identify itself with a value in the audience claim.  If the principal\n/// > processing the claim does not identify itself with a value in the\n/// > \"aud\" claim when this claim is present, then the JWT MUST be\n/// > rejected.  In the general case, the \"aud\" value is **an array of case-\n/// > sensitive strings**, each containing a StringOrURI value.  In the\n/// > special case when the JWT has one audience, the \"aud\" value MAY be a\n/// > **single case-sensitive string** containing a StringOrURI value.  
The\n/// > interpretation of audience values is generally application specific.\n/// > Use of this claim is OPTIONAL.\n#[derive(Default, Debug)]\nstruct OneOrMany(Vec<String>);\n\nimpl<'de> Deserialize<'de> for OneOrMany {\n    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>\n    where\n        D: Deserializer<'de>,\n    {\n        struct OneOrManyVisitor;\n        impl<'de> Visitor<'de> for OneOrManyVisitor {\n            type Value = OneOrMany;\n\n            fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {\n                formatter.write_str(\"a single string or an array of strings\")\n            }\n\n            fn visit_str<E>(self, v: &str) -> Result<Self::Value, E>\n            where\n                E: serde::de::Error,\n            {\n                Ok(OneOrMany(vec![v.to_owned()]))\n            }\n\n            fn visit_seq<A>(self, mut seq: A) -> Result<Self::Value, A::Error>\n            where\n                A: serde::de::SeqAccess<'de>,\n            {\n                let mut v = vec![];\n                while let Some(s) = seq.next_element()? 
{\n                    v.push(s);\n                }\n                Ok(OneOrMany(v))\n            }\n        }\n        deserializer.deserialize_any(OneOrManyVisitor)\n    }\n}\n\nfn numeric_date_opt<'de, D: Deserializer<'de>>(d: D) -> Result<Option<SystemTime>, D::Error> {\n    <Option<u64>>::deserialize(d)?\n        .map(|t| {\n            SystemTime::UNIX_EPOCH\n                .checked_add(Duration::from_secs(t))\n                .ok_or_else(|| {\n                    serde::de::Error::custom(format_args!(\"timestamp out of bounds: {t}\"))\n                })\n        })\n        .transpose()\n}\n\nstruct JwkRenewalPermit<'a> {\n    inner: Option<JwkRenewalPermitInner<'a>>,\n}\n\nenum JwkRenewalPermitInner<'a> {\n    Owned(Arc<JwkCacheEntryLock>),\n    Borrowed(&'a Arc<JwkCacheEntryLock>),\n}\n\nimpl JwkRenewalPermit<'_> {\n    fn into_owned(mut self) -> JwkRenewalPermit<'static> {\n        JwkRenewalPermit {\n            inner: self.inner.take().map(JwkRenewalPermitInner::into_owned),\n        }\n    }\n\n    async fn acquire_permit(from: &Arc<JwkCacheEntryLock>) -> JwkRenewalPermit<'_> {\n        match from.lookup.acquire().await {\n            Ok(permit) => {\n                permit.forget();\n                JwkRenewalPermit {\n                    inner: Some(JwkRenewalPermitInner::Borrowed(from)),\n                }\n            }\n            Err(_) => panic!(\"semaphore should not be closed\"),\n        }\n    }\n\n    fn try_acquire_permit(from: &Arc<JwkCacheEntryLock>) -> Option<JwkRenewalPermit<'_>> {\n        match from.lookup.try_acquire() {\n            Ok(permit) => {\n                permit.forget();\n                Some(JwkRenewalPermit {\n                    inner: Some(JwkRenewalPermitInner::Borrowed(from)),\n                })\n            }\n            Err(tokio::sync::TryAcquireError::NoPermits) => None,\n            Err(tokio::sync::TryAcquireError::Closed) => panic!(\"semaphore should not be closed\"),\n        }\n    }\n}\n\nimpl 
JwkRenewalPermitInner<'_> {\n    fn into_owned(self) -> JwkRenewalPermitInner<'static> {\n        match self {\n            JwkRenewalPermitInner::Owned(p) => JwkRenewalPermitInner::Owned(p),\n            JwkRenewalPermitInner::Borrowed(p) => JwkRenewalPermitInner::Owned(Arc::clone(p)),\n        }\n    }\n}\n\nimpl Drop for JwkRenewalPermit<'_> {\n    fn drop(&mut self) {\n        let entry = match &self.inner {\n            None => return,\n            Some(JwkRenewalPermitInner::Owned(p)) => p,\n            Some(JwkRenewalPermitInner::Borrowed(p)) => *p,\n        };\n        entry.lookup.add_permits(1);\n    }\n}\n\n#[derive(Error, Debug)]\n#[non_exhaustive]\npub(crate) enum JwtError {\n    #[error(\"jwk not found\")]\n    JwkNotFound,\n\n    #[error(\"missing key id\")]\n    MissingKeyId,\n\n    #[error(\"Provided authentication token is not a valid JWT encoding\")]\n    JwtEncoding(#[from] JwtEncodingError),\n\n    #[error(transparent)]\n    InvalidClaims(#[from] JwtClaimsError),\n\n    #[error(\"invalid P256 key\")]\n    InvalidP256Key(jose_jwk::crypto::Error),\n\n    #[error(\"invalid RSA key\")]\n    InvalidRsaKey(jose_jwk::crypto::Error),\n\n    #[error(\"invalid RSA signing algorithm\")]\n    InvalidRsaSigningAlgorithm,\n\n    #[error(\"unsupported EC key type {0:?}\")]\n    UnsupportedEcKeyType(jose_jwk::EcCurves),\n\n    #[error(\"unsupported key type {0:?}\")]\n    UnsupportedKeyType(KeyType),\n\n    #[error(\"signature algorithm not supported\")]\n    SignatureAlgorithmNotSupported,\n\n    #[error(\"signature error: {0}\")]\n    Signature(#[from] signature::Error),\n\n    #[error(\"failed to fetch auth rules: {0}\")]\n    FetchAuthRules(#[from] FetchAuthRulesError),\n}\n\nimpl From<base64::DecodeError> for JwtError {\n    fn from(err: base64::DecodeError) -> Self {\n        JwtEncodingError::Base64Decode(err).into()\n    }\n}\n\nimpl From<serde_json::Error> for JwtError {\n    fn from(err: serde_json::Error) -> Self {\n        
JwtEncodingError::SerdeJson(err).into()\n    }\n}\n\n#[derive(Error, Debug)]\n#[non_exhaustive]\npub enum JwtEncodingError {\n    #[error(transparent)]\n    Base64Decode(#[from] base64::DecodeError),\n\n    #[error(transparent)]\n    SerdeJson(#[from] serde_json::Error),\n\n    #[error(\"invalid compact form\")]\n    InvalidCompactForm,\n}\n\n#[derive(Error, Debug, PartialEq)]\n#[non_exhaustive]\npub enum JwtClaimsError {\n    #[error(\"invalid JWT token audience\")]\n    InvalidJwtTokenAudience,\n\n    #[error(\"JWT token has expired (exp={0})\")]\n    JwtTokenHasExpired(u64),\n\n    #[error(\"JWT token is not yet ready to use (nbf={0})\")]\n    JwtTokenNotYetReadyToUse(u64),\n}\n\n#[allow(dead_code, reason = \"Debug use only\")]\n#[derive(Debug)]\npub(crate) enum KeyType {\n    Ec(jose_jwk::EcCurves),\n    Rsa,\n    Oct,\n    Okp(jose_jwk::OkpCurves),\n    Unknown,\n}\n\nimpl From<&jose_jwk::Key> for KeyType {\n    fn from(key: &jose_jwk::Key) -> Self {\n        match key {\n            jose_jwk::Key::Ec(ec) => Self::Ec(ec.crv),\n            jose_jwk::Key::Rsa(_rsa) => Self::Rsa,\n            jose_jwk::Key::Oct(_oct) => Self::Oct,\n            jose_jwk::Key::Okp(okp) => Self::Okp(okp.crv),\n            _ => Self::Unknown,\n        }\n    }\n}\n\n#[cfg(test)]\nmod tests {\n    use std::future::IntoFuture;\n    use std::net::SocketAddr;\n    use std::time::SystemTime;\n\n    use bytes::Bytes;\n    use http::Response;\n    use http_body_util::Full;\n    use hyper::service::service_fn;\n    use hyper_util::rt::TokioIo;\n    use rand_core::OsRng;\n    use rsa::pkcs8::DecodePrivateKey;\n    use serde::Serialize;\n    use serde_json::json;\n    use signature::Signer;\n    use tokio::net::TcpListener;\n\n    use super::*;\n    use crate::types::RoleName;\n\n    fn new_ec_jwk(kid: String) -> (p256::SecretKey, jose_jwk::Jwk) {\n        let sk = p256::SecretKey::random(&mut OsRng);\n        let pk = sk.public_key().into();\n        let jwk = jose_jwk::Jwk {\n            
key: jose_jwk::Key::Ec(pk),\n            prm: jose_jwk::Parameters {\n                kid: Some(kid),\n                alg: Some(jose_jwa::Algorithm::Signing(jose_jwa::Signing::Es256)),\n                ..Default::default()\n            },\n        };\n        (sk, jwk)\n    }\n\n    fn new_rsa_jwk(key: &str, kid: String) -> (rsa::RsaPrivateKey, jose_jwk::Jwk) {\n        let sk = rsa::RsaPrivateKey::from_pkcs8_pem(key).unwrap();\n        let pk = sk.to_public_key().into();\n        let jwk = jose_jwk::Jwk {\n            key: jose_jwk::Key::Rsa(pk),\n            prm: jose_jwk::Parameters {\n                kid: Some(kid),\n                alg: Some(jose_jwa::Algorithm::Signing(jose_jwa::Signing::Rs256)),\n                ..Default::default()\n            },\n        };\n        (sk, jwk)\n    }\n\n    fn now() -> u64 {\n        SystemTime::now()\n            .duration_since(SystemTime::UNIX_EPOCH)\n            .unwrap()\n            .as_secs()\n    }\n\n    fn build_jwt_payload(kid: String, sig: jose_jwa::Signing) -> String {\n        let now = now();\n        let body = typed_json::json! 
{{\n            \"exp\": now + 3600,\n            \"nbf\": now,\n            \"aud\": [\"audience1\", \"neon\", \"audience2\"],\n            \"sub\": \"user1\",\n            \"sid\": \"session1\",\n            \"jti\": \"token1\",\n            \"iss\": \"neon-testing\",\n        }};\n        build_custom_jwt_payload(kid, body, sig)\n    }\n\n    fn build_custom_jwt_payload(\n        kid: String,\n        body: impl Serialize,\n        sig: jose_jwa::Signing,\n    ) -> String {\n        let header = JwtHeader {\n            algorithm: jose_jwa::Algorithm::Signing(sig),\n            key_id: Some(Cow::Owned(kid)),\n        };\n\n        let header = BASE64_URL_SAFE_NO_PAD.encode(serde_json::to_string(&header).unwrap());\n        let body = BASE64_URL_SAFE_NO_PAD.encode(serde_json::to_string(&body).unwrap());\n\n        format!(\"{header}.{body}\")\n    }\n\n    fn new_ec_jwt(kid: String, key: &p256::SecretKey) -> String {\n        use p256::ecdsa::{Signature, SigningKey};\n\n        let payload = build_jwt_payload(kid, jose_jwa::Signing::Es256);\n        let sig: Signature = SigningKey::from(key).sign(payload.as_bytes());\n        let sig = BASE64_URL_SAFE_NO_PAD.encode(sig.to_bytes());\n\n        format!(\"{payload}.{sig}\")\n    }\n\n    fn new_custom_ec_jwt(kid: String, key: &p256::SecretKey, body: impl Serialize) -> String {\n        use p256::ecdsa::{Signature, SigningKey};\n\n        let payload = build_custom_jwt_payload(kid, body, jose_jwa::Signing::Es256);\n        let sig: Signature = SigningKey::from(key).sign(payload.as_bytes());\n        let sig = BASE64_URL_SAFE_NO_PAD.encode(sig.to_bytes());\n\n        format!(\"{payload}.{sig}\")\n    }\n\n    fn new_rsa_jwt(kid: String, key: rsa::RsaPrivateKey) -> String {\n        use rsa::pkcs1v15::SigningKey;\n        use rsa::signature::SignatureEncoding;\n\n        let payload = build_jwt_payload(kid, jose_jwa::Signing::Rs256);\n        let sig = SigningKey::<sha2::Sha256>::new(key).sign(payload.as_bytes());\n    
    let sig = BASE64_URL_SAFE_NO_PAD.encode(sig.to_bytes());\n\n        format!(\"{payload}.{sig}\")\n    }\n\n    // RSA key gen is slow....\n    const RS1: &str = \"-----BEGIN PRIVATE KEY-----\nMIIEvwIBADANBgkqhkiG9w0BAQEFAASCBKkwggSlAgEAAoIBAQDNuWBIWTlo+54Y\naifpGInIrpv6LlsbI/2/2CC81Arlx4RsABORklgA9XSGwaCbHTshHsfd1S916JwA\nSpjyPQYWfqo6iAV8a4MhjIeJIkRr74prDCSzOGZvIc6VaGeCIb9clf3HSrPHm3hA\ncfLMB8/p5MgoxERPDOIn3XYoS9SEEuP7l0LkmEZMerg6W6lDjQRDny0Lb50Jky9X\nmDqnYXBhs99ranbwL5vjy0ba6OIeCWFJme5u+rv5C/P0BOYrJfGxIcEoKa8Ukw5s\nPlM+qrz9ope1eOuXMNNdyFDReNBUyaM1AwBAayU5rz57crer7K/UIofaJ42T4cMM\nnx/SWfBNAgMBAAECggEACqdpBxYn1PoC6/zDaFzu9celKEWyTiuE/qRwvZa1ocS9\nZOJ0IPvVNud/S2NHsADJiSOQ8joSJScQvSsf1Ju4bv3MTw+wSQtAVUJz2nQ92uEi\n5/xPAkEPfP3hNvebNLAOuvrBk8qYmOPCTIQaMNrOt6wzeXkAmJ9wLuRXNCsJLHW+\nKLpf2WdgTYxqK06ZiJERFgJ2r1MsC2IgTydzjOAdEIrtMarerTLqqCpwFrk/l0cz\n1O2OAb17ZxmhuzMhjNMin81c8F2fZAGMeOjn92Jl5kUsYw/pG+0S8QKlbveR/fdP\nWe2tJsgXw2zD0q7OJpp8NXS2yddrZGyysYsof983wQKBgQD2McqNJqo+eWL5zony\nUbL19loYw0M15EjhzIuzW1Jk0rPj65yQyzpJ6pqicRuWr34MvzCx+ZHM2b3jSiNu\nGES2fnC7xLIKyeRxfqsXF71xz+6UStEGRQX27r1YWEtyQVuBhvlqB+AGWP3PYAC+\nHecZecnZ+vcihJ2K3+l5O3paVQKBgQDV6vKH5h2SY9vgO8obx0P7XSS+djHhmPuU\nf8C/Fq6AuRbIA1g04pzuLU2WS9T26eIjgM173uVNg2TuqJveWzz+CAAp6nCR6l24\nDBg49lMGCWrMo4FqPG46QkUqvK8uSj42GkX/e5Rut1Gyu0209emeM6h2d2K15SvY\n9563tYSmGQKBgQDwcH5WTi20KA7e07TroJi8GKWzS3gneNUpGQBS4VxdtV4UuXXF\n/4TkzafJ/9cm2iurvUmMd6XKP9lw0mY5zp/E70WgTCBp4vUlVsU3H2tYbO+filYL\n3ntNx6nKTykX4/a/UJfj0t8as+zli+gNxNx/h+734V9dKdFG4Rl+2fTLpQKBgQCE\nqJkTEe+Q0wCOBEYICADupwqcWqwAXWDW7IrZdfVtulqYWwqecVIkmk+dPxWosc4d\nekjz4nyNH0i+gC15LVebqdaAJ/T7aD4KXuW+nXNLMRfcJCGjgipRUruWD0EMEdqW\nrqBuGXMpXeH6VxGPgVkJVLvKC6tZZe9VM+pnvteuMQKBgQC8GaL+Lz+al4biyZBf\nJE8ekWrIotq/gfUBLP7x70+PB9bNtXtlgmTvjgYg4jiu3KR/ZIYYQ8vfVgkb6tDI\nrWGZw86Pzuoi1ppg/pYhKk9qrmCIT4HPEXbHl7ATahu2BOCIU3hybjTh2lB6LbX9\n8LMFlz1QPqSZYN/A/kOcLBfa3A==\n-----END PRIVATE KEY-----\n\";\n    const RS2: &str = \"-----BEGIN PRIVATE 
KEY-----\nMIIEvgIBADANBgkqhkiG9w0BAQEFAASCBKgwggSkAgEAAoIBAQDipm6FIKSRab3J\nHwmK18t7hp+pohllxIDUSPi7S5mIhN/JG2Plq2Lp746E/fuT8dcBF2R4sJlG2L0J\nzmxOvBU/i/sQF9s1i4CEfg05k2//gKENIEsF3pMMmrH+mcZi0TTD6rezHpdVxPHk\nqWxSyOCtIJV29X+wxPwAB59kQFHzy2ooPB1isZcpE8tO0KthAM+oZ3KuCwE0++cO\nIWLeq9aPwyKhtip/xjTMxd1kzdKh592mGSyzr9D0QSWOYFGvgJXANDdiPdhSSOLt\nECWPNPlm2FQvGGvYYBafUqz7VumKHE6x8J6lKdYa2J0ZdDzCIo2IHzlxe+RZNgwy\nuAD2jhVxAgMBAAECggEAbsZHWBu3MzcKQiVARbLoygvnN0J5xUqAaMDtiKUPejDv\nK1yOu67DXnDuKEP2VL2rhuYG/hHaKE1AP227c9PrUq6424m9YvM2sgrlrdFIuQkG\nLeMtp8W7+zoUasp/ssZrUqICfLIj5xCl5UuFHQT/Ar7dLlIYwa3VOLKBDb9+Dnfe\nQH5/So4uMXG6vw34JN9jf+eAc8Yt0PeIz62ycvRwdpTJQ0MxZN9ZKpCAQp+VTuXT\nzlzNvDMilabEdqUvAyGyz8lBLNl0wdaVrqPqAEWM5U45QXsdFZknWammP7/tijeX\n0z+Bi0J0uSEU5X502zm7GArj/NNIiWMcjmDjwUUhwQKBgQD9C2GoqxOxuVPYqwYR\n+Jz7f2qMjlSP8adA5Lzuh8UKXDp8JCEQC8ryweLzaOKS9C5MAw+W4W2wd4nJoQI1\nP1dgGvBlfvEeRHMgqWtq7FuTsjSe7e0uSEkC4ngDb4sc0QOpv15cMuEz+4+aFLPL\nx29EcHWAaBX+rkid3zpQHFU4eQKBgQDlTCEqRuXwwa3V+Sq+mNWzD9QIGtD87TH/\nFPO/Ij/cK2+GISgFDqhetiGTH4qrvPL0psPT+iH5zGFYcoFmTtwLdWQJdxhxz0bg\niX/AceyX5e1Bm+ThT36sU83NrxKPkrdk6jNmr2iUF1OTzTwUKOYdHOPZqdMPfF4M\n4XAaWVT2uQKBgQD4nKcNdU+7LE9Rr+4d1/o8Klp/0BMK/ayK2HE7lc8kt6qKb2DA\niCWUTqPw7Fq3cQrPia5WWhNP7pJEtFkcAaiR9sW7onW5fBz0uR+dhK0QtmR2xWJj\nN4fsOp8ZGQ0/eae0rh1CTobucLkM9EwV6VLLlgYL67e4anlUCo8bSEr+WQKBgQCB\nuf6RgqcY/RqyklPCnYlZ0zyskS9nyXKd1GbK3j+u+swP4LZZlh9f5j88k33LCA2U\nqLzmMwAB6cWxWqcnELqhqPq9+ClWSmTZKDGk2U936NfAZMirSGRsbsVi9wfTPriP\nWYlXMSpDjqb0WgsBhNob4npubQxCGKTFOM5Jufy90QKBgB0Lte1jX144uaXx6dtB\nrjXNuWNir0Jy31wHnQuCA+XnfUgPcrKmRLm8taMbXgZwxkNvgFkpUWU8aPEK08Ne\nX0n5X2/pBLJzxZc62ccvZYVnctBiFs6HbSnxpuMQCfkt/BcR/ttIepBQQIW86wHL\n5JiconnI5aLek0QVPoFaVXFa\n-----END PRIVATE KEY-----\n\";\n\n    #[derive(Clone)]\n    struct Fetch(Vec<AuthRule>);\n\n    impl FetchAuthRules for Fetch {\n        async fn fetch_auth_rules(\n            &self,\n            _ctx: &RequestContext,\n            _endpoint: EndpointId,\n        ) -> Result<Vec<AuthRule>, FetchAuthRulesError> 
{\n            Ok(self.0.clone())\n        }\n    }\n\n    async fn jwks_server(\n        router: impl for<'a> Fn(&'a str) -> Option<Vec<u8>> + Send + Sync + 'static,\n    ) -> SocketAddr {\n        let router = Arc::new(router);\n        let service = service_fn(move |req| {\n            let router = Arc::clone(&router);\n            async move {\n                match router(req.uri().path()) {\n                    Some(body) => Response::builder()\n                        .status(200)\n                        .body(Full::new(Bytes::from(body))),\n                    None => Response::builder()\n                        .status(404)\n                        .body(Full::new(Bytes::new())),\n                }\n            }\n        });\n\n        let listener = TcpListener::bind(\"0.0.0.0:0\").await.unwrap();\n        let server = hyper::server::conn::http1::Builder::new();\n        let addr = listener.local_addr().unwrap();\n        tokio::spawn(async move {\n            loop {\n                let (s, _) = listener.accept().await.unwrap();\n                let serve = server.serve_connection(TokioIo::new(s), service.clone());\n                tokio::spawn(serve.into_future());\n            }\n        });\n\n        addr\n    }\n\n    #[tokio::test]\n    async fn check_jwt_happy_path() {\n        let (rs1, jwk1) = new_rsa_jwk(RS1, \"rs1\".into());\n        let (rs2, jwk2) = new_rsa_jwk(RS2, \"rs2\".into());\n        let (ec1, jwk3) = new_ec_jwk(\"ec1\".into());\n        let (ec2, jwk4) = new_ec_jwk(\"ec2\".into());\n\n        let foo_jwks = jose_jwk::JwkSet {\n            keys: vec![jwk1, jwk3],\n        };\n        let bar_jwks = jose_jwk::JwkSet {\n            keys: vec![jwk2, jwk4],\n        };\n\n        let jwks_addr = jwks_server(move |path| match path {\n            \"/foo\" => Some(serde_json::to_vec(&foo_jwks).unwrap()),\n            \"/bar\" => Some(serde_json::to_vec(&bar_jwks).unwrap()),\n            _ => None,\n        })\n        .await;\n\n        
let role_name1 = RoleName::from(\"anonymous\");\n        let role_name2 = RoleName::from(\"authenticated\");\n\n        let roles = vec![\n            RoleNameInt::from(&role_name1),\n            RoleNameInt::from(&role_name2),\n        ];\n        let rules = vec![\n            AuthRule {\n                id: \"foo\".to_owned(),\n                jwks_url: format!(\"http://{jwks_addr}/foo\").parse().unwrap(),\n                audience: None,\n                role_names: roles.clone(),\n            },\n            AuthRule {\n                id: \"bar\".to_owned(),\n                jwks_url: format!(\"http://{jwks_addr}/bar\").parse().unwrap(),\n                audience: None,\n                role_names: roles.clone(),\n            },\n        ];\n\n        let fetch = Fetch(rules);\n        let jwk_cache = JwkCache::default();\n\n        let endpoint = EndpointId::from(\"ep\");\n\n        let jwt1 = new_rsa_jwt(\"rs1\".into(), rs1);\n        let jwt2 = new_rsa_jwt(\"rs2\".into(), rs2);\n        let jwt3 = new_ec_jwt(\"ec1\".into(), &ec1);\n        let jwt4 = new_ec_jwt(\"ec2\".into(), &ec2);\n\n        let tokens = [jwt1, jwt2, jwt3, jwt4];\n        let role_names = [role_name1, role_name2];\n        for role in &role_names {\n            for token in &tokens {\n                jwk_cache\n                    .check_jwt(\n                        &RequestContext::test(),\n                        endpoint.clone(),\n                        role,\n                        &fetch,\n                        token,\n                    )\n                    .await\n                    .unwrap();\n            }\n        }\n    }\n\n    /// AWS Cognito escapes the `/` in the URL.\n    #[tokio::test]\n    async fn check_jwt_regression_cognito_issuer() {\n        let (key, jwk) = new_ec_jwk(\"key\".into());\n\n        let now = now();\n        let token = new_custom_ec_jwt(\n            \"key\".into(),\n            &key,\n            typed_json::json! 
{{\n                \"sub\": \"dd9a73fd-e785-4a13-aae1-e691ce43e89d\",\n                // cognito uses `\\/`. I cannot replicated that easily here as serde_json will refuse\n                // to write that escape character. instead I will make a bogus URL using `\\` instead.\n                \"iss\": \"https:\\\\\\\\cognito-idp.us-west-2.amazonaws.com\\\\us-west-2_abcdefgh\",\n                \"client_id\": \"abcdefghijklmnopqrstuvwxyz\",\n                \"origin_jti\": \"6759d132-3fe7-446e-9e90-2fe7e8017893\",\n                \"event_id\": \"ec9c36ab-b01d-46a0-94e4-87fde6767065\",\n                \"token_use\": \"access\",\n                \"scope\": \"aws.cognito.signin.user.admin\",\n                \"auth_time\":now,\n                \"exp\":now + 60,\n                \"iat\":now,\n                \"jti\": \"b241614b-0b93-4bdc-96db-0a3c7061d9c0\",\n                \"username\": \"dd9a73fd-e785-4a13-aae1-e691ce43e89d\",\n            }},\n        );\n\n        let jwks = jose_jwk::JwkSet { keys: vec![jwk] };\n\n        let jwks_addr = jwks_server(move |_path| Some(serde_json::to_vec(&jwks).unwrap())).await;\n\n        let role_name = RoleName::from(\"anonymous\");\n        let rules = vec![AuthRule {\n            id: \"aws-cognito\".to_owned(),\n            jwks_url: format!(\"http://{jwks_addr}/\").parse().unwrap(),\n            audience: None,\n            role_names: vec![RoleNameInt::from(&role_name)],\n        }];\n\n        let fetch = Fetch(rules);\n        let jwk_cache = JwkCache::default();\n\n        let endpoint = EndpointId::from(\"ep\");\n\n        jwk_cache\n            .check_jwt(\n                &RequestContext::test(),\n                endpoint.clone(),\n                &role_name,\n                &fetch,\n                &token,\n            )\n            .await\n            .unwrap();\n    }\n\n    #[tokio::test]\n    async fn check_jwt_invalid_signature() {\n        let (_, jwk) = new_ec_jwk(\"1\".into());\n        let (key, _) = 
new_ec_jwk(\"1\".into());\n\n        // has a matching kid, but signed by the wrong key\n        let bad_jwt = new_ec_jwt(\"1\".into(), &key);\n\n        let jwks = jose_jwk::JwkSet { keys: vec![jwk] };\n        let jwks_addr = jwks_server(move |path| match path {\n            \"/\" => Some(serde_json::to_vec(&jwks).unwrap()),\n            _ => None,\n        })\n        .await;\n\n        let role = RoleName::from(\"authenticated\");\n\n        let rules = vec![AuthRule {\n            id: String::new(),\n            jwks_url: format!(\"http://{jwks_addr}/\").parse().unwrap(),\n            audience: None,\n            role_names: vec![RoleNameInt::from(&role)],\n        }];\n\n        let fetch = Fetch(rules);\n        let jwk_cache = JwkCache::default();\n\n        let ep = EndpointId::from(\"ep\");\n\n        let ctx = RequestContext::test();\n        let err = jwk_cache\n            .check_jwt(&ctx, ep, &role, &fetch, &bad_jwt)\n            .await\n            .unwrap_err();\n        assert!(\n            matches!(err, JwtError::Signature(_)),\n            \"expected \\\"signature error\\\", got {err:?}\"\n        );\n    }\n\n    #[tokio::test]\n    async fn check_jwt_unknown_role() {\n        let (key, jwk) = new_rsa_jwk(RS1, \"1\".into());\n        let jwt = new_rsa_jwt(\"1\".into(), key);\n\n        let jwks = jose_jwk::JwkSet { keys: vec![jwk] };\n        let jwks_addr = jwks_server(move |path| match path {\n            \"/\" => Some(serde_json::to_vec(&jwks).unwrap()),\n            _ => None,\n        })\n        .await;\n\n        let role = RoleName::from(\"authenticated\");\n        let rules = vec![AuthRule {\n            id: String::new(),\n            jwks_url: format!(\"http://{jwks_addr}/\").parse().unwrap(),\n            audience: None,\n            role_names: vec![RoleNameInt::from(&role)],\n        }];\n\n        let fetch = Fetch(rules);\n        let jwk_cache = JwkCache::default();\n\n        let ep = EndpointId::from(\"ep\");\n\n        // 
this role_name is not accepted\n        let bad_role_name = RoleName::from(\"cloud_admin\");\n\n        let ctx = RequestContext::test();\n        let err = jwk_cache\n            .check_jwt(&ctx, ep, &bad_role_name, &fetch, &jwt)\n            .await\n            .unwrap_err();\n\n        assert!(\n            matches!(err, JwtError::JwkNotFound),\n            \"expected \\\"jwk not found\\\", got {err:?}\"\n        );\n    }\n\n    #[tokio::test]\n    async fn check_jwt_invalid_claims() {\n        let (key, jwk) = new_ec_jwk(\"1\".into());\n\n        let jwks = jose_jwk::JwkSet { keys: vec![jwk] };\n        let jwks_addr = jwks_server(move |path| match path {\n            \"/\" => Some(serde_json::to_vec(&jwks).unwrap()),\n            _ => None,\n        })\n        .await;\n\n        let now = SystemTime::now()\n            .duration_since(SystemTime::UNIX_EPOCH)\n            .unwrap()\n            .as_secs();\n\n        struct Test {\n            body: serde_json::Value,\n            error: JwtClaimsError,\n        }\n\n        let table = vec![\n            Test {\n                body: json! {{\n                    \"nbf\": now + 60,\n                    \"aud\": \"neon\",\n                }},\n                error: JwtClaimsError::JwtTokenNotYetReadyToUse(now + 60),\n            },\n            Test {\n                body: json! {{\n                    \"exp\": now - 60,\n                    \"aud\": [\"neon\"],\n                }},\n                error: JwtClaimsError::JwtTokenHasExpired(now - 60),\n            },\n            Test {\n                body: json! {{\n                }},\n                error: JwtClaimsError::InvalidJwtTokenAudience,\n            },\n            Test {\n                body: json! {{\n                    \"aud\": [],\n                }},\n                error: JwtClaimsError::InvalidJwtTokenAudience,\n            },\n            Test {\n                body: json! 
{{\n                    \"aud\": \"foo\",\n                }},\n                error: JwtClaimsError::InvalidJwtTokenAudience,\n            },\n            Test {\n                body: json! {{\n                    \"aud\": [\"foo\"],\n                }},\n                error: JwtClaimsError::InvalidJwtTokenAudience,\n            },\n            Test {\n                body: json! {{\n                    \"aud\": [\"foo\", \"bar\"],\n                }},\n                error: JwtClaimsError::InvalidJwtTokenAudience,\n            },\n        ];\n\n        let role = RoleName::from(\"authenticated\");\n\n        let rules = vec![AuthRule {\n            id: String::new(),\n            jwks_url: format!(\"http://{jwks_addr}/\").parse().unwrap(),\n            audience: Some(\"neon\".to_string()),\n            role_names: vec![RoleNameInt::from(&role)],\n        }];\n\n        let fetch = Fetch(rules);\n        let jwk_cache = JwkCache::default();\n\n        let ep = EndpointId::from(\"ep\");\n\n        let ctx = RequestContext::test();\n        for test in table {\n            let jwt = new_custom_ec_jwt(\"1\".into(), &key, test.body);\n\n            match jwk_cache\n                .check_jwt(&ctx, ep.clone(), &role, &fetch, &jwt)\n                .await\n            {\n                Err(JwtError::InvalidClaims(error)) if error == test.error => {}\n                Err(err) => {\n                    panic!(\"expected {:?}, got {err:?}\", test.error)\n                }\n                Ok(_payload) => {\n                    panic!(\"expected {:?}, got ok\", test.error)\n                }\n            }\n        }\n    }\n\n    #[tokio::test]\n    async fn check_jwk_keycloak_regression() {\n        let (rs, valid_jwk) = new_rsa_jwk(RS1, \"rs1\".into());\n        let valid_jwk = serde_json::to_value(valid_jwk).unwrap();\n\n        // This is valid, but we cannot parse it as we have no support for encryption JWKs, only signature based ones.\n        // This is taken 
directly from keycloak.\n        let invalid_jwk = serde_json::json! {\n            {\n                \"kid\": \"U-Jc9xRli84eNqRpYQoIPF-GNuRWV3ZvAIhziRW2sbQ\",\n                \"kty\": \"RSA\",\n                \"alg\": \"RSA-OAEP\",\n                \"use\": \"enc\",\n                \"n\": \"yypYWsEKmM_wWdcPnSGLSm5ytw1WG7P7EVkKSulcDRlrM6HWj3PR68YS8LySYM2D9Z-79oAdZGKhIfzutqL8rK1vS14zDuPpAM-RWY3JuQfm1O_-1DZM8-07PmVRegP5KPxsKblLf_My8ByH6sUOIa1p2rbe2q_b0dSTXYu1t0dW-cGL5VShc400YymvTwpc-5uYNsaVxZajnB7JP1OunOiuCJ48AuVp3PqsLzgoXqlXEB1ZZdch3xT3bxaTtNruGvG4xmLZY68O_T3yrwTCNH2h_jFdGPyXdyZToCMSMK2qSbytlfwfN55pT9Vv42Lz1YmoB7XRjI9aExKPc5AxFw\",\n                \"e\": \"AQAB\",\n                \"x5c\": [\n                    \"MIICmzCCAYMCBgGS41E6azANBgkqhkiG9w0BAQsFADARMQ8wDQYDVQQDDAZtYXN0ZXIwHhcNMjQxMDMxMTYwMTQ0WhcNMzQxMDMxMTYwMzI0WjARMQ8wDQYDVQQDDAZtYXN0ZXIwggEiMA0GCSqGSIb3DQEBAQUAA4IBDwAwggEKAoIBAQDLKlhawQqYz/BZ1w+dIYtKbnK3DVYbs/sRWQpK6VwNGWszodaPc9HrxhLwvJJgzYP1n7v2gB1kYqEh/O62ovysrW9LXjMO4+kAz5FZjcm5B+bU7/7UNkzz7Ts+ZVF6A/ko/GwpuUt/8zLwHIfqxQ4hrWnatt7ar9vR1JNdi7W3R1b5wYvlVKFzjTRjKa9PClz7m5g2xpXFlqOcHsk/U66c6K4InjwC5Wnc+qwvOCheqVcQHVll1yHfFPdvFpO02u4a8bjGYtljrw79PfKvBMI0faH+MV0Y/Jd3JlOgIxIwrapJvK2V/B83nmlP1W/jYvPViagHtdGMj1oTEo9zkDEXAgMBAAEwDQYJKoZIhvcNAQELBQADggEBAECYX59+Q9v6c9sb6Q0/C6IgLWG2nVCgVE1YWwIzz+68WrhlmNCRuPjY94roB+tc2tdHbj+Nh3LMzJk7L1KCQoW1+LPK6A6E8W9ad0YPcuw8csV2pUA3+H56exQMH0fUAPQAU7tXWvnQ7otcpV1XA8afn/NTMTsnxi9mSkor8MLMYQ3aeRyh1+LAchHBthWiltqsSUqXrbJF59u5p0ghquuKcWR3TXsA7klGYBgGU5KAJifr9XT87rN0bOkGvbeWAgKvnQnjZwxdnLqTfp/pRY/PiJJHhgIBYPIA7STGnMPjmJ995i34zhnbnd8WHXJA3LxrIMqLW/l8eIdvtM1w8KI=\"\n                ],\n                \"x5t\": \"QhfzMMnuAfkReTgZ1HtrfyOeeZs\",\n                \"x5t#S256\": \"cmHDUdKgLiRCEN28D5FBy9IJLFmR7QWfm77SLhGTCTU\"\n            }\n        };\n\n        let jwks = serde_json::json! 
{{ \"keys\": [invalid_jwk, valid_jwk ] }};\n        let jwks_addr = jwks_server(move |path| match path {\n            \"/\" => Some(serde_json::to_vec(&jwks).unwrap()),\n            _ => None,\n        })\n        .await;\n\n        let role_name = RoleName::from(\"anonymous\");\n        let role = RoleNameInt::from(&role_name);\n\n        let rules = vec![AuthRule {\n            id: \"foo\".to_owned(),\n            jwks_url: format!(\"http://{jwks_addr}/\").parse().unwrap(),\n            audience: None,\n            role_names: vec![role],\n        }];\n\n        let fetch = Fetch(rules);\n        let jwk_cache = JwkCache::default();\n\n        let endpoint = EndpointId::from(\"ep\");\n\n        let token = new_rsa_jwt(\"rs1\".into(), rs);\n\n        jwk_cache\n            .check_jwt(\n                &RequestContext::test(),\n                endpoint.clone(),\n                &role_name,\n                &fetch,\n                &token,\n            )\n            .await\n            .unwrap();\n    }\n}\n"
  },
  {
    "path": "proxy/src/auth/backend/local.rs",
    "content": "use std::net::SocketAddr;\n\nuse arc_swap::ArcSwapOption;\nuse postgres_client::config::SslMode;\nuse tokio::sync::Semaphore;\n\nuse super::jwt::{AuthRule, FetchAuthRules};\nuse crate::auth::backend::jwt::FetchAuthRulesError;\nuse crate::compute::ConnectInfo;\nuse crate::compute_ctl::ComputeCtlApi;\nuse crate::context::RequestContext;\nuse crate::control_plane::NodeInfo;\nuse crate::control_plane::messages::{ColdStartInfo, EndpointJwksResponse, MetricsAuxInfo};\nuse crate::http;\nuse crate::intern::{BranchIdTag, EndpointIdTag, InternId, ProjectIdTag};\nuse crate::types::EndpointId;\nuse crate::url::ApiUrl;\n\npub struct LocalBackend {\n    pub(crate) initialize: Semaphore,\n    pub(crate) compute_ctl: ComputeCtlApi,\n    pub(crate) node_info: NodeInfo,\n}\n\nimpl LocalBackend {\n    pub fn new(postgres_addr: SocketAddr, compute_ctl: ApiUrl) -> Self {\n        LocalBackend {\n            initialize: Semaphore::new(1),\n            compute_ctl: ComputeCtlApi {\n                api: http::Endpoint::new(compute_ctl, http::new_client()),\n            },\n            node_info: NodeInfo {\n                conn_info: ConnectInfo {\n                    host_addr: Some(postgres_addr.ip()),\n                    host: postgres_addr.ip().to_string().into(),\n                    port: postgres_addr.port(),\n                    ssl_mode: SslMode::Disable,\n                },\n                // TODO(conrad): make this better reflect compute info rather than endpoint info.\n                aux: MetricsAuxInfo {\n                    endpoint_id: EndpointIdTag::get_interner().get_or_intern(\"local\"),\n                    project_id: ProjectIdTag::get_interner().get_or_intern(\"local\"),\n                    branch_id: BranchIdTag::get_interner().get_or_intern(\"local\"),\n                    compute_id: \"local\".into(),\n                    cold_start_info: ColdStartInfo::WarmCached,\n                },\n            },\n        }\n    }\n}\n\n#[derive(Clone, 
Copy)]\npub(crate) struct StaticAuthRules;\n\npub static JWKS_ROLE_MAP: ArcSwapOption<EndpointJwksResponse> = ArcSwapOption::const_empty();\n\nimpl FetchAuthRules for StaticAuthRules {\n    async fn fetch_auth_rules(\n        &self,\n        _ctx: &RequestContext,\n        _endpoint: EndpointId,\n    ) -> Result<Vec<AuthRule>, FetchAuthRulesError> {\n        let mappings = JWKS_ROLE_MAP.load();\n        let role_mappings = mappings\n            .as_deref()\n            .ok_or(FetchAuthRulesError::RoleJwksNotConfigured)?;\n        let mut rules = vec![];\n        for setting in &role_mappings.jwks {\n            rules.push(AuthRule {\n                id: setting.id.clone(),\n                jwks_url: setting.jwks_url.clone(),\n                audience: setting.jwt_audience.clone(),\n                role_names: setting.role_names.clone(),\n            });\n        }\n\n        Ok(rules)\n    }\n}\n"
  },
  {
    "path": "proxy/src/auth/backend/mod.rs",
    "content": "mod classic;\nmod console_redirect;\nmod hacks;\npub mod jwt;\npub mod local;\n\nuse std::sync::Arc;\n\npub use console_redirect::ConsoleRedirectBackend;\npub(crate) use console_redirect::ConsoleRedirectError;\nuse local::LocalBackend;\nuse postgres_client::config::AuthKeys;\nuse serde::{Deserialize, Serialize};\nuse tokio::io::{AsyncRead, AsyncWrite};\nuse tracing::{debug, info};\n\nuse crate::auth::{self, ComputeUserInfoMaybeEndpoint, validate_password_and_exchange};\nuse crate::cache::Cached;\nuse crate::cache::node_info::CachedNodeInfo;\nuse crate::config::AuthenticationConfig;\nuse crate::context::RequestContext;\nuse crate::control_plane::client::ControlPlaneClient;\nuse crate::control_plane::errors::GetAuthInfoError;\nuse crate::control_plane::messages::EndpointRateLimitConfig;\nuse crate::control_plane::{\n    self, AccessBlockerFlags, AuthSecret, ControlPlaneApi, EndpointAccessControl, RoleAccessControl,\n};\nuse crate::intern::{EndpointIdInt, RoleNameInt};\nuse crate::pqproto::BeMessage;\nuse crate::proxy::NeonOptions;\nuse crate::proxy::wake_compute::WakeComputeBackend;\nuse crate::rate_limiter::EndpointRateLimiter;\nuse crate::stream::Stream;\nuse crate::types::{EndpointCacheKey, EndpointId, RoleName};\nuse crate::{scram, stream};\n\n/// Alternative to [`std::borrow::Cow`] but doesn't need `T: ToOwned` as we don't need that functionality\npub enum MaybeOwned<'a, T> {\n    Owned(T),\n    Borrowed(&'a T),\n}\n\nimpl<T> std::ops::Deref for MaybeOwned<'_, T> {\n    type Target = T;\n\n    fn deref(&self) -> &Self::Target {\n        match self {\n            MaybeOwned::Owned(t) => t,\n            MaybeOwned::Borrowed(t) => t,\n        }\n    }\n}\n\n/// This type serves two purposes:\n///\n/// * When `T` is `()`, it's just a regular auth backend selector\n///   which we use in [`crate::config::ProxyConfig`].\n///\n/// * However, when we substitute `T` with [`ComputeUserInfoMaybeEndpoint`],\n///   this helps us provide the credentials only to 
those auth\n///   backends which require them for the authentication process.\npub enum Backend<'a, T> {\n    /// Cloud API (V2).\n    ControlPlane(MaybeOwned<'a, ControlPlaneClient>, T),\n    /// Local proxy uses configured auth credentials and does not wake compute\n    Local(MaybeOwned<'a, LocalBackend>),\n}\n\nimpl std::fmt::Display for Backend<'_, ()> {\n    fn fmt(&self, fmt: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {\n        match self {\n            Self::ControlPlane(api, ()) => match &**api {\n                ControlPlaneClient::ProxyV1(endpoint) => fmt\n                    .debug_tuple(\"ControlPlane::ProxyV1\")\n                    .field(&endpoint.url())\n                    .finish(),\n                #[cfg(any(test, feature = \"testing\"))]\n                ControlPlaneClient::PostgresMock(endpoint) => {\n                    let url = endpoint.url();\n                    match url::Url::parse(url) {\n                        Ok(mut url) => {\n                            let _ = url.set_password(Some(\"_redacted_\"));\n                            let url = url.as_str();\n                            fmt.debug_tuple(\"ControlPlane::PostgresMock\")\n                                .field(&url)\n                                .finish()\n                        }\n                        Err(_) => fmt\n                            .debug_tuple(\"ControlPlane::PostgresMock\")\n                            .field(&url)\n                            .finish(),\n                    }\n                }\n                #[cfg(test)]\n                ControlPlaneClient::Test(_) => fmt.debug_tuple(\"ControlPlane::Test\").finish(),\n            },\n            Self::Local(_) => fmt.debug_tuple(\"Local\").finish(),\n        }\n    }\n}\n\nimpl<T> Backend<'_, T> {\n    /// Very similar to [`std::option::Option::as_ref`].\n    /// This helps us pass structured config to async tasks.\n    pub(crate) fn as_ref(&self) -> Backend<'_, &T> {\n        match self {\n  
          Self::ControlPlane(c, x) => Backend::ControlPlane(MaybeOwned::Borrowed(c), x),\n            Self::Local(l) => Backend::Local(MaybeOwned::Borrowed(l)),\n        }\n    }\n\n    pub(crate) fn get_api(&self) -> &ControlPlaneClient {\n        match self {\n            Self::ControlPlane(api, _) => api,\n            Self::Local(_) => panic!(\"Local backend has no API\"),\n        }\n    }\n\n    pub(crate) fn is_local_proxy(&self) -> bool {\n        matches!(self, Self::Local(_))\n    }\n}\n\nimpl<'a, T> Backend<'a, T> {\n    /// Very similar to [`std::option::Option::map`].\n    /// Maps [`Backend<T>`] to [`Backend<R>`] by applying\n    /// a function to a contained value.\n    pub(crate) fn map<R>(self, f: impl FnOnce(T) -> R) -> Backend<'a, R> {\n        match self {\n            Self::ControlPlane(c, x) => Backend::ControlPlane(c, f(x)),\n            Self::Local(l) => Backend::Local(l),\n        }\n    }\n}\nimpl<'a, T, E> Backend<'a, Result<T, E>> {\n    /// Very similar to [`std::option::Option::transpose`].\n    /// This is most useful for error handling.\n    pub(crate) fn transpose(self) -> Result<Backend<'a, T>, E> {\n        match self {\n            Self::ControlPlane(c, x) => x.map(|x| Backend::ControlPlane(c, x)),\n            Self::Local(l) => Ok(Backend::Local(l)),\n        }\n    }\n}\n\npub(crate) struct ComputeCredentials {\n    pub(crate) info: ComputeUserInfo,\n    pub(crate) keys: ComputeCredentialKeys,\n}\n\n#[derive(Debug, Clone)]\npub(crate) struct ComputeUserInfoNoEndpoint {\n    pub(crate) user: RoleName,\n    pub(crate) options: NeonOptions,\n}\n\n#[derive(Debug, Clone, Default, Serialize, Deserialize)]\npub(crate) struct ComputeUserInfo {\n    pub(crate) endpoint: EndpointId,\n    pub(crate) user: RoleName,\n    pub(crate) options: NeonOptions,\n}\n\nimpl ComputeUserInfo {\n    pub(crate) fn endpoint_cache_key(&self) -> EndpointCacheKey {\n        self.options.get_cache_key(&self.endpoint)\n    }\n}\n\n#[cfg_attr(test, 
derive(Debug))]\npub(crate) enum ComputeCredentialKeys {\n    AuthKeys(AuthKeys),\n    JwtPayload(Vec<u8>),\n}\n\nimpl TryFrom<ComputeUserInfoMaybeEndpoint> for ComputeUserInfo {\n    // user name\n    type Error = ComputeUserInfoNoEndpoint;\n\n    fn try_from(user_info: ComputeUserInfoMaybeEndpoint) -> Result<Self, Self::Error> {\n        match user_info.endpoint_id {\n            None => Err(ComputeUserInfoNoEndpoint {\n                user: user_info.user,\n                options: user_info.options,\n            }),\n            Some(endpoint) => Ok(ComputeUserInfo {\n                endpoint,\n                user: user_info.user,\n                options: user_info.options,\n            }),\n        }\n    }\n}\n\n/// True to its name, this function encapsulates our current auth trade-offs.\n/// Here, we choose the appropriate auth flow based on circumstances.\n///\n/// All authentication flows will emit an AuthenticationOk message if successful.\nasync fn auth_quirks(\n    ctx: &RequestContext,\n    api: &impl control_plane::ControlPlaneApi,\n    user_info: ComputeUserInfoMaybeEndpoint,\n    client: &mut stream::PqStream<Stream<impl AsyncRead + AsyncWrite + Unpin>>,\n    allow_cleartext: bool,\n    config: &'static AuthenticationConfig,\n    endpoint_rate_limiter: Arc<EndpointRateLimiter>,\n) -> auth::Result<ComputeCredentials> {\n    // If there's no project so far, that entails that client doesn't\n    // support SNI or other means of passing the endpoint (project) name.\n    // We now expect to see a very specific payload in the place of password.\n    let (info, unauthenticated_password) = match user_info.try_into() {\n        Err(info) => {\n            let (info, password) =\n                hacks::password_hack_no_authentication(ctx, info, client).await?;\n            ctx.set_endpoint_id(info.endpoint.clone());\n            (info, Some(password))\n        }\n        Ok(info) => (info, None),\n    };\n\n    debug!(\"fetching authentication info and 
allowlists\");\n\n    let access_controls = api\n        .get_endpoint_access_control(ctx, &info.endpoint, &info.user)\n        .await?;\n\n    access_controls.check(\n        ctx,\n        config.ip_allowlist_check_enabled,\n        config.is_vpc_acccess_proxy,\n    )?;\n\n    access_controls.connection_attempt_rate_limit(ctx, &info.endpoint, &endpoint_rate_limiter)?;\n\n    let role_access = api\n        .get_role_access_control(ctx, &info.endpoint, &info.user)\n        .await?;\n\n    let secret = if let Some(secret) = role_access.secret {\n        secret\n    } else {\n        // If we don't have an authentication secret, we mock one to\n        // prevent malicious probing (possible due to missing protocol steps).\n        // This mocked secret will never lead to successful authentication.\n        info!(\"authentication info not found, mocking it\");\n        AuthSecret::Scram(scram::ServerSecret::mock(rand::random()))\n    };\n\n    match authenticate_with_secret(\n        ctx,\n        secret,\n        info,\n        client,\n        unauthenticated_password,\n        allow_cleartext,\n        config,\n    )\n    .await\n    {\n        Ok(keys) => Ok(keys),\n        Err(e) => Err(e),\n    }\n}\n\nasync fn authenticate_with_secret(\n    ctx: &RequestContext,\n    secret: AuthSecret,\n    info: ComputeUserInfo,\n    client: &mut stream::PqStream<Stream<impl AsyncRead + AsyncWrite + Unpin>>,\n    unauthenticated_password: Option<Vec<u8>>,\n    allow_cleartext: bool,\n    config: &'static AuthenticationConfig,\n) -> auth::Result<ComputeCredentials> {\n    if let Some(password) = unauthenticated_password {\n        let ep = EndpointIdInt::from(&info.endpoint);\n        let role = RoleNameInt::from(&info.user);\n\n        let auth_outcome =\n            validate_password_and_exchange(&config.scram_thread_pool, ep, role, &password, secret)\n                .await?;\n        let keys = match auth_outcome {\n            crate::sasl::Outcome::Success(key) => key,\n   
         crate::sasl::Outcome::Failure(reason) => {\n                info!(\"auth backend failed with an error: {reason}\");\n                return Err(auth::AuthError::password_failed(&*info.user));\n            }\n        };\n\n        // we have authenticated the password\n        client.write_message(BeMessage::AuthenticationOk);\n\n        return Ok(ComputeCredentials { info, keys });\n    }\n\n    // -- the remaining flows are self-authenticating --\n\n    // Perform cleartext auth if we're allowed to do that.\n    // Currently, we use it for websocket connections (latency).\n    if allow_cleartext {\n        ctx.set_auth_method(crate::context::AuthMethod::Cleartext);\n        return hacks::authenticate_cleartext(ctx, info, client, secret, config).await;\n    }\n\n    // Finally, proceed with the main auth flow (SCRAM-based).\n    classic::authenticate(ctx, info, client, config, secret).await\n}\n\nimpl<'a> Backend<'a, ComputeUserInfoMaybeEndpoint> {\n    /// Get username from the credentials.\n    pub(crate) fn get_user(&self) -> &str {\n        match self {\n            Self::ControlPlane(_, user_info) => &user_info.user,\n            Self::Local(_) => \"local\",\n        }\n    }\n\n    /// Authenticate the client via the requested backend, possibly using credentials.\n    #[tracing::instrument(fields(allow_cleartext = allow_cleartext), skip_all)]\n    pub(crate) async fn authenticate(\n        self,\n        ctx: &RequestContext,\n        client: &mut stream::PqStream<Stream<impl AsyncRead + AsyncWrite + Unpin>>,\n        allow_cleartext: bool,\n        config: &'static AuthenticationConfig,\n        endpoint_rate_limiter: Arc<EndpointRateLimiter>,\n    ) -> auth::Result<Backend<'a, ComputeCredentials>> {\n        let res = match self {\n            Self::ControlPlane(api, user_info) => {\n                debug!(\n                    user = &*user_info.user,\n                    project = user_info.endpoint(),\n                    \"performing 
authentication using the console\"\n                );\n\n                let auth_res = auth_quirks(\n                    ctx,\n                    &*api,\n                    user_info.clone(),\n                    client,\n                    allow_cleartext,\n                    config,\n                    endpoint_rate_limiter,\n                )\n                .await;\n                match auth_res {\n                    Ok(credentials) => Ok(Backend::ControlPlane(api, credentials)),\n                    Err(e) => {\n                        // The password could have been changed, so we invalidate the cache.\n                        // We should only invalidate the cache if the TTL might have expired.\n                        if e.is_password_failed()\n                            && let ControlPlaneClient::ProxyV1(api) = &*api\n                            && let Some(ep) = &user_info.endpoint_id\n                        {\n                            api.caches\n                                .project_info\n                                .maybe_invalidate_role_secret(ep, &user_info.user);\n                        }\n\n                        Err(e)\n                    }\n                }\n            }\n            Self::Local(_) => {\n                return Err(auth::AuthError::bad_auth_method(\"invalid for local proxy\"));\n            }\n        };\n\n        // TODO: replace with some metric\n        info!(\"user successfully authenticated\");\n        res\n    }\n}\n\nimpl Backend<'_, ComputeUserInfo> {\n    pub(crate) async fn get_role_secret(\n        &self,\n        ctx: &RequestContext,\n    ) -> Result<RoleAccessControl, GetAuthInfoError> {\n        match self {\n            Self::ControlPlane(api, user_info) => {\n                api.get_role_access_control(ctx, &user_info.endpoint, &user_info.user)\n                    .await\n            }\n            Self::Local(_) => Ok(RoleAccessControl { secret: None }),\n        }\n    }\n\n    
pub(crate) async fn get_endpoint_access_control(\n        &self,\n        ctx: &RequestContext,\n    ) -> Result<EndpointAccessControl, GetAuthInfoError> {\n        match self {\n            Self::ControlPlane(api, user_info) => {\n                api.get_endpoint_access_control(ctx, &user_info.endpoint, &user_info.user)\n                    .await\n            }\n            Self::Local(_) => Ok(EndpointAccessControl {\n                allowed_ips: Arc::new(vec![]),\n                allowed_vpce: Arc::new(vec![]),\n                flags: AccessBlockerFlags::default(),\n                rate_limits: EndpointRateLimitConfig::default(),\n            }),\n        }\n    }\n}\n\n#[async_trait::async_trait]\nimpl WakeComputeBackend for Backend<'_, ComputeUserInfo> {\n    async fn wake_compute(\n        &self,\n        ctx: &RequestContext,\n    ) -> Result<CachedNodeInfo, control_plane::errors::WakeComputeError> {\n        match self {\n            Self::ControlPlane(api, info) => api.wake_compute(ctx, info).await,\n            Self::Local(local) => Ok(Cached::new_uncached(local.node_info.clone())),\n        }\n    }\n}\n\n#[cfg(test)]\nmod tests {\n    #![allow(clippy::unimplemented, clippy::unwrap_used)]\n\n    use std::sync::Arc;\n\n    use bytes::BytesMut;\n    use control_plane::AuthSecret;\n    use fallible_iterator::FallibleIterator;\n    use once_cell::sync::Lazy;\n    use postgres_protocol::authentication::sasl::{ChannelBinding, ScramSha256};\n    use postgres_protocol::message::backend::Message as PgMessage;\n    use postgres_protocol::message::frontend;\n    use tokio::io::{AsyncRead, AsyncReadExt, AsyncWriteExt};\n\n    use super::auth_quirks;\n    use super::jwt::JwkCache;\n    use crate::auth::{ComputeUserInfoMaybeEndpoint, IpPattern};\n    use crate::cache::node_info::CachedNodeInfo;\n    use crate::config::AuthenticationConfig;\n    use crate::context::RequestContext;\n    use crate::control_plane::messages::EndpointRateLimitConfig;\n    use 
crate::control_plane::{\n        self, AccessBlockerFlags, EndpointAccessControl, RoleAccessControl,\n    };\n    use crate::proxy::NeonOptions;\n    use crate::rate_limiter::EndpointRateLimiter;\n    use crate::scram::ServerSecret;\n    use crate::scram::threadpool::ThreadPool;\n    use crate::stream::{PqStream, Stream};\n\n    struct Auth {\n        ips: Vec<IpPattern>,\n        vpc_endpoint_ids: Vec<String>,\n        access_blocker_flags: AccessBlockerFlags,\n        secret: AuthSecret,\n    }\n\n    impl control_plane::ControlPlaneApi for Auth {\n        async fn get_role_access_control(\n            &self,\n            _ctx: &RequestContext,\n            _endpoint: &crate::types::EndpointId,\n            _role: &crate::types::RoleName,\n        ) -> Result<RoleAccessControl, control_plane::errors::GetAuthInfoError> {\n            Ok(RoleAccessControl {\n                secret: Some(self.secret.clone()),\n            })\n        }\n\n        async fn get_endpoint_access_control(\n            &self,\n            _ctx: &RequestContext,\n            _endpoint: &crate::types::EndpointId,\n            _role: &crate::types::RoleName,\n        ) -> Result<EndpointAccessControl, control_plane::errors::GetAuthInfoError> {\n            Ok(EndpointAccessControl {\n                allowed_ips: Arc::new(self.ips.clone()),\n                allowed_vpce: Arc::new(self.vpc_endpoint_ids.clone()),\n                flags: self.access_blocker_flags,\n                rate_limits: EndpointRateLimitConfig::default(),\n            })\n        }\n\n        async fn get_endpoint_jwks(\n            &self,\n            _ctx: &RequestContext,\n            _endpoint: &crate::types::EndpointId,\n        ) -> Result<Vec<super::jwt::AuthRule>, control_plane::errors::GetEndpointJwksError>\n        {\n            unimplemented!()\n        }\n\n        async fn wake_compute(\n            &self,\n            _ctx: &RequestContext,\n            _user_info: &super::ComputeUserInfo,\n        ) -> 
Result<CachedNodeInfo, control_plane::errors::WakeComputeError> {\n            unimplemented!()\n        }\n    }\n\n    static CONFIG: Lazy<AuthenticationConfig> = Lazy::new(|| AuthenticationConfig {\n        jwks_cache: JwkCache::default(),\n        scram_thread_pool: ThreadPool::new(1),\n        scram_protocol_timeout: std::time::Duration::from_secs(5),\n        ip_allowlist_check_enabled: true,\n        is_vpc_acccess_proxy: false,\n        is_auth_broker: false,\n        accept_jwts: false,\n        console_redirect_confirmation_timeout: std::time::Duration::from_secs(5),\n    });\n\n    async fn read_message(r: &mut (impl AsyncRead + Unpin), b: &mut BytesMut) -> PgMessage {\n        loop {\n            r.read_buf(&mut *b).await.unwrap();\n            if let Some(m) = PgMessage::parse(&mut *b).unwrap() {\n                break m;\n            }\n        }\n    }\n\n    #[tokio::test]\n    async fn auth_quirks_scram() {\n        let (mut client, server) = tokio::io::duplex(1024);\n        let mut stream = PqStream::new_skip_handshake(Stream::from_raw(server));\n\n        let ctx = RequestContext::test();\n        let api = Auth {\n            ips: vec![],\n            vpc_endpoint_ids: vec![],\n            access_blocker_flags: AccessBlockerFlags::default(),\n            secret: AuthSecret::Scram(ServerSecret::build(\"my-secret-password\").await.unwrap()),\n        };\n\n        let user_info = ComputeUserInfoMaybeEndpoint {\n            user: \"conrad\".into(),\n            endpoint_id: Some(\"endpoint\".into()),\n            options: NeonOptions::default(),\n        };\n\n        let handle = tokio::spawn(async move {\n            let mut scram = ScramSha256::new(b\"my-secret-password\", ChannelBinding::unsupported());\n\n            let mut read = BytesMut::new();\n\n            // server should offer scram\n            match read_message(&mut client, &mut read).await {\n                PgMessage::AuthenticationSasl(a) => {\n                    let options: 
Vec<&str> = a.mechanisms().collect().unwrap();\n                    assert_eq!(options, [\"SCRAM-SHA-256\"]);\n                }\n                _ => panic!(\"wrong message\"),\n            }\n\n            // client sends client-first-message\n            let mut write = BytesMut::new();\n            frontend::sasl_initial_response(\"SCRAM-SHA-256\", scram.message(), &mut write).unwrap();\n            client.write_all(&write).await.unwrap();\n\n            // server response with server-first-message\n            match read_message(&mut client, &mut read).await {\n                PgMessage::AuthenticationSaslContinue(a) => {\n                    scram.update(a.data()).await.unwrap();\n                }\n                _ => panic!(\"wrong message\"),\n            }\n\n            // client response with client-final-message\n            write.clear();\n            frontend::sasl_response(scram.message(), &mut write).unwrap();\n            client.write_all(&write).await.unwrap();\n\n            // server response with server-final-message\n            match read_message(&mut client, &mut read).await {\n                PgMessage::AuthenticationSaslFinal(a) => {\n                    scram.finish(a.data()).unwrap();\n                }\n                _ => panic!(\"wrong message\"),\n            }\n        });\n        let endpoint_rate_limiter = Arc::new(EndpointRateLimiter::new_with_shards(\n            EndpointRateLimiter::DEFAULT,\n            64,\n        ));\n\n        let _creds = auth_quirks(\n            &ctx,\n            &api,\n            user_info,\n            &mut stream,\n            false,\n            &CONFIG,\n            endpoint_rate_limiter,\n        )\n        .await\n        .unwrap();\n\n        // flush the final server message\n        stream.flush().await.unwrap();\n\n        handle.await.unwrap();\n    }\n\n    #[tokio::test]\n    async fn auth_quirks_cleartext() {\n        let (mut client, server) = tokio::io::duplex(1024);\n        let 
mut stream = PqStream::new_skip_handshake(Stream::from_raw(server));\n\n        let ctx = RequestContext::test();\n        let api = Auth {\n            ips: vec![],\n            vpc_endpoint_ids: vec![],\n            access_blocker_flags: AccessBlockerFlags::default(),\n            secret: AuthSecret::Scram(ServerSecret::build(\"my-secret-password\").await.unwrap()),\n        };\n\n        let user_info = ComputeUserInfoMaybeEndpoint {\n            user: \"conrad\".into(),\n            endpoint_id: Some(\"endpoint\".into()),\n            options: NeonOptions::default(),\n        };\n\n        let handle = tokio::spawn(async move {\n            let mut read = BytesMut::new();\n            let mut write = BytesMut::new();\n\n            // server should offer cleartext\n            match read_message(&mut client, &mut read).await {\n                PgMessage::AuthenticationCleartextPassword => {}\n                _ => panic!(\"wrong message\"),\n            }\n\n            // client responds with password\n            write.clear();\n            frontend::password_message(b\"my-secret-password\", &mut write).unwrap();\n            client.write_all(&write).await.unwrap();\n        });\n        let endpoint_rate_limiter = Arc::new(EndpointRateLimiter::new_with_shards(\n            EndpointRateLimiter::DEFAULT,\n            64,\n        ));\n\n        let _creds = auth_quirks(\n            &ctx,\n            &api,\n            user_info,\n            &mut stream,\n            true,\n            &CONFIG,\n            endpoint_rate_limiter,\n        )\n        .await\n        .unwrap();\n\n        handle.await.unwrap();\n    }\n\n    #[tokio::test]\n    async fn auth_quirks_password_hack() {\n        let (mut client, server) = tokio::io::duplex(1024);\n        let mut stream = PqStream::new_skip_handshake(Stream::from_raw(server));\n\n        let ctx = RequestContext::test();\n        let api = Auth {\n            ips: vec![],\n            vpc_endpoint_ids: vec![],\n    
        access_blocker_flags: AccessBlockerFlags::default(),\n            secret: AuthSecret::Scram(ServerSecret::build(\"my-secret-password\").await.unwrap()),\n        };\n\n        let user_info = ComputeUserInfoMaybeEndpoint {\n            user: \"conrad\".into(),\n            endpoint_id: None,\n            options: NeonOptions::default(),\n        };\n\n        let handle = tokio::spawn(async move {\n            let mut read = BytesMut::new();\n\n            // server should offer cleartext\n            match read_message(&mut client, &mut read).await {\n                PgMessage::AuthenticationCleartextPassword => {}\n                _ => panic!(\"wrong message\"),\n            }\n\n            // client responds with password\n            let mut write = BytesMut::new();\n            frontend::password_message(b\"endpoint=my-endpoint;my-secret-password\", &mut write)\n                .unwrap();\n            client.write_all(&write).await.unwrap();\n        });\n\n        let endpoint_rate_limiter = Arc::new(EndpointRateLimiter::new_with_shards(\n            EndpointRateLimiter::DEFAULT,\n            64,\n        ));\n\n        let creds = auth_quirks(\n            &ctx,\n            &api,\n            user_info,\n            &mut stream,\n            true,\n            &CONFIG,\n            endpoint_rate_limiter,\n        )\n        .await\n        .unwrap();\n\n        assert_eq!(creds.info.endpoint, \"my-endpoint\");\n\n        handle.await.unwrap();\n    }\n}\n"
  },
  {
    "path": "proxy/src/auth/credentials.rs",
    "content": "//! User credentials used in authentication.\n\nuse std::collections::HashSet;\nuse std::net::IpAddr;\nuse std::str::FromStr;\n\nuse itertools::Itertools;\nuse thiserror::Error;\nuse tracing::{debug, warn};\n\nuse crate::auth::password_hack::parse_endpoint_param;\nuse crate::context::RequestContext;\nuse crate::error::{ReportableError, UserFacingError};\nuse crate::metrics::{Metrics, SniGroup, SniKind};\nuse crate::pqproto::StartupMessageParams;\nuse crate::proxy::NeonOptions;\nuse crate::serverless::{AUTH_BROKER_SNI, SERVERLESS_DRIVER_SNI};\nuse crate::types::{EndpointId, RoleName};\n\n#[derive(Debug, Error, PartialEq, Eq, Clone)]\npub(crate) enum ComputeUserInfoParseError {\n    #[error(\"Parameter '{0}' is missing in startup packet.\")]\n    MissingKey(&'static str),\n\n    #[error(\n        \"Inconsistent project name inferred from \\\n         SNI ('{}') and project option ('{}').\",\n        .domain, .option,\n    )]\n    InconsistentProjectNames {\n        domain: EndpointId,\n        option: EndpointId,\n    },\n\n    #[error(\"Project name ('{0}') must contain only alphanumeric characters and hyphen.\")]\n    MalformedProjectName(EndpointId),\n}\n\nimpl UserFacingError for ComputeUserInfoParseError {}\n\nimpl ReportableError for ComputeUserInfoParseError {\n    fn get_error_kind(&self) -> crate::error::ErrorKind {\n        crate::error::ErrorKind::User\n    }\n}\n\n/// Various client credentials which we use for authentication.\n/// Note that we don't store any kind of client key or password here.\n#[derive(Debug, Clone, PartialEq, Eq)]\npub(crate) struct ComputeUserInfoMaybeEndpoint {\n    pub(crate) user: RoleName,\n    pub(crate) endpoint_id: Option<EndpointId>,\n    pub(crate) options: NeonOptions,\n}\n\nimpl ComputeUserInfoMaybeEndpoint {\n    #[inline]\n    pub(crate) fn endpoint(&self) -> Option<&str> {\n        self.endpoint_id.as_deref()\n    }\n}\n\npub(crate) fn endpoint_sni(sni: &str, common_names: &HashSet<String>) -> 
Option<EndpointId> {\n    let (subdomain, common_name) = sni.split_once('.')?;\n    if !common_names.contains(common_name) {\n        return None;\n    }\n    if subdomain == SERVERLESS_DRIVER_SNI || subdomain == AUTH_BROKER_SNI {\n        return None;\n    }\n    Some(EndpointId::from(subdomain))\n}\n\nimpl ComputeUserInfoMaybeEndpoint {\n    pub(crate) fn parse(\n        ctx: &RequestContext,\n        params: &StartupMessageParams,\n        sni: Option<&str>,\n        common_names: Option<&HashSet<String>>,\n    ) -> Result<Self, ComputeUserInfoParseError> {\n        // Some parameters are stored in the startup message.\n        let get_param = |key| {\n            params\n                .get(key)\n                .ok_or(ComputeUserInfoParseError::MissingKey(key))\n        };\n        let user: RoleName = get_param(\"user\")?.into();\n\n        // Project name might be passed via PG's command-line options.\n        let endpoint_option = params\n            .options_raw()\n            .and_then(|options| {\n                // We support both `project` (deprecated) and `endpoint` options for backward compatibility.\n                // However, if both are present, we don't exactly know which one to use.\n                // Therefore we require that only one of them is present.\n                options\n                    .filter_map(parse_endpoint_param)\n                    .at_most_one()\n                    .ok()?\n            })\n            .map(|name| name.into());\n\n        let endpoint_from_domain =\n            sni.and_then(|sni_str| common_names.and_then(|cn| endpoint_sni(sni_str, cn)));\n\n        let endpoint = match (endpoint_option, endpoint_from_domain) {\n            // Invariant: if we have both project name variants, they should match.\n            (Some(option), Some(domain)) if option != domain => {\n                Some(Err(ComputeUserInfoParseError::InconsistentProjectNames {\n                    domain,\n                    option,\n       
         }))\n            }\n            // Invariant: project name may not contain certain characters.\n            (a, b) => a.or(b).map(|name| {\n                if project_name_valid(name.as_ref()) {\n                    Ok(name)\n                } else {\n                    Err(ComputeUserInfoParseError::MalformedProjectName(name))\n                }\n            }),\n        }\n        .transpose()?;\n\n        if let Some(ep) = &endpoint {\n            ctx.set_endpoint_id(ep.clone());\n        }\n\n        let metrics = Metrics::get();\n        debug!(%user, \"credentials\");\n\n        let protocol = ctx.protocol();\n        let kind = if sni.is_some() {\n            debug!(\"Connection with sni\");\n            SniKind::Sni\n        } else if endpoint.is_some() {\n            debug!(\"Connection without sni\");\n            SniKind::NoSni\n        } else {\n            debug!(\"Connection with password hack\");\n            SniKind::PasswordHack\n        };\n\n        metrics\n            .proxy\n            .accepted_connections_by_sni\n            .inc(SniGroup { protocol, kind });\n\n        let options = NeonOptions::parse_params(params);\n\n        Ok(Self {\n            user,\n            endpoint_id: endpoint,\n            options,\n        })\n    }\n}\n\npub(crate) fn check_peer_addr_is_in_list(peer_addr: &IpAddr, ip_list: &[IpPattern]) -> bool {\n    ip_list.is_empty() || ip_list.iter().any(|pattern| check_ip(peer_addr, pattern))\n}\n\n#[derive(Debug, Clone, Eq, PartialEq)]\npub(crate) enum IpPattern {\n    Subnet(ipnet::IpNet),\n    Range(IpAddr, IpAddr),\n    Single(IpAddr),\n    None,\n}\n\nimpl<'de> serde::de::Deserialize<'de> for IpPattern {\n    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>\n    where\n        D: serde::Deserializer<'de>,\n    {\n        struct StrVisitor;\n        impl serde::de::Visitor<'_> for StrVisitor {\n            type Value = IpPattern;\n\n            fn expecting(&self, formatter: &mut 
std::fmt::Formatter<'_>) -> std::fmt::Result {\n                write!(\n                    formatter,\n                    \"comma separated list with ip address, ip address range, or ip address subnet mask\"\n                )\n            }\n\n            fn visit_str<E>(self, v: &str) -> Result<Self::Value, E>\n            where\n                E: serde::de::Error,\n            {\n                Ok(parse_ip_pattern(v).unwrap_or_else(|e| {\n                    warn!(\"Cannot parse ip pattern {v}: {e}\");\n                    IpPattern::None\n                }))\n            }\n        }\n        deserializer.deserialize_str(StrVisitor)\n    }\n}\n\nimpl FromStr for IpPattern {\n    type Err = anyhow::Error;\n\n    fn from_str(s: &str) -> Result<Self, Self::Err> {\n        parse_ip_pattern(s)\n    }\n}\n\nfn parse_ip_pattern(pattern: &str) -> anyhow::Result<IpPattern> {\n    if pattern.contains('/') {\n        let subnet: ipnet::IpNet = pattern.parse()?;\n        return Ok(IpPattern::Subnet(subnet));\n    }\n    if let Some((start, end)) = pattern.split_once('-') {\n        let start: IpAddr = start.parse()?;\n        let end: IpAddr = end.parse()?;\n        return Ok(IpPattern::Range(start, end));\n    }\n    let addr: IpAddr = pattern.parse()?;\n    Ok(IpPattern::Single(addr))\n}\n\nfn check_ip(ip: &IpAddr, pattern: &IpPattern) -> bool {\n    match pattern {\n        IpPattern::Subnet(subnet) => subnet.contains(ip),\n        IpPattern::Range(start, end) => start <= ip && ip <= end,\n        IpPattern::Single(addr) => addr == ip,\n        IpPattern::None => false,\n    }\n}\n\nfn project_name_valid(name: &str) -> bool {\n    name.chars().all(|c| c.is_alphanumeric() || c == '-')\n}\n\n#[cfg(test)]\nmod tests {\n    use ComputeUserInfoParseError::*;\n    use serde_json::json;\n\n    use super::*;\n\n    #[test]\n    fn parse_bare_minimum() -> anyhow::Result<()> {\n        // According to postgresql, only `user` should be required.\n        let options = 
StartupMessageParams::new([(\"user\", \"john_doe\")]);\n        let ctx = RequestContext::test();\n        let user_info = ComputeUserInfoMaybeEndpoint::parse(&ctx, &options, None, None)?;\n        assert_eq!(user_info.user, \"john_doe\");\n        assert_eq!(user_info.endpoint_id, None);\n\n        Ok(())\n    }\n\n    #[test]\n    fn parse_excessive() -> anyhow::Result<()> {\n        let options = StartupMessageParams::new([\n            (\"user\", \"john_doe\"),\n            (\"database\", \"world\"), // should be ignored\n            (\"foo\", \"bar\"),        // should be ignored\n        ]);\n        let ctx = RequestContext::test();\n        let user_info = ComputeUserInfoMaybeEndpoint::parse(&ctx, &options, None, None)?;\n        assert_eq!(user_info.user, \"john_doe\");\n        assert_eq!(user_info.endpoint_id, None);\n\n        Ok(())\n    }\n\n    #[test]\n    fn parse_project_from_sni() -> anyhow::Result<()> {\n        let options = StartupMessageParams::new([(\"user\", \"john_doe\")]);\n\n        let sni = Some(\"foo.localhost\");\n        let common_names = Some([\"localhost\".into()].into());\n\n        let ctx = RequestContext::test();\n        let user_info =\n            ComputeUserInfoMaybeEndpoint::parse(&ctx, &options, sni, common_names.as_ref())?;\n        assert_eq!(user_info.user, \"john_doe\");\n        assert_eq!(user_info.endpoint_id.as_deref(), Some(\"foo\"));\n        assert_eq!(user_info.options.get_cache_key(\"foo\"), \"foo\");\n\n        Ok(())\n    }\n\n    #[test]\n    fn parse_project_from_options() -> anyhow::Result<()> {\n        let options = StartupMessageParams::new([\n            (\"user\", \"john_doe\"),\n            (\"options\", \"-ckey=1 project=bar -c geqo=off\"),\n        ]);\n\n        let ctx = RequestContext::test();\n        let user_info = ComputeUserInfoMaybeEndpoint::parse(&ctx, &options, None, None)?;\n        assert_eq!(user_info.user, \"john_doe\");\n        assert_eq!(user_info.endpoint_id.as_deref(), 
Some(\"bar\"));\n\n        Ok(())\n    }\n\n    #[test]\n    fn parse_endpoint_from_options() -> anyhow::Result<()> {\n        let options = StartupMessageParams::new([\n            (\"user\", \"john_doe\"),\n            (\"options\", \"-ckey=1 endpoint=bar -c geqo=off\"),\n        ]);\n\n        let ctx = RequestContext::test();\n        let user_info = ComputeUserInfoMaybeEndpoint::parse(&ctx, &options, None, None)?;\n        assert_eq!(user_info.user, \"john_doe\");\n        assert_eq!(user_info.endpoint_id.as_deref(), Some(\"bar\"));\n\n        Ok(())\n    }\n\n    #[test]\n    fn parse_three_endpoints_from_options() -> anyhow::Result<()> {\n        let options = StartupMessageParams::new([\n            (\"user\", \"john_doe\"),\n            (\n                \"options\",\n                \"-ckey=1 endpoint=one endpoint=two endpoint=three -c geqo=off\",\n            ),\n        ]);\n\n        let ctx = RequestContext::test();\n        let user_info = ComputeUserInfoMaybeEndpoint::parse(&ctx, &options, None, None)?;\n        assert_eq!(user_info.user, \"john_doe\");\n        assert!(user_info.endpoint_id.is_none());\n\n        Ok(())\n    }\n\n    #[test]\n    fn parse_when_endpoint_and_project_are_in_options() -> anyhow::Result<()> {\n        let options = StartupMessageParams::new([\n            (\"user\", \"john_doe\"),\n            (\"options\", \"-ckey=1 endpoint=bar project=foo -c geqo=off\"),\n        ]);\n\n        let ctx = RequestContext::test();\n        let user_info = ComputeUserInfoMaybeEndpoint::parse(&ctx, &options, None, None)?;\n        assert_eq!(user_info.user, \"john_doe\");\n        assert!(user_info.endpoint_id.is_none());\n\n        Ok(())\n    }\n\n    #[test]\n    fn parse_projects_identical() -> anyhow::Result<()> {\n        let options = StartupMessageParams::new([(\"user\", \"john_doe\"), (\"options\", \"project=baz\")]);\n\n        let sni = Some(\"baz.localhost\");\n        let common_names = 
Some([\"localhost\".into()].into());\n\n        let ctx = RequestContext::test();\n        let user_info =\n            ComputeUserInfoMaybeEndpoint::parse(&ctx, &options, sni, common_names.as_ref())?;\n        assert_eq!(user_info.user, \"john_doe\");\n        assert_eq!(user_info.endpoint_id.as_deref(), Some(\"baz\"));\n\n        Ok(())\n    }\n\n    #[test]\n    fn parse_multi_common_names() -> anyhow::Result<()> {\n        let options = StartupMessageParams::new([(\"user\", \"john_doe\")]);\n\n        let common_names = Some([\"a.com\".into(), \"b.com\".into()].into());\n        let sni = Some(\"p1.a.com\");\n        let ctx = RequestContext::test();\n        let user_info =\n            ComputeUserInfoMaybeEndpoint::parse(&ctx, &options, sni, common_names.as_ref())?;\n        assert_eq!(user_info.endpoint_id.as_deref(), Some(\"p1\"));\n\n        let common_names = Some([\"a.com\".into(), \"b.com\".into()].into());\n        let sni = Some(\"p1.b.com\");\n        let ctx = RequestContext::test();\n        let user_info =\n            ComputeUserInfoMaybeEndpoint::parse(&ctx, &options, sni, common_names.as_ref())?;\n        assert_eq!(user_info.endpoint_id.as_deref(), Some(\"p1\"));\n\n        Ok(())\n    }\n\n    #[test]\n    fn parse_projects_different() {\n        let options =\n            StartupMessageParams::new([(\"user\", \"john_doe\"), (\"options\", \"project=first\")]);\n\n        let sni = Some(\"second.localhost\");\n        let common_names = Some([\"localhost\".into()].into());\n\n        let ctx = RequestContext::test();\n        let err = ComputeUserInfoMaybeEndpoint::parse(&ctx, &options, sni, common_names.as_ref())\n            .expect_err(\"should fail\");\n        match err {\n            InconsistentProjectNames { domain, option } => {\n                assert_eq!(option, \"first\");\n                assert_eq!(domain, \"second\");\n            }\n            _ => panic!(\"bad error: {err:?}\"),\n        }\n    }\n\n    #[test]\n    fn 
parse_unknown_sni() {\n        let options = StartupMessageParams::new([(\"user\", \"john_doe\")]);\n\n        let sni = Some(\"project.localhost\");\n        let common_names = Some([\"example.com\".into()].into());\n\n        let ctx = RequestContext::test();\n        let info = ComputeUserInfoMaybeEndpoint::parse(&ctx, &options, sni, common_names.as_ref())\n            .unwrap();\n\n        assert!(info.endpoint_id.is_none());\n    }\n\n    #[test]\n    fn parse_unknown_sni_with_options() {\n        let options = StartupMessageParams::new([\n            (\"user\", \"john_doe\"),\n            (\"options\", \"endpoint=foo-bar-baz-1234\"),\n        ]);\n\n        let sni = Some(\"project.localhost\");\n        let common_names = Some([\"example.com\".into()].into());\n\n        let ctx = RequestContext::test();\n        let info = ComputeUserInfoMaybeEndpoint::parse(&ctx, &options, sni, common_names.as_ref())\n            .unwrap();\n\n        assert_eq!(info.endpoint_id.as_deref(), Some(\"foo-bar-baz-1234\"));\n    }\n\n    #[test]\n    fn parse_neon_options() -> anyhow::Result<()> {\n        let options = StartupMessageParams::new([\n            (\"user\", \"john_doe\"),\n            (\"options\", \"neon_lsn:0/2 neon_endpoint_type:read_write\"),\n        ]);\n\n        let sni = Some(\"project.localhost\");\n        let common_names = Some([\"localhost\".into()].into());\n        let ctx = RequestContext::test();\n        let user_info =\n            ComputeUserInfoMaybeEndpoint::parse(&ctx, &options, sni, common_names.as_ref())?;\n        assert_eq!(user_info.endpoint_id.as_deref(), Some(\"project\"));\n        assert_eq!(\n            user_info.options.get_cache_key(\"project\"),\n            \"project endpoint_type:read_write lsn:0/2\"\n        );\n\n        Ok(())\n    }\n\n    #[test]\n    fn test_check_peer_addr_is_in_list() {\n        fn check(v: serde_json::Value) -> bool {\n            let peer_addr = IpAddr::from([127, 0, 0, 1]);\n            let 
ip_list: Vec<IpPattern> = serde_json::from_value(v).unwrap();\n            check_peer_addr_is_in_list(&peer_addr, &ip_list)\n        }\n\n        assert!(check(json!([])));\n        assert!(check(json!([\"127.0.0.1\"])));\n        assert!(!check(json!([\"8.8.8.8\"])));\n        // If there is an incorrect address, it will be skipped.\n        assert!(check(json!([\"88.8.8\", \"127.0.0.1\"])));\n    }\n    #[test]\n    fn test_parse_ip_v4() -> anyhow::Result<()> {\n        let peer_addr = IpAddr::from([127, 0, 0, 1]);\n        // Ok\n        assert_eq!(parse_ip_pattern(\"127.0.0.1\")?, IpPattern::Single(peer_addr));\n        assert_eq!(\n            parse_ip_pattern(\"127.0.0.1/31\")?,\n            IpPattern::Subnet(ipnet::IpNet::new(peer_addr, 31)?)\n        );\n        assert_eq!(\n            parse_ip_pattern(\"0.0.0.0-200.0.1.2\")?,\n            IpPattern::Range(IpAddr::from([0, 0, 0, 0]), IpAddr::from([200, 0, 1, 2]))\n        );\n\n        // Error\n        assert!(parse_ip_pattern(\"300.0.1.2\").is_err());\n        assert!(parse_ip_pattern(\"30.1.2\").is_err());\n        assert!(parse_ip_pattern(\"127.0.0.1/33\").is_err());\n        assert!(parse_ip_pattern(\"127.0.0.1-127.0.3\").is_err());\n        assert!(parse_ip_pattern(\"1234.0.0.1-127.0.3.0\").is_err());\n        Ok(())\n    }\n\n    #[test]\n    fn test_check_ipv4() -> anyhow::Result<()> {\n        let peer_addr = IpAddr::from([127, 0, 0, 1]);\n        let peer_addr_next = IpAddr::from([127, 0, 0, 2]);\n        let peer_addr_prev = IpAddr::from([127, 0, 0, 0]);\n        // Success\n        assert!(check_ip(&peer_addr, &IpPattern::Single(peer_addr)));\n        assert!(check_ip(\n            &peer_addr,\n            &IpPattern::Subnet(ipnet::IpNet::new(peer_addr_prev, 31)?)\n        ));\n        assert!(check_ip(\n            &peer_addr,\n            &IpPattern::Subnet(ipnet::IpNet::new(peer_addr_next, 30)?)\n        ));\n        assert!(check_ip(\n            &peer_addr,\n            
&IpPattern::Range(IpAddr::from([0, 0, 0, 0]), IpAddr::from([200, 0, 1, 2]))\n        ));\n        assert!(check_ip(\n            &peer_addr,\n            &IpPattern::Range(peer_addr, peer_addr)\n        ));\n\n        // Not success\n        assert!(!check_ip(&peer_addr, &IpPattern::Single(peer_addr_prev)));\n        assert!(!check_ip(\n            &peer_addr,\n            &IpPattern::Subnet(ipnet::IpNet::new(peer_addr_next, 31)?)\n        ));\n        assert!(!check_ip(\n            &peer_addr,\n            &IpPattern::Range(IpAddr::from([0, 0, 0, 0]), peer_addr_prev)\n        ));\n        assert!(!check_ip(\n            &peer_addr,\n            &IpPattern::Range(peer_addr_next, IpAddr::from([128, 0, 0, 0]))\n        ));\n        // There is no check that for range start <= end. But it's fine as long as for all this cases the result is false.\n        assert!(!check_ip(\n            &peer_addr,\n            &IpPattern::Range(peer_addr, peer_addr_prev)\n        ));\n        Ok(())\n    }\n\n    #[test]\n    fn test_connection_blocker() {\n        fn check(v: serde_json::Value) -> bool {\n            let peer_addr = IpAddr::from([127, 0, 0, 1]);\n            let ip_list: Vec<IpPattern> = serde_json::from_value(v).unwrap();\n            check_peer_addr_is_in_list(&peer_addr, &ip_list)\n        }\n\n        assert!(check(json!([])));\n        assert!(check(json!([\"127.0.0.1\"])));\n        assert!(!check(json!([\"255.255.255.255\"])));\n    }\n}\n"
  },
  {
    "path": "proxy/src/auth/flow.rs",
    "content": "//! Main authentication flow.\n\nuse std::sync::Arc;\n\nuse postgres_protocol::authentication::sasl::{SCRAM_SHA_256, SCRAM_SHA_256_PLUS};\nuse tokio::io::{AsyncRead, AsyncWrite};\nuse tracing::info;\n\nuse super::backend::ComputeCredentialKeys;\nuse super::{AuthError, PasswordHackPayload};\nuse crate::context::RequestContext;\nuse crate::control_plane::AuthSecret;\nuse crate::intern::{EndpointIdInt, RoleNameInt};\nuse crate::pqproto::{BeAuthenticationSaslMessage, BeMessage};\nuse crate::sasl;\nuse crate::scram::threadpool::ThreadPool;\nuse crate::scram::{self};\nuse crate::stream::{PqStream, Stream};\nuse crate::tls::TlsServerEndPoint;\n\n/// Use [SCRAM](crate::scram)-based auth in [`AuthFlow`].\npub(crate) struct Scram<'a>(\n    pub(crate) &'a scram::ServerSecret,\n    pub(crate) &'a RequestContext,\n);\n\nimpl Scram<'_> {\n    #[inline(always)]\n    fn first_message(&self, channel_binding: bool) -> BeMessage<'_> {\n        if channel_binding {\n            BeMessage::AuthenticationSasl(BeAuthenticationSaslMessage::Methods(scram::METHODS))\n        } else {\n            BeMessage::AuthenticationSasl(BeAuthenticationSaslMessage::Methods(\n                scram::METHODS_WITHOUT_PLUS,\n            ))\n        }\n    }\n}\n\n/// Use an ad hoc auth flow (for clients which don't support SNI) proposed in\n/// <https://github.com/neondatabase/cloud/issues/1620#issuecomment-1165332290>.\npub(crate) struct PasswordHack;\n\n/// Use clear-text password auth called `password` in docs\n/// <https://www.postgresql.org/docs/current/auth-password.html>\npub(crate) struct CleartextPassword {\n    pub(crate) pool: Arc<ThreadPool>,\n    pub(crate) endpoint: EndpointIdInt,\n    pub(crate) role: RoleNameInt,\n    pub(crate) secret: AuthSecret,\n}\n\n/// This wrapper for [`PqStream`] performs client authentication.\n#[must_use]\npub(crate) struct AuthFlow<'a, S, State> {\n    /// The underlying stream which implements libpq's protocol.\n    stream: &'a mut 
PqStream<Stream<S>>,\n    /// State might contain ancillary data.\n    state: State,\n    tls_server_end_point: TlsServerEndPoint,\n}\n\n/// Initial state of the stream wrapper.\nimpl<'a, S: AsyncRead + AsyncWrite + Unpin, M> AuthFlow<'a, S, M> {\n    /// Create a new wrapper for client authentication.\n    pub(crate) fn new(stream: &'a mut PqStream<Stream<S>>, method: M) -> Self {\n        let tls_server_end_point = stream.get_ref().tls_server_end_point();\n\n        Self {\n            stream,\n            state: method,\n            tls_server_end_point,\n        }\n    }\n}\n\nimpl<S: AsyncRead + AsyncWrite + Unpin> AuthFlow<'_, S, PasswordHack> {\n    /// Perform user authentication. Raise an error in case authentication failed.\n    pub(crate) async fn get_password(self) -> super::Result<PasswordHackPayload> {\n        self.stream\n            .write_message(BeMessage::AuthenticationCleartextPassword);\n        self.stream.flush().await?;\n\n        let msg = self.stream.read_password_message().await?;\n        let password = msg\n            .strip_suffix(&[0])\n            .ok_or(AuthError::MalformedPassword(\"missing terminator\"))?;\n\n        let payload = PasswordHackPayload::parse(password)\n            // If we ended up here and the payload is malformed, it means that\n            // the user neither enabled SNI nor resorted to any other method\n            // for passing the project name we rely on. We should show them\n            // the most helpful error message and point to the documentation.\n            .ok_or(AuthError::MissingEndpointName)?;\n\n        Ok(payload)\n    }\n}\n\nimpl<S: AsyncRead + AsyncWrite + Unpin> AuthFlow<'_, S, CleartextPassword> {\n    /// Perform user authentication. 
Raise an error in case authentication failed.\n    pub(crate) async fn authenticate(self) -> super::Result<sasl::Outcome<ComputeCredentialKeys>> {\n        self.stream\n            .write_message(BeMessage::AuthenticationCleartextPassword);\n        self.stream.flush().await?;\n\n        let msg = self.stream.read_password_message().await?;\n        let password = msg\n            .strip_suffix(&[0])\n            .ok_or(AuthError::MalformedPassword(\"missing terminator\"))?;\n\n        let outcome = validate_password_and_exchange(\n            &self.state.pool,\n            self.state.endpoint,\n            self.state.role,\n            password,\n            self.state.secret,\n        )\n        .await?;\n\n        if let sasl::Outcome::Success(_) = &outcome {\n            self.stream.write_message(BeMessage::AuthenticationOk);\n        }\n\n        Ok(outcome)\n    }\n}\n\n/// Stream wrapper for handling [SCRAM](crate::scram) auth.\nimpl<S: AsyncRead + AsyncWrite + Unpin> AuthFlow<'_, S, Scram<'_>> {\n    /// Perform user authentication. 
Raise an error in case authentication failed.\n    pub(crate) async fn authenticate(self) -> super::Result<sasl::Outcome<scram::ScramKey>> {\n        let Scram(secret, ctx) = self.state;\n        let channel_binding = self.tls_server_end_point;\n\n        // send sasl message.\n        {\n            // pause the timer while we communicate with the client\n            let _paused = ctx.latency_timer_pause(crate::metrics::Waiting::Client);\n\n            let sasl = self.state.first_message(channel_binding.supported());\n            self.stream.write_message(sasl);\n            self.stream.flush().await?;\n        }\n\n        // complete sasl handshake.\n        sasl::authenticate(ctx, self.stream, |method| {\n            // Currently, the only supported SASL method is SCRAM.\n            match method {\n                SCRAM_SHA_256 => ctx.set_auth_method(crate::context::AuthMethod::ScramSha256),\n                SCRAM_SHA_256_PLUS => {\n                    ctx.set_auth_method(crate::context::AuthMethod::ScramSha256Plus);\n                }\n                method => return Err(sasl::Error::BadAuthMethod(method.into())),\n            }\n\n            // TODO: make this a metric instead\n            info!(\"client chooses {}\", method);\n\n            Ok(scram::Exchange::new(secret, rand::random, channel_binding))\n        })\n        .await\n        .map_err(AuthError::Sasl)\n    }\n}\n\npub(crate) async fn validate_password_and_exchange(\n    pool: &ThreadPool,\n    endpoint: EndpointIdInt,\n    role: RoleNameInt,\n    password: &[u8],\n    secret: AuthSecret,\n) -> super::Result<sasl::Outcome<ComputeCredentialKeys>> {\n    match secret {\n        // perform scram authentication as both client and server to validate the keys\n        AuthSecret::Scram(scram_secret) => {\n            let outcome =\n                crate::scram::exchange(pool, endpoint, role, &scram_secret, password).await?;\n\n            let client_key = match outcome {\n                
sasl::Outcome::Success(client_key) => client_key,\n                sasl::Outcome::Failure(reason) => return Ok(sasl::Outcome::Failure(reason)),\n            };\n\n            let keys = crate::compute::ScramKeys {\n                client_key: client_key.as_bytes(),\n                server_key: scram_secret.server_key.as_bytes(),\n            };\n\n            Ok(sasl::Outcome::Success(ComputeCredentialKeys::AuthKeys(\n                postgres_client::config::AuthKeys::ScramSha256(keys),\n            )))\n        }\n    }\n}\n"
  },
  {
    "path": "proxy/src/auth/mod.rs",
    "content": "//! Client authentication mechanisms.\n\npub mod backend;\npub use backend::Backend;\n\nmod credentials;\npub(crate) use credentials::{\n    ComputeUserInfoMaybeEndpoint, ComputeUserInfoParseError, IpPattern, check_peer_addr_is_in_list,\n    endpoint_sni,\n};\n\nmod password_hack;\nuse password_hack::PasswordHackPayload;\npub(crate) use password_hack::parse_endpoint_param;\n\nmod flow;\nuse std::io;\nuse std::net::IpAddr;\n\npub(crate) use flow::*;\nuse thiserror::Error;\nuse tokio::time::error::Elapsed;\n\nuse crate::auth::backend::jwt::JwtError;\nuse crate::control_plane;\nuse crate::error::{ReportableError, UserFacingError};\n\n/// Convenience wrapper for the authentication error.\npub(crate) type Result<T> = std::result::Result<T, AuthError>;\n\n/// Common authentication error.\n#[derive(Debug, Error)]\npub(crate) enum AuthError {\n    #[error(transparent)]\n    ConsoleRedirect(#[from] backend::ConsoleRedirectError),\n\n    #[error(transparent)]\n    GetAuthInfo(#[from] control_plane::errors::GetAuthInfoError),\n\n    /// SASL protocol errors (includes [SCRAM](crate::scram)).\n    #[error(transparent)]\n    Sasl(#[from] crate::sasl::Error),\n\n    #[error(\"Unsupported authentication method: {0}\")]\n    BadAuthMethod(Box<str>),\n\n    #[error(\"Malformed password message: {0}\")]\n    MalformedPassword(&'static str),\n\n    #[error(\n        \"Endpoint ID is not specified. \\\n        Either please upgrade the postgres client library (libpq) for SNI support \\\n        or pass the endpoint ID (first part of the domain name) as a parameter: '?options=endpoint%3D<endpoint-id>'. \\\n        See more at https://neon.tech/sni\"\n    )]\n    MissingEndpointName,\n\n    #[error(\n        \"VPC endpoint ID is not specified. 
\\\n        This endpoint requires a VPC endpoint ID to connect.\"\n    )]\n    MissingVPCEndpointId,\n\n    #[error(\"password authentication failed for user '{0}'\")]\n    PasswordFailed(Box<str>),\n\n    /// Errors produced by e.g. [`crate::stream::PqStream`].\n    #[error(transparent)]\n    Io(#[from] io::Error),\n\n    #[error(\n        \"This IP address {0} is not allowed to connect to this endpoint. \\\n        Please add it to the allowed list in the Neon console. \\\n        Make sure to check for IPv4 or IPv6 addresses.\"\n    )]\n    IpAddressNotAllowed(IpAddr),\n\n    #[error(\"This connection is trying to access this endpoint from a blocked network.\")]\n    NetworkNotAllowed,\n\n    #[error(\n        \"This VPC endpoint id {0} is not allowed to connect to this endpoint. \\\n        Please add it to the allowed list in the Neon console.\"\n    )]\n    VpcEndpointIdNotAllowed(String),\n\n    #[error(\"Too many connections to this endpoint. Please try again later.\")]\n    TooManyConnections,\n\n    #[error(\"Authentication timed out\")]\n    UserTimeout(Elapsed),\n\n    #[error(\"Disconnected due to inactivity after {0}.\")]\n    ConfirmationTimeout(humantime::Duration),\n\n    #[error(transparent)]\n    Jwt(#[from] JwtError),\n}\n\nimpl AuthError {\n    pub(crate) fn bad_auth_method(name: impl Into<Box<str>>) -> Self {\n        AuthError::BadAuthMethod(name.into())\n    }\n\n    pub(crate) fn password_failed(user: impl Into<Box<str>>) -> Self {\n        AuthError::PasswordFailed(user.into())\n    }\n\n    pub(crate) fn ip_address_not_allowed(ip: IpAddr) -> Self {\n        AuthError::IpAddressNotAllowed(ip)\n    }\n\n    pub(crate) fn vpc_endpoint_id_not_allowed(id: String) -> Self {\n        AuthError::VpcEndpointIdNotAllowed(id)\n    }\n\n    pub(crate) fn too_many_connections() -> Self {\n        AuthError::TooManyConnections\n    }\n\n    pub(crate) fn is_password_failed(&self) -> bool {\n        matches!(self, AuthError::PasswordFailed(_))\n    
}\n\n    pub(crate) fn user_timeout(elapsed: Elapsed) -> Self {\n        AuthError::UserTimeout(elapsed)\n    }\n\n    pub(crate) fn confirmation_timeout(timeout: humantime::Duration) -> Self {\n        AuthError::ConfirmationTimeout(timeout)\n    }\n}\n\nimpl UserFacingError for AuthError {\n    fn to_string_client(&self) -> String {\n        match self {\n            Self::ConsoleRedirect(e) => e.to_string_client(),\n            Self::GetAuthInfo(e) => e.to_string_client(),\n            Self::Sasl(e) => e.to_string_client(),\n            Self::PasswordFailed(_) => self.to_string(),\n            Self::BadAuthMethod(_) => self.to_string(),\n            Self::MalformedPassword(_) => self.to_string(),\n            Self::MissingEndpointName => self.to_string(),\n            Self::MissingVPCEndpointId => self.to_string(),\n            Self::Io(_) => \"Internal error\".to_string(),\n            Self::IpAddressNotAllowed(_) => self.to_string(),\n            Self::NetworkNotAllowed => self.to_string(),\n            Self::VpcEndpointIdNotAllowed(_) => self.to_string(),\n            Self::TooManyConnections => self.to_string(),\n            Self::UserTimeout(_) => self.to_string(),\n            Self::ConfirmationTimeout(_) => self.to_string(),\n            Self::Jwt(_) => self.to_string(),\n        }\n    }\n}\n\nimpl ReportableError for AuthError {\n    fn get_error_kind(&self) -> crate::error::ErrorKind {\n        match self {\n            Self::ConsoleRedirect(e) => e.get_error_kind(),\n            Self::GetAuthInfo(e) => e.get_error_kind(),\n            Self::Sasl(e) => e.get_error_kind(),\n            Self::PasswordFailed(_) => crate::error::ErrorKind::User,\n            Self::BadAuthMethod(_) => crate::error::ErrorKind::User,\n            Self::MalformedPassword(_) => crate::error::ErrorKind::User,\n            Self::MissingEndpointName => crate::error::ErrorKind::User,\n            Self::MissingVPCEndpointId => crate::error::ErrorKind::User,\n            Self::Io(_) 
=> crate::error::ErrorKind::ClientDisconnect,\n            Self::IpAddressNotAllowed(_) => crate::error::ErrorKind::User,\n            Self::NetworkNotAllowed => crate::error::ErrorKind::User,\n            Self::VpcEndpointIdNotAllowed(_) => crate::error::ErrorKind::User,\n            Self::TooManyConnections => crate::error::ErrorKind::RateLimit,\n            Self::UserTimeout(_) => crate::error::ErrorKind::User,\n            Self::ConfirmationTimeout(_) => crate::error::ErrorKind::User,\n            Self::Jwt(_) => crate::error::ErrorKind::User,\n        }\n    }\n}\n"
  },
  {
    "path": "proxy/src/auth/password_hack.rs",
    "content": "//! Payload for ad hoc authentication method for clients that don't support SNI.\n//! See the `impl` for [`super::backend::Backend<ClientCredentials>`].\n//! Read more: <https://github.com/neondatabase/cloud/issues/1620#issuecomment-1165332290>.\n//! UPDATE (Mon Aug  8 13:20:34 UTC 2022): the payload format has been simplified.\n\nuse bstr::ByteSlice;\n\nuse crate::types::EndpointId;\n\npub(crate) struct PasswordHackPayload {\n    pub(crate) endpoint: EndpointId,\n    pub(crate) password: Vec<u8>,\n}\n\nimpl PasswordHackPayload {\n    pub(crate) fn parse(bytes: &[u8]) -> Option<Self> {\n        // The format is `project=<utf-8>;<password-bytes>` or `project=<utf-8>$<password-bytes>`.\n        let separators = [\";\", \"$\"];\n        for sep in separators {\n            if let Some((endpoint, password)) = bytes.split_once_str(sep) {\n                let endpoint = endpoint.to_str().ok()?;\n                return Some(Self {\n                    endpoint: parse_endpoint_param(endpoint)?.into(),\n                    password: password.to_owned(),\n                });\n            }\n        }\n\n        None\n    }\n}\n\npub(crate) fn parse_endpoint_param(bytes: &str) -> Option<&str> {\n    bytes\n        .strip_prefix(\"project=\")\n        .or_else(|| bytes.strip_prefix(\"endpoint=\"))\n}\n\n#[cfg(test)]\nmod tests {\n    use super::*;\n\n    #[test]\n    fn parse_endpoint_param_fn() {\n        let input = \"\";\n        assert!(parse_endpoint_param(input).is_none());\n\n        let input = \"project=\";\n        assert_eq!(parse_endpoint_param(input), Some(\"\"));\n\n        let input = \"project=foobar\";\n        assert_eq!(parse_endpoint_param(input), Some(\"foobar\"));\n\n        let input = \"endpoint=\";\n        assert_eq!(parse_endpoint_param(input), Some(\"\"));\n\n        let input = \"endpoint=foobar\";\n        assert_eq!(parse_endpoint_param(input), Some(\"foobar\"));\n\n        let input = \"other_option=foobar\";\n        
assert!(parse_endpoint_param(input).is_none());\n    }\n\n    #[test]\n    fn parse_password_hack_payload_project() {\n        let bytes = b\"\";\n        assert!(PasswordHackPayload::parse(bytes).is_none());\n\n        let bytes = b\"project=\";\n        assert!(PasswordHackPayload::parse(bytes).is_none());\n\n        let bytes = b\"project=;\";\n        let payload: PasswordHackPayload =\n            PasswordHackPayload::parse(bytes).expect(\"parsing failed\");\n        assert_eq!(payload.endpoint, \"\");\n        assert_eq!(payload.password, b\"\");\n\n        let bytes = b\"project=foobar;pass;word\";\n        let payload = PasswordHackPayload::parse(bytes).expect(\"parsing failed\");\n        assert_eq!(payload.endpoint, \"foobar\");\n        assert_eq!(payload.password, b\"pass;word\");\n    }\n\n    #[test]\n    fn parse_password_hack_payload_endpoint() {\n        let bytes = b\"\";\n        assert!(PasswordHackPayload::parse(bytes).is_none());\n\n        let bytes = b\"endpoint=\";\n        assert!(PasswordHackPayload::parse(bytes).is_none());\n\n        let bytes = b\"endpoint=;\";\n        let payload = PasswordHackPayload::parse(bytes).expect(\"parsing failed\");\n        assert_eq!(payload.endpoint, \"\");\n        assert_eq!(payload.password, b\"\");\n\n        let bytes = b\"endpoint=foobar;pass;word\";\n        let payload = PasswordHackPayload::parse(bytes).expect(\"parsing failed\");\n        assert_eq!(payload.endpoint, \"foobar\");\n        assert_eq!(payload.password, b\"pass;word\");\n    }\n\n    #[test]\n    fn parse_password_hack_payload_dollar() {\n        let bytes = b\"\";\n        assert!(PasswordHackPayload::parse(bytes).is_none());\n\n        let bytes = b\"endpoint=\";\n        assert!(PasswordHackPayload::parse(bytes).is_none());\n\n        let bytes = b\"endpoint=$\";\n        let payload = PasswordHackPayload::parse(bytes).expect(\"parsing failed\");\n        assert_eq!(payload.endpoint, \"\");\n        assert_eq!(payload.password, 
b\"\");\n\n        let bytes = b\"endpoint=foobar$pass$word\";\n        let payload = PasswordHackPayload::parse(bytes).expect(\"parsing failed\");\n        assert_eq!(payload.endpoint, \"foobar\");\n        assert_eq!(payload.password, b\"pass$word\");\n    }\n}\n"
  },
  {
    "path": "proxy/src/batch.rs",
    "content": "//! Batch processing system based on intrusive linked lists.\n//!\n//! Enqueuing a batch job requires no allocations, with\n//! direct support for cancelling jobs early.\nuse std::collections::BTreeMap;\nuse std::pin::pin;\nuse std::sync::Mutex;\n\nuse scopeguard::ScopeGuard;\nuse tokio::sync::oneshot;\nuse tokio::sync::oneshot::error::TryRecvError;\n\nuse crate::ext::LockExt;\n\ntype ProcResult<P> = Result<<P as QueueProcessing>::Res, <P as QueueProcessing>::Err>;\n\npub trait QueueProcessing: Send + 'static {\n    type Req: Send + 'static;\n    type Res: Send;\n    type Err: Send + Clone;\n\n    /// Get the desired batch size.\n    fn batch_size(&self, queue_size: usize) -> usize;\n\n    /// This applies a full batch of events.\n    /// Must respond with a full batch of replies.\n    ///\n    /// If this apply can error, it's expected that errors be forwarded to each Self::Res.\n    ///\n    /// Batching does not need to happen atomically.\n    fn apply(\n        &mut self,\n        req: Vec<Self::Req>,\n    ) -> impl Future<Output = Result<Vec<Self::Res>, Self::Err>> + Send;\n}\n\n#[derive(thiserror::Error)]\npub enum BatchQueueError<E: Clone, C> {\n    #[error(transparent)]\n    Result(E),\n    #[error(transparent)]\n    Cancelled(C),\n}\n\npub struct BatchQueue<P: QueueProcessing> {\n    processor: tokio::sync::Mutex<P>,\n    inner: Mutex<BatchQueueInner<P>>,\n}\n\nstruct BatchJob<P: QueueProcessing> {\n    req: P::Req,\n    res: tokio::sync::oneshot::Sender<Result<P::Res, P::Err>>,\n}\n\nimpl<P: QueueProcessing> BatchQueue<P> {\n    pub fn new(p: P) -> Self {\n        Self {\n            processor: tokio::sync::Mutex::new(p),\n            inner: Mutex::new(BatchQueueInner {\n                version: 0,\n                queue: BTreeMap::new(),\n            }),\n        }\n    }\n\n    /// Perform a single request-response process, this may be batched internally.\n    ///\n    /// This function is not cancel safe.\n    pub async fn call<R>(\n    
    &self,\n        req: P::Req,\n        cancelled: impl Future<Output = R>,\n    ) -> Result<P::Res, BatchQueueError<P::Err, R>> {\n        let (id, mut rx) = self.inner.lock_propagate_poison().register_job(req);\n\n        let mut cancelled = pin!(cancelled);\n        let resp: Option<Result<P::Res, P::Err>> = loop {\n            // try become the leader, or try wait for success.\n            let mut processor = tokio::select! {\n                // try become leader.\n                p = self.processor.lock() => p,\n                // wait for success.\n                resp = &mut rx => break resp.ok(),\n                // wait for cancellation.\n                cancel = cancelled.as_mut() => {\n                    let mut inner = self.inner.lock_propagate_poison();\n                    if inner.queue.remove(&id).is_some() {\n                        tracing::warn!(\"batched task cancelled before completion\");\n                    }\n                    return Err(BatchQueueError::Cancelled(cancel));\n                },\n            };\n\n            tracing::debug!(id, \"batch: became leader\");\n            let (reqs, resps) = self.inner.lock_propagate_poison().get_batch(&processor);\n\n            // snitch in case the task gets cancelled.\n            let cancel_safety = scopeguard::guard((), |()| {\n                if !std::thread::panicking() {\n                    tracing::error!(\n                        id,\n                        \"batch: leader cancelled, despite not being cancellation safe\"\n                    );\n                }\n            });\n\n            // apply a batch.\n            // if this is cancelled, jobs will not be completed and will panic.\n            let values = processor.apply(reqs).await;\n\n            // good: we didn't get cancelled.\n            ScopeGuard::into_inner(cancel_safety);\n\n            match values {\n                Ok(values) => {\n                    if values.len() != resps.len() {\n                    
    tracing::error!(\n                            \"batch: invalid response size, expected={}, got={}\",\n                            resps.len(),\n                            values.len()\n                        );\n                    }\n\n                    // send response values.\n                    for (tx, value) in std::iter::zip(resps, values) {\n                        if tx.send(Ok(value)).is_err() {\n                            // receiver hung up but that's fine.\n                        }\n                    }\n                }\n\n                Err(err) => {\n                    for tx in resps {\n                        if tx.send(Err(err.clone())).is_err() {\n                            // receiver hung up but that's fine.\n                        }\n                    }\n                }\n            }\n\n            match rx.try_recv() {\n                Ok(resp) => break Some(resp),\n                Err(TryRecvError::Closed) => break None,\n                // edge case - there was a race condition where\n                // we became the leader but were not in the batch.\n                //\n                // Example:\n                // thread 1: register job id=1\n                // thread 2: register job id=2\n                // thread 2: processor.lock().await\n                // thread 1: processor.lock().await\n                // thread 2: becomes leader, batch_size=1, jobs=[1].\n                Err(TryRecvError::Empty) => {}\n            }\n        };\n\n        tracing::debug!(id, \"batch: job completed\");\n\n        resp.expect(\"no response found. 
batch processer should not panic\")\n            .map_err(BatchQueueError::Result)\n    }\n}\n\nstruct BatchQueueInner<P: QueueProcessing> {\n    version: u64,\n    queue: BTreeMap<u64, BatchJob<P>>,\n}\n\nimpl<P: QueueProcessing> BatchQueueInner<P> {\n    fn register_job(&mut self, req: P::Req) -> (u64, oneshot::Receiver<ProcResult<P>>) {\n        let (tx, rx) = oneshot::channel();\n\n        let id = self.version;\n\n        // Overflow concern:\n        // This is a u64, and we might enqueue 2^16 tasks per second.\n        // This gives us 2^48 seconds (9 million years).\n        // Even if this does overflow, it will not break, but some\n        // jobs with the higher version might never get prioritised.\n        self.version += 1;\n\n        self.queue.insert(id, BatchJob { req, res: tx });\n\n        tracing::debug!(id, \"batch: registered job in the queue\");\n\n        (id, rx)\n    }\n\n    fn get_batch(&mut self, p: &P) -> (Vec<P::Req>, Vec<oneshot::Sender<ProcResult<P>>>) {\n        let batch_size = p.batch_size(self.queue.len());\n        let mut reqs = Vec::with_capacity(batch_size);\n        let mut resps = Vec::with_capacity(batch_size);\n        let mut ids = Vec::with_capacity(batch_size);\n\n        while reqs.len() < batch_size {\n            let Some((id, job)) = self.queue.pop_first() else {\n                break;\n            };\n            reqs.push(job.req);\n            resps.push(job.res);\n            ids.push(id);\n        }\n\n        tracing::debug!(ids=?ids, \"batch: acquired jobs\");\n\n        (reqs, resps)\n    }\n}\n"
  },
  {
    "path": "proxy/src/bin/local_proxy.rs",
    "content": "#[global_allocator]\nstatic GLOBAL: tikv_jemallocator::Jemalloc = tikv_jemallocator::Jemalloc;\n\n#[tokio::main]\nasync fn main() -> anyhow::Result<()> {\n    proxy::binary::local_proxy::run().await\n}\n"
  },
  {
    "path": "proxy/src/bin/pg_sni_router.rs",
    "content": "//! A stand-alone program that routes connections, e.g. from\n//! `aaa--bbb--1234.external.domain` to `aaa.bbb.internal.domain:1234`.\n//!\n//! This allows connecting to pods/services running in the same Kubernetes cluster from\n//! the outside. Similar to an ingress controller for HTTPS.\n\n#[tokio::main]\nasync fn main() -> anyhow::Result<()> {\n    proxy::binary::pg_sni_router::run().await\n}\n"
  },
  {
    "path": "proxy/src/bin/proxy.rs",
    "content": "#[global_allocator]\nstatic GLOBAL: tikv_jemallocator::Jemalloc = tikv_jemallocator::Jemalloc;\n\n#[allow(non_upper_case_globals)]\n#[unsafe(export_name = \"malloc_conf\")]\npub static malloc_conf: &[u8] = b\"prof:true,prof_active:true,lg_prof_sample:21\\0\";\n\n#[tokio::main]\nasync fn main() -> anyhow::Result<()> {\n    proxy::binary::proxy::run().await\n}\n"
  },
  {
    "path": "proxy/src/binary/local_proxy.rs",
    "content": "use std::env;\nuse std::net::SocketAddr;\nuse std::pin::pin;\nuse std::sync::Arc;\nuse std::time::Duration;\n\nuse anyhow::bail;\nuse arc_swap::ArcSwapOption;\nuse camino::Utf8PathBuf;\nuse clap::Parser;\nuse futures::future::Either;\nuse tokio::net::TcpListener;\nuse tokio::sync::Notify;\nuse tokio::task::JoinSet;\nuse tokio_util::sync::CancellationToken;\nuse tracing::{debug, error, info};\nuse utils::sentry_init::init_sentry;\nuse utils::{pid_file, project_build_tag, project_git_version};\n\nuse crate::auth::backend::jwt::JwkCache;\nuse crate::auth::backend::local::LocalBackend;\nuse crate::auth::{self};\nuse crate::cancellation::CancellationHandler;\n#[cfg(feature = \"rest_broker\")]\nuse crate::config::RestConfig;\nuse crate::config::{\n    self, AuthenticationConfig, ComputeConfig, HttpConfig, ProxyConfig, RetryConfig,\n    refresh_config_loop,\n};\nuse crate::control_plane::locks::ApiLocks;\nuse crate::http::health_server::AppMetrics;\nuse crate::metrics::{Metrics, ServiceInfo};\nuse crate::rate_limiter::{EndpointRateLimiter, LeakyBucketConfig, RateBucketInfo};\nuse crate::scram::threadpool::ThreadPool;\nuse crate::serverless::cancel_set::CancelSet;\nuse crate::serverless::{self, GlobalConnPoolOptions};\nuse crate::tls::client_config::compute_client_config_with_root_certs;\nuse crate::url::ApiUrl;\n\nproject_git_version!(GIT_VERSION);\nproject_build_tag!(BUILD_TAG);\n\n/// Neon proxy/router\n#[derive(Parser)]\n#[command(version = GIT_VERSION, about)]\nstruct LocalProxyCliArgs {\n    /// listen for incoming metrics connections on ip:port\n    #[clap(long, default_value = \"127.0.0.1:7001\")]\n    metrics: String,\n    /// listen for incoming http connections on ip:port\n    #[clap(long)]\n    http: String,\n    /// timeout for the TLS handshake\n    #[clap(long, default_value = \"15s\", value_parser = humantime::parse_duration)]\n    handshake_timeout: tokio::time::Duration,\n    /// lock for `connect_compute` api method. 
example: \"shards=32,permits=4,epoch=10m,timeout=1s\". (use `permits=0` to disable).\n    #[clap(long, default_value = config::ConcurrencyLockOptions::DEFAULT_OPTIONS_CONNECT_COMPUTE_LOCK)]\n    connect_compute_lock: String,\n    #[clap(flatten)]\n    sql_over_http: SqlOverHttpArgs,\n    /// User rate limiter max number of requests per second.\n    ///\n    /// Provided in the form `<Requests Per Second>@<Bucket Duration Size>`.\n    /// Can be given multiple times for different bucket sizes.\n    #[clap(long, default_values_t = RateBucketInfo::DEFAULT_ENDPOINT_SET)]\n    user_rps_limit: Vec<RateBucketInfo>,\n    /// Whether to retry the connection to the compute node\n    #[clap(long, default_value = config::RetryConfig::CONNECT_TO_COMPUTE_DEFAULT_VALUES)]\n    connect_to_compute_retry: String,\n    /// Address of the postgres server\n    #[clap(long, default_value = \"127.0.0.1:5432\")]\n    postgres: SocketAddr,\n    /// Address of the internal compute-ctl api service\n    #[clap(long, default_value = \"http://127.0.0.1:3081/\")]\n    compute_ctl: ApiUrl,\n    /// Path of the local proxy config file\n    #[clap(long, default_value = \"./local_proxy.json\")]\n    config_path: Utf8PathBuf,\n    /// Path of the local proxy PID file\n    #[clap(long, default_value = \"./local_proxy.pid\")]\n    pid_path: Utf8PathBuf,\n    /// Disable pg_session_jwt extension installation\n    /// This is useful for testing the local proxy with vanilla postgres.\n    #[clap(long, default_value = \"false\")]\n    #[cfg(feature = \"testing\")]\n    disable_pg_session_jwt: bool,\n}\n\n#[derive(clap::Args, Clone, Copy, Debug)]\nstruct SqlOverHttpArgs {\n    /// How many connections to pool for each endpoint. 
Excess connections are discarded\n    #[clap(long, default_value_t = 200)]\n    sql_over_http_pool_max_total_conns: usize,\n\n    /// How long pooled connections should remain idle for before closing\n    #[clap(long, default_value = \"5m\", value_parser = humantime::parse_duration)]\n    sql_over_http_idle_timeout: tokio::time::Duration,\n\n    #[clap(long, default_value_t = 100)]\n    sql_over_http_client_conn_threshold: u64,\n\n    #[clap(long, default_value_t = 16)]\n    sql_over_http_cancel_set_shards: usize,\n\n    #[clap(long, default_value_t = 10 * 1024 * 1024)] // 10 MiB\n    sql_over_http_max_request_size_bytes: usize,\n\n    #[clap(long, default_value_t = 10 * 1024 * 1024)] // 10 MiB\n    sql_over_http_max_response_size_bytes: usize,\n}\n\npub async fn run() -> anyhow::Result<()> {\n    let _logging_guard = crate::logging::init_local_proxy()?;\n    let _panic_hook_guard = utils::logging::replace_panic_hook_with_tracing_panic_hook();\n    let _sentry_guard = init_sentry(Some(GIT_VERSION.into()), &[]);\n\n    // TODO: refactor these to use labels\n    debug!(\"Version: {GIT_VERSION}\");\n    debug!(\"Build_tag: {BUILD_TAG}\");\n    let neon_metrics = ::metrics::NeonMetrics::new(::metrics::BuildInfo {\n        revision: GIT_VERSION,\n        build_tag: BUILD_TAG,\n    });\n\n    let jemalloc = match crate::jemalloc::MetricRecorder::new() {\n        Ok(t) => Some(t),\n        Err(e) => {\n            tracing::error!(error = ?e, \"could not start jemalloc metrics loop\");\n            None\n        }\n    };\n\n    let args = LocalProxyCliArgs::parse();\n    let config = build_config(&args)?;\n    let auth_backend = build_auth_backend(&args);\n\n    // before we bind to any ports, write the process ID to a file\n    // so that compute-ctl can find our process later\n    // in order to trigger the appropriate SIGHUP on config change.\n    //\n    // This also claims a \"lock\" that makes sure only one instance\n    // of local_proxy runs at a time.\n    let 
_process_guard = loop {\n        match pid_file::claim_for_current_process(&args.pid_path) {\n            Ok(guard) => break guard,\n            Err(e) => {\n                // compute-ctl might have tried to read the pid-file to let us\n                // know about some config change. We should try again.\n                error!(path=?args.pid_path, \"could not claim PID file guard: {e:?}\");\n                tokio::time::sleep(Duration::from_secs(1)).await;\n            }\n        }\n    };\n\n    let metrics_listener = TcpListener::bind(args.metrics).await?.into_std()?;\n    let http_listener = TcpListener::bind(args.http).await?;\n    let shutdown = CancellationToken::new();\n\n    // todo: should scale with CU\n    let endpoint_rate_limiter = Arc::new(EndpointRateLimiter::new_with_shards(\n        LeakyBucketConfig {\n            rps: 10.0,\n            max: 100.0,\n        },\n        16,\n    ));\n\n    let mut maintenance_tasks = JoinSet::new();\n\n    let refresh_config_notify = Arc::new(Notify::new());\n    maintenance_tasks.spawn(crate::signals::handle(shutdown.clone(), {\n        let refresh_config_notify = Arc::clone(&refresh_config_notify);\n        move || {\n            refresh_config_notify.notify_one();\n        }\n    }));\n\n    // trigger the first config load **after** setting up the signal hook\n    // to avoid the race condition where:\n    // 1. No config file registered when local_proxy starts up\n    // 2. The config file is written but the signal hook is not yet received\n    // 3. 
local_proxy completes startup but has no config loaded, despite there being a registered config.\n    refresh_config_notify.notify_one();\n    tokio::spawn(refresh_config_loop(\n        config,\n        args.config_path,\n        refresh_config_notify,\n    ));\n\n    maintenance_tasks.spawn(crate::http::health_server::task_main(\n        metrics_listener,\n        AppMetrics {\n            jemalloc,\n            neon_metrics,\n            proxy: crate::metrics::Metrics::get(),\n        },\n    ));\n\n    let task = serverless::task_main(\n        config,\n        auth_backend,\n        http_listener,\n        shutdown.clone(),\n        Arc::new(CancellationHandler::new(&config.connect_to_compute)),\n        endpoint_rate_limiter,\n    );\n\n    Metrics::get()\n        .service\n        .info\n        .set_label(ServiceInfo::running());\n\n    match futures::future::select(pin!(maintenance_tasks.join_next()), pin!(task)).await {\n        // exit immediately on maintenance task completion\n        Either::Left((Some(res), _)) => match crate::error::flatten_err(res)? {},\n        // exit with error immediately if all maintenance tasks have ceased (should be caught by branch above)\n        Either::Left((None, _)) => bail!(\"no maintenance tasks running. 
invalid state\"),\n        // exit immediately on client task error\n        Either::Right((res, _)) => res?,\n    }\n\n    Ok(())\n}\n\n/// ProxyConfig is created at proxy startup, and lives forever.\nfn build_config(args: &LocalProxyCliArgs) -> anyhow::Result<&'static ProxyConfig> {\n    let config::ConcurrencyLockOptions {\n        shards,\n        limiter,\n        epoch,\n        timeout,\n    } = args.connect_compute_lock.parse()?;\n    info!(\n        ?limiter,\n        shards,\n        ?epoch,\n        \"Using NodeLocks (connect_compute)\"\n    );\n    let connect_compute_locks = ApiLocks::new(\n        \"connect_compute_lock\",\n        limiter,\n        shards,\n        timeout,\n        epoch,\n        &Metrics::get().proxy.connect_compute_lock,\n    );\n\n    let http_config = HttpConfig {\n        accept_websockets: false,\n        pool_options: GlobalConnPoolOptions {\n            gc_epoch: Duration::from_secs(60),\n            pool_shards: 2,\n            idle_timeout: args.sql_over_http.sql_over_http_idle_timeout,\n            opt_in: false,\n\n            max_conns_per_endpoint: args.sql_over_http.sql_over_http_pool_max_total_conns,\n            max_total_conns: args.sql_over_http.sql_over_http_pool_max_total_conns,\n        },\n        cancel_set: CancelSet::new(args.sql_over_http.sql_over_http_cancel_set_shards),\n        client_conn_threshold: args.sql_over_http.sql_over_http_client_conn_threshold,\n        max_request_size_bytes: args.sql_over_http.sql_over_http_max_request_size_bytes,\n        max_response_size_bytes: args.sql_over_http.sql_over_http_max_response_size_bytes,\n    };\n\n    let compute_config = ComputeConfig {\n        retry: RetryConfig::parse(RetryConfig::CONNECT_TO_COMPUTE_DEFAULT_VALUES)?,\n        tls: Arc::new(compute_client_config_with_root_certs()?),\n        timeout: Duration::from_secs(2),\n    };\n\n    let greetings = env::var_os(\"NEON_MOTD\").map_or(String::new(), |s| match s.into_string() {\n        Ok(s) => s,\n 
       Err(_) => {\n            debug!(\"NEON_MOTD environment variable is not valid UTF-8\");\n            String::new()\n        }\n    });\n\n    Ok(Box::leak(Box::new(ProxyConfig {\n        tls_config: ArcSwapOption::from(None),\n        metric_collection: None,\n        http_config,\n        authentication_config: AuthenticationConfig {\n            jwks_cache: JwkCache::default(),\n            scram_thread_pool: ThreadPool::new(0),\n            scram_protocol_timeout: Duration::from_secs(10),\n            ip_allowlist_check_enabled: true,\n            is_vpc_acccess_proxy: false,\n            is_auth_broker: false,\n            accept_jwts: true,\n            console_redirect_confirmation_timeout: Duration::ZERO,\n        },\n        #[cfg(feature = \"rest_broker\")]\n        rest_config: RestConfig {\n            is_rest_broker: false,\n            db_schema_cache: None,\n            max_schema_size: 0,\n            hostname_prefix: String::new(),\n        },\n        proxy_protocol_v2: config::ProxyProtocolV2::Rejected,\n        handshake_timeout: Duration::from_secs(10),\n        wake_compute_retry_config: RetryConfig::parse(RetryConfig::WAKE_COMPUTE_DEFAULT_VALUES)?,\n        connect_compute_locks,\n        connect_to_compute: compute_config,\n        greetings,\n        #[cfg(feature = \"testing\")]\n        disable_pg_session_jwt: args.disable_pg_session_jwt,\n    })))\n}\n\n/// auth::Backend is created at proxy startup, and lives forever.\nfn build_auth_backend(args: &LocalProxyCliArgs) -> &'static auth::Backend<'static, ()> {\n    let auth_backend = crate::auth::Backend::Local(crate::auth::backend::MaybeOwned::Owned(\n        LocalBackend::new(args.postgres, args.compute_ctl.clone()),\n    ));\n\n    Box::leak(Box::new(auth_backend))\n}\n"
  },
  {
    "path": "proxy/src/binary/mod.rs",
    "content": "//! All binaries have the body of their main() defined here, so that the code\n//! is also covered by code style configs in lib.rs and the unused-code check is\n//! more effective when practically all modules are private to the lib.\n\npub mod local_proxy;\npub mod pg_sni_router;\npub mod proxy;\n"
  },
  {
    "path": "proxy/src/binary/pg_sni_router.rs",
    "content": "//! A stand-alone program that routes connections, e.g. from\n//! `aaa--bbb--1234.external.domain` to `aaa.bbb.internal.domain:1234`.\n//!\n//! This allows connecting to pods/services running in the same Kubernetes cluster from\n//! the outside. Similar to an ingress controller for HTTPS.\n\nuse std::io;\nuse std::net::SocketAddr;\nuse std::path::Path;\nuse std::sync::Arc;\n\nuse anyhow::{Context, anyhow, bail, ensure};\nuse clap::Arg;\nuse futures::future::Either;\nuse futures::{FutureExt, TryFutureExt};\nuse itertools::Itertools;\nuse rustls::crypto::ring;\nuse rustls::pki_types::{DnsName, PrivateKeyDer};\nuse tokio::io::{AsyncRead, AsyncReadExt, AsyncWrite, AsyncWriteExt};\nuse tokio::net::TcpListener;\nuse tokio_rustls::TlsConnector;\nuse tokio_rustls::server::TlsStream;\nuse tokio_util::sync::CancellationToken;\nuse tracing::{Instrument, error, info};\nuse utils::project_git_version;\nuse utils::sentry_init::init_sentry;\n\nuse crate::context::RequestContext;\nuse crate::metrics::{Metrics, ServiceInfo};\nuse crate::pglb::TlsRequired;\nuse crate::pqproto::FeStartupPacket;\nuse crate::protocol2::ConnectionInfo;\nuse crate::proxy::{ErrorSource, copy_bidirectional_client_compute};\nuse crate::stream::{PqStream, Stream};\nuse crate::util::run_until_cancelled;\n\nproject_git_version!(GIT_VERSION);\n\nfn cli() -> clap::Command {\n    clap::Command::new(\"Neon proxy/router\")\n        .version(GIT_VERSION)\n        .arg(\n            Arg::new(\"listen\")\n                .short('l')\n                .long(\"listen\")\n                .help(\"listen for incoming client connections on ip:port\")\n                .default_value(\"127.0.0.1:4432\"),\n        )\n        .arg(\n            Arg::new(\"listen-tls\")\n                .long(\"listen-tls\")\n                .help(\"listen for incoming client connections on ip:port, requiring TLS to compute\")\n                .default_value(\"127.0.0.1:4433\"),\n        )\n        .arg(\n            
Arg::new(\"tls-key\")\n                .short('k')\n                .long(\"tls-key\")\n                .help(\"path to TLS key for client postgres connections\")\n                .required(true),\n        )\n        .arg(\n            Arg::new(\"tls-cert\")\n                .short('c')\n                .long(\"tls-cert\")\n                .help(\"path to TLS cert for client postgres connections\")\n                .required(true),\n        )\n        .arg(\n            Arg::new(\"dest\")\n                .short('d')\n                .long(\"destination\")\n                .help(\"append this domain zone to the SNI hostname to get the destination address\")\n                .required(true),\n        )\n}\n\npub async fn run() -> anyhow::Result<()> {\n    let _logging_guard = crate::logging::init()?;\n    let _panic_hook_guard = utils::logging::replace_panic_hook_with_tracing_panic_hook();\n    let _sentry_guard = init_sentry(Some(GIT_VERSION.into()), &[]);\n\n    let args = cli().get_matches();\n    let destination: String = args\n        .get_one::<String>(\"dest\")\n        .expect(\"string argument defined\")\n        .parse()?;\n\n    // Configure TLS\n    let tls_config = match (\n        args.get_one::<String>(\"tls-key\"),\n        args.get_one::<String>(\"tls-cert\"),\n    ) {\n        (Some(key_path), Some(cert_path)) => parse_tls(key_path.as_ref(), cert_path.as_ref())?,\n        _ => bail!(\"tls-key and tls-cert must be specified\"),\n    };\n\n    let compute_tls_config =\n        Arc::new(crate::tls::client_config::compute_client_config_with_root_certs()?);\n\n    // Start listening for incoming client connections\n    let proxy_address: SocketAddr = args\n        .get_one::<String>(\"listen\")\n        .expect(\"listen argument defined\")\n        .parse()?;\n    let proxy_address_compute_tls: SocketAddr = args\n        .get_one::<String>(\"listen-tls\")\n        .expect(\"listen-tls argument defined\")\n        .parse()?;\n\n    info!(\"Starting sni 
router on {proxy_address}\");\n    info!(\"Starting sni router on {proxy_address_compute_tls}\");\n    let proxy_listener = TcpListener::bind(proxy_address).await?;\n    let proxy_listener_compute_tls = TcpListener::bind(proxy_address_compute_tls).await?;\n\n    let cancellation_token = CancellationToken::new();\n    let dest = Arc::new(destination);\n\n    let main = tokio::spawn(task_main(\n        dest.clone(),\n        tls_config.clone(),\n        None,\n        proxy_listener,\n        cancellation_token.clone(),\n    ))\n    .map(crate::error::flatten_err);\n\n    let main_tls = tokio::spawn(task_main(\n        dest,\n        tls_config,\n        Some(compute_tls_config),\n        proxy_listener_compute_tls,\n        cancellation_token.clone(),\n    ))\n    .map(crate::error::flatten_err);\n\n    Metrics::get()\n        .service\n        .info\n        .set_label(ServiceInfo::running());\n\n    let signals_task = tokio::spawn(crate::signals::handle(cancellation_token, || {}));\n\n    // the signal task can't ever succeed.\n    // the main task can error, or can succeed on cancellation.\n    // we want to immediately exit on either of these cases\n    let main = futures::future::try_join(main, main_tls);\n    let signal = match futures::future::select(signals_task, main).await {\n        Either::Left((res, _)) => crate::error::flatten_err(res)?,\n        Either::Right((res, _)) => {\n            res?;\n            return Ok(());\n        }\n    };\n\n    // maintenance tasks return `Infallible` success values, this is an impossible value\n    // so this match statically ensures that there are no possibilities for that value\n    match signal {}\n}\n\npub(super) fn parse_tls(\n    key_path: &Path,\n    cert_path: &Path,\n) -> anyhow::Result<Arc<rustls::ServerConfig>> {\n    let key = {\n        let key_bytes = std::fs::read(key_path).context(\"TLS key file\")?;\n\n        let mut keys = rustls_pemfile::pkcs8_private_keys(&mut &key_bytes[..]).collect_vec();\n\n   
     ensure!(keys.len() == 1, \"keys.len() = {} (should be 1)\", keys.len());\n        PrivateKeyDer::Pkcs8(\n            keys.pop()\n                .expect(\"keys should not be empty\")\n                .context(format!(\n                    \"Failed to read TLS keys at '{}'\",\n                    key_path.display()\n                ))?,\n        )\n    };\n\n    let cert_chain_bytes = std::fs::read(cert_path).context(format!(\n        \"Failed to read TLS cert file at '{}.'\",\n        cert_path.display()\n    ))?;\n\n    let cert_chain: Vec<_> = {\n        rustls_pemfile::certs(&mut &cert_chain_bytes[..])\n            .try_collect()\n            .with_context(|| {\n                format!(\n                    \"Failed to read TLS certificate chain from bytes from file at '{}'.\",\n                    cert_path.display()\n                )\n            })?\n    };\n\n    let tls_config =\n        rustls::ServerConfig::builder_with_provider(Arc::new(ring::default_provider()))\n            .with_protocol_versions(&[&rustls::version::TLS13, &rustls::version::TLS12])\n            .context(\"ring should support TLS1.2 and TLS1.3\")?\n            .with_no_client_auth()\n            .with_single_cert(cert_chain, key)?\n            .into();\n\n    Ok(tls_config)\n}\n\npub(super) async fn task_main(\n    dest_suffix: Arc<String>,\n    tls_config: Arc<rustls::ServerConfig>,\n    compute_tls_config: Option<Arc<rustls::ClientConfig>>,\n    listener: tokio::net::TcpListener,\n    cancellation_token: CancellationToken,\n) -> anyhow::Result<()> {\n    // When set for the server socket, the keepalive setting\n    // will be inherited by all accepted client sockets.\n    socket2::SockRef::from(&listener).set_keepalive(true)?;\n\n    let connections = tokio_util::task::task_tracker::TaskTracker::new();\n\n    while let Some(accept_result) =\n        run_until_cancelled(listener.accept(), &cancellation_token).await\n    {\n        let (socket, peer_addr) = accept_result?;\n\n    
    let session_id = uuid::Uuid::new_v4();\n        let tls_config = Arc::clone(&tls_config);\n        let dest_suffix = Arc::clone(&dest_suffix);\n        let compute_tls_config = compute_tls_config.clone();\n\n        connections.spawn(\n            async move {\n                socket\n                    .set_nodelay(true)\n                    .context(\"failed to set socket option\")?;\n\n                let ctx = RequestContext::new(\n                    session_id,\n                    ConnectionInfo {\n                        addr: peer_addr,\n                        extra: None,\n                    },\n                    crate::metrics::Protocol::SniRouter,\n                );\n                handle_client(ctx, dest_suffix, tls_config, compute_tls_config, socket).await\n            }\n            .unwrap_or_else(|e| {\n                if let Some(FirstMessage(io_error)) = e.downcast_ref() {\n                    // this is noisy. if we get EOF on the very first message that's likely\n                    // just NLB doing a healthcheck.\n                    if io_error.kind() == io::ErrorKind::UnexpectedEof {\n                        return;\n                    }\n                }\n\n                // Acknowledge that the task has finished with an error.\n                error!(\"per-client task finished with an error: {e:#}\");\n            })\n            .instrument(tracing::info_span!(\"handle_client\", ?session_id)),\n        );\n    }\n\n    connections.close();\n    drop(listener);\n\n    connections.wait().await;\n\n    info!(\"all client connections have finished\");\n    Ok(())\n}\n\n#[derive(Debug, thiserror::Error)]\n#[error(transparent)]\nstruct FirstMessage(io::Error);\n\nasync fn ssl_handshake<S: AsyncRead + AsyncWrite + Unpin>(\n    ctx: &RequestContext,\n    raw_stream: S,\n    tls_config: Arc<rustls::ServerConfig>,\n) -> anyhow::Result<TlsStream<S>> {\n    let (mut stream, msg) = PqStream::parse_startup(Stream::from_raw(raw_stream))\n 
       .await\n        .map_err(FirstMessage)?;\n\n    match msg {\n        FeStartupPacket::SslRequest { direct: None } => {\n            let raw = stream.accept_tls().await?;\n\n            Ok(raw\n                .upgrade(tls_config, !ctx.has_private_peer_addr())\n                .await?)\n        }\n        unexpected => {\n            info!(\n                ?unexpected,\n                \"unexpected startup packet, rejecting connection\"\n            );\n            Err(stream.throw_error(TlsRequired, None).await)?\n        }\n    }\n}\n\nasync fn handle_client(\n    ctx: RequestContext,\n    dest_suffix: Arc<String>,\n    tls_config: Arc<rustls::ServerConfig>,\n    compute_tls_config: Option<Arc<rustls::ClientConfig>>,\n    stream: impl AsyncRead + AsyncWrite + Unpin,\n) -> anyhow::Result<()> {\n    let mut tls_stream = ssl_handshake(&ctx, stream, tls_config).await?;\n\n    // Cut off first part of the SNI domain\n    // We receive required destination details in the format of\n    //   `{k8s_service_name}--{k8s_namespace}--{port}.non-sni-domain`\n    let sni = tls_stream\n        .get_ref()\n        .1\n        .server_name()\n        .ok_or(anyhow!(\"SNI missing\"))?;\n    let dest: Vec<&str> = sni\n        .split_once('.')\n        .context(\"invalid SNI\")?\n        .0\n        .splitn(3, \"--\")\n        .collect();\n    let port = dest[2].parse::<u16>().context(\"invalid port\")?;\n    let destination = format!(\"{}.{}.{}:{}\", dest[0], dest[1], dest_suffix, port);\n\n    info!(\"destination: {}\", destination);\n\n    let mut client = tokio::net::TcpStream::connect(&destination).await?;\n\n    let client = if let Some(compute_tls_config) = compute_tls_config {\n        info!(\"upgrading TLS\");\n\n        // send SslRequest\n        client\n            .write_all(b\"\\x00\\x00\\x00\\x08\\x04\\xd2\\x16\\x2f\")\n            .await?;\n\n        // wait for S/N response\n        let mut resp = b'N';\n        client.read_exact(std::slice::from_mut(&mut 
resp)).await?;\n\n        // error if not S\n        ensure!(resp == b'S', \"compute refused TLS\");\n\n        // upgrade to TLS.\n        let domain = DnsName::try_from(destination)?;\n        let domain = rustls::pki_types::ServerName::DnsName(domain);\n        let client = TlsConnector::from(compute_tls_config)\n            .connect(domain, client)\n            .await?;\n        Connection::Tls(client)\n    } else {\n        Connection::Raw(client)\n    };\n\n    // doesn't yet matter as pg-sni-router doesn't report analytics logs\n    ctx.set_success();\n    ctx.log_connect();\n\n    // Starting from here we only proxy the client's traffic.\n    info!(\"performing the proxy pass...\");\n\n    let res = match client {\n        Connection::Raw(mut c) => copy_bidirectional_client_compute(&mut tls_stream, &mut c).await,\n        Connection::Tls(mut c) => copy_bidirectional_client_compute(&mut tls_stream, &mut c).await,\n    };\n\n    match res {\n        Ok(_) => Ok(()),\n        Err(ErrorSource::Client(err)) => Err(err).context(\"client\"),\n        Err(ErrorSource::Compute(err)) => Err(err).context(\"compute\"),\n    }\n}\n\n#[allow(clippy::large_enum_variant)]\nenum Connection {\n    Raw(tokio::net::TcpStream),\n    Tls(tokio_rustls::client::TlsStream<tokio::net::TcpStream>),\n}\n"
  },
  {
    "path": "proxy/src/binary/proxy.rs",
    "content": "use std::env;\nuse std::net::SocketAddr;\nuse std::path::PathBuf;\nuse std::pin::pin;\nuse std::sync::Arc;\nuse std::time::Duration;\n\n#[cfg(any(test, feature = \"testing\"))]\nuse anyhow::Context;\nuse anyhow::{bail, ensure};\nuse arc_swap::ArcSwapOption;\n#[cfg(any(test, feature = \"testing\"))]\nuse camino::Utf8PathBuf;\nuse futures::future::Either;\nuse itertools::{Itertools, Position};\nuse rand::Rng;\nuse remote_storage::RemoteStorageConfig;\nuse tokio::net::TcpListener;\n#[cfg(any(test, feature = \"testing\"))]\nuse tokio::sync::Notify;\nuse tokio::task::JoinSet;\nuse tokio_util::sync::CancellationToken;\nuse tracing::{debug, error, info, warn};\nuse utils::sentry_init::init_sentry;\nuse utils::{project_build_tag, project_git_version};\n\nuse crate::auth::backend::jwt::JwkCache;\n#[cfg(any(test, feature = \"testing\"))]\nuse crate::auth::backend::local::LocalBackend;\nuse crate::auth::backend::{ConsoleRedirectBackend, MaybeOwned};\nuse crate::batch::BatchQueue;\nuse crate::cancellation::{CancellationHandler, CancellationProcessor};\n#[cfg(feature = \"rest_broker\")]\nuse crate::config::RestConfig;\n#[cfg(any(test, feature = \"testing\"))]\nuse crate::config::refresh_config_loop;\nuse crate::config::{\n    self, AuthenticationConfig, CacheOptions, ComputeConfig, HttpConfig, ProjectInfoCacheOptions,\n    ProxyConfig, ProxyProtocolV2, remote_storage_from_toml,\n};\nuse crate::context::parquet::ParquetUploadArgs;\nuse crate::http::health_server::AppMetrics;\nuse crate::metrics::{Metrics, ServiceInfo};\nuse crate::rate_limiter::{EndpointRateLimiter, RateBucketInfo, WakeComputeRateLimiter};\nuse crate::redis::connection_with_credentials_provider::ConnectionWithCredentialsProvider;\nuse crate::redis::kv_ops::RedisKVClient;\nuse crate::redis::{elasticache, notifications};\nuse crate::scram::threadpool::ThreadPool;\nuse crate::serverless::GlobalConnPoolOptions;\nuse crate::serverless::cancel_set::CancelSet;\n#[cfg(feature = \"rest_broker\")]\nuse 
crate::serverless::rest::DbSchemaCache;\nuse crate::tls::client_config::compute_client_config_with_root_certs;\n#[cfg(any(test, feature = \"testing\"))]\nuse crate::url::ApiUrl;\nuse crate::{auth, control_plane, http, serverless, usage_metrics};\n\nproject_git_version!(GIT_VERSION);\nproject_build_tag!(BUILD_TAG);\n\nuse clap::{Parser, ValueEnum};\n\n#[derive(Clone, Debug, ValueEnum)]\n#[clap(rename_all = \"kebab-case\")]\nenum AuthBackendType {\n    #[clap(alias(\"cplane-v1\"))]\n    ControlPlane,\n\n    #[clap(alias(\"link\"))]\n    ConsoleRedirect,\n\n    #[cfg(any(test, feature = \"testing\"))]\n    Postgres,\n\n    #[cfg(any(test, feature = \"testing\"))]\n    Local,\n}\n\n/// Neon proxy/router\n#[derive(Parser)]\n#[command(version = GIT_VERSION, about)]\nstruct ProxyCliArgs {\n    /// Name of the region this proxy is deployed in\n    #[clap(long, default_value_t = String::new())]\n    region: String,\n    /// listen for incoming client connections on ip:port\n    #[clap(short, long, default_value = \"127.0.0.1:4432\")]\n    proxy: SocketAddr,\n    #[clap(value_enum, long, default_value_t = AuthBackendType::ConsoleRedirect)]\n    auth_backend: AuthBackendType,\n    /// Path of the local proxy config file (used for local-file auth backend)\n    #[clap(long, default_value = \"./local_proxy.json\")]\n    #[cfg(any(test, feature = \"testing\"))]\n    config_path: Utf8PathBuf,\n    /// listen for management callback connection on ip:port\n    #[clap(short, long, default_value = \"127.0.0.1:7000\")]\n    mgmt: SocketAddr,\n    /// listen for incoming http connections (metrics, etc) on ip:port\n    #[clap(long, default_value = \"127.0.0.1:7001\")]\n    http: SocketAddr,\n    /// listen for incoming wss connections on ip:port\n    #[clap(long)]\n    wss: Option<SocketAddr>,\n    /// redirect unauthenticated users to the given uri in case of console redirect auth\n    #[clap(short, long, default_value = \"http://localhost:3000/psql_session/\")]\n    uri: String,\n    
/// cloud API endpoint for authenticating users\n    #[clap(\n        short,\n        long,\n        default_value = \"http://localhost:3000/authenticate_proxy_request/\"\n    )]\n    auth_endpoint: String,\n    /// JWT used to connect to control plane.\n    #[clap(\n        long,\n        value_name = \"JWT\",\n        default_value = \"\",\n        env = \"NEON_PROXY_TO_CONTROLPLANE_TOKEN\"\n    )]\n    control_plane_token: Arc<str>,\n    /// if this is not local proxy, this toggles whether we accept jwt or passwords for http\n    #[clap(long, default_value_t = false, value_parser = clap::builder::BoolishValueParser::new(), action = clap::ArgAction::Set)]\n    is_auth_broker: bool,\n    /// path to TLS key for client postgres connections\n    ///\n    /// tls-key and tls-cert are for backwards compatibility, we can put all certs in one dir\n    #[clap(short = 'k', long, alias = \"ssl-key\")]\n    tls_key: Option<PathBuf>,\n    /// path to TLS cert for client postgres connections\n    ///\n    /// tls-key and tls-cert are for backwards compatibility, we can put all certs in one dir\n    #[clap(short = 'c', long, alias = \"ssl-cert\")]\n    tls_cert: Option<PathBuf>,\n    /// Allow writing TLS session keys to the given file pointed to by the environment variable `SSLKEYLOGFILE`.\n    #[clap(long, alias = \"allow-ssl-keylogfile\")]\n    allow_tls_keylogfile: bool,\n    /// path to directory with TLS certificates for client postgres connections\n    #[clap(long)]\n    certs_dir: Option<PathBuf>,\n    /// timeout for the TLS handshake\n    #[clap(long, default_value = \"15s\", value_parser = humantime::parse_duration)]\n    handshake_timeout: tokio::time::Duration,\n    /// cache for `wake_compute` api method (use `size=0` to disable)\n    #[clap(long, default_value = config::CacheOptions::CACHE_DEFAULT_OPTIONS)]\n    wake_compute_cache: String,\n    /// lock for `wake_compute` api method. example: \"shards=32,permits=4,epoch=10m,timeout=1s\". 
(use `permits=0` to disable).\n    #[clap(long, default_value = config::ConcurrencyLockOptions::DEFAULT_OPTIONS_WAKE_COMPUTE_LOCK)]\n    wake_compute_lock: String,\n    /// lock for `connect_compute` api method. example: \"shards=32,permits=4,epoch=10m,timeout=1s\". (use `permits=0` to disable).\n    #[clap(long, default_value = config::ConcurrencyLockOptions::DEFAULT_OPTIONS_CONNECT_COMPUTE_LOCK)]\n    connect_compute_lock: String,\n    #[clap(flatten)]\n    sql_over_http: SqlOverHttpArgs,\n    /// timeout for scram authentication protocol\n    #[clap(long, default_value = \"15s\", value_parser = humantime::parse_duration)]\n    scram_protocol_timeout: tokio::time::Duration,\n    /// size of the threadpool for password hashing\n    #[clap(long, default_value_t = 4)]\n    scram_thread_pool_size: u8,\n    /// Endpoint rate limiter max number of requests per second.\n    ///\n    /// Provided in the form `<Requests Per Second>@<Bucket Duration Size>`.\n    /// Can be given multiple times for different bucket sizes.\n    #[clap(long, default_values_t = RateBucketInfo::DEFAULT_ENDPOINT_SET)]\n    endpoint_rps_limit: Vec<RateBucketInfo>,\n    /// Wake compute rate limiter max number of requests per second.\n    #[clap(long, default_values_t = RateBucketInfo::DEFAULT_SET)]\n    wake_compute_limit: Vec<RateBucketInfo>,\n    /// Cancellation channel size (max queue size for redis kv client)\n    #[clap(long, default_value_t = 1024)]\n    cancellation_ch_size: usize,\n    /// Cancellation ops batch size for redis\n    #[clap(long, default_value_t = 8)]\n    cancellation_batch_size: usize,\n    /// redis url for plain authentication\n    #[clap(long, alias(\"redis-notifications\"))]\n    redis_plain: Option<String>,\n    /// what from the available authentications type to use for redis. 
Supported are \"irsa\" and \"plain\".\n    #[clap(long, default_value = \"irsa\")]\n    redis_auth_type: String,\n    /// redis host for irsa authentication\n    #[clap(long)]\n    redis_host: Option<String>,\n    /// redis port for irsa authentication\n    #[clap(long)]\n    redis_port: Option<u16>,\n    /// redis cluster name for irsa authentication\n    #[clap(long)]\n    redis_cluster_name: Option<String>,\n    /// redis user_id for irsa authentication\n    #[clap(long)]\n    redis_user_id: Option<String>,\n    /// aws region for irsa authentication\n    #[clap(long, default_value_t = String::new())]\n    aws_region: String,\n    /// cache for `project_info` (use `size=0` to disable)\n    #[clap(long, default_value = config::ProjectInfoCacheOptions::CACHE_DEFAULT_OPTIONS)]\n    project_info_cache: String,\n    /// cache for all valid endpoints\n    // TODO: remove after a couple of releases.\n    #[clap(long, default_value_t = String::new())]\n    #[deprecated]\n    endpoint_cache_config: String,\n    #[clap(flatten)]\n    parquet_upload: ParquetUploadArgs,\n\n    /// http endpoint to receive periodic metric updates\n    #[clap(long)]\n    metric_collection_endpoint: Option<String>,\n    /// how often metrics should be sent to a collection endpoint\n    #[clap(long)]\n    metric_collection_interval: Option<String>,\n    /// interval for backup metric collection\n    #[clap(long, default_value = \"10m\", value_parser = humantime::parse_duration)]\n    metric_backup_collection_interval: std::time::Duration,\n    /// remote storage configuration for backup metric collection\n    /// Encoded as toml (same format as pageservers), eg\n    /// `{bucket_name='the-bucket',bucket_region='us-east-1',prefix_in_bucket='proxy',endpoint='http://minio:9000'}`\n    #[clap(long, value_parser = remote_storage_from_toml)]\n    metric_backup_collection_remote_storage: Option<RemoteStorageConfig>,\n    /// chunk size for backup metric collection\n    /// Size of each event is no 
more than 400 bytes, so 2**22 is about 200MB before the compression.\n    #[clap(long, default_value = \"4194304\")]\n    metric_backup_collection_chunk_size: usize,\n\n    /// Whether to retry the connection to the compute node\n    #[clap(long, default_value = config::RetryConfig::CONNECT_TO_COMPUTE_DEFAULT_VALUES)]\n    connect_to_compute_retry: String,\n    /// Whether to retry the wake_compute request\n    #[clap(long, default_value = config::RetryConfig::WAKE_COMPUTE_DEFAULT_VALUES)]\n    wake_compute_retry: String,\n\n    /// Configure if this is a private access proxy for the POC: In that case the proxy will ignore the IP allowlist\n    #[clap(long, default_value_t = false, value_parser = clap::builder::BoolishValueParser::new(), action = clap::ArgAction::Set)]\n    is_private_access_proxy: bool,\n\n    /// Configure whether all incoming requests have a Proxy Protocol V2 packet.\n    #[clap(value_enum, long, default_value_t = ProxyProtocolV2::Rejected)]\n    proxy_protocol_v2: ProxyProtocolV2,\n\n    /// Time the proxy waits for the webauth session to be confirmed by the control plane.\n    // TODO: rename to `console_redirect_confirmation_timeout`.\n    #[clap(long, default_value = \"2m\", value_parser = humantime::parse_duration)]\n    webauth_confirmation_timeout: std::time::Duration,\n\n    #[clap(flatten)]\n    pg_sni_router: PgSniRouterArgs,\n\n    /// if this is not local proxy, this toggles whether we accept Postgres REST requests\n    #[clap(long, default_value_t = false, value_parser = clap::builder::BoolishValueParser::new(), action = clap::ArgAction::Set)]\n    #[cfg(feature = \"rest_broker\")]\n    is_rest_broker: bool,\n\n    /// cache for `db_schema_cache` introspection (use `size=0` to disable)\n    #[clap(long, default_value = \"size=1000,ttl=1h\")]\n    #[cfg(feature = \"rest_broker\")]\n    db_schema_cache: String,\n\n    /// Maximum size allowed for schema in bytes\n    #[clap(long, default_value_t = 5 * 1024 * 1024)] // 5MB\n    
#[cfg(feature = \"rest_broker\")]\n    max_schema_size: usize,\n\n    /// Hostname prefix to strip from request hostname to get database hostname\n    #[clap(long, default_value = \"apirest.\")]\n    #[cfg(feature = \"rest_broker\")]\n    hostname_prefix: String,\n}\n\n#[derive(clap::Args, Clone, Copy, Debug)]\nstruct SqlOverHttpArgs {\n    /// timeout for http connection requests\n    #[clap(long, default_value = \"15s\", value_parser = humantime::parse_duration)]\n    sql_over_http_timeout: tokio::time::Duration,\n\n    /// Whether the SQL over http pool is opt-in\n    #[clap(long, default_value_t = true, value_parser = clap::builder::BoolishValueParser::new(), action = clap::ArgAction::Set)]\n    sql_over_http_pool_opt_in: bool,\n\n    /// How many connections to pool for each endpoint. Excess connections are discarded\n    #[clap(long, default_value_t = 20)]\n    sql_over_http_pool_max_conns_per_endpoint: usize,\n\n    /// How many connections to pool in total, across all endpoints. Excess connections are discarded\n    #[clap(long, default_value_t = 20000)]\n    sql_over_http_pool_max_total_conns: usize,\n\n    /// How long pooled connections should remain idle for before closing\n    #[clap(long, default_value = \"5m\", value_parser = humantime::parse_duration)]\n    sql_over_http_idle_timeout: tokio::time::Duration,\n\n    /// Duration each shard will wait on average before a GC sweep.\n    /// A longer time will cause sweeps to take longer but will interfere less frequently.\n    #[clap(long, default_value = \"10m\", value_parser = humantime::parse_duration)]\n    sql_over_http_pool_gc_epoch: tokio::time::Duration,\n\n    /// How many shards should the global pool have. 
Must be a power of two.\n    /// More shards will introduce less contention for pool operations, but can\n    /// increase memory used by the pool\n    #[clap(long, default_value_t = 128)]\n    sql_over_http_pool_shards: usize,\n\n    #[clap(long, default_value_t = 10000)]\n    sql_over_http_client_conn_threshold: u64,\n\n    #[clap(long, default_value_t = 64)]\n    sql_over_http_cancel_set_shards: usize,\n\n    #[clap(long, default_value_t = 10 * 1024 * 1024)] // 10 MiB\n    sql_over_http_max_request_size_bytes: usize,\n\n    #[clap(long, default_value_t = 10 * 1024 * 1024)] // 10 MiB\n    sql_over_http_max_response_size_bytes: usize,\n}\n\n#[derive(clap::Args, Clone, Debug)]\nstruct PgSniRouterArgs {\n    /// listen for incoming client connections on ip:port\n    #[clap(id = \"sni-router-listen\", long, default_value = \"127.0.0.1:4432\")]\n    listen: SocketAddr,\n    /// listen for incoming client connections on ip:port, requiring TLS to compute\n    #[clap(id = \"sni-router-listen-tls\", long, default_value = \"127.0.0.1:4433\")]\n    listen_tls: SocketAddr,\n    /// path to TLS key for client postgres connections\n    #[clap(id = \"sni-router-tls-key\", long)]\n    tls_key: Option<PathBuf>,\n    /// path to TLS cert for client postgres connections\n    #[clap(id = \"sni-router-tls-cert\", long)]\n    tls_cert: Option<PathBuf>,\n    /// append this domain zone to the SNI hostname to get the destination address\n    #[clap(id = \"sni-router-destination\", long)]\n    dest: Option<String>,\n}\n\npub async fn run() -> anyhow::Result<()> {\n    let _logging_guard = crate::logging::init()?;\n    let _panic_hook_guard = utils::logging::replace_panic_hook_with_tracing_panic_hook();\n    let _sentry_guard = init_sentry(Some(GIT_VERSION.into()), &[]);\n\n    // TODO: refactor these to use labels\n    info!(\"Version: {GIT_VERSION}\");\n    info!(\"Build_tag: {BUILD_TAG}\");\n    let neon_metrics = ::metrics::NeonMetrics::new(::metrics::BuildInfo {\n        revision: 
GIT_VERSION,\n        build_tag: BUILD_TAG,\n    });\n\n    let jemalloc = match crate::jemalloc::MetricRecorder::new() {\n        Ok(t) => Some(t),\n        Err(e) => {\n            error!(error = ?e, \"could not start jemalloc metrics loop\");\n            None\n        }\n    };\n\n    let args = ProxyCliArgs::parse();\n    let config = build_config(&args)?;\n    let auth_backend = build_auth_backend(&args)?;\n\n    match auth_backend {\n        Either::Left(auth_backend) => info!(\"Authentication backend: {auth_backend}\"),\n        Either::Right(auth_backend) => info!(\"Authentication backend: {auth_backend:?}\"),\n    }\n    info!(\"Using region: {}\", args.aws_region);\n    let redis_client = configure_redis(&args).await?;\n\n    // Check that we can bind to address before further initialization\n    info!(\"Starting http on {}\", args.http);\n    let http_listener = TcpListener::bind(args.http).await?.into_std()?;\n\n    info!(\"Starting mgmt on {}\", args.mgmt);\n    let mgmt_listener = TcpListener::bind(args.mgmt).await?;\n\n    let proxy_listener = if args.is_auth_broker {\n        None\n    } else {\n        info!(\"Starting proxy on {}\", args.proxy);\n        Some(TcpListener::bind(args.proxy).await?)\n    };\n\n    let sni_router_listeners = {\n        let args = &args.pg_sni_router;\n        if args.dest.is_some() {\n            ensure!(\n                args.tls_key.is_some(),\n                \"sni-router-tls-key must be provided\"\n            );\n            ensure!(\n                args.tls_cert.is_some(),\n                \"sni-router-tls-cert must be provided\"\n            );\n\n            info!(\n                \"Starting pg-sni-router on {} and {}\",\n                args.listen, args.listen_tls\n            );\n\n            Some((\n                TcpListener::bind(args.listen).await?,\n                TcpListener::bind(args.listen_tls).await?,\n            ))\n        } else {\n            None\n        }\n    };\n\n    // TODO: 
rename the argument to something like serverless.\n    // It now covers more than just websockets, it also covers SQL over HTTP.\n    let serverless_listener = if let Some(serverless_address) = args.wss {\n        info!(\"Starting wss on {serverless_address}\");\n        Some(TcpListener::bind(serverless_address).await?)\n    } else if args.is_auth_broker {\n        bail!(\"wss arg must be present for auth-broker\")\n    } else {\n        None\n    };\n\n    let cancellation_token = CancellationToken::new();\n\n    let cancellation_handler = Arc::new(CancellationHandler::new(&config.connect_to_compute));\n\n    let endpoint_rate_limiter = Arc::new(EndpointRateLimiter::new_with_shards(\n        RateBucketInfo::to_leaky_bucket(&args.endpoint_rps_limit)\n            .unwrap_or(EndpointRateLimiter::DEFAULT),\n        64,\n    ));\n\n    #[cfg(any(test, feature = \"testing\"))]\n    let refresh_config_notify = Arc::new(Notify::new());\n    // client facing tasks. these will exit on error or on cancellation\n    // cancellation returns Ok(())\n    let mut client_tasks = JoinSet::new();\n    match auth_backend {\n        Either::Left(auth_backend) => {\n            if let Some(proxy_listener) = proxy_listener {\n                client_tasks.spawn(crate::pglb::task_main(\n                    config,\n                    auth_backend,\n                    proxy_listener,\n                    cancellation_token.clone(),\n                    cancellation_handler.clone(),\n                    endpoint_rate_limiter.clone(),\n                ));\n            }\n\n            if let Some(serverless_listener) = serverless_listener {\n                client_tasks.spawn(serverless::task_main(\n                    config,\n                    auth_backend,\n                    serverless_listener,\n                    cancellation_token.clone(),\n                    cancellation_handler.clone(),\n                    endpoint_rate_limiter.clone(),\n                ));\n            
}\n\n            // if auth backend is local, we need to load the config file\n            #[cfg(any(test, feature = \"testing\"))]\n            if let auth::Backend::Local(_) = &auth_backend {\n                refresh_config_notify.notify_one();\n                tokio::spawn(refresh_config_loop(\n                    config,\n                    args.config_path,\n                    refresh_config_notify.clone(),\n                ));\n            }\n        }\n        Either::Right(auth_backend) => {\n            if let Some(proxy_listener) = proxy_listener {\n                client_tasks.spawn(crate::console_redirect_proxy::task_main(\n                    config,\n                    auth_backend,\n                    proxy_listener,\n                    cancellation_token.clone(),\n                    cancellation_handler.clone(),\n                ));\n            }\n        }\n    }\n\n    // spawn pg-sni-router mode.\n    if let Some((listen, listen_tls)) = sni_router_listeners {\n        let args = args.pg_sni_router;\n        let dest = args.dest.expect(\"already asserted it is set\");\n        let key_path = args.tls_key.expect(\"already asserted it is set\");\n        let cert_path = args.tls_cert.expect(\"already asserted it is set\");\n\n        let tls_config = super::pg_sni_router::parse_tls(&key_path, &cert_path)?;\n\n        let dest = Arc::new(dest);\n\n        client_tasks.spawn(super::pg_sni_router::task_main(\n            dest.clone(),\n            tls_config.clone(),\n            None,\n            listen,\n            cancellation_token.clone(),\n        ));\n\n        client_tasks.spawn(super::pg_sni_router::task_main(\n            dest,\n            tls_config,\n            Some(config.connect_to_compute.tls.clone()),\n            listen_tls,\n            cancellation_token.clone(),\n        ));\n    }\n\n    client_tasks.spawn(crate::context::parquet::worker(\n        cancellation_token.clone(),\n        args.parquet_upload,\n        
args.region,\n    ));\n\n    // maintenance tasks. these never return unless there's an error\n    let mut maintenance_tasks = JoinSet::new();\n\n    maintenance_tasks.spawn(crate::signals::handle(cancellation_token.clone(), {\n        move || {\n            #[cfg(any(test, feature = \"testing\"))]\n            refresh_config_notify.notify_one();\n        }\n    }));\n    maintenance_tasks.spawn(http::health_server::task_main(\n        http_listener,\n        AppMetrics {\n            jemalloc,\n            neon_metrics,\n            proxy: crate::metrics::Metrics::get(),\n        },\n    ));\n    maintenance_tasks.spawn(control_plane::mgmt::task_main(mgmt_listener));\n\n    // add a task to flush the db_schema cache every 10 minutes\n    #[cfg(feature = \"rest_broker\")]\n    if let Some(db_schema_cache) = &config.rest_config.db_schema_cache {\n        maintenance_tasks.spawn(db_schema_cache.maintain());\n    }\n\n    if let Some(metrics_config) = &config.metric_collection {\n        // TODO: Add gc regardles of the metric collection being enabled.\n        maintenance_tasks.spawn(usage_metrics::task_main(metrics_config));\n    }\n\n    if let Some(client) = redis_client {\n        // Try to connect to Redis 3 times with 1 + (0..0.1) second interval.\n        // This prevents immediate exit and pod restart,\n        // which can cause hammering of the redis in case of connection issues.\n        // cancellation key management\n        let mut redis_kv_client = RedisKVClient::new(client.clone());\n        for attempt in (0..3).with_position() {\n            match redis_kv_client.try_connect().await {\n                Ok(()) => {\n                    info!(\"Connected to Redis KV client\");\n                    cancellation_handler.init_tx(BatchQueue::new(CancellationProcessor {\n                        client: redis_kv_client,\n                        batch_size: args.cancellation_batch_size,\n                    }));\n\n                    break;\n                
}\n                Err(e) => {\n                    error!(\"Failed to connect to Redis KV client: {e}\");\n                    if matches!(attempt, Position::Last(_)) {\n                        bail!(\n                            \"Failed to connect to Redis KV client after {} attempts\",\n                            attempt.into_inner()\n                        );\n                    }\n                    let jitter = rand::rng().random_range(0..100);\n                    tokio::time::sleep(Duration::from_millis(1000 + jitter)).await;\n                }\n            }\n        }\n\n        #[allow(irrefutable_let_patterns)]\n        if let Either::Left(auth::Backend::ControlPlane(api, ())) = &auth_backend\n            && let crate::control_plane::client::ControlPlaneClient::ProxyV1(api) = &**api\n        {\n            // project info cache and invalidation of that cache.\n            let cache = api.caches.project_info.clone();\n            maintenance_tasks.spawn(notifications::task_main(client, cache.clone()));\n            maintenance_tasks.spawn(async move { cache.gc_worker().await });\n        }\n    }\n\n    Metrics::get()\n        .service\n        .info\n        .set_label(ServiceInfo::running());\n\n    let maintenance = loop {\n        // get one complete task\n        match futures::future::select(\n            pin!(maintenance_tasks.join_next()),\n            pin!(client_tasks.join_next()),\n        )\n        .await\n        {\n            // exit immediately on maintenance task completion\n            Either::Left((Some(res), _)) => break crate::error::flatten_err(res)?,\n            // exit with error immediately if all maintenance tasks have ceased (should be caught by branch above)\n            Either::Left((None, _)) => bail!(\"no maintenance tasks running. 
invalid state\"),\n            // exit immediately on client task error\n            Either::Right((Some(res), _)) => crate::error::flatten_err(res)?,\n            // exit if all our client tasks have shutdown gracefully\n            Either::Right((None, _)) => return Ok(()),\n        }\n    };\n\n    // maintenance tasks return Infallible success values, this is an impossible value\n    // so this match statically ensures that there are no possibilities for that value\n    match maintenance {}\n}\n\n/// ProxyConfig is created at proxy startup, and lives forever.\nfn build_config(args: &ProxyCliArgs) -> anyhow::Result<&'static ProxyConfig> {\n    let thread_pool = ThreadPool::new(args.scram_thread_pool_size);\n    Metrics::get()\n        .proxy\n        .scram_pool\n        .0\n        .set(thread_pool.metrics.clone())\n        .ok();\n\n    let tls_config = match (&args.tls_key, &args.tls_cert) {\n        (Some(key_path), Some(cert_path)) => Some(config::configure_tls(\n            key_path,\n            cert_path,\n            args.certs_dir.as_deref(),\n            args.allow_tls_keylogfile,\n        )?),\n        (None, None) => None,\n        _ => bail!(\"either both or neither tls-key and tls-cert must be specified\"),\n    };\n    let tls_config = ArcSwapOption::from(tls_config.map(Arc::new));\n\n    let backup_metric_collection_config = config::MetricBackupCollectionConfig {\n        remote_storage_config: args.metric_backup_collection_remote_storage.clone(),\n        chunk_size: args.metric_backup_collection_chunk_size,\n    };\n\n    let metric_collection = match (\n        &args.metric_collection_endpoint,\n        &args.metric_collection_interval,\n    ) {\n        (Some(endpoint), Some(interval)) => Some(config::MetricCollectionConfig {\n            endpoint: endpoint.parse()?,\n            interval: humantime::parse_duration(interval)?,\n            backup_metric_collection_config,\n        }),\n        (None, None) => None,\n        _ => bail!(\n     
       \"either both or neither metric-collection-endpoint \\\n             and metric-collection-interval must be specified\"\n        ),\n    };\n\n    let config::ConcurrencyLockOptions {\n        shards,\n        limiter,\n        epoch,\n        timeout,\n    } = args.connect_compute_lock.parse()?;\n    info!(\n        ?limiter,\n        shards,\n        ?epoch,\n        \"Using NodeLocks (connect_compute)\"\n    );\n    let connect_compute_locks = control_plane::locks::ApiLocks::new(\n        \"connect_compute_lock\",\n        limiter,\n        shards,\n        timeout,\n        epoch,\n        &Metrics::get().proxy.connect_compute_lock,\n    );\n\n    let http_config = HttpConfig {\n        accept_websockets: !args.is_auth_broker,\n        pool_options: GlobalConnPoolOptions {\n            max_conns_per_endpoint: args.sql_over_http.sql_over_http_pool_max_conns_per_endpoint,\n            gc_epoch: args.sql_over_http.sql_over_http_pool_gc_epoch,\n            pool_shards: args.sql_over_http.sql_over_http_pool_shards,\n            idle_timeout: args.sql_over_http.sql_over_http_idle_timeout,\n            opt_in: args.sql_over_http.sql_over_http_pool_opt_in,\n            max_total_conns: args.sql_over_http.sql_over_http_pool_max_total_conns,\n        },\n        cancel_set: CancelSet::new(args.sql_over_http.sql_over_http_cancel_set_shards),\n        client_conn_threshold: args.sql_over_http.sql_over_http_client_conn_threshold,\n        max_request_size_bytes: args.sql_over_http.sql_over_http_max_request_size_bytes,\n        max_response_size_bytes: args.sql_over_http.sql_over_http_max_response_size_bytes,\n    };\n    let authentication_config = AuthenticationConfig {\n        jwks_cache: JwkCache::default(),\n        scram_thread_pool: thread_pool,\n        scram_protocol_timeout: args.scram_protocol_timeout,\n        ip_allowlist_check_enabled: !args.is_private_access_proxy,\n        is_vpc_acccess_proxy: args.is_private_access_proxy,\n        is_auth_broker: 
args.is_auth_broker,\n        #[cfg(not(feature = \"rest_broker\"))]\n        accept_jwts: args.is_auth_broker,\n        #[cfg(feature = \"rest_broker\")]\n        accept_jwts: args.is_auth_broker || args.is_rest_broker,\n        console_redirect_confirmation_timeout: args.webauth_confirmation_timeout,\n    };\n\n    let compute_config = ComputeConfig {\n        retry: config::RetryConfig::parse(&args.connect_to_compute_retry)?,\n        tls: Arc::new(compute_client_config_with_root_certs()?),\n        timeout: Duration::from_secs(2),\n    };\n\n    #[cfg(feature = \"rest_broker\")]\n    let rest_config = {\n        let db_schema_cache_config: CacheOptions = args.db_schema_cache.parse()?;\n        info!(\"Using DbSchemaCache with options={db_schema_cache_config:?}\");\n\n        let db_schema_cache = if args.is_rest_broker {\n            Some(DbSchemaCache::new(db_schema_cache_config))\n        } else {\n            None\n        };\n\n        RestConfig {\n            is_rest_broker: args.is_rest_broker,\n            db_schema_cache,\n            max_schema_size: args.max_schema_size,\n            hostname_prefix: args.hostname_prefix.clone(),\n        }\n    };\n\n    let mut greetings = env::var_os(\"NEON_MOTD\").map_or(String::new(), |s| match s.into_string() {\n        Ok(s) => s,\n        Err(_) => {\n            debug!(\"NEON_MOTD environment variable is not valid UTF-8\");\n            String::new()\n        }\n    });\n\n    match &args.auth_backend {\n        AuthBackendType::ControlPlane => {}\n        #[cfg(any(test, feature = \"testing\"))]\n        AuthBackendType::Postgres => {}\n        #[cfg(any(test, feature = \"testing\"))]\n        AuthBackendType::Local => {}\n        AuthBackendType::ConsoleRedirect => {\n            greetings = \"Connected to database\".to_string();\n        }\n    }\n\n    let config = ProxyConfig {\n        tls_config,\n        metric_collection,\n        http_config,\n        authentication_config,\n        
proxy_protocol_v2: args.proxy_protocol_v2,\n        handshake_timeout: args.handshake_timeout,\n        wake_compute_retry_config: config::RetryConfig::parse(&args.wake_compute_retry)?,\n        connect_compute_locks,\n        connect_to_compute: compute_config,\n        greetings,\n        #[cfg(feature = \"testing\")]\n        disable_pg_session_jwt: false,\n        #[cfg(feature = \"rest_broker\")]\n        rest_config,\n    };\n\n    let config = Box::leak(Box::new(config));\n\n    tokio::spawn(config.connect_compute_locks.garbage_collect_worker());\n\n    Ok(config)\n}\n\n/// auth::Backend is created at proxy startup, and lives forever.\nfn build_auth_backend(\n    args: &ProxyCliArgs,\n) -> anyhow::Result<Either<&'static auth::Backend<'static, ()>, &'static ConsoleRedirectBackend>> {\n    match &args.auth_backend {\n        AuthBackendType::ControlPlane => {\n            let wake_compute_cache_config: CacheOptions = args.wake_compute_cache.parse()?;\n            let project_info_cache_config: ProjectInfoCacheOptions =\n                args.project_info_cache.parse()?;\n\n            info!(\"Using NodeInfoCache (wake_compute) with options={wake_compute_cache_config:?}\");\n            info!(\n                \"Using AllowedIpsCache (wake_compute) with options={project_info_cache_config:?}\"\n            );\n\n            let caches = Box::leak(Box::new(control_plane::caches::ApiCaches::new(\n                wake_compute_cache_config,\n                project_info_cache_config,\n            )));\n\n            let config::ConcurrencyLockOptions {\n                shards,\n                limiter,\n                epoch,\n                timeout,\n            } = args.wake_compute_lock.parse()?;\n            info!(?limiter, shards, ?epoch, \"Using NodeLocks (wake_compute)\");\n            let locks = Box::leak(Box::new(control_plane::locks::ApiLocks::new(\n                \"wake_compute_lock\",\n                limiter,\n                shards,\n                
timeout,\n                epoch,\n                &Metrics::get().wake_compute_lock,\n            )));\n            tokio::spawn(locks.garbage_collect_worker());\n\n            let url: crate::url::ApiUrl = args.auth_endpoint.parse()?;\n\n            let endpoint = http::Endpoint::new(url, http::new_client());\n\n            let mut wake_compute_rps_limit = args.wake_compute_limit.clone();\n            RateBucketInfo::validate(&mut wake_compute_rps_limit)?;\n            let wake_compute_endpoint_rate_limiter =\n                Arc::new(WakeComputeRateLimiter::new(wake_compute_rps_limit));\n\n            let api = control_plane::client::cplane_proxy_v1::NeonControlPlaneClient::new(\n                endpoint,\n                args.control_plane_token.clone(),\n                caches,\n                locks,\n                wake_compute_endpoint_rate_limiter,\n            );\n\n            let api = control_plane::client::ControlPlaneClient::ProxyV1(api);\n            let auth_backend = auth::Backend::ControlPlane(MaybeOwned::Owned(api), ());\n            let config = Box::leak(Box::new(auth_backend));\n\n            Ok(Either::Left(config))\n        }\n\n        #[cfg(any(test, feature = \"testing\"))]\n        AuthBackendType::Postgres => {\n            let mut url: ApiUrl = args.auth_endpoint.parse()?;\n            if url.password().is_none() {\n                let password = env::var(\"PGPASSWORD\")\n                    .with_context(|| \"auth-endpoint does not contain a password and environment variable `PGPASSWORD` is not set\")?;\n                url.set_password(Some(&password))\n                    .expect(\"Failed to set password\");\n            }\n            let api = control_plane::client::mock::MockControlPlane::new(\n                url,\n                !args.is_private_access_proxy,\n            );\n            let api = control_plane::client::ControlPlaneClient::PostgresMock(api);\n\n            let auth_backend = 
auth::Backend::ControlPlane(MaybeOwned::Owned(api), ());\n\n            let config = Box::leak(Box::new(auth_backend));\n\n            Ok(Either::Left(config))\n        }\n\n        AuthBackendType::ConsoleRedirect => {\n            let wake_compute_cache_config: CacheOptions = args.wake_compute_cache.parse()?;\n            let project_info_cache_config: ProjectInfoCacheOptions =\n                args.project_info_cache.parse()?;\n\n            info!(\"Using NodeInfoCache (wake_compute) with options={wake_compute_cache_config:?}\");\n            info!(\n                \"Using AllowedIpsCache (wake_compute) with options={project_info_cache_config:?}\"\n            );\n\n            let caches = Box::leak(Box::new(control_plane::caches::ApiCaches::new(\n                wake_compute_cache_config,\n                project_info_cache_config,\n            )));\n\n            let config::ConcurrencyLockOptions {\n                shards,\n                limiter,\n                epoch,\n                timeout,\n            } = args.wake_compute_lock.parse()?;\n            info!(?limiter, shards, ?epoch, \"Using NodeLocks (wake_compute)\");\n            let locks = Box::leak(Box::new(control_plane::locks::ApiLocks::new(\n                \"wake_compute_lock\",\n                limiter,\n                shards,\n                timeout,\n                epoch,\n                &Metrics::get().wake_compute_lock,\n            )));\n\n            let url = args.uri.clone().parse()?;\n            let ep_url: crate::url::ApiUrl = args.auth_endpoint.parse()?;\n            let endpoint = http::Endpoint::new(ep_url, http::new_client());\n            let mut wake_compute_rps_limit = args.wake_compute_limit.clone();\n            RateBucketInfo::validate(&mut wake_compute_rps_limit)?;\n            let wake_compute_endpoint_rate_limiter =\n                Arc::new(WakeComputeRateLimiter::new(wake_compute_rps_limit));\n\n            // Since we use only get_allowed_ips_and_secret() 
wake_compute_endpoint_rate_limiter\n            // and locks are not used in ConsoleRedirectBackend,\n            // but they are required by the NeonControlPlaneClient\n            let api = control_plane::client::cplane_proxy_v1::NeonControlPlaneClient::new(\n                endpoint,\n                args.control_plane_token.clone(),\n                caches,\n                locks,\n                wake_compute_endpoint_rate_limiter,\n            );\n\n            let backend = ConsoleRedirectBackend::new(url, api);\n            let config = Box::leak(Box::new(backend));\n\n            Ok(Either::Right(config))\n        }\n\n        #[cfg(any(test, feature = \"testing\"))]\n        AuthBackendType::Local => {\n            let postgres: SocketAddr = \"127.0.0.1:7432\".parse()?;\n            let compute_ctl: ApiUrl = \"http://127.0.0.1:3081/\".parse()?;\n            let auth_backend = crate::auth::Backend::Local(\n                crate::auth::backend::MaybeOwned::Owned(LocalBackend::new(postgres, compute_ctl)),\n            );\n\n            let config = Box::leak(Box::new(auth_backend));\n\n            Ok(Either::Left(config))\n        }\n    }\n}\n\nasync fn configure_redis(\n    args: &ProxyCliArgs,\n) -> anyhow::Result<Option<ConnectionWithCredentialsProvider>> {\n    // TODO: untangle the config args\n    let redis_client = match &*args.redis_auth_type {\n        \"plain\" => match &args.redis_plain {\n            None => {\n                bail!(\"plain auth requires redis_plain to be set\");\n            }\n            Some(url) => {\n                Some(ConnectionWithCredentialsProvider::new_with_static_credentials(url.clone()))\n            }\n        },\n        \"irsa\" => match (&args.redis_host, args.redis_port) {\n            (Some(host), Some(port)) => Some(\n                ConnectionWithCredentialsProvider::new_with_credentials_provider(\n                    host.clone(),\n                    port,\n                    
elasticache::CredentialsProvider::new(\n                        args.aws_region.clone(),\n                        args.redis_cluster_name.clone(),\n                        args.redis_user_id.clone(),\n                    )\n                    .await,\n                ),\n            ),\n            (None, None) => {\n                // todo: upgrade to error?\n                warn!(\n                    \"irsa auth requires redis-host and redis-port to be set, continuing without regional_redis_client\"\n                );\n                None\n            }\n            _ => {\n                bail!(\"redis-host and redis-port must be specified together\");\n            }\n        },\n        auth_type => {\n            bail!(\"unknown auth type {auth_type:?} given\")\n        }\n    };\n\n    Ok(redis_client)\n}\n\n#[cfg(test)]\nmod tests {\n    use std::time::Duration;\n\n    use clap::Parser;\n\n    use crate::rate_limiter::RateBucketInfo;\n\n    #[test]\n    fn parse_endpoint_rps_limit() {\n        let config = super::ProxyCliArgs::parse_from([\n            \"proxy\",\n            \"--endpoint-rps-limit\",\n            \"100@1s\",\n            \"--endpoint-rps-limit\",\n            \"20@30s\",\n        ]);\n\n        assert_eq!(\n            config.endpoint_rps_limit,\n            vec![\n                RateBucketInfo::new(100, Duration::from_secs(1)),\n                RateBucketInfo::new(20, Duration::from_secs(30)),\n            ]\n        );\n    }\n}\n"
  },
  {
    "path": "proxy/src/cache/common.rs",
    "content": "use std::ops::{Deref, DerefMut};\nuse std::time::{Duration, Instant};\n\nuse moka::Expiry;\nuse moka::notification::RemovalCause;\n\nuse crate::control_plane::messages::ControlPlaneErrorMessage;\nuse crate::metrics::{\n    CacheEviction, CacheKind, CacheOutcome, CacheOutcomeGroup, CacheRemovalCause, Metrics,\n};\n\n/// Default TTL used when caching errors from control plane.\npub const DEFAULT_ERROR_TTL: Duration = Duration::from_secs(30);\n\n/// A generic trait which exposes types of cache's key and value,\n/// as well as the notion of cache entry invalidation.\n/// This is useful for [`Cached`].\npub(crate) trait Cache {\n    /// Entry's key.\n    type Key;\n\n    /// Entry's value.\n    type Value;\n\n    /// Invalidate an entry using a lookup info.\n    /// We don't have an empty default impl because it's error-prone.\n    fn invalidate(&self, _: &Self::Key);\n}\n\nimpl<C: Cache> Cache for &C {\n    type Key = C::Key;\n    type Value = C::Value;\n\n    fn invalidate(&self, info: &Self::Key) {\n        C::invalidate(self, info);\n    }\n}\n\n/// Wrapper for convenient entry invalidation.\npub(crate) struct Cached<C: Cache, V = <C as Cache>::Value> {\n    /// Cache + lookup info.\n    pub(crate) token: Option<(C, C::Key)>,\n\n    /// The value itself.\n    pub(crate) value: V,\n}\n\nimpl<C: Cache, V> Cached<C, V> {\n    /// Place any entry into this wrapper; invalidation will be a no-op.\n    pub(crate) fn new_uncached(value: V) -> Self {\n        Self { token: None, value }\n    }\n\n    /// Drop this entry from a cache if it's still there.\n    pub(crate) fn invalidate(self) -> V {\n        if let Some((cache, info)) = &self.token {\n            cache.invalidate(info);\n        }\n        self.value\n    }\n\n    /// Tell if this entry is actually cached.\n    pub(crate) fn cached(&self) -> bool {\n        self.token.is_some()\n    }\n}\n\nimpl<C: Cache, V> Deref for Cached<C, V> {\n    type Target = V;\n\n    fn deref(&self) -> &Self::Target 
{\n        &self.value\n    }\n}\n\nimpl<C: Cache, V> DerefMut for Cached<C, V> {\n    fn deref_mut(&mut self) -> &mut Self::Target {\n        &mut self.value\n    }\n}\n\npub type ControlPlaneResult<T> = Result<T, Box<ControlPlaneErrorMessage>>;\n\n#[derive(Clone, Copy)]\npub struct CplaneExpiry {\n    pub error: Duration,\n}\n\nimpl Default for CplaneExpiry {\n    fn default() -> Self {\n        Self {\n            error: DEFAULT_ERROR_TTL,\n        }\n    }\n}\n\nimpl CplaneExpiry {\n    pub fn expire_early<V>(\n        &self,\n        value: &ControlPlaneResult<V>,\n        updated: Instant,\n    ) -> Option<Duration> {\n        match value {\n            Ok(_) => None,\n            Err(err) => Some(self.expire_err_early(err, updated)),\n        }\n    }\n\n    pub fn expire_err_early(&self, err: &ControlPlaneErrorMessage, updated: Instant) -> Duration {\n        err.status\n            .as_ref()\n            .and_then(|s| s.details.retry_info.as_ref())\n            .map_or(self.error, |r| r.retry_at.into_std() - updated)\n    }\n}\n\nimpl<K, V> Expiry<K, ControlPlaneResult<V>> for CplaneExpiry {\n    fn expire_after_create(\n        &self,\n        _key: &K,\n        value: &ControlPlaneResult<V>,\n        created_at: Instant,\n    ) -> Option<Duration> {\n        self.expire_early(value, created_at)\n    }\n\n    fn expire_after_update(\n        &self,\n        _key: &K,\n        value: &ControlPlaneResult<V>,\n        updated_at: Instant,\n        _duration_until_expiry: Option<Duration>,\n    ) -> Option<Duration> {\n        self.expire_early(value, updated_at)\n    }\n}\n\npub fn eviction_listener(kind: CacheKind, cause: RemovalCause) {\n    let cause = match cause {\n        RemovalCause::Expired => CacheRemovalCause::Expired,\n        RemovalCause::Explicit => CacheRemovalCause::Explicit,\n        RemovalCause::Replaced => CacheRemovalCause::Replaced,\n        RemovalCause::Size => CacheRemovalCause::Size,\n    };\n    Metrics::get()\n        .cache\n    
    .evicted_total\n        .inc(CacheEviction { cache: kind, cause });\n}\n\n#[inline]\npub fn count_cache_outcome<T>(kind: CacheKind, cache_result: Option<T>) -> Option<T> {\n    let outcome = if cache_result.is_some() {\n        CacheOutcome::Hit\n    } else {\n        CacheOutcome::Miss\n    };\n    Metrics::get().cache.request_total.inc(CacheOutcomeGroup {\n        cache: kind,\n        outcome,\n    });\n    cache_result\n}\n\n#[inline]\npub fn count_cache_insert(kind: CacheKind) {\n    Metrics::get().cache.inserted_total.inc(kind);\n}\n"
  },
  {
    "path": "proxy/src/cache/mod.rs",
    "content": "pub(crate) mod common;\npub(crate) mod node_info;\npub(crate) mod project_info;\n\npub(crate) use common::{Cached, ControlPlaneResult, CplaneExpiry};\n"
  },
  {
    "path": "proxy/src/cache/node_info.rs",
    "content": "use crate::cache::common::{Cache, count_cache_insert, count_cache_outcome, eviction_listener};\nuse crate::cache::{Cached, ControlPlaneResult, CplaneExpiry};\nuse crate::config::CacheOptions;\nuse crate::control_plane::NodeInfo;\nuse crate::metrics::{CacheKind, Metrics};\nuse crate::types::EndpointCacheKey;\n\npub(crate) struct NodeInfoCache(moka::sync::Cache<EndpointCacheKey, ControlPlaneResult<NodeInfo>>);\npub(crate) type CachedNodeInfo = Cached<&'static NodeInfoCache, NodeInfo>;\n\nimpl Cache for NodeInfoCache {\n    type Key = EndpointCacheKey;\n    type Value = ControlPlaneResult<NodeInfo>;\n\n    fn invalidate(&self, info: &EndpointCacheKey) {\n        self.0.invalidate(info);\n    }\n}\n\nimpl NodeInfoCache {\n    pub fn new(config: CacheOptions) -> Self {\n        let builder = moka::sync::Cache::builder()\n            .name(\"node_info\")\n            .expire_after(CplaneExpiry::default());\n        let builder = config.moka(builder);\n\n        if let Some(size) = config.size {\n            Metrics::get()\n                .cache\n                .capacity\n                .set(CacheKind::NodeInfo, size as i64);\n        }\n\n        let builder = builder\n            .eviction_listener(|_k, _v, cause| eviction_listener(CacheKind::NodeInfo, cause));\n\n        Self(builder.build())\n    }\n\n    pub fn insert(&self, key: EndpointCacheKey, value: ControlPlaneResult<NodeInfo>) {\n        count_cache_insert(CacheKind::NodeInfo);\n        self.0.insert(key, value);\n    }\n\n    pub fn get(&self, key: &EndpointCacheKey) -> Option<ControlPlaneResult<NodeInfo>> {\n        count_cache_outcome(CacheKind::NodeInfo, self.0.get(key))\n    }\n\n    pub fn get_entry(\n        &'static self,\n        key: &EndpointCacheKey,\n    ) -> Option<ControlPlaneResult<CachedNodeInfo>> {\n        self.get(key).map(|res| {\n            res.map(|value| Cached {\n                token: Some((self, key.clone())),\n                value,\n            })\n        })\n  
  }\n}\n"
  },
  {
    "path": "proxy/src/cache/project_info.rs",
    "content": "use std::collections::HashSet;\nuse std::convert::Infallible;\n\nuse clashmap::ClashMap;\nuse moka::sync::Cache;\nuse tracing::{debug, info};\n\nuse crate::cache::common::{\n    ControlPlaneResult, CplaneExpiry, count_cache_insert, count_cache_outcome, eviction_listener,\n};\nuse crate::config::ProjectInfoCacheOptions;\nuse crate::control_plane::messages::{ControlPlaneErrorMessage, Reason};\nuse crate::control_plane::{EndpointAccessControl, RoleAccessControl};\nuse crate::intern::{AccountIdInt, EndpointIdInt, ProjectIdInt, RoleNameInt};\nuse crate::metrics::{CacheKind, Metrics};\nuse crate::types::{EndpointId, RoleName};\n\n/// Cache for project info.\n/// This is used to cache auth data for endpoints.\n/// Invalidation is done by console notifications or by TTL (if console notifications are disabled).\n///\n/// We also store endpoint-to-project mapping in the cache, to be able to access per-endpoint data.\n/// One may ask, why the data is stored per project, when on the user request there is only data about the endpoint available?\n/// On the cplane side updates are done per project (or per branch), so it's easier to invalidate the whole project cache.\npub struct ProjectInfoCache {\n    role_controls: Cache<(EndpointIdInt, RoleNameInt), ControlPlaneResult<RoleAccessControl>>,\n    ep_controls: Cache<EndpointIdInt, ControlPlaneResult<EndpointAccessControl>>,\n\n    project2ep: ClashMap<ProjectIdInt, HashSet<EndpointIdInt>>,\n    // FIXME(stefan): we need a way to GC the account2ep map.\n    account2ep: ClashMap<AccountIdInt, HashSet<EndpointIdInt>>,\n\n    config: ProjectInfoCacheOptions,\n}\n\nimpl ProjectInfoCache {\n    pub fn invalidate_endpoint_access(&self, endpoint_id: EndpointIdInt) {\n        info!(\"invalidating endpoint access for `{endpoint_id}`\");\n        self.ep_controls.invalidate(&endpoint_id);\n    }\n\n    pub fn invalidate_endpoint_access_for_project(&self, project_id: ProjectIdInt) {\n        info!(\"invalidating endpoint 
access for project `{project_id}`\");\n        let endpoints = self\n            .project2ep\n            .get(&project_id)\n            .map(|kv| kv.value().clone())\n            .unwrap_or_default();\n        for endpoint_id in endpoints {\n            self.ep_controls.invalidate(&endpoint_id);\n        }\n    }\n\n    pub fn invalidate_endpoint_access_for_org(&self, account_id: AccountIdInt) {\n        info!(\"invalidating endpoint access for org `{account_id}`\");\n        let endpoints = self\n            .account2ep\n            .get(&account_id)\n            .map(|kv| kv.value().clone())\n            .unwrap_or_default();\n        for endpoint_id in endpoints {\n            self.ep_controls.invalidate(&endpoint_id);\n        }\n    }\n\n    pub fn invalidate_role_secret_for_project(\n        &self,\n        project_id: ProjectIdInt,\n        role_name: RoleNameInt,\n    ) {\n        info!(\n            \"invalidating role secret for project_id `{}` and role_name `{}`\",\n            project_id, role_name,\n        );\n        let endpoints = self\n            .project2ep\n            .get(&project_id)\n            .map(|kv| kv.value().clone())\n            .unwrap_or_default();\n        for endpoint_id in endpoints {\n            self.role_controls.invalidate(&(endpoint_id, role_name));\n        }\n    }\n}\n\nimpl ProjectInfoCache {\n    pub(crate) fn new(config: ProjectInfoCacheOptions) -> Self {\n        Metrics::get().cache.capacity.set(\n            CacheKind::ProjectInfoRoles,\n            (config.size * config.max_roles) as i64,\n        );\n        Metrics::get()\n            .cache\n            .capacity\n            .set(CacheKind::ProjectInfoEndpoints, config.size as i64);\n\n        // we cache errors for 30 seconds, unless retry_at is set.\n        let expiry = CplaneExpiry::default();\n        Self {\n            role_controls: Cache::builder()\n                .name(\"project_info_roles\")\n                .eviction_listener(|_k, _v, cause| 
{\n                    eviction_listener(CacheKind::ProjectInfoRoles, cause);\n                })\n                .max_capacity(config.size * config.max_roles)\n                .time_to_live(config.ttl)\n                .expire_after(expiry)\n                .build(),\n            ep_controls: Cache::builder()\n                .name(\"project_info_endpoints\")\n                .eviction_listener(|_k, _v, cause| {\n                    eviction_listener(CacheKind::ProjectInfoEndpoints, cause);\n                })\n                .max_capacity(config.size)\n                .time_to_live(config.ttl)\n                .expire_after(expiry)\n                .build(),\n            project2ep: ClashMap::new(),\n            account2ep: ClashMap::new(),\n            config,\n        }\n    }\n\n    pub(crate) fn get_role_secret(\n        &self,\n        endpoint_id: &EndpointId,\n        role_name: &RoleName,\n    ) -> Option<ControlPlaneResult<RoleAccessControl>> {\n        let endpoint_id = EndpointIdInt::get(endpoint_id)?;\n        let role_name = RoleNameInt::get(role_name)?;\n\n        count_cache_outcome(\n            CacheKind::ProjectInfoRoles,\n            self.role_controls.get(&(endpoint_id, role_name)),\n        )\n    }\n\n    pub(crate) fn get_endpoint_access(\n        &self,\n        endpoint_id: &EndpointId,\n    ) -> Option<ControlPlaneResult<EndpointAccessControl>> {\n        let endpoint_id = EndpointIdInt::get(endpoint_id)?;\n\n        count_cache_outcome(\n            CacheKind::ProjectInfoEndpoints,\n            self.ep_controls.get(&endpoint_id),\n        )\n    }\n\n    pub(crate) fn insert_endpoint_access(\n        &self,\n        account_id: Option<AccountIdInt>,\n        project_id: Option<ProjectIdInt>,\n        endpoint_id: EndpointIdInt,\n        role_name: RoleNameInt,\n        controls: EndpointAccessControl,\n        role_controls: RoleAccessControl,\n    ) {\n        if let Some(account_id) = account_id {\n            
self.insert_account2endpoint(account_id, endpoint_id);\n        }\n        if let Some(project_id) = project_id {\n            self.insert_project2endpoint(project_id, endpoint_id);\n        }\n\n        debug!(\n            key = &*endpoint_id,\n            \"created a cache entry for endpoint access\"\n        );\n\n        count_cache_insert(CacheKind::ProjectInfoEndpoints);\n        count_cache_insert(CacheKind::ProjectInfoRoles);\n\n        self.ep_controls.insert(endpoint_id, Ok(controls));\n        self.role_controls\n            .insert((endpoint_id, role_name), Ok(role_controls));\n    }\n\n    pub(crate) fn insert_endpoint_access_err(\n        &self,\n        endpoint_id: EndpointIdInt,\n        role_name: RoleNameInt,\n        msg: Box<ControlPlaneErrorMessage>,\n    ) {\n        debug!(\n            key = &*endpoint_id,\n            \"created a cache entry for an endpoint access error\"\n        );\n\n        // RoleProtected is the only role-specific error that control plane can give us.\n        // If a given role name does not exist, it still returns a successful response,\n        // just with an empty secret.\n        if msg.get_reason() != Reason::RoleProtected {\n            // We can cache all the other errors in ep_controls because they don't\n            // depend on what role name we pass to control plane.\n            self.ep_controls\n                .entry(endpoint_id)\n                .and_compute_with(|entry| match entry {\n                    // leave the entry alone if it's already Ok\n                    Some(entry) if entry.value().is_ok() => moka::ops::compute::Op::Nop,\n                    // replace the entry\n                    _ => {\n                        count_cache_insert(CacheKind::ProjectInfoEndpoints);\n                        moka::ops::compute::Op::Put(Err(msg.clone()))\n                    }\n                });\n        }\n\n        count_cache_insert(CacheKind::ProjectInfoRoles);\n        self.role_controls\n       
     .insert((endpoint_id, role_name), Err(msg));\n    }\n\n    fn insert_project2endpoint(&self, project_id: ProjectIdInt, endpoint_id: EndpointIdInt) {\n        if let Some(mut endpoints) = self.project2ep.get_mut(&project_id) {\n            endpoints.insert(endpoint_id);\n        } else {\n            self.project2ep\n                .insert(project_id, HashSet::from([endpoint_id]));\n        }\n    }\n\n    fn insert_account2endpoint(&self, account_id: AccountIdInt, endpoint_id: EndpointIdInt) {\n        if let Some(mut endpoints) = self.account2ep.get_mut(&account_id) {\n            endpoints.insert(endpoint_id);\n        } else {\n            self.account2ep\n                .insert(account_id, HashSet::from([endpoint_id]));\n        }\n    }\n\n    pub fn maybe_invalidate_role_secret(&self, _endpoint_id: &EndpointId, _role_name: &RoleName) {\n        // TODO: Expire the value early if the key is idle.\n        // Currently not an issue as we would just use the TTL to decide, which is what already happens.\n    }\n\n    pub async fn gc_worker(&self) -> anyhow::Result<Infallible> {\n        let mut interval = tokio::time::interval(self.config.gc_interval);\n        loop {\n            interval.tick().await;\n            self.ep_controls.run_pending_tasks();\n            self.role_controls.run_pending_tasks();\n        }\n    }\n}\n\n#[cfg(test)]\nmod tests {\n    use std::sync::Arc;\n    use std::time::Duration;\n\n    use super::*;\n    use crate::control_plane::messages::{Details, EndpointRateLimitConfig, ErrorInfo, Status};\n    use crate::control_plane::{AccessBlockerFlags, AuthSecret};\n    use crate::scram::ServerSecret;\n\n    #[tokio::test]\n    async fn test_project_info_cache_settings() {\n        let cache = ProjectInfoCache::new(ProjectInfoCacheOptions {\n            size: 1,\n            max_roles: 2,\n            ttl: Duration::from_secs(1),\n            gc_interval: Duration::from_secs(600),\n        });\n        let project_id: 
Option<ProjectIdInt> = Some(ProjectIdInt::from(&\"project\".into()));\n        let endpoint_id: EndpointId = \"endpoint\".into();\n        let account_id = None;\n\n        let user1: RoleName = \"user1\".into();\n        let user2: RoleName = \"user2\".into();\n        let secret1 = Some(AuthSecret::Scram(ServerSecret::mock([1; 32])));\n        let secret2 = None;\n        let allowed_ips = Arc::new(vec![\n            \"127.0.0.1\".parse().unwrap(),\n            \"127.0.0.2\".parse().unwrap(),\n        ]);\n\n        cache.insert_endpoint_access(\n            account_id,\n            project_id,\n            (&endpoint_id).into(),\n            (&user1).into(),\n            EndpointAccessControl {\n                allowed_ips: allowed_ips.clone(),\n                allowed_vpce: Arc::new(vec![]),\n                flags: AccessBlockerFlags::default(),\n                rate_limits: EndpointRateLimitConfig::default(),\n            },\n            RoleAccessControl {\n                secret: secret1.clone(),\n            },\n        );\n\n        cache.insert_endpoint_access(\n            account_id,\n            project_id,\n            (&endpoint_id).into(),\n            (&user2).into(),\n            EndpointAccessControl {\n                allowed_ips: allowed_ips.clone(),\n                allowed_vpce: Arc::new(vec![]),\n                flags: AccessBlockerFlags::default(),\n                rate_limits: EndpointRateLimitConfig::default(),\n            },\n            RoleAccessControl {\n                secret: secret2.clone(),\n            },\n        );\n\n        let cached = cache.get_role_secret(&endpoint_id, &user1).unwrap();\n        assert_eq!(cached.unwrap().secret, secret1);\n\n        let cached = cache.get_role_secret(&endpoint_id, &user2).unwrap();\n        assert_eq!(cached.unwrap().secret, secret2);\n\n        // Shouldn't add more than 2 roles.\n        let user3: RoleName = \"user3\".into();\n        let secret3 = 
Some(AuthSecret::Scram(ServerSecret::mock([3; 32])));\n\n        cache.role_controls.run_pending_tasks();\n        cache.insert_endpoint_access(\n            account_id,\n            project_id,\n            (&endpoint_id).into(),\n            (&user3).into(),\n            EndpointAccessControl {\n                allowed_ips: allowed_ips.clone(),\n                allowed_vpce: Arc::new(vec![]),\n                flags: AccessBlockerFlags::default(),\n                rate_limits: EndpointRateLimitConfig::default(),\n            },\n            RoleAccessControl {\n                secret: secret3.clone(),\n            },\n        );\n\n        cache.role_controls.run_pending_tasks();\n        assert_eq!(cache.role_controls.entry_count(), 2);\n\n        tokio::time::sleep(Duration::from_secs(2)).await;\n\n        cache.role_controls.run_pending_tasks();\n        assert_eq!(cache.role_controls.entry_count(), 0);\n    }\n\n    #[tokio::test]\n    async fn test_caching_project_info_errors() {\n        let cache = ProjectInfoCache::new(ProjectInfoCacheOptions {\n            size: 10,\n            max_roles: 10,\n            ttl: Duration::from_secs(1),\n            gc_interval: Duration::from_secs(600),\n        });\n        let project_id = Some(ProjectIdInt::from(&\"project\".into()));\n        let endpoint_id: EndpointId = \"endpoint\".into();\n        let account_id = None;\n\n        let user1: RoleName = \"user1\".into();\n        let user2: RoleName = \"user2\".into();\n        let secret = Some(AuthSecret::Scram(ServerSecret::mock([1; 32])));\n\n        let role_msg = Box::new(ControlPlaneErrorMessage {\n            error: \"role is protected and cannot be used for password-based authentication\"\n                .to_owned()\n                .into_boxed_str(),\n            http_status_code: http::StatusCode::NOT_FOUND,\n            status: Some(Status {\n                code: \"PERMISSION_DENIED\".to_owned().into_boxed_str(),\n                message: \"role is 
protected and cannot be used for password-based authentication\"\n                    .to_owned()\n                    .into_boxed_str(),\n                details: Details {\n                    error_info: Some(ErrorInfo {\n                        reason: Reason::RoleProtected,\n                    }),\n                    retry_info: None,\n                    user_facing_message: None,\n                },\n            }),\n        });\n\n        let generic_msg = Box::new(ControlPlaneErrorMessage {\n            error: \"oh noes\".to_owned().into_boxed_str(),\n            http_status_code: http::StatusCode::NOT_FOUND,\n            status: None,\n        });\n\n        let get_role_secret =\n            |endpoint_id, role_name| cache.get_role_secret(endpoint_id, role_name).unwrap();\n        let get_endpoint_access = |endpoint_id| cache.get_endpoint_access(endpoint_id).unwrap();\n\n        // stores role-specific errors only for get_role_secret\n        cache.insert_endpoint_access_err((&endpoint_id).into(), (&user1).into(), role_msg.clone());\n        assert_eq!(\n            get_role_secret(&endpoint_id, &user1).unwrap_err().error,\n            role_msg.error\n        );\n        assert!(cache.get_endpoint_access(&endpoint_id).is_none());\n\n        // stores non-role specific errors for both get_role_secret and get_endpoint_access\n        cache.insert_endpoint_access_err(\n            (&endpoint_id).into(),\n            (&user1).into(),\n            generic_msg.clone(),\n        );\n        assert_eq!(\n            get_role_secret(&endpoint_id, &user1).unwrap_err().error,\n            generic_msg.error\n        );\n        assert_eq!(\n            get_endpoint_access(&endpoint_id).unwrap_err().error,\n            generic_msg.error\n        );\n\n        // error isn't returned for other roles in the same endpoint\n        assert!(cache.get_role_secret(&endpoint_id, &user2).is_none());\n\n        // success for a role does not overwrite errors for other roles\n 
       cache.insert_endpoint_access(\n            account_id,\n            project_id,\n            (&endpoint_id).into(),\n            (&user2).into(),\n            EndpointAccessControl {\n                allowed_ips: Arc::new(vec![]),\n                allowed_vpce: Arc::new(vec![]),\n                flags: AccessBlockerFlags::default(),\n                rate_limits: EndpointRateLimitConfig::default(),\n            },\n            RoleAccessControl {\n                secret: secret.clone(),\n            },\n        );\n        assert!(get_role_secret(&endpoint_id, &user1).is_err());\n        assert!(get_role_secret(&endpoint_id, &user2).is_ok());\n        // ...but does clear the access control error\n        assert!(get_endpoint_access(&endpoint_id).is_ok());\n\n        // storing an error does not overwrite successful access control response\n        cache.insert_endpoint_access_err(\n            (&endpoint_id).into(),\n            (&user2).into(),\n            generic_msg.clone(),\n        );\n        assert!(get_role_secret(&endpoint_id, &user2).is_err());\n        assert!(get_endpoint_access(&endpoint_id).is_ok());\n    }\n}\n"
  },
  {
    "path": "proxy/src/cancellation.rs",
    "content": "use std::convert::Infallible;\nuse std::net::{IpAddr, SocketAddr};\nuse std::pin::pin;\nuse std::sync::{Arc, OnceLock};\nuse std::time::Duration;\n\nuse futures::FutureExt;\nuse ipnet::{IpNet, Ipv4Net, Ipv6Net};\nuse postgres_client::RawCancelToken;\nuse postgres_client::tls::MakeTlsConnect;\nuse redis::{Cmd, FromRedisValue, SetExpiry, SetOptions, Value};\nuse serde::{Deserialize, Serialize};\nuse thiserror::Error;\nuse tokio::net::TcpStream;\nuse tokio::time::timeout;\nuse tracing::{debug, error, info};\n\nuse crate::auth::AuthError;\nuse crate::auth::backend::ComputeUserInfo;\nuse crate::batch::{BatchQueue, BatchQueueError, QueueProcessing};\nuse crate::config::ComputeConfig;\nuse crate::context::RequestContext;\nuse crate::control_plane::ControlPlaneApi;\nuse crate::error::ReportableError;\nuse crate::ext::LockExt;\nuse crate::metrics::{CancelChannelSizeGuard, CancellationRequest, Metrics, RedisMsgKind};\nuse crate::pqproto::CancelKeyData;\nuse crate::rate_limiter::LeakyBucketRateLimiter;\nuse crate::redis::keys::KeyPrefix;\nuse crate::redis::kv_ops::{RedisKVClient, RedisKVClientError};\nuse crate::util::run_until;\n\ntype IpSubnetKey = IpNet;\n\n/// Initial period and TTL is shorter to clear keys of short-lived connections faster.\nconst CANCEL_KEY_INITIAL_PERIOD: Duration = Duration::from_secs(60);\nconst CANCEL_KEY_REFRESH_PERIOD: Duration = Duration::from_secs(10 * 60);\n/// `CANCEL_KEY_TTL_SLACK` is added to the periods to determine the actual TTL.\nconst CANCEL_KEY_TTL_SLACK: Duration = Duration::from_secs(30);\n\n// Message types for sending through mpsc channel\npub enum CancelKeyOp {\n    Store {\n        key: CancelKeyData,\n        value: Box<str>,\n        expire: Duration,\n    },\n    Refresh {\n        key: CancelKeyData,\n        expire: Duration,\n    },\n    Get {\n        key: CancelKeyData,\n    },\n    GetOld {\n        key: CancelKeyData,\n    },\n}\n\nimpl CancelKeyOp {\n    const fn redis_msg_kind(&self) -> RedisMsgKind 
{\n        match self {\n            CancelKeyOp::Store { .. } => RedisMsgKind::Set,\n            CancelKeyOp::Refresh { .. } => RedisMsgKind::Expire,\n            CancelKeyOp::Get { .. } => RedisMsgKind::Get,\n            CancelKeyOp::GetOld { .. } => RedisMsgKind::HGet,\n        }\n    }\n\n    fn cancel_channel_metric_guard(&self) -> CancelChannelSizeGuard<'static> {\n        Metrics::get()\n            .proxy\n            .cancel_channel_size\n            .guard(self.redis_msg_kind())\n    }\n}\n\n#[derive(thiserror::Error, Debug, Clone)]\npub enum PipelineError {\n    #[error(\"could not send cmd to redis: {0}\")]\n    RedisKVClient(Arc<RedisKVClientError>),\n    #[error(\"incorrect number of responses from redis\")]\n    IncorrectNumberOfResponses,\n}\n\npub struct Pipeline {\n    inner: redis::Pipeline,\n    replies: usize,\n}\n\nimpl Pipeline {\n    fn with_capacity(n: usize) -> Self {\n        Self {\n            inner: redis::Pipeline::with_capacity(n),\n            replies: 0,\n        }\n    }\n\n    async fn execute(self, client: &mut RedisKVClient) -> Result<Vec<Value>, PipelineError> {\n        let responses = self.replies;\n        let batch_size = self.inner.len();\n\n        if !client.credentials_refreshed() {\n            tracing::debug!(\n                \"Redis credentials are not refreshed. 
Sleeping for 5 seconds before retrying...\"\n            );\n            tokio::time::sleep(Duration::from_secs(5)).await;\n        }\n\n        match client.query(&self.inner).await {\n            // for each reply, we expect that many values.\n            Ok(Value::Array(values)) if values.len() == responses => {\n                debug!(\n                    batch_size,\n                    responses, \"successfully completed cancellation jobs\",\n                );\n                Ok(values.into_iter().collect())\n            }\n            Ok(value) => {\n                error!(batch_size, ?value, \"unexpected redis return value\");\n                Err(PipelineError::IncorrectNumberOfResponses)\n            }\n            Err(err) => Err(PipelineError::RedisKVClient(Arc::new(err))),\n        }\n    }\n\n    fn add_command(&mut self, cmd: Cmd) {\n        self.inner.add_command(cmd);\n        self.replies += 1;\n    }\n}\n\nimpl CancelKeyOp {\n    fn register(&self, pipe: &mut Pipeline) {\n        match self {\n            CancelKeyOp::Store { key, value, expire } => {\n                let key = KeyPrefix::Cancel(*key).build_redis_key();\n                pipe.add_command(Cmd::set_options(\n                    &key,\n                    &**value,\n                    SetOptions::default().with_expiration(SetExpiry::EX(expire.as_secs())),\n                ));\n            }\n            CancelKeyOp::Refresh { key, expire } => {\n                let key = KeyPrefix::Cancel(*key).build_redis_key();\n                pipe.add_command(Cmd::expire(&key, expire.as_secs() as i64));\n            }\n            CancelKeyOp::GetOld { key } => {\n                let key = KeyPrefix::Cancel(*key).build_redis_key();\n                pipe.add_command(Cmd::hget(key, \"data\"));\n            }\n            CancelKeyOp::Get { key } => {\n                let key = KeyPrefix::Cancel(*key).build_redis_key();\n                pipe.add_command(Cmd::get(key));\n            }\n        
}\n    }\n}\n\npub struct CancellationProcessor {\n    pub client: RedisKVClient,\n    pub batch_size: usize,\n}\n\nimpl QueueProcessing for CancellationProcessor {\n    type Req = (CancelChannelSizeGuard<'static>, CancelKeyOp);\n    type Res = redis::Value;\n    type Err = PipelineError;\n\n    fn batch_size(&self, _queue_size: usize) -> usize {\n        self.batch_size\n    }\n\n    async fn apply(&mut self, batch: Vec<Self::Req>) -> Result<Vec<Self::Res>, Self::Err> {\n        if !self.client.credentials_refreshed() {\n            // this will cause a timeout for cancellation operations\n            tracing::debug!(\n                \"Redis credentials are not refreshed. Sleeping for 5 seconds before retrying...\"\n            );\n            tokio::time::sleep(Duration::from_secs(5)).await;\n        }\n\n        let mut pipeline = Pipeline::with_capacity(batch.len());\n\n        let batch_size = batch.len();\n        debug!(batch_size, \"running cancellation jobs\");\n\n        for (_, op) in &batch {\n            op.register(&mut pipeline);\n        }\n\n        pipeline.execute(&mut self.client).await\n    }\n}\n\n/// Enables serving `CancelRequest`s.\n///\n/// If `CancellationPublisher` is available, cancel request will be used to publish the cancellation key to other proxy instances.\npub struct CancellationHandler {\n    compute_config: &'static ComputeConfig,\n    // rate limiter of cancellation requests\n    limiter: Arc<std::sync::Mutex<LeakyBucketRateLimiter<IpSubnetKey>>>,\n    tx: OnceLock<BatchQueue<CancellationProcessor>>, // send messages to the redis KV client task\n}\n\n#[derive(Debug, Error)]\npub(crate) enum CancelError {\n    #[error(\"{0}\")]\n    IO(#[from] std::io::Error),\n\n    #[error(\"{0}\")]\n    Postgres(#[from] postgres_client::Error),\n\n    #[error(\"rate limit exceeded\")]\n    RateLimit,\n\n    #[error(\"Authentication error\")]\n    AuthError(#[from] AuthError),\n\n    #[error(\"key not found\")]\n    NotFound,\n\n    
#[error(\"proxy service error\")]\n    InternalError,\n}\n\nimpl ReportableError for CancelError {\n    fn get_error_kind(&self) -> crate::error::ErrorKind {\n        match self {\n            CancelError::IO(_) => crate::error::ErrorKind::Compute,\n            CancelError::Postgres(e) if e.as_db_error().is_some() => {\n                crate::error::ErrorKind::Postgres\n            }\n            CancelError::Postgres(_) => crate::error::ErrorKind::Compute,\n            CancelError::RateLimit => crate::error::ErrorKind::RateLimit,\n            CancelError::NotFound | CancelError::AuthError(_) => crate::error::ErrorKind::User,\n            CancelError::InternalError => crate::error::ErrorKind::Service,\n        }\n    }\n}\n\nimpl CancellationHandler {\n    pub fn new(compute_config: &'static ComputeConfig) -> Self {\n        Self {\n            compute_config,\n            tx: OnceLock::new(),\n            limiter: Arc::new(std::sync::Mutex::new(\n                LeakyBucketRateLimiter::<IpSubnetKey>::new_with_shards(\n                    LeakyBucketRateLimiter::<IpSubnetKey>::DEFAULT,\n                    64,\n                ),\n            )),\n        }\n    }\n\n    pub fn init_tx(&self, queue: BatchQueue<CancellationProcessor>) {\n        self.tx\n            .set(queue)\n            .map_err(|_| {})\n            .expect(\"cancellation queue should be registered once\");\n    }\n\n    pub(crate) fn get_key(self: Arc<Self>) -> Session {\n        // we intentionally generate a random \"backend pid\" and \"secret key\" here.\n        // we use the corresponding u64 as an identifier for the\n        // actual endpoint+pid+secret for postgres/pgbouncer.\n        //\n        // if we forwarded the backend_pid from postgres to the client, there would be a lot\n        // of overlap between our computes as most pids are small (~100).\n\n        let key: CancelKeyData = rand::random();\n\n        debug!(\"registered new query cancellation key {key}\");\n        
Session {\n            key,\n            cancellation_handler: self,\n        }\n    }\n\n    /// This is not cancel safe\n    async fn get_cancel_key(\n        &self,\n        key: CancelKeyData,\n    ) -> Result<Option<CancelClosure>, CancelError> {\n        const TIMEOUT: Duration = Duration::from_secs(5);\n\n        let Some(tx) = self.tx.get() else {\n            tracing::warn!(\"cancellation handler is not available\");\n            return Err(CancelError::InternalError);\n        };\n\n        let guard = Metrics::get()\n            .proxy\n            .cancel_channel_size\n            .guard(RedisMsgKind::Get);\n        let op = CancelKeyOp::Get { key };\n        let result = timeout(\n            TIMEOUT,\n            tx.call((guard, op), std::future::pending::<Infallible>()),\n        )\n        .await\n        .map_err(|_| {\n            tracing::warn!(\"timed out waiting to receive GetCancelData response\");\n            CancelError::RateLimit\n        })?;\n\n        // We may still have cancel keys set with HSET <key> \"data\".\n        // Check error type and retry with HGET.\n        // TODO: remove code after HSET is not used anymore.\n        let result = if let Err(err) = result.as_ref()\n            && let BatchQueueError::Result(err) = err\n            && let PipelineError::RedisKVClient(err) = err\n            && let RedisKVClientError::Redis(err) = &**err\n            && let Some(errcode) = err.code()\n            && errcode == \"WRONGTYPE\"\n        {\n            let guard = Metrics::get()\n                .proxy\n                .cancel_channel_size\n                .guard(RedisMsgKind::HGet);\n            let op = CancelKeyOp::GetOld { key };\n            timeout(\n                TIMEOUT,\n                tx.call((guard, op), std::future::pending::<Infallible>()),\n            )\n            .await\n            .map_err(|_| {\n                tracing::warn!(\"timed out waiting to receive GetCancelData response\");\n                
CancelError::RateLimit\n            })?\n        } else {\n            result\n        };\n\n        let result = result.map_err(|e| {\n            tracing::warn!(\"failed to receive GetCancelData response: {e}\");\n            CancelError::InternalError\n        })?;\n\n        let cancel_state_str = String::from_owned_redis_value(result).map_err(|e| {\n            tracing::warn!(\"failed to receive GetCancelData response: {e}\");\n            CancelError::InternalError\n        })?;\n\n        let cancel_closure: CancelClosure =\n            serde_json::from_str(&cancel_state_str).map_err(|e| {\n                tracing::warn!(\"failed to deserialize cancel state: {e}\");\n                CancelError::InternalError\n            })?;\n\n        Ok(Some(cancel_closure))\n    }\n\n    /// Try to cancel a running query for the corresponding connection.\n    /// If the cancellation key is not found, it will be published to Redis.\n    /// check_ip_allowed - if true, check if the IP is allowed to cancel the query.\n    /// Will fetch IP allowlist internally.\n    ///\n    /// return Result primarily for tests\n    ///\n    /// This is not cancel safe\n    pub(crate) async fn cancel_session<T: ControlPlaneApi>(\n        &self,\n        key: CancelKeyData,\n        ctx: RequestContext,\n        check_ip_allowed: bool,\n        check_vpc_allowed: bool,\n        auth_backend: &T,\n    ) -> Result<(), CancelError> {\n        let subnet_key = match ctx.peer_addr() {\n            IpAddr::V4(ip) => IpNet::V4(Ipv4Net::new_assert(ip, 24).trunc()), // use default mask here\n            IpAddr::V6(ip) => IpNet::V6(Ipv6Net::new_assert(ip, 64).trunc()),\n        };\n\n        let allowed = {\n            let rate_limit_config = None;\n            let limiter = self.limiter.lock_propagate_poison();\n            limiter.check(subnet_key, rate_limit_config, 1)\n        };\n        if !allowed {\n            // log only the subnet part of the IP address to know which subnet is rate limited\n 
           tracing::warn!(\"Rate limit exceeded. Skipping cancellation message, {subnet_key}\");\n            Metrics::get()\n                .proxy\n                .cancellation_requests_total\n                .inc(CancellationRequest {\n                    kind: crate::metrics::CancellationOutcome::RateLimitExceeded,\n                });\n            return Err(CancelError::RateLimit);\n        }\n\n        let cancel_state = self.get_cancel_key(key).await.map_err(|e| {\n            tracing::warn!(\"failed to receive RedisOp response: {e}\");\n            CancelError::InternalError\n        })?;\n\n        let Some(cancel_closure) = cancel_state else {\n            tracing::warn!(\"query cancellation key not found: {key}\");\n            Metrics::get()\n                .proxy\n                .cancellation_requests_total\n                .inc(CancellationRequest {\n                    kind: crate::metrics::CancellationOutcome::NotFound,\n                });\n            return Err(CancelError::NotFound);\n        };\n\n        let info = &cancel_closure.user_info;\n        let access_controls = auth_backend\n            .get_endpoint_access_control(&ctx, &info.endpoint, &info.user)\n            .await\n            .map_err(|e| CancelError::AuthError(e.into()))?;\n\n        access_controls.check(&ctx, check_ip_allowed, check_vpc_allowed)?;\n\n        Metrics::get()\n            .proxy\n            .cancellation_requests_total\n            .inc(CancellationRequest {\n                kind: crate::metrics::CancellationOutcome::Found,\n            });\n        info!(\"cancelling query per user's request using key {key}\");\n        cancel_closure.try_cancel_query(self.compute_config).await\n    }\n}\n\n/// This should've been a [`std::future::Future`], but\n/// it's impossible to name a type of an unboxed future\n/// (we'd need something like `#![feature(type_alias_impl_trait)]`).\n#[derive(Debug, Clone, Serialize, Deserialize)]\npub struct CancelClosure {\n    pub 
socket_addr: SocketAddr,\n    pub cancel_token: RawCancelToken,\n    pub hostname: String, // for pg_sni router\n    pub user_info: ComputeUserInfo,\n}\n\nimpl CancelClosure {\n    /// Cancels the query running on user's compute node.\n    pub(crate) async fn try_cancel_query(\n        &self,\n        compute_config: &ComputeConfig,\n    ) -> Result<(), CancelError> {\n        let socket = TcpStream::connect(self.socket_addr).await?;\n\n        let tls = <_ as MakeTlsConnect<tokio::net::TcpStream>>::make_tls_connect(\n            compute_config,\n            &self.hostname,\n        )\n        .map_err(|e| CancelError::IO(std::io::Error::other(e.to_string())))?;\n\n        self.cancel_token.cancel_query_raw(socket, tls).await?;\n        debug!(\"query was cancelled\");\n        Ok(())\n    }\n}\n\n/// Helper for registering query cancellation tokens.\npub(crate) struct Session {\n    /// The user-facing key identifying this session.\n    key: CancelKeyData,\n    cancellation_handler: Arc<CancellationHandler>,\n}\n\nimpl Session {\n    pub(crate) fn key(&self) -> &CancelKeyData {\n        &self.key\n    }\n\n    /// Ensure the cancel key is continuously refreshed,\n    /// but stop when the channel is dropped.\n    ///\n    /// This is not cancel safe\n    pub(crate) async fn maintain_cancel_key(\n        &self,\n        session_id: uuid::Uuid,\n        cancel: tokio::sync::oneshot::Receiver<Infallible>,\n        cancel_closure: &CancelClosure,\n        compute_config: &ComputeConfig,\n    ) {\n        let Some(tx) = self.cancellation_handler.tx.get() else {\n            tracing::warn!(\"cancellation handler is not available\");\n            // don't exit, as we only want to exit if cancelled externally.\n            std::future::pending().await\n        };\n\n        let closure_json = serde_json::to_string(&cancel_closure)\n            .expect(\"serialising to json string should not fail\")\n            .into_boxed_str();\n\n        let mut cancel = 
pin!(cancel);\n\n        enum State {\n            Init,\n            Refresh,\n        }\n\n        let mut state = State::Init;\n        loop {\n            let (op, mut wait_interval) = match state {\n                State::Init => {\n                    tracing::debug!(\n                        src=%self.key,\n                        dest=?cancel_closure.cancel_token,\n                        \"registering cancellation key\"\n                    );\n                    (\n                        CancelKeyOp::Store {\n                            key: self.key,\n                            value: closure_json.clone(),\n                            expire: CANCEL_KEY_INITIAL_PERIOD + CANCEL_KEY_TTL_SLACK,\n                        },\n                        CANCEL_KEY_INITIAL_PERIOD,\n                    )\n                }\n\n                State::Refresh => {\n                    tracing::debug!(\n                        src=%self.key,\n                        dest=?cancel_closure.cancel_token,\n                        \"refreshing cancellation key\"\n                    );\n                    (\n                        CancelKeyOp::Refresh {\n                            key: self.key,\n                            expire: CANCEL_KEY_REFRESH_PERIOD + CANCEL_KEY_TTL_SLACK,\n                        },\n                        CANCEL_KEY_REFRESH_PERIOD,\n                    )\n                }\n            };\n\n            match tx\n                .call((op.cancel_channel_metric_guard(), op), cancel.as_mut())\n                .await\n            {\n                // SET returns OK\n                Ok(Value::Okay) => {\n                    tracing::debug!(\n                        src=%self.key,\n                        dest=?cancel_closure.cancel_token,\n                        \"registered cancellation key\"\n                    );\n                    state = State::Refresh;\n                }\n\n                // EXPIRE returns 1\n                
Ok(Value::Int(1)) => {\n                    tracing::debug!(\n                        src=%self.key,\n                        dest=?cancel_closure.cancel_token,\n                        \"refreshed cancellation key\"\n                    );\n                }\n\n                Ok(_) => {\n                    // Any other response likely means the key expired.\n                    tracing::warn!(src=%self.key, \"refreshing cancellation key failed\");\n                    // Re-enter the SET loop quickly to repush full data.\n                    state = State::Init;\n                    wait_interval = Duration::ZERO;\n                }\n\n                // retry after a short delay.\n                Err(BatchQueueError::Result(error)) => {\n                    tracing::warn!(?error, \"error refreshing cancellation key\");\n                    // Small delay to prevent busy loop with high cpu and logging.\n                    wait_interval = Duration::from_millis(10);\n                }\n\n                Err(BatchQueueError::Cancelled(Err(_cancelled))) => break,\n            }\n\n            // wait before continuing. break immediately if cancelled.\n            if run_until(tokio::time::sleep(wait_interval), cancel.as_mut())\n                .await\n                .is_err()\n            {\n                break;\n            }\n        }\n\n        if let Err(err) = cancel_closure\n            .try_cancel_query(compute_config)\n            .boxed()\n            .await\n        {\n            tracing::warn!(\n                ?session_id,\n                ?err,\n                \"could not cancel the query in the database\"\n            );\n        }\n    }\n}\n"
  },
  {
    "path": "proxy/src/compute/mod.rs",
    "content": "mod tls;\n\nuse std::fmt::Debug;\nuse std::io;\nuse std::net::{IpAddr, SocketAddr};\n\nuse futures::{FutureExt, TryFutureExt};\nuse itertools::Itertools;\nuse postgres_client::config::{AuthKeys, ChannelBinding, SslMode};\nuse postgres_client::connect_raw::StartupStream;\nuse postgres_client::error::SqlState;\nuse postgres_client::maybe_tls_stream::MaybeTlsStream;\nuse postgres_client::tls::MakeTlsConnect;\nuse thiserror::Error;\nuse tokio::net::{TcpStream, lookup_host};\nuse tracing::{debug, error, info, warn};\n\nuse crate::auth::backend::ComputeCredentialKeys;\nuse crate::auth::parse_endpoint_param;\nuse crate::compute::tls::TlsError;\nuse crate::config::ComputeConfig;\nuse crate::context::RequestContext;\nuse crate::control_plane::client::ApiLockError;\nuse crate::control_plane::errors::WakeComputeError;\nuse crate::control_plane::messages::MetricsAuxInfo;\nuse crate::error::{ErrorKind, ReportableError, UserFacingError};\nuse crate::metrics::{Metrics, NumDbConnectionsGuard};\nuse crate::pqproto::StartupMessageParams;\nuse crate::proxy::connect_compute::TlsNegotiation;\nuse crate::proxy::neon_option;\nuse crate::types::Host;\n\npub const COULD_NOT_CONNECT: &str = \"Couldn't connect to compute node\";\n\n#[derive(Debug, Error)]\npub(crate) enum PostgresError {\n    /// This error doesn't seem to reveal any secrets; for instance,\n    /// `postgres_client::error::Kind` doesn't contain ip addresses and such.\n    #[error(\"{COULD_NOT_CONNECT}: {0}\")]\n    Postgres(#[from] postgres_client::Error),\n}\n\nimpl UserFacingError for PostgresError {\n    fn to_string_client(&self) -> String {\n        match self {\n            // This helps us drop irrelevant library-specific prefixes.\n            // TODO: propagate severity level and other parameters.\n            PostgresError::Postgres(err) => match err.as_db_error() {\n                Some(err) => {\n                    let msg = err.message();\n\n                    if msg.starts_with(\"unsupported 
startup parameter: \")\n                        || msg.starts_with(\"unsupported startup parameter in options: \")\n                    {\n                        format!(\n                            \"{msg}. Please use unpooled connection or remove this parameter from the startup package. More details: https://neon.tech/docs/connect/connection-errors#unsupported-startup-parameter\"\n                        )\n                    } else {\n                        msg.to_owned()\n                    }\n                }\n                None => err.to_string(),\n            },\n        }\n    }\n}\n\nimpl ReportableError for PostgresError {\n    fn get_error_kind(&self) -> ErrorKind {\n        match self {\n            PostgresError::Postgres(err) => match err.as_db_error() {\n                Some(err) if err.code() == &SqlState::INVALID_CATALOG_NAME => ErrorKind::User,\n                Some(_) => ErrorKind::Postgres,\n                None => ErrorKind::Compute,\n            },\n        }\n    }\n}\n\n#[derive(Debug, Error)]\npub(crate) enum ConnectionError {\n    #[error(\"{COULD_NOT_CONNECT}: {0}\")]\n    TlsError(#[from] TlsError),\n\n    #[error(\"{COULD_NOT_CONNECT}: {0}\")]\n    WakeComputeError(#[from] WakeComputeError),\n\n    #[error(\"error acquiring resource permit: {0}\")]\n    TooManyConnectionAttempts(#[from] ApiLockError),\n\n    #[cfg(test)]\n    #[error(\"retryable: {retryable}, wakeable: {wakeable}, kind: {kind:?}\")]\n    TestError {\n        retryable: bool,\n        wakeable: bool,\n        kind: crate::error::ErrorKind,\n    },\n}\n\nimpl UserFacingError for ConnectionError {\n    fn to_string_client(&self) -> String {\n        match self {\n            ConnectionError::WakeComputeError(err) => err.to_string_client(),\n            ConnectionError::TooManyConnectionAttempts(_) => {\n                \"Failed to acquire permit to connect to the database. 
Too many database connection attempts are currently ongoing.\".to_owned()\n            }\n            ConnectionError::TlsError(_) => COULD_NOT_CONNECT.to_owned(),\n            #[cfg(test)]\n            ConnectionError::TestError { .. } => self.to_string(),\n        }\n    }\n}\n\nimpl ReportableError for ConnectionError {\n    fn get_error_kind(&self) -> ErrorKind {\n        match self {\n            ConnectionError::TlsError(_) => ErrorKind::Compute,\n            ConnectionError::WakeComputeError(e) => e.get_error_kind(),\n            ConnectionError::TooManyConnectionAttempts(e) => e.get_error_kind(),\n            #[cfg(test)]\n            ConnectionError::TestError { kind, .. } => *kind,\n        }\n    }\n}\n\n/// A pair of `ClientKey` & `ServerKey` for `SCRAM-SHA-256`.\npub(crate) type ScramKeys = postgres_client::config::ScramKeys<32>;\n\n#[derive(Clone)]\npub enum Auth {\n    /// Only used during console-redirect.\n    Password(Vec<u8>),\n    /// Used by sql-over-http, ws, tcp.\n    Scram(Box<ScramKeys>),\n}\n\n/// A config for authenticating to the compute node.\npub(crate) struct AuthInfo {\n    /// None for local-proxy, as we use trust-based localhost auth.\n    /// Some for sql-over-http, ws, tcp, and in most cases for console-redirect.\n    /// Might be None for console-redirect, but that's only a consequence of testing environments ATM.\n    auth: Option<Auth>,\n    server_params: StartupMessageParams,\n\n    channel_binding: ChannelBinding,\n\n    /// Console redirect sets user and database, we shouldn't re-use those from the params.\n    skip_db_user: bool,\n}\n\n/// Contains only the data needed to establish a secure connection to compute.\n#[derive(Clone)]\npub struct ConnectInfo {\n    pub host_addr: Option<IpAddr>,\n    pub host: Host,\n    pub port: u16,\n    pub ssl_mode: SslMode,\n}\n\n/// Creation and initialization routines.\nimpl AuthInfo {\n    pub(crate) fn for_console_redirect(db: &str, user: &str, pw: Option<&str>) -> Self {\n        
let mut server_params = StartupMessageParams::default();\n        server_params.insert(\"database\", db);\n        server_params.insert(\"user\", user);\n        Self {\n            auth: pw.map(|pw| Auth::Password(pw.as_bytes().to_owned())),\n            server_params,\n            skip_db_user: true,\n            // pg-sni-router is a mitm so this would fail.\n            channel_binding: ChannelBinding::Disable,\n        }\n    }\n\n    pub(crate) fn with_auth_keys(keys: ComputeCredentialKeys) -> Self {\n        Self {\n            auth: match keys {\n                ComputeCredentialKeys::AuthKeys(AuthKeys::ScramSha256(auth_keys)) => {\n                    Some(Auth::Scram(Box::new(auth_keys)))\n                }\n                ComputeCredentialKeys::JwtPayload(_) => None,\n            },\n            server_params: StartupMessageParams::default(),\n            skip_db_user: false,\n            channel_binding: ChannelBinding::Prefer,\n        }\n    }\n}\n\nimpl ConnectInfo {\n    pub fn to_postgres_client_config(&self) -> postgres_client::Config {\n        let mut config = postgres_client::Config::new(self.host.to_string(), self.port);\n        config.ssl_mode(self.ssl_mode);\n        if let Some(host_addr) = self.host_addr {\n            config.set_host_addr(host_addr);\n        }\n        config\n    }\n}\n\nimpl AuthInfo {\n    fn enrich(&self, mut config: postgres_client::Config) -> postgres_client::Config {\n        match &self.auth {\n            Some(Auth::Scram(keys)) => config.auth_keys(AuthKeys::ScramSha256(**keys)),\n            Some(Auth::Password(pw)) => config.password(pw),\n            None => &mut config,\n        };\n        config.channel_binding(self.channel_binding);\n        for (k, v) in self.server_params.iter() {\n            config.set_param(k, v);\n        }\n        config\n    }\n\n    /// Apply startup message params to the connection config.\n    pub(crate) fn set_startup_params(\n        &mut self,\n        params: 
&StartupMessageParams,\n        arbitrary_params: bool,\n    ) {\n        if !arbitrary_params {\n            self.server_params.insert(\"client_encoding\", \"UTF8\");\n        }\n        for (k, v) in params.iter() {\n            match k {\n                // Only set `user` if it's not present in the config.\n                // Console redirect auth flow takes username from the console's response.\n                \"user\" | \"database\" if self.skip_db_user => {}\n                \"options\" => {\n                    if let Some(options) = filtered_options(v) {\n                        self.server_params.insert(k, &options);\n                    }\n                }\n                \"user\" | \"database\" | \"application_name\" | \"replication\" => {\n                    self.server_params.insert(k, v);\n                }\n\n                // if we allow arbitrary params, then we forward them through.\n                // this is a flag for a period of backwards compatibility\n                k if arbitrary_params => {\n                    self.server_params.insert(k, v);\n                }\n                _ => {}\n            }\n        }\n    }\n\n    pub async fn authenticate(\n        &self,\n        ctx: &RequestContext,\n        compute: &mut ComputeConnection,\n    ) -> Result<(), PostgresError> {\n        // client config with stubbed connect info.\n        // TODO(conrad): should we rewrite this to bypass tokio-postgres2 entirely,\n        // utilising pqproto.rs.\n        let mut tmp_config = postgres_client::Config::new(String::new(), 0);\n        // We have already established SSL if necessary.\n        tmp_config.ssl_mode(SslMode::Disable);\n        let tmp_config = self.enrich(tmp_config);\n\n        let pause = ctx.latency_timer_pause(crate::metrics::Waiting::Compute);\n        tmp_config.authenticate(&mut compute.stream).await?;\n        drop(pause);\n\n        Ok(())\n    }\n}\n\nimpl ConnectInfo {\n    /// Establish a raw TCP+TLS connection 
to the compute node.\n    async fn connect_raw(\n        &self,\n        config: &ComputeConfig,\n        tls: TlsNegotiation,\n    ) -> Result<(SocketAddr, MaybeTlsStream<TcpStream, RustlsStream>), TlsError> {\n        let timeout = config.timeout;\n\n        // wrap TcpStream::connect with timeout\n        let connect_with_timeout = |addrs| {\n            tokio::time::timeout(timeout, TcpStream::connect(addrs)).map(move |res| match res {\n                Ok(tcpstream_connect_res) => tcpstream_connect_res,\n                Err(_) => Err(io::Error::new(\n                    io::ErrorKind::TimedOut,\n                    format!(\"exceeded connection timeout {timeout:?}\"),\n                )),\n            })\n        };\n\n        let connect_once = |addrs| {\n            debug!(\"trying to connect to compute node at {addrs:?}\");\n            connect_with_timeout(addrs).and_then(|stream| async {\n                let socket_addr = stream.peer_addr()?;\n                let socket = socket2::SockRef::from(&stream);\n                // Disable Nagle's algorithm to not introduce latency between\n                // client and compute.\n                socket.set_nodelay(true)?;\n                // This prevents load balancer from severing the connection.\n                socket.set_keepalive(true)?;\n                Ok((socket_addr, stream))\n            })\n        };\n\n        // We can't reuse connection establishing logic from `postgres_client` here,\n        // because it has no means for extracting the underlying socket which we\n        // require for our business.\n        let port = self.port;\n        let host = &*self.host;\n\n        let addrs = match self.host_addr {\n            Some(addr) => vec![SocketAddr::new(addr, port)],\n            None => lookup_host((host, port)).await?.collect(),\n        };\n\n        match connect_once(&*addrs).await {\n            Ok((sockaddr, stream)) => Ok((\n                sockaddr,\n                
tls::connect_tls(stream, self.ssl_mode, config, host, tls).await?,\n            )),\n            Err(err) => {\n                warn!(\"couldn't connect to compute node at {host}:{port}: {err}\");\n                Err(TlsError::Connection(err))\n            }\n        }\n    }\n}\n\npub type RustlsStream = <ComputeConfig as MakeTlsConnect<tokio::net::TcpStream>>::Stream;\npub type MaybeRustlsStream = MaybeTlsStream<tokio::net::TcpStream, RustlsStream>;\n\npub struct ComputeConnection {\n    /// Socket connected to a compute node.\n    pub stream: StartupStream<tokio::net::TcpStream, RustlsStream>,\n    /// Labels for proxy's metrics.\n    pub aux: MetricsAuxInfo,\n    pub hostname: Host,\n    pub ssl_mode: SslMode,\n    pub socket_addr: SocketAddr,\n    pub guage: NumDbConnectionsGuard<'static>,\n}\n\nimpl ConnectInfo {\n    /// Connect to a corresponding compute node.\n    pub async fn connect(\n        &self,\n        ctx: &RequestContext,\n        aux: &MetricsAuxInfo,\n        config: &ComputeConfig,\n        tls: TlsNegotiation,\n    ) -> Result<ComputeConnection, ConnectionError> {\n        let pause = ctx.latency_timer_pause(crate::metrics::Waiting::Compute);\n        let (socket_addr, stream) = self.connect_raw(config, tls).await?;\n        drop(pause);\n\n        tracing::Span::current().record(\"compute_id\", tracing::field::display(&aux.compute_id));\n\n        // TODO: lots of useful info but maybe we can move it elsewhere (eg traces?)\n        info!(\n            cold_start_info = ctx.cold_start_info().as_str(),\n            \"connected to compute node at {} ({socket_addr}) sslmode={:?}, latency={}, query_id={}\",\n            self.host,\n            self.ssl_mode,\n            ctx.get_proxy_latency(),\n            ctx.get_testodrome_id().unwrap_or_default(),\n        );\n\n        let stream = StartupStream::new(stream);\n        let connection = ComputeConnection {\n            stream,\n            socket_addr,\n            hostname: 
self.host.clone(),\n            ssl_mode: self.ssl_mode,\n            aux: aux.clone(),\n            guage: Metrics::get().proxy.db_connections.guard(ctx.protocol()),\n        };\n\n        Ok(connection)\n    }\n}\n\n/// Retrieve `options` from a startup message, dropping all proxy-specific flags.\nfn filtered_options(options: &str) -> Option<String> {\n    #[allow(unstable_name_collisions)]\n    let options: String = StartupMessageParams::parse_options_raw(options)\n        .filter(|opt| parse_endpoint_param(opt).is_none() && neon_option(opt).is_none())\n        .intersperse(\" \") // TODO: use impl from std once it's stabilized\n        .collect();\n\n    // Don't even bother with empty options.\n    if options.is_empty() {\n        return None;\n    }\n\n    Some(options)\n}\n\n#[cfg(test)]\nmod tests {\n    use super::*;\n\n    #[test]\n    fn test_filtered_options() {\n        // Empty options is unlikely to be useful anyway.\n        let params = \"\";\n        assert_eq!(filtered_options(params), None);\n\n        // It's likely that clients will only use options to specify endpoint/project.\n        let params = \"project=foo\";\n        assert_eq!(filtered_options(params), None);\n\n        // Same, because unescaped whitespaces are no-op.\n        let params = \" project=foo \";\n        assert_eq!(filtered_options(params).as_deref(), None);\n\n        let params = r\"\\  project=foo \\ \";\n        assert_eq!(filtered_options(params).as_deref(), Some(r\"\\  \\ \"));\n\n        let params = \"project = foo\";\n        assert_eq!(filtered_options(params).as_deref(), Some(\"project = foo\"));\n\n        let params = \"project = foo neon_endpoint_type:read_write   neon_lsn:0/2 neon_proxy_params_compat:true\";\n        assert_eq!(filtered_options(params).as_deref(), Some(\"project = foo\"));\n    }\n}\n"
  },
  {
    "path": "proxy/src/compute/tls.rs",
    "content": "use futures::FutureExt;\nuse postgres_client::config::SslMode;\nuse postgres_client::maybe_tls_stream::MaybeTlsStream;\nuse postgres_client::tls::{MakeTlsConnect, TlsConnect};\nuse rustls::pki_types::InvalidDnsNameError;\nuse thiserror::Error;\nuse tokio::io::{AsyncRead, AsyncWrite};\n\nuse crate::pqproto::request_tls;\nuse crate::proxy::connect_compute::TlsNegotiation;\nuse crate::proxy::retry::CouldRetry;\n\n#[derive(Debug, Error)]\npub enum TlsError {\n    #[error(transparent)]\n    Dns(#[from] InvalidDnsNameError),\n    #[error(transparent)]\n    Connection(#[from] std::io::Error),\n    #[error(\"TLS required but not provided\")]\n    Required,\n}\n\nimpl CouldRetry for TlsError {\n    fn could_retry(&self) -> bool {\n        match self {\n            TlsError::Dns(_) => false,\n            TlsError::Connection(err) => err.could_retry(),\n            // perhaps compute didn't realise it supports TLS?\n            TlsError::Required => true,\n        }\n    }\n}\n\npub async fn connect_tls<S, T>(\n    mut stream: S,\n    mode: SslMode,\n    tls: &T,\n    host: &str,\n    negotiation: TlsNegotiation,\n) -> Result<MaybeTlsStream<S, T::Stream>, TlsError>\nwhere\n    S: AsyncRead + AsyncWrite + Unpin + Send,\n    T: MakeTlsConnect<\n            S,\n            Error = InvalidDnsNameError,\n            TlsConnect: TlsConnect<S, Error = std::io::Error, Future: Send>,\n        >,\n{\n    match mode {\n        SslMode::Disable => return Ok(MaybeTlsStream::Raw(stream)),\n        SslMode::Prefer | SslMode::Require => {}\n    }\n\n    match negotiation {\n        // No TLS request needed\n        TlsNegotiation::Direct => {}\n        // TLS request successful\n        TlsNegotiation::Postgres if request_tls(&mut stream).await? 
=> {}\n        // TLS request failed but is required\n        TlsNegotiation::Postgres if SslMode::Require == mode => return Err(TlsError::Required),\n        // TLS request failed but is not required\n        TlsNegotiation::Postgres => return Ok(MaybeTlsStream::Raw(stream)),\n    }\n\n    Ok(MaybeTlsStream::Tls(\n        tls.make_tls_connect(host)?.connect(stream).boxed().await?,\n    ))\n}\n"
  },
  {
    "path": "proxy/src/compute_ctl/mod.rs",
    "content": "use compute_api::responses::GenericAPIError;\nuse hyper::{Method, StatusCode};\nuse serde::de::DeserializeOwned;\nuse serde::{Deserialize, Serialize};\nuse thiserror::Error;\n\nuse crate::http;\nuse crate::types::{DbName, RoleName};\nuse crate::url::ApiUrl;\n\npub struct ComputeCtlApi {\n    pub(crate) api: http::Endpoint,\n}\n\n#[derive(Serialize, Debug)]\npub struct ExtensionInstallRequest {\n    pub extension: &'static str,\n    pub database: DbName,\n    pub version: &'static str,\n}\n\n#[derive(Serialize, Debug)]\npub struct SetRoleGrantsRequest {\n    pub database: DbName,\n    pub schema: &'static str,\n    pub privileges: Vec<Privilege>,\n    pub role: RoleName,\n}\n\n#[derive(Clone, Debug, Deserialize)]\npub struct ExtensionInstallResponse {}\n\n#[derive(Clone, Debug, Deserialize)]\npub struct SetRoleGrantsResponse {}\n\n#[derive(Debug, Serialize, Deserialize, Clone, Copy)]\n#[serde(rename_all = \"UPPERCASE\")]\npub enum Privilege {\n    Usage,\n}\n\n#[derive(Error, Debug)]\npub enum ComputeCtlError {\n    #[error(\"connection error: {0}\")]\n    Connection(#[source] reqwest_middleware::Error),\n    #[error(\"request error [{status}]: {body:?}\")]\n    Request {\n        status: StatusCode,\n        body: Option<GenericAPIError>,\n    },\n    #[error(\"response parsing error: {0}\")]\n    Response(#[source] reqwest::Error),\n}\n\nimpl ComputeCtlApi {\n    pub async fn install_extension(\n        &self,\n        req: &ExtensionInstallRequest,\n    ) -> Result<ExtensionInstallResponse, ComputeCtlError> {\n        self.generic_request(req, Method::POST, |url| {\n            url.path_segments_mut().push(\"extensions\");\n        })\n        .await\n    }\n\n    pub async fn grant_role(\n        &self,\n        req: &SetRoleGrantsRequest,\n    ) -> Result<SetRoleGrantsResponse, ComputeCtlError> {\n        self.generic_request(req, Method::POST, |url| {\n            url.path_segments_mut().push(\"grants\");\n        })\n        .await\n    }\n\n  
  async fn generic_request<Req, Resp>(\n        &self,\n        req: &Req,\n        method: Method,\n        url: impl for<'a> FnOnce(&'a mut ApiUrl),\n    ) -> Result<Resp, ComputeCtlError>\n    where\n        Req: Serialize,\n        Resp: DeserializeOwned,\n    {\n        let resp = self\n            .api\n            .request_with_url(method, url)\n            .json(req)\n            .send()\n            .await\n            .map_err(ComputeCtlError::Connection)?;\n\n        let status = resp.status();\n        if status.is_client_error() || status.is_server_error() {\n            let body = resp.json().await.ok();\n            return Err(ComputeCtlError::Request { status, body });\n        }\n\n        resp.json().await.map_err(ComputeCtlError::Response)\n    }\n}\n"
  },
  {
    "path": "proxy/src/config.rs",
    "content": "use std::str::FromStr;\nuse std::sync::Arc;\nuse std::time::Duration;\n\nuse anyhow::{Context, Ok, bail, ensure};\nuse arc_swap::ArcSwapOption;\nuse camino::{Utf8Path, Utf8PathBuf};\nuse clap::ValueEnum;\nuse compute_api::spec::LocalProxySpec;\nuse remote_storage::RemoteStorageConfig;\nuse thiserror::Error;\nuse tokio::sync::Notify;\nuse tracing::{debug, error, info, warn};\n\nuse crate::auth::backend::jwt::JwkCache;\nuse crate::auth::backend::local::JWKS_ROLE_MAP;\nuse crate::control_plane::locks::ApiLocks;\nuse crate::control_plane::messages::{EndpointJwksResponse, JwksSettings};\nuse crate::ext::TaskExt;\nuse crate::intern::RoleNameInt;\nuse crate::rate_limiter::{RateLimitAlgorithm, RateLimiterConfig};\nuse crate::scram;\nuse crate::serverless::GlobalConnPoolOptions;\nuse crate::serverless::cancel_set::CancelSet;\n#[cfg(feature = \"rest_broker\")]\nuse crate::serverless::rest::DbSchemaCache;\npub use crate::tls::server_config::{TlsConfig, configure_tls};\nuse crate::types::{Host, RoleName};\n\npub struct ProxyConfig {\n    pub tls_config: ArcSwapOption<TlsConfig>,\n    pub metric_collection: Option<MetricCollectionConfig>,\n    pub http_config: HttpConfig,\n    pub authentication_config: AuthenticationConfig,\n    #[cfg(feature = \"rest_broker\")]\n    pub rest_config: RestConfig,\n    pub proxy_protocol_v2: ProxyProtocolV2,\n    pub handshake_timeout: Duration,\n    pub wake_compute_retry_config: RetryConfig,\n    pub connect_compute_locks: ApiLocks<Host>,\n    pub connect_to_compute: ComputeConfig,\n    pub greetings: String, // Greeting message sent to the client after connection establishment and contains session_id.\n    #[cfg(feature = \"testing\")]\n    pub disable_pg_session_jwt: bool,\n}\n\npub struct ComputeConfig {\n    pub retry: RetryConfig,\n    pub tls: Arc<rustls::ClientConfig>,\n    pub timeout: Duration,\n}\n\n#[derive(Copy, Clone, Debug, ValueEnum, PartialEq)]\npub enum ProxyProtocolV2 {\n    /// Connection will error if PROXY 
protocol v2 header is missing\n    Required,\n    /// Connection will error if PROXY protocol v2 header is provided\n    Rejected,\n}\n\n#[derive(Debug)]\npub struct MetricCollectionConfig {\n    pub endpoint: reqwest::Url,\n    pub interval: Duration,\n    pub backup_metric_collection_config: MetricBackupCollectionConfig,\n}\n\npub struct HttpConfig {\n    pub accept_websockets: bool,\n    pub pool_options: GlobalConnPoolOptions,\n    pub cancel_set: CancelSet,\n    pub client_conn_threshold: u64,\n    pub max_request_size_bytes: usize,\n    pub max_response_size_bytes: usize,\n}\n\npub struct AuthenticationConfig {\n    pub scram_thread_pool: Arc<scram::threadpool::ThreadPool>,\n    pub scram_protocol_timeout: tokio::time::Duration,\n    pub ip_allowlist_check_enabled: bool,\n    pub is_vpc_acccess_proxy: bool,\n    pub jwks_cache: JwkCache,\n    pub is_auth_broker: bool,\n    pub accept_jwts: bool,\n    pub console_redirect_confirmation_timeout: tokio::time::Duration,\n}\n\n#[cfg(feature = \"rest_broker\")]\npub struct RestConfig {\n    pub is_rest_broker: bool,\n    pub db_schema_cache: Option<DbSchemaCache>,\n    pub max_schema_size: usize,\n    pub hostname_prefix: String,\n}\n\n#[derive(Debug)]\npub struct MetricBackupCollectionConfig {\n    pub remote_storage_config: Option<RemoteStorageConfig>,\n    pub chunk_size: usize,\n}\n\npub fn remote_storage_from_toml(s: &str) -> anyhow::Result<RemoteStorageConfig> {\n    RemoteStorageConfig::from_toml(&s.parse()?)\n}\n\n/// Helper for cmdline cache options parsing.\n#[derive(Debug)]\npub struct CacheOptions {\n    /// Max number of entries.\n    pub size: Option<u64>,\n    /// Entry's time-to-live.\n    pub absolute_ttl: Option<Duration>,\n    /// Entry's time-to-idle.\n    pub idle_ttl: Option<Duration>,\n}\n\nimpl CacheOptions {\n    /// Default options for [`crate::cache::node_info::NodeInfoCache`].\n    pub const CACHE_DEFAULT_OPTIONS: &'static str = \"size=4000,idle_ttl=4m\";\n\n    /// Parse cache options 
passed via cmdline.\n    /// Example: [`Self::CACHE_DEFAULT_OPTIONS`].\n    fn parse(options: &str) -> anyhow::Result<Self> {\n        let mut size = None;\n        let mut absolute_ttl = None;\n        let mut idle_ttl = None;\n\n        for option in options.split(',') {\n            let (key, value) = option\n                .split_once('=')\n                .with_context(|| format!(\"bad key-value pair: {option}\"))?;\n\n            match key {\n                \"size\" => size = Some(value.parse()?),\n                \"absolute_ttl\" | \"ttl\" => absolute_ttl = Some(humantime::parse_duration(value)?),\n                \"idle_ttl\" | \"tti\" => idle_ttl = Some(humantime::parse_duration(value)?),\n                unknown => bail!(\"unknown key: {unknown}\"),\n            }\n        }\n\n        Ok(Self {\n            size,\n            absolute_ttl,\n            idle_ttl,\n        })\n    }\n\n    pub fn moka<K, V, C>(\n        &self,\n        mut builder: moka::sync::CacheBuilder<K, V, C>,\n    ) -> moka::sync::CacheBuilder<K, V, C> {\n        if let Some(size) = self.size {\n            builder = builder.max_capacity(size);\n        }\n        if let Some(ttl) = self.absolute_ttl {\n            builder = builder.time_to_live(ttl);\n        }\n        if let Some(tti) = self.idle_ttl {\n            builder = builder.time_to_idle(tti);\n        }\n        builder\n    }\n}\n\nimpl FromStr for CacheOptions {\n    type Err = anyhow::Error;\n\n    fn from_str(options: &str) -> Result<Self, Self::Err> {\n        let error = || format!(\"failed to parse cache options '{options}'\");\n        Self::parse(options).with_context(error)\n    }\n}\n\n/// Helper for cmdline cache options parsing.\n#[derive(Debug)]\npub struct ProjectInfoCacheOptions {\n    /// Max number of entries.\n    pub size: u64,\n    /// Entry's time-to-live.\n    pub ttl: Duration,\n    /// Max number of roles per endpoint.\n    pub max_roles: u64,\n    /// Gc interval.\n    pub gc_interval: 
Duration,\n}\n\nimpl ProjectInfoCacheOptions {\n    /// Default options for [`crate::cache::project_info::ProjectInfoCache`].\n    pub const CACHE_DEFAULT_OPTIONS: &'static str =\n        \"size=10000,ttl=4m,max_roles=10,gc_interval=60m\";\n\n    /// Parse cache options passed via cmdline.\n    /// Example: [`Self::CACHE_DEFAULT_OPTIONS`].\n    fn parse(options: &str) -> anyhow::Result<Self> {\n        let mut size = None;\n        let mut ttl = None;\n        let mut max_roles = None;\n        let mut gc_interval = None;\n\n        for option in options.split(',') {\n            let (key, value) = option\n                .split_once('=')\n                .with_context(|| format!(\"bad key-value pair: {option}\"))?;\n\n            match key {\n                \"size\" => size = Some(value.parse()?),\n                \"ttl\" => ttl = Some(humantime::parse_duration(value)?),\n                \"max_roles\" => max_roles = Some(value.parse()?),\n                \"gc_interval\" => gc_interval = Some(humantime::parse_duration(value)?),\n                unknown => bail!(\"unknown key: {unknown}\"),\n            }\n        }\n\n        // TTL doesn't matter if cache is always empty.\n        if let Some(0) = size {\n            ttl.get_or_insert(Duration::default());\n        }\n\n        Ok(Self {\n            size: size.context(\"missing `size`\")?,\n            ttl: ttl.context(\"missing `ttl`\")?,\n            max_roles: max_roles.context(\"missing `max_roles`\")?,\n            gc_interval: gc_interval.context(\"missing `gc_interval`\")?,\n        })\n    }\n}\n\nimpl FromStr for ProjectInfoCacheOptions {\n    type Err = anyhow::Error;\n\n    fn from_str(options: &str) -> Result<Self, Self::Err> {\n        let error = || format!(\"failed to parse cache options '{options}'\");\n        Self::parse(options).with_context(error)\n    }\n}\n\n/// This is a config for connect to compute and wake compute.\n#[derive(Clone, Copy, Debug)]\npub struct RetryConfig {\n    /// Number 
of times we should retry.\n    pub max_retries: u32,\n    /// Retry duration is base_delay * backoff_factor ^ n, where n starts at 0\n    pub base_delay: tokio::time::Duration,\n    /// Exponential base for retry wait duration\n    pub backoff_factor: f64,\n}\n\nimpl RetryConfig {\n    // Default options for RetryConfig.\n\n    /// Total delay for 5 retries with 200ms base delay and 2 backoff factor is about 6s.\n    pub const CONNECT_TO_COMPUTE_DEFAULT_VALUES: &'static str =\n        \"num_retries=5,base_retry_wait_duration=200ms,retry_wait_exponent_base=2\";\n    /// Total delay for 8 retries with 100ms base delay and 1.6 backoff factor is about 7s.\n    /// Cplane has timeout of 60s on each request. 8m7s in total.\n    pub const WAKE_COMPUTE_DEFAULT_VALUES: &'static str =\n        \"num_retries=8,base_retry_wait_duration=100ms,retry_wait_exponent_base=1.6\";\n\n    /// Parse retry options passed via cmdline.\n    /// Example: [`Self::CONNECT_TO_COMPUTE_DEFAULT_VALUES`].\n    pub fn parse(options: &str) -> anyhow::Result<Self> {\n        let mut num_retries = None;\n        let mut base_retry_wait_duration = None;\n        let mut retry_wait_exponent_base = None;\n\n        for option in options.split(',') {\n            let (key, value) = option\n                .split_once('=')\n                .with_context(|| format!(\"bad key-value pair: {option}\"))?;\n\n            match key {\n                \"num_retries\" => num_retries = Some(value.parse()?),\n                \"base_retry_wait_duration\" => {\n                    base_retry_wait_duration = Some(humantime::parse_duration(value)?);\n                }\n                \"retry_wait_exponent_base\" => retry_wait_exponent_base = Some(value.parse()?),\n                unknown => bail!(\"unknown key: {unknown}\"),\n            }\n        }\n\n        Ok(Self {\n            max_retries: num_retries.context(\"missing `num_retries`\")?,\n            base_delay: base_retry_wait_duration.context(\"missing 
`base_retry_wait_duration`\")?,\n            backoff_factor: retry_wait_exponent_base\n                .context(\"missing `retry_wait_exponent_base`\")?,\n        })\n    }\n}\n\n/// Helper for cmdline lock options parsing.\n#[derive(serde::Deserialize)]\npub struct ConcurrencyLockOptions {\n    /// The number of shards the lock map should have\n    pub shards: usize,\n    /// The number of allowed concurrent requests for each endpoint\n    #[serde(flatten)]\n    pub limiter: RateLimiterConfig,\n    /// Garbage collection epoch\n    #[serde(deserialize_with = \"humantime_serde::deserialize\")]\n    pub epoch: Duration,\n    /// Lock timeout\n    #[serde(deserialize_with = \"humantime_serde::deserialize\")]\n    pub timeout: Duration,\n}\n\nimpl ConcurrencyLockOptions {\n    /// Default options for [`crate::control_plane::client::ApiLocks`].\n    pub const DEFAULT_OPTIONS_WAKE_COMPUTE_LOCK: &'static str = \"permits=0\";\n    /// Default options for [`crate::control_plane::client::ApiLocks`].\n    pub const DEFAULT_OPTIONS_CONNECT_COMPUTE_LOCK: &'static str =\n        \"shards=64,permits=100,epoch=10m,timeout=10ms\";\n\n    // pub const DEFAULT_OPTIONS_WAKE_COMPUTE_LOCK: &'static str = \"shards=32,permits=4,epoch=10m,timeout=1s\";\n\n    /// Parse lock options passed via cmdline.\n    /// Example: [`Self::DEFAULT_OPTIONS_WAKE_COMPUTE_LOCK`].\n    fn parse(options: &str) -> anyhow::Result<Self> {\n        let options = options.trim();\n        if options.starts_with('{') && options.ends_with('}') {\n            return Ok(serde_json::from_str(options)?);\n        }\n\n        let mut shards = None;\n        let mut permits = None;\n        let mut epoch = None;\n        let mut timeout = None;\n\n        for option in options.split(',') {\n            let (key, value) = option\n                .split_once('=')\n                .with_context(|| format!(\"bad key-value pair: {option}\"))?;\n\n            match key {\n                \"shards\" => shards = 
Some(value.parse()?),\n                \"permits\" => permits = Some(value.parse()?),\n                \"epoch\" => epoch = Some(humantime::parse_duration(value)?),\n                \"timeout\" => timeout = Some(humantime::parse_duration(value)?),\n                unknown => bail!(\"unknown key: {unknown}\"),\n            }\n        }\n\n        // these dont matter if lock is disabled\n        if let Some(0) = permits {\n            timeout = Some(Duration::default());\n            epoch = Some(Duration::default());\n            shards = Some(2);\n        }\n\n        let permits = permits.context(\"missing `permits`\")?;\n        let out = Self {\n            shards: shards.context(\"missing `shards`\")?,\n            limiter: RateLimiterConfig {\n                algorithm: RateLimitAlgorithm::Fixed,\n                initial_limit: permits,\n            },\n            epoch: epoch.context(\"missing `epoch`\")?,\n            timeout: timeout.context(\"missing `timeout`\")?,\n        };\n\n        ensure!(out.shards > 1, \"shard count must be > 1\");\n        ensure!(\n            out.shards.is_power_of_two(),\n            \"shard count must be a power of two\"\n        );\n\n        Ok(out)\n    }\n}\n\nimpl FromStr for ConcurrencyLockOptions {\n    type Err = anyhow::Error;\n\n    fn from_str(options: &str) -> Result<Self, Self::Err> {\n        let error = || format!(\"failed to parse cache lock options '{options}'\");\n        Self::parse(options).with_context(error)\n    }\n}\n\n#[derive(Error, Debug)]\npub(crate) enum RefreshConfigError {\n    #[error(transparent)]\n    Read(#[from] std::io::Error),\n    #[error(transparent)]\n    Parse(#[from] serde_json::Error),\n    #[error(transparent)]\n    Validate(anyhow::Error),\n    #[error(transparent)]\n    Tls(anyhow::Error),\n}\n\npub(crate) async fn refresh_config_loop(config: &ProxyConfig, path: Utf8PathBuf, rx: Arc<Notify>) {\n    let mut init = true;\n    loop {\n        rx.notified().await;\n\n        match 
refresh_config_inner(config, &path).await {\n            std::result::Result::Ok(()) => {}\n            // don't log for file not found errors if this is the first time we are checking\n            // for computes that don't use local_proxy, this is not an error.\n            Err(RefreshConfigError::Read(e))\n                if init && e.kind() == std::io::ErrorKind::NotFound =>\n            {\n                debug!(error=?e, ?path, \"could not read config file\");\n            }\n            Err(RefreshConfigError::Tls(e)) => {\n                error!(error=?e, ?path, \"could not read TLS certificates\");\n            }\n            Err(e) => {\n                error!(error=?e, ?path, \"could not read config file\");\n            }\n        }\n\n        init = false;\n    }\n}\n\npub(crate) async fn refresh_config_inner(\n    config: &ProxyConfig,\n    path: &Utf8Path,\n) -> Result<(), RefreshConfigError> {\n    let bytes = tokio::fs::read(&path).await?;\n    let data: LocalProxySpec = serde_json::from_slice(&bytes)?;\n\n    let mut jwks_set = vec![];\n\n    fn parse_jwks_settings(jwks: compute_api::spec::JwksSettings) -> anyhow::Result<JwksSettings> {\n        let mut jwks_url = url::Url::from_str(&jwks.jwks_url).context(\"parsing JWKS url\")?;\n\n        ensure!(\n            jwks_url.has_authority()\n                && (jwks_url.scheme() == \"http\" || jwks_url.scheme() == \"https\"),\n            \"Invalid JWKS url. Must be HTTP\",\n        );\n\n        ensure!(\n            jwks_url.host().is_some_and(|h| h != url::Host::Domain(\"\")),\n            \"Invalid JWKS url. No domain listed\",\n        );\n\n        // clear username, password and ports\n        jwks_url\n            .set_username(\"\")\n            .expect(\"url can be a base and has a valid host and is not a file. should not error\");\n        jwks_url\n            .set_password(None)\n            .expect(\"url can be a base and has a valid host and is not a file. 
should not error\");\n        // local testing is hard if we need to have a specific restricted port\n        if cfg!(not(feature = \"testing\")) {\n            jwks_url.set_port(None).expect(\n                \"url can be a base and has a valid host and is not a file. should not error\",\n            );\n        }\n\n        // clear query params\n        jwks_url.set_fragment(None);\n        jwks_url.query_pairs_mut().clear().finish();\n\n        if jwks_url.scheme() != \"https\" {\n            // local testing is hard if we need to set up https support.\n            if cfg!(not(feature = \"testing\")) {\n                jwks_url\n                    .set_scheme(\"https\")\n                    .expect(\"should not error to set the scheme to https if it was http\");\n            } else {\n                warn!(scheme = jwks_url.scheme(), \"JWKS url is not HTTPS\");\n            }\n        }\n\n        Ok(JwksSettings {\n            id: jwks.id,\n            jwks_url,\n            _provider_name: jwks.provider_name,\n            jwt_audience: jwks.jwt_audience,\n            role_names: jwks\n                .role_names\n                .into_iter()\n                .map(RoleName::from)\n                .map(|s| RoleNameInt::from(&s))\n                .collect(),\n        })\n    }\n\n    for jwks in data.jwks.into_iter().flatten() {\n        jwks_set.push(parse_jwks_settings(jwks).map_err(RefreshConfigError::Validate)?);\n    }\n\n    info!(\"successfully loaded new config\");\n    JWKS_ROLE_MAP.store(Some(Arc::new(EndpointJwksResponse { jwks: jwks_set })));\n\n    if let Some(tls_config) = data.tls {\n        let tls_config = tokio::task::spawn_blocking(move || {\n            crate::tls::server_config::configure_tls(\n                tls_config.key_path.as_ref(),\n                tls_config.cert_path.as_ref(),\n                None,\n                false,\n            )\n        })\n        .await\n        .propagate_task_panic()\n        
.map_err(RefreshConfigError::Tls)?;\n        config.tls_config.store(Some(Arc::new(tls_config)));\n    }\n\n    std::result::Result::Ok(())\n}\n\n#[cfg(test)]\nmod tests {\n    use super::*;\n    use crate::rate_limiter::Aimd;\n\n    #[test]\n    fn test_parse_cache_options() -> anyhow::Result<()> {\n        let CacheOptions {\n            size,\n            absolute_ttl,\n            idle_ttl: _,\n        } = \"size=4096,ttl=5min\".parse()?;\n        assert_eq!(size, Some(4096));\n        assert_eq!(absolute_ttl, Some(Duration::from_secs(5 * 60)));\n\n        let CacheOptions {\n            size,\n            absolute_ttl,\n            idle_ttl: _,\n        } = \"ttl=4m,size=2\".parse()?;\n        assert_eq!(size, Some(2));\n        assert_eq!(absolute_ttl, Some(Duration::from_secs(4 * 60)));\n\n        let CacheOptions {\n            size,\n            absolute_ttl,\n            idle_ttl: _,\n        } = \"size=0,ttl=1s\".parse()?;\n        assert_eq!(size, Some(0));\n        assert_eq!(absolute_ttl, Some(Duration::from_secs(1)));\n\n        let CacheOptions {\n            size,\n            absolute_ttl,\n            idle_ttl: _,\n        } = \"size=0\".parse()?;\n        assert_eq!(size, Some(0));\n        assert_eq!(absolute_ttl, None);\n\n        Ok(())\n    }\n\n    #[test]\n    fn test_parse_lock_options() -> anyhow::Result<()> {\n        let ConcurrencyLockOptions {\n            epoch,\n            limiter,\n            shards,\n            timeout,\n        } = \"shards=32,permits=4,epoch=10m,timeout=1s\".parse()?;\n        assert_eq!(epoch, Duration::from_secs(10 * 60));\n        assert_eq!(timeout, Duration::from_secs(1));\n        assert_eq!(shards, 32);\n        assert_eq!(limiter.initial_limit, 4);\n        assert_eq!(limiter.algorithm, RateLimitAlgorithm::Fixed);\n\n        let ConcurrencyLockOptions {\n            epoch,\n            limiter,\n            shards,\n            timeout,\n        } = 
\"epoch=60s,shards=16,timeout=100ms,permits=8\".parse()?;\n        assert_eq!(epoch, Duration::from_secs(60));\n        assert_eq!(timeout, Duration::from_millis(100));\n        assert_eq!(shards, 16);\n        assert_eq!(limiter.initial_limit, 8);\n        assert_eq!(limiter.algorithm, RateLimitAlgorithm::Fixed);\n\n        let ConcurrencyLockOptions {\n            epoch,\n            limiter,\n            shards,\n            timeout,\n        } = \"permits=0\".parse()?;\n        assert_eq!(epoch, Duration::ZERO);\n        assert_eq!(timeout, Duration::ZERO);\n        assert_eq!(shards, 2);\n        assert_eq!(limiter.initial_limit, 0);\n        assert_eq!(limiter.algorithm, RateLimitAlgorithm::Fixed);\n\n        Ok(())\n    }\n\n    #[test]\n    fn test_parse_json_lock_options() -> anyhow::Result<()> {\n        let ConcurrencyLockOptions {\n            epoch,\n            limiter,\n            shards,\n            timeout,\n        } = r#\"{\"shards\":32,\"initial_limit\":44,\"aimd\":{\"min\":5,\"max\":500,\"inc\":10,\"dec\":0.9,\"utilisation\":0.8},\"epoch\":\"10m\",\"timeout\":\"1s\"}\"#\n            .parse()?;\n        assert_eq!(epoch, Duration::from_secs(10 * 60));\n        assert_eq!(timeout, Duration::from_secs(1));\n        assert_eq!(shards, 32);\n        assert_eq!(limiter.initial_limit, 44);\n        assert_eq!(\n            limiter.algorithm,\n            RateLimitAlgorithm::Aimd {\n                conf: Aimd {\n                    min: 5,\n                    max: 500,\n                    dec: 0.9,\n                    inc: 10,\n                    utilisation: 0.8\n                }\n            },\n        );\n\n        Ok(())\n    }\n}\n"
  },
  {
    "path": "proxy/src/console_redirect_proxy.rs",
    "content": "use std::sync::Arc;\n\nuse futures::{FutureExt, TryFutureExt};\nuse postgres_client::RawCancelToken;\nuse tokio::io::{AsyncRead, AsyncWrite};\nuse tokio_util::sync::CancellationToken;\nuse tracing::{Instrument, debug, error, info};\n\nuse crate::auth::backend::ConsoleRedirectBackend;\nuse crate::cancellation::{CancelClosure, CancellationHandler};\nuse crate::config::{ProxyConfig, ProxyProtocolV2};\nuse crate::context::RequestContext;\nuse crate::error::ReportableError;\nuse crate::metrics::{Metrics, NumClientConnectionsGuard};\nuse crate::pglb::ClientRequestError;\nuse crate::pglb::handshake::{HandshakeData, handshake};\nuse crate::pglb::passthrough::ProxyPassthrough;\nuse crate::protocol2::{ConnectHeader, ConnectionInfo, read_proxy_protocol};\nuse crate::proxy::{\n    ErrorSource, connect_compute, forward_compute_params_to_client, send_client_greeting,\n};\nuse crate::util::run_until_cancelled;\n\npub async fn task_main(\n    config: &'static ProxyConfig,\n    backend: &'static ConsoleRedirectBackend,\n    listener: tokio::net::TcpListener,\n    cancellation_token: CancellationToken,\n    cancellation_handler: Arc<CancellationHandler>,\n) -> anyhow::Result<()> {\n    scopeguard::defer! 
{\n        info!(\"proxy has shut down\");\n    }\n\n    // When set for the server socket, the keepalive setting\n    // will be inherited by all accepted client sockets.\n    socket2::SockRef::from(&listener).set_keepalive(true)?;\n\n    let connections = tokio_util::task::task_tracker::TaskTracker::new();\n    let cancellations = tokio_util::task::task_tracker::TaskTracker::new();\n\n    while let Some(accept_result) =\n        run_until_cancelled(listener.accept(), &cancellation_token).await\n    {\n        let (socket, peer_addr) = accept_result?;\n\n        let conn_gauge = Metrics::get()\n            .proxy\n            .client_connections\n            .guard(crate::metrics::Protocol::Tcp);\n\n        let session_id = uuid::Uuid::new_v4();\n        let cancellation_handler = Arc::clone(&cancellation_handler);\n        let cancellations = cancellations.clone();\n\n        debug!(protocol = \"tcp\", %session_id, \"accepted new TCP connection\");\n\n        connections.spawn(async move {\n            let (socket, conn_info) = match config.proxy_protocol_v2 {\n                ProxyProtocolV2::Required => {\n                    match read_proxy_protocol(socket).await {\n                        Err(e) => {\n                            error!(\"per-client task finished with an error: {e:#}\");\n                            return;\n                        }\n                        // our load balancers will not send any more data. 
let's just exit immediately\n                        Ok((_socket, ConnectHeader::Local)) => {\n                            debug!(\"healthcheck received\");\n                            return;\n                        }\n                        Ok((socket, ConnectHeader::Proxy(info))) => (socket, info),\n                    }\n                }\n                // ignore the header - it cannot be confused for a postgres or http connection so will\n                // error later.\n                ProxyProtocolV2::Rejected => (\n                    socket,\n                    ConnectionInfo {\n                        addr: peer_addr,\n                        extra: None,\n                    },\n                ),\n            };\n\n            match socket.set_nodelay(true) {\n                Ok(()) => {}\n                Err(e) => {\n                    error!(\n                        \"per-client task finished with an error: failed to set socket option: {e:#}\"\n                    );\n                    return;\n                }\n            }\n\n            let ctx = RequestContext::new(session_id, conn_info, crate::metrics::Protocol::Tcp);\n\n            let res = handle_client(\n                config,\n                backend,\n                &ctx,\n                cancellation_handler,\n                socket,\n                conn_gauge,\n                cancellations,\n            )\n            .instrument(ctx.span())\n            .boxed()\n            .await;\n\n            match res {\n                Err(e) => {\n                    ctx.set_error_kind(e.get_error_kind());\n                    error!(parent: &ctx.span(), \"per-client task finished with an error: {e:#}\");\n                }\n                Ok(None) => {\n                    ctx.set_success();\n                }\n                Ok(Some(p)) => {\n                    ctx.set_success();\n                    let _disconnect = ctx.log_connect();\n                    match 
p.proxy_pass().await {\n                        Ok(()) => {}\n                        Err(ErrorSource::Client(e)) => {\n                            error!(\n                                ?session_id,\n                                \"per-client task finished with an IO error from the client: {e:#}\"\n                            );\n                        }\n                        Err(ErrorSource::Compute(e)) => {\n                            error!(\n                                ?session_id,\n                                \"per-client task finished with an IO error from the compute: {e:#}\"\n                            );\n                        }\n                    }\n                }\n            }\n        });\n    }\n\n    connections.close();\n    cancellations.close();\n    drop(listener);\n\n    // Drain connections\n    connections.wait().await;\n    cancellations.wait().await;\n\n    Ok(())\n}\n\n#[allow(clippy::too_many_arguments)]\npub(crate) async fn handle_client<S: AsyncRead + AsyncWrite + Unpin + Send>(\n    config: &'static ProxyConfig,\n    backend: &'static ConsoleRedirectBackend,\n    ctx: &RequestContext,\n    cancellation_handler: Arc<CancellationHandler>,\n    stream: S,\n    conn_gauge: NumClientConnectionsGuard<'static>,\n    cancellations: tokio_util::task::task_tracker::TaskTracker,\n) -> Result<Option<ProxyPassthrough<S>>, ClientRequestError> {\n    debug!(\n        protocol = %ctx.protocol(),\n        \"handling interactive connection from client\"\n    );\n\n    let metrics = &Metrics::get().proxy;\n    let proto = ctx.protocol();\n    let request_gauge = metrics.connection_requests.guard(proto);\n\n    let tls = config.tls_config.load();\n    let tls = tls.as_deref();\n\n    let record_handshake_error = !ctx.has_private_peer_addr();\n    let pause = ctx.latency_timer_pause(crate::metrics::Waiting::Client);\n    let do_handshake = handshake(ctx, stream, tls, record_handshake_error);\n\n    let (mut stream, params) = match 
tokio::time::timeout(config.handshake_timeout, do_handshake)\n        .await??\n    {\n        HandshakeData::Startup(stream, params) => (stream, params),\n        HandshakeData::Cancel(cancel_key_data) => {\n            // spawn a task to cancel the session, but don't wait for it\n            cancellations.spawn({\n                let cancellation_handler_clone  = Arc::clone(&cancellation_handler);\n                let ctx = ctx.clone();\n                let cancel_span = tracing::span!(parent: None, tracing::Level::INFO, \"cancel_session\", session_id = ?ctx.session_id());\n                cancel_span.follows_from(tracing::Span::current());\n                async move {\n                    cancellation_handler_clone\n                        .cancel_session(\n                            cancel_key_data,\n                            ctx,\n                            config.authentication_config.ip_allowlist_check_enabled,\n                            config.authentication_config.is_vpc_acccess_proxy,\n                            backend.get_api(),\n                        )\n                        .await\n                        .inspect_err(|e | debug!(error = ?e, \"cancel_session failed\")).ok();\n                }.instrument(cancel_span)\n            });\n\n            return Ok(None);\n        }\n    };\n    drop(pause);\n\n    ctx.set_db_options(params.clone());\n\n    let (node_info, mut auth_info, user_info) = match backend\n        .authenticate(ctx, &config.authentication_config, &mut stream)\n        .await\n    {\n        Ok(auth_result) => auth_result,\n        Err(e) => Err(stream.throw_error(e, Some(ctx)).await)?,\n    };\n    auth_info.set_startup_params(&params, true);\n\n    let mut node = connect_compute::connect_to_compute(\n        ctx,\n        config,\n        &node_info,\n        connect_compute::TlsNegotiation::Postgres,\n    )\n    .or_else(|e| async { Err(stream.throw_error(e, Some(ctx)).await) })\n    .await?;\n\n    auth_info\n        
.authenticate(ctx, &mut node)\n        .or_else(|e| async { Err(stream.throw_error(e, Some(ctx)).await) })\n        .await?;\n    send_client_greeting(ctx, &config.greetings, &mut stream);\n\n    let session = cancellation_handler.get_key();\n\n    let (process_id, secret_key) =\n        forward_compute_params_to_client(ctx, *session.key(), &mut stream, &mut node.stream)\n            .await?;\n    let stream = stream.flush_and_into_inner().await?;\n    let hostname = node.hostname.to_string();\n\n    let session_id = ctx.session_id();\n    let (cancel_on_shutdown, cancel) = tokio::sync::oneshot::channel();\n    tokio::spawn(async move {\n        session\n            .maintain_cancel_key(\n                session_id,\n                cancel,\n                &CancelClosure {\n                    socket_addr: node.socket_addr,\n                    cancel_token: RawCancelToken {\n                        ssl_mode: node.ssl_mode,\n                        process_id,\n                        secret_key,\n                    },\n                    hostname,\n                    user_info,\n                },\n                &config.connect_to_compute,\n            )\n            .await;\n    });\n\n    Ok(Some(ProxyPassthrough {\n        client: stream,\n        compute: node.stream.into_framed().into_inner(),\n\n        aux: node.aux,\n        private_link_id: None,\n\n        _cancel_on_shutdown: cancel_on_shutdown,\n\n        _req: request_gauge,\n        _conn: conn_gauge,\n        _db_conn: node.guage,\n    }))\n}\n"
  },
  {
    "path": "proxy/src/context/mod.rs",
    "content": "//! Connection request monitoring contexts\n\nuse std::net::IpAddr;\n\nuse chrono::Utc;\nuse once_cell::sync::OnceCell;\nuse smol_str::SmolStr;\nuse tokio::sync::mpsc;\nuse tracing::field::display;\nuse tracing::{Span, error, info_span};\nuse try_lock::TryLock;\nuse uuid::Uuid;\n\nuse self::parquet::RequestData;\nuse crate::control_plane::messages::{ColdStartInfo, MetricsAuxInfo};\nuse crate::error::ErrorKind;\nuse crate::intern::{BranchIdInt, ProjectIdInt};\nuse crate::metrics::{LatencyAccumulated, LatencyTimer, Metrics, Protocol, Waiting};\nuse crate::pqproto::StartupMessageParams;\nuse crate::protocol2::{ConnectionInfo, ConnectionInfoExtra};\nuse crate::types::{DbName, EndpointId, RoleName};\n\npub mod parquet;\n\npub(crate) static LOG_CHAN: OnceCell<mpsc::WeakUnboundedSender<RequestData>> = OnceCell::new();\npub(crate) static LOG_CHAN_DISCONNECT: OnceCell<mpsc::WeakUnboundedSender<RequestData>> =\n    OnceCell::new();\n\n/// Context data for a single request to connect to a database.\n///\n/// This data should **not** be used for connection logic, only for observability and limiting purposes.\n/// All connection logic should instead use strongly typed state machines, not a bunch of Options.\npub struct RequestContext(\n    /// To allow easier use of the ctx object, we have interior mutability.\n    /// I would typically use a RefCell but that would break the `Send` requirements\n    /// so we need something with thread-safety. 
`TryLock` is a cheap alternative\n    /// that offers similar semantics to a `RefCell` but with synchronisation.\n    TryLock<RequestContextInner>,\n);\n\nstruct RequestContextInner {\n    pub(crate) conn_info: ConnectionInfo,\n    pub(crate) session_id: Uuid,\n    pub(crate) protocol: Protocol,\n    first_packet: chrono::DateTime<Utc>,\n    pub(crate) span: Span,\n\n    // filled in as they are discovered\n    project: Option<ProjectIdInt>,\n    branch: Option<BranchIdInt>,\n    endpoint_id: Option<EndpointId>,\n    dbname: Option<DbName>,\n    user: Option<RoleName>,\n    application: Option<SmolStr>,\n    user_agent: Option<SmolStr>,\n    error_kind: Option<ErrorKind>,\n    pub(crate) auth_method: Option<AuthMethod>,\n    jwt_issuer: Option<String>,\n    success: bool,\n    pub(crate) cold_start_info: ColdStartInfo,\n    pg_options: Option<StartupMessageParams>,\n    testodrome_query_id: Option<SmolStr>,\n\n    // extra\n    // This sender is here to keep the request monitoring channel open while requests are taking place.\n    sender: Option<mpsc::UnboundedSender<RequestData>>,\n    // This sender is only used to log the length of session in case of success.\n    disconnect_sender: Option<mpsc::UnboundedSender<RequestData>>,\n    pub(crate) latency_timer: LatencyTimer,\n    disconnect_timestamp: Option<chrono::DateTime<Utc>>,\n}\n\n#[derive(Clone, Debug)]\npub(crate) enum AuthMethod {\n    // aka link\n    ConsoleRedirect,\n    ScramSha256,\n    ScramSha256Plus,\n    Cleartext,\n    Jwt,\n}\n\nimpl Clone for RequestContext {\n    fn clone(&self) -> Self {\n        let inner = self.0.try_lock().expect(\"should not deadlock\");\n        let new = RequestContextInner {\n            conn_info: inner.conn_info.clone(),\n            session_id: inner.session_id,\n            protocol: inner.protocol,\n            first_packet: inner.first_packet,\n            span: info_span!(\"background_task\"),\n\n            project: inner.project,\n            branch: 
inner.branch,\n            endpoint_id: inner.endpoint_id.clone(),\n            dbname: inner.dbname.clone(),\n            user: inner.user.clone(),\n            application: inner.application.clone(),\n            user_agent: inner.user_agent.clone(),\n            error_kind: inner.error_kind,\n            auth_method: inner.auth_method.clone(),\n            jwt_issuer: inner.jwt_issuer.clone(),\n            success: inner.success,\n            cold_start_info: inner.cold_start_info,\n            pg_options: inner.pg_options.clone(),\n            testodrome_query_id: inner.testodrome_query_id.clone(),\n\n            sender: None,\n            disconnect_sender: None,\n            latency_timer: LatencyTimer::noop(inner.protocol),\n            disconnect_timestamp: inner.disconnect_timestamp,\n        };\n\n        Self(TryLock::new(new))\n    }\n}\n\nimpl RequestContext {\n    pub fn new(session_id: Uuid, conn_info: ConnectionInfo, protocol: Protocol) -> Self {\n        // TODO: be careful with long lived spans\n        let span = info_span!(\n            \"connect_request\",\n            %protocol,\n            ?session_id,\n            %conn_info,\n            ep = tracing::field::Empty,\n            role = tracing::field::Empty,\n        );\n\n        let inner = RequestContextInner {\n            conn_info,\n            session_id,\n            protocol,\n            first_packet: Utc::now(),\n            span,\n\n            project: None,\n            branch: None,\n            endpoint_id: None,\n            dbname: None,\n            user: None,\n            application: None,\n            user_agent: None,\n            error_kind: None,\n            auth_method: None,\n            jwt_issuer: None,\n            success: false,\n            cold_start_info: ColdStartInfo::Unknown,\n            pg_options: None,\n            testodrome_query_id: None,\n\n            sender: LOG_CHAN.get().and_then(|tx| tx.upgrade()),\n            disconnect_sender: 
LOG_CHAN_DISCONNECT.get().and_then(|tx| tx.upgrade()),\n            latency_timer: LatencyTimer::new(protocol),\n            disconnect_timestamp: None,\n        };\n\n        Self(TryLock::new(inner))\n    }\n\n    #[cfg(test)]\n    pub(crate) fn test() -> Self {\n        use std::net::SocketAddr;\n        let ip = IpAddr::from([127, 0, 0, 1]);\n        let addr = SocketAddr::new(ip, 5432);\n        let conn_info = ConnectionInfo { addr, extra: None };\n        RequestContext::new(Uuid::now_v7(), conn_info, Protocol::Tcp)\n    }\n\n    pub(crate) fn console_application_name(&self) -> String {\n        let this = self.0.try_lock().expect(\"should not deadlock\");\n        format!(\n            \"{}/{}\",\n            this.application.as_deref().unwrap_or_default(),\n            this.protocol\n        )\n    }\n\n    pub(crate) fn set_cold_start_info(&self, info: ColdStartInfo) {\n        self.0\n            .try_lock()\n            .expect(\"should not deadlock\")\n            .set_cold_start_info(info);\n    }\n\n    pub(crate) fn set_db_options(&self, options: StartupMessageParams) {\n        let mut this = self.0.try_lock().expect(\"should not deadlock\");\n        this.set_application(options.get(\"application_name\").map(SmolStr::from));\n        if let Some(user) = options.get(\"user\") {\n            this.set_user(user.into());\n        }\n        if let Some(dbname) = options.get(\"database\") {\n            this.set_dbname(dbname.into());\n        }\n\n        // Try to get testodrome_query_id directly from parameters\n        if let Some(options_str) = options.get(\"options\") {\n            // If not found directly, try to extract it from the options string\n            for option in options_str.split_whitespace() {\n                if let Some(value) = option.strip_prefix(\"neon_query_id:\") {\n                    this.set_testodrome_id(value.into());\n                    break;\n                }\n            }\n        }\n\n        this.pg_options = 
Some(options);\n    }\n\n    pub(crate) fn set_project(&self, x: MetricsAuxInfo) {\n        let mut this = self.0.try_lock().expect(\"should not deadlock\");\n        if this.endpoint_id.is_none() {\n            this.set_endpoint_id(x.endpoint_id.as_str().into());\n        }\n        this.branch = Some(x.branch_id);\n        this.project = Some(x.project_id);\n        this.set_cold_start_info(x.cold_start_info);\n    }\n\n    pub(crate) fn set_project_id(&self, project_id: ProjectIdInt) {\n        let mut this = self.0.try_lock().expect(\"should not deadlock\");\n        this.project = Some(project_id);\n    }\n\n    pub(crate) fn set_endpoint_id(&self, endpoint_id: EndpointId) {\n        self.0\n            .try_lock()\n            .expect(\"should not deadlock\")\n            .set_endpoint_id(endpoint_id);\n    }\n\n    pub(crate) fn set_dbname(&self, dbname: DbName) {\n        self.0\n            .try_lock()\n            .expect(\"should not deadlock\")\n            .set_dbname(dbname);\n    }\n\n    pub(crate) fn set_user(&self, user: RoleName) {\n        self.0\n            .try_lock()\n            .expect(\"should not deadlock\")\n            .set_user(user);\n    }\n\n    pub(crate) fn set_user_agent(&self, user_agent: Option<SmolStr>) {\n        self.0\n            .try_lock()\n            .expect(\"should not deadlock\")\n            .set_user_agent(user_agent);\n    }\n\n    pub(crate) fn set_testodrome_id(&self, query_id: SmolStr) {\n        self.0\n            .try_lock()\n            .expect(\"should not deadlock\")\n            .set_testodrome_id(query_id);\n    }\n\n    pub(crate) fn set_auth_method(&self, auth_method: AuthMethod) {\n        let mut this = self.0.try_lock().expect(\"should not deadlock\");\n        this.auth_method = Some(auth_method);\n    }\n\n    pub(crate) fn set_jwt_issuer(&self, jwt_issuer: String) {\n        let mut this = self.0.try_lock().expect(\"should not deadlock\");\n        this.jwt_issuer = Some(jwt_issuer);\n    
}\n\n    pub fn has_private_peer_addr(&self) -> bool {\n        self.0\n            .try_lock()\n            .expect(\"should not deadlock\")\n            .has_private_peer_addr()\n    }\n\n    pub(crate) fn set_error_kind(&self, kind: ErrorKind) {\n        let mut this = self.0.try_lock().expect(\"should not deadlock\");\n        // Do not record errors from the private address to metrics.\n        if !this.has_private_peer_addr() {\n            Metrics::get().proxy.errors_total.inc(kind);\n        }\n        if let Some(ep) = &this.endpoint_id {\n            let metric = &Metrics::get().proxy.endpoints_affected_by_errors;\n            let label = metric.with_labels(kind);\n            metric.get_metric(label).measure(ep);\n        }\n        this.error_kind = Some(kind);\n    }\n\n    pub fn set_success(&self) {\n        let mut this = self.0.try_lock().expect(\"should not deadlock\");\n        this.success = true;\n    }\n\n    pub fn log_connect(self) -> DisconnectLogger {\n        let mut this = self.0.into_inner();\n        this.log_connect();\n\n        // close current span.\n        this.span = Span::none();\n\n        DisconnectLogger(this)\n    }\n\n    pub(crate) fn protocol(&self) -> Protocol {\n        self.0.try_lock().expect(\"should not deadlock\").protocol\n    }\n\n    pub(crate) fn span(&self) -> Span {\n        self.0.try_lock().expect(\"should not deadlock\").span.clone()\n    }\n\n    pub(crate) fn session_id(&self) -> Uuid {\n        self.0.try_lock().expect(\"should not deadlock\").session_id\n    }\n\n    pub(crate) fn peer_addr(&self) -> IpAddr {\n        self.0\n            .try_lock()\n            .expect(\"should not deadlock\")\n            .conn_info\n            .addr\n            .ip()\n    }\n\n    pub(crate) fn extra(&self) -> Option<ConnectionInfoExtra> {\n        self.0\n            .try_lock()\n            .expect(\"should not deadlock\")\n            .conn_info\n            .extra\n            .clone()\n    }\n\n    
pub(crate) fn cold_start_info(&self) -> ColdStartInfo {\n        self.0\n            .try_lock()\n            .expect(\"should not deadlock\")\n            .cold_start_info\n    }\n\n    pub(crate) fn latency_timer_pause(&self, waiting_for: Waiting) -> LatencyTimerPause<'_> {\n        LatencyTimerPause {\n            ctx: self,\n            start: tokio::time::Instant::now(),\n            waiting_for,\n        }\n    }\n\n    pub(crate) fn latency_timer_pause_at(\n        &self,\n        at: tokio::time::Instant,\n        waiting_for: Waiting,\n    ) -> LatencyTimerPause<'_> {\n        LatencyTimerPause {\n            ctx: self,\n            start: at,\n            waiting_for,\n        }\n    }\n\n    pub(crate) fn get_proxy_latency(&self) -> LatencyAccumulated {\n        self.0\n            .try_lock()\n            .expect(\"should not deadlock\")\n            .latency_timer\n            .accumulated()\n    }\n\n    pub(crate) fn get_testodrome_id(&self) -> Option<SmolStr> {\n        self.0\n            .try_lock()\n            .expect(\"should not deadlock\")\n            .testodrome_query_id\n            .clone()\n    }\n\n    pub(crate) fn success(&self) {\n        self.0\n            .try_lock()\n            .expect(\"should not deadlock\")\n            .latency_timer\n            .success();\n    }\n}\n\npub(crate) struct LatencyTimerPause<'a> {\n    ctx: &'a RequestContext,\n    start: tokio::time::Instant,\n    waiting_for: Waiting,\n}\n\nimpl Drop for LatencyTimerPause<'_> {\n    fn drop(&mut self) {\n        self.ctx\n            .0\n            .try_lock()\n            .expect(\"should not deadlock\")\n            .latency_timer\n            .unpause(self.start, self.waiting_for);\n    }\n}\n\nimpl RequestContextInner {\n    fn set_cold_start_info(&mut self, info: ColdStartInfo) {\n        self.cold_start_info = info;\n        self.latency_timer.cold_start_info(info);\n    }\n\n    fn set_endpoint_id(&mut self, endpoint_id: EndpointId) {\n        if 
self.endpoint_id.is_none() {\n            self.span.record(\"ep\", display(&endpoint_id));\n            let metric = &Metrics::get().proxy.connecting_endpoints;\n            let label = metric.with_labels(self.protocol);\n            metric.get_metric(label).measure(&endpoint_id);\n            self.endpoint_id = Some(endpoint_id);\n        }\n    }\n\n    fn set_application(&mut self, app: Option<SmolStr>) {\n        if let Some(app) = app {\n            self.application = Some(app);\n        }\n    }\n\n    fn set_user_agent(&mut self, user_agent: Option<SmolStr>) {\n        self.user_agent = user_agent;\n    }\n\n    fn set_dbname(&mut self, dbname: DbName) {\n        self.dbname = Some(dbname);\n    }\n\n    fn set_user(&mut self, user: RoleName) {\n        self.span.record(\"role\", display(&user));\n        self.user = Some(user);\n    }\n\n    fn set_testodrome_id(&mut self, query_id: SmolStr) {\n        self.testodrome_query_id = Some(query_id);\n    }\n\n    fn has_private_peer_addr(&self) -> bool {\n        match self.conn_info.addr.ip() {\n            IpAddr::V4(ip) => ip.is_private(),\n            IpAddr::V6(_) => false,\n        }\n    }\n\n    fn log_connect(&mut self) {\n        if let Some(tx) = self.sender.take() {\n            // If type changes, this error handling needs to be updated.\n            let tx: mpsc::UnboundedSender<RequestData> = tx;\n            if let Err(e) = tx.send(RequestData::from(&*self)) {\n                error!(\"log_connect channel send failed: {e}\");\n            }\n        }\n    }\n\n    fn log_disconnect(&mut self) {\n        // If we are here, it's guaranteed that the user successfully connected to the endpoint.\n        // Here we log the length of the session.\n        self.disconnect_timestamp = Some(Utc::now());\n        if let Some(tx) = self.disconnect_sender.take() {\n            // If type changes, this error handling needs to be updated.\n            let tx: mpsc::UnboundedSender<RequestData> = tx;\n         
   if let Err(e) = tx.send(RequestData::from(&*self)) {\n                error!(\"log_disconnect channel send failed: {e}\");\n            }\n        }\n    }\n}\n\nimpl Drop for RequestContextInner {\n    fn drop(&mut self) {\n        if self.sender.is_some() {\n            self.log_connect();\n        }\n    }\n}\n\npub struct DisconnectLogger(RequestContextInner);\n\nimpl Drop for DisconnectLogger {\n    fn drop(&mut self) {\n        self.0.log_disconnect();\n    }\n}\n"
  },
  {
    "path": "proxy/src/context/parquet.rs",
    "content": "use std::sync::Arc;\nuse std::time::SystemTime;\n\nuse anyhow::Context;\nuse bytes::buf::Writer;\nuse bytes::{BufMut, BytesMut};\nuse chrono::{Datelike, Timelike};\nuse futures::{Stream, StreamExt};\nuse parquet::basic::Compression;\nuse parquet::file::metadata::RowGroupMetaDataPtr;\nuse parquet::file::properties::{DEFAULT_PAGE_SIZE, WriterProperties, WriterPropertiesPtr};\nuse parquet::file::writer::SerializedFileWriter;\nuse parquet::record::RecordWriter;\nuse remote_storage::{GenericRemoteStorage, RemotePath, RemoteStorageConfig, TimeoutOrCancel};\nuse serde::ser::SerializeMap;\nuse tokio::sync::mpsc;\nuse tokio::time;\nuse tokio_util::sync::CancellationToken;\nuse tracing::{Span, debug, info};\nuse utils::backoff;\n\nuse super::{LOG_CHAN, RequestContextInner};\nuse crate::config::remote_storage_from_toml;\nuse crate::context::LOG_CHAN_DISCONNECT;\nuse crate::ext::TaskExt;\nuse crate::pqproto::StartupMessageParams;\n\n#[derive(clap::Args, Clone, Debug)]\npub struct ParquetUploadArgs {\n    /// Storage location to upload the parquet files to.\n    /// Encoded as toml (same format as pageservers), eg\n    /// `{bucket_name='the-bucket',bucket_region='us-east-1',prefix_in_bucket='proxy',endpoint='http://minio:9000'}`\n    #[clap(long, value_parser = remote_storage_from_toml)]\n    parquet_upload_remote_storage: Option<RemoteStorageConfig>,\n\n    #[clap(long, value_parser = remote_storage_from_toml)]\n    parquet_upload_disconnect_events_remote_storage: Option<RemoteStorageConfig>,\n\n    /// How many rows to include in a row group\n    #[clap(long, default_value_t = 8192)]\n    parquet_upload_row_group_size: usize,\n\n    /// How large each column page should be in bytes\n    #[clap(long, default_value_t = DEFAULT_PAGE_SIZE)]\n    parquet_upload_page_size: usize,\n\n    /// How large the total parquet file should be in bytes\n    #[clap(long, default_value_t = 100_000_000)]\n    parquet_upload_size: i64,\n\n    /// How long to wait before forcing a 
file upload\n    #[clap(long, default_value = \"20m\", value_parser = humantime::parse_duration)]\n    parquet_upload_maximum_duration: tokio::time::Duration,\n\n    /// What level of compression to use\n    #[clap(long, default_value_t = Compression::UNCOMPRESSED)]\n    parquet_upload_compression: Compression,\n}\n\n// Occasional network issues and such can cause remote operations to fail, and\n// that's expected. If a upload fails, we log it at info-level, and retry.\n// But after FAILED_UPLOAD_WARN_THRESHOLD retries, we start to log it at WARN\n// level instead, as repeated failures can mean a more serious problem. If it\n// fails more than FAILED_UPLOAD_RETRIES times, we give up\npub(crate) const FAILED_UPLOAD_WARN_THRESHOLD: u32 = 3;\npub(crate) const FAILED_UPLOAD_MAX_RETRIES: u32 = 10;\n\n// the parquet crate leaves a lot to be desired...\n// what follows is an attempt to write parquet files with minimal allocs.\n// complication: parquet is a columnar format, while we want to write in as rows.\n// design:\n// * we batch up to 1024 rows, then flush them into a 'row group'\n// * after each rowgroup write, we check the length of the file and upload to s3 if large enough\n\n#[derive(parquet_derive::ParquetRecordWriter)]\npub(crate) struct RequestData {\n    region: String,\n    protocol: &'static str,\n    /// Must be UTC. 
The derive macro doesn't like the timezones\n    timestamp: chrono::NaiveDateTime,\n    session_id: uuid::Uuid,\n    peer_addr: String,\n    username: Option<String>,\n    application_name: Option<String>,\n    user_agent: Option<String>,\n    endpoint_id: Option<String>,\n    database: Option<String>,\n    project: Option<String>,\n    branch: Option<String>,\n    pg_options: Option<String>,\n    auth_method: Option<&'static str>,\n    jwt_issuer: Option<String>,\n\n    error: Option<&'static str>,\n    /// Success is counted if we form a HTTP response with sql rows inside\n    /// Or if we make it to proxy_pass\n    success: bool,\n    /// Indicates if the cplane started the new compute node for this request.\n    cold_start_info: &'static str,\n    /// Tracks time from session start (HTTP request/libpq TCP handshake)\n    /// Through to success/failure\n    duration_us: u64,\n    /// If the session was successful after the disconnect, will be created one more event with filled `disconnect_timestamp`.\n    disconnect_timestamp: Option<chrono::NaiveDateTime>,\n}\n\nstruct Options<'a> {\n    options: &'a StartupMessageParams,\n}\n\nimpl serde::Serialize for Options<'_> {\n    fn serialize<S>(&self, s: S) -> Result<S::Ok, S::Error>\n    where\n        S: serde::Serializer,\n    {\n        let mut state = s.serialize_map(None)?;\n        for (k, v) in self.options.iter() {\n            state.serialize_entry(k, v)?;\n        }\n        state.end()\n    }\n}\n\nimpl From<&RequestContextInner> for RequestData {\n    fn from(value: &RequestContextInner) -> Self {\n        Self {\n            session_id: value.session_id,\n            peer_addr: value.conn_info.addr.ip().to_string(),\n            timestamp: value.first_packet.naive_utc(),\n            username: value.user.as_deref().map(String::from),\n            application_name: value.application.as_deref().map(String::from),\n            user_agent: value.user_agent.as_deref().map(String::from),\n            
endpoint_id: value.endpoint_id.as_deref().map(String::from),\n            database: value.dbname.as_deref().map(String::from),\n            project: value.project.as_deref().map(String::from),\n            branch: value.branch.as_deref().map(String::from),\n            pg_options: value\n                .pg_options\n                .as_ref()\n                .and_then(|options| serde_json::to_string(&Options { options }).ok()),\n            auth_method: value.auth_method.as_ref().map(|x| match x {\n                super::AuthMethod::ConsoleRedirect => \"console_redirect\",\n                super::AuthMethod::ScramSha256 => \"scram_sha_256\",\n                super::AuthMethod::ScramSha256Plus => \"scram_sha_256_plus\",\n                super::AuthMethod::Cleartext => \"cleartext\",\n                super::AuthMethod::Jwt => \"jwt\",\n            }),\n            jwt_issuer: value.jwt_issuer.clone(),\n            protocol: value.protocol.as_str(),\n            region: String::new(),\n            error: value.error_kind.as_ref().map(|e| e.to_metric_label()),\n            success: value.success,\n            cold_start_info: value.cold_start_info.as_str(),\n            duration_us: SystemTime::from(value.first_packet)\n                .elapsed()\n                .unwrap_or_default()\n                .as_micros() as u64, // 584 millenia... 
good enough\n            disconnect_timestamp: value.disconnect_timestamp.map(|x| x.naive_utc()),\n        }\n    }\n}\n\n/// Parquet request context worker\n///\n/// It listened on a channel for all completed requests, extracts the data and writes it into a parquet file,\n/// then uploads a completed batch to S3\npub async fn worker(\n    cancellation_token: CancellationToken,\n    config: ParquetUploadArgs,\n    region: String,\n) -> anyhow::Result<()> {\n    let Some(remote_storage_config) = config.parquet_upload_remote_storage else {\n        tracing::warn!(\"parquet request upload: no s3 bucket configured\");\n        return Ok(());\n    };\n\n    let (tx, mut rx) = mpsc::unbounded_channel();\n    LOG_CHAN\n        .set(tx.downgrade())\n        .expect(\"only one worker should set the channel\");\n\n    // setup row stream that will close on cancellation\n    let cancellation_token2 = cancellation_token.clone();\n    tokio::spawn(async move {\n        cancellation_token2.cancelled().await;\n        // dropping this sender will cause the channel to close only once\n        // all the remaining inflight requests have been completed.\n        drop(tx);\n    });\n    let rx = futures::stream::poll_fn(move |cx| rx.poll_recv(cx));\n    let rx = rx.map(RequestData::from);\n\n    let storage = GenericRemoteStorage::from_config(&remote_storage_config)\n        .await\n        .context(\"remote storage init\")?;\n\n    let properties = WriterProperties::builder()\n        .set_data_page_size_limit(config.parquet_upload_page_size)\n        .set_compression(config.parquet_upload_compression);\n\n    let parquet_config = ParquetConfig {\n        propeties: Arc::new(properties.build()),\n        rows_per_group: config.parquet_upload_row_group_size,\n        file_size: config.parquet_upload_size,\n        max_duration: config.parquet_upload_maximum_duration,\n\n        #[cfg(any(test, feature = \"testing\"))]\n        test_remote_failures: 0,\n    };\n\n    // TODO(anna): 
consider moving this to a separate function.\n    if let Some(disconnect_events_storage_config) =\n        config.parquet_upload_disconnect_events_remote_storage\n    {\n        let (tx_disconnect, mut rx_disconnect) = mpsc::unbounded_channel();\n        LOG_CHAN_DISCONNECT\n            .set(tx_disconnect.downgrade())\n            .expect(\"only one worker should set the channel\");\n\n        // setup row stream that will close on cancellation\n        tokio::spawn(async move {\n            cancellation_token.cancelled().await;\n            // dropping this sender will cause the channel to close only once\n            // all the remaining inflight requests have been completed.\n            drop(tx_disconnect);\n        });\n        let rx_disconnect = futures::stream::poll_fn(move |cx| rx_disconnect.poll_recv(cx));\n        let rx_disconnect = rx_disconnect.map(RequestData::from);\n\n        let storage_disconnect =\n            GenericRemoteStorage::from_config(&disconnect_events_storage_config)\n                .await\n                .context(\"remote storage for disconnect events init\")?;\n        let parquet_config_disconnect = parquet_config.clone();\n        tokio::try_join!(\n            worker_inner(storage, rx, parquet_config, &region),\n            worker_inner(\n                storage_disconnect,\n                rx_disconnect,\n                parquet_config_disconnect,\n                &region\n            )\n        )\n        .map(|_| ())\n    } else {\n        worker_inner(storage, rx, parquet_config, &region).await\n    }\n}\n\n#[derive(Clone, Debug)]\nstruct ParquetConfig {\n    propeties: WriterPropertiesPtr,\n    rows_per_group: usize,\n    file_size: i64,\n\n    max_duration: tokio::time::Duration,\n\n    #[cfg(any(test, feature = \"testing\"))]\n    test_remote_failures: u64,\n}\n\nasync fn worker_inner(\n    storage: GenericRemoteStorage,\n    rx: impl Stream<Item = RequestData>,\n    config: ParquetConfig,\n    region: &str,\n) -> 
anyhow::Result<()> {\n    #[cfg(any(test, feature = \"testing\"))]\n    let storage = if config.test_remote_failures > 0 {\n        GenericRemoteStorage::unreliable_wrapper(storage, config.test_remote_failures, 100)\n    } else {\n        storage\n    };\n\n    let mut rx = std::pin::pin!(rx);\n\n    let mut rows = Vec::with_capacity(config.rows_per_group);\n\n    let schema = rows.as_slice().schema()?;\n    let buffer = BytesMut::new();\n    let w = buffer.writer();\n    let mut w = SerializedFileWriter::new(w, schema.clone(), config.propeties.clone())?;\n\n    let mut last_upload = time::Instant::now();\n\n    let mut len = 0;\n    while let Some(mut row) = rx.next().await {\n        region.clone_into(&mut row.region);\n        rows.push(row);\n        let force = last_upload.elapsed() > config.max_duration;\n        if rows.len() == config.rows_per_group || force {\n            let rg_meta;\n            (rows, w, rg_meta) = flush_rows(rows, w).await?;\n            len += rg_meta.compressed_size();\n        }\n        if len > config.file_size || force {\n            last_upload = time::Instant::now();\n            let file = upload_parquet(w, len, &storage).await?;\n            w = SerializedFileWriter::new(file, schema.clone(), config.propeties.clone())?;\n            len = 0;\n        }\n    }\n\n    if !rows.is_empty() {\n        let rg_meta;\n        (_, w, rg_meta) = flush_rows(rows, w).await?;\n        len += rg_meta.compressed_size();\n    }\n\n    if !w.flushed_row_groups().is_empty() {\n        let _rtchk: Writer<BytesMut> = upload_parquet(w, len, &storage).await?;\n    }\n\n    Ok(())\n}\n\nasync fn flush_rows<W>(\n    rows: Vec<RequestData>,\n    mut w: SerializedFileWriter<W>,\n) -> anyhow::Result<(\n    Vec<RequestData>,\n    SerializedFileWriter<W>,\n    RowGroupMetaDataPtr,\n)>\nwhere\n    W: std::io::Write + Send + 'static,\n{\n    let span = Span::current();\n    let (mut rows, w, rg_meta) = tokio::task::spawn_blocking(move || {\n        let 
_enter = span.enter();\n\n        let mut rg = w.next_row_group()?;\n        rows.as_slice().write_to_row_group(&mut rg)?;\n        let rg_meta = rg.close()?;\n\n        let size = rg_meta.compressed_size();\n        let compression = rg_meta.compressed_size() as f64 / rg_meta.total_byte_size() as f64;\n\n        debug!(size, compression, \"flushed row group to parquet file\");\n\n        Ok::<_, parquet::errors::ParquetError>((rows, w, rg_meta))\n    })\n    .await\n    .propagate_task_panic()?;\n\n    rows.clear();\n    Ok((rows, w, rg_meta))\n}\n\nasync fn upload_parquet(\n    mut w: SerializedFileWriter<Writer<BytesMut>>,\n    len: i64,\n    storage: &GenericRemoteStorage,\n) -> anyhow::Result<Writer<BytesMut>> {\n    let len_uncompressed = w\n        .flushed_row_groups()\n        .iter()\n        .map(|rg| rg.total_byte_size())\n        .sum::<i64>();\n\n    // I don't know how compute intensive this is, although it probably isn't much... better be safe than sorry.\n    // finish method only available on the fork: https://github.com/apache/arrow-rs/issues/5253\n    let (mut buffer, metadata) =\n        tokio::task::spawn_blocking(move || -> parquet::errors::Result<_> {\n            let metadata = w.finish()?;\n            let buffer = std::mem::take(w.inner_mut().get_mut());\n            Ok((buffer, metadata))\n        })\n        .await\n        .propagate_task_panic()?;\n\n    let data = buffer.split().freeze();\n\n    let compression = len as f64 / len_uncompressed as f64;\n    let size = data.len();\n    let now = chrono::Utc::now();\n    let id = uuid::Uuid::new_v7(uuid::Timestamp::from_unix(\n        uuid::NoContext,\n        // we won't be running this in 1970. 
this cast is ok\n        now.timestamp() as u64,\n        now.timestamp_subsec_nanos(),\n    ));\n\n    info!(\n        %id,\n        rows = metadata.num_rows,\n        size, compression, \"uploading request parquet file\"\n    );\n\n    let year = now.year();\n    let month = now.month();\n    let day = now.day();\n    let hour = now.hour();\n    // segment files by time for S3 performance\n    let path = RemotePath::from_string(&format!(\n        \"{year:04}/{month:02}/{day:02}/{hour:02}/requests_{id}.parquet\"\n    ))?;\n    let cancel = CancellationToken::new();\n    let maybe_err = backoff::retry(\n        || async {\n            let stream = futures::stream::once(futures::future::ready(Ok(data.clone())));\n            storage\n                .upload(stream, data.len(), &path, None, &cancel)\n                .await\n        },\n        TimeoutOrCancel::caused_by_cancel,\n        FAILED_UPLOAD_WARN_THRESHOLD,\n        FAILED_UPLOAD_MAX_RETRIES,\n        \"request_data_upload\",\n        // we don't want cancellation to interrupt here, so we make a dummy cancel token\n        &cancel,\n    )\n    .await\n    .ok_or_else(|| anyhow::Error::new(TimeoutOrCancel::Cancel))\n    .and_then(|x| x)\n    .with_context(|| format!(\"request_data_upload: path={path}\"))\n    .err();\n\n    if let Some(err) = maybe_err {\n        tracing::error!(%id, %path, error = ?err, \"failed to upload request data\");\n    }\n\n    Ok(buffer.writer())\n}\n\n#[cfg(test)]\nmod tests {\n    use std::net::Ipv4Addr;\n    use std::num::NonZeroUsize;\n    use std::sync::Arc;\n\n    use camino::Utf8Path;\n    use clap::Parser;\n    use futures::{Stream, StreamExt};\n    use itertools::Itertools;\n    use parquet::basic::{Compression, ZstdLevel};\n    use parquet::file::properties::{DEFAULT_PAGE_SIZE, WriterProperties};\n    use parquet::file::reader::FileReader;\n    use parquet::file::serialized_reader::SerializedFileReader;\n    use rand::rngs::StdRng;\n    use rand::{Rng, SeedableRng};\n    
use remote_storage::{\n        DEFAULT_MAX_KEYS_PER_LIST_RESPONSE, DEFAULT_REMOTE_STORAGE_S3_CONCURRENCY_LIMIT,\n        GenericRemoteStorage, RemoteStorageConfig, RemoteStorageKind, S3Config,\n    };\n    use tokio::sync::mpsc;\n    use tokio::time;\n    use walkdir::WalkDir;\n\n    use super::{ParquetConfig, ParquetUploadArgs, RequestData, worker_inner};\n\n    #[derive(Parser)]\n    struct ProxyCliArgs {\n        #[clap(flatten)]\n        parquet_upload: ParquetUploadArgs,\n    }\n\n    #[test]\n    fn default_parser() {\n        let ProxyCliArgs { parquet_upload } = ProxyCliArgs::parse_from([\"proxy\"]);\n        assert_eq!(parquet_upload.parquet_upload_remote_storage, None);\n        assert_eq!(parquet_upload.parquet_upload_row_group_size, 8192);\n        assert_eq!(parquet_upload.parquet_upload_page_size, DEFAULT_PAGE_SIZE);\n        assert_eq!(parquet_upload.parquet_upload_size, 100_000_000);\n        assert_eq!(\n            parquet_upload.parquet_upload_maximum_duration,\n            time::Duration::from_secs(20 * 60)\n        );\n        assert_eq!(\n            parquet_upload.parquet_upload_compression,\n            Compression::UNCOMPRESSED\n        );\n    }\n\n    #[test]\n    fn full_parser() {\n        let ProxyCliArgs { parquet_upload } = ProxyCliArgs::parse_from([\n            \"proxy\",\n            \"--parquet-upload-remote-storage\",\n            \"{bucket_name='default',prefix_in_bucket='proxy/',bucket_region='us-east-1',endpoint='http://minio:9000'}\",\n            \"--parquet-upload-row-group-size\",\n            \"100\",\n            \"--parquet-upload-page-size\",\n            \"10000\",\n            \"--parquet-upload-size\",\n            \"10000000\",\n            \"--parquet-upload-maximum-duration\",\n            \"10m\",\n            \"--parquet-upload-compression\",\n            \"zstd(5)\",\n        ]);\n        assert_eq!(\n            parquet_upload.parquet_upload_remote_storage,\n            Some(RemoteStorageConfig {\n           
     storage: RemoteStorageKind::AwsS3(S3Config {\n                    bucket_name: \"default\".into(),\n                    bucket_region: \"us-east-1\".into(),\n                    prefix_in_bucket: Some(\"proxy/\".into()),\n                    endpoint: Some(\"http://minio:9000\".into()),\n                    concurrency_limit: NonZeroUsize::new(\n                        DEFAULT_REMOTE_STORAGE_S3_CONCURRENCY_LIMIT\n                    )\n                    .unwrap(),\n                    max_keys_per_list_response: DEFAULT_MAX_KEYS_PER_LIST_RESPONSE,\n                    upload_storage_class: None,\n                }),\n                timeout: RemoteStorageConfig::DEFAULT_TIMEOUT,\n                small_timeout: RemoteStorageConfig::DEFAULT_SMALL_TIMEOUT,\n            })\n        );\n        assert_eq!(parquet_upload.parquet_upload_row_group_size, 100);\n        assert_eq!(parquet_upload.parquet_upload_page_size, 10000);\n        assert_eq!(parquet_upload.parquet_upload_size, 10_000_000);\n        assert_eq!(\n            parquet_upload.parquet_upload_maximum_duration,\n            time::Duration::from_secs(10 * 60)\n        );\n        assert_eq!(\n            parquet_upload.parquet_upload_compression,\n            Compression::ZSTD(ZstdLevel::try_new(5).unwrap())\n        );\n    }\n\n    fn generate_request_data(rng: &mut impl Rng) -> RequestData {\n        RequestData {\n            session_id: uuid::Builder::from_random_bytes(rng.random()).into_uuid(),\n            peer_addr: Ipv4Addr::from(rng.random::<[u8; 4]>()).to_string(),\n            timestamp: chrono::DateTime::from_timestamp_millis(\n                rng.random_range(1703862754..1803862754),\n            )\n            .unwrap()\n            .naive_utc(),\n            application_name: Some(\"test\".to_owned()),\n            user_agent: Some(\"test-user-agent\".to_owned()),\n            username: Some(hex::encode(rng.random::<[u8; 4]>())),\n            endpoint_id: 
Some(hex::encode(rng.random::<[u8; 16]>())),\n            database: Some(hex::encode(rng.random::<[u8; 16]>())),\n            project: Some(hex::encode(rng.random::<[u8; 16]>())),\n            branch: Some(hex::encode(rng.random::<[u8; 16]>())),\n            pg_options: None,\n            auth_method: None,\n            jwt_issuer: None,\n            protocol: [\"tcp\", \"ws\", \"http\"][rng.random_range(0..3)],\n            region: String::new(),\n            error: None,\n            success: rng.random(),\n            cold_start_info: \"no\",\n            duration_us: rng.random_range(0..30_000_000),\n            disconnect_timestamp: None,\n        }\n    }\n\n    fn random_stream(len: usize) -> impl Stream<Item = RequestData> + Unpin {\n        let mut rng = StdRng::from_seed([0x39; 32]);\n        futures::stream::iter(\n            std::iter::repeat_with(move || generate_request_data(&mut rng)).take(len),\n        )\n    }\n\n    async fn run_test(\n        tmpdir: &Utf8Path,\n        config: ParquetConfig,\n        rx: impl Stream<Item = RequestData>,\n    ) -> Vec<(u64, usize, i64)> {\n        let remote_storage_config = RemoteStorageConfig {\n            storage: RemoteStorageKind::LocalFs {\n                local_path: tmpdir.to_path_buf(),\n            },\n            timeout: std::time::Duration::from_secs(120),\n            small_timeout: std::time::Duration::from_secs(30),\n        };\n        let storage = GenericRemoteStorage::from_config(&remote_storage_config)\n            .await\n            .unwrap();\n\n        worker_inner(storage, rx, config, \"us-east-1\")\n            .await\n            .unwrap();\n\n        let mut files = WalkDir::new(tmpdir.as_std_path())\n            .into_iter()\n            .filter_map(|entry| entry.ok())\n            .filter(|entry| entry.file_type().is_file())\n            .map(|entry| entry.path().to_path_buf())\n            .collect_vec();\n        files.sort();\n\n        files\n            .into_iter()\n        
    .map(|path| std::fs::File::open(tmpdir.as_std_path().join(path)).unwrap())\n            .map(|file| {\n                (\n                    file.metadata().unwrap(),\n                    SerializedFileReader::new(file).unwrap().metadata().clone(),\n                )\n            })\n            .map(|(file_meta, parquet_meta)| {\n                (\n                    file_meta.len(),\n                    parquet_meta.num_row_groups(),\n                    parquet_meta.file_metadata().num_rows(),\n                )\n            })\n            .collect()\n    }\n\n    #[tokio::test]\n    async fn verify_parquet_no_compression() {\n        let tmpdir = camino_tempfile::tempdir().unwrap();\n\n        let config = ParquetConfig {\n            propeties: Arc::new(WriterProperties::new()),\n            rows_per_group: 2_000,\n            file_size: 1_000_000,\n            max_duration: time::Duration::from_secs(20 * 60),\n            test_remote_failures: 0,\n        };\n\n        let rx = random_stream(50_000);\n        let file_stats = run_test(tmpdir.path(), config, rx).await;\n\n        assert_eq!(\n            file_stats,\n            [\n                (1313878, 3, 6000),\n                (1313891, 3, 6000),\n                (1314058, 3, 6000),\n                (1313914, 3, 6000),\n                (1313760, 3, 6000),\n                (1314084, 3, 6000),\n                (1313965, 3, 6000),\n                (1313911, 3, 6000),\n                (438290, 1, 2000)\n            ]\n        );\n\n        tmpdir.close().unwrap();\n    }\n\n    #[tokio::test]\n    async fn verify_parquet_strong_compression() {\n        let tmpdir = camino_tempfile::tempdir().unwrap();\n\n        let config = ParquetConfig {\n            propeties: Arc::new(\n                WriterProperties::builder()\n                    .set_compression(parquet::basic::Compression::ZSTD(\n                        ZstdLevel::try_new(10).unwrap(),\n                    ))\n                    
.build(),\n            ),\n            rows_per_group: 2_000,\n            file_size: 1_000_000,\n            max_duration: time::Duration::from_secs(20 * 60),\n            test_remote_failures: 0,\n        };\n\n        let rx = random_stream(50_000);\n        let file_stats = run_test(tmpdir.path(), config, rx).await;\n\n        // with strong compression, the files are smaller\n        assert_eq!(\n            file_stats,\n            [\n                (1206039, 5, 10000),\n                (1205798, 5, 10000),\n                (1205776, 5, 10000),\n                (1206051, 5, 10000),\n                (1205746, 5, 10000)\n            ]\n        );\n\n        tmpdir.close().unwrap();\n    }\n\n    #[tokio::test]\n    async fn verify_parquet_unreliable_upload() {\n        let tmpdir = camino_tempfile::tempdir().unwrap();\n\n        let config = ParquetConfig {\n            propeties: Arc::new(WriterProperties::new()),\n            rows_per_group: 2_000,\n            file_size: 1_000_000,\n            max_duration: time::Duration::from_secs(20 * 60),\n            test_remote_failures: 2,\n        };\n\n        let rx = random_stream(50_000);\n        let file_stats = run_test(tmpdir.path(), config, rx).await;\n\n        assert_eq!(\n            file_stats,\n            [\n                (1313878, 3, 6000),\n                (1313891, 3, 6000),\n                (1314058, 3, 6000),\n                (1313914, 3, 6000),\n                (1313760, 3, 6000),\n                (1314084, 3, 6000),\n                (1313965, 3, 6000),\n                (1313911, 3, 6000),\n                (438290, 1, 2000)\n            ]\n        );\n\n        tmpdir.close().unwrap();\n    }\n\n    #[tokio::test(start_paused = true)]\n    async fn verify_parquet_regular_upload() {\n        let tmpdir = camino_tempfile::tempdir().unwrap();\n\n        let config = ParquetConfig {\n            propeties: Arc::new(WriterProperties::new()),\n            rows_per_group: 2_000,\n            
file_size: 1_000_000,\n            max_duration: time::Duration::from_secs(60),\n            test_remote_failures: 2,\n        };\n\n        let (tx, mut rx) = mpsc::unbounded_channel();\n\n        tokio::spawn(async move {\n            for _ in 0..3 {\n                let mut s = random_stream(3000);\n                while let Some(r) = s.next().await {\n                    tx.send(r).unwrap();\n                }\n                time::sleep(time::Duration::from_secs(70)).await;\n            }\n        });\n\n        let rx = futures::stream::poll_fn(move |cx| rx.poll_recv(cx));\n        let file_stats = run_test(tmpdir.path(), config, rx).await;\n\n        // files are smaller than the size threshold, but they took too long to fill so were flushed early\n        assert_eq!(\n            file_stats,\n            [(658552, 2, 3001), (658265, 2, 3000), (658061, 2, 2999)]\n        );\n\n        tmpdir.close().unwrap();\n    }\n}\n"
  },
  {
    "path": "proxy/src/control_plane/client/cplane_proxy_v1.rs",
    "content": "//! Production console backend.\n\nuse std::net::IpAddr;\nuse std::str::FromStr;\nuse std::sync::Arc;\n\nuse ::http::HeaderName;\nuse ::http::header::AUTHORIZATION;\nuse bytes::Bytes;\nuse futures::TryFutureExt;\nuse hyper::StatusCode;\nuse postgres_client::config::SslMode;\nuse tokio::time::Instant;\nuse tracing::{Instrument, debug, info, info_span, warn};\n\nuse super::super::messages::{ControlPlaneErrorMessage, GetEndpointAccessControl, WakeCompute};\nuse crate::auth::backend::ComputeUserInfo;\nuse crate::auth::backend::jwt::AuthRule;\nuse crate::cache::Cached;\nuse crate::cache::node_info::CachedNodeInfo;\nuse crate::context::RequestContext;\nuse crate::control_plane::caches::ApiCaches;\nuse crate::control_plane::errors::{\n    ControlPlaneError, GetAuthInfoError, GetEndpointJwksError, WakeComputeError,\n};\nuse crate::control_plane::locks::ApiLocks;\nuse crate::control_plane::messages::{ColdStartInfo, EndpointJwksResponse};\nuse crate::control_plane::{\n    AccessBlockerFlags, AuthInfo, AuthSecret, EndpointAccessControl, NodeInfo, RoleAccessControl,\n};\nuse crate::metrics::Metrics;\nuse crate::proxy::retry::CouldRetry;\nuse crate::rate_limiter::WakeComputeRateLimiter;\nuse crate::types::{EndpointCacheKey, EndpointId, RoleName};\nuse crate::{compute, http, scram};\n\npub(crate) const X_REQUEST_ID: HeaderName = HeaderName::from_static(\"x-request-id\");\n\n#[derive(Clone)]\npub struct NeonControlPlaneClient {\n    endpoint: http::Endpoint,\n    pub caches: &'static ApiCaches,\n    pub(crate) locks: &'static ApiLocks<EndpointCacheKey>,\n    pub(crate) wake_compute_endpoint_rate_limiter: Arc<WakeComputeRateLimiter>,\n    // put in a shared ref so we don't copy secrets all over in memory\n    jwt: Arc<str>,\n}\n\nimpl NeonControlPlaneClient {\n    /// Construct an API object containing the auth parameters.\n    pub fn new(\n        endpoint: http::Endpoint,\n        jwt: Arc<str>,\n        caches: &'static ApiCaches,\n        locks: &'static 
ApiLocks<EndpointCacheKey>,\n        wake_compute_endpoint_rate_limiter: Arc<WakeComputeRateLimiter>,\n    ) -> Self {\n        Self {\n            endpoint,\n            caches,\n            locks,\n            wake_compute_endpoint_rate_limiter,\n            jwt,\n        }\n    }\n\n    pub(crate) fn url(&self) -> &str {\n        self.endpoint.url().as_str()\n    }\n\n    async fn get_and_cache_auth_info<T>(\n        &self,\n        ctx: &RequestContext,\n        endpoint: &EndpointId,\n        role: &RoleName,\n        cache_key: &EndpointId,\n        extract: impl FnOnce(&EndpointAccessControl, &RoleAccessControl) -> T,\n    ) -> Result<T, GetAuthInfoError> {\n        match self.do_get_auth_req(ctx, endpoint, role).await {\n            Ok(auth_info) => {\n                let control = EndpointAccessControl {\n                    allowed_ips: Arc::new(auth_info.allowed_ips),\n                    allowed_vpce: Arc::new(auth_info.allowed_vpc_endpoint_ids),\n                    flags: auth_info.access_blocker_flags,\n                    rate_limits: auth_info.rate_limits,\n                };\n                let role_control = RoleAccessControl {\n                    secret: auth_info.secret,\n                };\n                let res = extract(&control, &role_control);\n\n                self.caches.project_info.insert_endpoint_access(\n                    auth_info.account_id,\n                    auth_info.project_id,\n                    cache_key.into(),\n                    role.into(),\n                    control,\n                    role_control,\n                );\n\n                if let Some(project_id) = auth_info.project_id {\n                    ctx.set_project_id(project_id);\n                }\n\n                Ok(res)\n            }\n            Err(err) => match err {\n                GetAuthInfoError::ApiError(ControlPlaneError::Message(ref msg)) => {\n                    let retry_info = msg.status.as_ref().and_then(|s| 
s.details.retry_info);\n\n                    // If we can retry this error, do not cache it,\n                    // unless we were given a retry delay.\n                    if msg.could_retry() && retry_info.is_none() {\n                        return Err(err);\n                    }\n\n                    self.caches.project_info.insert_endpoint_access_err(\n                        cache_key.into(),\n                        role.into(),\n                        msg.clone(),\n                    );\n\n                    Err(err)\n                }\n                err => Err(err),\n            },\n        }\n    }\n\n    async fn do_get_auth_req(\n        &self,\n        ctx: &RequestContext,\n        endpoint: &EndpointId,\n        role: &RoleName,\n    ) -> Result<AuthInfo, GetAuthInfoError> {\n        async {\n            let response = {\n                let request = self\n                    .endpoint\n                    .get_path(\"get_endpoint_access_control\")\n                    .header(X_REQUEST_ID, ctx.session_id().to_string())\n                    .header(AUTHORIZATION, format!(\"Bearer {}\", &self.jwt))\n                    .query(&[(\"session_id\", ctx.session_id())])\n                    .query(&[\n                        (\"application_name\", ctx.console_application_name().as_str()),\n                        (\"endpointish\", endpoint.as_str()),\n                        (\"role\", role.as_str()),\n                    ])\n                    .build()?;\n\n                debug!(url = request.url().as_str(), \"sending http request\");\n                let start = Instant::now();\n                let _pause = ctx.latency_timer_pause_at(start, crate::metrics::Waiting::Cplane);\n                let response = self.endpoint.execute(request).await?;\n\n                info!(duration = ?start.elapsed(), \"received http response\");\n\n                response\n            };\n\n            let body = match parse_body::<GetEndpointAccessControl>(\n    
            response.status(),\n                response.bytes().await?,\n            ) {\n                Ok(body) => body,\n                // Error 404 is special: it's ok not to have a secret.\n                // TODO(anna): retry\n                Err(e) => {\n                    return if e.get_reason().is_not_found() {\n                        // TODO: refactor this because it's weird\n                        // this is a failure to authenticate but we return Ok.\n                        Ok(AuthInfo::default())\n                    } else {\n                        Err(e.into())\n                    };\n                }\n            };\n\n            let secret = if body.role_secret.is_empty() {\n                None\n            } else {\n                let secret = scram::ServerSecret::parse(&body.role_secret)\n                    .map(AuthSecret::Scram)\n                    .ok_or(GetAuthInfoError::BadSecret)?;\n                Some(secret)\n            };\n            let allowed_ips = body.allowed_ips.unwrap_or_default();\n            Metrics::get()\n                .proxy\n                .allowed_ips_number\n                .observe(allowed_ips.len() as f64);\n            let allowed_vpc_endpoint_ids = body.allowed_vpc_endpoint_ids.unwrap_or_default();\n            Metrics::get()\n                .proxy\n                .allowed_vpc_endpoint_ids\n                .observe(allowed_vpc_endpoint_ids.len() as f64);\n            let block_public_connections = body.block_public_connections.unwrap_or_default();\n            let block_vpc_connections = body.block_vpc_connections.unwrap_or_default();\n            Ok(AuthInfo {\n                secret,\n                allowed_ips,\n                allowed_vpc_endpoint_ids,\n                project_id: body.project_id,\n                account_id: body.account_id,\n                access_blocker_flags: AccessBlockerFlags {\n                    public_access_blocked: block_public_connections,\n                   
 vpc_access_blocked: block_vpc_connections,\n                },\n                rate_limits: body.rate_limits,\n            })\n        }\n        .inspect_err(|e| tracing::debug!(error = ?e))\n        .instrument(info_span!(\"do_get_auth_info\"))\n        .await\n    }\n\n    async fn do_get_endpoint_jwks(\n        &self,\n        ctx: &RequestContext,\n        endpoint: &EndpointId,\n    ) -> Result<Vec<AuthRule>, GetEndpointJwksError> {\n        let request_id = ctx.session_id().to_string();\n        async {\n            let request = self\n                .endpoint\n                .get_with_url(|url| {\n                    url.path_segments_mut()\n                        .push(\"endpoints\")\n                        .push(endpoint.as_str())\n                        .push(\"jwks\");\n                })\n                .header(X_REQUEST_ID, &request_id)\n                .header(AUTHORIZATION, format!(\"Bearer {}\", &self.jwt))\n                .query(&[(\"session_id\", ctx.session_id())])\n                .build()\n                .map_err(GetEndpointJwksError::RequestBuild)?;\n\n            debug!(url = request.url().as_str(), \"sending http request\");\n            let start = Instant::now();\n            let pause = ctx.latency_timer_pause(crate::metrics::Waiting::Cplane);\n            let response = self\n                .endpoint\n                .execute(request)\n                .await\n                .map_err(GetEndpointJwksError::RequestExecute)?;\n            drop(pause);\n            info!(duration = ?start.elapsed(), \"received http response\");\n\n            let body = parse_body::<EndpointJwksResponse>(\n                response.status(),\n                response.bytes().await.map_err(ControlPlaneError::from)?,\n            )?;\n\n            let rules = body\n                .jwks\n                .into_iter()\n                .map(|jwks| AuthRule {\n                    id: jwks.id,\n                    jwks_url: jwks.jwks_url,\n              
      audience: jwks.jwt_audience,\n                    role_names: jwks.role_names,\n                })\n                .collect();\n\n            Ok(rules)\n        }\n        .inspect_err(|e| tracing::debug!(error = ?e))\n        .instrument(info_span!(\"do_get_endpoint_jwks\"))\n        .await\n    }\n\n    async fn do_wake_compute(\n        &self,\n        ctx: &RequestContext,\n        user_info: &ComputeUserInfo,\n    ) -> Result<NodeInfo, WakeComputeError> {\n        let request_id = ctx.session_id().to_string();\n        let application_name = ctx.console_application_name();\n        async {\n            let mut request_builder = self\n                .endpoint\n                .get_path(\"wake_compute\")\n                .header(\"X-Request-ID\", &request_id)\n                .header(\"Authorization\", format!(\"Bearer {}\", &self.jwt))\n                .query(&[(\"session_id\", ctx.session_id())])\n                .query(&[\n                    (\"application_name\", application_name.as_str()),\n                    (\"endpointish\", user_info.endpoint.as_str()),\n                ]);\n\n            let options = user_info.options.to_deep_object();\n            if !options.is_empty() {\n                request_builder = request_builder.query(&options);\n            }\n\n            let request = request_builder.build()?;\n\n            debug!(url = request.url().as_str(), \"sending http request\");\n            let start = Instant::now();\n            let pause = ctx.latency_timer_pause(crate::metrics::Waiting::Cplane);\n            let response = self.endpoint.execute(request).await?;\n            drop(pause);\n            info!(duration = ?start.elapsed(), \"received http response\");\n            let body = parse_body::<WakeCompute>(response.status(), response.bytes().await?)?;\n\n            let Some((host, port)) = parse_host_port(&body.address) else {\n                return Err(WakeComputeError::BadComputeAddress(body.address));\n            };\n\n 
           let host_addr = IpAddr::from_str(host).ok();\n\n            let ssl_mode = match &body.server_name {\n                Some(_) => SslMode::Require,\n                None => SslMode::Disable,\n            };\n            let host = match body.server_name {\n                Some(host) => host.into(),\n                None => host.into(),\n            };\n\n            let node = NodeInfo {\n                conn_info: compute::ConnectInfo {\n                    host_addr,\n                    host,\n                    port,\n                    ssl_mode,\n                },\n                aux: body.aux,\n            };\n\n            Ok(node)\n        }\n        .inspect_err(|e| tracing::debug!(error = ?e))\n        .instrument(info_span!(\"do_wake_compute\"))\n        .await\n    }\n}\n\nimpl super::ControlPlaneApi for NeonControlPlaneClient {\n    #[tracing::instrument(skip_all)]\n    async fn get_role_access_control(\n        &self,\n        ctx: &RequestContext,\n        endpoint: &EndpointId,\n        role: &RoleName,\n    ) -> Result<RoleAccessControl, GetAuthInfoError> {\n        let key = endpoint.normalize();\n\n        if let Some(role_control) = self.caches.project_info.get_role_secret(&key, role) {\n            return match role_control {\n                Err(msg) => {\n                    info!(key = &*key, \"found cached get_role_access_control error\");\n\n                    Err(GetAuthInfoError::ApiError(ControlPlaneError::Message(msg)))\n                }\n                Ok(role_control) => {\n                    debug!(key = &*key, \"found cached role access control\");\n                    Ok(role_control)\n                }\n            };\n        }\n\n        self.get_and_cache_auth_info(ctx, endpoint, role, &key, |_, role_control| {\n            role_control.clone()\n        })\n        .await\n    }\n\n    #[tracing::instrument(skip_all)]\n    async fn get_endpoint_access_control(\n        &self,\n        ctx: &RequestContext,\n  
      endpoint: &EndpointId,\n        role: &RoleName,\n    ) -> Result<EndpointAccessControl, GetAuthInfoError> {\n        let key = endpoint.normalize();\n\n        if let Some(control) = self.caches.project_info.get_endpoint_access(&key) {\n            return match control {\n                Err(msg) => {\n                    info!(\n                        key = &*key,\n                        \"found cached get_endpoint_access_control error\"\n                    );\n\n                    Err(GetAuthInfoError::ApiError(ControlPlaneError::Message(msg)))\n                }\n                Ok(control) => {\n                    debug!(key = &*key, \"found cached endpoint access control\");\n                    Ok(control)\n                }\n            };\n        }\n\n        self.get_and_cache_auth_info(ctx, endpoint, role, &key, |control, _| control.clone())\n            .await\n    }\n\n    #[tracing::instrument(skip_all)]\n    async fn get_endpoint_jwks(\n        &self,\n        ctx: &RequestContext,\n        endpoint: &EndpointId,\n    ) -> Result<Vec<AuthRule>, GetEndpointJwksError> {\n        self.do_get_endpoint_jwks(ctx, endpoint).await\n    }\n\n    #[tracing::instrument(skip_all)]\n    async fn wake_compute(\n        &self,\n        ctx: &RequestContext,\n        user_info: &ComputeUserInfo,\n    ) -> Result<CachedNodeInfo, WakeComputeError> {\n        let key = user_info.endpoint_cache_key();\n\n        macro_rules! 
check_cache {\n            () => {\n                if let Some(info) = self.caches.node_info.get_entry(&key) {\n                    return match info {\n                        Err(msg) => {\n                            info!(key = &*key, \"found cached wake_compute error\");\n\n                            Err(WakeComputeError::ControlPlane(ControlPlaneError::Message(\n                                msg,\n                            )))\n                        }\n                        Ok(info) => {\n                            debug!(key = &*key, \"found cached compute node info\");\n                            ctx.set_project(info.aux.clone());\n                            Ok(info)\n                        }\n                    };\n                }\n            };\n        }\n\n        // Every time we do a wakeup http request, the compute node will stay up\n        // for some time (highly depends on the console's scale-to-zero policy);\n        // The connection info remains the same during that period of time,\n        // which means that we might cache it to reduce the load and latency.\n        check_cache!();\n\n        let permit = self.locks.get_permit(&key).await?;\n\n        // after getting back a permit - it's possible the cache was filled\n        // double check\n        if permit.should_check_cache() {\n            // TODO: if there is something in the cache, mark the permit as success.\n            check_cache!();\n        }\n\n        // check rate limit\n        if !self\n            .wake_compute_endpoint_rate_limiter\n            .check(user_info.endpoint.normalize_intern(), 1)\n        {\n            return Err(WakeComputeError::TooManyConnections);\n        }\n\n        let node = permit.release_result(self.do_wake_compute(ctx, user_info).await);\n        match node {\n            Ok(node) => {\n                ctx.set_project(node.aux.clone());\n                debug!(key = &*key, \"created a cache entry for woken compute node\");\n\n 
               let mut stored_node = node.clone();\n                // store the cached node as 'warm_cached'\n                stored_node.aux.cold_start_info = ColdStartInfo::WarmCached;\n                self.caches.node_info.insert(key.clone(), Ok(stored_node));\n\n                Ok(Cached {\n                    token: Some((&self.caches.node_info, key)),\n                    value: node,\n                })\n            }\n            Err(err) => match err {\n                WakeComputeError::ControlPlane(ControlPlaneError::Message(ref msg)) => {\n                    let retry_info = msg.status.as_ref().and_then(|s| s.details.retry_info);\n\n                    // If we can retry this error, do not cache it,\n                    // unless we were given a retry delay.\n                    if msg.could_retry() && retry_info.is_none() {\n                        return Err(err);\n                    }\n\n                    debug!(\n                        key = &*key,\n                        \"created a cache entry for the wake compute error\"\n                    );\n\n                    self.caches.node_info.insert(key, Err(msg.clone()));\n\n                    Err(err)\n                }\n                err => Err(err),\n            },\n        }\n    }\n}\n\n/// Parse http response body, taking status code into account.\nfn parse_body<T: for<'a> serde::Deserialize<'a>>(\n    status: StatusCode,\n    body: Bytes,\n) -> Result<T, ControlPlaneError> {\n    if status.is_success() {\n        // We shouldn't log raw body because it may contain secrets.\n        info!(\"request succeeded, processing the body\");\n        return Ok(serde_json::from_slice(&body).map_err(std::io::Error::other)?);\n    }\n\n    // Log plaintext to be able to detect, whether there are some cases not covered by the error struct.\n    info!(\"response_error plaintext: {:?}\", body);\n\n    // Don't throw an error here because it's not as important\n    // as the fact that the request 
itself has failed.\n    let mut body = serde_json::from_slice(&body).unwrap_or_else(|e| {\n        warn!(\"failed to parse error body: {e}\");\n        Box::new(ControlPlaneErrorMessage {\n            error: \"reason unclear (malformed error message)\".into(),\n            http_status_code: status,\n            status: None,\n        })\n    });\n    body.http_status_code = status;\n\n    warn!(\"console responded with an error ({status}): {body:?}\");\n    Err(ControlPlaneError::Message(body))\n}\n\nfn parse_host_port(input: &str) -> Option<(&str, u16)> {\n    let (host, port) = input.rsplit_once(':')?;\n    let ipv6_brackets: &[_] = &['[', ']'];\n    Some((host.trim_matches(ipv6_brackets), port.parse().ok()?))\n}\n\n#[cfg(test)]\nmod tests {\n    use super::*;\n\n    #[test]\n    fn test_parse_host_port_v4() {\n        let (host, port) = parse_host_port(\"127.0.0.1:5432\").expect(\"failed to parse\");\n        assert_eq!(host, \"127.0.0.1\");\n        assert_eq!(port, 5432);\n    }\n\n    #[test]\n    fn test_parse_host_port_v6() {\n        let (host, port) = parse_host_port(\"[2001:db8::1]:5432\").expect(\"failed to parse\");\n        assert_eq!(host, \"2001:db8::1\");\n        assert_eq!(port, 5432);\n    }\n\n    #[test]\n    fn test_parse_host_port_url() {\n        let (host, port) = parse_host_port(\"compute-foo-bar-1234.default.svc.cluster.local:5432\")\n            .expect(\"failed to parse\");\n        assert_eq!(host, \"compute-foo-bar-1234.default.svc.cluster.local\");\n        assert_eq!(port, 5432);\n    }\n}\n"
  },
  {
    "path": "proxy/src/control_plane/client/mock.rs",
    "content": "//! Mock console backend which relies on a user-provided postgres instance.\n\nuse std::io;\nuse std::net::{IpAddr, Ipv4Addr};\nuse std::str::FromStr;\nuse std::sync::Arc;\n\nuse futures::TryFutureExt;\nuse postgres_client::config::SslMode;\nuse thiserror::Error;\nuse tokio_postgres::Client;\nuse tracing::{Instrument, error, info, info_span, warn};\n\nuse crate::auth::IpPattern;\nuse crate::auth::backend::ComputeUserInfo;\nuse crate::auth::backend::jwt::AuthRule;\nuse crate::cache::Cached;\nuse crate::cache::node_info::CachedNodeInfo;\nuse crate::compute::ConnectInfo;\nuse crate::context::RequestContext;\nuse crate::control_plane::errors::{\n    ControlPlaneError, GetAuthInfoError, GetEndpointJwksError, WakeComputeError,\n};\nuse crate::control_plane::messages::{EndpointRateLimitConfig, MetricsAuxInfo};\nuse crate::control_plane::{\n    AccessBlockerFlags, AuthInfo, AuthSecret, EndpointAccessControl, NodeInfo, RoleAccessControl,\n};\nuse crate::intern::RoleNameInt;\nuse crate::scram;\nuse crate::types::{BranchId, EndpointId, ProjectId, RoleName};\nuse crate::url::ApiUrl;\n\n#[derive(Debug, Error)]\nenum MockApiError {\n    #[error(\"Failed to read password: {0}\")]\n    PasswordNotSet(tokio_postgres::Error),\n}\n\nimpl From<MockApiError> for ControlPlaneError {\n    fn from(e: MockApiError) -> Self {\n        io::Error::other(e).into()\n    }\n}\n\nimpl From<tokio_postgres::Error> for ControlPlaneError {\n    fn from(e: tokio_postgres::Error) -> Self {\n        io::Error::other(e).into()\n    }\n}\n\n#[derive(Clone)]\npub struct MockControlPlane {\n    endpoint: ApiUrl,\n    ip_allowlist_check_enabled: bool,\n}\n\nimpl MockControlPlane {\n    pub fn new(endpoint: ApiUrl, ip_allowlist_check_enabled: bool) -> Self {\n        Self {\n            endpoint,\n            ip_allowlist_check_enabled,\n        }\n    }\n\n    pub(crate) fn url(&self) -> &str {\n        self.endpoint.as_str()\n    }\n\n    async fn do_get_auth_info(\n        &self,\n        
endpoint: &EndpointId,\n        role: &RoleName,\n    ) -> Result<AuthInfo, GetAuthInfoError> {\n        let (secret, allowed_ips) = async {\n            // Perhaps we could persist this connection, but then we'd have to\n            // write more code for reopening it if it got closed, which doesn't\n            // seem worth it.\n            let (client, connection) =\n                tokio_postgres::connect(self.endpoint.as_str(), tokio_postgres::NoTls).await?;\n\n            tokio::spawn(connection);\n\n            let secret = if let Some(entry) = get_execute_postgres_query(\n                &client,\n                \"select rolpassword from pg_catalog.pg_authid where rolname = $1\",\n                &[&role.as_str()],\n                \"rolpassword\",\n            )\n            .await?\n            {\n                info!(\"got a secret: {entry}\"); // safe since it's not a prod scenario\n                scram::ServerSecret::parse(&entry).map(AuthSecret::Scram)\n            } else {\n                warn!(\"user '{role}' does not exist\");\n                None\n            };\n\n            let allowed_ips = if self.ip_allowlist_check_enabled {\n                match get_execute_postgres_query(\n                    &client,\n                    \"select allowed_ips from neon_control_plane.endpoints where endpoint_id = $1\",\n                    &[&endpoint.as_str()],\n                    \"allowed_ips\",\n                )\n                .await?\n                {\n                    Some(s) => {\n                        info!(\"got allowed_ips: {s}\");\n                        s.split(',')\n                            .map(|s| {\n                                IpPattern::from_str(s).expect(\"mocked ip pattern should be correct\")\n                            })\n                            .collect()\n                    }\n                    None => vec![],\n                }\n            } else {\n                vec![]\n            };\n\n         
   Ok((secret, allowed_ips))\n        }\n        .inspect_err(|e: &GetAuthInfoError| tracing::error!(\"{e}\"))\n        .instrument(info_span!(\"postgres\", url = self.endpoint.as_str()))\n        .await?;\n        Ok(AuthInfo {\n            secret,\n            allowed_ips,\n            allowed_vpc_endpoint_ids: vec![],\n            project_id: None,\n            account_id: None,\n            access_blocker_flags: AccessBlockerFlags::default(),\n            rate_limits: EndpointRateLimitConfig::default(),\n        })\n    }\n\n    async fn do_get_endpoint_jwks(\n        &self,\n        endpoint: &EndpointId,\n    ) -> Result<Vec<AuthRule>, GetEndpointJwksError> {\n        let (client, connection) =\n            tokio_postgres::connect(self.endpoint.as_str(), tokio_postgres::NoTls).await?;\n\n        let connection = tokio::spawn(connection);\n\n        let res = client.query(\n                \"select id, jwks_url, audience, role_names from neon_control_plane.endpoint_jwks where endpoint_id = $1\",\n                &[&endpoint.as_str()],\n            )\n            .await?;\n\n        let mut rows = vec![];\n        for row in res {\n            rows.push(AuthRule {\n                id: row.get(\"id\"),\n                jwks_url: url::Url::parse(row.get(\"jwks_url\"))?,\n                audience: row.get(\"audience\"),\n                role_names: row\n                    .get::<_, Vec<String>>(\"role_names\")\n                    .into_iter()\n                    .map(RoleName::from)\n                    .map(|s| RoleNameInt::from(&s))\n                    .collect(),\n            });\n        }\n\n        drop(client);\n        connection.await??;\n\n        Ok(rows)\n    }\n\n    async fn do_wake_compute(&self) -> Result<NodeInfo, WakeComputeError> {\n        let port = self.endpoint.port().unwrap_or(5432);\n        let conn_info = match self.endpoint.host_str() {\n            None => ConnectInfo {\n                host_addr: 
Some(IpAddr::V4(Ipv4Addr::LOCALHOST)),\n                host: \"localhost\".into(),\n                port,\n                ssl_mode: SslMode::Disable,\n            },\n            Some(host) => ConnectInfo {\n                host_addr: IpAddr::from_str(host).ok(),\n                host: host.into(),\n                port,\n                ssl_mode: SslMode::Disable,\n            },\n        };\n\n        let node = NodeInfo {\n            conn_info,\n            aux: MetricsAuxInfo {\n                endpoint_id: (&EndpointId::from(\"endpoint\")).into(),\n                project_id: (&ProjectId::from(\"project\")).into(),\n                branch_id: (&BranchId::from(\"branch\")).into(),\n                compute_id: \"compute\".into(),\n                cold_start_info: crate::control_plane::messages::ColdStartInfo::Warm,\n            },\n        };\n\n        Ok(node)\n    }\n}\n\nasync fn get_execute_postgres_query(\n    client: &Client,\n    query: &str,\n    params: &[&(dyn tokio_postgres::types::ToSql + Sync)],\n    idx: &str,\n) -> Result<Option<String>, GetAuthInfoError> {\n    let rows = client.query(query, params).await?;\n\n    // We can get at most one row, because `rolname` is unique.\n    let Some(row) = rows.first() else {\n        // This means that the user doesn't exist, so there can be no secret.\n        // However, this is still a *valid* outcome which is very similar\n        // to getting `404 Not found` from the Neon console.\n        return Ok(None);\n    };\n\n    let entry = row.try_get(idx).map_err(MockApiError::PasswordNotSet)?;\n    Ok(Some(entry))\n}\n\nimpl super::ControlPlaneApi for MockControlPlane {\n    async fn get_endpoint_access_control(\n        &self,\n        _ctx: &RequestContext,\n        endpoint: &EndpointId,\n        role: &RoleName,\n    ) -> Result<EndpointAccessControl, GetAuthInfoError> {\n        let info = self.do_get_auth_info(endpoint, role).await?;\n        Ok(EndpointAccessControl {\n            allowed_ips: 
Arc::new(info.allowed_ips),\n            allowed_vpce: Arc::new(info.allowed_vpc_endpoint_ids),\n            flags: info.access_blocker_flags,\n            rate_limits: info.rate_limits,\n        })\n    }\n\n    async fn get_role_access_control(\n        &self,\n        _ctx: &RequestContext,\n        endpoint: &EndpointId,\n        role: &RoleName,\n    ) -> Result<RoleAccessControl, GetAuthInfoError> {\n        let info = self.do_get_auth_info(endpoint, role).await?;\n        Ok(RoleAccessControl {\n            secret: info.secret,\n        })\n    }\n\n    async fn get_endpoint_jwks(\n        &self,\n        _ctx: &RequestContext,\n        endpoint: &EndpointId,\n    ) -> Result<Vec<AuthRule>, GetEndpointJwksError> {\n        self.do_get_endpoint_jwks(endpoint).await\n    }\n\n    #[tracing::instrument(skip_all)]\n    async fn wake_compute(\n        &self,\n        _ctx: &RequestContext,\n        _user_info: &ComputeUserInfo,\n    ) -> Result<CachedNodeInfo, WakeComputeError> {\n        self.do_wake_compute().map_ok(Cached::new_uncached).await\n    }\n}\n"
  },
  {
    "path": "proxy/src/control_plane/client/mod.rs",
    "content": "pub mod cplane_proxy_v1;\n#[cfg(any(test, feature = \"testing\"))]\npub mod mock;\n\nuse std::hash::Hash;\nuse std::sync::Arc;\nuse std::time::Duration;\n\nuse clashmap::ClashMap;\nuse tokio::time::Instant;\nuse tracing::{debug, info};\n\nuse super::{EndpointAccessControl, RoleAccessControl};\nuse crate::auth::backend::ComputeUserInfo;\nuse crate::auth::backend::jwt::{AuthRule, FetchAuthRules, FetchAuthRulesError};\nuse crate::cache::node_info::{CachedNodeInfo, NodeInfoCache};\nuse crate::cache::project_info::ProjectInfoCache;\nuse crate::config::{CacheOptions, ProjectInfoCacheOptions};\nuse crate::context::RequestContext;\nuse crate::control_plane::{ControlPlaneApi, errors};\nuse crate::error::ReportableError;\nuse crate::metrics::ApiLockMetrics;\nuse crate::rate_limiter::{DynamicLimiter, Outcome, RateLimiterConfig, Token};\nuse crate::types::EndpointId;\n\n#[non_exhaustive]\n#[derive(Clone)]\npub enum ControlPlaneClient {\n    /// Proxy V1 control plane API\n    ProxyV1(cplane_proxy_v1::NeonControlPlaneClient),\n    /// Local mock control plane.\n    #[cfg(any(test, feature = \"testing\"))]\n    PostgresMock(mock::MockControlPlane),\n    /// Internal testing\n    #[cfg(test)]\n    #[allow(private_interfaces)]\n    Test(Box<dyn TestControlPlaneClient>),\n}\n\nimpl ControlPlaneApi for ControlPlaneClient {\n    async fn get_role_access_control(\n        &self,\n        ctx: &RequestContext,\n        endpoint: &EndpointId,\n        role: &crate::types::RoleName,\n    ) -> Result<RoleAccessControl, errors::GetAuthInfoError> {\n        match self {\n            Self::ProxyV1(api) => api.get_role_access_control(ctx, endpoint, role).await,\n            #[cfg(any(test, feature = \"testing\"))]\n            Self::PostgresMock(api) => api.get_role_access_control(ctx, endpoint, role).await,\n            #[cfg(test)]\n            Self::Test(_api) => {\n                unreachable!(\"this function should never be called in the test backend\")\n            }\n   
     }\n    }\n\n    async fn get_endpoint_access_control(\n        &self,\n        ctx: &RequestContext,\n        endpoint: &EndpointId,\n        role: &crate::types::RoleName,\n    ) -> Result<EndpointAccessControl, errors::GetAuthInfoError> {\n        match self {\n            Self::ProxyV1(api) => api.get_endpoint_access_control(ctx, endpoint, role).await,\n            #[cfg(any(test, feature = \"testing\"))]\n            Self::PostgresMock(api) => api.get_endpoint_access_control(ctx, endpoint, role).await,\n            #[cfg(test)]\n            Self::Test(api) => api.get_access_control(),\n        }\n    }\n\n    async fn get_endpoint_jwks(\n        &self,\n        ctx: &RequestContext,\n        endpoint: &EndpointId,\n    ) -> Result<Vec<AuthRule>, errors::GetEndpointJwksError> {\n        match self {\n            Self::ProxyV1(api) => api.get_endpoint_jwks(ctx, endpoint).await,\n            #[cfg(any(test, feature = \"testing\"))]\n            Self::PostgresMock(api) => api.get_endpoint_jwks(ctx, endpoint).await,\n            #[cfg(test)]\n            Self::Test(_api) => Ok(vec![]),\n        }\n    }\n\n    async fn wake_compute(\n        &self,\n        ctx: &RequestContext,\n        user_info: &ComputeUserInfo,\n    ) -> Result<CachedNodeInfo, errors::WakeComputeError> {\n        match self {\n            Self::ProxyV1(api) => api.wake_compute(ctx, user_info).await,\n            #[cfg(any(test, feature = \"testing\"))]\n            Self::PostgresMock(api) => api.wake_compute(ctx, user_info).await,\n            #[cfg(test)]\n            Self::Test(api) => api.wake_compute(),\n        }\n    }\n}\n\n#[cfg(test)]\npub(crate) trait TestControlPlaneClient: Send + Sync + 'static {\n    fn wake_compute(&self) -> Result<CachedNodeInfo, errors::WakeComputeError>;\n\n    fn get_access_control(&self) -> Result<EndpointAccessControl, errors::GetAuthInfoError>;\n\n    fn dyn_clone(&self) -> Box<dyn TestControlPlaneClient>;\n}\n\n#[cfg(test)]\nimpl Clone for Box<dyn 
TestControlPlaneClient> {\n    fn clone(&self) -> Self {\n        TestControlPlaneClient::dyn_clone(&**self)\n    }\n}\n\n/// Various caches for [`control_plane`](super).\npub struct ApiCaches {\n    /// Cache for the `wake_compute` API method.\n    pub(crate) node_info: NodeInfoCache,\n    /// Cache which stores project_id -> endpoint_ids mapping.\n    pub project_info: Arc<ProjectInfoCache>,\n}\n\nimpl ApiCaches {\n    pub fn new(\n        wake_compute_cache_config: CacheOptions,\n        project_info_cache_config: ProjectInfoCacheOptions,\n    ) -> Self {\n        Self {\n            node_info: NodeInfoCache::new(wake_compute_cache_config),\n            project_info: Arc::new(ProjectInfoCache::new(project_info_cache_config)),\n        }\n    }\n}\n\n/// Various caches for [`control_plane`](super).\npub struct ApiLocks<K> {\n    name: &'static str,\n    node_locks: ClashMap<K, Arc<DynamicLimiter>>,\n    config: RateLimiterConfig,\n    timeout: Duration,\n    epoch: std::time::Duration,\n    metrics: &'static ApiLockMetrics,\n}\n\n#[derive(Debug, thiserror::Error)]\npub(crate) enum ApiLockError {\n    #[error(\"timeout acquiring resource permit\")]\n    TimeoutError(#[from] tokio::time::error::Elapsed),\n}\n\nimpl ReportableError for ApiLockError {\n    fn get_error_kind(&self) -> crate::error::ErrorKind {\n        match self {\n            ApiLockError::TimeoutError(_) => crate::error::ErrorKind::RateLimit,\n        }\n    }\n}\n\nimpl<K: Hash + Eq + Clone> ApiLocks<K> {\n    pub fn new(\n        name: &'static str,\n        config: RateLimiterConfig,\n        shards: usize,\n        timeout: Duration,\n        epoch: std::time::Duration,\n        metrics: &'static ApiLockMetrics,\n    ) -> Self {\n        Self {\n            name,\n            node_locks: ClashMap::with_shard_amount(shards),\n            config,\n            timeout,\n            epoch,\n            metrics,\n        }\n    }\n\n    pub(crate) async fn get_permit(&self, key: &K) -> 
Result<WakeComputePermit, ApiLockError> {\n        if self.config.initial_limit == 0 {\n            return Ok(WakeComputePermit {\n                permit: Token::disabled(),\n            });\n        }\n        let now = Instant::now();\n        let semaphore = {\n            // get fast path\n            if let Some(semaphore) = self.node_locks.get(key) {\n                semaphore.clone()\n            } else {\n                self.node_locks\n                    .entry(key.clone())\n                    .or_insert_with(|| {\n                        self.metrics.semaphores_registered.inc();\n                        DynamicLimiter::new(self.config)\n                    })\n                    .clone()\n            }\n        };\n        let permit = semaphore.acquire_timeout(self.timeout).await;\n\n        self.metrics\n            .semaphore_acquire_seconds\n            .observe(now.elapsed().as_secs_f64());\n\n        if permit.is_ok() {\n            debug!(elapsed = ?now.elapsed(), \"acquired permit\");\n        } else {\n            debug!(elapsed = ?now.elapsed(), \"timed out acquiring permit\");\n        }\n        Ok(WakeComputePermit { permit: permit? 
})\n    }\n\n    pub async fn garbage_collect_worker(&self) {\n        if self.config.initial_limit == 0 {\n            return;\n        }\n        let mut interval =\n            tokio::time::interval(self.epoch / (self.node_locks.shards().len()) as u32);\n        loop {\n            for (i, shard) in self.node_locks.shards().iter().enumerate() {\n                interval.tick().await;\n                // temporary lock a single shard and then clear any semaphores that aren't currently checked out\n                // race conditions: if strong_count == 1, there's no way that it can increase while the shard is locked\n                // therefore releasing it is safe from race conditions\n                info!(\n                    name = self.name,\n                    shard = i,\n                    \"performing epoch reclamation on api lock\"\n                );\n                let mut lock = shard.write();\n                let timer = self.metrics.reclamation_lag_seconds.start_timer();\n                let count = lock\n                    .extract_if(|(_, semaphore)| Arc::strong_count(semaphore) == 1)\n                    .count();\n                drop(lock);\n                self.metrics.semaphores_unregistered.inc_by(count as u64);\n                timer.observe();\n            }\n        }\n    }\n}\n\npub(crate) struct WakeComputePermit {\n    permit: Token,\n}\n\nimpl WakeComputePermit {\n    pub(crate) fn should_check_cache(&self) -> bool {\n        !self.permit.is_disabled()\n    }\n    pub(crate) fn release(self, outcome: Outcome) {\n        self.permit.release(outcome);\n    }\n    pub(crate) fn release_result<T, E>(self, res: Result<T, E>) -> Result<T, E> {\n        match res {\n            Ok(_) => self.release(Outcome::Success),\n            Err(_) => self.release(Outcome::Overload),\n        }\n        res\n    }\n}\n\nimpl FetchAuthRules for ControlPlaneClient {\n    async fn fetch_auth_rules(\n        &self,\n        ctx: &RequestContext,\n    
    endpoint: EndpointId,\n    ) -> Result<Vec<AuthRule>, FetchAuthRulesError> {\n        self.get_endpoint_jwks(ctx, &endpoint)\n            .await\n            .map_err(FetchAuthRulesError::GetEndpointJwks)\n    }\n}\n"
  },
  {
    "path": "proxy/src/control_plane/errors.rs",
    "content": "use std::io;\n\nuse thiserror::Error;\n\nuse crate::control_plane::client::ApiLockError;\nuse crate::control_plane::messages::{self, ControlPlaneErrorMessage, Reason};\nuse crate::error::{ErrorKind, ReportableError, UserFacingError};\nuse crate::proxy::retry::CouldRetry;\n\n/// A go-to error message which doesn't leak any detail.\npub(crate) const REQUEST_FAILED: &str = \"Control plane request failed\";\n\n/// Common console API error.\n#[derive(Debug, Error)]\npub(crate) enum ControlPlaneError {\n    /// Error returned by the console itself.\n    #[error(\"{REQUEST_FAILED} with {0}\")]\n    Message(Box<ControlPlaneErrorMessage>),\n\n    /// Various IO errors like broken pipe or malformed payload.\n    #[error(\"{REQUEST_FAILED}: {0}\")]\n    Transport(#[from] std::io::Error),\n}\n\nimpl ControlPlaneError {\n    /// Returns HTTP status code if it's the reason for failure.\n    pub(crate) fn get_reason(&self) -> messages::Reason {\n        match self {\n            ControlPlaneError::Message(e) => e.get_reason(),\n            ControlPlaneError::Transport(_) => messages::Reason::Unknown,\n        }\n    }\n}\n\nimpl UserFacingError for ControlPlaneError {\n    fn to_string_client(&self) -> String {\n        match self {\n            // To minimize risks, only select errors are forwarded to users.\n            ControlPlaneError::Message(c) => c.get_user_facing_message(),\n            ControlPlaneError::Transport(_) => REQUEST_FAILED.to_owned(),\n        }\n    }\n}\n\nimpl ReportableError for ControlPlaneError {\n    fn get_error_kind(&self) -> ErrorKind {\n        match self {\n            ControlPlaneError::Message(e) => match e.get_reason() {\n                Reason::RoleProtected\n                | Reason::ResourceNotFound\n                | Reason::ProjectNotFound\n                | Reason::EndpointNotFound\n                | Reason::EndpointDisabled\n                | Reason::BranchNotFound\n                | Reason::WrongLsnOrTimestamp => 
ErrorKind::User,\n\n                Reason::RateLimitExceeded => ErrorKind::ServiceRateLimit,\n\n                Reason::NonDefaultBranchComputeTimeExceeded\n                | Reason::ActiveTimeQuotaExceeded\n                | Reason::ComputeTimeQuotaExceeded\n                | Reason::WrittenDataQuotaExceeded\n                | Reason::DataTransferQuotaExceeded\n                | Reason::LogicalSizeQuotaExceeded\n                | Reason::ActiveEndpointsLimitExceeded => ErrorKind::Quota,\n\n                Reason::ConcurrencyLimitReached\n                | Reason::LockAlreadyTaken\n                | Reason::RunningOperations\n                | Reason::EndpointIdle\n                | Reason::ProjectUnderMaintenance\n                | Reason::Unknown => ErrorKind::ControlPlane,\n            },\n            ControlPlaneError::Transport(_) => ErrorKind::ControlPlane,\n        }\n    }\n}\n\nimpl CouldRetry for ControlPlaneError {\n    fn could_retry(&self) -> bool {\n        match self {\n            // retry some transport errors\n            Self::Transport(io) => io.could_retry(),\n            Self::Message(e) => e.could_retry(),\n        }\n    }\n}\n\nimpl From<reqwest::Error> for ControlPlaneError {\n    fn from(e: reqwest::Error) -> Self {\n        io::Error::other(e).into()\n    }\n}\n\nimpl From<reqwest_middleware::Error> for ControlPlaneError {\n    fn from(e: reqwest_middleware::Error) -> Self {\n        io::Error::other(e).into()\n    }\n}\n\n#[derive(Debug, Error)]\npub(crate) enum GetAuthInfoError {\n    // We shouldn't include the actual secret here.\n    #[error(\"Console responded with a malformed auth secret\")]\n    BadSecret,\n\n    #[error(transparent)]\n    ApiError(ControlPlaneError),\n}\n\n// This allows more useful interactions than `#[from]`.\nimpl<E: Into<ControlPlaneError>> From<E> for GetAuthInfoError {\n    fn from(e: E) -> Self {\n        Self::ApiError(e.into())\n    }\n}\n\nimpl UserFacingError for GetAuthInfoError {\n    fn 
to_string_client(&self) -> String {\n        match self {\n            // We absolutely should not leak any secrets!\n            Self::BadSecret => REQUEST_FAILED.to_owned(),\n            // However, API might return a meaningful error.\n            Self::ApiError(e) => e.to_string_client(),\n        }\n    }\n}\n\nimpl ReportableError for GetAuthInfoError {\n    fn get_error_kind(&self) -> ErrorKind {\n        match self {\n            Self::BadSecret => ErrorKind::ControlPlane,\n            Self::ApiError(_) => ErrorKind::ControlPlane,\n        }\n    }\n}\n\n#[derive(Debug, Error)]\npub(crate) enum WakeComputeError {\n    #[error(\"Console responded with a malformed compute address: {0}\")]\n    BadComputeAddress(Box<str>),\n\n    #[error(transparent)]\n    ControlPlane(ControlPlaneError),\n\n    #[error(\"Too many connections attempts\")]\n    TooManyConnections,\n\n    #[error(\"error acquiring resource permit: {0}\")]\n    TooManyConnectionAttempts(#[from] ApiLockError),\n}\n\n// This allows more useful interactions than `#[from]`.\nimpl<E: Into<ControlPlaneError>> From<E> for WakeComputeError {\n    fn from(e: E) -> Self {\n        Self::ControlPlane(e.into())\n    }\n}\n\nimpl UserFacingError for WakeComputeError {\n    fn to_string_client(&self) -> String {\n        match self {\n            // We shouldn't show user the address even if it's broken.\n            // Besides, user is unlikely to care about this detail.\n            Self::BadComputeAddress(_) => REQUEST_FAILED.to_owned(),\n            // However, control plane might return a meaningful error.\n            Self::ControlPlane(e) => e.to_string_client(),\n\n            Self::TooManyConnections => self.to_string(),\n\n            Self::TooManyConnectionAttempts(_) => {\n                \"Failed to acquire permit to connect to the database. 
Too many database connection attempts are currently ongoing.\".to_owned()\n            }\n        }\n    }\n}\n\nimpl ReportableError for WakeComputeError {\n    fn get_error_kind(&self) -> crate::error::ErrorKind {\n        match self {\n            Self::BadComputeAddress(_) => crate::error::ErrorKind::ControlPlane,\n            Self::ControlPlane(e) => e.get_error_kind(),\n            Self::TooManyConnections => crate::error::ErrorKind::RateLimit,\n            Self::TooManyConnectionAttempts(e) => e.get_error_kind(),\n        }\n    }\n}\n\nimpl CouldRetry for WakeComputeError {\n    fn could_retry(&self) -> bool {\n        match self {\n            Self::BadComputeAddress(_) => false,\n            Self::ControlPlane(e) => e.could_retry(),\n            Self::TooManyConnections => false,\n            Self::TooManyConnectionAttempts(_) => false,\n        }\n    }\n}\n\n#[derive(Debug, Error)]\npub enum GetEndpointJwksError {\n    #[error(\"failed to build control plane request: {0}\")]\n    RequestBuild(#[source] reqwest::Error),\n\n    #[error(\"failed to send control plane request: {0}\")]\n    RequestExecute(#[source] reqwest_middleware::Error),\n\n    #[error(transparent)]\n    ControlPlane(#[from] ControlPlaneError),\n\n    #[cfg(any(test, feature = \"testing\"))]\n    #[error(transparent)]\n    TokioPostgres(#[from] tokio_postgres::Error),\n\n    #[cfg(any(test, feature = \"testing\"))]\n    #[error(transparent)]\n    ParseUrl(#[from] url::ParseError),\n\n    #[cfg(any(test, feature = \"testing\"))]\n    #[error(transparent)]\n    TaskJoin(#[from] tokio::task::JoinError),\n}\n"
  },
  {
    "path": "proxy/src/control_plane/messages.rs",
    "content": "use std::fmt::{self, Display};\nuse std::time::Duration;\n\nuse measured::FixedCardinalityLabel;\nuse serde::{Deserialize, Serialize};\nuse smol_str::SmolStr;\nuse tokio::time::Instant;\n\nuse crate::auth::IpPattern;\nuse crate::intern::{AccountIdInt, BranchIdInt, EndpointIdInt, ProjectIdInt, RoleNameInt};\nuse crate::proxy::retry::CouldRetry;\n\n/// Generic error response with human-readable description.\n/// Note that we can't always present it to user as is.\n#[derive(Debug, Deserialize, Clone)]\npub(crate) struct ControlPlaneErrorMessage {\n    pub(crate) error: Box<str>,\n    #[serde(skip)]\n    pub(crate) http_status_code: http::StatusCode,\n    pub(crate) status: Option<Status>,\n}\n\nimpl ControlPlaneErrorMessage {\n    pub(crate) fn get_reason(&self) -> Reason {\n        self.status\n            .as_ref()\n            .and_then(|s| s.details.error_info.as_ref())\n            .map_or(Reason::Unknown, |e| e.reason)\n    }\n\n    pub(crate) fn get_user_facing_message(&self) -> String {\n        use super::errors::REQUEST_FAILED;\n        self.status\n            .as_ref()\n            .and_then(|s| s.details.user_facing_message.as_ref())\n            .map_or_else(|| {\n                // Ask @neondatabase/control-plane for review before adding more.\n                match self.http_status_code {\n                    http::StatusCode::NOT_FOUND => {\n                        // Status 404: failed to get a project-related resource.\n                        format!(\"{REQUEST_FAILED}: endpoint cannot be found\")\n                    }\n                    http::StatusCode::NOT_ACCEPTABLE => {\n                        // Status 406: endpoint is disabled (we don't allow connections).\n                        format!(\"{REQUEST_FAILED}: endpoint is disabled\")\n                    }\n                    http::StatusCode::LOCKED | http::StatusCode::UNPROCESSABLE_ENTITY => {\n                        // Status 423: project might be in maintenance mode 
(or bad state), or quotas exceeded.\n                        format!(\"{REQUEST_FAILED}: endpoint is temporarily unavailable. Check your quotas and/or contact our support.\")\n                    }\n                    _ => REQUEST_FAILED.to_owned(),\n                }\n            }, |m| m.message.clone().into())\n    }\n}\n\nimpl Display for ControlPlaneErrorMessage {\n    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {\n        let msg: &str = self\n            .status\n            .as_ref()\n            .and_then(|s| s.details.user_facing_message.as_ref())\n            .map_or_else(|| self.error.as_ref(), |m| m.message.as_ref());\n        write!(f, \"{msg}\")\n    }\n}\n\nimpl CouldRetry for ControlPlaneErrorMessage {\n    fn could_retry(&self) -> bool {\n        // If the error message does not have a status,\n        // the error is unknown and probably should not retry automatically\n        let Some(status) = &self.status else {\n            return false;\n        };\n\n        // retry if the retry info is set.\n        if status.details.retry_info.is_some() {\n            return true;\n        }\n\n        // if no retry info set, attempt to use the error code to guess the retry state.\n        let reason = status\n            .details\n            .error_info\n            .map_or(Reason::Unknown, |e| e.reason);\n\n        reason.can_retry()\n    }\n}\n\n#[derive(Debug, Deserialize, Clone)]\n#[allow(dead_code)]\npub(crate) struct Status {\n    pub(crate) code: Box<str>,\n    pub(crate) message: Box<str>,\n    pub(crate) details: Details,\n}\n\n#[derive(Debug, Deserialize, Clone)]\npub(crate) struct Details {\n    pub(crate) error_info: Option<ErrorInfo>,\n    pub(crate) retry_info: Option<RetryInfo>,\n    pub(crate) user_facing_message: Option<UserFacingMessage>,\n}\n\n#[derive(Copy, Clone, Debug, Deserialize)]\npub(crate) struct ErrorInfo {\n    pub(crate) reason: Reason,\n    // Schema could also have `metadata` field, but it's not 
structured. Skip it for now.\n}\n\n#[derive(Clone, Copy, Debug, Deserialize, Default, PartialEq, Eq)]\npub(crate) enum Reason {\n    /// RoleProtected indicates that the role is protected and the attempted operation is not permitted on protected roles.\n    #[serde(rename = \"ROLE_PROTECTED\")]\n    RoleProtected,\n    /// ResourceNotFound indicates that a resource (project, endpoint, branch, etc.) wasn't found,\n    /// usually due to the provided ID not being correct or because the subject doesn't have enough permissions to\n    /// access the requested resource.\n    /// Prefer a more specific reason if possible, e.g., ProjectNotFound, EndpointNotFound, etc.\n    #[serde(rename = \"RESOURCE_NOT_FOUND\")]\n    ResourceNotFound,\n    /// ProjectNotFound indicates that the project wasn't found, usually due to the provided ID not being correct,\n    /// or that the subject doesn't have enough permissions to access the requested project.\n    #[serde(rename = \"PROJECT_NOT_FOUND\")]\n    ProjectNotFound,\n    /// EndpointNotFound indicates that the endpoint wasn't found, usually due to the provided ID not being correct,\n    /// or that the subject doesn't have enough permissions to access the requested endpoint.\n    #[serde(rename = \"ENDPOINT_NOT_FOUND\")]\n    EndpointNotFound,\n    /// EndpointDisabled indicates that the endpoint has been disabled and does not accept connections.\n    #[serde(rename = \"ENDPOINT_DISABLED\")]\n    EndpointDisabled,\n    /// BranchNotFound indicates that the branch wasn't found, usually due to the provided ID not being correct,\n    /// or that the subject doesn't have enough permissions to access the requested branch.\n    #[serde(rename = \"BRANCH_NOT_FOUND\")]\n    BranchNotFound,\n    /// WrongLsnOrTimestamp indicates that the specified LSN or timestamp are wrong.\n    #[serde(rename = \"WRONG_LSN_OR_TIMESTAMP\")]\n    WrongLsnOrTimestamp,\n    /// RateLimitExceeded indicates that the rate limit for the operation has been 
exceeded.\n    #[serde(rename = \"RATE_LIMIT_EXCEEDED\")]\n    RateLimitExceeded,\n    /// NonDefaultBranchComputeTimeExceeded indicates that the compute time quota of non-default branches has been\n    /// exceeded.\n    #[serde(rename = \"NON_PRIMARY_BRANCH_COMPUTE_TIME_EXCEEDED\")]\n    NonDefaultBranchComputeTimeExceeded,\n    /// ActiveTimeQuotaExceeded indicates that the active time quota was exceeded.\n    #[serde(rename = \"ACTIVE_TIME_QUOTA_EXCEEDED\")]\n    ActiveTimeQuotaExceeded,\n    /// ComputeTimeQuotaExceeded indicates that the compute time quota was exceeded.\n    #[serde(rename = \"COMPUTE_TIME_QUOTA_EXCEEDED\")]\n    ComputeTimeQuotaExceeded,\n    /// WrittenDataQuotaExceeded indicates that the written data quota was exceeded.\n    #[serde(rename = \"WRITTEN_DATA_QUOTA_EXCEEDED\")]\n    WrittenDataQuotaExceeded,\n    /// DataTransferQuotaExceeded indicates that the data transfer quota was exceeded.\n    #[serde(rename = \"DATA_TRANSFER_QUOTA_EXCEEDED\")]\n    DataTransferQuotaExceeded,\n    /// LogicalSizeQuotaExceeded indicates that the logical size quota was exceeded.\n    #[serde(rename = \"LOGICAL_SIZE_QUOTA_EXCEEDED\")]\n    LogicalSizeQuotaExceeded,\n    /// ActiveEndpointsLimitExceeded indicates that the limit of concurrently active endpoints was exceeded.\n    #[serde(rename = \"ACTIVE_ENDPOINTS_LIMIT_EXCEEDED\")]\n    ActiveEndpointsLimitExceeded,\n    /// RunningOperations indicates that the project already has some running operations\n    /// and scheduling of new ones is prohibited.\n    #[serde(rename = \"RUNNING_OPERATIONS\")]\n    RunningOperations,\n    /// ConcurrencyLimitReached indicates that the concurrency limit for an action was reached.\n    #[serde(rename = \"CONCURRENCY_LIMIT_REACHED\")]\n    ConcurrencyLimitReached,\n    /// LockAlreadyTaken indicates that the we attempted to take a lock that was already taken.\n    #[serde(rename = \"LOCK_ALREADY_TAKEN\")]\n    LockAlreadyTaken,\n    /// EndpointIdle indicates that the 
endpoint cannot become active, because it's idle.\n    #[serde(rename = \"ENDPOINT_IDLE\")]\n    EndpointIdle,\n    /// ProjectUnderMaintenance indicates that the project is currently ongoing maintenance,\n    /// and thus cannot accept connections.\n    #[serde(rename = \"PROJECT_UNDER_MAINTENANCE\")]\n    ProjectUnderMaintenance,\n    #[default]\n    #[serde(other)]\n    Unknown,\n}\n\nimpl Reason {\n    pub(crate) fn is_not_found(self) -> bool {\n        matches!(\n            self,\n            Reason::ResourceNotFound\n                | Reason::ProjectNotFound\n                | Reason::EndpointNotFound\n                | Reason::BranchNotFound\n        )\n    }\n\n    pub(crate) fn can_retry(self) -> bool {\n        match self {\n            // do not retry role protected errors\n            // not a transient error\n            Reason::RoleProtected => false,\n            // on retry, it will still not be found or valid\n            Reason::ResourceNotFound\n            | Reason::ProjectNotFound\n            | Reason::EndpointNotFound\n            | Reason::EndpointDisabled\n            | Reason::BranchNotFound\n            | Reason::WrongLsnOrTimestamp => false,\n            // we were asked to go away\n            Reason::RateLimitExceeded\n            | Reason::NonDefaultBranchComputeTimeExceeded\n            | Reason::ActiveTimeQuotaExceeded\n            | Reason::ComputeTimeQuotaExceeded\n            | Reason::WrittenDataQuotaExceeded\n            | Reason::DataTransferQuotaExceeded\n            | Reason::LogicalSizeQuotaExceeded\n            | Reason::ActiveEndpointsLimitExceeded => false,\n            // transient error. control plane is currently busy\n            // but might be ready soon\n            Reason::RunningOperations\n            | Reason::ConcurrencyLimitReached\n            | Reason::LockAlreadyTaken\n            | Reason::EndpointIdle\n            | Reason::ProjectUnderMaintenance => true,\n            // unknown error. 
better not retry it.\n            Reason::Unknown => false,\n        }\n    }\n}\n\n#[derive(Copy, Clone, Debug, Deserialize)]\n#[allow(dead_code)]\npub(crate) struct RetryInfo {\n    #[serde(rename = \"retry_delay_ms\", deserialize_with = \"milliseconds_from_now\")]\n    pub(crate) retry_at: Instant,\n}\n\nfn milliseconds_from_now<'de, D: serde::Deserializer<'de>>(d: D) -> Result<Instant, D::Error> {\n    let millis = u64::deserialize(d)?;\n    Ok(Instant::now() + Duration::from_millis(millis))\n}\n\n#[derive(Debug, Deserialize, Clone)]\npub(crate) struct UserFacingMessage {\n    pub(crate) message: Box<str>,\n}\n\n/// Response which holds client's auth secret, e.g. [`crate::scram::ServerSecret`].\n/// Returned by the `/get_endpoint_access_control` API method.\n#[derive(Deserialize)]\npub(crate) struct GetEndpointAccessControl {\n    pub(crate) role_secret: Box<str>,\n\n    pub(crate) project_id: Option<ProjectIdInt>,\n    pub(crate) account_id: Option<AccountIdInt>,\n\n    pub(crate) allowed_ips: Option<Vec<IpPattern>>,\n    pub(crate) allowed_vpc_endpoint_ids: Option<Vec<String>>,\n    pub(crate) block_public_connections: Option<bool>,\n    pub(crate) block_vpc_connections: Option<bool>,\n\n    #[serde(default)]\n    pub(crate) rate_limits: EndpointRateLimitConfig,\n}\n\n#[derive(Copy, Clone, Deserialize, Default, Debug)]\npub struct EndpointRateLimitConfig {\n    pub connection_attempts: ConnectionAttemptsLimit,\n}\n\n#[derive(Copy, Clone, Deserialize, Default, Debug)]\npub struct ConnectionAttemptsLimit {\n    pub tcp: Option<LeakyBucketSetting>,\n    pub ws: Option<LeakyBucketSetting>,\n    pub http: Option<LeakyBucketSetting>,\n}\n\n#[derive(Copy, Clone, Deserialize, Debug)]\npub struct LeakyBucketSetting {\n    pub rps: f64,\n    pub burst: f64,\n}\n\n/// Response which holds compute node's `host:port` pair.\n/// Returned by the `/proxy_wake_compute` API method.\n#[derive(Debug, Deserialize)]\npub(crate) struct WakeCompute {\n    pub(crate) address: 
Box<str>,\n    pub(crate) server_name: Option<String>,\n    pub(crate) aux: MetricsAuxInfo,\n}\n\n/// Async response which concludes the console redirect auth flow.\n/// Also known as `kickResponse` in the console.\n#[derive(Debug, Deserialize)]\npub(crate) struct KickSession<'a> {\n    /// Session ID is assigned by the proxy.\n    pub(crate) session_id: &'a str,\n\n    /// Compute node connection params.\n    #[serde(deserialize_with = \"KickSession::parse_db_info\")]\n    pub(crate) result: DatabaseInfo,\n}\n\nimpl KickSession<'_> {\n    fn parse_db_info<'de, D>(des: D) -> Result<DatabaseInfo, D::Error>\n    where\n        D: serde::Deserializer<'de>,\n    {\n        #[derive(Deserialize)]\n        enum Wrapper {\n            // Currently, console only reports `Success`.\n            // `Failure(String)` used to be here... RIP.\n            Success(DatabaseInfo),\n        }\n\n        Wrapper::deserialize(des).map(|x| match x {\n            Wrapper::Success(info) => info,\n        })\n    }\n}\n\n/// Compute node connection params.\n#[derive(Deserialize)]\npub(crate) struct DatabaseInfo {\n    pub(crate) host: Box<str>,\n    pub(crate) port: u16,\n    pub(crate) dbname: Box<str>,\n    pub(crate) user: Box<str>,\n    /// Console always provides a password, but it might\n    /// be inconvenient for debug with local PG instance.\n    pub(crate) password: Option<Box<str>>,\n    pub(crate) aux: MetricsAuxInfo,\n    #[serde(default)]\n    pub(crate) allowed_ips: Option<Vec<IpPattern>>,\n    #[serde(default)]\n    pub(crate) allowed_vpc_endpoint_ids: Option<Vec<String>>,\n    #[serde(default)]\n    pub(crate) public_access_allowed: Option<bool>,\n}\n\n// Manually implement debug to omit sensitive info.\nimpl fmt::Debug for DatabaseInfo {\n    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {\n        f.debug_struct(\"DatabaseInfo\")\n            .field(\"host\", &self.host)\n            .field(\"port\", &self.port)\n            .field(\"dbname\", 
&self.dbname)\n            .field(\"user\", &self.user)\n            .field(\"allowed_ips\", &self.allowed_ips)\n            .field(\"allowed_vpc_endpoint_ids\", &self.allowed_vpc_endpoint_ids)\n            .finish_non_exhaustive()\n    }\n}\n\n/// Various labels for prometheus metrics.\n/// Also known as `ProxyMetricsAuxInfo` in the console.\n#[derive(Debug, Deserialize, Clone)]\npub(crate) struct MetricsAuxInfo {\n    pub(crate) endpoint_id: EndpointIdInt,\n    pub(crate) project_id: ProjectIdInt,\n    pub(crate) branch_id: BranchIdInt,\n    // note: we don't use interned strings for compute IDs.\n    // they churn too quickly and we have no way to clean up interned strings.\n    pub(crate) compute_id: SmolStr,\n    #[serde(default)]\n    pub(crate) cold_start_info: ColdStartInfo,\n}\n\n#[derive(Debug, Default, Serialize, Deserialize, Clone, Copy, FixedCardinalityLabel)]\n#[serde(rename_all = \"snake_case\")]\npub enum ColdStartInfo {\n    #[default]\n    Unknown,\n    /// Compute was already running\n    Warm,\n    #[serde(rename = \"pool_hit\")]\n    #[label(rename = \"pool_hit\")]\n    /// Compute was not running but there was an available VM\n    VmPoolHit,\n    #[serde(rename = \"pool_miss\")]\n    #[label(rename = \"pool_miss\")]\n    /// Compute was not running and there were no VMs available\n    VmPoolMiss,\n\n    // not provided by control plane\n    /// Connection available from HTTP pool\n    HttpPoolHit,\n    /// Cached connection info\n    WarmCached,\n}\n\nimpl ColdStartInfo {\n    pub(crate) fn as_str(self) -> &'static str {\n        match self {\n            ColdStartInfo::Unknown => \"unknown\",\n            ColdStartInfo::Warm => \"warm\",\n            ColdStartInfo::VmPoolHit => \"pool_hit\",\n            ColdStartInfo::VmPoolMiss => \"pool_miss\",\n            ColdStartInfo::HttpPoolHit => \"http_pool_hit\",\n            ColdStartInfo::WarmCached => \"warm_cached\",\n        }\n    }\n}\n\n#[derive(Debug, Deserialize, Clone)]\npub struct 
EndpointJwksResponse {\n    pub jwks: Vec<JwksSettings>,\n}\n\n#[derive(Debug, Deserialize, Clone)]\npub struct JwksSettings {\n    pub id: String,\n    pub jwks_url: url::Url,\n    #[serde(rename = \"provider_name\")]\n    pub _provider_name: String,\n    pub jwt_audience: Option<String>,\n    pub role_names: Vec<RoleNameInt>,\n}\n\n#[cfg(test)]\nmod tests {\n    use serde_json::json;\n\n    use super::*;\n\n    fn dummy_aux() -> serde_json::Value {\n        json!({\n            \"endpoint_id\": \"endpoint\",\n            \"project_id\": \"project\",\n            \"branch_id\": \"branch\",\n            \"compute_id\": \"compute\",\n            \"cold_start_info\": \"unknown\",\n        })\n    }\n\n    #[test]\n    fn parse_kick_session() -> anyhow::Result<()> {\n        // This is what the console's kickResponse looks like.\n        let json = json!({\n            \"session_id\": \"deadbeef\",\n            \"result\": {\n                \"Success\": {\n                    \"host\": \"localhost\",\n                    \"port\": 5432,\n                    \"dbname\": \"postgres\",\n                    \"user\": \"john_doe\",\n                    \"password\": \"password\",\n                    \"aux\": dummy_aux(),\n                }\n            }\n        });\n        serde_json::from_str::<KickSession<'_>>(&json.to_string())?;\n\n        Ok(())\n    }\n\n    #[test]\n    fn parse_db_info() -> anyhow::Result<()> {\n        // with password\n        serde_json::from_value::<DatabaseInfo>(json!({\n            \"host\": \"localhost\",\n            \"port\": 5432,\n            \"dbname\": \"postgres\",\n            \"user\": \"john_doe\",\n            \"password\": \"password\",\n            \"aux\": dummy_aux(),\n        }))?;\n\n        // without password\n        serde_json::from_value::<DatabaseInfo>(json!({\n            \"host\": \"localhost\",\n            \"port\": 5432,\n            \"dbname\": \"postgres\",\n            \"user\": \"john_doe\",\n            
\"aux\": dummy_aux(),\n        }))?;\n\n        // new field (forward compatibility)\n        serde_json::from_value::<DatabaseInfo>(json!({\n            \"host\": \"localhost\",\n            \"port\": 5432,\n            \"dbname\": \"postgres\",\n            \"user\": \"john_doe\",\n            \"project\": \"hello_world\",\n            \"N.E.W\": \"forward compatibility check\",\n            \"aux\": dummy_aux(),\n        }))?;\n\n        // with allowed_ips\n        let dbinfo = serde_json::from_value::<DatabaseInfo>(json!({\n            \"host\": \"localhost\",\n            \"port\": 5432,\n            \"dbname\": \"postgres\",\n            \"user\": \"john_doe\",\n            \"password\": \"password\",\n            \"aux\": dummy_aux(),\n            \"allowed_ips\": [\"127.0.0.1\"],\n        }))?;\n\n        assert_eq!(\n            dbinfo.allowed_ips,\n            Some(vec![IpPattern::Single(\"127.0.0.1\".parse()?)])\n        );\n\n        Ok(())\n    }\n\n    #[test]\n    fn parse_wake_compute() -> anyhow::Result<()> {\n        let json = json!({\n            \"address\": \"0.0.0.0\",\n            \"aux\": dummy_aux(),\n        });\n        serde_json::from_str::<WakeCompute>(&json.to_string())?;\n        Ok(())\n    }\n\n    #[test]\n    fn parse_get_role_secret() -> anyhow::Result<()> {\n        // Empty `allowed_ips` and `allowed_vpc_endpoint_ids` field.\n        let json = json!({\n            \"role_secret\": \"secret\",\n        });\n        serde_json::from_str::<GetEndpointAccessControl>(&json.to_string())?;\n        let json = json!({\n            \"role_secret\": \"secret\",\n            \"allowed_ips\": [\"8.8.8.8\"],\n        });\n        serde_json::from_str::<GetEndpointAccessControl>(&json.to_string())?;\n        let json = json!({\n            \"role_secret\": \"secret\",\n            \"allowed_vpc_endpoint_ids\": [\"vpce-0abcd1234567890ef\"],\n        });\n        serde_json::from_str::<GetEndpointAccessControl>(&json.to_string())?;\n       
 let json = json!({\n            \"role_secret\": \"secret\",\n            \"allowed_ips\": [\"8.8.8.8\"],\n            \"allowed_vpc_endpoint_ids\": [\"vpce-0abcd1234567890ef\"],\n        });\n        serde_json::from_str::<GetEndpointAccessControl>(&json.to_string())?;\n        let json = json!({\n            \"role_secret\": \"secret\",\n            \"allowed_ips\": [\"8.8.8.8\"],\n            \"allowed_vpc_endpoint_ids\": [\"vpce-0abcd1234567890ef\"],\n            \"project_id\": \"project\",\n        });\n        serde_json::from_str::<GetEndpointAccessControl>(&json.to_string())?;\n\n        Ok(())\n    }\n}\n"
  },
  {
    "path": "proxy/src/control_plane/mgmt.rs",
    "content": "use std::convert::Infallible;\n\nuse anyhow::Context;\nuse once_cell::sync::Lazy;\nuse postgres_backend::{AuthType, PostgresBackend, PostgresBackendTCP, QueryError};\nuse pq_proto::{BeMessage, SINGLE_COL_ROWDESC};\nuse tokio::net::{TcpListener, TcpStream};\nuse tokio_util::sync::CancellationToken;\nuse tracing::{Instrument, error, info, info_span};\n\nuse crate::control_plane::messages::{DatabaseInfo, KickSession};\nuse crate::waiters::{self, Waiter, Waiters};\n\nstatic CPLANE_WAITERS: Lazy<Waiters<ComputeReady>> = Lazy::new(Default::default);\n\n/// Give caller an opportunity to wait for the cloud's reply.\npub(crate) fn get_waiter(\n    psql_session_id: impl Into<String>,\n) -> Result<Waiter<'static, ComputeReady>, waiters::RegisterError> {\n    CPLANE_WAITERS.register(psql_session_id.into())\n}\n\npub(crate) fn notify(psql_session_id: &str, msg: ComputeReady) -> Result<(), waiters::NotifyError> {\n    CPLANE_WAITERS.notify(psql_session_id, msg)\n}\n\n/// Management API listener task.\n/// It spawns management response handlers needed for the console redirect auth flow.\npub async fn task_main(listener: TcpListener) -> anyhow::Result<Infallible> {\n    scopeguard::defer! 
{\n        info!(\"mgmt has shut down\");\n    }\n\n    loop {\n        let (socket, peer_addr) = listener.accept().await?;\n        info!(\"accepted connection from {peer_addr}\");\n\n        socket\n            .set_nodelay(true)\n            .context(\"failed to set client socket option\")?;\n\n        let span = info_span!(\"mgmt\", peer = %peer_addr);\n\n        tokio::task::spawn(\n            async move {\n                info!(\"serving a new management API connection\");\n\n                // these might be long running connections, have a separate logging for cancelling\n                // on shutdown and other ways of stopping.\n                let cancelled = scopeguard::guard(tracing::Span::current(), |span| {\n                    let _e = span.entered();\n                    info!(\"management API task cancelled\");\n                });\n\n                if let Err(e) = handle_connection(socket).await {\n                    error!(\"serving failed with an error: {e}\");\n                } else {\n                    info!(\"serving completed\");\n                }\n\n                // we can no longer get dropped\n                scopeguard::ScopeGuard::into_inner(cancelled);\n            }\n            .instrument(span),\n        );\n    }\n}\n\nasync fn handle_connection(socket: TcpStream) -> Result<(), QueryError> {\n    let pgbackend = PostgresBackend::new(socket, AuthType::Trust, None)?;\n    pgbackend\n        .run(&mut MgmtHandler, &CancellationToken::new())\n        .await\n}\n\n/// A message received by `mgmt` when a compute node is ready.\npub(crate) type ComputeReady = DatabaseInfo;\n\n// TODO: replace with an http-based protocol.\nstruct MgmtHandler;\n\nimpl postgres_backend::Handler<tokio::net::TcpStream> for MgmtHandler {\n    async fn process_query(\n        &mut self,\n        pgb: &mut PostgresBackendTCP,\n        query: &str,\n    ) -> Result<(), QueryError> {\n        try_process_query(pgb, query).map_err(|e| {\n            
error!(\"failed to process response: {e:?}\");\n            e\n        })\n    }\n}\n\nfn try_process_query(pgb: &mut PostgresBackendTCP, query: &str) -> Result<(), QueryError> {\n    let resp: KickSession<'_> =\n        serde_json::from_str(query).context(\"Failed to parse query as json\")?;\n\n    let span = info_span!(\"event\", session_id = resp.session_id);\n    let _enter = span.enter();\n    info!(\"got response: {:?}\", resp.result);\n\n    match notify(resp.session_id, resp.result) {\n        Ok(()) => {\n            pgb.write_message_noflush(&SINGLE_COL_ROWDESC)?\n                .write_message_noflush(&BeMessage::DataRow(&[Some(b\"ok\")]))?\n                .write_message_noflush(&BeMessage::CommandComplete(b\"SELECT 1\"))?;\n        }\n        Err(e) => {\n            error!(\"failed to deliver response to per-client task\");\n            pgb.write_message_noflush(&BeMessage::ErrorResponse(&e.to_string(), None))?;\n        }\n    }\n\n    Ok(())\n}\n"
  },
  {
    "path": "proxy/src/control_plane/mod.rs",
    "content": "//! Various stuff for dealing with the Neon Console.\n//! Later we might move some API wrappers here.\n\n/// Payloads used in the console's APIs.\npub mod messages;\n\n/// Wrappers for console APIs and their mocks.\npub mod client;\n\npub(crate) mod errors;\n\nuse std::sync::Arc;\n\nuse messages::EndpointRateLimitConfig;\n\nuse crate::auth::backend::ComputeUserInfo;\nuse crate::auth::backend::jwt::AuthRule;\nuse crate::auth::{AuthError, IpPattern, check_peer_addr_is_in_list};\nuse crate::cache::node_info::CachedNodeInfo;\nuse crate::context::RequestContext;\nuse crate::control_plane::messages::MetricsAuxInfo;\nuse crate::intern::{AccountIdInt, EndpointIdInt, ProjectIdInt};\nuse crate::protocol2::ConnectionInfoExtra;\nuse crate::rate_limiter::{EndpointRateLimiter, LeakyBucketConfig};\nuse crate::types::{EndpointId, RoleName};\nuse crate::{compute, scram};\n\n/// Various cache-related types.\npub mod caches {\n    pub use super::client::ApiCaches;\n}\n\n/// Various cache-related types.\npub mod locks {\n    pub use super::client::ApiLocks;\n}\n\n/// Console's management API.\npub mod mgmt;\n\n/// Auth secret which is managed by the cloud.\n#[derive(Clone, Eq, PartialEq, Debug)]\npub(crate) enum AuthSecret {\n    /// [SCRAM](crate::scram) authentication info.\n    Scram(scram::ServerSecret),\n}\n\n#[derive(Default)]\npub(crate) struct AuthInfo {\n    pub(crate) secret: Option<AuthSecret>,\n    /// List of IP addresses allowed for the autorization.\n    pub(crate) allowed_ips: Vec<IpPattern>,\n    /// List of VPC endpoints allowed for the autorization.\n    pub(crate) allowed_vpc_endpoint_ids: Vec<String>,\n    /// Project ID. This is used for cache invalidation.\n    pub(crate) project_id: Option<ProjectIdInt>,\n    /// Account ID. 
This is used for cache invalidation.\n    pub(crate) account_id: Option<AccountIdInt>,\n    /// Are public connections or VPC connections blocked?\n    pub(crate) access_blocker_flags: AccessBlockerFlags,\n    /// The rate limits for this endpoint.\n    pub(crate) rate_limits: EndpointRateLimitConfig,\n}\n\n/// Info for establishing a connection to a compute node.\n#[derive(Clone)]\npub(crate) struct NodeInfo {\n    pub(crate) conn_info: compute::ConnectInfo,\n\n    /// Labels for proxy's metrics.\n    pub(crate) aux: MetricsAuxInfo,\n}\n\n#[derive(Copy, Clone, Default, Debug)]\npub(crate) struct AccessBlockerFlags {\n    pub public_access_blocked: bool,\n    pub vpc_access_blocked: bool,\n}\n\n#[derive(Clone, Debug)]\npub struct RoleAccessControl {\n    pub secret: Option<AuthSecret>,\n}\n\n#[derive(Clone, Debug)]\npub struct EndpointAccessControl {\n    pub allowed_ips: Arc<Vec<IpPattern>>,\n    pub allowed_vpce: Arc<Vec<String>>,\n    pub flags: AccessBlockerFlags,\n\n    pub rate_limits: EndpointRateLimitConfig,\n}\n\nimpl EndpointAccessControl {\n    pub fn check(\n        &self,\n        ctx: &RequestContext,\n        check_ip_allowed: bool,\n        check_vpc_allowed: bool,\n    ) -> Result<(), AuthError> {\n        if check_ip_allowed && !check_peer_addr_is_in_list(&ctx.peer_addr(), &self.allowed_ips) {\n            return Err(AuthError::IpAddressNotAllowed(ctx.peer_addr()));\n        }\n\n        // check if a VPC endpoint ID is coming in and if yes, if it's allowed\n        if check_vpc_allowed {\n            if self.flags.vpc_access_blocked {\n                return Err(AuthError::NetworkNotAllowed);\n            }\n\n            let incoming_vpc_endpoint_id = match ctx.extra() {\n                None => return Err(AuthError::MissingVPCEndpointId),\n                Some(ConnectionInfoExtra::Aws { vpce_id }) => vpce_id.to_string(),\n                Some(ConnectionInfoExtra::Azure { link_id }) => link_id.to_string(),\n            };\n\n            let vpce 
= &self.allowed_vpce;\n            // TODO: For now an empty VPC endpoint ID list means all are allowed. We should replace that.\n            if !vpce.is_empty() && !vpce.contains(&incoming_vpc_endpoint_id) {\n                return Err(AuthError::vpc_endpoint_id_not_allowed(\n                    incoming_vpc_endpoint_id,\n                ));\n            }\n        } else if self.flags.public_access_blocked {\n            return Err(AuthError::NetworkNotAllowed);\n        }\n\n        Ok(())\n    }\n\n    pub fn connection_attempt_rate_limit(\n        &self,\n        ctx: &RequestContext,\n        endpoint: &EndpointId,\n        rate_limiter: &EndpointRateLimiter,\n    ) -> Result<(), AuthError> {\n        let endpoint = EndpointIdInt::from(endpoint);\n\n        let limits = &self.rate_limits.connection_attempts;\n        let config = match ctx.protocol() {\n            crate::metrics::Protocol::Http => limits.http,\n            crate::metrics::Protocol::Ws => limits.ws,\n            crate::metrics::Protocol::Tcp => limits.tcp,\n            crate::metrics::Protocol::SniRouter => return Ok(()),\n        };\n        let config = config.and_then(|config| {\n            if config.rps <= 0.0 || config.burst <= 0.0 {\n                return None;\n            }\n\n            Some(LeakyBucketConfig::new(config.rps, config.burst))\n        });\n\n        if !rate_limiter.check(endpoint, config, 1) {\n            return Err(AuthError::too_many_connections());\n        }\n\n        Ok(())\n    }\n}\n\n/// This will allocate per each call, but the http requests alone\n/// already require a few allocations, so it should be fine.\npub(crate) trait ControlPlaneApi {\n    async fn get_role_access_control(\n        &self,\n        ctx: &RequestContext,\n        endpoint: &EndpointId,\n        role: &RoleName,\n    ) -> Result<RoleAccessControl, errors::GetAuthInfoError>;\n\n    async fn get_endpoint_access_control(\n        &self,\n        ctx: &RequestContext,\n        
endpoint: &EndpointId,\n        role: &RoleName,\n    ) -> Result<EndpointAccessControl, errors::GetAuthInfoError>;\n\n    async fn get_endpoint_jwks(\n        &self,\n        ctx: &RequestContext,\n        endpoint: &EndpointId,\n    ) -> Result<Vec<AuthRule>, errors::GetEndpointJwksError>;\n\n    /// Wake up the compute node and return the corresponding connection info.\n    async fn wake_compute(\n        &self,\n        ctx: &RequestContext,\n        user_info: &ComputeUserInfo,\n    ) -> Result<CachedNodeInfo, errors::WakeComputeError>;\n}\n"
  },
  {
    "path": "proxy/src/error.rs",
    "content": "use std::fmt;\n\nuse anyhow::Context;\nuse measured::FixedCardinalityLabel;\nuse tokio::task::JoinError;\n\n/// Marks errors that may be safely shown to a client.\n/// This trait can be seen as a specialized version of [`ToString`].\n///\n/// NOTE: This trait should not be implemented for [`anyhow::Error`], since it\n/// is way too convenient and tends to proliferate all across the codebase,\n/// ultimately leading to accidental leaks of sensitive data.\npub(crate) trait UserFacingError: ReportableError {\n    /// Format the error for client, stripping all sensitive info.\n    ///\n    /// Although this might be a no-op for many types, it's highly\n    /// recommended to override the default impl in case error type\n    /// contains anything sensitive: various IDs, IP addresses etc.\n    #[inline(always)]\n    fn to_string_client(&self) -> String {\n        self.to_string()\n    }\n}\n\n#[derive(Copy, Clone, Debug, Eq, PartialEq, FixedCardinalityLabel)]\n#[label(singleton = \"type\")]\npub enum ErrorKind {\n    /// Wrong password, unknown endpoint, protocol violation, etc...\n    User,\n\n    /// Network error between user and proxy. 
Not necessarily user error\n    #[label(rename = \"clientdisconnect\")]\n    ClientDisconnect,\n\n    /// Proxy self-imposed user rate limits\n    #[label(rename = \"ratelimit\")]\n    RateLimit,\n\n    /// Proxy self-imposed service-wise rate limits\n    #[label(rename = \"serviceratelimit\")]\n    ServiceRateLimit,\n\n    /// Proxy quota limit violation\n    #[label(rename = \"quota\")]\n    Quota,\n\n    /// internal errors\n    Service,\n\n    /// Error communicating with control plane\n    #[label(rename = \"controlplane\")]\n    ControlPlane,\n\n    /// Postgres error\n    Postgres,\n\n    /// Error communicating with compute\n    Compute,\n}\n\nimpl ErrorKind {\n    pub(crate) fn to_metric_label(self) -> &'static str {\n        match self {\n            ErrorKind::User => \"user\",\n            ErrorKind::ClientDisconnect => \"clientdisconnect\",\n            ErrorKind::RateLimit => \"ratelimit\",\n            ErrorKind::ServiceRateLimit => \"serviceratelimit\",\n            ErrorKind::Quota => \"quota\",\n            ErrorKind::Service => \"service\",\n            ErrorKind::ControlPlane => \"controlplane\",\n            ErrorKind::Postgres => \"postgres\",\n            ErrorKind::Compute => \"compute\",\n        }\n    }\n}\n\npub(crate) trait ReportableError: fmt::Display + Send + 'static {\n    fn get_error_kind(&self) -> ErrorKind;\n}\n\n/// Flattens `Result<Result<T>>` into `Result<T>`.\npub fn flatten_err<T>(r: Result<anyhow::Result<T>, JoinError>) -> anyhow::Result<T> {\n    r.context(\"join error\").and_then(|x| x)\n}\n"
  },
  {
    "path": "proxy/src/ext.rs",
    "content": "use std::panic::resume_unwind;\nuse std::sync::{Mutex, MutexGuard};\n\nuse tokio::task::JoinError;\n\npub(crate) trait LockExt<T> {\n    fn lock_propagate_poison(&self) -> MutexGuard<'_, T>;\n}\n\nimpl<T> LockExt<T> for Mutex<T> {\n    /// Lock the mutex and panic if the mutex was poisoned.\n    #[track_caller]\n    fn lock_propagate_poison(&self) -> MutexGuard<'_, T> {\n        match self.lock() {\n            Ok(guard) => guard,\n            // poison occurs when another thread panicked while holding the lock guard.\n            // since panicking is often unrecoverable, propagating the poison panic is reasonable.\n            Err(poison) => panic!(\"{poison}\"),\n        }\n    }\n}\n\npub(crate) trait TaskExt<T> {\n    fn propagate_task_panic(self) -> T;\n}\n\nimpl<T> TaskExt<T> for Result<T, JoinError> {\n    /// Unwrap the result and panic if the inner task panicked.\n    /// Also panics if the task was cancelled\n    #[track_caller]\n    fn propagate_task_panic(self) -> T {\n        match self {\n            Ok(t) => t,\n            // Using resume_unwind prevents the panic hook being called twice.\n            // Since we use this for structured concurrency, there is only\n            // 1 logical panic, so this is more correct.\n            Err(e) if e.is_panic() => resume_unwind(e.into_panic()),\n            Err(e) => panic!(\"unexpected task error: {e}\"),\n        }\n    }\n}\n"
  },
  {
    "path": "proxy/src/http/health_server.rs",
    "content": "use std::convert::Infallible;\nuse std::net::TcpListener;\nuse std::sync::{Arc, Mutex};\n\nuse anyhow::{anyhow, bail};\nuse http_utils::endpoint::{self, profile_cpu_handler, profile_heap_handler, request_span};\nuse http_utils::error::ApiError;\nuse http_utils::json::json_response;\nuse http_utils::{RouterBuilder, RouterService};\nuse hyper0::header::CONTENT_TYPE;\nuse hyper0::{Body, Request, Response, StatusCode};\nuse measured::MetricGroup;\nuse measured::text::BufferedTextEncoder;\nuse metrics::NeonMetrics;\nuse tracing::{info, info_span};\n\nuse crate::ext::{LockExt, TaskExt};\nuse crate::jemalloc;\n\nasync fn status_handler(_: Request<Body>) -> Result<Response<Body>, ApiError> {\n    json_response(StatusCode::OK, \"\")\n}\n\nfn make_router(metrics: AppMetrics) -> RouterBuilder<hyper0::Body, ApiError> {\n    let state = Arc::new(Mutex::new(PrometheusHandler {\n        encoder: BufferedTextEncoder::new(),\n        metrics,\n    }));\n\n    endpoint::make_router()\n        .get(\"/metrics\", move |r| {\n            let state = state.clone();\n            request_span(r, move |b| prometheus_metrics_handler(b, state))\n        })\n        .get(\"/v1/status\", status_handler)\n        .get(\"/profile/cpu\", move |r| {\n            request_span(r, profile_cpu_handler)\n        })\n        .get(\"/profile/heap\", move |r| {\n            request_span(r, profile_heap_handler)\n        })\n}\n\npub async fn task_main(\n    http_listener: TcpListener,\n    metrics: AppMetrics,\n) -> anyhow::Result<Infallible> {\n    scopeguard::defer! 
{\n        info!(\"http has shut down\");\n    }\n\n    let service = || RouterService::new(make_router(metrics).build()?);\n\n    hyper0::Server::from_tcp(http_listener)?\n        .serve(service().map_err(|e| anyhow!(e))?)\n        .await?;\n\n    bail!(\"hyper server without shutdown handling cannot shutdown successfully\");\n}\n\nstruct PrometheusHandler {\n    encoder: BufferedTextEncoder,\n    metrics: AppMetrics,\n}\n\n#[derive(MetricGroup)]\npub struct AppMetrics {\n    #[metric(namespace = \"jemalloc\")]\n    pub jemalloc: Option<jemalloc::MetricRecorder>,\n    #[metric(flatten)]\n    pub neon_metrics: NeonMetrics,\n    #[metric(flatten)]\n    pub proxy: &'static crate::metrics::Metrics,\n}\n\nasync fn prometheus_metrics_handler(\n    _req: Request<Body>,\n    state: Arc<Mutex<PrometheusHandler>>,\n) -> Result<Response<Body>, ApiError> {\n    let started_at = std::time::Instant::now();\n\n    let span = info_span!(\"blocking\");\n    let body = tokio::task::spawn_blocking(move || {\n        let _span = span.entered();\n\n        let mut state = state.lock_propagate_poison();\n        let PrometheusHandler { encoder, metrics } = &mut *state;\n\n        metrics\n            .collect_group_into(&mut *encoder)\n            .unwrap_or_else(|infallible| match infallible {});\n\n        let body = encoder.finish();\n\n        tracing::info!(\n            bytes = body.len(),\n            elapsed_ms = started_at.elapsed().as_millis(),\n            \"responded /metrics\"\n        );\n\n        body\n    })\n    .await\n    .propagate_task_panic();\n\n    let response = Response::builder()\n        .status(200)\n        .header(CONTENT_TYPE, \"text/plain; version=0.0.4\")\n        .body(Body::from(body))\n        .expect(\"response headers should be valid\");\n\n    Ok(response)\n}\n"
  },
  {
    "path": "proxy/src/http/mod.rs",
    "content": "//! HTTP client and server impls.\n//! Other modules should use stuff from this module instead of\n//! directly relying on deps like `reqwest` (think loose coupling).\n\npub mod health_server;\n\nuse std::time::{Duration, Instant};\n\nuse bytes::Bytes;\nuse futures::FutureExt;\nuse http::Method;\nuse http_body_util::BodyExt;\nuse hyper::body::Body;\npub(crate) use reqwest::{Request, Response};\nuse reqwest_middleware::RequestBuilder;\npub(crate) use reqwest_middleware::{ClientWithMiddleware, Error};\npub(crate) use reqwest_retry::RetryTransientMiddleware;\npub(crate) use reqwest_retry::policies::ExponentialBackoff;\nuse thiserror::Error;\n\nuse crate::metrics::{ConsoleRequest, Metrics};\nuse crate::url::ApiUrl;\n\n/// This is the preferred way to create new http clients,\n/// because it takes care of observability (OpenTelemetry).\n/// We deliberately don't want to replace this with a public static.\npub fn new_client() -> ClientWithMiddleware {\n    let client = reqwest::ClientBuilder::new()\n        .build()\n        .expect(\"Failed to create http client\");\n\n    reqwest_middleware::ClientBuilder::new(client)\n        .with(reqwest_tracing::TracingMiddleware::default())\n        .build()\n}\n\npub(crate) fn new_client_with_timeout(\n    request_timeout: Duration,\n    total_retry_duration: Duration,\n) -> ClientWithMiddleware {\n    let timeout_client = reqwest::ClientBuilder::new()\n        .timeout(request_timeout)\n        .build()\n        .expect(\"Failed to create http client with timeout\");\n\n    let retry_policy =\n        ExponentialBackoff::builder().build_with_total_retry_duration(total_retry_duration);\n\n    reqwest_middleware::ClientBuilder::new(timeout_client)\n        .with(reqwest_tracing::TracingMiddleware::default())\n        // As per docs, \"This middleware always errors when given requests with streaming bodies\".\n        // That's all right because we only use this client to send `serde_json::RawValue`, which\n        
// is not a stream.\n        //\n        // ex-maintainer note:\n        // this limitation can be fixed if streaming is necessary.\n        // retries will still not be performed, but it wont error immediately\n        .with(RetryTransientMiddleware::new_with_policy(retry_policy))\n        .build()\n}\n\n/// Thin convenience wrapper for an API provided by an http endpoint.\n#[derive(Debug, Clone)]\npub struct Endpoint {\n    /// API's base URL.\n    endpoint: ApiUrl,\n    /// Connection manager with built-in pooling.\n    client: ClientWithMiddleware,\n}\n\nimpl Endpoint {\n    /// Construct a new HTTP endpoint wrapper.\n    /// Http client is not constructed under the hood so that it can be shared.\n    pub fn new(endpoint: ApiUrl, client: impl Into<ClientWithMiddleware>) -> Self {\n        Self {\n            endpoint,\n            client: client.into(),\n        }\n    }\n\n    #[inline(always)]\n    pub(crate) fn url(&self) -> &ApiUrl {\n        &self.endpoint\n    }\n\n    /// Return a [builder](RequestBuilder) for a `GET` request,\n    /// appending a single `path` segment to the base endpoint URL.\n    pub(crate) fn get_path(&self, path: &str) -> RequestBuilder {\n        self.get_with_url(|u| {\n            u.path_segments_mut().push(path);\n        })\n    }\n\n    /// Return a [builder](RequestBuilder) for a `GET` request,\n    /// accepting a closure to modify the url path segments for more complex paths queries.\n    pub(crate) fn get_with_url(&self, f: impl for<'a> FnOnce(&'a mut ApiUrl)) -> RequestBuilder {\n        self.request_with_url(Method::GET, f)\n    }\n\n    /// Return a [builder](RequestBuilder) for a request,\n    /// accepting a closure to modify the url path segments for more complex paths queries.\n    pub(crate) fn request_with_url(\n        &self,\n        method: Method,\n        f: impl for<'a> FnOnce(&'a mut ApiUrl),\n    ) -> RequestBuilder {\n        let mut url = self.endpoint.clone();\n        f(&mut url);\n        
self.client.request(method, url.into_inner())\n    }\n\n    /// Execute a [request](reqwest::Request).\n    pub(crate) fn execute(\n        &self,\n        request: Request,\n    ) -> impl Future<Output = Result<Response, Error>> {\n        let metric = Metrics::get()\n            .proxy\n            .console_request_latency\n            .with_labels(ConsoleRequest {\n                request: request.url().path(),\n            });\n\n        let req = self.client.execute(request).boxed();\n\n        async move {\n            let start = Instant::now();\n            scopeguard::defer!({\n                Metrics::get()\n                    .proxy\n                    .console_request_latency\n                    .get_metric(metric)\n                    .observe_duration_since(start);\n            });\n\n            req.await\n        }\n    }\n}\n\n#[derive(Error, Debug)]\npub(crate) enum ReadBodyError<E> {\n    #[error(\"Content length exceeds limit of {limit} bytes\")]\n    BodyTooLarge { limit: usize },\n\n    #[error(transparent)]\n    Read(#[from] E),\n}\n\npub(crate) async fn read_body_with_limit<E>(\n    mut b: impl Body<Data = Bytes, Error = E> + Unpin,\n    limit: usize,\n) -> Result<Vec<u8>, ReadBodyError<E>> {\n    // We could use `b.limited().collect().await.to_bytes()` here\n    // but this ends up being slightly more efficient as far as I can tell.\n\n    // check the lower bound of the size hint.\n    // in reqwest, this value is influenced by the Content-Length header.\n    let lower_bound = match usize::try_from(b.size_hint().lower()) {\n        Ok(bound) if bound <= limit => bound,\n        _ => return Err(ReadBodyError::BodyTooLarge { limit }),\n    };\n    let mut bytes = Vec::with_capacity(lower_bound);\n\n    while let Some(frame) = b.frame().await.transpose()? 
{\n        if let Ok(data) = frame.into_data() {\n            if bytes.len() + data.len() > limit {\n                return Err(ReadBodyError::BodyTooLarge { limit });\n            }\n            bytes.extend_from_slice(&data);\n        }\n    }\n\n    Ok(bytes)\n}\n\n#[cfg(test)]\nmod tests {\n    use reqwest::Client;\n\n    use super::*;\n\n    #[test]\n    fn optional_query_params() -> anyhow::Result<()> {\n        let url = \"http://example.com\".parse()?;\n        let endpoint = Endpoint::new(url, Client::new());\n\n        // Validate that this pattern makes sense.\n        let req = endpoint\n            .get_path(\"frobnicate\")\n            .query(&[\n                (\"foo\", Some(\"10\")), // should be just `foo=10`\n                (\"bar\", None),       // shouldn't be passed at all\n            ])\n            .build()?;\n\n        assert_eq!(req.url().as_str(), \"http://example.com/frobnicate?foo=10\");\n\n        Ok(())\n    }\n\n    #[test]\n    fn uuid_params() -> anyhow::Result<()> {\n        let url = \"http://example.com\".parse()?;\n        let endpoint = Endpoint::new(url, Client::new());\n\n        let req = endpoint\n            .get_path(\"frobnicate\")\n            .query(&[(\"session_id\", uuid::Uuid::nil())])\n            .build()?;\n\n        assert_eq!(\n            req.url().as_str(),\n            \"http://example.com/frobnicate?session_id=00000000-0000-0000-0000-000000000000\"\n        );\n\n        Ok(())\n    }\n}\n"
  },
  {
    "path": "proxy/src/intern.rs",
    "content": "use std::hash::BuildHasherDefault;\nuse std::marker::PhantomData;\nuse std::num::NonZeroUsize;\nuse std::ops::Index;\nuse std::sync::OnceLock;\n\nuse lasso::{Capacity, MemoryLimits, Spur, ThreadedRodeo};\nuse rustc_hash::FxHasher;\n\nuse crate::types::{AccountId, BranchId, EndpointId, ProjectId, RoleName};\n\npub trait InternId: Sized + 'static {\n    fn get_interner() -> &'static StringInterner<Self>;\n}\n\npub struct StringInterner<Id> {\n    inner: ThreadedRodeo<Spur, BuildHasherDefault<FxHasher>>,\n    _id: PhantomData<Id>,\n}\n\n#[derive(PartialEq, Debug, Clone, Copy, Eq, Hash)]\npub struct InternedString<Id> {\n    inner: Spur,\n    _id: PhantomData<Id>,\n}\n\nimpl<Id: InternId> std::fmt::Display for InternedString<Id> {\n    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {\n        self.as_str().fmt(f)\n    }\n}\n\nimpl<Id: InternId> InternedString<Id> {\n    pub(crate) fn as_str(&self) -> &'static str {\n        Id::get_interner().inner.resolve(&self.inner)\n    }\n    pub(crate) fn get(s: &str) -> Option<Self> {\n        Id::get_interner().get(s)\n    }\n}\n\nimpl<Id: InternId> AsRef<str> for InternedString<Id> {\n    fn as_ref(&self) -> &str {\n        self.as_str()\n    }\n}\n\nimpl<Id: InternId> std::ops::Deref for InternedString<Id> {\n    type Target = str;\n    fn deref(&self) -> &str {\n        self.as_str()\n    }\n}\n\nimpl<'de, Id: InternId> serde::de::Deserialize<'de> for InternedString<Id> {\n    fn deserialize<D: serde::de::Deserializer<'de>>(d: D) -> Result<Self, D::Error> {\n        struct Visitor<Id>(PhantomData<Id>);\n        impl<Id: InternId> serde::de::Visitor<'_> for Visitor<Id> {\n            type Value = InternedString<Id>;\n\n            fn expecting(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {\n                formatter.write_str(\"a string\")\n            }\n\n            fn visit_str<E>(self, v: &str) -> Result<Self::Value, E>\n            where\n                E: 
serde::de::Error,\n            {\n                Ok(Id::get_interner().get_or_intern(v))\n            }\n        }\n        d.deserialize_str(Visitor::<Id>(PhantomData))\n    }\n}\n\nimpl<Id: InternId> serde::Serialize for InternedString<Id> {\n    fn serialize<S: serde::Serializer>(&self, s: S) -> Result<S::Ok, S::Error> {\n        self.as_str().serialize(s)\n    }\n}\n\nimpl<Id: InternId> StringInterner<Id> {\n    pub(crate) fn new() -> Self {\n        StringInterner {\n            inner: ThreadedRodeo::with_capacity_memory_limits_and_hasher(\n                Capacity::new(2500, NonZeroUsize::new(1 << 16).expect(\"value is nonzero\")),\n                // unbounded\n                MemoryLimits::for_memory_usage(usize::MAX),\n                BuildHasherDefault::<FxHasher>::default(),\n            ),\n            _id: PhantomData,\n        }\n    }\n\n    #[cfg(test)]\n    fn len(&self) -> usize {\n        self.inner.len()\n    }\n\n    #[cfg(test)]\n    fn current_memory_usage(&self) -> usize {\n        self.inner.current_memory_usage()\n    }\n\n    pub(crate) fn get_or_intern(&self, s: &str) -> InternedString<Id> {\n        InternedString {\n            inner: self.inner.get_or_intern(s),\n            _id: PhantomData,\n        }\n    }\n\n    pub(crate) fn get(&self, s: &str) -> Option<InternedString<Id>> {\n        Some(InternedString {\n            inner: self.inner.get(s)?,\n            _id: PhantomData,\n        })\n    }\n}\n\nimpl<Id: InternId> Index<InternedString<Id>> for StringInterner<Id> {\n    type Output = str;\n\n    fn index(&self, index: InternedString<Id>) -> &Self::Output {\n        self.inner.resolve(&index.inner)\n    }\n}\n\nimpl<Id: InternId> Default for StringInterner<Id> {\n    fn default() -> Self {\n        Self::new()\n    }\n}\n\n#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]\npub struct RoleNameTag;\nimpl InternId for RoleNameTag {\n    fn get_interner() -> &'static StringInterner<Self> {\n        static ROLE_NAMES: 
OnceLock<StringInterner<RoleNameTag>> = OnceLock::new();\n        ROLE_NAMES.get_or_init(Default::default)\n    }\n}\npub type RoleNameInt = InternedString<RoleNameTag>;\nimpl From<&RoleName> for RoleNameInt {\n    fn from(value: &RoleName) -> Self {\n        RoleNameTag::get_interner().get_or_intern(value)\n    }\n}\n\n#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]\npub struct EndpointIdTag;\nimpl InternId for EndpointIdTag {\n    fn get_interner() -> &'static StringInterner<Self> {\n        static ROLE_NAMES: OnceLock<StringInterner<EndpointIdTag>> = OnceLock::new();\n        ROLE_NAMES.get_or_init(Default::default)\n    }\n}\npub type EndpointIdInt = InternedString<EndpointIdTag>;\nimpl From<&EndpointId> for EndpointIdInt {\n    fn from(value: &EndpointId) -> Self {\n        EndpointIdTag::get_interner().get_or_intern(value)\n    }\n}\nimpl From<EndpointId> for EndpointIdInt {\n    fn from(value: EndpointId) -> Self {\n        EndpointIdTag::get_interner().get_or_intern(&value)\n    }\n}\n\n#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]\npub struct BranchIdTag;\nimpl InternId for BranchIdTag {\n    fn get_interner() -> &'static StringInterner<Self> {\n        static ROLE_NAMES: OnceLock<StringInterner<BranchIdTag>> = OnceLock::new();\n        ROLE_NAMES.get_or_init(Default::default)\n    }\n}\npub type BranchIdInt = InternedString<BranchIdTag>;\nimpl From<&BranchId> for BranchIdInt {\n    fn from(value: &BranchId) -> Self {\n        BranchIdTag::get_interner().get_or_intern(value)\n    }\n}\nimpl From<BranchId> for BranchIdInt {\n    fn from(value: BranchId) -> Self {\n        BranchIdTag::get_interner().get_or_intern(&value)\n    }\n}\n\n#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]\npub struct ProjectIdTag;\nimpl InternId for ProjectIdTag {\n    fn get_interner() -> &'static StringInterner<Self> {\n        static ROLE_NAMES: OnceLock<StringInterner<ProjectIdTag>> = OnceLock::new();\n        ROLE_NAMES.get_or_init(Default::default)\n    }\n}\npub 
type ProjectIdInt = InternedString<ProjectIdTag>;\nimpl From<&ProjectId> for ProjectIdInt {\n    fn from(value: &ProjectId) -> Self {\n        ProjectIdTag::get_interner().get_or_intern(value)\n    }\n}\nimpl From<ProjectId> for ProjectIdInt {\n    fn from(value: ProjectId) -> Self {\n        ProjectIdTag::get_interner().get_or_intern(&value)\n    }\n}\n\n#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]\npub struct AccountIdTag;\nimpl InternId for AccountIdTag {\n    fn get_interner() -> &'static StringInterner<Self> {\n        static ROLE_NAMES: OnceLock<StringInterner<AccountIdTag>> = OnceLock::new();\n        ROLE_NAMES.get_or_init(Default::default)\n    }\n}\npub type AccountIdInt = InternedString<AccountIdTag>;\nimpl From<&AccountId> for AccountIdInt {\n    fn from(value: &AccountId) -> Self {\n        AccountIdTag::get_interner().get_or_intern(value)\n    }\n}\nimpl From<AccountId> for AccountIdInt {\n    fn from(value: AccountId) -> Self {\n        AccountIdTag::get_interner().get_or_intern(&value)\n    }\n}\n\n#[cfg(test)]\nmod tests {\n    use std::sync::OnceLock;\n\n    use super::InternId;\n    use crate::intern::StringInterner;\n\n    struct MyId;\n    impl InternId for MyId {\n        fn get_interner() -> &'static StringInterner<Self> {\n            pub(crate) static ROLE_NAMES: OnceLock<StringInterner<MyId>> = OnceLock::new();\n            ROLE_NAMES.get_or_init(Default::default)\n        }\n    }\n\n    #[test]\n    fn push_many_strings() {\n        use rand::rngs::StdRng;\n        use rand::{Rng, SeedableRng};\n        use rand_distr::Zipf;\n\n        let endpoint_dist = Zipf::new(500000.0, 0.8).unwrap();\n        let endpoints = StdRng::seed_from_u64(272488357).sample_iter(endpoint_dist);\n\n        let interner = MyId::get_interner();\n\n        const N: usize = 100_000;\n        let mut verify = Vec::with_capacity(N);\n        for endpoint in endpoints.take(N) {\n            let endpoint = format!(\"ep-string-interning-{endpoint}\");\n          
  let key = interner.get_or_intern(&endpoint);\n            verify.push((endpoint, key));\n        }\n\n        for (s, key) in verify {\n            assert_eq!(interner[key], s);\n        }\n\n        // 2031616/59861 = 34 bytes per string\n        assert_eq!(interner.len(), 59_861);\n        // will have other overhead for the internal hashmaps that are not accounted for.\n        assert_eq!(interner.current_memory_usage(), 2_031_616);\n    }\n}\n"
  },
  {
    "path": "proxy/src/jemalloc.rs",
    "content": "use std::marker::PhantomData;\n\nuse measured::label::NoLabels;\nuse measured::metric::gauge::GaugeState;\nuse measured::metric::group::Encoding;\nuse measured::metric::name::MetricNameEncoder;\nuse measured::metric::{MetricEncoding, MetricFamilyEncoding, MetricType};\nuse measured::text::TextEncoder;\nuse measured::{LabelGroup, MetricGroup};\nuse tikv_jemalloc_ctl::{config, epoch, epoch_mib, stats, version};\n\npub struct MetricRecorder {\n    epoch: epoch_mib,\n    inner: Metrics,\n}\n\n#[derive(MetricGroup)]\nstruct Metrics {\n    active_bytes: JemallocGaugeFamily<stats::active_mib>,\n    allocated_bytes: JemallocGaugeFamily<stats::allocated_mib>,\n    mapped_bytes: JemallocGaugeFamily<stats::mapped_mib>,\n    metadata_bytes: JemallocGaugeFamily<stats::metadata_mib>,\n    resident_bytes: JemallocGaugeFamily<stats::resident_mib>,\n    retained_bytes: JemallocGaugeFamily<stats::retained_mib>,\n}\n\nimpl<Enc: Encoding> MetricGroup<Enc> for MetricRecorder\nwhere\n    Metrics: MetricGroup<Enc>,\n{\n    fn collect_group_into(&self, enc: &mut Enc) -> Result<(), Enc::Err> {\n        if self.epoch.advance().is_ok() {\n            self.inner.collect_group_into(enc)?;\n        }\n        Ok(())\n    }\n}\n\nimpl MetricRecorder {\n    pub fn new() -> Result<Self, anyhow::Error> {\n        tracing::debug!(\n            config = config::malloc_conf::read()?,\n            version = version::read()?,\n            \"starting jemalloc recorder\"\n        );\n\n        Ok(Self {\n            epoch: epoch::mib()?,\n            inner: Metrics {\n                active_bytes: JemallocGaugeFamily(stats::active::mib()?),\n                allocated_bytes: JemallocGaugeFamily(stats::allocated::mib()?),\n                mapped_bytes: JemallocGaugeFamily(stats::mapped::mib()?),\n                metadata_bytes: JemallocGaugeFamily(stats::metadata::mib()?),\n                resident_bytes: JemallocGaugeFamily(stats::resident::mib()?),\n                retained_bytes: 
JemallocGaugeFamily(stats::retained::mib()?),\n            },\n        })\n    }\n}\n\nstruct JemallocGauge<T>(PhantomData<T>);\n\nimpl<T> Default for JemallocGauge<T> {\n    fn default() -> Self {\n        JemallocGauge(PhantomData)\n    }\n}\nimpl<T> MetricType for JemallocGauge<T> {\n    type Metadata = T;\n}\n\nstruct JemallocGaugeFamily<T>(T);\nimpl<M, T: Encoding> MetricFamilyEncoding<T> for JemallocGaugeFamily<M>\nwhere\n    JemallocGauge<M>: MetricEncoding<T, Metadata = M>,\n{\n    fn collect_family_into(&self, name: impl MetricNameEncoder, enc: &mut T) -> Result<(), T::Err> {\n        JemallocGauge::write_type(&name, enc)?;\n        JemallocGauge(PhantomData).collect_into(&self.0, NoLabels, name, enc)\n    }\n}\n\nmacro_rules! jemalloc_gauge {\n    ($stat:ident, $mib:ident) => {\n        impl<W: std::io::Write> MetricEncoding<TextEncoder<W>> for JemallocGauge<stats::$mib> {\n            fn write_type(\n                name: impl MetricNameEncoder,\n                enc: &mut TextEncoder<W>,\n            ) -> Result<(), std::io::Error> {\n                GaugeState::write_type(name, enc)\n            }\n\n            fn collect_into(\n                &self,\n                mib: &stats::$mib,\n                labels: impl LabelGroup,\n                name: impl MetricNameEncoder,\n                enc: &mut TextEncoder<W>,\n            ) -> Result<(), std::io::Error> {\n                if let Ok(v) = mib.read() {\n                    GaugeState::new(v as i64).collect_into(&(), labels, name, enc)?;\n                }\n                Ok(())\n            }\n        }\n    };\n}\n\njemalloc_gauge!(active, active_mib);\njemalloc_gauge!(allocated, allocated_mib);\njemalloc_gauge!(mapped, mapped_mib);\njemalloc_gauge!(metadata, metadata_mib);\njemalloc_gauge!(resident, resident_mib);\njemalloc_gauge!(retained, retained_mib);\n"
  },
  {
    "path": "proxy/src/lib.rs",
    "content": "// rustc lints/lint groups\n// https://doc.rust-lang.org/rustc/lints/groups.html\n#![deny(deprecated, future_incompatible, let_underscore, nonstandard_style)]\n#![warn(clippy::all, clippy::pedantic, clippy::cargo)]\n// List of denied lints from the clippy::restriction group.\n// https://rust-lang.github.io/rust-clippy/master/index.html#?groups=restriction\n#![warn(\n    clippy::undocumented_unsafe_blocks,\n    // TODO: Enable once all individual checks are enabled.\n    //clippy::as_conversions,\n    clippy::dbg_macro,\n    clippy::empty_enum_variants_with_brackets,\n    clippy::exit,\n    clippy::float_cmp_const,\n    clippy::lossy_float_literal,\n    clippy::macro_use_imports,\n    clippy::manual_ok_or,\n    // TODO: consider clippy::map_err_ignore\n    // TODO: consider clippy::mem_forget\n    clippy::rc_mutex,\n    clippy::rest_pat_in_fully_bound_structs,\n    clippy::string_add,\n    clippy::string_to_string,\n    clippy::todo,\n    clippy::unimplemented,\n    clippy::unwrap_used,\n)]\n// List of permanently allowed lints.\n#![allow(\n    // It's ok to cast bool to u8, etc.\n    clippy::cast_lossless,\n    // Seems unavoidable.\n    clippy::multiple_crate_versions,\n    // While #[must_use] is a great feature this check is too noisy.\n    clippy::must_use_candidate,\n    // Inline consts, structs, fns, imports, etc. 
are ok if they're used by\n    // the following statement(s).\n    clippy::items_after_statements,\n)]\n// List of temporarily allowed lints.\n// TODO: fix code and reduce list or move to permanent list above.\n#![expect(\n    clippy::cargo_common_metadata,\n    clippy::cast_possible_truncation,\n    clippy::cast_possible_wrap,\n    clippy::cast_precision_loss,\n    clippy::cast_sign_loss,\n    clippy::doc_markdown,\n    clippy::inline_always,\n    clippy::match_same_arms,\n    clippy::match_wild_err_arm,\n    clippy::missing_errors_doc,\n    clippy::missing_panics_doc,\n    clippy::module_name_repetitions,\n    clippy::needless_pass_by_value,\n    clippy::redundant_closure_for_method_calls,\n    clippy::similar_names,\n    clippy::single_match_else,\n    clippy::struct_excessive_bools,\n    clippy::struct_field_names,\n    clippy::too_many_lines,\n    clippy::unused_self\n)]\n#![allow(\n    clippy::unsafe_derive_deserialize,\n    reason = \"false positive: https://github.com/rust-lang/rust-clippy/issues/15120\"\n)]\n#![cfg_attr(\n    any(test, feature = \"testing\"),\n    allow(\n        clippy::needless_raw_string_hashes,\n        clippy::unreadable_literal,\n        clippy::unused_async,\n    )\n)]\n// List of temporarily allowed lints to unblock beta/nightly.\n#![allow(unknown_lints)]\n\npub mod binary;\n\nmod auth;\nmod batch;\nmod cache;\nmod cancellation;\nmod compute;\nmod compute_ctl;\nmod config;\nmod console_redirect_proxy;\nmod context;\nmod control_plane;\nmod error;\nmod ext;\nmod http;\nmod intern;\nmod jemalloc;\nmod logging;\nmod metrics;\nmod parse;\nmod pglb;\nmod pqproto;\nmod protocol2;\nmod proxy;\nmod rate_limiter;\nmod redis;\nmod sasl;\nmod scram;\nmod serverless;\nmod signals;\nmod stream;\nmod tls;\nmod types;\nmod url;\nmod usage_metrics;\nmod util;\nmod waiters;\n"
  },
  {
    "path": "proxy/src/logging.rs",
    "content": "use std::cell::RefCell;\nuse std::collections::HashMap;\nuse std::sync::Arc;\nuse std::{env, io};\n\nuse chrono::{DateTime, Utc};\nuse opentelemetry::trace::TraceContextExt;\nuse tracing::subscriber::Interest;\nuse tracing::{Event, Metadata, Span, Subscriber, callsite, span};\nuse tracing_opentelemetry::OpenTelemetrySpanExt;\nuse tracing_subscriber::filter::{EnvFilter, LevelFilter};\nuse tracing_subscriber::fmt::format::{Format, Full};\nuse tracing_subscriber::fmt::time::SystemTime;\nuse tracing_subscriber::fmt::{FormatEvent, FormatFields};\nuse tracing_subscriber::layer::{Context, Layer};\nuse tracing_subscriber::prelude::*;\nuse tracing_subscriber::registry::LookupSpan;\n\nuse crate::metrics::Metrics;\n\n/// Initialize logging and OpenTelemetry tracing and exporter.\n///\n/// Logging can be configured using `RUST_LOG` environment variable.\n///\n/// OpenTelemetry is configured with OTLP/HTTP exporter. It picks up\n/// configuration from environment variables. For example, to change the\n/// destination, set `OTEL_EXPORTER_OTLP_ENDPOINT=http://jaeger:4318`.\n/// See <https://opentelemetry.io/docs/reference/specification/sdk-environment-variables>\npub fn init() -> anyhow::Result<LoggingGuard> {\n    let logfmt = LogFormat::from_env()?;\n\n    let env_filter = EnvFilter::builder()\n        .with_default_directive(LevelFilter::INFO.into())\n        .from_env_lossy()\n        .add_directive(\n            \"aws_config=info\"\n                .parse()\n                .expect(\"this should be a valid filter directive\"),\n        )\n        .add_directive(\n            \"azure_core::policies::transport=off\"\n                .parse()\n                .expect(\"this should be a valid filter directive\"),\n        );\n\n    let provider = tracing_utils::init_tracing(\"proxy\", tracing_utils::ExportConfig::default());\n    let otlp_layer = provider.as_ref().map(tracing_utils::layer);\n\n    let json_log_layer = if logfmt == LogFormat::Json {\n        
Some(JsonLoggingLayer::new(\n            RealClock,\n            StderrWriter {\n                stderr: std::io::stderr(),\n            },\n            &[\"conn_id\", \"ep\", \"query_id\", \"request_id\", \"session_id\"],\n        ))\n    } else {\n        None\n    };\n\n    let text_log_layer = if logfmt == LogFormat::Text {\n        Some(\n            tracing_subscriber::fmt::layer()\n                .with_ansi(false)\n                .with_writer(std::io::stderr)\n                .with_target(false),\n        )\n    } else {\n        None\n    };\n\n    tracing_subscriber::registry()\n        .with(env_filter)\n        .with(otlp_layer)\n        .with(json_log_layer)\n        .with(text_log_layer)\n        .try_init()?;\n\n    Ok(LoggingGuard(provider))\n}\n\n/// Initialize logging for local_proxy with log prefix and no opentelemetry.\n///\n/// Logging can be configured using `RUST_LOG` environment variable.\npub fn init_local_proxy() -> anyhow::Result<LoggingGuard> {\n    let env_filter = EnvFilter::builder()\n        .with_default_directive(LevelFilter::INFO.into())\n        .from_env_lossy();\n\n    let fmt_layer = tracing_subscriber::fmt::layer()\n        .with_ansi(false)\n        .with_writer(std::io::stderr)\n        .event_format(LocalProxyFormatter(Format::default().with_target(false)));\n\n    tracing_subscriber::registry()\n        .with(env_filter)\n        .with(fmt_layer)\n        .try_init()?;\n\n    Ok(LoggingGuard(None))\n}\n\npub struct LocalProxyFormatter(Format<Full, SystemTime>);\n\nimpl<S, N> FormatEvent<S, N> for LocalProxyFormatter\nwhere\n    S: Subscriber + for<'a> LookupSpan<'a>,\n    N: for<'a> FormatFields<'a> + 'static,\n{\n    fn format_event(\n        &self,\n        ctx: &tracing_subscriber::fmt::FmtContext<'_, S, N>,\n        mut writer: tracing_subscriber::fmt::format::Writer<'_>,\n        event: &tracing::Event<'_>,\n    ) -> std::fmt::Result {\n        writer.write_str(\"[local_proxy] \")?;\n        self.0.format_event(ctx, 
writer, event)\n    }\n}\n\npub struct LoggingGuard(Option<tracing_utils::Provider>);\n\nimpl Drop for LoggingGuard {\n    fn drop(&mut self) {\n        if let Some(p) = &self.0 {\n            // Shutdown trace pipeline gracefully, so that it has a chance to send any\n            // pending traces before we exit.\n            tracing::info!(\"shutting down the tracing machinery\");\n            drop(p.shutdown());\n        }\n    }\n}\n\n#[derive(Copy, Clone, PartialEq, Eq, Default, Debug)]\nenum LogFormat {\n    Text,\n    #[default]\n    Json,\n}\n\nimpl LogFormat {\n    fn from_env() -> anyhow::Result<Self> {\n        let logfmt = env::var(\"LOGFMT\");\n        Ok(match logfmt.as_deref() {\n            Err(_) => LogFormat::default(),\n            Ok(\"text\") => LogFormat::Text,\n            Ok(\"json\") => LogFormat::Json,\n            Ok(logfmt) => anyhow::bail!(\"unknown log format: {logfmt}\"),\n        })\n    }\n}\n\ntrait MakeWriter {\n    fn make_writer(&self) -> impl io::Write;\n}\n\nstruct StderrWriter {\n    stderr: io::Stderr,\n}\n\nimpl MakeWriter for StderrWriter {\n    #[inline]\n    fn make_writer(&self) -> impl io::Write {\n        self.stderr.lock()\n    }\n}\n\n// TODO: move into separate module or even separate crate.\ntrait Clock {\n    fn now(&self) -> DateTime<Utc>;\n}\n\nstruct RealClock;\n\nimpl Clock for RealClock {\n    #[inline]\n    fn now(&self) -> DateTime<Utc> {\n        Utc::now()\n    }\n}\n\n/// Name of the field used by tracing crate to store the event message.\nconst MESSAGE_FIELD: &str = \"message\";\n\n/// Tracing used to enforce that spans/events have no more than 32 fields.\n/// It seems this is no longer the case, but it's still documented in some places.\n/// Generally, we shouldn't expect more than 32 fields anyway, so we can try and\n/// rely on it for some (minor) performance gains.\nconst MAX_TRACING_FIELDS: usize = 32;\n\nthread_local! 
{\n    /// Thread-local instance with per-thread buffer for log writing.\n    static EVENT_FORMATTER: RefCell<EventFormatter> = const { RefCell::new(EventFormatter::new()) };\n    /// Cached OS thread ID.\n    static THREAD_ID: u64 = gettid::gettid();\n}\n\n/// Map for values fixed at callsite registration.\n// We use papaya here because registration rarely happens post-startup.\n// papaya is good for read-heavy workloads.\n//\n// We use rustc_hash here because callsite::Identifier will always be an integer with low-bit entropy,\n// since it's always a pointer to static mutable data. rustc_hash was designed for low-bit entropy.\ntype CallsiteMap<T> =\n    papaya::HashMap<callsite::Identifier, T, std::hash::BuildHasherDefault<rustc_hash::FxHasher>>;\n\n/// Implements tracing layer to handle events specific to logging.\nstruct JsonLoggingLayer<C: Clock, W: MakeWriter> {\n    clock: C,\n    writer: W,\n\n    /// tracks which fields of each **event** are duplicates\n    skipped_field_indices: CallsiteMap<SkippedFieldIndices>,\n\n    /// tracks callsite names to an ID.\n    callsite_name_ids: papaya::HashMap<&'static str, u32, ahash::RandomState>,\n\n    span_info: CallsiteMap<CallsiteSpanInfo>,\n\n    /// Fields we want to keep track of in a separate json object.\n    extract_fields: &'static [&'static str],\n}\n\nimpl<C: Clock, W: MakeWriter> JsonLoggingLayer<C, W> {\n    fn new(clock: C, writer: W, extract_fields: &'static [&'static str]) -> Self {\n        JsonLoggingLayer {\n            clock,\n            skipped_field_indices: CallsiteMap::default(),\n            span_info: CallsiteMap::default(),\n            callsite_name_ids: papaya::HashMap::default(),\n            writer,\n            extract_fields,\n        }\n    }\n\n    #[inline]\n    fn span_info(&self, metadata: &'static Metadata<'static>) -> CallsiteSpanInfo {\n        self.span_info\n            .pin()\n            .get_or_insert_with(metadata.callsite(), || {\n                
CallsiteSpanInfo::new(&self.callsite_name_ids, metadata, self.extract_fields)\n            })\n            .clone()\n    }\n}\n\nimpl<S, C: Clock + 'static, W: MakeWriter + 'static> Layer<S> for JsonLoggingLayer<C, W>\nwhere\n    S: Subscriber + for<'a> LookupSpan<'a>,\n{\n    fn on_event(&self, event: &Event<'_>, ctx: Context<'_, S>) {\n        use std::io::Write;\n\n        // TODO: consider special tracing subscriber to grab timestamp very\n        //       early, before OTel machinery, and add as event extension.\n        let now = self.clock.now();\n\n        EVENT_FORMATTER.with(|f| {\n            let mut borrow = f.try_borrow_mut();\n            let formatter = match borrow.as_deref_mut() {\n                Ok(formatter) => formatter,\n                // If the thread local formatter is borrowed,\n                // then we likely hit an edge case where we panicked during formatting.\n                // We allow the logging to proceed with an uncached formatter.\n                Err(_) => &mut EventFormatter::new(),\n            };\n\n            formatter.format(\n                now,\n                event,\n                &ctx,\n                &self.skipped_field_indices,\n                self.extract_fields,\n            );\n\n            let mut writer = self.writer.make_writer();\n            if writer.write_all(formatter.buffer()).is_err() {\n                Metrics::get().proxy.logging_errors_count.inc();\n            }\n        });\n    }\n\n    /// Registers a SpanFields instance as span extension.\n    fn on_new_span(&self, attrs: &span::Attributes<'_>, id: &span::Id, ctx: Context<'_, S>) {\n        let span = ctx.span(id).expect(\"span must exist\");\n\n        let mut fields = SpanFields::new(self.span_info(span.metadata()));\n        attrs.record(&mut fields);\n\n        // This is a new span: the extensions should not be locked\n        // unless some layer spawned a thread to process this span.\n        // I don't think any layers do that.\n 
       span.extensions_mut().insert(fields);\n    }\n\n    fn on_record(&self, id: &span::Id, values: &span::Record<'_>, ctx: Context<'_, S>) {\n        let span = ctx.span(id).expect(\"span must exist\");\n\n        // assumption: `on_record` is rarely called.\n        // assumption: a span being updated by one thread,\n        //             and formatted by another thread is even rarer.\n        let mut ext = span.extensions_mut();\n        if let Some(fields) = ext.get_mut::<SpanFields>() {\n            values.record(fields);\n        }\n    }\n\n    /// Called (lazily) roughly once per event/span instance. We quickly check\n    /// for duplicate field names and record duplicates as skippable. Last field wins.\n    fn register_callsite(&self, metadata: &'static Metadata<'static>) -> Interest {\n        debug_assert!(\n            metadata.fields().len() <= MAX_TRACING_FIELDS,\n            \"callsite {metadata:?} has too many fields.\"\n        );\n\n        if !metadata.is_event() {\n            // register the span info.\n            self.span_info(metadata);\n            // Must not be never because we wouldn't get trace and span data.\n            return Interest::always();\n        }\n\n        let mut field_indices = SkippedFieldIndices::default();\n        let mut seen_fields = HashMap::new();\n        for field in metadata.fields() {\n            if let Some(old_index) = seen_fields.insert(field.name(), field.index()) {\n                field_indices.set(old_index);\n            }\n        }\n\n        if !field_indices.is_empty() {\n            self.skipped_field_indices\n                .pin()\n                .insert(metadata.callsite(), field_indices);\n        }\n\n        Interest::always()\n    }\n}\n\n/// Any span info that is fixed to a particular callsite. Not variable between span instances.\n#[derive(Clone)]\nstruct CallsiteSpanInfo {\n    /// index of each field to extract. 
usize::MAX if not found.\n    extract: Arc<[usize]>,\n\n    /// tracks the fixed \"callsite ID\" for each span.\n    /// note: this is not stable between runs.\n    normalized_name: Arc<str>,\n}\n\nimpl CallsiteSpanInfo {\n    fn new(\n        callsite_name_ids: &papaya::HashMap<&'static str, u32, ahash::RandomState>,\n        metadata: &'static Metadata<'static>,\n        extract_fields: &[&'static str],\n    ) -> Self {\n        let names: Vec<&'static str> = metadata.fields().iter().map(|f| f.name()).collect();\n\n        // get all the indices of span fields we want to focus\n        let extract = extract_fields\n            .iter()\n            // use rposition, since we want last match wins.\n            .map(|f1| names.iter().rposition(|f2| f1 == f2).unwrap_or(usize::MAX))\n            .collect();\n\n        // normalized_name is unique for each callsite, but it is not\n        // unified across separate proxy instances.\n        // todo: can we do better here?\n        let cid = *callsite_name_ids\n            .pin()\n            .update_or_insert(metadata.name(), |&cid| cid + 1, 0);\n\n        // we hope that most span names are unique, in which case this will always be 0\n        let normalized_name = if cid == 0 {\n            metadata.name().into()\n        } else {\n            // if the span name is not unique, add the numeric ID to span name to distinguish it.\n            // sadly this is non-deterministic across restarts, but we should fix it by disambiguating re-used span names instead.\n            format!(\"{}#{cid}\", metadata.name()).into()\n        };\n\n        Self {\n            extract,\n            normalized_name,\n        }\n    }\n}\n\n#[derive(Clone)]\nstruct RawValue(Box<[u8]>);\n\nimpl RawValue {\n    fn new(v: impl json::ValueEncoder) -> Self {\n        Self(json::value_to_vec!(|val| v.encode(val)).into_boxed_slice())\n    }\n}\n\nimpl json::ValueEncoder for &RawValue {\n    fn encode(self, v: json::ValueSer<'_>) {\n        
v.write_raw_json(&self.0);\n    }\n}\n\n/// Stores span field values recorded during the span's lifetime.\nstruct SpanFields {\n    values: [Option<RawValue>; MAX_TRACING_FIELDS],\n\n    /// cached span info so we can avoid extra hashmap lookups in the hot path.\n    span_info: CallsiteSpanInfo,\n}\n\nimpl SpanFields {\n    fn new(span_info: CallsiteSpanInfo) -> Self {\n        Self {\n            span_info,\n            values: [const { None }; MAX_TRACING_FIELDS],\n        }\n    }\n}\n\nimpl tracing::field::Visit for SpanFields {\n    #[inline]\n    fn record_f64(&mut self, field: &tracing::field::Field, value: f64) {\n        self.values[field.index()] = Some(RawValue::new(value));\n    }\n\n    #[inline]\n    fn record_i64(&mut self, field: &tracing::field::Field, value: i64) {\n        self.values[field.index()] = Some(RawValue::new(value));\n    }\n\n    #[inline]\n    fn record_u64(&mut self, field: &tracing::field::Field, value: u64) {\n        self.values[field.index()] = Some(RawValue::new(value));\n    }\n\n    #[inline]\n    fn record_i128(&mut self, field: &tracing::field::Field, value: i128) {\n        if let Ok(value) = i64::try_from(value) {\n            self.values[field.index()] = Some(RawValue::new(value));\n        } else {\n            self.values[field.index()] = Some(RawValue::new(format_args!(\"{value}\")));\n        }\n    }\n\n    #[inline]\n    fn record_u128(&mut self, field: &tracing::field::Field, value: u128) {\n        if let Ok(value) = u64::try_from(value) {\n            self.values[field.index()] = Some(RawValue::new(value));\n        } else {\n            self.values[field.index()] = Some(RawValue::new(format_args!(\"{value}\")));\n        }\n    }\n\n    #[inline]\n    fn record_bool(&mut self, field: &tracing::field::Field, value: bool) {\n        self.values[field.index()] = Some(RawValue::new(value));\n    }\n\n    #[inline]\n    fn record_bytes(&mut self, field: &tracing::field::Field, value: &[u8]) {\n        
self.values[field.index()] = Some(RawValue::new(value));\n    }\n\n    #[inline]\n    fn record_str(&mut self, field: &tracing::field::Field, value: &str) {\n        self.values[field.index()] = Some(RawValue::new(value));\n    }\n\n    #[inline]\n    fn record_debug(&mut self, field: &tracing::field::Field, value: &dyn std::fmt::Debug) {\n        self.values[field.index()] = Some(RawValue::new(format_args!(\"{value:?}\")));\n    }\n\n    #[inline]\n    fn record_error(\n        &mut self,\n        field: &tracing::field::Field,\n        value: &(dyn std::error::Error + 'static),\n    ) {\n        self.values[field.index()] = Some(RawValue::new(format_args!(\"{value}\")));\n    }\n}\n\n/// List of field indices skipped during logging. Can list duplicate fields or\n/// metafields not meant to be logged.\n#[derive(Copy, Clone, Default)]\nstruct SkippedFieldIndices {\n    // 32-bits is large enough for `MAX_TRACING_FIELDS`\n    bits: u32,\n}\n\nimpl SkippedFieldIndices {\n    #[inline]\n    fn is_empty(self) -> bool {\n        self.bits == 0\n    }\n\n    #[inline]\n    fn set(&mut self, index: usize) {\n        debug_assert!(index <= 32, \"index out of bounds of 32-bit set\");\n        self.bits |= 1 << index;\n    }\n\n    #[inline]\n    fn contains(self, index: usize) -> bool {\n        self.bits & (1 << index) != 0\n    }\n}\n\n/// Formats a tracing event and writes JSON to its internal buffer including a newline.\n// TODO: buffer capacity management, truncate if too large\nstruct EventFormatter {\n    logline_buffer: Vec<u8>,\n}\n\nimpl EventFormatter {\n    #[inline]\n    const fn new() -> Self {\n        EventFormatter {\n            logline_buffer: Vec::new(),\n        }\n    }\n\n    #[inline]\n    fn buffer(&self) -> &[u8] {\n        &self.logline_buffer\n    }\n\n    fn format<S>(\n        &mut self,\n        now: DateTime<Utc>,\n        event: &Event<'_>,\n        ctx: &Context<'_, S>,\n        skipped_field_indices: &CallsiteMap<SkippedFieldIndices>,\n    
    extract_fields: &'static [&'static str],\n    ) where\n        S: Subscriber + for<'a> LookupSpan<'a>,\n    {\n        let timestamp = now.to_rfc3339_opts(chrono::SecondsFormat::Micros, true);\n\n        use tracing_log::NormalizeEvent;\n        let normalized_meta = event.normalized_metadata();\n        let meta = normalized_meta.as_ref().unwrap_or_else(|| event.metadata());\n\n        let skipped_field_indices = skipped_field_indices\n            .pin()\n            .get(&meta.callsite())\n            .copied()\n            .unwrap_or_default();\n\n        self.logline_buffer.clear();\n        let serializer = json::ValueSer::new(&mut self.logline_buffer);\n        json::value_as_object!(|serializer| {\n            // Timestamp comes first, so raw lines can be sorted by timestamp.\n            serializer.entry(\"timestamp\", &*timestamp);\n\n            // Level next.\n            serializer.entry(\"level\", meta.level().as_str());\n\n            // Message next.\n            let mut message_extractor =\n                MessageFieldExtractor::new(serializer.key(\"message\"), skipped_field_indices);\n            event.record(&mut message_extractor);\n            message_extractor.finish();\n\n            // Direct message fields.\n            {\n                let mut message_skipper = MessageFieldSkipper::new(\n                    serializer.key(\"fields\").object(),\n                    skipped_field_indices,\n                );\n                event.record(&mut message_skipper);\n\n                // rollback if no fields are present.\n                if message_skipper.present {\n                    message_skipper.serializer.finish();\n                }\n            }\n\n            let mut extracted = ExtractedSpanFields::new(extract_fields);\n\n            let spans = serializer.key(\"spans\");\n            json::value_as_object!(|spans| {\n                let parent_spans = ctx\n                    .event_span(event)\n                    
.map_or(vec![], |parent| parent.scope().collect());\n\n                for span in parent_spans.iter().rev() {\n                    let ext = span.extensions();\n\n                    // all spans should have this extension.\n                    let Some(fields) = ext.get() else { continue };\n\n                    extracted.layer_span(fields);\n\n                    let SpanFields { values, span_info } = fields;\n\n                    let span_fields = spans.key(&*span_info.normalized_name);\n                    json::value_as_object!(|span_fields| {\n                        for (field, value) in std::iter::zip(span.metadata().fields(), values) {\n                            if let Some(value) = value {\n                                span_fields.entry(field.name(), value);\n                            }\n                        }\n                    });\n                }\n            });\n\n            // TODO: thread-local cache?\n            let pid = std::process::id();\n            // Skip adding pid 1 to reduce noise for services running in containers.\n            if pid != 1 {\n                serializer.entry(\"process_id\", pid);\n            }\n\n            THREAD_ID.with(|tid| serializer.entry(\"thread_id\", tid));\n\n            // TODO: tls cache? 
name could change\n            if let Some(thread_name) = std::thread::current().name()\n                && !thread_name.is_empty()\n                && thread_name != \"tokio-runtime-worker\"\n            {\n                serializer.entry(\"thread_name\", thread_name);\n            }\n\n            if let Some(task_id) = tokio::task::try_id() {\n                serializer.entry(\"task_id\", format_args!(\"{task_id}\"));\n            }\n\n            serializer.entry(\"target\", meta.target());\n\n            // Skip adding module if it's the same as target.\n            if let Some(module) = meta.module_path()\n                && module != meta.target()\n            {\n                serializer.entry(\"module\", module);\n            }\n\n            if let Some(file) = meta.file() {\n                if let Some(line) = meta.line() {\n                    serializer.entry(\"src\", format_args!(\"{file}:{line}\"));\n                } else {\n                    serializer.entry(\"src\", file);\n                }\n            }\n\n            {\n                let otel_context = Span::current().context();\n                let otel_spanref = otel_context.span();\n                let span_context = otel_spanref.span_context();\n                if span_context.is_valid() {\n                    serializer.entry(\"trace_id\", format_args!(\"{}\", span_context.trace_id()));\n                }\n            }\n\n            if extracted.has_values() {\n                // TODO: add fields from event, too?\n                let extract = serializer.key(\"extract\");\n                json::value_as_object!(|extract| {\n                    for (key, value) in std::iter::zip(extracted.names, extracted.values) {\n                        if let Some(value) = value {\n                            extract.entry(*key, &value);\n                        }\n                    }\n                });\n            }\n        });\n\n        self.logline_buffer.push(b'\\n');\n    
}\n}\n\n/// Extracts the message field that's mixed with other fields.\nstruct MessageFieldExtractor<'buf> {\n    serializer: Option<json::ValueSer<'buf>>,\n    skipped_field_indices: SkippedFieldIndices,\n}\n\nimpl<'buf> MessageFieldExtractor<'buf> {\n    #[inline]\n    fn new(serializer: json::ValueSer<'buf>, skipped_field_indices: SkippedFieldIndices) -> Self {\n        Self {\n            serializer: Some(serializer),\n            skipped_field_indices,\n        }\n    }\n\n    #[inline]\n    fn finish(self) {\n        if let Some(ser) = self.serializer {\n            ser.value(\"\");\n        }\n    }\n\n    #[inline]\n    fn record_field(&mut self, field: &tracing::field::Field, v: impl json::ValueEncoder) {\n        if field.name() == MESSAGE_FIELD\n            && !self.skipped_field_indices.contains(field.index())\n            && let Some(ser) = self.serializer.take()\n        {\n            ser.value(v);\n        }\n    }\n}\n\nimpl tracing::field::Visit for MessageFieldExtractor<'_> {\n    #[inline]\n    fn record_f64(&mut self, field: &tracing::field::Field, value: f64) {\n        self.record_field(field, value);\n    }\n\n    #[inline]\n    fn record_i64(&mut self, field: &tracing::field::Field, value: i64) {\n        self.record_field(field, value);\n    }\n\n    #[inline]\n    fn record_u64(&mut self, field: &tracing::field::Field, value: u64) {\n        self.record_field(field, value);\n    }\n\n    #[inline]\n    fn record_i128(&mut self, field: &tracing::field::Field, value: i128) {\n        self.record_field(field, value);\n    }\n\n    #[inline]\n    fn record_u128(&mut self, field: &tracing::field::Field, value: u128) {\n        self.record_field(field, value);\n    }\n\n    #[inline]\n    fn record_bool(&mut self, field: &tracing::field::Field, value: bool) {\n        self.record_field(field, value);\n    }\n\n    #[inline]\n    fn record_bytes(&mut self, field: &tracing::field::Field, value: &[u8]) {\n        self.record_field(field, 
format_args!(\"{value:x?}\"));\n    }\n\n    #[inline]\n    fn record_str(&mut self, field: &tracing::field::Field, value: &str) {\n        self.record_field(field, value);\n    }\n\n    #[inline]\n    fn record_debug(&mut self, field: &tracing::field::Field, value: &dyn std::fmt::Debug) {\n        self.record_field(field, format_args!(\"{value:?}\"));\n    }\n\n    #[inline]\n    fn record_error(\n        &mut self,\n        field: &tracing::field::Field,\n        value: &(dyn std::error::Error + 'static),\n    ) {\n        self.record_field(field, format_args!(\"{value}\"));\n    }\n}\n\n/// A tracing field visitor that skips the message field.\nstruct MessageFieldSkipper<'buf> {\n    serializer: json::ObjectSer<'buf>,\n    skipped_field_indices: SkippedFieldIndices,\n    present: bool,\n}\n\nimpl<'buf> MessageFieldSkipper<'buf> {\n    #[inline]\n    fn new(serializer: json::ObjectSer<'buf>, skipped_field_indices: SkippedFieldIndices) -> Self {\n        Self {\n            serializer,\n            skipped_field_indices,\n            present: false,\n        }\n    }\n\n    #[inline]\n    fn record_field(&mut self, field: &tracing::field::Field, v: impl json::ValueEncoder) {\n        if field.name() != MESSAGE_FIELD\n            && !field.name().starts_with(\"log.\")\n            && !self.skipped_field_indices.contains(field.index())\n        {\n            self.serializer.entry(field.name(), v);\n            self.present |= true;\n        }\n    }\n}\n\nimpl tracing::field::Visit for MessageFieldSkipper<'_> {\n    #[inline]\n    fn record_f64(&mut self, field: &tracing::field::Field, value: f64) {\n        self.record_field(field, value);\n    }\n\n    #[inline]\n    fn record_i64(&mut self, field: &tracing::field::Field, value: i64) {\n        self.record_field(field, value);\n    }\n\n    #[inline]\n    fn record_u64(&mut self, field: &tracing::field::Field, value: u64) {\n        self.record_field(field, value);\n    }\n\n    #[inline]\n    fn record_i128(&mut 
self, field: &tracing::field::Field, value: i128) {\n        self.record_field(field, value);\n    }\n\n    #[inline]\n    fn record_u128(&mut self, field: &tracing::field::Field, value: u128) {\n        self.record_field(field, value);\n    }\n\n    #[inline]\n    fn record_bool(&mut self, field: &tracing::field::Field, value: bool) {\n        self.record_field(field, value);\n    }\n\n    #[inline]\n    fn record_bytes(&mut self, field: &tracing::field::Field, value: &[u8]) {\n        self.record_field(field, format_args!(\"{value:x?}\"));\n    }\n\n    #[inline]\n    fn record_str(&mut self, field: &tracing::field::Field, value: &str) {\n        self.record_field(field, value);\n    }\n\n    #[inline]\n    fn record_debug(&mut self, field: &tracing::field::Field, value: &dyn std::fmt::Debug) {\n        self.record_field(field, format_args!(\"{value:?}\"));\n    }\n\n    #[inline]\n    fn record_error(\n        &mut self,\n        field: &tracing::field::Field,\n        value: &(dyn std::error::Error + 'static),\n    ) {\n        self.record_field(field, format_args!(\"{value}\"));\n    }\n}\n\nstruct ExtractedSpanFields {\n    names: &'static [&'static str],\n    values: Vec<Option<RawValue>>,\n}\n\nimpl ExtractedSpanFields {\n    fn new(names: &'static [&'static str]) -> Self {\n        ExtractedSpanFields {\n            names,\n            values: vec![None; names.len()],\n        }\n    }\n\n    fn layer_span(&mut self, fields: &SpanFields) {\n        let SpanFields { values, span_info } = fields;\n\n        // extract the fields\n        for (i, &j) in span_info.extract.iter().enumerate() {\n            let Some(Some(value)) = values.get(j) else {\n                continue;\n            };\n\n            // TODO: replace clone with reference, if possible.\n            self.values[i] = Some(value.clone());\n        }\n    }\n\n    #[inline]\n    fn has_values(&self) -> bool {\n        self.values.iter().any(|v| v.is_some())\n    }\n}\n\n#[cfg(test)]\nmod 
tests {\n    use std::sync::{Arc, Mutex, MutexGuard};\n\n    use assert_json_diff::assert_json_eq;\n    use tracing::info_span;\n\n    use super::*;\n\n    struct TestClock {\n        current_time: Mutex<DateTime<Utc>>,\n    }\n\n    impl Clock for Arc<TestClock> {\n        fn now(&self) -> DateTime<Utc> {\n            *self.current_time.lock().expect(\"poisoned\")\n        }\n    }\n\n    struct VecWriter<'a> {\n        buffer: MutexGuard<'a, Vec<u8>>,\n    }\n\n    impl MakeWriter for Arc<Mutex<Vec<u8>>> {\n        fn make_writer(&self) -> impl io::Write {\n            VecWriter {\n                buffer: self.lock().expect(\"poisoned\"),\n            }\n        }\n    }\n\n    impl io::Write for VecWriter<'_> {\n        fn write(&mut self, buf: &[u8]) -> io::Result<usize> {\n            self.buffer.write(buf)\n        }\n\n        fn flush(&mut self) -> io::Result<()> {\n            Ok(())\n        }\n    }\n\n    #[test]\n    fn test_field_collection() {\n        let clock = Arc::new(TestClock {\n            current_time: Mutex::new(Utc::now()),\n        });\n        let buffer = Arc::new(Mutex::new(Vec::new()));\n        let log_layer = JsonLoggingLayer {\n            clock: clock.clone(),\n            skipped_field_indices: papaya::HashMap::default(),\n            span_info: papaya::HashMap::default(),\n            callsite_name_ids: papaya::HashMap::default(),\n            writer: buffer.clone(),\n            extract_fields: &[\"x\"],\n        };\n\n        let registry = tracing_subscriber::Registry::default().with(log_layer);\n\n        tracing::subscriber::with_default(registry, || {\n            info_span!(\"some_span\", x = 24).in_scope(|| {\n                info_span!(\"some_other_span\", y = 30).in_scope(|| {\n                    info_span!(\"some_span\", x = 40, x = 41, x = 42).in_scope(|| {\n                        tracing::error!(\n                            a = 1,\n                            a = 2,\n                            a = 3,\n           
                 message = \"explicit message field\",\n                            \"implicit message field\"\n                        );\n                    });\n                });\n            });\n        });\n\n        let buffer = Arc::try_unwrap(buffer)\n            .expect(\"no other reference\")\n            .into_inner()\n            .expect(\"poisoned\");\n        let actual: serde_json::Value = serde_json::from_slice(&buffer).expect(\"valid JSON\");\n        let expected: serde_json::Value = serde_json::json!(\n            {\n                \"timestamp\": clock.now().to_rfc3339_opts(chrono::SecondsFormat::Micros, true),\n                \"level\": \"ERROR\",\n                \"message\": \"explicit message field\",\n                \"fields\": {\n                    \"a\": 3,\n                },\n                \"spans\": {\n                    \"some_span\":{\n                        \"x\": 24,\n                    },\n                    \"some_other_span\": {\n                        \"y\": 30,\n                    },\n                    \"some_span#1\": {\n                        \"x\": 42,\n                    },\n                },\n                \"extract\": {\n                    \"x\": 42,\n                },\n                \"src\": actual.as_object().unwrap().get(\"src\").unwrap().as_str().unwrap(),\n                \"target\": \"proxy::logging::tests\",\n                \"process_id\": actual.as_object().unwrap().get(\"process_id\").unwrap().as_number().unwrap(),\n                \"thread_id\": actual.as_object().unwrap().get(\"thread_id\").unwrap().as_number().unwrap(),\n                \"thread_name\": \"logging::tests::test_field_collection\",\n            }\n        );\n\n        assert_json_eq!(actual, expected);\n    }\n}\n"
  },
  {
    "path": "proxy/src/metrics.rs",
    "content": "use std::sync::{Arc, OnceLock};\n\nuse lasso::ThreadedRodeo;\nuse measured::label::{\n    FixedCardinalitySet, LabelGroupSet, LabelGroupVisitor, LabelName, LabelSet, LabelValue,\n    StaticLabelSet,\n};\nuse measured::metric::group::Encoding;\nuse measured::metric::histogram::Thresholds;\nuse measured::metric::name::MetricName;\nuse measured::{\n    Counter, CounterVec, FixedCardinalityLabel, Gauge, GaugeVec, Histogram, HistogramVec,\n    LabelGroup, MetricGroup,\n};\nuse metrics::{CounterPairAssoc, CounterPairVec, HyperLogLogVec, InfoMetric};\nuse tokio::time::{self, Instant};\n\nuse crate::control_plane::messages::ColdStartInfo;\nuse crate::error::ErrorKind;\n\n#[derive(MetricGroup)]\n#[metric(new())]\npub struct Metrics {\n    #[metric(namespace = \"proxy\")]\n    #[metric(init = ProxyMetrics::new())]\n    pub proxy: ProxyMetrics,\n\n    #[metric(namespace = \"wake_compute_lock\")]\n    pub wake_compute_lock: ApiLockMetrics,\n\n    #[metric(namespace = \"service\")]\n    pub service: ServiceMetrics,\n\n    #[metric(namespace = \"cache\")]\n    pub cache: CacheMetrics,\n}\n\nimpl Metrics {\n    #[track_caller]\n    pub fn get() -> &'static Self {\n        static SELF: OnceLock<Metrics> = OnceLock::new();\n\n        SELF.get_or_init(|| {\n            let mut metrics = Metrics::new();\n\n            metrics.proxy.errors_total.init_all_dense();\n            metrics.proxy.redis_errors_total.init_all_dense();\n            metrics.proxy.redis_events_count.init_all_dense();\n            metrics.proxy.retries_metric.init_all_dense();\n            metrics.proxy.connection_failures_total.init_all_dense();\n\n            metrics\n        })\n    }\n}\n\n#[derive(MetricGroup)]\n#[metric(new())]\npub struct ProxyMetrics {\n    #[metric(flatten)]\n    pub db_connections: CounterPairVec<NumDbConnectionsGauge>,\n    #[metric(flatten)]\n    pub client_connections: CounterPairVec<NumClientConnectionsGauge>,\n    #[metric(flatten)]\n    pub connection_requests: 
CounterPairVec<NumConnectionRequestsGauge>,\n    #[metric(flatten)]\n    pub http_endpoint_pools: HttpEndpointPools,\n    #[metric(flatten)]\n    pub cancel_channel_size: CounterPairVec<CancelChannelSizeGauge>,\n\n    /// Time it took for proxy to establish a connection to the compute endpoint.\n    // largest bucket = 2^16 * 0.5ms = 32s\n    #[metric(metadata = Thresholds::exponential_buckets(0.0005, 2.0))]\n    pub compute_connection_latency_seconds: HistogramVec<ComputeConnectionLatencySet, 16>,\n\n    /// Time it took for proxy to receive a response from control plane.\n    #[metric(\n        // largest bucket = 2^16 * 0.2ms = 13s\n        metadata = Thresholds::exponential_buckets(0.0002, 2.0),\n    )]\n    pub console_request_latency: HistogramVec<ConsoleRequestSet, 16>,\n\n    /// Size of the HTTP request body lengths.\n    // smallest bucket = 16 bytes\n    // largest bucket = 4^12 * 16 bytes = 256MB\n    #[metric(metadata = Thresholds::exponential_buckets(16.0, 4.0))]\n    pub http_conn_content_length_bytes: HistogramVec<StaticLabelSet<HttpDirection>, 12>,\n\n    /// Time it takes to reclaim unused connection pools.\n    #[metric(metadata = Thresholds::exponential_buckets(1e-6, 2.0))]\n    pub http_pool_reclaimation_lag_seconds: Histogram<16>,\n\n    /// Number of opened connections to a database.\n    pub http_pool_opened_connections: Gauge,\n\n    /// Number of allowed ips\n    #[metric(metadata = Thresholds::with_buckets([0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 10.0, 20.0, 50.0, 100.0]))]\n    pub allowed_ips_number: Histogram<10>,\n\n    /// Number of allowed VPC endpoints IDs\n    #[metric(metadata = Thresholds::with_buckets([0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 10.0, 20.0, 50.0, 100.0]))]\n    pub allowed_vpc_endpoint_ids: Histogram<10>,\n\n    /// Number of connections, by the method we used to determine the endpoint.\n    pub accepted_connections_by_sni: CounterVec<SniSet>,\n\n    /// Number of connection failures (per kind).\n    pub connection_failures_total: 
CounterVec<StaticLabelSet<ConnectionFailureKind>>,\n\n    /// Number of wake-up failures (per kind).\n    pub connection_failures_breakdown: CounterVec<ConnectionFailuresBreakdownSet>,\n\n    /// Number of bytes sent/received between all clients and backends.\n    pub io_bytes: CounterVec<StaticLabelSet<Direction>>,\n\n    /// Number of IO errors while logging.\n    pub logging_errors_count: Counter,\n\n    /// Number of errors by a given classification.\n    pub errors_total: CounterVec<StaticLabelSet<crate::error::ErrorKind>>,\n\n    /// Number of cancellation requests (per found/not_found).\n    pub cancellation_requests_total: CounterVec<CancellationRequestSet>,\n\n    /// Number of errors by a given classification\n    pub redis_errors_total: CounterVec<RedisErrorsSet>,\n\n    /// Number of TLS handshake failures\n    pub tls_handshake_failures: Counter,\n\n    /// Number of SHA 256 rounds executed.\n    pub sha_rounds: Counter,\n\n    /// HLL approximate cardinality of endpoints that are connecting\n    pub connecting_endpoints: HyperLogLogVec<StaticLabelSet<Protocol>, 32>,\n\n    /// Number of endpoints affected by errors of a given classification\n    pub endpoints_affected_by_errors: HyperLogLogVec<StaticLabelSet<crate::error::ErrorKind>, 32>,\n\n    /// Number of retries (per outcome, per retry_type).\n    #[metric(metadata = Thresholds::with_buckets([0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0]))]\n    pub retries_metric: HistogramVec<RetriesMetricSet, 9>,\n\n    /// Number of events consumed from redis (per event type).\n    pub redis_events_count: CounterVec<StaticLabelSet<RedisEventsCount>>,\n\n    #[metric(namespace = \"connect_compute_lock\")]\n    pub connect_compute_lock: ApiLockMetrics,\n\n    #[metric(namespace = \"scram_pool\")]\n    pub scram_pool: OnceLockWrapper<Arc<ThreadPoolMetrics>>,\n}\n\n/// A Wrapper over [`OnceLock`] to implement [`MetricGroup`].\npub struct OnceLockWrapper<T>(pub OnceLock<T>);\n\nimpl<T> Default for 
OnceLockWrapper<T> {\n    fn default() -> Self {\n        Self(OnceLock::new())\n    }\n}\n\nimpl<Enc: Encoding, T: MetricGroup<Enc>> MetricGroup<Enc> for OnceLockWrapper<T> {\n    fn collect_group_into(&self, enc: &mut Enc) -> Result<(), Enc::Err> {\n        if let Some(inner) = self.0.get() {\n            inner.collect_group_into(enc)?;\n        }\n        Ok(())\n    }\n}\n\n#[derive(MetricGroup)]\n#[metric(new())]\npub struct ApiLockMetrics {\n    /// Number of semaphores registered in this api lock\n    pub semaphores_registered: Counter,\n    /// Number of semaphores unregistered in this api lock\n    pub semaphores_unregistered: Counter,\n    /// Time it takes to reclaim unused semaphores in the api lock\n    #[metric(metadata = Thresholds::exponential_buckets(1e-6, 2.0))]\n    pub reclamation_lag_seconds: Histogram<16>,\n    /// Time it takes to acquire a semaphore lock\n    #[metric(metadata = Thresholds::exponential_buckets(1e-4, 2.0))]\n    pub semaphore_acquire_seconds: Histogram<16>,\n}\n\nimpl Default for ApiLockMetrics {\n    fn default() -> Self {\n        Self::new()\n    }\n}\n\n#[derive(FixedCardinalityLabel, Copy, Clone)]\n#[label(singleton = \"direction\")]\npub enum HttpDirection {\n    Request,\n    Response,\n}\n\n#[derive(FixedCardinalityLabel, Copy, Clone)]\n#[label(singleton = \"direction\")]\npub enum Direction {\n    Tx,\n    Rx,\n}\n\n#[derive(FixedCardinalityLabel, Clone, Copy, Debug)]\n#[label(singleton = \"protocol\")]\npub enum Protocol {\n    Http,\n    Ws,\n    Tcp,\n    SniRouter,\n}\n\nimpl Protocol {\n    pub fn as_str(self) -> &'static str {\n        match self {\n            Protocol::Http => \"http\",\n            Protocol::Ws => \"ws\",\n            Protocol::Tcp => \"tcp\",\n            Protocol::SniRouter => \"sni_router\",\n        }\n    }\n}\n\nimpl std::fmt::Display for Protocol {\n    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {\n        f.write_str(self.as_str())\n    
}\n}\n\n#[derive(FixedCardinalityLabel, Copy, Clone)]\npub enum Bool {\n    True,\n    False,\n}\n\n#[derive(LabelGroup)]\n#[label(set = ConsoleRequestSet)]\npub struct ConsoleRequest<'a> {\n    #[label(dynamic_with = ThreadedRodeo, default)]\n    pub request: &'a str,\n}\n\n#[derive(MetricGroup, Default)]\npub struct HttpEndpointPools {\n    /// Number of endpoints we have registered pools for\n    pub http_pool_endpoints_registered_total: Counter,\n    /// Number of endpoints we have unregistered pools for\n    pub http_pool_endpoints_unregistered_total: Counter,\n}\n\npub struct HttpEndpointPoolsGuard<'a> {\n    dec: &'a Counter,\n}\n\nimpl Drop for HttpEndpointPoolsGuard<'_> {\n    fn drop(&mut self) {\n        self.dec.inc();\n    }\n}\n\nimpl HttpEndpointPools {\n    pub fn guard(&self) -> HttpEndpointPoolsGuard<'_> {\n        self.http_pool_endpoints_registered_total.inc();\n        HttpEndpointPoolsGuard {\n            dec: &self.http_pool_endpoints_unregistered_total,\n        }\n    }\n}\npub struct NumDbConnectionsGauge;\nimpl CounterPairAssoc for NumDbConnectionsGauge {\n    const INC_NAME: &'static MetricName = MetricName::from_str(\"opened_db_connections_total\");\n    const DEC_NAME: &'static MetricName = MetricName::from_str(\"closed_db_connections_total\");\n    const INC_HELP: &'static str = \"Number of opened connections to a database.\";\n    const DEC_HELP: &'static str = \"Number of closed connections to a database.\";\n    type LabelGroupSet = StaticLabelSet<Protocol>;\n}\npub type NumDbConnectionsGuard<'a> = metrics::MeasuredCounterPairGuard<'a, NumDbConnectionsGauge>;\n\npub struct NumClientConnectionsGauge;\nimpl CounterPairAssoc for NumClientConnectionsGauge {\n    const INC_NAME: &'static MetricName = MetricName::from_str(\"opened_client_connections_total\");\n    const DEC_NAME: &'static MetricName = MetricName::from_str(\"closed_client_connections_total\");\n    const INC_HELP: &'static str = \"Number of opened connections from a 
client.\";\n    const DEC_HELP: &'static str = \"Number of closed connections from a client.\";\n    type LabelGroupSet = StaticLabelSet<Protocol>;\n}\npub type NumClientConnectionsGuard<'a> =\n    metrics::MeasuredCounterPairGuard<'a, NumClientConnectionsGauge>;\n\npub struct NumConnectionRequestsGauge;\nimpl CounterPairAssoc for NumConnectionRequestsGauge {\n    const INC_NAME: &'static MetricName = MetricName::from_str(\"accepted_connections_total\");\n    const DEC_NAME: &'static MetricName = MetricName::from_str(\"closed_connections_total\");\n    const INC_HELP: &'static str = \"Number of client connections accepted.\";\n    const DEC_HELP: &'static str = \"Number of client connections closed.\";\n    type LabelGroupSet = StaticLabelSet<Protocol>;\n}\npub type NumConnectionRequestsGuard<'a> =\n    metrics::MeasuredCounterPairGuard<'a, NumConnectionRequestsGauge>;\n\npub struct CancelChannelSizeGauge;\nimpl CounterPairAssoc for CancelChannelSizeGauge {\n    const INC_NAME: &'static MetricName = MetricName::from_str(\"opened_msgs_cancel_channel_total\");\n    const DEC_NAME: &'static MetricName = MetricName::from_str(\"closed_msgs_cancel_channel_total\");\n    const INC_HELP: &'static str = \"Number of processing messages in the cancellation channel.\";\n    const DEC_HELP: &'static str = \"Number of closed messages in the cancellation channel.\";\n    type LabelGroupSet = StaticLabelSet<RedisMsgKind>;\n}\npub type CancelChannelSizeGuard<'a> = metrics::MeasuredCounterPairGuard<'a, CancelChannelSizeGauge>;\n\n#[derive(LabelGroup)]\n#[label(set = ComputeConnectionLatencySet)]\npub struct ComputeConnectionLatencyGroup {\n    protocol: Protocol,\n    cold_start_info: ColdStartInfo,\n    outcome: ConnectOutcome,\n    excluded: LatencyExclusions,\n}\n\n#[derive(FixedCardinalityLabel, Copy, Clone)]\npub enum LatencyExclusions {\n    Client,\n    ClientAndCplane,\n    ClientCplaneCompute,\n    ClientCplaneComputeRetry,\n}\n\n#[derive(LabelGroup)]\n#[label(set = 
SniSet)]\npub struct SniGroup {\n    pub protocol: Protocol,\n    pub kind: SniKind,\n}\n\n#[derive(FixedCardinalityLabel, Copy, Clone)]\npub enum SniKind {\n    /// Domain name based routing. SNI for libpq/websockets. Host for HTTP\n    Sni,\n    /// Metadata based routing. `options` for libpq/websockets. Header for HTTP\n    NoSni,\n    /// Metadata based routing, using the password field.\n    PasswordHack,\n}\n\n#[derive(FixedCardinalityLabel, Copy, Clone)]\n#[label(singleton = \"kind\")]\npub enum ConnectionFailureKind {\n    ComputeCached,\n    ComputeUncached,\n}\n\n#[derive(LabelGroup)]\n#[label(set = ConnectionFailuresBreakdownSet)]\npub struct ConnectionFailuresBreakdownGroup {\n    pub kind: ErrorKind,\n    pub retry: Bool,\n}\n\n#[derive(LabelGroup, Copy, Clone)]\n#[label(set = RedisErrorsSet)]\npub struct RedisErrors<'a> {\n    #[label(dynamic_with = ThreadedRodeo, default)]\n    pub channel: &'a str,\n}\n\n#[derive(FixedCardinalityLabel, Copy, Clone)]\npub enum CancellationOutcome {\n    NotFound,\n    Found,\n    RateLimitExceeded,\n}\n\n#[derive(LabelGroup)]\n#[label(set = CancellationRequestSet)]\npub struct CancellationRequest {\n    pub kind: CancellationOutcome,\n}\n\n#[derive(Clone, Copy)]\npub enum Waiting {\n    Cplane,\n    Client,\n    Compute,\n    RetryTimeout,\n}\n\n#[derive(FixedCardinalityLabel, Copy, Clone)]\n#[label(singleton = \"kind\")]\n#[allow(clippy::enum_variant_names)]\npub enum RedisMsgKind {\n    Set,\n    Get,\n    Expire,\n    HGet,\n}\n\n#[derive(Default, Clone)]\npub struct LatencyAccumulated {\n    pub cplane: time::Duration,\n    pub client: time::Duration,\n    pub compute: time::Duration,\n    pub retry: time::Duration,\n}\n\nimpl std::fmt::Display for LatencyAccumulated {\n    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {\n        write!(\n            f,\n            \"client: {}, cplane: {}, compute: {}, retry: {}\",\n            self.client.as_micros(),\n            
self.cplane.as_micros(),\n            self.compute.as_micros(),\n            self.retry.as_micros()\n        )\n    }\n}\n\npub struct LatencyTimer {\n    // time since the stopwatch was started\n    start: time::Instant,\n    // time since the stopwatch was stopped\n    stop: Option<time::Instant>,\n    // accumulated time on the stopwatch\n    accumulated: LatencyAccumulated,\n    // label data\n    protocol: Protocol,\n    cold_start_info: ColdStartInfo,\n    outcome: ConnectOutcome,\n\n    skip_reporting: bool,\n}\n\nimpl LatencyTimer {\n    pub fn new(protocol: Protocol) -> Self {\n        Self {\n            start: time::Instant::now(),\n            stop: None,\n            accumulated: LatencyAccumulated::default(),\n            protocol,\n            cold_start_info: ColdStartInfo::Unknown,\n            // assume failed unless otherwise specified\n            outcome: ConnectOutcome::Failed,\n            skip_reporting: false,\n        }\n    }\n\n    pub(crate) fn noop(protocol: Protocol) -> Self {\n        Self {\n            start: time::Instant::now(),\n            stop: None,\n            accumulated: LatencyAccumulated::default(),\n            protocol,\n            cold_start_info: ColdStartInfo::Unknown,\n            // assume failed unless otherwise specified\n            outcome: ConnectOutcome::Failed,\n            skip_reporting: true,\n        }\n    }\n\n    pub fn unpause(&mut self, start: Instant, waiting_for: Waiting) {\n        let dur = start.elapsed();\n        match waiting_for {\n            Waiting::Cplane => self.accumulated.cplane += dur,\n            Waiting::Client => self.accumulated.client += dur,\n            Waiting::Compute => self.accumulated.compute += dur,\n            Waiting::RetryTimeout => self.accumulated.retry += dur,\n        }\n    }\n\n    pub fn cold_start_info(&mut self, cold_start_info: ColdStartInfo) {\n        self.cold_start_info = cold_start_info;\n    }\n\n    pub fn success(&mut self) {\n        // stop 
the stopwatch and record the time that we have accumulated\n        self.stop = Some(time::Instant::now());\n\n        // success\n        self.outcome = ConnectOutcome::Success;\n    }\n\n    pub fn accumulated(&self) -> LatencyAccumulated {\n        self.accumulated.clone()\n    }\n}\n\n#[derive(FixedCardinalityLabel, Clone, Copy, Debug)]\npub enum ConnectOutcome {\n    Success,\n    Failed,\n}\n\nimpl Drop for LatencyTimer {\n    fn drop(&mut self) {\n        if self.skip_reporting {\n            return;\n        }\n\n        let duration = self\n            .stop\n            .unwrap_or_else(time::Instant::now)\n            .duration_since(self.start);\n\n        let metric = &Metrics::get().proxy.compute_connection_latency_seconds;\n\n        // Excluding client communication from the accumulated time.\n        metric.observe(\n            ComputeConnectionLatencyGroup {\n                protocol: self.protocol,\n                cold_start_info: self.cold_start_info,\n                outcome: self.outcome,\n                excluded: LatencyExclusions::Client,\n            },\n            duration\n                .saturating_sub(self.accumulated.client)\n                .as_secs_f64(),\n        );\n\n        // Exclude client and cplane communication from the accumulated time.\n        let accumulated_total = self.accumulated.client + self.accumulated.cplane;\n        metric.observe(\n            ComputeConnectionLatencyGroup {\n                protocol: self.protocol,\n                cold_start_info: self.cold_start_info,\n                outcome: self.outcome,\n                excluded: LatencyExclusions::ClientAndCplane,\n            },\n            duration.saturating_sub(accumulated_total).as_secs_f64(),\n        );\n\n        // Exclude client, cplane, compute communication from the accumulated time.\n        let accumulated_total =\n            self.accumulated.client + self.accumulated.cplane + self.accumulated.compute;\n        metric.observe(\n      
      ComputeConnectionLatencyGroup {\n                protocol: self.protocol,\n                cold_start_info: self.cold_start_info,\n                outcome: self.outcome,\n                excluded: LatencyExclusions::ClientCplaneCompute,\n            },\n            duration.saturating_sub(accumulated_total).as_secs_f64(),\n        );\n\n        // Exclude client, cplane, compute, retry communication from the accumulated time.\n        let accumulated_total = self.accumulated.client\n            + self.accumulated.cplane\n            + self.accumulated.compute\n            + self.accumulated.retry;\n        metric.observe(\n            ComputeConnectionLatencyGroup {\n                protocol: self.protocol,\n                cold_start_info: self.cold_start_info,\n                outcome: self.outcome,\n                excluded: LatencyExclusions::ClientCplaneComputeRetry,\n            },\n            duration.saturating_sub(accumulated_total).as_secs_f64(),\n        );\n    }\n}\n\nimpl From<bool> for Bool {\n    fn from(value: bool) -> Self {\n        if value { Bool::True } else { Bool::False }\n    }\n}\n\n#[derive(LabelGroup)]\n#[label(set = RetriesMetricSet)]\npub struct RetriesMetricGroup {\n    pub outcome: ConnectOutcome,\n    pub retry_type: RetryType,\n}\n\n#[derive(FixedCardinalityLabel, Clone, Copy, Debug)]\npub enum RetryType {\n    WakeCompute,\n    ConnectToCompute,\n}\n\n#[derive(FixedCardinalityLabel, Clone, Copy, Debug)]\n#[label(singleton = \"event\")]\npub enum RedisEventsCount {\n    EndpointCreated,\n    BranchCreated,\n    ProjectCreated,\n    CancelSession,\n    InvalidateRole,\n    InvalidateEndpoint,\n    InvalidateProject,\n    InvalidateProjects,\n    InvalidateOrg,\n}\n\npub struct ThreadPoolWorkers(usize);\n#[derive(Copy, Clone)]\npub struct ThreadPoolWorkerId(pub usize);\n\nimpl LabelValue for ThreadPoolWorkerId {\n    fn visit<V: measured::label::LabelVisitor>(&self, v: V) -> V::Output {\n        v.write_int(self.0 as i64)\n    
}\n}\n\nimpl LabelGroup for ThreadPoolWorkerId {\n    fn visit_values(&self, v: &mut impl measured::label::LabelGroupVisitor) {\n        v.write_value(LabelName::from_str(\"worker\"), self);\n    }\n}\n\nimpl LabelGroupSet for ThreadPoolWorkers {\n    type Group<'a> = ThreadPoolWorkerId;\n\n    fn cardinality(&self) -> Option<usize> {\n        Some(self.0)\n    }\n\n    fn encode_dense(&self, value: Self::Unique) -> Option<usize> {\n        Some(value)\n    }\n\n    fn decode_dense(&self, value: usize) -> Self::Group<'_> {\n        ThreadPoolWorkerId(value)\n    }\n\n    type Unique = usize;\n\n    fn encode(&self, value: Self::Group<'_>) -> Option<Self::Unique> {\n        Some(value.0)\n    }\n\n    fn decode(&self, value: &Self::Unique) -> Self::Group<'_> {\n        ThreadPoolWorkerId(*value)\n    }\n}\n\nimpl LabelSet for ThreadPoolWorkers {\n    type Value<'a> = ThreadPoolWorkerId;\n\n    fn dynamic_cardinality(&self) -> Option<usize> {\n        Some(self.0)\n    }\n\n    fn encode(&self, value: Self::Value<'_>) -> Option<usize> {\n        (value.0 < self.0).then_some(value.0)\n    }\n\n    fn decode(&self, value: usize) -> Self::Value<'_> {\n        ThreadPoolWorkerId(value)\n    }\n}\n\nimpl FixedCardinalitySet for ThreadPoolWorkers {\n    fn cardinality(&self) -> usize {\n        self.0\n    }\n}\n\n#[derive(MetricGroup)]\n#[metric(new(workers: usize))]\npub struct ThreadPoolMetrics {\n    #[metric(init = CounterVec::with_label_set(ThreadPoolWorkers(workers)))]\n    pub worker_task_turns_total: CounterVec<ThreadPoolWorkers>,\n    #[metric(init = CounterVec::with_label_set(ThreadPoolWorkers(workers)))]\n    pub worker_task_skips_total: CounterVec<ThreadPoolWorkers>,\n}\n\n#[derive(MetricGroup, Default)]\npub struct ServiceMetrics {\n    pub info: InfoMetric<ServiceInfo>,\n}\n\n#[derive(Default)]\npub struct ServiceInfo {\n    pub state: ServiceState,\n}\n\nimpl ServiceInfo {\n    pub const fn running() -> Self {\n        ServiceInfo {\n            state: 
ServiceState::Running,\n        }\n    }\n\n    pub const fn terminating() -> Self {\n        ServiceInfo {\n            state: ServiceState::Terminating,\n        }\n    }\n}\n\nimpl LabelGroup for ServiceInfo {\n    fn visit_values(&self, v: &mut impl LabelGroupVisitor) {\n        const STATE: &LabelName = LabelName::from_str(\"state\");\n        v.write_value(STATE, &self.state);\n    }\n}\n\n#[derive(FixedCardinalityLabel, Clone, Copy, Debug, Default)]\n#[label(singleton = \"state\")]\npub enum ServiceState {\n    #[default]\n    Init,\n    Running,\n    Terminating,\n}\n\n#[derive(MetricGroup)]\n#[metric(new())]\npub struct CacheMetrics {\n    /// The capacity of the cache\n    pub capacity: GaugeVec<StaticLabelSet<CacheKind>>,\n    /// The total number of entries inserted into the cache\n    pub inserted_total: CounterVec<StaticLabelSet<CacheKind>>,\n    /// The total number of entries removed from the cache\n    pub evicted_total: CounterVec<CacheEvictionSet>,\n    /// The total number of cache requests\n    pub request_total: CounterVec<CacheOutcomeSet>,\n}\n\nimpl Default for CacheMetrics {\n    fn default() -> Self {\n        Self::new()\n    }\n}\n\n#[derive(FixedCardinalityLabel, Clone, Copy, Debug)]\n#[label(singleton = \"cache\")]\npub enum CacheKind {\n    NodeInfo,\n    ProjectInfoEndpoints,\n    ProjectInfoRoles,\n    Schema,\n    Pbkdf2,\n}\n\n#[derive(FixedCardinalityLabel, Clone, Copy, Debug)]\npub enum CacheRemovalCause {\n    Expired,\n    Explicit,\n    Replaced,\n    Size,\n}\n\n#[derive(LabelGroup)]\n#[label(set = CacheEvictionSet)]\npub struct CacheEviction {\n    pub cache: CacheKind,\n    pub cause: CacheRemovalCause,\n}\n\n#[derive(FixedCardinalityLabel, Copy, Clone)]\npub enum CacheOutcome {\n    Hit,\n    Miss,\n}\n\n#[derive(LabelGroup)]\n#[label(set = CacheOutcomeSet)]\npub struct CacheOutcomeGroup {\n    pub cache: CacheKind,\n    pub outcome: CacheOutcome,\n}\n"
  },
  {
    "path": "proxy/src/parse.rs",
    "content": "//! Small parsing helpers.\n\nuse std::ffi::CStr;\n\npub(crate) fn split_cstr(bytes: &[u8]) -> Option<(&CStr, &[u8])> {\n    let cstr = CStr::from_bytes_until_nul(bytes).ok()?;\n    let (_, other) = bytes.split_at(cstr.to_bytes_with_nul().len());\n    Some((cstr, other))\n}\n\n#[cfg(test)]\nmod tests {\n    use super::*;\n\n    #[test]\n    fn test_split_cstr() {\n        assert!(split_cstr(b\"\").is_none());\n        assert!(split_cstr(b\"foo\").is_none());\n\n        let (cstr, rest) = split_cstr(b\"\\0\").expect(\"uh-oh\");\n        assert_eq!(cstr.to_bytes(), b\"\");\n        assert_eq!(rest, b\"\");\n\n        let (cstr, rest) = split_cstr(b\"foo\\0bar\").expect(\"uh-oh\");\n        assert_eq!(cstr.to_bytes(), b\"foo\");\n        assert_eq!(rest, b\"bar\");\n    }\n}\n"
  },
  {
    "path": "proxy/src/pglb/copy_bidirectional.rs",
    "content": "use std::future::poll_fn;\nuse std::io;\nuse std::pin::Pin;\nuse std::task::{Context, Poll, ready};\n\nuse tokio::io::{AsyncRead, AsyncWrite, ReadBuf};\nuse tracing::info;\n\n#[derive(Debug)]\nenum TransferState {\n    Running(CopyBuffer),\n    ShuttingDown(u64),\n    Done(u64),\n}\n\n#[derive(Debug)]\npub(crate) enum ErrorDirection {\n    Read(io::Error),\n    Write(io::Error),\n}\n\nimpl ErrorSource {\n    fn from_client(err: ErrorDirection) -> ErrorSource {\n        match err {\n            ErrorDirection::Read(client) => Self::Client(client),\n            ErrorDirection::Write(compute) => Self::Compute(compute),\n        }\n    }\n    fn from_compute(err: ErrorDirection) -> ErrorSource {\n        match err {\n            ErrorDirection::Write(client) => Self::Client(client),\n            ErrorDirection::Read(compute) => Self::Compute(compute),\n        }\n    }\n}\n\n#[derive(Debug)]\npub enum ErrorSource {\n    Client(io::Error),\n    Compute(io::Error),\n}\n\nfn transfer_one_direction<A, B>(\n    cx: &mut Context<'_>,\n    state: &mut TransferState,\n    r: &mut A,\n    w: &mut B,\n) -> Poll<Result<u64, ErrorDirection>>\nwhere\n    A: AsyncRead + AsyncWrite + Unpin + ?Sized,\n    B: AsyncRead + AsyncWrite + Unpin + ?Sized,\n{\n    let mut r = Pin::new(r);\n    let mut w = Pin::new(w);\n    loop {\n        match state {\n            TransferState::Running(buf) => {\n                let count = ready!(buf.poll_copy(cx, r.as_mut(), w.as_mut()))?;\n                *state = TransferState::ShuttingDown(count);\n            }\n            TransferState::ShuttingDown(count) => {\n                ready!(w.as_mut().poll_shutdown(cx)).map_err(ErrorDirection::Write)?;\n                *state = TransferState::Done(*count);\n            }\n            TransferState::Done(count) => return Poll::Ready(Ok(*count)),\n        }\n    }\n}\n\n#[tracing::instrument(skip_all)]\npub async fn copy_bidirectional_client_compute<Client, Compute>(\n    client: &mut 
Client,\n    compute: &mut Compute,\n) -> Result<(u64, u64), ErrorSource>\nwhere\n    Client: AsyncRead + AsyncWrite + Unpin + ?Sized,\n    Compute: AsyncRead + AsyncWrite + Unpin + ?Sized,\n{\n    let mut client_to_compute = TransferState::Running(CopyBuffer::new());\n    let mut compute_to_client = TransferState::Running(CopyBuffer::new());\n\n    poll_fn(|cx| {\n        let mut client_to_compute_result =\n            transfer_one_direction(cx, &mut client_to_compute, client, compute)\n                .map_err(ErrorSource::from_client)?;\n        let mut compute_to_client_result =\n            transfer_one_direction(cx, &mut compute_to_client, compute, client)\n                .map_err(ErrorSource::from_compute)?;\n\n        // TODO: 1 info log, with a enum label for close direction.\n\n        // Early termination checks from compute to client.\n        if let TransferState::Done(_) = compute_to_client\n            && let TransferState::Running(buf) = &client_to_compute\n        {\n            info!(\"Compute is done, terminate client\");\n            // Initiate shutdown\n            client_to_compute = TransferState::ShuttingDown(buf.amt);\n            client_to_compute_result =\n                transfer_one_direction(cx, &mut client_to_compute, client, compute)\n                    .map_err(ErrorSource::from_client)?;\n        }\n\n        // Early termination checks from client to compute.\n        if let TransferState::Done(_) = client_to_compute\n            && let TransferState::Running(buf) = &compute_to_client\n        {\n            info!(\"Client is done, terminate compute\");\n            // Initiate shutdown\n            compute_to_client = TransferState::ShuttingDown(buf.amt);\n            compute_to_client_result =\n                transfer_one_direction(cx, &mut compute_to_client, compute, client)\n                    .map_err(ErrorSource::from_compute)?;\n        }\n\n        // It is not a problem if ready! returns early ... 
(comment remains the same)\n        let client_to_compute = ready!(client_to_compute_result);\n        let compute_to_client = ready!(compute_to_client_result);\n\n        Poll::Ready(Ok((client_to_compute, compute_to_client)))\n    })\n    .await\n}\n\n#[derive(Debug)]\npub(super) struct CopyBuffer {\n    read_done: bool,\n    need_flush: bool,\n    pos: usize,\n    cap: usize,\n    amt: u64,\n    buf: Box<[u8]>,\n}\nconst DEFAULT_BUF_SIZE: usize = 1024;\n\nimpl CopyBuffer {\n    pub(super) fn new() -> Self {\n        Self {\n            read_done: false,\n            need_flush: false,\n            pos: 0,\n            cap: 0,\n            amt: 0,\n            buf: vec![0; DEFAULT_BUF_SIZE].into_boxed_slice(),\n        }\n    }\n\n    fn poll_fill_buf<R>(\n        &mut self,\n        cx: &mut Context<'_>,\n        reader: Pin<&mut R>,\n    ) -> Poll<io::Result<()>>\n    where\n        R: AsyncRead + ?Sized,\n    {\n        let me = &mut *self;\n        let mut buf = ReadBuf::new(&mut me.buf);\n        buf.set_filled(me.cap);\n\n        let res = reader.poll_read(cx, &mut buf);\n        if let Poll::Ready(Ok(())) = res {\n            let filled_len = buf.filled().len();\n            me.read_done = me.cap == filled_len;\n            me.cap = filled_len;\n        }\n        res\n    }\n\n    fn poll_write_buf<R, W>(\n        &mut self,\n        cx: &mut Context<'_>,\n        mut reader: Pin<&mut R>,\n        mut writer: Pin<&mut W>,\n    ) -> Poll<Result<usize, ErrorDirection>>\n    where\n        R: AsyncRead + ?Sized,\n        W: AsyncWrite + ?Sized,\n    {\n        let me = &mut *self;\n        match writer.as_mut().poll_write(cx, &me.buf[me.pos..me.cap]) {\n            Poll::Pending => {\n                // Top up the buffer towards full if we can read a bit more\n                // data - this should improve the chances of a large write\n                if !me.read_done && me.cap < me.buf.len() {\n                    ready!(me.poll_fill_buf(cx, 
reader.as_mut())).map_err(ErrorDirection::Read)?;\n                }\n                Poll::Pending\n            }\n            res @ Poll::Ready(_) => res.map_err(ErrorDirection::Write),\n        }\n    }\n\n    pub(super) fn poll_copy<R, W>(\n        &mut self,\n        cx: &mut Context<'_>,\n        mut reader: Pin<&mut R>,\n        mut writer: Pin<&mut W>,\n    ) -> Poll<Result<u64, ErrorDirection>>\n    where\n        R: AsyncRead + ?Sized,\n        W: AsyncWrite + ?Sized,\n    {\n        loop {\n            // If there is some space left in our buffer, then we try to read some\n            // data to continue, thus maximizing the chances of a large write.\n            if self.cap < self.buf.len() && !self.read_done {\n                match self.poll_fill_buf(cx, reader.as_mut()) {\n                    Poll::Ready(Ok(())) => (),\n                    Poll::Ready(Err(err)) => return Poll::Ready(Err(ErrorDirection::Read(err))),\n                    Poll::Pending => {\n                        // Ignore pending reads when our buffer is not empty, because\n                        // we can try to write data immediately.\n                        if self.pos == self.cap {\n                            // Try flushing when the reader has no progress to avoid deadlock\n                            // when the reader depends on buffered writer.\n                            if self.need_flush {\n                                ready!(writer.as_mut().poll_flush(cx))\n                                    .map_err(ErrorDirection::Write)?;\n                                self.need_flush = false;\n                            }\n\n                            return Poll::Pending;\n                        }\n                    }\n                }\n            }\n\n            // If our buffer has some data, let's write it out!\n            while self.pos < self.cap {\n                let i = ready!(self.poll_write_buf(cx, reader.as_mut(), writer.as_mut()))?;\n                if 
i == 0 {\n                    return Poll::Ready(Err(ErrorDirection::Write(io::Error::new(\n                        io::ErrorKind::WriteZero,\n                        \"write zero byte into writer\",\n                    ))));\n                }\n                self.pos += i;\n                self.amt += i as u64;\n                self.need_flush = true;\n            }\n\n            // If pos larger than cap, this loop will never stop.\n            // In particular, user's wrong poll_write implementation returning\n            // incorrect written length may lead to thread blocking.\n            debug_assert!(\n                self.pos <= self.cap,\n                \"writer returned length larger than input slice\"\n            );\n\n            // All data has been written, the buffer can be considered empty again\n            self.pos = 0;\n            self.cap = 0;\n\n            // If we've written all the data and we've seen EOF, flush out the\n            // data and finish the transfer.\n            if self.read_done {\n                ready!(writer.as_mut().poll_flush(cx)).map_err(ErrorDirection::Write)?;\n                return Poll::Ready(Ok(self.amt));\n            }\n        }\n    }\n}\n\n#[cfg(test)]\nmod tests {\n    use tokio::io::AsyncWriteExt;\n\n    use super::*;\n\n    #[tokio::test]\n    async fn test_client_to_compute() {\n        let (mut client_client, mut client_proxy) = tokio::io::duplex(8); // Create a mock duplex stream\n        let (mut compute_proxy, mut compute_client) = tokio::io::duplex(32); // Create a mock duplex stream\n\n        // Simulate 'a' finishing while there's still data for 'b'\n        client_client.write_all(b\"hello\").await.unwrap();\n        client_client.shutdown().await.unwrap();\n        compute_client.write_all(b\"Neon\").await.unwrap();\n        compute_client.shutdown().await.unwrap();\n\n        let result = copy_bidirectional_client_compute(&mut client_proxy, &mut compute_proxy)\n            .await\n      
      .unwrap();\n\n        // Assert correct transferred amounts\n        let (client_to_compute_count, compute_to_client_count) = result;\n        assert_eq!(client_to_compute_count, 5); // 'hello' was transferred\n        assert_eq!(compute_to_client_count, 4); // response only partially transferred or not at all\n    }\n\n    #[tokio::test]\n    async fn test_compute_to_client() {\n        let (mut client_client, mut client_proxy) = tokio::io::duplex(32); // Create a mock duplex stream\n        let (mut compute_proxy, mut compute_client) = tokio::io::duplex(8); // Create a mock duplex stream\n\n        // Simulate 'a' finishing while there's still data for 'b'\n        compute_client.write_all(b\"hello\").await.unwrap();\n        compute_client.shutdown().await.unwrap();\n        client_client\n            .write_all(b\"Neon Serverless Postgres\")\n            .await\n            .unwrap();\n\n        let result = copy_bidirectional_client_compute(&mut client_proxy, &mut compute_proxy)\n            .await\n            .unwrap();\n\n        // Assert correct transferred amounts\n        let (client_to_compute_count, compute_to_client_count) = result;\n        assert_eq!(compute_to_client_count, 5); // 'hello' was transferred\n        assert!(client_to_compute_count <= 8); // response only partially transferred or not at all\n    }\n}\n"
  },
  {
    "path": "proxy/src/pglb/handshake.rs",
    "content": "use futures::{FutureExt, TryFutureExt};\nuse thiserror::Error;\nuse tokio::io::{AsyncRead, AsyncWrite};\nuse tracing::{debug, info, warn};\n\nuse crate::auth::endpoint_sni;\nuse crate::config::TlsConfig;\nuse crate::context::RequestContext;\nuse crate::error::ReportableError;\nuse crate::metrics::Metrics;\nuse crate::pglb::TlsRequired;\nuse crate::pqproto::{\n    BeMessage, CancelKeyData, FeStartupPacket, ProtocolVersion, StartupMessageParams,\n};\nuse crate::stream::{PqStream, Stream, StreamUpgradeError};\nuse crate::tls::PG_ALPN_PROTOCOL;\n\n#[derive(Error, Debug)]\npub(crate) enum HandshakeError {\n    #[error(\"data is sent before server replied with EncryptionResponse\")]\n    EarlyData,\n\n    #[error(\"protocol violation\")]\n    ProtocolViolation,\n\n    #[error(\"{0}\")]\n    StreamUpgradeError(#[from] StreamUpgradeError),\n\n    #[error(\"{0}\")]\n    Io(#[from] std::io::Error),\n\n    #[error(\"{0}\")]\n    ReportedError(#[from] crate::stream::ReportedError),\n}\n\nimpl ReportableError for HandshakeError {\n    fn get_error_kind(&self) -> crate::error::ErrorKind {\n        match self {\n            HandshakeError::EarlyData => crate::error::ErrorKind::User,\n            HandshakeError::ProtocolViolation => crate::error::ErrorKind::User,\n            HandshakeError::StreamUpgradeError(upgrade) => match upgrade {\n                StreamUpgradeError::AlreadyTls => crate::error::ErrorKind::Service,\n                StreamUpgradeError::Io(_) => crate::error::ErrorKind::ClientDisconnect,\n            },\n            HandshakeError::Io(_) => crate::error::ErrorKind::ClientDisconnect,\n            HandshakeError::ReportedError(e) => e.get_error_kind(),\n        }\n    }\n}\n\npub(crate) enum HandshakeData<S> {\n    Startup(PqStream<Stream<S>>, StartupMessageParams),\n    Cancel(CancelKeyData),\n}\n\n/// Establish a (most probably, secure) connection with the client.\n/// For better testing experience, `stream` can be any object satisfying the 
traits.\n/// It's easier to work with owned `stream` here as we need to upgrade it to TLS;\n/// we also take an extra care of propagating only the select handshake errors to client.\n#[tracing::instrument(skip_all)]\npub(crate) async fn handshake<S: AsyncRead + AsyncWrite + Unpin + Send>(\n    ctx: &RequestContext,\n    stream: S,\n    mut tls: Option<&TlsConfig>,\n    record_handshake_error: bool,\n) -> Result<HandshakeData<S>, HandshakeError> {\n    // Client may try upgrading to each protocol only once\n    let (mut tried_ssl, mut tried_gss) = (false, false);\n\n    const PG_PROTOCOL_EARLIEST: ProtocolVersion = ProtocolVersion::new(3, 0);\n    const PG_PROTOCOL_LATEST: ProtocolVersion = ProtocolVersion::new(3, 0);\n\n    let (mut stream, mut msg) = PqStream::parse_startup(Stream::from_raw(stream)).await?;\n    loop {\n        match msg {\n            FeStartupPacket::SslRequest { direct } => match stream.get_ref() {\n                Stream::Raw { .. } if !tried_ssl => {\n                    tried_ssl = true;\n\n                    if let Some(tls) = tls.take() {\n                        // Upgrade raw stream into a secure TLS-backed stream.\n                        // NOTE: We've consumed `tls`; this fact will be used later.\n\n                        let mut read_buf;\n                        let raw = if let Some(direct) = &direct {\n                            read_buf = &direct[..];\n                            stream.accept_direct_tls()\n                        } else {\n                            read_buf = &[];\n                            stream.accept_tls().await?\n                        };\n\n                        let Stream::Raw { raw } = raw else {\n                            return Err(HandshakeError::StreamUpgradeError(\n                                StreamUpgradeError::AlreadyTls,\n                            ));\n                        };\n\n                        let mut res = Ok(());\n                        let accept = 
tokio_rustls::TlsAcceptor::from(tls.pg_config.clone())\n                            .accept_with(raw, |session| {\n                                // push the early data to the tls session\n                                while !read_buf.is_empty() {\n                                    match session.read_tls(&mut read_buf) {\n                                        Ok(_) => {}\n                                        Err(e) => {\n                                            res = Err(e);\n                                            break;\n                                        }\n                                    }\n                                }\n                            })\n                            .map_ok(Box::new)\n                            .boxed();\n\n                        res?;\n\n                        if !read_buf.is_empty() {\n                            return Err(HandshakeError::EarlyData);\n                        }\n\n                        let tls_stream = accept.await.inspect_err(|_| {\n                            if record_handshake_error {\n                                Metrics::get().proxy.tls_handshake_failures.inc();\n                            }\n                        })?;\n\n                        let conn_info = tls_stream.get_ref().1;\n\n                        // try parse endpoint\n                        let ep = conn_info\n                            .server_name()\n                            .and_then(|sni| endpoint_sni(sni, &tls.common_names));\n                        if let Some(ep) = ep {\n                            ctx.set_endpoint_id(ep);\n                        }\n\n                        // check the ALPN, if exists, as required.\n                        match conn_info.alpn_protocol() {\n                            None | Some(PG_ALPN_PROTOCOL) => {}\n                            Some(other) => {\n                                let alpn = String::from_utf8_lossy(other);\n                            
    warn!(%alpn, \"unexpected ALPN\");\n                                return Err(HandshakeError::ProtocolViolation);\n                            }\n                        }\n\n                        let (_, tls_server_end_point) =\n                            tls.cert_resolver.resolve(conn_info.server_name());\n\n                        let tls = Stream::Tls {\n                            tls: tls_stream,\n                            tls_server_end_point,\n                        };\n                        (stream, msg) = PqStream::parse_startup(tls).await?;\n                    } else {\n                        if direct.is_some() {\n                            // client sent us a ClientHello already, we can't do anything with it.\n                            return Err(HandshakeError::ProtocolViolation);\n                        }\n                        msg = stream.reject_encryption().await?;\n                    }\n                }\n                _ => return Err(HandshakeError::ProtocolViolation),\n            },\n            FeStartupPacket::GssEncRequest => match stream.get_ref() {\n                Stream::Raw { .. 
} if !tried_gss => {\n                    tried_gss = true;\n\n                    // Currently, we don't support GSSAPI\n                    msg = stream.reject_encryption().await?;\n                }\n                _ => return Err(HandshakeError::ProtocolViolation),\n            },\n            FeStartupPacket::StartupMessage { params, version }\n                if PG_PROTOCOL_EARLIEST <= version && version <= PG_PROTOCOL_LATEST =>\n            {\n                // Check that the config has been consumed during upgrade\n                // OR we didn't provide it at all (for dev purposes).\n                if tls.is_some() {\n                    Err(stream.throw_error(TlsRequired, None).await)?;\n                }\n\n                // This log highlights the start of the connection.\n                // This contains useful information for debugging, not logged elsewhere, like role name and endpoint id.\n                info!(\n                    ?version,\n                    ?params,\n                    session_type = \"normal\",\n                    \"successful handshake\"\n                );\n                break Ok(HandshakeData::Startup(stream, params));\n            }\n            // downgrade protocol version\n            FeStartupPacket::StartupMessage { params, version }\n                if version.major() == 3 && version > PG_PROTOCOL_LATEST =>\n            {\n                debug!(?version, \"unsupported minor version\");\n\n                // no protocol extensions are supported.\n                // <https://github.com/postgres/postgres/blob/ca481d3c9ab7bf69ff0c8d71ad3951d407f6a33c/src/backend/tcop/backend_startup.c#L744-L753>\n                let mut unsupported = vec![];\n                let mut supported = StartupMessageParams::default();\n\n                for (k, v) in params.iter() {\n                    if k.starts_with(\"_pq_.\") {\n                        unsupported.push(k);\n                    } else {\n                        
supported.insert(k, v);\n                    }\n                }\n\n                stream.write_message(BeMessage::NegotiateProtocolVersion {\n                    version: PG_PROTOCOL_LATEST,\n                    options: &unsupported,\n                });\n                stream.flush().await?;\n\n                info!(\n                    ?version,\n                    ?params,\n                    session_type = \"normal\",\n                    \"successful handshake; unsupported minor version requested\"\n                );\n                break Ok(HandshakeData::Startup(stream, supported));\n            }\n            FeStartupPacket::StartupMessage { version, params } => {\n                warn!(\n                    ?version,\n                    ?params,\n                    session_type = \"normal\",\n                    \"unsuccessful handshake; unsupported version\"\n                );\n                return Err(HandshakeError::ProtocolViolation);\n            }\n            FeStartupPacket::CancelRequest(cancel_key_data) => {\n                info!(session_type = \"cancellation\", \"successful handshake\");\n                break Ok(HandshakeData::Cancel(cancel_key_data));\n            }\n        }\n    }\n}\n"
  },
  {
    "path": "proxy/src/pglb/inprocess.rs",
    "content": "#![allow(dead_code, reason = \"TODO: work in progress\")]\n\nuse std::pin::{Pin, pin};\nuse std::sync::Arc;\nuse std::sync::atomic::{AtomicUsize, Ordering};\nuse std::task::{Context, Poll};\nuse std::{fmt, io};\n\nuse tokio::io::{AsyncRead, AsyncWrite, DuplexStream, ReadBuf};\nuse tokio::sync::mpsc;\n\nconst STREAM_CHANNEL_SIZE: usize = 16;\nconst MAX_STREAM_BUFFER_SIZE: usize = 4096;\n\n#[derive(Debug)]\npub struct Connection {\n    stream_sender: mpsc::Sender<Stream>,\n    stream_receiver: mpsc::Receiver<Stream>,\n    stream_id_counter: Arc<AtomicUsize>,\n}\n\nimpl Connection {\n    pub fn new() -> (Connection, Connection) {\n        let (sender_a, receiver_a) = mpsc::channel(STREAM_CHANNEL_SIZE);\n        let (sender_b, receiver_b) = mpsc::channel(STREAM_CHANNEL_SIZE);\n\n        let stream_id_counter = Arc::new(AtomicUsize::new(1));\n\n        let conn_a = Connection {\n            stream_sender: sender_a,\n            stream_receiver: receiver_b,\n            stream_id_counter: Arc::clone(&stream_id_counter),\n        };\n        let conn_b = Connection {\n            stream_sender: sender_b,\n            stream_receiver: receiver_a,\n            stream_id_counter,\n        };\n\n        (conn_a, conn_b)\n    }\n\n    #[inline]\n    fn next_stream_id(&self) -> StreamId {\n        StreamId(self.stream_id_counter.fetch_add(1, Ordering::Relaxed))\n    }\n\n    #[tracing::instrument(skip_all, fields(stream_id = tracing::field::Empty, err))]\n    pub async fn open_stream(&self) -> io::Result<Stream> {\n        let (local, remote) = tokio::io::duplex(MAX_STREAM_BUFFER_SIZE);\n        let stream_id = self.next_stream_id();\n        tracing::Span::current().record(\"stream_id\", stream_id.0);\n\n        let local = Stream {\n            inner: local,\n            id: stream_id,\n        };\n        let remote = Stream {\n            inner: remote,\n            id: stream_id,\n        };\n\n        self.stream_sender\n            .send(remote)\n         
   .await\n            .map_err(io::Error::other)?;\n\n        Ok(local)\n    }\n\n    #[tracing::instrument(skip_all, fields(stream_id = tracing::field::Empty, err))]\n    pub async fn accept_stream(&mut self) -> io::Result<Option<Stream>> {\n        Ok(self.stream_receiver.recv().await.inspect(|stream| {\n            tracing::Span::current().record(\"stream_id\", stream.id.0);\n        }))\n    }\n}\n\n#[derive(Copy, Clone, Debug)]\npub struct StreamId(usize);\n\nimpl fmt::Display for StreamId {\n    #[inline]\n    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {\n        write!(f, \"{}\", self.0)\n    }\n}\n\n// TODO: Proper closing. Currently Streams can outlive their Connections.\n// Carry WeakSender and check strong_count?\n#[derive(Debug)]\npub struct Stream {\n    inner: DuplexStream,\n    id: StreamId,\n}\n\nimpl Stream {\n    #[inline]\n    pub fn id(&self) -> StreamId {\n        self.id\n    }\n}\n\nimpl AsyncRead for Stream {\n    #[tracing::instrument(level = \"debug\", skip_all, fields(stream_id = %self.id))]\n    #[inline]\n    fn poll_read(\n        mut self: Pin<&mut Self>,\n        cx: &mut Context<'_>,\n        buf: &mut ReadBuf<'_>,\n    ) -> Poll<io::Result<()>> {\n        pin!(&mut self.inner).poll_read(cx, buf)\n    }\n}\n\nimpl AsyncWrite for Stream {\n    #[tracing::instrument(level = \"debug\", skip_all, fields(stream_id = %self.id))]\n    #[inline]\n    fn poll_write(\n        mut self: Pin<&mut Self>,\n        cx: &mut Context<'_>,\n        buf: &[u8],\n    ) -> Poll<Result<usize, io::Error>> {\n        pin!(&mut self.inner).poll_write(cx, buf)\n    }\n\n    #[tracing::instrument(level = \"debug\", skip_all, fields(stream_id = %self.id))]\n    #[inline]\n    fn poll_flush(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Result<(), io::Error>> {\n        pin!(&mut self.inner).poll_flush(cx)\n    }\n\n    #[tracing::instrument(level = \"debug\", skip_all, fields(stream_id = %self.id))]\n    #[inline]\n    fn 
poll_shutdown(\n        mut self: Pin<&mut Self>,\n        cx: &mut Context<'_>,\n    ) -> Poll<Result<(), io::Error>> {\n        pin!(&mut self.inner).poll_shutdown(cx)\n    }\n\n    #[tracing::instrument(level = \"debug\", skip_all, fields(stream_id = %self.id))]\n    #[inline]\n    fn poll_write_vectored(\n        mut self: Pin<&mut Self>,\n        cx: &mut Context<'_>,\n        bufs: &[io::IoSlice<'_>],\n    ) -> Poll<Result<usize, io::Error>> {\n        pin!(&mut self.inner).poll_write_vectored(cx, bufs)\n    }\n\n    #[inline]\n    fn is_write_vectored(&self) -> bool {\n        self.inner.is_write_vectored()\n    }\n}\n\n#[cfg(test)]\nmod tests {\n    use tokio::io::{AsyncReadExt, AsyncWriteExt};\n\n    use super::*;\n\n    #[tokio::test]\n    async fn test_simple_roundtrip() {\n        let (client, mut server) = Connection::new();\n\n        let server_task = tokio::spawn(async move {\n            while let Some(mut stream) = server.accept_stream().await.unwrap() {\n                tokio::spawn(async move {\n                    let mut buf = [0; 64];\n                    loop {\n                        match stream.read(&mut buf).await.unwrap() {\n                            0 => break,\n                            n => stream.write(&buf[..n]).await.unwrap(),\n                        };\n                    }\n                });\n            }\n        });\n\n        let mut stream = client.open_stream().await.unwrap();\n        stream.write_all(b\"hello!\").await.unwrap();\n        let mut buf = [0; 64];\n        let n = stream.read(&mut buf).await.unwrap();\n        assert_eq!(n, 6);\n        assert_eq!(&buf[..n], b\"hello!\");\n\n        drop(stream);\n        drop(client);\n        server_task.await.unwrap();\n    }\n}\n"
  },
  {
    "path": "proxy/src/pglb/mod.rs",
    "content": "pub mod copy_bidirectional;\npub mod handshake;\npub mod inprocess;\npub mod passthrough;\n\nuse std::sync::Arc;\n\nuse futures::FutureExt;\nuse smol_str::ToSmolStr;\nuse thiserror::Error;\nuse tokio::io::{AsyncRead, AsyncWrite};\nuse tokio_util::sync::CancellationToken;\nuse tracing::{Instrument, debug, error, info, warn};\n\nuse crate::auth;\nuse crate::cancellation::{self, CancellationHandler};\nuse crate::config::{ProxyConfig, ProxyProtocolV2, TlsConfig};\nuse crate::context::RequestContext;\nuse crate::error::{ReportableError, UserFacingError};\nuse crate::metrics::{Metrics, NumClientConnectionsGuard};\npub use crate::pglb::copy_bidirectional::ErrorSource;\nuse crate::pglb::handshake::{HandshakeData, HandshakeError, handshake};\nuse crate::pglb::passthrough::ProxyPassthrough;\nuse crate::protocol2::{ConnectHeader, ConnectionInfo, ConnectionInfoExtra, read_proxy_protocol};\nuse crate::proxy::handle_client;\nuse crate::rate_limiter::EndpointRateLimiter;\nuse crate::stream::Stream;\nuse crate::util::run_until_cancelled;\n\npub const ERR_INSECURE_CONNECTION: &str = \"connection is insecure (try using `sslmode=require`)\";\n\n#[derive(Error, Debug)]\n#[error(\"{ERR_INSECURE_CONNECTION}\")]\npub struct TlsRequired;\n\nimpl ReportableError for TlsRequired {\n    fn get_error_kind(&self) -> crate::error::ErrorKind {\n        crate::error::ErrorKind::User\n    }\n}\n\nimpl UserFacingError for TlsRequired {}\n\npub async fn task_main(\n    config: &'static ProxyConfig,\n    auth_backend: &'static auth::Backend<'static, ()>,\n    listener: tokio::net::TcpListener,\n    cancellation_token: CancellationToken,\n    cancellation_handler: Arc<CancellationHandler>,\n    endpoint_rate_limiter: Arc<EndpointRateLimiter>,\n) -> anyhow::Result<()> {\n    scopeguard::defer! 
{\n        info!(\"proxy has shut down\");\n    }\n\n    // When set for the server socket, the keepalive setting\n    // will be inherited by all accepted client sockets.\n    socket2::SockRef::from(&listener).set_keepalive(true)?;\n\n    let connections = tokio_util::task::task_tracker::TaskTracker::new();\n    let cancellations = tokio_util::task::task_tracker::TaskTracker::new();\n\n    while let Some(accept_result) =\n        run_until_cancelled(listener.accept(), &cancellation_token).await\n    {\n        let (socket, peer_addr) = accept_result?;\n\n        let conn_gauge = Metrics::get()\n            .proxy\n            .client_connections\n            .guard(crate::metrics::Protocol::Tcp);\n\n        let session_id = uuid::Uuid::new_v4();\n        let cancellation_handler = Arc::clone(&cancellation_handler);\n        let cancellations = cancellations.clone();\n\n        debug!(protocol = \"tcp\", %session_id, \"accepted new TCP connection\");\n        let endpoint_rate_limiter2 = endpoint_rate_limiter.clone();\n\n        connections.spawn(async move {\n            let (socket, conn_info) = match config.proxy_protocol_v2 {\n                ProxyProtocolV2::Required => {\n                    match read_proxy_protocol(socket).await {\n                        Err(e) => {\n                            warn!(\"per-client task finished with an error: {e:#}\");\n                            return;\n                        }\n                        // our load balancers will not send any more data. 
let's just exit immediately\n                        Ok((_socket, ConnectHeader::Local)) => {\n                            debug!(\"healthcheck received\");\n                            return;\n                        }\n                        Ok((socket, ConnectHeader::Proxy(info))) => (socket, info),\n                    }\n                }\n                // ignore the header - it cannot be confused for a postgres or http connection so will\n                // error later.\n                ProxyProtocolV2::Rejected => (\n                    socket,\n                    ConnectionInfo {\n                        addr: peer_addr,\n                        extra: None,\n                    },\n                ),\n            };\n\n            match socket.set_nodelay(true) {\n                Ok(()) => {}\n                Err(e) => {\n                    error!(\n                        \"per-client task finished with an error: failed to set socket option: {e:#}\"\n                    );\n                    return;\n                }\n            }\n\n            let ctx = RequestContext::new(session_id, conn_info, crate::metrics::Protocol::Tcp);\n\n            let res = handle_connection(\n                config,\n                auth_backend,\n                &ctx,\n                cancellation_handler,\n                socket,\n                ClientMode::Tcp,\n                endpoint_rate_limiter2,\n                conn_gauge,\n                cancellations,\n            )\n            .instrument(ctx.span())\n            .boxed()\n            .await;\n\n            match res {\n                Err(e) => {\n                    ctx.set_error_kind(e.get_error_kind());\n                    warn!(parent: &ctx.span(), \"per-client task finished with an error: {e:#}\");\n                }\n                Ok(None) => {\n                    ctx.set_success();\n                }\n                Ok(Some(p)) => {\n                    ctx.set_success();\n              
      let _disconnect = ctx.log_connect();\n                    match p.proxy_pass().await {\n                        Ok(()) => {}\n                        Err(ErrorSource::Client(e)) => {\n                            warn!(\n                                ?session_id,\n                                \"per-client task finished with an IO error from the client: {e:#}\"\n                            );\n                        }\n                        Err(ErrorSource::Compute(e)) => {\n                            error!(\n                                ?session_id,\n                                \"per-client task finished with an IO error from the compute: {e:#}\"\n                            );\n                        }\n                    }\n                }\n            }\n        });\n    }\n\n    connections.close();\n    cancellations.close();\n    drop(listener);\n\n    // Drain connections\n    connections.wait().await;\n    cancellations.wait().await;\n\n    Ok(())\n}\n\npub(crate) enum ClientMode {\n    Tcp,\n    Websockets { hostname: Option<String> },\n}\n\n/// Abstracts the logic of handling TCP vs WS clients\nimpl ClientMode {\n    pub fn allow_cleartext(&self) -> bool {\n        match self {\n            ClientMode::Tcp => false,\n            ClientMode::Websockets { .. } => true,\n        }\n    }\n\n    pub fn hostname<'a, S>(&'a self, s: &'a Stream<S>) -> Option<&'a str> {\n        match self {\n            ClientMode::Tcp => s.sni_hostname(),\n            ClientMode::Websockets { hostname } => hostname.as_deref(),\n        }\n    }\n\n    pub fn handshake_tls<'a>(&self, tls: Option<&'a TlsConfig>) -> Option<&'a TlsConfig> {\n        match self {\n            ClientMode::Tcp => tls,\n            // TLS is None here if using websockets, because the connection is already encrypted.\n            ClientMode::Websockets { .. 
} => None,\n        }\n    }\n}\n\n#[derive(Debug, Error)]\n// almost all errors should be reported to the user, but there's a few cases where we cannot\n// 1. Cancellation: we are not allowed to tell the client any cancellation statuses for security reasons\n// 2. Handshake: handshake reports errors if it can, otherwise if the handshake fails due to protocol violation,\n//    we cannot be sure the client even understands our error message\n// 3. PrepareClient: The client disconnected, so we can't tell them anyway...\npub(crate) enum ClientRequestError {\n    #[error(\"{0}\")]\n    Cancellation(#[from] cancellation::CancelError),\n    #[error(\"{0}\")]\n    Handshake(#[from] HandshakeError),\n    #[error(\"{0}\")]\n    HandshakeTimeout(#[from] tokio::time::error::Elapsed),\n    #[error(\"{0}\")]\n    PrepareClient(#[from] std::io::Error),\n    #[error(\"{0}\")]\n    ReportedError(#[from] crate::stream::ReportedError),\n}\n\nimpl ReportableError for ClientRequestError {\n    fn get_error_kind(&self) -> crate::error::ErrorKind {\n        match self {\n            ClientRequestError::Cancellation(e) => e.get_error_kind(),\n            ClientRequestError::Handshake(e) => e.get_error_kind(),\n            ClientRequestError::HandshakeTimeout(_) => crate::error::ErrorKind::RateLimit,\n            ClientRequestError::ReportedError(e) => e.get_error_kind(),\n            ClientRequestError::PrepareClient(_) => crate::error::ErrorKind::ClientDisconnect,\n        }\n    }\n}\n\n#[allow(clippy::too_many_arguments)]\npub(crate) async fn handle_connection<S: AsyncRead + AsyncWrite + Unpin + Send>(\n    config: &'static ProxyConfig,\n    auth_backend: &'static auth::Backend<'static, ()>,\n    ctx: &RequestContext,\n    cancellation_handler: Arc<CancellationHandler>,\n    client: S,\n    mode: ClientMode,\n    endpoint_rate_limiter: Arc<EndpointRateLimiter>,\n    conn_gauge: NumClientConnectionsGuard<'static>,\n    cancellations: tokio_util::task::task_tracker::TaskTracker,\n) -> 
Result<Option<ProxyPassthrough<S>>, ClientRequestError> {\n    debug!(\n        protocol = %ctx.protocol(),\n        \"handling interactive connection from client\"\n    );\n\n    let metrics = &Metrics::get().proxy;\n    let proto = ctx.protocol();\n    let request_gauge = metrics.connection_requests.guard(proto);\n\n    let tls = config.tls_config.load();\n    let tls = tls.as_deref();\n\n    let record_handshake_error = !ctx.has_private_peer_addr();\n    let pause = ctx.latency_timer_pause(crate::metrics::Waiting::Client);\n    let do_handshake = handshake(ctx, client, mode.handshake_tls(tls), record_handshake_error);\n\n    let (mut client, params) = match tokio::time::timeout(config.handshake_timeout, do_handshake)\n        .await??\n    {\n        HandshakeData::Startup(client, params) => (client, params),\n        HandshakeData::Cancel(cancel_key_data) => {\n            // spawn a task to cancel the session, but don't wait for it\n            cancellations.spawn({\n                let cancellation_handler_clone = Arc::clone(&cancellation_handler);\n                let ctx = ctx.clone();\n                let cancel_span = tracing::span!(parent: None, tracing::Level::INFO, \"cancel_session\", session_id = ?ctx.session_id());\n                cancel_span.follows_from(tracing::Span::current());\n                async move {\n                    cancellation_handler_clone\n                        .cancel_session(\n                            cancel_key_data,\n                            ctx,\n                            config.authentication_config.ip_allowlist_check_enabled,\n                            config.authentication_config.is_vpc_acccess_proxy,\n                            auth_backend.get_api(),\n                        )\n                        .await\n                        .inspect_err(|e | debug!(error = ?e, \"cancel_session failed\")).ok();\n                }.instrument(cancel_span)\n            });\n\n            return Ok(None);\n        }\n   
 };\n    drop(pause);\n\n    ctx.set_db_options(params.clone());\n\n    let common_names = tls.map(|tls| &tls.common_names);\n\n    let (node, cancel_on_shutdown) = handle_client(\n        config,\n        auth_backend,\n        ctx,\n        cancellation_handler,\n        &mut client,\n        &mode,\n        endpoint_rate_limiter,\n        common_names,\n        &params,\n    )\n    .await?;\n\n    let client = client.flush_and_into_inner().await?;\n\n    let private_link_id = match ctx.extra() {\n        Some(ConnectionInfoExtra::Aws { vpce_id }) => Some(vpce_id.clone()),\n        Some(ConnectionInfoExtra::Azure { link_id }) => Some(link_id.to_smolstr()),\n        None => None,\n    };\n\n    Ok(Some(ProxyPassthrough {\n        client,\n        compute: node.stream.into_framed().into_inner(),\n\n        aux: node.aux,\n        private_link_id,\n\n        _cancel_on_shutdown: cancel_on_shutdown,\n\n        _req: request_gauge,\n        _conn: conn_gauge,\n        _db_conn: node.guage,\n    }))\n}\n"
  },
  {
    "path": "proxy/src/pglb/passthrough.rs",
    "content": "use std::convert::Infallible;\n\nuse smol_str::SmolStr;\nuse tokio::io::{AsyncRead, AsyncWrite};\nuse tracing::debug;\nuse utils::measured_stream::MeasuredStream;\n\nuse super::copy_bidirectional::ErrorSource;\nuse crate::compute::MaybeRustlsStream;\nuse crate::control_plane::messages::MetricsAuxInfo;\nuse crate::metrics::{\n    Direction, Metrics, NumClientConnectionsGuard, NumConnectionRequestsGuard,\n    NumDbConnectionsGuard,\n};\nuse crate::stream::Stream;\nuse crate::usage_metrics::{Ids, MetricCounterRecorder, USAGE_METRICS};\n\n/// Forward bytes in both directions (client <-> compute).\n#[tracing::instrument(skip_all)]\npub(crate) async fn proxy_pass(\n    client: impl AsyncRead + AsyncWrite + Unpin,\n    compute: impl AsyncRead + AsyncWrite + Unpin,\n    aux: MetricsAuxInfo,\n    private_link_id: Option<SmolStr>,\n) -> Result<(), ErrorSource> {\n    // we will report ingress at a later date\n    let usage_tx = USAGE_METRICS.register(Ids {\n        endpoint_id: aux.endpoint_id,\n        branch_id: aux.branch_id,\n        private_link_id,\n    });\n\n    let metrics = &Metrics::get().proxy.io_bytes;\n    let m_sent = metrics.with_labels(Direction::Tx);\n    let mut client = MeasuredStream::new(\n        client,\n        |_| {},\n        |cnt| {\n            // Number of bytes we sent to the client (outbound).\n            metrics.get_metric(m_sent).inc_by(cnt as u64);\n            usage_tx.record_egress(cnt as u64);\n        },\n    );\n\n    let m_recv = metrics.with_labels(Direction::Rx);\n    let mut compute = MeasuredStream::new(\n        compute,\n        |_| {},\n        |cnt| {\n            // Number of bytes the client sent to the compute node (inbound).\n            metrics.get_metric(m_recv).inc_by(cnt as u64);\n            usage_tx.record_ingress(cnt as u64);\n        },\n    );\n\n    // Starting from here we only proxy the client's traffic.\n    debug!(\"performing the proxy pass...\");\n    let _ = 
crate::pglb::copy_bidirectional::copy_bidirectional_client_compute(\n        &mut client,\n        &mut compute,\n    )\n    .await?;\n\n    Ok(())\n}\n\npub(crate) struct ProxyPassthrough<S> {\n    pub(crate) client: Stream<S>,\n    pub(crate) compute: MaybeRustlsStream,\n\n    pub(crate) aux: MetricsAuxInfo,\n    pub(crate) private_link_id: Option<SmolStr>,\n\n    pub(crate) _cancel_on_shutdown: tokio::sync::oneshot::Sender<Infallible>,\n\n    pub(crate) _req: NumConnectionRequestsGuard<'static>,\n    pub(crate) _conn: NumClientConnectionsGuard<'static>,\n    pub(crate) _db_conn: NumDbConnectionsGuard<'static>,\n}\n\nimpl<S: AsyncRead + AsyncWrite + Unpin> ProxyPassthrough<S> {\n    pub(crate) async fn proxy_pass(self) -> Result<(), ErrorSource> {\n        proxy_pass(self.client, self.compute, self.aux, self.private_link_id).await\n    }\n}\n"
  },
  {
    "path": "proxy/src/pqproto.rs",
    "content": "//! Postgres protocol codec\n//!\n//! <https://www.postgresql.org/docs/current/protocol-message-formats.html>\n\nuse std::fmt;\nuse std::io::{self, Cursor};\n\nuse bytes::{Buf, BufMut};\nuse itertools::Itertools;\nuse rand::distr::{Distribution, StandardUniform};\nuse tokio::io::{AsyncRead, AsyncReadExt, AsyncWrite, AsyncWriteExt};\nuse zerocopy::{FromBytes, Immutable, IntoBytes, big_endian};\n\npub type ErrorCode = [u8; 5];\n\npub const FE_PASSWORD_MESSAGE: u8 = b'p';\n\npub const SQLSTATE_INTERNAL_ERROR: [u8; 5] = *b\"XX000\";\n\n/// The protocol version number.\n///\n/// The most significant 16 bits are the major version number (3 for the protocol described here).\n/// The least significant 16 bits are the minor version number (0 for the protocol described here).\n/// <https://www.postgresql.org/docs/current/protocol-message-formats.html#PROTOCOL-MESSAGE-FORMATS-STARTUPMESSAGE>\n#[derive(Clone, Copy, PartialEq, PartialOrd, FromBytes, IntoBytes, Immutable)]\n#[repr(C)]\npub struct ProtocolVersion {\n    major: big_endian::U16,\n    minor: big_endian::U16,\n}\n\nimpl ProtocolVersion {\n    pub const fn new(major: u16, minor: u16) -> Self {\n        Self {\n            major: big_endian::U16::new(major),\n            minor: big_endian::U16::new(minor),\n        }\n    }\n    pub const fn minor(self) -> u16 {\n        self.minor.get()\n    }\n    pub const fn major(self) -> u16 {\n        self.major.get()\n    }\n}\n\nimpl fmt::Debug for ProtocolVersion {\n    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {\n        f.debug_list()\n            .entry(&self.major())\n            .entry(&self.minor())\n            .finish()\n    }\n}\n\n/// <https://github.com/postgres/postgres/blob/ca481d3c9ab7bf69ff0c8d71ad3951d407f6a33c/src/include/libpq/pqcomm.h#L118>\nconst MAX_STARTUP_PACKET_LENGTH: usize = 10000;\nconst RESERVED_INVALID_MAJOR_VERSION: u16 = 1234;\n/// 
<https://github.com/postgres/postgres/blob/ca481d3c9ab7bf69ff0c8d71ad3951d407f6a33c/src/include/libpq/pqcomm.h#L132>\nconst CANCEL_REQUEST_CODE: ProtocolVersion = ProtocolVersion::new(1234, 5678);\n/// <https://github.com/postgres/postgres/blob/ca481d3c9ab7bf69ff0c8d71ad3951d407f6a33c/src/include/libpq/pqcomm.h#L166>\nconst NEGOTIATE_SSL_CODE: ProtocolVersion = ProtocolVersion::new(1234, 5679);\n/// <https://github.com/postgres/postgres/blob/ca481d3c9ab7bf69ff0c8d71ad3951d407f6a33c/src/include/libpq/pqcomm.h#L167>\nconst NEGOTIATE_GSS_CODE: ProtocolVersion = ProtocolVersion::new(1234, 5680);\n\n/// This first reads the startup message header, which is 8 bytes.\n/// The first 4 bytes is a big-endian message length, and the next 4 bytes is a version number.\n///\n/// The length value is inclusive of the header. For example,\n/// an empty message will always have length 8.\n#[derive(Clone, Copy, FromBytes, IntoBytes, Immutable)]\n#[repr(C)]\nstruct StartupHeader {\n    len: big_endian::U32,\n    version: ProtocolVersion,\n}\n\n/// read the type from the stream using zerocopy.\n///\n/// not cancel safe.\nmacro_rules! 
read {\n    ($s:expr => $t:ty) => {{\n        // cannot be implemented as a function due to lack of const-generic-expr\n        let mut buf = [0; size_of::<$t>()];\n        $s.read_exact(&mut buf).await?;\n        let res: $t = zerocopy::transmute!(buf);\n        res\n    }};\n}\n\n/// Returns true if TLS is supported.\n///\n/// This is not cancel safe.\npub async fn request_tls<S>(stream: &mut S) -> io::Result<bool>\nwhere\n    S: AsyncRead + AsyncWrite + Unpin,\n{\n    let payload = StartupHeader {\n        len: 8.into(),\n        version: NEGOTIATE_SSL_CODE,\n    };\n    stream.write_all(payload.as_bytes()).await?;\n    stream.flush().await?;\n\n    // we expect back either `S` or `N` as a single byte.\n    let mut res = *b\"0\";\n    stream.read_exact(&mut res).await?;\n\n    debug_assert!(\n        res == *b\"S\" || res == *b\"N\",\n        \"unexpected SSL negotiation response: {}\",\n        char::from(res[0]),\n    );\n\n    // S for SSL.\n    Ok(res == *b\"S\")\n}\n\npub async fn read_startup<S>(stream: &mut S) -> io::Result<FeStartupPacket>\nwhere\n    S: AsyncRead + Unpin,\n{\n    let header = read!(stream => StartupHeader);\n\n    // <https://github.com/postgres/postgres/blob/04bcf9e19a4261fe9c7df37c777592c2e10c32a7/src/backend/tcop/backend_startup.c#L378-L382>\n    // First byte indicates standard SSL handshake message\n    // (It can't be a Postgres startup length because in network byte order\n    // that would be a startup packet hundreds of megabytes long)\n    if header.as_bytes()[0] == 0x16 {\n        return Ok(FeStartupPacket::SslRequest {\n            // The bytes we read for the header are actually part of a TLS ClientHello.\n            // In theory, if the ClientHello was < 8 bytes we would fail with EOF before we get here.\n            // In practice though, I see no world where a ClientHello is less than 8 bytes\n            // since it includes ephemeral keys etc.\n            direct: Some(zerocopy::transmute!(header)),\n        });\n    
}\n\n    let Some(len) = (header.len.get() as usize).checked_sub(8) else {\n        return Err(io::Error::other(format!(\n            \"invalid startup message length {}, must be at least 8.\",\n            header.len,\n        )));\n    };\n\n    // TODO: add a histogram for startup packet lengths\n    if len > MAX_STARTUP_PACKET_LENGTH {\n        tracing::warn!(\"large startup message detected: {len} bytes\");\n        return Err(io::Error::other(format!(\n            \"invalid startup message length {len}\"\n        )));\n    }\n\n    match header.version {\n        // <https://www.postgresql.org/docs/current/protocol-message-formats.html#PROTOCOL-MESSAGE-FORMATS-CANCELREQUEST>\n        CANCEL_REQUEST_CODE => {\n            if len != 8 {\n                return Err(io::Error::other(\n                    \"CancelRequest message is malformed, backend PID / secret key missing\",\n                ));\n            }\n\n            Ok(FeStartupPacket::CancelRequest(\n                read!(stream => CancelKeyData),\n            ))\n        }\n        // <https://www.postgresql.org/docs/current/protocol-message-formats.html#PROTOCOL-MESSAGE-FORMATS-SSLREQUEST>\n        NEGOTIATE_SSL_CODE => {\n            // Requested upgrade to SSL (aka TLS)\n            Ok(FeStartupPacket::SslRequest { direct: None })\n        }\n        NEGOTIATE_GSS_CODE => {\n            // Requested upgrade to GSSAPI\n            Ok(FeStartupPacket::GssEncRequest)\n        }\n        version if version.major() == RESERVED_INVALID_MAJOR_VERSION => Err(io::Error::other(\n            format!(\"Unrecognized request code {version:?}\"),\n        )),\n        // StartupMessage\n        version => {\n            // The protocol version number is followed by one or more pairs of parameter name and value strings.\n            // A zero byte is required as a terminator after the last name/value pair.\n            // Parameters can appear in any order. 
user is required, others are optional.\n\n            let mut buf = vec![0; len];\n            stream.read_exact(&mut buf).await?;\n\n            if buf.pop() != Some(b'\\0') {\n                return Err(io::Error::other(\n                    \"StartupMessage params: missing null terminator\",\n                ));\n            }\n\n            // TODO: Don't do this.\n            // There's no guarantee that these messages are utf8,\n            // but they usually happen to be simple ascii.\n            let params = String::from_utf8(buf)\n                .map_err(|_| io::Error::other(\"StartupMessage params: invalid utf-8\"))?;\n\n            Ok(FeStartupPacket::StartupMessage {\n                version,\n                params: StartupMessageParams { params },\n            })\n        }\n    }\n}\n\n/// Read a raw postgres packet, which will respect the max length requested.\n///\n/// This returns the message tag, as well as the message body. The message\n/// body is written into `buf`, and it is otherwise completely overwritten.\n///\n/// This is not cancel safe.\npub async fn read_message<'a, S>(\n    stream: &mut S,\n    buf: &'a mut Vec<u8>,\n    max: u32,\n) -> io::Result<(u8, &'a mut [u8])>\nwhere\n    S: AsyncRead + Unpin,\n{\n    /// This first reads the header, which for regular messages in the 3.0 protocol is 5 bytes.\n    /// The first byte is a message tag, and the next 4 bytes is a big-endian length.\n    ///\n    /// Awkwardly, the length value is inclusive of itself, but not of the tag. 
For example,\n    /// an empty message will always have length 4.\n    #[derive(Clone, Copy, FromBytes)]\n    #[repr(C)]\n    struct Header {\n        tag: u8,\n        len: big_endian::U32,\n    }\n\n    let header = read!(stream => Header);\n\n    // as described above, the length must be at least 4.\n    let Some(len) = header.len.get().checked_sub(4) else {\n        return Err(io::Error::other(format!(\n            \"invalid startup message length {}, must be at least 4.\",\n            header.len,\n        )));\n    };\n\n    // TODO: add a histogram for message lengths\n\n    // check if the message exceeds our desired max.\n    if len > max {\n        tracing::warn!(\"large postgres message detected: {len} bytes\");\n        return Err(io::Error::other(format!(\"invalid message length {len}\")));\n    }\n\n    // read in our entire message.\n    buf.resize(len as usize, 0);\n    stream.read_exact(buf).await?;\n\n    Ok((header.tag, buf))\n}\n\npub struct WriteBuf(Cursor<Vec<u8>>);\n\nimpl Buf for WriteBuf {\n    #[inline]\n    fn remaining(&self) -> usize {\n        self.0.remaining()\n    }\n\n    #[inline]\n    fn chunk(&self) -> &[u8] {\n        self.0.chunk()\n    }\n\n    #[inline]\n    fn advance(&mut self, cnt: usize) {\n        self.0.advance(cnt);\n    }\n}\n\nimpl WriteBuf {\n    pub const fn new() -> Self {\n        Self(Cursor::new(Vec::new()))\n    }\n\n    /// Use a heuristic to determine if we should shrink the write buffer.\n    #[inline]\n    fn should_shrink(&self) -> bool {\n        let n = self.0.position() as usize;\n        let len = self.0.get_ref().len();\n\n        // the unused (already consumed) space at the front of our buffer is larger than our filled portion.\n        n + n > len\n    }\n\n    /// Shrink the write buffer so that subsequent writes have more spare capacity.\n    #[cold]\n    fn shrink(&mut self) {\n        let n = self.0.position() as usize;\n        let buf = self.0.get_mut();\n\n        // buf repr:\n        // 
[----unused------|-----filled-----|-----uninit-----]\n        //                  ^ n              ^ buf.len()      ^ buf.capacity()\n        let filled = n..buf.len();\n        let filled_len = filled.len();\n        buf.copy_within(filled, 0);\n        buf.truncate(filled_len);\n        self.0.set_position(0);\n    }\n\n    /// clear the write buffer.\n    pub fn reset(&mut self) {\n        let buf = self.0.get_mut();\n        buf.clear();\n        self.0.set_position(0);\n    }\n\n    /// Shrinks the buffer if efficient to do so, and returns the remaining size.\n    pub fn occupied_len(&mut self) -> usize {\n        if self.should_shrink() {\n            self.shrink();\n        }\n        self.0.get_mut().len()\n    }\n\n    /// Write a raw message to the internal buffer.\n    ///\n    /// The size_hint value is only a hint for reserving space. It's ok if it's incorrect, since\n    /// we calculate the length after the fact.\n    pub fn write_raw(&mut self, size_hint: usize, tag: u8, f: impl FnOnce(&mut Vec<u8>)) {\n        if self.should_shrink() {\n            self.shrink();\n        }\n\n        let buf = self.0.get_mut();\n        buf.reserve(5 + size_hint);\n\n        buf.push(tag);\n        let start = buf.len();\n        buf.extend_from_slice(&[0, 0, 0, 0]);\n\n        f(buf);\n\n        let end = buf.len();\n        let len = (end - start) as u32;\n        buf[start..start + 4].copy_from_slice(&len.to_be_bytes());\n    }\n\n    /// Write an encryption response message.\n    pub fn encryption(&mut self, m: u8) {\n        self.0.get_mut().push(m);\n    }\n\n    pub fn write_error(&mut self, msg: &str, error_code: ErrorCode) {\n        self.shrink();\n\n        // <https://www.postgresql.org/docs/current/protocol-message-formats.html#PROTOCOL-MESSAGE-FORMATS-ERRORRESPONSE>\n        // <https://www.postgresql.org/docs/current/protocol-error-fields.html>\n        // \"SERROR\\0CXXXXX\\0M\\0\\0\".len() == 17\n        self.write_raw(17 + msg.len(), b'E', |buf| 
{\n            // Severity: ERROR\n            buf.put_slice(b\"SERROR\\0\");\n\n            // Code: error_code\n            buf.put_u8(b'C');\n            buf.put_slice(&error_code);\n            buf.put_u8(0);\n\n            // Message: msg\n            buf.put_u8(b'M');\n            buf.put_slice(msg.as_bytes());\n            buf.put_u8(0);\n\n            // End.\n            buf.put_u8(0);\n        });\n    }\n}\n\n#[derive(Debug)]\npub enum FeStartupPacket {\n    CancelRequest(CancelKeyData),\n    SslRequest {\n        direct: Option<[u8; 8]>,\n    },\n    GssEncRequest,\n    StartupMessage {\n        version: ProtocolVersion,\n        params: StartupMessageParams,\n    },\n}\n\n#[derive(Debug, Clone, Default)]\npub struct StartupMessageParams {\n    pub params: String,\n}\n\nimpl StartupMessageParams {\n    /// Get parameter's value by its name.\n    pub fn get(&self, name: &str) -> Option<&str> {\n        self.iter().find_map(|(k, v)| (k == name).then_some(v))\n    }\n\n    /// Split command-line options according to PostgreSQL's logic,\n    /// taking into account all escape sequences but leaving them as-is.\n    /// [`None`] means that there's no `options` in [`Self`].\n    pub fn options_raw(&self) -> Option<impl Iterator<Item = &str>> {\n        self.get(\"options\").map(Self::parse_options_raw)\n    }\n\n    /// Split command-line options according to PostgreSQL's logic,\n    /// taking into account all escape sequences but leaving them as-is.\n    pub fn parse_options_raw(input: &str) -> impl Iterator<Item = &str> {\n        // See `postgres: pg_split_opts`.\n        let mut last_was_escape = false;\n        input\n            .split(move |c: char| {\n                // We split by non-escaped whitespace symbols.\n                let should_split = c.is_ascii_whitespace() && !last_was_escape;\n                last_was_escape = c == '\\\\' && !last_was_escape;\n                should_split\n            })\n            .filter(|s| !s.is_empty())\n    
}\n\n    /// Iterate through key-value pairs in an arbitrary order.\n    pub fn iter(&self) -> impl Iterator<Item = (&str, &str)> {\n        self.params.split_terminator('\\0').tuples()\n    }\n\n    // This function is mostly useful in tests.\n    #[cfg(test)]\n    pub fn new<'a, const N: usize>(pairs: [(&'a str, &'a str); N]) -> Self {\n        let mut b = Self {\n            params: String::new(),\n        };\n        for (k, v) in pairs {\n            b.insert(k, v);\n        }\n        b\n    }\n\n    /// Set parameter's value by its name.\n    /// name and value must not contain a \\0 byte\n    pub fn insert(&mut self, name: &str, value: &str) {\n        self.params.reserve(name.len() + value.len() + 2);\n        self.params.push_str(name);\n        self.params.push('\\0');\n        self.params.push_str(value);\n        self.params.push('\\0');\n    }\n}\n\n/// Cancel keys usually are represented as PID+SecretKey, but to proxy they're just\n/// opaque bytes.\n#[derive(Debug, Hash, PartialEq, Eq, Clone, Copy, FromBytes, IntoBytes, Immutable)]\npub struct CancelKeyData(pub big_endian::U64);\n\npub fn id_to_cancel_key(id: u64) -> CancelKeyData {\n    CancelKeyData(big_endian::U64::new(id))\n}\n\nimpl fmt::Display for CancelKeyData {\n    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {\n        let id = self.0;\n        f.debug_tuple(\"CancelKeyData\")\n            .field(&format_args!(\"{id:x}\"))\n            .finish()\n    }\n}\nimpl Distribution<CancelKeyData> for StandardUniform {\n    fn sample<R: rand::Rng + ?Sized>(&self, rng: &mut R) -> CancelKeyData {\n        id_to_cancel_key(rng.random())\n    }\n}\n\npub enum BeMessage<'a> {\n    AuthenticationOk,\n    AuthenticationSasl(BeAuthenticationSaslMessage<'a>),\n    AuthenticationCleartextPassword,\n    BackendKeyData(CancelKeyData),\n    ParameterStatus {\n        name: &'a [u8],\n        value: &'a [u8],\n    },\n    ReadyForQuery,\n    NoticeResponse(&'a str),\n    NegotiateProtocolVersion {\n 
       version: ProtocolVersion,\n        options: &'a [&'a str],\n    },\n}\n\n#[derive(Debug)]\npub enum BeAuthenticationSaslMessage<'a> {\n    Methods(&'a [&'a str]),\n    Continue(&'a [u8]),\n    Final(&'a [u8]),\n}\n\nimpl BeMessage<'_> {\n    /// Write the message into an internal buffer\n    pub fn write_message(self, buf: &mut WriteBuf) {\n        match self {\n            // <https://www.postgresql.org/docs/current/protocol-message-formats.html#PROTOCOL-MESSAGE-FORMATS-AUTHENTICATIONOK>\n            BeMessage::AuthenticationOk => {\n                buf.write_raw(1, b'R', |buf| buf.put_i32(0));\n            }\n            // <https://www.postgresql.org/docs/current/protocol-message-formats.html#PROTOCOL-MESSAGE-FORMATS-AUTHENTICATIONCLEARTEXTPASSWORD>\n            BeMessage::AuthenticationCleartextPassword => {\n                buf.write_raw(1, b'R', |buf| buf.put_i32(3));\n            }\n\n            // <https://www.postgresql.org/docs/current/protocol-message-formats.html#PROTOCOL-MESSAGE-FORMATS-AUTHENTICATIONSASL>\n            BeMessage::AuthenticationSasl(BeAuthenticationSaslMessage::Methods(methods)) => {\n                let len: usize = methods.iter().map(|m| m.len() + 1).sum();\n                buf.write_raw(len + 2, b'R', |buf| {\n                    buf.put_i32(10); // Specifies that SASL auth method is used.\n                    for method in methods {\n                        buf.put_slice(method.as_bytes());\n                        buf.put_u8(0);\n                    }\n                    buf.put_u8(0); // zero terminator for the list\n                });\n            }\n            // <https://www.postgresql.org/docs/current/protocol-message-formats.html#PROTOCOL-MESSAGE-FORMATS-AUTHENTICATIONSASL>\n            BeMessage::AuthenticationSasl(BeAuthenticationSaslMessage::Continue(extra)) => {\n                buf.write_raw(extra.len() + 1, b'R', |buf| {\n                    buf.put_i32(11); // Continue SASL auth.\n             
       buf.put_slice(extra);\n                });\n            }\n            // <https://www.postgresql.org/docs/current/protocol-message-formats.html#PROTOCOL-MESSAGE-FORMATS-AUTHENTICATIONSASL>\n            BeMessage::AuthenticationSasl(BeAuthenticationSaslMessage::Final(extra)) => {\n                buf.write_raw(extra.len() + 1, b'R', |buf| {\n                    buf.put_i32(12); // Send final SASL message.\n                    buf.put_slice(extra);\n                });\n            }\n\n            // <https://www.postgresql.org/docs/current/protocol-message-formats.html#PROTOCOL-MESSAGE-FORMATS-BACKENDKEYDATA>\n            BeMessage::BackendKeyData(key_data) => {\n                buf.write_raw(8, b'K', |buf| buf.put_slice(key_data.as_bytes()));\n            }\n\n            // <https://www.postgresql.org/docs/current/protocol-message-formats.html#PROTOCOL-MESSAGE-FORMATS-NOTICERESPONSE>\n            // <https://www.postgresql.org/docs/current/protocol-error-fields.html>\n            BeMessage::NoticeResponse(msg) => {\n                // 'N' signalizes NoticeResponse messages\n                buf.write_raw(18 + msg.len(), b'N', |buf| {\n                    // Severity: NOTICE\n                    buf.put_slice(b\"SNOTICE\\0\");\n\n                    // Code: XX000 (ignored for notice, but still required)\n                    buf.put_slice(b\"CXX000\\0\");\n\n                    // Message: msg\n                    buf.put_u8(b'M');\n                    buf.put_slice(msg.as_bytes());\n                    buf.put_u8(0);\n\n                    // End notice.\n                    buf.put_u8(0);\n                });\n            }\n\n            // <https://www.postgresql.org/docs/current/protocol-message-formats.html#PROTOCOL-MESSAGE-FORMATS-PARAMETERSTATUS>\n            BeMessage::ParameterStatus { name, value } => {\n                buf.write_raw(name.len() + value.len() + 2, b'S', |buf| {\n                    buf.put_slice(name.as_bytes());\n                 
   buf.put_u8(0);\n                    buf.put_slice(value.as_bytes());\n                    buf.put_u8(0);\n                });\n            }\n\n            // <https://www.postgresql.org/docs/current/protocol-message-formats.html#PROTOCOL-MESSAGE-FORMATS-READYFORQUERY>\n            BeMessage::ReadyForQuery => {\n                buf.write_raw(1, b'Z', |buf| buf.put_u8(b'I'));\n            }\n\n            // <https://www.postgresql.org/docs/current/protocol-message-formats.html#PROTOCOL-MESSAGE-FORMATS-NEGOTIATEPROTOCOLVERSION>\n            BeMessage::NegotiateProtocolVersion { version, options } => {\n                let len: usize = options.iter().map(|o| o.len() + 1).sum();\n                buf.write_raw(8 + len, b'v', |buf| {\n                    buf.put_slice(version.as_bytes());\n                    buf.put_u32(options.len() as u32);\n                    for option in options {\n                        buf.put_slice(option.as_bytes());\n                        buf.put_u8(0);\n                    }\n                });\n            }\n        }\n    }\n}\n\n#[cfg(test)]\nmod tests {\n    use std::io::Cursor;\n\n    use tokio::io::{AsyncWriteExt, duplex};\n    use zerocopy::IntoBytes;\n\n    use super::ProtocolVersion;\n    use crate::pqproto::{FeStartupPacket, read_message, read_startup};\n\n    #[tokio::test]\n    async fn reject_large_startup() {\n        // we're going to define a v3.0 startup message with far too many parameters.\n        let mut payload = vec![];\n        // 10001 + 8 bytes.\n        payload.extend_from_slice(&10009_u32.to_be_bytes());\n        payload.extend_from_slice(ProtocolVersion::new(3, 0).as_bytes());\n        payload.resize(10009, b'a');\n\n        let (mut server, mut client) = duplex(128);\n        #[rustfmt::skip]\n        let (server, client) = tokio::join!(\n            async move { read_startup(&mut server).await.unwrap_err() },\n            async move { client.write_all(&payload).await.unwrap_err() },\n        
);\n\n        assert_eq!(server.to_string(), \"invalid startup message length 10001\");\n        assert_eq!(client.to_string(), \"broken pipe\");\n    }\n\n    #[tokio::test]\n    async fn reject_large_password() {\n        // we're going to define a password message that is far too long.\n        let mut payload = vec![];\n        payload.push(b'p');\n        payload.extend_from_slice(&517_u32.to_be_bytes());\n        payload.resize(518, b'a');\n\n        let (mut server, mut client) = duplex(128);\n        #[rustfmt::skip]\n        let (server, client) = tokio::join!(\n            async move { read_message(&mut server, &mut vec![], 512).await.unwrap_err() },\n            async move { client.write_all(&payload).await.unwrap_err() },\n        );\n\n        assert_eq!(server.to_string(), \"invalid message length 513\");\n        assert_eq!(client.to_string(), \"broken pipe\");\n    }\n\n    #[tokio::test]\n    async fn read_startup_message() {\n        let mut payload = vec![];\n        payload.extend_from_slice(&17_u32.to_be_bytes());\n        payload.extend_from_slice(ProtocolVersion::new(3, 0).as_bytes());\n        payload.extend_from_slice(b\"abc\\0def\\0\\0\");\n\n        let startup = read_startup(&mut Cursor::new(&payload)).await.unwrap();\n        let FeStartupPacket::StartupMessage { version, params } = startup else {\n            panic!(\"unexpected startup message: {startup:?}\");\n        };\n\n        assert_eq!(version.major(), 3);\n        assert_eq!(version.minor(), 0);\n        assert_eq!(params.params, \"abc\\0def\\0\");\n    }\n\n    #[tokio::test]\n    async fn read_ssl_message() {\n        let mut payload = vec![];\n        payload.extend_from_slice(&8_u32.to_be_bytes());\n        payload.extend_from_slice(ProtocolVersion::new(1234, 5679).as_bytes());\n\n        let startup = read_startup(&mut Cursor::new(&payload)).await.unwrap();\n        let FeStartupPacket::SslRequest { direct: None } = startup else {\n            panic!(\"unexpected startup 
message: {startup:?}\");\n        };\n    }\n\n    #[tokio::test]\n    async fn read_tls_message() {\n        // sample client hello taken from <https://tls13.xargs.org/#client-hello>\n        let client_hello = [\n            0x16, 0x03, 0x01, 0x00, 0xf8, 0x01, 0x00, 0x00, 0xf4, 0x03, 0x03, 0x00, 0x01, 0x02,\n            0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10,\n            0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e,\n            0x1f, 0x20, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea, 0xeb,\n            0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9,\n            0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff, 0x00, 0x08, 0x13, 0x02, 0x13, 0x03, 0x13, 0x01,\n            0x00, 0xff, 0x01, 0x00, 0x00, 0xa3, 0x00, 0x00, 0x00, 0x18, 0x00, 0x16, 0x00, 0x00,\n            0x13, 0x65, 0x78, 0x61, 0x6d, 0x70, 0x6c, 0x65, 0x2e, 0x75, 0x6c, 0x66, 0x68, 0x65,\n            0x69, 0x6d, 0x2e, 0x6e, 0x65, 0x74, 0x00, 0x0b, 0x00, 0x04, 0x03, 0x00, 0x01, 0x02,\n            0x00, 0x0a, 0x00, 0x16, 0x00, 0x14, 0x00, 0x1d, 0x00, 0x17, 0x00, 0x1e, 0x00, 0x19,\n            0x00, 0x18, 0x01, 0x00, 0x01, 0x01, 0x01, 0x02, 0x01, 0x03, 0x01, 0x04, 0x00, 0x23,\n            0x00, 0x00, 0x00, 0x16, 0x00, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00, 0x0d, 0x00, 0x1e,\n            0x00, 0x1c, 0x04, 0x03, 0x05, 0x03, 0x06, 0x03, 0x08, 0x07, 0x08, 0x08, 0x08, 0x09,\n            0x08, 0x0a, 0x08, 0x0b, 0x08, 0x04, 0x08, 0x05, 0x08, 0x06, 0x04, 0x01, 0x05, 0x01,\n            0x06, 0x01, 0x00, 0x2b, 0x00, 0x03, 0x02, 0x03, 0x04, 0x00, 0x2d, 0x00, 0x02, 0x01,\n            0x01, 0x00, 0x33, 0x00, 0x26, 0x00, 0x24, 0x00, 0x1d, 0x00, 0x20, 0x35, 0x80, 0x72,\n            0xd6, 0x36, 0x58, 0x80, 0xd1, 0xae, 0xea, 0x32, 0x9a, 0xdf, 0x91, 0x21, 0x38, 0x38,\n            0x51, 0xed, 0x21, 0xa2, 0x8e, 0x3b, 0x75, 0xe9, 0x65, 0xd0, 0xd2, 0xcd, 0x16, 0x62,\n            0x54,\n        ];\n\n      
  let mut cursor = Cursor::new(&client_hello);\n\n        let startup = read_startup(&mut cursor).await.unwrap();\n        let FeStartupPacket::SslRequest {\n            direct: Some(prefix),\n        } = startup\n        else {\n            panic!(\"unexpected startup message: {startup:?}\");\n        };\n\n        // check that no data is lost.\n        assert_eq!(prefix, [0x16, 0x03, 0x01, 0x00, 0xf8, 0x01, 0x00, 0x00]);\n        assert_eq!(cursor.position(), 8);\n    }\n\n    #[tokio::test]\n    async fn read_message_success() {\n        let query = b\"Q\\0\\0\\0\\x0cSELECT 1Q\\0\\0\\0\\x0cSELECT 2\";\n        let mut cursor = Cursor::new(&query);\n\n        let mut buf = vec![];\n        let (tag, message) = read_message(&mut cursor, &mut buf, 100).await.unwrap();\n        assert_eq!(tag, b'Q');\n        assert_eq!(message, b\"SELECT 1\");\n\n        let (tag, message) = read_message(&mut cursor, &mut buf, 100).await.unwrap();\n        assert_eq!(tag, b'Q');\n        assert_eq!(message, b\"SELECT 2\");\n    }\n}\n"
  },
  {
    "path": "proxy/src/protocol2.rs",
    "content": "//! Proxy Protocol V2 implementation\n//! Compatible with <https://www.haproxy.org/download/3.1/doc/proxy-protocol.txt>\n\nuse core::fmt;\nuse std::io;\nuse std::net::{Ipv4Addr, Ipv6Addr, SocketAddr};\n\nuse bytes::Buf;\nuse smol_str::SmolStr;\nuse strum_macros::FromRepr;\nuse tokio::io::{AsyncRead, AsyncReadExt};\nuse zerocopy::{FromBytes, Immutable, KnownLayout, Unaligned, network_endian};\n\n/// Proxy Protocol Version 2 Header\nconst SIGNATURE: [u8; 12] = [\n    0x0D, 0x0A, 0x0D, 0x0A, 0x00, 0x0D, 0x0A, 0x51, 0x55, 0x49, 0x54, 0x0A,\n];\n\nconst LOCAL_V2: u8 = 0x20;\nconst PROXY_V2: u8 = 0x21;\n\nconst TCP_OVER_IPV4: u8 = 0x11;\nconst UDP_OVER_IPV4: u8 = 0x12;\nconst TCP_OVER_IPV6: u8 = 0x21;\nconst UDP_OVER_IPV6: u8 = 0x22;\n\n#[derive(PartialEq, Eq, Clone, Debug)]\npub struct ConnectionInfo {\n    pub addr: SocketAddr,\n    pub extra: Option<ConnectionInfoExtra>,\n}\n\n#[derive(PartialEq, Eq, Clone, Debug)]\npub enum ConnectHeader {\n    Local,\n    Proxy(ConnectionInfo),\n}\n\nimpl fmt::Display for ConnectionInfo {\n    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {\n        match &self.extra {\n            None => self.addr.ip().fmt(f),\n            Some(ConnectionInfoExtra::Aws { vpce_id }) => {\n                write!(f, \"vpce_id[{vpce_id:?}]:addr[{}]\", self.addr.ip())\n            }\n            Some(ConnectionInfoExtra::Azure { link_id }) => {\n                write!(f, \"link_id[{link_id}]:addr[{}]\", self.addr.ip())\n            }\n        }\n    }\n}\n\n#[derive(PartialEq, Eq, Clone, Debug)]\npub enum ConnectionInfoExtra {\n    Aws { vpce_id: SmolStr },\n    Azure { link_id: u32 },\n}\n\npub(crate) async fn read_proxy_protocol<T: AsyncRead + Unpin>(\n    mut read: T,\n) -> std::io::Result<(T, ConnectHeader)> {\n    let mut header = [0; size_of::<ProxyProtocolV2Header>()];\n    read.read_exact(&mut header).await?;\n    let header: ProxyProtocolV2Header = zerocopy::transmute!(header);\n    if header.signature != 
SIGNATURE {\n        return Err(std::io::Error::other(\"invalid proxy protocol header\"));\n    }\n\n    let mut payload = vec![0; usize::from(header.len.get())];\n    read.read_exact(&mut payload).await?;\n\n    let res = process_proxy_payload(header, &payload)?;\n    Ok((read, res))\n}\n\nfn process_proxy_payload(\n    header: ProxyProtocolV2Header,\n    mut payload: &[u8],\n) -> std::io::Result<ConnectHeader> {\n    match header.version_and_command {\n        // the connection was established on purpose by the proxy\n        // without being relayed. The connection endpoints are the sender and the\n        // receiver. Such connections exist when the proxy sends health-checks to the\n        // server. The receiver must accept this connection as valid and must use the\n        // real connection endpoints and discard the protocol block including the\n        // family which is ignored.\n        LOCAL_V2 => return Ok(ConnectHeader::Local),\n        // the connection was established on behalf of another node,\n        // and reflects the original connection endpoints. The receiver must then use\n        // the information provided in the protocol block to get the original address.\n        PROXY_V2 => {}\n        // other values are unassigned and must not be emitted by senders. Receivers\n        // must drop connections presenting unexpected values here.\n        _ => {\n            return Err(io::Error::other(format!(\n                \"invalid proxy protocol command 0x{:02X}. expected local (0x20) or proxy (0x21)\",\n                header.version_and_command\n            )));\n        }\n    }\n\n    let size_err =\n        \"invalid proxy protocol length. 
payload not large enough to fit requested IP addresses\";\n    let addr = match header.protocol_and_family {\n        TCP_OVER_IPV4 | UDP_OVER_IPV4 => {\n            let addr = payload\n                .try_get::<ProxyProtocolV2HeaderV4>()\n                .ok_or_else(|| io::Error::other(size_err))?;\n\n            SocketAddr::from((addr.src_addr.get(), addr.src_port.get()))\n        }\n        TCP_OVER_IPV6 | UDP_OVER_IPV6 => {\n            let addr = payload\n                .try_get::<ProxyProtocolV2HeaderV6>()\n                .ok_or_else(|| io::Error::other(size_err))?;\n\n            SocketAddr::from((addr.src_addr.get(), addr.src_port.get()))\n        }\n        // unspecified or unix stream. these are not supported, so reject the connection\n        _ => {\n            return Err(io::Error::other(\n                \"invalid proxy protocol address family/transport protocol.\",\n            ));\n        }\n    };\n\n    let mut extra = None;\n\n    while let Some(mut tlv) = read_tlv(&mut payload) {\n        match Pp2Kind::from_repr(tlv.kind) {\n            Some(Pp2Kind::Aws) => {\n                if tlv.value.is_empty() {\n                    tracing::warn!(\"invalid aws tlv: no subtype\");\n                }\n                let subtype = tlv.value.get_u8();\n                match Pp2AwsType::from_repr(subtype) {\n                    Some(Pp2AwsType::VpceId) => match std::str::from_utf8(tlv.value) {\n                        Ok(s) => {\n                            extra = Some(ConnectionInfoExtra::Aws { vpce_id: s.into() });\n                        }\n                        Err(e) => {\n                            tracing::warn!(\"invalid aws vpce id: {e}\");\n                        }\n                    },\n                    None => {\n                        tracing::warn!(\"unknown aws tlv: subtype={subtype}\");\n                    }\n                }\n            }\n            Some(Pp2Kind::Azure) => {\n                if tlv.value.is_empty() {\n                    
tracing::warn!(\"invalid azure tlv: no subtype\");\n                }\n                let subtype = tlv.value.get_u8();\n                match Pp2AzureType::from_repr(subtype) {\n                    Some(Pp2AzureType::PrivateEndpointLinkId) => {\n                        if tlv.value.len() != 4 {\n                            tracing::warn!(\"invalid azure link_id: {:?}\", tlv.value);\n                        }\n                        extra = Some(ConnectionInfoExtra::Azure {\n                            link_id: tlv.value.get_u32_le(),\n                        });\n                    }\n                    None => {\n                        tracing::warn!(\"unknown azure tlv: subtype={subtype}\");\n                    }\n                }\n            }\n            Some(kind) => {\n                tracing::debug!(\"unused tlv[{kind:?}]: {:?}\", tlv.value);\n            }\n            None => {\n                tracing::debug!(\"unknown tlv: {tlv:?}\");\n            }\n        }\n    }\n\n    Ok(ConnectHeader::Proxy(ConnectionInfo { addr, extra }))\n}\n\n#[derive(FromRepr, Debug, Copy, Clone)]\n#[repr(u8)]\nenum Pp2Kind {\n    // The following are defined by https://www.haproxy.org/download/3.1/doc/proxy-protocol.txt\n    // we don't use these but it would be interesting to know what's available\n    Alpn = 0x01,\n    Authority = 0x02,\n    Crc32C = 0x03,\n    Noop = 0x04,\n    UniqueId = 0x05,\n    Ssl = 0x20,\n    NetNs = 0x30,\n\n    /// <https://docs.aws.amazon.com/elasticloadbalancing/latest/network/edit-target-group-attributes.html#proxy-protocol>\n    Aws = 0xEA,\n\n    /// <https://learn.microsoft.com/en-us/azure/private-link/private-link-service-overview#getting-connection-information-using-tcp-proxy-v2>\n    Azure = 0xEE,\n}\n\n#[derive(FromRepr, Debug, Copy, Clone)]\n#[repr(u8)]\nenum Pp2AwsType {\n    VpceId = 0x01,\n}\n\n#[derive(FromRepr, Debug, Copy, Clone)]\n#[repr(u8)]\nenum Pp2AzureType {\n    PrivateEndpointLinkId = 
0x01,\n}\n\n#[derive(Debug)]\nstruct Tlv<'a> {\n    kind: u8,\n    value: &'a [u8],\n}\n\nfn read_tlv<'a>(b: &mut &'a [u8]) -> Option<Tlv<'a>> {\n    let tlv_header = b.try_get::<TlvHeader>()?;\n    let len = usize::from(tlv_header.len.get());\n    Some(Tlv {\n        kind: tlv_header.kind,\n        value: b.split_off(..len)?,\n    })\n}\n\ntrait BufExt: Sized {\n    fn try_get<T: FromBytes>(&mut self) -> Option<T>;\n}\nimpl BufExt for &[u8] {\n    fn try_get<T: FromBytes>(&mut self) -> Option<T> {\n        let (res, rest) = T::read_from_prefix(self).ok()?;\n        *self = rest;\n        Some(res)\n    }\n}\n\n#[derive(FromBytes, KnownLayout, Immutable, Unaligned, Copy, Clone)]\n#[repr(C, packed)]\nstruct ProxyProtocolV2Header {\n    signature: [u8; 12],\n    version_and_command: u8,\n    protocol_and_family: u8,\n    len: network_endian::U16,\n}\n\n#[derive(FromBytes, KnownLayout, Immutable, Unaligned, Copy, Clone)]\n#[repr(C, packed)]\nstruct ProxyProtocolV2HeaderV4 {\n    src_addr: NetworkEndianIpv4,\n    dst_addr: NetworkEndianIpv4,\n    src_port: network_endian::U16,\n    dst_port: network_endian::U16,\n}\n\n#[derive(FromBytes, KnownLayout, Immutable, Unaligned, Copy, Clone)]\n#[repr(C, packed)]\nstruct ProxyProtocolV2HeaderV6 {\n    src_addr: NetworkEndianIpv6,\n    dst_addr: NetworkEndianIpv6,\n    src_port: network_endian::U16,\n    dst_port: network_endian::U16,\n}\n\n#[derive(FromBytes, KnownLayout, Immutable, Unaligned, Copy, Clone)]\n#[repr(C, packed)]\nstruct TlvHeader {\n    kind: u8,\n    len: network_endian::U16,\n}\n\n#[derive(FromBytes, KnownLayout, Immutable, Unaligned, Copy, Clone)]\n#[repr(transparent)]\nstruct NetworkEndianIpv4(network_endian::U32);\nimpl NetworkEndianIpv4 {\n    #[inline]\n    fn get(self) -> Ipv4Addr {\n        Ipv4Addr::from_bits(self.0.get())\n    }\n}\n\n#[derive(FromBytes, KnownLayout, Immutable, Unaligned, Copy, Clone)]\n#[repr(transparent)]\nstruct NetworkEndianIpv6(network_endian::U128);\nimpl NetworkEndianIpv6 {\n   
 #[inline]\n    fn get(self) -> Ipv6Addr {\n        Ipv6Addr::from_bits(self.0.get())\n    }\n}\n\n#[cfg(test)]\nmod tests {\n    use tokio::io::AsyncReadExt;\n\n    use crate::protocol2::{\n        ConnectHeader, LOCAL_V2, PROXY_V2, TCP_OVER_IPV4, UDP_OVER_IPV6, read_proxy_protocol,\n    };\n\n    #[tokio::test]\n    async fn test_ipv4() {\n        let header = super::SIGNATURE\n            // Proxy command, IPV4 | TCP\n            .chain([(2 << 4) | 1, (1 << 4) | 1].as_slice())\n            // 12 + 3 bytes\n            .chain([0, 15].as_slice())\n            // src ip\n            .chain([127, 0, 0, 1].as_slice())\n            // dst ip\n            .chain([192, 168, 0, 1].as_slice())\n            // src port\n            .chain([255, 255].as_slice())\n            // dst port\n            .chain([1, 1].as_slice())\n            // TLV\n            .chain([1, 2, 3].as_slice());\n\n        let extra_data = [0x55; 256];\n\n        let (mut read, info) = read_proxy_protocol(header.chain(extra_data.as_slice()))\n            .await\n            .unwrap();\n\n        let mut bytes = vec![];\n        read.read_to_end(&mut bytes).await.unwrap();\n\n        assert_eq!(bytes, extra_data);\n\n        let ConnectHeader::Proxy(info) = info else {\n            panic!()\n        };\n        assert_eq!(info.addr, ([127, 0, 0, 1], 65535).into());\n    }\n\n    #[tokio::test]\n    async fn test_ipv6() {\n        let header = super::SIGNATURE\n            // Proxy command, IPV6 | UDP\n            .chain([PROXY_V2, UDP_OVER_IPV6].as_slice())\n            // 36 + 3 bytes\n            .chain([0, 39].as_slice())\n            // src ip\n            .chain([15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0].as_slice())\n            // dst ip\n            .chain([0, 15, 1, 14, 2, 13, 3, 12, 4, 11, 5, 10, 6, 9, 7, 8].as_slice())\n            // src port\n            .chain([1, 1].as_slice())\n            // dst port\n            .chain([255, 255].as_slice())\n            // TLV\n          
  .chain([1, 2, 3].as_slice());\n\n        let extra_data = [0x55; 256];\n\n        let (mut read, info) = read_proxy_protocol(header.chain(extra_data.as_slice()))\n            .await\n            .unwrap();\n\n        let mut bytes = vec![];\n        read.read_to_end(&mut bytes).await.unwrap();\n\n        assert_eq!(bytes, extra_data);\n\n        let ConnectHeader::Proxy(info) = info else {\n            panic!()\n        };\n        assert_eq!(\n            info.addr,\n            ([15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0], 257).into()\n        );\n    }\n\n    #[tokio::test]\n    #[should_panic = \"invalid proxy protocol header\"]\n    async fn test_invalid() {\n        let data = [0x55; 256];\n\n        read_proxy_protocol(data.as_slice()).await.unwrap();\n    }\n\n    #[tokio::test]\n    #[should_panic = \"early eof\"]\n    async fn test_short() {\n        let data = [0x55; 10];\n\n        read_proxy_protocol(data.as_slice()).await.unwrap();\n    }\n\n    #[tokio::test]\n    async fn test_large_tlv() {\n        let tlv = vec![0x55; 32768];\n        let tlv_len = (tlv.len() as u16).to_be_bytes();\n        let len = (12 + 3 + tlv.len() as u16).to_be_bytes();\n\n        let header = super::SIGNATURE\n            // Proxy command, Inet << 4 | Stream\n            .chain([PROXY_V2, TCP_OVER_IPV4].as_slice())\n            // 12 + 3 bytes\n            .chain(len.as_slice())\n            // src ip\n            .chain([55, 56, 57, 58].as_slice())\n            // dst ip\n            .chain([192, 168, 0, 1].as_slice())\n            // src port\n            .chain([255, 255].as_slice())\n            // dst port\n            .chain([1, 1].as_slice())\n            // TLV\n            .chain([255].as_slice())\n            .chain(tlv_len.as_slice())\n            .chain(tlv.as_slice());\n\n        let extra_data = [0xaa; 256];\n\n        let (mut read, info) = read_proxy_protocol(header.chain(extra_data.as_slice()))\n            .await\n            .unwrap();\n\n    
    let mut bytes = vec![];\n        read.read_to_end(&mut bytes).await.unwrap();\n\n        assert_eq!(bytes, extra_data);\n\n        let ConnectHeader::Proxy(info) = info else {\n            panic!()\n        };\n        assert_eq!(info.addr, ([55, 56, 57, 58], 65535).into());\n    }\n\n    #[tokio::test]\n    async fn test_local() {\n        let len = 0u16.to_be_bytes();\n        let header = super::SIGNATURE\n            .chain([LOCAL_V2, 0x00].as_slice())\n            .chain(len.as_slice());\n\n        let extra_data = [0xaa; 256];\n\n        let (mut read, info) = read_proxy_protocol(header.chain(extra_data.as_slice()))\n            .await\n            .unwrap();\n\n        let mut bytes = vec![];\n        read.read_to_end(&mut bytes).await.unwrap();\n\n        assert_eq!(bytes, extra_data);\n\n        let ConnectHeader::Local = info else { panic!() };\n    }\n}\n"
  },
  {
    "path": "proxy/src/proxy/connect_auth.rs",
    "content": "use thiserror::Error;\n\nuse crate::auth::Backend;\nuse crate::auth::backend::ComputeUserInfo;\nuse crate::cache::common::Cache;\nuse crate::compute::{AuthInfo, ComputeConnection, ConnectionError, PostgresError};\nuse crate::config::ProxyConfig;\nuse crate::context::RequestContext;\nuse crate::control_plane::client::ControlPlaneClient;\nuse crate::error::{ReportableError, UserFacingError};\nuse crate::proxy::connect_compute::{TlsNegotiation, connect_to_compute};\nuse crate::proxy::retry::ShouldRetryWakeCompute;\n\n#[derive(Debug, Error)]\npub enum AuthError {\n    #[error(transparent)]\n    Auth(#[from] PostgresError),\n    #[error(transparent)]\n    Connect(#[from] ConnectionError),\n}\n\nimpl UserFacingError for AuthError {\n    fn to_string_client(&self) -> String {\n        match self {\n            AuthError::Auth(postgres_error) => postgres_error.to_string_client(),\n            AuthError::Connect(connection_error) => connection_error.to_string_client(),\n        }\n    }\n}\n\nimpl ReportableError for AuthError {\n    fn get_error_kind(&self) -> crate::error::ErrorKind {\n        match self {\n            AuthError::Auth(postgres_error) => postgres_error.get_error_kind(),\n            AuthError::Connect(connection_error) => connection_error.get_error_kind(),\n        }\n    }\n}\n\n/// Try to connect to the compute node, retrying if necessary.\n#[tracing::instrument(skip_all)]\npub(crate) async fn connect_to_compute_and_auth(\n    ctx: &RequestContext,\n    config: &ProxyConfig,\n    user_info: &Backend<'_, ComputeUserInfo>,\n    auth_info: AuthInfo,\n    tls: TlsNegotiation,\n) -> Result<ComputeConnection, AuthError> {\n    let mut attempt = 0;\n\n    // NOTE: This is messy, but should hopefully be detangled with PGLB.\n    // We wanted to separate the concerns of **connect** to compute (a PGLB operation),\n    // from **authenticate** to compute (a NeonKeeper operation).\n    //\n    // This unfortunately removed retry handling for one 
error case where\n    // the compute was cached, and we connected, but the compute cache was actually stale\n    // and is associated with the wrong endpoint. We detect this when the **authentication** fails.\n    // As such, we retry once here if the `authenticate` function fails and the error is valid to retry.\n    loop {\n        attempt += 1;\n        let mut node = connect_to_compute(ctx, config, user_info, tls).await?;\n\n        let res = auth_info.authenticate(ctx, &mut node).await;\n        match res {\n            Ok(()) => return Ok(node),\n            Err(e) => {\n                if attempt < 2\n                    && let Backend::ControlPlane(cplane, user_info) = user_info\n                    && let ControlPlaneClient::ProxyV1(cplane_proxy_v1) = &**cplane\n                    && e.should_retry_wake_compute()\n                {\n                    tracing::warn!(error = ?e, \"retrying wake compute\");\n                    let key = user_info.endpoint_cache_key();\n                    cplane_proxy_v1.caches.node_info.invalidate(&key);\n                    continue;\n                }\n\n                return Err(e)?;\n            }\n        }\n    }\n}\n"
  },
  {
    "path": "proxy/src/proxy/connect_compute.rs",
    "content": "use tokio::time;\nuse tracing::{debug, info, warn};\n\nuse crate::cache::node_info::CachedNodeInfo;\nuse crate::compute::{self, COULD_NOT_CONNECT, ComputeConnection};\nuse crate::config::{ComputeConfig, ProxyConfig, RetryConfig};\nuse crate::context::RequestContext;\nuse crate::control_plane::NodeInfo;\nuse crate::control_plane::locks::ApiLocks;\nuse crate::metrics::{\n    ConnectOutcome, ConnectionFailureKind, Metrics, RetriesMetricGroup, RetryType,\n};\nuse crate::proxy::retry::{ShouldRetryWakeCompute, retry_after, should_retry};\nuse crate::proxy::wake_compute::{WakeComputeBackend, wake_compute};\nuse crate::types::Host;\n\n/// If we couldn't connect, a cached connection info might be to blame\n/// (e.g. the compute node's address might've changed at the wrong time).\n/// Invalidate the cache entry (if any) to prevent subsequent errors.\n#[tracing::instrument(skip_all)]\npub(crate) fn invalidate_cache(node_info: CachedNodeInfo) -> NodeInfo {\n    let is_cached = node_info.cached();\n    if is_cached {\n        warn!(\"invalidating stalled compute node info cache entry\");\n    }\n    let label = if is_cached {\n        ConnectionFailureKind::ComputeCached\n    } else {\n        ConnectionFailureKind::ComputeUncached\n    };\n    Metrics::get().proxy.connection_failures_total.inc(label);\n\n    node_info.invalidate()\n}\n\npub(crate) trait ConnectMechanism {\n    type Connection;\n    async fn connect_once(\n        &self,\n        ctx: &RequestContext,\n        node_info: &CachedNodeInfo,\n        config: &ComputeConfig,\n    ) -> Result<Self::Connection, compute::ConnectionError>;\n}\n\nstruct TcpMechanism<'a> {\n    /// connect_to_compute concurrency lock\n    locks: &'a ApiLocks<Host>,\n    tls: TlsNegotiation,\n}\n\n#[derive(Clone, Copy, PartialEq, Eq, Debug)]\npub enum TlsNegotiation {\n    /// TLS is assumed\n    Direct,\n    /// We must ask for TLS using the postgres SSLRequest message\n    Postgres,\n}\n\nimpl ConnectMechanism for 
TcpMechanism<'_> {\n    type Connection = ComputeConnection;\n\n    #[tracing::instrument(skip_all, fields(\n        pid = tracing::field::Empty,\n        compute_id = tracing::field::Empty\n    ))]\n    async fn connect_once(\n        &self,\n        ctx: &RequestContext,\n        node_info: &CachedNodeInfo,\n        config: &ComputeConfig,\n    ) -> Result<ComputeConnection, compute::ConnectionError> {\n        let permit = self.locks.get_permit(&node_info.conn_info.host).await?;\n\n        permit.release_result(\n            node_info\n                .conn_info\n                .connect(ctx, &node_info.aux, config, self.tls)\n                .await,\n        )\n    }\n}\n\n/// Try to connect to the compute node, retrying if necessary.\n#[tracing::instrument(skip_all)]\npub(crate) async fn connect_to_compute<B: WakeComputeBackend>(\n    ctx: &RequestContext,\n    config: &ProxyConfig,\n    user_info: &B,\n    tls: TlsNegotiation,\n) -> Result<ComputeConnection, compute::ConnectionError> {\n    connect_to_compute_inner(\n        ctx,\n        &TcpMechanism {\n            locks: &config.connect_compute_locks,\n            tls,\n        },\n        user_info,\n        config.wake_compute_retry_config,\n        &config.connect_to_compute,\n    )\n    .await\n}\n\n/// Try to connect to the compute node, retrying if necessary.\npub(crate) async fn connect_to_compute_inner<M: ConnectMechanism, B: WakeComputeBackend>(\n    ctx: &RequestContext,\n    mechanism: &M,\n    user_info: &B,\n    wake_compute_retry_config: RetryConfig,\n    compute: &ComputeConfig,\n) -> Result<M::Connection, compute::ConnectionError> {\n    let mut num_retries = 0;\n    let node_info =\n        wake_compute(&mut num_retries, ctx, user_info, wake_compute_retry_config).await?;\n\n    // try once\n    let err = match mechanism.connect_once(ctx, &node_info, compute).await {\n        Ok(res) => {\n            ctx.success();\n            Metrics::get().proxy.retries_metric.observe(\n                
RetriesMetricGroup {\n                    outcome: ConnectOutcome::Success,\n                    retry_type: RetryType::ConnectToCompute,\n                },\n                num_retries.into(),\n            );\n            return Ok(res);\n        }\n        Err(e) => e,\n    };\n\n    debug!(error = ?err, COULD_NOT_CONNECT);\n\n    let node_info = if !node_info.cached() || !err.should_retry_wake_compute() {\n        // If we just received this from cplane and not from the cache, we shouldn't retry.\n        // Do not need to retrieve a new node_info, just return the old one.\n        if !should_retry(&err, num_retries, compute.retry) {\n            Metrics::get().proxy.retries_metric.observe(\n                RetriesMetricGroup {\n                    outcome: ConnectOutcome::Failed,\n                    retry_type: RetryType::ConnectToCompute,\n                },\n                num_retries.into(),\n            );\n            return Err(err);\n        }\n        node_info\n    } else {\n        // if we failed to connect, it's likely that the compute node was suspended, wake a new compute node\n        debug!(\"compute node's state has likely changed; requesting a wake-up\");\n        invalidate_cache(node_info);\n        // TODO: increment num_retries?\n        wake_compute(&mut num_retries, ctx, user_info, wake_compute_retry_config).await?\n    };\n\n    // now that we have a new node, try connect to it repeatedly.\n    // this can error for a few reasons, for instance:\n    // * DNS connection settings haven't quite propagated yet\n    debug!(\"wake_compute success. 
attempting to connect\");\n    num_retries = 1;\n    loop {\n        match mechanism.connect_once(ctx, &node_info, compute).await {\n            Ok(res) => {\n                ctx.success();\n                Metrics::get().proxy.retries_metric.observe(\n                    RetriesMetricGroup {\n                        outcome: ConnectOutcome::Success,\n                        retry_type: RetryType::ConnectToCompute,\n                    },\n                    num_retries.into(),\n                );\n                // TODO: is this necessary? We have a metric.\n                info!(?num_retries, \"connected to compute node after\");\n                return Ok(res);\n            }\n            Err(e) => {\n                if !should_retry(&e, num_retries, compute.retry) {\n                    // Don't log an error here, caller will print the error\n                    Metrics::get().proxy.retries_metric.observe(\n                        RetriesMetricGroup {\n                            outcome: ConnectOutcome::Failed,\n                            retry_type: RetryType::ConnectToCompute,\n                        },\n                        num_retries.into(),\n                    );\n                    return Err(e);\n                }\n\n                warn!(error = ?e, num_retries, retriable = true, COULD_NOT_CONNECT);\n            }\n        }\n\n        let wait_duration = retry_after(num_retries, compute.retry);\n        num_retries += 1;\n\n        let pause = ctx.latency_timer_pause(crate::metrics::Waiting::RetryTimeout);\n        time::sleep(wait_duration).await;\n        drop(pause);\n    }\n}\n"
  },
  {
    "path": "proxy/src/proxy/mod.rs",
    "content": "#[cfg(test)]\nmod tests;\n\npub(crate) mod connect_auth;\npub(crate) mod connect_compute;\npub(crate) mod retry;\npub(crate) mod wake_compute;\n\nuse std::collections::HashSet;\nuse std::convert::Infallible;\nuse std::sync::Arc;\n\nuse futures::TryStreamExt;\nuse itertools::Itertools;\nuse once_cell::sync::OnceCell;\nuse postgres_client::RawCancelToken;\nuse postgres_client::connect_raw::StartupStream;\nuse postgres_protocol::message::backend::Message;\nuse regex::Regex;\nuse serde::{Deserialize, Serialize};\nuse smol_str::{SmolStr, format_smolstr};\nuse tokio::io::{AsyncRead, AsyncWrite};\nuse tokio::net::TcpStream;\nuse tokio::sync::oneshot;\nuse tracing::Instrument;\n\nuse crate::cancellation::{CancelClosure, CancellationHandler};\nuse crate::compute::{ComputeConnection, PostgresError, RustlsStream};\nuse crate::config::ProxyConfig;\nuse crate::context::RequestContext;\npub use crate::pglb::copy_bidirectional::{ErrorSource, copy_bidirectional_client_compute};\nuse crate::pglb::{ClientMode, ClientRequestError};\nuse crate::pqproto::{BeMessage, CancelKeyData, StartupMessageParams};\nuse crate::rate_limiter::EndpointRateLimiter;\nuse crate::stream::{PqStream, Stream};\nuse crate::types::EndpointCacheKey;\nuse crate::{auth, compute};\n\n#[allow(clippy::too_many_arguments)]\npub(crate) async fn handle_client<S: AsyncRead + AsyncWrite + Unpin + Send>(\n    config: &'static ProxyConfig,\n    auth_backend: &'static auth::Backend<'static, ()>,\n    ctx: &RequestContext,\n    cancellation_handler: Arc<CancellationHandler>,\n    client: &mut PqStream<Stream<S>>,\n    mode: &ClientMode,\n    endpoint_rate_limiter: Arc<EndpointRateLimiter>,\n    common_names: Option<&HashSet<String>>,\n    params: &StartupMessageParams,\n) -> Result<(ComputeConnection, oneshot::Sender<Infallible>), ClientRequestError> {\n    let hostname = mode.hostname(client.get_ref());\n    // Extract credentials which we're going to use for auth.\n    let result = auth_backend\n        
.as_ref()\n        .map(|()| auth::ComputeUserInfoMaybeEndpoint::parse(ctx, params, hostname, common_names))\n        .transpose();\n\n    let user_info = match result {\n        Ok(user_info) => user_info,\n        Err(e) => Err(client.throw_error(e, Some(ctx)).await)?,\n    };\n\n    let user = user_info.get_user().to_owned();\n    let user_info = match user_info\n        .authenticate(\n            ctx,\n            client,\n            mode.allow_cleartext(),\n            &config.authentication_config,\n            endpoint_rate_limiter,\n        )\n        .await\n    {\n        Ok(auth_result) => auth_result,\n        Err(e) => {\n            let db = params.get(\"database\");\n            let app = params.get(\"application_name\");\n            let params_span = tracing::info_span!(\"\", ?user, ?db, ?app);\n\n            return Err(client\n                .throw_error(e, Some(ctx))\n                .instrument(params_span)\n                .await)?;\n        }\n    };\n\n    let (cplane, creds) = match user_info {\n        auth::Backend::ControlPlane(cplane, creds) => (cplane, creds),\n        auth::Backend::Local(_) => unreachable!(\"local proxy does not run tcp proxy service\"),\n    };\n    let params_compat = creds.info.options.get(NeonOptions::PARAMS_COMPAT).is_some();\n    let mut auth_info = compute::AuthInfo::with_auth_keys(creds.keys);\n    auth_info.set_startup_params(params, params_compat);\n\n    let backend = auth::Backend::ControlPlane(cplane, creds.info);\n\n    // TODO: callback to pglb\n    let res = connect_auth::connect_to_compute_and_auth(\n        ctx,\n        config,\n        &backend,\n        auth_info,\n        connect_compute::TlsNegotiation::Postgres,\n    )\n    .await;\n\n    let mut node = match res {\n        Ok(node) => node,\n        Err(e) => Err(client.throw_error(e, Some(ctx)).await)?,\n    };\n\n    send_client_greeting(ctx, &config.greetings, client);\n\n    let auth::Backend::ControlPlane(_, user_info) = backend else 
{\n        unreachable!(\"ensured above\");\n    };\n\n    let session = cancellation_handler.get_key();\n\n    let (process_id, secret_key) =\n        forward_compute_params_to_client(ctx, *session.key(), client, &mut node.stream).await?;\n    let hostname = node.hostname.to_string();\n\n    let session_id = ctx.session_id();\n    let (cancel_on_shutdown, cancel) = oneshot::channel();\n    tokio::spawn(async move {\n        session\n            .maintain_cancel_key(\n                session_id,\n                cancel,\n                &CancelClosure {\n                    socket_addr: node.socket_addr,\n                    cancel_token: RawCancelToken {\n                        ssl_mode: node.ssl_mode,\n                        process_id,\n                        secret_key,\n                    },\n                    hostname,\n                    user_info,\n                },\n                &config.connect_to_compute,\n            )\n            .await;\n    });\n\n    Ok((node, cancel_on_shutdown))\n}\n\n/// Greet the client with any useful information.\npub(crate) fn send_client_greeting(\n    ctx: &RequestContext,\n    greetings: &String,\n    client: &mut PqStream<impl AsyncRead + AsyncWrite + Unpin>,\n) {\n    // Expose session_id to clients if we have a greeting message.\n    if !greetings.is_empty() {\n        let session_msg = format!(\"{}, session_id: {}\", greetings, ctx.session_id());\n        client.write_message(BeMessage::NoticeResponse(session_msg.as_str()));\n    }\n\n    // Forward recorded latencies for probing requests\n    if let Some(testodrome_id) = ctx.get_testodrome_id() {\n        client.write_message(BeMessage::ParameterStatus {\n            name: \"neon.testodrome_id\".as_bytes(),\n            value: testodrome_id.as_bytes(),\n        });\n\n        let latency_measured = ctx.get_proxy_latency();\n\n        client.write_message(BeMessage::ParameterStatus {\n            name: \"neon.cplane_latency\".as_bytes(),\n            value: 
latency_measured.cplane.as_micros().to_string().as_bytes(),\n        });\n\n        client.write_message(BeMessage::ParameterStatus {\n            name: \"neon.client_latency\".as_bytes(),\n            value: latency_measured.client.as_micros().to_string().as_bytes(),\n        });\n\n        client.write_message(BeMessage::ParameterStatus {\n            name: \"neon.compute_latency\".as_bytes(),\n            value: latency_measured.compute.as_micros().to_string().as_bytes(),\n        });\n\n        client.write_message(BeMessage::ParameterStatus {\n            name: \"neon.retry_latency\".as_bytes(),\n            value: latency_measured.retry.as_micros().to_string().as_bytes(),\n        });\n    }\n}\n\npub(crate) async fn forward_compute_params_to_client(\n    ctx: &RequestContext,\n    cancel_key_data: CancelKeyData,\n    client: &mut PqStream<impl AsyncRead + AsyncWrite + Unpin>,\n    compute: &mut StartupStream<TcpStream, RustlsStream>,\n) -> Result<(i32, i32), ClientRequestError> {\n    let mut process_id = 0;\n    let mut secret_key = 0;\n\n    let err = loop {\n        // if the client buffer is too large, let's write out some bytes now to save some space\n        client.write_if_full().await?;\n\n        let msg = match compute.try_next().await {\n            Ok(msg) => msg,\n            Err(e) => break postgres_client::Error::io(e),\n        };\n\n        match msg {\n            // Send our cancellation key data instead.\n            Some(Message::BackendKeyData(body)) => {\n                client.write_message(BeMessage::BackendKeyData(cancel_key_data));\n                process_id = body.process_id();\n                secret_key = body.secret_key();\n            }\n            // Forward all postgres connection params to the client.\n            Some(Message::ParameterStatus(body)) => {\n                if let Ok(name) = body.name()\n                    && let Ok(value) = body.value()\n                {\n                    
client.write_message(BeMessage::ParameterStatus {\n                        name: name.as_bytes(),\n                        value: value.as_bytes(),\n                    });\n                }\n            }\n            // Forward all notices to the client.\n            Some(Message::NoticeResponse(notice)) => {\n                client.write_raw(notice.as_bytes().len(), b'N', |buf| {\n                    buf.extend_from_slice(notice.as_bytes());\n                });\n            }\n            Some(Message::ReadyForQuery(_)) => {\n                client.write_message(BeMessage::ReadyForQuery);\n                return Ok((process_id, secret_key));\n            }\n            Some(Message::ErrorResponse(body)) => break postgres_client::Error::db(body),\n            Some(_) => break postgres_client::Error::unexpected_message(),\n            None => break postgres_client::Error::closed(),\n        }\n    };\n\n    Err(client\n        .throw_error(PostgresError::Postgres(err), Some(ctx))\n        .await)?\n}\n\n#[derive(Debug, Clone, PartialEq, Eq, Default, Serialize, Deserialize)]\npub(crate) struct NeonOptions(Vec<(SmolStr, SmolStr)>);\n\nimpl NeonOptions {\n    // proxy options:\n\n    /// `PARAMS_COMPAT` allows opting in to forwarding all startup parameters from client to compute.\n    pub const PARAMS_COMPAT: &'static str = \"proxy_params_compat\";\n\n    // cplane options:\n\n    /// `LSN` allows provisioning an ephemeral compute with time-travel to the provided LSN.\n    const LSN: &'static str = \"lsn\";\n\n    /// `TIMESTAMP` allows provisioning an ephemeral compute with time-travel to the provided timestamp.\n    const TIMESTAMP: &'static str = \"timestamp\";\n\n    /// `ENDPOINT_TYPE` allows configuring an ephemeral compute to be read_only or read_write.\n    const ENDPOINT_TYPE: &'static str = \"endpoint_type\";\n\n    pub(crate) fn parse_params(params: &StartupMessageParams) -> Self {\n        params\n            .options_raw()\n            
.map(Self::parse_from_iter)\n            .unwrap_or_default()\n    }\n\n    pub(crate) fn parse_options_raw(options: &str) -> Self {\n        Self::parse_from_iter(StartupMessageParams::parse_options_raw(options))\n    }\n\n    pub(crate) fn get(&self, key: &str) -> Option<SmolStr> {\n        self.0\n            .iter()\n            .find_map(|(k, v)| (k == key).then_some(v))\n            .cloned()\n    }\n\n    pub(crate) fn is_ephemeral(&self) -> bool {\n        self.0.iter().any(|(k, _)| match &**k {\n            // This is not a cplane option, we know it does not create ephemeral computes.\n            Self::PARAMS_COMPAT => false,\n            Self::LSN => true,\n            Self::TIMESTAMP => true,\n            Self::ENDPOINT_TYPE => true,\n            // err on the side of caution. any cplane options we don't know about\n            // might lead to ephemeral computes.\n            _ => true,\n        })\n    }\n\n    fn parse_from_iter<'a>(options: impl Iterator<Item = &'a str>) -> Self {\n        let mut options = options\n            .filter_map(neon_option)\n            .map(|(k, v)| (k.into(), v.into()))\n            .collect_vec();\n        options.sort();\n        Self(options)\n    }\n\n    pub(crate) fn get_cache_key(&self, prefix: &str) -> EndpointCacheKey {\n        // prefix + format!(\" {k}:{v}\")\n        // kinda jank because SmolStr is immutable\n        std::iter::once(prefix)\n            .chain(self.0.iter().flat_map(|(k, v)| [\" \", &**k, \":\", &**v]))\n            .collect::<SmolStr>()\n            .into()\n    }\n\n    /// <https://swagger.io/docs/specification/serialization/> DeepObject format\n    /// `paramName[prop1]=value1&paramName[prop2]=value2&...`\n    pub(crate) fn to_deep_object(&self) -> Vec<(SmolStr, SmolStr)> {\n        self.0\n            .iter()\n            .map(|(k, v)| (format_smolstr!(\"options[{}]\", k), v.clone()))\n            .collect()\n    }\n}\n\npub(crate) fn neon_option(bytes: &str) -> Option<(&str, &str)> 
{\n    static RE: OnceCell<Regex> = OnceCell::new();\n    let re = RE.get_or_init(|| Regex::new(r\"^neon_(\\w+):(.+)\").expect(\"regex should be correct\"));\n\n    let cap = re.captures(bytes)?;\n    let (_, [k, v]) = cap.extract();\n    Some((k, v))\n}\n"
  },
  {
    "path": "proxy/src/proxy/retry.rs",
    "content": "use std::error::Error;\nuse std::io;\n\nuse tokio::time;\n\nuse crate::compute::{self, PostgresError};\nuse crate::config::RetryConfig;\n\npub(crate) trait CouldRetry {\n    /// Returns true if the error could be retried\n    fn could_retry(&self) -> bool;\n}\n\npub(crate) trait ShouldRetryWakeCompute {\n    /// Returns true if we need to invalidate the cache for this node.\n    /// If false, we can continue retrying with the current node cache.\n    fn should_retry_wake_compute(&self) -> bool;\n}\n\npub(crate) fn should_retry(err: &impl CouldRetry, num_retries: u32, config: RetryConfig) -> bool {\n    num_retries < config.max_retries && err.could_retry()\n}\n\nimpl CouldRetry for io::Error {\n    fn could_retry(&self) -> bool {\n        use std::io::ErrorKind;\n        matches!(\n            self.kind(),\n            ErrorKind::ConnectionRefused | ErrorKind::AddrNotAvailable | ErrorKind::TimedOut\n        )\n    }\n}\n\nimpl ShouldRetryWakeCompute for postgres_client::error::DbError {\n    fn should_retry_wake_compute(&self) -> bool {\n        use postgres_client::error::SqlState;\n        // Here are errors that happens after the user successfully authenticated to the database.\n        // TODO: there are pgbouncer errors that should be retried, but they are not listed here.\n        let non_retriable_pg_errors = matches!(\n            self.code(),\n            &SqlState::TOO_MANY_CONNECTIONS\n                | &SqlState::OUT_OF_MEMORY\n                | &SqlState::SYNTAX_ERROR\n                | &SqlState::T_R_SERIALIZATION_FAILURE\n                | &SqlState::INVALID_CATALOG_NAME\n                | &SqlState::INVALID_SCHEMA_NAME\n                | &SqlState::INVALID_PARAMETER_VALUE,\n        );\n        if non_retriable_pg_errors {\n            return false;\n        }\n        // PGBouncer errors that should not trigger a wake_compute retry.\n        if self.code() == &SqlState::PROTOCOL_VIOLATION {\n            // Source for the error 
message:\n            // https://github.com/pgbouncer/pgbouncer/blob/f15997fe3effe3a94ba8bcc1ea562e6117d1a131/src/client.c#L1070\n            return !self\n                .message()\n                .contains(\"no more connections allowed (max_client_conn)\");\n        }\n        true\n    }\n}\n\nimpl ShouldRetryWakeCompute for postgres_client::Error {\n    fn should_retry_wake_compute(&self) -> bool {\n        if let Some(db_err) = self.source().and_then(|x| x.downcast_ref()) {\n            postgres_client::error::DbError::should_retry_wake_compute(db_err)\n        } else {\n            // likely an IO error. Possible the compute has shutdown and the\n            // cache is stale.\n            true\n        }\n    }\n}\n\nimpl CouldRetry for compute::ConnectionError {\n    fn could_retry(&self) -> bool {\n        match self {\n            compute::ConnectionError::TlsError(err) => err.could_retry(),\n            compute::ConnectionError::WakeComputeError(err) => err.could_retry(),\n            compute::ConnectionError::TooManyConnectionAttempts(_) => false,\n            #[cfg(test)]\n            compute::ConnectionError::TestError { retryable, .. } => *retryable,\n        }\n    }\n}\nimpl ShouldRetryWakeCompute for compute::ConnectionError {\n    fn should_retry_wake_compute(&self) -> bool {\n        match self {\n            // the cache entry was not checked for validity\n            compute::ConnectionError::TooManyConnectionAttempts(_) => false,\n            #[cfg(test)]\n            compute::ConnectionError::TestError { wakeable, .. 
} => *wakeable,\n            _ => true,\n        }\n    }\n}\n\nimpl ShouldRetryWakeCompute for PostgresError {\n    fn should_retry_wake_compute(&self) -> bool {\n        match self {\n            PostgresError::Postgres(error) => error.should_retry_wake_compute(),\n        }\n    }\n}\n\npub(crate) fn retry_after(num_retries: u32, config: RetryConfig) -> time::Duration {\n    config\n        .base_delay\n        .mul_f64(config.backoff_factor.powi((num_retries as i32) - 1))\n}\n\n#[cfg(test)]\nmod tests {\n    use postgres_client::error::{DbError, SqlState};\n\n    use super::ShouldRetryWakeCompute;\n\n    #[test]\n    fn should_retry_wake_compute_for_db_error() {\n        // These SQLStates should NOT trigger a wake_compute retry.\n        let non_retry_states = [\n            SqlState::TOO_MANY_CONNECTIONS,\n            SqlState::OUT_OF_MEMORY,\n            SqlState::SYNTAX_ERROR,\n            SqlState::T_R_SERIALIZATION_FAILURE,\n            SqlState::INVALID_CATALOG_NAME,\n            SqlState::INVALID_SCHEMA_NAME,\n            SqlState::INVALID_PARAMETER_VALUE,\n        ];\n        for state in non_retry_states {\n            let err = DbError::new_test_error(state.clone(), \"oops\".to_string());\n            assert!(\n                !err.should_retry_wake_compute(),\n                \"State {state:?} unexpectedly retried\"\n            );\n        }\n\n        // Errors coming from pgbouncer should not trigger a wake_compute retry\n        let non_retry_pgbouncer_errors = [\"no more connections allowed (max_client_conn)\"];\n        for error in non_retry_pgbouncer_errors {\n            let err = DbError::new_test_error(SqlState::PROTOCOL_VIOLATION, error.to_string());\n            assert!(\n                !err.should_retry_wake_compute(),\n                \"PGBouncer error {error:?} unexpectedly retried\"\n            );\n        }\n\n        // These SQLStates should trigger a wake_compute retry.\n        let retry_states = [\n            
SqlState::CONNECTION_FAILURE,\n            SqlState::CONNECTION_EXCEPTION,\n            SqlState::CONNECTION_DOES_NOT_EXIST,\n            SqlState::SQLCLIENT_UNABLE_TO_ESTABLISH_SQLCONNECTION,\n        ];\n        for state in retry_states {\n            let err = DbError::new_test_error(state.clone(), \"oops\".to_string());\n            assert!(\n                err.should_retry_wake_compute(),\n                \"State {state:?} unexpectedly skipped retry\"\n            );\n        }\n    }\n}\n"
  },
  {
    "path": "proxy/src/proxy/tests/mitm.rs",
    "content": "//! Man-in-the-middle tests\n//!\n//! Channel binding should prevent a proxy server\n//! *that has access to create valid certificates*\n//! from controlling the TLS connection.\n\nuse std::fmt::Debug;\n\nuse bytes::{Bytes, BytesMut};\nuse futures::{SinkExt, StreamExt};\nuse postgres_client::tls::TlsConnect;\nuse postgres_protocol::message::frontend;\nuse tokio::io::{AsyncReadExt, AsyncWriteExt, DuplexStream};\nuse tokio_util::codec::{Decoder, Encoder};\n\nuse super::*;\nuse crate::config::TlsConfig;\nuse crate::context::RequestContext;\nuse crate::pglb::handshake::{HandshakeData, handshake};\n\nenum Intercept {\n    None,\n    Methods,\n    SASLResponse,\n}\n\nasync fn proxy_mitm(\n    intercept: Intercept,\n) -> (DuplexStream, DuplexStream, ClientConfig<'static>, TlsConfig) {\n    let (end_server1, client1) = tokio::io::duplex(1024);\n    let (server2, end_client2) = tokio::io::duplex(1024);\n\n    let (client_config1, server_config1) =\n        generate_tls_config(\"generic-project-name.localhost\", \"localhost\").unwrap();\n    let (client_config2, server_config2) =\n        generate_tls_config(\"generic-project-name.localhost\", \"localhost\").unwrap();\n\n    tokio::spawn(async move {\n        // begin handshake with end_server\n        let end_server = connect_tls(server2, client_config2.make_tls_connect().unwrap()).await;\n        let (end_client, startup) = match handshake(\n            &RequestContext::test(),\n            client1,\n            Some(&server_config1),\n            false,\n        )\n        .await\n        .unwrap()\n        {\n            HandshakeData::Startup(stream, params) => (stream, params),\n            HandshakeData::Cancel(_) => panic!(\"cancellation not supported\"),\n        };\n\n        let mut end_server = tokio_util::codec::Framed::new(end_server, PgFrame);\n        let end_client = end_client.flush_and_into_inner().await.unwrap();\n        let mut end_client = tokio_util::codec::Framed::new(end_client, 
PgFrame);\n\n        // give the end_server the startup parameters\n        let mut buf = BytesMut::new();\n        frontend::startup_message(\n            &postgres_protocol::message::frontend::StartupMessageParams {\n                params: startup.params.as_bytes().into(),\n            },\n            &mut buf,\n        )\n        .unwrap();\n        end_server.send(buf.freeze()).await.unwrap();\n\n        // proxy messages between end_client and end_server\n        loop {\n            tokio::select! {\n                message = end_server.next() => {\n                    match message {\n                        Some(Ok(message)) => {\n                            // intercept SASL and return only SCRAM-SHA-256 ;)\n                            if matches!(intercept, Intercept::Methods) && message.starts_with(b\"R\") && message[5..].starts_with(&[0,0,0,10]) {\n                                end_client.send(Bytes::from_static(b\"R\\0\\0\\0\\x17\\0\\0\\0\\x0aSCRAM-SHA-256\\0\\0\")).await.unwrap();\n                                continue;\n                            }\n                            end_client.send(message).await.unwrap();\n                        }\n                        _ => break,\n                    }\n                }\n                message = end_client.next() => {\n                    match message {\n                        Some(Ok(message)) => {\n                            // intercept SASL response and return SCRAM-SHA-256 with no channel binding ;)\n                            if matches!(intercept, Intercept::SASLResponse) && message.starts_with(b\"p\") && message[5..].starts_with(b\"SCRAM-SHA-256-PLUS\\0\") {\n                                let sasl_message = &message[1+4+19+4..];\n                                let mut new_message = b\"n,,\".to_vec();\n                                new_message.extend_from_slice(sasl_message.strip_prefix(b\"p=tls-server-end-point,,\").unwrap());\n\n                                let mut buf = 
BytesMut::new();\n                                frontend::sasl_initial_response(\"SCRAM-SHA-256\", &new_message, &mut buf).unwrap();\n\n                                end_server.send(buf.freeze()).await.unwrap();\n                                continue;\n                            }\n                            end_server.send(message).await.unwrap();\n                        }\n                        _ => break,\n                    }\n                }\n                else => { break }\n            }\n        }\n    });\n\n    (end_server1, end_client2, client_config1, server_config2)\n}\n\n/// taken from tokio-postgres\npub(crate) async fn connect_tls<S, T>(mut stream: S, tls: T) -> T::Stream\nwhere\n    S: AsyncRead + AsyncWrite + Unpin,\n    T: TlsConnect<S>,\n    T::Error: Debug,\n{\n    let mut buf = BytesMut::new();\n    frontend::ssl_request(&mut buf);\n    stream.write_all(&buf).await.unwrap();\n\n    let mut buf = [0];\n    stream.read_exact(&mut buf).await.unwrap();\n\n    assert!(buf[0] == b'S', \"ssl not supported by server\");\n\n    tls.connect(stream).await.unwrap()\n}\n\nstruct PgFrame;\nimpl Decoder for PgFrame {\n    type Item = Bytes;\n    type Error = std::io::Error;\n\n    fn decode(&mut self, src: &mut BytesMut) -> Result<Option<Self::Item>, Self::Error> {\n        if src.len() < 5 {\n            src.reserve(5 - src.len());\n            return Ok(None);\n        }\n        let len = u32::from_be_bytes(src[1..5].try_into().unwrap()) as usize + 1;\n        if src.len() < len {\n            src.reserve(len - src.len());\n            return Ok(None);\n        }\n        Ok(Some(src.split_to(len).freeze()))\n    }\n}\nimpl Encoder<Bytes> for PgFrame {\n    type Error = std::io::Error;\n\n    fn encode(&mut self, item: Bytes, dst: &mut BytesMut) -> Result<(), Self::Error> {\n        dst.extend_from_slice(&item);\n        Ok(())\n    }\n}\n\n/// If the client doesn't support channel bindings, it can be exploited.\n#[tokio::test]\nasync fn 
scram_auth_disable_channel_binding() -> anyhow::Result<()> {\n    let (server, client, client_config, server_config) = proxy_mitm(Intercept::None).await;\n    let proxy = tokio::spawn(dummy_proxy(\n        client,\n        Some(server_config),\n        Scram::new(\"password\").await?,\n    ));\n\n    let _client_err = postgres_client::Config::new(\"test\".to_owned(), 5432)\n        .channel_binding(postgres_client::config::ChannelBinding::Disable)\n        .user(\"user\")\n        .dbname(\"db\")\n        .password(\"password\")\n        .ssl_mode(SslMode::Require)\n        .tls_and_authenticate(server, client_config.make_tls_connect()?)\n        .await?;\n\n    proxy.await?\n}\n\n/// If the client chooses SCRAM-PLUS, it will fail\n#[tokio::test]\nasync fn scram_auth_prefer_channel_binding() -> anyhow::Result<()> {\n    connect_failure(\n        Intercept::None,\n        postgres_client::config::ChannelBinding::Prefer,\n    )\n    .await\n}\n\n/// If the MITM pretends like SCRAM-PLUS isn't available, but the client supports it, it will fail\n#[tokio::test]\nasync fn scram_auth_prefer_channel_binding_intercept() -> anyhow::Result<()> {\n    connect_failure(\n        Intercept::Methods,\n        postgres_client::config::ChannelBinding::Prefer,\n    )\n    .await\n}\n\n/// If the MITM pretends like the client doesn't support channel bindings, it will fail\n#[tokio::test]\nasync fn scram_auth_prefer_channel_binding_intercept_response() -> anyhow::Result<()> {\n    connect_failure(\n        Intercept::SASLResponse,\n        postgres_client::config::ChannelBinding::Prefer,\n    )\n    .await\n}\n\n/// If the client chooses SCRAM-PLUS, it will fail\n#[tokio::test]\nasync fn scram_auth_require_channel_binding() -> anyhow::Result<()> {\n    connect_failure(\n        Intercept::None,\n        postgres_client::config::ChannelBinding::Require,\n    )\n    .await\n}\n\n/// If the client requires SCRAM-PLUS, and it is spoofed to remove SCRAM-PLUS, it will 
fail\n#[tokio::test]\nasync fn scram_auth_require_channel_binding_intercept() -> anyhow::Result<()> {\n    connect_failure(\n        Intercept::Methods,\n        postgres_client::config::ChannelBinding::Require,\n    )\n    .await\n}\n\n/// If the client requires SCRAM-PLUS, and the MITM rewrites its SASL response to drop channel binding, it will fail\n#[tokio::test]\nasync fn scram_auth_require_channel_binding_intercept_response() -> anyhow::Result<()> {\n    connect_failure(\n        Intercept::SASLResponse,\n        postgres_client::config::ChannelBinding::Require,\n    )\n    .await\n}\n\nasync fn connect_failure(\n    intercept: Intercept,\n    channel_binding: postgres_client::config::ChannelBinding,\n) -> anyhow::Result<()> {\n    let (server, client, client_config, server_config) = proxy_mitm(intercept).await;\n    let proxy = tokio::spawn(dummy_proxy(\n        client,\n        Some(server_config),\n        Scram::new(\"password\").await?,\n    ));\n\n    let _client_err = postgres_client::Config::new(\"test\".to_owned(), 5432)\n        .channel_binding(channel_binding)\n        .user(\"user\")\n        .dbname(\"db\")\n        .password(\"password\")\n        .ssl_mode(SslMode::Require)\n        .tls_and_authenticate(server, client_config.make_tls_connect()?)\n        .await\n        .err()\n        .context(\"client shouldn't be able to connect\")?;\n\n    let _server_err = proxy\n        .await?\n        .err()\n        .context(\"server shouldn't accept client\")?;\n\n    Ok(())\n}\n"
  },
  {
    "path": "proxy/src/proxy/tests/mod.rs",
    "content": "//! A group of high-level tests for connection establishing logic and auth.\n#![allow(clippy::unimplemented)]\n\nmod mitm;\n\nuse std::sync::Arc;\nuse std::time::Duration;\n\nuse anyhow::{Context, bail};\nuse async_trait::async_trait;\nuse http::StatusCode;\nuse postgres_client::config::SslMode;\nuse postgres_client::tls::{MakeTlsConnect, NoTls};\nuse rstest::rstest;\nuse rustls::crypto::ring;\nuse rustls::pki_types;\nuse tokio::io::{AsyncRead, AsyncWrite, DuplexStream};\nuse tokio::time::Instant;\nuse tracing_test::traced_test;\n\nuse super::retry::CouldRetry;\nuse crate::auth::backend::{ComputeUserInfo, MaybeOwned};\nuse crate::cache::node_info::{CachedNodeInfo, NodeInfoCache};\nuse crate::config::{CacheOptions, ComputeConfig, RetryConfig, TlsConfig};\nuse crate::context::RequestContext;\nuse crate::control_plane::client::{ControlPlaneClient, TestControlPlaneClient};\nuse crate::control_plane::messages::{ControlPlaneErrorMessage, Details, MetricsAuxInfo, Status};\nuse crate::control_plane::{self, NodeInfo};\nuse crate::error::ErrorKind;\nuse crate::pglb::ERR_INSECURE_CONNECTION;\nuse crate::pglb::handshake::{HandshakeData, handshake};\nuse crate::pqproto::BeMessage;\nuse crate::proxy::NeonOptions;\nuse crate::proxy::connect_compute::{ConnectMechanism, connect_to_compute_inner};\nuse crate::proxy::retry::retry_after;\nuse crate::stream::{PqStream, Stream};\nuse crate::tls::client_config::compute_client_config_with_certs;\nuse crate::tls::server_config::CertResolver;\nuse crate::types::{BranchId, EndpointId, ProjectId};\nuse crate::{auth, compute, sasl, scram};\n\n/// Generate a set of TLS certificates: CA + server.\nfn generate_certs(\n    hostname: &str,\n    common_name: &str,\n) -> anyhow::Result<(\n    pki_types::CertificateDer<'static>,\n    pki_types::CertificateDer<'static>,\n    pki_types::PrivateKeyDer<'static>,\n)> {\n    let ca_key = rcgen::KeyPair::generate()?;\n    let ca = {\n        let mut params = 
rcgen::CertificateParams::default();\n        params.is_ca = rcgen::IsCa::Ca(rcgen::BasicConstraints::Unconstrained);\n        params.self_signed(&ca_key)?\n    };\n\n    let cert_key = rcgen::KeyPair::generate()?;\n    let cert = {\n        let mut params = rcgen::CertificateParams::new(vec![hostname.into()])?;\n        params.distinguished_name = rcgen::DistinguishedName::new();\n        params\n            .distinguished_name\n            .push(rcgen::DnType::CommonName, common_name);\n        params.signed_by(&cert_key, &ca, &ca_key)?\n    };\n\n    Ok((\n        ca.der().clone(),\n        cert.der().clone(),\n        pki_types::PrivateKeyDer::Pkcs8(cert_key.serialize_der().into()),\n    ))\n}\n\nstruct ClientConfig<'a> {\n    config: Arc<rustls::ClientConfig>,\n    hostname: &'a str,\n}\n\ntype TlsConnect<S> = <ComputeConfig as MakeTlsConnect<S>>::TlsConnect;\n\nimpl ClientConfig<'_> {\n    fn make_tls_connect(self) -> anyhow::Result<TlsConnect<DuplexStream>> {\n        Ok(crate::tls::postgres_rustls::make_tls_connect(\n            &self.config,\n            self.hostname,\n        )?)\n    }\n}\n\n/// Generate TLS certificates and build rustls configs for client and server.\nfn generate_tls_config<'a>(\n    hostname: &'a str,\n    common_name: &'a str,\n) -> anyhow::Result<(ClientConfig<'a>, TlsConfig)> {\n    let (ca, cert, key) = generate_certs(hostname, common_name)?;\n\n    let tls_config = {\n        let config =\n            rustls::ServerConfig::builder_with_provider(Arc::new(ring::default_provider()))\n                .with_safe_default_protocol_versions()\n                .context(\"ring should support the default protocol versions\")?\n                .with_no_client_auth()\n                .with_single_cert(vec![cert.clone()], key.clone_key())?;\n\n        let cert_resolver = CertResolver::new(key, vec![cert])?;\n\n        let common_names = cert_resolver.get_common_names();\n\n        let config = Arc::new(config);\n\n        TlsConfig {\n         
   http_config: config.clone(),\n            pg_config: config,\n            common_names,\n            cert_resolver: Arc::new(cert_resolver),\n        }\n    };\n\n    let client_config = {\n        let config = Arc::new(compute_client_config_with_certs([ca]));\n\n        ClientConfig { config, hostname }\n    };\n\n    Ok((client_config, tls_config))\n}\n\n#[async_trait]\ntrait TestAuth: Sized {\n    async fn authenticate<S: AsyncRead + AsyncWrite + Unpin + Send>(\n        self,\n        stream: &mut PqStream<Stream<S>>,\n    ) -> anyhow::Result<()> {\n        stream.write_message(BeMessage::AuthenticationOk);\n        Ok(())\n    }\n}\n\nstruct NoAuth;\nimpl TestAuth for NoAuth {}\n\nstruct Scram(scram::ServerSecret);\n\nimpl Scram {\n    async fn new(password: &str) -> anyhow::Result<Self> {\n        let secret = scram::ServerSecret::build(password)\n            .await\n            .context(\"failed to generate scram secret\")?;\n        Ok(Scram(secret))\n    }\n\n    fn mock() -> Self {\n        Scram(scram::ServerSecret::mock(rand::random()))\n    }\n}\n\n#[async_trait]\nimpl TestAuth for Scram {\n    async fn authenticate<S: AsyncRead + AsyncWrite + Unpin + Send>(\n        self,\n        stream: &mut PqStream<Stream<S>>,\n    ) -> anyhow::Result<()> {\n        let outcome = auth::AuthFlow::new(stream, auth::Scram(&self.0, &RequestContext::test()))\n            .authenticate()\n            .await?;\n\n        use sasl::Outcome::*;\n        match outcome {\n            Success(_) => Ok(()),\n            Failure(reason) => bail!(\"autentication failed with an error: {reason}\"),\n        }\n    }\n}\n\n/// A dummy proxy impl which performs a handshake and reports auth success.\nasync fn dummy_proxy(\n    client: impl AsyncRead + AsyncWrite + Unpin + Send,\n    tls: Option<TlsConfig>,\n    auth: impl TestAuth + Send,\n) -> anyhow::Result<()> {\n    let mut stream = match handshake(&RequestContext::test(), client, tls.as_ref(), false).await? 
{\n        HandshakeData::Startup(stream, _) => stream,\n        HandshakeData::Cancel(_) => bail!(\"cancellation not supported\"),\n    };\n\n    auth.authenticate(&mut stream).await?;\n\n    stream.write_message(BeMessage::ParameterStatus {\n        name: b\"client_encoding\",\n        value: b\"UTF8\",\n    });\n    stream.write_message(BeMessage::ReadyForQuery);\n    stream.flush().await?;\n\n    Ok(())\n}\n\n#[tokio::test]\nasync fn handshake_tls_is_enforced_by_proxy() -> anyhow::Result<()> {\n    let (client, server) = tokio::io::duplex(1024);\n\n    let (_, server_config) = generate_tls_config(\"generic-project-name.localhost\", \"localhost\")?;\n    let proxy = tokio::spawn(dummy_proxy(client, Some(server_config), NoAuth));\n\n    let client_err = postgres_client::Config::new(\"test\".to_owned(), 5432)\n        .user(\"john_doe\")\n        .dbname(\"earth\")\n        .ssl_mode(SslMode::Disable)\n        .tls_and_authenticate(server, NoTls)\n        .await\n        .err() // -> Option<E>\n        .context(\"client shouldn't be able to connect\")?;\n\n    assert!(client_err.to_string().contains(ERR_INSECURE_CONNECTION));\n\n    let server_err = proxy\n        .await?\n        .err() // -> Option<E>\n        .context(\"server shouldn't accept client\")?;\n\n    assert!(client_err.to_string().contains(&server_err.to_string()));\n\n    Ok(())\n}\n\n#[tokio::test]\nasync fn handshake_tls() -> anyhow::Result<()> {\n    let (client, server) = tokio::io::duplex(1024);\n\n    let (client_config, server_config) =\n        generate_tls_config(\"generic-project-name.localhost\", \"localhost\")?;\n    let proxy = tokio::spawn(dummy_proxy(client, Some(server_config), NoAuth));\n\n    let _conn = postgres_client::Config::new(\"test\".to_owned(), 5432)\n        .user(\"john_doe\")\n        .dbname(\"earth\")\n        .ssl_mode(SslMode::Require)\n        .tls_and_authenticate(server, client_config.make_tls_connect()?)\n        .await?;\n\n    
proxy.await?\n}\n\n#[tokio::test]\nasync fn handshake_raw() -> anyhow::Result<()> {\n    let (client, server) = tokio::io::duplex(1024);\n\n    let proxy = tokio::spawn(dummy_proxy(client, None, NoAuth));\n\n    let _conn = postgres_client::Config::new(\"test\".to_owned(), 5432)\n        .user(\"john_doe\")\n        .dbname(\"earth\")\n        .set_param(\"options\", \"project=generic-project-name\")\n        .ssl_mode(SslMode::Prefer)\n        .tls_and_authenticate(server, NoTls)\n        .await?;\n\n    proxy.await?\n}\n\n#[tokio::test]\nasync fn keepalive_is_inherited() -> anyhow::Result<()> {\n    use tokio::net::{TcpListener, TcpStream};\n\n    let listener = TcpListener::bind(\"127.0.0.1:0\").await?;\n    let port = listener.local_addr()?.port();\n    socket2::SockRef::from(&listener).set_keepalive(true)?;\n\n    let t = tokio::spawn(async move {\n        let (client, _) = listener.accept().await?;\n        let keepalive = socket2::SockRef::from(&client).keepalive()?;\n        anyhow::Ok(keepalive)\n    });\n\n    TcpStream::connect((\"127.0.0.1\", port)).await?;\n    assert!(t.await??, \"keepalive should be inherited\");\n\n    Ok(())\n}\n\n#[rstest]\n#[case(\"password_foo\")]\n#[case(\"pwd-bar\")]\n#[case(\"\")]\n#[tokio::test]\nasync fn scram_auth_good(#[case] password: &str) -> anyhow::Result<()> {\n    let (client, server) = tokio::io::duplex(1024);\n\n    let (client_config, server_config) =\n        generate_tls_config(\"generic-project-name.localhost\", \"localhost\")?;\n    let proxy = tokio::spawn(dummy_proxy(\n        client,\n        Some(server_config),\n        Scram::new(password).await?,\n    ));\n\n    let _conn = postgres_client::Config::new(\"test\".to_owned(), 5432)\n        .channel_binding(postgres_client::config::ChannelBinding::Require)\n        .user(\"user\")\n        .dbname(\"db\")\n        .password(password)\n        .ssl_mode(SslMode::Require)\n        .tls_and_authenticate(server, client_config.make_tls_connect()?)\n        
.await?;\n\n    proxy.await?\n}\n\n#[tokio::test]\nasync fn scram_auth_disable_channel_binding() -> anyhow::Result<()> {\n    let (client, server) = tokio::io::duplex(1024);\n\n    let (client_config, server_config) =\n        generate_tls_config(\"generic-project-name.localhost\", \"localhost\")?;\n    let proxy = tokio::spawn(dummy_proxy(\n        client,\n        Some(server_config),\n        Scram::new(\"password\").await?,\n    ));\n\n    let _conn = postgres_client::Config::new(\"test\".to_owned(), 5432)\n        .channel_binding(postgres_client::config::ChannelBinding::Disable)\n        .user(\"user\")\n        .dbname(\"db\")\n        .password(\"password\")\n        .ssl_mode(SslMode::Require)\n        .tls_and_authenticate(server, client_config.make_tls_connect()?)\n        .await?;\n\n    proxy.await?\n}\n\n#[tokio::test]\nasync fn scram_auth_mock() -> anyhow::Result<()> {\n    let (client, server) = tokio::io::duplex(1024);\n\n    let (client_config, server_config) =\n        generate_tls_config(\"generic-project-name.localhost\", \"localhost\")?;\n    let proxy = tokio::spawn(dummy_proxy(client, Some(server_config), Scram::mock()));\n\n    use rand::Rng;\n    use rand::distr::Alphanumeric;\n    let password: String = rand::rng()\n        .sample_iter(&Alphanumeric)\n        .take(rand::random::<u8>() as usize)\n        .map(char::from)\n        .collect();\n\n    let _client_err = postgres_client::Config::new(\"test\".to_owned(), 5432)\n        .user(\"user\")\n        .dbname(\"db\")\n        .password(&password) // no password will match the mocked secret\n        .ssl_mode(SslMode::Require)\n        .tls_and_authenticate(server, client_config.make_tls_connect()?)\n        .await\n        .err() // -> Option<E>\n        .context(\"client shouldn't be able to connect\")?;\n\n    let _server_err = proxy\n        .await?\n        .err() // -> Option<E>\n        .context(\"server shouldn't accept client\")?;\n\n    Ok(())\n}\n\n#[test]\nfn 
connect_compute_total_wait() {\n    let mut total_wait = tokio::time::Duration::ZERO;\n    let config = RetryConfig {\n        base_delay: Duration::from_secs(1),\n        max_retries: 5,\n        backoff_factor: 2.0,\n    };\n    for num_retries in 1..config.max_retries {\n        total_wait += retry_after(num_retries, config);\n    }\n    assert!(f64::abs(total_wait.as_secs_f64() - 15.0) < 0.1);\n}\n\n#[derive(Clone, Copy, Debug)]\nenum ConnectAction {\n    Wake,\n    WakeCold,\n    WakeFail,\n    WakeRetry,\n    Connect,\n    // connect_once -> Err, could_retry = true, should_retry_wake_compute = true\n    Retry,\n    // connect_once -> Err, could_retry = true, should_retry_wake_compute = false\n    RetryNoWake,\n    // connect_once -> Err, could_retry = false, should_retry_wake_compute = true\n    Fail,\n    // connect_once -> Err, could_retry = false, should_retry_wake_compute = false\n    FailNoWake,\n}\n\n#[derive(Clone)]\nstruct TestConnectMechanism {\n    counter: Arc<std::sync::Mutex<usize>>,\n    sequence: Vec<ConnectAction>,\n    cache: &'static NodeInfoCache,\n}\n\nimpl TestConnectMechanism {\n    fn verify(&self) {\n        let counter = self.counter.lock().unwrap();\n        assert_eq!(\n            *counter,\n            self.sequence.len(),\n            \"sequence does not proceed to the end\"\n        );\n    }\n}\n\nimpl TestConnectMechanism {\n    fn new(sequence: Vec<ConnectAction>) -> Self {\n        Self {\n            counter: Arc::new(std::sync::Mutex::new(0)),\n            sequence,\n            cache: Box::leak(Box::new(NodeInfoCache::new(CacheOptions {\n                size: Some(1),\n                absolute_ttl: Some(Duration::from_secs(100)),\n                idle_ttl: None,\n            }))),\n        }\n    }\n}\n\n#[derive(Debug)]\nstruct TestConnection;\n\nimpl ConnectMechanism for TestConnectMechanism {\n    type Connection = TestConnection;\n\n    async fn connect_once(\n        &self,\n        _ctx: &RequestContext,\n        
_node_info: &CachedNodeInfo,\n        _config: &ComputeConfig,\n    ) -> Result<Self::Connection, compute::ConnectionError> {\n        let mut counter = self.counter.lock().unwrap();\n        let action = self.sequence[*counter];\n        *counter += 1;\n        match action {\n            ConnectAction::Connect => Ok(TestConnection),\n            ConnectAction::Retry => Err(compute::ConnectionError::TestError {\n                retryable: true,\n                wakeable: true,\n                kind: ErrorKind::Compute,\n            }),\n            ConnectAction::RetryNoWake => Err(compute::ConnectionError::TestError {\n                retryable: true,\n                wakeable: false,\n                kind: ErrorKind::Compute,\n            }),\n            ConnectAction::Fail => Err(compute::ConnectionError::TestError {\n                retryable: false,\n                wakeable: true,\n                kind: ErrorKind::Compute,\n            }),\n            ConnectAction::FailNoWake => Err(compute::ConnectionError::TestError {\n                retryable: false,\n                wakeable: false,\n                kind: ErrorKind::Compute,\n            }),\n            x => panic!(\"expecting action {x:?}, connect is called instead\"),\n        }\n    }\n}\n\nimpl TestControlPlaneClient for TestConnectMechanism {\n    fn wake_compute(&self) -> Result<CachedNodeInfo, control_plane::errors::WakeComputeError> {\n        let mut counter = self.counter.lock().unwrap();\n        let action = self.sequence[*counter];\n        *counter += 1;\n        match action {\n            ConnectAction::Wake => Ok(helper_create_cached_node_info(self.cache)),\n            ConnectAction::WakeCold => Ok(CachedNodeInfo::new_uncached(\n                helper_create_uncached_node_info(),\n            )),\n            ConnectAction::WakeFail => {\n                let err = control_plane::errors::ControlPlaneError::Message(Box::new(\n                    ControlPlaneErrorMessage {\n           
             http_status_code: StatusCode::BAD_REQUEST,\n                        error: \"TEST\".into(),\n                        status: None,\n                    },\n                ));\n                assert!(!err.could_retry());\n                Err(control_plane::errors::WakeComputeError::ControlPlane(err))\n            }\n            ConnectAction::WakeRetry => {\n                let err = control_plane::errors::ControlPlaneError::Message(Box::new(\n                    ControlPlaneErrorMessage {\n                        http_status_code: StatusCode::BAD_REQUEST,\n                        error: \"TEST\".into(),\n                        status: Some(Status {\n                            code: \"error\".into(),\n                            message: \"error\".into(),\n                            details: Details {\n                                error_info: None,\n                                retry_info: Some(control_plane::messages::RetryInfo {\n                                    retry_at: Instant::now() + Duration::from_millis(1),\n                                }),\n                                user_facing_message: None,\n                            },\n                        }),\n                    },\n                ));\n                assert!(err.could_retry());\n                Err(control_plane::errors::WakeComputeError::ControlPlane(err))\n            }\n            x => panic!(\"expecting action {x:?}, wake_compute is called instead\"),\n        }\n    }\n\n    fn get_access_control(\n        &self,\n    ) -> Result<control_plane::EndpointAccessControl, control_plane::errors::GetAuthInfoError> {\n        unimplemented!(\"not used in tests\")\n    }\n\n    fn dyn_clone(&self) -> Box<dyn TestControlPlaneClient> {\n        Box::new(self.clone())\n    }\n}\n\nfn helper_create_uncached_node_info() -> NodeInfo {\n    NodeInfo {\n        conn_info: compute::ConnectInfo {\n            host: \"test\".into(),\n            port: 5432,\n            
ssl_mode: SslMode::Disable,\n            host_addr: None,\n        },\n        aux: MetricsAuxInfo {\n            endpoint_id: (&EndpointId::from(\"endpoint\")).into(),\n            project_id: (&ProjectId::from(\"project\")).into(),\n            branch_id: (&BranchId::from(\"branch\")).into(),\n            compute_id: \"compute\".into(),\n            cold_start_info: crate::control_plane::messages::ColdStartInfo::Warm,\n        },\n    }\n}\n\nfn helper_create_cached_node_info(cache: &'static NodeInfoCache) -> CachedNodeInfo {\n    let node = helper_create_uncached_node_info();\n    cache.insert(\"key\".into(), Ok(node.clone()));\n    CachedNodeInfo {\n        token: Some((cache, \"key\".into())),\n        value: node,\n    }\n}\n\nfn helper_create_connect_info(\n    mechanism: &TestConnectMechanism,\n) -> auth::Backend<'static, ComputeUserInfo> {\n    auth::Backend::ControlPlane(\n        MaybeOwned::Owned(ControlPlaneClient::Test(Box::new(mechanism.clone()))),\n        ComputeUserInfo {\n            endpoint: \"endpoint\".into(),\n            user: \"user\".into(),\n            options: NeonOptions::parse_options_raw(\"\"),\n        },\n    )\n}\n\nfn config() -> ComputeConfig {\n    let retry = RetryConfig {\n        base_delay: Duration::from_secs(1),\n        max_retries: 5,\n        backoff_factor: 2.0,\n    };\n\n    ComputeConfig {\n        retry,\n        tls: Arc::new(compute_client_config_with_certs(std::iter::empty())),\n        timeout: Duration::from_secs(2),\n    }\n}\n\n#[tokio::test]\nasync fn connect_to_compute_success() {\n    let _ = env_logger::try_init();\n    use ConnectAction::*;\n    let ctx = RequestContext::test();\n    let mechanism = TestConnectMechanism::new(vec![Wake, Connect]);\n    let user_info = helper_create_connect_info(&mechanism);\n    let config = config();\n    connect_to_compute_inner(&ctx, &mechanism, &user_info, config.retry, &config)\n        .await\n        .unwrap();\n    
mechanism.verify();\n}\n\n#[tokio::test]\nasync fn connect_to_compute_retry() {\n    let _ = env_logger::try_init();\n    use ConnectAction::*;\n    let ctx = RequestContext::test();\n    let mechanism = TestConnectMechanism::new(vec![Wake, Retry, Wake, Connect]);\n    let user_info = helper_create_connect_info(&mechanism);\n    let config = config();\n    connect_to_compute_inner(&ctx, &mechanism, &user_info, config.retry, &config)\n        .await\n        .unwrap();\n    mechanism.verify();\n}\n\n/// Test that we don't retry if the error is not retryable.\n#[tokio::test]\nasync fn connect_to_compute_non_retry_1() {\n    let _ = env_logger::try_init();\n    use ConnectAction::*;\n    let ctx = RequestContext::test();\n    let mechanism = TestConnectMechanism::new(vec![Wake, Retry, Wake, Fail]);\n    let user_info = helper_create_connect_info(&mechanism);\n    let config = config();\n    connect_to_compute_inner(&ctx, &mechanism, &user_info, config.retry, &config)\n        .await\n        .unwrap_err();\n    mechanism.verify();\n}\n\n/// Even for non-retryable errors, we should retry at least once.\n#[tokio::test]\nasync fn connect_to_compute_non_retry_2() {\n    let _ = env_logger::try_init();\n    use ConnectAction::*;\n    let ctx = RequestContext::test();\n    let mechanism = TestConnectMechanism::new(vec![Wake, Fail, Wake, Connect]);\n    let user_info = helper_create_connect_info(&mechanism);\n    let config = config();\n    connect_to_compute_inner(&ctx, &mechanism, &user_info, config.retry, &config)\n        .await\n        .unwrap();\n    mechanism.verify();\n}\n\n/// Retry for at most `NUM_RETRIES_CONNECT` times.\n#[tokio::test]\nasync fn connect_to_compute_non_retry_3() {\n    let _ = env_logger::try_init();\n    tokio::time::pause();\n    use ConnectAction::*;\n    let ctx = RequestContext::test();\n    let mechanism =\n        TestConnectMechanism::new(vec![Wake, Retry, Wake, Retry, Retry, Retry, Retry, Retry]);\n    let user_info = 
helper_create_connect_info(&mechanism);\n    let wake_compute_retry_config = RetryConfig {\n        base_delay: Duration::from_secs(1),\n        max_retries: 1,\n        backoff_factor: 2.0,\n    };\n    let config = config();\n    connect_to_compute_inner(\n        &ctx,\n        &mechanism,\n        &user_info,\n        wake_compute_retry_config,\n        &config,\n    )\n    .await\n    .unwrap_err();\n    mechanism.verify();\n}\n\n/// Should retry wake compute.\n#[tokio::test]\nasync fn wake_retry() {\n    let _ = env_logger::try_init();\n    use ConnectAction::*;\n    let ctx = RequestContext::test();\n    let mechanism = TestConnectMechanism::new(vec![WakeRetry, Wake, Connect]);\n    let user_info = helper_create_connect_info(&mechanism);\n    let config = config();\n    connect_to_compute_inner(&ctx, &mechanism, &user_info, config.retry, &config)\n        .await\n        .unwrap();\n    mechanism.verify();\n}\n\n/// Wake failed with a non-retryable error.\n#[tokio::test]\nasync fn wake_non_retry() {\n    let _ = env_logger::try_init();\n    use ConnectAction::*;\n    let ctx = RequestContext::test();\n    let mechanism = TestConnectMechanism::new(vec![WakeRetry, WakeFail]);\n    let user_info = helper_create_connect_info(&mechanism);\n    let config = config();\n    connect_to_compute_inner(&ctx, &mechanism, &user_info, config.retry, &config)\n        .await\n        .unwrap_err();\n    mechanism.verify();\n}\n\n#[tokio::test]\n#[traced_test]\nasync fn fail_but_wake_invalidates_cache() {\n    let ctx = RequestContext::test();\n    let mech = TestConnectMechanism::new(vec![\n        ConnectAction::Wake,\n        ConnectAction::Fail,\n        ConnectAction::Wake,\n        ConnectAction::Connect,\n    ]);\n    let user = helper_create_connect_info(&mech);\n    let cfg = config();\n\n    connect_to_compute_inner(&ctx, &mech, &user, cfg.retry, &cfg)\n        .await\n        .unwrap();\n\n    assert!(logs_contain(\n        \"invalidating stalled compute node info 
cache entry\"\n    ));\n}\n\n#[tokio::test]\n#[traced_test]\nasync fn fail_no_wake_skips_cache_invalidation() {\n    let ctx = RequestContext::test();\n    let mech = TestConnectMechanism::new(vec![\n        ConnectAction::Wake,\n        ConnectAction::RetryNoWake,\n        ConnectAction::Connect,\n    ]);\n    let user = helper_create_connect_info(&mech);\n    let cfg = config();\n\n    connect_to_compute_inner(&ctx, &mech, &user, cfg.retry, &cfg)\n        .await\n        .unwrap();\n\n    assert!(!logs_contain(\n        \"invalidating stalled compute node info cache entry\"\n    ));\n}\n\n#[tokio::test]\n#[traced_test]\nasync fn retry_but_wake_invalidates_cache() {\n    let _ = env_logger::try_init();\n    use ConnectAction::*;\n\n    let ctx = RequestContext::test();\n    // Wake → Retry (retryable + wakeable) → Wake → Connect\n    let mechanism = TestConnectMechanism::new(vec![Wake, Retry, Wake, Connect]);\n    let user_info = helper_create_connect_info(&mechanism);\n    let cfg = config();\n\n    connect_to_compute_inner(&ctx, &mechanism, &user_info, cfg.retry, &cfg)\n        .await\n        .unwrap();\n    mechanism.verify();\n\n    // Because Retry has wakeable=true, we should see invalidate_cache\n    assert!(logs_contain(\n        \"invalidating stalled compute node info cache entry\"\n    ));\n}\n\n#[tokio::test]\n#[traced_test]\nasync fn retry_no_wake_skips_invalidation() {\n    let _ = env_logger::try_init();\n    use ConnectAction::*;\n\n    let ctx = RequestContext::test();\n    // Wake → RetryNoWake (retryable + NOT wakeable)\n    let mechanism = TestConnectMechanism::new(vec![Wake, RetryNoWake, Fail]);\n    let user_info = helper_create_connect_info(&mechanism);\n    let cfg = config();\n\n    connect_to_compute_inner(&ctx, &mechanism, &user_info, cfg.retry, &cfg)\n        .await\n        .unwrap_err();\n    mechanism.verify();\n\n    // Because RetryNoWake has wakeable=false, we must NOT see invalidate_cache\n    assert!(!logs_contain(\n        
\"invalidating stalled compute node info cache entry\"\n    ));\n}\n\n#[tokio::test]\n#[traced_test]\nasync fn retry_no_wake_error_fast() {\n    let _ = env_logger::try_init();\n    use ConnectAction::*;\n\n    let ctx = RequestContext::test();\n    // Wake → FailNoWake (not retryable + NOT wakeable)\n    let mechanism = TestConnectMechanism::new(vec![Wake, FailNoWake]);\n    let user_info = helper_create_connect_info(&mechanism);\n    let cfg = config();\n\n    connect_to_compute_inner(&ctx, &mechanism, &user_info, cfg.retry, &cfg)\n        .await\n        .unwrap_err();\n    mechanism.verify();\n\n    // Because FailNoWake has wakeable=false, we must NOT see invalidate_cache\n    assert!(!logs_contain(\n        \"invalidating stalled compute node info cache entry\"\n    ));\n}\n\n#[tokio::test]\n#[traced_test]\nasync fn retry_cold_wake_skips_invalidation() {\n    let _ = env_logger::try_init();\n    use ConnectAction::*;\n\n    let ctx = RequestContext::test();\n    // WakeCold (uncached node info) → Retry (retryable + wakeable) → Connect, with no second wake in between\n    let mechanism = TestConnectMechanism::new(vec![WakeCold, Retry, Connect]);\n    let user_info = helper_create_connect_info(&mechanism);\n    let cfg = config();\n\n    connect_to_compute_inner(&ctx, &mechanism, &user_info, cfg.retry, &cfg)\n        .await\n        .unwrap();\n    mechanism.verify();\n}\n"
  },
  {
    "path": "proxy/src/proxy/wake_compute.rs",
    "content": "use async_trait::async_trait;\nuse tracing::{error, info};\n\nuse crate::cache::node_info::CachedNodeInfo;\nuse crate::config::RetryConfig;\nuse crate::context::RequestContext;\nuse crate::control_plane::errors::{ControlPlaneError, WakeComputeError};\nuse crate::error::ReportableError;\nuse crate::metrics::{\n    ConnectOutcome, ConnectionFailuresBreakdownGroup, Metrics, RetriesMetricGroup, RetryType,\n};\nuse crate::proxy::retry::{retry_after, should_retry};\n\n// Use macro to retain original callsite.\nmacro_rules! log_wake_compute_error {\n    (error = ?$error:expr, $num_retries:expr, retriable = $retriable:literal) => {\n        match $error {\n            WakeComputeError::ControlPlane(ControlPlaneError::Message(_)) => {\n                info!(error = ?$error, num_retries = $num_retries, retriable = $retriable, \"couldn't wake compute node\")\n            }\n            _ => error!(error = ?$error, num_retries = $num_retries, retriable = $retriable, \"couldn't wake compute node\"),\n        }\n    };\n}\n\n#[async_trait]\npub(crate) trait WakeComputeBackend {\n    async fn wake_compute(&self, ctx: &RequestContext) -> Result<CachedNodeInfo, WakeComputeError>;\n}\n\npub(crate) async fn wake_compute<B: WakeComputeBackend>(\n    num_retries: &mut u32,\n    ctx: &RequestContext,\n    api: &B,\n    config: RetryConfig,\n) -> Result<CachedNodeInfo, WakeComputeError> {\n    loop {\n        match api.wake_compute(ctx).await {\n            Err(e) if !should_retry(&e, *num_retries, config) => {\n                log_wake_compute_error!(error = ?e, num_retries, retriable = false);\n                report_error(&e, false);\n                Metrics::get().proxy.retries_metric.observe(\n                    RetriesMetricGroup {\n                        outcome: ConnectOutcome::Failed,\n                        retry_type: RetryType::WakeCompute,\n                    },\n                    (*num_retries).into(),\n                );\n                return 
Err(e);\n            }\n            Err(e) => {\n                log_wake_compute_error!(error = ?e, num_retries, retriable = true);\n                report_error(&e, true);\n            }\n            Ok(n) => {\n                Metrics::get().proxy.retries_metric.observe(\n                    RetriesMetricGroup {\n                        outcome: ConnectOutcome::Success,\n                        retry_type: RetryType::WakeCompute,\n                    },\n                    (*num_retries).into(),\n                );\n                // TODO: is this necessary? We have a metric.\n                // TODO: this log line is misleading as \"wake_compute\" might return cached (and stale) info.\n                info!(?num_retries, \"compute node woken up after\");\n                return Ok(n);\n            }\n        }\n\n        let wait_duration = retry_after(*num_retries, config);\n        *num_retries += 1;\n        let pause = ctx.latency_timer_pause(crate::metrics::Waiting::RetryTimeout);\n        tokio::time::sleep(wait_duration).await;\n        drop(pause);\n    }\n}\n\nfn report_error(e: &WakeComputeError, retry: bool) {\n    let kind = e.get_error_kind();\n\n    Metrics::get()\n        .proxy\n        .connection_failures_breakdown\n        .inc(ConnectionFailuresBreakdownGroup {\n            kind,\n            retry: retry.into(),\n        });\n}\n"
  },
  {
    "path": "proxy/src/rate_limiter/leaky_bucket.rs",
    "content": "use std::hash::Hash;\nuse std::sync::atomic::{AtomicUsize, Ordering};\n\nuse ahash::RandomState;\nuse clashmap::ClashMap;\nuse rand::Rng;\nuse tokio::time::Instant;\nuse tracing::info;\nuse utils::leaky_bucket::LeakyBucketState;\n\nuse crate::intern::EndpointIdInt;\n\n// Simple per-endpoint rate limiter.\npub type EndpointRateLimiter = LeakyBucketRateLimiter<EndpointIdInt>;\n\npub struct LeakyBucketRateLimiter<Key> {\n    map: ClashMap<Key, LeakyBucketState, RandomState>,\n    default_config: utils::leaky_bucket::LeakyBucketConfig,\n    access_count: AtomicUsize,\n}\n\nimpl<K: Hash + Eq> LeakyBucketRateLimiter<K> {\n    pub const DEFAULT: LeakyBucketConfig = LeakyBucketConfig {\n        rps: 600.0,\n        max: 1500.0,\n    };\n\n    pub fn new_with_shards(config: LeakyBucketConfig, shards: usize) -> Self {\n        Self {\n            map: ClashMap::with_hasher_and_shard_amount(RandomState::new(), shards),\n            default_config: config.into(),\n            access_count: AtomicUsize::new(0),\n        }\n    }\n\n    /// Check that number of connections to the endpoint is below `max_rps` rps.\n    pub(crate) fn check(&self, key: K, config: Option<LeakyBucketConfig>, n: u32) -> bool {\n        let now = Instant::now();\n\n        let config = config.map_or(self.default_config, Into::into);\n\n        if self\n            .access_count\n            .fetch_add(1, Ordering::AcqRel)\n            .is_multiple_of(2048)\n        {\n            self.do_gc(now);\n        }\n\n        let mut entry = self\n            .map\n            .entry(key)\n            .or_insert_with(|| LeakyBucketState { empty_at: now });\n\n        entry.add_tokens(&config, now, n as f64).is_ok()\n    }\n\n    fn do_gc(&self, now: Instant) {\n        info!(\n            \"cleaning up bucket rate limiter, current size = {}\",\n            self.map.len()\n        );\n        let n = self.map.shards().len();\n        let shard = rand::rng().random_range(0..n);\n        
self.map.shards()[shard]\n            .write()\n            .retain(|(_, value)| !value.bucket_is_empty(now));\n    }\n}\n\npub struct LeakyBucketConfig {\n    pub rps: f64,\n    pub max: f64,\n}\n\nimpl LeakyBucketConfig {\n    pub fn new(rps: f64, max: f64) -> Self {\n        assert!(rps > 0.0, \"rps must be positive\");\n        assert!(max > 0.0, \"max must be positive\");\n        Self { rps, max }\n    }\n}\n\nimpl From<LeakyBucketConfig> for utils::leaky_bucket::LeakyBucketConfig {\n    fn from(config: LeakyBucketConfig) -> Self {\n        utils::leaky_bucket::LeakyBucketConfig::new(config.rps, config.max)\n    }\n}\n\n#[cfg(test)]\n#[allow(clippy::float_cmp)]\nmod tests {\n    use std::time::Duration;\n\n    use tokio::time::Instant;\n    use utils::leaky_bucket::LeakyBucketState;\n\n    use super::LeakyBucketConfig;\n\n    #[tokio::test(start_paused = true)]\n    async fn check() {\n        let config: utils::leaky_bucket::LeakyBucketConfig =\n            LeakyBucketConfig::new(500.0, 2000.0).into();\n        assert_eq!(config.cost, Duration::from_millis(2));\n        assert_eq!(config.bucket_width, Duration::from_secs(4));\n\n        let mut bucket = LeakyBucketState {\n            empty_at: Instant::now(),\n        };\n\n        // should work for 2000 requests this second\n        for _ in 0..2000 {\n            bucket.add_tokens(&config, Instant::now(), 1.0).unwrap();\n        }\n        bucket.add_tokens(&config, Instant::now(), 1.0).unwrap_err();\n        assert_eq!(bucket.empty_at - Instant::now(), config.bucket_width);\n\n        // in 1ms we should drain 0.5 tokens.\n        // make sure we don't lose any tokens\n        tokio::time::advance(Duration::from_millis(1)).await;\n        bucket.add_tokens(&config, Instant::now(), 1.0).unwrap_err();\n        tokio::time::advance(Duration::from_millis(1)).await;\n        bucket.add_tokens(&config, Instant::now(), 1.0).unwrap();\n\n        // in 10ms we should drain 5 tokens\n        
tokio::time::advance(Duration::from_millis(10)).await;\n        for _ in 0..5 {\n            bucket.add_tokens(&config, Instant::now(), 1.0).unwrap();\n        }\n        bucket.add_tokens(&config, Instant::now(), 1.0).unwrap_err();\n\n        // in 10s we should drain 5000 tokens\n        // but cap is only 2000\n        tokio::time::advance(Duration::from_secs(10)).await;\n        for _ in 0..2000 {\n            bucket.add_tokens(&config, Instant::now(), 1.0).unwrap();\n        }\n        bucket.add_tokens(&config, Instant::now(), 1.0).unwrap_err();\n\n        // should sustain 500rps\n        for _ in 0..2000 {\n            tokio::time::advance(Duration::from_millis(10)).await;\n            for _ in 0..5 {\n                bucket.add_tokens(&config, Instant::now(), 1.0).unwrap();\n            }\n        }\n    }\n}\n"
  },
  {
    "path": "proxy/src/rate_limiter/limit_algorithm/aimd.rs",
    "content": "use super::{LimitAlgorithm, Outcome, Sample};\n\n/// Loss-based congestion avoidance.\n///\n/// Additive-increase, multiplicative decrease.\n///\n/// Adds available concurrency when:\n/// 1. no load-based errors are observed, and\n/// 2. the utilisation of the current limit is high.\n///\n/// Reduces available concurrency by a factor when load-based errors are detected.\n#[derive(Clone, Copy, Debug, serde::Deserialize, PartialEq)]\npub(crate) struct Aimd {\n    /// Minimum limit for AIMD algorithm.\n    pub(crate) min: usize,\n    /// Maximum limit for AIMD algorithm.\n    pub(crate) max: usize,\n    /// Multiplicative factor applied to the limit on overload (e.g. 0.5 halves it).\n    pub(crate) dec: f32,\n    /// Amount added to the limit on success when utilisation is above the threshold.\n    pub(crate) inc: usize,\n    /// A threshold below which the limit won't be increased.\n    pub(crate) utilisation: f32,\n}\n\nimpl LimitAlgorithm for Aimd {\n    fn update(&self, old_limit: usize, sample: Sample) -> usize {\n        match sample.outcome {\n            Outcome::Success => {\n                let utilisation = sample.in_flight as f32 / old_limit as f32;\n\n                if utilisation > self.utilisation {\n                    let limit = old_limit + self.inc;\n                    let new_limit = limit.clamp(self.min, self.max);\n                    if new_limit > old_limit {\n                        tracing::info!(old_limit, new_limit, \"limit increased\");\n                    } else {\n                        tracing::debug!(old_limit, new_limit, \"limit clamped at max\");\n                    }\n\n                    new_limit\n                } else {\n                    old_limit\n                }\n            }\n            Outcome::Overload => {\n                let new_limit = old_limit as f32 * self.dec;\n\n                // Floor instead of round, so the limit reduces even with small numbers.\n                // E.g. 
round(2 * 0.9) = 2, but floor(2 * 0.9) = 1\n                let new_limit = new_limit.floor() as usize;\n\n                let new_limit = new_limit.clamp(self.min, self.max);\n                if new_limit < old_limit {\n                    tracing::info!(old_limit, new_limit, \"limit decreased\");\n                } else {\n                    tracing::debug!(old_limit, new_limit, \"limit clamped at min\");\n                }\n                new_limit\n            }\n        }\n    }\n}\n\n#[cfg(test)]\nmod tests {\n    use std::time::Duration;\n\n    use super::*;\n    use crate::rate_limiter::limit_algorithm::{\n        DynamicLimiter, RateLimitAlgorithm, RateLimiterConfig,\n    };\n\n    #[tokio::test(start_paused = true)]\n    async fn increase_decrease() {\n        let config = RateLimiterConfig {\n            initial_limit: 1,\n            algorithm: RateLimitAlgorithm::Aimd {\n                conf: Aimd {\n                    min: 1,\n                    max: 2,\n                    inc: 10,\n                    dec: 0.5,\n                    utilisation: 0.8,\n                },\n            },\n        };\n\n        let limiter = DynamicLimiter::new(config);\n\n        let token = limiter\n            .acquire_timeout(Duration::from_millis(1))\n            .await\n            .unwrap();\n        token.release(Outcome::Success);\n\n        assert_eq!(limiter.state().limit(), 2);\n\n        let token = limiter\n            .acquire_timeout(Duration::from_millis(1))\n            .await\n            .unwrap();\n        token.release(Outcome::Success);\n        assert_eq!(limiter.state().limit(), 2);\n\n        let token = limiter\n            .acquire_timeout(Duration::from_millis(1))\n            .await\n            .unwrap();\n        token.release(Outcome::Overload);\n        assert_eq!(limiter.state().limit(), 1);\n\n        let token = limiter\n            .acquire_timeout(Duration::from_millis(1))\n            .await\n            .unwrap();\n        
token.release(Outcome::Overload);\n        assert_eq!(limiter.state().limit(), 1);\n    }\n\n    #[tokio::test(start_paused = true)]\n    async fn should_decrease_limit_on_overload() {\n        let config = RateLimiterConfig {\n            initial_limit: 10,\n            algorithm: RateLimitAlgorithm::Aimd {\n                conf: Aimd {\n                    min: 1,\n                    max: 1500,\n                    inc: 10,\n                    dec: 0.5,\n                    utilisation: 0.8,\n                },\n            },\n        };\n\n        let limiter = DynamicLimiter::new(config);\n\n        let token = limiter\n            .acquire_timeout(Duration::from_millis(100))\n            .await\n            .unwrap();\n        token.release(Outcome::Overload);\n\n        assert_eq!(limiter.state().limit(), 5, \"overload: decrease\");\n    }\n\n    #[tokio::test(start_paused = true)]\n    async fn acquire_timeout_times_out() {\n        let config = RateLimiterConfig {\n            initial_limit: 1,\n            algorithm: RateLimitAlgorithm::Aimd {\n                conf: Aimd {\n                    min: 1,\n                    max: 2,\n                    inc: 10,\n                    dec: 0.5,\n                    utilisation: 0.8,\n                },\n            },\n        };\n\n        let limiter = DynamicLimiter::new(config);\n\n        let token = limiter\n            .acquire_timeout(Duration::from_millis(1))\n            .await\n            .unwrap();\n        let now = tokio::time::Instant::now();\n        limiter\n            .acquire_timeout(Duration::from_secs(1))\n            .await\n            .err()\n            .unwrap();\n\n        assert!(now.elapsed() >= Duration::from_secs(1));\n\n        token.release(Outcome::Success);\n\n        assert_eq!(limiter.state().limit(), 2);\n    }\n\n    #[tokio::test(start_paused = true)]\n    async fn should_increase_limit_on_success_when_using_gt_util_threshold() {\n        let config = 
RateLimiterConfig {\n            initial_limit: 4,\n            algorithm: RateLimitAlgorithm::Aimd {\n                conf: Aimd {\n                    min: 1,\n                    max: 1500,\n                    inc: 1,\n                    dec: 0.5,\n                    utilisation: 0.5,\n                },\n            },\n        };\n\n        let limiter = DynamicLimiter::new(config);\n\n        let token = limiter\n            .acquire_timeout(Duration::from_millis(1))\n            .await\n            .unwrap();\n        let _token = limiter\n            .acquire_timeout(Duration::from_millis(1))\n            .await\n            .unwrap();\n        let _token = limiter\n            .acquire_timeout(Duration::from_millis(1))\n            .await\n            .unwrap();\n\n        token.release(Outcome::Success);\n        assert_eq!(limiter.state().limit(), 5, \"success: increase\");\n    }\n\n    #[tokio::test(start_paused = true)]\n    async fn should_not_change_limit_on_success_when_using_lt_util_threshold() {\n        let config = RateLimiterConfig {\n            initial_limit: 4,\n            algorithm: RateLimitAlgorithm::Aimd {\n                conf: Aimd {\n                    min: 1,\n                    max: 1500,\n                    inc: 10,\n                    dec: 0.5,\n                    utilisation: 0.5,\n                },\n            },\n        };\n\n        let limiter = DynamicLimiter::new(config);\n\n        let token = limiter\n            .acquire_timeout(Duration::from_millis(1))\n            .await\n            .unwrap();\n\n        token.release(Outcome::Success);\n        assert_eq!(\n            limiter.state().limit(),\n            4,\n            \"success: ignore when < half limit\"\n        );\n    }\n\n    #[tokio::test(start_paused = true)]\n    async fn should_not_change_limit_when_no_outcome() {\n        let config = RateLimiterConfig {\n            initial_limit: 10,\n            algorithm: RateLimitAlgorithm::Aimd {\n   
             conf: Aimd {\n                    min: 1,\n                    max: 1500,\n                    inc: 10,\n                    dec: 0.5,\n                    utilisation: 0.5,\n                },\n            },\n        };\n\n        let limiter = DynamicLimiter::new(config);\n\n        let token = limiter\n            .acquire_timeout(Duration::from_millis(1))\n            .await\n            .unwrap();\n        drop(token);\n        assert_eq!(limiter.state().limit(), 10, \"ignore\");\n    }\n}\n"
  },
  {
    "path": "proxy/src/rate_limiter/limit_algorithm.rs",
    "content": "//! Algorithms for controlling concurrency limits.\nuse std::pin::pin;\nuse std::sync::Arc;\nuse std::time::Duration;\n\nuse parking_lot::Mutex;\nuse tokio::sync::Notify;\nuse tokio::time::Instant;\nuse tokio::time::error::Elapsed;\n\nuse self::aimd::Aimd;\n\npub(crate) mod aimd;\n\n/// Whether a job succeeded or failed as a result of congestion/overload.\n///\n/// Errors not considered to be caused by overload should be ignored.\n#[derive(Debug, Clone, Copy, PartialEq, Eq)]\npub(crate) enum Outcome {\n    /// The job succeeded, or failed in a way unrelated to overload.\n    Success,\n    /// The job failed because of overload, e.g. it timed out or an explicit backpressure signal\n    /// was observed.\n    Overload,\n}\n\n/// An algorithm for controlling a concurrency limit.\npub(crate) trait LimitAlgorithm: Send + Sync + 'static {\n    /// Update the concurrency limit in response to a new job completion.\n    fn update(&self, old_limit: usize, sample: Sample) -> usize;\n}\n\n/// The result of a job (or jobs), including the [`Outcome`] (loss) and latency (delay).\n#[derive(Debug, Clone, PartialEq, Eq, Copy)]\npub(crate) struct Sample {\n    pub(crate) latency: Duration,\n    /// Jobs in flight when the sample was taken.\n    pub(crate) in_flight: usize,\n    pub(crate) outcome: Outcome,\n}\n\n#[derive(Clone, Copy, Debug, Default, serde::Deserialize, PartialEq)]\n#[serde(rename_all = \"snake_case\")]\npub(crate) enum RateLimitAlgorithm {\n    #[default]\n    Fixed,\n    Aimd {\n        #[serde(flatten)]\n        conf: Aimd,\n    },\n}\n\npub(crate) struct Fixed;\n\nimpl LimitAlgorithm for Fixed {\n    fn update(&self, old_limit: usize, _sample: Sample) -> usize {\n        old_limit\n    }\n}\n\n#[derive(Clone, Copy, Debug, serde::Deserialize, PartialEq)]\npub struct RateLimiterConfig {\n    #[serde(flatten)]\n    pub(crate) algorithm: RateLimitAlgorithm,\n    pub(crate) initial_limit: usize,\n}\n\nimpl RateLimiterConfig {\n    pub(crate) fn 
create_rate_limit_algorithm(self) -> Box<dyn LimitAlgorithm> {\n        match self.algorithm {\n            RateLimitAlgorithm::Fixed => Box::new(Fixed),\n            RateLimitAlgorithm::Aimd { conf } => Box::new(conf),\n        }\n    }\n}\n\npub(crate) struct LimiterInner {\n    alg: Box<dyn LimitAlgorithm>,\n    available: usize,\n    limit: usize,\n    in_flight: usize,\n}\n\nimpl LimiterInner {\n    fn update_limit(&mut self, latency: Duration, outcome: Option<Outcome>) {\n        if let Some(outcome) = outcome {\n            let sample = Sample {\n                latency,\n                in_flight: self.in_flight,\n                outcome,\n            };\n            self.limit = self.alg.update(self.limit, sample);\n        }\n    }\n\n    fn take(&mut self, ready: &Notify) -> Option<()> {\n        if self.available >= 1 {\n            self.available -= 1;\n            self.in_flight += 1;\n\n            // tell the next in the queue that there is a permit ready\n            if self.available >= 1 {\n                ready.notify_one();\n            }\n            Some(())\n        } else {\n            None\n        }\n    }\n}\n\n/// Limits the number of concurrent jobs.\n///\n/// Concurrency is limited through the use of [`Token`]s. 
Acquire a token to run a job, and release the\n/// token once the job is finished.\n///\n/// The limit will be automatically adjusted based on observed latency (delay) and/or failures\n/// caused by overload (loss).\npub(crate) struct DynamicLimiter {\n    config: RateLimiterConfig,\n    inner: Mutex<LimiterInner>,\n    // to notify when a token is available\n    ready: Notify,\n}\n\n/// A concurrency token, required to run a job.\n///\n/// Release the token back to the [`DynamicLimiter`] after the job is complete.\npub(crate) struct Token {\n    start: Instant,\n    limiter: Option<Arc<DynamicLimiter>>,\n}\n\n/// A snapshot of the state of the [`DynamicLimiter`].\n///\n/// Not guaranteed to be consistent under high concurrency.\n#[derive(Debug, Clone, Copy)]\n#[cfg(test)]\nstruct LimiterState {\n    limit: usize,\n}\n\nimpl DynamicLimiter {\n    /// Create a limiter with a given limit control algorithm.\n    pub(crate) fn new(config: RateLimiterConfig) -> Arc<Self> {\n        let ready = Notify::new();\n        ready.notify_one();\n\n        Arc::new(Self {\n            inner: Mutex::new(LimiterInner {\n                alg: config.create_rate_limit_algorithm(),\n                available: config.initial_limit,\n                limit: config.initial_limit,\n                in_flight: 0,\n            }),\n            ready,\n            config,\n        })\n    }\n\n    /// Try to acquire a concurrency [Token], waiting for `duration` if there are none available.\n    pub(crate) async fn acquire_timeout(\n        self: &Arc<Self>,\n        duration: Duration,\n    ) -> Result<Token, Elapsed> {\n        tokio::time::timeout(duration, self.acquire()).await?\n    }\n\n    /// Try to acquire a concurrency [Token].\n    async fn acquire(self: &Arc<Self>) -> Result<Token, Elapsed> {\n        if self.config.initial_limit == 0 {\n            // If the rate limiter is disabled, we can always acquire a token.\n            Ok(Token::disabled())\n        } else {\n            
let mut notified = pin!(self.ready.notified());\n            let mut ready = notified.as_mut().enable();\n            loop {\n                if ready {\n                    let mut inner = self.inner.lock();\n                    if inner.take(&self.ready).is_some() {\n                        break Ok(Token::new(self.clone()));\n                    }\n                    notified.set(self.ready.notified());\n                }\n                notified.as_mut().await;\n                ready = true;\n            }\n        }\n    }\n\n    /// Return the concurrency [Token], along with the outcome of the job.\n    ///\n    /// The [Outcome] of the job, and the time taken to perform it, may be used\n    /// to update the concurrency limit.\n    ///\n    /// Set the outcome to `None` to ignore the job.\n    fn release_inner(&self, start: Instant, outcome: Option<Outcome>) {\n        if outcome.is_none() {\n            tracing::warn!(\"outcome is {:?}\", outcome);\n        } else {\n            tracing::debug!(\"outcome is {:?}\", outcome);\n        }\n        if self.config.initial_limit == 0 {\n            return;\n        }\n\n        let mut inner = self.inner.lock();\n\n        inner.update_limit(start.elapsed(), outcome);\n\n        inner.in_flight -= 1;\n        if inner.in_flight < inner.limit {\n            inner.available = inner.limit - inner.in_flight;\n            // At least 1 permit is now available\n            self.ready.notify_one();\n        }\n    }\n\n    /// The current state of the limiter.\n    #[cfg(test)]\n    fn state(&self) -> LimiterState {\n        let inner = self.inner.lock();\n        LimiterState { limit: inner.limit }\n    }\n}\n\nimpl Token {\n    fn new(limiter: Arc<DynamicLimiter>) -> Self {\n        Self {\n            start: Instant::now(),\n            limiter: Some(limiter),\n        }\n    }\n    pub(crate) fn disabled() -> Self {\n        Self {\n            start: Instant::now(),\n            limiter: None,\n        }\n    
}\n\n    pub(crate) fn is_disabled(&self) -> bool {\n        self.limiter.is_none()\n    }\n\n    pub(crate) fn release(mut self, outcome: Outcome) {\n        self.release_mut(Some(outcome));\n    }\n\n    pub(crate) fn release_mut(&mut self, outcome: Option<Outcome>) {\n        if let Some(limiter) = self.limiter.take() {\n            limiter.release_inner(self.start, outcome);\n        }\n    }\n}\n\nimpl Drop for Token {\n    fn drop(&mut self) {\n        self.release_mut(None);\n    }\n}\n\n#[cfg(test)]\nimpl LimiterState {\n    /// The current concurrency limit.\n    fn limit(self) -> usize {\n        self.limit\n    }\n}\n"
  },
  {
    "path": "proxy/src/rate_limiter/limiter.rs",
    "content": "use std::borrow::Cow;\nuse std::collections::hash_map::RandomState;\nuse std::hash::{BuildHasher, Hash};\nuse std::sync::Mutex;\nuse std::sync::atomic::{AtomicUsize, Ordering};\n\nuse anyhow::bail;\nuse clashmap::ClashMap;\nuse itertools::Itertools;\nuse rand::rngs::StdRng;\nuse rand::{Rng, SeedableRng};\nuse tokio::time::{Duration, Instant};\nuse tracing::info;\n\nuse super::LeakyBucketConfig;\nuse crate::ext::LockExt;\nuse crate::intern::EndpointIdInt;\n\n// Simple per-endpoint rate limiter.\n//\n// Check that number of connections to the endpoint is below `max_rps` rps.\n// Purposefully ignore user name and database name as clients can reconnect\n// with different names, so we'll end up sending some http requests to\n// the control plane.\npub type WakeComputeRateLimiter = BucketRateLimiter<EndpointIdInt, StdRng, RandomState>;\n\npub struct BucketRateLimiter<Key, Rand = StdRng, Hasher = RandomState> {\n    map: ClashMap<Key, Vec<RateBucket>, Hasher>,\n    info: Cow<'static, [RateBucketInfo]>,\n    access_count: AtomicUsize,\n    rand: Mutex<Rand>,\n}\n\n#[derive(Clone, Copy)]\nstruct RateBucket {\n    start: Instant,\n    count: u32,\n}\n\nimpl RateBucket {\n    fn should_allow_request(&mut self, info: &RateBucketInfo, now: Instant, n: u32) -> bool {\n        if now - self.start < info.interval {\n            self.count + n <= info.max_rpi\n        } else {\n            // bucket expired, reset\n            self.count = 0;\n            self.start = now;\n\n            true\n        }\n    }\n\n    fn inc(&mut self, n: u32) {\n        self.count += n;\n    }\n}\n\n#[derive(Clone, Copy, PartialEq)]\npub struct RateBucketInfo {\n    pub(crate) interval: Duration,\n    // requests per interval\n    pub(crate) max_rpi: u32,\n}\n\nimpl std::fmt::Display for RateBucketInfo {\n    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {\n        let rps = self.rps().floor() as u64;\n        write!(f, \"{rps}@{}\", 
humantime::format_duration(self.interval))\n    }\n}\n\nimpl std::fmt::Debug for RateBucketInfo {\n    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {\n        write!(f, \"{self}\")\n    }\n}\n\nimpl std::str::FromStr for RateBucketInfo {\n    type Err = anyhow::Error;\n\n    fn from_str(s: &str) -> Result<Self, Self::Err> {\n        let Some((max_rps, interval)) = s.split_once('@') else {\n            bail!(\"invalid rate info\")\n        };\n        let max_rps = max_rps.parse()?;\n        let interval = humantime::parse_duration(interval)?;\n        Ok(Self::new(max_rps, interval))\n    }\n}\n\nimpl RateBucketInfo {\n    pub const DEFAULT_SET: [Self; 3] = [\n        Self::new(300, Duration::from_secs(1)),\n        Self::new(200, Duration::from_secs(60)),\n        Self::new(100, Duration::from_secs(600)),\n    ];\n\n    pub const DEFAULT_ENDPOINT_SET: [Self; 3] = [\n        Self::new(500, Duration::from_secs(1)),\n        Self::new(300, Duration::from_secs(60)),\n        Self::new(200, Duration::from_secs(600)),\n    ];\n\n    pub fn rps(&self) -> f64 {\n        (self.max_rpi as f64) / self.interval.as_secs_f64()\n    }\n\n    pub fn validate(info: &mut [Self]) -> anyhow::Result<()> {\n        info.sort_unstable_by_key(|info| info.interval);\n        let invalid = info\n            .iter()\n            .tuple_windows()\n            .find(|(a, b)| a.max_rpi > b.max_rpi);\n        if let Some((a, b)) = invalid {\n            bail!(\n                \"invalid bucket RPS limits. 
{b} allows fewer requests per bucket than {a} ({} vs {})\",\n                b.max_rpi,\n                a.max_rpi,\n            );\n        }\n\n        Ok(())\n    }\n\n    pub const fn new(max_rps: u32, interval: Duration) -> Self {\n        Self {\n            interval,\n            max_rpi: ((max_rps as u64) * (interval.as_millis() as u64) / 1000) as u32,\n        }\n    }\n\n    pub fn to_leaky_bucket(this: &[Self]) -> Option<LeakyBucketConfig> {\n        // bit of a hack - find the min rps and max rps supported and turn it into\n        // leaky bucket config instead\n\n        let mut iter = this.iter().map(|info| info.rps());\n        let first = iter.next()?;\n\n        let (min, max) = (first, first);\n        let (min, max) = iter.fold((min, max), |(min, max), rps| {\n            (f64::min(min, rps), f64::max(max, rps))\n        });\n\n        Some(LeakyBucketConfig { rps: min, max })\n    }\n}\n\nimpl<K: Hash + Eq> BucketRateLimiter<K> {\n    pub fn new(info: impl Into<Cow<'static, [RateBucketInfo]>>) -> Self {\n        Self::new_with_rand_and_hasher(info, StdRng::from_os_rng(), RandomState::new())\n    }\n}\n\nimpl<K: Hash + Eq, R: Rng, S: BuildHasher + Clone> BucketRateLimiter<K, R, S> {\n    fn new_with_rand_and_hasher(\n        info: impl Into<Cow<'static, [RateBucketInfo]>>,\n        rand: R,\n        hasher: S,\n    ) -> Self {\n        let info = info.into();\n        info!(buckets = ?info, \"endpoint rate limiter\");\n        Self {\n            info,\n            map: ClashMap::with_hasher_and_shard_amount(hasher, 64),\n            access_count: AtomicUsize::new(1), // start from 1 to avoid GC on the first request\n            rand: Mutex::new(rand),\n        }\n    }\n\n    /// Check that number of connections to the endpoint is below `max_rps` rps.\n    pub(crate) fn check(&self, key: K, n: u32) -> bool {\n        // do a partial GC every 2k requests. 
This cleans up ~ 1/64th of the map.\n        // worst case memory usage is about:\n        //    = 2 * 2048 * 64 * (48B + 72B)\n        //    = 30MB\n        if self\n            .access_count\n            .fetch_add(1, Ordering::AcqRel)\n            .is_multiple_of(2048)\n        {\n            self.do_gc();\n        }\n\n        let now = Instant::now();\n        let mut entry = self.map.entry(key).or_insert_with(|| {\n            vec![\n                RateBucket {\n                    start: now,\n                    count: 0,\n                };\n                self.info.len()\n            ]\n        });\n\n        let should_allow_request = entry\n            .iter_mut()\n            .zip(&*self.info)\n            .all(|(bucket, info)| bucket.should_allow_request(info, now, n));\n\n        if should_allow_request {\n            // only increment the bucket counts if the request will actually be accepted\n            entry.iter_mut().for_each(|b| b.inc(n));\n        }\n\n        should_allow_request\n    }\n\n    /// Clean the map. 
Simple strategy: remove all entries in a random shard.\n    /// At worst, we'll double the effective max_rps during the cleanup.\n    /// But that way deletion does not aquire mutex on each entry access.\n    pub(crate) fn do_gc(&self) {\n        info!(\n            \"cleaning up bucket rate limiter, current size = {}\",\n            self.map.len()\n        );\n        let n = self.map.shards().len();\n        // this lock is ok as the periodic cycle of do_gc makes this very unlikely to collide\n        // (impossible, infact, unless we have 2048 threads)\n        let shard = self.rand.lock_propagate_poison().random_range(0..n);\n        self.map.shards()[shard].write().clear();\n    }\n}\n\n#[cfg(test)]\nmod tests {\n    use std::hash::BuildHasherDefault;\n    use std::time::Duration;\n\n    use rand::SeedableRng;\n    use rustc_hash::FxHasher;\n    use tokio::time;\n\n    use super::{BucketRateLimiter, WakeComputeRateLimiter};\n    use crate::intern::EndpointIdInt;\n    use crate::rate_limiter::RateBucketInfo;\n    use crate::types::EndpointId;\n\n    #[test]\n    fn rate_bucket_rpi() {\n        let rate_bucket = RateBucketInfo::new(50, Duration::from_secs(5));\n        assert_eq!(rate_bucket.max_rpi, 50 * 5);\n\n        let rate_bucket = RateBucketInfo::new(50, Duration::from_millis(500));\n        assert_eq!(rate_bucket.max_rpi, 50 / 2);\n    }\n\n    #[test]\n    fn rate_bucket_parse() {\n        let rate_bucket: RateBucketInfo = \"100@10s\".parse().unwrap();\n        assert_eq!(rate_bucket.interval, Duration::from_secs(10));\n        assert_eq!(rate_bucket.max_rpi, 100 * 10);\n        assert_eq!(rate_bucket.to_string(), \"100@10s\");\n\n        let rate_bucket: RateBucketInfo = \"100@1m\".parse().unwrap();\n        assert_eq!(rate_bucket.interval, Duration::from_secs(60));\n        assert_eq!(rate_bucket.max_rpi, 100 * 60);\n        assert_eq!(rate_bucket.to_string(), \"100@1m\");\n    }\n\n    #[test]\n    fn default_rate_buckets() {\n        let mut 
defaults = RateBucketInfo::DEFAULT_SET;\n        RateBucketInfo::validate(&mut defaults[..]).unwrap();\n    }\n\n    #[test]\n    #[should_panic = \"invalid bucket RPS limits. 10@10s allows fewer requests per bucket than 300@1s (100 vs 300)\"]\n    fn rate_buckets_validate() {\n        let mut rates: Vec<RateBucketInfo> = [\"300@1s\", \"10@10s\"]\n            .into_iter()\n            .map(|s| s.parse().unwrap())\n            .collect();\n        RateBucketInfo::validate(&mut rates).unwrap();\n    }\n\n    #[tokio::test]\n    async fn test_rate_limits() {\n        let mut rates: Vec<RateBucketInfo> = [\"100@1s\", \"20@30s\"]\n            .into_iter()\n            .map(|s| s.parse().unwrap())\n            .collect();\n        RateBucketInfo::validate(&mut rates).unwrap();\n        let limiter = WakeComputeRateLimiter::new(rates);\n\n        let endpoint = EndpointId::from(\"ep-my-endpoint-1234\");\n        let endpoint = EndpointIdInt::from(endpoint);\n\n        time::pause();\n\n        for _ in 0..100 {\n            assert!(limiter.check(endpoint, 1));\n        }\n        // more connections fail\n        assert!(!limiter.check(endpoint, 1));\n\n        // fail even after 500ms as it's in the same bucket\n        time::advance(time::Duration::from_millis(500)).await;\n        assert!(!limiter.check(endpoint, 1));\n\n        // after a full 1s, 100 requests are allowed again\n        time::advance(time::Duration::from_millis(500)).await;\n        for _ in 1..6 {\n            for _ in 0..50 {\n                assert!(limiter.check(endpoint, 2));\n            }\n            time::advance(time::Duration::from_millis(1000)).await;\n        }\n\n        // more connections after 600 will exceed the 20rps@30s limit\n        assert!(!limiter.check(endpoint, 1));\n\n        // will still fail before the 30 second limit\n        time::advance(time::Duration::from_millis(30_000 - 6_000 - 1)).await;\n        assert!(!limiter.check(endpoint, 1));\n\n        // after the full 
30 seconds, 100 requests are allowed again\n        time::advance(time::Duration::from_millis(1)).await;\n        for _ in 0..100 {\n            assert!(limiter.check(endpoint, 1));\n        }\n    }\n\n    #[tokio::test]\n    async fn test_rate_limits_gc() {\n        // fixed seeded random/hasher to ensure that the test is not flaky\n        let rand = rand::rngs::StdRng::from_seed([1; 32]);\n        let hasher = BuildHasherDefault::<FxHasher>::default();\n\n        let limiter =\n            BucketRateLimiter::new_with_rand_and_hasher(&RateBucketInfo::DEFAULT_SET, rand, hasher);\n        for i in 0..1_000_000 {\n            limiter.check(i, 1);\n        }\n        assert!(limiter.map.len() < 150_000);\n    }\n}\n"
  },
  {
    "path": "proxy/src/rate_limiter/mod.rs",
    "content": "mod leaky_bucket;\nmod limit_algorithm;\nmod limiter;\n\npub use leaky_bucket::{EndpointRateLimiter, LeakyBucketConfig, LeakyBucketRateLimiter};\n#[cfg(test)]\npub(crate) use limit_algorithm::aimd::Aimd;\npub(crate) use limit_algorithm::{\n    DynamicLimiter, Outcome, RateLimitAlgorithm, RateLimiterConfig, Token,\n};\npub use limiter::{RateBucketInfo, WakeComputeRateLimiter};\n"
  },
  {
    "path": "proxy/src/redis/connection_with_credentials_provider.rs",
    "content": "use std::sync::Arc;\nuse std::sync::atomic::{AtomicBool, Ordering};\nuse std::time::Duration;\n\nuse futures::FutureExt;\nuse redis::aio::{ConnectionLike, MultiplexedConnection};\nuse redis::{ConnectionInfo, IntoConnectionInfo, RedisConnectionInfo, RedisError, RedisResult};\nuse tokio::task::AbortHandle;\nuse tracing::{error, info, warn};\n\nuse super::elasticache::CredentialsProvider;\nuse crate::redis::elasticache::CredentialsProviderError;\n\nenum Credentials {\n    Static(ConnectionInfo),\n    Dynamic(Arc<CredentialsProvider>, redis::ConnectionAddr),\n}\n\nimpl Clone for Credentials {\n    fn clone(&self) -> Self {\n        match self {\n            Credentials::Static(info) => Credentials::Static(info.clone()),\n            Credentials::Dynamic(provider, addr) => {\n                Credentials::Dynamic(Arc::clone(provider), addr.clone())\n            }\n        }\n    }\n}\n\n#[derive(thiserror::Error, Debug)]\npub enum ConnectionProviderError {\n    #[error(transparent)]\n    Redis(#[from] RedisError),\n    #[error(transparent)]\n    CredentialsProvider(#[from] CredentialsProviderError),\n}\n\n/// A wrapper around `redis::MultiplexedConnection` that automatically refreshes the token.\n/// Provides PubSub connection without credentials refresh.\npub struct ConnectionWithCredentialsProvider {\n    credentials: Credentials,\n    // TODO: with more load on the connection, we should consider using a connection pool\n    con: Option<MultiplexedConnection>,\n    refresh_token_task: Option<AbortHandle>,\n    mutex: tokio::sync::Mutex<()>,\n    credentials_refreshed: Arc<AtomicBool>,\n}\n\nimpl Clone for ConnectionWithCredentialsProvider {\n    fn clone(&self) -> Self {\n        Self {\n            credentials: self.credentials.clone(),\n            con: None,\n            refresh_token_task: None,\n            mutex: tokio::sync::Mutex::new(()),\n            credentials_refreshed: Arc::new(AtomicBool::new(false)),\n        }\n    }\n}\n\nimpl 
ConnectionWithCredentialsProvider {\n    pub fn new_with_credentials_provider(\n        host: String,\n        port: u16,\n        credentials_provider: Arc<CredentialsProvider>,\n    ) -> Self {\n        Self {\n            credentials: Credentials::Dynamic(\n                credentials_provider,\n                redis::ConnectionAddr::TcpTls {\n                    host,\n                    port,\n                    insecure: false,\n                    tls_params: None,\n                },\n            ),\n            con: None,\n            refresh_token_task: None,\n            mutex: tokio::sync::Mutex::new(()),\n            credentials_refreshed: Arc::new(AtomicBool::new(false)),\n        }\n    }\n\n    pub fn new_with_static_credentials<T: IntoConnectionInfo>(params: T) -> Self {\n        Self {\n            credentials: Credentials::Static(\n                params\n                    .into_connection_info()\n                    .expect(\"static configured redis credentials should be a valid format\"),\n            ),\n            con: None,\n            refresh_token_task: None,\n            mutex: tokio::sync::Mutex::new(()),\n            credentials_refreshed: Arc::new(AtomicBool::new(true)),\n        }\n    }\n\n    async fn ping(con: &mut MultiplexedConnection) -> Result<(), ConnectionProviderError> {\n        redis::cmd(\"PING\")\n            .query_async(con)\n            .await\n            .map_err(Into::into)\n    }\n\n    pub(crate) fn credentials_refreshed(&self) -> bool {\n        self.credentials_refreshed.load(Ordering::Relaxed)\n    }\n\n    pub(crate) async fn connect(&mut self) -> Result<(), ConnectionProviderError> {\n        let _guard = self.mutex.lock().await;\n        if let Some(con) = self.con.as_mut() {\n            match Self::ping(con).await {\n                Ok(()) => {\n                    return Ok(());\n                }\n                Err(e) => {\n                    warn!(\"Error during PING: {e:?}\");\n               
 }\n            }\n        } else {\n            info!(\"Connection is not established\");\n        }\n        info!(\"Establishing a new connection...\");\n        self.con = None;\n        if let Some(f) = self.refresh_token_task.take() {\n            f.abort();\n        }\n        let mut con = self\n            .get_client()\n            .await?\n            .get_multiplexed_tokio_connection()\n            .await?;\n        if let Credentials::Dynamic(credentials_provider, _) = &self.credentials {\n            let credentials_provider = credentials_provider.clone();\n            let con2 = con.clone();\n            let credentials_refreshed = self.credentials_refreshed.clone();\n            let f = tokio::spawn(Self::keep_connection(\n                con2,\n                credentials_provider,\n                credentials_refreshed,\n            ));\n            self.refresh_token_task = Some(f.abort_handle());\n        }\n        match Self::ping(&mut con).await {\n            Ok(()) => {\n                info!(\"Connection succesfully established\");\n            }\n            Err(e) => {\n                warn!(\"Connection is broken. 
Error during PING: {e:?}\");\n            }\n        }\n        self.con = Some(con);\n        Ok(())\n    }\n\n    async fn get_connection_info(&self) -> Result<ConnectionInfo, ConnectionProviderError> {\n        match &self.credentials {\n            Credentials::Static(info) => Ok(info.clone()),\n            Credentials::Dynamic(provider, addr) => {\n                let (username, password) = provider.provide_credentials().await?;\n                Ok(ConnectionInfo {\n                    addr: addr.clone(),\n                    redis: RedisConnectionInfo {\n                        db: 0,\n                        username: Some(username),\n                        password: Some(password.clone()),\n                        // TODO: switch to RESP3 after testing new client version.\n                        protocol: redis::ProtocolVersion::RESP2,\n                    },\n                })\n            }\n        }\n    }\n\n    async fn get_client(&self) -> Result<redis::Client, ConnectionProviderError> {\n        let client = redis::Client::open(self.get_connection_info().await?)?;\n        self.credentials_refreshed.store(true, Ordering::Relaxed);\n        Ok(client)\n    }\n\n    // PubSub does not support credentials refresh.\n    // Requires manual reconnection every 12h.\n    pub(crate) async fn get_async_pubsub(&self) -> anyhow::Result<redis::aio::PubSub> {\n        Ok(self.get_client().await?.get_async_pubsub().await?)\n    }\n\n    // The connection lives for 12h.\n    // It can be prolonged with sending `AUTH` commands with the refreshed token.\n    // https://docs.aws.amazon.com/AmazonElastiCache/latest/red-ug/auth-iam.html#auth-iam-limits\n    async fn keep_connection(\n        mut con: MultiplexedConnection,\n        credentials_provider: Arc<CredentialsProvider>,\n        credentials_refreshed: Arc<AtomicBool>,\n    ) -> ! 
{\n        loop {\n            // The connection lives for 12h, for the sanity check we refresh it every hour.\n            tokio::time::sleep(Duration::from_secs(60 * 60)).await;\n            match Self::refresh_token(&mut con, credentials_provider.clone()).await {\n                Ok(()) => {\n                    info!(\"Token refreshed\");\n                    credentials_refreshed.store(true, Ordering::Relaxed);\n                }\n                Err(e) => {\n                    error!(\"Error during token refresh: {e:?}\");\n                    credentials_refreshed.store(false, Ordering::Relaxed);\n                }\n            }\n        }\n    }\n    async fn refresh_token(\n        con: &mut MultiplexedConnection,\n        credentials_provider: Arc<CredentialsProvider>,\n    ) -> anyhow::Result<()> {\n        let (user, password) = credentials_provider.provide_credentials().await?;\n        let _: () = redis::cmd(\"AUTH\")\n            .arg(user)\n            .arg(password)\n            .query_async(con)\n            .await?;\n        Ok(())\n    }\n    /// Sends an already encoded (packed) command into the TCP socket and\n    /// reads the single response from it.\n    pub(crate) async fn send_packed_command(\n        &mut self,\n        cmd: &redis::Cmd,\n    ) -> RedisResult<redis::Value> {\n        // Clone connection to avoid having to lock the ArcSwap in write mode\n        let con = self.con.as_mut().ok_or(redis::RedisError::from((\n            redis::ErrorKind::IoError,\n            \"Connection not established\",\n        )))?;\n        con.send_packed_command(cmd).await\n    }\n\n    /// Sends multiple already encoded (packed) command into the TCP socket\n    /// and reads `count` responses from it.  
This is used to implement\n    /// pipelining.\n    pub(crate) async fn send_packed_commands(\n        &mut self,\n        cmd: &redis::Pipeline,\n        offset: usize,\n        count: usize,\n    ) -> RedisResult<Vec<redis::Value>> {\n        // Clone shared connection future to avoid having to lock the ArcSwap in write mode\n        let con = self.con.as_mut().ok_or(redis::RedisError::from((\n            redis::ErrorKind::IoError,\n            \"Connection not established\",\n        )))?;\n        con.send_packed_commands(cmd, offset, count).await\n    }\n}\n\nimpl ConnectionLike for ConnectionWithCredentialsProvider {\n    fn req_packed_command<'a>(\n        &'a mut self,\n        cmd: &'a redis::Cmd,\n    ) -> redis::RedisFuture<'a, redis::Value> {\n        self.send_packed_command(cmd).boxed()\n    }\n\n    fn req_packed_commands<'a>(\n        &'a mut self,\n        cmd: &'a redis::Pipeline,\n        offset: usize,\n        count: usize,\n    ) -> redis::RedisFuture<'a, Vec<redis::Value>> {\n        self.send_packed_commands(cmd, offset, count).boxed()\n    }\n\n    fn get_db(&self) -> i64 {\n        self.con.as_ref().map_or(0, |c| c.get_db())\n    }\n}\n"
  },
  {
    "path": "proxy/src/redis/elasticache.rs",
    "content": "use std::sync::Arc;\nuse std::time::{Duration, SystemTime};\n\nuse aws_config::Region;\nuse aws_config::environment::EnvironmentVariableCredentialsProvider;\nuse aws_config::imds::credentials::ImdsCredentialsProvider;\nuse aws_config::meta::credentials::CredentialsProviderChain;\nuse aws_config::meta::region::RegionProviderChain;\nuse aws_config::profile::ProfileFileCredentialsProvider;\nuse aws_config::provider_config::ProviderConfig;\nuse aws_config::web_identity_token::WebIdentityTokenCredentialsProvider;\nuse aws_credential_types::provider::error::CredentialsError;\nuse aws_sdk_iam::config::ProvideCredentials;\nuse aws_sigv4::http_request::{\n    self, SignableBody, SignableRequest, SignatureLocation, SigningError, SigningSettings,\n};\nuse aws_sigv4::sign::v4::signing_params::BuildError;\nuse tracing::info;\n\n#[derive(Debug)]\npub struct AWSIRSAConfig {\n    region: String,\n    service_name: String,\n    cluster_name: String,\n    user_id: String,\n    token_ttl: Duration,\n    action: String,\n}\n\nimpl AWSIRSAConfig {\n    pub fn new(region: String, cluster_name: Option<String>, user_id: Option<String>) -> Self {\n        AWSIRSAConfig {\n            region,\n            service_name: \"elasticache\".to_string(),\n            cluster_name: cluster_name.unwrap_or_default(),\n            user_id: user_id.unwrap_or_default(),\n            // \"The IAM authentication token is valid for 15 minutes\"\n            // https://docs.aws.amazon.com/memorydb/latest/devguide/auth-iam.html#auth-iam-limits\n            token_ttl: Duration::from_secs(15 * 60),\n            action: \"connect\".to_string(),\n        }\n    }\n}\n\n#[derive(thiserror::Error, Debug)]\npub enum CredentialsProviderError {\n    #[error(transparent)]\n    AwsCredentials(#[from] CredentialsError),\n    #[error(transparent)]\n    AwsSigv4Build(#[from] BuildError),\n    #[error(transparent)]\n    AwsSigv4Singing(#[from] SigningError),\n    #[error(transparent)]\n    Http(#[from] 
http::Error),\n}\n\n/// Credentials provider for AWS elasticache authentication.\n///\n/// Official documentation:\n/// <https://docs.aws.amazon.com/AmazonElastiCache/latest/red-ug/auth-iam.html>\n///\n/// Useful resources:\n/// <https://aws.amazon.com/blogs/database/simplify-managing-access-to-amazon-elasticache-for-redis-clusters-with-iam/>\npub struct CredentialsProvider {\n    config: AWSIRSAConfig,\n    credentials_provider: CredentialsProviderChain,\n}\n\nimpl CredentialsProvider {\n    pub async fn new(\n        aws_region: String,\n        redis_cluster_name: Option<String>,\n        redis_user_id: Option<String>,\n    ) -> Arc<CredentialsProvider> {\n        let region_provider =\n            RegionProviderChain::default_provider().or_else(Region::new(aws_region.clone()));\n        let provider_conf =\n            ProviderConfig::without_region().with_region(region_provider.region().await);\n        let aws_credentials_provider = {\n            // uses \"AWS_ACCESS_KEY_ID\", \"AWS_SECRET_ACCESS_KEY\"\n            CredentialsProviderChain::first_try(\n                \"env\",\n                EnvironmentVariableCredentialsProvider::new(),\n            )\n            // uses \"AWS_PROFILE\" / `aws sso login --profile <profile>`\n            .or_else(\n                \"profile-sso\",\n                ProfileFileCredentialsProvider::builder()\n                    .configure(&provider_conf)\n                    .build(),\n            )\n            // uses \"AWS_WEB_IDENTITY_TOKEN_FILE\", \"AWS_ROLE_ARN\", \"AWS_ROLE_SESSION_NAME\"\n            // needed to access remote extensions bucket\n            .or_else(\n                \"token\",\n                WebIdentityTokenCredentialsProvider::builder()\n                    .configure(&provider_conf)\n                    .build(),\n            )\n            // uses imds v2\n            .or_else(\"imds\", ImdsCredentialsProvider::builder().build())\n        };\n        Arc::new(CredentialsProvider {\n            
config: AWSIRSAConfig::new(aws_region, redis_cluster_name, redis_user_id),\n            credentials_provider: aws_credentials_provider,\n        })\n    }\n\n    pub(crate) async fn provide_credentials(\n        &self,\n    ) -> Result<(String, String), CredentialsProviderError> {\n        let aws_credentials = self\n            .credentials_provider\n            .provide_credentials()\n            .await?\n            .into();\n        info!(\"AWS credentials successfully obtained\");\n        info!(\"Connecting to Redis with configuration: {:?}\", self.config);\n        let mut settings = SigningSettings::default();\n        settings.signature_location = SignatureLocation::QueryParams;\n        settings.expires_in = Some(self.config.token_ttl);\n        let signing_params = aws_sigv4::sign::v4::SigningParams::builder()\n            .identity(&aws_credentials)\n            .region(&self.config.region)\n            .name(&self.config.service_name)\n            .time(SystemTime::now())\n            .settings(settings)\n            .build()?\n            .into();\n        let auth_params = [\n            (\"Action\", &self.config.action),\n            (\"User\", &self.config.user_id),\n        ];\n        let auth_params = url::form_urlencoded::Serializer::new(String::new())\n            .extend_pairs(auth_params)\n            .finish();\n        let auth_uri = http::Uri::builder()\n            .scheme(\"http\")\n            .authority(self.config.cluster_name.as_bytes())\n            .path_and_query(format!(\"/?{auth_params}\"))\n            .build()?;\n        info!(\"{}\", auth_uri);\n\n        // Convert the HTTP request into a signable request\n        let signable_request = SignableRequest::new(\n            \"GET\",\n            auth_uri.to_string(),\n            std::iter::empty(),\n            SignableBody::Bytes(&[]),\n        )?;\n\n        // Sign and then apply the signature to the request\n        let (si, _) = http_request::sign(signable_request, 
&signing_params)?.into_parts();\n        let mut signable_request = http::Request::builder()\n            .method(\"GET\")\n            .uri(auth_uri)\n            .body(())?;\n        si.apply_to_request_http1x(&mut signable_request);\n        Ok((\n            self.config.user_id.clone(),\n            signable_request\n                .uri()\n                .to_string()\n                .replacen(\"http://\", \"\", 1),\n        ))\n    }\n}\n"
  },
  {
    "path": "proxy/src/redis/keys.rs",
    "content": "use crate::pqproto::CancelKeyData;\n\npub mod keyspace {\n    pub const CANCEL_PREFIX: &str = \"cancel\";\n}\n\n#[derive(Clone, Debug, Eq, PartialEq)]\npub(crate) enum KeyPrefix {\n    Cancel(CancelKeyData),\n}\n\nimpl KeyPrefix {\n    pub(crate) fn build_redis_key(&self) -> String {\n        match self {\n            KeyPrefix::Cancel(key) => {\n                let id = key.0.get();\n                let keyspace = keyspace::CANCEL_PREFIX;\n                format!(\"{keyspace}:{id:x}\")\n            }\n        }\n    }\n}\n\n#[cfg(test)]\nmod tests {\n    use super::*;\n    use crate::pqproto::id_to_cancel_key;\n\n    #[test]\n    fn test_build_redis_key() {\n        let cancel_key: KeyPrefix = KeyPrefix::Cancel(id_to_cancel_key(12345 << 32 | 54321));\n\n        let redis_key = cancel_key.build_redis_key();\n        assert_eq!(redis_key, \"cancel:30390000d431\");\n    }\n}\n"
  },
  {
    "path": "proxy/src/redis/kv_ops.rs",
    "content": "use std::time::Duration;\n\nuse futures::FutureExt;\nuse redis::aio::ConnectionLike;\nuse redis::{Cmd, FromRedisValue, Pipeline, RedisError, RedisResult};\n\nuse super::connection_with_credentials_provider::ConnectionWithCredentialsProvider;\nuse crate::redis::connection_with_credentials_provider::ConnectionProviderError;\n\n#[derive(thiserror::Error, Debug)]\npub enum RedisKVClientError {\n    #[error(transparent)]\n    Redis(#[from] RedisError),\n    #[error(transparent)]\n    ConnectionProvider(#[from] ConnectionProviderError),\n}\n\npub struct RedisKVClient {\n    client: ConnectionWithCredentialsProvider,\n}\n\n#[allow(async_fn_in_trait)]\npub trait Queryable {\n    async fn query<T: FromRedisValue>(&self, conn: &mut impl ConnectionLike) -> RedisResult<T>;\n}\n\nimpl Queryable for Pipeline {\n    async fn query<T: FromRedisValue>(&self, conn: &mut impl ConnectionLike) -> RedisResult<T> {\n        self.query_async(conn).await\n    }\n}\n\nimpl Queryable for Cmd {\n    async fn query<T: FromRedisValue>(&self, conn: &mut impl ConnectionLike) -> RedisResult<T> {\n        self.query_async(conn).await\n    }\n}\n\nimpl RedisKVClient {\n    pub fn new(client: ConnectionWithCredentialsProvider) -> Self {\n        Self { client }\n    }\n\n    pub async fn try_connect(&mut self) -> Result<(), RedisKVClientError> {\n        self.client\n            .connect()\n            .boxed()\n            .await\n            .inspect_err(|e| tracing::error!(\"failed to connect to redis: {e}\"))\n            .map_err(Into::into)\n    }\n\n    pub(crate) fn credentials_refreshed(&self) -> bool {\n        self.client.credentials_refreshed()\n    }\n\n    pub(crate) async fn query<T: FromRedisValue>(\n        &mut self,\n        q: &impl Queryable,\n    ) -> Result<T, RedisKVClientError> {\n        let e = match q.query(&mut self.client).await {\n            Ok(t) => return Ok(t),\n            Err(e) => e,\n        };\n\n        tracing::debug!(\"failed to run query: 
{e}\");\n        match e.retry_method() {\n            redis::RetryMethod::Reconnect => {\n                tracing::info!(\"Redis client is disconnected. Reconnecting...\");\n                self.try_connect().await?;\n            }\n            redis::RetryMethod::RetryImmediately => {}\n            redis::RetryMethod::WaitAndRetry => {\n                // somewhat arbitrary.\n                tokio::time::sleep(Duration::from_millis(100)).await;\n            }\n            _ => Err(e)?,\n        }\n\n        Ok(q.query(&mut self.client).await?)\n    }\n}\n"
  },
  {
    "path": "proxy/src/redis/mod.rs",
    "content": "pub mod connection_with_credentials_provider;\npub mod elasticache;\npub mod keys;\npub mod kv_ops;\npub mod notifications;\n"
  },
  {
    "path": "proxy/src/redis/notifications.rs",
    "content": "use std::convert::Infallible;\nuse std::sync::Arc;\n\nuse futures::StreamExt;\nuse redis::aio::PubSub;\nuse serde::Deserialize;\nuse tokio_util::sync::CancellationToken;\n\nuse super::connection_with_credentials_provider::ConnectionWithCredentialsProvider;\nuse crate::cache::project_info::ProjectInfoCache;\nuse crate::intern::{AccountIdInt, EndpointIdInt, ProjectIdInt, RoleNameInt};\nuse crate::metrics::{Metrics, RedisErrors, RedisEventsCount};\nuse crate::util::deserialize_json_string;\n\nconst CPLANE_CHANNEL_NAME: &str = \"neondb-proxy-ws-updates\";\nconst RECONNECT_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(20);\nconst INVALIDATION_LAG: std::time::Duration = std::time::Duration::from_secs(20);\n\nasync fn try_connect(client: &ConnectionWithCredentialsProvider) -> anyhow::Result<PubSub> {\n    let mut conn = client.get_async_pubsub().await?;\n    tracing::info!(\"subscribing to a channel `{CPLANE_CHANNEL_NAME}`\");\n    conn.subscribe(CPLANE_CHANNEL_NAME).await?;\n    Ok(conn)\n}\n\n#[derive(Debug, Deserialize)]\nstruct NotificationHeader<'a> {\n    topic: &'a str,\n}\n\n#[derive(Clone, Debug, Deserialize, Eq, PartialEq)]\n#[serde(tag = \"topic\", content = \"data\")]\nenum Notification {\n    #[serde(\n        rename = \"/account_settings_update\",\n        alias = \"/allowed_vpc_endpoints_updated_for_org\",\n        deserialize_with = \"deserialize_json_string\"\n    )]\n    AccountSettingsUpdate(InvalidateAccount),\n\n    #[serde(\n        rename = \"/endpoint_settings_update\",\n        deserialize_with = \"deserialize_json_string\"\n    )]\n    EndpointSettingsUpdate(InvalidateEndpoint),\n\n    #[serde(\n        rename = \"/project_settings_update\",\n        alias = \"/allowed_ips_updated\",\n        alias = \"/block_public_or_vpc_access_updated\",\n        alias = \"/allowed_vpc_endpoints_updated_for_projects\",\n        deserialize_with = \"deserialize_json_string\"\n    )]\n    
ProjectSettingsUpdate(InvalidateProject),\n\n    #[serde(\n        rename = \"/role_setting_update\",\n        alias = \"/password_updated\",\n        deserialize_with = \"deserialize_json_string\"\n    )]\n    RoleSettingUpdate(InvalidateRole),\n\n    #[serde(\n        other,\n        deserialize_with = \"deserialize_unknown_topic\",\n        skip_serializing\n    )]\n    UnknownTopic,\n}\n\n#[derive(Clone, Debug, Deserialize, Eq, PartialEq)]\n#[serde(rename_all = \"snake_case\")]\nenum InvalidateEndpoint {\n    EndpointId(EndpointIdInt),\n    EndpointIds(Vec<EndpointIdInt>),\n}\nimpl std::ops::Deref for InvalidateEndpoint {\n    type Target = [EndpointIdInt];\n    fn deref(&self) -> &Self::Target {\n        match self {\n            Self::EndpointId(id) => std::slice::from_ref(id),\n            Self::EndpointIds(ids) => ids,\n        }\n    }\n}\n\n#[derive(Clone, Debug, Deserialize, Eq, PartialEq)]\n#[serde(rename_all = \"snake_case\")]\nenum InvalidateProject {\n    ProjectId(ProjectIdInt),\n    ProjectIds(Vec<ProjectIdInt>),\n}\nimpl std::ops::Deref for InvalidateProject {\n    type Target = [ProjectIdInt];\n    fn deref(&self) -> &Self::Target {\n        match self {\n            Self::ProjectId(id) => std::slice::from_ref(id),\n            Self::ProjectIds(ids) => ids,\n        }\n    }\n}\n\n#[derive(Clone, Debug, Deserialize, Eq, PartialEq)]\n#[serde(rename_all = \"snake_case\")]\nenum InvalidateAccount {\n    AccountId(AccountIdInt),\n    AccountIds(Vec<AccountIdInt>),\n}\nimpl std::ops::Deref for InvalidateAccount {\n    type Target = [AccountIdInt];\n    fn deref(&self) -> &Self::Target {\n        match self {\n            Self::AccountId(id) => std::slice::from_ref(id),\n            Self::AccountIds(ids) => ids,\n        }\n    }\n}\n\n#[derive(Clone, Debug, Deserialize, Eq, PartialEq)]\nstruct InvalidateRole {\n    project_id: ProjectIdInt,\n    role_name: RoleNameInt,\n}\n\n// https://github.com/serde-rs/serde/issues/1714\nfn 
deserialize_unknown_topic<'de, D>(deserializer: D) -> Result<(), D::Error>\nwhere\n    D: serde::Deserializer<'de>,\n{\n    deserializer.deserialize_any(serde::de::IgnoredAny)?;\n    Ok(())\n}\n\nstruct MessageHandler<C: Send + Sync + 'static> {\n    cache: Arc<C>,\n}\n\nimpl<C: Send + Sync + 'static> Clone for MessageHandler<C> {\n    fn clone(&self) -> Self {\n        Self {\n            cache: self.cache.clone(),\n        }\n    }\n}\n\nimpl MessageHandler<ProjectInfoCache> {\n    pub(crate) fn new(cache: Arc<ProjectInfoCache>) -> Self {\n        Self { cache }\n    }\n\n    #[tracing::instrument(skip(self, msg), fields(session_id = tracing::field::Empty))]\n    async fn handle_message(&self, msg: redis::Msg) -> anyhow::Result<()> {\n        let payload: String = msg.get_payload()?;\n        tracing::debug!(?payload, \"received a message payload\");\n\n        let msg: Notification = match serde_json::from_str(&payload) {\n            Ok(Notification::UnknownTopic) => {\n                match serde_json::from_str::<NotificationHeader>(&payload) {\n                    // don't update the metric for redis errors if it's just a topic we don't know about.\n                    Ok(header) => tracing::warn!(topic = header.topic, \"unknown topic\"),\n                    Err(e) => {\n                        Metrics::get().proxy.redis_errors_total.inc(RedisErrors {\n                            channel: msg.get_channel_name(),\n                        });\n                        tracing::error!(\"broken message: {e}\");\n                    }\n                }\n                return Ok(());\n            }\n            Ok(msg) => msg,\n            Err(e) => {\n                Metrics::get().proxy.redis_errors_total.inc(RedisErrors {\n                    channel: msg.get_channel_name(),\n                });\n                match serde_json::from_str::<NotificationHeader>(&payload) {\n                    Ok(header) => tracing::error!(topic = header.topic, \"broken 
message: {e}\"),\n                    Err(_) => tracing::error!(\"broken message: {e}\"),\n                }\n                return Ok(());\n            }\n        };\n\n        tracing::debug!(?msg, \"received a message\");\n        match msg {\n            Notification::RoleSettingUpdate { .. }\n            | Notification::EndpointSettingsUpdate { .. }\n            | Notification::ProjectSettingsUpdate { .. }\n            | Notification::AccountSettingsUpdate { .. } => {\n                invalidate_cache(self.cache.clone(), msg.clone());\n\n                let m = &Metrics::get().proxy.redis_events_count;\n                match msg {\n                    Notification::RoleSettingUpdate { .. } => {\n                        m.inc(RedisEventsCount::InvalidateRole);\n                    }\n                    Notification::EndpointSettingsUpdate { .. } => {\n                        m.inc(RedisEventsCount::InvalidateEndpoint);\n                    }\n                    Notification::ProjectSettingsUpdate { .. } => {\n                        m.inc(RedisEventsCount::InvalidateProject);\n                    }\n                    Notification::AccountSettingsUpdate { .. 
} => {\n                        m.inc(RedisEventsCount::InvalidateOrg);\n                    }\n                    Notification::UnknownTopic => {}\n                }\n\n                // TODO: add additional metrics for the other event types.\n\n                // It might happen that the invalid entry is on the way to be cached.\n                // To make sure that the entry is invalidated, let's repeat the invalidation in INVALIDATION_LAG seconds.\n                // TODO: include the version (or the timestamp) in the message and invalidate only if the entry is cached before the message.\n                let cache = self.cache.clone();\n                tokio::spawn(async move {\n                    tokio::time::sleep(INVALIDATION_LAG).await;\n                    invalidate_cache(cache, msg);\n                });\n            }\n\n            Notification::UnknownTopic => unreachable!(),\n        }\n\n        Ok(())\n    }\n}\n\nfn invalidate_cache(cache: Arc<ProjectInfoCache>, msg: Notification) {\n    match msg {\n        Notification::EndpointSettingsUpdate(ids) => ids\n            .iter()\n            .for_each(|&id| cache.invalidate_endpoint_access(id)),\n\n        Notification::AccountSettingsUpdate(ids) => ids\n            .iter()\n            .for_each(|&id| cache.invalidate_endpoint_access_for_org(id)),\n\n        Notification::ProjectSettingsUpdate(ids) => ids\n            .iter()\n            .for_each(|&id| cache.invalidate_endpoint_access_for_project(id)),\n\n        Notification::RoleSettingUpdate(InvalidateRole {\n            project_id,\n            role_name,\n        }) => cache.invalidate_role_secret_for_project(project_id, role_name),\n\n        Notification::UnknownTopic => unreachable!(),\n    }\n}\n\nasync fn handle_messages(\n    handler: MessageHandler<ProjectInfoCache>,\n    redis: ConnectionWithCredentialsProvider,\n    cancellation_token: CancellationToken,\n) -> anyhow::Result<()> {\n    loop {\n        if 
cancellation_token.is_cancelled() {\n            return Ok(());\n        }\n        let mut conn = match try_connect(&redis).await {\n            Ok(conn) => conn,\n            Err(e) => {\n                tracing::error!(\n                    \"failed to connect to redis: {e}, will try to reconnect in {RECONNECT_TIMEOUT:#?}\"\n                );\n                tokio::time::sleep(RECONNECT_TIMEOUT).await;\n                continue;\n            }\n        };\n        let mut stream = conn.on_message();\n        while let Some(msg) = stream.next().await {\n            match handler.handle_message(msg).await {\n                Ok(()) => {}\n                Err(e) => {\n                    tracing::error!(\"failed to handle message: {e}, will try to reconnect\");\n                    break;\n                }\n            }\n            if cancellation_token.is_cancelled() {\n                return Ok(());\n            }\n        }\n    }\n}\n\n/// Handle console's invalidation messages.\n#[tracing::instrument(name = \"redis_notifications\", skip_all)]\npub async fn task_main(\n    redis: ConnectionWithCredentialsProvider,\n    cache: Arc<ProjectInfoCache>,\n) -> anyhow::Result<Infallible> {\n    let handler = MessageHandler::new(cache);\n    // 6h - 1m.\n    // There will be 1 minute overlap between two tasks. 
But at least we can be sure that no message is lost.\n    let mut interval = tokio::time::interval(std::time::Duration::from_secs(6 * 60 * 60 - 60));\n    loop {\n        let cancellation_token = CancellationToken::new();\n        interval.tick().await;\n\n        tokio::spawn(handle_messages(\n            handler.clone(),\n            redis.clone(),\n            cancellation_token.clone(),\n        ));\n        tokio::spawn(async move {\n            tokio::time::sleep(std::time::Duration::from_secs(6 * 60 * 60)).await; // 6h.\n            cancellation_token.cancel();\n        });\n    }\n}\n\n#[cfg(test)]\nmod tests {\n    use serde_json::json;\n\n    use super::*;\n    use crate::types::{ProjectId, RoleName};\n\n    #[test]\n    fn parse_allowed_ips() -> anyhow::Result<()> {\n        let project_id: ProjectId = \"new_project\".into();\n        let data = format!(\"{{\\\"project_id\\\": \\\"{project_id}\\\"}}\");\n        let text = json!({\n            \"type\": \"message\",\n            \"topic\": \"/allowed_ips_updated\",\n            \"data\": data,\n            \"extre_fields\": \"something\"\n        })\n        .to_string();\n\n        let result: Notification = serde_json::from_str(&text)?;\n        assert_eq!(\n            result,\n            Notification::ProjectSettingsUpdate(InvalidateProject::ProjectId((&project_id).into()))\n        );\n\n        Ok(())\n    }\n\n    #[test]\n    fn parse_multiple_projects() -> anyhow::Result<()> {\n        let project_id1: ProjectId = \"new_project1\".into();\n        let project_id2: ProjectId = \"new_project2\".into();\n        let data = format!(\"{{\\\"project_ids\\\": [\\\"{project_id1}\\\",\\\"{project_id2}\\\"]}}\");\n        let text = json!({\n            \"type\": \"message\",\n            \"topic\": \"/allowed_vpc_endpoints_updated_for_projects\",\n            \"data\": data,\n            \"extre_fields\": \"something\"\n        })\n        .to_string();\n\n        let result: Notification = 
serde_json::from_str(&text)?;\n        assert_eq!(\n            result,\n            Notification::ProjectSettingsUpdate(InvalidateProject::ProjectIds(vec![\n                (&project_id1).into(),\n                (&project_id2).into()\n            ]))\n        );\n\n        Ok(())\n    }\n\n    #[test]\n    fn parse_password_updated() -> anyhow::Result<()> {\n        let project_id: ProjectId = \"new_project\".into();\n        let role_name: RoleName = \"new_role\".into();\n        let data = format!(\"{{\\\"project_id\\\": \\\"{project_id}\\\", \\\"role_name\\\": \\\"{role_name}\\\"}}\");\n        let text = json!({\n            \"type\": \"message\",\n            \"topic\": \"/password_updated\",\n            \"data\": data,\n            \"extre_fields\": \"something\"\n        })\n        .to_string();\n\n        let result: Notification = serde_json::from_str(&text)?;\n        assert_eq!(\n            result,\n            Notification::RoleSettingUpdate(InvalidateRole {\n                project_id: (&project_id).into(),\n                role_name: (&role_name).into(),\n            })\n        );\n\n        Ok(())\n    }\n\n    #[test]\n    fn parse_unknown_topic() -> anyhow::Result<()> {\n        let with_data = json!({\n            \"type\": \"message\",\n            \"topic\": \"/doesnotexist\",\n            \"data\": {\n                \"payload\": \"ignored\"\n            },\n            \"extra_fields\": \"something\"\n        })\n        .to_string();\n        let result: Notification = serde_json::from_str(&with_data)?;\n        assert_eq!(result, Notification::UnknownTopic);\n\n        let without_data = json!({\n            \"type\": \"message\",\n            \"topic\": \"/doesnotexist\",\n            \"extra_fields\": \"something\"\n        })\n        .to_string();\n        let result: Notification = serde_json::from_str(&without_data)?;\n        assert_eq!(result, Notification::UnknownTopic);\n\n        Ok(())\n    }\n}\n"
  },
  {
    "path": "proxy/src/sasl/channel_binding.rs",
    "content": "//! Definition and parser for channel binding flag (a part of the `GS2` header).\n\nuse base64::Engine as _;\nuse base64::prelude::BASE64_STANDARD;\n\n/// Channel binding flag (possibly with params).\n#[derive(Debug, PartialEq, Eq)]\npub(crate) enum ChannelBinding<T> {\n    /// Client doesn't support channel binding.\n    NotSupportedClient,\n    /// Client thinks server doesn't support channel binding.\n    NotSupportedServer,\n    /// Client wants to use this type of channel binding.\n    Required(T),\n}\n\nimpl<T> ChannelBinding<T> {\n    pub(crate) fn and_then<R, E>(\n        self,\n        f: impl FnOnce(T) -> Result<R, E>,\n    ) -> Result<ChannelBinding<R>, E> {\n        Ok(match self {\n            Self::NotSupportedClient => ChannelBinding::NotSupportedClient,\n            Self::NotSupportedServer => ChannelBinding::NotSupportedServer,\n            Self::Required(x) => ChannelBinding::Required(f(x)?),\n        })\n    }\n}\n\nimpl<'a> ChannelBinding<&'a str> {\n    // NB: FromStr doesn't work with lifetimes\n    pub(crate) fn parse(input: &'a str) -> Option<Self> {\n        Some(match input {\n            \"n\" => Self::NotSupportedClient,\n            \"y\" => Self::NotSupportedServer,\n            other => Self::Required(other.strip_prefix(\"p=\")?),\n        })\n    }\n}\n\nimpl<T: std::fmt::Display> ChannelBinding<T> {\n    /// Encode channel binding data as base64 for subsequent checks.\n    pub(crate) fn encode<'a, E>(\n        &self,\n        get_cbind_data: impl FnOnce(&T) -> Result<&'a [u8], E>,\n    ) -> Result<std::borrow::Cow<'static, str>, E> {\n        Ok(match self {\n            Self::NotSupportedClient => {\n                // base64::encode(\"n,,\")\n                \"biws\".into()\n            }\n            Self::NotSupportedServer => {\n                // base64::encode(\"y,,\")\n                \"eSws\".into()\n            }\n            Self::Required(mode) => {\n                let mut cbind_input = 
format!(\"p={mode},,\",).into_bytes();\n                cbind_input.extend_from_slice(get_cbind_data(mode)?);\n                BASE64_STANDARD.encode(&cbind_input).into()\n            }\n        })\n    }\n}\n\n#[cfg(test)]\nmod tests {\n    use super::*;\n\n    #[test]\n    fn channel_binding_encode() -> anyhow::Result<()> {\n        use ChannelBinding::*;\n\n        let cases = [\n            (NotSupportedClient, BASE64_STANDARD.encode(\"n,,\")),\n            (NotSupportedServer, BASE64_STANDARD.encode(\"y,,\")),\n            (Required(\"foo\"), BASE64_STANDARD.encode(\"p=foo,,bar\")),\n        ];\n\n        for (cb, input) in cases {\n            assert_eq!(cb.encode(|_| anyhow::Ok(b\"bar\"))?, input);\n        }\n\n        Ok(())\n    }\n}\n"
  },
  {
    "path": "proxy/src/sasl/messages.rs",
    "content": "//! Definitions for SASL messages.\n\nuse crate::parse::split_cstr;\n\n/// SASL-specific payload of [`PasswordMessage`](pq_proto::FeMessage::PasswordMessage).\n#[derive(Debug)]\npub(crate) struct FirstMessage<'a> {\n    /// Authentication method, e.g. `\"SCRAM-SHA-256\"`.\n    pub(crate) method: &'a str,\n    /// Initial client message.\n    pub(crate) message: &'a str,\n}\n\nimpl<'a> FirstMessage<'a> {\n    // NB: FromStr doesn't work with lifetimes\n    pub(crate) fn parse(bytes: &'a [u8]) -> Option<Self> {\n        let (method_cstr, tail) = split_cstr(bytes)?;\n        let method = method_cstr.to_str().ok()?;\n\n        let (len_bytes, bytes) = tail.split_first_chunk()?;\n        let len = u32::from_be_bytes(*len_bytes) as usize;\n        if len != bytes.len() {\n            return None;\n        }\n\n        let message = std::str::from_utf8(bytes).ok()?;\n        Some(Self { method, message })\n    }\n}\n\n#[cfg(test)]\nmod tests {\n    use super::*;\n\n    #[test]\n    fn parse_sasl_first_message() {\n        let proto = \"SCRAM-SHA-256\";\n        let sasl = \"n,,n=,r=KHQ2Gjc7NptyB8aov5/TnUy4\";\n        let sasl_len = (sasl.len() as u32).to_be_bytes();\n        let bytes = [proto.as_bytes(), &[0], sasl_len.as_ref(), sasl.as_bytes()].concat();\n\n        let password = FirstMessage::parse(&bytes).unwrap();\n        assert_eq!(password.method, proto);\n        assert_eq!(password.message, sasl);\n    }\n}\n"
  },
  {
    "path": "proxy/src/sasl/mod.rs",
    "content": "//! Simple Authentication and Security Layer.\n//!\n//! RFC: <https://datatracker.ietf.org/doc/html/rfc4422>.\n//!\n//! Reference implementation:\n//! * <https://github.com/postgres/postgres/blob/94226d4506e66d6e7cbf4b391f1e7393c1962841/src/backend/libpq/auth-sasl.c>\n//! * <https://github.com/postgres/postgres/blob/94226d4506e66d6e7cbf4b391f1e7393c1962841/src/interfaces/libpq/fe-auth.c>\n\nmod channel_binding;\nmod messages;\nmod stream;\n\nuse std::io;\n\npub(crate) use channel_binding::ChannelBinding;\npub(crate) use messages::FirstMessage;\npub(crate) use stream::{Outcome, authenticate};\nuse thiserror::Error;\n\nuse crate::error::{ReportableError, UserFacingError};\n\n/// Fine-grained auth errors help in writing tests.\n#[derive(Error, Debug)]\npub(crate) enum Error {\n    #[error(\"Unsupported authentication method: {0}\")]\n    BadAuthMethod(Box<str>),\n\n    #[error(\"Channel binding failed: {0}\")]\n    ChannelBindingFailed(&'static str),\n\n    #[error(\"Unsupported channel binding method: {0}\")]\n    ChannelBindingBadMethod(Box<str>),\n\n    #[error(\"Bad client message: {0}\")]\n    BadClientMessage(&'static str),\n\n    #[error(\"Internal error: missing digest\")]\n    MissingBinding,\n\n    #[error(\"could not decode salt: {0}\")]\n    Base64(#[from] base64::DecodeError),\n\n    #[error(transparent)]\n    Io(#[from] io::Error),\n}\n\nimpl UserFacingError for Error {\n    fn to_string_client(&self) -> String {\n        match self {\n            Self::ChannelBindingFailed(m) => (*m).to_string(),\n            Self::ChannelBindingBadMethod(m) => format!(\"unsupported channel binding method {m}\"),\n            _ => \"authentication protocol violation\".to_string(),\n        }\n    }\n}\n\nimpl ReportableError for Error {\n    fn get_error_kind(&self) -> crate::error::ErrorKind {\n        match self {\n            Error::BadAuthMethod(_) => crate::error::ErrorKind::User,\n            Error::ChannelBindingFailed(_) => 
crate::error::ErrorKind::User,\n            Error::ChannelBindingBadMethod(_) => crate::error::ErrorKind::User,\n            Error::BadClientMessage(_) => crate::error::ErrorKind::User,\n            Error::MissingBinding => crate::error::ErrorKind::Service,\n            Error::Base64(_) => crate::error::ErrorKind::ControlPlane,\n            Error::Io(_) => crate::error::ErrorKind::ClientDisconnect,\n        }\n    }\n}\n\n/// A convenient result type for SASL exchange.\npub(crate) type Result<T> = std::result::Result<T, Error>;\n\n/// A result of one SASL exchange.\n#[must_use]\npub(crate) enum Step<T, R> {\n    /// We should continue exchanging messages.\n    Continue(T, String),\n    /// The client has been authenticated successfully.\n    Success(R, String),\n    /// Authentication failed (reason attached).\n    Failure(&'static str),\n}\n\n/// Every SASL mechanism (e.g. [SCRAM](crate::scram)) is expected to implement this trait.\npub(crate) trait Mechanism: Sized {\n    /// What's produced as a result of successful authentication.\n    type Output;\n\n    /// Produce a server challenge to be sent to the client.\n    /// This is how this method is called in PostgreSQL (`libpq/sasl.h`).\n    fn exchange(self, input: &str) -> Result<Step<Self, Self::Output>>;\n}\n"
  },
  {
    "path": "proxy/src/sasl/stream.rs",
    "content": "//! Abstraction for the string-oriented SASL protocols.\n\nuse std::io;\n\nuse tokio::io::{AsyncRead, AsyncWrite};\n\nuse super::{Mechanism, Step};\nuse crate::context::RequestContext;\nuse crate::pqproto::{BeAuthenticationSaslMessage, BeMessage};\nuse crate::stream::PqStream;\n\n/// SASL authentication outcome.\n/// It's much easier to match on those two variants\n/// than to peek into a noisy protocol error type.\n#[must_use = \"caller must explicitly check for success\"]\npub(crate) enum Outcome<R> {\n    /// Authentication succeeded and produced some value.\n    Success(R),\n    /// Authentication failed (reason attached).\n    Failure(&'static str),\n}\n\npub async fn authenticate<S, F, M>(\n    ctx: &RequestContext,\n    stream: &mut PqStream<S>,\n    mechanism: F,\n) -> super::Result<Outcome<M::Output>>\nwhere\n    S: AsyncRead + AsyncWrite + Unpin,\n    F: FnOnce(&str) -> super::Result<M>,\n    M: Mechanism,\n{\n    let (mut mechanism, mut input) = {\n        // pause the timer while we communicate with the client\n        let _paused = ctx.latency_timer_pause(crate::metrics::Waiting::Client);\n\n        // Initial client message contains the chosen auth method's name.\n        let msg = stream.read_password_message().await?;\n\n        let sasl = super::FirstMessage::parse(msg)\n            .ok_or(super::Error::BadClientMessage(\"bad sasl message\"))?;\n\n        (mechanism(sasl.method)?, sasl.message)\n    };\n\n    loop {\n        match mechanism.exchange(input) {\n            Ok(Step::Continue(moved_mechanism, reply)) => {\n                mechanism = moved_mechanism;\n\n                // write reply\n                let sasl_msg = BeAuthenticationSaslMessage::Continue(reply.as_bytes());\n                stream.write_message(BeMessage::AuthenticationSasl(sasl_msg));\n                drop(reply);\n            }\n            Ok(Step::Success(result, reply)) => {\n                // write reply\n                let sasl_msg = 
BeAuthenticationSaslMessage::Final(reply.as_bytes());\n                stream.write_message(BeMessage::AuthenticationSasl(sasl_msg));\n                stream.write_message(BeMessage::AuthenticationOk);\n\n                // exit with success\n                break Ok(Outcome::Success(result));\n            }\n            // exit with failure\n            Ok(Step::Failure(reason)) => break Ok(Outcome::Failure(reason)),\n            Err(error) => {\n                tracing::info!(?error, \"error during SASL exchange\");\n                return Err(error);\n            }\n        }\n\n        // pause the timer while we communicate with the client\n        let _paused = ctx.latency_timer_pause(crate::metrics::Waiting::Client);\n\n        // get next input\n        stream.flush().await?;\n        let msg = stream.read_password_message().await?;\n        input = std::str::from_utf8(msg)\n            .map_err(|_| io::Error::new(io::ErrorKind::InvalidData, \"bad encoding\"))?;\n    }\n}\n"
  },
  {
    "path": "proxy/src/scram/cache.rs",
    "content": "use tokio::time::Instant;\nuse zeroize::Zeroize as _;\n\nuse super::pbkdf2;\nuse crate::cache::Cached;\nuse crate::cache::common::{Cache, count_cache_insert, count_cache_outcome, eviction_listener};\nuse crate::intern::{EndpointIdInt, RoleNameInt};\nuse crate::metrics::{CacheKind, Metrics};\n\npub(crate) struct Pbkdf2Cache(moka::sync::Cache<(EndpointIdInt, RoleNameInt), Pbkdf2CacheEntry>);\npub(crate) type CachedPbkdf2<'a> = Cached<&'a Pbkdf2Cache>;\n\nimpl Cache for Pbkdf2Cache {\n    type Key = (EndpointIdInt, RoleNameInt);\n    type Value = Pbkdf2CacheEntry;\n\n    fn invalidate(&self, info: &(EndpointIdInt, RoleNameInt)) {\n        self.0.invalidate(info);\n    }\n}\n\n/// To speed up password hashing for more active customers, we store the tail results of the\n/// PBKDF2 algorithm. If the output of PBKDF2 is U1 ^ U2 ^ ⋯ ^ Uc, then we store\n/// suffix = U17 ^ U18 ^ ⋯ ^ Uc. We only need to calculate U1 ^ U2 ^ ⋯ ^ U15 ^ U16\n/// to determine the final result.\n///\n/// The suffix alone isn't enough to crack the password. 
The stored_key is still required.\n/// While both are cached in memory, given they're in different locations it makes it much\n/// harder to exploit, even if any such memory exploit exists in proxy.\n#[derive(Clone)]\npub struct Pbkdf2CacheEntry {\n    /// corresponds to [`super::ServerSecret::cached_at`]\n    pub(super) cached_from: Instant,\n    pub(super) suffix: pbkdf2::Block,\n}\n\nimpl Drop for Pbkdf2CacheEntry {\n    fn drop(&mut self) {\n        self.suffix.zeroize();\n    }\n}\n\nimpl Pbkdf2Cache {\n    pub fn new() -> Self {\n        const SIZE: u64 = 100;\n        const TTL: std::time::Duration = std::time::Duration::from_secs(60);\n\n        let builder = moka::sync::Cache::builder()\n            .name(\"pbkdf2\")\n            .max_capacity(SIZE)\n            // We use time_to_live so we don't refresh the lifetime for an invalid password attempt.\n            .time_to_live(TTL);\n\n        Metrics::get()\n            .cache\n            .capacity\n            .set(CacheKind::Pbkdf2, SIZE as i64);\n\n        let builder =\n            builder.eviction_listener(|_k, _v, cause| eviction_listener(CacheKind::Pbkdf2, cause));\n\n        Self(builder.build())\n    }\n\n    pub fn insert(&self, endpoint: EndpointIdInt, role: RoleNameInt, value: Pbkdf2CacheEntry) {\n        count_cache_insert(CacheKind::Pbkdf2);\n        self.0.insert((endpoint, role), value);\n    }\n\n    fn get(&self, endpoint: EndpointIdInt, role: RoleNameInt) -> Option<Pbkdf2CacheEntry> {\n        count_cache_outcome(CacheKind::Pbkdf2, self.0.get(&(endpoint, role)))\n    }\n\n    pub fn get_entry(\n        &self,\n        endpoint: EndpointIdInt,\n        role: RoleNameInt,\n    ) -> Option<CachedPbkdf2<'_>> {\n        self.get(endpoint, role).map(|value| Cached {\n            token: Some((self, (endpoint, role))),\n            value,\n        })\n    }\n}\n"
  },
  {
    "path": "proxy/src/scram/countmin.rs",
    "content": "use std::hash::Hash;\n\n/// estimator of hash jobs per second.\n/// <https://en.wikipedia.org/wiki/Count%E2%80%93min_sketch>\npub(crate) struct CountMinSketch {\n    // one for each depth\n    hashers: Vec<ahash::RandomState>,\n    width: usize,\n    depth: usize,\n    // buckets, width*depth\n    buckets: Vec<u32>,\n}\n\nimpl CountMinSketch {\n    /// Given parameters (ε, δ),\n    ///   set width = ceil(e/ε)\n    ///   set depth = ceil(ln(1/δ))\n    ///\n    /// guarantees:\n    /// actual <= estimate\n    /// estimate <= actual + ε * N with probability 1 - δ\n    /// where N is the cardinality of the stream\n    pub(crate) fn with_params(epsilon: f64, delta: f64) -> Self {\n        CountMinSketch::new(\n            (std::f64::consts::E / epsilon).ceil() as usize,\n            (1.0_f64 / delta).ln().ceil() as usize,\n        )\n    }\n\n    fn new(width: usize, depth: usize) -> Self {\n        Self {\n            #[cfg(test)]\n            hashers: (0..depth)\n                .map(|i| {\n                    // digits of pi for good randomness\n                    ahash::RandomState::with_seeds(\n                        314159265358979323,\n                        84626433832795028,\n                        84197169399375105,\n                        82097494459230781 + i as u64,\n                    )\n                })\n                .collect(),\n            #[cfg(not(test))]\n            hashers: (0..depth).map(|_| ahash::RandomState::new()).collect(),\n            width,\n            depth,\n            buckets: vec![0; width * depth],\n        }\n    }\n\n    pub(crate) fn inc_and_return<T: Hash>(&mut self, t: &T, x: u32) -> u32 {\n        let mut min = u32::MAX;\n        for row in 0..self.depth {\n            let col = (self.hashers[row].hash_one(t) as usize) % self.width;\n\n            let row = &mut self.buckets[row * self.width..][..self.width];\n            row[col] = row[col].saturating_add(x);\n            min = std::cmp::min(min, 
row[col]);\n        }\n        min\n    }\n\n    pub(crate) fn reset(&mut self) {\n        self.buckets.clear();\n        self.buckets.resize(self.width * self.depth, 0);\n    }\n}\n\n#[cfg(test)]\nmod tests {\n    use rand::rngs::StdRng;\n    use rand::seq::SliceRandom;\n    use rand::{Rng, SeedableRng};\n\n    use super::CountMinSketch;\n\n    fn eval_precision(n: usize, p: f64, q: f64) -> usize {\n        // fixed value of phi for consistent test\n        let mut rng = StdRng::seed_from_u64(16180339887498948482);\n\n        #[allow(non_snake_case)]\n        let mut N = 0;\n\n        let mut ids = vec![];\n\n        for _ in 0..n {\n            // number to insert at once\n            let n = rng.random_range(1..4096);\n            // number of insert operations\n            let m = rng.random_range(1..100);\n\n            let id = uuid::Builder::from_random_bytes(rng.random()).into_uuid();\n            ids.push((id, n, m));\n\n            // N = sum(actual)\n            N += n * m;\n        }\n\n        // q% of counts will be within p of the actual value\n        let mut sketch = CountMinSketch::with_params(p / N as f64, 1.0 - q);\n\n        // insert a bunch of entries in a random order\n        let mut ids2 = ids.clone();\n        while !ids2.is_empty() {\n            ids2.shuffle(&mut rng);\n            ids2.retain_mut(|id| {\n                sketch.inc_and_return(&id.0, id.1);\n                id.2 -= 1;\n                id.2 > 0\n            });\n        }\n\n        let mut within_p = 0;\n        for (id, n, m) in ids {\n            let actual = n * m;\n            let estimate = sketch.inc_and_return(&id, 0);\n\n            // This estimate has the guarantee that actual <= estimate\n            assert!(actual <= estimate);\n\n            // This estimate has the guarantee that estimate <= actual + εN with probability 1 - δ.\n            // ε = p / N, δ = 1 - q;\n            // therefore, estimate <= actual + p with probability q.\n            if estimate 
as f64 <= actual as f64 + p {\n                within_p += 1;\n            }\n        }\n        within_p\n    }\n\n    #[test]\n    fn precision() {\n        assert_eq!(eval_precision(100, 100.0, 0.99), 100);\n        assert_eq!(eval_precision(1000, 100.0, 0.99), 1000);\n        assert_eq!(eval_precision(100, 4096.0, 0.99), 100);\n        assert_eq!(eval_precision(1000, 4096.0, 0.99), 1000);\n\n        // seems to be more precise than the literature indicates?\n        // probably numbers are too small to truly represent the probabilities.\n        assert_eq!(eval_precision(100, 4096.0, 0.90), 100);\n        assert_eq!(eval_precision(1000, 4096.0, 0.90), 1000);\n        assert_eq!(eval_precision(100, 4096.0, 0.1), 100);\n        assert_eq!(eval_precision(1000, 4096.0, 0.1), 978);\n    }\n\n    // returns memory usage in bytes, and the time complexity per insert.\n    fn eval_cost(p: f64, q: f64) -> (usize, usize) {\n        #[allow(non_snake_case)]\n        // N = sum(actual)\n        // Let's assume 1021 samples, all of 4096\n        let N = 1021 * 4096;\n        let sketch = CountMinSketch::with_params(p / N as f64, 1.0 - q);\n\n        let memory = size_of::<u32>() * sketch.buckets.len();\n        let time = sketch.depth;\n        (memory, time)\n    }\n\n    #[test]\n    fn memory_usage() {\n        assert_eq!(eval_cost(100.0, 0.99), (2273580, 5));\n        assert_eq!(eval_cost(4096.0, 0.99), (55520, 5));\n        assert_eq!(eval_cost(4096.0, 0.90), (33312, 3));\n        assert_eq!(eval_cost(4096.0, 0.1), (11104, 1));\n    }\n}\n"
  },
  {
    "path": "proxy/src/scram/exchange.rs",
    "content": "//! Implementation of the SCRAM authentication algorithm.\n\nuse std::convert::Infallible;\n\nuse base64::Engine as _;\nuse base64::prelude::BASE64_STANDARD;\nuse tracing::{debug, trace};\n\nuse super::messages::{\n    ClientFinalMessage, ClientFirstMessage, OwnedServerFirstMessage, SCRAM_RAW_NONCE_LEN,\n};\nuse super::pbkdf2::Pbkdf2;\nuse super::secret::ServerSecret;\nuse super::signature::SignatureBuilder;\nuse super::threadpool::ThreadPool;\nuse super::{ScramKey, pbkdf2};\nuse crate::intern::{EndpointIdInt, RoleNameInt};\nuse crate::sasl::{self, ChannelBinding, Error as SaslError};\nuse crate::scram::cache::Pbkdf2CacheEntry;\n\n/// The only channel binding mode we currently support.\n#[derive(Debug)]\nstruct TlsServerEndPoint;\n\nimpl std::fmt::Display for TlsServerEndPoint {\n    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {\n        write!(f, \"tls-server-end-point\")\n    }\n}\n\nimpl std::str::FromStr for TlsServerEndPoint {\n    type Err = sasl::Error;\n\n    fn from_str(s: &str) -> Result<Self, Self::Err> {\n        match s {\n            \"tls-server-end-point\" => Ok(TlsServerEndPoint),\n            _ => Err(sasl::Error::ChannelBindingBadMethod(s.into())),\n        }\n    }\n}\n\nstruct SaslSentInner {\n    cbind_flag: ChannelBinding<TlsServerEndPoint>,\n    client_first_message_bare: String,\n    server_first_message: OwnedServerFirstMessage,\n}\n\nstruct SaslInitial {\n    nonce: fn() -> [u8; SCRAM_RAW_NONCE_LEN],\n}\n\nenum ExchangeState {\n    /// Waiting for [`ClientFirstMessage`].\n    Initial(SaslInitial),\n    /// Waiting for [`ClientFinalMessage`].\n    SaltSent(SaslSentInner),\n}\n\n/// Server's side of SCRAM auth algorithm.\npub(crate) struct Exchange<'a> {\n    state: ExchangeState,\n    secret: &'a ServerSecret,\n    tls_server_end_point: crate::tls::TlsServerEndPoint,\n}\n\nimpl<'a> Exchange<'a> {\n    pub(crate) fn new(\n        secret: &'a ServerSecret,\n        nonce: fn() -> [u8; 
SCRAM_RAW_NONCE_LEN],\n        tls_server_end_point: crate::tls::TlsServerEndPoint,\n    ) -> Self {\n        Self {\n            state: ExchangeState::Initial(SaslInitial { nonce }),\n            secret,\n            tls_server_end_point,\n        }\n    }\n}\n\nasync fn derive_client_key(\n    pool: &ThreadPool,\n    endpoint: EndpointIdInt,\n    password: &[u8],\n    salt: &[u8],\n    iterations: u32,\n) -> pbkdf2::Block {\n    pool.spawn_job(endpoint, Pbkdf2::start(password, salt, iterations))\n        .await\n}\n\n/// For cleartext flow, we need to derive the client key to\n/// 1. authenticate the client.\n/// 2. authenticate with compute.\npub(crate) async fn exchange(\n    pool: &ThreadPool,\n    endpoint: EndpointIdInt,\n    role: RoleNameInt,\n    secret: &ServerSecret,\n    password: &[u8],\n) -> sasl::Result<sasl::Outcome<super::ScramKey>> {\n    if secret.iterations > CACHED_ROUNDS {\n        exchange_with_cache(pool, endpoint, role, secret, password).await\n    } else {\n        let salt = BASE64_STANDARD.decode(&*secret.salt_base64)?;\n        let hash = derive_client_key(pool, endpoint, password, &salt, secret.iterations).await;\n        Ok(validate_pbkdf2(secret, &hash))\n    }\n}\n\n/// Compute the client key using a cache. 
We cache the suffix of the pbkdf2 result only,\n/// which is not enough by itself to perform an offline brute force.\nasync fn exchange_with_cache(\n    pool: &ThreadPool,\n    endpoint: EndpointIdInt,\n    role: RoleNameInt,\n    secret: &ServerSecret,\n    password: &[u8],\n) -> sasl::Result<sasl::Outcome<super::ScramKey>> {\n    let salt = BASE64_STANDARD.decode(&*secret.salt_base64)?;\n\n    debug_assert!(\n        secret.iterations > CACHED_ROUNDS,\n        \"we should not cache password data if there isn't enough rounds needed\"\n    );\n\n    // compute the prefix of the pbkdf2 output.\n    let prefix = derive_client_key(pool, endpoint, password, &salt, CACHED_ROUNDS).await;\n\n    if let Some(entry) = pool.cache.get_entry(endpoint, role) {\n        // hot path: let's check the threadpool cache\n        if secret.cached_at == entry.cached_from {\n            // cache is valid. compute the full hash by adding the prefix to the suffix.\n            let mut hash = prefix;\n            pbkdf2::xor_assign(&mut hash, &entry.suffix);\n            let outcome = validate_pbkdf2(secret, &hash);\n\n            if matches!(outcome, sasl::Outcome::Success(_)) {\n                trace!(\"password validated from cache\");\n            }\n\n            return Ok(outcome);\n        }\n\n        // cached key is no longer valid.\n        debug!(\"invalidating cached password\");\n        entry.invalidate();\n    }\n\n    // slow path: full password hash.\n    let hash = derive_client_key(pool, endpoint, password, &salt, secret.iterations).await;\n    let outcome = validate_pbkdf2(secret, &hash);\n\n    let client_key = match outcome {\n        sasl::Outcome::Success(client_key) => client_key,\n        sasl::Outcome::Failure(_) => return Ok(outcome),\n    };\n\n    trace!(\"storing cached password\");\n\n    // time to cache, compute the suffix by subtracting the prefix from the hash.\n    let mut suffix = hash;\n    pbkdf2::xor_assign(&mut suffix, &prefix);\n\n    
pool.cache.insert(\n        endpoint,\n        role,\n        Pbkdf2CacheEntry {\n            cached_from: secret.cached_at,\n            suffix,\n        },\n    );\n\n    Ok(sasl::Outcome::Success(client_key))\n}\n\nfn validate_pbkdf2(secret: &ServerSecret, hash: &pbkdf2::Block) -> sasl::Outcome<ScramKey> {\n    let client_key = super::ScramKey::client_key(&(*hash).into());\n    if secret.is_password_invalid(&client_key).into() {\n        sasl::Outcome::Failure(\"password doesn't match\")\n    } else {\n        sasl::Outcome::Success(client_key)\n    }\n}\n\nconst CACHED_ROUNDS: u32 = 16;\n\nimpl SaslInitial {\n    fn transition(\n        &self,\n        secret: &ServerSecret,\n        tls_server_end_point: &crate::tls::TlsServerEndPoint,\n        input: &str,\n    ) -> sasl::Result<sasl::Step<SaslSentInner, Infallible>> {\n        let client_first_message = ClientFirstMessage::parse(input)\n            .ok_or(SaslError::BadClientMessage(\"invalid client-first-message\"))?;\n\n        // If the flag is set to \"y\" and the server supports channel\n        // binding, the server MUST fail authentication\n        if client_first_message.cbind_flag == ChannelBinding::NotSupportedServer\n            && tls_server_end_point.supported()\n        {\n            return Err(SaslError::ChannelBindingFailed(\"SCRAM-PLUS not used\"));\n        }\n\n        let server_first_message = client_first_message.build_server_first_message(\n            &(self.nonce)(),\n            &secret.salt_base64,\n            secret.iterations,\n        );\n        let msg = server_first_message.as_str().to_owned();\n\n        let next = SaslSentInner {\n            cbind_flag: client_first_message.cbind_flag.and_then(str::parse)?,\n            client_first_message_bare: client_first_message.bare.to_owned(),\n            server_first_message,\n        };\n\n        Ok(sasl::Step::Continue(next, msg))\n    }\n}\n\nimpl SaslSentInner {\n    fn transition(\n        &self,\n        secret: 
&ServerSecret,\n        tls_server_end_point: &crate::tls::TlsServerEndPoint,\n        input: &str,\n    ) -> sasl::Result<sasl::Step<Infallible, super::ScramKey>> {\n        let Self {\n            cbind_flag,\n            client_first_message_bare,\n            server_first_message,\n        } = self;\n\n        let client_final_message = ClientFinalMessage::parse(input)\n            .ok_or(SaslError::BadClientMessage(\"invalid client-final-message\"))?;\n\n        let channel_binding = cbind_flag.encode(|_| match tls_server_end_point {\n            crate::tls::TlsServerEndPoint::Sha256(x) => Ok(x),\n            crate::tls::TlsServerEndPoint::Undefined => Err(SaslError::MissingBinding),\n        })?;\n\n        // This might've been caused by a MITM attack\n        if client_final_message.channel_binding != channel_binding {\n            return Err(SaslError::ChannelBindingFailed(\n                \"insecure connection: secure channel data mismatch\",\n            ));\n        }\n\n        if client_final_message.nonce != server_first_message.nonce() {\n            return Err(SaslError::BadClientMessage(\"combined nonce doesn't match\"));\n        }\n\n        let signature_builder = SignatureBuilder {\n            client_first_message_bare,\n            server_first_message: server_first_message.as_str(),\n            client_final_message_without_proof: client_final_message.without_proof,\n        };\n\n        let client_key = signature_builder\n            .build(&secret.stored_key)\n            .derive_client_key(&client_final_message.proof);\n\n        // Auth fails either if keys don't match or it's pre-determined to fail.\n        if secret.is_password_invalid(&client_key).into() {\n            return Ok(sasl::Step::Failure(\"password doesn't match\"));\n        }\n\n        let msg =\n            client_final_message.build_server_final_message(signature_builder, &secret.server_key);\n\n        Ok(sasl::Step::Success(client_key, msg))\n    }\n}\n\nimpl 
sasl::Mechanism for Exchange<'_> {\n    type Output = super::ScramKey;\n\n    fn exchange(mut self, input: &str) -> sasl::Result<sasl::Step<Self, Self::Output>> {\n        use ExchangeState;\n        use sasl::Step;\n        match &self.state {\n            ExchangeState::Initial(init) => {\n                match init.transition(self.secret, &self.tls_server_end_point, input)? {\n                    Step::Continue(sent, msg) => {\n                        self.state = ExchangeState::SaltSent(sent);\n                        Ok(Step::Continue(self, msg))\n                    }\n                    Step::Failure(msg) => Ok(Step::Failure(msg)),\n                }\n            }\n            ExchangeState::SaltSent(sent) => {\n                match sent.transition(self.secret, &self.tls_server_end_point, input)? {\n                    Step::Success(keys, msg) => Ok(Step::Success(keys, msg)),\n                    Step::Failure(msg) => Ok(Step::Failure(msg)),\n                }\n            }\n        }\n    }\n}\n"
  },
  {
    "path": "proxy/src/scram/key.rs",
    "content": "//! Tools for client/server/stored key management.\n\nuse hmac::Mac as _;\nuse sha2::Digest as _;\nuse subtle::ConstantTimeEq;\nuse zeroize::Zeroize as _;\n\nuse crate::metrics::Metrics;\nuse crate::scram::pbkdf2::Prf;\n\n/// Faithfully taken from PostgreSQL.\npub(crate) const SCRAM_KEY_LEN: usize = 32;\n\n/// One of the keys derived from the user's password.\n/// We use the same structure for all keys, i.e.\n/// `ClientKey`, `StoredKey`, and `ServerKey`.\n#[derive(Clone, Default, Eq, Debug)]\n#[repr(transparent)]\npub(crate) struct ScramKey {\n    bytes: [u8; SCRAM_KEY_LEN],\n}\n\nimpl Drop for ScramKey {\n    fn drop(&mut self) {\n        self.bytes.zeroize();\n    }\n}\n\nimpl PartialEq for ScramKey {\n    fn eq(&self, other: &Self) -> bool {\n        self.ct_eq(other).into()\n    }\n}\n\nimpl ConstantTimeEq for ScramKey {\n    fn ct_eq(&self, other: &Self) -> subtle::Choice {\n        self.bytes.ct_eq(&other.bytes)\n    }\n}\n\nimpl ScramKey {\n    pub(crate) fn sha256(&self) -> Self {\n        Metrics::get().proxy.sha_rounds.inc_by(1);\n        Self {\n            bytes: sha2::Sha256::digest(self.as_bytes()).into(),\n        }\n    }\n\n    pub(crate) fn as_bytes(&self) -> [u8; SCRAM_KEY_LEN] {\n        self.bytes\n    }\n\n    pub(crate) fn client_key(b: &[u8; 32]) -> Self {\n        // Prf::new_from_slice will run 2 sha256 rounds.\n        // Update + Finalize run 2 sha256 rounds.\n        Metrics::get().proxy.sha_rounds.inc_by(4);\n\n        let mut prf = Prf::new_from_slice(b).expect(\"HMAC is able to accept all key sizes\");\n        prf.update(b\"Client Key\");\n        let client_key: [u8; 32] = prf.finalize().into_bytes().into();\n        client_key.into()\n    }\n}\n\nimpl From<[u8; SCRAM_KEY_LEN]> for ScramKey {\n    #[inline(always)]\n    fn from(bytes: [u8; SCRAM_KEY_LEN]) -> Self {\n        Self { bytes }\n    }\n}\n\nimpl AsRef<[u8]> for ScramKey {\n    #[inline(always)]\n    fn as_ref(&self) -> &[u8] {\n        &self.bytes\n    
}\n}\n"
  },
  {
    "path": "proxy/src/scram/messages.rs",
    "content": "//! Definitions for SCRAM messages.\n\nuse std::fmt;\nuse std::ops::Range;\n\nuse base64::Engine as _;\nuse base64::prelude::BASE64_STANDARD;\n\nuse super::base64_decode_array;\nuse super::key::{SCRAM_KEY_LEN, ScramKey};\nuse super::signature::SignatureBuilder;\nuse crate::sasl::ChannelBinding;\n\n/// Faithfully taken from PostgreSQL.\npub(crate) const SCRAM_RAW_NONCE_LEN: usize = 18;\n\n/// Although we ignore all extensions, we still have to validate the message.\nfn validate_sasl_extensions<'a>(parts: impl Iterator<Item = &'a str>) -> Option<()> {\n    for mut chars in parts.map(|s| s.chars()) {\n        let attr = chars.next()?;\n        if !attr.is_ascii_alphabetic() {\n            return None;\n        }\n        let eq = chars.next()?;\n        if eq != '=' {\n            return None;\n        }\n    }\n\n    Some(())\n}\n\n#[derive(Debug)]\npub(crate) struct ClientFirstMessage<'a> {\n    /// `client-first-message-bare`.\n    pub(crate) bare: &'a str,\n    /// Channel binding mode.\n    pub(crate) cbind_flag: ChannelBinding<&'a str>,\n    /// Client nonce.\n    pub(crate) nonce: &'a str,\n}\n\nimpl<'a> ClientFirstMessage<'a> {\n    // NB: FromStr doesn't work with lifetimes\n    pub(crate) fn parse(input: &'a str) -> Option<Self> {\n        let mut parts = input.split(',');\n\n        let cbind_flag = ChannelBinding::parse(parts.next()?)?;\n\n        // PG doesn't support authorization identity,\n        // so we don't bother defining GS2 header type\n        let authzid = parts.next()?;\n        if !authzid.is_empty() {\n            return None;\n        }\n\n        // Unfortunately, `parts.as_str()` is unstable\n        let pos = authzid.as_ptr() as usize - input.as_ptr() as usize + 1;\n        let (_, bare) = input.split_at(pos);\n\n        // In theory, these might be preceded by \"reserved-mext\" (i.e. 
\"m=\")\n        let username = parts.next()?.strip_prefix(\"n=\")?;\n\n        // https://github.com/postgres/postgres/blob/f83908798f78c4cafda217ca875602c88ea2ae28/src/backend/libpq/auth-scram.c#L13-L14\n        if !username.is_empty() {\n            tracing::warn!(username, \"scram username provided, but is not expected\");\n            // TODO(conrad):\n            // return None;\n        }\n\n        let nonce = parts.next()?.strip_prefix(\"r=\")?;\n\n        // Validate but ignore auth extensions\n        validate_sasl_extensions(parts)?;\n\n        Some(Self {\n            bare,\n            cbind_flag,\n            nonce,\n        })\n    }\n\n    /// Build a response to [`ClientFirstMessage`].\n    pub(crate) fn build_server_first_message(\n        &self,\n        nonce: &[u8; SCRAM_RAW_NONCE_LEN],\n        salt_base64: &str,\n        iterations: u32,\n    ) -> OwnedServerFirstMessage {\n        let mut message = String::with_capacity(128);\n        message.push_str(\"r=\");\n\n        // write combined nonce\n        let combined_nonce_start = message.len();\n        message.push_str(self.nonce);\n        BASE64_STANDARD.encode_string(nonce, &mut message);\n        let combined_nonce = combined_nonce_start..message.len();\n\n        // write salt and iterations\n        message.push_str(\",s=\");\n        message.push_str(salt_base64);\n        message.push_str(\",i=\");\n        message.push_str(itoa::Buffer::new().format(iterations));\n\n        // This design guarantees that it's impossible to create a\n        // server-first-message without receiving a client-first-message\n        OwnedServerFirstMessage {\n            message,\n            nonce: combined_nonce,\n        }\n    }\n}\n\n#[derive(Debug)]\npub(crate) struct ClientFinalMessage<'a> {\n    /// `client-final-message-without-proof`.\n    pub(crate) without_proof: &'a str,\n    /// Channel binding data (base64).\n    pub(crate) channel_binding: &'a str,\n    /// Combined client & server 
nonce.\n    pub(crate) nonce: &'a str,\n    /// Client auth proof.\n    pub(crate) proof: [u8; SCRAM_KEY_LEN],\n}\n\nimpl<'a> ClientFinalMessage<'a> {\n    // NB: FromStr doesn't work with lifetimes\n    pub(crate) fn parse(input: &'a str) -> Option<Self> {\n        let (without_proof, proof) = input.rsplit_once(',')?;\n\n        let mut parts = without_proof.split(',');\n        let channel_binding = parts.next()?.strip_prefix(\"c=\")?;\n        let nonce = parts.next()?.strip_prefix(\"r=\")?;\n\n        // Validate but ignore auth extensions\n        validate_sasl_extensions(parts)?;\n\n        let proof = base64_decode_array(proof.strip_prefix(\"p=\")?)?;\n\n        Some(Self {\n            without_proof,\n            channel_binding,\n            nonce,\n            proof,\n        })\n    }\n\n    /// Build a response to [`ClientFinalMessage`].\n    pub(crate) fn build_server_final_message(\n        &self,\n        signature_builder: SignatureBuilder<'_>,\n        server_key: &ScramKey,\n    ) -> String {\n        let mut buf = String::from(\"v=\");\n        BASE64_STANDARD.encode_string(signature_builder.build(server_key), &mut buf);\n\n        buf\n    }\n}\n\n/// We need to keep a convenient representation of this\n/// message for the next authentication step.\npub(crate) struct OwnedServerFirstMessage {\n    /// Owned `server-first-message`.\n    message: String,\n    /// Slice into `message`.\n    nonce: Range<usize>,\n}\n\nimpl OwnedServerFirstMessage {\n    /// Extract combined nonce from the message.\n    #[inline(always)]\n    pub(crate) fn nonce(&self) -> &str {\n        &self.message[self.nonce.clone()]\n    }\n\n    /// Get reference to a text representation of the message.\n    #[inline(always)]\n    pub(crate) fn as_str(&self) -> &str {\n        &self.message\n    }\n}\n\nimpl fmt::Debug for OwnedServerFirstMessage {\n    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {\n        f.debug_struct(\"ServerFirstMessage\")\n            
.field(\"message\", &self.as_str())\n            .field(\"nonce\", &self.nonce())\n            .finish()\n    }\n}\n\n#[cfg(test)]\nmod tests {\n    use super::*;\n\n    #[test]\n    fn parse_client_first_message() {\n        use ChannelBinding::*;\n\n        // (Almost) real strings captured during debug sessions\n        let cases = [\n            (NotSupportedClient, \"n,,n=,r=t8JwklwKecDLwSsA72rHmVju\"),\n            (NotSupportedServer, \"y,,n=,r=t8JwklwKecDLwSsA72rHmVju\"),\n            (\n                Required(\"tls-server-end-point\"),\n                \"p=tls-server-end-point,,n=,r=t8JwklwKecDLwSsA72rHmVju\",\n            ),\n        ];\n\n        for (cb, input) in cases {\n            let msg = ClientFirstMessage::parse(input).unwrap();\n\n            assert_eq!(msg.bare, \"n=,r=t8JwklwKecDLwSsA72rHmVju\");\n            assert_eq!(msg.nonce, \"t8JwklwKecDLwSsA72rHmVju\");\n            assert_eq!(msg.cbind_flag, cb);\n        }\n    }\n\n    #[test]\n    fn parse_client_first_message_with_invalid_gs2_authz() {\n        assert!(ClientFirstMessage::parse(\"n,authzid,n=,r=nonce\").is_none());\n    }\n\n    #[test]\n    fn parse_client_first_message_with_extra_params() {\n        let msg = ClientFirstMessage::parse(\"n,,n=,r=nonce,a=foo,b=bar,c=baz\").unwrap();\n        assert_eq!(msg.bare, \"n=,r=nonce,a=foo,b=bar,c=baz\");\n        assert_eq!(msg.nonce, \"nonce\");\n        assert_eq!(msg.cbind_flag, ChannelBinding::NotSupportedClient);\n    }\n\n    #[test]\n    fn parse_client_first_message_with_extra_params_invalid() {\n        // must be of the form `<ascii letter>=<...>`\n        assert!(ClientFirstMessage::parse(\"n,,n=,r=nonce,abc=foo\").is_none());\n        assert!(ClientFirstMessage::parse(\"n,,n=,r=nonce,1=foo\").is_none());\n        assert!(ClientFirstMessage::parse(\"n,,n=,r=nonce,a\").is_none());\n    }\n\n    #[test]\n    fn parse_client_final_message() {\n        let input = [\n            \"c=eSws\",\n            
\"r=iiYEfS3rOgn8S3rtpSdrOsHtPLWvIkdgmHxA0hf3JNOAG4dU\",\n            \"p=SRpfsIVS4Gk11w1LqQ4QvCUBZYQmqXNSDEcHqbQ3CHI=\",\n        ]\n        .join(\",\");\n\n        let msg = ClientFinalMessage::parse(&input).unwrap();\n        assert_eq!(\n            msg.without_proof,\n            \"c=eSws,r=iiYEfS3rOgn8S3rtpSdrOsHtPLWvIkdgmHxA0hf3JNOAG4dU\"\n        );\n        assert_eq!(\n            msg.nonce,\n            \"iiYEfS3rOgn8S3rtpSdrOsHtPLWvIkdgmHxA0hf3JNOAG4dU\"\n        );\n        assert_eq!(\n            BASE64_STANDARD.encode(msg.proof),\n            \"SRpfsIVS4Gk11w1LqQ4QvCUBZYQmqXNSDEcHqbQ3CHI=\"\n        );\n    }\n}\n"
  },
  {
    "path": "proxy/src/scram/mod.rs",
    "content": "//! Salted Challenge Response Authentication Mechanism.\n//!\n//! RFC: <https://datatracker.ietf.org/doc/html/rfc5802>.\n//!\n//! Reference implementation:\n//! * <https://github.com/postgres/postgres/blob/94226d4506e66d6e7cbf4b391f1e7393c1962841/src/backend/libpq/auth-scram.c>\n//! * <https://github.com/postgres/postgres/blob/94226d4506e66d6e7cbf4b391f1e7393c1962841/src/interfaces/libpq/fe-auth-scram.c>\n\nmod cache;\nmod countmin;\nmod exchange;\nmod key;\nmod messages;\nmod pbkdf2;\nmod secret;\nmod signature;\npub mod threadpool;\n\nuse base64::Engine as _;\nuse base64::prelude::BASE64_STANDARD;\npub(crate) use exchange::{Exchange, exchange};\npub(crate) use key::ScramKey;\npub(crate) use secret::ServerSecret;\n\nconst SCRAM_SHA_256: &str = \"SCRAM-SHA-256\";\nconst SCRAM_SHA_256_PLUS: &str = \"SCRAM-SHA-256-PLUS\";\n\n/// A list of supported SCRAM methods.\npub(crate) const METHODS: &[&str] = &[SCRAM_SHA_256_PLUS, SCRAM_SHA_256];\npub(crate) const METHODS_WITHOUT_PLUS: &[&str] = &[SCRAM_SHA_256];\n\n/// Decode base64 into array without any heap allocations\nfn base64_decode_array<const N: usize>(input: impl AsRef<[u8]>) -> Option<[u8; N]> {\n    let mut bytes = [0u8; N];\n\n    let size = BASE64_STANDARD.decode_slice(input, &mut bytes).ok()?;\n    if size != N {\n        return None;\n    }\n\n    Some(bytes)\n}\n\n#[cfg(test)]\nmod tests {\n    use super::threadpool::ThreadPool;\n    use super::{Exchange, ServerSecret};\n    use crate::intern::{EndpointIdInt, RoleNameInt};\n    use crate::sasl::{Mechanism, Step};\n    use crate::types::{EndpointId, RoleName};\n\n    #[test]\n    fn snapshot() {\n        let iterations = 4096;\n        let salt = \"QSXCR+Q6sek8bf92\";\n        let stored_key = \"FO+9jBb3MUukt6jJnzjPZOWc5ow/Pu6JtPyju0aqaE8=\";\n        let server_key = \"qxJ1SbmSAi5EcS0J5Ck/cKAm/+Ixa+Kwp63f4OHDgzo=\";\n        let secret = format!(\"SCRAM-SHA-256${iterations}:{salt}${stored_key}:{server_key}\",);\n        let secret = 
ServerSecret::parse(&secret).unwrap();\n\n        const NONCE: [u8; 18] = [\n            1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,\n        ];\n        let mut exchange =\n            Exchange::new(&secret, || NONCE, crate::tls::TlsServerEndPoint::Undefined);\n\n        let client_first = \"n,,n=user,r=rOprNGfwEbeRWgbNEkqO\";\n        let client_final = \"c=biws,r=rOprNGfwEbeRWgbNEkqOAQIDBAUGBwgJCgsMDQ4PEBES,p=rw1r5Kph5ThxmaUBC2GAQ6MfXbPnNkFiTIvdb/Rear0=\";\n        let server_first =\n            \"r=rOprNGfwEbeRWgbNEkqOAQIDBAUGBwgJCgsMDQ4PEBES,s=QSXCR+Q6sek8bf92,i=4096\";\n        let server_final = \"v=qtUDIofVnIhM7tKn93EQUUt5vgMOldcDVu1HC+OH0o0=\";\n\n        exchange = match exchange.exchange(client_first).unwrap() {\n            Step::Continue(exchange, message) => {\n                assert_eq!(message, server_first);\n                exchange\n            }\n            Step::Success(_, _) => panic!(\"expected continue, got success\"),\n            Step::Failure(f) => panic!(\"{f}\"),\n        };\n\n        let key = match exchange.exchange(client_final).unwrap() {\n            Step::Success(key, message) => {\n                assert_eq!(message, server_final);\n                key\n            }\n            Step::Continue(_, _) => panic!(\"expected success, got continue\"),\n            Step::Failure(f) => panic!(\"{f}\"),\n        };\n\n        assert_eq!(\n            key.as_bytes(),\n            [\n                74, 103, 1, 132, 12, 31, 200, 48, 28, 54, 82, 232, 207, 12, 138, 189, 40, 32, 134,\n                27, 125, 170, 232, 35, 171, 167, 166, 41, 70, 228, 182, 112,\n            ]\n        );\n    }\n\n    async fn check(\n        pool: &ThreadPool,\n        scram_secret: &ServerSecret,\n        password: &[u8],\n    ) -> Result<(), &'static str> {\n        let ep = EndpointId::from(\"foo\");\n        let ep = EndpointIdInt::from(ep);\n        let role = RoleName::from(\"user\");\n        let role = 
RoleNameInt::from(&role);\n\n        let outcome = super::exchange(pool, ep, role, scram_secret, password)\n            .await\n            .unwrap();\n\n        match outcome {\n            crate::sasl::Outcome::Success(_) => Ok(()),\n            crate::sasl::Outcome::Failure(r) => Err(r),\n        }\n    }\n\n    async fn run_round_trip_test(server_password: &str, client_password: &str) {\n        let pool = ThreadPool::new(1);\n        let scram_secret = ServerSecret::build(server_password).await.unwrap();\n        check(&pool, &scram_secret, client_password.as_bytes())\n            .await\n            .unwrap();\n    }\n\n    #[tokio::test]\n    async fn round_trip() {\n        run_round_trip_test(\"pencil\", \"pencil\").await;\n    }\n\n    #[tokio::test]\n    #[should_panic(expected = \"password doesn't match\")]\n    async fn failure() {\n        run_round_trip_test(\"pencil\", \"eraser\").await;\n    }\n\n    #[tokio::test]\n    #[tracing_test::traced_test]\n    async fn password_cache() {\n        let pool = ThreadPool::new(1);\n        let scram_secret = ServerSecret::build(\"password\").await.unwrap();\n\n        // wrong passwords are not added to cache\n        check(&pool, &scram_secret, b\"wrong\").await.unwrap_err();\n        assert!(!logs_contain(\"storing cached password\"));\n\n        // correct passwords get cached\n        check(&pool, &scram_secret, b\"password\").await.unwrap();\n        assert!(logs_contain(\"storing cached password\"));\n\n        // wrong passwords do not match the cache\n        check(&pool, &scram_secret, b\"wrong\").await.unwrap_err();\n        assert!(!logs_contain(\"password validated from cache\"));\n\n        // correct passwords match the cache\n        check(&pool, &scram_secret, b\"password\").await.unwrap();\n        assert!(logs_contain(\"password validated from cache\"));\n    }\n}\n"
  },
  {
    "path": "proxy/src/scram/pbkdf2.rs",
    "content": "//! For postgres password authentication, we need to perform a PBKDF2 using\n//! PRF=HMAC-SHA2-256, producing only 1 block (32 bytes) of output key.\n\nuse hmac::Mac as _;\nuse hmac::digest::consts::U32;\nuse hmac::digest::generic_array::GenericArray;\nuse zeroize::Zeroize as _;\n\nuse crate::metrics::Metrics;\n\n/// The Pseudo-random function used during PBKDF2 and the SCRAM-SHA-256 handshake.\npub type Prf = hmac::Hmac<sha2::Sha256>;\npub(crate) type Block = GenericArray<u8, U32>;\n\npub(crate) struct Pbkdf2 {\n    hmac: Prf,\n    /// U{r-1} for whatever iteration r we are currently on.\n    prev: Block,\n    /// the output of `fold(xor, U{1}..U{r})` for whatever iteration r we are currently on.\n    hi: Block,\n    /// number of iterations left\n    iterations: u32,\n}\n\nimpl Drop for Pbkdf2 {\n    fn drop(&mut self) {\n        self.prev.zeroize();\n        self.hi.zeroize();\n    }\n}\n\n// inspired from <https://github.com/neondatabase/rust-postgres/blob/20031d7a9ee1addeae6e0968e3899ae6bf01cee2/postgres-protocol/src/authentication/sasl.rs#L36-L61>\nimpl Pbkdf2 {\n    pub(crate) fn start(pw: &[u8], salt: &[u8], iterations: u32) -> Self {\n        // key the HMAC and derive the first block in-place\n        let mut hmac = Prf::new_from_slice(pw).expect(\"HMAC is able to accept all key sizes\");\n\n        // U1 = PRF(Password, Salt + INT_32_BE(i))\n        // i = 1 since we only need 1 block of output.\n        hmac.update(salt);\n        hmac.update(&1u32.to_be_bytes());\n        let init_block = hmac.finalize_reset().into_bytes();\n\n        // Prf::new_from_slice will run 2 sha256 rounds.\n        // Our update + finalize run 2 sha256 rounds for each pbkdf2 round.\n        Metrics::get().proxy.sha_rounds.inc_by(4);\n\n        Self {\n            hmac,\n            // one iteration spent above\n            iterations: iterations - 1,\n            hi: init_block,\n            prev: init_block,\n        }\n    }\n\n    pub(crate) fn cost(&self) 
-> u32 {\n        (self.iterations).clamp(0, 4096)\n    }\n\n    /// For \"fairness\", we implement PBKDF2 with cooperative yielding, which is why we use this `turn`\n    /// function that only executes a fixed number of iterations before continuing.\n    ///\n    /// Task must be rescheduled if this returns [`std::task::Poll::Pending`].\n    pub(crate) fn turn(&mut self) -> std::task::Poll<Block> {\n        let Self {\n            hmac,\n            prev,\n            hi,\n            iterations,\n        } = self;\n\n        // only do up to 4096 iterations per turn for fairness\n        let n = (*iterations).clamp(0, 4096);\n        for _ in 0..n {\n            let next = single_round(hmac, prev);\n            xor_assign(hi, &next);\n            *prev = next;\n        }\n\n        // Our update + finalize run 2 sha256 rounds for each pbkdf2 round.\n        Metrics::get().proxy.sha_rounds.inc_by(2 * n as u64);\n\n        *iterations -= n;\n        if *iterations == 0 {\n            std::task::Poll::Ready(*hi)\n        } else {\n            std::task::Poll::Pending\n        }\n    }\n}\n\n#[inline(always)]\npub fn xor_assign(x: &mut Block, y: &Block) {\n    for (x, &y) in std::iter::zip(x, y) {\n        *x ^= y;\n    }\n}\n\n#[inline(always)]\nfn single_round(prf: &mut Prf, ui: &Block) -> Block {\n    // Ui = PRF(Password, Ui-1)\n    prf.update(ui);\n    prf.finalize_reset().into_bytes()\n}\n\n#[cfg(test)]\nmod tests {\n    use pbkdf2::pbkdf2_hmac_array;\n    use sha2::Sha256;\n\n    use super::Pbkdf2;\n\n    #[test]\n    fn works() {\n        let salt = b\"sodium chloride\";\n        let pass = b\"Ne0n_!5_50_C007\";\n\n        let mut job = Pbkdf2::start(pass, salt, 60000);\n        let hash: [u8; 32] = loop {\n            let std::task::Poll::Ready(hash) = job.turn() else {\n                continue;\n            };\n            break hash.into();\n        };\n\n        let expected = pbkdf2_hmac_array::<Sha256, 32>(pass, salt, 60000);\n        assert_eq!(hash, 
expected);\n    }\n}\n"
  },
  {
    "path": "proxy/src/scram/secret.rs",
    "content": "//! Tools for SCRAM server secret management.\n\nuse base64::Engine as _;\nuse base64::prelude::BASE64_STANDARD;\nuse subtle::{Choice, ConstantTimeEq};\nuse tokio::time::Instant;\n\nuse super::base64_decode_array;\nuse super::key::ScramKey;\n\n/// Server secret is produced from user's password,\n/// and is used throughout the authentication process.\n#[derive(Clone, Eq, PartialEq, Debug)]\npub(crate) struct ServerSecret {\n    /// When this secret was cached.\n    pub(crate) cached_at: Instant,\n\n    /// Number of iterations for `PBKDF2` function.\n    pub(crate) iterations: u32,\n    /// Salt used to hash user's password.\n    pub(crate) salt_base64: Box<str>,\n    /// Hashed `ClientKey`.\n    pub(crate) stored_key: ScramKey,\n    /// Used by client to verify server's signature.\n    pub(crate) server_key: ScramKey,\n    /// Should auth fail no matter what?\n    /// This is exactly the case for mocked secrets.\n    pub(crate) doomed: bool,\n}\n\nimpl ServerSecret {\n    pub(crate) fn parse(input: &str) -> Option<Self> {\n        // SCRAM-SHA-256$<iterations>:<salt>$<storedkey>:<serverkey>\n        let s = input.strip_prefix(\"SCRAM-SHA-256$\")?;\n        let (params, keys) = s.split_once('$')?;\n\n        let ((iterations, salt), (stored_key, server_key)) =\n            params.split_once(':').zip(keys.split_once(':'))?;\n\n        let secret = ServerSecret {\n            cached_at: Instant::now(),\n            iterations: iterations.parse().ok()?,\n            salt_base64: salt.into(),\n            stored_key: base64_decode_array(stored_key)?.into(),\n            server_key: base64_decode_array(server_key)?.into(),\n            doomed: false,\n        };\n\n        Some(secret)\n    }\n\n    pub(crate) fn is_password_invalid(&self, client_key: &ScramKey) -> Choice {\n        // constant time to not leak partial key match\n        client_key.sha256().ct_ne(&self.stored_key) | Choice::from(self.doomed as u8)\n    }\n\n    /// To avoid revealing 
information to an attacker, we use a\n    /// mocked server secret even if the user doesn't exist.\n    /// See `auth-scram.c : mock_scram_secret` for details.\n    pub(crate) fn mock(nonce: [u8; 32]) -> Self {\n        Self {\n            cached_at: Instant::now(),\n            // this doesn't reveal much information as we're going to use\n            // iteration count 1 for our generated passwords going forward.\n            // PG16 users can set iteration count=1 already today.\n            iterations: 1,\n            salt_base64: BASE64_STANDARD.encode(nonce).into_boxed_str(),\n            stored_key: ScramKey::default(),\n            server_key: ScramKey::default(),\n            doomed: true,\n        }\n    }\n\n    /// Build a new server secret from the prerequisites.\n    /// XXX: We only use this function in tests.\n    #[cfg(test)]\n    pub(crate) async fn build(password: &str) -> Option<Self> {\n        Self::parse(&postgres_protocol::password::scram_sha_256(password.as_bytes()).await)\n    }\n}\n\n#[cfg(test)]\nmod tests {\n    use super::*;\n\n    #[test]\n    fn parse_scram_secret() {\n        let iterations = 4096;\n        let salt = \"+/tQQax7twvwTj64mjBsxQ==\";\n        let stored_key = \"D5h6KTMBlUvDJk2Y8ELfC1Sjtc6k9YHjRyuRZyBNJns=\";\n        let server_key = \"Pi3QHbcluX//NDfVkKlFl88GGzlJ5LkyPwcdlN/QBvI=\";\n\n        let secret = format!(\"SCRAM-SHA-256${iterations}:{salt}${stored_key}:{server_key}\");\n\n        let parsed = ServerSecret::parse(&secret).unwrap();\n        assert_eq!(parsed.iterations, iterations);\n        assert_eq!(&*parsed.salt_base64, salt);\n\n        assert_eq!(BASE64_STANDARD.encode(parsed.stored_key), stored_key);\n        assert_eq!(BASE64_STANDARD.encode(parsed.server_key), server_key);\n    }\n}\n"
  },
  {
    "path": "proxy/src/scram/signature.rs",
    "content": "//! Tools for client/server signature management.\n\nuse hmac::Mac as _;\n\nuse super::key::{SCRAM_KEY_LEN, ScramKey};\nuse crate::metrics::Metrics;\nuse crate::scram::pbkdf2::Prf;\n\n/// A collection of message parts needed to derive the client's signature.\n#[derive(Debug)]\npub(crate) struct SignatureBuilder<'a> {\n    pub(crate) client_first_message_bare: &'a str,\n    pub(crate) server_first_message: &'a str,\n    pub(crate) client_final_message_without_proof: &'a str,\n}\n\nimpl SignatureBuilder<'_> {\n    pub(crate) fn build(&self, key: &ScramKey) -> Signature {\n        // don't know exactly. this is a rough approx\n        Metrics::get().proxy.sha_rounds.inc_by(8);\n\n        let mut mac = Prf::new_from_slice(key.as_ref()).expect(\"HMAC accepts all key sizes\");\n        mac.update(self.client_first_message_bare.as_bytes());\n        mac.update(b\",\");\n        mac.update(self.server_first_message.as_bytes());\n        mac.update(b\",\");\n        mac.update(self.client_final_message_without_proof.as_bytes());\n        Signature {\n            bytes: mac.finalize().into_bytes().into(),\n        }\n    }\n}\n\n/// A computed value which, when xored with `ClientProof`,\n/// produces `ClientKey` that we need for authentication.\n#[derive(Debug)]\n#[repr(transparent)]\npub(crate) struct Signature {\n    bytes: [u8; SCRAM_KEY_LEN],\n}\n\nimpl Signature {\n    /// Derive `ClientKey` from client's signature and proof.\n    pub(crate) fn derive_client_key(&self, proof: &[u8; SCRAM_KEY_LEN]) -> ScramKey {\n        // This is how the proof is calculated:\n        //\n        // 1. sha256(ClientKey) -> StoredKey\n        // 2. hmac_sha256(StoredKey, [messages...]) -> ClientSignature\n        // 3. ClientKey ^ ClientSignature -> ClientProof\n        //\n        // Step 3 implies that we can restore ClientKey from the proof\n        // by xoring the latter with the ClientSignature. 
Afterwards we\n        // can check that the presumed ClientKey meets our expectations.\n        let mut signature = self.bytes;\n        for (i, x) in proof.iter().enumerate() {\n            signature[i] ^= x;\n        }\n\n        signature.into()\n    }\n}\n\nimpl From<[u8; SCRAM_KEY_LEN]> for Signature {\n    fn from(bytes: [u8; SCRAM_KEY_LEN]) -> Self {\n        Self { bytes }\n    }\n}\n\nimpl AsRef<[u8]> for Signature {\n    fn as_ref(&self) -> &[u8] {\n        &self.bytes\n    }\n}\n"
  },
  {
    "path": "proxy/src/scram/threadpool.rs",
    "content": "//! Custom threadpool implementation for password hashing.\n//!\n//! Requirements:\n//! 1. Fairness per endpoint.\n//! 2. Yield support for high iteration counts.\n\nuse std::cell::RefCell;\nuse std::future::Future;\nuse std::pin::Pin;\nuse std::sync::atomic::{AtomicUsize, Ordering};\nuse std::sync::{Arc, Weak};\nuse std::task::{Context, Poll};\n\nuse futures::FutureExt;\nuse rand::rngs::SmallRng;\nuse rand::{Rng, SeedableRng};\n\nuse super::cache::Pbkdf2Cache;\nuse super::pbkdf2;\nuse super::pbkdf2::Pbkdf2;\nuse crate::intern::EndpointIdInt;\nuse crate::metrics::{ThreadPoolMetrics, ThreadPoolWorkerId};\nuse crate::scram::countmin::CountMinSketch;\n\npub struct ThreadPool {\n    runtime: Option<tokio::runtime::Runtime>,\n    pub metrics: Arc<ThreadPoolMetrics>,\n\n    // we hash a lot of passwords.\n    // we keep a cache of partial hashes for faster validation.\n    pub(super) cache: Pbkdf2Cache,\n}\n\n/// How often to reset the sketch values\nconst SKETCH_RESET_INTERVAL: u64 = 1021;\n\nthread_local! 
{\n    static STATE: RefCell<Option<ThreadRt>> = const { RefCell::new(None) };\n}\n\nimpl ThreadPool {\n    pub fn new(mut n_workers: u8) -> Arc<Self> {\n        // rayon would be nice here, but yielding in rayon does not work well afaict.\n\n        if n_workers == 0 {\n            n_workers = 1;\n        }\n\n        Arc::new_cyclic(|pool| {\n            let pool = pool.clone();\n            let worker_id = AtomicUsize::new(0);\n\n            let runtime = tokio::runtime::Builder::new_multi_thread()\n                .worker_threads(n_workers as usize)\n                .on_thread_start(move || {\n                    STATE.with_borrow_mut(|state| {\n                        *state = Some(ThreadRt {\n                            pool: pool.clone(),\n                            id: ThreadPoolWorkerId(worker_id.fetch_add(1, Ordering::Relaxed)),\n                            rng: SmallRng::from_os_rng(),\n                            // used to determine whether we should temporarily skip tasks for fairness.\n                            // 99% of estimates will overcount by no more than 4096 samples\n                            countmin: CountMinSketch::with_params(\n                                1.0 / (SKETCH_RESET_INTERVAL as f64),\n                                0.01,\n                            ),\n                            tick: 0,\n                        });\n                    });\n                })\n                .build()\n                .expect(\"password threadpool runtime should be configured correctly\");\n\n            Self {\n                runtime: Some(runtime),\n                metrics: Arc::new(ThreadPoolMetrics::new(n_workers as usize)),\n                cache: Pbkdf2Cache::new(),\n            }\n        })\n    }\n\n    pub(crate) fn spawn_job(&self, endpoint: EndpointIdInt, pbkdf2: Pbkdf2) -> JobHandle {\n        JobHandle(\n            self.runtime\n                .as_ref()\n                .expect(\"runtime is always set\")\n            
    .spawn(JobSpec { pbkdf2, endpoint }),\n        )\n    }\n}\n\nimpl Drop for ThreadPool {\n    fn drop(&mut self) {\n        self.runtime\n            .take()\n            .expect(\"runtime is always set\")\n            .shutdown_background();\n    }\n}\n\nstruct ThreadRt {\n    pool: Weak<ThreadPool>,\n    id: ThreadPoolWorkerId,\n    rng: SmallRng,\n    countmin: CountMinSketch,\n    tick: u64,\n}\n\nimpl ThreadRt {\n    fn should_run(&mut self, job: &JobSpec) -> bool {\n        let rate = self\n            .countmin\n            .inc_and_return(&job.endpoint, job.pbkdf2.cost());\n\n        const P: f64 = 2000.0;\n        // probability decreases as rate increases.\n        // lower probability, higher chance of being skipped\n        //\n        // estimates (rate in terms of 4096 rounds):\n        // rate = 0    => probability = 100%\n        // rate = 10   => probability = 71.3%\n        // rate = 50   => probability = 62.1%\n        // rate = 500  => probability = 52.3%\n        // rate = 1021 => probability = 49.8%\n        //\n        // My expectation is that the pool queue will only begin backing up at ~1000rps\n        // in which case the SKETCH_RESET_INTERVAL represents 1 second. 
Thus, the rates above\n        // are in requests per second.\n        let probability = P.ln() / (P + rate as f64).ln();\n        self.rng.random_bool(probability)\n    }\n}\n\nstruct JobSpec {\n    pbkdf2: Pbkdf2,\n    endpoint: EndpointIdInt,\n}\n\nimpl Future for JobSpec {\n    type Output = pbkdf2::Block;\n\n    fn poll(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Self::Output> {\n        STATE.with_borrow_mut(|state| {\n            let state = state.as_mut().expect(\"should be set on thread startup\");\n\n            state.tick = state.tick.wrapping_add(1);\n            if state.tick.is_multiple_of(SKETCH_RESET_INTERVAL) {\n                state.countmin.reset();\n            }\n\n            if state.should_run(&self) {\n                if let Some(pool) = state.pool.upgrade() {\n                    pool.metrics.worker_task_turns_total.inc(state.id);\n                }\n\n                match self.pbkdf2.turn() {\n                    Poll::Ready(result) => Poll::Ready(result),\n                    // more to do, we shall requeue\n                    Poll::Pending => {\n                        cx.waker().wake_by_ref();\n                        Poll::Pending\n                    }\n                }\n            } else {\n                if let Some(pool) = state.pool.upgrade() {\n                    pool.metrics.worker_task_skips_total.inc(state.id);\n                }\n\n                cx.waker().wake_by_ref();\n                Poll::Pending\n            }\n        })\n    }\n}\n\npub(crate) struct JobHandle(tokio::task::JoinHandle<pbkdf2::Block>);\n\nimpl Future for JobHandle {\n    type Output = pbkdf2::Block;\n\n    fn poll(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Self::Output> {\n        match self.0.poll_unpin(cx) {\n            Poll::Ready(Ok(ok)) => Poll::Ready(ok),\n            Poll::Ready(Err(err)) => std::panic::resume_unwind(err.into_panic()),\n            Poll::Pending => Poll::Pending,\n        }\n    }\n}\n\nimpl 
Drop for JobHandle {\n    fn drop(&mut self) {\n        self.0.abort();\n    }\n}\n\n#[cfg(test)]\nmod tests {\n    use super::*;\n    use crate::types::EndpointId;\n\n    #[tokio::test]\n    async fn hash_is_correct() {\n        let pool = ThreadPool::new(1);\n\n        let ep = EndpointId::from(\"foo\");\n        let ep = EndpointIdInt::from(ep);\n\n        let salt = [0x55; 32];\n        let actual = pool\n            .spawn_job(ep, Pbkdf2::start(b\"password\", &salt, 4096))\n            .await;\n\n        let expected = &[\n            10, 114, 73, 188, 140, 222, 196, 156, 214, 184, 79, 157, 119, 242, 16, 31, 53, 242,\n            178, 43, 95, 8, 225, 182, 122, 40, 219, 21, 89, 147, 64, 140,\n        ];\n        assert_eq!(actual.as_slice(), expected);\n    }\n}\n"
  },
  {
    "path": "proxy/src/serverless/backend.rs",
    "content": "use std::sync::Arc;\nuse std::time::Duration;\n\nuse ed25519_dalek::SigningKey;\nuse hyper_util::rt::{TokioExecutor, TokioIo, TokioTimer};\nuse jose_jwk::jose_b64;\nuse postgres_client::error::SqlState;\nuse postgres_client::maybe_tls_stream::MaybeTlsStream;\nuse rand_core::OsRng;\nuse tracing::field::display;\nuse tracing::{debug, info};\n\nuse super::AsyncRW;\nuse super::conn_pool::poll_client;\nuse super::conn_pool_lib::{Client, ConnInfo, EndpointConnPool, GlobalConnPool};\nuse super::http_conn_pool::{self, HttpConnPool, LocalProxyClient, poll_http2_client};\nuse super::local_conn_pool::{self, EXT_NAME, EXT_SCHEMA, EXT_VERSION, LocalConnPool};\nuse crate::auth::backend::local::StaticAuthRules;\nuse crate::auth::backend::{ComputeCredentials, ComputeUserInfo};\nuse crate::auth::{self, AuthError};\nuse crate::compute;\nuse crate::compute_ctl::{\n    ComputeCtlError, ExtensionInstallRequest, Privilege, SetRoleGrantsRequest,\n};\nuse crate::config::ProxyConfig;\nuse crate::context::RequestContext;\nuse crate::control_plane::client::ApiLockError;\nuse crate::control_plane::errors::{GetAuthInfoError, WakeComputeError};\nuse crate::error::{ErrorKind, ReportableError, UserFacingError};\nuse crate::intern::{EndpointIdInt, RoleNameInt};\nuse crate::pqproto::StartupMessageParams;\nuse crate::proxy::{connect_auth, connect_compute};\nuse crate::rate_limiter::EndpointRateLimiter;\nuse crate::types::{EndpointId, LOCAL_PROXY_SUFFIX};\n\npub(crate) struct PoolingBackend {\n    pub(crate) http_conn_pool:\n        Arc<GlobalConnPool<LocalProxyClient, HttpConnPool<LocalProxyClient>>>,\n    pub(crate) local_pool: Arc<LocalConnPool<postgres_client::Client>>,\n    pub(crate) pool:\n        Arc<GlobalConnPool<postgres_client::Client, EndpointConnPool<postgres_client::Client>>>,\n\n    pub(crate) config: &'static ProxyConfig,\n    pub(crate) auth_backend: &'static crate::auth::Backend<'static, ()>,\n    pub(crate) endpoint_rate_limiter: 
Arc<EndpointRateLimiter>,\n}\n\nimpl PoolingBackend {\n    pub(crate) async fn authenticate_with_password(\n        &self,\n        ctx: &RequestContext,\n        user_info: &ComputeUserInfo,\n        password: &[u8],\n    ) -> Result<ComputeCredentials, AuthError> {\n        ctx.set_auth_method(crate::context::AuthMethod::Cleartext);\n\n        let user_info = user_info.clone();\n        let backend = self.auth_backend.as_ref().map(|()| user_info.clone());\n        let access_control = backend.get_endpoint_access_control(ctx).await?;\n        access_control.check(\n            ctx,\n            self.config.authentication_config.ip_allowlist_check_enabled,\n            self.config.authentication_config.is_vpc_acccess_proxy,\n        )?;\n\n        access_control.connection_attempt_rate_limit(\n            ctx,\n            &user_info.endpoint,\n            &self.endpoint_rate_limiter,\n        )?;\n\n        let role_access = backend.get_role_secret(ctx).await?;\n        let Some(secret) = role_access.secret else {\n            // If we don't have an authentication secret, for the http flow we can just return an error.\n            info!(\"authentication info not found\");\n            return Err(AuthError::password_failed(&*user_info.user));\n        };\n\n        let ep = EndpointIdInt::from(&user_info.endpoint);\n        let role = RoleNameInt::from(&user_info.user);\n        let auth_outcome = crate::auth::validate_password_and_exchange(\n            &self.config.authentication_config.scram_thread_pool,\n            ep,\n            role,\n            password,\n            secret,\n        )\n        .await?;\n        let res = match auth_outcome {\n            crate::sasl::Outcome::Success(key) => {\n                info!(\"user successfully authenticated\");\n                Ok(key)\n            }\n            crate::sasl::Outcome::Failure(reason) => {\n                info!(\"auth backend failed with an error: {reason}\");\n                
Err(AuthError::password_failed(&*user_info.user))\n            }\n        };\n        res.map(|key| ComputeCredentials {\n            info: user_info,\n            keys: key,\n        })\n    }\n\n    pub(crate) async fn authenticate_with_jwt(\n        &self,\n        ctx: &RequestContext,\n        user_info: &ComputeUserInfo,\n        jwt: String,\n    ) -> Result<ComputeCredentials, AuthError> {\n        ctx.set_auth_method(crate::context::AuthMethod::Jwt);\n\n        match &self.auth_backend {\n            crate::auth::Backend::ControlPlane(console, ()) => {\n                let keys = self\n                    .config\n                    .authentication_config\n                    .jwks_cache\n                    .check_jwt(\n                        ctx,\n                        user_info.endpoint.clone(),\n                        &user_info.user,\n                        &**console,\n                        &jwt,\n                    )\n                    .await?;\n\n                Ok(ComputeCredentials {\n                    info: user_info.clone(),\n                    keys,\n                })\n            }\n            crate::auth::Backend::Local(_) => {\n                let keys = self\n                    .config\n                    .authentication_config\n                    .jwks_cache\n                    .check_jwt(\n                        ctx,\n                        user_info.endpoint.clone(),\n                        &user_info.user,\n                        &StaticAuthRules,\n                        &jwt,\n                    )\n                    .await?;\n\n                Ok(ComputeCredentials {\n                    info: user_info.clone(),\n                    keys,\n                })\n            }\n        }\n    }\n\n    // Wake up the destination if needed. 
Code here is a bit involved because\n    // we reuse the code from the usual proxy and we need to prepare few structures\n    // that this code expects.\n    #[tracing::instrument(skip_all, fields(\n        pid = tracing::field::Empty,\n        compute_id = tracing::field::Empty,\n        conn_id = tracing::field::Empty,\n    ))]\n    pub(crate) async fn connect_to_compute(\n        &self,\n        ctx: &RequestContext,\n        conn_info: ConnInfo,\n        keys: ComputeCredentials,\n        force_new: bool,\n    ) -> Result<Client<postgres_client::Client>, HttpConnError> {\n        let maybe_client = if force_new {\n            debug!(\"pool: pool is disabled\");\n            None\n        } else {\n            debug!(\"pool: looking for an existing connection\");\n            self.pool.get(ctx, &conn_info)?\n        };\n\n        if let Some(client) = maybe_client {\n            return Ok(client);\n        }\n        let conn_id = uuid::Uuid::new_v4();\n        tracing::Span::current().record(\"conn_id\", display(conn_id));\n        info!(%conn_id, \"pool: opening a new connection '{conn_info}'\");\n        let backend = self.auth_backend.as_ref().map(|()| keys.info);\n\n        let mut params = StartupMessageParams::default();\n        params.insert(\"database\", &conn_info.dbname);\n        params.insert(\"user\", &conn_info.user_info.user);\n\n        let mut auth_info = compute::AuthInfo::with_auth_keys(keys.keys);\n        auth_info.set_startup_params(&params, true);\n\n        let node = connect_auth::connect_to_compute_and_auth(\n            ctx,\n            self.config,\n            &backend,\n            auth_info,\n            connect_compute::TlsNegotiation::Postgres,\n        )\n        .await?;\n\n        let (client, connection) = postgres_client::connect::managed(\n            node.stream,\n            Some(node.socket_addr.ip()),\n            postgres_client::config::Host::Tcp(node.hostname.to_string()),\n            node.socket_addr.port(),\n   
         node.ssl_mode,\n            Some(self.config.connect_to_compute.timeout),\n        )\n        .await?;\n\n        Ok(poll_client(\n            self.pool.clone(),\n            ctx,\n            conn_info,\n            client,\n            connection,\n            conn_id,\n            node.aux,\n        ))\n    }\n\n    // Wake up the destination if needed\n    #[tracing::instrument(skip_all, fields(\n        compute_id = tracing::field::Empty,\n        conn_id = tracing::field::Empty,\n    ))]\n    pub(crate) async fn connect_to_local_proxy(\n        &self,\n        ctx: &RequestContext,\n        conn_info: ConnInfo,\n    ) -> Result<http_conn_pool::Client<LocalProxyClient>, HttpConnError> {\n        debug!(\"pool: looking for an existing connection\");\n        if let Ok(Some(client)) = self.http_conn_pool.get(ctx, &conn_info) {\n            return Ok(client);\n        }\n\n        let conn_id = uuid::Uuid::new_v4();\n        tracing::Span::current().record(\"conn_id\", display(conn_id));\n        debug!(%conn_id, \"pool: opening a new connection '{conn_info}'\");\n        let backend = self.auth_backend.as_ref().map(|()| ComputeUserInfo {\n            user: conn_info.user_info.user.clone(),\n            endpoint: EndpointId::from(format!(\n                \"{}{LOCAL_PROXY_SUFFIX}\",\n                conn_info.user_info.endpoint.normalize()\n            )),\n            options: conn_info.user_info.options.clone(),\n        });\n\n        let node = connect_compute::connect_to_compute(\n            ctx,\n            self.config,\n            &backend,\n            connect_compute::TlsNegotiation::Direct,\n        )\n        .await?;\n\n        let stream = match node.stream.into_framed().into_inner() {\n            MaybeTlsStream::Raw(s) => Box::pin(s) as AsyncRW,\n            MaybeTlsStream::Tls(s) => Box::pin(s) as AsyncRW,\n        };\n\n        let (client, connection) = hyper::client::conn::http2::Builder::new(TokioExecutor::new())\n            
.timer(TokioTimer::new())\n            .keep_alive_interval(Duration::from_secs(20))\n            .keep_alive_while_idle(true)\n            .keep_alive_timeout(Duration::from_secs(5))\n            .handshake(TokioIo::new(stream))\n            .await\n            .map_err(LocalProxyConnError::H2)?;\n\n        Ok(poll_http2_client(\n            self.http_conn_pool.clone(),\n            ctx,\n            &conn_info,\n            client,\n            connection,\n            conn_id,\n            node.aux.clone(),\n        ))\n    }\n\n    /// Connect to postgres over localhost.\n    ///\n    /// We expect postgres to be started here, so we won't do any retries.\n    ///\n    /// # Panics\n    ///\n    /// Panics if called with a non-local_proxy backend.\n    #[tracing::instrument(skip_all, fields(\n        pid = tracing::field::Empty,\n        conn_id = tracing::field::Empty,\n    ))]\n    pub(crate) async fn connect_to_local_postgres(\n        &self,\n        ctx: &RequestContext,\n        conn_info: ConnInfo,\n        disable_pg_session_jwt: bool,\n    ) -> Result<Client<postgres_client::Client>, HttpConnError> {\n        if let Some(client) = self.local_pool.get(ctx, &conn_info)? 
{\n            return Ok(client);\n        }\n\n        let local_backend = match &self.auth_backend {\n            auth::Backend::ControlPlane(_, ()) => {\n                unreachable!(\"only local_proxy can connect to local postgres\")\n            }\n            auth::Backend::Local(local) => local,\n        };\n\n        if !self.local_pool.initialized(&conn_info) {\n            // only install and grant usage one at a time.\n            let _permit = local_backend\n                .initialize\n                .acquire()\n                .await\n                .expect(\"semaphore should never be closed\");\n\n            // check again for race\n            if !self.local_pool.initialized(&conn_info) && !disable_pg_session_jwt {\n                local_backend\n                    .compute_ctl\n                    .install_extension(&ExtensionInstallRequest {\n                        extension: EXT_NAME,\n                        database: conn_info.dbname.clone(),\n                        version: EXT_VERSION,\n                    })\n                    .await?;\n\n                local_backend\n                    .compute_ctl\n                    .grant_role(&SetRoleGrantsRequest {\n                        schema: EXT_SCHEMA,\n                        privileges: vec![Privilege::Usage],\n                        database: conn_info.dbname.clone(),\n                        role: conn_info.user_info.user.clone(),\n                    })\n                    .await?;\n\n                self.local_pool.set_initialized(&conn_info);\n            }\n        }\n\n        let conn_id = uuid::Uuid::new_v4();\n        tracing::Span::current().record(\"conn_id\", display(conn_id));\n        info!(%conn_id, \"local_pool: opening a new connection '{conn_info}'\");\n\n        let (key, jwk) = create_random_jwk();\n\n        let mut config = local_backend\n            .node_info\n            .conn_info\n            .to_postgres_client_config();\n        config\n            
.user(&conn_info.user_info.user)\n            .dbname(&conn_info.dbname);\n        if !disable_pg_session_jwt {\n            config.set_param(\n                \"options\",\n                &format!(\n                    \"-c pg_session_jwt.jwk={}\",\n                    serde_json::to_string(&jwk).expect(\"serializing jwk to json should not fail\")\n                ),\n            );\n        }\n\n        let pause = ctx.latency_timer_pause(crate::metrics::Waiting::Compute);\n        let (client, connection) = config.connect(&postgres_client::NoTls).await?;\n        drop(pause);\n\n        let pid = client.get_process_id();\n        tracing::Span::current().record(\"pid\", pid);\n\n        let mut handle = local_conn_pool::poll_client(\n            self.local_pool.clone(),\n            ctx,\n            conn_info,\n            client,\n            connection,\n            key,\n            conn_id,\n            local_backend.node_info.aux.clone(),\n        );\n\n        {\n            let (client, mut discard) = handle.inner();\n            debug!(\"setting up backend session state\");\n\n            // initiates the auth session\n            if !disable_pg_session_jwt\n                && let Err(e) = client.batch_execute(\"select auth.init();\").await\n            {\n                discard.discard();\n                return Err(e.into());\n            }\n\n            info!(\"backend session state initialized\");\n        }\n\n        Ok(handle)\n    }\n}\n\nfn create_random_jwk() -> (SigningKey, jose_jwk::Key) {\n    let key = SigningKey::generate(&mut OsRng);\n\n    let jwk = jose_jwk::Key::Okp(jose_jwk::Okp {\n        crv: jose_jwk::OkpCurves::Ed25519,\n        x: jose_b64::serde::Bytes::from(key.verifying_key().to_bytes().to_vec()),\n        d: None,\n    });\n\n    (key, jwk)\n}\n\n#[derive(Debug, thiserror::Error)]\npub(crate) enum HttpConnError {\n    #[error(\"pooled connection closed at inconsistent state\")]\n    ConnectionClosedAbruptly(#[from] 
tokio::sync::watch::error::SendError<uuid::Uuid>),\n    #[error(\"could not connect to compute\")]\n    ConnectError(#[from] compute::ConnectionError),\n    #[error(\"could not connect to postgres in compute\")]\n    PostgresConnectionError(#[from] postgres_client::Error),\n    #[error(\"could not connect to local-proxy in compute\")]\n    LocalProxyConnectionError(#[from] LocalProxyConnError),\n    #[error(\"could not parse JWT payload\")]\n    JwtPayloadError(serde_json::Error),\n\n    #[error(\"could not install extension: {0}\")]\n    ComputeCtl(#[from] ComputeCtlError),\n    #[error(\"could not get auth info\")]\n    GetAuthInfo(#[from] GetAuthInfoError),\n    #[error(\"user not authenticated\")]\n    AuthError(#[from] AuthError),\n    #[error(\"wake_compute returned error\")]\n    WakeCompute(#[from] WakeComputeError),\n    #[error(\"error acquiring resource permit: {0}\")]\n    TooManyConnectionAttempts(#[from] ApiLockError),\n}\n\nimpl From<connect_auth::AuthError> for HttpConnError {\n    fn from(value: connect_auth::AuthError) -> Self {\n        match value {\n            connect_auth::AuthError::Auth(compute::PostgresError::Postgres(error)) => {\n                Self::PostgresConnectionError(error)\n            }\n            connect_auth::AuthError::Connect(error) => Self::ConnectError(error),\n        }\n    }\n}\n\n#[derive(Debug, thiserror::Error)]\npub(crate) enum LocalProxyConnError {\n    #[error(\"could not establish h2 connection\")]\n    H2(#[from] hyper::Error),\n}\n\nimpl ReportableError for HttpConnError {\n    fn get_error_kind(&self) -> ErrorKind {\n        match self {\n            HttpConnError::ConnectError(e) => e.get_error_kind(),\n            HttpConnError::ConnectionClosedAbruptly(_) => ErrorKind::Compute,\n            HttpConnError::PostgresConnectionError(p) => match p.as_db_error() {\n                // user provided a wrong database name\n                Some(err) if err.code() == &SqlState::INVALID_CATALOG_NAME => 
ErrorKind::User,\n                // postgres rejected the connection\n                Some(_) => ErrorKind::Postgres,\n                // couldn't even reach postgres\n                None => ErrorKind::Compute,\n            },\n            HttpConnError::LocalProxyConnectionError(_) => ErrorKind::Compute,\n            HttpConnError::ComputeCtl(_) => ErrorKind::Service,\n            HttpConnError::JwtPayloadError(_) => ErrorKind::User,\n            HttpConnError::GetAuthInfo(a) => a.get_error_kind(),\n            HttpConnError::AuthError(a) => a.get_error_kind(),\n            HttpConnError::WakeCompute(w) => w.get_error_kind(),\n            HttpConnError::TooManyConnectionAttempts(w) => w.get_error_kind(),\n        }\n    }\n}\n\nimpl UserFacingError for HttpConnError {\n    fn to_string_client(&self) -> String {\n        match self {\n            HttpConnError::ConnectError(p) => p.to_string_client(),\n            HttpConnError::ConnectionClosedAbruptly(_) => self.to_string(),\n            HttpConnError::PostgresConnectionError(p) => p.to_string(),\n            HttpConnError::LocalProxyConnectionError(p) => p.to_string(),\n            HttpConnError::ComputeCtl(_) => \"could not set up the JWT authorization database extension\".to_string(),\n            HttpConnError::JwtPayloadError(p) => p.to_string(),\n            HttpConnError::GetAuthInfo(c) => c.to_string_client(),\n            HttpConnError::AuthError(c) => c.to_string_client(),\n            HttpConnError::WakeCompute(c) => c.to_string_client(),\n            HttpConnError::TooManyConnectionAttempts(_) => {\n                \"Failed to acquire permit to connect to the database. 
Too many database connection attempts are currently ongoing.\".to_owned()\n            }\n        }\n    }\n}\n\nimpl ReportableError for LocalProxyConnError {\n    fn get_error_kind(&self) -> ErrorKind {\n        match self {\n            LocalProxyConnError::H2(_) => ErrorKind::Compute,\n        }\n    }\n}\n\nimpl UserFacingError for LocalProxyConnError {\n    fn to_string_client(&self) -> String {\n        \"Could not establish HTTP connection to the database\".to_string()\n    }\n}\n"
  },
  {
    "path": "proxy/src/serverless/cancel_set.rs",
    "content": "//! A set for cancelling random http connections\n\nuse std::hash::{BuildHasher, BuildHasherDefault};\nuse std::num::NonZeroUsize;\nuse std::time::Duration;\n\nuse indexmap::IndexMap;\nuse parking_lot::Mutex;\nuse rand::distr::uniform::{UniformSampler, UniformUsize};\nuse rustc_hash::FxHasher;\nuse tokio::time::Instant;\nuse tokio_util::sync::CancellationToken;\nuse uuid::Uuid;\n\ntype Hasher = BuildHasherDefault<FxHasher>;\n\npub struct CancelSet {\n    shards: Box<[Mutex<CancelShard>]>,\n    // keyed by random uuid, fxhasher is fine\n    hasher: Hasher,\n}\n\npub(crate) struct CancelShard {\n    tokens: IndexMap<uuid::Uuid, (Instant, CancellationToken), Hasher>,\n}\n\nimpl CancelSet {\n    pub fn new(shards: usize) -> Self {\n        CancelSet {\n            shards: (0..shards)\n                .map(|_| {\n                    Mutex::new(CancelShard {\n                        tokens: IndexMap::with_hasher(Hasher::default()),\n                    })\n                })\n                .collect(),\n            hasher: Hasher::default(),\n        }\n    }\n\n    pub(crate) fn take(&self) -> Option<CancellationToken> {\n        let dist = UniformUsize::new_inclusive(0, usize::MAX).expect(\"valid bounds\");\n        for _ in 0..4 {\n            if let Some(token) = self.take_raw(dist.sample(&mut rand::rng())) {\n                return Some(token);\n            }\n            tracing::trace!(\"failed to get cancel token\");\n        }\n        None\n    }\n\n    fn take_raw(&self, rng: usize) -> Option<CancellationToken> {\n        NonZeroUsize::new(self.shards.len())\n            .and_then(|len| self.shards[rng % len].lock().take(rng / len))\n    }\n\n    pub(crate) fn insert(&self, id: uuid::Uuid, token: CancellationToken) -> CancelGuard<'_> {\n        let shard = NonZeroUsize::new(self.shards.len()).map(|len| {\n            let hash = self.hasher.hash_one(id) as usize;\n            let shard = &self.shards[hash % len];\n            
shard.lock().insert(id, token);\n            shard\n        });\n        CancelGuard { shard, id }\n    }\n}\n\nimpl CancelShard {\n    fn take(&mut self, rng: usize) -> Option<CancellationToken> {\n        NonZeroUsize::new(self.tokens.len()).and_then(|len| {\n            // 10 second grace period so we don't cancel new connections\n            if self.tokens.get_index(rng % len)?.1.0.elapsed() < Duration::from_secs(10) {\n                return None;\n            }\n\n            let (_key, (_insert, token)) = self.tokens.swap_remove_index(rng % len)?;\n            Some(token)\n        })\n    }\n\n    fn remove(&mut self, id: uuid::Uuid) {\n        self.tokens.swap_remove(&id);\n    }\n\n    fn insert(&mut self, id: uuid::Uuid, token: CancellationToken) {\n        self.tokens.insert(id, (Instant::now(), token));\n    }\n}\n\npub(crate) struct CancelGuard<'a> {\n    shard: Option<&'a Mutex<CancelShard>>,\n    id: Uuid,\n}\n\nimpl Drop for CancelGuard<'_> {\n    fn drop(&mut self) {\n        if let Some(shard) = self.shard {\n            shard.lock().remove(self.id);\n        }\n    }\n}\n"
  },
  {
    "path": "proxy/src/serverless/conn_pool.rs",
    "content": "use std::fmt;\nuse std::pin::pin;\nuse std::sync::{Arc, Weak};\nuse std::task::{Poll, ready};\n\nuse futures::future::poll_fn;\nuse futures::{Future, FutureExt};\nuse postgres_client::tls::MakeTlsConnect;\nuse smallvec::SmallVec;\nuse tokio::net::TcpStream;\nuse tokio::time::Instant;\nuse tokio_util::sync::CancellationToken;\nuse tracing::{error, info, info_span};\n#[cfg(test)]\nuse {\n    super::conn_pool_lib::GlobalConnPoolOptions,\n    crate::auth::backend::ComputeUserInfo,\n    std::{sync::atomic, time::Duration},\n};\n\nuse super::conn_pool_lib::{\n    Client, ClientDataEnum, ClientInnerCommon, ClientInnerExt, ConnInfo, EndpointConnPool,\n    GlobalConnPool,\n};\nuse crate::config::ComputeConfig;\nuse crate::context::RequestContext;\nuse crate::control_plane::messages::MetricsAuxInfo;\nuse crate::metrics::Metrics;\n\ntype TlsStream = <ComputeConfig as MakeTlsConnect<TcpStream>>::Stream;\n\n#[derive(Debug, Clone)]\npub(crate) struct ConnInfoWithAuth {\n    pub(crate) conn_info: ConnInfo,\n    pub(crate) auth: AuthData,\n}\n\n#[derive(Debug, Clone)]\npub(crate) enum AuthData {\n    Password(SmallVec<[u8; 16]>),\n    Jwt(String),\n}\n\nimpl fmt::Display for ConnInfo {\n    // use custom display to avoid logging password\n    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {\n        write!(\n            f,\n            \"{}@{}/{}?{}\",\n            self.user_info.user,\n            self.user_info.endpoint,\n            self.dbname,\n            self.user_info.options.get_cache_key(\"\")\n        )\n    }\n}\n\npub(crate) fn poll_client<C: ClientInnerExt>(\n    global_pool: Arc<GlobalConnPool<C, EndpointConnPool<C>>>,\n    ctx: &RequestContext,\n    conn_info: ConnInfo,\n    client: C,\n    mut connection: postgres_client::Connection<TcpStream, TlsStream>,\n    conn_id: uuid::Uuid,\n    aux: MetricsAuxInfo,\n) -> Client<C> {\n    let conn_gauge = Metrics::get().proxy.db_connections.guard(ctx.protocol());\n    let mut session_id = 
ctx.session_id();\n    let (tx, mut rx) = tokio::sync::watch::channel(session_id);\n\n    let span = info_span!(parent: None, \"connection\", %conn_id);\n    let cold_start_info = ctx.cold_start_info();\n    span.in_scope(|| {\n        info!(cold_start_info = cold_start_info.as_str(), %conn_info, %session_id, \"new connection\");\n    });\n    let pool = match conn_info.endpoint_cache_key() {\n        Some(endpoint) => Arc::downgrade(&global_pool.get_or_create_endpoint_pool(&endpoint)),\n        None => Weak::new(),\n    };\n    let pool_clone = pool.clone();\n\n    let db_user = conn_info.db_and_user();\n    let idle = global_pool.get_idle_timeout();\n    let cancel = CancellationToken::new();\n    let cancelled = cancel.clone().cancelled_owned();\n\n    tokio::spawn(async move {\n        let _conn_gauge = conn_gauge;\n        let mut idle_timeout = pin!(tokio::time::sleep(idle));\n        let mut cancelled = pin!(cancelled);\n\n        poll_fn(move |cx| {\n            let _instrument = span.enter();\n\n            if cancelled.as_mut().poll(cx).is_ready() {\n                info!(\"connection dropped\");\n                return Poll::Ready(());\n            }\n\n            match rx.has_changed() {\n                Ok(true) => {\n                    session_id = *rx.borrow_and_update();\n                    info!(%session_id, \"changed session\");\n                    idle_timeout.as_mut().reset(Instant::now() + idle);\n                }\n                Err(_) => {\n                    info!(\"connection dropped\");\n                    return Poll::Ready(());\n                }\n                _ => {}\n            }\n\n            // 5 minute idle connection timeout\n            if idle_timeout.as_mut().poll(cx).is_ready() {\n                idle_timeout.as_mut().reset(Instant::now() + idle);\n                info!(\"connection idle\");\n                if let Some(pool) = pool.clone().upgrade() {\n                    // remove client from pool - should close 
the connection if it's idle.\n                    // does nothing if the client is currently checked-out and in-use\n                    if pool.write().remove_client(db_user.clone(), conn_id) {\n                        info!(\"idle connection removed\");\n                    }\n                }\n            }\n\n            match ready!(connection.poll_unpin(cx)) {\n                Err(e) => error!(%session_id, \"connection error: {}\", e),\n                Ok(()) => info!(\"connection closed\"),\n            }\n\n            // remove from connection pool\n            if let Some(pool) = pool.clone().upgrade()\n                && pool.write().remove_client(db_user.clone(), conn_id)\n            {\n                info!(\"closed connection removed\");\n            }\n\n            Poll::Ready(())\n        })\n        .await;\n    });\n    let inner = ClientInnerCommon {\n        inner: client,\n        aux,\n        conn_id,\n        data: ClientDataEnum::Remote(ClientDataRemote {\n            session: tx,\n            cancel,\n        }),\n    };\n\n    Client::new(inner, conn_info, pool_clone)\n}\n\n#[derive(Clone)]\npub(crate) struct ClientDataRemote {\n    session: tokio::sync::watch::Sender<uuid::Uuid>,\n    cancel: CancellationToken,\n}\n\nimpl ClientDataRemote {\n    pub fn session(&mut self) -> &mut tokio::sync::watch::Sender<uuid::Uuid> {\n        &mut self.session\n    }\n\n    pub fn cancel(&mut self) {\n        self.cancel.cancel();\n    }\n}\n\n#[cfg(test)]\nmod tests {\n    use std::sync::atomic::AtomicBool;\n\n    use super::*;\n    use crate::proxy::NeonOptions;\n    use crate::serverless::cancel_set::CancelSet;\n    use crate::types::{BranchId, EndpointId, ProjectId};\n\n    struct MockClient(Arc<AtomicBool>);\n    impl MockClient {\n        fn new(is_closed: bool) -> Self {\n            MockClient(Arc::new(is_closed.into()))\n        }\n    }\n    impl ClientInnerExt for MockClient {\n        fn is_closed(&self) -> bool {\n            
self.0.load(atomic::Ordering::Relaxed)\n        }\n        fn get_process_id(&self) -> i32 {\n            0\n        }\n        fn reset(&mut self) -> Result<(), postgres_client::Error> {\n            Ok(())\n        }\n    }\n\n    fn create_inner() -> ClientInnerCommon<MockClient> {\n        create_inner_with(MockClient::new(false))\n    }\n\n    fn create_inner_with(client: MockClient) -> ClientInnerCommon<MockClient> {\n        ClientInnerCommon {\n            inner: client,\n            aux: MetricsAuxInfo {\n                endpoint_id: (&EndpointId::from(\"endpoint\")).into(),\n                project_id: (&ProjectId::from(\"project\")).into(),\n                branch_id: (&BranchId::from(\"branch\")).into(),\n                compute_id: \"compute\".into(),\n                cold_start_info: crate::control_plane::messages::ColdStartInfo::Warm,\n            },\n            conn_id: uuid::Uuid::new_v4(),\n            data: ClientDataEnum::Remote(ClientDataRemote {\n                session: tokio::sync::watch::Sender::new(uuid::Uuid::new_v4()),\n                cancel: CancellationToken::new(),\n            }),\n        }\n    }\n\n    #[tokio::test]\n    async fn test_pool() {\n        let _ = env_logger::try_init();\n        let config = Box::leak(Box::new(crate::config::HttpConfig {\n            accept_websockets: false,\n            pool_options: GlobalConnPoolOptions {\n                max_conns_per_endpoint: 2,\n                gc_epoch: Duration::from_secs(1),\n                pool_shards: 2,\n                idle_timeout: Duration::from_secs(1),\n                opt_in: false,\n                max_total_conns: 3,\n            },\n            cancel_set: CancelSet::new(0),\n            client_conn_threshold: u64::MAX,\n            max_request_size_bytes: usize::MAX,\n            max_response_size_bytes: usize::MAX,\n        }));\n        let pool = GlobalConnPool::new(config);\n        let conn_info = ConnInfo {\n            user_info: ComputeUserInfo {\n 
               user: \"user\".into(),\n                endpoint: \"endpoint\".into(),\n                options: NeonOptions::default(),\n            },\n            dbname: \"dbname\".into(),\n        };\n        let ep_pool = Arc::downgrade(\n            &pool.get_or_create_endpoint_pool(&conn_info.endpoint_cache_key().unwrap()),\n        );\n        {\n            let mut client = Client::new(create_inner(), conn_info.clone(), ep_pool.clone());\n            assert_eq!(0, pool.get_global_connections_count());\n            client.inner().1.discard();\n            // Discard should not add the connection from the pool.\n            assert_eq!(0, pool.get_global_connections_count());\n        }\n        {\n            let client = Client::new(create_inner(), conn_info.clone(), ep_pool.clone());\n            drop(client);\n            assert_eq!(1, pool.get_global_connections_count());\n        }\n        {\n            let closed_client = Client::new(\n                create_inner_with(MockClient::new(true)),\n                conn_info.clone(),\n                ep_pool.clone(),\n            );\n            drop(closed_client);\n            assert_eq!(1, pool.get_global_connections_count());\n        }\n        let is_closed: Arc<AtomicBool> = Arc::new(false.into());\n        {\n            let client = Client::new(\n                create_inner_with(MockClient(is_closed.clone())),\n                conn_info.clone(),\n                ep_pool.clone(),\n            );\n            drop(client);\n            // The client should be added to the pool.\n            assert_eq!(2, pool.get_global_connections_count());\n        }\n        {\n            let client = Client::new(create_inner(), conn_info, ep_pool);\n            drop(client);\n\n            // The client shouldn't be added to the pool. 
Because the ep-pool is full.\n            assert_eq!(2, pool.get_global_connections_count());\n        }\n\n        let conn_info = ConnInfo {\n            user_info: ComputeUserInfo {\n                user: \"user\".into(),\n                endpoint: \"endpoint-2\".into(),\n                options: NeonOptions::default(),\n            },\n            dbname: \"dbname\".into(),\n        };\n        let ep_pool = Arc::downgrade(\n            &pool.get_or_create_endpoint_pool(&conn_info.endpoint_cache_key().unwrap()),\n        );\n        {\n            let client = Client::new(create_inner(), conn_info.clone(), ep_pool.clone());\n            drop(client);\n            assert_eq!(3, pool.get_global_connections_count());\n        }\n        {\n            let client = Client::new(create_inner(), conn_info.clone(), ep_pool.clone());\n            drop(client);\n\n            // The client shouldn't be added to the pool. Because the global pool is full.\n            assert_eq!(3, pool.get_global_connections_count());\n        }\n\n        is_closed.store(true, atomic::Ordering::Relaxed);\n        // Do gc for all shards.\n        pool.gc(0);\n        pool.gc(1);\n        // Closed client should be removed from the pool.\n        assert_eq!(2, pool.get_global_connections_count());\n    }\n}\n"
  },
  {
    "path": "proxy/src/serverless/conn_pool_lib.rs",
    "content": "use std::collections::HashMap;\nuse std::marker::PhantomData;\nuse std::ops::Deref;\nuse std::sync::atomic::{self, AtomicUsize};\nuse std::sync::{Arc, Weak};\nuse std::time::Duration;\n\nuse clashmap::ClashMap;\nuse parking_lot::RwLock;\nuse rand::Rng;\nuse smol_str::ToSmolStr;\nuse tracing::{Span, debug, info, warn};\n\nuse super::backend::HttpConnError;\nuse super::conn_pool::ClientDataRemote;\nuse super::http_conn_pool::ClientDataHttp;\nuse super::local_conn_pool::ClientDataLocal;\nuse crate::auth::backend::ComputeUserInfo;\nuse crate::context::RequestContext;\nuse crate::control_plane::messages::{ColdStartInfo, MetricsAuxInfo};\nuse crate::metrics::{HttpEndpointPoolsGuard, Metrics};\nuse crate::protocol2::ConnectionInfoExtra;\nuse crate::types::{DbName, EndpointCacheKey, RoleName};\nuse crate::usage_metrics::{Ids, MetricCounter, USAGE_METRICS};\n\n#[derive(Debug, Clone)]\npub(crate) struct ConnInfo {\n    pub(crate) user_info: ComputeUserInfo,\n    pub(crate) dbname: DbName,\n}\n\nimpl ConnInfo {\n    // hm, change to hasher to avoid cloning?\n    pub(crate) fn db_and_user(&self) -> (DbName, RoleName) {\n        (self.dbname.clone(), self.user_info.user.clone())\n    }\n\n    pub(crate) fn endpoint_cache_key(&self) -> Option<EndpointCacheKey> {\n        // We don't want to cache http connections for ephemeral endpoints.\n        if self.user_info.options.is_ephemeral() {\n            None\n        } else {\n            Some(self.user_info.endpoint_cache_key())\n        }\n    }\n}\n\n#[derive(Clone)]\n#[allow(clippy::large_enum_variant, reason = \"TODO\")]\npub(crate) enum ClientDataEnum {\n    Remote(ClientDataRemote),\n    Local(ClientDataLocal),\n    Http(ClientDataHttp),\n}\n\n#[derive(Clone)]\npub(crate) struct ClientInnerCommon<C: ClientInnerExt> {\n    pub(crate) inner: C,\n    pub(crate) aux: MetricsAuxInfo,\n    pub(crate) conn_id: uuid::Uuid,\n    pub(crate) data: ClientDataEnum, // custom client data like session, key, 
jti\n}\n\nimpl<C: ClientInnerExt> Drop for ClientInnerCommon<C> {\n    fn drop(&mut self) {\n        match &mut self.data {\n            ClientDataEnum::Remote(remote_data) => {\n                remote_data.cancel();\n            }\n            ClientDataEnum::Local(local_data) => {\n                local_data.cancel();\n            }\n            ClientDataEnum::Http(_http_data) => (),\n        }\n    }\n}\n\nimpl<C: ClientInnerExt> ClientInnerCommon<C> {\n    pub(crate) fn get_conn_id(&self) -> uuid::Uuid {\n        self.conn_id\n    }\n\n    pub(crate) fn get_data(&mut self) -> &mut ClientDataEnum {\n        &mut self.data\n    }\n}\n\npub(crate) struct ConnPoolEntry<C: ClientInnerExt> {\n    pub(crate) conn: ClientInnerCommon<C>,\n    pub(crate) _last_access: std::time::Instant,\n}\n\n// Per-endpoint connection pool, (dbname, username) -> DbUserConnPool\n// Number of open connections is limited by the `max_conns_per_endpoint`.\npub(crate) struct EndpointConnPool<C: ClientInnerExt> {\n    pools: HashMap<(DbName, RoleName), DbUserConnPool<C>>,\n    total_conns: usize,\n    /// max # connections per endpoint\n    max_conns: usize,\n    _guard: HttpEndpointPoolsGuard<'static>,\n    global_connections_count: Arc<AtomicUsize>,\n    global_pool_size_max_conns: usize,\n    pool_name: String,\n}\n\nimpl<C: ClientInnerExt> EndpointConnPool<C> {\n    pub(crate) fn new(\n        hmap: HashMap<(DbName, RoleName), DbUserConnPool<C>>,\n        tconns: usize,\n        max_conns_per_endpoint: usize,\n        global_connections_count: Arc<AtomicUsize>,\n        max_total_conns: usize,\n        pname: String,\n    ) -> Self {\n        Self {\n            pools: hmap,\n            total_conns: tconns,\n            max_conns: max_conns_per_endpoint,\n            _guard: Metrics::get().proxy.http_endpoint_pools.guard(),\n            global_connections_count,\n            global_pool_size_max_conns: max_total_conns,\n            pool_name: pname,\n        }\n    }\n\n    pub(crate) 
fn get_conn_entry(\n        &mut self,\n        db_user: (DbName, RoleName),\n    ) -> Option<ConnPoolEntry<C>> {\n        let Self {\n            pools,\n            total_conns,\n            global_connections_count,\n            ..\n        } = self;\n        pools.get_mut(&db_user).and_then(|pool_entries| {\n            let (entry, removed) = pool_entries.get_conn_entry(total_conns);\n            global_connections_count.fetch_sub(removed, atomic::Ordering::Relaxed);\n            entry\n        })\n    }\n\n    pub(crate) fn remove_client(\n        &mut self,\n        db_user: (DbName, RoleName),\n        conn_id: uuid::Uuid,\n    ) -> bool {\n        let Self {\n            pools,\n            total_conns,\n            global_connections_count,\n            ..\n        } = self;\n        if let Some(pool) = pools.get_mut(&db_user) {\n            let old_len = pool.get_conns().len();\n            pool.get_conns()\n                .retain(|conn| conn.conn.get_conn_id() != conn_id);\n            let new_len = pool.get_conns().len();\n            let removed = old_len - new_len;\n            if removed > 0 {\n                global_connections_count.fetch_sub(removed, atomic::Ordering::Relaxed);\n                Metrics::get()\n                    .proxy\n                    .http_pool_opened_connections\n                    .get_metric()\n                    .dec_by(removed as i64);\n            }\n            *total_conns -= removed;\n            removed > 0\n        } else {\n            false\n        }\n    }\n\n    pub(crate) fn get_name(&self) -> &str {\n        &self.pool_name\n    }\n\n    pub(crate) fn get_pool(&self, db_user: (DbName, RoleName)) -> Option<&DbUserConnPool<C>> {\n        self.pools.get(&db_user)\n    }\n\n    pub(crate) fn get_pool_mut(\n        &mut self,\n        db_user: (DbName, RoleName),\n    ) -> Option<&mut DbUserConnPool<C>> {\n        self.pools.get_mut(&db_user)\n    }\n\n    pub(crate) fn put(pool: &RwLock<Self>, conn_info: 
&ConnInfo, mut client: ClientInnerCommon<C>) {\n        let conn_id = client.get_conn_id();\n        let (max_conn, conn_count, pool_name) = {\n            let pool = pool.read();\n            (\n                pool.global_pool_size_max_conns,\n                pool.global_connections_count\n                    .load(atomic::Ordering::Relaxed),\n                pool.get_name().to_string(),\n            )\n        };\n\n        if client.inner.is_closed() {\n            info!(%conn_id, \"{pool_name}: throwing away connection '{conn_info}' because connection is closed\");\n            return;\n        }\n\n        if let Err(error) = client.inner.reset() {\n            warn!(?error, %conn_id, \"{pool_name}: throwing away connection '{conn_info}' because connection could not be reset\");\n            return;\n        }\n\n        if conn_count >= max_conn {\n            info!(%conn_id, \"{pool_name}: throwing away connection '{conn_info}' because pool is full\");\n            return;\n        }\n\n        // return connection to the pool\n        let mut returned = false;\n        let mut per_db_size = 0;\n        let total_conns = {\n            let mut pool = pool.write();\n\n            if pool.total_conns < pool.max_conns {\n                let pool_entries = pool.pools.entry(conn_info.db_and_user()).or_default();\n                pool_entries.get_conns().push(ConnPoolEntry {\n                    conn: client,\n                    _last_access: std::time::Instant::now(),\n                });\n\n                returned = true;\n                per_db_size = pool_entries.get_conns().len();\n\n                pool.total_conns += 1;\n                pool.global_connections_count\n                    .fetch_add(1, atomic::Ordering::Relaxed);\n                Metrics::get()\n                    .proxy\n                    .http_pool_opened_connections\n                    .get_metric()\n                    .inc();\n            }\n\n            pool.total_conns\n        
};\n\n        // do logging outside of the mutex\n        if returned {\n            debug!(%conn_id, \"{pool_name}: returning connection '{conn_info}' back to the pool, total_conns={total_conns}, for this (db, user)={per_db_size}\");\n        } else {\n            info!(%conn_id, \"{pool_name}: throwing away connection '{conn_info}' because pool is full, total_conns={total_conns}\");\n        }\n    }\n}\n\nimpl<C: ClientInnerExt> Drop for EndpointConnPool<C> {\n    fn drop(&mut self) {\n        if self.total_conns > 0 {\n            self.global_connections_count\n                .fetch_sub(self.total_conns, atomic::Ordering::Relaxed);\n            Metrics::get()\n                .proxy\n                .http_pool_opened_connections\n                .get_metric()\n                .dec_by(self.total_conns as i64);\n        }\n    }\n}\n\npub(crate) struct DbUserConnPool<C: ClientInnerExt> {\n    pub(crate) conns: Vec<ConnPoolEntry<C>>,\n    pub(crate) initialized: Option<bool>, // a bit ugly, exists only for local pools\n}\n\nimpl<C: ClientInnerExt> Default for DbUserConnPool<C> {\n    fn default() -> Self {\n        Self {\n            conns: Vec::new(),\n            initialized: None,\n        }\n    }\n}\n\npub(crate) trait DbUserConn<C: ClientInnerExt>: Default {\n    fn set_initialized(&mut self);\n    fn is_initialized(&self) -> bool;\n    fn clear_closed_clients(&mut self, conns: &mut usize) -> usize;\n    fn get_conn_entry(&mut self, conns: &mut usize) -> (Option<ConnPoolEntry<C>>, usize);\n    fn get_conns(&mut self) -> &mut Vec<ConnPoolEntry<C>>;\n}\n\nimpl<C: ClientInnerExt> DbUserConn<C> for DbUserConnPool<C> {\n    fn set_initialized(&mut self) {\n        self.initialized = Some(true);\n    }\n\n    fn is_initialized(&self) -> bool {\n        self.initialized.unwrap_or(false)\n    }\n\n    fn clear_closed_clients(&mut self, conns: &mut usize) -> usize {\n        let old_len = self.conns.len();\n\n        self.conns.retain(|conn| 
!conn.conn.inner.is_closed());\n\n        let new_len = self.conns.len();\n        let removed = old_len - new_len;\n        *conns -= removed;\n        removed\n    }\n\n    fn get_conn_entry(&mut self, conns: &mut usize) -> (Option<ConnPoolEntry<C>>, usize) {\n        let mut removed = self.clear_closed_clients(conns);\n        let conn = self.conns.pop();\n        if conn.is_some() {\n            *conns -= 1;\n            removed += 1;\n        }\n\n        Metrics::get()\n            .proxy\n            .http_pool_opened_connections\n            .get_metric()\n            .dec_by(removed as i64);\n\n        (conn, removed)\n    }\n\n    fn get_conns(&mut self) -> &mut Vec<ConnPoolEntry<C>> {\n        &mut self.conns\n    }\n}\n\npub(crate) trait EndpointConnPoolExt<C: ClientInnerExt> {\n    fn clear_closed(&mut self) -> usize;\n    fn total_conns(&self) -> usize;\n}\n\nimpl<C: ClientInnerExt> EndpointConnPoolExt<C> for EndpointConnPool<C> {\n    fn clear_closed(&mut self) -> usize {\n        let mut clients_removed: usize = 0;\n        for db_pool in self.pools.values_mut() {\n            clients_removed += db_pool.clear_closed_clients(&mut self.total_conns);\n        }\n        clients_removed\n    }\n\n    fn total_conns(&self) -> usize {\n        self.total_conns\n    }\n}\n\npub(crate) struct GlobalConnPool<C, P>\nwhere\n    C: ClientInnerExt,\n    P: EndpointConnPoolExt<C>,\n{\n    // endpoint -> per-endpoint connection pool\n    //\n    // That should be a fairly conteded map, so return reference to the per-endpoint\n    // pool as early as possible and release the lock.\n    pub(crate) global_pool: ClashMap<EndpointCacheKey, Arc<RwLock<P>>>,\n\n    /// Number of endpoint-connection pools\n    ///\n    /// [`ClashMap::len`] iterates over all inner pools and acquires a read lock on each.\n    /// That seems like far too much effort, so we're using a relaxed increment counter instead.\n    /// It's only used for diagnostics.\n    pub(crate) 
global_pool_size: AtomicUsize,\n\n    /// Total number of connections in the pool\n    pub(crate) global_connections_count: Arc<AtomicUsize>,\n\n    pub(crate) config: &'static crate::config::HttpConfig,\n\n    _marker: PhantomData<C>,\n}\n\n#[derive(Debug, Clone, Copy)]\npub struct GlobalConnPoolOptions {\n    // Maximum number of connections per one endpoint.\n    // Can mix different (dbname, username) connections.\n    // When running out of free slots for a particular endpoint,\n    // falls back to opening a new connection for each request.\n    pub max_conns_per_endpoint: usize,\n\n    pub gc_epoch: Duration,\n\n    pub pool_shards: usize,\n\n    pub idle_timeout: Duration,\n\n    pub opt_in: bool,\n\n    // Total number of connections in the pool.\n    pub max_total_conns: usize,\n}\n\nimpl<C, P> GlobalConnPool<C, P>\nwhere\n    C: ClientInnerExt,\n    P: EndpointConnPoolExt<C>,\n{\n    pub(crate) fn new(config: &'static crate::config::HttpConfig) -> Arc<Self> {\n        let shards = config.pool_options.pool_shards;\n        Arc::new(Self {\n            global_pool: ClashMap::with_shard_amount(shards),\n            global_pool_size: AtomicUsize::new(0),\n            config,\n            global_connections_count: Arc::new(AtomicUsize::new(0)),\n            _marker: PhantomData,\n        })\n    }\n\n    #[cfg(test)]\n    pub(crate) fn get_global_connections_count(&self) -> usize {\n        self.global_connections_count\n            .load(atomic::Ordering::Relaxed)\n    }\n\n    pub(crate) fn get_idle_timeout(&self) -> Duration {\n        self.config.pool_options.idle_timeout\n    }\n\n    pub(crate) fn shutdown(&self) {\n        // drops all strong references to endpoint-pools\n        self.global_pool.clear();\n    }\n\n    pub(crate) async fn gc_worker(&self, mut rng: impl Rng) {\n        let epoch = self.config.pool_options.gc_epoch;\n        let mut interval = tokio::time::interval(epoch / (self.global_pool.shards().len()) as u32);\n        loop {\n      
      interval.tick().await;\n\n            let shard = rng.random_range(0..self.global_pool.shards().len());\n            self.gc(shard);\n        }\n    }\n\n    pub(crate) fn gc(&self, shard: usize) {\n        debug!(shard, \"pool: performing epoch reclamation\");\n\n        // acquire a random shard lock\n        let mut shard = self.global_pool.shards()[shard].write();\n\n        let timer = Metrics::get()\n            .proxy\n            .http_pool_reclaimation_lag_seconds\n            .start_timer();\n        let current_len = shard.len();\n        let mut clients_removed = 0;\n        shard.retain(|(endpoint, x)| {\n            // if the current endpoint pool is unique (no other strong or weak references)\n            // then it is currently not in use by any connections.\n            if let Some(pool) = Arc::get_mut(x) {\n                let endpoints = pool.get_mut();\n                clients_removed = endpoints.clear_closed();\n\n                if endpoints.total_conns() == 0 {\n                    info!(\"pool: discarding pool for endpoint {endpoint}\");\n                    return false;\n                }\n            }\n\n            true\n        });\n\n        let new_len = shard.len();\n        drop(shard);\n        timer.observe();\n\n        // Do logging outside of the lock.\n        if clients_removed > 0 {\n            let size = self\n                .global_connections_count\n                .fetch_sub(clients_removed, atomic::Ordering::Relaxed)\n                - clients_removed;\n            Metrics::get()\n                .proxy\n                .http_pool_opened_connections\n                .get_metric()\n                .dec_by(clients_removed as i64);\n            info!(\n                \"pool: performed global pool gc. 
removed {clients_removed} clients, total number of clients in pool is {size}\"\n            );\n        }\n        let removed = current_len - new_len;\n\n        if removed > 0 {\n            let global_pool_size = self\n                .global_pool_size\n                .fetch_sub(removed, atomic::Ordering::Relaxed)\n                - removed;\n            info!(\"pool: performed global pool gc. size now {global_pool_size}\");\n        }\n    }\n}\n\nimpl<C: ClientInnerExt> GlobalConnPool<C, EndpointConnPool<C>> {\n    pub(crate) fn get(\n        self: &Arc<Self>,\n        ctx: &RequestContext,\n        conn_info: &ConnInfo,\n    ) -> Result<Option<Client<C>>, HttpConnError> {\n        let mut client: Option<ClientInnerCommon<C>> = None;\n        let Some(endpoint) = conn_info.endpoint_cache_key() else {\n            return Ok(None);\n        };\n\n        let endpoint_pool = self.get_or_create_endpoint_pool(&endpoint);\n        if let Some(entry) = endpoint_pool\n            .write()\n            .get_conn_entry(conn_info.db_and_user())\n        {\n            client = Some(entry.conn);\n        }\n        let endpoint_pool = Arc::downgrade(&endpoint_pool);\n\n        // ok return cached connection if found and establish a new one otherwise\n        if let Some(mut client) = client {\n            if client.inner.is_closed() {\n                info!(\"pool: cached connection '{conn_info}' is closed, opening a new one\");\n                return Ok(None);\n            }\n            tracing::Span::current()\n                .record(\"conn_id\", tracing::field::display(client.get_conn_id()));\n            tracing::Span::current().record(\n                \"pid\",\n                tracing::field::display(client.inner.get_process_id()),\n            );\n            debug!(\n                cold_start_info = ColdStartInfo::HttpPoolHit.as_str(),\n                \"pool: reusing connection '{conn_info}'\"\n            );\n\n            match client.get_data() {\n        
        ClientDataEnum::Local(data) => {\n                    data.session().send(ctx.session_id())?;\n                }\n\n                ClientDataEnum::Remote(data) => {\n                    data.session().send(ctx.session_id())?;\n                }\n                ClientDataEnum::Http(_) => (),\n            }\n\n            ctx.set_cold_start_info(ColdStartInfo::HttpPoolHit);\n            ctx.success();\n            return Ok(Some(Client::new(client, conn_info.clone(), endpoint_pool)));\n        }\n        Ok(None)\n    }\n\n    pub(crate) fn get_or_create_endpoint_pool(\n        self: &Arc<Self>,\n        endpoint: &EndpointCacheKey,\n    ) -> Arc<RwLock<EndpointConnPool<C>>> {\n        // fast path\n        if let Some(pool) = self.global_pool.get(endpoint) {\n            return pool.clone();\n        }\n\n        // slow path\n        let new_pool = Arc::new(RwLock::new(EndpointConnPool {\n            pools: HashMap::new(),\n            total_conns: 0,\n            max_conns: self.config.pool_options.max_conns_per_endpoint,\n            _guard: Metrics::get().proxy.http_endpoint_pools.guard(),\n            global_connections_count: self.global_connections_count.clone(),\n            global_pool_size_max_conns: self.config.pool_options.max_total_conns,\n            pool_name: String::from(\"remote\"),\n        }));\n\n        // find or create a pool for this endpoint\n        let mut created = false;\n        let pool = self\n            .global_pool\n            .entry(endpoint.clone())\n            .or_insert_with(|| {\n                created = true;\n                new_pool\n            })\n            .clone();\n\n        // log new global pool size\n        if created {\n            let global_pool_size = self\n                .global_pool_size\n                .fetch_add(1, atomic::Ordering::Relaxed)\n                + 1;\n            info!(\n                \"pool: created new pool for '{endpoint}', global pool size now {global_pool_size}\"\n      
      );\n        }\n\n        pool\n    }\n}\npub(crate) struct Client<C: ClientInnerExt> {\n    span: Span,\n    inner: Option<ClientInnerCommon<C>>,\n    conn_info: ConnInfo,\n    pool: Weak<RwLock<EndpointConnPool<C>>>,\n}\n\npub(crate) struct Discard<'a, C: ClientInnerExt> {\n    conn_info: &'a ConnInfo,\n    pool: &'a mut Weak<RwLock<EndpointConnPool<C>>>,\n}\n\nimpl<C: ClientInnerExt> Client<C> {\n    pub(crate) fn new(\n        inner: ClientInnerCommon<C>,\n        conn_info: ConnInfo,\n        pool: Weak<RwLock<EndpointConnPool<C>>>,\n    ) -> Self {\n        Self {\n            inner: Some(inner),\n            span: Span::current(),\n            conn_info,\n            pool,\n        }\n    }\n\n    pub(crate) fn client_inner(&mut self) -> (&mut ClientInnerCommon<C>, Discard<'_, C>) {\n        let Self {\n            inner,\n            pool,\n            conn_info,\n            span: _,\n        } = self;\n        let inner_m = inner.as_mut().expect(\"client inner should not be removed\");\n        (inner_m, Discard { conn_info, pool })\n    }\n\n    pub(crate) fn inner(&mut self) -> (&mut C, Discard<'_, C>) {\n        let Self {\n            inner,\n            pool,\n            conn_info,\n            span: _,\n        } = self;\n        let inner = inner.as_mut().expect(\"client inner should not be removed\");\n        (&mut inner.inner, Discard { conn_info, pool })\n    }\n\n    pub(crate) fn metrics(&self, ctx: &RequestContext) -> Arc<MetricCounter> {\n        let aux = &self\n            .inner\n            .as_ref()\n            .expect(\"client inner should not be removed\")\n            .aux;\n\n        let private_link_id = match ctx.extra() {\n            None => None,\n            Some(ConnectionInfoExtra::Aws { vpce_id }) => Some(vpce_id.clone()),\n            Some(ConnectionInfoExtra::Azure { link_id }) => Some(link_id.to_smolstr()),\n        };\n\n        USAGE_METRICS.register(Ids {\n            endpoint_id: aux.endpoint_id,\n            
branch_id: aux.branch_id,\n            private_link_id,\n        })\n    }\n}\n\nimpl<C: ClientInnerExt> Drop for Client<C> {\n    fn drop(&mut self) {\n        let conn_info = self.conn_info.clone();\n        let client = self\n            .inner\n            .take()\n            .expect(\"client inner should not be removed\");\n        if let Some(conn_pool) = std::mem::take(&mut self.pool).upgrade() {\n            let _current_span = self.span.enter();\n            // return connection to the pool\n            EndpointConnPool::put(&conn_pool, &conn_info, client);\n        }\n    }\n}\n\nimpl<C: ClientInnerExt> Deref for Client<C> {\n    type Target = C;\n\n    fn deref(&self) -> &Self::Target {\n        &self\n            .inner\n            .as_ref()\n            .expect(\"client inner should not be removed\")\n            .inner\n    }\n}\n\npub(crate) trait ClientInnerExt: Sync + Send + 'static {\n    fn is_closed(&self) -> bool;\n    fn get_process_id(&self) -> i32;\n    fn reset(&mut self) -> Result<(), postgres_client::Error>;\n}\n\nimpl ClientInnerExt for postgres_client::Client {\n    fn is_closed(&self) -> bool {\n        self.is_closed()\n    }\n\n    fn get_process_id(&self) -> i32 {\n        self.get_process_id()\n    }\n\n    fn reset(&mut self) -> Result<(), postgres_client::Error> {\n        self.reset_session_background()\n    }\n}\n\nimpl<C: ClientInnerExt> Discard<'_, C> {\n    pub(crate) fn discard(&mut self) {\n        let conn_info = &self.conn_info;\n        if std::mem::take(self.pool).strong_count() > 0 {\n            info!(\n                \"pool: throwing away connection '{conn_info}' because connection is potentially in a broken state\"\n            );\n        }\n    }\n}\n"
  },
  {
    "path": "proxy/src/serverless/error.rs",
    "content": "use http::StatusCode;\nuse http::header::HeaderName;\n\nuse crate::auth::ComputeUserInfoParseError;\nuse crate::error::{ErrorKind, ReportableError, UserFacingError};\nuse crate::http::ReadBodyError;\n\npub trait HttpCodeError {\n    fn get_http_status_code(&self) -> StatusCode;\n}\n\n#[derive(Debug, thiserror::Error)]\npub(crate) enum ConnInfoError {\n    #[error(\"invalid header: {0}\")]\n    InvalidHeader(&'static HeaderName),\n    #[error(\"invalid connection string: {0}\")]\n    UrlParseError(#[from] url::ParseError),\n    #[error(\"incorrect scheme\")]\n    IncorrectScheme,\n    #[error(\"missing database name\")]\n    MissingDbName,\n    #[error(\"invalid database name\")]\n    InvalidDbName,\n    #[error(\"missing username\")]\n    MissingUsername,\n    #[error(\"invalid username: {0}\")]\n    InvalidUsername(#[from] std::string::FromUtf8Error),\n    #[error(\"missing authentication credentials: {0}\")]\n    MissingCredentials(Credentials),\n    #[error(\"missing hostname\")]\n    MissingHostname,\n    #[error(\"invalid hostname: {0}\")]\n    InvalidEndpoint(#[from] ComputeUserInfoParseError),\n}\n\n#[derive(Debug, thiserror::Error)]\npub(crate) enum Credentials {\n    #[error(\"required password\")]\n    Password,\n    #[error(\"required authorization bearer token in JWT format\")]\n    BearerJwt,\n}\n\nimpl ReportableError for ConnInfoError {\n    fn get_error_kind(&self) -> ErrorKind {\n        ErrorKind::User\n    }\n}\n\nimpl UserFacingError for ConnInfoError {\n    fn to_string_client(&self) -> String {\n        self.to_string()\n    }\n}\n\n#[derive(Debug, thiserror::Error)]\npub(crate) enum ReadPayloadError {\n    #[error(\"could not read the HTTP request body: {0}\")]\n    Read(#[from] hyper::Error),\n    #[error(\"request is too large (max is {limit} bytes)\")]\n    BodyTooLarge { limit: usize },\n    #[error(\"could not parse the HTTP request body: {0}\")]\n    Parse(#[from] serde_json::Error),\n}\n\nimpl 
From<ReadBodyError<hyper::Error>> for ReadPayloadError {\n    fn from(value: ReadBodyError<hyper::Error>) -> Self {\n        match value {\n            ReadBodyError::BodyTooLarge { limit } => Self::BodyTooLarge { limit },\n            ReadBodyError::Read(e) => Self::Read(e),\n        }\n    }\n}\n\nimpl ReportableError for ReadPayloadError {\n    fn get_error_kind(&self) -> ErrorKind {\n        match self {\n            ReadPayloadError::Read(_) => ErrorKind::ClientDisconnect,\n            ReadPayloadError::BodyTooLarge { .. } => ErrorKind::User,\n            ReadPayloadError::Parse(_) => ErrorKind::User,\n        }\n    }\n}\n\nimpl HttpCodeError for ReadPayloadError {\n    fn get_http_status_code(&self) -> StatusCode {\n        match self {\n            ReadPayloadError::Read(_) => StatusCode::BAD_REQUEST,\n            ReadPayloadError::BodyTooLarge { .. } => StatusCode::PAYLOAD_TOO_LARGE,\n            ReadPayloadError::Parse(_) => StatusCode::BAD_REQUEST,\n        }\n    }\n}\n"
  },
  {
    "path": "proxy/src/serverless/http_conn_pool.rs",
    "content": "use std::collections::VecDeque;\nuse std::sync::atomic::{self, AtomicUsize};\nuse std::sync::{Arc, Weak};\n\nuse bytes::Bytes;\nuse http_body_util::combinators::BoxBody;\nuse hyper::client::conn::http2;\nuse hyper_util::rt::{TokioExecutor, TokioIo};\nuse parking_lot::RwLock;\nuse smol_str::ToSmolStr;\nuse tracing::{Instrument, debug, error, info, info_span};\n\nuse super::AsyncRW;\nuse super::backend::HttpConnError;\nuse super::conn_pool_lib::{\n    ClientDataEnum, ClientInnerCommon, ClientInnerExt, ConnInfo, ConnPoolEntry,\n    EndpointConnPoolExt, GlobalConnPool,\n};\nuse crate::context::RequestContext;\nuse crate::control_plane::messages::{ColdStartInfo, MetricsAuxInfo};\nuse crate::metrics::{HttpEndpointPoolsGuard, Metrics};\nuse crate::protocol2::ConnectionInfoExtra;\nuse crate::types::EndpointCacheKey;\nuse crate::usage_metrics::{Ids, MetricCounter, USAGE_METRICS};\n\npub(crate) type LocalProxyClient = http2::SendRequest<BoxBody<Bytes, hyper::Error>>;\npub(crate) type LocalProxyConnection =\n    http2::Connection<TokioIo<AsyncRW>, BoxBody<Bytes, hyper::Error>, TokioExecutor>;\n\n#[derive(Clone)]\npub(crate) struct ClientDataHttp();\n\n// Per-endpoint connection pool\n// Number of open connections is limited by the `max_conns_per_endpoint`.\npub(crate) struct HttpConnPool<C: ClientInnerExt + Clone> {\n    // TODO(conrad):\n    // either we should open more connections depending on stream count\n    // (not exposed by hyper, need our own counter)\n    // or we can change this to an Option rather than a VecDeque.\n    //\n    // Opening more connections to the same db because we run out of streams\n    // seems somewhat redundant though.\n    //\n    // Probably we should run a semaphore and just the single conn. 
TBD.\n    conns: VecDeque<ConnPoolEntry<C>>,\n    _guard: HttpEndpointPoolsGuard<'static>,\n    global_connections_count: Arc<AtomicUsize>,\n}\n\nimpl<C: ClientInnerExt + Clone> HttpConnPool<C> {\n    fn get_conn_entry(&mut self) -> Option<ConnPoolEntry<C>> {\n        let Self { conns, .. } = self;\n\n        loop {\n            let conn = conns.pop_front()?;\n            if !conn.conn.inner.is_closed() {\n                let new_conn = ConnPoolEntry {\n                    conn: conn.conn.clone(),\n                    _last_access: std::time::Instant::now(),\n                };\n\n                conns.push_back(new_conn);\n                return Some(conn);\n            }\n        }\n    }\n\n    fn remove_conn(&mut self, conn_id: uuid::Uuid) -> bool {\n        let Self {\n            conns,\n            global_connections_count,\n            ..\n        } = self;\n\n        let old_len = conns.len();\n        conns.retain(|entry| entry.conn.conn_id != conn_id);\n        let new_len = conns.len();\n        let removed = old_len - new_len;\n        if removed > 0 {\n            global_connections_count.fetch_sub(removed, atomic::Ordering::Relaxed);\n            Metrics::get()\n                .proxy\n                .http_pool_opened_connections\n                .get_metric()\n                .dec_by(removed as i64);\n        }\n        removed > 0\n    }\n}\n\nimpl<C: ClientInnerExt + Clone> EndpointConnPoolExt<C> for HttpConnPool<C> {\n    fn clear_closed(&mut self) -> usize {\n        let Self { conns, .. 
} = self;\n        let old_len = conns.len();\n        conns.retain(|entry| !entry.conn.inner.is_closed());\n\n        let new_len = conns.len();\n        old_len - new_len\n    }\n\n    fn total_conns(&self) -> usize {\n        self.conns.len()\n    }\n}\n\nimpl<C: ClientInnerExt + Clone> Drop for HttpConnPool<C> {\n    fn drop(&mut self) {\n        if !self.conns.is_empty() {\n            self.global_connections_count\n                .fetch_sub(self.conns.len(), atomic::Ordering::Relaxed);\n            Metrics::get()\n                .proxy\n                .http_pool_opened_connections\n                .get_metric()\n                .dec_by(self.conns.len() as i64);\n        }\n    }\n}\n\nimpl<C: ClientInnerExt + Clone> GlobalConnPool<C, HttpConnPool<C>> {\n    #[expect(unused_results)]\n    pub(crate) fn get(\n        self: &Arc<Self>,\n        ctx: &RequestContext,\n        conn_info: &ConnInfo,\n    ) -> Result<Option<Client<C>>, HttpConnError> {\n        let result: Result<Option<Client<C>>, HttpConnError>;\n        let Some(endpoint) = conn_info.endpoint_cache_key() else {\n            result = Ok(None);\n            return result;\n        };\n        let endpoint_pool = self.get_or_create_endpoint_pool(&endpoint);\n        let Some(client) = endpoint_pool.write().get_conn_entry() else {\n            result = Ok(None);\n            return result;\n        };\n\n        tracing::Span::current().record(\"conn_id\", tracing::field::display(client.conn.conn_id));\n        debug!(\n            cold_start_info = ColdStartInfo::HttpPoolHit.as_str(),\n            \"pool: reusing connection '{conn_info}'\"\n        );\n        ctx.set_cold_start_info(ColdStartInfo::HttpPoolHit);\n        ctx.success();\n\n        Ok(Some(Client::new(client.conn.clone())))\n    }\n\n    fn get_or_create_endpoint_pool(\n        self: &Arc<Self>,\n        endpoint: &EndpointCacheKey,\n    ) -> Arc<RwLock<HttpConnPool<C>>> {\n        // fast path\n        if let Some(pool) = 
self.global_pool.get(endpoint) {\n            return pool.clone();\n        }\n\n        // slow path\n        let new_pool = Arc::new(RwLock::new(HttpConnPool {\n            conns: VecDeque::new(),\n            _guard: Metrics::get().proxy.http_endpoint_pools.guard(),\n            global_connections_count: self.global_connections_count.clone(),\n        }));\n\n        // find or create a pool for this endpoint\n        let mut created = false;\n        let pool = self\n            .global_pool\n            .entry(endpoint.clone())\n            .or_insert_with(|| {\n                created = true;\n                new_pool\n            })\n            .clone();\n\n        // log new global pool size\n        if created {\n            let global_pool_size = self\n                .global_pool_size\n                .fetch_add(1, atomic::Ordering::Relaxed)\n                + 1;\n            info!(\n                \"pool: created new pool for '{endpoint}', global pool size now {global_pool_size}\"\n            );\n        }\n\n        pool\n    }\n}\n\npub(crate) fn poll_http2_client(\n    global_pool: Arc<GlobalConnPool<LocalProxyClient, HttpConnPool<LocalProxyClient>>>,\n    ctx: &RequestContext,\n    conn_info: &ConnInfo,\n    client: LocalProxyClient,\n    connection: LocalProxyConnection,\n    conn_id: uuid::Uuid,\n    aux: MetricsAuxInfo,\n) -> Client<LocalProxyClient> {\n    let conn_gauge = Metrics::get().proxy.db_connections.guard(ctx.protocol());\n    let session_id = ctx.session_id();\n\n    let span = info_span!(parent: None, \"connection\", %conn_id);\n    let cold_start_info = ctx.cold_start_info();\n    span.in_scope(|| {\n        info!(cold_start_info = cold_start_info.as_str(), %conn_info, %session_id, \"new connection\");\n    });\n\n    let pool = match conn_info.endpoint_cache_key() {\n        Some(endpoint) => {\n            let pool = global_pool.get_or_create_endpoint_pool(&endpoint);\n            let client = ClientInnerCommon {\n               
 inner: client.clone(),\n                aux: aux.clone(),\n                conn_id,\n                data: ClientDataEnum::Http(ClientDataHttp()),\n            };\n            pool.write().conns.push_back(ConnPoolEntry {\n                conn: client,\n                _last_access: std::time::Instant::now(),\n            });\n            Metrics::get()\n                .proxy\n                .http_pool_opened_connections\n                .get_metric()\n                .inc();\n\n            Arc::downgrade(&pool)\n        }\n        None => Weak::new(),\n    };\n\n    tokio::spawn(\n        async move {\n            let _conn_gauge = conn_gauge;\n            let res = connection.await;\n            match res {\n                Ok(()) => info!(\"connection closed\"),\n                Err(e) => error!(%session_id, \"connection error: {e:?}\"),\n            }\n\n            // remove from connection pool\n            if let Some(pool) = pool.clone().upgrade()\n                && pool.write().remove_conn(conn_id)\n            {\n                info!(\"closed connection removed\");\n            }\n        }\n        .instrument(span),\n    );\n\n    let client = ClientInnerCommon {\n        inner: client,\n        aux,\n        conn_id,\n        data: ClientDataEnum::Http(ClientDataHttp()),\n    };\n\n    Client::new(client)\n}\n\npub(crate) struct Client<C: ClientInnerExt + Clone> {\n    pub(crate) inner: ClientInnerCommon<C>,\n}\n\nimpl<C: ClientInnerExt + Clone> Client<C> {\n    pub(self) fn new(inner: ClientInnerCommon<C>) -> Self {\n        Self { inner }\n    }\n\n    pub(crate) fn metrics(&self, ctx: &RequestContext) -> Arc<MetricCounter> {\n        let aux = &self.inner.aux;\n\n        let private_link_id = match ctx.extra() {\n            None => None,\n            Some(ConnectionInfoExtra::Aws { vpce_id }) => Some(vpce_id.clone()),\n            Some(ConnectionInfoExtra::Azure { link_id }) => Some(link_id.to_smolstr()),\n        };\n\n        
USAGE_METRICS.register(Ids {\n            endpoint_id: aux.endpoint_id,\n            branch_id: aux.branch_id,\n            private_link_id,\n        })\n    }\n}\n\nimpl ClientInnerExt for LocalProxyClient {\n    fn is_closed(&self) -> bool {\n        self.is_closed()\n    }\n\n    fn get_process_id(&self) -> i32 {\n        // ideally throw something meaningful\n        -1\n    }\n\n    fn reset(&mut self) -> Result<(), postgres_client::Error> {\n        // We use HTTP/2.0 to talk to local proxy. HTTP is stateless,\n        // so there's nothing to reset.\n        Ok(())\n    }\n}\n"
  },
  {
    "path": "proxy/src/serverless/http_util.rs",
    "content": "//! Things stolen from `libs/utils/src/http` to add hyper 1.0 compatibility\n//! Will merge back in at some point in the future.\n\nuse anyhow::Context;\nuse bytes::Bytes;\nuse http::header::AUTHORIZATION;\nuse http::{HeaderMap, HeaderName, HeaderValue, Response, StatusCode};\nuse http_body_util::combinators::BoxBody;\nuse http_body_util::{BodyExt, Full};\nuse http_utils::error::ApiError;\nuse serde::Serialize;\nuse url::Url;\nuse uuid::Uuid;\n\nuse super::conn_pool::{AuthData, ConnInfoWithAuth};\nuse super::conn_pool_lib::ConnInfo;\nuse super::error::{ConnInfoError, Credentials};\nuse crate::auth::backend::ComputeUserInfo;\nuse crate::config::AuthenticationConfig;\nuse crate::context::RequestContext;\nuse crate::metrics::{Metrics, SniGroup, SniKind};\nuse crate::pqproto::StartupMessageParams;\nuse crate::proxy::NeonOptions;\nuse crate::types::{DbName, EndpointId, RoleName};\n\n// Common header names used across serverless modules\npub(super) static NEON_REQUEST_ID: HeaderName = HeaderName::from_static(\"neon-request-id\");\npub(super) static CONN_STRING: HeaderName = HeaderName::from_static(\"neon-connection-string\");\npub(super) static RAW_TEXT_OUTPUT: HeaderName = HeaderName::from_static(\"neon-raw-text-output\");\npub(super) static ARRAY_MODE: HeaderName = HeaderName::from_static(\"neon-array-mode\");\npub(super) static ALLOW_POOL: HeaderName = HeaderName::from_static(\"neon-pool-opt-in\");\npub(super) static TXN_ISOLATION_LEVEL: HeaderName =\n    HeaderName::from_static(\"neon-batch-isolation-level\");\npub(super) static TXN_READ_ONLY: HeaderName = HeaderName::from_static(\"neon-batch-read-only\");\npub(super) static TXN_DEFERRABLE: HeaderName = HeaderName::from_static(\"neon-batch-deferrable\");\n\npub(crate) fn uuid_to_header_value(id: Uuid) -> HeaderValue {\n    let mut uuid = [0; uuid::fmt::Hyphenated::LENGTH];\n    HeaderValue::from_str(id.as_hyphenated().encode_lower(&mut uuid[..]))\n        .expect(\"uuid hyphenated format should be all 
valid header characters\")\n}\n\n/// Like [`ApiError::into_response`]\npub(crate) fn api_error_into_response(this: ApiError) -> Response<BoxBody<Bytes, hyper::Error>> {\n    match this {\n        ApiError::BadRequest(err) => HttpErrorBody::response_from_msg_and_status(\n            format!(\"{err:#?}\"), // use debug printing so that we give the cause\n            StatusCode::BAD_REQUEST,\n        ),\n        ApiError::Forbidden(_) => {\n            HttpErrorBody::response_from_msg_and_status(this.to_string(), StatusCode::FORBIDDEN)\n        }\n        ApiError::Unauthorized(_) => {\n            HttpErrorBody::response_from_msg_and_status(this.to_string(), StatusCode::UNAUTHORIZED)\n        }\n        ApiError::NotFound(_) => {\n            HttpErrorBody::response_from_msg_and_status(this.to_string(), StatusCode::NOT_FOUND)\n        }\n        ApiError::Conflict(_) => {\n            HttpErrorBody::response_from_msg_and_status(this.to_string(), StatusCode::CONFLICT)\n        }\n        ApiError::PreconditionFailed(_) => HttpErrorBody::response_from_msg_and_status(\n            this.to_string(),\n            StatusCode::PRECONDITION_FAILED,\n        ),\n        ApiError::ShuttingDown => HttpErrorBody::response_from_msg_and_status(\n            \"Shutting down\".to_string(),\n            StatusCode::SERVICE_UNAVAILABLE,\n        ),\n        ApiError::ResourceUnavailable(err) => HttpErrorBody::response_from_msg_and_status(\n            err.to_string(),\n            StatusCode::SERVICE_UNAVAILABLE,\n        ),\n        ApiError::TooManyRequests(err) => HttpErrorBody::response_from_msg_and_status(\n            err.to_string(),\n            StatusCode::TOO_MANY_REQUESTS,\n        ),\n        ApiError::Timeout(err) => HttpErrorBody::response_from_msg_and_status(\n            err.to_string(),\n            StatusCode::REQUEST_TIMEOUT,\n        ),\n        ApiError::Cancelled => HttpErrorBody::response_from_msg_and_status(\n            this.to_string(),\n            
StatusCode::INTERNAL_SERVER_ERROR,\n        ),\n        ApiError::InternalServerError(err) => HttpErrorBody::response_from_msg_and_status(\n            err.to_string(),\n            StatusCode::INTERNAL_SERVER_ERROR,\n        ),\n    }\n}\n\n/// Same as [`http_utils::error::HttpErrorBody`]\n#[derive(Serialize)]\nstruct HttpErrorBody {\n    pub(crate) msg: String,\n}\n\nimpl HttpErrorBody {\n    /// Same as [`http_utils::error::HttpErrorBody::response_from_msg_and_status`]\n    fn response_from_msg_and_status(\n        msg: String,\n        status: StatusCode,\n    ) -> Response<BoxBody<Bytes, hyper::Error>> {\n        HttpErrorBody { msg }.to_response(status)\n    }\n\n    /// Same as [`http_utils::error::HttpErrorBody::to_response`]\n    fn to_response(&self, status: StatusCode) -> Response<BoxBody<Bytes, hyper::Error>> {\n        Response::builder()\n            .status(status)\n            .header(http::header::CONTENT_TYPE, \"application/json\")\n            // we do not have nested maps with non string keys so serialization shouldn't fail\n            .body(\n                Full::new(Bytes::from(\n                    serde_json::to_string(self)\n                        .expect(\"serialising HttpErrorBody should never fail\"),\n                ))\n                .map_err(|x| match x {})\n                .boxed(),\n            )\n            .expect(\"content-type header should be valid\")\n    }\n}\n\n/// Same as [`http_utils::json::json_response`]\npub(crate) fn json_response<T: Serialize>(\n    status: StatusCode,\n    data: T,\n) -> Result<Response<BoxBody<Bytes, hyper::Error>>, ApiError> {\n    let json = serde_json::to_string(&data)\n        .context(\"Failed to serialize JSON response\")\n        .map_err(ApiError::InternalServerError)?;\n    let response = Response::builder()\n        .status(status)\n        .header(http::header::CONTENT_TYPE, \"application/json\")\n        .body(Full::new(Bytes::from(json)).map_err(|x| match x {}).boxed())\n        
.map_err(|e| ApiError::InternalServerError(e.into()))?;\n    Ok(response)\n}\n\npub(crate) fn get_conn_info(\n    config: &'static AuthenticationConfig,\n    ctx: &RequestContext,\n    connection_string: Option<&str>,\n    headers: &HeaderMap,\n) -> Result<ConnInfoWithAuth, ConnInfoError> {\n    let connection_url = match connection_string {\n        Some(connection_string) => Url::parse(connection_string)?,\n        None => {\n            let connection_string = headers\n                .get(&CONN_STRING)\n                .ok_or(ConnInfoError::InvalidHeader(&CONN_STRING))?\n                .to_str()\n                .map_err(|_| ConnInfoError::InvalidHeader(&CONN_STRING))?;\n            Url::parse(connection_string)?\n        }\n    };\n\n    let protocol = connection_url.scheme();\n    if protocol != \"postgres\" && protocol != \"postgresql\" {\n        return Err(ConnInfoError::IncorrectScheme);\n    }\n\n    let mut url_path = connection_url\n        .path_segments()\n        .ok_or(ConnInfoError::MissingDbName)?;\n\n    let dbname: DbName =\n        urlencoding::decode(url_path.next().ok_or(ConnInfoError::InvalidDbName)?)?.into();\n    ctx.set_dbname(dbname.clone());\n\n    let username = RoleName::from(urlencoding::decode(connection_url.username())?);\n    if username.is_empty() {\n        return Err(ConnInfoError::MissingUsername);\n    }\n    ctx.set_user(username.clone());\n    // TODO: make sure this is right in the context of rest broker\n    let auth = if let Some(auth) = headers.get(&AUTHORIZATION) {\n        if !config.accept_jwts {\n            return Err(ConnInfoError::MissingCredentials(Credentials::Password));\n        }\n\n        let auth = auth\n            .to_str()\n            .map_err(|_| ConnInfoError::InvalidHeader(&AUTHORIZATION))?;\n        AuthData::Jwt(\n            auth.strip_prefix(\"Bearer \")\n                .ok_or(ConnInfoError::MissingCredentials(Credentials::BearerJwt))?\n                .into(),\n        )\n    } else if let 
Some(pass) = connection_url.password() {\n        // wrong credentials provided\n        if config.accept_jwts {\n            return Err(ConnInfoError::MissingCredentials(Credentials::BearerJwt));\n        }\n\n        AuthData::Password(match urlencoding::decode_binary(pass.as_bytes()) {\n            std::borrow::Cow::Borrowed(b) => b.into(),\n            std::borrow::Cow::Owned(b) => b.into(),\n        })\n    } else if config.accept_jwts {\n        return Err(ConnInfoError::MissingCredentials(Credentials::BearerJwt));\n    } else {\n        return Err(ConnInfoError::MissingCredentials(Credentials::Password));\n    };\n    let endpoint: EndpointId = match connection_url.host() {\n        Some(url::Host::Domain(hostname)) => hostname\n            .split_once('.')\n            .map_or(hostname, |(prefix, _)| prefix)\n            .into(),\n        Some(url::Host::Ipv4(_) | url::Host::Ipv6(_)) | None => {\n            return Err(ConnInfoError::MissingHostname);\n        }\n    };\n    ctx.set_endpoint_id(endpoint.clone());\n\n    let pairs = connection_url.query_pairs();\n\n    let mut options = Option::None;\n\n    let mut params = StartupMessageParams::default();\n    params.insert(\"user\", &username);\n    params.insert(\"database\", &dbname);\n    for (key, value) in pairs {\n        params.insert(&key, &value);\n        if key == \"options\" {\n            options = Some(NeonOptions::parse_options_raw(&value));\n        }\n    }\n\n    // check the URL that was used, for metrics\n    {\n        let host_endpoint = headers\n            // get the host header\n            .get(\"host\")\n            // extract the domain\n            .and_then(|h| {\n                let (host, _port) = h.to_str().ok()?.split_once(':')?;\n                Some(host)\n            })\n            // get the endpoint prefix\n            .map(|h| h.split_once('.').map_or(h, |(prefix, _)| prefix));\n\n        let kind = if host_endpoint == Some(&*endpoint) {\n            SniKind::Sni\n  
      } else {\n            SniKind::NoSni\n        };\n\n        let protocol = ctx.protocol();\n        Metrics::get()\n            .proxy\n            .accepted_connections_by_sni\n            .inc(SniGroup { protocol, kind });\n    }\n\n    ctx.set_user_agent(\n        headers\n            .get(hyper::header::USER_AGENT)\n            .and_then(|h| h.to_str().ok())\n            .map(Into::into),\n    );\n\n    let user_info = ComputeUserInfo {\n        endpoint,\n        user: username,\n        options: options.unwrap_or_default(),\n    };\n\n    let conn_info = ConnInfo { user_info, dbname };\n    Ok(ConnInfoWithAuth { conn_info, auth })\n}\n"
  },
  {
    "path": "proxy/src/serverless/json.rs",
    "content": "use json::{ListSer, ObjectSer, ValueSer};\nuse postgres_client::Row;\nuse postgres_client::types::{Kind, Type};\nuse serde_json::Value;\n\n//\n// Convert json non-string types to strings, so that they can be passed to Postgres\n// as parameters.\n//\npub(crate) fn json_to_pg_text(json: Vec<Value>) -> Vec<Option<String>> {\n    json.iter().map(json_value_to_pg_text).collect()\n}\n\nfn json_value_to_pg_text(value: &Value) -> Option<String> {\n    match value {\n        // special care for nulls\n        Value::Null => None,\n\n        // convert to text with escaping\n        v @ (Value::Bool(_) | Value::Number(_) | Value::Object(_)) => Some(v.to_string()),\n\n        // avoid escaping here, as we pass this as a parameter\n        Value::String(s) => Some(s.clone()),\n\n        // special care for arrays\n        Value::Array(_) => json_array_to_pg_array(value),\n    }\n}\n\n//\n// Serialize a JSON array to a Postgres array. Contrary to the strings in the params\n// in the array we need to escape the strings. 
Postgres is okay with arrays of form\n// '{1,\"2\",3}'::int[], so we don't check that array holds values of the same type, leaving\n// it for Postgres to check.\n//\n// Example of the same escaping in node-postgres: packages/pg/lib/utils.js\n//\nfn json_array_to_pg_array(value: &Value) -> Option<String> {\n    match value {\n        // special care for nulls\n        Value::Null => None,\n\n        // convert to text with escaping\n        // here string needs to be escaped, as it is part of the array\n        v @ (Value::Bool(_) | Value::Number(_) | Value::String(_)) => Some(v.to_string()),\n        v @ Value::Object(_) => json_array_to_pg_array(&Value::String(v.to_string())),\n\n        // recurse into array\n        Value::Array(arr) => {\n            let vals = arr\n                .iter()\n                .map(json_array_to_pg_array)\n                .map(|v| v.unwrap_or_else(|| \"NULL\".to_string()))\n                .collect::<Vec<_>>()\n                .join(\",\");\n\n            Some(format!(\"{{{vals}}}\"))\n        }\n    }\n}\n\n#[derive(Debug, thiserror::Error)]\npub(crate) enum JsonConversionError {\n    #[error(\"internal error compute returned invalid data: {0}\")]\n    AsTextError(postgres_client::Error),\n    #[error(\"parse int error: {0}\")]\n    ParseIntError(#[from] std::num::ParseIntError),\n    #[error(\"parse float error: {0}\")]\n    ParseFloatError(#[from] std::num::ParseFloatError),\n    #[error(\"parse json error: {0}\")]\n    ParseJsonError(#[from] serde_json::Error),\n    #[error(\"unbalanced array\")]\n    UnbalancedArray,\n    #[error(\"unbalanced quoted string\")]\n    UnbalancedString,\n}\n\nenum OutputMode<'a> {\n    Array(ListSer<'a>),\n    Object(ObjectSer<'a>),\n}\n\nimpl OutputMode<'_> {\n    fn key(&mut self, key: &str) -> ValueSer<'_> {\n        match self {\n            OutputMode::Array(values) => values.entry(),\n            OutputMode::Object(map) => map.key(key),\n        }\n    }\n\n    fn finish(self) {\n        
match self {\n            OutputMode::Array(values) => values.finish(),\n            OutputMode::Object(map) => map.finish(),\n        }\n    }\n}\n\n//\n// Convert postgres row with text-encoded values to JSON object\n//\npub(crate) fn pg_text_row_to_json(\n    output: ValueSer,\n    row: &Row,\n    raw_output: bool,\n    array_mode: bool,\n) -> Result<(), JsonConversionError> {\n    let mut entries = if array_mode {\n        OutputMode::Array(output.list())\n    } else {\n        OutputMode::Object(output.object())\n    };\n\n    for (i, column) in row.columns().iter().enumerate() {\n        let pg_value = row.as_text(i).map_err(JsonConversionError::AsTextError)?;\n\n        let value = entries.key(column.name());\n\n        match pg_value {\n            Some(v) if raw_output => value.value(v),\n            Some(v) => pg_text_to_json(value, v, column.type_())?,\n            None => value.value(json::Null),\n        }\n    }\n\n    entries.finish();\n    Ok(())\n}\n\n//\n// Convert postgres text-encoded value to JSON value\n//\nfn pg_text_to_json(output: ValueSer, val: &str, pg_type: &Type) -> Result<(), JsonConversionError> {\n    if let Kind::Array(elem_type) = pg_type.kind() {\n        // todo: we should fetch this from postgres.\n        let delimiter = ',';\n\n        json::value_as_list!(|output| pg_array_parse(output, val, elem_type, delimiter)?);\n        return Ok(());\n    }\n\n    match *pg_type {\n        Type::BOOL => output.value(val == \"t\"),\n        Type::INT2 | Type::INT4 => {\n            let val = val.parse::<i32>()?;\n            output.value(val);\n        }\n        Type::FLOAT4 | Type::FLOAT8 => {\n            let fval = val.parse::<f64>()?;\n            if fval.is_finite() {\n                output.value(fval);\n            } else {\n                // Pass NaN, Inf, -Inf as strings\n                // JS JSON.stringify() converts them to null, but we\n                // want to preserve them, so we pass them as strings\n             
   output.value(val);\n            }\n        }\n        // we assume that the string value is valid json.\n        Type::JSON | Type::JSONB => output.write_raw_json(val.as_bytes()),\n        _ => output.value(val),\n    }\n\n    Ok(())\n}\n\n/// Parse postgres array into JSON array.\n///\n/// This is a bit involved because we need to handle nested arrays and quoted\n/// values. Unlike postgres we don't check that all nested arrays have the same\n/// dimensions, we just return them as is.\n///\n/// <https://www.postgresql.org/docs/current/arrays.html#ARRAYS-IO>\n///\n/// The external text representation of an array value consists of items that are interpreted\n/// according to the I/O conversion rules for the array's element type, plus decoration that\n/// indicates the array structure. The decoration consists of curly braces (`{` and `}`) around\n/// the array value plus delimiter characters between adjacent items. The delimiter character\n/// is usually a comma (,) but can be something else: it is determined by the typdelim setting\n/// for the array's element type. 
Among the standard data types provided in the PostgreSQL\n/// distribution, all use a comma, except for type box, which uses a semicolon (;).\n///\n/// In a multidimensional array, each dimension (row, plane, cube, etc.)\n/// gets its own level of curly braces, and delimiters must be written between adjacent\n/// curly-braced entities of the same level.\nfn pg_array_parse(\n    elements: &mut ListSer,\n    mut pg_array: &str,\n    elem: &Type,\n    delim: char,\n) -> Result<(), JsonConversionError> {\n    // skip bounds decoration, eg:\n    // `[1:1][-2:-1][3:5]={{{1,2,3},{4,5,6}}}`\n    // technically these are significant, but we have no way to represent them in json.\n    if let Some('[') = pg_array.chars().next() {\n        let Some((_bounds, array)) = pg_array.split_once('=') else {\n            return Err(JsonConversionError::UnbalancedArray);\n        };\n        pg_array = array;\n    }\n\n    // whitespace might precede a `{`.\n    let pg_array = pg_array.trim_start();\n\n    let rest = pg_array_parse_inner(elements, pg_array, elem, delim)?;\n    if !rest.is_empty() {\n        return Err(JsonConversionError::UnbalancedArray);\n    }\n\n    Ok(())\n}\n\n/// reads a single array from the `pg_array` string and pushes each value to `elements`.\n/// returns the rest of the `pg_array` string that was not read.\nfn pg_array_parse_inner<'a>(\n    elements: &mut ListSer,\n    mut pg_array: &'a str,\n    elem: &Type,\n    delim: char,\n) -> Result<&'a str, JsonConversionError> {\n    // array should have a `{` prefix.\n    pg_array = pg_array\n        .strip_prefix('{')\n        .ok_or(JsonConversionError::UnbalancedArray)?;\n\n    let mut q = String::new();\n\n    loop {\n        let value = elements.entry();\n        pg_array = pg_array_parse_item(value, &mut q, pg_array, elem, delim)?;\n\n        // check for separator.\n        if let Some(next) = pg_array.strip_prefix(delim) {\n            // next item.\n            pg_array = next;\n        } else {\n         
   break;\n        }\n    }\n\n    let Some(next) = pg_array.strip_prefix('}') else {\n        // missing `}` terminator.\n        return Err(JsonConversionError::UnbalancedArray);\n    };\n\n    // whitespace might follow a `}`.\n    Ok(next.trim_start())\n}\n\n/// reads a single item from the `pg_array` string.\n/// returns the rest of the `pg_array` string that was not read.\n///\n/// `quoted` is a scratch allocation that has no defined output.\nfn pg_array_parse_item<'a>(\n    output: ValueSer,\n    quoted: &mut String,\n    mut pg_array: &'a str,\n    elem: &Type,\n    delim: char,\n) -> Result<&'a str, JsonConversionError> {\n    // We are trying to parse an array item.\n    // This could be a new array, if this is a multi-dimensional array.\n    // This could be a quoted string representing `elem`.\n    // This could be an unquoted string representing `elem`.\n\n    // whitespace might precede an item.\n    pg_array = pg_array.trim_start();\n\n    if pg_array.starts_with('{') {\n        // nested array.\n        pg_array =\n            json::value_as_list!(|output| pg_array_parse_inner(output, pg_array, elem, delim))?;\n        return Ok(pg_array);\n    }\n\n    if let Some(mut pg_array) = pg_array.strip_prefix('\"') {\n        // the parsed string is un-escaped and written into quoted.\n        pg_array = pg_array_parse_quoted(quoted, pg_array)?;\n\n        // we have un-escaped the string, parse it as pgtext.\n        pg_text_to_json(output, quoted, elem)?;\n\n        return Ok(pg_array);\n    }\n\n    // we need to parse an item. 
read until we find a delimiter or `}`.\n    let index = pg_array\n        .find([delim, '}'])\n        .ok_or(JsonConversionError::UnbalancedArray)?;\n\n    let item;\n    (item, pg_array) = pg_array.split_at(index);\n\n    // item might have trailing whitespace that we need to ignore.\n    let item = item.trim_end();\n\n    // we might have an item string:\n    // check for null\n    if item == \"NULL\" {\n        output.value(json::Null);\n    } else {\n        pg_text_to_json(output, item, elem)?;\n    }\n\n    Ok(pg_array)\n}\n\n/// reads a single quoted item from the `pg_array` string.\n///\n/// Returns the rest of the `pg_array` string that was not read.\n/// The output is written into `quoted`.\n///\n/// The pg_array string must have a `\"` terminator, but the `\"` initial value\n/// must have already been removed from the input. The terminator is removed.\nfn pg_array_parse_quoted<'a>(\n    quoted: &mut String,\n    mut pg_array: &'a str,\n) -> Result<&'a str, JsonConversionError> {\n    // The array output routine will put double quotes around element values if they are empty strings,\n    // contain curly braces, delimiter characters, double quotes, backslashes, or white space,\n    // or match the word `NULL`. Double quotes and backslashes embedded in element values will be backslash-escaped.\n    // For numeric data types it is safe to assume that double quotes will never appear,\n    // but for textual data types one should be prepared to cope with either the presence or absence of quotes.\n\n    quoted.clear();\n\n    // We write to quoted in chunks terminated by an escape character.\n    // Eg if we have the input `foo\\\"bar\"`, then we write `foo`, then `\"`, then finally `bar`.\n\n    loop {\n        // we need to parse a chunk. 
read until we find a '\\\\' or `\"`.\n        let i = pg_array\n            .find(['\\\\', '\"'])\n            .ok_or(JsonConversionError::UnbalancedString)?;\n\n        let chunk: &str;\n        (chunk, pg_array) = pg_array\n            .split_at_checked(i)\n            .expect(\"i is guaranteed to be in-bounds of pg_array\");\n\n        // push the chunk.\n        quoted.push_str(chunk);\n\n        // consume the chunk_end character.\n        let chunk_end: char;\n        (chunk_end, pg_array) =\n            split_first_char(pg_array).expect(\"pg_array should start with either '\\\\\\\\' or '\\\"'\");\n\n        // finished.\n        if chunk_end == '\"' {\n            // whitespace might follow the '\"'.\n            pg_array = pg_array.trim_start();\n\n            break Ok(pg_array);\n        }\n\n        // consume the escaped character.\n        let escaped: char;\n        (escaped, pg_array) =\n            split_first_char(pg_array).ok_or(JsonConversionError::UnbalancedString)?;\n\n        quoted.push(escaped);\n    }\n}\n\nfn split_first_char(s: &str) -> Option<(char, &str)> {\n    let mut chars = s.chars();\n    let c = chars.next()?;\n    Some((c, chars.as_str()))\n}\n\n#[cfg(test)]\nmod tests {\n    use serde_json::json;\n\n    use super::*;\n\n    #[test]\n    fn test_atomic_types_to_pg_params() {\n        let json = vec![Value::Bool(true), Value::Bool(false)];\n        let pg_params = json_to_pg_text(json);\n        assert_eq!(\n            pg_params,\n            vec![Some(\"true\".to_owned()), Some(\"false\".to_owned())]\n        );\n\n        let json = vec![Value::Number(serde_json::Number::from(42))];\n        let pg_params = json_to_pg_text(json);\n        assert_eq!(pg_params, vec![Some(\"42\".to_owned())]);\n\n        let json = vec![Value::String(\"foo\\\"\".to_string())];\n        let pg_params = json_to_pg_text(json);\n        assert_eq!(pg_params, vec![Some(\"foo\\\"\".to_owned())]);\n\n        let json = vec![Value::Null];\n        let 
pg_params = json_to_pg_text(json);\n        assert_eq!(pg_params, vec![None]);\n    }\n\n    #[test]\n    fn test_json_array_to_pg_array() {\n        // atoms and escaping\n        let json = \"[true, false, null, \\\"NULL\\\", 42, \\\"foo\\\", \\\"bar\\\\\\\"-\\\\\\\\\\\"]\";\n        let json: Value = serde_json::from_str(json).unwrap();\n        let pg_params = json_to_pg_text(vec![json]);\n        assert_eq!(\n            pg_params,\n            vec![Some(\n                \"{true,false,NULL,\\\"NULL\\\",42,\\\"foo\\\",\\\"bar\\\\\\\"-\\\\\\\\\\\"}\".to_owned()\n            )]\n        );\n\n        // nested arrays\n        let json = \"[[true, false], [null, 42], [\\\"foo\\\", \\\"bar\\\\\\\"-\\\\\\\\\\\"]]\";\n        let json: Value = serde_json::from_str(json).unwrap();\n        let pg_params = json_to_pg_text(vec![json]);\n        assert_eq!(\n            pg_params,\n            vec![Some(\n                \"{{true,false},{NULL,42},{\\\"foo\\\",\\\"bar\\\\\\\"-\\\\\\\\\\\"}}\".to_owned()\n            )]\n        );\n        // array of objects\n        let json = r#\"[{\"foo\": 1},{\"bar\": 2}]\"#;\n        let json: Value = serde_json::from_str(json).unwrap();\n        let pg_params = json_to_pg_text(vec![json]);\n        assert_eq!(\n            pg_params,\n            vec![Some(r#\"{\"{\\\"foo\\\":1}\",\"{\\\"bar\\\":2}\"}\"#.to_owned())]\n        );\n    }\n\n    fn pg_text_to_json(val: &str, pg_type: &Type) -> Value {\n        let output = json::value_to_string!(|v| super::pg_text_to_json(v, val, pg_type).unwrap());\n        serde_json::from_str(&output).unwrap()\n    }\n\n    fn pg_array_parse(pg_array: &str, pg_type: &Type) -> Value {\n        let output = json::value_to_string!(|v| json::value_as_list!(|v| {\n            super::pg_array_parse(v, pg_array, pg_type, ',').unwrap();\n        }));\n        serde_json::from_str(&output).unwrap()\n    }\n\n    #[test]\n    fn test_atomic_types_parse() {\n        assert_eq!(pg_text_to_json(\"foo\", 
&Type::TEXT), json!(\"foo\"));\n        assert_eq!(pg_text_to_json(\"42\", &Type::INT4), json!(42));\n        assert_eq!(pg_text_to_json(\"42\", &Type::INT2), json!(42));\n        assert_eq!(pg_text_to_json(\"42\", &Type::INT8), json!(\"42\"));\n        assert_eq!(pg_text_to_json(\"42.42\", &Type::FLOAT8), json!(42.42));\n        assert_eq!(pg_text_to_json(\"42.42\", &Type::FLOAT4), json!(42.42));\n        assert_eq!(pg_text_to_json(\"NaN\", &Type::FLOAT4), json!(\"NaN\"));\n        assert_eq!(\n            pg_text_to_json(\"Infinity\", &Type::FLOAT4),\n            json!(\"Infinity\")\n        );\n        assert_eq!(\n            pg_text_to_json(\"-Infinity\", &Type::FLOAT4),\n            json!(\"-Infinity\")\n        );\n\n        let json: Value =\n            serde_json::from_str(\"{\\\"s\\\":\\\"str\\\",\\\"n\\\":42,\\\"f\\\":4.2,\\\"a\\\":[null,3,\\\"a\\\"]}\")\n                .unwrap();\n        assert_eq!(\n            pg_text_to_json(\n                r#\"{\"s\":\"str\",\"n\":42,\"f\":4.2,\"a\":[null,3,\"a\"]}\"#,\n                &Type::JSONB\n            ),\n            json\n        );\n    }\n\n    #[test]\n    fn test_pg_array_parse_text() {\n        fn pt(pg_arr: &str) -> Value {\n            pg_array_parse(pg_arr, &Type::TEXT)\n        }\n        assert_eq!(\n            pt(r#\"{\"aa\\\"\\\\\\,a\",cha,\"bbbb\"}\"#),\n            json!([\"aa\\\"\\\\,a\", \"cha\", \"bbbb\"])\n        );\n        assert_eq!(\n            pt(r#\"{{\"foo\",\"bar\"},{\"bee\",\"bop\"}}\"#),\n            json!([[\"foo\", \"bar\"], [\"bee\", \"bop\"]])\n        );\n        assert_eq!(\n            pt(r#\"{{{{\"foo\",NULL,\"bop\",bup}}}}\"#),\n            json!([[[[\"foo\", null, \"bop\", \"bup\"]]]])\n        );\n        assert_eq!(\n            pt(r#\"{{\"1\",2,3},{4,NULL,6},{NULL,NULL,NULL}}\"#),\n            json!([[\"1\", \"2\", \"3\"], [\"4\", null, \"6\"], [null, null, null]])\n        );\n    }\n\n    #[test]\n    fn test_pg_array_parse_bool() {\n        fn pb(pg_arr: 
&str) -> Value {\n            pg_array_parse(pg_arr, &Type::BOOL)\n        }\n        assert_eq!(pb(r#\"{t,f,t}\"#), json!([true, false, true]));\n        assert_eq!(pb(r#\"{{t,f,t}}\"#), json!([[true, false, true]]));\n        assert_eq!(\n            pb(r#\"{{t,f},{f,t}}\"#),\n            json!([[true, false], [false, true]])\n        );\n        assert_eq!(\n            pb(r#\"{{t,NULL},{NULL,f}}\"#),\n            json!([[true, null], [null, false]])\n        );\n    }\n\n    #[test]\n    fn test_pg_array_parse_numbers() {\n        fn pn(pg_arr: &str, ty: &Type) -> Value {\n            pg_array_parse(pg_arr, ty)\n        }\n        assert_eq!(pn(r#\"{1,2,3}\"#, &Type::INT4), json!([1, 2, 3]));\n        assert_eq!(pn(r#\"{1,2,3}\"#, &Type::INT2), json!([1, 2, 3]));\n        assert_eq!(pn(r#\"{1,2,3}\"#, &Type::INT8), json!([\"1\", \"2\", \"3\"]));\n        assert_eq!(pn(r#\"{1,2,3}\"#, &Type::FLOAT4), json!([1.0, 2.0, 3.0]));\n        assert_eq!(pn(r#\"{1,2,3}\"#, &Type::FLOAT8), json!([1.0, 2.0, 3.0]));\n        assert_eq!(\n            pn(r#\"{1.1,2.2,3.3}\"#, &Type::FLOAT4),\n            json!([1.1, 2.2, 3.3])\n        );\n        assert_eq!(\n            pn(r#\"{1.1,2.2,3.3}\"#, &Type::FLOAT8),\n            json!([1.1, 2.2, 3.3])\n        );\n        assert_eq!(\n            pn(r#\"{NaN,Infinity,-Infinity}\"#, &Type::FLOAT4),\n            json!([\"NaN\", \"Infinity\", \"-Infinity\"])\n        );\n        assert_eq!(\n            pn(r#\"{NaN,Infinity,-Infinity}\"#, &Type::FLOAT8),\n            json!([\"NaN\", \"Infinity\", \"-Infinity\"])\n        );\n    }\n\n    #[test]\n    fn test_pg_array_with_decoration() {\n        fn p(pg_arr: &str) -> Value {\n            pg_array_parse(pg_arr, &Type::INT2)\n        }\n        assert_eq!(\n            p(r#\"[1:1][-2:-1][3:5]={{{1,2,3},{4,5,6}}}\"#),\n            json!([[[1, 2, 3], [4, 5, 6]]])\n        );\n    }\n\n    #[test]\n    fn test_pg_array_parse_json() {\n        fn pt(pg_arr: &str) -> Value {\n            
pg_array_parse(pg_arr, &Type::JSONB)\n        }\n        assert_eq!(pt(r#\"{\"{}\"}\"#), json!([{}]));\n        assert_eq!(\n            pt(r#\"{\"{\\\"foo\\\": 1, \\\"bar\\\": 2}\"}\"#),\n            json!([{\"foo\": 1, \"bar\": 2}])\n        );\n        assert_eq!(\n            pt(r#\"{\"{\\\"foo\\\": 1}\", \"{\\\"bar\\\": 2}\"}\"#),\n            json!([{\"foo\": 1}, {\"bar\": 2}])\n        );\n        assert_eq!(\n            pt(r#\"{{\"{\\\"foo\\\": 1}\", \"{\\\"bar\\\": 2}\"}}\"#),\n            json!([[{\"foo\": 1}, {\"bar\": 2}]])\n        );\n    }\n}\n"
  },
  {
    "path": "proxy/src/serverless/local_conn_pool.rs",
    "content": "//! Manages the pool of connections between local_proxy and postgres.\n//!\n//! The pool is keyed by database and role_name, and can contain multiple connections\n//! shared between users.\n//!\n//! The pool manages the pg_session_jwt extension used for authorizing\n//! requests in the db.\n//!\n//! The first time a db/role pair is seen, local_proxy attempts to install the extension\n//! and grant usage to the role on the given schema.\n\nuse std::collections::HashMap;\nuse std::pin::pin;\nuse std::sync::Arc;\nuse std::sync::atomic::AtomicUsize;\nuse std::task::{Poll, ready};\nuse std::time::Duration;\n\nuse base64::Engine as _;\nuse base64::prelude::BASE64_URL_SAFE_NO_PAD;\nuse ed25519_dalek::{Signature, Signer, SigningKey};\nuse futures::future::poll_fn;\nuse futures::{Future, FutureExt};\nuse indexmap::IndexMap;\nuse jose_jwk::jose_b64::base64ct::{Base64UrlUnpadded, Encoding};\nuse parking_lot::RwLock;\nuse postgres_client::tls::NoTlsStream;\nuse serde_json::value::RawValue;\nuse tokio::net::TcpStream;\nuse tokio::time::Instant;\nuse tokio_util::sync::CancellationToken;\nuse tracing::{debug, error, info, info_span};\n\nuse super::backend::HttpConnError;\nuse super::conn_pool_lib::{\n    Client, ClientDataEnum, ClientInnerCommon, ClientInnerExt, ConnInfo, DbUserConn,\n    EndpointConnPool,\n};\nuse super::sql_over_http::SqlOverHttpError;\nuse crate::context::RequestContext;\nuse crate::control_plane::messages::{ColdStartInfo, MetricsAuxInfo};\nuse crate::metrics::Metrics;\n\npub(crate) const EXT_NAME: &str = \"pg_session_jwt\";\npub(crate) const EXT_VERSION: &str = \"0.3.1\";\npub(crate) const EXT_SCHEMA: &str = \"auth\";\n\n#[derive(Clone)]\npub(crate) struct ClientDataLocal {\n    session: tokio::sync::watch::Sender<uuid::Uuid>,\n    cancel: CancellationToken,\n    key: SigningKey,\n    jti: u64,\n}\n\nimpl ClientDataLocal {\n    pub fn session(&mut self) -> &mut tokio::sync::watch::Sender<uuid::Uuid> {\n        &mut self.session\n    }\n\n    
pub fn cancel(&mut self) {\n        self.cancel.cancel();\n    }\n}\n\npub(crate) struct LocalConnPool<C: ClientInnerExt> {\n    global_pool: Arc<RwLock<EndpointConnPool<C>>>,\n\n    config: &'static crate::config::HttpConfig,\n}\n\nimpl<C: ClientInnerExt> LocalConnPool<C> {\n    pub(crate) fn new(config: &'static crate::config::HttpConfig) -> Arc<Self> {\n        Arc::new(Self {\n            global_pool: Arc::new(RwLock::new(EndpointConnPool::new(\n                HashMap::new(),\n                0,\n                config.pool_options.max_conns_per_endpoint,\n                Arc::new(AtomicUsize::new(0)),\n                config.pool_options.max_total_conns,\n                String::from(\"local_pool\"),\n            ))),\n            config,\n        })\n    }\n\n    pub(crate) fn get_idle_timeout(&self) -> Duration {\n        self.config.pool_options.idle_timeout\n    }\n\n    pub(crate) fn get(\n        self: &Arc<Self>,\n        ctx: &RequestContext,\n        conn_info: &ConnInfo,\n    ) -> Result<Option<Client<C>>, HttpConnError> {\n        let client = self\n            .global_pool\n            .write()\n            .get_conn_entry(conn_info.db_and_user())\n            .map(|entry| entry.conn);\n\n        // ok return cached connection if found and establish a new one otherwise\n        if let Some(mut client) = client {\n            if client.inner.is_closed() {\n                info!(\"local_pool: cached connection '{conn_info}' is closed, opening a new one\");\n                return Ok(None);\n            }\n\n            tracing::Span::current()\n                .record(\"conn_id\", tracing::field::display(client.get_conn_id()));\n            tracing::Span::current().record(\n                \"pid\",\n                tracing::field::display(client.inner.get_process_id()),\n            );\n            debug!(\n                cold_start_info = ColdStartInfo::HttpPoolHit.as_str(),\n                \"local_pool: reusing connection '{conn_info}'\"\n       
     );\n\n            match client.get_data() {\n                ClientDataEnum::Local(data) => {\n                    data.session().send(ctx.session_id())?;\n                }\n\n                ClientDataEnum::Remote(data) => {\n                    data.session().send(ctx.session_id())?;\n                }\n                ClientDataEnum::Http(_) => (),\n            }\n\n            ctx.set_cold_start_info(ColdStartInfo::HttpPoolHit);\n            ctx.success();\n\n            return Ok(Some(Client::new(\n                client,\n                conn_info.clone(),\n                Arc::downgrade(&self.global_pool),\n            )));\n        }\n        Ok(None)\n    }\n\n    pub(crate) fn initialized(self: &Arc<Self>, conn_info: &ConnInfo) -> bool {\n        if let Some(pool) = self.global_pool.read().get_pool(conn_info.db_and_user()) {\n            return pool.is_initialized();\n        }\n        false\n    }\n\n    pub(crate) fn set_initialized(self: &Arc<Self>, conn_info: &ConnInfo) {\n        if let Some(pool) = self\n            .global_pool\n            .write()\n            .get_pool_mut(conn_info.db_and_user())\n        {\n            pool.set_initialized();\n        }\n    }\n}\n\n#[allow(clippy::too_many_arguments)]\npub(crate) fn poll_client<C: ClientInnerExt>(\n    global_pool: Arc<LocalConnPool<C>>,\n    ctx: &RequestContext,\n    conn_info: ConnInfo,\n    client: C,\n    mut connection: postgres_client::Connection<TcpStream, NoTlsStream>,\n    key: SigningKey,\n    conn_id: uuid::Uuid,\n    aux: MetricsAuxInfo,\n) -> Client<C> {\n    let conn_gauge = Metrics::get().proxy.db_connections.guard(ctx.protocol());\n    let mut session_id = ctx.session_id();\n    let (tx, mut rx) = tokio::sync::watch::channel(session_id);\n\n    let span = info_span!(parent: None, \"connection\", %conn_id);\n    let cold_start_info = ctx.cold_start_info();\n    span.in_scope(|| {\n        info!(cold_start_info = cold_start_info.as_str(), %conn_info, %session_id, \"new 
connection\");\n    });\n    let pool = Arc::downgrade(&global_pool);\n\n    let db_user = conn_info.db_and_user();\n    let idle = global_pool.get_idle_timeout();\n    let cancel = CancellationToken::new();\n    let cancelled = cancel.clone().cancelled_owned();\n\n    tokio::spawn(async move {\n        let _conn_gauge = conn_gauge;\n        let mut idle_timeout = pin!(tokio::time::sleep(idle));\n        let mut cancelled = pin!(cancelled);\n\n        poll_fn(move |cx| {\n            let _instrument = span.enter();\n\n            if cancelled.as_mut().poll(cx).is_ready() {\n                info!(\"connection dropped\");\n                return Poll::Ready(());\n            }\n\n            match rx.has_changed() {\n                Ok(true) => {\n                    session_id = *rx.borrow_and_update();\n                    info!(%session_id, \"changed session\");\n                    idle_timeout.as_mut().reset(Instant::now() + idle);\n                }\n                Err(_) => {\n                    info!(\"connection dropped\");\n                    return Poll::Ready(());\n                }\n                _ => {}\n            }\n\n            // 5 minute idle connection timeout\n            if idle_timeout.as_mut().poll(cx).is_ready() {\n                idle_timeout.as_mut().reset(Instant::now() + idle);\n                info!(\"connection idle\");\n                if let Some(pool) = pool.clone().upgrade() {\n                    // remove client from pool - should close the connection if it's idle.\n                    // does nothing if the client is currently checked-out and in-use\n                    if pool\n                        .global_pool\n                        .write()\n                        .remove_client(db_user.clone(), conn_id)\n                    {\n                        info!(\"idle connection removed\");\n                    }\n                }\n            }\n\n            match ready!(connection.poll_unpin(cx)) {\n               
 Err(e) => error!(%session_id, \"connection error: {}\", e),\n                Ok(()) => info!(\"connection closed\"),\n            }\n\n            // remove from connection pool\n            if let Some(pool) = pool.clone().upgrade()\n                && pool\n                    .global_pool\n                    .write()\n                    .remove_client(db_user.clone(), conn_id)\n            {\n                info!(\"closed connection removed\");\n            }\n\n            Poll::Ready(())\n        })\n        .await;\n    });\n\n    let inner = ClientInnerCommon {\n        inner: client,\n        aux,\n        conn_id,\n        data: ClientDataEnum::Local(ClientDataLocal {\n            session: tx,\n            cancel,\n            key,\n            jti: 0,\n        }),\n    };\n\n    Client::new(inner, conn_info, Arc::downgrade(&global_pool.global_pool))\n}\n\nimpl ClientInnerCommon<postgres_client::Client> {\n    pub(crate) async fn set_jwt_session(&mut self, payload: &[u8]) -> Result<(), SqlOverHttpError> {\n        if let ClientDataEnum::Local(local_data) = &mut self.data {\n            local_data.jti += 1;\n            let token = resign_jwt(&local_data.key, payload, local_data.jti)?;\n\n            // initiates the auth session\n            // this is safe from query injections as the jwt format free of any escape characters.\n            let query = format!(\"select auth.jwt_session_init('{token}')\");\n            self.inner\n                .batch_execute(&query)\n                .await\n                .map_err(SqlOverHttpError::InternalPostgres)?;\n\n            let pid = self.inner.get_process_id();\n            info!(pid, jti = local_data.jti, \"user session state init\");\n            Ok(())\n        } else {\n            panic!(\"unexpected client data type\");\n        }\n    }\n}\n\n/// implements relatively efficient in-place json object key upserting\n///\n/// only supports top-level keys\nfn upsert_json_object(\n    payload: &[u8],\n    
key: &str,\n    value: &RawValue,\n) -> Result<String, serde_json::Error> {\n    let mut payload = serde_json::from_slice::<IndexMap<&str, &RawValue>>(payload)?;\n    payload.insert(key, value);\n    serde_json::to_string(&payload)\n}\n\nfn resign_jwt(sk: &SigningKey, payload: &[u8], jti: u64) -> Result<String, HttpConnError> {\n    let mut buffer = itoa::Buffer::new();\n\n    // encode the jti integer to a json rawvalue\n    let jti = serde_json::from_str::<&RawValue>(buffer.format(jti))\n        .expect(\"itoa formatted integer should be guaranteed valid json\");\n\n    // update the jti in-place\n    let payload =\n        upsert_json_object(payload, \"jti\", jti).map_err(HttpConnError::JwtPayloadError)?;\n\n    // sign the jwt\n    let token = sign_jwt(sk, payload.as_bytes());\n\n    Ok(token)\n}\n\nfn sign_jwt(sk: &SigningKey, payload: &[u8]) -> String {\n    let header_len = 20;\n    let payload_len = Base64UrlUnpadded::encoded_len(payload);\n    let signature_len = Base64UrlUnpadded::encoded_len(&[0; 64]);\n    let total_len = header_len + payload_len + signature_len + 2;\n\n    let mut jwt = String::with_capacity(total_len);\n    let cap = jwt.capacity();\n\n    // we only need an empty header with the alg specified.\n    // base64url(r#\"{\"alg\":\"EdDSA\"}\"#) == \"eyJhbGciOiJFZERTQSJ9\"\n    jwt.push_str(\"eyJhbGciOiJFZERTQSJ9.\");\n\n    // encode the jwt payload in-place\n    BASE64_URL_SAFE_NO_PAD.encode_string(payload, &mut jwt);\n\n    // create the signature from the encoded header || payload\n    let sig: Signature = sk.sign(jwt.as_bytes());\n\n    jwt.push('.');\n\n    // encode the jwt signature in-place\n    BASE64_URL_SAFE_NO_PAD.encode_string(sig.to_bytes(), &mut jwt);\n\n    debug_assert_eq!(\n        jwt.len(),\n        total_len,\n        \"the jwt len should match our expected len\"\n    );\n    debug_assert_eq!(jwt.capacity(), cap, \"the jwt capacity should not change\");\n\n    jwt\n}\n\n#[cfg(test)]\nmod tests {\n    use 
ed25519_dalek::SigningKey;\n    use typed_json::json;\n\n    use super::resign_jwt;\n\n    #[test]\n    fn jwt_token_snapshot() {\n        let key = SigningKey::from_bytes(&[1; 32]);\n        let data =\n            json!({\"foo\":\"bar\",\"jti\":\"foo\\nbar\",\"nested\":{\"jti\":\"tricky nesting\"}}).to_string();\n\n        let jwt = resign_jwt(&key, data.as_bytes(), 2).unwrap();\n\n        // To validate the JWT, copy the JWT string and paste it into https://jwt.io/.\n        // In the public-key box, paste the following jwk public key\n        // `{\"kty\":\"OKP\",\"crv\":\"Ed25519\",\"x\":\"iojj3XQJ8ZX9UtstPLpdcspnCb8dlBIb83SIAbQPb1w\"}`\n        // Note - jwt.io doesn't support EdDSA :(\n        // https://github.com/jsonwebtoken/jsonwebtoken.github.io/issues/509\n\n        // let jwk = jose_jwk::Key::Okp(jose_jwk::Okp {\n        //     crv: jose_jwk::OkpCurves::Ed25519,\n        //     x: jose_jwk::jose_b64::serde::Bytes::from(key.verifying_key().to_bytes().to_vec()),\n        //     d: None,\n        // });\n        // println!(\"{}\", serde_json::to_string(&jwk).unwrap());\n\n        assert_eq!(\n            jwt,\n            \"eyJhbGciOiJFZERTQSJ9.eyJmb28iOiJiYXIiLCJqdGkiOjIsIm5lc3RlZCI6eyJqdGkiOiJ0cmlja3kgbmVzdGluZyJ9fQ.Cvyc2By33KI0f0obystwdy8PN111L3Sc9_Mr2CU3XshtSqSdxuRxNEZGbb_RvyJf2IzheC_s7aBZ-jLeQ9N0Bg\"\n        );\n    }\n}\n"
  },
  {
    "path": "proxy/src/serverless/mod.rs",
    "content": "//! Routers for our serverless APIs\n//!\n//! Handles both SQL over HTTP and SQL over Websockets.\n\nmod backend;\npub mod cancel_set;\nmod conn_pool;\nmod conn_pool_lib;\nmod error;\nmod http_conn_pool;\nmod http_util;\nmod json;\nmod local_conn_pool;\n#[cfg(feature = \"rest_broker\")]\npub mod rest;\nmod sql_over_http;\nmod websocket;\n\nuse std::net::{IpAddr, SocketAddr};\nuse std::pin::{Pin, pin};\nuse std::sync::Arc;\n\nuse anyhow::Context;\nuse arc_swap::ArcSwapOption;\nuse async_trait::async_trait;\nuse atomic_take::AtomicTake;\nuse bytes::Bytes;\npub use conn_pool_lib::GlobalConnPoolOptions;\nuse futures::TryFutureExt;\nuse futures::future::{Either, select};\nuse http::{Method, Response, StatusCode};\nuse http_body_util::combinators::BoxBody;\nuse http_body_util::{BodyExt, Empty};\nuse http_util::{NEON_REQUEST_ID, uuid_to_header_value};\nuse http_utils::error::ApiError;\nuse hyper::body::Incoming;\nuse hyper_util::rt::TokioExecutor;\nuse hyper_util::server::conn::auto::Builder;\nuse rand::SeedableRng;\nuse rand::rngs::StdRng;\nuse tokio::io::{AsyncRead, AsyncWrite};\nuse tokio::net::{TcpListener, TcpStream};\nuse tokio::time::timeout;\nuse tokio_rustls::TlsAcceptor;\nuse tokio_util::sync::CancellationToken;\nuse tokio_util::task::TaskTracker;\nuse tracing::{Instrument, info, warn};\n\nuse crate::cancellation::CancellationHandler;\nuse crate::config::{ProxyConfig, ProxyProtocolV2};\nuse crate::context::RequestContext;\nuse crate::ext::TaskExt;\nuse crate::metrics::Metrics;\nuse crate::protocol2::{ConnectHeader, ConnectionInfo, read_proxy_protocol};\nuse crate::rate_limiter::EndpointRateLimiter;\nuse crate::serverless::backend::PoolingBackend;\nuse crate::serverless::http_util::{api_error_into_response, json_response};\nuse crate::util::run_until_cancelled;\n\npub(crate) const SERVERLESS_DRIVER_SNI: &str = \"api\";\npub(crate) const AUTH_BROKER_SNI: &str = \"apiauth\";\n\npub async fn task_main(\n    config: &'static ProxyConfig,\n    
auth_backend: &'static crate::auth::Backend<'static, ()>,\n    ws_listener: TcpListener,\n    cancellation_token: CancellationToken,\n    cancellation_handler: Arc<CancellationHandler>,\n    endpoint_rate_limiter: Arc<EndpointRateLimiter>,\n) -> anyhow::Result<()> {\n    scopeguard::defer! {\n        info!(\"websocket server has shut down\");\n    }\n\n    let local_pool = local_conn_pool::LocalConnPool::new(&config.http_config);\n    let conn_pool = conn_pool_lib::GlobalConnPool::new(&config.http_config);\n    {\n        let conn_pool = Arc::clone(&conn_pool);\n        tokio::spawn(async move {\n            conn_pool.gc_worker(StdRng::from_os_rng()).await;\n        });\n    }\n\n    // shutdown the connection pool\n    tokio::spawn({\n        let cancellation_token = cancellation_token.clone();\n        let conn_pool = conn_pool.clone();\n        async move {\n            cancellation_token.cancelled().await;\n            tokio::task::spawn_blocking(move || conn_pool.shutdown())\n                .await\n                .propagate_task_panic();\n        }\n    });\n\n    let http_conn_pool = conn_pool_lib::GlobalConnPool::new(&config.http_config);\n    {\n        let http_conn_pool = Arc::clone(&http_conn_pool);\n        tokio::spawn(async move {\n            http_conn_pool.gc_worker(StdRng::from_os_rng()).await;\n        });\n    }\n\n    // shutdown the connection pool\n    tokio::spawn({\n        let cancellation_token = cancellation_token.clone();\n        let http_conn_pool = http_conn_pool.clone();\n        async move {\n            cancellation_token.cancelled().await;\n            tokio::task::spawn_blocking(move || http_conn_pool.shutdown())\n                .await\n                .propagate_task_panic();\n        }\n    });\n\n    let backend = Arc::new(PoolingBackend {\n        http_conn_pool: Arc::clone(&http_conn_pool),\n        local_pool,\n        pool: Arc::clone(&conn_pool),\n        config,\n        auth_backend,\n        endpoint_rate_limiter: 
Arc::clone(&endpoint_rate_limiter),\n    });\n    let tls_acceptor: Arc<dyn MaybeTlsAcceptor> = Arc::new(&config.tls_config);\n\n    let connections = tokio_util::task::task_tracker::TaskTracker::new();\n    connections.close(); // allows `connections.wait to complete`\n\n    let cancellations = tokio_util::task::task_tracker::TaskTracker::new();\n    while let Some(res) = run_until_cancelled(ws_listener.accept(), &cancellation_token).await {\n        let (conn, peer_addr) = res.context(\"could not accept TCP stream\")?;\n        if let Err(e) = conn.set_nodelay(true) {\n            tracing::error!(\"could not set nodelay: {e}\");\n            continue;\n        }\n        let conn_id = uuid::Uuid::new_v4();\n        let http_conn_span = tracing::info_span!(\"http_conn\", ?conn_id);\n\n        let n_connections = Metrics::get()\n            .proxy\n            .client_connections\n            .sample(crate::metrics::Protocol::Http);\n        tracing::trace!(?n_connections, threshold = ?config.http_config.client_conn_threshold, \"check\");\n        if n_connections > config.http_config.client_conn_threshold {\n            tracing::trace!(\"attempting to cancel a random connection\");\n            if let Some(token) = config.http_config.cancel_set.take() {\n                tracing::debug!(\"cancelling a random connection\");\n                token.cancel();\n            }\n        }\n\n        let conn_token = cancellation_token.child_token();\n        let tls_acceptor = tls_acceptor.clone();\n        let backend = backend.clone();\n        let connections2 = connections.clone();\n        let cancellation_handler = cancellation_handler.clone();\n        let endpoint_rate_limiter = endpoint_rate_limiter.clone();\n        let cancellations = cancellations.clone();\n        connections.spawn(\n            async move {\n                let conn_token2 = conn_token.clone();\n                let _cancel_guard = config.http_config.cancel_set.insert(conn_id, 
conn_token2);\n\n                let session_id = uuid::Uuid::new_v4();\n\n                let _gauge = Metrics::get()\n                    .proxy\n                    .client_connections\n                    .guard(crate::metrics::Protocol::Http);\n\n                let startup_result = Box::pin(connection_startup(\n                    config,\n                    tls_acceptor,\n                    session_id,\n                    conn,\n                    peer_addr,\n                ))\n                .await;\n                let Some((conn, conn_info)) = startup_result else {\n                    return;\n                };\n\n                Box::pin(connection_handler(\n                    config,\n                    backend,\n                    connections2,\n                    cancellations,\n                    cancellation_handler,\n                    endpoint_rate_limiter,\n                    conn_token,\n                    conn,\n                    conn_info,\n                    session_id,\n                ))\n                .await;\n            }\n            .instrument(http_conn_span),\n        );\n    }\n\n    connections.wait().await;\n\n    Ok(())\n}\n\npub(crate) trait AsyncReadWrite: AsyncRead + AsyncWrite + Send + 'static {}\nimpl<T: AsyncRead + AsyncWrite + Send + 'static> AsyncReadWrite for T {}\npub(crate) type AsyncRW = Pin<Box<dyn AsyncReadWrite>>;\n\n#[async_trait]\ntrait MaybeTlsAcceptor: Send + Sync + 'static {\n    async fn accept(&self, conn: TcpStream) -> std::io::Result<AsyncRW>;\n}\n\n#[async_trait]\nimpl MaybeTlsAcceptor for &'static ArcSwapOption<crate::config::TlsConfig> {\n    async fn accept(&self, conn: TcpStream) -> std::io::Result<AsyncRW> {\n        match &*self.load() {\n            Some(config) => Ok(Box::pin(\n                TlsAcceptor::from(config.http_config.clone())\n                    .accept(conn)\n                    .await?,\n            )),\n            None => Ok(Box::pin(conn)),\n        }\n    
}\n}\n\n/// Handles the TCP startup lifecycle.\n/// 1. Parses PROXY protocol V2\n/// 2. Handles TLS handshake\nasync fn connection_startup(\n    config: &ProxyConfig,\n    tls_acceptor: Arc<dyn MaybeTlsAcceptor>,\n    session_id: uuid::Uuid,\n    conn: TcpStream,\n    peer_addr: SocketAddr,\n) -> Option<(AsyncRW, ConnectionInfo)> {\n    // handle PROXY protocol\n    let (conn, conn_info) = match config.proxy_protocol_v2 {\n        ProxyProtocolV2::Required => {\n            match read_proxy_protocol(conn).await {\n                Err(e) => {\n                    warn!(\"per-client task finished with an error: {e:#}\");\n                    return None;\n                }\n                // our load balancers will not send any more data. let's just exit immediately\n                Ok((_conn, ConnectHeader::Local)) => {\n                    tracing::debug!(\"healthcheck received\");\n                    return None;\n                }\n                Ok((conn, ConnectHeader::Proxy(info))) => (conn, info),\n            }\n        }\n        // ignore the header - it cannot be confused for a postgres or http connection so will\n        // error later.\n        ProxyProtocolV2::Rejected => (\n            conn,\n            ConnectionInfo {\n                addr: peer_addr,\n                extra: None,\n            },\n        ),\n    };\n\n    let has_private_peer_addr = match conn_info.addr.ip() {\n        IpAddr::V4(ip) => ip.is_private(),\n        IpAddr::V6(_) => false,\n    };\n    info!(?session_id, %conn_info, \"accepted new TCP connection\");\n\n    // try upgrade to TLS, but with a timeout.\n    let conn = match timeout(config.handshake_timeout, tls_acceptor.accept(conn)).await {\n        Ok(Ok(conn)) => {\n            info!(?session_id, %conn_info, \"accepted new TLS connection\");\n            conn\n        }\n        // The handshake failed\n        Ok(Err(e)) => {\n            if !has_private_peer_addr {\n                
Metrics::get().proxy.tls_handshake_failures.inc();\n            }\n            warn!(?session_id, %conn_info, \"failed to accept TLS connection: {e:?}\");\n            return None;\n        }\n        // The handshake timed out\n        Err(e) => {\n            if !has_private_peer_addr {\n                Metrics::get().proxy.tls_handshake_failures.inc();\n            }\n            warn!(?session_id, %conn_info, \"failed to accept TLS connection: {e:?}\");\n            return None;\n        }\n    };\n\n    Some((conn, conn_info))\n}\n\n/// Handles HTTP connection\n/// 1. With graceful shutdowns\n/// 2. With graceful request cancellation with connection failure\n/// 3. With websocket upgrade support.\n#[allow(clippy::too_many_arguments)]\nasync fn connection_handler(\n    config: &'static ProxyConfig,\n    backend: Arc<PoolingBackend>,\n    connections: TaskTracker,\n    cancellations: TaskTracker,\n    cancellation_handler: Arc<CancellationHandler>,\n    endpoint_rate_limiter: Arc<EndpointRateLimiter>,\n    cancellation_token: CancellationToken,\n    conn: AsyncRW,\n    conn_info: ConnectionInfo,\n    session_id: uuid::Uuid,\n) {\n    let session_id = AtomicTake::new(session_id);\n\n    // Cancel all current inflight HTTP requests if the HTTP connection is closed.\n    let http_cancellation_token = CancellationToken::new();\n    let _cancel_connection = http_cancellation_token.clone().drop_guard();\n\n    let conn_info2 = conn_info.clone();\n    let server = Builder::new(TokioExecutor::new());\n    let conn = server.serve_connection_with_upgrades(\n        hyper_util::rt::TokioIo::new(conn),\n        hyper::service::service_fn(move |req: hyper::Request<Incoming>| {\n            // First HTTP request shares the same session ID\n            let mut session_id = session_id.take().unwrap_or_else(uuid::Uuid::new_v4);\n\n            if matches!(backend.auth_backend, crate::auth::Backend::Local(_)) {\n                // take session_id from request, if given.\n          
      if let Some(id) = req\n                    .headers()\n                    .get(&NEON_REQUEST_ID)\n                    .and_then(|id| uuid::Uuid::try_parse_ascii(id.as_bytes()).ok())\n                {\n                    session_id = id;\n                }\n            }\n\n            // Cancel the current inflight HTTP request if the requets stream is closed.\n            // This is slightly different to `_cancel_connection` in that\n            // h2 can cancel individual requests with a `RST_STREAM`.\n            let http_request_token = http_cancellation_token.child_token();\n            let cancel_request = http_request_token.clone().drop_guard();\n\n            // `request_handler` is not cancel safe. It expects to be cancelled only at specific times.\n            // By spawning the future, we ensure it never gets cancelled until it decides to.\n            let cancellations = cancellations.clone();\n            let handler = connections.spawn(\n                request_handler(\n                    req,\n                    config,\n                    backend.clone(),\n                    connections.clone(),\n                    cancellation_handler.clone(),\n                    session_id,\n                    conn_info2.clone(),\n                    http_request_token,\n                    endpoint_rate_limiter.clone(),\n                    cancellations,\n                )\n                .in_current_span()\n                .map_ok_or_else(api_error_into_response, |r| r),\n            );\n            async move {\n                let mut res = handler.await;\n                cancel_request.disarm();\n\n                // add the session ID to the response\n                if let Ok(resp) = &mut res {\n                    resp.headers_mut()\n                        .append(&NEON_REQUEST_ID, uuid_to_header_value(session_id));\n                }\n\n                res\n            }\n        }),\n    );\n\n    // On cancellation, trigger the HTTP 
connection handler to shut down.\n    let res = match select(pin!(cancellation_token.cancelled()), pin!(conn)).await {\n        Either::Left((_cancelled, mut conn)) => {\n            tracing::debug!(%conn_info, \"cancelling connection\");\n            conn.as_mut().graceful_shutdown();\n            conn.await\n        }\n        Either::Right((res, _)) => res,\n    };\n\n    match res {\n        Ok(()) => tracing::info!(%conn_info, \"HTTP connection closed\"),\n        Err(e) => tracing::warn!(%conn_info, \"HTTP connection error {e}\"),\n    }\n}\n\n#[allow(clippy::too_many_arguments)]\nasync fn request_handler(\n    mut request: hyper::Request<Incoming>,\n    config: &'static ProxyConfig,\n    backend: Arc<PoolingBackend>,\n    ws_connections: TaskTracker,\n    cancellation_handler: Arc<CancellationHandler>,\n    session_id: uuid::Uuid,\n    conn_info: ConnectionInfo,\n    // used to cancel in-flight HTTP requests. not used to cancel websockets\n    http_cancellation_token: CancellationToken,\n    endpoint_rate_limiter: Arc<EndpointRateLimiter>,\n    cancellations: TaskTracker,\n) -> Result<Response<BoxBody<Bytes, hyper::Error>>, ApiError> {\n    let host = request\n        .headers()\n        .get(\"host\")\n        .and_then(|h| h.to_str().ok())\n        .and_then(|h| h.split(':').next())\n        .map(|s| s.to_string());\n\n    // Check if the request is a websocket upgrade request.\n    if config.http_config.accept_websockets\n        && framed_websockets::upgrade::is_upgrade_request(&request)\n    {\n        let ctx = RequestContext::new(session_id, conn_info, crate::metrics::Protocol::Ws);\n\n        ctx.set_user_agent(\n            request\n                .headers()\n                .get(hyper::header::USER_AGENT)\n                .and_then(|h| h.to_str().ok())\n                .map(Into::into),\n        );\n\n        let span = ctx.span();\n        info!(parent: &span, \"performing websocket upgrade\");\n\n        let (response, websocket) = 
framed_websockets::upgrade::upgrade(&mut request)\n            .map_err(|e| ApiError::BadRequest(e.into()))?;\n\n        let cancellations = cancellations.clone();\n        ws_connections.spawn(\n            async move {\n                if let Err(e) = websocket::serve_websocket(\n                    config,\n                    backend.auth_backend,\n                    ctx,\n                    websocket,\n                    cancellation_handler,\n                    endpoint_rate_limiter,\n                    host,\n                    cancellations,\n                )\n                .await\n                {\n                    warn!(\"error in websocket connection: {e:#}\");\n                }\n            }\n            .instrument(span),\n        );\n\n        // Return the response so the spawned future can continue.\n        Ok(response.map(|b| b.map_err(|x| match x {}).boxed()))\n    } else if request.uri().path() == \"/sql\" && *request.method() == Method::POST {\n        let ctx = RequestContext::new(session_id, conn_info, crate::metrics::Protocol::Http);\n        let span = ctx.span();\n\n        let testodrome_id = request\n            .headers()\n            .get(\"X-Neon-Query-ID\")\n            .and_then(|value| value.to_str().ok())\n            .map(|s| s.to_string());\n\n        if let Some(query_id) = testodrome_id {\n            info!(parent: &ctx.span(), \"testodrome query ID: {query_id}\");\n            ctx.set_testodrome_id(query_id.into());\n        }\n\n        sql_over_http::handle(config, ctx, request, backend, http_cancellation_token)\n            .instrument(span)\n            .await\n    } else if request.uri().path() == \"/sql\" && *request.method() == Method::OPTIONS {\n        Response::builder()\n            .header(\"Allow\", \"OPTIONS, POST\")\n            .header(\"Access-Control-Allow-Origin\", \"*\")\n            .header(\n                \"Access-Control-Allow-Headers\",\n                \"Authorization, 
Neon-Connection-String, Neon-Raw-Text-Output, Neon-Array-Mode, Neon-Pool-Opt-In, Neon-Batch-Read-Only, Neon-Batch-Isolation-Level\",\n            )\n            .header(\"Access-Control-Max-Age\", \"86400\" /* 24 hours */)\n            .status(StatusCode::OK) // 204 is also valid, but see: https://developer.mozilla.org/en-US/docs/Web/HTTP/Methods/OPTIONS#status_code\n            .body(Empty::new().map_err(|x| match x {}).boxed())\n            .map_err(|e| ApiError::InternalServerError(e.into()))\n    } else {\n        #[cfg(feature = \"rest_broker\")]\n        {\n            if config.rest_config.is_rest_broker\n            // we are testing for the path to be /database_name/rest/...\n                && request\n                    .uri()\n                    .path()\n                    .split('/')\n                    .nth(2)\n                    .is_some_and(|part| part.starts_with(\"rest\"))\n            {\n                let ctx =\n                    RequestContext::new(session_id, conn_info, crate::metrics::Protocol::Http);\n                let span = ctx.span();\n\n                let testodrome_id = request\n                    .headers()\n                    .get(\"X-Neon-Query-ID\")\n                    .and_then(|value| value.to_str().ok())\n                    .map(|s| s.to_string());\n\n                if let Some(query_id) = testodrome_id {\n                    info!(parent: &span, \"testodrome query ID: {query_id}\");\n                    ctx.set_testodrome_id(query_id.into());\n                }\n\n                rest::handle(config, ctx, request, backend, http_cancellation_token)\n                    .instrument(span)\n                    .await\n            } else {\n                json_response(StatusCode::BAD_REQUEST, \"query is not supported\")\n            }\n        }\n        #[cfg(not(feature = \"rest_broker\"))]\n        {\n            json_response(StatusCode::BAD_REQUEST, \"query is not supported\")\n        }\n    }\n}\n"
  },
  {
    "path": "proxy/src/serverless/rest.rs",
    "content": "use std::borrow::Cow;\nuse std::collections::HashMap;\nuse std::convert::Infallible;\nuse std::sync::Arc;\n\nuse bytes::Bytes;\nuse http::Method;\nuse http::header::{\n    ACCESS_CONTROL_ALLOW_HEADERS, ACCESS_CONTROL_ALLOW_METHODS, ACCESS_CONTROL_ALLOW_ORIGIN,\n    ACCESS_CONTROL_EXPOSE_HEADERS, ACCESS_CONTROL_MAX_AGE, ACCESS_CONTROL_REQUEST_HEADERS, ALLOW,\n    AUTHORIZATION, CONTENT_TYPE, HOST, ORIGIN,\n};\nuse http_body_util::combinators::BoxBody;\nuse http_body_util::{BodyExt, Empty, Full};\nuse http_utils::error::ApiError;\nuse hyper::body::Incoming;\nuse hyper::http::response::Builder;\nuse hyper::http::{HeaderMap, HeaderName, HeaderValue};\nuse hyper::{Request, Response, StatusCode};\nuse indexmap::IndexMap;\nuse moka::sync::Cache;\nuse ouroboros::self_referencing;\nuse serde::de::DeserializeOwned;\nuse serde::{Deserialize, Deserializer};\nuse serde_json::Value as JsonValue;\nuse serde_json::value::RawValue;\nuse subzero_core::api::ContentType::{ApplicationJSON, Other, SingularJSON, TextCSV};\nuse subzero_core::api::QueryNode::{Delete, FunctionCall, Insert, Update};\nuse subzero_core::api::Resolution::{IgnoreDuplicates, MergeDuplicates};\nuse subzero_core::api::{ApiResponse, ListVal, Payload, Preferences, Representation, SingleVal};\nuse subzero_core::config::{db_allowed_select_functions, db_schemas, role_claim_key};\nuse subzero_core::dynamic_statement::{JoinIterator, param, sql};\nuse subzero_core::error::Error::{\n    self as SubzeroCoreError, ContentTypeError, GucHeadersError, GucStatusError, InternalError,\n    JsonDeserialize, JwtTokenInvalid, NotFound,\n};\nuse subzero_core::error::pg_error_to_status_code;\nuse subzero_core::formatter::Param::{LV, PL, SV, Str, StrOwned};\nuse subzero_core::formatter::postgresql::{fmt_main_query, generate};\nuse subzero_core::formatter::{Param, Snippet, SqlParam};\nuse subzero_core::parser::postgrest::parse;\nuse subzero_core::permissions::{check_safe_functions, replace_select_star};\nuse 
subzero_core::schema::{\n    DbSchema, POSTGRESQL_INTROSPECTION_SQL, get_postgresql_configuration_query,\n};\nuse subzero_core::{content_range_header, content_range_status};\nuse tokio_util::sync::CancellationToken;\nuse tracing::{error, info};\nuse typed_json::json;\nuse url::form_urlencoded;\n\nuse super::backend::{HttpConnError, LocalProxyConnError, PoolingBackend};\nuse super::conn_pool::AuthData;\nuse super::conn_pool_lib::ConnInfo;\nuse super::error::{ConnInfoError, Credentials, HttpCodeError, ReadPayloadError};\nuse super::http_conn_pool::{self, LocalProxyClient};\nuse super::http_util::{\n    ALLOW_POOL, CONN_STRING, NEON_REQUEST_ID, RAW_TEXT_OUTPUT, TXN_ISOLATION_LEVEL, TXN_READ_ONLY,\n    get_conn_info, json_response, uuid_to_header_value,\n};\nuse super::json::JsonConversionError;\nuse crate::auth::backend::ComputeCredentialKeys;\nuse crate::cache::common::{count_cache_insert, count_cache_outcome, eviction_listener};\nuse crate::config::ProxyConfig;\nuse crate::context::RequestContext;\nuse crate::error::{ErrorKind, ReportableError, UserFacingError};\nuse crate::http::read_body_with_limit;\nuse crate::metrics::{CacheKind, Metrics};\nuse crate::serverless::sql_over_http::HEADER_VALUE_TRUE;\nuse crate::types::EndpointCacheKey;\nuse crate::util::deserialize_json_string;\n\nstatic EMPTY_JSON_SCHEMA: &str = r#\"{\"schemas\":[]}\"#;\nconst INTROSPECTION_SQL: &str = POSTGRESQL_INTROSPECTION_SQL;\nconst HEADER_VALUE_ALLOW_ALL_ORIGINS: HeaderValue = HeaderValue::from_static(\"*\");\n// CORS headers values\nconst ACCESS_CONTROL_ALLOW_METHODS_VALUE: HeaderValue =\n    HeaderValue::from_static(\"GET, POST, PATCH, PUT, DELETE, OPTIONS\");\nconst ACCESS_CONTROL_MAX_AGE_VALUE: HeaderValue = HeaderValue::from_static(\"86400\");\nconst ACCESS_CONTROL_EXPOSE_HEADERS_VALUE: HeaderValue = HeaderValue::from_static(\n    \"Content-Encoding, Content-Location, Content-Range, Content-Type, Date, Location, Server, Transfer-Encoding, Range-Unit\",\n);\nconst 
ACCESS_CONTROL_ALLOW_HEADERS_VALUE: HeaderValue = HeaderValue::from_static(\"Authorization\");\n\n// A wrapper around the DbSchema that allows for self-referencing\n#[self_referencing]\npub struct DbSchemaOwned {\n    schema_string: String,\n    #[covariant]\n    #[borrows(schema_string)]\n    schema: DbSchema<'this>,\n}\n\nimpl<'de> Deserialize<'de> for DbSchemaOwned {\n    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>\n    where\n        D: Deserializer<'de>,\n    {\n        let s = String::deserialize(deserializer)?;\n        DbSchemaOwned::try_new(s, |s| serde_json::from_str(s))\n            .map_err(<D::Error as serde::de::Error>::custom)\n    }\n}\n\nfn split_comma_separated(s: &str) -> Vec<String> {\n    s.split(',').map(|s| s.trim().to_string()).collect()\n}\n\nfn deserialize_comma_separated<'de, D>(deserializer: D) -> Result<Vec<String>, D::Error>\nwhere\n    D: Deserializer<'de>,\n{\n    let s = String::deserialize(deserializer)?;\n    Ok(split_comma_separated(&s))\n}\n\nfn deserialize_comma_separated_option<'de, D>(\n    deserializer: D,\n) -> Result<Option<Vec<String>>, D::Error>\nwhere\n    D: Deserializer<'de>,\n{\n    let opt = Option::<String>::deserialize(deserializer)?;\n    if let Some(s) = &opt {\n        let trimmed = s.trim();\n        if trimmed.is_empty() {\n            return Ok(None);\n        }\n        return Ok(Some(split_comma_separated(trimmed)));\n    }\n    Ok(None)\n}\n\n// The ApiConfig is the configuration for the API per endpoint\n// The configuration is read from the database and cached in the DbSchemaCache\n#[derive(Deserialize, Debug)]\npub struct ApiConfig {\n    #[serde(\n        default = \"db_schemas\",\n        deserialize_with = \"deserialize_comma_separated\"\n    )]\n    pub db_schemas: Vec<String>,\n    pub db_anon_role: Option<String>,\n    pub db_max_rows: Option<String>,\n    #[serde(default = \"db_allowed_select_functions\")]\n    pub db_allowed_select_functions: Vec<String>,\n    // 
#[serde(deserialize_with = \"to_tuple\", default)]\n    // pub db_pre_request: Option<(String, String)>,\n    #[allow(dead_code)]\n    #[serde(default = \"role_claim_key\")]\n    pub role_claim_key: String,\n    #[serde(default, deserialize_with = \"deserialize_comma_separated_option\")]\n    pub db_extra_search_path: Option<Vec<String>>,\n    #[serde(default, deserialize_with = \"deserialize_comma_separated_option\")]\n    pub server_cors_allowed_origins: Option<Vec<String>>,\n}\n\n// The DbSchemaCache is a cache of the ApiConfig and DbSchemaOwned for each endpoint\npub(crate) struct DbSchemaCache(Cache<EndpointCacheKey, Arc<(ApiConfig, DbSchemaOwned)>>);\nimpl DbSchemaCache {\n    pub fn new(config: crate::config::CacheOptions) -> Self {\n        let builder = Cache::builder().name(\"schema\");\n        let builder = config.moka(builder);\n\n        let metrics = &Metrics::get().cache;\n        if let Some(size) = config.size {\n            metrics.capacity.set(CacheKind::Schema, size as i64);\n        }\n\n        let builder =\n            builder.eviction_listener(|_k, _v, cause| eviction_listener(CacheKind::Schema, cause));\n\n        Self(builder.build())\n    }\n\n    pub async fn maintain(&self) -> Result<Infallible, anyhow::Error> {\n        let mut ticker = tokio::time::interval(std::time::Duration::from_secs(60));\n        loop {\n            ticker.tick().await;\n            self.0.run_pending_tasks();\n        }\n    }\n\n    pub fn get_cached(\n        &self,\n        endpoint_id: &EndpointCacheKey,\n    ) -> Option<Arc<(ApiConfig, DbSchemaOwned)>> {\n        count_cache_outcome(CacheKind::Schema, self.0.get(endpoint_id))\n    }\n    pub async fn get_remote(\n        &self,\n        endpoint_id: &EndpointCacheKey,\n        auth_header: &HeaderValue,\n        connection_string: &str,\n        client: &mut http_conn_pool::Client<LocalProxyClient>,\n        ctx: &RequestContext,\n        config: &'static ProxyConfig,\n    ) -> Result<Arc<(ApiConfig, 
DbSchemaOwned)>, RestError> {\n        info!(\"db_schema cache miss for endpoint: {:?}\", endpoint_id);\n        let remote_value = self\n            .internal_get_remote(auth_header, connection_string, client, ctx, config)\n            .await;\n        let (api_config, schema_owned) = match remote_value {\n            Ok((api_config, schema_owned)) => (api_config, schema_owned),\n            Err(e @ RestError::SchemaTooLarge) => {\n                // for the case where the schema is too large, we cache an empty dummy value\n                // all the other requests will fail without triggering the introspection query\n                let schema_owned = serde_json::from_str::<DbSchemaOwned>(EMPTY_JSON_SCHEMA)\n                    .map_err(|e| JsonDeserialize { source: e })?;\n\n                let api_config = ApiConfig {\n                    db_schemas: vec![],\n                    db_anon_role: None,\n                    db_max_rows: None,\n                    db_allowed_select_functions: vec![],\n                    role_claim_key: String::new(),\n                    db_extra_search_path: None,\n                    server_cors_allowed_origins: None,\n                };\n                let value = Arc::new((api_config, schema_owned));\n                count_cache_insert(CacheKind::Schema);\n                self.0.insert(endpoint_id.clone(), value);\n                return Err(e);\n            }\n            Err(e) => {\n                return Err(e);\n            }\n        };\n        let value = Arc::new((api_config, schema_owned));\n        count_cache_insert(CacheKind::Schema);\n        self.0.insert(endpoint_id.clone(), value.clone());\n        Ok(value)\n    }\n    async fn internal_get_remote(\n        &self,\n        auth_header: &HeaderValue,\n        connection_string: &str,\n        client: &mut http_conn_pool::Client<LocalProxyClient>,\n        ctx: &RequestContext,\n        config: &'static ProxyConfig,\n    ) -> Result<(ApiConfig, DbSchemaOwned), 
RestError> {\n        #[derive(Deserialize)]\n        struct SingleRow<Row> {\n            rows: [Row; 1],\n        }\n\n        #[derive(Deserialize)]\n        struct ConfigRow {\n            #[serde(deserialize_with = \"deserialize_json_string\")]\n            config: ApiConfig,\n        }\n\n        #[derive(Deserialize)]\n        struct SchemaRow {\n            json_schema: DbSchemaOwned,\n        }\n\n        let headers = vec![\n            (&NEON_REQUEST_ID, uuid_to_header_value(ctx.session_id())),\n            (\n                &CONN_STRING,\n                HeaderValue::from_str(connection_string).expect(\n                    \"connection string came from a header, so it must be a valid headervalue\",\n                ),\n            ),\n            (&AUTHORIZATION, auth_header.clone()),\n            (&RAW_TEXT_OUTPUT, HEADER_VALUE_TRUE),\n        ];\n\n        let query = get_postgresql_configuration_query(Some(\"pgrst.pre_config\"));\n        let SingleRow {\n            rows: [ConfigRow { config: api_config }],\n        } = make_local_proxy_request(\n            client,\n            headers.iter().cloned(),\n            QueryData {\n                query: Cow::Owned(query),\n                params: vec![],\n            },\n            config.rest_config.max_schema_size,\n        )\n        .await\n        .map_err(|e| match e {\n            RestError::ReadPayload(ReadPayloadError::BodyTooLarge { .. 
}) => {\n                RestError::SchemaTooLarge\n            }\n            e => e,\n        })?;\n\n        // now that we have the api_config let's run the second INTROSPECTION_SQL query\n        let SingleRow {\n            rows: [SchemaRow { json_schema }],\n        } = make_local_proxy_request(\n            client,\n            headers,\n            QueryData {\n                query: INTROSPECTION_SQL.into(),\n                params: vec![\n                    serde_json::to_value(&api_config.db_schemas)\n                        .expect(\"Vec<String> is always valid to encode as JSON\"),\n                    JsonValue::Bool(false), // include_roles_with_login\n                    JsonValue::Bool(false), // use_internal_permissions\n                ],\n            },\n            config.rest_config.max_schema_size,\n        )\n        .await\n        .map_err(|e| match e {\n            RestError::ReadPayload(ReadPayloadError::BodyTooLarge { .. }) => {\n                RestError::SchemaTooLarge\n            }\n            e => e,\n        })?;\n\n        Ok((api_config, json_schema))\n    }\n}\n\n// A type to represent a postgresql errors\n// we use our own type (instead of postgres_client::Error) because we get the error from the json response\n#[derive(Debug, thiserror::Error, Deserialize)]\npub(crate) struct PostgresError {\n    pub code: String,\n    pub message: String,\n    pub detail: Option<String>,\n    pub hint: Option<String>,\n}\nimpl HttpCodeError for PostgresError {\n    fn get_http_status_code(&self) -> StatusCode {\n        let status = pg_error_to_status_code(&self.code, true);\n        StatusCode::from_u16(status).unwrap_or(StatusCode::INTERNAL_SERVER_ERROR)\n    }\n}\nimpl ReportableError for PostgresError {\n    fn get_error_kind(&self) -> ErrorKind {\n        ErrorKind::User\n    }\n}\nimpl UserFacingError for PostgresError {\n    fn to_string_client(&self) -> String {\n        if self.code.starts_with(\"PT\") {\n            \"Postgres 
error\".to_string()\n        } else {\n            self.message.clone()\n        }\n    }\n}\nimpl std::fmt::Display for PostgresError {\n    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {\n        write!(f, \"{}\", self.message)\n    }\n}\n\n// A type to represent errors that can occur in the rest broker\n#[derive(Debug, thiserror::Error)]\npub(crate) enum RestError {\n    #[error(transparent)]\n    ReadPayload(#[from] ReadPayloadError),\n    #[error(transparent)]\n    ConnectCompute(#[from] HttpConnError),\n    #[error(transparent)]\n    ConnInfo(#[from] ConnInfoError),\n    #[error(transparent)]\n    Postgres(#[from] PostgresError),\n    #[error(transparent)]\n    JsonConversion(#[from] JsonConversionError),\n    #[error(transparent)]\n    SubzeroCore(#[from] SubzeroCoreError),\n    #[error(\"schema is too large\")]\n    SchemaTooLarge,\n}\nimpl ReportableError for RestError {\n    fn get_error_kind(&self) -> ErrorKind {\n        match self {\n            RestError::ReadPayload(e) => e.get_error_kind(),\n            RestError::ConnectCompute(e) => e.get_error_kind(),\n            RestError::ConnInfo(e) => e.get_error_kind(),\n            RestError::Postgres(_) => ErrorKind::Postgres,\n            RestError::JsonConversion(_) => ErrorKind::Postgres,\n            RestError::SubzeroCore(_) => ErrorKind::User,\n            RestError::SchemaTooLarge => ErrorKind::User,\n        }\n    }\n}\nimpl UserFacingError for RestError {\n    fn to_string_client(&self) -> String {\n        match self {\n            RestError::ReadPayload(p) => p.to_string(),\n            RestError::ConnectCompute(c) => c.to_string_client(),\n            RestError::ConnInfo(c) => c.to_string_client(),\n            RestError::SchemaTooLarge => self.to_string(),\n            RestError::Postgres(p) => p.to_string_client(),\n            RestError::JsonConversion(_) => \"could not parse postgres response\".to_string(),\n            RestError::SubzeroCore(s) => {\n                
// TODO: this is a hack to get the message from the json body\n                let json = s.json_body();\n                let default_message = \"Unknown error\".to_string();\n\n                json.get(\"message\")\n                    .map_or(default_message.clone(), |m| match m {\n                        JsonValue::String(s) => s.clone(),\n                        _ => default_message,\n                    })\n            }\n        }\n    }\n}\nimpl HttpCodeError for RestError {\n    fn get_http_status_code(&self) -> StatusCode {\n        match self {\n            RestError::ReadPayload(e) => e.get_http_status_code(),\n            RestError::ConnectCompute(h) => match h.get_error_kind() {\n                ErrorKind::User => StatusCode::BAD_REQUEST,\n                _ => StatusCode::INTERNAL_SERVER_ERROR,\n            },\n            RestError::ConnInfo(_) => StatusCode::BAD_REQUEST,\n            RestError::Postgres(e) => e.get_http_status_code(),\n            RestError::JsonConversion(_) => StatusCode::INTERNAL_SERVER_ERROR,\n            RestError::SchemaTooLarge => StatusCode::INTERNAL_SERVER_ERROR,\n            RestError::SubzeroCore(e) => {\n                let status = e.status_code();\n                StatusCode::from_u16(status).unwrap_or(StatusCode::INTERNAL_SERVER_ERROR)\n            }\n        }\n    }\n}\n\n// Helper functions for the rest broker\n\nfn fmt_env_query<'a>(env: &'a HashMap<&'a str, &'a str>) -> Snippet<'a> {\n    \"select \"\n        + if env.is_empty() {\n            sql(\"null\")\n        } else {\n            env.iter()\n                .map(|(k, v)| {\n                    \"set_config(\" + param(k as &SqlParam) + \", \" + param(v as &SqlParam) + \", true)\"\n                })\n                .join(\",\")\n        }\n}\n\n// TODO: see about removing the need for cloning the values (inner things are &Cow<str> already)\nfn to_sql_param(p: &Param) -> JsonValue {\n    match p {\n        SV(SingleVal(v, ..)) => 
JsonValue::String(v.to_string()),\n        Str(v) => JsonValue::String((*v).to_string()),\n        StrOwned(v) => JsonValue::String((*v).clone()),\n        PL(Payload(v, ..)) => JsonValue::String(v.clone().into_owned()),\n        LV(ListVal(v, ..)) => {\n            if v.is_empty() {\n                JsonValue::String(r\"{}\".to_string())\n            } else {\n                JsonValue::String(format!(\n                    \"{{\\\"{}\\\"}}\",\n                    v.iter()\n                        .map(|e| e.replace('\\\\', \"\\\\\\\\\").replace('\\\"', \"\\\\\\\"\"))\n                        .collect::<Vec<_>>()\n                        .join(\"\\\",\\\"\")\n                ))\n            }\n        }\n    }\n}\n\n#[derive(serde::Serialize)]\nstruct QueryData<'a> {\n    query: Cow<'a, str>,\n    params: Vec<JsonValue>,\n}\n\n#[derive(serde::Serialize)]\nstruct BatchQueryData<'a> {\n    queries: Vec<QueryData<'a>>,\n}\n\nasync fn make_local_proxy_request<S: DeserializeOwned>(\n    client: &mut http_conn_pool::Client<LocalProxyClient>,\n    headers: impl IntoIterator<Item = (&HeaderName, HeaderValue)>,\n    body: QueryData<'_>,\n    max_len: usize,\n) -> Result<S, RestError> {\n    let body_string = serde_json::to_string(&body)\n        .map_err(|e| RestError::JsonConversion(JsonConversionError::ParseJsonError(e)))?;\n\n    let response = make_raw_local_proxy_request(client, headers, body_string).await?;\n\n    let response_status = response.status();\n\n    if response_status != StatusCode::OK {\n        return Err(RestError::SubzeroCore(InternalError {\n            message: \"Failed to get endpoint schema\".to_string(),\n        }));\n    }\n\n    // Capture the response body\n    let response_body = crate::http::read_body_with_limit(response.into_body(), max_len)\n        .await\n        .map_err(ReadPayloadError::from)?;\n\n    // Parse the JSON response\n    let response_json: S = serde_json::from_slice(&response_body)\n        .map_err(|e| 
RestError::SubzeroCore(JsonDeserialize { source: e }))?;\n\n    Ok(response_json)\n}\n\nasync fn make_raw_local_proxy_request(\n    client: &mut http_conn_pool::Client<LocalProxyClient>,\n    headers: impl IntoIterator<Item = (&HeaderName, HeaderValue)>,\n    body: String,\n) -> Result<Response<Incoming>, RestError> {\n    let local_proxy_uri = ::http::Uri::from_static(\"http://proxy.local/sql\");\n    let mut req = Request::builder().method(Method::POST).uri(local_proxy_uri);\n    let req_headers = req.headers_mut().expect(\"failed to get headers\");\n    // Add all provided headers to the request\n    for (header_name, header_value) in headers {\n        req_headers.insert(header_name, header_value.clone());\n    }\n\n    let body_boxed = Full::new(Bytes::from(body))\n        .map_err(|never| match never {}) // Convert Infallible to hyper::Error\n        .boxed();\n\n    let req = req.body(body_boxed).map_err(|_| {\n        RestError::SubzeroCore(InternalError {\n            message: \"Failed to build request\".to_string(),\n        })\n    })?;\n\n    // Send the request to the local proxy\n    client\n        .inner\n        .inner\n        .send_request(req)\n        .await\n        .map_err(LocalProxyConnError::from)\n        .map_err(HttpConnError::from)\n        .map_err(RestError::from)\n}\n\npub(crate) async fn handle(\n    config: &'static ProxyConfig,\n    ctx: RequestContext,\n    request: Request<Incoming>,\n    backend: Arc<PoolingBackend>,\n    cancel: CancellationToken,\n) -> Result<Response<BoxBody<Bytes, hyper::Error>>, ApiError> {\n    let result = handle_inner(cancel, config, &ctx, request, backend).await;\n\n    let response = match result {\n        Ok(r) => {\n            ctx.set_success();\n\n            // Handling the error response from local proxy here\n            if r.status().is_server_error() {\n                let status = r.status();\n\n                let body_bytes = r\n                    .collect()\n                    
.await\n                    .map_err(|e| {\n                        ApiError::InternalServerError(anyhow::Error::msg(format!(\n                            \"could not collect http body: {e}\"\n                        )))\n                    })?\n                    .to_bytes();\n\n                if let Ok(mut json_map) =\n                    serde_json::from_slice::<IndexMap<&str, &RawValue>>(&body_bytes)\n                {\n                    let message = json_map.get(\"message\");\n                    if let Some(message) = message {\n                        let msg: String = match serde_json::from_str(message.get()) {\n                            Ok(msg) => msg,\n                            Err(_) => {\n                                \"Unable to parse the response message from server\".to_string()\n                            }\n                        };\n\n                        error!(\"Error response from local_proxy: {status} {msg}\");\n\n                        json_map.retain(|key, _| !key.starts_with(\"neon:\")); // remove all the neon-related keys\n\n                        let resp_json = serde_json::to_string(&json_map)\n                            .unwrap_or(\"failed to serialize the response message\".to_string());\n\n                        return json_response(status, resp_json);\n                    }\n                }\n\n                error!(\"Unable to parse the response message from local_proxy\");\n                return json_response(\n                    status,\n                    json!({ \"message\": \"Unable to parse the response message from server\".to_string() }),\n                );\n            }\n            r\n        }\n        Err(e @ RestError::SubzeroCore(_)) => {\n            let error_kind = e.get_error_kind();\n            ctx.set_error_kind(error_kind);\n\n            tracing::info!(\n                kind=error_kind.to_metric_label(),\n                error=%e,\n                msg=\"subzero core error\",\n       
         \"forwarding error to user\"\n            );\n\n            let RestError::SubzeroCore(subzero_err) = e else {\n                panic!(\"expected subzero core error\")\n            };\n\n            let json_body = subzero_err.json_body();\n            let status_code = StatusCode::from_u16(subzero_err.status_code())\n                .unwrap_or(StatusCode::INTERNAL_SERVER_ERROR);\n\n            json_response(status_code, json_body)?\n        }\n        Err(e) => {\n            let error_kind = e.get_error_kind();\n            ctx.set_error_kind(error_kind);\n\n            let message = e.to_string_client();\n            let status_code = e.get_http_status_code();\n\n            tracing::info!(\n                kind=error_kind.to_metric_label(),\n                error=%e,\n                msg=message,\n                \"forwarding error to user\"\n            );\n\n            let (code, detail, hint) = match e {\n                RestError::Postgres(e) => (\n                    if e.code.starts_with(\"PT\") {\n                        None\n                    } else {\n                        Some(e.code)\n                    },\n                    e.detail,\n                    e.hint,\n                ),\n                _ => (None, None, None),\n            };\n\n            json_response(\n                status_code,\n                json!({\n                    \"message\": message,\n                    \"code\": code,\n                    \"detail\": detail,\n                    \"hint\": hint,\n                }),\n            )?\n        }\n    };\n\n    Ok(response)\n}\n\nasync fn handle_inner(\n    _cancel: CancellationToken,\n    config: &'static ProxyConfig,\n    ctx: &RequestContext,\n    request: Request<Incoming>,\n    backend: Arc<PoolingBackend>,\n) -> Result<Response<BoxBody<Bytes, hyper::Error>>, RestError> {\n    let _requeset_gauge = Metrics::get()\n        .proxy\n        .connection_requests\n        .guard(ctx.protocol());\n    
info!(\n        protocol = %ctx.protocol(),\n        \"handling interactive connection from client\"\n    );\n\n    // Read host from Host, then URI host as fallback\n    // TODO: will this be a problem if behind a load balancer?\n    // TODO: can we use the x-forwarded-host header?\n    let host = request\n        .headers()\n        .get(HOST)\n        .and_then(|v| v.to_str().ok())\n        .unwrap_or_else(|| request.uri().host().unwrap_or(\"\"));\n\n    // a valid path is /database/rest/v1/... so splitting should be [\"\", \"database\", \"rest\", \"v1\", ...]\n    let database_name = request\n        .uri()\n        .path()\n        .split('/')\n        .nth(1)\n        .ok_or(RestError::SubzeroCore(NotFound {\n            target: request.uri().path().to_string(),\n        }))?;\n\n    // we always use the authenticator role to connect to the database\n    let authenticator_role = \"authenticator\";\n\n    // Strip the hostname prefix from the host to get the database hostname\n    let database_host = host.replace(&config.rest_config.hostname_prefix, \"\");\n\n    let connection_string =\n        format!(\"postgresql://{authenticator_role}@{database_host}/{database_name}\");\n\n    let conn_info = get_conn_info(\n        &config.authentication_config,\n        ctx,\n        Some(&connection_string),\n        request.headers(),\n    )?;\n    info!(\n        user = conn_info.conn_info.user_info.user.as_str(),\n        \"credentials\"\n    );\n\n    match conn_info.auth {\n        AuthData::Jwt(jwt) => {\n            let api_prefix = format!(\"/{database_name}/rest/v1/\");\n            handle_rest_inner(\n                config,\n                ctx,\n                &api_prefix,\n                request,\n                &connection_string,\n                conn_info.conn_info,\n                jwt,\n                backend,\n            )\n            .await\n        }\n        AuthData::Password(_) => Err(RestError::ConnInfo(ConnInfoError::MissingCredentials(\n 
           Credentials::BearerJwt,\n        ))),\n    }\n}\n\nfn apply_common_cors_headers(\n    response: &mut Builder,\n    request_headers: &HeaderMap,\n    allowed_origins: Option<&Vec<String>>,\n) {\n    let request_origin = request_headers\n        .get(ORIGIN)\n        .map(|v| v.to_str().unwrap_or(\"\"));\n\n    let response_allow_origin = match (request_origin, allowed_origins) {\n        (Some(or), Some(allowed_origins)) => {\n            if allowed_origins.iter().any(|o| o == or) {\n                Some(HeaderValue::from_str(or).unwrap_or(HEADER_VALUE_ALLOW_ALL_ORIGINS))\n            } else {\n                None\n            }\n        }\n        (Some(_), None) => Some(HEADER_VALUE_ALLOW_ALL_ORIGINS),\n        _ => None,\n    };\n    if let Some(h) = response.headers_mut() {\n        h.insert(\n            ACCESS_CONTROL_EXPOSE_HEADERS,\n            ACCESS_CONTROL_EXPOSE_HEADERS_VALUE,\n        );\n        if let Some(origin) = response_allow_origin {\n            h.insert(ACCESS_CONTROL_ALLOW_ORIGIN, origin);\n        }\n    }\n}\n\n#[allow(clippy::too_many_arguments)]\nasync fn handle_rest_inner(\n    config: &'static ProxyConfig,\n    ctx: &RequestContext,\n    api_prefix: &str,\n    request: Request<Incoming>,\n    connection_string: &str,\n    conn_info: ConnInfo,\n    jwt: String,\n    backend: Arc<PoolingBackend>,\n) -> Result<Response<BoxBody<Bytes, hyper::Error>>, RestError> {\n    let db_schema_cache =\n        config\n            .rest_config\n            .db_schema_cache\n            .as_ref()\n            .ok_or(RestError::SubzeroCore(InternalError {\n                message: \"DB schema cache is not configured\".to_string(),\n            }))?;\n\n    let endpoint_cache_key = conn_info\n        .endpoint_cache_key()\n        .ok_or(RestError::SubzeroCore(InternalError {\n            message: \"Failed to get endpoint cache key\".to_string(),\n        }))?;\n\n    let (parts, originial_body) = request.into_parts();\n\n    // try and get the 
cached entry for this endpoint\n    // it contains the api config and the introspected db schema\n    let cached_entry = db_schema_cache.get_cached(&endpoint_cache_key);\n\n    let allowed_origins = cached_entry\n        .as_ref()\n        .and_then(|arc| arc.0.server_cors_allowed_origins.as_ref());\n\n    let mut response = Response::builder();\n    apply_common_cors_headers(&mut response, &parts.headers, allowed_origins);\n\n    // handle the OPTIONS request\n    if parts.method == Method::OPTIONS {\n        let allowed_headers = parts\n            .headers\n            .get(ACCESS_CONTROL_REQUEST_HEADERS)\n            .and_then(|a| a.to_str().ok())\n            .filter(|v| !v.is_empty())\n            .map_or_else(\n                || \"Authorization\".to_string(),\n                |v| format!(\"{v}, Authorization\"),\n            );\n        return response\n            .status(StatusCode::OK)\n            .header(\n                ACCESS_CONTROL_ALLOW_METHODS,\n                ACCESS_CONTROL_ALLOW_METHODS_VALUE,\n            )\n            .header(ACCESS_CONTROL_MAX_AGE, ACCESS_CONTROL_MAX_AGE_VALUE)\n            .header(\n                ACCESS_CONTROL_ALLOW_HEADERS,\n                HeaderValue::from_str(&allowed_headers)\n                    .unwrap_or(ACCESS_CONTROL_ALLOW_HEADERS_VALUE),\n            )\n            .header(ALLOW, ACCESS_CONTROL_ALLOW_METHODS_VALUE)\n            .body(Empty::new().map_err(|x| match x {}).boxed())\n            .map_err(|e| {\n                RestError::SubzeroCore(InternalError {\n                    message: e.to_string(),\n                })\n            });\n    }\n\n    // validate the jwt token\n    let jwt_parsed = backend\n        .authenticate_with_jwt(ctx, &conn_info.user_info, jwt)\n        .await\n        .map_err(HttpConnError::from)?;\n\n    let auth_header = parts\n        .headers\n        .get(AUTHORIZATION)\n        .ok_or(RestError::SubzeroCore(InternalError {\n            message: \"Authorization header is 
required\".to_string(),\n        }))?;\n    let mut client = backend.connect_to_local_proxy(ctx, conn_info).await?;\n\n    let entry = match cached_entry {\n        Some(e) => e,\n        None => {\n            // if not cached, get the remote entry (will run the introspection query)\n            db_schema_cache\n                .get_remote(\n                    &endpoint_cache_key,\n                    auth_header,\n                    connection_string,\n                    &mut client,\n                    ctx,\n                    config,\n                )\n                .await?\n        }\n    };\n    let (api_config, db_schema_owned) = entry.as_ref();\n\n    let db_schema = db_schema_owned.borrow_schema();\n\n    let db_schemas = &api_config.db_schemas; // list of schemas available for the api\n    let db_extra_search_path = &api_config.db_extra_search_path;\n    // TODO: use this when we get a replacement for jsonpath_lib\n    // let role_claim_key = &api_config.role_claim_key;\n    // let role_claim_path = format!(\"${role_claim_key}\");\n    let db_anon_role = &api_config.db_anon_role;\n    let max_rows = api_config.db_max_rows.as_deref();\n    let db_allowed_select_functions = api_config\n        .db_allowed_select_functions\n        .iter()\n        .map(|s| s.as_str())\n        .collect::<Vec<_>>();\n\n    // extract the jwt claims (we'll need them later to set the role and env)\n    let jwt_claims = match jwt_parsed.keys {\n        ComputeCredentialKeys::JwtPayload(payload_bytes) => {\n            // `payload_bytes` contains the raw JWT payload as Vec<u8>\n            // You can deserialize it back to JSON or parse specific claims\n            let payload: serde_json::Value = serde_json::from_slice(&payload_bytes)\n                .map_err(|e| RestError::SubzeroCore(JsonDeserialize { source: e }))?;\n            Some(payload)\n        }\n        ComputeCredentialKeys::AuthKeys(_) => None,\n    };\n\n    // read the role from the jwt claims (and set 
it to the \"anon\" role if not present)\n    let (role, authenticated) = match &jwt_claims {\n        Some(claims) => match claims.get(\"role\") {\n            Some(JsonValue::String(r)) => (Some(r), true),\n            _ => (db_anon_role.as_ref(), true),\n        },\n        None => (db_anon_role.as_ref(), false),\n    };\n\n    // do not allow unauthenticated requests when there is no anonymous role setup\n    if let (None, false) = (role, authenticated) {\n        return Err(RestError::SubzeroCore(JwtTokenInvalid {\n            message: \"unauthenticated requests not allowed\".to_string(),\n        }));\n    }\n\n    // start deconstructing the request because subzero core mostly works with &str\n    let method = parts.method;\n    let method_str = method.as_str();\n    let path = parts.uri.path_and_query().map_or(\"/\", |pq| pq.as_str());\n\n    // this is actually the table name (or rpc/function_name)\n    // TODO: rename this to something more descriptive\n    let root = match parts.uri.path().strip_prefix(api_prefix) {\n        Some(p) => Ok(p),\n        None => Err(RestError::SubzeroCore(NotFound {\n            target: parts.uri.path().to_string(),\n        })),\n    }?;\n\n    // pick the current schema from the headers (or the first one from config)\n    let schema_name = &DbSchema::pick_current_schema(db_schemas, method_str, &parts.headers)?;\n\n    // add the content-profile header to the response\n    let mut response_headers = vec![];\n    if db_schemas.len() > 1 {\n        response_headers.push((\"Content-Profile\".to_string(), schema_name.clone()));\n    }\n\n    // parse the query string into a Vec<(&str, &str)>\n    let query = match parts.uri.query() {\n        Some(q) => form_urlencoded::parse(q.as_bytes()).collect(),\n        None => vec![],\n    };\n    let get: Vec<(&str, &str)> = query.iter().map(|(k, v)| (&**k, &**v)).collect();\n\n    // convert the headers map to a HashMap<&str, &str>\n    let headers: HashMap<&str, &str> = parts\n        
.headers\n        .iter()\n        .map(|(k, v)| (k.as_str(), v.to_str().unwrap_or(\"__BAD_HEADER__\")))\n        .collect();\n\n    let cookies = HashMap::new(); // TODO: add cookies\n\n    // Read the request body (skip for GET requests)\n    let body_as_string: Option<String> = if method == Method::GET {\n        None\n    } else {\n        let body_bytes =\n            read_body_with_limit(originial_body, config.http_config.max_request_size_bytes)\n                .await\n                .map_err(ReadPayloadError::from)?;\n        if body_bytes.is_empty() {\n            None\n        } else {\n            Some(String::from_utf8_lossy(&body_bytes).into_owned())\n        }\n    };\n\n    // parse the request into an ApiRequest struct\n    let mut api_request = parse(\n        schema_name,\n        root,\n        db_schema,\n        method_str,\n        path,\n        get,\n        body_as_string.as_deref(),\n        headers,\n        cookies,\n        max_rows,\n    )\n    .map_err(RestError::SubzeroCore)?;\n\n    let role_str = match role {\n        Some(r) => r,\n        None => \"\",\n    };\n\n    replace_select_star(db_schema, schema_name, role_str, &mut api_request.query)?;\n\n    // TODO: this is not relevant when acting as PostgREST but will be useful\n    // in the context of DBX where they need internal permissions\n    // if !disable_internal_permissions {\n    //     check_privileges(db_schema, schema_name, role_str, &api_request)?;\n    // }\n\n    check_safe_functions(&api_request, &db_allowed_select_functions)?;\n\n    // TODO: this is not relevant when acting as PostgREST but will be useful\n    // in the context of DBX where they need internal permissions\n    // if !disable_internal_permissions {\n    //     insert_policy_conditions(db_schema, schema_name, role_str, &mut api_request.query)?;\n    // }\n\n    let env_role = Some(role_str);\n\n    // construct the env (passed in to the sql context as GUCs)\n    let empty_json = 
\"{}\".to_string();\n    let headers_env = serde_json::to_string(&api_request.headers).unwrap_or(empty_json.clone());\n    let cookies_env = serde_json::to_string(&api_request.cookies).unwrap_or(empty_json.clone());\n    let get_env = serde_json::to_string(&api_request.get).unwrap_or(empty_json.clone());\n    let jwt_claims_env = jwt_claims\n        .as_ref()\n        .map(|v| serde_json::to_string(v).unwrap_or(empty_json.clone()))\n        .unwrap_or(if let Some(r) = env_role {\n            let claims: HashMap<&str, &str> = HashMap::from([(\"role\", r)]);\n            serde_json::to_string(&claims).unwrap_or(empty_json.clone())\n        } else {\n            empty_json.clone()\n        });\n    let mut search_path = vec![api_request.schema_name];\n    if let Some(extra) = &db_extra_search_path {\n        search_path.extend(extra.iter().map(|s| s.as_str()));\n    }\n    let search_path_str = search_path\n        .into_iter()\n        .filter(|s| !s.is_empty())\n        .collect::<Vec<_>>()\n        .join(\",\");\n    let mut env: HashMap<&str, &str> = HashMap::from([\n        (\"request.method\", api_request.method),\n        (\"request.path\", api_request.path),\n        (\"request.headers\", &headers_env),\n        (\"request.cookies\", &cookies_env),\n        (\"request.get\", &get_env),\n        (\"request.jwt.claims\", &jwt_claims_env),\n        (\"search_path\", &search_path_str),\n    ]);\n    if let Some(r) = env_role {\n        env.insert(\"role\", r);\n    }\n\n    // generate the sql statements\n    let (env_statement, env_parameters, _) = generate(fmt_env_query(&env));\n    let (main_statement, main_parameters, _) = generate(fmt_main_query(\n        db_schema,\n        api_request.schema_name,\n        &api_request,\n        &env,\n    )?);\n\n    let mut headers = vec![\n        (&NEON_REQUEST_ID, uuid_to_header_value(ctx.session_id())),\n        (\n            &CONN_STRING,\n            HeaderValue::from_str(connection_string).expect(\"invalid 
connection string\"),\n        ),\n        (&AUTHORIZATION, auth_header.clone()),\n        (\n            &TXN_ISOLATION_LEVEL,\n            HeaderValue::from_static(\"ReadCommitted\"),\n        ),\n        (&ALLOW_POOL, HEADER_VALUE_TRUE),\n    ];\n\n    if api_request.read_only {\n        headers.push((&TXN_READ_ONLY, HEADER_VALUE_TRUE));\n    }\n\n    // convert the parameters from subzero core representation to the local proxy repr.\n    let req_body = serde_json::to_string(&BatchQueryData {\n        queries: vec![\n            QueryData {\n                query: env_statement.into(),\n                params: env_parameters\n                    .iter()\n                    .map(|p| to_sql_param(&p.to_param()))\n                    .collect(),\n            },\n            QueryData {\n                query: main_statement.into(),\n                params: main_parameters\n                    .iter()\n                    .map(|p| to_sql_param(&p.to_param()))\n                    .collect(),\n            },\n        ],\n    })\n    .map_err(|e| RestError::JsonConversion(JsonConversionError::ParseJsonError(e)))?;\n\n    // todo: map body to count egress\n    let _metrics = client.metrics(ctx); // FIXME: is everything in the context set correctly?\n\n    // send the request to the local proxy\n    let proxy_response = make_raw_local_proxy_request(&mut client, headers, req_body).await?;\n    let (response_parts, body) = proxy_response.into_parts();\n\n    let max_response = config.http_config.max_response_size_bytes;\n    let bytes = read_body_with_limit(body, max_response)\n        .await\n        .map_err(ReadPayloadError::from)?;\n\n    // if the response status is greater than 399, then it is an error\n    // FIXME: check if there are other error codes or shapes of the response\n    if response_parts.status.as_u16() > 399 {\n        // turn this postgres error from the json into PostgresError\n        let postgres_error = serde_json::from_slice(&bytes)\n           
 .map_err(|e| RestError::SubzeroCore(JsonDeserialize { source: e }))?;\n\n        return Err(RestError::Postgres(postgres_error));\n    }\n\n    #[derive(Deserialize)]\n    struct QueryResults {\n        /// we run two queries, so we want only two results.\n        results: (EnvRows, MainRows),\n    }\n\n    /// `env_statement` returns nothing of interest to us\n    #[derive(Deserialize)]\n    struct EnvRows {}\n\n    #[derive(Deserialize)]\n    struct MainRows {\n        /// `main_statement` only returns a single row.\n        rows: [MainRow; 1],\n    }\n\n    #[derive(Deserialize)]\n    struct MainRow {\n        body: String,\n        page_total: Option<String>,\n        total_result_set: Option<String>,\n        response_headers: Option<String>,\n        response_status: Option<String>,\n    }\n\n    let results: QueryResults = serde_json::from_slice(&bytes)\n        .map_err(|e| RestError::SubzeroCore(JsonDeserialize { source: e }))?;\n\n    let QueryResults {\n        results: (_, MainRows { rows: [row] }),\n    } = results;\n\n    // build the intermediate response object\n    let api_response = ApiResponse {\n        page_total: row.page_total.map_or(0, |v| v.parse::<u64>().unwrap_or(0)),\n        total_result_set: row.total_result_set.map(|v| v.parse::<u64>().unwrap_or(0)),\n        top_level_offset: 0, // FIXME: check why this is 0\n        response_headers: row.response_headers,\n        response_status: row.response_status,\n        body: row.body,\n    };\n\n    // TODO: rollback the transaction if the page_total is not 1 and the accept_content_type is SingularJSON\n    // we can not do this in the context of proxy for now\n    // if api_request.accept_content_type == SingularJSON && api_response.page_total != 1 {\n    //     // rollback the transaction here\n    //     return Err(RestError::SubzeroCore(SingularityError {\n    //         count: api_response.page_total,\n    //         content_type: \"application/vnd.pgrst.object+json\".to_string(),\n    
//     }));\n    // }\n\n    // TODO: rollback the transaction if the page_total is not 1 and the method is PUT\n    // we can not do this in the context of proxy for now\n    // if api_request.method == Method::PUT && api_response.page_total != 1 {\n    //     // Makes sure the querystring pk matches the payload pk\n    //     // e.g. PUT /items?id=eq.1 { \"id\" : 1, .. } is accepted,\n    //     // PUT /items?id=eq.14 { \"id\" : 2, .. } is rejected.\n    //     // If this condition is not satisfied then nothing is inserted,\n    //     // rollback the transaction here\n    //     return Err(RestError::SubzeroCore(PutMatchingPkError));\n    // }\n\n    // create and return the response to the client\n    // this section mostly deals with setting the right headers according to PostgREST specs\n    let page_total = api_response.page_total;\n    let total_result_set = api_response.total_result_set;\n    let top_level_offset = api_response.top_level_offset;\n    let response_content_type = match (&api_request.accept_content_type, &api_request.query.node) {\n        (SingularJSON, _)\n        | (\n            _,\n            FunctionCall {\n                returns_single: true,\n                is_scalar: false,\n                ..\n            },\n        ) => SingularJSON,\n        (TextCSV, _) => TextCSV,\n        _ => ApplicationJSON,\n    };\n\n    // check if the SQL env set some response headers (happens when we called a rpc function)\n    if let Some(response_headers_str) = api_response.response_headers {\n        let Ok(headers_json) =\n            serde_json::from_str::<Vec<Vec<(String, String)>>>(response_headers_str.as_str())\n        else {\n            return Err(RestError::SubzeroCore(GucHeadersError));\n        };\n\n        response_headers.extend(headers_json.into_iter().flatten());\n    }\n\n    // calculate and set the content range header\n    let lower = top_level_offset as i64;\n    let upper = top_level_offset as i64 + page_total as i64 - 1;\n   
 let total = total_result_set.map(|t| t as i64);\n    let content_range = match (&method, &api_request.query.node) {\n        (&Method::POST, Insert { .. }) => content_range_header(1, 0, total),\n        (&Method::DELETE, Delete { .. }) => content_range_header(1, upper, total),\n        _ => content_range_header(lower, upper, total),\n    };\n    response_headers.push((\"Content-Range\".to_string(), content_range));\n\n    // calculate the status code\n    #[rustfmt::skip]\n    let mut status = match (&method, &api_request.query.node, page_total, &api_request.preferences) {\n        (&Method::POST,   Insert { .. }, ..) => 201,\n        (&Method::DELETE, Delete { .. }, _, Some(Preferences {representation: Some(Representation::Full),..}),) => 200,\n        (&Method::DELETE, Delete { .. }, ..) => 204,\n        (&Method::PATCH,  Update { columns, .. }, 0, _) if !columns.is_empty() => 404,\n        (&Method::PATCH,  Update { .. }, _,Some(Preferences {representation: Some(Representation::Full),..}),) => 200,\n        (&Method::PATCH,  Update { .. }, ..) => 204,\n        (&Method::PUT,    Insert { .. },_,Some(Preferences {representation: Some(Representation::Full),..}),) => 200,\n        (&Method::PUT,    Insert { .. }, ..) 
=> 204,\n        _ => content_range_status(lower, upper, total),\n    };\n\n    // add the preference-applied header\n    if let Some(Preferences {\n        resolution: Some(r),\n        ..\n    }) = api_request.preferences\n    {\n        response_headers.push((\n            \"Preference-Applied\".to_string(),\n            match r {\n                MergeDuplicates => \"resolution=merge-duplicates\".to_string(),\n                IgnoreDuplicates => \"resolution=ignore-duplicates\".to_string(),\n            },\n        ));\n    }\n\n    // check if the SQL env set some response status (happens when we called a rpc function)\n    if let Some(response_status_str) = api_response.response_status {\n        status = response_status_str\n            .parse::<u16>()\n            .map_err(|_| RestError::SubzeroCore(GucStatusError))?;\n    }\n\n    // set the content type header\n    // TODO: move this to a subzero function\n    // as_header_value(&self) -> Option<&str>\n    let http_content_type = match response_content_type {\n        SingularJSON => Ok(\"application/vnd.pgrst.object+json\"),\n        TextCSV => Ok(\"text/csv\"),\n        ApplicationJSON => Ok(\"application/json\"),\n        Other(t) => Err(RestError::SubzeroCore(ContentTypeError {\n            message: format!(\"None of these Content-Types are available: {t}\"),\n        })),\n    }?;\n\n    // build the response body\n    let response_body = Full::new(Bytes::from(api_response.body))\n        .map_err(|never| match never {})\n        .boxed();\n\n    // build the response\n    response = response\n        .status(StatusCode::from_u16(status).unwrap_or(StatusCode::INTERNAL_SERVER_ERROR))\n        .header(CONTENT_TYPE, http_content_type);\n\n    // Add all headers from response_headers vector\n    for (header_name, header_value) in response_headers {\n        response = response.header(header_name, header_value);\n    }\n\n    // add the body and return the response\n    
response.body(response_body).map_err(|_| {\n        RestError::SubzeroCore(InternalError {\n            message: \"Failed to build response\".to_string(),\n        })\n    })\n}\n"
  },
  {
    "path": "proxy/src/serverless/sql_over_http.rs",
    "content": "use std::pin::pin;\nuse std::sync::Arc;\n\nuse bytes::Bytes;\nuse futures::future::{Either, select, try_join};\nuse futures::{StreamExt, TryFutureExt};\nuse http::Method;\nuse http::header::AUTHORIZATION;\nuse http_body_util::combinators::BoxBody;\nuse http_body_util::{BodyExt, Full};\nuse http_utils::error::ApiError;\nuse hyper::body::Incoming;\nuse hyper::http::{HeaderName, HeaderValue};\nuse hyper::{Request, Response, StatusCode, header};\nuse indexmap::IndexMap;\nuse postgres_client::error::{DbError, ErrorPosition, SqlState};\nuse postgres_client::{GenericClient, IsolationLevel, NoTls, ReadyForQueryStatus, Transaction};\nuse serde_json::Value;\nuse serde_json::value::RawValue;\nuse tokio::time::{self, Instant};\nuse tokio_util::sync::CancellationToken;\nuse tracing::{Level, debug, error, info};\nuse typed_json::json;\n\nuse super::backend::{LocalProxyConnError, PoolingBackend};\nuse super::conn_pool::AuthData;\nuse super::conn_pool_lib::{self, ConnInfo};\nuse super::error::{ConnInfoError, HttpCodeError, ReadPayloadError};\nuse super::http_util::{\n    ALLOW_POOL, ARRAY_MODE, CONN_STRING, NEON_REQUEST_ID, RAW_TEXT_OUTPUT, TXN_DEFERRABLE,\n    TXN_ISOLATION_LEVEL, TXN_READ_ONLY, get_conn_info, json_response, uuid_to_header_value,\n};\nuse super::json::{JsonConversionError, json_to_pg_text, pg_text_row_to_json};\nuse crate::auth::backend::ComputeCredentialKeys;\nuse crate::config::{HttpConfig, ProxyConfig};\nuse crate::context::RequestContext;\nuse crate::error::{ErrorKind, ReportableError, UserFacingError};\nuse crate::http::read_body_with_limit;\nuse crate::metrics::{HttpDirection, Metrics};\nuse crate::serverless::backend::HttpConnError;\nuse crate::usage_metrics::{MetricCounter, MetricCounterRecorder};\nuse crate::util::run_until_cancelled;\n\n#[derive(serde::Deserialize)]\n#[serde(rename_all = \"camelCase\")]\nstruct QueryData {\n    query: String,\n    #[serde(deserialize_with = \"bytes_to_pg_text\")]\n    #[serde(default)]\n    params: 
Vec<Option<String>>,\n    #[serde(default)]\n    array_mode: Option<bool>,\n}\n\n#[derive(serde::Deserialize)]\nstruct BatchQueryData {\n    queries: Vec<QueryData>,\n}\n\n#[derive(serde::Deserialize)]\n#[serde(untagged)]\nenum Payload {\n    Single(QueryData),\n    Batch(BatchQueryData),\n}\n\npub(super) const HEADER_VALUE_TRUE: HeaderValue = HeaderValue::from_static(\"true\");\n\nfn bytes_to_pg_text<'de, D>(deserializer: D) -> Result<Vec<Option<String>>, D::Error>\nwhere\n    D: serde::de::Deserializer<'de>,\n{\n    // TODO: consider avoiding the allocation here.\n    let json: Vec<Value> = serde::de::Deserialize::deserialize(deserializer)?;\n    Ok(json_to_pg_text(json))\n}\n\npub(crate) async fn handle(\n    config: &'static ProxyConfig,\n    ctx: RequestContext,\n    request: Request<Incoming>,\n    backend: Arc<PoolingBackend>,\n    cancel: CancellationToken,\n) -> Result<Response<BoxBody<Bytes, hyper::Error>>, ApiError> {\n    let result = handle_inner(cancel, config, &ctx, request, backend).await;\n\n    let mut response = match result {\n        Ok(r) => {\n            ctx.set_success();\n\n            // Handling the error response from local proxy here\n            if config.authentication_config.is_auth_broker && r.status().is_server_error() {\n                let status = r.status();\n\n                let body_bytes = r\n                    .collect()\n                    .await\n                    .map_err(|e| {\n                        ApiError::InternalServerError(anyhow::Error::msg(format!(\n                            \"could not collect http body: {e}\"\n                        )))\n                    })?\n                    .to_bytes();\n\n                if let Ok(mut json_map) =\n                    serde_json::from_slice::<IndexMap<&str, &RawValue>>(&body_bytes)\n                {\n                    let message = json_map.get(\"message\");\n                    if let Some(message) = message {\n                        let msg: String = 
match serde_json::from_str(message.get()) {\n                            Ok(msg) => msg,\n                            Err(_) => {\n                                \"Unable to parse the response message from server\".to_string()\n                            }\n                        };\n\n                        error!(\"Error response from local_proxy: {status} {msg}\");\n\n                        json_map.retain(|key, _| !key.starts_with(\"neon:\")); // remove all the neon-related keys\n\n                        let resp_json = serde_json::to_string(&json_map)\n                            .unwrap_or(\"failed to serialize the response message\".to_string());\n\n                        return json_response(status, resp_json);\n                    }\n                }\n\n                error!(\"Unable to parse the response message from local_proxy\");\n                return json_response(\n                    status,\n                    json!({ \"message\": \"Unable to parse the response message from server\".to_string() }),\n                );\n            }\n            r\n        }\n        Err(e @ SqlOverHttpError::Cancelled(_)) => {\n            let error_kind = e.get_error_kind();\n            ctx.set_error_kind(error_kind);\n\n            let message = \"Query cancelled, connection was terminated\";\n\n            tracing::info!(\n                kind=error_kind.to_metric_label(),\n                error=%e,\n                msg=message,\n                \"forwarding error to user\"\n            );\n\n            json_response(\n                StatusCode::BAD_REQUEST,\n                json!({ \"message\": message, \"code\": SqlState::PROTOCOL_VIOLATION.code() }),\n            )?\n        }\n        Err(e) => {\n            let error_kind = e.get_error_kind();\n            ctx.set_error_kind(error_kind);\n\n            let mut message = e.to_string_client();\n            let db_error = match &e {\n                
SqlOverHttpError::ConnectCompute(HttpConnError::PostgresConnectionError(e))\n                | SqlOverHttpError::Postgres(e) => e.as_db_error(),\n                _ => None,\n            };\n            fn get<'a, T: Default>(db: Option<&'a DbError>, x: impl FnOnce(&'a DbError) -> T) -> T {\n                db.map(x).unwrap_or_default()\n            }\n\n            if let Some(db_error) = db_error {\n                db_error.message().clone_into(&mut message);\n            }\n\n            let position = db_error.and_then(|db| db.position());\n            let (position, internal_position, internal_query) = match position {\n                Some(ErrorPosition::Original(position)) => (Some(position.to_string()), None, None),\n                Some(ErrorPosition::Internal { position, query }) => {\n                    (None, Some(position.to_string()), Some(query.clone()))\n                }\n                None => (None, None, None),\n            };\n\n            let code = get(db_error, |db| db.code().code());\n            let severity = get(db_error, |db| db.severity());\n            let detail = get(db_error, |db| db.detail());\n            let hint = get(db_error, |db| db.hint());\n            let where_ = get(db_error, |db| db.where_());\n            let table = get(db_error, |db| db.table());\n            let column = get(db_error, |db| db.column());\n            let schema = get(db_error, |db| db.schema());\n            let datatype = get(db_error, |db| db.datatype());\n            let constraint = get(db_error, |db| db.constraint());\n            let file = get(db_error, |db| db.file());\n            let line = get(db_error, |db| db.line().map(|l| l.to_string()));\n            let routine = get(db_error, |db| db.routine());\n\n            if db_error.is_some() && error_kind == ErrorKind::User {\n                // this error contains too much info, and it's not an error we care about.\n                if tracing::enabled!(Level::DEBUG) {\n                    
debug!(\n                        kind=error_kind.to_metric_label(),\n                        error=%e,\n                        msg=message,\n                        \"forwarding error to user\"\n                    );\n                } else {\n                    info!(\n                        kind = error_kind.to_metric_label(),\n                        error = \"bad query\",\n                        \"forwarding error to user\"\n                    );\n                }\n            } else {\n                info!(\n                    kind=error_kind.to_metric_label(),\n                    error=%e,\n                    msg=message,\n                    \"forwarding error to user\"\n                );\n            }\n\n            json_response(\n                e.get_http_status_code(),\n                json!({\n                    \"message\": message,\n                    \"code\": code,\n                    \"detail\": detail,\n                    \"hint\": hint,\n                    \"position\": position,\n                    \"internalPosition\": internal_position,\n                    \"internalQuery\": internal_query,\n                    \"severity\": severity,\n                    \"where\": where_,\n                    \"table\": table,\n                    \"column\": column,\n                    \"schema\": schema,\n                    \"dataType\": datatype,\n                    \"constraint\": constraint,\n                    \"file\": file,\n                    \"line\": line,\n                    \"routine\": routine,\n                }),\n            )?\n        }\n    };\n\n    response\n        .headers_mut()\n        .insert(\"Access-Control-Allow-Origin\", HeaderValue::from_static(\"*\"));\n    Ok(response)\n}\n\n#[derive(Debug, thiserror::Error)]\npub(crate) enum SqlOverHttpError {\n    #[error(\"{0}\")]\n    ReadPayload(#[from] ReadPayloadError),\n    #[error(\"{0}\")]\n    ConnectCompute(#[from] HttpConnError),\n    
#[error(\"{0}\")]\n    ConnInfo(#[from] ConnInfoError),\n    #[error(\"response is too large (max is {0} bytes)\")]\n    ResponseTooLarge(usize),\n    #[error(\"invalid isolation level\")]\n    InvalidIsolationLevel,\n    /// for queries our customers choose to run\n    #[error(\"{0}\")]\n    Postgres(#[source] postgres_client::Error),\n    /// for queries we choose to run\n    #[error(\"{0}\")]\n    InternalPostgres(#[source] postgres_client::Error),\n    #[error(\"{0}\")]\n    JsonConversion(#[from] JsonConversionError),\n    #[error(\"{0}\")]\n    Cancelled(SqlOverHttpCancel),\n}\n\nimpl ReportableError for SqlOverHttpError {\n    fn get_error_kind(&self) -> ErrorKind {\n        match self {\n            SqlOverHttpError::ReadPayload(e) => e.get_error_kind(),\n            SqlOverHttpError::ConnectCompute(e) => e.get_error_kind(),\n            SqlOverHttpError::ConnInfo(e) => e.get_error_kind(),\n            SqlOverHttpError::ResponseTooLarge(_) => ErrorKind::User,\n            SqlOverHttpError::InvalidIsolationLevel => ErrorKind::User,\n            // customer initiated SQL errors.\n            SqlOverHttpError::Postgres(p) => {\n                if p.as_db_error().is_some() {\n                    ErrorKind::User\n                } else {\n                    ErrorKind::Compute\n                }\n            }\n            // proxy initiated SQL errors.\n            SqlOverHttpError::InternalPostgres(p) => {\n                if p.as_db_error().is_some() {\n                    ErrorKind::Service\n                } else {\n                    ErrorKind::Compute\n                }\n            }\n            // postgres returned a bad row format that we couldn't parse.\n            SqlOverHttpError::JsonConversion(_) => ErrorKind::Postgres,\n            SqlOverHttpError::Cancelled(c) => c.get_error_kind(),\n        }\n    }\n}\n\nimpl UserFacingError for SqlOverHttpError {\n    fn to_string_client(&self) -> String {\n        match self {\n            
SqlOverHttpError::ReadPayload(p) => p.to_string(),\n            SqlOverHttpError::ConnectCompute(c) => c.to_string_client(),\n            SqlOverHttpError::ConnInfo(c) => c.to_string_client(),\n            SqlOverHttpError::ResponseTooLarge(_) => self.to_string(),\n            SqlOverHttpError::InvalidIsolationLevel => self.to_string(),\n            SqlOverHttpError::Postgres(p) => p.to_string(),\n            SqlOverHttpError::InternalPostgres(p) => p.to_string(),\n            SqlOverHttpError::JsonConversion(_) => \"could not parse postgres response\".to_string(),\n            SqlOverHttpError::Cancelled(_) => self.to_string(),\n        }\n    }\n}\n\nimpl HttpCodeError for SqlOverHttpError {\n    fn get_http_status_code(&self) -> StatusCode {\n        match self {\n            SqlOverHttpError::ReadPayload(e) => e.get_http_status_code(),\n            SqlOverHttpError::ConnectCompute(h) => match h.get_error_kind() {\n                ErrorKind::User => StatusCode::BAD_REQUEST,\n                _ => StatusCode::INTERNAL_SERVER_ERROR,\n            },\n            SqlOverHttpError::ConnInfo(_) => StatusCode::BAD_REQUEST,\n            SqlOverHttpError::ResponseTooLarge(_) => StatusCode::INSUFFICIENT_STORAGE,\n            SqlOverHttpError::InvalidIsolationLevel => StatusCode::BAD_REQUEST,\n            SqlOverHttpError::Postgres(_) => StatusCode::BAD_REQUEST,\n            SqlOverHttpError::InternalPostgres(_) => StatusCode::INTERNAL_SERVER_ERROR,\n            SqlOverHttpError::JsonConversion(_) => StatusCode::INTERNAL_SERVER_ERROR,\n            SqlOverHttpError::Cancelled(_) => StatusCode::INTERNAL_SERVER_ERROR,\n        }\n    }\n}\n\n#[derive(Debug, thiserror::Error)]\npub(crate) enum SqlOverHttpCancel {\n    #[error(\"query was cancelled\")]\n    Postgres,\n    #[error(\"query was cancelled while stuck trying to connect to the database\")]\n    Connect,\n}\n\nimpl ReportableError for SqlOverHttpCancel {\n    fn get_error_kind(&self) -> ErrorKind {\n        match self 
{\n            SqlOverHttpCancel::Postgres => ErrorKind::ClientDisconnect,\n            SqlOverHttpCancel::Connect => ErrorKind::ClientDisconnect,\n        }\n    }\n}\n\n#[derive(Clone, Copy, Debug)]\nstruct HttpHeaders {\n    raw_output: bool,\n    default_array_mode: bool,\n    txn_isolation_level: Option<IsolationLevel>,\n    txn_read_only: bool,\n    txn_deferrable: bool,\n}\n\nimpl HttpHeaders {\n    fn try_parse(headers: &hyper::http::HeaderMap) -> Result<Self, SqlOverHttpError> {\n        // Determine the output options. Default behaviour is 'false'. Anything that is not\n        // strictly 'true' is assumed to be false.\n        let raw_output = headers.get(&RAW_TEXT_OUTPUT) == Some(&HEADER_VALUE_TRUE);\n        let default_array_mode = headers.get(&ARRAY_MODE) == Some(&HEADER_VALUE_TRUE);\n\n        // isolation level, read only and deferrable\n        let txn_isolation_level = match headers.get(&TXN_ISOLATION_LEVEL) {\n            Some(x) => Some(\n                map_header_to_isolation_level(x).ok_or(SqlOverHttpError::InvalidIsolationLevel)?,\n            ),\n            None => None,\n        };\n\n        let txn_read_only = headers.get(&TXN_READ_ONLY) == Some(&HEADER_VALUE_TRUE);\n        let txn_deferrable = headers.get(&TXN_DEFERRABLE) == Some(&HEADER_VALUE_TRUE);\n\n        Ok(Self {\n            raw_output,\n            default_array_mode,\n            txn_isolation_level,\n            txn_read_only,\n            txn_deferrable,\n        })\n    }\n}\n\nfn map_header_to_isolation_level(level: &HeaderValue) -> Option<IsolationLevel> {\n    match level.as_bytes() {\n        b\"Serializable\" => Some(IsolationLevel::Serializable),\n        b\"ReadUncommitted\" => Some(IsolationLevel::ReadUncommitted),\n        b\"ReadCommitted\" => Some(IsolationLevel::ReadCommitted),\n        b\"RepeatableRead\" => Some(IsolationLevel::RepeatableRead),\n        _ => None,\n    }\n}\n\nfn map_isolation_level_to_headers(level: IsolationLevel) -> Option<HeaderValue> 
{\n    match level {\n        IsolationLevel::ReadUncommitted => Some(HeaderValue::from_static(\"ReadUncommitted\")),\n        IsolationLevel::ReadCommitted => Some(HeaderValue::from_static(\"ReadCommitted\")),\n        IsolationLevel::RepeatableRead => Some(HeaderValue::from_static(\"RepeatableRead\")),\n        IsolationLevel::Serializable => Some(HeaderValue::from_static(\"Serializable\")),\n        _ => None,\n    }\n}\n\nasync fn handle_inner(\n    cancel: CancellationToken,\n    config: &'static ProxyConfig,\n    ctx: &RequestContext,\n    request: Request<Incoming>,\n    backend: Arc<PoolingBackend>,\n) -> Result<Response<BoxBody<Bytes, hyper::Error>>, SqlOverHttpError> {\n    let _requeset_gauge = Metrics::get()\n        .proxy\n        .connection_requests\n        .guard(ctx.protocol());\n    info!(\n        protocol = %ctx.protocol(),\n        \"handling interactive connection from client\"\n    );\n\n    let conn_info = get_conn_info(&config.authentication_config, ctx, None, request.headers())?;\n    info!(\n        user = conn_info.conn_info.user_info.user.as_str(),\n        \"credentials\"\n    );\n\n    match conn_info.auth {\n        AuthData::Jwt(jwt) if config.authentication_config.is_auth_broker => {\n            handle_auth_broker_inner(ctx, request, conn_info.conn_info, jwt, backend).await\n        }\n        auth => {\n            handle_db_inner(\n                cancel,\n                config,\n                ctx,\n                request,\n                conn_info.conn_info,\n                auth,\n                backend,\n            )\n            .await\n        }\n    }\n}\n\nasync fn handle_db_inner(\n    cancel: CancellationToken,\n    config: &'static ProxyConfig,\n    ctx: &RequestContext,\n    request: Request<Incoming>,\n    conn_info: ConnInfo,\n    auth: AuthData,\n    backend: Arc<PoolingBackend>,\n) -> Result<Response<BoxBody<Bytes, hyper::Error>>, SqlOverHttpError> {\n    //\n    // Determine the destination and 
connection params\n    //\n    let headers = request.headers();\n\n    // Allow connection pooling only if explicitly requested\n    // or if we have decided that http pool is no longer opt-in\n    let allow_pool = !config.http_config.pool_options.opt_in\n        || headers.get(&ALLOW_POOL) == Some(&HEADER_VALUE_TRUE);\n\n    let parsed_headers = HttpHeaders::try_parse(headers)?;\n\n    let mut request_len = 0;\n    let fetch_and_process_request = Box::pin(\n        async {\n            let body = read_body_with_limit(\n                request.into_body(),\n                config.http_config.max_request_size_bytes,\n            )\n            .await?;\n\n            request_len = body.len();\n\n            Metrics::get()\n                .proxy\n                .http_conn_content_length_bytes\n                .observe(HttpDirection::Request, body.len() as f64);\n\n            debug!(length = body.len(), \"request payload read\");\n            let payload: Payload = serde_json::from_slice(&body)?;\n            Ok::<Payload, ReadPayloadError>(payload) // Adjust error type accordingly\n        }\n        .map_err(SqlOverHttpError::from),\n    );\n\n    let authenticate_and_connect = Box::pin(\n        async {\n            let keys = match auth {\n                AuthData::Password(pw) => backend\n                    .authenticate_with_password(ctx, &conn_info.user_info, &pw)\n                    .await\n                    .map_err(HttpConnError::AuthError)?,\n                AuthData::Jwt(jwt) => backend\n                    .authenticate_with_jwt(ctx, &conn_info.user_info, jwt)\n                    .await\n                    .map_err(HttpConnError::AuthError)?,\n            };\n\n            let client = match keys.keys {\n                ComputeCredentialKeys::JwtPayload(payload)\n                    if backend.auth_backend.is_local_proxy() =>\n                {\n                    #[cfg(feature = \"testing\")]\n                    let disable_pg_session_jwt = 
config.disable_pg_session_jwt;\n                    #[cfg(not(feature = \"testing\"))]\n                    let disable_pg_session_jwt = false;\n                    let mut client = backend\n                        .connect_to_local_postgres(ctx, conn_info, disable_pg_session_jwt)\n                        .await?;\n                    if !disable_pg_session_jwt {\n                        let (cli_inner, _dsc) = client.client_inner();\n                        cli_inner.set_jwt_session(&payload).await?;\n                    }\n                    Client::Local(client)\n                }\n                _ => {\n                    let client = backend\n                        .connect_to_compute(ctx, conn_info, keys, !allow_pool)\n                        .await?;\n                    Client::Remote(client)\n                }\n            };\n\n            // not strictly necessary to mark success here,\n            // but it's just insurance for if we forget it somewhere else\n            ctx.success();\n            Ok::<_, SqlOverHttpError>(client)\n        }\n        .map_err(SqlOverHttpError::from),\n    );\n\n    let (payload, mut client) = match run_until_cancelled(\n        // Run both operations in parallel\n        try_join(\n            pin!(fetch_and_process_request),\n            pin!(authenticate_and_connect),\n        ),\n        &cancel,\n    )\n    .await\n    {\n        Some(result) => result?,\n        None => return Err(SqlOverHttpError::Cancelled(SqlOverHttpCancel::Connect)),\n    };\n\n    let mut response = Response::builder()\n        .status(StatusCode::OK)\n        .header(header::CONTENT_TYPE, \"application/json\");\n\n    // Now execute the query and return the result.\n    let json_output = match payload {\n        Payload::Single(stmt) => {\n            stmt.process(&config.http_config, cancel, &mut client, parsed_headers)\n                .await?\n        }\n        Payload::Batch(statements) => {\n            if 
parsed_headers.txn_read_only {\n                response = response.header(TXN_READ_ONLY.clone(), &HEADER_VALUE_TRUE);\n            }\n            if parsed_headers.txn_deferrable {\n                response = response.header(TXN_DEFERRABLE.clone(), &HEADER_VALUE_TRUE);\n            }\n            if let Some(txn_isolation_level) = parsed_headers\n                .txn_isolation_level\n                .and_then(map_isolation_level_to_headers)\n            {\n                response = response.header(TXN_ISOLATION_LEVEL.clone(), txn_isolation_level);\n            }\n\n            statements\n                .process(&config.http_config, cancel, &mut client, parsed_headers)\n                .await?\n        }\n    };\n\n    let metrics = client.metrics(ctx);\n\n    let len = json_output.len();\n    let response = response\n        .body(\n            Full::new(Bytes::from(json_output))\n                .map_err(|x| match x {})\n                .boxed(),\n        )\n        // only fails if invalid status code or invalid header/values are given.\n        // these are not user configurable so it cannot fail dynamically\n        .expect(\"building response payload should not fail\");\n\n    // count the egress bytes - we miss the TLS and header overhead but oh well...\n    // moving this later in the stack is going to be a lot of effort and ehhhh\n    metrics.record_egress(len as u64);\n    metrics.record_ingress(request_len as u64);\n\n    Metrics::get()\n        .proxy\n        .http_conn_content_length_bytes\n        .observe(HttpDirection::Response, len as f64);\n\n    Ok(response)\n}\n\nstatic HEADERS_TO_FORWARD: &[&HeaderName] = &[\n    &AUTHORIZATION,\n    &CONN_STRING,\n    &RAW_TEXT_OUTPUT,\n    &ARRAY_MODE,\n    &TXN_ISOLATION_LEVEL,\n    &TXN_READ_ONLY,\n    &TXN_DEFERRABLE,\n];\n\nasync fn handle_auth_broker_inner(\n    ctx: &RequestContext,\n    request: Request<Incoming>,\n    conn_info: ConnInfo,\n    jwt: String,\n    backend: Arc<PoolingBackend>,\n) -> 
Result<Response<BoxBody<Bytes, hyper::Error>>, SqlOverHttpError> {\n    backend\n        .authenticate_with_jwt(ctx, &conn_info.user_info, jwt)\n        .await\n        .map_err(HttpConnError::from)?;\n\n    let mut client = backend.connect_to_local_proxy(ctx, conn_info).await?;\n\n    let local_proxy_uri = ::http::Uri::from_static(\"http://proxy.local/sql\");\n\n    let (mut parts, body) = request.into_parts();\n    let mut req = Request::builder().method(Method::POST).uri(local_proxy_uri);\n\n    // todo(conradludgate): maybe auth-broker should parse these and re-serialize\n    // these instead just to ensure they remain normalised.\n    for &h in HEADERS_TO_FORWARD {\n        if let Some(hv) = parts.headers.remove(h) {\n            req = req.header(h, hv);\n        }\n    }\n    req = req.header(&NEON_REQUEST_ID, uuid_to_header_value(ctx.session_id()));\n\n    let req = req\n        .body(body.map_err(|e| e).boxed()) //TODO: is there a potential for a regression here?\n        .expect(\"all headers and params received via hyper should be valid for request\");\n\n    // todo: map body to count egress\n    let _metrics = client.metrics(ctx);\n\n    Ok(client\n        .inner\n        .inner\n        .send_request(req)\n        .await\n        .map_err(LocalProxyConnError::from)\n        .map_err(HttpConnError::from)?\n        .map(|b| b.boxed()))\n}\n\nimpl QueryData {\n    async fn process(\n        self,\n        config: &'static HttpConfig,\n        cancel: CancellationToken,\n        client: &mut Client,\n        parsed_headers: HttpHeaders,\n    ) -> Result<String, SqlOverHttpError> {\n        let (inner, mut discard) = client.inner();\n        let cancel_token = inner.cancel_token();\n\n        let mut json_buf = vec![];\n\n        let batch_result = match select(\n            pin!(query_to_json(\n                config,\n                &mut *inner,\n                self,\n                json::ValueSer::new(&mut json_buf),\n                parsed_headers\n  
          )),\n            pin!(cancel.cancelled()),\n        )\n        .await\n        {\n            Either::Left((res, __not_yet_cancelled)) => res,\n            Either::Right((_cancelled, query)) => {\n                tracing::info!(\"cancelling query\");\n                if let Err(err) = cancel_token.cancel_query(NoTls).await {\n                    tracing::warn!(?err, \"could not cancel query\");\n                }\n                // wait for the query cancellation\n                match time::timeout(time::Duration::from_millis(100), query).await {\n                    // query succeeded before it was cancelled.\n                    Ok(Ok(status)) => Ok(status),\n                    // query failed or was cancelled.\n                    Ok(Err(error)) => {\n                        let db_error = match &error {\n                            SqlOverHttpError::ConnectCompute(\n                                HttpConnError::PostgresConnectionError(e),\n                            )\n                            | SqlOverHttpError::Postgres(e) => e.as_db_error(),\n                            _ => None,\n                        };\n\n                        // if errored for some other reason, it might not be safe to return\n                        if !db_error.is_some_and(|e| *e.code() == SqlState::QUERY_CANCELED) {\n                            discard.discard();\n                        }\n\n                        return Err(SqlOverHttpError::Cancelled(SqlOverHttpCancel::Postgres));\n                    }\n                    Err(_timeout) => {\n                        discard.discard();\n                        return Err(SqlOverHttpError::Cancelled(SqlOverHttpCancel::Postgres));\n                    }\n                }\n            }\n        };\n\n        match batch_result {\n            // The query successfully completed.\n            Ok(_) => {\n                let json_output = String::from_utf8(json_buf).expect(\"json should be valid utf8\");\n       
         Ok(json_output)\n            }\n            // The query failed with an error\n            Err(e) => {\n                discard.discard();\n                Err(e)\n            }\n        }\n    }\n}\n\nimpl BatchQueryData {\n    async fn process(\n        self,\n        config: &'static HttpConfig,\n        cancel: CancellationToken,\n        client: &mut Client,\n        parsed_headers: HttpHeaders,\n    ) -> Result<String, SqlOverHttpError> {\n        info!(\"starting transaction\");\n        let (inner, mut discard) = client.inner();\n        let cancel_token = inner.cancel_token();\n        let mut builder = inner.build_transaction();\n        if let Some(isolation_level) = parsed_headers.txn_isolation_level {\n            builder = builder.isolation_level(isolation_level);\n        }\n        if parsed_headers.txn_read_only {\n            builder = builder.read_only(true);\n        }\n        if parsed_headers.txn_deferrable {\n            builder = builder.deferrable(true);\n        }\n\n        let mut transaction = builder\n            .start()\n            .await\n            .inspect_err(|_| {\n                // if we cannot start a transaction, we should return immediately\n                // and not return to the pool. 
connection is clearly broken\n                discard.discard();\n            })\n            .map_err(SqlOverHttpError::Postgres)?;\n\n        let json_output = match query_batch_to_json(\n            config,\n            cancel.child_token(),\n            &mut transaction,\n            self,\n            parsed_headers,\n        )\n        .await\n        {\n            Ok(json_output) => {\n                info!(\"commit\");\n                transaction\n                    .commit()\n                    .await\n                    .inspect_err(|_| {\n                        // if we cannot commit - for now don't return connection to pool\n                        // TODO: get a query status from the error\n                        discard.discard();\n                    })\n                    .map_err(SqlOverHttpError::Postgres)?;\n                json_output\n            }\n            Err(SqlOverHttpError::Cancelled(_)) => {\n                if let Err(err) = cancel_token.cancel_query(NoTls).await {\n                    tracing::warn!(?err, \"could not cancel query\");\n                }\n                // TODO: after cancelling, wait to see if we can get a status. 
maybe the connection is still safe.\n                discard.discard();\n\n                return Err(SqlOverHttpError::Cancelled(SqlOverHttpCancel::Postgres));\n            }\n            Err(err) => {\n                return Err(err);\n            }\n        };\n\n        Ok(json_output)\n    }\n}\n\nasync fn query_batch(\n    config: &'static HttpConfig,\n    cancel: CancellationToken,\n    transaction: &mut Transaction<'_>,\n    queries: BatchQueryData,\n    parsed_headers: HttpHeaders,\n    results: &mut json::ListSer<'_>,\n) -> Result<(), SqlOverHttpError> {\n    for stmt in queries.queries {\n        let query = pin!(query_to_json(\n            config,\n            transaction,\n            stmt,\n            results.entry(),\n            parsed_headers,\n        ));\n        let cancelled = pin!(cancel.cancelled());\n        let res = select(query, cancelled).await;\n        match res {\n            // TODO: maybe we should check that the transaction bit is set here\n            Either::Left((Ok(_), _cancelled)) => {}\n            Either::Left((Err(e), _cancelled)) => {\n                return Err(e);\n            }\n            Either::Right((_cancelled, _)) => {\n                return Err(SqlOverHttpError::Cancelled(SqlOverHttpCancel::Postgres));\n            }\n        }\n    }\n\n    Ok(())\n}\n\nasync fn query_batch_to_json(\n    config: &'static HttpConfig,\n    cancel: CancellationToken,\n    tx: &mut Transaction<'_>,\n    queries: BatchQueryData,\n    headers: HttpHeaders,\n) -> Result<String, SqlOverHttpError> {\n    let json_output = json::value_to_string!(|obj| json::value_as_object!(|obj| {\n        let results = obj.key(\"results\");\n        json::value_as_list!(|results| {\n            query_batch(config, cancel, tx, queries, headers, results).await?;\n        });\n    }));\n\n    Ok(json_output)\n}\n\nasync fn query_to_json<T: GenericClient>(\n    config: &'static HttpConfig,\n    client: &mut T,\n    data: QueryData,\n    output: 
json::ValueSer<'_>,\n    parsed_headers: HttpHeaders,\n) -> Result<ReadyForQueryStatus, SqlOverHttpError> {\n    let query_start = Instant::now();\n\n    let mut output = json::ObjectSer::new(output);\n    let mut row_stream = client\n        .query_raw_txt(&data.query, data.params)\n        .await\n        .map_err(SqlOverHttpError::Postgres)?;\n    let query_acknowledged = Instant::now();\n\n    let mut json_fields = output.key(\"fields\").list();\n    for c in row_stream.statement.columns() {\n        let json_field = json_fields.entry();\n        json::value_as_object!(|json_field| {\n            json_field.entry(\"name\", c.name());\n            json_field.entry(\"dataTypeID\", c.type_().oid());\n            json_field.entry(\"tableID\", c.table_oid());\n            json_field.entry(\"columnID\", c.column_id());\n            json_field.entry(\"dataTypeSize\", c.type_size());\n            json_field.entry(\"dataTypeModifier\", c.type_modifier());\n            json_field.entry(\"format\", \"text\");\n        });\n    }\n    json_fields.finish();\n\n    let array_mode = data.array_mode.unwrap_or(parsed_headers.default_array_mode);\n    let raw_output = parsed_headers.raw_output;\n\n    // Manually drain the stream into a vector to leave row_stream hanging\n    // around to get a command tag. 
Also check that the response is not too\n    // big.\n    let mut rows = 0;\n    let mut json_rows = output.key(\"rows\").list();\n    while let Some(row) = row_stream.next().await {\n        let row = row.map_err(SqlOverHttpError::Postgres)?;\n\n        // we don't have a streaming response support yet so this is to prevent OOM\n        // from a malicious query (eg a cross join)\n        if json_rows.as_buffer().len() > config.max_response_size_bytes {\n            return Err(SqlOverHttpError::ResponseTooLarge(\n                config.max_response_size_bytes,\n            ));\n        }\n\n        pg_text_row_to_json(json_rows.entry(), &row, raw_output, array_mode)?;\n        rows += 1;\n\n        // assumption: parsing pg text and converting to json takes CPU time.\n        // let's assume it is slightly expensive, so we should consume some cooperative budget.\n        // Especially considering that `RowStream::next` might be pulling from a batch\n        // of rows and never hit the tokio mpsc for a long time (although unlikely).\n        tokio::task::consume_budget().await;\n    }\n    json_rows.finish();\n\n    let query_resp_end = Instant::now();\n\n    let ready = row_stream.status;\n\n    // grab the command tag and number of rows affected\n    let command_tag = row_stream.command_tag.unwrap_or_default();\n    let mut command_tag_split = command_tag.split(' ');\n    let command_tag_name = command_tag_split.next().unwrap_or_default();\n    let command_tag_count = if command_tag_name == \"INSERT\" {\n        // INSERT returns OID first and then number of rows\n        command_tag_split.nth(1)\n    } else {\n        // other commands return number of rows (if any)\n        command_tag_split.next()\n    }\n    .and_then(|s| s.parse::<i64>().ok());\n\n    info!(\n        rows,\n        ?ready,\n        command_tag,\n        acknowledgement = ?(query_acknowledged - query_start),\n        response = ?(query_resp_end - query_start),\n        \"finished executing 
query\"\n    );\n\n    output.entry(\"command\", command_tag_name);\n    output.entry(\"rowCount\", command_tag_count);\n    output.entry(\"rowAsArray\", array_mode);\n\n    output.finish();\n    Ok(ready)\n}\n\nenum Client {\n    Remote(conn_pool_lib::Client<postgres_client::Client>),\n    Local(conn_pool_lib::Client<postgres_client::Client>),\n}\n\nenum Discard<'a> {\n    Remote(conn_pool_lib::Discard<'a, postgres_client::Client>),\n    Local(conn_pool_lib::Discard<'a, postgres_client::Client>),\n}\n\nimpl Client {\n    fn metrics(&self, ctx: &RequestContext) -> Arc<MetricCounter> {\n        match self {\n            Client::Remote(client) => client.metrics(ctx),\n            Client::Local(local_client) => local_client.metrics(ctx),\n        }\n    }\n\n    fn inner(&mut self) -> (&mut postgres_client::Client, Discard<'_>) {\n        match self {\n            Client::Remote(client) => {\n                let (c, d) = client.inner();\n                (c, Discard::Remote(d))\n            }\n            Client::Local(local_client) => {\n                let (c, d) = local_client.inner();\n                (c, Discard::Local(d))\n            }\n        }\n    }\n}\n\nimpl Discard<'_> {\n    fn discard(&mut self) {\n        match self {\n            Discard::Remote(discard) => discard.discard(),\n            Discard::Local(discard) => discard.discard(),\n        }\n    }\n}\n\n#[cfg(test)]\nmod tests {\n    use super::*;\n\n    #[test]\n    fn test_payload() {\n        let payload = \"{\\\"query\\\":\\\"SELECT * FROM users WHERE name = ?\\\",\\\"params\\\":[\\\"test\\\"],\\\"arrayMode\\\":true}\";\n        let deserialized_payload: Payload = serde_json::from_str(payload).unwrap();\n\n        match deserialized_payload {\n            Payload::Single(QueryData {\n                query,\n                params,\n                array_mode,\n            }) => {\n                assert_eq!(query, \"SELECT * FROM users WHERE name = ?\");\n                assert_eq!(params, 
vec![Some(String::from(\"test\"))]);\n                assert!(array_mode.unwrap());\n            }\n            Payload::Batch(_) => {\n                panic!(\"deserialization failed: case with single query, one param, and array mode\")\n            }\n        }\n\n        let payload = \"{\\\"queries\\\":[{\\\"query\\\":\\\"SELECT * FROM users0 WHERE name = ?\\\",\\\"params\\\":[\\\"test0\\\"], \\\"arrayMode\\\":false},{\\\"query\\\":\\\"SELECT * FROM users1 WHERE name = ?\\\",\\\"params\\\":[\\\"test1\\\"],\\\"arrayMode\\\":true}]}\";\n        let deserialized_payload: Payload = serde_json::from_str(payload).unwrap();\n\n        match deserialized_payload {\n            Payload::Batch(BatchQueryData { queries }) => {\n                assert_eq!(queries.len(), 2);\n                for (i, query) in queries.into_iter().enumerate() {\n                    assert_eq!(\n                        query.query,\n                        format!(\"SELECT * FROM users{i} WHERE name = ?\")\n                    );\n                    assert_eq!(query.params, vec![Some(format!(\"test{i}\"))]);\n                    assert_eq!(query.array_mode.unwrap(), i > 0);\n                }\n            }\n            Payload::Single(_) => panic!(\"deserialization failed: case with multiple queries\"),\n        }\n\n        let payload = \"{\\\"query\\\":\\\"SELECT 1\\\"}\";\n        let deserialized_payload: Payload = serde_json::from_str(payload).unwrap();\n\n        match deserialized_payload {\n            Payload::Single(QueryData {\n                query,\n                params,\n                array_mode,\n            }) => {\n                assert_eq!(query, \"SELECT 1\");\n                assert_eq!(params, vec![]);\n                assert!(array_mode.is_none());\n            }\n            Payload::Batch(_) => panic!(\"deserialization failed: case with only one query\"),\n        }\n    }\n}\n"
  },
  {
    "path": "proxy/src/serverless/websocket.rs",
    "content": "use std::pin::Pin;\nuse std::sync::Arc;\nuse std::task::{Context, Poll, ready};\n\nuse anyhow::Context as _;\nuse bytes::{Buf, BufMut, Bytes, BytesMut};\nuse framed_websockets::{Frame, OpCode, WebSocketServer};\nuse futures::{Sink, Stream};\nuse hyper::upgrade::OnUpgrade;\nuse hyper_util::rt::TokioIo;\nuse pin_project_lite::pin_project;\nuse tokio::io::{self, AsyncBufRead, AsyncRead, AsyncWrite, ReadBuf};\nuse tracing::warn;\n\nuse crate::cancellation::CancellationHandler;\nuse crate::config::ProxyConfig;\nuse crate::context::RequestContext;\nuse crate::error::ReportableError;\nuse crate::metrics::Metrics;\nuse crate::pglb::{ClientMode, handle_connection};\nuse crate::proxy::ErrorSource;\nuse crate::rate_limiter::EndpointRateLimiter;\n\npin_project! {\n    /// This is a wrapper around a [`WebSocketStream`] that\n    /// implements [`AsyncRead`] and [`AsyncWrite`].\n    pub(crate) struct WebSocketRw<S> {\n        #[pin]\n        stream: WebSocketServer<S>,\n        recv: Bytes,\n        send: BytesMut,\n    }\n}\n\nimpl<S> WebSocketRw<S> {\n    pub(crate) fn new(stream: WebSocketServer<S>) -> Self {\n        Self {\n            stream,\n            recv: Bytes::new(),\n            send: BytesMut::new(),\n        }\n    }\n}\n\nimpl<S: AsyncRead + AsyncWrite + Unpin> AsyncWrite for WebSocketRw<S> {\n    fn poll_write(\n        self: Pin<&mut Self>,\n        cx: &mut Context<'_>,\n        buf: &[u8],\n    ) -> Poll<io::Result<usize>> {\n        let this = self.project();\n        let mut stream = this.stream;\n\n        ready!(stream.as_mut().poll_ready(cx).map_err(io::Error::other))?;\n\n        this.send.put(buf);\n        match stream.as_mut().start_send(Frame::binary(this.send.split())) {\n            Ok(()) => Poll::Ready(Ok(buf.len())),\n            Err(e) => Poll::Ready(Err(io::Error::other(e))),\n        }\n    }\n\n    fn poll_flush(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<io::Result<()>> {\n        let stream = 
self.project().stream;\n        stream.poll_flush(cx).map_err(io::Error::other)\n    }\n\n    fn poll_shutdown(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<io::Result<()>> {\n        let stream = self.project().stream;\n        stream.poll_close(cx).map_err(io::Error::other)\n    }\n}\n\nimpl<S: AsyncRead + AsyncWrite + Unpin> AsyncRead for WebSocketRw<S> {\n    fn poll_read(\n        mut self: Pin<&mut Self>,\n        cx: &mut Context<'_>,\n        buf: &mut ReadBuf<'_>,\n    ) -> Poll<io::Result<()>> {\n        let bytes = ready!(self.as_mut().poll_fill_buf(cx))?;\n        let len = std::cmp::min(bytes.len(), buf.remaining());\n        buf.put_slice(&bytes[..len]);\n        self.consume(len);\n        Poll::Ready(Ok(()))\n    }\n}\n\nimpl<S: AsyncRead + AsyncWrite + Unpin> AsyncBufRead for WebSocketRw<S> {\n    fn poll_fill_buf(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<io::Result<&[u8]>> {\n        // Please refer to poll_fill_buf's documentation.\n        const EOF: Poll<io::Result<&[u8]>> = Poll::Ready(Ok(&[]));\n\n        let mut this = self.project();\n        loop {\n            if !this.recv.chunk().is_empty() {\n                let chunk = (*this.recv).chunk();\n                return Poll::Ready(Ok(chunk));\n            }\n\n            let res = ready!(this.stream.as_mut().poll_next(cx));\n            match res.transpose().map_err(io::Error::other)? 
{\n                Some(message) => match message.opcode {\n                    OpCode::Ping => {}\n                    OpCode::Pong => {}\n                    OpCode::Text => {\n                        // We expect to see only binary messages.\n                        let error = \"unexpected text message in the websocket\";\n                        warn!(length = message.payload.len(), error);\n                        return Poll::Ready(Err(io::Error::other(error)));\n                    }\n                    OpCode::Binary | OpCode::Continuation => {\n                        debug_assert!(this.recv.is_empty());\n                        *this.recv = message.payload.freeze();\n                    }\n                    OpCode::Close => return EOF,\n                },\n                None => return EOF,\n            }\n        }\n    }\n\n    fn consume(self: Pin<&mut Self>, amount: usize) {\n        self.project().recv.advance(amount);\n    }\n}\n\n#[allow(clippy::too_many_arguments)]\npub(crate) async fn serve_websocket(\n    config: &'static ProxyConfig,\n    auth_backend: &'static crate::auth::Backend<'static, ()>,\n    ctx: RequestContext,\n    websocket: OnUpgrade,\n    cancellation_handler: Arc<CancellationHandler>,\n    endpoint_rate_limiter: Arc<EndpointRateLimiter>,\n    hostname: Option<String>,\n    cancellations: tokio_util::task::task_tracker::TaskTracker,\n) -> anyhow::Result<()> {\n    let websocket = websocket.await?;\n    let websocket = WebSocketServer::after_handshake(TokioIo::new(websocket));\n\n    let conn_gauge = Metrics::get()\n        .proxy\n        .client_connections\n        .guard(crate::metrics::Protocol::Ws);\n\n    let res = Box::pin(handle_connection(\n        config,\n        auth_backend,\n        &ctx,\n        cancellation_handler,\n        WebSocketRw::new(websocket),\n        ClientMode::Websockets { hostname },\n        endpoint_rate_limiter,\n        conn_gauge,\n        cancellations,\n    ))\n    .await;\n\n    match 
res {\n        Err(e) => {\n            ctx.set_error_kind(e.get_error_kind());\n            Err(e.into())\n        }\n        Ok(None) => {\n            ctx.set_success();\n            Ok(())\n        }\n        Ok(Some(p)) => {\n            ctx.set_success();\n            ctx.log_connect();\n            match p.proxy_pass().await {\n                Ok(()) => Ok(()),\n                Err(ErrorSource::Client(err)) => Err(err).context(\"client\"),\n                Err(ErrorSource::Compute(err)) => Err(err).context(\"compute\"),\n            }\n        }\n    }\n}\n\n#[cfg(test)]\nmod tests {\n    use std::pin::pin;\n\n    use framed_websockets::WebSocketServer;\n    use futures::{SinkExt, StreamExt};\n    use tokio::io::{AsyncReadExt, AsyncWriteExt, duplex};\n    use tokio::task::JoinSet;\n    use tokio_tungstenite::WebSocketStream;\n    use tokio_tungstenite::tungstenite::Message;\n    use tokio_tungstenite::tungstenite::protocol::Role;\n\n    use super::WebSocketRw;\n\n    #[tokio::test]\n    async fn websocket_stream_wrapper_happy_path() {\n        let (stream1, stream2) = duplex(1024);\n\n        let mut js = JoinSet::new();\n\n        js.spawn(async move {\n            let mut client = WebSocketStream::from_raw_socket(stream1, Role::Client, None).await;\n\n            client\n                .send(Message::Binary(b\"hello world\".to_vec()))\n                .await\n                .unwrap();\n\n            let message = client.next().await.unwrap().unwrap();\n            assert_eq!(message, Message::Binary(b\"websockets are cool\".to_vec()));\n\n            client.close(None).await.unwrap();\n        });\n\n        js.spawn(async move {\n            let mut rw = pin!(WebSocketRw::new(WebSocketServer::after_handshake(stream2)));\n\n            let mut buf = vec![0; 1024];\n            let n = rw.read(&mut buf).await.unwrap();\n            assert_eq!(&buf[..n], b\"hello world\");\n\n            rw.write_all(b\"websockets are cool\").await.unwrap();\n            
rw.flush().await.unwrap();\n\n            let n = rw.read_to_end(&mut buf).await.unwrap();\n            assert_eq!(n, 0);\n        });\n\n        js.join_next().await.unwrap().unwrap();\n        js.join_next().await.unwrap().unwrap();\n    }\n}\n"
  },
  {
    "path": "proxy/src/signals.rs",
    "content": "use std::convert::Infallible;\n\nuse anyhow::bail;\nuse tokio_util::sync::CancellationToken;\nuse tracing::{info, warn};\n\nuse crate::metrics::{Metrics, ServiceInfo};\n\n/// Handle unix signals appropriately.\npub async fn handle<F>(\n    token: CancellationToken,\n    mut refresh_config: F,\n) -> anyhow::Result<Infallible>\nwhere\n    F: FnMut(),\n{\n    use tokio::signal::unix::{SignalKind, signal};\n\n    let mut hangup = signal(SignalKind::hangup())?;\n    let mut interrupt = signal(SignalKind::interrupt())?;\n    let mut terminate = signal(SignalKind::terminate())?;\n\n    loop {\n        tokio::select! {\n            // Hangup is commonly used for config reload.\n            _ = hangup.recv() => {\n                info!(\"received SIGHUP\");\n                refresh_config();\n            }\n            // Shut down the whole application.\n            _ = interrupt.recv() => {\n                warn!(\"received SIGINT, exiting immediately\");\n                Metrics::get().service.info.set_label(ServiceInfo::terminating());\n                bail!(\"interrupted\");\n            }\n            _ = terminate.recv() => {\n                warn!(\"received SIGTERM, shutting down once all existing connections have closed\");\n                Metrics::get().service.info.set_label(ServiceInfo::terminating());\n                token.cancel();\n            }\n        }\n    }\n}\n"
  },
  {
    "path": "proxy/src/stream.rs",
    "content": "use std::pin::Pin;\nuse std::sync::Arc;\nuse std::{io, task};\n\nuse rustls::ServerConfig;\nuse thiserror::Error;\nuse tokio::io::{AsyncRead, AsyncWrite, AsyncWriteExt, ReadBuf};\nuse tokio_rustls::server::TlsStream;\n\nuse crate::error::{ErrorKind, ReportableError, UserFacingError};\nuse crate::metrics::Metrics;\nuse crate::pqproto::{\n    BeMessage, FE_PASSWORD_MESSAGE, FeStartupPacket, SQLSTATE_INTERNAL_ERROR, WriteBuf,\n    read_message, read_startup,\n};\nuse crate::tls::TlsServerEndPoint;\n\n/// Stream wrapper which implements libpq's protocol.\n///\n/// NOTE: This object deliberately doesn't implement [`AsyncRead`]\n/// or [`AsyncWrite`] to prevent subtle errors (e.g. trying\n/// to pass random malformed bytes through the connection).\npub struct PqStream<S> {\n    stream: S,\n    read: Vec<u8>,\n    write: WriteBuf,\n}\n\nimpl<S> PqStream<S> {\n    pub fn get_ref(&self) -> &S {\n        &self.stream\n    }\n\n    /// Construct a new libpq protocol wrapper over a stream without the first startup message.\n    #[cfg(test)]\n    pub fn new_skip_handshake(stream: S) -> Self {\n        Self {\n            stream,\n            read: Vec::new(),\n            write: WriteBuf::new(),\n        }\n    }\n}\n\nimpl<S: AsyncRead + AsyncWrite + Unpin> PqStream<S> {\n    /// Construct a new libpq protocol wrapper and read the first startup message.\n    ///\n    /// This is not cancel safe.\n    pub async fn parse_startup(mut stream: S) -> io::Result<(Self, FeStartupPacket)> {\n        let startup = read_startup(&mut stream).await?;\n        Ok((\n            Self {\n                stream,\n                read: Vec::new(),\n                write: WriteBuf::new(),\n            },\n            startup,\n        ))\n    }\n\n    /// Tell the client that encryption is not supported.\n    ///\n    /// This is not cancel safe\n    pub async fn reject_encryption(&mut self) -> io::Result<FeStartupPacket> {\n        // N for No.\n        
self.write.encryption(b'N');\n        self.flush().await?;\n        read_startup(&mut self.stream).await\n    }\n}\n\nimpl<S: AsyncRead + Unpin> PqStream<S> {\n    /// Read a raw postgres packet, which will respect the max length requested.\n    /// This is not cancel safe.\n    async fn read_raw_expect(&mut self, tag: u8, max: u32) -> io::Result<&mut [u8]> {\n        let (actual_tag, msg) = read_message(&mut self.stream, &mut self.read, max).await?;\n        if actual_tag != tag {\n            return Err(io::Error::other(format!(\n                \"incorrect message tag, expected {:?}, got {:?}\",\n                tag as char, actual_tag as char,\n            )));\n        }\n        Ok(msg)\n    }\n\n    /// Read a postgres password message, which will respect the max length requested.\n    /// This is not cancel safe.\n    pub async fn read_password_message(&mut self) -> io::Result<&mut [u8]> {\n        // passwords are usually pretty short\n        // and SASL SCRAM messages are no longer than 256 bytes in my testing\n        // (a few hashes and random bytes, encoded into base64).\n        const MAX_PASSWORD_LENGTH: u32 = 512;\n        self.read_raw_expect(FE_PASSWORD_MESSAGE, MAX_PASSWORD_LENGTH)\n            .await\n    }\n}\n\n#[derive(Debug)]\npub struct ReportedError {\n    source: anyhow::Error,\n    error_kind: ErrorKind,\n}\n\nimpl ReportedError {\n    pub fn new(e: impl UserFacingError + Into<anyhow::Error>) -> Self {\n        let error_kind = e.get_error_kind();\n        Self {\n            source: e.into(),\n            error_kind,\n        }\n    }\n}\n\nimpl std::fmt::Display for ReportedError {\n    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {\n        self.source.fmt(f)\n    }\n}\n\nimpl std::error::Error for ReportedError {\n    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {\n        self.source.source()\n    }\n}\n\nimpl ReportableError for ReportedError {\n    fn get_error_kind(&self) -> ErrorKind {\n 
       self.error_kind\n    }\n}\n\nimpl<S: AsyncWrite + Unpin> PqStream<S> {\n    /// Tell the client that we are willing to accept SSL.\n    /// This is not cancel safe\n    pub async fn accept_tls(mut self) -> io::Result<S> {\n        // S for SSL.\n        self.write.encryption(b'S');\n        self.flush().await?;\n        Ok(self.stream)\n    }\n\n    /// Assert that we are using direct TLS.\n    pub fn accept_direct_tls(self) -> S {\n        self.stream\n    }\n\n    /// Write a raw message to the internal buffer.\n    pub fn write_raw(&mut self, size_hint: usize, tag: u8, f: impl FnOnce(&mut Vec<u8>)) {\n        self.write.write_raw(size_hint, tag, f);\n    }\n\n    /// Write the message into an internal buffer\n    pub fn write_message(&mut self, message: BeMessage<'_>) {\n        message.write_message(&mut self.write);\n    }\n\n    /// Write the buffer to the socket until we have some more space again.\n    pub async fn write_if_full(&mut self) -> io::Result<()> {\n        while self.write.occupied_len() > 2048 {\n            self.stream.write_buf(&mut self.write).await?;\n        }\n\n        Ok(())\n    }\n\n    /// Flush the output buffer into the underlying stream.\n    ///\n    /// This is cancel safe.\n    pub async fn flush(&mut self) -> io::Result<()> {\n        self.stream.write_all_buf(&mut self.write).await?;\n        self.write.reset();\n\n        self.stream.flush().await?;\n\n        Ok(())\n    }\n\n    /// Flush the output buffer into the underlying stream.\n    ///\n    /// This is cancel safe.\n    pub async fn flush_and_into_inner(mut self) -> io::Result<S> {\n        self.flush().await?;\n        Ok(self.stream)\n    }\n\n    /// Write the error message to the client, then re-throw it.\n    ///\n    /// Trait [`UserFacingError`] acts as an allowlist for error types.\n    /// If `ctx` is provided and has testodrome_id set, error messages will be prefixed according to error kind.\n    pub(crate) async fn throw_error<E>(\n        &mut 
self,\n        error: E,\n        ctx: Option<&crate::context::RequestContext>,\n    ) -> ReportedError\n    where\n        E: UserFacingError + Into<anyhow::Error>,\n    {\n        let error_kind = error.get_error_kind();\n        let msg = error.to_string_client();\n\n        if error_kind != ErrorKind::RateLimit && error_kind != ErrorKind::User {\n            tracing::info!(\n                kind = error_kind.to_metric_label(),\n                msg,\n                \"forwarding error to user\"\n            );\n        }\n\n        let probe_msg;\n        let mut msg = &*msg;\n        if let Some(ctx) = ctx\n            && ctx.get_testodrome_id().is_some()\n        {\n            let tag = match error_kind {\n                ErrorKind::User => \"client\",\n                ErrorKind::ClientDisconnect => \"client\",\n                ErrorKind::RateLimit => \"proxy\",\n                ErrorKind::ServiceRateLimit => \"proxy\",\n                ErrorKind::Quota => \"proxy\",\n                ErrorKind::Service => \"proxy\",\n                ErrorKind::ControlPlane => \"controlplane\",\n                ErrorKind::Postgres => \"other\",\n                ErrorKind::Compute => \"compute\",\n            };\n            probe_msg = typed_json::json!({\n                \"tag\": tag,\n                \"msg\": msg,\n                \"cold_start_info\": ctx.cold_start_info(),\n            })\n            .to_string();\n            msg = &probe_msg;\n        }\n\n        // TODO: either preserve the error code from postgres, or assign error codes to proxy errors.\n        self.write.write_error(msg, SQLSTATE_INTERNAL_ERROR);\n\n        self.flush()\n            .await\n            .unwrap_or_else(|e| tracing::debug!(\"write_message failed: {e}\"));\n\n        ReportedError::new(error)\n    }\n}\n\n/// Wrapper for upgrading raw streams into secure streams.\npub enum Stream<S> {\n    /// We always begin with a raw stream,\n    /// which may then be upgraded into a secure 
stream.\n    Raw { raw: S },\n    Tls {\n        /// We box [`TlsStream`] since it can be quite large.\n        tls: Box<TlsStream<S>>,\n        /// Channel binding parameter\n        tls_server_end_point: TlsServerEndPoint,\n    },\n}\n\nimpl<S: Unpin> Unpin for Stream<S> {}\n\nimpl<S> Stream<S> {\n    /// Construct a new instance from a raw stream.\n    pub fn from_raw(raw: S) -> Self {\n        Self::Raw { raw }\n    }\n\n    /// Return SNI hostname when it's available.\n    pub fn sni_hostname(&self) -> Option<&str> {\n        match self {\n            Stream::Raw { .. } => None,\n            Stream::Tls { tls, .. } => tls.get_ref().1.server_name(),\n        }\n    }\n\n    pub(crate) fn tls_server_end_point(&self) -> TlsServerEndPoint {\n        match self {\n            Stream::Raw { .. } => TlsServerEndPoint::Undefined,\n            Stream::Tls {\n                tls_server_end_point,\n                ..\n            } => *tls_server_end_point,\n        }\n    }\n}\n\n#[derive(Debug, Error)]\n#[error(\"Can't upgrade TLS stream\")]\npub enum StreamUpgradeError {\n    #[error(\"Bad state reached: can't upgrade TLS stream\")]\n    AlreadyTls,\n\n    #[error(\"Can't upgrade stream: IO error: {0}\")]\n    Io(#[from] io::Error),\n}\n\nimpl<S: AsyncRead + AsyncWrite + Unpin> Stream<S> {\n    /// If possible, upgrade raw stream into a secure TLS-based stream.\n    pub async fn upgrade(\n        self,\n        cfg: Arc<ServerConfig>,\n        record_handshake_error: bool,\n    ) -> Result<TlsStream<S>, StreamUpgradeError> {\n        match self {\n            Stream::Raw { raw } => Ok(tokio_rustls::TlsAcceptor::from(cfg)\n                .accept(raw)\n                .await\n                .inspect_err(|_| {\n                    if record_handshake_error {\n                        Metrics::get().proxy.tls_handshake_failures.inc();\n                    }\n                })?),\n            Stream::Tls { .. 
} => Err(StreamUpgradeError::AlreadyTls),\n        }\n    }\n}\n\nimpl<S: AsyncRead + AsyncWrite + Unpin> AsyncRead for Stream<S> {\n    fn poll_read(\n        mut self: Pin<&mut Self>,\n        context: &mut task::Context<'_>,\n        buf: &mut ReadBuf<'_>,\n    ) -> task::Poll<io::Result<()>> {\n        match &mut *self {\n            Self::Raw { raw } => Pin::new(raw).poll_read(context, buf),\n            Self::Tls { tls, .. } => Pin::new(tls).poll_read(context, buf),\n        }\n    }\n}\n\nimpl<S: AsyncRead + AsyncWrite + Unpin> AsyncWrite for Stream<S> {\n    fn poll_write(\n        mut self: Pin<&mut Self>,\n        context: &mut task::Context<'_>,\n        buf: &[u8],\n    ) -> task::Poll<io::Result<usize>> {\n        match &mut *self {\n            Self::Raw { raw } => Pin::new(raw).poll_write(context, buf),\n            Self::Tls { tls, .. } => Pin::new(tls).poll_write(context, buf),\n        }\n    }\n\n    fn poll_flush(\n        mut self: Pin<&mut Self>,\n        context: &mut task::Context<'_>,\n    ) -> task::Poll<io::Result<()>> {\n        match &mut *self {\n            Self::Raw { raw } => Pin::new(raw).poll_flush(context),\n            Self::Tls { tls, .. } => Pin::new(tls).poll_flush(context),\n        }\n    }\n\n    fn poll_shutdown(\n        mut self: Pin<&mut Self>,\n        context: &mut task::Context<'_>,\n    ) -> task::Poll<io::Result<()>> {\n        match &mut *self {\n            Self::Raw { raw } => Pin::new(raw).poll_shutdown(context),\n            Self::Tls { tls, .. } => Pin::new(tls).poll_shutdown(context),\n        }\n    }\n}\n"
  },
  {
    "path": "proxy/src/tls/client_config.rs",
    "content": "use std::env;\nuse std::io::Cursor;\nuse std::path::PathBuf;\nuse std::sync::Arc;\n\nuse anyhow::{Context, bail};\nuse rustls::crypto::ring;\n\n/// We use an internal certificate authority when establishing a TLS connection with compute.\nfn load_internal_certs(store: &mut rustls::RootCertStore) -> anyhow::Result<()> {\n    let Some(ca_file) = env::var_os(\"NEON_INTERNAL_CA_FILE\") else {\n        return Ok(());\n    };\n    let ca_file = PathBuf::from(ca_file);\n\n    let ca = std::fs::read(&ca_file)\n        .with_context(|| format!(\"could not read CA from {}\", ca_file.display()))?;\n\n    for cert in rustls_pemfile::certs(&mut Cursor::new(&*ca)) {\n        store\n            .add(cert.context(\"could not parse internal CA certificate\")?)\n            .context(\"could not parse internal CA certificate\")?;\n    }\n\n    Ok(())\n}\n\n/// For console redirect proxy, we need to establish a connection to compute via pg-sni-router.\n/// pg-sni-router needs TLS and uses a Let's Encrypt signed certificate, so we\n/// load certificates from our native store.\nfn load_native_certs(store: &mut rustls::RootCertStore) -> anyhow::Result<()> {\n    let der_certs = rustls_native_certs::load_native_certs();\n\n    if !der_certs.errors.is_empty() {\n        bail!(\"could not parse certificates: {:?}\", der_certs.errors);\n    }\n\n    store.add_parsable_certificates(der_certs.certs);\n\n    Ok(())\n}\n\nfn load_compute_certs() -> anyhow::Result<Arc<rustls::RootCertStore>> {\n    let mut store = rustls::RootCertStore::empty();\n    load_native_certs(&mut store)?;\n    load_internal_certs(&mut store)?;\n    Ok(Arc::new(store))\n}\n\n/// Loads the root certificates and constructs a client config suitable for connecting to the neon compute.\n/// This function is blocking.\npub fn compute_client_config_with_root_certs() -> anyhow::Result<rustls::ClientConfig> {\n    Ok(\n        rustls::ClientConfig::builder_with_provider(Arc::new(ring::default_provider()))\n        
    .with_safe_default_protocol_versions()\n            .expect(\"ring should support the default protocol versions\")\n            .with_root_certificates(load_compute_certs()?)\n            .with_no_client_auth(),\n    )\n}\n\n#[cfg(test)]\npub fn compute_client_config_with_certs(\n    certs: impl IntoIterator<Item = rustls::pki_types::CertificateDer<'static>>,\n) -> rustls::ClientConfig {\n    let mut store = rustls::RootCertStore::empty();\n    store.add_parsable_certificates(certs);\n\n    rustls::ClientConfig::builder_with_provider(Arc::new(ring::default_provider()))\n        .with_safe_default_protocol_versions()\n        .expect(\"ring should support the default protocol versions\")\n        .with_root_certificates(store)\n        .with_no_client_auth()\n}\n"
  },
  {
    "path": "proxy/src/tls/mod.rs",
    "content": "pub mod client_config;\npub mod postgres_rustls;\npub mod server_config;\n\nuse anyhow::Context;\nuse base64::Engine as _;\nuse base64::prelude::BASE64_STANDARD;\nuse rustls::pki_types::CertificateDer;\nuse sha2::{Digest, Sha256};\nuse tracing::{error, info};\nuse x509_cert::der::{Reader, SliceReader, oid};\n\n/// <https://github.com/postgres/postgres/blob/ca481d3c9ab7bf69ff0c8d71ad3951d407f6a33c/src/include/libpq/pqcomm.h#L159>\npub const PG_ALPN_PROTOCOL: &[u8] = b\"postgresql\";\n\n/// Channel binding parameter\n///\n/// <https://www.rfc-editor.org/rfc/rfc5929#section-4>\n/// Description: The hash of the TLS server's certificate as it\n/// appears, octet for octet, in the server's Certificate message.  Note\n/// that the Certificate message contains a certificate_list, in which\n/// the first element is the server's certificate.\n///\n/// The hash function is to be selected as follows:\n///\n/// * if the certificate's signatureAlgorithm uses a single hash\n///   function, and that hash function is either MD5 or SHA-1, then use SHA-256;\n///\n/// * if the certificate's signatureAlgorithm uses a single hash\n///   function and that hash function neither MD5 nor SHA-1, then use\n///   the hash function associated with the certificate's\n///   signatureAlgorithm;\n///\n/// * if the certificate's signatureAlgorithm uses no hash functions or\n///   uses multiple hash functions, then this channel binding type's\n///   channel bindings are undefined at this time (updates to is channel\n///   binding type may occur to address this issue if it ever arises).\n#[derive(Debug, Clone, Copy)]\npub enum TlsServerEndPoint {\n    Sha256([u8; 32]),\n    Undefined,\n}\n\nimpl TlsServerEndPoint {\n    pub fn new(cert: &CertificateDer<'_>) -> anyhow::Result<Self> {\n        const SHA256_OIDS: &[oid::ObjectIdentifier] = &[\n            // I'm explicitly not adding MD5 or SHA1 here... 
They're bad.\n            oid::db::rfc5912::ECDSA_WITH_SHA_256,\n            oid::db::rfc5912::SHA_256_WITH_RSA_ENCRYPTION,\n        ];\n\n        let certificate = SliceReader::new(cert)\n            .context(\"Failed to parse cerficiate\")?\n            .decode::<x509_cert::Certificate>()\n            .context(\"Failed to parse cerficiate\")?;\n\n        let subject = certificate.tbs_certificate.subject;\n        info!(%subject, \"parsing TLS certificate\");\n\n        let oid = certificate.signature_algorithm.oid;\n        if SHA256_OIDS.contains(&oid) {\n            let tls_server_end_point: [u8; 32] = Sha256::new().chain_update(cert).finalize().into();\n            info!(%subject, tls_server_end_point = %BASE64_STANDARD.encode(tls_server_end_point), \"determined channel binding\");\n            Ok(Self::Sha256(tls_server_end_point))\n        } else {\n            error!(%subject, \"unknown channel binding\");\n            Ok(Self::Undefined)\n        }\n    }\n\n    pub fn supported(&self) -> bool {\n        !matches!(self, TlsServerEndPoint::Undefined)\n    }\n}\n"
  },
  {
    "path": "proxy/src/tls/postgres_rustls.rs",
    "content": "use std::convert::TryFrom;\nuse std::sync::Arc;\n\nuse postgres_client::tls::MakeTlsConnect;\nuse rustls::pki_types::{InvalidDnsNameError, ServerName};\nuse tokio::io::{AsyncRead, AsyncWrite};\n\nuse crate::config::ComputeConfig;\n\nmod private {\n    use std::future::Future;\n    use std::io;\n    use std::pin::Pin;\n    use std::task::{Context, Poll};\n\n    use postgres_client::tls::{ChannelBinding, TlsConnect};\n    use rustls::pki_types::ServerName;\n    use tokio::io::{AsyncRead, AsyncWrite, ReadBuf};\n    use tokio_rustls::TlsConnector;\n    use tokio_rustls::client::TlsStream;\n\n    use crate::tls::TlsServerEndPoint;\n\n    pub struct TlsConnectFuture<S> {\n        inner: tokio_rustls::Connect<S>,\n    }\n\n    impl<S> Future for TlsConnectFuture<S>\n    where\n        S: AsyncRead + AsyncWrite + Unpin,\n    {\n        type Output = io::Result<RustlsStream<S>>;\n\n        fn poll(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Self::Output> {\n            Pin::new(&mut self.inner)\n                .poll(cx)\n                .map_ok(|s| RustlsStream(Box::new(s)))\n        }\n    }\n\n    pub struct RustlsConnect(pub RustlsConnectData);\n\n    pub struct RustlsConnectData {\n        pub hostname: ServerName<'static>,\n        pub connector: TlsConnector,\n    }\n\n    impl<S> TlsConnect<S> for RustlsConnect\n    where\n        S: AsyncRead + AsyncWrite + Unpin + Send + 'static,\n    {\n        type Stream = RustlsStream<S>;\n        type Error = io::Error;\n        type Future = TlsConnectFuture<S>;\n\n        fn connect(self, stream: S) -> Self::Future {\n            TlsConnectFuture {\n                inner: self.0.connector.connect(self.0.hostname, stream),\n            }\n        }\n    }\n\n    pub struct RustlsStream<S>(Box<TlsStream<S>>);\n\n    impl<S> postgres_client::tls::TlsStream for RustlsStream<S>\n    where\n        S: AsyncRead + AsyncWrite + Unpin,\n    {\n        fn channel_binding(&self) -> ChannelBinding {\n        
    let (_, session) = self.0.get_ref();\n            match session.peer_certificates() {\n                Some([cert, ..]) => TlsServerEndPoint::new(cert)\n                    .ok()\n                    .and_then(|cb| match cb {\n                        TlsServerEndPoint::Sha256(hash) => Some(hash),\n                        TlsServerEndPoint::Undefined => None,\n                    })\n                    .map_or_else(ChannelBinding::none, |hash| {\n                        ChannelBinding::tls_server_end_point(hash.to_vec())\n                    }),\n                _ => ChannelBinding::none(),\n            }\n        }\n    }\n\n    impl<S> AsyncRead for RustlsStream<S>\n    where\n        S: AsyncRead + AsyncWrite + Unpin,\n    {\n        fn poll_read(\n            mut self: Pin<&mut Self>,\n            cx: &mut Context<'_>,\n            buf: &mut ReadBuf<'_>,\n        ) -> Poll<tokio::io::Result<()>> {\n            Pin::new(&mut self.0).poll_read(cx, buf)\n        }\n    }\n\n    impl<S> AsyncWrite for RustlsStream<S>\n    where\n        S: AsyncRead + AsyncWrite + Unpin,\n    {\n        fn poll_write(\n            mut self: Pin<&mut Self>,\n            cx: &mut Context<'_>,\n            buf: &[u8],\n        ) -> Poll<tokio::io::Result<usize>> {\n            Pin::new(&mut self.0).poll_write(cx, buf)\n        }\n\n        fn poll_flush(\n            mut self: Pin<&mut Self>,\n            cx: &mut Context<'_>,\n        ) -> Poll<tokio::io::Result<()>> {\n            Pin::new(&mut self.0).poll_flush(cx)\n        }\n\n        fn poll_shutdown(\n            mut self: Pin<&mut Self>,\n            cx: &mut Context<'_>,\n        ) -> Poll<tokio::io::Result<()>> {\n            Pin::new(&mut self.0).poll_shutdown(cx)\n        }\n    }\n}\n\nimpl<S> MakeTlsConnect<S> for ComputeConfig\nwhere\n    S: AsyncRead + AsyncWrite + Unpin + Send + 'static,\n{\n    type Stream = private::RustlsStream<S>;\n    type TlsConnect = private::RustlsConnect;\n    type Error = 
InvalidDnsNameError;\n\n    fn make_tls_connect(&self, hostname: &str) -> Result<Self::TlsConnect, Self::Error> {\n        make_tls_connect(&self.tls, hostname)\n    }\n}\n\npub fn make_tls_connect(\n    tls: &Arc<rustls::ClientConfig>,\n    hostname: &str,\n) -> Result<private::RustlsConnect, InvalidDnsNameError> {\n    ServerName::try_from(hostname).map(|dns_name| {\n        private::RustlsConnect(private::RustlsConnectData {\n            hostname: dns_name.to_owned(),\n            connector: tls.clone().into(),\n        })\n    })\n}\n"
  },
  {
    "path": "proxy/src/tls/server_config.rs",
    "content": "use std::collections::{HashMap, HashSet};\nuse std::path::Path;\nuse std::sync::Arc;\n\nuse anyhow::{Context, bail};\nuse itertools::Itertools;\nuse rustls::crypto::ring::{self, sign};\nuse rustls::pki_types::{CertificateDer, PrivateKeyDer};\nuse rustls::sign::CertifiedKey;\nuse x509_cert::der::{Reader, SliceReader};\n\nuse super::{PG_ALPN_PROTOCOL, TlsServerEndPoint};\n\npub struct TlsConfig {\n    // unfortunate split since we cannot change the ALPN on demand.\n    // <https://github.com/rustls/rustls/issues/2260>\n    pub http_config: Arc<rustls::ServerConfig>,\n    pub pg_config: Arc<rustls::ServerConfig>,\n    pub common_names: HashSet<String>,\n    pub cert_resolver: Arc<CertResolver>,\n}\n\n/// Configure TLS for the main endpoint.\npub fn configure_tls(\n    key_path: &Path,\n    cert_path: &Path,\n    certs_dir: Option<&Path>,\n    allow_tls_keylogfile: bool,\n) -> anyhow::Result<TlsConfig> {\n    // add default certificate\n    let mut cert_resolver = CertResolver::parse_new(key_path, cert_path)?;\n\n    // add extra certificates\n    if let Some(certs_dir) = certs_dir {\n        for entry in std::fs::read_dir(certs_dir)? 
{\n            let entry = entry?;\n            let path = entry.path();\n            if path.is_dir() {\n                // file names aligned with default cert-manager names\n                let key_path = path.join(\"tls.key\");\n                let cert_path = path.join(\"tls.crt\");\n                if key_path.exists() && cert_path.exists() {\n                    cert_resolver.add_cert_path(&key_path, &cert_path)?;\n                }\n            }\n        }\n    }\n\n    let common_names = cert_resolver.get_common_names();\n\n    let cert_resolver = Arc::new(cert_resolver);\n\n    // allow TLS 1.2 to be compatible with older client libraries\n    let mut config =\n        rustls::ServerConfig::builder_with_provider(Arc::new(ring::default_provider()))\n            .with_protocol_versions(&[&rustls::version::TLS13, &rustls::version::TLS12])\n            .context(\"ring should support TLS1.2 and TLS1.3\")?\n            .with_no_client_auth()\n            .with_cert_resolver(cert_resolver.clone());\n\n    config.alpn_protocols = vec![PG_ALPN_PROTOCOL.to_vec()];\n\n    if allow_tls_keylogfile {\n        // KeyLogFile will check for the SSLKEYLOGFILE environment variable.\n        config.key_log = Arc::new(rustls::KeyLogFile::new());\n    }\n\n    let mut http_config = config.clone();\n    let mut pg_config = config;\n\n    http_config.alpn_protocols = vec![b\"h2\".to_vec(), b\"http/1.1\".to_vec()];\n    pg_config.alpn_protocols = vec![b\"postgresql\".to_vec()];\n\n    Ok(TlsConfig {\n        http_config: Arc::new(http_config),\n        pg_config: Arc::new(pg_config),\n        common_names,\n        cert_resolver,\n    })\n}\n\n#[derive(Debug)]\npub struct CertResolver {\n    certs: HashMap<String, (Arc<rustls::sign::CertifiedKey>, TlsServerEndPoint)>,\n    default: (Arc<rustls::sign::CertifiedKey>, TlsServerEndPoint),\n}\n\nimpl CertResolver {\n    fn parse_new(key_path: &Path, cert_path: &Path) -> anyhow::Result<Self> {\n        let (priv_key, cert_chain) = 
parse_key_cert(key_path, cert_path)?;\n        Self::new(priv_key, cert_chain)\n    }\n\n    pub fn new(\n        priv_key: PrivateKeyDer<'static>,\n        cert_chain: Vec<CertificateDer<'static>>,\n    ) -> anyhow::Result<Self> {\n        let (common_name, cert, tls_server_end_point) = process_key_cert(priv_key, cert_chain)?;\n\n        let mut certs = HashMap::new();\n        let default = (cert.clone(), tls_server_end_point);\n        certs.insert(common_name, (cert, tls_server_end_point));\n        Ok(Self { certs, default })\n    }\n\n    fn add_cert_path(&mut self, key_path: &Path, cert_path: &Path) -> anyhow::Result<()> {\n        let (priv_key, cert_chain) = parse_key_cert(key_path, cert_path)?;\n        self.add_cert(priv_key, cert_chain)\n    }\n\n    fn add_cert(\n        &mut self,\n        priv_key: PrivateKeyDer<'static>,\n        cert_chain: Vec<CertificateDer<'static>>,\n    ) -> anyhow::Result<()> {\n        let (common_name, cert, tls_server_end_point) = process_key_cert(priv_key, cert_chain)?;\n        self.certs.insert(common_name, (cert, tls_server_end_point));\n        Ok(())\n    }\n\n    pub fn get_common_names(&self) -> HashSet<String> {\n        self.certs.keys().cloned().collect()\n    }\n}\n\nfn parse_key_cert(\n    key_path: &Path,\n    cert_path: &Path,\n) -> anyhow::Result<(PrivateKeyDer<'static>, Vec<CertificateDer<'static>>)> {\n    let priv_key = {\n        let key_bytes = std::fs::read(key_path)\n            .with_context(|| format!(\"Failed to read TLS keys at '{}'\", key_path.display()))?;\n        rustls_pemfile::private_key(&mut &key_bytes[..])\n            .with_context(|| format!(\"Failed to parse TLS keys at '{}'\", key_path.display()))?\n            .with_context(|| format!(\"Failed to parse TLS keys at '{}'\", key_path.display()))?\n    };\n\n    let cert_chain_bytes = std::fs::read(cert_path).context(format!(\n        \"Failed to read TLS cert file at '{}.'\",\n        cert_path.display()\n    ))?;\n\n    let cert_chain 
= {\n        rustls_pemfile::certs(&mut &cert_chain_bytes[..])\n            .try_collect()\n            .with_context(|| {\n                format!(\n                    \"Failed to read TLS certificate chain from bytes from file at '{}'.\",\n                    cert_path.display()\n                )\n            })?\n    };\n\n    Ok((priv_key, cert_chain))\n}\n\nfn process_key_cert(\n    priv_key: PrivateKeyDer<'static>,\n    cert_chain: Vec<CertificateDer<'static>>,\n) -> anyhow::Result<(String, Arc<CertifiedKey>, TlsServerEndPoint)> {\n    let key = sign::any_supported_type(&priv_key).context(\"invalid private key\")?;\n\n    let first_cert = &cert_chain[0];\n    let tls_server_end_point = TlsServerEndPoint::new(first_cert)?;\n\n    let certificate = SliceReader::new(first_cert)\n        .context(\"Failed to parse cerficiate\")?\n        .decode::<x509_cert::Certificate>()\n        .context(\"Failed to parse cerficiate\")?;\n\n    let common_name = certificate.tbs_certificate.subject.to_string();\n\n    // We need to get the canonical name for this certificate so we can match them against any domain names\n    // seen within the proxy codebase.\n    //\n    // In scram-proxy we use wildcard certificates only, with the database endpoint as the wildcard subdomain, taken from SNI.\n    // We need to remove the wildcard prefix for the purposes of certificate selection.\n    //\n    // auth-broker does not use SNI and instead uses the Neon-Connection-String header.\n    // Auth broker has the subdomain `apiauth` we need to remove for the purposes of validating the Neon-Connection-String.\n    //\n    // Console Redirect proxy does not use any wildcard domains and does not need any certificate selection or conn string\n    // validation, so let's we can continue with any common-name\n    let common_name = if let Some(s) = common_name.strip_prefix(\"CN=*.\") {\n        s.to_string()\n    } else if let Some(s) = common_name.strip_prefix(\"CN=apiauth.\") {\n        
s.to_string()\n    } else if let Some(s) = common_name.strip_prefix(\"CN=\") {\n        s.to_string()\n    } else {\n        bail!(\"Failed to parse common name from certificate\")\n    };\n\n    let cert = Arc::new(rustls::sign::CertifiedKey::new(cert_chain, key));\n\n    Ok((common_name, cert, tls_server_end_point))\n}\n\nimpl rustls::server::ResolvesServerCert for CertResolver {\n    fn resolve(\n        &self,\n        client_hello: rustls::server::ClientHello<'_>,\n    ) -> Option<Arc<rustls::sign::CertifiedKey>> {\n        Some(self.resolve(client_hello.server_name()).0)\n    }\n}\n\nimpl CertResolver {\n    pub fn resolve(\n        &self,\n        server_name: Option<&str>,\n    ) -> (Arc<rustls::sign::CertifiedKey>, TlsServerEndPoint) {\n        // loop here and cut off more and more subdomains until we find\n        // a match to get a proper wildcard support. OTOH, we now do not\n        // use nested domains, so keep this simple for now.\n        //\n        // With the current coding foo.com will match *.foo.com and that\n        // repeats behavior of the old code.\n        if let Some(mut sni_name) = server_name {\n            loop {\n                if let Some(cert) = self.certs.get(sni_name) {\n                    return cert.clone();\n                }\n                if let Some((_, rest)) = sni_name.split_once('.') {\n                    sni_name = rest;\n                } else {\n                    // The customer has some custom DNS mapping - just return\n                    // a default certificate.\n                    //\n                    // This will error if the customer uses anything stronger\n                    // than sslmode=require. 
That's a choice they can make.\n                    return self.default.clone();\n                }\n            }\n        } else {\n            // No SNI, use the default certificate, otherwise we can't get to\n            // options parameter which can be used to set endpoint name too.\n            // That means that non-SNI flow will not work for CNAME domains in\n            // verify-full mode.\n            //\n            // If that will be a problem we can:\n            //\n            // a) Instead of multi-cert approach use single cert with extra\n            //    domains listed in Subject Alternative Name (SAN).\n            // b) Deploy separate proxy instances for extra domains.\n            self.default.clone()\n        }\n    }\n}\n"
  },
  {
    "path": "proxy/src/types.rs",
    "content": "use crate::intern::{EndpointIdInt, EndpointIdTag, InternId};\n\nmacro_rules! smol_str_wrapper {\n    ($name:ident) => {\n        #[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Default)]\n        pub struct $name(smol_str::SmolStr);\n\n        impl $name {\n            #[allow(unused)]\n            pub(crate) fn as_str(&self) -> &str {\n                self.0.as_str()\n            }\n        }\n\n        impl std::fmt::Display for $name {\n            fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {\n                self.0.fmt(f)\n            }\n        }\n\n        impl<T> std::cmp::PartialEq<T> for $name\n        where\n            smol_str::SmolStr: std::cmp::PartialEq<T>,\n        {\n            fn eq(&self, other: &T) -> bool {\n                self.0.eq(other)\n            }\n        }\n\n        impl<T> From<T> for $name\n        where\n            smol_str::SmolStr: From<T>,\n        {\n            fn from(x: T) -> Self {\n                Self(x.into())\n            }\n        }\n\n        impl AsRef<str> for $name {\n            fn as_ref(&self) -> &str {\n                self.0.as_ref()\n            }\n        }\n\n        impl std::ops::Deref for $name {\n            type Target = str;\n            fn deref(&self) -> &str {\n                &*self.0\n            }\n        }\n\n        impl<'de> serde::de::Deserialize<'de> for $name {\n            fn deserialize<D: serde::de::Deserializer<'de>>(d: D) -> Result<Self, D::Error> {\n                <smol_str::SmolStr as serde::de::Deserialize<'de>>::deserialize(d).map(Self)\n            }\n        }\n\n        impl serde::Serialize for $name {\n            fn serialize<S: serde::Serializer>(&self, s: S) -> Result<S::Ok, S::Error> {\n                self.0.serialize(s)\n            }\n        }\n    };\n}\n\nconst POOLER_SUFFIX: &str = \"-pooler\";\npub(crate) const LOCAL_PROXY_SUFFIX: &str = \"-local-proxy\";\n\nimpl EndpointId {\n    #[must_use]\n    fn 
normalize_str(&self) -> &str {\n        if let Some(stripped) = self.as_ref().strip_suffix(POOLER_SUFFIX) {\n            stripped\n        } else if let Some(stripped) = self.as_ref().strip_suffix(LOCAL_PROXY_SUFFIX) {\n            stripped\n        } else {\n            self\n        }\n    }\n\n    #[must_use]\n    pub fn normalize(&self) -> Self {\n        self.normalize_str().into()\n    }\n\n    #[must_use]\n    pub fn normalize_intern(&self) -> EndpointIdInt {\n        EndpointIdTag::get_interner().get_or_intern(self.normalize_str())\n    }\n}\n\n// 90% of role name strings are 20 characters or less.\nsmol_str_wrapper!(RoleName);\n// 50% of endpoint strings are 23 characters or less.\nsmol_str_wrapper!(EndpointId);\n// 50% of branch strings are 23 characters or less.\nsmol_str_wrapper!(BranchId);\n// 90% of project strings are 23 characters or less.\nsmol_str_wrapper!(ProjectId);\n// 90% of account strings are 23 characters or less.\nsmol_str_wrapper!(AccountId);\n\n// will usually equal endpoint ID\nsmol_str_wrapper!(EndpointCacheKey);\n\nsmol_str_wrapper!(DbName);\n\n// postgres hostname, will likely be a port:ip addr\nsmol_str_wrapper!(Host);\n"
  },
  {
    "path": "proxy/src/url.rs",
    "content": "use anyhow::bail;\n\n/// A [url](url::Url) type with additional guarantees.\n#[repr(transparent)]\n#[derive(Debug, Clone, PartialEq, Eq)]\npub struct ApiUrl(url::Url);\n\nimpl ApiUrl {\n    /// Consume the wrapper and return inner [url](url::Url).\n    pub(crate) fn into_inner(self) -> url::Url {\n        self.0\n    }\n\n    /// See [`url::Url::path_segments_mut`].\n    pub(crate) fn path_segments_mut(&mut self) -> url::PathSegmentsMut<'_> {\n        // We've already verified that it works during construction.\n        self.0.path_segments_mut().expect(\"bad API url\")\n    }\n}\n\n/// This instance imposes additional requirements on the url.\nimpl std::str::FromStr for ApiUrl {\n    type Err = anyhow::Error;\n\n    fn from_str(s: &str) -> anyhow::Result<Self> {\n        let mut url: url::Url = s.parse()?;\n\n        // Make sure that we can build upon this URL.\n        if url.path_segments_mut().is_err() {\n            bail!(\"bad API url provided\");\n        }\n\n        Ok(Self(url))\n    }\n}\n\n/// This instance is safe because it doesn't allow us to modify the object.\nimpl std::ops::Deref for ApiUrl {\n    type Target = url::Url;\n\n    fn deref(&self) -> &Self::Target {\n        &self.0\n    }\n}\n\nimpl std::ops::DerefMut for ApiUrl {\n    fn deref_mut(&mut self) -> &mut Self::Target {\n        &mut self.0\n    }\n}\n\nimpl std::fmt::Display for ApiUrl {\n    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {\n        self.0.fmt(f)\n    }\n}\n\n#[cfg(test)]\nmod tests {\n    use super::*;\n\n    #[test]\n    fn bad_url() {\n        let url = \"test:foobar\";\n        url.parse::<url::Url>().expect(\"unexpected parsing failure\");\n        url.parse::<ApiUrl>().expect_err(\"should not parse\");\n    }\n\n    #[test]\n    fn good_url() {\n        let url = \"test://foobar\";\n        let mut a = url.parse::<url::Url>().expect(\"unexpected parsing failure\");\n        let mut b = url.parse::<ApiUrl>().expect(\"unexpected 
parsing failure\");\n\n        a.path_segments_mut().unwrap().push(\"method\");\n        b.path_segments_mut().push(\"method\");\n\n        assert_eq!(a, b.into_inner());\n    }\n}\n"
  },
  {
    "path": "proxy/src/usage_metrics.rs",
    "content": "//! Periodically collect proxy consumption metrics\n//! and push them to a HTTP endpoint.\nuse std::borrow::Cow;\nuse std::convert::Infallible;\nuse std::sync::Arc;\nuse std::sync::atomic::{AtomicU64, AtomicUsize, Ordering};\nuse std::time::Duration;\n\nuse anyhow::{Context, bail};\nuse async_compression::tokio::write::GzipEncoder;\nuse bytes::Bytes;\nuse chrono::{DateTime, Datelike, Timelike, Utc};\nuse clashmap::ClashMap;\nuse clashmap::mapref::entry::Entry;\nuse consumption_metrics::{CHUNK_SIZE, Event, EventChunk, EventType, idempotency_key};\nuse once_cell::sync::Lazy;\nuse remote_storage::{GenericRemoteStorage, RemotePath, TimeoutOrCancel};\nuse serde::{Deserialize, Serialize};\nuse smol_str::SmolStr;\nuse tokio::io::AsyncWriteExt;\nuse tokio_util::sync::CancellationToken;\nuse tracing::{error, info, instrument, trace, warn};\nuse utils::backoff;\nuse uuid::{NoContext, Timestamp};\n\nuse crate::config::MetricCollectionConfig;\nuse crate::context::parquet::{FAILED_UPLOAD_MAX_RETRIES, FAILED_UPLOAD_WARN_THRESHOLD};\nuse crate::http;\nuse crate::intern::{BranchIdInt, EndpointIdInt};\n\nconst PROXY_IO_BYTES_PER_CLIENT: &str = \"proxy_io_bytes_per_client\";\n\nconst HTTP_REPORTING_REQUEST_TIMEOUT: Duration = Duration::from_secs(10);\nconst HTTP_REPORTING_RETRY_DURATION: Duration = Duration::from_secs(60);\n\n/// Key that uniquely identifies the object, this metric describes.\n/// Currently, endpoint_id is enough, but this may change later,\n/// so keep it in a named struct.\n///\n/// Both the proxy and the ingestion endpoint will live in the same region (or cell)\n/// so while the project-id is unique across regions the whole pipeline will work correctly\n/// because we enrich the event with project_id in the control-plane endpoint.\n#[derive(Eq, Hash, PartialEq, Serialize, Deserialize, Debug, Clone)]\npub(crate) struct Ids {\n    pub(crate) endpoint_id: EndpointIdInt,\n    pub(crate) branch_id: BranchIdInt,\n    #[serde(with = 
\"none_as_empty_string\")]\n    pub(crate) private_link_id: Option<SmolStr>,\n}\n\n#[derive(Eq, Hash, PartialEq, Serialize, Deserialize, Debug, Clone)]\nstruct Extra {\n    #[serde(flatten)]\n    ids: Ids,\n    direction: TrafficDirection,\n}\n\nmod none_as_empty_string {\n    use serde::Deserialize;\n    use smol_str::SmolStr;\n\n    #[allow(clippy::ref_option)]\n    pub fn serialize<S: serde::Serializer>(t: &Option<SmolStr>, s: S) -> Result<S::Ok, S::Error> {\n        s.serialize_str(t.as_deref().unwrap_or(\"\"))\n    }\n\n    pub fn deserialize<'de, D: serde::Deserializer<'de>>(\n        d: D,\n    ) -> Result<Option<SmolStr>, D::Error> {\n        let s = SmolStr::deserialize(d)?;\n        if s.is_empty() { Ok(None) } else { Ok(Some(s)) }\n    }\n}\n\n#[derive(Eq, Hash, PartialEq, Serialize, Deserialize, Debug, Clone)]\n#[serde(rename_all = \"lowercase\")]\npub(crate) enum TrafficDirection {\n    Ingress,\n    Egress,\n}\n\npub(crate) trait MetricCounterRecorder {\n    /// Record that some bytes were sent from the proxy to the client\n    fn record_egress(&self, bytes: u64);\n\n    /// Record that some bytes were sent from the client to the proxy\n    fn record_ingress(&self, bytes: u64);\n\n    /// Record that some connections were opened\n    fn record_connection(&self, count: usize);\n}\n\ntrait MetricCounterReporter {\n    fn get_metrics(&mut self) -> MetricsData;\n    fn move_metrics(&self) -> MetricsData;\n}\n\n#[derive(Debug)]\npub(crate) struct MetricCounter {\n    transmitted: AtomicU64,\n    received: AtomicU64,\n    opened_connections: AtomicUsize,\n}\n\nimpl MetricCounterRecorder for MetricCounter {\n    /// Record that some bytes were sent from the proxy to the client\n    fn record_egress(&self, bytes: u64) {\n        self.transmitted.fetch_add(bytes, Ordering::Relaxed);\n    }\n\n    /// Record that some bytes were sent from the client to the proxy\n    fn record_ingress(&self, bytes: u64) {\n        self.received.fetch_add(bytes, 
Ordering::Relaxed);\n    }\n\n    /// Record that some connections were opened\n    fn record_connection(&self, count: usize) {\n        self.opened_connections.fetch_add(count, Ordering::Relaxed);\n    }\n}\n\nimpl MetricCounterReporter for MetricCounter {\n    fn get_metrics(&mut self) -> MetricsData {\n        MetricsData {\n            received: *self.received.get_mut(),\n            transmitted: *self.transmitted.get_mut(),\n            connections: *self.opened_connections.get_mut(),\n        }\n    }\n\n    fn move_metrics(&self) -> MetricsData {\n        MetricsData {\n            received: self.received.swap(0, Ordering::Relaxed),\n            transmitted: self.transmitted.swap(0, Ordering::Relaxed),\n            connections: self.opened_connections.swap(0, Ordering::Relaxed),\n        }\n    }\n}\n\nstruct MetricsData {\n    transmitted: u64,\n    received: u64,\n    connections: usize,\n}\n\nstruct BytesSent {\n    transmitted: u64,\n    received: u64,\n}\n\ntrait Clearable {\n    /// extract the value that should be reported\n    fn should_report(self: &Arc<Self>) -> Option<BytesSent>;\n    /// Determine whether the counter should be cleared from the global map.\n    fn should_clear(self: &mut Arc<Self>) -> bool;\n}\n\nimpl<C: MetricCounterReporter> Clearable for C {\n    fn should_report(self: &Arc<Self>) -> Option<BytesSent> {\n        // heuristic to see if the branch is still open\n        // if a clone happens while we are observing, the heuristic will be incorrect.\n        //\n        // Worst case is that we won't report an event for this endpoint.\n        // However, for the strong count to be 1 it must have occurred that at one instant\n        // all the endpoints were closed, so missing a report because the endpoints are closed is valid.\n        let is_open = Arc::strong_count(self) > 1;\n\n        // update cached metrics eagerly, even if they can't get sent\n        // (to avoid sending the same metrics twice)\n        // see the relevant 
discussion on why to do so even if the status is not success:\n        // https://github.com/neondatabase/neon/pull/4563#discussion_r1246710956\n        let MetricsData {\n            transmitted,\n            received,\n            connections,\n        } = self.move_metrics();\n\n        // Our only requirement is that we report in every interval if there was an open connection\n        // if there were no opened connections since, then we don't need to report\n        if transmitted == 0 && received == 0 && !is_open && connections == 0 {\n            None\n        } else {\n            Some(BytesSent {\n                transmitted,\n                received,\n            })\n        }\n    }\n    fn should_clear(self: &mut Arc<Self>) -> bool {\n        // we can't clear this entry if it's acquired elsewhere\n        let Some(counter) = Arc::get_mut(self) else {\n            return false;\n        };\n        let MetricsData {\n            transmitted,\n            received,\n            connections,\n        } = counter.get_metrics();\n        // clear if there's no data to report\n        transmitted == 0 && received == 0 && connections == 0\n    }\n}\n\n// endpoint and branch IDs are not user generated so we don't run the risk of hash-dos\ntype FastHasher = std::hash::BuildHasherDefault<rustc_hash::FxHasher>;\n\n#[derive(Default)]\npub(crate) struct Metrics {\n    endpoints: ClashMap<Ids, Arc<MetricCounter>, FastHasher>,\n}\n\nimpl Metrics {\n    /// Register a new byte metrics counter for this endpoint\n    pub(crate) fn register(&self, ids: Ids) -> Arc<MetricCounter> {\n        let entry = if let Some(entry) = self.endpoints.get(&ids) {\n            entry.clone()\n        } else {\n            self.endpoints\n                .entry(ids)\n                .or_insert_with(|| {\n                    Arc::new(MetricCounter {\n                        received: AtomicU64::new(0),\n                        transmitted: AtomicU64::new(0),\n                        
opened_connections: AtomicUsize::new(0),\n                    })\n                })\n                .clone()\n        };\n\n        entry.record_connection(1);\n        entry\n    }\n}\n\npub(crate) static USAGE_METRICS: Lazy<Metrics> = Lazy::new(Metrics::default);\n\npub async fn task_main(config: &MetricCollectionConfig) -> anyhow::Result<Infallible> {\n    info!(\"metrics collector config: {config:?}\");\n    scopeguard::defer! {\n        info!(\"metrics collector has shut down\");\n    }\n\n    let http_client = http::new_client_with_timeout(\n        HTTP_REPORTING_REQUEST_TIMEOUT,\n        HTTP_REPORTING_RETRY_DURATION,\n    );\n    let hostname = hostname::get()?.as_os_str().to_string_lossy().into_owned();\n\n    // Even if the remote storage is not configured, we still want to clear the metrics.\n    let storage = if let Some(config) = config\n        .backup_metric_collection_config\n        .remote_storage_config\n        .as_ref()\n    {\n        Some(\n            GenericRemoteStorage::from_config(config)\n                .await\n                .context(\"remote storage init\")?,\n        )\n    } else {\n        None\n    };\n\n    let mut prev = Utc::now();\n    let mut ticker = tokio::time::interval(config.interval);\n    loop {\n        ticker.tick().await;\n\n        let now = Utc::now();\n        collect_metrics_iteration(\n            &USAGE_METRICS.endpoints,\n            &http_client,\n            &config.endpoint,\n            storage.as_ref(),\n            config.backup_metric_collection_config.chunk_size,\n            &hostname,\n            prev,\n            now,\n        )\n        .await;\n        prev = now;\n    }\n}\n\nfn collect_and_clear_metrics<C: Clearable>(\n    endpoints: &ClashMap<Ids, Arc<C>, FastHasher>,\n) -> Vec<(Ids, BytesSent)> {\n    let mut metrics_to_clear = Vec::new();\n\n    let metrics_to_send: Vec<(Ids, BytesSent)> = endpoints\n        .iter()\n        .filter_map(|counter| {\n            let key = 
counter.key().clone();\n            let Some(value) = counter.should_report() else {\n                metrics_to_clear.push(key);\n                return None;\n            };\n            Some((key, value))\n        })\n        .collect();\n\n    for metric in metrics_to_clear {\n        match endpoints.entry(metric) {\n            Entry::Occupied(mut counter) => {\n                if counter.get_mut().should_clear() {\n                    counter.remove_entry();\n                }\n            }\n            Entry::Vacant(_) => {}\n        }\n    }\n    metrics_to_send\n}\n\nfn create_event_chunks<'a>(\n    metrics_to_send: &'a [(Ids, BytesSent)],\n    hostname: &'a str,\n    prev: DateTime<Utc>,\n    now: DateTime<Utc>,\n    chunk_size: usize,\n) -> impl Iterator<Item = EventChunk<'a, Event<Extra, &'static str>>> + 'a {\n    metrics_to_send\n        .chunks(chunk_size)\n        .map(move |chunk| EventChunk {\n            events: chunk\n                .iter()\n                .flat_map(|(ids, bytes)| {\n                    [\n                        Event {\n                            kind: EventType::Incremental {\n                                start_time: prev,\n                                stop_time: now,\n                            },\n                            metric: PROXY_IO_BYTES_PER_CLIENT,\n                            idempotency_key: idempotency_key(hostname),\n                            value: bytes.transmitted,\n                            extra: Extra {\n                                ids: ids.clone(),\n                                direction: TrafficDirection::Egress,\n                            },\n                        },\n                        Event {\n                            kind: EventType::Incremental {\n                                start_time: prev,\n                                stop_time: now,\n                            },\n                            metric: PROXY_IO_BYTES_PER_CLIENT,\n                        
    idempotency_key: idempotency_key(hostname),\n                            value: bytes.received,\n                            extra: Extra {\n                                ids: ids.clone(),\n                                direction: TrafficDirection::Ingress,\n                            },\n                        },\n                    ]\n                })\n                .collect(),\n        })\n}\n\n#[expect(clippy::too_many_arguments)]\n#[instrument(skip_all)]\nasync fn collect_metrics_iteration(\n    endpoints: &ClashMap<Ids, Arc<MetricCounter>, FastHasher>,\n    client: &http::ClientWithMiddleware,\n    metric_collection_endpoint: &reqwest::Url,\n    storage: Option<&GenericRemoteStorage>,\n    outer_chunk_size: usize,\n    hostname: &str,\n    prev: DateTime<Utc>,\n    now: DateTime<Utc>,\n) {\n    info!(\n        \"starting collect_metrics_iteration. metric_collection_endpoint: {}\",\n        metric_collection_endpoint\n    );\n\n    let metrics_to_send = collect_and_clear_metrics(endpoints);\n\n    if metrics_to_send.is_empty() {\n        trace!(\"no new metrics to send\");\n    }\n\n    let cancel = CancellationToken::new();\n    let path_prefix = create_remote_path_prefix(now);\n\n    // Send metrics.\n    for chunk in create_event_chunks(&metrics_to_send, hostname, prev, now, outer_chunk_size) {\n        tokio::join!(\n            upload_main_events_chunked(client, metric_collection_endpoint, &chunk, CHUNK_SIZE),\n            async {\n                if let Err(e) = upload_backup_events(storage, &chunk, &path_prefix, &cancel).await {\n                    error!(\"failed to upload consumption events to remote storage: {e:?}\");\n                }\n            }\n        );\n    }\n}\n\nfn create_remote_path_prefix(now: DateTime<Utc>) -> String {\n    format!(\n        \"year={year:04}/month={month:02}/day={day:02}/hour={hour:02}/{hour:02}:{minute:02}:{second:02}Z\",\n        year = now.year(),\n        month = now.month(),\n        day = 
now.day(),\n        hour = now.hour(),\n        minute = now.minute(),\n        second = now.second(),\n    )\n}\n\nasync fn upload_main_events_chunked(\n    client: &http::ClientWithMiddleware,\n    metric_collection_endpoint: &reqwest::Url,\n    chunk: &EventChunk<'_, Event<Extra, &str>>,\n    subchunk_size: usize,\n) {\n    // Split into smaller chunks to avoid exceeding the max request size\n    for subchunk in chunk.events.chunks(subchunk_size).map(|c| EventChunk {\n        events: Cow::Borrowed(c),\n    }) {\n        let res = client\n            .post(metric_collection_endpoint.clone())\n            .json(&subchunk)\n            .send()\n            .await;\n\n        let res = match res {\n            Ok(x) => x,\n            Err(err) => {\n                // TODO: retry?\n                error!(\"failed to send metrics: {:?}\", err);\n                continue;\n            }\n        };\n\n        if !res.status().is_success() {\n            error!(\"metrics endpoint refused the sent metrics: {:?}\", res);\n            for metric in subchunk.events.iter().filter(|e| e.value > (1u64 << 40)) {\n                // Report if the metric value is suspiciously large\n                warn!(\"potentially abnormal metric value: {:?}\", metric);\n            }\n        }\n    }\n}\n\nasync fn upload_backup_events(\n    storage: Option<&GenericRemoteStorage>,\n    chunk: &EventChunk<'_, Event<Extra, &'static str>>,\n    path_prefix: &str,\n    cancel: &CancellationToken,\n) -> anyhow::Result<()> {\n    let Some(storage) = storage else {\n        warn!(\"no remote storage configured\");\n        return Ok(());\n    };\n\n    let real_now = Utc::now();\n    let id = uuid::Uuid::new_v7(Timestamp::from_unix(\n        NoContext,\n        real_now.second().into(),\n        real_now.nanosecond(),\n    ));\n    let path = format!(\"{path_prefix}_{id}.ndjson.gz\");\n    let remote_path = match RemotePath::from_string(&path) {\n        Ok(remote_path) => remote_path,\n        
Err(e) => {\n            bail!(\"failed to create remote path from str {path}: {:?}\", e);\n        }\n    };\n\n    // TODO: This is async compression from Vec to Vec. Rewrite as byte stream.\n    //       Use sync compression in blocking threadpool.\n    let mut encoder = GzipEncoder::new(Vec::new());\n    for event in chunk.events.iter() {\n        let data = serde_json::to_vec(event).context(\"serialize metrics\")?;\n        encoder.write_all(&data).await.context(\"compress metrics\")?;\n        encoder.write_all(b\"\\n\").await.context(\"compress metrics\")?;\n    }\n    encoder.shutdown().await.context(\"compress metrics\")?;\n    let compressed_data: Bytes = encoder.get_ref().clone().into();\n    backoff::retry(\n        || async {\n            let stream = futures::stream::once(futures::future::ready(Ok(compressed_data.clone())));\n            storage\n                .upload(stream, compressed_data.len(), &remote_path, None, cancel)\n                .await\n        },\n        TimeoutOrCancel::caused_by_cancel,\n        FAILED_UPLOAD_WARN_THRESHOLD,\n        FAILED_UPLOAD_MAX_RETRIES,\n        \"usage_metrics_upload\",\n        cancel,\n    )\n    .await\n    .ok_or_else(|| anyhow::Error::new(TimeoutOrCancel::Cancel))\n    .and_then(|x| x)\n    .with_context(|| format!(\"usage_metrics_upload: path={remote_path}\"))?;\n    Ok(())\n}\n\n#[cfg(test)]\nmod tests {\n    use std::fs;\n    use std::io::{BufRead, BufReader};\n    use std::sync::{Arc, Mutex};\n\n    use anyhow::Error;\n    use camino_tempfile::tempdir;\n    use chrono::Utc;\n    use consumption_metrics::{Event, EventChunk};\n    use http_body_util::BodyExt;\n    use hyper::body::Incoming;\n    use hyper::server::conn::http1;\n    use hyper::service::service_fn;\n    use hyper::{Request, Response};\n    use hyper_util::rt::TokioIo;\n    use remote_storage::{RemoteStorageConfig, RemoteStorageKind};\n    use tokio::net::TcpListener;\n    use url::Url;\n\n    use super::*;\n    use crate::http;\n    
use crate::types::{BranchId, EndpointId};\n\n    #[tokio::test]\n    async fn metrics() {\n        type Report = EventChunk<'static, Event<Extra, String>>;\n        let reports: Arc<Mutex<Vec<Report>>> = Arc::default();\n\n        let listener = TcpListener::bind(\"127.0.0.1:0\").await.unwrap();\n        let addr = listener.local_addr().unwrap();\n        tokio::spawn({\n            let reports = reports.clone();\n            async move {\n                loop {\n                    if let Ok((stream, _addr)) = listener.accept().await {\n                        let reports = reports.clone();\n                        http1::Builder::new()\n                            .serve_connection(\n                                TokioIo::new(stream),\n                                service_fn(move |req: Request<Incoming>| {\n                                    let reports = reports.clone();\n                                    async move {\n                                        let bytes = req.into_body().collect().await?.to_bytes();\n                                        let events = serde_json::from_slice(&bytes)?;\n                                        reports.lock().unwrap().push(events);\n                                        Ok::<_, Error>(Response::new(String::new()))\n                                    }\n                                }),\n                            )\n                            .await\n                            .unwrap();\n                    }\n                }\n            }\n        });\n\n        let metrics = Metrics::default();\n        let client = http::new_client();\n        let endpoint = Url::parse(&format!(\"http://{addr}\")).unwrap();\n        let now = Utc::now();\n\n        let storage_test_dir = tempdir().unwrap();\n        let local_fs_path = storage_test_dir.path().join(\"usage_metrics\");\n        fs::create_dir_all(&local_fs_path).unwrap();\n        let storage = 
GenericRemoteStorage::from_config(&RemoteStorageConfig {\n            storage: RemoteStorageKind::LocalFs {\n                local_path: local_fs_path.clone(),\n            },\n            timeout: Duration::from_secs(10),\n            small_timeout: Duration::from_secs(1),\n        })\n        .await\n        .unwrap();\n\n        let mut pushed_chunks: Vec<Report> = Vec::new();\n        let mut stored_chunks: Vec<Report> = Vec::new();\n\n        // no counters have been registered\n        collect_metrics_iteration(\n            &metrics.endpoints,\n            &client,\n            &endpoint,\n            Some(&storage),\n            1000,\n            \"foo\",\n            now,\n            now,\n        )\n        .await;\n        let r = std::mem::take(&mut *reports.lock().unwrap());\n        assert!(r.is_empty());\n\n        // register a new counter\n\n        let counter = metrics.register(Ids {\n            endpoint_id: (&EndpointId::from(\"e1\")).into(),\n            branch_id: (&BranchId::from(\"b1\")).into(),\n            private_link_id: None,\n        });\n\n        // the counter should be observed despite 0 egress\n        collect_metrics_iteration(\n            &metrics.endpoints,\n            &client,\n            &endpoint,\n            Some(&storage),\n            1000,\n            \"foo\",\n            now,\n            now,\n        )\n        .await;\n        let r = std::mem::take(&mut *reports.lock().unwrap());\n        assert_eq!(r.len(), 1);\n        assert_eq!(r[0].events.len(), 2);\n        assert_eq!(r[0].events[0].value, 0);\n        assert_eq!(r[0].events[0].extra.direction, TrafficDirection::Egress);\n        assert_eq!(r[0].events[1].value, 0);\n        assert_eq!(r[0].events[1].extra.direction, TrafficDirection::Ingress);\n        pushed_chunks.extend(r);\n\n        // record egress\n        counter.record_egress(1);\n\n        // record ingress\n        counter.record_ingress(2);\n\n        // egress should be observed\n      
  collect_metrics_iteration(\n            &metrics.endpoints,\n            &client,\n            &endpoint,\n            Some(&storage),\n            1000,\n            \"foo\",\n            now,\n            now,\n        )\n        .await;\n        let r = std::mem::take(&mut *reports.lock().unwrap());\n        assert_eq!(r.len(), 1);\n        assert_eq!(r[0].events.len(), 2);\n        assert_eq!(r[0].events[0].value, 1);\n        assert_eq!(r[0].events[0].extra.direction, TrafficDirection::Egress);\n        assert_eq!(r[0].events[1].value, 2);\n        assert_eq!(r[0].events[1].extra.direction, TrafficDirection::Ingress);\n        pushed_chunks.extend(r);\n\n        // release counter\n        drop(counter);\n\n        // we do not observe the counter\n        collect_metrics_iteration(\n            &metrics.endpoints,\n            &client,\n            &endpoint,\n            Some(&storage),\n            1000,\n            \"foo\",\n            now,\n            now,\n        )\n        .await;\n        let r = std::mem::take(&mut *reports.lock().unwrap());\n        assert!(r.is_empty());\n\n        // counter is unregistered\n        assert!(metrics.endpoints.is_empty());\n\n        let path_prefix = create_remote_path_prefix(now);\n        for entry in walkdir::WalkDir::new(&local_fs_path)\n            .into_iter()\n            .filter_map(|e| e.ok())\n        {\n            let path = local_fs_path.join(&path_prefix).to_string();\n            if entry.path().to_str().unwrap().starts_with(&path) {\n                let file = fs::File::open(entry.into_path()).unwrap();\n                let decoder = flate2::bufread::GzDecoder::new(BufReader::new(file));\n                let reader = BufReader::new(decoder);\n\n                let mut events: Vec<Event<Extra, String>> = Vec::new();\n                for line in reader.lines() {\n                    let line = line.unwrap();\n                    let event: Event<Extra, String> = 
serde_json::from_str(&line).unwrap();\n                    events.push(event);\n                }\n\n                let report = Report {\n                    events: Cow::Owned(events),\n                };\n\n                stored_chunks.push(report);\n            }\n        }\n        storage_test_dir.close().ok();\n\n        // sort by first event's idempotency key because the order of files is nondeterministic\n        pushed_chunks.sort_by_cached_key(|c| c.events[0].idempotency_key.clone());\n        stored_chunks.sort_by_cached_key(|c| c.events[0].idempotency_key.clone());\n        assert_eq!(pushed_chunks, stored_chunks);\n    }\n}\n"
  },
  {
    "path": "proxy/src/util.rs",
    "content": "use std::pin::pin;\n\nuse futures::future::{Either, select};\nuse tokio_util::sync::CancellationToken;\n\npub async fn run_until_cancelled<F: Future>(\n    f: F,\n    cancellation_token: &CancellationToken,\n) -> Option<F::Output> {\n    run_until(f, cancellation_token.cancelled()).await.ok()\n}\n\n/// Runs the future `f` unless interrupted by future `condition`.\npub async fn run_until<F1: Future, F2: Future>(\n    f: F1,\n    condition: F2,\n) -> Result<F1::Output, F2::Output> {\n    match select(pin!(f), pin!(condition)).await {\n        Either::Left((f1, _)) => Ok(f1),\n        Either::Right((f2, _)) => Err(f2),\n    }\n}\n\npub fn deserialize_json_string<'de, D, T>(deserializer: D) -> Result<T, D::Error>\nwhere\n    T: for<'de2> serde::Deserialize<'de2>,\n    D: serde::Deserializer<'de>,\n{\n    use serde::Deserialize;\n    let s = String::deserialize(deserializer)?;\n    serde_json::from_str(&s).map_err(<D::Error as serde::de::Error>::custom)\n}\n"
  },
  {
    "path": "proxy/src/waiters.rs",
    "content": "use std::pin::Pin;\nuse std::task;\n\nuse hashbrown::HashMap;\nuse parking_lot::Mutex;\nuse pin_project_lite::pin_project;\nuse thiserror::Error;\nuse tokio::sync::oneshot;\n\n#[derive(Debug, Error)]\npub(crate) enum RegisterError {\n    #[error(\"Waiter `{0}` already registered\")]\n    Occupied(String),\n}\n\n#[derive(Debug, Error)]\npub(crate) enum NotifyError {\n    #[error(\"Notify failed: waiter `{0}` not registered\")]\n    NotFound(String),\n\n    #[error(\"Notify failed: channel hangup\")]\n    Hangup,\n}\n\n#[derive(Debug, Error)]\npub(crate) enum WaitError {\n    #[error(\"Wait failed: channel hangup\")]\n    Hangup,\n}\n\npub(crate) struct Waiters<T>(pub(self) Mutex<HashMap<String, oneshot::Sender<T>>>);\n\nimpl<T> Default for Waiters<T> {\n    fn default() -> Self {\n        Waiters(Mutex::default())\n    }\n}\n\nimpl<T> Waiters<T> {\n    pub(crate) fn register(&self, key: String) -> Result<Waiter<'_, T>, RegisterError> {\n        let (tx, rx) = oneshot::channel();\n\n        self.0\n            .lock()\n            .try_insert(key.clone(), tx)\n            .map_err(|e| RegisterError::Occupied(e.entry.key().clone()))?;\n\n        Ok(Waiter {\n            receiver: rx,\n            guard: DropKey {\n                registry: self,\n                key,\n            },\n        })\n    }\n\n    pub(crate) fn notify(&self, key: &str, value: T) -> Result<(), NotifyError>\n    where\n        T: Send + Sync,\n    {\n        let tx = self\n            .0\n            .lock()\n            .remove(key)\n            .ok_or_else(|| NotifyError::NotFound(key.to_string()))?;\n\n        tx.send(value).map_err(|_| NotifyError::Hangup)\n    }\n}\n\nstruct DropKey<'a, T> {\n    key: String,\n    registry: &'a Waiters<T>,\n}\n\nimpl<T> Drop for DropKey<'_, T> {\n    fn drop(&mut self) {\n        self.registry.0.lock().remove(&self.key);\n    }\n}\n\npin_project! 
{\n    pub(crate) struct Waiter<'a, T> {\n        #[pin]\n        receiver: oneshot::Receiver<T>,\n        guard: DropKey<'a, T>,\n    }\n}\n\nimpl<T> std::future::Future for Waiter<'_, T> {\n    type Output = Result<T, WaitError>;\n\n    fn poll(self: Pin<&mut Self>, cx: &mut task::Context<'_>) -> task::Poll<Self::Output> {\n        self.project()\n            .receiver\n            .poll(cx)\n            .map_err(|_| WaitError::Hangup)\n    }\n}\n\n#[cfg(test)]\nmod tests {\n    use std::sync::Arc;\n\n    use super::*;\n\n    #[tokio::test]\n    async fn test_waiter() -> anyhow::Result<()> {\n        let waiters = Arc::new(Waiters::default());\n\n        let key = \"Key\";\n        let waiter = waiters.register(key.to_owned())?;\n\n        let waiters = Arc::clone(&waiters);\n        let notifier = tokio::spawn(async move {\n            waiters.notify(key, ())?;\n            Ok(())\n        });\n\n        waiter.await?;\n        notifier.await?\n    }\n}\n"
  },
  {
    "path": "pyproject.toml",
    "content": "[tool.poetry]\ndescription = \"\"\nauthors = []\npackage-mode = false\n\n[tool.poetry.dependencies]\npython = \"^3.11\"\npytest = \"^7.4.4\"\npsycopg2-binary = \"^2.9.10\"\ntyping-extensions = \"^4.12.2\"\nPyJWT = {version = \"^2.1.0\", extras = [\"crypto\"]}\nrequests = \"^2.32.4\"\npytest-xdist = \"^3.3.1\"\nasyncpg = \"^0.30.0\"\naiopg = \"^1.4.0\"\nJinja2 = \"^3.1.6\"\ntypes-requests = \"^2.31.0.0\"\ntypes-psycopg2 = \"^2.9.21.20241019\"\nboto3 = \"^1.34.11\"\nboto3-stubs = {extras = [\"s3\", \"kms\"], version = \"^1.26.16\"}\nmoto = {extras = [\"server\"], version = \"^5.0.6\"}\nbackoff = \"^2.2.1\"\npytest-lazy-fixture = \"^0.6.3\"\nprometheus-client = \"^0.14.1\"\npytest-timeout = \"^2.3.1\"\nWerkzeug = \"^3.0.6\"\npytest-order = \"^1.1.0\"\nallure-pytest = \"^2.13.5\"\npytest-asyncio = \"^0.21.0\"\ntoml = \"^0.10.2\"\npsutil = \"^5.9.4\"\ntypes-psutil = \"^5.9.5.12\"\ntypes-toml = \"^0.10.8.6\"\npytest-httpserver = \"^1.0.8\"\naiohttp = \"3.12.14\"\npytest-rerunfailures = \"^15.0\"\ntypes-pytest-lazy-fixture = \"^0.6.3.3\"\npytest-split = \"^0.8.1\"\nzstandard = \"^0.23.0\"\nhttpx = {extras = [\"http2\"], version = \"^0.26.0\"}\npytest-repeat = \"^0.9.3\"\nwebsockets = \"^12.0\"\nclickhouse-connect = \"^0.7.16\"\nkafka-python = \"^2.0.2\"\njwcrypto = \"^1.5.6\"\nh2 = \"^4.2.0\"\ntypes-jwcrypto = \"^1.5.0.20240925\"\npyyaml = \"^6.0.2\"\ntypes-pyyaml = \"^6.0.12.20240917\"\ntestcontainers = \"^4.9.0\"\n# Install a release candidate of `jsonnet`, as it supports Python 3.13\njsonnet = \"^0.21.0-rc2\"\nrequests-unixsocket = \"^0.4.1\"\n\n[tool.poetry.group.dev.dependencies]\nmypy = \"==1.13.0\"\nruff = \"^0.11.2\"\n\n[build-system]\nrequires = [\"poetry-core>=1.0.0\"]\nbuild-backend = \"poetry.core.masonry.api\"\n\n[tool.mypy]\nexclude = [\n    \"^vendor/\",\n    \"^target/\",\n    \"test_runner/performance/pgvector/loaddata.py\",\n]\ncheck_untyped_defs = true\n# Help mypy find imports when running against list of individual files.\n# Without 
this line it would behave differently when executed on the entire project.\nmypy_path = \"$MYPY_CONFIG_FILE_DIR:$MYPY_CONFIG_FILE_DIR/test_runner:$MYPY_CONFIG_FILE_DIR/test_runner/stubs\"\n\ndisallow_incomplete_defs = false\ndisallow_untyped_calls = false\ndisallow_untyped_decorators = false\ndisallow_untyped_defs = false\nstrict = true\n\n[[tool.mypy.overrides]]\nmodule = [\n    \"_jsonnet.*\",\n    \"asyncpg.*\",\n    \"pg8000.*\",\n    \"allure.*\",\n    \"allure_commons.*\",\n    \"allure_pytest.*\",\n    \"kafka.*\",\n    \"testcontainers.*\",\n]\nignore_missing_imports = true\n\n[tool.ruff]\ntarget-version = \"py311\"\nextend-exclude = [\n    \"vendor/\",\n    \"target/\",\n    \"test_runner/stubs/\", # Autogenerated by mypy's stubgen\n]\nline-length = 100 # this setting is rather guidance, it won't fail if it can't make the line shorter\n\n[tool.ruff.lint]\nignore = [\n    \"E501\", # Line too long, we don't want to be too strict about it\n]\nselect = [\n    \"E\", # pycodestyle\n    \"F\", # Pyflakes\n    \"I\", # isort\n    \"W\", # pycodestyle\n    \"B\", # bugbear\n    \"UP\", # pyupgrade\n    \"TC\", # flake8-type-checking\n]\n"
  },
  {
    "path": "pytest.ini",
    "content": "[pytest]\nfilterwarnings =\n    error::pytest.PytestUnhandledThreadExceptionWarning\n    error::UserWarning\n    ignore:record_property is incompatible with junit_family:pytest.PytestWarning\naddopts =\n    -m 'not remote_cluster'\n    --ignore=test_runner/performance\nmarkers =\n    remote_cluster\ntestpaths =\n    test_runner\nminversion = 6.0\nlog_format = %(asctime)s.%(msecs)03d %(levelname)s [%(filename)s:%(lineno)d] %(message)s\nlog_date_format = %Y-%m-%d %H:%M:%S\nlog_cli = true\ntimeout = 300\n"
  },
  {
    "path": "run_clippy.sh",
    "content": "#!/usr/bin/env bash\nset -euo pipefail\n\n# If you save this in your path under the name \"cargo-zclippy\" (or whatever\n# name you like), then you can run it as \"cargo zclippy\" from the shell prompt.\n#\n# If your text editor has rust-analyzer integration, you can also use this new\n# command as a replacement for \"cargo check\" or \"cargo clippy\" and see clippy\n# warnings and errors right in the editor.\n# In vscode, this setting is Rust-analyzer>Check On Save:Command\n\n# NB: the CI runs the full feature powerset, so, it catches slightly more errors\n# at the expense of longer runtime. This script is used by developers, so, don't\n# do that here.\n\nthisscript=\"${BASH_SOURCE[0]}\"\nthisscript_dir=\"$(dirname \"$thisscript\")\"\nCLIPPY_COMMON_ARGS=\"$( source .neon_clippy_args; echo \"$CLIPPY_COMMON_ARGS\")\"\nexec cargo clippy --all-features $CLIPPY_COMMON_ARGS\n"
  },
  {
    "path": "rust-toolchain.toml",
    "content": "[toolchain]\nchannel = \"1.88.0\"\nprofile = \"default\"\n# The default profile includes rustc, rust-std, cargo, rust-docs, rustfmt and clippy.\n# https://rust-lang.github.io/rustup/concepts/profiles.html\n# but we also need `llvm-tools` for coverage data merges on CI\ncomponents = [\"llvm-tools\", \"rustfmt\", \"clippy\"]\n"
  },
  {
    "path": "safekeeper/Cargo.toml",
    "content": "[package]\nname = \"safekeeper\"\nversion = \"0.1.0\"\nedition = \"2024\"\nlicense.workspace = true\n\n[features]\ndefault = []\n# Enables test-only APIs, incuding failpoints. In particular, enables the `fail_point!` macro,\n# which adds some runtime cost to run tests on outage conditions\ntesting = [\"fail/failpoints\"]\nbenchmarking = []\n\n[dependencies]\nasync-stream.workspace = true\nanyhow.workspace = true\nbyteorder.workspace = true\nbytes.workspace = true\ncamino.workspace = true\ncamino-tempfile.workspace = true\nchrono.workspace = true\nclap = { workspace = true, features = [\"derive\"] }\ncrc32c.workspace = true\nfail.workspace = true\nhex.workspace = true\nhumantime.workspace = true\nhttp.workspace = true\nhyper0.workspace = true\nitertools.workspace = true\njsonwebtoken.workspace = true\nfutures.workspace = true\nonce_cell.workspace = true\nparking_lot.workspace = true\npageserver_api.workspace = true\npostgres-protocol.workspace = true\npprof.workspace = true\nrand.workspace = true\nregex.workspace = true\nreqwest = { workspace = true, features = [\"json\"] }\nrustls.workspace = true\nscopeguard.workspace = true\nserde.workspace = true\nserde_json.workspace = true\nsmallvec.workspace = true\nstrum.workspace = true\nstrum_macros.workspace = true\nthiserror.workspace = true\ntikv-jemallocator.workspace = true\ntokio = { workspace = true, features = [\"fs\"] }\ntokio-io-timeout.workspace = true\ntokio-postgres.workspace = true\ntokio-rustls.workspace = true\ntokio-tar.workspace = true\ntokio-util = { workspace = true }\ntracing.workspace = true\nurl.workspace = true\nmetrics.workspace = true\npem.workspace = true\npostgres_backend.workspace = true\npostgres_ffi.workspace = true\npostgres_ffi_types.workspace = true\npostgres_versioninfo.workspace = true\npq_proto.workspace = true\nremote_storage.workspace = true\nsafekeeper_api.workspace = true\nsafekeeper_client.workspace = true\nsha2.workspace = true\nsd-notify.workspace = 
true\nstorage_broker.workspace = true\ntokio-stream.workspace = true\nhttp-utils.workspace = true\nutils.workspace = true\nwal_decoder.workspace = true\nenv_logger.workspace = true\nnix.workspace = true\n\nworkspace_hack.workspace = true\n\n[dev-dependencies]\ncriterion.workspace = true\nitertools.workspace = true\nwalproposer.workspace = true\nrand.workspace = true\ndesim.workspace = true\ntracing.workspace = true\ntracing-subscriber = { workspace = true, features = [\"json\"] }\n\n[[bench]]\nname = \"receive_wal\"\nharness = false\nrequired-features = [\"benchmarking\"]\n"
  },
  {
    "path": "safekeeper/benches/README.md",
    "content": "## Safekeeper Benchmarks\n\nTo run benchmarks:\n\n```sh\n# All benchmarks.\ncargo bench --package safekeeper\n\n# Specific file.\ncargo bench --package safekeeper --bench receive_wal\n\n# Specific benchmark.\ncargo bench --package safekeeper --bench receive_wal process_msg/fsync=false\n\n# List available benchmarks.\ncargo bench --package safekeeper --benches -- --list\n\n# Generate flamegraph profiles using pprof-rs, profiling for 10 seconds.\n# Output in target/criterion/*/profile/flamegraph.svg.\ncargo bench --package safekeeper --bench receive_wal process_msg/fsync=false --profile-time 10\n```\n\nAdditional charts and statistics are available in `target/criterion/report/index.html`.\n\nBenchmarks are automatically compared against the previous run. To compare against other runs, see\n`--baseline` and `--save-baseline`."
  },
  {
    "path": "safekeeper/benches/receive_wal.rs",
    "content": "//! WAL ingestion benchmarks.\n\nuse std::io::Write as _;\n\nuse bytes::BytesMut;\nuse camino_tempfile::tempfile;\nuse criterion::{BatchSize, Bencher, Criterion, criterion_group, criterion_main};\nuse itertools::Itertools as _;\nuse postgres_ffi::v17::wal_generator::{LogicalMessageGenerator, WalGenerator};\nuse pprof::criterion::{Output, PProfProfiler};\nuse safekeeper::receive_wal::{self, WalAcceptor};\nuse safekeeper::safekeeper::{\n    AcceptorProposerMessage, AppendRequest, AppendRequestHeader, ProposerAcceptorMessage,\n};\nuse safekeeper::test_utils::Env;\nuse safekeeper_api::membership::SafekeeperGeneration as Generation;\nuse tokio::io::AsyncWriteExt as _;\nuse utils::id::{NodeId, TenantTimelineId};\nuse utils::lsn::Lsn;\n\nconst KB: usize = 1024;\nconst MB: usize = 1024 * KB;\nconst GB: usize = 1024 * MB;\n\n/// Use jemalloc and enable profiling, to mirror bin/safekeeper.rs.\n#[global_allocator]\nstatic GLOBAL: tikv_jemallocator::Jemalloc = tikv_jemallocator::Jemalloc;\n\n#[allow(non_upper_case_globals)]\n#[unsafe(export_name = \"malloc_conf\")]\npub static malloc_conf: &[u8] = b\"prof:true,prof_active:true,lg_prof_sample:21\\0\";\n\n// Register benchmarks with Criterion.\ncriterion_group!(\n    name = benches;\n    config = Criterion::default().with_profiler(PProfProfiler::new(100, Output::Flamegraph(None)));\n    targets = bench_process_msg,\n    bench_wal_acceptor,\n    bench_wal_acceptor_throughput,\n    bench_file_write,\n    bench_bytes_reserve,\n);\ncriterion_main!(benches);\n\n/// Benchmarks SafeKeeper::process_msg() as time per message and throughput. Each message is an\n/// AppendRequest with a single WAL record containing an XlLogicalMessage of varying size. 
When\n/// measuring throughput, only the logical message payload is considered, excluding\n/// segment/page/record headers.\nfn bench_process_msg(c: &mut Criterion) {\n    let mut g = c.benchmark_group(\"process_msg\");\n    for fsync in [false, true] {\n        for commit in [false, true] {\n            for size in [8, KB, 8 * KB, 128 * KB, MB] {\n                // Kind of weird to change the group throughput per benchmark, but it's the only way\n                // to vary it per benchmark. It works.\n                g.throughput(criterion::Throughput::Bytes(size as u64));\n                g.bench_function(format!(\"fsync={fsync}/commit={commit}/size={size}\"), |b| {\n                    run_bench(b, size, fsync, commit).unwrap()\n                });\n            }\n        }\n    }\n\n    // The actual benchmark. If commit is true, advance the commit LSN on every message.\n    fn run_bench(b: &mut Bencher, size: usize, fsync: bool, commit: bool) -> anyhow::Result<()> {\n        let runtime = tokio::runtime::Builder::new_current_thread() // single is fine, sync IO only\n            .enable_all()\n            .build()?;\n\n        // Construct the payload. The prefix counts towards the payload (including NUL terminator).\n        let prefix = c\"p\";\n        let prefixlen = prefix.to_bytes_with_nul().len();\n        assert!(size >= prefixlen);\n        let message = vec![0; size - prefixlen];\n\n        let walgen = &mut WalGenerator::new(LogicalMessageGenerator::new(prefix, &message), Lsn(0));\n\n        // Set up the Safekeeper.\n        let env = Env::new(fsync)?;\n        let mut safekeeper = runtime.block_on(env.make_safekeeper(\n            NodeId(1),\n            TenantTimelineId::generate(),\n            Lsn(0),\n        ))?;\n\n        b.iter_batched_ref(\n            // Pre-construct WAL records and requests. 
Criterion will batch them.\n            || {\n                let (lsn, record) = walgen.next().expect(\"endless WAL\");\n                ProposerAcceptorMessage::AppendRequest(AppendRequest {\n                    h: AppendRequestHeader {\n                        generation: Generation::new(0),\n                        term: 1,\n                        begin_lsn: lsn,\n                        end_lsn: lsn + record.len() as u64,\n                        commit_lsn: if commit { lsn } else { Lsn(0) }, // commit previous record\n                        truncate_lsn: Lsn(0),\n                    },\n                    wal_data: record,\n                })\n            },\n            // Benchmark message processing (time per message).\n            |msg| {\n                runtime\n                    .block_on(safekeeper.process_msg(msg))\n                    .expect(\"message failed\")\n            },\n            BatchSize::SmallInput, // automatically determine a batch size\n        );\n        Ok(())\n    }\n}\n\n/// Benchmarks WalAcceptor message processing time by sending it a batch of WAL records and waiting\n/// for it to confirm that the last LSN has been flushed to storage. We pipeline a bunch of messages\n/// instead of measuring each individual message to amortize costs (e.g. fsync), which is more\n/// realistic. Records are XlLogicalMessage with a tiny payload (~64 bytes per record including\n/// headers). Records are pre-constructed to avoid skewing the benchmark.\n///\n/// TODO: add benchmarks with in-memory storage, see comment on `Env::make_safekeeper()`:\nfn bench_wal_acceptor(c: &mut Criterion) {\n    let mut g = c.benchmark_group(\"wal_acceptor\");\n    for fsync in [false, true] {\n        for n in [1, 100, 10000] {\n            g.bench_function(format!(\"fsync={fsync}/n={n}\"), |b| {\n                run_bench(b, n, fsync).unwrap()\n            });\n        }\n    }\n\n    /// The actual benchmark. 
n is the number of WAL records to send in a pipelined batch.\n    fn run_bench(b: &mut Bencher, n: usize, fsync: bool) -> anyhow::Result<()> {\n        let runtime = tokio::runtime::Runtime::new()?; // needs multithreaded\n\n        let env = Env::new(fsync)?;\n        let walgen =\n            &mut WalGenerator::new(LogicalMessageGenerator::new(c\"prefix\", b\"message\"), Lsn(0));\n\n        // Create buffered channels that can fit all requests, to avoid blocking on channels.\n        let (msg_tx, msg_rx) = tokio::sync::mpsc::channel(n);\n        let (reply_tx, mut reply_rx) = tokio::sync::mpsc::channel(n);\n\n        // Spawn the WalAcceptor task.\n        runtime.block_on(async {\n            // TODO: WalAcceptor doesn't actually need a full timeline, only\n            // Safekeeper::process_msg(). Consider decoupling them to simplify the setup.\n            let tli = env\n                .make_timeline(NodeId(1), TenantTimelineId::generate(), Lsn(0))\n                .await?\n                .wal_residence_guard()\n                .await?;\n            WalAcceptor::spawn(tli, msg_rx, reply_tx, Some(0));\n            anyhow::Ok(())\n        })?;\n\n        b.iter_batched(\n            // Pre-construct a batch of WAL records and requests.\n            || {\n                walgen\n                    .take(n)\n                    .map(|(lsn, record)| AppendRequest {\n                        h: AppendRequestHeader {\n                            generation: Generation::new(0),\n                            term: 1,\n                            begin_lsn: lsn,\n                            end_lsn: lsn + record.len() as u64,\n                            commit_lsn: Lsn(0),\n                            truncate_lsn: Lsn(0),\n                        },\n                        wal_data: record,\n                    })\n                    .collect_vec()\n            },\n            // Benchmark batch ingestion (time per batch).\n            |reqs| {\n                
runtime.block_on(async {\n                    let final_lsn = reqs.last().unwrap().h.end_lsn;\n                    // Stuff all the messages into the buffered channel to pipeline them.\n                    for req in reqs {\n                        let msg = ProposerAcceptorMessage::AppendRequest(req);\n                        msg_tx.send(msg).await.expect(\"send failed\");\n                    }\n                    // Wait for the last message to get flushed.\n                    while let Some(reply) = reply_rx.recv().await {\n                        if let AcceptorProposerMessage::AppendResponse(resp) = reply {\n                            if resp.flush_lsn >= final_lsn {\n                                return;\n                            }\n                        }\n                    }\n                    panic!(\"disconnected\")\n                })\n            },\n            BatchSize::PerIteration, // only run one request batch at a time\n        );\n        Ok(())\n    }\n}\n\n/// Benchmarks WalAcceptor throughput by sending 1 GB of data with varying message sizes and waiting\n/// for the last LSN to be flushed to storage. Only the actual message payload counts towards\n/// throughput, headers are excluded and considered overhead. 
Records are XlLogicalMessage.\n///\n/// To avoid running out of memory, messages are constructed during the benchmark.\nfn bench_wal_acceptor_throughput(c: &mut Criterion) {\n    const VOLUME: usize = GB; // NB: excludes message/page/segment headers and padding\n\n    let mut g = c.benchmark_group(\"wal_acceptor_throughput\");\n    g.sample_size(10);\n    g.throughput(criterion::Throughput::Bytes(VOLUME as u64));\n\n    for fsync in [false, true] {\n        for commit in [false, true] {\n            for size in [KB, 8 * KB, 128 * KB, MB] {\n                assert_eq!(VOLUME % size, 0, \"volume must be divisible by size\");\n                let count = VOLUME / size;\n                g.bench_function(format!(\"fsync={fsync}/commit={commit}/size={size}\"), |b| {\n                    run_bench(b, count, size, fsync, commit).unwrap()\n                });\n            }\n        }\n    }\n\n    /// The actual benchmark. size is the payload size per message, count is the number of messages.\n    /// If commit is true, advance the commit LSN on each message.\n    fn run_bench(\n        b: &mut Bencher,\n        count: usize,\n        size: usize,\n        fsync: bool,\n        commit: bool,\n    ) -> anyhow::Result<()> {\n        let runtime = tokio::runtime::Runtime::new()?; // needs multithreaded\n\n        // Construct the payload. 
The prefix counts towards the payload (including NUL terminator).\n        let prefix = c\"p\";\n        let prefixlen = prefix.to_bytes_with_nul().len();\n        assert!(size >= prefixlen);\n        let message = vec![0; size - prefixlen];\n\n        let walgen = &mut WalGenerator::new(LogicalMessageGenerator::new(prefix, &message), Lsn(0));\n\n        // Construct and spawn the WalAcceptor task.\n        let env = Env::new(fsync)?;\n\n        let (msg_tx, msg_rx) = tokio::sync::mpsc::channel(receive_wal::MSG_QUEUE_SIZE);\n        let (reply_tx, mut reply_rx) = tokio::sync::mpsc::channel(receive_wal::REPLY_QUEUE_SIZE);\n\n        runtime.block_on(async {\n            let tli = env\n                .make_timeline(NodeId(1), TenantTimelineId::generate(), Lsn(0))\n                .await?\n                .wal_residence_guard()\n                .await?;\n            WalAcceptor::spawn(tli, msg_rx, reply_tx, Some(0));\n            anyhow::Ok(())\n        })?;\n\n        // Ingest the WAL.\n        b.iter(|| {\n            runtime.block_on(async {\n                let reqgen = walgen.take(count).map(|(lsn, record)| AppendRequest {\n                    h: AppendRequestHeader {\n                        generation: Generation::new(0),\n                        term: 1,\n                        begin_lsn: lsn,\n                        end_lsn: lsn + record.len() as u64,\n                        commit_lsn: if commit { lsn } else { Lsn(0) }, // commit previous record\n                        truncate_lsn: Lsn(0),\n                    },\n                    wal_data: record,\n                });\n\n                // Send requests.\n                for req in reqgen {\n                    _ = reply_rx.try_recv(); // discard any replies, to avoid blocking\n                    let msg = ProposerAcceptorMessage::AppendRequest(req);\n                    msg_tx.send(msg).await.expect(\"send failed\");\n                }\n\n                // Wait for last message to get 
flushed.\n                while let Some(reply) = reply_rx.recv().await {\n                    if let AcceptorProposerMessage::AppendResponse(resp) = reply {\n                        if resp.flush_lsn >= walgen.lsn {\n                            return;\n                        }\n                    }\n                }\n                panic!(\"disconnected\")\n            })\n        });\n        Ok(())\n    }\n}\n\n/// Benchmarks OS write throughput by appending blocks of a given size to a file. This is intended\n/// to compare Tokio and stdlib writes, and give a baseline for optimal WAL throughput.\nfn bench_file_write(c: &mut Criterion) {\n    let mut g = c.benchmark_group(\"file_write\");\n\n    for kind in [\"stdlib\", \"tokio\"] {\n        for fsync in [false, true] {\n            for size in [8, KB, 8 * KB, 128 * KB, MB] {\n                // Kind of weird to change the group throughput per benchmark, but it's the only way to\n                // vary it per benchmark. It works.\n                g.throughput(criterion::Throughput::Bytes(size as u64));\n                g.bench_function(\n                    format!(\"{kind}/fsync={fsync}/size={size}\"),\n                    |b| match kind {\n                        \"stdlib\" => run_bench_stdlib(b, size, fsync).unwrap(),\n                        \"tokio\" => run_bench_tokio(b, size, fsync).unwrap(),\n                        name => panic!(\"unknown kind {name}\"),\n                    },\n                );\n            }\n        }\n    }\n\n    fn run_bench_stdlib(b: &mut Bencher, size: usize, fsync: bool) -> anyhow::Result<()> {\n        let mut file = tempfile()?;\n        let buf = vec![0u8; size];\n\n        b.iter(|| {\n            file.write_all(&buf).unwrap();\n            file.flush().unwrap();\n            if fsync {\n                file.sync_data().unwrap();\n            }\n        });\n\n        Ok(())\n    }\n\n    fn run_bench_tokio(b: &mut Bencher, size: usize, fsync: bool) -> 
anyhow::Result<()> {\n        let runtime = tokio::runtime::Runtime::new()?; // needs multithreaded\n\n        let mut file = tokio::fs::File::from_std(tempfile()?);\n        let buf = vec![0u8; size];\n\n        b.iter(|| {\n            runtime.block_on(async {\n                file.write_all(&buf).await.unwrap();\n                file.flush().await.unwrap();\n                if fsync {\n                    file.sync_data().await.unwrap();\n                }\n            })\n        });\n\n        Ok(())\n    }\n}\n\n/// Benchmarks the cost of memory allocations when receiving WAL messages. This emulates the logic\n/// in FeMessage::parse, which extends the read buffer. It is primarily intended to test jemalloc.\nfn bench_bytes_reserve(c: &mut Criterion) {\n    let mut g = c.benchmark_group(\"bytes_reserve\");\n    for size in [1, 64, KB, 8 * KB, 128 * KB] {\n        g.throughput(criterion::Throughput::Bytes(size as u64));\n        g.bench_function(format!(\"size={size}\"), |b| run_bench(b, size).unwrap());\n    }\n\n    fn run_bench(b: &mut Bencher, size: usize) -> anyhow::Result<()> {\n        let mut bytes = BytesMut::new();\n        let data = vec![0; size];\n\n        b.iter(|| {\n            bytes.reserve(size);\n            bytes.extend_from_slice(&data);\n            bytes.split_to(size).freeze();\n        });\n\n        Ok(())\n    }\n}\n"
  },
  {
    "path": "safekeeper/client/Cargo.toml",
    "content": "[package]\nname = \"safekeeper_client\"\nversion = \"0.1.0\"\nedition.workspace = true\nlicense.workspace = true\n\n[dependencies]\nhttp-utils.workspace = true\nsafekeeper_api.workspace = true\nthiserror.workspace = true\nreqwest = { workspace = true, features = [ \"stream\" ] }\nserde.workspace = true\nutils.workspace = true\nworkspace_hack = { version = \"0.1\", path = \"../../workspace_hack\" }\n"
  },
  {
    "path": "safekeeper/client/src/lib.rs",
    "content": "pub mod mgmt_api;\n"
  },
  {
    "path": "safekeeper/client/src/mgmt_api.rs",
    "content": "//! Safekeeper http client.\n//!\n//! Partially copied from pageserver client; some parts might be better to be\n//! united.\n\nuse std::error::Error as _;\n\nuse http_utils::error::HttpErrorBody;\nuse reqwest::{IntoUrl, Method, Response, StatusCode};\nuse safekeeper_api::models::{\n    self, PullTimelineRequest, PullTimelineResponse, SafekeeperStatus, SafekeeperUtilization,\n    TimelineCreateRequest,\n};\nuse utils::id::{NodeId, TenantId, TimelineId};\nuse utils::logging::SecretString;\n\n#[derive(Debug, Clone)]\npub struct Client {\n    mgmt_api_endpoint: String,\n    authorization_header: Option<SecretString>,\n    client: reqwest::Client,\n}\n\n#[derive(thiserror::Error, Debug)]\npub enum Error {\n    /// Failed to receive body (reqwest error).\n    #[error(\"receive body: {0}{}\", .0.source().map(|e| format!(\": {e}\")).unwrap_or_default())]\n    ReceiveBody(reqwest::Error),\n\n    /// Status is not ok, but failed to parse body as `HttpErrorBody`.\n    #[error(\"receive error body: {0}\")]\n    ReceiveErrorBody(String),\n\n    /// Status is not ok; parsed error in body as `HttpErrorBody`.\n    #[error(\"safekeeper API: {1}\")]\n    ApiError(StatusCode, String),\n\n    #[error(\"Cancelled\")]\n    Cancelled,\n\n    #[error(\"request timed out: {0}\")]\n    Timeout(String),\n}\n\npub type Result<T> = std::result::Result<T, Error>;\n\npub trait ResponseErrorMessageExt: Sized {\n    fn error_from_body(self) -> impl std::future::Future<Output = Result<Self>> + Send;\n}\n\n/// If status is not ok, try to extract error message from the body.\nimpl ResponseErrorMessageExt for reqwest::Response {\n    async fn error_from_body(self) -> Result<Self> {\n        let status = self.status();\n        if status.is_success() {\n            return Ok(self);\n        }\n\n        let url = self.url().to_owned();\n        Err(match self.json::<HttpErrorBody>().await {\n            Ok(HttpErrorBody { msg }) => Error::ApiError(status, msg),\n            Err(_) => 
{\n                Error::ReceiveErrorBody(format!(\"http error ({}) at {}.\", status.as_u16(), url))\n            }\n        })\n    }\n}\n\nimpl Client {\n    pub fn new(\n        client: reqwest::Client,\n        mgmt_api_endpoint: String,\n        jwt: Option<SecretString>,\n    ) -> Self {\n        Self {\n            mgmt_api_endpoint,\n            authorization_header: jwt\n                .map(|jwt| SecretString::from(format!(\"Bearer {}\", jwt.get_contents()))),\n            client,\n        }\n    }\n\n    pub async fn create_timeline(&self, req: &TimelineCreateRequest) -> Result<reqwest::Response> {\n        let uri = format!(\"{}/v1/tenant/timeline\", self.mgmt_api_endpoint);\n        let resp = self.post(&uri, req).await?;\n        Ok(resp)\n    }\n\n    pub async fn pull_timeline(&self, req: &PullTimelineRequest) -> Result<PullTimelineResponse> {\n        let uri = format!(\"{}/v1/pull_timeline\", self.mgmt_api_endpoint);\n        let resp = self.post(&uri, req).await?;\n        resp.json().await.map_err(Error::ReceiveBody)\n    }\n\n    pub async fn exclude_timeline(\n        &self,\n        tenant_id: TenantId,\n        timeline_id: TimelineId,\n        req: &models::TimelineMembershipSwitchRequest,\n    ) -> Result<models::TimelineDeleteResult> {\n        let uri = format!(\n            \"{}/v1/tenant/{}/timeline/{}/exclude\",\n            self.mgmt_api_endpoint, tenant_id, timeline_id\n        );\n        let resp = self.put(&uri, req).await?;\n        resp.json().await.map_err(Error::ReceiveBody)\n    }\n\n    pub async fn delete_timeline(\n        &self,\n        tenant_id: TenantId,\n        timeline_id: TimelineId,\n    ) -> Result<models::TimelineDeleteResult> {\n        let uri = format!(\n            \"{}/v1/tenant/{}/timeline/{}\",\n            self.mgmt_api_endpoint, tenant_id, timeline_id\n        );\n        let resp = self\n            .request_maybe_body(Method::DELETE, &uri, None::<()>)\n            .await?;\n        
resp.json().await.map_err(Error::ReceiveBody)\n    }\n\n    pub async fn switch_timeline_membership(\n        &self,\n        tenant_id: TenantId,\n        timeline_id: TimelineId,\n        req: &models::TimelineMembershipSwitchRequest,\n    ) -> Result<models::TimelineMembershipSwitchResponse> {\n        let uri = format!(\n            \"{}/v1/tenant/{}/timeline/{}/membership\",\n            self.mgmt_api_endpoint, tenant_id, timeline_id\n        );\n        let resp = self.put(&uri, req).await?;\n        resp.json().await.map_err(Error::ReceiveBody)\n    }\n\n    pub async fn delete_tenant(&self, tenant_id: TenantId) -> Result<models::TenantDeleteResult> {\n        let uri = format!(\"{}/v1/tenant/{}\", self.mgmt_api_endpoint, tenant_id);\n        let resp = self\n            .request_maybe_body(Method::DELETE, &uri, None::<()>)\n            .await?;\n        resp.json().await.map_err(Error::ReceiveBody)\n    }\n\n    pub async fn bump_timeline_term(\n        &self,\n        tenant_id: TenantId,\n        timeline_id: TimelineId,\n        req: &models::TimelineTermBumpRequest,\n    ) -> Result<models::TimelineTermBumpResponse> {\n        let uri = format!(\n            \"{}/v1/tenant/{}/timeline/{}/term_bump\",\n            self.mgmt_api_endpoint, tenant_id, timeline_id\n        );\n        let resp = self.post(&uri, req).await?;\n        resp.json().await.map_err(Error::ReceiveBody)\n    }\n\n    pub async fn timeline_status(\n        &self,\n        tenant_id: TenantId,\n        timeline_id: TimelineId,\n    ) -> Result<Response> {\n        let uri = format!(\n            \"{}/v1/tenant/{}/timeline/{}\",\n            self.mgmt_api_endpoint, tenant_id, timeline_id\n        );\n        self.get(&uri).await\n    }\n\n    pub async fn snapshot(\n        &self,\n        tenant_id: TenantId,\n        timeline_id: TimelineId,\n        stream_to: NodeId,\n    ) -> Result<reqwest::Response> {\n        let uri = format!(\n            
\"{}/v1/tenant/{}/timeline/{}/snapshot/{}\",\n            self.mgmt_api_endpoint, tenant_id, timeline_id, stream_to.0\n        );\n        self.get(&uri).await\n    }\n\n    pub async fn status(&self) -> Result<SafekeeperStatus> {\n        let uri = format!(\"{}/v1/status\", self.mgmt_api_endpoint);\n        let resp = self.get(&uri).await?;\n        resp.json().await.map_err(Error::ReceiveBody)\n    }\n\n    pub async fn utilization(&self) -> Result<SafekeeperUtilization> {\n        let uri = format!(\"{}/v1/utilization\", self.mgmt_api_endpoint);\n        let resp = self.get(&uri).await?;\n        resp.json().await.map_err(Error::ReceiveBody)\n    }\n\n    async fn post<B: serde::Serialize, U: IntoUrl>(\n        &self,\n        uri: U,\n        body: B,\n    ) -> Result<reqwest::Response> {\n        self.request(Method::POST, uri, body).await\n    }\n\n    async fn put<B: serde::Serialize, U: IntoUrl>(\n        &self,\n        uri: U,\n        body: B,\n    ) -> Result<reqwest::Response> {\n        self.request(Method::PUT, uri, body).await\n    }\n\n    async fn get<U: IntoUrl>(&self, uri: U) -> Result<reqwest::Response> {\n        self.request(Method::GET, uri, ()).await\n    }\n\n    /// Send the request and check that the status code is good.\n    async fn request<B: serde::Serialize, U: reqwest::IntoUrl>(\n        &self,\n        method: Method,\n        uri: U,\n        body: B,\n    ) -> Result<reqwest::Response> {\n        self.request_maybe_body(method, uri, Some(body)).await\n    }\n\n    /// Send the request and check that the status code is good, with an optional body.\n    async fn request_maybe_body<B: serde::Serialize, U: reqwest::IntoUrl>(\n        &self,\n        method: Method,\n        uri: U,\n        body: Option<B>,\n    ) -> Result<reqwest::Response> {\n        let res = self.request_noerror(method, uri, body).await?;\n        let response = res.error_from_body().await?;\n        Ok(response)\n    }\n\n    /// Just send the request.\n    
async fn request_noerror<B: serde::Serialize, U: reqwest::IntoUrl>(\n        &self,\n        method: Method,\n        uri: U,\n        body: Option<B>,\n    ) -> Result<reqwest::Response> {\n        let mut req = self.client.request(method, uri);\n        if let Some(value) = &self.authorization_header {\n            req = req.header(reqwest::header::AUTHORIZATION, value.get_contents())\n        }\n        if let Some(body) = body {\n            req = req.json(&body);\n        }\n        req.send().await.map_err(Error::ReceiveBody)\n    }\n}\n"
  },
  {
    "path": "safekeeper/spec/.gitignore",
    "content": "*TTrace*\n*.toolbox/\nstates/\n"
  },
  {
    "path": "safekeeper/spec/MCProposerAcceptorReconfig.tla",
    "content": "---- MODULE MCProposerAcceptorReconfig ----\nEXTENDS TLC, ProposerAcceptorReconfig\n\n\\* Augments the spec with model checking constraints.\n\n\\* It slightly duplicates MCProposerAcceptorStatic, but we can't EXTENDS it\n\\* because it EXTENDS ProposerAcceptorStatic in turn. The duplication isn't big\n\\* anyway.\n\n\\* For model checking.\nCONSTANTS\n  max_entries, \\* model constraint: max log entries acceptor/proposer can hold\n  max_term, \\* model constraint: max allowed term\n  max_generation \\* mode constraint: max config generation\n\nASSUME max_entries \\in Nat /\\ max_term \\in Nat /\\ max_generation \\in Nat\n\n\\* Model space constraint.\nStateConstraint == /\\ \\A p \\in proposers:\n                     /\\ prop_state[p].term <= max_term\n                     /\\ Len(prop_state[p].wal) <= max_entries\n                   /\\ conf_store.generation <= max_generation\n\n\\* Sets of proposers and acceptors and symmetric because we don't take any\n\\* actions depending on some concrete proposer/acceptor (like IF p = p1 THEN\n\\* ...)\nProposerAcceptorSymmetry == Permutations(proposers) \\union Permutations(acceptors)\n\n\\* enforce order of the vars in the error trace with ALIAS\n\\* Note that ALIAS is supported only since version 1.8.0 which is pre-release\n\\* as of writing this.\nAlias == [\n           prop_state |-> prop_state,\n           prop_conf |-> prop_conf,\n           acc_state |-> acc_state,\n           acc_conf |-> acc_conf,\n           committed |-> committed,\n           conf_store |-> conf_store\n         ]\n\n====\n"
  },
  {
    "path": "safekeeper/spec/MCProposerAcceptorStatic.tla",
    "content": "---- MODULE MCProposerAcceptorStatic ----\nEXTENDS TLC, ProposerAcceptorStatic\n\n\\* Augments the spec with model checking constraints.\n\n\\* Note that MCProposerAcceptorReconfig duplicates it and might need to\n\\* be updated as well.\n\n\\* For model checking.\nCONSTANTS\n  max_entries, \\* model constraint: max log entries acceptor/proposer can hold\n  max_term \\* model constraint: max allowed term\n\nASSUME max_entries \\in Nat /\\ max_term \\in Nat\n\n\\* Model space constraint.\nStateConstraint == \\A p \\in proposers:\n                    /\\ prop_state[p].term <= max_term\n                    /\\ Len(prop_state[p].wal) <= max_entries\n\\* Sets of proposers and acceptors are symmetric because we don't take any\n\\* actions depending on some concrete proposer/acceptor (like IF p = p1 THEN\n\\* ...)\nProposerAcceptorSymmetry == Permutations(proposers) \\union Permutations(acceptors)\n\n\\* enforce order of the vars in the error trace with ALIAS\n\\* Note that ALIAS is supported only since version 1.8.0 which is pre-release\n\\* as of writing this.\nAlias == [\n           prop_state |-> prop_state,\n           acc_state |-> acc_state,\n           committed |-> committed\n         ]\n\n====\n"
  },
  {
    "path": "safekeeper/spec/ProposerAcceptorReconfig.tla",
    "content": "---- MODULE ProposerAcceptorReconfig ----\n\n(*\n    Spec for https://github.com/neondatabase/neon/blob/538e2312a617c65d489d391892c70b2e4d7407b5/docs/rfcs/035-safekeeper-dynamic-membership-change.md\n\n    Simplifications:\n    - The ones inherited from ProposerAcceptorStatic.\n    - We don't model transient state of the configuration change driver process\n      (storage controller in the implementation). Its actions StartChange and FinishChange\n      are taken based on the persistent state of safekeepers and conf store. The\n      justification for that is the following: once new configuration n is\n      created (e.g with StartChange or FinishChange), any old configuration\n      change driver working on older conf < n will never be able to commit\n      it to the conf store because it is protected by CAS. The\n      propagation of these older confs is still possible though, and\n      spec allows to do it through acceptors.\n      Plus the model is already pretty huge.\n    - Previous point also means that the FinishChange action is\n      based only on the current state of safekeepers, not from\n      the past. That's ok because while individual\n      acceptor <last_log_term, flush_lsn> may go down,\n      quorum one never does. 
So the FinishChange\n      condition which collects max of the quorum may get\n      only more strict over time.\n\n    The invariants expectedly break if any of FinishChange\n    required conditions are removed.\n*)\n\nEXTENDS Integers, Sequences, FiniteSets, TLC\n\nVARIABLES\n  \\* state which is the same in the static spec\n  prop_state,\n  acc_state,\n  committed,\n  elected_history,\n  \\* reconfiguration only state\n  prop_conf, \\* prop_conf[p] is current configuration of proposer p\n  acc_conf, \\* acc_conf[a] is current configuration of acceptor a\n  conf_store \\* configuration in the configuration store.\n\nCONSTANT\n  acceptors,\n  proposers\n\nCONSTANT NULL\n\n\\* Import ProposerAcceptorStatic under PAS.\n\\*\n\\* Note that all vars and consts are named the same and thus substituted\n\\* implicitly.\nPAS == INSTANCE ProposerAcceptorStatic\n\n\\********************************************************************************\n\\* Helpers\n\\********************************************************************************\n\n\\********************************************************************************\n\\* Type assertion\n\\********************************************************************************\n\n\\* Is c a valid config?\nIsConfig(c) ==\n    /\\ DOMAIN c = {\"generation\", \"members\", \"newMembers\"}\n    \\* Unique id of the configuration.\n    /\\ c.generation \\in Nat\n    /\\ c.members \\in SUBSET acceptors\n    \\* newMembers is NULL when it is not a joint conf.\n    /\\ \\/ c.newMembers = NULL\n       \\/ c.newMembers \\in SUBSET acceptors\n\nTypeOk ==\n    /\\ PAS!TypeOk\n    /\\ \\A p \\in proposers: IsConfig(prop_conf[p])\n    /\\ \\A a \\in acceptors: IsConfig(acc_conf[a])\n    /\\ IsConfig(conf_store)\n\n\\********************************************************************************\n\\* Initial\n\\********************************************************************************\n\nInit ==\n  /\\ PAS!Init\n  /\\ \\E 
init_members \\in SUBSET acceptors:\n       LET init_conf == [generation |-> 1, members |-> init_members, newMembers |-> NULL] IN\n           \\* refer to RestartProposer why it is not NULL\n           /\\ prop_conf = [p \\in proposers |-> init_conf]\n           /\\ acc_conf = [a \\in acceptors |-> init_conf]\n           /\\ conf_store = init_conf\n           \\* We could start with anything, but to reduce state space start with\n           \\* the most reasonable total acceptors - 1 conf size, which e.g.\n           \\* makes basic {a1} -> {a2} change in {a1, a2} acceptors and {a1, a2,\n           \\* a3} -> {a2, a3, a4} in {a1, a2, a3, a4} acceptors models even in\n           \\* the smallest models with single change.\n           /\\ Cardinality(init_members) = Cardinality(acceptors) - 1\n\n\\********************************************************************************\n\\* Actions\n\\********************************************************************************\n\n\\* Proposer p loses all state, restarting. In the static spec we bump restarted\n\\* proposer term to max of some quorum + 1 which is a minimal term which can win\n\\* election. With reconfigurations it's harder to calculate such a term, so keep\n\\* it simple and take random acceptor one + 1.\n\\*\n\\* Also make proposer to adopt configuration of another random acceptor. In the\n\\* impl proposer starts with NULL configuration until handshake with first\n\\* acceptor. Removing this NULL special case makes the spec a bit simpler.\nRestartProposer(p) ==\n    /\\ \\E a \\in acceptors: PAS!RestartProposerWithTerm(p, acc_state[a].term + 1)\n    /\\ \\E a \\in acceptors: prop_conf' = [prop_conf EXCEPT ![p] = acc_conf[a]]\n    /\\ UNCHANGED <<acc_conf, conf_store>>\n\n\\* Acceptor a immediately votes for proposer p.\nVote(p, a) ==\n    \\* Configuration must be the same.\n    /\\ prop_conf[p].generation = acc_conf[a].generation\n    \\* And a is expected to be a member of it. 
This is likely redundant as long as\n    \\* becoming leader checks membership (though vote also contributes to max\n    \\* <term, lsn> calculation).\n    /\\ \\/ a \\in prop_conf[p].members\n       \\/ (prop_conf[p].newMembers /= NULL) /\\ (a \\in prop_conf[p].newMembers)\n    /\\ PAS!Vote(p, a)\n    /\\ UNCHANGED <<prop_conf, acc_conf, conf_store>>\n\n\\* Proposer p gets elected.\nBecomeLeader(p) ==\n    /\\ prop_state[p].state = \"campaign\"\n    \\* Votes must form quorum in both sets (if the newMembers exists).\n    /\\ PAS!FormsQuorum(DOMAIN prop_state[p].votes, prop_conf[p].members)\n    /\\ \\/ prop_conf[p].newMembers = NULL\n       \\* TLA+ disjunction evaluation doesn't short-circuit for a good reason:\n       \\* https://groups.google.com/g/tlaplus/c/U6tOJ4dsjVM/m/UdOznPCVBwAJ\n       \\* so repeat the null check.\n       \\/ (prop_conf[p].newMembers /= NULL)  /\\ (PAS!FormsQuorum(DOMAIN prop_state[p].votes, prop_conf[p].newMembers))\n    \\* DoBecomeLeader will copy WAL of the highest voter to proposer's WAL, so\n    \\* ensure its conf is still the same. 
In the impl WAL fetching also has to\n    \\* check the configuration.\n    /\\ prop_conf[p].generation = acc_conf[PAS!MaxVoteAcc(p)].generation\n    /\\ \\A a \\in DOMAIN prop_state[p].votes: prop_conf[p].generation = acc_conf[a].generation\n    /\\ PAS!DoBecomeLeader(p)\n    /\\ UNCHANGED <<prop_conf, acc_conf, conf_store>>\n\nUpdateTerm(p, a) ==\n    /\\ PAS!UpdateTerm(p, a)\n    /\\ UNCHANGED <<prop_conf, acc_conf, conf_store>>\n\nTruncateWal(p, a) ==\n    /\\ prop_state[p].state = \"leader\"\n    \\* Configuration must be the same.\n    /\\ prop_conf[p].generation = acc_conf[a].generation\n    /\\ PAS!TruncateWal(p, a)\n    /\\ UNCHANGED <<prop_conf, acc_conf, conf_store>>\n\nNewEntry(p) ==\n    /\\ PAS!NewEntry(p)\n    /\\ UNCHANGED <<prop_conf, acc_conf, conf_store>>\n\nAppendEntry(p, a) ==\n    /\\ prop_state[p].state = \"leader\"\n    \\* Configuration must be the same.\n    /\\ prop_conf[p].generation = acc_conf[a].generation\n    \\* And a is member of it. Ignoring this likely wouldn't hurt, but not useful\n    \\* either.\n    /\\ \\/ a \\in prop_conf[p].members\n       \\/ (prop_conf[p].newMembers /= NULL) /\\ (a \\in prop_conf[p].newMembers)\n    /\\ PAS!AppendEntry(p, a)\n    /\\ UNCHANGED <<prop_conf, acc_conf, conf_store>>\n\n\\* see PAS!CommitEntries for comments.\nCommitEntries(p) ==\n    /\\ prop_state[p].state = \"leader\"\n    /\\ \\E q1 \\in PAS!AllMinQuorums(prop_conf[p].members):\n           LET q1_commit_lsn == PAS!QuorumCommitLsn(p, q1) IN\n               \\* Configuration must be the same.\n               /\\ \\A a \\in q1: prop_conf[p].generation = acc_conf[a].generation\n               /\\ q1_commit_lsn /= NULL\n               \\* We must collect acks from both quorums, if newMembers is present.\n               /\\ IF prop_conf[p].newMembers = NULL THEN\n                      PAS!DoCommitEntries(p, q1_commit_lsn)\n                  ELSE\n                      \\E q2 \\in PAS!AllMinQuorums(prop_conf[p].newMembers):\n                     
     LET q2_commit_lsn == PAS!QuorumCommitLsn(p, q2) IN\n                              \\* Configuration must be the same.\n                              /\\ \\A a \\in q2: prop_conf[p].generation = acc_conf[a].generation\n                              /\\ q2_commit_lsn /= NULL\n                              /\\ PAS!DoCommitEntries(p, PAS!Min(q1_commit_lsn, q2_commit_lsn))\n    /\\ UNCHANGED <<prop_conf, acc_conf, conf_store>>\n\n\\* Proposer p adopts higher conf c from conf store or from some acceptor.\nProposerSwitchConf(p) ==\n    /\\ \\E c \\in ({conf_store} \\union {acc_conf[a]: a \\in acceptors}):\n        \\* p's conf is lower than c.\n        /\\ (c.generation > prop_conf[p].generation)\n        \\* We allow to bump conf without restart only when wp is already elected.\n        \\* If it isn't, the votes it has already collected are from the previous\n        \\* configuration and can't be used.\n        \\*\n        \\* So if proposer is in 'campaign' in the impl we would restart preserving\n        \\* conf and increasing term. 
In the spec this transition is already covered\n        \\* by more a generic RestartProposer, so we don't specify it here.\n        /\\ prop_state[p].state = \"leader\"\n        /\\ prop_conf' = [prop_conf EXCEPT ![p] = c]\n        /\\ UNCHANGED <<prop_state, acc_state, committed, elected_history, acc_conf, conf_store>>\n\n\\* Do CAS on the conf store, starting change into the new_members conf.\nStartChange(new_members) ==\n    \\* Possible only if we don't already have the change in progress.\n    /\\ conf_store.newMembers = NULL\n    \\* Not necessary, but reduces space a bit.\n    /\\ new_members /= conf_store.members\n    /\\ conf_store' = [generation |-> conf_store.generation + 1, members |-> conf_store.members, newMembers |-> new_members]\n    /\\ UNCHANGED <<prop_state, acc_state, committed, elected_history, prop_conf, acc_conf>>\n\n\\* Acceptor's last_log_term.\nAccLastLogTerm(acc) ==\n    PAS!LastLogTerm(PAS!AcceptorTermHistory(acc))\n\n\\* Do CAS on the conf store, transferring joint conf into the newMembers only.\nFinishChange ==\n    \\* have joint conf\n    /\\ conf_store.newMembers /= NULL\n    \\* The conditions for finishing the change are:\n    /\\ \\E qo \\in PAS!AllMinQuorums(conf_store.members):\n           \\* 1) Old majority must be aware of the joint conf.\n           \\* Note: generally the driver can't know current acceptor\n           \\* generation, it can only know that it once had been the\n           \\* expected one, but it might have advanced since then.\n           \\* But as explained at the top of the file if acceptor gen\n           \\* advanced, FinishChange will never be able to complete\n           \\* due to CAS anyway. 
We use strict equality here because\n           \\* that's what makes sense conceptually (old driver should\n           \\* abandon its attempt if it observes that conf has advanced).\n           /\\ \\A a \\in qo: conf_store.generation = acc_conf[a].generation\n           \\* 2) New member set must have log synced, i.e. some majority of it needs\n           \\*    to have <last_log_term, lsn> at least as high as max of some\n           \\*    old majority.\n           \\* 3) Term must be synced, i.e. some majority of the new set must\n           \\*    have term >= max term of some old majority.\n           \\*    This ensures that two leaders are never elected with the same\n           \\*    term even after config change (which would be bad unless we treat\n           \\*    generation as a part of term which we don't).\n           \\* 4) A majority of the new set must be aware of the joint conf.\n           \\*    This allows to safely destroy acceptor state if it is not a\n           \\*    member of its current conf (which is useful for cleanup after\n           \\*    migration as well as for aborts).\n           /\\ LET sync_pos == PAS!MaxTermLsn({[term |-> AccLastLogTerm(a), lsn |-> PAS!FlushLsn(a)]: a \\in qo})\n                  sync_term == PAS!Maximum({acc_state[a].term: a \\in qo})\n              IN\n                  \\E qn \\in PAS!AllMinQuorums(conf_store.newMembers):\n                      \\A a \\in qn:\n                          /\\ PAS!TermLsnGE([term |-> AccLastLogTerm(a), lsn |-> PAS!FlushLsn(a)], sync_pos)\n                          /\\ acc_state[a].term >= sync_term\n                          \\* The same note as above about strict equality applies here.\n                          /\\ conf_store.generation = acc_conf[a].generation\n    /\\ conf_store' = [generation |-> conf_store.generation + 1, members |-> conf_store.newMembers, newMembers |-> NULL]\n    /\\ UNCHANGED <<prop_state, acc_state, committed, elected_history, prop_conf, 
acc_conf>>\n\n\\* Do CAS on the conf store, aborting the change in progress.\nAbortChange ==\n    \\* have joint conf\n    /\\ conf_store.newMembers /= NULL\n    /\\ conf_store' = [generation |-> conf_store.generation + 1, members |-> conf_store.members, newMembers |-> NULL]\n    /\\ UNCHANGED <<prop_state, acc_state, committed, elected_history, prop_conf, acc_conf>>\n\n\\* Acceptor a switches to higher configuration from the conf store\n\\* or from some proposer.\nAccSwitchConf(a) ==\n    /\\ \\E c \\in ({conf_store} \\union {prop_conf[p]: p \\in proposers}):\n        /\\ acc_conf[a].generation < c.generation\n        /\\ acc_conf' = [acc_conf EXCEPT ![a] = c]\n        /\\ UNCHANGED <<prop_state, acc_state, committed, elected_history, prop_conf, conf_store>>\n\n\\* Nuke all acceptor state if it is not a member of its current conf. Models\n\\* cleanup after migration/abort.\nAccReset(a) ==\n    /\\ \\/ (acc_conf[a].newMembers = NULL) /\\ (a \\notin acc_conf[a].members)\n       \\/ (acc_conf[a].newMembers /= NULL) /\\ (a \\notin (acc_conf[a].members \\union acc_conf[a].newMembers))\n    /\\ acc_state' = [acc_state EXCEPT ![a] = PAS!InitAcc]\n    \\* Set nextSendLsn to `a` to NULL everywhere. nextSendLsn serves as a mark\n    \\* that elected proposer performed TruncateWal on the acceptor, which isn't\n    \\* true anymore after state reset. 
In the impl local deletion is expected to\n    \\* terminate all existing connections.\n    /\\ prop_state' = [p \\in proposers |-> [prop_state[p] EXCEPT !.nextSendLsn[a] = NULL]]\n    /\\ UNCHANGED <<committed, elected_history, prop_conf, acc_conf, conf_store>>\n\n\\*******************************************************************************\n\\* Final spec\n\\*******************************************************************************\n\nNext ==\n  \\/ \\E p \\in proposers: RestartProposer(p)\n  \\/ \\E p \\in proposers: \\E a \\in acceptors: Vote(p, a)\n  \\/ \\E p \\in proposers: BecomeLeader(p)\n  \\/ \\E p \\in proposers: \\E a \\in acceptors: UpdateTerm(p, a)\n  \\/ \\E p \\in proposers: \\E a \\in acceptors: TruncateWal(p, a)\n  \\/ \\E p \\in proposers: NewEntry(p)\n  \\/ \\E p \\in proposers: \\E a \\in acceptors: AppendEntry(p, a)\n  \\/ \\E p \\in proposers: CommitEntries(p)\n  \\/ \\E new_members \\in SUBSET acceptors: StartChange(new_members)\n  \\/ FinishChange\n  \\/ AbortChange\n  \\/ \\E p \\in proposers: ProposerSwitchConf(p)\n  \\/ \\E a \\in acceptors: AccSwitchConf(a)\n  \\/ \\E a \\in acceptors: AccReset(a)\n\nSpec == Init /\\ [][Next]_<<prop_state, acc_state, committed, elected_history, prop_conf, acc_conf, conf_store>>\n\n\\********************************************************************************\n\\* Invariants\n\\********************************************************************************\n\nAllConfs ==\n    {conf_store} \\union {prop_conf[p]: p \\in proposers} \\union {acc_conf[a]: a \\in acceptors}\n\n\\* Fairly trivial (given the conf store) invariant that different configurations\n\\* with the same generation are never issued.\nConfigSafety ==\n    \\A c1, c2 \\in AllConfs:\n        (c1.generation = c2.generation) => (c1 = c2)\n\nElectionSafety == PAS!ElectionSafety\n\nElectionSafetyFull == PAS!ElectionSafetyFull\n\nLogIsMonotonic == PAS!LogIsMonotonic\n\nLogSafety == 
PAS!LogSafety\n\n\\********************************************************************************\n\\* Invariants which don't need to hold, but useful for playing/debugging.\n\\********************************************************************************\n\n\\* Check that we ever switch into non joint conf.\nMaxAccConf == ~ \\E a \\in acceptors:\n    /\\ acc_conf[a].generation = 3\n    /\\ acc_conf[a].newMembers /= NULL\n\nCommittedNotTruncated == PAS!CommittedNotTruncated\n\nMaxTerm == PAS!MaxTerm\n\nMaxStoreConf == conf_store.generation <= 1\n\nMaxAccWalLen == PAS!MaxAccWalLen\n\nMaxCommitLsn == PAS!MaxCommitLsn\n\n====\n"
  },
  {
    "path": "safekeeper/spec/ProposerAcceptorStatic.tla",
    "content": "---- MODULE ProposerAcceptorStatic ----\n\n(*\n  The protocol is very similar to Raft. The key differences are:\n  - Leaders (proposers) are separated from storage nodes (acceptors), which has\n    been already an established way to think about Paxos.\n  - We don't want to stamp each log record with term, so instead carry around\n    term histories which are sequences of <term, LSN where term begins> pairs.\n    As a bonus (and subtlety) this allows the proposer to commit entries from\n    previous terms without writing new records -- if acceptor's log is caught\n    up, update of term history on it updates last_log_term as well.\n*)\n\n\\* Model simplifications:\n\\* - Instant message delivery. Notably, ProposerElected message (TruncateWal action) is not\n\\*   delayed, so we don't attempt to truncate WAL when the same wp already appended something\n\\*   on the acceptor since common point had been calculated (this should be rejected).\n\\* - old WAL is immediately copied to proposer on its election, without on-demand fetch later.\n\n\\* Some ideas how to break it to play around to get a feeling:\n\\* - replace Quorum with BadQuorum.\n\\* - remove 'don't commit entries from previous terms separately' rule in\n\\*   CommitEntries and observe figure 8 from the raft paper.\n\\*   With p2a3t4l4 32 steps error was found in 1h on 80 cores.\n\nEXTENDS Integers, Sequences, FiniteSets, TLC\n\nVARIABLES\n  prop_state, \\* prop_state[p] is state of proposer p\n  acc_state, \\* acc_state[a] is state of acceptor a\n  committed, \\* bag (set) of ever committed <<term, lsn>> entries\n  elected_history \\* counter for elected terms, see TypeOk for details\n\nCONSTANT\n  acceptors,\n  proposers\n\nCONSTANT NULL\n\n\\********************************************************************************\n\\* Helpers\n\\********************************************************************************\n\nMaximum(S) ==\n  
(*************************************************************************)\n  (* If S is a set of numbers, then this define Maximum(S) to be the       *)\n  (* maximum of those numbers, or -1 if S is empty.                        *)\n  (*************************************************************************)\n  IF S = {} THEN -1 ELSE CHOOSE n \\in S : \\A m \\in S : n \\geq m\n\n\\* minimum of numbers in the set, error if set is empty\nMinimum(S) == CHOOSE min \\in S : \\A n \\in S : min <= n\n\n\\* Min of two numbers\nMin(a, b) == IF a < b THEN a ELSE b\n\n\\* Sort of 0 for functions\nEmptyF == [x \\in {} |-> 42]\nIsEmptyF(f) == DOMAIN f = {}\n\n\\* Set of values (image) of the function f. Apparently no such builtin.\nRange(f) == {f[x] : x \\in DOMAIN f}\n\n\\* If key k is in function f, map it using l, otherwise insert v. Returns the\n\\* updated function.\nUpsert(f, k, v, l(_)) ==\n    LET new_val == IF k \\in DOMAIN f THEN l(f[k]) ELSE v IN\n        (k :> new_val) @@ f\n\n\\*****************\n\n\\* Does set of acceptors `acc_set` form the quorum in the member set `members`?\n\\* Acceptors not from `members` are excluded (matters only for reconfig).\nFormsQuorum(acc_set, members) ==\n    Cardinality(acc_set \\intersect members) >= (Cardinality(members) \\div 2 + 1)\n\n\\* Like FormsQuorum, but for minimal quorum.\nFormsMinQuorum(acc_set, members) ==\n    Cardinality(acc_set \\intersect members) = (Cardinality(members) \\div 2 + 1)\n\n\\* All sets of acceptors forming minimal quorums in the member set `members`.\nAllQuorums(members) == {subset \\in SUBSET members: FormsQuorum(subset, members)}\nAllMinQuorums(members) == {subset \\in SUBSET acceptors: FormsMinQuorum(subset, members)}\n\n\\* For substituting Quorum and seeing what happens.\nFormsBadQuorum(acc_set, members) ==\n    Cardinality(acc_set \\intersect members) >= (Cardinality(members) \\div 2)\nFormsMinBadQuorum(acc_set, members) ==\n    Cardinality(acc_set \\intersect members) = (Cardinality(members) 
\\div 2)\nAllBadQuorums(members) == {subset \\in SUBSET acceptors: FormsBadQuorum(subset, members)}\nAllMinBadQuorums(members) == {subset \\in SUBSET acceptors: FormsMinBadQuorum(subset, members)}\n\n\\* flushLsn (end of WAL, i.e. index of next entry) of acceptor a.\nFlushLsn(a) == Len(acc_state[a].wal) + 1\n\n\\* Typedefs. Note that TLA+ Nat includes zero.\nTerms == Nat\nLsns == Nat\n\n\\********************************************************************************\n\\* Type assertion\n\\********************************************************************************\n\\* Defining sets of all possible tuples and using them in TypeOk in usual\n\\* all-tuples constructor is not practical because such definitions force\n\\* TLC to enumerate them, while they are are horribly enormous\n\\* (TLC screams \"Attempted to construct a set with too many elements\").\n\\* So instead check types manually.\n\n\n\\* Term history is a sequence of <term, LSN where term begins> pairs.\nIsTermHistory(th) ==\n    \\A th_entry \\in Range(th): th_entry.term \\in Terms /\\ th_entry.lsn \\in Lsns\n\nIsWal(w) ==\n    \\A i \\in DOMAIN w:\n        /\\ i \\in Lsns\n        /\\ w[i] \\in Terms\n\nTypeOk ==\n    /\\ \\A p \\in proposers:\n        \\* '_' in field names hinders pretty printing\n        \\* https://github.com/tlaplus/tlaplus/issues/1051\n        \\* so use camel case.\n        /\\ DOMAIN prop_state[p] = {\"state\", \"term\", \"votes\", \"termHistory\", \"wal\", \"nextSendLsn\"}\n        \\* In campaign proposer sends RequestVote and waits for acks;\n        \\* in leader he is elected.\n        /\\ prop_state[p].state \\in {\"campaign\", \"leader\"}\n        \\* term for which it will campaign, or won term in leader state\n        /\\ prop_state[p].term \\in Terms\n        \\* votes received\n        /\\ \\A voter \\in DOMAIN prop_state[p].votes: voter \\in acceptors\n        /\\ \\A vote \\in Range(prop_state[p].votes):\n               /\\ IsTermHistory(vote.termHistory)\n    
           /\\ vote.flushLsn \\in Lsns\n        \\* Proposer's term history. Empty while proposer is in \"campaign\".\n        /\\ IsTermHistory(prop_state[p].termHistory)\n        \\* In the model we identify WAL entries only by <term, LSN> pairs\n        \\* without additional unique id, which is enough for its purposes.\n        \\* It means that with term history fully modeled wal becomes\n        \\* redundant as it can be computed from term history + WAL length.\n        \\* However, we still keep it here and at acceptors as explicit sequence\n        \\* where index is LSN and value is the term to avoid artificial mapping to\n        \\* figure out real entries. It shouldn't bloat model much because this\n        \\* doesn't increase number of distinct states.\n        /\\ IsWal(prop_state[p].wal)\n        \\* Map of acceptor -> next lsn to send. It is set when truncate_wal is\n        \\* done so sending entries is allowed only after that. In the impl TCP\n        \\* ensures this ordering. We use NULL instead of missing value to use\n        \\* EXCEPT in AccReset.\n        /\\ \\A a \\in DOMAIN prop_state[p].nextSendLsn:\n               /\\ a \\in acceptors\n               /\\ prop_state[p].nextSendLsn[a] \\in Lsns \\union {NULL}\n    /\\ \\A a \\in acceptors:\n           /\\ DOMAIN acc_state[a] = {\"term\", \"termHistory\", \"wal\"}\n           /\\ acc_state[a].term \\in Terms\n           /\\ IsTermHistory(acc_state[a].termHistory)\n           /\\ IsWal(acc_state[a].wal)\n    /\\ \\A c \\in committed:\n           /\\ c.term \\in Terms\n           /\\ c.lsn \\in Lsns\n    \\* elected_history is a retrospective map of term -> number of times it was\n    \\* elected, for use in ElectionSafetyFull invariant. For static spec it is\n    \\* fairly convincing that it holds, but with membership change it is less\n    \\* trivial. 
And as we identify log entries only with <term, lsn>, importance\n    \\* of it is quite high as violation of log safety might go undetected if\n    \\* election safety is violated. Note though that this is not always the\n    \\* case, i.e. you can imagine (and TLC should find) schedule where log\n    \\* safety violation is still detected because two leaders with the same term\n    \\* commit histories which are different in previous terms, so it is not that\n    \\* crucial. Plus if spec allows ElectionSafetyFull violation, likely\n    \\* ElectionSafety will also be violated in some schedules. But neither it\n    \\* should bloat the model too much.\n    /\\ \\A term \\in DOMAIN elected_history:\n           /\\ term \\in Terms\n           /\\ elected_history[term] \\in Nat\n\n\\********************************************************************************\n\\* Initial\n\\********************************************************************************\n\nInitAcc ==\n    [\n        \\* There will be no leader in zero term, 1 is the first\n        \\* real.\n        term |-> 0,\n        \\* Again, leader in term 0 doesn't exist, but we initialize\n        \\* term histories with it to always have common point in\n        \\* them. 
Lsn is 1 because TLA+ sequences are indexed from 1\n        \\* (we don't want to truncate WAL out of range).\n        termHistory |-> << [term |-> 0, lsn |-> 1] >>,\n        wal |-> << >>\n    ]\n\nInit ==\n    /\\ prop_state = [p \\in proposers |-> [\n                        state |-> \"campaign\",\n                        term |-> 1,\n                        votes |-> EmptyF,\n                        termHistory |-> << >>,\n                        wal |-> << >>,\n                        nextSendLsn |-> [a \\in acceptors |-> NULL]\n                    ]]\n    /\\ acc_state = [a \\in acceptors |-> InitAcc]\n    /\\ committed = {}\n    /\\ elected_history = EmptyF\n\n\n\\********************************************************************************\n\\* Actions\n\\********************************************************************************\n\nRestartProposerWithTerm(p, new_term) ==\n    /\\ prop_state' = [prop_state EXCEPT ![p].state = \"campaign\",\n                                        ![p].term = new_term,\n                                        ![p].votes = EmptyF,\n                                        ![p].termHistory = << >>,\n                                        ![p].wal = << >>,\n                                        ![p].nextSendLsn = [a \\in acceptors |-> NULL]]\n    /\\ UNCHANGED <<acc_state, committed, elected_history>>\n\n\\* Proposer p loses all state, restarting.\n\\* For simplicity (and to reduce state space), we assume it immediately gets\n\\* current state from quorum q of acceptors determining the term he will request\n\\* to vote for.\nRestartProposer(p) ==\n    \\E q \\in AllQuorums(acceptors):\n        LET new_term == Maximum({acc_state[a].term : a \\in q}) + 1 IN\n            RestartProposerWithTerm(p, new_term)\n\n\\* Term history of acceptor a's WAL: the one saved truncated to contain only <=\n\\* local FlushLsn entries. Note that FlushLsn is the end LSN of the last entry\n\\* (and begin LSN of the next). 
The mental model for non strict comparison is\n\\* that once proposer is elected it immediately writes log record with zero\n\\* length. This allows leader to commit existing log without writing any new\n\\* entries. For example, assume acceptor has WAL\n\\*   1.1, 1.2\n\\* written by prop with term 1; its current <last_log_term, flush_lsn>\n\\* is <1, 3>. Now prop with term 2 and max vote from this acc is elected.\n\\* Once TruncateWAL is done, <last_log_term, flush_lsn> becomes <2, 3>\n\\* without any new records explicitly written.\nAcceptorTermHistory(a) ==\n    SelectSeq(acc_state[a].termHistory, LAMBDA th_entry: th_entry.lsn <= FlushLsn(a))\n\n\\* Acceptor a immediately votes for proposer p.\nVote(p, a) ==\n    /\\ prop_state[p].state = \"campaign\"\n    /\\ acc_state[a].term < prop_state[p].term \\* main voting condition\n    /\\ acc_state' = [acc_state EXCEPT ![a].term = prop_state[p].term]\n    /\\ LET\n           vote == [termHistory |-> AcceptorTermHistory(a), flushLsn |-> FlushLsn(a)]\n       IN\n           prop_state' = [prop_state EXCEPT ![p].votes = (a :> vote) @@ prop_state[p].votes]\n    /\\ UNCHANGED <<committed, elected_history>>\n\n\n\\* Get lastLogTerm from term history th.\nLastLogTerm(th) == th[Len(th)].term\n\n\\* Compares <term, lsn> pairs: returns true if tl1 >= tl2.\nTermLsnGE(tl1, tl2) ==\n    /\\ tl1.term >= tl2.term\n    /\\ (tl1.term = tl2.term => tl1.lsn >= tl2.lsn)\n\n\\* Choose max <term, lsn> pair in the non empty set of them.\nMaxTermLsn(term_lsn_set) ==\n    CHOOSE max_tl \\in term_lsn_set: \\A tl \\in term_lsn_set: TermLsnGE(max_tl, tl)\n\n\\* Find acceptor with the highest <last_log_term, lsn> vote in proposer p's votes.\nMaxVoteAcc(p) ==\n    CHOOSE a \\in DOMAIN prop_state[p].votes:\n        LET a_vote == prop_state[p].votes[a]\n            a_vote_term_lsn == [term |-> LastLogTerm(a_vote.termHistory), lsn |-> a_vote.flushLsn]\n            vote_term_lsns == {[term |-> LastLogTerm(v.termHistory), lsn |-> v.flushLsn]: v \\in 
Range(prop_state[p].votes)}\n        IN\n            a_vote_term_lsn = MaxTermLsn(vote_term_lsns)\n\n\\* Workhorse for BecomeLeader.\n\\* Assumes the check prop_state[p] votes is quorum has been done *outside*.\nDoBecomeLeader(p) ==\n    LET\n        \\* Find acceptor with the highest <last_log_term, lsn> vote.\n        max_vote_acc == MaxVoteAcc(p)\n        max_vote == prop_state[p].votes[max_vote_acc]\n        prop_th == Append(max_vote.termHistory, [term |-> prop_state[p].term, lsn |-> max_vote.flushLsn])\n    IN\n        \\* We copy all log preceding proposer's term from the max vote node so\n        \\* make sure it is still on one term with us. This is a model\n        \\* simplification which can be removed, in impl we fetch WAL on demand\n        \\* from safekeeper which has it later. Note though that in case of on\n        \\* demand fetch we must check on donor not only term match, but that\n        \\* truncate_wal had already been done (if it is not max_vote_acc).\n        /\\ acc_state[max_vote_acc].term = prop_state[p].term\n        /\\ prop_state' = [prop_state EXCEPT ![p].state = \"leader\",\n                                            ![p].termHistory = prop_th,\n                                            ![p].wal = acc_state[max_vote_acc].wal\n                         ]\n        /\\ elected_history' = Upsert(elected_history, prop_state[p].term, 1, LAMBDA c: c + 1)\n        /\\ UNCHANGED <<acc_state, committed>>\n\n\\* Proposer p gets elected.\nBecomeLeader(p) ==\n  /\\ prop_state[p].state = \"campaign\"\n  /\\ FormsQuorum(DOMAIN prop_state[p].votes, acceptors)\n  /\\ DoBecomeLeader(p)\n\n\\* Acceptor a learns about elected proposer p's term. 
In impl it matches to\n\\* VoteRequest/VoteResponse exchange when leader is already elected and is not\n\\* interested in the vote result.\nUpdateTerm(p, a) ==\n    /\\ prop_state[p].state = \"leader\"\n    /\\ acc_state[a].term < prop_state[p].term\n    /\\ acc_state' = [acc_state EXCEPT ![a].term = prop_state[p].term]\n    /\\ UNCHANGED <<prop_state, committed, elected_history>>\n\n\\* Find highest common point (LSN of the first divergent record) in the logs of\n\\* proposer p and acceptor a. Returns <term, lsn> of the highest common point.\nFindHighestCommonPoint(prop_th, acc_th, acc_flush_lsn) ==\n    LET\n        \\* First find index of the highest common term.\n        \\* It must exist because we initialize th with <0, 1>.\n        last_common_idx == Maximum({i \\in 1..Min(Len(prop_th), Len(acc_th)): prop_th[i].term = acc_th[i].term})\n        last_common_term == prop_th[last_common_idx].term\n        \\* Now find where it ends at both prop and acc and take min. End of term\n        \\* is the start of the next unless it is the last one; there it is\n        \\* flush_lsn in case of acceptor. In case of proposer it is the current\n        \\* writing position, but it can't be less than flush_lsn, so we\n        \\* take flush_lsn.\n        acc_common_term_end == IF last_common_idx = Len(acc_th) THEN acc_flush_lsn ELSE acc_th[last_common_idx + 1].lsn\n        prop_common_term_end == IF last_common_idx = Len(prop_th) THEN acc_flush_lsn ELSE prop_th[last_common_idx + 1].lsn\n    IN\n        [term |-> last_common_term, lsn |-> Min(acc_common_term_end, prop_common_term_end)]\n\n\\* Elected proposer p immediately truncates WAL (and sets term history) of\n\\* acceptor a before starting streaming. 
Establishes nextSendLsn for a.\n\\*\n\\* In impl this happens at each reconnection, here we also allow to do it\n\\* multiple times.\nTruncateWal(p, a) ==\n    /\\ prop_state[p].state = \"leader\"\n    /\\ acc_state[a].term = prop_state[p].term\n    /\\ LET\n           hcp == FindHighestCommonPoint(prop_state[p].termHistory, AcceptorTermHistory(a), FlushLsn(a))\n           next_send_lsn == (a :> hcp.lsn) @@ prop_state[p].nextSendLsn\n       IN\n           \\* Acceptor persists full history immediately; reads adjust it to the\n           \\* really existing wal with AcceptorTermHistory.\n           /\\ acc_state' = [acc_state EXCEPT ![a].termHistory = prop_state[p].termHistory,\n                                             \\* note: SubSeq is inclusive, hence -1.\n                                             ![a].wal = SubSeq(acc_state[a].wal, 1, hcp.lsn - 1)\n                           ]\n           /\\ prop_state' = [prop_state EXCEPT ![p].nextSendLsn = next_send_lsn]\n           /\\ UNCHANGED <<committed, elected_history>>\n\n\\* Append new log entry to elected proposer\nNewEntry(p) ==\n    /\\ prop_state[p].state = \"leader\"\n    /\\ LET\n           \\* entry consists only of term, index serves as LSN.\n           new_entry == prop_state[p].term\n       IN\n           /\\ prop_state' = [prop_state EXCEPT ![p].wal = Append(prop_state[p].wal, new_entry)]\n           /\\ UNCHANGED <<acc_state, committed, elected_history>>\n\n\\* Immediately append next entry from elected proposer to acceptor a.\nAppendEntry(p, a) ==\n    /\\ prop_state[p].state = \"leader\"\n    /\\ acc_state[a].term = prop_state[p].term\n    /\\ prop_state[p].nextSendLsn[a] /= NULL  \\* did TruncateWal\n    /\\ prop_state[p].nextSendLsn[a] <= Len(prop_state[p].wal)  \\* have smth to send\n    /\\ LET\n           send_lsn == prop_state[p].nextSendLsn[a]\n           entry == prop_state[p].wal[send_lsn]\n           \\* Since message delivery is instant we don't check that send_lsn follows\n          
 \\* the last acc record, it must always be true.\n       IN\n           /\\ prop_state' = [prop_state EXCEPT ![p].nextSendLsn[a] = send_lsn + 1]\n           /\\ acc_state' = [acc_state EXCEPT ![a].wal = Append(acc_state[a].wal, entry)]\n           /\\ UNCHANGED <<committed, elected_history>>\n\n\\* LSN where elected proposer p starts writing its records.\nPropStartLsn(p) ==\n    IF prop_state[p].state = \"leader\" THEN prop_state[p].termHistory[Len(prop_state[p].termHistory)].lsn ELSE NULL\n\n\\* LSN which can be committed by proposer p using min quorum q (check that q\n\\* forms quorum must have been done outside). NULL if there is none.\nQuorumCommitLsn(p, q) ==\n    IF\n        /\\ prop_state[p].state = \"leader\"\n        /\\ \\A a \\in q:\n            \\* Without explicit responses to appends this ensures that append\n            \\* up to FlushLsn has been accepted.\n           /\\ acc_state[a].term = prop_state[p].term\n             \\* nextSendLsn existence means TruncateWal has happened, it ensures\n             \\* acceptor's WAL (and FlushLsn) are from proper proposer's history.\n             \\* Alternatively we could compare LastLogTerm here, but that's closer to\n             \\* what we do in the impl (we check flushLsn in AppendResponse, but\n             \\* AppendRequest is processed only if HandleElected handling was good).\n           /\\ prop_state[p].nextSendLsn[a] /= NULL\n    THEN\n        \\* Now find the LSN present on all the quorum.\n        LET quorum_lsn == Minimum({FlushLsn(a): a \\in q}) IN\n            \\* This is the basic Raft rule of not committing entries from previous\n            \\* terms except along with current term entry (commit them only when\n            \\* quorum recovers, i.e. 
last_log_term on it reaches leader's term).\n            IF quorum_lsn >= PropStartLsn(p) THEN\n                quorum_lsn\n            ELSE\n                NULL\n    ELSE\n        NULL\n\n\\* Commit all entries on proposer p with record lsn < commit_lsn.\nDoCommitEntries(p, commit_lsn) ==\n    /\\ committed' = committed \\cup {[term |-> prop_state[p].wal[lsn], lsn |-> lsn]: lsn \\in 1..(commit_lsn - 1)}\n    /\\ UNCHANGED <<prop_state, acc_state, elected_history>>\n\n\\* Proposer p commits all entries it can using some quorum. Note that unlike\n\\* will62794/logless-reconfig this allows to commit entries from previous terms\n\\* (when conditions for that are met).\nCommitEntries(p) ==\n    /\\ prop_state[p].state = \"leader\"\n    \\* Using min quorums here is better because 1) QuorumCommitLsn for\n    \\* simplicity checks min across all accs in q. 2) it probably makes\n    \\* evaluation faster.\n    /\\ \\E q \\in AllMinQuorums(acceptors):\n           LET commit_lsn == QuorumCommitLsn(p, q) IN\n               /\\ commit_lsn /= NULL\n               /\\ DoCommitEntries(p, commit_lsn)\n\n\\*******************************************************************************\n\\* Final spec\n\\*******************************************************************************\n\nNext ==\n    \\/ \\E p \\in proposers: RestartProposer(p)\n    \\/ \\E p \\in proposers: \\E a \\in acceptors: Vote(p, a)\n    \\/ \\E p \\in proposers: BecomeLeader(p)\n    \\/ \\E p \\in proposers: \\E a \\in acceptors: UpdateTerm(p, a)\n    \\/ \\E p \\in proposers: \\E a \\in acceptors: TruncateWal(p, a)\n    \\/ \\E p \\in proposers: NewEntry(p)\n    \\/ \\E p \\in proposers: \\E a \\in acceptors: AppendEntry(p, a)\n    \\/ \\E p \\in proposers: CommitEntries(p)\n\nSpec == Init /\\ [][Next]_<<prop_state, acc_state, committed, elected_history>>\n\n\n\\********************************************************************************\n\\* 
Invariants\n\\********************************************************************************\n\n\\* Lighter version of ElectionSafetyFull which doesn't require elected_history.\nElectionSafety ==\n    \\A p1, p2 \\in proposers:\n        (/\\ prop_state[p1].state = \"leader\"\n         /\\ prop_state[p2].state = \"leader\"\n         /\\ prop_state[p1].term = prop_state[p2].term) => (p1 = p2)\n\n\\* Single term must never be elected more than once.\nElectionSafetyFull == \\A term \\in DOMAIN elected_history: elected_history[term] <= 1\n\n\\* Log is expected to be monotonic by <term, lsn> comparison. This is not true\n\\* in variants of multi Paxos, but in Raft (and here) it is.\nLogIsMonotonic ==\n    \\A a \\in acceptors:\n        \\A i, j \\in DOMAIN acc_state[a].wal:\n            (i > j) => (acc_state[a].wal[i] >= acc_state[a].wal[j])\n\n\\* Main invariant: If two entries are committed at the same LSN, they must be\n\\* the same entry.\nLogSafety ==\n    \\A c1, c2 \\in committed: (c1.lsn = c2.lsn) => (c1 = c2)\n\n\n\\********************************************************************************\n\\* Invariants which don't need to hold, but useful for playing/debugging.\n\\********************************************************************************\n\n\\* Limits term of elected proposers\nMaxTerm == \\A p \\in proposers: (prop_state[p].state = \"leader\" => prop_state[p].term < 2)\n\nMaxAccWalLen == \\A a \\in acceptors: Len(acc_state[a].wal) < 2\n\n\\* Limits max number of committed entries. 
That way we can check that we are\n\\* actually committing something.\nMaxCommitLsn == Cardinality(committed) < 2\n\n\\* How many records with different terms can be removed in single WAL\n\\* truncation.\nMaxTruncatedTerms ==\n    \\A p \\in proposers: \\A a \\in acceptors:\n        (/\\ prop_state[p].state = \"leader\"\n         /\\ prop_state[p].term = acc_state[a].term) =>\n            LET\n                hcp == FindHighestCommonPoint(prop_state[p].termHistory, AcceptorTermHistory(a), FlushLsn(a))\n                truncated_lsns == {lsn \\in DOMAIN acc_state[a].wal: lsn >= hcp.lsn}\n                truncated_records_terms == {acc_state[a].wal[lsn]: lsn \\in truncated_lsns}\n            IN\n                Cardinality(truncated_records_terms) < 2\n\n\\* Check that TruncateWal never deletes committed record.\n\\* It might seem that this should be an invariant, but it is not.\n\\* With 5 nodes, it is legit to truncate record which had been\n\\* globally committed: e.g. nodes abc can commit record of term 1 in\n\\* term 3, and after that leader of term 2 can delete such record\n\\* on d. On 10 cores TLC can find such a trace in ~7 hours.\nCommittedNotTruncated ==\n    \\A p \\in proposers: \\A a \\in acceptors:\n        (/\\ prop_state[p].state = \"leader\"\n         /\\ prop_state[p].term = acc_state[a].term) =>\n            LET\n               hcp == FindHighestCommonPoint(prop_state[p].termHistory, AcceptorTermHistory(a), FlushLsn(a))\n               truncated_lsns == {lsn \\in DOMAIN acc_state[a].wal: lsn >= hcp.lsn}\n               truncated_records == {[term |-> acc_state[a].wal[lsn], lsn |-> lsn]: lsn \\in truncated_lsns}\n            IN\n               \\A r \\in truncated_records: r \\notin committed\n\n====\n"
  },
  {
    "path": "safekeeper/spec/modelcheck.sh",
    "content": "#!/bin/bash\n\n# Usage: ./modelcheck.sh <config_file> <spec_file>, e.g.\n# ./modelcheck.sh models/MCProposerAcceptorStatic_p2_a3_t3_l3.cfg MCProposerAcceptorStatic.tla\n# ./modelcheck.sh models/MCProposerAcceptorReconfig_p2_a3_t3_l3_c3.cfg MCProposerAcceptorReconfig.tla\nCONFIG=$1\nSPEC=$2\n\nMEM=7G\nTOOLSPATH=\"/opt/TLA+Toolbox/tla2tools.jar\"\n\nmkdir -p \"tlc-results\"\nCONFIG_FILE=$(basename -- \"$CONFIG\")\noutfilename=\"$SPEC-${CONFIG_FILE}-$(date --utc +%Y-%m-%d--%H-%M-%S)\".log\noutfile=\"tlc-results/$outfilename\"\necho \"saving results to $outfile\"\ntouch $outfile\n\n# Save some info about the run.\nGIT_REV=`git rev-parse --short HEAD`\nINFO=`uname -a`\n\n# First for Linux, second for Mac.\nCPUNAMELinux=$(lscpu | grep 'Model name' | cut -f 2 -d \":\" | awk '{$1=$1}1')\nCPUCORESLinux=`nproc`\nCPUNAMEMac=`sysctl -n machdep.cpu.brand_string`\nCPUCORESMac=`sysctl -n machdep.cpu.thread_count`\n\necho \"git revision: $GIT_REV\" >> $outfile\necho \"Platform: $INFO\" >> $outfile\necho \"CPU Info Linux: $CPUNAMELinux\" >> $outfile\necho \"CPU Cores Linux: $CPUCORESLinux\" >> $outfile\necho \"CPU Info Mac: $CPUNAMEMac\" >> $outfile\necho \"CPU Cores Mac: $CPUCORESMac\" >> $outfile\necho \"Spec: $SPEC\" >> $outfile\necho \"Config: $CONFIG\" >> $outfile\necho \"----\" >> $outfile\ncat $CONFIG >> $outfile\necho \"\" >> $outfile\necho \"----\" >> $outfile\necho \"\" >> $outfile\n\n# see\n# https://lamport.azurewebsites.net/tla/current-tools.pdf\n# for TLC options.\n# OffHeapDiskFPSet is the optimal fingerprint set implementation\n# https://docs.tlapl.us/codebase:architecture#fingerprint_sets_fpsets\n#\n# Add -simulate to run in infinite simulation mode.\n# -coverage 1 is useful for profiling (check how many times actions are taken).\njava -Xmx$MEM -XX:MaxDirectMemorySize=$MEM -XX:+UseParallelGC -Dtlc2.tool.fp.FPSet.impl=tlc2.tool.fp.OffHeapDiskFPSet \\\n  -cp \"${TOOLSPATH}\" tlc2.TLC $SPEC -config $CONFIG -workers auto -gzip | tee -a $outfile\n"
  },
  {
    "path": "safekeeper/spec/models/MCProposerAcceptorReconfig_p2_a2_t2_l2_c3.cfg",
    "content": "CONSTANTS\nNULL = NULL\nproposers = {p1, p2}\nacceptors = {a1, a2}\nmax_term = 2\nmax_entries = 2\nmax_generation = 3\nSPECIFICATION Spec\nCONSTRAINT StateConstraint\nINVARIANT\nTypeOk\nConfigSafety\nElectionSafetyFull\nLogIsMonotonic\nLogSafety\n\\* As its comment explains, CommittedNotTruncated is not expected to hold in\n\\* general, but in such a small model it does.\nCommittedNotTruncated\nSYMMETRY ProposerAcceptorSymmetry\nCHECK_DEADLOCK FALSE\nALIAS Alias\n"
  },
  {
    "path": "safekeeper/spec/models/MCProposerAcceptorReconfig_p2_a2_t2_l2_c5.cfg",
    "content": "CONSTANTS\nNULL = NULL\nproposers = {p1, p2}\nacceptors = {a1, a2}\nmax_term = 2\nmax_entries = 2\nmax_generation = 5\nSPECIFICATION Spec\nCONSTRAINT StateConstraint\nINVARIANT\nTypeOk\nConfigSafety\nElectionSafetyFull\nLogIsMonotonic\nLogSafety\nCommittedNotTruncated\nSYMMETRY ProposerAcceptorSymmetry\nCHECK_DEADLOCK FALSE\nALIAS Alias\n"
  },
  {
    "path": "safekeeper/spec/models/MCProposerAcceptorReconfig_p2_a3_t2_l2_c3.cfg",
    "content": "CONSTANTS\nNULL = NULL\nproposers = {p1, p2}\nacceptors = {a1, a2, a3}\nmax_term = 2\nmax_entries = 2\nmax_generation = 3\nSPECIFICATION Spec\nCONSTRAINT StateConstraint\nINVARIANT\nTypeOk\nConfigSafety\nElectionSafetyFull\nLogIsMonotonic\nLogSafety\nCommittedNotTruncated\nSYMMETRY ProposerAcceptorSymmetry\nCHECK_DEADLOCK FALSE\nALIAS Alias\n\n"
  },
  {
    "path": "safekeeper/spec/models/MCProposerAcceptorReconfig_p2_a4_t2_l2_c3.cfg",
    "content": "CONSTANTS\nNULL = NULL\nproposers = {p1, p2}\nacceptors = {a1, a2, a3, a4}\nmax_term = 2\nmax_entries = 2\nmax_generation = 3\nSPECIFICATION Spec\nCONSTRAINT StateConstraint\nINVARIANT\nTypeOk\nElectionSafetyFull\nLogIsMonotonic\nLogSafety\nCommittedNotTruncated\nSYMMETRY ProposerAcceptorSymmetry\nCHECK_DEADLOCK FALSE\nALIAS Alias\n\n"
  },
  {
    "path": "safekeeper/spec/models/MCProposerAcceptorStatic_p2_a3_t2_l2.cfg",
    "content": "\\* A very small model just to play.\nCONSTANTS\nNULL = NULL\nproposers = {p1, p2}\nacceptors = {a1, a2, a3}\nmax_term = 2\nmax_entries = 2\nSPECIFICATION Spec\nCONSTRAINT StateConstraint\nINVARIANT\nTypeOk\nElectionSafetyFull\nLogIsMonotonic\nLogSafety\nCommittedNotTruncated\nSYMMETRY ProposerAcceptorSymmetry\nCHECK_DEADLOCK FALSE\nALIAS Alias\n\n"
  },
  {
    "path": "safekeeper/spec/models/MCProposerAcceptorStatic_p2_a3_t3_l2.cfg",
    "content": "\\* A model next to the smallest one.\nCONSTANTS\nNULL = NULL\nproposers = {p1, p2}\nacceptors = {a1, a2, a3}\nmax_term = 3\nmax_entries = 2\nSPECIFICATION Spec\nCONSTRAINT StateConstraint\nINVARIANT\nTypeOk\nElectionSafetyFull\nLogIsMonotonic\nLogSafety\nCommittedNotTruncated\nSYMMETRY ProposerAcceptorSymmetry\nCHECK_DEADLOCK FALSE\nALIAS Alias\n\n"
  },
  {
    "path": "safekeeper/spec/models/MCProposerAcceptorStatic_p2_a3_t3_l3.cfg",
    "content": "CONSTANTS\nNULL = NULL\nproposers = {p1, p2}\nacceptors = {a1, a2, a3}\nmax_term = 3\nmax_entries = 3\nSPECIFICATION Spec\nCONSTRAINT StateConstraint\nINVARIANT\nTypeOk\nElectionSafety\nLogIsMonotonic\nLogSafety\nCommittedNotTruncated\nSYMMETRY ProposerAcceptorSymmetry\nCHECK_DEADLOCK FALSE\nALIAS Alias\n"
  },
  {
    "path": "safekeeper/spec/models/MCProposerAcceptorStatic_p2_a3_t4_l4.cfg",
    "content": "CONSTANTS\nNULL = NULL\nproposers = {p1, p2}\nacceptors = {a1, a2, a3}\nmax_term = 4\nmax_entries = 4\nSPECIFICATION Spec\nCONSTRAINT StateConstraint\nINVARIANT\nTypeOk\nElectionSafety\nLogIsMonotonic\nLogSafety\nCommittedNotTruncated\nSYMMETRY ProposerAcceptorSymmetry\nCHECK_DEADLOCK FALSE\nALIAS Alias\n"
  },
  {
    "path": "safekeeper/spec/models/MCProposerAcceptorStatic_p2_a5_t2_l2.cfg",
    "content": "CONSTANTS\nNULL = NULL\nproposers = {p1, p2}\nacceptors = {a1, a2, a3, a4, a5}\nmax_term = 2\nmax_entries = 2\nSPECIFICATION Spec\nCONSTRAINT StateConstraint\nINVARIANT\nTypeOk\nElectionSafety\nLogIsMonotonic\nLogSafety\nSYMMETRY ProposerAcceptorSymmetry\nCHECK_DEADLOCK FALSE\nALIAS Alias\n"
  },
  {
    "path": "safekeeper/spec/models/MCProposerAcceptorStatic_p2_a5_t3_l3.cfg",
    "content": "CONSTANTS\nNULL = NULL\nproposers = {p1, p2}\nacceptors = {a1, a2, a3, a4, a5}\nmax_term = 3\nmax_entries = 3\nSPECIFICATION Spec\nCONSTRAINT StateConstraint\nINVARIANT\nTypeOk\nElectionSafety\nLogIsMonotonic\nLogSafety\nSYMMETRY ProposerAcceptorSymmetry\nCHECK_DEADLOCK FALSE\nALIAS Alias\n"
  },
  {
    "path": "safekeeper/spec/remove_interm_progress.awk",
    "content": "# Print all lines, but thin out lines starting with Progress:\n# keep only the first 5 and the last 5 of them, plus 1 of every 1440\n# of the others (once a day at one Progress line per minute).\n# Also remove checkpointing logs.\n{\n    lines[NR] = $0\n}\n$0 ~ /^Progress/ {\n    ++pcount\n}\nEND {\n    progress_idx = 0\n    for (i = 1; i <= NR; i++) {\n        if (lines[i] ~ /^Progress/) {\n            if (progress_idx < 5 || progress_idx >= pcount - 5 || progress_idx % 1440 == 0) {\n                print lines[i]\n            }\n            progress_idx++\n        }\n        else if (lines[i] ~ /^Checkpointing/) {}\n        else {\n            print lines[i]\n        }\n    }\n}"
  },
  {
    "path": "safekeeper/spec/remove_interm_progress.sh",
    "content": "#!/bin/bash\n\nawk -f remove_interm_progress.awk $1 > $1.thin"
  },
  {
    "path": "safekeeper/spec/tlc-results/MCProposerAcceptorReconfig.tla-MCProposerAcceptorReconfig_p2_a2_t2_l2_c3.cfg-2024-12-11--04-24-12.log",
    "content": "git revision: 9e386917a\nPlatform: Linux neon-dev-arm64-1 6.8.0-49-generic #49-Ubuntu SMP PREEMPT_DYNAMIC Sun Nov  3 21:21:58 UTC 2024 aarch64 aarch64 aarch64 GNU/Linux\nCPU Info Linux: Neoverse-N1\nCPU Cores Linux: 80\nCPU Info Mac: \nCPU Cores Mac: \nSpec: MCProposerAcceptorReconfig.tla\nConfig: models/MCProposerAcceptorReconfig_p2_a2_t2_l2_c3.cfg\n----\nCONSTANTS\nNULL = NULL\nproposers = {p1, p2}\nacceptors = {a1, a2}\nmax_term = 2\nmax_entries = 2\nmax_generation = 3\nSPECIFICATION Spec\nCONSTRAINT StateConstraint\nINVARIANT\nTypeOk\nElectionSafetyFull\nLogIsMonotonic\nLogSafety\n\\* CommittedNotTruncated\nSYMMETRY ProposerAcceptorSymmetry\nCHECK_DEADLOCK FALSE\nALIAS Alias\n\n----\n\nTLC2 Version 2.20 of Day Month 20?? (rev: f68cb71)\nRunning breadth-first search Model-Checking with fp 99 and seed -9189733667206762985 with 35 workers on 80 cores with 27307MB heap and 30720MB offheap memory [pid: 391272] (Linux 6.8.0-49-generic aarch64, Ubuntu 21.0.5 x86_64, OffHeapDiskFPSet, DiskStateQueue).\nParsing file /home/arseny/neon2/safekeeper/spec/MCProposerAcceptorReconfig.tla\nParsing file /tmp/tlc-3211535543066978921/TLC.tla (jar:file:/home/arseny/tla2tools.jar!/tla2sany/StandardModules/TLC.tla)\nParsing file /home/arseny/neon2/safekeeper/spec/ProposerAcceptorReconfig.tla\nParsing file /tmp/tlc-3211535543066978921/_TLCTrace.tla (jar:file:/home/arseny/tla2tools.jar!/tla2sany/StandardModules/_TLCTrace.tla)\nParsing file /tmp/tlc-3211535543066978921/Integers.tla (jar:file:/home/arseny/tla2tools.jar!/tla2sany/StandardModules/Integers.tla)\nParsing file /tmp/tlc-3211535543066978921/Sequences.tla (jar:file:/home/arseny/tla2tools.jar!/tla2sany/StandardModules/Sequences.tla)\nParsing file /tmp/tlc-3211535543066978921/FiniteSets.tla (jar:file:/home/arseny/tla2tools.jar!/tla2sany/StandardModules/FiniteSets.tla)\nParsing file /tmp/tlc-3211535543066978921/Naturals.tla (jar:file:/home/arseny/tla2tools.jar!/tla2sany/StandardModules/Naturals.tla)\nParsing file 
/home/arseny/neon2/safekeeper/spec/ProposerAcceptorStatic.tla\nParsing file /tmp/tlc-3211535543066978921/TLCExt.tla (jar:file:/home/arseny/tla2tools.jar!/tla2sany/StandardModules/TLCExt.tla)\nSemantic processing of module Naturals\nSemantic processing of module Sequences\nSemantic processing of module FiniteSets\nSemantic processing of module TLC\nSemantic processing of module Integers\nSemantic processing of module ProposerAcceptorStatic\nSemantic processing of module ProposerAcceptorReconfig\nSemantic processing of module TLCExt\nSemantic processing of module _TLCTrace\nSemantic processing of module MCProposerAcceptorReconfig\nStarting... (2024-12-11 04:24:13)\nComputing initial states...\nFinished computing initial states: 2 states generated, with 1 of them distinct at 2024-12-11 04:24:15.\nProgress(16) at 2024-12-11 04:24:18: 1,427,589 states generated (1,427,589 s/min), 142,472 distinct states found (142,472 ds/min), 47,162 states left on queue.\nModel checking completed. No error has been found.\n  Estimates of the probability that TLC did not check all reachable states\n  because two distinct states had the same fingerprint:\n  calculated (optimistic):  val = 1.0E-6\n  based on the actual fingerprints:  val = 4.2E-8\n17746857 states generated, 1121659 distinct states found, 0 states left on queue.\nThe depth of the complete state graph search is 37.\nThe average outdegree of the complete state graph is 1 (minimum is 0, the maximum 9 and the 95th percentile is 3).\nFinished in 33s at (2024-12-11 04:24:46)\n"
  },
  {
    "path": "safekeeper/spec/tlc-results/MCProposerAcceptorReconfig.tla-MCProposerAcceptorReconfig_p2_a2_t2_l2_c5.cfg-2024-12-11--04-26-11.log",
    "content": "git revision: 9e386917a\nPlatform: Linux neon-dev-arm64-1 6.8.0-49-generic #49-Ubuntu SMP PREEMPT_DYNAMIC Sun Nov  3 21:21:58 UTC 2024 aarch64 aarch64 aarch64 GNU/Linux\nCPU Info Linux: Neoverse-N1\nCPU Cores Linux: 80\nCPU Info Mac: \nCPU Cores Mac: \nSpec: MCProposerAcceptorReconfig.tla\nConfig: models/MCProposerAcceptorReconfig_p2_a2_t2_l2_c5.cfg\n----\nCONSTANTS\nNULL = NULL\nproposers = {p1, p2}\nacceptors = {a1, a2}\nmax_term = 2\nmax_entries = 2\nmax_generation = 5\nSPECIFICATION Spec\nCONSTRAINT StateConstraint\nINVARIANT\nTypeOk\nElectionSafetyFull\nLogIsMonotonic\nLogSafety\n\\* CommittedNotTruncated\nSYMMETRY ProposerAcceptorSymmetry\nCHECK_DEADLOCK FALSE\nALIAS Alias\n\n----\n\nTLC2 Version 2.20 of Day Month 20?? (rev: f68cb71)\nRunning breadth-first search Model-Checking with fp 114 and seed -8099467489737745861 with 35 workers on 80 cores with 27307MB heap and 30720MB offheap memory [pid: 392020] (Linux 6.8.0-49-generic aarch64, Ubuntu 21.0.5 x86_64, OffHeapDiskFPSet, DiskStateQueue).\nParsing file /home/arseny/neon2/safekeeper/spec/MCProposerAcceptorReconfig.tla\nParsing file /tmp/tlc-11757875725969857497/TLC.tla (jar:file:/home/arseny/tla2tools.jar!/tla2sany/StandardModules/TLC.tla)\nParsing file /home/arseny/neon2/safekeeper/spec/ProposerAcceptorReconfig.tla\nParsing file /tmp/tlc-11757875725969857497/_TLCTrace.tla (jar:file:/home/arseny/tla2tools.jar!/tla2sany/StandardModules/_TLCTrace.tla)\nParsing file /tmp/tlc-11757875725969857497/Integers.tla (jar:file:/home/arseny/tla2tools.jar!/tla2sany/StandardModules/Integers.tla)\nParsing file /tmp/tlc-11757875725969857497/Sequences.tla (jar:file:/home/arseny/tla2tools.jar!/tla2sany/StandardModules/Sequences.tla)\nParsing file /tmp/tlc-11757875725969857497/FiniteSets.tla (jar:file:/home/arseny/tla2tools.jar!/tla2sany/StandardModules/FiniteSets.tla)\nParsing file /tmp/tlc-11757875725969857497/Naturals.tla (jar:file:/home/arseny/tla2tools.jar!/tla2sany/StandardModules/Naturals.tla)\nParsing 
file /home/arseny/neon2/safekeeper/spec/ProposerAcceptorStatic.tla\nParsing file /tmp/tlc-11757875725969857497/TLCExt.tla (jar:file:/home/arseny/tla2tools.jar!/tla2sany/StandardModules/TLCExt.tla)\nSemantic processing of module Naturals\nSemantic processing of module Sequences\nSemantic processing of module FiniteSets\nSemantic processing of module TLC\nSemantic processing of module Integers\nSemantic processing of module ProposerAcceptorStatic\nSemantic processing of module ProposerAcceptorReconfig\nSemantic processing of module TLCExt\nSemantic processing of module _TLCTrace\nSemantic processing of module MCProposerAcceptorReconfig\nStarting... (2024-12-11 04:26:12)\nComputing initial states...\nFinished computing initial states: 2 states generated, with 1 of them distinct at 2024-12-11 04:26:14.\nProgress(14) at 2024-12-11 04:26:17: 1,519,385 states generated (1,519,385 s/min), 231,263 distinct states found (231,263 ds/min), 121,410 states left on queue.\nProgress(20) at 2024-12-11 04:27:17: 42,757,204 states generated (41,237,819 s/min), 4,198,386 distinct states found (3,967,123 ds/min), 1,308,109 states left on queue.\nProgress(22) at 2024-12-11 04:28:17: 83,613,929 states generated (40,856,725 s/min), 7,499,873 distinct states found (3,301,487 ds/min), 1,929,464 states left on queue.\nProgress(23) at 2024-12-11 04:29:17: 124,086,758 states generated (40,472,829 s/min), 10,569,712 distinct states found (3,069,839 ds/min), 2,386,988 states left on queue.\nProgress(24) at 2024-12-11 04:30:17: 163,412,538 states generated (39,325,780 s/min), 13,314,303 distinct states found (2,744,591 ds/min), 2,610,637 states left on queue.\nProgress(25) at 2024-12-11 04:31:17: 202,643,708 states generated (39,231,170 s/min), 15,960,583 distinct states found (2,646,280 ds/min), 2,759,681 states left on queue.\nProgress(26) at 2024-12-11 04:32:17: 240,681,633 states generated (38,037,925 s/min), 18,443,440 distinct states found (2,482,857 ds/min), 2,852,177 states left on 
queue.\nProgress(27) at 2024-12-11 04:33:17: 278,559,134 states generated (37,877,501 s/min), 20,878,067 distinct states found (2,434,627 ds/min), 2,904,400 states left on queue.\nProgress(28) at 2024-12-11 04:34:17: 316,699,911 states generated (38,140,777 s/min), 23,212,229 distinct states found (2,334,162 ds/min), 2,864,969 states left on queue.\n"
  },
  {
    "path": "safekeeper/spec/tlc-results/MCProposerAcceptorStatic.tla-MCProposerAcceptorStatic_p2_a3_t3_l2.cfg-2024-11-15--09-09-58.log",
    "content": "git revision: bcbff084a\nPlatform: Linux nonlibrem 6.10.11-amd64 #1 SMP PREEMPT_DYNAMIC Debian 6.10.11-1 (2024-09-22) x86_64 GNU/Linux\nCPU Info Linux: 13th Gen Intel(R) Core(TM) i7-1355U\nCPU Cores Linux: 10\nCPU Info Mac: \nCPU Cores Mac: \nSpec: MCProposerAcceptorStatic.tla\nConfig: models/MCProposerAcceptorStatic_p2_a3_t3_l2.cfg\n----\n\\* A model next to the smallest one.\nCONSTANTS\nNULL = NULL\nproposers = {p1, p2}\nacceptors = {a1, a2, a3}\nmax_term = 3\nmax_entries = 2\nSPECIFICATION Spec\nCONSTRAINT StateConstraint\nINVARIANT\nTypeOk\nElectionSafety\nLogIsMonotonic\nLogSafety\nCommittedNotTruncated\nSYMMETRY ProposerAcceptorSymmetry\nCHECK_DEADLOCK FALSE\nALIAS Alias\n\n\n----\n\nTLC2 Version 2.20 of Day Month 20?? (rev: cc65eef)\nRunning breadth-first search Model-Checking with fp 41 and seed -3061068726727581619 with 10 workers on 10 cores with 6372MB heap and 7168MB offheap memory [pid: 1250346] (Linux 6.10.11-amd64 amd64, Debian 21.0.5 x86_64, OffHeapDiskFPSet, DiskStateQueue).\nParsing file /home/ars/neon/neon/safekeeper/spec/MCProposerAcceptorStatic.tla\nParsing file /tmp/tlc-3023124431504466774/TLC.tla (jar:file:/opt/TLA+Toolbox/tla2tools.jar!/tla2sany/StandardModules/TLC.tla)\nParsing file /home/ars/neon/neon/safekeeper/spec/ProposerAcceptorStatic.tla\nParsing file /tmp/tlc-3023124431504466774/_TLCTrace.tla (jar:file:/opt/TLA+Toolbox/tla2tools.jar!/tla2sany/StandardModules/_TLCTrace.tla)\nParsing file /tmp/tlc-3023124431504466774/Integers.tla (jar:file:/opt/TLA+Toolbox/tla2tools.jar!/tla2sany/StandardModules/Integers.tla)\nParsing file /tmp/tlc-3023124431504466774/Sequences.tla (jar:file:/opt/TLA+Toolbox/tla2tools.jar!/tla2sany/StandardModules/Sequences.tla)\nParsing file /tmp/tlc-3023124431504466774/FiniteSets.tla (jar:file:/opt/TLA+Toolbox/tla2tools.jar!/tla2sany/StandardModules/FiniteSets.tla)\nParsing file /tmp/tlc-3023124431504466774/Naturals.tla 
(jar:file:/opt/TLA+Toolbox/tla2tools.jar!/tla2sany/StandardModules/Naturals.tla)\nParsing file /tmp/tlc-3023124431504466774/TLCExt.tla (jar:file:/opt/TLA+Toolbox/tla2tools.jar!/tla2sany/StandardModules/TLCExt.tla)\nSemantic processing of module Naturals\nSemantic processing of module Sequences\nSemantic processing of module FiniteSets\nSemantic processing of module TLC\nSemantic processing of module Integers\nSemantic processing of module ProposerAcceptorStatic\nSemantic processing of module TLCExt\nSemantic processing of module _TLCTrace\nSemantic processing of module MCProposerAcceptorStatic\nStarting... (2024-11-15 12:09:59)\nComputing initial states...\nFinished computing initial states: 1 distinct state generated at 2024-11-15 12:10:00.\nProgress(19) at 2024-11-15 12:10:03: 464,696 states generated (464,696 s/min), 57,859 distinct states found (57,859 ds/min), 21,435 states left on queue.\nProgress(26) at 2024-11-15 12:11:03: 8,813,399 states generated (8,348,703 s/min), 877,254 distinct states found (819,395 ds/min), 214,794 states left on queue.\nProgress(27) at 2024-11-15 12:12:03: 16,121,858 states generated (7,308,459 s/min), 1,464,707 distinct states found (587,453 ds/min), 274,230 states left on queue.\nProgress(29) at 2024-11-15 12:13:03: 23,073,903 states generated (6,952,045 s/min), 1,948,802 distinct states found (484,095 ds/min), 263,697 states left on queue.\nProgress(31) at 2024-11-15 12:14:03: 29,740,681 states generated (6,666,778 s/min), 2,331,052 distinct states found (382,250 ds/min), 185,484 states left on queue.\nProgress(34) at 2024-11-15 12:15:03: 36,085,876 states generated (6,345,195 s/min), 2,602,370 distinct states found (271,318 ds/min), 31,659 states left on queue.\nModel checking completed. 
No error has been found.\n  Estimates of the probability that TLC did not check all reachable states\n  because two distinct states had the same fingerprint:\n  calculated (optimistic):  val = 4.9E-6\n  based on the actual fingerprints:  val = 6.9E-7\n36896322 states generated, 2623542 distinct states found, 0 states left on queue.\nThe depth of the complete state graph search is 39.\nThe average outdegree of the complete state graph is 1 (minimum is 0, the maximum 7 and the 95th percentile is 3).\nFinished in 05min 14s at (2024-11-15 12:15:13)\n"
  },
  {
    "path": "safekeeper/spec/tlc-results/MCProposerAcceptorStatic.tla-MCProposerAcceptorStatic_p2_a3_t4_l4.cfg-2024-11-06--14-20-25.log",
    "content": "# Shows LogSafety violation when \"don't commit separately entries from previous terms\" check is disabled.\ngit revision: 4f1ee6331\nPlatform: Linux neon-dev-arm64-1 6.8.0-48-generic #48-Ubuntu SMP PREEMPT_DYNAMIC Fri Sep 27 14:35:45 UTC 2024 aarch64 aarch64 aarch64 GNU/Linux\nCPU Info Linux: Neoverse-N1\nCPU Cores Linux: 80\nCPU Info Mac: \nCPU Cores Mac: \nSpec: MCProposerAcceptorStatic.tla\nConfig: models/MCProposerAcceptorStatic_p2_a3_t4_l4.cfg\n----\nCONSTANTS\nNULL = NULL\nproposers = {p1, p2}\nacceptors = {a1, a2, a3}\nmax_term = 4\nmax_entries = 4\nSPECIFICATION Spec\nCONSTRAINT StateConstraint\nINVARIANT\nTypeOk\nElectionSafety\nLogIsMonotonic\nLogSafety\nSYMMETRY ProposerAcceptorSymmetry\nCHECK_DEADLOCK FALSE\nALIAS Alias\n\n----\n\nTLC2 Version 2.20 of Day Month 20?? (rev: f68cb71)\nRunning breadth-first search Model-Checking with fp 12 and seed -5379034126224420237 with 80 workers on 80 cores with 54613MB heap and 61440MB offheap memory [pid: 52295] (Linux 6.8.0-48-generic aarch64, Ubuntu 21.0.4 x86_64, OffHeapDiskFPSet, DiskStateQueue).\nParsing file /home/arseny/neon/safekeeper/spec/MCProposerAcceptorStatic.tla\nParsing file /tmp/tlc-4533438058229992850/TLC.tla (jar:file:/home/arseny/tla2tools.jar!/tla2sany/StandardModules/TLC.tla)\nParsing file /home/arseny/neon/safekeeper/spec/ProposerAcceptorStatic.tla\nParsing file /tmp/tlc-4533438058229992850/_TLCTrace.tla (jar:file:/home/arseny/tla2tools.jar!/tla2sany/StandardModules/_TLCTrace.tla)\nParsing file /tmp/tlc-4533438058229992850/Integers.tla (jar:file:/home/arseny/tla2tools.jar!/tla2sany/StandardModules/Integers.tla)\nParsing file /tmp/tlc-4533438058229992850/Sequences.tla (jar:file:/home/arseny/tla2tools.jar!/tla2sany/StandardModules/Sequences.tla)\nParsing file /tmp/tlc-4533438058229992850/FiniteSets.tla (jar:file:/home/arseny/tla2tools.jar!/tla2sany/StandardModules/FiniteSets.tla)\nParsing file /tmp/tlc-4533438058229992850/Naturals.tla 
(jar:file:/home/arseny/tla2tools.jar!/tla2sany/StandardModules/Naturals.tla)\nParsing file /tmp/tlc-4533438058229992850/TLCExt.tla (jar:file:/home/arseny/tla2tools.jar!/tla2sany/StandardModules/TLCExt.tla)\nSemantic processing of module Naturals\nSemantic processing of module Sequences\nSemantic processing of module FiniteSets\nSemantic processing of module TLC\nSemantic processing of module Integers\nSemantic processing of module ProposerAcceptorStatic\nSemantic processing of module TLCExt\nSemantic processing of module _TLCTrace\nSemantic processing of module MCProposerAcceptorStatic\nStarting... (2024-11-06 14:20:26)\nComputing initial states...\nFinished computing initial states: 1 distinct state generated at 2024-11-06 14:20:29.\nProgress(20) at 2024-11-06 14:20:32: 1,011,898 states generated (1,011,898 s/min), 140,947 distinct states found (140,947 ds/min), 60,535 states left on queue.\nProgress(26) at 2024-11-06 14:21:32: 30,146,518 states generated (29,134,620 s/min), 3,742,736 distinct states found (3,601,789 ds/min), 1,438,779 states left on queue.\nProgress(27) at 2024-11-06 14:22:32: 59,362,708 states generated (29,216,190 s/min), 7,210,233 distinct states found (3,467,497 ds/min), 2,708,295 states left on queue.\nProgress(28) at 2024-11-06 14:23:32: 88,589,291 states generated (29,226,583 s/min), 10,552,781 distinct states found (3,342,548 ds/min), 3,874,296 states left on queue.\nProgress(29) at 2024-11-06 14:24:32: 117,894,209 states generated (29,304,918 s/min), 13,932,498 distinct states found (3,379,717 ds/min), 5,069,960 states left on queue.\nProgress(29) at 2024-11-06 14:25:32: 147,338,882 states generated (29,444,673 s/min), 17,180,069 distinct states found (3,247,571 ds/min), 6,146,371 states left on queue.\nProgress(29) at 2024-11-06 14:26:32: 176,498,135 states generated (29,159,253 s/min), 20,547,926 distinct states found (3,367,857 ds/min), 7,338,835 states left on queue.\nProgress(30) at 2024-11-06 14:27:32: 205,957,044 states generated 
(29,458,909 s/min), 23,661,090 distinct states found (3,113,164 ds/min), 8,293,570 states left on queue.\nProgress(30) at 2024-11-06 14:28:32: 235,390,133 states generated (29,433,089 s/min), 26,892,306 distinct states found (3,231,216 ds/min), 9,369,229 states left on queue.\nProgress(30) at 2024-11-06 14:29:32: 264,571,938 states generated (29,181,805 s/min), 30,176,971 distinct states found (3,284,665 ds/min), 10,493,429 states left on queue.\nProgress(31) at 2024-11-06 14:30:32: 293,928,191 states generated (29,356,253 s/min), 33,296,160 distinct states found (3,119,189 ds/min), 11,463,686 states left on queue.\nProgress(31) at 2024-11-06 14:31:32: 323,436,668 states generated (29,508,477 s/min), 36,347,973 distinct states found (3,051,813 ds/min), 12,365,578 states left on queue.\nProgress(31) at 2024-11-06 14:32:32: 352,943,790 states generated (29,507,122 s/min), 39,465,244 distinct states found (3,117,271 ds/min), 13,349,544 states left on queue.\nProgress(31) at 2024-11-06 14:33:32: 382,292,863 states generated (29,349,073 s/min), 42,654,621 distinct states found (3,189,377 ds/min), 14,384,363 states left on queue.\nProgress(31) at 2024-11-06 14:34:32: 411,385,854 states generated (29,092,991 s/min), 45,941,145 distinct states found (3,286,524 ds/min), 15,509,450 states left on queue.\nProgress(31) at 2024-11-06 14:35:32: 440,738,756 states generated (29,352,902 s/min), 48,984,566 distinct states found (3,043,421 ds/min), 16,419,882 states left on queue.\nProgress(32) at 2024-11-06 14:36:32: 470,251,558 states generated (29,512,802 s/min), 51,925,693 distinct states found (2,941,127 ds/min), 17,211,457 states left on queue.\nProgress(32) at 2024-11-06 14:37:32: 499,714,013 states generated (29,462,455 s/min), 54,955,581 distinct states found (3,029,888 ds/min), 18,114,624 states left on queue.\nProgress(32) at 2024-11-06 14:38:32: 529,254,608 states generated (29,540,595 s/min), 57,938,914 distinct states found (2,983,333 ds/min), 18,996,128 states left on 
queue.\nProgress(32) at 2024-11-06 14:39:32: 558,774,398 states generated (29,519,790 s/min), 61,072,943 distinct states found (3,134,029 ds/min), 19,975,689 states left on queue.\nProgress(32) at 2024-11-06 14:40:32: 588,134,665 states generated (29,360,267 s/min), 64,148,888 distinct states found (3,075,945 ds/min), 20,922,407 states left on queue.\nProgress(32) at 2024-11-06 14:41:32: 617,464,374 states generated (29,329,709 s/min), 67,306,855 distinct states found (3,157,967 ds/min), 21,928,799 states left on queue.\nProgress(32) at 2024-11-06 14:42:32: 646,525,281 states generated (29,060,907 s/min), 70,425,194 distinct states found (3,118,339 ds/min), 22,895,971 states left on queue.\nProgress(32) at 2024-11-06 14:43:32: 676,054,893 states generated (29,529,612 s/min), 73,351,905 distinct states found (2,926,711 ds/min), 23,703,779 states left on queue.\nProgress(33) at 2024-11-06 14:44:32: 705,581,782 states generated (29,526,889 s/min), 76,200,615 distinct states found (2,848,710 ds/min), 24,414,094 states left on queue.\nProgress(33) at 2024-11-06 14:45:32: 735,069,836 states generated (29,488,054 s/min), 79,168,244 distinct states found (2,967,629 ds/min), 25,255,224 states left on queue.\nProgress(33) at 2024-11-06 14:46:32: 764,659,188 states generated (29,589,352 s/min), 82,024,430 distinct states found (2,856,186 ds/min), 26,011,047 states left on queue.\nProgress(33) at 2024-11-06 14:47:32: 794,276,423 states generated (29,617,235 s/min), 84,974,312 distinct states found (2,949,882 ds/min), 26,868,750 states left on queue.\nProgress(33) at 2024-11-06 14:48:32: 823,875,831 states generated (29,599,408 s/min), 88,004,386 distinct states found (3,030,074 ds/min), 27,771,984 states left on queue.\nProgress(33) at 2024-11-06 14:49:32: 853,138,894 states generated (29,263,063 s/min), 91,006,890 distinct states found (3,002,504 ds/min), 28,636,661 states left on queue.\nCheckpointing of run states/24-11-06-14-20-25.868\nCheckpointing completed at 
(2024-11-06 14:50:32)\nProgress(33) at 2024-11-06 14:50:32: 882,514,167 states generated (29,375,273 s/min), 94,011,000 distinct states found (3,004,110 ds/min), 29,534,516 states left on queue.\nProgress(33) at 2024-11-06 14:51:32: 911,838,377 states generated (29,324,210 s/min), 97,108,937 distinct states found (3,097,937 ds/min), 30,498,587 states left on queue.\nProgress(33) at 2024-11-06 14:52:32: 940,646,920 states generated (28,808,543 s/min), 100,248,865 distinct states found (3,139,928 ds/min), 31,472,191 states left on queue.\nProgress(33) at 2024-11-06 14:53:32: 970,074,175 states generated (29,427,255 s/min), 103,170,815 distinct states found (2,921,950 ds/min), 32,265,691 states left on queue.\nProgress(33) at 2024-11-06 14:54:32: 999,627,974 states generated (29,553,799 s/min), 106,004,823 distinct states found (2,834,008 ds/min), 33,009,618 states left on queue.\nProgress(34) at 2024-11-06 14:55:32: 1,029,148,983 states generated (29,521,009 s/min), 108,740,783 distinct states found (2,735,960 ds/min), 33,616,222 states left on queue.\nProgress(34) at 2024-11-06 14:56:32: 1,058,582,001 states generated (29,433,018 s/min), 111,612,965 distinct states found (2,872,182 ds/min), 34,375,212 states left on queue.\nProgress(34) at 2024-11-06 14:57:32: 1,088,123,602 states generated (29,541,601 s/min), 114,464,196 distinct states found (2,851,231 ds/min), 35,116,195 states left on queue.\nProgress(34) at 2024-11-06 14:58:32: 1,117,684,936 states generated (29,561,334 s/min), 117,252,198 distinct states found (2,788,002 ds/min), 35,817,205 states left on queue.\nProgress(34) at 2024-11-06 14:59:32: 1,147,356,249 states generated (29,671,313 s/min), 120,014,476 distinct states found (2,762,278 ds/min), 36,517,255 states left on queue.\nProgress(34) at 2024-11-06 15:00:32: 1,176,921,098 states generated (29,564,849 s/min), 122,859,312 distinct states found (2,844,836 ds/min), 37,291,096 states left on queue.\nProgress(34) at 2024-11-06 15:01:32: 1,206,454,440 
states generated (29,533,342 s/min), 125,830,942 distinct states found (2,971,630 ds/min), 38,147,762 states left on queue.\nProgress(34) at 2024-11-06 15:02:32: 1,235,721,673 states generated (29,267,233 s/min), 128,869,493 distinct states found (3,038,551 ds/min), 39,035,481 states left on queue.\nProgress(34) at 2024-11-06 15:03:32: 1,265,097,779 states generated (29,376,106 s/min), 131,669,552 distinct states found (2,800,059 ds/min), 39,746,864 states left on queue.\nProgress(34) at 2024-11-06 15:04:32: 1,294,408,098 states generated (29,310,319 s/min), 134,604,630 distinct states found (2,935,078 ds/min), 40,584,235 states left on queue.\nProgress(34) at 2024-11-06 15:05:32: 1,323,792,755 states generated (29,384,657 s/min), 137,579,390 distinct states found (2,974,760 ds/min), 41,446,478 states left on queue.\nProgress(34) at 2024-11-06 15:06:32: 1,353,085,163 states generated (29,292,408 s/min), 140,575,723 distinct states found (2,996,333 ds/min), 42,309,510 states left on queue.\nProgress(34) at 2024-11-06 15:07:32: 1,381,809,417 states generated (28,724,254 s/min), 143,655,566 distinct states found (3,079,843 ds/min), 43,220,682 states left on queue.\nProgress(34) at 2024-11-06 15:08:32: 1,411,255,848 states generated (29,446,431 s/min), 146,482,192 distinct states found (2,826,626 ds/min), 43,944,938 states left on queue.\nProgress(34) at 2024-11-06 15:09:32: 1,440,646,323 states generated (29,390,475 s/min), 149,419,989 distinct states found (2,937,797 ds/min), 44,763,293 states left on queue.\nProgress(34) at 2024-11-06 15:10:32: 1,470,298,568 states generated (29,652,245 s/min), 152,041,419 distinct states found (2,621,430 ds/min), 45,311,911 states left on queue.\nProgress(35) at 2024-11-06 15:11:32: 1,499,747,712 states generated (29,449,144 s/min), 154,696,867 distinct states found (2,655,448 ds/min), 45,842,895 states left on queue.\nProgress(35) at 2024-11-06 15:12:32: 1,529,256,993 states generated (29,509,281 s/min), 157,493,365 distinct 
states found (2,796,498 ds/min), 46,535,472 states left on queue.\nProgress(35) at 2024-11-06 15:13:32: 1,558,829,306 states generated (29,572,313 s/min), 160,256,575 distinct states found (2,763,210 ds/min), 47,212,471 states left on queue.\nProgress(35) at 2024-11-06 15:14:32: 1,588,345,878 states generated (29,516,572 s/min), 163,002,602 distinct states found (2,746,027 ds/min), 47,862,117 states left on queue.\nProgress(35) at 2024-11-06 15:15:32: 1,617,885,675 states generated (29,539,797 s/min), 165,699,121 distinct states found (2,696,519 ds/min), 48,472,896 states left on queue.\nProgress(35) at 2024-11-06 15:16:32: 1,647,559,965 states generated (29,674,290 s/min), 168,343,286 distinct states found (2,644,165 ds/min), 49,065,377 states left on queue.\nProgress(35) at 2024-11-06 15:17:32: 1,677,033,250 states generated (29,473,285 s/min), 171,134,409 distinct states found (2,791,123 ds/min), 49,823,330 states left on queue.\nProgress(35) at 2024-11-06 15:18:32: 1,706,730,266 states generated (29,697,016 s/min), 173,860,974 distinct states found (2,726,565 ds/min), 50,493,221 states left on queue.\nError: Invariant LogSafety is violated.\nError: The behavior up to this point is:\nState 1: <Initial predicate>\n/\\ prop_state = ( p1 :>\n      [ term |-> 1,\n        wal |-> <<>>,\n        state |-> \"campaign\",\n        votes |-> <<>>,\n        termHistory |-> <<>>,\n        nextSendLsn |-> <<>> ] @@\n  p2 :>\n      [ term |-> 1,\n        wal |-> <<>>,\n        state |-> \"campaign\",\n        votes |-> <<>>,\n        termHistory |-> <<>>,\n        nextSendLsn |-> <<>> ] )\n/\\ acc_state = ( a1 :>\n      [term |-> 0, wal |-> <<>>, termHistory |-> <<[term |-> 0, lsn |-> 1]>>] @@\n  a2 :>\n      [term |-> 0, wal |-> <<>>, termHistory |-> <<[term |-> 0, lsn |-> 1]>>] @@\n  a3 :>\n      [term |-> 0, wal |-> <<>>, termHistory |-> <<[term |-> 0, lsn |-> 1]>>] )\n/\\ committed = {}\n\nState 2: <Vote(p1,a1) line 207, col 2 to line 214, col 27 of module 
ProposerAcceptorStatic>\n/\\ prop_state = ( p1 :>\n      [ term |-> 1,\n        wal |-> <<>>,\n        state |-> \"campaign\",\n        votes |->\n            ( a1 :>\n                  [ termHistory |-> <<[term |-> 0, lsn |-> 1]>>,\n                    flushLsn |-> 1 ] ),\n        termHistory |-> <<>>,\n        nextSendLsn |-> <<>> ] @@\n  p2 :>\n      [ term |-> 1,\n        wal |-> <<>>,\n        state |-> \"campaign\",\n        votes |-> <<>>,\n        termHistory |-> <<>>,\n        nextSendLsn |-> <<>> ] )\n/\\ acc_state = ( a1 :>\n      [term |-> 1, wal |-> <<>>, termHistory |-> <<[term |-> 0, lsn |-> 1]>>] @@\n  a2 :>\n      [term |-> 0, wal |-> <<>>, termHistory |-> <<[term |-> 0, lsn |-> 1]>>] @@\n  a3 :>\n      [term |-> 0, wal |-> <<>>, termHistory |-> <<[term |-> 0, lsn |-> 1]>>] )\n/\\ committed = {}\n\nState 3: <RestartProposer(p2,{a1, a2}) line 188, col 3 to line 198, col 44 of module ProposerAcceptorStatic>\n/\\ prop_state = ( p1 :>\n      [ term |-> 1,\n        wal |-> <<>>,\n        state |-> \"campaign\",\n        votes |->\n            ( a1 :>\n                  [ termHistory |-> <<[term |-> 0, lsn |-> 1]>>,\n                    flushLsn |-> 1 ] ),\n        termHistory |-> <<>>,\n        nextSendLsn |-> <<>> ] @@\n  p2 :>\n      [ term |-> 2,\n        wal |-> <<>>,\n        state |-> \"campaign\",\n        votes |-> <<>>,\n        termHistory |-> <<>>,\n        nextSendLsn |-> <<>> ] )\n/\\ acc_state = ( a1 :>\n      [term |-> 1, wal |-> <<>>, termHistory |-> <<[term |-> 0, lsn |-> 1]>>] @@\n  a2 :>\n      [term |-> 0, wal |-> <<>>, termHistory |-> <<[term |-> 0, lsn |-> 1]>>] @@\n  a3 :>\n      [term |-> 0, wal |-> <<>>, termHistory |-> <<[term |-> 0, lsn |-> 1]>>] )\n/\\ committed = {}\n\nState 4: <Vote(p1,a2) line 207, col 2 to line 214, col 27 of module ProposerAcceptorStatic>\n/\\ prop_state = ( p1 :>\n      [ term |-> 1,\n        wal |-> <<>>,\n        state |-> \"campaign\",\n        votes |->\n            ( a1 :>\n                  [ 
termHistory |-> <<[term |-> 0, lsn |-> 1]>>,\n                    flushLsn |-> 1 ] @@\n              a2 :>\n                  [ termHistory |-> <<[term |-> 0, lsn |-> 1]>>,\n                    flushLsn |-> 1 ] ),\n        termHistory |-> <<>>,\n        nextSendLsn |-> <<>> ] @@\n  p2 :>\n      [ term |-> 2,\n        wal |-> <<>>,\n        state |-> \"campaign\",\n        votes |-> <<>>,\n        termHistory |-> <<>>,\n        nextSendLsn |-> <<>> ] )\n/\\ acc_state = ( a1 :>\n      [term |-> 1, wal |-> <<>>, termHistory |-> <<[term |-> 0, lsn |-> 1]>>] @@\n  a2 :>\n      [term |-> 1, wal |-> <<>>, termHistory |-> <<[term |-> 0, lsn |-> 1]>>] @@\n  a3 :>\n      [term |-> 0, wal |-> <<>>, termHistory |-> <<[term |-> 0, lsn |-> 1]>>] )\n/\\ committed = {}\n\nState 5: <BecomeLeader(p1) line 222, col 3 to line 245, col 44 of module ProposerAcceptorStatic>\n/\\ prop_state = ( p1 :>\n      [ term |-> 1,\n        wal |-> <<>>,\n        state |-> \"leader\",\n        votes |->\n            ( a1 :>\n                  [ termHistory |-> <<[term |-> 0, lsn |-> 1]>>,\n                    flushLsn |-> 1 ] @@\n              a2 :>\n                  [ termHistory |-> <<[term |-> 0, lsn |-> 1]>>,\n                    flushLsn |-> 1 ] ),\n        termHistory |-> <<[term |-> 0, lsn |-> 1], [term |-> 1, lsn |-> 1]>>,\n        nextSendLsn |-> <<>> ] @@\n  p2 :>\n      [ term |-> 2,\n        wal |-> <<>>,\n        state |-> \"campaign\",\n        votes |-> <<>>,\n        termHistory |-> <<>>,\n        nextSendLsn |-> <<>> ] )\n/\\ acc_state = ( a1 :>\n      [term |-> 1, wal |-> <<>>, termHistory |-> <<[term |-> 0, lsn |-> 1]>>] @@\n  a2 :>\n      [term |-> 1, wal |-> <<>>, termHistory |-> <<[term |-> 0, lsn |-> 1]>>] @@\n  a3 :>\n      [term |-> 0, wal |-> <<>>, termHistory |-> <<[term |-> 0, lsn |-> 1]>>] )\n/\\ committed = {}\n\nState 6: <Vote(p2,a1) line 207, col 2 to line 214, col 27 of module ProposerAcceptorStatic>\n/\\ prop_state = ( p1 :>\n      [ term |-> 1,\n        wal |-> 
<<>>,\n        state |-> \"leader\",\n        votes |->\n            ( a1 :>\n                  [ termHistory |-> <<[term |-> 0, lsn |-> 1]>>,\n                    flushLsn |-> 1 ] @@\n              a2 :>\n                  [ termHistory |-> <<[term |-> 0, lsn |-> 1]>>,\n                    flushLsn |-> 1 ] ),\n        termHistory |-> <<[term |-> 0, lsn |-> 1], [term |-> 1, lsn |-> 1]>>,\n        nextSendLsn |-> <<>> ] @@\n  p2 :>\n      [ term |-> 2,\n        wal |-> <<>>,\n        state |-> \"campaign\",\n        votes |->\n            ( a1 :>\n                  [ termHistory |-> <<[term |-> 0, lsn |-> 1]>>,\n                    flushLsn |-> 1 ] ),\n        termHistory |-> <<>>,\n        nextSendLsn |-> <<>> ] )\n/\\ acc_state = ( a1 :>\n      [term |-> 2, wal |-> <<>>, termHistory |-> <<[term |-> 0, lsn |-> 1]>>] @@\n  a2 :>\n      [term |-> 1, wal |-> <<>>, termHistory |-> <<[term |-> 0, lsn |-> 1]>>] @@\n  a3 :>\n      [term |-> 0, wal |-> <<>>, termHistory |-> <<[term |-> 0, lsn |-> 1]>>] )\n/\\ committed = {}\n\nState 7: <TruncateWal(p1,a2) line 280, col 3 to line 293, col 33 of module ProposerAcceptorStatic>\n/\\ prop_state = ( p1 :>\n      [ term |-> 1,\n        wal |-> <<>>,\n        state |-> \"leader\",\n        votes |->\n            ( a1 :>\n                  [ termHistory |-> <<[term |-> 0, lsn |-> 1]>>,\n                    flushLsn |-> 1 ] @@\n              a2 :>\n                  [ termHistory |-> <<[term |-> 0, lsn |-> 1]>>,\n                    flushLsn |-> 1 ] ),\n        termHistory |-> <<[term |-> 0, lsn |-> 1], [term |-> 1, lsn |-> 1]>>,\n        nextSendLsn |-> (a2 :> 1) ] @@\n  p2 :>\n      [ term |-> 2,\n        wal |-> <<>>,\n        state |-> \"campaign\",\n        votes |->\n            ( a1 :>\n                  [ termHistory |-> <<[term |-> 0, lsn |-> 1]>>,\n                    flushLsn |-> 1 ] ),\n        termHistory |-> <<>>,\n        nextSendLsn |-> <<>> ] )\n/\\ acc_state = ( a1 :>\n      [term |-> 2, wal |-> <<>>, termHistory 
|-> <<[term |-> 0, lsn |-> 1]>>] @@\n  a2 :>\n      [ term |-> 1,\n        wal |-> <<>>,\n        termHistory |->\n            <<[term |-> 0, lsn |-> 1], [term |-> 1, lsn |-> 1]>> ] @@\n  a3 :>\n      [term |-> 0, wal |-> <<>>, termHistory |-> <<[term |-> 0, lsn |-> 1]>>] )\n/\\ committed = {}\n\nState 8: <Vote(p2,a3) line 207, col 2 to line 214, col 27 of module ProposerAcceptorStatic>\n/\\ prop_state = ( p1 :>\n      [ term |-> 1,\n        wal |-> <<>>,\n        state |-> \"leader\",\n        votes |->\n            ( a1 :>\n                  [ termHistory |-> <<[term |-> 0, lsn |-> 1]>>,\n                    flushLsn |-> 1 ] @@\n              a2 :>\n                  [ termHistory |-> <<[term |-> 0, lsn |-> 1]>>,\n                    flushLsn |-> 1 ] ),\n        termHistory |-> <<[term |-> 0, lsn |-> 1], [term |-> 1, lsn |-> 1]>>,\n        nextSendLsn |-> (a2 :> 1) ] @@\n  p2 :>\n      [ term |-> 2,\n        wal |-> <<>>,\n        state |-> \"campaign\",\n        votes |->\n            ( a1 :>\n                  [ termHistory |-> <<[term |-> 0, lsn |-> 1]>>,\n                    flushLsn |-> 1 ] @@\n              a3 :>\n                  [ termHistory |-> <<[term |-> 0, lsn |-> 1]>>,\n                    flushLsn |-> 1 ] ),\n        termHistory |-> <<>>,\n        nextSendLsn |-> <<>> ] )\n/\\ acc_state = ( a1 :>\n      [term |-> 2, wal |-> <<>>, termHistory |-> <<[term |-> 0, lsn |-> 1]>>] @@\n  a2 :>\n      [ term |-> 1,\n        wal |-> <<>>,\n        termHistory |->\n            <<[term |-> 0, lsn |-> 1], [term |-> 1, lsn |-> 1]>> ] @@\n  a3 :>\n      [term |-> 2, wal |-> <<>>, termHistory |-> <<[term |-> 0, lsn |-> 1]>>] )\n/\\ committed = {}\n\nState 9: <BecomeLeader(p2) line 222, col 3 to line 245, col 44 of module ProposerAcceptorStatic>\n/\\ prop_state = ( p1 :>\n      [ term |-> 1,\n        wal |-> <<>>,\n        state |-> \"leader\",\n        votes |->\n            ( a1 :>\n                  [ termHistory |-> <<[term |-> 0, lsn |-> 1]>>,\n               
     flushLsn |-> 1 ] @@\n              a2 :>\n                  [ termHistory |-> <<[term |-> 0, lsn |-> 1]>>,\n                    flushLsn |-> 1 ] ),\n        termHistory |-> <<[term |-> 0, lsn |-> 1], [term |-> 1, lsn |-> 1]>>,\n        nextSendLsn |-> (a2 :> 1) ] @@\n  p2 :>\n      [ term |-> 2,\n        wal |-> <<>>,\n        state |-> \"leader\",\n        votes |->\n            ( a1 :>\n                  [ termHistory |-> <<[term |-> 0, lsn |-> 1]>>,\n                    flushLsn |-> 1 ] @@\n              a3 :>\n                  [ termHistory |-> <<[term |-> 0, lsn |-> 1]>>,\n                    flushLsn |-> 1 ] ),\n        termHistory |-> <<[term |-> 0, lsn |-> 1], [term |-> 2, lsn |-> 1]>>,\n        nextSendLsn |-> <<>> ] )\n/\\ acc_state = ( a1 :>\n      [term |-> 2, wal |-> <<>>, termHistory |-> <<[term |-> 0, lsn |-> 1]>>] @@\n  a2 :>\n      [ term |-> 1,\n        wal |-> <<>>,\n        termHistory |->\n            <<[term |-> 0, lsn |-> 1], [term |-> 1, lsn |-> 1]>> ] @@\n  a3 :>\n      [term |-> 2, wal |-> <<>>, termHistory |-> <<[term |-> 0, lsn |-> 1]>>] )\n/\\ committed = {}\n\nState 10: <TruncateWal(p2,a1) line 280, col 3 to line 293, col 33 of module ProposerAcceptorStatic>\n/\\ prop_state = ( p1 :>\n      [ term |-> 1,\n        wal |-> <<>>,\n        state |-> \"leader\",\n        votes |->\n            ( a1 :>\n                  [ termHistory |-> <<[term |-> 0, lsn |-> 1]>>,\n                    flushLsn |-> 1 ] @@\n              a2 :>\n                  [ termHistory |-> <<[term |-> 0, lsn |-> 1]>>,\n                    flushLsn |-> 1 ] ),\n        termHistory |-> <<[term |-> 0, lsn |-> 1], [term |-> 1, lsn |-> 1]>>,\n        nextSendLsn |-> (a2 :> 1) ] @@\n  p2 :>\n      [ term |-> 2,\n        wal |-> <<>>,\n        state |-> \"leader\",\n        votes |->\n            ( a1 :>\n                  [ termHistory |-> <<[term |-> 0, lsn |-> 1]>>,\n                    flushLsn |-> 1 ] @@\n              a3 :>\n                  [ termHistory |-> 
<<[term |-> 0, lsn |-> 1]>>,\n                    flushLsn |-> 1 ] ),\n        termHistory |-> <<[term |-> 0, lsn |-> 1], [term |-> 2, lsn |-> 1]>>,\n        nextSendLsn |-> (a1 :> 1) ] )\n/\\ acc_state = ( a1 :>\n      [ term |-> 2,\n        wal |-> <<>>,\n        termHistory |->\n            <<[term |-> 0, lsn |-> 1], [term |-> 2, lsn |-> 1]>> ] @@\n  a2 :>\n      [ term |-> 1,\n        wal |-> <<>>,\n        termHistory |->\n            <<[term |-> 0, lsn |-> 1], [term |-> 1, lsn |-> 1]>> ] @@\n  a3 :>\n      [term |-> 2, wal |-> <<>>, termHistory |-> <<[term |-> 0, lsn |-> 1]>>] )\n/\\ committed = {}\n\nState 11: <RestartProposer(p2,{a1, a2}) line 188, col 3 to line 198, col 44 of module ProposerAcceptorStatic>\n/\\ prop_state = ( p1 :>\n      [ term |-> 1,\n        wal |-> <<>>,\n        state |-> \"leader\",\n        votes |->\n            ( a1 :>\n                  [ termHistory |-> <<[term |-> 0, lsn |-> 1]>>,\n                    flushLsn |-> 1 ] @@\n              a2 :>\n                  [ termHistory |-> <<[term |-> 0, lsn |-> 1]>>,\n                    flushLsn |-> 1 ] ),\n        termHistory |-> <<[term |-> 0, lsn |-> 1], [term |-> 1, lsn |-> 1]>>,\n        nextSendLsn |-> (a2 :> 1) ] @@\n  p2 :>\n      [ term |-> 3,\n        wal |-> <<>>,\n        state |-> \"campaign\",\n        votes |-> <<>>,\n        termHistory |-> <<>>,\n        nextSendLsn |-> <<>> ] )\n/\\ acc_state = ( a1 :>\n      [ term |-> 2,\n        wal |-> <<>>,\n        termHistory |->\n            <<[term |-> 0, lsn |-> 1], [term |-> 2, lsn |-> 1]>> ] @@\n  a2 :>\n      [ term |-> 1,\n        wal |-> <<>>,\n        termHistory |->\n            <<[term |-> 0, lsn |-> 1], [term |-> 1, lsn |-> 1]>> ] @@\n  a3 :>\n      [term |-> 2, wal |-> <<>>, termHistory |-> <<[term |-> 0, lsn |-> 1]>>] )\n/\\ committed = {}\n\nState 12: <Vote(p2,a3) line 207, col 2 to line 214, col 27 of module ProposerAcceptorStatic>\n/\\ prop_state = ( p1 :>\n      [ term |-> 1,\n        wal |-> <<>>,\n        
state |-> \"leader\",\n        votes |->\n            ( a1 :>\n                  [ termHistory |-> <<[term |-> 0, lsn |-> 1]>>,\n                    flushLsn |-> 1 ] @@\n              a2 :>\n                  [ termHistory |-> <<[term |-> 0, lsn |-> 1]>>,\n                    flushLsn |-> 1 ] ),\n        termHistory |-> <<[term |-> 0, lsn |-> 1], [term |-> 1, lsn |-> 1]>>,\n        nextSendLsn |-> (a2 :> 1) ] @@\n  p2 :>\n      [ term |-> 3,\n        wal |-> <<>>,\n        state |-> \"campaign\",\n        votes |->\n            ( a3 :>\n                  [ termHistory |-> <<[term |-> 0, lsn |-> 1]>>,\n                    flushLsn |-> 1 ] ),\n        termHistory |-> <<>>,\n        nextSendLsn |-> <<>> ] )\n/\\ acc_state = ( a1 :>\n      [ term |-> 2,\n        wal |-> <<>>,\n        termHistory |->\n            <<[term |-> 0, lsn |-> 1], [term |-> 2, lsn |-> 1]>> ] @@\n  a2 :>\n      [ term |-> 1,\n        wal |-> <<>>,\n        termHistory |->\n            <<[term |-> 0, lsn |-> 1], [term |-> 1, lsn |-> 1]>> ] @@\n  a3 :>\n      [term |-> 3, wal |-> <<>>, termHistory |-> <<[term |-> 0, lsn |-> 1]>>] )\n/\\ committed = {}\n\nState 13: <NewEntry(p1) line 297, col 3 to line 303, col 44 of module ProposerAcceptorStatic>\n/\\ prop_state = ( p1 :>\n      [ term |-> 1,\n        wal |-> <<1>>,\n        state |-> \"leader\",\n        votes |->\n            ( a1 :>\n                  [ termHistory |-> <<[term |-> 0, lsn |-> 1]>>,\n                    flushLsn |-> 1 ] @@\n              a2 :>\n                  [ termHistory |-> <<[term |-> 0, lsn |-> 1]>>,\n                    flushLsn |-> 1 ] ),\n        termHistory |-> <<[term |-> 0, lsn |-> 1], [term |-> 1, lsn |-> 1]>>,\n        nextSendLsn |-> (a2 :> 1) ] @@\n  p2 :>\n      [ term |-> 3,\n        wal |-> <<>>,\n        state |-> \"campaign\",\n        votes |->\n            ( a3 :>\n                  [ termHistory |-> <<[term |-> 0, lsn |-> 1]>>,\n                    flushLsn |-> 1 ] ),\n        termHistory |-> <<>>,\n    
    nextSendLsn |-> <<>> ] )\n/\\ acc_state = ( a1 :>\n      [ term |-> 2,\n        wal |-> <<>>,\n        termHistory |->\n            <<[term |-> 0, lsn |-> 1], [term |-> 2, lsn |-> 1]>> ] @@\n  a2 :>\n      [ term |-> 1,\n        wal |-> <<>>,\n        termHistory |->\n            <<[term |-> 0, lsn |-> 1], [term |-> 1, lsn |-> 1]>> ] @@\n  a3 :>\n      [term |-> 3, wal |-> <<>>, termHistory |-> <<[term |-> 0, lsn |-> 1]>>] )\n/\\ committed = {}\n\nState 14: <NewEntry(p1) line 297, col 3 to line 303, col 44 of module ProposerAcceptorStatic>\n/\\ prop_state = ( p1 :>\n      [ term |-> 1,\n        wal |-> <<1, 1>>,\n        state |-> \"leader\",\n        votes |->\n            ( a1 :>\n                  [ termHistory |-> <<[term |-> 0, lsn |-> 1]>>,\n                    flushLsn |-> 1 ] @@\n              a2 :>\n                  [ termHistory |-> <<[term |-> 0, lsn |-> 1]>>,\n                    flushLsn |-> 1 ] ),\n        termHistory |-> <<[term |-> 0, lsn |-> 1], [term |-> 1, lsn |-> 1]>>,\n        nextSendLsn |-> (a2 :> 1) ] @@\n  p2 :>\n      [ term |-> 3,\n        wal |-> <<>>,\n        state |-> \"campaign\",\n        votes |->\n            ( a3 :>\n                  [ termHistory |-> <<[term |-> 0, lsn |-> 1]>>,\n                    flushLsn |-> 1 ] ),\n        termHistory |-> <<>>,\n        nextSendLsn |-> <<>> ] )\n/\\ acc_state = ( a1 :>\n      [ term |-> 2,\n        wal |-> <<>>,\n        termHistory |->\n            <<[term |-> 0, lsn |-> 1], [term |-> 2, lsn |-> 1]>> ] @@\n  a2 :>\n      [ term |-> 1,\n        wal |-> <<>>,\n        termHistory |->\n            <<[term |-> 0, lsn |-> 1], [term |-> 1, lsn |-> 1]>> ] @@\n  a3 :>\n      [term |-> 3, wal |-> <<>>, termHistory |-> <<[term |-> 0, lsn |-> 1]>>] )\n/\\ committed = {}\n\nState 15: <AppendEntry(p1,a2) line 307, col 3 to line 319, col 33 of module ProposerAcceptorStatic>\n/\\ prop_state = ( p1 :>\n      [ term |-> 1,\n        wal |-> <<1, 1>>,\n        state |-> \"leader\",\n        votes |->\n 
           ( a1 :>\n                  [ termHistory |-> <<[term |-> 0, lsn |-> 1]>>,\n                    flushLsn |-> 1 ] @@\n              a2 :>\n                  [ termHistory |-> <<[term |-> 0, lsn |-> 1]>>,\n                    flushLsn |-> 1 ] ),\n        termHistory |-> <<[term |-> 0, lsn |-> 1], [term |-> 1, lsn |-> 1]>>,\n        nextSendLsn |-> (a2 :> 2) ] @@\n  p2 :>\n      [ term |-> 3,\n        wal |-> <<>>,\n        state |-> \"campaign\",\n        votes |->\n            ( a3 :>\n                  [ termHistory |-> <<[term |-> 0, lsn |-> 1]>>,\n                    flushLsn |-> 1 ] ),\n        termHistory |-> <<>>,\n        nextSendLsn |-> <<>> ] )\n/\\ acc_state = ( a1 :>\n      [ term |-> 2,\n        wal |-> <<>>,\n        termHistory |->\n            <<[term |-> 0, lsn |-> 1], [term |-> 2, lsn |-> 1]>> ] @@\n  a2 :>\n      [ term |-> 1,\n        wal |-> <<1>>,\n        termHistory |->\n            <<[term |-> 0, lsn |-> 1], [term |-> 1, lsn |-> 1]>> ] @@\n  a3 :>\n      [term |-> 3, wal |-> <<>>, termHistory |-> <<[term |-> 0, lsn |-> 1]>>] )\n/\\ committed = {}\n\nState 16: <AppendEntry(p1,a2) line 307, col 3 to line 319, col 33 of module ProposerAcceptorStatic>\n/\\ prop_state = ( p1 :>\n      [ term |-> 1,\n        wal |-> <<1, 1>>,\n        state |-> \"leader\",\n        votes |->\n            ( a1 :>\n                  [ termHistory |-> <<[term |-> 0, lsn |-> 1]>>,\n                    flushLsn |-> 1 ] @@\n              a2 :>\n                  [ termHistory |-> <<[term |-> 0, lsn |-> 1]>>,\n                    flushLsn |-> 1 ] ),\n        termHistory |-> <<[term |-> 0, lsn |-> 1], [term |-> 1, lsn |-> 1]>>,\n        nextSendLsn |-> (a2 :> 3) ] @@\n  p2 :>\n      [ term |-> 3,\n        wal |-> <<>>,\n        state |-> \"campaign\",\n        votes |->\n            ( a3 :>\n                  [ termHistory |-> <<[term |-> 0, lsn |-> 1]>>,\n                    flushLsn |-> 1 ] ),\n        termHistory |-> <<>>,\n        nextSendLsn |-> <<>> ] 
)\n/\\ acc_state = ( a1 :>\n      [ term |-> 2,\n        wal |-> <<>>,\n        termHistory |->\n            <<[term |-> 0, lsn |-> 1], [term |-> 2, lsn |-> 1]>> ] @@\n  a2 :>\n      [ term |-> 1,\n        wal |-> <<1, 1>>,\n        termHistory |->\n            <<[term |-> 0, lsn |-> 1], [term |-> 1, lsn |-> 1]>> ] @@\n  a3 :>\n      [term |-> 3, wal |-> <<>>, termHistory |-> <<[term |-> 0, lsn |-> 1]>>] )\n/\\ committed = {}\n\nState 17: <RestartProposer(p1,{a1, a3}) line 188, col 3 to line 198, col 44 of module ProposerAcceptorStatic>\n/\\ prop_state = ( p1 :>\n      [ term |-> 4,\n        wal |-> <<>>,\n        state |-> \"campaign\",\n        votes |-> <<>>,\n        termHistory |-> <<>>,\n        nextSendLsn |-> <<>> ] @@\n  p2 :>\n      [ term |-> 3,\n        wal |-> <<>>,\n        state |-> \"campaign\",\n        votes |->\n            ( a3 :>\n                  [ termHistory |-> <<[term |-> 0, lsn |-> 1]>>,\n                    flushLsn |-> 1 ] ),\n        termHistory |-> <<>>,\n        nextSendLsn |-> <<>> ] )\n/\\ acc_state = ( a1 :>\n      [ term |-> 2,\n        wal |-> <<>>,\n        termHistory |->\n            <<[term |-> 0, lsn |-> 1], [term |-> 2, lsn |-> 1]>> ] @@\n  a2 :>\n      [ term |-> 1,\n        wal |-> <<1, 1>>,\n        termHistory |->\n            <<[term |-> 0, lsn |-> 1], [term |-> 1, lsn |-> 1]>> ] @@\n  a3 :>\n      [term |-> 3, wal |-> <<>>, termHistory |-> <<[term |-> 0, lsn |-> 1]>>] )\n/\\ committed = {}\n\nState 18: <Vote(p1,a1) line 207, col 2 to line 214, col 27 of module ProposerAcceptorStatic>\n/\\ prop_state = ( p1 :>\n      [ term |-> 4,\n        wal |-> <<>>,\n        state |-> \"campaign\",\n        votes |->\n            ( a1 :>\n                  [ termHistory |->\n                        <<[term |-> 0, lsn |-> 1], [term |-> 2, lsn |-> 1]>>,\n                    flushLsn |-> 1 ] ),\n        termHistory |-> <<>>,\n        nextSendLsn |-> <<>> ] @@\n  p2 :>\n      [ term |-> 3,\n        wal |-> <<>>,\n        state |-> 
\"campaign\",\n        votes |->\n            ( a3 :>\n                  [ termHistory |-> <<[term |-> 0, lsn |-> 1]>>,\n                    flushLsn |-> 1 ] ),\n        termHistory |-> <<>>,\n        nextSendLsn |-> <<>> ] )\n/\\ acc_state = ( a1 :>\n      [ term |-> 4,\n        wal |-> <<>>,\n        termHistory |->\n            <<[term |-> 0, lsn |-> 1], [term |-> 2, lsn |-> 1]>> ] @@\n  a2 :>\n      [ term |-> 1,\n        wal |-> <<1, 1>>,\n        termHistory |->\n            <<[term |-> 0, lsn |-> 1], [term |-> 1, lsn |-> 1]>> ] @@\n  a3 :>\n      [term |-> 3, wal |-> <<>>, termHistory |-> <<[term |-> 0, lsn |-> 1]>>] )\n/\\ committed = {}\n\nState 19: <Vote(p2,a2) line 207, col 2 to line 214, col 27 of module ProposerAcceptorStatic>\n/\\ prop_state = ( p1 :>\n      [ term |-> 4,\n        wal |-> <<>>,\n        state |-> \"campaign\",\n        votes |->\n            ( a1 :>\n                  [ termHistory |->\n                        <<[term |-> 0, lsn |-> 1], [term |-> 2, lsn |-> 1]>>,\n                    flushLsn |-> 1 ] ),\n        termHistory |-> <<>>,\n        nextSendLsn |-> <<>> ] @@\n  p2 :>\n      [ term |-> 3,\n        wal |-> <<>>,\n        state |-> \"campaign\",\n        votes |->\n            ( a2 :>\n                  [ termHistory |->\n                        <<[term |-> 0, lsn |-> 1], [term |-> 1, lsn |-> 1]>>,\n                    flushLsn |-> 3 ] @@\n              a3 :>\n                  [ termHistory |-> <<[term |-> 0, lsn |-> 1]>>,\n                    flushLsn |-> 1 ] ),\n        termHistory |-> <<>>,\n        nextSendLsn |-> <<>> ] )\n/\\ acc_state = ( a1 :>\n      [ term |-> 4,\n        wal |-> <<>>,\n        termHistory |->\n            <<[term |-> 0, lsn |-> 1], [term |-> 2, lsn |-> 1]>> ] @@\n  a2 :>\n      [ term |-> 3,\n        wal |-> <<1, 1>>,\n        termHistory |->\n            <<[term |-> 0, lsn |-> 1], [term |-> 1, lsn |-> 1]>> ] @@\n  a3 :>\n      [term |-> 3, wal |-> <<>>, termHistory |-> <<[term |-> 0, lsn |-> 1]>>] 
)\n/\\ committed = {}\n\nState 20: <BecomeLeader(p2) line 222, col 3 to line 245, col 44 of module ProposerAcceptorStatic>\n/\\ prop_state = ( p1 :>\n      [ term |-> 4,\n        wal |-> <<>>,\n        state |-> \"campaign\",\n        votes |->\n            ( a1 :>\n                  [ termHistory |->\n                        <<[term |-> 0, lsn |-> 1], [term |-> 2, lsn |-> 1]>>,\n                    flushLsn |-> 1 ] ),\n        termHistory |-> <<>>,\n        nextSendLsn |-> <<>> ] @@\n  p2 :>\n      [ term |-> 3,\n        wal |-> <<1, 1>>,\n        state |-> \"leader\",\n        votes |->\n            ( a2 :>\n                  [ termHistory |->\n                        <<[term |-> 0, lsn |-> 1], [term |-> 1, lsn |-> 1]>>,\n                    flushLsn |-> 3 ] @@\n              a3 :>\n                  [ termHistory |-> <<[term |-> 0, lsn |-> 1]>>,\n                    flushLsn |-> 1 ] ),\n        termHistory |->\n            << [term |-> 0, lsn |-> 1],\n               [term |-> 1, lsn |-> 1],\n               [term |-> 3, lsn |-> 3] >>,\n        nextSendLsn |-> <<>> ] )\n/\\ acc_state = ( a1 :>\n      [ term |-> 4,\n        wal |-> <<>>,\n        termHistory |->\n            <<[term |-> 0, lsn |-> 1], [term |-> 2, lsn |-> 1]>> ] @@\n  a2 :>\n      [ term |-> 3,\n        wal |-> <<1, 1>>,\n        termHistory |->\n            <<[term |-> 0, lsn |-> 1], [term |-> 1, lsn |-> 1]>> ] @@\n  a3 :>\n      [term |-> 3, wal |-> <<>>, termHistory |-> <<[term |-> 0, lsn |-> 1]>>] )\n/\\ committed = {}\n\nState 21: <TruncateWal(p2,a2) line 280, col 3 to line 293, col 33 of module ProposerAcceptorStatic>\n/\\ prop_state = ( p1 :>\n      [ term |-> 4,\n        wal |-> <<>>,\n        state |-> \"campaign\",\n        votes |->\n            ( a1 :>\n                  [ termHistory |->\n                        <<[term |-> 0, lsn |-> 1], [term |-> 2, lsn |-> 1]>>,\n                    flushLsn |-> 1 ] ),\n        termHistory |-> <<>>,\n        nextSendLsn |-> <<>> ] @@\n  p2 :>\n      
[ term |-> 3,\n        wal |-> <<1, 1>>,\n        state |-> \"leader\",\n        votes |->\n            ( a2 :>\n                  [ termHistory |->\n                        <<[term |-> 0, lsn |-> 1], [term |-> 1, lsn |-> 1]>>,\n                    flushLsn |-> 3 ] @@\n              a3 :>\n                  [ termHistory |-> <<[term |-> 0, lsn |-> 1]>>,\n                    flushLsn |-> 1 ] ),\n        termHistory |->\n            << [term |-> 0, lsn |-> 1],\n               [term |-> 1, lsn |-> 1],\n               [term |-> 3, lsn |-> 3] >>,\n        nextSendLsn |-> (a2 :> 3) ] )\n/\\ acc_state = ( a1 :>\n      [ term |-> 4,\n        wal |-> <<>>,\n        termHistory |->\n            <<[term |-> 0, lsn |-> 1], [term |-> 2, lsn |-> 1]>> ] @@\n  a2 :>\n      [ term |-> 3,\n        wal |-> <<1, 1>>,\n        termHistory |->\n            << [term |-> 0, lsn |-> 1],\n               [term |-> 1, lsn |-> 1],\n               [term |-> 3, lsn |-> 3] >> ] @@\n  a3 :>\n      [term |-> 3, wal |-> <<>>, termHistory |-> <<[term |-> 0, lsn |-> 1]>>] )\n/\\ committed = {}\n\nState 22: <TruncateWal(p2,a3) line 280, col 3 to line 293, col 33 of module ProposerAcceptorStatic>\n/\\ prop_state = ( p1 :>\n      [ term |-> 4,\n        wal |-> <<>>,\n        state |-> \"campaign\",\n        votes |->\n            ( a1 :>\n                  [ termHistory |->\n                        <<[term |-> 0, lsn |-> 1], [term |-> 2, lsn |-> 1]>>,\n                    flushLsn |-> 1 ] ),\n        termHistory |-> <<>>,\n        nextSendLsn |-> <<>> ] @@\n  p2 :>\n      [ term |-> 3,\n        wal |-> <<1, 1>>,\n        state |-> \"leader\",\n        votes |->\n            ( a2 :>\n                  [ termHistory |->\n                        <<[term |-> 0, lsn |-> 1], [term |-> 1, lsn |-> 1]>>,\n                    flushLsn |-> 3 ] @@\n              a3 :>\n                  [ termHistory |-> <<[term |-> 0, lsn |-> 1]>>,\n                    flushLsn |-> 1 ] ),\n        termHistory |->\n            << 
[term |-> 0, lsn |-> 1],\n               [term |-> 1, lsn |-> 1],\n               [term |-> 3, lsn |-> 3] >>,\n        nextSendLsn |-> (a2 :> 3 @@ a3 :> 1) ] )\n/\\ acc_state = ( a1 :>\n      [ term |-> 4,\n        wal |-> <<>>,\n        termHistory |->\n            <<[term |-> 0, lsn |-> 1], [term |-> 2, lsn |-> 1]>> ] @@\n  a2 :>\n      [ term |-> 3,\n        wal |-> <<1, 1>>,\n        termHistory |->\n            << [term |-> 0, lsn |-> 1],\n               [term |-> 1, lsn |-> 1],\n               [term |-> 3, lsn |-> 3] >> ] @@\n  a3 :>\n      [ term |-> 3,\n        wal |-> <<>>,\n        termHistory |->\n            << [term |-> 0, lsn |-> 1],\n               [term |-> 1, lsn |-> 1],\n               [term |-> 3, lsn |-> 3] >> ] )\n/\\ committed = {}\n\nState 23: <AppendEntry(p2,a3) line 307, col 3 to line 319, col 33 of module ProposerAcceptorStatic>\n/\\ prop_state = ( p1 :>\n      [ term |-> 4,\n        wal |-> <<>>,\n        state |-> \"campaign\",\n        votes |->\n            ( a1 :>\n                  [ termHistory |->\n                        <<[term |-> 0, lsn |-> 1], [term |-> 2, lsn |-> 1]>>,\n                    flushLsn |-> 1 ] ),\n        termHistory |-> <<>>,\n        nextSendLsn |-> <<>> ] @@\n  p2 :>\n      [ term |-> 3,\n        wal |-> <<1, 1>>,\n        state |-> \"leader\",\n        votes |->\n            ( a2 :>\n                  [ termHistory |->\n                        <<[term |-> 0, lsn |-> 1], [term |-> 1, lsn |-> 1]>>,\n                    flushLsn |-> 3 ] @@\n              a3 :>\n                  [ termHistory |-> <<[term |-> 0, lsn |-> 1]>>,\n                    flushLsn |-> 1 ] ),\n        termHistory |->\n            << [term |-> 0, lsn |-> 1],\n               [term |-> 1, lsn |-> 1],\n               [term |-> 3, lsn |-> 3] >>,\n        nextSendLsn |-> (a2 :> 3 @@ a3 :> 2) ] )\n/\\ acc_state = ( a1 :>\n      [ term |-> 4,\n        wal |-> <<>>,\n        termHistory |->\n            <<[term |-> 0, lsn |-> 1], [term |-> 2, lsn 
|-> 1]>> ] @@\n  a2 :>\n      [ term |-> 3,\n        wal |-> <<1, 1>>,\n        termHistory |->\n            << [term |-> 0, lsn |-> 1],\n               [term |-> 1, lsn |-> 1],\n               [term |-> 3, lsn |-> 3] >> ] @@\n  a3 :>\n      [ term |-> 3,\n        wal |-> <<1>>,\n        termHistory |->\n            << [term |-> 0, lsn |-> 1],\n               [term |-> 1, lsn |-> 1],\n               [term |-> 3, lsn |-> 3] >> ] )\n/\\ committed = {}\n\nState 24: <CommitEntries(p2,{a2, a3}) line 329, col 3 to line 345, col 45 of module ProposerAcceptorStatic>\n/\\ prop_state = ( p1 :>\n      [ term |-> 4,\n        wal |-> <<>>,\n        state |-> \"campaign\",\n        votes |->\n            ( a1 :>\n                  [ termHistory |->\n                        <<[term |-> 0, lsn |-> 1], [term |-> 2, lsn |-> 1]>>,\n                    flushLsn |-> 1 ] ),\n        termHistory |-> <<>>,\n        nextSendLsn |-> <<>> ] @@\n  p2 :>\n      [ term |-> 3,\n        wal |-> <<1, 1>>,\n        state |-> \"leader\",\n        votes |->\n            ( a2 :>\n                  [ termHistory |->\n                        <<[term |-> 0, lsn |-> 1], [term |-> 1, lsn |-> 1]>>,\n                    flushLsn |-> 3 ] @@\n              a3 :>\n                  [ termHistory |-> <<[term |-> 0, lsn |-> 1]>>,\n                    flushLsn |-> 1 ] ),\n        termHistory |->\n            << [term |-> 0, lsn |-> 1],\n               [term |-> 1, lsn |-> 1],\n               [term |-> 3, lsn |-> 3] >>,\n        nextSendLsn |-> (a2 :> 3 @@ a3 :> 2) ] )\n/\\ acc_state = ( a1 :>\n      [ term |-> 4,\n        wal |-> <<>>,\n        termHistory |->\n            <<[term |-> 0, lsn |-> 1], [term |-> 2, lsn |-> 1]>> ] @@\n  a2 :>\n      [ term |-> 3,\n        wal |-> <<1, 1>>,\n        termHistory |->\n            << [term |-> 0, lsn |-> 1],\n               [term |-> 1, lsn |-> 1],\n               [term |-> 3, lsn |-> 3] >> ] @@\n  a3 :>\n      [ term |-> 3,\n        wal |-> <<1>>,\n        termHistory 
|->\n            << [term |-> 0, lsn |-> 1],\n               [term |-> 1, lsn |-> 1],\n               [term |-> 3, lsn |-> 3] >> ] )\n/\\ committed = {[term |-> 1, lsn |-> 1]}\n\nState 25: <Vote(p1,a3) line 207, col 2 to line 214, col 27 of module ProposerAcceptorStatic>\n/\\ prop_state = ( p1 :>\n      [ term |-> 4,\n        wal |-> <<>>,\n        state |-> \"campaign\",\n        votes |->\n            ( a1 :>\n                  [ termHistory |->\n                        <<[term |-> 0, lsn |-> 1], [term |-> 2, lsn |-> 1]>>,\n                    flushLsn |-> 1 ] @@\n              a3 :>\n                  [ termHistory |->\n                        <<[term |-> 0, lsn |-> 1], [term |-> 1, lsn |-> 1]>>,\n                    flushLsn |-> 2 ] ),\n        termHistory |-> <<>>,\n        nextSendLsn |-> <<>> ] @@\n  p2 :>\n      [ term |-> 3,\n        wal |-> <<1, 1>>,\n        state |-> \"leader\",\n        votes |->\n            ( a2 :>\n                  [ termHistory |->\n                        <<[term |-> 0, lsn |-> 1], [term |-> 1, lsn |-> 1]>>,\n                    flushLsn |-> 3 ] @@\n              a3 :>\n                  [ termHistory |-> <<[term |-> 0, lsn |-> 1]>>,\n                    flushLsn |-> 1 ] ),\n        termHistory |->\n            << [term |-> 0, lsn |-> 1],\n               [term |-> 1, lsn |-> 1],\n               [term |-> 3, lsn |-> 3] >>,\n        nextSendLsn |-> (a2 :> 3 @@ a3 :> 2) ] )\n/\\ acc_state = ( a1 :>\n      [ term |-> 4,\n        wal |-> <<>>,\n        termHistory |->\n            <<[term |-> 0, lsn |-> 1], [term |-> 2, lsn |-> 1]>> ] @@\n  a2 :>\n      [ term |-> 3,\n        wal |-> <<1, 1>>,\n        termHistory |->\n            << [term |-> 0, lsn |-> 1],\n               [term |-> 1, lsn |-> 1],\n               [term |-> 3, lsn |-> 3] >> ] @@\n  a3 :>\n      [ term |-> 4,\n        wal |-> <<1>>,\n        termHistory |->\n            << [term |-> 0, lsn |-> 1],\n               [term |-> 1, lsn |-> 1],\n               [term |-> 3, 
lsn |-> 3] >> ] )\n/\\ committed = {[term |-> 1, lsn |-> 1]}\n\nState 26: <BecomeLeader(p1) line 222, col 3 to line 245, col 44 of module ProposerAcceptorStatic>\n/\\ prop_state = ( p1 :>\n      [ term |-> 4,\n        wal |-> <<>>,\n        state |-> \"leader\",\n        votes |->\n            ( a1 :>\n                  [ termHistory |->\n                        <<[term |-> 0, lsn |-> 1], [term |-> 2, lsn |-> 1]>>,\n                    flushLsn |-> 1 ] @@\n              a3 :>\n                  [ termHistory |->\n                        <<[term |-> 0, lsn |-> 1], [term |-> 1, lsn |-> 1]>>,\n                    flushLsn |-> 2 ] ),\n        termHistory |->\n            << [term |-> 0, lsn |-> 1],\n               [term |-> 2, lsn |-> 1],\n               [term |-> 4, lsn |-> 1] >>,\n        nextSendLsn |-> <<>> ] @@\n  p2 :>\n      [ term |-> 3,\n        wal |-> <<1, 1>>,\n        state |-> \"leader\",\n        votes |->\n            ( a2 :>\n                  [ termHistory |->\n                        <<[term |-> 0, lsn |-> 1], [term |-> 1, lsn |-> 1]>>,\n                    flushLsn |-> 3 ] @@\n              a3 :>\n                  [ termHistory |-> <<[term |-> 0, lsn |-> 1]>>,\n                    flushLsn |-> 1 ] ),\n        termHistory |->\n            << [term |-> 0, lsn |-> 1],\n               [term |-> 1, lsn |-> 1],\n               [term |-> 3, lsn |-> 3] >>,\n        nextSendLsn |-> (a2 :> 3 @@ a3 :> 2) ] )\n/\\ acc_state = ( a1 :>\n      [ term |-> 4,\n        wal |-> <<>>,\n        termHistory |->\n            <<[term |-> 0, lsn |-> 1], [term |-> 2, lsn |-> 1]>> ] @@\n  a2 :>\n      [ term |-> 3,\n        wal |-> <<1, 1>>,\n        termHistory |->\n            << [term |-> 0, lsn |-> 1],\n               [term |-> 1, lsn |-> 1],\n               [term |-> 3, lsn |-> 3] >> ] @@\n  a3 :>\n      [ term |-> 4,\n        wal |-> <<1>>,\n        termHistory |->\n            << [term |-> 0, lsn |-> 1],\n               [term |-> 1, lsn |-> 1],\n               [term 
|-> 3, lsn |-> 3] >> ] )\n/\\ committed = {[term |-> 1, lsn |-> 1]}\n\nState 27: <TruncateWal(p1,a3) line 280, col 3 to line 293, col 33 of module ProposerAcceptorStatic>\n/\\ prop_state = ( p1 :>\n      [ term |-> 4,\n        wal |-> <<>>,\n        state |-> \"leader\",\n        votes |->\n            ( a1 :>\n                  [ termHistory |->\n                        <<[term |-> 0, lsn |-> 1], [term |-> 2, lsn |-> 1]>>,\n                    flushLsn |-> 1 ] @@\n              a3 :>\n                  [ termHistory |->\n                        <<[term |-> 0, lsn |-> 1], [term |-> 1, lsn |-> 1]>>,\n                    flushLsn |-> 2 ] ),\n        termHistory |->\n            << [term |-> 0, lsn |-> 1],\n               [term |-> 2, lsn |-> 1],\n               [term |-> 4, lsn |-> 1] >>,\n        nextSendLsn |-> (a3 :> 1) ] @@\n  p2 :>\n      [ term |-> 3,\n        wal |-> <<1, 1>>,\n        state |-> \"leader\",\n        votes |->\n            ( a2 :>\n                  [ termHistory |->\n                        <<[term |-> 0, lsn |-> 1], [term |-> 1, lsn |-> 1]>>,\n                    flushLsn |-> 3 ] @@\n              a3 :>\n                  [ termHistory |-> <<[term |-> 0, lsn |-> 1]>>,\n                    flushLsn |-> 1 ] ),\n        termHistory |->\n            << [term |-> 0, lsn |-> 1],\n               [term |-> 1, lsn |-> 1],\n               [term |-> 3, lsn |-> 3] >>,\n        nextSendLsn |-> (a2 :> 3 @@ a3 :> 2) ] )\n/\\ acc_state = ( a1 :>\n      [ term |-> 4,\n        wal |-> <<>>,\n        termHistory |->\n            <<[term |-> 0, lsn |-> 1], [term |-> 2, lsn |-> 1]>> ] @@\n  a2 :>\n      [ term |-> 3,\n        wal |-> <<1, 1>>,\n        termHistory |->\n            << [term |-> 0, lsn |-> 1],\n               [term |-> 1, lsn |-> 1],\n               [term |-> 3, lsn |-> 3] >> ] @@\n  a3 :>\n      [ term |-> 4,\n        wal |-> <<>>,\n        termHistory |->\n            << [term |-> 0, lsn |-> 1],\n               [term |-> 2, lsn |-> 1],\n          
     [term |-> 4, lsn |-> 1] >> ] )\n/\\ committed = {[term |-> 1, lsn |-> 1]}\n\nState 28: <NewEntry(p1) line 297, col 3 to line 303, col 44 of module ProposerAcceptorStatic>\n/\\ prop_state = ( p1 :>\n      [ term |-> 4,\n        wal |-> <<4>>,\n        state |-> \"leader\",\n        votes |->\n            ( a1 :>\n                  [ termHistory |->\n                        <<[term |-> 0, lsn |-> 1], [term |-> 2, lsn |-> 1]>>,\n                    flushLsn |-> 1 ] @@\n              a3 :>\n                  [ termHistory |->\n                        <<[term |-> 0, lsn |-> 1], [term |-> 1, lsn |-> 1]>>,\n                    flushLsn |-> 2 ] ),\n        termHistory |->\n            << [term |-> 0, lsn |-> 1],\n               [term |-> 2, lsn |-> 1],\n               [term |-> 4, lsn |-> 1] >>,\n        nextSendLsn |-> (a3 :> 1) ] @@\n  p2 :>\n      [ term |-> 3,\n        wal |-> <<1, 1>>,\n        state |-> \"leader\",\n        votes |->\n            ( a2 :>\n                  [ termHistory |->\n                        <<[term |-> 0, lsn |-> 1], [term |-> 1, lsn |-> 1]>>,\n                    flushLsn |-> 3 ] @@\n              a3 :>\n                  [ termHistory |-> <<[term |-> 0, lsn |-> 1]>>,\n                    flushLsn |-> 1 ] ),\n        termHistory |->\n            << [term |-> 0, lsn |-> 1],\n               [term |-> 1, lsn |-> 1],\n               [term |-> 3, lsn |-> 3] >>,\n        nextSendLsn |-> (a2 :> 3 @@ a3 :> 2) ] )\n/\\ acc_state = ( a1 :>\n      [ term |-> 4,\n        wal |-> <<>>,\n        termHistory |->\n            <<[term |-> 0, lsn |-> 1], [term |-> 2, lsn |-> 1]>> ] @@\n  a2 :>\n      [ term |-> 3,\n        wal |-> <<1, 1>>,\n        termHistory |->\n            << [term |-> 0, lsn |-> 1],\n               [term |-> 1, lsn |-> 1],\n               [term |-> 3, lsn |-> 3] >> ] @@\n  a3 :>\n      [ term |-> 4,\n        wal |-> <<>>,\n        termHistory |->\n            << [term |-> 0, lsn |-> 1],\n               [term |-> 2, lsn |-> 1],\n    
           [term |-> 4, lsn |-> 1] >> ] )\n/\\ committed = {[term |-> 1, lsn |-> 1]}\n\nState 29: <AppendEntry(p1,a3) line 307, col 3 to line 319, col 33 of module ProposerAcceptorStatic>\n/\\ prop_state = ( p1 :>\n      [ term |-> 4,\n        wal |-> <<4>>,\n        state |-> \"leader\",\n        votes |->\n            ( a1 :>\n                  [ termHistory |->\n                        <<[term |-> 0, lsn |-> 1], [term |-> 2, lsn |-> 1]>>,\n                    flushLsn |-> 1 ] @@\n              a3 :>\n                  [ termHistory |->\n                        <<[term |-> 0, lsn |-> 1], [term |-> 1, lsn |-> 1]>>,\n                    flushLsn |-> 2 ] ),\n        termHistory |->\n            << [term |-> 0, lsn |-> 1],\n               [term |-> 2, lsn |-> 1],\n               [term |-> 4, lsn |-> 1] >>,\n        nextSendLsn |-> (a3 :> 2) ] @@\n  p2 :>\n      [ term |-> 3,\n        wal |-> <<1, 1>>,\n        state |-> \"leader\",\n        votes |->\n            ( a2 :>\n                  [ termHistory |->\n                        <<[term |-> 0, lsn |-> 1], [term |-> 1, lsn |-> 1]>>,\n                    flushLsn |-> 3 ] @@\n              a3 :>\n                  [ termHistory |-> <<[term |-> 0, lsn |-> 1]>>,\n                    flushLsn |-> 1 ] ),\n        termHistory |->\n            << [term |-> 0, lsn |-> 1],\n               [term |-> 1, lsn |-> 1],\n               [term |-> 3, lsn |-> 3] >>,\n        nextSendLsn |-> (a2 :> 3 @@ a3 :> 2) ] )\n/\\ acc_state = ( a1 :>\n      [ term |-> 4,\n        wal |-> <<>>,\n        termHistory |->\n            <<[term |-> 0, lsn |-> 1], [term |-> 2, lsn |-> 1]>> ] @@\n  a2 :>\n      [ term |-> 3,\n        wal |-> <<1, 1>>,\n        termHistory |->\n            << [term |-> 0, lsn |-> 1],\n               [term |-> 1, lsn |-> 1],\n               [term |-> 3, lsn |-> 3] >> ] @@\n  a3 :>\n      [ term |-> 4,\n        wal |-> <<4>>,\n        termHistory |->\n            << [term |-> 0, lsn |-> 1],\n               [term |-> 2, lsn 
|-> 1],\n               [term |-> 4, lsn |-> 1] >> ] )\n/\\ committed = {[term |-> 1, lsn |-> 1]}\n\nState 30: <TruncateWal(p1,a1) line 280, col 3 to line 293, col 33 of module ProposerAcceptorStatic>\n/\\ prop_state = ( p1 :>\n      [ term |-> 4,\n        wal |-> <<4>>,\n        state |-> \"leader\",\n        votes |->\n            ( a1 :>\n                  [ termHistory |->\n                        <<[term |-> 0, lsn |-> 1], [term |-> 2, lsn |-> 1]>>,\n                    flushLsn |-> 1 ] @@\n              a3 :>\n                  [ termHistory |->\n                        <<[term |-> 0, lsn |-> 1], [term |-> 1, lsn |-> 1]>>,\n                    flushLsn |-> 2 ] ),\n        termHistory |->\n            << [term |-> 0, lsn |-> 1],\n               [term |-> 2, lsn |-> 1],\n               [term |-> 4, lsn |-> 1] >>,\n        nextSendLsn |-> (a1 :> 1 @@ a3 :> 2) ] @@\n  p2 :>\n      [ term |-> 3,\n        wal |-> <<1, 1>>,\n        state |-> \"leader\",\n        votes |->\n            ( a2 :>\n                  [ termHistory |->\n                        <<[term |-> 0, lsn |-> 1], [term |-> 1, lsn |-> 1]>>,\n                    flushLsn |-> 3 ] @@\n              a3 :>\n                  [ termHistory |-> <<[term |-> 0, lsn |-> 1]>>,\n                    flushLsn |-> 1 ] ),\n        termHistory |->\n            << [term |-> 0, lsn |-> 1],\n               [term |-> 1, lsn |-> 1],\n               [term |-> 3, lsn |-> 3] >>,\n        nextSendLsn |-> (a2 :> 3 @@ a3 :> 2) ] )\n/\\ acc_state = ( a1 :>\n      [ term |-> 4,\n        wal |-> <<>>,\n        termHistory |->\n            << [term |-> 0, lsn |-> 1],\n               [term |-> 2, lsn |-> 1],\n               [term |-> 4, lsn |-> 1] >> ] @@\n  a2 :>\n      [ term |-> 3,\n        wal |-> <<1, 1>>,\n        termHistory |->\n            << [term |-> 0, lsn |-> 1],\n               [term |-> 1, lsn |-> 1],\n               [term |-> 3, lsn |-> 3] >> ] @@\n  a3 :>\n      [ term |-> 4,\n        wal |-> <<4>>,\n        
termHistory |->\n            << [term |-> 0, lsn |-> 1],\n               [term |-> 2, lsn |-> 1],\n               [term |-> 4, lsn |-> 1] >> ] )\n/\\ committed = {[term |-> 1, lsn |-> 1]}\n\nState 31: <AppendEntry(p1,a1) line 307, col 3 to line 319, col 33 of module ProposerAcceptorStatic>\n/\\ prop_state = ( p1 :>\n      [ term |-> 4,\n        wal |-> <<4>>,\n        state |-> \"leader\",\n        votes |->\n            ( a1 :>\n                  [ termHistory |->\n                        <<[term |-> 0, lsn |-> 1], [term |-> 2, lsn |-> 1]>>,\n                    flushLsn |-> 1 ] @@\n              a3 :>\n                  [ termHistory |->\n                        <<[term |-> 0, lsn |-> 1], [term |-> 1, lsn |-> 1]>>,\n                    flushLsn |-> 2 ] ),\n        termHistory |->\n            << [term |-> 0, lsn |-> 1],\n               [term |-> 2, lsn |-> 1],\n               [term |-> 4, lsn |-> 1] >>,\n        nextSendLsn |-> (a1 :> 2 @@ a3 :> 2) ] @@\n  p2 :>\n      [ term |-> 3,\n        wal |-> <<1, 1>>,\n        state |-> \"leader\",\n        votes |->\n            ( a2 :>\n                  [ termHistory |->\n                        <<[term |-> 0, lsn |-> 1], [term |-> 1, lsn |-> 1]>>,\n                    flushLsn |-> 3 ] @@\n              a3 :>\n                  [ termHistory |-> <<[term |-> 0, lsn |-> 1]>>,\n                    flushLsn |-> 1 ] ),\n        termHistory |->\n            << [term |-> 0, lsn |-> 1],\n               [term |-> 1, lsn |-> 1],\n               [term |-> 3, lsn |-> 3] >>,\n        nextSendLsn |-> (a2 :> 3 @@ a3 :> 2) ] )\n/\\ acc_state = ( a1 :>\n      [ term |-> 4,\n        wal |-> <<4>>,\n        termHistory |->\n            << [term |-> 0, lsn |-> 1],\n               [term |-> 2, lsn |-> 1],\n               [term |-> 4, lsn |-> 1] >> ] @@\n  a2 :>\n      [ term |-> 3,\n        wal |-> <<1, 1>>,\n        termHistory |->\n            << [term |-> 0, lsn |-> 1],\n               [term |-> 1, lsn |-> 1],\n               [term |-> 
3, lsn |-> 3] >> ] @@\n  a3 :>\n      [ term |-> 4,\n        wal |-> <<4>>,\n        termHistory |->\n            << [term |-> 0, lsn |-> 1],\n               [term |-> 2, lsn |-> 1],\n               [term |-> 4, lsn |-> 1] >> ] )\n/\\ committed = {[term |-> 1, lsn |-> 1]}\n\nState 32: <CommitEntries(p1,{a1, a3}) line 329, col 3 to line 345, col 45 of module ProposerAcceptorStatic>\n/\\ prop_state = ( p1 :>\n      [ term |-> 4,\n        wal |-> <<4>>,\n        state |-> \"leader\",\n        votes |->\n            ( a1 :>\n                  [ termHistory |->\n                        <<[term |-> 0, lsn |-> 1], [term |-> 2, lsn |-> 1]>>,\n                    flushLsn |-> 1 ] @@\n              a3 :>\n                  [ termHistory |->\n                        <<[term |-> 0, lsn |-> 1], [term |-> 1, lsn |-> 1]>>,\n                    flushLsn |-> 2 ] ),\n        termHistory |->\n            << [term |-> 0, lsn |-> 1],\n               [term |-> 2, lsn |-> 1],\n               [term |-> 4, lsn |-> 1] >>,\n        nextSendLsn |-> (a1 :> 2 @@ a3 :> 2) ] @@\n  p2 :>\n      [ term |-> 3,\n        wal |-> <<1, 1>>,\n        state |-> \"leader\",\n        votes |->\n            ( a2 :>\n                  [ termHistory |->\n                        <<[term |-> 0, lsn |-> 1], [term |-> 1, lsn |-> 1]>>,\n                    flushLsn |-> 3 ] @@\n              a3 :>\n                  [ termHistory |-> <<[term |-> 0, lsn |-> 1]>>,\n                    flushLsn |-> 1 ] ),\n        termHistory |->\n            << [term |-> 0, lsn |-> 1],\n               [term |-> 1, lsn |-> 1],\n               [term |-> 3, lsn |-> 3] >>,\n        nextSendLsn |-> (a2 :> 3 @@ a3 :> 2) ] )\n/\\ acc_state = ( a1 :>\n      [ term |-> 4,\n        wal |-> <<4>>,\n        termHistory |->\n            << [term |-> 0, lsn |-> 1],\n               [term |-> 2, lsn |-> 1],\n               [term |-> 4, lsn |-> 1] >> ] @@\n  a2 :>\n      [ term |-> 3,\n        wal |-> <<1, 1>>,\n        termHistory |->\n            << 
[term |-> 0, lsn |-> 1],\n               [term |-> 1, lsn |-> 1],\n               [term |-> 3, lsn |-> 3] >> ] @@\n  a3 :>\n      [ term |-> 4,\n        wal |-> <<4>>,\n        termHistory |->\n            << [term |-> 0, lsn |-> 1],\n               [term |-> 2, lsn |-> 1],\n               [term |-> 4, lsn |-> 1] >> ] )\n/\\ committed = {[term |-> 1, lsn |-> 1], [term |-> 4, lsn |-> 1]}\n\n1712918117 states generated, 174460942 distinct states found, 50658619 states left on queue.\nThe depth of the complete state graph search is 35.\nFinished in 58min 19s at (2024-11-06 15:18:45)\nTrace exploration spec path: ./MCProposerAcceptorStatic_TTrace_1730902825.tla\n"
  },
  {
    "path": "safekeeper/src/bin/safekeeper.rs",
    "content": "//\n// Main entry point for the safekeeper executable\n//\nuse std::fs::{self, File};\nuse std::io::{ErrorKind, Write};\nuse std::str::FromStr;\nuse std::sync::Arc;\nuse std::time::{Duration, Instant};\n\nuse anyhow::{Context, Result, bail};\nuse camino::{Utf8Path, Utf8PathBuf};\nuse clap::{ArgAction, Parser};\nuse futures::future::BoxFuture;\nuse futures::stream::FuturesUnordered;\nuse futures::{FutureExt, StreamExt};\nuse http_utils::tls_certs::ReloadingCertificateResolver;\nuse metrics::set_build_info_metric;\nuse remote_storage::RemoteStorageConfig;\nuse safekeeper::defaults::{\n    DEFAULT_CONTROL_FILE_SAVE_INTERVAL, DEFAULT_EVICTION_MIN_RESIDENT,\n    DEFAULT_GLOBAL_DISK_CHECK_INTERVAL, DEFAULT_HEARTBEAT_TIMEOUT, DEFAULT_HTTP_LISTEN_ADDR,\n    DEFAULT_MAX_GLOBAL_DISK_USAGE_RATIO, DEFAULT_MAX_OFFLOADER_LAG_BYTES,\n    DEFAULT_MAX_REELECT_OFFLOADER_LAG_BYTES, DEFAULT_MAX_TIMELINE_DISK_USAGE_BYTES,\n    DEFAULT_PARTIAL_BACKUP_CONCURRENCY, DEFAULT_PARTIAL_BACKUP_TIMEOUT, DEFAULT_PG_LISTEN_ADDR,\n    DEFAULT_SSL_CERT_FILE, DEFAULT_SSL_CERT_RELOAD_PERIOD, DEFAULT_SSL_KEY_FILE,\n};\nuse safekeeper::hadron;\nuse safekeeper::wal_backup::WalBackup;\nuse safekeeper::{\n    BACKGROUND_RUNTIME, BROKER_RUNTIME, GlobalTimelines, HTTP_RUNTIME, SafeKeeperConf,\n    WAL_SERVICE_RUNTIME, broker, control_file, http, wal_service,\n};\nuse sd_notify::NotifyState;\nuse storage_broker::{DEFAULT_ENDPOINT, Uri};\nuse tokio::runtime::Handle;\nuse tokio::signal::unix::{SignalKind, signal};\nuse tokio::task::JoinError;\nuse tracing::*;\nuse utils::auth::{JwtAuth, Scope, SwappableJwtAuth};\nuse utils::id::NodeId;\nuse utils::logging::{self, LogFormat, SecretString};\nuse utils::metrics_collector::{METRICS_COLLECTION_INTERVAL, METRICS_COLLECTOR};\nuse utils::sentry_init::init_sentry;\nuse utils::{pid_file, project_build_tag, project_git_version, tcp_listener};\n\nuse safekeeper::hadron::{\n    GLOBAL_DISK_LIMIT_EXCEEDED, get_filesystem_capacity, 
get_filesystem_usage,\n};\nuse safekeeper::metrics::GLOBAL_DISK_UTIL_CHECK_SECONDS;\nuse std::sync::atomic::Ordering;\n\n#[global_allocator]\nstatic GLOBAL: tikv_jemallocator::Jemalloc = tikv_jemallocator::Jemalloc;\n\n/// Configure jemalloc to profile heap allocations by sampling stack traces every 2 MB (1 << 21).\n/// This adds roughly 3% overhead for allocations on average, which is acceptable considering\n/// performance-sensitive code will avoid allocations as far as possible anyway.\n#[allow(non_upper_case_globals)]\n#[unsafe(export_name = \"malloc_conf\")]\npub static malloc_conf: &[u8] = b\"prof:true,prof_active:true,lg_prof_sample:21\\0\";\n\nconst PID_FILE_NAME: &str = \"safekeeper.pid\";\nconst ID_FILE_NAME: &str = \"safekeeper.id\";\n\nproject_git_version!(GIT_VERSION);\nproject_build_tag!(BUILD_TAG);\n\nconst FEATURES: &[&str] = &[\n    #[cfg(feature = \"testing\")]\n    \"testing\",\n];\n\nfn version() -> String {\n    format!(\n        \"{GIT_VERSION} failpoints: {}, features: {:?}\",\n        fail::has_failpoints(),\n        FEATURES,\n    )\n}\n\nconst ABOUT: &str = r#\"\nA fleet of safekeepers is responsible for reliably storing WAL received from\ncompute, passing it through consensus (mitigating potential computes brain\nsplit), and serving the hardened part further downstream to pageserver(s).\n\"#;\n\n#[derive(Parser)]\n#[command(name = \"Neon safekeeper\", version = GIT_VERSION, about = ABOUT, long_about = None)]\nstruct Args {\n    /// Path to the safekeeper data directory.\n    #[arg(short = 'D', long, default_value = \"./\")]\n    datadir: Utf8PathBuf,\n    /// Safekeeper node id.\n    #[arg(long)]\n    id: Option<u64>,\n    /// Initialize safekeeper with given id and exit.\n    #[arg(long)]\n    init: bool,\n    /// Listen endpoint for receiving/sending WAL in the form host:port.\n    #[arg(short, long, default_value = DEFAULT_PG_LISTEN_ADDR)]\n    listen_pg: String,\n    /// Listen endpoint for receiving/sending WAL in the form host:port 
allowing\n    /// only tenant scoped auth tokens. Pointless if auth is disabled.\n    #[arg(long, default_value = None, verbatim_doc_comment)]\n    listen_pg_tenant_only: Option<String>,\n    /// Listen http endpoint for management and metrics in the form host:port.\n    #[arg(long, default_value = DEFAULT_HTTP_LISTEN_ADDR)]\n    listen_http: String,\n    /// Listen https endpoint for management and metrics in the form host:port.\n    #[arg(long, default_value = None)]\n    listen_https: Option<String>,\n    /// Advertised endpoint for receiving/sending WAL in the form host:port. If not\n    /// specified, listen_pg is used to advertise instead.\n    #[arg(long, default_value = None)]\n    advertise_pg: Option<String>,\n    /// Availability zone of the safekeeper.\n    #[arg(long)]\n    availability_zone: Option<String>,\n    /// Do not wait for changes to be written safely to disk. Unsafe.\n    #[arg(short, long)]\n    no_sync: bool,\n    /// Dump control file at path specified by this argument and exit.\n    #[arg(long)]\n    dump_control_file: Option<Utf8PathBuf>,\n    /// Broker endpoint for storage nodes coordination in the form\n    /// http[s]://host:port. 
In case of https schema TLS is connection is\n    /// established; plaintext otherwise.\n    #[arg(long, default_value = DEFAULT_ENDPOINT, verbatim_doc_comment)]\n    broker_endpoint: Uri,\n    /// Broker keepalive interval.\n    #[arg(long, value_parser= humantime::parse_duration, default_value = storage_broker::DEFAULT_KEEPALIVE_INTERVAL)]\n    broker_keepalive_interval: Duration,\n    /// Peer safekeeper is considered dead after not receiving heartbeats from\n    /// it during this period passed as a human readable duration.\n    #[arg(long, value_parser= humantime::parse_duration, default_value = DEFAULT_HEARTBEAT_TIMEOUT, verbatim_doc_comment)]\n    heartbeat_timeout: Duration,\n    /// Enable/disable peer recovery.\n    #[arg(long, default_value = \"false\", action=ArgAction::Set)]\n    peer_recovery: bool,\n    /// Remote storage configuration for WAL backup (offloading to s3) as TOML\n    /// inline table, e.g.\n    ///   {max_concurrent_syncs = 17, max_sync_errors = 13, bucket_name = \"<BUCKETNAME>\", bucket_region = \"<REGION>\", concurrency_limit = 119}\n    /// Safekeeper offloads WAL to\n    ///   [prefix_in_bucket/]<tenant_id>/<timeline_id>/<segment_file>, mirroring\n    /// structure on the file system.\n    #[arg(long, value_parser = parse_remote_storage, verbatim_doc_comment)]\n    remote_storage: Option<RemoteStorageConfig>,\n    /// Safekeeper won't be elected for WAL offloading if it is lagging for more than this value in bytes\n    #[arg(long, default_value_t = DEFAULT_MAX_OFFLOADER_LAG_BYTES)]\n    max_offloader_lag: u64,\n    /* BEGIN_HADRON */\n    /// Safekeeper will re-elect a new offloader if the current backup lagging for more than this value in bytes\n    #[arg(long, default_value_t = DEFAULT_MAX_REELECT_OFFLOADER_LAG_BYTES)]\n    max_reelect_offloader_lag_bytes: u64,\n    /// Safekeeper will stop accepting new WALs if the timeline disk usage exceeds this value in bytes.\n    /// Setting this value to 0 disables the limit.\n    
#[arg(long, default_value_t = DEFAULT_MAX_TIMELINE_DISK_USAGE_BYTES)]\n    max_timeline_disk_usage_bytes: u64,\n    /* END_HADRON */\n    /// Number of max parallel WAL segments to be offloaded to remote storage.\n    #[arg(long, default_value = \"5\")]\n    wal_backup_parallel_jobs: usize,\n    /// Disable WAL backup to s3. When disabled, safekeeper removes WAL ignoring\n    /// WAL backup horizon.\n    #[arg(long)]\n    disable_wal_backup: bool,\n    /// If given, enables auth on incoming connections to WAL service endpoint\n    /// (--listen-pg). Value specifies path to a .pem public key used for\n    /// validations of JWT tokens. Empty string is allowed and means disabling\n    /// auth.\n    #[arg(long, verbatim_doc_comment, value_parser = opt_pathbuf_parser)]\n    pg_auth_public_key_path: Option<Utf8PathBuf>,\n    /// If given, enables auth on incoming connections to tenant only WAL\n    /// service endpoint (--listen-pg-tenant-only). Value specifies path to a\n    /// .pem public key used for validations of JWT tokens. Empty string is\n    /// allowed and means disabling auth.\n    #[arg(long, verbatim_doc_comment, value_parser = opt_pathbuf_parser)]\n    pg_tenant_only_auth_public_key_path: Option<Utf8PathBuf>,\n    /// If given, enables auth on incoming connections to http management\n    /// service endpoint (--listen-http). Value specifies path to a .pem public\n    /// key used for validations of JWT tokens. Empty string is allowed and\n    /// means disabling auth.\n    #[arg(long, verbatim_doc_comment, value_parser = opt_pathbuf_parser)]\n    http_auth_public_key_path: Option<Utf8PathBuf>,\n    /// Format for logging, either 'plain' or 'json'.\n    #[arg(long, default_value = \"plain\")]\n    log_format: String,\n    /// Run everything in single threaded current thread runtime, might be\n    /// useful for debugging.\n    #[arg(long)]\n    current_thread_runtime: bool,\n    /// Keep horizon for walsenders, i.e. 
don't remove WAL segments that are\n    /// still needed for existing replication connection.\n    #[arg(long)]\n    walsenders_keep_horizon: bool,\n    /// Controls how long backup will wait until uploading the partial segment.\n    #[arg(long, value_parser = humantime::parse_duration, default_value = DEFAULT_PARTIAL_BACKUP_TIMEOUT, verbatim_doc_comment)]\n    partial_backup_timeout: Duration,\n    /// Disable task to push messages to broker every second. Supposed to\n    /// be used in tests.\n    #[arg(long)]\n    disable_periodic_broker_push: bool,\n    /// Enable automatic switching to offloaded state.\n    #[arg(long)]\n    enable_offload: bool,\n    /// Delete local WAL files after offloading. When disabled, they will be left on disk.\n    #[arg(long)]\n    delete_offloaded_wal: bool,\n    /// Pending updates to control file will be automatically saved after this interval.\n    #[arg(long, value_parser = humantime::parse_duration, default_value = DEFAULT_CONTROL_FILE_SAVE_INTERVAL)]\n    control_file_save_interval: Duration,\n    /// Number of allowed concurrent uploads of partial segments to remote storage.\n    #[arg(long, default_value = DEFAULT_PARTIAL_BACKUP_CONCURRENCY)]\n    partial_backup_concurrency: usize,\n    /// How long a timeline must be resident before it is eligible for eviction.\n    /// Usually, timeline eviction has to wait for `partial_backup_timeout` before being eligible for eviction,\n    /// but if a timeline is un-evicted and then _not_ written to, it would immediately flap to evicting again,\n    /// if it weren't for `eviction_min_resident` preventing that.\n    ///\n    /// Also defines interval for eviction retries.\n    #[arg(long, value_parser = humantime::parse_duration, default_value = DEFAULT_EVICTION_MIN_RESIDENT)]\n    eviction_min_resident: Duration,\n    /// Enable fanning out WAL to different shards from the same reader\n    #[arg(long)]\n    wal_reader_fanout: bool,\n    /// Only fan out the WAL reader if the absoulte 
delta between the new requested position\n    /// and the current position of the reader is smaller than this value.\n    #[arg(long)]\n    max_delta_for_fanout: Option<u64>,\n    /// Path to a file with certificate's private key for https API.\n    #[arg(long, default_value = DEFAULT_SSL_KEY_FILE)]\n    ssl_key_file: Utf8PathBuf,\n    /// Path to a file with a X509 certificate for https API.\n    #[arg(long, default_value = DEFAULT_SSL_CERT_FILE)]\n    ssl_cert_file: Utf8PathBuf,\n    /// Period to reload certificate and private key from files.\n    #[arg(long, value_parser = humantime::parse_duration, default_value = DEFAULT_SSL_CERT_RELOAD_PERIOD)]\n    ssl_cert_reload_period: Duration,\n    /// Trusted root CA certificates to use in https APIs.\n    #[arg(long)]\n    ssl_ca_file: Option<Utf8PathBuf>,\n    /// Flag to use https for requests to peer's safekeeper API.\n    #[arg(long)]\n    use_https_safekeeper_api: bool,\n    /// Path to the JWT auth token used to authenticate with other safekeepers.\n    #[arg(long)]\n    auth_token_path: Option<Utf8PathBuf>,\n\n    /// Enable TLS in WAL service API.\n    /// Does not force TLS: the client negotiates TLS usage during the handshake.\n    /// Uses key and certificate from ssl_key_file/ssl_cert_file.\n    #[arg(long)]\n    enable_tls_wal_service_api: bool,\n\n    /// Controls whether to collect all metrics on each scrape or to return potentially stale\n    /// results.\n    #[arg(long, default_value_t = true)]\n    force_metric_collection_on_scrape: bool,\n\n    /// Run in development mode (disables security checks)\n    #[arg(long, help = \"Run in development mode (disables security checks)\")]\n    dev: bool,\n    /* BEGIN_HADRON */\n    #[arg(long)]\n    enable_pull_timeline_on_startup: bool,\n    /// How often to scan entire data-dir for total disk usage\n    #[arg(long, value_parser=humantime::parse_duration, default_value = DEFAULT_GLOBAL_DISK_CHECK_INTERVAL)]\n    global_disk_check_interval: Duration,\n    
/// The portion of the filesystem capacity that can be used by all timelines.\n    /// A circuit breaker will trip and reject all WAL writes if the total usage\n    /// exceeds this ratio.\n    /// Set to 0 to disable the global disk usage limit.\n    #[arg(long, default_value_t = DEFAULT_MAX_GLOBAL_DISK_USAGE_RATIO)]\n    max_global_disk_usage_ratio: f64,\n    /* END_HADRON */\n}\n\n// Like PathBufValueParser, but allows empty string.\nfn opt_pathbuf_parser(s: &str) -> Result<Utf8PathBuf, String> {\n    Ok(Utf8PathBuf::from_str(s).unwrap())\n}\n\n#[tokio::main(flavor = \"current_thread\")]\nasync fn main() -> anyhow::Result<()> {\n    // We want to allow multiple occurrences of the same arg (taking the last) so\n    // that neon_local could generate command with defaults + overrides without\n    // getting 'argument cannot be used multiple times' error. This seems to be\n    // impossible with pure Derive API, so convert struct to Command, modify it,\n    // parse arguments, and then fill the struct back.\n    let cmd = <Args as clap::CommandFactory>::command()\n        .args_override_self(true)\n        .version(version());\n    let mut matches = cmd.get_matches();\n    let mut args = <Args as clap::FromArgMatches>::from_arg_matches_mut(&mut matches)?;\n\n    // I failed to modify opt_pathbuf_parser to return Option<PathBuf> in\n    // reasonable time, so turn empty string into option post factum.\n    if let Some(pb) = &args.pg_auth_public_key_path {\n        if pb.as_os_str().is_empty() {\n            args.pg_auth_public_key_path = None;\n        }\n    }\n    if let Some(pb) = &args.pg_tenant_only_auth_public_key_path {\n        if pb.as_os_str().is_empty() {\n            args.pg_tenant_only_auth_public_key_path = None;\n        }\n    }\n    if let Some(pb) = &args.http_auth_public_key_path {\n        if pb.as_os_str().is_empty() {\n            args.http_auth_public_key_path = None;\n        }\n    }\n\n    if let Some(addr) = args.dump_control_file {\n        
let state = control_file::FileStorage::load_control_file(addr)?;\n        let json = serde_json::to_string(&state)?;\n        print!(\"{json}\");\n        return Ok(());\n    }\n\n    // important to keep the order of:\n    // 1. init logging\n    // 2. tracing panic hook\n    // 3. sentry\n    logging::init(\n        LogFormat::from_config(&args.log_format)?,\n        logging::TracingErrorLayerEnablement::Disabled,\n        logging::Output::Stdout,\n    )?;\n    logging::replace_panic_hook_with_tracing_panic_hook().forget();\n    info!(\"version: {GIT_VERSION}\");\n    info!(\"buld_tag: {BUILD_TAG}\");\n\n    let args_workdir = &args.datadir;\n    let workdir = args_workdir.canonicalize_utf8().with_context(|| {\n        format!(\"Failed to get the absolute path for input workdir {args_workdir:?}\")\n    })?;\n\n    // Change into the data directory.\n    std::env::set_current_dir(&workdir)?;\n\n    // Prevent running multiple safekeepers on the same directory\n    let lock_file_path = workdir.join(PID_FILE_NAME);\n    let lock_file =\n        pid_file::claim_for_current_process(&lock_file_path).context(\"claim pid file\")?;\n    info!(\"claimed pid file at {lock_file_path:?}\");\n    // ensure that the lock file is held even if the main thread of the process panics\n    // we need to release the lock file only when the current process is gone\n    std::mem::forget(lock_file);\n\n    // Set or read our ID.\n    let id = set_id(&workdir, args.id.map(NodeId))?;\n    if args.init {\n        return Ok(());\n    }\n\n    let pg_auth = match args.pg_auth_public_key_path.as_ref() {\n        None => {\n            info!(\"pg auth is disabled\");\n            None\n        }\n        Some(path) => {\n            info!(\"loading pg auth JWT key from {path}\");\n            Some(Arc::new(\n                JwtAuth::from_key_path(path).context(\"failed to load the auth key\")?,\n            ))\n        }\n    };\n    let pg_tenant_only_auth = match 
args.pg_tenant_only_auth_public_key_path.as_ref() {\n        None => {\n            info!(\"pg tenant only auth is disabled\");\n            None\n        }\n        Some(path) => {\n            info!(\"loading pg tenant only auth JWT key from {path}\");\n            Some(Arc::new(\n                JwtAuth::from_key_path(path).context(\"failed to load the auth key\")?,\n            ))\n        }\n    };\n    let http_auth = match args.http_auth_public_key_path.as_ref() {\n        None => {\n            info!(\"http auth is disabled\");\n            None\n        }\n        Some(path) => {\n            info!(\"loading http auth JWT key(s) from {path}\");\n            let jwt_auth = JwtAuth::from_key_path(path).context(\"failed to load the auth key\")?;\n            Some(Arc::new(SwappableJwtAuth::new(jwt_auth)))\n        }\n    };\n\n    // Load JWT auth token to connect to other safekeepers for pull_timeline.\n    let sk_auth_token = if let Some(auth_token_path) = args.auth_token_path.as_ref() {\n        info!(\"loading JWT token for authentication with safekeepers from {auth_token_path}\");\n        let auth_token = tokio::fs::read_to_string(auth_token_path).await?;\n        Some(SecretString::from(auth_token.trim().to_owned()))\n    } else {\n        info!(\"no JWT token for authentication with safekeepers detected\");\n        None\n    };\n\n    let ssl_ca_certs = match args.ssl_ca_file.as_ref() {\n        Some(ssl_ca_file) => {\n            tracing::info!(\"Using ssl root CA file: {ssl_ca_file:?}\");\n            let buf = tokio::fs::read(ssl_ca_file).await?;\n            pem::parse_many(&buf)?\n                .into_iter()\n                .filter(|pem| pem.tag() == \"CERTIFICATE\")\n                .collect()\n        }\n        None => Vec::new(),\n    };\n\n    let conf = Arc::new(SafeKeeperConf {\n        workdir,\n        my_id: id,\n        listen_pg_addr: args.listen_pg,\n        listen_pg_addr_tenant_only: args.listen_pg_tenant_only,\n        
listen_http_addr: args.listen_http,\n        listen_https_addr: args.listen_https,\n        advertise_pg_addr: args.advertise_pg,\n        availability_zone: args.availability_zone,\n        no_sync: args.no_sync,\n        broker_endpoint: args.broker_endpoint,\n        broker_keepalive_interval: args.broker_keepalive_interval,\n        heartbeat_timeout: args.heartbeat_timeout,\n        peer_recovery_enabled: args.peer_recovery,\n        remote_storage: args.remote_storage,\n        max_offloader_lag_bytes: args.max_offloader_lag,\n        /* BEGIN_HADRON */\n        max_reelect_offloader_lag_bytes: args.max_reelect_offloader_lag_bytes,\n        max_timeline_disk_usage_bytes: args.max_timeline_disk_usage_bytes,\n        /* END_HADRON */\n        wal_backup_enabled: !args.disable_wal_backup,\n        backup_parallel_jobs: args.wal_backup_parallel_jobs,\n        pg_auth,\n        pg_tenant_only_auth,\n        http_auth,\n        sk_auth_token,\n        current_thread_runtime: args.current_thread_runtime,\n        walsenders_keep_horizon: args.walsenders_keep_horizon,\n        partial_backup_timeout: args.partial_backup_timeout,\n        disable_periodic_broker_push: args.disable_periodic_broker_push,\n        enable_offload: args.enable_offload,\n        delete_offloaded_wal: args.delete_offloaded_wal,\n        control_file_save_interval: args.control_file_save_interval,\n        partial_backup_concurrency: args.partial_backup_concurrency,\n        eviction_min_resident: args.eviction_min_resident,\n        wal_reader_fanout: args.wal_reader_fanout,\n        max_delta_for_fanout: args.max_delta_for_fanout,\n        ssl_key_file: args.ssl_key_file,\n        ssl_cert_file: args.ssl_cert_file,\n        ssl_cert_reload_period: args.ssl_cert_reload_period,\n        ssl_ca_certs,\n        use_https_safekeeper_api: args.use_https_safekeeper_api,\n        enable_tls_wal_service_api: args.enable_tls_wal_service_api,\n        force_metric_collection_on_scrape: 
args.force_metric_collection_on_scrape,\n        /* BEGIN_HADRON */\n        advertise_pg_addr_tenant_only: None,\n        enable_pull_timeline_on_startup: args.enable_pull_timeline_on_startup,\n        hcc_base_url: None,\n        global_disk_check_interval: args.global_disk_check_interval,\n        max_global_disk_usage_ratio: args.max_global_disk_usage_ratio,\n        /* END_HADRON */\n    });\n\n    // initialize sentry if SENTRY_DSN is provided\n    let _sentry_guard = init_sentry(\n        Some(GIT_VERSION.into()),\n        &[(\"node_id\", &conf.my_id.to_string())],\n    );\n    start_safekeeper(conf).await\n}\n\n/// Result of joining any of main tasks: upper error means task failed to\n/// complete, e.g. panicked, inner is error produced by task itself.\ntype JoinTaskRes = Result<anyhow::Result<()>, JoinError>;\n\nasync fn start_safekeeper(conf: Arc<SafeKeeperConf>) -> Result<()> {\n    // fsync the datadir to make sure we have a consistent state on disk.\n    if !conf.no_sync {\n        let dfd = File::open(&conf.workdir).context(\"open datadir for syncfs\")?;\n        let started = Instant::now();\n        utils::crashsafe::syncfs(dfd)?;\n        let elapsed = started.elapsed();\n        info!(\n            elapsed_ms = elapsed.as_millis(),\n            \"syncfs data directory done\"\n        );\n    }\n\n    info!(\"starting safekeeper WAL service on {}\", conf.listen_pg_addr);\n    let pg_listener = tcp_listener::bind(conf.listen_pg_addr.clone()).map_err(|e| {\n        error!(\"failed to bind to address {}: {}\", conf.listen_pg_addr, e);\n        e\n    })?;\n\n    let pg_listener_tenant_only =\n        if let Some(listen_pg_addr_tenant_only) = &conf.listen_pg_addr_tenant_only {\n            info!(\n                \"starting safekeeper tenant scoped WAL service on {}\",\n                listen_pg_addr_tenant_only\n            );\n            let listener = tcp_listener::bind(listen_pg_addr_tenant_only.clone()).map_err(|e| {\n                error!(\n    
                \"failed to bind to address {}: {}\",\n                    listen_pg_addr_tenant_only, e\n                );\n                e\n            })?;\n            Some(listener)\n        } else {\n            None\n        };\n\n    info!(\n        \"starting safekeeper HTTP service on {}\",\n        conf.listen_http_addr\n    );\n    let http_listener = tcp_listener::bind(conf.listen_http_addr.clone()).map_err(|e| {\n        error!(\"failed to bind to address {}: {}\", conf.listen_http_addr, e);\n        e\n    })?;\n\n    let https_listener = match conf.listen_https_addr.as_ref() {\n        Some(listen_https_addr) => {\n            info!(\"starting safekeeper HTTPS service on {}\", listen_https_addr);\n            Some(tcp_listener::bind(listen_https_addr).map_err(|e| {\n                error!(\"failed to bind to address {}: {}\", listen_https_addr, e);\n                e\n            })?)\n        }\n        None => None,\n    };\n\n    let wal_backup = Arc::new(WalBackup::new(&conf).await?);\n\n    let global_timelines = Arc::new(GlobalTimelines::new(conf.clone(), wal_backup.clone()));\n\n    // Register metrics collector for active timelines. 
It's important to do this\n    // after daemonizing, otherwise process collector will be upset.\n    let timeline_collector = safekeeper::metrics::TimelineCollector::new(global_timelines.clone());\n    metrics::register_internal(Box::new(timeline_collector))?;\n\n    // Keep handles to main tasks to die if any of them disappears.\n    let mut tasks_handles: FuturesUnordered<BoxFuture<(String, JoinTaskRes)>> =\n        FuturesUnordered::new();\n\n    // Start wal backup launcher before loading timelines as we'll notify it\n    // through the channel about timelines which need offloading, not draining\n    // the channel would cause deadlock.\n    let current_thread_rt = conf\n        .current_thread_runtime\n        .then(|| Handle::try_current().expect(\"no runtime in main\"));\n\n    // Load all timelines from disk to memory.\n    global_timelines.init().await?;\n\n    /* BEGIN_HADRON */\n    if conf.enable_pull_timeline_on_startup && global_timelines.timelines_count() == 0 {\n        match hadron::hcc_pull_timelines(&conf, global_timelines.clone()).await {\n            Ok(_) => {\n                info!(\"Successfully pulled all timelines from peer safekeepers\");\n            }\n            Err(e) => {\n                error!(\"Failed to pull timelines from peer safekeepers: {:?}\", e);\n                return Err(e);\n            }\n        }\n    }\n    /* END_HADRON */\n\n    // Run everything in current thread rt, if asked.\n    if conf.current_thread_runtime {\n        info!(\"running in current thread runtime\");\n    }\n\n    let tls_server_config = if conf.listen_https_addr.is_some() || conf.enable_tls_wal_service_api {\n        let ssl_key_file = conf.ssl_key_file.clone();\n        let ssl_cert_file = conf.ssl_cert_file.clone();\n        let ssl_cert_reload_period = conf.ssl_cert_reload_period;\n\n        // Create resolver in BACKGROUND_RUNTIME, so the background certificate reloading\n        // task is run in this runtime.\n        let cert_resolver = 
current_thread_rt\n            .as_ref()\n            .unwrap_or_else(|| BACKGROUND_RUNTIME.handle())\n            .spawn(async move {\n                ReloadingCertificateResolver::new(\n                    \"main\",\n                    &ssl_key_file,\n                    &ssl_cert_file,\n                    ssl_cert_reload_period,\n                )\n                .await\n            })\n            .await??;\n\n        let config = rustls::ServerConfig::builder()\n            .with_no_client_auth()\n            .with_cert_resolver(cert_resolver);\n\n        Some(Arc::new(config))\n    } else {\n        None\n    };\n\n    let wal_service_handle = current_thread_rt\n        .as_ref()\n        .unwrap_or_else(|| WAL_SERVICE_RUNTIME.handle())\n        .spawn(wal_service::task_main(\n            conf.clone(),\n            pg_listener,\n            Scope::SafekeeperData,\n            conf.enable_tls_wal_service_api\n                .then(|| tls_server_config.clone())\n                .flatten(),\n            global_timelines.clone(),\n        ))\n        // wrap with task name for error reporting\n        .map(|res| (\"WAL service main\".to_owned(), res));\n    tasks_handles.push(Box::pin(wal_service_handle));\n\n    let global_timelines_ = global_timelines.clone();\n    let timeline_housekeeping_handle = current_thread_rt\n        .as_ref()\n        .unwrap_or_else(|| WAL_SERVICE_RUNTIME.handle())\n        .spawn(async move {\n            const TOMBSTONE_TTL: Duration = Duration::from_secs(3600 * 24);\n            loop {\n                tokio::time::sleep(TOMBSTONE_TTL).await;\n                global_timelines_.housekeeping(&TOMBSTONE_TTL);\n            }\n        })\n        .map(|res| (\"Timeline map housekeeping\".to_owned(), res));\n    tasks_handles.push(Box::pin(timeline_housekeeping_handle));\n\n    /* BEGIN_HADRON */\n    // Spawn global disk usage watcher task, if a global disk usage limit is specified.\n    let interval = 
conf.global_disk_check_interval;\n    let data_dir = conf.workdir.clone();\n    // Use the safekeeper data directory to compute filesystem capacity. This only runs once on startup, so\n    // there is little point to continue if we can't have the proper protections in place.\n    let fs_capacity_bytes = get_filesystem_capacity(data_dir.as_std_path())\n        .expect(\"Failed to get filesystem capacity for data directory\");\n    let limit: u64 = (conf.max_global_disk_usage_ratio * fs_capacity_bytes as f64) as u64;\n    if limit > 0 {\n        let disk_usage_watch_handle = BACKGROUND_RUNTIME\n            .handle()\n            .spawn(async move {\n                // Use Tokio interval to preserve fixed cadence between filesystem utilization checks\n                let mut ticker = tokio::time::interval(interval);\n                ticker.set_missed_tick_behavior(tokio::time::MissedTickBehavior::Delay);\n\n                loop {\n                    ticker.tick().await;\n                    let data_dir_clone = data_dir.clone();\n                    let check_start = Instant::now();\n\n                    let usage = tokio::task::spawn_blocking(move || {\n                        get_filesystem_usage(data_dir_clone.as_std_path())\n                    })\n                    .await\n                    .unwrap_or(0);\n\n                    let elapsed = check_start.elapsed().as_secs_f64();\n                    GLOBAL_DISK_UTIL_CHECK_SECONDS.observe(elapsed);\n                    if usage > limit {\n                        warn!(\n                            \"Global disk usage exceeded limit. 
Usage: {} bytes, limit: {} bytes\",\n                            usage, limit\n                        );\n                    }\n                    GLOBAL_DISK_LIMIT_EXCEEDED.store(usage > limit, Ordering::Relaxed);\n                }\n            })\n            .map(|res| (\"Global disk usage watcher\".to_string(), res));\n        tasks_handles.push(Box::pin(disk_usage_watch_handle));\n    }\n    /* END_HADRON */\n    if let Some(pg_listener_tenant_only) = pg_listener_tenant_only {\n        let wal_service_handle = current_thread_rt\n            .as_ref()\n            .unwrap_or_else(|| WAL_SERVICE_RUNTIME.handle())\n            .spawn(wal_service::task_main(\n                conf.clone(),\n                pg_listener_tenant_only,\n                Scope::Tenant,\n                conf.enable_tls_wal_service_api\n                    .then(|| tls_server_config.clone())\n                    .flatten(),\n                global_timelines.clone(),\n            ))\n            // wrap with task name for error reporting\n            .map(|res| (\"WAL service tenant only main\".to_owned(), res));\n        tasks_handles.push(Box::pin(wal_service_handle));\n    }\n\n    let http_handle = current_thread_rt\n        .as_ref()\n        .unwrap_or_else(|| HTTP_RUNTIME.handle())\n        .spawn(http::task_main_http(\n            conf.clone(),\n            http_listener,\n            global_timelines.clone(),\n        ))\n        .map(|res| (\"HTTP service main\".to_owned(), res));\n    tasks_handles.push(Box::pin(http_handle));\n\n    if let Some(https_listener) = https_listener {\n        let https_handle = current_thread_rt\n            .as_ref()\n            .unwrap_or_else(|| HTTP_RUNTIME.handle())\n            .spawn(http::task_main_https(\n                conf.clone(),\n                https_listener,\n                tls_server_config.expect(\"tls_server_config is set earlier if https is enabled\"),\n                global_timelines.clone(),\n            ))\n            
.map(|res| (\"HTTPS service main\".to_owned(), res));\n        tasks_handles.push(Box::pin(https_handle));\n    }\n\n    let broker_task_handle = current_thread_rt\n        .as_ref()\n        .unwrap_or_else(|| BROKER_RUNTIME.handle())\n        .spawn(\n            broker::task_main(conf.clone(), global_timelines.clone())\n                .instrument(info_span!(\"broker\")),\n        )\n        .map(|res| (\"broker main\".to_owned(), res));\n    tasks_handles.push(Box::pin(broker_task_handle));\n\n    /* BEGIN_HADRON */\n    if conf.force_metric_collection_on_scrape {\n        let metrics_handle = current_thread_rt\n            .as_ref()\n            .unwrap_or_else(|| BACKGROUND_RUNTIME.handle())\n            .spawn(async move {\n                let mut interval: tokio::time::Interval =\n                    tokio::time::interval(METRICS_COLLECTION_INTERVAL);\n                loop {\n                    interval.tick().await;\n                    tokio::task::spawn_blocking(|| {\n                        METRICS_COLLECTOR.run_once(true);\n                    });\n                }\n            })\n            .map(|res| (\"broker main\".to_owned(), res));\n        tasks_handles.push(Box::pin(metrics_handle));\n    }\n    /* END_HADRON */\n\n    set_build_info_metric(GIT_VERSION, BUILD_TAG);\n\n    // TODO: update tokio-stream, convert to real async Stream with\n    // SignalStream, map it to obtain missing signal name, combine streams into\n    // single stream we can easily sit on.\n    let mut sigquit_stream = signal(SignalKind::quit())?;\n    let mut sigint_stream = signal(SignalKind::interrupt())?;\n    let mut sigterm_stream = signal(SignalKind::terminate())?;\n\n    // Notify systemd that we are ready. This is important as currently loading\n    // timelines takes significant time (~30s in busy regions).\n    if let Err(e) = sd_notify::notify(true, &[NotifyState::Ready]) {\n        warn!(\"systemd notify failed: {:?}\", e);\n    }\n\n    tokio::select! 
{\n        Some((task_name, res)) = tasks_handles.next()=> {\n            error!(\"{} task failed: {:?}, exiting\", task_name, res);\n            std::process::exit(1);\n        }\n        // On any shutdown signal, log receival and exit. Additionally, handling\n        // SIGQUIT prevents coredump.\n        _ = sigquit_stream.recv() => info!(\"received SIGQUIT, terminating\"),\n        _ = sigint_stream.recv() => info!(\"received SIGINT, terminating\"),\n        _ = sigterm_stream.recv() => info!(\"received SIGTERM, terminating\")\n\n    };\n    std::process::exit(0);\n}\n\n/// Determine safekeeper id.\nfn set_id(workdir: &Utf8Path, given_id: Option<NodeId>) -> Result<NodeId> {\n    let id_file_path = workdir.join(ID_FILE_NAME);\n\n    let my_id: NodeId;\n    // If file with ID exists, read it in; otherwise set one passed.\n    match fs::read(&id_file_path) {\n        Ok(id_serialized) => {\n            my_id = NodeId(\n                std::str::from_utf8(&id_serialized)\n                    .context(\"failed to parse safekeeper id\")?\n                    .parse()\n                    .context(\"failed to parse safekeeper id\")?,\n            );\n            if let Some(given_id) = given_id {\n                if given_id != my_id {\n                    bail!(\n                        \"safekeeper already initialized with id {}, can't set {}\",\n                        my_id,\n                        given_id\n                    );\n                }\n            }\n            info!(\"safekeeper ID {}\", my_id);\n        }\n        Err(error) => match error.kind() {\n            ErrorKind::NotFound => {\n                my_id = if let Some(given_id) = given_id {\n                    given_id\n                } else {\n                    bail!(\"safekeeper id is not specified\");\n                };\n                let mut f = File::create(&id_file_path)\n                    .with_context(|| format!(\"Failed to create id file at {id_file_path:?}\"))?;\n         
       f.write_all(my_id.to_string().as_bytes())?;\n                f.sync_all()?;\n                info!(\"initialized safekeeper id {}\", my_id);\n            }\n            _ => {\n                return Err(error.into());\n            }\n        },\n    }\n    Ok(my_id)\n}\n\nfn parse_remote_storage(storage_conf: &str) -> anyhow::Result<RemoteStorageConfig> {\n    RemoteStorageConfig::from_toml(&storage_conf.parse()?)\n}\n\n#[test]\nfn verify_cli() {\n    use clap::CommandFactory;\n    Args::command().debug_assert()\n}\n"
  },
  {
    "path": "safekeeper/src/broker.rs",
    "content": "//! Communication with the broker, providing safekeeper peers and pageserver coordination.\n\nuse std::sync::Arc;\nuse std::sync::atomic::AtomicU64;\nuse std::time::{Duration, Instant, UNIX_EPOCH};\n\nuse anyhow::{Context, Error, Result, anyhow, bail};\nuse storage_broker::proto::subscribe_safekeeper_info_request::SubscriptionKey as ProtoSubscriptionKey;\nuse storage_broker::proto::{\n    FilterTenantTimelineId, MessageType, SafekeeperDiscoveryResponse, SubscribeByFilterRequest,\n    SubscribeSafekeeperInfoRequest, TypeSubscription, TypedMessage,\n};\nuse storage_broker::{Request, parse_proto_ttid};\nuse tokio::task::JoinHandle;\nuse tokio::time::sleep;\nuse tracing::*;\n\nuse crate::metrics::{\n    BROKER_ITERATION_TIMELINES, BROKER_PULLED_UPDATES, BROKER_PUSH_ALL_UPDATES_SECONDS,\n    BROKER_PUSHED_UPDATES,\n};\nuse crate::{GlobalTimelines, SafeKeeperConf};\n\nconst RETRY_INTERVAL_MSEC: u64 = 1000;\nconst PUSH_INTERVAL_MSEC: u64 = 1000;\n\nfn make_tls_config(conf: &SafeKeeperConf) -> storage_broker::ClientTlsConfig {\n    storage_broker::ClientTlsConfig::new().ca_certificates(\n        conf.ssl_ca_certs\n            .iter()\n            .map(pem::encode)\n            .map(storage_broker::Certificate::from_pem),\n    )\n}\n\n/// Push once in a while data about all active timelines to the broker.\nasync fn push_loop(\n    conf: Arc<SafeKeeperConf>,\n    global_timelines: Arc<GlobalTimelines>,\n) -> anyhow::Result<()> {\n    if conf.disable_periodic_broker_push {\n        info!(\"broker push_loop is disabled, doing nothing...\");\n        futures::future::pending::<()>().await; // sleep forever\n        return Ok(());\n    }\n\n    let active_timelines_set = global_timelines.get_global_broker_active_set();\n\n    let mut client = storage_broker::connect(\n        conf.broker_endpoint.clone(),\n        conf.broker_keepalive_interval,\n        make_tls_config(&conf),\n    )?;\n    let push_interval = Duration::from_millis(PUSH_INTERVAL_MSEC);\n\n    
let outbound = async_stream::stream! {\n        loop {\n            // Note: we lock runtime here and in timeline methods as GlobalTimelines\n            // is under plain mutex. That's ok, all this code is not performance\n            // sensitive and there is no risk of deadlock as we don't await while\n            // lock is held.\n            let now = Instant::now();\n            let all_tlis = active_timelines_set.get_all();\n            let mut n_pushed_tlis = 0;\n            for tli in &all_tlis {\n                let sk_info = tli.get_safekeeper_info(&conf).await;\n                yield sk_info;\n                BROKER_PUSHED_UPDATES.inc();\n                n_pushed_tlis += 1;\n            }\n            let elapsed = now.elapsed();\n\n            BROKER_PUSH_ALL_UPDATES_SECONDS.observe(elapsed.as_secs_f64());\n            BROKER_ITERATION_TIMELINES.observe(n_pushed_tlis as f64);\n\n            if elapsed > push_interval / 2 {\n                info!(\"broker push is too long, pushed {} timeline updates to broker in {:?}\", n_pushed_tlis, elapsed);\n            }\n\n            sleep(push_interval).await;\n        }\n    };\n    client\n        .publish_safekeeper_info(Request::new(outbound))\n        .await?;\n    Ok(())\n}\n\n/// Subscribe and fetch all the interesting data from the broker.\n#[instrument(name = \"broker_pull\", skip_all)]\nasync fn pull_loop(\n    conf: Arc<SafeKeeperConf>,\n    global_timelines: Arc<GlobalTimelines>,\n    stats: Arc<BrokerStats>,\n) -> Result<()> {\n    let mut client = storage_broker::connect(\n        conf.broker_endpoint.clone(),\n        conf.broker_keepalive_interval,\n        make_tls_config(&conf),\n    )?;\n\n    // TODO: subscribe only to local timelines instead of all\n    let request = SubscribeSafekeeperInfoRequest {\n        subscription_key: Some(ProtoSubscriptionKey::All(())),\n    };\n\n    let mut stream = client\n        .subscribe_safekeeper_info(request)\n        .await\n        
.context(\"subscribe_safekeper_info request failed\")?\n        .into_inner();\n\n    let ok_counter = BROKER_PULLED_UPDATES.with_label_values(&[\"ok\"]);\n    let not_found = BROKER_PULLED_UPDATES.with_label_values(&[\"not_found\"]);\n    let err_counter = BROKER_PULLED_UPDATES.with_label_values(&[\"error\"]);\n\n    while let Some(msg) = stream.message().await? {\n        stats.update_pulled();\n\n        let proto_ttid = msg\n            .tenant_timeline_id\n            .as_ref()\n            .ok_or_else(|| anyhow!(\"missing tenant_timeline_id\"))?;\n        let ttid = parse_proto_ttid(proto_ttid)?;\n        if let Ok(tli) = global_timelines.get(ttid) {\n            // Note that we also receive *our own* info. That's\n            // important, as it is used as an indication of live\n            // connection to the broker.\n\n            // note: there are blocking operations below, but it's considered fine for now\n            let res = tli.record_safekeeper_info(msg).await;\n            if res.is_ok() {\n                ok_counter.inc();\n            } else {\n                err_counter.inc();\n            }\n            res?;\n        } else {\n            not_found.inc();\n        }\n    }\n    bail!(\"end of stream\");\n}\n\n/// Process incoming discover requests. 
This is done in a separate task to avoid\n/// interfering with the normal pull/push loops.\nasync fn discover_loop(\n    conf: Arc<SafeKeeperConf>,\n    global_timelines: Arc<GlobalTimelines>,\n    stats: Arc<BrokerStats>,\n) -> Result<()> {\n    let mut client = storage_broker::connect(\n        conf.broker_endpoint.clone(),\n        conf.broker_keepalive_interval,\n        make_tls_config(&conf),\n    )?;\n\n    let request = SubscribeByFilterRequest {\n        types: vec![TypeSubscription {\n            r#type: MessageType::SafekeeperDiscoveryRequest as i32,\n        }],\n        tenant_timeline_id: Some(FilterTenantTimelineId {\n            enabled: false,\n            tenant_timeline_id: None,\n        }),\n    };\n\n    let mut stream = client\n        .subscribe_by_filter(request)\n        .await\n        .context(\"subscribe_by_filter request failed\")?\n        .into_inner();\n\n    let discover_counter = BROKER_PULLED_UPDATES.with_label_values(&[\"discover\"]);\n\n    while let Some(typed_msg) = stream.message().await? 
{\n        stats.update_pulled();\n\n        match typed_msg.r#type() {\n            MessageType::SafekeeperDiscoveryRequest => {\n                let msg = typed_msg\n                    .safekeeper_discovery_request\n                    .expect(\"proto type mismatch from broker message\");\n\n                let proto_ttid = msg\n                    .tenant_timeline_id\n                    .as_ref()\n                    .ok_or_else(|| anyhow!(\"missing tenant_timeline_id\"))?;\n                let ttid = parse_proto_ttid(proto_ttid)?;\n                if let Ok(tli) = global_timelines.get(ttid) {\n                    // we received a discovery request for a timeline we know about\n                    discover_counter.inc();\n\n                    // create and reply with discovery response\n                    let sk_info = tli.get_safekeeper_info(&conf).await;\n                    let response = SafekeeperDiscoveryResponse {\n                        safekeeper_id: sk_info.safekeeper_id,\n                        tenant_timeline_id: sk_info.tenant_timeline_id,\n                        commit_lsn: sk_info.commit_lsn,\n                        safekeeper_connstr: sk_info.safekeeper_connstr,\n                        availability_zone: sk_info.availability_zone,\n                        standby_horizon: 0,\n                    };\n\n                    // note this is a blocking call\n                    client\n                        .publish_one(TypedMessage {\n                            r#type: MessageType::SafekeeperDiscoveryResponse as i32,\n                            safekeeper_timeline_info: None,\n                            safekeeper_discovery_request: None,\n                            safekeeper_discovery_response: Some(response),\n                        })\n                        .await?;\n                }\n            }\n\n            _ => {\n                warn!(\n                    \"unexpected message type i32 {}, {:?}\",\n                    
typed_msg.r#type,\n                    typed_msg.r#type()\n                );\n            }\n        }\n    }\n    bail!(\"end of stream\");\n}\n\npub async fn task_main(\n    conf: Arc<SafeKeeperConf>,\n    global_timelines: Arc<GlobalTimelines>,\n) -> anyhow::Result<()> {\n    info!(\"started, broker endpoint {:?}\", conf.broker_endpoint);\n\n    let mut ticker = tokio::time::interval(Duration::from_millis(RETRY_INTERVAL_MSEC));\n    let mut push_handle: Option<JoinHandle<Result<(), Error>>> = None;\n    let mut pull_handle: Option<JoinHandle<Result<(), Error>>> = None;\n    let mut discover_handle: Option<JoinHandle<Result<(), Error>>> = None;\n\n    let stats = Arc::new(BrokerStats::new());\n    let stats_task = task_stats(stats.clone());\n    tokio::pin!(stats_task);\n\n    // Selecting on JoinHandles requires some squats; is there a better way to\n    // reap tasks individually?\n\n    // Handling failures in task itself won't catch panic and in Tokio, task's\n    // panic doesn't kill the whole executor, so it is better to do reaping\n    // here.\n    loop {\n        tokio::select! 
{\n                res = async { push_handle.as_mut().unwrap().await }, if push_handle.is_some() => {\n                    // was it panic or normal error?\n                    let err = match res {\n                        Ok(res_internal) => res_internal.unwrap_err(),\n                        Err(err_outer) => err_outer.into(),\n                    };\n                    warn!(\"push task failed: {:?}\", err);\n                    push_handle = None;\n                },\n                res = async { pull_handle.as_mut().unwrap().await }, if pull_handle.is_some() => {\n                    // was it panic or normal error?\n                    match res {\n                        Ok(res_internal) => if let Err(err_inner) = res_internal {\n                            warn!(\"pull task failed: {:?}\", err_inner);\n                        }\n                        Err(err_outer) => { warn!(\"pull task panicked: {:?}\", err_outer) }\n                    };\n                    pull_handle = None;\n                },\n                res = async { discover_handle.as_mut().unwrap().await }, if discover_handle.is_some() => {\n                    // was it panic or normal error?\n                    match res {\n                        Ok(res_internal) => if let Err(err_inner) = res_internal {\n                            warn!(\"discover task failed: {:?}\", err_inner);\n                        }\n                        Err(err_outer) => { warn!(\"discover task panicked: {:?}\", err_outer) }\n                    };\n                    discover_handle = None;\n                },\n                _ = ticker.tick() => {\n                    if push_handle.is_none() {\n                        push_handle = Some(tokio::spawn(push_loop(conf.clone(), global_timelines.clone())));\n                    }\n                    if pull_handle.is_none() {\n                        pull_handle = Some(tokio::spawn(pull_loop(conf.clone(), global_timelines.clone(), stats.clone())));\n   
                 }\n                    if discover_handle.is_none() {\n                        discover_handle = Some(tokio::spawn(discover_loop(conf.clone(), global_timelines.clone(), stats.clone())));\n                    }\n                },\n                _ = &mut stats_task => {}\n        }\n    }\n}\n\nstruct BrokerStats {\n    /// Timestamp of the last received message from the broker.\n    last_pulled_ts: AtomicU64,\n}\n\nimpl BrokerStats {\n    fn new() -> Self {\n        BrokerStats {\n            last_pulled_ts: AtomicU64::new(0),\n        }\n    }\n\n    fn now_millis() -> u64 {\n        std::time::SystemTime::now()\n            .duration_since(UNIX_EPOCH)\n            .expect(\"time is before epoch\")\n            .as_millis() as u64\n    }\n\n    /// Update last_pulled timestamp to current time.\n    fn update_pulled(&self) {\n        self.last_pulled_ts\n            .store(Self::now_millis(), std::sync::atomic::Ordering::Relaxed);\n    }\n}\n\n/// Periodically write to logs if there are issues with receiving data from the broker.\nasync fn task_stats(stats: Arc<BrokerStats>) {\n    let warn_duration = Duration::from_secs(10);\n    let mut ticker = tokio::time::interval(warn_duration);\n\n    loop {\n        tokio::select! {\n            _ = ticker.tick() => {\n                let last_pulled = stats.last_pulled_ts.load(std::sync::atomic::Ordering::SeqCst);\n                if last_pulled == 0 {\n                    // no broker updates yet\n                    continue;\n                }\n\n                let now = BrokerStats::now_millis();\n                if now > last_pulled && now - last_pulled > warn_duration.as_millis() as u64 {\n                    let ts = chrono::DateTime::from_timestamp_millis(last_pulled as i64).expect(\"invalid timestamp\");\n                    info!(\"no broker updates for some time, last update: {:?}\", ts);\n                }\n            }\n        }\n    }\n}\n"
  },
  {
    "path": "storage_controller/migrations/.keep",
    "content": ""
  },
  {
    "path": "test_runner/fixtures/pageserver/makelayers/__init__.py",
    "content": ""
  }
]